author     Roman Divacky <rdivacky@FreeBSD.org>    2009-10-14 17:57:32 +0000
committer  Roman Divacky <rdivacky@FreeBSD.org>    2009-10-14 17:57:32 +0000
commit     59850d0874429601812bc13408cb1f776649027c (patch)
tree       b21f6de4e08b89bb7931806bab798fc2a5e3a686
parent     18f153bdb9db52e7089a2d5293b96c45a3124a26 (diff)
download   src-test2-59850d0874429601812bc13408cb1f776649027c.tar.gz
           src-test2-59850d0874429601812bc13408cb1f776649027c.zip
Diffstat
-rw-r--r--  CMakeLists.txt | 101
-rw-r--r--  CREDITS.TXT | 29
-rw-r--r--  LICENSE.TXT | 2
-rw-r--r--  Makefile | 22
-rw-r--r--  Makefile.config.in | 14
-rw-r--r--  Makefile.rules | 192
-rw-r--r--  README.txt | 4
-rw-r--r--  Xcode/LLVM.xcodeproj/project.pbxproj | 63
-rwxr-xr-x  autoconf/AutoRegen.sh | 42
-rwxr-xr-x  autoconf/config.guess | 799
-rwxr-xr-x  autoconf/config.sub | 262
-rw-r--r--  autoconf/configure.ac | 282
-rw-r--r--  autoconf/m4/config_makefile.m4 | 2
-rw-r--r--  autoconf/m4/linux_mixed_64_32.m4 | 17
-rw-r--r--  bindings/ada/analysis/llvm_analysis-binding.ads | 32
-rw-r--r--  bindings/ada/analysis/llvm_analysis.ads | 30
-rw-r--r--  bindings/ada/analysis/llvm_analysis_wrap.cxx | 369
-rw-r--r--  bindings/ada/bitreader/llvm_bit_reader-binding.ads | 52
-rw-r--r--  bindings/ada/bitreader/llvm_bit_reader.ads | 6
-rw-r--r--  bindings/ada/bitreader/llvm_bitreader_wrap.cxx | 423
-rw-r--r--  bindings/ada/bitwriter/llvm_bit_writer-binding.ads | 28
-rw-r--r--  bindings/ada/bitwriter/llvm_bit_writer.ads | 6
-rw-r--r--  bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx | 335
-rw-r--r--  bindings/ada/executionengine/llvm_execution_engine-binding.ads | 192
-rw-r--r--  bindings/ada/executionengine/llvm_execution_engine.ads | 90
-rw-r--r--  bindings/ada/executionengine/llvm_executionengine_wrap.cxx | 924
-rw-r--r--  bindings/ada/llvm.gpr | 34
-rw-r--r--  bindings/ada/llvm/llvm-binding.ads | 1974
-rw-r--r--  bindings/ada/llvm/llvm.ads | 493
-rw-r--r--  bindings/ada/llvm/llvm_link_time_optimizer-binding.ads | 207
-rw-r--r--  bindings/ada/llvm/llvm_link_time_optimizer.ads | 184
-rw-r--r--  bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx | 923
-rw-r--r--  bindings/ada/llvm/llvm_wrap.cxx | 8817
-rw-r--r--  bindings/ada/target/llvm_target-binding.ads | 138
-rw-r--r--  bindings/ada/target/llvm_target.ads | 72
-rw-r--r--  bindings/ada/target/llvm_target_wrap.cxx | 720
-rw-r--r--  bindings/ada/transforms/llvm_transforms-binding.ads | 206
-rw-r--r--  bindings/ada/transforms/llvm_transforms.ads | 6
-rw-r--r--  bindings/ada/transforms/llvm_transforms_wrap.cxx | 828
-rw-r--r--  bindings/ocaml/Makefile.ocaml | 4
-rw-r--r--  bindings/ocaml/bitreader/bitreader_ocaml.c | 14
-rw-r--r--  bindings/ocaml/bitreader/llvm_bitreader.ml | 6
-rw-r--r--  bindings/ocaml/bitreader/llvm_bitreader.mli | 22
-rw-r--r--  bindings/ocaml/executionengine/executionengine_ocaml.c | 8
-rw-r--r--  bindings/ocaml/executionengine/llvm_executionengine.ml | 3
-rw-r--r--  bindings/ocaml/executionengine/llvm_executionengine.mli | 11
-rw-r--r--  bindings/ocaml/llvm/Makefile | 2
-rw-r--r--  bindings/ocaml/llvm/llvm.ml | 175
-rw-r--r--  bindings/ocaml/llvm/llvm.mli | 462
-rw-r--r--  bindings/ocaml/llvm/llvm_ocaml.c | 374
-rwxr-xr-x  cmake/config-ix.cmake | 85
-rwxr-xr-x  cmake/modules/AddLLVM.cmake | 12
-rw-r--r--  cmake/modules/CheckAtomic.cmake | 18
-rw-r--r--  cmake/modules/GetTargetTriple.cmake | 8
-rwxr-xr-x  cmake/modules/LLVMConfig.cmake | 147
-rw-r--r--  cmake/modules/LLVMLibDeps.cmake | 68
-rw-r--r--  cmake/modules/TableGen.cmake | 15
-rwxr-xr-x  configure | 1188
-rw-r--r--  docs/BitCodeFormat.html | 14
-rw-r--r--  docs/Bugpoint.html | 13
-rw-r--r--  docs/CMake.html | 48
-rw-r--r--  docs/CodeGenerator.html | 29
-rw-r--r--  docs/CodingStandards.html | 621
-rw-r--r--  docs/CommandGuide/FileCheck.pod | 65
-rw-r--r--  docs/CommandGuide/Makefile | 10
-rw-r--r--  docs/CommandGuide/index.html | 4
-rw-r--r--  docs/CommandGuide/lit.pod | 222
-rw-r--r--  docs/CommandGuide/llc.pod | 22
-rw-r--r--  docs/CommandGuide/llvm-as.pod | 6
-rw-r--r--  docs/CommandGuide/llvm-dis.pod | 6
-rw-r--r--  docs/CommandGuide/llvm-extract.pod | 10
-rw-r--r--  docs/CommandGuide/llvm-ld.pod | 6
-rw-r--r--  docs/CommandGuide/llvm-link.pod | 9
-rw-r--r--  docs/CommandGuide/llvmc.pod | 6
-rw-r--r--  docs/CommandGuide/opt.pod | 29
-rw-r--r--  docs/CommandLine.html | 38
-rw-r--r--  docs/CompilerDriver.html | 38
-rw-r--r--  docs/DebuggingJITedCode.html | 171
-rw-r--r--  docs/DeveloperPolicy.html | 27
-rw-r--r--  docs/ExceptionHandling.html | 508
-rw-r--r--  docs/FAQ.html | 6
-rw-r--r--  docs/GCCFEBuildInstrs.html | 3
-rw-r--r--  docs/GarbageCollection.html | 8
-rw-r--r--  docs/GetElementPtr.html | 19
-rw-r--r--  docs/GettingStarted.html | 37
-rw-r--r--  docs/GettingStartedVS.html | 8
-rw-r--r--  docs/GoldPlugin.html | 3
-rw-r--r--  docs/HistoricalNotes/2007-OriginalClangReadme.txt | 178
-rw-r--r--  docs/HowToReleaseLLVM.html | 608
-rw-r--r--  docs/HowToSubmitABug.html | 18
-rw-r--r--  docs/LangRef.html | 6249
-rw-r--r--  docs/LinkTimeOptimization.html | 6
-rw-r--r--  docs/MakefileGuide.html | 9
-rw-r--r--  docs/Passes.html | 123
-rw-r--r--  docs/ProgrammersManual.html | 226
-rw-r--r--  docs/Projects.html | 6
-rw-r--r--  docs/ReleaseNotes.html | 1045
-rw-r--r--  docs/SourceLevelDebugging.html | 963
-rw-r--r--  docs/SystemLibrary.html | 37
-rw-r--r--  docs/TableGenFundamentals.html | 38
-rw-r--r--  docs/TestingGuide.html | 263
-rw-r--r--  docs/UsingLibraries.html | 12
-rw-r--r--  docs/WritingAnLLVMBackend.html | 64
-rw-r--r--  docs/WritingAnLLVMPass.html | 21
-rw-r--r--  docs/index.html | 9
-rw-r--r--  docs/re_format.7 | 756
-rw-r--r--  docs/tutorial/JITTutorial1.html | 6
-rw-r--r--  docs/tutorial/JITTutorial2.html | 10
-rw-r--r--  docs/tutorial/LangImpl2.html | 27
-rw-r--r--  docs/tutorial/LangImpl3.html | 81
-rw-r--r--  docs/tutorial/LangImpl4.html | 165
-rw-r--r--  docs/tutorial/LangImpl5.html | 153
-rw-r--r--  docs/tutorial/LangImpl6.html | 143
-rw-r--r--  docs/tutorial/LangImpl7.html | 149
-rw-r--r--  docs/tutorial/OCamlLangImpl3.html | 13
-rw-r--r--  docs/tutorial/OCamlLangImpl4.html | 13
-rw-r--r--  docs/tutorial/OCamlLangImpl5.html | 13
-rw-r--r--  docs/tutorial/OCamlLangImpl6.html | 9
-rw-r--r--  docs/tutorial/OCamlLangImpl7.html | 11
-rw-r--r--  examples/BrainF/BrainF.cpp | 76
-rw-r--r--  examples/BrainF/BrainF.h | 3
-rw-r--r--  examples/BrainF/BrainFDriver.cpp | 53
-rw-r--r--  examples/Fibonacci/fibonacci.cpp | 26
-rw-r--r--  examples/HowToUseJIT/HowToUseJIT.cpp | 21
-rw-r--r--  examples/Kaleidoscope/CMakeLists.txt | 11
-rw-r--r--  examples/Kaleidoscope/Chapter2/CMakeLists.txt | 3
-rw-r--r--  examples/Kaleidoscope/Chapter2/Makefile | 13
-rw-r--r--  examples/Kaleidoscope/Chapter2/toy.cpp | 398
-rw-r--r--  examples/Kaleidoscope/Chapter3/CMakeLists.txt | 5
-rw-r--r--  examples/Kaleidoscope/Chapter3/Makefile | 15
-rw-r--r--  examples/Kaleidoscope/Chapter3/toy.cpp | 563
-rw-r--r--  examples/Kaleidoscope/Chapter4/CMakeLists.txt | 5
-rw-r--r--  examples/Kaleidoscope/Chapter4/Makefile | 15
-rw-r--r--  examples/Kaleidoscope/Chapter4/toy.cpp | 610
-rw-r--r--  examples/Kaleidoscope/Chapter5/CMakeLists.txt | 5
-rw-r--r--  examples/Kaleidoscope/Chapter5/Makefile | 15
-rw-r--r--  examples/Kaleidoscope/Chapter5/toy.cpp | 855
-rw-r--r--  examples/Kaleidoscope/Chapter6/CMakeLists.txt | 5
-rw-r--r--  examples/Kaleidoscope/Chapter6/Makefile | 15
-rw-r--r--  examples/Kaleidoscope/Chapter6/toy.cpp | 973
-rw-r--r--  examples/Kaleidoscope/Chapter7/CMakeLists.txt | 5
-rw-r--r--  examples/Kaleidoscope/Chapter7/Makefile | 15
-rw-r--r--  examples/Kaleidoscope/Chapter7/toy.cpp | 1139
-rw-r--r--  examples/Kaleidoscope/Makefile | 8
-rw-r--r--  examples/ModuleMaker/ModuleMaker.cpp | 15
-rw-r--r--  examples/ParallelJIT/CMakeLists.txt | 4
-rw-r--r--  examples/ParallelJIT/ParallelJIT.cpp | 33
-rw-r--r--  include/llvm-c/BitReader.h | 8
-rw-r--r--  include/llvm-c/Core.h | 230
-rw-r--r--  include/llvm-c/ExecutionEngine.h | 2
-rw-r--r--  include/llvm-c/Target.h | 15
-rw-r--r--  include/llvm-c/lto.h | 1
-rw-r--r--  include/llvm/ADT/APFloat.h | 12
-rw-r--r--  include/llvm/ADT/APInt.h | 38
-rw-r--r--  include/llvm/ADT/DenseMap.h | 132
-rw-r--r--  include/llvm/ADT/DenseMapInfo.h | 135
-rw-r--r--  include/llvm/ADT/DepthFirstIterator.h | 79
-rw-r--r--  include/llvm/ADT/EquivalenceClasses.h | 7
-rw-r--r--  include/llvm/ADT/FoldingSet.h | 20
-rw-r--r--  include/llvm/ADT/ImmutableMap.h | 17
-rw-r--r--  include/llvm/ADT/ImmutableSet.h | 233
-rw-r--r--  include/llvm/ADT/IndexedMap.h | 2
-rw-r--r--  include/llvm/ADT/PointerIntPair.h | 9
-rw-r--r--  include/llvm/ADT/PointerUnion.h | 113
-rw-r--r--  include/llvm/ADT/PostOrderIterator.h | 11
-rw-r--r--  include/llvm/ADT/SCCIterator.h | 15
-rw-r--r--  include/llvm/ADT/STLExtras.h | 9
-rw-r--r--  include/llvm/ADT/SmallPtrSet.h | 8
-rw-r--r--  include/llvm/ADT/SmallSet.h | 2
-rw-r--r--  include/llvm/ADT/SmallString.h | 66
-rw-r--r--  include/llvm/ADT/SmallVector.h | 23
-rw-r--r--  include/llvm/ADT/SparseBitVector.h | 13
-rw-r--r--  include/llvm/ADT/StringExtras.h | 4
-rw-r--r--  include/llvm/ADT/StringMap.h | 96
-rw-r--r--  include/llvm/ADT/StringRef.h | 335
-rw-r--r--  include/llvm/ADT/Trie.h | 4
-rw-r--r--  include/llvm/ADT/Triple.h | 122
-rw-r--r--  include/llvm/ADT/Twine.h | 422
-rw-r--r--  include/llvm/ADT/ilist.h | 24
-rw-r--r--  include/llvm/ADT/ilist_node.h | 27
-rw-r--r--  include/llvm/AbstractTypeUser.h | 9
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h | 17
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h | 40
-rw-r--r--  include/llvm/Analysis/CallGraph.h | 99
-rw-r--r--  include/llvm/Analysis/ConstantFolding.h | 18
-rw-r--r--  include/llvm/Analysis/ConstantsScanner.h | 4
-rw-r--r--  include/llvm/Analysis/DebugInfo.h | 473
-rw-r--r--  include/llvm/Analysis/Dominators.h | 188
-rw-r--r--  include/llvm/Analysis/FindUsedTypes.h | 3
-rw-r--r--  include/llvm/Analysis/IVUsers.h | 22
-rw-r--r--  include/llvm/Analysis/InlineCost.h | 180
-rw-r--r--  include/llvm/Analysis/Interval.h | 5
-rw-r--r--  include/llvm/Analysis/IntervalIterator.h | 3
-rw-r--r--  include/llvm/Analysis/IntervalPartition.h | 5
-rw-r--r--  include/llvm/Analysis/LibCallAliasAnalysis.h | 2
-rw-r--r--  include/llvm/Analysis/LoopDependenceAnalysis.h | 109
-rw-r--r--  include/llvm/Analysis/LoopInfo.h | 632
-rw-r--r--  include/llvm/Analysis/LoopPass.h | 6
-rw-r--r--  include/llvm/Analysis/MallocHelper.h | 86
-rw-r--r--  include/llvm/Analysis/MemoryDependenceAnalysis.h | 2
-rw-r--r--  include/llvm/Analysis/Passes.h | 22
-rw-r--r--  include/llvm/Analysis/PointerTracking.h | 131
-rw-r--r--  include/llvm/Analysis/PostDominators.h | 14
-rw-r--r--  include/llvm/Analysis/ProfileInfo.h | 95
-rw-r--r--  include/llvm/Analysis/ProfileInfoLoader.h | 52
-rw-r--r--  include/llvm/Analysis/ProfileInfoTypes.h | 3
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h | 354
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 21
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpressions.h | 314
-rw-r--r--  include/llvm/Analysis/SparsePropagation.h | 19
-rw-r--r--  include/llvm/Analysis/Trace.h | 7
-rw-r--r--  include/llvm/Analysis/ValueTracking.h | 23
-rw-r--r--  include/llvm/Argument.h | 3
-rw-r--r--  include/llvm/Assembly/Parser.h | 12
-rw-r--r--  include/llvm/Assembly/Writer.h | 3
-rw-r--r--  include/llvm/Attributes.h | 4
-rw-r--r--  include/llvm/AutoUpgrade.h | 4
-rw-r--r--  include/llvm/BasicBlock.h | 21
-rw-r--r--  include/llvm/Bitcode/Archive.h | 1
-rw-r--r--  include/llvm/Bitcode/BitstreamReader.h | 14
-rw-r--r--  include/llvm/Bitcode/BitstreamWriter.h | 22
-rw-r--r--  include/llvm/Bitcode/LLVMBitCodes.h | 33
-rw-r--r--  include/llvm/Bitcode/ReaderWriter.h | 52
-rw-r--r--  include/llvm/CallGraphSCCPass.h | 6
-rw-r--r--  include/llvm/CodeGen/AsmPrinter.h | 178
-rw-r--r--  include/llvm/CodeGen/BinaryObject.h | 140
-rw-r--r--  include/llvm/CodeGen/CallingConvLower.h | 114
-rw-r--r--  include/llvm/CodeGen/DwarfWriter.h | 22
-rw-r--r--  include/llvm/CodeGen/FastISel.h | 48
-rw-r--r--  include/llvm/CodeGen/FileWriters.h | 10
-rw-r--r--  include/llvm/CodeGen/GCMetadata.h | 2
-rw-r--r--  include/llvm/CodeGen/GCMetadataPrinter.h | 4
-rw-r--r--  include/llvm/CodeGen/JITCodeEmitter.h | 37
-rw-r--r--  include/llvm/CodeGen/LinkAllCodegenComponents.h | 2
-rw-r--r--  include/llvm/CodeGen/LiveInterval.h | 518
-rw-r--r--  include/llvm/CodeGen/LiveIntervalAnalysis.h | 170
-rw-r--r--  include/llvm/CodeGen/LiveStackAnalysis.h | 5
-rw-r--r--  include/llvm/CodeGen/LiveVariables.h | 17
-rw-r--r--  include/llvm/CodeGen/MachineBasicBlock.h | 9
-rw-r--r--  include/llvm/CodeGen/MachineCodeEmitter.h | 36
-rw-r--r--  include/llvm/CodeGen/MachineConstantPool.h | 24
-rw-r--r--  include/llvm/CodeGen/MachineDominators.h | 8
-rw-r--r--  include/llvm/CodeGen/MachineFrameInfo.h | 44
-rw-r--r--  include/llvm/CodeGen/MachineFunction.h | 77
-rw-r--r--  include/llvm/CodeGen/MachineFunctionAnalysis.h | 49
-rw-r--r--  include/llvm/CodeGen/MachineFunctionPass.h | 23
-rw-r--r--  include/llvm/CodeGen/MachineInstr.h | 71
-rw-r--r--  include/llvm/CodeGen/MachineInstrBuilder.h | 12
-rw-r--r--  include/llvm/CodeGen/MachineJumpTableInfo.h | 7
-rw-r--r--  include/llvm/CodeGen/MachineLoopInfo.h | 49
-rw-r--r--  include/llvm/CodeGen/MachineMemOperand.h | 36
-rw-r--r--  include/llvm/CodeGen/MachineModuleInfo.h | 74
-rw-r--r--  include/llvm/CodeGen/MachineModuleInfoImpls.h | 79
-rw-r--r--  include/llvm/CodeGen/MachineOperand.h | 39
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 9
-rw-r--r--  include/llvm/CodeGen/ObjectCodeEmitter.h | 178
-rw-r--r--  include/llvm/CodeGen/Passes.h | 32
-rw-r--r--  include/llvm/CodeGen/PseudoSourceValue.h | 32
-rw-r--r--  include/llvm/CodeGen/RegAllocRegistry.h | 4
-rw-r--r--  include/llvm/CodeGen/RegisterCoalescer.h | 16
-rw-r--r--  include/llvm/CodeGen/RegisterScavenging.h | 94
-rw-r--r--  include/llvm/CodeGen/RuntimeLibcalls.h | 17
-rw-r--r--  include/llvm/CodeGen/ScheduleDAG.h | 55
-rw-r--r--  include/llvm/CodeGen/ScheduleHazardRecognizer.h | 5
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h | 353
-rw-r--r--  include/llvm/CodeGen/SelectionDAGISel.h | 7
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h | 470
-rw-r--r--  include/llvm/CodeGen/ValueTypes.h | 610
-rw-r--r--  include/llvm/CodeGen/ValueTypes.td | 31
-rw-r--r--  include/llvm/CompilerDriver/BuiltinOptions.h | 1
-rw-r--r--  include/llvm/CompilerDriver/Common.td | 21
-rw-r--r--  include/llvm/CompilerDriver/CompilationGraph.h | 4
-rw-r--r--  include/llvm/CompilerDriver/ForceLinkage.h | 40
-rw-r--r--  include/llvm/Config/AsmParsers.def.in | 29
-rw-r--r--  include/llvm/Config/config.h.cmake | 40
-rw-r--r--  include/llvm/Config/config.h.in | 33
-rw-r--r--  include/llvm/Constant.h | 73
-rw-r--r--  include/llvm/Constants.h | 358
-rw-r--r--  include/llvm/DerivedTypes.h | 42
-rw-r--r--  include/llvm/ExecutionEngine/ExecutionEngine.h | 197
-rw-r--r--  include/llvm/ExecutionEngine/JITEventListener.h | 17
-rw-r--r--  include/llvm/ExecutionEngine/JITMemoryManager.h | 84
-rw-r--r--  include/llvm/Function.h | 26
-rw-r--r--  include/llvm/GlobalAlias.h | 4
-rw-r--r--  include/llvm/GlobalValue.h | 24
-rw-r--r--  include/llvm/GlobalVariable.h | 18
-rw-r--r--  include/llvm/InlineAsm.h | 18
-rw-r--r--  include/llvm/InstrTypes.h | 190
-rw-r--r--  include/llvm/Instruction.def | 4
-rw-r--r--  include/llvm/Instruction.h | 38
-rw-r--r--  include/llvm/Instructions.h | 805
-rw-r--r--  include/llvm/IntrinsicInst.h | 26
-rw-r--r--  include/llvm/Intrinsics.h | 6
-rw-r--r--  include/llvm/Intrinsics.td | 81
-rw-r--r--  include/llvm/IntrinsicsARM.td | 171
-rw-r--r--  include/llvm/IntrinsicsBlackfin.td | 34
-rw-r--r--  include/llvm/IntrinsicsX86.td | 108
-rw-r--r--  include/llvm/LLVMContext.h | 195
-rw-r--r--  include/llvm/LinkAllPasses.h | 12
-rw-r--r--  include/llvm/LinkAllVMCore.h | 2
-rw-r--r--  include/llvm/Linker.h | 36
-rw-r--r--  include/llvm/MC/MCAsmInfo.h | 472
-rw-r--r--  include/llvm/MC/MCAsmInfoCOFF.h | 24
-rw-r--r--  include/llvm/MC/MCAsmInfoDarwin.h | 32
-rw-r--r--  include/llvm/MC/MCAsmLexer.h | 141
-rw-r--r--  include/llvm/MC/MCAsmParser.h | 79
-rw-r--r--  include/llvm/MC/MCAssembler.h | 661
-rw-r--r--  include/llvm/MC/MCCodeEmitter.h | 34
-rw-r--r--  include/llvm/MC/MCContext.h | 44
-rw-r--r--  include/llvm/MC/MCDisassembler.h | 50
-rw-r--r--  include/llvm/MC/MCExpr.h | 328
-rw-r--r--  include/llvm/MC/MCInst.h | 78
-rw-r--r--  include/llvm/MC/MCInstPrinter.h | 37
-rw-r--r--  include/llvm/MC/MCSection.h | 46
-rw-r--r--  include/llvm/MC/MCSectionELF.h | 191
-rw-r--r--  include/llvm/MC/MCSectionMachO.h | 175
-rw-r--r--  include/llvm/MC/MCStreamer.h | 93
-rw-r--r--  include/llvm/MC/MCSymbol.h | 85
-rw-r--r--  include/llvm/MC/MCValue.h | 23
-rw-r--r--  include/llvm/MC/SectionKind.h | 221
-rw-r--r--  include/llvm/Metadata.h | 377
-rw-r--r--  include/llvm/Module.h | 138
-rw-r--r--  include/llvm/OperandTraits.h | 7
-rw-r--r--  include/llvm/Operator.h | 306
-rw-r--r--  include/llvm/Pass.h | 35
-rw-r--r--  include/llvm/PassAnalysisSupport.h | 17
-rw-r--r--  include/llvm/PassManagers.h | 33
-rw-r--r--  include/llvm/PassSupport.h | 11
-rw-r--r--  include/llvm/Support/Allocator.h | 97
-rw-r--r--  include/llvm/Support/CFG.h | 19
-rw-r--r--  include/llvm/Support/CallSite.h | 7
-rw-r--r--  include/llvm/Support/Casting.h | 4
-rw-r--r--  include/llvm/Support/CommandLine.h | 104
-rw-r--r--  include/llvm/Support/Compiler.h | 8
-rw-r--r--  include/llvm/Support/ConstantFolder.h | 42
-rw-r--r--  include/llvm/Support/ConstantRange.h | 61
-rw-r--r--  include/llvm/Support/DataTypes.h.cmake | 79
-rw-r--r--  include/llvm/Support/DataTypes.h.in | 69
-rw-r--r--  include/llvm/Support/Debug.h | 52
-rw-r--r--  include/llvm/Support/DebugLoc.h | 33
-rw-r--r--  include/llvm/Support/Dwarf.h | 1
-rw-r--r--  include/llvm/Support/ErrorHandling.h | 87
-rw-r--r--  include/llvm/Support/Format.h | 61
-rw-r--r--  include/llvm/Support/FormattedStream.h | 150
-rw-r--r--  include/llvm/Support/GetElementPtrTypeIterator.h | 5
-rw-r--r--  include/llvm/Support/GraphWriter.h | 107
-rw-r--r--  include/llvm/Support/IRBuilder.h | 541
-rw-r--r--  include/llvm/Support/IRReader.h | 115
-rw-r--r--  include/llvm/Support/InstVisitor.h | 22
-rw-r--r--  include/llvm/Support/LeakDetector.h | 6
-rw-r--r--  include/llvm/Support/ManagedStatic.h | 18
-rw-r--r--  include/llvm/Support/Mangler.h | 74
-rw-r--r--  include/llvm/Support/MathExtras.h | 21
-rw-r--r--  include/llvm/Support/MemoryBuffer.h | 5
-rw-r--r--  include/llvm/Support/MemoryObject.h | 70
-rw-r--r--  include/llvm/Support/NoFolder.h | 27
-rw-r--r--  include/llvm/Support/PassNameParser.h | 5
-rw-r--r--  include/llvm/Support/PatternMatch.h | 26
-rw-r--r--  include/llvm/Support/PointerLikeTypeTraits.h | 10
-rw-r--r--  include/llvm/Support/PrettyStackTrace.h | 6
-rw-r--r--  include/llvm/Support/Recycler.h | 3
-rw-r--r--  include/llvm/Support/Regex.h | 63
-rw-r--r--  include/llvm/Support/Registry.h | 3
-rw-r--r--  include/llvm/Support/SourceMgr.h | 17
-rw-r--r--  include/llvm/Support/StandardPasses.h | 130
-rw-r--r--  include/llvm/Support/StringPool.h | 13
-rw-r--r--  include/llvm/Support/SystemUtils.h | 18
-rw-r--r--  include/llvm/Support/TargetFolder.h | 33
-rw-r--r--  include/llvm/Support/Timer.h | 6
-rw-r--r--  include/llvm/Support/TypeBuilder.h | 259
-rw-r--r--  include/llvm/Support/ValueHandle.h | 155
-rw-r--r--  include/llvm/Support/raw_os_ostream.h | 42
-rw-r--r--  include/llvm/Support/raw_ostream.h | 277
-rw-r--r--  include/llvm/Support/type_traits.h | 40
-rw-r--r--  include/llvm/SymbolTableListTraits.h | 3
-rw-r--r--  include/llvm/System/Alarm.h | 3
-rw-r--r--  include/llvm/System/Disassembler.h | 2
-rw-r--r--  include/llvm/System/DynamicLibrary.h | 102
-rw-r--r--  include/llvm/System/Memory.h | 9
-rw-r--r--  include/llvm/System/Mutex.h | 44
-rw-r--r--  include/llvm/System/Path.h | 21
-rw-r--r--  include/llvm/System/Process.h | 5
-rw-r--r--  include/llvm/System/Program.h | 176
-rw-r--r--  include/llvm/System/RWMutex.h | 18
-rw-r--r--  include/llvm/System/TimeValue.h | 4
-rw-r--r--  include/llvm/Target/SubtargetFeature.h | 7
-rw-r--r--  include/llvm/Target/Target.td | 124
-rw-r--r--  include/llvm/Target/TargetAsmParser.h | 65
-rw-r--r--  include/llvm/Target/TargetCallingConv.td | 10
-rw-r--r--  include/llvm/Target/TargetData.h | 15
-rw-r--r--  include/llvm/Target/TargetELFWriterInfo.h | 23
-rw-r--r--  include/llvm/Target/TargetFrameInfo.h | 31
-rw-r--r--  include/llvm/Target/TargetInstrDesc.h | 45
-rw-r--r--  include/llvm/Target/TargetInstrInfo.h | 141
-rw-r--r--  include/llvm/Target/TargetInstrItineraries.h | 142
-rw-r--r--  include/llvm/Target/TargetLowering.h | 460
-rw-r--r--  include/llvm/Target/TargetLoweringObjectFile.h | 361
-rw-r--r--  include/llvm/Target/TargetMachine.h | 139
-rw-r--r--  include/llvm/Target/TargetOptions.h | 30
-rw-r--r--  include/llvm/Target/TargetRegisterInfo.h | 161
-rw-r--r--  include/llvm/Target/TargetRegistry.h | 560
-rw-r--r--  include/llvm/Target/TargetSchedule.td | 25
-rw-r--r--  include/llvm/Target/TargetSelect.h | 51
-rw-r--r--  include/llvm/Target/TargetSelectionDAG.td | 8
-rw-r--r--  include/llvm/Target/TargetSubtarget.h | 12
-rw-r--r--  include/llvm/Transforms/IPO.h | 5
-rw-r--r--  include/llvm/Transforms/IPO/InlinerPass.h | 18
-rw-r--r--  include/llvm/Transforms/Instrumentation.h | 3
-rw-r--r--  include/llvm/Transforms/Scalar.h | 33
-rw-r--r--  include/llvm/Transforms/Utils/AddrModeMatcher.h | 8
-rw-r--r--  include/llvm/Transforms/Utils/BasicBlockUtils.h | 21
-rw-r--r--  include/llvm/Transforms/Utils/BasicInliner.h | 2
-rw-r--r--  include/llvm/Transforms/Utils/Cloning.h | 42
-rw-r--r--  include/llvm/Transforms/Utils/FunctionUtils.h | 2
-rw-r--r--  include/llvm/Transforms/Utils/Local.h | 6
-rw-r--r--  include/llvm/Transforms/Utils/PromoteMemToReg.h | 2
-rw-r--r--  include/llvm/Transforms/Utils/SSAUpdater.h | 108
-rw-r--r--  include/llvm/Transforms/Utils/SSI.h | 32
-rw-r--r--  include/llvm/Transforms/Utils/UnrollLoop.h | 4
-rw-r--r--  include/llvm/Transforms/Utils/ValueMapper.h | 3
-rw-r--r--  include/llvm/Type.h | 86
-rw-r--r--  include/llvm/TypeSymbolTable.h | 18
-rw-r--r--  include/llvm/Use.h | 7
-rw-r--r--  include/llvm/User.h | 1
-rw-r--r--  include/llvm/Value.h | 98
-rw-r--r--  include/llvm/ValueSymbolTable.h | 12
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 23
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 66
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 108
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 53
-rw-r--r--  lib/Analysis/Analysis.cpp | 1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 411
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 68
-rw-r--r--  lib/Analysis/CMakeLists.txt | 11
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 2
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 463
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp | 9
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 1199
-rw-r--r--  lib/Analysis/IPA/Andersens.cpp | 168
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp | 102
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp | 295
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp | 11
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 9
-rw-r--r--  lib/Analysis/IVUsers.cpp | 57
-rw-r--r--  lib/Analysis/InlineCost.cpp | 338
-rw-r--r--  lib/Analysis/InstCount.cpp | 9
-rw-r--r--  lib/Analysis/Interval.cpp | 21
-rw-r--r--  lib/Analysis/IntervalPartition.cpp | 2
-rw-r--r--  lib/Analysis/LibCallAliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp | 5
-rw-r--r--  lib/Analysis/LoopDependenceAnalysis.cpp | 279
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 366
-rw-r--r--  lib/Analysis/LoopPass.cpp | 60
-rw-r--r--  lib/Analysis/MallocHelper.cpp | 230
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 125
-rw-r--r--  lib/Analysis/PointerTracking.cpp | 265
-rw-r--r--  lib/Analysis/PostDominators.cpp | 10
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp | 310
-rw-r--r--  lib/Analysis/ProfileInfo.cpp | 166
-rw-r--r--  lib/Analysis/ProfileInfoLoader.cpp | 193
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp | 237
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp | 343
-rw-r--r--  lib/Analysis/README.txt | 18
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 2109
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 133
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 482
-rw-r--r--  lib/Analysis/SparsePropagation.cpp | 30
-rw-r--r--  lib/Analysis/Trace.cpp | 10
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 167
-rw-r--r--  lib/Archive/Archive.cpp | 21
-rw-r--r--  lib/Archive/ArchiveReader.cpp | 16
-rw-r--r--  lib/Archive/ArchiveWriter.cpp | 19
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 59
-rw-r--r--  lib/AsmParser/LLLexer.h | 6
-rw-r--r--  lib/AsmParser/LLParser.cpp | 1334
-rw-r--r--  lib/AsmParser/LLParser.h | 16
-rw-r--r--  lib/AsmParser/LLToken.h | 16
-rw-r--r--  lib/AsmParser/Parser.cpp | 39
-rw-r--r--  lib/Bitcode/Reader/BitReader.cpp | 12
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 844
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 50
-rw-r--r--  lib/Bitcode/Reader/Deserialize.cpp | 14
-rw-r--r--  lib/Bitcode/Writer/BitWriter.cpp | 28
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 571
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriterPass.cpp | 21
-rw-r--r--  lib/Bitcode/Writer/Serialize.cpp | 9
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 167
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 23
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1194
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 57
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 34
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1161
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 115
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 917
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 60
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfLabel.cpp | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfLabel.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 40
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfWriter.cpp | 34
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 70
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 175
-rw-r--r--  lib/CodeGen/BranchFolding.h | 84
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 10
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 4
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 9
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 55
-rw-r--r--  lib/CodeGen/ELF.h | 232
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 155
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.h | 102
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 903
-rw-r--r--  lib/CodeGen/ELFWriter.h | 191
-rw-r--r--  lib/CodeGen/ExactHazardRecognizer.cpp | 160
-rw-r--r--  lib/CodeGen/ExactHazardRecognizer.h | 61
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 34
-rw-r--r--  lib/CodeGen/GCMetadataPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 23
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 64
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 490
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 136
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 148
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 1002
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 12
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 280
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 146
-rw-r--r--  lib/CodeGen/MachO.h | 119
-rw-r--r--  lib/CodeGen/MachOCodeEmitter.cpp | 84
-rw-r--r--  lib/CodeGen/MachOCodeEmitter.h | 88
-rw-r--r--  lib/CodeGen/MachOWriter.cpp | 222
-rw-r--r--  lib/CodeGen/MachOWriter.h | 161
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 90
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 4
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 313
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 50
-rw-r--r--  lib/CodeGen/MachineFunctionPass.cpp | 50
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 268
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 76
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 9
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 81
-rw-r--r--  lib/CodeGen/MachineModuleInfoImpls.cpp | 45
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 8
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 59
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 382
-rw-r--r--  lib/CodeGen/ObjectCodeEmitter.cpp | 141
-rw-r--r--  lib/CodeGen/PBQP/AnnotatedGraph.h | 184
-rw-r--r--  lib/CodeGen/PBQP/ExhaustiveSolver.h | 110
-rw-r--r--  lib/CodeGen/PBQP/GraphBase.h | 582
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 789
-rw-r--r--  lib/CodeGen/PBQP/Heuristics/Briggs.h | 383
-rw-r--r--  lib/CodeGen/PBQP/PBQPMath.h | 288
-rw-r--r--  lib/CodeGen/PBQP/SimpleGraph.h | 100
-rw-r--r--  lib/CodeGen/PBQP/Solution.h | 88
-rw-r--r--  lib/CodeGen/PBQP/Solver.h | 31
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 113
-rw-r--r--  lib/CodeGen/PHIElimination.h | 125
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 659
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 228
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 362
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 14
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 20
-rw-r--r--  lib/CodeGen/README.txt | 67
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 188
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 92
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 241
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 495
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 103
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 4
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 135
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 15
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 121
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 654
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 183
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 693
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 119
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 448
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 154
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 318
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 130
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 45
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 68
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 24
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 589
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 81
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 26
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 73
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 113
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 69
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1245
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 1269
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 423
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 38
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 836
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 87
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 223
-rw-r--r--  lib/CodeGen/SimpleHazardRecognizer.h | 89
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 1176
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 88
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 520
-rw-r--r--  lib/CodeGen/Spiller.cpp | 95
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 13
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 75
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 71
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 159
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 459
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 25
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 5
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 22
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 470
-rw-r--r--  lib/CodeGen/VirtRegRewriter.h | 18
-rw-r--r--  lib/CompilerDriver/BuiltinOptions.cpp | 2
-rw-r--r--  lib/CompilerDriver/CompilationGraph.cpp | 6
-rw-r--r--  lib/CompilerDriver/Main.cpp | 28
-rw-r--r--  lib/CompilerDriver/Plugin.cpp | 8
-rw-r--r--  lib/CompilerDriver/Tool.cpp | 2
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 321
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 25
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 188
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 108
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.cpp | 4
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.h | 18
-rw-r--r--  lib/ExecutionEngine/JIT/CMakeLists.txt | 2
-rw-r--r--  lib/ExecutionEngine/JIT/Intercept.cpp | 11
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 172
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 47
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp | 208
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.h | 116
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 182
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 4
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 292
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 387
-rw-r--r--  lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp | 5
-rw-r--r--  lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp | 178
-rw-r--r--  lib/ExecutionEngine/JIT/TargetSelect.cpp | 85
-rw-r--r--  lib/Linker/LinkArchives.cpp | 11
-rw-r--r--  lib/Linker/LinkItems.cpp | 33
-rw-r--r--  lib/Linker/LinkModules.cpp | 121
-rw-r--r--  lib/Linker/Linker.cpp | 45
-rw-r--r--  lib/MC/CMakeLists.txt | 19
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 107
-rw-r--r--  lib/MC/MCAsmInfoCOFF.cpp | 37
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 52
-rw-r--r--  lib/MC/MCAsmLexer.cpp | 23
-rw-r--r--  lib/MC/MCAsmParser.cpp | 18
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 311
-rw-r--r--  lib/MC/MCAssembler.cpp | 1190
-rw-r--r--  lib/MC/MCCodeEmitter.cpp | 18
-rw-r--r--  lib/MC/MCContext.cpp | 33
-rw-r--r--  lib/MC/MCDisassembler.cpp | 14
-rw-r--r--  lib/MC/MCExpr.cpp | 286
-rw-r--r--  lib/MC/MCInst.cpp | 50
-rw-r--r--  lib/MC/MCInstPrinter.cpp | 14
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 379
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 70
-rw-r--r--  lib/MC/MCSection.cpp | 45
-rw-r--r--  lib/MC/MCSectionELF.cpp | 138
-rw-r--r--  lib/MC/MCSectionMachO.cpp | 271
-rw-r--r--  lib/MC/MCStreamer.cpp | 2
-rw-r--r--  lib/MC/MCSymbol.cpp | 110
-rw-r--r--  lib/MC/MCValue.cpp | 34
-rw-r--r--  lib/MC/TargetAsmParser.cpp | 19
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/Support/APFloat.cpp | 301
-rw-r--r--  lib/Support/APInt.cpp | 455
-rw-r--r--  lib/Support/Allocator.cpp | 242
-rw-r--r--  lib/Support/CMakeLists.txt | 15
-rw-r--r--  lib/Support/COPYRIGHT.regex | 54
-rw-r--r--  lib/Support/CommandLine.cpp | 696
-rw-r--r--  lib/Support/ConstantRange.cpp | 330
-rw-r--r--  lib/Support/Debug.cpp | 56
-rw-r--r--  lib/Support/Dwarf.cpp | 43
-rw-r--r--  lib/Support/ErrorHandling.cpp | 73
-rw-r--r--  lib/Support/FoldingSet.cpp | 17
-rw-r--r--  lib/Support/FormattedStream.cpp | 93
-rw-r--r--  lib/Support/GraphWriter.cpp | 128
-rw-r--r--  lib/Support/MemoryObject.cpp | 34
-rw-r--r--  lib/Support/PluginLoader.cpp | 13
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 11
-rw-r--r--  lib/Support/Regex.cpp | 92
-rw-r--r--  lib/Support/SlowOperationInformer.cpp | 11
-rw-r--r--  lib/Support/SourceMgr.cpp | 40
-rw-r--r--  lib/Support/Statistic.cpp | 16
-rw-r--r--  lib/Support/StringMap.cpp | 26
-rw-r--r--  lib/Support/StringPool.cpp | 8
-rw-r--r--  lib/Support/StringRef.cpp | 188
-rw-r--r--  lib/Support/SystemUtils.cpp | 41
-rw-r--r--  lib/Support/TargetRegistry.cpp | 92
-rw-r--r--  lib/Support/Timer.cpp | 92
-rw-r--r--  lib/Support/Triple.cpp | 352
-rw-r--r--  lib/Support/Twine.cpp | 133
-rw-r--r--  lib/Support/raw_os_ostream.cpp | 30
-rw-r--r--  lib/Support/raw_ostream.cpp | 355
-rw-r--r--  lib/Support/regcclass.h | 70
-rw-r--r--  lib/Support/regcname.h | 139
-rw-r--r--  lib/Support/regcomp.c | 1525
-rw-r--r--  lib/Support/regengine.inc | 1027
-rw-r--r--  lib/Support/regerror.c | 135
-rw-r--r--  lib/Support/regex2.h | 157
-rw-r--r--  lib/Support/regex_impl.h | 108
-rw-r--r--  lib/Support/regexec.c | 161
-rw-r--r--  lib/Support/regfree.c | 72
-rw-r--r--  lib/Support/regstrlcpy.c | 52
-rw-r--r--  lib/Support/regutils.h | 53
-rw-r--r--  lib/System/CMakeLists.txt | 25
-rw-r--r--  lib/System/Disassembler.cpp | 2
-rw-r--r--  lib/System/DynamicLibrary.cpp | 79
-rw-r--r--  lib/System/Errno.cpp | 5
-rw-r--r--  lib/System/Makefile | 6
-rw-r--r--  lib/System/Memory.cpp | 19
-rw-r--r--  lib/System/Mutex.cpp | 9
-rw-r--r--  lib/System/Path.cpp | 22
-rw-r--r--  lib/System/Program.cpp | 27
-rw-r--r--  lib/System/RWMutex.cpp | 12
-rw-r--r--  lib/System/Threading.cpp | 1
-rw-r--r--  lib/System/Unix/Alarm.inc | 2
-rw-r--r--  lib/System/Unix/Host.inc | 46
-rw-r--r--  lib/System/Unix/Memory.inc | 7
-rw-r--r--  lib/System/Unix/Path.inc | 73
-rw-r--r--  lib/System/Unix/Process.inc | 29
-rw-r--r--  lib/System/Unix/Program.inc | 131
-rw-r--r--  lib/System/Unix/Signals.inc | 14
-rw-r--r--  lib/System/Unix/TimeValue.inc | 2
-rw-r--r--  lib/System/Win32/DynamicLibrary.inc | 44
-rw-r--r--  lib/System/Win32/Memory.inc | 7
-rw-r--r--  lib/System/Win32/Path.inc | 30
-rw-r--r--  lib/System/Win32/Process.inc | 10
-rw-r--r--  lib/System/Win32/Program.inc | 130
-rw-r--r--  lib/System/Win32/Signals.inc | 53
-rw-r--r--  lib/System/Win32/TimeValue.inc | 2
-rw-r--r--  lib/Target/ARM/ARM.h | 23
-rw-r--r--  lib/Target/ARM/ARM.td | 33
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 109
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 1060
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 333
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 1360
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 148
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 2
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 211
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 775
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 36
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 33
-rw-r--r--  lib/Target/ARM/ARMFrameInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 1196
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 1523
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 91
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 906
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 856
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 243
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 682
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 2127
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 662
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 826
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 195
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 68
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 562
-rw-r--r--  lib/Target/ARM/ARMMCAsmInfo.cpp | 72
-rw-r--r--  lib/Target/ARM/ARMMCAsmInfo.h | 31
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 19
-rw-r--r--  lib/Target/ARM/ARMPerfectShuffle.h | 6586
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 1367
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 121
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 99
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 149
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 12
-rw-r--r--  lib/Target/ARM/ARMScheduleV7.td | 587
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 77
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 31
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 218
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 50
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 39
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 618
-rw-r--r--  lib/Target/ARM/AsmParser/CMakeLists.txt | 6
-rw-r--r--  lib/Target/ARM/AsmParser/Makefile | 15
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 691
-rw-r--r--  lib/Target/ARM/AsmPrinter/Makefile | 2
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 7
-rw-r--r--  lib/Target/ARM/Makefile | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 394
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 28
-rw-r--r--  lib/Target/ARM/README-Thumb2.txt | 6
-rw-r--r--  lib/Target/ARM/README.txt | 63
-rw-r--r--  lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 23
-rw-r--r--  lib/Target/ARM/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/ARM/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 185
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 27
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 323
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 39
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 158
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 635
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 74
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 724
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 29
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 685
-rw-r--r--  lib/Target/Alpha/Alpha.h | 12
-rw-r--r--  lib/Target/Alpha/Alpha.td | 6
-rw-r--r--  lib/Target/Alpha/AlphaBranchSelector.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaCallingConv.td | 37
-rw-r--r--  lib/Target/Alpha/AlphaCodeEmitter.cpp | 36
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 168
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 514
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 42
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 50
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 10
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 12
-rw-r--r--  lib/Target/Alpha/AlphaJITInfo.cpp | 46
-rw-r--r--  lib/Target/Alpha/AlphaMCAsmInfo.cpp | 22
-rw-r--r--  lib/Target/Alpha/AlphaMCAsmInfo.h | 29
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 39
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 5
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.h | 5
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 99
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 34
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 122
-rw-r--r--  lib/Target/Alpha/AsmPrinter/Makefile | 2
-rw-r--r--  lib/Target/Alpha/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Alpha/Makefile | 4
-rw-r--r--  lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp | 20
-rw-r--r--  lib/Target/Alpha/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Alpha/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp | 242
-rw-r--r--  lib/Target/Blackfin/AsmPrinter/CMakeLists.txt | 6
-rw-r--r--  lib/Target/Blackfin/AsmPrinter/Makefile | 16
-rw-r--r--  lib/Target/Blackfin/Blackfin.h | 38
-rw-r--r--  lib/Target/Blackfin/Blackfin.td | 201
-rw-r--r--  lib/Target/Blackfin/BlackfinCallingConv.td | 30
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 191
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 614
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.h | 81
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrFormats.td | 34
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 280
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 80
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 873
-rw-r--r--  lib/Target/Blackfin/BlackfinMCAsmInfo.cpp | 21
-rw-r--r--  lib/Target/Blackfin/BlackfinMCAsmInfo.h | 29
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 472
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 104
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 385
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.cpp | 36
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.h | 45
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 42
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.h | 54
-rw-r--r--  lib/Target/Blackfin/CMakeLists.txt | 21
-rw-r--r--  lib/Target/Blackfin/Makefile | 23
-rw-r--r--  lib/Target/Blackfin/README.txt | 244
-rw-r--r--  lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp | 21
-rw-r--r--  lib/Target/Blackfin/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Blackfin/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 383
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 17
-rw-r--r--  lib/Target/CBackend/Makefile | 3
-rw-r--r--  lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp | 19
-rw-r--r--  lib/Target/CBackend/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/CBackend/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/CMakeLists.txt | 8
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/Makefile | 2
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 160
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt | 2
-rw-r--r--  lib/Target/CellSPU/Makefile | 2
-rw-r--r--  lib/Target/CellSPU/SPU.h | 8
-rw-r--r--  lib/Target/CellSPU/SPUHazardRecognizers.cpp | 7
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 317
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 615
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 43
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 103
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 12
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 7
-rw-r--r--  lib/Target/CellSPU/SPUMCAsmInfo.cpp | 40
-rw-r--r--  lib/Target/CellSPU/SPUMCAsmInfo.h | 28
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td | 6
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 28
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 8
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.cpp | 6
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.h | 9
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 65
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 27
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20
-rw-r--r--  lib/Target/CellSPU/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 130
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 19
-rw-r--r--  lib/Target/CppBackend/Makefile | 3
-rw-r--r--  lib/Target/CppBackend/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 26
-rw-r--r--  lib/Target/CppBackend/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 184
-rw-r--r--  lib/Target/MSIL/MSILWriter.h | 29
-rw-r--r--  lib/Target/MSIL/Makefile | 3
-rw-r--r--  lib/Target/MSIL/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp | 26
-rw-r--r--  lib/Target/MSIL/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/MSP430/AsmPrinter/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp | 281
-rw-r--r--  lib/Target/MSP430/AsmPrinter/Makefile | 15
-rw-r--r--  lib/Target/MSP430/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MSP430/MSP430.h | 8
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 189
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 262
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 60
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 82
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.cpp | 20
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.h | 28
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 35
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 7
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.h | 7
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 57
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 10
-rw-r--r--  lib/Target/MSP430/Makefile | 4
-rw-r--r--  lib/Target/MSP430/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp | 20
-rw-r--r--  lib/Target/MSP430/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/Mips/AsmPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 236
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Mips/Makefile | 2
-rw-r--r--  lib/Target/Mips/Mips.h | 9
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 82
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 302
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 48
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 50
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 45
-rw-r--r--  lib/Target/Mips/MipsMCAsmInfo.cpp | 27
-rw-r--r--  lib/Target/Mips/MipsMCAsmInfo.h | 30
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 4
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 45
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 7
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 36
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 20
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 101
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 31
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.cpp | 93
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.h | 41
-rw-r--r--  lib/Target/Mips/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Mips/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 21
-rw-r--r--  lib/Target/PIC16/AsmPrinter/CMakeLists.txt | 9
-rw-r--r--  lib/Target/PIC16/AsmPrinter/Makefile | 15
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp | 484
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h | 80
-rw-r--r--  lib/Target/PIC16/CMakeLists.txt | 4
-rw-r--r--  lib/Target/PIC16/MCSectionPIC16.h | 88
-rw-r--r--  lib/Target/PIC16/Makefile | 4
-rw-r--r--  lib/Target/PIC16/PIC16.h | 50
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 197
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.h | 16
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 431
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.h | 65
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 5
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.td | 11
-rw-r--r--  lib/Target/PIC16/PIC16MCAsmInfo.cpp | 58
-rw-r--r--  lib/Target/PIC16/PIC16MCAsmInfo.h | 35
-rw-r--r--  lib/Target/PIC16/PIC16MemSelOpt.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.cpp | 17
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.h | 5
-rw-r--r--  lib/Target/PIC16/PIC16Subtarget.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Subtarget.h | 5
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.cpp | 57
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.h | 18
-rw-r--r--  lib/Target/PIC16/PIC16TargetObjectFile.cpp | 440
-rw-r--r--  lib/Target/PIC16/PIC16TargetObjectFile.h | 120
-rw-r--r--  lib/Target/PIC16/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/PIC16/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp | 21
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 565
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt | 2
-rw-r--r--  lib/Target/PowerPC/Makefile | 2
-rw-r--r--  lib/Target/PowerPC/PPC.h | 13
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 37
-rw-r--r--  lib/Target/PowerPC/PPCFrameInfo.h | 285
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 10
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 271
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 836
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 142
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 50
-rw-r--r--  lib/Target/PowerPC/PPCInstrBuilder.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 86
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 10
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 63
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 10
-rw-r--r--  lib/Target/PowerPC/PPCMCAsmInfo.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCMCAsmInfo.h | 31
-rw-r--r--  lib/Target/PowerPC/PPCMachOWriterInfo.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCPredicates.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 149
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 7
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 18
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 28
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 25
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 177
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 45
-rw-r--r--  lib/Target/PowerPC/README.txt | 2
-rw-r--r--  lib/Target/PowerPC/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/PowerPC/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 23
-rw-r--r--  lib/Target/README.txt | 125
-rw-r--r--  lib/Target/Sparc/AsmPrinter/Makefile | 2
-rw-r--r--  lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 203
-rw-r--r--  lib/Target/Sparc/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Sparc/FPMover.cpp | 10
-rw-r--r--  lib/Target/Sparc/Makefile | 2
-rw-r--r--  lib/Target/Sparc/Sparc.h | 10
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 33
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 257
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 39
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 73
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 12
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 65
-rw-r--r--  lib/Target/Sparc/SparcMCAsmInfo.cpp | 38
-rw-r--r--  lib/Target/Sparc/SparcMCAsmInfo.h | 28
-rw-r--r--  lib/Target/Sparc/SparcMachineFunctionInfo.h | 32
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 21
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 5
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.td | 12
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.h | 5
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 66
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 24
-rw-r--r--  lib/Target/Sparc/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Sparc/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 19
-rw-r--r--  lib/Target/SubtargetFeature.cpp | 45
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/CMakeLists.txt | 6
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/Makefile | 15
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 391
-rw-r--r--  lib/Target/SystemZ/CMakeLists.txt | 23
-rw-r--r--  lib/Target/SystemZ/Makefile | 22
-rw-r--r--  lib/Target/SystemZ/SystemZ.h | 61
-rw-r--r--  lib/Target/SystemZ/SystemZ.td | 61
-rw-r--r--  lib/Target/SystemZ/SystemZCallingConv.td | 46
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 829
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 843
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 141
-rw-r--r--  lib/Target/SystemZ/SystemZInstrBuilder.h | 128
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 340
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td | 133
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 648
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 119
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 1155
-rw-r--r--  lib/Target/SystemZ/SystemZMCAsmInfo.cpp | 26
-rw-r--r--  lib/Target/SystemZ/SystemZMCAsmInfo.h | 29
-rw-r--r--  lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 50
-rw-r--r--  lib/Target/SystemZ/SystemZOperands.td | 306
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 343
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 82
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 490
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 47
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.h | 45
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 44
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 61
-rw-r--r--  lib/Target/SystemZ/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/SystemZ/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp | 19
-rw-r--r--  lib/Target/Target.cpp | 2
-rw-r--r--  lib/Target/TargetData.cpp | 90
-rw-r--r--  lib/Target/TargetInstrInfo.cpp | 69
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 1089
-rw-r--r--  lib/Target/TargetMachine.cpp | 39
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/X86/AsmParser/CMakeLists.txt | 6
-rw-r--r--  lib/Target/X86/AsmParser/Makefile | 15
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 479
-rw-r--r--  lib/Target/X86/AsmPrinter/CMakeLists.txt | 6
-rw-r--r--  lib/Target/X86/AsmPrinter/Makefile | 2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 84
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 86
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 947
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.h | 150
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 131
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 99
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 485
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.h | 54
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 14
-rw-r--r--  lib/Target/X86/Makefile | 4
-rw-r--r--  lib/Target/X86/README-X86-64.txt | 49
-rw-r--r--  lib/Target/X86/README.txt | 20
-rw-r--r--  lib/Target/X86/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/X86/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 23
-rw-r--r--  lib/Target/X86/X86.h | 17
-rw-r--r--  lib/Target/X86/X86.td | 32
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.cpp | 123
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 67
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 45
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 672
-rw-r--r--  lib/Target/X86/X86CompilationCallback_Win64.asm | 31
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp | 93
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.h | 23
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 415
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 45
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 6
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 1124
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 2359
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 188
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 468
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 52
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 25
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 30
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 1336
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 122
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 802
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 121
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 560
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 81
-rw-r--r--  lib/Target/X86/X86JITInfo.h | 7
-rw-r--r--  lib/Target/X86/X86MCAsmInfo.cpp | 123
-rw-r--r--  lib/Target/X86/X86MCAsmInfo.h | 42
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 616
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 27
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 344
-rw-r--r--  lib/Target/X86/X86Relocations.h | 30
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 232
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 107
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 279
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 50
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 65
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 40
-rw-r--r--  lib/Target/XCore/AsmPrinter/CMakeLists.txt | 6
-rw-r--r--  lib/Target/XCore/AsmPrinter/Makefile | 16
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 374
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 5
-rw-r--r--  lib/Target/XCore/MCSectionXCore.cpp | 35
-rw-r--r--  lib/Target/XCore/MCSectionXCore.h | 54
-rw-r--r--  lib/Target/XCore/Makefile | 4
-rw-r--r--  lib/Target/XCore/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/XCore/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp | 19
-rw-r--r--  lib/Target/XCore/XCore.h | 8
-rw-r--r--  lib/Target/XCore/XCore.td | 17
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 64
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 606
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 57
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 47
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 14
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 68
-rw-r--r--  lib/Target/XCore/XCoreMCAsmInfo.cpp | 31
-rw-r--r--  lib/Target/XCore/XCoreMCAsmInfo.h | 29
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 97
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 5
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.cpp | 10
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.h | 11
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 50
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 13
-rw-r--r--  lib/Target/XCore/XCoreTargetObjectFile.cpp | 67
-rw-r--r--  lib/Target/XCore/XCoreTargetObjectFile.h | 26
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 6
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 122
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 7
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 2
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 62
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 9
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 38
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 4
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 838
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 10
-rw-r--r--  lib/Transforms/IPO/IndMemRemoval.cpp | 19
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 2
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 4
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 441
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 13
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 108
-rw-r--r--  lib/Transforms/IPO/LowerSetJmp.cpp | 62
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 45
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 17
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp | 45
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 180
-rw-r--r--  lib/Transforms/IPO/StructRetPromotion.cpp | 105
-rw-r--r--  lib/Transforms/Instrumentation/BlockProfiling.cpp | 31
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Instrumentation/EdgeProfiling.cpp | 36
-rw-r--r--  lib/Transforms/Instrumentation/MaximumSpanningTree.h | 95
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 219
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.cpp | 39
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp | 68
-rw-r--r--  lib/Transforms/Makefile | 2
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/BasicBlockPlacement.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 3
-rw-r--r--  lib/Transforms/Scalar/CodeGenLICM.cpp | 112
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 84
-rw-r--r--  lib/Transforms/Scalar/CondPropagate.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 67
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 1386
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 137
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 4048
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 550
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 51
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 70
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 38
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 72
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 349
-rw-r--r--  lib/Transforms/Scalar/LoopUnroll.cpp | 58
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 227
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 230
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 83
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp | 133
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 148
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 199
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 15
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 1044
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 20
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 5
-rw-r--r--  lib/Transforms/Utils/AddrModeMatcher.cpp | 15
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 118
-rw-r--r--  lib/Transforms/Utils/BasicInliner.cpp | 20
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 118
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 7
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 33
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 12
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 139
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp | 6
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 314
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp | 4
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 305
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 16
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 218
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp | 73
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 107
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 48
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 2
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 54
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 335
-rw-r--r--  lib/Transforms/Utils/SSI.cpp | 332
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 241
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 18
-rw-r--r--  lib/Transforms/Utils/UnrollLoop.cpp | 27
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 54
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 870
-rw-r--r--  lib/VMCore/Attributes.cpp | 30
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 188
-rw-r--r--  lib/VMCore/BasicBlock.cpp | 15
-rw-r--r--  lib/VMCore/CMakeLists.txt | 3
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 1096
-rw-r--r--  lib/VMCore/ConstantFold.h | 53
-rw-r--r--  lib/VMCore/Constants.cpp | 2274
-rw-r--r--  lib/VMCore/ConstantsContext.h | 787
-rw-r--r--  lib/VMCore/Core.cpp | 725
-rw-r--r--  lib/VMCore/Dominators.cpp | 104
-rw-r--r--  lib/VMCore/Function.cpp | 31
-rw-r--r--  lib/VMCore/Globals.cpp | 33
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 34
-rw-r--r--  lib/VMCore/Instruction.cpp | 104
-rw-r--r--  lib/VMCore/Instructions.cpp | 1051
-rw-r--r--  lib/VMCore/IntrinsicInst.cpp | 10
-rw-r--r--  lib/VMCore/LLVMContext.cpp | 507
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 202
-rw-r--r--  lib/VMCore/LeakDetector.cpp | 119
-rw-r--r--  lib/VMCore/LeaksContext.h | 89
-rw-r--r--  lib/VMCore/Mangler.cpp | 172
-rw-r--r--  lib/VMCore/Metadata.cpp | 433
-rw-r--r--  lib/VMCore/Module.cpp | 79
-rw-r--r--  lib/VMCore/Pass.cpp | 40
-rw-r--r--  lib/VMCore/PassManager.cpp | 244
-rw-r--r--  lib/VMCore/Type.cpp | 876
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 60
-rw-r--r--  lib/VMCore/TypesContext.h | 424
-rw-r--r--  lib/VMCore/Use.cpp | 2
-rw-r--r--  lib/VMCore/Value.cpp | 337
-rw-r--r--  lib/VMCore/ValueSymbolTable.cpp | 68
-rw-r--r--  lib/VMCore/ValueTypes.cpp | 148
-rw-r--r--  lib/VMCore/Verifier.cpp | 396
-rwxr-xr-x  projects/sample/configure | 10
-rw-r--r--  runtime/Makefile | 16
-rw-r--r--  runtime/libprofile/Makefile | 5
-rw-r--r--  runtime/libprofile/OptimalEdgeProfiling.c | 45
-rw-r--r--  runtime/libprofile/exported_symbols.lst | 2
-rw-r--r--  test/Analysis/Andersens/2007-11-19-InlineAsm.ll | 2
-rw-r--r--  test/Analysis/Andersens/2008-03-19-External.ll | 2
-rw-r--r--  test/Analysis/Andersens/2008-04-07-Memcpy.ll | 2
-rw-r--r--  test/Analysis/Andersens/2008-12-27-BuiltinWrongType.ll | 2
-rw-r--r--  test/Analysis/Andersens/basictest.ll | 2
-rw-r--r--  test/Analysis/Andersens/external.ll | 2
-rw-r--r--  test/Analysis/Andersens/modreftest.ll | 2
-rw-r--r--test/Analysis/Andersens/modreftest2.ll2
-rw-r--r--test/Analysis/Andersens/trivialtest.ll2
-rw-r--r--test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll2
-rw-r--r--test/Analysis/BasicAA/2003-03-04-GEPCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2003-04-22-GEPProblem.ll2
-rw-r--r--test/Analysis/BasicAA/2003-04-25-GEPCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll2
-rw-r--r--test/Analysis/BasicAA/2003-06-01-AliasCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll2
-rw-r--r--test/Analysis/BasicAA/2003-09-19-LocalArgument.ll2
-rw-r--r--test/Analysis/BasicAA/2003-11-04-SimpleCases.ll2
-rw-r--r--test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll2
-rw-r--r--test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll2
-rw-r--r--test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll2
-rw-r--r--test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll2
-rw-r--r--test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll2
-rw-r--r--test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll2
-rw-r--r--test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll4
-rw-r--r--test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll2
-rw-r--r--test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll6
-rw-r--r--test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll2
-rw-r--r--test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll2
-rw-r--r--test/Analysis/BasicAA/2007-11-05-SizeCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2008-04-15-Byval.ll2
-rw-r--r--test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll2
-rw-r--r--test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll2
-rw-r--r--test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll2
-rw-r--r--test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll16
-rw-r--r--test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll30
-rw-r--r--test/Analysis/BasicAA/byval.ll2
-rw-r--r--test/Analysis/BasicAA/cas.ll7
-rw-r--r--test/Analysis/BasicAA/constant-over-index.ll2
-rw-r--r--test/Analysis/BasicAA/featuretest.ll2
-rw-r--r--test/Analysis/BasicAA/gcsetest.ll4
-rw-r--r--test/Analysis/BasicAA/global-size.ll2
-rw-r--r--test/Analysis/BasicAA/modref.ll2
-rw-r--r--test/Analysis/BasicAA/no-escape-call.ll2
-rw-r--r--test/Analysis/BasicAA/nocapture.ll2
-rw-r--r--test/Analysis/BasicAA/phi-aa.ll29
-rw-r--r--test/Analysis/BasicAA/pure-const-dce.ll6
-rw-r--r--test/Analysis/BasicAA/store-promote.ll53
-rw-r--r--test/Analysis/BasicAA/tailcall-modref.ll2
-rw-r--r--test/Analysis/CallGraph/2008-09-09-DirectCall.ll2
-rw-r--r--test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll2
-rw-r--r--test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll2
-rw-r--r--test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll2
-rw-r--r--test/Analysis/Dominators/2007-07-11-SplitBlock.ll2
-rw-r--r--test/Analysis/Dominators/2007-07-12-SplitBlock.ll2
-rw-r--r--test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll2
-rw-r--r--test/Analysis/GlobalsModRef/aliastest.ll2
-rw-r--r--test/Analysis/GlobalsModRef/chaining-analysis.ll2
-rw-r--r--test/Analysis/GlobalsModRef/indirect-global.ll2
-rw-r--r--test/Analysis/GlobalsModRef/modreftest.ll2
-rw-r--r--test/Analysis/GlobalsModRef/purecse.ll2
-rw-r--r--test/Analysis/LoopDependenceAnalysis/alias.ll44
-rw-r--r--test/Analysis/LoopDependenceAnalysis/siv-strong.ll110
-rw-r--r--test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll118
-rw-r--r--test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll56
-rw-r--r--test/Analysis/LoopDependenceAnalysis/ziv.ll63
-rw-r--r--test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll2
-rw-r--r--test/Analysis/PointerTracking/dg.exp3
-rw-r--r--test/Analysis/PointerTracking/sizes.ll84
-rw-r--r--test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll2
-rw-r--r--test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll2
-rw-r--r--test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll2
-rw-r--r--test/Analysis/PostDominators/pr1098.ll2
-rw-r--r--test/Analysis/Profiling/dg.exp4
-rw-r--r--test/Analysis/Profiling/edge-profiling.ll139
-rw-r--r--test/Analysis/Profiling/profiling-tool-chain.ll212
-rw-r--r--test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-02-15-UMax.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll16
-rw-r--r--test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll2
-rw-r--r--test/Analysis/ScalarEvolution/and-xor.ll2
-rw-r--r--test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll30
-rw-r--r--test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll354
-rw-r--r--test/Analysis/ScalarEvolution/avoid-smax-0.ll2
-rw-r--r--test/Analysis/ScalarEvolution/avoid-smax-1.ll4
-rw-r--r--test/Analysis/ScalarEvolution/div-overflow.ll2
-rw-r--r--test/Analysis/ScalarEvolution/do-loop.ll2
-rw-r--r--test/Analysis/ScalarEvolution/max-trip-count.ll6
-rw-r--r--test/Analysis/ScalarEvolution/nsw-offset.ll76
-rw-r--r--test/Analysis/ScalarEvolution/nsw.ll40
-rw-r--r--test/Analysis/ScalarEvolution/pointer-sign-bits.ll2
-rw-r--r--test/Analysis/ScalarEvolution/pr3909.ll2
-rw-r--r--test/Analysis/ScalarEvolution/scev-aa.ll194
-rw-r--r--test/Analysis/ScalarEvolution/sext-inreg.ll2
-rw-r--r--test/Analysis/ScalarEvolution/sext-iv-0.ll2
-rw-r--r--test/Analysis/ScalarEvolution/sext-iv-1.ll2
-rw-r--r--test/Analysis/ScalarEvolution/sext-iv-2.ll74
-rw-r--r--test/Analysis/ScalarEvolution/smax.ll4
-rw-r--r--test/Analysis/ScalarEvolution/trip-count.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count2.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count3.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count4.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count5.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count6.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count7.ll2
-rw-r--r--test/Analysis/ScalarEvolution/trip-count8.ll37
-rw-r--r--test/Analysis/ScalarEvolution/xor-and.ll2
-rw-r--r--test/Analysis/ScalarEvolution/zext-wrap.ll24
-rw-r--r--test/Archive/extract.ll16
-rw-r--r--test/Assembler/2002-01-24-BadSymbolTableAssert.ll2
-rw-r--r--test/Assembler/2002-01-24-ValueRefineAbsType.ll2
-rw-r--r--test/Assembler/2002-02-19-TypeParsing.ll2
-rw-r--r--test/Assembler/2002-03-08-NameCollision.ll2
-rw-r--r--test/Assembler/2002-03-08-NameCollision2.ll2
-rw-r--r--test/Assembler/2002-04-04-PureVirtMethCall.ll2
-rw-r--r--test/Assembler/2002-04-04-PureVirtMethCall2.ll2
-rw-r--r--test/Assembler/2002-04-05-TypeParsing.ll2
-rw-r--r--test/Assembler/2002-04-07-HexFloatConstants.ll2
-rw-r--r--test/Assembler/2002-04-29-NameBinding.ll2
-rw-r--r--test/Assembler/2002-05-02-InvalidForwardRef.ll2
-rw-r--r--test/Assembler/2002-05-02-ParseError.ll2
-rw-r--r--test/Assembler/2002-07-08-HugePerformanceProblem.ll2
-rw-r--r--test/Assembler/2002-07-25-ParserAssertionFailure.ll2
-rw-r--r--test/Assembler/2002-08-15-CastAmbiguity.ll2
-rw-r--r--test/Assembler/2002-08-15-ConstantExprProblem.ll2
-rw-r--r--test/Assembler/2002-08-15-UnresolvedGlobalReference.ll2
-rw-r--r--test/Assembler/2002-08-19-BytecodeReader.ll2
-rw-r--r--test/Assembler/2002-08-22-DominanceProblem.ll2
-rw-r--r--test/Assembler/2002-10-08-LargeArrayPerformance.ll2
-rw-r--r--test/Assembler/2002-10-15-NameClash.ll2
-rw-r--r--test/Assembler/2002-12-15-GlobalResolve.ll2
-rw-r--r--test/Assembler/2003-01-30-UnsignedString.ll2
-rw-r--r--test/Assembler/2003-04-25-UnresolvedGlobalReference.ll2
-rw-r--r--test/Assembler/2003-05-15-AssemblerProblem.ll2
-rw-r--r--test/Assembler/2003-05-15-SwitchBug.ll2
-rw-r--r--test/Assembler/2003-05-21-ConstantShiftExpr.ll2
-rw-r--r--test/Assembler/2003-05-21-EmptyStructTest.ll2
-rw-r--r--test/Assembler/2003-06-30-RecursiveTypeProblem.ll2
-rw-r--r--test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll2
-rw-r--r--test/Assembler/2003-10-04-NotMergingGlobalConstants.ll2
-rw-r--r--test/Assembler/2003-12-30-TypeMapInvalidMemory.ll2
-rw-r--r--test/Assembler/2004-02-27-SelfUseAssertError.ll2
-rw-r--r--test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll2
-rw-r--r--test/Assembler/2004-10-22-BCWriterUndefBug.ll2
-rw-r--r--test/Assembler/2004-11-28-InvalidTypeCrash.ll2
-rw-r--r--test/Assembler/2005-01-31-CallingAggregateFunction.ll2
-rw-r--r--test/Assembler/2007-01-02-Undefined-Arg-Type.ll2
-rw-r--r--test/Assembler/2007-01-05-Cmp-ConstExpr.ll2
-rw-r--r--test/Assembler/2007-01-16-CrashOnBadCast.ll2
-rw-r--r--test/Assembler/2007-01-16-CrashOnBadCast2.ll2
-rw-r--r--test/Assembler/2007-03-18-InvalidNumberedVar.ll2
-rw-r--r--test/Assembler/2008-02-20-MultipleReturnValue.ll2
-rw-r--r--test/Assembler/2008-09-02-FunctionNotes2.ll2
-rw-r--r--test/Assembler/2009-02-28-StripOpaqueName.ll2
-rw-r--r--test/Assembler/2009-07-24-ZeroArgGEP.ll5
-rw-r--r--test/Assembler/ConstantExprFold.ll1
-rw-r--r--test/Assembler/anon-functions.ll2
-rw-r--r--test/Assembler/flags.ll212
-rw-r--r--test/Assembler/getelementptr.ll12
-rw-r--r--test/Assembler/insertextractvalue.ll6
-rw-r--r--test/Assembler/msasm.ll36
-rw-r--r--test/Assembler/select.ll2
-rw-r--r--test/Assembler/unnamed.ll51
-rw-r--r--test/Assembler/vector-cmp.ll12
-rw-r--r--test/Bindings/Ocaml/analysis.ml10
-rw-r--r--test/Bindings/Ocaml/bitreader.ml14
-rw-r--r--test/Bindings/Ocaml/bitwriter.ml8
-rw-r--r--test/Bindings/Ocaml/dg.exp5
-rw-r--r--test/Bindings/Ocaml/executionengine.ml16
-rw-r--r--test/Bindings/Ocaml/scalar_opts.ml10
-rw-r--r--test/Bindings/Ocaml/target.ml11
-rw-r--r--test/Bindings/Ocaml/vmcore.ml168
-rw-r--r--test/Bitcode/extractelement.ll2
-rw-r--r--test/Bitcode/memcpy.ll2
-rw-r--r--test/Bitcode/metadata-2.ll87
-rw-r--r--test/Bitcode/metadata.ll6
-rw-r--r--test/BugPoint/crash-narrowfunctiontest.ll2
-rw-r--r--test/BugPoint/remove_arguments_test.ll2
-rw-r--r--test/CMakeLists.txt31
-rw-r--r--test/CodeGen/ARM/2006-11-10-CycleInDAG.ll2
-rw-r--r--test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-07-CombinerCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-13-InstrSched.ll4
-rw-r--r--test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-PEIBug.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-30-CombinerCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-07-jumptoentry.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-07-tailmerge-1.ll8
-rw-r--r--test/CodeGen/ARM/2007-05-09-tailmerge-2.ll8
-rw-r--r--test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-22-tailmerge-3.ll16
-rw-r--r--test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll2
-rw-r--r--test/CodeGen/ARM/2007-08-15-ReuseBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll2
-rw-r--r--test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-07-17-Fdiv.ll2
-rw-r--r--test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll2
-rw-r--r--test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-09-14-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-09-17-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-27-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-03-07-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-03-09-AddrModeBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-06-AsmModifier.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-AggregateAddr.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-FREM.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-FloatUndef.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll4
-rw-r--r--test/CodeGen/ARM/2009-06-02-ISelCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-22-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll2
-rw-r--r--test/CodeGen/ARM/2009-07-01-CommuteBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll7
-rw-r--r--test/CodeGen/ARM/2009-07-18-RewriterBug.ll1323
-rw-r--r--test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll94
-rw-r--r--test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll95
-rw-r--r--test/CodeGen/ARM/2009-07-29-VFP3Registers.ll108
-rw-r--r--test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll29
-rw-r--r--test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll33
-rw-r--r--test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll25
-rw-r--r--test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll42
-rw-r--r--test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll10
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill.ll40
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill2.ll38
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill3.ll31
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill4.ll26
-rw-r--r--test/CodeGen/ARM/2009-08-23-linkerprivate.ll8
-rw-r--r--test/CodeGen/ARM/2009-08-26-ScalarToVector.ll27
-rw-r--r--test/CodeGen/ARM/2009-08-27-ScalarToVector.ll35
-rw-r--r--test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll25
-rw-r--r--test/CodeGen/ARM/2009-08-29-TooLongSplat.ll23
-rw-r--r--test/CodeGen/ARM/2009-08-31-LSDA-Name.ll103
-rw-r--r--test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll9
-rw-r--r--test/CodeGen/ARM/2009-09-01-PostRAProlog.ll106
-rw-r--r--test/CodeGen/ARM/2009-09-09-AllOnes.ll10
-rw-r--r--test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll18
-rw-r--r--test/CodeGen/ARM/2009-09-10-postdec.ll11
-rw-r--r--test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll61
-rw-r--r--test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll41
-rw-r--r--test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll34
-rw-r--r--test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll14
-rw-r--r--test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll23
-rw-r--r--test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll21
-rw-r--r--test/CodeGen/ARM/2009-09-24-spill-align.ll17
-rw-r--r--test/CodeGen/ARM/2009-09-27-CoalescerBug.ll24
-rw-r--r--test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll19
-rw-r--r--test/CodeGen/ARM/addrmode.ll2
-rw-r--r--test/CodeGen/ARM/aliases.ll3
-rw-r--r--test/CodeGen/ARM/align.ll8
-rw-r--r--test/CodeGen/ARM/alloca.ll4
-rw-r--r--test/CodeGen/ARM/argaddr.ll2
-rw-r--r--test/CodeGen/ARM/arguments-nosplit-double.ll2
-rw-r--r--test/CodeGen/ARM/arguments-nosplit-i64.ll2
-rw-r--r--test/CodeGen/ARM/arguments.ll4
-rw-r--r--test/CodeGen/ARM/arguments2.ll4
-rw-r--r--test/CodeGen/ARM/arguments3.ll4
-rw-r--r--test/CodeGen/ARM/arguments4.ll4
-rw-r--r--test/CodeGen/ARM/arguments5.ll4
-rw-r--r--test/CodeGen/ARM/arguments6.ll4
-rw-r--r--test/CodeGen/ARM/arguments7.ll4
-rw-r--r--test/CodeGen/ARM/arguments8.ll4
-rw-r--r--test/CodeGen/ARM/arguments_f64_backfill.ll2
-rw-r--r--test/CodeGen/ARM/arm-asm.ll2
-rw-r--r--test/CodeGen/ARM/arm-frameaddr.ll4
-rw-r--r--test/CodeGen/ARM/arm-negative-stride.ll2
-rw-r--r--test/CodeGen/ARM/bfc.ll19
-rw-r--r--test/CodeGen/ARM/bic.ll2
-rw-r--r--test/CodeGen/ARM/bits.ll2
-rw-r--r--test/CodeGen/ARM/bx_fold.ll4
-rw-r--r--test/CodeGen/ARM/call.ll6
-rw-r--r--test/CodeGen/ARM/call_nolink.ll2
-rw-r--r--test/CodeGen/ARM/carry.ll6
-rw-r--r--test/CodeGen/ARM/clz.ll2
-rw-r--r--test/CodeGen/ARM/compare-call.ll2
-rw-r--r--test/CodeGen/ARM/constants.ll14
-rw-r--r--test/CodeGen/ARM/cse-libcalls.ll2
-rw-r--r--test/CodeGen/ARM/ctors_dtors.ll24
-rw-r--r--test/CodeGen/ARM/div.ll2
-rw-r--r--test/CodeGen/ARM/dyn-stackalloc.ll2
-rw-r--r--test/CodeGen/ARM/extloadi1.ll2
-rw-r--r--test/CodeGen/ARM/fabss.ll15
-rw-r--r--test/CodeGen/ARM/fadds.ll12
-rw-r--r--test/CodeGen/ARM/fcopysign.ll4
-rw-r--r--test/CodeGen/ARM/fdivs.ll12
-rw-r--r--test/CodeGen/ARM/fixunsdfdi.ll4
-rw-r--r--test/CodeGen/ARM/fmacs.ll13
-rw-r--r--test/CodeGen/ARM/fmdrr-fmrrd.ll4
-rw-r--r--test/CodeGen/ARM/fmscs.ll13
-rw-r--r--test/CodeGen/ARM/fmuls.ll12
-rw-r--r--test/CodeGen/ARM/fnegs.ll25
-rw-r--r--test/CodeGen/ARM/fnmacs.ll13
-rw-r--r--test/CodeGen/ARM/fnmscs.ll24
-rw-r--r--test/CodeGen/ARM/fnmul.ll4
-rw-r--r--test/CodeGen/ARM/fnmuls.ll23
-rw-r--r--test/CodeGen/ARM/formal.ll2
-rw-r--r--test/CodeGen/ARM/fp.ll38
-rw-r--r--test/CodeGen/ARM/fp_convert.ll49
-rw-r--r--test/CodeGen/ARM/fparith.ll34
-rw-r--r--test/CodeGen/ARM/fpcmp.ll30
-rw-r--r--test/CodeGen/ARM/fpcmp_ueq.ll4
-rw-r--r--test/CodeGen/ARM/fpconv.ll64
-rw-r--r--test/CodeGen/ARM/fpmem.ll6
-rw-r--r--test/CodeGen/ARM/fpow.ll2
-rw-r--r--test/CodeGen/ARM/fpowi.ll2
-rw-r--r--test/CodeGen/ARM/fptoint.ll4
-rw-r--r--test/CodeGen/ARM/fsubs.ll10
-rw-r--r--test/CodeGen/ARM/hardfloat_neon.ll13
-rw-r--r--test/CodeGen/ARM/hello.ll8
-rw-r--r--test/CodeGen/ARM/hidden-vis-2.ll5
-rw-r--r--test/CodeGen/ARM/hidden-vis-3.ll9
-rw-r--r--test/CodeGen/ARM/hidden-vis.ll19
-rw-r--r--test/CodeGen/ARM/iabs.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt1.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt2.ll8
-rw-r--r--test/CodeGen/ARM/ifcvt3.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt4.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll5
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll8
-rw-r--r--test/CodeGen/ARM/ifcvt7.ll11
-rw-r--r--test/CodeGen/ARM/ifcvt8.ll5
-rw-r--r--test/CodeGen/ARM/ifcvt9.ll2
-rw-r--r--test/CodeGen/ARM/illegal-vector-bitcast.ll3
-rw-r--r--test/CodeGen/ARM/imm.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm-imm-arm.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm2.ll2
-rw-r--r--test/CodeGen/ARM/insn-sched1.ll4
-rw-r--r--test/CodeGen/ARM/ispositive.ll2
-rw-r--r--test/CodeGen/ARM/large-stack.ll2
-rw-r--r--test/CodeGen/ARM/ldm.ll6
-rw-r--r--test/CodeGen/ARM/ldr.ll10
-rw-r--r--test/CodeGen/ARM/ldr_ext.ll31
-rw-r--r--test/CodeGen/ARM/ldr_frame.ll2
-rw-r--r--test/CodeGen/ARM/ldr_post.ll2
-rw-r--r--test/CodeGen/ARM/ldr_pre.ll2
-rw-r--r--test/CodeGen/ARM/ldrd.ll14
-rw-r--r--test/CodeGen/ARM/load-global.ll12
-rw-r--r--test/CodeGen/ARM/load.ll2
-rw-r--r--test/CodeGen/ARM/long-setcc.ll2
-rw-r--r--test/CodeGen/ARM/long.ll16
-rw-r--r--test/CodeGen/ARM/long_shift.ll2
-rw-r--r--test/CodeGen/ARM/lsr-code-insertion.ll4
-rw-r--r--test/CodeGen/ARM/lsr-scale-addr-mode.ll2
-rw-r--r--test/CodeGen/ARM/mem.ll4
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll8
-rw-r--r--test/CodeGen/ARM/memfunc.ll2
-rw-r--r--test/CodeGen/ARM/mls.ll14
-rw-r--r--test/CodeGen/ARM/mul.ll4
-rw-r--r--test/CodeGen/ARM/mul_const.ll17
-rw-r--r--test/CodeGen/ARM/mulhi.ll6
-rw-r--r--test/CodeGen/ARM/mvn.ll2
-rw-r--r--test/CodeGen/ARM/neon_arith1.ll2
-rw-r--r--test/CodeGen/ARM/neon_ld1.ll6
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll6
-rw-r--r--test/CodeGen/ARM/pack.ll4
-rw-r--r--test/CodeGen/ARM/pr3502.ll2
-rw-r--r--test/CodeGen/ARM/private.ll2
-rw-r--r--test/CodeGen/ARM/remat.ll4
-rw-r--r--test/CodeGen/ARM/ret0.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg1.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg3.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg4.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg5.ll2
-rw-r--r--test/CodeGen/ARM/ret_f32_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_f32_arg5.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg_reg_split.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg_split.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg_stack.ll2
-rw-r--r--test/CodeGen/ARM/ret_i128_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg3.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg_split.ll2
-rw-r--r--test/CodeGen/ARM/ret_void.ll2
-rw-r--r--test/CodeGen/ARM/rev.ll4
-rw-r--r--test/CodeGen/ARM/sbfx.ll37
-rw-r--r--test/CodeGen/ARM/section.ll4
-rw-r--r--test/CodeGen/ARM/select.ll27
-rw-r--r--test/CodeGen/ARM/select_xform.ll2
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll4
-rw-r--r--test/CodeGen/ARM/smul.ll10
-rw-r--r--test/CodeGen/ARM/spill-q.ll57
-rw-r--r--test/CodeGen/ARM/stack-frame.ll4
-rw-r--r--test/CodeGen/ARM/stm.ll2
-rw-r--r--test/CodeGen/ARM/str_post.ll4
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll4
-rw-r--r--test/CodeGen/ARM/str_pre.ll2
-rw-r--r--test/CodeGen/ARM/str_trunc.ll4
-rw-r--r--test/CodeGen/ARM/sxt_rot.ll6
-rw-r--r--test/CodeGen/ARM/t2-imm.ll9
-rw-r--r--test/CodeGen/ARM/thread_pointer.ll2
-rw-r--r--test/CodeGen/ARM/tls1.ll6
-rw-r--r--test/CodeGen/ARM/tls2.ll6
-rw-r--r--test/CodeGen/ARM/tls3.ll2
-rw-r--r--test/CodeGen/ARM/trunc_ldr.ll4
-rw-r--r--test/CodeGen/ARM/truncstore-dag-combine.ll4
-rw-r--r--test/CodeGen/ARM/tst_teq.ll4
-rw-r--r--test/CodeGen/ARM/uint64tof64.ll2
-rw-r--r--test/CodeGen/ARM/unaligned_load_store.ll39
-rw-r--r--test/CodeGen/ARM/unord.ll4
-rw-r--r--test/CodeGen/ARM/uxt_rot.ll6
-rw-r--r--test/CodeGen/ARM/uxtb.ll2
-rw-r--r--test/CodeGen/ARM/vaba.ll100
-rw-r--r--test/CodeGen/ARM/vabd.ll107
-rw-r--r--test/CodeGen/ARM/vabs.ll85
-rw-r--r--test/CodeGen/ARM/vadd.ll213
-rw-r--r--test/CodeGen/ARM/vargs.ll2
-rw-r--r--test/CodeGen/ARM/vargs_align.ll10
-rw-r--r--test/CodeGen/ARM/vbits.ll507
-rw-r--r--test/CodeGen/ARM/vbsl.ll20
-rw-r--r--test/CodeGen/ARM/vceq.ll62
-rw-r--r--test/CodeGen/ARM/vcge.ll126
-rw-r--r--test/CodeGen/ARM/vcgt.ll126
-rw-r--r--test/CodeGen/ARM/vcnt.ll119
-rw-r--r--test/CodeGen/ARM/vcombine.ll36
-rw-r--r--test/CodeGen/ARM/vcvt.ll97
-rw-r--r--test/CodeGen/ARM/vdup.ll143
-rw-r--r--test/CodeGen/ARM/vext.ll56
-rw-r--r--test/CodeGen/ARM/vfcmp.ll101
-rw-r--r--test/CodeGen/ARM/vfp.ll43
-rw-r--r--test/CodeGen/ARM/vget_lane.ll146
-rw-r--r--test/CodeGen/ARM/vhadd.ll156
-rw-r--r--test/CodeGen/ARM/vhsub.ll32
-rw-r--r--test/CodeGen/ARM/vicmp.ll88
-rw-r--r--test/CodeGen/ARM/vld1.ll83
-rw-r--r--test/CodeGen/ARM/vld2.ll113
-rw-r--r--test/CodeGen/ARM/vld3.ll117
-rw-r--r--test/CodeGen/ARM/vld4.ll117
-rw-r--r--test/CodeGen/ARM/vldlane.ll328
-rw-r--r--test/CodeGen/ARM/vminmax.ll293
-rw-r--r--test/CodeGen/ARM/vmla.ll126
-rw-r--r--test/CodeGen/ARM/vmls.ll126
-rw-r--r--test/CodeGen/ARM/vmov.ll214
-rw-r--r--test/CodeGen/ARM/vmul.ll190
-rw-r--r--test/CodeGen/ARM/vneg.ll78
-rw-r--r--test/CodeGen/ARM/vpadal.ll32
-rw-r--r--test/CodeGen/ARM/vpadd.ll142
-rw-r--r--test/CodeGen/ARM/vpminmax.ll147
-rw-r--r--test/CodeGen/ARM/vqadd.ll42
-rw-r--r--test/CodeGen/ARM/vqdmul.ll281
-rw-r--r--test/CodeGen/ARM/vqshl.ll266
-rw-r--r--test/CodeGen/ARM/vqshrn.ll113
-rw-r--r--test/CodeGen/ARM/vqsub.ll42
-rw-r--r--test/CodeGen/ARM/vrec.ll119
-rw-r--r--test/CodeGen/ARM/vrev.ll113
-rw-r--r--test/CodeGen/ARM/vshift.ll145
-rw-r--r--test/CodeGen/ARM/vshiftins.ll42
-rw-r--r--test/CodeGen/ARM/vshl.ll394
-rw-r--r--test/CodeGen/ARM/vshll.ll29
-rw-r--r--test/CodeGen/ARM/vshrn.ll39
-rw-r--r--test/CodeGen/ARM/vsra.ll82
-rw-r--r--test/CodeGen/ARM/vst1.ll93
-rw-r--r--test/CodeGen/ARM/vst2.ll84
-rw-r--r--test/CodeGen/ARM/vst3.ll88
-rw-r--r--test/CodeGen/ARM/vst4.ll88
-rw-r--r--test/CodeGen/ARM/vstlane.ll197
-rw-r--r--test/CodeGen/ARM/vsub.ll213
-rw-r--r--test/CodeGen/ARM/vtbl.ll109
-rw-r--r--test/CodeGen/ARM/vtrn.ll97
-rw-r--r--test/CodeGen/ARM/vuzp.ll75
-rw-r--r--test/CodeGen/ARM/vzip.ll75
-rw-r--r--test/CodeGen/ARM/weak.ll4
-rw-r--r--test/CodeGen/ARM/weak2.ll2
-rw-r--r--test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll2
-rw-r--r--test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll2
-rw-r--r--test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll2
-rw-r--r--test/CodeGen/Alpha/2006-01-26-VaargBreak.ll2
-rw-r--r--test/CodeGen/Alpha/2006-04-04-zextload.ll2
-rw-r--r--test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll2
-rw-r--r--test/CodeGen/Alpha/2006-11-01-vastart.ll2
-rw-r--r--test/CodeGen/Alpha/2007-11-27-mulneg3.ll2
-rw-r--r--test/CodeGen/Alpha/2008-11-10-smul_lohi.ll2
-rw-r--r--test/CodeGen/Alpha/2008-11-12-Add128.ll2
-rw-r--r--test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll6
-rw-r--r--test/CodeGen/Alpha/add.ll2
-rw-r--r--test/CodeGen/Alpha/add128.ll2
-rw-r--r--test/CodeGen/Alpha/bic.ll2
-rw-r--r--test/CodeGen/Alpha/bsr.ll2
-rw-r--r--test/CodeGen/Alpha/call_adj.ll2
-rw-r--r--test/CodeGen/Alpha/cmov.ll4
-rw-r--r--test/CodeGen/Alpha/cmpbge.ll2
-rw-r--r--test/CodeGen/Alpha/ctlz.ll8
-rw-r--r--test/CodeGen/Alpha/ctlz_e.ll2
-rw-r--r--test/CodeGen/Alpha/ctpop.ll8
-rw-r--r--test/CodeGen/Alpha/eqv.ll2
-rw-r--r--test/CodeGen/Alpha/i32_sub_1.ll2
-rw-r--r--test/CodeGen/Alpha/illegal-element-type.ll2
-rw-r--r--test/CodeGen/Alpha/jmp_table.ll8
-rw-r--r--test/CodeGen/Alpha/mb.ll2
-rw-r--r--test/CodeGen/Alpha/mul128.ll2
-rw-r--r--test/CodeGen/Alpha/mul5.ll2
-rw-r--r--test/CodeGen/Alpha/neg1.ll2
-rw-r--r--test/CodeGen/Alpha/not.ll2
-rw-r--r--test/CodeGen/Alpha/ornot.ll2
-rw-r--r--test/CodeGen/Alpha/private.ll2
-rw-r--r--test/CodeGen/Alpha/rpcc.ll2
-rw-r--r--test/CodeGen/Alpha/srl_and.ll2
-rw-r--r--test/CodeGen/Alpha/sub128.ll2
-rw-r--r--test/CodeGen/Alpha/weak.ll4
-rw-r--r--test/CodeGen/Alpha/wmb.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot2.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot3.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot4.ll2
-rw-r--r--test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll15
-rw-r--r--test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll17
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll19
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-MissingDead.ll25
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll17
-rw-r--r--test/CodeGen/Blackfin/add-overflow.ll18
-rw-r--r--test/CodeGen/Blackfin/add.ll5
-rw-r--r--test/CodeGen/Blackfin/addsub-i128.ll42
-rw-r--r--test/CodeGen/Blackfin/basic-i1.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i16.ll36
-rw-r--r--test/CodeGen/Blackfin/basic-i32.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i64.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i8.ll51
-rw-r--r--test/CodeGen/Blackfin/basictest.ll19
-rw-r--r--test/CodeGen/Blackfin/burg.ll19
-rw-r--r--test/CodeGen/Blackfin/cmp-small-imm.ll6
-rw-r--r--test/CodeGen/Blackfin/cmp64.ll17
-rw-r--r--test/CodeGen/Blackfin/ct32.ll20
-rw-r--r--test/CodeGen/Blackfin/ct64.ll20
-rw-r--r--test/CodeGen/Blackfin/ctlz16.ll18
-rw-r--r--test/CodeGen/Blackfin/ctlz64.ll15
-rw-r--r--test/CodeGen/Blackfin/ctpop16.ll18
-rw-r--r--test/CodeGen/Blackfin/cttz16.ll18
-rw-r--r--test/CodeGen/Blackfin/cycles.ll17
-rw-r--r--test/CodeGen/Blackfin/dg.exp5
-rw-r--r--test/CodeGen/Blackfin/double-cast.ll8
-rw-r--r--test/CodeGen/Blackfin/frameindex.ll10
-rw-r--r--test/CodeGen/Blackfin/i17mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i1mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i1ops.ll10
-rw-r--r--test/CodeGen/Blackfin/i216mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i248mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i256mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i256param.ll7
-rw-r--r--test/CodeGen/Blackfin/i56param.ll8
-rw-r--r--test/CodeGen/Blackfin/i8mem.ll10
-rw-r--r--test/CodeGen/Blackfin/inline-asm.ll38
-rw-r--r--test/CodeGen/Blackfin/int-setcc.ll80
-rw-r--r--test/CodeGen/Blackfin/invalid-apint.ll15
-rw-r--r--test/CodeGen/Blackfin/jumptable.ll53
-rw-r--r--test/CodeGen/Blackfin/large-switch.ll187
-rw-r--r--test/CodeGen/Blackfin/load-i16.ll13
-rw-r--r--test/CodeGen/Blackfin/logic-i16.ll16
-rw-r--r--test/CodeGen/Blackfin/many-args.ll23
-rw-r--r--test/CodeGen/Blackfin/mulhu.ll106
-rw-r--r--test/CodeGen/Blackfin/printf.ll10
-rw-r--r--test/CodeGen/Blackfin/printf2.ll8
-rw-r--r--test/CodeGen/Blackfin/promote-logic.ll42
-rw-r--r--test/CodeGen/Blackfin/promote-setcc.ll37
-rw-r--r--test/CodeGen/Blackfin/sdiv.ll5
-rw-r--r--test/CodeGen/Blackfin/simple-select.ll11
-rw-r--r--test/CodeGen/Blackfin/switch.ll18
-rw-r--r--test/CodeGen/Blackfin/switch2.ll16
-rw-r--r--test/CodeGen/Blackfin/sync-intr.ll13
-rw-r--r--test/CodeGen/CBackend/2002-05-16-NameCollide.ll2
-rw-r--r--test/CodeGen/CBackend/2002-05-21-MissingReturn.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-DataPointer.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll2
-rw-r--r--test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll2
-rw-r--r--test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll2
-rw-r--r--test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll2
-rw-r--r--test/CodeGen/CBackend/2002-10-16-External.ll2
-rw-r--r--test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll2
-rw-r--r--test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll2
-rw-r--r--test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll2
-rw-r--r--test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll2
-rw-r--r--test/CodeGen/CBackend/2003-05-31-MissingStructName.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-01-NullPointerType.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-11-HexConstant.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll2
-rw-r--r--test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll2
-rw-r--r--test/CodeGen/CBackend/2003-10-23-UnusedType.ll2
-rw-r--r--test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll2
-rw-r--r--test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll2
-rw-r--r--test/CodeGen/CBackend/2004-08-09-va-end-null.ll2
-rw-r--r--test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll2
-rw-r--r--test/CodeGen/CBackend/2004-12-03-ExternStatics.ll2
-rw-r--r--test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll2
-rw-r--r--test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll2
-rw-r--r--test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll2
-rw-r--r--test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll2
-rw-r--r--test/CodeGen/CBackend/2005-08-23-Fmod.ll2
-rw-r--r--test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll2
-rw-r--r--test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll2
-rw-r--r--test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll2
-rw-r--r--test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll2
-rw-r--r--test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll4
-rw-r--r--test/CodeGen/CBackend/2007-02-05-memset.ll2
-rw-r--r--test/CodeGen/CBackend/2007-02-23-NameConflicts.ll6
-rw-r--r--test/CodeGen/CBackend/2007-07-11-PackedStruct.ll2
-rw-r--r--test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll2
-rw-r--r--test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll2
-rw-r--r--test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll2
-rw-r--r--test/CodeGen/CBackend/2008-06-04-IndirectMem.ll2
-rw-r--r--test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll2
-rw-r--r--test/CodeGen/CBackend/fneg.ll2
-rw-r--r--test/CodeGen/CBackend/pr2408.ll2
-rw-r--r--test/CodeGen/CBackend/vectors.ll2
-rw-r--r--test/CodeGen/CPP/2007-06-16-Funcname.ll2
-rw-r--r--test/CodeGen/CPP/2009-05-01-Long-Double.ll2
-rw-r--r--test/CodeGen/CPP/2009-05-04-CondBr.ll2
-rw-r--r--test/CodeGen/CPP/llvm2cpp.ll2
-rw-r--r--test/CodeGen/CellSPU/2009-01-01-BrCond.ll2
-rw-r--r--test/CodeGen/CellSPU/and_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/call.ll2
-rw-r--r--test/CodeGen/CellSPU/call_indirect.ll4
-rw-r--r--test/CodeGen/CellSPU/ctpop.ll2
-rw-r--r--test/CodeGen/CellSPU/dp_farith.ll2
-rw-r--r--test/CodeGen/CellSPU/eqv.ll2
-rw-r--r--test/CodeGen/CellSPU/extract_elt.ll2
-rw-r--r--test/CodeGen/CellSPU/fcmp32.ll2
-rw-r--r--test/CodeGen/CellSPU/fcmp64.ll2
-rw-r--r--test/CodeGen/CellSPU/fdiv.ll2
-rw-r--r--test/CodeGen/CellSPU/fneg-fabs.ll2
-rw-r--r--test/CodeGen/CellSPU/i64ops.ll2
-rw-r--r--test/CodeGen/CellSPU/i8ops.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp16.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp32.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp64.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp8.ll2
-rw-r--r--test/CodeGen/CellSPU/immed16.ll2
-rw-r--r--test/CodeGen/CellSPU/immed32.ll2
-rw-r--r--test/CodeGen/CellSPU/immed64.ll2
-rw-r--r--test/CodeGen/CellSPU/int2fp.ll2
-rw-r--r--test/CodeGen/CellSPU/intrinsics_branch.ll2
-rw-r--r--test/CodeGen/CellSPU/intrinsics_float.ll2
-rw-r--r--test/CodeGen/CellSPU/intrinsics_logical.ll2
-rw-r--r--test/CodeGen/CellSPU/loads.ll10
-rw-r--r--test/CodeGen/CellSPU/mul-with-overflow.ll2
-rw-r--r--test/CodeGen/CellSPU/mul_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/nand.ll2
-rw-r--r--test/CodeGen/CellSPU/or_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/private.ll2
-rw-r--r--test/CodeGen/CellSPU/rotate_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/select_bits.ll2
-rw-r--r--test/CodeGen/CellSPU/sext128.ll47
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/sp_farith.ll2
-rw-r--r--test/CodeGen/CellSPU/stores.ll2
-rw-r--r--test/CodeGen/CellSPU/struct_1.ll4
-rw-r--r--test/CodeGen/CellSPU/trunc.ll2
-rw-r--r--test/CodeGen/CellSPU/vec_const.ll4
-rw-r--r--test/CodeGen/CellSPU/vecinsert.ll2
-rw-r--r--test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll2
-rw-r--r--test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-27-phifcmpd.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-27-usefsubasbool.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-28-ManyArgs.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-06-BadIntCmp.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-07-BadLongConst.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-08-BadCastToBool.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll2
-rw-r--r--test/CodeGen/Generic/2004-02-08-UnwindSupport.ll2
-rw-r--r--test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll2
-rw-r--r--test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll2
-rw-r--r--test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll2
-rw-r--r--test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll2
-rw-r--r--test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll2
-rw-r--r--test/CodeGen/Generic/2005-10-21-longlonggtu.ll2
-rw-r--r--test/CodeGen/Generic/2005-12-01-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll2
-rw-r--r--test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll2
-rw-r--r--test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll2
-rw-r--r--test/CodeGen/Generic/2006-02-12-InsertLibcall.ll2
-rw-r--r--test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll2
-rw-r--r--test/CodeGen/Generic/2006-04-11-vecload.ll2
-rw-r--r--test/CodeGen/Generic/2006-04-26-SetCCAnd.ll2
-rw-r--r--test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll2
-rw-r--r--test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-07-03-schedulers.ll6
-rw-r--r--test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-09-06-SwitchLowering.ll2
-rw-r--r--test/CodeGen/Generic/2006-10-27-CondFolding.ll2
-rw-r--r--test/CodeGen/Generic/2006-10-29-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll2
-rw-r--r--test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll2
-rw-r--r--test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll2
-rw-r--r--test/CodeGen/Generic/2007-02-16-BranchFold.ll2
-rw-r--r--test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll2
-rw-r--r--test/CodeGen/Generic/2007-02-25-invoke.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll4
-rw-r--r--test/CodeGen/Generic/2007-04-17-lsr-crash.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll4
-rw-r--r--test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-27-LargeMemObject.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll2
-rw-r--r--test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll2
-rw-r--r--test/CodeGen/Generic/2007-05-05-Personality.ll2
-rw-r--r--test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll2
-rw-r--r--test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll2
-rw-r--r--test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll2
-rw-r--r--test/CodeGen/Generic/2007-12-17-InvokeAsm.ll2
-rw-r--r--test/CodeGen/Generic/2007-12-31-UnusedSelector.ll2
-rw-r--r--test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll2
-rw-r--r--test/CodeGen/Generic/2008-01-30-LoadCrash.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-04-Ctlz.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-20-MatchingMem.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-25-NegateZero.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-26-NegatableCrash.ll2
-rw-r--r--test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll2
-rw-r--r--test/CodeGen/Generic/2009-03-17-LSR-APInt.ll2
-rw-r--r--test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll2
-rw-r--r--test/CodeGen/Generic/2009-04-10-SinkCrash.ll2
-rw-r--r--test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll2
-rw-r--r--test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll2
-rw-r--r--test/CodeGen/Generic/APIntLoadStore.ll2
-rw-r--r--test/CodeGen/Generic/APIntParam.ll2
-rw-r--r--test/CodeGen/Generic/APIntSextParam.ll2
-rw-r--r--test/CodeGen/Generic/APIntZextParam.ll2
-rw-r--r--test/CodeGen/Generic/BasicInstrs.ll2
-rw-r--r--test/CodeGen/Generic/BurgBadRegAlloc.ll2
-rw-r--r--test/CodeGen/Generic/ConstantExprLowering.ll2
-rw-r--r--test/CodeGen/Generic/GC/alloc_loop.ll2
-rw-r--r--test/CodeGen/Generic/GC/argpromotion.ll2
-rw-r--r--test/CodeGen/Generic/GC/deadargelim.ll2
-rw-r--r--test/CodeGen/Generic/GC/frame_size.ll2
-rw-r--r--test/CodeGen/Generic/GC/inline.ll2
-rw-r--r--test/CodeGen/Generic/GC/inline2.ll4
-rw-r--r--test/CodeGen/Generic/GC/lower_gcroot.ll2
-rw-r--r--test/CodeGen/Generic/GC/redundant_init.ll2
-rw-r--r--test/CodeGen/Generic/GC/simple_ocaml.ll4
-rw-r--r--test/CodeGen/Generic/Makefile4
-rw-r--r--test/CodeGen/Generic/SwitchLowering.ll2
-rw-r--r--test/CodeGen/Generic/add-with-overflow-24.ll2
-rw-r--r--test/CodeGen/Generic/add-with-overflow.ll4
-rw-r--r--test/CodeGen/Generic/addc-fold2.ll4
-rw-r--r--test/CodeGen/Generic/asm-large-immediate.ll2
-rw-r--r--test/CodeGen/Generic/badCallArgLRLLVM.ll2
-rw-r--r--test/CodeGen/Generic/badFoldGEP.ll2
-rw-r--r--test/CodeGen/Generic/badarg6.ll2
-rw-r--r--test/CodeGen/Generic/badlive.ll2
-rw-r--r--test/CodeGen/Generic/bool-to-double.ll2
-rw-r--r--test/CodeGen/Generic/bool-vector.ll2
-rw-r--r--test/CodeGen/Generic/call-ret0.ll2
-rw-r--r--test/CodeGen/Generic/call-ret42.ll2
-rw-r--r--test/CodeGen/Generic/call-void.ll2
-rw-r--r--test/CodeGen/Generic/call2-ret0.ll2
-rw-r--r--test/CodeGen/Generic/cast-fp.ll2
-rw-r--r--test/CodeGen/Generic/constindices.ll2
-rw-r--r--test/CodeGen/Generic/debug-info.ll2
-rw-r--r--test/CodeGen/Generic/div-neg-power-2.ll2
-rw-r--r--test/CodeGen/Generic/empty-load-store.ll2
-rw-r--r--test/CodeGen/Generic/externally_available.ll2
-rw-r--r--test/CodeGen/Generic/fastcall.ll2
-rw-r--r--test/CodeGen/Generic/fneg-fabs.ll2
-rw-r--r--test/CodeGen/Generic/fp-to-int-invalid.ll2
-rw-r--r--test/CodeGen/Generic/fp_to_int.ll2
-rw-r--r--test/CodeGen/Generic/fpowi-promote.ll4
-rw-r--r--test/CodeGen/Generic/fwdtwice.ll2
-rw-r--r--test/CodeGen/Generic/getresult-undef.ll2
-rw-r--r--test/CodeGen/Generic/global-ret0.ll2
-rw-r--r--test/CodeGen/Generic/hello.ll2
-rw-r--r--test/CodeGen/Generic/i128-addsub.ll2
-rw-r--r--test/CodeGen/Generic/i128-arith.ll2
-rw-r--r--test/CodeGen/Generic/inline-asm-special-strings.ll2
-rw-r--r--test/CodeGen/Generic/intrinsics.ll2
-rw-r--r--test/CodeGen/Generic/invalid-memcpy.ll2
-rw-r--r--test/CodeGen/Generic/isunord.ll3
-rw-r--r--test/CodeGen/Generic/llvm-ct-intrinsics.ll2
-rw-r--r--test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll2
-rw-r--r--test/CodeGen/Generic/negintconst.ll2
-rw-r--r--test/CodeGen/Generic/nested-select.ll2
-rw-r--r--test/CodeGen/Generic/phi-immediate-factoring.ll2
-rw-r--r--test/CodeGen/Generic/pr2625.ll2
-rw-r--r--test/CodeGen/Generic/pr3288.ll2
-rw-r--r--test/CodeGen/Generic/print-add.ll2
-rw-r--r--test/CodeGen/Generic/print-arith-fp.ll2
-rw-r--r--test/CodeGen/Generic/print-arith-int.ll2
-rw-r--r--test/CodeGen/Generic/print-int.ll2
-rw-r--r--test/CodeGen/Generic/print-mul-exp.ll2
-rw-r--r--test/CodeGen/Generic/print-mul.ll2
-rw-r--r--test/CodeGen/Generic/print-shift.ll2
-rw-r--r--test/CodeGen/Generic/ret0.ll2
-rw-r--r--test/CodeGen/Generic/ret42.ll2
-rw-r--r--test/CodeGen/Generic/select-cc.ll2
-rw-r--r--test/CodeGen/Generic/select.ll2
-rw-r--r--test/CodeGen/Generic/shift-int64.ll2
-rw-r--r--test/CodeGen/Generic/spillccr.ll2
-rw-r--r--test/CodeGen/Generic/stack-protector.ll4
-rw-r--r--test/CodeGen/Generic/stacksave-restore.ll2
-rw-r--r--test/CodeGen/Generic/storetrunc-fp.ll2
-rw-r--r--test/CodeGen/Generic/switch-crit-edge-constant.ll2
-rw-r--r--test/CodeGen/Generic/switch-lower-feature-2.ll2
-rw-r--r--test/CodeGen/Generic/switch-lower-feature.ll10
-rw-r--r--test/CodeGen/Generic/switch-lower.ll2
-rw-r--r--test/CodeGen/Generic/trap.ll2
-rw-r--r--test/CodeGen/Generic/v-split.ll2
-rw-r--r--test/CodeGen/Generic/vector-casts.ll2
-rw-r--r--test/CodeGen/Generic/vector-constantexpr.ll2
-rw-r--r--test/CodeGen/Generic/vector-identity-shuffle.ll2
-rw-r--r--test/CodeGen/Generic/vector.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Rot.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Shift.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll2
-rw-r--r--test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll30
-rw-r--r--test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll22
-rw-r--r--test/CodeGen/MSP430/2009-10-10-OrImpDef.ll14
-rw-r--r--test/CodeGen/MSP430/Inst16mi.ll48
-rw-r--r--test/CodeGen/MSP430/Inst16mm.ll54
-rw-r--r--test/CodeGen/MSP430/Inst16mr.ll48
-rw-r--r--test/CodeGen/MSP430/Inst16rm.ll38
-rw-r--r--test/CodeGen/MSP430/Inst16rr.ll37
-rw-r--r--test/CodeGen/MSP430/Inst8mi.ll48
-rw-r--r--test/CodeGen/MSP430/Inst8mm.ll55
-rw-r--r--test/CodeGen/MSP430/Inst8mr.ll48
-rw-r--r--test/CodeGen/MSP430/Inst8rm.ll38
-rw-r--r--test/CodeGen/MSP430/Inst8rr.ll38
-rw-r--r--test/CodeGen/MSP430/inline-asm.ll25
-rw-r--r--test/CodeGen/Mips/2008-06-05-Carry.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-03-SRet.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-05-ByVal.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-06-fadd64.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-FPExtend.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-Float2Int.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-15-InternalConstant.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-15-SmallSection.ll4
-rw-r--r--test/CodeGen/Mips/2008-07-16-SignExtInReg.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-22-Cstpool.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-23-fpcmp.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-29-icmp.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-31-fcopysign.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-01-AsmInline.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-03-ReturnDouble.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-03-fabs64.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-04-Bitconvert.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-06-Alloca.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-07-CC.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-07-FPRound.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-08-bswap.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-08-ctlz.ll2
-rw-r--r--test/CodeGen/Mips/2008-10-13-LegalizerBug.ll2
-rw-r--r--test/CodeGen/Mips/2008-11-10-xint_to_fp.ll2
-rw-r--r--test/CodeGen/Mips/private.ll2
-rw-r--r--test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll29
-rw-r--r--test/CodeGen/PIC16/dg.exp5
-rw-r--r--test/CodeGen/PIC16/global-in-user-section.ll5
-rw-r--r--test/CodeGen/PIC16/globals.ll15
-rw-r--r--test/CodeGen/PIC16/sext.ll10
-rw-r--r--test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2004-11-30-shift-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-01-14-UndefLong.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-04-05-splat-ish.ll3
-rw-r--r--test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll4
-rw-r--r--test/CodeGen/PowerPC/2006-08-11-RetVector.ll4
-rw-r--r--test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-09-28-shift_64.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll3
-rw-r--r--test/CodeGen/PowerPC/2006-10-13-Miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll6
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll6
-rw-r--r--test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-08-unaligned.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll6
-rw-r--r--test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-06-KillInfo.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll4
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-Bswap.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-Fabs.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-17-Fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-28-f128-i32.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-12-12-EH.ll4
-rw-r--r--test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-03-17-LSRBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll16
-rw-r--r--test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll25
-rw-r--r--test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll8
-rw-r--r--test/CodeGen/PowerPC/2009-09-18-carrybit.ll62
-rw-r--r--test/CodeGen/PowerPC/Atomics-32.ll2
-rw-r--r--test/CodeGen/PowerPC/Atomics-64.ll2
-rw-r--r--test/CodeGen/PowerPC/Frames-alloca.ll57
-rw-r--r--test/CodeGen/PowerPC/Frames-large.ll119
-rw-r--r--test/CodeGen/PowerPC/Frames-leaf.ll32
-rw-r--r--test/CodeGen/PowerPC/Frames-small.ll18
-rw-r--r--test/CodeGen/PowerPC/LargeAbsoluteAddr.ll6
-rw-r--r--test/CodeGen/PowerPC/addc.ll2
-rw-r--r--test/CodeGen/PowerPC/addi-reassoc.ll2
-rw-r--r--test/CodeGen/PowerPC/align.ll6
-rw-r--r--test/CodeGen/PowerPC/and-branch.ll2
-rw-r--r--test/CodeGen/PowerPC/and-elim.ll2
-rw-r--r--test/CodeGen/PowerPC/and-imm.ll2
-rw-r--r--test/CodeGen/PowerPC/and_add.ll2
-rw-r--r--test/CodeGen/PowerPC/and_sext.ll4
-rw-r--r--test/CodeGen/PowerPC/and_sra.ll2
-rw-r--r--test/CodeGen/PowerPC/atomic-1.ll4
-rw-r--r--test/CodeGen/PowerPC/atomic-2.ll4
-rw-r--r--test/CodeGen/PowerPC/available-externally.ll116
-rw-r--r--test/CodeGen/PowerPC/big-endian-actual-args.ll4
-rw-r--r--test/CodeGen/PowerPC/big-endian-call-result.ll4
-rw-r--r--test/CodeGen/PowerPC/big-endian-formal-args.ll8
-rw-r--r--test/CodeGen/PowerPC/branch-opt.ll2
-rw-r--r--test/CodeGen/PowerPC/bswap-load-store.ll12
-rw-r--r--test/CodeGen/PowerPC/buildvec_canonicalize.ll6
-rw-r--r--test/CodeGen/PowerPC/calls.ll6
-rw-r--r--test/CodeGen/PowerPC/cmp-cmp.ll2
-rw-r--r--test/CodeGen/PowerPC/compare-duplicate.ll2
-rw-r--r--test/CodeGen/PowerPC/compare-simm.ll2
-rw-r--r--test/CodeGen/PowerPC/constants.ll6
-rw-r--r--test/CodeGen/PowerPC/cr_spilling.ll2
-rw-r--r--test/CodeGen/PowerPC/cttz.ll2
-rw-r--r--test/CodeGen/PowerPC/darwin-labels.ll2
-rw-r--r--test/CodeGen/PowerPC/delete-node.ll2
-rw-r--r--test/CodeGen/PowerPC/div-2.ll4
-rw-r--r--test/CodeGen/PowerPC/eqv-andc-orc-nor.ll10
-rw-r--r--test/CodeGen/PowerPC/extsh.ll2
-rw-r--r--test/CodeGen/PowerPC/fabs.ll2
-rw-r--r--test/CodeGen/PowerPC/fma.ll2
-rw-r--r--test/CodeGen/PowerPC/fnabs.ll2
-rw-r--r--test/CodeGen/PowerPC/fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/fold-li.ll2
-rw-r--r--test/CodeGen/PowerPC/fp-branch.ll2
-rw-r--r--test/CodeGen/PowerPC/fp-int-fp.ll2
-rw-r--r--test/CodeGen/PowerPC/fp_to_uint.ll2
-rw-r--r--test/CodeGen/PowerPC/fpcopy.ll2
-rw-r--r--test/CodeGen/PowerPC/frounds.ll2
-rw-r--r--test/CodeGen/PowerPC/fsqrt.ll14
-rw-r--r--test/CodeGen/PowerPC/hello.ll4
-rw-r--r--test/CodeGen/PowerPC/hidden-vis-2.ll2
-rw-r--r--test/CodeGen/PowerPC/hidden-vis.ll2
-rw-r--r--test/CodeGen/PowerPC/i128-and-beyond.ll2
-rw-r--r--test/CodeGen/PowerPC/i64_fp.ll16
-rw-r--r--test/CodeGen/PowerPC/iabs.ll2
-rw-r--r--test/CodeGen/PowerPC/illegal-element-type.ll2
-rw-r--r--test/CodeGen/PowerPC/inlineasm-copy.ll2
-rw-r--r--test/CodeGen/PowerPC/int-fp-conv-0.ll2
-rw-r--r--test/CodeGen/PowerPC/int-fp-conv-1.ll2
-rw-r--r--test/CodeGen/PowerPC/invalid-memcpy.ll4
-rw-r--r--test/CodeGen/PowerPC/inverted-bool-compares.ll2
-rw-r--r--test/CodeGen/PowerPC/ispositive.ll2
-rw-r--r--test/CodeGen/PowerPC/itofp128.ll2
-rw-r--r--test/CodeGen/PowerPC/lha.ll2
-rw-r--r--test/CodeGen/PowerPC/load-constant-addr.ll4
-rw-r--r--test/CodeGen/PowerPC/long-compare.ll8
-rw-r--r--test/CodeGen/PowerPC/longdbl-truncate.ll2
-rw-r--r--test/CodeGen/PowerPC/mask64.ll2
-rw-r--r--test/CodeGen/PowerPC/mem-rr-addr-mode.ll4
-rw-r--r--test/CodeGen/PowerPC/mem_update.ll4
-rw-r--r--test/CodeGen/PowerPC/mul-neg-power-2.ll2
-rw-r--r--test/CodeGen/PowerPC/mul-with-overflow.ll2
-rw-r--r--test/CodeGen/PowerPC/mulhs.ll2
-rw-r--r--test/CodeGen/PowerPC/multiple-return-values.ll4
-rw-r--r--test/CodeGen/PowerPC/neg.ll2
-rw-r--r--test/CodeGen/PowerPC/no-dead-strip.ll2
-rw-r--r--test/CodeGen/PowerPC/or-addressing-mode.ll4
-rw-r--r--test/CodeGen/PowerPC/ppcf128-1-opt.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-1.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-2.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-3.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-4.ll2
-rw-r--r--test/CodeGen/PowerPC/pr3711_widen_bit.ll2
-rw-r--r--test/CodeGen/PowerPC/private.ll10
-rw-r--r--test/CodeGen/PowerPC/reg-coalesce-simple.ll2
-rw-r--r--test/CodeGen/PowerPC/retaddr.ll6
-rw-r--r--test/CodeGen/PowerPC/return-val-i128.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwimi-commute.ll4
-rw-r--r--test/CodeGen/PowerPC/rlwimi.ll4
-rw-r--r--test/CodeGen/PowerPC/rlwimi2.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwimi3.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwinm.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwinm2.ll2
-rw-r--r--test/CodeGen/PowerPC/rotl-2.ll6
-rw-r--r--test/CodeGen/PowerPC/rotl-64.ll4
-rw-r--r--test/CodeGen/PowerPC/rotl.ll4
-rw-r--r--test/CodeGen/PowerPC/sections.ll8
-rw-r--r--test/CodeGen/PowerPC/select-cc.ll2
-rw-r--r--test/CodeGen/PowerPC/select_lt0.ll2
-rw-r--r--test/CodeGen/PowerPC/setcc_no_zext.ll2
-rw-r--r--test/CodeGen/PowerPC/seteq-0.ll3
-rw-r--r--test/CodeGen/PowerPC/shift128.ll2
-rw-r--r--test/CodeGen/PowerPC/shl_elim.ll2
-rw-r--r--test/CodeGen/PowerPC/shl_sext.ll2
-rw-r--r--test/CodeGen/PowerPC/sign_ext_inreg1.ll4
-rw-r--r--test/CodeGen/PowerPC/small-arguments.ll2
-rw-r--r--test/CodeGen/PowerPC/stfiwx-2.ll4
-rw-r--r--test/CodeGen/PowerPC/stfiwx.ll8
-rw-r--r--test/CodeGen/PowerPC/store-load-fwd.ll2
-rw-r--r--test/CodeGen/PowerPC/subc.ll2
-rw-r--r--test/CodeGen/PowerPC/tailcall1-64.ll2
-rw-r--r--test/CodeGen/PowerPC/tailcall1.ll2
-rw-r--r--test/CodeGen/PowerPC/tailcallpic1.ll2
-rw-r--r--test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll583
-rw-r--r--test/CodeGen/PowerPC/trampoline.ll2
-rw-r--r--test/CodeGen/PowerPC/unsafe-math.ll4
-rw-r--r--test/CodeGen/PowerPC/vcmp-fold.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_br_cmp.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_call.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_constants.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_insert.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_misaligned.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_mul.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_perf_shuffle.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_shift.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_splat.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_vrsave.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_zero.ll2
-rw-r--r--test/CodeGen/PowerPC/vector-identity-shuffle.ll4
-rw-r--r--test/CodeGen/PowerPC/vector.ll4
-rw-r--r--test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll2
-rw-r--r--test/CodeGen/SPARC/2007-05-09-JumpTables.ll2
-rw-r--r--test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll2
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll2
-rw-r--r--test/CodeGen/SPARC/2009-08-28-PIC.ll9
-rw-r--r--test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll6
-rw-r--r--test/CodeGen/SPARC/basictest.ll2
-rw-r--r--test/CodeGen/SPARC/ctpop.ll8
-rw-r--r--test/CodeGen/SPARC/private.ll2
-rw-r--r--test/CodeGen/SPARC/xnor.ll2
-rw-r--r--test/CodeGen/SystemZ/00-RetVoid.ll6
-rw-r--r--test/CodeGen/SystemZ/01-RetArg.ll6
-rw-r--r--test/CodeGen/SystemZ/01-RetImm.ll49
-rw-r--r--test/CodeGen/SystemZ/02-MemArith.ll133
-rw-r--r--test/CodeGen/SystemZ/02-RetAdd.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetAddImm.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetAnd.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetAndImm.ll28
-rw-r--r--test/CodeGen/SystemZ/02-RetNeg.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetOr.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetOrImm.ll28
-rw-r--r--test/CodeGen/SystemZ/02-RetSub.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetSubImm.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetXor.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetXorImm.ll6
-rw-r--r--test/CodeGen/SystemZ/03-RetAddImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetAddSubreg.ll22
-rw-r--r--test/CodeGen/SystemZ/03-RetAndImmSubreg.ll38
-rw-r--r--test/CodeGen/SystemZ/03-RetAndSubreg.ll21
-rw-r--r--test/CodeGen/SystemZ/03-RetArgSubreg.ll19
-rw-r--r--test/CodeGen/SystemZ/03-RetImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetNegImmSubreg.ll8
-rw-r--r--test/CodeGen/SystemZ/03-RetOrImmSubreg.ll60
-rw-r--r--test/CodeGen/SystemZ/03-RetOrSubreg.ll23
-rw-r--r--test/CodeGen/SystemZ/03-RetSubImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetSubSubreg.ll22
-rw-r--r--test/CodeGen/SystemZ/03-RetXorImmSubreg.ll58
-rw-r--r--test/CodeGen/SystemZ/03-RetXorSubreg.ll23
-rw-r--r--test/CodeGen/SystemZ/04-RetShifts.ll121
-rw-r--r--test/CodeGen/SystemZ/05-LoadAddr.ll11
-rw-r--r--test/CodeGen/SystemZ/05-MemImmStores.ll50
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores.ll44
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores16.ll85
-rw-r--r--test/CodeGen/SystemZ/05-MemRegLoads.ll75
-rw-r--r--test/CodeGen/SystemZ/05-MemRegStores.ll79
-rw-r--r--test/CodeGen/SystemZ/06-CallViaStack.ll17
-rw-r--r--test/CodeGen/SystemZ/06-FrameIdxLoad.ll16
-rw-r--r--test/CodeGen/SystemZ/06-LocalFrame.ll13
-rw-r--r--test/CodeGen/SystemZ/06-SimpleCall.ll12
-rw-r--r--test/CodeGen/SystemZ/07-BrCond.ll141
-rw-r--r--test/CodeGen/SystemZ/07-BrCond32.ll142
-rw-r--r--test/CodeGen/SystemZ/07-BrUnCond.ll18
-rw-r--r--test/CodeGen/SystemZ/07-CmpImm.ll137
-rw-r--r--test/CodeGen/SystemZ/07-CmpImm32.ll139
-rw-r--r--test/CodeGen/SystemZ/07-SelectCC.ll11
-rw-r--r--test/CodeGen/SystemZ/08-DivRem.ll55
-rw-r--r--test/CodeGen/SystemZ/08-DivRemMemOp.ll64
-rw-r--r--test/CodeGen/SystemZ/08-SimpleMuls.ll29
-rw-r--r--test/CodeGen/SystemZ/09-DynamicAlloca.ll14
-rw-r--r--test/CodeGen/SystemZ/09-Globals.ll23
-rw-r--r--test/CodeGen/SystemZ/09-Switches.ll39
-rw-r--r--test/CodeGen/SystemZ/10-FuncsPic.ll27
-rw-r--r--test/CodeGen/SystemZ/10-GlobalsPic.ll29
-rw-r--r--test/CodeGen/SystemZ/11-BSwap.ll74
-rw-r--r--test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll12
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-And32Imm.ll14
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-Rotate.ll13
-rw-r--r--test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll19
-rw-r--r--test/CodeGen/SystemZ/2009-07-04-Shl32.ll27
-rw-r--r--test/CodeGen/SystemZ/2009-07-05-Shifts.ll25
-rw-r--r--test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll22
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll16
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll32
-rw-r--r--test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll21
-rw-r--r--test/CodeGen/SystemZ/2009-08-22-FCopySign.ll22
-rw-r--r--test/CodeGen/SystemZ/dg.exp5
-rw-r--r--test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll2
-rw-r--r--test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll2
-rw-r--r--test/CodeGen/Thumb/2007-03-06-AddR7.ll4
-rw-r--r--test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll2
-rw-r--r--test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll2
-rw-r--r--test/CodeGen/Thumb/2009-07-19-SPDecBug.ll33
-rw-r--r--test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll11
-rw-r--r--test/CodeGen/Thumb/2009-07-27-PEIAssert.ll26
-rw-r--r--test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll737
-rw-r--r--test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll40
-rw-r--r--test/CodeGen/Thumb/2009-08-20-ISelBug.ll66
-rw-r--r--test/CodeGen/Thumb/asmprinter-bug.ll288
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll6
-rw-r--r--test/CodeGen/Thumb/fpconv.ll2
-rw-r--r--test/CodeGen/Thumb/fpow.ll2
-rw-r--r--test/CodeGen/Thumb/frame_thumb.ll4
-rw-r--r--test/CodeGen/Thumb/iabs.ll2
-rw-r--r--test/CodeGen/Thumb/inlineasm-imm-thumb.ll2
-rw-r--r--test/CodeGen/Thumb/ispositive.ll4
-rw-r--r--test/CodeGen/Thumb/large-stack.ll2
-rw-r--r--test/CodeGen/Thumb/ldr_ext.ll51
-rw-r--r--test/CodeGen/Thumb/ldr_frame.ll12
-rw-r--r--test/CodeGen/Thumb/long-setcc.ll2
-rw-r--r--test/CodeGen/Thumb/long.ll8
-rw-r--r--test/CodeGen/Thumb/long_shift.ll26
-rw-r--r--test/CodeGen/Thumb/mul.ll22
-rw-r--r--test/CodeGen/Thumb/pop.ll13
-rw-r--r--test/CodeGen/Thumb/push.ll10
-rw-r--r--test/CodeGen/Thumb/select.ll14
-rw-r--r--test/CodeGen/Thumb/stack-frame.ll4
-rw-r--r--test/CodeGen/Thumb/thumb-imm.ll2
-rw-r--r--test/CodeGen/Thumb/tst_teq.ll2
-rw-r--r--test/CodeGen/Thumb/unord.ll4
-rw-r--r--test/CodeGen/Thumb/vargs.ll6
-rw-r--r--test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll35
-rw-r--r--test/CodeGen/Thumb2/2009-07-21-ISelBug.ll36
-rw-r--r--test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll22
-rw-r--r--test/CodeGen/Thumb2/2009-07-30-PEICrash.ll193
-rw-r--r--test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll85
-rw-r--r--test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll46
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll29
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll153
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll508
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll34
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll42
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll54
-rw-r--r--test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll24
-rw-r--r--test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll16
-rw-r--r--test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll80
-rw-r--r--test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll20
-rw-r--r--test/CodeGen/Thumb2/2009-08-10-ISelBug.ll15
-rw-r--r--test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll154
-rw-r--r--test/CodeGen/Thumb2/carry.ll12
-rw-r--r--test/CodeGen/Thumb2/frameless.ll6
-rw-r--r--test/CodeGen/Thumb2/frameless2.ll12
-rw-r--r--test/CodeGen/Thumb2/large-stack.ll28
-rw-r--r--test/CodeGen/Thumb2/load-global.ll28
-rw-r--r--test/CodeGen/Thumb2/mul_const.ll18
-rw-r--r--test/CodeGen/Thumb2/pic-load.ll21
-rw-r--r--test/CodeGen/Thumb2/thumb2-adc.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-add.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-add2.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-add3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add4.ll17
-rw-r--r--test/CodeGen/Thumb2/thumb2-add5.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-add6.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-and.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-and2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-asr.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-asr2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-bcc.ll19
-rw-r--r--test/CodeGen/Thumb2/thumb2-bfc.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-bic.ll56
-rw-r--r--test/CodeGen/Thumb2/thumb2-branch.ll61
-rw-r--r--test/CodeGen/Thumb2/thumb2-call.ll27
-rw-r--r--test/CodeGen/Thumb2/thumb2-clz.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-eor.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-eor2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1.ll84
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll93
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt3.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-jtb.ll120
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldm.ll40
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr.ll23
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_ext.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_post.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_pre.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrb.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrd.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrh.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsl.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsl2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr3.ll19
-rw-r--r--test/CodeGen/Thumb2/thumb2-mla.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mls.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov.ll62
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov2.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov3.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov4.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mul.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-mulhi.ll20
-rw-r--r--test/CodeGen/Thumb2/thumb2-mvn.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-mvn2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-neg.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-orn.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-orn2.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-orr.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-orr2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-pack.ll73
-rw-r--r--test/CodeGen/Thumb2/thumb2-rev.ll17
-rw-r--r--test/CodeGen/Thumb2/thumb2-rev16.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-ror.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ror2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-rsb.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-rsb2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sbc.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-select.ll98
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll24
-rw-r--r--test/CodeGen/Thumb2/thumb2-shifter.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-smla.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-smul.ll23
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll57
-rw-r--r--test/CodeGen/Thumb2/thumb2-str.ll25
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_post.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_pre.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-strb.ll25
-rw-r--r--test/CodeGen/Thumb2/thumb2-strh.ll25
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub.ll20
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub4.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub5.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-sxt_rot.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbb.ll57
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbh.ll90
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq2.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst2.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxt_rot.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxtb.ll2
-rw-r--r--test/CodeGen/Thumb2/tls1.ll6
-rw-r--r--test/CodeGen/Thumb2/tls2.ll22
-rw-r--r--test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll2
-rw-r--r--test/CodeGen/X86/2003-08-23-DeadBlockTest.ll2
-rw-r--r--test/CodeGen/X86/2003-11-03-GlobalBool.ll2
-rw-r--r--test/CodeGen/X86/2004-02-12-Memcpy.ll2
-rw-r--r--test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll4
-rw-r--r--test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll2
-rw-r--r--test/CodeGen/X86/2004-02-22-Casts.ll2
-rw-r--r--test/CodeGen/X86/2004-03-30-Select-Max.ll2
-rw-r--r--test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll2
-rw-r--r--test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll2
-rw-r--r--test/CodeGen/X86/2004-06-10-StackifierCrash.ll2
-rw-r--r--test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll2
-rw-r--r--test/CodeGen/X86/2005-01-17-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2005-02-14-IllegalAssembler.ll2
-rw-r--r--test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll2
-rw-r--r--test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2006-03-01-InstrSchedBug.ll2
-rw-r--r--test/CodeGen/X86/2006-03-02-InstrSchedBug.ll2
-rw-r--r--test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll2
-rw-r--r--test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll3
-rw-r--r--test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll2
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched1.ll3
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched2.ll2
-rw-r--r--test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll2
-rw-r--r--test/CodeGen/X86/2006-05-08-InstrSched.ll3
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2006-05-17-VectorArg.ll2
-rw-r--r--test/CodeGen/X86/2006-05-22-FPSetEQ.ll4
-rw-r--r--test/CodeGen/X86/2006-05-25-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll2
-rw-r--r--test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll2
-rw-r--r--test/CodeGen/X86/2006-07-19-ATTAsm.ll2
-rw-r--r--test/CodeGen/X86/2006-07-20-InlineAsm.ll2
-rw-r--r--test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll2
-rw-r--r--test/CodeGen/X86/2006-07-31-SingleRegClass.ll2
-rw-r--r--test/CodeGen/X86/2006-08-07-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-08-16-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-08-21-ExtraMovInst.ll2
-rw-r--r--test/CodeGen/X86/2006-09-01-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-02-BoolRetCrash.ll2
-rw-r--r--test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll2
-rw-r--r--test/CodeGen/X86/2006-10-09-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll2
-rw-r--r--test/CodeGen/X86/2006-10-12-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-13-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll9
-rw-r--r--test/CodeGen/X86/2006-11-12-CSRetCC.ll2
-rw-r--r--test/CodeGen/X86/2006-11-17-IllegalMove.ll4
-rw-r--r--test/CodeGen/X86/2006-11-27-SelectLegalize.ll2
-rw-r--r--test/CodeGen/X86/2006-11-28-Memcpy.ll6
-rw-r--r--test/CodeGen/X86/2006-12-19-IntelSyntax.ll2
-rw-r--r--test/CodeGen/X86/2007-01-08-InstrSched.ll11
-rw-r--r--test/CodeGen/X86/2007-01-13-StackPtrIndex.ll2
-rw-r--r--test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll2
-rw-r--r--test/CodeGen/X86/2007-02-04-OrAddrMode.ll4
-rw-r--r--test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-02-25-FastCCStack.ll2
-rw-r--r--test/CodeGen/X86/2007-03-01-SpillerCrash.ll4
-rw-r--r--test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll2
-rw-r--r--test/CodeGen/X86/2007-03-16-InlineAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll2
-rw-r--r--test/CodeGen/X86/2007-03-26-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll4
-rw-r--r--test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll2
-rw-r--r--test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-04-24-Huge-Stack.ll2
-rw-r--r--test/CodeGen/X86/2007-04-24-VectorCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll4
-rw-r--r--test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll2
-rw-r--r--test/CodeGen/X86/2007-05-05-VecCastExpand.ll2
-rw-r--r--test/CodeGen/X86/2007-05-07-InvokeSRet.ll2
-rw-r--r--test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-05-15-maskmovq.ll2
-rw-r--r--test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll4
-rw-r--r--test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll4
-rw-r--r--test/CodeGen/X86/2007-06-04-tailmerge4.ll2
-rw-r--r--test/CodeGen/X86/2007-06-05-LSR-Dominator.ll2
-rw-r--r--test/CodeGen/X86/2007-06-14-branchfold.ll2
-rw-r--r--test/CodeGen/X86/2007-06-15-IntToMMX.ll2
-rw-r--r--test/CodeGen/X86/2007-06-28-X86-64-isel.ll2
-rw-r--r--test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll2
-rw-r--r--test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll2
-rw-r--r--test/CodeGen/X86/2007-07-03-GR64ToVR64.ll6
-rw-r--r--test/CodeGen/X86/2007-07-10-StackerAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-07-18-Vector-Extract.ll4
-rw-r--r--test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll2
-rw-r--r--test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll2
-rw-r--r--test/CodeGen/X86/2007-08-10-SignExtSubreg.ll2
-rw-r--r--test/CodeGen/X86/2007-08-13-AppendingLinkage.ll2
-rw-r--r--test/CodeGen/X86/2007-08-13-SpillerReuse.ll2
-rw-r--r--test/CodeGen/X86/2007-09-05-InvalidAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll2
-rw-r--r--test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll2
-rw-r--r--test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll2
-rw-r--r--test/CodeGen/X86/2007-09-27-LDIntrinsics.ll4
-rw-r--r--test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll2
-rw-r--r--test/CodeGen/X86/2007-10-05-3AddrConvert.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll2
-rw-r--r--test/CodeGen/X86/2007-10-14-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-15-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-16-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-16-IllegalAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-10-16-fp80_select.ll2
-rw-r--r--test/CodeGen/X86/2007-10-17-IllegalAsm.ll4
-rw-r--r--test/CodeGen/X86/2007-10-19-SpillerUnfold.ll2
-rw-r--r--test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll2
-rw-r--r--test/CodeGen/X86/2007-10-29-ExtendSetCC.ll2
-rw-r--r--test/CodeGen/X86/2007-10-30-LSRCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-31-extractelement-i64.ll2
-rw-r--r--test/CodeGen/X86/2007-11-01-ISelCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-11-02-BadAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll2
-rw-r--r--test/CodeGen/X86/2007-11-06-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2007-11-07-MulBy4.ll2
-rw-r--r--test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll5
-rw-r--r--test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll2
-rw-r--r--test/CodeGen/X86/2007-11-30-TestLoadFolding.ll4
-rw-r--r--test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll2
-rw-r--r--test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-12-18-LoadCSEBug.ll2
-rw-r--r--test/CodeGen/X86/2008-01-08-IllegalCMP.ll2
-rw-r--r--test/CodeGen/X86/2008-01-08-SchedulerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-01-09-LongDoubleSin.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-Trampoline.ll4
-rw-r--r--test/CodeGen/X86/2008-01-25-EmptyFunction.ll2
-rw-r--r--test/CodeGen/X86/2008-02-05-ISelCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-14-BitMiscompile.ll2
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll4
-rw-r--r--test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-22-ReMatBug.ll3
-rw-r--r--test/CodeGen/X86/2008-02-25-InlineAsmBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll2
-rw-r--r--test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-27-PEICrash.ll2
-rw-r--r--test/CodeGen/X86/2008-03-06-frem-fpstack.ll2
-rw-r--r--test/CodeGen/X86/2008-03-07-APIntBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll4
-rw-r--r--test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll2
-rw-r--r--test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-03-14-SpillerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-03-18-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll2
-rw-r--r--test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-02-unnamedEH.ll3
-rw-r--r--test/CodeGen/X86/2008-04-08-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-04-09-BranchFolding.ll2
-rw-r--r--test/CodeGen/X86/2008-04-15-LiveVariableBug.ll4
-rw-r--r--test/CodeGen/X86/2008-04-16-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-16-ReMatBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-17-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-24-MemCpyBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll2
-rw-r--r--test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll2
-rw-r--r--test/CodeGen/X86/2008-04-28-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll2
-rw-r--r--test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll2
-rw-r--r--test/CodeGen/X86/2008-05-09-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-12-tailmerge-5.ll2
-rw-r--r--test/CodeGen/X86/2008-05-21-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll2
-rw-r--r--test/CodeGen/X86/2008-05-28-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll6
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll4
-rw-r--r--test/CodeGen/X86/2008-06-16-SubregsBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-18-BadShuffle.ll2
-rw-r--r--test/CodeGen/X86/2008-06-25-VecISelBug.ll2
-rw-r--r--test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll2
-rw-r--r--test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll2
-rw-r--r--test/CodeGen/X86/2008-07-11-SHLBy1.ll2
-rw-r--r--test/CodeGen/X86/2008-07-11-SpillerBug.ll9
-rw-r--r--test/CodeGen/X86/2008-07-16-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-07-19-movups-spills.ll4
-rw-r--r--test/CodeGen/X86/2008-07-22-CombinerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-07-23-VSetCC.ll6
-rw-r--r--test/CodeGen/X86/2008-08-05-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-06-RewriterBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-19-SubAndFetch.ll5
-rw-r--r--test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll2
-rw-r--r--test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll5
-rw-r--r--test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll4
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN32.ll4
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN64.ll4
-rw-r--r--test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll4
-rw-r--r--test/CodeGen/X86/2008-09-09-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug2.ll2
-rw-r--r--test/CodeGen/X86/2008-09-17-inline-asm-1.ll16
-rw-r--r--test/CodeGen/X86/2008-09-18-inline-asm-2.ll4
-rw-r--r--test/CodeGen/X86/2008-09-19-RegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-25-sseregparm-1.ll4
-rw-r--r--test/CodeGen/X86/2008-09-26-FrameAddrBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-29-ReMatBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-29-VolatileBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-02-Atomics32-2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-MMXISelBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-07-SSEISelBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-11-CallCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-10-13-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-16-VecUnaryOp.ll2
-rw-r--r--test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll4
-rw-r--r--test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll4
-rw-r--r--test/CodeGen/X86/2008-10-24-FlippedCompare.ll2
-rw-r--r--test/CodeGen/X86/2008-10-27-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-27-StackRealignment.ll4
-rw-r--r--test/CodeGen/X86/2008-10-29-ExpandVAARG.ll2
-rw-r--r--test/CodeGen/X86/2008-11-03-F80VAARG.ll2
-rw-r--r--test/CodeGen/X86/2008-11-06-testb.ll2
-rw-r--r--test/CodeGen/X86/2008-11-13-inlineasm-3.ll2
-rw-r--r--test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll3
-rw-r--r--test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll3
-rw-r--r--test/CodeGen/X86/2008-11-29-ULT-Sign.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-SpillerAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-IllegalResultType.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-1.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-2.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-3.ll4
-rw-r--r--test/CodeGen/X86/2008-12-05-SpillerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll12
-rw-r--r--test/CodeGen/X86/2008-12-16-BadShift.ll2
-rw-r--r--test/CodeGen/X86/2008-12-16-dagcombine-4.ll2
-rw-r--r--test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll7
-rw-r--r--test/CodeGen/X86/2008-12-22-dagcombine-5.ll2
-rw-r--r--test/CodeGen/X86/2008-12-23-crazy-address.ll2
-rw-r--r--test/CodeGen/X86/2008-12-23-dagcombine-6.ll2
-rw-r--r--test/CodeGen/X86/2009-01-12-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-13-DoubleUpdate.ll2
-rw-r--r--test/CodeGen/X86/2009-01-16-SchedulerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-16-UIntToFP.ll2
-rw-r--r--test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-01-25-NoSSE.ll2
-rw-r--r--test/CodeGen/X86/2009-01-26-WrongCheck.ll2
-rw-r--r--test/CodeGen/X86/2009-01-27-NullStrings.ll39
-rw-r--r--test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift2.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift3.ll2
-rw-r--r--test/CodeGen/X86/2009-02-01-LargeMask.ll2
-rw-r--r--test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll2
-rw-r--r--test/CodeGen/X86/2009-02-04-sext-i64-gep.ll2
-rw-r--r--test/CodeGen/X86/2009-02-05-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2009-02-07-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-08-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll2
-rw-r--r--test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll4
-rw-r--r--test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll4
-rw-r--r--test/CodeGen/X86/2009-02-12-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll2
-rw-r--r--test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll2
-rw-r--r--test/CodeGen/X86/2009-02-25-CommuteBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-03-BTHang.ll2
-rw-r--r--test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll2
-rw-r--r--test/CodeGen/X86/2009-03-05-burr-list-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-03-07-FPConstSelect.ll2
-rw-r--r--test/CodeGen/X86/2009-03-09-APIntCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-03-09-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-10-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-11-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-12-CPAlignBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-13-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll2
-rw-r--r--test/CodeGen/X86/2009-03-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-i80-fp80.ll4
-rw-r--r--test/CodeGen/X86/2009-03-25-TestBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll4
-rw-r--r--test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-04-12-picrel.ll2
-rw-r--r--test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll2
-rw-r--r--test/CodeGen/X86/2009-04-13-2AddrAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-14-IllegalRegs.ll2
-rw-r--r--test/CodeGen/X86/2009-04-16-SpillerUnfold.ll2
-rw-r--r--test/CodeGen/X86/2009-04-20-LinearScanOpt.ll2
-rw-r--r--test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll10
-rw-r--r--test/CodeGen/X86/2009-04-24.ll4
-rw-r--r--test/CodeGen/X86/2009-04-25-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll2
-rw-r--r--test/CodeGen/X86/2009-04-29-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-29-RegAllocAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-scale.ll2
-rw-r--r--test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll2
-rw-r--r--test/CodeGen/X86/2009-05-11-tailmerge-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll2
-rw-r--r--test/CodeGen/X86/2009-05-23-available_externally.ll2
-rw-r--r--test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll2
-rw-r--r--test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-05-30-ISelBug.ll2
-rw-r--r--test/CodeGen/X86/2009-06-02-RewriterBug.ll2
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll2
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll8
-rw-r--r--test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-VZextByteShort.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-sitofpCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-06-06-ConcatVectors.ll2
-rw-r--r--test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll2
-rw-r--r--test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll4
-rw-r--r--test/CodeGen/X86/2009-06-15-not-a-tail-call.ll2
-rw-r--r--test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll2
-rw-r--r--test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll137
-rw-r--r--test/CodeGen/X86/2009-07-07-SplitICmp.ll8
-rw-r--r--test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll11
-rw-r--r--test/CodeGen/X86/2009-07-15-CoalescerBug.ll958
-rw-r--r--test/CodeGen/X86/2009-07-16-CoalescerBug.ll210
-rw-r--r--test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll102
-rw-r--r--test/CodeGen/X86/2009-07-17-StackColoringBug.ll55
-rw-r--r--test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll11
-rw-r--r--test/CodeGen/X86/2009-07-20-CoalescerBug.ll165
-rw-r--r--test/CodeGen/X86/2009-07-20-DAGCombineBug.ll29
-rw-r--r--test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll10
-rw-r--r--test/CodeGen/X86/2009-08-06-branchfolder-crash.ll142
-rw-r--r--test/CodeGen/X86/2009-08-06-inlineasm.ll26
-rw-r--r--test/CodeGen/X86/2009-08-08-CastError.ll9
-rw-r--r--test/CodeGen/X86/2009-08-12-badswitch.ll176
-rw-r--r--test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll57
-rw-r--r--test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll15
-rw-r--r--test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll69
-rw-r--r--test/CodeGen/X86/2009-08-23-linkerprivate.ll8
-rw-r--r--test/CodeGen/X86/2009-09-07-CoalescerBug.ll48
-rw-r--r--test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll48
-rw-r--r--test/CodeGen/X86/2009-09-16-CoalescerBug.ll64
-rw-r--r--test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll30
-rw-r--r--test/CodeGen/X86/2009-09-19-earlyclobber.ll15
-rw-r--r--test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll36
-rw-r--r--test/CodeGen/X86/2009-09-22-CoalescerBug.ll124
-rw-r--r--test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll91
-rw-r--r--test/CodeGen/X86/2009-10-08-MachineLICMBug.ll264
-rw-r--r--test/CodeGen/X86/20090313-signext.ll2
-rw-r--r--test/CodeGen/X86/Atomics-32.ll2
-rw-r--r--test/CodeGen/X86/Atomics-64.ll2
-rw-r--r--test/CodeGen/X86/abi-isel.ll8710
-rw-r--r--test/CodeGen/X86/add-trick32.ll2
-rw-r--r--test/CodeGen/X86/add-trick64.ll2
-rw-r--r--test/CodeGen/X86/add-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/aliases.ll3
-rw-r--r--test/CodeGen/X86/aligned-comm.ll8
-rw-r--r--test/CodeGen/X86/all-ones-vector.ll2
-rw-r--r--test/CodeGen/X86/alloca-align-rounding.ll4
-rw-r--r--test/CodeGen/X86/and-or-fold.ll2
-rw-r--r--test/CodeGen/X86/and-su.ll2
-rw-r--r--test/CodeGen/X86/anyext-uses.ll2
-rw-r--r--test/CodeGen/X86/anyext.ll18
-rw-r--r--test/CodeGen/X86/arg-cast.ll6
-rw-r--r--test/CodeGen/X86/asm-block-labels.ll2
-rw-r--r--test/CodeGen/X86/asm-global-imm.ll4
-rw-r--r--test/CodeGen/X86/asm-indirect-mem.ll2
-rw-r--r--test/CodeGen/X86/asm-modifier-P.ll79
-rw-r--r--test/CodeGen/X86/asm-modifier.ll41
-rw-r--r--test/CodeGen/X86/atomic_add.ll217
-rw-r--r--test/CodeGen/X86/atomic_op.ll2
-rw-r--r--test/CodeGen/X86/attribute-sections.ll18
-rw-r--r--test/CodeGen/X86/avoid-lea-scale2.ll8
-rw-r--r--test/CodeGen/X86/avoid-loop-align-2.ll2
-rw-r--r--test/CodeGen/X86/avoid-loop-align.ll2
-rw-r--r--test/CodeGen/X86/bitcast-int-to-vector.ll2
-rw-r--r--test/CodeGen/X86/bitcast.ll4
-rw-r--r--test/CodeGen/X86/bitcast2.ll4
-rw-r--r--test/CodeGen/X86/break-anti-dependencies.ll4
-rw-r--r--test/CodeGen/X86/bss_pagealigned.ll21
-rw-r--r--test/CodeGen/X86/bswap-inline-asm.ll2
-rw-r--r--test/CodeGen/X86/bswap.ll4
-rw-r--r--test/CodeGen/X86/bt.ll6
-rw-r--r--test/CodeGen/X86/byval.ll4
-rw-r--r--test/CodeGen/X86/byval2.ll4
-rw-r--r--test/CodeGen/X86/byval3.ll4
-rw-r--r--test/CodeGen/X86/byval4.ll4
-rw-r--r--test/CodeGen/X86/byval5.ll4
-rw-r--r--test/CodeGen/X86/byval6.ll2
-rw-r--r--test/CodeGen/X86/byval7.ll2
-rw-r--r--test/CodeGen/X86/call-imm.ll8
-rw-r--r--test/CodeGen/X86/call-push.ll2
-rw-r--r--test/CodeGen/X86/change-compare-stride-0.ll4
-rw-r--r--test/CodeGen/X86/change-compare-stride-1.ll2
-rw-r--r--test/CodeGen/X86/clz.ll6
-rw-r--r--test/CodeGen/X86/cmov.ll157
-rw-r--r--test/CodeGen/X86/cmp-test.ll4
-rw-r--r--test/CodeGen/X86/cmp0.ll2
-rw-r--r--test/CodeGen/X86/cmp1.ll2
-rw-r--r--test/CodeGen/X86/cmp2.ll2
-rw-r--r--test/CodeGen/X86/coalesce-esp.ll36
-rw-r--r--test/CodeGen/X86/coalescer-commute1.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute2.ll4
-rw-r--r--test/CodeGen/X86/coalescer-commute3.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute4.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute5.ll2
-rw-r--r--test/CodeGen/X86/coalescer-cross.ll41
-rw-r--r--test/CodeGen/X86/coalescer-remat.ll2
-rw-r--r--test/CodeGen/X86/code_placement.ll4
-rw-r--r--test/CodeGen/X86/codegen-prepare-cast.ll2
-rw-r--r--test/CodeGen/X86/codemodel.ll67
-rw-r--r--test/CodeGen/X86/combine-lds.ll2
-rw-r--r--test/CodeGen/X86/combiner-aa-0.ll20
-rw-r--r--test/CodeGen/X86/combiner-aa-1.ll23
-rw-r--r--test/CodeGen/X86/commute-intrinsic.ll2
-rw-r--r--test/CodeGen/X86/commute-two-addr.ll2
-rw-r--r--test/CodeGen/X86/compare-add.ll2
-rw-r--r--test/CodeGen/X86/compare-inf.ll76
-rw-r--r--test/CodeGen/X86/compare_folding.ll4
-rw-r--r--test/CodeGen/X86/compiler_used.ll9
-rw-r--r--test/CodeGen/X86/complex-fca.ll2
-rw-r--r--test/CodeGen/X86/const-select.ll4
-rw-r--r--test/CodeGen/X86/constant-pool-remat-0.ll8
-rw-r--r--test/CodeGen/X86/constpool.ll8
-rw-r--r--test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll2
-rw-r--r--test/CodeGen/X86/copysign-zero.ll4
-rw-r--r--test/CodeGen/X86/critical-edge-split.ll2
-rw-r--r--test/CodeGen/X86/cstring.ll2
-rw-r--r--test/CodeGen/X86/dag-rauw-cse.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-buildvector.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-cse.ll2
-rw-r--r--test/CodeGen/X86/darwin-bzero.ll2
-rw-r--r--test/CodeGen/X86/darwin-no-dead-strip.ll2
-rw-r--r--test/CodeGen/X86/darwin-quote.ll15
-rw-r--r--test/CodeGen/X86/darwin-stub.ll4
-rw-r--r--test/CodeGen/X86/div_const.ll2
-rw-r--r--test/CodeGen/X86/divrem.ll2
-rw-r--r--test/CodeGen/X86/dll-linkage.ll9
-rw-r--r--test/CodeGen/X86/dollar-name.ll7
-rw-r--r--test/CodeGen/X86/dyn-stackalloc.ll8
-rw-r--r--test/CodeGen/X86/empty-struct-return-type.ll15
-rw-r--r--test/CodeGen/X86/epilogue.ll4
-rw-r--r--test/CodeGen/X86/extend.ll4
-rw-r--r--test/CodeGen/X86/extern_weak.ll2
-rw-r--r--test/CodeGen/X86/extmul128.ll2
-rw-r--r--test/CodeGen/X86/extmul64.ll2
-rw-r--r--test/CodeGen/X86/extract-combine.ll2
-rw-r--r--test/CodeGen/X86/extract-extract.ll24
-rw-r--r--test/CodeGen/X86/extractelement-from-arg.ll2
-rw-r--r--test/CodeGen/X86/extractelement-load.ll4
-rw-r--r--test/CodeGen/X86/extractelement-shuffle.ll2
-rw-r--r--test/CodeGen/X86/extractps.ll2
-rw-r--r--test/CodeGen/X86/fabs.ll5
-rw-r--r--test/CodeGen/X86/fast-cc-callee-pops.ll3
-rw-r--r--test/CodeGen/X86/fast-cc-merge-stack-adj.ll2
-rw-r--r--test/CodeGen/X86/fast-cc-pass-in-regs.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-bail.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-bc.ll19
-rw-r--r--test/CodeGen/X86/fast-isel-call.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-constpool.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-fneg.ll16
-rw-r--r--test/CodeGen/X86/fast-isel-gep.ll53
-rw-r--r--test/CodeGen/X86/fast-isel-gv.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-i1.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-mem.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-phys.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-shift-imm.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-tailcall.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-tls.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-trunc.ll4
-rw-r--r--test/CodeGen/X86/fast-isel.ll11
-rw-r--r--test/CodeGen/X86/fastcall-correct-mangling.ll2
-rw-r--r--test/CodeGen/X86/fastcc-2.ll4
-rw-r--r--test/CodeGen/X86/fastcc-byval.ll2
-rw-r--r--test/CodeGen/X86/fastcc-sret.ll2
-rw-r--r--test/CodeGen/X86/fastcc.ll5
-rw-r--r--test/CodeGen/X86/field-extract-use-trunc.ll4
-rw-r--r--test/CodeGen/X86/fildll.ll2
-rw-r--r--test/CodeGen/X86/fmul-zero.ll4
-rw-r--r--test/CodeGen/X86/fold-add.ll2
-rw-r--r--test/CodeGen/X86/fold-and-shift.ll2
-rw-r--r--test/CodeGen/X86/fold-call-2.ll2
-rw-r--r--test/CodeGen/X86/fold-call-3.ll2
-rw-r--r--test/CodeGen/X86/fold-call.ll4
-rw-r--r--test/CodeGen/X86/fold-imm.ll4
-rw-r--r--test/CodeGen/X86/fold-load.ll2
-rw-r--r--test/CodeGen/X86/fold-mul-lohi.ll4
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-0.ll6
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-1.ll2
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-2.ll4
-rw-r--r--test/CodeGen/X86/fold-sext-trunc.ll2
-rw-r--r--test/CodeGen/X86/fp-immediate-shorten.ll2
-rw-r--r--test/CodeGen/X86/fp-in-intregs.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-2results.ll4
-rw-r--r--test/CodeGen/X86/fp-stack-O0-crash.ll30
-rw-r--r--test/CodeGen/X86/fp-stack-compare.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-direct-ret.ll4
-rw-r--r--test/CodeGen/X86/fp-stack-ret-conv.ll6
-rw-r--r--test/CodeGen/X86/fp-stack-ret-store.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-ret.ll3
-rw-r--r--test/CodeGen/X86/fp-stack-retcopy.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-set-st1.ll2
-rw-r--r--test/CodeGen/X86/fp2sint.ll2
-rw-r--r--test/CodeGen/X86/fp_constant_op.ll2
-rw-r--r--test/CodeGen/X86/fp_load_cast_fold.ll2
-rw-r--r--test/CodeGen/X86/fp_load_fold.ll2
-rw-r--r--test/CodeGen/X86/fsxor-alignment.ll2
-rw-r--r--test/CodeGen/X86/full-lsr.ll2
-rw-r--r--test/CodeGen/X86/ga-offset.ll4
-rw-r--r--test/CodeGen/X86/global-sections-tls.ll14
-rw-r--r--test/CodeGen/X86/global-sections.ll123
-rw-r--r--test/CodeGen/X86/h-register-addressing-32.ll2
-rw-r--r--test/CodeGen/X86/h-register-addressing-64.ll2
-rw-r--r--test/CodeGen/X86/h-register-store.ll4
-rw-r--r--test/CodeGen/X86/h-registers-0.ll4
-rw-r--r--test/CodeGen/X86/h-registers-1.ll2
-rw-r--r--test/CodeGen/X86/h-registers-2.ll2
-rw-r--r--test/CodeGen/X86/h-registers-3.ll4
-rw-r--r--test/CodeGen/X86/hidden-vis-2.ll4
-rw-r--r--test/CodeGen/X86/hidden-vis-3.ll12
-rw-r--r--test/CodeGen/X86/hidden-vis-4.ll7
-rw-r--r--test/CodeGen/X86/hidden-vis.ll20
-rw-r--r--test/CodeGen/X86/i128-and-beyond.ll2
-rw-r--r--test/CodeGen/X86/i128-immediate.ll2
-rw-r--r--test/CodeGen/X86/i128-mul.ll2
-rw-r--r--test/CodeGen/X86/i128-ret.ll4
-rw-r--r--test/CodeGen/X86/i256-add.ll2
-rw-r--r--test/CodeGen/X86/i2k.ll2
-rw-r--r--test/CodeGen/X86/i64-mem-copy.ll4
-rw-r--r--test/CodeGen/X86/iabs.ll2
-rw-r--r--test/CodeGen/X86/illegal-asm.ll4
-rw-r--r--test/CodeGen/X86/illegal-insert.ll2
-rw-r--r--test/CodeGen/X86/illegal-vector-args-return.ll8
-rw-r--r--test/CodeGen/X86/imp-def-copies.ll2
-rw-r--r--test/CodeGen/X86/imul-lea-2.ll6
-rw-r--r--test/CodeGen/X86/imul-lea.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-2addr.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-R-constraint.ll18
-rw-r--r--test/CodeGen/X86/inline-asm-flag-clobber.ll4
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack2.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack3.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack4.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack5.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-modifier-n.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-mrv.ll8
-rw-r--r--test/CodeGen/X86/inline-asm-out-regs.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-pic.ll4
-rw-r--r--test/CodeGen/X86/inline-asm-q-regs.ll10
-rw-r--r--test/CodeGen/X86/inline-asm-tied.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-x-scalar.ll2
-rw-r--r--test/CodeGen/X86/inline-asm.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-1.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-3.ll4
-rw-r--r--test/CodeGen/X86/insertelement-copytoregs.ll2
-rw-r--r--test/CodeGen/X86/insertelement-legalize.ll2
-rw-r--r--test/CodeGen/X86/invalid-shift-immediate.ll2
-rw-r--r--test/CodeGen/X86/isel-sink.ll4
-rw-r--r--test/CodeGen/X86/isel-sink2.ll2
-rw-r--r--test/CodeGen/X86/isel-sink3.ll4
-rw-r--r--test/CodeGen/X86/isint.ll2
-rw-r--r--test/CodeGen/X86/isnan.ll2
-rw-r--r--test/CodeGen/X86/isnan2.ll2
-rw-r--r--test/CodeGen/X86/ispositive.ll2
-rw-r--r--test/CodeGen/X86/iv-users-in-other-loops.ll6
-rw-r--r--test/CodeGen/X86/jump_sign.ll2
-rw-r--r--test/CodeGen/X86/ldzero.ll2
-rw-r--r--test/CodeGen/X86/lea-2.ll4
-rw-r--r--test/CodeGen/X86/lea-3.ll4
-rw-r--r--test/CodeGen/X86/lea-4.ll2
-rw-r--r--test/CodeGen/X86/lea-recursion.ll2
-rw-r--r--test/CodeGen/X86/lea.ll35
-rw-r--r--test/CodeGen/X86/legalizedag_vec.ll6
-rw-r--r--test/CodeGen/X86/lfence.ll2
-rw-r--r--test/CodeGen/X86/limited-prec.ll6
-rw-r--r--test/CodeGen/X86/live-out-reg-info.ll2
-rw-r--r--test/CodeGen/X86/local-liveness.ll2
-rw-r--r--test/CodeGen/X86/long-setcc.ll6
-rw-r--r--test/CodeGen/X86/longlong-deadload.ll2
-rw-r--r--test/CodeGen/X86/loop-hoist.ll14
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-2.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-3.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce2.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce3.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce5.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce6.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce7.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce8.ll2
-rw-r--r--test/CodeGen/X86/lsr-loop-exit-cond.ll5
-rw-r--r--test/CodeGen/X86/lsr-negative-stride.ll2
-rw-r--r--test/CodeGen/X86/lsr-sort.ll2
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll2
-rw-r--r--test/CodeGen/X86/masked-iv-unsafe.ll2
-rw-r--r--test/CodeGen/X86/maskmovdqu.ll4
-rw-r--r--test/CodeGen/X86/memcpy-2.ll4
-rw-r--r--test/CodeGen/X86/memcpy.ll2
-rw-r--r--test/CodeGen/X86/memmove-0.ll2
-rw-r--r--test/CodeGen/X86/memmove-1.ll2
-rw-r--r--test/CodeGen/X86/memmove-2.ll2
-rw-r--r--test/CodeGen/X86/memmove-3.ll2
-rw-r--r--test/CodeGen/X86/memmove-4.ll2
-rw-r--r--test/CodeGen/X86/memset-2.ll6
-rw-r--r--test/CodeGen/X86/memset.ll4
-rw-r--r--test/CodeGen/X86/memset64-on-x86-32.ll4
-rw-r--r--test/CodeGen/X86/mfence.ll6
-rw-r--r--test/CodeGen/X86/mingw-alloca.ll19
-rw-r--r--test/CodeGen/X86/mmx-arg-passing.ll10
-rw-r--r--test/CodeGen/X86/mmx-arg-passing2.ll4
-rw-r--r--test/CodeGen/X86/mmx-arith.ll2
-rw-r--r--test/CodeGen/X86/mmx-bitcast-to-i64.ll2
-rw-r--r--test/CodeGen/X86/mmx-copy-gprs.ll6
-rw-r--r--test/CodeGen/X86/mmx-emms.ll2
-rw-r--r--test/CodeGen/X86/mmx-insert-element.ll4
-rw-r--r--test/CodeGen/X86/mmx-pinsrw.ll2
-rw-r--r--test/CodeGen/X86/mmx-punpckhdq.ll2
-rw-r--r--test/CodeGen/X86/mmx-s2v.ll2
-rw-r--r--test/CodeGen/X86/mmx-shift.ll8
-rw-r--r--test/CodeGen/X86/mmx-shuffle.ll2
-rw-r--r--test/CodeGen/X86/mmx-vzmovl-2.ll4
-rw-r--r--test/CodeGen/X86/mmx-vzmovl.ll4
-rw-r--r--test/CodeGen/X86/movfs.ll2
-rw-r--r--test/CodeGen/X86/movgs.ll2
-rw-r--r--test/CodeGen/X86/mul-legalize.ll2
-rw-r--r--test/CodeGen/X86/mul-remat.ll2
-rw-r--r--test/CodeGen/X86/mul-shift-reassoc.ll4
-rw-r--r--test/CodeGen/X86/mul128.ll2
-rw-r--r--test/CodeGen/X86/mul64.ll2
-rw-r--r--test/CodeGen/X86/multiple-return-values-cross-block.ll2
-rw-r--r--test/CodeGen/X86/multiple-return-values.ll2
-rw-r--r--test/CodeGen/X86/nancvt.ll2
-rw-r--r--test/CodeGen/X86/narrow_op-1.ll8
-rw-r--r--test/CodeGen/X86/narrow_op-2.ll8
-rw-r--r--test/CodeGen/X86/neg_fp.ll4
-rw-r--r--test/CodeGen/X86/negate-add-zero.ll2
-rw-r--r--test/CodeGen/X86/negative-sin.ll4
-rw-r--r--test/CodeGen/X86/negative-subscript.ll2
-rw-r--r--test/CodeGen/X86/negative_zero.ll2
-rw-r--r--test/CodeGen/X86/nobt.ll2
-rw-r--r--test/CodeGen/X86/nofence.ll2
-rw-r--r--test/CodeGen/X86/omit-label.ll38
-rw-r--r--test/CodeGen/X86/opt-ext-uses.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-0.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-1.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-2.ll2
-rw-r--r--test/CodeGen/X86/or-branch.ll2
-rw-r--r--test/CodeGen/X86/overlap-shift.ll2
-rw-r--r--test/CodeGen/X86/packed_struct.ll6
-rw-r--r--test/CodeGen/X86/peep-test-0.ll2
-rw-r--r--test/CodeGen/X86/peep-test-1.ll2
-rw-r--r--test/CodeGen/X86/peep-test-2.ll2
-rw-r--r--test/CodeGen/X86/peep-test-3.ll89
-rw-r--r--test/CodeGen/X86/peep-vector-extract-concat.ll2
-rw-r--r--test/CodeGen/X86/peep-vector-extract-insert.ll2
-rw-r--r--test/CodeGen/X86/personality.ll50
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce.ll2
-rw-r--r--test/CodeGen/X86/pic-load-remat.ll2
-rw-r--r--test/CodeGen/X86/pic.ll208
-rw-r--r--test/CodeGen/X86/pic_jumptable.ll8
-rw-r--r--test/CodeGen/X86/pmul.ll2
-rw-r--r--test/CodeGen/X86/postalloc-coalescing.ll2
-rw-r--r--test/CodeGen/X86/pr1462.ll2
-rw-r--r--test/CodeGen/X86/pr1489.ll14
-rw-r--r--test/CodeGen/X86/pr1505.ll2
-rw-r--r--test/CodeGen/X86/pr1505b.ll4
-rw-r--r--test/CodeGen/X86/pr2177.ll2
-rw-r--r--test/CodeGen/X86/pr2182.ll2
-rw-r--r--test/CodeGen/X86/pr2326.ll2
-rw-r--r--test/CodeGen/X86/pr2623.ll2
-rw-r--r--test/CodeGen/X86/pr2656.ll2
-rw-r--r--test/CodeGen/X86/pr2659.ll2
-rw-r--r--test/CodeGen/X86/pr2849.ll2
-rw-r--r--test/CodeGen/X86/pr2924.ll2
-rw-r--r--test/CodeGen/X86/pr2982.ll2
-rw-r--r--test/CodeGen/X86/pr3154.ll4
-rw-r--r--test/CodeGen/X86/pr3216.ll2
-rw-r--r--test/CodeGen/X86/pr3241.ll2
-rw-r--r--test/CodeGen/X86/pr3243.ll2
-rw-r--r--test/CodeGen/X86/pr3244.ll2
-rw-r--r--test/CodeGen/X86/pr3250.ll2
-rw-r--r--test/CodeGen/X86/pr3317.ll2
-rw-r--r--test/CodeGen/X86/pr3366.ll2
-rw-r--r--test/CodeGen/X86/pr3457.ll2
-rw-r--r--test/CodeGen/X86/pr3495-2.ll2
-rw-r--r--test/CodeGen/X86/pr3495.ll6
-rw-r--r--test/CodeGen/X86/pr3522.ll2
-rw-r--r--test/CodeGen/X86/pre-split1.ll2
-rw-r--r--test/CodeGen/X86/pre-split10.ll2
-rw-r--r--test/CodeGen/X86/pre-split11.ll34
-rw-r--r--test/CodeGen/X86/pre-split2.ll2
-rw-r--r--test/CodeGen/X86/pre-split3.ll2
-rw-r--r--test/CodeGen/X86/pre-split4.ll2
-rw-r--r--test/CodeGen/X86/pre-split5.ll2
-rw-r--r--test/CodeGen/X86/pre-split6.ll2
-rw-r--r--test/CodeGen/X86/pre-split7.ll2
-rw-r--r--test/CodeGen/X86/pre-split8.ll2
-rw-r--r--test/CodeGen/X86/pre-split9.ll2
-rw-r--r--test/CodeGen/X86/prefetch.ll2
-rw-r--r--test/CodeGen/X86/private-2.ll2
-rw-r--r--test/CodeGen/X86/private.ll8
-rw-r--r--test/CodeGen/X86/ptrtoint-constexpr.ll8
-rw-r--r--test/CodeGen/X86/rdtsc.ll4
-rw-r--r--test/CodeGen/X86/red-zone.ll26
-rw-r--r--test/CodeGen/X86/red-zone2.ll2
-rw-r--r--test/CodeGen/X86/regpressure.ll2
-rw-r--r--test/CodeGen/X86/rem-2.ll2
-rw-r--r--test/CodeGen/X86/rem.ll2
-rw-r--r--test/CodeGen/X86/remat-constant.ll2
-rw-r--r--test/CodeGen/X86/remat-mov-1.ll2
-rw-r--r--test/CodeGen/X86/remat-scalar-zero.ll95
-rw-r--r--  test/CodeGen/X86/ret-addr.ll | 4
-rw-r--r--  test/CodeGen/X86/ret-i64-0.ll | 2
-rw-r--r--  test/CodeGen/X86/ret-mmx.ll | 2
-rw-r--r--  test/CodeGen/X86/rip-rel-address.ll | 9
-rw-r--r--  test/CodeGen/X86/rodata-relocs.ll | 14
-rw-r--r--  test/CodeGen/X86/rot16.ll | 2
-rw-r--r--  test/CodeGen/X86/rot32.ll | 2
-rw-r--r--  test/CodeGen/X86/rot64.ll | 2
-rw-r--r--  test/CodeGen/X86/rotate.ll | 2
-rw-r--r--  test/CodeGen/X86/rotate2.ll | 2
-rw-r--r--  test/CodeGen/X86/scalar-extract.ll | 2
-rw-r--r--  test/CodeGen/X86/scalar-min-max-fill-operand.ll | 12
-rw-r--r--  test/CodeGen/X86/scalar_sse_minmax.ll | 4
-rw-r--r--  test/CodeGen/X86/scalarize-bitcast.ll | 2
-rw-r--r--  test/CodeGen/X86/scev-interchange.ll | 52
-rw-r--r--  test/CodeGen/X86/select-zero-one.ll | 6
-rw-r--r--  test/CodeGen/X86/select.ll | 6
-rw-r--r--  test/CodeGen/X86/setoeq.ll | 4
-rw-r--r--  test/CodeGen/X86/setuge.ll | 2
-rw-r--r--  test/CodeGen/X86/sext-load.ll | 2
-rw-r--r--  test/CodeGen/X86/sext-ret-val.ll | 2
-rw-r--r--  test/CodeGen/X86/sext-select.ll | 2
-rw-r--r--  test/CodeGen/X86/sext-trunc.ll | 2
-rw-r--r--  test/CodeGen/X86/sfence.ll | 2
-rw-r--r--  test/CodeGen/X86/shift-and.ll | 4
-rw-r--r--  test/CodeGen/X86/shift-coalesce.ll | 4
-rw-r--r--  test/CodeGen/X86/shift-codegen.ll | 2
-rw-r--r--  test/CodeGen/X86/shift-combine.ll | 2
-rw-r--r--  test/CodeGen/X86/shift-double.ll | 2
-rw-r--r--  test/CodeGen/X86/shift-folding.ll | 2
-rw-r--r--  test/CodeGen/X86/shift-i128.ll | 4
-rw-r--r--  test/CodeGen/X86/shift-i256.ll | 4
-rw-r--r--  test/CodeGen/X86/shift-one.ll | 2
-rw-r--r--  test/CodeGen/X86/shift-parts.ll | 22
-rw-r--r--  test/CodeGen/X86/shl_elim.ll | 6
-rw-r--r--  test/CodeGen/X86/shrink-fp-const1.ll | 2
-rw-r--r--  test/CodeGen/X86/shrink-fp-const2.ll | 2
-rw-r--r--  test/CodeGen/X86/sincos.ll | 30
-rw-r--r--  test/CodeGen/X86/sink-hoist.ll | 43
-rw-r--r--  test/CodeGen/X86/small-byval-memcpy.ll | 2
-rw-r--r--  test/CodeGen/X86/smul-with-overflow-2.ll | 4
-rw-r--r--  test/CodeGen/X86/smul-with-overflow-3.ll | 2
-rw-r--r--  test/CodeGen/X86/smul-with-overflow.ll | 2
-rw-r--r--  test/CodeGen/X86/soft-fp.ll | 4
-rw-r--r--  test/CodeGen/X86/split-eh-lpad-edges.ll | 2
-rw-r--r--  test/CodeGen/X86/split-select.ll | 2
-rw-r--r--  test/CodeGen/X86/split-vector-rem.ll | 4
-rw-r--r--  test/CodeGen/X86/sret.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-0.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-1.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-10.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-11.ll | 4
-rw-r--r--  test/CodeGen/X86/sse-align-12.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-2.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-3.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-4.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-5.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-6.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-7.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-8.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-9.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-fcopysign.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-load-ret.ll | 6
-rw-r--r--  test/CodeGen/X86/sse-minmax.ll | 392
-rw-r--r--  test/CodeGen/X86/sse-varargs.ll | 2
-rw-r--r--  test/CodeGen/X86/sse2.ll | 34
-rw-r--r--  test/CodeGen/X86/sse3.ll | 273
-rw-r--r--  test/CodeGen/X86/sse41.ll | 226
-rw-r--r--  test/CodeGen/X86/sse42.ll | 38
-rw-r--r--  test/CodeGen/X86/sse_reload_fold.ll | 3
-rw-r--r--  test/CodeGen/X86/stack-align.ll | 2
-rw-r--r--  test/CodeGen/X86/stack-color-with-reg-2.ll | 2
-rw-r--r--  test/CodeGen/X86/stack-color-with-reg.ll | 6
-rw-r--r--  test/CodeGen/X86/stdarg.ll | 20
-rw-r--r--  test/CodeGen/X86/store-empty-member.ll | 14
-rw-r--r--  test/CodeGen/X86/store-fp-constant.ll | 4
-rw-r--r--  test/CodeGen/X86/store-global-address.ll | 2
-rw-r--r--  test/CodeGen/X86/store_op_load_fold.ll | 2
-rw-r--r--  test/CodeGen/X86/store_op_load_fold2.ll | 2
-rw-r--r--  test/CodeGen/X86/storetrunc-fp.ll | 2
-rw-r--r--  test/CodeGen/X86/stride-nine-with-base-reg.ll | 4
-rw-r--r--  test/CodeGen/X86/stride-reuse.ll | 4
-rw-r--r--  test/CodeGen/X86/sub-with-overflow.ll | 4
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-0.ll | 2
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-1.ll | 2
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-2.ll | 2
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-3.ll | 2
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-4.ll | 2
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-5.ll | 2
-rw-r--r--  test/CodeGen/X86/subreg-to-reg-6.ll | 2
-rw-r--r--  test/CodeGen/X86/switch-zextload.ll | 2
-rw-r--r--  test/CodeGen/X86/swizzle.ll | 6
-rw-r--r--  test/CodeGen/X86/tailcall-i1.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcall-stackalign.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcall-structret.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcall-void.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcall1.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcallbyval.ll | 4
-rw-r--r--  test/CodeGen/X86/tailcallbyval64.ll | 8
-rw-r--r--  test/CodeGen/X86/tailcallfp.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcallfp2.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcallpic1.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcallpic2.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcallstack64.ll | 19
-rw-r--r--  test/CodeGen/X86/test-nofold.ll | 2
-rw-r--r--  test/CodeGen/X86/test-shrink-bug.ll | 23
-rw-r--r--  test/CodeGen/X86/test-shrink.ll | 158
-rw-r--r--  test/CodeGen/X86/testl-commute.ll | 2
-rw-r--r--  test/CodeGen/X86/tls-pic.ll | 67
-rw-r--r--  test/CodeGen/X86/tls1.ll | 4
-rw-r--r--  test/CodeGen/X86/tls10.ll | 4
-rw-r--r--  test/CodeGen/X86/tls11.ll | 4
-rw-r--r--  test/CodeGen/X86/tls12.ll | 4
-rw-r--r--  test/CodeGen/X86/tls13.ll | 4
-rw-r--r--  test/CodeGen/X86/tls14.ll | 4
-rw-r--r--  test/CodeGen/X86/tls15.ll | 4
-rw-r--r--  test/CodeGen/X86/tls2.ll | 4
-rw-r--r--  test/CodeGen/X86/tls3.ll | 4
-rw-r--r--  test/CodeGen/X86/tls4.ll | 4
-rw-r--r--  test/CodeGen/X86/tls5.ll | 4
-rw-r--r--  test/CodeGen/X86/tls6.ll | 4
-rw-r--r--  test/CodeGen/X86/tls7.ll | 4
-rw-r--r--  test/CodeGen/X86/tls8.ll | 4
-rw-r--r--  test/CodeGen/X86/tls9.ll | 4
-rw-r--r--  test/CodeGen/X86/trap.ll | 2
-rw-r--r--  test/CodeGen/X86/trunc-to-bool.ll | 2
-rw-r--r--  test/CodeGen/X86/twoaddr-coalesce-2.ll | 2
-rw-r--r--  test/CodeGen/X86/twoaddr-coalesce.ll | 2
-rw-r--r--  test/CodeGen/X86/twoaddr-delete.ll | 2
-rw-r--r--  test/CodeGen/X86/twoaddr-pass-sink.ll | 2
-rw-r--r--  test/CodeGen/X86/twoaddr-remat.ll | 2
-rw-r--r--  test/CodeGen/X86/uint_to_fp-2.ll | 2
-rw-r--r--  test/CodeGen/X86/uint_to_fp.ll | 4
-rw-r--r--  test/CodeGen/X86/umul-with-carry.ll | 2
-rw-r--r--  test/CodeGen/X86/umul-with-overflow.ll | 2
-rw-r--r--  test/CodeGen/X86/urem-i8-constant.ll | 2
-rw-r--r--  test/CodeGen/X86/v4f32-immediate.ll | 2
-rw-r--r--  test/CodeGen/X86/variable-sized-darwin-bzero.ll | 2
-rw-r--r--  test/CodeGen/X86/variadic-node-pic.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_add.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_align.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_call.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_clear.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_compare.ll | 43
-rw-r--r--  test/CodeGen/X86/vec_ctbits.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_extract-sse4.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_extract.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_fneg.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_i64.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_ins_extract-1.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_ins_extract.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_insert-2.ll | 10
-rw-r--r--  test/CodeGen/X86/vec_insert-3.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_insert-5.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_insert-6.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_insert-7.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_insert-8.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_insert.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_insert_4.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_loadsingles.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_logical.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_return.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_select.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-2.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_set-3.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-4.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-5.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-6.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-7.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-8.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_set-9.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_set-A.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-B.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_set-C.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_set-D.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-E.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-F.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_set-G.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-H.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set-I.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_set-J.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_set.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shift.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_shift2.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shift3.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_shuffle-10.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-11.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-14.ll | 10
-rw-r--r--  test/CodeGen/X86/vec_shuffle-15.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-16.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-17.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-18.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-19.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-20.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-22.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-23.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-24.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-25.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-26.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-27.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-28.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_shuffle-3.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-30.ll | 8
-rw-r--r--  test/CodeGen/X86/vec_shuffle-31.ll | 7
-rw-r--r--  test/CodeGen/X86/vec_shuffle-34.ll | 8
-rw-r--r--  test/CodeGen/X86/vec_shuffle-35.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-36.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-4.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-5.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-6.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-7.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-8.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-9.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_splat-2.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_splat-3.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_splat-4.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_splat.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_ss_load_fold.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_zero-2.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_zero.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_zero_cse.ll | 6
-rw-r--r--  test/CodeGen/X86/vector-intrinsics.ll | 2
-rw-r--r--  test/CodeGen/X86/vector-rem.ll | 4
-rw-r--r--  test/CodeGen/X86/vector-variable-idx.ll | 2
-rw-r--r--  test/CodeGen/X86/vector.ll | 4
-rw-r--r--  test/CodeGen/X86/vfcmp.ll | 8
-rw-r--r--  test/CodeGen/X86/volatile.ll | 4
-rw-r--r--  test/CodeGen/X86/vortex-bug.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift-1.ll | 22
-rw-r--r--  test/CodeGen/X86/vshift-2.ll | 24
-rw-r--r--  test/CodeGen/X86/vshift-3.ll | 21
-rw-r--r--  test/CodeGen/X86/vshift-4.ll | 22
-rw-r--r--  test/CodeGen/X86/vshift-5.ll | 56
-rw-r--r--  test/CodeGen/X86/vshift_scalar.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift_split.ll | 4
-rw-r--r--  test/CodeGen/X86/vshift_split2.ll | 2
-rw-r--r--  test/CodeGen/X86/weak.ll | 2
-rw-r--r--  test/CodeGen/X86/wide-integer-fold.ll | 12
-rw-r--r--  test/CodeGen/X86/widen_arith-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-3.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-4.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-5.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_arith-6.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-3.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-4.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-5.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_cast-6.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-2.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-3.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_conv-4.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_load-0.ll | 21
-rw-r--r--  test/CodeGen/X86/widen_load-1.ll | 45
-rw-r--r--  test/CodeGen/X86/widen_select-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_shuffle-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_shuffle-2.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-and-mask.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-arg.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-asm.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-dead-stack-adjust.ll | 4
-rw-r--r--  test/CodeGen/X86/x86-64-disp.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-frameaddr.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-gv-offset.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-malloc.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-mem.ll | 5
-rw-r--r--  test/CodeGen/X86/x86-64-pic-1.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-10.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-11.ll | 5
-rw-r--r--  test/CodeGen/X86/x86-64-pic-2.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-3.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-4.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-5.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-6.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-7.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-8.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-pic-9.ll | 3
-rw-r--r--  test/CodeGen/X86/x86-64-ret0.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-shortint.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-sret-return.ll | 11
-rw-r--r--  test/CodeGen/X86/x86-64-varargs.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-frameaddr.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-frameaddr2.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-store-gv-addr.ll | 4
-rw-r--r--  test/CodeGen/X86/xmm-r64.ll | 2
-rw-r--r--  test/CodeGen/X86/xor.ll | 133
-rw-r--r--  test/CodeGen/X86/zero-remat.ll | 32
-rw-r--r--  test/CodeGen/X86/zext-inreg-0.ll | 4
-rw-r--r--  test/CodeGen/X86/zext-inreg-1.ll | 2
-rw-r--r--  test/CodeGen/XCore/2008-11-17-Shl64.ll | 2
-rw-r--r--  test/CodeGen/XCore/2009-01-08-Crash.ll | 2
-rw-r--r--  test/CodeGen/XCore/2009-01-14-Remat-Crash.ll | 2
-rw-r--r--  test/CodeGen/XCore/2009-03-27-v2f64-param.ll | 2
-rw-r--r--  test/CodeGen/XCore/2009-07-15-store192.ll | 7
-rw-r--r--  test/CodeGen/XCore/addsub64.ll | 2
-rw-r--r--  test/CodeGen/XCore/ashr.ll | 76
-rw-r--r--  test/CodeGen/XCore/basictest.ll | 2
-rw-r--r--  test/CodeGen/XCore/bitrev.ll | 2
-rw-r--r--  test/CodeGen/XCore/constants.ll | 11
-rw-r--r--  test/CodeGen/XCore/cos.ll | 2
-rw-r--r--  test/CodeGen/XCore/exp.ll | 2
-rw-r--r--  test/CodeGen/XCore/exp2.ll | 2
-rw-r--r--  test/CodeGen/XCore/fneg.ll | 3
-rw-r--r--  test/CodeGen/XCore/getid.ll | 2
-rw-r--r--  test/CodeGen/XCore/globals.ll | 92
-rw-r--r--  test/CodeGen/XCore/load.ll | 39
-rw-r--r--  test/CodeGen/XCore/log.ll | 2
-rw-r--r--  test/CodeGen/XCore/log10.ll | 2
-rw-r--r--  test/CodeGen/XCore/log2.ll | 2
-rw-r--r--  test/CodeGen/XCore/pow.ll | 2
-rw-r--r--  test/CodeGen/XCore/powi.ll | 2
-rw-r--r--  test/CodeGen/XCore/private.ll | 2
-rw-r--r--  test/CodeGen/XCore/sext.ll | 32
-rw-r--r--  test/CodeGen/XCore/sin.ll | 2
-rw-r--r--  test/CodeGen/XCore/sqrt.ll | 2
-rw-r--r--  test/CodeGen/XCore/store.ll | 35
-rw-r--r--  test/CodeGen/XCore/tls.ll | 20
-rw-r--r--  test/CodeGen/XCore/trap.ll | 2
-rw-r--r--  test/CodeGen/XCore/unaligned_load.ll | 31
-rw-r--r--  test/CodeGen/XCore/unaligned_store.ll | 18
-rw-r--r--  test/CodeGen/XCore/unaligned_store_combine.ll | 12
-rw-r--r--  test/DebugInfo/2008-10-17-C++DebugCrash.ll | 2
-rw-r--r--  test/DebugInfo/2008-11-05-InlinedFuncStart.ll | 4
-rw-r--r--  test/DebugInfo/2009-01-15-RecordVariableCrash.ll | 2
-rw-r--r--  test/DebugInfo/2009-01-15-dbg_declare.ll | 3
-rw-r--r--  test/DebugInfo/2009-01-15-member.ll | 4
-rw-r--r--  test/DebugInfo/2009-02-18-DefaultScope-Crash.ll | 2
-rw-r--r--  test/DebugInfo/2009-02-27-licm.ll | 2
-rw-r--r--  test/DebugInfo/2009-03-03-cheapdse.ll | 2
-rw-r--r--  test/DebugInfo/2009-03-03-deadstore.ll | 2
-rw-r--r--  test/DebugInfo/2009-03-03-store-to-load-forward.ll | 2
-rw-r--r--  test/DebugInfo/2009-03-05-gvn.ll | 2
-rw-r--r--  test/DebugInfo/2009-06-12-Inline.ll | 2
-rw-r--r--  test/DebugInfo/2009-06-12-InlineFuncStart.ll | 4
-rw-r--r--  test/DebugInfo/2009-06-15-InlineFuncStart.ll | 4
-rw-r--r--  test/DebugInfo/2009-06-15-abstract_origin.ll | 2
-rw-r--r--  test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll | 72
-rw-r--r--  test/DebugInfo/deaddebuglabel.ll | 2
-rw-r--r--  test/DebugInfo/funccall.ll | 2
-rw-r--r--  test/DebugInfo/globalGetElementPtr.ll | 2
-rw-r--r--  test/DebugInfo/printdbginfo2.ll | 129
-rw-r--r--  test/ExecutionEngine/2002-12-16-ArgTest.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-04-ArgumentBug.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-04-LoopTest.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-04-PhiTest.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-09-SARTest.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-10-FUCOM.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-15-AlignmentTest.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-06-04-bzip2-bug.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-06-05-PHIBug.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-08-15-AllocaAssertion.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-08-21-EnvironmentTest.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll | 2
-rw-r--r--  test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll | 2
-rw-r--r--  test/ExecutionEngine/hello.ll | 2
-rw-r--r--  test/ExecutionEngine/hello2.ll | 2
-rw-r--r--  test/ExecutionEngine/simplesttest.ll | 2
-rw-r--r--  test/ExecutionEngine/simpletest.ll | 2
-rw-r--r--  test/ExecutionEngine/stubs.ll | 35
-rw-r--r--  test/ExecutionEngine/test-arith.ll | 2
-rw-r--r--  test/ExecutionEngine/test-branch.ll | 2
-rw-r--r--  test/ExecutionEngine/test-call.ll | 2
-rw-r--r--  test/ExecutionEngine/test-cast.ll | 2
-rw-r--r--  test/ExecutionEngine/test-constantexpr.ll | 2
-rw-r--r--  test/ExecutionEngine/test-fp.ll | 2
-rw-r--r--  test/ExecutionEngine/test-loadstore.ll | 2
-rw-r--r--  test/ExecutionEngine/test-logical.ll | 2
-rw-r--r--  test/ExecutionEngine/test-loop.ll | 2
-rw-r--r--  test/ExecutionEngine/test-malloc.ll | 2
-rw-r--r--  test/ExecutionEngine/test-phi.ll | 2
-rw-r--r--  test/ExecutionEngine/test-ret.ll | 2
-rw-r--r--  test/ExecutionEngine/test-setcond-fp.ll | 2
-rw-r--r--  test/ExecutionEngine/test-setcond-int.ll | 2
-rw-r--r--  test/ExecutionEngine/test-shift.ll | 2
-rw-r--r--  test/Feature/NamedMDNode.ll | 6
-rw-r--r--  test/Feature/NamedMDNode2.ll | 7
-rw-r--r--  test/Feature/float.ll | 6
-rw-r--r--  test/Feature/globalredefinition3.ll | 2
-rw-r--r--  test/Feature/inlineasm.ll | 6
-rw-r--r--  test/Feature/load_module.ll | 4
-rw-r--r--  test/Feature/md_on_instruction.ll | 23
-rw-r--r--  test/Feature/md_on_instruction2.ll | 22
-rw-r--r--  test/Feature/memorymarkers.ll | 36
-rw-r--r--  test/Feature/weak_constant.ll | 2
-rw-r--r--  test/FrontendC++/2003-08-20-ExceptionFail.cpp | 2
-rw-r--r--  test/FrontendC++/2003-08-21-EmptyClass.cpp | 2
-rw-r--r--  test/FrontendC++/2003-08-27-TypeNamespaces.cpp | 2
-rw-r--r--  test/FrontendC++/2003-08-28-ForwardType.cpp | 2
-rw-r--r--  test/FrontendC++/2003-08-28-SaveExprBug.cpp | 2
-rw-r--r--  test/FrontendC++/2003-08-31-StructLayout.cpp | 2
-rw-r--r--  test/FrontendC++/2003-09-22-CompositeExprValue.cpp | 2
-rw-r--r--  test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp | 2
-rw-r--r--  test/FrontendC++/2003-09-30-CommaExprBug.cpp | 2
-rw-r--r--  test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp | 2
-rw-r--r--  test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp | 2
-rw-r--r--  test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp | 2
-rw-r--r--  test/FrontendC++/2003-10-17-BoolBitfields.cpp | 2
-rw-r--r--  test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-04-ArrayConstructors.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-04-CatchLabelName.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-18-EnumArray.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-18-PtrMemConstantInitializer.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-25-ReturningOpaqueByValue.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-27-MultipleInheritanceThunk.cpp | 2
-rw-r--r--  test/FrontendC++/2003-11-29-DuplicatedCleanupTest.cpp | 2
-rw-r--r--  test/FrontendC++/2003-12-08-ArrayOfPtrToMemberFunc.cpp | 2
-rw-r--r--  test/FrontendC++/2004-03-08-ReinterpretCastCopy.cpp | 2
-rw-r--r--  test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp | 2
-rw-r--r--  test/FrontendC++/2004-06-08-LateTemplateInstantiation.cpp | 2
-rw-r--r--  test/FrontendC++/2004-09-27-CompilerCrash.cpp | 2
-rw-r--r--  test/FrontendC++/2006-11-06-StackTrace.cpp | 7
-rw-r--r--  test/FrontendC++/2006-11-30-NoCompileUnit.cpp | 4
-rw-r--r--  test/FrontendC++/2006-11-30-Pubnames.cpp | 4
-rw-r--r--  test/FrontendC++/2007-04-05-PackedBitFields-1.cpp | 2
-rw-r--r--  test/FrontendC++/2007-04-05-PackedBitFieldsOverlap-2.cpp | 2
-rw-r--r--  test/FrontendC++/2007-04-05-PackedBitFieldsOverlap.cpp | 2
-rw-r--r--  test/FrontendC++/2007-04-05-PackedBitFieldsSmall.cpp | 2
-rw-r--r--  test/FrontendC++/2007-04-05-StructPackedFieldUnpacked.cpp | 2
-rw-r--r--  test/FrontendC++/2009-04-21-DtorNames-dbg.cpp | 2
-rw-r--r--  test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp | 4
-rw-r--r--  test/FrontendC++/2009-06-30-ByrefBlock.cpp | 5
-rw-r--r--  test/FrontendC++/2009-07-15-LineNumbers.cpp | 28
-rw-r--r--  test/FrontendC++/2009-07-16-PrivateCopyConstructor.cpp | 15
-rw-r--r--  test/FrontendC++/2009-07-16-Using.cpp | 8
-rw-r--r--  test/FrontendC++/2009-08-03-Varargs.cpp | 5
-rw-r--r--  test/FrontendC++/2009-08-05-ZeroInitWidth.cpp | 12
-rw-r--r--  test/FrontendC++/2009-08-11-VectorRetTy.cpp | 13
-rw-r--r--  test/FrontendC++/2009-09-04-modify-crash.cpp | 7
-rw-r--r--  test/FrontendC++/2009-09-09-packed-layout.cpp | 18
-rw-r--r--  test/FrontendC++/member-alignment.cpp | 20
-rw-r--r--  test/FrontendC++/msasm.cpp | 23
-rw-r--r--  test/FrontendC/2002-01-23-LoadQISIReloadFailure.c | 2
-rw-r--r--  test/FrontendC/2002-01-24-ComplexSpaceInType.c | 2
-rw-r--r--  test/FrontendC/2002-01-24-HandleCallInsnSEGV.c | 2
-rw-r--r--  test/FrontendC/2002-02-13-ConditionalInCall.c | 2
-rw-r--r--  test/FrontendC/2002-02-13-ReloadProblem.c | 2
-rw-r--r--  test/FrontendC/2002-02-13-TypeVarNameCollision.c | 2
-rw-r--r--  test/FrontendC/2002-02-13-UnnamedLocal.c | 2
-rw-r--r--  test/FrontendC/2002-02-14-EntryNodePreds.c | 2
-rw-r--r--  test/FrontendC/2002-02-16-RenamingTest.c | 2
-rw-r--r--  test/FrontendC/2002-02-17-ArgumentAddress.c | 2
-rw-r--r--  test/FrontendC/2002-02-18-64bitConstant.c | 2
-rw-r--r--  test/FrontendC/2002-02-18-StaticData.c | 2
-rw-r--r--  test/FrontendC/2002-03-11-LargeCharInString.c | 2
-rw-r--r--  test/FrontendC/2002-03-12-ArrayInitialization.c | 2
-rw-r--r--  test/FrontendC/2002-03-12-StructInitialize.c | 2
-rw-r--r--  test/FrontendC/2002-03-12-StructInitializer.c | 2
-rw-r--r--  test/FrontendC/2002-03-14-BrokenPHINode.c | 2
-rw-r--r--  test/FrontendC/2002-03-14-BrokenSSA.c | 2
-rw-r--r--  test/FrontendC/2002-03-14-QuotesInStrConst.c | 2
-rw-r--r--  test/FrontendC/2002-04-07-SwitchStmt.c | 2
-rw-r--r--  test/FrontendC/2002-04-08-LocalArray.c | 2
-rw-r--r--  test/FrontendC/2002-04-09-StructRetVal.c | 2
-rw-r--r--  test/FrontendC/2002-04-10-StructParameters.c | 2
-rw-r--r--  test/FrontendC/2002-05-23-StaticValues.c | 2
-rw-r--r--  test/FrontendC/2002-05-23-TypeNameCollision.c | 2
-rw-r--r--  test/FrontendC/2002-05-24-Alloca.c | 2
-rw-r--r--  test/FrontendC/2002-06-25-FWriteInterfaceFailure.c | 2
-rw-r--r--  test/FrontendC/2002-07-14-MiscListTests.c | 2
-rw-r--r--  test/FrontendC/2002-07-14-MiscTests.c | 2
-rw-r--r--  test/FrontendC/2002-07-14-MiscTests2.c | 2
-rw-r--r--  test/FrontendC/2002-07-14-MiscTests3.c | 2
-rw-r--r--  test/FrontendC/2002-07-16-HardStringInit.c | 2
-rw-r--r--  test/FrontendC/2002-07-17-StringConstant.c | 2
-rw-r--r--  test/FrontendC/2002-07-29-Casts.c | 2
-rw-r--r--  test/FrontendC/2002-07-30-SubregSetAssertion.c | 2
-rw-r--r--  test/FrontendC/2002-07-30-UnionTest.c | 2
-rw-r--r--  test/FrontendC/2002-07-30-VarArgsCallFailure.c | 2
-rw-r--r--  test/FrontendC/2002-07-31-BadAssert.c | 2
-rw-r--r--  test/FrontendC/2002-07-31-SubregFailure.c | 2
-rw-r--r--  test/FrontendC/2002-08-02-UnionTest.c | 2
-rw-r--r--  test/FrontendC/2002-08-19-RecursiveLocals.c | 2
-rw-r--r--  test/FrontendC/2002-09-08-PointerShifts.c | 2
-rw-r--r--  test/FrontendC/2002-09-18-UnionProblem.c | 2
-rw-r--r--  test/FrontendC/2002-09-19-StarInLabel.c | 2
-rw-r--r--  test/FrontendC/2002-10-12-TooManyArguments.c | 2
-rw-r--r--  test/FrontendC/2002-12-15-GlobalBoolTest.c | 2
-rw-r--r--  test/FrontendC/2002-12-15-GlobalConstantTest.c | 2
-rw-r--r--  test/FrontendC/2002-12-15-GlobalRedefinition.c | 2
-rw-r--r--  test/FrontendC/2002-12-15-StructParameters.c | 2
-rw-r--r--  test/FrontendC/2003-03-03-DeferredType.c | 2
-rw-r--r--  test/FrontendC/2003-06-22-UnionCrash.c | 2
-rw-r--r--  test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c | 2
-rw-r--r--  test/FrontendC/2003-06-26-CFECrash.c | 2
-rw-r--r--  test/FrontendC/2003-06-29-MultipleFunctionDefinition.c | 2
-rw-r--r--  test/FrontendC/2003-08-18-SigSetJmp.c | 2
-rw-r--r--  test/FrontendC/2003-08-18-StructAsValue.c | 2
-rw-r--r--  test/FrontendC/2003-08-20-BadBitfieldRef.c | 2
-rw-r--r--  test/FrontendC/2003-08-20-PrototypeMismatch.c | 2
-rw-r--r--  test/FrontendC/2003-08-20-vfork-bug.c | 2
-rw-r--r--  test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c | 2
-rw-r--r--  test/FrontendC/2003-08-21-StmtExpr.c | 2
-rw-r--r--  test/FrontendC/2003-08-21-WideString.c | 2
-rw-r--r--  test/FrontendC/2003-08-23-LocalUnionTest.c | 2
-rw-r--r--  test/FrontendC/2003-08-29-BitFieldStruct.c | 2
-rw-r--r--  test/FrontendC/2003-08-29-HugeCharConst.c | 2
-rw-r--r--  test/FrontendC/2003-08-29-StructLayoutBug.c | 2
-rw-r--r--  test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c | 2
-rw-r--r--  test/FrontendC/2003-09-18-BitfieldTests.c | 2
-rw-r--r--  test/FrontendC/2003-09-30-StructLayout.c | 2
-rw-r--r--  test/FrontendC/2003-10-02-UnionLValueError.c | 2
-rw-r--r--  test/FrontendC/2003-10-06-NegateExprType.c | 2
-rw-r--r--  test/FrontendC/2003-10-09-UnionInitializerBug.c | 2
-rw-r--r--  test/FrontendC/2003-10-28-ident.c | 2
-rw-r--r--  test/FrontendC/2003-10-29-AsmRename.c | 2
-rw-r--r--  test/FrontendC/2003-11-01-C99-CompoundLiteral.c | 2
-rw-r--r--  test/FrontendC/2003-11-01-EmptyStructCrash.c | 2
-rw-r--r--  test/FrontendC/2003-11-01-GlobalUnionInit.c | 2
-rw-r--r--  test/FrontendC/2003-11-04-EmptyStruct.c | 2
-rw-r--r--  test/FrontendC/2003-11-04-OutOfMemory.c | 2
-rw-r--r--  test/FrontendC/2003-11-12-VoidString.c | 2
-rw-r--r--  test/FrontendC/2003-11-16-StaticArrayInit.c | 2
-rw-r--r--  test/FrontendC/2003-11-18-CondExprLValue.c | 2
-rw-r--r--  test/FrontendC/2003-11-19-BitFieldArray.c | 2
-rw-r--r--  test/FrontendC/2003-11-20-Bitfields.c | 2
-rw-r--r--  test/FrontendC/2003-11-20-ComplexDivision.c | 2
-rw-r--r--  test/FrontendC/2003-11-20-UnionBitfield.c | 2
-rw-r--r--  test/FrontendC/2003-11-26-PointerShift.c | 2
-rw-r--r--  test/FrontendC/2003-11-27-ConstructorCast.c | 2
-rw-r--r--  test/FrontendC/2003-11-27-UnionCtorInitialization.c | 2
-rw-r--r--  test/FrontendC/2004-01-08-ExternInlineRedefine.c | 2
-rw-r--r--  test/FrontendC/2004-03-07-ComplexDivEquals.c | 2
-rw-r--r--  test/FrontendC/2004-03-09-LargeArrayInitializers.c | 2
-rw-r--r--  test/FrontendC/2004-03-15-SimpleIndirectGoto.c | 2
-rw-r--r--  test/FrontendC/2004-03-16-AsmRegisterCrash.c | 6
-rw-r--r--  test/FrontendC/2004-05-07-VarArrays.c | 2
-rw-r--r--  test/FrontendC/2004-05-21-IncompleteEnum.c | 2
-rw-r--r--  test/FrontendC/2004-06-08-OpaqueStructArg.c | 2
-rw-r--r--  test/FrontendC/2004-06-17-UnorderedBuiltins.c | 2
-rw-r--r--  test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c | 2
-rw-r--r--  test/FrontendC/2004-07-06-FunctionCast.c | 2
-rw-r--r--  test/FrontendC/2004-08-06-LargeStructTest.c | 2
-rw-r--r--  test/FrontendC/2005-02-27-MarkGlobalConstant.c | 2
-rw-r--r--  test/FrontendC/2005-07-20-SqrtNoErrno.c | 8
-rw-r--r--  test/FrontendC/2005-09-20-ComplexConstants.c | 2
-rw-r--r--  test/FrontendC/2005-12-04-DeclarationLineNumbers.c | 2
-rw-r--r--  test/FrontendC/2006-01-13-Includes.c | 3
-rw-r--r--  test/FrontendC/2007-02-16-WritableStrings.c | 5
-rw-r--r--  test/FrontendC/2007-03-27-VarLengthArray.c | 2
-rw-r--r--  test/FrontendC/2008-03-05-syncPtr.c | 2
-rw-r--r--  test/FrontendC/2008-03-24-BitField-And-Alloca.c | 2
-rw-r--r--  test/FrontendC/2008-07-29-EHLabel.ll | 2
-rw-r--r--  test/FrontendC/2009-02-17-BitField-dbg.c | 2
-rw-r--r--  test/FrontendC/2009-03-09-WeakDeclarations-1.c | 2
-rw-r--r--  test/FrontendC/2009-07-14-VoidPtr.c | 6
-rw-r--r--  test/FrontendC/2009-07-15-pad-wchar_t-array.c | 7
-rw-r--r--  test/FrontendC/2009-07-17-VoidParameter.c | 4
-rw-r--r--  test/FrontendC/2009-07-22-StructLayout.c | 34
-rw-r--r--  test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c | 10
-rw-r--r--  test/FrontendC/2009-09-24-SqrtErrno.c | 12
-rw-r--r--  test/FrontendC/Atomics-no64bit.c | 2
-rw-r--r--  test/FrontendC/Atomics.c | 2
-rw-r--r--  test/FrontendC/func-aligned.c | 7
-rw-r--r--  test/FrontendC/msasm.c | 23
-rw-r--r--  test/FrontendC/ptr-rotate.c | 7
-rw-r--r--  test/FrontendC/redef-ext-inline.c | 6
-rw-r--r--  test/FrontendC/wchar-const.c | 9
-rw-r--r--  test/FrontendObjC/2007-10-17-SJLJExceptions.m | 2
-rw-r--r--  test/FrontendObjC/2009-04-14-AsmSection.m | 2
-rw-r--r--  test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m | 2
-rw-r--r--  test/FrontendObjC/2009-08-05-utf16.m | 5
-rw-r--r--  test/FrontendObjC/2009-08-17-DebugInfo.m | 28
-rw-r--r--  test/Integer/a15.ll.out | 39
-rw-r--r--  test/Integer/a17.ll.out | 37
-rw-r--r--  test/Integer/a31.ll.out | 37
-rw-r--r--  test/Integer/a33.ll.out | 37
-rw-r--r--  test/Integer/a63.ll.out | 37
-rw-r--r--  test/Integer/a7.ll.out | 47
-rw-r--r--  test/Integer/a9.ll.out | 35
-rw-r--r--  test/Integer/varargs_bt.ll | 2
-rw-r--r--  test/LLVMC/C++/dash-x.cpp | 9
-rw-r--r--  test/LLVMC/C++/dg.exp | 5
-rw-r--r--  test/LLVMC/C++/hello.cpp | 8
-rw-r--r--  test/LLVMC/C++/together.cpp | 9
-rw-r--r--  test/LLVMC/C/dg.exp | 5
-rw-r--r--  test/LLVMC/C/emit-llvm.c | 4
-rw-r--r--  test/LLVMC/C/hello.c | 12
-rw-r--r--  test/LLVMC/C/include.c | 9
-rw-r--r--  test/LLVMC/C/opt-test.c | 12
-rw-r--r--  test/LLVMC/C/sink.c | 12
-rw-r--r--  test/LLVMC/C/wall.c | 12
-rw-r--r--  test/LLVMC/EmptyCompilationGraph.td | 2
-rw-r--r--  test/LLVMC/EnvParentheses.td | 2
-rw-r--r--  test/LLVMC/ExternOptions.td | 4
-rw-r--r--  test/LLVMC/ForwardAs.td | 4
-rw-r--r--  test/LLVMC/HookWithArguments.td | 2
-rw-r--r--  test/LLVMC/MultiValuedOption.td | 4
-rw-r--r--  test/LLVMC/MultipleCompilationGraphs.td | 2
-rw-r--r--  test/LLVMC/NoActions.td | 4
-rw-r--r--  test/LLVMC/NoCompilationGraph.td | 2
-rw-r--r--  test/LLVMC/ObjC++/dg.exp | 5
-rw-r--r--  test/LLVMC/ObjC++/hello.mm | 8
-rw-r--r--  test/LLVMC/ObjC/dg.exp | 5
-rw-r--r--  test/LLVMC/ObjC/hello.m | 12
-rw-r--r--  test/LLVMC/OneOrMore.td | 4
-rw-r--r--  test/LLVMC/TestWarnings.td | 2
-rw-r--r--  test/LLVMC/dg.exp | 3
-rw-r--r--  test/LLVMC/test_data/false.c | 10
-rw-r--r--  test/Linker/2003-01-30-LinkerRename.ll | 4
-rw-r--r--  test/Linker/2003-01-30-LinkerTypeRename.ll | 2
-rw-r--r--  test/Linker/2003-04-21-Linkage.ll | 2
-rw-r--r--  test/Linker/2003-04-23-LinkOnceLost.ll | 6
-rw-r--r--  test/Linker/2003-04-26-NullPtrLinkProblem.ll | 2
-rw-r--r--  test/Linker/2003-05-15-TypeProblem.ll | 6
-rw-r--r--  test/Linker/2003-05-31-LinkerRename.ll | 2
-rw-r--r--  test/Linker/2003-08-20-OpaqueTypeResolve.ll | 8
-rw-r--r--  test/Linker/2003-08-23-GlobalVarLinking.ll | 2
-rw-r--r--  test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll | 4
-rw-r--r--  test/Linker/2003-08-28-TypeResolvesGlobal.ll | 6
-rw-r--r--  test/Linker/2003-08-28-TypeResolvesGlobal2.ll | 8
-rw-r--r--  test/Linker/2003-08-28-TypeResolvesGlobal3.ll | 8
-rw-r--r--  test/Linker/2003-10-21-ConflictingTypesTolerance.ll | 6
-rw-r--r--  test/Linker/2004-02-17-WeakStrongLinkage.ll | 2
-rw-r--r--  test/Linker/2004-05-07-TypeResolution1.ll | 6
-rw-r--r--  test/Linker/2004-12-03-DisagreeingType.ll | 2
-rw-r--r--  test/Linker/2005-02-12-ConstantGlobals-2.ll | 2
-rw-r--r--  test/Linker/2005-02-12-ConstantGlobals.ll | 2
-rw-r--r--  test/Linker/2005-12-06-AppendingZeroLengthArrays.ll | 2
-rw-r--r--  test/Linker/2006-01-19-ConstantPacked.ll | 4
-rw-r--r--  test/Linker/2006-06-15-GlobalVarAlignment.ll | 2
-rw-r--r--  test/Linker/2008-03-05-AliasReference.ll | 6
-rw-r--r--  test/Linker/2008-06-13-LinkOnceRedefinition.ll | 10
-rw-r--r--  test/Linker/2008-06-26-AddressSpace.ll | 8
-rw-r--r--  test/Linker/2008-07-06-AliasFnDecl.ll | 6
-rw-r--r--  test/Linker/2008-07-06-AliasWeakDest.ll | 8
-rw-r--r--  test/Linker/2009-09-03-mdnode.ll | 30
-rw-r--r--  test/Linker/2009-09-03-mdnode2.ll | 25
-rw-r--r--  test/Linker/AppendingLinkage.ll | 2
-rw-r--r--  test/Linker/AppendingLinkage2.ll | 2
-rw-r--r--  test/Linker/ConstantGlobals1.ll | 2
-rw-r--r--  test/Linker/ConstantGlobals2.ll | 2
-rw-r--r--  test/Linker/ConstantGlobals3.ll | 2
-rw-r--r--  test/Linker/LinkOnce.ll | 2
-rw-r--r--  test/Linker/basiclink.ll | 8
-rw-r--r--  test/Linker/link-archive.ll | 4
-rw-r--r--  test/Linker/link-global-to-func.ll | 8
-rw-r--r--  test/Linker/link-messages.ll | 4
-rw-r--r--  test/Linker/linkmdnode.ll | 12
-rw-r--r--  test/Linker/linkmdnode2.ll | 12
-rw-r--r--  test/Linker/linknamedmdnode.ll | 6
-rw-r--r--  test/Linker/linknamedmdnode2.ll | 6
-rw-r--r--  test/Linker/partial-type-refinement-link.ll | 20
-rw-r--r--  test/Linker/partial-type-refinement.ll | 24
-rw-r--r--  test/Linker/redefinition.ll | 6
-rw-r--r--  test/Linker/weakextern.ll | 2
-rw-r--r--  test/MC/AsmParser/ARM/arm_word_directive.s | 6
-rw-r--r--  test/MC/AsmParser/ARM/dg.exp | 5
-rw-r--r--  test/MC/AsmParser/X86/dg.exp | 5
-rw-r--r--  test/MC/AsmParser/X86/x86_instructions.s | 58
-rw-r--r--  test/MC/AsmParser/X86/x86_operands.s | 58
-rw-r--r--  test/MC/AsmParser/X86/x86_word_directive.s | 6
-rw-r--r--  test/MC/AsmParser/assignment.s | 8
-rw-r--r--  test/MC/AsmParser/conditional_asm.s | 12
-rw-r--r--  test/MC/AsmParser/dg.exp | 1
-rw-r--r--  test/MC/AsmParser/directive_abort.s | 6
-rw-r--r--  test/MC/AsmParser/directive_align.s | 14
-rw-r--r--  test/MC/AsmParser/directive_ascii.s | 50
-rw-r--r--  test/MC/AsmParser/directive_comm.s | 8
-rw-r--r--  test/MC/AsmParser/directive_darwin_section.s | 4
-rw-r--r--  test/MC/AsmParser/directive_desc.s | 8
-rw-r--r--  test/MC/AsmParser/directive_file.s | 5
-rw-r--r--  test/MC/AsmParser/directive_fill.s | 16
-rw-r--r--  test/MC/AsmParser/directive_include.s | 9
-rw-r--r--  test/MC/AsmParser/directive_lcomm.s | 10
-rw-r--r--  test/MC/AsmParser/directive_line.s | 5
-rw-r--r--  test/MC/AsmParser/directive_loc.s | 8
-rw-r--r--  test/MC/AsmParser/directive_lsym.s | 13
-rw-r--r--  test/MC/AsmParser/directive_org.s | 10
-rw-r--r--  test/MC/AsmParser/directive_set.s | 8
-rw-r--r--  test/MC/AsmParser/directive_space.s | 11
-rw-r--r--  test/MC/AsmParser/directive_subsections_via_symbols.s | 6
-rw-r--r--  test/MC/AsmParser/directive_symbol_attrs.s | 8
-rw-r--r--  test/MC/AsmParser/directive_values.s | 18
-rw-r--r--  test/MC/AsmParser/directive_zerofill.s | 10
-rw-r--r--  test/MC/AsmParser/exprs-invalid.s | 13
-rw-r--r--  test/MC/AsmParser/exprs.s | 6
-rw-r--r--  test/MC/AsmParser/hello.s | 28
-rw-r--r--  test/MC/AsmParser/labels.s | 59
-rw-r--r--  test/MC/MachO/comm-1.s | 114
-rw-r--r--  test/MC/MachO/data.s | 67
-rw-r--r--  test/MC/MachO/dg.exp | 6
-rw-r--r--  test/MC/MachO/lcomm-attributes.s | 136
-rw-r--r--  test/MC/MachO/reloc.s | 227
-rw-r--r--  test/MC/MachO/section-align-1.s | 87
-rw-r--r--  test/MC/MachO/section-align-2.s | 137
-rw-r--r--  test/MC/MachO/sections.s | 540
-rw-r--r--  test/MC/MachO/symbol-flags.s | 254
-rw-r--r--  test/MC/MachO/symbol-indirect.s | 268
-rw-r--r--  test/MC/MachO/symbols-1.s | 161
-rw-r--r--  test/MC/MachO/values.s | 135
-rw-r--r--  test/MC/MachO/zerofill-1.s | 121
-rw-r--r--  test/MC/MachO/zerofill-2.s | 103
-rw-r--r--  test/MC/MachO/zerofill-3.s | 141
-rw-r--r--  test/Makefile | 94
-rw-r--r--  test/Makefile.tests | 2
-rw-r--r--  test/Other/2002-01-31-CallGraph.ll | 2
-rw-r--r--  test/Other/2002-02-24-InlineBrokePHINodes.ll | 2
-rw-r--r--  test/Other/2002-03-11-ConstPropCrash.ll | 2
-rw-r--r--  test/Other/2003-02-19-LoopInfoNestingBug.ll | 2
-rw-r--r--  test/Other/2006-02-05-PassManager.ll | 2
-rw-r--r--  test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll | 2
-rw-r--r--  test/Other/2007-06-05-PassID.ll | 2
-rw-r--r--  test/Other/2007-06-28-PassManager.ll | 8
-rw-r--r--  test/Other/2007-09-10-PassManager.ll | 2
-rw-r--r--  test/Other/2008-02-14-PassManager.ll | 4
-rw-r--r--  test/Other/2008-03-19-PassManager.ll | 2
-rw-r--r--  test/Other/2008-06-04-FieldSizeInPacked.ll | 2
-rw-r--r--  test/Other/2008-08-14-PassManager.ll | 4
-rw-r--r--  test/Other/2008-10-06-RemoveDeadPass.ll | 2
-rw-r--r--  test/Other/2009-03-31-CallGraph.ll | 2
-rw-r--r--  test/Other/2009-06-05-no-implicit-float.ll | 2
-rw-r--r--  test/Other/2009-09-14-function-elements.ll | 6
-rwxr-xr-x  test/Scripts/macho-dump | 256
-rw-r--r--  test/Transforms/ADCE/2002-01-31-UseStuckAround.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-05-22-PHITest.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-05-28-Crash-distilled.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-05-28-Crash.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-07-17-AssertionFailure.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-07-17-PHIAssertion.ll | 2
-rw-r--r--  test/Transforms/ADCE/2002-07-29-Segfault.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-06-11-InvalidCFG.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-06-24-BadSuccessor.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll | 2
-rw-r--r--  test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll | 2
-rw-r--r--  test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll | 2
-rw-r--r--  test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll | 2
-rw-r--r--  test/Transforms/ADCE/basictest.ll | 2
-rw-r--r--  test/Transforms/ADCE/basictest1.ll | 2
-rw-r--r--  test/Transforms/ADCE/basictest2.ll | 2
-rw-r--r--  test/Transforms/ADCE/dce_pure_call.ll | 2
-rw-r--r--  test/Transforms/ADCE/dce_pure_invoke.ll | 2
-rw-r--r--  test/Transforms/ADCE/unreachable-function.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/aggregate-promote.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/attrs.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/basictest.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/byval-2.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/byval.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/callgraph-update.ll | 23
-rw-r--r--  test/Transforms/ArgumentPromotion/chained.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/control-flow.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/control-flow2.ll | 2
-rw-r--r--  test/Transforms/ArgumentPromotion/pr3085.ll | 2
-rw-r--r--  test/Transforms/BlockPlacement/basictest.ll | 2
-rw-r--r--  test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-13-LoopExtractorCrash.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-14-NoSwitchSupport.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-17-MissedLiveIns.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-17-UpdatePHIsOutsideRegion.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-03-18-InvokeHandling.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-08-12-BlockExtractPHI.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/2004-11-12-InvokeExtract.ll | 2
-rw-r--r--  test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll | 2
-rw-r--r--  test/Transforms/CondProp/2006-08-14-SingleEntryPhiCrash.ll | 2
-rw-r--r--  test/Transforms/CondProp/2006-11-01-PhiNodeCrash.ll | 2
-rw-r--r--  test/Transforms/CondProp/2007-08-01-InvalidRead.ll | 2
-rw-r--r--  test/Transforms/CondProp/2009-01-25-SingleEntryPHI.ll | 2
-rw-r--r--  test/Transforms/CondProp/basictest-dbg.ll | 2
-rw-r--r--  test/Transforms/CondProp/basictest.ll | 4
-rw-r--r--  test/Transforms/CondProp/phisimplify.ll | 2
-rw-r--r--  test/Transforms/CondProp/phisimplify2.ll | 2
-rw-r--r--  test/Transforms/CondProp/phisimplify3.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2002-05-03-NotOperator.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2003-05-12-DivideError.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2006-11-30-vector-cast.ll | 4
-rw-r--r--  test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2006-12-01-bool-casts.ll | 4
-rw-r--r--  test/Transforms/ConstProp/2007-02-05-BitCast.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2007-11-23-cttz.ll | 2
-rw-r--r--  test/Transforms/ConstProp/2008-07-07-VectorCompare.ll | 26
-rw-r--r--  test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll | 24
-rw-r--r--  test/Transforms/ConstProp/2009-09-19-ConstFold-i1-ConstExpr.ll | 41
-rw-r--r--  test/Transforms/ConstProp/basictest.ll | 19
-rw-r--r--  test/Transforms/ConstProp/bitcast2.ll | 2
-rw-r--r--  test/Transforms/ConstProp/bswap.ll | 2
-rw-r--r--  test/Transforms/ConstProp/calls.ll | 2
-rw-r--r--  test/Transforms/ConstProp/div-zero.ll | 2
-rw-r--r--  test/Transforms/ConstProp/float-to-ptr-cast.ll | 2
-rw-r--r--  test/Transforms/ConstProp/logicaltest.ll | 2
-rw-r--r--  test/Transforms/ConstProp/nottest.ll | 2
-rw-r--r--  test/Transforms/ConstProp/overflow-ops.ll | 172
-rw-r--r--  test/Transforms/ConstProp/phi.ll | 2
-rw-r--r--  test/Transforms/ConstProp/remtest.ll | 2
-rw-r--r--  test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll | 2
-rw-r--r--  test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll | 4
-rw-r--r--  test/Transforms/ConstantMerge/2006-03-07-DontMergeDiffSections.ll | 4
-rw-r--r--  test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll | 4
-rw-r--r--  test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/basictest.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/canon.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/dead_vaargs.ll | 4
-rw-r--r--  test/Transforms/DeadArgElim/deadretval.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/deadretval2.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/keepalive.ll | 2
-rw-r--r--  test/Transforms/DeadArgElim/multdeadretval.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/PartialStore.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/alloca.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/byval.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/context-sensitive.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/crash.ll | 26
-rw-r--r--  test/Transforms/DeadStoreElimination/free.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/memcpy.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/simple.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/volatile-load.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-12-29-Constant.ll | 2
-rw-r--r--  test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll | 4
-rw-r--r--  test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll | 4
-rw-r--r--  test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-25-DominatedLoop.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-25-InfiniteLoop.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-25-Loop.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-25-NestedLoop.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-25-SinglePredecessor.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-26-InterlockingLoops.ll | 4
-rw-r--r--  test/Transforms/GVN/2007-07-26-NonRedundant.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-26-PhiErasure.ll | 4
-rw-r--r--  test/Transforms/GVN/2007-07-30-PredIDom.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-31-NoDomInherit.ll | 2
-rw-r--r--  test/Transforms/GVN/2007-07-31-RedundantPhi.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-02-12-UndefLoad.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-02-13-NewPHI.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-02-26-MemCpySize.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-07-02-Unreachable.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-12-09-SelfRemove.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-12-12-RLE-Crash.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-12-14-rle-reanalyze.ll | 2
-rw-r--r--  test/Transforms/GVN/2008-12-15-CacheVisited.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-01-21-SortInvalidation.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-01-22-SortInvalidation.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-02-17-LoadPRECrash.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-03-05-dbg.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-03-10-PREOnVoid.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-06-17-InvalidPRE.ll | 2
-rw-r--r--  test/Transforms/GVN/2009-07-13-MemDepSortFail.ll | 67
-rw-r--r--  test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll | 15
-rw-r--r--  test/Transforms/GVN/basic.ll | 2
-rw-r--r--  test/Transforms/GVN/bitcast-of-call.ll | 2
-rw-r--r--  test/Transforms/GVN/calls-nonlocal.ll | 2
-rw-r--r--  test/Transforms/GVN/calls-readonly.ll | 2
-rw-r--r--  test/Transforms/GVN/condprop.ll | 2
-rw-r--r--  test/Transforms/GVN/load-constant-mem.ll | 2
-rw-r--r--  test/Transforms/GVN/local-pre.ll | 2
-rw-r--r--  test/Transforms/GVN/lpre-basic.ll | 2
-rw-r--r--  test/Transforms/GVN/lpre-call-wrap-2.ll | 7
-rw-r--r--  test/Transforms/GVN/lpre-call-wrap.ll | 7
-rw-r--r--  test/Transforms/GVN/mixed.ll | 4
-rw-r--r--  test/Transforms/GVN/pre-basic-add.ll | 2
-rw-r--r--  test/Transforms/GVN/pre-single-pred.ll | 2
-rw-r--r--  test/Transforms/GVN/rle-dominated.ll | 2
-rw-r--r--  test/Transforms/GVN/rle-must-alias.ll | 2
-rw-r--r--  test/Transforms/GVN/rle-no-phi-translate.ll | 2
-rw-r--r--  test/Transforms/GVN/rle-nonlocal.ll | 2
-rw-r--r--  test/Transforms/GVN/rle-phi-translate.ll | 4
-rw-r--r--  test/Transforms/GVN/rle-semidominated.ll | 2
-rw-r--r--  test/Transforms/GVN/rle.ll | 282
-rw-r--r--  test/Transforms/GlobalDCE/2002-07-17-CastRef.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2002-07-17-ConstantRef.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll | 4
-rw-r--r--  test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/2009-09-03-MDNode.ll | 264
-rw-r--r--  test/Transforms/GlobalDCE/basicvariabletest.ll | 2
-rw-r--r--  test/Transforms/GlobalDCE/externally_available.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2005-09-27-Crash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2007-04-05-Crash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2007-05-13-Crash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2007-06-04-PackedStruct.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-01-03-Crash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll | 6
-rw-r--r--  test/Transforms/GlobalOpt/2008-07-17-addrspace.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-01-13-phi-user.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-03-03-dbg.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-03-05-dbg.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/alias-resolve.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/basictest.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/constantexpr-dangle.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll | 6
-rw-r--r--  test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll | 23
-rw-r--r--  test/Transforms/GlobalOpt/ctor-list-opt.ll | 6
-rw-r--r--  test/Transforms/GlobalOpt/deadglobal-2.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/deadglobal.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/globalsra-partial.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/globalsra-unknown-index.ll | 41
-rw-r--r--  test/Transforms/GlobalOpt/globalsra.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/heap-sra-1.ll | 4
-rw-r--r--  test/Transforms/GlobalOpt/heap-sra-2.ll | 4
-rw-r--r--  test/Transforms/GlobalOpt/heap-sra-phi.ll | 4
-rw-r--r--  test/Transforms/GlobalOpt/integer-bool.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/iterate.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/load-store-global.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/malloc-promote-1.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/malloc-promote-2.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/malloc-promote-3.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/memcpy.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/memset.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/phi-select.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/storepointer-compare.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/storepointer.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/trivialstore.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/undef-init.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll | 40
-rw-r--r--  test/Transforms/IPConstantProp/deadarg.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/recursion.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/return-argument.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/return-constant.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/return-constants.ll | 2
-rw-r--r--  test/Transforms/IndMemRem/2009-01-24-Noalias.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2002-09-09-PointerIndVar.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2003-04-16-ExprAnalysis.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2003-09-12-MultiplePred.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll | 4
-rw-r--r--  test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2003-12-10-RemoveInstrCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2003-12-15-Crash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2003-12-21-IndVarSize.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2004-03-10-PHIInsertionBug.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2004-04-07-ScalarEvolutionCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2005-02-11-InvokeCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2005-06-15-InstMoveCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2005-11-18-Crash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll | 4
-rw-r--r--  test/Transforms/IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2007-11-23-BitcastCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2008-06-15-SCEVExpanderBug.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2008-09-02-IVType.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2008-11-03-Floating.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2008-11-17-Floating.ll | 6
-rw-r--r--  test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2009-04-22-IndvarCrash.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2009-04-27-Floating.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/ada-loops.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/addrec-gep.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/ashr-tripcount.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/avoid-i0.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/casted-argument.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/complex-scev.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/divide-pointer.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/exit_value_tests.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/gep-with-mul-base.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/interesting-invoke-use.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/iterationCount_zext_or_trunc.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/iv-sext.ll | 143
-rw-r--r--  test/Transforms/IndVarSimplify/iv-zext.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/lftr-other-uses.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/lftr-promote.ll | 38
-rw-r--r--  test/Transforms/IndVarSimplify/lftr_simple.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop-invariant-step.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate10.ll | 47
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate11.ll | 36
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate7.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate8.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate9.ll | 6
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_1.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_2.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_3.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_4.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_5.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_6.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/masked-iv.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/max-pointer.ll | 39
-rw-r--r--  test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/pointer-indvars.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/pointer.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/polynomial-expand.ll | 38
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll | 41
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-gep-nested.ll | 75
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-gep-remainder.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-gep.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-signed-wrap.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/shrunk-constant.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/signed-trip-count.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/single-element-range.ll | 27
-rw-r--r--  test/Transforms/IndVarSimplify/sink-alloca.ll | 31
-rw-r--r--  test/Transforms/IndVarSimplify/sink-trapping.ll | 19
-rw-r--r--  test/Transforms/IndVarSimplify/subtract.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/tripcount_compute.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/tripcount_infinite.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/variable-stride-ivs-1.ll | 2
-rw-r--r--  test/Transforms/Inline/2003-09-14-InlineValue.ll | 2
-rw-r--r--  test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll | 2
-rw-r--r--  test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll | 2
-rw-r--r--  test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll | 2
-rw-r--r--  test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll | 2
-rw-r--r--  test/Transforms/Inline/2003-10-26-InlineInvokeExceptionDestPhi.ll | 2
-rw-r--r--  test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll | 2
-rw-r--r--  test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll | 2
-rw-r--r--  test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll | 2
-rw-r--r--  test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll | 2
-rw-r--r--  test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll | 2
-rw-r--r--  test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll | 2
-rw-r--r--  test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll | 2
-rw-r--r--  test/Transforms/Inline/2007-04-15-InlineEH.ll | 2
-rw-r--r--  test/Transforms/Inline/2007-06-06-NoInline.ll | 2
-rw-r--r--  test/Transforms/Inline/2007-06-25-WeakInline.ll | 2
-rw-r--r--  test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll | 4
-rw-r--r--  test/Transforms/Inline/2008-03-04-StructRet.ll | 2
-rw-r--r--  test/Transforms/Inline/2008-03-07-Inline-2.ll | 2
-rw-r--r--  test/Transforms/Inline/2008-03-07-Inline.ll | 2
-rw-r--r--  test/Transforms/Inline/2008-09-02-AlwaysInline.ll | 2
-rw-r--r--  test/Transforms/Inline/2008-09-02-NoInline.ll | 2
-rw-r--r--  test/Transforms/Inline/2008-10-30-AlwaysInline.ll | 4
-rw-r--r--  test/Transforms/Inline/2008-11-04-AlwaysInline.ll | 2
-rw-r--r--  test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll | 2
-rw-r--r--  test/Transforms/Inline/2009-01-12-RecursiveInline.ll | 2
-rw-r--r--  test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll | 2
-rw-r--r--  test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll | 2
-rw-r--r--  test/Transforms/Inline/PR4909.ll | 15
-rw-r--r--  test/Transforms/Inline/alloca-in-scc.ll | 31
-rw-r--r--  test/Transforms/Inline/alloca_test.ll | 4
-rw-r--r--  test/Transforms/Inline/always_inline_dyn_alloca.ll | 2
-rw-r--r--  test/Transforms/Inline/array_merge.ll | 26
-rw-r--r--  test/Transforms/Inline/basictest.ll | 2
-rw-r--r--  test/Transforms/Inline/byval.ll | 2
-rw-r--r--  test/Transforms/Inline/byval2.ll | 2
-rw-r--r--  test/Transforms/Inline/callgraph-update.ll | 33
-rw-r--r--  test/Transforms/Inline/casts.ll | 2
-rw-r--r--  test/Transforms/Inline/cfg_preserve_test.ll | 2
-rw-r--r--  test/Transforms/Inline/crash.ll | 57
-rw-r--r--  test/Transforms/Inline/dynamic_alloca_test.ll | 4
-rw-r--r--  test/Transforms/Inline/externally_available.ll | 2
-rw-r--r--  test/Transforms/Inline/indirect_resolve.ll | 16
-rw-r--r--  test/Transforms/Inline/inline-invoke-tail.ll | 2
-rw-r--r--  test/Transforms/Inline/inline-tail.ll | 2
-rw-r--r--  test/Transforms/Inline/inline_cleanup.ll | 4
-rw-r--r--  test/Transforms/Inline/inline_constprop.ll | 4
-rw-r--r--  test/Transforms/Inline/inline_dce.ll | 2
-rw-r--r--  test/Transforms/Inline/inline_prune.ll | 4
-rw-r--r--  test/Transforms/Inline/invoke_test-1.ll | 2
-rw-r--r--  test/Transforms/Inline/invoke_test-2.ll | 2
-rw-r--r--  test/Transforms/Inline/invoke_test-3.ll | 2
-rw-r--r--  test/Transforms/Inline/nested-inline.ll | 111
-rw-r--r--  test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2002-05-14-SubFailure.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2002-08-02-CastTest.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll | 5
-rw-r--r--  test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll | 32
-rw-r--r--  test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-06-16-RangeCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-02-28-Crash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-03-30-ExtractElement.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll | 6
-rw-r--r--  test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-09-15-CastToBool.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-10-20-mask.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-11-03-Memmove64.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll | 4
-rw-r--r--  test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2006-12-15-Range-Test.ll | 4
-rw-r--r--  test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-01-27-AndICmp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll | 4
-rw-r--r--  test/Transforms/InstCombine/2007-02-07-PointerCast.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-13-CompareMerge.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-25-DoubleShift.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-03-27-PR1280.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-05-04-Crash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-05-10-icmp-or.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-05-14-Crash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-09-11-Trampoline.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll | 4
-rw-r--r--  test/Transforms/InstCombine/2007-10-12-Crash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-10-28-stacksave.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-10-31-RangeCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-10-31-StringCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-12-12-GEPScale.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-06-CastCrash.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-06-VoidCast.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-14-DoubleNest.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll | 7
-rw-r--r--  test/Transforms/InstCombine/2008-01-21-MulTrunc.ll | 6
-rw-r--r--  test/Transforms/InstCombine/2008-01-27-FloatSelect.ll | 2
-rw-r--r--test/Transforms/InstCombine/2008-01-29-AddICmp.ll2
-rw-r--r--test/Transforms/InstCombine/2008-02-13-MulURem.ll2
-rw-r--r--test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll2
-rw-r--r--test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll2
-rw-r--r--test/Transforms/InstCombine/2008-02-23-MulSub.ll2
-rw-r--r--test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll2
-rw-r--r--test/Transforms/InstCombine/2008-03-13-IntToPtr.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-28-VolatileStore.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-08-StrLenSink.ll6
-rw-r--r--test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-17-InfLoop.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-22-IDivVector.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-22-NegValVector.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-23-CompareFold.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-31-AddBool.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-31-Bools.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-05-ashr-crash.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-19-UncondLoad.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-24-StackRestore.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-08-AndICmp.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-08-SubAnd.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-09-SubAndError.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-10-CastSextBool.ll4
-rw-r--r--test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll4
-rw-r--r--test/Transforms/InstCombine/2008-07-11-RemAnd.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-13-DivZero.ll4
-rw-r--r--test/Transforms/InstCombine/2008-07-16-fsub.ll2
-rw-r--r--test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll2
-rw-r--r--test/Transforms/InstCombine/2008-08-05-And.ll2
-rw-r--r--test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll2
-rw-r--r--test/Transforms/InstCombine/2008-09-02-VectorCrash.ll2
-rw-r--r--test/Transforms/InstCombine/2008-09-29-FoldingOr.ll2
-rw-r--r--test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll2
-rw-r--r--test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll2
-rw-r--r--test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll2
-rw-r--r--test/Transforms/InstCombine/2008-11-08-FCmp.ll13
-rw-r--r--test/Transforms/InstCombine/2008-11-20-DivMulRem.ll2
-rw-r--r--test/Transforms/InstCombine/2008-11-27-IDivVector.ll2
-rw-r--r--test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll2
-rw-r--r--test/Transforms/InstCombine/2008-11-27-UDivNegative.ll2
-rw-r--r--test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-05-i128-crash.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll8
-rw-r--r--test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll4
-rw-r--r--test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-31-InfIterate.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-31-Pressure.ll2
-rw-r--r--test/Transforms/InstCombine/2009-02-04-FPBitcast.ll2
-rw-r--r--test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll2
-rw-r--r--test/Transforms/InstCombine/2009-02-21-LoadCST.ll2
-rw-r--r--test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll2
-rw-r--r--test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll2
-rw-r--r--test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll2
-rw-r--r--test/Transforms/InstCombine/2009-03-24-InfLoop.ll2
-rw-r--r--test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll2
-rw-r--r--test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll2
-rw-r--r--test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll2
-rw-r--r--test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll2
-rw-r--r--test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll2
-rw-r--r--test/Transforms/InstCombine/CPP_min_max.ll2
-rw-r--r--test/Transforms/InstCombine/IntPtrCast.ll3
-rw-r--r--test/Transforms/InstCombine/JavaCompare.ll2
-rw-r--r--test/Transforms/InstCombine/add-shrink.ll4
-rw-r--r--test/Transforms/InstCombine/add-sitofp.ll2
-rw-r--r--test/Transforms/InstCombine/add.ll2
-rw-r--r--test/Transforms/InstCombine/add2.ll24
-rw-r--r--test/Transforms/InstCombine/add3.ll21
-rw-r--r--test/Transforms/InstCombine/addnegneg.ll2
-rw-r--r--test/Transforms/InstCombine/adjust-for-sminmax.ll2
-rw-r--r--test/Transforms/InstCombine/align-2d-gep.ll2
-rw-r--r--test/Transforms/InstCombine/align-addr.ll2
-rw-r--r--test/Transforms/InstCombine/align-external.ll22
-rw-r--r--test/Transforms/InstCombine/align-inc.ll4
-rw-r--r--test/Transforms/InstCombine/alloca.ll2
-rw-r--r--test/Transforms/InstCombine/and-compare.ll2
-rw-r--r--test/Transforms/InstCombine/and-fcmp.ll4
-rw-r--r--test/Transforms/InstCombine/and-not-or.ll4
-rw-r--r--test/Transforms/InstCombine/and-or-and.ll2
-rw-r--r--test/Transforms/InstCombine/and-or-not.ll6
-rw-r--r--test/Transforms/InstCombine/and-or.ll4
-rw-r--r--test/Transforms/InstCombine/and-xor-merge.ll4
-rw-r--r--test/Transforms/InstCombine/and.ll2
-rw-r--r--test/Transforms/InstCombine/and2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-add1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-add2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-and-compare.ll2
-rw-r--r--test/Transforms/InstCombine/apint-and-or-and.ll2
-rw-r--r--test/Transforms/InstCombine/apint-and-xor-merge.ll4
-rw-r--r--test/Transforms/InstCombine/apint-and1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-and2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-call-cast-target.ll2
-rw-r--r--test/Transforms/InstCombine/apint-cast-and-cast.ll2
-rw-r--r--test/Transforms/InstCombine/apint-cast-cast-to-and.ll2
-rw-r--r--test/Transforms/InstCombine/apint-cast.ll10
-rw-r--r--test/Transforms/InstCombine/apint-div1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-div2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-elim-logicalops.ll2
-rw-r--r--test/Transforms/InstCombine/apint-mul1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-mul2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-not.ll2
-rw-r--r--test/Transforms/InstCombine/apint-or1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-or2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-rem1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-rem2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-select.ll2
-rw-r--r--test/Transforms/InstCombine/apint-shift-simplify.ll2
-rw-r--r--test/Transforms/InstCombine/apint-shift.ll2
-rw-r--r--test/Transforms/InstCombine/apint-shl-trunc.ll2
-rw-r--r--test/Transforms/InstCombine/apint-sub.ll2
-rw-r--r--test/Transforms/InstCombine/apint-xor1.ll2
-rw-r--r--test/Transforms/InstCombine/apint-xor2.ll2
-rw-r--r--test/Transforms/InstCombine/apint-zext1.ll4
-rw-r--r--test/Transforms/InstCombine/apint-zext2.ll4
-rw-r--r--test/Transforms/InstCombine/ashr-nop.ll2
-rw-r--r--test/Transforms/InstCombine/badmalloc.ll19
-rw-r--r--test/Transforms/InstCombine/binop-cast.ll4
-rw-r--r--test/Transforms/InstCombine/bit-tracking.ll2
-rw-r--r--test/Transforms/InstCombine/bitcast-scalar-to-vector.ll2
-rw-r--r--test/Transforms/InstCombine/bitcast-vec-canon.ll22
-rw-r--r--test/Transforms/InstCombine/bitcast-vector-fold.ll2
-rw-r--r--test/Transforms/InstCombine/bitcount.ll4
-rw-r--r--test/Transforms/InstCombine/bittest.ll2
-rw-r--r--test/Transforms/InstCombine/bswap-fold.ll4
-rw-r--r--test/Transforms/InstCombine/bswap.ll2
-rw-r--r--test/Transforms/InstCombine/call-cast-target.ll2
-rw-r--r--test/Transforms/InstCombine/call-intrinsics.ll2
-rw-r--r--test/Transforms/InstCombine/call.ll25
-rw-r--r--test/Transforms/InstCombine/call2.ll2
-rw-r--r--test/Transforms/InstCombine/canonicalize_branch.ll2
-rw-r--r--test/Transforms/InstCombine/cast-and-cast.ll2
-rw-r--r--test/Transforms/InstCombine/cast-cast-to-and.ll2
-rw-r--r--test/Transforms/InstCombine/cast-load-gep.ll2
-rw-r--r--test/Transforms/InstCombine/cast-malloc.ll2
-rw-r--r--test/Transforms/InstCombine/cast-mul-select.ll12
-rw-r--r--test/Transforms/InstCombine/cast-propagate.ll2
-rw-r--r--test/Transforms/InstCombine/cast-set.ll14
-rw-r--r--test/Transforms/InstCombine/cast-sext-zext.ll2
-rw-r--r--test/Transforms/InstCombine/cast.ll77
-rw-r--r--test/Transforms/InstCombine/cast2.ll9
-rw-r--r--test/Transforms/InstCombine/cast3.ll35
-rw-r--r--test/Transforms/InstCombine/cast_ld_addr_space.ll2
-rw-r--r--test/Transforms/InstCombine/cast_ptr.ll12
-rw-r--r--test/Transforms/InstCombine/constant-fold-gep.ll54
-rw-r--r--test/Transforms/InstCombine/constant-fold-ptr-casts.ll2
-rw-r--r--test/Transforms/InstCombine/crash.ll46
-rw-r--r--test/Transforms/InstCombine/dce-iterate.ll2
-rw-r--r--test/Transforms/InstCombine/deadcode.ll4
-rw-r--r--test/Transforms/InstCombine/div-cmp-overflow.ll2
-rw-r--r--test/Transforms/InstCombine/div.ll2
-rw-r--r--test/Transforms/InstCombine/enforce-known-alignment.ll2
-rw-r--r--test/Transforms/InstCombine/exact-sdiv.ll52
-rw-r--r--test/Transforms/InstCombine/extractvalue.ll2
-rw-r--r--test/Transforms/InstCombine/fold-bin-operand.ll13
-rw-r--r--test/Transforms/InstCombine/fold-vector-zero.ll2
-rw-r--r--test/Transforms/InstCombine/fp-ret-bitcast.ll2
-rw-r--r--test/Transforms/InstCombine/fpcast.ll6
-rw-r--r--test/Transforms/InstCombine/fpextend.ll2
-rw-r--r--test/Transforms/InstCombine/fsub-fsub.ll2
-rw-r--r--test/Transforms/InstCombine/getelementptr.ll459
-rw-r--r--test/Transforms/InstCombine/hoist_instr.ll5
-rw-r--r--test/Transforms/InstCombine/icmp.ll16
-rw-r--r--test/Transforms/InstCombine/known_align.ll2
-rw-r--r--test/Transforms/InstCombine/load.ll2
-rw-r--r--test/Transforms/InstCombine/load2.ll2
-rw-r--r--test/Transforms/InstCombine/load3.ll2
-rw-r--r--test/Transforms/InstCombine/loadstore-alignment.ll2
-rw-r--r--test/Transforms/InstCombine/logical-select.ll2
-rw-r--r--test/Transforms/InstCombine/lshr-phi.ll2
-rw-r--r--test/Transforms/InstCombine/malloc-free-delete.ll4
-rw-r--r--test/Transforms/InstCombine/malloc.ll2
-rw-r--r--test/Transforms/InstCombine/malloc2.ll4
-rw-r--r--test/Transforms/InstCombine/malloc3.ll2
-rw-r--r--test/Transforms/InstCombine/memcpy-to-load.ll2
-rw-r--r--test/Transforms/InstCombine/memmove.ll2
-rw-r--r--test/Transforms/InstCombine/memset.ll2
-rw-r--r--test/Transforms/InstCombine/mul-masked-bits.ll2
-rw-r--r--test/Transforms/InstCombine/mul.ll33
-rw-r--r--test/Transforms/InstCombine/multi-use-or.ll2
-rw-r--r--test/Transforms/InstCombine/narrow.ll2
-rw-r--r--test/Transforms/InstCombine/no-negzero.ll33
-rw-r--r--test/Transforms/InstCombine/not-fcmp.ll2
-rw-r--r--test/Transforms/InstCombine/not.ll2
-rw-r--r--test/Transforms/InstCombine/nothrow.ll2
-rw-r--r--test/Transforms/InstCombine/nsw.ll20
-rw-r--r--test/Transforms/InstCombine/odr-linkage.ll2
-rw-r--r--test/Transforms/InstCombine/or-fcmp.ll4
-rw-r--r--test/Transforms/InstCombine/or-to-xor.ll4
-rw-r--r--test/Transforms/InstCombine/or.ll2
-rw-r--r--test/Transforms/InstCombine/or2.ll2
-rw-r--r--test/Transforms/InstCombine/phi-merge-gep.ll102
-rw-r--r--test/Transforms/InstCombine/phi-merge.ll2
-rw-r--r--test/Transforms/InstCombine/phi.ll2
-rw-r--r--test/Transforms/InstCombine/pr2645-0.ll2
-rw-r--r--test/Transforms/InstCombine/pr2645-1.ll2
-rw-r--r--test/Transforms/InstCombine/pr2996.ll2
-rw-r--r--test/Transforms/InstCombine/preserve-sminmax.ll2
-rw-r--r--test/Transforms/InstCombine/ptr-int-cast.ll2
-rw-r--r--test/Transforms/InstCombine/rem.ll2
-rw-r--r--test/Transforms/InstCombine/sdiv-1.ll2
-rw-r--r--test/Transforms/InstCombine/sdiv-2.ll2
-rw-r--r--test/Transforms/InstCombine/sdiv-shift.ll9
-rw-r--r--test/Transforms/InstCombine/select-2.ll2
-rw-r--r--test/Transforms/InstCombine/select-load-call.ll2
-rw-r--r--test/Transforms/InstCombine/select.ll63
-rw-r--r--test/Transforms/InstCombine/set.ll2
-rw-r--r--test/Transforms/InstCombine/setcc-cast-cast.ll2
-rw-r--r--test/Transforms/InstCombine/setcc-strength-reduce.ll2
-rw-r--r--test/Transforms/InstCombine/sext-misc.ll2
-rw-r--r--test/Transforms/InstCombine/shift-simplify.ll2
-rw-r--r--test/Transforms/InstCombine/shift-sra.ll4
-rw-r--r--test/Transforms/InstCombine/shift-trunc-shift.ll2
-rw-r--r--test/Transforms/InstCombine/shift.ll139
-rw-r--r--test/Transforms/InstCombine/shufflemask-undef.ll2
-rw-r--r--test/Transforms/InstCombine/shufflevec-constant.ll2
-rw-r--r--test/Transforms/InstCombine/signed-comparison.ll2
-rw-r--r--test/Transforms/InstCombine/signext.ll2
-rw-r--r--test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll84
-rw-r--r--test/Transforms/InstCombine/sink_instruction.ll43
-rw-r--r--test/Transforms/InstCombine/sitofp.ll2
-rw-r--r--test/Transforms/InstCombine/srem-simplify-bug.ll2
-rw-r--r--test/Transforms/InstCombine/srem.ll2
-rw-r--r--test/Transforms/InstCombine/srem1.ll2
-rw-r--r--test/Transforms/InstCombine/stack-overalign.ll2
-rw-r--r--test/Transforms/InstCombine/stacksaverestore.ll2
-rw-r--r--test/Transforms/InstCombine/store-merge.ll2
-rw-r--r--test/Transforms/InstCombine/store.ll2
-rw-r--r--test/Transforms/InstCombine/sub.ll2
-rw-r--r--test/Transforms/InstCombine/trunc-mask-ext.ll2
-rw-r--r--test/Transforms/InstCombine/udiv-simplify-bug-0.ll2
-rw-r--r--test/Transforms/InstCombine/udiv-simplify-bug-1.ll2
-rw-r--r--test/Transforms/InstCombine/udiv_select_to_select_shift.ll2
-rw-r--r--test/Transforms/InstCombine/udivrem-change-width.ll19
-rw-r--r--test/Transforms/InstCombine/urem-simplify-bug.ll2
-rw-r--r--test/Transforms/InstCombine/urem.ll2
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts-2.ll2
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts-3.ll2
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll10
-rw-r--r--test/Transforms/InstCombine/vec_extract_elt.ll2
-rw-r--r--test/Transforms/InstCombine/vec_insertelt.ll2
-rw-r--r--test/Transforms/InstCombine/vec_narrow.ll2
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll46
-rw-r--r--test/Transforms/InstCombine/vector-casts.ll107
-rw-r--r--test/Transforms/InstCombine/vector-srem.ll2
-rw-r--r--test/Transforms/InstCombine/volatile_store.ll4
-rw-r--r--test/Transforms/InstCombine/xor-demorgans.ll2
-rw-r--r--test/Transforms/InstCombine/xor-undef.ll2
-rw-r--r--test/Transforms/InstCombine/xor.ll2
-rw-r--r--test/Transforms/InstCombine/xor2.ll28
-rw-r--r--test/Transforms/InstCombine/zero-point-zero-add.ll2
-rw-r--r--test/Transforms/InstCombine/zeroext-and-reduce.ll2
-rw-r--r--test/Transforms/InstCombine/zext-bool-add-sub.ll29
-rw-r--r--test/Transforms/InstCombine/zext-fold.ll2
-rw-r--r--test/Transforms/InstCombine/zext-or-icmp.ll2
-rw-r--r--test/Transforms/InstCombine/zext.ll30
-rw-r--r--test/Transforms/Internalize/2008-05-09-AllButMain.ll10
-rw-r--r--test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll2
-rw-r--r--test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll2
-rw-r--r--test/Transforms/JumpThreading/2008-11-28-InfLoop.ll2
-rw-r--r--test/Transforms/JumpThreading/2009-01-08-DeadLoopRepl.ll2
-rw-r--r--test/Transforms/JumpThreading/2009-01-19-InfSwitchLoop.ll2
-rw-r--r--test/Transforms/JumpThreading/and-and-cond.ll4
-rw-r--r--test/Transforms/JumpThreading/and-cond.ll4
-rw-r--r--test/Transforms/JumpThreading/basic.ll129
-rw-r--r--test/Transforms/JumpThreading/branch-no-const.ll2
-rw-r--r--test/Transforms/JumpThreading/compare.ll2
-rw-r--r--test/Transforms/JumpThreading/crash.ll56
-rw-r--r--test/Transforms/JumpThreading/no-irreducible-loops.ll2
-rw-r--r--test/Transforms/JumpThreading/thread-loads.ll2
-rw-r--r--test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll8
-rw-r--r--test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll4
-rw-r--r--test/Transforms/LCSSA/2006-07-09-NoDominator.ll2
-rw-r--r--test/Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll2
-rw-r--r--test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll2
-rw-r--r--test/Transforms/LCSSA/2007-07-12-LICM-2.ll2
-rw-r--r--test/Transforms/LCSSA/2007-07-12-LICM-3.ll2
-rw-r--r--test/Transforms/LCSSA/2007-07-12-LICM.ll2
-rw-r--r--test/Transforms/LCSSA/basictest.ll4
-rw-r--r--test/Transforms/LCSSA/invoke-dest.ll2
-rw-r--r--test/Transforms/LICM/2003-02-26-LoopExitNotDominated.ll2
-rw-r--r--test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll2
-rw-r--r--test/Transforms/LICM/2003-02-27-PreheaderExitNodeUpdate.ll2
-rw-r--r--test/Transforms/LICM/2003-02-27-PreheaderProblem.ll2
-rw-r--r--test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll2
-rw-r--r--test/Transforms/LICM/2003-02-28-PromoteDifferentType.ll2
-rw-r--r--test/Transforms/LICM/2003-05-02-LoadHoist.ll2
-rw-r--r--test/Transforms/LICM/2003-12-11-SinkingToPHI.ll2
-rw-r--r--test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll2
-rw-r--r--test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll2
-rw-r--r--test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll2
-rw-r--r--test/Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll2
-rw-r--r--test/Transforms/LICM/2007-05-22-VolatileSink.ll2
-rw-r--r--test/Transforms/LICM/2007-07-30-AliasSet.ll2
-rw-r--r--test/Transforms/LICM/2007-09-17-PromoteValue.ll2
-rw-r--r--test/Transforms/LICM/2007-09-24-PromoteNullValue.ll2
-rw-r--r--test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll2
-rw-r--r--test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll2
-rw-r--r--test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll2
-rw-r--r--test/Transforms/LICM/2009-03-25-AliasSetTracker.ll2
-rw-r--r--test/Transforms/LICM/Preserve-LCSSA.ll2
-rw-r--r--test/Transforms/LICM/basictest.ll2
-rw-r--r--test/Transforms/LICM/hoisting.ll50
-rw-r--r--test/Transforms/LICM/licm_preserve_dbginfo.ll55
-rw-r--r--test/Transforms/LICM/no-preheader-test.ll2
-rw-r--r--test/Transforms/LICM/scalar_promote.ll52
-rw-r--r--test/Transforms/LICM/sinking.ll235
-rw-r--r--test/Transforms/LoopDeletion/2007-07-23-InfiniteLoop.ll2
-rw-r--r--test/Transforms/LoopDeletion/2008-05-06-Phi.ll2
-rw-r--r--test/Transforms/LoopDeletion/dcetest.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/2009-03-30-undef.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/PR3913.ll24
-rw-r--r--test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll2
-rw-r--r--test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll2
-rw-r--r--test/Transforms/LoopRotate/LRCrash-1.ll2
-rw-r--r--test/Transforms/LoopRotate/LRCrash-2.ll2
-rw-r--r--test/Transforms/LoopRotate/LRCrash-3.ll2
-rw-r--r--test/Transforms/LoopRotate/LRCrash-4.ll2
-rw-r--r--test/Transforms/LoopRotate/LRCrash-5.ll2
-rw-r--r--test/Transforms/LoopRotate/PhiRename-1.ll2
-rw-r--r--test/Transforms/LoopRotate/PhiSelfRefernce-1.ll2
-rw-r--r--test/Transforms/LoopRotate/pr2639.ll2
-rw-r--r--test/Transforms/LoopRotate/preserve-scev.ll47
-rw-r--r--test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll2
-rw-r--r--test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll2
-rw-r--r--test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll2
-rw-r--r--test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll2
-rw-r--r--test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll2
-rw-r--r--test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll2
-rw-r--r--test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll2
-rw-r--r--test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll2
-rw-r--r--test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll2
-rw-r--r--test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll2
-rw-r--r--test/Transforms/LoopSimplify/basictest.ll2
-rw-r--r--test/Transforms/LoopSimplify/hardertest.ll2
-rw-r--r--test/Transforms/LoopSimplify/merge-exits.ll3
-rw-r--r--test/Transforms/LoopSimplify/phi-node-simplify.ll2
-rw-r--r--test/Transforms/LoopSimplify/single-backedge.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2007-04-23-UseIterator.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/dead-phi.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/different-type-ivs.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/dont_reverse.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll5
-rw-r--r--test/Transforms/LoopStrengthReduce/invariant_value_first.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/nested-reduce.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/ops_after_indvar.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/pr2537.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/pr2570.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/pr3086.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/pr3399.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/pr3571.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/related_indvars.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/remove_indvar.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/share_ivs.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/variable_stride.ll2
-rw-r--r--test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll2
-rw-r--r--test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll2
-rw-r--r--test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll2
-rw-r--r--test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll2
-rw-r--r--test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll2
-rw-r--r--test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll2
-rw-r--r--test/Transforms/LoopUnroll/2007-11-05-Crash.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2006-02-14-LoopSimplifyCrash.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2006-02-22-UnswitchCrash.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-05-09-tl.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-08-01-Dom.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll2
-rw-r--r--test/Transforms/LoopUnswitch/basictest.ll2
-rw-r--r--test/Transforms/LoopUnswitch/preserve-analyses.ll129
-rw-r--r--test/Transforms/LowerInvoke/2003-12-10-Crash.ll2
-rw-r--r--test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll2
-rw-r--r--test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll2
-rw-r--r--test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll2
-rw-r--r--test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll2
-rw-r--r--test/Transforms/LowerInvoke/basictest.ll2
-rw-r--r--test/Transforms/LowerSetJmp/2003-11-05-DominanceProperties.ll2
-rw-r--r--test/Transforms/LowerSetJmp/simpletest.ll2
-rw-r--r--test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll2
-rw-r--r--test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll2
-rw-r--r--test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll2
-rw-r--r--test/Transforms/LowerSwitch/feature.ll2
-rw-r--r--test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll2
-rw-r--r--test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll2
-rw-r--r--test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll2
-rw-r--r--test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll2
-rw-r--r--test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll2
-rw-r--r--test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll2
-rw-r--r--test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll2
-rw-r--r--test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll2
-rw-r--r--test/Transforms/Mem2Reg/2005-11-28-Crash.ll2
-rw-r--r--test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll2
-rw-r--r--test/Transforms/Mem2Reg/PromoteMemToRegister.ll2
-rw-r--r--test/Transforms/Mem2Reg/UndefValuesMerge.ll2
-rw-r--r--test/Transforms/Mem2Reg/crash.ll24
-rw-r--r--test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll2
-rw-r--r--test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll2
-rw-r--r--test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll2
-rw-r--r--test/Transforms/MemCpyOpt/align.ll18
-rw-r--r--test/Transforms/MemCpyOpt/crash.ll45
-rw-r--r--test/Transforms/MemCpyOpt/form-memset.ll4
-rw-r--r--test/Transforms/MemCpyOpt/form-memset2.ll4
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll2
-rw-r--r--test/Transforms/MemCpyOpt/memmove.ll37
-rw-r--r--test/Transforms/MemCpyOpt/sret.ll2
-rw-r--r--test/Transforms/MergeFunc/fold-weak.ll2
-rw-r--r--test/Transforms/MergeFunc/phi-speculation1.ll2
-rw-r--r--test/Transforms/MergeFunc/phi-speculation2.ll2
-rw-r--r--test/Transforms/PruneEH/2003-09-14-ExternalCall.ll2
-rw-r--r--test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll2
-rw-r--r--test/Transforms/PruneEH/2008-06-02-Weak.ll2
-rw-r--r--test/Transforms/PruneEH/2008-09-05-CGUpdate.ll2
-rw-r--r--test/Transforms/PruneEH/recursivetest.ll2
-rw-r--r--test/Transforms/PruneEH/simplenoreturntest.ll2
-rw-r--r--test/Transforms/PruneEH/simpletest.ll2
-rw-r--r--test/Transforms/RaiseAllocations/2004-11-08-FreeUseCrash.ll2
-rw-r--r--test/Transforms/RaiseAllocations/2007-10-17-InvokeFree.ll2
-rw-r--r--test/Transforms/RaiseAllocations/FreeCastConstantExpr.ll2
-rw-r--r--test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll2
-rw-r--r--test/Transforms/Reassociate/2002-05-15-MissedTree.ll2
-rw-r--r--test/Transforms/Reassociate/2002-05-15-SubReassociate.ll2
-rw-r--r--test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll2
-rw-r--r--test/Transforms/Reassociate/2002-07-09-DominanceProblem.ll2
-rw-r--r--test/Transforms/Reassociate/2003-08-12-InfiniteLoop.ll2
-rw-r--r--test/Transforms/Reassociate/2005-08-24-Crash.ll2
-rw-r--r--test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll2
-rw-r--r--test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll2
-rw-r--r--test/Transforms/Reassociate/basictest.ll2
-rw-r--r--test/Transforms/Reassociate/basictest2.ll2
-rw-r--r--test/Transforms/Reassociate/basictest3.ll2
-rw-r--r--test/Transforms/Reassociate/basictest4.ll2
-rw-r--r--test/Transforms/Reassociate/inverses.ll2
-rw-r--r--test/Transforms/Reassociate/looptest.ll2
-rw-r--r--test/Transforms/Reassociate/mul-factor3.ll3
-rw-r--r--test/Transforms/Reassociate/mul-neg-add.ll2
-rw-r--r--test/Transforms/Reassociate/mulfactor.ll2
-rw-r--r--test/Transforms/Reassociate/mulfactor2.ll3
-rw-r--r--test/Transforms/Reassociate/negation.ll2
-rw-r--r--test/Transforms/Reassociate/otherops.ll2
-rw-r--r--test/Transforms/Reassociate/shift-factor.ll3
-rw-r--r--test/Transforms/Reassociate/shifttest.ll2
-rw-r--r--test/Transforms/Reassociate/subtest.ll2
-rw-r--r--test/Transforms/Reassociate/subtest2.ll2
-rw-r--r--test/Transforms/SCCP/2002-05-02-EdgeFailure.ll2
-rw-r--r--test/Transforms/SCCP/2002-05-02-MissSecondInst.ll2
-rw-r--r--test/Transforms/SCCP/2002-05-20-MissedIncomingValue.ll2
-rw-r--r--test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll2
-rw-r--r--test/Transforms/SCCP/2002-08-30-GetElementPtrTest.ll2
-rw-r--r--test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll2
-rw-r--r--test/Transforms/SCCP/2003-08-26-InvokeHandling.ll2
-rw-r--r--test/Transforms/SCCP/2004-11-16-DeadInvoke.ll2
-rw-r--r--test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll2
-rw-r--r--test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll2
-rw-r--r--test/Transforms/SCCP/2006-12-04-PackedType.ll2
-rw-r--r--test/Transforms/SCCP/2006-12-19-UndefBug.ll2
-rw-r--r--test/Transforms/SCCP/2007-05-16-InvokeCrash.ll4
-rw-r--r--test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll2
-rw-r--r--test/Transforms/SCCP/2008-03-10-sret.ll2
-rw-r--r--test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll2
-rw-r--r--test/Transforms/SCCP/2008-05-23-UndefCallFold.ll2
-rw-r--r--test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll4
-rw-r--r--test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll2
-rw-r--r--test/Transforms/SCCP/apint-array.ll2
-rw-r--r--test/Transforms/SCCP/apint-basictest.ll2
-rw-r--r--test/Transforms/SCCP/apint-basictest2.ll4
-rw-r--r--test/Transforms/SCCP/apint-basictest3.ll4
-rw-r--r--test/Transforms/SCCP/apint-basictest4.ll6
-rw-r--r--test/Transforms/SCCP/apint-bigarray.ll2
-rw-r--r--test/Transforms/SCCP/apint-bigint.ll2
-rw-r--r--test/Transforms/SCCP/apint-bigint2.ll2
-rw-r--r--test/Transforms/SCCP/apint-ipsccp1.ll2
-rw-r--r--test/Transforms/SCCP/apint-ipsccp2.ll2
-rw-r--r--test/Transforms/SCCP/apint-ipsccp3.ll2
-rw-r--r--test/Transforms/SCCP/apint-ipsccp4.ll6
-rw-r--r--test/Transforms/SCCP/apint-load.ll4
-rw-r--r--test/Transforms/SCCP/apint-phi.ll2
-rw-r--r--test/Transforms/SCCP/apint-select.ll2
-rw-r--r--test/Transforms/SCCP/basictest.ll2
-rw-r--r--test/Transforms/SCCP/calltest.ll3
-rw-r--r--test/Transforms/SCCP/ipsccp-basic.ll2
-rw-r--r--test/Transforms/SCCP/ipsccp-conditional.ll2
-rw-r--r--test/Transforms/SCCP/ipsccp-gvar.ll2
-rw-r--r--test/Transforms/SCCP/loadtest.ll2
-rw-r--r--test/Transforms/SCCP/logical-nuke.ll2
-rw-r--r--test/Transforms/SCCP/phitest.ll3
-rw-r--r--test/Transforms/SCCP/sccptest.ll2
-rw-r--r--test/Transforms/SCCP/select.ll2
-rw-r--r--test/Transforms/SRETPromotion/2008-03-11-attributes.ll2
-rw-r--r--test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll2
-rw-r--r--test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll2
-rw-r--r--test/Transforms/SRETPromotion/basictest.ll2
-rw-r--r--test/Transforms/SSI/2009-07-09-Invoke.ll71
-rw-r--r--test/Transforms/SSI/2009-08-15-UnreachableBB.ll19
-rw-r--r--test/Transforms/SSI/2009-08-17-CritEdge.ll15
-rw-r--r--test/Transforms/SSI/2009-08-19-UnreachableBB2.ll22
-rw-r--r--test/Transforms/SSI/dg.exp3
-rw-r--r--test/Transforms/SSI/ssiphi.ll22
-rw-r--r--test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll2
-rw-r--r--test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll2
-rw-r--r--test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll2
-rw-r--r--test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll2
-rw-r--r--test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll2
-rw-r--r--test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll2
-rw-r--r--test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll2
-rw-r--r--test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll2
-rw-r--r--test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll2
-rw-r--r--test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll2
-rw-r--r--test/Transforms/ScalarRepl/2009-08-16-VLA.ll23
-rw-r--r--test/Transforms/ScalarRepl/AggregatePromote.ll2
-rw-r--r--test/Transforms/ScalarRepl/DifferingTypes.ll2
-rw-r--r--test/Transforms/ScalarRepl/arraytest.ll2
-rw-r--r--test/Transforms/ScalarRepl/badarray.ll2
-rw-r--r--test/Transforms/ScalarRepl/basictest.ll2
-rw-r--r--test/Transforms/ScalarRepl/bitfield-sroa.ll2
-rw-r--r--test/Transforms/ScalarRepl/copy-aggregate.ll2
-rw-r--r--test/Transforms/ScalarRepl/debuginfo.ll2
-rw-r--r--test/Transforms/ScalarRepl/load-store-aggregate.ll2
-rw-r--r--test/Transforms/ScalarRepl/memcpy-from-global.ll2
-rw-r--r--test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll4
-rw-r--r--test/Transforms/ScalarRepl/memset-aggregate.ll6
-rw-r--r--test/Transforms/ScalarRepl/not-a-vector.ll6
-rw-r--r--test/Transforms/ScalarRepl/phinodepromote.ll2
-rw-r--r--test/Transforms/ScalarRepl/select_promote.ll2
-rw-r--r--test/Transforms/ScalarRepl/sroa-fca.ll2
-rw-r--r--test/Transforms/ScalarRepl/sroa_two.ll2
-rw-r--r--test/Transforms/ScalarRepl/union-fp-int.ll4
-rw-r--r--test/Transforms/ScalarRepl/union-packed.ll4
-rw-r--r--test/Transforms/ScalarRepl/union-pointer.ll4
-rw-r--r--test/Transforms/ScalarRepl/vector_memcpy.ll2
-rw-r--r--test/Transforms/ScalarRepl/vector_promote.ll4
-rw-r--r--test/Transforms/ScalarRepl/volatile.ll4
-rw-r--r--test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2006-08-03-Crash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2007-12-21-Crash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll3
-rw-r--r--test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2009-05-12-externweak.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/BrUnwind.ll2
-rw-r--r--test/Transforms/SimplifyCFG/DeadSetCC.ll2
-rw-r--r--test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll2
-rw-r--r--test/Transforms/SimplifyCFG/HoistCode.ll2
-rw-r--r--test/Transforms/SimplifyCFG/PhiBlockMerge.ll2
-rw-r--r--test/Transforms/SimplifyCFG/PhiBlockMerge2.ll2
-rw-r--r--test/Transforms/SimplifyCFG/PhiEliminate.ll2
-rw-r--r--test/Transforms/SimplifyCFG/PhiEliminate2.ll2
-rw-r--r--test/Transforms/SimplifyCFG/PhiNoEliminate.ll2
-rw-r--r--test/Transforms/SimplifyCFG/SpeculativeExec.ll4
-rw-r--r--test/Transforms/SimplifyCFG/UncondBranchToReturn.ll2
-rw-r--r--test/Transforms/SimplifyCFG/UnreachableEliminate.ll2
-rw-r--r--test/Transforms/SimplifyCFG/basictest.ll2
-rw-r--r--test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll2
-rw-r--r--test/Transforms/SimplifyCFG/branch-cond-merge.ll4
-rw-r--r--test/Transforms/SimplifyCFG/branch-cond-prop.ll2
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold-test.ll2
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold.ll2
-rw-r--r--test/Transforms/SimplifyCFG/branch-phi-thread.ll2
-rw-r--r--test/Transforms/SimplifyCFG/branch_fold_dbg.ll2
-rw-r--r--test/Transforms/SimplifyCFG/dbginfo.ll4
-rw-r--r--test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll2
-rw-r--r--test/Transforms/SimplifyCFG/hoist-common-code.ll2
-rw-r--r--test/Transforms/SimplifyCFG/invoke_unwind.ll33
-rw-r--r--test/Transforms/SimplifyCFG/iterative-simplify.ll2
-rw-r--r--test/Transforms/SimplifyCFG/noreturn-call.ll2
-rw-r--r--test/Transforms/SimplifyCFG/return-merge.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch-simplify-crash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch_create.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch_formation.dbg.ll3
-rw-r--r--test/Transforms/SimplifyCFG/switch_formation.ll3
-rw-r--r--test/Transforms/SimplifyCFG/switch_switch_fold.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch_thread.ll2
-rw-r--r--test/Transforms/SimplifyCFG/trapping-load-unreachable.ll2
-rw-r--r--test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll2
-rw-r--r--test/Transforms/SimplifyCFG/two-entry-phi-return.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll22
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll24
-rw-r--r--test/Transforms/SimplifyLibCalls/FFS.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/FPrintF.ll7
-rw-r--r--test/Transforms/SimplifyLibCalls/IsDigit.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/MemCpy.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/Printf.ll4
-rw-r--r--test/Transforms/SimplifyLibCalls/Puts.ll7
-rw-r--r--test/Transforms/SimplifyLibCalls/SPrintF.ll6
-rw-r--r--test/Transforms/SimplifyLibCalls/StrCat.ll8
-rw-r--r--test/Transforms/SimplifyLibCalls/StrChr.ll6
-rw-r--r--test/Transforms/SimplifyLibCalls/StrCmp.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/StrCpy.ll6
-rw-r--r--test/Transforms/SimplifyLibCalls/StrLen.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/StrNCat.ll8
-rw-r--r--test/Transforms/SimplifyLibCalls/StrNCmp.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/StrNCpy.ll6
-rw-r--r--test/Transforms/SimplifyLibCalls/ToAscii.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/abs.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/exp2.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/floor.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/half-powr.ll7
-rw-r--r--test/Transforms/SimplifyLibCalls/memcmp.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/memmove.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/memset-64.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/memset.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll33
-rw-r--r--test/Transforms/SimplifyLibCalls/pow2.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/weak-symbols.ll26
-rw-r--r--test/Transforms/StripSymbols/2007-01-15-llvm.used.ll4
-rw-r--r--test/Transforms/TailCallElim/accum_recursion.ll2
-rw-r--r--test/Transforms/TailCallElim/accum_recursion_constant_arg.ll2
-rw-r--r--test/Transforms/TailCallElim/ackermann.ll3
-rw-r--r--test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll2
-rw-r--r--test/Transforms/TailCallElim/dont_reorder_load.ll2
-rw-r--r--test/Transforms/TailCallElim/inf-recursion.ll2
-rw-r--r--test/Transforms/TailCallElim/intervening-inst.ll2
-rw-r--r--test/Transforms/TailCallElim/move_alloca_for_tail_call.ll6
-rw-r--r--test/Transforms/TailCallElim/reorder_load.ll2
-rw-r--r--test/Transforms/TailCallElim/return_constant.ll2
-rw-r--r--test/Transforms/TailCallElim/trivial_codegen_tailcall.ll2
-rw-r--r--test/Transforms/TailDup/2003-06-24-Simpleloop.ll2
-rw-r--r--test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll2
-rw-r--r--test/Transforms/TailDup/2003-08-23-InvalidatedPointers.ll2
-rw-r--r--test/Transforms/TailDup/2003-08-31-UnreachableBlocks.ll2
-rw-r--r--test/Transforms/TailDup/2004-04-01-DemoteRegToStack.ll2
-rw-r--r--test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll2
-rw-r--r--test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll2
-rw-r--r--test/Transforms/TailDup/2009-07-31-phicrash.ll14
-rw-r--r--test/Transforms/TailDup/MergeTest.ll2
-rw-r--r--test/Transforms/TailDup/PHIUpdateTest.ll2
-rw-r--r--test/Transforms/TailDup/basictest.ll2
-rw-r--r--test/Transforms/TailDup/basictest2.ll2
-rw-r--r--test/Transforms/TailDup/if-tail-dup.ll4
-rw-r--r--test/Unit/lit.cfg65
-rw-r--r--test/Verifier/2008-03-01-AllocaSized.ll2
-rw-r--r--test/Verifier/2008-08-22-MemCpyAlignment.ll2
-rw-r--r--test/Verifier/SelfReferential.ll2
-rw-r--r--test/Verifier/aliasing-chain.ll2
-rw-r--r--test/Verifier/byval-4.ll2
-rw-r--r--test/Verifier/invoke-2.ll2
-rw-r--r--test/lib/llvm.exp26
-rw-r--r--test/lib/llvm2cpp.exp6
-rw-r--r--test/lit.cfg155
-rw-r--r--test/lit.site.cfg.in9
-rw-r--r--test/site.exp.in27
-rw-r--r--tools/CMakeLists.txt17
-rw-r--r--tools/Makefile4
-rw-r--r--tools/bugpoint/BugDriver.cpp83
-rw-r--r--tools/bugpoint/BugDriver.h6
-rw-r--r--tools/bugpoint/CrashDebugger.cpp78
-rw-r--r--tools/bugpoint/ExecutionDriver.cpp120
-rw-r--r--tools/bugpoint/ExtractFunction.cpp61
-rw-r--r--tools/bugpoint/FindBugs.cpp36
-rw-r--r--tools/bugpoint/ListReducer.h14
-rw-r--r--tools/bugpoint/Miscompilation.cpp327
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp127
-rw-r--r--tools/bugpoint/ToolRunner.cpp301
-rw-r--r--tools/bugpoint/ToolRunner.h21
-rw-r--r--tools/bugpoint/bugpoint.cpp75
-rw-r--r--tools/gold/Makefile5
-rw-r--r--tools/gold/gold-plugin.cpp5
-rw-r--r--tools/llc/CMakeLists.txt2
-rw-r--r--tools/llc/Makefile2
-rw-r--r--tools/llc/llc.cpp204
-rw-r--r--tools/lli/lli.cpp53
-rw-r--r--tools/llvm-ar/llvm-ar.cpp39
-rw-r--r--tools/llvm-as/llvm-as.cpp129
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp370
-rw-r--r--tools/llvm-config/CMakeLists.txt36
-rw-r--r--tools/llvm-config/llvm-config.in.in31
-rw-r--r--tools/llvm-dis/llvm-dis.cpp144
-rw-r--r--tools/llvm-extract/CMakeLists.txt2
-rw-r--r--tools/llvm-extract/Makefile2
-rw-r--r--tools/llvm-extract/llvm-extract.cpp67
-rw-r--r--tools/llvm-ld/Optimize.cpp6
-rw-r--r--tools/llvm-ld/llvm-ld.cpp103
-rw-r--r--tools/llvm-link/CMakeLists.txt2
-rw-r--r--tools/llvm-link/Makefile2
-rw-r--r--tools/llvm-link/llvm-link.cpp100
-rw-r--r--tools/llvm-mc/AsmCond.h40
-rw-r--r--tools/llvm-mc/AsmLexer.cpp185
-rw-r--r--tools/llvm-mc/AsmLexer.h95
-rw-r--r--tools/llvm-mc/AsmParser.cpp1477
-rw-r--r--tools/llvm-mc/AsmParser.h162
-rw-r--r--tools/llvm-mc/CMakeLists.txt4
-rw-r--r--tools/llvm-mc/Makefile11
-rw-r--r--tools/llvm-mc/llvm-mc.cpp235
-rw-r--r--tools/llvm-nm/llvm-nm.cpp48
-rw-r--r--tools/llvm-prof/llvm-prof.cpp329
-rw-r--r--tools/llvm-ranlib/llvm-ranlib.cpp9
-rw-r--r--tools/llvm-stub/llvm-stub.c5
-rw-r--r--tools/llvmc/doc/LLVMC-Reference.rst42
-rw-r--r--tools/llvmc/example/Hello/Hello.cpp5
-rw-r--r--tools/llvmc/example/mcc16/driver/Main.cpp27
-rw-r--r--tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td6
-rw-r--r--tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp18
-rw-r--r--tools/llvmc/plugins/Base/Base.td.in32
-rw-r--r--tools/lto/LTOCodeGenerator.cpp136
-rw-r--r--tools/lto/LTOCodeGenerator.h4
-rw-r--r--tools/lto/LTOModule.cpp129
-rw-r--r--tools/lto/Makefile2
-rw-r--r--tools/lto/lto.cpp8
-rw-r--r--tools/opt/AnalysisWrappers.cpp45
-rw-r--r--tools/opt/CMakeLists.txt2
-rw-r--r--tools/opt/GraphPrinters.cpp8
-rw-r--r--tools/opt/Makefile2
-rw-r--r--tools/opt/PrintSCC.cpp28
-rw-r--r--tools/opt/opt.cpp201
-rw-r--r--unittests/ADT/APFloatTest.cpp517
-rw-r--r--unittests/ADT/APIntTest.cpp183
-rw-r--r--unittests/ADT/SmallStringTest.cpp48
-rw-r--r--unittests/ADT/SmallVectorTest.cpp27
-rw-r--r--unittests/ADT/SparseBitVectorTest.cpp36
-rw-r--r--unittests/ADT/StringMapTest.cpp24
-rw-r--r--unittests/ADT/StringRefTest.cpp155
-rw-r--r--unittests/ADT/TripleTest.cpp84
-rw-r--r--unittests/ADT/TwineTest.cpp75
-rw-r--r--unittests/ExecutionEngine/ExecutionEngineTest.cpp129
-rw-r--r--unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp14
-rw-r--r--unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp277
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp277
-rw-r--r--unittests/ExecutionEngine/Makefile7
-rw-r--r--unittests/Makefile9
-rw-r--r--unittests/Makefile.unittest6
-rw-r--r--unittests/Support/AllocatorTest.cpp143
-rw-r--r--unittests/Support/CommandLineTest.cpp60
-rw-r--r--unittests/Support/ConstantRangeTest.cpp351
-rw-r--r--unittests/Support/MathExtrasTest.cpp2
-rw-r--r--unittests/Support/RegexTest.cpp65
-rw-r--r--unittests/Support/TypeBuilderTest.cpp281
-rw-r--r--unittests/Support/ValueHandleTest.cpp118
-rw-r--r--unittests/Support/raw_ostream_test.cpp45
-rw-r--r--unittests/Transforms/Makefile17
-rw-r--r--unittests/Transforms/Utils/Cloning.cpp87
-rw-r--r--unittests/Transforms/Utils/Makefile15
-rw-r--r--unittests/VMCore/ConstantsTest.cpp5
-rw-r--r--unittests/VMCore/MetadataTest.cpp112
-rw-r--r--unittests/VMCore/PassManagerTest.cpp36
-rw-r--r--utils/FileCheck/CMakeLists.txt11
-rw-r--r--utils/FileCheck/FileCheck.cpp624
-rw-r--r--utils/FileCheck/Makefile21
-rw-r--r--utils/FileUpdate/CMakeLists.txt11
-rw-r--r--utils/FileUpdate/FileUpdate.cpp86
-rw-r--r--utils/FileUpdate/Makefile21
-rw-r--r--utils/Makefile2
-rwxr-xr-xutils/NewNightlyTest.pl21
-rw-r--r--utils/PerfectShuffle/PerfectShuffle.cpp164
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp1545
-rw-r--r--utils/TableGen/AsmMatcherEmitter.h33
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp298
-rw-r--r--utils/TableGen/AsmWriterEmitter.h3
-rw-r--r--utils/TableGen/CMakeLists.txt1
-rw-r--r--utils/TableGen/CallingConvEmitter.cpp16
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp12
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp198
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h57
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp6
-rw-r--r--utils/TableGen/CodeGenInstruction.h2
-rw-r--r--utils/TableGen/CodeGenTarget.cpp84
-rw-r--r--utils/TableGen/CodeGenTarget.h4
-rw-r--r--utils/TableGen/DAGISelEmitter.cpp100
-rw-r--r--utils/TableGen/FastISelEmitter.cpp15
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp10
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp47
-rw-r--r--utils/TableGen/LLVMCConfigurationEmitter.cpp502
-rw-r--r--utils/TableGen/Record.cpp62
-rw-r--r--utils/TableGen/Record.h51
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp17
-rw-r--r--utils/TableGen/StringToOffsetTable.h76
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp154
-rw-r--r--utils/TableGen/SubtargetEmitter.h8
-rw-r--r--utils/TableGen/TGParser.cpp2
-rw-r--r--utils/TableGen/TGValueTypes.cpp45
-rw-r--r--utils/TableGen/TableGen.cpp24
-rwxr-xr-xutils/UpdateCMakeLists.pl118
-rw-r--r--utils/bugpoint/RemoteRunSafely.sh105
-rw-r--r--utils/buildit/GNUmakefile1
-rwxr-xr-xutils/buildit/build_llvm34
-rw-r--r--utils/count/CMakeLists.txt3
-rw-r--r--utils/count/Makefile20
-rw-r--r--utils/count/count.c48
-rwxr-xr-xutils/crosstool/ARM/build-install-linux.sh53
-rwxr-xr-xutils/crosstool/create-snapshots.sh28
-rw-r--r--utils/emacs/emacs.el7
-rw-r--r--utils/lit/LitConfig.py71
-rw-r--r--utils/lit/LitFormats.py2
-rw-r--r--utils/lit/ProgressBar.py267
-rw-r--r--utils/lit/ShCommands.py85
-rw-r--r--utils/lit/ShUtil.py346
-rw-r--r--utils/lit/TODO19
-rw-r--r--utils/lit/TclUtil.py322
-rw-r--r--utils/lit/Test.py71
-rw-r--r--utils/lit/TestFormats.py144
-rw-r--r--utils/lit/TestRunner.py505
-rw-r--r--utils/lit/TestingConfig.py96
-rw-r--r--utils/lit/Util.py124
-rwxr-xr-xutils/lit/lit.py531
-rw-r--r--utils/llvm.grm19
-rwxr-xr-xutils/llvmdo3
-rwxr-xr-xutils/llvmgrep2
-rw-r--r--utils/not/CMakeLists.txt11
-rw-r--r--utils/not/Makefile21
-rw-r--r--utils/not/not.cpp17
-rw-r--r--utils/unittest/Makefile2
-rw-r--r--utils/unittest/UnitTestMain/Makefile21
-rw-r--r--utils/unittest/UnitTestMain/TestMain.cpp15
-rw-r--r--utils/unittest/googletest/Makefile6
-rw-r--r--utils/unittest/googletest/README.LLVM5
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-internal.h22
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-port.h4
-rw-r--r--utils/valgrind/x86_64-pc-linux-gnu_gcc-4.3.3.supp23
-rw-r--r--utils/vim/llvm.vim8
-rw-r--r--win32/unistd.h2
5530 files changed, 217402 insertions, 66091 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56f9355d8eb4..f7126584ba78 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(LLVM)
cmake_minimum_required(VERSION 2.6.1)
set(PACKAGE_NAME llvm)
-set(PACKAGE_VERSION 2.6svn)
+set(PACKAGE_VERSION 2.7svn)
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu")
@@ -19,8 +19,6 @@ endif()
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
-include(FindPerl)
-
set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include)
set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@@ -28,26 +26,42 @@ set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin)
set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
+if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR )
+ file(GLOB_RECURSE
+ tablegenned_files_on_include_dir
+ "${LLVM_MAIN_SRC_DIR}/include/llvm/*.gen")
+ file(GLOB_RECURSE
+ tablegenned_files_on_lib_dir
+ "${LLVM_MAIN_SRC_DIR}/lib/Target/*.inc")
+ if( tablegenned_files_on_include_dir OR tablegenned_files_on_lib_dir)
+ message(FATAL_ERROR "Apparently there is a previous in-source build,
+probably as the result of running `configure' and `make' on
+${LLVM_MAIN_SRC_DIR}.
+This may cause problems. The suspicious files are:
+${tablegenned_files_on_lib_dir}
+${tablegenned_files_on_include_dir}
+Please clean the source directory.")
+ endif()
+endif()
+
set(LLVM_ALL_TARGETS
Alpha
ARM
+ Blackfin
CBackend
CellSPU
CppBackend
- IA64
Mips
MSIL
+ MSP430
PIC16
PowerPC
Sparc
+ SystemZ
X86
XCore
)
-# List of targets whose asmprinters need to be forced to link
-# into executables on some platforms (i.e. Windows):
-set(LLVM_ASMPRINTERS_FORCE_LINK X86 PowerPC)
-
if( MSVC )
set(LLVM_TARGETS_TO_BUILD X86
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
@@ -56,6 +70,9 @@ else( MSVC )
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
endif( MSVC )
+set(LLVM_TARGET_ARCH "host"
+ CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.")
+
option(LLVM_ENABLE_THREADS "Use threads if available." ON)
if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
@@ -65,7 +82,10 @@ else()
endif()
if( LLVM_ENABLE_ASSERTIONS )
- add_definitions( -D_DEBUG )
+ # MSVC doesn't like _DEBUG on release builds. See PR 4379.
+ if( NOT MSVC )
+ add_definitions( -D_DEBUG )
+ endif()
# On Release builds cmake automatically defines NDEBUG, so we
# explicitly undefine it:
if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
@@ -85,7 +105,7 @@ set(LLVM_ENUM_TARGETS "")
foreach(c ${LLVM_TARGETS_TO_BUILD})
list(FIND LLVM_ALL_TARGETS ${c} idx)
if( idx LESS 0 )
- message(FATAL_ERROR "The target `${c}' does not exists.
+ message(FATAL_ERROR "The target `${c}' does not exist.
It should be one of\n${LLVM_ALL_TARGETS}")
else()
set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${c})\n")
@@ -125,7 +145,11 @@ else(WIN32)
if(UNIX)
set(LLVM_ON_WIN32 0)
set(LLVM_ON_UNIX 1)
- set(LTDL_SHLIB_EXT ".so")
+ if(APPLE)
+ set(LTDL_SHLIB_EXT ".dylib")
+ else(APPLE)
+ set(LTDL_SHLIB_EXT ".so")
+ endif(APPLE)
set(EXEEXT "")
# FIXME: Maximum path length is currently set to 'safe' fixed value
set(MAXPATHLEN 2024)
@@ -134,13 +158,9 @@ else(WIN32)
endif(UNIX)
endif(WIN32)
-if( EXISTS ${LLVM_TOOLS_BINARY_DIR}/llvm-config )
- set(HAVE_LLVM_CONFIG 1)
-endif( EXISTS ${LLVM_TOOLS_BINARY_DIR}/llvm-config )
-
include(config-ix)
-option(LLVM_ENABLE_PIC "Build Position-Independent Code" OFF)
+option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
set(ENABLE_PIC 0)
if( LLVM_ENABLE_PIC )
@@ -170,7 +190,6 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
add_llvm_definitions( -m32 )
list(APPEND CMAKE_EXE_LINKER_FLAGS -m32)
list(APPEND CMAKE_SHARED_LINKER_FLAGS -m32)
- set( LLVM_PLO_FLAGS -melf_i386 ${LLVM_PLO_FLAGS} )
endif( LLVM_BUILD_32_BITS )
endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
@@ -187,6 +206,9 @@ if( MSVC )
add_llvm_definitions( -wd4146 -wd4503 -wd4996 -wd4800 -wd4244 -wd4624 )
add_llvm_definitions( -wd4355 -wd4715 -wd4180 -wd4345 -wd4224 )
+ # Suppress 'new behavior: elements of array 'array' will be default initialized'
+ add_llvm_definitions( -wd4351 )
+
if (NOT ${LLVM_USE_CRT} STREQUAL "")
list(FIND MSVC_CRT ${LLVM_USE_CRT} idx)
if (idx LESS 0)
@@ -199,6 +221,10 @@ endif( MSVC )
include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR})
+if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
+ SET(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-include llvm/System/Solaris.h")
+endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
+
include(AddLLVM)
include(TableGen)
@@ -237,16 +263,29 @@ add_subdirectory(lib/Linker)
add_subdirectory(lib/Analysis)
add_subdirectory(lib/Analysis/IPA)
add_subdirectory(lib/MC)
+add_subdirectory(test)
- set(LLVM_ENUM_ASM_PRINTERS "")
- foreach(t ${LLVM_TARGETS_TO_BUILD})
+add_subdirectory(utils/FileCheck)
+add_subdirectory(utils/count)
+add_subdirectory(utils/not)
+
+set(LLVM_ENUM_ASM_PRINTERS "")
+set(LLVM_ENUM_ASM_PARSERS "")
+foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(lib/Target/${t})
+ add_subdirectory(lib/Target/${t}/TargetInfo)
if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
- add_subdirectory(lib/Target/${t}/AsmPrinter)
+ add_subdirectory(lib/Target/${t}/AsmPrinter)
set(LLVM_ENUM_ASM_PRINTERS
- "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
- endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
+ "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
+ endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
+ if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt )
+ add_subdirectory(lib/Target/${t}/AsmParser)
+ set(LLVM_ENUM_ASM_PARSERS
+ "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n")
+ endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt )
+ set(CURRENT_LLVM_TARGET)
endforeach(t)
# Produce llvm/Config/AsmPrinters.def
@@ -255,19 +294,28 @@ configure_file(
${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
)
+# Produce llvm/Config/AsmParsers.def
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def
+ )
+
add_subdirectory(lib/ExecutionEngine)
add_subdirectory(lib/ExecutionEngine/Interpreter)
add_subdirectory(lib/ExecutionEngine/JIT)
add_subdirectory(lib/Target)
add_subdirectory(lib/AsmParser)
-add_subdirectory(lib/Debugger)
add_subdirectory(lib/Archive)
add_subdirectory(projects)
-add_subdirectory(tools)
-option(LLVM_EXAMPLES "Build LLVM example programs." OFF)
-if (LLVM_EXAMPLES)
+option(LLVM_BUILD_TOOLS "Build LLVM tool programs." ON)
+if(LLVM_BUILD_TOOLS)
+ add_subdirectory(tools)
+endif()
+
+option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." ON)
+if(LLVM_BUILD_EXAMPLES)
add_subdirectory(examples)
endif ()
@@ -276,6 +324,7 @@ install(DIRECTORY include
PATTERN ".svn" EXCLUDE
PATTERN "*.cmake" EXCLUDE
PATTERN "*.in" EXCLUDE
+ PATTERN "*.tmp" EXCLUDE
)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include
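
With the check added above, a source tree polluted by a previous in-source configure+make run is now rejected, so the supported workflow is an out-of-source CMake build. A minimal sketch of such an invocation (directory and source paths illustrative), using options that appear in this hunk:

    mkdir -p build && cd build
    cmake -DLLVM_TARGETS_TO_BUILD="X86;PowerPC" \
          -DLLVM_ENABLE_PIC=ON \
          -DLLVM_BUILD_EXAMPLES=ON \
          /path/to/llvm
    make
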
diff --git a/CREDITS.TXT b/CREDITS.TXT
index e1bad67c0cd7..f6467abfc038 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -90,6 +90,10 @@ N: Alkis Evlogimenos
E: alkis@evlogimenos.com
D: Linear scan register allocator, many codegen improvements, Java frontend
+N: Ryan Flynn
+E: pizza@parseerror.com
+D: Miscellaneous bug fixes
+
N: Brian Gaeke
E: gaeke@uiuc.edu
W: http://www.students.uiuc.edu/~gaeke/
@@ -117,6 +121,10 @@ N: Dan Gohman
E: gohman@apple.com
D: Miscellaneous bug fixes
+N: David Goodwin
+E: david@goodwinz.net
+D: Thumb-2 code generator
+
N: David Greene
E: greened@obbligato.org
D: Miscellaneous bug fixes
@@ -160,6 +168,10 @@ N: Brad Jones
E: kungfoomaster@nondot.org
D: Support for packed types
+N: Rod Kay
+E: rkay@auroraux.org
+D: Author of LLVM Ada bindings
+
N: Eric Kidd
W: http://randomhacks.net/
D: llvm-config script
@@ -174,6 +186,10 @@ N: Sumant Kowshik
E: kowshik@uiuc.edu
D: Author of the original C backend
+N: Benjamin Kramer
+E: benny.kra@gmail.com
+D: Miscellaneous bug fixes
+
N: Christopher Lamb
E: christopher.lamb@gmail.com
D: aligned load/store support, parts of noalias and restrict support
@@ -242,6 +258,11 @@ N: Morten Ofstad
E: morten@hue.no
D: Visual C++ compatibility fixes
+N: Jakob Stoklund Olesen
+E: stoklund@2pi.dk
+D: Machine code verifier
+D: Blackfin backend
+
N: Richard Osborne
E: richard@xmos.com
D: XCore backend
@@ -252,6 +273,10 @@ D: LTO tool, PassManager rewrite, Loop Pass Manager, Loop Rotate
D: GCC PCH Integration (llvm-gcc), llvm-gcc improvements
D: Optimizer improvements, Loop Index Split
+N: Sandeep Patel
+E: deeppatel1987@gmail.com
+D: ARM calling conventions rewrite, hard float support
+
N: Vladimir Prus
W: http://vladimir_prus.blogspot.com
E: ghost@cs.msu.su
@@ -299,6 +324,10 @@ E: lauro.venancio@indt.org.br
D: ARM backend improvements
D: Thread Local Storage implementation
+N: Xerxes Ranby
+E: xerxes@zafena.se
+D: Cmake dependency chain and various bug fixes
+
N: Bill Wendling
E: isanbard@gmail.com
D: Bunches of stuff
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 0dca8ce7bd8e..fd49172664b5 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -61,9 +61,9 @@ licenses, and/or restrictions:
Program Directory
------- ---------
-System Library llvm/lib/System
Autoconf llvm/autoconf
llvm/projects/ModuleMaker/autoconf
llvm/projects/sample/autoconf
CellSPU backend llvm/lib/Target/CellSPU/README.txt
Google Test llvm/utils/unittest/googletest
+OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
diff --git a/Makefile b/Makefile
index e750889ae45c..f3bf3f2345eb 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ ifeq ($(BUILD_DIRS_ONLY),1)
OPTIONAL_DIRS :=
else
DIRS := lib/System lib/Support utils lib/VMCore lib tools/llvm-config \
- tools runtime docs
+ tools runtime docs unittests
OPTIONAL_DIRS := examples projects bindings
endif
@@ -36,7 +36,7 @@ include $(LEVEL)/Makefile.config
# FIXME: Remove runtime entirely once we have an understanding of where
# libprofile etc should go.
#ifeq ($(LLVMGCC_MAJVERS),4)
- DIRS := $(filter-out runtime, $(DIRS))
+# DIRS := $(filter-out runtime, $(DIRS))
#endif
ifeq ($(MAKECMDGOALS),libs-only)
@@ -62,7 +62,7 @@ ifeq ($(MAKECMDGOALS),install-clang)
endif
ifeq ($(MAKECMDGOALS),clang-only)
- DIRS := $(filter-out tools runtime docs, $(DIRS)) tools/clang
+ DIRS := $(filter-out tools runtime docs unittests, $(DIRS)) tools/clang
OPTIONAL_DIRS :=
endif
@@ -88,10 +88,19 @@ cross-compile-build-tools:
$(Verb) if [ ! -f BuildTools/Makefile ]; then \
$(MKDIR) BuildTools; \
cd BuildTools ; \
- $(PROJ_SRC_DIR)/configure ; \
+ $(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \
+ --host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE); \
cd .. ; \
fi; \
- ($(MAKE) -C BuildTools BUILD_DIRS_ONLY=1 ) || exit 1;
+ ($(MAKE) -C BuildTools \
+ BUILD_DIRS_ONLY=1 \
+ UNIVERSAL= \
+ ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \
+ ENABLE_PROFILING=$(ENABLE_PROFILING) \
+ ENABLE_COVERAGE=$(ENABLE_COVERAGE) \
+ DISABLE_ASSERTIONS=$(DISABLE_ASSERTIONS) \
+ ENABLE_EXPENSIVE_CHECKS=$(ENABLE_EXPENSIVE_CHECKS) \
+ ) || exit 1;
endif
# Include the main makefile machinery.
@@ -117,7 +126,6 @@ debug-opt-prof:
dist-hook::
$(Echo) Eliminating files constructed by configure
$(Verb) $(RM) -f \
- $(TopDistDir)/include/llvm/ADT/iterator.h \
$(TopDistDir)/include/llvm/Config/config.h \
$(TopDistDir)/include/llvm/Support/DataTypes.h \
$(TopDistDir)/include/llvm/Support/ThreadSupport.h
@@ -137,7 +145,7 @@ FilesToConfig := \
include/llvm/Config/Targets.def \
include/llvm/Config/AsmPrinters.def \
include/llvm/Support/DataTypes.h \
- include/llvm/ADT/iterator.h
+ tools/llvmc/plugins/Base/Base.td
FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig))
all-local:: $(FilesToConfigPATH)
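
For cross builds, the hook above now configures BuildTools explicitly for the build machine and forwards the main build's settings into the sub-make. The generated invocation is roughly the following (triple illustrative):

    ./configure --build=x86_64-unknown-linux-gnu \
                --host=x86_64-unknown-linux-gnu \
                --target=x86_64-unknown-linux-gnu
    make -C BuildTools BUILD_DIRS_ONLY=1 ENABLE_OPTIMIZED=1
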
diff --git a/Makefile.config.in b/Makefile.config.in
index e2d2c57b4d5f..fc84c0bcb1b4 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -89,8 +89,11 @@ PROJ_mandir := $(DESTDIR)$(PROJ_prefix)/share/man
LLVM_ON_UNIX:=@LLVM_ON_UNIX@
LLVM_ON_WIN32:=@LLVM_ON_WIN32@
-# Target operating system for which LLVM will be compiled.
+# Host operating system on which LLVM will run.
OS=@OS@
+HOST_OS=@HOST_OS@
+# Target operating system for which LLVM will compile.
+TARGET_OS=@TARGET_OS@
# Target hardware architecture
ARCH=@ARCH@
@@ -107,6 +110,9 @@ BUILD_EXEEXT=@BUILD_EXEEXT@
BUILD_CC=@BUILD_CC@
BUILD_CXX=@BUILD_CXX@
+# Triple for configuring build tools when cross-compiling
+BUILD_TRIPLE=@build@
+
# Target triple (cpu-vendor-os) for which we should generate code
TARGET_TRIPLE=@target@
@@ -128,6 +134,7 @@ LDFLAGS+=@LDFLAGS@
# Path to the library archiver program.
AR_PATH = @AR@
+AR = @AR@
# Path to the nm program
NM_PATH = @NM@
@@ -238,6 +245,11 @@ RDYNAMIC := @RDYNAMIC@
#DEBUG_RUNTIME = 1
@DEBUG_RUNTIME@
+# When DEBUG_SYMBOLS is enabled, the compiler libraries will retain debug
+# symbols.
+#DEBUG_SYMBOLS = 1
+@DEBUG_SYMBOLS@
+
# When ENABLE_PROFILING is enabled, the llvm source base is built with profile
# information to allow gprof to be used to get execution frequencies.
#ENABLE_PROFILING = 1
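
The DEBUG_SYMBOLS knob declared here is consumed by the Makefile.rules hunk below: on an optimized build it appends -g and switches BuildMode to Release+Debug. A sketch of its use from the command line:

    make ENABLE_OPTIMIZED=1 DEBUG_SYMBOLS=1
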
diff --git a/Makefile.rules b/Makefile.rules
index 3ae2db8916de..e3f388d54348 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -19,10 +19,11 @@
#--------------------------------------------------------------------
# Define the various target sets
#--------------------------------------------------------------------
-RecursiveTargets := all clean clean-all install uninstall install-bytecode
+RecursiveTargets := all clean clean-all install uninstall install-bytecode \
+ unitcheck
LocalTargets := all-local clean-local clean-all-local check-local \
install-local printvars uninstall-local \
- install-bytecode-local unittests
+ install-bytecode-local
TopLevelTargets := check dist dist-check dist-clean dist-gzip dist-bzip2 \
dist-zip unittests
UserTargets := $(RecursiveTargets) $(LocalTargets) $(TopLevelTargets)
@@ -128,8 +129,11 @@ reconfigure:
$(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
$(ConfigStatusScript)
+# FIXME: The {PIC16,MSP430}/AsmPrinter line here is a hack to force a reconfigure to pick
+# up AsmPrinter changes. Remove it after a reasonable delay from 2009-08-13.
+
.PRECIOUS: $(ConfigStatusScript)
-$(ConfigStatusScript): $(ConfigureScript)
+$(ConfigStatusScript): $(ConfigureScript) $(LLVM_SRC_ROOT)/lib/Target/PIC16/AsmPrinter/Makefile $(LLVM_SRC_ROOT)/lib/Target/MSP430/AsmPrinter/Makefile
$(Echo) Reconfiguring with $<
$(Verb) cd $(PROJ_OBJ_ROOT) && \
if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \
@@ -242,6 +246,12 @@ LLVMC_BUILTIN_PLUGIN_2 = $(word 2, $(LLVMC_BUILTIN_PLUGINS))
LLVMC_BUILTIN_PLUGIN_3 = $(word 3, $(LLVMC_BUILTIN_PLUGINS))
LLVMC_BUILTIN_PLUGIN_4 = $(word 4, $(LLVMC_BUILTIN_PLUGINS))
LLVMC_BUILTIN_PLUGIN_5 = $(word 5, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_6 = $(word 6, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_7 = $(word 7, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_8 = $(word 8, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_9 = $(word 9, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_10 = $(word 10, $(LLVMC_BUILTIN_PLUGINS))
+
ifneq ($(LLVMC_BUILTIN_PLUGIN_1),)
CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_1=$(LLVMC_BUILTIN_PLUGIN_1)
@@ -263,6 +273,27 @@ ifneq ($(LLVMC_BUILTIN_PLUGIN_5),)
CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_5)
endif
+ifneq ($(LLVMC_BUILTIN_PLUGIN_6),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_6=$(LLVMC_BUILTIN_PLUGIN_6)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_7),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_7=$(LLVMC_BUILTIN_PLUGIN_7)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_8),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_8=$(LLVMC_BUILTIN_PLUGIN_8)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_9),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_9=$(LLVMC_BUILTIN_PLUGIN_9)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_10),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_10=$(LLVMC_BUILTIN_PLUGIN_10)
+endif
+
+
endif
endif # LLVMC_BASED_DRIVER
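
For context, these LLVMC_BUILTIN_PLUGIN_* defines feed llvmc-based drivers; a hypothetical build compiling in two built-in plugins (plugin names illustrative) might be requested as:

    make LLVMC_BASED_DRIVER=1 LLVMC_BUILTIN_PLUGINS="Base Clang"
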
@@ -284,7 +315,7 @@ CPP.Defines :=
# OPTIMIZE_OPTION - The optimization level option we want to build LLVM with
# this can be overridden on the make command line.
ifndef OPTIMIZE_OPTION
- ifneq ($(OS),MingW)
+ ifneq ($(HOST_OS),MingW)
OPTIMIZE_OPTION := -O3
else
OPTIMIZE_OPTION := -O2
@@ -294,8 +325,8 @@ endif
ifeq ($(ENABLE_OPTIMIZED),1)
BuildMode := Release
# Don't use -fomit-frame-pointer on Darwin or FreeBSD.
- ifneq ($(OS),FreeBSD)
- ifneq ($(OS),Darwin)
+ ifneq ($(HOST_OS),FreeBSD)
+ ifneq ($(HOST_OS),Darwin)
OmitFramePointer := -fomit-frame-pointer
endif
endif
@@ -303,12 +334,19 @@ ifeq ($(ENABLE_OPTIMIZED),1)
# Darwin requires -fstrict-aliasing to be explicitly enabled.
# Avoid -fstrict-aliasing on Darwin for now, there are unresolved issues
# with -fstrict-aliasing and ipa-type-escape radr://6756684
- #ifeq ($(OS),Darwin)
+ #ifeq ($(HOST_OS),Darwin)
# EXTRA_OPTIONS += -fstrict-aliasing -Wstrict-aliasing
#endif
CXX.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
C.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
LD.Flags += $(OPTIMIZE_OPTION)
+ ifdef DEBUG_SYMBOLS
+ BuildMode := $(BuildMode)+Debug
+ CXX.Flags += -g
+ C.Flags += -g
+ LD.Flags += -g
+ KEEP_SYMBOLS := 1
+ endif
else
BuildMode := Debug
CXX.Flags += -g
@@ -334,9 +372,16 @@ ifndef REQUIRES_EH
CXX.Flags += -fno-exceptions
endif
-# IF REQUIRES_RTTI=1 is specified then don't disable run-time type id
-ifndef REQUIRES_RTTI
-# CXX.Flags += -fno-rtti
+ifdef REQUIRES_FRAME_POINTER
+ CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags))
+ C.Flags := $(filter-out -fomit-frame-pointer,$(C.Flags))
+ LD.Flags := $(filter-out -fomit-frame-pointer,$(LD.Flags))
+endif
+
+# If REQUIRES_RTTI=1 is specified then don't disable run-time type id.
+ifeq ($(REQUIRES_RTTI), 1)
+ CXX.Flags := $(filter-out -fno-rtti,$(CXX.Flags))
+ CXXFLAGS := $(filter-out -fno-rtti,$(CXXFLAGS))
endif
ifdef ENABLE_COVERAGE
@@ -376,10 +421,10 @@ ifdef SHARED_LIBRARY
endif
ifeq ($(ENABLE_PIC),1)
- ifeq ($(OS), $(filter $(OS), Cygwin MingW))
+ ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
# Nothing. Win32 defaults to PIC and warns when given -fPIC
else
- ifeq ($(OS),Darwin)
+ ifeq ($(HOST_OS),Darwin)
# Common symbols not allowed in dylib files
CXX.Flags += -fno-common
C.Flags += -fno-common
@@ -390,17 +435,14 @@ ifeq ($(ENABLE_PIC),1)
endif
endif
else
- ifeq ($(OS),Darwin)
+ ifeq ($(HOST_OS),Darwin)
CXX.Flags += -mdynamic-no-pic
C.Flags += -mdynamic-no-pic
endif
endif
-CXX.Flags += $(CXXFLAGS) -Woverloaded-virtual
-C.Flags += $(CFLAGS)
-CPP.Defines += $(CPPFLAGS)
+CXX.Flags += -Woverloaded-virtual
CPP.BaseFlags += $(CPP.Defines)
-LD.Flags += $(LDFLAGS)
AR.Flags := cru
# Make Floating point IEEE compliant on Alpha.
@@ -417,7 +459,7 @@ ifeq ($(ARCH),Alpha)
LD.Flags += -Wl,--no-relax
endif
-ifeq ($(OS),MingW)
+ifeq ($(HOST_OS),MingW)
ifeq ($(LLVM_CROSS_COMPILING),1)
# Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=525016
ifdef TOOLNAME
@@ -430,6 +472,7 @@ ifdef ENABLE_EXPENSIVE_CHECKS
# GNU libstdc++ uses RTTI if you define _GLIBCXX_DEBUG, which we did above.
# See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40160
CXX.Flags := $(filter-out -fno-rtti,$(CXX.Flags))
+ CXXFLAGS := $(filter-out -fno-rtti,$(CXXFLAGS))
endif
#--------------------------------------------------------------------
@@ -499,7 +542,7 @@ endif
# Adjust to user's request
#--------------------------------------------------------------------
-ifeq ($(OS),Darwin)
+ifeq ($(HOST_OS),Darwin)
DARWIN_VERSION := `sw_vers -productVersion`
# Strip a number like 10.4.7 to 10.4
DARWIN_VERSION := $(shell echo $(DARWIN_VERSION)| sed -E 's/(10.[0-9]).*/\1/')
@@ -507,10 +550,12 @@ ifeq ($(OS),Darwin)
DARWIN_MAJVERS := $(shell echo $(DARWIN_VERSION)| sed -E 's/10.([0-9]).*/\1/')
SharedLinkOptions=-Wl,-flat_namespace -Wl,-undefined -Wl,suppress \
- -dynamiclib -mmacosx-version-min=$(DARWIN_VERSION)
- TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ -dynamiclib
+ ifneq ($(ARCH),ARM)
+ SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
else
- ifeq ($(OS),Cygwin)
+ ifeq ($(HOST_OS),Cygwin)
SharedLinkOptions=-shared -nostdlib -Wl,--export-all-symbols \
-Wl,--enable-auto-import -Wl,--enable-auto-image-base
else
@@ -518,6 +563,12 @@ else
endif
endif
+ifeq ($(TARGET_OS),Darwin)
+ ifneq ($(ARCH),ARM)
+ TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
+endif
+
# Adjust LD.Flags depending on the kind of library that is to be built. Note
# that if LOADABLE_MODULE is specified then the resulting shared library can
# be opened with dlopen.
@@ -555,7 +606,7 @@ ifndef KEEP_SYMBOLS
endif
# Adjust linker flags for building an executable
-ifneq ($(OS),Darwin)
+ifneq ($(HOST_OS),Darwin)
ifneq ($(DARWIN_MAJVERS),4)
ifdef TOOLNAME
ifdef EXAMPLE_TOOL
@@ -577,7 +628,7 @@ endif
CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
$(EXTRA_OPTIONS)
-ifeq ($(OS),HP-UX)
+ifeq ($(HOST_OS),HP-UX)
CompileCommonOpts := -D_REENTRANT -D_HPUX_SOURCE
endif
@@ -605,7 +656,7 @@ ifdef UNIVERSAL
# Building universal cannot compute dependencies automatically.
DISABLE_AUTO_DEPENDENCIES=1
else
- ifeq ($(OS),Darwin)
+ ifeq ($(TARGET_OS),Darwin)
ifeq ($(ARCH),x86_64)
TargetCommonOpts = -m64
else
@@ -616,10 +667,14 @@ else
endif
endif
-ifeq ($(OS),SunOS)
+ifeq ($(HOST_OS),SunOS)
CPP.BaseFlags += -include llvm/System/Solaris.h
endif
+ifeq ($(HOST_OS),AuroraUX)
+CPP.BaseFlags += -include llvm/System/Solaris.h
+endif # HOST_OS == AuroraUX.
+
LD.Flags += -L$(LibDir) -L$(LLVMLibDir)
CPP.BaseFlags += -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS
# All -I flags should go here, so that they don't confuse llvm-config.
@@ -630,31 +685,35 @@ CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
$(CPP.BaseFlags)
ifeq ($(BUILD_COMPONENT), 1)
- Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) \
+ Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
- Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) \
+ Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+ $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
- Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(TargetCommonOpts) \
+ Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \
$(CompileCommonOpts) $(CXX.Flags) -E
- Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) \
+ Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+ $(LDFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip)
else
- Compile.C = $(CC) $(CPP.Flags) $(C.Flags) \
+ Compile.C = $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
- Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) \
+ Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
- Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) \
+ Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \
$(CompileCommonOpts) $(CXX.Flags) -E
- Link = $(CXX) $(CPP.Flags) $(CXX.Flags) \
+ Link = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LDFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip)
endif
-BCCompile.C = $(LLVMGCCWITHPATH) $(CPP.Flags) $(C.Flags) \
+BCCompile.C = $(LLVMGCCWITHPATH) $(CPP.Flags) $(C.Flags) $(CFLAGS) \
+ $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts)
-Preprocess.C = $(CC) $(CPP.Flags) $(C.Flags) \
+Preprocess.C = $(CC) $(CPP.Flags) $(C.Flags) $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts) -E
-BCCompile.CXX = $(LLVMGXXWITHPATH) $(CPP.Flags) $(CXX.Flags) \
+BCCompile.CXX = $(LLVMGXXWITHPATH) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+ $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts)
ProgInstall = $(INSTALL) $(Install.StripFlag) -m 0755
@@ -780,6 +839,7 @@ clean-all:: $(addsuffix /.makeclean-all,$(PARALLEL_DIRS))
install :: $(addsuffix /.makeinstall ,$(PARALLEL_DIRS))
uninstall:: $(addsuffix /.makeuninstall,$(PARALLEL_DIRS))
install-bytecode :: $(addsuffix /.makeinstall-bytecode,$(PARALLEL_DIRS))
+unitcheck:: $(addsuffix /.makeunitcheck,$(PARALLEL_DIRS))
ParallelTargets := $(foreach T,$(RecursiveTargets),%/.make$(T))
@@ -888,7 +948,7 @@ $(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT): $(LLVM_CONFIG)
LLVMLibsOptions += $(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS))
LLVMLibsPaths += $(LLVM_CONFIG) \
- $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS))
+ $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS))
endif
endif
@@ -960,7 +1020,7 @@ endif
# if we're building a library ...
ifdef LIBRARYNAME
-# Make sure there isn't any extranous whitespace on the LIBRARYNAME option
+# Make sure there isn't any extraneous whitespace on the LIBRARYNAME option
LIBRARYNAME := $(strip $(LIBRARYNAME))
ifdef LOADABLE_MODULE
LibName.A := $(LibDir)/$(LIBRARYNAME).a
@@ -1045,9 +1105,9 @@ $(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir $(LLVMLD) \
$(LLVMToolDir)/llvm-ar
$(Echo) Building $(BuildMode) Bytecode Archive $(notdir $@) \
"(internalize)"
- $(Verb) $(BCLinkLib) -o $(ObjDir)/$(LIBRARYNAME).o $(ObjectsBC)
+ $(Verb) $(BCLinkLib) -o $(ObjDir)/$(LIBRARYNAME).internalize $(ObjectsBC)
$(Verb) $(RM) -f $@
- $(Verb) $(LArchive) $@ $(ObjDir)/$(LIBRARYNAME).o
+ $(Verb) $(LArchive) $@ $(ObjDir)/$(LIBRARYNAME).internalize.bc
else
$(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir \
$(LLVMToolDir)/llvm-ar
@@ -1169,7 +1229,7 @@ endif
# not exporting all of the weak symbols from the binary. This reduces dyld
# startup time by 4x on darwin in some cases.
ifdef TOOL_NO_EXPORTS
-ifeq ($(OS),Darwin)
+ifeq ($(HOST_OS),Darwin)
# Tiger tools don't support this.
ifneq ($(DARWIN_MAJVERS),4)
@@ -1177,7 +1237,7 @@ LD.Flags += -Wl,-exported_symbol -Wl,_main
endif
endif
-ifeq ($(OS), $(filter $(OS), Linux NetBSD FreeBSD))
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD))
LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map
endif
endif
@@ -1212,7 +1272,7 @@ install-local::
uninstall-local::
$(Echo) Uninstall circumvented with NO_INSTALL
else
-DestTool = $(PROJ_bindir)/$(TOOLNAME)
+DestTool = $(PROJ_bindir)/$(TOOLNAME)$(EXEEXT)
install-local:: $(DestTool)
@@ -1231,7 +1291,7 @@ endif
###############################################################################
# FIXME: This should be checking for "if not GCC or ICC", not for "if HP-UX"
-ifeq ($(OS),HP-UX)
+ifeq ($(HOST_OS),HP-UX)
DISABLE_AUTO_DEPENDENCIES=1
endif
@@ -1251,7 +1311,7 @@ DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.d.tmp" "$(ObjDir)/$*.d"; \
else $(RM) "$(ObjDir)/$*.d.tmp"; exit 1; fi
$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
- $(Echo) "Compiling $*.cpp for $(BuildMode) build " $(PIC_FLAG)
+ $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG)
$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
$(DEPEND_MOVEFILE)
@@ -1354,14 +1414,13 @@ $(ObjDir)/%.s: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
# make the C and C++ compilers strip debug info out of bytecode libraries.
ifdef DEBUG_RUNTIME
-$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LLVMAS) $(LOPT)
+$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT)
$(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)"
- $(Verb) $(LLVMAS) $< -o - | $(LOPT) -std-compile-opts -o $@ -f
+ $(Verb) $(LOPT) $< -std-compile-opts -o $@
else
-$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LLVMAS) $(LOPT)
+$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT)
$(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)"
- $(Verb) $(LLVMAS) $< -o - | \
- $(LOPT) -std-compile-opts -strip-debug -o $@ -f
+ $(Verb) $(LOPT) $< -std-compile-opts -strip-debug -o $@
endif
@@ -1450,6 +1509,11 @@ $(ObjDir)/%GenAsmWriter1.inc.tmp : %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) assembly writer #1 with tblgen"
$(Verb) $(TableGen) -gen-asm-writer -asmwriternum=1 -o $(call SYSPATH, $@) $<
+$(TARGET:%=$(ObjDir)/%GenAsmMatcher.inc.tmp): \
+$(ObjDir)/%GenAsmMatcher.inc.tmp : %.td $(ObjDir)/.dir
+ $(Echo) "Building $(<F) assembly matcher with tblgen"
+ $(Verb) $(TableGen) -gen-asm-matcher -o $(call SYSPATH, $@) $<
+
$(TARGET:%=$(ObjDir)/%GenCodeEmitter.inc.tmp): \
$(ObjDir)/%GenCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) code emitter with tblgen"
@@ -1575,6 +1639,30 @@ check::
$(EchoCmd) No test directory ; \
fi
+check-lit::
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+ $(EchoCmd) Running test suite ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-lit ; \
+ else \
+ $(EchoCmd) No Makefile in test directory ; \
+ fi ; \
+ else \
+ $(EchoCmd) No test directory ; \
+ fi
+
+check-all::
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+ $(EchoCmd) Running test suite ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-all ; \
+ else \
+ $(EchoCmd) No Makefile in test directory ; \
+ fi ; \
+ else \
+ $(EchoCmd) No test directory ; \
+ fi
+
###############################################################################
# UNITTESTS: Running the unittests test suite
###############################################################################
@@ -1583,7 +1671,7 @@ unittests::
$(Verb) if test -d "$(PROJ_OBJ_ROOT)/unittests" ; then \
if test -f "$(PROJ_OBJ_ROOT)/unittests/Makefile" ; then \
$(EchoCmd) Running unittests test suite ; \
- $(MAKE) -C $(PROJ_OBJ_ROOT)/unittests ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/unittests unitcheck; \
else \
$(EchoCmd) No Makefile in unittests directory ; \
fi ; \
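
Taken together, the new rules above give three test entry points; from the object root they would be run roughly as:

    make check-lit     # test/ via the lit runner (check-local-lit)
    make check-all     # test/ via check-local-all
    make unittests     # unittests/ via the new unitcheck target
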
diff --git a/README.txt b/README.txt
index 34d3766feb46..c78a9ee159f9 100644
--- a/README.txt
+++ b/README.txt
@@ -1,9 +1,9 @@
Low Level Virtual Machine (LLVM)
================================
-This directory and its subdirectories contain source code for the Low Level
+This directory and its subdirectories contain source code for the Low Level
Virtual Machine, a toolkit for the construction of highly optimized compilers,
-optimizers, and runtime environments.
+optimizers, and runtime environments.
LLVM is open source software. You may freely distribute it under the terms of
the license agreement found in LICENSE.txt.
diff --git a/Xcode/LLVM.xcodeproj/project.pbxproj b/Xcode/LLVM.xcodeproj/project.pbxproj
index 383a2ad225f7..e2f40f4cba4d 100644
--- a/Xcode/LLVM.xcodeproj/project.pbxproj
+++ b/Xcode/LLVM.xcodeproj/project.pbxproj
@@ -255,8 +255,6 @@
CF33BE160AF62B4200E93805 /* SmallString.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SmallString.h; sourceTree = "<group>"; };
CF341DAD0AB07A8B0099B064 /* AlphaTargetAsmInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AlphaTargetAsmInfo.h; sourceTree = "<group>"; };
CF341DAE0AB07A8B0099B064 /* AlphaTargetAsmInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = AlphaTargetAsmInfo.cpp; sourceTree = "<group>"; };
- CF341DE80AB07F890099B064 /* IA64TargetAsmInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IA64TargetAsmInfo.h; sourceTree = "<group>"; };
- CF341DE90AB07F890099B064 /* IA64TargetAsmInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = IA64TargetAsmInfo.cpp; sourceTree = "<group>"; };
CF341E010AB080220099B064 /* PPCTargetAsmInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PPCTargetAsmInfo.h; sourceTree = "<group>"; };
CF341E020AB080220099B064 /* PPCTargetAsmInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PPCTargetAsmInfo.cpp; sourceTree = "<group>"; };
CF341E220AB0814B0099B064 /* SparcTargetAsmInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SparcTargetAsmInfo.h; sourceTree = "<group>"; };
@@ -315,7 +313,6 @@
CF73C0AE098A51AD00627152 /* Alarm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Alarm.h; sourceTree = "<group>"; };
CF73C0AF098A51DD00627152 /* RSProfiling.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSProfiling.h; sourceTree = "<group>"; };
CF73C0B0098A523C00627152 /* ConstantFolding.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConstantFolding.cpp; sourceTree = "<group>"; };
- CF73C0B6098A53EF00627152 /* IA64Bundling.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = IA64Bundling.cpp; sourceTree = "<group>"; };
CF73C0B7098A546000627152 /* RSProfiling.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = RSProfiling.cpp; sourceTree = "<group>"; };
CF73C0B8098A546000627152 /* RSProfiling.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSProfiling.h; sourceTree = "<group>"; };
CF73C0B9098A546000627152 /* Reg2Mem.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Reg2Mem.cpp; sourceTree = "<group>"; };
@@ -415,13 +412,6 @@
CFA702C10A6FA85F0006009A /* AlphaGenRegisterInfo.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = AlphaGenRegisterInfo.inc; sourceTree = "<group>"; };
CFA702C20A6FA85F0006009A /* AlphaGenRegisterNames.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = AlphaGenRegisterNames.inc; sourceTree = "<group>"; };
CFA702C30A6FA85F0006009A /* AlphaGenSubtarget.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = AlphaGenSubtarget.inc; sourceTree = "<group>"; };
- CFA702C40A6FA8910006009A /* IA64GenAsmWriter.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenAsmWriter.inc; sourceTree = "<group>"; };
- CFA702C50A6FA8910006009A /* IA64GenDAGISel.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenDAGISel.inc; sourceTree = "<group>"; };
- CFA702C60A6FA8910006009A /* IA64GenInstrInfo.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenInstrInfo.inc; sourceTree = "<group>"; };
- CFA702C70A6FA8910006009A /* IA64GenInstrNames.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenInstrNames.inc; sourceTree = "<group>"; };
- CFA702C80A6FA8910006009A /* IA64GenRegisterInfo.h.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenRegisterInfo.h.inc; sourceTree = "<group>"; };
- CFA702C90A6FA8910006009A /* IA64GenRegisterInfo.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenRegisterInfo.inc; sourceTree = "<group>"; };
- CFA702CA0A6FA8910006009A /* IA64GenRegisterNames.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = IA64GenRegisterNames.inc; sourceTree = "<group>"; };
CFA702CB0A6FA8AD0006009A /* PPCGenAsmWriter.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = PPCGenAsmWriter.inc; sourceTree = "<group>"; };
CFA702CC0A6FA8AD0006009A /* PPCGenCodeEmitter.inc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.pascal; path = PPCGenCodeEmitter.inc; sourceTree = "<group>"; };
CFA702CD0A6FA8AD0006009A /* PPCGenDAGISel.inc */ = {isa = PBXFileReference; explicitFileType = sourcecode.pascal; fileEncoding = 4; path = PPCGenDAGISel.inc; sourceTree = "<group>"; };
@@ -450,9 +440,6 @@
CFC244BB0959F24C009F8C47 /* X86ISelDAGToDAG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = X86ISelDAGToDAG.cpp; sourceTree = "<group>"; };
CFC244BC0959F24C009F8C47 /* X86ISelLowering.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = X86ISelLowering.cpp; sourceTree = "<group>"; };
CFC244BD0959F24C009F8C47 /* X86ISelLowering.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = X86ISelLowering.h; sourceTree = "<group>"; };
- CFC244BF0959F2E3009F8C47 /* IA64ISelDAGToDAG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = IA64ISelDAGToDAG.cpp; sourceTree = "<group>"; };
- CFC244C00959F2E3009F8C47 /* IA64ISelLowering.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = IA64ISelLowering.cpp; sourceTree = "<group>"; };
- CFC244C10959F2E3009F8C47 /* IA64ISelLowering.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IA64ISelLowering.h; sourceTree = "<group>"; };
CFD7E4F30A798FC3000C7379 /* LinkAllCodegenComponents.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LinkAllCodegenComponents.h; sourceTree = "<group>"; };
CFD99AA80AFE827B0068D19C /* LICENSE.TXT */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = LICENSE.TXT; path = ../LICENSE.TXT; sourceTree = SOURCE_ROOT; };
CFD99AAD0AFE827B0068D19C /* README.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = README.txt; path = ../README.txt; sourceTree = SOURCE_ROOT; };
@@ -682,20 +669,6 @@
DE66EEAF08ABEE5E00323D32 /* AlphaTargetMachine.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = AlphaTargetMachine.cpp; sourceTree = "<group>"; };
DE66EEB008ABEE5E00323D32 /* AlphaTargetMachine.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = AlphaTargetMachine.h; sourceTree = "<group>"; };
DE66EECA08ABEE5E00323D32 /* CTargetMachine.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = CTargetMachine.h; sourceTree = "<group>"; };
- DE66EEF808ABEE5E00323D32 /* IA64.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IA64.h; sourceTree = "<group>"; };
- DE66EEF908ABEE5E00323D32 /* IA64.td */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = IA64.td; sourceTree = "<group>"; };
- DE66EEFA08ABEE5E00323D32 /* IA64AsmPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = IA64AsmPrinter.cpp; sourceTree = "<group>"; };
- DE66EF0108ABEE5E00323D32 /* IA64InstrBuilder.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IA64InstrBuilder.h; sourceTree = "<group>"; };
- DE66EF0208ABEE5E00323D32 /* IA64InstrFormats.td */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = IA64InstrFormats.td; sourceTree = "<group>"; };
- DE66EF0308ABEE5E00323D32 /* IA64InstrInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = IA64InstrInfo.cpp; sourceTree = "<group>"; };
- DE66EF0408ABEE5E00323D32 /* IA64InstrInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IA64InstrInfo.h; sourceTree = "<group>"; };
- DE66EF0508ABEE5E00323D32 /* IA64InstrInfo.td */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = IA64InstrInfo.td; sourceTree = "<group>"; };
- DE66EF0708ABEE5E00323D32 /* IA64MachineFunctionInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IA64MachineFunctionInfo.h; sourceTree = "<group>"; };
- DE66EF0808ABEE5E00323D32 /* IA64RegisterInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = IA64RegisterInfo.cpp; sourceTree = "<group>"; };
- DE66EF0908ABEE5E00323D32 /* IA64RegisterInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IA64RegisterInfo.h; sourceTree = "<group>"; };
- DE66EF0A08ABEE5E00323D32 /* IA64RegisterInfo.td */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = IA64RegisterInfo.td; sourceTree = "<group>"; };
- DE66EF0B08ABEE5E00323D32 /* IA64TargetMachine.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = IA64TargetMachine.cpp; sourceTree = "<group>"; };
- DE66EF0C08ABEE5E00323D32 /* IA64TargetMachine.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IA64TargetMachine.h; sourceTree = "<group>"; };
DE66EF0E08ABEE5E00323D32 /* README */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = README; sourceTree = "<group>"; };
DE66EF1008ABEE5E00323D32 /* TargetRegisterInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = TargetRegisterInfo.cpp; sourceTree = "<group>"; };
DE66F08A08ABEE6000323D32 /* Target.td */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = Target.td; sourceTree = "<group>"; };
@@ -1830,7 +1803,6 @@
DE66EE9708ABEE5D00323D32 /* Alpha */,
CF8F1BCF0B64FC8A00BB4199 /* ARM */,
DE66EEC908ABEE5E00323D32 /* CBackend */,
- DE66EEE508ABEE5E00323D32 /* IA64 */,
9F7794120C73CB6100551F9C /* Mips */,
DE66EF1108ABEE5E00323D32 /* PowerPC */,
DE66EF7008ABEE5F00323D32 /* Sparc */,
@@ -1906,41 +1878,6 @@
path = CBackend;
sourceTree = "<group>";
};
- DE66EEE508ABEE5E00323D32 /* IA64 */ = {
- isa = PBXGroup;
- children = (
- CFA702C40A6FA8910006009A /* IA64GenAsmWriter.inc */,
- CFA702C50A6FA8910006009A /* IA64GenDAGISel.inc */,
- CFA702C60A6FA8910006009A /* IA64GenInstrInfo.inc */,
- CFA702C70A6FA8910006009A /* IA64GenInstrNames.inc */,
- CFA702C80A6FA8910006009A /* IA64GenRegisterInfo.h.inc */,
- CFA702C90A6FA8910006009A /* IA64GenRegisterInfo.inc */,
- CFA702CA0A6FA8910006009A /* IA64GenRegisterNames.inc */,
- DE66EEF808ABEE5E00323D32 /* IA64.h */,
- DE66EEF908ABEE5E00323D32 /* IA64.td */,
- DE66EEFA08ABEE5E00323D32 /* IA64AsmPrinter.cpp */,
- CF73C0B6098A53EF00627152 /* IA64Bundling.cpp */,
- DE66EF0108ABEE5E00323D32 /* IA64InstrBuilder.h */,
- DE66EF0208ABEE5E00323D32 /* IA64InstrFormats.td */,
- DE66EF0308ABEE5E00323D32 /* IA64InstrInfo.cpp */,
- DE66EF0408ABEE5E00323D32 /* IA64InstrInfo.h */,
- DE66EF0508ABEE5E00323D32 /* IA64InstrInfo.td */,
- CFC244BF0959F2E3009F8C47 /* IA64ISelDAGToDAG.cpp */,
- CFC244C00959F2E3009F8C47 /* IA64ISelLowering.cpp */,
- CFC244C10959F2E3009F8C47 /* IA64ISelLowering.h */,
- DE66EF0708ABEE5E00323D32 /* IA64MachineFunctionInfo.h */,
- DE66EF0808ABEE5E00323D32 /* IA64RegisterInfo.cpp */,
- DE66EF0908ABEE5E00323D32 /* IA64RegisterInfo.h */,
- DE66EF0A08ABEE5E00323D32 /* IA64RegisterInfo.td */,
- CF341DE90AB07F890099B064 /* IA64TargetAsmInfo.cpp */,
- CF341DE80AB07F890099B064 /* IA64TargetAsmInfo.h */,
- DE66EF0B08ABEE5E00323D32 /* IA64TargetMachine.cpp */,
- DE66EF0C08ABEE5E00323D32 /* IA64TargetMachine.h */,
- DE66EF0E08ABEE5E00323D32 /* README */,
- );
- path = IA64;
- sourceTree = "<group>";
- };
DE66EF1108ABEE5E00323D32 /* PowerPC */ = {
isa = PBXGroup;
children = (
diff --git a/autoconf/AutoRegen.sh b/autoconf/AutoRegen.sh
index 3dca38673718..7809667ac5f1 100755
--- a/autoconf/AutoRegen.sh
+++ b/autoconf/AutoRegen.sh
@@ -1,33 +1,43 @@
-#!/bin/sh
-die () {
- echo "$@" 1>&2
- exit 1
+#!/bin/bash
+
+die() {
+ echo "$@" 1>&2
+ exit 1
+}
+
+clean() {
+ echo $1 | sed -e 's/\\//g'
}
-### NOTE: ############################################################"
-### The below two variables specify the auto* versions
-### periods should be escaped with backslash, for use by grep
+### NOTE: ############################################################
+### These variables specify the tool versions we want to use.
+### Periods should be escaped with backslash for use by grep.
+###
+### If you update these, please also update docs/GettingStarted.html
want_autoconf_version='2\.60'
want_autoheader_version=$want_autoconf_version
-### END NOTE #########################################################"
-
+want_aclocal_version='1\.9\.6'
+want_libtool_version='1\.5\.22'
+### END NOTE #########################################################
outfile=configure
configfile=configure.ac
-want_autoconf_version_clean=`echo $want_autoconf_version | sed -e 's/\\\\//g'`
-want_autoheader_version_clean=`echo $want_autoheader_version | sed -e 's/\\\\//g'`
+want_autoconf_version_clean=$(clean $want_autoconf_version)
+want_autoheader_version_clean=$(clean $want_autoheader_version)
+want_aclocal_version_clean=$(clean $want_aclocal_version)
+want_libtool_version_clean=$(clean $want_libtool_version)
test -d autoconf && test -f autoconf/$configfile && cd autoconf
test -f $configfile || die "Can't find 'autoconf' dir; please cd into it first"
autoconf --version | grep $want_autoconf_version > /dev/null
test $? -eq 0 || die "Your autoconf was not detected as being $want_autoconf_version_clean"
-aclocal --version | grep '^aclocal.*1\.9\.6' > /dev/null
-test $? -eq 0 || die "Your aclocal was not detected as being 1.9.6"
+aclocal --version | grep '^aclocal.*'$want_aclocal_version > /dev/null
+test $? -eq 0 || die "Your aclocal was not detected as being $want_aclocal_version_clean"
autoheader --version | grep '^autoheader.*'$want_autoheader_version > /dev/null
test $? -eq 0 || die "Your autoheader was not detected as being $want_autoheader_version_clean"
-libtool --version | grep '1\.5\.22' > /dev/null
-test $? -eq 0 || die "Your libtool was not detected as being 1.5.22"
+libtool --version | grep $want_libtool_version > /dev/null
+test $? -eq 0 || die "Your libtool was not detected as being $want_libtool_version_clean"
echo ""
echo "### NOTE: ############################################################"
echo "### If you get *any* warnings from autoconf below you MUST fix the"
@@ -37,7 +47,7 @@ echo "### commit any configure script that was generated with warnings"
echo "### present. You should get just three 'Regenerating..' lines."
echo "######################################################################"
echo ""
-echo "Regenerating aclocal.m4 with aclocal 1.9.6"
+echo "Regenerating aclocal.m4 with aclocal $want_aclocal_version_clean"
cwd=`pwd`
aclocal --force -I $cwd/m4 || die "aclocal failed"
echo "Regenerating configure with autoconf $want_autoconf_version_clean"
diff --git a/autoconf/config.guess b/autoconf/config.guess
index 7d0185e019ed..e792aac60807 100755
--- a/autoconf/config.guess
+++ b/autoconf/config.guess
@@ -1,9 +1,10 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+# Free Software Foundation, Inc.
-timestamp='2004-09-07'
+timestamp='2009-09-18'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -17,23 +18,25 @@ timestamp='2004-09-07'
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
-# Originally written by Per Bothner <per@bothner.com>.
-# Please send patches to <config-patches@gnu.org>. Submit a context
-# diff and a properly formatted ChangeLog entry.
+
+# Originally written by Per Bothner. Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
#
# This script attempts to guess a canonical system name similar to
# config.sub. If it succeeds, it prints the system name on stdout, and
# exits with 0. Otherwise, it exits with 1.
#
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit build system type.
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
me=`echo "$0" | sed -e 's,.*/,,'`
@@ -53,8 +56,8 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
-Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -66,11 +69,11 @@ Try \`$me --help' for more information."
while test $# -gt 0 ; do
case $1 in
--time-stamp | --time* | -t )
- echo "$timestamp" ; exit 0 ;;
+ echo "$timestamp" ; exit ;;
--version | -v )
- echo "$version" ; exit 0 ;;
+ echo "$version" ; exit ;;
--help | --h* | -h )
- echo "$usage"; exit 0 ;;
+ echo "$usage"; exit ;;
-- ) # Stop option processing
shift; break ;;
- ) # Use stdin as input.
@@ -104,7 +107,7 @@ set_cc_for_build='
trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
: ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
@@ -123,7 +126,7 @@ case $CC_FOR_BUILD,$HOST_CC,$CC in
;;
,,*) CC_FOR_BUILD=$CC ;;
,*,*) CC_FOR_BUILD=$HOST_CC ;;
-esac ;'
+esac ; set_cc_for_build= ;'
# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
# (ghazi@noc.rutgers.edu 1994-08-24)
@@ -158,6 +161,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm*) machine=arm-unknown ;;
sh3el) machine=shl-unknown ;;
sh3eb) machine=sh-unknown ;;
+ sh5el) machine=sh5le-unknown ;;
*) machine=${UNAME_MACHINE_ARCH}-unknown ;;
esac
# The Operating System including object format, if it has switched
@@ -166,7 +170,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep __ELF__ >/dev/null
+ | grep -q __ELF__
then
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
# Return netbsd for either. FIX?
@@ -196,55 +200,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# contains redundant information, the shorter form:
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
echo "${machine}-${os}${release}"
- exit 0 ;;
- amd64:OpenBSD:*:*)
- echo x86_64-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- amiga:OpenBSD:*:*)
- echo m68k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- cats:OpenBSD:*:*)
- echo arm-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- hp300:OpenBSD:*:*)
- echo m68k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- luna88k:OpenBSD:*:*)
- echo m88k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- mac68k:OpenBSD:*:*)
- echo m68k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- macppc:OpenBSD:*:*)
- echo powerpc-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- mvme68k:OpenBSD:*:*)
- echo m68k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- mvme88k:OpenBSD:*:*)
- echo m88k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- mvmeppc:OpenBSD:*:*)
- echo powerpc-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- sgi:OpenBSD:*:*)
- echo mips64-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
- sun3:OpenBSD:*:*)
- echo m68k-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:OpenBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE}
- exit 0 ;;
+ UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+ echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+ exit ;;
*:ekkoBSD:*:*)
echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
+ *:SolidBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+ exit ;;
macppc:MirBSD:*:*)
- echo powerppc-unknown-mirbsd${UNAME_RELEASE}
- exit 0 ;;
+ echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+ exit ;;
*:MirBSD:*:*)
echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
alpha:OSF1:*:*)
case $UNAME_RELEASE in
*4.0)
@@ -297,37 +269,43 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- exit 0 ;;
+ exit ;;
Alpha\ *:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# Should we change UNAME_MACHINE based on the output of uname instead
# of the specific Alpha model?
echo alpha-pc-interix
- exit 0 ;;
+ exit ;;
21064:Windows_NT:50:3)
echo alpha-dec-winnt3.5
- exit 0 ;;
+ exit ;;
Amiga*:UNIX_System_V:4.0:*)
echo m68k-unknown-sysv4
- exit 0;;
+ exit ;;
*:[Aa]miga[Oo][Ss]:*:*)
echo ${UNAME_MACHINE}-unknown-amigaos
- exit 0 ;;
+ exit ;;
*:[Mm]orph[Oo][Ss]:*:*)
echo ${UNAME_MACHINE}-unknown-morphos
- exit 0 ;;
+ exit ;;
*:OS/390:*:*)
echo i370-ibm-openedition
- exit 0 ;;
+ exit ;;
+ *:z/VM:*:*)
+ echo s390-ibm-zvmoe
+ exit ;;
*:OS400:*:*)
echo powerpc-ibm-os400
- exit 0 ;;
+ exit ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE}
- exit 0;;
+ exit ;;
+ arm:riscos:*:*|arm:RISCOS:*:*)
+ echo arm-unknown-riscos
+ exit ;;
SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
echo hppa1.1-hitachi-hiuxmpp
- exit 0;;
+ exit ;;
Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
if test "`(/bin/universe) 2>/dev/null`" = att ; then
@@ -335,32 +313,48 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
else
echo pyramid-pyramid-bsd
fi
- exit 0 ;;
+ exit ;;
NILE*:*:*:dcosx)
echo pyramid-pyramid-svr4
- exit 0 ;;
+ exit ;;
DRS?6000:unix:4.0:6*)
echo sparc-icl-nx6
- exit 0 ;;
- DRS?6000:UNIX_SV:4.2*:7*)
+ exit ;;
+ DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
case `/usr/bin/uname -p` in
- sparc) echo sparc-icl-nx7 && exit 0 ;;
+ sparc) echo sparc-icl-nx7; exit ;;
esac ;;
+ s390x:SunOS:*:*)
+ echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
sun4H:SunOS:5.*:*)
echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit 0 ;;
+ exit ;;
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit 0 ;;
- i86pc:SunOS:5.*:*)
- echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit 0 ;;
+ exit ;;
+ i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+ eval $set_cc_for_build
+ SUN_ARCH="i386"
+ # If there is a compiler, see if it is configured for 64-bit objects.
+ # Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+ # This test works for both compilers.
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ SUN_ARCH="x86_64"
+ fi
+ fi
+ echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
sun4*:SunOS:6*:*)
# According to config.sub, this is the proper way to canonicalize
# SunOS6. Hard to guess exactly what SunOS6 will be like, but
# it's likely to be more like Solaris than SunOS4.
echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit 0 ;;
+ exit ;;
sun4*:SunOS:*:*)
case "`/usr/bin/arch -k`" in
Series*|S4*)
@@ -369,10 +363,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
esac
# Japanese Language versions have a version number like `4.1.3-JL'.
echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
- exit 0 ;;
+ exit ;;
sun3*:SunOS:*:*)
echo m68k-sun-sunos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
sun*:*:4.2BSD:*)
UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
@@ -384,10 +378,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
echo sparc-sun-sunos${UNAME_RELEASE}
;;
esac
- exit 0 ;;
+ exit ;;
aushp:SunOS:*:*)
echo sparc-auspex-sunos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
# The situation for MiNT is a little confusing. The machine name
# can be virtually everything (everything which is not
# "atarist" or "atariste" at least should have a processor
@@ -398,40 +392,40 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# be no problem.
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
echo m68k-milan-mint${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
echo m68k-hades-mint${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
echo m68k-unknown-mint${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
m68k:machten:*:*)
echo m68k-apple-machten${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
powerpc:machten:*:*)
echo powerpc-apple-machten${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
RISC*:Mach:*:*)
echo mips-dec-mach_bsd4.3
- exit 0 ;;
+ exit ;;
RISC*:ULTRIX:*:*)
echo mips-dec-ultrix${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
VAX*:ULTRIX*:*:*)
echo vax-dec-ultrix${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
2020:CLIX:*:* | 2430:CLIX:*:*)
echo clipper-intergraph-clix${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
mips:*:*:UMIPS | mips:*:*:RISCos)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
@@ -455,32 +449,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
exit (-1);
}
EOF
- $CC_FOR_BUILD -o $dummy $dummy.c \
- && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
- && exit 0
+ $CC_FOR_BUILD -o $dummy $dummy.c &&
+ dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+ SYSTEM_NAME=`$dummy $dummyarg` &&
+ { echo "$SYSTEM_NAME"; exit; }
echo mips-mips-riscos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
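# The rewritten probe pattern above recurs throughout the script:
# compile $dummy.c, run it under command substitution, and only echo
# the captured name on success. The idiom in isolation (variable names
# as in the script):
#
#   $CC_FOR_BUILD -o $dummy $dummy.c &&
#     SYSTEM_NAME=`$dummy` &&
#     { echo "$SYSTEM_NAME"; exit; }
#
# Command substitution strips the probe's trailing newlines and echo
# re-adds exactly one, and the bare `exit` now returns echo's status
# (0) rather than a hard-coded `exit 0`.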
Motorola:PowerMAX_OS:*:*)
echo powerpc-motorola-powermax
- exit 0 ;;
+ exit ;;
Motorola:*:4.3:PL8-*)
echo powerpc-harris-powermax
- exit 0 ;;
+ exit ;;
Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
echo powerpc-harris-powermax
- exit 0 ;;
+ exit ;;
Night_Hawk:Power_UNIX:*:*)
echo powerpc-harris-powerunix
- exit 0 ;;
+ exit ;;
m88k:CX/UX:7*:*)
echo m88k-harris-cxux7
- exit 0 ;;
+ exit ;;
m88k:*:4*:R4*)
echo m88k-motorola-sysv4
- exit 0 ;;
+ exit ;;
m88k:*:3*:R3*)
echo m88k-motorola-sysv3
- exit 0 ;;
+ exit ;;
AViiON:dgux:*:*)
# DG/UX returns AViiON for all architectures
UNAME_PROCESSOR=`/usr/bin/uname -p`
@@ -496,29 +491,29 @@ EOF
else
echo i586-dg-dgux${UNAME_RELEASE}
fi
- exit 0 ;;
+ exit ;;
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
echo m88k-dolphin-sysv3
- exit 0 ;;
+ exit ;;
M88*:*:R3*:*)
# Delta 88k system running SVR3
echo m88k-motorola-sysv3
- exit 0 ;;
+ exit ;;
XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
echo m88k-tektronix-sysv3
- exit 0 ;;
+ exit ;;
Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
echo m68k-tektronix-bsd
- exit 0 ;;
+ exit ;;
*:IRIX*:*:*)
echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
- exit 0 ;;
+ exit ;;
????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
- exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
+ exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
i*86:AIX:*:*)
echo i386-ibm-aix
- exit 0 ;;
+ exit ;;
ia64:AIX:*:*)
if [ -x /usr/bin/oslevel ] ; then
IBM_REV=`/usr/bin/oslevel`
@@ -526,7 +521,7 @@ EOF
IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
fi
echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
- exit 0 ;;
+ exit ;;
*:AIX:2:3)
if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
eval $set_cc_for_build
@@ -541,15 +536,19 @@ EOF
exit(0);
}
EOF
- $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
- echo rs6000-ibm-aix3.2.5
+ if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+ then
+ echo "$SYSTEM_NAME"
+ else
+ echo rs6000-ibm-aix3.2.5
+ fi
elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
echo rs6000-ibm-aix3.2.4
else
echo rs6000-ibm-aix3.2
fi
- exit 0 ;;
- *:AIX:*:[45])
+ exit ;;
+ *:AIX:*:[456])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000
@@ -562,28 +561,28 @@ EOF
IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
fi
echo ${IBM_ARCH}-ibm-aix${IBM_REV}
- exit 0 ;;
+ exit ;;
*:AIX:*:*)
echo rs6000-ibm-aix
- exit 0 ;;
+ exit ;;
ibmrt:4.4BSD:*|romp-ibm:BSD:*)
echo romp-ibm-bsd4.4
- exit 0 ;;
+ exit ;;
ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
- exit 0 ;; # report: romp-ibm BSD 4.3
+ exit ;; # report: romp-ibm BSD 4.3
*:BOSX:*:*)
echo rs6000-bull-bosx
- exit 0 ;;
+ exit ;;
DPX/2?00:B.O.S.:*:*)
echo m68k-bull-sysv3
- exit 0 ;;
+ exit ;;
9000/[34]??:4.3bsd:1.*:*)
echo m68k-hp-bsd
- exit 0 ;;
+ exit ;;
hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
echo m68k-hp-bsd4.4
- exit 0 ;;
+ exit ;;
9000/[34678]??:HP-UX:*:*)
HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
case "${UNAME_MACHINE}" in
@@ -645,9 +644,19 @@ EOF
esac
if [ ${HP_ARCH} = "hppa2.0w" ]
then
- # avoid double evaluation of $set_cc_for_build
- test -n "$CC_FOR_BUILD" || eval $set_cc_for_build
- if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null
+ eval $set_cc_for_build
+
+ # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+ # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
+ # generating 64-bit code. GNU and HP use different nomenclature:
+ #
+ # $ CC_FOR_BUILD=cc ./config.guess
+ # => hppa2.0w-hp-hpux11.23
+ # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+ # => hppa64-hp-hpux11.23
+
+ if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+ grep -q __LP64__
then
HP_ARCH="hppa2.0w"
else
@@ -655,11 +664,11 @@ EOF
fi
fi
echo ${HP_ARCH}-hp-hpux${HPUX_REV}
- exit 0 ;;
+ exit ;;
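# The hppa2.0w/hppa64 decision above reduces to one preprocessor probe
# (compiler name is an example; the script uses $CC_FOR_BUILD):
#
#   $ echo __LP64__ | cc -E - 2>/dev/null | grep -q __LP64__ && echo 32-bit
#
# A 32-bit compiler leaves the __LP64__ token untouched, so the grep
# succeeds and HP_ARCH stays hppa2.0w; a 64-bit compiler expands the
# macro, the grep fails, and the else branch picks hppa64.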
ia64:HP-UX:*:*)
HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
echo ia64-hp-hpux${HPUX_REV}
- exit 0 ;;
+ exit ;;
3050*:HI-UX:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
@@ -687,216 +696,244 @@ EOF
exit (0);
}
EOF
- $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
+ $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; }
echo unknown-hitachi-hiuxwe2
- exit 0 ;;
+ exit ;;
9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
echo hppa1.1-hp-bsd
- exit 0 ;;
+ exit ;;
9000/8??:4.3bsd:*:*)
echo hppa1.0-hp-bsd
- exit 0 ;;
+ exit ;;
*9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
echo hppa1.0-hp-mpeix
- exit 0 ;;
+ exit ;;
hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
echo hppa1.1-hp-osf
- exit 0 ;;
+ exit ;;
hp8??:OSF1:*:*)
echo hppa1.0-hp-osf
- exit 0 ;;
+ exit ;;
i*86:OSF1:*:*)
if [ -x /usr/sbin/sysversion ] ; then
echo ${UNAME_MACHINE}-unknown-osf1mk
else
echo ${UNAME_MACHINE}-unknown-osf1
fi
- exit 0 ;;
+ exit ;;
parisc*:Lites*:*:*)
echo hppa1.1-hp-lites
- exit 0 ;;
+ exit ;;
C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
echo c1-convex-bsd
- exit 0 ;;
+ exit ;;
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
if getsysinfo -f scalar_acc
then echo c32-convex-bsd
else echo c2-convex-bsd
fi
- exit 0 ;;
+ exit ;;
C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
echo c34-convex-bsd
- exit 0 ;;
+ exit ;;
C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
echo c38-convex-bsd
- exit 0 ;;
+ exit ;;
C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
echo c4-convex-bsd
- exit 0 ;;
+ exit ;;
CRAY*Y-MP:*:*:*)
echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit 0 ;;
+ exit ;;
CRAY*[A-Z]90:*:*:*)
echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
-e 's/\.[^.]*$/.X/'
- exit 0 ;;
+ exit ;;
CRAY*TS:*:*:*)
echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit 0 ;;
+ exit ;;
CRAY*T3E:*:*:*)
echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit 0 ;;
+ exit ;;
CRAY*SV1:*:*:*)
echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit 0 ;;
+ exit ;;
*:UNICOS/mp:*:*)
echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit 0 ;;
+ exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit 0 ;;
+ exit ;;
5000:UNIX_System_V:4.*:*)
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit 0 ;;
+ exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
sparc*:BSD/OS:*:*)
echo sparc-unknown-bsdi${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:BSD/OS:*:*)
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:FreeBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
- exit 0 ;;
+ case ${UNAME_MACHINE} in
+ pc98)
+ echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ amd64)
+ echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ *)
+ echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ esac
+ exit ;;
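# The FreeBSD entries trim the release at the first "-" or "(" so that
# patch levels and kernel tags stay out of the triple. Illustrative:
#
#   $ echo 8.0-RELEASE-p3 | sed -e 's/[-(].*//'
#   8.0            # => x86_64-unknown-freebsd8.0 on an amd64 machine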
i*:CYGWIN*:*)
echo ${UNAME_MACHINE}-pc-cygwin
- exit 0 ;;
- i*:MINGW*:*)
+ exit ;;
+ *:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
- exit 0 ;;
+ exit ;;
+ i*:windows32*:*)
+ # uname -m includes "-pc" on this system.
+ echo ${UNAME_MACHINE}-mingw32
+ exit ;;
i*:PW*:*)
echo ${UNAME_MACHINE}-pc-pw32
- exit 0 ;;
- x86:Interix*:[34]*)
- echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//'
- exit 0 ;;
+ exit ;;
+ *:Interix*:[3456]*)
+ case ${UNAME_MACHINE} in
+ x86)
+ echo i586-pc-interix${UNAME_RELEASE}
+ exit ;;
+ EM64T | authenticamd | genuineintel)
+ echo x86_64-unknown-interix${UNAME_RELEASE}
+ exit ;;
+ IA64)
+ echo ia64-unknown-interix${UNAME_RELEASE}
+ exit ;;
+ esac ;;
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
echo i${UNAME_MACHINE}-pc-mks
- exit 0 ;;
+ exit ;;
+ 8664:Windows_NT:*)
+ echo x86_64-pc-mks
+ exit ;;
i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
# change UNAME_MACHINE based on the output of uname instead of i386?
echo i586-pc-interix
- exit 0 ;;
+ exit ;;
i*:UWIN*:*)
echo ${UNAME_MACHINE}-pc-uwin
- exit 0 ;;
+ exit ;;
+ amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+ echo x86_64-unknown-cygwin
+ exit ;;
p*:CYGWIN*:*)
echo powerpcle-unknown-cygwin
- exit 0 ;;
+ exit ;;
prep*:SunOS:5.*:*)
echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit 0 ;;
+ exit ;;
*:GNU:*:*)
# the GNU system
echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
- exit 0 ;;
+ exit ;;
*:GNU/*:*:*)
# other systems with GNU libc and userland
echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
- exit 0 ;;
+ exit ;;
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
- exit 0 ;;
+ exit ;;
+ alpha:Linux:*:*)
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ EV5) UNAME_MACHINE=alphaev5 ;;
+ EV56) UNAME_MACHINE=alphaev56 ;;
+ PCA56) UNAME_MACHINE=alphapca56 ;;
+ PCA57) UNAME_MACHINE=alphapca56 ;;
+ EV6) UNAME_MACHINE=alphaev6 ;;
+ EV67) UNAME_MACHINE=alphaev67 ;;
+ EV68*) UNAME_MACHINE=alphaev68 ;;
+ esac
+ objdump --private-headers /bin/sh | grep -q ld.so.1
+ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+ exit ;;
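# The relocated alpha branch reads the CPU model out of /proc/cpuinfo;
# the sed prints only what follows ": " on the "cpu model" line.
# Sketch with an illustrative cpuinfo line:
#
#   $ echo 'cpu model : EV56' | sed -n '/^cpu model/s/^.*: \(.*\)/\1/p'
#   EV56           # => UNAME_MACHINE=alphaev56
#
# The objdump|grep that follows tags old libc1 systems by spotting
# ld.so.1 in /bin/sh's dynamic headers.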
arm*:Linux:*:*)
+ eval $set_cc_for_build
+ if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_EABI__
+ then
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ else
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ fi
+ exit ;;
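# The new ARM branch feeds __ARM_EABI__ through the preprocessor: an
# EABI toolchain defines the macro, so the token is rewritten, the grep
# fails, and the triple takes the -gnueabi suffix; an old-ABI compiler
# leaves the token intact and plain -gnu wins. One-line sketch
# (compiler name is an example):
#
#   $ echo __ARM_EABI__ | gcc -E - 2>/dev/null | grep -q __ARM_EABI__ \
#       || echo eabi-toolchain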
+ avr32*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
cris:Linux:*:*)
echo cris-axis-linux-gnu
- exit 0 ;;
+ exit ;;
crisv32:Linux:*:*)
echo crisv32-axis-linux-gnu
- exit 0 ;;
+ exit ;;
frv:Linux:*:*)
echo frv-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
+ i*86:Linux:*:*)
+ echo ${UNAME_MACHINE}-pc-linux-gnu
+ exit ;;
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
m32r*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
m68*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
- mips:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #undef CPU
- #undef mips
- #undef mipsel
- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mipsel
- #else
- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips
- #else
- CPU=
- #endif
- #endif
-EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
- test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
- ;;
- mips64:Linux:*:*)
+ exit ;;
+ mips:Linux:*:* | mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
- #undef mips64
- #undef mips64el
+ #undef ${UNAME_MACHINE}
+ #undef ${UNAME_MACHINE}el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mips64el
+ CPU=${UNAME_MACHINE}el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips64
+ CPU=${UNAME_MACHINE}
#else
CPU=
#endif
#endif
EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
- test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
+ eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+ /^CPU/{
+ s: ::g
+ p
+ }'`"
+ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
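# Merging the mips and mips64 probes works by splicing ${UNAME_MACHINE}
# into the here-document, so the preprocessor leaves behind either
# CPU=mips(64) or CPU=mips(64)el depending on the endianness macros;
# the sed pass then strips stray spaces from the CPU line before it is
# eval'ed. Illustrative compiler output on a little-endian mips box:
#
#   $ $CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'
#   CPU=mipsel     # after eval: echo mipsel-unknown-linux-gnu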
- ppc:Linux:*:*)
- echo powerpc-unknown-linux-gnu
- exit 0 ;;
- ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-gnu
- exit 0 ;;
- alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
- EV5) UNAME_MACHINE=alphaev5 ;;
- EV56) UNAME_MACHINE=alphaev56 ;;
- PCA56) UNAME_MACHINE=alphapca56 ;;
- PCA57) UNAME_MACHINE=alphapca56 ;;
- EV6) UNAME_MACHINE=alphaev6 ;;
- EV67) UNAME_MACHINE=alphaev67 ;;
- EV68*) UNAME_MACHINE=alphaev68 ;;
- esac
- objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
- exit 0 ;;
+ or32:Linux:*:*)
+ echo or32-unknown-linux-gnu
+ exit ;;
+ padre:Linux:*:*)
+ echo sparc-unknown-linux-gnu
+ exit ;;
+ parisc64:Linux:*:* | hppa64:Linux:*:*)
+ echo hppa64-unknown-linux-gnu
+ exit ;;
parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level
case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
@@ -904,87 +941,40 @@ EOF
PA8*) echo hppa2.0-unknown-linux-gnu ;;
*) echo hppa-unknown-linux-gnu ;;
esac
- exit 0 ;;
- parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
+ ppc64:Linux:*:*)
+ echo powerpc64-unknown-linux-gnu
+ exit ;;
+ ppc:Linux:*:*)
+ echo powerpc-unknown-linux-gnu
+ exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux
- exit 0 ;;
+ exit ;;
sh64*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
sh*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
sparc:Linux:*:* | sparc64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit 0 ;;
+ exit ;;
+ vax:Linux:*:*)
+ echo ${UNAME_MACHINE}-dec-linux-gnu
+ exit ;;
x86_64:Linux:*:*)
echo x86_64-unknown-linux-gnu
- exit 0 ;;
- i*86:Linux:*:*)
- # The BFD linker knows what the default object file format is, so
- # first see if it will tell us. cd to the root directory to prevent
- # problems with other programs or directories called `ld' in the path.
- # Set LC_ALL=C to ensure ld outputs messages in English.
- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
- | sed -ne '/supported targets:/!d
- s/[ ][ ]*/ /g
- s/.*supported targets: *//
- s/ .*//
- p'`
- case "$ld_supported_targets" in
- elf32-i386)
- TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
- ;;
- a.out-i386-linux)
- echo "${UNAME_MACHINE}-pc-linux-gnuaout"
- exit 0 ;;
- coff-i386)
- echo "${UNAME_MACHINE}-pc-linux-gnucoff"
- exit 0 ;;
- "")
- # Either a pre-BFD a.out linker (linux-gnuoldld) or
- # one that does not give us useful --help.
- echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
- exit 0 ;;
- esac
- # Determine whether the default compiler is a.out or elf
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <features.h>
- #ifdef __ELF__
- # ifdef __GLIBC__
- # if __GLIBC__ >= 2
- LIBC=gnu
- # else
- LIBC=gnulibc1
- # endif
- # else
- LIBC=gnulibc1
- # endif
- #else
- #ifdef __INTEL_COMPILER
- LIBC=gnu
- #else
- LIBC=gnuaout
- #endif
- #endif
- #ifdef __dietlibc__
- LIBC=dietlibc
- #endif
-EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
- test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0
- test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0
- ;;
+ exit ;;
+ xtensa*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
# earlier versions are messed up and put the nodename in both
# sysname and nodename.
echo i386-sequent-sysv4
- exit 0 ;;
+ exit ;;
i*86:UNIX_SV:4.2MP:2.*)
# Unixware is an offshoot of SVR4, but it has its own version
# number series starting with 2...
@@ -992,27 +982,27 @@ EOF
# I just have to hope. -- rms.
# Use sysv4.2uw... so that sysv4* matches it.
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
- exit 0 ;;
+ exit ;;
i*86:OS/2:*:*)
# If we were able to find `uname', then EMX Unix compatibility
# is probably installed.
echo ${UNAME_MACHINE}-pc-os2-emx
- exit 0 ;;
+ exit ;;
i*86:XTS-300:*:STOP)
echo ${UNAME_MACHINE}-unknown-stop
- exit 0 ;;
+ exit ;;
i*86:atheos:*:*)
echo ${UNAME_MACHINE}-unknown-atheos
- exit 0 ;;
- i*86:syllable:*:*)
+ exit ;;
+ i*86:syllable:*:*)
echo ${UNAME_MACHINE}-pc-syllable
- exit 0 ;;
- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+ exit ;;
+ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
echo i386-unknown-lynxos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
i*86:*DOS:*:*)
echo ${UNAME_MACHINE}-pc-msdosdjgpp
- exit 0 ;;
+ exit ;;
i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
@@ -1020,15 +1010,16 @@ EOF
else
echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
fi
- exit 0 ;;
- i*86:*:5:[78]*)
+ exit ;;
+ i*86:*:5:[678]*)
+ # UnixWare 7.x, OpenUNIX and OpenServer 6.
case `/bin/uname -X | grep "^Machine"` in
*486*) UNAME_MACHINE=i486 ;;
*Pentium) UNAME_MACHINE=i586 ;;
*Pent*|*Celeron) UNAME_MACHINE=i686 ;;
esac
echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
- exit 0 ;;
+ exit ;;
i*86:*:3.2:*)
if test -f /usr/options/cb.name; then
UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
@@ -1046,73 +1037,86 @@ EOF
else
echo ${UNAME_MACHINE}-pc-sysv32
fi
- exit 0 ;;
+ exit ;;
pc:*:*:*)
# Left here for compatibility:
# uname -m prints for DJGPP always 'pc', but it prints nothing about
- # the processor, so we play safe by assuming i386.
- echo i386-pc-msdosdjgpp
- exit 0 ;;
+ # the processor, so we play safe by assuming i586.
+ # Note: whatever this is, it MUST be the same as what config.sub
+ # prints for the "djgpp" host, or else GDB configury will decide that
+ # this is a cross-build.
+ echo i586-pc-msdosdjgpp
+ exit ;;
Intel:Mach:3*:*)
echo i386-pc-mach3
- exit 0 ;;
+ exit ;;
paragon:*:*:*)
echo i860-intel-osf1
- exit 0 ;;
+ exit ;;
i860:*:4.*:*) # i860-SVR4
if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
else # Add other i860-SVR4 vendors below as they are discovered.
echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
fi
- exit 0 ;;
+ exit ;;
mini*:CTIX:SYS*5:*)
# "miniframe"
echo m68010-convergent-sysv
- exit 0 ;;
+ exit ;;
mc68k:UNIX:SYSTEM5:3.51m)
echo m68k-convergent-sysv
- exit 0 ;;
+ exit ;;
M680?0:D-NIX:5.3:*)
echo m68k-diab-dnix
- exit 0 ;;
+ exit ;;
M68*:*:R3V[5678]*:*)
- test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
+ test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
OS_REL=''
test -r /etc/.relid \
&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && echo i486-ncr-sysv4.3${OS_REL} && exit 0
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && echo i486-ncr-sysv4 && exit 0 ;;
+ && { echo i486-ncr-sysv4; exit; } ;;
+ NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+ OS_REL='.3'
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
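# The new NCR/MPRAS entry defaults OS_REL to ".3" and upgrades it from
# /etc/.relid when readable: the sed keeps the first two digits of the
# third whitespace-separated field. Example (.relid contents are
# illustrative):
#
#   $ echo 'id 4.3 02 8090' | sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p'
#   02             # => OS_REL=.02, i.e. i486-ncr-sysv4.3.02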
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
echo m68k-unknown-lynxos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
mc68030:UNIX_System_V:4.*:*)
echo m68k-atari-sysv4
- exit 0 ;;
+ exit ;;
TSUNAMI:LynxOS:2.*:*)
echo sparc-unknown-lynxos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
rs6000:LynxOS:2.*:*)
echo rs6000-unknown-lynxos${UNAME_RELEASE}
- exit 0 ;;
- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+ exit ;;
+ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
echo powerpc-unknown-lynxos${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
SM[BE]S:UNIX_SV:*:*)
echo mips-dde-sysv${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
RM*:ReliantUNIX-*:*:*)
echo mips-sni-sysv4
- exit 0 ;;
+ exit ;;
RM*:SINIX-*:*:*)
echo mips-sni-sysv4
- exit 0 ;;
+ exit ;;
*:SINIX-*:*:*)
if uname -p 2>/dev/null >/dev/null ; then
UNAME_MACHINE=`(uname -p) 2>/dev/null`
@@ -1120,69 +1124,94 @@ EOF
else
echo ns32k-sni-sysv
fi
- exit 0 ;;
+ exit ;;
PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
# says <Richard.M.Bartel@ccMail.Census.GOV>
echo i586-unisys-sysv4
- exit 0 ;;
+ exit ;;
*:UNIX_System_V:4*:FTX*)
# From Gerald Hewes <hewes@openmarket.com>.
# How about differentiating between stratus architectures? -djm
echo hppa1.1-stratus-sysv4
- exit 0 ;;
+ exit ;;
*:*:*:FTX*)
# From seanf@swdc.stratus.com.
echo i860-stratus-sysv4
- exit 0 ;;
+ exit ;;
+ i*86:VOS:*:*)
+ # From Paul.Green@stratus.com.
+ echo ${UNAME_MACHINE}-stratus-vos
+ exit ;;
*:VOS:*:*)
# From Paul.Green@stratus.com.
echo hppa1.1-stratus-vos
- exit 0 ;;
+ exit ;;
mc68*:A/UX:*:*)
echo m68k-apple-aux${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
news*:NEWS-OS:6*:*)
echo mips-sony-newsos6
- exit 0 ;;
+ exit ;;
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
if [ -d /usr/nec ]; then
echo mips-nec-sysv${UNAME_RELEASE}
else
echo mips-unknown-sysv${UNAME_RELEASE}
fi
- exit 0 ;;
+ exit ;;
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
echo powerpc-be-beos
- exit 0 ;;
+ exit ;;
BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
echo powerpc-apple-beos
- exit 0 ;;
+ exit ;;
BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
echo i586-pc-beos
- exit 0 ;;
+ exit ;;
+ BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
+ echo i586-pc-haiku
+ exit ;;
SX-4:SUPER-UX:*:*)
echo sx4-nec-superux${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
SX-5:SUPER-UX:*:*)
echo sx5-nec-superux${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
SX-6:SUPER-UX:*:*)
echo sx6-nec-superux${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
+ SX-7:SUPER-UX:*:*)
+ echo sx7-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-8:SUPER-UX:*:*)
+ echo sx8-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-8R:SUPER-UX:*:*)
+ echo sx8r-nec-superux${UNAME_RELEASE}
+ exit ;;
Power*:Rhapsody:*:*)
echo powerpc-apple-rhapsody${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:Rhapsody:*:*)
echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
case $UNAME_PROCESSOR in
- *86) UNAME_PROCESSOR=i686 ;;
+ i386)
+ eval $set_cc_for_build
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ UNAME_PROCESSOR="x86_64"
+ fi
+ fi ;;
unknown) UNAME_PROCESSOR=powerpc ;;
esac
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
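# On Darwin the same __LP64__ probe promotes a `uname -p` of i386 to
# x86_64 when the default compiler generates 64-bit code. Illustrative
# session on a 64-bit Intel Mac (output values are examples):
#
#   $ uname -p
#   i386
#   $ ./config.guess
#   x86_64-apple-darwin10.0.0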
*:procnto*:*:* | *:QNX:[0123456789]*:*)
UNAME_PROCESSOR=`uname -p`
if test "$UNAME_PROCESSOR" = "x86"; then
@@ -1190,22 +1219,25 @@ EOF
UNAME_MACHINE=pc
fi
echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:QNX:*:4*)
echo i386-pc-qnx
- exit 0 ;;
+ exit ;;
+ NSE-?:NONSTOP_KERNEL:*:*)
+ echo nse-tandem-nsk${UNAME_RELEASE}
+ exit ;;
NSR-?:NONSTOP_KERNEL:*:*)
echo nsr-tandem-nsk${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:NonStop-UX:*:*)
echo mips-compaq-nonstopux
- exit 0 ;;
+ exit ;;
BS2000:POSIX*:*:*)
echo bs2000-siemens-sysv
- exit 0 ;;
+ exit ;;
DS/*:UNIX_System_V:*:*)
echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:Plan9:*:*)
# "uname -m" is not consistent, so use $cputype instead. 386
# is converted to i386 for consistency with other x86
@@ -1216,38 +1248,50 @@ EOF
UNAME_MACHINE="$cputype"
fi
echo ${UNAME_MACHINE}-unknown-plan9
- exit 0 ;;
+ exit ;;
*:TOPS-10:*:*)
echo pdp10-unknown-tops10
- exit 0 ;;
+ exit ;;
*:TENEX:*:*)
echo pdp10-unknown-tenex
- exit 0 ;;
+ exit ;;
KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
echo pdp10-dec-tops20
- exit 0 ;;
+ exit ;;
XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
echo pdp10-xkl-tops20
- exit 0 ;;
+ exit ;;
*:TOPS-20:*:*)
echo pdp10-unknown-tops20
- exit 0 ;;
+ exit ;;
*:ITS:*:*)
echo pdp10-unknown-its
- exit 0 ;;
+ exit ;;
SEI:*:*:SEIUX)
echo mips-sei-seiux${UNAME_RELEASE}
- exit 0 ;;
+ exit ;;
*:DragonFly:*:*)
echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
- exit 0 ;;
+ exit ;;
*:*VMS:*:*)
UNAME_MACHINE=`(uname -p) 2>/dev/null`
case "${UNAME_MACHINE}" in
- A*) echo alpha-dec-vms && exit 0 ;;
- I*) echo ia64-dec-vms && exit 0 ;;
- V*) echo vax-dec-vms && exit 0 ;;
- esac
+ A*) echo alpha-dec-vms ; exit ;;
+ I*) echo ia64-dec-vms ; exit ;;
+ V*) echo vax-dec-vms ; exit ;;
+ esac ;;
+ *:XENIX:*:SysV)
+ echo i386-pc-xenix
+ exit ;;
+ i*86:skyos:*:*)
+ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+ exit ;;
+ i*86:rdos:*:*)
+ echo ${UNAME_MACHINE}-pc-rdos
+ exit ;;
+ i*86:AROS:*:*)
+ echo ${UNAME_MACHINE}-pc-aros
+ exit ;;
esac
#echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1279,7 +1323,7 @@ main ()
#endif
#if defined (__arm) && defined (__acorn) && defined (__unix)
- printf ("arm-acorn-riscix"); exit (0);
+ printf ("arm-acorn-riscix\n"); exit (0);
#endif
#if defined (hp300) && !defined (hpux)
@@ -1368,11 +1412,12 @@ main ()
}
EOF
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; }
# Apollos put the system type in the environment.
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
# Convex versions that predate uname can use getsysinfo(1)
@@ -1381,22 +1426,22 @@ then
case `getsysinfo -f cpu_type` in
c1*)
echo c1-convex-bsd
- exit 0 ;;
+ exit ;;
c2*)
if getsysinfo -f scalar_acc
then echo c32-convex-bsd
else echo c2-convex-bsd
fi
- exit 0 ;;
+ exit ;;
c34*)
echo c34-convex-bsd
- exit 0 ;;
+ exit ;;
c38*)
echo c38-convex-bsd
- exit 0 ;;
+ exit ;;
c4*)
echo c4-convex-bsd
- exit 0 ;;
+ exit ;;
esac
fi
@@ -1407,7 +1452,9 @@ This script, last modified $timestamp, has failed to recognize
the operating system you are using. It is advised that you
download the most up to date version of the config scripts from
- ftp://ftp.gnu.org/pub/gnu/config/
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+and
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
If the version you run ($0) is already up to date, please
send the following data and any information you think might be
diff --git a/autoconf/config.sub b/autoconf/config.sub
index edb6b663ca2b..8ca084bf3340 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -1,9 +1,10 @@
#! /bin/sh
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+# Free Software Foundation, Inc.
-timestamp='2004-08-29'
+timestamp='2009-08-19'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@@ -21,22 +22,26 @@ timestamp='2004-08-29'
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
+
# Please send patches to <config-patches@gnu.org>. Submit a context
-# diff and a properly formatted ChangeLog entry.
+# diff and a properly formatted GNU ChangeLog entry.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
# If it is invalid, we print an error message on stderr and exit with code 1.
# Otherwise, we print the canonical config type on stdout and succeed.
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
# that are meaningful with *any* GNU software.
@@ -70,8 +75,8 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
-Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -83,11 +88,11 @@ Try \`$me --help' for more information."
while test $# -gt 0 ; do
case $1 in
--time-stamp | --time* | -t )
- echo "$timestamp" ; exit 0 ;;
+ echo "$timestamp" ; exit ;;
--version | -v )
- echo "$version" ; exit 0 ;;
+ echo "$version" ; exit ;;
--help | --h* | -h )
- echo "$usage"; exit 0 ;;
+ echo "$usage"; exit ;;
-- ) # Stop option processing
shift; break ;;
- ) # Use stdin as input.
@@ -99,7 +104,7 @@ while test $# -gt 0 ; do
*local*)
# First pass through any local machine types.
echo $1
- exit 0;;
+ exit ;;
* )
break ;;
@@ -118,8 +123,10 @@ esac
# Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
- nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
- kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
+ nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
+ uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+ kopensolaris*-gnu* | \
+ storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;;
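# config.sub pulls a trailing KERNEL-OS pair off the triple with one
# sed per half: \2 grabs the last two dash-separated components, \1 the
# remainder. Worked example (triple is illustrative):
#
#   $ echo x86_64-unknown-linux-gnu | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'
#   linux-gnu        # maybe_os
#   $ echo x86_64-unknown-linux-gnu | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'
#   x86_64-unknown   # basic_machine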
@@ -145,10 +152,13 @@ case $os in
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis | -knuth | -cray)
+ -apple | -axis | -knuth | -cray | -microblaze)
os=
basic_machine=$1
;;
+ -bluegene*)
+ os=-cnk
+ ;;
-sim | -cisco | -oki | -wec | -winbond)
os=
basic_machine=$1
@@ -170,6 +180,10 @@ case $os in
-hiux*)
os=-hiuxwe2
;;
+ -sco6)
+ os=-sco5v6
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
-sco5)
os=-sco3.2v5
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@@ -186,6 +200,10 @@ case $os in
# Don't forget version if it is 3.2v4 or newer.
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
+ -sco5v6*)
+ # Don't forget version if it is 3.2v4 or newer.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
-sco*)
os=-sco3.2v2
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@@ -230,22 +248,28 @@ case $basic_machine in
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
- | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
+ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+ | bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
- | fr30 | frv \
+ | fido | fr30 | frv \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
- | m32r | m32rle | m68000 | m68k | m88k | mcore \
+ | lm32 \
+ | m32c | m32r | m32rle | m68000 | m68k | m88k \
+ | maxq | mb | microblaze | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \
| mips16 \
| mips64 | mips64el \
- | mips64vr | mips64vrel \
+ | mips64octeon | mips64octeonel \
| mips64orion | mips64orionel \
+ | mips64r5900 | mips64r5900el \
+ | mips64vr | mips64vrel \
| mips64vr4100 | mips64vr4100el \
| mips64vr4300 | mips64vr4300el \
| mips64vr5000 | mips64vr5000el \
+ | mips64vr5900 | mips64vr5900el \
| mipsisa32 | mipsisa32el \
| mipsisa32r2 | mipsisa32r2el \
| mipsisa64 | mipsisa64el \
@@ -254,21 +278,26 @@ case $basic_machine in
| mipsisa64sr71k | mipsisa64sr71kel \
| mipstx39 | mipstx39el \
| mn10200 | mn10300 \
+ | moxie \
+ | mt \
| msp430 \
+ | nios | nios2 \
| ns16k | ns32k \
- | openrisc | or32 \
+ | or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
| pyramid \
- | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
+ | score \
+ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \
- | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv8 | sparcv9 | sparcv9b \
- | strongarm \
+ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+ | spu | strongarm \
| tahoe | thumb | tic4x | tic80 | tron \
| v850 | v850e \
| we32k \
- | x86 | xscale | xstormy16 | xtensa \
- | z8k)
+ | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
+ | z8k | z80)
basic_machine=$basic_machine-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12)
@@ -278,6 +307,9 @@ case $basic_machine in
;;
m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
;;
+ ms1)
+ basic_machine=mt-unknown
+ ;;
# We use `pc' rather than `unknown'
# because (1) that's what they normally are, and
@@ -297,28 +329,32 @@ case $basic_machine in
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
- | avr-* \
- | bs2000-* \
+ | avr-* | avr32-* \
+ | bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
| clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \
| elxsi-* \
- | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
+ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
- | m32r-* | m32rle-* \
+ | lm32-* \
+ | m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
- | m88110-* | m88k-* | mcore-* \
+ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
| mips16-* \
| mips64-* | mips64el-* \
- | mips64vr-* | mips64vrel-* \
+ | mips64octeon-* | mips64octeonel-* \
| mips64orion-* | mips64orionel-* \
+ | mips64r5900-* | mips64r5900el-* \
+ | mips64vr-* | mips64vrel-* \
| mips64vr4100-* | mips64vr4100el-* \
| mips64vr4300-* | mips64vr4300el-* \
| mips64vr5000-* | mips64vr5000el-* \
+ | mips64vr5900-* | mips64vr5900el-* \
| mipsisa32-* | mipsisa32el-* \
| mipsisa32r2-* | mipsisa32r2el-* \
| mipsisa64-* | mipsisa64el-* \
@@ -327,26 +363,33 @@ case $basic_machine in
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
| mipstx39-* | mipstx39el-* \
| mmix-* \
+ | mt-* \
| msp430-* \
+ | nios-* | nios2-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
| pyramid-* \
| romp-* | rs6000-* \
- | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \
+ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
- | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \
- | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
+ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+ | sparclite-* \
+ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
| tahoe-* | thumb-* \
- | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \
| tron-* \
| v850-* | v850e-* | vax-* \
| we32k-* \
- | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \
- | xtensa-* \
+ | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
+ | xstormy16-* | xtensa*-* \
| ymp-* \
- | z8k-*)
+ | z8k-* | z80-*)
+ ;;
+ # Recognize the basic CPU types without company name, with glob match.
+ xtensa*)
+ basic_machine=$basic_machine-unknown
;;
# Recognize the various machine names and aliases which stand
# for a CPU type and a company and sometimes even an OS.
@@ -410,6 +453,10 @@ case $basic_machine in
basic_machine=m68k-apollo
os=-bsd
;;
+ aros)
+ basic_machine=i386-pc
+ os=-aros
+ ;;
aux)
basic_machine=m68k-apple
os=-aux
@@ -418,10 +465,26 @@ case $basic_machine in
basic_machine=ns32k-sequent
os=-dynix
;;
+ blackfin)
+ basic_machine=bfin-unknown
+ os=-linux
+ ;;
+ blackfin-*)
+ basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=-linux
+ ;;
+ bluegene*)
+ basic_machine=powerpc-ibm
+ os=-cnk
+ ;;
c90)
basic_machine=c90-cray
os=-unicos
;;
+ cegcc)
+ basic_machine=arm-unknown
+ os=-cegcc
+ ;;
convex-c1)
basic_machine=c1-convex
os=-bsd
@@ -450,8 +513,8 @@ case $basic_machine in
basic_machine=craynv-cray
os=-unicosmp
;;
- cr16c)
- basic_machine=cr16c-unknown
+ cr16)
+ basic_machine=cr16-unknown
os=-elf
;;
crds | unos)
@@ -489,6 +552,14 @@ case $basic_machine in
basic_machine=m88k-motorola
os=-sysv3
;;
+ dicos)
+ basic_machine=i686-pc
+ os=-dicos
+ ;;
+ djgpp)
+ basic_machine=i586-pc
+ os=-msdosdjgpp
+ ;;
dpx20 | dpx20-*)
basic_machine=rs6000-bull
os=-bosx
@@ -639,6 +710,14 @@ case $basic_machine in
basic_machine=m68k-isi
os=-sysv
;;
+ m68knommu)
+ basic_machine=m68k-unknown
+ os=-linux
+ ;;
+ m68knommu-*)
+ basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=-linux
+ ;;
m88k-omron*)
basic_machine=m88k-omron
;;
@@ -650,10 +729,17 @@ case $basic_machine in
basic_machine=ns32k-utek
os=-sysv
;;
+ microblaze)
+ basic_machine=microblaze-xilinx
+ ;;
mingw32)
basic_machine=i386-pc
os=-mingw32
;;
+ mingw32ce)
+ basic_machine=arm-unknown
+ os=-mingw32ce
+ ;;
miniframe)
basic_machine=m68000-convergent
;;
@@ -679,6 +765,9 @@ case $basic_machine in
basic_machine=i386-pc
os=-msdos
;;
+ ms1-*)
+ basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+ ;;
mvs)
basic_machine=i370-ibm
os=-mvs
@@ -754,9 +843,8 @@ case $basic_machine in
basic_machine=hppa1.1-oki
os=-proelf
;;
- or32 | or32-*)
+ openrisc | openrisc-*)
basic_machine=or32-unknown
- os=-coff
;;
os400)
basic_machine=powerpc-ibm
@@ -778,6 +866,14 @@ case $basic_machine in
basic_machine=i860-intel
os=-osf
;;
+ parisc)
+ basic_machine=hppa-unknown
+ os=-linux
+ ;;
+ parisc-*)
+ basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=-linux
+ ;;
pbd)
basic_machine=sparc-tti
;;
@@ -787,6 +883,12 @@ case $basic_machine in
pc532 | pc532-*)
basic_machine=ns32k-pc532
;;
+ pc98)
+ basic_machine=i386-pc
+ ;;
+ pc98-*)
+ basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
pentium | p5 | k5 | k6 | nexgen | viac3)
basic_machine=i586-pc
;;
@@ -843,6 +945,10 @@ case $basic_machine in
basic_machine=i586-unknown
os=-pw32
;;
+ rdos)
+ basic_machine=i386-pc
+ os=-rdos
+ ;;
rom68k)
basic_machine=m68k-rom68k
os=-coff
@@ -869,6 +975,10 @@ case $basic_machine in
sb1el)
basic_machine=mipsisa64sb1el-unknown
;;
+ sde)
+ basic_machine=mipsisa32-sde
+ os=-elf
+ ;;
sei)
basic_machine=mips-sei
os=-seiux
@@ -880,6 +990,9 @@ case $basic_machine in
basic_machine=sh-hitachi
os=-hms
;;
+ sh5el)
+ basic_machine=sh5le-unknown
+ ;;
sh64)
basic_machine=sh64-unknown
;;
@@ -969,6 +1082,10 @@ case $basic_machine in
basic_machine=tic6x-unknown
os=-coff
;;
+ tile*)
+ basic_machine=tile-unknown
+ os=-linux-gnu
+ ;;
tx39)
basic_machine=mipstx39-unknown
;;
@@ -1029,6 +1146,10 @@ case $basic_machine in
basic_machine=hppa1.1-winbond
os=-proelf
;;
+ xbox)
+ basic_machine=i686-pc
+ os=-mingw32
+ ;;
xps | xps100)
basic_machine=xps100-honeywell
;;
@@ -1040,6 +1161,10 @@ case $basic_machine in
basic_machine=z8k-unknown
os=-sim
;;
+ z80-*-coff)
+ basic_machine=z80-unknown
+ os=-sim
+ ;;
none)
basic_machine=none-none
os=-none
@@ -1078,13 +1203,10 @@ case $basic_machine in
we32k)
basic_machine=we32k-att
;;
- sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele)
+ sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
basic_machine=sh-unknown
;;
- sh64)
- basic_machine=sh64-unknown
- ;;
- sparc | sparcv8 | sparcv9 | sparcv9b)
+ sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
basic_machine=sparc-sun
;;
cydra)
@@ -1151,26 +1273,30 @@ case $os in
# Each alternative MUST END IN A *, to match a version number.
# -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+ | -kopensolaris* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* \
+ | -aos* | -aros* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \
+ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+ | -openbsd* | -solidbsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
- | -chorusos* | -chorusrdb* \
+ | -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
+ | -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
- | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
+ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+ | -skyos* | -haiku* | -rdos* | -toppers* | -drops*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1188,7 +1314,7 @@ case $os in
os=`echo $os | sed -e 's|nto|nto-qnx|'`
;;
-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
| -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
;;
-mac*)
@@ -1297,6 +1423,12 @@ case $os in
-kaos*)
os=-kaos
;;
+ -zvmoe)
+ os=-zvmoe
+ ;;
+ -dicos*)
+ os=-dicos
+ ;;
-none)
;;
*)
@@ -1319,6 +1451,12 @@ else
# system, and we'll never get to this point.
case $basic_machine in
+ score-*)
+ os=-elf
+ ;;
+ spu-*)
+ os=-elf
+ ;;
*-acorn)
os=-riscix1.2
;;
@@ -1328,9 +1466,9 @@ case $basic_machine in
arm*-semi)
os=-aout
;;
- c4x-* | tic4x-*)
- os=-coff
- ;;
+ c4x-* | tic4x-*)
+ os=-coff
+ ;;
# This must come before the *-dec entry.
pdp10-*)
os=-tops20
@@ -1356,6 +1494,9 @@ case $basic_machine in
m68*-cisco)
os=-aout
;;
+ mep-*)
+ os=-elf
+ ;;
mips*-cisco)
os=-elf
;;
@@ -1374,6 +1515,9 @@ case $basic_machine in
*-be)
os=-beos
;;
+ *-haiku)
+ os=-haiku
+ ;;
*-ibm)
os=-aix
;;
@@ -1482,7 +1626,7 @@ case $basic_machine in
-sunos*)
vendor=sun
;;
- -aix*)
+ -cnk*|-aix*)
vendor=ibm
;;
-beos*)
@@ -1545,7 +1689,7 @@ case $basic_machine in
esac
echo $basic_machine$os
-exit 0
+exit
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 6b3c4caac0ca..f1b060e9487a 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,12 +31,12 @@ dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl email address for reporting bugs.
-AC_INIT([[llvm]],[[2.6svn]],[llvmbugs@cs.uiuc.edu])
+AC_INIT([[llvm]],[[2.7svn]],[llvmbugs@cs.uiuc.edu])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
-AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign."])
-AC_COPYRIGHT([Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign.])
+AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign."])
+AC_COPYRIGHT([Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.])
dnl Indicate that we require autoconf 2.59 or later. Ths is needed because we
dnl use some autoconf macros only available in 2.59.
@@ -165,6 +165,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
llvm_cv_os_type="SunOS"
llvm_cv_platform_type="Unix" ;;
+ *-*-auroraux*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+ llvm_cv_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="AuroraUX"
+ llvm_cv_platform_type="Unix" ;;
*-*-win32*)
llvm_cv_link_all_option="-Wl,--whole-archive"
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
@@ -175,6 +180,21 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
llvm_cv_os_type="MingW"
llvm_cv_platform_type="Win32" ;;
+ *-*-haiku*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Haiku"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-eabi*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-elf*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
*)
llvm_cv_link_all_option=""
llvm_cv_no_link_all_option=""
@@ -182,6 +202,47 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
llvm_cv_platform_type="Unknown" ;;
esac])
+AC_CACHE_CHECK([type of operating system we're going to target],
+ [llvm_cv_target_os_type],
+[case $target in
+ *-*-aix*)
+ llvm_cv_target_os_type="AIX" ;;
+ *-*-irix*)
+ llvm_cv_target_os_type="IRIX" ;;
+ *-*-cygwin*)
+ llvm_cv_target_os_type="Cygwin" ;;
+ *-*-darwin*)
+ llvm_cv_target_os_type="Darwin" ;;
+ *-*-freebsd*)
+ llvm_cv_target_os_type="FreeBSD" ;;
+ *-*-openbsd*)
+ llvm_cv_target_os_type="OpenBSD" ;;
+ *-*-netbsd*)
+ llvm_cv_target_os_type="NetBSD" ;;
+ *-*-dragonfly*)
+ llvm_cv_target_os_type="DragonFly" ;;
+ *-*-hpux*)
+ llvm_cv_target_os_type="HP-UX" ;;
+ *-*-interix*)
+ llvm_cv_target_os_type="Interix" ;;
+ *-*-linux*)
+ llvm_cv_target_os_type="Linux" ;;
+ *-*-solaris*)
+ llvm_cv_target_os_type="SunOS" ;;
+ *-*-auroraux*)
+ llvm_cv_target_os_type="AuroraUX" ;;
+ *-*-win32*)
+ llvm_cv_target_os_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_target_os_type="MingW" ;;
+ *-*-haiku*)
+ llvm_cv_target_os_type="Haiku" ;;
+ *-unknown-eabi*)
+ llvm_cv_target_os_type="Freestanding" ;;
+ *)
+ llvm_cv_target_os_type="Unknown" ;;
+esac])
+
dnl Make sure we aren't attempting to configure for an unknown system
if test "$llvm_cv_os_type" = "Unknown" ; then
AC_MSG_ERROR([Operating system is unknown, configure can't continue])
@@ -190,6 +251,8 @@ fi
dnl Set the "OS" Makefile variable based on the platform type so the
dnl makefile can configure itself to specific build hosts
AC_SUBST(OS,$llvm_cv_os_type)
+AC_SUBST(HOST_OS,$llvm_cv_os_type)
+AC_SUBST(TARGET_OS,$llvm_cv_target_os_type)
dnl Set the LINKALL and NOLINKALL Makefile variables based on the platform
AC_SUBST(LINKALL,$llvm_cv_link_all_option)
@@ -221,12 +284,13 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
alpha*-*) llvm_cv_target_arch="Alpha" ;;
- ia64-*) llvm_cv_target_arch="IA64" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
mips-*) llvm_cv_target_arch="Mips" ;;
pic16-*) llvm_cv_target_arch="PIC16" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
+ s390x-*) llvm_cv_target_arch="SystemZ" ;;
+ bfin-*) llvm_cv_target_arch="Blackfin" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac])
@@ -331,6 +395,16 @@ else
AC_SUBST(DEBUG_RUNTIME,[[DEBUG_RUNTIME=1]])
fi
+dnl --enable-debug-symbols : should even optimized compiler libraries
+dnl have debug symbols?
+AC_ARG_ENABLE(debug-symbols,
+ AS_HELP_STRING(--enable-debug-symbols,[Build compiler with debug symbols (default is NO if optimization is on and YES if it's off)]),,enableval=no)
+if test ${enableval} = "no" ; then
+ AC_SUBST(DEBUG_SYMBOLS,[[]])
+else
+ AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]])
+fi
+
dnl --enable-jit: check whether they want to enable the jit
AC_ARG_ENABLE(jit,
AS_HELP_STRING(--enable-jit,
@@ -341,18 +415,19 @@ then
AC_SUBST(JIT,[[]])
else
case "$llvm_cv_target_arch" in
- x86) AC_SUBST(TARGET_HAS_JIT,1) ;;
- Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;;
- PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
- x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
- Alpha) AC_SUBST(TARGET_HAS_JIT,1) ;;
- IA64) AC_SUBST(TARGET_HAS_JIT,0) ;;
- ARM) AC_SUBST(TARGET_HAS_JIT,0) ;;
- Mips) AC_SUBST(TARGET_HAS_JIT,0) ;;
- PIC16) AC_SUBST(TARGET_HAS_JIT,0) ;;
- XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
- MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
- *) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ x86) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ Alpha) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ Mips) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ PIC16) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ SystemZ) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ Blackfin) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ *) AC_SUBST(TARGET_HAS_JIT,0) ;;
esac
fi
@@ -398,44 +473,49 @@ AC_DEFINE_UNQUOTED([ENABLE_PIC],$ENABLE_PIC,
dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
- [Build specific host targets: all,host-only,{target-name} (default=all)]),,
+ [Build specific host targets: all or target1,target2,... Valid targets are:
+ host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu, pic16,
+ xcore, msp430, systemz, blackfin, cbe, msil, and cpp (default=all)]),,
enableval=all)
+if test "$enableval" = host-only ; then
+ enableval=host
+fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha IA64 ARM Mips CellSPU PIC16 XCore MSP430 CBackend MSIL CppBackend" ;;
- host-only)
- case "$llvm_cv_target_arch" in
- x86) TARGETS_TO_BUILD="X86" ;;
- x86_64) TARGETS_TO_BUILD="X86" ;;
- Sparc) TARGETS_TO_BUILD="Sparc" ;;
- PowerPC) TARGETS_TO_BUILD="PowerPC" ;;
- Alpha) TARGETS_TO_BUILD="Alpha" ;;
- IA64) TARGETS_TO_BUILD="IA64" ;;
- ARM) TARGETS_TO_BUILD="ARM" ;;
- Mips) TARGETS_TO_BUILD="Mips" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU" ;;
- PIC16) TARGETS_TO_BUILD="PIC16" ;;
- XCore) TARGETS_TO_BUILD="XCore" ;;
- MSP430) TARGETS_TO_BUILD="MSP430" ;;
- *) AC_MSG_ERROR([Can not set target to build]) ;;
- esac
- ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
- x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
- x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
- sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
- powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
- alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
- ia64) TARGETS_TO_BUILD="IA64 $TARGETS_TO_BUILD" ;;
- arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
- mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
- pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
- xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
- msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
- msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;;
- cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
+ arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
+ xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+ blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+ cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
+ msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;;
+ cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ host) case "$llvm_cv_target_arch" in
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
+ ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
+ XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+ Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+        *) AC_MSG_ERROR([Cannot set target to build]) ;;
+ esac ;;
*) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
esac
done
@@ -454,17 +534,22 @@ for a_target in $TARGETS_TO_BUILD; do
done
# Build the LLVM_TARGET and LLVM_ASM_PRINTER macro uses for
-# Targets.def and AsmPrinters.def.
+# Targets.def, AsmPrinters.def, and AsmParsers.def.
LLVM_ENUM_TARGETS=""
LLVM_ENUM_ASM_PRINTERS=""
+LLVM_ENUM_ASM_PARSERS=""
for target_to_build in $TARGETS_TO_BUILD; do
LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then
LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
fi
+ if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then
+ LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS";
+ fi
done
AC_SUBST(LLVM_ENUM_TARGETS)
AC_SUBST(LLVM_ENUM_ASM_PRINTERS)
+AC_SUBST(LLVM_ENUM_ASM_PARSERS)
dnl Prevent the CBackend from using printf("%a") for floating point so older
dnl C compilers that cannot deal with the 0x0p+0 hex floating point format
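For reference, %a is the C99 hexadecimal floating-point conversion the comment refers to; a minimal C sketch of the output this check guards against:

    /* Prints "0x1p+0", the C99 hex-float form that some older
       C compilers and libcs cannot parse back. */
    #include <stdio.h>

    int main(void) {
      printf("%a\n", 1.0);
      return 0;
    }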
@@ -673,6 +758,46 @@ if test "$DOT" != "echo dot" ; then
AC_DEFINE_UNQUOTED([LLVM_PATH_DOT],"$DOT${EXEEXT}",
[Define to path to dot program if found or 'echo dot' otherwise])
fi
+AC_PATH_PROG(FDP, [fdp], [echo fdp])
+if test "$FDP" != "echo fdp" ; then
+  AC_DEFINE([HAVE_FDP],[1],[Define if the fdp program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ FDP=`echo $FDP | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_FDP],"$FDP${EXEEXT}",
+ [Define to path to fdp program if found or 'echo fdp' otherwise])
+fi
+AC_PATH_PROG(NEATO, [neato], [echo neato])
+if test "$NEATO" != "echo neato" ; then
+  AC_DEFINE([HAVE_NEATO],[1],[Define if the neato program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ NEATO=`echo $NEATO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_NEATO],"$NEATO${EXEEXT}",
+ [Define to path to neato program if found or 'echo neato' otherwise])
+fi
+AC_PATH_PROG(TWOPI, [twopi], [echo twopi])
+if test "$TWOPI" != "echo twopi" ; then
+  AC_DEFINE([HAVE_TWOPI],[1],[Define if the twopi program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ TWOPI=`echo $TWOPI | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_TWOPI],"$TWOPI${EXEEXT}",
+ [Define to path to twopi program if found or 'echo twopi' otherwise])
+fi
+AC_PATH_PROG(CIRCO, [circo], [echo circo])
+if test "$CIRCO" != "echo circo" ; then
+  AC_DEFINE([HAVE_CIRCO],[1],[Define if the circo program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ CIRCO=`echo $CIRCO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_CIRCO],"$CIRCO${EXEEXT}",
+ [Define to path to circo program if found or 'echo circo' otherwise])
+fi
AC_PATH_PROGS(GV, [gv gsview32], [echo gv])
if test "$GV" != "echo gv" ; then
AC_DEFINE([HAVE_GV],[1],[Define if the gv program is available])
@@ -824,8 +949,6 @@ dnl=== SECTION 5: Check for libraries
dnl===
dnl===-----------------------------------------------------------------------===
-dnl libelf is for sparc only; we can ignore it if we don't have it
-AC_CHECK_LIB(elf, elf_begin)
AC_CHECK_LIB(m,sin)
if test "$llvm_cv_os_type" = "MingW" ; then
AC_CHECK_LIB(imagehlp, main)
@@ -851,7 +974,7 @@ AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
dnl pthread locking functions are optional - but llvm will not be thread-safe
dnl without locks.
if test "$ENABLE_THREADS" -eq 1 ; then
- AC_CHECK_LIB(pthread,pthread_mutex_init)
+ AC_CHECK_LIB(pthread, pthread_mutex_init)
AC_SEARCH_LIBS(pthread_mutex_lock,pthread,
AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1],
[Have pthread_mutex_lock]))
@@ -882,6 +1005,42 @@ AC_ARG_WITH(udis86,
AC_DEFINE_UNQUOTED([USE_UDIS86],$USE_UDIS86,
[Define if use udis86 library])
+dnl Allow OProfile support for JIT output.
+AC_ARG_WITH(oprofile,
+ AS_HELP_STRING([--with-oprofile=<prefix>],
+ [Tell OProfile >= 0.9.4 how to symbolize JIT output]),
+ [
+ AC_SUBST(USE_OPROFILE, [1])
+ case "$withval" in
+ /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;;
+ no) llvm_cv_oppath=
+ AC_SUBST(USE_OPROFILE, [0]) ;;
+ *) llvm_cv_oppath="${withval}/lib/oprofile"
+ CPPFLAGS="-I${withval}/include";;
+ esac
+ if test -n "$llvm_cv_oppath" ; then
+ LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+ dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744:
+ dnl libbfd is not included properly in libopagent in some Debian
+ dnl versions. If libbfd isn't found at all, we assume opagent works
+ dnl anyway.
+ AC_SEARCH_LIBS(bfd_init, bfd, [], [])
+ AC_SEARCH_LIBS(op_open_agent, opagent, [], [
+      echo "Error: libopagent is required but was not found."
+      exit 1
+ ])
+ AC_CHECK_HEADER([opagent.h], [], [
+      echo "Error: opagent.h is required but was not found."
+      exit 1
+ ])
+ fi
+ ],
+ [
+ AC_SUBST(USE_OPROFILE, [0])
+ ])
+AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE,
+ [Define if we have the oprofile JIT-support library])
+
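The block above links the JIT against OProfile's agent library when --with-oprofile is given. A minimal C sketch of the opagent interface it probes for (op_open_agent, op_write_native_code, and op_close_agent are the documented OProfile >= 0.9.4 JIT entry points; the helper name and parameters here are illustrative assumptions):

    #include <opagent.h>
    #include <stdint.h>

    /* Register one JIT-compiled function with a running oprofile daemon
       so its samples can be symbolized. */
    static int report_jitted_code(const char *name, void *code, unsigned size) {
      op_agent_t agent = op_open_agent();
      if (!agent)
        return -1;
      op_write_native_code(agent, name, (uint64_t)(uintptr_t)code, code, size);
      return op_close_agent(agent);
    }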
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 6: Check for header files
@@ -944,10 +1103,11 @@ AC_CHECK_FUNCS([powf fmodf strtof round ])
AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ])
AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
AC_CHECK_FUNCS([mktemp realpath sbrk setrlimit strdup ])
-AC_CHECK_FUNCS([strerror strerror_r strerror_s ])
+AC_CHECK_FUNCS([strerror strerror_r strerror_s setenv ])
AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp])
AC_C_PRINTF_A
+dnl FIXME: This check is no longer used; remove it once testing confirms it is safe.
AC_FUNC_ALLOCA
AC_FUNC_RAND48
@@ -1003,6 +1163,16 @@ dnl=== SECTION 9: Additional checks, variables, etc.
dnl===
dnl===-----------------------------------------------------------------------===
+dnl Handle 32-bit linux systems running a 64-bit kernel.
+dnl This has to come after section 4 because it invokes the compiler.
+if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
+ AC_IS_LINUX_MIXED
+ if test "$llvm_cv_linux_mixed" = "yes"; then
+ llvm_cv_target_arch="x86"
+ ARCH="x86"
+ fi
+fi
+
dnl Check, whether __dso_handle is present
AC_CHECK_FUNCS([__dso_handle])
@@ -1178,8 +1348,8 @@ dnl files can be updated automatically when their *.in sources change.
AC_CONFIG_HEADERS([include/llvm/Config/config.h])
AC_CONFIG_FILES([include/llvm/Config/Targets.def])
AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def])
+AC_CONFIG_FILES([include/llvm/Config/AsmParsers.def])
AC_CONFIG_HEADERS([include/llvm/Support/DataTypes.h])
-AC_CONFIG_HEADERS([include/llvm/ADT/iterator.h])
dnl Configure the makefile's configuration data
AC_CONFIG_FILES([Makefile.config])
diff --git a/autoconf/m4/config_makefile.m4 b/autoconf/m4/config_makefile.m4
index f21a25631edf..b1eaffdcd85c 100644
--- a/autoconf/m4/config_makefile.m4
+++ b/autoconf/m4/config_makefile.m4
@@ -5,5 +5,5 @@
AC_DEFUN([AC_CONFIG_MAKEFILE],
[AC_CONFIG_COMMANDS($1,
[${llvm_src}/autoconf/mkinstalldirs `dirname $1`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/$1 $1])
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/$1 $1])
])
diff --git a/autoconf/m4/linux_mixed_64_32.m4 b/autoconf/m4/linux_mixed_64_32.m4
new file mode 100644
index 000000000000..123491f87e5e
--- /dev/null
+++ b/autoconf/m4/linux_mixed_64_32.m4
@@ -0,0 +1,17 @@
+#
+# Some Linux machines run a 64-bit kernel with a 32-bit userspace. 'uname -m'
+# shows these as x86_64. Ask the system 'gcc' what it thinks.
+#
+AC_DEFUN([AC_IS_LINUX_MIXED],
+[AC_CACHE_CHECK(for 32-bit userspace on 64-bit system,llvm_cv_linux_mixed,
+[ AC_LANG_PUSH([C])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [[#ifndef __x86_64__
+ error: Not x86-64 even if uname says so!
+ #endif
+ ]])],
+ [llvm_cv_linux_mixed=no],
+ [llvm_cv_linux_mixed=yes])
+ AC_LANG_POP([C])
+])
+])
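The macro decides by asking the compiler rather than uname; a standalone C equivalent of the same probe (compile it with the system gcc: success means a genuine 64-bit userspace, failure means 32-bit userspace under a 64-bit kernel):

    /* mixed.c: refuses to compile on a 32-bit userspace, even when
       `uname -m` reports x86_64 because only the kernel is 64-bit. */
    #ifndef __x86_64__
    #error "Not x86-64 even if uname says so!"
    #endif

    int main(void) { return 0; }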
diff --git a/bindings/ada/analysis/llvm_analysis-binding.ads b/bindings/ada/analysis/llvm_analysis-binding.ads
new file mode 100644
index 000000000000..c51a50353f11
--- /dev/null
+++ b/bindings/ada/analysis/llvm_analysis-binding.ads
@@ -0,0 +1,32 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_Analysis.Binding is
+
+ function LLVMVerifyModule
+ (M : in llvm.LLVMModuleRef;
+ Action : in LLVM_Analysis.LLVMVerifierFailureAction;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMVerifyFunction
+ (Fn : in llvm.LLVMValueRef;
+ Action : in LLVM_Analysis.LLVMVerifierFailureAction)
+ return Interfaces.C.int;
+
+ procedure LLVMViewFunctionCFG (Fn : in llvm.LLVMValueRef);
+
+ procedure LLVMViewFunctionCFGOnly (Fn : in llvm.LLVMValueRef);
+
+private
+
+ pragma Import (C, LLVMVerifyModule, "Ada_LLVMVerifyModule");
+ pragma Import (C, LLVMVerifyFunction, "Ada_LLVMVerifyFunction");
+ pragma Import (C, LLVMViewFunctionCFG, "Ada_LLVMViewFunctionCFG");
+ pragma Import (C, LLVMViewFunctionCFGOnly, "Ada_LLVMViewFunctionCFGOnly");
+
+end LLVM_Analysis.Binding;
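These Ada imports forward to the llvm-c analysis entry points wrapped in the .cxx file below; a minimal C sketch of the same calls, assuming the llvm-c headers of this release (module creation and message disposal come from llvm-c/Core.h):

    #include "llvm-c/Core.h"
    #include "llvm-c/Analysis.h"
    #include <stdio.h>

    int main(void) {
      LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
      char *error = NULL;

      /* Returns nonzero and fills *error when verification fails. */
      if (LLVMVerifyModule(mod, LLVMReturnStatusAction, &error)) {
        fprintf(stderr, "verify: %s\n", error);
        LLVMDisposeMessage(error);
      }
      LLVMDisposeModule(mod);
      return 0;
    }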
diff --git a/bindings/ada/analysis/llvm_analysis.ads b/bindings/ada/analysis/llvm_analysis.ads
new file mode 100644
index 000000000000..aa7b3f0e2e91
--- /dev/null
+++ b/bindings/ada/analysis/llvm_analysis.ads
@@ -0,0 +1,30 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C;
+
+
+package LLVM_Analysis is
+
+ -- LLVMVerifierFailureAction
+ --
+ type LLVMVerifierFailureAction is (
+ LLVMAbortProcessAction,
+ LLVMPrintMessageAction,
+ LLVMReturnStatusAction);
+
+ for LLVMVerifierFailureAction use
+ (LLVMAbortProcessAction => 0,
+ LLVMPrintMessageAction => 1,
+ LLVMReturnStatusAction => 2);
+
+ pragma Convention (C, LLVMVerifierFailureAction);
+
+ type LLVMVerifierFailureAction_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Analysis.LLVMVerifierFailureAction;
+
+ type LLVMVerifierFailureAction_view is access all
+ LLVM_Analysis.LLVMVerifierFailureAction;
+
+end LLVM_Analysis;
diff --git a/bindings/ada/analysis/llvm_analysis_wrap.cxx b/bindings/ada/analysis/llvm_analysis_wrap.cxx
new file mode 100644
index 000000000000..f2a8637343de
--- /dev/null
+++ b/bindings/ada/analysis/llvm_analysis_wrap.cxx
@@ -0,0 +1,369 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+ SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+ if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+ callback = SWIG_ada_exceptions[code].callback;
+ }
+ callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Analysis (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Analysis(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
+
+
+#define protected public
+#define private public
+
+#include "llvm-c/Analysis.h"
+//#include "llvm-c/BitReader.h"
+//#include "llvm-c/BitWriter.h"
+//#include "llvm-c/Core.h"
+//#include "llvm-c/ExecutionEngine.h"
+//#include "llvm-c/LinkTimeOptimizer.h"
+//#include "llvm-c/lto.h"
+//#include "llvm-c/Target.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport int SWIGSTDCALL Ada_LLVMVerifyModule (
+ void * jarg1
+ ,
+
+ int jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMVerifierFailureAction arg2 ;
+ char **arg3 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = (LLVMVerifierFailureAction) jarg2;
+
+ arg3 = (char **)jarg3;
+
+ result = (int)LLVMVerifyModule(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMVerifyFunction (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMVerifierFailureAction arg2 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMVerifierFailureAction) jarg2;
+
+ result = (int)LLVMVerifyFunction(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMViewFunctionCFG (
+ void * jarg1
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ LLVMViewFunctionCFG(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMViewFunctionCFGOnly (
+ void * jarg1
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ LLVMViewFunctionCFGOnly(arg1);
+
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/bindings/ada/bitreader/llvm_bit_reader-binding.ads b/bindings/ada/bitreader/llvm_bit_reader-binding.ads
new file mode 100644
index 000000000000..4fcdb4a84fcf
--- /dev/null
+++ b/bindings/ada/bitreader/llvm_bit_reader-binding.ads
@@ -0,0 +1,52 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_bit_Reader.Binding is
+
+ function LLVMParseBitcode
+ (MemBuf : in llvm.LLVMMemoryBufferRef;
+ OutModule : access llvm.LLVMModuleRef;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMParseBitcodeInContext
+ (MemBuf : in llvm.LLVMMemoryBufferRef;
+ ContextRef : in llvm.LLVMContextRef;
+ OutModule : access llvm.LLVMModuleRef;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMGetBitcodeModuleProvider
+ (MemBuf : in llvm.LLVMMemoryBufferRef;
+ OutMP : access llvm.LLVMModuleProviderRef;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMGetBitcodeModuleProviderInContext
+ (MemBuf : in llvm.LLVMMemoryBufferRef;
+ ContextRef : in llvm.LLVMContextRef;
+ OutMP : access llvm.LLVMModuleProviderRef;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+private
+
+ pragma Import (C, LLVMParseBitcode, "Ada_LLVMParseBitcode");
+ pragma Import
+ (C,
+ LLVMParseBitcodeInContext,
+ "Ada_LLVMParseBitcodeInContext");
+ pragma Import
+ (C,
+ LLVMGetBitcodeModuleProvider,
+ "Ada_LLVMGetBitcodeModuleProvider");
+ pragma Import
+ (C,
+ LLVMGetBitcodeModuleProviderInContext,
+ "Ada_LLVMGetBitcodeModuleProviderInContext");
+
+end LLVM_bit_Reader.Binding;
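The wrapped LLVMParseBitcode reads a whole module out of a memory buffer in one call; a minimal C sketch of the flow these imports expose (LLVMCreateMemoryBufferWithContentsOfFile is assumed from the same era's llvm-c/Core.h):

    #include "llvm-c/Core.h"
    #include "llvm-c/BitReader.h"
    #include <stdio.h>

    int main(int argc, char **argv) {
      LLVMMemoryBufferRef buf;
      LLVMModuleRef mod;
      char *error = NULL;

      if (argc < 2)
        return 1;
      if (LLVMCreateMemoryBufferWithContentsOfFile(argv[1], &buf, &error)) {
        fprintf(stderr, "open: %s\n", error);
        return 1;
      }
      /* A nonzero return plus a message in *error signals a malformed file. */
      if (LLVMParseBitcode(buf, &mod, &error)) {
        fprintf(stderr, "parse: %s\n", error);
        return 1;
      }
      LLVMDisposeMemoryBuffer(buf);
      LLVMDisposeModule(mod);
      return 0;
    }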
diff --git a/bindings/ada/bitreader/llvm_bit_reader.ads b/bindings/ada/bitreader/llvm_bit_reader.ads
new file mode 100644
index 000000000000..7579dea2819d
--- /dev/null
+++ b/bindings/ada/bitreader/llvm_bit_reader.ads
@@ -0,0 +1,6 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+package LLVM_bit_Reader is
+
+end LLVM_bit_Reader;
diff --git a/bindings/ada/bitreader/llvm_bitreader_wrap.cxx b/bindings/ada/bitreader/llvm_bitreader_wrap.cxx
new file mode 100644
index 000000000000..b7ecbed355af
--- /dev/null
+++ b/bindings/ada/bitreader/llvm_bitreader_wrap.cxx
@@ -0,0 +1,423 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+ SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+ if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+ callback = SWIG_ada_exceptions[code].callback;
+ }
+ callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_bit_Reader (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_bit_Reader(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
+
+
+#define protected public
+#define private public
+
+//#include "llvm-c/Analysis.h"
+#include "llvm-c/BitReader.h"
+//#include "llvm-c/BitWriter.h"
+//#include "llvm-c/Core.h"
+//#include "llvm-c/ExecutionEngine.h"
+//#include "llvm-c/LinkTimeOptimizer.h"
+//#include "llvm-c/lto.h"
+//#include "llvm-c/Target.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport int SWIGSTDCALL Ada_LLVMParseBitcode (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
+ LLVMModuleRef *arg2 = (LLVMModuleRef *) 0 ;
+ char **arg3 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMMemoryBufferRef)jarg1;
+
+ arg2 = (LLVMModuleRef *)jarg2;
+
+ arg3 = (char **)jarg3;
+
+ result = (int)LLVMParseBitcode(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMParseBitcodeInContext (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ )
+{
+ int jresult ;
+ LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
+ LLVMContextRef arg2 = (LLVMContextRef) 0 ;
+ LLVMModuleRef *arg3 = (LLVMModuleRef *) 0 ;
+ char **arg4 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMMemoryBufferRef)jarg1;
+
+ arg2 = (LLVMContextRef)jarg2;
+
+ arg3 = (LLVMModuleRef *)jarg3;
+
+ arg4 = (char **)jarg4;
+
+ result = (int)LLVMParseBitcodeInContext(arg1,arg2,arg3,arg4);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMGetBitcodeModuleProvider (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
+ LLVMModuleProviderRef *arg2 = (LLVMModuleProviderRef *) 0 ;
+ char **arg3 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMMemoryBufferRef)jarg1;
+
+ arg2 = (LLVMModuleProviderRef *)jarg2;
+
+ arg3 = (char **)jarg3;
+
+ result = (int)LLVMGetBitcodeModuleProvider(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMGetBitcodeModuleProviderInContext (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ )
+{
+ int jresult ;
+ LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
+ LLVMContextRef arg2 = (LLVMContextRef) 0 ;
+ LLVMModuleProviderRef *arg3 = (LLVMModuleProviderRef *) 0 ;
+ char **arg4 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMMemoryBufferRef)jarg1;
+
+ arg2 = (LLVMContextRef)jarg2;
+
+ arg3 = (LLVMModuleProviderRef *)jarg3;
+
+ arg4 = (char **)jarg4;
+
+ result = (int)LLVMGetBitcodeModuleProviderInContext(arg1,arg2,arg3,arg4);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/bindings/ada/bitwriter/llvm_bit_writer-binding.ads b/bindings/ada/bitwriter/llvm_bit_writer-binding.ads
new file mode 100644
index 000000000000..b5542df0e062
--- /dev/null
+++ b/bindings/ada/bitwriter/llvm_bit_writer-binding.ads
@@ -0,0 +1,28 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_bit_Writer.Binding is
+
+ function LLVMWriteBitcodeToFileHandle
+ (M : in llvm.LLVMModuleRef;
+ Handle : in Interfaces.C.int)
+ return Interfaces.C.int;
+
+ function LLVMWriteBitcodeToFile
+ (M : in llvm.LLVMModuleRef;
+ Path : in Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+private
+
+ pragma Import
+ (C,
+ LLVMWriteBitcodeToFileHandle,
+ "Ada_LLVMWriteBitcodeToFileHandle");
+ pragma Import (C, LLVMWriteBitcodeToFile, "Ada_LLVMWriteBitcodeToFile");
+
+end LLVM_bit_Writer.Binding;
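The writer side is the mirror image of the reader; a minimal C sketch that serializes an (empty) module to disk through the same entry point the Ada import names:

    #include "llvm-c/Core.h"
    #include "llvm-c/BitWriter.h"

    int main(void) {
      LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
      /* Returns 0 on success, nonzero if the file cannot be written. */
      int rc = LLVMWriteBitcodeToFile(mod, "demo.bc");
      LLVMDisposeModule(mod);
      return rc;
    }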
diff --git a/bindings/ada/bitwriter/llvm_bit_writer.ads b/bindings/ada/bitwriter/llvm_bit_writer.ads
new file mode 100644
index 000000000000..35b1f38aa996
--- /dev/null
+++ b/bindings/ada/bitwriter/llvm_bit_writer.ads
@@ -0,0 +1,6 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+package LLVM_bit_Writer is
+
+end LLVM_bit_Writer;
diff --git a/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx b/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
new file mode 100644
index 000000000000..4abf44fffd5c
--- /dev/null
+++ b/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
@@ -0,0 +1,335 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+ SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+ if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+ callback = SWIG_ada_exceptions[code].callback;
+ }
+ callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_bit_Writer (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_bit_Writer(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
+
+
+#define protected public
+#define private public
+
+#include "llvm-c/Analysis.h"
+#include "llvm-c/BitReader.h"
+#include "llvm-c/BitWriter.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm-c/LinkTimeOptimizer.h"
+#include "llvm-c/lto.h"
+#include "llvm-c/Target.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport int SWIGSTDCALL Ada_LLVMWriteBitcodeToFileHandle (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ int jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ int arg2 ;
+ int result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+
+ arg2 = (int) jarg2;
+
+
+ result = (int)LLVMWriteBitcodeToFileHandle(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMWriteBitcodeToFile (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ int jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ int result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (int)LLVMWriteBitcodeToFile(arg1,(char const *)arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/bindings/ada/executionengine/llvm_execution_engine-binding.ads b/bindings/ada/executionengine/llvm_execution_engine-binding.ads
new file mode 100644
index 000000000000..a37c462cf324
--- /dev/null
+++ b/bindings/ada/executionengine/llvm_execution_engine-binding.ads
@@ -0,0 +1,192 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_execution_Engine.Binding is
+
+ procedure LLVMLinkInJIT;
+
+ procedure LLVMLinkInInterpreter;
+
+ function LLVMCreateGenericValueOfInt
+ (Ty : in llvm.LLVMTypeRef;
+ N : in Interfaces.C.Extensions.unsigned_long_long;
+ IsSigned : in Interfaces.C.int)
+ return LLVM_execution_Engine.LLVMGenericValueRef;
+
+ function LLVMCreateGenericValueOfPointer
+ (P : access Interfaces.C.Extensions.void)
+ return LLVM_execution_Engine.LLVMGenericValueRef;
+
+ function LLVMCreateGenericValueOfFloat
+ (Ty : in llvm.LLVMTypeRef;
+ N : in Interfaces.C.double)
+ return LLVM_execution_Engine.LLVMGenericValueRef;
+
+ function LLVMGenericValueIntWidth
+ (GenValRef : in LLVM_execution_Engine.LLVMGenericValueRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMGenericValueToInt
+ (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef;
+ IsSigned : in Interfaces.C.int)
+ return Interfaces.C.Extensions.unsigned_long_long;
+
+ function LLVMGenericValueToPointer
+ (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef)
+ return access Interfaces.C.Extensions.void;
+
+ function LLVMGenericValueToFloat
+ (TyRef : in llvm.LLVMTypeRef;
+ GenVal : in LLVM_execution_Engine.LLVMGenericValueRef)
+ return Interfaces.C.double;
+
+ procedure LLVMDisposeGenericValue
+ (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef);
+
+ function LLVMCreateExecutionEngine
+ (OutEE : access LLVM_execution_Engine.LLVMExecutionEngineRef;
+ MP : in llvm.LLVMModuleProviderRef;
+ OutError : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMCreateInterpreter
+ (OutInterp : access LLVM_execution_Engine.LLVMExecutionEngineRef;
+ MP : in llvm.LLVMModuleProviderRef;
+ OutError : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMCreateJITCompiler
+ (OutJIT : access LLVM_execution_Engine.LLVMExecutionEngineRef;
+ MP : in llvm.LLVMModuleProviderRef;
+ OptLevel : in Interfaces.C.unsigned;
+ OutError : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ procedure LLVMDisposeExecutionEngine
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef);
+
+ procedure LLVMRunStaticConstructors
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef);
+
+ procedure LLVMRunStaticDestructors
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef);
+
+ function LLVMRunFunctionAsMain
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ F : in llvm.LLVMValueRef;
+ ArgC : in Interfaces.C.unsigned;
+ ArgV : access Interfaces.C.Strings.chars_ptr;
+ EnvP : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMRunFunction
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ F : in llvm.LLVMValueRef;
+ NumArgs : in Interfaces.C.unsigned;
+ Args : access LLVM_execution_Engine.LLVMGenericValueRef)
+ return LLVM_execution_Engine.LLVMGenericValueRef;
+
+ procedure LLVMFreeMachineCodeForFunction
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ F : in llvm.LLVMValueRef);
+
+ procedure LLVMAddModuleProvider
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ MP : in llvm.LLVMModuleProviderRef);
+
+ function LLVMRemoveModuleProvider
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ MP : in llvm.LLVMModuleProviderRef;
+ OutMod : access llvm.LLVMModuleRef;
+ OutError : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMFindFunction
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ Name : in Interfaces.C.Strings.chars_ptr;
+ OutFn : access llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ function LLVMGetExecutionEngineTargetData
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef)
+ return LLVM_execution_Engine.LLVMTargetDataRef;
+
+ procedure LLVMAddGlobalMapping
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ Global : in llvm.LLVMValueRef;
+ Addr : access Interfaces.C.Extensions.void);
+
+ function LLVMGetPointerToGlobal
+ (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
+ Global : in llvm.LLVMValueRef)
+ return access Interfaces.C.Extensions.void;
+
+private
+
+ pragma Import (C, LLVMLinkInJIT, "Ada_LLVMLinkInJIT");
+ pragma Import (C, LLVMLinkInInterpreter, "Ada_LLVMLinkInInterpreter");
+ pragma Import
+ (C,
+ LLVMCreateGenericValueOfInt,
+ "Ada_LLVMCreateGenericValueOfInt");
+ pragma Import
+ (C,
+ LLVMCreateGenericValueOfPointer,
+ "Ada_LLVMCreateGenericValueOfPointer");
+ pragma Import
+ (C,
+ LLVMCreateGenericValueOfFloat,
+ "Ada_LLVMCreateGenericValueOfFloat");
+ pragma Import
+ (C,
+ LLVMGenericValueIntWidth,
+ "Ada_LLVMGenericValueIntWidth");
+ pragma Import (C, LLVMGenericValueToInt, "Ada_LLVMGenericValueToInt");
+ pragma Import
+ (C,
+ LLVMGenericValueToPointer,
+ "Ada_LLVMGenericValueToPointer");
+ pragma Import (C, LLVMGenericValueToFloat, "Ada_LLVMGenericValueToFloat");
+ pragma Import (C, LLVMDisposeGenericValue, "Ada_LLVMDisposeGenericValue");
+ pragma Import
+ (C,
+ LLVMCreateExecutionEngine,
+ "Ada_LLVMCreateExecutionEngine");
+ pragma Import (C, LLVMCreateInterpreter, "Ada_LLVMCreateInterpreter");
+ pragma Import (C, LLVMCreateJITCompiler, "Ada_LLVMCreateJITCompiler");
+ pragma Import
+ (C,
+ LLVMDisposeExecutionEngine,
+ "Ada_LLVMDisposeExecutionEngine");
+ pragma Import
+ (C,
+ LLVMRunStaticConstructors,
+ "Ada_LLVMRunStaticConstructors");
+ pragma Import
+ (C,
+ LLVMRunStaticDestructors,
+ "Ada_LLVMRunStaticDestructors");
+ pragma Import (C, LLVMRunFunctionAsMain, "Ada_LLVMRunFunctionAsMain");
+ pragma Import (C, LLVMRunFunction, "Ada_LLVMRunFunction");
+ pragma Import
+ (C,
+ LLVMFreeMachineCodeForFunction,
+ "Ada_LLVMFreeMachineCodeForFunction");
+ pragma Import (C, LLVMAddModuleProvider, "Ada_LLVMAddModuleProvider");
+ pragma Import
+ (C,
+ LLVMRemoveModuleProvider,
+ "Ada_LLVMRemoveModuleProvider");
+ pragma Import (C, LLVMFindFunction, "Ada_LLVMFindFunction");
+ pragma Import
+ (C,
+ LLVMGetExecutionEngineTargetData,
+ "Ada_LLVMGetExecutionEngineTargetData");
+ pragma Import (C, LLVMAddGlobalMapping, "Ada_LLVMAddGlobalMapping");
+ pragma Import (C, LLVMGetPointerToGlobal, "Ada_LLVMGetPointerToGlobal");
+
+end LLVM_execution_Engine.Binding;
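Taken together, these imports cover a whole JIT round trip. A C sketch that builds, JITs, and calls a two-argument add function through the same llvm-c calls; the module-provider step reflects this release's API, and the IRBuilder helpers are assumed from llvm-c/Core.h:

    #include "llvm-c/Core.h"
    #include "llvm-c/ExecutionEngine.h"
    #include <stdio.h>

    int main(void) {
      /* Build sum(a, b) = a + b in a fresh module. */
      LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
      LLVMTypeRef i32 = LLVMInt32Type();
      LLVMTypeRef params[2] = { i32, i32 };
      LLVMValueRef sum =
          LLVMAddFunction(mod, "sum", LLVMFunctionType(i32, params, 2, 0));
      LLVMBuilderRef b = LLVMCreateBuilder();
      LLVMPositionBuilderAtEnd(b, LLVMAppendBasicBlock(sum, "entry"));
      LLVMBuildRet(b, LLVMBuildAdd(b, LLVMGetParam(sum, 0),
                                   LLVMGetParam(sum, 1), "tmp"));

      /* Hand the module to an engine via a provider, then call sum(2, 40). */
      LLVMExecutionEngineRef ee;
      char *error = NULL;
      if (LLVMCreateExecutionEngine(
              &ee, LLVMCreateModuleProviderForExistingModule(mod), &error)) {
        fprintf(stderr, "engine: %s\n", error);
        return 1;
      }
      LLVMGenericValueRef args[2] = {
        LLVMCreateGenericValueOfInt(i32, 2, 0),
        LLVMCreateGenericValueOfInt(i32, 40, 0)
      };
      LLVMGenericValueRef res = LLVMRunFunction(ee, sum, 2, args);
      printf("sum = %llu\n", LLVMGenericValueToInt(res, 0));
      LLVMDisposeBuilder(b);
      LLVMDisposeExecutionEngine(ee);
      return 0;
    }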
diff --git a/bindings/ada/executionengine/llvm_execution_engine.ads b/bindings/ada/executionengine/llvm_execution_engine.ads
new file mode 100644
index 000000000000..c7669920f7ac
--- /dev/null
+++ b/bindings/ada/executionengine/llvm_execution_engine.ads
@@ -0,0 +1,90 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C.Extensions;
+
+
+package LLVM_execution_Engine is
+
+ -- LLVMOpaqueGenericValue
+ --
+ type LLVMOpaqueGenericValue is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueGenericValue_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.LLVMOpaqueGenericValue;
+
+ type LLVMOpaqueGenericValue_view is access all
+ LLVM_execution_Engine.LLVMOpaqueGenericValue;
+
+ -- LLVMGenericValueRef
+ --
+ type LLVMGenericValueRef is access all
+ LLVM_execution_Engine.LLVMOpaqueGenericValue;
+
+ type LLVMGenericValueRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.LLVMGenericValueRef;
+
+ type LLVMGenericValueRef_view is access all
+ LLVM_execution_Engine.LLVMGenericValueRef;
+
+ -- LLVMOpaqueExecutionEngine
+ --
+ type LLVMOpaqueExecutionEngine is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueExecutionEngine_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.LLVMOpaqueExecutionEngine;
+
+ type LLVMOpaqueExecutionEngine_view is access all
+ LLVM_execution_Engine.LLVMOpaqueExecutionEngine;
+
+ -- LLVMExecutionEngineRef
+ --
+ type LLVMExecutionEngineRef is access all
+ LLVM_execution_Engine.LLVMOpaqueExecutionEngine;
+
+ type LLVMExecutionEngineRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.LLVMExecutionEngineRef;
+
+ type LLVMExecutionEngineRef_view is access all
+ LLVM_execution_Engine.LLVMExecutionEngineRef;
+
+ -- LLVMTargetDataRef
+ --
+ type LLVMTargetDataRef is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMTargetDataRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.LLVMTargetDataRef;
+
+ type LLVMTargetDataRef_view is access all
+ LLVM_execution_Engine.LLVMTargetDataRef;
+
+ -- GenericValue
+ --
+ type GenericValue is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type GenericValue_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.GenericValue;
+
+ type GenericValue_view is access all LLVM_execution_Engine.GenericValue;
+
+ -- ExecutionEngine
+ --
+ type ExecutionEngine is new Interfaces.C.Extensions.incomplete_class_def;
+
+ type ExecutionEngine_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_execution_Engine.ExecutionEngine;
+
+ type ExecutionEngine_view is access all
+ LLVM_execution_Engine.ExecutionEngine;
+
+
+end LLVM_execution_Engine;
diff --git a/bindings/ada/executionengine/llvm_executionengine_wrap.cxx b/bindings/ada/executionengine/llvm_executionengine_wrap.cxx
new file mode 100644
index 000000000000..b63acacb361f
--- /dev/null
+++ b/bindings/ada/executionengine/llvm_executionengine_wrap.cxx
@@ -0,0 +1,924 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+  /* Fall back to the generic handler for out-of-range codes. */
+  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+  if (code >= 0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+    callback = SWIG_ada_exceptions[code].callback;
+  }
+  /* Guard against dispatch before the Ada side has registered its
+     callbacks. */
+  if (callback)
+    callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_execution_Engine (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
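
A minimal host-side sketch of how these callbacks get wired up, assuming a non-Windows build where DllExport and SWIGSTDCALL expand to nothing (the handler and file are hypothetical; in the normal flow the Ada runtime support code performs this registration, typically passing a distinct raiser per exception kind):

    #include <stdio.h>
    #include <stdlib.h>

    typedef void (*SWIG_AdaExceptionCallback_t)(const char *);

    /* Exported by the generated wrapper above. */
    extern void SWIGRegisterExceptionCallbacks_LLVM_execution_Engine(
        SWIG_AdaExceptionCallback_t systemException,
        SWIG_AdaExceptionCallback_t outOfMemory,
        SWIG_AdaExceptionCallback_t indexOutOfRange,
        SWIG_AdaExceptionCallback_t divideByZero,
        SWIG_AdaExceptionCallback_t argumentOutOfRange,
        SWIG_AdaExceptionCallback_t nullReference);

    /* One handler for every code, for simplicity. */
    static void die(const char *msg) {
      fprintf(stderr, "wrapper raised: %s\n", msg);
      exit(1);
    }

    int main(void) {
      SWIGRegisterExceptionCallbacks_LLVM_execution_Engine(
          die, die, die, die, die, die);
      return 0;
    }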
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_execution_Engine(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
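
The trailing `else` makes the expansion a complete if/else statement, so the semicolon at the call site is harmless even inside an unbraced if. A self-contained sketch of how a generated wrapper body uses this pattern (the macro and thrower are renamed stand-ins so the snippet compiles on its own):

    #include <stddef.h>

    /* Stand-ins for the wrapper's internals. */
    static void SWIG_AdaThrowException_stub(const char *msg) { (void)msg; }
    #define CONTRACT_ASSERT(nullreturn, expr, msg) \
      if (!(expr)) { SWIG_AdaThrowException_stub(msg); return nullreturn; } else

    static void *checked_identity(void *handle) {
      /* Raises on the Ada side and returns the null value on failure. */
      CONTRACT_ASSERT(NULL, handle != NULL, "handle must not be null");
      return handle;
    }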
+
+
+/* Expose protected/private members to the wrapper code below (a blunt
+   but common trick in generated bindings). */
+#define protected public
+#define private public
+
+#include "llvm-c/ExecutionEngine.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport void SWIGSTDCALL Ada_LLVMLinkInJIT (
+ )
+{
+ LLVMLinkInJIT();
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMLinkInInterpreter (
+ )
+{
+ LLVMLinkInInterpreter();
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfInt (
+ void * jarg1
+ ,
+
+ unsigned long long jarg2
+ ,
+
+ int jarg3
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned long long arg2 ;
+ int arg3 ;
+ LLVMGenericValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (unsigned long long) jarg2;
+
+
+
+ arg3 = (int) jarg3;
+
+
+ result = (LLVMGenericValueRef)LLVMCreateGenericValueOfInt(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfPointer (
+ void* jarg1
+ )
+{
+ void * jresult ;
+ void *arg1 = (void *) 0 ;
+ LLVMGenericValueRef result;
+
+ arg1 = (void *)jarg1;
+
+ result = (LLVMGenericValueRef)LLVMCreateGenericValueOfPointer(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfFloat (
+ void * jarg1
+ ,
+
+ double jarg2
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ double arg2 ;
+ LLVMGenericValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (double) jarg2;
+
+
+ result = (LLVMGenericValueRef)LLVMCreateGenericValueOfFloat(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGenericValueIntWidth (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMGenericValueRef)jarg1;
+
+ result = (unsigned int)LLVMGenericValueIntWidth(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned long long SWIGSTDCALL Ada_LLVMGenericValueToInt (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ unsigned long long jresult ;
+ LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
+ int arg2 ;
+ unsigned long long result;
+
+ arg1 = (LLVMGenericValueRef)jarg1;
+
+
+ arg2 = (int) jarg2;
+
+
+ result = (unsigned long long)LLVMGenericValueToInt(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void* SWIGSTDCALL Ada_LLVMGenericValueToPointer (
+ void * jarg1
+ )
+{
+ void* jresult ;
+ LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
+ void *result = 0 ;
+
+ arg1 = (LLVMGenericValueRef)jarg1;
+
+ result = (void *)LLVMGenericValueToPointer(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport double SWIGSTDCALL Ada_LLVMGenericValueToFloat (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ double jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMGenericValueRef arg2 = (LLVMGenericValueRef) 0 ;
+ double result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = (LLVMGenericValueRef)jarg2;
+
+ result = (double)LLVMGenericValueToFloat(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeGenericValue (
+ void * jarg1
+ )
+{
+ LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
+
+ arg1 = (LLVMGenericValueRef)jarg1;
+
+ LLVMDisposeGenericValue(arg1);
+
+
+}
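
The Ada_* veneers above are one-to-one pass-throughs; stripped of the marshalling scaffolding, the round trip they wrap looks like this against the underlying C API (a sketch against the LLVM 2.x-era headers this file binds, linked against the LLVM core and execution-engine libraries):

    #include <llvm-c/Core.h>
    #include <llvm-c/ExecutionEngine.h>
    #include <stdio.h>

    int main(void) {
      /* Box 42 as a 32-bit unsigned generic value ... */
      LLVMGenericValueRef gv =
          LLVMCreateGenericValueOfInt(LLVMInt32Type(), 42ULL, /*IsSigned=*/0);
      /* ... inspect and unbox it ... */
      printf("width=%u value=%llu\n",
             LLVMGenericValueIntWidth(gv),
             LLVMGenericValueToInt(gv, /*IsSigned=*/0));
      /* ... and release it. */
      LLVMDisposeGenericValue(gv);
      return 0;
    }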
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMCreateExecutionEngine (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ;
+ LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
+ char **arg3 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMExecutionEngineRef *)jarg1;
+
+ arg2 = (LLVMModuleProviderRef)jarg2;
+
+ arg3 = (char **)jarg3;
+
+ result = (int)LLVMCreateExecutionEngine(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMCreateInterpreter (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ;
+ LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
+ char **arg3 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMExecutionEngineRef *)jarg1;
+
+ arg2 = (LLVMModuleProviderRef)jarg2;
+
+ arg3 = (char **)jarg3;
+
+ result = (int)LLVMCreateInterpreter(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMCreateJITCompiler (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ ,
+
+ void * jarg4
+ )
+{
+ int jresult ;
+ LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ;
+ LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
+ unsigned int arg3 ;
+ char **arg4 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMExecutionEngineRef *)jarg1;
+
+ arg2 = (LLVMModuleProviderRef)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ arg4 = (char **)jarg4;
+
+ result = (int)LLVMCreateJITCompiler(arg1,arg2,arg3,arg4);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeExecutionEngine (
+ void * jarg1
+ )
+{
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ LLVMDisposeExecutionEngine(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMRunStaticConstructors (
+ void * jarg1
+ )
+{
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ LLVMRunStaticConstructors(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMRunStaticDestructors (
+ void * jarg1
+ )
+{
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ LLVMRunStaticDestructors(arg1);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMRunFunctionAsMain (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ ,
+
+ void * jarg4
+ ,
+
+ void * jarg5
+ )
+{
+ int jresult ;
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ unsigned int arg3 ;
+ char **arg4 = (char **) 0 ;
+ char **arg5 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ arg4 = (char **)jarg4;
+
+ arg5 = (char **)jarg5;
+
+ result = (int)LLVMRunFunctionAsMain(arg1,arg2,arg3,(char const *const *)arg4,(char const *const *)arg5);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMRunFunction (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ ,
+
+ void * jarg4
+ )
+{
+ void * jresult ;
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ unsigned int arg3 ;
+ LLVMGenericValueRef *arg4 = (LLVMGenericValueRef *) 0 ;
+ LLVMGenericValueRef result;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ arg4 = (LLVMGenericValueRef *)jarg4;
+
+ result = (LLVMGenericValueRef)LLVMRunFunction(arg1,arg2,arg3,arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMFreeMachineCodeForFunction (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ LLVMFreeMachineCodeForFunction(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddModuleProvider (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMModuleProviderRef)jarg2;
+
+ LLVMAddModuleProvider(arg1,arg2);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMRemoveModuleProvider (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ )
+{
+ int jresult ;
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
+ LLVMModuleRef *arg3 = (LLVMModuleRef *) 0 ;
+ char **arg4 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMModuleProviderRef)jarg2;
+
+ arg3 = (LLVMModuleRef *)jarg3;
+
+ arg4 = (char **)jarg4;
+
+ result = (int)LLVMRemoveModuleProvider(arg1,arg2,arg3,arg4);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMFindFunction (
+ void * jarg1
+ ,
+
+ char * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
+ int result;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = jarg2;
+
+ arg3 = (LLVMValueRef *)jarg3;
+
+ result = (int)LLVMFindFunction(arg1,(char const *)arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
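
Putting the engine creation, lookup, and invocation veneers together: a self-contained sketch of the whole pipeline against the same C API, using only entry points this file binds (build a trivial function, execute it, and read the result back through a generic value):

    #include <llvm-c/Core.h>
    #include <llvm-c/ExecutionEngine.h>
    #include <stdio.h>

    int main(void) {
      LLVMLinkInInterpreter();  /* ensure at least one engine is linked in */

      /* Build:  define i32 @answer() { ret i32 42 } */
      LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
      LLVMTypeRef fnty = LLVMFunctionType(LLVMInt32Type(), NULL, 0, 0);
      LLVMValueRef fn = LLVMAddFunction(mod, "answer", fnty);
      LLVMBuilderRef b = LLVMCreateBuilder();
      LLVMPositionBuilderAtEnd(b, LLVMAppendBasicBlock(fn, "entry"));
      LLVMBuildRet(b, LLVMConstInt(LLVMInt32Type(), 42, 0));
      LLVMDisposeBuilder(b);

      /* The engine is created through the module-provider indirection used
         throughout this binding; on failure the out-string must be
         released with LLVMDisposeMessage. */
      LLVMModuleProviderRef mp = LLVMCreateModuleProviderForExistingModule(mod);
      LLVMExecutionEngineRef ee;
      char *err = NULL;
      if (LLVMCreateExecutionEngine(&ee, mp, &err)) {  /* nonzero on failure */
        fprintf(stderr, "no execution engine: %s\n", err);
        LLVMDisposeMessage(err);
        return 1;
      }

      LLVMValueRef found;
      if (LLVMFindFunction(ee, "answer", &found) == 0) {  /* 0 means found */
        LLVMGenericValueRef rv = LLVMRunFunction(ee, found, 0, NULL);
        printf("answer() = %llu\n", LLVMGenericValueToInt(rv, 0));
        LLVMDisposeGenericValue(rv);
      }
      LLVMDisposeExecutionEngine(ee);  /* also tears down provider and module */
      return 0;
    }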
+
+
+
+DllExport LLVMTargetDataRef SWIGSTDCALL Ada_LLVMGetExecutionEngineTargetData (
+ void * jarg1
+ )
+{
+ LLVMTargetDataRef jresult ;
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMTargetDataRef result;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ result = LLVMGetExecutionEngineTargetData(arg1);
+
+ jresult = result;
+ //jresult = new LLVMTargetDataRef ((LLVMTargetDataRef &) result);
+
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddGlobalMapping (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void* jarg3
+ )
+{
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ void *arg3 = (void *) 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (void *)jarg3;
+
+ LLVMAddGlobalMapping(arg1,arg2,arg3);
+
+
+}
+
+
+
+DllExport void* SWIGSTDCALL Ada_LLVMGetPointerToGlobal (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void* jresult ;
+ LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ void *result = 0 ;
+
+ arg1 = (LLVMExecutionEngineRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (void *)LLVMGetPointerToGlobal(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/bindings/ada/llvm.gpr b/bindings/ada/llvm.gpr
new file mode 100644
index 000000000000..8e87af4fa12e
--- /dev/null
+++ b/bindings/ada/llvm.gpr
@@ -0,0 +1,34 @@
+project LLVM is
+
+ for Languages use ("Ada", "C++");
+ for Source_Dirs use (".", "analysis", "bitreader", "bitwriter", "executionengine", "llvm", "target", "transforms");
+ for Object_Dir use "build";
+ for Exec_Dir use ".";
+ for Library_Name use "llvm_ada";
+ for Library_Dir use "lib";
+ for Library_Ali_Dir use "objects";
+
+ package Naming is
+ for Specification_Suffix ("c++") use ".h";
+ for Implementation_Suffix ("c++") use ".cxx";
+ end Naming;
+
+ package Builder is
+ for Default_Switches ("ada") use ("-g");
+ end Builder;
+
+ package Compiler is
+ for Default_Switches ("ada") use ("-gnato", "-fstack-check", "-g", "-gnata", "-gnat05", "-I/usr/local/include");
+ for Default_Switches ("c++") use ("-D__STDC_LIMIT_MACROS", "-D__STDC_CONSTANT_MACROS", "-I../../include", "-g");
+ end Compiler;
+
+ package Binder is
+ for Default_Switches ("ada") use ("-E");
+ end Binder;
+
+ package Linker is
+ for Default_Switches ("c++") use ("-g");
+ end Linker;
+
+end LLVM;
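
For reference, a project file like this is normally driven with the GNAT project tools, e.g. `gprbuild -P llvm.gpr` (or `gnatmake -P llvm.gpr` on older toolchains). The Ada switches above enable overflow checking (-gnato), stack checking (-fstack-check), assertions (-gnata), and Ada 2005 mode (-gnat05).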
+
diff --git a/bindings/ada/llvm/llvm-binding.ads b/bindings/ada/llvm/llvm-binding.ads
new file mode 100644
index 000000000000..c0e48a1b5bf3
--- /dev/null
+++ b/bindings/ada/llvm/llvm-binding.ads
@@ -0,0 +1,1974 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C.Strings;
+
+
+package llvm.Binding is
+
+ procedure LLVMDisposeMessage
+ (Message : in Interfaces.C.Strings.chars_ptr);
+
+ function LLVMContextCreate return llvm.LLVMContextRef;
+
+ function LLVMGetGlobalContext return llvm.LLVMContextRef;
+
+ procedure LLVMContextDispose (C : in llvm.LLVMContextRef);
+
+ function LLVMModuleCreateWithName
+ (ModuleID : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMModuleRef;
+
+ function LLVMModuleCreateWithNameInContext
+ (ModuleID : in Interfaces.C.Strings.chars_ptr;
+ C : in llvm.LLVMContextRef)
+ return llvm.LLVMModuleRef;
+
+ procedure LLVMDisposeModule (M : in llvm.LLVMModuleRef);
+
+ function LLVMGetDataLayout
+ (M : in llvm.LLVMModuleRef)
+ return Interfaces.C.Strings.chars_ptr;
+
+ procedure LLVMSetDataLayout
+ (M : in llvm.LLVMModuleRef;
+ Triple : in Interfaces.C.Strings.chars_ptr);
+
+ function LLVMGetTarget
+ (M : in llvm.LLVMModuleRef)
+ return Interfaces.C.Strings.chars_ptr;
+
+ procedure LLVMSetTarget
+ (M : in llvm.LLVMModuleRef;
+ Triple : in Interfaces.C.Strings.chars_ptr);
+
+ function LLVMAddTypeName
+ (M : in llvm.LLVMModuleRef;
+ Name : in Interfaces.C.Strings.chars_ptr;
+ Ty : in llvm.LLVMTypeRef)
+ return Interfaces.C.int;
+
+ procedure LLVMDeleteTypeName
+ (M : in llvm.LLVMModuleRef;
+ Name : in Interfaces.C.Strings.chars_ptr);
+
+ function LLVMGetTypeByName
+ (M : in llvm.LLVMModuleRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMTypeRef;
+
+ procedure LLVMDumpModule (M : in llvm.LLVMModuleRef);
+
+ function LLVMGetTypeKind
+ (Ty : in llvm.LLVMTypeRef)
+ return llvm.LLVMTypeKind;
+
+ function LLVMInt1Type return llvm.LLVMTypeRef;
+
+ function LLVMInt8Type return llvm.LLVMTypeRef;
+
+ function LLVMInt16Type return llvm.LLVMTypeRef;
+
+ function LLVMInt32Type return llvm.LLVMTypeRef;
+
+ function LLVMInt64Type return llvm.LLVMTypeRef;
+
+ function LLVMIntType
+ (NumBits : in Interfaces.C.unsigned)
+ return llvm.LLVMTypeRef;
+
+ function LLVMGetIntTypeWidth
+ (IntegerTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMFloatType return llvm.LLVMTypeRef;
+
+ function LLVMDoubleType return llvm.LLVMTypeRef;
+
+ function LLVMX86FP80Type return llvm.LLVMTypeRef;
+
+ function LLVMFP128Type return llvm.LLVMTypeRef;
+
+ function LLVMPPCFP128Type return llvm.LLVMTypeRef;
+
+ function LLVMFunctionType
+ (ReturnType : in llvm.LLVMTypeRef;
+ ParamTypes : access llvm.LLVMTypeRef;
+ ParamCount : in Interfaces.C.unsigned;
+ IsVarArg : in Interfaces.C.int)
+ return llvm.LLVMTypeRef;
+
+ function LLVMIsFunctionVarArg
+ (FunctionTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.int;
+
+ function LLVMGetReturnType
+ (FunctionTy : in llvm.LLVMTypeRef)
+ return llvm.LLVMTypeRef;
+
+ function LLVMCountParamTypes
+ (FunctionTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMGetParamTypes
+ (FunctionTy : in llvm.LLVMTypeRef;
+ Dest : access llvm.LLVMTypeRef);
+
+ function LLVMStructType
+ (ElementTypes : access llvm.LLVMTypeRef;
+ ElementCount : in Interfaces.C.unsigned;
+ Packed : in Interfaces.C.int)
+ return llvm.LLVMTypeRef;
+
+ function LLVMCountStructElementTypes
+ (StructTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMGetStructElementTypes
+ (StructTy : in llvm.LLVMTypeRef;
+ Dest : access llvm.LLVMTypeRef);
+
+ function LLVMIsPackedStruct
+ (StructTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.int;
+
+ function LLVMArrayType
+ (ElementType : in llvm.LLVMTypeRef;
+ ElementCount : in Interfaces.C.unsigned)
+ return llvm.LLVMTypeRef;
+
+ function LLVMPointerType
+ (ElementType : in llvm.LLVMTypeRef;
+ AddressSpace : in Interfaces.C.unsigned)
+ return llvm.LLVMTypeRef;
+
+ function LLVMVectorType
+ (ElementType : in llvm.LLVMTypeRef;
+ ElementCount : in Interfaces.C.unsigned)
+ return llvm.LLVMTypeRef;
+
+ function LLVMGetElementType
+ (Ty : in llvm.LLVMTypeRef)
+ return llvm.LLVMTypeRef;
+
+ function LLVMGetArrayLength
+ (ArrayTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMGetPointerAddressSpace
+ (PointerTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMGetVectorSize
+ (VectorTy : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMVoidType return llvm.LLVMTypeRef;
+
+ function LLVMLabelType return llvm.LLVMTypeRef;
+
+ function LLVMOpaqueType return llvm.LLVMTypeRef;
+
+ function LLVMCreateTypeHandle
+ (PotentiallyAbstractTy : in llvm.LLVMTypeRef)
+ return llvm.LLVMTypeHandleRef;
+
+ procedure LLVMRefineType
+ (AbstractTy : in llvm.LLVMTypeRef;
+ ConcreteTy : in llvm.LLVMTypeRef);
+
+ function LLVMResolveTypeHandle
+ (TypeHandle : in llvm.LLVMTypeHandleRef)
+ return llvm.LLVMTypeRef;
+
+ procedure LLVMDisposeTypeHandle (TypeHandle : in llvm.LLVMTypeHandleRef);
+
+ function LLVMTypeOf (Val : in llvm.LLVMValueRef) return llvm.LLVMTypeRef;
+
+ function LLVMGetValueName
+ (Val : in llvm.LLVMValueRef)
+ return Interfaces.C.Strings.chars_ptr;
+
+ procedure LLVMSetValueName
+ (Val : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr);
+
+ procedure LLVMDumpValue (Val : in llvm.LLVMValueRef);
+
+ function LLVMIsAArgument
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsABasicBlock
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAInlineAsm
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAUser
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstant
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantAggregateZero
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantArray
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantExpr
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantFP
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantInt
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantPointerNull
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantStruct
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAConstantVector
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAGlobalValue
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFunction
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAGlobalAlias
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAGlobalVariable
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAUndefValue
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAInstruction
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsABinaryOperator
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsACallInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAIntrinsicInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsADbgInfoIntrinsic
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsADbgDeclareInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsADbgFuncStartInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsADbgRegionEndInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsADbgRegionStartInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsADbgStopPointInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAEHSelectorInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAMemIntrinsic
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAMemCpyInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAMemMoveInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAMemSetInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsACmpInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFCmpInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAICmpInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAExtractElementInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAGetElementPtrInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAInsertElementInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAInsertValueInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAPHINode
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsASelectInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAShuffleVectorInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAStoreInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsATerminatorInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsABranchInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAInvokeInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAReturnInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsASwitchInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAUnreachableInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAUnwindInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAUnaryInstruction
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAAllocationInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAAllocaInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAMallocInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsACastInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsABitCastInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFPExtInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFPToSIInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFPToUIInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFPTruncInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAIntToPtrInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAPtrToIntInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsASExtInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsASIToFPInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsATruncInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAUIToFPInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAZExtInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAExtractValueInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAFreeInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsALoadInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsAVAArgInst
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstNull
+ (Ty : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstAllOnes
+ (Ty : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetUndef
+ (Ty : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMIsConstant
+ (Val : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ function LLVMIsNull (Val : in llvm.LLVMValueRef) return Interfaces.C.int;
+
+ function LLVMIsUndef
+ (Val : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ function LLVMConstPointerNull
+ (Ty : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstInt
+ (IntTy : in llvm.LLVMTypeRef;
+ N : in Interfaces.C.Extensions.unsigned_long_long;
+ SignExtend : in Interfaces.C.int)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstReal
+ (RealTy : in llvm.LLVMTypeRef;
+ N : in Interfaces.C.double)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstRealOfString
+ (RealTy : in llvm.LLVMTypeRef;
+ Text : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstString
+ (Str : in Interfaces.C.Strings.chars_ptr;
+ Length : in Interfaces.C.unsigned;
+ DontNullTerminate : in Interfaces.C.int)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstArray
+ (ElementTy : in llvm.LLVMTypeRef;
+ ConstantVals : access llvm.LLVMValueRef;
+ Length : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstStruct
+ (ConstantVals : access llvm.LLVMValueRef;
+ Count : in Interfaces.C.unsigned;
+ packed : in Interfaces.C.int)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstVector
+ (ScalarConstantVals : access llvm.LLVMValueRef;
+ Size : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMSizeOf (Ty : in llvm.LLVMTypeRef) return llvm.LLVMValueRef;
+
+ function LLVMConstNeg
+ (ConstantVal : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstNot
+ (ConstantVal : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstAdd
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstSub
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstMul
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstUDiv
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstSDiv
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFDiv
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstURem
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstSRem
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFRem
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstAnd
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstOr
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstXor
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstICmp
+ (Predicate : in llvm.LLVMIntPredicate;
+ LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFCmp
+ (Predicate : in llvm.LLVMRealPredicate;
+ LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstShl
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstLShr
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstAShr
+ (LHSConstant : in llvm.LLVMValueRef;
+ RHSConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstGEP
+ (ConstantVal : in llvm.LLVMValueRef;
+ ConstantIndices : access llvm.LLVMValueRef;
+ NumIndices : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstTrunc
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstSExt
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstZExt
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFPTrunc
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFPExt
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstUIToFP
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstSIToFP
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFPToUI
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstFPToSI
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstPtrToInt
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstIntToPtr
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstBitCast
+ (ConstantVal : in llvm.LLVMValueRef;
+ ToType : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstSelect
+ (ConstantCondition : in llvm.LLVMValueRef;
+ ConstantIfTrue : in llvm.LLVMValueRef;
+ ConstantIfFalse : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstExtractElement
+ (VectorConstant : in llvm.LLVMValueRef;
+ IndexConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstInsertElement
+ (VectorConstant : in llvm.LLVMValueRef;
+ ElementValueConstant : in llvm.LLVMValueRef;
+ IndexConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstShuffleVector
+ (VectorAConstant : in llvm.LLVMValueRef;
+ VectorBConstant : in llvm.LLVMValueRef;
+ MaskConstant : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstExtractValue
+ (AggConstant : in llvm.LLVMValueRef;
+ IdxList : access Interfaces.C.unsigned;
+ NumIdx : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstInsertValue
+ (AggConstant : in llvm.LLVMValueRef;
+ ElementValueConstant : in llvm.LLVMValueRef;
+ IdxList : access Interfaces.C.unsigned;
+ NumIdx : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMConstInlineAsm
+ (Ty : in llvm.LLVMTypeRef;
+ AsmString : in Interfaces.C.Strings.chars_ptr;
+ Constraints : in Interfaces.C.Strings.chars_ptr;
+ HasSideEffects : in Interfaces.C.int)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetGlobalParent
+ (Global : in llvm.LLVMValueRef)
+ return llvm.LLVMModuleRef;
+
+ function LLVMIsDeclaration
+ (Global : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ function LLVMGetLinkage
+ (Global : in llvm.LLVMValueRef)
+ return llvm.LLVMLinkage;
+
+ procedure LLVMSetLinkage
+ (Global : in llvm.LLVMValueRef;
+ Linkage : in llvm.LLVMLinkage);
+
+ function LLVMGetSection
+ (Global : in llvm.LLVMValueRef)
+ return Interfaces.C.Strings.chars_ptr;
+
+ procedure LLVMSetSection
+ (Global : in llvm.LLVMValueRef;
+ Section : in Interfaces.C.Strings.chars_ptr);
+
+ function LLVMGetVisibility
+ (Global : in llvm.LLVMValueRef)
+ return llvm.LLVMVisibility;
+
+ procedure LLVMSetVisibility
+ (Global : in llvm.LLVMValueRef;
+ Viz : in llvm.LLVMVisibility);
+
+ function LLVMGetAlignment
+ (Global : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMSetAlignment
+ (Global : in llvm.LLVMValueRef;
+ Bytes : in Interfaces.C.unsigned);
+
+ function LLVMAddGlobal
+ (M : in llvm.LLVMModuleRef;
+ Ty : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetNamedGlobal
+ (M : in llvm.LLVMModuleRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetFirstGlobal
+ (M : in llvm.LLVMModuleRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetLastGlobal
+ (M : in llvm.LLVMModuleRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetNextGlobal
+ (GlobalVar : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetPreviousGlobal
+ (GlobalVar : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ procedure LLVMDeleteGlobal (GlobalVar : in llvm.LLVMValueRef);
+
+ function LLVMGetInitializer
+ (GlobalVar : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ procedure LLVMSetInitializer
+ (GlobalVar : in llvm.LLVMValueRef;
+ ConstantVal : in llvm.LLVMValueRef);
+
+ function LLVMIsThreadLocal
+ (GlobalVar : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ procedure LLVMSetThreadLocal
+ (GlobalVar : in llvm.LLVMValueRef;
+ IsThreadLocal : in Interfaces.C.int);
+
+ function LLVMIsGlobalConstant
+ (GlobalVar : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ procedure LLVMSetGlobalConstant
+ (GlobalVar : in llvm.LLVMValueRef;
+ IsConstant : in Interfaces.C.int);
+
+ function LLVMAddAlias
+ (M : in llvm.LLVMModuleRef;
+ Ty : in llvm.LLVMTypeRef;
+ Aliasee : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMAddFunction
+ (M : in llvm.LLVMModuleRef;
+ Name : in Interfaces.C.Strings.chars_ptr;
+ FunctionTy : in llvm.LLVMTypeRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetNamedFunction
+ (M : in llvm.LLVMModuleRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetFirstFunction
+ (M : in llvm.LLVMModuleRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetLastFunction
+ (M : in llvm.LLVMModuleRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetNextFunction
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetPreviousFunction
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ procedure LLVMDeleteFunction (Fn : in llvm.LLVMValueRef);
+
+ function LLVMGetIntrinsicID
+ (Fn : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMGetFunctionCallConv
+ (Fn : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMSetFunctionCallConv
+ (Fn : in llvm.LLVMValueRef;
+ CC : in Interfaces.C.unsigned);
+
+ function LLVMGetGC
+ (Fn : in llvm.LLVMValueRef)
+ return Interfaces.C.Strings.chars_ptr;
+
+ procedure LLVMSetGC
+ (Fn : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr);
+
+ procedure LLVMAddFunctionAttr
+ (Fn : in llvm.LLVMValueRef;
+ PA : in llvm.LLVMAttribute);
+
+ procedure LLVMRemoveFunctionAttr
+ (Fn : in llvm.LLVMValueRef;
+ PA : in llvm.LLVMAttribute);
+
+ function LLVMCountParams
+ (Fn : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMGetParams
+ (Fn : in llvm.LLVMValueRef;
+ Params : access llvm.LLVMValueRef);
+
+ function LLVMGetParam
+ (Fn : in llvm.LLVMValueRef;
+ Index : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetParamParent
+ (Inst : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetFirstParam
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetLastParam
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetNextParam
+ (Arg : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetPreviousParam
+ (Arg : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ procedure LLVMAddAttribute
+ (Arg : in llvm.LLVMValueRef;
+ PA : in llvm.LLVMAttribute);
+
+ procedure LLVMRemoveAttribute
+ (Arg : in llvm.LLVMValueRef;
+ PA : in llvm.LLVMAttribute);
+
+ procedure LLVMSetParamAlignment
+ (Arg : in llvm.LLVMValueRef;
+ align : in Interfaces.C.unsigned);
+
+ function LLVMBasicBlockAsValue
+ (BB : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMValueIsBasicBlock
+ (Val : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ function LLVMValueAsBasicBlock
+ (Val : in llvm.LLVMValueRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMGetBasicBlockParent
+ (BB : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMCountBasicBlocks
+ (Fn : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMGetBasicBlocks
+ (Fn : in llvm.LLVMValueRef;
+ BasicBlocks : access llvm.LLVMBasicBlockRef);
+
+ function LLVMGetFirstBasicBlock
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMGetLastBasicBlock
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMGetNextBasicBlock
+ (BB : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMGetPreviousBasicBlock
+ (BB : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMGetEntryBasicBlock
+ (Fn : in llvm.LLVMValueRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMAppendBasicBlock
+ (Fn : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMInsertBasicBlock
+ (InsertBeforeBB : in llvm.LLVMBasicBlockRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMBasicBlockRef;
+
+ procedure LLVMDeleteBasicBlock (BB : in llvm.LLVMBasicBlockRef);
+
+ function LLVMGetInstructionParent
+ (Inst : in llvm.LLVMValueRef)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMGetFirstInstruction
+ (BB : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetLastInstruction
+ (BB : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetNextInstruction
+ (Inst : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetPreviousInstruction
+ (Inst : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ procedure LLVMSetInstructionCallConv
+ (Instr : in llvm.LLVMValueRef;
+ CC : in Interfaces.C.unsigned);
+
+ function LLVMGetInstructionCallConv
+ (Instr : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ procedure LLVMAddInstrAttribute
+ (Instr : in llvm.LLVMValueRef;
+ index : in Interfaces.C.unsigned;
+ arg_1 : in llvm.LLVMAttribute);
+
+ procedure LLVMRemoveInstrAttribute
+ (Instr : in llvm.LLVMValueRef;
+ index : in Interfaces.C.unsigned;
+ arg_1 : in llvm.LLVMAttribute);
+
+ procedure LLVMSetInstrParamAlignment
+ (Instr : in llvm.LLVMValueRef;
+ index : in Interfaces.C.unsigned;
+ align : in Interfaces.C.unsigned);
+
+ function LLVMIsTailCall
+ (CallInst : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ procedure LLVMSetTailCall
+ (CallInst : in llvm.LLVMValueRef;
+ IsTailCall : in Interfaces.C.int);
+
+ procedure LLVMAddIncoming
+ (PhiNode : in llvm.LLVMValueRef;
+ IncomingValues : access llvm.LLVMValueRef;
+ IncomingBlocks : access llvm.LLVMBasicBlockRef;
+ Count : in Interfaces.C.unsigned);
+
+ function LLVMCountIncoming
+ (PhiNode : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMGetIncomingValue
+ (PhiNode : in llvm.LLVMValueRef;
+ Index : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMGetIncomingBlock
+ (PhiNode : in llvm.LLVMValueRef;
+ Index : in Interfaces.C.unsigned)
+ return llvm.LLVMBasicBlockRef;
+
+ function LLVMCreateBuilder return llvm.LLVMBuilderRef;
+
+ procedure LLVMPositionBuilder
+ (Builder : in llvm.LLVMBuilderRef;
+ Block : in llvm.LLVMBasicBlockRef;
+ Instr : in llvm.LLVMValueRef);
+
+ procedure LLVMPositionBuilderBefore
+ (Builder : in llvm.LLVMBuilderRef;
+ Instr : in llvm.LLVMValueRef);
+
+ procedure LLVMPositionBuilderAtEnd
+ (Builder : in llvm.LLVMBuilderRef;
+ Block : in llvm.LLVMBasicBlockRef);
+
+ function LLVMGetInsertBlock
+ (Builder : in llvm.LLVMBuilderRef)
+ return llvm.LLVMBasicBlockRef;
+
+ procedure LLVMClearInsertionPosition (Builder : in llvm.LLVMBuilderRef);
+
+ procedure LLVMInsertIntoBuilder
+ (Builder : in llvm.LLVMBuilderRef;
+ Instr : in llvm.LLVMValueRef);
+
+ procedure LLVMDisposeBuilder (Builder : in llvm.LLVMBuilderRef);
+
+ function LLVMBuildRetVoid
+ (arg_1 : in llvm.LLVMBuilderRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildRet
+ (arg_1 : in llvm.LLVMBuilderRef;
+ V : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildBr
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Dest : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildCondBr
+ (arg_1 : in llvm.LLVMBuilderRef;
+ the_If : in llvm.LLVMValueRef;
+ the_Then : in llvm.LLVMBasicBlockRef;
+ the_Else : in llvm.LLVMBasicBlockRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSwitch
+ (arg_1 : in llvm.LLVMBuilderRef;
+ V : in llvm.LLVMValueRef;
+ the_Else : in llvm.LLVMBasicBlockRef;
+ NumCases : in Interfaces.C.unsigned)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildInvoke
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Fn : in llvm.LLVMValueRef;
+ Args : access llvm.LLVMValueRef;
+ NumArgs : in Interfaces.C.unsigned;
+ the_Then : in llvm.LLVMBasicBlockRef;
+ Catch : in llvm.LLVMBasicBlockRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildUnwind
+ (arg_1 : in llvm.LLVMBuilderRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildUnreachable
+ (arg_1 : in llvm.LLVMBuilderRef)
+ return llvm.LLVMValueRef;
+
+ procedure LLVMAddCase
+ (Switch : in llvm.LLVMValueRef;
+ OnVal : in llvm.LLVMValueRef;
+ Dest : in llvm.LLVMBasicBlockRef);
+
+ function LLVMBuildAdd
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSub
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildMul
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildUDiv
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSDiv
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFDiv
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildURem
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSRem
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFRem
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildShl
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildLShr
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildAShr
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildAnd
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildOr
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildXor
+ (arg_1 : in llvm.LLVMBuilderRef;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildNeg
+ (arg_1 : in llvm.LLVMBuilderRef;
+ V : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildNot
+ (arg_1 : in llvm.LLVMBuilderRef;
+ V : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildMalloc
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Ty : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildArrayMalloc
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Ty : in llvm.LLVMTypeRef;
+ Val : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildAlloca
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Ty : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildArrayAlloca
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Ty : in llvm.LLVMTypeRef;
+ Val : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFree
+ (arg_1 : in llvm.LLVMBuilderRef;
+ PointerVal : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildLoad
+ (arg_1 : in llvm.LLVMBuilderRef;
+ PointerVal : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildStore
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ Ptr : in llvm.LLVMValueRef)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildGEP
+ (B : in llvm.LLVMBuilderRef;
+ Pointer : in llvm.LLVMValueRef;
+ Indices : access llvm.LLVMValueRef;
+ NumIndices : in Interfaces.C.unsigned;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildTrunc
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildZExt
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSExt
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFPToUI
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFPToSI
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildUIToFP
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSIToFP
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFPTrunc
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFPExt
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildPtrToInt
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildIntToPtr
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildBitCast
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Val : in llvm.LLVMValueRef;
+ DestTy : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildICmp
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Op : in llvm.LLVMIntPredicate;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildFCmp
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Op : in llvm.LLVMRealPredicate;
+ LHS : in llvm.LLVMValueRef;
+ RHS : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildPhi
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Ty : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildCall
+ (arg_1 : in llvm.LLVMBuilderRef;
+ Fn : in llvm.LLVMValueRef;
+ Args : access llvm.LLVMValueRef;
+ NumArgs : in Interfaces.C.unsigned;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildSelect
+ (arg_1 : in llvm.LLVMBuilderRef;
+ the_If : in llvm.LLVMValueRef;
+ the_Then : in llvm.LLVMValueRef;
+ the_Else : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildVAArg
+ (arg_1 : in llvm.LLVMBuilderRef;
+ List : in llvm.LLVMValueRef;
+ Ty : in llvm.LLVMTypeRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildExtractElement
+ (arg_1 : in llvm.LLVMBuilderRef;
+ VecVal : in llvm.LLVMValueRef;
+ Index : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildInsertElement
+ (arg_1 : in llvm.LLVMBuilderRef;
+ VecVal : in llvm.LLVMValueRef;
+ EltVal : in llvm.LLVMValueRef;
+ Index : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildShuffleVector
+ (arg_1 : in llvm.LLVMBuilderRef;
+ V1 : in llvm.LLVMValueRef;
+ V2 : in llvm.LLVMValueRef;
+ Mask : in llvm.LLVMValueRef;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildExtractValue
+ (arg_1 : in llvm.LLVMBuilderRef;
+ AggVal : in llvm.LLVMValueRef;
+ Index : in Interfaces.C.unsigned;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMBuildInsertValue
+ (arg_1 : in llvm.LLVMBuilderRef;
+ AggVal : in llvm.LLVMValueRef;
+ EltVal : in llvm.LLVMValueRef;
+ Index : in Interfaces.C.unsigned;
+ Name : in Interfaces.C.Strings.chars_ptr)
+ return llvm.LLVMValueRef;
+
+ function LLVMCreateModuleProviderForExistingModule
+ (M : in llvm.LLVMModuleRef)
+ return llvm.LLVMModuleProviderRef;
+
+ procedure LLVMDisposeModuleProvider (MP : in llvm.LLVMModuleProviderRef);
+
+ function LLVMCreateMemoryBufferWithContentsOfFile
+ (Path : in Interfaces.C.Strings.chars_ptr;
+ OutMemBuf : access llvm.LLVMMemoryBufferRef;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ function LLVMCreateMemoryBufferWithSTDIN
+ (OutMemBuf : access llvm.LLVMMemoryBufferRef;
+ OutMessage : access Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.int;
+
+ procedure LLVMDisposeMemoryBuffer (MemBuf : in llvm.LLVMMemoryBufferRef);
+
+ function LLVMCreatePassManager return llvm.LLVMPassManagerRef;
+
+ function LLVMCreateFunctionPassManager
+ (MP : in llvm.LLVMModuleProviderRef)
+ return llvm.LLVMPassManagerRef;
+
+ function LLVMRunPassManager
+ (PM : in llvm.LLVMPassManagerRef;
+ M : in llvm.LLVMModuleRef)
+ return Interfaces.C.int;
+
+ function LLVMInitializeFunctionPassManager
+ (FPM : in llvm.LLVMPassManagerRef)
+ return Interfaces.C.int;
+
+ function LLVMRunFunctionPassManager
+ (FPM : in llvm.LLVMPassManagerRef;
+ F : in llvm.LLVMValueRef)
+ return Interfaces.C.int;
+
+ function LLVMFinalizeFunctionPassManager
+ (FPM : in llvm.LLVMPassManagerRef)
+ return Interfaces.C.int;
+
+ procedure LLVMDisposePassManager (PM : in llvm.LLVMPassManagerRef);
+
+private
+
+ pragma Import (C, LLVMDisposeMessage, "Ada_LLVMDisposeMessage");
+ pragma Import (C, LLVMContextCreate, "Ada_LLVMContextCreate");
+ pragma Import (C, LLVMGetGlobalContext, "Ada_LLVMGetGlobalContext");
+ pragma Import (C, LLVMContextDispose, "Ada_LLVMContextDispose");
+ pragma Import
+ (C,
+ LLVMModuleCreateWithName,
+ "Ada_LLVMModuleCreateWithName");
+ pragma Import
+ (C,
+ LLVMModuleCreateWithNameInContext,
+ "Ada_LLVMModuleCreateWithNameInContext");
+ pragma Import (C, LLVMDisposeModule, "Ada_LLVMDisposeModule");
+ pragma Import (C, LLVMGetDataLayout, "Ada_LLVMGetDataLayout");
+ pragma Import (C, LLVMSetDataLayout, "Ada_LLVMSetDataLayout");
+ pragma Import (C, LLVMGetTarget, "Ada_LLVMGetTarget");
+ pragma Import (C, LLVMSetTarget, "Ada_LLVMSetTarget");
+ pragma Import (C, LLVMAddTypeName, "Ada_LLVMAddTypeName");
+ pragma Import (C, LLVMDeleteTypeName, "Ada_LLVMDeleteTypeName");
+ pragma Import (C, LLVMGetTypeByName, "Ada_LLVMGetTypeByName");
+ pragma Import (C, LLVMDumpModule, "Ada_LLVMDumpModule");
+ pragma Import (C, LLVMGetTypeKind, "Ada_LLVMGetTypeKind");
+ pragma Import (C, LLVMInt1Type, "Ada_LLVMInt1Type");
+ pragma Import (C, LLVMInt8Type, "Ada_LLVMInt8Type");
+ pragma Import (C, LLVMInt16Type, "Ada_LLVMInt16Type");
+ pragma Import (C, LLVMInt32Type, "Ada_LLVMInt32Type");
+ pragma Import (C, LLVMInt64Type, "Ada_LLVMInt64Type");
+ pragma Import (C, LLVMIntType, "Ada_LLVMIntType");
+ pragma Import (C, LLVMGetIntTypeWidth, "Ada_LLVMGetIntTypeWidth");
+ pragma Import (C, LLVMFloatType, "Ada_LLVMFloatType");
+ pragma Import (C, LLVMDoubleType, "Ada_LLVMDoubleType");
+ pragma Import (C, LLVMX86FP80Type, "Ada_LLVMX86FP80Type");
+ pragma Import (C, LLVMFP128Type, "Ada_LLVMFP128Type");
+ pragma Import (C, LLVMPPCFP128Type, "Ada_LLVMPPCFP128Type");
+ pragma Import (C, LLVMFunctionType, "Ada_LLVMFunctionType");
+ pragma Import (C, LLVMIsFunctionVarArg, "Ada_LLVMIsFunctionVarArg");
+ pragma Import (C, LLVMGetReturnType, "Ada_LLVMGetReturnType");
+ pragma Import (C, LLVMCountParamTypes, "Ada_LLVMCountParamTypes");
+ pragma Import (C, LLVMGetParamTypes, "Ada_LLVMGetParamTypes");
+ pragma Import (C, LLVMStructType, "Ada_LLVMStructType");
+ pragma Import
+ (C,
+ LLVMCountStructElementTypes,
+ "Ada_LLVMCountStructElementTypes");
+ pragma Import
+ (C,
+ LLVMGetStructElementTypes,
+ "Ada_LLVMGetStructElementTypes");
+ pragma Import (C, LLVMIsPackedStruct, "Ada_LLVMIsPackedStruct");
+ pragma Import (C, LLVMArrayType, "Ada_LLVMArrayType");
+ pragma Import (C, LLVMPointerType, "Ada_LLVMPointerType");
+ pragma Import (C, LLVMVectorType, "Ada_LLVMVectorType");
+ pragma Import (C, LLVMGetElementType, "Ada_LLVMGetElementType");
+ pragma Import (C, LLVMGetArrayLength, "Ada_LLVMGetArrayLength");
+ pragma Import
+ (C,
+ LLVMGetPointerAddressSpace,
+ "Ada_LLVMGetPointerAddressSpace");
+ pragma Import (C, LLVMGetVectorSize, "Ada_LLVMGetVectorSize");
+ pragma Import (C, LLVMVoidType, "Ada_LLVMVoidType");
+ pragma Import (C, LLVMLabelType, "Ada_LLVMLabelType");
+ pragma Import (C, LLVMOpaqueType, "Ada_LLVMOpaqueType");
+ pragma Import (C, LLVMCreateTypeHandle, "Ada_LLVMCreateTypeHandle");
+ pragma Import (C, LLVMRefineType, "Ada_LLVMRefineType");
+ pragma Import (C, LLVMResolveTypeHandle, "Ada_LLVMResolveTypeHandle");
+ pragma Import (C, LLVMDisposeTypeHandle, "Ada_LLVMDisposeTypeHandle");
+ pragma Import (C, LLVMTypeOf, "Ada_LLVMTypeOf");
+ pragma Import (C, LLVMGetValueName, "Ada_LLVMGetValueName");
+ pragma Import (C, LLVMSetValueName, "Ada_LLVMSetValueName");
+ pragma Import (C, LLVMDumpValue, "Ada_LLVMDumpValue");
+ pragma Import (C, LLVMIsAArgument, "Ada_LLVMIsAArgument");
+ pragma Import (C, LLVMIsABasicBlock, "Ada_LLVMIsABasicBlock");
+ pragma Import (C, LLVMIsAInlineAsm, "Ada_LLVMIsAInlineAsm");
+ pragma Import (C, LLVMIsAUser, "Ada_LLVMIsAUser");
+ pragma Import (C, LLVMIsAConstant, "Ada_LLVMIsAConstant");
+ pragma Import
+ (C,
+ LLVMIsAConstantAggregateZero,
+ "Ada_LLVMIsAConstantAggregateZero");
+ pragma Import (C, LLVMIsAConstantArray, "Ada_LLVMIsAConstantArray");
+ pragma Import (C, LLVMIsAConstantExpr, "Ada_LLVMIsAConstantExpr");
+ pragma Import (C, LLVMIsAConstantFP, "Ada_LLVMIsAConstantFP");
+ pragma Import (C, LLVMIsAConstantInt, "Ada_LLVMIsAConstantInt");
+ pragma Import
+ (C,
+ LLVMIsAConstantPointerNull,
+ "Ada_LLVMIsAConstantPointerNull");
+ pragma Import (C, LLVMIsAConstantStruct, "Ada_LLVMIsAConstantStruct");
+ pragma Import (C, LLVMIsAConstantVector, "Ada_LLVMIsAConstantVector");
+ pragma Import (C, LLVMIsAGlobalValue, "Ada_LLVMIsAGlobalValue");
+ pragma Import (C, LLVMIsAFunction, "Ada_LLVMIsAFunction");
+ pragma Import (C, LLVMIsAGlobalAlias, "Ada_LLVMIsAGlobalAlias");
+ pragma Import (C, LLVMIsAGlobalVariable, "Ada_LLVMIsAGlobalVariable");
+ pragma Import (C, LLVMIsAUndefValue, "Ada_LLVMIsAUndefValue");
+ pragma Import (C, LLVMIsAInstruction, "Ada_LLVMIsAInstruction");
+ pragma Import (C, LLVMIsABinaryOperator, "Ada_LLVMIsABinaryOperator");
+ pragma Import (C, LLVMIsACallInst, "Ada_LLVMIsACallInst");
+ pragma Import (C, LLVMIsAIntrinsicInst, "Ada_LLVMIsAIntrinsicInst");
+ pragma Import (C, LLVMIsADbgInfoIntrinsic, "Ada_LLVMIsADbgInfoIntrinsic");
+ pragma Import (C, LLVMIsADbgDeclareInst, "Ada_LLVMIsADbgDeclareInst");
+ pragma Import (C, LLVMIsADbgFuncStartInst, "Ada_LLVMIsADbgFuncStartInst");
+ pragma Import (C, LLVMIsADbgRegionEndInst, "Ada_LLVMIsADbgRegionEndInst");
+ pragma Import
+ (C,
+ LLVMIsADbgRegionStartInst,
+ "Ada_LLVMIsADbgRegionStartInst");
+ pragma Import (C, LLVMIsADbgStopPointInst, "Ada_LLVMIsADbgStopPointInst");
+ pragma Import (C, LLVMIsAEHSelectorInst, "Ada_LLVMIsAEHSelectorInst");
+ pragma Import (C, LLVMIsAMemIntrinsic, "Ada_LLVMIsAMemIntrinsic");
+ pragma Import (C, LLVMIsAMemCpyInst, "Ada_LLVMIsAMemCpyInst");
+ pragma Import (C, LLVMIsAMemMoveInst, "Ada_LLVMIsAMemMoveInst");
+ pragma Import (C, LLVMIsAMemSetInst, "Ada_LLVMIsAMemSetInst");
+ pragma Import (C, LLVMIsACmpInst, "Ada_LLVMIsACmpInst");
+ pragma Import (C, LLVMIsAFCmpInst, "Ada_LLVMIsAFCmpInst");
+ pragma Import (C, LLVMIsAICmpInst, "Ada_LLVMIsAICmpInst");
+ pragma Import
+ (C,
+ LLVMIsAExtractElementInst,
+ "Ada_LLVMIsAExtractElementInst");
+ pragma Import
+ (C,
+ LLVMIsAGetElementPtrInst,
+ "Ada_LLVMIsAGetElementPtrInst");
+ pragma Import
+ (C,
+ LLVMIsAInsertElementInst,
+ "Ada_LLVMIsAInsertElementInst");
+ pragma Import (C, LLVMIsAInsertValueInst, "Ada_LLVMIsAInsertValueInst");
+ pragma Import (C, LLVMIsAPHINode, "Ada_LLVMIsAPHINode");
+ pragma Import (C, LLVMIsASelectInst, "Ada_LLVMIsASelectInst");
+ pragma Import
+ (C,
+ LLVMIsAShuffleVectorInst,
+ "Ada_LLVMIsAShuffleVectorInst");
+ pragma Import (C, LLVMIsAStoreInst, "Ada_LLVMIsAStoreInst");
+ pragma Import (C, LLVMIsATerminatorInst, "Ada_LLVMIsATerminatorInst");
+ pragma Import (C, LLVMIsABranchInst, "Ada_LLVMIsABranchInst");
+ pragma Import (C, LLVMIsAInvokeInst, "Ada_LLVMIsAInvokeInst");
+ pragma Import (C, LLVMIsAReturnInst, "Ada_LLVMIsAReturnInst");
+ pragma Import (C, LLVMIsASwitchInst, "Ada_LLVMIsASwitchInst");
+ pragma Import (C, LLVMIsAUnreachableInst, "Ada_LLVMIsAUnreachableInst");
+ pragma Import (C, LLVMIsAUnwindInst, "Ada_LLVMIsAUnwindInst");
+ pragma Import (C, LLVMIsAUnaryInstruction, "Ada_LLVMIsAUnaryInstruction");
+ pragma Import (C, LLVMIsAAllocationInst, "Ada_LLVMIsAAllocationInst");
+ pragma Import (C, LLVMIsAAllocaInst, "Ada_LLVMIsAAllocaInst");
+ pragma Import (C, LLVMIsAMallocInst, "Ada_LLVMIsAMallocInst");
+ pragma Import (C, LLVMIsACastInst, "Ada_LLVMIsACastInst");
+ pragma Import (C, LLVMIsABitCastInst, "Ada_LLVMIsABitCastInst");
+ pragma Import (C, LLVMIsAFPExtInst, "Ada_LLVMIsAFPExtInst");
+ pragma Import (C, LLVMIsAFPToSIInst, "Ada_LLVMIsAFPToSIInst");
+ pragma Import (C, LLVMIsAFPToUIInst, "Ada_LLVMIsAFPToUIInst");
+ pragma Import (C, LLVMIsAFPTruncInst, "Ada_LLVMIsAFPTruncInst");
+ pragma Import (C, LLVMIsAIntToPtrInst, "Ada_LLVMIsAIntToPtrInst");
+ pragma Import (C, LLVMIsAPtrToIntInst, "Ada_LLVMIsAPtrToIntInst");
+ pragma Import (C, LLVMIsASExtInst, "Ada_LLVMIsASExtInst");
+ pragma Import (C, LLVMIsASIToFPInst, "Ada_LLVMIsASIToFPInst");
+ pragma Import (C, LLVMIsATruncInst, "Ada_LLVMIsATruncInst");
+ pragma Import (C, LLVMIsAUIToFPInst, "Ada_LLVMIsAUIToFPInst");
+ pragma Import (C, LLVMIsAZExtInst, "Ada_LLVMIsAZExtInst");
+ pragma Import (C, LLVMIsAExtractValueInst, "Ada_LLVMIsAExtractValueInst");
+ pragma Import (C, LLVMIsAFreeInst, "Ada_LLVMIsAFreeInst");
+ pragma Import (C, LLVMIsALoadInst, "Ada_LLVMIsALoadInst");
+ pragma Import (C, LLVMIsAVAArgInst, "Ada_LLVMIsAVAArgInst");
+ pragma Import (C, LLVMConstNull, "Ada_LLVMConstNull");
+ pragma Import (C, LLVMConstAllOnes, "Ada_LLVMConstAllOnes");
+ pragma Import (C, LLVMGetUndef, "Ada_LLVMGetUndef");
+ pragma Import (C, LLVMIsConstant, "Ada_LLVMIsConstant");
+ pragma Import (C, LLVMIsNull, "Ada_LLVMIsNull");
+ pragma Import (C, LLVMIsUndef, "Ada_LLVMIsUndef");
+ pragma Import (C, LLVMConstPointerNull, "Ada_LLVMConstPointerNull");
+ pragma Import (C, LLVMConstInt, "Ada_LLVMConstInt");
+ pragma Import (C, LLVMConstReal, "Ada_LLVMConstReal");
+ pragma Import (C, LLVMConstRealOfString, "Ada_LLVMConstRealOfString");
+ pragma Import (C, LLVMConstString, "Ada_LLVMConstString");
+ pragma Import (C, LLVMConstArray, "Ada_LLVMConstArray");
+ pragma Import (C, LLVMConstStruct, "Ada_LLVMConstStruct");
+ pragma Import (C, LLVMConstVector, "Ada_LLVMConstVector");
+ pragma Import (C, LLVMSizeOf, "Ada_LLVMSizeOf");
+ pragma Import (C, LLVMConstNeg, "Ada_LLVMConstNeg");
+ pragma Import (C, LLVMConstNot, "Ada_LLVMConstNot");
+ pragma Import (C, LLVMConstAdd, "Ada_LLVMConstAdd");
+ pragma Import (C, LLVMConstSub, "Ada_LLVMConstSub");
+ pragma Import (C, LLVMConstMul, "Ada_LLVMConstMul");
+ pragma Import (C, LLVMConstUDiv, "Ada_LLVMConstUDiv");
+ pragma Import (C, LLVMConstSDiv, "Ada_LLVMConstSDiv");
+ pragma Import (C, LLVMConstFDiv, "Ada_LLVMConstFDiv");
+ pragma Import (C, LLVMConstURem, "Ada_LLVMConstURem");
+ pragma Import (C, LLVMConstSRem, "Ada_LLVMConstSRem");
+ pragma Import (C, LLVMConstFRem, "Ada_LLVMConstFRem");
+ pragma Import (C, LLVMConstAnd, "Ada_LLVMConstAnd");
+ pragma Import (C, LLVMConstOr, "Ada_LLVMConstOr");
+ pragma Import (C, LLVMConstXor, "Ada_LLVMConstXor");
+ pragma Import (C, LLVMConstICmp, "Ada_LLVMConstICmp");
+ pragma Import (C, LLVMConstFCmp, "Ada_LLVMConstFCmp");
+ pragma Import (C, LLVMConstShl, "Ada_LLVMConstShl");
+ pragma Import (C, LLVMConstLShr, "Ada_LLVMConstLShr");
+ pragma Import (C, LLVMConstAShr, "Ada_LLVMConstAShr");
+ pragma Import (C, LLVMConstGEP, "Ada_LLVMConstGEP");
+ pragma Import (C, LLVMConstTrunc, "Ada_LLVMConstTrunc");
+ pragma Import (C, LLVMConstSExt, "Ada_LLVMConstSExt");
+ pragma Import (C, LLVMConstZExt, "Ada_LLVMConstZExt");
+ pragma Import (C, LLVMConstFPTrunc, "Ada_LLVMConstFPTrunc");
+ pragma Import (C, LLVMConstFPExt, "Ada_LLVMConstFPExt");
+ pragma Import (C, LLVMConstUIToFP, "Ada_LLVMConstUIToFP");
+ pragma Import (C, LLVMConstSIToFP, "Ada_LLVMConstSIToFP");
+ pragma Import (C, LLVMConstFPToUI, "Ada_LLVMConstFPToUI");
+ pragma Import (C, LLVMConstFPToSI, "Ada_LLVMConstFPToSI");
+ pragma Import (C, LLVMConstPtrToInt, "Ada_LLVMConstPtrToInt");
+ pragma Import (C, LLVMConstIntToPtr, "Ada_LLVMConstIntToPtr");
+ pragma Import (C, LLVMConstBitCast, "Ada_LLVMConstBitCast");
+ pragma Import (C, LLVMConstSelect, "Ada_LLVMConstSelect");
+ pragma Import (C, LLVMConstExtractElement, "Ada_LLVMConstExtractElement");
+ pragma Import (C, LLVMConstInsertElement, "Ada_LLVMConstInsertElement");
+ pragma Import (C, LLVMConstShuffleVector, "Ada_LLVMConstShuffleVector");
+ pragma Import (C, LLVMConstExtractValue, "Ada_LLVMConstExtractValue");
+ pragma Import (C, LLVMConstInsertValue, "Ada_LLVMConstInsertValue");
+ pragma Import (C, LLVMConstInlineAsm, "Ada_LLVMConstInlineAsm");
+ pragma Import (C, LLVMGetGlobalParent, "Ada_LLVMGetGlobalParent");
+ pragma Import (C, LLVMIsDeclaration, "Ada_LLVMIsDeclaration");
+ pragma Import (C, LLVMGetLinkage, "Ada_LLVMGetLinkage");
+ pragma Import (C, LLVMSetLinkage, "Ada_LLVMSetLinkage");
+ pragma Import (C, LLVMGetSection, "Ada_LLVMGetSection");
+ pragma Import (C, LLVMSetSection, "Ada_LLVMSetSection");
+ pragma Import (C, LLVMGetVisibility, "Ada_LLVMGetVisibility");
+ pragma Import (C, LLVMSetVisibility, "Ada_LLVMSetVisibility");
+ pragma Import (C, LLVMGetAlignment, "Ada_LLVMGetAlignment");
+ pragma Import (C, LLVMSetAlignment, "Ada_LLVMSetAlignment");
+ pragma Import (C, LLVMAddGlobal, "Ada_LLVMAddGlobal");
+ pragma Import (C, LLVMGetNamedGlobal, "Ada_LLVMGetNamedGlobal");
+ pragma Import (C, LLVMGetFirstGlobal, "Ada_LLVMGetFirstGlobal");
+ pragma Import (C, LLVMGetLastGlobal, "Ada_LLVMGetLastGlobal");
+ pragma Import (C, LLVMGetNextGlobal, "Ada_LLVMGetNextGlobal");
+ pragma Import (C, LLVMGetPreviousGlobal, "Ada_LLVMGetPreviousGlobal");
+ pragma Import (C, LLVMDeleteGlobal, "Ada_LLVMDeleteGlobal");
+ pragma Import (C, LLVMGetInitializer, "Ada_LLVMGetInitializer");
+ pragma Import (C, LLVMSetInitializer, "Ada_LLVMSetInitializer");
+ pragma Import (C, LLVMIsThreadLocal, "Ada_LLVMIsThreadLocal");
+ pragma Import (C, LLVMSetThreadLocal, "Ada_LLVMSetThreadLocal");
+ pragma Import (C, LLVMIsGlobalConstant, "Ada_LLVMIsGlobalConstant");
+ pragma Import (C, LLVMSetGlobalConstant, "Ada_LLVMSetGlobalConstant");
+ pragma Import (C, LLVMAddAlias, "Ada_LLVMAddAlias");
+ pragma Import (C, LLVMAddFunction, "Ada_LLVMAddFunction");
+ pragma Import (C, LLVMGetNamedFunction, "Ada_LLVMGetNamedFunction");
+ pragma Import (C, LLVMGetFirstFunction, "Ada_LLVMGetFirstFunction");
+ pragma Import (C, LLVMGetLastFunction, "Ada_LLVMGetLastFunction");
+ pragma Import (C, LLVMGetNextFunction, "Ada_LLVMGetNextFunction");
+ pragma Import (C, LLVMGetPreviousFunction, "Ada_LLVMGetPreviousFunction");
+ pragma Import (C, LLVMDeleteFunction, "Ada_LLVMDeleteFunction");
+ pragma Import (C, LLVMGetIntrinsicID, "Ada_LLVMGetIntrinsicID");
+ pragma Import (C, LLVMGetFunctionCallConv, "Ada_LLVMGetFunctionCallConv");
+ pragma Import (C, LLVMSetFunctionCallConv, "Ada_LLVMSetFunctionCallConv");
+ pragma Import (C, LLVMGetGC, "Ada_LLVMGetGC");
+ pragma Import (C, LLVMSetGC, "Ada_LLVMSetGC");
+ pragma Import (C, LLVMAddFunctionAttr, "Ada_LLVMAddFunctionAttr");
+ pragma Import (C, LLVMRemoveFunctionAttr, "Ada_LLVMRemoveFunctionAttr");
+ pragma Import (C, LLVMCountParams, "Ada_LLVMCountParams");
+ pragma Import (C, LLVMGetParams, "Ada_LLVMGetParams");
+ pragma Import (C, LLVMGetParam, "Ada_LLVMGetParam");
+ pragma Import (C, LLVMGetParamParent, "Ada_LLVMGetParamParent");
+ pragma Import (C, LLVMGetFirstParam, "Ada_LLVMGetFirstParam");
+ pragma Import (C, LLVMGetLastParam, "Ada_LLVMGetLastParam");
+ pragma Import (C, LLVMGetNextParam, "Ada_LLVMGetNextParam");
+ pragma Import (C, LLVMGetPreviousParam, "Ada_LLVMGetPreviousParam");
+ pragma Import (C, LLVMAddAttribute, "Ada_LLVMAddAttribute");
+ pragma Import (C, LLVMRemoveAttribute, "Ada_LLVMRemoveAttribute");
+ pragma Import (C, LLVMSetParamAlignment, "Ada_LLVMSetParamAlignment");
+ pragma Import (C, LLVMBasicBlockAsValue, "Ada_LLVMBasicBlockAsValue");
+ pragma Import (C, LLVMValueIsBasicBlock, "Ada_LLVMValueIsBasicBlock");
+ pragma Import (C, LLVMValueAsBasicBlock, "Ada_LLVMValueAsBasicBlock");
+ pragma Import (C, LLVMGetBasicBlockParent, "Ada_LLVMGetBasicBlockParent");
+ pragma Import (C, LLVMCountBasicBlocks, "Ada_LLVMCountBasicBlocks");
+ pragma Import (C, LLVMGetBasicBlocks, "Ada_LLVMGetBasicBlocks");
+ pragma Import (C, LLVMGetFirstBasicBlock, "Ada_LLVMGetFirstBasicBlock");
+ pragma Import (C, LLVMGetLastBasicBlock, "Ada_LLVMGetLastBasicBlock");
+ pragma Import (C, LLVMGetNextBasicBlock, "Ada_LLVMGetNextBasicBlock");
+ pragma Import
+ (C,
+ LLVMGetPreviousBasicBlock,
+ "Ada_LLVMGetPreviousBasicBlock");
+ pragma Import (C, LLVMGetEntryBasicBlock, "Ada_LLVMGetEntryBasicBlock");
+ pragma Import (C, LLVMAppendBasicBlock, "Ada_LLVMAppendBasicBlock");
+ pragma Import (C, LLVMInsertBasicBlock, "Ada_LLVMInsertBasicBlock");
+ pragma Import (C, LLVMDeleteBasicBlock, "Ada_LLVMDeleteBasicBlock");
+ pragma Import
+ (C,
+ LLVMGetInstructionParent,
+ "Ada_LLVMGetInstructionParent");
+ pragma Import (C, LLVMGetFirstInstruction, "Ada_LLVMGetFirstInstruction");
+ pragma Import (C, LLVMGetLastInstruction, "Ada_LLVMGetLastInstruction");
+ pragma Import (C, LLVMGetNextInstruction, "Ada_LLVMGetNextInstruction");
+ pragma Import
+ (C,
+ LLVMGetPreviousInstruction,
+ "Ada_LLVMGetPreviousInstruction");
+ pragma Import
+ (C,
+ LLVMSetInstructionCallConv,
+ "Ada_LLVMSetInstructionCallConv");
+ pragma Import
+ (C,
+ LLVMGetInstructionCallConv,
+ "Ada_LLVMGetInstructionCallConv");
+ pragma Import (C, LLVMAddInstrAttribute, "Ada_LLVMAddInstrAttribute");
+ pragma Import
+ (C,
+ LLVMRemoveInstrAttribute,
+ "Ada_LLVMRemoveInstrAttribute");
+ pragma Import
+ (C,
+ LLVMSetInstrParamAlignment,
+ "Ada_LLVMSetInstrParamAlignment");
+ pragma Import (C, LLVMIsTailCall, "Ada_LLVMIsTailCall");
+ pragma Import (C, LLVMSetTailCall, "Ada_LLVMSetTailCall");
+ pragma Import (C, LLVMAddIncoming, "Ada_LLVMAddIncoming");
+ pragma Import (C, LLVMCountIncoming, "Ada_LLVMCountIncoming");
+ pragma Import (C, LLVMGetIncomingValue, "Ada_LLVMGetIncomingValue");
+ pragma Import (C, LLVMGetIncomingBlock, "Ada_LLVMGetIncomingBlock");
+ pragma Import (C, LLVMCreateBuilder, "Ada_LLVMCreateBuilder");
+ pragma Import (C, LLVMPositionBuilder, "Ada_LLVMPositionBuilder");
+ pragma Import
+ (C,
+ LLVMPositionBuilderBefore,
+ "Ada_LLVMPositionBuilderBefore");
+ pragma Import
+ (C,
+ LLVMPositionBuilderAtEnd,
+ "Ada_LLVMPositionBuilderAtEnd");
+ pragma Import (C, LLVMGetInsertBlock, "Ada_LLVMGetInsertBlock");
+ pragma Import
+ (C,
+ LLVMClearInsertionPosition,
+ "Ada_LLVMClearInsertionPosition");
+ pragma Import (C, LLVMInsertIntoBuilder, "Ada_LLVMInsertIntoBuilder");
+ pragma Import (C, LLVMDisposeBuilder, "Ada_LLVMDisposeBuilder");
+ pragma Import (C, LLVMBuildRetVoid, "Ada_LLVMBuildRetVoid");
+ pragma Import (C, LLVMBuildRet, "Ada_LLVMBuildRet");
+ pragma Import (C, LLVMBuildBr, "Ada_LLVMBuildBr");
+ pragma Import (C, LLVMBuildCondBr, "Ada_LLVMBuildCondBr");
+ pragma Import (C, LLVMBuildSwitch, "Ada_LLVMBuildSwitch");
+ pragma Import (C, LLVMBuildInvoke, "Ada_LLVMBuildInvoke");
+ pragma Import (C, LLVMBuildUnwind, "Ada_LLVMBuildUnwind");
+ pragma Import (C, LLVMBuildUnreachable, "Ada_LLVMBuildUnreachable");
+ pragma Import (C, LLVMAddCase, "Ada_LLVMAddCase");
+ pragma Import (C, LLVMBuildAdd, "Ada_LLVMBuildAdd");
+ pragma Import (C, LLVMBuildSub, "Ada_LLVMBuildSub");
+ pragma Import (C, LLVMBuildMul, "Ada_LLVMBuildMul");
+ pragma Import (C, LLVMBuildUDiv, "Ada_LLVMBuildUDiv");
+ pragma Import (C, LLVMBuildSDiv, "Ada_LLVMBuildSDiv");
+ pragma Import (C, LLVMBuildFDiv, "Ada_LLVMBuildFDiv");
+ pragma Import (C, LLVMBuildURem, "Ada_LLVMBuildURem");
+ pragma Import (C, LLVMBuildSRem, "Ada_LLVMBuildSRem");
+ pragma Import (C, LLVMBuildFRem, "Ada_LLVMBuildFRem");
+ pragma Import (C, LLVMBuildShl, "Ada_LLVMBuildShl");
+ pragma Import (C, LLVMBuildLShr, "Ada_LLVMBuildLShr");
+ pragma Import (C, LLVMBuildAShr, "Ada_LLVMBuildAShr");
+ pragma Import (C, LLVMBuildAnd, "Ada_LLVMBuildAnd");
+ pragma Import (C, LLVMBuildOr, "Ada_LLVMBuildOr");
+ pragma Import (C, LLVMBuildXor, "Ada_LLVMBuildXor");
+ pragma Import (C, LLVMBuildNeg, "Ada_LLVMBuildNeg");
+ pragma Import (C, LLVMBuildNot, "Ada_LLVMBuildNot");
+ pragma Import (C, LLVMBuildMalloc, "Ada_LLVMBuildMalloc");
+ pragma Import (C, LLVMBuildArrayMalloc, "Ada_LLVMBuildArrayMalloc");
+ pragma Import (C, LLVMBuildAlloca, "Ada_LLVMBuildAlloca");
+ pragma Import (C, LLVMBuildArrayAlloca, "Ada_LLVMBuildArrayAlloca");
+ pragma Import (C, LLVMBuildFree, "Ada_LLVMBuildFree");
+ pragma Import (C, LLVMBuildLoad, "Ada_LLVMBuildLoad");
+ pragma Import (C, LLVMBuildStore, "Ada_LLVMBuildStore");
+ pragma Import (C, LLVMBuildGEP, "Ada_LLVMBuildGEP");
+ pragma Import (C, LLVMBuildTrunc, "Ada_LLVMBuildTrunc");
+ pragma Import (C, LLVMBuildZExt, "Ada_LLVMBuildZExt");
+ pragma Import (C, LLVMBuildSExt, "Ada_LLVMBuildSExt");
+ pragma Import (C, LLVMBuildFPToUI, "Ada_LLVMBuildFPToUI");
+ pragma Import (C, LLVMBuildFPToSI, "Ada_LLVMBuildFPToSI");
+ pragma Import (C, LLVMBuildUIToFP, "Ada_LLVMBuildUIToFP");
+ pragma Import (C, LLVMBuildSIToFP, "Ada_LLVMBuildSIToFP");
+ pragma Import (C, LLVMBuildFPTrunc, "Ada_LLVMBuildFPTrunc");
+ pragma Import (C, LLVMBuildFPExt, "Ada_LLVMBuildFPExt");
+ pragma Import (C, LLVMBuildPtrToInt, "Ada_LLVMBuildPtrToInt");
+ pragma Import (C, LLVMBuildIntToPtr, "Ada_LLVMBuildIntToPtr");
+ pragma Import (C, LLVMBuildBitCast, "Ada_LLVMBuildBitCast");
+ pragma Import (C, LLVMBuildICmp, "Ada_LLVMBuildICmp");
+ pragma Import (C, LLVMBuildFCmp, "Ada_LLVMBuildFCmp");
+ pragma Import (C, LLVMBuildPhi, "Ada_LLVMBuildPhi");
+ pragma Import (C, LLVMBuildCall, "Ada_LLVMBuildCall");
+ pragma Import (C, LLVMBuildSelect, "Ada_LLVMBuildSelect");
+ pragma Import (C, LLVMBuildVAArg, "Ada_LLVMBuildVAArg");
+ pragma Import (C, LLVMBuildExtractElement, "Ada_LLVMBuildExtractElement");
+ pragma Import (C, LLVMBuildInsertElement, "Ada_LLVMBuildInsertElement");
+ pragma Import (C, LLVMBuildShuffleVector, "Ada_LLVMBuildShuffleVector");
+ pragma Import (C, LLVMBuildExtractValue, "Ada_LLVMBuildExtractValue");
+ pragma Import (C, LLVMBuildInsertValue, "Ada_LLVMBuildInsertValue");
+ pragma Import
+ (C,
+ LLVMCreateModuleProviderForExistingModule,
+ "Ada_LLVMCreateModuleProviderForExistingModule");
+ pragma Import
+ (C,
+ LLVMDisposeModuleProvider,
+ "Ada_LLVMDisposeModuleProvider");
+ pragma Import
+ (C,
+ LLVMCreateMemoryBufferWithContentsOfFile,
+ "Ada_LLVMCreateMemoryBufferWithContentsOfFile");
+ pragma Import
+ (C,
+ LLVMCreateMemoryBufferWithSTDIN,
+ "Ada_LLVMCreateMemoryBufferWithSTDIN");
+ pragma Import (C, LLVMDisposeMemoryBuffer, "Ada_LLVMDisposeMemoryBuffer");
+ pragma Import (C, LLVMCreatePassManager, "Ada_LLVMCreatePassManager");
+ pragma Import
+ (C,
+ LLVMCreateFunctionPassManager,
+ "Ada_LLVMCreateFunctionPassManager");
+ pragma Import (C, LLVMRunPassManager, "Ada_LLVMRunPassManager");
+ pragma Import
+ (C,
+ LLVMInitializeFunctionPassManager,
+ "Ada_LLVMInitializeFunctionPassManager");
+ pragma Import
+ (C,
+ LLVMRunFunctionPassManager,
+ "Ada_LLVMRunFunctionPassManager");
+ pragma Import
+ (C,
+ LLVMFinalizeFunctionPassManager,
+ "Ada_LLVMFinalizeFunctionPassManager");
+ pragma Import (C, LLVMDisposePassManager, "Ada_LLVMDisposePassManager");
+
+end llvm.Binding;
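
A minimal, hypothetical usage sketch of the pass-manager subprograms declared above, assuming a module provider obtained from LLVMCreateModuleProviderForExistingModule and a function value F from the iteration subprograms elsewhere in this package; no passes are added here (still legal), since the pass-registration bindings are not part of this spec:

   with Interfaces.C;
   with llvm;
   with llvm.Binding;

   procedure Run_Function_Passes_Sketch
     (MP : in llvm.LLVMModuleProviderRef;
      F  : in llvm.LLVMValueRef)
   is
      use type Interfaces.C.int;
      FPM     : llvm.LLVMPassManagerRef;
      Changed : Interfaces.C.int;
   begin
      FPM := llvm.Binding.LLVMCreateFunctionPassManager (MP);
      --  Each stage returns nonzero when a pass modified something,
      --  following the C API convention.
      Changed := llvm.Binding.LLVMInitializeFunctionPassManager (FPM);
      Changed := llvm.Binding.LLVMRunFunctionPassManager (FPM, F);
      Changed := llvm.Binding.LLVMFinalizeFunctionPassManager (FPM);
      if Changed /= 0 then
         null;  --  finalization reported a change
      end if;
      --  The same dispose call serves both kinds of pass manager.
      llvm.Binding.LLVMDisposePassManager (FPM);
   end Run_Function_Passes_Sketch;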
diff --git a/bindings/ada/llvm/llvm.ads b/bindings/ada/llvm/llvm.ads
new file mode 100644
index 000000000000..d9820f114910
--- /dev/null
+++ b/bindings/ada/llvm/llvm.ads
@@ -0,0 +1,493 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C.Extensions;
+
+
+package llvm is
+
+ -- LLVMCtxt
+ --
+ type LLVMCtxt is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMCtxt_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMCtxt;
+
+ type LLVMCtxt_view is access all llvm.LLVMCtxt;
+
+ -- LLVMContextRef
+ --
+ type LLVMContextRef is access all llvm.LLVMCtxt;
+
+ type LLVMContextRef_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMContextRef;
+
+ type LLVMContextRef_view is access all llvm.LLVMContextRef;
+
+ -- LLVMOpaqueModule
+ --
+ type LLVMOpaqueModule is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueModule_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueModule;
+
+ type LLVMOpaqueModule_view is access all llvm.LLVMOpaqueModule;
+
+ -- LLVMModuleRef
+ --
+ type LLVMModuleRef is access all llvm.LLVMOpaqueModule;
+
+ type LLVMModuleRef_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMModuleRef;
+
+ type LLVMModuleRef_view is access all llvm.LLVMModuleRef;
+
+ -- LLVMOpaqueType
+ --
+ type LLVMOpaqueType is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueType_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueType;
+
+ type LLVMOpaqueType_view is access all llvm.LLVMOpaqueType;
+
+ -- LLVMTypeRef
+ --
+ type LLVMTypeRef is access all llvm.LLVMOpaqueType;
+
+ type LLVMTypeRef_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMTypeRef;
+
+ type LLVMTypeRef_view is access all llvm.LLVMTypeRef;
+
+ -- LLVMOpaqueTypeHandle
+ --
+ type LLVMOpaqueTypeHandle is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueTypeHandle_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMOpaqueTypeHandle;
+
+ type LLVMOpaqueTypeHandle_view is access all llvm.LLVMOpaqueTypeHandle;
+
+ -- LLVMTypeHandleRef
+ --
+ type LLVMTypeHandleRef is access all llvm.LLVMOpaqueTypeHandle;
+
+ type LLVMTypeHandleRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMTypeHandleRef;
+
+ type LLVMTypeHandleRef_view is access all llvm.LLVMTypeHandleRef;
+
+ -- LLVMOpaqueValue
+ --
+ type LLVMOpaqueValue is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueValue_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueValue;
+
+ type LLVMOpaqueValue_view is access all llvm.LLVMOpaqueValue;
+
+ -- LLVMValueRef
+ --
+ type LLVMValueRef is access all llvm.LLVMOpaqueValue;
+
+ type LLVMValueRef_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMValueRef;
+
+ type LLVMValueRef_view is access all llvm.LLVMValueRef;
+
+ -- LLVMOpaqueBasicBlock
+ --
+ type LLVMOpaqueBasicBlock is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueBasicBlock_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMOpaqueBasicBlock;
+
+ type LLVMOpaqueBasicBlock_view is access all llvm.LLVMOpaqueBasicBlock;
+
+ -- LLVMBasicBlockRef
+ --
+ type LLVMBasicBlockRef is access all llvm.LLVMOpaqueBasicBlock;
+
+ type LLVMBasicBlockRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMBasicBlockRef;
+
+ type LLVMBasicBlockRef_view is access all llvm.LLVMBasicBlockRef;
+
+ -- LLVMOpaqueBuilder
+ --
+ type LLVMOpaqueBuilder is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueBuilder_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMOpaqueBuilder;
+
+ type LLVMOpaqueBuilder_view is access all llvm.LLVMOpaqueBuilder;
+
+ -- LLVMBuilderRef
+ --
+ type LLVMBuilderRef is access all llvm.LLVMOpaqueBuilder;
+
+ type LLVMBuilderRef_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMBuilderRef;
+
+ type LLVMBuilderRef_view is access all llvm.LLVMBuilderRef;
+
+ -- LLVMOpaqueModuleProvider
+ --
+ type LLVMOpaqueModuleProvider is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueModuleProvider_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMOpaqueModuleProvider;
+
+ type LLVMOpaqueModuleProvider_view is access all
+ llvm.LLVMOpaqueModuleProvider;
+
+ -- LLVMModuleProviderRef
+ --
+ type LLVMModuleProviderRef is access all llvm.LLVMOpaqueModuleProvider;
+
+ type LLVMModuleProviderRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMModuleProviderRef;
+
+ type LLVMModuleProviderRef_view is access all llvm.LLVMModuleProviderRef;
+
+ -- LLVMOpaqueMemoryBuffer
+ --
+ type LLVMOpaqueMemoryBuffer is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueMemoryBuffer_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMOpaqueMemoryBuffer;
+
+ type LLVMOpaqueMemoryBuffer_view is access all llvm.LLVMOpaqueMemoryBuffer;
+
+ -- LLVMMemoryBufferRef
+ --
+ type LLVMMemoryBufferRef is access all llvm.LLVMOpaqueMemoryBuffer;
+
+ type LLVMMemoryBufferRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMMemoryBufferRef;
+
+ type LLVMMemoryBufferRef_view is access all llvm.LLVMMemoryBufferRef;
+
+ -- LLVMOpaquePassManager
+ --
+ type LLVMOpaquePassManager is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaquePassManager_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMOpaquePassManager;
+
+ type LLVMOpaquePassManager_view is access all llvm.LLVMOpaquePassManager;
+
+ -- LLVMPassManagerRef
+ --
+ type LLVMPassManagerRef is access all llvm.LLVMOpaquePassManager;
+
+ type LLVMPassManagerRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMPassManagerRef;
+
+ type LLVMPassManagerRef_view is access all llvm.LLVMPassManagerRef;
+
+ -- LLVMAttribute
+ --
+ type LLVMAttribute is (
+ LLVMZExtAttribute,
+ LLVMSExtAttribute,
+ LLVMNoReturnAttribute,
+ LLVMInRegAttribute,
+ LLVMStructRetAttribute,
+ LLVMNoUnwindAttribute,
+ LLVMNoAliasAttribute,
+ LLVMByValAttribute,
+ LLVMNestAttribute,
+ LLVMReadNoneAttribute,
+ LLVMReadOnlyAttribute,
+ LLVMNoInlineAttribute,
+ LLVMAlwaysInlineAttribute,
+ LLVMOptimizeForSizeAttribute,
+ LLVMStackProtectAttribute,
+ LLVMStackProtectReqAttribute,
+ LLVMNoCaptureAttribute,
+ LLVMNoRedZoneAttribute,
+ LLVMNoImplicitFloatAttribute,
+ LLVMNakedAttribute);
+
+ for LLVMAttribute use
+ (LLVMZExtAttribute => 1,
+ LLVMSExtAttribute => 2,
+ LLVMNoReturnAttribute => 4,
+ LLVMInRegAttribute => 8,
+ LLVMStructRetAttribute => 16,
+ LLVMNoUnwindAttribute => 32,
+ LLVMNoAliasAttribute => 64,
+ LLVMByValAttribute => 128,
+ LLVMNestAttribute => 256,
+ LLVMReadNoneAttribute => 512,
+ LLVMReadOnlyAttribute => 1024,
+ LLVMNoInlineAttribute => 2048,
+ LLVMAlwaysInlineAttribute => 4096,
+ LLVMOptimizeForSizeAttribute => 8192,
+ LLVMStackProtectAttribute => 16384,
+ LLVMStackProtectReqAttribute => 32768,
+ LLVMNoCaptureAttribute => 2097152,
+ LLVMNoRedZoneAttribute => 4194304,
+ LLVMNoImplicitFloatAttribute => 8388608,
+ LLVMNakedAttribute => 16777216);
+
+ pragma Convention (C, LLVMAttribute);
+
+ type LLVMAttribute_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMAttribute;
+
+ type LLVMAttribute_view is access all llvm.LLVMAttribute;
+
+ -- LLVMTypeKind
+ --
+ type LLVMTypeKind is (
+ LLVMVoidTypeKind,
+ LLVMFloatTypeKind,
+ LLVMDoubleTypeKind,
+ LLVMX86_FP80TypeKind,
+ LLVMFP128TypeKind,
+ LLVMPPC_FP128TypeKind,
+ LLVMLabelTypeKind,
+ LLVMIntegerTypeKind,
+ LLVMFunctionTypeKind,
+ LLVMStructTypeKind,
+ LLVMArrayTypeKind,
+ LLVMPointerTypeKind,
+ LLVMOpaqueTypeKind,
+ LLVMVectorTypeKind,
+ LLVMMetadataTypeKind);
+
+ for LLVMTypeKind use
+ (LLVMVoidTypeKind => 0,
+ LLVMFloatTypeKind => 1,
+ LLVMDoubleTypeKind => 2,
+ LLVMX86_FP80TypeKind => 3,
+ LLVMFP128TypeKind => 4,
+ LLVMPPC_FP128TypeKind => 5,
+ LLVMLabelTypeKind => 6,
+ LLVMIntegerTypeKind => 7,
+ LLVMFunctionTypeKind => 8,
+ LLVMStructTypeKind => 9,
+ LLVMArrayTypeKind => 10,
+ LLVMPointerTypeKind => 11,
+ LLVMOpaqueTypeKind => 12,
+ LLVMVectorTypeKind => 13,
+ LLVMMetadataTypeKind => 14);
+
+ pragma Convention (C, LLVMTypeKind);
+
+ type LLVMTypeKind_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMTypeKind;
+
+ type LLVMTypeKind_view is access all llvm.LLVMTypeKind;
+
+ -- LLVMLinkage
+ --
+ type LLVMLinkage is (
+ LLVMExternalLinkage,
+ LLVMAvailableExternallyLinkage,
+ LLVMLinkOnceAnyLinkage,
+ LLVMLinkOnceODRLinkage,
+ LLVMWeakAnyLinkage,
+ LLVMWeakODRLinkage,
+ LLVMAppendingLinkage,
+ LLVMInternalLinkage,
+ LLVMPrivateLinkage,
+ LLVMDLLImportLinkage,
+ LLVMDLLExportLinkage,
+ LLVMExternalWeakLinkage,
+ LLVMGhostLinkage,
+ LLVMCommonLinkage,
+ LLVMLinkerPrivateLinkage);
+
+ for LLVMLinkage use
+ (LLVMExternalLinkage => 0,
+ LLVMAvailableExternallyLinkage => 1,
+ LLVMLinkOnceAnyLinkage => 2,
+ LLVMLinkOnceODRLinkage => 3,
+ LLVMWeakAnyLinkage => 4,
+ LLVMWeakODRLinkage => 5,
+ LLVMAppendingLinkage => 6,
+ LLVMInternalLinkage => 7,
+ LLVMPrivateLinkage => 8,
+ LLVMDLLImportLinkage => 9,
+ LLVMDLLExportLinkage => 10,
+ LLVMExternalWeakLinkage => 11,
+ LLVMGhostLinkage => 12,
+ LLVMCommonLinkage => 13,
+ LLVMLinkerPrivateLinkage => 14);
+
+ pragma Convention (C, LLVMLinkage);
+
+ type LLVMLinkage_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMLinkage;
+
+ type LLVMLinkage_view is access all llvm.LLVMLinkage;
+
+ -- LLVMVisibility
+ --
+ type LLVMVisibility is (
+ LLVMDefaultVisibility,
+ LLVMHiddenVisibility,
+ LLVMProtectedVisibility);
+
+ for LLVMVisibility use
+ (LLVMDefaultVisibility => 0,
+ LLVMHiddenVisibility => 1,
+ LLVMProtectedVisibility => 2);
+
+ pragma Convention (C, LLVMVisibility);
+
+ type LLVMVisibility_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMVisibility;
+
+ type LLVMVisibility_view is access all llvm.LLVMVisibility;
+
+ -- LLVMCallConv
+ --
+ type LLVMCallConv is (
+ LLVMCCallConv,
+ LLVMFastCallConv,
+ LLVMColdCallConv,
+ LLVMX86StdcallCallConv,
+ LLVMX86FastcallCallConv);
+
+ for LLVMCallConv use
+ (LLVMCCallConv => 0,
+ LLVMFastCallConv => 8,
+ LLVMColdCallConv => 9,
+ LLVMX86StdcallCallConv => 64,
+ LLVMX86FastcallCallConv => 65);
+
+ pragma Convention (C, LLVMCallConv);
+
+ type LLVMCallConv_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMCallConv;
+
+ type LLVMCallConv_view is access all llvm.LLVMCallConv;
+
+ -- LLVMIntPredicate
+ --
+ type LLVMIntPredicate is (
+ LLVMIntEQ,
+ LLVMIntNE,
+ LLVMIntUGT,
+ LLVMIntUGE,
+ LLVMIntULT,
+ LLVMIntULE,
+ LLVMIntSGT,
+ LLVMIntSGE,
+ LLVMIntSLT,
+ LLVMIntSLE);
+
+ for LLVMIntPredicate use
+ (LLVMIntEQ => 32,
+ LLVMIntNE => 33,
+ LLVMIntUGT => 34,
+ LLVMIntUGE => 35,
+ LLVMIntULT => 36,
+ LLVMIntULE => 37,
+ LLVMIntSGT => 38,
+ LLVMIntSGE => 39,
+ LLVMIntSLT => 40,
+ LLVMIntSLE => 41);
+
+ pragma Convention (C, LLVMIntPredicate);
+
+ type LLVMIntPredicate_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.LLVMIntPredicate;
+
+ type LLVMIntPredicate_view is access all llvm.LLVMIntPredicate;
+
+ -- LLVMRealPredicate
+ --
+ type LLVMRealPredicate is (
+ LLVMRealPredicateFalse,
+ LLVMRealOEQ,
+ LLVMRealOGT,
+ LLVMRealOGE,
+ LLVMRealOLT,
+ LLVMRealOLE,
+ LLVMRealONE,
+ LLVMRealORD,
+ LLVMRealUNO,
+ LLVMRealUEQ,
+ LLVMRealUGT,
+ LLVMRealUGE,
+ LLVMRealULT,
+ LLVMRealULE,
+ LLVMRealUNE,
+ LLVMRealPredicateTrue);
+
+ for LLVMRealPredicate use
+ (LLVMRealPredicateFalse => 0,
+ LLVMRealOEQ => 1,
+ LLVMRealOGT => 2,
+ LLVMRealOGE => 3,
+ LLVMRealOLT => 4,
+ LLVMRealOLE => 5,
+ LLVMRealONE => 6,
+ LLVMRealORD => 7,
+ LLVMRealUNO => 8,
+ LLVMRealUEQ => 9,
+ LLVMRealUGT => 10,
+ LLVMRealUGE => 11,
+ LLVMRealULT => 12,
+ LLVMRealULE => 13,
+ LLVMRealUNE => 14,
+ LLVMRealPredicateTrue => 15);
+
+ pragma Convention (C, LLVMRealPredicate);
+
+ type LLVMRealPredicate_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased llvm.LLVMRealPredicate;
+
+ type LLVMRealPredicate_view is access all llvm.LLVMRealPredicate;
+
+ -- ModuleProvider
+ --
+ type ModuleProvider is new Interfaces.C.Extensions.incomplete_class_def;
+
+ type ModuleProvider_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.ModuleProvider;
+
+ type ModuleProvider_view is access all llvm.ModuleProvider;
+
+ -- MemoryBuffer
+ --
+ type MemoryBuffer is new Interfaces.C.Extensions.incomplete_class_def;
+
+ type MemoryBuffer_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.MemoryBuffer;
+
+ type MemoryBuffer_view is access all llvm.MemoryBuffer;
+
+ -- PassManagerBase
+ --
+ type PassManagerBase is new Interfaces.C.Extensions.incomplete_class_def;
+
+ type PassManagerBase_array is
+ array (Interfaces.C.size_t range <>) of aliased llvm.PassManagerBase;
+
+ type PassManagerBase_view is access all llvm.PassManagerBase;
+
+end llvm;
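
Every ref type above follows the same SWIG pattern: an opaque structure, an access-to-it ref, a size_t-indexed _array of aliased elements, and an element-access _view. A hypothetical sketch of materializing such an array for a binding that fills a caller-supplied buffer (LLVMGetParamTypes, for instance):

   with llvm;

   procedure Ref_Array_Sketch is
      --  Three slots, 0-based as the C side expects.
      Params : aliased llvm.LLVMTypeRef_array (0 .. 2) := (others => null);
      --  An LLVMTypeRef_view designates one element; aimed at the first
      --  slot it plays the role of the C out-parameter LLVMTypeRef*.
      --  'Unchecked_Access because Params is a local object.
      First  : constant llvm.LLVMTypeRef_view :=
        Params (Params'First)'Unchecked_Access;
   begin
      null;  --  First would be handed to a subprogram that fills Params
   end Ref_Array_Sketch;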
diff --git a/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads b/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
new file mode 100644
index 000000000000..7c0b086b4282
--- /dev/null
+++ b/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
@@ -0,0 +1,207 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C.Strings;
+
+
+package LLVM_link_time_Optimizer.Binding is
+
+ LTO_H : constant := 1;
+ LTO_API_VERSION : constant := 3;
+
+ function lto_get_version return Interfaces.C.Strings.chars_ptr;
+
+ function lto_get_error_message return Interfaces.C.Strings.chars_ptr;
+
+ function lto_module_is_object_file
+ (path : in Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_module_is_object_file_for_target
+ (path : in Interfaces.C.Strings.chars_ptr;
+ target_triple_prefix : in Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_module_is_object_file_in_memory
+ (mem : access Interfaces.C.Extensions.void;
+ length : in Interfaces.C.size_t)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_module_is_object_file_in_memory_for_target
+ (mem : access Interfaces.C.Extensions.void;
+ length : in Interfaces.C.size_t;
+ target_triple_prefix : in Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_module_create
+ (path : in Interfaces.C.Strings.chars_ptr)
+ return LLVM_link_time_Optimizer.lto_module_t;
+
+ function lto_module_create_from_memory
+ (mem : access Interfaces.C.Extensions.void;
+ length : in Interfaces.C.size_t)
+ return LLVM_link_time_Optimizer.lto_module_t;
+
+ procedure lto_module_dispose
+ (the_mod : in LLVM_link_time_Optimizer.lto_module_t);
+
+ function lto_module_get_target_triple
+ (the_mod : in LLVM_link_time_Optimizer.lto_module_t)
+ return Interfaces.C.Strings.chars_ptr;
+
+ function lto_module_get_num_symbols
+ (the_mod : in LLVM_link_time_Optimizer.lto_module_t)
+ return Interfaces.C.unsigned;
+
+ function lto_module_get_symbol_name
+ (the_mod : in LLVM_link_time_Optimizer.lto_module_t;
+ index : in Interfaces.C.unsigned)
+ return Interfaces.C.Strings.chars_ptr;
+
+ function lto_module_get_symbol_attribute
+ (the_mod : in LLVM_link_time_Optimizer.lto_module_t;
+ index : in Interfaces.C.unsigned)
+ return LLVM_link_time_Optimizer.lto_symbol_attributes;
+
+ function lto_codegen_create return LLVM_link_time_Optimizer.lto_code_gen_t;
+
+ procedure lto_codegen_dispose
+ (arg_1 : in LLVM_link_time_Optimizer.lto_code_gen_t);
+
+ function lto_codegen_add_module
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ the_mod : in LLVM_link_time_Optimizer.lto_module_t)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_codegen_set_debug_model
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ arg_1 : in LLVM_link_time_Optimizer.lto_debug_model)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_codegen_set_pic_model
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ arg_1 : in LLVM_link_time_Optimizer.lto_codegen_model)
+ return Interfaces.C.Extensions.bool;
+
+ procedure lto_codegen_set_gcc_path
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ path : in Interfaces.C.Strings.chars_ptr);
+
+ procedure lto_codegen_set_assembler_path
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ path : in Interfaces.C.Strings.chars_ptr);
+
+ procedure lto_codegen_add_must_preserve_symbol
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ symbol : in Interfaces.C.Strings.chars_ptr);
+
+ function lto_codegen_write_merged_modules
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ path : in Interfaces.C.Strings.chars_ptr)
+ return Interfaces.C.Extensions.bool;
+
+ function lto_codegen_compile
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ length : access Interfaces.C.size_t)
+ return access Interfaces.C.Extensions.void;
+
+ procedure lto_codegen_debug_options
+ (cg : in LLVM_link_time_Optimizer.lto_code_gen_t;
+ arg_1 : in Interfaces.C.Strings.chars_ptr);
+
+ function llvm_create_optimizer return
+ LLVM_link_time_Optimizer.llvm_lto_t;
+
+ procedure llvm_destroy_optimizer
+ (lto : in LLVM_link_time_Optimizer.llvm_lto_t);
+
+ function llvm_read_object_file
+ (lto : in LLVM_link_time_Optimizer.llvm_lto_t;
+ input_filename : in Interfaces.C.Strings.chars_ptr)
+ return LLVM_link_time_Optimizer.llvm_lto_status_t;
+
+ function llvm_optimize_modules
+ (lto : in LLVM_link_time_Optimizer.llvm_lto_t;
+ output_filename : in Interfaces.C.Strings.chars_ptr)
+ return LLVM_link_time_Optimizer.llvm_lto_status_t;
+
+private
+
+ pragma Import (C, lto_get_version, "Ada_lto_get_version");
+ pragma Import (C, lto_get_error_message, "Ada_lto_get_error_message");
+ pragma Import
+ (C,
+ lto_module_is_object_file,
+ "Ada_lto_module_is_object_file");
+ pragma Import
+ (C,
+ lto_module_is_object_file_for_target,
+ "Ada_lto_module_is_object_file_for_target");
+ pragma Import
+ (C,
+ lto_module_is_object_file_in_memory,
+ "Ada_lto_module_is_object_file_in_memory");
+ pragma Import
+ (C,
+ lto_module_is_object_file_in_memory_for_target,
+ "Ada_lto_module_is_object_file_in_memory_for_target");
+ pragma Import (C, lto_module_create, "Ada_lto_module_create");
+ pragma Import
+ (C,
+ lto_module_create_from_memory,
+ "Ada_lto_module_create_from_memory");
+ pragma Import (C, lto_module_dispose, "Ada_lto_module_dispose");
+ pragma Import
+ (C,
+ lto_module_get_target_triple,
+ "Ada_lto_module_get_target_triple");
+ pragma Import
+ (C,
+ lto_module_get_num_symbols,
+ "Ada_lto_module_get_num_symbols");
+ pragma Import
+ (C,
+ lto_module_get_symbol_name,
+ "Ada_lto_module_get_symbol_name");
+ pragma Import
+ (C,
+ lto_module_get_symbol_attribute,
+ "Ada_lto_module_get_symbol_attribute");
+ pragma Import (C, lto_codegen_create, "Ada_lto_codegen_create");
+ pragma Import (C, lto_codegen_dispose, "Ada_lto_codegen_dispose");
+ pragma Import (C, lto_codegen_add_module, "Ada_lto_codegen_add_module");
+ pragma Import
+ (C,
+ lto_codegen_set_debug_model,
+ "Ada_lto_codegen_set_debug_model");
+ pragma Import
+ (C,
+ lto_codegen_set_pic_model,
+ "Ada_lto_codegen_set_pic_model");
+ pragma Import
+ (C,
+ lto_codegen_set_gcc_path,
+ "Ada_lto_codegen_set_gcc_path");
+ pragma Import
+ (C,
+ lto_codegen_set_assembler_path,
+ "Ada_lto_codegen_set_assembler_path");
+ pragma Import
+ (C,
+ lto_codegen_add_must_preserve_symbol,
+ "Ada_lto_codegen_add_must_preserve_symbol");
+ pragma Import
+ (C,
+ lto_codegen_write_merged_modules,
+ "Ada_lto_codegen_write_merged_modules");
+ pragma Import (C, lto_codegen_compile, "Ada_lto_codegen_compile");
+ pragma Import
+ (C,
+ lto_codegen_debug_options,
+ "Ada_lto_codegen_debug_options");
+ pragma Import (C, llvm_create_optimizer, "Ada_llvm_create_optimizer");
+ pragma Import (C, llvm_destroy_optimizer, "Ada_llvm_destroy_optimizer");
+ pragma Import (C, llvm_read_object_file, "Ada_llvm_read_object_file");
+ pragma Import (C, llvm_optimize_modules, "Ada_llvm_optimize_modules");
+
+end LLVM_link_time_Optimizer.Binding;
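
A hypothetical sketch of the module-inspection subprograms above, listing every symbol of an object file (the input path is an assumption):

   with Ada.Text_IO;
   with Interfaces.C.Strings;
   with LLVM_link_time_Optimizer;
   with LLVM_link_time_Optimizer.Binding;

   procedure List_Symbols_Sketch is
      package LTO renames LLVM_link_time_Optimizer;
      use type Interfaces.C.unsigned;
      use type LTO.lto_module_t;
      Path   : Interfaces.C.Strings.chars_ptr :=
        Interfaces.C.Strings.New_String ("input.o");  --  hypothetical input
      Module : LTO.lto_module_t;
   begin
      Module := LTO.Binding.lto_module_create (Path);
      if Module = null then
         Ada.Text_IO.Put_Line
           (Interfaces.C.Strings.Value (LTO.Binding.lto_get_error_message));
      else
         --  Symbol indices are 0-based on the C side.
         for I in 1 .. LTO.Binding.lto_module_get_num_symbols (Module) loop
            Ada.Text_IO.Put_Line
              (Interfaces.C.Strings.Value
                 (LTO.Binding.lto_module_get_symbol_name (Module, I - 1)));
         end loop;
         LTO.Binding.lto_module_dispose (Module);
      end if;
      Interfaces.C.Strings.Free (Path);
   end List_Symbols_Sketch;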
diff --git a/bindings/ada/llvm/llvm_link_time_optimizer.ads b/bindings/ada/llvm/llvm_link_time_optimizer.ads
new file mode 100644
index 000000000000..c27f7c5893b6
--- /dev/null
+++ b/bindings/ada/llvm/llvm_link_time_optimizer.ads
@@ -0,0 +1,184 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C.Extensions;
+
+
+package LLVM_link_time_Optimizer is
+
+ -- lto_symbol_attributes
+ --
+ type lto_symbol_attributes is (
+ LTO_SYMBOL_ALIGNMENT_MASK,
+ LTO_SYMBOL_PERMISSIONS_RODATA,
+ LTO_SYMBOL_PERMISSIONS_CODE,
+ LTO_SYMBOL_PERMISSIONS_DATA,
+ LTO_SYMBOL_PERMISSIONS_MASK,
+ LTO_SYMBOL_DEFINITION_REGULAR,
+ LTO_SYMBOL_DEFINITION_TENTATIVE,
+ LTO_SYMBOL_DEFINITION_WEAK,
+ LTO_SYMBOL_DEFINITION_UNDEFINED,
+ LTO_SYMBOL_DEFINITION_WEAKUNDEF,
+ LTO_SYMBOL_DEFINITION_MASK,
+ LTO_SYMBOL_SCOPE_INTERNAL,
+ LTO_SYMBOL_SCOPE_HIDDEN,
+ LTO_SYMBOL_SCOPE_DEFAULT,
+ LTO_SYMBOL_SCOPE_PROTECTED,
+ LTO_SYMBOL_SCOPE_MASK);
+
+ for lto_symbol_attributes use
+ (LTO_SYMBOL_ALIGNMENT_MASK => 31,
+ LTO_SYMBOL_PERMISSIONS_RODATA => 128,
+ LTO_SYMBOL_PERMISSIONS_CODE => 160,
+ LTO_SYMBOL_PERMISSIONS_DATA => 192,
+ LTO_SYMBOL_PERMISSIONS_MASK => 224,
+ LTO_SYMBOL_DEFINITION_REGULAR => 256,
+ LTO_SYMBOL_DEFINITION_TENTATIVE => 512,
+ LTO_SYMBOL_DEFINITION_WEAK => 768,
+ LTO_SYMBOL_DEFINITION_UNDEFINED => 1024,
+ LTO_SYMBOL_DEFINITION_WEAKUNDEF => 1280,
+ LTO_SYMBOL_DEFINITION_MASK => 1792,
+ LTO_SYMBOL_SCOPE_INTERNAL => 2048,
+ LTO_SYMBOL_SCOPE_HIDDEN => 4096,
+ LTO_SYMBOL_SCOPE_DEFAULT => 6144,
+ LTO_SYMBOL_SCOPE_PROTECTED => 8192,
+ LTO_SYMBOL_SCOPE_MASK => 14336);
+
+ pragma Convention (C, lto_symbol_attributes);
+
+ type lto_symbol_attributes_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.lto_symbol_attributes;
+
+ type lto_symbol_attributes_view is access all
+ LLVM_link_time_Optimizer.lto_symbol_attributes;
+
+ -- lto_debug_model
+ --
+ type lto_debug_model is (LTO_DEBUG_MODEL_NONE, LTO_DEBUG_MODEL_DWARF);
+
+ for lto_debug_model use
+ (LTO_DEBUG_MODEL_NONE => 0,
+ LTO_DEBUG_MODEL_DWARF => 1);
+
+ pragma Convention (C, lto_debug_model);
+
+ type lto_debug_model_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.lto_debug_model;
+
+ type lto_debug_model_view is access all
+ LLVM_link_time_Optimizer.lto_debug_model;
+
+ -- lto_codegen_model
+ --
+ type lto_codegen_model is (
+ LTO_CODEGEN_PIC_MODEL_STATIC,
+ LTO_CODEGEN_PIC_MODEL_DYNAMIC,
+ LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC);
+
+ for lto_codegen_model use
+ (LTO_CODEGEN_PIC_MODEL_STATIC => 0,
+ LTO_CODEGEN_PIC_MODEL_DYNAMIC => 1,
+ LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC => 2);
+
+ pragma Convention (C, lto_codegen_model);
+
+ type lto_codegen_model_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.lto_codegen_model;
+
+ type lto_codegen_model_view is access all
+ LLVM_link_time_Optimizer.lto_codegen_model;
+
+ -- LTOModule
+ --
+ type LTOModule is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LTOModule_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.LTOModule;
+
+ type LTOModule_view is access all LLVM_link_time_Optimizer.LTOModule;
+
+ -- lto_module_t
+ --
+ type lto_module_t is access all LLVM_link_time_Optimizer.LTOModule;
+
+ type lto_module_t_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.lto_module_t;
+
+ type lto_module_t_view is access all LLVM_link_time_Optimizer.lto_module_t;
+
+ -- LTOCodeGenerator
+ --
+ type LTOCodeGenerator is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LTOCodeGenerator_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.LTOCodeGenerator;
+
+ type LTOCodeGenerator_view is access all
+ LLVM_link_time_Optimizer.LTOCodeGenerator;
+
+ -- lto_code_gen_t
+ --
+ type lto_code_gen_t is access all LLVM_link_time_Optimizer.LTOCodeGenerator;
+
+ type lto_code_gen_t_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.lto_code_gen_t;
+
+ type lto_code_gen_t_view is access all
+ LLVM_link_time_Optimizer.lto_code_gen_t;
+
+ -- llvm_lto_status_t
+ --
+ type llvm_lto_status_t is (
+ LLVM_LTO_UNKNOWN,
+ LLVM_LTO_OPT_SUCCESS,
+ LLVM_LTO_READ_SUCCESS,
+ LLVM_LTO_READ_FAILURE,
+ LLVM_LTO_WRITE_FAILURE,
+ LLVM_LTO_NO_TARGET,
+ LLVM_LTO_NO_WORK,
+ LLVM_LTO_MODULE_MERGE_FAILURE,
+ LLVM_LTO_ASM_FAILURE,
+ LLVM_LTO_NULL_OBJECT);
+
+ for llvm_lto_status_t use
+ (LLVM_LTO_UNKNOWN => 0,
+ LLVM_LTO_OPT_SUCCESS => 1,
+ LLVM_LTO_READ_SUCCESS => 2,
+ LLVM_LTO_READ_FAILURE => 3,
+ LLVM_LTO_WRITE_FAILURE => 4,
+ LLVM_LTO_NO_TARGET => 5,
+ LLVM_LTO_NO_WORK => 6,
+ LLVM_LTO_MODULE_MERGE_FAILURE => 7,
+ LLVM_LTO_ASM_FAILURE => 8,
+ LLVM_LTO_NULL_OBJECT => 9);
+
+ pragma Convention (C, llvm_lto_status_t);
+
+ type llvm_lto_status_t_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.llvm_lto_status_t;
+
+ type llvm_lto_status_t_view is access all
+ LLVM_link_time_Optimizer.llvm_lto_status_t;
+
+ -- llvm_lto_t
+ --
+ type llvm_lto_t is access all Interfaces.C.Extensions.void;
+
+ type llvm_lto_t_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_link_time_Optimizer.llvm_lto_t;
+
+ type llvm_lto_t_view is access all
+ LLVM_link_time_Optimizer.llvm_lto_t;
+
+end LLVM_link_time_Optimizer;
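
And a hypothetical sketch of the coarse llvm_lto entry points from the Binding child package, checked against the llvm_lto_status_t codes defined above (both file names are assumptions):

   with Interfaces.C.Strings;
   with LLVM_link_time_Optimizer;
   with LLVM_link_time_Optimizer.Binding;

   procedure Optimize_Modules_Sketch is
      package LTO renames LLVM_link_time_Optimizer;
      use type LTO.llvm_lto_status_t;
      Opt      : constant LTO.llvm_lto_t := LTO.Binding.llvm_create_optimizer;
      Obj_Path : Interfaces.C.Strings.chars_ptr :=
        Interfaces.C.Strings.New_String ("input.o");   --  hypothetical
      Asm_Path : Interfaces.C.Strings.chars_ptr :=
        Interfaces.C.Strings.New_String ("output.s");  --  hypothetical
      Status   : LTO.llvm_lto_status_t;
   begin
      Status := LTO.Binding.llvm_read_object_file (Opt, Obj_Path);
      if Status = LTO.LLVM_LTO_READ_SUCCESS then
         Status := LTO.Binding.llvm_optimize_modules (Opt, Asm_Path);
         --  LLVM_LTO_OPT_SUCCESS is the expected result here.
      end if;
      LTO.Binding.llvm_destroy_optimizer (Opt);
      Interfaces.C.Strings.Free (Obj_Path);
      Interfaces.C.Strings.Free (Asm_Path);
   end Optimize_Modules_Sketch;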
diff --git a/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx b/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
new file mode 100644
index 000000000000..eb2e7ab15633
--- /dev/null
+++ b/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
@@ -0,0 +1,923 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+ SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+ if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+ callback = SWIG_ada_exceptions[code].callback;
+ }
+ callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_link_time_Optimizer (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
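
/* A hypothetical host-side illustration (names assumed): one handler per
   SWIG_AdaExceptionCodes entry is expected to be installed through the
   routine above before any wrapped call, for example

       static void on_error (const char *msg)
       { fprintf (stderr, "lto binding: %s\n", msg); abort (); }

       SWIGRegisterExceptionCallbacks_LLVM_link_time_Optimizer
         (on_error, on_error, on_error, on_error, on_error, on_error);

   SWIG_AdaThrowException above then dispatches through the filled-in
   table instead of invoking a null callback. */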
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_link_time_Optimizer(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
+
+
+#define protected public
+#define private public
+
+#include "llvm-c/lto.h"
+#include "llvm-c/LinkTimeOptimizer.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport char * SWIGSTDCALL Ada_lto_get_version (
+ )
+{
+ char * jresult ;
+ char *result = 0 ;
+
+ result = (char *)lto_get_version();
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_lto_get_error_message (
+ )
+{
+ char * jresult ;
+ char *result = 0 ;
+
+ result = (char *)lto_get_error_message();
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file (
+ char * jarg1
+ )
+{
+ unsigned int jresult ;
+ char *arg1 = (char *) 0 ;
+ bool result;
+
+ arg1 = jarg1;
+
+ result = (bool)lto_module_is_object_file((char const *)arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_for_target (
+ char * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ unsigned int jresult ;
+ char *arg1 = (char *) 0 ;
+ char *arg2 = (char *) 0 ;
+ bool result;
+
+ arg1 = jarg1;
+
+ arg2 = jarg2;
+
+ result = (bool)lto_module_is_object_file_for_target((char const *)arg1,(char const *)arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_in_memory (
+ void* jarg1
+ ,
+
+ size_t jarg2
+ )
+{
+ unsigned int jresult ;
+ void *arg1 = (void *) 0 ;
+ size_t arg2 ;
+ bool result;
+
+ arg1 = (void *)jarg1;
+
+
+ arg2 = (size_t) jarg2;
+
+
+ result = (bool)lto_module_is_object_file_in_memory((void const *)arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_in_memory_for_target (
+ void* jarg1
+ ,
+
+ size_t jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ unsigned int jresult ;
+ void *arg1 = (void *) 0 ;
+ size_t arg2 ;
+ char *arg3 = (char *) 0 ;
+ bool result;
+
+ arg1 = (void *)jarg1;
+
+
+ arg2 = (size_t) jarg2;
+
+
+ arg3 = jarg3;
+
+ result = (bool)lto_module_is_object_file_in_memory_for_target((void const *)arg1,arg2,(char const *)arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_lto_module_create (
+ char * jarg1
+ )
+{
+ void * jresult ;
+ char *arg1 = (char *) 0 ;
+ lto_module_t result;
+
+ arg1 = jarg1;
+
+ result = (lto_module_t)lto_module_create((char const *)arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_lto_module_create_from_memory (
+ void* jarg1
+ ,
+
+ size_t jarg2
+ )
+{
+ void * jresult ;
+ void *arg1 = (void *) 0 ;
+ size_t arg2 ;
+ lto_module_t result;
+
+ arg1 = (void *)jarg1;
+
+
+ arg2 = (size_t) jarg2;
+
+
+ result = (lto_module_t)lto_module_create_from_memory((void const *)arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_lto_module_dispose (
+ void * jarg1
+ )
+{
+ lto_module_t arg1 = (lto_module_t) 0 ;
+
+ arg1 = (lto_module_t)jarg1;
+
+ lto_module_dispose(arg1);
+
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_lto_module_get_target_triple (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ lto_module_t arg1 = (lto_module_t) 0 ;
+ char *result = 0 ;
+
+ arg1 = (lto_module_t)jarg1;
+
+ result = (char *)lto_module_get_target_triple(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_module_get_num_symbols (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ lto_module_t arg1 = (lto_module_t) 0 ;
+ unsigned int result;
+
+ arg1 = (lto_module_t)jarg1;
+
+ result = (unsigned int)lto_module_get_num_symbols(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_lto_module_get_symbol_name (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ char * jresult ;
+ lto_module_t arg1 = (lto_module_t) 0 ;
+ unsigned int arg2 ;
+ char *result = 0 ;
+
+ arg1 = (lto_module_t)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (char *)lto_module_get_symbol_name(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_lto_module_get_symbol_attribute (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ int jresult ;
+ lto_module_t arg1 = (lto_module_t) 0 ;
+ unsigned int arg2 ;
+ lto_symbol_attributes result;
+
+ arg1 = (lto_module_t)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (lto_symbol_attributes)lto_module_get_symbol_attribute(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_lto_codegen_create (
+ )
+{
+ void * jresult ;
+ lto_code_gen_t result;
+
+ result = (lto_code_gen_t)lto_codegen_create();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_lto_codegen_dispose (
+ void * jarg1
+ )
+{
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ lto_codegen_dispose(arg1);
+
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_add_module (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned int jresult ;
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ lto_module_t arg2 = (lto_module_t) 0 ;
+ bool result;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = (lto_module_t)jarg2;
+
+ result = (bool)lto_codegen_add_module(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_set_debug_model (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ unsigned int jresult ;
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ lto_debug_model arg2 ;
+ bool result;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = (lto_debug_model) jarg2;
+
+ result = (bool)lto_codegen_set_debug_model(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_set_pic_model (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ unsigned int jresult ;
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ lto_codegen_model arg2 ;
+ bool result;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = (lto_codegen_model) jarg2;
+
+ result = (bool)lto_codegen_set_pic_model(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_lto_codegen_set_gcc_path (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = jarg2;
+
+ lto_codegen_set_gcc_path(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_lto_codegen_set_assembler_path (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = jarg2;
+
+ lto_codegen_set_assembler_path(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_lto_codegen_add_must_preserve_symbol (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = jarg2;
+
+ lto_codegen_add_must_preserve_symbol(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_write_merged_modules (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ unsigned int jresult ;
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ char *arg2 = (char *) 0 ;
+ bool result;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = jarg2;
+
+ result = (bool)lto_codegen_write_merged_modules(arg1,(char const *)arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void* SWIGSTDCALL Ada_lto_codegen_compile (
+ void * jarg1
+ ,
+
+ size_t* jarg2
+ )
+{
+ void* jresult ;
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ size_t *arg2 = (size_t *) 0 ;
+ void *result = 0 ;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+
+ arg2 = (size_t *) jarg2;
+
+
+ result = (void *)lto_codegen_compile(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_lto_codegen_debug_options (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (lto_code_gen_t)jarg1;
+
+ arg2 = jarg2;
+
+ lto_codegen_debug_options(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport void* SWIGSTDCALL Ada_llvm_create_optimizer (
+ )
+{
+ void* jresult ;
+ llvm_lto_t result;
+
+ result = (llvm_lto_t)llvm_create_optimizer();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_llvm_destroy_optimizer (
+ void* jarg1
+ )
+{
+ llvm_lto_t arg1 = (llvm_lto_t) 0 ;
+
+ arg1 = (llvm_lto_t)jarg1;
+
+ llvm_destroy_optimizer(arg1);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_llvm_read_object_file (
+ void* jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ int jresult ;
+ llvm_lto_t arg1 = (llvm_lto_t) 0 ;
+ char *arg2 = (char *) 0 ;
+ llvm_lto_status_t result;
+
+ arg1 = (llvm_lto_t)jarg1;
+
+ arg2 = jarg2;
+
+ result = (llvm_lto_status_t)llvm_read_object_file(arg1,(char const *)arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_llvm_optimize_modules (
+ void* jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ int jresult ;
+ llvm_lto_t arg1 = (llvm_lto_t) 0 ;
+ char *arg2 = (char *) 0 ;
+ llvm_lto_status_t result;
+
+ arg1 = (llvm_lto_t)jarg1;
+
+ arg2 = jarg2;
+
+ result = (llvm_lto_status_t)llvm_optimize_modules(arg1,(char const *)arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
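+/* Illustrative sketch only, not SWIG output.  A C-side driver for the flat
+ * LTO wrappers above, assuming they are linked into the same image; the
+ * intended callers are the generated Ada specs
+ * (llvm_link_time_optimizer-binding.ads).  `demo_lto' is a hypothetical name. */
+static void demo_lto (const char *path)
+{
+  void *mod = Ada_lto_module_create((char *)path);
+  if (mod == NULL) return;
+  void *cg = Ada_lto_codegen_create();
+  /* lto_codegen_add_module() returns true (nonzero) on error */
+  if (Ada_lto_codegen_add_module(cg, mod) == 0) {
+    size_t len = 0;
+    void *obj = Ada_lto_codegen_compile(cg, &len);  /* native object bytes */
+    (void)obj;
+  }
+  Ada_lto_codegen_dispose(cg);
+  Ada_lto_module_dispose(mod);
+}
+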
diff --git a/bindings/ada/llvm/llvm_wrap.cxx b/bindings/ada/llvm/llvm_wrap.cxx
new file mode 100644
index 000000000000..79b19ff4c0bb
--- /dev/null
+++ b/bindings/ada/llvm/llvm_wrap.cxx
@@ -0,0 +1,8817 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
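+
+/* Illustrative sketch only, not SWIG output: SwigValueWrapper exists so the
+ * generated code can hold a T "by value" even when T has no default
+ * constructor.  `Pt' and `demo_value_wrapper' are hypothetical names. */
+#ifdef __cplusplus
+struct Pt { int x; Pt(int x) : x(x) {} };  /* no default constructor */
+inline void demo_value_wrapper() {
+  SwigValueWrapper<Pt> w;                  /* legal: starts out empty  */
+  w = Pt(7);                               /* copy-assigned when ready */
+  Pt &r = w;                               /* converts back to T&      */
+  (void)r;
+}
+#endif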
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+  if (code >= 0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+    callback = SWIG_ada_exceptions[code].callback;
+  }
+  if (callback != NULL)  /* guard: the table starts out all-NULL until the
+                            Ada runtime registers its handlers */
+    callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_llvm (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
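+
+/* Illustrative sketch only, not SWIG output.  The Ada runtime is expected to
+ * register one handler per slot before any wrapper can fail; a C stand-in
+ * (hypothetical `demo_handler') shows the shape of the protocol: */
+static void SWIGSTDCALL demo_handler (const char *msg)
+{
+  fprintf(stderr, "Ada exception requested: %s\n", msg);
+  abort();  /* a real handler raises an Ada exception and never returns */
+}
+
+static void demo_register_handlers (void)
+{
+  SWIGRegisterExceptionCallbacks_llvm(demo_handler, demo_handler, demo_handler,
+                                      demo_handler, demo_handler, demo_handler);
+  SWIG_AdaThrowException(SWIG_AdaNullReferenceException, "null module handle");
+}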
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_llvm(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
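+
+/* Illustrative sketch only, not SWIG output: the macro is written to be
+ * followed by a semicolon, which becomes its empty `else' branch.
+ * `demo_checked_deref' is a hypothetical wrapper-style function. */
+static unsigned int demo_checked_deref (unsigned int *p)
+{
+  SWIG_contract_assert(0, p != NULL, "p must not be null");
+  return *p;
+}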
+
+
+#define protected public
+#define private public
+
+//#include "llvm-c/Analysis.h"
+//#include "llvm-c/BitReader.h"
+//#include "llvm-c/BitWriter.h"
+#include "llvm-c/Core.h"
+//#include "llvm-c/ExecutionEngine.h"
+//#include "llvm-c/LinkTimeOptimizer.h"
+//#include "llvm-c/lto.h"
+//#include "llvm-c/Target.h"
+
+
+
+ struct LLVMCtxt;
+// struct LLVMOpaqueType;
+// struct LLVMOpaqueValue;
+
+#undef protected
+#undef private
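+
+/* Note on the block above: redefining `protected' and `private' is a blunt
+ * SWIG trick that exposes every member of the headers included in between to
+ * the wrappers.  Defining a keyword as a macro is formally undefined behavior,
+ * which is why the #undefs restore both immediately after the includes. */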
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport void SWIGSTDCALL Ada_LLVMDisposeMessage (
+ char * jarg1
+ )
+{
+ char *arg1 = (char *) 0 ;
+
+ arg1 = jarg1;
+
+ LLVMDisposeMessage(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMContextCreate (
+ )
+{
+ void * jresult ;
+ LLVMContextRef result;
+
+ result = (LLVMContextRef)LLVMContextCreate();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetGlobalContext (
+ )
+{
+ void * jresult ;
+ LLVMContextRef result;
+
+ result = (LLVMContextRef)LLVMGetGlobalContext();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMContextDispose (
+ void * jarg1
+ )
+{
+ LLVMContextRef arg1 = (LLVMContextRef) 0 ;
+
+ arg1 = (LLVMContextRef)jarg1;
+
+ LLVMContextDispose(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMModuleCreateWithName (
+ char * jarg1
+ )
+{
+ void * jresult ;
+ char *arg1 = (char *) 0 ;
+ LLVMModuleRef result;
+
+ arg1 = jarg1;
+
+ result = (LLVMModuleRef)LLVMModuleCreateWithName((char const *)arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMModuleCreateWithNameInContext (
+ char * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ char *arg1 = (char *) 0 ;
+ LLVMContextRef arg2 = (LLVMContextRef) 0 ;
+ LLVMModuleRef result;
+
+ arg1 = jarg1;
+
+ arg2 = (LLVMContextRef)jarg2;
+
+ result = (LLVMModuleRef)LLVMModuleCreateWithNameInContext((char const *)arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeModule (
+ void * jarg1
+ )
+{
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ LLVMDisposeModule(arg1);
+
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_LLVMGetDataLayout (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *result = 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (char *)LLVMGetDataLayout(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetDataLayout (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ LLVMSetDataLayout(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_LLVMGetTarget (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *result = 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (char *)LLVMGetTarget(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetTarget (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ LLVMSetTarget(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMAddTypeName (
+ void * jarg1
+ ,
+
+ char * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ int result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ result = (int)LLVMAddTypeName(arg1,(char const *)arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDeleteTypeName (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ LLVMDeleteTypeName(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetTypeByName (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (LLVMTypeRef)LLVMGetTypeByName(arg1,(char const *)arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDumpModule (
+ void * jarg1
+ )
+{
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ LLVMDumpModule(arg1);
+
+
+}
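+
+/* Illustrative sketch only, not SWIG output: the wrappers above compose in
+ * the obvious create/use/dispose order.  `demo_module' and the triple string
+ * are hypothetical. */
+static void demo_module (void)
+{
+  void *ctx = Ada_LLVMContextCreate();
+  void *mod = Ada_LLVMModuleCreateWithNameInContext((char *)"demo", ctx);
+  Ada_LLVMSetTarget(mod, (char *)"x86_64-unknown-freebsd");
+  Ada_LLVMDumpModule(mod);          /* prints the module's IR */
+  Ada_LLVMDisposeModule(mod);
+  Ada_LLVMContextDispose(ctx);
+}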
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMGetTypeKind (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeKind result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMTypeKind)LLVMGetTypeKind(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMInt1Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMInt1Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMInt8Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMInt8Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMInt16Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMInt16Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMInt32Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMInt32Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMInt64Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMInt64Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIntType (
+ unsigned int jarg1
+ )
+{
+ void * jresult ;
+ unsigned int arg1 ;
+ LLVMTypeRef result;
+
+
+ arg1 = (unsigned int) jarg1;
+
+
+ result = (LLVMTypeRef)LLVMIntType(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetIntTypeWidth (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (unsigned int)LLVMGetIntTypeWidth(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMFloatType (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMFloatType();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMDoubleType (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMDoubleType();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMX86FP80Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMX86FP80Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMFP128Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMFP128Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMPPCFP128Type (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMPPCFP128Type();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMFunctionType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ ,
+
+ int jarg4
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ;
+ unsigned int arg3 ;
+ int arg4 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = (LLVMTypeRef *)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+
+ arg4 = (int) jarg4;
+
+
+ result = (LLVMTypeRef)LLVMFunctionType(arg1,arg2,arg3,arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsFunctionVarArg (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (int)LLVMIsFunctionVarArg(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetReturnType (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMTypeRef)LLVMGetReturnType(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountParamTypes (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (unsigned int)LLVMCountParamTypes(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMGetParamTypes (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = (LLVMTypeRef *)jarg2;
+
+ LLVMGetParamTypes(arg1,arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMStructType (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ ,
+
+ int jarg3
+ )
+{
+ void * jresult ;
+ LLVMTypeRef *arg1 = (LLVMTypeRef *) 0 ;
+ unsigned int arg2 ;
+ int arg3 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef *)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+
+ arg3 = (int) jarg3;
+
+
+ result = (LLVMTypeRef)LLVMStructType(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountStructElementTypes (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (unsigned int)LLVMCountStructElementTypes(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMGetStructElementTypes (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = (LLVMTypeRef *)jarg2;
+
+ LLVMGetStructElementTypes(arg1,arg2);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsPackedStruct (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (int)LLVMIsPackedStruct(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMArrayType (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int arg2 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMTypeRef)LLVMArrayType(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMPointerType (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int arg2 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMTypeRef)LLVMPointerType(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
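+
+/* Illustrative sketch only, not SWIG output: building the type
+ * `i32 (i32, i8*)' through the flat wrappers.  `demo_fn_type' is a
+ * hypothetical name; arg4 of Ada_LLVMFunctionType is the IsVarArg flag. */
+static void *demo_fn_type (void)
+{
+  void *params[2];
+  params[0] = Ada_LLVMInt32Type();
+  params[1] = Ada_LLVMPointerType(Ada_LLVMInt8Type(), 0);  /* address space 0 */
+  return Ada_LLVMFunctionType(Ada_LLVMInt32Type(), (void *)params, 2, 0);
+}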
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMVectorType (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int arg2 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMTypeRef)LLVMVectorType(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetElementType (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMTypeRef)LLVMGetElementType(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetArrayLength (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (unsigned int)LLVMGetArrayLength(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetPointerAddressSpace (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (unsigned int)LLVMGetPointerAddressSpace(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetVectorSize (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (unsigned int)LLVMGetVectorSize(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMVoidType (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMVoidType();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMLabelType (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMLabelType();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMOpaqueType (
+ )
+{
+ void * jresult ;
+ LLVMTypeRef result;
+
+ result = (LLVMTypeRef)LLVMOpaqueType();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateTypeHandle (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeHandleRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMTypeHandleRef)LLVMCreateTypeHandle(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMRefineType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ LLVMRefineType(arg1,arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMResolveTypeHandle (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeHandleRef arg1 = (LLVMTypeHandleRef) 0 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTypeHandleRef)jarg1;
+
+ result = (LLVMTypeRef)LLVMResolveTypeHandle(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeTypeHandle (
+ void * jarg1
+ )
+{
+ LLVMTypeHandleRef arg1 = (LLVMTypeHandleRef) 0 ;
+
+ arg1 = (LLVMTypeHandleRef)jarg1;
+
+ LLVMDisposeTypeHandle(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMTypeOf (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMTypeRef)LLVMTypeOf(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_LLVMGetValueName (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *result = 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (char *)LLVMGetValueName(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetValueName (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = jarg2;
+
+ LLVMSetValueName(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDumpValue (
+ void * jarg1
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ LLVMDumpValue(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAArgument (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAArgument(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
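+
+/* The Ada_LLVMIsA* family that follows shares one shape: each wraps an LLVM
+ * dyn_cast-style query that returns the value itself when it has the
+ * asked-for kind, and NULL otherwise.  Illustrative predicate (hypothetical
+ * name, not SWIG output): */
+static int demo_is_argument (void *v)
+{
+  return Ada_LLVMIsAArgument(v) != NULL;
+}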
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsABasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsABasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInlineAsm (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAInlineAsm(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUser (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAUser(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstant (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstant(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantAggregateZero (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantAggregateZero(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantArray (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantArray(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantExpr (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantExpr(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantFP (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantFP(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantInt (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantInt(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantPointerNull (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantPointerNull(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantStruct (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantStruct(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantVector (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAConstantVector(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalValue (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAGlobalValue(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFunction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFunction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalAlias (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAGlobalAlias(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalVariable (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAGlobalVariable(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUndefValue (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAUndefValue(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInstruction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAInstruction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsABinaryOperator (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsABinaryOperator(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsACallInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsACallInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAIntrinsicInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAIntrinsicInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgInfoIntrinsic (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsADbgInfoIntrinsic(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgDeclareInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsADbgDeclareInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgFuncStartInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsADbgFuncStartInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgRegionEndInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsADbgRegionEndInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgRegionStartInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsADbgRegionStartInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgStopPointInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsADbgStopPointInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAEHSelectorInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAEHSelectorInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemIntrinsic (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAMemIntrinsic(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemCpyInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAMemCpyInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemMoveInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAMemMoveInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemSetInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAMemSetInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsACmpInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsACmpInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFCmpInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFCmpInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAICmpInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAICmpInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAExtractElementInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAExtractElementInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGetElementPtrInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAGetElementPtrInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInsertElementInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAInsertElementInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInsertValueInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAInsertValueInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAPHINode (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAPHINode(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsASelectInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsASelectInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAShuffleVectorInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAShuffleVectorInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAStoreInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAStoreInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsATerminatorInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsATerminatorInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsABranchInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsABranchInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInvokeInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAInvokeInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAReturnInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAReturnInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsASwitchInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsASwitchInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUnreachableInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAUnreachableInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUnwindInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAUnwindInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUnaryInstruction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAUnaryInstruction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAAllocationInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAAllocationInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAAllocaInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAAllocaInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMallocInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAMallocInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsACastInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsACastInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsABitCastInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsABitCastInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPExtInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFPExtInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPToSIInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFPToSIInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPToUIInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFPToUIInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPTruncInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFPTruncInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAIntToPtrInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAIntToPtrInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAPtrToIntInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAPtrToIntInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsASExtInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsASExtInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsASIToFPInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsASIToFPInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsATruncInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsATruncInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUIToFPInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAUIToFPInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAZExtInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAZExtInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAExtractValueInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAExtractValueInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFreeInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAFreeInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsALoadInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsALoadInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIsAVAArgInst (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMIsAVAArgInst(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstNull (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMValueRef)LLVMConstNull(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstAllOnes (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMValueRef)LLVMConstAllOnes(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetUndef (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetUndef(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsConstant (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsConstant(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsNull (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsNull(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsUndef (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsUndef(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstPointerNull (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMValueRef)LLVMConstPointerNull(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstInt (
+ void * jarg1
+ ,
+
+ unsigned long long jarg2
+ ,
+
+ int jarg3
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ unsigned long long arg2 ;
+ int arg3 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (unsigned long long) jarg2;
+
+
+
+ arg3 = (int) jarg3;
+
+
+ result = (LLVMValueRef)LLVMConstInt(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstReal (
+ void * jarg1
+ ,
+
+ double jarg2
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ double arg2 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+
+ arg2 = (double) jarg2;
+
+
+ result = (LLVMValueRef)LLVMConstReal(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstRealOfString (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (LLVMValueRef)LLVMConstRealOfString(arg1,(char const *)arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstString (
+ char * jarg1
+ ,
+
+ unsigned int jarg2
+ ,
+
+ int jarg3
+ )
+{
+ void * jresult ;
+ char *arg1 = (char *) 0 ;
+ unsigned int arg2 ;
+ int arg3 ;
+ LLVMValueRef result;
+
+ arg1 = jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+
+ arg3 = (int) jarg3;
+
+
+ result = (LLVMValueRef)LLVMConstString((char const *)arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstArray (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
+ unsigned int arg3 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = (LLVMValueRef *)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ result = (LLVMValueRef)LLVMConstArray(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstStruct (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ ,
+
+ int jarg3
+ )
+{
+ void * jresult ;
+ LLVMValueRef *arg1 = (LLVMValueRef *) 0 ;
+ unsigned int arg2 ;
+ int arg3 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef *)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+
+ arg3 = (int) jarg3;
+
+
+ result = (LLVMValueRef)LLVMConstStruct(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstVector (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef *arg1 = (LLVMValueRef *) 0 ;
+ unsigned int arg2 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef *)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMValueRef)LLVMConstVector(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMSizeOf (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ result = (LLVMValueRef)LLVMSizeOf(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstNeg (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMConstNeg(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstNot (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMConstNot(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstAdd (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstAdd(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstSub (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstSub(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstMul (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstMul(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstUDiv (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstUDiv(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstSDiv (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstSDiv(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFDiv (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstFDiv(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstURem (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstURem(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstSRem (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstSRem(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFRem (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstFRem(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstAnd (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstAnd(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstOr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstOr(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstXor (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstXor(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
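+/* Constant comparisons: the Ada side passes the predicate as a plain
+   int, which is cast to LLVMIntPredicate / LLVMRealPredicate below, so
+   the Ada enumeration values must match the C enum layout.  Sketch,
+   assuming lhs and rhs are integer constants of the same width:
+
+     LLVMValueRef eq = LLVMConstICmp(LLVMIntEQ, lhs, rhs);  // i1 constant
+*/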
+DllExport void * SWIGSTDCALL Ada_LLVMConstICmp (
+ int jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMIntPredicate arg1 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMIntPredicate) jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ result = (LLVMValueRef)LLVMConstICmp(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFCmp (
+ int jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMRealPredicate arg1 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMRealPredicate) jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ result = (LLVMValueRef)LLVMConstFCmp(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstShl (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstShl(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstLShr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstLShr(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstAShr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstAShr(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstGEP (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
+ unsigned int arg3 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef *)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ result = (LLVMValueRef)LLVMConstGEP(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
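+/* Constant cast wrappers (Trunc, SExt, ZExt, FPTrunc, FPExt, UIToFP,
+   SIToFP, FPToUI, FPToSI, PtrToInt, IntToPtr, BitCast).  Each takes the
+   constant to convert plus the destination LLVMTypeRef.  Sketch,
+   assuming c64 is an i64 constant:
+
+     LLVMValueRef c8 = LLVMConstTrunc(c64, LLVMInt8Type());
+*/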
+DllExport void * SWIGSTDCALL Ada_LLVMConstTrunc (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstTrunc(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstSExt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstSExt(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstZExt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstZExt(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPTrunc (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstFPTrunc(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPExt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstFPExt(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstUIToFP (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstUIToFP(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstSIToFP (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstSIToFP(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPToUI (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstFPToUI(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPToSI (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstFPToSI(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstPtrToInt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstPtrToInt(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstIntToPtr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstIntToPtr(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstBitCast (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstBitCast(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstSelect (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ result = (LLVMValueRef)LLVMConstSelect(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstExtractElement (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMConstExtractElement(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstInsertElement (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ result = (LLVMValueRef)LLVMConstInsertElement(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstShuffleVector (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ result = (LLVMValueRef)LLVMConstShuffleVector(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
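+/* ExtractValue/InsertValue take an index path as an unsigned int array;
+   the Ada binding passes the array address through unchanged, so the
+   Ada array must be contiguous and 0-based.  Sketch, assuming agg is a
+   constant struct or array:
+
+     unsigned idx[1] = { 0 };
+     LLVMValueRef first = LLVMConstExtractValue(agg, idx, 1);
+*/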
+DllExport void * SWIGSTDCALL Ada_LLVMConstExtractValue (
+ void * jarg1
+ ,
+
+ unsigned int* jarg2
+ ,
+
+ unsigned int jarg3
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int *arg2 = (unsigned int *) 0 ;
+ unsigned int arg3 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int *) jarg2;
+
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ result = (LLVMValueRef)LLVMConstExtractValue(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstInsertValue (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int* jarg3
+ ,
+
+ unsigned int jarg4
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ unsigned int *arg3 = (unsigned int *) 0 ;
+ unsigned int arg4 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+
+ arg3 = (unsigned int *) jarg3;
+
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ result = (LLVMValueRef)LLVMConstInsertValue(arg1,arg2,arg3,arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstInlineAsm (
+ void * jarg1
+ ,
+
+ char * jarg2
+ ,
+
+ char * jarg3
+ ,
+
+ int jarg4
+ )
+{
+ void * jresult ;
+ LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ char *arg3 = (char *) 0 ;
+ int arg4 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMTypeRef)jarg1;
+
+ arg2 = jarg2;
+
+ arg3 = jarg3;
+
+
+ arg4 = (int) jarg4;
+
+
+ result = (LLVMValueRef)LLVMConstInlineAsm(arg1,(char const *)arg2,(char const *)arg3,arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
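+/* From here on the wrappers cover global-value introspection and
+   mutation (parent module, linkage, section, visibility, alignment).
+   Getters return ints that the Ada side maps back onto its enumeration
+   types.  Sketch, assuming g is a global value:
+
+     LLVMSetLinkage(g, LLVMInternalLinkage);
+     LLVMLinkage l = LLVMGetLinkage(g);  // == LLVMInternalLinkage
+*/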
+DllExport void * SWIGSTDCALL Ada_LLVMGetGlobalParent (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMModuleRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMModuleRef)LLVMGetGlobalParent(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsDeclaration (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsDeclaration(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMGetLinkage (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMLinkage result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMLinkage)LLVMGetLinkage(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetLinkage (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMLinkage arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMLinkage) jarg2;
+
+ LLVMSetLinkage(arg1,arg2);
+
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_LLVMGetSection (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *result = 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (char *)LLVMGetSection(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetSection (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = jarg2;
+
+ LLVMSetSection(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMGetVisibility (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMVisibility result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMVisibility)LLVMGetVisibility(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetVisibility (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMVisibility arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMVisibility) jarg2;
+
+ LLVMSetVisibility(arg1,arg2);
+
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetAlignment (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMGetAlignment(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetAlignment (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ LLVMSetAlignment(arg1,arg2);
+
+
+}
+
+
+
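+/* Global-variable creation and iteration.  Modules keep their globals
+   in an intrusive list, so first/last/next/previous walk it in order.
+   Sketch, assuming mod is an LLVMModuleRef created earlier:
+
+     LLVMValueRef g = LLVMAddGlobal(mod, LLVMInt32Type(), "counter");
+     LLVMSetInitializer(g, LLVMConstInt(LLVMInt32Type(), 0, 0));
+     for (LLVMValueRef it = LLVMGetFirstGlobal(mod); it;
+          it = LLVMGetNextGlobal(it))
+       ;  // visit each global; iteration ends on NULL
+*/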
+DllExport void * SWIGSTDCALL Ada_LLVMAddGlobal (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMAddGlobal(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNamedGlobal (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (LLVMValueRef)LLVMGetNamedGlobal(arg1,(char const *)arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstGlobal (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetFirstGlobal(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastGlobal (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetLastGlobal(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextGlobal (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetNextGlobal(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousGlobal (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetPreviousGlobal(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDeleteGlobal (
+ void * jarg1
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ LLVMDeleteGlobal(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetInitializer (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetInitializer(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetInitializer (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ LLVMSetInitializer(arg1,arg2);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsThreadLocal (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsThreadLocal(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetThreadLocal (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (int) jarg2;
+
+
+ LLVMSetThreadLocal(arg1,arg2);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsGlobalConstant (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsGlobalConstant(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetGlobalConstant (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (int) jarg2;
+
+
+ LLVMSetGlobalConstant(arg1,arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMAddAlias (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMAddAlias(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
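+/* Function creation and lookup.  LLVMAddFunction needs a function type;
+   the same first/last/next/previous pattern used for globals walks the
+   module's function list.  Sketch, assuming mod is an LLVMModuleRef:
+
+     LLVMTypeRef fty  = LLVMFunctionType(LLVMInt32Type(), NULL, 0, 0);
+     LLVMValueRef f   = LLVMAddFunction(mod, "main", fty);
+     LLVMValueRef same = LLVMGetNamedFunction(mod, "main");  // == f
+*/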
+DllExport void * SWIGSTDCALL Ada_LLVMAddFunction (
+ void * jarg1
+ ,
+
+ char * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ result = (LLVMValueRef)LLVMAddFunction(arg1,(char const *)arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNamedFunction (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (LLVMValueRef)LLVMGetNamedFunction(arg1,(char const *)arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstFunction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetFirstFunction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastFunction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetLastFunction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextFunction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetNextFunction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousFunction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetPreviousFunction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDeleteFunction (
+ void * jarg1
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ LLVMDeleteFunction(arg1);
+
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetIntrinsicID (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMGetIntrinsicID(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetFunctionCallConv (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMGetFunctionCallConv(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetFunctionCallConv (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ LLVMSetFunctionCallConv(arg1,arg2);
+
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_LLVMGetGC (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *result = 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (char *)LLVMGetGC(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetGC (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *arg2 = (char *) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = jarg2;
+
+ LLVMSetGC(arg1,(char const *)arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddFunctionAttr (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMAttribute arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMAttribute) jarg2;
+
+ LLVMAddFunctionAttr(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMRemoveFunctionAttr (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMAttribute arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMAttribute) jarg2;
+
+ LLVMRemoveFunctionAttr(arg1,arg2);
+
+
+}
+
+
+
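+/* Formal-parameter accessors.  LLVMGetParams expects a caller-allocated
+   array of at least LLVMCountParams entries; the Ada binding passes the
+   array's address as void *.  Sketch, assuming f is a function value:
+
+     unsigned n = LLVMCountParams(f);
+     for (LLVMValueRef p = LLVMGetFirstParam(f); p;
+          p = LLVMGetNextParam(p))
+       ;  // visit each parameter in declaration order
+*/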
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountParams (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMCountParams(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMGetParams (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef *)jarg2;
+
+ LLVMGetParams(arg1,arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetParam (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMValueRef)LLVMGetParam(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetParamParent (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetParamParent(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstParam (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetFirstParam(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastParam (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetLastParam(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextParam (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetNextParam(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousParam (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetPreviousParam(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddAttribute (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMAttribute arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMAttribute) jarg2;
+
+ LLVMAddAttribute(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMRemoveAttribute (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMAttribute arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMAttribute) jarg2;
+
+ LLVMRemoveAttribute(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetParamAlignment (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ LLVMSetParamAlignment(arg1,arg2);
+
+
+}
+
+
+
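+/* Basic-block wrappers.  Blocks are values in LLVM, hence the
+   AsValue/IsBasicBlock/AsBasicBlock conversions; append/insert create
+   new blocks, and the usual first/last/next/previous walk applies.
+   Sketch, assuming f is a function value:
+
+     LLVMBasicBlockRef entry  = LLVMAppendBasicBlock(f, "entry");
+     LLVMValueRef      as_val = LLVMBasicBlockAsValue(entry);
+*/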
+DllExport void * SWIGSTDCALL Ada_LLVMBasicBlockAsValue (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ result = (LLVMValueRef)LLVMBasicBlockAsValue(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMValueIsBasicBlock (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMValueIsBasicBlock(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMValueAsBasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMValueAsBasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetBasicBlockParent (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetBasicBlockParent(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountBasicBlocks (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMCountBasicBlocks(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMGetBasicBlocks (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef *arg2 = (LLVMBasicBlockRef *) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMBasicBlockRef *)jarg2;
+
+ LLVMGetBasicBlocks(arg1,arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstBasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetFirstBasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastBasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetLastBasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextBasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetNextBasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousBasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetPreviousBasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetEntryBasicBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetEntryBasicBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMAppendBasicBlock (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (LLVMBasicBlockRef)LLVMAppendBasicBlock(arg1,(char const *)arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMInsertBasicBlock (
+ void * jarg1
+ ,
+
+ char * jarg2
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ char *arg2 = (char *) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ arg2 = jarg2;
+
+ result = (LLVMBasicBlockRef)LLVMInsertBasicBlock(arg1,(char const *)arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDeleteBasicBlock (
+ void * jarg1
+ )
+{
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ LLVMDeleteBasicBlock(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetInstructionParent (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetInstructionParent(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstInstruction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetFirstInstruction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastInstruction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBasicBlockRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetLastInstruction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextInstruction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetNextInstruction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousInstruction (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (LLVMValueRef)LLVMGetPreviousInstruction(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetInstructionCallConv (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ LLVMSetInstructionCallConv(arg1,arg2);
+
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetInstructionCallConv (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMGetInstructionCallConv(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddInstrAttribute (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ ,
+
+ int jarg3
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+ LLVMAttribute arg3 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ arg3 = (LLVMAttribute) jarg3;
+
+ LLVMAddInstrAttribute(arg1,arg2,arg3);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMRemoveInstrAttribute (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ ,
+
+ int jarg3
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+ LLVMAttribute arg3 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ arg3 = (LLVMAttribute) jarg3;
+
+ LLVMRemoveInstrAttribute(arg1,arg2,arg3);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetInstrParamAlignment (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ ,
+
+ unsigned int jarg3
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+ unsigned int arg3 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ LLVMSetInstrParamAlignment(arg1,arg2,arg3);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMIsTailCall (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (int)LLVMIsTailCall(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMSetTailCall (
+ void * jarg1
+ ,
+
+ int jarg2
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ int arg2 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (int) jarg2;
+
+
+ LLVMSetTailCall(arg1,arg2);
+
+
+}
+
+
+
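+/* PHI-node population.  LLVMAddIncoming appends Count (value, block)
+   pairs taken from two parallel arrays; the Ada side passes both array
+   addresses as void *.  Sketch, assuming phi was built elsewhere with
+   LLVMBuildPhi and bb1/bb2 are its predecessor blocks:
+
+     LLVMValueRef      vals[2]  = { v1, v2 };
+     LLVMBasicBlockRef preds[2] = { bb1, bb2 };
+     LLVMAddIncoming(phi, vals, preds, 2);
+*/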
+DllExport void SWIGSTDCALL Ada_LLVMAddIncoming (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ unsigned int jarg4
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
+ LLVMBasicBlockRef *arg3 = (LLVMBasicBlockRef *) 0 ;
+ unsigned int arg4 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef *)jarg2;
+
+ arg3 = (LLVMBasicBlockRef *)jarg3;
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ LLVMAddIncoming(arg1,arg2,arg3,arg4);
+
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountIncoming (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ result = (unsigned int)LLVMCountIncoming(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetIncomingValue (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMValueRef)LLVMGetIncomingValue(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetIncomingBlock (
+ void * jarg1
+ ,
+
+ unsigned int jarg2
+ )
+{
+ void * jresult ;
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ unsigned int arg2 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+
+ arg2 = (unsigned int) jarg2;
+
+
+ result = (LLVMBasicBlockRef)LLVMGetIncomingBlock(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
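+/* Instruction-builder lifecycle.  A builder is created without a
+   position, pointed at a block (or before an instruction), used to emit
+   instructions, and must be disposed explicitly.  Sketch, assuming
+   entry is a basic block:
+
+     LLVMBuilderRef b = LLVMCreateBuilder();
+     LLVMPositionBuilderAtEnd(b, entry);
+     LLVMBuildRetVoid(b);
+     LLVMDisposeBuilder(b);
+*/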
+DllExport void * SWIGSTDCALL Ada_LLVMCreateBuilder (
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef result;
+
+ result = (LLVMBuilderRef)LLVMCreateBuilder();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMPositionBuilder (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMBasicBlockRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ LLVMPositionBuilder(arg1,arg2,arg3);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMPositionBuilderBefore (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ LLVMPositionBuilderBefore(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMPositionBuilderAtEnd (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMBasicBlockRef)jarg2;
+
+ LLVMPositionBuilderAtEnd(arg1,arg2);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetInsertBlock (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMBasicBlockRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ result = (LLVMBasicBlockRef)LLVMGetInsertBlock(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMClearInsertionPosition (
+ void * jarg1
+ )
+{
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ LLVMClearInsertionPosition(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMInsertIntoBuilder (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ LLVMInsertIntoBuilder(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeBuilder (
+ void * jarg1
+ )
+{
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ LLVMDisposeBuilder(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildRetVoid (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ result = (LLVMValueRef)LLVMBuildRetVoid(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildRet (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMBuildRet(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildBr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMBasicBlockRef)jarg2;
+
+ result = (LLVMValueRef)LLVMBuildBr(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildCondBr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ;
+ LLVMBasicBlockRef arg4 = (LLVMBasicBlockRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMBasicBlockRef)jarg3;
+
+ arg4 = (LLVMBasicBlockRef)jarg4;
+
+ result = (LLVMValueRef)LLVMBuildCondBr(arg1,arg2,arg3,arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSwitch (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ unsigned int jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ;
+ unsigned int arg4 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMBasicBlockRef)jarg3;
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ result = (LLVMValueRef)LLVMBuildSwitch(arg1,arg2,arg3,arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildInvoke (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ unsigned int jarg4
+ ,
+
+ void * jarg5
+ ,
+
+ void * jarg6
+ ,
+
+ char * jarg7
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
+ unsigned int arg4 ;
+ LLVMBasicBlockRef arg5 = (LLVMBasicBlockRef) 0 ;
+ LLVMBasicBlockRef arg6 = (LLVMBasicBlockRef) 0 ;
+ char *arg7 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef *)jarg3;
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ arg5 = (LLVMBasicBlockRef)jarg5;
+
+ arg6 = (LLVMBasicBlockRef)jarg6;
+
+ arg7 = jarg7;
+
+ result = (LLVMValueRef)LLVMBuildInvoke(arg1,arg2,arg3,arg4,arg5,arg6,(char const *)arg7);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildUnwind (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ result = (LLVMValueRef)LLVMBuildUnwind(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildUnreachable (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ result = (LLVMValueRef)LLVMBuildUnreachable(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddCase (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ LLVMValueRef arg1 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ;
+
+ arg1 = (LLVMValueRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMBasicBlockRef)jarg3;
+
+ LLVMAddCase(arg1,arg2,arg3);
+
+
+}
+
+
+
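+/* Binary-operation builders (Add through Xor, plus Neg/Not).  All share
+   one shape: builder, operand value(s), and a name for the result; the
+   name is passed through from Ada as a NUL-terminated C string.
+   Sketch, assuming b is positioned inside a block and x/y are i32
+   values:
+
+     LLVMValueRef sum = LLVMBuildAdd(b, x, y, "sum");
+*/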
+DllExport void * SWIGSTDCALL Ada_LLVMBuildAdd (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildAdd(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSub (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildSub(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildMul (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildMul(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildUDiv (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildUDiv(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSDiv (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildSDiv(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFDiv (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildFDiv(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildURem (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildURem(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSRem (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildSRem(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFRem (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildFRem(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildShl (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildShl(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildLShr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildLShr(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildAShr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildAShr(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildAnd (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildAnd(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildOr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildOr(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildXor (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildXor(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildNeg (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMBuildNeg(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildNot (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMBuildNot(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildMalloc (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMBuildMalloc(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildArrayMalloc (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildArrayMalloc(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildAlloca (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMBuildAlloca(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildArrayAlloca (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildArrayAlloca(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFree (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (LLVMValueRef)LLVMBuildFree(arg1,arg2);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildLoad (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMBuildLoad(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildStore (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ result = (LLVMValueRef)LLVMBuildStore(arg1,arg2,arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildGEP (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ unsigned int jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
+ unsigned int arg4 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef *)jarg3;
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildGEP(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildTrunc (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildTrunc(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildZExt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildZExt(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSExt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildSExt(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFPToUI (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildFPToUI(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFPToSI (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildFPToSI(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildUIToFP (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildUIToFP(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSIToFP (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildSIToFP(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFPTrunc (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildFPTrunc(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFPExt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildFPExt(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildPtrToInt (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildPtrToInt(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildIntToPtr (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildIntToPtr(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildBitCast (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildBitCast(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildICmp (
+ void * jarg1
+ ,
+
+ int jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMIntPredicate arg2 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg4 = (LLVMValueRef) 0 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMIntPredicate) jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = (LLVMValueRef)jarg4;
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildICmp(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildFCmp (
+ void * jarg1
+ ,
+
+ int jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMRealPredicate arg2 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg4 = (LLVMValueRef) 0 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMRealPredicate) jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = (LLVMValueRef)jarg4;
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildFCmp(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildPhi (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ char * jarg3
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ char *arg3 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ arg3 = jarg3;
+
+ result = (LLVMValueRef)LLVMBuildPhi(arg1,arg2,(char const *)arg3);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildCall (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ unsigned int jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
+ unsigned int arg4 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef *)jarg3;
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildCall(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildSelect (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg4 = (LLVMValueRef) 0 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = (LLVMValueRef)jarg4;
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildSelect(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildVAArg (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMTypeRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildVAArg(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildExtractElement (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildExtractElement(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildInsertElement (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg4 = (LLVMValueRef) 0 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = (LLVMValueRef)jarg4;
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildInsertElement(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildShuffleVector (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ void * jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg4 = (LLVMValueRef) 0 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+ arg4 = (LLVMValueRef)jarg4;
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildShuffleVector(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildExtractValue (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ ,
+
+ char * jarg4
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ unsigned int arg3 ;
+ char *arg4 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ arg4 = jarg4;
+
+ result = (LLVMValueRef)LLVMBuildExtractValue(arg1,arg2,arg3,(char const *)arg4);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMBuildInsertValue (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ ,
+
+ unsigned int jarg4
+ ,
+
+ char * jarg5
+ )
+{
+ void * jresult ;
+ LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ LLVMValueRef arg3 = (LLVMValueRef) 0 ;
+ unsigned int arg4 ;
+ char *arg5 = (char *) 0 ;
+ LLVMValueRef result;
+
+ arg1 = (LLVMBuilderRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ arg3 = (LLVMValueRef)jarg3;
+
+
+ arg4 = (unsigned int) jarg4;
+
+
+ arg5 = jarg5;
+
+ result = (LLVMValueRef)LLVMBuildInsertValue(arg1,arg2,arg3,arg4,(char const *)arg5);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateModuleProviderForExistingModule (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
+ LLVMModuleProviderRef result;
+
+ arg1 = (LLVMModuleRef)jarg1;
+
+ result = (LLVMModuleProviderRef)LLVMCreateModuleProviderForExistingModule(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeModuleProvider (
+ void * jarg1
+ )
+{
+ LLVMModuleProviderRef arg1 = (LLVMModuleProviderRef) 0 ;
+
+ arg1 = (LLVMModuleProviderRef)jarg1;
+
+ LLVMDisposeModuleProvider(arg1);
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMCreateMemoryBufferWithContentsOfFile (
+ char * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ void * jarg3
+ )
+{
+ int jresult ;
+ char *arg1 = (char *) 0 ;
+ LLVMMemoryBufferRef *arg2 = (LLVMMemoryBufferRef *) 0 ;
+ char **arg3 = (char **) 0 ;
+ int result;
+
+ arg1 = jarg1;
+
+ arg2 = (LLVMMemoryBufferRef *)jarg2;
+
+ arg3 = (char **)jarg3;
+
+ result = (int)LLVMCreateMemoryBufferWithContentsOfFile((char const *)arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMCreateMemoryBufferWithSTDIN (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ int jresult ;
+ LLVMMemoryBufferRef *arg1 = (LLVMMemoryBufferRef *) 0 ;
+ char **arg2 = (char **) 0 ;
+ int result;
+
+ arg1 = (LLVMMemoryBufferRef *)jarg1;
+
+ arg2 = (char **)jarg2;
+
+ result = (int)LLVMCreateMemoryBufferWithSTDIN(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeMemoryBuffer (
+ void * jarg1
+ )
+{
+ LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
+
+ arg1 = (LLVMMemoryBufferRef)jarg1;
+
+ LLVMDisposeMemoryBuffer(arg1);
+
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreatePassManager (
+ )
+{
+ void * jresult ;
+ LLVMPassManagerRef result;
+
+ result = (LLVMPassManagerRef)LLVMCreatePassManager();
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateFunctionPassManager (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMModuleProviderRef arg1 = (LLVMModuleProviderRef) 0 ;
+ LLVMPassManagerRef result;
+
+ arg1 = (LLVMModuleProviderRef)jarg1;
+
+ result = (LLVMPassManagerRef)LLVMCreateFunctionPassManager(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMRunPassManager (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ int jresult ;
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+ LLVMModuleRef arg2 = (LLVMModuleRef) 0 ;
+ int result;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ arg2 = (LLVMModuleRef)jarg2;
+
+ result = (int)LLVMRunPassManager(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMInitializeFunctionPassManager (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+ int result;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ result = (int)LLVMInitializeFunctionPassManager(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMRunFunctionPassManager (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ int jresult ;
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ int result;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (int)LLVMRunFunctionPassManager(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMFinalizeFunctionPassManager (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+ int result;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ result = (int)LLVMFinalizeFunctionPassManager(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposePassManager (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMDisposePassManager(arg1);
+
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
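Every wrapper in llvm_wrap.cxx follows one marshalling convention: opaque LLVM*Ref handles cross the Ada/C boundary as plain void *, instruction names as char *, and counts as unsigned int. A minimal C++ smoke test of that convention, offered as an illustration only, not part of the generated sources: it uses entry points defined above, the input.bc path is hypothetical, and the DllExport/SWIGSTDCALL macros are omitted since they expand to nothing on non-Windows builds.

    #include <cstdio>

    extern "C" {
      int  Ada_LLVMCreateMemoryBufferWithContentsOfFile(char *, void *, void *);
      void Ada_LLVMDisposeMemoryBuffer(void *);
    }

    int main() {
      void *buf = 0;              // receives an LLVMMemoryBufferRef
      char *err = 0;              // receives an error string on failure
      char  path[] = "input.bc";  // hypothetical input file
      if (Ada_LLVMCreateMemoryBufferWithContentsOfFile(path, &buf, &err) == 0)
        Ada_LLVMDisposeMemoryBuffer(buf);   // 0 is success in the llvm-c API
      else
        std::printf("cannot open %s: %s\n", path, err ? err : "(no message)");
      return 0;
    }
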
diff --git a/bindings/ada/target/llvm_target-binding.ads b/bindings/ada/target/llvm_target-binding.ads
new file mode 100644
index 000000000000..61201c8d1753
--- /dev/null
+++ b/bindings/ada/target/llvm_target-binding.ads
@@ -0,0 +1,138 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_Target.Binding is
+
+ LLVMBigEndian : constant := 0;
+ LLVMLittleEndian : constant := 1;
+
+ procedure LLVMInitializeAllTargets;
+
+ function LLVMInitializeNativeTarget return Interfaces.C.int;
+
+ function LLVMCreateTargetData
+ (StringRep : in Interfaces.C.Strings.chars_ptr)
+ return LLVM_Target.LLVMTargetDataRef;
+
+ procedure LLVMAddTargetData
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMPassManagerRef);
+
+ function LLVMCopyStringRepOfTargetData
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef)
+ return Interfaces.C.Strings.chars_ptr;
+
+ function LLVMByteOrder
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef)
+ return LLVM_Target.LLVMByteOrdering;
+
+ function LLVMPointerSize
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMIntPtrType
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef)
+ return llvm.LLVMTypeRef;
+
+ function LLVMSizeOfTypeInBits
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMTypeRef)
+ return Interfaces.C.Extensions.unsigned_long_long;
+
+ function LLVMStoreSizeOfType
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMTypeRef)
+ return Interfaces.C.Extensions.unsigned_long_long;
+
+ function LLVMABISizeOfType
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMTypeRef)
+ return Interfaces.C.Extensions.unsigned_long_long;
+
+ function LLVMABIAlignmentOfType
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMCallFrameAlignmentOfType
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMPreferredAlignmentOfType
+ (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
+ arg_2_2 : in llvm.LLVMTypeRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMPreferredAlignmentOfGlobal
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef;
+ GlobalVar : in llvm.LLVMValueRef)
+ return Interfaces.C.unsigned;
+
+ function LLVMElementAtOffset
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef;
+ StructTy : in llvm.LLVMTypeRef;
+ Offset : in Interfaces.C.Extensions.unsigned_long_long)
+ return Interfaces.C.unsigned;
+
+ function LLVMOffsetOfElement
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef;
+ StructTy : in llvm.LLVMTypeRef;
+ Element : in Interfaces.C.unsigned)
+ return Interfaces.C.Extensions.unsigned_long_long;
+
+ procedure LLVMInvalidateStructLayout
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef;
+ StructTy : in llvm.LLVMTypeRef);
+
+ procedure LLVMDisposeTargetData
+ (arg_1 : in LLVM_Target.LLVMTargetDataRef);
+
+private
+
+ pragma Import
+ (C,
+ LLVMInitializeAllTargets,
+ "Ada_LLVMInitializeAllTargets");
+ pragma Import
+ (C,
+ LLVMInitializeNativeTarget,
+ "Ada_LLVMInitializeNativeTarget");
+ pragma Import (C, LLVMCreateTargetData, "Ada_LLVMCreateTargetData");
+ pragma Import (C, LLVMAddTargetData, "Ada_LLVMAddTargetData");
+ pragma Import
+ (C,
+ LLVMCopyStringRepOfTargetData,
+ "Ada_LLVMCopyStringRepOfTargetData");
+ pragma Import (C, LLVMByteOrder, "Ada_LLVMByteOrder");
+ pragma Import (C, LLVMPointerSize, "Ada_LLVMPointerSize");
+ pragma Import (C, LLVMIntPtrType, "Ada_LLVMIntPtrType");
+ pragma Import (C, LLVMSizeOfTypeInBits, "Ada_LLVMSizeOfTypeInBits");
+ pragma Import (C, LLVMStoreSizeOfType, "Ada_LLVMStoreSizeOfType");
+ pragma Import (C, LLVMABISizeOfType, "Ada_LLVMABISizeOfType");
+ pragma Import (C, LLVMABIAlignmentOfType, "Ada_LLVMABIAlignmentOfType");
+ pragma Import
+ (C,
+ LLVMCallFrameAlignmentOfType,
+ "Ada_LLVMCallFrameAlignmentOfType");
+ pragma Import
+ (C,
+ LLVMPreferredAlignmentOfType,
+ "Ada_LLVMPreferredAlignmentOfType");
+ pragma Import
+ (C,
+ LLVMPreferredAlignmentOfGlobal,
+ "Ada_LLVMPreferredAlignmentOfGlobal");
+ pragma Import (C, LLVMElementAtOffset, "Ada_LLVMElementAtOffset");
+ pragma Import (C, LLVMOffsetOfElement, "Ada_LLVMOffsetOfElement");
+ pragma Import
+ (C,
+ LLVMInvalidateStructLayout,
+ "Ada_LLVMInvalidateStructLayout");
+ pragma Import (C, LLVMDisposeTargetData, "Ada_LLVMDisposeTargetData");
+
+end LLVM_Target.Binding;
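The private part above is the whole mechanism of the binding: each Ada subprogram is tied by pragma Import to one Ada_-prefixed C symbol, whose definition follows in llvm_target_wrap.cxx. A sketch of that linkage contract for one pair, using only the types the generated code actually declares:

    // Ada side (from the spec above):
    //   function LLVMPointerSize (arg_1 : in LLVM_Target.LLVMTargetDataRef)
    //     return Interfaces.C.unsigned;
    //   pragma Import (C, LLVMPointerSize, "Ada_LLVMPointerSize");
    //
    // C++ side (defined below in llvm_target_wrap.cxx): the Ada access
    // type crosses as void *, Interfaces.C.unsigned as unsigned int.
    extern "C" unsigned int Ada_LLVMPointerSize(void *jarg1);
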
diff --git a/bindings/ada/target/llvm_target.ads b/bindings/ada/target/llvm_target.ads
new file mode 100644
index 000000000000..11cb05d55b35
--- /dev/null
+++ b/bindings/ada/target/llvm_target.ads
@@ -0,0 +1,72 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with Interfaces.C.Extensions;
+
+
+package LLVM_Target is
+
+ -- LLVMOpaqueTargetData
+ --
+ type LLVMOpaqueTargetData is new
+ Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMOpaqueTargetData_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Target.LLVMOpaqueTargetData;
+
+ type LLVMOpaqueTargetData_view is access all
+ LLVM_Target.LLVMOpaqueTargetData;
+
+ -- LLVMTargetDataRef
+ --
+ type LLVMTargetDataRef is access all LLVM_Target.LLVMOpaqueTargetData;
+
+ type LLVMTargetDataRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Target.LLVMTargetDataRef;
+
+ type LLVMTargetDataRef_view is access all LLVM_Target.LLVMTargetDataRef;
+
+ -- LLVMStructLayout
+ --
+ type LLVMStructLayout is new Interfaces.C.Extensions.opaque_structure_def;
+
+ type LLVMStructLayout_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Target.LLVMStructLayout;
+
+ type LLVMStructLayout_view is access all LLVM_Target.LLVMStructLayout;
+
+ -- LLVMStructLayoutRef
+ --
+ type LLVMStructLayoutRef is access all LLVM_Target.LLVMStructLayout;
+
+ type LLVMStructLayoutRef_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Target.LLVMStructLayoutRef;
+
+ type LLVMStructLayoutRef_view is access all LLVM_Target.LLVMStructLayoutRef;
+
+ -- TargetData
+ --
+ type TargetData is new Interfaces.C.Extensions.incomplete_class_def;
+
+ type TargetData_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Target.TargetData;
+
+ type TargetData_view is access all LLVM_Target.TargetData;
+
+ -- LLVMByteOrdering
+ --
+ type LLVMByteOrdering is new Interfaces.C.int;
+
+ type LLVMByteOrdering_array is
+ array (Interfaces.C.size_t range <>)
+ of aliased LLVM_Target.LLVMByteOrdering;
+
+ type LLVMByteOrdering_view is access all LLVM_Target.LLVMByteOrdering;
+
+
+end LLVM_Target;
diff --git a/bindings/ada/target/llvm_target_wrap.cxx b/bindings/ada/target/llvm_target_wrap.cxx
new file mode 100644
index 000000000000..16aca8a4379a
--- /dev/null
+++ b/bindings/ada/target/llvm_target_wrap.cxx
@@ -0,0 +1,720 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+ SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+ if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+ callback = SWIG_ada_exceptions[code].callback;
+ }
+ callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Target (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Target(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
+
+
+#define protected public
+#define private public
+
+#include "llvm-c/Target.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport void SWIGSTDCALL Ada_LLVMInitializeAllTargets (
+ )
+{
+ LLVMInitializeAllTargets();
+
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMInitializeNativeTarget (
+ )
+{
+ int jresult ;
+ int result;
+
+ result = (int)LLVMInitializeNativeTarget();
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMCreateTargetData (
+ char * jarg1
+ )
+{
+ void * jresult ;
+ char *arg1 = (char *) 0 ;
+ LLVMTargetDataRef result;
+
+ arg1 = jarg1;
+
+ result = (LLVMTargetDataRef)LLVMCreateTargetData((char const *)arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddTargetData (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMPassManagerRef arg2 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMPassManagerRef)jarg2;
+
+ LLVMAddTargetData(arg1,arg2);
+
+
+}
+
+
+
+DllExport char * SWIGSTDCALL Ada_LLVMCopyStringRepOfTargetData (
+ void * jarg1
+ )
+{
+ char * jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ char *result = 0 ;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ result = (char *)LLVMCopyStringRepOfTargetData(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport int SWIGSTDCALL Ada_LLVMByteOrder (
+ void * jarg1
+ )
+{
+ int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMByteOrdering result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ result = (LLVMByteOrdering)LLVMByteOrder(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMPointerSize (
+ void * jarg1
+ )
+{
+ unsigned int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ result = (unsigned int)LLVMPointerSize(arg1);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void * SWIGSTDCALL Ada_LLVMIntPtrType (
+ void * jarg1
+ )
+{
+ void * jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ result = (LLVMTypeRef)LLVMIntPtrType(arg1);
+ jresult = (void *) result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned long long SWIGSTDCALL Ada_LLVMSizeOfTypeInBits (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned long long jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned long long result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (unsigned long long)LLVMSizeOfTypeInBits(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned long long SWIGSTDCALL Ada_LLVMStoreSizeOfType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned long long jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned long long result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (unsigned long long)LLVMStoreSizeOfType(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned long long SWIGSTDCALL Ada_LLVMABISizeOfType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned long long jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned long long result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (unsigned long long)LLVMABISizeOfType(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMABIAlignmentOfType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (unsigned int)LLVMABIAlignmentOfType(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCallFrameAlignmentOfType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (unsigned int)LLVMCallFrameAlignmentOfType(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMPreferredAlignmentOfType (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ result = (unsigned int)LLVMPreferredAlignmentOfType(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMPreferredAlignmentOfGlobal (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ unsigned int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMValueRef arg2 = (LLVMValueRef) 0 ;
+ unsigned int result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMValueRef)jarg2;
+
+ result = (unsigned int)LLVMPreferredAlignmentOfGlobal(arg1,arg2);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMElementAtOffset (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned long long jarg3
+ )
+{
+ unsigned int jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned long long arg3 ;
+ unsigned int result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+
+ arg3 = (unsigned long long) jarg3;
+
+
+ result = (unsigned int)LLVMElementAtOffset(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport unsigned long long SWIGSTDCALL Ada_LLVMOffsetOfElement (
+ void * jarg1
+ ,
+
+ void * jarg2
+ ,
+
+ unsigned int jarg3
+ )
+{
+ unsigned long long jresult ;
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+ unsigned int arg3 ;
+ unsigned long long result;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+
+ arg3 = (unsigned int) jarg3;
+
+
+ result = (unsigned long long)LLVMOffsetOfElement(arg1,arg2,arg3);
+ jresult = result;
+
+
+
+ return jresult;
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMInvalidateStructLayout (
+ void * jarg1
+ ,
+
+ void * jarg2
+ )
+{
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+ LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ arg2 = (LLVMTypeRef)jarg2;
+
+ LLVMInvalidateStructLayout(arg1,arg2);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMDisposeTargetData (
+ void * jarg1
+ )
+{
+ LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
+
+ arg1 = (LLVMTargetDataRef)jarg1;
+
+ LLVMDisposeTargetData(arg1);
+
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
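Chained together, the target-data wrappers above make a small round trip: build an LLVMTargetDataRef from a layout string, query it, and dispose of it. A hedged C++ sketch, not from the generated sources; the layout string is a hypothetical 32-bit little-endian example, and LLVMLittleEndian = 1 per the Ada spec above.

    #include <cstdio>

    extern "C" {
      void *Ada_LLVMCreateTargetData(char *);
      unsigned int Ada_LLVMPointerSize(void *);
      int  Ada_LLVMByteOrder(void *);
      void Ada_LLVMDisposeTargetData(void *);
    }

    int main() {
      char layout[] = "e-p:32:32";  // hypothetical data-layout string
      void *td = Ada_LLVMCreateTargetData(layout);
      std::printf("pointer size: %u bytes, %s\n",
                  Ada_LLVMPointerSize(td),
                  Ada_LLVMByteOrder(td) == 1 ? "little-endian" : "big-endian");
      Ada_LLVMDisposeTargetData(td);
      return 0;
    }
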
diff --git a/bindings/ada/transforms/llvm_transforms-binding.ads b/bindings/ada/transforms/llvm_transforms-binding.ads
new file mode 100644
index 000000000000..2254b6eec2c3
--- /dev/null
+++ b/bindings/ada/transforms/llvm_transforms-binding.ads
@@ -0,0 +1,206 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+with llvm;
+
+
+package LLVM_Transforms.Binding is
+
+ procedure LLVMAddArgumentPromotionPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddConstantMergePass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddDeadArgEliminationPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddDeadTypeEliminationPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddFunctionAttrsPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddFunctionInliningPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddGlobalDCEPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddGlobalOptimizerPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddIPConstantPropagationPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLowerSetJmpPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddPruneEHPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddRaiseAllocationsPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddStripDeadPrototypesPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddStripSymbolsPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddAggressiveDCEPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddCFGSimplificationPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddCondPropagationPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddDeadStoreEliminationPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddGVNPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddIndVarSimplifyPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddInstructionCombiningPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddJumpThreadingPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLICMPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLoopDeletionPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLoopIndexSplitPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLoopRotatePass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLoopUnrollPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddLoopUnswitchPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddMemCpyOptPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddPromoteMemoryToRegisterPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddReassociatePass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddSCCPPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddScalarReplAggregatesPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddSimplifyLibCallsPass (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddTailCallEliminationPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddConstantPropagationPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+ procedure LLVMAddDemoteMemoryToRegisterPass
+ (PM : in llvm.LLVMPassManagerRef);
+
+private
+
+ pragma Import
+ (C,
+ LLVMAddArgumentPromotionPass,
+ "Ada_LLVMAddArgumentPromotionPass");
+ pragma Import
+ (C,
+ LLVMAddConstantMergePass,
+ "Ada_LLVMAddConstantMergePass");
+ pragma Import
+ (C,
+ LLVMAddDeadArgEliminationPass,
+ "Ada_LLVMAddDeadArgEliminationPass");
+ pragma Import
+ (C,
+ LLVMAddDeadTypeEliminationPass,
+ "Ada_LLVMAddDeadTypeEliminationPass");
+ pragma Import
+ (C,
+ LLVMAddFunctionAttrsPass,
+ "Ada_LLVMAddFunctionAttrsPass");
+ pragma Import
+ (C,
+ LLVMAddFunctionInliningPass,
+ "Ada_LLVMAddFunctionInliningPass");
+ pragma Import (C, LLVMAddGlobalDCEPass, "Ada_LLVMAddGlobalDCEPass");
+ pragma Import
+ (C,
+ LLVMAddGlobalOptimizerPass,
+ "Ada_LLVMAddGlobalOptimizerPass");
+ pragma Import
+ (C,
+ LLVMAddIPConstantPropagationPass,
+ "Ada_LLVMAddIPConstantPropagationPass");
+ pragma Import (C, LLVMAddLowerSetJmpPass, "Ada_LLVMAddLowerSetJmpPass");
+ pragma Import (C, LLVMAddPruneEHPass, "Ada_LLVMAddPruneEHPass");
+ pragma Import
+ (C,
+ LLVMAddRaiseAllocationsPass,
+ "Ada_LLVMAddRaiseAllocationsPass");
+ pragma Import
+ (C,
+ LLVMAddStripDeadPrototypesPass,
+ "Ada_LLVMAddStripDeadPrototypesPass");
+ pragma Import (C, LLVMAddStripSymbolsPass, "Ada_LLVMAddStripSymbolsPass");
+ pragma Import
+ (C,
+ LLVMAddAggressiveDCEPass,
+ "Ada_LLVMAddAggressiveDCEPass");
+ pragma Import
+ (C,
+ LLVMAddCFGSimplificationPass,
+ "Ada_LLVMAddCFGSimplificationPass");
+ pragma Import
+ (C,
+ LLVMAddCondPropagationPass,
+ "Ada_LLVMAddCondPropagationPass");
+ pragma Import
+ (C,
+ LLVMAddDeadStoreEliminationPass,
+ "Ada_LLVMAddDeadStoreEliminationPass");
+ pragma Import (C, LLVMAddGVNPass, "Ada_LLVMAddGVNPass");
+ pragma Import
+ (C,
+ LLVMAddIndVarSimplifyPass,
+ "Ada_LLVMAddIndVarSimplifyPass");
+ pragma Import
+ (C,
+ LLVMAddInstructionCombiningPass,
+ "Ada_LLVMAddInstructionCombiningPass");
+ pragma Import
+ (C,
+ LLVMAddJumpThreadingPass,
+ "Ada_LLVMAddJumpThreadingPass");
+ pragma Import (C, LLVMAddLICMPass, "Ada_LLVMAddLICMPass");
+ pragma Import (C, LLVMAddLoopDeletionPass, "Ada_LLVMAddLoopDeletionPass");
+ pragma Import
+ (C,
+ LLVMAddLoopIndexSplitPass,
+ "Ada_LLVMAddLoopIndexSplitPass");
+ pragma Import (C, LLVMAddLoopRotatePass, "Ada_LLVMAddLoopRotatePass");
+ pragma Import (C, LLVMAddLoopUnrollPass, "Ada_LLVMAddLoopUnrollPass");
+ pragma Import (C, LLVMAddLoopUnswitchPass, "Ada_LLVMAddLoopUnswitchPass");
+ pragma Import (C, LLVMAddMemCpyOptPass, "Ada_LLVMAddMemCpyOptPass");
+ pragma Import
+ (C,
+ LLVMAddPromoteMemoryToRegisterPass,
+ "Ada_LLVMAddPromoteMemoryToRegisterPass");
+ pragma Import (C, LLVMAddReassociatePass, "Ada_LLVMAddReassociatePass");
+ pragma Import (C, LLVMAddSCCPPass, "Ada_LLVMAddSCCPPass");
+ pragma Import
+ (C,
+ LLVMAddScalarReplAggregatesPass,
+ "Ada_LLVMAddScalarReplAggregatesPass");
+ pragma Import
+ (C,
+ LLVMAddSimplifyLibCallsPass,
+ "Ada_LLVMAddSimplifyLibCallsPass");
+ pragma Import
+ (C,
+ LLVMAddTailCallEliminationPass,
+ "Ada_LLVMAddTailCallEliminationPass");
+ pragma Import
+ (C,
+ LLVMAddConstantPropagationPass,
+ "Ada_LLVMAddConstantPropagationPass");
+ pragma Import
+ (C,
+ LLVMAddDemoteMemoryToRegisterPass,
+ "Ada_LLVMAddDemoteMemoryToRegisterPass");
+
+end LLVM_Transforms.Binding;
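Combined with the pass-manager entry points from llvm_wrap.cxx, the imports above cover the whole IPO/scalar pipeline. A sketch of building and tearing down a pipeline from the C++ side, assuming only symbols defined in these generated files; actually running the passes needs a module, which is out of scope here.

    extern "C" {
      void *Ada_LLVMCreatePassManager(void);
      void  Ada_LLVMAddConstantMergePass(void *);
      void  Ada_LLVMAddGlobalDCEPass(void *);
      void  Ada_LLVMDisposePassManager(void *);
    }

    int main() {
      void *pm = Ada_LLVMCreatePassManager();  // opaque LLVMPassManagerRef
      Ada_LLVMAddConstantMergePass(pm);
      Ada_LLVMAddGlobalDCEPass(pm);
      // Ada_LLVMRunPassManager(pm, module) would go here, given a module.
      Ada_LLVMDisposePassManager(pm);
      return 0;
    }
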
diff --git a/bindings/ada/transforms/llvm_transforms.ads b/bindings/ada/transforms/llvm_transforms.ads
new file mode 100644
index 000000000000..4f37aafe805c
--- /dev/null
+++ b/bindings/ada/transforms/llvm_transforms.ads
@@ -0,0 +1,6 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+package LLVM_Transforms is
+
+end LLVM_Transforms;
diff --git a/bindings/ada/transforms/llvm_transforms_wrap.cxx b/bindings/ada/transforms/llvm_transforms_wrap.cxx
new file mode 100644
index 000000000000..8cb04db791aa
--- /dev/null
+++ b/bindings/ada/transforms/llvm_transforms_wrap.cxx
@@ -0,0 +1,828 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+ T *tt;
+public:
+ SwigValueWrapper() : tt(0) { }
+ SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+ SwigValueWrapper(const T& t) : tt(new T(t)) { }
+ ~SwigValueWrapper() { delete tt; }
+ SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+ operator T&() const { return *tt; }
+ T *operator&() { return tt; }
+private:
+ SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+ return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+ SWIG_AdaException,
+ SWIG_AdaOutOfMemoryException,
+ SWIG_AdaIndexOutOfRangeException,
+ SWIG_AdaDivideByZeroException,
+ SWIG_AdaArgumentOutOfRangeException,
+ SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+ SWIG_AdaExceptionCodes code;
+ SWIG_AdaExceptionCallback_t callback;
+}
+ SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+ { SWIG_AdaException, NULL },
+ { SWIG_AdaOutOfMemoryException, NULL },
+ { SWIG_AdaIndexOutOfRangeException, NULL },
+ { SWIG_AdaDivideByZeroException, NULL },
+ { SWIG_AdaArgumentOutOfRangeException, NULL },
+ { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+ SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+ if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+ callback = SWIG_ada_exceptions[code].callback;
+ }
+ callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Transforms (SWIG_AdaExceptionCallback_t systemException,
+ SWIG_AdaExceptionCallback_t outOfMemory,
+ SWIG_AdaExceptionCallback_t indexOutOfRange,
+ SWIG_AdaExceptionCallback_t divideByZero,
+ SWIG_AdaExceptionCallback_t argumentOutOfRange,
+ SWIG_AdaExceptionCallback_t nullReference)
+{
+ SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+ SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+ SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+ SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+ SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus
+extern "C"
+#endif
+DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Transforms(SWIG_AdaStringHelperCallback callback) {
+ SWIG_ada_string_callback = callback;
+}
+*/
+
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
+
+
+#define protected public
+#define private public
+
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm-c/Transforms/Scalar.h"
+
+
+
+// struct LLVMCtxt;
+
+
+#undef protected
+#undef private
+#ifdef __cplusplus
+extern "C" {
+#endif
+DllExport void SWIGSTDCALL Ada_LLVMAddArgumentPromotionPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddArgumentPromotionPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddConstantMergePass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddConstantMergePass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddDeadArgEliminationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddDeadArgEliminationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddDeadTypeEliminationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddDeadTypeEliminationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddFunctionAttrsPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddFunctionAttrsPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddFunctionInliningPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddFunctionInliningPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddGlobalDCEPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddGlobalDCEPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddGlobalOptimizerPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddGlobalOptimizerPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddIPConstantPropagationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddIPConstantPropagationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLowerSetJmpPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLowerSetJmpPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddPruneEHPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddPruneEHPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddRaiseAllocationsPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddRaiseAllocationsPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddStripDeadPrototypesPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddStripDeadPrototypesPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddStripSymbolsPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddStripSymbolsPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddAggressiveDCEPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddAggressiveDCEPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddCFGSimplificationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddCFGSimplificationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddCondPropagationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddCondPropagationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddDeadStoreEliminationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddDeadStoreEliminationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddGVNPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddGVNPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddIndVarSimplifyPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddIndVarSimplifyPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddInstructionCombiningPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddInstructionCombiningPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddJumpThreadingPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddJumpThreadingPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLICMPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLICMPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLoopDeletionPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLoopDeletionPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLoopIndexSplitPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLoopIndexSplitPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLoopRotatePass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLoopRotatePass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLoopUnrollPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLoopUnrollPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddLoopUnswitchPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddLoopUnswitchPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddMemCpyOptPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddMemCpyOptPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddPromoteMemoryToRegisterPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddPromoteMemoryToRegisterPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddReassociatePass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddReassociatePass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddSCCPPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddSCCPPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddScalarReplAggregatesPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddScalarReplAggregatesPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddSimplifyLibCallsPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddSimplifyLibCallsPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddTailCallEliminationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddTailCallEliminationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddConstantPropagationPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddConstantPropagationPass(arg1);
+
+
+}
+
+
+
+DllExport void SWIGSTDCALL Ada_LLVMAddDemoteMemoryToRegisterPass (
+ void * jarg1
+ )
+{
+ LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
+
+ arg1 = (LLVMPassManagerRef)jarg1;
+
+ LLVMAddDemoteMemoryToRegisterPass(arg1);
+
+
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/bindings/ocaml/Makefile.ocaml b/bindings/ocaml/Makefile.ocaml
index d98a489f7c42..c46f6027cf63 100644
--- a/bindings/ocaml/Makefile.ocaml
+++ b/bindings/ocaml/Makefile.ocaml
@@ -31,13 +31,16 @@ PROJ_libocamldir := $(DESTDIR)$(OCAML_LIBDIR)
OcamlDir := $(LibDir)/ocaml
# Info from llvm-config and similar
+ifndef IS_CLEANING_TARGET
ifdef UsedComponents
UsedLibs = $(shell $(LLVM_CONFIG) --libs $(UsedComponents))
UsedLibNames = $(shell $(LLVM_CONFIG) --libnames $(UsedComponents))
endif
+endif
# Tools
OCAMLCFLAGS += -I $(ObjDir) -I $(OcamlDir)
+ifndef IS_CLEANING_TARGET
ifneq ($(ObjectsO),)
OCAMLAFLAGS += $(patsubst %,-cclib %, \
$(filter-out -L$(LibDir),-l$(LIBRARYNAME) \
@@ -48,6 +51,7 @@ OCAMLAFLAGS += $(patsubst %,-cclib %, \
$(filter-out -L$(LibDir),$(shell $(LLVM_CONFIG) --ldflags)) \
$(UsedLibs))
endif
+endif
# -g was introduced in 3.10.0.
#ifneq ($(ENABLE_OPTIMIZED),1)
diff --git a/bindings/ocaml/bitreader/bitreader_ocaml.c b/bindings/ocaml/bitreader/bitreader_ocaml.c
index 0fd484f12343..5fd9f854d9da 100644
--- a/bindings/ocaml/bitreader/bitreader_ocaml.c
+++ b/bindings/ocaml/bitreader/bitreader_ocaml.c
@@ -45,27 +45,29 @@ static void llvm_raise(value Prototype, char *Message) {
/*===-- Modules -----------------------------------------------------------===*/
-/* Llvm.llmemorybuffer -> Llvm.module */
-CAMLprim value llvm_get_module_provider(LLVMMemoryBufferRef MemBuf) {
+/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmoduleprovider */
+CAMLprim value llvm_get_module_provider(LLVMContextRef C,
+ LLVMMemoryBufferRef MemBuf) {
CAMLparam0();
CAMLlocal2(Variant, MessageVal);
char *Message;
LLVMModuleProviderRef MP;
- if (LLVMGetBitcodeModuleProvider(MemBuf, &MP, &Message))
+ if (LLVMGetBitcodeModuleProviderInContext(C, MemBuf, &MP, &Message))
llvm_raise(llvm_bitreader_error_exn, Message);
CAMLreturn((value) MemBuf);
}
-/* Llvm.llmemorybuffer -> Llvm.llmodule */
-CAMLprim value llvm_parse_bitcode(LLVMMemoryBufferRef MemBuf) {
+/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */
+CAMLprim value llvm_parse_bitcode(LLVMContextRef C,
+ LLVMMemoryBufferRef MemBuf) {
CAMLparam0();
CAMLlocal2(Variant, MessageVal);
LLVMModuleRef M;
char *Message;
- if (LLVMParseBitcode(MemBuf, &M, &Message))
+ if (LLVMParseBitcodeInContext(C, MemBuf, &M, &Message))
llvm_raise(llvm_bitreader_error_exn, Message);
CAMLreturn((value) M);
diff --git a/bindings/ocaml/bitreader/llvm_bitreader.ml b/bindings/ocaml/bitreader/llvm_bitreader.ml
index 816e1565526c..88587cbe1ef9 100644
--- a/bindings/ocaml/bitreader/llvm_bitreader.ml
+++ b/bindings/ocaml/bitreader/llvm_bitreader.ml
@@ -13,7 +13,9 @@ exception Error of string
external register_exns : exn -> unit = "llvm_register_bitreader_exns"
let _ = register_exns (Error "")
-external get_module_provider : Llvm.llmemorybuffer -> Llvm.llmoduleprovider
+external get_module_provider : Llvm.llcontext -> Llvm.llmemorybuffer ->
+ Llvm.llmoduleprovider
= "llvm_get_module_provider"
-external parse_bitcode : Llvm.llmemorybuffer -> Llvm.llmodule
+
+external parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
= "llvm_parse_bitcode"
diff --git a/bindings/ocaml/bitreader/llvm_bitreader.mli b/bindings/ocaml/bitreader/llvm_bitreader.mli
index 15b389bb83c7..5648b35fee22 100644
--- a/bindings/ocaml/bitreader/llvm_bitreader.mli
+++ b/bindings/ocaml/bitreader/llvm_bitreader.mli
@@ -14,16 +14,18 @@
exception Error of string
-(** [read_bitcode_file path] reads the bitcode for a new module [m] from the
- file at [path]. Returns [Success m] if successful, and [Failure msg]
- otherwise, where [msg] is a description of the error encountered.
- See the function [llvm::getBitcodeModuleProvider]. *)
-external get_module_provider : Llvm.llmemorybuffer -> Llvm.llmoduleprovider
+(** [get_module_provider context mb] reads the bitcode for a new
+ module provider [m] from the memory buffer [mb] in the context [context].
+ Returns [m] if successful, or raises [Error msg] otherwise, where [msg] is a
+ description of the error encountered. See the function
+ [llvm::getBitcodeModuleProvider]. *)
+external get_module_provider : Llvm.llcontext -> Llvm.llmemorybuffer ->
+ Llvm.llmoduleprovider
= "llvm_get_module_provider"
-(** [parse_bitcode mb] parses the bitcode for a new module [m] from the memory
- buffer [mb]. Returns [Success m] if successful, and [Failure msg] otherwise,
- where [msg] is a description of the error encountered.
- See the function [llvm::ParseBitcodeFile]. *)
-external parse_bitcode : Llvm.llmemorybuffer -> Llvm.llmodule
+(** [parse_bitcode context mb] parses the bitcode for a new module [m] from the
+ memory buffer [mb] in the context [context]. Returns [m] if successful, or
+ raises [Error msg] otherwise, where [msg] is a description of the error
+ encountered. See the function [llvm::ParseBitcodeFile]. *)
+external parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
= "llvm_parse_bitcode"
diff --git a/bindings/ocaml/executionengine/executionengine_ocaml.c b/bindings/ocaml/executionengine/executionengine_ocaml.c
index 647759fb0744..072d583bf8fb 100644
--- a/bindings/ocaml/executionengine/executionengine_ocaml.c
+++ b/bindings/ocaml/executionengine/executionengine_ocaml.c
@@ -24,11 +24,15 @@
#include <string.h>
#include <assert.h>
-/* Force the LLVM interpreter, JIT, and native target to be linked in. */
+/* Force the LLVM interpreter and JIT to be linked in. */
void llvm_initialize(void) {
LLVMLinkInInterpreter();
LLVMLinkInJIT();
- LLVMInitializeNativeTarget();
+}
+
+/* unit -> bool */
+CAMLprim value llvm_initialize_native_target(value Unit) {
+ return Val_bool(LLVMInitializeNativeTarget());
}
/* Can't use the recommended caml_named_value mechanism for backwards
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.ml b/bindings/ocaml/executionengine/llvm_executionengine.ml
index cf9acc7cb6b8..c9e8f18b2240 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.ml
+++ b/bindings/ocaml/executionengine/llvm_executionengine.ml
@@ -109,3 +109,6 @@ module ExecutionEngine = struct
*)
end
+
+external initialize_native_target : unit -> bool
+ = "llvm_initialize_native_target"
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.mli b/bindings/ocaml/executionengine/llvm_executionengine.mli
index 17da1dffe556..6c2fdfb7868c 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.mli
+++ b/bindings/ocaml/executionengine/llvm_executionengine.mli
@@ -89,14 +89,14 @@ module ExecutionEngine: sig
module provider [mp] if successful. Creates a JIT if possible, else falls
back to an interpreter. Raises [Error msg] if an error occurs. The
execution engine is not garbage collected and must be destroyed with
- [dispose ee]. See the function [llvm::ExecutionEngine::create]. *)
+ [dispose ee]. See the function [llvm::EngineBuilder::create]. *)
val create: Llvm.llmoduleprovider -> t
(** [create_interpreter mp] creates a new interpreter, taking ownership of the
module provider [mp] if successful. Raises [Error msg] if an error
occurs. The execution engine is not garbage collected and must be
destroyed with [dispose ee].
- See the function [llvm::ExecutionEngine::create]. *)
+ See the function [llvm::EngineBuilder::create]. *)
val create_interpreter: Llvm.llmoduleprovider -> t
(** [create_jit mp] creates a new JIT (just-in-time compiler), taking
@@ -104,7 +104,7 @@ module ExecutionEngine: sig
a JIT which favors code quality over compilation speed. Raises [Error msg]
if an error occurs. The execution engine is not garbage collected and
must be destroyed with [dispose ee].
- See the function [llvm::ExecutionEngine::create]. *)
+ See the function [llvm::EngineBuilder::create]. *)
val create_jit: Llvm.llmoduleprovider -> t
(** [create_fast_jit mp] creates a new JIT (just-in-time compiler) which
@@ -112,7 +112,7 @@ module ExecutionEngine: sig
module provider [mp] if successful. Raises [Error msg] if an error
occurs. The execution engine is not garbage collected and must be
destroyed with [dispose ee].
- See the function [llvm::ExecutionEngine::create]. *)
+ See the function [llvm::EngineBuilder::create]. *)
val create_fast_jit: Llvm.llmoduleprovider -> t
(** [dispose ee] releases the memory used by the execution engine and must be
@@ -161,3 +161,6 @@ module ExecutionEngine: sig
[ee]. *)
val target_data: t -> Llvm_target.TargetData.t
end
+
+external initialize_native_target : unit -> bool
+ = "llvm_initialize_native_target"
diff --git a/bindings/ocaml/llvm/Makefile b/bindings/ocaml/llvm/Makefile
index cd974d482011..99e347bc1312 100644
--- a/bindings/ocaml/llvm/Makefile
+++ b/bindings/ocaml/llvm/Makefile
@@ -1,4 +1,4 @@
-##===- bindings/ocaml/bitwriter/Makefile -------------------*- Makefile -*-===##
+##===- bindings/ocaml/llvm/Makefile ------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index 49975650a592..37d0fd7c85f4 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -8,6 +8,7 @@
*===----------------------------------------------------------------------===*)
+type llcontext
type llmodule
type lltype
type lltypehandle
@@ -26,7 +27,6 @@ module TypeKind = struct
| Fp128
| Ppc_fp128
| Label
- | Metadata
| Integer
| Function
| Struct
@@ -34,6 +34,7 @@ module TypeKind = struct
| Pointer
| Opaque
| Vector
+ | Metadata
end
module Linkage = struct
@@ -41,13 +42,18 @@ module Linkage = struct
| External
| Available_externally
| Link_once
+ | Link_once_odr
| Weak
+ | Weak_odr
| Appending
| Internal
+ | Private
| Dllimport
| Dllexport
| External_weak
| Ghost
+ | Common
+ | Linker_private
end
module Visibility = struct
@@ -78,6 +84,16 @@ module Attribute = struct
| Nest
| Readnone
| Readonly
+ | Noinline
+ | Alwaysinline
+ | Optsize
+ | Ssp
+ | Sspreq
+ | Nocapture
+ | Noredzone
+ | Noimplicitfloat
+ | Naked
+ | Inlinehint
end
module Icmp = struct
@@ -127,10 +143,13 @@ type ('a, 'b) llrev_pos =
| At_start of 'a
| After of 'b
+(*===-- Contexts ----------------------------------------------------------===*)
+external create_context : unit -> llcontext = "llvm_create_context"
+external dispose_context : llcontext -> unit = "llvm_dispose_context"
+external global_context : unit -> llcontext = "llvm_global_context"
(*===-- Modules -----------------------------------------------------------===*)
-
-external create_module : string -> llmodule = "llvm_create_module"
+external create_module : llcontext -> string -> llmodule = "llvm_create_module"
external dispose_module : llmodule -> unit = "llvm_dispose_module"
external target_triple: llmodule -> string
= "llvm_target_triple"
@@ -147,37 +166,25 @@ external delete_type_name : string -> llmodule -> unit
external dump_module : llmodule -> unit = "llvm_dump_module"
(*===-- Types -------------------------------------------------------------===*)
-
external classify_type : lltype -> TypeKind.t = "llvm_classify_type"
+external type_context : lltype -> llcontext = "llvm_type_context"
(*--... Operations on integer types ........................................--*)
-external _i1_type : unit -> lltype = "llvm_i1_type"
-external _i8_type : unit -> lltype = "llvm_i8_type"
-external _i16_type : unit -> lltype = "llvm_i16_type"
-external _i32_type : unit -> lltype = "llvm_i32_type"
-external _i64_type : unit -> lltype = "llvm_i64_type"
-
-let i1_type = _i1_type ()
-let i8_type = _i8_type ()
-let i16_type = _i16_type ()
-let i32_type = _i32_type ()
-let i64_type = _i64_type ()
-
-external integer_type : int -> lltype = "llvm_integer_type"
+external i1_type : llcontext -> lltype = "llvm_i1_type"
+external i8_type : llcontext -> lltype = "llvm_i8_type"
+external i16_type : llcontext -> lltype = "llvm_i16_type"
+external i32_type : llcontext -> lltype = "llvm_i32_type"
+external i64_type : llcontext -> lltype = "llvm_i64_type"
+
+external integer_type : llcontext -> int -> lltype = "llvm_integer_type"
external integer_bitwidth : lltype -> int = "llvm_integer_bitwidth"
(*--... Operations on real types ...........................................--*)
-external _float_type : unit -> lltype = "llvm_float_type"
-external _double_type : unit -> lltype = "llvm_double_type"
-external _x86fp80_type : unit -> lltype = "llvm_x86fp80_type"
-external _fp128_type : unit -> lltype = "llvm_fp128_type"
-external _ppc_fp128_type : unit -> lltype = "llvm_ppc_fp128_type"
-
-let float_type = _float_type ()
-let double_type = _double_type ()
-let x86fp80_type = _x86fp80_type ()
-let fp128_type = _fp128_type ()
-let ppc_fp128_type = _ppc_fp128_type ()
+external float_type : llcontext -> lltype = "llvm_float_type"
+external double_type : llcontext -> lltype = "llvm_double_type"
+external x86fp80_type : llcontext -> lltype = "llvm_x86fp80_type"
+external fp128_type : llcontext -> lltype = "llvm_fp128_type"
+external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type"
(*--... Operations on function types .......................................--*)
external function_type : lltype -> lltype array -> lltype = "llvm_function_type"
@@ -188,8 +195,9 @@ external return_type : lltype -> lltype = "LLVMGetReturnType"
external param_types : lltype -> lltype array = "llvm_param_types"
(*--... Operations on struct types .........................................--*)
-external struct_type : lltype array -> lltype = "llvm_struct_type"
-external packed_struct_type : lltype array -> lltype = "llvm_packed_struct_type"
+external struct_type : llcontext -> lltype array -> lltype = "llvm_struct_type"
+external packed_struct_type : llcontext -> lltype array -> lltype
+ = "llvm_packed_struct_type"
external element_types : lltype -> lltype array = "llvm_element_types"
external is_packed : lltype -> bool = "llvm_is_packed"
@@ -206,12 +214,9 @@ external address_space : lltype -> int = "llvm_address_space"
external vector_size : lltype -> int = "llvm_vector_size"
(*--... Operations on other types ..........................................--*)
-external opaque_type : unit -> lltype = "llvm_opaque_type"
-external _void_type : unit -> lltype = "llvm_void_type"
-external _label_type : unit -> lltype = "llvm_label_type"
-
-let void_type = _void_type ()
-let label_type = _label_type ()
+external opaque_type : llcontext -> lltype = "llvm_opaque_type"
+external void_type : llcontext -> lltype = "llvm_void_type"
+external label_type : llcontext -> lltype = "llvm_label_type"
(*--... Operations on type handles .........................................--*)
external handle_to_type : lltype -> lltypehandle = "llvm_handle_to_type"
@@ -220,7 +225,6 @@ external refine_type : lltype -> lltype -> unit = "llvm_refine_type"
(*===-- Values ------------------------------------------------------------===*)
-
external type_of : llvalue -> lltype = "llvm_type_of"
external value_name : llvalue -> string = "llvm_value_name"
external set_value_name : string -> llvalue -> unit = "llvm_set_value_name"
@@ -238,14 +242,19 @@ external is_undef : llvalue -> bool = "llvm_is_undef"
external const_int : lltype -> int -> llvalue = "llvm_const_int"
external const_of_int64 : lltype -> Int64.t -> bool -> llvalue
= "llvm_const_of_int64"
+external const_int_of_string : lltype -> string -> int -> llvalue
+ = "llvm_const_int_of_string"
external const_float : lltype -> float -> llvalue = "llvm_const_float"
+external const_float_of_string : lltype -> string -> llvalue
+ = "llvm_const_float_of_string"
(*--... Operations on composite constants ..................................--*)
-external const_string : string -> llvalue = "llvm_const_string"
-external const_stringz : string -> llvalue = "llvm_const_stringz"
+external const_string : llcontext -> string -> llvalue = "llvm_const_string"
+external const_stringz : llcontext -> string -> llvalue = "llvm_const_stringz"
external const_array : lltype -> llvalue array -> llvalue = "llvm_const_array"
-external const_struct : llvalue array -> llvalue = "llvm_const_struct"
-external const_packed_struct : llvalue array -> llvalue
+external const_struct : llcontext -> llvalue array -> llvalue
+ = "llvm_const_struct"
+external const_packed_struct : llcontext -> llvalue array -> llvalue
= "llvm_const_packed_struct"
external const_vector : llvalue array -> llvalue = "llvm_const_vector"
@@ -253,12 +262,18 @@ external const_vector : llvalue array -> llvalue = "llvm_const_vector"
external align_of : lltype -> llvalue = "LLVMAlignOf"
external size_of : lltype -> llvalue = "LLVMSizeOf"
external const_neg : llvalue -> llvalue = "LLVMConstNeg"
+external const_fneg : llvalue -> llvalue = "LLVMConstFNeg"
external const_not : llvalue -> llvalue = "LLVMConstNot"
external const_add : llvalue -> llvalue -> llvalue = "LLVMConstAdd"
+external const_nsw_add : llvalue -> llvalue -> llvalue = "LLVMConstNSWAdd"
+external const_fadd : llvalue -> llvalue -> llvalue = "LLVMConstFAdd"
external const_sub : llvalue -> llvalue -> llvalue = "LLVMConstSub"
+external const_fsub : llvalue -> llvalue -> llvalue = "LLVMConstFSub"
external const_mul : llvalue -> llvalue -> llvalue = "LLVMConstMul"
+external const_fmul : llvalue -> llvalue -> llvalue = "LLVMConstFMul"
external const_udiv : llvalue -> llvalue -> llvalue = "LLVMConstUDiv"
external const_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstSDiv"
+external const_exact_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstExactSDiv"
external const_fdiv : llvalue -> llvalue -> llvalue = "LLVMConstFDiv"
external const_urem : llvalue -> llvalue -> llvalue = "LLVMConstURem"
external const_srem : llvalue -> llvalue -> llvalue = "LLVMConstSRem"
@@ -274,6 +289,8 @@ external const_shl : llvalue -> llvalue -> llvalue = "LLVMConstShl"
external const_lshr : llvalue -> llvalue -> llvalue = "LLVMConstLShr"
external const_ashr : llvalue -> llvalue -> llvalue = "LLVMConstAShr"
external const_gep : llvalue -> llvalue array -> llvalue = "llvm_const_gep"
+external const_in_bounds_gep : llvalue -> llvalue array -> llvalue
+ = "llvm_const_in_bounds_gep"
external const_trunc : llvalue -> lltype -> llvalue = "LLVMConstTrunc"
external const_sext : llvalue -> lltype -> llvalue = "LLVMConstSExt"
external const_zext : llvalue -> lltype -> llvalue = "LLVMConstZExt"
@@ -286,6 +303,16 @@ external const_fptosi : llvalue -> lltype -> llvalue = "LLVMConstFPToSI"
external const_ptrtoint : llvalue -> lltype -> llvalue = "LLVMConstPtrToInt"
external const_inttoptr : llvalue -> lltype -> llvalue = "LLVMConstIntToPtr"
external const_bitcast : llvalue -> lltype -> llvalue = "LLVMConstBitCast"
+external const_zext_or_bitcast : llvalue -> lltype -> llvalue
+ = "LLVMConstZExtOrBitCast"
+external const_sext_or_bitcast : llvalue -> lltype -> llvalue
+ = "LLVMConstSExtOrBitCast"
+external const_trunc_or_bitcast : llvalue -> lltype -> llvalue
+ = "LLVMConstTruncOrBitCast"
+external const_pointercast : llvalue -> lltype -> llvalue
+ = "LLVMConstPointerCast"
+external const_intcast : llvalue -> lltype -> llvalue = "LLVMConstIntCast"
+external const_fpcast : llvalue -> lltype -> llvalue = "LLVMConstFPCast"
external const_select : llvalue -> llvalue -> llvalue -> llvalue
= "LLVMConstSelect"
external const_extractelement : llvalue -> llvalue -> llvalue
@@ -294,6 +321,10 @@ external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
= "LLVMConstInsertElement"
external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
= "LLVMConstShuffleVector"
+external const_extractvalue : llvalue -> int array -> llvalue
+ = "llvm_const_extractvalue"
+external const_insertvalue : llvalue -> llvalue -> int array -> llvalue
+ = "llvm_const_insertvalue"
(*--... Operations on global variables, functions, and aliases (globals) ...--*)
external global_parent : llvalue -> llmodule = "LLVMGetGlobalParent"
@@ -504,8 +535,9 @@ external block_parent : llbasicblock -> llvalue = "LLVMGetBasicBlockParent"
external basic_blocks : llvalue -> llbasicblock array = "llvm_basic_blocks"
external entry_block : llvalue -> llbasicblock = "LLVMGetEntryBasicBlock"
external delete_block : llbasicblock -> unit = "llvm_delete_block"
-external append_block : string -> llvalue -> llbasicblock = "llvm_append_block"
-external insert_block : string -> llbasicblock -> llbasicblock
+external append_block : llcontext -> string -> llvalue -> llbasicblock
+ = "llvm_append_block"
+external insert_block : llcontext -> string -> llbasicblock -> llbasicblock
= "llvm_insert_block"
external block_begin : llvalue -> (llvalue, llbasicblock) llpos
= "llvm_block_begin"
@@ -629,18 +661,20 @@ external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming"
(*===-- Instruction builders ----------------------------------------------===*)
-external builder : unit -> llbuilder = "llvm_builder"
+external builder : llcontext -> llbuilder = "llvm_builder"
external position_builder : (llbasicblock, llvalue) llpos -> llbuilder -> unit
= "llvm_position_builder"
external insertion_block : llbuilder -> llbasicblock = "llvm_insertion_block"
+external insert_into_builder : llvalue -> string -> llbuilder -> unit
+ = "llvm_insert_into_builder"
-let builder_at ip =
- let b = builder () in
+let builder_at context ip =
+ let b = builder context in
position_builder ip b;
b
-let builder_before i = builder_at (Before i)
-let builder_at_end bb = builder_at (At_end bb)
+let builder_before context i = builder_at context (Before i)
+let builder_at_end context bb = builder_at context (At_end bb)
let position_before i = position_builder (Before i)
let position_at_end bb = position_builder (At_end bb)
@@ -649,6 +683,8 @@ let position_at_end bb = position_builder (At_end bb)
(*--... Terminators ........................................................--*)
external build_ret_void : llbuilder -> llvalue = "llvm_build_ret_void"
external build_ret : llvalue -> llbuilder -> llvalue = "llvm_build_ret"
+external build_aggregate_ret : llvalue array -> llbuilder -> llvalue
+ = "llvm_build_aggregate_ret"
external build_br : llbasicblock -> llbuilder -> llvalue = "llvm_build_br"
external build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llbuilder ->
llvalue = "llvm_build_cond_br"
@@ -665,14 +701,24 @@ external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable"
(*--... Arithmetic .........................................................--*)
external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_add"
+external build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_nsw_add"
+external build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_fadd"
external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_sub"
+external build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_fsub"
external build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_mul"
+external build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_fmul"
external build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_udiv"
external build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_sdiv"
+external build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_exact_sdiv"
external build_fdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_fdiv"
external build_urem : llvalue -> llvalue -> string -> llbuilder -> llvalue
@@ -714,6 +760,15 @@ external build_store : llvalue -> llvalue -> llbuilder -> llvalue
= "llvm_build_store"
external build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue
= "llvm_build_gep"
+external build_in_bounds_gep : llvalue -> llvalue array -> string ->
+ llbuilder -> llvalue = "llvm_build_in_bounds_gep"
+external build_struct_gep : llvalue -> int -> string -> llbuilder -> llvalue
+ = "llvm_build_struct_gep"
+
+external build_global_string : string -> string -> llbuilder -> llvalue
+ = "llvm_build_global_string"
+external build_global_stringptr : string -> string -> llbuilder -> llvalue
+ = "llvm_build_global_stringptr"
(*--... Casts ..............................................................--*)
external build_trunc : llvalue -> lltype -> string -> llbuilder -> llvalue
@@ -740,6 +795,18 @@ external build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue
= "llvm_build_inttoptr"
external build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue
= "llvm_build_bitcast"
+external build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+ llvalue = "llvm_build_zext_or_bitcast"
+external build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+ llvalue = "llvm_build_sext_or_bitcast"
+external build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+ llvalue = "llvm_build_trunc_or_bitcast"
+external build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue
+ = "llvm_build_pointercast"
+external build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+ = "llvm_build_intcast"
+external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+ = "llvm_build_fpcast"
(*--... Comparisons ........................................................--*)
external build_icmp : Icmp.t -> llvalue -> llvalue -> string ->
@@ -762,7 +829,17 @@ external build_insertelement : llvalue -> llvalue -> llvalue -> string ->
llbuilder -> llvalue = "llvm_build_insertelement"
external build_shufflevector : llvalue -> llvalue -> llvalue -> string ->
llbuilder -> llvalue = "llvm_build_shufflevector"
-
+external build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue
+ = "llvm_build_extractvalue"
+external build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder ->
+ llvalue = "llvm_build_insertvalue"
+
+external build_is_null : llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_is_null"
+external build_is_not_null : llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_is_not_null"
+external build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_ptrdiff"
(*===-- Module providers --------------------------------------------------===*)
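The llvm.ml hunks above replace the old implicitly-global type values (i1_type, float_type, ...) with functions over an explicit llcontext, and thread the context through constant and block constructors as well. A minimal sketch using only signatures visible in this file's diff:

  (* Hedged sketch: types and constants are created against a context. *)
  let demo () =
    let c = Llvm.create_context () in
    let i32 = Llvm.i32_type c in
    let pair_ty = Llvm.struct_type c [| i32; Llvm.double_type c |] in
    (pair_ty, Llvm.const_stringz c "hello")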
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index 35c218a75264..a7c2bcfd719b 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -17,6 +17,10 @@
These abstract types correlate directly to the LLVM VMCore classes. *)
+(** The top-level container for all LLVM global data. See the
+ [llvm::LLVMContext] class. *)
+type llcontext
+
(** The top-level container for all other LLVM Intermediate Representation (IR)
objects. See the [llvm::Module] class. *)
type llmodule
@@ -61,7 +65,6 @@ module TypeKind : sig
| Fp128
| Ppc_fp128
| Label
- | Metadata
| Integer
| Function
| Struct
@@ -69,6 +72,7 @@ module TypeKind : sig
| Pointer
| Opaque
| Vector
+ | Metadata
end
(** The linkage of a global value, accessed with {!linkage} and
@@ -78,13 +82,18 @@ module Linkage : sig
External
| Available_externally
| Link_once
+ | Link_once_odr
| Weak
+ | Weak_odr
| Appending
| Internal
+ | Private
| Dllimport
| Dllexport
| External_weak
| Ghost
+ | Common
+ | Linker_private
end
(** The linker visibility of a global value, accessed with {!visibility} and
@@ -125,6 +134,16 @@ module Attribute : sig
| Nest
| Readnone
| Readonly
+ | Noinline
+ | Alwaysinline
+ | Optsize
+ | Ssp
+ | Sspreq
+ | Nocapture
+ | Noredzone
+ | Noimplicitfloat
+ | Naked
+ | Inlinehint
end
(** The predicate for an integer comparison ([icmp]) instruction.
@@ -188,12 +207,27 @@ type ('a, 'b) llrev_pos =
exception IoError of string
+(** {6 Contexts} *)
+
+(** [create_context ()] creates a context for storing the "global" state in
+ LLVM. See the constructor [llvm::LLVMContext]. *)
+external create_context : unit -> llcontext = "llvm_create_context"
+
+(** [dispose_context ctx] destroys the context [ctx]. See the destructor
+ [llvm::LLVMContext::~LLVMContext]. *)
+external dispose_context : llcontext -> unit = "llvm_dispose_context"
+
+(** See the function [llvm::getGlobalContext]. *)
+external global_context : unit -> llcontext = "llvm_global_context"
+
+
(** {6 Modules} *)
-(** [create_module id] creates a module with the supplied module ID. Modules are
- not garbage collected; it is mandatory to call {!dispose_module} to free
- memory. See the constructor [llvm::Module::Module]. *)
-external create_module : string -> llmodule = "llvm_create_module"
+(** [create_module context id] creates a module with the supplied module ID in
+ the context [context]. Modules are not garbage collected; it is mandatory
+ to call {!dispose_module} to free memory. See the constructor
+ [llvm::Module::Module]. *)
+external create_module : llcontext -> string -> llmodule = "llvm_create_module"
(** [dispose_module m] destroys a module [m] and all of the IR objects it
contained. All references to subordinate objects are invalidated;
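The module constructor now names its owning context. A tiny sketch over create_module and dispose_module exactly as declared in this diff:

  (* Hedged sketch: create and dispose a module in a fresh context. *)
  let roundtrip () =
    let c = Llvm.create_context () in
    let m = Llvm.create_module c "demo" in
    Llvm.dispose_module m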
@@ -245,51 +279,65 @@ external dump_module : llmodule -> unit = "llvm_dump_module"
See the method [llvm::Type::getTypeID]. *)
external classify_type : lltype -> TypeKind.t = "llvm_classify_type"
+(** [type_context ty] returns the {!llcontext} corresponding to the type [ty].
+ See the method [llvm::Type::getContext]. *)
+external type_context : lltype -> llcontext = "llvm_type_context"
+
(** [string_of_lltype ty] returns a string describing the type [ty]. *)
val string_of_lltype : lltype -> string
(** {7 Operations on integer types} *)
-(** The 1-bit integer type. See [llvm::Type::Int1Ty]. *)
-val i1_type : lltype
+(** [i1_type c] returns an integer type of bitwidth 1 in the context [c]. See
+ [llvm::Type::Int1Ty]. *)
+external i1_type : llcontext -> lltype = "llvm_i1_type"
-(** The 8-bit integer type. See [llvm::Type::Int8Ty]. *)
-val i8_type : lltype
+(** [i8_type c] returns an integer type of bitwidth 8 in the context [c]. See
+ [llvm::Type::Int8Ty]. *)
+external i8_type : llcontext -> lltype = "llvm_i8_type"
-(** The 16-bit integer type. See [llvm::Type::Int16Ty]. *)
-val i16_type : lltype
+(** [i16_type c] returns an integer type of bitwidth 16 in the context [c]. See
+ [llvm::Type::Int16Ty]. *)
+external i16_type : llcontext -> lltype = "llvm_i16_type"
-(** The 32-bit integer type. See [llvm::Type::Int32Ty]. *)
-val i32_type : lltype
+(** [i32_type c] returns an integer type of bitwidth 32 in the context [c]. See
+ [llvm::Type::Int32Ty]. *)
+external i32_type : llcontext -> lltype = "llvm_i32_type"
-(** The 64-bit integer type. See [llvm::Type::Int64Ty]. *)
-val i64_type : lltype
+(** [i64_type c] returns an integer type of bitwidth 64 in the context [c]. See
+ [llvm::Type::Int64Ty]. *)
+external i64_type : llcontext -> lltype = "llvm_i64_type"
-(** [integer_type n] returns an integer type of bitwidth [n].
- See the method [llvm::IntegerType::get]. *)
-external integer_type : int -> lltype = "llvm_integer_type"
+(** [integer_type c n] returns an integer type of bitwidth [n] in the context
+ [c]. See the method [llvm::IntegerType::get]. *)
+external integer_type : llcontext -> int -> lltype = "llvm_integer_type"
-(** [integer_bitwidth ty] returns the number of bits in the integer type [ty].
- See the method [llvm::IntegerType::getBitWidth]. *)
+(** [integer_bitwidth ty] returns the number of bits in the integer type [ty].
+ See the method [llvm::IntegerType::getBitWidth]. *)
external integer_bitwidth : lltype -> int = "llvm_integer_bitwidth"
(** {7 Operations on real types} *)
-(** The IEEE 32-bit floating point type. See [llvm::Type::FloatTy]. *)
-val float_type : lltype
+(** [float_type c] returns the IEEE 32-bit floating point type in the context
+ [c]. See [llvm::Type::FloatTy]. *)
+external float_type : llcontext -> lltype = "llvm_float_type"
-(** The IEEE 64-bit floating point type. See [llvm::Type::DoubleTy]. *)
-val double_type : lltype
+(** [double_type c] returns the IEEE 64-bit floating point type in the context
+ [c]. See [llvm::Type::DoubleTy]. *)
+external double_type : llcontext -> lltype = "llvm_double_type"
-(** The x87 80-bit floating point type. See [llvm::Type::X86_FP80Ty]. *)
-val x86fp80_type : lltype
+(** [x86fp80_type c] returns the x87 80-bit floating point type in the context
+ [c]. See [llvm::Type::X86_FP80Ty]. *)
+external x86fp80_type : llcontext -> lltype = "llvm_x86fp80_type"
-(** The IEEE 128-bit floating point type. See [llvm::Type::FP128Ty]. *)
-val fp128_type : lltype
+(** [fp128_type c] returns the IEEE 128-bit floating point type in the context
+ [c]. See [llvm::Type::FP128Ty]. *)
+external fp128_type : llcontext -> lltype = "llvm_fp128_type"
-(** The PowerPC 128-bit floating point type. See [llvm::Type::PPC_FP128Ty]. *)
-val ppc_fp128_type : lltype
+(** [ppc_fp128_type c] returns the PowerPC 128-bit floating point type in the
+ context [c]. See [llvm::Type::PPC_FP128Ty]. *)
+external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type"
(** {7 Operations on function types} *)
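A small check over the per-context integer constructors above; note that integer_bitwidth is unchanged and still takes only the type, which is why its doc comment does not mention a context:

  (* Hedged sketch: bitwidth round-trip for an arbitrary-width type. *)
  let width_ok c n =
    Llvm.integer_bitwidth (Llvm.integer_type c n) = n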
@@ -321,13 +369,17 @@ external param_types : lltype -> lltype array = "llvm_param_types"
(** {7 Operations on struct types} *)
-(** [struct_type tys] returns the structure type containing in the types in the
- array [tys]. See the method [llvm::StructType::get]. *)
-external struct_type : lltype array -> lltype = "llvm_struct_type"
+(** [struct_type context tys] returns the structure type in the context
+ [context] containing the types in the array [tys]. See the method
+ [llvm::StructType::get]. *)
+external struct_type : llcontext -> lltype array -> lltype
+ = "llvm_struct_type"
-(** [struct_type tys] returns the packed structure type containing in the types
- in the array [tys]. See the method [llvm::StructType::get]. *)
-external packed_struct_type : lltype array -> lltype = "llvm_packed_struct_type"
+(** [packed_struct_type context tys] returns the packed structure type in the
+ context [context] containing the types in the array [tys]. See the method
+ [llvm::StructType::get]. *)
+external packed_struct_type : llcontext -> lltype array -> lltype
+ = "llvm_packed_struct_type"
(** [element_types sty] returns the constituent types of the struct type [sty].
See the method [llvm::StructType::getElementType]. *)
@@ -378,18 +430,18 @@ external vector_size : lltype -> int = "llvm_vector_size"
(** {7 Operations on other types} *)
-(** [opaque_type ()] creates a new opaque type distinct from any other.
- Opaque types are useful for building recursive types in combination with
- {!refine_type}.
- See [llvm::OpaqueType::get]. *)
-external opaque_type : unit -> lltype = "llvm_opaque_type"
+(** [opaque_type c] creates a new opaque type distinct from any other in the
+ context [c]. Opaque types are useful for building recursive types in
+ combination with {!refine_type}. See [llvm::OpaqueType::get]. *)
+external opaque_type : llcontext -> lltype = "llvm_opaque_type"
-(** [void_type] is the type of a function which does not return any value.
- See [llvm::Type::VoidTy]. *)
-val void_type : lltype
+(** [void_type c] creates the type of a function which does not return any
+ value, in the context [c]. See [llvm::Type::VoidTy]. *)
+external void_type : llcontext -> lltype = "llvm_void_type"
-(** [label_type] is the type of a basic block. See [llvm::Type::LabelTy]. *)
-val label_type : lltype
+(** [label_type c] creates the type of a basic block in the context [c]. See
+ [llvm::Type::LabelTy]. *)
+external label_type : llcontext -> lltype = "llvm_label_type"
(** {7 Operations on type handles} *)
@@ -469,24 +521,35 @@ external const_int : lltype -> int -> llvalue = "llvm_const_int"
external const_of_int64 : lltype -> Int64.t -> bool -> llvalue
= "llvm_const_of_int64"
+(** [const_int_of_string ty s r] returns the integer constant of type [ty] and
+ value [s], with the radix [r]. See the method [llvm::ConstantInt::get]. *)
+external const_int_of_string : lltype -> string -> int -> llvalue
+ = "llvm_const_int_of_string"
+
(** [const_float ty n] returns the floating point constant of type [ty] and
- value [n]. See the method [llvm::ConstantInt::get]. *)
+ value [n]. See the method [llvm::ConstantFP::get]. *)
external const_float : lltype -> float -> llvalue = "llvm_const_float"
+(** [const_float_of_string ty s] returns the floating point constant of type
+ [ty] and value [s]. See the method [llvm::ConstantFP::get]. *)
+external const_float_of_string : lltype -> string -> llvalue
+ = "llvm_const_float_of_string"
+
(** {7 Operations on composite constants} *)
-(** [const_string s] returns the constant [i8] array with the values of the
- characters in the string [s]. The array is not null-terminated (but see
- {!const_stringz}). This value can in turn be used as the initializer for a
- global variable. See the method [llvm::ConstantArray::get]. *)
-external const_string : string -> llvalue = "llvm_const_string"
+(** [const_string c s] returns the constant [i8] array with the values of the
+ characters in the string [s] in the context [c]. The array is not
+ null-terminated (but see {!const_stringz}). This value can in turn be used
+ as the initializer for a global variable. See the method
+ [llvm::ConstantArray::get]. *)
+external const_string : llcontext -> string -> llvalue = "llvm_const_string"
-(** [const_stringz s] returns the constant [i8] array with the values of the
- characters in the string [s] and a null terminator. This value can in turn
- be used as the initializer for a global variable.
+(** [const_stringz c s] returns the constant [i8] array with the values of the
+ characters in the string [s] and a null terminator in the context [c]. This
+ value can in turn be used as the initializer for a global variable.
See the method [llvm::ConstantArray::get]. *)
-external const_stringz : string -> llvalue = "llvm_const_stringz"
+external const_stringz : llcontext -> string -> llvalue = "llvm_const_stringz"
(** [const_array ty elts] returns the constant array of type
[array_type ty (Array.length elts)] and containing the values [elts].
@@ -494,17 +557,19 @@ external const_stringz : string -> llvalue = "llvm_const_stringz"
See the method [llvm::ConstantArray::get]. *)
external const_array : lltype -> llvalue array -> llvalue = "llvm_const_array"
-(** [const_struct elts] returns the structured constant of type
- [struct_type (Array.map type_of elts)] and containing the values [elts].
- This value can in turn be used as the initializer for a global variable.
- See the method [llvm::ConstantStruct::get]. *)
-external const_struct : llvalue array -> llvalue = "llvm_const_struct"
-
-(** [const_packed_struct elts] returns the structured constant of type
- {!packed_struct_type} [(Array.map type_of elts)] and containing the values
- [elts]. This value can in turn be used as the initializer for a global
- variable. See the method [llvm::ConstantStruct::get]. *)
-external const_packed_struct : llvalue array -> llvalue
+(** [const_struct context elts] returns the structured constant of type
+ [struct_type (Array.map type_of elts)] and containing the values [elts]
+ in the context [context]. This value can in turn be used as the initializer
+ for a global variable. See the method [llvm::ConstantStruct::get]. *)
+external const_struct : llcontext -> llvalue array -> llvalue
+ = "llvm_const_struct"
+
+(** [const_packed_struct context elts] returns the structured constant of
+ type {!packed_struct_type} [(Array.map type_of elts)] and containing the
+ values [elts] in the context [context]. This value can in turn be used as
+ the initializer for a global variable. See the method
+ [llvm::ConstantStruct::get]. *)
+external const_packed_struct : llcontext -> llvalue array -> llvalue
= "llvm_const_packed_struct"
(** [const_vector elts] returns the vector constant of type
@@ -531,6 +596,10 @@ external size_of : lltype -> llvalue = "LLVMSizeOf"
See the method [llvm::ConstantExpr::getNeg]. *)
external const_neg : llvalue -> llvalue = "LLVMConstNeg"
+(** [const_fneg c] returns the arithmetic negation of the constant float [c].
+ See the method [llvm::ConstantExpr::getFNeg]. *)
+external const_fneg : llvalue -> llvalue = "LLVMConstFNeg"
+
(** [const_not c] returns the bitwise inverse of the constant [c].
See the method [llvm::ConstantExpr::getNot]. *)
external const_not : llvalue -> llvalue = "LLVMConstNot"
@@ -539,14 +608,31 @@ external const_not : llvalue -> llvalue = "LLVMConstNot"
See the method [llvm::ConstantExpr::getAdd]. *)
external const_add : llvalue -> llvalue -> llvalue = "LLVMConstAdd"
+(** [const_nsw_add c1 c2] returns the constant sum of two constants with no
+ signed wrapping. The result is undefined if the sum overflows.
+ See the method [llvm::ConstantExpr::getNSWAdd]. *)
+external const_nsw_add : llvalue -> llvalue -> llvalue = "LLVMConstNSWAdd"
+
+(** [const_fadd c1 c2] returns the constant sum of two constant floats.
+ See the method [llvm::ConstantExpr::getFAdd]. *)
+external const_fadd : llvalue -> llvalue -> llvalue = "LLVMConstFAdd"
+
(** [const_sub c1 c2] returns the constant difference, [c1 - c2], of two
constants. See the method [llvm::ConstantExpr::getSub]. *)
external const_sub : llvalue -> llvalue -> llvalue = "LLVMConstSub"
+(** [const_fsub c1 c2] returns the constant difference, [c1 - c2], of two
+ constant floats. See the method [llvm::ConstantExpr::getFSub]. *)
+external const_fsub : llvalue -> llvalue -> llvalue = "LLVMConstFSub"
+
(** [const_mul c1 c2] returns the constant product of two constants.
See the method [llvm::ConstantExpr::getMul]. *)
external const_mul : llvalue -> llvalue -> llvalue = "LLVMConstMul"
+(** [const_fmul c1 c2] returns the constant product of two constant floats.
+ See the method [llvm::ConstantExpr::getFMul]. *)
+external const_fmul : llvalue -> llvalue -> llvalue = "LLVMConstFMul"
+
(** [const_udiv c1 c2] returns the constant quotient [c1 / c2] of two unsigned
integer constants.
See the method [llvm::ConstantExpr::getUDiv]. *)
@@ -554,20 +640,25 @@ external const_udiv : llvalue -> llvalue -> llvalue = "LLVMConstUDiv"
(** [const_sdiv c1 c2] returns the constant quotient [c1 / c2] of two signed
integer constants.
- See the method [llvm::ConstantExpr::]. *)
+ See the method [llvm::ConstantExpr::getSDiv]. *)
external const_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstSDiv"
+(** [const_exact_sdiv c1 c2] returns the constant quotient [c1 / c2] of two
+ signed integer constants. The result is undefined if the quotient is rounded
+ or overflows. See the method [llvm::ConstantExpr::getExactSDiv]. *)
+external const_exact_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstExactSDiv"
+
(** [const_fdiv c1 c2] returns the constant quotient [c1 / c2] of two floating
point constants.
See the method [llvm::ConstantExpr::getFDiv]. *)
external const_fdiv : llvalue -> llvalue -> llvalue = "LLVMConstFDiv"
-(** [const_udiv c1 c2] returns the constant remainder [c1 MOD c2] of two
+(** [const_urem c1 c2] returns the constant remainder [c1 MOD c2] of two
unsigned integer constants.
See the method [llvm::ConstantExpr::getURem]. *)
external const_urem : llvalue -> llvalue -> llvalue = "LLVMConstURem"
-(** [const_sdiv c1 c2] returns the constant remainder [c1 MOD c2] of two
+(** [const_srem c1 c2] returns the constant remainder [c1 MOD c2] of two
signed integer constants.
See the method [llvm::ConstantExpr::getSRem]. *)
external const_srem : llvalue -> llvalue -> llvalue = "LLVMConstSRem"
@@ -624,6 +715,12 @@ external const_ashr : llvalue -> llvalue -> llvalue = "LLVMConstAShr"
See the method [llvm::ConstantExpr::getGetElementPtr]. *)
external const_gep : llvalue -> llvalue array -> llvalue = "llvm_const_gep"
+(** [const_in_bounds_gep p indices] returns the constant [getElementPtr] of [p]
+ with the constant integer indices from the array [indices].
+ See the method [llvm::ConstantExpr::getInBoundsGetElementPtr]. *)
+external const_in_bounds_gep : llvalue -> llvalue array -> llvalue
+ = "llvm_const_in_bounds_gep"
+
(** [const_trunc c ty] returns the constant truncation of integer constant [c]
to the smaller integer type [ty].
See the method [llvm::ConstantExpr::getTrunc]. *)
@@ -684,6 +781,42 @@ external const_inttoptr : llvalue -> lltype -> llvalue = "LLVMConstIntToPtr"
See the method [llvm::ConstantExpr::getBitCast]. *)
external const_bitcast : llvalue -> lltype -> llvalue = "LLVMConstBitCast"
+(** [const_zext_or_bitcast c ty] returns a constant zext or bitwise cast
+ conversion of constant [c] to type [ty].
+ See the method [llvm::ConstantExpr::getZExtOrBitCast]. *)
+external const_zext_or_bitcast : llvalue -> lltype -> llvalue
+ = "LLVMConstZExtOrBitCast"
+
+(** [const_sext_or_bitcast c ty] returns a constant sext or bitwise cast
+ conversion of constant [c] to type [ty].
+ See the method [llvm::ConstantExpr::getSExtOrBitCast]. *)
+external const_sext_or_bitcast : llvalue -> lltype -> llvalue
+ = "LLVMConstSExtOrBitCast"
+
+(** [const_trunc_or_bitcast c ty] returns a constant trunc or bitwise cast
+ conversion of constant [c] to type [ty].
+ See the method [llvm::ConstantExpr::getTruncOrBitCast]. *)
+external const_trunc_or_bitcast : llvalue -> lltype -> llvalue
+ = "LLVMConstTruncOrBitCast"
+
+(** [const_pointercast c ty] returns a constant bitcast or a pointer-to-int
+ cast conversion of constant [c] to type [ty] of equal size.
+ See the method [llvm::ConstantExpr::getPointerCast]. *)
+external const_pointercast : llvalue -> lltype -> llvalue
+ = "LLVMConstPointerCast"
+
+(** [const_intcast c ty] returns a constant zext, bitcast, or trunc for integer
+ -> integer casts of constant [c] to type [ty].
+ See the method [llvm::ConstantExpr::getIntCast]. *)
+external const_intcast : llvalue -> lltype -> llvalue
+ = "LLVMConstIntCast"
+
+(** [const_fpcast c ty] returns a constant fpext, bitcast, or fptrunc for fp ->
+ fp casts of constant [c] to type [ty].
+ See the method [llvm::ConstantExpr::getFPCast]. *)
+external const_fpcast : llvalue -> lltype -> llvalue
+ = "LLVMConstFPCast"
+
(** [const_select cond t f] returns the constant conditional which returns value
[t] if the boolean constant [cond] is true and the value [f] otherwise.
See the method [llvm::ConstantExpr::getSelect]. *)
@@ -713,6 +846,18 @@ external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
= "LLVMConstShuffleVector"
+(** [const_extractvalue agg idxs] returns the constant value of the constant
+ aggregate [agg] at the indices [idxs]. Each index in [idxs] must be less
+ than the size of the aggregate. See the method
+ [llvm::ConstantExpr::getExtractValue]. *)
+external const_extractvalue : llvalue -> int array -> llvalue
+ = "llvm_const_extractvalue"
+
+(** [const_insertvalue agg val idxs] inserts the value [val] at the indices
+ [idxs] in the aggregate [agg]. Each index in [idxs] must be less than the
+ size of the aggregate. See the method [llvm::ConstantExpr::getInsertValue]. *)
+external const_insertvalue : llvalue -> llvalue -> int array -> llvalue
+ = "llvm_const_insertvalue"
+
(** {7 Operations on global variables, functions, and aliases (globals)} *)
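A short sketch of the new aggregate constant operations documented just above, built only from signatures introduced in this patch:

  (* Hedged sketch: extract and re-insert a field of a constant struct. *)
  let aggregate_demo c =
    let i32 = Llvm.i32_type c in
    let agg = Llvm.const_struct c [| Llvm.const_int i32 1;
                                     Llvm.const_int i32 2 |] in
    let second = Llvm.const_extractvalue agg [| 1 |] in
    Llvm.const_insertvalue agg second [| 0 |]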
@@ -1040,15 +1185,16 @@ external entry_block : llvalue -> llbasicblock = "LLVMGetEntryBasicBlock"
See the method [llvm::BasicBlock::eraseFromParent]. *)
external delete_block : llbasicblock -> unit = "llvm_delete_block"
-(** [append_block name f] creates a new basic block named [name] at the end of
- function [f].
+(** [append_block c name f] creates a new basic block named [name] at the end of
+ function [f] in the context [c].
See the constructor of [llvm::BasicBlock]. *)
-external append_block : string -> llvalue -> llbasicblock = "llvm_append_block"
+external append_block : llcontext -> string -> llvalue -> llbasicblock
+ = "llvm_append_block"
-(** [insert_block name bb] creates a new basic block named [name] before the
- basic block [bb].
+(** [insert_block c name bb] creates a new basic block named [name] before the
+ basic block [bb] in the context [c].
See the constructor of [llvm::BasicBlock]. *)
-external insert_block : string -> llbasicblock -> llbasicblock
+external insert_block : llcontext -> string -> llbasicblock -> llbasicblock
= "llvm_insert_block"
(** [block_parent bb] returns the parent function that owns the basic block.
@@ -1207,22 +1353,23 @@ external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming"
(** {6 Instruction builders} *)
-(** [builder ()] creates an instruction builder with no position. It is invalid
- to use this builder until its position is set with {!position_before} or
- {!position_at_end}. See the constructor for [llvm::LLVMBuilder]. *)
-external builder : unit -> llbuilder = "llvm_builder"
+(** [builder context] creates an instruction builder with no position in
+ the context [context]. It is invalid to use this builder until its position
+ is set with {!position_before} or {!position_at_end}. See the constructor
+ for [llvm::LLVMBuilder]. *)
+external builder : llcontext -> llbuilder = "llvm_builder"
-(** [builder_at ip] creates an instruction builder positioned at [ip].
- See the constructor for [llvm::LLVMBuilder]. *)
+(** [builder_at c ip] creates an instruction builder positioned at [ip] in the
+ context [c]. See the constructor for [llvm::LLVMBuilder]. *)
-val builder_at : (llbasicblock, llvalue) llpos -> llbuilder
+val builder_at : llcontext -> (llbasicblock, llvalue) llpos -> llbuilder
-(** [builder_before ins] creates an instruction builder positioned before the
- instruction [ins]. See the constructor for [llvm::LLVMBuilder]. *)
+(** [builder_before c ins] creates an instruction builder positioned before the
+ instruction [ins] in the context [c]. See the constructor for
+ [llvm::LLVMBuilder]. *)
-val builder_before : llvalue -> llbuilder
+val builder_before : llcontext -> llvalue -> llbuilder
-(** [builder_at_end bb] creates an instruction builder positioned at the end of
- the basic block [bb]. See the constructor for [llvm::LLVMBuilder]. *)
+(** [builder_at_end c bb] creates an instruction builder positioned at the end
+ of the basic block [bb] in the context [c]. See the constructor for
+ [llvm::LLVMBuilder]. *)
-val builder_at_end : llbasicblock -> llbuilder
+val builder_at_end : llcontext -> llbasicblock -> llbuilder
(** [position_builder ip bb] moves the instruction builder [bb] to the position
[ip].
@@ -1244,6 +1391,12 @@ val position_at_end : llbasicblock -> llbuilder -> unit
See the method [llvm::LLVMBuilder::GetInsertBlock]. *)
external insertion_block : llbuilder -> llbasicblock = "llvm_insertion_block"
+(** [insert_into_builder i name b] inserts the specified instruction [i] at the
+ position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::Insert]. *)
+external insert_into_builder : llvalue -> string -> llbuilder -> unit
+ = "llvm_insert_into_builder"
+
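A sketch of the context-threaded builder flow above (it assumes [create_module] also takes the context in this patch; "demo" and "f" are placeholder names):
  let m = Llvm.create_module ctx "demo"
  let fty = Llvm.function_type (Llvm.void_type ctx) [| |]
  let fn = Llvm.define_function "f" fty m
  let b = Llvm.builder ctx
  (* [define_function] pre-creates an "entry" block; park the builder there. *)
  let () = Llvm.position_at_end (Llvm.entry_block fn) b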
(** {7 Terminators} *)
@@ -1259,6 +1412,13 @@ external build_ret_void : llbuilder -> llvalue = "llvm_build_ret_void"
See the method [llvm::LLVMBuilder::CreateRet]. *)
external build_ret : llvalue -> llbuilder -> llvalue = "llvm_build_ret"
+(** [build_aggregate_ret vs b] creates a
+ [ret {...} { %v1, %v2, ... } ]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateAggregateRet]. *)
+external build_aggregate_ret : llvalue array -> llbuilder -> llvalue
+ = "llvm_build_aggregate_ret"
+
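With a struct return type the whole result can be returned in one call; [x] and [y] are hypothetical llvalues of the field types:
  (* Returns {x, y} as a single aggregate; [b] is the builder from above. *)
  let _ = Llvm.build_aggregate_ret [| x; y |] b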
(** [build_br bb b] creates a
[b %bb]
instruction at the position specified by the instruction builder [b].
@@ -1316,6 +1476,20 @@ external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable"
external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_add"
+(** [build_nsw_add x y name b] creates a
+ [%name = nsw add %x, %y]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateNSWAdd]. *)
+external build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_nsw_add"
+
+(** [build_fadd x y name b] creates a
+ [%name = fadd %x, %y]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateFAdd]. *)
+external build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_fadd"
+
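The flag-carrying and floating-point variants mirror the plain forms; a sketch with the builder [b] from above ([fx] and [fy] are hypothetical float-typed values):
  let s = Llvm.build_nsw_add (Llvm.const_int i32 1) (Llvm.const_int i32 2) "s" b
  let f = Llvm.build_fadd fx fy "f" b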
(** [build_sub x y name b] creates a
[%name = sub %x, %y]
instruction at the position specified by the instruction builder [b].
@@ -1323,6 +1497,13 @@ external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_sub"
+(** [build_fsub x y name b] creates a
+ [%name = fsub %x, %y]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateFSub]. *)
+external build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_fsub"
+
(** [build_mul x y name b] creates a
[%name = mul %x, %y]
instruction at the position specified by the instruction builder [b].
@@ -1330,6 +1511,13 @@ external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
external build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_mul"
+(** [build_fmul x y name b] creates a
+ [%name = fmul %x, %y]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateFMul]. *)
+external build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_fmul"
+
(** [build_udiv x y name b] creates a
[%name = udiv %x, %y]
instruction at the position specified by the instruction builder [b].
@@ -1344,6 +1532,13 @@ external build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
external build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
= "llvm_build_sdiv"
+(** [build_exact_sdiv x y name b] creates a
+ [%name = sdiv exact %x, %y]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateExactSDiv]. *)
+external build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_exact_sdiv"
+
(** [build_fdiv x y name b] creates a
[%name = fdiv %x, %y]
instruction at the position specified by the instruction builder [b].
@@ -1482,12 +1677,39 @@ external build_store : llvalue -> llvalue -> llbuilder -> llvalue
= "llvm_build_store"
(** [build_gep p indices name b] creates a
- [%name = gep %p, indices...]
+ [%name = getelementptr %p, indices...]
instruction at the position specified by the instruction builder [b].
See the method [llvm::LLVMBuilder::CreateGetElementPtr]. *)
external build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue
= "llvm_build_gep"
+(** [build_in_bounds_gep p indices name b] creates a
+ [%name = getelementptr inbounds %p, indices...]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateInBoundsGEP]. *)
+external build_in_bounds_gep : llvalue -> llvalue array -> string -> llbuilder ->
+ llvalue = "llvm_build_in_bounds_gep"
+
+(** [build_struct_gep p idx name b] creates a
+ [%name = getelementptr %p, 0, idx]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateStructGEP]. *)
+external build_struct_gep : llvalue -> int -> string -> llbuilder ->
+ llvalue = "llvm_build_struct_gep"
+
+(** [build_global_string str name b] creates a series of instructions that adds
+ a global string at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateGlobalString]. *)
+external build_global_string : string -> string -> llbuilder -> llvalue
+ = "llvm_build_global_string"
+
+(** [build_global_stringptr str name b] creates a series of instructions that
+ adds a global string pointer at the position specified by the instruction
+ builder [b].
+ See the method [llvm::LLVMBuilder::CreateGlobalStringPtr]. *)
+external build_global_stringptr : string -> string -> llbuilder -> llvalue
+ = "llvm_build_global_stringptr"
+
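For example, given a hypothetical pointer [p] to a struct with at least three fields:
  (* Field address, then an interned C string usable as an i8 pointer. *)
  let field2 = Llvm.build_struct_gep p 2 "field2" b
  let msg = Llvm.build_global_stringptr "hello, world" "msg" b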
(** {7 Casts} *)
@@ -1571,10 +1793,46 @@ external build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue
(** [build_bitcast v ty name b] creates a
[%name = bitcast %p to %ty]
instruction at the position specified by the instruction builder [b].
- See the method [llvm::LLVMBuilder::CreateBitcast]. *)
+ See the method [llvm::LLVMBuilder::CreateBitCast]. *)
external build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue
= "llvm_build_bitcast"
+(** [build_zext_or_bitcast v ty name b] creates a zext or bitcast
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateZExtOrBitCast]. *)
+external build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+ llvalue = "llvm_build_zext_or_bitcast"
+
+(** [build_sext_or_bitcast v ty name b] creates a sext or bitcast
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateSExtOrBitCast]. *)
+external build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+ llvalue = "llvm_build_sext_or_bitcast"
+
+(** [build_trunc_or_bitcast v ty name b] creates a trunc or bitcast
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateTruncOrBitCast]. *)
+external build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+ llvalue = "llvm_build_trunc_or_bitcast"
+
+(** [build_pointercast v ty name b] creates a bitcast or pointer-to-int
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreatePointerCast]. *)
+external build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue
+ = "llvm_build_pointercast"
+
+(** [build_intcast v ty name b] creates a zext, bitcast, or trunc
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateIntCast]. *)
+external build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+ = "llvm_build_intcast"
+
+(** [build_fpcast v ty name b] creates a fpext, bitcast, or fptrunc
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateFPCast]. *)
+external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+ = "llvm_build_fpcast"
+
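These adaptive casts pick the opcode from the source and destination types; a sketch with hypothetical pointer- and integer-typed values [p] and [n]:
  let p8 = Llvm.build_pointercast p (Llvm.pointer_type (Llvm.i8_type ctx)) "p8" b
  let n64 = Llvm.build_intcast n (Llvm.i64_type ctx) "n64" b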
(** {7 Comparisons} *)
@@ -1645,6 +1903,40 @@ external build_insertelement : llvalue -> llvalue -> llvalue -> string ->
external build_shufflevector : llvalue -> llvalue -> llvalue -> string ->
llbuilder -> llvalue = "llvm_build_shufflevector"
+(** [build_extractvalue agg idx name b] creates a
+ [%name = extractvalue %agg, %idx]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateExtractValue]. *)
+external build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue
+ = "llvm_build_extractvalue"
+
+(** [build_insertvalue agg val idx name b] creates a
+ [%name = insertvalue %agg, %val, %idx]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateInsertValue]. *)
+external build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder ->
+ llvalue = "llvm_build_insertvalue"
+
+(** [build_is_null val name b] creates a
+ [%name = icmp eq %val, null]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateIsNull]. *)
+external build_is_null : llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_is_null"
+
+(** [build_is_not_null val name b] creates a
+ [%name = icmp ne %val, null]
+ instruction at the position specified by the instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreateIsNotNull]. *)
+external build_is_not_null : llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_is_not_null"
+
+(** [build_ptrdiff lhs rhs name b] creates a series of instructions that measure
+ the difference between two pointer values at the position specified by the
+ instruction builder [b].
+ See the method [llvm::LLVMBuilder::CreatePtrDiff]. *)
+external build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue
+ = "llvm_build_ptrdiff"
(** {6 Module providers} *)
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index c4eba13db0fd..8868d07ffc7a 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -92,6 +92,24 @@ static value alloc_variant(int tag, void *Value) {
}
+/*===-- Contexts ----------------------------------------------------------===*/
+
+/* unit -> llcontext */
+CAMLprim LLVMContextRef llvm_create_context(value Unit) {
+ return LLVMContextCreate();
+}
+
+/* llcontext -> unit */
+CAMLprim value llvm_dispose_context(LLVMContextRef C) {
+ LLVMContextDispose(C);
+ return Val_unit;
+}
+
+/* unit -> llcontext */
+CAMLprim LLVMContextRef llvm_global_context(value Unit) {
+ return LLVMGetGlobalContext();
+}
+
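On the OCaml side these stubs surface roughly as follows (sketch; the externals are declared in llvm.mli):
  let c = Llvm.create_context ()
  (* ... create types and modules tied to [c] ... *)
  let () = Llvm.dispose_context c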
/*===-- Modules -----------------------------------------------------------===*/
/* string -> llmodule */
@@ -153,18 +171,41 @@ CAMLprim value llvm_classify_type(LLVMTypeRef Ty) {
return Val_int(LLVMGetTypeKind(Ty));
}
+/* lltype -> llcontext */
+CAMLprim LLVMContextRef llvm_type_context(LLVMTypeRef Ty) {
+ return LLVMGetTypeContext(Ty);
+}
+
/*--... Operations on integer types ........................................--*/
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_i1_type (value Unit) { return LLVMInt1Type(); }
-CAMLprim LLVMTypeRef llvm_i8_type (value Unit) { return LLVMInt8Type(); }
-CAMLprim LLVMTypeRef llvm_i16_type(value Unit) { return LLVMInt16Type(); }
-CAMLprim LLVMTypeRef llvm_i32_type(value Unit) { return LLVMInt32Type(); }
-CAMLprim LLVMTypeRef llvm_i64_type(value Unit) { return LLVMInt64Type(); }
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_i1_type (LLVMContextRef Context) {
+ return LLVMInt1TypeInContext(Context);
+}
-/* int -> lltype */
-CAMLprim LLVMTypeRef llvm_integer_type(value Width) {
- return LLVMIntType(Int_val(Width));
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_i8_type (LLVMContextRef Context) {
+ return LLVMInt8TypeInContext(Context);
+}
+
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_i16_type (LLVMContextRef Context) {
+ return LLVMInt16TypeInContext(Context);
+}
+
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_i32_type (LLVMContextRef Context) {
+ return LLVMInt32TypeInContext(Context);
+}
+
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_i64_type (LLVMContextRef Context) {
+ return LLVMInt64TypeInContext(Context);
+}
+
+/* llcontext -> int -> lltype */
+CAMLprim LLVMTypeRef llvm_integer_type(LLVMContextRef Context, value Width) {
+ return LLVMIntTypeInContext(Context, Int_val(Width));
}
/* lltype -> int */
@@ -174,29 +215,29 @@ CAMLprim value llvm_integer_bitwidth(LLVMTypeRef IntegerTy) {
/*--... Operations on real types ...........................................--*/
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_float_type(value Unit) {
- return LLVMFloatType();
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_float_type(LLVMContextRef Context) {
+ return LLVMFloatTypeInContext(Context);
}
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_double_type(value Unit) {
- return LLVMDoubleType();
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_double_type(LLVMContextRef Context) {
+ return LLVMDoubleTypeInContext(Context);
}
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_x86fp80_type(value Unit) {
- return LLVMX86FP80Type();
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_x86fp80_type(LLVMContextRef Context) {
+ return LLVMX86FP80TypeInContext(Context);
}
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_fp128_type(value Unit) {
- return LLVMFP128Type();
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_fp128_type(LLVMContextRef Context) {
+ return LLVMFP128TypeInContext(Context);
}
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_ppc_fp128_type(value Unit) {
- return LLVMPPCFP128Type();
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_ppc_fp128_type(LLVMContextRef Context) {
+ return LLVMPPCFP128TypeInContext(Context);
}
/*--... Operations on function types .......................................--*/
@@ -228,16 +269,17 @@ CAMLprim value llvm_param_types(LLVMTypeRef FunTy) {
/*--... Operations on struct types .........................................--*/
-/* lltype array -> lltype */
-CAMLprim LLVMTypeRef llvm_struct_type(value ElementTypes) {
- return LLVMStructType((LLVMTypeRef *) ElementTypes,
- Wosize_val(ElementTypes), 0);
+/* llcontext -> lltype array -> lltype */
+CAMLprim LLVMTypeRef llvm_struct_type(LLVMContextRef C, value ElementTypes) {
+ return LLVMStructTypeInContext(C, (LLVMTypeRef *) ElementTypes,
+ Wosize_val(ElementTypes), 0);
}
-/* lltype array -> lltype */
-CAMLprim LLVMTypeRef llvm_packed_struct_type(value ElementTypes) {
- return LLVMStructType((LLVMTypeRef *) ElementTypes,
- Wosize_val(ElementTypes), 1);
+/* llcontext -> lltype array -> lltype */
+CAMLprim LLVMTypeRef llvm_packed_struct_type(LLVMContextRef C,
+ value ElementTypes) {
+ return LLVMStructTypeInContext(C, (LLVMTypeRef *) ElementTypes,
+ Wosize_val(ElementTypes), 1);
}
/* lltype -> lltype array */
@@ -292,13 +334,19 @@ CAMLprim value llvm_vector_size(LLVMTypeRef VectorTy) {
/*--... Operations on other types ..........................................--*/
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_void_type (value Unit) { return LLVMVoidType(); }
-CAMLprim LLVMTypeRef llvm_label_type(value Unit) { return LLVMLabelType(); }
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_void_type (LLVMContextRef Context) {
+ return LLVMVoidTypeInContext(Context);
+}
-/* unit -> lltype */
-CAMLprim LLVMTypeRef llvm_opaque_type(value Unit) {
- return LLVMOpaqueType();
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_label_type(LLVMContextRef Context) {
+ return LLVMLabelTypeInContext(Context);
+}
+
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_opaque_type(LLVMContextRef Context) {
+ return LLVMOpaqueTypeInContext(Context);
}
/*--... Operations on type handles .........................................--*/
@@ -388,21 +436,38 @@ CAMLprim LLVMValueRef llvm_const_of_int64(LLVMTypeRef IntTy, value N,
return LLVMConstInt(IntTy, Int64_val(N), Bool_val(SExt));
}
+/* lltype -> string -> int -> llvalue */
+CAMLprim LLVMValueRef llvm_const_int_of_string(LLVMTypeRef IntTy, value S,
+ value Radix) {
+ return LLVMConstIntOfStringAndSize(IntTy, String_val(S), caml_string_length(S),
+ Int_val(Radix));
+}
+
/* lltype -> float -> llvalue */
CAMLprim LLVMValueRef llvm_const_float(LLVMTypeRef RealTy, value N) {
return LLVMConstReal(RealTy, Double_val(N));
}
+/* lltype -> string -> llvalue */
+CAMLprim LLVMValueRef llvm_const_float_of_string(LLVMTypeRef RealTy, value S) {
+ return LLVMConstRealOfStringAndSize(RealTy, String_val(S),
+ caml_string_length(S));
+}
+
/*--... Operations on composite constants ..................................--*/
-/* string -> llvalue */
-CAMLprim LLVMValueRef llvm_const_string(value Str, value NullTerminate) {
- return LLVMConstString(String_val(Str), string_length(Str), 1);
+/* llcontext -> string -> llvalue */
+CAMLprim LLVMValueRef llvm_const_string(LLVMContextRef Context, value Str,
+ value NullTerminate) {
+ return LLVMConstStringInContext(Context, String_val(Str), string_length(Str),
+ 1);
}
-/* string -> llvalue */
-CAMLprim LLVMValueRef llvm_const_stringz(value Str, value NullTerminate) {
- return LLVMConstString(String_val(Str), string_length(Str), 0);
+/* llcontext -> string -> llvalue */
+CAMLprim LLVMValueRef llvm_const_stringz(LLVMContextRef Context, value Str,
+ value NullTerminate) {
+ return LLVMConstStringInContext(Context, String_val(Str), string_length(Str),
+ 0);
}
/* lltype -> llvalue array -> llvalue */
@@ -412,16 +477,17 @@ CAMLprim LLVMValueRef llvm_const_array(LLVMTypeRef ElementTy,
Wosize_val(ElementVals));
}
-/* llvalue array -> llvalue */
-CAMLprim LLVMValueRef llvm_const_struct(value ElementVals) {
- return LLVMConstStruct((LLVMValueRef *) Op_val(ElementVals),
- Wosize_val(ElementVals), 0);
+/* llcontext -> llvalue array -> llvalue */
+CAMLprim LLVMValueRef llvm_const_struct(LLVMContextRef C, value ElementVals) {
+ return LLVMConstStructInContext(C, (LLVMValueRef *) Op_val(ElementVals),
+ Wosize_val(ElementVals), 0);
}
-/* llvalue array -> llvalue */
-CAMLprim LLVMValueRef llvm_const_packed_struct(value ElementVals) {
- return LLVMConstStruct((LLVMValueRef *) Op_val(ElementVals),
- Wosize_val(ElementVals), 1);
+/* llcontext -> llvalue array -> llvalue */
+CAMLprim LLVMValueRef llvm_const_packed_struct(LLVMContextRef C,
+ value ElementVals) {
+ return LLVMConstStructInContext(C, (LLVMValueRef *) Op_val(ElementVals),
+ Wosize_val(ElementVals), 1);
}
/* llvalue array -> llvalue */
@@ -452,6 +518,49 @@ CAMLprim LLVMValueRef llvm_const_gep(LLVMValueRef ConstantVal, value Indices) {
Wosize_val(Indices));
}
+/* llvalue -> llvalue array -> llvalue */
+CAMLprim LLVMValueRef llvm_const_in_bounds_gep(LLVMValueRef ConstantVal,
+ value Indices) {
+ return LLVMConstInBoundsGEP(ConstantVal, (LLVMValueRef*) Op_val(Indices),
+ Wosize_val(Indices));
+}
+
+/* llvalue -> int array -> llvalue */
+CAMLprim LLVMValueRef llvm_const_extractvalue(LLVMValueRef Aggregate,
+ value Indices) {
+ CAMLparam1(Indices);
+ int size = Wosize_val(Indices);
+ int i;
+ LLVMValueRef result;
+
+ unsigned* idxs = (unsigned*)malloc(size * sizeof(unsigned));
+ for (i = 0; i < size; i++) {
+ idxs[i] = Int_val(Field(Indices, i));
+ }
+
+ result = LLVMConstExtractValue(Aggregate, idxs, size);
+ free(idxs);
+ CAMLreturnT(LLVMValueRef, result);
+}
+
+/* llvalue -> llvalue -> int array -> llvalue */
+CAMLprim LLVMValueRef llvm_const_insertvalue(LLVMValueRef Aggregate,
+ LLVMValueRef Val, value Indices) {
+ CAMLparam1(Indices);
+ int size = Wosize_val(Indices);
+ int i;
+ LLVMValueRef result;
+
+ unsigned* idxs = (unsigned*)malloc(size * sizeof(unsigned));
+ for (i = 0; i < size; i++) {
+ idxs[i] = Int_val(Field(Indices, i));
+ }
+
+ result = LLVMConstInsertValue(Aggregate, Val, idxs, size);
+ free(idxs);
+ CAMLreturnT(LLVMValueRef, result);
+}
+
/*--... Operations on global variables, functions, and aliases (globals) ...--*/
/* llvalue -> bool */
@@ -616,7 +725,7 @@ CAMLprim value llvm_lookup_function(value Name, LLVMModuleRef M) {
CAMLprim LLVMValueRef llvm_define_function(value Name, LLVMTypeRef Ty,
LLVMModuleRef M) {
LLVMValueRef Fn = LLVMAddFunction(M, String_val(Name), Ty);
- LLVMAppendBasicBlock(Fn, "entry");
+ LLVMAppendBasicBlockInContext(LLVMGetTypeContext(Ty), Fn, "entry");
return Fn;
}
@@ -729,13 +838,15 @@ CAMLprim value llvm_delete_block(LLVMBasicBlockRef BB) {
}
/* string -> llvalue -> llbasicblock */
-CAMLprim LLVMBasicBlockRef llvm_append_block(value Name, LLVMValueRef Fn) {
- return LLVMAppendBasicBlock(Fn, String_val(Name));
+CAMLprim LLVMBasicBlockRef llvm_append_block(LLVMContextRef Context, value Name,
+ LLVMValueRef Fn) {
+ return LLVMAppendBasicBlockInContext(Context, Fn, String_val(Name));
}
/* string -> llbasicblock -> llbasicblock */
-CAMLprim LLVMBasicBlockRef llvm_insert_block(value Name, LLVMBasicBlockRef BB) {
- return LLVMInsertBasicBlock(BB, String_val(Name));
+CAMLprim LLVMBasicBlockRef llvm_insert_block(LLVMContextRef Context, value Name,
+ LLVMBasicBlockRef BB) {
+ return LLVMInsertBasicBlockInContext(Context, BB, String_val(Name));
}
/* llvalue -> bool */
@@ -849,9 +960,9 @@ static value alloc_builder(LLVMBuilderRef B) {
return V;
}
-/* unit-> llbuilder */
-CAMLprim value llvm_builder(value Unit) {
- return alloc_builder(LLVMCreateBuilder());
+/* llcontext -> llbuilder */
+CAMLprim value llvm_builder(LLVMContextRef C) {
+ return alloc_builder(LLVMCreateBuilderInContext(C));
}
/* (llbasicblock, llvalue) llpos -> llbuilder -> unit */
@@ -874,6 +985,13 @@ CAMLprim LLVMBasicBlockRef llvm_insertion_block(LLVMBuilderRef B) {
return InsertBlock;
}
+/* llvalue -> string -> llbuilder -> unit */
+CAMLprim value llvm_insert_into_builder(LLVMValueRef I, value Name,
+ LLVMBuilderRef B) {
+ LLVMInsertIntoBuilderWithName(B, I, String_val(Name));
+ return Val_unit;
+}
+
/*--... Terminators ........................................................--*/
/* llbuilder -> llvalue */
@@ -886,6 +1004,12 @@ CAMLprim LLVMValueRef llvm_build_ret(LLVMValueRef Val, value B) {
return LLVMBuildRet(Builder_val(B), Val);
}
+/* llvalue array -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_aggregate_ret(value RetVals, value B) {
+ return LLVMBuildAggregateRet(Builder_val(B), (LLVMValueRef *) Op_val(RetVals),
+ Wosize_val(RetVals));
+}
+
/* llbasicblock -> llbuilder -> llvalue */
CAMLprim LLVMValueRef llvm_build_br(LLVMBasicBlockRef BB, value B) {
return LLVMBuildBr(Builder_val(B), BB);
@@ -952,18 +1076,42 @@ CAMLprim LLVMValueRef llvm_build_add(LLVMValueRef LHS, LLVMValueRef RHS,
}
/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_nsw_add(LLVMValueRef LHS, LLVMValueRef RHS,
+ value Name, value B) {
+ return LLVMBuildNSWAdd(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fadd(LLVMValueRef LHS, LLVMValueRef RHS,
+ value Name, value B) {
+ return LLVMBuildFAdd(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
CAMLprim LLVMValueRef llvm_build_sub(LLVMValueRef LHS, LLVMValueRef RHS,
value Name, value B) {
return LLVMBuildSub(Builder_val(B), LHS, RHS, String_val(Name));
}
/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fsub(LLVMValueRef LHS, LLVMValueRef RHS,
+ value Name, value B) {
+ return LLVMBuildFSub(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
CAMLprim LLVMValueRef llvm_build_mul(LLVMValueRef LHS, LLVMValueRef RHS,
value Name, value B) {
return LLVMBuildMul(Builder_val(B), LHS, RHS, String_val(Name));
}
/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fmul(LLVMValueRef LHS, LLVMValueRef RHS,
+ value Name, value B) {
+ return LLVMBuildFMul(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
CAMLprim LLVMValueRef llvm_build_udiv(LLVMValueRef LHS, LLVMValueRef RHS,
value Name, value B) {
return LLVMBuildUDiv(Builder_val(B), LHS, RHS, String_val(Name));
@@ -976,6 +1124,12 @@ CAMLprim LLVMValueRef llvm_build_sdiv(LLVMValueRef LHS, LLVMValueRef RHS,
}
/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_exact_sdiv(LLVMValueRef LHS, LLVMValueRef RHS,
+ value Name, value B) {
+ return LLVMBuildExactSDiv(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
CAMLprim LLVMValueRef llvm_build_fdiv(LLVMValueRef LHS, LLVMValueRef RHS,
value Name, value B) {
return LLVMBuildFDiv(Builder_val(B), LHS, RHS, String_val(Name));
@@ -1098,6 +1252,37 @@ CAMLprim LLVMValueRef llvm_build_gep(LLVMValueRef Pointer, value Indices,
String_val(Name));
}
+/* llvalue -> llvalue array -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_in_bounds_gep(LLVMValueRef Pointer,
+ value Indices, value Name,
+ value B) {
+ return LLVMBuildInBoundsGEP(Builder_val(B), Pointer,
+ (LLVMValueRef *) Op_val(Indices),
+ Wosize_val(Indices), String_val(Name));
+}
+
+/* llvalue -> int -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_struct_gep(LLVMValueRef Pointer,
+ value Idx, value Name,
+ value B) {
+ /* Address struct field [Idx]: getelementptr %Pointer, 0, Idx. */
+ return LLVMBuildStructGEP(Builder_val(B), Pointer, Int_val(Idx),
+ String_val(Name));
+}
+
+/* string -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_global_string(value Str, value Name, value B) {
+ return LLVMBuildGlobalString(Builder_val(B), String_val(Str),
+ String_val(Name));
+}
+
+/* string -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_global_stringptr(value Str, value Name,
+ value B) {
+ return LLVMBuildGlobalStringPtr(Builder_val(B), String_val(Str),
+ String_val(Name));
+}
+
/*--... Casts ..............................................................--*/
/* llvalue -> lltype -> string -> llbuilder -> llvalue */
@@ -1172,6 +1357,43 @@ CAMLprim LLVMValueRef llvm_build_bitcast(LLVMValueRef X, LLVMTypeRef Ty,
return LLVMBuildBitCast(Builder_val(B), X, Ty, String_val(Name));
}
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_zext_or_bitcast(LLVMValueRef X, LLVMTypeRef Ty,
+ value Name, value B) {
+ return LLVMBuildZExtOrBitCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_sext_or_bitcast(LLVMValueRef X, LLVMTypeRef Ty,
+ value Name, value B) {
+ return LLVMBuildSExtOrBitCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_trunc_or_bitcast(LLVMValueRef X,
+ LLVMTypeRef Ty, value Name,
+ value B) {
+ return LLVMBuildTruncOrBitCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_pointercast(LLVMValueRef X, LLVMTypeRef Ty,
+ value Name, value B) {
+ return LLVMBuildPointerCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_intcast(LLVMValueRef X, LLVMTypeRef Ty,
+ value Name, value B) {
+ return LLVMBuildIntCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fpcast(LLVMValueRef X, LLVMTypeRef Ty,
+ value Name, value B) {
+ return LLVMBuildFPCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
/*--... Comparisons ........................................................--*/
/* Icmp.t -> llvalue -> llvalue -> string -> llbuilder -> llvalue */
@@ -1256,6 +1478,38 @@ CAMLprim LLVMValueRef llvm_build_shufflevector(LLVMValueRef V1, LLVMValueRef V2,
return LLVMBuildShuffleVector(Builder_val(B), V1, V2, Mask, String_val(Name));
}
+/* llvalue -> int -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_extractvalue(LLVMValueRef Aggregate,
+ value Idx, value Name, value B) {
+ return LLVMBuildExtractValue(Builder_val(B), Aggregate, Int_val(Idx),
+ String_val(Name));
+}
+
+/* llvalue -> llvalue -> int -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_insertvalue(LLVMValueRef Aggregate,
+ LLVMValueRef Val, value Idx,
+ value Name, value B) {
+ return LLVMBuildInsertValue(Builder_val(B), Aggregate, Val, Int_val(Idx),
+ String_val(Name));
+}
+
+/* llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_is_null(LLVMValueRef Val, value Name,
+ value B) {
+ return LLVMBuildIsNull(Builder_val(B), Val, String_val(Name));
+}
+
+/* llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_is_not_null(LLVMValueRef Val, value Name,
+ value B) {
+ return LLVMBuildIsNotNull(Builder_val(B), Val, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_ptrdiff(LLVMValueRef LHS, LLVMValueRef RHS,
+ value Name, value B) {
+ return LLVMBuildPtrDiff(Builder_val(B), LHS, RHS, String_val(Name));
+}
/*===-- Module Providers --------------------------------------------------===*/
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 731071ef85b2..320335cf0fe0 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -1,7 +1,28 @@
+include(CheckIncludeFile)
+include(CheckLibraryExists)
+include(CheckSymbolExists)
+include(CheckFunctionExists)
+include(CheckCXXSourceCompiles)
+
+# Helper macros and functions
+macro(add_cxx_include result files)
+ set(${result} "")
+ foreach (file_name ${files})
+ set(${result} "${${result}}#include<${file_name}>\n")
+ endforeach()
+endmacro(add_cxx_include)
+
+function(check_type_exists type files variable)
+ add_cxx_include(includes "${files}")
+ CHECK_CXX_SOURCE_COMPILES("
+ ${includes} ${type} typeVar;
+ int main() {
+ return 0;
+ }
+ " ${variable})
+endfunction()
# include checks
-include(CheckIncludeFile)
-check_include_file(alloca.h HAVE_ALLOCA_H)
check_include_file(argz.h HAVE_ARGZ_H)
check_include_file(assert.h HAVE_ASSERT_H)
check_include_file(dirent.h HAVE_DIRENT_H)
@@ -42,22 +63,19 @@ check_include_file(utime.h HAVE_UTIME_H)
check_include_file(windows.h HAVE_WINDOWS_H)
# library checks
-include(CheckLibraryExists)
check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC)
check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT)
check_library_exists(dl dlopen "" HAVE_LIBDL)
# function checks
-include(CheckSymbolExists)
-include(CheckFunctionExists)
-check_symbol_exists(alloca alloca.h HAVE_ALLOCA)
check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE)
check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT)
check_function_exists(isatty HAVE_ISATTY)
check_symbol_exists(isinf cmath HAVE_ISINF_IN_CMATH)
check_symbol_exists(isinf math.h HAVE_ISINF_IN_MATH_H)
+check_symbol_exists(finite ieeefp.h HAVE_FINITE_IN_IEEEFP_H)
check_symbol_exists(isnan cmath HAVE_ISNAN_IN_CMATH)
check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H)
check_symbol_exists(ceilf math.h HAVE_CEILF)
@@ -65,32 +83,70 @@ check_symbol_exists(floorf math.h HAVE_FLOORF)
check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
check_symbol_exists(malloc_zone_statistics malloc/malloc.h
HAVE_MALLOC_ZONE_STATISTICS)
+check_symbol_exists(mkdtemp unistd.h HAVE_MKDTEMP)
+check_symbol_exists(mkstemp unistd.h HAVE_MKSTEMP)
+check_symbol_exists(mktemp unistd.h HAVE_MKTEMP)
check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
+check_symbol_exists(sbrk unistd.h HAVE_SBRK)
check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
check_symbol_exists(strerror string.h HAVE_STRERROR)
check_symbol_exists(strerror_r string.h HAVE_STRERROR_R)
check_symbol_exists(strerror_s string.h HAVE_STRERROR_S)
+check_symbol_exists(setenv stdlib.h HAVE_SETENV)
check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
if( LLVM_USING_GLIBC )
add_llvm_definitions( -D_GNU_SOURCE )
endif()
+# Type checks
+check_type_exists(std::bidirectional_iterator<int,int> "iterator;iostream" HAVE_BI_ITERATOR)
+check_type_exists(std::iterator<int,int,int> iterator HAVE_STD_ITERATOR)
+check_type_exists(std::forward_iterator<int,int> iterator HAVE_FWD_ITERATOR)
+
+set(headers "")
+if (HAVE_SYS_TYPES_H)
+ set(headers ${headers} "sys/types.h")
+endif()
+
+if (HAVE_INTTYPES_H)
+ set(headers ${headers} "inttypes.h")
+endif()
+
+if (HAVE_STDINT_H)
+ set(headers ${headers} "stdint.h")
+endif()
+
+check_type_exists(uint64_t "${headers}" HAVE_UINT64_T)
+check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T)
+
+# Define LLVM_MULTITHREADED if gcc atomic builtins exists.
+include(CheckAtomic)
+
include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG)
+# On Windows all code is position-independent and MinGW warns if -fPIC
+# is passed on the command line.
+if( NOT WIN32 )
+ check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG)
+endif()
include(GetTargetTriple)
get_target_triple(LLVM_HOSTTRIPLE)
message(STATUS "LLVM_HOSTTRIPLE: ${LLVM_HOSTTRIPLE}")
# Determine the native architecture.
-# FIXME: this will have to change for cross-compiling.
-string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE})
+string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH)
+if( LLVM_NATIVE_ARCH STREQUAL "host" )
+ string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE})
+endif ()
+
if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86")
set(LLVM_NATIVE_ARCH X86)
-elseif (LLVM_NATIVE_ARCH STREQUAL amd64)
+elseif (LLVM_NATIVE_ARCH STREQUAL "x86")
+ set(LLVM_NATIVE_ARCH X86)
+elseif (LLVM_NATIVE_ARCH STREQUAL "amd64")
set(LLVM_NATIVE_ARCH X86)
-elseif (LLVM_NATIVE_ARCH STREQUAL x86_64)
+elseif (LLVM_NATIVE_ARCH STREQUAL "x86_64")
set(LLVM_NATIVE_ARCH X86)
elseif (LLVM_NATIVE_ARCH MATCHES "sparc")
set(LLVM_NATIVE_ARCH Sparc)
@@ -98,8 +154,6 @@ elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
set(LLVM_NATIVE_ARCH PowerPC)
elseif (LLVM_NATIVE_ARCH MATCHES "alpha")
set(LLVM_NATIVE_ARCH Alpha)
-elseif (LLVM_NATIVE_ARCH MATCHES "ia64")
- set(LLVM_NATIVE_ARCH IA64)
elseif (LLVM_NATIVE_ARCH MATCHES "arm")
set(LLVM_NATIVE_ARCH ARM)
elseif (LLVM_NATIVE_ARCH MATCHES "mips")
@@ -175,11 +229,6 @@ configure_file(
)
configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT/iterator.cmake
- ${LLVM_BINARY_DIR}/include/llvm/ADT/iterator.h
- )
-
-configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
${LLVM_BINARY_DIR}/include/llvm/Support/DataTypes.h
)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 660bd70003ee..205ddb766399 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -12,6 +12,13 @@ macro(add_llvm_library name)
install(TARGETS ${name}
LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+ # The LLVM Target library shall be built before its sublibraries
+ # (asmprinter, etc.) because those may use tablegenned files whose
+ # generation is triggered by the main LLVM target library. This is
+ # necessary for parallel builds:
+ if( CURRENT_LLVM_TARGET )
+ add_dependencies(${name} ${CURRENT_LLVM_TARGET})
+ endif()
endmacro(add_llvm_library name)
@@ -26,11 +33,13 @@ macro(add_llvm_executable name)
if( LLVM_LINK_COMPONENTS )
llvm_config(${name} ${LLVM_LINK_COMPONENTS})
endif( LLVM_LINK_COMPONENTS )
- target_link_libraries(${name} ${llvm_libs})
get_system_libs(llvm_system_libs)
if( llvm_system_libs )
target_link_libraries(${name} ${llvm_system_libs})
endif()
+ if( LLVM_COMMON_DEPENDS )
+ add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
+ endif( LLVM_COMMON_DEPENDS )
endmacro(add_llvm_executable name)
@@ -61,4 +70,5 @@ macro(add_llvm_target target_name)
if ( TABLEGEN_OUTPUT )
add_dependencies(LLVM${target_name} ${target_name}Table_gen)
endif (TABLEGEN_OUTPUT)
+ set(CURRENT_LLVM_TARGET LLVM${target_name} PARENT_SCOPE)
endmacro(add_llvm_target)
diff --git a/cmake/modules/CheckAtomic.cmake b/cmake/modules/CheckAtomic.cmake
new file mode 100644
index 000000000000..27bbaba6998c
--- /dev/null
+++ b/cmake/modules/CheckAtomic.cmake
@@ -0,0 +1,18 @@
+# atomic builtins are required for threading support.
+
+INCLUDE(CheckCXXSourceCompiles)
+
+CHECK_CXX_SOURCE_COMPILES("
+int main() {
+ volatile unsigned long val = 1;
+ __sync_synchronize();
+ __sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
+ return 0;
+ }
+" LLVM_MULTITHREADED)
+
+if( NOT LLVM_MULTITHREADED )
+ message(STATUS "Warning: LLVM will be built thread-unsafe because atomic builtins are missing")
+endif()
diff --git a/cmake/modules/GetTargetTriple.cmake b/cmake/modules/GetTargetTriple.cmake
index c915a9a6ae60..87262add59d3 100644
--- a/cmake/modules/GetTargetTriple.cmake
+++ b/cmake/modules/GetTargetTriple.cmake
@@ -3,7 +3,13 @@
function( get_target_triple var )
if( MSVC )
- set( ${var} "i686-pc-win32" PARENT_SCOPE )
+ if( CMAKE_CL_64 )
+ set( ${var} "x86_64-pc-win32" PARENT_SCOPE )
+ else()
+ set( ${var} "i686-pc-win32" PARENT_SCOPE )
+ endif()
+ elseif( MINGW AND NOT MSYS )
+ set( ${var} "i686-pc-mingw32" PARENT_SCOPE )
else( MSVC )
set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess)
execute_process(COMMAND sh ${config_guess}
diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVMConfig.cmake
index 5fa08a39d6a8..d1c297c15861 100755
--- a/cmake/modules/LLVMConfig.cmake
+++ b/cmake/modules/LLVMConfig.cmake
@@ -1,6 +1,3 @@
-include(FindPerl)
-
-
function(get_system_libs return_var)
# Returns in `return_var' a list of system libraries used by LLVM.
if( NOT MSVC )
@@ -27,43 +24,15 @@ endmacro(llvm_config)
function(explicit_llvm_config executable)
set( link_components ${ARGN} )
- set(lfgs)
- if (MSVC)
- if( CMAKE_CL_64 )
- set(include_lflag "/INCLUDE:")
- else( CMAKE_CL_64 )
- set(include_lflag "/INCLUDE:_")
- endif()
- foreach(c ${link_components})
- if( c STREQUAL "jit" )
- set(lfgs "${lfgs} ${include_lflag}X86TargetMachineModule")
- endif( c STREQUAL "jit" )
- list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
- if( NOT idx LESS 0 )
- set(lfgs "${lfgs} ${include_lflag}${c}TargetMachineModule")
- list(FIND LLVM_ASMPRINTERS_FORCE_LINK ${c} idx)
- if( NOT idx LESS 0 )
- set(lfgs "${lfgs} ${include_lflag}${c}AsmPrinterForceLink")
- endif()
- endif()
- endforeach(c)
- endif ()
-
explicit_map_components_to_libraries(LIBRARIES ${link_components})
target_link_libraries(${executable} ${LIBRARIES})
-
- if( lfgs )
- set_target_properties(${executable}
- PROPERTIES
- LINK_FLAGS ${lfgs})
- endif()
endfunction(explicit_llvm_config)
function(explicit_map_components_to_libraries out_libs)
set( link_components ${ARGN} )
foreach(c ${link_components})
- # add codegen/asmprinter
+ # add codegen, asmprinter, asmparser
list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
if( NOT idx LESS 0 )
list(FIND llvm_libs "LLVM${c}CodeGen" idx)
@@ -81,12 +50,18 @@ function(explicit_map_components_to_libraries out_libs)
if( NOT asmidx LESS 0 )
list(APPEND expanded_components "LLVM${c}AsmPrinter")
endif()
+ list(FIND llvm_libs "LLVM${c}AsmParser" asmidx)
+ if( NOT asmidx LESS 0 )
+ list(APPEND expanded_components "LLVM${c}AsmParser")
+ endif()
+ list(FIND llvm_libs "LLVM${c}Info" asmidx)
+ if( NOT asmidx LESS 0 )
+ list(APPEND expanded_components "LLVM${c}Info")
+ endif()
elseif( c STREQUAL "native" )
- # TODO: we assume ARCH is X86. In this case, we must use nativecodegen
- # component instead. Do nothing, as in llvm-config script.
+ list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen")
elseif( c STREQUAL "nativecodegen" )
- # TODO: we assume ARCH is X86.
- list(APPEND expanded_components "LLVMX86CodeGen")
+ list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen")
elseif( c STREQUAL "backend" )
# same case as in `native'.
elseif( c STREQUAL "engine" )
@@ -101,94 +76,50 @@ function(explicit_map_components_to_libraries out_libs)
# We must match capitalization.
string(TOUPPER "${llvm_libs}" capitalized_libs)
list(REMOVE_DUPLICATES expanded_components)
- set(curr_idx 0)
list(LENGTH expanded_components lst_size)
- while( ${curr_idx} LESS ${lst_size} )
- list(GET expanded_components ${curr_idx} c)
+ set(result "")
+ while( 0 LESS ${lst_size} )
+ list(GET expanded_components 0 c)
string(TOUPPER "${c}" capitalized)
list(FIND capitalized_libs ${capitalized} idx)
if( idx LESS 0 )
message(FATAL_ERROR "Library ${c} not found in list of llvm libraries.")
endif( idx LESS 0 )
list(GET llvm_libs ${idx} canonical_lib)
+ list(REMOVE_ITEM result ${canonical_lib})
list(APPEND result ${canonical_lib})
- list(APPEND result ${MSVC_LIB_DEPS_${canonical_lib}})
+ foreach(c ${MSVC_LIB_DEPS_${canonical_lib}})
+ list(REMOVE_ITEM expanded_components ${c})
+ endforeach()
list(APPEND expanded_components ${MSVC_LIB_DEPS_${canonical_lib}})
- list(REMOVE_DUPLICATES expanded_components)
+ list(REMOVE_AT expanded_components 0)
list(LENGTH expanded_components lst_size)
- math(EXPR curr_idx "${curr_idx} + 1")
- endwhile( ${curr_idx} LESS ${lst_size} )
- list(REMOVE_DUPLICATES result)
+ endwhile( 0 LESS ${lst_size} )
set(${out_libs} ${result} PARENT_SCOPE)
endfunction(explicit_map_components_to_libraries)
-# This data is used to establish executable/library
-# dependencies. Comes from the llvm-config script, which is built and
-# installed on the bin directory for MinGW or Linux. At the end of the
-# script, you'll see lines like this:
+
+# The library dependency data is contained in the file
+# LLVMLibDeps.cmake in this directory. It is automatically generated
+# by tools/llvm-config/CMakeLists.txt when the build comprises all the
+# targets and we are on an environment POSIX enough to build the
+# llvm-config script. In practice, this just excludes MSVC.
+
+# When you remove or rename a library from the build, be sure to
+# remove its file from lib/ as well, or the GenLibDeps.pl script will
+# include it in its analysis!
+
+# The format generated by GenLibDeps.pl
# LLVMARMAsmPrinter.o: LLVMARMCodeGen.o libLLVMAsmPrinter.a libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMTarget.a
-# This is translated to:
+# is translated to:
# set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-# It is necessary to remove the `lib' prefix, the `.a' and `.o'
-# suffixes. Watch out for this line:
-
-# LLVMExecutionEngine.o LLVMJIT.o: libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMSystem.a libLLVMTarget.a
-
-# See how there are two elements before the colon. This must be
-# translated as if it were:
-
-# LLVMExecutionEngine.o: libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMSystem.a libLLVMTarget.a
-# LLVMJIT.o: libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMSystem.a libLLVMTarget.a
-
-# TODO: do this transformations on cmake.
-
-# It is very important that the LLVM built for extracting this data
-# must contain all targets, not just X86.
-
-
-set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
-set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMIA64 LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMInterpreter LLVMExecutionEngine LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
-set(MSVC_LIB_DEPS_LLVMMips LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMPowerPCCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMSparcAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMX86CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMXCore LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMDebugger LLVMAnalysis LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMHello LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMCore LLVMScalarOpts LLVMSupport LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMSystem )
-set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMipa)
-set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
+# It is necessary to remove the `lib' prefix and the `.a' suffix.
+
+# This 'sed' script should do the trick:
+# sed -e s'#\.a##g' -e 's#libLLVM#LLVM#g' -e 's#: # #' -e 's#\(.*\)#set(MSVC_LIB_DEPS_\1)#' ~/llvm/tools/llvm-config/LibDeps.txt
+
+include(LLVMLibDeps)
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
new file mode 100644
index 000000000000..fba999ee7729
--- /dev/null
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -0,0 +1,68 @@
+set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC)
+set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMDebugger LLVMAnalysis LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMHello LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMSystem )
+set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMipa)
+set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMX86Info)
+set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMX86CodeGen LLVMX86Info)
+set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Info)
+set(MSVC_LIB_DEPS_LLVMX86Info LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCoreAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index 0a96b5514db6..f6da1b83cddb 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -3,20 +3,15 @@
# Adds the name of the generated file to TABLEGEN_OUTPUT.
macro(tablegen ofn)
- file(GLOB all_tds "*.td")
+ file(GLOB local_tds "*.td")
+ file(GLOB_RECURSE global_tds "${LLVM_MAIN_SRC_DIR}/include/llvm/*.td")
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
-I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS}
- -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
- DEPENDS tblgen ${all_tds}
- COMMENT "Building ${ofn}.tmp..."
- )
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
- COMMAND ${CMAKE_COMMAND} -E copy_if_different
- ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+ DEPENDS tblgen ${local_tds} ${global_tds}
COMMENT "Building ${ofn}..."
)
set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn})
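Two things change in the tablegen() macro here: the output is generated in place instead of going through a .tmp file plus a copy_if_different step, and the dependency list widens from the *.td files in the current directory to every .td under include/llvm, so edits to shared TableGen definitions correctly retrigger regeneration. The resulting custom command boils down to an invocation like this (generator flag and paths are illustrative placeholders):

    # Roughly what the generated build rule runs for one .inc file:
    tblgen <generator-flag> \
        -I lib/Target/X86 -I lib/Target -I include \
        lib/Target/X86/X86.td -o X86GenRegisterInfo.inc

The trade-off of dropping copy_if_different is that consumers of the generated file now rebuild whenever tblgen reruns, even if the output is byte-identical.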
diff --git a/configure b/configure
index a38067bc2d0e..102d3f033a38 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for llvm 2.6svn.
+# Generated by GNU Autoconf 2.60 for llvm 2.7svn.
#
# Report bugs to <llvmbugs@cs.uiuc.edu>.
#
@@ -9,7 +9,7 @@
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
#
-# Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign.
+# Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.
## --------------------- ##
## M4sh Initialization. ##
## --------------------- ##
@@ -715,8 +715,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='llvm'
PACKAGE_TARNAME='-llvm-'
-PACKAGE_VERSION='2.6svn'
-PACKAGE_STRING='llvm 2.6svn'
+PACKAGE_VERSION='2.7svn'
+PACKAGE_STRING='llvm 2.7svn'
PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu'
ac_unique_file="lib/VMCore/Module.cpp"
@@ -808,6 +808,8 @@ target_cpu
target_vendor
target_os
OS
+HOST_OS
+TARGET_OS
LINKALL
NOLINKALL
LLVM_ON_UNIX
@@ -835,6 +837,7 @@ DISABLE_ASSERTIONS
ENABLE_EXPENSIVE_CHECKS
EXPENSIVE_CHECKS
DEBUG_RUNTIME
+DEBUG_SYMBOLS
JIT
TARGET_HAS_JIT
ENABLE_DOXYGEN
@@ -843,9 +846,12 @@ ENABLE_PIC
TARGETS_TO_BUILD
LLVM_ENUM_TARGETS
LLVM_ENUM_ASM_PRINTERS
+LLVM_ENUM_ASM_PARSERS
ENABLE_CBE_PRINTF_A
EXTRA_OPTIONS
BINUTILS_INCDIR
+ENABLE_LLVMC_DYNAMIC
+ENABLE_LLVMC_DYNAMIC_PLUGINS
CXX
CXXFLAGS
ac_ct_CXX
@@ -865,6 +871,10 @@ TAR
BINPWD
GRAPHVIZ
DOT
+FDP
+NEATO
+TWOPI
+CIRCO
GV
DOTTY
PERL
@@ -904,6 +914,7 @@ LLVMGXXCOMMAND
LLVMGCC
LLVMGXX
USE_UDIS86
+USE_OPROFILE
HAVE_PTHREAD
HUGE_VAL_SANITY
ALLOCA
@@ -1462,7 +1473,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures llvm 2.6svn to adapt to many kinds of systems.
+\`configure' configures llvm 2.7svn to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1528,7 +1539,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of llvm 2.6svn:";;
+ short | recursive ) echo "Configuration of llvm 2.7svn:";;
esac
cat <<\_ACEOF
@@ -1544,18 +1555,27 @@ Optional Features:
is NO)
--enable-debug-runtime Build runtime libs with debug symbols (default is
NO)
+ --enable-debug-symbols Build compiler with debug symbols (default is NO if
+ optimization is on and YES if it's off)
--enable-jit Enable Just In Time Compiling (default is YES)
--enable-doxygen Build doxygen documentation (default is NO)
--enable-threads Use threads if available (default is YES)
--enable-pic Build LLVM with Position Independent Code (default
is YES)
- --enable-targets Build specific host targets:
- all,host-only,{target-name} (default=all)
+ --enable-targets Build specific host targets: all or
+ target1,target2,... Valid targets are: host, x86,
+ x86_64, sparc, powerpc, alpha, arm, mips, spu,
+ pic16, xcore, msp430, systemz, blackfin, cbe, msil,
+ and cpp (default=all)
--enable-cbe-printf-a Enable C Backend output with hex floating point via
%a (default is YES)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is YES)
+ --enable-llvmc-dynamic Link LLVMC dynamically (default is NO, unless on
+ Win32)
+ --enable-llvmc-dynamic-plugins
+ Enable dynamic LLVMC plugins (default is YES)
--enable-ltdl-install install libltdl
--enable-shared[=PKGS] build shared libraries
[default=yes]
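The help text now enumerates the valid --enable-targets names instead of the vague all,host-only,{target-name} wording, and documents the new debug-symbols and llvmc-dynamic switches. A representative invocation using the documented spellings (the flag combination is illustrative):

    ./configure --enable-targets=x86,arm --enable-bindings=none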
@@ -1586,6 +1606,8 @@ Optional Packages:
both]
--with-tags[=TAGS] include additional configurations [automatic]
--with-udis86=<path> Use udis86 external x86 disassembler library
+ --with-oprofile=<prefix>
+ Tell OProfile >= 0.9.4 how to symbolize JIT output
Some influential environment variables:
CC C compiler command
@@ -1665,7 +1687,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-llvm configure 2.6svn
+llvm configure 2.7svn
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1673,7 +1695,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
-Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.
_ACEOF
exit
fi
@@ -1681,7 +1703,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by llvm $as_me 2.6svn, which was
+It was created by llvm $as_me 2.7svn, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -2035,7 +2057,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-LLVM_COPYRIGHT="Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign."
+LLVM_COPYRIGHT="Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign."
@@ -2316,6 +2338,11 @@ else
llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
llvm_cv_os_type="SunOS"
llvm_cv_platform_type="Unix" ;;
+ *-*-auroraux*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+ llvm_cv_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="AuroraUX"
+ llvm_cv_platform_type="Unix" ;;
*-*-win32*)
llvm_cv_link_all_option="-Wl,--whole-archive"
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
@@ -2326,6 +2353,21 @@ else
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
llvm_cv_os_type="MingW"
llvm_cv_platform_type="Win32" ;;
+ *-*-haiku*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Haiku"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-eabi*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-elf*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
*)
llvm_cv_link_all_option=""
llvm_cv_no_link_all_option=""
@@ -2336,6 +2378,53 @@ fi
{ echo "$as_me:$LINENO: result: $llvm_cv_os_type" >&5
echo "${ECHO_T}$llvm_cv_os_type" >&6; }
+{ echo "$as_me:$LINENO: checking type of operating system we're going to target" >&5
+echo $ECHO_N "checking type of operating system we're going to target... $ECHO_C" >&6; }
+if test "${llvm_cv_target_os_type+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $target in
+ *-*-aix*)
+ llvm_cv_target_os_type="AIX" ;;
+ *-*-irix*)
+ llvm_cv_target_os_type="IRIX" ;;
+ *-*-cygwin*)
+ llvm_cv_target_os_type="Cygwin" ;;
+ *-*-darwin*)
+ llvm_cv_target_os_type="Darwin" ;;
+ *-*-freebsd*)
+ llvm_cv_target_os_type="FreeBSD" ;;
+ *-*-openbsd*)
+ llvm_cv_target_os_type="OpenBSD" ;;
+ *-*-netbsd*)
+ llvm_cv_target_os_type="NetBSD" ;;
+ *-*-dragonfly*)
+ llvm_cv_target_os_type="DragonFly" ;;
+ *-*-hpux*)
+ llvm_cv_target_os_type="HP-UX" ;;
+ *-*-interix*)
+ llvm_cv_target_os_type="Interix" ;;
+ *-*-linux*)
+ llvm_cv_target_os_type="Linux" ;;
+ *-*-solaris*)
+ llvm_cv_target_os_type="SunOS" ;;
+ *-*-auroraux*)
+ llvm_cv_target_os_type="AuroraUX" ;;
+ *-*-win32*)
+ llvm_cv_target_os_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_target_os_type="MingW" ;;
+ *-*-haiku*)
+ llvm_cv_target_os_type="Haiku" ;;
+ *-unknown-eabi*)
+ llvm_cv_target_os_type="Freestanding" ;;
+ *)
+ llvm_cv_target_os_type="Unknown" ;;
+esac
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_target_os_type" >&5
+echo "${ECHO_T}$llvm_cv_target_os_type" >&6; }
+
if test "$llvm_cv_os_type" = "Unknown" ; then
{ { echo "$as_me:$LINENO: error: Operating system is unknown, configure can't continue" >&5
echo "$as_me: error: Operating system is unknown, configure can't continue" >&2;}
@@ -2344,6 +2433,10 @@ fi
OS=$llvm_cv_os_type
+HOST_OS=$llvm_cv_os_type
+
+TARGET_OS=$llvm_cv_target_os_type
+
LINKALL=$llvm_cv_link_all_option
@@ -2386,12 +2479,13 @@ else
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
alpha*-*) llvm_cv_target_arch="Alpha" ;;
- ia64-*) llvm_cv_target_arch="IA64" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
mips-*) llvm_cv_target_arch="Mips" ;;
pic16-*) llvm_cv_target_arch="PIC16" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
+ s390x-*) llvm_cv_target_arch="SystemZ" ;;
+ bfin-*) llvm_cv_target_arch="Blackfin" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac
fi
@@ -4805,6 +4899,21 @@ else
fi
+# Check whether --enable-debug-symbols was given.
+if test "${enable_debug_symbols+set}" = set; then
+ enableval=$enable_debug_symbols;
+else
+ enableval=no
+fi
+
+if test ${enableval} = "no" ; then
+ DEBUG_SYMBOLS=
+
+else
+ DEBUG_SYMBOLS=DEBUG_SYMBOLS=1
+
+fi
+
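The value assigned on the enabled path, DEBUG_SYMBOLS=DEBUG_SYMBOLS=1, looks odd but is deliberate: the shell variable holds an entire make assignment, so the placeholder presumably substituted into Makefile.config becomes DEBUG_SYMBOLS=1 when the feature is on and expands to nothing at all when it is off. A sketch of the effect (illustrative):

    ./configure --enable-debug-symbols
    grep DEBUG_SYMBOLS Makefile.config   # expect: DEBUG_SYMBOLS=1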
# Check whether --enable-jit was given.
if test "${enable_jit+set}" = set; then
enableval=$enable_jit;
@@ -4818,29 +4927,31 @@ then
else
case "$llvm_cv_target_arch" in
- x86) TARGET_HAS_JIT=1
+ x86) TARGET_HAS_JIT=1
;;
- Sparc) TARGET_HAS_JIT=0
+ Sparc) TARGET_HAS_JIT=0
;;
- PowerPC) TARGET_HAS_JIT=1
+ PowerPC) TARGET_HAS_JIT=1
;;
- x86_64) TARGET_HAS_JIT=1
+ x86_64) TARGET_HAS_JIT=1
;;
- Alpha) TARGET_HAS_JIT=1
+ Alpha) TARGET_HAS_JIT=1
;;
- IA64) TARGET_HAS_JIT=0
+ ARM) TARGET_HAS_JIT=1
;;
- ARM) TARGET_HAS_JIT=0
+ Mips) TARGET_HAS_JIT=0
;;
- Mips) TARGET_HAS_JIT=0
+ PIC16) TARGET_HAS_JIT=0
;;
- PIC16) TARGET_HAS_JIT=0
+ XCore) TARGET_HAS_JIT=0
;;
- XCore) TARGET_HAS_JIT=0
+ MSP430) TARGET_HAS_JIT=0
;;
- MSP430) TARGET_HAS_JIT=0
+ SystemZ) TARGET_HAS_JIT=0
;;
- *) TARGET_HAS_JIT=0
+ Blackfin) TARGET_HAS_JIT=0
+ ;;
+ *) TARGET_HAS_JIT=0
;;
esac
fi
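Besides the reindentation noise, this table flips the ARM default from no JIT to TARGET_HAS_JIT=1 and adds SystemZ and Blackfin entries with the JIT off. In practice (illustrative):

    ./configure                # on an ARM host the JIT is now on by default
    ./configure --disable-jit  # still available to force it off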
@@ -4920,44 +5031,47 @@ else
enableval=all
fi
+if test "$enableval" = host-only ; then
+ enableval=host
+fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha IA64 ARM Mips CellSPU PIC16 XCore MSP430 CBackend MSIL CppBackend" ;;
- host-only)
- case "$llvm_cv_target_arch" in
- x86) TARGETS_TO_BUILD="X86" ;;
- x86_64) TARGETS_TO_BUILD="X86" ;;
- Sparc) TARGETS_TO_BUILD="Sparc" ;;
- PowerPC) TARGETS_TO_BUILD="PowerPC" ;;
- Alpha) TARGETS_TO_BUILD="Alpha" ;;
- IA64) TARGETS_TO_BUILD="IA64" ;;
- ARM) TARGETS_TO_BUILD="ARM" ;;
- Mips) TARGETS_TO_BUILD="Mips" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU" ;;
- PIC16) TARGETS_TO_BUILD="PIC16" ;;
- XCore) TARGETS_TO_BUILD="XCore" ;;
- MSP430) TARGETS_TO_BUILD="MSP430" ;;
- *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
-echo "$as_me: error: Can not set target to build" >&2;}
- { (exit 1); exit 1; }; } ;;
- esac
- ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
- x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
- x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
- sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
- powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
- alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
- ia64) TARGETS_TO_BUILD="IA64 $TARGETS_TO_BUILD" ;;
- arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
- mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
- pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
- xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
- msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
- msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;;
- cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
+ arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
+ xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+ blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+ cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
+ msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;;
+ cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ host) case "$llvm_cv_target_arch" in
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
+ ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
+ XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+ Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+ *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
+echo "$as_me: error: Can not set target to build" >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac ;;
*) { { echo "$as_me:$LINENO: error: Unrecognized target $a_target" >&5
echo "$as_me: error: Unrecognized target $a_target" >&2;}
{ (exit 1); exit 1; }; } ;;
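The old host-only mode is folded into a host pseudo-target (note the canonicalization at the top of the hunk), which, unlike host-only, composes with explicit target names:

    # Native backend plus the C backend (illustrative):
    ./configure --enable-targets=host,cbe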
@@ -4982,18 +5096,23 @@ _ACEOF
done
# Build the LLVM_TARGET and LLVM_ASM_PRINTER macro uses for
-# Targets.def and AsmPrinters.def.
+# Targets.def, AsmPrinters.def, and AsmParsers.def.
LLVM_ENUM_TARGETS=""
LLVM_ENUM_ASM_PRINTERS=""
+LLVM_ENUM_ASM_PARSERS=""
for target_to_build in $TARGETS_TO_BUILD; do
LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then
LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
fi
+ if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then
+ LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS";
+ fi
done
+
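Mirroring the AsmPrinter handling, configure now also emits an LLVM_ASM_PARSER(<target>) entry for every enabled target that ships an AsmParser Makefile; the list later lands in include/llvm/Config/AsmParsers.def (see the config.status hunks further down). The probe reduces to (illustrative; X86 is apparently the only in-tree target with an AsmParser at this point):

    test -f lib/Target/X86/AsmParser/Makefile \
      && echo 'LLVM_ASM_PARSER(X86)'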
# Check whether --enable-cbe-printf-a was given.
if test "${enable_cbe_printf_a+set}" = set; then
enableval=$enable_cbe_printf_a;
@@ -5165,6 +5284,42 @@ echo "$as_me: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\""
{ (exit 1); exit 1; }; } ;;
esac
+if test "$llvm_cv_os_type" = "Win32" ; then
+ llvmc_dynamic="yes"
+else
+ llvmc_dynamic="no"
+fi
+
+# Check whether --enable-llvmc-dynamic was given.
+if test "${enable_llvmc_dynamic+set}" = set; then
+ enableval=$enable_llvmc_dynamic;
+else
+ enableval=$llvmc_dynamic
+fi
+
+if test ${enableval} = "yes" && test "$ENABLE_PIC" -eq 1 ; then
+ ENABLE_LLVMC_DYNAMIC=ENABLE_LLVMC_DYNAMIC=1
+
+else
+ ENABLE_LLVMC_DYNAMIC=
+
+fi
+
+# Check whether --enable-llvmc-dynamic-plugins was given.
+if test "${enable_llvmc_dynamic_plugins+set}" = set; then
+ enableval=$enable_llvmc_dynamic_plugins;
+else
+ enableval=yes
+fi
+
+if test ${enableval} = "yes" ; then
+ ENABLE_LLVMC_DYNAMIC_PLUGINS=ENABLE_LLVMC_DYNAMIC_PLUGINS=1
+
+else
+ ENABLE_LLVMC_DYNAMIC_PLUGINS=
+
+fi
+
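Note the guard on the dynamic path: --enable-llvmc-dynamic only takes effect when ENABLE_PIC is also 1, since building LLVMC as a shared library requires position-independent code. The working combination is therefore (illustrative):

    ./configure --enable-pic --enable-llvmc-dynamic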
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
@@ -7174,6 +7329,230 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
fi
+# Extract the first word of "fdp", so it can be a program name with args.
+set dummy fdp; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_FDP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $FDP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_FDP="$FDP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_FDP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_FDP" && ac_cv_path_FDP="echo fdp"
+ ;;
+esac
+fi
+FDP=$ac_cv_path_FDP
+if test -n "$FDP"; then
+ { echo "$as_me:$LINENO: result: $FDP" >&5
+echo "${ECHO_T}$FDP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$FDP" != "echo fdp" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_FDP 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ FDP=`echo $FDP | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_FDP "$FDP${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "neato", so it can be a program name with args.
+set dummy neato; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_NEATO+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $NEATO in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_NEATO="$NEATO" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_NEATO="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_NEATO" && ac_cv_path_NEATO="echo neato"
+ ;;
+esac
+fi
+NEATO=$ac_cv_path_NEATO
+if test -n "$NEATO"; then
+ { echo "$as_me:$LINENO: result: $NEATO" >&5
+echo "${ECHO_T}$NEATO" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$NEATO" != "echo neato" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_NEATO 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ NEATO=`echo $NEATO | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_NEATO "$NEATO${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "twopi", so it can be a program name with args.
+set dummy twopi; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_TWOPI+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $TWOPI in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_TWOPI="$TWOPI" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_TWOPI="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_TWOPI" && ac_cv_path_TWOPI="echo twopi"
+ ;;
+esac
+fi
+TWOPI=$ac_cv_path_TWOPI
+if test -n "$TWOPI"; then
+ { echo "$as_me:$LINENO: result: $TWOPI" >&5
+echo "${ECHO_T}$TWOPI" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$TWOPI" != "echo twopi" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_TWOPI 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ TWOPI=`echo $TWOPI | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_TWOPI "$TWOPI${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "circo", so it can be a program name with args.
+set dummy circo; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_CIRCO+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $CIRCO in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_CIRCO="$CIRCO" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_CIRCO="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_CIRCO" && ac_cv_path_CIRCO="echo circo"
+ ;;
+esac
+fi
+CIRCO=$ac_cv_path_CIRCO
+if test -n "$CIRCO"; then
+ { echo "$as_me:$LINENO: result: $CIRCO" >&5
+echo "${ECHO_T}$CIRCO" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$CIRCO" != "echo circo" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_CIRCO 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ CIRCO=`echo $CIRCO | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_CIRCO "$CIRCO${EXEEXT}"
+_ACEOF
+
+fi
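The four blocks above are the autoconf expansions of identical path probes for the extra Graphviz layout engines (fdp, neato, twopi, circo), joining the existing dot detection. Each one falls back to the sentinel value "echo <tool>" when the program is absent, defines HAVE_<TOOL> and LLVM_PATH_<TOOL> when found, and rewrites /c/... style paths to c:/... on MinGW. Distilled to its essentials (a sketch, not the verbatim logic):

    FDP=$(command -v fdp || echo "echo fdp")     # sentinel when missing
    if [ "$FDP" != "echo fdp" ]; then
      echo '#define HAVE_FDP 1'               >> confdefs.h
      echo "#define LLVM_PATH_FDP \"$FDP\""   >> confdefs.h
    fi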
for ac_prog in gv gsview32
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
@@ -10629,7 +11008,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10632 "configure"
+#line 11011 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12773,7 +13152,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 12776 "configure"' > conftest.$ac_ext
+ echo '#line 13155 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -14491,11 +14870,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14494: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14873: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14498: \$? = $ac_status" >&5
+ echo "$as_me:14877: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14759,11 +15138,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14762: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15141: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14766: \$? = $ac_status" >&5
+ echo "$as_me:15145: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14863,11 +15242,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14866: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15245: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:14870: \$? = $ac_status" >&5
+ echo "$as_me:15249: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -17315,7 +17694,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17318 "configure"
+#line 17697 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17415,7 +17794,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17418 "configure"
+#line 17797 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19783,11 +20162,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:19786: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:20165: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:19790: \$? = $ac_status" >&5
+ echo "$as_me:20169: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -19887,11 +20266,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:19890: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:20269: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:19894: \$? = $ac_status" >&5
+ echo "$as_me:20273: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -21457,11 +21836,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21460: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21839: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:21464: \$? = $ac_status" >&5
+ echo "$as_me:21843: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -21561,11 +21940,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21564: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21943: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:21568: \$? = $ac_status" >&5
+ echo "$as_me:21947: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -23796,11 +24175,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:23799: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24178: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:23803: \$? = $ac_status" >&5
+ echo "$as_me:24182: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24064,11 +24443,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24067: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24446: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24071: \$? = $ac_status" >&5
+ echo "$as_me:24450: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24168,11 +24547,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24171: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24550: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:24175: \$? = $ac_status" >&5
+ echo "$as_me:24554: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -27082,93 +27461,6 @@ echo "${ECHO_T}ok" >&6; }
-{ echo "$as_me:$LINENO: checking for elf_begin in -lelf" >&5
-echo $ECHO_N "checking for elf_begin in -lelf... $ECHO_C" >&6; }
-if test "${ac_cv_lib_elf_elf_begin+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lelf $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char elf_begin ();
-int
-main ()
-{
-return elf_begin ();
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_link") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest$ac_exeext'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_lib_elf_elf_begin=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_cv_lib_elf_elf_begin=no
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_lib_elf_elf_begin" >&5
-echo "${ECHO_T}$ac_cv_lib_elf_elf_begin" >&6; }
-if test $ac_cv_lib_elf_elf_begin = yes; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBELF 1
-_ACEOF
-
- LIBS="-lelf $LIBS"
-
-fi
-
-
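The whole unconditional libelf probe is removed, so configure no longer appends -lelf to LIBS on every system that happens to ship the library, presumably because nothing in the tree actually called elf_begin. A way to confirm on a rebuilt tree (binary path is illustrative):

    ldd Release/bin/opt | grep -i libelf || echo 'no libelf dependency'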
{ echo "$as_me:$LINENO: checking for sin in -lm" >&5
echo $ECHO_N "checking for sin in -lm... $ECHO_C" >&6; }
if test "${ac_cv_lib_m_sin+set}" = set; then
@@ -28254,6 +28546,404 @@ _ACEOF
+# Check whether --with-oprofile was given.
+if test "${with_oprofile+set}" = set; then
+ withval=$with_oprofile;
+ USE_OPROFILE=1
+
+ case "$withval" in
+ /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;;
+ no) llvm_cv_oppath=
+ USE_OPROFILE=0
+ ;;
+ *) llvm_cv_oppath="${withval}/lib/oprofile"
+ CPPFLAGS="-I${withval}/include";;
+ esac
+ if test -n "$llvm_cv_oppath" ; then
+ LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+ { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5
+echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; }
+if test "${ac_cv_search_bfd_init+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char bfd_init ();
+int
+main ()
+{
+return bfd_init ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' bfd; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_bfd_init=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_bfd_init+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_bfd_init+set}" = set; then
+ :
+else
+ ac_cv_search_bfd_init=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_bfd_init" >&5
+echo "${ECHO_T}$ac_cv_search_bfd_init" >&6; }
+ac_res=$ac_cv_search_bfd_init
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+ { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5
+echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; }
+if test "${ac_cv_search_op_open_agent+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char op_open_agent ();
+int
+main ()
+{
+return op_open_agent ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' opagent; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_op_open_agent=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_op_open_agent+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_op_open_agent+set}" = set; then
+ :
+else
+ ac_cv_search_op_open_agent=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_op_open_agent" >&5
+echo "${ECHO_T}$ac_cv_search_op_open_agent" >&6; }
+ac_res=$ac_cv_search_op_open_agent
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ echo "Error! You need to have libopagent around."
+  exit 1
+
+fi
+
+ if test "${ac_cv_header_opagent_h+set}" = set; then
+ { echo "$as_me:$LINENO: checking for opagent.h" >&5
+echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_opagent_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5
+echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking opagent.h usability" >&5
+echo $ECHO_N "checking opagent.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <opagent.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking opagent.h presence" >&5
+echo $ECHO_N "checking opagent.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <opagent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: opagent.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: opagent.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: opagent.h: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: opagent.h: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: opagent.h: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ----------------------------------- ##
+## Report this to llvmbugs@cs.uiuc.edu ##
+## ----------------------------------- ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for opagent.h" >&5
+echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_opagent_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_header_opagent_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5
+echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; }
+
+fi
+if test $ac_cv_header_opagent_h = yes; then
+ :
+else
+
+ echo "Error! You need to have opagent.h around."
+  exit 1
+
+fi
+
+
+ fi
+
+else
+
+ USE_OPROFILE=0
+
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define USE_OPROFILE $USE_OPROFILE
+_ACEOF
+
+
+
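This is the implementation behind the new --with-oprofile option: it extends LIBS with -L<prefix>/lib/oprofile plus a matching rpath, requires bfd_init (libbfd) and op_open_agent (libopagent) to link, hard-fails if libopagent or opagent.h is missing, and records the outcome as USE_OPROFILE in confdefs.h either way. Typical use (illustrative prefix):

    # OProfile >= 0.9.4 installed under /usr:
    ./configure --with-oprofile=/usr
    # effectively adds:
    #   LIBS="$LIBS -L/usr/lib/oprofile -Wl,-rpath,/usr/lib/oprofile"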
@@ -31468,7 +32158,8 @@ done
-for ac_func in strerror strerror_r strerror_s
+
+for ac_func in strerror strerror_r strerror_s setenv
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -34060,6 +34751,96 @@ rm -f core conftest.err conftest.$ac_objext \
+if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
+ { echo "$as_me:$LINENO: checking for 32-bit userspace on 64-bit system" >&5
+echo $ECHO_N "checking for 32-bit userspace on 64-bit system... $ECHO_C" >&6; }
+if test "${llvm_cv_linux_mixed+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifndef __x86_64__
+ error: Not x86-64 even if uname says so!
+ #endif
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_linux_mixed=no
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_linux_mixed=yes
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_linux_mixed" >&5
+echo "${ECHO_T}$llvm_cv_linux_mixed" >&6; }
+
+ if test "$llvm_cv_linux_mixed" = "yes"; then
+ llvm_cv_target_arch="x86"
+ ARCH="x86"
+ fi
+fi
+
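New sanity check for x86_64 Linux: configure compiles a probe that errors out unless the compiler predefines __x86_64__, so a 32-bit userspace running on a 64-bit kernel (where uname -m still reports x86_64) is detected and ARCH is downgraded to x86. The probe reduces to (illustrative):

    printf '#ifndef __x86_64__\n#error 32-bit userspace\n#endif\n' > t.c
    $CC -c t.c -o /dev/null 2>/dev/null || llvm_cv_linux_mixed=yes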
for ac_func in __dso_handle
do
@@ -34477,9 +35258,9 @@ ac_config_files="$ac_config_files include/llvm/Config/Targets.def"
ac_config_files="$ac_config_files include/llvm/Config/AsmPrinters.def"
-ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h"
+ac_config_files="$ac_config_files include/llvm/Config/AsmParsers.def"
-ac_config_headers="$ac_config_headers include/llvm/ADT/iterator.h"
+ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h"
ac_config_files="$ac_config_files Makefile.config"
@@ -34936,7 +35717,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by llvm $as_me 2.6svn, which was
+This file was extended by llvm $as_me 2.7svn, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -34989,7 +35770,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-llvm config.status 2.6svn
+llvm config.status 2.7svn
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -35105,8 +35886,8 @@ do
"include/llvm/Config/config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/config.h" ;;
"include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;;
"include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;;
+ "include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;;
"include/llvm/Support/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Support/DataTypes.h" ;;
- "include/llvm/ADT/iterator.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/ADT/iterator.h" ;;
"Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
"llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
"docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxygen.cfg" ;;
@@ -35240,6 +36021,8 @@ target_cpu!$target_cpu$ac_delim
target_vendor!$target_vendor$ac_delim
target_os!$target_os$ac_delim
OS!$OS$ac_delim
+HOST_OS!$HOST_OS$ac_delim
+TARGET_OS!$TARGET_OS$ac_delim
LINKALL!$LINKALL$ac_delim
NOLINKALL!$NOLINKALL$ac_delim
LLVM_ON_UNIX!$LLVM_ON_UNIX$ac_delim
@@ -35267,6 +36050,7 @@ DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
+DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
@@ -35275,16 +36059,13 @@ ENABLE_PIC!$ENABLE_PIC$ac_delim
TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
+LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim
ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
+ENABLE_LLVMC_DYNAMIC!$ENABLE_LLVMC_DYNAMIC$ac_delim
+ENABLE_LLVMC_DYNAMIC_PLUGINS!$ENABLE_LLVMC_DYNAMIC_PLUGINS$ac_delim
CXX!$CXX$ac_delim
-CXXFLAGS!$CXXFLAGS$ac_delim
-ac_ct_CXX!$ac_ct_CXX$ac_delim
-NM!$NM$ac_delim
-ifGNUmake!$ifGNUmake$ac_delim
-LN_S!$LN_S$ac_delim
-CMP!$CMP$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -35326,6 +36107,12 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+CXXFLAGS!$CXXFLAGS$ac_delim
+ac_ct_CXX!$ac_ct_CXX$ac_delim
+NM!$NM$ac_delim
+ifGNUmake!$ifGNUmake$ac_delim
+LN_S!$LN_S$ac_delim
+CMP!$CMP$ac_delim
CP!$CP$ac_delim
DATE!$DATE$ac_delim
FIND!$FIND$ac_delim
@@ -35338,6 +36125,10 @@ TAR!$TAR$ac_delim
BINPWD!$BINPWD$ac_delim
GRAPHVIZ!$GRAPHVIZ$ac_delim
DOT!$DOT$ac_delim
+FDP!$FDP$ac_delim
+NEATO!$NEATO$ac_delim
+TWOPI!$TWOPI$ac_delim
+CIRCO!$CIRCO$ac_delim
GV!$GV$ac_delim
DOTTY!$DOTTY$ac_delim
PERL!$PERL$ac_delim
@@ -35377,6 +36168,7 @@ LLVMGXXCOMMAND!$LLVMGXXCOMMAND$ac_delim
LLVMGCC!$LLVMGCC$ac_delim
LLVMGXX!$LLVMGXX$ac_delim
USE_UDIS86!$USE_UDIS86$ac_delim
+USE_OPROFILE!$USE_OPROFILE$ac_delim
HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
ALLOCA!$ALLOCA$ac_delim
@@ -35409,7 +36201,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 81; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -35813,31 +36605,31 @@ echo "$as_me: executing $ac_file commands" >&6;}
case $ac_file$ac_mode in
"Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/Makefile Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;;
"Makefile.common":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile.common`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/Makefile.common Makefile.common ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile.common Makefile.common ;;
"examples/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname examples/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/examples/Makefile examples/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/examples/Makefile examples/Makefile ;;
"lib/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/lib/Makefile lib/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;;
"runtime/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname runtime/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/runtime/Makefile runtime/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/runtime/Makefile runtime/Makefile ;;
"test/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname test/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/test/Makefile test/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/test/Makefile test/Makefile ;;
"test/Makefile.tests":C) ${llvm_src}/autoconf/mkinstalldirs `dirname test/Makefile.tests`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/test/Makefile.tests test/Makefile.tests ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/test/Makefile.tests test/Makefile.tests ;;
"unittests/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname unittests/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/unittests/Makefile unittests/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/unittests/Makefile unittests/Makefile ;;
"tools/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/tools/Makefile tools/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;;
"utils/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname utils/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/utils/Makefile utils/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/utils/Makefile utils/Makefile ;;
"projects/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname projects/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/projects/Makefile projects/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/projects/Makefile projects/Makefile ;;
"bindings/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname bindings/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/bindings/Makefile bindings/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/bindings/Makefile bindings/Makefile ;;
"bindings/ocaml/Makefile.ocaml":C) ${llvm_src}/autoconf/mkinstalldirs `dirname bindings/ocaml/Makefile.ocaml`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/bindings/ocaml/Makefile.ocaml bindings/ocaml/Makefile.ocaml ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/bindings/ocaml/Makefile.ocaml bindings/ocaml/Makefile.ocaml ;;
esac
done # for ac_tag
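Every copied Makefile now goes through install-sh with an explicit -m 0644, so the installed makefiles stop inheriting the execute bit from install-sh's default mode. Spot check after a configure run (illustrative):

    ls -l Makefile    # expect -rw-r--r--, no execute bits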
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index df0a74d79854..e75887b73902 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -254,7 +254,7 @@ Blocks in a bitstream denote nested regions of the stream, and are identified by
a content-specific id number (for example, LLVM IR uses an ID of 12 to represent
function bodies). Block IDs 0-7 are reserved for <a href="#stdblocks">standard blocks</a>
whose meaning is defined by Bitcode; block IDs 8 and greater are
-application specific. Nested blocks capture the hierachical structure of the data
+application specific. Nested blocks capture the hierarchical structure of the data
encoded in it, and various properties are associated with blocks as the file is
parsed. Block definitions allow the reader to efficiently skip blocks
in constant time if the reader wants a summary of blocks, or if it wants to
@@ -462,23 +462,23 @@ emitted as their code, followed by the extra data.
<p>The possible operand encodings are:</p>
<ol>
-<li value="1">Fixed: The field should be emitted as
+<li>Fixed: The field should be emitted as
a <a href="#fixedwidth">fixed-width value</a>, whose width is specified by
the operand's extra data.</li>
-<li value="2">VBR: The field should be emitted as
+<li>VBR: The field should be emitted as
a <a href="#variablewidth">variable-width value</a>, whose width is
specified by the operand's extra data.</li>
-<li value="3">Array: This field is an array of values. The array operand
+<li>Array: This field is an array of values. The array operand
has no extra data, but expects another operand to follow it which indicates
the element type of the array. When reading an array in an abbreviated
record, the first integer is a vbr6 that indicates the array length,
followed by the encoded elements of the array. An array may only occur as
the last operand of an abbreviation (except for the one final operand that
gives the array's type).</li>
-<li value="4">Char6: This field should be emitted as
+<li>Char6: This field should be emitted as
a <a href="#char6">char6-encoded value</a>. This operand type takes no
extra data.</li>
-<li value="5">Blob: This field is emitted as a vbr6, followed by padding to a
+<li>Blob: This field is emitted as a vbr6, followed by padding to a
32-bit boundary (for alignment) and an array of 8-bit objects. The array of
bytes is further followed by tail padding to ensure that its total length is
a multiple of 4 bytes. This makes it very efficient for the reader to
@@ -755,7 +755,7 @@ LLVM IR is defined with the following blocks:
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2009-04-27 00:21:57 +0200 (Mon, 27 Apr 2009) $
+Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
</html>
diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html
index 7b2679689534..c89b4c5826a7 100644
--- a/docs/Bugpoint.html
+++ b/docs/Bugpoint.html
@@ -216,6 +216,17 @@ non-obvious ways. Here are some hints and tips:<p>
the list of specified optimizations to be randomized and applied to the
program. This process will repeat until a bug is found or the user
kills <tt>bugpoint</tt>.
+
+<li><p><tt>bugpoint</tt> does not understand the <tt>-O</tt> option
+    that is used to specify the optimization level to <tt>opt</tt>. You
+ can use e.g.</p>
+
+<div class="doc_code">
+<p><tt>opt -O2 -debug-pass=Arguments foo.bc -disable-output</tt></p>
+</div>
+
+ <p>to get a list of passes that are used with <tt>-O2</tt> and
+ then pass this list to <tt>bugpoint</tt>.</p>
</ol>
@@ -232,7 +243,7 @@ non-obvious ways. Here are some hints and tips:<p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $
+ Last modified: $Date: 2009-10-12 20:12:47 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/CMake.html b/docs/CMake.html
index 1f50d397f006..25f471081f59 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -67,7 +67,7 @@
<ol>
- <li><p><a href=http://www.cmake.org/cmake/resources/software.html>Download</a>
+ <li><p><a href="http://www.cmake.org/cmake/resources/software.html">Download</a>
and install CMake. Version 2.6.2 is the minimum required.</p>
<li><p>Open a shell. Your development tools must be reachable from this
@@ -180,7 +180,7 @@
</div>
<p>Variables are stored on the CMake cache. This is a file
- named <it>CMakeCache.txt</it> on the root of the build
+ named <tt>CMakeCache.txt</tt> on the root of the build
directory. Do not hand-edit it.</p>
<p>Variables are listed here appending its type after a colon. It is
@@ -250,6 +250,12 @@
to <i>X86</i>. On the other cases defaults to <i>all</i>. Example:
<i>-DLLVM_TARGETS_TO_BUILD="X86;PowerPC;Alpha"</i>.</dd>
+ <dt><b>LLVM_BUILD_TOOLS</b>:BOOL</dt>
+ <dd>Build LLVM tools. Defaults to ON.</dd>
+
+ <dt><b>LLVM_BUILD_EXAMPLES</b>:BOOL</dt>
+ <dd>Build LLVM examples. Defaults to ON.</dd>
+
<dt><b>LLVM_ENABLE_THREADS</b>:BOOL</dt>
<dd>Build with threads support, if available. Defaults to ON.</dd>
@@ -258,18 +264,21 @@
CMAKE_BUILD_TYPE is <i>Release</i>.</dd>
<dt><b>LLVM_ENABLE_PIC</b>:BOOL</dt>
- <dd>Add the <i>-fPIC</i> flag to the compiler command-line, if the
- compiler supports this flag. Some systems, like Windows, does not
- need this flag. Defaults to OFF.</dd>
+ <dd>Add the <i>-fPIC</i> flag for the compiler command-line, if the
+ compiler supports this flag. Some systems, like Windows, do not
+ need this flag. Defaults to ON.</dd>
<dt><b>LLVM_BUILD_32_BITS</b>:BOOL</dt>
<dd>Build 32-bits executables and libraries on 64-bits systems. This
option is available only on some 64-bits unix systems. Defaults to
OFF.</dd>
- <dt><b>LLVM_PLO_FLAGS</b>:STRING</dt>
- <dd>Extra flags for creating partially linked objects. Visual C++
- does not use this.</dd>
+ <dt><b>LLVM_TARGET_ARCH</b>:STRING</dt>
+ <dd>LLVM target to use for native code generation. This is required
+ for JIT generation. It defaults to "host", meaning that it shall
+ pick the architecture of the machine where LLVM is being built. If
+ you are cross-compiling, set it to the target architecture
+ name.</dd>
<dt><b>LLVM_TABLEGEN</b>:STRING</dt>
<dd>Full path to a native TableGen executable (usually
@@ -309,6 +318,9 @@
<a href="http://www.vtk.org/Wiki/CMake_Cross_Compiling#Information_how_to_set_up_various_cross_compiling_toolchains">this
section</a> for a quick solution.</p>
+<p>Also see the <a href="#llvmvars">LLVM-specific variables</a>
+ section for variables used when cross-compiling.</p>
+
</div>
<!-- *********************************************************************** -->
@@ -337,26 +349,6 @@
</div>
-<!-- ======================================================================= -->
-<div class="doc_subsection">
- <a name="msvc">Microsoft Visual C++</a>
-</div>
-
-<div class="doc_text">
-
- <p>For linking the JIT into your executable, add</p>
-
- <div class="doc_code">
- <p><tt>/INCLUDE:_X86TargetMachineModule</tt></p>
- </div>
-
- <p>to your linker options. This is required for adding the relevant
- LLVM object code to the executable. Not doing this will result on
- some methods returning NULL (<i>ExecutionEngine::create</i>, for
- instance).</p>
-
-</div>
-
<!-- *********************************************************************** -->
<hr>
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 25101fc31bf6..d39de19ec6a5 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1380,9 +1380,9 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
for <tt>RegisterClass</tt>, the last parameter of which is a list of
registers. Just commenting some out is one simple way to avoid them being
used. A more polite way is to explicitly exclude some registers from
- the <i>allocation order</i>. See the definition of the <tt>GR</tt> register
- class in <tt>lib/Target/IA64/IA64RegisterInfo.td</tt> for an example of this
- (e.g., <tt>numReservedRegs</tt> registers are hidden.)</p>
+ the <i>allocation order</i>. See the definition of the <tt>GR8</tt> register
+ class in <tt>lib/Target/X86/X86RegisterInfo.td</tt> for an example of this.
+ </p>
<p>Virtual registers are also denoted by integer numbers. Contrary to physical
registers, different virtual registers never share the same number. The
@@ -1616,9 +1616,9 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
<div class="doc_code">
<pre>
-$ llc -f -regalloc=simple file.bc -o sp.s;
-$ llc -f -regalloc=local file.bc -o lc.s;
-$ llc -f -regalloc=linearscan file.bc -o ln.s;
+$ llc -regalloc=simple file.bc -o sp.s;
+$ llc -regalloc=local file.bc -o lc.s;
+$ llc -regalloc=linearscan file.bc -o ln.s;
</pre>
</div>
@@ -1812,24 +1812,27 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
<div class="doc_code">
<pre>
-Base + [1,2,4,8] * IndexReg + Disp32
+SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32
</pre>
</div>
-<p>In order to represent this, LLVM tracks no less than 4 operands for each
+<p>In order to represent this, LLVM tracks no less than 5 operands for each
memory operand of this form. This means that the "load" form of
'<tt>mov</tt>' has the following <tt>MachineOperand</tt>s in this order:</p>
<div class="doc_code">
<pre>
-Index: 0 | 1 2 3 4
-Meaning: DestReg, | BaseReg, Scale, IndexReg, Displacement
-OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm
+Index: 0 | 1 2 3 4 5
+Meaning: DestReg, | BaseReg, Scale, IndexReg, Displacement Segment
+OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm PhysReg
</pre>
</div>
<p>Stores, and all other instructions, treat the four memory operands in the
- same way and in the same order.</p>
+ same way and in the same order. If the segment register is unspecified
+ (regno = 0), then no segment override is generated. "Lea" operations do not
+ have a segment register specified, so they only have 4 operands for their
+ memory reference.</p>
</div>
@@ -2118,7 +2121,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-15 12:17:44 +0000 (Mon, 15 Jun 2009) $
+ Last modified: $Date: 2009-10-10 23:30:55 +0200 (Sat, 10 Oct 2009) $
</address>
</body>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
index cf9111071f56..f93e1eac5de8 100644
--- a/docs/CodingStandards.html
+++ b/docs/CodingStandards.html
@@ -41,8 +41,12 @@
<li><a href="#hl_dontinclude">#include as Little as Possible</a></li>
<li><a href="#hl_privateheaders">Keep "internal" Headers
Private</a></li>
- <li><a href="#ll_iostream"><tt>#include &lt;iostream&gt;</tt> is
- <em>forbidden</em></a></li>
+ <li><a href="#hl_earlyexit">Use Early Exits and 'continue' to Simplify
+ Code</a></li>
+ <li><a href="#hl_else_after_return">Don't use "else" after a
+ return</a></li>
+ <li><a href="#hl_predicateloops">Turn Predicate Loops into Predicate
+ Functions</a></li>
</ol></li>
<li><a href="#micro">The Low Level Issues</a>
<ol>
@@ -52,16 +56,27 @@
classes in headers</a></li>
<li><a href="#ll_end">Don't evaluate end() every time through a
loop</a></li>
- <li><a href="#ll_preincrement">Prefer Preincrement</a></li>
+ <li><a href="#ll_iostream"><tt>#include &lt;iostream&gt;</tt> is
+ <em>forbidden</em></a></li>
<li><a href="#ll_avoidendl">Avoid <tt>std::endl</tt></a></li>
+ <li><a href="#ll_raw_ostream">Use <tt>raw_ostream</tt></a</li>
</ol></li>
+
+ <li><a href="#nano">Microscopic Details</a>
+ <ol>
+ <li><a href="#micro_spaceparen">Spaces Before Parentheses</a></li>
+ <li><a href="#micro_preincrement">Prefer Preincrement</a></li>
+ <li><a href="#micro_namespaceindent">Namespace Indentation</a></li>
+ <li><a href="#micro_anonns">Anonymous Namespaces</a></li>
+ </ol></li>
+
+
</ol></li>
<li><a href="#seealso">See Also</a></li>
</ol>
<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
- <a href="mailto:void@nondot.org">Bill Wendling</a></p>
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
</div>
@@ -118,7 +133,9 @@ href="mailto:sabre@nondot.org">Chris</a>.</p>
<div class="doc_text">
<p>Comments are one critical part of readability and maintainability. Everyone
-knows they should comment, so should you. Although we all should probably
+knows they should comment, so should you. When writing comments, write them as
+English prose, which means they should use proper capitalization, punctuation,
+etc. Although we all should probably
comment our code more than we do, there are a few very critical places that
documentation is very useful:</p>
@@ -286,7 +303,7 @@ for debate.</p>
<div class="doc_text">
<p>In all cases, prefer spaces to tabs in source files. People have different
-prefered indentation levels, and different styles of indentation that they
+preferred indentation levels, and different styles of indentation that they
like... this is fine. What isn't is that different editors/viewers expand tabs
out to different tab stops. This can cause your code to look completely
unreadable, and it is not worth dealing with.</p>
@@ -402,7 +419,8 @@ different symbols based on whether <tt>class</tt> or <tt>struct</tt> was used to
declare the symbol. This can lead to problems at link time.</p>
<p>So, the rule for LLVM is to always use the <tt>class</tt> keyword, unless
-<b>all</b> members are public, in which case <tt>struct</tt> is allowed.</p>
+<b>all</b> members are public and the type is a C++ "POD" type, in which case
+<tt>struct</tt> is allowed.</p>
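+
+<p>As a hedged illustration (these types are made up, not from the LLVM tree):
+a plain-old-data aggregate with all-public members may use <tt>struct</tt>,
+while anything with private state should use <tt>class</tt>:</p>
+
+<div class="doc_code">
+<pre>
+struct Range {        // OK as a struct: all members public, POD type.
+  unsigned Start;
+  unsigned End;
+};
+
+class SymbolTable {   // Has private state, so use 'class'.
+  std::map&lt;std::string, unsigned&gt; Table;
+public:
+  unsigned lookup(const std::string &amp;Name) const;
+};
+</pre>
+</div>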
</div>
@@ -417,6 +435,7 @@ declare the symbol. This can lead to problems at link time.</p>
<div class="doc_subsection">
<a name="macro">The High Level Issues</a>
</div>
+<!-- ======================================================================= -->
<!-- _______________________________________________________________________ -->
@@ -472,7 +491,7 @@ most cases, you simply don't need the definition of a class... and not
<b>must</b> include all of the header files that you are using -- you can
include them either directly
or indirectly (through another header file). To make sure that you don't
-accidently forget to include a header file in your module header, make sure to
+accidentally forget to include a header file in your module header, make sure to
include your module header <b>first</b> in the implementation file (as mentioned
above). This way there won't be any hidden dependencies that you'll find out
about later...</p>
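+
+<p>As a small sketch of this rule (the file names here are hypothetical):</p>
+
+<div class="doc_code">
+<pre>
+//===- Foo.cpp - Implementation of the hypothetical Foo module -----------===//
+
+#include "Foo.h"                  // Module header first: if Foo.h forgot an
+                                  // #include it needs, this file won't build.
+#include "llvm/ADT/StringRef.h"
+#include &lt;vector&gt;
+</pre>
+</div>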
@@ -502,34 +521,256 @@ class itself... just make them private (or protected), and all is well.</p>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
- <a name="ll_iostream"><tt>#include &lt;iostream&gt;</tt> is forbidden</a>
+ <a name="hl_earlyexit">Use Early Exits and 'continue' to Simplify Code</a>
</div>
<div class="doc_text">
-<p>The use of <tt>#include &lt;iostream&gt;</tt> in library files is
-hereby <b><em>forbidden</em></b>. The primary reason for doing this is to
-support clients using LLVM libraries as part of larger systems. In particular,
-we statically link LLVM into some dynamic libraries. Even if LLVM isn't used,
-the static c'tors are run whenever an application start up that uses the dynamic
-library. There are two problems with this:</p>
+<p>When reading code, keep in mind how much state and how many previous
+decisions have to be remembered by the reader to understand a block of code.
+Aim to reduce indentation where possible when it doesn't make it more difficult
+to understand the code. One great way to do this is by making use of early
+exits and the 'continue' keyword in long loops. As an example of using an early
+exit from a function, consider this "bad" code:</p>
-<ol>
- <li>The time to run the static c'tors impacts startup time of
- applications&mdash;a critical time for GUI apps.</li>
- <li>The static c'tors cause the app to pull many extra pages of memory off the
- disk: both the code for the static c'tors in each <tt>.o</tt> file and the
- small amount of data that gets touched. In addition, touched/dirty pages
- put more pressure on the VM system on low-memory machines.</li>
-</ol>
+<div class="doc_code">
+<pre>
+Value *DoSomething(Instruction *I) {
+ if (!isa&lt;TerminatorInst&gt;(I) &amp;&amp;
+ I-&gt;hasOneUse() &amp;&amp; SomeOtherThing(I)) {
+ ... some long code ....
+ }
+
+ return 0;
+}
+</pre>
+</div>
-<p>Note that using the other stream headers (<tt>&lt;sstream&gt;</tt> for
-example) is allowed normally, it is just <tt>&lt;iostream&gt;</tt> that is
-causing problems.</p>
+<p>This code has several problems if the body of the 'if' is large. When you're
+looking at the top of the function, it isn't immediately clear that this
+<em>only</em> does interesting things with non-terminator instructions, and only
+applies to things with the other predicates. Second, it is relatively difficult
+to describe (in comments) why these predicates are important because the if
+statement makes it difficult to lay out the comments. Third, when you're deep
+within the body of the code, it is indented an extra level. Finally, when
+reading the top of the function, it isn't clear what the result is if the
+predicate isn't true; you have to read to the end of the function to know that
+it returns null.</p>
+
+<p>It is much preferred to format the code like this:</p>
+
+<div class="doc_code">
+<pre>
+Value *DoSomething(Instruction *I) {
+ // Terminators never need 'something' done to them because, ...
+ if (isa&lt;TerminatorInst&gt;(I))
+ return 0;
+
+ // We conservatively avoid transforming instructions with multiple uses
+ // because goats like cheese.
+ if (!I-&gt;hasOneUse())
+ return 0;
+
+ // This is really just here for example.
+ if (!SomeOtherThing(I))
+ return 0;
+
+ ... some long code ....
+}
+</pre>
+</div>
+
+<p>This fixes these problems. A similar problem frequently happens in for
+loops. A silly example is something like this:</p>
+
+<div class="doc_code">
+<pre>
+ for (BasicBlock::iterator II = BB-&gt;begin(), E = BB-&gt;end(); II != E; ++II) {
+ if (BinaryOperator *BO = dyn_cast&lt;BinaryOperator&gt;(II)) {
+ Value *LHS = BO-&gt;getOperand(0);
+ Value *RHS = BO-&gt;getOperand(1);
+ if (LHS != RHS) {
+ ...
+ }
+ }
+ }
+</pre>
+</div>
-<p>The preferred replacement for stream functionality is the
-<tt>llvm::raw_ostream</tt> class (for writing to output streams of various
-sorts) and the <tt>llvm::MemoryBuffer</tt> API (for reading in files).</p>
+<p>When you have very small loops, this sort of structure is fine, but if it
+exceeds 10-15 lines, it becomes difficult for people to read and understand at
+a glance.
+The problem with this sort of code is that it gets very nested very quickly,
+meaning that the reader of the code has to keep a lot of context in their brain
+to remember what is going on in the loop, because they don't know
+if/when the if conditions will have elses, etc. It is strongly preferred to
+structure the loop like this:</p>
+
+<div class="doc_code">
+<pre>
+ for (BasicBlock::iterator II = BB-&gt;begin(), E = BB-&gt;end(); II != E; ++II) {
+ BinaryOperator *BO = dyn_cast&lt;BinaryOperator&gt;(II);
+ if (!BO) continue;
+
+ Value *LHS = BO-&gt;getOperand(0);
+ Value *RHS = BO-&gt;getOperand(1);
+ if (LHS == RHS) continue;
+ }
+</pre>
+</div>
+
+<p>This has all the benefits of using early exits from functions: it reduces
+nesting of the loop, it makes it easier to describe why the conditions are true,
+and it makes it obvious to the reader that there is no "else" coming up that
+they have to push context into their brain for. If a loop is large, this can
+be a big understandability win.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="hl_else_after_return">Don't use "else" after a return</a>
+</div>
+
+<div class="doc_text">
+
+<p>For reasons similar to those above (reduction of indentation and easier
+  reading),
+ please do not use "else" or "else if" after something that interrupts
+ control flow like return, break, continue, goto, etc. For example, this is
+ "bad":</p>
+
+<div class="doc_code">
+<pre>
+ case 'J': {
+ if (Signed) {
+ Type = Context.getsigjmp_bufType();
+ if (Type.isNull()) {
+ Error = ASTContext::GE_Missing_sigjmp_buf;
+ return QualType();
+ } else {
+ break;
+ }
+ } else {
+ Type = Context.getjmp_bufType();
+ if (Type.isNull()) {
+ Error = ASTContext::GE_Missing_jmp_buf;
+ return QualType();
+ } else {
+ break;
+ }
+ }
+ }
+ }
+</pre>
+</div>
+
+<p>It is better to write this as something like:</p>
+
+<div class="doc_code">
+<pre>
+ case 'J':
+ if (Signed) {
+ Type = Context.getsigjmp_bufType();
+ if (Type.isNull()) {
+ Error = ASTContext::GE_Missing_sigjmp_buf;
+ return QualType();
+ }
+ } else {
+ Type = Context.getjmp_bufType();
+ if (Type.isNull()) {
+ Error = ASTContext::GE_Missing_jmp_buf;
+ return QualType();
+ }
+ }
+ break;
+</pre>
+</div>
+
+<p>Or better yet (in this case), as:</p>
+
+<div class="doc_code">
+<pre>
+ case 'J':
+ if (Signed)
+ Type = Context.getsigjmp_bufType();
+ else
+ Type = Context.getjmp_bufType();
+
+ if (Type.isNull()) {
+ Error = Signed ? ASTContext::GE_Missing_sigjmp_buf :
+ ASTContext::GE_Missing_jmp_buf;
+ return QualType();
+ }
+ break;
+</pre>
+</div>
+
+<p>The idea is to reduce indentation and the amount of code you have to keep
+ track of when reading the code.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="hl_predicateloops">Turn Predicate Loops into Predicate Functions</a>
+</div>
+
+<div class="doc_text">
+
+<p>It is very common to write small loops that just compute a boolean
+ value. There are a number of ways that people commonly write these, but an
+ example of this sort of thing is:</p>
+
+<div class="doc_code">
+<pre>
+ <b>bool FoundFoo = false;</b>
+ for (unsigned i = 0, e = BarList.size(); i != e; ++i)
+ if (BarList[i]-&gt;isFoo()) {
+ <b>FoundFoo = true;</b>
+ break;
+ }
+
+ <b>if (FoundFoo) {</b>
+ ...
+ }
+</pre>
+</div>
+
+<p>This sort of code is awkward to write, and is almost always a bad sign.
+Instead of this sort of loop, we strongly prefer to use a predicate function
+(which may be <a href="#micro_anonns">static</a>) that uses
+<a href="#hl_earlyexit">early exits</a> to compute the predicate. We prefer
+the code to be structured like this:
+</p>
+
+
+<div class="doc_code">
+<pre>
+/// ListContainsFoo - Return true if the specified list has an element that is
+/// a foo.
+static bool ListContainsFoo(const std::vector&lt;Bar*&gt; &amp;List) {
+ for (unsigned i = 0, e = List.size(); i != e; ++i)
+ if (List[i]-&gt;isFoo())
+ return true;
+ return false;
+}
+...
+
+ <b>if (ListContainsFoo(BarList)) {</b>
+ ...
+ }
+</pre>
+</div>
+
+<p>There are many reasons for doing this: it reduces indentation and factors out
+code which can often be shared by other code that checks for the same predicate.
+More importantly, it <em>forces you to pick a name</em> for the function, and
+forces you to write a comment for it. In this silly example, this doesn't add
+much value. However, if the condition is complex, this can make it a lot easier
+for the reader to understand the code that queries for this predicate. Instead
+of being faced with the in-line details of how we check to see if the BarList
+contains a foo, we can trust the function name and continue reading with better
+locality.</p>
</div>
@@ -538,6 +779,7 @@ sorts) and the <tt>llvm::MemoryBuffer</tt> API (for reading in files).</p>
<div class="doc_subsection">
<a name="micro">The Low Level Issues</a>
</div>
+<!-- ======================================================================= -->
<!-- _______________________________________________________________________ -->
@@ -548,7 +790,7 @@ sorts) and the <tt>llvm::MemoryBuffer</tt> API (for reading in files).</p>
<div class="doc_text">
<p>Use the "<tt>assert</tt>" function to its fullest. Check all of your
-preconditions and assumptions, you never know when a bug (not neccesarily even
+preconditions and assumptions, you never know when a bug (not necessarily even
yours) might be caught early by an assertion, which reduces debugging time
dramatically. The "<tt>&lt;cassert&gt;</tt>" header file is probably already
included by the header files you are using, so it doesn't cost anything to use
@@ -724,10 +966,156 @@ prefer it.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_iostream"><tt>#include &lt;iostream&gt;</tt> is forbidden</a>
+</div>
+
+<div class="doc_text">
+
+<p>The use of <tt>#include &lt;iostream&gt;</tt> in library files is
+hereby <b><em>forbidden</em></b>. The primary reason for doing this is to
+support clients using LLVM libraries as part of larger systems. In particular,
+we statically link LLVM into some dynamic libraries. Even if LLVM isn't used,
+the static c'tors are run whenever an application that uses the dynamic
+library starts up. There are two problems with this:</p>
+
+<ol>
+ <li>The time to run the static c'tors impacts startup time of
+ applications&mdash;a critical time for GUI apps.</li>
+ <li>The static c'tors cause the app to pull many extra pages of memory off the
+ disk: both the code for the static c'tors in each <tt>.o</tt> file and the
+ small amount of data that gets touched. In addition, touched/dirty pages
+ put more pressure on the VM system on low-memory machines.</li>
+</ol>
+
+<p>Note that using the other stream headers (<tt>&lt;sstream&gt;</tt> for
+example) is not problematic in this regard (just <tt>&lt;iostream&gt;</tt>).
+However, <tt>raw_ostream</tt> provides various APIs that perform better than
+std::ostream-style APIs for almost every use, so you should just use it for
+new code.</p>
+
+<p><b>New code should always
+use <a href="#ll_raw_ostream"><tt>raw_ostream</tt></a> for writing, or
+the <tt>llvm::MemoryBuffer</tt> API for reading files.</b></p>
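+
+<p>As a minimal sketch of what this looks like (the function name is
+illustrative):</p>
+
+<div class="doc_code">
+<pre>
+#include "llvm/Support/raw_ostream.h"
+
+void printCount(unsigned N) {
+  // llvm::outs() is the raw_ostream analogue of std::cout.
+  llvm::outs() &lt;&lt; "count = " &lt;&lt; N &lt;&lt; '\n';
+}
+</pre>
+</div>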
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>std::endl</tt> modifier, when used with iostreams, outputs a newline
+to the output stream specified. In addition to doing this, however, it also
+flushes the output stream. In other words, these are equivalent:</p>
+
+<div class="doc_code">
+<pre>
+std::cout &lt;&lt; std::endl;
+std::cout &lt;&lt; '\n' &lt;&lt; std::flush;
+</pre>
+</div>
+
+<p>Most of the time, you probably have no reason to flush the output stream, so
+it's better to use a literal <tt>'\n'</tt>.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_raw_ostream">Use <tt>raw_ostream</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM includes a lightweight, simple, and efficient stream implementation
+in <tt>llvm/Support/raw_ostream.h</tt> which provides all of the common features
+of <tt>std::ostream</tt>. All new code should use <tt>raw_ostream</tt> instead
+of <tt>ostream</tt>.</p>
+
+<p>Unlike <tt>std::ostream</tt>, <tt>raw_ostream</tt> is not a template and can
+be forward declared as <tt>class raw_ostream</tt>. Public headers should
+generally not include the <tt>raw_ostream</tt> header, but use forward
+declarations and constant references to <tt>raw_ostream</tt> instances.</p>
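+
+<p>A sketch of the forward-declaration pattern in a public header (the
+function name is made up for illustration):</p>
+
+<div class="doc_code">
+<pre>
+namespace llvm {
+  class raw_ostream;   // Forward declaration; no #include needed here.
+}
+
+// The .cpp file that defines this includes llvm/Support/raw_ostream.h.
+void printSummary(llvm::raw_ostream &amp;OS);
+</pre>
+</div>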
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="nano">Microscopic Details</a>
+</div>
+<!-- ======================================================================= -->
+
+<p>This section describes preferred low-level formatting guidelines along with
+the reasoning behind them.</p>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="micro_spaceparen">Spaces Before Parentheses</a>
+</div>
+
+<div class="doc_text">
+
+<p>We prefer to put a space before an open parenthesis only in control flow
+statements, but not in normal function call expressions and function-like
+macros. For example, this is good:</p>
+
+<div class="doc_code">
+<pre>
+ <b>if (</b>x) ...
+ <b>for (</b>i = 0; i != 100; ++i) ...
+ <b>while (</b>llvm_rocks) ...
+
+ <b>somefunc(</b>42);
+ <b><a href="#ll_assert">assert</a>(</b>3 != 4 &amp;&amp; "laws of math are failing me");
+
+ a = <b>foo(</b>42, 92) + <b>bar(</b>x);
+ </pre>
+</div>
+
+<p>... and this is bad:</p>
+
+<div class="doc_code">
+<pre>
+ <b>if(</b>x) ...
+ <b>for(</b>i = 0; i != 100; ++i) ...
+ <b>while(</b>llvm_rocks) ...
+
+ <b>somefunc (</b>42);
+ <b><a href="#ll_assert">assert</a> (</b>3 != 4 &amp;&amp; "laws of math are failing me");
+
+ a = <b>foo (</b>42, 92) + <b>bar (</b>x);
+</pre>
+</div>
+
+<p>The reason for doing this is not completely arbitrary. This style makes
+ control flow operators stand out more, and makes expressions flow better. The
+ function call operator binds very tightly as a postfix operator. Putting
+  a space after a function name (as in the last example) makes it appear that
+  the argument list might group with the left-hand side of a binary operator,
+  with the function name binding to the expression on the right side. More
+ specifically, it is easy to misread the "a" example as:</p>
+
+<div class="doc_code">
+<pre>
+ a = foo <b>(</b>(42, 92) + bar<b>)</b> (x);
+</pre>
+</div>
+
+<p>... when skimming through the code. By avoiding a space before a function's
+argument list, we avoid this misinterpretation.</p>
+
+</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
- <a name="ll_preincrement">Prefer Preincrement</a>
+ <a name="micro_preincrement">Prefer Preincrement</a>
</div>
<div class="doc_text">
@@ -747,27 +1135,178 @@ get in the habit of always using preincrement, and you won't have a problem.</p>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
- <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a>
+ <a name="micro_namespaceindent">Namespace Indentation</a>
</div>
<div class="doc_text">
-<p>The <tt>std::endl</tt> modifier, when used with iostreams outputs a newline
-to the output stream specified. In addition to doing this, however, it also
-flushes the output stream. In other words, these are equivalent:</p>
+<p>
+In general, we strive to reduce indentation wherever possible. This is useful
+because we want code to <a href="#scf_codewidth">fit into 80 columns</a> without
+wrapping horribly, but also because it makes it easier to understand the code.
+Namespaces are a funny thing: they are often large, and we often desire to put
+lots of stuff into them (so they can be large). Other times they are tiny,
+because they just hold an enum or something similar. In order to balance this,
+we use different approaches for small versus large namespaces.
+</p>
+
+<p>
+If a namespace definition is small and <em>easily</em> fits on a screen (say,
+less than 35 lines of code), then you should indent its body. Here's an
+example:
+</p>
<div class="doc_code">
<pre>
-std::cout &lt;&lt; std::endl;
-std::cout &lt;&lt; '\n' &lt;&lt; std::flush;
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit.
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
+ /// add the relocated value to the value already in memory.
+ reloc_absolute_word = 2,
+ reloc_absolute_dword = 3
+ };
+ }
+}
</pre>
</div>
-<p>Most of the time, you probably have no reason to flush the output stream, so
-it's better to use a literal <tt>'\n'</tt>.</p>
+<p>Since the body is small, indenting adds value because it makes it very clear
+where the namespace starts and ends, and it is easy to take in the whole thing
+in one "gulp" when reading the code. If the blob of code in the namespace is
+larger (as it typically is in a header in the llvm or clang namespaces), do not
+indent the code, and add a comment indicating what namespace is being closed.
+For example:</p>
+<div class="doc_code">
+<pre>
+namespace llvm {
+namespace knowledge {
+
+/// Grokable - This class represents things that Smith can have an intimate
+/// understanding of and contains the data associated with it.
+class Grokable {
+...
+public:
+ explicit Grokable() { ... }
+ virtual ~Grokable() = 0;
+
+ ...
+
+};
+
+} // end namespace knowledge
+} // end namespace llvm
+</pre>
</div>
+<p>Because the class is large, we don't expect that the reader can easily
+understand the entire concept in a glance, and the end of the file (where the
+namespaces end) may be a long way away from the place they open. As such,
+indenting the contents of the namespace doesn't add any value, and detracts from
+the readability of the class. In these cases it is best to <em>not</em> indent
+the contents of the namespace.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="micro_anonns">Anonymous Namespaces</a>
+</div>
+
+<div class="doc_text">
+
+<p>After talking about namespaces in general, you may be wondering about
+anonymous namespaces in particular.
+Anonymous namespaces are a great language feature that tells the C++ compiler
+that the contents of the namespace are only visible within the current
+translation unit, allowing more aggressive optimization and eliminating the
+possibility of symbol name collisions. Anonymous namespaces are to C++ as
+"static" is to C functions and global variables. While "static" is available
+in C++, anonymous namespaces are more general: they can make entire classes
+private to a file.</p>
+
+<p>The problem with anonymous namespaces is that they naturally want to
+encourage indentation of their body, and they reduce locality of reference: if
+you see a random function definition in a C++ file, it is easy to see if it is
+marked static, but seeing if it is in an anonymous namespace requires scanning
+a big chunk of the file.</p>
+
+<p>Because of this, we have a simple guideline: make anonymous namespaces as
+small as possible, and only use them for class declarations. For example, this
+is good:</p>
+
+<div class="doc_code">
+<pre>
+<b>namespace {</b>
+ class StringSort {
+ ...
+ public:
+ StringSort(...)
+ bool operator&lt;(const char *RHS) const;
+ };
+<b>} // end anonymous namespace</b>
+
+static void Helper() {
+ ...
+}
+
+bool StringSort::operator&lt;(const char *RHS) const {
+ ...
+}
+
+</pre>
+</div>
+
+<p>This is bad:</p>
+
+
+<div class="doc_code">
+<pre>
+<b>namespace {</b>
+class StringSort {
+...
+public:
+ StringSort(...)
+ bool operator&lt;(const char *RHS) const;
+};
+
+void Helper() {
+ ...
+}
+
+bool StringSort::operator&lt;(const char *RHS) const {
+ ...
+}
+
+<b>} // end anonymous namespace</b>
+
+</pre>
+</div>
+
+
+<p>This is bad specifically because if you're looking at "Helper" in the middle
+of a large C++ file, you have no immediate way to tell if it is local to
+the file. When it is marked static explicitly, this is immediately obvious.
+Also, there is no reason to enclose the definition of "operator&lt;" in the
+namespace just because it was declared there.
+</p>
+
+</div>
+
+
<!-- *********************************************************************** -->
<div class="doc_section">
@@ -807,7 +1346,7 @@ something.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-30 08:27:54 +0200 (Tue, 30 Jun 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/CommandGuide/FileCheck.pod b/docs/CommandGuide/FileCheck.pod
new file mode 100644
index 000000000000..539f66fea545
--- /dev/null
+++ b/docs/CommandGuide/FileCheck.pod
@@ -0,0 +1,65 @@
+
+=pod
+
+=head1 NAME
+
+FileCheck - Flexible pattern matching file verifier
+
+=head1 SYNOPSIS
+
+B<FileCheck> I<match-filename> [I<--check-prefix=XXX>] [I<--strict-whitespace>]
+
+=head1 DESCRIPTION
+
+B<FileCheck> reads two files (one from standard input, and one specified on the
+command line) and uses one to verify the other. This behavior is particularly
+useful for the testsuite, which wants to verify that the output of some tool
+(e.g. llc) contains the expected information (for example, a movsd from esp or
+whatever is interesting). This is similar to using grep, but it is optimized
+for matching multiple different inputs in one file in a specific order.
+
+The I<match-filename> file specifies the file that contains the patterns to
+match. The file to verify is always read from standard input.
+
+The input and output of B<FileCheck> are beyond the scope of this short
+introduction. Please see the I<TestingGuide> page in the LLVM documentation.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<--check-prefix> I<prefix>
+
+FileCheck searches the contents of I<match-filename> for patterns to match. By
+default, these patterns are prefixed with "CHECK:". If you'd like to use a
+different prefix (e.g. because the same input file is checking multiple
+different tools or options), the B<--check-prefix> argument allows you to specify
+a specific prefix to match.
+
+=item B<--strict-whitespace>
+
+By default, FileCheck canonicalizes input horizontal whitespace (spaces and
+tabs) which causes it to ignore these differences (a space will match a tab).
+The --strict-whitespace argument disables this behavior.
+
+=item B<-version>
+
+Show the version number of this program.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<FileCheck> verifies that the file matches the expected contents, it exits
+with 0. Otherwise, or if an error occurs, it will exit with a non-zero
+value.
+
+=head1 AUTHORS
+
+Maintained by The LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/Makefile b/docs/CommandGuide/Makefile
index cf77e6a33db0..3b6518310726 100644
--- a/docs/CommandGuide/Makefile
+++ b/docs/CommandGuide/Makefile
@@ -48,6 +48,12 @@ HTML := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_HTML_DIR)%.html, $(POD))
MAN := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_MAN_DIR)%.1, $(POD))
PS := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_PS_DIR)%.ps, $(POD))
+# The set of man pages we will not install
+NO_INSTALL_MANS = $(DST_MAN_DIR)FileCheck.1
+
+# The set of man pages that we will install
+INSTALL_MANS = $(filter-out $(NO_INSTALL_MANS), $(MAN))
+
.SUFFIXES:
.SUFFIXES: .html .pod .1 .ps
@@ -75,7 +81,7 @@ HTML_DIR := $(PROJ_docsdir)/html/CommandGuide
MAN_DIR := $(PROJ_mandir)/man1
PS_DIR := $(PROJ_docsdir)/ps
-install-local:: $(HTML) $(MAN) $(PS)
+install-local:: $(HTML) $(INSTALL_MANS) $(PS)
$(Echo) Installing HTML CommandGuide Documentation
$(Verb) $(MKDIR) $(HTML_DIR)
$(Verb) $(DataInstall) $(HTML) $(HTML_DIR)
@@ -83,7 +89,7 @@ install-local:: $(HTML) $(MAN) $(PS)
$(Verb) $(DataInstall) $(PROJ_SRC_DIR)/manpage.css $(HTML_DIR)
$(Echo) Installing MAN CommandGuide Documentation
$(Verb) $(MKDIR) $(MAN_DIR)
- $(Verb) $(DataInstall) $(MAN) $(MAN_DIR)
+ $(Verb) $(DataInstall) $(INSTALL_MANS) $(MAN_DIR)
$(Echo) Installing PS CommandGuide Documentation
$(Verb) $(MKDIR) $(PS_DIR)
$(Verb) $(DataInstall) $(PS) $(PS_DIR)
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index f05260bfebb3..f1046fac4480 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -128,6 +128,8 @@ options) arguments to the tool you are interested in.</p>
<div class="doc_text">
<ul>
+<li><a href="/cmds/FileCheck.html"><b>FileCheck</b></a> -
+ Flexible file verifier used extensively by the testing harness</li>
<li><a href="/cmds/tblgen.html"><b>tblgen</b></a> -
target description reader and generator</li>
@@ -144,7 +146,7 @@ options) arguments to the tool you are interested in.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-12-11 18:12:52 +0100 (Thu, 11 Dec 2008) $
+ Last modified: $Date: 2009-08-15 17:38:11 +0200 (Sat, 15 Aug 2009) $
</address>
</body>
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
new file mode 100644
index 000000000000..a818302c242a
--- /dev/null
+++ b/docs/CommandGuide/lit.pod
@@ -0,0 +1,222 @@
+=pod
+
+=head1 NAME
+
+lit - LLVM Integrated Tester
+
+=head1 SYNOPSIS
+
+B<lit> [I<options>] [I<tests>]
+
+=head1 DESCRIPTION
+
+B<lit> is a portable tool for executing LLVM and Clang style test suites,
+summarizing their results, and providing indication of failures. B<lit> is
+designed to be a lightweight testing tool with as simple a user interface as
+possible.
+
+B<lit> should be run with one or more I<tests> to run specified on the command
+line. Tests can be either individual test files or directories to search for
+tests (see L<"TEST DISCOVERY">).
+
+Each specified test will be executed (potentially in parallel) and once all
+tests have been run B<lit> will print summary information on the number of tests
+which passed or failed (see L<"TEST STATUS RESULTS">). The B<lit> program will
+execute with a non-zero exit code if any tests fail.
+
+By default B<lit> will use a succinct progress display and will only print
+summary information for test failures. See L<"OUTPUT OPTIONS"> for options
+controlling the B<lit> progress display and output.
+
+B<lit> also includes a number of options for controlling how tests are executed
+(specific features may depend on the particular test format). See L<"EXECUTION
+OPTIONS"> for more information.
+
+Finally, B<lit> also supports additional options for only running a subset of
+the tests specified on the command line; see L<"SELECTION OPTIONS"> for
+more information.
+
+=head1 GENERAL OPTIONS
+
+=over
+
+=item B<-h>, B<--help>
+
+Show the B<lit> help message.
+
+=item B<-j> I<N>, B<--threads>=I<N>
+
+Run I<N> tests in parallel. By default, this is automatically chosen to match
+the number of detected available CPUs.
+
+=back
+
+=head1 OUTPUT OPTIONS
+
+=over
+
+=item B<-q>, B<--quiet>
+
+Suppress any output except for test failures.
+
+=item B<-s>, B<--succinct>
+
+Show less output, for example don't show information on tests that pass.
+
+=item B<-v>, B<--verbose>
+
+Show more information on test failures, for example the entire test output
+instead of just the test result.
+
+=item B<--no-progress-bar>
+
+Do not use the curses-based progress bar.
+
+=back
+
+=head1 EXECUTION OPTIONS
+
+=over
+
+=item B<--path>=I<PATH>
+
+Specify an additional I<PATH> to use when searching for executables in tests.
+
+=item B<--vg>
+
+Run individual tests under valgrind (using the memcheck tool). The
+I<--error-exitcode> argument for valgrind is used so that valgrind failures will
+cause the program to exit with a non-zero status.
+
+=item B<--vg-arg>=I<ARG>
+
+When I<--vg> is used, specify an additional argument to pass to valgrind itself.
+
+=item B<--time-tests>
+
+Track the wall time individual tests take to execute and include the results in
+the summary output. This is useful for determining which tests in a test suite
+take the most time to execute. Note that this option is most useful with I<-j
+1>.
+
+=back
+
+=head1 SELECTION OPTIONS
+
+=over
+
+=item B<--max-tests>=I<N>
+
+Run at most I<N> tests and then terminate.
+
+=item B<--max-time>=I<N>
+
+Spend at most I<N> seconds (approximately) running tests and then terminate.
+
+=item B<--shuffle>
+
+Run the tests in a random order.
+
+=back
+
+=head1 ADDITIONAL OPTIONS
+
+=over
+
+=item B<--debug>
+
+Run B<lit> in debug mode, for debugging configuration issues and B<lit> itself.
+
+=item B<--show-suites>
+
+List the discovered test suites as part of the standard output.
+
+=item B<--no-tcl-as-sh>
+
+Run Tcl scripts internally (instead of converting to shell scripts).
+
+=back
+
+=head1 EXIT STATUS
+
+B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS
+results. Otherwise, it will exit with status 0. Other exit codes are used for
+non-test-related failures (for example, a user error or an internal program
+error).
+
+=head1 TEST DISCOVERY
+
+The inputs passed to B<lit> can be either individual tests, or entire
+directories or hierarchies of tests to run. When B<lit> starts up, the first
+thing it does is convert the inputs into a complete list of tests to run as part
+of I<test discovery>.
+
+In the B<lit> model, every test must exist inside some I<test suite>. B<lit>
+resolves the inputs specified on the command line to test suites by searching
+upwards from the input path until it finds a I<lit.cfg> or I<lit.site.cfg>
+file. These files serve as both a marker of test suites and as configuration
+files which B<lit> loads in order to understand how to find and run the tests
+inside the test suite.
+
+Once B<lit> has mapped the inputs into test suites it traverses the list of
+inputs adding tests for individual files and recursively searching for tests in
+directories.
+
+This behavior makes it easy to specify a subset of tests to run, while still
+allowing the test suite configuration to control exactly how tests are
+interpreted. In addition, B<lit> always identifies tests by the test suite they
+are in, and their relative path inside the test suite. For appropriately
+configured projects, this allows B<lit> to provide convenient and flexible
+support for out-of-tree builds.
+
+=head1 TEST STATUS RESULTS
+
+Each test ultimately produces one of the following six results:
+
+=over
+
+=item B<PASS>
+
+The test succeeded.
+
+=item B<XFAIL>
+
+The test failed, but that is expected. This is used for test formats which allow
+specifying that a test does not currently work, but wish to leave it in the test
+suite.
+
+=item B<XPASS>
+
+The test succeeded, but it was expected to fail. This is used for tests which
+were specified as expected to fail, but are now succeeding (generally because
+the feature they test was broken and has been fixed).
+
+=item B<FAIL>
+
+The test failed.
+
+=item B<UNRESOLVED>
+
+The test result could not be determined. For example, this occurs when the test
+could not be run, the test itself is invalid, or the test was interrupted.
+
+=item B<UNSUPPORTED>
+
+The test is not supported in this environment. This is used by test formats
+which can report unsupported tests.
+
+=back
+
+Depending on the test format, tests may produce additional information about
+their status (generally only for failures). See the L<Output|"LIT OUTPUT">
+section for more information.
+
+=head1 SEE ALSO
+
+L<valgrind(1)>
+
+=head1 AUTHOR
+
+Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod
index eba7859e2882..8adfb682be01 100644
--- a/docs/CommandGuide/llc.pod
+++ b/docs/CommandGuide/llc.pod
@@ -10,18 +10,19 @@ B<llc> [I<options>] [I<filename>]
=head1 DESCRIPTION
-The B<llc> command compiles LLVM bitcode into assembly language for a
+The B<llc> command compiles LLVM source inputs into assembly language for a
specified architecture. The assembly language output can then be passed through
a native assembler and linker to generate a native executable.
The choice of architecture for the output assembly code is automatically
-determined from the input bitcode file, unless the B<-march> option is used to
-override the default.
+determined from the input file, unless the B<-march> option is used to override
+the default.
=head1 OPTIONS
-If I<filename> is - or omitted, B<llc> reads LLVM bitcode from standard input.
-Otherwise, it will read LLVM bitcode from I<filename>.
+If I<filename> is - or omitted, B<llc> reads from standard input. Otherwise, it
+will read from I<filename>. Inputs can be in either the LLVM assembly language
+format (.ll) or the LLVM bitcode format (.bc).
If the B<-o> option is omitted, then B<llc> will send its output to standard
output if the input is from standard input. If the B<-o> option specifies -,
@@ -47,20 +48,15 @@ Generate code at different optimization levels. These correspond to the I<-O0>,
I<-O1>, I<-O2>, I<-O3>, and I<-O4> optimization levels used by B<llvm-gcc> and
B<clang>.
-=item B<-f>
-
-Overwrite output files. By default, B<llc> will refuse to overwrite
-an output file which already exists.
-
=item B<-mtriple>=I<target triple>
-Override the target triple specified in the input bitcode file with the
-specified string.
+Override the target triple specified in the input file with the specified
+string.
=item B<-march>=I<arch>
Specify the architecture for which to generate assembly, overriding the target
-encoded in the bitcode file. See the output of B<llc --help> for a list of
+encoded in the input file. See the output of B<llc --help> for a list of
valid architectures. By default this is inferred from the target triple or
autodetected to the current architecture.
diff --git a/docs/CommandGuide/llvm-as.pod b/docs/CommandGuide/llvm-as.pod
index 2befed13ae00..045a9245b609 100644
--- a/docs/CommandGuide/llvm-as.pod
+++ b/docs/CommandGuide/llvm-as.pod
@@ -46,9 +46,9 @@ suffix is appended.
=item B<-f>
-Force overwrite. Normally, B<llvm-as> will refuse to overwrite an
-output file that already exists. With this option, B<llvm-as>
-will overwrite the output file and replace it with new bitcode.
+Enable binary output on terminals. Normally, B<llvm-as> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-as> will write raw bitcode regardless of the output device.
=item B<--help>
diff --git a/docs/CommandGuide/llvm-dis.pod b/docs/CommandGuide/llvm-dis.pod
index 8df382d2e9c0..2b83290c9b2e 100644
--- a/docs/CommandGuide/llvm-dis.pod
+++ b/docs/CommandGuide/llvm-dis.pod
@@ -29,9 +29,9 @@ B<-o> option.
=item B<-f>
-Force overwrite. Normally, B<llvm-dis> will refuse to overwrite
-an output file that already exists. With this option, B<llvm-dis>
-will overwrite the output file.
+Enable binary output on terminals. Normally, B<llvm-dis> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-dis> will write raw bitcode regardless of the output device.
=item B<--help>
diff --git a/docs/CommandGuide/llvm-extract.pod b/docs/CommandGuide/llvm-extract.pod
index d916612ec5c6..b62e8ae312bf 100644
--- a/docs/CommandGuide/llvm-extract.pod
+++ b/docs/CommandGuide/llvm-extract.pod
@@ -28,9 +28,9 @@ unless the B<-o> option is specified (see below).
=item B<-f>
-Force overwrite. Normally, B<llvm-extract> will refuse to overwrite an
-output file that already exists. With this option, B<llvm-extract>
-will overwrite the output file and replace it with new bitcode.
+Enable binary output on terminals. Normally, B<llvm-extract> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-extract> will write raw bitcode regardless of the output device.
=item B<--func> I<function-name>
@@ -45,6 +45,10 @@ Print a summary of command line options.
Specify the output filename. If filename is "-" (the default), then
B<llvm-extract> sends its output to standard output.
+=item B<-S>
+
+Write output in LLVM intermediate language (instead of bitcode).
+
=back
=head1 EXIT STATUS
diff --git a/docs/CommandGuide/llvm-ld.pod b/docs/CommandGuide/llvm-ld.pod
index 224939c77c94..536ab0fa43d5 100644
--- a/docs/CommandGuide/llvm-ld.pod
+++ b/docs/CommandGuide/llvm-ld.pod
@@ -104,6 +104,12 @@ should be generated by the linker. By default, B<llvm-ld> generates a file named
F<a.out> for compatibility with B<ld>. The output will be written to
F<filename>.
+=item B<-b> F<filename>
+
+This option can be used to override the output bitcode file name. By default,
+the name of the bitcode output file is the name specified with the B<-o>
+option plus an additional ".bc" suffix.
+
=item B<-l>F<name>
This option specifies the F<name> of a library to search when resolving symbols
diff --git a/docs/CommandGuide/llvm-link.pod b/docs/CommandGuide/llvm-link.pod
index 5f4dcb6e354d..e1a1267c52ec 100644
--- a/docs/CommandGuide/llvm-link.pod
+++ b/docs/CommandGuide/llvm-link.pod
@@ -33,14 +33,19 @@ the order in which they were specified on the command line.
=item B<-f>
-Overwrite output files. By default, B<llvm-link> will not overwrite an output
-file if it already exists.
+Enable binary output on terminals. Normally, B<llvm-link> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-link> will write raw bitcode regardless of the output device.
=item B<-o> F<filename>
Specify the output file name. If F<filename> is C<->, then B<llvm-link> will
write its output to standard output.
+=item B<-S>
+
+Write output in LLVM intermediate language (instead of bitcode).
+
=item B<-d>
If specified, B<llvm-link> prints a human-readable version of the output
diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod
index 97445edf4127..e3031e123d38 100644
--- a/docs/CommandGuide/llvmc.pod
+++ b/docs/CommandGuide/llvmc.pod
@@ -71,6 +71,12 @@ write files into the directory specified with the I<-o> option. The
I<--save-temps=cwd> and I<--save-temps> switches are both synonyms for the
default behaviour.
+=item B<--temp-dir> I<directory>
+
+Store temporary files in the given directory. This directory is deleted on exit
+unless I<--save-temps> is specified. If I<--save-temps=obj> is also specified,
+I<--temp-dir> takes precedence.
+
=item B<--help>
Print a summary of command-line options and exit.
diff --git a/docs/CommandGuide/opt.pod b/docs/CommandGuide/opt.pod
index 75b7eddd4cf1..d1d1db5ef67e 100644
--- a/docs/CommandGuide/opt.pod
+++ b/docs/CommandGuide/opt.pod
@@ -11,24 +11,25 @@ B<opt> [I<options>] [I<filename>]
=head1 DESCRIPTION
The B<opt> command is the modular LLVM optimizer and analyzer. It takes LLVM
-bitcode as input, runs the specified optimizations or analyses on it, and then
-outputs the optimized LLVM bitcode or the analysis results. The function of
+source files as input, runs the specified optimizations or analyses on them, and then
+outputs the optimized file or the analysis results. The function of
B<opt> depends on whether the B<-analyze> option is given.
-When B<-analyze> is specified, B<opt> performs various analyses of LLVM
-bitcode. It will usually print the results on standard output, but in a few
-cases, it will print output to standard error or generate a file with the
-analysis output, which is usually done when the output is meant for another
-program.
+When B<-analyze> is specified, B<opt> performs various analyses of the input
+source. It will usually print the results on standard output, but in a few
+cases, it will print output to standard error or generate a file with the
+analysis output, which is usually done when the output is meant for another
+program.
While B<-analyze> is I<not> given, B<opt> attempts to produce an optimized
-bitcode file. The optimizations available via B<opt> depend upon what
+output file. The optimizations available via B<opt> depend upon what
libraries were linked into it as well as any additional libraries that have
been loaded with the B<-load> option. Use the B<-help> option to determine
what optimizations you can use.
If I<filename> is omitted from the command line or is I<->, B<opt> reads its
-input from standard input. The input must be an LLVM bitcode file.
+input from standard input. Inputs can be in either the LLVM assembly language
+format (.ll) or the LLVM bitcode format (.bc).
If an output filename is not specified with the B<-o> option, B<opt>
writes its output to the standard output.
@@ -39,9 +40,9 @@ writes its output to the standard output.
=item B<-f>
-Force overwrite. Normally, B<opt> will refuse to overwrite an
-output file that already exists. With this option, B<opt> will
-overwrite the output file and replace it with new bitcode.
+Enable binary output on terminals. Normally, B<opt> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<opt> will write raw bitcode regardless of the output device.
=item B<-help>
@@ -51,6 +52,10 @@ Print a summary of command line options.
Specify the output filename.
+=item B<-S>
+
+Write output in LLVM intermediate language (instead of bitcode).
+
=item B<-{passname}>
B<opt> provides the ability to run any of LLVM's optimization or analysis passes
diff --git a/docs/CommandLine.html b/docs/CommandLine.html
index d6cf48ce5111..7e6e2f2e13d3 100644
--- a/docs/CommandLine.html
+++ b/docs/CommandLine.html
@@ -331,13 +331,13 @@ OPTIONS:
<div class="doc_text">
<p>In addition to input and output filenames, we would like the compiler example
-to support three boolean flags: "<tt>-f</tt>" to force overwriting of the output
-file, "<tt>--quiet</tt>" to enable quiet mode, and "<tt>-q</tt>" for backwards
-compatibility with some of our users. We can support these by declaring options
-of boolean type like this:</p>
+to support three boolean flags: "<tt>-f</tt>" to force writing binary output to
+a terminal, "<tt>--quiet</tt>" to enable quiet mode, and "<tt>-q</tt>" for
+backwards compatibility with some of our users. We can support these by
+declaring options of boolean type like this:</p>
<div class="doc_code"><pre>
-<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Force ("<i>f</i>", <a href="#cl::desc">cl::desc</a>("<i>Overwrite output files</i>"));
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Force ("<i>f</i>", <a href="#cl::desc">cl::desc</a>("<i>Enable binary output on terminals</i>"));
<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Quiet ("<i>quiet</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>"));
<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Quiet2("<i>q</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>"), <a href="#cl::Hidden">cl::Hidden</a>);
</pre></div>
@@ -378,7 +378,7 @@ library calls to parse the string value into the specified data type.</p>
USAGE: compiler [options] &lt;input file&gt;
OPTIONS:
- <b>-f - Overwrite output files</b>
+ <b>-f - Enable binary output on terminals</b>
-o - Override output filename
<b>-quiet - Don't print informational messages</b>
-help - display available options (--help-hidden for more)
@@ -390,7 +390,7 @@ OPTIONS:
USAGE: compiler [options] &lt;input file&gt;
OPTIONS:
- -f - Overwrite output files
+ -f - Enable binary output on terminals
-o - Override output filename
<b>-q - Don't print informational messages</b>
-quiet - Don't print informational messages
@@ -530,7 +530,7 @@ OPTIONS:
-O1 - Enable trivial optimizations
-O2 - Enable default optimizations
-O3 - Enable expensive optimizations</b>
- -f - Overwrite output files
+ -f - Enable binary output on terminals
-help - display available options (--help-hidden for more)
-o &lt;filename&gt; - Specify output filename
-quiet - Don't print informational messages
@@ -614,7 +614,7 @@ OPTIONS:
=none - disable debug information
=quick - enable quick debug information
=detailed - enable detailed debug information</b>
- -f - Overwrite output files
+ -f - Enable binary output on terminals
-help - display available options (--help-hidden for more)
-o &lt;filename&gt; - Specify output filename
-quiet - Don't print informational messages
@@ -1022,7 +1022,7 @@ files that use them. This is called the internal storage model.</p>
code from the storage of the value parsed. For example, let's say that we have a
'<tt>-debug</tt>' option that we would like to use to enable debug information
across the entire body of our program. In this case, the boolean value
-controlling the debug code should be globally accessable (in a header file, for
+controlling the debug code should be globally accessible (in a header file, for
example) yet the command line option processing code should not be exposed to
all of these clients (requiring lots of .cpp files to #include
<tt>CommandLine.h</tt>).</p>
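+
+<p>For illustration, here is a minimal sketch of that external storage model,
+using <tt>cl::location</tt> (the variable name <tt>DebugFlag</tt> is just an
+example):</p>
+
+<div class="doc_code"><pre>
+<i>// In a header, visible to every client:</i>
+extern bool DebugFlag;
+
+<i>// In the single .cpp file that includes CommandLine.h:</i>
+bool DebugFlag;              <i>// the actual storage, globally accessible</i>
+static cl::opt&lt;bool, true&gt;  <i>// "true" selects external storage</i>
+Debug("<i>debug</i>", cl::desc("<i>Enable debug output</i>"), cl::Hidden,
+      cl::location(DebugFlag));
+</pre></div>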
@@ -1107,7 +1107,7 @@ a command line option. Look <a href="#value_desc_example">here</a> for an
example.</li>
<li><a name="cl::init">The <b><tt>cl::init</tt></b></a> attribute specifies an
-inital value for a <a href="#cl::opt">scalar</a> option. If this attribute is
+initial value for a <a href="#cl::opt">scalar</a> option. If this attribute is
not specified then the command line option value defaults to the value created
by the default constructor for the type. <b>Warning</b>: If you specify both
<b><tt>cl::init</tt></b> and <b><tt>cl::location</tt></b> for an option,
@@ -1178,7 +1178,7 @@ href="#cl::list">cl::list</a></tt>. These modifiers give you the ability to
tweak how options are parsed and how <tt>--help</tt> output is generated to fit
your application well.</p>
-<p>These options fall into five main catagories:</p>
+<p>These options fall into five main categories:</p>
<ol>
<li><a href="#hiding">Hiding an option from <tt>--help</tt> output</a></li>
@@ -1190,9 +1190,9 @@ your application well.</p>
<li><a href="#misc">Miscellaneous option modifiers</a></li>
</ol>
-<p>It is not possible to specify two options from the same catagory (you'll get
+<p>It is not possible to specify two options from the same category (you'll get
a runtime error) to a single option, except for options in the miscellaneous
-catagory. The CommandLine library specifies defaults for all of these settings
+category. The CommandLine library specifies defaults for all of these settings
that are the most useful in practice and the most common, which means that you
usually shouldn't have to worry about these.</p>
@@ -1441,9 +1441,9 @@ string "<tt>-pos1 -foo -bar baz -pos2 -bork</tt>" would cause the "<tt>-foo -bar
<li><a name="cl::Sink">The <b><tt>cl::Sink</tt></b></a> modifier is
used to handle unknown options. If there is at least one option with
-<b><tt>cl::Sink</tt></b></a> modifier specified, the parser passes
+<tt>cl::Sink</tt> modifier specified, the parser passes
unrecognized option strings to it as values instead of signaling an
-error. As with <b><tt>cl::CommaSeparated</tt></b></a>, this modifier
+error. As with <tt>cl::CommaSeparated</tt>, this modifier
only makes sense with a <a href="#cl::list">cl::list</a> option.</li>
</ul>
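+
+<p>As a short, hedged sketch of <tt>cl::Sink</tt> (the list name here is made
+up for the example):</p>
+
+<div class="doc_code"><pre>
+<i>// Unrecognized option strings are collected here instead of causing errors.</i>
+static cl::list&lt;std::string&gt; SinkedArgs(cl::Sink);
+</pre></div>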
@@ -1536,7 +1536,7 @@ not be available, it can't just look in <tt>argv[0]</tt>), the name of the
environment variable to examine, the optional
<a href="#description">additional extra text</a> to emit when the
<tt>--help</tt> option is invoked, and the boolean
-switch that controls whether <a href="#response">reponse files</a>
+switch that controls whether <a href="#response">response files</a>
should be read.</p>
<p><tt>cl::ParseEnvironmentOptions</tt> will break the environment
@@ -1883,7 +1883,7 @@ our example, we implement <tt>parse</tt> as:</p>
default:
<i>// Print an error message if unrecognized character!</i>
- <b>return</b> O.error(": '" + Arg + "' value invalid for file size argument!");
+ <b>return</b> O.error("'" + Arg + "' value invalid for file size argument!");
}
}
}
@@ -1972,7 +1972,7 @@ tutorial.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-17 03:09:39 +0000 (Wed, 17 Jun 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index 6b531c4e47f0..7a40a4d83a55 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -114,6 +114,10 @@ delete them on exit. This option can also take an argument: the
<tt class="docutils literal"><span class="pre">--save-temps=obj</span></tt> switch will write files into the directory specified with
the <tt class="docutils literal"><span class="pre">-o</span></tt> option. The <tt class="docutils literal"><span class="pre">--save-temps=cwd</span></tt> and <tt class="docutils literal"><span class="pre">--save-temps</span></tt> switches are
both synonyms for the default behaviour.</li>
+<li><tt class="docutils literal"><span class="pre">--temp-dir</span> <span class="pre">DIRECTORY</span></tt> - Store temporary files in the given directory. This
+directory is deleted on exit unless <tt class="docutils literal"><span class="pre">--save-temps</span></tt> is specified. If
+<tt class="docutils literal"><span class="pre">--save-temps=obj</span></tt> is also specified, <tt class="docutils literal"><span class="pre">--temp-dir</span></tt> takes
+precedence.</li>
<li><tt class="docutils literal"><span class="pre">--check-graph</span></tt> - Check the compilation for common errors like mismatched
output/input language names, multiple default edges and cycles. Because of
plugins, these checks can't be performed at compile-time. Exit with code zero
@@ -303,13 +307,13 @@ separate option groups syntactically.</p>
<tt class="docutils literal"><span class="pre">-std=c99</span></tt>. It is also allowed to use spaces instead of the equality
sign: <tt class="docutils literal"><span class="pre">-std</span> <span class="pre">c99</span></tt>. At most one occurrence is allowed.</li>
<li><tt class="docutils literal"><span class="pre">parameter_list_option</span></tt> - same as the above, but more than one option
-occurence is allowed.</li>
+occurrence is allowed.</li>
<li><tt class="docutils literal"><span class="pre">prefix_option</span></tt> - same as the parameter_option, but the option name and
argument do not have to be separated. Example: <tt class="docutils literal"><span class="pre">-ofile</span></tt>. This can be also
specified as <tt class="docutils literal"><span class="pre">-o</span> <span class="pre">file</span></tt>; however, <tt class="docutils literal"><span class="pre">-o=file</span></tt> will be parsed incorrectly
(<tt class="docutils literal"><span class="pre">=file</span></tt> will be interpreted as option value). At most one occurrence is
allowed.</li>
-<li><tt class="docutils literal"><span class="pre">prefix_list_option</span></tt> - same as the above, but more than one occurence of
+<li><tt class="docutils literal"><span class="pre">prefix_list_option</span></tt> - same as the above, but more than one occurrence of
the option is allowed; example: <tt class="docutils literal"><span class="pre">-lm</span> <span class="pre">-lpthread</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">alias_option</span></tt> - a special option type for creating aliases. Unlike other
option types, aliases are not allowed to have any properties besides the
@@ -341,6 +345,11 @@ output.</li>
special cases). Usage example: <tt class="docutils literal"><span class="pre">(parameter_list_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(multi_val</span>
<span class="pre">3))</span></tt>. Only list options can have this attribute; you can, however, use
the <tt class="docutils literal"><span class="pre">one_or_more</span></tt> and <tt class="docutils literal"><span class="pre">zero_or_one</span></tt> properties.</li>
+<li><tt class="docutils literal"><span class="pre">init</span></tt> - this option has a default value, either a string (if it is a
+parameter), or a boolean (if it is a switch; boolean constants are called
+<tt class="docutils literal"><span class="pre">true</span></tt> and <tt class="docutils literal"><span class="pre">false</span></tt>). List options can't have this attribute. Usage
+examples: <tt class="docutils literal"><span class="pre">(switch_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(init</span> <span class="pre">true))</span></tt>; <tt class="docutils literal"><span class="pre">(prefix_option</span> <span class="pre">&quot;bar&quot;,</span>
+<span class="pre">(init</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">extern</span></tt> - this option is defined in some other plugin, see below.</li>
</ul>
</blockquote>
@@ -358,7 +367,8 @@ for. Example:</p>
(switch_option &quot;E&quot;, (extern))
...
</pre>
-<p>See also the section on plugin <a class="reference internal" href="#priorities">priorities</a>.</p>
+<p>If an external option has additional attributes besides 'extern', they are
+ignored. See also the section on plugin <a class="reference internal" href="#priorities">priorities</a>.</p>
</div>
</div>
<div class="section" id="conditional-evaluation">
@@ -428,15 +438,21 @@ user.
Example: <tt class="docutils literal"><span class="pre">(not_empty</span> <span class="pre">&quot;o&quot;)</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">empty</span></tt> - The opposite of <tt class="docutils literal"><span class="pre">not_empty</span></tt>. Equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(not_empty</span>
<span class="pre">X))</span></tt>. Provided for convenience.</li>
+<li><tt class="docutils literal"><span class="pre">single_input_file</span></tt> - Returns true if there was only one input file
+provided on the command-line. Used without arguments:
+<tt class="docutils literal"><span class="pre">(single_input_file)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">multiple_input_files</span></tt> - Equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(single_input_file))</span></tt> (the
+case of zero input files is considered an error).</li>
<li><tt class="docutils literal"><span class="pre">default</span></tt> - Always evaluates to true. Should always be the last
test in the <tt class="docutils literal"><span class="pre">case</span></tt> expression.</li>
-<li><tt class="docutils literal"><span class="pre">and</span></tt> - A standard logical combinator that returns true iff all
-of its arguments return true. Used like this: <tt class="docutils literal"><span class="pre">(and</span> <span class="pre">(test1),</span>
-<span class="pre">(test2),</span> <span class="pre">...</span> <span class="pre">(testN))</span></tt>. Nesting of <tt class="docutils literal"><span class="pre">and</span></tt> and <tt class="docutils literal"><span class="pre">or</span></tt> is allowed,
-but not encouraged.</li>
-<li><tt class="docutils literal"><span class="pre">or</span></tt> - Another logical combinator that returns true only if any
-one of its arguments returns true. Example: <tt class="docutils literal"><span class="pre">(or</span> <span class="pre">(test1),</span>
-<span class="pre">(test2),</span> <span class="pre">...</span> <span class="pre">(testN))</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">and</span></tt> - A standard logical combinator that returns true iff all of
+its arguments return true. Used like this: <tt class="docutils literal"><span class="pre">(and</span> <span class="pre">(test1),</span> <span class="pre">(test2),</span>
+<span class="pre">...</span> <span class="pre">(testN))</span></tt>. Nesting of <tt class="docutils literal"><span class="pre">and</span></tt> and <tt class="docutils literal"><span class="pre">or</span></tt> is allowed, but not
+encouraged.</li>
+<li><tt class="docutils literal"><span class="pre">or</span></tt> - A logical combinator that returns true iff any of its
+arguments returns true. Example: <tt class="docutils literal"><span class="pre">(or</span> <span class="pre">(test1),</span> <span class="pre">(test2),</span> <span class="pre">...</span> <span class="pre">(testN))</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">not</span></tt> - Standard unary logical combinator that negates its
+argument. Example: <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(or</span> <span class="pre">(test1),</span> <span class="pre">(test2),</span> <span class="pre">...</span> <span class="pre">(testN)))</span></tt>.</li>
</ul>
</li>
</ul>
@@ -666,7 +682,7 @@ the <tt class="docutils literal"><span class="pre">Base</span></tt> plugin behav
<a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
-Last modified: $Date: 2009-06-30 02:16:43 +0200 (Tue, 30 Jun 2009) $
+Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address></div>
</div>
</div>
diff --git a/docs/DebuggingJITedCode.html b/docs/DebuggingJITedCode.html
new file mode 100644
index 000000000000..92570f454c92
--- /dev/null
+++ b/docs/DebuggingJITedCode.html
@@ -0,0 +1,171 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Debugging JITed Code With GDB</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">Debugging JITed Code With GDB</div>
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#quickstart">Quickstart</a></li>
+ <li><a href="#example">Example with clang and lli</a></li>
+</ol>
+<div class="doc_author">Written by Reid Kleckner</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="introduction">Introduction</a></div>
+<!--=========================================================================-->
+<div class="doc_text">
+
+<p>Without special runtime support, debugging dynamically generated code with
+GDB (as well as most debuggers) can be quite painful. Debuggers generally read
+debug information from the object file of the code, but for JITed code, there is
+no such file to look for.
+</p>
+
+<p>Depending on the architecture, this can impact the debugging experience in
+different ways. For example, on most 32-bit x86 architectures, you can simply
+compile with -fno-omit-frame-pointer for GCC and -fdisable-fp-elim for LLVM.
+When GDB creates a backtrace, it can properly unwind the stack, but the stack
+frames owned by JITed code have ??'s instead of the appropriate symbol name.
+However, on Linux x86_64 in particular, GDB relies on the DWARF CFA debug
+information to unwind the stack, so even if you compile your program to leave
+the frame pointer untouched, GDB will usually be unable to unwind the stack past
+any JITed code stack frames.
+</p>
+
+<p>In order to communicate the necessary debug info to GDB, an interface for
+registering JITed code with debuggers has been designed and implemented for
+GDB and LLVM. At a high level, whenever LLVM generates new machine code, it
+also generates an object file in memory containing the debug information. LLVM
+then adds the object file to the global list of object files and calls a special
+function (__jit_debug_register_code) marked noinline that GDB knows about. When
+GDB attaches to a process, it puts a breakpoint in this function and loads all
+of the object files in the global list. When LLVM calls the registration
+function, GDB catches the breakpoint signal, loads the new object file from
+LLVM's memory, and resumes the execution. In this way, GDB can get the
+necessary debug information.
+</p>
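+
+<p>For reference, here is a sketch of the registration interface itself, as
+documented for GDB (treat this as illustrative; the GDB manual's definition of
+the JIT interface is authoritative):
+</p>
+
+<pre class="doc_code">
+#include &lt;stdint.h&gt;
+
+/* Values for jit_descriptor::action_flag. */
+enum { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN };
+
+struct jit_code_entry {
+  jit_code_entry *next_entry;
+  jit_code_entry *prev_entry;
+  const char *symfile_addr;   /* in-memory object file with debug info */
+  uint64_t symfile_size;
+};
+
+struct jit_descriptor {
+  uint32_t version;           /* currently 1 */
+  uint32_t action_flag;       /* what just happened to relevant_entry */
+  jit_code_entry *relevant_entry;
+  jit_code_entry *first_entry;
+};
+
+/* GDB puts a breakpoint in this function; the JIT calls it after
+   updating the entry list. */
+void __attribute__((noinline)) __jit_debug_register_code() {}
+
+jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+</pre>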
+
+<p>At the time of this writing, LLVM only supports architectures that use ELF
+object files and it only generates symbols and DWARF CFA information. However,
+it would be easy to add more information to the object file, so improving the
+debug information does not require coordinating any changes with GDB.
+</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="quickstart">Quickstart</a></div>
+<!--=========================================================================-->
+<div class="doc_text">
+
+<p>In order to debug code JITed by LLVM, you need to install a recent version
+of GDB. The interface was added on 2009-08-19, so you need a snapshot of GDB
+more recent than that. Either download a snapshot of GDB or check out from CVS as
+instructed <a href="http://www.gnu.org/software/gdb/current/">here</a>. Here
+are the commands for doing a checkout and building the code:
+</p>
+
+<pre class="doc_code">
+$ cvs -z 3 -d :pserver:anoncvs@sourceware.org:/cvs/src co gdb
+$ mv src gdb # You probably don't want this checkout called "src".
+$ cd gdb
+$ ./configure --prefix="$GDB_INSTALL"
+$ make
+$ make install
+</pre>
+
+<p>You can then use -jit-emit-debug in the LLVM command line arguments to enable
+the interface.
+</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="example">Example with clang and lli</a></div>
+<!--=========================================================================-->
+<div class="doc_text">
+
+<p>For example, consider debugging lli running the following C code in
+foo.c:
+</p>
+
+<pre class="doc_code">
+#include &lt;stdio.h&gt;
+
+void foo() {
+ printf("%d\n", *(int*)NULL); // Crash here
+}
+
+void bar() {
+ foo();
+}
+
+void baz() {
+ bar();
+}
+
+int main(int argc, char **argv) {
+ baz();
+}
+</pre>
+
+<p>Here are the commands to run that application under GDB and print the stack
+trace at the crash:
+</p>
+
+<pre class="doc_code">
+# Compile foo.c to bitcode. You can use either clang or llvm-gcc with this
+# command line. Both require -fexceptions, or the calls are all marked
+# 'nounwind' which disables DWARF CFA info.
+$ clang foo.c -fexceptions -emit-llvm -c -o foo.bc
+
+# Run foo.bc under lli with -jit-emit-debug. If you built lli in debug mode,
+# -jit-emit-debug defaults to true.
+$ $GDB_INSTALL/gdb --args lli -jit-emit-debug foo.bc
+...
+
+# Run the code.
+(gdb) run
+Starting program: /tmp/gdb/lli -jit-emit-debug foo.bc
+[Thread debugging using libthread_db enabled]
+
+Program received signal SIGSEGV, Segmentation fault.
+0x00007ffff7f55164 in foo ()
+
+# Print the backtrace, this time with symbols instead of ??.
+(gdb) bt
+#0 0x00007ffff7f55164 in foo ()
+#1 0x00007ffff7f550f9 in bar ()
+#2 0x00007ffff7f55099 in baz ()
+#3 0x00007ffff7f5502a in main ()
+#4 0x00000000007c0225 in llvm::JIT::runFunction(llvm::Function*,
+ std::vector&lt;llvm::GenericValue,
+ std::allocator&lt;llvm::GenericValue&gt; &gt; const&) ()
+#5 0x00000000007d6d98 in
+ llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*,
+ std::vector&lt;std::string,
+ std::allocator&lt;std::string&gt; &gt; const&, char const* const*) ()
+#6 0x00000000004dab76 in main ()
+</pre>
+</div>
+
+<p>As you can see, GDB can correctly unwind the stack and has the appropriate
+function names.
+</p>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+ <a href="mailto:reid.kleckner@gmail.com">Reid Kleckner</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date: 2009-01-01 23:10:51 -0800 (Thu, 01 Jan 2009) $
+</address>
+</body>
+</html>
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index 13a908e5a9d3..49866061c9f6 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -99,7 +99,9 @@
<ol>
<li>Make your patch against the Subversion trunk, not a branch, and not an old
- version of LLVM. This makes it easy to apply the patch.</li>
+ version of LLVM. This makes it easy to apply the patch. For information
+ on how to check out SVN trunk, please see the <a
+ href="GettingStarted.html#checkout">Getting Started Guide</a>.</li>
<li>Similarly, patches should be submitted soon after they are generated. Old
patches may not apply correctly if the underlying code changes between the
@@ -185,14 +187,18 @@ svn diff
else. The current code owners are:</p>
<ol>
+ <li><b>Evan Cheng</b>: Code generator and all targets.</li>
+
+ <li><b>Doug Gregor</b>: Clang Basic, Lex, Parse, and Sema Libraries.</li>
+
<li><b>Anton Korobeynikov</b>: Exception handling, debug information, and
Windows codegen.</li>
- <li><b>Duncan Sands</b>: llvm-gcc 4.2.</li>
-
- <li><b>Evan Cheng</b>: Code generator and all targets.</li>
+ <li><b>Ted Kremenek</b>: Clang Static Analyzer.</li>
- <li><b>Chris Lattner</b>: Everything else.</li>
+ <li><b>Chris Lattner</b>: Everything not covered by someone else.</li>
+
+ <li><b>Duncan Sands</b>: llvm-gcc 4.2.</li>
</ol>
<p>Note that code ownership is completely different than reviewers: anyone can
@@ -287,9 +293,12 @@ svn diff
</ul>
<p>We prefer for this to be handled before submission but understand that it
- isn't possible to test all of this for every submission. Our nightly testing
- infrastructure normally finds these problems. A good rule of thumb is to
- check the nightly testers for regressions the day after your change.</p>
+ isn't possible to test all of this for every submission. Our build bots and
+  nightly testing infrastructure normally find these problems. A good rule of
+ thumb is to check the nightly testers for regressions the day after your
+ change. Build bots will directly email you if a group of commits that
+ included yours caused a failure. You are expected to check the build bot
+ messages to see if they are your fault and, if so, fix the breakage.</p>
<p>Commits that violate these quality standards (e.g. are very broken) may be
reverted. This is necessary when the change blocks other developers from
@@ -592,7 +601,7 @@ Changes</a></div>
Written by the
<a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-15 04:18:54 +0000 (Mon, 15 Jun 2009) $
+ Last modified: $Date: 2009-10-10 23:37:16 +0200 (Sat, 10 Oct 2009) $
</address>
</body>
</html>
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
index a03568f7e367..0ca702f477bf 100644
--- a/docs/ExceptionHandling.html
+++ b/docs/ExceptionHandling.html
@@ -3,8 +3,12 @@
<html>
<head>
<title>Exception Handling in LLVM</title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <meta name="description"
+ content="Exception Handling in LLVM.">
<link rel="stylesheet" href="llvm.css" type="text/css">
</head>
+
<body>
<div class="doc_title">Exception Handling in LLVM</div>
@@ -16,6 +20,7 @@
<li><a href="#introduction">Introduction</a>
<ol>
<li><a href="#itanium">Itanium ABI Zero-cost Exception Handling</a></li>
+ <li><a href="#sjlj">Setjmp/Longjmp Exception Handling</a></li>
<li><a href="#overview">Overview</a></li>
</ol></li>
<li><a href="#codegen">LLVM Code Generation</a>
@@ -33,6 +38,7 @@
<li><a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a></li>
<li><a href="#llvm_eh_sjlj_setjmp"><tt>llvm.eh.sjlj.setjmp</tt></a></li>
<li><a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a></li>
+ <li><a href="#llvm_eh_sjlj_lsda"><tt>llvm.eh.sjlj.lsda</tt></a></li>
</ol></li>
<li><a href="#asm">Asm Table Formats</a>
<ol>
@@ -50,17 +56,17 @@
<!-- *********************************************************************** -->
-<div class="doc_section"><a name="introduction">Introduction</a></div>
+<div class="doc_section"><a name="introduction">Introduction</a></div>
<!-- *********************************************************************** -->
<div class="doc_text">
<p>This document is the central repository for all information pertaining to
-exception handling in LLVM. It describes the format that LLVM exception
-handling information takes, which is useful for those interested in creating
-front-ends or dealing directly with the information. Further, this document
-provides specific examples of what exception handling information is used for
-C/C++.</p>
+ exception handling in LLVM. It describes the format that LLVM exception
+ handling information takes, which is useful for those interested in creating
+ front-ends or dealing directly with the information. Further, this document
+ provides specific examples of what exception handling information is used for
+ in C/C++.</p>
</div>
@@ -72,72 +78,108 @@ C/C++.</p>
<div class="doc_text">
<p>Exception handling for most programming languages is designed to recover from
-conditions that rarely occur during general use of an application. To that end,
-exception handling should not interfere with the main flow of an
-application's algorithm by performing checkpointing tasks such as saving
-the current pc or register state.</p>
+ conditions that rarely occur during general use of an application. To that
+ end, exception handling should not interfere with the main flow of an
+ application's algorithm by performing checkpointing tasks, such as saving the
+ current pc or register state.</p>
<p>The Itanium ABI Exception Handling Specification defines a methodology for
-providing outlying data in the form of exception tables without inlining
-speculative exception handling code in the flow of an application's main
-algorithm. Thus, the specification is said to add "zero-cost" to the normal
-execution of an application.</p>
+ providing outlying data in the form of exception tables without inlining
+ speculative exception handling code in the flow of an application's main
+ algorithm. Thus, the specification is said to add "zero-cost" to the normal
+ execution of an application.</p>
<p>A more complete description of the Itanium ABI exception handling runtime
-support of can be found at <a
-href="http://www.codesourcery.com/cxx-abi/abi-eh.html">Itanium C++ ABI:
-Exception Handling.</a> A description of the exception frame format can be found
-at <a href="http://refspecs.freestandards.org/LSB_3.0.0/LSB-Core-generic/LSB-
-Core-generic/ehframechpt.html">Exception Frames</a>, with details of the Dwarf
-specification at <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">Dwarf 3
-Standard.</a> A description for the C++ exception table formats can be found at
-<a href="http://www.codesourcery.com/cxx-abi/exceptions.pdf">Exception Handling
-Tables.</a></p>
+   support can be found at
+ <a href="http://www.codesourcery.com/cxx-abi/abi-eh.html">Itanium C++ ABI:
+ Exception Handling</a>. A description of the exception frame format can be
+ found at
+ <a href="http://refspecs.freestandards.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html">Exception
+ Frames</a>, with details of the DWARF 3 specification at
+ <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">DWARF 3 Standard</a>.
+ A description for the C++ exception table formats can be found at
+ <a href="http://www.codesourcery.com/cxx-abi/exceptions.pdf">Exception Handling
+ Tables</a>.</p>
</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
+ <a name="sjlj">Setjmp/Longjmp Exception Handling</a>
+</div>
+
+<div class="doc_text">
+
+<p>Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics
+ <a href="#llvm_eh_sjlj_setjmp"><tt>llvm.eh.sjlj.setjmp</tt></a> and
+ <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a> to
+ handle control flow for exception handling.</p>
+
+<p>Each function that does exception processing, be it <tt>try</tt>/<tt>catch</tt>
+   blocks or cleanups, registers itself on a global frame list. When exceptions
+   are being unwound, the runtime uses this list to identify which functions
+   need processing.</p>
+
+<p>Landing pad selection is encoded in the call site entry of the function
+ context. The runtime returns to the function via
+ <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a>, where
+ a switch table transfers control to the appropriate landing pad based on
+ the index stored in the function context.</p>
+
+<p>In contrast to DWARF exception handling, which encodes exception regions
+ and frame information in out-of-line tables, SJLJ exception handling
+ builds and removes the unwind frame context at runtime. This results in
+ faster exception handling at the expense of slower execution when no
+ exceptions are thrown. As exceptions are, by their nature, intended for
+ uncommon code paths, DWARF exception handling is generally preferred to
+ SJLJ.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
<a name="overview">Overview</a>
</div>
<div class="doc_text">
-<p>When an exception is thrown in llvm code, the runtime does a best effort to
-find a handler suited to process the circumstance.</p>
+<p>When an exception is thrown in LLVM code, the runtime does its best to find a
+ handler suited to processing the circumstance.</p>
<p>The runtime first attempts to find an <i>exception frame</i> corresponding to
-the function where the exception was thrown. If the programming language (ex.
-C++) supports exception handling, the exception frame contains a reference to an
-exception table describing how to process the exception. If the language (ex.
-C) does not support exception handling or if the exception needs to be forwarded
-to a prior activation, the exception frame contains information about how to
-unwind the current activation and restore the state of the prior activation.
-This process is repeated until the exception is handled. If the exception is
-not handled and no activations remain, then the application is terminated with
-an appropriate error message.</p>
-
-<p>Since different programming languages have different behaviors when handling
-exceptions, the exception handling ABI provides a mechanism for supplying
-<i>personalities.</i> An exception handling personality is defined by way of a
-<i>personality function</i> (ex. for C++ <tt>__gxx_personality_v0</tt>) which
-receives the context of the exception, an <i>exception structure</i> containing
-the exception object type and value, and a reference to the exception table for
-the current function. The personality function for the current compile unit is
-specified in a <i>common exception frame</i>.</p>
+ the function where the exception was thrown. If the programming language
+ (e.g. C++) supports exception handling, the exception frame contains a
+ reference to an exception table describing how to process the exception. If
+ the language (e.g. C) does not support exception handling, or if the
+ exception needs to be forwarded to a prior activation, the exception frame
+ contains information about how to unwind the current activation and restore
+ the state of the prior activation. This process is repeated until the
+ exception is handled. If the exception is not handled and no activations
+ remain, then the application is terminated with an appropriate error
+ message.</p>
+
+<p>Because different programming languages have different behaviors when
+ handling exceptions, the exception handling ABI provides a mechanism for
+ supplying <i>personalities.</i> An exception handling personality is defined
+ by way of a <i>personality function</i> (e.g. <tt>__gxx_personality_v0</tt>
+ in C++), which receives the context of the exception, an <i>exception
+ structure</i> containing the exception object type and value, and a reference
+ to the exception table for the current function. The personality function
+ for the current compile unit is specified in a <i>common exception
+ frame</i>.</p>
<p>The organization of an exception table is language dependent. For C++, an
-exception table is organized as a series of code ranges defining what to do if
-an exception occurs in that range. Typically, the information associated with a
-range defines which types of exception objects (using C++ <i>type info</i>) that
-are handled in that range, and an associated action that should take place.
-Actions typically pass control to a <i>landing pad</i>.</p>
+ exception table is organized as a series of code ranges defining what to do
+ if an exception occurs in that range. Typically, the information associated
+ with a range defines which types of exception objects (using C++ <i>type
+   info</i>) are handled in that range, and an associated action that
+ should take place. Actions typically pass control to a <i>landing
+ pad</i>.</p>
-<p>A landing pad corresponds to the code found in the catch portion of a
-try/catch sequence. When execution resumes at a landing pad, it receives the
-exception structure and a selector corresponding to the <i>type</i> of exception
-thrown. The selector is then used to determine which catch should actually
-process the exception.</p>
+<p>A landing pad corresponds to the code found in the <i>catch</i> portion of
+ a <i>try</i>/<i>catch</i> sequence. When execution resumes at a landing
+ pad, it receives the exception structure and a selector corresponding to
+ the <i>type</i> of exception thrown. The selector is then used to determine
+ which <i>catch</i> should actually process the exception.</p>
</div>
@@ -149,12 +191,12 @@ process the exception.</p>
<div class="doc_text">
<p>At the time of this writing, only C++ exception handling support is available
-in LLVM. So the remainder of this document will be somewhat C++-centric.</p>
+ in LLVM. So the remainder of this document will be somewhat C++-centric.</p>
<p>From the C++ developer's perspective, exceptions are defined in terms of the
-<tt>throw</tt> and <tt>try/catch</tt> statements. In this section we will
-describe the implementation of llvm exception handling in terms of C++
-examples.</p>
+ <tt>throw</tt> and <tt>try</tt>/<tt>catch</tt> statements. In this section
+ we will describe the implementation of LLVM exception handling in terms of
+ C++ examples.</p>
</div>
@@ -166,17 +208,17 @@ examples.</p>
<div class="doc_text">
<p>Languages that support exception handling typically provide a <tt>throw</tt>
-operation to initiate the exception process. Internally, a throw operation
-breaks down into two steps. First, a request is made to allocate exception
-space for an exception structure. This structure needs to survive beyond the
-current activation. This structure will contain the type and value of the
-object being thrown. Second, a call is made to the runtime to raise the
-exception, passing the exception structure as an argument.</p>
+ operation to initiate the exception process. Internally, a throw operation
+ breaks down into two steps. First, a request is made to allocate exception
+ space for an exception structure. This structure needs to survive beyond the
+ current activation. This structure will contain the type and value of the
+ object being thrown. Second, a call is made to the runtime to raise the
+ exception, passing the exception structure as an argument.</p>
-<p>In C++, the allocation of the exception structure is done by the
-<tt>__cxa_allocate_exception</tt> runtime function. The exception raising is
-handled by <tt>__cxa_throw</tt>. The type of the exception is represented using
-a C++ RTTI type info structure.</p>
+<p>In C++, the allocation of the exception structure is done by
+ the <tt>__cxa_allocate_exception</tt> runtime function. The exception
+ raising is handled by <tt>__cxa_throw</tt>. The type of the exception is
+ represented using a C++ RTTI structure.</p>
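+
+<p>As a hedged illustration, a statement like <tt>throw 42;</tt> conceptually
+   lowers to calls resembling the following (signatures per the Itanium C++
+   ABI, slightly simplified):</p>
+
+<pre>
+  // Step 1: allocate exception storage that survives unwinding this frame.
+  void *mem = __cxa_allocate_exception(sizeof(int));
+  *(int *)mem = 42;
+  // Step 2: raise the exception; the type is described via RTTI.
+  __cxa_throw(mem, (std::type_info *)&typeid(int), /*destructor=*/0);
+</pre>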
</div>
@@ -187,67 +229,84 @@ a C++ RTTI type info structure.</p>
<div class="doc_text">
-<p>A call within the scope of a try statement can potentially raise an exception.
-In those circumstances, the LLVM C++ front-end replaces the call with an
-<tt>invoke</tt> instruction. Unlike a call, the invoke has two potential
-continuation points; where to continue when the call succeeds as per normal, and
-where to continue if the call raises an exception, either by a throw or the
-unwinding of a throw.</p>
-
-<p>The term used to define a the place where an invoke continues after an
-exception is called a <i>landing pad</i>. LLVM landing pads are conceptually
-alternative function entry points where a exception structure reference and a type
-info index are passed in as arguments. The landing pad saves the exception
-structure reference and then proceeds to select the catch block that corresponds
-to the type info of the exception object.</p>
+<p>A call within the scope of a <i>try</i> statement can potentially raise an
+ exception. In those circumstances, the LLVM C++ front-end replaces the call
+ with an <tt>invoke</tt> instruction. Unlike a call, the <tt>invoke</tt> has
+ two potential continuation points: where to continue when the call succeeds
+ as per normal; and where to continue if the call raises an exception, either
+ by a throw or the unwinding of a throw.</p>
-<p>Two llvm intrinsic functions are used convey information about the landing
-pad to the back end.</p>
+<p>The term used to define the place where an <tt>invoke</tt> continues after
+ an exception is called a <i>landing pad</i>. LLVM landing pads are
+ conceptually alternative function entry points where an exception structure
+ reference and a type info index are passed in as arguments. The landing pad
+ saves the exception structure reference and then proceeds to select the catch
+ block that corresponds to the type info of the exception object.</p>
-<p><a href="#llvm_eh_exception"><tt>llvm.eh.exception</tt></a> takes no
-arguments and returns a pointer to the exception structure. This only returns a
-sensible value if called after an invoke has branched to a landing pad. Due to
-codegen limitations, it must currently be called in the landing pad itself.</p>
+<p>Two LLVM intrinsic functions are used to convey information about the landing
+ pad to the back end.</p>
-<p><a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> takes a minimum of
-three arguments. The first argument is the reference to the exception
-structure. The second argument is a reference to the personality function to be
-used for this try catch sequence. Each of the remaining arguments is either a
-reference to the type info for a catch statement,
-a <a href="#throw_filters">filter</a> expression,
-or the number zero representing a <a href="#cleanups">cleanup</a>.
-The exception is tested against the arguments sequentially from first to last.
-The result of the <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> is a
-positive number if the exception matched a type info, a negative number if it matched
-a filter, and zero if it matched a cleanup. If nothing is matched, the behaviour of
-the program is <a href="#restrictions">undefined</a>.
-This only returns a sensible value if called after an invoke has branched to a
-landing pad. Due to codegen limitations, it must currently be called in the
-landing pad itself.
-If a type info matched then the selector value is the index of the type info in
-the exception table, which can be obtained using the
-<a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic.</p>
+<ol>
+ <li><a href="#llvm_eh_exception"><tt>llvm.eh.exception</tt></a> takes no
+ arguments and returns a pointer to the exception structure. This only
+ returns a sensible value if called after an <tt>invoke</tt> has branched
+ to a landing pad. Due to code generation limitations, it must currently
+ be called in the landing pad itself.</li>
+
+ <li><a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> takes a minimum
+ of three arguments. The first argument is the reference to the exception
+ structure. The second argument is a reference to the personality function
+ to be used for this <tt>try</tt>/<tt>catch</tt> sequence. Each of the
+ remaining arguments is either a reference to the type info for
+ a <tt>catch</tt> statement, a <a href="#throw_filters">filter</a>
+ expression, or the number zero (<tt>0</tt>) representing
+ a <a href="#cleanups">cleanup</a>. The exception is tested against the
+ arguments sequentially from first to last. The result of
+ the <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> is a
+ positive number if the exception matched a type info, a negative number if
+ it matched a filter, and zero if it matched a cleanup. If nothing is
+ matched, the behaviour of the program
+ is <a href="#restrictions">undefined</a>. This only returns a sensible
+ value if called after an <tt>invoke</tt> has branched to a landing pad.
+ Due to codegen limitations, it must currently be called in the landing pad
+ itself. If a type info matched, then the selector value is the index of
+ the type info in the exception table, which can be obtained using the
+ <a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a>
+ intrinsic.</li>
+</ol>
<p>Once the landing pad has the type info selector, the code branches to the
-code for the first catch. The catch then checks the value of the type info
-selector against the index of type info for that catch. Since the type info
-index is not known until all the type info have been gathered in the backend,
-the catch code will call the <a
-href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic to
-determine the index for a given type info. If the catch fails to match the
-selector then control is passed on to the next catch. Note: Since the landing
-pad will not be used if there is no match in the list of type info on the call
-to <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>, then neither the
-last catch nor <i>catch all</i> need to perform the the check against the
-selector.</p>
-
-<p>Finally, the entry and exit of catch code is bracketed with calls to
-<tt>__cxa_begin_catch</tt> and <tt>__cxa_end_catch</tt>.
-<tt>__cxa_begin_catch</tt> takes a exception structure reference as an argument
-and returns the value of the exception object. <tt>__cxa_end_catch</tt>
-takes a exception structure reference as an argument. This function clears the
-exception from the exception space. Note: a rethrow from within the catch may
-replace this call with a <tt>__cxa_rethrow</tt>.</p>
+ code for the first catch. The catch then checks the value of the type info
+ selector against the index of type info for that catch. Since the type info
+ index is not known until all the type info have been gathered in the backend,
+ the catch code will call the
+ <a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic
+ to determine the index for a given type info. If the catch fails to match
+ the selector then control is passed on to the next catch. Note: Since the
+ landing pad will not be used if there is no match in the list of type info on
+ the call to <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>, then
+ neither the last catch nor <i>catch all</i> need to perform the check
+ against the selector.</p>
+
+<p>Finally, the entry and exit of catch code is bracketed with calls
+ to <tt>__cxa_begin_catch</tt> and <tt>__cxa_end_catch</tt>.</p>
+
+<ul>
+  <li><tt>__cxa_begin_catch</tt> takes an exception structure reference as an
+ argument and returns the value of the exception object.</li>
+
+ <li><tt>__cxa_end_catch</tt> takes no arguments. This function:<br><br>
+ <ol>
+ <li>Locates the most recently caught exception and decrements its handler
+ count,</li>
+ <li>Removes the exception from the "caught" stack if the handler count
+ goes to zero, and</li>
+ <li>Destroys the exception if the handler count goes to zero, and the
+ exception was not re-thrown by throw.</li>
+ </ol>
+ <p>Note: a rethrow from within the catch may replace this call with
+ a <tt>__cxa_rethrow</tt>.</p></li>
+</ul>
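+
+<p>As a hedged sketch, the bracketing for a <tt>catch (int i)</tt> block
+   resembles (<tt>exc</tt> stands for the saved exception structure
+   reference):</p>
+
+<pre>
+  int i = *(int *)__cxa_begin_catch(exc);  // fetch the exception object
+  // ... user code for the catch body ...
+  __cxa_end_catch();                       // decrement the handler count
+</pre>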
</div>
@@ -258,16 +317,15 @@ replace this call with a <tt>__cxa_rethrow</tt>.</p>
<div class="doc_text">
-<p>To handle destructors and cleanups in try code, control may not run directly
-from a landing pad to the first catch. Control may actually flow from the
-landing pad to clean up code and then to the first catch. Since the required
-clean up for each invoke in a try may be different (ex., intervening
-constructor), there may be several landing pads for a given try. If cleanups
-need to be run, the number zero should be passed as the last
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> argument.
-However for C++ a <tt>null i8*</tt> <a href="#restrictions">must</a> be passed
-instead.
-</p>
+<p>To handle destructors and cleanups in <tt>try</tt> code, control may not run
+ directly from a landing pad to the first catch. Control may actually flow
+ from the landing pad to clean up code and then to the first catch. Since the
+ required clean up for each <tt>invoke</tt> in a <tt>try</tt> may be different
+ (e.g. intervening constructor), there may be several landing pads for a given
+ try. If cleanups need to be run, an <tt>i32 0</tt> should be passed as the
+ last <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> argument.
+   However, when using DWARF exception handling with C++, an <tt>i8* null</tt>
+ <a href="#restrictions">must</a> be passed instead.</p>
</div>
@@ -278,23 +336,23 @@ instead.
<div class="doc_text">
-<p>C++ allows the specification of which exception types can be thrown from
-a function. To represent this a top level landing pad may exist to filter out
-invalid types. To express this in LLVM code the landing pad will call <a
-href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>. The arguments are a
-reference to the exception structure, a reference to the personality function,
-the length of the filter expression (the number of type infos plus one),
-followed by the type infos themselves.
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> will return a negative
-value if the exception does not match any of the type infos. If no match is
-found then a call to <tt>__cxa_call_unexpected</tt> should be made, otherwise
-<tt>_Unwind_Resume</tt>. Each of these functions requires a reference to the
-exception structure. Note that the most general form of an
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> call can contain
-any number of type infos, filter expressions and cleanups (though having more
-than one cleanup is pointless). The LLVM C++ front-end can generate such
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> calls due to inlining
-creating nested exception handling scopes.</p>
+<p>C++ allows the specification of which exception types can be thrown from a
+ function. To represent this a top level landing pad may exist to filter out
+ invalid types. To express this in LLVM code the landing pad will
+ call <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>. The
+ arguments are a reference to the exception structure, a reference to the
+ personality function, the length of the filter expression (the number of type
+ infos plus one), followed by the type infos themselves.
+ <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> will return a
+ negative value if the exception does not match any of the type infos. If no
+ match is found then a call to <tt>__cxa_call_unexpected</tt> should be made,
+ otherwise <tt>_Unwind_Resume</tt>. Each of these functions requires a
+ reference to the exception structure. Note that the most general form of an
+ <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> call can contain
+ any number of type infos, filter expressions and cleanups (though having more
+ than one cleanup is pointless). The LLVM C++ front-end can generate such
+ <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> calls due to
+ inlining creating nested exception handling scopes.</p>
</div>
@@ -306,23 +364,21 @@ creating nested exception handling scopes.</p>
<div class="doc_text">
<p>The semantics of the invoke instruction require that any exception that
-unwinds through an invoke call should result in a branch to the invoke's unwind
-label. However such a branch will only happen if the
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> matches.
-Thus in order to ensure correct operation, the front-end must only generate
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> calls that are
-guaranteed to always match whatever exception unwinds through the invoke.
-For most languages it is enough to pass zero, indicating the presence of
-a <a href="#cleanups">cleanup</a>, as the last
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> argument.
-However for C++ this is not sufficient, because the C++ personality function
-will terminate the program if it detects that unwinding the exception only
-results in matches with cleanups. For C++ a <tt>null i8*</tt> should
-be passed as the last
-<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> argument instead.
-This is interpreted as a catch-all by the C++ personality function, and will
-always match.
-</p>
+ unwinds through an invoke call should result in a branch to the invoke's
+ unwind label. However such a branch will only happen if the
+ <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> matches. Thus in
+ order to ensure correct operation, the front-end must only generate
+ <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> calls that are
+ guaranteed to always match whatever exception unwinds through the invoke.
+ For most languages it is enough to pass zero, indicating the presence of
+ a <a href="#cleanups">cleanup</a>, as the
+ last <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> argument.
+ However for C++ this is not sufficient, because the C++ personality function
+ will terminate the program if it detects that unwinding the exception only
+ results in matches with cleanups. For C++ a <tt>null i8*</tt> should be
+ passed as the last <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>
+ argument instead. This is interpreted as a catch-all by the C++ personality
+ function, and will always match.</p>
</div>
@@ -334,7 +390,8 @@ always match.
<div class="doc_text">
<p>LLVM uses several intrinsic functions (name prefixed with "llvm.eh") to
-provide exception handling information at various points in generated code.</p>
+ provide exception handling information at various points in generated
+ code.</p>
</div>
@@ -344,6 +401,7 @@ provide exception handling information at various points in generated code.</p>
</div>
<div class="doc_text">
+
<pre>
i8* %<a href="#llvm_eh_exception">llvm.eh.exception</a>( )
</pre>
@@ -358,29 +416,29 @@ provide exception handling information at various points in generated code.</p>
</div>
<div class="doc_text">
+
<pre>
- i32 %<a href="#llvm_eh_selector">llvm.eh.selector.i32</a>(i8*, i8*, i8*, ...)
- i64 %<a href="#llvm_eh_selector">llvm.eh.selector.i64</a>(i8*, i8*, i8*, ...)
+ i32 %<a href="#llvm_eh_selector">llvm.eh.selector</a>(i8*, i8*, i8*, ...)
</pre>
<p>This intrinsic is used to compare the exception with the given type infos,
-filters and cleanups.</p>
+ filters and cleanups.</p>
<p><a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> takes a minimum of
-three arguments. The first argument is the reference to the exception
-structure. The second argument is a reference to the personality function to be
-used for this try catch sequence. Each of the remaining arguments is either a
-reference to the type info for a catch statement,
-a <a href="#throw_filters">filter</a> expression,
-or the number zero representing a <a href="#cleanups">cleanup</a>.
-The exception is tested against the arguments sequentially from first to last.
-The result of the <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> is a
-positive number if the exception matched a type info, a negative number if it matched
-a filter, and zero if it matched a cleanup. If nothing is matched, the behaviour of
-the program is <a href="#restrictions">undefined</a>.
-If a type info matched then the selector value is the index of the type info in
-the exception table, which can be obtained using the
-<a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic.</p>
+ three arguments. The first argument is the reference to the exception
+ structure. The second argument is a reference to the personality function to
+   be used for this <tt>try</tt>/<tt>catch</tt> sequence. Each of the remaining arguments is
+ either a reference to the type info for a catch statement,
+ a <a href="#throw_filters">filter</a> expression, or the number zero
+ representing a <a href="#cleanups">cleanup</a>. The exception is tested
+ against the arguments sequentially from first to last. The result of
+ the <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> is a positive
+ number if the exception matched a type info, a negative number if it matched
+ a filter, and zero if it matched a cleanup. If nothing is matched, the
+ behaviour of the program is <a href="#restrictions">undefined</a>. If a type
+ info matched then the selector value is the index of the type info in the
+ exception table, which can be obtained using the
+ <a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic.</p>
</div>
@@ -390,15 +448,15 @@ the exception table, which can be obtained using the
</div>
<div class="doc_text">
+
<pre>
- i32 %<a href="#llvm_eh_typeid_for">llvm.eh.typeid.for.i32</a>(i8*)
- i64 %<a href="#llvm_eh_typeid_for">llvm.eh.typeid.for.i64</a>(i8*)
+ i32 %<a href="#llvm_eh_typeid_for">llvm.eh.typeid.for</a>(i8*)
</pre>
<p>This intrinsic returns the type info index in the exception table of the
-current function. This value can be used to compare against the result of <a
-href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>. The single argument is
-a reference to a type info.</p>
+ current function. This value can be used to compare against the result
+ of <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>. The single
+ argument is a reference to a type info.</p>
</div>
@@ -408,23 +466,47 @@ a reference to a type info.</p>
</div>
<div class="doc_text">
+
<pre>
i32 %<a href="#llvm_eh_sjlj_setjmp">llvm.eh.sjlj.setjmp</a>(i8*)
</pre>
-<p>The SJLJ exception handling uses this intrinsic to force register saving
-for the current function and to store the address of the following instruction
-for use as a destination address by <a href="#llvm_eh_sjlj_setjmp">
-<tt>llvm.eh.sjlj.longjmp</tt></a>. The buffer format and the overall functioning
-of this intrinsic is compatible with the GCC <tt>__builtin_setjmp</tt>
-implementation, allowing code built with the two compilers to interoperate.</p>
+<p>The SJLJ exception handling uses this intrinsic to force register saving for
+ the current function and to store the address of the following instruction
+ for use as a destination address by <a href="#llvm_eh_sjlj_longjmp">
+ <tt>llvm.eh.sjlj.longjmp</tt></a>. The buffer format and the overall
+ functioning of this intrinsic is compatible with the GCC
+ <tt>__builtin_setjmp</tt> implementation, allowing code built with the
+ two compilers to interoperate.</p>
+
+<p>The single parameter is a pointer to a five-word buffer in which the calling
+ context is saved. The front end places the frame pointer in the first word,
+ and the target implementation of this intrinsic should place the destination
+ address for a
+ <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a> in the
+ second word. The following three words are available for use in a
+ target-specific manner.</p>
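+
+<p>Restated as a sketch, the buffer layout described above is:</p>
+
+<pre>
+  void *buf[5];
+  // buf[0]: frame pointer (stored by the front end)
+  // buf[1]: destination address for llvm.eh.sjlj.longjmp (stored by the target)
+  // buf[2]..buf[4]: available for target-specific use
+</pre>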
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="llvm_eh_sjlj_lsda">llvm.eh.sjlj.lsda</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ i8* %<a href="#llvm_eh_sjlj_lsda">llvm.eh.sjlj.lsda</a>( )
+</pre>
+
+<p>Used for SJLJ based exception handling, the <a href="#llvm_eh_sjlj_lsda">
+ <tt>llvm.eh.sjlj.lsda</tt></a> intrinsic returns the address of the Language
+ Specific Data Area (LSDA) for the current function. The SJLJ front-end code
+ stores this address in the exception handling function context for use by the
+ runtime.</p>
-<p>The single parameter is a pointer to a five word buffer in which the
-calling context is saved. The front end places the frame pointer in the
-first word, and the target implementation of this intrinsic should place the
-destination address for a <a href="#llvm_eh_sjlj_longjmp"><tt>
-llvm.eh.sjlj.longjmp</tt></a> in the second word. The following three words
-are available for use in a target-specific manner.</p>
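+
+<p>A minimal illustrative call (a sketch, not from the original text):</p>
+
+<pre>
+  ; Fetch the current function's LSDA address for the EH function context.
+  %lsda = call i8* @llvm.eh.sjlj.lsda()
+</pre>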
+</div>
<!-- ======================================================================= -->
<div class="doc_section">
@@ -434,7 +516,7 @@ are available for use in a target-specific manner.</p>
<div class="doc_text">
<p>There are two tables that are used by the exception handling runtime to
-determine which actions should take place when an exception is thrown.</p>
+ determine which actions should take place when an exception is thrown.</p>
</div>
@@ -446,11 +528,11 @@ determine which actions should take place when an exception is thrown.</p>
<div class="doc_text">
<p>An exception handling frame <tt>eh_frame</tt> is very similar to the unwind
-frame used by dwarf debug info. The frame contains all the information
-necessary to tear down the current frame and restore the state of the prior
-frame. There is an exception handling frame for each function in a compile
-unit, plus a common exception handling frame that defines information common to
-all functions in the unit.</p>
+ frame used by dwarf debug info. The frame contains all the information
+ necessary to tear down the current frame and restore the state of the prior
+ frame. There is an exception handling frame for each function in a compile
+ unit, plus a common exception handling frame that defines information common
+ to all functions in the unit.</p>
<p>Todo - Table details here.</p>
@@ -464,9 +546,9 @@ all functions in the unit.</p>
<div class="doc_text">
<p>An exception table contains information about what actions to take when an
-exception is thrown in a particular part of a function's code. There is
-one exception table per function except leaf routines and functions that have
-only calls to non-throwing functions will not need an exception table.</p>
+  exception is thrown in a particular part of a function's code. There is one
+  exception table per function, except that leaf routines and functions that
+  make only calls to non-throwing functions do not need an exception table.</p>
<p>Todo - Table details here.</p>
@@ -481,7 +563,7 @@ only calls to non-throwing functions will not need an exception table.</p>
<ol>
-<li><p>Testing/Testing/Testing.</p></li>
+ <li>Testing/Testing/Testing.</li>
</ol>
@@ -498,7 +580,7 @@ only calls to non-throwing functions will not need an exception table.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-05-14 17:44:15 +0200 (Thu, 14 May 2009) $
+ Last modified: $Date: 2009-10-14 18:11:37 +0200 (Wed, 14 Oct 2009) $
</address>
</body>
diff --git a/docs/FAQ.html b/docs/FAQ.html
index 9fd89288f54c..00746381f7b9 100644
--- a/docs/FAQ.html
+++ b/docs/FAQ.html
@@ -685,7 +685,7 @@ Stop.
<p>Also, there are a number of other limitations of the C backend that cause it
to produce code that does not fully conform to the C++ ABI on most
platforms. Some of the C++ programs in LLVM's test suite are known to fail
- when compiled with the C back end because of ABI incompatiblities with
+ when compiled with the C back end because of ABI incompatibilities with
standard C++ libraries.</p>
</div>
@@ -700,7 +700,7 @@ Stop.
portable is by using the preprocessor to include platform-specific code. In
practice, information about other platforms is lost after preprocessing, so
the result is inherently dependent on the platform that the preprocessing was
- targetting.</p>
+ targeting.</p>
<p>Another example is <tt>sizeof</tt>. It's common for <tt>sizeof(long)</tt> to
vary between platforms. In most C front-ends, <tt>sizeof</tt> is expanded to
@@ -931,7 +931,7 @@ F.i:
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-30 19:10:19 +0200 (Tue, 30 Jun 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html
index 9d2243f58986..873faa6c5373 100644
--- a/docs/GCCFEBuildInstrs.html
+++ b/docs/GCCFEBuildInstrs.html
@@ -88,6 +88,7 @@ top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
are <a href="http://gcc.gnu.org/releases.html">gcc-4.2</a> and the
2005, 2006 and 2007 versions of the
<a href="http://libre.adacore.com/">GNAT GPL Edition</a>.
+ <b>GNAT GPL 2008, gcc-4.3 and later will not work</b>.
The LLVM parts of llvm-gcc are written in C++ so a C++ compiler is
needed to build them. The rest of gcc is written in C.
Some linux distributions provide a version of gcc that supports all
@@ -271,7 +272,7 @@ More information is <a href="FAQ.html#license">available in the FAQ</a>.
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-04-27 13:21:35 +0200 (Mon, 27 Apr 2009) $
+ Last modified: $Date: 2009-07-05 14:01:44 +0200 (Sun, 05 Jul 2009) $
</address>
</body>
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
index 562025f13eae..a372f697f9b2 100644
--- a/docs/GarbageCollection.html
+++ b/docs/GarbageCollection.html
@@ -334,11 +334,11 @@ void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
// For roots [0, NumMeta), the metadata pointer is in the FrameMap.
for (unsigned e = R->Map->NumMeta; i != e; ++i)
- Visitor(&R->Roots[i], R->Map->Meta[i]);
+ Visitor(&amp;R->Roots[i], R->Map->Meta[i]);
// For roots [NumMeta, NumRoots), the metadata pointer is null.
for (unsigned e = R->Map->NumRoots; i != e; ++i)
- Visitor(&R->Roots[i], NULL);
+ Visitor(&amp;R->Roots[i], NULL);
}
}</pre></div>
@@ -398,7 +398,7 @@ program.</p>
</div>
<div class="doc_code"><tt>
- define <i>ty</i> @<i>name</i>(...) <u>gc "<i>name</i>"</u> { ...
+ define <i>ty</i> @<i>name</i>(...) <span style="text-decoration: underline">gc "<i>name</i>"</span> { ...
</tt></div>
<div class="doc_text">
@@ -1380,7 +1380,7 @@ Fergus Henderson. International Symposium on Memory Management 2002.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-05-13 20:02:09 +0200 (Wed, 13 May 2009) $
+ Last modified: $Date: 2009-08-05 17:42:44 +0200 (Wed, 05 Aug 2009) $
</address>
</body>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
index f4b096a10de5..d5863e8a52b7 100644
--- a/docs/GetElementPtr.html
+++ b/docs/GetElementPtr.html
@@ -40,7 +40,7 @@
<div class="doc_text">
<p>This document seeks to dispel the mystery and confusion surrounding LLVM's
GetElementPtr (GEP) instruction. Questions about the wily GEP instruction are
- probably the most frequently occuring questions once a developer gets down to
+ probably the most frequently occurring questions once a developer gets down to
coding with LLVM. Here we lay out the sources of confusion and show that the
GEP instruction is really quite simple.
</p>
@@ -303,13 +303,14 @@ idx3 = (char*) &amp;MyVar + 8
</div>
<p>In this example, <tt>idx1</tt> computes the address of the second integer
- in the array that is in the structure in %MyVar, that is <tt>MyVar+4</tt>. The
- type of <tt>idx1</tt> is <tt>i32*</tt>. However, <tt>idx2</tt> computes the
- address of <i>the next</i> structure after <tt>%MyVar</tt>. The type of
- <tt>idx2</tt> is <tt>{ [10 x i32] }*</tt> and its value is equivalent
- to <tt>MyVar + 40</tt> because it indexes past the ten 4-byte integers
- in <tt>MyVar</tt>. Obviously, in such a situation, the pointers don't
- alias.</p>
+ in the array that is in the structure in <tt>%MyVar</tt>, that is
+ <tt>MyVar+4</tt>. The type of <tt>idx1</tt> is <tt>i32*</tt>. However,
+ <tt>idx2</tt> computes the address of <i>the next</i> structure after
+ <tt>%MyVar</tt>. The type of <tt>idx2</tt> is <tt>{ [10 x i32] }*</tt> and its
+ value is equivalent to <tt>MyVar + 40</tt> because it indexes past the ten
+ 4-byte integers in <tt>MyVar</tt>. Obviously, in such a situation, the
+ pointers don't alias.</p>
+
</div>
<!-- *********************************************************************** -->
@@ -364,7 +365,7 @@ idx3 = (char*) &amp;MyVar + 8
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br/>
- Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
</html>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index 26a46885082d..8a8bce377d13 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -216,11 +216,21 @@ software you will need.</p>
<th>Compilers</th>
</tr>
<tr>
+ <td>AuroraUX</td>
+ <td>x86<sup><a href="#pf_1">1</a></sup></td>
+ <td>GCC</td>
+</tr>
+<tr>
<td>Linux</td>
<td>x86<sup><a href="#pf_1">1</a></sup></td>
<td>GCC</td>
</tr>
<tr>
+ <td>Linux</td>
+ <td>amd64</td>
+ <td>GCC</td>
+</tr>
+<tr>
<td>Solaris</td>
<td>V9 (Ultrasparc)</td>
<td>GCC</td>
@@ -239,7 +249,6 @@ software you will need.</p>
<td>MacOS X<sup><a href="#pf_2">2</a>,<a href="#pf_9">9</a></sup></td>
<td>x86</td>
<td>GCC</td>
-
</tr>
<tr>
<td>Cygwin/Win32</td>
@@ -248,14 +257,10 @@ software you will need.</p>
</tr>
<tr>
<td>MinGW/Win32</td>
- <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_6">6</a>,<a href="#pf_8">8</a></sup></td>
+ <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_6">6</a>,
+ <a href="#pf_8">8</a>, <a href="#pf_10">10</a></sup></td>
<td>GCC 3.4.X, binutils 2.15</td>
</tr>
-<tr>
- <td>Linux</td>
- <td>amd64</td>
- <td>GCC</td>
-</tr>
</table>
<p>LLVM has partial support for the following platforms:</p>
@@ -321,6 +326,11 @@ up</a></li>
levels greater than 0 (i.e., <i>"-O1"</i> and higher).
Add <i>OPTIMIZE_OPTION="-O0"</i> to the build command line
if compiling for LLVM Release or bootstrapping the LLVM toolchain.</li>
+<li><a name="pf_10">For MSYS/MinGW on Windows, be sure to install the MSYS
+ version of the perl package, and be sure it appears in your path
+ before any Windows-based versions such as Strawberry Perl and
+ ActivePerl, as these have Windows-specifics that will cause the
+    ActivePerl, as these have Windows-specific quirks that will cause the
</ol>
</div>
@@ -410,19 +420,19 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
<tr>
<td><a href="http://www.gnu.org/software/autoconf">GNU Autoconf</a></td>
- <td>2.59</td>
+ <td>2.60</td>
<td>Configuration script builder<sup><a href="#sf4">4</a></sup></td>
</tr>
<tr>
<td><a href="http://www.gnu.org/software/automake">GNU Automake</a></td>
- <td>1.9.2</td>
+ <td>1.9.6</td>
<td>aclocal macro generator<sup><a href="#sf4">4</a></sup></td>
</tr>
<tr>
<td><a href="http://savannah.gnu.org/projects/libtool">libtool</a></td>
- <td>1.5.10</td>
+ <td>1.5.22</td>
<td>Shared library manager<sup><a href="#sf4">4</a></sup></td>
</tr>
@@ -548,7 +558,10 @@ as the previous one. It appears to work with ENABLE_OPTIMIZED=0 (the default).</
<p><b>Cygwin GCC 4.3.2 20080827 (beta) 2</b>:
Users <a href="http://llvm.org/PR4145">reported</a> various problems related
with link errors when using this GCC version.</p>
-
+<p><b>Debian GCC 4.3.2 on X86</b>: Crashes building some files in LLVM 2.6.</p>
+<p><b>GCC 4.3.3 (Debian 4.3.3-10) on ARM</b>: Miscompiles parts of LLVM 2.6
+when optimizations are turned on. The symptom is an infinite loop in
+FoldingSetImpl::RemoveNode while running the code generator.</p>
<p><b>GNU ld 2.16.X</b>. Some 2.16.X versions of the ld linker will produce very
long warning messages complaining that some ".gnu.linkonce.t.*" symbol was
defined in a discarded section. You can safely ignore these messages as they are
@@ -1622,7 +1635,7 @@ out:</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-05-04 12:24:46 +0200 (Mon, 04 May 2009) $
+ Last modified: $Date: 2009-09-27 06:56:27 +0200 (Sun, 27 Sep 2009) $
</address>
</body>
</html>
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html
index 809fda0a6f35..c0024506e360 100644
--- a/docs/GettingStartedVS.html
+++ b/docs/GettingStartedVS.html
@@ -140,15 +140,15 @@
    <li>If you used CMake, then the root of the directory in which you created
    the project files will have an <tt>llvm.sln</tt> file; just
    double-click on that to open Visual Studio.</li>
- </ol></li>
+ </ul></li>
<li>Build the LLVM Suite:
- <ol>
+ <ul>
<li>Simply build the solution.</li>
<li>The Fibonacci project is a sample program that uses the JIT. Modify
the project's debugging properties to provide a numeric command line
argument. The program will print the corresponding fibonacci value.</li>
- </ol></li>
+ </ul></li>
</ol>
@@ -411,7 +411,7 @@ out:</p>
<a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-04-03 22:32:13 +0200 (Fri, 03 Apr 2009) $
+ Last modified: $Date: 2009-08-05 17:42:44 +0200 (Wed, 05 Aug 2009) $
</address>
</body>
</html>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index b5148ab3312e..77a417f5710d 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -127,8 +127,9 @@ void foo4(void) {
--- command lines ---
$ llvm-gcc -flto a.c -c -o a.o # &lt;-- a.o is LLVM bitcode file
+$ ar q a.a a.o # &lt;-- a.a is an archive with LLVM bitcode
$ llvm-gcc b.c -c -o b.o # &lt;-- b.o is native object file
-$ llvm-gcc -use-gold-plugin a.o b.o -o main # &lt;-- link with LLVMgold plugin
+$ llvm-gcc -use-gold-plugin a.a b.o -o main # &lt;-- link with LLVMgold plugin
</pre>
<p>Gold informs the plugin that foo3 is never referenced outside the IR,
leading LLVM to delete that function. However, unlike in the
diff --git a/docs/HistoricalNotes/2007-OriginalClangReadme.txt b/docs/HistoricalNotes/2007-OriginalClangReadme.txt
new file mode 100644
index 000000000000..611dc9d2c01c
--- /dev/null
+++ b/docs/HistoricalNotes/2007-OriginalClangReadme.txt
@@ -0,0 +1,178 @@
+//===----------------------------------------------------------------------===//
+// C Language Family Front-end
+//===----------------------------------------------------------------------===//
+ Chris Lattner
+
+I. Introduction:
+
+ clang: noun
+ 1. A loud, resonant, metallic sound.
+ 2. The strident call of a crane or goose.
+ 3. C-language family front-end toolkit.
+
+ The world needs better compiler tools, tools which are built as libraries. This
+ design point allows reuse of the tools in new and novel ways. However, building
+ the tools as libraries isn't enough: they must have clean APIs, be as
+ decoupled from each other as possible, and be easy to modify/extend. This
+ requires clean layering, decent design, and avoiding tying the libraries to a
+ specific use. Oh yeah, did I mention that we want the resultant libraries to
+ be as fast as possible? :)
+
+ This front-end is built as a component of the LLVM toolkit that can be used
+ with the LLVM backend or independently of it. In this spirit, the API has been
+ carefully designed as the following components:
+
+ libsupport - Basic support library, reused from LLVM.
+
+ libsystem - System abstraction library, reused from LLVM.
+
+ libbasic - Diagnostics, SourceLocations, SourceBuffer abstraction,
+ file system caching for input source files. This depends on
+ libsupport and libsystem.
+
+ libast - Provides classes to represent the C AST, the C type system,
+ builtin functions, and various helpers for analyzing and
+ manipulating the AST (visitors, pretty printers, etc). This
+ library depends on libbasic.
+
+
+ liblex - C/C++/ObjC lexing and preprocessing, identifier hash table,
+ pragma handling, tokens, and macros. This depends on libbasic.
+
+ libparse - C (for now) parsing and local semantic analysis. This library
+ invokes coarse-grained 'Actions' provided by the client to do
+ stuff (e.g. libsema builds ASTs). This depends on liblex.
+
+ libsema - Provides a set of parser actions to build a standardized AST
+ for programs. AST's are 'streamed' out a top-level declaration
+ at a time, allowing clients to use decl-at-a-time processing,
+ build up entire translation units, or even build 'whole
+ program' ASTs depending on how they use the APIs. This depends
+ on libast and libparse.
+
+ librewrite - Fast, scalable rewriting of source code. This operates on
+ the raw syntactic text of source code, allowing a client
+ to insert and delete text in very large source files using
+ the same source location information embedded in ASTs. This
+ is intended to be a low-level API that is useful for
+ higher-level clients and libraries such as code refactoring.
+
+ libanalysis - Source-level dataflow analysis useful for performing analyses
+ such as computing live variables. It also includes a
+ path-sensitive "graph-reachability" engine for writing
+ analyses that reason about different possible paths of
+ execution through source code. This is currently being
+ employed to write a set of checks for finding bugs in software.
+
+ libcodegen - Lower the AST to LLVM IR for optimization & codegen. Depends
+ on libast.
+
+ clang - An example driver, client of the libraries at various levels.
+ This depends on all these libraries, and on LLVM VMCore.
+
+ This front-end has been intentionally built as a DAG of libraries, making it
+ easy to reuse individual parts or replace pieces if desired. For example, to
+ build a preprocessor, you take the Basic and Lexer libraries. If you want an
+ indexer, you take those plus the Parser library and provide some actions for
+ indexing. If you want a refactoring, static analysis, or source-to-source
+ compiler tool, it makes sense to take those plus the AST building and semantic
+ analyzer library. Finally, if you want to use this with the LLVM backend,
+ you'd take these components plus the AST to LLVM lowering code.
+
+ In the future I hope this toolkit will grow to include new and interesting
+ components, including a C++ front-end, ObjC support, and a whole lot of other
+ things.
+
+ Finally, it should be pointed out that the goal here is to build something that
+ is high-quality and industrial-strength: all the obnoxious features of the C
+ family must be correctly supported (trigraphs, preprocessor arcana, K&R-style
+ prototypes, GCC/MS extensions, etc). It cannot be used if it is not 'real'.
+
+
+II. Usage of clang driver:
+
+ * Basic Command-Line Options:
+ - Help: clang --help
+ - Standard GCC options accepted: -E, -I*, -i*, -pedantic, -std=c90, etc.
+ - To make diagnostics more gcc-like: -fno-caret-diagnostics -fno-show-column
+ - Enable metric printing: -stats
+
+ * -fsyntax-only is currently the default mode.
+
+ * -E mode works the same way as GCC.
+
+ * -Eonly mode does all preprocessing, but does not print the output,
+ useful for timing the preprocessor.
+
+ * -fsyntax-only is currently partially implemented, lacking some
+ semantic analysis (some errors and warnings are not produced).
+
+ * -parse-noop parses code without building an AST. This is useful
+ for timing the cost of the parser without including AST building
+ time.
+
+ * -parse-ast builds ASTs, but doesn't print them. This is most
+ useful for timing AST building vs -parse-noop.
+
+ * -parse-ast-print pretty prints most expression and statements nodes.
+
+ * -parse-ast-check checks that diagnostic messages that are expected
+ are reported and that those which are reported are expected.
+
+ * -dump-cfg builds ASTs and then CFGs. CFGs are then pretty-printed.
+
+ * -view-cfg builds ASTs and then CFGs. CFGs are then visualized by
+ invoking Graphviz.
+
+ For more information on getting Graphviz to work with clang/LLVM,
+ see: http://llvm.org/docs/ProgrammersManual.html#ViewGraph
+
+
+III. Current advantages over GCC:
+
+ * Column numbers are fully tracked (no 256 col limit, no GCC-style pruning).
+ * All diagnostics have column numbers, includes 'caret diagnostics', and they
+ highlight regions of interesting code (e.g. the LHS and RHS of a binop).
+ * Full diagnostic customization by client (can format diagnostics however they
+ like, e.g. in an IDE or refactoring tool) through DiagnosticClient interface.
+ * Built as a framework, can be reused by multiple tools.
+ * All languages supported linked into same library (no cc1,cc1obj, ...).
+ * mmap's code in read-only, does not dirty the pages like GCC (mem footprint).
+ * LLVM License, can be linked into non-GPL projects.
+ * Full diagnostic control, per diagnostic. Diagnostics are identified by ID.
+ * Significantly faster than GCC at semantic analysis, parsing, preprocessing
+ and lexing.
+ * Defers exposing platform-specific stuff to as late as possible, tracks use of
+ platform-specific features (e.g. #ifdef PPC) to allow 'portable bytecodes'.
+ * The lexer doesn't rely on the "lexer hack": it has no notion of scope and
+ does not categorize identifiers as types or variables -- this is up to the
+ parser to decide.
+
+Potential Future Features:
+
+ * Fine grained diag control within the source (#pragma enable/disable warning).
+ * Better token tracking within macros? (Token came from this line, which is
+ a macro argument instantiated here, recursively instantiated here).
+ * Fast #import with a module system.
+ * Dependency tracking: change to header file doesn't recompile every function
+ that textually depends on it: recompile only those functions that need it.
+ This is aka 'incremental parsing'.
+
+
+IV. Missing Functionality / Improvements
+
+Lexer:
+ * Source character mapping. GCC supports ASCII and UTF-8.
+ See GCC options: -ftarget-charset and -ftarget-wide-charset.
+ * Universal character support. Experimental in GCC, enabled with
+ -fextended-identifiers.
+ * -fpreprocessed mode.
+
+Preprocessor:
+ * #assert/#unassert
+ * MSExtension: "L#param" stringizes to a wide string literal.
+ * Add support for -M*
+
+Traditional Preprocessor:
+ * Currently, we have none. :)
+
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
index 53945a533ef5..d6ef416e166c 100644
--- a/docs/HowToReleaseLLVM.html
+++ b/docs/HowToReleaseLLVM.html
@@ -10,14 +10,15 @@
<div class="doc_title">How To Release LLVM To The Public</div>
<ol>
<li><a href="#introduction">Introduction</a></li>
+  <li><a href="#release-qualify">Qualification Criteria</a></li>
  <li><a href="#timeline">Release Timeline</a></li>
<li><a href="#process">Release Process</a></li>
- <li><a href="#dist_targets">Distribution Targets</a></li>
</ol>
<div class="doc_author">
- <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>,
- <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a>,
- <a href="mailto:tonic@nondot.org">Tanya Lattner</a></p>
+ <p>Written by <a href="mailto:tonic@nondot.org">Tanya Lattner</a>,
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>,
+ <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a>
+ </p>
</div>
<!-- *********************************************************************** -->
@@ -26,32 +27,23 @@
<div class="doc_text">
<p>
- This document collects information about successfully releasing LLVM to the
- public. It is the release manager's guide to ensuring that a high quality
+ This document collects information about successfully releasing LLVM
+ (including subprojects llvm-gcc and Clang) to the public.
+ It is the release manager's responsibility to ensure that a high quality
build of LLVM is released.
</p>
-
- <p>
- The following is the basic criteria for releasing LLVM:
- </p>
-
- <ol>
- <li>Successful configure and build.</li>
- <li>Clean 'make check'.</li>
- <li>No regressions in the testsuite from the previous release. This may
- include performance regressions for major benchmarks.</li>
- </ol>
</div>
<!-- *********************************************************************** -->
<div class="doc_section"><a name="process">Release Timeline</a></div>
<!-- *********************************************************************** -->
<div class="doc_text">
-The release manager should attempt to have a release every 3-4 months because LLVM
-does time based releases (instead of feature based). The release schedule should
-be roughly as follows:
+  <p>LLVM is released on a time-based schedule (currently every 6 months). We
+  do not have dot releases because of the nature of LLVM's incremental
+  development philosophy. The release schedule is roughly as follows:
+ </p>
<ol>
-<li>Set code freeze and branch creation date for 3 months after last release
+<li>Set code freeze and branch creation date for 6 months after last code freeze
date. Announce release schedule to the LLVM community and update the website.</li>
<li>Create release branch and begin release process. </li>
<li>Send out pre-release for first round of testing. Testing will last 7-10 days.
@@ -71,44 +63,76 @@ pre-release testing.</li>
<div class="doc_section"><a name="process">Release Process</a></div>
<!-- *********************************************************************** -->
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="overview">Process Overview</a></div>
<div class="doc_text">
<ol>
+  <li><a href="#release-admin">Release Administrative Tasks</a>
+ <ol>
<li><a href="#branch">Create Release Branch</a></li>
- <li><a href="#verchanges">Update LLVM Version </a></li>
+ <li><a href="#verchanges">Update Version Numbers</a></li>
+  </ol></li>
+  <li><a href="#release-build">Building the Release</a>
+ <ol>
<li><a href="#dist">Build the LLVM Source Distributions</a></li>
<li><a href="#build">Build LLVM</a></li>
- <li><a href="#llvmgccbin">Build the LLVM GCC Binary Distribution</a></li>
- <li><a href="#rpm">Build RPM Packages (optional)</a></li>
- <li><a href="#check">Run 'make check'</a></li>
- <li><a href="#test">Run LLVM Test Suite</a></li>
- <li><a href="#prerelease">Pre-Release Testing</a></li>
- <li><a href="#tag">Tag the LLVM Release Branch</a></li>
+ <li><a href="#llvmgccbin">Build the LLVM-GCC Binary Distribution</a></li>
+ <li><a href="#clangbin">Build the Clang Binary Distribution</a></li>
+ <li><a href="#target-build">Target Specific Build Details</a></li>
+  </ol></li>
+
+  <li><a href="#release-qualify">Release Qualification Criteria</a>
+ <ol>
+ <li><a href="#llvm-qualify">Qualify LLVM</a></li>
+ <li><a href="#llvmgcc-qualify">Qualify LLVM-GCC</a></li>
+ <li><a href="#clang-qualify">Qualify Clang</a></li>
+ <li><a href="#targets">Specific Target Qualification Details</a></li>
+  </ol></li>
+
+ <li><a href="#commTest">Community Testing</a></li>
+ <li><a href="#release-patch">Release Patch Rules</a></li>
+
+  <li><a href="#release-final">Release Final Tasks</a>
+ <ol>
<li><a href="#updocs">Update Documentation</a></li>
+ <li><a href="#tag">Tag the LLVM Release Branch</a></li>
<li><a href="#updemo">Update the LLVM Demo Page</a></li>
<li><a href="#webupdates">Update the LLVM Website</a></li>
<li><a href="#announce">Announce the Release</a></li>
+  </ol></li>
</ol>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="branch">Create Release Branch</a></div>
+<div class="doc_subsection"><a name="release-admin">
+Release Administrative Tasks</a></div>
+
+<div class="doc_text">
+<p>This section describes a few administrative tasks that need to be done for
+the release process to begin. Specifically, it involves creating the release
+branch, resetting version numbers, and creating the release tarballs for the
+release team to begin testing.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="branch">Create Release Branch</a></div>
<div class="doc_text">
<p>Branch the Subversion HEAD using the following procedure:</p>
<ol>
<li>
- <p>Verify that the current Subversion HEAD is in decent shape by examining nightly
- tester results.</p></li>
+ <p>Verify that the current Subversion HEAD is in decent shape by examining
+ nightly tester or buildbot results.</p></li>
<li>
<p>Request all developers to refrain from committing. Offenders get commit
rights taken away (temporarily).</p></li>
<li>
- <p> Create the release branch for <tt>llvm</tt>, <tt>llvm-gcc4.2</tt>, and
- the <tt>test-suite</tt>. The branch name will be <tt>release_XX</tt>,
- where <tt>XX</tt> is the major and minor release numbers. These branches can
- be created without checking out anything from subversion.
+ <p> Create the release branch for <tt>llvm</tt>, <tt>llvm-gcc4.2</tt>,
+ <tt>clang</tt>, and the <tt>test-suite</tt>. The branch name will be
+    <tt>release_XX</tt>, where <tt>XX</tt> is the major and minor release numbers.
+ <tt>Clang</tt> will have a different release number than <tt>llvm</tt>/
+ <tt>llvm-gcc4</tt> since its first release was years later
+ (still deciding if this will be true or not). These branches
+ can be created without checking out anything from subversion.
</p>
<div class="doc_code">
@@ -119,6 +143,8 @@ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \
https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XX</i>
svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i>
+svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
+ https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XX</i>
</pre>
</div>
@@ -135,34 +161,36 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
svn co https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XX</i>
svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XX</i>
svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i>
+svn co https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XX</i>
</pre>
</div></li>
-</div>
</ol>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="verchanges">Update LLVM Version</a></div>
+<div class="doc_subsubsection"><a name="verchanges">Update LLVM Version</a></div>
<div class="doc_text">
<p>
After creating the LLVM release branch, update the release branches'
autoconf/configure.ac version from X.Xsvn to just X.X. Update it on mainline
as well to be the next version (X.X+1svn). Regenerate the configure script
- for both. This must be done for both llvm and the test-suite.
+ for both. This must be done for both <tt>llvm</tt> and the
+ <tt>test-suite</tt>.
</p>
+ <p>FIXME: Add a note about <tt>clang</tt>.</p>
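+  <p>For example (a hedged sketch; the regeneration step assumes the
+  <tt>autoconf/AutoRegen.sh</tt> script in the llvm tree, which is not
+  described on this page):</p>
+
+<div class="doc_code">
+<pre>
+# On the release branch: change the version from X.Xsvn to X.X in
+# autoconf/configure.ac, then regenerate the configure script.
+$ cd llvm
+$ $EDITOR autoconf/configure.ac
+$ ./autoconf/AutoRegen.sh
+</pre>
+</div>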
<p>In addition, the version number of all the Bugzilla components must be
updated for the next release.
</p>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="dist">Build the LLVM Source Distributions</a></div>
+<div class="doc_subsubsection"><a name="dist">Build the LLVM Source Distributions</a></div>
<div class="doc_text">
<p>
- Create source distributions for LLVM, LLVM GCC, and the LLVM Test Suite by
- exporting the source from Subversion and archiving it. This can be done with
- the following commands:
+ Create source distributions for <tt>LLVM</tt>, <tt>LLVM-GCC</tt>,
+ <tt>clang</tt>, and the llvm <tt>test-suite</tt> by exporting the source from
+ Subversion and archiving it. This can be done with the following commands:
</p>
<div class="doc_code">
@@ -170,25 +198,43 @@ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i>
svn export https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XX</i> llvm-X.X
svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XX</i> llvm-gcc4.2-X.X.source
svn export https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i> llvm-test-X.X
-tar -cvf - llvm-X.X | gzip &gt; llvm-X.X.tar.gz
-tar -cvf - llvm-test-X.X | gzip &gt; llvm-test-X.X.tar.gz
-tar -cvf - llvm-gcc4.2-X.X.source | gzip &gt; llvm-gcc-4.2-X.X.source.tar.gz
+svn export https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XX</i> clang-X.X
+tar -czvf llvm-X.X.tar.gz llvm-X.X
+tar -czvf llvm-test-X.X.tar.gz llvm-test-X.X
+tar -czvf llvm-gcc-4.2-X.X.source.tar.gz llvm-gcc4.2-X.X.source
+tar -czvf clang-X.X.tar.gz clang-X.X
</pre>
</div>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="build">Build LLVM</a></div>
+<div class="doc_subsection"><a name="release-build">
+Building the Release</a></div>
+
+<div class="doc_text">
+<p>The build of <tt>llvm</tt>, <tt>llvm-gcc</tt>, and <tt>clang</tt> must be
+free of errors and warnings in all of the debug, release, and release-asserts
+builds (the corresponding make invocations are sketched after the list
+below). If all three builds are clean, then the release passes build
+qualification.</p>
+
+<ol>
+<li>debug: ENABLE_OPTIMIZED=0</li>
+<li>release: ENABLE_OPTIMIZED=1</li>
+<li>release-asserts: ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1</li>
+</ol>
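+
+<p>Concretely, the three qualification builds are a matter of the make
+variables listed above (a minimal sketch, assuming a configured llvm object
+tree):</p>
+
+<div class="doc_code">
+<pre>
+$ make ENABLE_OPTIMIZED=0                        # debug
+$ make ENABLE_OPTIMIZED=1                        # release
+$ make ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1   # release-asserts
+</pre>
+</div>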
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="build">Build LLVM</a></div>
<div class="doc_text">
<p>
- Build both debug and release (optimized) versions of LLVM on all
- platforms. Ensure the build is warning and error free on each platform.
- Note that when building the LLVM GCC Binary, use a release build of LLVM.
+    Build debug, release (optimized), and release-asserts versions of
+    LLVM on all supported platforms. Directions to build llvm are
+ <a href="http://llvm.org/docs/GettingStarted.html#quickstart">here</a>.
</p>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="llvmgccbin">Build the LLVM GCC Binary Distribution</a></div>
+<div class="doc_subsubsection"><a name="llvmgccbin">Build the LLVM GCC Binary Distribution</a></div>
<div class="doc_text">
<p>
Creating the LLVM GCC binary distribution (release/optimized) requires
@@ -198,128 +244,202 @@ tar -cvf - llvm-gcc4.2-X.X.source | gzip &gt; llvm-gcc-4.2-X.X.source.tar.gz
<ol>
<li>
Build the LLVM GCC front-end by following the directions in the README.LLVM
- file. Be sure to build with LLVM_VERSION_INFO=X.X, where X is the major and
+    file. The front end must be compiled with c, c++, objc (mac only),
+    objc++ (mac only), and fortran support.</li>
+    <li>Bootstrap the compiler as well.</li>
+ <li>Be sure to build with LLVM_VERSION_INFO=X.X, where X is the major and
minor release numbers.
</li>
<li>
Copy the installation directory to a directory named for the specific target.
For example on Red Hat Enterprise Linux, the directory would be named
- <tt>llvm-gcc4.0-2.1-x86-linux-RHEL4</tt>. Archive and compress the new directory.
+    <tt>llvm-gcc4.2-2.6-x86-linux-RHEL4</tt>. Archive and compress the new directory, as sketched below.
</li>
</ol>
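+
+<p>For instance, the final step above might look like this (a hedged sketch;
+<tt>install-dir</tt> is a placeholder for the actual installation
+directory):</p>
+
+<div class="doc_code">
+<pre>
+$ cp -r install-dir llvm-gcc4.2-2.6-x86-linux-RHEL4
+$ tar -czvf llvm-gcc4.2-2.6-x86-linux-RHEL4.tar.gz llvm-gcc4.2-2.6-x86-linux-RHEL4
+</pre>
+</div>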
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="check">Run 'make check'</a></div>
+<div class="doc_subsubsection"><a name="clangbin">Build Clang
+Binary Distribution</a></div>
<div class="doc_text">
<p>
- Using the newly built llvm-gcc and llvm, reconfigure llvm to locate llvm-gcc.
- Run <tt>make check</tt> and ensure there are no unexpected failures. If there
- are, resolve the failures or file a bug. If there is a fix commited to mainline,
- merge back into the release branch, and restart testing by
- <a href="#build">re-building LLVM</a> and <a href="#build">llvm-gcc</a>. If no
- fix will be made, XFAIL the test and commit back to the release branch.
+ Creating the Clang binary distribution (debug/release/release-asserts) requires
+ performing the following steps for each supported platform:
</p>
+ <ol>
+ <li>
+ Build clang according to the directions
+ <a href="http://clang.llvm.org/get_started.html">here</a>.
+ </li>
+
+    <li>Build both debug and release versions of clang; the distributed
+    binary will be the release build.</li>
+
+ <li>
+ Package clang (details to follow).
+ </li>
+ </ol>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="target-build">Target Specific Build
+Details</a></div>
+<div class="doc_text">
<p>
- Ensure that '<tt>make check</tt>' passes on all platforms for all targets. The
- test suite must complete with "0 unexpected failures" before sending out the
- pre-releases for testing.
+    The table below specifies which compilers are used for each arch/OS
+    combination when qualifying the build of <tt>llvm</tt>,
+    <tt>llvm-gcc</tt>, and <tt>clang</tt>.</p>
+
+ <p>
+ <table>
+ <tr><th>Architecture</th><th>OS</th><th>compiler</th></tr>
+ <tr><td>x86-32</td><td>Mac OS 10.5</td><td>gcc 4.0.1</td></tr>
+ <tr><td>x86-32</td><td>Linux</td><td>gcc 4.2.X, gcc 4.3.X</td></tr>
+ <tr><td>x86-32</td><td>FreeBSD</td><td>gcc 4.2.X</td></tr>
+ <tr><td>x86-32</td><td>mingw</td><td>gcc 3.4.5</td></tr>
+ <tr><td>x86-64</td><td>Mac OS 10.5</td><td>gcc 4.0.1</td></tr>
+ <tr><td>x86-64</td><td>Linux</td><td>gcc 4.2.X, gcc 4.3.X</td></tr>
+ <tr><td>x86-64</td><td>FreeBSD</td><td>gcc 4.2.X</td></tr>
+
+ </table>
</p>
+
</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="release-qualify">
+Release Qualification Criteria</a></div>
+
+<div class="doc_text">
+  <p>A release is qualified when it has no regressions from the previous
+  release (or baseline). At this time, regressions concern correctness only,
+  not performance. <b>Regressions are new failures in the set of tests that
+  are used to qualify each product, and only include the items on that list.
+  Ultimately, there is no end to the number of possible bugs in a release. We
+  need concrete and definitive release criteria that ensure we have
+  monotonically improving quality on some metric. The metric we use is
+  described below. This doesn't mean that we don't care about other things,
+  but these are the things that must be satisfied before a release can go
+  out.</b></p>
+</div>
+
+
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="test">LLVM Test Suite</a></div>
+<div class="doc_subsubsection"><a name="llvm-qualify">Qualify LLVM</a></div>
<div class="doc_text">
<p>
- Run the <tt>llvm-test</tt> suite and ensure there are no unacceptable
- failures. Unacceptable failures are regression from the previous release
- and (optionally) major performance regressions from the previous release.
- If a regression is found a bug is filled, but the pre-releases may still go
- out.</p>
+ LLVM is qualified when it has a clean dejagnu test run without a frontend and
+ it has no regressions when using either <tt>llvm-gcc</tt> or <tt>clang</tt>
+ with the <tt>test-suite</tt> from the previous release.
+</p>
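+
+<p>A minimal sketch of the dejagnu check (the expected summary wording is an
+assumption based on dejagnu's usual output, not a quote from this page):</p>
+
+<div class="doc_code">
+<pre>
+$ make check    # must end with 0 unexpected failures
+</pre>
+</div>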
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="rpm">Building RPM packages (optional)</a></div>
+<div class="doc_subsubsection"><a name="llvmgcc-qualify">Qualify LLVM-GCC</a></div>
<div class="doc_text">
<p>
- You can, optionally, create source and binary RPM packages for LLVM. These may
- make it easier to get LLVM into a distribution. This can be done with the
- following commands:
- </p>
+ <tt>LLVM-GCC</tt> is qualified when front-end specific tests in the
+ <tt>llvm</tt> dejagnu test suite all pass and there are no regressions in
+ the <tt>test-suite</tt>.</p>
+ <p>We do not use the gcc dejagnu test suite as release criteria.</p>
+</div>
-<div class="doc_code">
-<pre>
-make dist # Build the distribution source tarball
-make dist-check # Check that the source tarball can build itself.
-cp llvm-M.m.tar.gz /usr/src/redhat/SOURCES # Required by rpmbuild
-make srpm # for source rpm
-make rpm # for binary rpm
-</pre>
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="clang-qualify">Qualify Clang</a></div>
+<div class="doc_text">
+  <p><tt>Clang</tt> is qualified when front-end specific tests in the
+ <tt>llvm</tt> dejagnu test suite all pass, clang's own test suite passes
+ cleanly, and there are no regressions in the <tt>test-suite</tt>.</p>
</div>
- <p>
- First, use <tt>make dist</tt> to simply build the distribution. Any failures
- need to be corrected (on the branch). Once <tt>make dist</tt> can be
- successful, do <tt>make dist-check</tt>. This target will do the same thing as
- the 'dist' target but also test that distribution to make sure it can build
- itself and runs <tt>make check</tt> as well. This ensures that needed files
- are not missing and that the src tarball can be successfully unpacked, built,
- installed, and cleaned. Once you have a reliable tarball, you need to copy it
- to the <tt>/usr/src/redhat/SOURCES</tt> directory which is a requirement of
- the rpmbuild tool. The last two <tt>make</tt> invocations just run rpmbuild to
- build either a source (<tt>srpm</tt>) or binary (<tt>rpm</tt>) RPM package.
- </p>
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="targets">Specific Target
+Qualification Details</a></div>
+<div class="doc_text">
+ <p><table>
+ <tr><th>Architecture</th><th>OS</th><th>llvm-gcc baseline</th><th>clang baseline
+ </th><th>tests</th></tr>
+ <tr><td>x86-32</td><td>Mac OS 10.5</td><td>last release</td><td>none</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
+ <tr><td>x86-32</td><td>Linux</td><td>last release</td><td>none</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
+ <tr><td>x86-32</td><td>FreeBSD</td><td>none</td><td>none</td><td>llvm dejagnu, clang tests, test-suite</td></tr>
+ <tr><td>x86-32</td><td>mingw</td><td>last release</td><td>none</td><td>QT</td></tr>
+ <tr><td>x86-64</td><td>Mac OS 10.5</td><td>last release</td><td>none</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
+ <tr><td>x86-64</td><td>Linux</td><td>last release</td><td>none</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
+ <tr><td>x86-64</td><td>FreeBSD</td><td>none</td><td>none</td><td>llvm dejagnu, clang tests, test-suite</td></tr>
+ </table></p>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="prerelease">Pre-Release Testing</a></div>
+<div class="doc_subsection"><a name="commTest">Community Testing</a></div>
<div class="doc_text">
<p>
Once all testing has been completed and appropriate bugs filed, the pre-release
tarballs may be put on the website and the LLVM community is notified. Ask that
all LLVM developers test the release in 2 ways:</p>
<ol>
- <li>Download llvm-X.X, llvm-test-X.X, and the appropriate llvm-gcc4 binary.
- Run "make check" and the full llvm-test suite (make TEST=nightly report).<li>
- <li>Download llvm-X.X, llvm-test-X.X, and the llvm-gcc4 source. Compile
- everything. Run "make check" and the full llvm-test suite (make TEST=nightly
+ <li>Download llvm-X.X, llvm-test-X.X, and the appropriate llvm-gcc4
+ and/or clang binary. Build LLVM.
+  Run "make check" and the full llvm-test suite (make TEST=nightly report), as sketched after this list.</li>
+ <li>Download llvm-X.X, llvm-test-X.X, and the llvm-gcc4 and/or clang source.
+ Compile everything. Run "make check" and the full llvm-test suite (make TEST=nightly
report).</li>
</ol>
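+
+<p>A hedged sketch of the first variant (archive names follow the patterns
+used earlier on this page; the exact configure invocation and test-suite
+placement may differ per platform):</p>
+
+<div class="doc_code">
+<pre>
+$ tar -xzf llvm-X.X.tar.gz
+$ tar -xzf llvm-test-X.X.tar.gz
+# the test suite is typically unpacked under llvm-X.X/projects before
+# configuring (an assumption; see the testing guide for your release)
+$ cd llvm-X.X &amp;&amp; ./configure &amp;&amp; make
+$ make check
+$ make TEST=nightly report
+</pre>
+</div>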
<p>Ask LLVM developers to submit the report and make check results to the list.
- Verify that there are no regressions from the previous release. For
- unsupported targets, verify that make check at least is clean.</p>
+ Attempt to verify that there are no regressions from the previous release.
+ The results are not used to qualify a release, but to spot other potential
+ problems. For unsupported targets, verify that make check at least is
+ clean.</p>
- <p>The first round of pre-release testing will be the longest. During this time,
- all regressions must be fixed before the second pre-release is created (repeat
- steps 4-8).</p>
+  <p>During the first round of testing,
+ all regressions must be fixed before the second pre-release is created.</p>
- <p>If this is the second round of testing, this is only to ensure the bug fixes
- previously merged in have not created new major problems. This is not the time
- to solve additional and unrelated bugs. If no patches are merged in, the release
- is determined to be ready and the release manager may move onto the next step.</p>
+ <p>If this is the second round of testing, this is only to ensure the bug
+ fixes previously merged in have not created new major problems. This is not
+ the time to solve additional and unrelated bugs. If no patches are merged in,
+ the release is determined to be ready and the release manager may move onto
+ the next step.
+ </p>
</div>
-
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="tag">Tag the Release Branch</a></div>
+<div class="doc_subsection"><a name="release-patch">Release Patch Rules
+</a></div>
<div class="doc_text">
- <p>Tag the release branch using the following procedure:</p>
-<div class="doc_code">
-<pre>
-svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XX \
- https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX \
- https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
- https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XX</i>
-</pre>
+ <p>
+ Below are the rules regarding patching the release branch.</p>
+  <ol>
+  <li>Patches applied to the release branch are only applied by the release
+  manager.</li>
+  <li>During the first round of testing, patches that fix regressions or that
+  are small and relatively risk free (verified by the appropriate code owner)
+  are applied to the branch. Code owners are asked to be very conservative in
+  approving patches for the branch, and we reserve the right to reject any
+  patch that does not fix a regression as previously defined.</li>
+  <li>During the remaining rounds of testing, only patches that fix regressions
+  may be applied.</li>
+  </ol>
</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="release-final">Release Final Tasks
+</a></div>
+<div class="doc_text">
+ <p>
+  The final stages of the release process involve tagging the release branch,
+  updating documentation that refers to the release, and updating the demo
+  page.</p>
+  <p>FIXME: Add a note if anything needs to be done to the clang website.
+  Hopefully the websites will eventually be merged.</p>
</div>
+
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="updocs">Update Documentation</a></div>
+<div class="doc_subsubsection"><a name="updocs">Update Documentation</a></div>
<div class="doc_text">
<p>
Review the documentation and ensure that it is up to date. The Release Notes
@@ -332,6 +452,24 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
</div>
<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="tag">Tag the Release Branch</a></div>
+<div class="doc_text">
+ <p>Tag the release branch using the following procedure:</p>
+<div class="doc_code">
+<pre>
+svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XX \
+ https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XX</i>
+svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX \
+ https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XX</i>
+svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
+ https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XX</i>
+</pre>
+</div>
+</div>
+
+
+
+<!-- ======================================================================= -->
<div class="doc_subsection"><a name="updemo">Update the LLVM Demo Page</a></div>
<div class="doc_text">
<p>
@@ -341,7 +479,7 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="webupdates">Update the LLVM Website</a></div>
+<div class="doc_subsubsection"><a name="webupdates">Update the LLVM Website</a></div>
<div class="doc_text">
<p>
The website must be updated before the release announcement is sent out. Here is
@@ -349,7 +487,8 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
<ol>
<li> Check out the <tt>website</tt> module from CVS. </li>
<li> Create a new subdirectory X.X in the releases directory. </li>
- <li> Commit the <tt>llvm</tt>, <tt>test-suite</tt>, <tt>llvm-gcc</tt> source,
+ <li> Commit the <tt>llvm</tt>, <tt>test-suite</tt>, <tt>llvm-gcc</tt> source,
+    <tt>clang</tt> source, <tt>clang</tt> binaries,
and <tt>llvm-gcc</tt> binaries in this new directory. </li>
<li> Copy and commit the <tt>llvm/docs</tt> and <tt>LICENSE.txt</tt>
files into this new directory. The docs should be built with BUILD_FOR_WEBSITE=1.</li>
@@ -360,232 +499,17 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
release documentation.</li>
<li> Finally, update the main page (<tt>index.html</tt> and sidebar) to
point to the new release and release announcement. Make sure this all gets
- commited back into Subversion.</li>
+ committed back into Subversion.</li>
</ol>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="announce">Announce the Release</a></div>
+<div class="doc_subsubsection"><a name="announce">Announce the Release</a></div>
<div class="doc_text">
<p>Have Chris send out the release announcement when everything is finished.</p>
</div>
<!-- *********************************************************************** -->
-<div class="doc_section"><a name="dist_targets">Distribution Targets</a></div>
-<!-- *********************************************************************** -->
-
-<!-- ======================================================================= -->
-<div class="doc_subsection">Overview</div>
-<div class="doc_text">
- <p>
- The first thing you need to understand is that there are multiple make targets
- to support this feature. Here's an overview, we'll delve into the details
- later.
- </p>
-
- <ul>
- <li><b>distdir</b> - builds the distribution directory from which the
- distribution will be packaged</li>
- <li><b>dist</b> - builds each of the distribution tarballs (tar.gz,
- tar.bzip2, .zip). These can be built individually as well, with separate
- targets.</li>
- <li><b>dist-check</b> - this is identical to <tt>dist</tt> but includes a
- check on the distribution that ensures the tarball can: unpack
- successfully, compile correctly, pass '<tt>make check</tt>', and pass
- '<tt>make clean</tt>'.</li>
- <li><b>dist-clean</b>- this just does a normal clean but also cleans up the
- stuff generated by the other three <tt>dist</tt> targets (above).</li>
- </ul>
-
- <p>
- Okay, that's the basic functionality. When making a release, we want to ensure
- that the tree you build the distribution from passes
- <tt>dist-check</tt>. Beyond fixing the usual bugs, there is generally one
- impediment to making the release in this fashion: missing files. The
- <tt>dist-check</tt> process guards against that possibility. It will either
- fail and that failure will indicate what's missing, or it will succeed meaning
- that it has proved that the tarballs can actually succeed in building LLVM
- correctly and that it passes <tt>make check</tt>.
- </p>
-</div>
-
-<!-- ======================================================================= -->
-
-<div class="doc_subsection">distdir</div>
-<div class="doc_text">
- <p>
- This target builds the distribution directory which is the directory from
- which the tarballs are generated. The distribution directory has the same
- name as the release, e.g. LLVM-1.7). This target goes through the following
- process:
- </p>
-
- <ol>
- <li>First, if there was an old distribution directory (for the current
- release), it is removed in its entirety and you see <tt>Removing old
- LLVM-1.7</tt></li>
- <li>Second, it issues a <tt>make all ENABLE_OPTIMIZED=3D1</tt> to ensure
- that the everything in your tree can be built in release mode. Often
- times there are discrepancies in building between debug and release
- modes so it enforces release mode first. If that fails, the
- <tt>distdir</tt> target fails too. This is preceded by the message
- <tt>Making 'all' to verify build</tt>.</li>
- <li>Next, it traverses your source tree and copies it to a new directory
- that has the name of the release (<tt>LLVM-M.m</tt> in our current
- case). This is the directory that will get tar'd. It contains all the
- software that needs to be in the distribution. During the copying
- process, it omits generated files, SVN directories, and any other
- "cruft" that's in your build tree. This is done to eliminate the
- possibility of huge distribution tarballs that include useless or
- irrelevant stuff in them. This is the trickiest part of making the
- distribution. Done manually you will either include stuff that
- shouldn't be in the distribution or exclude stuff that should. This
- step is preceded by the message <tt>Building Distribution Directory
- LLVM-1.7</tt></li>
- <li>The distribution directory is then traversed and all <tt>CVS</tt> or
- <tt>.svn</tt> directories are removed. You see: <tt>Eliminating CVS/.svn
- directories from distribution</tt></li>
- <li>The recursive <tt>dist-hook</tt> target is executed. This gives each
- directory a chance to modify the distribution in some way (more on this
- below).</li>
- <li>The distribution directory is traversed and the correct file
- permissions and modes are set based on the type of file.</li>
- </ol>
-
- <p>
- To control the process of making the distribution directory correctly, each
- Makefile can utilize two features:
- </p>
-
- <ol>
- <li><b><tt>EXTRA_DIST</tt></B> - this make variable specifies which files
- it should distribute. By default, all source files are automatically
- included for distribution as well as certain <tt>well known</tt> files
- (see DistAlways variable in Makefile.rules for details). Each Makefile
- specifies, via the <tt>EXTRA_DIST</tt> variable, which additional files
- need to be distributed. Only those files that are needed to build LLVM
- should be added to <tt>EXTRA_DIST</tt>. <tt>EXTRA_DIST</tt> contains a
- list of file or directory names that should be distributed. For example,
- the top level Makefile contains <tt>EXTRA_DIST := test llvm.spec
- include</tt>. This means that in addition to regular things that are
- distributed at the top level (<tt>CREDITS.txt, LICENSE.txt</tt>, etc.)
- the distribution should contain the entire <tt>test</tt> and
- <tt>include</tt> directories as well as the <tt>llvm.spec</tt> file.</li>
- <li><b><tt>dist-hook</tt></B> - this make target can be used to alter the
- content of the distribution directory. For example, in the top level
- Makefile there is some logic to eliminate files in the <tt>include</tt>
- subtree that are generated by the configure script. These should not be
- distributed. Similarly, any <tt>dist-hook</tt> target found in any
- directory can add or remove or modify things just before it gets
- packaged. Any transformation is permitted. Generally, not much is
- needed.</li>
- </ol>
-
- <p>
- You will see various messages if things go wrong:
- </p>
-
- <ol>
- <li>During the copying process, any files that are missing will be flagged
- with: <tt>===== WARNING: Distribution Source 'dir/file' Not Found!</tt>
- These must be corrected by either adding the file or removing it from
- <tt>EXTRA_DIST</tt>.</li>
- <li>If you build the distribution with <tt>VERBOSE=1</tt>, then you might
- also see: <tt>Skipping non-existent 'dir/file'</tt> in certain cases
- where it's okay to skip the file.</li>
- <li>The target can fail if any of the things it does fail. Error messages
- should indicate what went wrong.</li>
- </ol>
-</div>
-
-<!-- ======================================================================= -->
-<div class="doc_subsection">dist</div>
-<div class="doc_text">
- <p>
- This target does exactly what <tt>distdir</tt> target does, but also includes
- assembling the tarballs. There are actually four related targets here:
- </p>
-
- <ul>
- <li><b><tt>dist-gzip</tt></b>: package the gzipped distribution tar
- file. The distribution directory is packaged into a single file ending
- in <tt>.tar.gz</tt> which is gzip compressed.</li>
- <li><b><tt>dist-bzip2</tt></b>: package the bzip2 distribution tar file.
- The distribution directory is packaged into a single file ending in
- <tt>.tar.bzip2</tt> which is bzip2 compressed.</li>
- <li><b><tt>dist-zip</tt></b>: package the zip distribution file. The
- distribution directory is packaged into a single file ending in
- <tt>.zip</tt> which is zip compressed.</li>
-   <li><b><tt>dist</tt></b>: runs all three: <tt>dist-gzip</tt>,
-   <tt>dist-bzip2</tt>, and <tt>dist-zip</tt>.</li>
- </ul>
-</div>
-
-<!-- ======================================================================= -->
-<div class="doc_subsection">dist-check</div>
-<div class="doc_text">
- <p>
- This target checks the distribution. The basic idea is that it unpacks the
- distribution tarball and ensures that it can build. It takes the following
- actions:
- </p>
-
- <ol>
- <li>It depends on the <tt>dist-gzip</tt> target which, if it hasn't already
- been built, builds the gzip tar bundle (see dist and distdir
- above).</li>
- <li>removes any pre-existing <tt>_distcheckdir</tt> at the top level.</li>
- <li>creates a new <tt>_distcheckdir</tt> directory at the top level.</li>
- <li>creates a <tt>build</tt> subdirectory and an <tt>install</tt>
- subdirectory under <tt>_distcheckdir</tt>.</li>
- <li>unzips and untars the release tarball into <tt>_distcheckdir</tt>,
- creating <tt>LLVM-1.7</tt> directory (from the tarball).</li>
- <li>in the build subdirectory, it configures with appropriate options to
- build from the unpacked source tarball into the <tt>build</tt> directory
- with installation in the <tt>install</tt> directory.</li>
- <li>runs <tt>make all</tt></li>
-   <li>runs <tt>make check</tt></li>
- <li>runs <tt>make install</tt></li>
- <li>runs <tt>make uninstall</tt></li>
- <li>runs <tt>make dist</tt></li>
- <li>runs <tt>make clean</tt></li>
- <li>runs <tt>make dist-clean</tt></li>
- </ol>
-
- <p>
-   If it can pass all that, the distribution will be deemed worthy of
-   distribution and you will see:
- </p>
-
- <pre>===== LLVM-1.7.tar.gz Ready For Distribution =====</pre>
-
- <p>
- This means the tarball should then be tested on other platforms and have the
- nightly test run against it. If those all pass, THEN it is ready for
- distribution.
- </p>
-
- <p>
- A note about disk space: using <tt>dist-check</tt> will easily triple the
- amount of disk space your build tree is using. You might want to check
- available space before you begin.
- </p>
-</div>
-
-<!-- ======================================================================= -->
-<div class="doc_subsection">dist-clean</div>
-<div class="doc_text">
- <p>
- In addition to doing a normal <tt>clean</tt>, this target will clean up the
- files and directories created by the distribution targets. In particular the
- distribution directory (<tt>LLVM-X.X</tt>), check directory
- (<tt>_distcheckdir</tt>), and the various tarballs will be removed. You do
- this after the release has shipped and you no longer need this stuff in your
- build tree.
- </p>
-</div>
-
-<!-- *********************************************************************** -->
<hr>
<address>
<a href="http://jigsaw.w3.org/css-validator/check/referer"><img
@@ -594,7 +518,7 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.cs.uiuc.edu">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
</html>
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
index 2e6cc4f9da73..2ac457539628 100644
--- a/docs/HowToSubmitABug.html
+++ b/docs/HowToSubmitABug.html
@@ -60,7 +60,7 @@ more easily.</p>
<p>Once you have a reduced test-case, go to <a
href="http://llvm.org/bugs/enter_bug.cgi">the LLVM Bug Tracking
System</a> and fill out the form with the necessary details (note that you don't
-need to pick a catagory, just use the "new-bugs" catagory if you're not sure).
+need to pick a category, just use the "new-bugs" category if you're not sure).
The bug description should contain the following
information:</p>
@@ -183,12 +183,12 @@ to llvm-gcc (in addition to the options you already pass). Once your have
foo.bc, one of the following commands should fail:</p>
<ol>
-<li><tt><b>llc</b> foo.bc -f</tt></li>
-<li><tt><b>llc</b> foo.bc -f -relocation-model=pic</tt></li>
-<li><tt><b>llc</b> foo.bc -f -relocation-model=static</tt></li>
-<li><tt><b>llc</b> foo.bc -f -enable-eh</tt></li>
-<li><tt><b>llc</b> foo.bc -f -relocation-model=pic -enable-eh</tt></li>
-<li><tt><b>llc</b> foo.bc -f -relocation-model=static -enable-eh</tt></li>
+<li><tt><b>llc</b> foo.bc</tt></li>
+<li><tt><b>llc</b> foo.bc -relocation-model=pic</tt></li>
+<li><tt><b>llc</b> foo.bc -relocation-model=static</tt></li>
+<li><tt><b>llc</b> foo.bc -enable-eh</tt></li>
+<li><tt><b>llc</b> foo.bc -relocation-model=pic -enable-eh</tt></li>
+<li><tt><b>llc</b> foo.bc -relocation-model=static -enable-eh</tt></li>
</ol>
<p>If none of these crash, please follow the instructions for a
@@ -320,7 +320,7 @@ the following:</p>
<div class="doc_code">
<p><tt>
-<b>llc</b> test.bc -o test.s -f<br>
+<b>llc</b> test.bc -o test.s<br>
<b>gcc</b> test.s safe.so -o test.llc<br>
./test.llc [program options]
</tt></p>
@@ -348,7 +348,7 @@ the following:</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2009-04-05 02:41:19 +0200 (Sun, 05 Apr 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index f229150ea300..21e41d5fa6b6 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -20,7 +20,24 @@
<li><a href="#highlevel">High Level Structure</a>
<ol>
<li><a href="#modulestructure">Module Structure</a></li>
- <li><a href="#linkage">Linkage Types</a></li>
+ <li><a href="#linkage">Linkage Types</a>
+ <ol>
+ <li><a href="#linkage_private">'<tt>private</tt>' Linkage</a></li>
+ <li><a href="#linkage_linker_private">'<tt>linker_private</tt>' Linkage</a></li>
+ <li><a href="#linkage_internal">'<tt>internal</tt>' Linkage</a></li>
+ <li><a href="#linkage_available_externally">'<tt>available_externally</tt>' Linkage</a></li>
+ <li><a href="#linkage_linkonce">'<tt>linkonce</tt>' Linkage</a></li>
+ <li><a href="#linkage_common">'<tt>common</tt>' Linkage</a></li>
+ <li><a href="#linkage_weak">'<tt>weak</tt>' Linkage</a></li>
+ <li><a href="#linkage_appending">'<tt>appending</tt>' Linkage</a></li>
+ <li><a href="#linkage_externweak">'<tt>extern_weak</tt>' Linkage</a></li>
+ <li><a href="#linkage_linkonce_odr">'<tt>linkonce_odr</tt>' Linkage</a></li>
+          <li><a href="#linkage_weak_odr">'<tt>weak_odr</tt>' Linkage</a></li>
+ <li><a href="#linkage_external">'<tt>externally visible</tt>' Linkage</a></li>
+ <li><a href="#linkage_dllimport">'<tt>dllimport</tt>' Linkage</a></li>
+ <li><a href="#linkage_dllexport">'<tt>dllexport</tt>' Linkage</a></li>
+ </ol>
+ </li>
<li><a href="#callingconv">Calling Conventions</a></li>
<li><a href="#namedtypes">Named Types</a></li>
<li><a href="#globalvars">Global Variables</a></li>
@@ -31,6 +48,7 @@
<li><a href="#gc">Garbage Collector Names</a></li>
<li><a href="#moduleasm">Module-Level Inline Assembly</a></li>
<li><a href="#datalayout">Data Layout</a></li>
+ <li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li>
</ol>
</li>
<li><a href="#typesystem">Type System</a>
@@ -38,6 +56,7 @@
<li><a href="#t_classifications">Type Classifications</a></li>
<li><a href="#t_primitive">Primitive Types</a>
<ol>
+ <li><a href="#t_integer">Integer Type</a></li>
<li><a href="#t_floating">Floating Point Types</a></li>
<li><a href="#t_void">Void Type</a></li>
<li><a href="#t_label">Label Type</a></li>
@@ -46,7 +65,6 @@
</li>
<li><a href="#t_derived">Derived Types</a>
<ol>
- <li><a href="#t_integer">Integer Type</a></li>
<li><a href="#t_array">Array Type</a></li>
<li><a href="#t_function">Function Type</a></li>
<li><a href="#t_pointer">Pointer Type</a></li>
@@ -74,6 +92,17 @@
<li><a href="#inlineasm">Inline Assembler Expressions</a></li>
</ol>
</li>
+ <li><a href="#intrinsic_globals">Intrinsic Global Variables</a>
+ <ol>
+ <li><a href="#intg_used">The '<tt>llvm.used</tt>' Global Variable</a></li>
+ <li><a href="#intg_compiler_used">The '<tt>llvm.compiler.used</tt>'
+ Global Variable</a></li>
+ <li><a href="#intg_global_ctors">The '<tt>llvm.global_ctors</tt>'
+ Global Variable</a></li>
+ <li><a href="#intg_global_dtors">The '<tt>llvm.global_dtors</tt>'
+ Global Variable</a></li>
+ </ol>
+ </li>
<li><a href="#instref">Instruction Reference</a>
<ol>
<li><a href="#terminators">Terminator Instructions</a>
@@ -155,8 +184,6 @@
<ol>
<li><a href="#i_icmp">'<tt>icmp</tt>' Instruction</a></li>
<li><a href="#i_fcmp">'<tt>fcmp</tt>' Instruction</a></li>
- <li><a href="#i_vicmp">'<tt>vicmp</tt>' Instruction</a></li>
- <li><a href="#i_vfcmp">'<tt>vfcmp</tt>' Instruction</a></li>
<li><a href="#i_phi">'<tt>phi</tt>' Instruction</a></li>
<li><a href="#i_select">'<tt>select</tt>' Instruction</a></li>
<li><a href="#i_call">'<tt>call</tt>' Instruction</a></li>
@@ -210,8 +237,6 @@
<li><a href="#int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic </a></li>
<li><a href="#int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic </a></li>
<li><a href="#int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic </a></li>
- <li><a href="#int_part_select">'<tt>llvm.part.select.*</tt>' Intrinsic </a></li>
- <li><a href="#int_part_set">'<tt>llvm.part.set.*</tt>' Intrinsic </a></li>
</ol>
</li>
<li><a href="#int_overflow">Arithmetic with Overflow Intrinsics</a>
@@ -248,6 +273,14 @@
<li><a href="#int_atomic_load_umin"><tt>llvm.atomic.load.umin</tt></a></li>
</ol>
</li>
+ <li><a href="#int_memorymarkers">Memory Use Markers</a>
+ <ol>
+ <li><a href="#int_lifetime_start"><tt>llvm.lifetime.start</tt></a></li>
+ <li><a href="#int_lifetime_end"><tt>llvm.lifetime.end</tt></a></li>
+ <li><a href="#int_invariant_start"><tt>llvm.invariant.start</tt></a></li>
+ <li><a href="#int_invariant_end"><tt>llvm.invariant.end</tt></a></li>
+ </ol>
+ </li>
<li><a href="#int_general">General intrinsics</a>
<ol>
<li><a href="#int_var_annotation">
@@ -274,12 +307,13 @@
<!-- *********************************************************************** -->
<div class="doc_text">
-<p>This document is a reference manual for the LLVM assembly language.
-LLVM is a Static Single Assignment (SSA) based representation that provides
-type safety, low-level operations, flexibility, and the capability of
-representing 'all' high-level languages cleanly. It is the common code
-representation used throughout all phases of the LLVM compilation
-strategy.</p>
+
+<p>This document is a reference manual for the LLVM assembly language. LLVM is
+ a Static Single Assignment (SSA) based representation that provides type
+ safety, low-level operations, flexibility, and the capability of representing
+ 'all' high-level languages cleanly. It is the common code representation
+ used throughout all phases of the LLVM compilation strategy.</p>
+
</div>
<!-- *********************************************************************** -->
@@ -288,26 +322,24 @@ strategy.</p>
<div class="doc_text">
-<p>The LLVM code representation is designed to be used in three
-different forms: as an in-memory compiler IR, as an on-disk bitcode
-representation (suitable for fast loading by a Just-In-Time compiler),
-and as a human readable assembly language representation. This allows
-LLVM to provide a powerful intermediate representation for efficient
-compiler transformations and analysis, while providing a natural means
-to debug and visualize the transformations. The three different forms
-of LLVM are all equivalent. This document describes the human readable
-representation and notation.</p>
+<p>The LLVM code representation is designed to be used in three different forms:
+ as an in-memory compiler IR, as an on-disk bitcode representation (suitable
+ for fast loading by a Just-In-Time compiler), and as a human readable
+ assembly language representation. This allows LLVM to provide a powerful
+ intermediate representation for efficient compiler transformations and
+ analysis, while providing a natural means to debug and visualize the
+ transformations. The three different forms of LLVM are all equivalent. This
+ document describes the human readable representation and notation.</p>
-<p>The LLVM representation aims to be light-weight and low-level
-while being expressive, typed, and extensible at the same time. It
-aims to be a "universal IR" of sorts, by being at a low enough level
-that high-level ideas may be cleanly mapped to it (similar to how
-microprocessors are "universal IR's", allowing many source languages to
-be mapped to them). By providing type information, LLVM can be used as
-the target of optimizations: for example, through pointer analysis, it
-can be proven that a C automatic variable is never accessed outside of
-the current function... allowing it to be promoted to a simple SSA
-value instead of a memory location.</p>
+<p>The LLVM representation aims to be light-weight and low-level while being
+ expressive, typed, and extensible at the same time. It aims to be a
+ "universal IR" of sorts, by being at a low enough level that high-level ideas
+ may be cleanly mapped to it (similar to how microprocessors are "universal
+ IR's", allowing many source languages to be mapped to them). By providing
+ type information, LLVM can be used as the target of optimizations: for
+ example, through pointer analysis, it can be proven that a C automatic
+ variable is never accessed outside of the current function... allowing it to
+ be promoted to a simple SSA value instead of a memory location.</p>
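+
+<p>As an illustrative sketch (the names here are hypothetical), such a
+  promotion turns a stack slot into a plain SSA value:</p>
+
+<div class="doc_code">
+<pre>
+%x.addr = alloca i32               ; the C variable lowered to a stack slot
+store i32 7, i32* %x.addr
+%tmp = load i32* %x.addr           ; if %x.addr provably never escapes, this
+                                   ; load can be replaced by the value 7
+</pre>
+</div>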
</div>
@@ -316,10 +348,10 @@ value instead of a memory location.</p>
<div class="doc_text">
-<p>It is important to note that this document describes 'well formed'
-LLVM assembly language. There is a difference between what the parser
-accepts and what is considered 'well formed'. For example, the
-following instruction is syntactically okay, but not well formed:</p>
+<p>It is important to note that this document describes 'well formed' LLVM
+ assembly language. There is a difference between what the parser accepts and
+ what is considered 'well formed'. For example, the following instruction is
+ syntactically okay, but not well formed:</p>
<div class="doc_code">
<pre>
@@ -327,13 +359,13 @@ following instruction is syntactically okay, but not well formed:</p>
</pre>
</div>
-<p>...because the definition of <tt>%x</tt> does not dominate all of
-its uses. The LLVM infrastructure provides a verification pass that may
-be used to verify that an LLVM module is well formed. This pass is
-automatically run by the parser after parsing input assembly and by
-the optimizer before it outputs bitcode. The violations pointed out
-by the verifier pass indicate bugs in transformation passes or input to
-the parser.</p>
+<p>...because the definition of <tt>%x</tt> does not dominate all of its
+ uses. The LLVM infrastructure provides a verification pass that may be used
+ to verify that an LLVM module is well formed. This pass is automatically run
+ by the parser after parsing input assembly and by the optimizer before it
+ outputs bitcode. The violations pointed out by the verifier pass indicate
+ bugs in transformation passes or input to the parser.</p>
+
</div>
<!-- Describe the typesetting conventions here. -->
@@ -344,44 +376,47 @@ the parser.</p>
<div class="doc_text">
- <p>LLVM identifiers come in two basic types: global and local. Global
- identifiers (functions, global variables) begin with the @ character. Local
- identifiers (register names, types) begin with the % character. Additionally,
- there are three different formats for identifiers, for different purposes:</p>
+<p>LLVM identifiers come in two basic types: global and local. Global
+ identifiers (functions, global variables) begin with the <tt>'@'</tt>
+ character. Local identifiers (register names, types) begin with
+ the <tt>'%'</tt> character. Additionally, there are three different formats
+ for identifiers, for different purposes:</p>
<ol>
<li>Named values are represented as a string of characters with their prefix.
- For example, %foo, @DivisionByZero, %a.really.long.identifier. The actual
- regular expression used is '<tt>[%@][a-zA-Z$._][a-zA-Z$._0-9]*</tt>'.
- Identifiers which require other characters in their names can be surrounded
- with quotes. Special characters may be escaped using "\xx" where xx is the
- ASCII code for the character in hexadecimal. In this way, any character can
- be used in a name value, even quotes themselves.
+ For example, <tt>%foo</tt>, <tt>@DivisionByZero</tt>,
+ <tt>%a.really.long.identifier</tt>. The actual regular expression used is
+ '<tt>[%@][a-zA-Z$._][a-zA-Z$._0-9]*</tt>'. Identifiers which require
+ other characters in their names can be surrounded with quotes. Special
+ characters may be escaped using <tt>"\xx"</tt> where <tt>xx</tt> is the
+ ASCII code for the character in hexadecimal. In this way, any character
+ can be used in a name value, even quotes themselves.</li>
<li>Unnamed values are represented as an unsigned numeric value with their
- prefix. For example, %12, @2, %44.</li>
+ prefix. For example, <tt>%12</tt>, <tt>@2</tt>, <tt>%44</tt>.</li>
<li>Constants, which are described in a <a href="#constants">section about
- constants</a>, below.</li>
+ constants</a>, below.</li>
</ol>
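+
+<p>For illustration only, here are hypothetical values using each of these
+  forms:</p>
+
+<div class="doc_code">
+<pre>
+@DivisionByZero = global i32 0            ; named global value
+@"a name with spaces" = global i32 1      ; quoted identifier
+%44 = add i32 %12, 1                      ; unnamed local values
+</pre>
+</div>
+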
<p>LLVM requires that values start with a prefix for two reasons: Compilers
-don't need to worry about name clashes with reserved words, and the set of
-reserved words may be expanded in the future without penalty. Additionally,
-unnamed identifiers allow a compiler to quickly come up with a temporary
-variable without having to avoid symbol table conflicts.</p>
+ don't need to worry about name clashes with reserved words, and the set of
+ reserved words may be expanded in the future without penalty. Additionally,
+ unnamed identifiers allow a compiler to quickly come up with a temporary
+ variable without having to avoid symbol table conflicts.</p>
<p>Reserved words in LLVM are very similar to reserved words in other
-languages. There are keywords for different opcodes
-('<tt><a href="#i_add">add</a></tt>',
- '<tt><a href="#i_bitcast">bitcast</a></tt>',
- '<tt><a href="#i_ret">ret</a></tt>', etc...), for primitive type names ('<tt><a
-href="#t_void">void</a></tt>', '<tt><a href="#t_primitive">i32</a></tt>', etc...),
-and others. These reserved words cannot conflict with variable names, because
-none of them start with a prefix character ('%' or '@').</p>
+ languages. There are keywords for different opcodes
+ ('<tt><a href="#i_add">add</a></tt>',
+ '<tt><a href="#i_bitcast">bitcast</a></tt>',
+ '<tt><a href="#i_ret">ret</a></tt>', etc...), for primitive type names
+ ('<tt><a href="#t_void">void</a></tt>',
+ '<tt><a href="#t_primitive">i32</a></tt>', etc...), and others. These
+ reserved words cannot conflict with variable names, because none of them
+ start with a prefix character (<tt>'%'</tt> or <tt>'@'</tt>).</p>
<p>Here is an example of LLVM code to multiply the integer variable
-'<tt>%X</tt>' by 8:</p>
+ '<tt>%X</tt>' by 8:</p>
<p>The easy way:</p>
@@ -409,25 +444,23 @@ none of them start with a prefix character ('%' or '@').</p>
</pre>
</div>
-<p>This last way of multiplying <tt>%X</tt> by 8 illustrates several
-important lexical features of LLVM:</p>
+<p>This last way of multiplying <tt>%X</tt> by 8 illustrates several important
+ lexical features of LLVM:</p>
<ol>
-
<li>Comments are delimited with a '<tt>;</tt>' and go until the end of
- line.</li>
+ line.</li>
<li>Unnamed temporaries are created when the result of a computation is not
- assigned to a named value.</li>
+ assigned to a named value.</li>
   <li>Unnamed temporaries are numbered sequentially.</li>
-
</ol>
<p>...and it also shows a convention that we follow in this document. When
-demonstrating instructions, we will follow an instruction with a comment that
-defines the type and name of value produced. Comments are shown in italic
-text.</p>
+ demonstrating instructions, we will follow an instruction with a comment that
+   defines the type and name of the value produced. Comments are shown in italic
+ text.</p>
</div>
@@ -441,12 +474,12 @@ text.</p>
<div class="doc_text">
-<p>LLVM programs are composed of "Module"s, each of which is a
-translation unit of the input programs. Each module consists of
-functions, global variables, and symbol table entries. Modules may be
-combined together with the LLVM linker, which merges function (and
-global variable) definitions, resolves forward declarations, and merges
-symbol table entries. Here is an example of the "hello world" module:</p>
+<p>LLVM programs are composed of "Module"s, each of which is a translation unit
+ of the input programs. Each module consists of functions, global variables,
+ and symbol table entries. Modules may be combined together with the LLVM
+ linker, which merges function (and global variable) definitions, resolves
+ forward declarations, and merges symbol table entries. Here is an example of
+ the "hello world" module:</p>
<div class="doc_code">
<pre><i>; Declare the string constant as a global constant...</i>
@@ -454,32 +487,32 @@ symbol table entries. Here is an example of the "hello world" module:</p>
href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>
<i>; External declaration of the puts function</i>
-<a href="#functionstructure">declare</a> i32 @puts(i8 *) <i>; i32(i8 *)* </i>
+<a href="#functionstructure">declare</a> i32 @puts(i8 *) <i>; i32(i8 *)* </i>
<i>; Definition of main function</i>
-define i32 @main() { <i>; i32()* </i>
+define i32 @main() { <i>; i32()* </i>
<i>; Convert [13 x i8]* to i8 *...</i>
%cast210 = <a
- href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8 *</i>
+ href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8 *</i>
<i>; Call puts function to write out the string to stdout...</i>
<a
- href="#i_call">call</a> i32 @puts(i8 * %cast210) <i>; i32</i>
+ href="#i_call">call</a> i32 @puts(i8 * %cast210) <i>; i32</i>
<a
href="#i_ret">ret</a> i32 0<br>}<br>
</pre>
</div>
-<p>This example is made up of a <a href="#globalvars">global variable</a>
-named "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>"
-function, and a <a href="#functionstructure">function definition</a>
-for "<tt>main</tt>".</p>
+<p>This example is made up of a <a href="#globalvars">global variable</a> named
+ "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>" function, and
+ a <a href="#functionstructure">function definition</a> for
+ "<tt>main</tt>".</p>
-<p>In general, a module is made up of a list of global values,
-where both functions and global variables are global values. Global values are
-represented by a pointer to a memory location (in this case, a pointer to an
-array of char, and a pointer to a function), and have one of the following <a
-href="#linkage">linkage types</a>.</p>
+<p>In general, a module is made up of a list of global values, where both
+ functions and global variables are global values. Global values are
+ represented by a pointer to a memory location (in this case, a pointer to an
+ array of char, and a pointer to a function), and have one of the
+ following <a href="#linkage">linkage types</a>.</p>
</div>
@@ -490,139 +523,126 @@ href="#linkage">linkage types</a>.</p>
<div class="doc_text">
-<p>
-All Global Variables and Functions have one of the following types of linkage:
-</p>
+<p>All Global Variables and Functions have one of the following types of
+ linkage:</p>
<dl>
-
<dt><tt><b><a name="linkage_private">private</a></b></tt>: </dt>
-
- <dd>Global values with private linkage are only directly accessible by
- objects in the current module. In particular, linking code into a module with
- an private global value may cause the private to be renamed as necessary to
- avoid collisions. Because the symbol is private to the module, all
- references can be updated. This doesn't show up in any symbol table in the
- object file.
- </dd>
+ <dd>Global values with private linkage are only directly accessible by objects
+      in the current module. In particular, linking code into a module with a
+      private global value may cause the private symbol to be renamed as necessary to
+ avoid collisions. Because the symbol is private to the module, all
+ references can be updated. This doesn't show up in any symbol table in the
+ object file.</dd>
+
+ <dt><tt><b><a name="linkage_linker_private">linker_private</a></b></tt>: </dt>
+ <dd>Similar to private, but the symbol is passed through the assembler and
+ removed by the linker after evaluation. Note that (unlike private
+ symbols) linker_private symbols are subject to coalescing by the linker:
+ weak symbols get merged and redefinitions are rejected. However, unlike
+ normal strong symbols, they are removed by the linker from the final
+ linked image (executable or dynamic library).</dd>
<dt><tt><b><a name="linkage_internal">internal</a></b></tt>: </dt>
+ <dd>Similar to private, but the value shows as a local symbol
+ (<tt>STB_LOCAL</tt> in the case of ELF) in the object file. This
+ corresponds to the notion of the '<tt>static</tt>' keyword in C.</dd>
- <dd> Similar to private, but the value shows as a local symbol (STB_LOCAL in
- the case of ELF) in the object file. This corresponds to the notion of the
- '<tt>static</tt>' keyword in C.
- </dd>
-
- <dt><tt><b><a name="available_externally">available_externally</a></b></tt>:
- </dt>
-
+ <dt><tt><b><a name="linkage_available_externally">available_externally</a></b></tt>: </dt>
<dd>Globals with "<tt>available_externally</tt>" linkage are never emitted
- into the object file corresponding to the LLVM module. They exist to
- allow inlining and other optimizations to take place given knowledge of the
- definition of the global, which is known to be somewhere outside the module.
- Globals with <tt>available_externally</tt> linkage are allowed to be discarded
- at will, and are otherwise the same as <tt>linkonce_odr</tt>. This linkage
- type is only allowed on definitions, not declarations.</dd>
+ into the object file corresponding to the LLVM module. They exist to
+ allow inlining and other optimizations to take place given knowledge of
+ the definition of the global, which is known to be somewhere outside the
+ module. Globals with <tt>available_externally</tt> linkage are allowed to
+ be discarded at will, and are otherwise the same as <tt>linkonce_odr</tt>.
+ This linkage type is only allowed on definitions, not declarations.</dd>
<dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt>: </dt>
-
<dd>Globals with "<tt>linkonce</tt>" linkage are merged with other globals of
- the same name when linkage occurs. This is typically used to implement
- inline functions, templates, or other code which must be generated in each
- translation unit that uses it. Unreferenced <tt>linkonce</tt> globals are
- allowed to be discarded.
- </dd>
-
- <dt><tt><b><a name="linkage_common">common</a></b></tt>: </dt>
-
- <dd>"<tt>common</tt>" linkage is exactly the same as <tt>linkonce</tt>
- linkage, except that unreferenced <tt>common</tt> globals may not be
- discarded. This is used for globals that may be emitted in multiple
- translation units, but that are not guaranteed to be emitted into every
- translation unit that uses them. One example of this is tentative
- definitions in C, such as "<tt>int X;</tt>" at global scope.
- </dd>
+ the same name when linkage occurs. This is typically used to implement
+ inline functions, templates, or other code which must be generated in each
+ translation unit that uses it. Unreferenced <tt>linkonce</tt> globals are
+ allowed to be discarded.</dd>
<dt><tt><b><a name="linkage_weak">weak</a></b></tt>: </dt>
+ <dd>"<tt>weak</tt>" linkage has the same merging semantics as
+ <tt>linkonce</tt> linkage, except that unreferenced globals with
+ <tt>weak</tt> linkage may not be discarded. This is used for globals that
+ are declared "weak" in C source code.</dd>
+
+ <dt><tt><b><a name="linkage_common">common</a></b></tt>: </dt>
+  <dd>"<tt>common</tt>" linkage is most similar to "<tt>weak</tt>" linkage, but
+      it is used for tentative definitions in C, such as "<tt>int X;</tt>" at
+      global scope. Symbols with "<tt>common</tt>" linkage are merged in the
+      same way as <tt>weak</tt> symbols, and they may not be deleted if
+      unreferenced.
+ <tt>common</tt> symbols may not have an explicit section,
+ must have a zero initializer, and may not be marked '<a
+ href="#globalvars"><tt>constant</tt></a>'. Functions and aliases may not
+ have common linkage.</dd>
- <dd>"<tt>weak</tt>" linkage is the same as <tt>common</tt> linkage, except
- that some targets may choose to emit different assembly sequences for them
- for target-dependent reasons. This is used for globals that are declared
- "weak" in C source code.
- </dd>
<dt><tt><b><a name="linkage_appending">appending</a></b></tt>: </dt>
-
<dd>"<tt>appending</tt>" linkage may only be applied to global variables of
- pointer to array type. When two global variables with appending linkage are
- linked together, the two global arrays are appended together. This is the
- LLVM, typesafe, equivalent of having the system linker append together
- "sections" with identical names when .o files are linked.
- </dd>
+ pointer to array type. When two global variables with appending linkage
+ are linked together, the two global arrays are appended together. This is
+ the LLVM, typesafe, equivalent of having the system linker append together
+ "sections" with identical names when .o files are linked.</dd>
<dt><tt><b><a name="linkage_externweak">extern_weak</a></b></tt>: </dt>
-
- <dd>The semantics of this linkage follow the ELF object file model: the
- symbol is weak until linked, if not linked, the symbol becomes null instead
- of being an undefined reference.
- </dd>
-
- <dt><tt><b><a name="linkage_linkonce">linkonce_odr</a></b></tt>: </dt>
- <dt><tt><b><a name="linkage_weak">weak_odr</a></b></tt>: </dt>
- <dd>Some languages allow differing globals to be merged, such as two
- functions with different semantics. Other languages, such as <tt>C++</tt>,
- ensure that only equivalent globals are ever merged (the "one definition
- rule" - "ODR"). Such languages can use the <tt>linkonce_odr</tt>
- and <tt>weak_odr</tt> linkage types to indicate that the global will only
- be merged with equivalent globals. These linkage types are otherwise the
- same as their non-<tt>odr</tt> versions.
- </dd>
+ <dd>The semantics of this linkage follow the ELF object file model: the symbol
+      is weak until linked; if not linked, the symbol becomes null instead of
+ being an undefined reference.</dd>
+
+ <dt><tt><b><a name="linkage_linkonce_odr">linkonce_odr</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_weak_odr">weak_odr</a></b></tt>: </dt>
+ <dd>Some languages allow differing globals to be merged, such as two functions
+ with different semantics. Other languages, such as <tt>C++</tt>, ensure
+ that only equivalent globals are ever merged (the "one definition rule" -
+ "ODR"). Such languages can use the <tt>linkonce_odr</tt>
+ and <tt>weak_odr</tt> linkage types to indicate that the global will only
+ be merged with equivalent globals. These linkage types are otherwise the
+ same as their non-<tt>odr</tt> versions.</dd>
<dt><tt><b><a name="linkage_external">externally visible</a></b></tt>:</dt>
-
<dd>If none of the above identifiers are used, the global is externally
- visible, meaning that it participates in linkage and can be used to resolve
- external symbol references.
- </dd>
+ visible, meaning that it participates in linkage and can be used to
+ resolve external symbol references.</dd>
</dl>
- <p>
- The next two types of linkage are targeted for Microsoft Windows platform
- only. They are designed to support importing (exporting) symbols from (to)
- DLLs (Dynamic Link Libraries).
- </p>
+<p>The next two types of linkage are targeted at the Microsoft Windows platform
+ only. They are designed to support importing (exporting) symbols from (to)
+ DLLs (Dynamic Link Libraries).</p>
- <dl>
+<dl>
<dt><tt><b><a name="linkage_dllimport">dllimport</a></b></tt>: </dt>
-
<dd>"<tt>dllimport</tt>" linkage causes the compiler to reference a function
- or variable via a global pointer to a pointer that is set up by the DLL
- exporting the symbol. On Microsoft Windows targets, the pointer name is
- formed by combining <code>__imp_</code> and the function or variable name.
- </dd>
+ or variable via a global pointer to a pointer that is set up by the DLL
+ exporting the symbol. On Microsoft Windows targets, the pointer name is
+ formed by combining <code>__imp_</code> and the function or variable
+ name.</dd>
<dt><tt><b><a name="linkage_dllexport">dllexport</a></b></tt>: </dt>
-
<dd>"<tt>dllexport</tt>" linkage causes the compiler to provide a global
- pointer to a pointer in a DLL, so that it can be referenced with the
- <tt>dllimport</tt> attribute. On Microsoft Windows targets, the pointer
- name is formed by combining <code>__imp_</code> and the function or variable
- name.
- </dd>
-
+ pointer to a pointer in a DLL, so that it can be referenced with the
+ <tt>dllimport</tt> attribute. On Microsoft Windows targets, the pointer
+ name is formed by combining <code>__imp_</code> and the function or
+ variable name.</dd>
</dl>
-<p>For example, since the "<tt>.LC0</tt>"
-variable is defined to be internal, if another module defined a "<tt>.LC0</tt>"
-variable and was linked with this one, one of the two would be renamed,
-preventing a collision. Since "<tt>main</tt>" and "<tt>puts</tt>" are
-external (i.e., lacking any linkage declarations), they are accessible
-outside of the current module.</p>
-<p>It is illegal for a function <i>declaration</i>
-to have any linkage type other than "externally visible", <tt>dllimport</tt>
-or <tt>extern_weak</tt>.</p>
+<p>For example, since the "<tt>.LC0</tt>" variable is defined to be internal, if
+ another module defined a "<tt>.LC0</tt>" variable and was linked with this
+ one, one of the two would be renamed, preventing a collision. Since
+ "<tt>main</tt>" and "<tt>puts</tt>" are external (i.e., lacking any linkage
+ declarations), they are accessible outside of the current module.</p>
+
+<p>It is illegal for a function <i>declaration</i> to have any linkage type
+ other than "externally visible", <tt>dllimport</tt>
+ or <tt>extern_weak</tt>.</p>
+
<p>Aliases can have only <tt>external</tt>, <tt>internal</tt>, <tt>weak</tt>
-or <tt>weak_odr</tt> linkages.</p>
+ or <tt>weak_odr</tt> linkages.</p>
+
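+<p>As a brief illustrative sketch (the symbol names are invented), several of
+   these linkage types might appear together in a single module:</p>
+
+<div class="doc_code">
+<pre>
+@counter = internal global i32 0     ; like a C 'static' variable
+@tentative = common global i32 0     ; like "int tentative;" at C global scope
+@hook = extern_weak global i32       ; null if not resolved at link time
+</pre>
+</div>
+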
</div>
<!-- ======================================================================= -->
@@ -633,55 +653,48 @@ or <tt>weak_odr</tt> linkages.</p>
<div class="doc_text">
<p>LLVM <a href="#functionstructure">functions</a>, <a href="#i_call">calls</a>
-and <a href="#i_invoke">invokes</a> can all have an optional calling convention
-specified for the call. The calling convention of any pair of dynamic
-caller/callee must match, or the behavior of the program is undefined. The
-following calling conventions are supported by LLVM, and more may be added in
-the future:</p>
+ and <a href="#i_invoke">invokes</a> can all have an optional calling
+ convention specified for the call. The calling convention of any pair of
+ dynamic caller/callee must match, or the behavior of the program is
+ undefined. The following calling conventions are supported by LLVM, and more
+ may be added in the future:</p>
<dl>
<dt><b>"<tt>ccc</tt>" - The C calling convention</b>:</dt>
-
<dd>This calling convention (the default if no other calling convention is
- specified) matches the target C calling conventions. This calling convention
- supports varargs function calls and tolerates some mismatch in the declared
- prototype and implemented declaration of the function (as does normal C).
- </dd>
+ specified) matches the target C calling conventions. This calling
+ convention supports varargs function calls and tolerates some mismatch in
+ the declared prototype and implemented declaration of the function (as
+ does normal C).</dd>
<dt><b>"<tt>fastcc</tt>" - The fast calling convention</b>:</dt>
-
<dd>This calling convention attempts to make calls as fast as possible
- (e.g. by passing things in registers). This calling convention allows the
- target to use whatever tricks it wants to produce fast code for the target,
- without having to conform to an externally specified ABI (Application Binary
- Interface). Implementations of this convention should allow arbitrary
- <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> to be
- supported. This calling convention does not support varargs and requires the
- prototype of all callees to exactly match the prototype of the function
- definition.
- </dd>
+ (e.g. by passing things in registers). This calling convention allows the
+ target to use whatever tricks it wants to produce fast code for the
+ target, without having to conform to an externally specified ABI
+ (Application Binary Interface). Implementations of this convention should
+ allow arbitrary <a href="CodeGenerator.html#tailcallopt">tail call
+ optimization</a> to be supported. This calling convention does not
+ support varargs and requires the prototype of all callees to exactly match
+ the prototype of the function definition.</dd>
<dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
-
<dd>This calling convention attempts to make code in the caller as efficient
- as possible under the assumption that the call is not commonly executed. As
- such, these calls often preserve all registers so that the call does not break
- any live ranges in the caller side. This calling convention does not support
- varargs and requires the prototype of all callees to exactly match the
- prototype of the function definition.
- </dd>
+ as possible under the assumption that the call is not commonly executed.
+ As such, these calls often preserve all registers so that the call does
+ not break any live ranges in the caller side. This calling convention
+ does not support varargs and requires the prototype of all callees to
+ exactly match the prototype of the function definition.</dd>
<dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
-
<dd>Any calling convention may be specified by number, allowing
- target-specific calling conventions to be used. Target specific calling
- conventions start at 64.
- </dd>
+      target-specific calling conventions to be used. Target-specific calling
+ conventions start at 64.</dd>
</dl>
<p>More calling conventions can be added/defined on an as-needed basis, to
-support pascal conventions or any other well-known target-independent
-convention.</p>
+ support Pascal conventions or any other well-known target-independent
+ convention.</p>
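+
+<p>For example (an illustrative sketch), a caller and callee agreeing on the
+   fast calling convention might look like:</p>
+
+<div class="doc_code">
+<pre>
+define fastcc i32 @helper(i32 %x) {
+  ret i32 %x
+}
+
+define i32 @caller() {
+  %r = call fastcc i32 @helper(i32 7)   ; convention matches the definition
+  ret i32 %r
+}
+</pre>
+</div>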
</div>
@@ -692,37 +705,29 @@ convention.</p>
<div class="doc_text">
-<p>
-All Global Variables and Functions have one of the following visibility styles:
-</p>
+<p>All Global Variables and Functions have one of the following visibility
+ styles:</p>
<dl>
<dt><b>"<tt>default</tt>" - Default style</b>:</dt>
-
<dd>On targets that use the ELF object file format, default visibility means
- that the declaration is visible to other
- modules and, in shared libraries, means that the declared entity may be
- overridden. On Darwin, default visibility means that the declaration is
- visible to other modules. Default visibility corresponds to "external
- linkage" in the language.
- </dd>
+ that the declaration is visible to other modules and, in shared libraries,
+ means that the declared entity may be overridden. On Darwin, default
+ visibility means that the declaration is visible to other modules. Default
+ visibility corresponds to "external linkage" in the language.</dd>
<dt><b>"<tt>hidden</tt>" - Hidden style</b>:</dt>
-
<dd>Two declarations of an object with hidden visibility refer to the same
- object if they are in the same shared object. Usually, hidden visibility
- indicates that the symbol will not be placed into the dynamic symbol table,
- so no other module (executable or shared library) can reference it
- directly.
- </dd>
+ object if they are in the same shared object. Usually, hidden visibility
+ indicates that the symbol will not be placed into the dynamic symbol
+ table, so no other module (executable or shared library) can reference it
+ directly.</dd>
<dt><b>"<tt>protected</tt>" - Protected style</b>:</dt>
-
<dd>On ELF, protected visibility indicates that the symbol will be placed in
- the dynamic symbol table, but that references within the defining module will
- bind to the local symbol. That is, the symbol cannot be overridden by another
- module.
- </dd>
+ the dynamic symbol table, but that references within the defining module
+ will bind to the local symbol. That is, the symbol cannot be overridden by
+ another module.</dd>
</dl>
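+
+<p>As an illustrative sketch (the names are invented), the three styles look
+   like this:</p>
+
+<div class="doc_code">
+<pre>
+@exported = global i32 1             ; "default" visibility
+@impl_detail = hidden global i32 2   ; kept out of the dynamic symbol table
+define protected void @entry() {     ; visible, but not overridable
+  ret void
+}
+</pre>
+</div>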
</div>
@@ -735,9 +740,8 @@ All Global Variables and Functions have one of the following visibility styles:
<div class="doc_text">
<p>LLVM IR allows you to specify name aliases for certain types. This can make
-it easier to read the IR and make the IR more condensed (particularly when
-recursive types are involved). An example of a name specification is:
-</p>
+ it easier to read the IR and make the IR more condensed (particularly when
+ recursive types are involved). An example of a name specification is:</p>
<div class="doc_code">
<pre>
@@ -745,19 +749,19 @@ recursive types are involved). An example of a name specification is:
</pre>
</div>
-<p>You may give a name to any <a href="#typesystem">type</a> except "<a
-href="t_void">void</a>". Type name aliases may be used anywhere a type is
-expected with the syntax "%mytype".</p>
+<p>You may give a name to any <a href="#typesystem">type</a> except
+ "<a href="t_void">void</a>". Type name aliases may be used anywhere a type
+ is expected with the syntax "%mytype".</p>
<p>Note that type names are aliases for the structural type that they indicate,
-and that you can therefore specify multiple names for the same type. This often
-leads to confusing behavior when dumping out a .ll file. Since LLVM IR uses
-structural typing, the name is not part of the type. When printing out LLVM IR,
-the printer will pick <em>one name</em> to render all types of a particular
-shape. This means that if you have code where two different source types end up
-having the same LLVM type, that the dumper will sometimes print the "wrong" or
-unexpected type. This is an important design point and isn't going to
-change.</p>
+ and that you can therefore specify multiple names for the same type. This
+ often leads to confusing behavior when dumping out a .ll file. Since LLVM IR
+ uses structural typing, the name is not part of the type. When printing out
+ LLVM IR, the printer will pick <em>one name</em> to render all types of a
+ particular shape. This means that if you have code where two different
+   source types end up having the same LLVM type, the dumper will sometimes
+ print the "wrong" or unexpected type. This is an important design point and
+ isn't going to change.</p>
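+
+<p>For instance (a hypothetical sketch), a recursive list type can be declared
+   and used through its name:</p>
+
+<div class="doc_code">
+<pre>
+%mylist = type { %mylist*, i32 }
+
+define i32 @head_value(%mylist* %n) {
+  %p = getelementptr %mylist* %n, i32 0, i32 1
+  %v = load i32* %p
+  ret i32 %v
+}
+</pre>
+</div>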
</div>
@@ -769,48 +773,47 @@ change.</p>
<div class="doc_text">
<p>Global variables define regions of memory allocated at compilation time
-instead of run-time. Global variables may optionally be initialized, may have
-an explicit section to be placed in, and may have an optional explicit alignment
-specified. A variable may be defined as "thread_local", which means that it
-will not be shared by threads (each thread will have a separated copy of the
-variable). A variable may be defined as a global "constant," which indicates
-that the contents of the variable will <b>never</b> be modified (enabling better
-optimization, allowing the global data to be placed in the read-only section of
-an executable, etc). Note that variables that need runtime initialization
-cannot be marked "constant" as there is a store to the variable.</p>
-
-<p>
-LLVM explicitly allows <em>declarations</em> of global variables to be marked
-constant, even if the final definition of the global is not. This capability
-can be used to enable slightly better optimization of the program, but requires
-the language definition to guarantee that optimizations based on the
-'constantness' are valid for the translation units that do not include the
-definition.
-</p>
-
-<p>As SSA values, global variables define pointer values that are in
-scope (i.e. they dominate) all basic blocks in the program. Global
-variables always define a pointer to their "content" type because they
-describe a region of memory, and all memory objects in LLVM are
-accessed through pointers.</p>
-
-<p>A global variable may be declared to reside in a target-specifc numbered
-address space. For targets that support them, address spaces may affect how
-optimizations are performed and/or what target instructions are used to access
-the variable. The default address space is zero. The address space qualifier
-must precede any other attributes.</p>
+ instead of run-time. Global variables may optionally be initialized, may
+ have an explicit section to be placed in, and may have an optional explicit
+ alignment specified. A variable may be defined as "thread_local", which
+ means that it will not be shared by threads (each thread will have a
+   separate copy of the variable). A variable may be defined as a global
+ "constant," which indicates that the contents of the variable
+ will <b>never</b> be modified (enabling better optimization, allowing the
+ global data to be placed in the read-only section of an executable, etc).
+ Note that variables that need runtime initialization cannot be marked
+ "constant" as there is a store to the variable.</p>
+
+<p>LLVM explicitly allows <em>declarations</em> of global variables to be marked
+ constant, even if the final definition of the global is not. This capability
+ can be used to enable slightly better optimization of the program, but
+ requires the language definition to guarantee that optimizations based on the
+ 'constantness' are valid for the translation units that do not include the
+ definition.</p>
+
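+<p>For example (an illustrative sketch), a declaration marked constant and a
+   thread-local definition:</p>
+
+<div class="doc_code">
+<pre>
+@pi = external constant double            ; declaration marked 'constant'
+@per_thread = thread_local global i32 0   ; each thread gets its own copy
+</pre>
+</div>
+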
+<p>As SSA values, global variables define pointer values that are in scope
+ (i.e. they dominate) all basic blocks in the program. Global variables
+ always define a pointer to their "content" type because they describe a
+ region of memory, and all memory objects in LLVM are accessed through
+ pointers.</p>
+
+<p>A global variable may be declared to reside in a target-specific numbered
+ address space. For targets that support them, address spaces may affect how
+ optimizations are performed and/or what target instructions are used to
+ access the variable. The default address space is zero. The address space
+ qualifier must precede any other attributes.</p>
<p>LLVM allows an explicit section to be specified for globals. If the target
-supports it, it will emit globals to the section specified.</p>
+ supports it, it will emit globals to the section specified.</p>
<p>An explicit alignment may be specified for a global. If not present, or if
-the alignment is set to zero, the alignment of the global is set by the target
-to whatever it feels convenient. If an explicit alignment is specified, the
-global is forced to have at least that much alignment. All alignments must be
-a power of 2.</p>
+ the alignment is set to zero, the alignment of the global is set by the
+ target to whatever it feels convenient. If an explicit alignment is
+ specified, the global is forced to have at least that much alignment. All
+ alignments must be a power of 2.</p>
-<p>For example, the following defines a global in a numbered address space with
-an initializer, section, and alignment:</p>
+<p>For example, the following defines a global in a numbered address space with
+ an initializer, section, and alignment:</p>
<div class="doc_code">
<pre>
@@ -828,74 +831,72 @@ an initializer, section, and alignment:</p>
<div class="doc_text">
-<p>LLVM function definitions consist of the "<tt>define</tt>" keyord,
-an optional <a href="#linkage">linkage type</a>, an optional
-<a href="#visibility">visibility style</a>, an optional
-<a href="#callingconv">calling convention</a>, a return type, an optional
-<a href="#paramattrs">parameter attribute</a> for the return type, a function
-name, a (possibly empty) argument list (each with optional
-<a href="#paramattrs">parameter attributes</a>), optional
-<a href="#fnattrs">function attributes</a>, an optional section,
-an optional alignment, an optional <a href="#gc">garbage collector name</a>,
-an opening curly brace, a list of basic blocks, and a closing curly brace.
+<p>LLVM function definitions consist of the "<tt>define</tt>" keyword, an
+ optional <a href="#linkage">linkage type</a>, an optional
+ <a href="#visibility">visibility style</a>, an optional
+ <a href="#callingconv">calling convention</a>, a return type, an optional
+ <a href="#paramattrs">parameter attribute</a> for the return type, a function
+ name, a (possibly empty) argument list (each with optional
+ <a href="#paramattrs">parameter attributes</a>), optional
+ <a href="#fnattrs">function attributes</a>, an optional section, an optional
+ alignment, an optional <a href="#gc">garbage collector name</a>, an opening
+ curly brace, a list of basic blocks, and a closing curly brace.</p>
-LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
-optional <a href="#linkage">linkage type</a>, an optional
-<a href="#visibility">visibility style</a>, an optional
-<a href="#callingconv">calling convention</a>, a return type, an optional
-<a href="#paramattrs">parameter attribute</a> for the return type, a function
-name, a possibly empty list of arguments, an optional alignment, and an optional
-<a href="#gc">garbage collector name</a>.</p>
+<p>LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
+ optional <a href="#linkage">linkage type</a>, an optional
+ <a href="#visibility">visibility style</a>, an optional
+ <a href="#callingconv">calling convention</a>, a return type, an optional
+ <a href="#paramattrs">parameter attribute</a> for the return type, a function
+ name, a possibly empty list of arguments, an optional alignment, and an
+ optional <a href="#gc">garbage collector name</a>.</p>
<p>A function definition contains a list of basic blocks, forming the CFG
-(Control Flow Graph) for
-the function. Each basic block may optionally start with a label (giving the
-basic block a symbol table entry), contains a list of instructions, and ends
-with a <a href="#terminators">terminator</a> instruction (such as a branch or
-function return).</p>
+ (Control Flow Graph) for the function. Each basic block may optionally start
+ with a label (giving the basic block a symbol table entry), contains a list
+ of instructions, and ends with a <a href="#terminators">terminator</a>
+ instruction (such as a branch or function return).</p>
<p>The first basic block in a function is special in two ways: it is immediately
-executed on entrance to the function, and it is not allowed to have predecessor
-basic blocks (i.e. there can not be any branches to the entry block of a
-function). Because the block can have no predecessors, it also cannot have any
-<a href="#i_phi">PHI nodes</a>.</p>
+ executed on entrance to the function, and it is not allowed to have
+ predecessor basic blocks (i.e. there can not be any branches to the entry
+ block of a function). Because the block can have no predecessors, it also
+ cannot have any <a href="#i_phi">PHI nodes</a>.</p>
<p>LLVM allows an explicit section to be specified for functions. If the target
-supports it, it will emit functions to the section specified.</p>
+ supports it, it will emit functions to the section specified.</p>
<p>An explicit alignment may be specified for a function. If not present, or if
-the alignment is set to zero, the alignment of the function is set by the target
-to whatever it feels convenient. If an explicit alignment is specified, the
-function is forced to have at least that much alignment. All alignments must be
-a power of 2.</p>
-
- <h5>Syntax:</h5>
+ the alignment is set to zero, the alignment of the function is set by the
+ target to whatever it feels convenient. If an explicit alignment is
+ specified, the function is forced to have at least that much alignment. All
+ alignments must be a power of 2.</p>
+<h5>Syntax:</h5>
<div class="doc_code">
-<tt>
+<pre>
define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
- [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>]
- &lt;ResultType&gt; @&lt;FunctionName&gt; ([argument list])
- [<a href="#fnattrs">fn Attrs</a>] [section "name"] [align N]
- [<a href="#gc">gc</a>] { ... }
-</tt>
+ [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>]
+ &lt;ResultType&gt; @&lt;FunctionName&gt; ([argument list])
+ [<a href="#fnattrs">fn Attrs</a>] [section "name"] [align N]
+ [<a href="#gc">gc</a>] { ... }
+</pre>
</div>
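+
+<p>For example, a minimal definition exercising several of these pieces (an
+   illustrative sketch) might be:</p>
+
+<div class="doc_code">
+<pre>
+define internal fastcc i32 @square(i32 %x) nounwind {
+entry:
+  %r = mul i32 %x, %x
+  ret i32 %r
+}
+</pre>
+</div>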
</div>
-
<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="aliasstructure">Aliases</a>
</div>
+
<div class="doc_text">
- <p>Aliases act as "second name" for the aliasee value (which can be either
- function, global variable, another alias or bitcast of global value). Aliases
- may have an optional <a href="#linkage">linkage type</a>, and an
- optional <a href="#visibility">visibility style</a>.</p>
- <h5>Syntax:</h5>
+<p>Aliases act as "second name" for the aliasee value (which can be either
+ function, global variable, another alias or bitcast of global value). Aliases
+ may have an optional <a href="#linkage">linkage type</a>, and an
+ optional <a href="#visibility">visibility style</a>.</p>
+<h5>Syntax:</h5>
<div class="doc_code">
<pre>
@&lt;Name&gt; = alias [Linkage] [Visibility] &lt;AliaseeTy&gt; @&lt;Aliasee&gt;
@@ -904,21 +905,21 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
</div>
-
-
<!-- ======================================================================= -->
<div class="doc_subsection"><a name="paramattrs">Parameter Attributes</a></div>
+
<div class="doc_text">
- <p>The return type and each parameter of a function type may have a set of
- <i>parameter attributes</i> associated with them. Parameter attributes are
- used to communicate additional information about the result or parameters of
- a function. Parameter attributes are considered to be part of the function,
- not of the function type, so functions with different parameter attributes
- can have the same function type.</p>
- <p>Parameter attributes are simple keywords that follow the type specified. If
- multiple parameter attributes are needed, they are space separated. For
- example:</p>
+<p>The return type and each parameter of a function type may have a set of
+ <i>parameter attributes</i> associated with them. Parameter attributes are
+ used to communicate additional information about the result or parameters of
+ a function. Parameter attributes are considered to be part of the function,
+ not of the function type, so functions with different parameter attributes
+ can have the same function type.</p>
+
+<p>Parameter attributes are simple keywords that follow the type specified. If
+ multiple parameter attributes are needed, they are space separated. For
+ example:</p>
<div class="doc_code">
<pre>
@@ -928,71 +929,72 @@ declare signext i8 @returns_signed_char()
</pre>
</div>
- <p>Note that any attributes for the function result (<tt>nounwind</tt>,
- <tt>readonly</tt>) come immediately after the argument list.</p>
-
- <p>Currently, only the following parameter attributes are defined:</p>
- <dl>
- <dt><tt>zeroext</tt></dt>
- <dd>This indicates to the code generator that the parameter or return value
- should be zero-extended to a 32-bit value by the caller (for a parameter)
- or the callee (for a return value).</dd>
-
- <dt><tt>signext</tt></dt>
- <dd>This indicates to the code generator that the parameter or return value
- should be sign-extended to a 32-bit value by the caller (for a parameter)
- or the callee (for a return value).</dd>
-
- <dt><tt>inreg</tt></dt>
- <dd>This indicates that this parameter or return value should be treated
- in a special target-dependent fashion during while emitting code for a
- function call or return (usually, by putting it in a register as opposed
- to memory, though some targets use it to distinguish between two different
- kinds of registers). Use of this attribute is target-specific.</dd>
-
- <dt><tt><a name="byval">byval</a></tt></dt>
- <dd>This indicates that the pointer parameter should really be passed by
- value to the function. The attribute implies that a hidden copy of the
- pointee is made between the caller and the callee, so the callee is unable
- to modify the value in the callee. This attribute is only valid on LLVM
- pointer arguments. It is generally used to pass structs and arrays by
- value, but is also valid on pointers to scalars. The copy is considered to
- belong to the caller not the callee (for example,
- <tt><a href="#readonly">readonly</a></tt> functions should not write to
- <tt>byval</tt> parameters). This is not a valid attribute for return
- values. The byval attribute also supports specifying an alignment with the
- align attribute. This has a target-specific effect on the code generator
- that usually indicates a desired alignment for the synthesized stack
- slot.</dd>
-
- <dt><tt>sret</tt></dt>
- <dd>This indicates that the pointer parameter specifies the address of a
- structure that is the return value of the function in the source program.
- This pointer must be guaranteed by the caller to be valid: loads and stores
- to the structure may be assumed by the callee to not to trap. This may only
- be applied to the first parameter. This is not a valid attribute for
- return values. </dd>
-
- <dt><tt>noalias</tt></dt>
- <dd>This indicates that the pointer does not alias any global or any other
- parameter. The caller is responsible for ensuring that this is the
- case. On a function return value, <tt>noalias</tt> additionally indicates
- that the pointer does not alias any other pointers visible to the
- caller. For further details, please see the discussion of the NoAlias
- response in
- <a href="http://llvm.org/docs/AliasAnalysis.html#MustMayNo">alias
- analysis</a>.</dd>
-
- <dt><tt>nocapture</tt></dt>
- <dd>This indicates that the callee does not make any copies of the pointer
- that outlive the callee itself. This is not a valid attribute for return
- values.</dd>
-
- <dt><tt>nest</tt></dt>
- <dd>This indicates that the pointer parameter can be excised using the
- <a href="#int_trampoline">trampoline intrinsics</a>. This is not a valid
- attribute for return values.</dd>
- </dl>
+<p>Note that any attributes for the function result (<tt>nounwind</tt>,
+ <tt>readonly</tt>) come immediately after the argument list.</p>
+
+<p>Currently, only the following parameter attributes are defined:</p>
+
+<dl>
+ <dt><tt>zeroext</tt></dt>
+ <dd>This indicates to the code generator that the parameter or return value
+ should be zero-extended to a 32-bit value by the caller (for a parameter)
+ or the callee (for a return value).</dd>
+
+ <dt><tt>signext</tt></dt>
+ <dd>This indicates to the code generator that the parameter or return value
+ should be sign-extended to a 32-bit value by the caller (for a parameter)
+ or the callee (for a return value).</dd>
+
+ <dt><tt>inreg</tt></dt>
+ <dd>This indicates that this parameter or return value should be treated in a
+ special target-dependent fashion while emitting code for a function
+ call or return (usually, by putting it in a register as opposed to memory,
+ though some targets use it to distinguish between two different kinds of
+ registers). Use of this attribute is target-specific.</dd>
+
+ <dt><tt><a name="byval">byval</a></tt></dt>
+ <dd>This indicates that the pointer parameter should really be passed by value
+ to the function. The attribute implies that a hidden copy of the pointee
+ is made between the caller and the callee, so the callee is unable to
+ modify the value in the caller. This attribute is only valid on LLVM
+ pointer arguments. It is generally used to pass structs and arrays by
+ value, but is also valid on pointers to scalars. The copy is considered
+ to belong to the caller, not the callee (for example,
+ <tt><a href="#readonly">readonly</a></tt> functions should not write to
+ <tt>byval</tt> parameters). This is not a valid attribute for return
+ values. The byval attribute also supports specifying an alignment with
+ the align attribute. This has a target-specific effect on the code
+ generator that usually indicates a desired alignment for the synthesized
+ stack slot.</dd>
+
+ <dt><tt>sret</tt></dt>
+ <dd>This indicates that the pointer parameter specifies the address of a
+ structure that is the return value of the function in the source program.
+ This pointer must be guaranteed by the caller to be valid: loads and
+ stores to the structure may be assumed by the callee not to trap. This
+ may only be applied to the first parameter. This is not a valid attribute
+ for return values.</dd>
+
+ <dt><tt>noalias</tt></dt>
+ <dd>This indicates that the pointer does not alias any global or any other
+ parameter. The caller is responsible for ensuring that this is the
+ case. On a function return value, <tt>noalias</tt> additionally indicates
+ that the pointer does not alias any other pointers visible to the
+ caller. For further details, please see the discussion of the NoAlias
+ response in
+ <a href="http://llvm.org/docs/AliasAnalysis.html#MustMayNo">alias
+ analysis</a>.</dd>
+
+ <dt><tt>nocapture</tt></dt>
+ <dd>This indicates that the callee does not make any copies of the pointer
+ that outlive the callee itself. This is not a valid attribute for return
+ values.</dd>
+
+ <dt><tt>nest</tt></dt>
+ <dd>This indicates that the pointer parameter can be excised using the
+ <a href="#int_trampoline">trampoline intrinsics</a>. This is not a valid
+ attribute for return values.</dd>
+</dl>
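+
+<p>As an illustrative sketch (the function and type names here are
+ hypothetical), several of these attributes can be combined in a single
+ declaration:</p>
+
+<div class="doc_code">
+<pre>
+%struct.T = type { i32, i32 }
+
+<i>; %res is a hidden struct-return pointer, %p never aliases or escapes,</i>
+<i>; and the caller sign-extends %c.</i>
+declare void @compute(%struct.T* sret %res, i32* noalias nocapture %p, i8 signext %c)
+</pre>
+</div>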
</div>
@@ -1002,15 +1004,20 @@ declare signext i8 @returns_signed_char()
</div>
<div class="doc_text">
+
<p>Each function may specify a garbage collector name, which is simply a
-string.</p>
+ string:</p>
-<div class="doc_code"><pre
->define void @f() gc "name" { ...</pre></div>
+<div class="doc_code">
+<pre>
+define void @f() gc "name" { ...
+</pre>
+</div>
<p>The compiler declares the supported values of <i>name</i>. Specifying a
-collector which will cause the compiler to alter its output in order to support
-the named garbage collection algorithm.</p>
+ collector will cause the compiler to alter its output in order to
+ support the named garbage collection algorithm.</p>
+
</div>
<!-- ======================================================================= -->
@@ -1020,14 +1027,13 @@ the named garbage collection algorithm.</p>
<div class="doc_text">
-<p>Function attributes are set to communicate additional information about
- a function. Function attributes are considered to be part of the function,
- not of the function type, so functions with different parameter attributes
- can have the same function type.</p>
+<p>Function attributes are set to communicate additional information about a
+ function. Function attributes are considered to be part of the function, not
+ of the function type, so functions with different function attributes can
+ have the same function type.</p>
- <p>Function attributes are simple keywords that follow the type specified. If
- multiple attributes are needed, they are space separated. For
- example:</p>
+<p>Function attributes are simple keywords that follow the type specified. If
+ multiple attributes are needed, they are space separated. For example:</p>
<div class="doc_code">
<pre>
@@ -1039,80 +1045,89 @@ define void @f() optsize
</div>
<dl>
-<dt><tt>alwaysinline</tt></dt>
-<dd>This attribute indicates that the inliner should attempt to inline this
-function into callers whenever possible, ignoring any active inlining size
-threshold for this caller.</dd>
-
-<dt><tt>noinline</tt></dt>
-<dd>This attribute indicates that the inliner should never inline this function
-in any situation. This attribute may not be used together with the
-<tt>alwaysinline</tt> attribute.</dd>
-
-<dt><tt>optsize</tt></dt>
-<dd>This attribute suggests that optimization passes and code generator passes
-make choices that keep the code size of this function low, and otherwise do
-optimizations specifically to reduce code size.</dd>
-
-<dt><tt>noreturn</tt></dt>
-<dd>This function attribute indicates that the function never returns normally.
-This produces undefined behavior at runtime if the function ever does
-dynamically return.</dd>
-
-<dt><tt>nounwind</tt></dt>
-<dd>This function attribute indicates that the function never returns with an
-unwind or exceptional control flow. If the function does unwind, its runtime
-behavior is undefined.</dd>
-
-<dt><tt>readnone</tt></dt>
-<dd>This attribute indicates that the function computes its result (or decides to
-unwind an exception) based strictly on its arguments, without dereferencing any
-pointer arguments or otherwise accessing any mutable state (e.g. memory, control
-registers, etc) visible to caller functions. It does not write through any
-pointer arguments (including <tt><a href="#byval">byval</a></tt> arguments) and
-never changes any state visible to callers. This means that it cannot unwind
-exceptions by calling the <tt>C++</tt> exception throwing methods, but could
-use the <tt>unwind</tt> instruction.</dd>
-
-<dt><tt><a name="readonly">readonly</a></tt></dt>
-<dd>This attribute indicates that the function does not write through any
-pointer arguments (including <tt><a href="#byval">byval</a></tt> arguments)
-or otherwise modify any state (e.g. memory, control registers, etc) visible to
-caller functions. It may dereference pointer arguments and read state that may
-be set in the caller. A readonly function always returns the same value (or
-unwinds an exception identically) when called with the same set of arguments
-and global state. It cannot unwind an exception by calling the <tt>C++</tt>
-exception throwing methods, but may use the <tt>unwind</tt> instruction.</dd>
-
-<dt><tt><a name="ssp">ssp</a></tt></dt>
-<dd>This attribute indicates that the function should emit a stack smashing
-protector. It is in the form of a "canary"&mdash;a random value placed on the
-stack before the local variables that's checked upon return from the function to
-see if it has been overwritten. A heuristic is used to determine if a function
-needs stack protectors or not.
-
-<br><br>If a function that has an <tt>ssp</tt> attribute is inlined into a function
-that doesn't have an <tt>ssp</tt> attribute, then the resulting function will
-have an <tt>ssp</tt> attribute.</dd>
-
-<dt><tt>sspreq</tt></dt>
-<dd>This attribute indicates that the function should <em>always</em> emit a
-stack smashing protector. This overrides the <tt><a href="#ssp">ssp</a></tt>
-function attribute.
-
-If a function that has an <tt>sspreq</tt> attribute is inlined into a
-function that doesn't have an <tt>sspreq</tt> attribute or which has
-an <tt>ssp</tt> attribute, then the resulting function will have
-an <tt>sspreq</tt> attribute.</dd>
-
-<dt><tt>noredzone</tt></dt>
-<dd>This attribute indicates that the code generator should not use a
-red zone, even if the target-specific ABI normally permits it.
-</dd>
-
-<dt><tt>noimplicitfloat</tt></dt>
-<dd>This attributes disables implicit floating point instructions.</dd>
-
+ <dt><tt>alwaysinline</tt></dt>
+ <dd>This attribute indicates that the inliner should attempt to inline this
+ function into callers whenever possible, ignoring any active inlining size
+ threshold for this caller.</dd>
+
+ <dt><tt>inlinehint</tt></dt>
+ <dd>This attribute indicates that the source code contained a hint that inlining
+ this function is desirable (such as the "inline" keyword in C/C++). It
+ is just a hint; it imposes no requirements on the inliner.</dd>
+
+ <dt><tt>noinline</tt></dt>
+ <dd>This attribute indicates that the inliner should never inline this
+ function in any situation. This attribute may not be used together with
+ the <tt>alwaysinline</tt> attribute.</dd>
+
+ <dt><tt>optsize</tt></dt>
+ <dd>This attribute suggests that optimization passes and code generator passes
+ make choices that keep the code size of this function low, and otherwise
+ do optimizations specifically to reduce code size.</dd>
+
+ <dt><tt>noreturn</tt></dt>
+ <dd>This function attribute indicates that the function never returns
+ normally. This produces undefined behavior at runtime if the function
+ ever does dynamically return.</dd>
+
+ <dt><tt>nounwind</tt></dt>
+ <dd>This function attribute indicates that the function never returns with an
+ unwind or exceptional control flow. If the function does unwind, its
+ runtime behavior is undefined.</dd>
+
+ <dt><tt>readnone</tt></dt>
+ <dd>This attribute indicates that the function computes its result (or decides
+ to unwind an exception) based strictly on its arguments, without
+ dereferencing any pointer arguments or otherwise accessing any mutable
+ state (e.g. memory, control registers, etc) visible to caller functions.
+ It does not write through any pointer arguments
+ (including <tt><a href="#byval">byval</a></tt> arguments) and never
+ changes any state visible to callers. This means that it cannot unwind
+ exceptions by calling the <tt>C++</tt> exception throwing methods, but
+ could use the <tt>unwind</tt> instruction.</dd>
+
+ <dt><tt><a name="readonly">readonly</a></tt></dt>
+ <dd>This attribute indicates that the function does not write through any
+ pointer arguments (including <tt><a href="#byval">byval</a></tt>
+ arguments) or otherwise modify any state (e.g. memory, control registers,
+ etc) visible to caller functions. It may dereference pointer arguments
+ and read state that may be set in the caller. A readonly function always
+ returns the same value (or unwinds an exception identically) when called
+ with the same set of arguments and global state. It cannot unwind an
+ exception by calling the <tt>C++</tt> exception throwing methods, but may
+ use the <tt>unwind</tt> instruction.</dd>
+
+ <dt><tt><a name="ssp">ssp</a></tt></dt>
+ <dd>This attribute indicates that the function should emit a stack smashing
+ protector. It is in the form of a "canary"&mdash;a random value placed on
+ the stack before the local variables, which is checked upon return from the
+ function to see if it has been overwritten. A heuristic is used to
+ determine if a function needs stack protectors or not.<br>
+<br>
+ If a function that has an <tt>ssp</tt> attribute is inlined into a
+ function that doesn't have an <tt>ssp</tt> attribute, then the resulting
+ function will have an <tt>ssp</tt> attribute.</dd>
+
+ <dt><tt>sspreq</tt></dt>
+ <dd>This attribute indicates that the function should <em>always</em> emit a
+ stack smashing protector. This overrides
+ the <tt><a href="#ssp">ssp</a></tt> function attribute.<br>
+<br>
+ If a function that has an <tt>sspreq</tt> attribute is inlined into a
+ function that doesn't have an <tt>sspreq</tt> attribute or which has
+ an <tt>ssp</tt> attribute, then the resulting function will have
+ an <tt>sspreq</tt> attribute.</dd>
+
+ <dt><tt>noredzone</tt></dt>
+ <dd>This attribute indicates that the code generator should not use a red
+ zone, even if the target-specific ABI normally permits it.</dd>
+
+ <dt><tt>noimplicitfloat</tt></dt>
+ <dd>This attribute disables implicit floating point instructions.</dd>
+
+ <dt><tt>naked</tt></dt>
+ <dd>This attribute disables prologue / epilogue emission for the function.
+ This can have very system-specific consequences.</dd>
</dl>
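+
+<p>For example (a sketch), a function that must never be inlined, should be
+ optimized for size, and never unwinds could be written as:</p>
+
+<div class="doc_code">
+<pre>
+define void @f() noinline optsize nounwind {
+  ret void
+}
+</pre>
+</div>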
</div>
@@ -1123,12 +1138,11 @@ red zone, even if the target-specific ABI normally permits it.
</div>
<div class="doc_text">
-<p>
-Modules may contain "module-level inline asm" blocks, which corresponds to the
-GCC "file scope inline asm" blocks. These blocks are internally concatenated by
-LLVM and treated as a single unit, but may be separated in the .ll file if
-desired. The syntax is very simple:
-</p>
+
+<p>Modules may contain "module-level inline asm" blocks, which correspond to
+ the GCC "file scope inline asm" blocks. These blocks are internally
+ concatenated by LLVM and treated as a single unit, but may be separated in
+ the <tt>.ll</tt> file if desired. The syntax is very simple:</p>
<div class="doc_code">
<pre>
@@ -1139,13 +1153,11 @@ module asm "more can go here"
<p>The strings can contain any character by escaping non-printable characters.
The escape sequence used is simply "\xx" where "xx" is the two digit hex code
- for the number.
-</p>
+ for the number.</p>
+
+<p>The inline asm code is simply printed to the machine code <tt>.s</tt> file
+ when assembly code is generated.</p>
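+
+<p>For example (a sketch; the symbol name is hypothetical), a module-level asm
+ block containing a tab (<tt>\09</tt>) and a trailing newline
+ (<tt>\0A</tt>) could be written as:</p>
+
+<div class="doc_code">
+<pre>
+module asm "\09.globl my_sym\0A"
+</pre>
+</div>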
-<p>
- The inline asm code is simply printed to the machine code .s file when
- assembly code is generated.
-</p>
</div>
<!-- ======================================================================= -->
@@ -1154,46 +1166,65 @@ module asm "more can go here"
</div>
<div class="doc_text">
+
<p>A module may specify a target specific data layout string that specifies how
-data is to be laid out in memory. The syntax for the data layout is simply:</p>
-<pre> target datalayout = "<i>layout specification</i>"</pre>
-<p>The <i>layout specification</i> consists of a list of specifications
-separated by the minus sign character ('-'). Each specification starts with a
-letter and may include other information after the letter to define some
-aspect of the data layout. The specifications accepted are as follows: </p>
+ data is to be laid out in memory. The syntax for the data layout is
+ simply:</p>
+
+<div class="doc_code">
+<pre>
+target datalayout = "<i>layout specification</i>"
+</pre>
+</div>
+
+<p>The <i>layout specification</i> consists of a list of specifications
+ separated by the minus sign character ('-'). Each specification starts with
+ a letter and may include other information after the letter to define some
+ aspect of the data layout. The specifications accepted are as follows:</p>
+
<dl>
<dt><tt>E</tt></dt>
<dd>Specifies that the target lays out data in big-endian form. That is, the
- bits with the most significance have the lowest address location.</dd>
+ bits with the most significance have the lowest address location.</dd>
+
<dt><tt>e</tt></dt>
<dd>Specifies that the target lays out data in little-endian form. That is,
- the bits with the least significance have the lowest address location.</dd>
+ the bits with the least significance have the lowest address
+ location.</dd>
+
<dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
- <i>preferred</i> alignments. All sizes are in bits. Specifying the <i>pref</i>
- alignment is optional. If omitted, the preceding <tt>:</tt> should be omitted
- too.</dd>
+ <i>preferred</i> alignments. All sizes are in bits. Specifying
+ the <i>pref</i> alignment is optional. If omitted, the
+ preceding <tt>:</tt> should be omitted too.</dd>
+
<dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for an integer type of a given bit
- <i>size</i>. The value of <i>size</i> must be in the range [1,2^23).</dd>
+ <i>size</i>. The value of <i>size</i> must be in the range [1,2^23).</dd>
+
<dt><tt>v<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for a vector type of a given bit
- <i>size</i>.</dd>
+ <i>size</i>.</dd>
+
<dt><tt>f<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for a floating point type of a given bit
- <i>size</i>. The value of <i>size</i> must be either 32 (float) or 64
- (double).</dd>
+ <i>size</i>. The value of <i>size</i> must be either 32 (float) or 64
+ (double).</dd>
+
<dt><tt>a<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for an aggregate type of a given bit
- <i>size</i>.</dd>
+ <i>size</i>.</dd>
+
<dt><tt>s<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for a stack object of a given bit
- <i>size</i>.</dd>
+ <i>size</i>.</dd>
</dl>
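+
+<p>As an illustrative sketch (not the specification of any particular target),
+ a 32-bit little-endian machine might be described as:</p>
+
+<div class="doc_code">
+<pre>
+target datalayout = "e-p:32:32:32-i8:8:8-i32:32:32-i64:32:64-f64:32:64"
+</pre>
+</div>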
+
<p>When constructing the data layout for a given target, LLVM starts with a
-default set of specifications which are then (possibly) overriden by the
-specifications in the <tt>datalayout</tt> keyword. The default specifications
-are given in this list:</p>
+ default set of specifications which are then (possibly) overridden by the
+ specifications in the <tt>datalayout</tt> keyword. The default specifications
+ are given in this list:</p>
+
<ul>
<li><tt>E</tt> - big endian</li>
<li><tt>p:32:64:64</tt> - 32-bit pointers with 64-bit alignment</li>
@@ -1210,22 +1241,80 @@ are given in this list:</p>
<li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
<li><tt>s0:64:64</tt> - stack objects are 64-bit aligned</li>
</ul>
-<p>When LLVM is determining the alignment for a given type, it uses the
-following rules:</p>
+
+<p>When LLVM is determining the alignment for a given type, it uses the
+ following rules:</p>
+
<ol>
<li>If the type sought is an exact match for one of the specifications, that
- specification is used.</li>
+ specification is used.</li>
+
<li>If no match is found, and the type sought is an integer type, then the
- smallest integer type that is larger than the bitwidth of the sought type is
- used. If none of the specifications are larger than the bitwidth then the the
- largest integer type is used. For example, given the default specifications
- above, the i7 type will use the alignment of i8 (next largest) while both
- i65 and i256 will use the alignment of i64 (largest specified).</li>
+ smallest integer type that is larger than the bitwidth of the sought type
+ is used. If none of the specifications are larger than the bitwidth then
+ the largest integer type is used. For example, given the default
+ specifications above, the i7 type will use the alignment of i8 (next
+ largest) while both i65 and i256 will use the alignment of i64 (largest
+ specified).</li>
+
<li>If no match is found, and the type sought is a vector type, then the
- largest vector type that is smaller than the sought vector type will be used
- as a fall back. This happens because &lt;128 x double&gt; can be implemented
- in terms of 64 &lt;2 x double&gt;, for example.</li>
+ largest vector type that is smaller than the sought vector type will be
+ used as a fall back. This happens because &lt;128 x double&gt; can be
+ implemented in terms of 64 &lt;2 x double&gt;, for example.</li>
</ol>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="pointeraliasing">Pointer Aliasing Rules</a>
+</div>
+
+<div class="doc_text">
+
+<p>Any memory access must be done through a pointer value associated
+with an address range of the memory access, otherwise the behavior
+is undefined. Pointer values are associated with address ranges
+according to the following rules:</p>
+
+<ul>
+ <li>A pointer value formed from a
+ <tt><a href="#i_getelementptr">getelementptr</a></tt> instruction
+ is associated with the addresses associated with the first operand
+ of the <tt>getelementptr</tt>.</li>
+ <li>An address of a global variable is associated with the address
+ range of the variable's storage.</li>
+ <li>The result value of an allocation instruction is associated with
+ the address range of the allocated storage.</li>
+ <li>A null pointer in the default address-space is associated with
+ no address.</li>
+ <li>A pointer value formed by an
+ <tt><a href="#i_inttoptr">inttoptr</a></tt> is associated with all
+ address ranges of all pointer values that contribute (directly or
+ indirectly) to the computation of the pointer's value.</li>
+ <li>The result value of a
+ <tt><a href="#i_bitcast">bitcast</a></tt> is associated with all
+ addresses associated with the operand of the <tt>bitcast</tt>.</li>
+ <li>An integer constant other than zero or a pointer value returned
+ from a function not defined within LLVM may be associated with address
+ ranges allocated through mechanisms other than those provided by
+ LLVM. Such ranges shall not overlap with any ranges of addresses
+ allocated by mechanisms provided by LLVM.</li>
+ </ul>
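+
+<p>For example (a sketch), under the first rule a pointer formed with
+ <tt>getelementptr</tt> from a global is associated only with that global's
+ storage:</p>
+
+<div class="doc_code">
+<pre>
+@G = global [4 x i32] zeroinitializer
+
+<i>; %p is associated only with the addresses of @G's storage:</i>
+%p = getelementptr [4 x i32]* @G, i32 0, i32 1
+</pre>
+</div>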
+
+<p>LLVM IR does not associate types with memory. The result type of a
+<tt><a href="#i_load">load</a></tt> merely indicates the size and
+alignment of the memory from which to load, as well as the
+interpretation of the value. The first operand of a
+<tt><a href="#i_store">store</a></tt> similarly only indicates the size
+and alignment of the store.</p>
+
+<p>Consequently, type-based alias analysis, aka TBAA, aka
+<tt>-fstrict-aliasing</tt>, is not applicable to general unadorned
+LLVM IR. <a href="#metadata">Metadata</a> may be used to encode
+additional information which specialized optimization passes may use
+to implement type-based alias analysis.</p>
+
</div>
<!-- *********************************************************************** -->
@@ -1235,22 +1324,22 @@ following rules:</p>
<div class="doc_text">
<p>The LLVM type system is one of the most important features of the
-intermediate representation. Being typed enables a number of
-optimizations to be performed on the intermediate representation directly,
-without having to do
-extra analyses on the side before the transformation. A strong type
-system makes it easier to read the generated code and enables novel
-analyses and transformations that are not feasible to perform on normal
-three address code representations.</p>
+ intermediate representation. Being typed enables a number of optimizations
+ to be performed on the intermediate representation directly, without having
+ to do extra analyses on the side before the transformation. A strong type
+ system makes it easier to read the generated code and enables novel analyses
+ and transformations that are not feasible to perform on normal three address
+ code representations.</p>
</div>
<!-- ======================================================================= -->
<div class="doc_subsection"> <a name="t_classifications">Type
Classifications</a> </div>
+
<div class="doc_text">
-<p>The types fall into a few useful
-classifications:</p>
+
+<p>The types fall into a few useful classifications:</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
@@ -1297,18 +1386,60 @@ classifications:</p>
</tbody>
</table>
-<p>The <a href="#t_firstclass">first class</a> types are perhaps the
-most important. Values of these types are the only ones which can be
-produced by instructions, passed as arguments, or used as operands to
-instructions.</p>
+<p>The <a href="#t_firstclass">first class</a> types are perhaps the most
+ important. Values of these types are the only ones which can be produced by
+ instructions.</p>
+
</div>
<!-- ======================================================================= -->
<div class="doc_subsection"> <a name="t_primitive">Primitive Types</a> </div>
<div class="doc_text">
+
<p>The primitive types are the fundamental building blocks of the LLVM
-system.</p>
+ system.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_integer">Integer Type</a> </div>
+
+<div class="doc_text">
+
+<h5>Overview:</h5>
+<p>The integer type is a very simple type that specifies an arbitrary
+ bit width for the integer type desired. Any bit width from 1 bit to
+ 2<sup>23</sup>-1 (about 8 million) can be specified.</p>
+
+<h5>Syntax:</h5>
+<pre>
+ iN
+</pre>
+
+<p>The number of bits the integer will occupy is specified by the <tt>N</tt>
+ value.</p>
+
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left"><tt>i1</tt></td>
+ <td class="left">a single-bit integer.</td>
+ </tr>
+ <tr class="layout">
+ <td class="left"><tt>i32</tt></td>
+ <td class="left">a 32-bit integer.</td>
+ </tr>
+ <tr class="layout">
+ <td class="left"><tt>i1942652</tt></td>
+ <td class="left">a really big integer of over 1 million bits.</td>
+ </tr>
+</table>
+
+<p>Note that the code generator does not yet support large integer types to be
+ used as function return types. The specific limit on how large a return type
+ the code generator can currently handle is target-dependent; currently it's
+ often 64 bits for 32-bit targets and 128 bits for 64-bit targets.</p>
</div>
@@ -1316,60 +1447,65 @@ system.</p>
<div class="doc_subsubsection"> <a name="t_floating">Floating Point Types</a> </div>
<div class="doc_text">
- <table>
- <tbody>
- <tr><th>Type</th><th>Description</th></tr>
- <tr><td><tt>float</tt></td><td>32-bit floating point value</td></tr>
- <tr><td><tt>double</tt></td><td>64-bit floating point value</td></tr>
- <tr><td><tt>fp128</tt></td><td>128-bit floating point value (112-bit mantissa)</td></tr>
- <tr><td><tt>x86_fp80</tt></td><td>80-bit floating point value (X87)</td></tr>
- <tr><td><tt>ppc_fp128</tt></td><td>128-bit floating point value (two 64-bits)</td></tr>
- </tbody>
- </table>
+
+<table>
+ <tbody>
+ <tr><th>Type</th><th>Description</th></tr>
+ <tr><td><tt>float</tt></td><td>32-bit floating point value</td></tr>
+ <tr><td><tt>double</tt></td><td>64-bit floating point value</td></tr>
+ <tr><td><tt>fp128</tt></td><td>128-bit floating point value (112-bit mantissa)</td></tr>
+ <tr><td><tt>x86_fp80</tt></td><td>80-bit floating point value (X87)</td></tr>
+ <tr><td><tt>ppc_fp128</tt></td><td>128-bit floating point value (two 64-bits)</td></tr>
+ </tbody>
+</table>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_void">Void Type</a> </div>
<div class="doc_text">
+
<h5>Overview:</h5>
<p>The void type does not represent any value and has no size.</p>
<h5>Syntax:</h5>
-
<pre>
void
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_label">Label Type</a> </div>
<div class="doc_text">
+
<h5>Overview:</h5>
<p>The label type represents code labels.</p>
<h5>Syntax:</h5>
-
<pre>
label
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_metadata">Metadata Type</a> </div>
<div class="doc_text">
+
<h5>Overview:</h5>
-<p>The metadata type represents embedded metadata. The only derived type that
-may contain metadata is <tt>metadata*</tt> or a function type that returns or
-takes metadata typed parameters, but not pointer to metadata types.</p>
+<p>The metadata type represents embedded metadata. No derived types may be
+ created from metadata except for <a href="#t_function">function</a>
+ arguments.</p>
<h5>Syntax:</h5>
-
<pre>
metadata
</pre>
+
</div>
@@ -1378,53 +1514,12 @@ takes metadata typed parameters, but not pointer to metadata types.</p>
<div class="doc_text">
-<p>The real power in LLVM comes from the derived types in the system.
-This is what allows a programmer to represent arrays, functions,
-pointers, and other useful types. Note that these derived types may be
-recursive: For example, it is possible to have a two dimensional array.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_integer">Integer Type</a> </div>
-
-<div class="doc_text">
-
-<h5>Overview:</h5>
-<p>The integer type is a very simple derived type that simply specifies an
-arbitrary bit width for the integer type desired. Any bit width from 1 bit to
-2^23-1 (about 8 million) can be specified.</p>
-
-<h5>Syntax:</h5>
-
-<pre>
- iN
-</pre>
-
-<p>The number of bits the integer will occupy is specified by the <tt>N</tt>
-value.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>i1</tt></td>
- <td class="left">a single-bit integer.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>i32</tt></td>
- <td class="left">a 32-bit integer.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>i1942652</tt></td>
- <td class="left">a really big integer of over 1 million bits.</td>
- </tr>
-</table>
-
-<p>Note that the code generator does not yet support large integer types
-to be used as function return types. The specific limit on how large a
-return type the code generator can currently handle is target-dependent;
-currently it's often 64 bits for 32-bit targets and 128 bits for 64-bit
-targets.</p>
+<p>The real power in LLVM comes from the derived types in the system. This is
+ what allows a programmer to represent arrays, functions, pointers, and other
+ useful types. Each of these types contains one or more element types which
+ may be a primitive type, or another derived type. For example, it is
+ possible to have a two dimensional array, using an array as the element type
+ of another array.</p>
</div>
@@ -1434,19 +1529,17 @@ targets.</p>
<div class="doc_text">
<h5>Overview:</h5>
-
<p>The array type is a very simple derived type that arranges elements
-sequentially in memory. The array type requires a size (number of
-elements) and an underlying data type.</p>
+ sequentially in memory. The array type requires a size (number of elements)
+ and an underlying data type.</p>
<h5>Syntax:</h5>
-
<pre>
[&lt;# elements&gt; x &lt;elementtype&gt;]
</pre>
-<p>The number of elements is a constant integer value; elementtype may
-be any type with a size.</p>
+<p>The number of elements is a constant integer value; <tt>elementtype</tt> may
+ be any type with a size.</p>
<h5>Examples:</h5>
<table class="layout">
@@ -1479,45 +1572,44 @@ be any type with a size.</p>
</tr>
</table>
-<p>Note that 'variable sized arrays' can be implemented in LLVM with a zero
-length array. Normally, accesses past the end of an array are undefined in
-LLVM (e.g. it is illegal to access the 5th element of a 3 element array).
-As a special case, however, zero length arrays are recognized to be variable
-length. This allows implementation of 'pascal style arrays' with the LLVM
-type "{ i32, [0 x float]}", for example.</p>
+<p>Note that 'variable sized arrays' can be implemented in LLVM with a zero
+ length array. Normally, accesses past the end of an array are undefined in
+ LLVM (e.g. it is illegal to access the 5th element of a 3 element array). As
+ a special case, however, zero length arrays are recognized to be variable
+ length. This allows implementation of 'pascal style arrays' with the LLVM
+ type "<tt>{ i32, [0 x float]}</tt>", for example.</p>
-<p>Note that the code generator does not yet support large aggregate types
-to be used as function return types. The specific limit on how large an
-aggregate return type the code generator can currently handle is
-target-dependent, and also dependent on the aggregate element types.</p>
+<p>Note that the code generator does not yet support large aggregate types to be
+ used as function return types. The specific limit on how large an aggregate
+ return type the code generator can currently handle is target-dependent, and
+ also dependent on the aggregate element types.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_function">Function Type</a> </div>
+
<div class="doc_text">
<h5>Overview:</h5>
-
-<p>The function type can be thought of as a function signature. It
-consists of a return type and a list of formal parameter types. The
-return type of a function type is a scalar type, a void type, or a struct type.
-If the return type is a struct type then all struct elements must be of first
-class types, and the struct must have at least one element.</p>
+<p>The function type can be thought of as a function signature. It consists of
+ a return type and a list of formal parameter types. The return type of a
+ function type is a scalar type, a void type, or a struct type. If the return
+ type is a struct type then all struct elements must be of first class types,
+ and the struct must have at least one element.</p>
<h5>Syntax:</h5>
-
<pre>
- &lt;returntype list&gt; (&lt;parameter list&gt;)
+ &lt;returntype&gt; (&lt;parameter list&gt;)
</pre>
<p>...where '<tt>&lt;parameter list&gt;</tt>' is a comma-separated list of type
-specifiers. Optionally, the parameter list may include a type <tt>...</tt>,
-which indicates that the function takes a variable number of arguments.
-Variable argument functions can access their arguments with the <a
- href="#int_varargs">variable argument handling intrinsic</a> functions.
-'<tt>&lt;returntype list&gt;</tt>' is a comma-separated list of
-<a href="#t_firstclass">first class</a> type specifiers.</p>
+ specifiers. Optionally, the parameter list may include a type <tt>...</tt>,
+ which indicates that the function takes a variable number of arguments.
+ Variable argument functions can access their arguments with
+ the <a href="#int_varargs">variable argument handling intrinsic</a>
+ functions. '<tt>&lt;returntype&gt;</tt>' is any type except
+ <a href="#t_label">label</a>.</p>
<h5>Examples:</h5>
<table class="layout">
@@ -1542,27 +1634,34 @@ Variable argument functions can access their arguments with the <a
</td>
</tr><tr class="layout">
<td class="left"><tt>{i32, i32} (i32)</tt></td>
- <td class="left">A function taking an <tt>i32</tt>, returning two
- <tt>i32</tt> values as an aggregate of type <tt>{ i32, i32 }</tt>
+ <td class="left">A function taking an <tt>i32</tt>, returning a
+ <a href="#t_struct">structure</a> containing two <tt>i32</tt> values
</td>
</tr>
</table>
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_struct">Structure Type</a> </div>
+
<div class="doc_text">
+
<h5>Overview:</h5>
-<p>The structure type is used to represent a collection of data members
-together in memory. The packing of the field types is defined to match
-the ABI of the underlying processor. The elements of a structure may
-be any type that has a size.</p>
-<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>
-and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
-field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
-instruction.</p>
+<p>The structure type is used to represent a collection of data members together
+ in memory. The packing of the field types is defined to match the ABI of the
+ underlying processor. The elements of a structure may be any type that has a
+ size.</p>
+
+<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>' and
+ '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field with
+ the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
+
<h5>Syntax:</h5>
-<pre> { &lt;type list&gt; }<br></pre>
+<pre>
+ { &lt;type list&gt; }
+</pre>
+
<h5>Examples:</h5>
<table class="layout">
<tr class="layout">
@@ -1577,28 +1676,34 @@ instruction.</p>
</tr>
</table>
-<p>Note that the code generator does not yet support large aggregate types
-to be used as function return types. The specific limit on how large an
-aggregate return type the code generator can currently handle is
-target-dependent, and also dependent on the aggregate element types.</p>
+<p>Note that the code generator does not yet support large aggregate types to be
+ used as function return types. The specific limit on how large an aggregate
+ return type the code generator can currently handle is target-dependent, and
+ also dependent on the aggregate element types.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_pstruct">Packed Structure Type</a>
</div>
+
<div class="doc_text">
+
<h5>Overview:</h5>
<p>The packed structure type is used to represent a collection of data members
-together in memory. There is no padding between fields. Further, the alignment
-of a packed structure is 1 byte. The elements of a packed structure may
-be any type that has a size.</p>
-<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>
-and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
-field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
-instruction.</p>
+ together in memory. There is no padding between fields. Further, the
+ alignment of a packed structure is 1 byte. The elements of a packed
+ structure may be any type that has a size.</p>
+
+<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>' and
+ '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field with
+ the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
+
<h5>Syntax:</h5>
-<pre> &lt; { &lt;type list&gt; } &gt; <br></pre>
+<pre>
+ &lt; { &lt;type list&gt; } &gt;
+</pre>
+
<h5>Examples:</h5>
<table class="layout">
<tr class="layout">
@@ -1613,23 +1718,28 @@ instruction.</p>
an <tt>i32</tt>.</td>
</tr>
</table>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
+
<div class="doc_text">
+
<h5>Overview:</h5>
-<p>As in many languages, the pointer type represents a pointer or
-reference to another object, which must live in memory. Pointer types may have
-an optional address space attribute defining the target-specific numbered
-address space where the pointed-to object resides. The default address space is
-zero.</p>
+<p>As in many languages, the pointer type represents a pointer or reference to
+ another object, which must live in memory. Pointer types may have an optional
+ address space attribute defining the target-specific numbered address space
+ where the pointed-to object resides. The default address space is zero.</p>
-<p>Note that LLVM does not permit pointers to void (<tt>void*</tt>) nor does
-it permit pointers to labels (<tt>label*</tt>). Use <tt>i8*</tt> instead.</p>
+<p>Note that LLVM does not permit pointers to void (<tt>void*</tt>) nor does it
+ permit pointers to labels (<tt>label*</tt>). Use <tt>i8*</tt> instead.</p>
<h5>Syntax:</h5>
-<pre> &lt;type&gt; *<br></pre>
+<pre>
+ &lt;type&gt; *
+</pre>
+
<h5>Examples:</h5>
<table class="layout">
<tr class="layout">
@@ -1649,33 +1759,31 @@ it permit pointers to labels (<tt>label*</tt>). Use <tt>i8*</tt> instead.</p>
that resides in address space #5.</td>
</tr>
</table>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_vector">Vector Type</a> </div>
+
<div class="doc_text">
<h5>Overview:</h5>
-
-<p>A vector type is a simple derived type that represents a vector
-of elements. Vector types are used when multiple primitive data
-are operated in parallel using a single instruction (SIMD).
-A vector type requires a size (number of
-elements) and an underlying primitive data type. Vectors must have a power
-of two length (1, 2, 4, 8, 16 ...). Vector types are
-considered <a href="#t_firstclass">first class</a>.</p>
+<p>A vector type is a simple derived type that represents a vector of elements.
+ Vector types are used when multiple primitive data are operated on in
+ parallel using a single instruction (SIMD). A vector type requires a size
+ (number of
+ elements) and an underlying primitive data type. Vectors must have a power
+ of two length (1, 2, 4, 8, 16 ...). Vector types are considered
+ <a href="#t_firstclass">first class</a>.</p>
<h5>Syntax:</h5>
-
<pre>
&lt; &lt;# elements&gt; x &lt;elementtype&gt; &gt;
</pre>
-<p>The number of elements is a constant integer value; elementtype may
-be any integer or floating point type.</p>
+<p>The number of elements is a constant integer value; <tt>elementtype</tt> may
+ be any integer or floating point type.</p>
<h5>Examples:</h5>
-
<table class="layout">
<tr class="layout">
<td class="left"><tt>&lt;4 x i32&gt;</tt></td>
@@ -1691,10 +1799,10 @@ be any integer or floating point type.</p>
</tr>
</table>
-<p>Note that the code generator does not yet support large vector types
-to be used as function return types. The specific limit on how large a
-vector return type codegen can currently handle is target-dependent;
-currently it's often a few times longer than a hardware vector register.</p>
+<p>Note that the code generator does not yet support large vector types to be
+ used as function return types. The specific limit on how large a vector
+ return type codegen can currently handle is target-dependent; currently it's
+ often a few times longer than a hardware vector register.</p>
</div>
@@ -1703,26 +1811,24 @@ currently it's often a few times longer than a hardware vector register.</p>
<div class="doc_text">
<h5>Overview:</h5>
-
<p>Opaque types are used to represent unknown types in the system. This
-corresponds (for example) to the C notion of a forward declared structure type.
-In LLVM, opaque types can eventually be resolved to any type (not just a
-structure type).</p>
+ corresponds (for example) to the C notion of a forward declared structure
+ type. In LLVM, opaque types can eventually be resolved to any type (not just
+ a structure type).</p>
<h5>Syntax:</h5>
-
<pre>
opaque
</pre>
<h5>Examples:</h5>
-
<table class="layout">
<tr class="layout">
<td class="left"><tt>opaque</tt></td>
<td class="left">An opaque type.</td>
</tr>
</table>
+
</div>
<!-- ======================================================================= -->
@@ -1731,12 +1837,13 @@ structure type).</p>
</div>
<div class="doc_text">
+
<h5>Overview:</h5>
-<p>
-An "up reference" allows you to refer to a lexically enclosing type without
-requiring it to have a name. For instance, a structure declaration may contain a
-pointer to any of the types it is lexically a member of. Example of up
-references (with their equivalent as named type declarations) include:</p>
+<p>An "up reference" allows you to refer to a lexically enclosing type without
+ requiring it to have a name. For instance, a structure declaration may
+ contain a pointer to any of the types it is lexically a member of. Examples
+ of up references (with their equivalent as named type declarations)
+ include:</p>
<pre>
{ \2 * } %x = type { %x* }
@@ -1744,24 +1851,20 @@ references (with their equivalent as named type declarations) include:</p>
\1* %z = type %z*
</pre>
-<p>
-An up reference is needed by the asmprinter for printing out cyclic types when
-there is no declared name for a type in the cycle. Because the asmprinter does
-not want to print out an infinite type string, it needs a syntax to handle
-recursive types that have no names (all names are optional in llvm IR).
-</p>
+<p>An up reference is needed by the asmprinter for printing out cyclic types
+ when there is no declared name for a type in the cycle. Because the
+ asmprinter does not want to print out an infinite type string, it needs a
+ syntax to handle recursive types that have no names (all names are optional
+ in LLVM IR).</p>
<h5>Syntax:</h5>
<pre>
\&lt;level&gt;
</pre>
-<p>
-The level is the count of the lexical type that is being referred to.
-</p>
+<p>The level is the count of the lexical type that is being referred to.</p>
<h5>Examples:</h5>
-
<table class="layout">
<tr class="layout">
<td class="left"><tt>\1*</tt></td>
@@ -1773,8 +1876,8 @@ The level is the count of the lexical type that is being referred to.
structure.</td>
</tr>
</table>
-</div>
+</div>
<!-- *********************************************************************** -->
<div class="doc_section"> <a name="constants">Constants</a> </div>
@@ -1783,7 +1886,7 @@ The level is the count of the lexical type that is being referred to.
<div class="doc_text">
<p>LLVM has several different basic types of constants. This section describes
-them all and their syntax.</p>
+ them all and their syntax.</p>
</div>
@@ -1794,118 +1897,103 @@ them all and their syntax.</p>
<dl>
<dt><b>Boolean constants</b></dt>
-
<dd>The two strings '<tt>true</tt>' and '<tt>false</tt>' are both valid
- constants of the <tt><a href="#t_primitive">i1</a></tt> type.
- </dd>
+ constants of the <tt><a href="#t_integer">i1</a></tt> type.</dd>
<dt><b>Integer constants</b></dt>
-
- <dd>Standard integers (such as '4') are constants of the <a
- href="#t_integer">integer</a> type. Negative numbers may be used with
- integer types.
- </dd>
+ <dd>Standard integers (such as '4') are constants of
+ the <a href="#t_integer">integer</a> type. Negative numbers may be used
+ with integer types.</dd>
<dt><b>Floating point constants</b></dt>
-
<dd>Floating point constants use standard decimal notation (e.g. 123.421),
- exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal
- notation (see below). The assembler requires the exact decimal value of
- a floating-point constant. For example, the assembler accepts 1.25 but
- rejects 1.3 because 1.3 is a repeating decimal in binary. Floating point
- constants must have a <a href="#t_floating">floating point</a> type. </dd>
+ exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal
+ notation (see below). The assembler requires the exact decimal value of a
+ floating-point constant. For example, the assembler accepts 1.25 but
+ rejects 1.3 because 1.3 is a repeating decimal in binary. Floating point
+ constants must have a <a href="#t_floating">floating point</a> type.</dd>
<dt><b>Null pointer constants</b></dt>
-
<dd>The identifier '<tt>null</tt>' is recognized as a null pointer constant
- and must be of <a href="#t_pointer">pointer type</a>.</dd>
-
+ and must be of <a href="#t_pointer">pointer type</a>.</dd>
</dl>
-<p>The one non-intuitive notation for constants is the hexadecimal form
-of floating point constants. For example, the form '<tt>double
-0x432ff973cafa8000</tt>' is equivalent to (but harder to read than) '<tt>double
-4.5e+15</tt>'. The only time hexadecimal floating point constants are required
-(and the only time that they are generated by the disassembler) is when a
-floating point constant must be emitted but it cannot be represented as a
-decimal floating point number in a reasonable number of digits. For example,
-NaN's, infinities, and other
-special values are represented in their IEEE hexadecimal format so that
-assembly and disassembly do not cause any bits to change in the constants.</p>
+<p>The one non-intuitive notation for constants is the hexadecimal form of
+ floating point constants. For example, the form '<tt>double
+ 0x432ff973cafa8000</tt>' is equivalent to (but harder to read than)
+ '<tt>double 4.5e+15</tt>'. The only time hexadecimal floating point
+ constants are required (and the only time that they are generated by the
+ disassembler) is when a floating point constant must be emitted but it cannot
+ be represented as a decimal floating point number in a reasonable number of
+ digits. For example, NaNs, infinities, and other special values are
+ represented in their IEEE hexadecimal format so that assembly and disassembly
+ do not cause any bits to change in the constants.</p>
+
<p>When using the hexadecimal form, constants of types float and double are
-represented using the 16-digit form shown above (which matches the IEEE754
-representation for double); float values must, however, be exactly representable
-as IEE754 single precision.
-Hexadecimal format is always used for long
-double, and there are three forms of long double. The 80-bit
-format used by x86 is represented as <tt>0xK</tt>
-followed by 20 hexadecimal digits.
-The 128-bit format used by PowerPC (two adjacent doubles) is represented
-by <tt>0xM</tt> followed by 32 hexadecimal digits. The IEEE 128-bit
-format is represented
-by <tt>0xL</tt> followed by 32 hexadecimal digits; no currently supported
-target uses this format. Long doubles will only work if they match
-the long double format on your target. All hexadecimal formats are big-endian
-(sign bit at the left).</p>
+ represented using the 16-digit form shown above (which matches the IEEE754
+ representation for double); float values must, however, be exactly
+ representable as IEEE754 single precision. Hexadecimal format is always used
+ for long double, and there are three forms of long double. The 80-bit format
+ used by x86 is represented as <tt>0xK</tt> followed by 20 hexadecimal digits.
+ The 128-bit format used by PowerPC (two adjacent doubles) is represented
+ by <tt>0xM</tt> followed by 32 hexadecimal digits. The IEEE 128-bit format
+ is represented by <tt>0xL</tt> followed by 32 hexadecimal digits; no
+ currently supported target uses this format. Long doubles will only work if
+ they match the long double format on your target. All hexadecimal formats
+ are big-endian (sign bit at the left).</p>
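+
+<p>For example, positive infinity has no decimal representation, so it must be
+ written in the hexadecimal form (a sketch):</p>
+
+<div class="doc_code">
+<pre>
+@inf = global double 0x7FF0000000000000    <i>; +infinity</i>
+</pre>
+</div>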
+
</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
-<a name="aggregateconstants"> <!-- old anchor -->
-<a name="complexconstants">Complex Constants</a></a>
+<a name="aggregateconstants"></a> <!-- old anchor -->
+<a name="complexconstants">Complex Constants</a>
</div>
<div class="doc_text">
+
<p>Complex constants are a (potentially recursive) combination of simple
-constants and smaller complex constants.</p>
+ constants and smaller complex constants.</p>
<dl>
<dt><b>Structure constants</b></dt>
-
<dd>Structure constants are represented with notation similar to structure
- type definitions (a comma separated list of elements, surrounded by braces
- (<tt>{}</tt>)). For example: "<tt>{ i32 4, float 17.0, i32* @G }</tt>",
- where "<tt>@G</tt>" is declared as "<tt>@G = external global i32</tt>". Structure constants
- must have <a href="#t_struct">structure type</a>, and the number and
- types of elements must match those specified by the type.
- </dd>
+ type definitions (a comma separated list of elements, surrounded by braces
+ (<tt>{}</tt>)). For example: "<tt>{ i32 4, float 17.0, i32* @G }</tt>",
+ where "<tt>@G</tt>" is declared as "<tt>@G = external global i32</tt>".
+ Structure constants must have <a href="#t_struct">structure type</a>, and
+ the number and types of elements must match those specified by the
+ type.</dd>
<dt><b>Array constants</b></dt>
-
<dd>Array constants are represented with notation similar to array type
- definitions (a comma separated list of elements, surrounded by square brackets
- (<tt>[]</tt>)). For example: "<tt>[ i32 42, i32 11, i32 74 ]</tt>". Array
- constants must have <a href="#t_array">array type</a>, and the number and
- types of elements must match those specified by the type.
- </dd>
+ definitions (a comma separated list of elements, surrounded by square
+ brackets (<tt>[]</tt>)). For example: "<tt>[ i32 42, i32 11, i32 74
+ ]</tt>". Array constants must have <a href="#t_array">array type</a>, and
+ the number and types of elements must match those specified by the
+ type.</dd>
<dt><b>Vector constants</b></dt>
-
<dd>Vector constants are represented with notation similar to vector type
- definitions (a comma separated list of elements, surrounded by
- less-than/greater-than's (<tt>&lt;&gt;</tt>)). For example: "<tt>&lt; i32 42,
- i32 11, i32 74, i32 100 &gt;</tt>". Vector constants must have <a
- href="#t_vector">vector type</a>, and the number and types of elements must
- match those specified by the type.
- </dd>
+ definitions (a comma separated list of elements, surrounded by
+ less-than/greater-than's (<tt>&lt;&gt;</tt>)). For example: "<tt>&lt; i32
+ 42, i32 11, i32 74, i32 100 &gt;</tt>". Vector constants must
+ have <a href="#t_vector">vector type</a>, and the number and types of
+ elements must match those specified by the type.</dd>
<dt><b>Zero initialization</b></dt>
-
<dd>The string '<tt>zeroinitializer</tt>' can be used to zero initialize a
- value to zero of <em>any</em> type, including scalar and aggregate types.
- This is often used to avoid having to print large zero initializers (e.g. for
- large arrays) and is always exactly equivalent to using explicit zero
- initializers.
- </dd>
+ value to zero of <em>any</em> type, including scalar and aggregate types.
+ This is often used to avoid having to print large zero initializers
+ (e.g. for large arrays) and is always exactly equivalent to using explicit
+ zero initializers.</dd>
<dt><b>Metadata node</b></dt>
-
<dd>A metadata node is a structure-like constant with
- <a href="#t_metadata">metadata type</a>. For example:
- "<tt>metadata !{ i32 0, metadata !"test" }</tt>". Unlike other constants
- that are meant to be interpreted as part of the instruction stream, metadata
- is a place to attach additional information such as debug info.
- </dd>
+ <a href="#t_metadata">metadata type</a>. For example: "<tt>metadata !{
+ i32 0, metadata !"test" }</tt>". Unlike other constants that are meant to
+ be interpreted as part of the instruction stream, metadata is a place to
+ attach additional information such as debug info.</dd>
</dl>
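+
+<p>To illustrate, here is a small sketch combining the constant forms above
+(the globals <tt>@S</tt>, <tt>@A</tt>, <tt>@V</tt> and <tt>@Z</tt> are
+hypothetical; <tt>@G</tt> is as declared earlier):</p>
+
+<div class="doc_code">
+<pre>
+@G = external global i32
+@S = global { i32, float, i32* } { i32 4, float 17.0, i32* @G }
+@A = global [3 x i32] [ i32 42, i32 11, i32 74 ]
+@V = global &lt;4 x i32&gt; &lt; i32 42, i32 11, i32 74, i32 100 &gt;
+@Z = global [1024 x i32] zeroinitializer
+</pre>
+</div>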
</div>
@@ -1917,12 +2005,12 @@ constants and smaller complex constants.</p>
<div class="doc_text">
-<p>The addresses of <a href="#globalvars">global variables</a> and <a
-href="#functionstructure">functions</a> are always implicitly valid (link-time)
-constants. These constants are explicitly referenced when the <a
-href="#identifiers">identifier for the global</a> is used and always have <a
-href="#t_pointer">pointer</a> type. For example, the following is a legal LLVM
-file:</p>
+<p>The addresses of <a href="#globalvars">global variables</a>
+ and <a href="#functionstructure">functions</a> are always implicitly valid
+ (link-time) constants. These constants are explicitly referenced when
+ the <a href="#identifiers">identifier for the global</a> is used and always
+ have <a href="#t_pointer">pointer</a> type. For example, the following is a
+ legal LLVM file:</p>
<div class="doc_code">
<pre>
@@ -1937,13 +2025,150 @@ file:</p>
<!-- ======================================================================= -->
<div class="doc_subsection"><a name="undefvalues">Undefined Values</a></div>
<div class="doc_text">
- <p>The string '<tt>undef</tt>' is recognized as a type-less constant that has
- no specific value. Undefined values may be of any type and be used anywhere
- a constant is permitted.</p>
- <p>Undefined values indicate to the compiler that the program is well defined
- no matter what value is used, giving the compiler more freedom to optimize.
- </p>
+<p>The string '<tt>undef</tt>' can be used anywhere a constant is expected, and
+ indicates that the user of the value may receive an unspecified bit-pattern.
+ Undefined values may be of any type (other than label or void) and be used
+ anywhere a constant is permitted.</p>
+
+<p>Undefined values are useful because they indicate to the compiler that the
+ program is well defined no matter what value is used. This gives the
+ compiler more freedom to optimize. Here are some examples of (potentially
+ surprising) transformations that are valid (in pseudo IR):</p>
+
+<div class="doc_code">
+<pre>
+ %A = add %X, undef
+ %B = sub %X, undef
+ %C = xor %X, undef
+Safe:
+ %A = undef
+ %B = undef
+ %C = undef
+</pre>
+</div>
+
+<p>This is safe because all of the output bits are affected by the undef bits.
+Any output bit can have a zero or one depending on the input bits.</p>
+
+<div class="doc_code">
+<pre>
+ %A = or %X, undef
+ %B = and %X, undef
+Safe:
+ %A = -1
+ %B = 0
+Unsafe:
+ %A = undef
+ %B = undef
+</pre>
+</div>
+
+<p>These logical operations have bits that are not always affected by the input.
+For example, if "%X" has a zero bit, then the output of the 'and' operation will
+always be a zero, no matter what the corresponding bit from the undef is. As
+such, it is unsafe to optimize or assume that the result of the and is undef.
+However, it is safe to assume that all bits of the undef could be 0, and
+optimize the and to 0. Likewise, it is safe to assume that all the bits of
+the undef operand to the or could be set, allowing the or to be folded to
+-1.</p>
+
+<div class="doc_code">
+<pre>
+ %A = select undef, %X, %Y
+ %B = select undef, 42, %Y
+ %C = select %X, %Y, undef
+Safe:
+ %A = %X (or %Y)
+ %B = 42 (or %Y)
+ %C = %Y
+Unsafe:
+ %A = undef
+ %B = undef
+ %C = undef
+</pre>
+</div>
+
+<p>This set of examples shows that undefined select (and conditional branch)
+conditions can go "either way" but they have to come from one of the two
+operands. In the %A example, if %X and %Y were both known to have a clear low
+bit, then %A would have to have a cleared low bit. However, in the %C example,
+the optimizer is allowed to assume that the undef operand could be the same as
+%Y, allowing the whole select to be eliminated.</p>
+
+<div class="doc_code">
+<pre>
+ %A = xor undef, undef
+
+ %B = undef
+ %C = xor %B, %B
+
+ %D = undef
+ %E = icmp slt %D, 4
+ %F = icmp sge %D, 4
+
+Safe:
+ %A = undef
+ %B = undef
+ %C = undef
+ %D = undef
+ %E = undef
+ %F = undef
+</pre>
+</div>
+
+<p>This example points out that two undef operands are not necessarily the same.
+This can be surprising to people who assume that "X^X" is always zero, even if
+X is undef (a behavior that also matches C semantics for uninitialized
+variables). That assumption isn't true for a number of reasons, but the short
+answer is that an undef "variable" can
+arbitrarily change its value over its "live range". This is true because the
+"variable" doesn't actually <em>have a live range</em>. Instead, the value is
+logically read from arbitrary registers that happen to be around when needed,
+so the value is not necessarily consistent over time. In fact, %A and %C need
+to have the same semantics or the core LLVM "replace all uses with" concept
+would not hold.</p>
+
+<div class="doc_code">
+<pre>
+a: %A = fdiv undef, %X
+b: %B = fdiv %X, undef
+Safe:
+a: %A = undef
+b: unreachable
+</pre>
+</div>
+
+<p>These examples show the crucial difference between an <em>undefined
+value</em> and <em>undefined behavior</em>. An undefined value (like undef) is
+allowed to have an arbitrary bit-pattern. This means that the %A operation
+can be constant folded to undef because the undef could be an SNaN, and fdiv is
+not (currently) defined on SNaNs. However, in the second example, we can make
+a more aggressive assumption: because the undef is allowed to be an arbitrary
+value, we are allowed to assume that it could be zero. Since a divide by zero
+has <em>undefined behavior</em>, we are allowed to assume that the operation
+does not execute at all. This allows us to delete the divide and all code after
+it: since the undefined operation "can't happen", the optimizer can assume that
+it occurs in dead code.</p>
+
+<div class="doc_code">
+<pre>
+a: store undef -> %X
+b: store %X -> undef
+Safe:
+a: &lt;deleted&gt;
+b: unreachable
+</pre>
+</div>
+
+<p>These examples reiterate the fdiv example: a store "of" an undefined value
+can be assumed to not have any effect: we can assume that the value is
+overwritten with bits that happen to match what was already there. However, a
+store "to" an undefined location could clobber arbitrary memory, therefore, it
+has undefined behavior.</p>
+
</div>
<!-- ======================================================================= -->
@@ -1953,71 +2178,75 @@ file:</p>
<div class="doc_text">
<p>Constant expressions are used to allow expressions involving other constants
-to be used as constants. Constant expressions may be of any <a
-href="#t_firstclass">first class</a> type and may involve any LLVM operation
-that does not have side effects (e.g. load and call are not supported). The
-following is the syntax for constant expressions:</p>
+ to be used as constants. Constant expressions may be of
+ any <a href="#t_firstclass">first class</a> type and may involve any LLVM
+ operation that does not have side effects (e.g. load and call are not
+ supported). The following is the syntax for constant expressions:</p>
<dl>
<dt><b><tt>trunc ( CST to TYPE )</tt></b></dt>
- <dd>Truncate a constant to another type. The bit size of CST must be larger
- than the bit size of TYPE. Both types must be integers.</dd>
+ <dd>Truncate a constant to another type. The bit size of CST must be larger
+ than the bit size of TYPE. Both types must be integers.</dd>
<dt><b><tt>zext ( CST to TYPE )</tt></b></dt>
- <dd>Zero extend a constant to another type. The bit size of CST must be
- smaller or equal to the bit size of TYPE. Both types must be integers.</dd>
+ <dd>Zero extend a constant to another type. The bit size of CST must be
+ smaller than or equal to the bit size of TYPE. Both types must be
+ integers.</dd>
<dt><b><tt>sext ( CST to TYPE )</tt></b></dt>
- <dd>Sign extend a constant to another type. The bit size of CST must be
- smaller or equal to the bit size of TYPE. Both types must be integers.</dd>
+ <dd>Sign extend a constant to another type. The bit size of CST must be
+ smaller than or equal to the bit size of TYPE. Both types must be
+ integers.</dd>
<dt><b><tt>fptrunc ( CST to TYPE )</tt></b></dt>
- <dd>Truncate a floating point constant to another floating point type. The
- size of CST must be larger than the size of TYPE. Both types must be
- floating point.</dd>
+ <dd>Truncate a floating point constant to another floating point type. The
+ size of CST must be larger than the size of TYPE. Both types must be
+ floating point.</dd>
<dt><b><tt>fpext ( CST to TYPE )</tt></b></dt>
- <dd>Floating point extend a constant to another type. The size of CST must be
- smaller or equal to the size of TYPE. Both types must be floating point.</dd>
+ <dd>Floating point extend a constant to another type. The size of CST must be
+ smaller than or equal to the size of TYPE. Both types must be floating
+ point.</dd>
<dt><b><tt>fptoui ( CST to TYPE )</tt></b></dt>
<dd>Convert a floating point constant to the corresponding unsigned integer
- constant. TYPE must be a scalar or vector integer type. CST must be of scalar
- or vector floating point type. Both CST and TYPE must be scalars, or vectors
- of the same number of elements. If the value won't fit in the integer type,
- the results are undefined.</dd>
+ constant. TYPE must be a scalar or vector integer type. CST must be of
+ scalar or vector floating point type. Both CST and TYPE must be scalars,
+ or vectors of the same number of elements. If the value won't fit in the
+ integer type, the results are undefined.</dd>
<dt><b><tt>fptosi ( CST to TYPE )</tt></b></dt>
<dd>Convert a floating point constant to the corresponding signed integer
- constant. TYPE must be a scalar or vector integer type. CST must be of scalar
- or vector floating point type. Both CST and TYPE must be scalars, or vectors
- of the same number of elements. If the value won't fit in the integer type,
- the results are undefined.</dd>
+ constant. TYPE must be a scalar or vector integer type. CST must be of
+ scalar or vector floating point type. Both CST and TYPE must be scalars,
+ or vectors of the same number of elements. If the value won't fit in the
+ integer type, the results are undefined.</dd>
<dt><b><tt>uitofp ( CST to TYPE )</tt></b></dt>
<dd>Convert an unsigned integer constant to the corresponding floating point
- constant. TYPE must be a scalar or vector floating point type. CST must be of
- scalar or vector integer type. Both CST and TYPE must be scalars, or vectors
- of the same number of elements. If the value won't fit in the floating point
- type, the results are undefined.</dd>
+ constant. TYPE must be a scalar or vector floating point type. CST must be
+ of scalar or vector integer type. Both CST and TYPE must be scalars, or
+ vectors of the same number of elements. If the value won't fit in the
+ floating point type, the results are undefined.</dd>
<dt><b><tt>sitofp ( CST to TYPE )</tt></b></dt>
<dd>Convert a signed integer constant to the corresponding floating point
- constant. TYPE must be a scalar or vector floating point type. CST must be of
- scalar or vector integer type. Both CST and TYPE must be scalars, or vectors
- of the same number of elements. If the value won't fit in the floating point
- type, the results are undefined.</dd>
+ constant. TYPE must be a scalar or vector floating point type. CST must be
+ of scalar or vector integer type. Both CST and TYPE must be scalars, or
+ vectors of the same number of elements. If the value won't fit in the
+ floating point type, the results are undefined.</dd>
<dt><b><tt>ptrtoint ( CST to TYPE )</tt></b></dt>
<dd>Convert a pointer typed constant to the corresponding integer constant.
- TYPE must be an integer type. CST must be of pointer type. The CST value is
- zero extended, truncated, or unchanged to make it fit in TYPE.</dd>
+ <tt>TYPE</tt> must be an integer type. <tt>CST</tt> must be of pointer
+ type. The <tt>CST</tt> value is zero extended, truncated, or unchanged to
+ make it fit in <tt>TYPE</tt>.</dd>
<dt><b><tt>inttoptr ( CST to TYPE )</tt></b></dt>
- <dd>Convert a integer constant to a pointer constant. TYPE must be a
- pointer type. CST must be of integer type. The CST value is zero extended,
- truncated, or unchanged to make it fit in a pointer size. This one is
- <i>really</i> dangerous!</dd>
+ <dd>Convert an integer constant to a pointer constant. TYPE must be a pointer
+ type. CST must be of integer type. The CST value is zero extended,
+ truncated, or unchanged to make it fit in a pointer size. This one is
+ <i>really</i> dangerous!</dd>
<dt><b><tt>bitcast ( CST to TYPE )</tt></b></dt>
<dd>Convert a constant, CST, to another TYPE. The constraints of the operands
@@ -2025,16 +2254,14 @@ following is the syntax for constant expressions:</p>
instruction</a>.</dd>
<dt><b><tt>getelementptr ( CSTPTR, IDX0, IDX1, ... )</tt></b></dt>
-
+ <dt><b><tt>getelementptr inbounds ( CSTPTR, IDX0, IDX1, ... )</tt></b></dt>
<dd>Perform the <a href="#i_getelementptr">getelementptr operation</a> on
- constants. As with the <a href="#i_getelementptr">getelementptr</a>
- instruction, the index list may have zero or more indexes, which are required
- to make sense for the type of "CSTPTR".</dd>
+ constants. As with the <a href="#i_getelementptr">getelementptr</a>
+ instruction, the index list may have zero or more indexes, which are
+ required to make sense for the type of "CSTPTR".</dd>
<dt><b><tt>select ( COND, VAL1, VAL2 )</tt></b></dt>
-
- <dd>Perform the <a href="#i_select">select operation</a> on
- constants.</dd>
+ <dd>Perform the <a href="#i_select">select operation</a> on constants.</dd>
<dt><b><tt>icmp COND ( VAL1, VAL2 )</tt></b></dt>
<dd>Performs the <a href="#i_icmp">icmp operation</a> on constants.</dd>
@@ -2042,36 +2269,26 @@ following is the syntax for constant expressions:</p>
<dt><b><tt>fcmp COND ( VAL1, VAL2 )</tt></b></dt>
<dd>Performs the <a href="#i_fcmp">fcmp operation</a> on constants.</dd>
- <dt><b><tt>vicmp COND ( VAL1, VAL2 )</tt></b></dt>
- <dd>Performs the <a href="#i_vicmp">vicmp operation</a> on constants.</dd>
-
- <dt><b><tt>vfcmp COND ( VAL1, VAL2 )</tt></b></dt>
- <dd>Performs the <a href="#i_vfcmp">vfcmp operation</a> on constants.</dd>
-
<dt><b><tt>extractelement ( VAL, IDX )</tt></b></dt>
-
- <dd>Perform the <a href="#i_extractelement">extractelement
- operation</a> on constants.</dd>
+ <dd>Perform the <a href="#i_extractelement">extractelement operation</a> on
+ constants.</dd>
<dt><b><tt>insertelement ( VAL, ELT, IDX )</tt></b></dt>
-
- <dd>Perform the <a href="#i_insertelement">insertelement
- operation</a> on constants.</dd>
-
+ <dd>Perform the <a href="#i_insertelement">insertelement operation</a> on
+ constants.</dd>
<dt><b><tt>shufflevector ( VEC1, VEC2, IDXMASK )</tt></b></dt>
-
- <dd>Perform the <a href="#i_shufflevector">shufflevector
- operation</a> on constants.</dd>
+ <dd>Perform the <a href="#i_shufflevector">shufflevector operation</a> on
+ constants.</dd>
<dt><b><tt>OPCODE ( LHS, RHS )</tt></b></dt>
-
- <dd>Perform the specified operation of the LHS and RHS constants. OPCODE may
- be any of the <a href="#binaryops">binary</a> or <a href="#bitwiseops">bitwise
- binary</a> operations. The constraints on operands are the same as those for
- the corresponding instruction (e.g. no bitwise operations on floating point
- values are allowed).</dd>
+ <dd>Perform the specified operation on the LHS and RHS constants. OPCODE may
+ be any of the <a href="#binaryops">binary</a>
+ or <a href="#bitwiseops">bitwise binary</a> operations. The constraints
+ on operands are the same as those for the corresponding instruction
+ (e.g. no bitwise operations on floating point values are allowed).</dd>
</dl>
+
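+<p>To illustrate the syntax above, a short sketch (only <tt>@G</tt> comes from
+the earlier examples; the other globals are hypothetical):</p>
+
+<div class="doc_code">
+<pre>
+@G    = external global i32
+@GI8  = global i8* bitcast (i32* @G to i8*)          <i>; bitcast</i>
+@GInt = global i64 ptrtoint (i32* @G to i64)         <i>; ptrtoint</i>
+@GEP  = global i32* getelementptr (i32* @G, i32 1)   <i>; getelementptr</i>
+@T    = global i8 trunc (i32 257 to i8)              <i>; trunc: yields i8 1</i>
+</pre>
+</div>
+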
</div>
<!-- ======================================================================= -->
@@ -2080,31 +2297,30 @@ following is the syntax for constant expressions:</p>
<div class="doc_text">
-<p>Embedded metadata provides a way to attach arbitrary data to the
-instruction stream without affecting the behaviour of the program. There are
-two metadata primitives, strings and nodes. All metadata has the
-<tt>metadata</tt> type and is identified in syntax by a preceding exclamation
-point ('<tt>!</tt>').
-</p>
+<p>Embedded metadata provides a way to attach arbitrary data to the instruction
+ stream without affecting the behaviour of the program. There are two
+ metadata primitives, strings and nodes. All metadata has the
+ <tt>metadata</tt> type and is identified in syntax by a preceding exclamation
+ point ('<tt>!</tt>').</p>
<p>A metadata string is a string surrounded by double quotes. It can contain
-any character by escaping non-printable characters with "\xx" where "xx" is
-the two digit hex code. For example: "<tt>!"test\00"</tt>".
-</p>
+ any character by escaping non-printable characters with "\xx" where "xx" is
+ the two digit hex code. For example: "<tt>!"test\00"</tt>".</p>
<p>Metadata nodes are represented with notation similar to structure constants
-(a comma separated list of elements, surrounded by braces and preceeded by an
-exclamation point). For example: "<tt>!{ metadata !"test\00", i32 10}</tt>".
-</p>
+ (a comma separated list of elements, surrounded by braces and preceded by an
+ exclamation point). For example: "<tt>!{ metadata !"test\00", i32
+ 10}</tt>".</p>
-<p>A metadata node will attempt to track changes to the values it holds. In
-the event that a value is deleted, it will be replaced with a typeless
-"<tt>null</tt>", such as "<tt>metadata !{null, i32 10}</tt>".</p>
+<p>A metadata node will attempt to track changes to the values it holds. In the
+ event that a value is deleted, it will be replaced with a typeless
+ "<tt>null</tt>", such as "<tt>metadata !{null, i32 10}</tt>".</p>
<p>Optimizations may rely on metadata to provide additional information about
-the program that isn't available in the instructions, or that isn't easily
-computable. Similarly, the code generator may expect a certain metadata format
-to be used to express debugging information.</p>
+ the program that isn't available in the instructions, or that isn't easily
+ computable. Similarly, the code generator may expect a certain metadata
+ format to be used to express debugging information.</p>
+
</div>
<!-- *********************************************************************** -->
@@ -2118,14 +2334,14 @@ to be used to express debugging information.</p>
<div class="doc_text">
-<p>
-LLVM supports inline assembler expressions (as opposed to <a href="#moduleasm">
-Module-Level Inline Assembly</a>) through the use of a special value. This
-value represents the inline assembler as a string (containing the instructions
-to emit), a list of operand constraints (stored as a string), and a flag that
-indicates whether or not the inline asm expression has side effects. An example
-inline assembler expression is:
-</p>
+<p>LLVM supports inline assembler expressions (as opposed
+ to <a href="#moduleasm">Module-Level Inline Assembly</a>) through the use of
+ a special value. This value represents the inline assembler as a string
+ (containing the instructions to emit), a list of operand constraints (stored
+ as a string), a flag that indicates whether or not the inline asm
+ expression has side effects, and a flag indicating whether the asm came
+ originally from an asm block. An example inline assembler
+ expression is:</p>
<div class="doc_code">
<pre>
@@ -2133,10 +2349,9 @@ i32 (i32) asm "bswap $0", "=r,r"
</pre>
</div>
-<p>
-Inline assembler expressions may <b>only</b> be used as the callee operand of
-a <a href="#i_call"><tt>call</tt> instruction</a>. Thus, typically we have:
-</p>
+<p>Inline assembler expressions may <b>only</b> be used as the callee operand of
+ a <a href="#i_call"><tt>call</tt> instruction</a>. Thus, typically we
+ have:</p>
<div class="doc_code">
<pre>
@@ -2144,11 +2359,9 @@ a <a href="#i_call"><tt>call</tt> instruction</a>. Thus, typically we have:
</pre>
</div>
-<p>
-Inline asms with side effects not visible in the constraint list must be marked
-as having side effects. This is done through the use of the
-'<tt>sideeffect</tt>' keyword, like so:
-</p>
+<p>Inline asms with side effects not visible in the constraint list must be
+ marked as having side effects. This is done through the use of the
+ '<tt>sideeffect</tt>' keyword, like so:</p>
<div class="doc_code">
<pre>
@@ -2156,26 +2369,126 @@ call void asm sideeffect "eieio", ""()
</pre>
</div>
+<p>Inline asms derived from asm blocks are similarly marked with the
+ '<tt>msasm</tt>' keyword:</p>
+
+<div class="doc_code">
+<pre>
+call void asm msasm "eieio", ""()
+</pre>
+</div>
+
+<p>If both keywords appear, the '<tt>sideeffect</tt>' keyword must come
+ first.</p>
+
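+<p>For instance, a sketch combining both keywords (with '<tt>sideeffect</tt>'
+first, as required):</p>
+
+<div class="doc_code">
+<pre>
+call void asm sideeffect msasm "eieio", ""()
+</pre>
+</div>
+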
+<p>TODO: The format of the asm and constraints strings still needs to be
-documented here. Constraints on what can be done (e.g. duplication, moving, etc
-need to be documented). This is probably best done by reference to another
-document that covers inline asm from a holistic perspective.
-</p>
+ documented here. Constraints on what can be done (e.g. duplication, moving,
+ etc.) need to be documented. This is probably best done by reference to
+ another document that covers inline asm from a holistic perspective.</p>
</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="intrinsic_globals">Intrinsic Global Variables</a>
+</div>
+<!-- *********************************************************************** -->
+
+<p>LLVM has a number of "magic" global variables that contain data that affect
+code generation or other IR semantics. These are documented here. All globals
+of this sort should have a section specified as "<tt>llvm.metadata</tt>". This
+section and all globals that start with "<tt>llvm.</tt>" are reserved for use
+by LLVM.</p>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+<a name="intg_used">The '<tt>llvm.used</tt>' Global Variable</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>@llvm.used</tt> global is an array with i8* element type which has <a
+href="#linkage_appending">appending linkage</a>. This array contains a list of
+pointers to global variables and functions, each of which may optionally be
+cast to i8* with a bitcast or getelementptr. For example, a legal use of it is:</p>
+
+<pre>
+ @X = global i8 4
+ @Y = global i32 123
+
+ @llvm.used = appending global [2 x i8*] [
+ i8* @X,
+ i8* bitcast (i32* @Y to i8*)
+ ], section "llvm.metadata"
+</pre>
+
+<p>If a global variable appears in the <tt>@llvm.used</tt> list, then the
+compiler, assembler, and linker are required to treat the symbol as if there is
+a reference to the global that it cannot see. For example, if a variable has
+internal linkage and no references other than that from the <tt>@llvm.used</tt>
+list, it cannot be deleted. This is commonly used to represent references from
+inline asms and other things the compiler cannot "see", and corresponds to
+"<tt>__attribute__((used))</tt>" in GNU C.</p>
+
+<p>On some targets, the code generator must emit a directive to the assembler or
+object file to prevent the assembler and linker from molesting the symbol.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+<a name="intg_compiler_used">The '<tt>llvm.compiler.used</tt>' Global Variable</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>@llvm.compiler.used</tt> global is the same as the
+<tt>@llvm.used</tt> global, except that it only prevents the compiler from
+touching the symbol. On targets that support it, this allows an intelligent
+linker to optimize references to the symbol without being impeded as it would be
+by <tt>@llvm.used</tt>.</p>
+
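+<p>By analogy with the <tt>@llvm.used</tt> example above, a minimal sketch (the
+global <tt>@X</tt> is hypothetical):</p>
+
+<pre>
+  @X = internal global i8 4
+
+  @llvm.compiler.used = appending global [1 x i8*] [ i8* @X ], section "llvm.metadata"
+</pre>
+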
+<p>This is a rare construct that should only be used in exceptional circumstances, and
+should not be exposed to source languages.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+<a name="intg_global_ctors">The '<tt>llvm.global_ctors</tt>' Global Variable</a>
+</div>
+
+<div class="doc_text">
+
+<p>TODO: Describe this.</p>
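+
+<p>Pending that description, here is a sketch of the shape front ends commonly
+emit (the priority value and <tt>@ctor</tt> are illustrative assumptions): each
+entry pairs an i32 priority with a function to run at program startup.</p>
+
+<pre>
+  declare void @ctor()
+
+  @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+                       [ { i32, void ()* } { i32 65535, void ()* @ctor } ]
+</pre>
+
+<p>The '<tt>llvm.global_dtors</tt>' variable below presumably follows the same
+shape for functions run at program exit.</p>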
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+<a name="intg_global_dtors">The '<tt>llvm.global_dtors</tt>' Global Variable</a>
+</div>
+
+<div class="doc_text">
+
+<p>TODO: Describe this.</p>
+
+</div>
+
<!-- *********************************************************************** -->
<div class="doc_section"> <a name="instref">Instruction Reference</a> </div>
<!-- *********************************************************************** -->
<div class="doc_text">
-<p>The LLVM instruction set consists of several different
-classifications of instructions: <a href="#terminators">terminator
-instructions</a>, <a href="#binaryops">binary instructions</a>,
-<a href="#bitwiseops">bitwise binary instructions</a>, <a
- href="#memoryops">memory instructions</a>, and <a href="#otherops">other
-instructions</a>.</p>
+<p>The LLVM instruction set consists of several different classifications of
+ instructions: <a href="#terminators">terminator
+ instructions</a>, <a href="#binaryops">binary instructions</a>,
+ <a href="#bitwiseops">bitwise binary instructions</a>,
+ <a href="#memoryops">memory instructions</a>, and
+ <a href="#otherops">other instructions</a>.</p>
</div>
@@ -2185,25 +2498,29 @@ Instructions</a> </div>
<div class="doc_text">
-<p>As mentioned <a href="#functionstructure">previously</a>, every
-basic block in a program ends with a "Terminator" instruction, which
-indicates which block should be executed after the current block is
-finished. These terminator instructions typically yield a '<tt>void</tt>'
-value: they produce control flow, not values (the one exception being
-the '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
-<p>There are six different terminator instructions: the '<a
- href="#i_ret"><tt>ret</tt></a>' instruction, the '<a href="#i_br"><tt>br</tt></a>'
-instruction, the '<a href="#i_switch"><tt>switch</tt></a>' instruction,
-the '<a href="#i_invoke"><tt>invoke</tt></a>' instruction, the '<a
- href="#i_unwind"><tt>unwind</tt></a>' instruction, and the '<a
- href="#i_unreachable"><tt>unreachable</tt></a>' instruction.</p>
+<p>As mentioned <a href="#functionstructure">previously</a>, every basic block
+ in a program ends with a "Terminator" instruction, which indicates which
+ block should be executed after the current block is finished. These
+ terminator instructions typically yield a '<tt>void</tt>' value: they produce
+ control flow, not values (the one exception being the
+ '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
+
+<p>There are six different terminator instructions: the
+ '<a href="#i_ret"><tt>ret</tt></a>' instruction, the
+ '<a href="#i_br"><tt>br</tt></a>' instruction, the
+ '<a href="#i_switch"><tt>switch</tt></a>' instruction, the
+ '<a href="#i_invoke"><tt>invoke</tt></a>' instruction, the
+ '<a href="#i_unwind"><tt>unwind</tt></a>' instruction, and the
+ '<a href="#i_unreachable"><tt>unreachable</tt></a>' instruction.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_ret">'<tt>ret</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
<pre>
ret &lt;type&gt; &lt;value&gt; <i>; Return a value from a non-void function</i>
@@ -2211,38 +2528,35 @@ Instruction</a> </div>
</pre>
<h5>Overview:</h5>
+<p>The '<tt>ret</tt>' instruction is used to return control flow (and optionally
+ a value) from a function back to the caller.</p>
-<p>The '<tt>ret</tt>' instruction is used to return control flow (and
-optionally a value) from a function back to the caller.</p>
-<p>There are two forms of the '<tt>ret</tt>' instruction: one that
-returns a value and then causes control flow, and one that just causes
-control flow to occur.</p>
+<p>There are two forms of the '<tt>ret</tt>' instruction: one that returns a
+ value and then causes control flow, and one that just causes control flow to
+ occur.</p>
<h5>Arguments:</h5>
+<p>The '<tt>ret</tt>' instruction optionally accepts a single argument, the
+ return value. The type of the return value must be a
+ '<a href="#t_firstclass">first class</a>' type.</p>
-<p>The '<tt>ret</tt>' instruction optionally accepts a single argument,
-the return value. The type of the return value must be a
-'<a href="#t_firstclass">first class</a>' type.</p>
-
-<p>A function is not <a href="#wellformed">well formed</a> if
-it it has a non-void return type and contains a '<tt>ret</tt>'
-instruction with no return value or a return value with a type that
-does not match its type, or if it has a void return type and contains
-a '<tt>ret</tt>' instruction with a return value.</p>
+<p>A function is not <a href="#wellformed">well formed</a> if it has a
+ non-void return type and contains a '<tt>ret</tt>' instruction with no return
+ value or a return value with a type that does not match its type, or if it
+ has a void return type and contains a '<tt>ret</tt>' instruction with a
+ return value.</p>
<h5>Semantics:</h5>
-
-<p>When the '<tt>ret</tt>' instruction is executed, control flow
-returns back to the calling function's context. If the caller is a "<a
- href="#i_call"><tt>call</tt></a>" instruction, execution continues at
-the instruction after the call. If the caller was an "<a
- href="#i_invoke"><tt>invoke</tt></a>" instruction, execution continues
-at the beginning of the "normal" destination block. If the instruction
-returns a value, that value shall set the call or invoke instruction's
-return value.</p>
+<p>When the '<tt>ret</tt>' instruction is executed, control flow returns back to
+ the calling function's context. If the caller is a
+ "<a href="#i_call"><tt>call</tt></a>" instruction, execution continues at the
+ instruction after the call. If the caller was an
+ "<a href="#i_invoke"><tt>invoke</tt></a>" instruction, execution continues at
+ the beginning of the "normal" destination block. If the instruction returns
+ a value, that value shall set the call or invoke instruction's return
+ value.</p>
<h5>Example:</h5>
-
<pre>
ret i32 5 <i>; Return an integer value of 5</i>
ret void <i>; Return from a void function</i>
@@ -2260,73 +2574,83 @@ return value.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_br">'<tt>br</tt>' Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;<br> br label &lt;dest&gt; <i>; Unconditional branch</i>
+<pre>
+  br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;
+  br label &lt;dest&gt;          <i>; Unconditional branch</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>br</tt>' instruction is used to cause control flow to
-transfer to a different basic block in the current function. There are
-two forms of this instruction, corresponding to a conditional branch
-and an unconditional branch.</p>
+<p>The '<tt>br</tt>' instruction is used to cause control flow to transfer to a
+ different basic block in the current function. There are two forms of this
+ instruction, corresponding to a conditional branch and an unconditional
+ branch.</p>
+
<h5>Arguments:</h5>
-<p>The conditional branch form of the '<tt>br</tt>' instruction takes a
-single '<tt>i1</tt>' value and two '<tt>label</tt>' values. The
-unconditional form of the '<tt>br</tt>' instruction takes a single
-'<tt>label</tt>' value as a target.</p>
+<p>The conditional branch form of the '<tt>br</tt>' instruction takes a single
+ '<tt>i1</tt>' value and two '<tt>label</tt>' values. The unconditional form
+ of the '<tt>br</tt>' instruction takes a single '<tt>label</tt>' value as a
+ target.</p>
+
<h5>Semantics:</h5>
<p>Upon execution of a conditional '<tt>br</tt>' instruction, the '<tt>i1</tt>'
-argument is evaluated. If the value is <tt>true</tt>, control flows
-to the '<tt>iftrue</tt>' <tt>label</tt> argument. If "cond" is <tt>false</tt>,
-control flows to the '<tt>iffalse</tt>' <tt>label</tt> argument.</p>
+ argument is evaluated. If the value is <tt>true</tt>, control flows to the
+ '<tt>iftrue</tt>' <tt>label</tt> argument. If '<tt>cond</tt>' is <tt>false</tt>,
+ control flows to the '<tt>iffalse</tt>' <tt>label</tt> argument.</p>
+
<h5>Example:</h5>
-<pre>Test:<br> %cond = <a href="#i_icmp">icmp</a> eq i32 %a, %b<br> br i1 %cond, label %IfEqual, label %IfUnequal<br>IfEqual:<br> <a
- href="#i_ret">ret</a> i32 1<br>IfUnequal:<br> <a href="#i_ret">ret</a> i32 0<br></pre>
+<pre>
+Test:
+ %cond = <a href="#i_icmp">icmp</a> eq i32 %a, %b
+ br i1 %cond, label %IfEqual, label %IfUnequal
+IfEqual:
+ <a href="#i_ret">ret</a> i32 1
+IfUnequal:
+ <a href="#i_ret">ret</a> i32 0
+</pre>
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_switch">'<tt>switch</tt>' Instruction</a>
</div>
<div class="doc_text">
-<h5>Syntax:</h5>
+<h5>Syntax:</h5>
<pre>
switch &lt;intty&gt; &lt;value&gt;, label &lt;defaultdest&gt; [ &lt;intty&gt; &lt;val&gt;, label &lt;dest&gt; ... ]
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>switch</tt>' instruction is used to transfer control flow to one of
-several different places. It is a generalization of the '<tt>br</tt>'
-instruction, allowing a branch to occur to one of many possible
-destinations.</p>
-
+ several different places. It is a generalization of the '<tt>br</tt>'
+ instruction, allowing a branch to occur to one of many possible
+ destinations.</p>
<h5>Arguments:</h5>
-
<p>The '<tt>switch</tt>' instruction uses three parameters: an integer
-comparison value '<tt>value</tt>', a default '<tt>label</tt>' destination, and
-an array of pairs of comparison value constants and '<tt>label</tt>'s. The
-table is not allowed to contain duplicate constant entries.</p>
+ comparison value '<tt>value</tt>', a default '<tt>label</tt>' destination,
+ and an array of pairs of comparison value constants and '<tt>label</tt>'s.
+ The table is not allowed to contain duplicate constant entries.</p>
<h5>Semantics:</h5>
-
<p>The <tt>switch</tt> instruction specifies a table of values and
-destinations. When the '<tt>switch</tt>' instruction is executed, this
-table is searched for the given value. If the value is found, control flow is
-transfered to the corresponding destination; otherwise, control flow is
-transfered to the default destination.</p>
+ destinations. When the '<tt>switch</tt>' instruction is executed, this table
+ is searched for the given value. If the value is found, control flow is
+ transferred to the corresponding destination; otherwise, control flow is
+ transferred to the default destination.</p>
<h5>Implementation:</h5>
-
<p>Depending on properties of the target machine and the particular
-<tt>switch</tt> instruction, this instruction may be code generated in different
-ways. For example, it could be generated as a series of chained conditional
-branches or with a lookup table.</p>
+ <tt>switch</tt> instruction, this instruction may be code generated in
+ different ways. For example, it could be generated as a series of chained
+ conditional branches or with a lookup table.</p>
<h5>Example:</h5>
-
<pre>
<i>; Emulate a conditional br instruction</i>
%Val = <a href="#i_zext">zext</a> i1 %value to i32
@@ -2340,6 +2664,7 @@ branches or with a lookup table.</p>
i32 1, label %onone
i32 2, label %ontwo ]
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -2350,79 +2675,72 @@ branches or with a lookup table.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = invoke [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ptr to function ty&gt; &lt;function ptr val&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
to label &lt;normal label&gt; unwind label &lt;exception label&gt;
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>invoke</tt>' instruction causes control to transfer to a specified
-function, with the possibility of control flow transfer to either the
-'<tt>normal</tt>' label or the
-'<tt>exception</tt>' label. If the callee function returns with the
-"<tt><a href="#i_ret">ret</a></tt>" instruction, control flow will return to the
-"normal" label. If the callee (or any indirect callees) returns with the "<a
-href="#i_unwind"><tt>unwind</tt></a>" instruction, control is interrupted and
-continued at the dynamically nearest "exception" label.</p>
+ function, with the possibility of control flow transfer to either the
+ '<tt>normal</tt>' label or the '<tt>exception</tt>' label. If the callee
+ function returns with the "<tt><a href="#i_ret">ret</a></tt>" instruction,
+ control flow will return to the "normal" label. If the callee (or any
+ indirect callees) returns with the "<a href="#i_unwind"><tt>unwind</tt></a>"
+ instruction, control is interrupted and continued at the dynamically nearest
+ "exception" label.</p>
<h5>Arguments:</h5>
-
<p>This instruction requires several arguments:</p>
<ol>
- <li>
- The optional "cconv" marker indicates which <a href="#callingconv">calling
- convention</a> the call should use. If none is specified, the call defaults
- to using C calling conventions.
- </li>
+ <li>The optional "cconv" marker indicates which <a href="#callingconv">calling
+ convention</a> the call should use. If none is specified, the call
+ defaults to using C calling conventions.</li>
<li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
- return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>',
- and '<tt>inreg</tt>' attributes are valid here.</li>
+ return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
+ '<tt>inreg</tt>' attributes are valid here.</li>
<li>'<tt>ptr to function ty</tt>': shall be the signature of the pointer to
- function value being invoked. In most cases, this is a direct function
- invocation, but indirect <tt>invoke</tt>s are just as possible, branching off
- an arbitrary pointer to function value.
- </li>
+ function value being invoked. In most cases, this is a direct function
+ invocation, but indirect <tt>invoke</tt>s are just as possible, branching
+ off an arbitrary pointer to function value.</li>
<li>'<tt>function ptr val</tt>': An LLVM value containing a pointer to a
- function to be invoked. </li>
+ function to be invoked. </li>
<li>'<tt>function args</tt>': argument list whose types match the function
- signature argument types. If the function signature indicates the function
- accepts a variable number of arguments, the extra arguments can be
- specified. </li>
+ signature argument types. If the function signature indicates the
+ function accepts a variable number of arguments, the extra arguments can
+ be specified.</li>
<li>'<tt>normal label</tt>': the label reached when the called function
- executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
+ executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
<li>'<tt>exception label</tt>': the label reached when a callee returns with
- the <a href="#i_unwind"><tt>unwind</tt></a> instruction. </li>
+ the <a href="#i_unwind"><tt>unwind</tt></a> instruction. </li>
<li>The optional <a href="#fnattrs">function attributes</a> list. Only
- '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
- '<tt>readnone</tt>' attributes are valid here.</li>
+ '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
+ '<tt>readnone</tt>' attributes are valid here.</li>
</ol>
<h5>Semantics:</h5>
-
-<p>This instruction is designed to operate as a standard '<tt><a
-href="#i_call">call</a></tt>' instruction in most regards. The primary
-difference is that it establishes an association with a label, which is used by
-the runtime library to unwind the stack.</p>
+<p>This instruction is designed to operate as a standard
+ '<tt><a href="#i_call">call</a></tt>' instruction in most regards. The
+ primary difference is that it establishes an association with a label, which
+ is used by the runtime library to unwind the stack.</p>
<p>This instruction is used in languages with destructors to ensure that proper
-cleanup is performed in the case of either a <tt>longjmp</tt> or a thrown
-exception. Additionally, this is important for implementation of
-'<tt>catch</tt>' clauses in high-level languages that support them.</p>
+ cleanup is performed in the case of either a <tt>longjmp</tt> or a thrown
+ exception. Additionally, this is important for implementation of
+ '<tt>catch</tt>' clauses in high-level languages that support them.</p>
-<p>For the purposes of the SSA form, the definition of the value
-returned by the '<tt>invoke</tt>' instruction is deemed to occur on
-the edge from the current block to the "normal" label. If the callee
-unwinds then no return value is available.</p>
+<p>For the purposes of the SSA form, the definition of the value returned by the
+ '<tt>invoke</tt>' instruction is deemed to occur on the edge from the current
+ block to the "normal" label. If the callee unwinds then no return value is
+ available.</p>
<h5>Example:</h5>
<pre>
@@ -2431,8 +2749,8 @@ unwinds then no return value is available.</p>
%retval = invoke <a href="#callingconv">coldcc</a> i32 %Testfnptr(i32 15) to label %Continue
unwind label %TestCleanup <i>; {i32}:retval set</i>
</pre>
-</div>
+</div>
<!-- _______________________________________________________________________ -->
@@ -2447,20 +2765,19 @@ Instruction</a> </div>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>unwind</tt>' instruction unwinds the stack, continuing control flow
-at the first callee in the dynamic call stack which used an <a
-href="#i_invoke"><tt>invoke</tt></a> instruction to perform the call. This is
-primarily used to implement exception handling.</p>
+ at the first callee in the dynamic call stack which used
+ an <a href="#i_invoke"><tt>invoke</tt></a> instruction to perform the call.
+ This is primarily used to implement exception handling.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>unwind</tt>' instruction causes execution of the current function to
-immediately halt. The dynamic call stack is then searched for the first <a
-href="#i_invoke"><tt>invoke</tt></a> instruction on the call stack. Once found,
-execution continues at the "exceptional" destination block specified by the
-<tt>invoke</tt> instruction. If there is no <tt>invoke</tt> instruction in the
-dynamic call chain, undefined behavior results.</p>
+ immediately halt. The dynamic call stack is then searched for the
+ first <a href="#i_invoke"><tt>invoke</tt></a> instruction on the call stack.
+ Once found, execution continues at the "exceptional" destination block
+ specified by the <tt>invoke</tt> instruction. If there is no <tt>invoke</tt>
+ instruction in the dynamic call chain, undefined behavior results.</p>
+
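+<h5>Example:</h5>
+<p>A minimal sketch (the function <tt>@ThrowsException</tt> is hypothetical):</p>
+<pre>
+define void @ThrowsException() {
+entry:
+  unwind        <i>; resumes at the nearest dynamic invoke's "exception" label</i>
+}
+</pre>
+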
</div>
<!-- _______________________________________________________________________ -->
@@ -2476,29 +2793,31 @@ Instruction</a> </div>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>unreachable</tt>' instruction has no defined semantics. This
-instruction is used to inform the optimizer that a particular portion of the
-code is not reachable. This can be used to indicate that the code after a
-no-return function cannot be reached, and other facts.</p>
+ instruction is used to inform the optimizer that a particular portion of the
+ code is not reachable. This can be used to indicate that the code after a
+ no-return function cannot be reached, and other facts.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>unreachable</tt>' instruction has no defined semantics.</p>
-</div>
-
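+<h5>Example:</h5>
+<p>A sketch of typical use (<tt>@abort</tt> stands for any function known not
+to return):</p>
+<pre>
+  call void @abort() noreturn
+  unreachable      <i>; the call above never returns, so this point is dead</i>
+</pre>
+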
+</div>
<!-- ======================================================================= -->
<div class="doc_subsection"> <a name="binaryops">Binary Operations</a> </div>
+
<div class="doc_text">
-<p>Binary operators are used to do most of the computation in a
-program. They require two operands of the same type, execute an operation on them, and
-produce a single value. The operands might represent
-multiple data, as is the case with the <a href="#t_vector">vector</a> data type.
-The result value has the same type as its operands.</p>
+
+<p>Binary operators are used to do most of the computation in a program. They
+ require two operands of the same type, execute an operation on them, and
+ produce a single value. The operands might represent multiple data elements, as is
+ the case with the <a href="#t_vector">vector</a> data type. The result value
+ has the same type as its operands.</p>
+
<p>There are several different binary operators:</p>
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_add">'<tt>add</tt>' Instruction</a>
@@ -2507,39 +2826,42 @@ The result value has the same type as its operands.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
- &lt;result&gt; = add &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = add &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = add nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = add nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = add nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>add</tt>' instruction returns the sum of its two operands.</p>
<h5>Arguments:</h5>
-
-<p>The two arguments to the '<tt>add</tt>' instruction must be <a
- href="#t_integer">integer</a> or
- <a href="#t_vector">vector</a> of integer values. Both arguments must
- have identical types.</p>
+<p>The two arguments to the '<tt>add</tt>' instruction must
+ be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
+ integer values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the integer sum of the two operands.</p>
-<p>If the sum has unsigned overflow, the result returned is the
-mathematical result modulo 2<sup>n</sup>, where n is the bit width of
-the result.</p>
+<p>If the sum has unsigned overflow, the result returned is the mathematical
+ result modulo 2<sup>n</sup>, where n is the bit width of the result.</p>
-<p>Because LLVM integers use a two's complement representation, this
-instruction is appropriate for both signed and unsigned integers.</p>
+<p>Because LLVM integers use a two's complement representation, this instruction
+ is appropriate for both signed and unsigned integers.</p>
-<h5>Example:</h5>
+<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
+ and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
+ <tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
+ is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+<h5>Example:</h5>
<pre>
&lt;result&gt; = add i32 4, %var <i>; yields {i32}:result = 4 + %var</i>
</pre>
+
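+<p>For instance, a sketch using both wrap flags (<tt>%var</tt> is assumed to
+hold an <tt>i32</tt> as above):</p>
+
+<pre>
+  &lt;result&gt; = add nuw nsw i32 4, %var          <i>; result undefined on unsigned or signed overflow</i>
+</pre>
+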
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_fadd">'<tt>fadd</tt>' Instruction</a>
@@ -2548,31 +2870,28 @@ instruction is appropriate for both signed and unsigned integers.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = fadd &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>fadd</tt>' instruction returns the sum of its two operands.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>fadd</tt>' instruction must be
-<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
-floating point values. Both arguments must have identical types.</p>
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+ floating point values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the floating point sum of the two operands.</p>
<h5>Example:</h5>
-
<pre>
&lt;result&gt; = fadd float 4.0, %var <i>; yields {float}:result = 4.0 + %var</i>
</pre>
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_sub">'<tt>sub</tt>' Instruction</a>
@@ -2581,42 +2900,47 @@ floating point values. Both arguments must have identical types.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
- &lt;result&gt; = sub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = sub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = sub nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = sub nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = sub nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>sub</tt>' instruction returns the difference of its two
-operands.</p>
+ operands.</p>
<p>Note that the '<tt>sub</tt>' instruction is used to represent the
-'<tt>neg</tt>' instruction present in most other intermediate
-representations.</p>
+ '<tt>neg</tt>' instruction present in most other intermediate
+ representations.</p>
<h5>Arguments:</h5>
-
-<p>The two arguments to the '<tt>sub</tt>' instruction must be <a
- href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
- integer values. Both arguments must have identical types.</p>
+<p>The two arguments to the '<tt>sub</tt>' instruction must
+ be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
+ integer values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the integer difference of the two operands.</p>
<p>If the difference has unsigned overflow, the result returned is the
-mathematical result modulo 2<sup>n</sup>, where n is the bit width of
-the result.</p>
+ mathematical result modulo 2<sup>n</sup>, where n is the bit width of the
+ result.</p>
+
+<p>Because LLVM integers use a two's complement representation, this instruction
+ is appropriate for both signed and unsigned integers.</p>
-<p>Because LLVM integers use a two's complement representation, this
-instruction is appropriate for both signed and unsigned integers.</p>
+<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
+ and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
+ <tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
+ is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
<h5>Example:</h5>
<pre>
&lt;result&gt; = sub i32 4, %var <i>; yields {i32}:result = 4 - %var</i>
&lt;result&gt; = sub i32 0, %val <i>; yields {i32}:result = -%var</i>
</pre>
+
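+<p>For instance, a sketch mirroring the <tt>add</tt> example:</p>
+
+<pre>
+  &lt;result&gt; = sub nuw nsw i32 4, %var          <i>; result undefined on unsigned or signed overflow</i>
+</pre>
+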
</div>
<!-- _______________________________________________________________________ -->
@@ -2627,28 +2951,24 @@ instruction is appropriate for both signed and unsigned integers.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = fsub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>fsub</tt>' instruction returns the difference of its two
-operands.</p>
+ operands.</p>
<p>Note that the '<tt>fsub</tt>' instruction is used to represent the
-'<tt>fneg</tt>' instruction present in most other intermediate
-representations.</p>
+ '<tt>fneg</tt>' instruction present in most other intermediate
+ representations.</p>
<h5>Arguments:</h5>
-
-<p>The two arguments to the '<tt>fsub</tt>' instruction must be <a
- <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
- of floating point values. Both arguments must have identical types.</p>
+<p>The two arguments to the '<tt>fsub</tt>' instruction must be
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+ floating point values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the floating point difference of the two operands.</p>
<h5>Example:</h5>
@@ -2656,6 +2976,7 @@ representations.</p>
&lt;result&gt; = fsub float 4.0, %var <i>; yields {float}:result = 4.0 - %var</i>
&lt;result&gt; = fsub float -0.0, %val <i>; yields {float}:result = -%var</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -2666,34 +2987,45 @@ representations.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = mul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = mul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = mul nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = mul nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = mul nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>mul</tt>' instruction returns the product of its two
-operands.</p>
+<p>The '<tt>mul</tt>' instruction returns the product of its two operands.</p>
<h5>Arguments:</h5>
-
-<p>The two arguments to the '<tt>mul</tt>' instruction must be <a
-href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+<p>The two arguments to the '<tt>mul</tt>' instruction must
+ be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
+ integer values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the integer product of the two operands.</p>
-<p>If the result of the multiplication has unsigned overflow,
-the result returned is the mathematical result modulo
-2<sup>n</sup>, where n is the bit width of the result.</p>
-<p>Because LLVM integers use a two's complement representation, and the
-result is the same width as the operands, this instruction returns the
-correct result for both signed and unsigned integers. If a full product
-(e.g. <tt>i32</tt>x<tt>i32</tt>-><tt>i64</tt>) is needed, the operands
-should be sign-extended or zero-extended as appropriate to the
-width of the full product.</p>
+<p>If the result of the multiplication has unsigned overflow, the result
+ returned is the mathematical result modulo 2<sup>n</sup>, where n is the bit
+ width of the result.</p>
+
+<p>Because LLVM integers use a two's complement representation, and the result
+ is the same width as the operands, this instruction returns the correct
+ result for both signed and unsigned integers. If a full product
+ (e.g. <tt>i32</tt> x <tt>i32</tt> -&gt; <tt>i64</tt>) is needed, the operands should
+ be sign-extended or zero-extended as appropriate to the width of the full
+ product.</p>
+
+<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
+ and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
+ <tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
+ is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = mul i32 4, %var <i>; yields {i32}:result = 4 * %var</i>
+<pre>
+ &lt;result&gt; = mul i32 4, %var <i>; yields {i32}:result = 4 * %var</i>
</pre>
+
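+<p>For instance, a sketch with both wrap flags:</p>
+
+<pre>
+  &lt;result&gt; = mul nuw nsw i32 4, %var  <i>; result undefined on unsigned or signed overflow</i>
+</pre>
+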
</div>
<!-- _______________________________________________________________________ -->
@@ -2704,140 +3036,170 @@ width of the full product.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = fmul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = fmul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>fmul</tt>' instruction returns the product of its two
-operands.</p>
+<p>The '<tt>fmul</tt>' instruction returns the product of its two operands.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>fmul</tt>' instruction must be
-<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
-of floating point values. Both arguments must have identical types.</p>
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+ floating point values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the floating point product of the two operands.</p>
<h5>Example:</h5>
-<pre> &lt;result&gt; = fmul float 4.0, %var <i>; yields {float}:result = 4.0 * %var</i>
+<pre>
+ &lt;result&gt; = fmul float 4.0, %var <i>; yields {float}:result = 4.0 * %var</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_udiv">'<tt>udiv</tt>' Instruction
</a></div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = udiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = udiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>udiv</tt>' instruction returns the quotient of its two
-operands.</p>
+<p>The '<tt>udiv</tt>' instruction returns the quotient of its two operands.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>udiv</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the unsigned integer quotient of the two operands.</p>
+
<p>Note that unsigned integer division and signed integer division are distinct
-operations; for signed integer division, use '<tt>sdiv</tt>'.</p>
+ operations; for signed integer division, use '<tt>sdiv</tt>'.</p>
+
<p>Division by zero leads to undefined behavior.</p>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = udiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
+<pre>
+ &lt;result&gt; = udiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
</pre>
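+
+<p>To illustrate the unsigned interpretation (constants chosen for the
+   example), the bit pattern of -8 is treated by '<tt>udiv</tt>' as a large
+   unsigned number, while '<tt>sdiv</tt>' divides it as a signed value:</p>
+
+<pre>
+  &lt;result&gt; = udiv i32 -8, 2          <i>; yields {i32}:result = 2147483644</i>
+  &lt;result&gt; = sdiv i32 -8, 2          <i>; yields {i32}:result = -4</i>
+</pre>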
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_sdiv">'<tt>sdiv</tt>' Instruction
</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
<pre>
- &lt;result&gt; = sdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = sdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+ &lt;result&gt; = sdiv exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
-<p>The '<tt>sdiv</tt>' instruction returns the quotient of its two
-operands.</p>
+<p>The '<tt>sdiv</tt>' instruction returns the quotient of its two operands.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>sdiv</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-<p>The value produced is the signed integer quotient of the two operands rounded towards zero.</p>
+<p>The value produced is the signed integer quotient of the two operands rounded
+ towards zero.</p>
+
<p>Note that signed integer division and unsigned integer division are distinct
-operations; for unsigned integer division, use '<tt>udiv</tt>'.</p>
+ operations; for unsigned integer division, use '<tt>udiv</tt>'.</p>
+
<p>Division by zero leads to undefined behavior. Overflow also leads to
-undefined behavior; this is a rare case, but can occur, for example,
-by doing a 32-bit division of -2147483648 by -1.</p>
+ undefined behavior; this is a rare case, but can occur, for example, by doing
+ a 32-bit division of -2147483648 by -1.</p>
+
+<p>If the <tt>exact</tt> keyword is present, the result value of the
+ <tt>sdiv</tt> is undefined if the result would be rounded or if overflow
+ would occur.</p>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = sdiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
+<pre>
+ &lt;result&gt; = sdiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
</pre>
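+
+<p>As a sketch of the <tt>exact</tt> keyword (constants chosen for the
+   example), a division whose result would be rounded has undefined result:</p>
+
+<pre>
+  &lt;result&gt; = sdiv exact i32 8, 2     <i>; yields {i32}:result = 4</i>
+  &lt;result&gt; = sdiv exact i32 9, 2     <i>; undefined, since 9/2 rounds</i>
+</pre>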
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_fdiv">'<tt>fdiv</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
<pre>
&lt;result&gt; = fdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
-<h5>Overview:</h5>
-<p>The '<tt>fdiv</tt>' instruction returns the quotient of its two
-operands.</p>
+<h5>Overview:</h5>
+<p>The '<tt>fdiv</tt>' instruction returns the quotient of its two operands.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>fdiv</tt>' instruction must be
-<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
-of floating point values. Both arguments must have identical types.</p>
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+ floating point values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The value produced is the floating point quotient of the two operands.</p>
<h5>Example:</h5>
-
<pre>
&lt;result&gt; = fdiv float 4.0, %var <i>; yields {float}:result = 4.0 / %var</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_urem">'<tt>urem</tt>' Instruction</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = urem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = urem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>urem</tt>' instruction returns the remainder from the
-unsigned division of its two arguments.</p>
+<p>The '<tt>urem</tt>' instruction returns the remainder from the unsigned
+ division of its two arguments.</p>
+
<h5>Arguments:</h5>
<p>The two arguments to the '<tt>urem</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
+
<h5>Semantics:</h5>
<p>This instruction returns the unsigned integer <i>remainder</i> of a division.
-This instruction always performs an unsigned division to get the remainder.</p>
+ This instruction always performs an unsigned division to get the
+ remainder.</p>
+
<p>Note that unsigned integer remainder and signed integer remainder are
-distinct operations; for signed integer remainder, use '<tt>srem</tt>'.</p>
+ distinct operations; for signed integer remainder, use '<tt>srem</tt>'.</p>
+
<p>Taking the remainder of a division by zero leads to undefined behavior.</p>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = urem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
+<pre>
+ &lt;result&gt; = urem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
</pre>
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_srem">'<tt>srem</tt>' Instruction</a>
@@ -2846,47 +3208,48 @@ distinct operations; for signed integer remainder, use '<tt>srem</tt>'.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = srem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
-<p>The '<tt>srem</tt>' instruction returns the remainder from the
-signed division of its two operands. This instruction can also take
-<a href="#t_vector">vector</a> versions of the values in which case
-the elements must be integers.</p>
+<p>The '<tt>srem</tt>' instruction returns the remainder from the signed
+ division of its two operands. This instruction can also take
+ <a href="#t_vector">vector</a> versions of the values in which case the
+ elements must be integers.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>srem</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>This instruction returns the <i>remainder</i> of a division (where the result
-has the same sign as the dividend, <tt>op1</tt>), not the <i>modulo</i>
-operator (where the result has the same sign as the divisor, <tt>op2</tt>) of
-a value. For more information about the difference, see <a
- href="http://mathforum.org/dr.math/problems/anne.4.28.99.html">The
-Math Forum</a>. For a table of how this is implemented in various languages,
-please see <a href="http://en.wikipedia.org/wiki/Modulo_operation">
-Wikipedia: modulo operation</a>.</p>
+ has the same sign as the dividend, <tt>op1</tt>), not the <i>modulo</i>
+ operator (where the result has the same sign as the divisor, <tt>op2</tt>) of
+ a value. For more information about the difference,
+ see <a href="http://mathforum.org/dr.math/problems/anne.4.28.99.html">The
+ Math Forum</a>. For a table of how this is implemented in various languages,
+ please see <a href="http://en.wikipedia.org/wiki/Modulo_operation">
+ Wikipedia: modulo operation</a>.</p>
+
<p>Note that signed integer remainder and unsigned integer remainder are
-distinct operations; for unsigned integer remainder, use '<tt>urem</tt>'.</p>
+ distinct operations; for unsigned integer remainder, use '<tt>urem</tt>'.</p>
+
<p>Taking the remainder of a division by zero leads to undefined behavior.
-Overflow also leads to undefined behavior; this is a rare case, but can occur,
-for example, by taking the remainder of a 32-bit division of -2147483648 by -1.
-(The remainder doesn't actually overflow, but this rule lets srem be
-implemented using instructions that return both the result of the division
-and the remainder.)</p>
+ Overflow also leads to undefined behavior; this is a rare case, but can
+ occur, for example, by taking the remainder of a 32-bit division of
+ -2147483648 by -1. (The remainder doesn't actually overflow, but this rule
+ lets srem be implemented using instructions that return both the result of
+ the division and the remainder.)</p>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = srem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
+<pre>
+ &lt;result&gt; = srem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
</pre>
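+
+<p>To illustrate the sign rule (constants chosen for the example), the result
+   takes the sign of the dividend, not of the divisor:</p>
+
+<pre>
+  &lt;result&gt; = srem i32 -7, 3          <i>; yields {i32}:result = -1</i>
+  &lt;result&gt; = srem i32 7, -3          <i>; yields {i32}:result = 1</i>
+</pre>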
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_frem">'<tt>frem</tt>' Instruction</a> </div>
@@ -2894,99 +3257,110 @@ and the remainder.)</p>
<div class="doc_text">
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = frem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = frem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>frem</tt>' instruction returns the remainder from the
-division of its two operands.</p>
+<p>The '<tt>frem</tt>' instruction returns the remainder from the division of
+ its two operands.</p>
+
<h5>Arguments:</h5>
<p>The two arguments to the '<tt>frem</tt>' instruction must be
-<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
-of floating point values. Both arguments must have identical types.</p>
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+ floating point values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
-<p>This instruction returns the <i>remainder</i> of a division.
-The remainder has the same sign as the dividend.</p>
+<p>This instruction returns the <i>remainder</i> of a division. The remainder
+ has the same sign as the dividend.</p>
<h5>Example:</h5>
-
<pre>
&lt;result&gt; = frem float 4.0, %var <i>; yields {float}:result = 4.0 % %var</i>
</pre>
+
</div>
<!-- ======================================================================= -->
<div class="doc_subsection"> <a name="bitwiseops">Bitwise Binary
Operations</a> </div>
+
<div class="doc_text">
-<p>Bitwise binary operators are used to do various forms of
-bit-twiddling in a program. They are generally very efficient
-instructions and can commonly be strength reduced from other
-instructions. They require two operands of the same type, execute an operation on them,
-and produce a single value. The resulting value is the same type as its operands.</p>
+
+<p>Bitwise binary operators are used to do various forms of bit-twiddling in a
+ program. They are generally very efficient instructions and can commonly be
+ strength reduced from other instructions. They require two operands of the
+ same type, execute an operation on them, and produce a single value. The
+ resulting value is the same type as its operands.</p>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_shl">'<tt>shl</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = shl &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = shl &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
-<p>The '<tt>shl</tt>' instruction returns the first operand shifted to
-the left a specified number of bits.</p>
+<p>The '<tt>shl</tt>' instruction returns the first operand shifted to the left
+ a specified number of bits.</p>
<h5>Arguments:</h5>
-
-<p>Both arguments to the '<tt>shl</tt>' instruction must be the same <a
- href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-type. '<tt>op2</tt>' is treated as an unsigned value.</p>
+<p>Both arguments to the '<tt>shl</tt>' instruction must be the
+ same <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
+ integer type. '<tt>op2</tt>' is treated as an unsigned value.</p>
<h5>Semantics:</h5>
+<p>The value produced is <tt>op1</tt> * 2<sup><tt>op2</tt></sup> mod
+ 2<sup>n</sup>, where <tt>n</tt> is the width of the result. If <tt>op2</tt>
+ is (statically or dynamically) negative or equal to or larger than the number
+ of bits in <tt>op1</tt>, the result is undefined. If the arguments are
+ vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
+ shift amount in <tt>op2</tt>.</p>
-<p>The value produced is <tt>op1</tt> * 2<sup><tt>op2</tt></sup> mod 2<sup>n</sup>,
-where n is the width of the result. If <tt>op2</tt> is (statically or dynamically) negative or
-equal to or larger than the number of bits in <tt>op1</tt>, the result is undefined.
-If the arguments are vectors, each vector element of <tt>op1</tt> is shifted by the
-corresponding shift amount in <tt>op2</tt>.</p>
-
-<h5>Example:</h5><pre>
+<h5>Example:</h5>
+<pre>
&lt;result&gt; = shl i32 4, %var <i>; yields {i32}: 4 &lt;&lt; %var</i>
&lt;result&gt; = shl i32 4, 2 <i>; yields {i32}: 16</i>
&lt;result&gt; = shl i32 1, 10 <i>; yields {i32}: 1024</i>
&lt;result&gt; = shl i32 1, 32 <i>; undefined</i>
&lt;result&gt; = shl &lt;2 x i32&gt; &lt; i32 1, i32 1&gt;, &lt; i32 1, i32 2&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 2, i32 4&gt;</i>
</pre>
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_lshr">'<tt>lshr</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = lshr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = lshr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-<p>The '<tt>lshr</tt>' instruction (logical shift right) returns the first
-operand shifted to the right a specified number of bits with zero fill.</p>
+<p>The '<tt>lshr</tt>' instruction (logical shift right) returns the first
+ operand shifted to the right a specified number of bits with zero fill.</p>
<h5>Arguments:</h5>
<p>Both arguments to the '<tt>lshr</tt>' instruction must be the same
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-type. '<tt>op2</tt>' is treated as an unsigned value.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ type. '<tt>op2</tt>' is treated as an unsigned value.</p>
<h5>Semantics:</h5>
-
<p>This instruction always performs a logical shift right operation. The most
-significant bits of the result will be filled with zero bits after the
-shift. If <tt>op2</tt> is (statically or dynamically) equal to or larger than
-the number of bits in <tt>op1</tt>, the result is undefined. If the arguments are
-vectors, each vector element of <tt>op1</tt> is shifted by the corresponding shift
-amount in <tt>op2</tt>.</p>
+ significant bits of the result will be filled with zero bits after the shift.
+ If <tt>op2</tt> is (statically or dynamically) equal to or larger than the
+ number of bits in <tt>op1</tt>, the result is undefined. If the arguments are
+ vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
+ shift amount in <tt>op2</tt>.</p>
<h5>Example:</h5>
<pre>
@@ -2997,6 +3371,7 @@ amount in <tt>op2</tt>.</p>
&lt;result&gt; = lshr i32 1, 32 <i>; undefined</i>
&lt;result&gt; = lshr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 2&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0x7FFFFFFF, i32 1&gt;</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3005,25 +3380,27 @@ Instruction</a> </div>
<div class="doc_text">
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = ashr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = ashr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-<p>The '<tt>ashr</tt>' instruction (arithmetic shift right) returns the first
-operand shifted to the right a specified number of bits with sign extension.</p>
+<p>The '<tt>ashr</tt>' instruction (arithmetic shift right) returns the first
+ operand shifted to the right a specified number of bits with sign
+ extension.</p>
<h5>Arguments:</h5>
<p>Both arguments to the '<tt>ashr</tt>' instruction must be the same
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-type. '<tt>op2</tt>' is treated as an unsigned value.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ type. '<tt>op2</tt>' is treated as an unsigned value.</p>
<h5>Semantics:</h5>
-<p>This instruction always performs an arithmetic shift right operation,
-The most significant bits of the result will be filled with the sign bit
-of <tt>op1</tt>. If <tt>op2</tt> is (statically or dynamically) equal to or
-larger than the number of bits in <tt>op1</tt>, the result is undefined. If the
-arguments are vectors, each vector element of <tt>op1</tt> is shifted by the
-corresponding shift amount in <tt>op2</tt>.</p>
+<p>This instruction always performs an arithmetic shift right operation. The
+   most significant bits of the result will be filled with the sign bit
+ of <tt>op1</tt>. If <tt>op2</tt> is (statically or dynamically) equal to or
+ larger than the number of bits in <tt>op1</tt>, the result is undefined. If
+ the arguments are vectors, each vector element of <tt>op1</tt> is shifted by
+ the corresponding shift amount in <tt>op2</tt>.</p>
<h5>Example:</h5>
<pre>
@@ -3034,6 +3411,7 @@ corresponding shift amount in <tt>op2</tt>.</p>
&lt;result&gt; = ashr i32 1, 32 <i>; undefined</i>
&lt;result&gt; = ashr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 3&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 -1, i32 0&gt;</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3043,26 +3421,22 @@ Instruction</a> </div>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = and &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
<h5>Overview:</h5>
-
-<p>The '<tt>and</tt>' instruction returns the bitwise logical and of
-its two operands.</p>
+<p>The '<tt>and</tt>' instruction returns the bitwise logical and of its two
+ operands.</p>
<h5>Arguments:</h5>
-
<p>The two arguments to the '<tt>and</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
<p>The truth table used for the '<tt>and</tt>' instruction is:</p>
-<p> </p>
-<div>
+
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
@@ -3092,7 +3466,7 @@ values. Both arguments must have identical types.</p>
</tr>
</tbody>
</table>
-</div>
+
<h5>Example:</h5>
<pre>
&lt;result&gt; = and i32 4, %var <i>; yields {i32}:result = 4 &amp; %var</i>
@@ -3102,22 +3476,26 @@ values. Both arguments must have identical types.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_or">'<tt>or</tt>' Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = or &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = or &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>or</tt>' instruction returns the bitwise logical inclusive
-or of its two operands.</p>
-<h5>Arguments:</h5>
+<p>The '<tt>or</tt>' instruction returns the bitwise logical inclusive or of its
+ two operands.</p>
+<h5>Arguments:</h5>
<p>The two arguments to the '<tt>or</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
+
<h5>Semantics:</h5>
<p>The truth table used for the '<tt>or</tt>' instruction is:</p>
-<p> </p>
-<div>
+
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
@@ -3147,34 +3525,40 @@ values. Both arguments must have identical types.</p>
</tr>
</tbody>
</table>
-</div>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = or i32 4, %var <i>; yields {i32}:result = 4 | %var</i>
+<pre>
+ &lt;result&gt; = or i32 4, %var <i>; yields {i32}:result = 4 | %var</i>
&lt;result&gt; = or i32 15, 40 <i>; yields {i32}:result = 47</i>
&lt;result&gt; = or i32 4, 8 <i>; yields {i32}:result = 12</i>
</pre>
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_xor">'<tt>xor</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = xor &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+<pre>
+ &lt;result&gt; = xor &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>xor</tt>' instruction returns the bitwise logical exclusive
-or of its two operands. The <tt>xor</tt> is used to implement the
-"one's complement" operation, which is the "~" operator in C.</p>
+<p>The '<tt>xor</tt>' instruction returns the bitwise logical exclusive or of
+ its two operands. The <tt>xor</tt> is used to implement the "one's
+ complement" operation, which is the "~" operator in C.</p>
+
<h5>Arguments:</h5>
<p>The two arguments to the '<tt>xor</tt>' instruction must be
-<a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-values. Both arguments must have identical types.</p>
+ <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+ values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-
<p>The truth table used for the '<tt>xor</tt>' instruction is:</p>
-<p> </p>
-<div>
+
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
@@ -3204,14 +3588,15 @@ values. Both arguments must have identical types.</p>
</tr>
</tbody>
</table>
-</div>
-<p> </p>
+
<h5>Example:</h5>
-<pre> &lt;result&gt; = xor i32 4, %var <i>; yields {i32}:result = 4 ^ %var</i>
+<pre>
+ &lt;result&gt; = xor i32 4, %var <i>; yields {i32}:result = 4 ^ %var</i>
&lt;result&gt; = xor i32 15, 40 <i>; yields {i32}:result = 39</i>
&lt;result&gt; = xor i32 4, 8 <i>; yields {i32}:result = 12</i>
&lt;result&gt; = xor i32 %V, -1 <i>; yields {i32}:result = ~%V</i>
</pre>
+
</div>
<!-- ======================================================================= -->
@@ -3222,11 +3607,11 @@ values. Both arguments must have identical types.</p>
<div class="doc_text">
<p>LLVM supports several instructions to represent vector operations in a
-target-independent manner. These instructions cover the element-access and
-vector-specific operations needed to process vectors effectively. While LLVM
-does directly support these vector operations, many sophisticated algorithms
-will want to use target-specific intrinsics to take full advantage of a specific
-target.</p>
+ target-independent manner. These instructions cover the element-access and
+ vector-specific operations needed to process vectors effectively. While LLVM
+ does directly support these vector operations, many sophisticated algorithms
+ will want to use target-specific intrinsics to take full advantage of a
+ specific target.</p>
</div>
@@ -3238,43 +3623,33 @@ target.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = extractelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, i32 &lt;idx&gt; <i>; yields &lt;ty&gt;</i>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>extractelement</tt>' instruction extracts a single scalar
-element from a vector at a specified index.
-</p>
+<p>The '<tt>extractelement</tt>' instruction extracts a single scalar element
+ from a vector at a specified index.</p>
<h5>Arguments:</h5>
-
-<p>
-The first operand of an '<tt>extractelement</tt>' instruction is a
-value of <a href="#t_vector">vector</a> type. The second operand is
-an index indicating the position from which to extract the element.
-The index may be a variable.</p>
+<p>The first operand of an '<tt>extractelement</tt>' instruction is a value
+ of <a href="#t_vector">vector</a> type. The second operand is an index
+ indicating the position from which to extract the element. The index may be
+ a variable.</p>
<h5>Semantics:</h5>
-
-<p>
-The result is a scalar of the same type as the element type of
-<tt>val</tt>. Its value is the value at position <tt>idx</tt> of
-<tt>val</tt>. If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
-results are undefined.
-</p>
+<p>The result is a scalar of the same type as the element type of
+ <tt>val</tt>. Its value is the value at position <tt>idx</tt> of
+ <tt>val</tt>. If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
+ results are undefined.</p>
<h5>Example:</h5>
-
<pre>
%result = extractelement &lt;4 x i32&gt; %vec, i32 0 <i>; yields i32</i>
</pre>
-</div>
+</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
@@ -3284,42 +3659,32 @@ results are undefined.
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = insertelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, i32 &lt;idx&gt; <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>insertelement</tt>' instruction inserts a scalar
-element into a vector at a specified index.
-</p>
-
+<p>The '<tt>insertelement</tt>' instruction inserts a scalar element into a
+ vector at a specified index.</p>
<h5>Arguments:</h5>
-
-<p>
-The first operand of an '<tt>insertelement</tt>' instruction is a
-value of <a href="#t_vector">vector</a> type. The second operand is a
-scalar value whose type must equal the element type of the first
-operand. The third operand is an index indicating the position at
-which to insert the value. The index may be a variable.</p>
+<p>The first operand of an '<tt>insertelement</tt>' instruction is a value
+ of <a href="#t_vector">vector</a> type. The second operand is a scalar value
+ whose type must equal the element type of the first operand. The third
+ operand is an index indicating the position at which to insert the value.
+ The index may be a variable.</p>
<h5>Semantics:</h5>
-
-<p>
-The result is a vector of the same type as <tt>val</tt>. Its
-element values are those of <tt>val</tt> except at position
-<tt>idx</tt>, where it gets the value <tt>elt</tt>. If <tt>idx</tt>
-exceeds the length of <tt>val</tt>, the results are undefined.
-</p>
+<p>The result is a vector of the same type as <tt>val</tt>. Its element values
+ are those of <tt>val</tt> except at position <tt>idx</tt>, where it gets the
+ value <tt>elt</tt>. If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
+ results are undefined.</p>
<h5>Example:</h5>
-
<pre>
%result = insertelement &lt;4 x i32&gt; %vec, i32 1, i32 0 <i>; yields &lt;4 x i32&gt;</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3330,46 +3695,33 @@ exceeds the length of <tt>val</tt>, the results are undefined.
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;m x i32&gt; &lt;mask&gt; <i>; yields &lt;m x &lt;ty&gt;&gt;</i>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
-from two input vectors, returning a vector with the same element type as
-the input and length that is the same as the shuffle mask.
-</p>
+<p>The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
+ from two input vectors, returning a vector with the same element type as the
+ input and length that is the same as the shuffle mask.</p>
<h5>Arguments:</h5>
+<p>The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
+ with types that match each other. The third argument is a shuffle mask whose
+ element type is always 'i32'. The result of the instruction is a vector
+ whose length is the same as the shuffle mask and whose element type is the
+ same as the element type of the first two operands.</p>
-<p>
-The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
-with types that match each other. The third argument is a shuffle mask whose
-element type is always 'i32'. The result of the instruction is a vector whose
-length is the same as the shuffle mask and whose element type is the same as
-the element type of the first two operands.
-</p>
-
-<p>
-The shuffle mask operand is required to be a constant vector with either
-constant integer or undef values.
-</p>
+<p>The shuffle mask operand is required to be a constant vector with either
+ constant integer or undef values.</p>
<h5>Semantics:</h5>
-
-<p>
-The elements of the two input vectors are numbered from left to right across
-both of the vectors. The shuffle mask operand specifies, for each element of
-the result vector, which element of the two input vectors the result element
-gets. The element selector may be undef (meaning "don't care") and the second
-operand may be undef if performing a shuffle from only one vector.
-</p>
+<p>The elements of the two input vectors are numbered from left to right across
+ both of the vectors. The shuffle mask operand specifies, for each element of
+ the result vector, which element of the two input vectors the result element
+ gets. The element selector may be undef (meaning "don't care") and the
+ second operand may be undef if performing a shuffle from only one vector.</p>
<h5>Example:</h5>
-
<pre>
%result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
&lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt; <i>; yields &lt;4 x i32&gt;</i>
@@ -3380,8 +3732,8 @@ operand may be undef if performing a shuffle from only one vector.
%result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
&lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt; <i>; yields &lt;8 x i32&gt;</i>
</pre>
-</div>
+</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
@@ -3390,8 +3742,7 @@ operand may be undef if performing a shuffle from only one vector.
<div class="doc_text">
-<p>LLVM supports several instructions for working with aggregate values.
-</p>
+<p>LLVM supports several instructions for working with aggregate values.</p>
</div>
@@ -3403,43 +3754,31 @@ operand may be undef if performing a shuffle from only one vector.
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = extractvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;idx&gt;{, &lt;idx&gt;}*
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>extractvalue</tt>' instruction extracts the value of a struct field
-or array element from an aggregate value.
-</p>
-
+<p>The '<tt>extractvalue</tt>' instruction extracts the value of a struct field
+ or array element from an aggregate value.</p>
<h5>Arguments:</h5>
-
-<p>
-The first operand of an '<tt>extractvalue</tt>' instruction is a
-value of <a href="#t_struct">struct</a> or <a href="#t_array">array</a>
-type. The operands are constant indices to specify which value to extract
-in a similar manner as indices in a
-'<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
-</p>
+<p>The first operand of an '<tt>extractvalue</tt>' instruction is a value
+ of <a href="#t_struct">struct</a> or <a href="#t_array">array</a> type. The
+ operands are constant indices to specify which value to extract in a similar
+ manner as indices in a
+ '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
<h5>Semantics:</h5>
-
-<p>
-The result is the value at the position in the aggregate specified by
-the index operands.
-</p>
+<p>The result is the value at the position in the aggregate specified by the
+ index operands.</p>
<h5>Example:</h5>
-
<pre>
%result = extractvalue {i32, float} %agg, 0 <i>; yields i32</i>
</pre>
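+
+<p>Multiple indices walk through nested aggregates in the same way as
+   '<tt>getelementptr</tt>' indices (the aggregate type here is illustrative):</p>
+
+<pre>
+  %inner = extractvalue {i32, {float, double}} %agg, 1, 0   <i>; yields float</i>
+</pre>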
-</div>
+</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
@@ -3449,46 +3788,35 @@ the index operands.
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;val&gt;, &lt;idx&gt; <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>insertvalue</tt>' instruction inserts a value
-into a struct field or array element in an aggregate.
-</p>
+<p>The '<tt>insertvalue</tt>' instruction inserts a value into a struct field or
+ array element in an aggregate.</p>
<h5>Arguments:</h5>
-
-<p>
-The first operand of an '<tt>insertvalue</tt>' instruction is a
-value of <a href="#t_struct">struct</a> or <a href="#t_array">array</a> type.
-The second operand is a first-class value to insert.
-The following operands are constant indices
-indicating the position at which to insert the value in a similar manner as
-indices in a
-'<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
-The value to insert must have the same type as the value identified
-by the indices.
-</p>
+<p>The first operand of an '<tt>insertvalue</tt>' instruction is a value
+ of <a href="#t_struct">struct</a> or <a href="#t_array">array</a> type. The
+ second operand is a first-class value to insert. The following operands are
+ constant indices indicating the position at which to insert the value in a
+ similar manner as indices in a
+ '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction. The
+ value to insert must have the same type as the value identified by the
+ indices.</p>
<h5>Semantics:</h5>
-
-<p>
-The result is an aggregate of the same type as <tt>val</tt>. Its
-value is that of <tt>val</tt> except that the value at the position
-specified by the indices is that of <tt>elt</tt>.
-</p>
+<p>The result is an aggregate of the same type as <tt>val</tt>. Its value is
+ that of <tt>val</tt> except that the value at the position specified by the
+ indices is that of <tt>elt</tt>.</p>
<h5>Example:</h5>
-
<pre>
%result = insertvalue {i32, float} %agg, i32 1, 0 <i>; yields {i32, float}</i>
</pre>
+
</div>
@@ -3499,10 +3827,10 @@ specified by the indices is that of <tt>elt</tt>.
<div class="doc_text">
-<p>A key design point of an SSA-based representation is how it
-represents memory. In LLVM, no memory locations are in SSA form, which
-makes things very simple. This section describes how to read, write,
-allocate, and free memory in LLVM.</p>
+<p>A key design point of an SSA-based representation is how it represents
+ memory. In LLVM, no memory locations are in SSA form, which makes things
+ very simple. This section describes how to read, write, allocate, and free
+ memory in LLVM.</p>
</div>
@@ -3514,39 +3842,33 @@ allocate, and free memory in LLVM.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = malloc &lt;type&gt;[, i32 &lt;NumElements&gt;][, align &lt;alignment&gt;] <i>; yields {type*}:result</i>
</pre>
<h5>Overview:</h5>
-
-<p>The '<tt>malloc</tt>' instruction allocates memory from the system
-heap and returns a pointer to it. The object is always allocated in the generic
-address space (address space zero).</p>
+<p>The '<tt>malloc</tt>' instruction allocates memory from the system heap and
+ returns a pointer to it. The object is always allocated in the generic
+ address space (address space zero).</p>
<h5>Arguments:</h5>
-
<p>The '<tt>malloc</tt>' instruction allocates
-<tt>sizeof(&lt;type&gt;)*NumElements</tt>
-bytes of memory from the operating system and returns a pointer of the
-appropriate type to the program. If "NumElements" is specified, it is the
-number of elements allocated, otherwise "NumElements" is defaulted to be one.
-If a constant alignment is specified, the value result of the allocation is
-guaranteed to be aligned to at least that boundary. If not specified, or if
-zero, the target can choose to align the allocation on any convenient boundary
-compatible with the type.</p>
+ <tt>sizeof(&lt;type&gt;)*NumElements</tt> bytes of memory from the operating
+ system and returns a pointer of the appropriate type to the program. If
+ "NumElements" is specified, it is the number of elements allocated, otherwise
+ "NumElements" is defaulted to be one. If a constant alignment is specified,
+ the value result of the allocation is guaranteed to be aligned to at least
+ that boundary. If not specified, or if zero, the target can choose to align
+ the allocation on any convenient boundary compatible with the type.</p>
<p>'<tt>type</tt>' must be a sized type.</p>
<h5>Semantics:</h5>
-
-<p>Memory is allocated using the system "<tt>malloc</tt>" function, and
-a pointer is returned. The result of a zero byte allocation is undefined. The
-result is null if there is insufficient memory available.</p>
+<p>Memory is allocated using the system "<tt>malloc</tt>" function, and a
+ pointer is returned. The result of a zero byte allocation is undefined. The
+ result is null if there is insufficient memory available.</p>
<h5>Example:</h5>
-
<pre>
%array = malloc [4 x i8] <i>; yields {[%4 x i8]*}:array</i>
@@ -3557,8 +3879,7 @@ result is null if there is insufficient memory available.</p>
%array4 = malloc i32, align 1024 <i>; yields {i32*}:array4</i>
</pre>
-<p>Note that the code generator does not yet respect the
- alignment value.</p>
+<p>Note that the code generator does not yet respect the alignment value.</p>
</div>
@@ -3570,34 +3891,29 @@ result is null if there is insufficient memory available.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
free &lt;type&gt; &lt;value&gt; <i>; yields {void}</i>
</pre>
<h5>Overview:</h5>
-
-<p>The '<tt>free</tt>' instruction returns memory back to the unused
-memory heap to be reallocated in the future.</p>
+<p>The '<tt>free</tt>' instruction returns memory back to the unused memory heap
+ to be reallocated in the future.</p>
<h5>Arguments:</h5>
-
-<p>'<tt>value</tt>' shall be a pointer value that points to a value
-that was allocated with the '<tt><a href="#i_malloc">malloc</a></tt>'
-instruction.</p>
+<p>'<tt>value</tt>' shall be a pointer value that points to a value that was
+ allocated with the '<tt><a href="#i_malloc">malloc</a></tt>' instruction.</p>
<h5>Semantics:</h5>
-
-<p>Access to the memory pointed to by the pointer is no longer defined
-after this instruction executes. If the pointer is null, the operation
-is a noop.</p>
+<p>Access to the memory pointed to by the pointer is no longer defined after
+ this instruction executes. If the pointer is null, the operation is a
+ noop.</p>
<h5>Example:</h5>
-
<pre>
%array = <a href="#i_malloc">malloc</a> [4 x i8] <i>; yields {[4 x i8]*}:array</i>
free [4 x i8]* %array
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3608,137 +3924,150 @@ is a noop.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = alloca &lt;type&gt;[, i32 &lt;NumElements&gt;][, align &lt;alignment&gt;] <i>; yields {type*}:result</i>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>alloca</tt>' instruction allocates memory on the stack frame of the
-currently executing function, to be automatically released when this function
-returns to its caller. The object is always allocated in the generic address
-space (address space zero).</p>
+ currently executing function, to be automatically released when this function
+ returns to its caller. The object is always allocated in the generic address
+ space (address space zero).</p>
<h5>Arguments:</h5>
-
-<p>The '<tt>alloca</tt>' instruction allocates <tt>sizeof(&lt;type&gt;)*NumElements</tt>
-bytes of memory on the runtime stack, returning a pointer of the
-appropriate type to the program. If "NumElements" is specified, it is the
-number of elements allocated, otherwise "NumElements" is defaulted to be one.
-If a constant alignment is specified, the value result of the allocation is
-guaranteed to be aligned to at least that boundary. If not specified, or if
-zero, the target can choose to align the allocation on any convenient boundary
-compatible with the type.</p>
+<p>The '<tt>alloca</tt>' instruction
+ allocates <tt>sizeof(&lt;type&gt;)*NumElements</tt> bytes of memory on the
+ runtime stack, returning a pointer of the appropriate type to the program.
+ If "NumElements" is specified, it is the number of elements allocated,
+ otherwise "NumElements" is defaulted to be one. If a constant alignment is
+ specified, the value result of the allocation is guaranteed to be aligned to
+ at least that boundary. If not specified, or if zero, the target can choose
+ to align the allocation on any convenient boundary compatible with the
+ type.</p>
<p>'<tt>type</tt>' may be any sized type.</p>
<h5>Semantics:</h5>
-
<p>Memory is allocated; a pointer is returned. The operation is undefined if
-there is insufficient stack space for the allocation. '<tt>alloca</tt>'d
-memory is automatically released when the function returns. The '<tt>alloca</tt>'
-instruction is commonly used to represent automatic variables that must
-have an address available. When the function returns (either with the <tt><a
- href="#i_ret">ret</a></tt> or <tt><a href="#i_unwind">unwind</a></tt>
-instructions), the memory is reclaimed. Allocating zero bytes
-is legal, but the result is undefined.</p>
+ there is insufficient stack space for the allocation. '<tt>alloca</tt>'d
+ memory is automatically released when the function returns. The
+ '<tt>alloca</tt>' instruction is commonly used to represent automatic
+ variables that must have an address available. When the function returns
+ (either with the <tt><a href="#i_ret">ret</a></tt>
+ or <tt><a href="#i_unwind">unwind</a></tt> instructions), the memory is
+ reclaimed. Allocating zero bytes is legal, but the result is undefined.</p>
<h5>Example:</h5>
-
<pre>
%ptr = alloca i32 <i>; yields {i32*}:ptr</i>
%ptr = alloca i32, i32 4 <i>; yields {i32*}:ptr</i>
%ptr = alloca i32, i32 4, align 1024 <i>; yields {i32*}:ptr</i>
%ptr = alloca i32, align 1024 <i>; yields {i32*}:ptr</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_load">'<tt>load</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;]<br> &lt;result&gt; = volatile load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;]<br></pre>
+<pre>
+ &lt;result&gt; = load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;]
+ &lt;result&gt; = volatile load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;]
+</pre>
+
<h5>Overview:</h5>
<p>The '<tt>load</tt>' instruction is used to read from memory.</p>
+
<h5>Arguments:</h5>
-<p>The argument to the '<tt>load</tt>' instruction specifies the memory
-address from which to load. The pointer must point to a <a
- href="#t_firstclass">first class</a> type. If the <tt>load</tt> is
-marked as <tt>volatile</tt>, then the optimizer is not allowed to modify
-the number or order of execution of this <tt>load</tt> with other
-volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
-instructions. </p>
-<p>
-The optional constant "align" argument specifies the alignment of the operation
-(that is, the alignment of the memory address). A value of 0 or an
-omitted "align" argument means that the operation has the preferential
-alignment for the target. It is the responsibility of the code emitter
-to ensure that the alignment information is correct. Overestimating
-the alignment results in an undefined behavior. Underestimating the
-alignment may produce less efficient code. An alignment of 1 is always
-safe.
-</p>
+<p>The argument to the '<tt>load</tt>' instruction specifies the memory address
+ from which to load. The pointer must point to
+ a <a href="#t_firstclass">first class</a> type. If the <tt>load</tt> is
+ marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
+ number or order of execution of this <tt>load</tt> with other
+ volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
+   instructions.</p>
+
+<p>The optional constant "align" argument specifies the alignment of the
+ operation (that is, the alignment of the memory address). A value of 0 or an
+ omitted "align" argument means that the operation has the preferential
+ alignment for the target. It is the responsibility of the code emitter to
+ ensure that the alignment information is correct. Overestimating the
+   alignment results in undefined behavior. Underestimating the alignment may
+ produce less efficient code. An alignment of 1 is always safe.</p>
+
<h5>Semantics:</h5>
-<p>The location of memory pointed to is loaded. If the value being loaded
-is of scalar type then the number of bytes read does not exceed the minimum
-number of bytes needed to hold all bits of the type. For example, loading an
-<tt>i24</tt> reads at most three bytes. When loading a value of a type like
-<tt>i20</tt> with a size that is not an integral number of bytes, the result
-is undefined if the value was not originally written using a store of the
-same type.</p>
+<p>The location of memory pointed to is loaded. If the value being loaded is of
+ scalar type then the number of bytes read does not exceed the minimum number
+ of bytes needed to hold all bits of the type. For example, loading an
+ <tt>i24</tt> reads at most three bytes. When loading a value of a type like
+ <tt>i20</tt> with a size that is not an integral number of bytes, the result
+ is undefined if the value was not originally written using a store of the
+ same type.</p>
+
<h5>Examples:</h5>
-<pre> %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
- <a
- href="#i_store">store</a> i32 3, i32* %ptr <i>; yields {void}</i>
+<pre>
+ %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
+ <a href="#i_store">store</a> i32 3, i32* %ptr <i>; yields {void}</i>
%val = load i32* %ptr <i>; yields {i32}:val = i32 3</i>
</pre>
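+
+<p>As a sketch of the optional markers (the pointer name is a placeholder), an
+   explicit alignment and the <tt>volatile</tt> marker are written as follows:</p>
+
+<pre>
+  %val = load i32* %ptr, align 4             <i>; yields {i32}:val</i>
+  %vval = volatile load i32* %ptr, align 4   <i>; not reordered with other volatile operations</i>
+</pre>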
+
</div>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_store">'<tt>store</tt>'
Instruction</a> </div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;] <i>; yields {void}</i>
+<pre>
+ store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;] <i>; yields {void}</i>
volatile store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;] <i>; yields {void}</i>
</pre>
+
<h5>Overview:</h5>
<p>The '<tt>store</tt>' instruction is used to write to memory.</p>
+
<h5>Arguments:</h5>
-<p>There are two arguments to the '<tt>store</tt>' instruction: a value
-to store and an address at which to store it. The type of the '<tt>&lt;pointer&gt;</tt>'
-operand must be a pointer to the <a href="#t_firstclass">first class</a> type
-of the '<tt>&lt;value&gt;</tt>'
-operand. If the <tt>store</tt> is marked as <tt>volatile</tt>, then the
-optimizer is not allowed to modify the number or order of execution of
-this <tt>store</tt> with other volatile <tt>load</tt> and <tt><a
- href="#i_store">store</a></tt> instructions.</p>
-<p>
-The optional constant "align" argument specifies the alignment of the operation
-(that is, the alignment of the memory address). A value of 0 or an
-omitted "align" argument means that the operation has the preferential
-alignment for the target. It is the responsibility of the code emitter
-to ensure that the alignment information is correct. Overestimating
-the alignment results in an undefined behavior. Underestimating the
-alignment may produce less efficient code. An alignment of 1 is always
-safe.
-</p>
+<p>There are two arguments to the '<tt>store</tt>' instruction: a value to store
+ and an address at which to store it. The type of the
+ '<tt>&lt;pointer&gt;</tt>' operand must be a pointer to
+ the <a href="#t_firstclass">first class</a> type of the
+ '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked
+ as <tt>volatile</tt>, then the optimizer is not allowed to modify the number
+ or order of execution of this <tt>store</tt> with other
+ volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
+ instructions.</p>
+
+<p>The optional constant "align" argument specifies the alignment of the
+ operation (that is, the alignment of the memory address). A value of 0 or an
+ omitted "align" argument means that the operation has the preferential
+ alignment for the target. It is the responsibility of the code emitter to
+ ensure that the alignment information is correct. Overestimating the
+   alignment results in undefined behavior. Underestimating the alignment may
+ produce less efficient code. An alignment of 1 is always safe.</p>
+
<h5>Semantics:</h5>
-<p>The contents of memory are updated to contain '<tt>&lt;value&gt;</tt>'
-at the location specified by the '<tt>&lt;pointer&gt;</tt>' operand.
-If '<tt>&lt;value&gt;</tt>' is of scalar type then the number of bytes
-written does not exceed the minimum number of bytes needed to hold all
-bits of the type. For example, storing an <tt>i24</tt> writes at most
-three bytes. When writing a value of a type like <tt>i20</tt> with a
-size that is not an integral number of bytes, it is unspecified what
-happens to the extra bits that do not belong to the type, but they will
-typically be overwritten.</p>
+<p>The contents of memory are updated to contain '<tt>&lt;value&gt;</tt>' at the
+ location specified by the '<tt>&lt;pointer&gt;</tt>' operand. If
+ '<tt>&lt;value&gt;</tt>' is of scalar type then the number of bytes written
+ does not exceed the minimum number of bytes needed to hold all bits of the
+ type. For example, storing an <tt>i24</tt> writes at most three bytes. When
+ writing a value of a type like <tt>i20</tt> with a size that is not an
+ integral number of bytes, it is unspecified what happens to the extra bits
+ that do not belong to the type, but they will typically be overwritten.</p>
+
<h5>Example:</h5>
-<pre> %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
+<pre>
+ %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
store i32 3, i32* %ptr <i>; yields {void}</i>
%val = <a href="#i_load">load</a> i32* %ptr <i>; yields {i32}:val = i32 3</i>
</pre>
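+
+<p>The optional markers combine for stores in the same way (the names are
+   illustrative):</p>
+
+<pre>
+  volatile store i32 3, i32* %ptr, align 4   <i>; yields {void}</i>
+</pre>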
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3747,38 +4076,39 @@ typically be overwritten.</p>
</div>
<div class="doc_text">
+
<h5>Syntax:</h5>
<pre>
&lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+ &lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>getelementptr</tt>' instruction is used to get the address of a
-subelement of an aggregate data structure. It performs address calculation only
-and does not access memory.</p>
+<p>The '<tt>getelementptr</tt>' instruction is used to get the address of a
+ subelement of an aggregate data structure. It performs address calculation
+ only and does not access memory.</p>
<h5>Arguments:</h5>
-
<p>The first argument is always a pointer, and forms the basis of the
-calculation. The remaining arguments are indices, that indicate which of the
-elements of the aggregate object are indexed. The interpretation of each index
-is dependent on the type being indexed into. The first index always indexes the
-pointer value given as the first argument, the second index indexes a value of
-the type pointed to (not necessarily the value directly pointed to, since the
-first index can be non-zero), etc. The first type indexed into must be a pointer
-value, subsequent types can be arrays, vectors and structs. Note that subsequent
-types being indexed into can never be pointers, since that would require loading
-the pointer before continuing calculation.</p>
+ calculation. The remaining arguments are indices that indicate which of the
+ elements of the aggregate object are indexed. The interpretation of each
+ index is dependent on the type being indexed into. The first index always
+ indexes the pointer value given as the first argument, the second index
+ indexes a value of the type pointed to (not necessarily the value directly
+ pointed to, since the first index can be non-zero), etc. The first type
+ indexed into must be a pointer value, subsequent types can be arrays, vectors
+ and structs. Note that subsequent types being indexed into can never be
+ pointers, since that would require loading the pointer before continuing
+ calculation.</p>
<p>The type of each index argument depends on the type it is indexing into.
-When indexing into a (packed) structure, only <tt>i32</tt> integer
-<b>constants</b> are allowed. When indexing into an array, pointer or vector,
-integers of any width are allowed (also non-constants).</p>
+   When indexing into an (optionally packed) structure, only <tt>i32</tt> integer
+ <b>constants</b> are allowed. When indexing into an array, pointer or
+ vector, integers of any width are allowed, and they are not required to be
+ constant.</p>
-<p>For example, let's consider a C code fragment and how it gets
-compiled to LLVM:</p>
+<p>For example, let's consider a C code fragment and how it gets compiled to
+ LLVM:</p>
<div class="doc_code">
<pre>
@@ -3806,7 +4136,7 @@ int *foo(struct ST *s) {
%RT = <a href="#namedtypes">type</a> { i8 , [10 x [20 x i32]], i8 }
%ST = <a href="#namedtypes">type</a> { i32, double, %RT }
-define i32* %foo(%ST* %s) {
+define i32* @foo(%ST* %s) {
entry:
%reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
ret i32* %reg
@@ -3815,23 +4145,22 @@ entry:
</div>
<h5>Semantics:</h5>
-
<p>In the example above, the first index is indexing into the '<tt>%ST*</tt>'
-type, which is a pointer, yielding a '<tt>%ST</tt>' = '<tt>{ i32, double, %RT
-}</tt>' type, a structure. The second index indexes into the third element of
-the structure, yielding a '<tt>%RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]],
-i8 }</tt>' type, another structure. The third index indexes into the second
-element of the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an
-array. The two dimensions of the array are subscripted into, yielding an
-'<tt>i32</tt>' type. The '<tt>getelementptr</tt>' instruction returns a pointer
-to this element, thus computing a value of '<tt>i32*</tt>' type.</p>
+ type, which is a pointer, yielding a '<tt>%ST</tt>' = '<tt>{ i32, double, %RT
+ }</tt>' type, a structure. The second index indexes into the third element
+ of the structure, yielding a '<tt>%RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]],
+ i8 }</tt>' type, another structure. The third index indexes into the second
+ element of the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an
+ array. The two dimensions of the array are subscripted into, yielding an
+ '<tt>i32</tt>' type. The '<tt>getelementptr</tt>' instruction returns a
+ pointer to this element, thus computing a value of '<tt>i32*</tt>' type.</p>
-<p>Note that it is perfectly legal to index partially through a
-structure, returning a pointer to an inner element. Because of this,
-the LLVM code for the given testcase is equivalent to:</p>
+<p>Note that it is perfectly legal to index partially through a structure,
+ returning a pointer to an inner element. Because of this, the LLVM code for
+ the given testcase is equivalent to:</p>
<pre>
- define i32* %foo(%ST* %s) {
+ define i32* @foo(%ST* %s) {
%t1 = getelementptr %ST* %s, i32 1 <i>; yields %ST*:%t1</i>
%t2 = getelementptr %ST* %t1, i32 0, i32 2 <i>; yields %RT*:%t2</i>
%t3 = getelementptr %RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
@@ -3841,20 +4170,27 @@ the LLVM code for the given testcase is equivalent to:</p>
}
</pre>
-<p>Note that it is undefined to access an array out of bounds: array
-and pointer indexes must always be within the defined bounds of the
-array type when accessed with an instruction that dereferences the
-pointer (e.g. a load or store instruction). The one exception for
-this rule is zero length arrays. These arrays are defined to be
-accessible as variable length arrays, which requires access beyond the
-zero'th element.</p>
+<p>If the <tt>inbounds</tt> keyword is present, the result value of the
+ <tt>getelementptr</tt> is undefined if the base pointer is not an
+ <i>in bounds</i> address of an allocated object, or if any of the addresses
+ that would be formed by successive addition of the offsets implied by the
+ indices to the base address with infinitely precise arithmetic are not
+ <i>in bounds</i> addresses of that allocated object.
+ The <i>in bounds</i> addresses for an allocated object are all the addresses
+ that point into the object, plus the address one byte past the end.</p>
-<p>The getelementptr instruction is often confusing. For some more insight
-into how it works, see <a href="GetElementPtr.html">the getelementptr
-FAQ</a>.</p>
+<p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
+ the base address with silently-wrapping two's complement arithmetic, and
+ the result value of the <tt>getelementptr</tt> may be outside the object
+ pointed to by the base pointer. Even so, the result value may not
+ necessarily be used to access memory, even if it happens to point into
+ allocated storage. See the <a href="#pointeraliasing">Pointer Aliasing Rules</a>
+ section for more information.</p>
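+
+<p>As an illustrative sketch, the address computed in the earlier
+ <tt>@foo</tt> example can also be computed with the <tt>inbounds</tt>
+ restriction; the result is then only defined if <tt>%s</tt> and all of the
+ intermediate addresses implied by the indices are <i>in bounds</i> of the
+ same allocated object:</p>
+
+<pre>
+  %reg = getelementptr inbounds %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13 <i>; yields i32*:%reg</i>
+</pre>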
-<h5>Example:</h5>
+<p>The getelementptr instruction is often confusing. For some more insight into
+ how it works, see <a href="GetElementPtr.html">the getelementptr FAQ</a>.</p>
+<h5>Example:</h5>
<pre>
<i>; yields [12 x i8]*:aptr</i>
%aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
@@ -3865,15 +4201,19 @@ FAQ</a>.</p>
<i>; yields i32*:iptr</i>
%iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
</pre>
+
</div>
<!-- ======================================================================= -->
<div class="doc_subsection"> <a name="convertops">Conversion Operations</a>
</div>
+
<div class="doc_text">
+
<p>The instructions in this category are the conversion instructions (casting)
-which all take a single operand and a type. They perform various bit conversions
-on the operand.</p>
+ which all take a single operand and a type. They perform various bit
+ conversions on the operand.</p>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3888,24 +4228,22 @@ on the operand.</p>
</pre>
<h5>Overview:</h5>
-<p>
-The '<tt>trunc</tt>' instruction truncates its operand to the type <tt>ty2</tt>.
-</p>
+<p>The '<tt>trunc</tt>' instruction truncates its operand to the
+ type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
-<p>
-The '<tt>trunc</tt>' instruction takes a <tt>value</tt> to trunc, which must
-be an <a href="#t_integer">integer</a> type, and a type that specifies the size
-and type of the result, which must be an <a href="#t_integer">integer</a>
-type. The bit size of <tt>value</tt> must be larger than the bit size of
-<tt>ty2</tt>. Equal sized types are not allowed.</p>
+<p>The '<tt>trunc</tt>' instruction takes a <tt>value</tt> to truncate, which must
+ be an <a href="#t_integer">integer</a> type, and a type that specifies the
+ size and type of the result, which must be
+ an <a href="#t_integer">integer</a> type. The bit size of <tt>value</tt> must
+ be larger than the bit size of <tt>ty2</tt>. Equal sized types are not
+ allowed.</p>
<h5>Semantics:</h5>
-<p>
-The '<tt>trunc</tt>' instruction truncates the high order bits in <tt>value</tt>
-and converts the remaining bits to <tt>ty2</tt>. Since the source size must be
-larger than the destination size, <tt>trunc</tt> cannot be a <i>no-op cast</i>.
-It will always truncate bits.</p>
+<p>The '<tt>trunc</tt>' instruction truncates the high order bits
+ in <tt>value</tt> and converts the remaining bits to <tt>ty2</tt>. Since the
+ source size must be larger than the destination size, <tt>trunc</tt> cannot
+ be a <i>no-op cast</i>. It will always truncate bits.</p>
<h5>Example:</h5>
<pre>
@@ -3913,6 +4251,7 @@ It will always truncate bits.</p>
%Y = trunc i32 123 to i1 <i>; yields i1:true</i>
%Y = trunc i32 122 to i1 <i>; yields i1:false</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3928,19 +4267,19 @@ It will always truncate bits.</p>
<h5>Overview:</h5>
<p>The '<tt>zext</tt>' instruction zero extends its operand to type
-<tt>ty2</tt>.</p>
+ <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
<p>The '<tt>zext</tt>' instruction takes a value to cast, which must be of
-<a href="#t_integer">integer</a> type, and a type to cast it to, which must
-also be of <a href="#t_integer">integer</a> type. The bit size of the
-<tt>value</tt> must be smaller than the bit size of the destination type,
-<tt>ty2</tt>.</p>
+ <a href="#t_integer">integer</a> type, and a type to cast it to, which must
+ also be of <a href="#t_integer">integer</a> type. The bit size of the
+ <tt>value</tt> must be smaller than the bit size of the destination type,
+ <tt>ty2</tt>.</p>
<h5>Semantics:</h5>
<p>The <tt>zext</tt> fills the high order bits of the <tt>value</tt> with zero
-bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
+ bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
<p>When zero extending from i1, the result will always be either 0 or 1.</p>
@@ -3949,6 +4288,7 @@ bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
%X = zext i32 257 to i64 <i>; yields i64:257</i>
%Y = zext i1 true to i32 <i>; yields i32:1</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3966,18 +4306,16 @@ bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
<p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
-<p>
-The '<tt>sext</tt>' instruction takes a value to cast, which must be of
-<a href="#t_integer">integer</a> type, and a type to cast it to, which must
-also be of <a href="#t_integer">integer</a> type. The bit size of the
-<tt>value</tt> must be smaller than the bit size of the destination type,
-<tt>ty2</tt>.</p>
+<p>The '<tt>sext</tt>' instruction takes a value to cast, which must be of
+ <a href="#t_integer">integer</a> type, and a type to cast it to, which must
+ also be of <a href="#t_integer">integer</a> type. The bit size of the
+ <tt>value</tt> must be smaller than the bit size of the destination type,
+ <tt>ty2</tt>.</p>
<h5>Semantics:</h5>
-<p>
-The '<tt>sext</tt>' instruction performs a sign extension by copying the sign
-bit (highest order bit) of the <tt>value</tt> until it reaches the bit size of
-the type <tt>ty2</tt>.</p>
+<p>The '<tt>sext</tt>' instruction performs a sign extension by copying the sign
+ bit (highest order bit) of the <tt>value</tt> until it reaches the bit size
+ of the type <tt>ty2</tt>.</p>
<p>When sign extending from i1, the extension always results in -1 or 0.</p>
@@ -3986,6 +4324,7 @@ the type <tt>ty2</tt>.</p>
%X = sext i8 -1 to i16 <i>; yields i16 :65535</i>
%Y = sext i1 true to i32 <i>; yields i32:-1</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -3996,34 +4335,34 @@ the type <tt>ty2</tt>.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = fptrunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
</pre>
<h5>Overview:</h5>
<p>The '<tt>fptrunc</tt>' instruction truncates <tt>value</tt> to type
-<tt>ty2</tt>.</p>
-
+ <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
<p>The '<tt>fptrunc</tt>' instruction takes a <a href="#t_floating">floating
- point</a> value to cast and a <a href="#t_floating">floating point</a> type to
-cast it to. The size of <tt>value</tt> must be larger than the size of
-<tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a
-<i>no-op cast</i>.</p>
+ point</a> value to cast and a <a href="#t_floating">floating point</a> type
+ to cast it to. The size of <tt>value</tt> must be larger than the size of
+ <tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a
+ <i>no-op cast</i>.</p>
<h5>Semantics:</h5>
-<p> The '<tt>fptrunc</tt>' instruction truncates a <tt>value</tt> from a larger
-<a href="#t_floating">floating point</a> type to a smaller
-<a href="#t_floating">floating point</a> type. If the value cannot fit within
-the destination type, <tt>ty2</tt>, then the results are undefined.</p>
+<p>The '<tt>fptrunc</tt>' instruction truncates a <tt>value</tt> from a larger
+ <a href="#t_floating">floating point</a> type to a smaller
+ <a href="#t_floating">floating point</a> type. If the value cannot fit
+ within the destination type, <tt>ty2</tt>, then the results are
+ undefined.</p>
<h5>Example:</h5>
<pre>
%X = fptrunc double 123.0 to float <i>; yields float:123.0</i>
%Y = fptrunc double 1.0E+300 to float <i>; yields undefined</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4039,26 +4378,27 @@ the destination type, <tt>ty2</tt>, then the results are undefined.</p>
<h5>Overview:</h5>
<p>The '<tt>fpext</tt>' extends a floating point <tt>value</tt> to a larger
-floating point value.</p>
+ floating point value.</p>
<h5>Arguments:</h5>
<p>The '<tt>fpext</tt>' instruction takes a
-<a href="#t_floating">floating point</a> <tt>value</tt> to cast,
-and a <a href="#t_floating">floating point</a> type to cast it to. The source
-type must be smaller than the destination type.</p>
+ <a href="#t_floating">floating point</a> <tt>value</tt> to cast, and
+ a <a href="#t_floating">floating point</a> type to cast it to. The source
+ type must be smaller than the destination type.</p>
<h5>Semantics:</h5>
<p>The '<tt>fpext</tt>' instruction extends the <tt>value</tt> from a smaller
-<a href="#t_floating">floating point</a> type to a larger
-<a href="#t_floating">floating point</a> type. The <tt>fpext</tt> cannot be
-used to make a <i>no-op cast</i> because it always changes bits. Use
-<tt>bitcast</tt> to make a <i>no-op cast</i> for a floating point cast.</p>
+ <a href="#t_floating">floating point</a> type to a larger
+ <a href="#t_floating">floating point</a> type. The <tt>fpext</tt> cannot be
+ used to make a <i>no-op cast</i> because it always changes bits. Use
+ <tt>bitcast</tt> to make a <i>no-op cast</i> for a floating point cast.</p>
<h5>Example:</h5>
<pre>
%X = fpext float 3.1415 to double <i>; yields double:3.1415</i>
%Y = fpext float 1.0 to float <i>; yields float:1.0 (no-op)</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4074,21 +4414,20 @@ used to make a <i>no-op cast</i> because it always changes bits. Use
<h5>Overview:</h5>
<p>The '<tt>fptoui</tt>' converts a floating point <tt>value</tt> to its
-unsigned integer equivalent of type <tt>ty2</tt>.
-</p>
+ unsigned integer equivalent of type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
-<p>The '<tt>fptoui</tt>' instruction takes a value to cast, which must be a
-scalar or vector <a href="#t_floating">floating point</a> value, and a type
-to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
-type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
-vector integer type with the same number of elements as <tt>ty</tt></p>
+<p>The '<tt>fptoui</tt>' instruction takes a value to cast, which must be a
+ scalar or vector <a href="#t_floating">floating point</a> value, and a type
+ to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
+ type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
+ vector integer type with the same number of elements as <tt>ty</tt>.</p>
<h5>Semantics:</h5>
-<p> The '<tt>fptoui</tt>' instruction converts its
-<a href="#t_floating">floating point</a> operand into the nearest (rounding
-towards zero) unsigned integer value. If the value cannot fit in <tt>ty2</tt>,
-the results are undefined.</p>
+<p>The '<tt>fptoui</tt>' instruction converts its
+ <a href="#t_floating">floating point</a> operand into the nearest (rounding
+ towards zero) unsigned integer value. If the value cannot fit
+ in <tt>ty2</tt>, the results are undefined.</p>
<h5>Example:</h5>
<pre>
@@ -4096,6 +4435,7 @@ the results are undefined.</p>
%Y = fptoui float 1.0E+300 to i1 <i>; yields undefined:1</i>
%X = fptoui float 1.04E+17 to i8 <i>; yields undefined:1</i>
</pre>
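+
+<p>When the operands are vectors, the conversion is performed element by
+ element, still rounding towards zero (an illustrative sketch):</p>
+
+<pre>
+  %Z = fptoui &lt;2 x float&gt; &lt; float 1.0, float 4.25 &gt; to &lt;2 x i32&gt; <i>; yields &lt;2 x i32&gt;: &lt; i32 1, i32 4 &gt;</i>
+</pre>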
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4111,21 +4451,21 @@ the results are undefined.</p>
<h5>Overview:</h5>
<p>The '<tt>fptosi</tt>' instruction converts
-<a href="#t_floating">floating point</a> <tt>value</tt> to type <tt>ty2</tt>.
-</p>
+ <a href="#t_floating">floating point</a> <tt>value</tt> to
+ type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
-<p> The '<tt>fptosi</tt>' instruction takes a value to cast, which must be a
-scalar or vector <a href="#t_floating">floating point</a> value, and a type
-to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
-type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
-vector integer type with the same number of elements as <tt>ty</tt></p>
+<p>The '<tt>fptosi</tt>' instruction takes a value to cast, which must be a
+ scalar or vector <a href="#t_floating">floating point</a> value, and a type
+ to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
+ type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
+ vector integer type with the same number of elements as <tt>ty</tt>.</p>
<h5>Semantics:</h5>
<p>The '<tt>fptosi</tt>' instruction converts its
-<a href="#t_floating">floating point</a> operand into the nearest (rounding
-towards zero) signed integer value. If the value cannot fit in <tt>ty2</tt>,
-the results are undefined.</p>
+ <a href="#t_floating">floating point</a> operand into the nearest (rounding
+ towards zero) signed integer value. If the value cannot fit in <tt>ty2</tt>,
+ the results are undefined.</p>
<h5>Example:</h5>
<pre>
@@ -4133,6 +4473,7 @@ the results are undefined.</p>
%Y = fptosi float 1.0E-247 to i1 <i>; yields undefined:1</i>
%X = fptosi float 1.04E+17 to i8 <i>; yields undefined:1</i>
</pre>
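+
+<p>Likewise for vector operands, element by element (an illustrative
+ sketch):</p>
+
+<pre>
+  %Z = fptosi &lt;2 x float&gt; &lt; float -1.5, float 2.5 &gt; to &lt;2 x i32&gt; <i>; yields &lt;2 x i32&gt;: &lt; i32 -1, i32 2 &gt;</i>
+</pre>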
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4148,25 +4489,27 @@ the results are undefined.</p>
<h5>Overview:</h5>
<p>The '<tt>uitofp</tt>' instruction regards <tt>value</tt> as an unsigned
-integer and converts that value to the <tt>ty2</tt> type.</p>
+ integer and converts that value to the <tt>ty2</tt> type.</p>
<h5>Arguments:</h5>
<p>The '<tt>uitofp</tt>' instruction takes a value to cast, which must be a
-scalar or vector <a href="#t_integer">integer</a> value, and a type to cast it
-to <tt>ty2</tt>, which must be an <a href="#t_floating">floating point</a>
-type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
-floating point type with the same number of elements as <tt>ty</tt></p>
+ scalar or vector <a href="#t_integer">integer</a> value, and a type to cast
+ it to <tt>ty2</tt>, which must be a <a href="#t_floating">floating point</a>
+ type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
+ floating point type with the same number of elements as <tt>ty</tt>.</p>
<h5>Semantics:</h5>
<p>The '<tt>uitofp</tt>' instruction interprets its operand as an unsigned
-integer quantity and converts it to the corresponding floating point value. If
-the value cannot fit in the floating point value, the results are undefined.</p>
+ integer quantity and converts it to the corresponding floating point
+ value. If the value cannot fit in the floating point value, the results are
+ undefined.</p>
<h5>Example:</h5>
<pre>
%X = uitofp i32 257 to float <i>; yields float:257.0</i>
%Y = uitofp i8 -1 to double <i>; yields double:255.0</i>
</pre>
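+
+<p>For vector operands, each element is converted as an unsigned integer (an
+ illustrative sketch):</p>
+
+<pre>
+  %Z = uitofp &lt;2 x i8&gt; &lt; i8 -1, i8 3 &gt; to &lt;2 x float&gt; <i>; yields &lt;2 x float&gt;: &lt; float 255.0, float 3.0 &gt;</i>
+</pre>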
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4181,26 +4524,27 @@ the value cannot fit in the floating point value, the results are undefined.</p>
</pre>
<h5>Overview:</h5>
-<p>The '<tt>sitofp</tt>' instruction regards <tt>value</tt> as a signed
-integer and converts that value to the <tt>ty2</tt> type.</p>
+<p>The '<tt>sitofp</tt>' instruction regards <tt>value</tt> as a signed integer
+ and converts that value to the <tt>ty2</tt> type.</p>
<h5>Arguments:</h5>
<p>The '<tt>sitofp</tt>' instruction takes a value to cast, which must be a
-scalar or vector <a href="#t_integer">integer</a> value, and a type to cast it
-to <tt>ty2</tt>, which must be an <a href="#t_floating">floating point</a>
-type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
-floating point type with the same number of elements as <tt>ty</tt></p>
+ scalar or vector <a href="#t_integer">integer</a> value, and a type to cast
+ it to <tt>ty2</tt>, which must be a <a href="#t_floating">floating point</a>
+ type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
+ floating point type with the same number of elements as <tt>ty</tt>.</p>
<h5>Semantics:</h5>
-<p>The '<tt>sitofp</tt>' instruction interprets its operand as a signed
-integer quantity and converts it to the corresponding floating point value. If
-the value cannot fit in the floating point value, the results are undefined.</p>
+<p>The '<tt>sitofp</tt>' instruction interprets its operand as a signed integer
+ quantity and converts it to the corresponding floating point value. If the
+ value cannot fit in the floating point value, the results are undefined.</p>
<h5>Example:</h5>
<pre>
%X = sitofp i32 257 to float <i>; yields float:257.0</i>
%Y = sitofp i8 -1 to double <i>; yields double:-1.0</i>
</pre>
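+
+<p>For vector operands, each element is converted as a signed integer (an
+ illustrative sketch):</p>
+
+<pre>
+  %Z = sitofp &lt;2 x i8&gt; &lt; i8 -1, i8 3 &gt; to &lt;2 x float&gt; <i>; yields &lt;2 x float&gt;: &lt; float -1.0, float 3.0 &gt;</i>
+</pre>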
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4215,28 +4559,29 @@ the value cannot fit in the floating point value, the results are undefined.</p>
</pre>
<h5>Overview:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts the pointer <tt>value</tt> to
-the integer type <tt>ty2</tt>.</p>
+<p>The '<tt>ptrtoint</tt>' instruction converts the pointer <tt>value</tt> to
+ the integer type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
-must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to
-<tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
+<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
+ must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to
+ <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
<h5>Semantics:</h5>
<p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
-<tt>ty2</tt> by interpreting the pointer value as an integer and either
-truncating or zero extending that value to the size of the integer type. If
-<tt>value</tt> is smaller than <tt>ty2</tt> then a zero extension is done. If
-<tt>value</tt> is larger than <tt>ty2</tt> then a truncation is done. If they
-are the same size, then nothing is done (<i>no-op cast</i>) other than a type
-change.</p>
+ <tt>ty2</tt> by interpreting the pointer value as an integer and either
+ truncating or zero extending that value to the size of the integer type. If
+ <tt>value</tt> is smaller than <tt>ty2</tt> then a zero extension is done. If
+ <tt>value</tt> is larger than <tt>ty2</tt> then a truncation is done. If they
+ are the same size, then nothing is done (<i>no-op cast</i>) other than a type
+ change.</p>
<h5>Example:</h5>
<pre>
  %X = ptrtoint i32* %x to i8 <i>; yields truncation on 32-bit architecture</i>
%Y = ptrtoint i32* %x to i64 <i>; yields zero extension on 32-bit architecture</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4251,21 +4596,21 @@ change.</p>
</pre>
<h5>Overview:</h5>
-<p>The '<tt>inttoptr</tt>' instruction converts an integer <tt>value</tt> to
-a pointer type, <tt>ty2</tt>.</p>
+<p>The '<tt>inttoptr</tt>' instruction converts an integer <tt>value</tt> to a
+ pointer type, <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
<p>The '<tt>inttoptr</tt>' instruction takes an <a href="#t_integer">integer</a>
-value to cast, and a type to cast it to, which must be a
-<a href="#t_pointer">pointer</a> type.</p>
+ value to cast, and a type to cast it to, which must be a
+ <a href="#t_pointer">pointer</a> type.</p>
<h5>Semantics:</h5>
<p>The '<tt>inttoptr</tt>' instruction converts <tt>value</tt> to type
-<tt>ty2</tt> by applying either a zero extension or a truncation depending on
-the size of the integer <tt>value</tt>. If <tt>value</tt> is larger than the
-size of a pointer then a truncation is done. If <tt>value</tt> is smaller than
-the size of a pointer then a zero extension is done. If they are the same size,
-nothing is done (<i>no-op cast</i>).</p>
+ <tt>ty2</tt> by applying either a zero extension or a truncation depending on
+ the size of the integer <tt>value</tt>. If <tt>value</tt> is larger than the
+ size of a pointer then a truncation is done. If <tt>value</tt> is smaller
+ than the size of a pointer then a zero extension is done. If they are the
+ same size, nothing is done (<i>no-op cast</i>).</p>
<h5>Example:</h5>
<pre>
@@ -4273,6 +4618,7 @@ nothing is done (<i>no-op cast</i>).</p>
%X = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
%Y = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4287,29 +4633,27 @@ nothing is done (<i>no-op cast</i>).</p>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
-<tt>ty2</tt> without changing any bits.</p>
+ <tt>ty2</tt> without changing any bits.</p>
<h5>Arguments:</h5>
-
-<p>The '<tt>bitcast</tt>' instruction takes a value to cast, which must be
-a non-aggregate first class value, and a type to cast it to, which must also be
-a non-aggregate <a href="#t_firstclass">first class</a> type. The bit sizes of
-<tt>value</tt>
-and the destination type, <tt>ty2</tt>, must be identical. If the source
-type is a pointer, the destination type must also be a pointer. This
-instruction supports bitwise conversion of vectors to integers and to vectors
-of other types (as long as they have the same size).</p>
+<p>The '<tt>bitcast</tt>' instruction takes a value to cast, which must be a
+ non-aggregate first class value, and a type to cast it to, which must also be
+ a non-aggregate <a href="#t_firstclass">first class</a> type. The bit sizes
+ of <tt>value</tt> and the destination type, <tt>ty2</tt>, must be
+ identical. If the source type is a pointer, the destination type must also be
+ a pointer. This instruction supports bitwise conversion of vectors to
+ integers and to vectors of other types (as long as they have the same
+ size).</p>
<h5>Semantics:</h5>
<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
-<tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
-this conversion. The conversion is done as if the <tt>value</tt> had been
-stored to memory and read back as type <tt>ty2</tt>. Pointer types may only be
-converted to other pointer types with this instruction. To convert pointers to
-other types, use the <a href="#i_inttoptr">inttoptr</a> or
-<a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
+ <tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
+ this conversion. The conversion is done as if the <tt>value</tt> had been
+ stored to memory and read back as type <tt>ty2</tt>. Pointer types may only
+ be converted to other pointer types with this instruction. To convert
+ pointers to other types, use the <a href="#i_inttoptr">inttoptr</a> or
+ <a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
<h5>Example:</h5>
<pre>
@@ -4317,31 +4661,40 @@ other types, use the <a href="#i_inttoptr">inttoptr</a> or
%Y = bitcast i32* %x to sint* <i>; yields sint*:%x</i>
%Z = bitcast &lt;2 x int&gt; %V to i64; <i>; yields i64: %V</i>
</pre>
+
</div>
<!-- ======================================================================= -->
<div class="doc_subsection"> <a name="otherops">Other Operations</a> </div>
+
<div class="doc_text">
-<p>The instructions in this category are the "miscellaneous"
-instructions, which defy better classification.</p>
+
+<p>The instructions in this category are the "miscellaneous" instructions, which
+ defy better classification.</p>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"><a name="i_icmp">'<tt>icmp</tt>' Instruction</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
+<pre>
+ &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>icmp</tt>' instruction returns a boolean value or
-a vector of boolean values based on comparison
-of its two integer, integer vector, or pointer operands.</p>
+<p>The '<tt>icmp</tt>' instruction returns a boolean value or a vector of
+ boolean values based on comparison of its two integer, integer vector, or
+ pointer operands.</p>
+
<h5>Arguments:</h5>
<p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
-the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:
-</p>
+ the condition code indicating the kind of comparison to perform. It is not a
+ value, just a keyword. The possible condition codes are:</p>
+
<ol>
<li><tt>eq</tt>: equal</li>
<li><tt>ne</tt>: not equal </li>
@@ -4354,48 +4707,63 @@ a value, just a keyword. The possible condition code are:
<li><tt>slt</tt>: signed less than</li>
<li><tt>sle</tt>: signed less or equal</li>
</ol>
+
<p>The remaining two arguments must be <a href="#t_integer">integer</a> or
-<a href="#t_pointer">pointer</a>
-or integer <a href="#t_vector">vector</a> typed.
-They must also be identical types.</p>
+ <a href="#t_pointer">pointer</a> or integer <a href="#t_vector">vector</a>
+ typed. They must also be identical types.</p>
+
<h5>Semantics:</h5>
-<p>The '<tt>icmp</tt>' compares <tt>op1</tt> and <tt>op2</tt> according to
-the condition code given as <tt>cond</tt>. The comparison performed always
-yields either an <a href="#t_primitive"><tt>i1</tt></a> or vector of <tt>i1</tt> result, as follows:
-</p>
+<p>The '<tt>icmp</tt>' compares <tt>op1</tt> and <tt>op2</tt> according to the
+ condition code given as <tt>cond</tt>. The comparison performed always yields
+ either an <a href="#t_integer"><tt>i1</tt></a> or vector of <tt>i1</tt>
+ result, as follows:</p>
+
<ol>
<li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal,
- <tt>false</tt> otherwise. No sign interpretation is necessary or performed.
- </li>
+ <tt>false</tt> otherwise. No sign interpretation is necessary or
+ performed.</li>
+
<li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal,
- <tt>false</tt> otherwise. No sign interpretation is necessary or performed.</li>
+ <tt>false</tt> otherwise. No sign interpretation is necessary or
+ performed.</li>
+
<li><tt>ugt</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
+
<li><tt>uge</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is greater than or equal
+ to <tt>op2</tt>.</li>
+
<li><tt>ult</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
+
<li><tt>ule</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+
<li><tt>sgt</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
+
<li><tt>sge</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is greater than or equal
+ to <tt>op2</tt>.</li>
+
<li><tt>slt</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
+
<li><tt>sle</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+ <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
</ol>
+
<p>If the operands are <a href="#t_pointer">pointer</a> typed, the pointer
-values are compared as if they were integers.</p>
-<p>If the operands are integer vectors, then they are compared
-element by element. The result is an <tt>i1</tt> vector with
-the same number of elements as the values being compared.
-Otherwise, the result is an <tt>i1</tt>.
-</p>
+ values are compared as if they were integers.</p>
+
+<p>If the operands are integer vectors, then they are compared element by
+ element. The result is an <tt>i1</tt> vector with the same number of elements
+ as the values being compared. Otherwise, the result is an <tt>i1</tt>.</p>
<h5>Example:</h5>
-<pre> &lt;result&gt; = icmp eq i32 4, 5 <i>; yields: result=false</i>
+<pre>
+ &lt;result&gt; = icmp eq i32 4, 5 <i>; yields: result=false</i>
&lt;result&gt; = icmp ne float* %X, %X <i>; yields: result=false</i>
&lt;result&gt; = icmp ult i16 4, 5 <i>; yields: result=true</i>
&lt;result&gt; = icmp sgt i16 4, 5 <i>; yields: result=false</i>
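+  <i>; vector operands are compared element by element (an illustrative sketch)</i>
+  &lt;result&gt; = icmp slt &lt;2 x i32&gt; &lt; i32 1, i32 5 &gt;, &lt; i32 2, i32 2 &gt; <i>; yields: result=&lt;2 x i1&gt; &lt; i1 true, i1 false &gt;</i>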
@@ -4411,25 +4779,30 @@ Otherwise, the result is an <tt>i1</tt>.
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"><a name="i_fcmp">'<tt>fcmp</tt>' Instruction</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
+<pre>
+ &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>fcmp</tt>' instruction returns a boolean value
-or vector of boolean values based on comparison
-of its operands.</p>
-<p>
-If the operands are floating point scalars, then the result
-type is a boolean (<a href="#t_primitive"><tt>i1</tt></a>).
-</p>
-<p>If the operands are floating point vectors, then the result type
-is a vector of boolean with the same number of elements as the
-operands being compared.</p>
+<p>The '<tt>fcmp</tt>' instruction returns a boolean value or vector of boolean
+ values based on comparison of its operands.</p>
+
+<p>If the operands are floating point scalars, then the result type is a boolean
+ (<a href="#t_integer"><tt>i1</tt></a>).</p>
+
+<p>If the operands are floating point vectors, then the result type is a vector
+ of boolean with the same number of elements as the operands being
+ compared.</p>
+
<h5>Arguments:</h5>
<p>The '<tt>fcmp</tt>' instruction takes three operands. The first operand is
-the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:</p>
+ the condition code indicating the kind of comparison to perform. It is not a
+ value, just a keyword. The possible condition codes are:</p>
+
<ol>
<li><tt>false</tt>: no comparison, always returns false</li>
<li><tt>oeq</tt>: ordered and equal</li>
@@ -4448,52 +4821,71 @@ a value, just a keyword. The possible condition code are:</p>
<li><tt>uno</tt>: unordered (either nans)</li>
<li><tt>true</tt>: no comparison, always returns true</li>
</ol>
+
<p><i>Ordered</i> means that neither operand is a QNAN while
-<i>unordered</i> means that either operand may be a QNAN.</p>
-<p>Each of <tt>val1</tt> and <tt>val2</tt> arguments must be
-either a <a href="#t_floating">floating point</a> type
-or a <a href="#t_vector">vector</a> of floating point type.
-They must have identical types.</p>
+ <i>unordered</i> means that either operand may be a QNAN.</p>
+
+<p>The <tt>val1</tt> and <tt>val2</tt> arguments must each be either
+ a <a href="#t_floating">floating point</a> type or
+ a <a href="#t_vector">vector</a> of floating point type. They must have
+ identical types.</p>
+
<h5>Semantics:</h5>
<p>The '<tt>fcmp</tt>' instruction compares <tt>op1</tt> and <tt>op2</tt>
-according to the condition code given as <tt>cond</tt>.
-If the operands are vectors, then the vectors are compared
-element by element.
-Each comparison performed
-always yields an <a href="#t_primitive">i1</a> result, as follows:</p>
+ according to the condition code given as <tt>cond</tt>. If the operands are
+ vectors, then the vectors are compared element by element. Each comparison
+ performed always yields an <a href="#t_integer">i1</a> result, as
+ follows:</p>
+
<ol>
<li><tt>false</tt>: always yields <tt>false</tt>, regardless of operands.</li>
+
<li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is equal to <tt>op2</tt>.</li>
+
<li><tt>ogt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is greather than <tt>op2</tt>.</li>
+ <tt>op1</tt> is greather than <tt>op2</tt>.</li>
+
<li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
+
<li><tt>olt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is less than <tt>op2</tt>.</li>
+ <tt>op1</tt> is less than <tt>op2</tt>.</li>
+
<li><tt>ole</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+
<li><tt>one</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
+
<li><tt>ord</tt>: yields <tt>true</tt> if both operands are not a QNAN.</li>
+
<li><tt>ueq</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is equal to <tt>op2</tt>.</li>
+
<li><tt>ugt</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is greater than <tt>op2</tt>.</li>
+ <tt>op1</tt> is greater than <tt>op2</tt>.</li>
+
<li><tt>uge</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
+
<li><tt>ult</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is less than <tt>op2</tt>.</li>
+ <tt>op1</tt> is less than <tt>op2</tt>.</li>
+
<li><tt>ule</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
+
<li><tt>une</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
+ <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
+
<li><tt>uno</tt>: yields <tt>true</tt> if either operand is a QNAN.</li>
+
<li><tt>true</tt>: always yields <tt>true</tt>, regardless of operands.</li>
</ol>
<h5>Example:</h5>
-<pre> &lt;result&gt; = fcmp oeq float 4.0, 5.0 <i>; yields: result=false</i>
+<pre>
+ &lt;result&gt; = fcmp oeq float 4.0, 5.0 <i>; yields: result=false</i>
&lt;result&gt; = fcmp one float 4.0, 5.0 <i>; yields: result=true</i>
&lt;result&gt; = fcmp olt float 4.0, 5.0 <i>; yields: result=true</i>
&lt;result&gt; = fcmp ueq double 1.0, 2.0 <i>; yields: result=false</i>
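+  <i>; vector operands are compared element by element (an illustrative sketch)</i>
+  &lt;result&gt; = fcmp olt &lt;2 x float&gt; &lt; float 1.0, float 5.0 &gt;, &lt; float 2.0, float 2.0 &gt; <i>; yields: result=&lt;2 x i1&gt; &lt; i1 true, i1 false &gt;</i>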
@@ -4506,143 +4898,41 @@ always yields an <a href="#t_primitive">i1</a> result, as follows:</p>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
- <a name="i_vicmp">'<tt>vicmp</tt>' Instruction</a>
-</div>
-<div class="doc_text">
-<h5>Syntax:</h5>
-<pre> &lt;result&gt; = vicmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-<h5>Overview:</h5>
-<p>The '<tt>vicmp</tt>' instruction returns an integer vector value based on
-element-wise comparison of its two integer vector operands.</p>
-<h5>Arguments:</h5>
-<p>The '<tt>vicmp</tt>' instruction takes three operands. The first operand is
-the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:</p>
-<ol>
- <li><tt>eq</tt>: equal</li>
- <li><tt>ne</tt>: not equal </li>
- <li><tt>ugt</tt>: unsigned greater than</li>
- <li><tt>uge</tt>: unsigned greater or equal</li>
- <li><tt>ult</tt>: unsigned less than</li>
- <li><tt>ule</tt>: unsigned less or equal</li>
- <li><tt>sgt</tt>: signed greater than</li>
- <li><tt>sge</tt>: signed greater or equal</li>
- <li><tt>slt</tt>: signed less than</li>
- <li><tt>sle</tt>: signed less or equal</li>
-</ol>
-<p>The remaining two arguments must be <a href="#t_vector">vector</a> or
-<a href="#t_integer">integer</a> typed. They must also be identical types.</p>
-<h5>Semantics:</h5>
-<p>The '<tt>vicmp</tt>' instruction compares <tt>op1</tt> and <tt>op2</tt>
-according to the condition code given as <tt>cond</tt>. The comparison yields a
-<a href="#t_vector">vector</a> of <a href="#t_integer">integer</a> result, of
-identical type as the values being compared. The most significant bit in each
-element is 1 if the element-wise comparison evaluates to true, and is 0
-otherwise. All other bits of the result are undefined. The condition codes
-are evaluated identically to the <a href="#i_icmp">'<tt>icmp</tt>'
-instruction</a>.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = vicmp eq &lt;2 x i32&gt; &lt; i32 4, i32 0&gt;, &lt; i32 5, i32 0&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0, i32 -1 &gt;</i>
- &lt;result&gt; = vicmp ult &lt;2 x i8 &gt; &lt; i8 1, i8 2&gt;, &lt; i8 2, i8 2 &gt; <i>; yields: result=&lt;2 x i8&gt; &lt; i8 -1, i8 0 &gt;</i>
-</pre>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
- <a name="i_vfcmp">'<tt>vfcmp</tt>' Instruction</a>
-</div>
-<div class="doc_text">
-<h5>Syntax:</h5>
-<pre> &lt;result&gt; = vfcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;</pre>
-<h5>Overview:</h5>
-<p>The '<tt>vfcmp</tt>' instruction returns an integer vector value based on
-element-wise comparison of its two floating point vector operands. The output
-elements have the same width as the input elements.</p>
-<h5>Arguments:</h5>
-<p>The '<tt>vfcmp</tt>' instruction takes three operands. The first operand is
-the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:</p>
-<ol>
- <li><tt>false</tt>: no comparison, always returns false</li>
- <li><tt>oeq</tt>: ordered and equal</li>
- <li><tt>ogt</tt>: ordered and greater than </li>
- <li><tt>oge</tt>: ordered and greater than or equal</li>
- <li><tt>olt</tt>: ordered and less than </li>
- <li><tt>ole</tt>: ordered and less than or equal</li>
- <li><tt>one</tt>: ordered and not equal</li>
- <li><tt>ord</tt>: ordered (no nans)</li>
- <li><tt>ueq</tt>: unordered or equal</li>
- <li><tt>ugt</tt>: unordered or greater than </li>
- <li><tt>uge</tt>: unordered or greater than or equal</li>
- <li><tt>ult</tt>: unordered or less than </li>
- <li><tt>ule</tt>: unordered or less than or equal</li>
- <li><tt>une</tt>: unordered or not equal</li>
- <li><tt>uno</tt>: unordered (either nans)</li>
- <li><tt>true</tt>: no comparison, always returns true</li>
-</ol>
-<p>The remaining two arguments must be <a href="#t_vector">vector</a> of
-<a href="#t_floating">floating point</a> typed. They must also be identical
-types.</p>
-<h5>Semantics:</h5>
-<p>The '<tt>vfcmp</tt>' instruction compares <tt>op1</tt> and <tt>op2</tt>
-according to the condition code given as <tt>cond</tt>. The comparison yields a
-<a href="#t_vector">vector</a> of <a href="#t_integer">integer</a> result, with
-an identical number of elements as the values being compared, and each element
-having identical with to the width of the floating point elements. The most
-significant bit in each element is 1 if the element-wise comparison evaluates to
-true, and is 0 otherwise. All other bits of the result are undefined. The
-condition codes are evaluated identically to the
-<a href="#i_fcmp">'<tt>fcmp</tt>' instruction</a>.</p>
-
-<h5>Example:</h5>
-<pre>
- <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0, i32 -1 &gt;</i>
- &lt;result&gt; = vfcmp oeq &lt;2 x float&gt; &lt; float 4, float 0 &gt;, &lt; float 5, float 0 &gt;
-
- <i>; yields: result=&lt;2 x i64&gt; &lt; i64 -1, i64 0 &gt;</i>
- &lt;result&gt; = vfcmp ult &lt;2 x double&gt; &lt; double 1, double 2 &gt;, &lt; double 2, double 2&gt;
-</pre>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
<a name="i_phi">'<tt>phi</tt>' Instruction</a>
</div>
<div class="doc_text">
<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = phi &lt;ty&gt; [ &lt;val0&gt;, &lt;label0&gt;], ...
+</pre>
-<pre> &lt;result&gt; = phi &lt;ty&gt; [ &lt;val0&gt;, &lt;label0&gt;], ...<br></pre>
<h5>Overview:</h5>
-<p>The '<tt>phi</tt>' instruction is used to implement the &#966; node in
-the SSA graph representing the function.</p>
-<h5>Arguments:</h5>
-
-<p>The type of the incoming values is specified with the first type
-field. After this, the '<tt>phi</tt>' instruction takes a list of pairs
-as arguments, with one pair for each predecessor basic block of the
-current block. Only values of <a href="#t_firstclass">first class</a>
-type may be used as the value arguments to the PHI node. Only labels
-may be used as the label arguments.</p>
-
-<p>There must be no non-phi instructions between the start of a basic
-block and the PHI instructions: i.e. PHI instructions must be first in
-a basic block.</p>
+<p>The '<tt>phi</tt>' instruction is used to implement the &#966; node in the
+ SSA graph representing the function.</p>
-<p>For the purposes of the SSA form, the use of each incoming value is
-deemed to occur on the edge from the corresponding predecessor block
-to the current block (but after any definition of an '<tt>invoke</tt>'
-instruction's return value on the same edge).</p>
+<h5>Arguments:</h5>
+<p>The type of the incoming values is specified with the first type field. After
+ this, the '<tt>phi</tt>' instruction takes a list of pairs as arguments, with
+ one pair for each predecessor basic block of the current block. Only values
+ of <a href="#t_firstclass">first class</a> type may be used as the value
+ arguments to the PHI node. Only labels may be used as the label
+ arguments.</p>
+
+<p>There must be no non-phi instructions between the start of a basic block and
+ the PHI instructions: i.e. PHI instructions must be first in a basic
+ block.</p>
+
+<p>For the purposes of the SSA form, the use of each incoming value is deemed to
+ occur on the edge from the corresponding predecessor block to the current
+ block (but after any definition of an '<tt>invoke</tt>' instruction's return
+ value on the same edge).</p>
<h5>Semantics:</h5>
-
<p>At runtime, the '<tt>phi</tt>' instruction logically takes on the value
-specified by the pair corresponding to the predecessor basic block that executed
-just prior to the current block.</p>
+ specified by the pair corresponding to the predecessor basic block that
+ executed just prior to the current block.</p>
<h5>Example:</h5>
<pre>
@@ -4651,6 +4941,7 @@ Loop: ; Infinite loop that counts from 0 on up...
%nextindvar = add i32 %indvar, 1
br label %Loop
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4661,7 +4952,6 @@ Loop: ; Infinite loop that counts from 0 on up...
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;result&gt; = select <i>selty</i> &lt;cond&gt;, &lt;ty&gt; &lt;val1&gt;, &lt;ty&gt; &lt;val2&gt; <i>; yields ty</i>
@@ -4669,38 +4959,25 @@ Loop: ; Infinite loop that counts from 0 on up...
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>select</tt>' instruction is used to choose one value based on a
-condition, without branching.
-</p>
+<p>The '<tt>select</tt>' instruction is used to choose one value based on a
+ condition, without branching.</p>
<h5>Arguments:</h5>
-
-<p>
-The '<tt>select</tt>' instruction requires an 'i1' value or
-a vector of 'i1' values indicating the
-condition, and two values of the same <a href="#t_firstclass">first class</a>
-type. If the val1/val2 are vectors and
-the condition is a scalar, then entire vectors are selected, not
-individual elements.
-</p>
+<p>The '<tt>select</tt>' instruction requires an 'i1' value or a vector of 'i1'
+ values indicating the condition, and two values of the
+ same <a href="#t_firstclass">first class</a> type. If <tt>val1</tt> and
+ <tt>val2</tt> are vectors and the condition is a scalar, then entire vectors
+ are selected, not individual elements.</p>
<h5>Semantics:</h5>
+<p>If the condition is an i1 and it evaluates to 1, the instruction returns the
+ first value argument; otherwise, it returns the second value argument.</p>
-<p>
-If the condition is an i1 and it evaluates to 1, the instruction returns the first
-value argument; otherwise, it returns the second value argument.
-</p>
-<p>
-If the condition is a vector of i1, then the value arguments must
-be vectors of the same size, and the selection is done element
-by element.
-</p>
+<p>If the condition is a vector of i1, then the value arguments must be vectors
+ of the same size, and the selection is done element by element.</p>
<h5>Example:</h5>
-
<pre>
%X = select i1 true, i8 17, i8 42 <i>; yields i8:17</i>
</pre>
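+
+<p>With a vector condition, the selection is performed element by element (an
+ illustrative sketch):</p>
+
+<pre>
+  %Y = select &lt;2 x i1&gt; &lt; i1 true, i1 false &gt;, &lt;2 x i8&gt; &lt; i8 17, i8 42 &gt;, &lt;2 x i8&gt; &lt; i8 0, i8 99 &gt; <i>; yields &lt;2 x i8&gt;: &lt; i8 17, i8 99 &gt;</i>
+</pre>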
@@ -4710,7 +4987,6 @@ by element.
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_call">'<tt>call</tt>' Instruction</a>
@@ -4724,75 +5000,60 @@ by element.
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>call</tt>' instruction represents a simple function call.</p>
<h5>Arguments:</h5>
-
<p>This instruction requires several arguments:</p>
<ol>
- <li>
- <p>The optional "tail" marker indicates whether the callee function accesses
- any allocas or varargs in the caller. If the "tail" marker is present, the
- function call is eligible for tail call optimization. Note that calls may
- be marked "tail" even if they do not occur before a <a
- href="#i_ret"><tt>ret</tt></a> instruction.</p>
- </li>
- <li>
- <p>The optional "cconv" marker indicates which <a href="#callingconv">calling
- convention</a> the call should use. If none is specified, the call defaults
- to using C calling conventions.</p>
- </li>
+ <li>The optional "tail" marker indicates whether the callee function accesses
+ any allocas or varargs in the caller. If the "tail" marker is present,
+ the function call is eligible for tail call optimization. Note that calls
+ may be marked "tail" even if they do not occur before
+ a <a href="#i_ret"><tt>ret</tt></a> instruction.</li>
- <li>
- <p>The optional <a href="#paramattrs">Parameter Attributes</a> list for
- return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>',
- and '<tt>inreg</tt>' attributes are valid here.</p>
- </li>
+ <li>The optional "cconv" marker indicates which <a href="#callingconv">calling
+ convention</a> the call should use. If none is specified, the call
+ defaults to using C calling conventions.</li>
- <li>
- <p>'<tt>ty</tt>': the type of the call instruction itself which is also
- the type of the return value. Functions that return no value are marked
- <tt><a href="#t_void">void</a></tt>.</p>
- </li>
- <li>
- <p>'<tt>fnty</tt>': shall be the signature of the pointer to function
- value being invoked. The argument types must match the types implied by
- this signature. This type can be omitted if the function is not varargs
- and if the function type does not return a pointer to a function.</p>
- </li>
- <li>
- <p>'<tt>fnptrval</tt>': An LLVM value containing a pointer to a function to
- be invoked. In most cases, this is a direct function invocation, but
- indirect <tt>call</tt>s are just as possible, calling an arbitrary pointer
- to function value.</p>
- </li>
- <li>
- <p>'<tt>function args</tt>': argument list whose types match the
- function signature argument types. All arguments must be of
- <a href="#t_firstclass">first class</a> type. If the function signature
- indicates the function accepts a variable number of arguments, the extra
- arguments can be specified.</p>
- </li>
- <li>
- <p>The optional <a href="#fnattrs">function attributes</a> list. Only
- '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
- '<tt>readnone</tt>' attributes are valid here.</p>
- </li>
+ <li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
+ return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
+ '<tt>inreg</tt>' attributes are valid here.</li>
+
+ <li>'<tt>ty</tt>': the type of the call instruction itself which is also the
+ type of the return value. Functions that return no value are marked
+ <tt><a href="#t_void">void</a></tt>.</li>
+
+ <li>'<tt>fnty</tt>': shall be the signature of the pointer to function value
+ being invoked. The argument types must match the types implied by this
+ signature. This type can be omitted if the function is not varargs and if
+ the function type does not return a pointer to a function.</li>
+
+ <li>'<tt>fnptrval</tt>': An LLVM value containing a pointer to a function to
+ be invoked. In most cases, this is a direct function invocation, but
+ indirect <tt>call</tt>s are just as possible, calling an arbitrary pointer
+ to function value.</li>
+
+ <li>'<tt>function args</tt>': argument list whose types match the function
+ signature argument types. All arguments must be of
+ <a href="#t_firstclass">first class</a> type. If the function signature
+ indicates the function accepts a variable number of arguments, the extra
+ arguments can be specified.</li>
+
+ <li>The optional <a href="#fnattrs">function attributes</a> list. Only
+ '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
+ '<tt>readnone</tt>' attributes are valid here.</li>
</ol>
<h5>Semantics:</h5>
-
-<p>The '<tt>call</tt>' instruction is used to cause control flow to
-transfer to a specified function, with its incoming arguments bound to
-the specified values. Upon a '<tt><a href="#i_ret">ret</a></tt>'
-instruction in the called function, control flow continues with the
-instruction after the function call, and the return value of the
-function is bound to the result argument.</p>
+<p>The '<tt>call</tt>' instruction is used to cause control flow to transfer to
+ a specified function, with its incoming arguments bound to the specified
+ values. Upon a '<tt><a href="#i_ret">ret</a></tt>' instruction in the called
+ function, control flow continues with the instruction after the function
+ call, and the return value of the function is bound to the result
+ argument.</p>
<h5>Example:</h5>
-
<pre>
%retval = call i32 @test(i32 %argc)
call i32 (i8 *, ...)* @printf(i8 * %msg, i32 12, i8 42) <i>; yields i32</i>
@@ -4808,6 +5069,12 @@ function is bound to the result argument.</p>
  %ZZ = call zeroext i32 @bar() <i>; Return value is zero extended</i>
</pre>
+<p>LLVM treats calls to some functions with names and arguments that match the
+ standard C99 library as being the C99 library functions, and may perform
+ optimizations or generate code for them under that assumption. This is
+ something we'd like to change in the future to provide better support for
+ freestanding environments and non-C-based languages.</p>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -4818,47 +5085,41 @@ function is bound to the result argument.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
&lt;resultval&gt; = va_arg &lt;va_list*&gt; &lt;arglist&gt;, &lt;argty&gt;
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>va_arg</tt>' instruction is used to access arguments passed through
-the "variable argument" area of a function call. It is used to implement the
-<tt>va_arg</tt> macro in C.</p>
+ the "variable argument" area of a function call. It is used to implement the
+ <tt>va_arg</tt> macro in C.</p>
<h5>Arguments:</h5>
-
-<p>This instruction takes a <tt>va_list*</tt> value and the type of
-the argument. It returns a value of the specified argument type and
-increments the <tt>va_list</tt> to point to the next argument. The
-actual type of <tt>va_list</tt> is target specific.</p>
+<p>This instruction takes a <tt>va_list*</tt> value and the type of the
+ argument. It returns a value of the specified argument type and increments
+ the <tt>va_list</tt> to point to the next argument. The actual type
+ of <tt>va_list</tt> is target specific.</p>
<h5>Semantics:</h5>
-
-<p>The '<tt>va_arg</tt>' instruction loads an argument of the specified
-type from the specified <tt>va_list</tt> and causes the
-<tt>va_list</tt> to point to the next argument. For more information,
-see the variable argument handling <a href="#int_varargs">Intrinsic
-Functions</a>.</p>
+<p>The '<tt>va_arg</tt>' instruction loads an argument of the specified type
+ from the specified <tt>va_list</tt> and causes the <tt>va_list</tt> to point
+ to the next argument. For more information, see the variable argument
+ handling <a href="#int_varargs">Intrinsic Functions</a>.</p>
<p>It is legal for this instruction to be called in a function which does not
-take a variable number of arguments, for example, the <tt>vfprintf</tt>
-function.</p>
+ take a variable number of arguments, for example, the <tt>vfprintf</tt>
+ function.</p>
-<p><tt>va_arg</tt> is an LLVM instruction instead of an <a
-href="#intrinsics">intrinsic function</a> because it takes a type as an
-argument.</p>
+<p><tt>va_arg</tt> is an LLVM instruction instead of
+ an <a href="#intrinsics">intrinsic function</a> because it takes a type as an
+ argument.</p>
<h5>Example:</h5>
-
<p>See the <a href="#int_varargs">variable argument processing</a> section.</p>
-<p>Note that the code generator does not yet fully support va_arg
- on many targets. Also, it does not currently support va_arg with
- aggregate types on any target.</p>
+<p>Note that the code generator does not yet fully support va_arg on many
+ targets. Also, it does not currently support va_arg with aggregate types on
+ any target.</p>
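+
+<p>As a minimal sketch, on a hypothetical target whose <tt>va_list</tt> is
+ simply an <tt>i8*</tt> (the actual type is target specific), reading
+ an <tt>i32</tt> argument might look like:</p>
+
+<pre>
+  %int = va_arg i8** %ap, i32 <i>; read an i32 from %ap and advance it</i>
+</pre>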
</div>
@@ -4869,45 +5130,45 @@ argument.</p>
<div class="doc_text">
<p>LLVM supports the notion of an "intrinsic function". These functions have
-well known names and semantics and are required to follow certain restrictions.
-Overall, these intrinsics represent an extension mechanism for the LLVM
-language that does not require changing all of the transformations in LLVM when
-adding to the language (or the bitcode reader/writer, the parser, etc...).</p>
+ well known names and semantics and are required to follow certain
+ restrictions. Overall, these intrinsics represent an extension mechanism for
+ the LLVM language that does not require changing all of the transformations
+ in LLVM when adding to the language (or the bitcode reader/writer, the
+ parser, etc...).</p>
<p>Intrinsic function names must all start with an "<tt>llvm.</tt>" prefix. This
-prefix is reserved in LLVM for intrinsic names; thus, function names may not
-begin with this prefix. Intrinsic functions must always be external functions:
-you cannot define the body of intrinsic functions. Intrinsic functions may
-only be used in call or invoke instructions: it is illegal to take the address
-of an intrinsic function. Additionally, because intrinsic functions are part
-of the LLVM language, it is required if any are added that they be documented
-here.</p>
-
-<p>Some intrinsic functions can be overloaded, i.e., the intrinsic represents
-a family of functions that perform the same operation but on different data
-types. Because LLVM can represent over 8 million different integer types,
-overloading is used commonly to allow an intrinsic function to operate on any
-integer type. One or more of the argument types or the result type can be
-overloaded to accept any integer type. Argument types may also be defined as
-exactly matching a previous argument's type or the result type. This allows an
-intrinsic function which accepts multiple arguments, but needs all of them to
-be of the same type, to only be overloaded with respect to a single argument or
-the result.</p>
-
-<p>Overloaded intrinsics will have the names of its overloaded argument types
-encoded into its function name, each preceded by a period. Only those types
-which are overloaded result in a name suffix. Arguments whose type is matched
-against another type do not. For example, the <tt>llvm.ctpop</tt> function can
-take an integer of any width and returns an integer of exactly the same integer
-width. This leads to a family of functions such as
-<tt>i8 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i29 @llvm.ctpop.i29(i29 %val)</tt>.
-Only one type, the return type, is overloaded, and only one type suffix is
-required. Because the argument's type is matched against the return type, it
-does not require its own name suffix.</p>
+ prefix is reserved in LLVM for intrinsic names; thus, function names may not
+ begin with this prefix. Intrinsic functions must always be external
+ functions: you cannot define the body of intrinsic functions. Intrinsic
+ functions may only be used in call or invoke instructions: it is illegal to
+ take the address of an intrinsic function. Additionally, because intrinsic
+ functions are part of the LLVM language, any intrinsics that are added must
+ be documented here.</p>
+
+<p>Some intrinsic functions can be overloaded, i.e., the intrinsic represents a
+ family of functions that perform the same operation but on different data
+ types. Because LLVM can represent over 8 million different integer types,
+ overloading is used commonly to allow an intrinsic function to operate on any
+ integer type. One or more of the argument types or the result type can be
+ overloaded to accept any integer type. Argument types may also be defined as
+ exactly matching a previous argument's type or the result type. This allows
+ an intrinsic function which accepts multiple arguments, but needs all of them
+ to be of the same type, to only be overloaded with respect to a single
+ argument or the result.</p>
+
+<p>Overloaded intrinsics have the names of their overloaded argument types
+ encoded into their function names, each preceded by a period. Only those
+ types which are overloaded result in a name suffix. Arguments whose type is
+ matched against another type do not. For example, the <tt>llvm.ctpop</tt>
+ function can take an integer of any width and return an integer of exactly
+ the same width. This leads to a family of functions such as
+ <tt>i8 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i29 @llvm.ctpop.i29(i29
+ %val)</tt>. Only one type, the return type, is overloaded, and only one type
+ suffix is required. Because the argument's type is matched against the return
+ type, it does not require its own name suffix.</p>
<p>To learn how to add an intrinsic function, please see the
-<a href="ExtendingLLVM.html">Extending LLVM Guide</a>.
-</p>
+ <a href="ExtendingLLVM.html">Extending LLVM Guide</a>.</p>
</div>
@@ -4918,20 +5179,19 @@ does not require its own name suffix.</p>
<div class="doc_text">
-<p>Variable argument support is defined in LLVM with the <a
- href="#i_va_arg"><tt>va_arg</tt></a> instruction and these three
-intrinsic functions. These functions are related to the similarly
-named macros defined in the <tt>&lt;stdarg.h&gt;</tt> header file.</p>
+<p>Variable argument support is defined in LLVM with
+ the <a href="#i_va_arg"><tt>va_arg</tt></a> instruction and these three
+ intrinsic functions. These functions are related to the similarly named
+ macros defined in the <tt>&lt;stdarg.h&gt;</tt> header file.</p>
-<p>All of these functions operate on arguments that use a
-target-specific value type "<tt>va_list</tt>". The LLVM assembly
-language reference manual does not define what this type is, so all
-transformations should be prepared to handle these functions regardless of
-the type used.</p>
+<p>All of these functions operate on arguments that use a target-specific value
+ type "<tt>va_list</tt>". The LLVM assembly language reference manual does
+ not define what this type is, so all transformations should be prepared to
+ handle these functions regardless of the type used.</p>
<p>This example shows how the <a href="#i_va_arg"><tt>va_arg</tt></a>
-instruction and the variable argument handling intrinsic functions are
-used.</p>
+ instruction and the variable argument handling intrinsic functions are
+ used.</p>
<div class="doc_code">
<pre>
@@ -4970,25 +5230,27 @@ declare void @llvm.va_end(i8*)
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> declare void %llvm.va_start(i8* &lt;arglist&gt;)<br></pre>
+<pre>
+ declare void @llvm.va_start(i8* &lt;arglist&gt;)
+</pre>
+
<h5>Overview:</h5>
-<p>The '<tt>llvm.va_start</tt>' intrinsic initializes
-<tt>*&lt;arglist&gt;</tt> for subsequent use by <tt><a
-href="#i_va_arg">va_arg</a></tt>.</p>
+<p>The '<tt>llvm.va_start</tt>' intrinsic initializes <tt>*&lt;arglist&gt;</tt>
+ for subsequent use by <tt><a href="#i_va_arg">va_arg</a></tt>.</p>
<h5>Arguments:</h5>
-
<p>The argument is a pointer to a <tt>va_list</tt> element to initialize.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.va_start</tt>' intrinsic works just like the <tt>va_start</tt>
-macro available in C. In a target-dependent way, it initializes the
-<tt>va_list</tt> element to which the argument points, so that the next call to
-<tt>va_arg</tt> will produce the first variable argument passed to the function.
-Unlike the C <tt>va_start</tt> macro, this intrinsic does not need to know the
-last argument of the function as the compiler can figure that out.</p>
+ macro available in C. In a target-dependent way, it initializes
+ the <tt>va_list</tt> element to which the argument points, so that the next
+ call to <tt>va_arg</tt> will produce the first variable argument passed to
+ the function. Unlike the C <tt>va_start</tt> macro, this intrinsic does not
+ need to know the last argument of the function as the compiler can figure
+ that out.</p>
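+
+<p>For example, a front-end might initialize a <tt>va_list</tt> like this (an
+ illustrative sketch; <tt>i8*</tt> stands in here for the target-specific
+ <tt>va_list</tt> type):</p>
+
+<div class="doc_code">
+<pre>
+  %ap = alloca i8*                     ; stack slot treated as an opaque va_list
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap2)
+</pre>
+</div>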
</div>
@@ -4998,26 +5260,28 @@ last argument of the function as the compiler can figure that out.</p>
</div>
<div class="doc_text">
+
<h5>Syntax:</h5>
-<pre> declare void @llvm.va_end(i8* &lt;arglist&gt;)<br></pre>
-<h5>Overview:</h5>
+<pre>
+ declare void @llvm.va_end(i8* &lt;arglist&gt;)
+</pre>
+<h5>Overview:</h5>
<p>The '<tt>llvm.va_end</tt>' intrinsic destroys <tt>*&lt;arglist&gt;</tt>,
-which has been initialized previously with <tt><a href="#int_va_start">llvm.va_start</a></tt>
-or <tt><a href="#i_va_copy">llvm.va_copy</a></tt>.</p>
+ which has been initialized previously
+ with <tt><a href="#int_va_start">llvm.va_start</a></tt>
+ or <tt><a href="#int_va_copy">llvm.va_copy</a></tt>.</p>
<h5>Arguments:</h5>
-
<p>The argument is a pointer to a <tt>va_list</tt> to destroy.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.va_end</tt>' intrinsic works just like the <tt>va_end</tt>
-macro available in C. In a target-dependent way, it destroys the
-<tt>va_list</tt> element to which the argument points. Calls to <a
-href="#int_va_start"><tt>llvm.va_start</tt></a> and <a href="#int_va_copy">
-<tt>llvm.va_copy</tt></a> must be matched exactly with calls to
-<tt>llvm.va_end</tt>.</p>
+ macro available in C. In a target-dependent way, it destroys
+ the <tt>va_list</tt> element to which the argument points. Calls
+ to <a href="#int_va_start"><tt>llvm.va_start</tt></a>
+ and <a href="#int_va_copy"><tt>llvm.va_copy</tt></a> must be matched exactly
+ with calls to <tt>llvm.va_end</tt>.</p>
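+
+<p>Continuing the sketch from the <tt>llvm.va_start</tt> example above, a
+ matched pair of calls might look like:</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.va_start(i8* %ap2)
+  %tmp = va_arg i8** %ap, i32          ; read one variable argument
+  call void @llvm.va_end(i8* %ap2)
+</pre>
+</div>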
</div>
@@ -5029,30 +5293,26 @@ href="#int_va_start"><tt>llvm.va_start</tt></a> and <a href="#int_va_copy">
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
declare void @llvm.va_copy(i8* &lt;destarglist&gt;, i8* &lt;srcarglist&gt;)
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.va_copy</tt>' intrinsic copies the current argument position
-from the source argument list to the destination argument list.</p>
+ from the source argument list to the destination argument list.</p>
<h5>Arguments:</h5>
-
<p>The first argument is a pointer to a <tt>va_list</tt> element to initialize.
-The second argument is a pointer to a <tt>va_list</tt> element to copy from.</p>
-
+ The second argument is a pointer to a <tt>va_list</tt> element to copy
+ from.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.va_copy</tt>' intrinsic works just like the <tt>va_copy</tt>
-macro available in C. In a target-dependent way, it copies the source
-<tt>va_list</tt> element into the destination <tt>va_list</tt> element. This
-intrinsic is necessary because the <tt><a href="#int_va_start">
-llvm.va_start</a></tt> intrinsic may be arbitrarily complex and require, for
-example, memory allocation.</p>
+ macro available in C. In a target-dependent way, it copies the
+ source <tt>va_list</tt> element into the destination <tt>va_list</tt>
+ element. This intrinsic is necessary because
+ the <tt><a href="#int_va_start">llvm.va_start</a></tt> intrinsic may be
+ arbitrarily complex and require, for example, memory allocation.</p>
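+
+<p>An illustrative sketch, reusing <tt>%ap2</tt> from
+ the <tt>llvm.va_start</tt> example above:</p>
+
+<div class="doc_code">
+<pre>
+  %dst = alloca i8*
+  %dst2 = bitcast i8** %dst to i8*
+  call void @llvm.va_copy(i8* %dst2, i8* %ap2)  ; %ap2 was initialized by llvm.va_start
+</pre>
+</div>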
</div>
@@ -5063,20 +5323,18 @@ example, memory allocation.</p>
<div class="doc_text">
-<p>
-LLVM support for <a href="GarbageCollection.html">Accurate Garbage
+<p>LLVM support for <a href="GarbageCollection.html">Accurate Garbage
Collection</a> (GC) requires the implementation and generation of these
-intrinsics.
-These intrinsics allow identification of <a href="#int_gcroot">GC roots on the
-stack</a>, as well as garbage collector implementations that require <a
-href="#int_gcread">read</a> and <a href="#int_gcwrite">write</a> barriers.
-Front-ends for type-safe garbage collected languages should generate these
-intrinsics to make use of the LLVM garbage collectors. For more details, see <a
-href="GarbageCollection.html">Accurate Garbage Collection with LLVM</a>.
-</p>
+intrinsics. These intrinsics allow identification of <a href="#int_gcroot">GC
+roots on the stack</a>, as well as garbage collector implementations that
+require <a href="#int_gcread">read</a> and <a href="#int_gcwrite">write</a>
+barriers. Front-ends for type-safe garbage collected languages should generate
+these intrinsics to make use of the LLVM garbage collectors. For more details,
+see <a href="GarbageCollection.html">Accurate Garbage Collection with
+LLVM</a>.</p>
-<p>The garbage collection intrinsics only operate on objects in the generic
- address space (address space zero).</p>
+<p>The garbage collection intrinsics only operate on objects in the generic
+ address space (address space zero).</p>
</div>
@@ -5088,33 +5346,29 @@ href="GarbageCollection.html">Accurate Garbage Collection with LLVM</a>.
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.gcroot</tt>' intrinsic declares the existence of a GC root to
-the code generator, and allows some metadata to be associated with it.</p>
+ the code generator, and allows some metadata to be associated with it.</p>
<h5>Arguments:</h5>
-
<p>The first argument specifies the address of a stack object that contains the
-root pointer. The second pointer (which must be either a constant or a global
-value address) contains the meta-data to be associated with the root.</p>
+ root pointer. The second pointer (which must be either a constant or a
+ global value address) contains the meta-data to be associated with the
+ root.</p>
<h5>Semantics:</h5>
-
<p>At runtime, a call to this intrinsic stores a null pointer into the "ptrloc"
-location. At compile-time, the code generator generates information to allow
-the runtime to find the pointer at GC safe points. The '<tt>llvm.gcroot</tt>'
-intrinsic may only be used in a function which <a href="#gc">specifies a GC
-algorithm</a>.</p>
+ location. At compile-time, the code generator generates information to allow
+ the runtime to find the pointer at GC safe points. The '<tt>llvm.gcroot</tt>'
+ intrinsic may only be used in a function which <a href="#gc">specifies a GC
+ algorithm</a>.</p>
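+
+<p>For example, a front-end might declare a root like this (a sketch; the
+ collector name "mygc" is a placeholder for whatever <a href="#gc">GC
+ algorithm</a> the function specifies):</p>
+
+<div class="doc_code">
+<pre>
+define void @f() gc "mygc" {
+entry:
+  %root = alloca i8*                   ; stack slot holding the root pointer
+  call void @llvm.gcroot(i8** %root, i8* null)
+  ret void
+}
+</pre>
+</div>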
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a>
@@ -5123,35 +5377,30 @@ algorithm</a>.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.gcread</tt>' intrinsic identifies reads of references from heap
-locations, allowing garbage collector implementations that require read
-barriers.</p>
+ locations, allowing garbage collector implementations that require read
+ barriers.</p>
<h5>Arguments:</h5>
-
<p>The second argument is the address to read from, which should be an address
-allocated from the garbage collector. The first object is a pointer to the
-start of the referenced object, if needed by the language runtime (otherwise
-null).</p>
+ allocated from the garbage collector. The first object is a pointer to the
+ start of the referenced object, if needed by the language runtime (otherwise
+ null).</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.gcread</tt>' intrinsic has the same semantics as a load
-instruction, but may be replaced with substantially more complex code by the
-garbage collector runtime, as needed. The '<tt>llvm.gcread</tt>' intrinsic
-may only be used in a function which <a href="#gc">specifies a GC
-algorithm</a>.</p>
+ instruction, but may be replaced with substantially more complex code by the
+ garbage collector runtime, as needed. The '<tt>llvm.gcread</tt>' intrinsic
+ may only be used in a function which <a href="#gc">specifies a GC
+ algorithm</a>.</p>
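+
+<p>An illustrative read through the barrier (the operand names are
+ placeholders):</p>
+
+<div class="doc_code">
+<pre>
+  %ref = call i8* @llvm.gcread(i8* %obj, i8** %fieldptr)
+</pre>
+</div>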
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a>
@@ -5160,46 +5409,39 @@ algorithm</a>.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<pre>
declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.gcwrite</tt>' intrinsic identifies writes of references to heap
-locations, allowing garbage collector implementations that require write
-barriers (such as generational or reference counting collectors).</p>
+ locations, allowing garbage collector implementations that require write
+ barriers (such as generational or reference counting collectors).</p>
<h5>Arguments:</h5>
-
<p>The first argument is the reference to store, the second is the start of the
-object to store it to, and the third is the address of the field of Obj to
-store to. If the runtime does not require a pointer to the object, Obj may be
-null.</p>
+ object to store it to, and the third is the address of the field of Obj to
+ store to. If the runtime does not require a pointer to the object, Obj may
+ be null.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.gcwrite</tt>' intrinsic has the same semantics as a store
-instruction, but may be replaced with substantially more complex code by the
-garbage collector runtime, as needed. The '<tt>llvm.gcwrite</tt>' intrinsic
-may only be used in a function which <a href="#gc">specifies a GC
-algorithm</a>.</p>
+ instruction, but may be replaced with substantially more complex code by the
+ garbage collector runtime, as needed. The '<tt>llvm.gcwrite</tt>' intrinsic
+ may only be used in a function which <a href="#gc">specifies a GC
+ algorithm</a>.</p>
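+
+<p>The corresponding write through the barrier, again with placeholder operand
+ names:</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.gcwrite(i8* %ref, i8* %obj, i8** %fieldptr)
+</pre>
+</div>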
</div>
-
-
<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="int_codegen">Code Generator Intrinsics</a>
</div>
<div class="doc_text">
-<p>
-These intrinsics are provided by LLVM to expose special features that may only
-be implemented with code generator support.
-</p>
+
+<p>These intrinsics are provided by LLVM to expose special features that may
+ only be implemented with code generator support.</p>
</div>
@@ -5216,38 +5458,28 @@ be implemented with code generator support.
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.returnaddress</tt>' intrinsic attempts to compute a
-target-specific value indicating the return address of the current function
-or one of its callers.
-</p>
+<p>The '<tt>llvm.returnaddress</tt>' intrinsic attempts to compute a
+ target-specific value indicating the return address of the current function
+ or one of its callers.</p>
<h5>Arguments:</h5>
-
-<p>
-The argument to this intrinsic indicates which function to return the address
-for. Zero indicates the calling function, one indicates its caller, etc. The
-argument is <b>required</b> to be a constant integer value.
-</p>
+<p>The argument to this intrinsic indicates which function to return the address
+ for. Zero indicates the calling function, one indicates its caller, etc.
+ The argument is <b>required</b> to be a constant integer value.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.returnaddress</tt>' intrinsic either returns a pointer
+ indicating the return address of the specified call frame, or zero if it
+ cannot be identified. The value returned by this intrinsic is likely to be
+ incorrect or 0 for arguments other than zero, so it should only be used for
+ debugging purposes.</p>
-<p>
-The '<tt>llvm.returnaddress</tt>' intrinsic either returns a pointer indicating
-the return address of the specified call frame, or zero if it cannot be
-identified. The value returned by this intrinsic is likely to be incorrect or 0
-for arguments other than zero, so it should only be used for debugging purposes.
-</p>
+<p>Note that calling this intrinsic does not prevent function inlining or other
+ aggressive transformations, so the value returned may not be that of the
+ obvious source-language caller.</p>
-<p>
-Note that calling this intrinsic does not prevent function inlining or other
-aggressive transformations, so the value returned may not be that of the obvious
-source-language caller.
-</p>
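+
+<p>For example, to request the return address of the current function:</p>
+
+<div class="doc_code">
+<pre>
+  %retaddr = call i8* @llvm.returnaddress(i32 0)
+</pre>
+</div>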
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a>
@@ -5261,34 +5493,25 @@ source-language caller.
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.frameaddress</tt>' intrinsic attempts to return the
-target-specific frame pointer value for the specified stack frame.
-</p>
+<p>The '<tt>llvm.frameaddress</tt>' intrinsic attempts to return the
+ target-specific frame pointer value for the specified stack frame.</p>
<h5>Arguments:</h5>
-
-<p>
-The argument to this intrinsic indicates which function to return the frame
-pointer for. Zero indicates the calling function, one indicates its caller,
-etc. The argument is <b>required</b> to be a constant integer value.
-</p>
+<p>The argument to this intrinsic indicates which function to return the frame
+ pointer for. Zero indicates the calling function, one indicates its caller,
+ etc. The argument is <b>required</b> to be a constant integer value.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.frameaddress</tt>' intrinsic either returns a pointer
+ indicating the frame address of the specified call frame, or zero if it
+ cannot be identified. The value returned by this intrinsic is likely to be
+ incorrect or 0 for arguments other than zero, so it should only be used for
+ debugging purposes.</p>
-<p>
-The '<tt>llvm.frameaddress</tt>' intrinsic either returns a pointer indicating
-the frame address of the specified call frame, or zero if it cannot be
-identified. The value returned by this intrinsic is likely to be incorrect or 0
-for arguments other than zero, so it should only be used for debugging purposes.
-</p>
+<p>Note that calling this intrinsic does not prevent function inlining or other
+ aggressive transformations, so the value returned may not be that of the
+ obvious source-language caller.</p>
-<p>
-Note that calling this intrinsic does not prevent function inlining or other
-aggressive transformations, so the value returned may not be that of the obvious
-source-language caller.
-</p>
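+
+<p>Similarly, to request the frame address of the current function:</p>
+
+<div class="doc_code">
+<pre>
+  %fp = call i8* @llvm.frameaddress(i32 0)
+</pre>
+</div>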
</div>
<!-- _______________________________________________________________________ -->
@@ -5304,25 +5527,20 @@ source-language caller.
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.stacksave</tt>' intrinsic is used to remember the current state of
-the function stack, for use with <a href="#int_stackrestore">
-<tt>llvm.stackrestore</tt></a>. This is useful for implementing language
-features like scoped automatic variable sized arrays in C99.
-</p>
+<p>The '<tt>llvm.stacksave</tt>' intrinsic is used to remember the current state
+ of the function stack, for use
+ with <a href="#int_stackrestore"><tt>llvm.stackrestore</tt></a>. This is
+ useful for implementing language features like scoped automatic variable
+ sized arrays in C99.</p>
<h5>Semantics:</h5>
-
-<p>
-This intrinsic returns a opaque pointer value that can be passed to <a
-href="#int_stackrestore"><tt>llvm.stackrestore</tt></a>. When an
-<tt>llvm.stackrestore</tt> intrinsic is executed with a value saved from
-<tt>llvm.stacksave</tt>, it effectively restores the state of the stack to the
-state it was in when the <tt>llvm.stacksave</tt> intrinsic executed. In
-practice, this pops any <a href="#i_alloca">alloca</a> blocks from the stack
-that were allocated after the <tt>llvm.stacksave</tt> was executed.
-</p>
+<p>This intrinsic returns an opaque pointer value that can be passed
+ to <a href="#int_stackrestore"><tt>llvm.stackrestore</tt></a>. When
+ an <tt>llvm.stackrestore</tt> intrinsic is executed with a value saved
+ from <tt>llvm.stacksave</tt>, it effectively restores the state of the stack
+ to the state it was in when the <tt>llvm.stacksave</tt> intrinsic executed.
+ In practice, this pops any <a href="#i_alloca">alloca</a> blocks from the
+ stack that were allocated after the <tt>llvm.stacksave</tt> was executed.</p>
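+
+<p>For example, a C99 variable-length array can be modeled like this (a
+ sketch; <tt>%n</tt> stands for the dynamic element count):</p>
+
+<div class="doc_code">
+<pre>
+  %sp = call i8* @llvm.stacksave()
+  %vla = alloca i32, i32 %n            ; dynamically sized stack allocation
+  ; ... use %vla ...
+  call void @llvm.stackrestore(i8* %sp)  ; deallocates %vla
+</pre>
+</div>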
</div>
@@ -5339,24 +5557,18 @@ that were allocated after the <tt>llvm.stacksave</tt> was executed.
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.stackrestore</tt>' intrinsic is used to restore the state of
-the function stack to the state it was in when the corresponding <a
-href="#int_stacksave"><tt>llvm.stacksave</tt></a> intrinsic executed. This is
-useful for implementing language features like scoped automatic variable sized
-arrays in C99.
-</p>
+<p>The '<tt>llvm.stackrestore</tt>' intrinsic is used to restore the state of
+ the function stack to the state it was in when the
+ corresponding <a href="#int_stacksave"><tt>llvm.stacksave</tt></a> intrinsic
+ executed. This is useful for implementing language features like scoped
+ automatic variable sized arrays in C99.</p>
<h5>Semantics:</h5>
-
-<p>
-See the description for <a href="#int_stacksave"><tt>llvm.stacksave</tt></a>.
-</p>
+<p>See the description
+ for <a href="#int_stacksave"><tt>llvm.stacksave</tt></a>.</p>
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a>
@@ -5370,34 +5582,23 @@ See the description for <a href="#int_stacksave"><tt>llvm.stacksave</tt></a>.
</pre>
<h5>Overview:</h5>
-
-
-<p>
-The '<tt>llvm.prefetch</tt>' intrinsic is a hint to the code generator to insert
-a prefetch instruction if supported; otherwise, it is a noop. Prefetches have
-no
-effect on the behavior of the program but can change its performance
-characteristics.
-</p>
+<p>The '<tt>llvm.prefetch</tt>' intrinsic is a hint to the code generator to
+ insert a prefetch instruction if supported; otherwise, it is a noop.
+ Prefetches have no effect on the behavior of the program but can change its
+ performance characteristics.</p>
<h5>Arguments:</h5>
-
-<p>
-<tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the specifier
-determining if the fetch should be for a read (0) or write (1), and
-<tt>locality</tt> is a temporal locality specifier ranging from (0) - no
-locality, to (3) - extremely local keep in cache. The <tt>rw</tt> and
-<tt>locality</tt> arguments must be constant integers.
-</p>
+<p><tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the
+ specifier determining whether the fetch should be for a read (0) or a write
+ (1), and <tt>locality</tt> is a temporal locality specifier ranging from 0
+ (no locality) to 3 (extremely local, keep in cache). The <tt>rw</tt>
+ and <tt>locality</tt> arguments must be constant integers.</p>
<h5>Semantics:</h5>
-
-<p>
-This intrinsic does not modify the behavior of the program. In particular,
-prefetches cannot trap and do not produce a value. On targets that support this
-intrinsic, the prefetch can provide hints to the processor cache for better
-performance.
-</p>
+<p>This intrinsic does not modify the behavior of the program. In particular,
+ prefetches cannot trap and do not produce a value. On targets that support
+ this intrinsic, the prefetch can provide hints to the processor cache for
+ better performance.</p>
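+
+<p>For example, a read prefetch with maximal locality
+ (<tt>%ptr</tt> is a placeholder):</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.prefetch(i8* %ptr, i32 0, i32 3)
+</pre>
+</div>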
</div>
@@ -5414,32 +5615,21 @@ performance.
</pre>
<h5>Overview:</h5>
-
-
-<p>
-The '<tt>llvm.pcmarker</tt>' intrinsic is a method to export a Program Counter
-(PC) in a region of
-code to simulators and other tools. The method is target specific, but it is
-expected that the marker will use exported symbols to transmit the PC of the
-marker.
-The marker makes no guarantees that it will remain with any specific instruction
-after optimizations. It is possible that the presence of a marker will inhibit
-optimizations. The intended use is to be inserted after optimizations to allow
-correlations of simulation runs.
-</p>
+<p>The '<tt>llvm.pcmarker</tt>' intrinsic is a method to export a Program
+ Counter (PC) in a region of code to simulators and other tools. The method
+ is target specific, but it is expected that the marker will use exported
+ symbols to transmit the PC of the marker. The marker makes no guarantees
+ that it will remain with any specific instruction after optimizations. It is
+ possible that the presence of a marker will inhibit optimizations. The
+ intended use is to be inserted after optimizations to allow correlations of
+ simulation runs.</p>
<h5>Arguments:</h5>
-
-<p>
-<tt>id</tt> is a numerical id identifying the marker.
-</p>
+<p><tt>id</tt> is a numerical id identifying the marker.</p>
<h5>Semantics:</h5>
-
-<p>
-This intrinsic does not modify the behavior of the program. Backends that do not
-support this intrinisic may ignore it.
-</p>
+<p>This intrinsic does not modify the behavior of the program. Backends that do
+ not support this intrinsic may ignore it.</p>
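+
+<p>For example (42 is an arbitrary marker id):</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.pcmarker(i32 42)
+</pre>
+</div>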
</div>
@@ -5456,23 +5646,17 @@ support this intrinisic may ignore it.
</pre>
<h5>Overview:</h5>
-
-
-<p>
-The '<tt>llvm.readcyclecounter</tt>' intrinsic provides access to the cycle
-counter register (or similar low latency, high accuracy clocks) on those targets
-that support it. On X86, it should map to RDTSC. On Alpha, it should map to RPCC.
-As the backing counters overflow quickly (on the order of 9 seconds on alpha), this
-should only be used for small timings.
-</p>
+<p>The '<tt>llvm.readcyclecounter</tt>' intrinsic provides access to the cycle
+ counter register (or similar low latency, high accuracy clocks) on those
+ targets that support it. On X86, it should map to RDTSC. On Alpha, it
+ should map to RPCC. As the backing counters overflow quickly (on the order
+ of 9 seconds on Alpha), this should only be used for small timings.</p>
<h5>Semantics:</h5>
-
-<p>
-When directly supported, reading the cycle counter should not modify any memory.
-Implementations are allowed to either return a application specific value or a
-system wide value. On backends without support, this is lowered to a constant 0.
-</p>
+<p>When directly supported, reading the cycle counter should not modify any
+ memory. Implementations are allowed to either return an application-specific
+ value or a system-wide value. On backends without support, this is lowered
+ to a constant 0.</p>
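+
+<p>For example, a small timing measurement might look like this (a
+ sketch):</p>
+
+<div class="doc_code">
+<pre>
+  %t0 = call i64 @llvm.readcyclecounter()
+  ; ... code being timed ...
+  %t1 = call i64 @llvm.readcyclecounter()
+  %cycles = sub i64 %t1, %t0
+</pre>
+</div>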
</div>
@@ -5482,12 +5666,11 @@ system wide value. On backends without support, this is lowered to a constant 0
</div>
<div class="doc_text">
-<p>
-LLVM provides intrinsics for a few important standard C library functions.
-These intrinsics allow source-language front-ends to pass information about the
-alignment of the pointer arguments to the code generator, providing opportunity
-for more efficient code generation.
-</p>
+
+<p>LLVM provides intrinsics for a few important standard C library functions.
+ These intrinsics allow source-language front-ends to pass information about
+ the alignment of the pointer arguments to the code generator, providing
+ opportunity for more efficient code generation.</p>
</div>
@@ -5499,11 +5682,12 @@ for more efficient code generation.
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use llvm.memcpy on any integer bit
-width. Not all targets support all bit widths however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.memcpy</tt> on any
+ integer bit width. Not all targets support all bit widths however.</p>
+
<pre>
declare void @llvm.memcpy.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i8 &lt;len&gt;, i32 &lt;align&gt;)
+ i8 &lt;len&gt;, i32 &lt;align&gt;)
declare void @llvm.memcpy.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
i16 &lt;len&gt;, i32 &lt;align&gt;)
declare void @llvm.memcpy.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
@@ -5513,44 +5697,31 @@ width. Not all targets support all bit widths however.</p>
</pre>
<h5>Overview:</h5>
+<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
+ source location to the destination location.</p>
-<p>
-The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the source
-location to the destination location.
-</p>
-
-<p>
-Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
-intrinsics do not return a value, and takes an extra alignment argument.
-</p>
+<p>Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
+ intrinsics do not return a value, and take an extra alignment argument.</p>
<h5>Arguments:</h5>
+<p>The first argument is a pointer to the destination, the second is a pointer
+ to the source. The third argument is an integer argument specifying the
+ number of bytes to copy, and the fourth argument is the alignment of the
+ source and destination locations.</p>
-<p>
-The first argument is a pointer to the destination, the second is a pointer to
-the source. The third argument is an integer argument
-specifying the number of bytes to copy, and the fourth argument is the alignment
-of the source and destination locations.
-</p>
-
-<p>
-If the call to this intrinisic has an alignment value that is not 0 or 1, then
-the caller guarantees that both the source and destination pointers are aligned
-to that boundary.
-</p>
+<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
+ then the caller guarantees that both the source and destination pointers are
+ aligned to that boundary.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
+ source location to the destination location, which are not allowed to
+ overlap. It copies "len" bytes of memory over. If the argument is known to
+ be aligned to some boundary, this can be specified as the fourth argument,
+ otherwise it should be set to 0 or 1.</p>
-<p>
-The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the source
-location to the destination location, which are not allowed to overlap. It
-copies "len" bytes of memory over. If the argument is known to be aligned to
-some boundary, this can be specified as the fourth argument, otherwise it should
-be set to 0 or 1.
-</p>
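+
+<p>For example, copying 16 bytes between two 4-byte-aligned buffers
+ (<tt>%dst</tt> and <tt>%src</tt> are placeholders):</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.memcpy.i32(i8* %dst, i8* %src, i32 16, i32 4)
+</pre>
+</div>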
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_memmove">'<tt>llvm.memmove</tt>' Intrinsic</a>
@@ -5560,10 +5731,11 @@ be set to 0 or 1.
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
-width. Not all targets support all bit widths however.</p>
+ width. Not all targets support all bit widths however.</p>
+
<pre>
declare void @llvm.memmove.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i8 &lt;len&gt;, i32 &lt;align&gt;)
+ i8 &lt;len&gt;, i32 &lt;align&gt;)
declare void @llvm.memmove.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
i16 &lt;len&gt;, i32 &lt;align&gt;)
declare void @llvm.memmove.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
@@ -5573,45 +5745,33 @@ width. Not all targets support all bit widths however.</p>
</pre>
<h5>Overview:</h5>
+<p>The '<tt>llvm.memmove.*</tt>' intrinsics move a block of memory from the
+ source location to the destination location. It is similar to the
+ '<tt>llvm.memcpy</tt>' intrinsic but allows the two memory locations to
+ overlap.</p>
-<p>
-The '<tt>llvm.memmove.*</tt>' intrinsics move a block of memory from the source
-location to the destination location. It is similar to the
-'<tt>llvm.memcpy</tt>' intrinsic but allows the two memory locations to overlap.
-</p>
-
-<p>
-Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
-intrinsics do not return a value, and takes an extra alignment argument.
-</p>
+<p>Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
+ intrinsics do not return a value, and take an extra alignment argument.</p>
<h5>Arguments:</h5>
+<p>The first argument is a pointer to the destination, the second is a pointer
+ to the source. The third argument is an integer argument specifying the
+ number of bytes to copy, and the fourth argument is the alignment of the
+ source and destination locations.</p>
-<p>
-The first argument is a pointer to the destination, the second is a pointer to
-the source. The third argument is an integer argument
-specifying the number of bytes to copy, and the fourth argument is the alignment
-of the source and destination locations.
-</p>
-
-<p>
-If the call to this intrinisic has an alignment value that is not 0 or 1, then
-the caller guarantees that the source and destination pointers are aligned to
-that boundary.
-</p>
+<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
+ then the caller guarantees that the source and destination pointers are
+ aligned to that boundary.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the
+ source location to the destination location, which may overlap. It copies
+ "len" bytes of memory over. If the argument is known to be aligned to some
+ boundary, this can be specified as the fourth argument, otherwise it should
+ be set to 0 or 1.</p>
-<p>
-The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the source
-location to the destination location, which may overlap. It
-copies "len" bytes of memory over. If the argument is known to be aligned to
-some boundary, this can be specified as the fourth argument, otherwise it should
-be set to 0 or 1.
-</p>
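+
+<p>For example, shifting a region that may overlap its source, with no
+ alignment guarantee:</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.memmove.i32(i8* %dst, i8* %src, i32 100, i32 1)
+</pre>
+</div>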
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_memset">'<tt>llvm.memset.*</tt>' Intrinsics</a>
@@ -5621,10 +5781,11 @@ be set to 0 or 1.
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
-width. Not all targets support all bit widths however.</p>
+ width. Not all targets support all bit widths however.</p>
+
<pre>
declare void @llvm.memset.i8(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
- i8 &lt;len&gt;, i32 &lt;align&gt;)
+ i8 &lt;len&gt;, i32 &lt;align&gt;)
declare void @llvm.memset.i16(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
i16 &lt;len&gt;, i32 &lt;align&gt;)
declare void @llvm.memset.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
@@ -5634,43 +5795,30 @@ width. Not all targets support all bit widths however.</p>
</pre>
<h5>Overview:</h5>
+<p>The '<tt>llvm.memset.*</tt>' intrinsics fill a block of memory with a
+ particular byte value.</p>
-<p>
-The '<tt>llvm.memset.*</tt>' intrinsics fill a block of memory with a particular
-byte value.
-</p>
-
-<p>
-Note that, unlike the standard libc function, the <tt>llvm.memset</tt> intrinsic
-does not return a value, and takes an extra alignment argument.
-</p>
+<p>Note that, unlike the standard libc function, the <tt>llvm.memset</tt>
+ intrinsic does not return a value, and takes an extra alignment argument.</p>
<h5>Arguments:</h5>
+<p>The first argument is a pointer to the destination to fill, the second is the
+ byte value to fill it with, the third argument is an integer argument
+ specifying the number of bytes to fill, and the fourth argument is the known
+ alignment of the destination location.</p>
-<p>
-The first argument is a pointer to the destination to fill, the second is the
-byte value to fill it with, the third argument is an integer
-argument specifying the number of bytes to fill, and the fourth argument is the
-known alignment of destination location.
-</p>
-
-<p>
-If the call to this intrinisic has an alignment value that is not 0 or 1, then
-the caller guarantees that the destination pointer is aligned to that boundary.
-</p>
+<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
+ then the caller guarantees that the destination pointer is aligned to that
+ boundary.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting
+ at the destination location. If the argument is known to be aligned to some
+ boundary, this can be specified as the fourth argument, otherwise it should
+ be set to 0 or 1.</p>
-<p>
-The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting at
-the
-destination location. If the argument is known to be aligned to some boundary,
-this can be specified as the fourth argument, otherwise it should be set to 0 or
-1.
-</p>
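+
+<p>For example, zeroing a 64-byte, 8-byte-aligned buffer (<tt>%buf</tt> is a
+ placeholder):</p>
+
+<div class="doc_code">
+<pre>
+  call void @llvm.memset.i32(i8* %buf, i8 0, i32 64, i32 8)
+</pre>
+</div>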
</div>
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a>
@@ -5679,9 +5827,10 @@ this can be specified as the fourth argument, otherwise it should be set to 0 or
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sqrt</tt> on any
-floating point or vector of floating point type. Not all targets support all
-types however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.sqrt</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
<pre>
declare float @llvm.sqrt.f32(float %Val)
declare double @llvm.sqrt.f64(double %Val)
@@ -5691,28 +5840,21 @@ types however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.sqrt</tt>' intrinsics return the sqrt of the specified operand,
-returning the same value as the libm '<tt>sqrt</tt>' functions would. Unlike
-<tt>sqrt</tt> in libm, however, <tt>llvm.sqrt</tt> has undefined behavior for
-negative numbers other than -0.0 (which allows for better optimization, because
-there is no need to worry about errno being set). <tt>llvm.sqrt(-0.0)</tt> is
-defined to return -0.0 like IEEE sqrt.
-</p>
+<p>The '<tt>llvm.sqrt</tt>' intrinsics return the sqrt of the specified operand,
+ returning the same value as the libm '<tt>sqrt</tt>' functions would.
+ Unlike <tt>sqrt</tt> in libm, however, <tt>llvm.sqrt</tt> has undefined
+ behavior for negative numbers other than -0.0 (which allows for better
+ optimization, because there is no need to worry about <tt>errno</tt> being
+ set). <tt>llvm.sqrt(-0.0)</tt> is defined to return -0.0 like IEEE sqrt.</p>
<h5>Arguments:</h5>
-
-<p>
-The argument and return value are floating point numbers of the same type.
-</p>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
<h5>Semantics:</h5>
+<p>This function returns the sqrt of the specified operand if it is a
+ nonnegative floating point number.</p>
-<p>
-This function returns the sqrt of the specified operand if it is a nonnegative
-floating point number.
-</p>
</div>
<!-- _______________________________________________________________________ -->
@@ -5723,9 +5865,10 @@ floating point number.
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.powi</tt> on any
-floating point or vector of floating point type. Not all targets support all
-types however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.powi</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
<pre>
declare float @llvm.powi.f32(float %Val, i32 %power)
declare double @llvm.powi.f64(double %Val, i32 %power)
@@ -5735,26 +5878,19 @@ types however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.powi.*</tt>' intrinsics return the first operand raised to the
-specified (positive or negative) power. The order of evaluation of
-multiplications is not defined. When a vector of floating point type is
-used, the second argument remains a scalar integer value.
-</p>
+<p>The '<tt>llvm.powi.*</tt>' intrinsics return the first operand raised to the
+ specified (positive or negative) power. The order of evaluation of
+ multiplications is not defined. When a vector of floating point type is
+ used, the second argument remains a scalar integer value.</p>
<h5>Arguments:</h5>
-
-<p>
-The second argument is an integer power, and the first is a value to raise to
-that power.
-</p>
+<p>The second argument is an integer power, and the first is a value to raise to
+ that power.</p>
<h5>Semantics:</h5>
+<p>This function returns the first value raised to the second power with an
+ unspecified sequence of rounding operations.</p>
-<p>
-This function returns the first value raised to the second power with an
-unspecified sequence of rounding operations.</p>
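+
+<p>For example, cubing each element of a vector (<tt>%v</tt> is a
+ placeholder):</p>
+
+<div class="doc_code">
+<pre>
+  %cubed = call &lt;4 x float&gt; @llvm.powi.v4f32(&lt;4 x float&gt; %v, i32 3)
+</pre>
+</div>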
</div>
<!-- _______________________________________________________________________ -->
@@ -5765,9 +5901,10 @@ unspecified sequence of rounding operations.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sin</tt> on any
-floating point or vector of floating point type. Not all targets support all
-types however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.sin</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
<pre>
declare float @llvm.sin.f32(float %Val)
declare double @llvm.sin.f64(double %Val)
@@ -5777,23 +5914,17 @@ types however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.sin.*</tt>' intrinsics return the sine of the operand.
-</p>
+<p>The '<tt>llvm.sin.*</tt>' intrinsics return the sine of the operand.</p>
<h5>Arguments:</h5>
-
-<p>
-The argument and return value are floating point numbers of the same type.
-</p>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
<h5>Semantics:</h5>
+<p>This function returns the sine of the specified operand, returning the same
+ values as the libm <tt>sin</tt> functions would, and handles error conditions
+ in the same way.</p>
-<p>
-This function returns the sine of the specified operand, returning the
-same values as the libm <tt>sin</tt> functions would, and handles error
-conditions in the same way.</p>
</div>
<!-- _______________________________________________________________________ -->
@@ -5804,9 +5935,10 @@ conditions in the same way.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.cos</tt> on any
-floating point or vector of floating point type. Not all targets support all
-types however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.cos</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
<pre>
declare float @llvm.cos.f32(float %Val)
declare double @llvm.cos.f64(double %Val)
@@ -5816,23 +5948,17 @@ types however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.cos.*</tt>' intrinsics return the cosine of the operand.
-</p>
+<p>The '<tt>llvm.cos.*</tt>' intrinsics return the cosine of the operand.</p>
<h5>Arguments:</h5>
-
-<p>
-The argument and return value are floating point numbers of the same type.
-</p>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
<h5>Semantics:</h5>
+<p>This function returns the cosine of the specified operand, returning the same
+ values as the libm <tt>cos</tt> functions would, and handles error conditions
+ in the same way.</p>
-<p>
-This function returns the cosine of the specified operand, returning the
-same values as the libm <tt>cos</tt> functions would, and handles error
-conditions in the same way.</p>
</div>
<!-- _______________________________________________________________________ -->
@@ -5843,9 +5969,10 @@ conditions in the same way.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.pow</tt> on any
-floating point or vector of floating point type. Not all targets support all
-types however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.pow</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
<pre>
declare float @llvm.pow.f32(float %Val, float %Power)
declare double @llvm.pow.f64(double %Val, double %Power)
@@ -5855,39 +5982,29 @@ types however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.pow.*</tt>' intrinsics return the first operand raised to the
-specified (positive or negative) power.
-</p>
+<p>The '<tt>llvm.pow.*</tt>' intrinsics return the first operand raised to the
+ specified (positive or negative) power.</p>
<h5>Arguments:</h5>
-
-<p>
-The second argument is a floating point power, and the first is a value to
-raise to that power.
-</p>
+<p>The second argument is a floating point power, and the first is a value to
+ raise to that power.</p>
<h5>Semantics:</h5>
+<p>This function returns the first value raised to the second power, returning
+ the same values as the libm <tt>pow</tt> functions would, and handles error
+ conditions in the same way.</p>
-<p>
-This function returns the first value raised to the second power,
-returning the
-same values as the libm <tt>pow</tt> functions would, and handles error
-conditions in the same way.</p>
</div>
-
<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="int_manip">Bit Manipulation Intrinsics</a>
</div>
<div class="doc_text">
-<p>
-LLVM provides intrinsics for a few important bit manipulation operations.
-These allow efficient code generation for some algorithms.
-</p>
+
+<p>LLVM provides intrinsics for a few important bit manipulation operations.
+ These allow efficient code generation for some algorithms.</p>
</div>
@@ -5900,7 +6017,8 @@ These allow efficient code generation for some algorithms.
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic function. You can use bswap on any integer
-type that is an even number of bytes (i.e. BitWidth % 16 == 0).</p>
+ type that is an even number of bytes (i.e. BitWidth % 16 == 0).</p>
+
<pre>
declare i16 @llvm.bswap.i16(i16 &lt;id&gt;)
declare i32 @llvm.bswap.i32(i32 &lt;id&gt;)
@@ -5908,25 +6026,20 @@ type that is an even number of bytes (i.e. BitWidth % 16 == 0).</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.bswap</tt>' family of intrinsics is used to byte swap integer
-values with an even number of bytes (positive multiple of 16 bits). These are
-useful for performing operations on data that is not in the target's native
-byte order.
-</p>
+<p>The '<tt>llvm.bswap</tt>' family of intrinsics is used to byte swap integer
+ values with an even number of bytes (positive multiple of 16 bits). These
+ are useful for performing operations on data that is not in the target's
+ native byte order.</p>
<h5>Semantics:</h5>
-
-<p>
-The <tt>llvm.bswap.i16</tt> intrinsic returns an i16 value that has the high
-and low byte of the input i16 swapped. Similarly, the <tt>llvm.bswap.i32</tt>
-intrinsic returns an i32 value that has the four bytes of the input i32
-swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the returned
-i32 will have its bytes in 3, 2, 1, 0 order. The <tt>llvm.bswap.i48</tt>,
-<tt>llvm.bswap.i64</tt> and other intrinsics extend this concept to
-additional even-byte lengths (6 bytes, 8 bytes and more, respectively).
-</p>
+<p>The <tt>llvm.bswap.i16</tt> intrinsic returns an i16 value that has the high
+ and low byte of the input i16 swapped. Similarly,
+ the <tt>llvm.bswap.i32</tt> intrinsic returns an i32 value that has the four
+ bytes of the input i32 swapped, so that if the input bytes are numbered 0, 1,
+ 2, 3 then the returned i32 will have its bytes in 3, 2, 1, 0 order.
+ The <tt>llvm.bswap.i48</tt>, <tt>llvm.bswap.i64</tt> and other intrinsics
+ extend this concept to additional even-byte lengths (6 bytes, 8 bytes and
+ more, respectively).</p>
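+
+<p>For example:</p>
+
+<div class="doc_code">
+<pre>
+  %swapped = call i32 @llvm.bswap.i32(i32 305419896)  ; 0x12345678 becomes 0x78563412
+</pre>
+</div>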
</div>
@@ -5939,7 +6052,8 @@ additional even-byte lengths (6 bytes, 8 bytes and more, respectively).
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
-width. Not all targets support all bit widths however.</p>
+ width. Not all targets support all bit widths however.</p>
+
<pre>
declare i8 @llvm.ctpop.i8(i8 &lt;src&gt;)
declare i16 @llvm.ctpop.i16(i16 &lt;src&gt;)
@@ -5949,24 +6063,16 @@ width. Not all targets support all bit widths however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.ctpop</tt>' family of intrinsics counts the number of bits set in a
-value.
-</p>
+<p>The '<tt>llvm.ctpop</tt>' family of intrinsics counts the number of bits set
+ in a value.</p>
<h5>Arguments:</h5>
-
-<p>
-The only argument is the value to be counted. The argument may be of any
-integer type. The return type must match the argument type.
-</p>
+<p>The only argument is the value to be counted. The argument may be of any
+ integer type. The return type must match the argument type.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.</p>
-<p>
-The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.
-</p>
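+
+<p>For example:</p>
+
+<div class="doc_code">
+<pre>
+  %count = call i8 @llvm.ctpop.i8(i8 29)   ; 29 is 00011101, so %count is 4
+</pre>
+</div>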
</div>
<!-- _______________________________________________________________________ -->
@@ -5977,8 +6083,9 @@ The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
-integer bit width. Not all targets support all bit widths however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
+ integer bit width. Not all targets support all bit widths however.</p>
+
<pre>
declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;)
declare i16 @llvm.ctlz.i16(i16 &lt;src&gt;)
@@ -5988,30 +6095,20 @@ integer bit width. Not all targets support all bit widths however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.ctlz</tt>' family of intrinsic functions counts the number of
-leading zeros in a variable.
-</p>
+<p>The '<tt>llvm.ctlz</tt>' family of intrinsic functions counts the number of
+ leading zeros in a variable.</p>
<h5>Arguments:</h5>
-
-<p>
-The only argument is the value to be counted. The argument may be of any
-integer type. The return type must match the argument type.
-</p>
+<p>The only argument is the value to be counted. The argument may be of any
+ integer type. The return type must match the argument type.</p>
<h5>Semantics:</h5>
+<p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
+ zeros in a variable. If the src == 0 then the result is the size in bits of
+ the type of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
-<p>
-The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant) zeros
-in a variable. If the src == 0 then the result is the size in bits of the type
-of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.
-</p>
</div>
-
-
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic</a>
@@ -6020,8 +6117,9 @@ of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
-integer bit width. Not all targets support all bit widths however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
+ integer bit width. Not all targets support all bit widths however.</p>
+
<pre>
declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;)
declare i16 @llvm.cttz.i16(i16 &lt;src&gt;)
@@ -6031,130 +6129,17 @@ integer bit width. Not all targets support all bit widths however.</p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.cttz</tt>' family of intrinsic functions counts the number of
-trailing zeros.
-</p>
+<p>The '<tt>llvm.cttz</tt>' family of intrinsic functions counts the number of
+ trailing zeros.</p>
<h5>Arguments:</h5>
-
-<p>
-The only argument is the value to be counted. The argument may be of any
-integer type. The return type must match the argument type.
-</p>
+<p>The only argument is the value to be counted. The argument may be of any
+ integer type. The return type must match the argument type.</p>
<h5>Semantics:</h5>
-
-<p>
-The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant) zeros
-in a variable. If the src == 0 then the result is the size in bits of the type
-of src. For example, <tt>llvm.cttz(2) = 1</tt>.
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
- <a name="int_part_select">'<tt>llvm.part.select.*</tt>' Intrinsic</a>
-</div>
-
-<div class="doc_text">
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.part.select</tt>
-on any integer bit width.</p>
-<pre>
- declare i17 @llvm.part.select.i17 (i17 %val, i32 %loBit, i32 %hiBit)
- declare i29 @llvm.part.select.i29 (i29 %val, i32 %loBit, i32 %hiBit)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.part.select</tt>' family of intrinsic functions selects a
-range of bits from an integer value and returns them in the same bit width as
-the original value.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument, <tt>%val</tt> and the result may be integer types of
-any bit width but they must have the same bit width. The second and third
-arguments must be <tt>i32</tt> type since they specify only a bit index.</p>
-
-<h5>Semantics:</h5>
-<p>The operation of the '<tt>llvm.part.select</tt>' intrinsic has two modes
-of operation: forwards and reverse. If <tt>%loBit</tt> is greater than
-<tt>%hiBits</tt> then the intrinsic operates in reverse mode. Otherwise it
-operates in forward mode.</p>
-<p>In forward mode, this intrinsic is the equivalent of shifting <tt>%val</tt>
-right by <tt>%loBit</tt> bits and then ANDing it with a mask with
-only the <tt>%hiBit - %loBit</tt> bits set, as follows:</p>
-<ol>
- <li>The <tt>%val</tt> is shifted right (LSHR) by the number of bits specified
- by <tt>%loBits</tt>. This normalizes the value to the low order bits.</li>
- <li>The <tt>%loBits</tt> value is subtracted from the <tt>%hiBits</tt> value
- to determine the number of bits to retain.</li>
- <li>A mask of the retained bits is created by shifting a -1 value.</li>
- <li>The mask is ANDed with <tt>%val</tt> to produce the result.</li>
-</ol>
-<p>In reverse mode, a similar computation is made except that the bits are
-returned in the reverse order. So, for example, if <tt>X</tt> has the value
-<tt>i16 0x0ACF (101011001111)</tt> and we apply
-<tt>part.select(i16 X, 8, 3)</tt> to it, we get back the value
-<tt>i16 0x0026 (000000100110)</tt>.</p>
-</div>
-
-<div class="doc_subsubsection">
- <a name="int_part_set">'<tt>llvm.part.set.*</tt>' Intrinsic</a>
-</div>
-
-<div class="doc_text">
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.part.set</tt>
-on any integer bit width.</p>
-<pre>
- declare i17 @llvm.part.set.i17.i9 (i17 %val, i9 %repl, i32 %lo, i32 %hi)
- declare i29 @llvm.part.set.i29.i9 (i29 %val, i9 %repl, i32 %lo, i32 %hi)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.part.set</tt>' family of intrinsic functions replaces a range
-of bits in an integer value with another integer value. It returns the integer
-with the replaced bits.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument, <tt>%val</tt>, and the result may be integer types of
-any bit width, but they must have the same bit width. <tt>%val</tt> is the value
-whose bits will be replaced. The second argument, <tt>%repl</tt> may be an
-integer of any bit width. The third and fourth arguments must be <tt>i32</tt>
-type since they specify only a bit index.</p>
-
-<h5>Semantics:</h5>
-<p>The operation of the '<tt>llvm.part.set</tt>' intrinsic has two modes
-of operation: forwards and reverse. If <tt>%lo</tt> is greater than
-<tt>%hi</tt> then the intrinsic operates in reverse mode. Otherwise it
-operates in forward mode.</p>
-
-<p>For both modes, the <tt>%repl</tt> value is prepared for use by either
-truncating it down to the size of the replacement area or zero extending it
-up to that size.</p>
-
-<p>In forward mode, the bits between <tt>%lo</tt> and <tt>%hi</tt> (inclusive)
-are replaced with corresponding bits from <tt>%repl</tt>. That is the 0th bit
-in <tt>%repl</tt> replaces the <tt>%lo</tt>th bit in <tt>%val</tt> and etc. up
-to the <tt>%hi</tt>th bit.</p>
-
-<p>In reverse mode, a similar computation is made except that the bits are
-reversed. That is, the <tt>0</tt>th bit in <tt>%repl</tt> replaces the
-<tt>%hi</tt> bit in <tt>%val</tt> and etc. down to the <tt>%lo</tt>th bit.</p>
-
-<h5>Examples:</h5>
-
-<pre>
- llvm.part.set(0xFFFF, 0, 4, 7) -&gt; 0xFF0F
- llvm.part.set(0xFFFF, 0, 7, 4) -&gt; 0xFF0F
- llvm.part.set(0xFFFF, 1, 7, 4) -&gt; 0xFF8F
- llvm.part.set(0xFFFF, F, 8, 3) -&gt; 0xFFE7
- llvm.part.set(0xFFFF, 0, 3, 8) -&gt; 0xFE07
-</pre>
+<p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
+ zeros in a variable. If <tt>src</tt> == 0, the result is the size in bits of
+ the type of <tt>src</tt>. For example, <tt>llvm.cttz(2) = 1</tt>.</p>
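+
+<h5>Example:</h5>
+<p>A brief illustrative sketch; the constants are arbitrary, not taken from the
+ reference:</p>
+<pre>
+  %a = call i32 @llvm.cttz.i32(i32 256)    <i>; yields {i32}:%a = 8</i>
+  %b = call i32 @llvm.cttz.i32(i32 0)      <i>; yields {i32}:%b = 32</i>
+</pre>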
</div>
@@ -6164,9 +6149,8 @@ reversed. That is, the <tt>0</tt>th bit in <tt>%repl</tt> replaces the
</div>
<div class="doc_text">
-<p>
-LLVM provides intrinsics for some arithmetic with overflow operations.
-</p>
+
+<p>LLVM provides intrinsics for some arithmetic with overflow operations.</p>
</div>
@@ -6178,9 +6162,8 @@ LLVM provides intrinsics for some arithmetic with overflow operations.
<div class="doc_text">
<h5>Syntax:</h5>
-
<p>This is an overloaded intrinsic. You can use <tt>llvm.sadd.with.overflow</tt>
-on any integer bit width.</p>
+ on any integer bit width.</p>
<pre>
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
@@ -6189,24 +6172,23 @@ on any integer bit width.</p>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.sadd.with.overflow</tt>' family of intrinsic functions perform
-a signed addition of the two arguments, and indicate whether an overflow
-occurred during the signed summation.</p>
+ a signed addition of the two arguments, and indicate whether an overflow
+ occurred during the signed summation.</p>
<h5>Arguments:</h5>
-
<p>The arguments (%a and %b) and the first element of the result structure may
-be of integer types of any bit width, but they must have the same bit width. The
-second element of the result structure must be of type <tt>i1</tt>. <tt>%a</tt>
-and <tt>%b</tt> are the two values that will undergo signed addition.</p>
+ be of integer types of any bit width, but they must have the same bit
+ width. The second element of the result structure must be of
+ type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
+ undergo signed addition.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.sadd.with.overflow</tt>' family of intrinsic functions perform
-a signed addition of the two variables. They return a structure &mdash; the
-first element of which is the signed summation, and the second element of which
-is a bit specifying if the signed summation resulted in an overflow.</p>
+ a signed addition of the two variables. They return a structure &mdash; the
+ first element of which is the signed summation, and the second element of
+ which is a bit specifying if the signed summation resulted in an
+ overflow.</p>
<h5>Examples:</h5>
<pre>
@@ -6226,9 +6208,8 @@ is a bit specifying if the signed summation resulted in an overflow.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<p>This is an overloaded intrinsic. You can use <tt>llvm.uadd.with.overflow</tt>
-on any integer bit width.</p>
+ on any integer bit width.</p>
<pre>
declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
@@ -6237,24 +6218,22 @@ on any integer bit width.</p>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.uadd.with.overflow</tt>' family of intrinsic functions perform
-an unsigned addition of the two arguments, and indicate whether a carry occurred
-during the unsigned summation.</p>
+ an unsigned addition of the two arguments, and indicate whether a carry
+ occurred during the unsigned summation.</p>
<h5>Arguments:</h5>
-
<p>The arguments (%a and %b) and the first element of the result structure may
-be of integer types of any bit width, but they must have the same bit width. The
-second element of the result structure must be of type <tt>i1</tt>. <tt>%a</tt>
-and <tt>%b</tt> are the two values that will undergo unsigned addition.</p>
+ be of integer types of any bit width, but they must have the same bit
+ width. The second element of the result structure must be of
+ type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
+ undergo unsigned addition.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.uadd.with.overflow</tt>' family of intrinsic functions perform
-an unsigned addition of the two arguments. They return a structure &mdash; the
-first element of which is the sum, and the second element of which is a bit
-specifying if the unsigned summation resulted in a carry.</p>
+ an unsigned addition of the two arguments. They return a structure &mdash;
+ the first element of which is the sum, and the second element of which is a
+ bit specifying if the unsigned summation resulted in a carry.</p>
<h5>Examples:</h5>
<pre>
@@ -6274,9 +6253,8 @@ specifying if the unsigned summation resulted in a carry.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<p>This is an overloaded intrinsic. You can use <tt>llvm.ssub.with.overflow</tt>
-on any integer bit width.</p>
+ on any integer bit width.</p>
<pre>
declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
@@ -6285,24 +6263,23 @@ on any integer bit width.</p>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.ssub.with.overflow</tt>' family of intrinsic functions perform
-a signed subtraction of the two arguments, and indicate whether an overflow
-occurred during the signed subtraction.</p>
+ a signed subtraction of the two arguments, and indicate whether an overflow
+ occurred during the signed subtraction.</p>
<h5>Arguments:</h5>
-
<p>The arguments (%a and %b) and the first element of the result structure may
-be of integer types of any bit width, but they must have the same bit width. The
-second element of the result structure must be of type <tt>i1</tt>. <tt>%a</tt>
-and <tt>%b</tt> are the two values that will undergo signed subtraction.</p>
+ be of integer types of any bit width, but they must have the same bit
+ width. The second element of the result structure must be of
+ type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
+ undergo signed subtraction.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.ssub.with.overflow</tt>' family of intrinsic functions perform
-a signed subtraction of the two arguments. They return a structure &mdash; the
-first element of which is the subtraction, and the second element of which is a bit
-specifying if the signed subtraction resulted in an overflow.</p>
+ a signed subtraction of the two arguments. They return a structure &mdash;
+ the first element of which is the subtraction, and the second element of
+ which is a bit specifying if the signed subtraction resulted in an
+ overflow.</p>
<h5>Examples:</h5>
<pre>
@@ -6322,9 +6299,8 @@ specifying if the signed subtraction resulted in an overflow.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<p>This is an overloaded intrinsic. You can use <tt>llvm.usub.with.overflow</tt>
-on any integer bit width.</p>
+ on any integer bit width.</p>
<pre>
declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
@@ -6333,24 +6309,23 @@ on any integer bit width.</p>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.usub.with.overflow</tt>' family of intrinsic functions perform
-an unsigned subtraction of the two arguments, and indicate whether an overflow
-occurred during the unsigned subtraction.</p>
+ an unsigned subtraction of the two arguments, and indicate whether an
+ overflow occurred during the unsigned subtraction.</p>
<h5>Arguments:</h5>
-
<p>The arguments (%a and %b) and the first element of the result structure may
-be of integer types of any bit width, but they must have the same bit width. The
-second element of the result structure must be of type <tt>i1</tt>. <tt>%a</tt>
-and <tt>%b</tt> are the two values that will undergo unsigned subtraction.</p>
+ be of integer types of any bit width, but they must have the same bit
+ width. The second element of the result structure must be of
+ type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
+ undergo unsigned subtraction.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.usub.with.overflow</tt>' family of intrinsic functions perform
-an unsigned subtraction of the two arguments. They return a structure &mdash; the
-first element of which is the subtraction, and the second element of which is a bit
-specifying if the unsigned subtraction resulted in an overflow.</p>
+ an unsigned subtraction of the two arguments. They return a structure &mdash;
+ the first element of which is the subtraction, and the second element of
+ which is a bit specifying if the unsigned subtraction resulted in an
+ overflow.</p>
<h5>Examples:</h5>
<pre>
@@ -6370,9 +6345,8 @@ specifying if the unsigned subtraction resulted in an overflow.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<p>This is an overloaded intrinsic. You can use <tt>llvm.smul.with.overflow</tt>
-on any integer bit width.</p>
+ on any integer bit width.</p>
<pre>
declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
@@ -6383,23 +6357,22 @@ on any integer bit width.</p>
<h5>Overview:</h5>
<p>The '<tt>llvm.smul.with.overflow</tt>' family of intrinsic functions perform
-a signed multiplication of the two arguments, and indicate whether an overflow
-occurred during the signed multiplication.</p>
+ a signed multiplication of the two arguments, and indicate whether an
+ overflow occurred during the signed multiplication.</p>
<h5>Arguments:</h5>
-
<p>The arguments (%a and %b) and the first element of the result structure may
-be of integer types of any bit width, but they must have the same bit width. The
-second element of the result structure must be of type <tt>i1</tt>. <tt>%a</tt>
-and <tt>%b</tt> are the two values that will undergo signed multiplication.</p>
+ be of integer types of any bit width, but they must have the same bit
+ width. The second element of the result structure must be of
+ type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
+ undergo signed multiplication.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.smul.with.overflow</tt>' family of intrinsic functions perform
-a signed multiplication of the two arguments. They return a structure &mdash;
-the first element of which is the multiplication, and the second element of
-which is a bit specifying if the signed multiplication resulted in an
-overflow.</p>
+ a signed multiplication of the two arguments. They return a structure &mdash;
+ the first element of which is the multiplication, and the second element of
+ which is a bit specifying if the signed multiplication resulted in an
+ overflow.</p>
<h5>Examples:</h5>
<pre>
@@ -6419,9 +6392,8 @@ overflow.</p>
<div class="doc_text">
<h5>Syntax:</h5>
-
<p>This is an overloaded intrinsic. You can use <tt>llvm.umul.with.overflow</tt>
-on any integer bit width.</p>
+ on any integer bit width.</p>
<pre>
declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
@@ -6430,26 +6402,23 @@ on any integer bit width.</p>
</pre>
<h5>Overview:</h5>
-
<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
-a unsigned multiplication of the two arguments, and indicate whether an overflow
-occurred during the unsigned multiplication.</p>
+ an unsigned multiplication of the two arguments, and indicate whether an
+ overflow occurred during the unsigned multiplication.</p>
<h5>Arguments:</h5>
-
<p>The arguments (%a and %b) and the first element of the result structure may
-be of integer types of any bit width, but they must have the same bit width. The
-second element of the result structure must be of type <tt>i1</tt>. <tt>%a</tt>
-and <tt>%b</tt> are the two values that will undergo unsigned
-multiplication.</p>
+ be of integer types of any bit width, but they must have the same bit
+ width. The second element of the result structure must be of
+ type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
+ undergo unsigned multiplication.</p>
<h5>Semantics:</h5>
-
<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
-an unsigned multiplication of the two arguments. They return a structure &mdash;
-the first element of which is the multiplication, and the second element of
-which is a bit specifying if the unsigned multiplication resulted in an
-overflow.</p>
+ an unsigned multiplication of the two arguments. They return a structure
+ &mdash; the first element of which is the multiplication, and the second
+ element of which is a bit specifying if the unsigned multiplication resulted
+ in an overflow.</p>
<h5>Examples:</h5>
<pre>
@@ -6467,14 +6436,13 @@ overflow.</p>
</div>
<div class="doc_text">
-<p>
-The LLVM debugger intrinsics (which all start with <tt>llvm.dbg.</tt> prefix),
-are described in the <a
-href="SourceLevelDebugging.html#format_common_intrinsics">LLVM Source Level
-Debugging</a> document.
-</p>
-</div>
+<p>The LLVM debugger intrinsics (which all start with the <tt>llvm.dbg.</tt>
+ prefix) are described in
+ the <a href="SourceLevelDebugging.html#format_common_intrinsics">LLVM Source
+ Level Debugging</a> document.</p>
+
+</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
@@ -6482,10 +6450,12 @@ Debugging</a> document.
</div>
<div class="doc_text">
-<p> The LLVM exception handling intrinsics (which all start with
-<tt>llvm.eh.</tt> prefix), are described in the <a
-href="ExceptionHandling.html#format_common_intrinsics">LLVM Exception
-Handling</a> document. </p>
+
+<p>The LLVM exception handling intrinsics (which all start with
+ the <tt>llvm.eh.</tt> prefix) are described in
+ the <a href="ExceptionHandling.html#format_common_intrinsics">LLVM Exception
+ Handling</a> document.</p>
+
</div>
<!-- ======================================================================= -->
@@ -6494,70 +6464,74 @@ Handling</a> document. </p>
</div>
<div class="doc_text">
-<p>
- This intrinsic makes it possible to excise one parameter, marked with
- the <tt>nest</tt> attribute, from a function. The result is a callable
- function pointer lacking the nest parameter - the caller does not need
- to provide a value for it. Instead, the value to use is stored in
- advance in a "trampoline", a block of memory usually allocated
- on the stack, which also contains code to splice the nest value into the
- argument list. This is used to implement the GCC nested function address
- extension.
-</p>
-<p>
- For example, if the function is
- <tt>i32 f(i8* nest %c, i32 %x, i32 %y)</tt> then the resulting function
- pointer has signature <tt>i32 (i32, i32)*</tt>. It can be created as follows:</p>
+
+<p>This intrinsic makes it possible to excise one parameter, marked with
+ the <tt>nest</tt> attribute, from a function. The result is a callable
+ function pointer lacking the nest parameter; the caller does not need to
+ provide a value for it. Instead, the value to use is stored in advance in a
+ "trampoline", a block of memory usually allocated on the stack, which also
+ contains code to splice the nest value into the argument list. This is used
+ to implement the GCC nested function address extension.</p>
+
+<p>For example, if the function is
+ <tt>i32 f(i8* nest %c, i32 %x, i32 %y)</tt> then the resulting function
+ pointer has signature <tt>i32 (i32, i32)*</tt>. It can be created as
+ follows:</p>
+
+<div class="doc_code">
<pre>
%tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
%tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
%p = call i8* @llvm.init.trampoline( i8* %tramp1, i8* bitcast (i32 (i8* nest , i32, i32)* @f to i8*), i8* %nval )
%fp = bitcast i8* %p to i32 (i32, i32)*
</pre>
- <p>The call <tt>%val = call i32 %fp( i32 %x, i32 %y )</tt> is then equivalent
- to <tt>%val = call i32 %f( i8* %nval, i32 %x, i32 %y )</tt>.</p>
+</div>
+
+<p>The call <tt>%val = call i32 %fp( i32 %x, i32 %y )</tt> is then equivalent
+ to <tt>%val = call i32 %f( i8* %nval, i32 %x, i32 %y )</tt>.</p>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
<pre>
-declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
+ declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
</pre>
+
<h5>Overview:</h5>
-<p>
- This fills the memory pointed to by <tt>tramp</tt> with code
- and returns a function pointer suitable for executing it.
-</p>
+<p>This fills the memory pointed to by <tt>tramp</tt> with code and returns a
+ function pointer suitable for executing it.</p>
+
<h5>Arguments:</h5>
-<p>
- The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all
- pointers. The <tt>tramp</tt> argument must point to a sufficiently large
- and sufficiently aligned block of memory; this memory is written to by the
- intrinsic. Note that the size and the alignment are target-specific - LLVM
- currently provides no portable way of determining them, so a front-end that
- generates this intrinsic needs to have some target-specific knowledge.
- The <tt>func</tt> argument must hold a function bitcast to an <tt>i8*</tt>.
-</p>
+<p>The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all
+ pointers. The <tt>tramp</tt> argument must point to a sufficiently large and
+ sufficiently aligned block of memory; this memory is written to by the
+ intrinsic. Note that the size and the alignment are target-specific; LLVM
+ currently provides no portable way of determining them, so a front-end that
+ generates this intrinsic needs to have some target-specific knowledge.
+ The <tt>func</tt> argument must hold a function bitcast to
+ an <tt>i8*</tt>.</p>
+
<h5>Semantics:</h5>
-<p>
- The block of memory pointed to by <tt>tramp</tt> is filled with target
- dependent code, turning it into a function. A pointer to this function is
- returned, but needs to be bitcast to an
- <a href="#int_trampoline">appropriate function pointer type</a>
- before being called. The new function's signature is the same as that of
- <tt>func</tt> with any arguments marked with the <tt>nest</tt> attribute
- removed. At most one such <tt>nest</tt> argument is allowed, and it must be
- of pointer type. Calling the new function is equivalent to calling
- <tt>func</tt> with the same argument list, but with <tt>nval</tt> used for the
- missing <tt>nest</tt> argument. If, after calling
- <tt>llvm.init.trampoline</tt>, the memory pointed to by <tt>tramp</tt> is
- modified, then the effect of any later call to the returned function pointer is
- undefined.
-</p>
+<p>The block of memory pointed to by <tt>tramp</tt> is filled with target
+ dependent code, turning it into a function. A pointer to this function is
+ returned, but needs to be bitcast to an <a href="#int_trampoline">appropriate
+ function pointer type</a> before being called. The new function's signature
+ is the same as that of <tt>func</tt> with any arguments marked with
+ the <tt>nest</tt> attribute removed. At most one such <tt>nest</tt> argument
+ is allowed, and it must be of pointer type. Calling the new function is
+ equivalent to calling <tt>func</tt> with the same argument list, but
+ with <tt>nval</tt> used for the missing <tt>nest</tt> argument. If, after
+ calling <tt>llvm.init.trampoline</tt>, the memory pointed to
+ by <tt>tramp</tt> is modified, then the effect of any later call to the
+ returned function pointer is undefined.</p>
+
</div>
<!-- ======================================================================= -->
@@ -6566,27 +6540,25 @@ declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;n
</div>
<div class="doc_text">
-<p>
- These intrinsic functions expand the "universal IR" of LLVM to represent
- hardware constructs for atomic operations and memory synchronization. This
- provides an interface to the hardware, not an interface to the programmer. It
- is aimed at a low enough level to allow any programming models or APIs
- (Application Programming Interfaces) which
- need atomic behaviors to map cleanly onto it. It is also modeled primarily on
- hardware behavior. Just as hardware provides a "universal IR" for source
- languages, it also provides a starting point for developing a "universal"
- atomic operation and synchronization IR.
-</p>
-<p>
- These do <em>not</em> form an API such as high-level threading libraries,
- software transaction memory systems, atomic primitives, and intrinsic
- functions as found in BSD, GNU libc, atomic_ops, APR, and other system and
- application libraries. The hardware interface provided by LLVM should allow
- a clean implementation of all of these APIs and parallel programming models.
- No one model or paradigm should be selected above others unless the hardware
- itself ubiquitously does so.
-</p>
+<p>These intrinsic functions expand the "universal IR" of LLVM to represent
+ hardware constructs for atomic operations and memory synchronization. This
+ provides an interface to the hardware, not an interface to the programmer. It
+ is aimed at a low enough level to allow any programming models or APIs
+ (Application Programming Interfaces) which need atomic behaviors to map
+ cleanly onto it. It is also modeled primarily on hardware behavior. Just as
+ hardware provides a "universal IR" for source languages, it also provides a
+ starting point for developing a "universal" atomic operation and
+ synchronization IR.</p>
+
+<p>These do <em>not</em> form an API such as high-level threading libraries,
+ software transactional memory systems, atomic primitives, and intrinsic
+ functions as found in BSD, GNU libc, atomic_ops, APR, and other system and
+ application libraries. The hardware interface provided by LLVM should allow
+ a clean implementation of all of these APIs and parallel programming models.
+ No one model or paradigm should be selected above others unless the hardware
+ itself ubiquitously does so.</p>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -6596,59 +6568,56 @@ declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;n
<div class="doc_text">
<h5>Syntax:</h5>
<pre>
-declare void @llvm.memory.barrier( i1 &lt;ll&gt;, i1 &lt;ls&gt;, i1 &lt;sl&gt;, i1 &lt;ss&gt;,
-i1 &lt;device&gt; )
-
+ declare void @llvm.memory.barrier( i1 &lt;ll&gt;, i1 &lt;ls&gt;, i1 &lt;sl&gt;, i1 &lt;ss&gt;, i1 &lt;device&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- The <tt>llvm.memory.barrier</tt> intrinsic guarantees ordering between
- specific pairs of memory access types.
-</p>
+<p>The <tt>llvm.memory.barrier</tt> intrinsic guarantees ordering between
+ specific pairs of memory access types.</p>
+
<h5>Arguments:</h5>
-<p>
- The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
- The first four arguments enables a specific barrier as listed below. The fith
- argument specifies that the barrier applies to io or device or uncached memory.
+<p>The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
+ The first four arguments enable a specific barrier as listed below. The
+ fifth argument specifies that the barrier applies to I/O, device, or uncached
+ memory.</p>
+
+<ul>
+ <li><tt>ll</tt>: load-load barrier</li>
+ <li><tt>ls</tt>: load-store barrier</li>
+ <li><tt>sl</tt>: store-load barrier</li>
+ <li><tt>ss</tt>: store-store barrier</li>
+ <li><tt>device</tt>: barrier applies to device and uncached memory also.</li>
+</ul>
-</p>
- <ul>
- <li><tt>ll</tt>: load-load barrier</li>
- <li><tt>ls</tt>: load-store barrier</li>
- <li><tt>sl</tt>: store-load barrier</li>
- <li><tt>ss</tt>: store-store barrier</li>
- <li><tt>device</tt>: barrier applies to device and uncached memory also.</li>
- </ul>
<h5>Semantics:</h5>
-<p>
- This intrinsic causes the system to enforce some ordering constraints upon
- the loads and stores of the program. This barrier does not indicate
- <em>when</em> any events will occur, it only enforces an <em>order</em> in
- which they occur. For any of the specified pairs of load and store operations
- (f.ex. load-load, or store-load), all of the first operations preceding the
- barrier will complete before any of the second operations succeeding the
- barrier begin. Specifically the semantics for each pairing is as follows:
-</p>
- <ul>
- <li><tt>ll</tt>: All loads before the barrier must complete before any load
- after the barrier begins.</li>
-
- <li><tt>ls</tt>: All loads before the barrier must complete before any
- store after the barrier begins.</li>
- <li><tt>ss</tt>: All stores before the barrier must complete before any
- store after the barrier begins.</li>
- <li><tt>sl</tt>: All stores before the barrier must complete before any
- load after the barrier begins.</li>
- </ul>
-<p>
- These semantics are applied with a logical "and" behavior when more than one
- is enabled in a single memory barrier intrinsic.
-</p>
-<p>
- Backends may implement stronger barriers than those requested when they do not
- support as fine grained a barrier as requested. Some architectures do not
- need all types of barriers and on such architectures, these become noops.
-</p>
+<p>This intrinsic causes the system to enforce some ordering constraints upon
+ the loads and stores of the program. This barrier does not
+ indicate <em>when</em> any events will occur; it only enforces
+ an <em>order</em> in which they occur. For any of the specified pairs of load
+ and store operations (e.g. load-load or store-load), all of the first
+ operations preceding the barrier will complete before any of the second
+ operations succeeding the barrier begin. Specifically, the semantics for each
+ pairing are as follows:</p>
+
+<ul>
+ <li><tt>ll</tt>: All loads before the barrier must complete before any load
+ after the barrier begins.</li>
+ <li><tt>ls</tt>: All loads before the barrier must complete before any
+ store after the barrier begins.</li>
+ <li><tt>ss</tt>: All stores before the barrier must complete before any
+ store after the barrier begins.</li>
+ <li><tt>sl</tt>: All stores before the barrier must complete before any
+ load after the barrier begins.</li>
+</ul>
+
+<p>These semantics are applied with a logical "and" behavior when more than one
+ is enabled in a single memory barrier intrinsic.</p>
+
+<p>Backends may implement stronger barriers than those requested when they do
+ not support as fine-grained a barrier as requested. Some architectures do
+ not need all types of barriers; on such architectures, these become
+ no-ops.</p>
+
<h5>Example:</h5>
<pre>
%ptr = malloc i32
@@ -6659,50 +6628,48 @@ i1 &lt;device&gt; )
<i>; guarantee the above finishes</i>
store i32 8, %ptr <i>; before this begins</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_atomic_cmp_swap">'<tt>llvm.atomic.cmp.swap.*</tt>' Intrinsic</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<p>
- This is an overloaded intrinsic. You can use <tt>llvm.atomic.cmp.swap</tt> on
- any integer bit width and for different address spaces. Not all targets
- support all bit widths however.</p>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.cmp.swap</tt> on
+ any integer bit width and for different address spaces. Not all targets
+ support all bit widths however.</p>
<pre>
-declare i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;cmp&gt;, i8 &lt;val&gt; )
-declare i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;cmp&gt;, i16 &lt;val&gt; )
-declare i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;cmp&gt;, i32 &lt;val&gt; )
-declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;cmp&gt;, i64 &lt;val&gt; )
-
+ declare i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;cmp&gt;, i8 &lt;val&gt; )
+ declare i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;cmp&gt;, i16 &lt;val&gt; )
+ declare i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;cmp&gt;, i32 &lt;val&gt; )
+ declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;cmp&gt;, i64 &lt;val&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- This loads a value in memory and compares it to a given value. If they are
- equal, it stores a new value into the memory.
-</p>
+<p>This loads a value from memory and compares it to a given value. If they are
+ equal, it stores a new value into the memory.</p>
+
<h5>Arguments:</h5>
-<p>
- The <tt>llvm.atomic.cmp.swap</tt> intrinsic takes three arguments. The result as
- well as both <tt>cmp</tt> and <tt>val</tt> must be integer values with the
- same bit width. The <tt>ptr</tt> argument must be a pointer to a value of
- this integer type. While any bit width integer may be used, targets may only
- lower representations they support in hardware.
+<p>The <tt>llvm.atomic.cmp.swap</tt> intrinsic takes three arguments. The result
+ as well as both <tt>cmp</tt> and <tt>val</tt> must be integer values with the
+ same bit width. The <tt>ptr</tt> argument must be a pointer to a value of
+ this integer type. While any bit width integer may be used, targets may only
+ lower representations they support in hardware.</p>
-</p>
<h5>Semantics:</h5>
-<p>
- This entire intrinsic must be executed atomically. It first loads the value
- in memory pointed to by <tt>ptr</tt> and compares it with the value
- <tt>cmp</tt>. If they are equal, <tt>val</tt> is stored into the memory. The
- loaded value is yielded in all cases. This provides the equivalent of an
- atomic compare-and-swap operation within the SSA framework.
-</p>
-<h5>Examples:</h5>
+<p>This entire intrinsic must be executed atomically. It first loads the value
+ in memory pointed to by <tt>ptr</tt> and compares it with the
+ value <tt>cmp</tt>. If they are equal, <tt>val</tt> is stored into the
+ memory. The loaded value is yielded in all cases. This provides the
+ equivalent of an atomic compare-and-swap operation within the SSA
+ framework.</p>
+<h5>Examples:</h5>
<pre>
%ptr = malloc i32
store i32 4, %ptr
@@ -6720,6 +6687,7 @@ declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;cmp&gt;,
%memval2 = load i32* %ptr <i>; yields {i32}:memval2 = 8</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -6729,38 +6697,33 @@ declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;cmp&gt;,
<div class="doc_text">
<h5>Syntax:</h5>
-<p>
- This is an overloaded intrinsic. You can use <tt>llvm.atomic.swap</tt> on any
- integer bit width. Not all targets support all bit widths however.</p>
-<pre>
-declare i8 @llvm.atomic.swap.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;val&gt; )
-declare i16 @llvm.atomic.swap.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;val&gt; )
-declare i32 @llvm.atomic.swap.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;val&gt; )
-declare i64 @llvm.atomic.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;val&gt; )
+<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.swap</tt> on any
+ integer bit width. Not all targets support all bit widths however.</p>
+<pre>
+ declare i8 @llvm.atomic.swap.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;val&gt; )
+ declare i16 @llvm.atomic.swap.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;val&gt; )
+ declare i32 @llvm.atomic.swap.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;val&gt; )
+ declare i64 @llvm.atomic.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;val&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- This intrinsic loads the value stored in memory at <tt>ptr</tt> and yields
- the value from memory. It then stores the value in <tt>val</tt> in the memory
- at <tt>ptr</tt>.
-</p>
+<p>This intrinsic loads the value stored in memory at <tt>ptr</tt> and yields
+ it. It then stores the value <tt>val</tt> into the memory
+ at <tt>ptr</tt>.</p>
+
<h5>Arguments:</h5>
+<p>The <tt>llvm.atomic.swap</tt> intrinsic takes two arguments. Both
+ the <tt>val</tt> argument and the result must be integers of the same bit
+ width. The first argument, <tt>ptr</tt>, must be a pointer to a value of this
+ integer type. The targets may only lower integer representations they
+ support.</p>
-<p>
- The <tt>llvm.atomic.swap</tt> intrinsic takes two arguments. Both the
- <tt>val</tt> argument and the result must be integers of the same bit width.
- The first argument, <tt>ptr</tt>, must be a pointer to a value of this
- integer type. The targets may only lower integer representations they
- support.
-</p>
<h5>Semantics:</h5>
-<p>
- This intrinsic loads the value pointed to by <tt>ptr</tt>, yields it, and
- stores <tt>val</tt> back into <tt>ptr</tt> atomically. This provides the
- equivalent of an atomic swap operation within the SSA framework.
+<p>This intrinsic loads the value pointed to by <tt>ptr</tt>, yields it, and
+ stores <tt>val</tt> back into <tt>ptr</tt> atomically. This provides the
+ equivalent of an atomic swap operation within the SSA framework.</p>
-</p>
<h5>Examples:</h5>
<pre>
%ptr = malloc i32
@@ -6779,6 +6742,7 @@ declare i64 @llvm.atomic.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;val&gt; )
%stored2 = icmp eq i32 %result2, 8 <i>; yields {i1}:stored2 = true</i>
%memval2 = load i32* %ptr <i>; yields {i32}:memval2 = 2</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -6786,37 +6750,34 @@ declare i64 @llvm.atomic.swap.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;val&gt; )
<a name="int_atomic_load_add">'<tt>llvm.atomic.load.add.*</tt>' Intrinsic</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<p>
- This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.add</tt> on any
- integer bit width. Not all targets support all bit widths however.</p>
-<pre>
-declare i8 @llvm.atomic.load.add.i8..p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.add.i16..p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.add.i32..p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.add.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.add</tt> on
+ any integer bit width. Not all targets support all bit widths however.</p>
+<pre>
+ declare i8 @llvm.atomic.load.add.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.add.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.add.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.add.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- This intrinsic adds <tt>delta</tt> to the value stored in memory at
- <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.
-</p>
+<p>This intrinsic adds <tt>delta</tt> to the value stored in memory
+ at <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.</p>
+
<h5>Arguments:</h5>
-<p>
+<p>The intrinsic takes two arguments, the first a pointer to an integer value
+ and the second an integer value. The result is also an integer value. These
+ integer types can have any bit width, but they must all have the same bit
+ width. The targets may only lower integer representations they support.</p>
- The intrinsic takes two arguments, the first a pointer to an integer value
- and the second an integer value. The result is also an integer value. These
- integer types can have any bit width, but they must all have the same bit
- width. The targets may only lower integer representations they support.
-</p>
<h5>Semantics:</h5>
-<p>
- This intrinsic does a series of operations atomically. It first loads the
- value stored at <tt>ptr</tt>. It then adds <tt>delta</tt>, stores the result
- to <tt>ptr</tt>. It yields the original value stored at <tt>ptr</tt>.
-</p>
+<p>This intrinsic does a series of operations atomically. It first loads the
+ value stored at <tt>ptr</tt>. It then adds <tt>delta</tt> and stores the
+ result to <tt>ptr</tt>. It yields the original value stored at <tt>ptr</tt>.</p>
<h5>Examples:</h5>
<pre>
@@ -6830,6 +6791,7 @@ declare i64 @llvm.atomic.load.add.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt
<i>; yields {i32}:result3 = 10</i>
%memval1 = load i32* %ptr <i>; yields {i32}:memval1 = 15</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -6837,38 +6799,36 @@ declare i64 @llvm.atomic.load.add.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt
<a name="int_atomic_load_sub">'<tt>llvm.atomic.load.sub.*</tt>' Intrinsic</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<p>
- This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.sub</tt> on
- any integer bit width and for different address spaces. Not all targets
- support all bit widths however.</p>
-<pre>
-declare i8 @llvm.atomic.load.sub.i8.p0i32( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.sub.i16.p0i32( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.sub.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.sub</tt> on
+ any integer bit width and for different address spaces. Not all targets
+ support all bit widths however.</p>
+<pre>
+ declare i8 @llvm.atomic.load.sub.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.sub.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.sub.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.sub.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- This intrinsic subtracts <tt>delta</tt> to the value stored in memory at
- <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.
-</p>
+<p>This intrinsic subtracts <tt>delta</tt> from the value stored in memory at
+ <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.</p>
+
<h5>Arguments:</h5>
-<p>
+<p>The intrinsic takes two arguments, the first a pointer to an integer value
+ and the second an integer value. The result is also an integer value. These
+ integer types can have any bit width, but they must all have the same bit
+ width. The targets may only lower integer representations they support.</p>
- The intrinsic takes two arguments, the first a pointer to an integer value
- and the second an integer value. The result is also an integer value. These
- integer types can have any bit width, but they must all have the same bit
- width. The targets may only lower integer representations they support.
-</p>
<h5>Semantics:</h5>
-<p>
- This intrinsic does a series of operations atomically. It first loads the
- value stored at <tt>ptr</tt>. It then subtracts <tt>delta</tt>, stores the
- result to <tt>ptr</tt>. It yields the original value stored at <tt>ptr</tt>.
-</p>
+<p>This intrinsic does a series of operations atomically. It first loads the
+ value stored at <tt>ptr</tt>. It then subtracts <tt>delta</tt> and stores the
+ result to <tt>ptr</tt>. It yields the original value stored
+ at <tt>ptr</tt>.</p>
<h5>Examples:</h5>
<pre>
@@ -6882,6 +6842,7 @@ declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt;
<i>; yields {i32}:result3 = 2</i>
%memval1 = load i32* %ptr <i>; yields {i32}:memval1 = -3</i>
</pre>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -6890,67 +6851,61 @@ declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt;
<a name="int_atomic_load_nand">'<tt>llvm.atomic.load.nand.*</tt>' Intrinsic</a><br>
<a name="int_atomic_load_or">'<tt>llvm.atomic.load.or.*</tt>' Intrinsic</a><br>
<a name="int_atomic_load_xor">'<tt>llvm.atomic.load.xor.*</tt>' Intrinsic</a><br>
-
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<p>
- These are overloaded intrinsics. You can use <tt>llvm.atomic.load_and</tt>,
- <tt>llvm.atomic.load_nand</tt>, <tt>llvm.atomic.load_or</tt>, and
- <tt>llvm.atomic.load_xor</tt> on any integer bit width and for different
- address spaces. Not all targets support all bit widths however.</p>
-<pre>
-declare i8 @llvm.atomic.load.and.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.and.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.and.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.and.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+<p>These are overloaded intrinsics. You can
+ use <tt>llvm.atomic.load_and</tt>, <tt>llvm.atomic.load_nand</tt>,
+ <tt>llvm.atomic.load_or</tt>, and <tt>llvm.atomic.load_xor</tt> on any integer
+ bit width and for different address spaces. Not all targets support all bit
+ widths however.</p>
+<pre>
+ declare i8 @llvm.atomic.load.and.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.and.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.and.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.and.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
<pre>
-declare i8 @llvm.atomic.load.or.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.or.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.or.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.or.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
-
+ declare i8 @llvm.atomic.load.or.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.or.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.or.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.or.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
<pre>
-declare i8 @llvm.atomic.load.nand.i8.p0i32( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.nand.i16.p0i32( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.nand.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.nand.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
-
+ declare i8 @llvm.atomic.load.nand.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.nand.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.nand.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.nand.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
<pre>
-declare i8 @llvm.atomic.load.xor.i8.p0i32( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.xor.i16.p0i32( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.xor.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
-
+ declare i8 @llvm.atomic.load.xor.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.xor.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.xor.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.xor.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- These intrinsics bitwise the operation (and, nand, or, xor) <tt>delta</tt> to
- the value stored in memory at <tt>ptr</tt>. It yields the original value
- at <tt>ptr</tt>.
-</p>
+<p>These intrinsics apply a bitwise operation (and, nand, or, xor) between
+ <tt>delta</tt> and the value stored in memory at <tt>ptr</tt>. They yield the
+ original value at <tt>ptr</tt>.</p>
+
<h5>Arguments:</h5>
-<p>
+<p>These intrinsics take two arguments, the first a pointer to an integer value
+ and the second an integer value. The result is also an integer value. These
+ integer types can have any bit width, but they must all have the same bit
+ width. The targets may only lower integer representations they support.</p>
- These intrinsics take two arguments, the first a pointer to an integer value
- and the second an integer value. The result is also an integer value. These
- integer types can have any bit width, but they must all have the same bit
- width. The targets may only lower integer representations they support.
-</p>
<h5>Semantics:</h5>
-<p>
- These intrinsics does a series of operations atomically. They first load the
- value stored at <tt>ptr</tt>. They then do the bitwise operation
- <tt>delta</tt>, store the result to <tt>ptr</tt>. They yield the original
- value stored at <tt>ptr</tt>.
-</p>
+<p>These intrinsics perform a series of operations atomically. They first load
+ the value stored at <tt>ptr</tt>. They then apply the bitwise operation
+ with <tt>delta</tt> and store the result to <tt>ptr</tt>. They yield the
+ original value stored at <tt>ptr</tt>.</p>
<h5>Examples:</h5>
<pre>
@@ -6966,8 +6921,8 @@ declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt;
<i>; yields {i32}:result3 = FF</i>
%memval1 = load i32* %ptr <i>; yields {i32}:memval1 = F0</i>
</pre>
-</div>
+</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
@@ -6975,68 +6930,60 @@ declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* &lt;ptr&gt;, i64 &lt;delta&gt;
<a name="int_atomic_load_min">'<tt>llvm.atomic.load.min.*</tt>' Intrinsic</a><br>
<a name="int_atomic_load_umax">'<tt>llvm.atomic.load.umax.*</tt>' Intrinsic</a><br>
<a name="int_atomic_load_umin">'<tt>llvm.atomic.load.umin.*</tt>' Intrinsic</a><br>
-
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
-<p>
- These are overloaded intrinsics. You can use <tt>llvm.atomic.load_max</tt>,
- <tt>llvm.atomic.load_min</tt>, <tt>llvm.atomic.load_umax</tt>, and
- <tt>llvm.atomic.load_umin</tt> on any integer bit width and for different
- address spaces. Not all targets
- support all bit widths however.</p>
-<pre>
-declare i8 @llvm.atomic.load.max.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.max.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.max.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.max.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+<p>These are overloaded intrinsics. You can use <tt>llvm.atomic.load_max</tt>,
+ <tt>llvm.atomic.load_min</tt>, <tt>llvm.atomic.load_umax</tt>, and
+ <tt>llvm.atomic.load_umin</tt> on any integer bit width and for different
+ address spaces. Not all targets support all bit widths however.</p>
+<pre>
+ declare i8 @llvm.atomic.load.max.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.max.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.max.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.max.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
<pre>
-declare i8 @llvm.atomic.load.min.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.min.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.min.i32..p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.min.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
-
+ declare i8 @llvm.atomic.load.min.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.min.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.min.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.min.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
<pre>
-declare i8 @llvm.atomic.load.umax.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.umax.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.umax.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.umax.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
-
+ declare i8 @llvm.atomic.load.umax.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.umax.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.umax.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.umax.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
<pre>
-declare i8 @llvm.atomic.load.umin.i8..p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
-declare i16 @llvm.atomic.load.umin.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
-declare i32 @llvm.atomic.load.umin.i32..p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
-declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
-
+ declare i8 @llvm.atomic.load.umin.i8.p0i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+ declare i16 @llvm.atomic.load.umin.i16.p0i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+ declare i32 @llvm.atomic.load.umin.i32.p0i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+ declare i64 @llvm.atomic.load.umin.i64.p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- These intrinsics takes the signed or unsigned minimum or maximum of
- <tt>delta</tt> and the value stored in memory at <tt>ptr</tt>. It yields the
- original value at <tt>ptr</tt>.
-</p>
+<p>These intrinsics take the signed or unsigned minimum or maximum of
+ <tt>delta</tt> and the value stored in memory at <tt>ptr</tt>. They yield the
+ original value at <tt>ptr</tt>.</p>
+
<h5>Arguments:</h5>
-<p>
+<p>These intrinsics take two arguments, the first a pointer to an integer value
+ and the second an integer value. The result is also an integer value. These
+ integer types can have any bit width, but they must all have the same bit
+ width. The targets may only lower integer representations they support.</p>
- These intrinsics take two arguments, the first a pointer to an integer value
- and the second an integer value. The result is also an integer value. These
- integer types can have any bit width, but they must all have the same bit
- width. The targets may only lower integer representations they support.
-</p>
<h5>Semantics:</h5>
-<p>
- These intrinsics does a series of operations atomically. They first load the
- value stored at <tt>ptr</tt>. They then do the signed or unsigned min or max
- <tt>delta</tt> and the value, store the result to <tt>ptr</tt>. They yield
- the original value stored at <tt>ptr</tt>.
-</p>
+<p>These intrinsics perform a series of operations atomically. They first load
+ the value stored at <tt>ptr</tt>. They then take the signed or unsigned min
+ or max of <tt>delta</tt> and the loaded value, and store the result
+ to <tt>ptr</tt>. They yield the original value stored at <tt>ptr</tt>.</p>
<h5>Examples:</h5>
<pre>
@@ -7052,6 +6999,134 @@ declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&g
<i>; yields {i32}:result3 = 8</i>
%memval1 = load i32* %ptr <i>; yields {i32}:memval1 = 30</i>
</pre>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_memorymarkers">Memory Use Markers</a>
+</div>
+
+<div class="doc_text">
+
+<p>This class of intrinsics exists to provide information about the lifetime of
+ memory objects and ranges where variables are immutable.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.lifetime.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.lifetime.start</tt>' intrinsic specifies the start of a memory
+ object's lifetime.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument is a constant integer representing the size of the
+ object, or -1 if it is variable sized. The second argument is a pointer to
+ the object.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic indicates that before this point in the code, the value of the
+ memory pointed to by <tt>ptr</tt> is dead. This means that it is known to
+ never be used and has an undefined value. A load from the pointer that is
+ preceded by this intrinsic can be replaced with
+ <tt>'<a href="#undefvalues">undef</a>'</tt>.</p>
+
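+<h5>Example:</h5>
+<p>A brief illustrative sketch; the <tt>%buf</tt> object and its 16-byte size
+ are hypothetical:</p>
+<pre>
+  %buf = alloca [16 x i8]                          <i>; dead before the marker</i>
+  %p = bitcast [16 x i8]* %buf to i8*
+  call void @llvm.lifetime.start(i64 16, i8* %p)   <i>; %buf is live from here</i>
+</pre>
+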
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.lifetime.end(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.lifetime.end</tt>' intrinsic specifies the end of a memory
+ object's lifetime.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument is a constant integer representing the size of the
+ object, or -1 if it is variable sized. The second argument is a pointer to
+ the object.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic indicates that after this point in the code, the value of the
+ memory pointed to by <tt>ptr</tt> is dead. This means that it is known to
+ never be used and has an undefined value. Any stores into the memory object
+ following this intrinsic may be removed as dead.</p>
+
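+<h5>Example:</h5>
+<p>Continuing the hypothetical sketch from <tt>llvm.lifetime.start</tt>
+ above:</p>
+<pre>
+  call void @llvm.lifetime.end(i64 16, i8* %p)     <i>; %buf is dead again</i>
+  store i8 0, i8* %p                               <i>; may be removed as dead</i>
+</pre>
+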
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare {}* @llvm.invariant.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;) readonly
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.invariant.start</tt>' intrinsic specifies that the contents of
+ a memory object will not change.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument is a constant integer representing the size of the
+ object, or -1 if it is variable sized. The second argument is a pointer to
+ the object.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic indicates that until an <tt>llvm.invariant.end</tt> that uses
+ the return value, the referenced memory location is constant and
+ unchanging.</p>
+
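+<h5>Example:</h5>
+<p>A brief illustrative sketch; the <tt>%obj</tt> object and its 4-byte size
+ are hypothetical:</p>
+<pre>
+  store i32 7, i32* %obj
+  %p = bitcast i32* %obj to i8*
+  %inv = call {}* @llvm.invariant.start(i64 4, i8* %p)
+  %v = load i32* %obj                              <i>; known to yield 7</i>
+</pre>
+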
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.invariant.end({}* &lt;start&gt;, i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.invariant.end</tt>' intrinsic specifies that the contents of
+ a memory object are mutable.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument is the matching <tt>llvm.invariant.start</tt> intrinsic.
+ The second argument is a constant integer representing the size of the
+ object, or -1 if it is variable sized. The third argument is a pointer to
+ the object.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic indicates that the memory is mutable again.</p>
+
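+<h5>Example:</h5>
+<p>Continuing the hypothetical sketch from <tt>llvm.invariant.start</tt>
+ above:</p>
+<pre>
+  call void @llvm.invariant.end({}* %inv, i64 4, i8* %p)
+  store i32 9, i32* %obj                           <i>; the object may change again</i>
+</pre>
+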
</div>
<!-- ======================================================================= -->
@@ -7060,8 +7135,10 @@ declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* &lt;ptr&gt;, i64 &lt;delta&g
</div>
<div class="doc_text">
-<p> This class of intrinsics is designed to be generic and has
-no specific purpose. </p>
+
+<p>This class of intrinsics is designed to be generic and has no specific
+ purpose.</p>
+
</div>
<!-- _______________________________________________________________________ -->
@@ -7077,27 +7154,19 @@ no specific purpose. </p>
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.var.annotation</tt>' intrinsic
-</p>
+<p>The '<tt>llvm.var.annotation</tt>' intrinsic attaches an annotation string
+ to a local variable.</p>
<h5>Arguments:</h5>
-
-<p>
-The first argument is a pointer to a value, the second is a pointer to a
-global string, the third is a pointer to a global string which is the source
-file name, and the last argument is the line number.
-</p>
+<p>The first argument is a pointer to a value, the second is a pointer to a
+ global string, the third is a pointer to a global string which is the source
+ file name, and the last argument is the line number.</p>
<h5>Semantics:</h5>
+<p>This intrinsic allows annotation of local variables with arbitrary strings.
+ This can be useful for special purpose optimizations that want to look for
+   these annotations. These have no other defined use; they are ignored by code
+ generation and optimization.</p>
-<p>
-This intrinsic allows annotation of local variables with arbitrary strings.
-This can be useful for special purpose optimizations that want to look for these
-annotations. These have no other defined use, they are ignored by code
-generation and optimization.
-</p>
</div>
<!-- _______________________________________________________________________ -->
@@ -7108,9 +7177,9 @@ generation and optimization.
<div class="doc_text">
<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use '<tt>llvm.annotation</tt>' on
-any integer bit width.
-</p>
+<p>This is an overloaded intrinsic. You can use '<tt>llvm.annotation</tt>' on
+ any integer bit width.</p>
+
<pre>
declare i8 @llvm.annotation.i8(i8 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt; )
declare i16 @llvm.annotation.i16(i16 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt; )
@@ -7120,28 +7189,20 @@ any integer bit width.
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.annotation</tt>' intrinsic.
-</p>
+<p>The '<tt>llvm.annotation</tt>' intrinsic allows annotations to be put on
+   arbitrary expressions.</p>
<h5>Arguments:</h5>
-
-<p>
-The first argument is an integer value (result of some expression),
-the second is a pointer to a global string, the third is a pointer to a global
-string which is the source file name, and the last argument is the line number.
-It returns the value of the first argument.
-</p>
+<p>The first argument is an integer value (result of some expression), the
+ second is a pointer to a global string, the third is a pointer to a global
+ string which is the source file name, and the last argument is the line
+ number. It returns the value of the first argument.</p>
<h5>Semantics:</h5>
+<p>This intrinsic allows annotations to be put on arbitrary expressions with
+ arbitrary strings. This can be useful for special purpose optimizations that
+   want to look for these annotations. These have no other defined use; they
+ are ignored by code generation and optimization.</p>
-<p>
-This intrinsic allows annotations to be put on arbitrary expressions
-with arbitrary strings. This can be useful for special purpose optimizations
-that want to look for these annotations. These have no other defined use, they
-are ignored by code generation and optimization.
-</p>
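+
+<p>For example, a sketch, assuming <tt>%str</tt> and <tt>%file</tt> point to
+   global strings:</p>
+
+<pre>
+  <i>; %res is equal to %val; the annotation itself has no defined use</i>
+  %res = call i32 @llvm.annotation.i32(i32 %val, i8* %str, i8* %file, i32 42)
+</pre>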
</div>
<!-- _______________________________________________________________________ -->
@@ -7157,58 +7218,50 @@ are ignored by code generation and optimization.
</pre>
<h5>Overview:</h5>
-
-<p>
-The '<tt>llvm.trap</tt>' intrinsic
-</p>
+<p>The '<tt>llvm.trap</tt>' intrinsic causes the program to trap.</p>
<h5>Arguments:</h5>
-
-<p>
-None
-</p>
+<p>None.</p>
<h5>Semantics:</h5>
+<p>This intrinsic is lowered to the target-dependent trap instruction. If the
+ target does not have a trap instruction, this intrinsic will be lowered to
+ the call of the <tt>abort()</tt> function.</p>
-<p>
-This intrinsics is lowered to the target dependent trap instruction. If the
-target does not have a trap instruction, this intrinsic will be lowered to the
-call of the abort() function.
-</p>
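+
+<p>For example, a sketch of terminating a path that must never be reached:</p>
+
+<pre>
+  call void @llvm.trap()
+  unreachable
+</pre>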
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_stackprotector">'<tt>llvm.stackprotector</tt>' Intrinsic</a>
</div>
+
<div class="doc_text">
+
<h5>Syntax:</h5>
<pre>
-declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
-
+ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
</pre>
+
<h5>Overview:</h5>
-<p>
- The <tt>llvm.stackprotector</tt> intrinsic takes the <tt>guard</tt> and stores
- it onto the stack at <tt>slot</tt>. The stack slot is adjusted to ensure that
- it is placed on the stack before local variables.
-</p>
+<p>The <tt>llvm.stackprotector</tt> intrinsic takes the <tt>guard</tt> and
+ stores it onto the stack at <tt>slot</tt>. The stack slot is adjusted to
+ ensure that it is placed on the stack before local variables.</p>
+
<h5>Arguments:</h5>
-<p>
- The <tt>llvm.stackprotector</tt> intrinsic requires two pointer arguments. The
- first argument is the value loaded from the stack guard
- <tt>@__stack_chk_guard</tt>. The second variable is an <tt>alloca</tt> that
- has enough space to hold the value of the guard.
-</p>
+<p>The <tt>llvm.stackprotector</tt> intrinsic requires two pointer
+ arguments. The first argument is the value loaded from the stack
+ guard <tt>@__stack_chk_guard</tt>. The second variable is an <tt>alloca</tt>
+ that has enough space to hold the value of the guard.</p>
+
<h5>Semantics:</h5>
-<p>
- This intrinsic causes the prologue/epilogue inserter to force the position of
- the <tt>AllocaInst</tt> stack slot to be before local variables on the
- stack. This is to ensure that if a local variable on the stack is overwritten,
- it will destroy the value of the guard. When the function exits, the guard on
- the stack is checked against the original guard. If they're different, then
- the program aborts by calling the <tt>__stack_chk_fail()</tt> function.
-</p>
+<p>This intrinsic causes the prologue/epilogue inserter to force the position of
+ the <tt>AllocaInst</tt> stack slot to be before local variables on the
+ stack. This is to ensure that if a local variable on the stack is
+ overwritten, it will destroy the value of the guard. When the function exits,
+ the guard on the stack is checked against the original guard. If they're
+ different, then the program aborts by calling the <tt>__stack_chk_fail()</tt>
+ function.</p>
+
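+<p>A minimal sketch of the IR a front-end might emit, assuming
+   <tt>@__stack_chk_guard</tt> is an <tt>i8*</tt> global:</p>
+
+<pre>
+  %slot = alloca i8*
+  %guard = load i8** @__stack_chk_guard
+  call void @llvm.stackprotector(i8* %guard, i8** %slot)
+</pre>
+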
</div>
<!-- *********************************************************************** -->
@@ -7221,7 +7274,7 @@ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-20 13:26:06 +0000 (Sat, 20 Jun 2009) $
+ Last modified: $Date: 2009-10-13 23:56:55 +0200 (Tue, 13 Oct 2009) $
</address>
</body>
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
index c9d1e190eab0..0934b47cbc96 100644
--- a/docs/LinkTimeOptimization.html
+++ b/docs/LinkTimeOptimization.html
@@ -166,7 +166,7 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
provided by the linker on various platforms are not unique. This means
this new tool needs to support all such features and platforms in one
super tool, or a separate tool per platform is required. This increases
- maintance cost for link time optimizer significantly, which is not
+ maintenance cost for link time optimizer significantly, which is not
necessary. This approach also requires staying synchronized with linker
developments on various platforms, which is not the main focus of the link
time optimizer. Finally, this approach increases end user's build time due
@@ -189,7 +189,7 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
user-supplied information, such as a list of exported symbols. LLVM
optimizer collects control flow information, data flow information and knows
much more about program structure from the optimizer's point of view.
- Our goal is to take advantage of tight intergration between the linker and
+ Our goal is to take advantage of tight integration between the linker and
the optimizer by sharing this information during various linking phases.
</p>
</div>
@@ -382,7 +382,7 @@ of the native object files.</p>
Devang Patel and Nick Kledzik<br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-12-16 04:07:49 +0100 (Tue, 16 Dec 2008) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index 39a04f7c84d1..36a4725edec3 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -261,7 +261,7 @@
<!-- ======================================================================= -->
<div class="doc_subsubsection"><a name="BCModules">Bitcode Modules</a></div>
<div class="doc_text">
- <p>In some situations, it is desireable to build a single bitcode module from
+ <p>In some situations, it is desirable to build a single bitcode module from
a variety of sources, instead of an archive, shared library, or bitcode
library. Bitcode modules can be specified in addition to any of the other
types of libraries by defining the <a href="#MODULE_NAME">MODULE_NAME</a>
@@ -626,6 +626,11 @@
<dd>If set to any value, causes a bitcode library (.bc) to be built.</dd>
<dt><a name="CONFIG_FILES"><tt>CONFIG_FILES</tt></a></dt>
<dd>Specifies a set of configuration files to be installed.</dd>
+ <dt><a name="DEBUG_SYMBOLS"><tt>DEBUG_SYMBOLS</tt></a></dt>
+ <dd>If set to any value, causes the build to include debugging
+ symbols even in optimized objects, libraries and executables. This
+ alters the flags specified to the compilers and linkers. Debugging
+ isn't fun in an optimized build, but it is possible.</dd>
<dt><a name="DIRS"><tt>DIRS</tt></a></dt>
<dd>Specifies a set of directories, usually children of the current
directory, that should also be made using the same goal. These directories
@@ -1016,7 +1021,7 @@
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-16 23:00:42 +0000 (Tue, 16 Jun 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
</html>
diff --git a/docs/Passes.html b/docs/Passes.html
index 5406be5e2df6..48f5adf62ef8 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -78,7 +78,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#anders-aa">-anders-aa</a></td><td>Andersen's Interprocedural Alias Analysis</td></tr>
<tr><td><a href="#basicaa">-basicaa</a></td><td>Basic Alias Analysis (default AA impl)</td></tr>
<tr><td><a href="#basiccg">-basiccg</a></td><td>Basic CallGraph Construction</td></tr>
-<tr><td><a href="#basicvn">-basicvn</a></td><td>Basic Value Numbering (default GVN impl)</td></tr>
<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Optimize for code generation</td></tr>
<tr><td><a href="#count-aa">-count-aa</a></td><td>Count Alias Analysis Query Responses</td></tr>
<tr><td><a href="#debug-aa">-debug-aa</a></td><td>AA use debugger</td></tr>
@@ -90,7 +89,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#globalsmodref-aa">-globalsmodref-aa</a></td><td>Simple mod/ref analysis for globals</td></tr>
<tr><td><a href="#instcount">-instcount</a></td><td>Counts the various types of Instructions</td></tr>
<tr><td><a href="#intervals">-intervals</a></td><td>Interval Partition Construction</td></tr>
-<tr><td><a href="#load-vn">-load-vn</a></td><td>Load Value Numbering</td></tr>
<tr><td><a href="#loops">-loops</a></td><td>Natural Loop Construction</td></tr>
<tr><td><a href="#memdep">-memdep</a></td><td>Memory Dependence Analysis</td></tr>
<tr><td><a href="#no-aa">-no-aa</a></td><td>No Alias Analysis (always returns 'may' alias)</td></tr>
@@ -125,11 +123,9 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#deadtypeelim">-deadtypeelim</a></td><td>Dead Type Elimination</td></tr>
<tr><td><a href="#die">-die</a></td><td>Dead Instruction Elimination</td></tr>
<tr><td><a href="#dse">-dse</a></td><td>Dead Store Elimination</td></tr>
-<tr><td><a href="#gcse">-gcse</a></td><td>Global Common Subexpression Elimination</td></tr>
<tr><td><a href="#globaldce">-globaldce</a></td><td>Dead Global Elimination</td></tr>
<tr><td><a href="#globalopt">-globalopt</a></td><td>Global Variable Optimizer</td></tr>
<tr><td><a href="#gvn">-gvn</a></td><td>Global Value Numbering</td></tr>
-<tr><td><a href="#gvnpre">-gvnpre</a></td><td>Global Value Numbering/Partial Redundancy Elimination</td></tr>
<tr><td><a href="#indmemrem">-indmemrem</a></td><td>Indirect Malloc and Free Removal</td></tr>
<tr><td><a href="#indvars">-indvars</a></td><td>Canonicalize Induction Variables</td></tr>
<tr><td><a href="#inline">-inline</a></td><td>Function Integration/Inlining</td></tr>
@@ -161,7 +157,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#mem2reg">-mem2reg</a></td><td>Promote Memory to Register</td></tr>
<tr><td><a href="#memcpyopt">-memcpyopt</a></td><td>Optimize use of memcpy and friends</td></tr>
<tr><td><a href="#mergereturn">-mergereturn</a></td><td>Unify function exit nodes</td></tr>
-<tr><td><a href="#predsimplify">-predsimplify</a></td><td>Predicate Simplifier</td></tr>
<tr><td><a href="#prune-eh">-prune-eh</a></td><td>Remove unused exception handling info</td></tr>
<tr><td><a href="#raiseallocs">-raiseallocs</a></td><td>Raise allocations from calls to instructions</td></tr>
<tr><td><a href="#reassociate">-reassociate</a></td><td>Reassociate expressions</td></tr>
@@ -274,6 +269,12 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
located at <code>getNode(<var>F</var>) + CallReturnPos</code>. The arguments
start at <code>getNode(<var>F</var>) + CallArgPos</code>.
</p>
+
+ <p>
+  Please keep in mind that the current Andersen's pass has many known
+ problems and bugs. It should be considered "research quality".
+ </p>
+
</div>
<!-------------------------------------------------------------------------- -->
@@ -298,25 +299,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
- <a name="basicvn">Basic Value Numbering (default Value Numbering impl)</a>
-</div>
-<div class="doc_text">
- <p>
- This is the default implementation of the <code>ValueNumbering</code>
- interface. It walks the SSA def-use chains to trivially identify
- lexically identical expressions. This does not require any ahead of time
- analysis, so it is a very fast default implementation.
- </p>
- <p>
- The ValueNumbering analysis passes are mostly deprecated. They are only used
- by the <a href="#gcse">Global Common Subexpression Elimination pass</a>, which
- is deprecated by the <a href="#gvn">Global Value Numbering pass</a> (which
- does its value numbering on its own).
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
<a name="codegenprepare">Optimize for code generation</a>
</div>
<div class="doc_text">
@@ -455,28 +437,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
- <a name="load-vn">Load Value Numbering</a>
-</div>
-<div class="doc_text">
- <p>
- This pass value numbers load and call instructions. To do this, it finds
- lexically identical load instructions, and uses alias analysis to determine
- which loads are guaranteed to produce the same value. To value number call
- instructions, it looks for calls to functions that do not write to memory
- which do not have intervening instructions that clobber the memory that is
- read from.
- </p>
-
- <p>
- This pass builds off of another value numbering pass to implement value
- numbering for non-load and non-call instructions. It uses Alias Analysis so
- that it can disambiguate the load instructions. The more powerful these base
- analyses are, the more powerful the resultant value numbering will be.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
<a name="loops">Natural Loop Construction</a>
</div>
<div class="doc_text">
@@ -859,23 +819,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
- <a name="gcse">Global Common Subexpression Elimination</a>
-</div>
-<div class="doc_text">
- <p>
- This pass is designed to be a very quick global transformation that
- eliminates global common subexpressions from a function. It does this by
- using an existing value numbering analysis pass to identify the common
- subexpressions, eliminating them when possible.
- </p>
- <p>
- This pass is deprecated by the <a href="#gvn">Global Value Numbering pass</a>
- (which does a better job with its own value numbering).
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
<a name="globaldce">Dead Global Elimination</a>
</div>
<div class="doc_text">
@@ -906,35 +849,11 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
</div>
<div class="doc_text">
<p>
- This pass performs global value numbering to eliminate fully redundant
- instructions. It also performs simple dead load elimination.
- </p>
- <p>
- Note that this pass does the value numbering itself, it does not use the
- ValueNumbering analysis passes.
+ This pass performs global value numbering to eliminate fully and partially
+ redundant instructions. It also performs redundant load elimination.
</p>
</div>
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
- <a name="gvnpre">Global Value Numbering/Partial Redundancy Elimination</a>
-</div>
-<div class="doc_text">
- <p>
- This pass performs a hybrid of global value numbering and partial redundancy
- elimination, known as GVN-PRE. It performs partial redundancy elimination on
- values, rather than lexical expressions, allowing a more comprehensive view
- the optimization. It replaces redundant values with uses of earlier
- occurences of the same value. While this is beneficial in that it eliminates
- unneeded computation, it also increases register pressure by creating large
- live ranges, and should be used with caution on platforms that are very
- sensitive to register pressure.
- </p>
- <p>
- Note that this pass does the value numbering itself, it does not use the
- ValueNumbering analysis passes.
- </p>
-</div>
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
@@ -1572,28 +1491,6 @@ if (X &lt; 3) {</pre>
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
- <a name="predsimplify">Predicate Simplifier</a>
-</div>
-<div class="doc_text">
- <p>
- Path-sensitive optimizer. In a branch where <tt>x == y</tt>, replace uses of
- <tt>x</tt> with <tt>y</tt>. Permits further optimization, such as the
- elimination of the unreachable call:
- </p>
-
-<blockquote><pre
->void test(int *p, int *q)
-{
- if (p != q)
- return;
-
- if (*p != *q)
- foo(); // unreachable
-}</pre></blockquote>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
<a name="prune-eh">Remove unused exception handling info</a>
</div>
<div class="doc_text">
@@ -1647,7 +1544,7 @@ if (X &lt; 3) {</pre>
<p>
This file demotes all registers to memory references. It is intended to be
the inverse of <a href="#mem2reg"><tt>-mem2reg</tt></a>. By converting to
- <tt>load</tt> instructions, the only values live accross basic blocks are
+ <tt>load</tt> instructions, the only values live across basic blocks are
<tt>alloca</tt> instructions and <tt>load</tt> instructions before
<tt>phi</tt> nodes. It is intended that this should make CFG hacking much
easier. To make later hacking easier, the entry block is split into two, such
@@ -1963,7 +1860,7 @@ if (X &lt; 3) {</pre>
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index b45a60b7611d..3234554f7cae 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -29,6 +29,13 @@
<ul>
<li><a href="#isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt>
and <tt>dyn_cast&lt;&gt;</tt> templates</a> </li>
+ <li><a href="#string_apis">Passing strings (the <tt>StringRef</tt>
+and <tt>Twine</tt> classes)</a>
+ <ul>
+ <li><a href="#StringRef">The <tt>StringRef</tt> class</a> </li>
+ <li><a href="#Twine">The <tt>Twine</tt> class</a> </li>
+ </ul>
+ </li>
<li><a href="#DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt>
option</a>
<ul>
@@ -79,6 +86,10 @@ option</a></li>
<li><a href="#dss_map">&lt;map&gt;</a></li>
<li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
</ul></li>
+ <li><a href="#ds_string">String-like containers</a>
+ <!--<ul>
+ todo
+ </ul>--></li>
<li><a href="#ds_bit">BitVector-like containers</a>
<ul>
<li><a href="#dss_bitvector">A dense bitvector</a></li>
@@ -136,6 +147,7 @@ with another <tt>Value</tt></a> </li>
</a></li>
<li><a href="#shutdown">Ending execution with <tt>llvm_shutdown()</tt></a></li>
<li><a href="#managedstatic">Lazy initialization with <tt>ManagedStatic</tt></a></li>
+ <li><a href="#llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a></li>
</ul>
</li>
@@ -424,6 +436,107 @@ are lots of examples in the LLVM source base.</p>
</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="string_apis">Passing strings (the <tt>StringRef</tt>
+and <tt>Twine</tt> classes)</a>
+</div>
+
+<div class="doc_text">
+
+<p>Although LLVM generally does not do much string manipulation, we do have
+several important APIs which take strings. Two important examples are the
+Value class -- which has names for instructions, functions, etc. -- and the
+StringMap class, which is used extensively in LLVM and Clang.</p>
+
+<p>These are generic classes, and they need to be able to accept strings which
+may have embedded null characters. Therefore, they cannot simply take
+a <tt>const char *</tt>, and taking a <tt>const std::string&amp;</tt> requires
+clients to perform a heap allocation which is usually unnecessary. Instead,
+many LLVM APIs use a <tt>const StringRef&amp;</tt> or a <tt>const
+Twine&amp;</tt> for passing strings efficiently.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="StringRef">The <tt>StringRef</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>StringRef</tt> data type represents a reference to a constant string
+(a character array and a length) and supports the common operations available
+on <tt>std::string</tt>, but does not require heap allocation.</p>
+
+<p>It can be implicitly constructed using a C style null-terminated string,
+an <tt>std::string</tt>, or explicitly with a character pointer and length.
+For example, the <tt>StringMap</tt> find function is declared as:</p>
+
+<div class="doc_code">
+ iterator find(const StringRef &amp;Key);
+</div>
+
+<p>and clients can call it using any one of:</p>
+
+<div class="doc_code">
+<pre>
+ Map.find("foo"); <i>// Lookup "foo"</i>
+ Map.find(std::string("bar")); <i>// Lookup "bar"</i>
+ Map.find(StringRef("\0baz", 4)); <i>// Lookup "\0baz"</i>
+</pre>
+</div>
+
+<p>Similarly, APIs which need to return a string may return a <tt>StringRef</tt>
+instance, which can be used directly or converted to an <tt>std::string</tt>
+using the <tt>str</tt> member function. See
+"<tt><a href="/doxygen/classllvm_1_1StringRef_8h-source.html">llvm/ADT/StringRef.h</a></tt>"
+for more information.</p>
+
+<p>You should rarely use the <tt>StringRef</tt> class directly. Because it
+contains pointers to external memory, it is not generally safe to store an
+instance of the class (unless you know that the external storage will not be
+freed).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="Twine">The <tt>Twine</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>Twine</tt> class is an efficient way for APIs to accept concatenated
+strings. For example, a common LLVM paradigm is to name one instruction based on
+the name of another instruction with a suffix:</p>
+
+<div class="doc_code">
+<pre>
+  New = CmpInst::Create(<i>...</i>, SO-&gt;getName() + ".cmp");
+</pre>
+</div>
+
+<p>The <tt>Twine</tt> class is effectively a
+lightweight <a href="http://en.wikipedia.org/wiki/Rope_(computer_science)">rope</a>
+which points to temporary (stack allocated) objects. Twines can be implicitly
+constructed as the result of the plus operator applied to strings (i.e., a C
+string, an <tt>std::string</tt>, or a <tt>StringRef</tt>). The twine delays the
+actual concatenation of strings until it is actually required, at which point
+it can be efficiently rendered directly into a character array. This avoids
+unnecessary heap allocation involved in constructing the temporary results of
+string concatenation. See
+"<tt><a href="/doxygen/classllvm_1_1Twine_8h-source.html">llvm/ADT/Twine.h</a></tt>"
+for more information.</p>
+
+<p>As with a <tt>StringRef</tt>, <tt>Twine</tt> objects point to external memory
+and should almost never be stored or mentioned directly. They are intended
+solely for use when defining a function which should be able to efficiently
+accept concatenated strings.</p>
+
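+<p>For example, a minimal sketch of such a function (the
+name <tt>createFoo</tt> is hypothetical):</p>
+
+<div class="doc_code">
+<pre>
+  <i>// Hypothetical API that accepts a possibly-concatenated name.</i>
+  void createFoo(const Twine &amp;Name) {
+    <i>// Render the concatenation only when the characters are needed.</i>
+    std::string NameStr = Name.str();
+    <i>...</i>
+  }
+</pre>
+</div>
+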
+</div>
+
+
<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt> option</a>
@@ -448,7 +561,7 @@ tool) is run with the '<tt>-debug</tt>' command line argument:</p>
<div class="doc_code">
<pre>
-DOUT &lt;&lt; "I am here!\n";
+DEBUG(errs() &lt;&lt; "I am here!\n");
</pre>
</div>
@@ -493,16 +606,16 @@ option as follows:</p>
<div class="doc_code">
<pre>
-DOUT &lt;&lt; "No debug type\n";
#undef DEBUG_TYPE
+DEBUG(errs() &lt;&lt; "No debug type\n");
#define DEBUG_TYPE "foo"
-DOUT &lt;&lt; "'foo' debug type\n";
+DEBUG(errs() &lt;&lt; "'foo' debug type\n");
#undef DEBUG_TYPE
#define DEBUG_TYPE "bar"
-DOUT &lt;&lt; "'bar' debug type\n";
+DEBUG(errs() &lt;&lt; "'bar' debug type\n"));
#undef DEBUG_TYPE
#define DEBUG_TYPE ""
-DOUT &lt;&lt; "No debug type (2)\n";
+DEBUG(errs() &lt;&lt; "No debug type (2)\n");
</pre>
</div>
@@ -534,6 +647,21 @@ on when the name is specified. This allows, for example, all debug information
for instruction scheduling to be enabled with <tt>-debug-type=InstrSched</tt>,
even if the source lives in multiple files.</p>
+<p>The <tt>DEBUG_WITH_TYPE</tt> macro is also available for situations where you
+would like to set <tt>DEBUG_TYPE</tt>, but only for one specific <tt>DEBUG</tt>
+statement. It takes an additional first parameter, which is the type to use.
+The preceding example could be written as:</p>
+
+<div class="doc_code">
+<pre>
+DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type\n");
+DEBUG_WITH_TYPE("foo", errs() &lt;&lt; "'foo' debug type\n");
+DEBUG_WITH_TYPE("bar", errs() &lt;&lt; "'bar' debug type\n"));
+DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type (2)\n");
+</pre>
+</div>
+
</div>
<!-- ======================================================================= -->
@@ -726,6 +854,10 @@ access the container. Based on that, you should use:</p>
iteration, but do not support efficient look-up based on a key.
</li>
+<li>a <a href="#ds_string">string</a> container is a specialized sequential
+ container or reference structure that is used for character or byte
+ arrays.</li>
+
<li>a <a href="#ds_bit">bit</a> container provides an efficient way to store and
perform set operations on sets of numeric id's, while automatically
eliminating duplicates. Bit containers require a maximum of 1 bit for each
@@ -1399,6 +1531,20 @@ always better.</p>
<!-- ======================================================================= -->
<div class="doc_subsection">
+ <a name="ds_string">String-like containers</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+TODO: const char* vs stringref vs smallstring vs std::string. Describe twine,
+xref to #string_apis.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
<a name="ds_bit">Bit storage containers (BitVector, SparseBitVector)</a>
</div>
@@ -1508,7 +1654,7 @@ an example that prints the name of a <tt>BasicBlock</tt> and the number of
for (Function::iterator i = func-&gt;begin(), e = func-&gt;end(); i != e; ++i)
// <i>Print out the name of the basic block if it has one, and then the</i>
// <i>number of instructions that it contains</i>
- llvm::cerr &lt;&lt; "Basic block (name=" &lt;&lt; i-&gt;getName() &lt;&lt; ") has "
+ errs() &lt;&lt; "Basic block (name=" &lt;&lt; i-&gt;getName() &lt;&lt; ") has "
&lt;&lt; i-&gt;size() &lt;&lt; " instructions.\n";
</pre>
</div>
@@ -1541,14 +1687,14 @@ a <tt>BasicBlock</tt>:</p>
for (BasicBlock::iterator i = blk-&gt;begin(), e = blk-&gt;end(); i != e; ++i)
// <i>The next statement works since operator&lt;&lt;(ostream&amp;,...)</i>
// <i>is overloaded for Instruction&amp;</i>
- llvm::cerr &lt;&lt; *i &lt;&lt; "\n";
+ errs() &lt;&lt; *i &lt;&lt; "\n";
</pre>
</div>
<p>However, this isn't really the best way to print out the contents of a
<tt>BasicBlock</tt>! Since the ostream operators are overloaded for virtually
anything you'll care about, you could have just invoked the print routine on the
-basic block itself: <tt>llvm::cerr &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
+basic block itself: <tt>errs() &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
</div>
@@ -1574,7 +1720,7 @@ small example that shows how to dump all instructions in a function to the stand
// <i>F is a pointer to a Function instance</i>
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- llvm::cerr &lt;&lt; *I &lt;&lt; "\n";
+ errs() &lt;&lt; *I &lt;&lt; "\n";
</pre>
</div>
@@ -1653,7 +1799,7 @@ without actually obtaining it via iteration over some structure:</p>
void printNextInstruction(Instruction* inst) {
BasicBlock::iterator it(inst);
++it; // <i>After this line, it refers to the instruction after *inst</i>
- if (it != inst-&gt;getParent()-&gt;end()) llvm::cerr &lt;&lt; *it &lt;&lt; "\n";
+ if (it != inst-&gt;getParent()-&gt;end()) errs() &lt;&lt; *it &lt;&lt; "\n";
}
</pre>
</div>
@@ -1771,8 +1917,8 @@ Function *F = ...;
for (Value::use_iterator i = F-&gt;use_begin(), e = F-&gt;use_end(); i != e; ++i)
if (Instruction *Inst = dyn_cast&lt;Instruction&gt;(*i)) {
- llvm::cerr &lt;&lt; "F is used in instruction:\n";
- llvm::cerr &lt;&lt; *Inst &lt;&lt; "\n";
+ errs() &lt;&lt; "F is used in instruction:\n";
+ errs() &lt;&lt; *Inst &lt;&lt; "\n";
}
</pre>
</div>
@@ -2257,6 +2403,50 @@ and only if you know what you're doing!
</p>
</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a>
+</div>
+
+<div class="doc_text">
+<p>
+<tt>LLVMContext</tt> is an opaque class in the LLVM API which clients can use
+to operate multiple, isolated instances of LLVM concurrently within the same
+address space. For instance, in a hypothetical compile-server, the compilation
+of an individual translation unit is conceptually independent from all the
+others, and it would be desirable to be able to compile incoming translation
+units concurrently on independent server threads. Fortunately,
+<tt>LLVMContext</tt> exists to enable just this kind of scenario!
+</p>
+
+<p>
+Conceptually, <tt>LLVMContext</tt> provides isolation. Every LLVM entity
+(<tt>Module</tt>s, <tt>Value</tt>s, <tt>Type</tt>s, <tt>Constant</tt>s, etc.)
+in LLVM's in-memory IR belongs to an <tt>LLVMContext</tt>. Entities in
+different contexts <em>cannot</em> interact with each other: <tt>Module</tt>s in
+different contexts cannot be linked together, <tt>Function</tt>s cannot be added
+to <tt>Module</tt>s in different contexts, etc. What this means is that it is
+safe to compile on multiple threads simultaneously, as long as no two threads
+operate on entities within the same context.
+</p>
+
+<p>
+In practice, very few places in the API require the explicit specification of
+an <tt>LLVMContext</tt>, other than the <tt>Type</tt> creation/lookup APIs.
+Because every <tt>Type</tt> carries a reference to its owning context, most
+other entities can determine what context they belong to by looking at their
+own <tt>Type</tt>. If you are adding new entities to LLVM IR, please try to
+maintain this interface design.
+</p>
+
+<p>
+For clients that do <em>not</em> require the benefits of isolation, LLVM
+provides a convenience API <tt>getGlobalContext()</tt>. This returns a global,
+lazily initialized <tt>LLVMContext</tt> that may be used in situations where
+isolation is not a concern.
+</p>
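+
+<p>
+A minimal sketch of the isolation this provides:
+</p>
+
+<div class="doc_code">
+<pre>
+LLVMContext Ctx1, Ctx2;
+<i>// These modules belong to different contexts, so they may safely be</i>
+<i>// manipulated by two threads at the same time.</i>
+Module *M1 = new Module("first", Ctx1);
+Module *M2 = new Module("second", Ctx2);
+</pre>
+</div>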
+</div>
+
<!-- *********************************************************************** -->
<div class="doc_section">
<a name="advanced">Advanced Topics</a>
@@ -2793,7 +2983,7 @@ the <tt>lib/VMCore</tt> directory.</p>
<dt><tt>VectorType</tt></dt>
<dd>Subclass of SequentialType for vector types. A
vector type is similar to an ArrayType but is distinguished because it is
- a first class type wherease ArrayType is not. Vector types are used for
+ a first class type whereas ArrayType is not. Vector types are used for
vector operations and are usually small vectors of an integer or floating
point type.</dd>
<dt><tt>StructType</tt></dt>
@@ -3353,7 +3543,7 @@ Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>,
<a href="#Value"><tt>Value</tt></a></p>
<p>The <tt>Function</tt> class represents a single procedure in LLVM. It is
-actually one of the more complex classes in the LLVM heirarchy because it must
+actually one of the more complex classes in the LLVM hierarchy because it must
keep track of a large amount of data. The <tt>Function</tt> class keeps track
of a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, a list of formal
<a href="#Argument"><tt>Argument</tt></a>s, and a
@@ -3362,7 +3552,7 @@ of a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, a list of formal
<p>The list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s is the most
commonly used part of <tt>Function</tt> objects. The list imposes an implicit
ordering of the blocks in the function, which indicates how the code will be
-layed out by the backend. Additionally, the first <a
+laid out by the backend. Additionally, the first <a
href="#BasicBlock"><tt>BasicBlock</tt></a> is the implicit entry node for the
<tt>Function</tt>. It is not legal in LLVM to explicitly branch to this initial
block. There are no implicit exit nodes, and in fact there may be multiple exit
@@ -3492,7 +3682,7 @@ Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>,
<a href="#User"><tt>User</tt></a>,
<a href="#Value"><tt>Value</tt></a></p>
-<p>Global variables are represented with the (suprise suprise)
+<p>Global variables are represented with the (surprise surprise)
<tt>GlobalVariable</tt> class. Like functions, <tt>GlobalVariable</tt>s are also
subclasses of <a href="#GlobalValue"><tt>GlobalValue</tt></a>, and as such are
always referenced by their address (global values must live in memory, so their
@@ -3542,7 +3732,7 @@ never change at runtime).</p>
<li><tt><a href="#Constant">Constant</a> *getInitializer()</tt>
- <p>Returns the intial value for a <tt>GlobalVariable</tt>. It is not legal
+ <p>Returns the initial value for a <tt>GlobalVariable</tt>. It is not legal
to call this method if there is no initializer.</p></li>
</ul>
@@ -3664,7 +3854,7 @@ arguments. An argument has a pointer to the parent Function.</p>
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-17 21:12:26 +0000 (Wed, 17 Jun 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/Projects.html b/docs/Projects.html
index 882ce2eb234d..582c4e2cbf3f 100644
--- a/docs/Projects.html
+++ b/docs/Projects.html
@@ -121,8 +121,8 @@ configure script with these commands:
% ./AutoRegen.sh</tt></p>
</div>
-<p>You must be using Autoconf version 2.59 or later and your aclocal version
-should 1.9 or later.</p></li>
+<p>You must be using Autoconf version 2.59 or later and your aclocal version
+should be 1.9 or later.</p></li>
<li>Run <tt>configure</tt> in the directory in which you want to place
object code. Use the following options to tell your project where it
@@ -453,7 +453,7 @@ Mailing List</a>.</p>
<a href="mailto:criswell@uiuc.edu">John Criswell</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2009-01-12 22:29:24 +0100 (Mon, 12 Jan 2009) $
+ Last modified: $Date: 2009-08-13 22:08:52 +0200 (Thu, 13 Aug 2009) $
</address>
</body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index b0165b05cf0a..870705224c28 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -4,17 +4,17 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" href="llvm.css" type="text/css">
- <title>LLVM 2.5 Release Notes</title>
+ <title>LLVM 2.6 Release Notes</title>
</head>
<body>
-<div class="doc_title">LLVM 2.5 Release Notes</div>
+<div class="doc_title">LLVM 2.6 Release Notes</div>
<ol>
<li><a href="#intro">Introduction</a></li>
<li><a href="#subproj">Sub-project Status Update</a></li>
- <li><a href="#externalproj">External Projects Using LLVM 2.5</a></li>
- <li><a href="#whatsnew">What's New in LLVM 2.5?</a></li>
+ <li><a href="#externalproj">External Projects Using LLVM 2.6</a></li>
+ <li><a href="#whatsnew">What's New in LLVM 2.6?</a></li>
<li><a href="GettingStarted.html">Installation Instructions</a></li>
<li><a href="#portability">Portability and Supported Platforms</a></li>
<li><a href="#knownproblems">Known Problems</a></li>
@@ -34,7 +34,7 @@
<div class="doc_text">
<p>This document contains the release notes for the LLVM Compiler
-Infrastructure, release 2.5. Here we describe the status of LLVM, including
+Infrastructure, release 2.6. Here we describe the status of LLVM, including
major improvements from the previous release and significant known problems.
All LLVM releases may be downloaded from the <a
href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
@@ -51,25 +51,37 @@ current one. To see the release notes for a specific release, please see the
<a href="http://llvm.org/releases/">releases page</a>.</p>
</div>
-
-<!-- Unfinished features in 2.5:
- Machine LICM
- Machine Sinking
- target-specific intrinsics
- gold lto plugin
- pre-alloc splitter, strong phi elim
- <tt>llc -enable-value-prop</tt>, propagation of value info
- (sign/zero ext info) from one MBB to another
- debug info for optimized code
- interpreter + libffi
+
+
+<!--
+Almost dead code.
+ include/llvm/Analysis/LiveValues.h => Dan
+ lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8.
+ llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8.
+-->
+
+
+<!-- Unfinished features in 2.6:
+ gcc plugin.
+ strong phi elim
+ variable debug info for optimized code
postalloc scheduler: anti dependence breaking, hazard recognizer?
-
-initial support for debug line numbers when optimization enabled, not useful in
- 2.5 but will be for 2.6.
-
+ metadata
+ loop dependence analysis
+ ELF Writer? How stable?
+ <li>PostRA scheduler improvements, ARM adoption (David Goodwin).</li>
+ 2.7 supports the GDB 7.0 jit interfaces for debug info.
+ 2.7 eliminates ADT/iterator.h
-->
<!-- for announcement email:
+ Logo web page.
+ llvm devmtg
+ compiler_rt
+ KLEE web page at klee.llvm.org
+ Many new papers added to /pubs/
+ Mention gcc plugin.
+
-->
<!-- *********************************************************************** -->
@@ -80,12 +92,11 @@ initial support for debug line numbers when optimization enabled, not useful in
<div class="doc_text">
<p>
-The LLVM 2.5 distribution currently consists of code from the core LLVM
-repository &mdash;which roughly includes the LLVM optimizers, code generators
-and supporting tools &mdash; and the llvm-gcc repository. In addition to this
-code, the LLVM Project includes other sub-projects that are in development. The
-two which are the most actively developed are the <a href="#clang">Clang
-Project</a> and the <a href="#vmkit">VMKit Project</a>.
+The LLVM 2.6 distribution currently consists of code from the core LLVM
+repository (which roughly includes the LLVM optimizers, code generators
+and supporting tools), the Clang repository and the llvm-gcc repository. In
+addition to this code, the LLVM Project includes other sub-projects that are in
+development. Here we include updates on these subprojects.
</p>
</div>
@@ -99,37 +110,30 @@ Project</a> and the <a href="#vmkit">VMKit Project</a>.
<div class="doc_text">
<p>The <a href="http://clang.llvm.org/">Clang project</a> is an effort to build
-a set of new 'LLVM native' front-end technologies for the LLVM optimizer and
-code generator. While Clang is not included in the LLVM 2.5 release, it is
-continuing to make major strides forward in all areas. Its C and Objective-C
-parsing and code generation support is now very solid. For example, it is
-capable of successfully building many real-world applications for X86-32
-and X86-64,
-including the <a href="http://wiki.freebsd.org/BuildingFreeBSDWithClang">FreeBSD
-kernel</a> and <a href="http://gcc.gnu.org/gcc-4.2/">gcc 4.2</a>. C++ is also
-making <a href="http://clang.llvm.org/cxx_status.html">incredible progress</a>,
-and work on templates has recently started. If you are
-interested in fast compiles and good diagnostics, we encourage you to try it out
-by <a href="http://clang.llvm.org/get_started.html">building from mainline</a>
-and reporting any issues you hit to the <a
+a set of new 'LLVM native' front-end technologies for the C family of languages.
+LLVM 2.6 is the first release to officially include Clang, and it provides a
+production quality C and Objective-C compiler. If you are interested in <a
+href="http://clang.llvm.org/performance.html">fast compiles</a> and
+<a href="http://clang.llvm.org/diagnostics.html">good diagnostics</a>, we
+encourage you to try it out. Clang currently compiles typical Objective-C code
+3x faster than GCC and compiles C code about 30% faster than GCC at -O0 -g
+(which is when the most pressure is on the frontend).</p>
+
+<p>In addition to supporting these languages, C++ support is also <a
+href="http://clang.llvm.org/cxx_status.html">well under way</a>, and mainline
+Clang is able to parse the libstdc++ 4.2 headers and even codegen simple apps.
+If you are interested in Clang C++ support or any other Clang feature, we
+strongly encourage you to get involved on the <a
href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev">Clang front-end mailing
list</a>.</p>
-<p>In the LLVM 2.5 time-frame, the Clang team has made many improvements:</p>
+<p>In the LLVM 2.6 time-frame, the Clang team has made many improvements:</p>
<ul>
-<li>Clang now has a new driver, which is focused on providing a GCC-compatible
- interface.</li>
-<li>The X86-64 ABI is now supported, including support for the Apple
- 64-bit Objective-C runtime and zero cost exception handling.</li>
-<li>Precompiled header support is now implemented.</li>
-<li>Objective-C support is significantly improved beyond LLVM 2.4, supporting
- many features, such as Objective-C Garbage Collection.</li>
-<li>Variable length arrays are now fully supported.</li>
-<li>C99 designated initializers are now fully supported.</li>
-<li>Clang now includes all major compiler headers, including a
- redesigned <i>tgmath.h</i> and several more intrinsic headers.</li>
-<li>Many many bugs are fixed and many features have been added.</li>
+<li>C and Objective-C support are now considered production quality.</li>
+<li>AuroraUX, FreeBSD and OpenBSD are now supported.</li>
+<li>Most of Objective-C 2.0 is now supported with the GNU runtime.</li>
+<li>Many many bugs are fixed and lots of features have been added.</li>
</ul>
</div>
@@ -140,19 +144,18 @@ list</a>.</p>
<div class="doc_text">
-<p>Previously announced in the last LLVM release, the Clang project also
+<p>Previously announced in the 2.4 and 2.5 LLVM releases, the Clang project also
includes an early stage static source code analysis tool for <a
href="http://clang.llvm.org/StaticAnalysis.html">automatically finding bugs</a>
-in C and Objective-C programs. The tool performs a growing set of checks to find
+in C and Objective-C programs. The tool performs checks to find
bugs that occur on a specific path within a program.</p>
-<p>In the LLVM 2.5 time-frame there have been many significant improvements to
-the analyzer's core path simulation engine and machinery for generating
-path-based bug reports to end-users. Particularly noteworthy improvements
-include experimental support for full field-sensitivity and reasoning about heap
-objects as well as an improved value-constraints subengine that does a much
-better job of reasoning about inequality relationships (e.g., <tt>x &gt; 2</tt>)
-between variables and constants.
+<p>In the LLVM 2.6 time-frame, the analyzer core has undergone several important
+improvements and cleanups and now includes a new <em>Checker</em> interface that
+is intended to eventually serve as a basis for domain-specific checks. Further,
+in addition to generating HTML files for reporting analysis results, the
+analyzer can now also emit bug reports in a structured XML format that is
+intended to be easily readable by other programs.</p>
<p>The set of checks performed by the static analyzer continues to expand, and
future plans for the tool include full source-level inter-procedural analysis
@@ -170,44 +173,191 @@ this project is encouraged to get involved!</p>
<div class="doc_text">
<p>
The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation of
-a JVM and a CLI Virtual Machines (Microsoft .NET is an
-implementation of the CLI) using the Just-In-Time compiler of LLVM.</p>
+a JVM and a CLI Virtual Machine (Microsoft .NET is an
+implementation of the CLI) using LLVM for static and just-in-time
+compilation.</p>
-<p>Following LLVM 2.5, VMKit has its second release that you can find on its
-<a href="http://vmkit.llvm.org/releases/">webpage</a>. The release includes
+<p>
+VMKit version 0.26 builds with LLVM 2.6 and you can find it on its
+<a href="http://vmkit.llvm.org/releases/">web page</a>. The release includes
bug fixes, cleanup and new features. The major changes are:</p>
<ul>
-<li>Ahead of Time compiler: compiles .class files to llvm .bc. VMKit uses this
-functionality to native compile the standard classes (e.g. java.lang.String).
-Users can compile AoT .class files into dynamic libraries and run them with the
-help of VMKit.</li>
+<li>A new llcj tool to generate shared libraries or executables of Java
+ files.</li>
+<li>Cooperative garbage collection. </li>
+<li>Fast subtype checking (paper from Click et al [JGI'02]). </li>
+<li>Implementation of a two-word header for Java objects instead of the original
+ three-word header. </li>
+<li>Better Java specification-compliance: division by zero checks, stack
+ overflow checks, finalization and references support. </li>
-<li>New exception model: the dwarf exception model is very slow for
-exception-intensive applications, so the JVM has had a new implementation of
-exceptions which check at each function call if an exception happened. There is
-a low performance penalty on applications without exceptions, but it is a big
-gain for exception-intensive applications. For example the jack benchmark in
-Spec JVM98 is 6x faster (performance gain of 83%).</li>
+</ul>
+</div>
-<li>User-level management of thread stacks, so that thread local data access
-at runtime is fast and portable. </li>
-<li>Implementation of biased locking for faster object synchronizations at
-runtime.</li>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="compiler-rt">compiler-rt: Compiler Runtime Library</a>
+</div>
-<li>New support for OSX/X64, Linux/X64 (with the Boehm GC) and Linux/ppc32.</li>
+<div class="doc_text">
+<p>
+The new LLVM <a href="http://compiler-rt.llvm.org/">compiler-rt project</a>
+is a simple library that provides an implementation of the low-level
+target-specific hooks required by code generation and other runtime components.
+For example, when compiling for a 32-bit target, converting a double to a 64-bit
+unsigned integer is compiled into a runtime call to the "__fixunsdfdi"
+function. The compiler-rt library provides highly optimized implementations of
+this and other low-level routines (some are 3x faster than the equivalent
+libgcc routines).</p>
-</ul>
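+
+<p>
+For example, a sketch of C code whose cast is lowered, on a 32-bit target, to a
+call to <tt>__fixunsdfdi</tt> (the function name <tt>to_u64</tt> is
+illustrative):</p>
+
+<div class="doc_code">
+<pre>
+unsigned long long to_u64(double d) {
+  return (unsigned long long)d;  <i>/* becomes __fixunsdfdi(d) */</i>
+}
+</pre>
+</div>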
+<p>
+All of the code in the compiler-rt project is available under the standard LLVM
+License, a "BSD-style" license.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="klee">KLEE: Symbolic Execution and Automatic Test Case Generator</a>
</div>
+<div class="doc_text">
+<p>
+The new LLVM <a href="http://klee.llvm.org/">KLEE project</a> is a symbolic
+execution framework for programs in LLVM bitcode form. KLEE tries to
+symbolically evaluate "all" paths through the application and records state
+transitions that lead to fault states. This allows it to construct testcases
+that lead to faults and can even be used to verify algorithms. For more
+details, please see the <a
+href="http://llvm.org/pubs/2008-12-OSDI-KLEE.html">OSDI 2008 paper</a> about
+KLEE.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="dragonegg">DragonEgg: GCC-4.5 as an LLVM frontend</a>
+</div>
+
+<div class="doc_text">
+<p>
+The goal of <a href="http://dragonegg.llvm.org/">DragonEgg</a> is to make
+gcc-4.5 act like llvm-gcc without requiring any gcc modifications whatsoever.
+<a href="http://dragonegg.llvm.org/">DragonEgg</a> is a shared library (llvm.so)
+that is loaded by gcc at runtime. It uses the new gcc plugin architecture to
+disable the GCC optimizers and code generators, and schedule the LLVM optimizers
+and code generators (or direct output of LLVM IR) instead. Currently only Linux
+and Darwin are supported, and only on x86-32 and x86-64. It should be easy to
+add support for additional Unix-like operating systems and processor families. In theory
+it should be possible to use <a href="http://dragonegg.llvm.org/">DragonEgg</a>
+with any language supported by gcc; however, only C and Fortran work well for
+the moment. Ada and C++ work to some extent, while Java, Obj-C and Obj-C++ are so
+far entirely untested. Since gcc-4.5 has not yet been released, neither has
+<a href="http://dragonegg.llvm.org/">DragonEgg</a>. To build
+<a href="http://dragonegg.llvm.org/">DragonEgg</a> you will need to check out the
+development versions of <a href="http://gcc.gnu.org/svn.html/"> gcc</a>,
+<a href="http://llvm.org/docs/GettingStarted.html#checkout">llvm</a> and
+<a href="http://dragonegg.llvm.org/">DragonEgg</a> from their respective
+subversion repositories, and follow the instructions in the
+<a href="http://dragonegg.llvm.org/">DragonEgg</a> README.
+</p>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="mc">llvm-mc: Machine Code Toolkit</a>
+</div>
+
+<div class="doc_text">
+<p>
+The LLVM Machine Code (MC) Toolkit project is a (very early) effort to build
+better tools for dealing with machine code, object file formats, etc. The idea
+is to be able to generate most of the target specific details of assemblers and
+disassemblers from existing LLVM target .td files (with suitable enhancements),
+and to build infrastructure for reading and writing common object file formats.
+One of the first deliverables is to build a full assembler and integrate it into
+the compiler, which is predicted to substantially reduce compile time in some
+scenarios.
+</p>
+
+<p>In the LLVM 2.6 timeframe, the MC framework has grown to the point where it
+can reliably parse and pretty print (with some encoding information) a
+darwin/x86 .s file successfully, and has the very early phases of a Mach-O
+assembler in progress. Beyond the MC framework itself, major refactoring of the
+LLVM code generator has started. The idea is to make the code generator reason
+about the code it is producing in a much more semantic way, rather than a
+textual way. For example, the code generator now uses MCSection objects to
+represent section assignments, instead of text strings that print to .section
+directives.</p>
+
+<p>MC is an early and ongoing project that will hopefully continue to lead to
+many improvements in the code generator and build infrastructure useful for many
+other situations.
+</p>
+
+</div>
+
+
<!-- *********************************************************************** -->
<div class="doc_section">
- <a name="externalproj">External Projects Using LLVM 2.5</a>
+ <a name="externalproj">External Open Source Projects Using LLVM 2.6</a>
</div>
<!-- *********************************************************************** -->
+<div class="doc_text">
+
+<p>An exciting aspect of LLVM is that it is used as an enabling technology for
+ a lot of other language and tools projects. This section lists some of the
+ projects that have already been updated to work with LLVM 2.6.</p>
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="Rubinius">Rubinius</a>
+</div>
+
+<div class="doc_text">
+<p><a href="http://github.com/evanphx/rubinius">Rubinius</a> is an environment
+for running Ruby code which strives to write as much of the core class
+implementation in Ruby as possible. Combined with a bytecode interpreting VM, it
+uses LLVM to optimize and compile Ruby code down to machine code. Techniques
+such as type feedback, method inlining, and uncommon traps are all used to
+remove dynamism from Ruby execution and increase performance.</p>
+
+<p>Since LLVM 2.5, Rubinius has made several major leaps forward, implementing
+a counter based JIT, type feedback and speculative method inlining.
+</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="macruby">MacRuby</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+<a href="http://macruby.org">MacRuby</a> is an implementation of Ruby on top of
+core Mac OS X technologies, such as the Objective-C common runtime and garbage
+collector and the CoreFoundation framework. It is principally developed by
+Apple and aims at enabling the creation of full-fledged Mac OS X applications.
+</p>
+
+<p>
+MacRuby uses LLVM for optimization passes, JIT and AOT compilation of Ruby
+expressions. It also uses zero-cost DWARF exceptions to implement Ruby exception
+handling.</p>
+
+</div>
+
+
<!--=========================================================================-->
<div class="doc_subsection">
<a name="pure">Pure</a>
@@ -224,12 +374,8 @@ built-in list and matrix support (including list and matrix comprehensions) and
an easy-to-use C interface. The interpreter uses LLVM as a backend to
JIT-compile Pure programs to fast native code.</p>
-<p>In addition to the usual algebraic data structures, Pure also has
-MATLAB-style matrices in order to support numeric computations and signal
-processing in an efficient way. Pure is mainly aimed at mathematical
-applications right now, but it has been designed as a general purpose language.
-The dynamic interpreter environment and the C interface make it possible to use
-it as a kind of functional scripting language for many application areas.
+<p>Pure versions 0.31 and later have been tested and are known to work with
+LLVM 2.6 (and continue to work with older LLVM releases &gt;= 2.3 as well).
</p>
</div>
@@ -243,11 +389,11 @@ it as a kind of functional scripting language for many application areas.
<p>
<a href="http://www.dsource.org/projects/ldc">LDC</a> is an implementation of
the D Programming Language using the LLVM optimizer and code generator.
-The LDC project works great with the LLVM 2.5 release. General improvements in
+The LDC project works great with the LLVM 2.6 release. General improvements in
this
cycle have included new inline asm constraint handling, better debug info
-support, general bugfixes, and better x86-64 support. This has allowed
-some major improvements in LDC, getting us much closer to being as
+support, general bug fixes and better x86-64 support. This has allowed
+some major improvements in LDC, getting it much closer to being as
fully featured as the original DMD compiler from DigitalMars.
</p>
</div>
@@ -258,142 +404,160 @@ fully featured as the original DMD compiler from DigitalMars.
</div>
<div class="doc_text">
-<p><a href="http://code.roadsend.com/rphp">Roadsend PHP</a> (rphp) is an open
+<p>
+<a href="http://code.roadsend.com/rphp">Roadsend PHP</a> (rphp) is an open
source implementation of the PHP programming
-language that uses LLVM for its optimizer, JIT, and static compiler. This is a
+language that uses LLVM for its optimizer, JIT and static compiler. This is a
reimplementation of an earlier project that is now based on LLVM.</p>
</div>
-
-<!-- *********************************************************************** -->
-<div class="doc_section">
- <a name="whatsnew">What's New in LLVM 2.5?</a>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="UnladenSwallow">Unladen Swallow</a>
</div>
-<!-- *********************************************************************** -->
<div class="doc_text">
-
-<p>This release includes a huge number of bug fixes, performance tweaks, and
-minor improvements. Some of the major improvements and new features are listed
-in this section.
-</p>
+<p>
+<a href="http://code.google.com/p/unladen-swallow/">Unladen Swallow</a> is a
+branch of <a href="http://python.org/">Python</a> intended to be fully
+compatible and significantly faster. It uses LLVM's optimization passes and JIT
+compiler.</p>
</div>
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="majorfeatures">Major New Features</a>
+<a name="llvm-lua">llvm-lua</a>
</div>
<div class="doc_text">
+<p>
+<a href="http://code.google.com/p/llvm-lua/">LLVM-Lua</a> uses LLVM to add JIT
+and static compiling support to the Lua VM. Lua bytecode is analyzed to
+remove type checks, then LLVM is used to compile the bytecode down to machine
+code.</p>
+</div>
-<p>LLVM 2.5 includes several major new capabilities:</p>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="icedtea">IcedTea Java Virtual Machine Implementation</a>
+</div>
-<ul>
-<li>LLVM 2.5 includes a brand new <a
-href="http://en.wikipedia.org/wiki/XCore">XCore</a> backend.</li>
+<div class="doc_text">
+<p>
+<a href="http://icedtea.classpath.org/wiki/Main_Page">IcedTea</a> provides a
+harness to build OpenJDK using only free software build tools, and provides
+replacements for the not-yet-free parts of OpenJDK. One of the extensions that
+IcedTea provides is a new JIT compiler named <a
+href="http://icedtea.classpath.org/wiki/ZeroSharkFaq">Shark</a> which uses LLVM
+to provide native code generation without introducing processor-dependent
+code.
+</p>
+</div>
-<li>llvm-gcc now generally supports the GFortran front-end, and the precompiled
-release binaries now support Fortran, even on Mac OS/X.</li>
-<li>CMake is now used by the <a href="GettingStartedVS.html">LLVM build process
-on Windows</a>. It automatically generates Visual Studio project files (and
-more) from a set of simple text files. This makes it much easier to
-maintain. In time, we'd like to standardize on CMake for everything.</li>
-<li>LLVM 2.5 now uses (and includes) Google Test for unit testing.</li>
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="whatsnew">What's New in LLVM 2.6?</a>
+</div>
+<!-- *********************************************************************** -->
-<li>The LLVM native code generator now supports arbitrary precision integers.
-Types like <tt>i33</tt> have long been valid in the LLVM IR, but were previously
-only supported by the interpreter. Note that the C backend still does not
-support these.</li>
+<div class="doc_text">
-<li>LLVM 2.5 no longer uses 'bison,' so it is easier to build on Windows.</li>
-</ul>
+<p>This release includes a huge number of bug fixes, performance tweaks and
+minor improvements. Some of the major improvements and new features are listed
+in this section.
+</p>
</div>
-
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="llvm-gcc">llvm-gcc 4.2 Improvements</a>
+<a name="majorfeatures">Major New Features</a>
</div>
<div class="doc_text">
-<p>LLVM fully supports the llvm-gcc 4.2 front-end, which marries the GCC
-front-ends and driver with the LLVM optimizer and code generator. It currently
-includes support for the C, C++, Objective-C, Ada, and Fortran front-ends.</p>
+<p>LLVM 2.6 includes several major new capabilities:</p>
<ul>
-<li>In this release, the GCC inliner is completely disabled. Previously the GCC
-inliner was used to handle always-inline functions and other cases. This caused
-problems with code size growth, and it is completely disabled in this
-release.</li>
-
-<li>llvm-gcc (and LLVM in general) now support code generation for stack
-canaries, which is an effective form of <a
-href="http://en.wikipedia.org/wiki/Stack-smashing_protection">buffer overflow
-protection</a>. llvm-gcc supports this with the <tt>-fstack-protector</tt>
-command line option (just like GCC). In LLVM IR, you can request code
-generation for stack canaries with function attributes.
-</li>
+<li>New <a href="#compiler-rt">compiler-rt</a>, <a href="#klee">KLEE</a>
+    and <a href="#mc">machine code toolkit</a> sub-projects.</li>
+<li>Debug information now includes line numbers when optimizations are enabled.
+ This allows statistical sampling tools like OProfile and Shark to map
+ samples back to source lines.</li>
+<li>LLVM now includes new experimental backends to support the MSP430, SystemZ
+ and BlackFin architectures.</li>
+<li>LLVM supports a new <a href="GoldPlugin.html">Gold Linker Plugin</a> which
+ enables support for <a href="LinkTimeOptimization.html">transparent
+ link-time optimization</a> on ELF targets when used with the Gold binutils
+ linker.</li>
+<li>LLVM now supports doing optimization and code generation on multiple
+ threads. Please see the <a href="ProgrammersManual.html#threading">LLVM
+ Programmer's Manual</a> for more information.</li>
+<li>LLVM now has experimental support for <a
+    href="http://nondot.org/~sabre/LLVMNotes/EmbeddedMetadata.txt">embedded
+    metadata</a> in LLVM IR (a short example of the syntax follows this list),
+    though the implementation is not guaranteed to be final and the .bc file
+    format may change in future releases. Debug info does not yet use this
+    format in LLVM 2.6.</li>
</ul>
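+
+<p>As a brief illustration of the embedded metadata syntax (a hypothetical
+example, not taken from the release), metadata comes in two forms: metadata
+strings and metadata nodes, and nodes may reference other metadata or ordinary
+global values:</p>
+
+<div class="doc_code">
+<pre>
+@g = global i32 0                                  ; an ordinary global
+!0 = metadata !{i32 42, metadata !"a string", metadata !1}
+!1 = metadata !{i32* @g}                           ; metadata may reference IR values
+</pre>
+</div>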
</div>
-
<!--=========================================================================-->
<div class="doc_subsection">
<a name="coreimprovements">LLVM IR and Core Improvements</a>
</div>
<div class="doc_text">
-<p>LLVM IR has several new features that are used by our existing front-ends and
-can be useful if you are writing a front-end for LLVM:</p>
+<p>LLVM IR has several new features that provide better support for new
+targets and expose new optimization opportunities (a combined example follows
+the list):</p>
<ul>
-<li>The <a href="LangRef.html#i_shufflevector">shufflevector</a> instruction
-has been generalized to allow different shuffle mask width than its input
-vectors. This allows you to use shufflevector to combine two
-"&lt;4 x float&gt;" vectors into a "&lt;8 x float&gt;" for example.</li>
-
-<li>LLVM IR now supports new intrinsics for computing and acting on <a
-href="LangRef.html#int_overflow">overflow of integer operations</a>. This allows
-efficient code generation for languages that must trap or throw an exception on
-overflow. While these intrinsics work on all targets, they only generate
-efficient code on X86 so far.</li>
-
-<li>LLVM IR now supports a new <a href="LangRef.html#linkage">private
-linkage</a> type to produce labels that are stripped by the assembler before it
-produces a .o file (thus they are invisible to the linker).</li>
-
-<li>LLVM IR supports two new attributes for better alias analysis. The <a
-href="LangRef.html#paramattrs">noalias</a> attribute can now be used on the
-return value of a function to indicate that it returns new memory (e.g.
-'malloc', 'calloc', etc).
-The new <a href="LangRef.html#paramattrs">nocapture</a> attribute can be used
-on pointer arguments to indicate that the function does not return the pointer,
-store it in an object that outlives the call, or let the value of the pointer
-escape from the function in any other way.
-Note that it is the pointer itself that must not escape, not the value it
-points to: loading a value out of the pointer is perfectly fine.
-Many standard library functions (e.g. 'strlen', 'memcpy') have this property.
-<!-- The simplifylibcalls pass applies these attributes to standard libc functions. -->
-</li>
-
-<li>The parser for ".ll" files in lib/AsmParser is now completely rewritten as a
-recursive descent parser. This parser produces better error messages (including
-caret diagnostics), is less fragile (less likely to crash on strange things),
-does not leak memory, is more efficient, and eliminates LLVM's last use of the
-'bison' tool.</li>
-
-<li>Debug information representation and manipulation internals have been
- consolidated to use a new set of classes in
- <tt>llvm/Analysis/DebugInfo.h</tt>. These routines are more
- efficient, robust, and extensible and replace the older mechanisms.
- llvm-gcc, clang, and the code generator now use them to create and process
- debug information.</li>
-
+<li>The <a href="LangRef.html#i_add">add</a>, <a
+ href="LangRef.html#i_sub">sub</a> and <a href="LangRef.html#i_mul">mul</a>
+ instructions have been split into integer and floating point versions (like
+ divide and remainder), introducing new <a
+ href="LangRef.html#i_fadd">fadd</a>, <a href="LangRef.html#i_fsub">fsub</a>,
+ and <a href="LangRef.html#i_fmul">fmul</a> instructions.</li>
+<li>The <a href="LangRef.html#i_add">add</a>, <a
+ href="LangRef.html#i_sub">sub</a> and <a href="LangRef.html#i_mul">mul</a>
+ instructions now support optional "nsw" and "nuw" bits which indicate that
+ the operation is guaranteed to not overflow (in the signed or
+ unsigned case, respectively). This gives the optimizer more information and
+ can be used for things like C signed integer values, which are undefined on
+ overflow.</li>
+<li>The <a href="LangRef.html#i_sdiv">sdiv</a> instruction now supports an
+ optional "exact" flag which indicates that the result of the division is
+ guaranteed to have a remainder of zero. This is useful for optimizing pointer
+ subtraction in C.</li>
+<li>The <a href="LangRef.html#i_getelementptr">getelementptr</a> instruction now
+ supports arbitrary integer index values for array/pointer indices. This
+ allows for better code generation on 16-bit pointer targets like PIC16.</li>
+<li>The <a href="LangRef.html#i_getelementptr">getelementptr</a> instruction now
+ supports an "inbounds" optimization hint that tells the optimizer that the
+ pointer is guaranteed to be within its allocated object.</li>
+<li>LLVM now supports a series of new linkage types for global values which
+    allow for better optimization and new capabilities:
+ <ul>
+ <li><a href="LangRef.html#linkage_linkonce">linkonce_odr</a> and
+ <a href="LangRef.html#linkage_weak">weak_odr</a> have the same linkage
+ semantics as the non-"odr" linkage types. The difference is that these
+ linkage types indicate that all definitions of the specified function
+ are guaranteed to have the same semantics. This allows inlining
+        template functions in C++ but not inlining weak functions in C,
+        which previously both got the same linkage type.</li>
+ <li><a href="LangRef.html#linkage_available_externally">available_externally
+ </a> is a new linkage type that gives the optimizer visibility into the
+ definition of a function (allowing inlining and side effect analysis)
+ but that does not cause code to be generated. This allows better
+ optimization of "GNU inline" functions, extern templates, etc.</li>
+ <li><a href="LangRef.html#linkage_linker_private">linker_private</a> is a
+ new linkage type (which is only useful on Mac OS X) that is used for
+ some metadata generation and other obscure things.</li>
+ </ul></li>
+<li>Finally, target-specific intrinsics can now return multiple values, which
+ is useful for modeling target operations with multiple results.</li>
</ul>
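+
+<p>As a combined sketch of several of these features (a hypothetical function,
+not taken from real code), in 2.6 IR syntax:</p>
+
+<div class="doc_code">
+<pre>
+define linkonce_odr float @example(float %x, float %y, i32 %a, i32 %b, i32* %p) {
+entry:
+  %sum = add nsw i32 %a, %b                     ; signed add, must not overflow
+  %q = sdiv exact i32 %sum, 4                   ; division with a known-zero remainder
+  %elt = getelementptr inbounds i32* %p, i32 1  ; index known to stay in bounds
+  store i32 %q, i32* %elt
+  %f = fadd float %x, %y                        ; fp add is now a distinct instruction
+  ret float %f
+}
+</pre>
+</div>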
</div>
@@ -405,27 +569,53 @@ does not leak memory, is more efficient, and eliminates LLVM's last use of the
<div class="doc_text">
-<p>In addition to a large array of bug fixes and minor performance tweaks, this
+<p>In addition to a large array of minor performance tweaks and bug fixes, this
release includes a few major enhancements and additions to the optimizers:</p>
<ul>
-<li>The loop optimizer now improves floating point induction variables in
-several ways, including adding shadow induction variables to avoid
-"integer &lt;-&gt; floating point" conversions in loops when safe.</li>
+<li>The <a href="Passes.html#scalarrepl">Scalar Replacement of Aggregates</a>
+ pass has many improvements that allow it to better promote vector unions,
+ variables which are memset, and much more strange code that can happen to
+ do bitfield accesses to register operations. An interesting change is that
+ it now produces "unusual" integer sizes (like i1704) in some cases and lets
+ other optimizers clean things up.</li>
+<li>The <a href="Passes.html#loop-reduce">Loop Strength Reduction</a> pass now
+ promotes small integer induction variables to 64-bit on 64-bit targets,
+ which provides a major performance boost for much numerical code. It also
+ promotes shorts to int on 32-bit hosts, etc. LSR now also analyzes pointer
+ expressions (e.g. getelementptrs), as well as integers.</li>
+<li>The <a href="Passes.html#gvn">GVN</a> pass now eliminates partial
+ redundancies of loads in simple cases.</li>
+<li>The <a href="Passes.html#inline">Inliner</a> now reuses stack space when
+ inlining similar arrays from multiple callees into one caller.</li>
+<li>LLVM includes a new experimental Static Single Information (SSI)
+ construction pass.</li>
-<li>The "-mem2reg" pass is now much faster on code with large basic blocks.</li>
+</ul>
+
+</div>
-<li>The "-jump-threading" pass is more powerful: it is iterative
- and handles threading based on values with fully and partially redundant
- loads.</li>
-<li>The "-memdep" memory dependence analysis pass (used by GVN and memcpyopt) is
- both faster and more aggressive.</li>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="executionengine">Interpreter and JIT Improvements</a>
+</div>
-<li>The "-scalarrepl" scalar replacement of aggregates pass is more aggressive
- about promoting unions to registers.</li>
+<div class="doc_text">
+<ul>
+<li>LLVM has a new "EngineBuilder" class which makes it more obvious how to
+ set up and configure an ExecutionEngine (a JIT or interpreter).</li>
+<li>The JIT now supports generating more than 16MB of code.</li>
+<li>When configured with <tt>--with-oprofile</tt>, the JIT can now inform
+ OProfile about JIT'd code, allowing OProfile to get line number and function
+ name information for JIT'd functions.</li>
+<li>When "libffi" is available, the LLVM interpreter now uses it, which supports
+ calling almost arbitrary external (natively compiled) functions.</li>
+<li>Clients of the JIT can now register a 'JITEventListener' object to receive
+ callbacks when the JIT emits or frees machine code. The OProfile support
+ uses this mechanism.</li>
</ul>
</div>
@@ -442,33 +632,55 @@ infrastructure, which allows us to implement more aggressive algorithms and make
it run faster:</p>
<ul>
-<li>The <a href="WritingAnLLVMBackend.html">Writing an LLVM Compiler
-Backend</a> document has been greatly expanded and is substantially more
-complete.</li>
-
-<li>The SelectionDAG type legalization logic has been completely rewritten, is
-now more powerful (it supports arbitrary precision integer types for example),
-and is more correct in several corner cases. The type legalizer converts
-operations on types that are not natively supported by the target machine into
-equivalent code sequences that only use natively supported types. The old type
-legalizer is still available (for now) and will be used if
-<tt>-disable-legalize-types</tt> is passed to the code generator.
-</li>
-<li>The code generator now supports widening illegal vectors to larger legal
-ones (for example, converting operations on &lt;3 x float&gt; to work on
-&lt;4 x float&gt;) which is very important for common graphics
-applications.</li>
-
-<li>The assembly printers for each target are now split out into their own
-libraries that are separate from the main code generation logic. This reduces
-the code size of JIT compilers by not requiring them to be linked in.</li>
-
-<li>The 'fast' instruction selection path (used at -O0 and for fast JIT
- compilers) now supports accelerating codegen for code that uses exception
- handling constructs.</li>
-
-<li>The optional PBQP register allocator now supports register coalescing.</li>
+<li>The <tt>llc -asm-verbose</tt> option (exposed from llvm-gcc as <tt>-dA</tt>
+ and clang as <tt>-fverbose-asm</tt> or <tt>-dA</tt>) now adds a lot of
+ useful information in comments to
+ the generated .s file. This information includes location information (if
+ built with <tt>-g</tt>) and loop nest information.</li>
+<li>The code generator now supports a new MachineVerifier pass which is useful
+ for finding bugs in targets and codegen passes.</li>
+<li>The Machine LICM pass is now enabled by default. It hoists instructions
+    out of loops (such as constant pool loads, loads from read-only stubs,
+    vector constant synthesis code, etc.) and is currently configured to only
+    do so when the hoisted operation can be rematerialized.</li>
+<li>The Machine Sinking pass is now enabled by default. This pass moves
+ side-effect free operations down the CFG so that they are executed on fewer
+ paths through a function.</li>
+<li>The code generator now performs "stack slot coloring" of register spills,
+ which allows spill slots to be reused. This leads to smaller stack frames
+ in cases where there are lots of register spills.</li>
+<li>The register allocator has many improvements: it takes better advantage of
+    commutable operations, performs various spiller peephole optimizations, and
+    can now coalesce cross-register-class copies.</li>
+<li>Tblgen now supports multiclass inheritance and a number of new string and
+    list operations like <tt>!subst</tt>, <tt>!foreach</tt>, <tt>!car</tt>,
+    <tt>!cdr</tt>, <tt>!null</tt>, <tt>!if</tt> and <tt>!cast</tt>.
+    These make the .td files more expressive and allow more aggressive factoring
+    of duplication across instruction patterns.</li>
+<li>Target-specific intrinsics can now be added without having to hack VMCore to
+ add them. This makes it easier to maintain out-of-tree targets.</li>
+<li>The instruction selector is better at propagating information about values
+ (such as whether they are sign/zero extended etc.) across basic block
+ boundaries.</li>
+<li>The SelectionDAG data structure has new nodes for representing buildvector
+    and <a href="http://llvm.org/PR2957">vector shuffle</a> operations. This
+    makes operations and pattern matching more efficient and easier to get
+    right.</li>
+<li>The Prolog/Epilog Insertion Pass now has experimental support for performing
+ the "shrink wrapping" optimization, which moves spills and reloads around in
+ the CFG to avoid doing saves on paths that don't need them.</li>
+<li>LLVM includes new experimental support for writing ELF .o files directly
+ from the compiler. It works well for many simple C testcases, but doesn't
+ support exception handling, debug info, inline assembly, etc.</li>
+<li>Targets can now specify register allocation hints through
+    <tt>MachineRegisterInfo::setRegAllocationHint</tt>. A regalloc hint consists
+    of a hint type and a physical register number. A hint type of zero specifies
+    a register allocation preference. Other hint types are target-specific and
+    are resolved by <tt>TargetRegisterInfo::ResolveRegAllocHint</tt>. An
+    example is the ARM target, which uses register hints to request that the
+    register allocator provide an even / odd register pair for two virtual
+    registers.</li>
</ul>
</div>
@@ -482,37 +694,33 @@ the code size of JIT compilers by not requiring them to be linked in.</li>
</p>
<ul>
-<li>The <tt><a href="LangRef.html#int_returnaddress">llvm.returnaddress</a></tt>
-intrinsic (which is used to implement <tt>__builtin_return_address</tt>) now
-supports non-zero stack depths on X86.</li>
-
-<li>The X86 backend now supports code generation of vector shift operations
-using SSE instructions.</li>
-
-<li>X86-64 code generation now takes advantage of red zone, unless the
-<tt>-mno-red-zone</tt> option is specified.</li>
-
-<li>The X86 backend now supports using address space #256 in LLVM IR as a way of
-performing memory references off the GS segment register. This allows a
-front-end to take advantage of very low-level programming techniques when
-targeting X86 CPUs. See <tt>test/CodeGen/X86/movgs.ll</tt> for a simple
-example.</li>
-
-<li>The X86 backend now supports a <tt>-disable-mmx</tt> command line option to
- prevent use of MMX even on chips that support it. This is important for cases
- where code does not contain the proper <tt>llvm.x86.mmx.emms</tt>
- intrinsics.</li>
-
-<li>The X86 JIT now detects the new Intel <a
- href="http://en.wikipedia.org/wiki/Intel_Core_i7">Core i7</a> and <a
- href="http://en.wikipedia.org/wiki/Intel_Atom">Atom</a> chips and
- auto-configures itself appropriately for the features of these chips.</li>
-
-<li>The JIT now supports exception handling constructs on Linux/X86-64 and
- Darwin/x86-64.</li>
-<li>The JIT supports Thread Local Storage (TLS) on Linux/X86-32 but not yet on
- X86-64.</li>
+<li>SSE 4.2 builtins are now supported.</li>
+<li>GCC-compatible soft float modes are now supported, which are typically used
+ by OS kernels.</li>
+<li>X86-64 now models implicit zero extensions better, which allows the code
+ generator to remove a lot of redundant zexts. It also models the 8-bit "H"
+ registers as subregs, which allows them to be used in some tricky
+ situations.</li>
+<li>X86-64 now supports the "local exec" and "initial exec" thread local
+    storage models.</li>
+<li>The vector forms of the <a href="LangRef.html#i_icmp">icmp</a> and <a
+    href="LangRef.html#i_fcmp">fcmp</a> instructions now select to efficient
+    SSE operations (see the sketch after this list).</li>
+<li>Support for the win64 calling conventions has improved. The primary
+    missing feature is support for varargs function definitions. It seems to
+    work well for many win64 JIT purposes.</li>
+<li>The X86 backend has preliminary support for <a
+    href="CodeGenerator.html#x86_memory">mapping address spaces to segment
+    register references</a>. This allows you to write GS or FS relative memory
+    accesses directly in LLVM IR (also shown in the sketch after this list) for
+    cases where you know exactly what you're doing (such as in an OS kernel).
+    There are some known problems with this support, but it works in simple
+    cases.</li>
+<li>The X86 code generator has been refactored to move all global variable
+ reference logic to one place
+ (<tt>X86Subtarget::ClassifyGlobalReference</tt>) which
+ makes it easier to reason about.</li>
+
</ul>
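+
+<p>A brief sketch of these two features in IR form (hypothetical code, not
+from the release notes): an element-wise vector compare produces a
+&lt;4 x i1&gt; mask, and address space 256 denotes GS-relative addressing on
+X86:</p>
+
+<div class="doc_code">
+<pre>
+define &lt;4 x i32&gt; @cmp(&lt;4 x float&gt; %a, &lt;4 x float&gt; %b) {
+  %m = fcmp olt &lt;4 x float&gt; %a, %b      ; element-wise compare, selects to SSE
+  %r = sext &lt;4 x i1&gt; %m to &lt;4 x i32&gt;   ; widen the mask to 0 / -1 elements
+  ret &lt;4 x i32&gt; %r
+}
+
+define i32 @read_gs(i32 addrspace(256)* %p) {
+  %v = load i32 addrspace(256)* %p        ; a load relative to the GS segment
+  ret i32 %v
+}
+</pre>
+</div>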
</div>
@@ -527,70 +735,156 @@ example.</li>
</p>
<ul>
-<li>Both direct and indirect load/stores work now.</li>
-<li>Logical, bitwise and conditional operations now work for integer data
-types.</li>
-<li>Function calls involving basic types work now.</li>
-<li>Support for integer arrays.</li>
-<li>The compiler can now emit libcalls for operations not supported by m/c
-instructions.</li>
-<li>Support for both data and ROM address spaces.</li>
+<li>Support for floating-point, indirect function calls, and
+    passing/returning aggregate types to functions.</li>
+<li>The code generator is able to generate debug info into output COFF
+    files.</li>
+<li>Support for placing an object into a specific section or at a specific
+    address in memory (a one-line example follows the lists).</li>
</ul>
<p>Things not yet supported:</p>
<ul>
-<li>Floating point.</li>
-<li>Passing/returning aggregate types to and from functions.</li>
<li>Variable arguments.</li>
-<li>Indirect function calls.</li>
<li>Interrupts/programs.</li>
-<li>Debug info.</li>
</ul>
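+
+<p>As a one-line sketch of the section-placement feature mentioned above (a
+generic example, not PIC16-specific), an object can be pinned to a named
+section with a section attribute in the IR:</p>
+
+<div class="doc_code">
+<pre>
+@g = global i32 0, section ".mysection"   ; place @g into a specific section
+</pre>
+</div>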
</div>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="ARM">ARM Target Improvements</a>
+</div>
+
+<div class="doc_text">
+<p>New features of the ARM target include:
+</p>
+
+<ul>
+
+<li>Preliminary support for processors, such as the Cortex-A8 and Cortex-A9,
+that implement version v7-A of the ARM architecture. The ARM backend now
+supports both the Thumb2 and Advanced SIMD (Neon) instruction sets.</li>
+
+<li>The AAPCS-VFP "hard float" calling conventions are also supported with the
+<tt>-float-abi=hard</tt> flag.</li>
+
+<li>The ARM calling convention code is now generated by tblgen instead of
+    being hand-coded in C++.</li>
+</ul>
+
+<p>These features are still somewhat experimental
+and subject to change. The Neon intrinsics, in particular, may change in future
+releases of LLVM. ARMv7 support has also progressed a lot in top-of-tree since
+2.6 branched.</p>
+
+
+</div>
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="llvmc">Improvements in LLVMC</a>
+<a name="OtherTarget">Other Target Specific Improvements</a>
</div>
<div class="doc_text">
-<p>New features include:</p>
+<p>New features of other targets include:
+</p>
<ul>
-<li>Beginning with LLVM 2.5, <tt>llvmc2</tt> is known as
- just <tt>llvmc</tt>. The old <tt>llvmc</tt> driver was removed.</li>
+<li>Mips now supports the O32 calling convention.</li>
+<li>Many improvements to the 32-bit PowerPC SVR4 ABI support (used on
+    powerpc-linux), and lots of bugs were fixed.</li>
+<li>Support was added for the 64-bit PowerPC SVR4 ABI (used on
+    powerpc64-linux). It needs more testing.</li>
+</ul>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="newapis">New Useful APIs</a>
+</div>
-<li>The Clang plugin was substantially improved and is now enabled
- by default. The command <tt>llvmc --clang</tt> can be now used as a
- synonym to <tt>ccc</tt>.</li>
+<div class="doc_text">
-<li>There is now a <tt>--check-graph</tt> option, which is supposed to catch
- common errors like multiple default edges, mismatched output/input language
- names and cycles. In general, these checks can't be done at compile-time
- because of the need to support plugins.</li>
+<p>This release includes a number of new APIs that are used internally, which
+ may also be useful for external clients.
+</p>
-<li>Plugins are now more flexible and can refer to compilation graph nodes and
- options defined in other plugins. To manage dependencies, a priority-sorting
- mechanism was introduced. This change affects the TableGen file syntax. See the
- documentation for details.</li>
+<ul>
+<li>New <a href="http://llvm.org/doxygen/PrettyStackTrace_8h-source.html">
+ <tt>PrettyStackTrace</tt> class</a> allows crashes of llvm tools (and applications
+ that integrate them) to provide more detailed indication of what the
+ compiler was doing at the time of the crash (e.g. running a pass).
+ At the top level for each LLVM tool, it includes the command line arguments.
+ </li>
+<li>New <a href="http://llvm.org/doxygen/StringRef_8h-source.html">StringRef</a>
+ and <a href="http://llvm.org/doxygen/Twine_8h-source.html">Twine</a> classes
+ make operations on character ranges and
+ string concatenation to be more efficient. <tt>StringRef</tt> is just a <tt>const
+ char*</tt> with a length, <tt>Twine</tt> is a light-weight rope.</li>
+<li>LLVM has new <tt>WeakVH</tt>, <tt>AssertingVH</tt> and <tt>CallbackVH</tt>
+    classes, which make it easier to write LLVM IR transformations. <tt>WeakVH</tt>
+    automatically drops to null when the referenced <tt>Value</tt> is deleted,
+    and is updated across a <tt>replaceAllUsesWith</tt> operation.
+ <tt>AssertingVH</tt> aborts the program if the
+ referenced value is destroyed while it is being referenced. <tt>CallbackVH</tt>
+ is a customizable class for handling value references. See <a
+ href="http://llvm.org/doxygen/ValueHandle_8h-source.html">ValueHandle.h</a>
+ for more information.</li>
+<li>The new '<a href="http://llvm.org/doxygen/Triple_8h-source.html">Triple
+ </a>' class centralizes a lot of logic that reasons about target
+ triples.</li>
+<li>The new '<a href="http://llvm.org/doxygen/ErrorHandling_8h-source.html">
+ llvm_report_error()</a>' set of APIs allows tools to embed the LLVM
+ optimizer and backend and recover from previously unrecoverable errors.</li>
+<li>LLVM has new abstractions for <a
+ href="http://llvm.org/doxygen/Atomic_8h-source.html">atomic operations</a>
+ and <a href="http://llvm.org/doxygen/RWMutex_8h-source.html">reader/writer
+ locks</a>.</li>
+<li>LLVM has new <a href="http://llvm.org/doxygen/SourceMgr_8h-source.html">
+ <tt>SourceMgr</tt> and <tt>SMLoc</tt> classes</a> which implement caret
+ diagnostics and basic include stack processing for simple parsers. It is
+ used by tablegen, llvm-mc, the .ll parser and FileCheck.</li>
+</ul>
-<li>Hooks can now be provided with arguments. The syntax is "<tt>$CALL(MyHook,
- 'Arg1', 'Arg2', 'Arg3')</tt>".</li>
-<li>A new option type: multi-valued option, for options that take more than one
- argument (for example, "<tt>-foo a b c</tt>").</li>
+</div>
-<li>New option properties: '<tt>one_or_more</tt>', '<tt>zero_or_more</tt>',
-'<tt>hidden</tt>' and '<tt>really_hidden</tt>'.</li>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="otherimprovements">Other Improvements and New Features</a>
+</div>
-<li>The '<tt>case</tt>' expression gained an '<tt>error</tt>' action and
- an '<tt>empty</tt>' test (equivalent to "<tt>(not (not_empty ...))</tt>").</li>
+<div class="doc_text">
+<p>Other miscellaneous features include:</p>
-<li>Documentation now looks more consistent to the rest of the LLVM
- docs. There is also a man page now.</li>
+<ul>
+<li>LLVM now includes a new internal '<a
+    href="http://llvm.org/cmds/FileCheck.html">FileCheck</a>' tool which allows
+    writing much more accurate regression tests that run faster (a small
+    example follows this list). Please see the
+    <a href="TestingGuide.html#FileCheck">FileCheck section of the Testing
+    Guide</a> for more information.</li>
+<li>LLVM profile information support has been significantly improved to produce
+    correct use counts, and has support for edge profiling with reduced runtime
+    overhead. Combined, the generated profile information is both more correct
+    and imposes about half as much overhead (in 2.6, from 12% to 6% overhead on
+    SPEC CPU2000).</li>
+<li>The C bindings (in the llvm/include/llvm-c directory) include many newly
+ supported APIs.</li>
+<li>LLVM 2.6 includes brand new experimental LLVM bindings for the Ada2005
+    programming language.</li>
+
+<li>The LLVMC driver has several new features:
+ <ul>
+ <li>Dynamic plugins now work on Windows.</li>
+    <li>New option property: init. It makes it possible to provide default
+        values for options defined in plugins (interface to <tt>cl::init</tt>).</li>
+ <li>New example: Skeleton, shows how to create a standalone LLVMC-based
+ driver.</li>
+ <li>New example: mcc16, a driver for the PIC16 toolchain.</li>
+ </ul>
+</li>
</ul>
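+
+<p>A small sketch of a FileCheck-style test (a hypothetical test file, not
+taken from the tree): the <tt>RUN</tt> line pipes tool output into FileCheck,
+which matches it against the <tt>CHECK</tt> patterns in order:</p>
+
+<div class="doc_code">
+<pre>
+; RUN: llvm-as &lt; %s | llvm-dis | FileCheck %s
+define i32 @identity(i32 %x) {
+; CHECK: define i32 @identity
+; CHECK: ret i32 %x
+  ret i32 %x
+}
+</pre>
+</div>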
@@ -605,13 +899,24 @@ instructions.</li>
<div class="doc_text">
<p>If you're already an LLVM user or developer with out-of-tree changes based
-on LLVM 2.4, this section lists some "gotchas" that you may run into upgrading
+on LLVM 2.5, this section lists some "gotchas" that you may run into upgrading
from the previous release.</p>
<ul>
-
-<li>llvm-gcc defaults to <tt>-fno-math-errno</tt> on all X86 targets.</li>
-
+<li>The Itanium (IA64) backend has been removed. It was not actively supported
+ and had bitrotted.</li>
+<li>The BigBlock register allocator has been removed; it had also bitrotted.</li>
+<li>The C Backend (<tt>-march=c</tt>) is no longer considered part of the LLVM release
+criteria. We still want it to work, but no one is maintaining it and it lacks
+support for arbitrary precision integers and other important IR features.</li>
+
+<li>All LLVM tools now default to overwriting their output file, behaving more
+ like standard unix tools. Previously, this only happened with the '<tt>-f</tt>'
+ option.</li>
+<li>The LLVM build now builds all libraries as .a files instead of some
+    libraries as relinked .o files. This requires using some APIs like
+    InitializeAllTargets.h to ensure that the code you need gets linked in.
+    </li>
</ul>
@@ -619,8 +924,82 @@ from the previous release.</p>
API changes are:</p>
<ul>
-<li>Some deprecated interfaces to create <tt>Instruction</tt> subclasses, that
- were spelled with lower case "create," have been removed.</li>
+<li>All uses of <tt>hash_set</tt> and <tt>hash_map</tt> have been removed from
+ the LLVM tree and the wrapper headers have been removed.</li>
+<li>The llvm/Streams.h header and the <tt>DOUT</tt> member of Debug.h have been
+    removed. The <tt>llvm::Ostream</tt> class has been completely removed and
+    replaced with uses of <tt>raw_ostream</tt>.</li>
+<li>LLVM's global uniquing tables for <tt>Type</tt>s and <tt>Constant</tt>s have
+    been privatized into members of an <tt>LLVMContext</tt>. A number of APIs
+    now take an <tt>LLVMContext</tt> as a parameter. To smooth the transition
+    for clients that will only ever use a single context, the new
+    <tt>getGlobalContext()</tt> API can be used to access a default global
+    context which can be passed in any and all cases where a context is
+    required.</li>
+<li>The <tt>getABITypeSize</tt> methods are now called <tt>getAllocSize</tt>.</li>
+<li>The <tt>Add</tt>, <tt>Sub</tt> and <tt>Mul</tt> operators are no longer
+ overloaded for floating-point types. Floating-point addition, subtraction
+ and multiplication are now represented with new operators <tt>FAdd</tt>,
+ <tt>FSub</tt> and <tt>FMul</tt>. In the <tt>IRBuilder</tt> API,
+ <tt>CreateAdd</tt>, <tt>CreateSub</tt>, <tt>CreateMul</tt> and
+ <tt>CreateNeg</tt> should only be used for integer arithmetic now;
+ <tt>CreateFAdd</tt>, <tt>CreateFSub</tt>, <tt>CreateFMul</tt> and
+ <tt>CreateFNeg</tt> should now be used for floating-point arithmetic.</li>
+<li>The <tt>DynamicLibrary</tt> class can no longer be constructed; its
+    functionality has moved to static member functions.</li>
+<li><tt>raw_fd_ostream</tt>'s constructor for opening a given filename now
+ takes an extra <tt>Force</tt> argument. If <tt>Force</tt> is set to
+ <tt>false</tt>, an error will be reported if a file with the given name
+ already exists. If <tt>Force</tt> is set to <tt>true</tt>, the file will
+ be silently truncated (which is the behavior before this flag was
+ added).</li>
+<li><tt>SCEVHandle</tt> no longer exists, because reference counting is no
+    longer done for <tt>SCEV*</tt> objects; instead, <tt>const SCEV*</tt>
+    should be used.</li>
+
+<li>Many APIs, notably <tt>llvm::Value</tt>, now use the <tt>StringRef</tt>
+and <tt>Twine</tt> classes instead of passing <tt>const char*</tt>
+or <tt>std::string</tt>, as described in
+the <a href="ProgrammersManual.html#string_apis">Programmer's Manual</a>. Most
+clients should be unaffected by this transition, unless they are used to
+<tt>Value::getName()</tt> returning a string. Here are some tips on updating to
+2.6:
+ <ul>
+    <li><tt>getNameStr()</tt> is still available, and matches the old
+        behavior. Replacing <tt>getName()</tt> calls with this is a safe option,
+        although more efficient alternatives are now possible.</li>
+
+ <li>If you were just relying on <tt>getName()</tt> being able to be sent to
+ a <tt>std::ostream</tt>, consider migrating
+ to <tt>llvm::raw_ostream</tt>.</li>
+
+    <li>If you were using <tt>getName().c_str()</tt> to get a <tt>const
+        char*</tt> pointer to the name, you can use <tt>getName().data()</tt>.
+        Note that this string, as before, may not be the entire name if the
+        name contains embedded null characters.</li>
+
+ <li>If you were using <tt>operator +</tt> on the result of <tt>getName()</tt> and
+ treating the result as an <tt>std::string</tt>, you can either
+ use <tt>Twine::str</tt> to get the result as an <tt>std::string</tt>, or
+ could move to a <tt>Twine</tt> based design.</li>
+
+    <li><tt>isName()</tt> should be replaced with comparison
+        against <tt>getName()</tt> (this is now efficient).</li>
+  </ul>
+</li>
+
+<li>The registration interfaces for backend Targets have changed (what was
+previously <tt>TargetMachineRegistry</tt>). For backend authors, see the <a
+href="WritingAnLLVMBackend.html#TargetRegistration">Writing An LLVM Backend</a>
+guide. For clients, the notable API changes are:
+ <ul>
+ <li><tt>TargetMachineRegistry</tt> has been renamed
+ to <tt>TargetRegistry</tt>.</li>
+
+ <li>Clients should move to using the <tt>TargetRegistry::lookupTarget()</tt>
+ function to find targets.</li>
+ </ul>
+</li>
</ul>
</div>
@@ -639,15 +1018,15 @@ API changes are:</p>
<ul>
<li>Intel and AMD machines (IA32, X86-64, AMD64, EMT-64) running Red Hat
-Linux, Fedora Core and FreeBSD (and probably other unix-like systems).</li>
+ Linux, Fedora Core, FreeBSD and AuroraUX (and probably other unix-like
+ systems).</li>
<li>PowerPC and X86-based Mac OS X systems, running 10.3 and above in 32-bit
-and 64-bit modes.</li>
+ and 64-bit modes.</li>
<li>Intel and AMD machines running on Win32 using MinGW libraries (native).</li>
<li>Intel and AMD machines running on Win32 with the Cygwin libraries (limited
support is available for native builds with Visual C++).</li>
<li>Sun UltraSPARC workstations running Solaris 10.</li>
<li>Alpha-based machines running Debian GNU/Linux.</li>
-<li>Itanium-based (IA64) machines running Linux and HP-UX.</li>
</ul>
<p>The core LLVM infrastructure uses GNU autoconf to adapt itself
@@ -670,6 +1049,21 @@ listed by component. If you run into a problem, please check the <a
href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
there isn't already one.</p>
+<ul>
+<li>The llvm-gcc bootstrap will fail with some versions of binutils (e.g. 2.15)
+ with a message of "<tt><a href="http://llvm.org/PR5004">Error: can not do 8
+ byte pc-relative relocation</a></tt>" when building C++ code. We intend to
+ fix this on mainline, but a workaround for 2.6 is to upgrade to binutils
+ 2.17 or later.</li>
+
+<li>LLVM will not compile correctly on Solaris and/or OpenSolaris
+using the stock GCC 3.x.x series 'out of the box'.
+See: <a href="#brokengcc">Broken versions of GCC and other tools</a>.
+However, a <a href="http://pkg.auroraux.org/GCC">modern GCC build</a>
+for x86/x86-64 has been made available by the third-party AuroraUX Project,
+and it has been meticulously tested for bootstrapping LLVM &amp; Clang.</li>
+</ul>
+
</div>
<!-- ======================================================================= -->
@@ -687,9 +1081,11 @@ components, please contact us on the <a
href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
<ul>
-<li>The MSIL, IA64, Alpha, SPU, MIPS, and PIC16 backends are experimental.</li>
+<li>The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430 and SystemZ backends are
+ experimental.</li>
<li>The <tt>llc</tt> "<tt>-filetype=asm</tt>" (the default) is the only
- supported value for this option.</li>
+ supported value for this option. The ELF writer is experimental.</li>
+<li>The implementation of Andersen's Alias Analysis has many known bugs.</li>
</ul>
</div>
@@ -744,14 +1140,14 @@ compilation, and lacks support for debug information.</li>
<div class="doc_text">
<ul>
+<li>Support for the Advanced SIMD (Neon) instruction set is still incomplete
+and not well tested. Some features may not work at all, and the code quality
+may be poor in some cases.</li>
<li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
processors, thumb programs can crash or produce wrong
results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
<li>Compilation for ARM Linux OABI (old ABI) is supported but not fully tested.
</li>
-<li>There is a bug in QEMU-ARM (&lt;= 0.9.0) which causes it to incorrectly
- execute
-programs compiled with LLVM. Please use more recent versions of QEMU.</li>
</ul>
</div>
@@ -778,7 +1174,6 @@ programs compiled with LLVM. Please use more recent versions of QEMU.</li>
<div class="doc_text">
<ul>
-<li>The O32 ABI is not fully supported.</li>
<li>64-bit MIPS targets are not supported yet.</li>
</ul>
@@ -801,21 +1196,6 @@ appropriate nops inserted to ensure restartability.</li>
<!-- ======================================================================= -->
<div class="doc_subsection">
- <a name="ia64-be">Known problems with the IA64 back-end</a>
-</div>
-
-<div class="doc_text">
-
-<ul>
-<li>The Itanium backend is highly experimental and has a number of known
- issues. We are looking for a maintainer for the Itanium backend. If you
- are interested, please contact the LLVMdev mailing list.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<div class="doc_subsection">
<a name="c-be">Known problems with the C back-end</a>
</div>
@@ -841,10 +1221,6 @@ appropriate nops inserted to ensure restartability.</li>
<div class="doc_text">
-<p>llvm-gcc does not currently support <a href="http://llvm.org/PR869">Link-Time
-Optimization</a> on most platforms "out-of-the-box". Please inquire on the
-LLVMdev mailing list if you are interested.</p>
-
<p>The only major language feature of GCC not supported by llvm-gcc is
the <tt>__builtin_apply</tt> family of builtins. However, some extensions
are only supported on some targets. For example, trampolines are only
@@ -882,7 +1258,8 @@ itself, Qt, Mozilla, etc.</p>
<div class="doc_text">
<ul>
<li>Fortran support generally works, but there are still several unresolved bugs
- in Bugzilla. Please see the tools/gfortran component for details.</li>
+ in <a href="http://llvm.org/bugs/">Bugzilla</a>. Please see the
+ tools/gfortran component for details.</li>
</ul>
</div>
@@ -902,16 +1279,16 @@ which does support trampolines.</li>
<li>The Ada front-end <a href="http://llvm.org/PR2007">fails to bootstrap</a>.
This is due to lack of LLVM support for <tt>setjmp</tt>/<tt>longjmp</tt> style
exception handling, which is used internally by the compiler.
-Workaround: configure with --disable-bootstrap.</li>
+Workaround: configure with <tt>--disable-bootstrap</tt>.</li>
<li>The c380004, <a href="http://llvm.org/PR2010">c393010</a>
and <a href="http://llvm.org/PR2421">cxg2021</a> ACATS tests fail
(c380004 also fails with gcc-4.2 mainline).
If the compiler is built with checks disabled then <a href="http://llvm.org/PR2010">c393010</a>
causes the compiler to go into an infinite loop, using up all system memory.</li>
<li>Some GCC specific Ada tests continue to crash the compiler.</li>
-<li>The -E binder option (exception backtraces)
+<li>The <tt>-E</tt> binder option (exception backtraces)
<a href="http://llvm.org/PR1982">does not work</a> and will result in programs
-crashing if an exception is raised. Workaround: do not use -E.</li>
+crashing if an exception is raised. Workaround: do not use <tt>-E</tt>.</li>
<li>Only discrete types <a href="http://llvm.org/PR1981">are allowed to start
or finish at a non-byte offset</a> in a record. Workaround: do not pack records
or use representation clauses that result in a field of a non-discrete type
@@ -925,6 +1302,20 @@ ignored</a>.</li>
</ul>
</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ocaml-bindings">Known problems with the O'Caml bindings</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>Llvm.Linkage</tt> module is broken, and has incorrect values. Only
+<tt>Llvm.Linkage.External</tt>, <tt>Llvm.Linkage.Available_externally</tt>, and
+<tt>Llvm.Linkage.Link_once</tt> are correct. If you need any of the other
+linkage modes, you'll have to write an external C library in order to expose
+the functionality. This has been fixed in the trunk.</p>
+</div>
+
<!-- *********************************************************************** -->
<div class="doc_section">
<a name="additionalinfo">Additional Information</a>
@@ -957,7 +1348,7 @@ lists</a>.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-03-02 21:08:12 +0100 (Mon, 02 Mar 2009) $
+ Last modified: $Date: 2009-10-13 19:48:04 +0200 (Tue, 13 Oct 2009) $
</address>
</body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index e940e93d82fa..277b1e334f9f 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -80,7 +80,7 @@ height="369">
debug information in LLVM. It describes the <a href="#format">actual format
that the LLVM debug information</a> takes, which is useful for those
interested in creating front-ends or dealing directly with the information.
- Further, this document provides specifc examples of what debug information
+   Further, this document provides specific examples of the debug information generated
for C/C++.</p>
</div>
@@ -122,8 +122,8 @@ height="369">
<p>The approach used by the LLVM implementation is to use a small set
of <a href="#format_common_intrinsics">intrinsic functions</a> to define a
mapping between LLVM program objects and the source-level objects. The
- description of the source-level program is maintained in LLVM global
- variables in an <a href="#ccxx_frontend">implementation-defined format</a>
+ description of the source-level program is maintained in LLVM metadata
+ in an <a href="#ccxx_frontend">implementation-defined format</a>
(the C/C++ front-end currently uses working draft 7 of
the <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">DWARF 3
standard</a>).</p>
@@ -240,31 +240,21 @@ height="369">
<p>LLVM debugging information has been carefully designed to make it possible
for the optimizer to optimize the program and debugging information without
necessarily having to know anything about debugging information. In
- particular, the global constant merging pass automatically eliminates
- duplicated debugging information (often caused by header files), the global
- dead code elimination pass automatically deletes debugging information for a
- function if it decides to delete the function, and the linker eliminates
- debug information when it merges <tt>linkonce</tt> functions.</p>
+   particular, the use of metadata avoids duplicated debugging information from
+   the beginning, and the global dead code elimination pass automatically
+   deletes debugging information for a function if it decides to delete the
+   function.</p>
<p>To do this, most of the debugging information (descriptors for types,
variables, functions, source files, etc) is inserted by the language
- front-end in the form of LLVM global variables. These LLVM global variables
- are no different from any other global variables, except that they have a web
- of LLVM intrinsic functions that point to them. If the last references to a
- particular piece of debugging information are deleted (for example, by the
- <tt>-globaldce</tt> pass), the extraneous debug information will
- automatically become dead and be removed by the optimizer.</p>
+ front-end in the form of LLVM metadata. </p>
<p>Debug information is designed to be agnostic about the target debugger and
debugging information representation (e.g. DWARF/Stabs/etc). It uses a
- generic machine debug information pass to decode the information that
- represents variables, types, functions, namespaces, etc: this allows for
- arbitrary source-language semantics and type-systems to be used, as long as
- there is a module written for the target debugger to interpret the
- information. In addition, debug global variables are declared in
- the <tt>"llvm.metadata"</tt> section. All values declared in this section
- are stripped away after target debug information is constructed and before
- the program object is emitted.</p>
+   generic pass to decode the information that represents variables, types,
+   functions, namespaces, etc.: this allows for arbitrary source-language
+   semantics and type-systems to be used, as long as there is a module
+   written for the target debugger to interpret the information.</p>
<p>To provide basic functionality, the LLVM debugger does have to make some
assumptions about the source-level language being debugged, though it keeps
@@ -288,9 +278,7 @@ height="369">
<div class="doc_text">
<p>In consideration of the complexity and volume of debug information, LLVM
- provides a specification for well formed debug global variables. The
- constant value of each of these globals is one of a limited set of
- structures, known as debug descriptors.</p>
+   provides a specification for well-formed debug descriptors.</p>
<p>Consumers of LLVM debug information expect the descriptors for program
objects to start in a canonical format, but the descriptors can include
@@ -300,20 +288,17 @@ height="369">
way. Also, all debugging information objects start with a tag to indicate
what type of object it is. The source-language is allowed to define its own
   objects, by using unreserved tag numbers. We recommend using tags in
- the range 0x1000 thru 0x2000 (there is a defined enum DW_TAG_user_base =
+ the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base =
0x1000.)</p>
-<p>The fields of debug descriptors used internally by LLVM (MachineModuleInfo)
+<p>The fields of debug descriptors used internally by LLVM
are restricted to only the simple data types <tt>int</tt>, <tt>uint</tt>,
- <tt>bool</tt>, <tt>float</tt>, <tt>double</tt>, <tt>i8*</tt> and
- <tt>{&nbsp;}*</tt>. References to arbitrary values are handled using a
- <tt>{&nbsp;}*</tt> and a cast to <tt>{&nbsp;}*</tt> expression; typically
- references to other field descriptors, arrays of descriptors or global
- variables.</p>
+ <tt>bool</tt>, <tt>float</tt>, <tt>double</tt>, <tt>mdstring</tt> and
+ <tt>mdnode</tt>. </p>
<div class="doc_code">
<pre>
-%llvm.dbg.object.type = type {
+!1 = metadata !{
uint, ;; A tag
...
}
@@ -326,8 +311,8 @@ height="369">
of tags are loosely bound to the tag values of DWARF information entries.
However, that does not restrict the use of the information supplied to DWARF
targets. To facilitate versioning of debug information, the tag is augmented
- with the current debug version (LLVMDebugVersion = 4 << 16 or 0x40000 or
- 262144.)</a></p>
+ with the current debug version (LLVMDebugVersion = 7 << 16 or 0x70000 or
+ 458752.)</a></p>
<p>The details of the various descriptors follow.</p>
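+
+<p>For example, under debug version 7, a compile unit descriptor
+(DW_TAG_compile_unit = 17) carries a tag field of 17 + 458752 = 458769. The
+sketch below is illustrative, not a complete descriptor:</p>
+
+<div class="doc_code">
+<pre>
+;; 17 (DW_TAG_compile_unit) + 458752 (LLVMDebugVersion) = 458769
+!0 = metadata !{
+  i32 458769,  ;; Tag
+  ...
+}
+</pre>
+</div>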
@@ -342,17 +327,18 @@ height="369">
<div class="doc_code">
<pre>
-%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = type {
- i32, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_compile_unit)
- { }*, ;; Compile unit anchor = cast = (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_units</a> to { }*)
- i32, ;; DWARF language identifier (ex. DW_LANG_C89)
- i8*, ;; Source file name
- i8*, ;; Source file directory (includes trailing slash)
- i8* ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
- i1, ;; True if this is a main compile unit.
- i1, ;; True if this is optimized.
- i8*, ;; Flags
- i32 ;; Runtime version
+!0 = metadata !{
+ i32, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ ;; (DW_TAG_compile_unit)
+ i32, ;; Unused field.
+ i32, ;; DWARF language identifier (ex. DW_LANG_C89)
+ metadata, ;; Source file name
+ metadata, ;; Source file directory (includes trailing slash)
+  metadata, ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
+ i1, ;; True if this is a main compile unit.
+ i1, ;; True if this is optimized.
+ metadata, ;; Flags
+ i32 ;; Runtime version
}
</pre>
</div>
@@ -388,19 +374,20 @@ height="369">
<div class="doc_code">
<pre>
-%<a href="#format_global_variables">llvm.dbg.global_variable.type</a> = type {
- i32, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_variable)
- { }*, ;; Global variable anchor = cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_global_variables">llvm.dbg.global_variables</a> to { }*),
- { }*, ;; Reference to context descriptor
- i8*, ;; Name
- i8*, ;; Display name (fully qualified C++ name)
- i8*, ;; MIPS linkage name (for C++)
- { }*, ;; Reference to compile unit where defined
- i32, ;; Line number where defined
- { }*, ;; Reference to type descriptor
- i1, ;; True if the global is local to compile unit (static)
- i1, ;; True if the global is defined in the compile unit (not extern)
- { }* ;; Reference to the global variable
+!1 = metadata !{
+ i32, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ ;; (DW_TAG_variable)
+ i32, ;; Unused field.
+ metadata, ;; Reference to context descriptor
+ metadata, ;; Name
+ metadata, ;; Display name (fully qualified C++ name)
+ metadata, ;; MIPS linkage name (for C++)
+ metadata, ;; Reference to compile unit where defined
+ i32, ;; Line number where defined
+ metadata, ;; Reference to type descriptor
+ i1, ;; True if the global is local to compile unit (static)
+ i1, ;; True if the global is defined in the compile unit (not extern)
+ { }* ;; Reference to the global variable
}
</pre>
</div>
@@ -419,18 +406,19 @@ provide details such as name, type and where the variable is defined.</p>
<div class="doc_code">
<pre>
-%<a href="#format_subprograms">llvm.dbg.subprogram.type</a> = type {
- i32, ;; Tag = 46 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_subprogram)
- { }*, ;; Subprogram anchor = cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_subprograms">llvm.dbg.subprograms</a> to { }*),
- { }*, ;; Reference to context descriptor
- i8*, ;; Name
- i8*, ;; Display name (fully qualified C++ name)
- i8*, ;; MIPS linkage name (for C++)
- { }*, ;; Reference to compile unit where defined
- i32, ;; Line number where defined
- { }*, ;; Reference to type descriptor
- i1, ;; True if the global is local to compile unit (static)
- i1 ;; True if the global is defined in the compile unit (not extern)
+!2 = metadata !{
+ i32, ;; Tag = 46 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ ;; (DW_TAG_subprogram)
+ i32, ;; Unused field.
+ metadata, ;; Reference to context descriptor
+ metadata, ;; Name
+ metadata, ;; Display name (fully qualified C++ name)
+ metadata, ;; MIPS linkage name (for C++)
+ metadata, ;; Reference to compile unit where defined
+ i32, ;; Line number where defined
+ metadata, ;; Reference to type descriptor
+ i1, ;; True if the global is local to compile unit (static)
+ i1 ;; True if the global is defined in the compile unit (not extern)
}
</pre>
</div>
@@ -450,9 +438,9 @@ provide details such as name, type and where the variable is defined.</p>
<div class="doc_code">
<pre>
-%<a href="#format_blocks">llvm.dbg.block</a> = type {
- i32, ;; Tag = 13 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
- { }* ;; Reference to context descriptor
+!3 = metadata !{
+ i32, ;; Tag = 13 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
+ metadata ;; Reference to context descriptor
}
</pre>
</div>
@@ -472,17 +460,18 @@ provide details such as name, type and where the variable is defined.</p>
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype.type</a> = type {
- i32, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_base_type)
- { }*, ;; Reference to context (typically a compile unit)
- i8*, ;; Name (may be "" for anonymous types)
- { }*, ;; Reference to compile unit where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags
- i32 ;; DWARF type encoding
+!4 = metadata !{
+ i32, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ ;; (DW_TAG_base_type)
+ metadata, ;; Reference to context (typically a compile unit)
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to compile unit where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags
+ i32 ;; DWARF type encoding
}
</pre>
</div>
@@ -523,16 +512,16 @@ DW_ATE_unsigned_char = 8
<div class="doc_code">
<pre>
-%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> = type {
- i32, ;; Tag (see below)
- { }*, ;; Reference to context
- i8*, ;; Name (may be "" for anonymous types)
- { }*, ;; Reference to compile unit where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i32, ;; Size in bits
- i32, ;; Alignment in bits
- i32, ;; Offset in bits
- { }* ;; Reference to type derived from
+!5 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to compile unit where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i32, ;; Size in bits
+ i32, ;; Alignment in bits
+ i32, ;; Offset in bits
+ metadata ;; Reference to type derived from
}
</pre>
</div>
@@ -591,19 +580,19 @@ DW_TAG_restrict_type = 55
<div class="doc_code">
<pre>
-%<a href="#format_composite_type">llvm.dbg.compositetype.type</a> = type {
- i32, ;; Tag (see below)
- { }*, ;; Reference to context
- i8*, ;; Name (may be "" for anonymous types)
- { }*, ;; Reference to compile unit where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags
- { }*, ;; Reference to type derived from
- { }*, ;; Reference to array of member descriptors
- i32 ;; Runtime languages
+!6 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to compile unit where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags
+ metadata, ;; Reference to type derived from
+ metadata, ;; Reference to array of member descriptors
+ i32 ;; Runtime languages
}
</pre>
</div>
@@ -702,10 +691,11 @@ DW_TAG_inheritance = 28
<div class="doc_code">
<pre>
-%<a href="#format_enumeration">llvm.dbg.enumerator.type</a> = type {
- i32, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_enumerator)
- i8*, ;; Name
- i64 ;; Value
+!6 = metadata !{
+ i32, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ ;; (DW_TAG_enumerator)
+ metadata, ;; Name
+ i64 ;; Value
}
</pre>
</div>
@@ -725,13 +715,13 @@ DW_TAG_inheritance = 28
<div class="doc_code">
<pre>
-%<a href="#format_variables">llvm.dbg.variable.type</a> = type {
- i32, ;; Tag (see below)
- { }*, ;; Context
- i8*, ;; Name
- { }*, ;; Reference to compile unit where defined
- i32, ;; Line number where defined
- { }* ;; Type descriptor
+!7 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Context
+ metadata, ;; Name
+ metadata, ;; Reference to compile unit where defined
+ i32, ;; Line number where defined
+ metadata ;; Type descriptor
}
</pre>
</div>
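+
+<p>As an illustrative sketch (numbering arbitrary; the tag value assumes the
+LLVMDebugVersion of 458752 used in the examples below and the
+DW_TAG_auto_variable tag of 256), a local variable <tt>X</tt> of type
+<tt>!4</tt>, declared on line 2 inside the subprogram described by
+<tt>!2</tt>, could be described as:</p>
+
+<div class="doc_code">
+<pre>
+!8 = metadata !{
+  i32 459008,     ;; Tag = 256 + LLVMDebugVersion (DW_TAG_auto_variable)
+  metadata !2,    ;; Context (the subprogram)
+  metadata !"X",  ;; Name
+  metadata !3,    ;; Reference to compile unit where defined
+  i32 2,          ;; Line number where defined
+  metadata !4     ;; Type descriptor
+}
+</pre>
+</div>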
@@ -778,14 +768,14 @@ DW_TAG_return_variable = 258
<div class="doc_text">
<pre>
- void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint, uint, { }* )
+ void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint, uint, metadata)
</pre>
<p>This intrinsic is used to provide correspondence between the source file and
 the generated code. The first argument is the line number (base 1), the second
 argument is the column number (0 if unknown), and the third argument is the
- source <tt>%<a href="#format_compile_units">llvm.dbg.compile_unit</a>*</tt>
- cast to a <tt>{&nbsp;}*</tt>. Code following a call to this intrinsic will
+  source <tt>%<a href="#format_compile_units">llvm.dbg.compile_unit</a></tt>.
+ Code following a call to this intrinsic will
have been defined in close proximity of the line, column and file. This
information holds until the next call
 to <tt>%<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a></tt>.</p>
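+
+<p>For example, a stop point for line 5, column 4 of the compile unit
+described by <tt>!1</tt> would look like this (a sketch; the metadata
+number is arbitrary):</p>
+
+<div class="doc_code">
+<pre>
+call void @llvm.dbg.stoppoint( uint 5, uint 4, metadata !1 )
+</pre>
+</div>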
@@ -799,7 +789,7 @@ DW_TAG_return_variable = 258
<div class="doc_text">
<pre>
- void %<a href="#format_common_func_start">llvm.dbg.func.start</a>( { }* )
+ void %<a href="#format_common_func_start">llvm.dbg.func.start</a>( metadata )
</pre>
<p>This intrinsic is used to link the debug information
@@ -823,7 +813,7 @@ DW_TAG_return_variable = 258
<div class="doc_text">
<pre>
- void %<a href="#format_common_region_start">llvm.dbg.region.start</a>( { }* )
+ void %<a href="#format_common_region_start">llvm.dbg.region.start</a>( metadata )
</pre>
<p>This intrinsic is used to define the beginning of a declarative scope (ex.
@@ -843,7 +833,7 @@ DW_TAG_return_variable = 258
<div class="doc_text">
<pre>
- void %<a href="#format_common_region_end">llvm.dbg.region.end</a>( { }* )
+ void %<a href="#format_common_region_end">llvm.dbg.region.end</a>( metadata )
</pre>
<p>This intrinsic is used to define the end of a declarative scope (ex. block)
@@ -864,14 +854,14 @@ DW_TAG_return_variable = 258
<div class="doc_text">
<pre>
- void %<a href="#format_common_declare">llvm.dbg.declare</a>( { } *, { }* )
+ void %<a href="#format_common_declare">llvm.dbg.declare</a>( { } *, metadata )
</pre>
<p>This intrinsic provides information about a local element (ex. a variable). The
first argument is the alloca for the variable, cast to a <tt>{ }*</tt>. The
second argument is
the <tt>%<a href="#format_variables">llvm.dbg.variable</a></tt> containing
- the description of the variable, also cast to a <tt>{ }*</tt>.</p>
+  the description of the variable.</p>
</div>
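+
+<p>For example (a sketch; the descriptor number is arbitrary), a local
+<tt>i32</tt> variable <tt>%X</tt> described by the variable descriptor
+<tt>!7</tt> could be declared as:</p>
+
+<div class="doc_code">
+<pre>
+%X = alloca i32                                ;; storage for the variable
+%0 = bitcast i32* %X to { }*                   ;; cast the alloca to { }*
+call void @llvm.dbg.declare( { }* %0, metadata !7 )
+</pre>
+</div>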
@@ -955,29 +945,29 @@ entry:
...
- call void @<a href="#format_common_func_start">llvm.dbg.func.start</a>( %<a href="#format_subprograms">llvm.dbg.subprogram.type</a>* @llvm.dbg.subprogram )
+ call void @<a href="#format_common_func_start">llvm.dbg.func.start</a>( metadata !0)
- call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 2, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* @llvm.dbg.compile_unit )
+ call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 2, uint 2, metadata !1)
call void @<a href="#format_common_declare">llvm.dbg.declare</a>({}* %X, ...)
call void @<a href="#format_common_declare">llvm.dbg.declare</a>({}* %Y, ...)
<i>;; Evaluate expression on line 2, assigning to X.</i>
- call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 3, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* @llvm.dbg.compile_unit )
+ call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 3, uint 2, metadata !1)
<i>;; Evaluate expression on line 3, assigning to Y.</i>
  call void @<a href="#format_common_region_start">llvm.dbg.region.start</a>()
- call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 5, uint 4, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* @llvm.dbg.compile_unit )
+ call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 5, uint 4, metadata !1)
call void @<a href="#format_common_declare">llvm.dbg.declare</a>({}* %X, ...)
<i>;; Evaluate expression on line 5, assigning to Z.</i>
- call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 7, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* @llvm.dbg.compile_unit )
+ call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 7, uint 2, metadata !1)
  call void @<a href="#format_common_region_end">llvm.dbg.region.end</a>()
- call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 9, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* @llvm.dbg.compile_unit )
+ call void @<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 9, uint 2, metadata !1)
  call void @<a href="#format_common_region_end">llvm.dbg.region.end</a>()
@@ -1097,50 +1087,35 @@ int main(int argc, char *argv[]) {
<pre>
...
;;
-;; Define types used. In this case we need one for compile unit anchors and one
-;; for compile units.
-;;
-%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type { uint, uint }
-%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = type { uint, { }*, uint, uint, i8*, i8*, i8* }
-...
-;;
-;; Define the anchor for compile units. Note that the second field of the
-;; anchor is 17, which is the same as the tag for compile units
-;; (17 = DW_TAG_compile_unit.)
-;;
-%<a href="#format_compile_units">llvm.dbg.compile_units</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 17 }, section "llvm.metadata"
-
-;;
;; Define the compile unit for the source file "/Users/mine/sources/MySource.cpp".
;;
-%<a href="#format_compile_units">llvm.dbg.compile_unit1</a> = internal constant %<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> {
- uint add(uint 17, uint 262144),
- { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_units</a> to { }*),
- uint 1,
- uint 1,
- i8* getelementptr ([13 x i8]* %str1, i32 0, i32 0),
- i8* getelementptr ([21 x i8]* %str2, i32 0, i32 0),
- i8* getelementptr ([33 x i8]* %str3, i32 0, i32 0) }, section "llvm.metadata"
-
+!3 = metadata !{
+ i32 458769, ;; Tag
+ i32 0, ;; Unused
+ i32 4, ;; Language Id
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
+ metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
+ i1 true, ;; Main Compile Unit
+ i1 false, ;; Optimized compile unit
+ metadata !"", ;; Compiler flags
+ i32 0} ;; Runtime version
+
;;
;; Define the compile unit for the header file "/Users/mine/sources/MyHeader.h".
;;
-%<a href="#format_compile_units">llvm.dbg.compile_unit2</a> = internal constant %<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> {
- uint add(uint 17, uint 262144),
- { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_units</a> to { }*),
- uint 1,
- uint 1,
- i8* getelementptr ([11 x i8]* %str4, int 0, int 0),
- i8* getelementptr ([21 x i8]* %str2, int 0, int 0),
- i8* getelementptr ([33 x i8]* %str3, int 0, int 0) }, section "llvm.metadata"
+!1 = metadata !{
+ i32 458769, ;; Tag
+ i32 0, ;; Unused
+ i32 4, ;; Language Id
+ metadata !"MyHeader.h",
+ metadata !"/Users/mine/sources",
+ metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
+ i1 false, ;; Main Compile Unit
+ i1 false, ;; Optimized compile unit
+ metadata !"", ;; Compiler flags
+ i32 0} ;; Runtime version
-;;
-;; Define each of the strings used in the compile units.
-;;
-%str1 = internal constant [13 x i8] c"MySource.cpp\00", section "llvm.metadata";
-%str2 = internal constant [21 x i8] c"/Users/mine/sources/\00", section "llvm.metadata";
-%str3 = internal constant [33 x i8] c"4.0.1 LLVM (LLVM research group)\00", section "llvm.metadata";
-%str4 = internal constant [11 x i8] c"MyHeader.h\00", section "llvm.metadata";
...
</pre>
</div>
@@ -1167,65 +1142,51 @@ int MyGlobal = 100;
<div class="doc_code">
<pre>
;;
-;; Define types used. One for global variable anchors, one for the global
-;; variable descriptor, one for the global's basic type and one for the global's
-;; compile unit.
-;;
-%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type { uint, uint }
-%<a href="#format_global_variables">llvm.dbg.global_variable.type</a> = type { uint, { }*, { }*, i8*, { }*, uint, { }*, bool, bool, { }*, uint }
-%<a href="#format_basic_type">llvm.dbg.basictype.type</a> = type { uint, { }*, i8*, { }*, int, uint, uint, uint, uint }
-%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = ...
-...
-;;
;; Define the global itself.
;;
%MyGlobal = global int 100
...
;;
-;; Define the anchor for global variables. Note that the second field of the
-;; anchor is 52, which is the same as the tag for global variables
-;; (52 = DW_TAG_variable.)
+;; List of debug info of globals
;;
-%<a href="#format_global_variables">llvm.dbg.global_variables</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 52 }, section "llvm.metadata"
+!llvm.dbg.gv = !{!0}
;;
;; Define the global variable descriptor. Note the references to the compile
;; unit where it is defined and to the global variable itself.
;;
-%<a href="#format_global_variables">llvm.dbg.global_variable</a> = internal constant %<a href="#format_global_variables">llvm.dbg.global_variable.type</a> {
- uint add(uint 52, uint 262144),
- { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_global_variables">llvm.dbg.global_variables</a> to { }*),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([9 x i8]* %str1, int 0, int 0),
- i8* getelementptr ([1 x i8]* %str2, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- uint 1,
- { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*),
- bool false,
- bool true,
- { }* cast (int* %MyGlobal to { }*) }, section "llvm.metadata"
-
+!0 = metadata !{
+ i32 458804, ;; Tag
+ i32 0, ;; Unused
+ metadata !1, ;; Context
+ metadata !"MyGlobal", ;; Name
+ metadata !"MyGlobal", ;; Display Name
+ metadata !"MyGlobal", ;; Linkage Name
+ metadata !1, ;; Compile Unit
+ i32 1, ;; Line Number
+ metadata !2, ;; Type
+ i1 false, ;; Is a local variable
+ i1 true, ;; Is this a definition
+ i32* @MyGlobal ;; The global variable
+}
+
;;
;; Define the basic type of 32 bit signed integer. Note that since int is an
;; intrinsic type the source file is NULL and line 0.
;;
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([4 x i8]* %str3, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 5 }, section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+}
-;;
-;; Define the names of the global variable and basic type.
-;;
-%str1 = internal constant [9 x i8] c"MyGlobal\00", section "llvm.metadata"
-%str2 = internal constant [1 x i8] c"\00", section "llvm.metadata"
-%str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata"
</pre>
</div>
@@ -1253,46 +1214,27 @@ int main(int argc, char *argv[]) {
<div class="doc_code">
<pre>
;;
-;; Define types used. One for subprogram anchors, one for the subprogram
-;; descriptor, one for the global's basic type and one for the subprogram's
-;; compile unit.
-;;
-%<a href="#format_subprograms">llvm.dbg.subprogram.type</a> = type { uint, { }*, { }*, i8*, { }*, bool, bool }
-%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type { uint, uint }
-%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = ...
-
-;;
;; Define the subprogram descriptor. Note that the tag 458798 is
;; 46 (DW_TAG_subprogram) + LLVMDebugVersion.
;;
-%<a href="#format_subprograms">llvm.dbg.subprograms</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 46 }, section "llvm.metadata"
-
-;;
-;; Define the descriptor for the subprogram. TODO - more details.
-;;
-%<a href="#format_subprograms">llvm.dbg.subprogram</a> = internal constant %<a href="#format_subprograms">llvm.dbg.subprogram.type</a> {
- uint add(uint 46, uint 262144),
- { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_subprograms">llvm.dbg.subprograms</a> to { }*),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([5 x i8]* %str1, int 0, int 0),
- i8* getelementptr ([1 x i8]* %str2, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- uint 1,
- { }* null,
- bool false,
- bool true }, section "llvm.metadata"
-
-;;
-;; Define the name of the subprogram.
-;;
-%str1 = internal constant [5 x i8] c"main\00", section "llvm.metadata"
-%str2 = internal constant [1 x i8] c"\00", section "llvm.metadata"
-
+!0 = metadata !{
+ i32 458798, ;; Tag
+ i32 0, ;; Unused
+ metadata !1, ;; Context
+ metadata !"main", ;; Name
+ metadata !"main", ;; Display name
+ metadata !"main", ;; Linkage name
+ metadata !1, ;; Compile unit
+ i32 1, ;; Line number
+ metadata !2, ;; Type
+ i1 false, ;; Is local
+ i1 true ;; Is definition
+}
;;
;; Define the subprogram itself.
;;
-int %main(int %argc, i8** %argv) {
+define i32 @main(i32 %argc, i8** %argv) {
...
}
</pre>
@@ -1320,17 +1262,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([5 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 2 }, section "llvm.metadata"
-%str1 = internal constant [5 x i8] c"bool\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+ metadata !"bool", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 2 ;; Encoding
+}
</pre>
</div>
@@ -1345,17 +1288,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([5 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 8,
- uint 8,
- uint 0,
- uint 6 }, section "llvm.metadata"
-%str1 = internal constant [5 x i8] c"char\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+ metadata !"char", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 6 ;; Encoding
+}
</pre>
</div>
@@ -1370,17 +1314,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([14 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 8,
- uint 8,
- uint 0,
- uint 8 }, section "llvm.metadata"
-%str1 = internal constant [14 x i8] c"unsigned char\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"unsigned char", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 8 ;; Encoding
+}
</pre>
</div>
@@ -1395,17 +1340,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([10 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 16,
- uint 16,
- uint 0,
- uint 5 }, section "llvm.metadata"
-%str1 = internal constant [10 x i8] c"short int\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"short int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 16, ;; Size in Bits
+ i64 16, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+}
</pre>
</div>
@@ -1420,17 +1366,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([19 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 16,
- uint 16,
- uint 0,
- uint 7 }, section "llvm.metadata"
-%str1 = internal constant [19 x i8] c"short unsigned int\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"short unsigned int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 16, ;; Size in Bits
+ i64 16, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+}
</pre>
</div>
@@ -1445,17 +1392,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([4 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 5 }, section "llvm.metadata"
-%str1 = internal constant [4 x i8] c"int\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+}
</pre></div>
</div>
@@ -1469,17 +1417,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([13 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 7 }, section "llvm.metadata"
-%str1 = internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"unsigned int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+}
</pre>
</div>
@@ -1494,17 +1443,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([14 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 64,
- uint 64,
- uint 0,
- uint 5 }, section "llvm.metadata"
-%str1 = internal constant [14 x i8] c"long long int\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"long long int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+}
</pre>
</div>
@@ -1519,17 +1469,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([23 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 64,
- uint 64,
- uint 0,
- uint 7 }, section "llvm.metadata"
-%str1 = internal constant [23 x 8] c"long long unsigned int\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"long long unsigned int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+}
</pre>
</div>
@@ -1544,17 +1495,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([6 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 4 }, section "llvm.metadata"
-%str1 = internal constant [6 x i8] c"float\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"float", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 4 ;; Encoding
+}
</pre>
</div>
@@ -1569,17 +1521,18 @@ int %main(int %argc, i8** %argv) {
<div class="doc_code">
<pre>
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- 8* getelementptr ([7 x 8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 64,
- uint 64,
- uint 0,
- uint 4 }, section "llvm.metadata"
-%str1 = internal constant [7 x 8] c"double\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"double", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 4 ;; Encoding
+}
</pre>
</div>
@@ -1607,60 +1560,64 @@ typedef const int *IntPtr;
;;
;; Define the typedef "IntPtr".
;;
-%<a href="#format_derived_type">llvm.dbg.derivedtype1</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
- uint add(uint 22, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([7 x 8]* %str1, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- int 1,
- uint 0,
- uint 0,
- uint 0,
- { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype2</a> to { }*) }, section "llvm.metadata"
-%str1 = internal constant [7 x 8] c"IntPtr\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458774, ;; Tag
+ metadata !1, ;; Context
+ metadata !"IntPtr", ;; Name
+ metadata !3, ;; Compile unit
+ i32 0, ;; Line number
+ i64 0, ;; Size in bits
+ i64 0, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !4 ;; Derived From type
+}
;;
;; Define the pointer type.
;;
-%<a href="#format_derived_type">llvm.dbg.derivedtype2</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
- uint add(uint 15, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* null,
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype3</a> to { }*) }, section "llvm.metadata"
-
+!4 = metadata !{
+ i32 458767, ;; Tag
+ metadata !1, ;; Context
+ metadata !"", ;; Name
+ metadata !1, ;; Compile unit
+ i32 0, ;; Line number
+ i64 64, ;; Size in bits
+ i64 64, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+}
;;
;; Define the const type.
;;
-%<a href="#format_derived_type">llvm.dbg.derivedtype3</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
- uint add(uint 38, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* null,
- { }* null,
- int 0,
- uint 0,
- uint 0,
- uint 0,
- { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype1</a> to { }*) }, section "llvm.metadata"
-
+!5 = metadata !{
+ i32 458790, ;; Tag
+ metadata !1, ;; Context
+ metadata !"", ;; Name
+ metadata !1, ;; Compile unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !6 ;; Derived From type
+}
;;
;; Define the int type.
;;
-%<a href="#format_basic_type">llvm.dbg.basictype1</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- 8* getelementptr ([4 x 8]* %str2, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 5 }, section "llvm.metadata"
-%str2 = internal constant [4 x 8] c"int\00", section "llvm.metadata"
+!6 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; Compile unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+  i32 5  ;; Encoding
+}
</pre>
</div>
@@ -1692,86 +1649,88 @@ struct Color {
;;
;; Define basic type for unsigned int.
;;
-%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
- uint add(uint 36, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([13 x i8]* %str1, int 0, int 0),
- { }* null,
- int 0,
- uint 32,
- uint 32,
- uint 0,
- uint 7 }, section "llvm.metadata"
-%str1 = internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata"
-
+!5 = metadata !{
+ i32 458788, ;; Tag
+ metadata !1, ;; Context
+  metadata !"unsigned int", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+}
;;
;; Define composite type for struct Color.
;;
-%<a href="#format_composite_type">llvm.dbg.compositetype</a> = internal constant %<a href="#format_composite_type">llvm.dbg.compositetype.type</a> {
- uint add(uint 19, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([6 x i8]* %str2, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- int 1,
- uint 96,
- uint 32,
- uint 0,
- { }* null,
- { }* cast ([3 x { }*]* %llvm.dbg.array to { }*) }, section "llvm.metadata"
-%str2 = internal constant [6 x i8] c"Color\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458771, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Color", ;; Name
+ metadata !1, ;; Compile unit
+ i32 1, ;; Line number
+ i64 96, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ null, ;; Derived From
+ metadata !3, ;; Elements
+ i32 0 ;; Runtime Language
+}
;;
;; Define the Red field.
;;
-%<a href="#format_derived_type">llvm.dbg.derivedtype1</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
- uint add(uint 13, uint 262144),
- { }* null,
- i8* getelementptr ([4 x i8]* %str3, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- int 2,
- uint 32,
- uint 32,
- uint 0,
- { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*) }, section "llvm.metadata"
-%str3 = internal constant [4 x i8] c"Red\00", section "llvm.metadata"
+!4 = metadata !{
+ i32 458765, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Red", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 2, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+}
;;
;; Define the Green field.
;;
-%<a href="#format_derived_type">llvm.dbg.derivedtype2</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
- uint add(uint 13, uint 262144),
- { }* null,
- i8* getelementptr ([6 x i8]* %str4, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- int 3,
- uint 32,
- uint 32,
- uint 32,
- { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*) }, section "llvm.metadata"
-%str4 = internal constant [6 x i8] c"Green\00", section "llvm.metadata"
+!6 = metadata !{
+ i32 458765, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Green", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 3, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 32, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+}
;;
;; Define the Blue field.
;;
-%<a href="#format_derived_type">llvm.dbg.derivedtype3</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
- uint add(uint 13, uint 262144),
- { }* null,
- i8* getelementptr ([5 x i8]* %str5, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- int 4,
- uint 32,
- uint 32,
- uint 64,
- { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*) }, section "llvm.metadata"
-%str5 = internal constant [5 x 8] c"Blue\00", section "llvm.metadata"
+!7 = metadata !{
+ i32 458765, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Blue", ;; Name
+ metadata !1, ;; Compile Unit
+ i32 4, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 64, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+}
;;
;; Define the array of fields used by the composite type Color.
;;
-%llvm.dbg.array = internal constant [3 x { }*] [
- { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype1</a> to { }*),
- { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype2</a> to { }*),
- { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype3</a> to { }*) ], section "llvm.metadata"
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
</pre>
</div>
@@ -1803,53 +1762,41 @@ enum Trees {
;;
;; Define composite type for enum Trees
;;
-%<a href="#format_composite_type">llvm.dbg.compositetype</a> = internal constant %<a href="#format_composite_type">llvm.dbg.compositetype.type</a> {
- uint add(uint 4, uint 262144),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- i8* getelementptr ([6 x i8]* %str1, int 0, int 0),
- { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
- int 1,
- uint 32,
- uint 32,
- uint 0,
- { }* null,
- { }* cast ([3 x { }*]* %llvm.dbg.array to { }*) }, section "llvm.metadata"
-%str1 = internal constant [6 x i8] c"Trees\00", section "llvm.metadata"
+!2 = metadata !{
+ i32 458756, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Trees", ;; Name
+ metadata !1, ;; Compile unit
+ i32 1, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ null, ;; Derived From type
+ metadata !3, ;; Elements
+ i32 0 ;; Runtime language
+}
+
+;;
+;; Define the array of enumerators used by composite type Trees.
+;;
+!3 = metadata !{metadata !4, metadata !5, metadata !6}
;;
;; Define Spruce enumerator.
;;
-%<a href="#format_enumeration">llvm.dbg.enumerator1</a> = internal constant %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> {
- uint add(uint 40, uint 262144),
- i8* getelementptr ([7 x i8]* %str2, int 0, int 0),
- int 100 }, section "llvm.metadata"
-%str2 = internal constant [7 x i8] c"Spruce\00", section "llvm.metadata"
+!4 = metadata !{i32 458792, metadata !"Spruce", i64 100}
;;
;; Define Oak enumerator.
;;
-%<a href="#format_enumeration">llvm.dbg.enumerator2</a> = internal constant %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> {
- uint add(uint 40, uint 262144),
- i8* getelementptr ([4 x i8]* %str3, int 0, int 0),
- int 200 }, section "llvm.metadata"
-%str3 = internal constant [4 x i8] c"Oak\00", section "llvm.metadata"
+!5 = metadata !{i32 458792, metadata !"Oak", i64 200}
;;
;; Define Maple enumerator.
;;
-%<a href="#format_enumeration">llvm.dbg.enumerator3</a> = internal constant %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> {
- uint add(uint 40, uint 262144),
- i8* getelementptr ([6 x i8]* %str4, int 0, int 0),
- int 300 }, section "llvm.metadata"
-%str4 = internal constant [6 x i8] c"Maple\00", section "llvm.metadata"
+!6 = metadata !{i32 458792, metadata !"Maple", i64 300}
-;;
-;; Define the array of enumerators used by composite type Trees.
-;;
-%llvm.dbg.array = internal constant [3 x { }*] [
- { }* cast (%<a href="#format_enumeration">llvm.dbg.enumerator.type</a>* %<a href="#format_enumeration">llvm.dbg.enumerator1</a> to { }*),
- { }* cast (%<a href="#format_enumeration">llvm.dbg.enumerator.type</a>* %<a href="#format_enumeration">llvm.dbg.enumerator2</a> to { }*),
- { }* cast (%<a href="#format_enumeration">llvm.dbg.enumerator.type</a>* %<a href="#format_enumeration">llvm.dbg.enumerator3</a> to { }*) ], section "llvm.metadata"
</pre>
</div>
@@ -1866,7 +1813,7 @@ enum Trees {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-26 03:49:18 +0200 (Fri, 26 Jun 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
index c4e85d1948d5..aca7e5d5f874 100644
--- a/docs/SystemLibrary.html
+++ b/docs/SystemLibrary.html
@@ -23,7 +23,6 @@
<li><a href="#nounused">No Unused Functionality</a></li>
<li><a href="#virtuals">No Virtual Methods</a></li>
<li><a href="#softerrors">Minimize Soft Errors</a></li>
- <li><a href="#throw">Throw Only std::string</a></li>
<li><a href="#throw_spec">No throw() Specifications</a></li>
<li><a href="#organization">Code Organization</a></li>
<li><a href="#semantics">Consistent Semantics</a></li>
@@ -76,7 +75,7 @@
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="headers">Don't Inlcude System Headers</a>
+<div class="doc_subsection"><a name="headers">Don't Include System Headers</a>
</div>
<div class="doc_text">
<p>Except in <tt>lib/System</tt>, no LLVM source code should directly
@@ -211,8 +210,8 @@
"out of space", "bad disk sector", or "system call interrupted". We'll call
the first group "<i>soft</i>" errors and the second group "<i>hard</i>"
 errors.</p>
- <p>lib/System must always attempt to minimize soft errors and always just
- throw a std::string on hard errors. This is a design requirement because the
+ <p>lib/System must always attempt to minimize soft errors.
+ This is a design requirement because the
minimization of soft errors can affect the granularity and the nature of the
interface. In general, if you find that you're wanting to throw soft errors,
you must review the granularity of the interface because it is likely you're
@@ -240,31 +239,6 @@
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="throw">Throw Only std::string</a></div>
-<div class="doc_text">
- <p>If an error occurs that lib/System cannot handle, the only action taken by
- lib/System is to throw an instance of std:string. The contents of the string
- must explain both what happened and the context in which it happened. The
- format of the string should be a (possibly empty) list of contexts each
- terminated with a : and a space, followed by the error message, optionally
- followed by a reason, and optionally followed by a suggestion.</p>
- <p>For example, failure to open a file named "foo" could result in a message
- like:</p>
- <ul><li>foo: Unable to open file because it doesn't exist."</li></ul>
- <p>The "foo:" part is the context. The "Unable to open file" part is the error
- message. The "because it doesn't exist." part is the reason. This message has
- no suggestion. Where possible, the implementation of lib/System should use
- operating system specific facilities for converting the error code returned by
- a system call into an error message. This will help to make the error message
- more familiar to users of that type of operating system.</p>
- <p>Note that this requirement precludes the throwing of any other exceptions.
- For example, various C++ standard library functions can cause exceptions to be
- thrown (e.g. out of memory situation). In all cases, if there is a possibility
- that non-string exceptions could be thrown, the lib/System library must ensure
- that the exceptions are translated to std::string form.</p>
-</div>
-
-<!-- ======================================================================= -->
<div class="doc_subsection"><a name="throw_spec">No throw Specifications</a>
</div>
<div class="doc_text">
@@ -273,7 +247,8 @@
compiler does not insert additional exception handling code into the interface
functions. This is a performance consideration: lib/System functions are at
the bottom of many call chains and as such can be frequently called. We
- need them to be as efficient as possible.</p>
+ need them to be as efficient as possible. However, no routines in the
+ system library should actually throw exceptions.</p>
</div>
<!-- ======================================================================= -->
@@ -338,7 +313,7 @@
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $
+ Last modified: $Date: 2009-07-17 23:11:24 +0200 (Fri, 17 Jul 2009) $
</address>
</body>
</html>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 568b5728850f..7ae1ca489e32 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -371,8 +371,8 @@ supported include:</p>
<dd>string value</dd>
<dt><tt>[{ ... }]</tt></dt>
<dd>code fragment</dd>
-<dt><tt>[ X, Y, Z ]<type></tt></dt>
- <dd>list value. <type> is the type of the list
+<dt><tt>[ X, Y, Z ]&lt;type&gt;</tt></dt>
+ <dd>list value. &lt;type&gt; is the type of the list
element and is usually optional. In rare cases,
TableGen is unable to deduce the element type in
which case the user must specify it explicitly.</dd>
@@ -398,23 +398,16 @@ which case the user must specify it explicitly.</dd>
<dd>a dag value. The first element is required to be a record definition, the
remaining elements in the list may be arbitrary other values, including nested
`<tt>dag</tt>' values.</dd>
-<dt><tt>(implicit a)</tt></dt>
- <dd>an implicitly defined physical register. This tells the dag instruction
- selection emitter the input pattern's extra definitions matches implicit
- physical register definitions.</dd>
-<dt><tt>(parallel (a), (b))</tt></dt>
- <dd>a list of dags specifying parallel operations which map to the same
- instruction.</dd>
<dt><tt>!strconcat(a, b)</tt></dt>
<dd>A string value that is the result of concatenating the 'a' and 'b'
strings.</dd>
-<dt><tt>!cast<type>(a)</tt></dt>
+<dt><tt>!cast&lt;type&gt;(a)</tt></dt>
<dd>A symbol of type <em>type</em> obtained by looking up the string 'a' in
the symbol table. If the type of 'a' does not match <em>type</em>, TableGen
-aborts with an error. !cast<string> is a special case in that the argument must
+aborts with an error. !cast&lt;string&gt; is a special case in that the argument must
be an object defined by a 'def' construct.</dd>
<dt><tt>!nameconcat&lt;type&gt;(a, b)</tt></dt>
- <dd>Shorthand for !cast<type>(!strconcat(a, b))</dd>
+ <dd>Shorthand for !cast&lt;type&gt;(!strconcat(a, b))</dd>
<dt><tt>!subst(a, b, c)</tt></dt>
<dd>If 'a' and 'b' are of string type or are symbol references, substitute
'b' for 'a' in 'c.' This operation is analogous to $(subst) in GNU make.</dd>
@@ -760,6 +753,25 @@ opened, as in the case with the <tt>CALL*</tt> instructions above.</p>
</div>
<!-- *********************************************************************** -->
+<div class="doc_section"><a name="codegen">Code Generator backend info</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Expressions used by the code generator to describe instructions and isel
+patterns:</p>
+
+<dt><tt>(implicit a)</tt></dt>
+  <dd>an implicitly defined physical register. This tells the dag instruction
+  selection emitter that the input pattern's extra definitions match implicit
+ physical register definitions.</dd>
+<dt><tt>(parallel (a), (b))</tt></dt>
+ <dd>a list of dags specifying parallel operations which map to the same
+ instruction.</dd>
+
+</div>
+
+<!-- *********************************************************************** -->
<div class="doc_section"><a name="backends">TableGen backends</a></div>
<!-- *********************************************************************** -->
@@ -782,7 +794,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-29 22:05:29 +0200 (Mon, 29 Jun 2009) $
+ Last modified: $Date: 2009-10-05 04:51:06 +0200 (Mon, 05 Oct 2009) $
</address>
</body>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 32b16cadf47a..4f05d77daaed 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -29,6 +29,7 @@
<li><a href="#dgstructure">DejaGNU structure</a>
<ul>
<li><a href="#dgcustom">Writing new DejaGNU tests</a></li>
+ <li><a href="#FileCheck">The FileCheck utility</a></li>
<li><a href="#dgvars">Variables and substitutions</a></li>
<li><a href="#dgfeatures">Other features</a></li>
</ul>
@@ -448,7 +449,257 @@ negatives).</p>
</div>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="dgvars">Variables and substitutions</a></div>
+<div class="doc_subsection"><a name="FileCheck">The FileCheck utility</a></div>
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_text">
+
+<p>A powerful feature of the RUN: lines is that they allow arbitrary commands
+   to be executed as part of the test harness. While standard (portable) unix
+   tools like 'grep' work fine on run lines, as you see above, there are a lot
+   of caveats due to interaction with Tcl syntax, and we want to make sure the
+   run lines are portable to a wide range of systems. Another major problem is
+   that grep is not very good at verifying that the output of a tool
+   contains a series of strings in a specific order.  The FileCheck
+
+<p>FileCheck (whose basic command line arguments are described in <a
+   href="http://llvm.org/cmds/FileCheck.html">the FileCheck man page</a>) is
+   designed to read a file to check from standard input, and the set of things
+ to verify from a file specified as a command line argument. A simple example
+ of using FileCheck from a RUN line looks like this:</p>
+
+<div class="doc_code">
+<pre>
+; RUN: llvm-as &lt; %s | llc -march=x86-64 | <b>FileCheck %s</b>
+</pre>
+</div>
+
+<p>This syntax says to pipe the current file ("%s") into llvm-as, pipe that into
+llc, then pipe the output of llc into FileCheck. This means that FileCheck will
+be verifying its standard input (the llc output) against the filename argument
+specified (the original .ll file specified by "%s"). To see how this works,
+let's look at the rest of the .ll file (after the RUN line):</p>
+
+<div class="doc_code">
+<pre>
+define void @sub1(i32* %p, i32 %v) {
+entry:
+; <b>CHECK: sub1:</b>
+; <b>CHECK: subl</b>
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
+ ret void
+}
+
+define void @inc4(i64* %p) {
+entry:
+; <b>CHECK: inc4:</b>
+; <b>CHECK: incq</b>
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
+ ret void
+}
+</pre>
+</div>
+
+<p>Here you can see some "CHECK:" lines specified in comments. Now you can see
+how the file is piped into llvm-as, then llc, and the machine code output is
+what we are verifying. FileCheck checks the machine code output to verify that
+it matches what the "CHECK:" lines specify.</p>
+
+<p>The syntax of the CHECK: lines is very simple: they are fixed strings that
+must occur in order. FileCheck defaults to ignoring horizontal whitespace
+differences (e.g. a space is allowed to match a tab) but otherwise, the contents
+of the CHECK: line are required to match something in the test file exactly.</p>
+
+<p>One nice thing about FileCheck (compared to grep) is that it allows merging
+test cases together into logical groups. For example, because the test above
+is checking for the "sub1:" and "inc4:" labels, it will not match unless there
+is a "subl" in between those labels. If it existed somewhere else in the file,
+that would not count: "grep subl" matches if subl exists anywhere in the
+file.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a
+name="FileCheck-check-prefix">The FileCheck -check-prefix option</a></div>
+
+<div class="doc_text">
+
+<p>The FileCheck -check-prefix option allows multiple test configurations to be
+driven from one .ll file. This is useful in many circumstances, for example,
+testing different architectural variants with llc. Here's a simple example:</p>
+
+<div class="doc_code">
+<pre>
+; RUN: llvm-as &lt; %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
+; RUN: | <b>FileCheck %s -check-prefix=X32</b>
+; RUN: llvm-as &lt; %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
+; RUN: | <b>FileCheck %s -check-prefix=X64</b>
+
+define &lt;4 x i32&gt; @pinsrd_1(i32 %s, &lt;4 x i32&gt; %tmp) nounwind {
+ %tmp1 = insertelement &lt;4 x i32&gt; %tmp, i32 %s, i32 1
+ ret &lt;4 x i32&gt; %tmp1
+; <b>X32:</b> pinsrd_1:
+; <b>X32:</b> pinsrd $1, 4(%esp), %xmm0
+
+; <b>X64:</b> pinsrd_1:
+; <b>X64:</b> pinsrd $1, %edi, %xmm0
+}
+</pre>
+</div>
+
+<p>In this case, we're testing that we get the expected code generation with
+both 32-bit and 64-bit code generation.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a
+name="FileCheck-CHECK-NEXT">The "CHECK-NEXT:" directive</a></div>
+
+<div class="doc_text">
+
+<p>Sometimes you want to match lines and would like to verify that matches
+happen on exactly consecutive lines with no other lines in between them. In
+this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If
+you specified a custom check prefix, just use "&lt;PREFIX&gt;-NEXT:". For
+example, something like this works as you'd expect:</p>
+
+<div class="doc_code">
+<pre>
+define void @t2(&lt;2 x double&gt;* %r, &lt;2 x double&gt;* %A, double %B) {
+ %tmp3 = load &lt;2 x double&gt;* %A, align 16
+ %tmp7 = insertelement &lt;2 x double&gt; undef, double %B, i32 0
+ %tmp9 = shufflevector &lt;2 x double&gt; %tmp3,
+ &lt;2 x double&gt; %tmp7,
+ &lt;2 x i32&gt; &lt; i32 0, i32 2 &gt;
+ store &lt;2 x double&gt; %tmp9, &lt;2 x double&gt;* %r, align 16
+ ret void
+
+; <b>CHECK:</b> t2:
+; <b>CHECK:</b> movl 8(%esp), %eax
+; <b>CHECK-NEXT:</b> movapd (%eax), %xmm0
+; <b>CHECK-NEXT:</b> movhpd 12(%esp), %xmm0
+; <b>CHECK-NEXT:</b> movl 4(%esp), %eax
+; <b>CHECK-NEXT:</b> movapd %xmm0, (%eax)
+; <b>CHECK-NEXT:</b> ret
+}
+</pre>
+</div>
+
+<p>CHECK-NEXT: directives reject the input unless there is exactly one newline
+between it and the previous directive. A CHECK-NEXT cannot be the first
+directive in a file.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a
+name="FileCheck-CHECK-NOT">The "CHECK-NOT:" directive</a></div>
+
+<div class="doc_text">
+
+<p>The CHECK-NOT: directive is used to verify that a string doesn't occur
+between two matches (or the first match and the beginning of the file). For
+example, to verify that a load is removed by a transformation, a test like this
+can be used:</p>
+
+<div class="doc_code">
+<pre>
+define i8 @coerce_offset0(i32 %V, i32* %P) {
+ store i32 %V, i32* %P
+
+ %P2 = bitcast i32* %P to i8*
+ %P3 = getelementptr i8* %P2, i32 2
+
+ %A = load i8* %P3
+ ret i8 %A
+; <b>CHECK:</b> @coerce_offset0
+; <b>CHECK-NOT:</b> load
+; <b>CHECK:</b> ret i8
+}
+</pre>
+</div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a
+name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div>
+
+<div class="doc_text">
+
+<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
+uses of FileCheck, fixed string matching is perfectly sufficient. For some
+things, a more flexible form of matching is desired. To support this, FileCheck
+allows you to specify regular expressions in matching strings, surrounded by
+double braces: <b>{{yourregex}}</b>. Because we want to use fixed string
+matching for a majority of what we do, FileCheck has been designed to support
+mixing and matching fixed string matching with regular expressions. This allows
+you to write things like this:</p>
+
+<div class="doc_code">
+<pre>
+; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
+</pre>
+</div>
+
+<p>In this case, any offset from the ESP register will be allowed, and any xmm
+register will be allowed.</p>
+
+<p>Because regular expressions are enclosed with double braces, they are
+visually distinct, and you don't need to use escape characters within the double
+braces like you would in C. In the rare case that you want to match double
+braces explicitly from the input, you can use something ugly like
+<b>{{[{][{]}}</b> as your pattern.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a
+name="FileCheck-Variables">FileCheck Variables</a></div>
+
+<div class="doc_text">
+
+<p>It is often useful to match a pattern and then verify that it occurs again
+later in the file. For codegen tests, this can be useful to allow any register,
+but verify that the same register is used consistently later. To do this,
+FileCheck allows named variables to be defined and substituted into patterns.
+Here is a simple example:</p>
+
+<div class="doc_code">
+<pre>
+; CHECK: test5:
+; CHECK: notw <b>[[REGISTER:%[a-z]+]]</b>
+; CHECK: andw {{.*}}<b>[[REGISTER]]</b>
+</pre>
+</div>
+
+<p>The first check line matches a regex (<tt>%[a-z]+</tt>) and captures it into
+the variable "REGISTER". The second line verifies that whatever is in REGISTER
+occurs later in the file after an "andw". FileCheck variable references are
+always contained in <tt>[[ ]]</tt> pairs, and their names can be
+formed with the regex "<tt>[a-zA-Z][a-zA-Z0-9]*</tt>". If a colon follows the
+name, then it is a definition of the variable; if not, it is a use.</p>
+
+<p>FileCheck variables can be defined multiple times, and uses always get the
+latest value. Note that variables are all read at the start of a "CHECK" line
+and are all defined at the end. This means that if you have something like
+"<tt>CHECK: [[XYZ:.*]]x[[XYZ]]</tt>", the check line will read the previous
+value of the XYZ variable and define a new one after the match is performed. If
+you need to do something like this, you can take advantage of the fact that
+FileCheck is not actually line-oriented when it matches; this allows you to
+define two separate CHECK lines that match on the same line.
+</p>
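+
+<p>For example, a minimal sketch of this technique (using a made-up pattern)
+splits the self-referential check into two CHECK lines; because FileCheck
+resumes matching mid-line, the second line can use the value that the first
+captured from the very same input line:</p>
+
+<div class="doc_code">
+<pre>
+; CHECK: <b>[[XYZ:.*]]</b>x
+; CHECK: <b>[[XYZ]]</b>
+</pre>
+</div>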
+
+
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="dgvars">Variables and
+substitutions</a></div>
<!-- _______________________________________________________________________ -->
<div class="doc_text">
<p>With a RUN line there are a number of substitutions that are permitted. In
@@ -502,14 +753,6 @@ negatives).</p>
<dd>The target triplet that corresponds to the current host machine (the one
running the test cases). This should probably be called "host".<dd>
- <dt><b>prcontext</b> (%prcontext)</dt>
- <dd>Path to the prcontext tcl script that prints some context around a
- line that matches a pattern. This isn't strictly necessary as the test suite
- is run with its PATH altered to include the test/Scripts directory where
- the prcontext script is located. Note that this script is similar to
- <tt>grep -C</tt> but you should use the <tt>prcontext</tt> script because
- not all platforms support <tt>grep -C</tt>.</dd>
-
<dt><b>llvmgcc</b> (%llvmgcc)</dt>
<dd>The full path to the <tt>llvm-gcc</tt> executable as specified in the
configured LLVM environment</dd>
@@ -974,7 +1217,7 @@ know. Thanks!</p>
John T. Criswell, Reid Spencer, and Tanya Lattner<br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-26 07:44:53 +0200 (Fri, 26 Jun 2009) $
+ Last modified: $Date: 2009-09-27 10:01:44 +0200 (Sun, 27 Sep 2009) $
</address>
</body>
</html>
diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html
index 7458ecad9ec4..c6bcaf09a8bf 100644
--- a/docs/UsingLibraries.html
+++ b/docs/UsingLibraries.html
@@ -128,8 +128,6 @@
<td>Code generation for ARM architecture</td></tr>
<tr><td>LLVMCBackend</td><td><tt>.o</tt></td>
<td>'C' language code generator.</td></tr>
- <tr><td>LLVMIA64</td><td><tt>.o</tt></td>
- <td>Code generation for IA64 architecture</td></tr>
<tr><td>LLVMPowerPC</td><td><tt>.o</tt></td>
<td>Code generation for PowerPC architecture</td></tr>
<tr><td>LLVMSparc</td><td><tt>.o</tt></td>
@@ -356,14 +354,6 @@
<li>libLLVMSystem.a</li>
<li>libLLVMTarget.a</li>
</ul></dd>
- <dt><b>LLVMIA64.o</b></dt><dd><ul>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSelectionDAG.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
<dt><b>LLVMInterpreter.o</b></dt><dd><ul>
<li>LLVMExecutionEngine.o</li>
<li>libLLVMCodeGen.a</li>
@@ -442,7 +432,7 @@
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
</address>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
-<br>Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $ </div>
+<br>Last modified: $Date: 2009-07-24 02:30:09 +0200 (Fri, 24 Jul 2009) $ </div>
</body>
</html>
<!-- vim: sw=2 ts=2 ai
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index 8016283dee63..5a951853c0ae 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -22,6 +22,7 @@
<li><a href="#Preliminaries">Preliminaries</a></li>
</ul>
<li><a href="#TargetMachine">Target Machine</a></li>
+ <li><a href="#TargetRegistration">Target Registration</a></li>
<li><a href="#RegisterSet">Register Set and Register Classes</a>
<ul>
<li><a href="#RegisterDef">Defining a Register</a></li>
@@ -422,21 +423,62 @@ SparcTargetMachine::SparcTargetMachine(const Module &amp;M, const std::string &a
alignment, and then ABI preferred alignment.</li>
</ul>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="TargetRegistration">Target Registration</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>
+You must also register your target with the <tt>TargetRegistry</tt>, which is
+what other LLVM tools use to look up and use your target at
+runtime. The <tt>TargetRegistry</tt> can be used directly, but for most targets
+there are helper templates which should take care of the work for you.</p>
+
+<p>
+All targets should declare a global <tt>Target</tt> object which is used to
+represent the target during registration. Then, in the target's TargetInfo
+library, the target should define that object and use
+the <tt>RegisterTarget</tt> template to register the target. For example, the
+Sparc registration code looks like this:
+</p>
+
+<div class="doc_code">
+<pre>
+Target llvm::TheSparcTarget;
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget&lt;Triple::sparc, /*HasJIT=*/false&gt;
+ X(TheSparcTarget, "sparc", "Sparc");
+}
+</pre>
+</div>
+
<p>
-You must also register your target using the <tt>RegisterTarget</tt>
-template. (See the <tt>TargetMachineRegistry</tt> class.) For example,
-in <tt>SparcTargetMachine.cpp</tt>, the target is registered with:
+This allows the <tt>TargetRegistry</tt> to look up the target by name or by
+target triple. In addition, most targets will also register additional features
+which are available in separate libraries. These registration steps are
+separate because some clients may wish to link in only parts of the target;
+the JIT code generator, for example, does not require the assembler printer.
+Here is an example of registering the Sparc assembly printer:
</p>
<div class="doc_code">
<pre>
-namespace {
- // Register the target.
- RegisterTarget&lt;SparcTargetMachine&gt;X("sparc", "SPARC");
+extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter&lt;SparcAsmPrinter&gt; X(TheSparcTarget);
}
</pre>
</div>
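+
+<p>As a rough sketch of the client side (the triple string here is
+illustrative, and exact APIs may differ between releases), a tool can then
+locate the registered target by triple:</p>
+
+<div class="doc_code">
+<pre>
+std::string Error;
+// Returns null and sets Error if no matching target was registered.
+const Target *T = TargetRegistry::lookupTarget("sparc-unknown-linux-gnu", Error);
+</pre>
+</div>
+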
+<p>
+For more information, see
+"<a href="/doxygen/TargetRegistry_8h-source.html">llvm/Target/TargetRegistry.h</a>".
+</p>
+
</div>
<!-- *********************************************************************** -->
@@ -2038,8 +2080,8 @@ SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &amp;TM) {
<p>
The X86 assembly printer implementation (<tt>X86TargetAsmInfo</tt>) is an
-example where the target specific <tt>TargetAsmInfo</tt> class uses overridden
-methods: <tt>ExpandInlineAsm</tt> and <tt>PreferredEHDataFormat</tt>.
+example where the target specific <tt>TargetAsmInfo</tt> class uses an
+overridden method: <tt>ExpandInlineAsm</tt>.
</p>
<p>
@@ -2122,9 +2164,7 @@ in <tt>XXXGenAsmWriter.inc</tt> contains an implementation of the
The implementations of <tt>printDeclare</tt>, <tt>printImplicitDef</tt>,
<tt>printInlineAsm</tt>, and <tt>printLabel</tt> in <tt>AsmPrinter.cpp</tt> are
generally adequate for printing assembly and do not need to be
-overridden. (<tt>printBasicBlockLabel</tt> is another method that is implemented
-in <tt>AsmPrinter.cpp</tt> that may be directly used in an implementation of
-<tt>XXXAsmPrinter</tt>.)
+overridden.
</p>
<p>
@@ -2523,7 +2563,7 @@ with assembler.
<a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2009-04-05 02:44:06 +0200 (Sun, 05 Apr 2009) $
+ Last modified: $Date: 2009-09-13 00:57:37 +0200 (Sun, 13 Sep 2009) $
</address>
</body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index dd8b41d121b6..218f8ef90d43 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -179,7 +179,7 @@ source tree in the <tt>lib/Transforms/Hello</tt> directory.</p>
<div class="doc_code"><pre>
# Makefile for hello pass
-# Path to top level of LLVM heirarchy
+# Path to top level of LLVM hierarchy
LEVEL = ../../..
# Name of the library to build
@@ -223,12 +223,14 @@ Start out with:</p>
<div class="doc_code"><pre>
<b>#include</b> "<a href="http://llvm.org/doxygen/Pass_8h-source.html">llvm/Pass.h</a>"
<b>#include</b> "<a href="http://llvm.org/doxygen/Function_8h-source.html">llvm/Function.h</a>"
+<b>#include</b> "<a href="http://llvm.org/doxygen/raw__ostream_8h.html">llvm/Support/raw_ostream.h</a>"
</pre></div>
<p>Which are needed because we are writing a <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass</a></tt>, and
+href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass</a></tt>,
we are operating on <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1Function.html">Function</a></tt>'s.</p>
+href="http://llvm.org/doxygen/classllvm_1_1Function.html">Function</a></tt>'s,
+and we will be doing some printing.</p>
<p>Next we have:</p>
<div class="doc_code"><pre>
@@ -273,7 +275,7 @@ avoid using expensive C++ runtime information.</p>
<div class="doc_code"><pre>
<b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
- llvm::cerr &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
+ errs() &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
<b>return false</b>;
}
}; <i>// end of struct Hello</i>
@@ -312,6 +314,7 @@ is supplied as fourth argument. </p>
<div class="doc_code"><pre>
<b>#include</b> "<a href="http://llvm.org/doxygen/Pass_8h-source.html">llvm/Pass.h</a>"
<b>#include</b> "<a href="http://llvm.org/doxygen/Function_8h-source.html">llvm/Function.h</a>"
+<b>#include</b> "<a href="http://llvm.org/doxygen/raw__ostream_8h.html">llvm/Support/raw_ostream.h</a>"
<b>using namespace llvm;</b>
@@ -322,7 +325,7 @@ is supplied as fourth argument. </p>
Hello() : FunctionPass(&amp;ID) {}
<b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
- llvm::cerr &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
+ errs() &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
<b>return false</b>;
}
};
@@ -450,7 +453,7 @@ available, from the most general to the most specific.</p>
<p>When choosing a superclass for your Pass, you should choose the <b>most
specific</b> class possible, while still being able to meet the requirements
listed. This gives the LLVM Pass Infrastructure information necessary to
-optimize how passes are run, so that the resultant compiler isn't unneccesarily
+optimize how passes are run, so that the resultant compiler isn't unnecessarily
slow.</p>
</div>
@@ -489,7 +492,7 @@ invalidated, and are never "run".</p>
href="http://llvm.org/doxygen/classllvm_1_1ModulePass.html">ModulePass</a></tt>"
class is the most general of all superclasses that you can use. Deriving from
<tt>ModulePass</tt> indicates that your pass uses the entire program as a unit,
-refering to function bodies in no predictable order, or adding and removing
+referring to function bodies in no predictable order, or adding and removing
functions. Because nothing is known about the behavior of <tt>ModulePass</tt>
subclasses, no optimization can be done for their execution.</p>
@@ -497,7 +500,7 @@ subclasses, no optimization can be done for their execution.</p>
the getAnalysis interface
<tt>getAnalysis&lt;DominatorTree&gt;(llvm::Function *)</tt> to provide the
function to retrieve analysis result for, if the function pass does not require
-any module passes. Note that this can only be done for functions for which the
+any module or immutable passes. Note that this can only be done for functions for which the
analysis ran, e.g. in the case of dominators you should only ask for the
DominatorTree for function definitions, not declarations.</p>
@@ -1826,7 +1829,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-07-02 01:38:44 +0200 (Thu, 02 Jul 2009) $
+ Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
</address>
</body>
diff --git a/docs/index.html b/docs/index.html
index 00d48ae37c57..36ed0e2d9f86 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -41,13 +41,13 @@
<ul>
<li><a href="LangRef.html">LLVM Language Reference Manual</a> - Defines the LLVM
intermediate representation.</li>
-<li><a href="http://llvm.org/pubs/2006-04-25-GelatoLLVMIntro.html">Introduction to the LLVM Compiler Infrastructure</a> - Presentation describing LLVM.</li>
+<li><a href="http://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html">Introduction to the LLVM Compiler </a> - Presentation describing LLVM.</li>
<li><a href="http://llvm.org/pubs/2004-09-22-LCPCLLVMTutorial.html">The LLVM Compiler Framework and
Infrastructure Tutorial</a> - Tutorial for writing passes, exploring the system.</li>
<li><a href="http://llvm.org/pubs/2004-01-30-CGO-LLVM.html">LLVM: A Compilation Framework for
Lifelong Program Analysis &amp; Transformation</a> - Design overview.</li>
<li><a href="http://llvm.org/pubs/2002-12-LattnerMSThesis.html">LLVM: An Infrastructure for
-Multi-Stage Optimization</a> - More details (somewhat old now).</li>
+Multi-Stage Optimization</a> - More details (quite old now).</li>
<li><a href="GetElementPtr.html">GetElementPtr FAQ</a> - Answers to some very
frequent questions about LLVM's most frequently misunderstood instruction.</li>
</ul>
@@ -233,6 +233,9 @@ the linker and its design</li>
<li><a href="GoldPlugin.html">The LLVM gold plugin</a> - How to build your
programs with link-time optimization on Linux.</li>
+
+<li><a href="DebuggingJITedCode.html">The GDB JIT interface</a> - How to debug
+JITed code with GDB.</li>
</ul>
@@ -278,7 +281,7 @@ times each day, making it a high volume list.</li>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-03-02 00:42:51 +0100 (Mon, 02 Mar 2009) $
+ Last modified: $Date: 2009-09-21 04:34:59 +0200 (Mon, 21 Sep 2009) $
</address>
</body></html>
diff --git a/docs/re_format.7 b/docs/re_format.7
new file mode 100644
index 000000000000..0c0928716f49
--- /dev/null
+++ b/docs/re_format.7
@@ -0,0 +1,756 @@
+.\" $OpenBSD: re_format.7,v 1.14 2007/05/31 19:19:30 jmc Exp $
+.\"
+.\" Copyright (c) 1997, Phillip F Knaack. All rights reserved.
+.\"
+.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
+.\" Copyright (c) 1992, 1993, 1994
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Henry Spencer.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94
+.\"
+.Dd $Mdocdate: May 31 2007 $
+.Dt RE_FORMAT 7
+.Os
+.Sh NAME
+.Nm re_format
+.Nd POSIX regular expressions
+.Sh DESCRIPTION
+Regular expressions (REs),
+as defined in
+.St -p1003.1-2004 ,
+come in two forms:
+basic regular expressions
+(BREs)
+and extended regular expressions
+(EREs).
+Both forms of regular expressions are supported
+by the interfaces described in
+.Xr regex 3 .
+Applications dealing with regular expressions
+may use one or the other form
+(or indeed both).
+For example,
+.Xr ed 1
+uses BREs,
+whilst
+.Xr egrep 1
+talks EREs.
+Consult the manual page for the specific application to find out which
+it uses.
+.Pp
+POSIX leaves some aspects of RE syntax and semantics open;
+.Sq **
+marks decisions on these aspects that
+may not be fully portable to other POSIX implementations.
+.Pp
+This manual page first describes regular expressions in general,
+specifically extended regular expressions,
+and then discusses differences between them and basic regular expressions.
+.Sh EXTENDED REGULAR EXPRESSIONS
+An ERE is one** or more non-empty**
+.Em branches ,
+separated by
+.Sq \*(Ba .
+It matches anything that matches one of the branches.
+.Pp
+A branch is one** or more
+.Em pieces ,
+concatenated.
+It matches a match for the first, followed by a match for the second, etc.
+.Pp
+A piece is an
+.Em atom
+possibly followed by a single**
+.Sq * ,
+.Sq + ,
+.Sq ?\& ,
+or
+.Em bound .
+An atom followed by
+.Sq *
+matches a sequence of 0 or more matches of the atom.
+An atom followed by
+.Sq +
+matches a sequence of 1 or more matches of the atom.
+An atom followed by
+.Sq ?\&
+matches a sequence of 0 or 1 matches of the atom.
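+For example,
+.Sq ab+c
+matches
+.Sq abc ,
+.Sq abbc ,
+and so on,
+while
+.Sq ab?c
+matches only
+.Sq ac
+or
+.Sq abc .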
+.Pp
+A bound is
+.Sq {
+followed by an unsigned decimal integer,
+possibly followed by
+.Sq ,\&
+possibly followed by another unsigned decimal integer,
+always followed by
+.Sq } .
+The integers must lie between 0 and
+.Dv RE_DUP_MAX
+(255**) inclusive,
+and if there are two of them, the first may not exceed the second.
+An atom followed by a bound containing one integer
+.Ar i
+and no comma matches
+a sequence of exactly
+.Ar i
+matches of the atom.
+An atom followed by a bound
+containing one integer
+.Ar i
+and a comma matches
+a sequence of
+.Ar i
+or more matches of the atom.
+An atom followed by a bound
+containing two integers
+.Ar i
+and
+.Ar j
+matches a sequence of
+.Ar i
+through
+.Ar j
+(inclusive) matches of the atom.
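+For example,
+.Sq a{2,4}
+matches
+.Sq aa ,
+.Sq aaa ,
+or
+.Sq aaaa .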
+.Pp
+An atom is a regular expression enclosed in
+.Sq ()
+(matching a part of the regular expression),
+an empty set of
+.Sq ()
+(matching the null string)**,
+a
+.Em bracket expression
+(see below),
+.Sq .\&
+(matching any single character),
+.Sq ^
+(matching the null string at the beginning of a line),
+.Sq $
+(matching the null string at the end of a line),
+a
+.Sq \e
+followed by one of the characters
+.Sq ^.[$()|*+?{\e
+(matching that character taken as an ordinary character),
+a
+.Sq \e
+followed by any other character**
+(matching that character taken as an ordinary character,
+as if the
+.Sq \e
+had not been present**),
+or a single character with no other significance (matching that character).
+A
+.Sq {
+followed by a character other than a digit is an ordinary character,
+not the beginning of a bound**.
+It is illegal to end an RE with
+.Sq \e .
+.Pp
+A bracket expression is a list of characters enclosed in
+.Sq [] .
+It normally matches any single character from the list (but see below).
+If the list begins with
+.Sq ^ ,
+it matches any single character
+.Em not
+from the rest of the list
+(but see below).
+If two characters in the list are separated by
+.Sq - ,
+this is shorthand for the full
+.Em range
+of characters between those two (inclusive) in the
+collating sequence, e.g.\&
+.Sq [0-9]
+in ASCII matches any decimal digit.
+It is illegal** for two ranges to share an endpoint, e.g.\&
+.Sq a-c-e .
+Ranges are very collating-sequence-dependent,
+and portable programs should avoid relying on them.
+.Pp
+To include a literal
+.Sq ]\&
+in the list, make it the first character
+(following a possible
+.Sq ^ ) .
+To include a literal
+.Sq - ,
+make it the first or last character,
+or the second endpoint of a range.
+To use a literal
+.Sq -
+as the first endpoint of a range,
+enclose it in
+.Sq [.
+and
+.Sq .]
+to make it a collating element (see below).
+With the exception of these and some combinations using
+.Sq [
+(see next paragraphs),
+all other special characters, including
+.Sq \e ,
+lose their special significance within a bracket expression.
+.Pp
+Within a bracket expression, a collating element
+(a character,
+a multi-character sequence that collates as if it were a single character,
+or a collating-sequence name for either)
+enclosed in
+.Sq [.
+and
+.Sq .]
+stands for the sequence of characters of that collating element.
+The sequence is a single element of the bracket expression's list.
+A bracket expression containing a multi-character collating element
+can thus match more than one character,
+e.g. if the collating sequence includes a
+.Sq ch
+collating element,
+then the RE
+.Sq [[.ch.]]*c
+matches the first five characters of
+.Sq chchcc .
+.Pp
+Within a bracket expression, a collating element enclosed in
+.Sq [=
+and
+.Sq =]
+is an equivalence class, standing for the sequences of characters
+of all collating elements equivalent to that one, including itself.
+(If there are no other equivalent collating elements,
+the treatment is as if the enclosing delimiters were
+.Sq [.
+and
+.Sq .] . )
+For example, if
+.Sq x
+and
+.Sq y
+are the members of an equivalence class,
+then
+.Sq [[=x=]] ,
+.Sq [[=y=]] ,
+and
+.Sq [xy]
+are all synonymous.
+An equivalence class may not** be an endpoint of a range.
+.Pp
+Within a bracket expression, the name of a
+.Em character class
+enclosed
+in
+.Sq [:
+and
+.Sq :]
+stands for the list of all characters belonging to that class.
+Standard character class names are:
+.Bd -literal -offset indent
+alnum digit punct
+alpha graph space
+blank lower upper
+cntrl print xdigit
+.Ed
+.Pp
+These stand for the character classes defined in
+.Xr ctype 3 .
+A locale may provide others.
+A character class may not be used as an endpoint of a range.
+.Pp
+There are two special cases** of bracket expressions:
+the bracket expressions
+.Sq [[:<:]]
+and
+.Sq [[:>:]]
+match the null string at the beginning and end of a word, respectively.
+A word is defined as a sequence of
+characters starting and ending with a word character
+which is neither preceded nor followed by
+word characters.
+A word character is an
+.Em alnum
+character (as defined by
+.Xr ctype 3 )
+or an underscore.
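+For example,
+.Sq [[:<:]]stone[[:>:]]
+matches
+.Sq stone
+as a separate word,
+but matches nothing in
+.Sq limestone .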
+This is an extension,
+compatible with but not specified by POSIX,
+and should be used with
+caution in software intended to be portable to other systems.
+.Pp
+In the event that an RE could match more than one substring of a given
+string,
+the RE matches the one starting earliest in the string.
+If the RE could match more than one substring starting at that point,
+it matches the longest.
+Subexpressions also match the longest possible substrings, subject to
+the constraint that the whole match be as long as possible,
+with subexpressions starting earlier in the RE taking priority over
+ones starting later.
+Note that higher-level subexpressions thus take priority over
+their lower-level component subexpressions.
+.Pp
+Match lengths are measured in characters, not collating elements.
+A null string is considered longer than no match at all.
+For example,
+.Sq bb*
+matches the three middle characters of
+.Sq abbbc ;
+.Sq (wee|week)(knights|nights)
+matches all ten characters of
+.Sq weeknights ;
+when
+.Sq (.*).*
+is matched against
+.Sq abc ,
+the parenthesized subexpression matches all three characters;
+and when
+.Sq (a*)*
+is matched against
+.Sq bc ,
+both the whole RE and the parenthesized subexpression match the null string.
+.Pp
+If case-independent matching is specified,
+the effect is much as if all case distinctions had vanished from the
+alphabet.
+When an alphabetic that exists in multiple cases appears as an
+ordinary character outside a bracket expression, it is effectively
+transformed into a bracket expression containing both cases,
+e.g.\&
+.Sq x
+becomes
+.Sq [xX] .
+When it appears inside a bracket expression,
+all case counterparts of it are added to the bracket expression,
+so that, for example,
+.Sq [x]
+becomes
+.Sq [xX]
+and
+.Sq [^x]
+becomes
+.Sq [^xX] .
+.Pp
+No particular limit is imposed on the length of REs**.
+Programs intended to be portable should not employ REs longer
+than 256 bytes,
+as an implementation can refuse to accept such REs and remain
+POSIX-compliant.
+.Pp
+The following is a list of extended regular expressions:
+.Bl -tag -width Ds
+.It Ar c
+Any character
+.Ar c
+not listed below matches itself.
+.It \e Ns Ar c
+Any backslash-escaped character
+.Ar c
+matches itself.
+.It \&.
+Matches any single character that is not a newline
+.Pq Sq \en .
+.It Bq Ar char-class
+Matches any single character in
+.Ar char-class .
+To include a
+.Ql \&]
+in
+.Ar char-class ,
+it must be the first character.
+A range of characters may be specified by separating the end characters
+of the range with a
+.Ql - ;
+e.g.\&
+.Ar a-z
+specifies the lower case characters.
+The following literal expressions can also be used in
+.Ar char-class
+to specify sets of characters:
+.Bd -unfilled -offset indent
+[:alnum:] [:cntrl:] [:lower:] [:space:]
+[:alpha:] [:digit:] [:print:] [:upper:]
+[:blank:] [:graph:] [:punct:] [:xdigit:]
+.Ed
+.Pp
+If
+.Ql -
+appears as the first or last character of
+.Ar char-class ,
+then it matches itself.
+All other characters in
+.Ar char-class
+match themselves.
+.Pp
+Patterns in
+.Ar char-class
+of the form
+.Eo [.
+.Ar col-elm
+.Ec .]\&
+or
+.Eo [=
+.Ar col-elm
+.Ec =]\& ,
+where
+.Ar col-elm
+is a collating element, are interpreted according to
+.Xr setlocale 3
+.Pq not currently supported .
+.It Bq ^ Ns Ar char-class
+Matches any single character, other than newline, not in
+.Ar char-class .
+.Ar char-class
+is defined as above.
+.It ^
+If
+.Sq ^
+is the first character of a regular expression, then it
+anchors the regular expression to the beginning of a line.
+Otherwise, it matches itself.
+.It $
+If
+.Sq $
+is the last character of a regular expression,
+it anchors the regular expression to the end of a line.
+Otherwise, it matches itself.
+.It [[:<:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the beginning of a word.
+.It [[:>:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the end of a word.
+.It Pq Ar re
+Defines a subexpression
+.Ar re .
+Any set of characters enclosed in parentheses
+matches whatever the set of characters without parentheses matches
+(that is a long-winded way of saying the constructs
+.Sq (re)
+and
+.Sq re
+match identically).
+.It *
+Matches the single character regular expression or subexpression
+immediately preceding it zero or more times.
+If
+.Sq *
+is the first character of a regular expression or subexpression,
+then it matches itself.
+The
+.Sq *
+operator sometimes yields unexpected results.
+For example, the regular expression
+.Ar b*
+matches the beginning of the string
+.Qq abbb
+(as opposed to the substring
+.Qq bbb ) ,
+since a null match is the only leftmost match.
+.It +
+Matches the single character regular expression
+or subexpression immediately preceding it
+one or more times.
+.It ?
+Matches the single character regular expression
+or subexpression immediately preceding it
+0 or 1 times.
+.Sm off
+.It Xo
+.Pf { Ar n , m No }\ \&
+.Pf { Ar n , No }\ \&
+.Pf { Ar n No }
+.Xc
+.Sm on
+Matches the single character regular expression or subexpression
+immediately preceding it at least
+.Ar n
+and at most
+.Ar m
+times.
+If
+.Ar m
+is omitted, then it matches at least
+.Ar n
+times.
+If the comma is also omitted, then it matches exactly
+.Ar n
+times.
+.It \*(Ba
+Used to separate patterns.
+For example,
+the pattern
+.Sq cat\*(Badog
+matches either
+.Sq cat
+or
+.Sq dog .
+.El
+.Sh BASIC REGULAR EXPRESSIONS
+Basic regular expressions differ in several respects:
+.Bl -bullet -offset 3n
+.It
+.Sq \*(Ba ,
+.Sq + ,
+and
+.Sq ?\&
+are ordinary characters and there is no equivalent
+for their functionality.
+.It
+The delimiters for bounds are
+.Sq \e{
+and
+.Sq \e} ,
+with
+.Sq {
+and
+.Sq }
+by themselves ordinary characters.
+.It
+The parentheses for nested subexpressions are
+.Sq \e(
+and
+.Sq \e) ,
+with
+.Sq (
+and
+.Sq )\&
+by themselves ordinary characters.
+.It
+.Sq ^
+is an ordinary character except at the beginning of the
+RE or** the beginning of a parenthesized subexpression.
+.It
+.Sq $
+is an ordinary character except at the end of the
+RE or** the end of a parenthesized subexpression.
+.It
+.Sq *
+is an ordinary character if it appears at the beginning of the
+RE or the beginning of a parenthesized subexpression
+(after a possible leading
+.Sq ^ ) .
+.It
+Finally, there is one new type of atom, a
+.Em back-reference :
+.Sq \e
+followed by a non-zero decimal digit
+.Ar d
+matches the same sequence of characters matched by the
+.Ar d Ns th
+parenthesized subexpression
+(numbering subexpressions by the positions of their opening parentheses,
+left to right),
+so that, for example,
+.Sq \e([bc]\e)\e1
+matches
+.Sq bb\&
+or
+.Sq cc
+but not
+.Sq bc .
+.El
+.Pp
+The following is a list of basic regular expressions:
+.Bl -tag -width Ds
+.It Ar c
+Any character
+.Ar c
+not listed below matches itself.
+.It \e Ns Ar c
+Any backslash-escaped character
+.Ar c ,
+except for
+.Sq { ,
+.Sq } ,
+.Sq \&( ,
+and
+.Sq \&) ,
+matches itself.
+.It \&.
+Matches any single character that is not a newline
+.Pq Sq \en .
+.It Bq Ar char-class
+Matches any single character in
+.Ar char-class .
+To include a
+.Ql \&]
+in
+.Ar char-class ,
+it must be the first character.
+A range of characters may be specified by separating the end characters
+of the range with a
+.Ql - ;
+e.g.\&
+.Ar a-z
+specifies the lower case characters.
+The following literal expressions can also be used in
+.Ar char-class
+to specify sets of characters:
+.Bd -unfilled -offset indent
+[:alnum:] [:cntrl:] [:lower:] [:space:]
+[:alpha:] [:digit:] [:print:] [:upper:]
+[:blank:] [:graph:] [:punct:] [:xdigit:]
+.Ed
+.Pp
+If
+.Ql -
+appears as the first or last character of
+.Ar char-class ,
+then it matches itself.
+All other characters in
+.Ar char-class
+match themselves.
+.Pp
+Patterns in
+.Ar char-class
+of the form
+.Eo [.
+.Ar col-elm
+.Ec .]\&
+or
+.Eo [=
+.Ar col-elm
+.Ec =]\& ,
+where
+.Ar col-elm
+is a collating element, are interpreted according to
+.Xr setlocale 3
+.Pq not currently supported .
+.It Bq ^ Ns Ar char-class
+Matches any single character, other than newline, not in
+.Ar char-class .
+.Ar char-class
+is defined as above.
+.It ^
+If
+.Sq ^
+is the first character of a regular expression, then it
+anchors the regular expression to the beginning of a line.
+Otherwise, it matches itself.
+.It $
+If
+.Sq $
+is the last character of a regular expression,
+it anchors the regular expression to the end of a line.
+Otherwise, it matches itself.
+.It [[:<:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the beginning of a word.
+.It [[:>:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the end of a word.
+.It \e( Ns Ar re Ns \e)
+Defines a subexpression
+.Ar re .
+Subexpressions may be nested.
+A subsequent backreference of the form
+.Pf \e Ns Ar n ,
+where
+.Ar n
+is a number in the range [1,9], expands to the text matched by the
+.Ar n Ns th
+subexpression.
+For example, the regular expression
+.Ar \e(.*\e)\e1
+matches any string consisting of identical adjacent substrings.
+Subexpressions are ordered relative to their left delimiter.
+.It *
+Matches the single character regular expression or subexpression
+immediately preceding it zero or more times.
+If
+.Sq *
+is the first character of a regular expression or subexpression,
+then it matches itself.
+The
+.Sq *
+operator sometimes yields unexpected results.
+For example, the regular expression
+.Ar b*
+matches the beginning of the string
+.Qq abbb
+(as opposed to the substring
+.Qq bbb ) ,
+since a null match is the only leftmost match.
+.Sm off
+.It Xo
+.Pf \e{ Ar n , m No \e}\ \&
+.Pf \e{ Ar n , No \e}\ \&
+.Pf \e{ Ar n No \e}
+.Xc
+.Sm on
+Matches the single character regular expression or subexpression
+immediately preceding it at least
+.Ar n
+and at most
+.Ar m
+times.
+If
+.Ar m
+is omitted, then it matches at least
+.Ar n
+times.
+If the comma is also omitted, then it matches exactly
+.Ar n
+times.
+.El
+.Sh SEE ALSO
+.Xr ctype 3 ,
+.Xr regex 3
+.Sh STANDARDS
+.St -p1003.1-2004 :
+Base Definitions, Chapter 9 (Regular Expressions).
+.Sh BUGS
+Having two kinds of REs is a botch.
+.Pp
+The current POSIX spec says that
+.Sq )\&
+is an ordinary character in the absence of an unmatched
+.Sq ( ;
+this was an unintentional result of a wording error,
+and change is likely.
+Avoid relying on it.
+.Pp
+Back-references are a dreadful botch,
+posing major problems for efficient implementations.
+They are also somewhat vaguely defined
+(does
+.Sq a\e(\e(b\e)*\e2\e)*d
+match
+.Sq abbbd ? ) .
+Avoid using them.
+.Pp
+POSIX's specification of case-independent matching is vague.
+The
+.Dq one case implies all cases
+definition given above
+is the current consensus among implementors as to the right interpretation.
+.Pp
+The syntax for word boundaries is incredibly ugly.
diff --git a/docs/tutorial/JITTutorial1.html b/docs/tutorial/JITTutorial1.html
index ac3958e64273..3b7b8dea1acc 100644
--- a/docs/tutorial/JITTutorial1.html
+++ b/docs/tutorial/JITTutorial1.html
@@ -107,7 +107,7 @@ first chunk of our <code>makeLLVMModule()</code>:</p>
<pre>
Module* makeLLVMModule() {
// Module Construction
- Module* mod = new Module("test");
+ Module* mod = new Module("test", getGlobalContext());
</pre>
</div>
@@ -153,7 +153,7 @@ function will interoperate properly with C code, which is a good thing.</p>
<div class="doc_code">
<pre>
- BasicBlock* block = BasicBlock::Create("entry", mul_add);
+ BasicBlock* block = BasicBlock::Create(getGlobalContext(), "entry", mul_add);
IRBuilder&lt;&gt; builder(block);
</pre>
</div>
@@ -200,7 +200,7 @@ function will interoperate properly with C code, which is a good thing.</p>
<a href="mailto:owen@apple.com">Owen Anderson</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2007-10-17 11:05:13 -0700 (Wed, 17 Oct 2007) $
+ Last modified: $Date: 2009-07-21 11:05:13 -0700 (Tue, 21 Jul 2009) $
</address>
</body>
diff --git a/docs/tutorial/JITTutorial2.html b/docs/tutorial/JITTutorial2.html
index c2483e4d01bc..504d96597b00 100644
--- a/docs/tutorial/JITTutorial2.html
+++ b/docs/tutorial/JITTutorial2.html
@@ -100,11 +100,11 @@ Module* makeLLVMModule() {
<div class="doc_code">
<pre>
- BasicBlock* entry = BasicBlock::Create(&quot;entry&quot;, gcd);
- BasicBlock* ret = BasicBlock::Create(&quot;return&quot;, gcd);
- BasicBlock* cond_false = BasicBlock::Create(&quot;cond_false&quot;, gcd);
- BasicBlock* cond_true = BasicBlock::Create(&quot;cond_true&quot;, gcd);
- BasicBlock* cond_false_2 = BasicBlock::Create(&quot;cond_false&quot;, gcd);
+ BasicBlock* entry = BasicBlock::Create(getGlobalContext(), &quot;entry&quot;, gcd);
+ BasicBlock* ret = BasicBlock::Create(getGlobalContext(), &quot;return&quot;, gcd);
+ BasicBlock* cond_false = BasicBlock::Create(getGlobalContext(), &quot;cond_false&quot;, gcd);
+ BasicBlock* cond_true = BasicBlock::Create(getGlobalContext(), &quot;cond_true&quot;, gcd);
+ BasicBlock* cond_false_2 = BasicBlock::Create(getGlobalContext(), &quot;cond_false&quot;, gcd);
</pre>
</div>
diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html
index 018d0be76032..5bcd0dd2c7ff 100644
--- a/docs/tutorial/LangImpl2.html
+++ b/docs/tutorial/LangImpl2.html
@@ -84,7 +84,7 @@ public:
class NumberExprAST : public ExprAST {
double Val;
public:
- explicit NumberExprAST(double val) : Val(val) {}
+ NumberExprAST(double val) : Val(val) {}
};
</pre>
</div>
@@ -107,7 +107,7 @@ in the basic form of the Kaleidoscope language:
class VariableExprAST : public ExprAST {
std::string Name;
public:
- explicit VariableExprAST(const std::string &amp;name) : Name(name) {}
+ VariableExprAST(const std::string &amp;name) : Name(name) {}
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -333,9 +333,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -815,6 +815,7 @@ course.) To build this, just compile with:</p>
<div class="doc_code">
<pre>
#include &lt;cstdio&gt;
+#include &lt;cstdlib&gt;
#include &lt;string&gt;
#include &lt;map&gt;
#include &lt;vector&gt;
@@ -832,7 +833,7 @@ enum Token {
tok_def = -2, tok_extern = -3,
// primary
- tok_identifier = -4, tok_number = -5,
+ tok_identifier = -4, tok_number = -5
};
static std::string IdentifierStr; // Filled in if tok_identifier
@@ -900,14 +901,14 @@ public:
class NumberExprAST : public ExprAST {
double Val;
public:
- explicit NumberExprAST(double val) : Val(val) {}
+ NumberExprAST(double val) : Val(val) {}
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
class VariableExprAST : public ExprAST {
std::string Name;
public:
- explicit VariableExprAST(const std::string &amp;name) : Name(name) {}
+ VariableExprAST(const std::string &amp;name) : Name(name) {}
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -1003,9 +1004,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -1149,7 +1150,7 @@ static PrototypeAST *ParseExtern() {
//===----------------------------------------------------------------------===//
static void HandleDefinition() {
- if (FunctionAST *F = ParseDefinition()) {
+ if (ParseDefinition()) {
fprintf(stderr, "Parsed a function definition.\n");
} else {
// Skip token for error recovery.
@@ -1158,7 +1159,7 @@ static void HandleDefinition() {
}
static void HandleExtern() {
- if (PrototypeAST *P = ParseExtern()) {
+ if (ParseExtern()) {
fprintf(stderr, "Parsed an extern\n");
} else {
// Skip token for error recovery.
@@ -1168,7 +1169,7 @@ static void HandleExtern() {
static void HandleTopLevelExpression() {
// Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (ParseTopLevelExpr()) {
fprintf(stderr, "Parsed a top-level expr\n");
} else {
// Skip token for error recovery.
@@ -1206,7 +1207,9 @@ int main() {
fprintf(stderr, "ready&gt; ");
getNextToken();
+ // Run the main "interpreter loop" now.
MainLoop();
+
return 0;
}
</pre>
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index faf11d0592be..e3d2117c4e68 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -79,7 +79,7 @@ public:
class NumberExprAST : public ExprAST {
double Val;
public:
- explicit NumberExprAST(double val) : Val(val) {}
+ NumberExprAST(double val) : Val(val) {}
<b>virtual Value *Codegen();</b>
};
...
@@ -115,7 +115,7 @@ undeclared parameter):</p>
Value *ErrorV(const char *Str) { Error(Str); return 0; }
static Module *TheModule;
-static IRBuilder&lt;&gt; Builder;
+static IRBuilder&lt;&gt; Builder(getGlobalContext());
static std::map&lt;std::string, Value*&gt; NamedValues;
</pre>
</div>
@@ -159,7 +159,7 @@ we'll do numeric literals:</p>
<div class="doc_code">
<pre>
Value *NumberExprAST::Codegen() {
- return ConstantFP::get(APFloat(Val));
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
}
</pre>
</div>
@@ -170,7 +170,7 @@ internally (<tt>APFloat</tt> has the capability of holding floating point
constants of <em>A</em>rbitrary <em>P</em>recision). This code basically just
creates and returns a <tt>ConstantFP</tt>. Note that in the LLVM IR
that constants are all uniqued together and shared. For this reason, the API
-uses "the foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
+uses "the Context.get..." idiom instead of "new foo(..)" or "foo::Create(..)".</p>
<div class="doc_code">
<pre>
@@ -183,7 +183,7 @@ Value *VariableExprAST::Codegen() {
</div>
<p>References to variables are also quite simple using LLVM. In the simple version
-of Kaleidoscope, we assume that the variable has already been emited somewhere
+of Kaleidoscope, we assume that the variable has already been emitted somewhere
and its value is available. In practice, the only values that can be in the
<tt>NamedValues</tt> map are function arguments. This
code simply checks to see that the specified name is in the map (if not, an
@@ -206,7 +206,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
default: return ErrorV("invalid binary operator");
}
}
@@ -307,8 +308,10 @@ bodies and external function declarations. The code starts with:</p>
<pre>
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector&lt;const Type*&gt; Doubles(Args.size(), Type::DoubleTy);
- FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
+ std::vector&lt;const Type*&gt; Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
</pre>
@@ -320,10 +323,10 @@ really talks about the external interface for a function (not the value computed
by an expression), it makes sense for it to return the LLVM Function it
corresponds to when codegen'd.</p>
-<p>The call to <tt>FunctionType::get</tt> creates
+<p>The call to <tt>Context.get</tt> creates
the <tt>FunctionType</tt> that should be used for a given Prototype. Since all
function arguments in Kaleidoscope are of type double, the first line creates
-a vector of "N" LLVM double types. It then uses the <tt>FunctionType::get</tt>
+a vector of "N" LLVM double types. It then uses the <tt>Context.get</tt>
method to create a function type that takes "N" doubles as arguments, returns
one double as a result, and that is not vararg (the false parameter indicates
this). Note that Types in LLVM are uniqued just like Constants are, so you
@@ -359,7 +362,7 @@ definition of this function.</p>
first, we want to allow 'extern'ing a function more than once, as long as the
prototypes for the externs match (since all arguments have the same type, we
just have to check that the number of arguments match). Second, we want to
-allow 'extern'ing a function and then definining a body for it. This is useful
+allow 'extern'ing a function and then defining a body for it. This is useful
when defining mutually recursive functions.</p>
<p>In order to implement this, the code above first checks to see if there is
@@ -439,7 +442,7 @@ is an LLVM Function object that is ready to go for us.</p>
<div class="doc_code">
<pre>
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body-&gt;Codegen()) {
@@ -461,9 +464,10 @@ block at this point. We'll fix this in <a href="LangImpl5.html">Chapter 5</a> :
if (Value *RetVal = Body-&gt;Codegen()) {
// Finish off the function.
Builder.CreateRet(RetVal);
-
+
// Validate the generated code, checking for consistency.
verifyFunction(*TheFunction);
+
return TheFunction;
}
</pre>
@@ -682,6 +686,7 @@ our makefile/command line about which options to use:</p>
// See example below.
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Support/IRBuilder.h"
@@ -704,7 +709,7 @@ enum Token {
tok_def = -2, tok_extern = -3,
// primary
- tok_identifier = -4, tok_number = -5,
+ tok_identifier = -4, tok_number = -5
};
static std::string IdentifierStr; // Filled in if tok_identifier
@@ -773,7 +778,7 @@ public:
class NumberExprAST : public ExprAST {
double Val;
public:
- explicit NumberExprAST(double val) : Val(val) {}
+ NumberExprAST(double val) : Val(val) {}
virtual Value *Codegen();
};
@@ -781,7 +786,7 @@ public:
class VariableExprAST : public ExprAST {
std::string Name;
public:
- explicit VariableExprAST(const std::string &amp;name) : Name(name) {}
+ VariableExprAST(const std::string &amp;name) : Name(name) {}
virtual Value *Codegen();
};
@@ -806,7 +811,8 @@ public:
};
/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name and its argument names (thus implicitly the number
+/// of arguments the function takes).
class PrototypeAST {
std::string Name;
std::vector&lt;std::string&gt; Args;
@@ -833,7 +839,7 @@ public:
//===----------------------------------------------------------------------===//
/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser it looking at. getNextToken reads another token from the
+/// token the parser is looking at. getNextToken reads another token from the
/// lexer and updates CurTok with its results.
static int CurTok;
static int getNextToken() {
@@ -881,9 +887,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -1027,13 +1033,13 @@ static PrototypeAST *ParseExtern() {
//===----------------------------------------------------------------------===//
static Module *TheModule;
-static IRBuilder&lt;&gt; Builder;
+static IRBuilder&lt;&gt; Builder(getGlobalContext());
static std::map&lt;std::string, Value*&gt; NamedValues;
Value *ErrorV(const char *Str) { Error(Str); return 0; }
Value *NumberExprAST::Codegen() {
- return ConstantFP::get(APFloat(Val));
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
}
Value *VariableExprAST::Codegen() {
@@ -1054,7 +1060,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
default: return ErrorV("invalid binary operator");
}
}
@@ -1080,8 +1087,10 @@ Value *CallExprAST::Codegen() {
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector&lt;const Type*&gt; Doubles(Args.size(), Type::DoubleTy);
- FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
+ std::vector&lt;const Type*&gt; Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@@ -1126,15 +1135,16 @@ Function *FunctionAST::Codegen() {
return 0;
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body-&gt;Codegen()) {
// Finish off the function.
Builder.CreateRet(RetVal);
-
+
// Validate the generated code, checking for consistency.
verifyFunction(*TheFunction);
+
return TheFunction;
}
@@ -1172,7 +1182,7 @@ static void HandleExtern() {
}
static void HandleTopLevelExpression() {
- // Evaluate a top level expression into an anonymous function.
+ // Evaluate a top-level expression into an anonymous function.
if (FunctionAST *F = ParseTopLevelExpr()) {
if (Function *LF = F-&gt;Codegen()) {
fprintf(stderr, "Read top-level expression:");
@@ -1190,7 +1200,7 @@ static void MainLoop() {
fprintf(stderr, "ready&gt; ");
switch (CurTok) {
case tok_eof: return;
- case ';': getNextToken(); break; // ignore top level semicolons.
+ case ';': getNextToken(); break; // ignore top-level semicolons.
case tok_def: HandleDefinition(); break;
case tok_extern: HandleExtern(); break;
default: HandleTopLevelExpression(); break;
@@ -1198,8 +1208,6 @@ static void MainLoop() {
}
}
-
-
//===----------------------------------------------------------------------===//
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
@@ -1216,7 +1224,7 @@ double putchard(double X) {
//===----------------------------------------------------------------------===//
int main() {
- TheModule = new Module("my cool jit");
+ LLVMContext &amp;Context = getGlobalContext();
// Install standard binary operators.
// 1 is lowest precedence.
@@ -1229,8 +1237,15 @@ int main() {
fprintf(stderr, "ready&gt; ");
getNextToken();
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Run the main "interpreter loop" now.
MainLoop();
+
+ // Print out all of the generated code.
TheModule-&gt;dump();
+
return 0;
}
</pre>
@@ -1248,7 +1263,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2007-10-17 11:05:13 -0700 (Wed, 17 Oct 2007) $
+ Last modified: $Date: 2009-07-21 11:05:13 -0700 (Tue, 21 Jul 2009) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index 9a3bfd21471e..3188135384e0 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -171,26 +171,30 @@ add a set of optimizations to run. The code looks like this:</p>
<div class="doc_code">
<pre>
- ExistingModuleProvider OurModuleProvider(TheModule);
- FunctionPassManager OurFPM(&amp;OurModuleProvider);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &amp;OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
</pre>
</div>
@@ -205,7 +209,7 @@ requires a pointer to the <tt>Module</tt> (through the <tt>ModuleProvider</tt>)
to construct itself. Once it is set up, we use a series of "add" calls to add
a bunch of LLVM passes. The first pass is basically boilerplate, it adds a pass
so that later optimizations know how the data structures in the program are
-layed out. The "<tt>TheExecutionEngine</tt>" variable is related to the JIT,
+laid out. The "<tt>TheExecutionEngine</tt>" variable is related to the JIT,
which we will get to in the next section.</p>
<p>In this case, we choose to add 4 optimization passes. The passes we chose
@@ -298,8 +302,8 @@ by adding a global variable and a call in <tt>main</tt>:</p>
...
int main() {
..
- <b>// Create the JIT.
- TheExecutionEngine = ExecutionEngine::create(TheModule);</b>
+ <b>// Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();</b>
..
}
</pre>
@@ -320,7 +324,7 @@ top-level expression to look like this:</p>
<div class="doc_code">
<pre>
static void HandleTopLevelExpression() {
- // Evaluate a top level expression into an anonymous function.
+ // Evaluate a top-level expression into an anonymous function.
if (FunctionAST *F = ParseTopLevelExpr()) {
if (Function *LF = F-&gt;Codegen()) {
LF->dump(); // Dump the function for exposition purposes.
@@ -330,7 +334,7 @@ static void HandleTopLevelExpression() {
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
- double (*FP)() = (double (*)())FPtr;
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
fprintf(stderr, "Evaluated to %f\n", FP());</b>
}
</pre>
@@ -359,7 +363,7 @@ entry:
<p>Well this looks like it is basically working. The dump of the function
shows the "no argument function that always returns double" that we synthesize
-for each top level expression that is typed in. This demonstrates very basic
+for each top-level expression that is typed in. This demonstrates very basic
functionality, but can we do more?</p>
<div class="doc_code">
@@ -495,7 +499,7 @@ LLVM JIT and optimizer. To build this example, use:
<div class="doc_code">
<pre>
# Compile
- g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+ g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit interpreter native` -O3 -o toy
# Run
./toy
</pre>
@@ -512,11 +516,15 @@ at runtime.</p>
<pre>
#include "llvm/DerivedTypes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/IRBuilder.h"
#include &lt;cstdio&gt;
@@ -538,7 +546,7 @@ enum Token {
tok_def = -2, tok_extern = -3,
// primary
- tok_identifier = -4, tok_number = -5,
+ tok_identifier = -4, tok_number = -5
};
static std::string IdentifierStr; // Filled in if tok_identifier
@@ -640,7 +648,8 @@ public:
};
/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name and its argument names (thus implicitly the number
+/// of arguments the function takes).
class PrototypeAST {
std::string Name;
std::vector&lt;std::string&gt; Args;
@@ -667,7 +676,7 @@ public:
//===----------------------------------------------------------------------===//
/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser it looking at. getNextToken reads another token from the
+/// token the parser is looking at. getNextToken reads another token from the
/// lexer and updates CurTok with its results.
static int CurTok;
static int getNextToken() {
@@ -715,9 +724,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -861,14 +870,14 @@ static PrototypeAST *ParseExtern() {
//===----------------------------------------------------------------------===//
static Module *TheModule;
-static IRBuilder&lt;&gt; Builder;
+static IRBuilder&lt;&gt; Builder(getGlobalContext());
static std::map&lt;std::string, Value*&gt; NamedValues;
static FunctionPassManager *TheFPM;
Value *ErrorV(const char *Str) { Error(Str); return 0; }
Value *NumberExprAST::Codegen() {
- return ConstantFP::get(APFloat(Val));
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
}
Value *VariableExprAST::Codegen() {
@@ -889,7 +898,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
default: return ErrorV("invalid binary operator");
}
}
@@ -915,8 +925,10 @@ Value *CallExprAST::Codegen() {
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector&lt;const Type*&gt; Doubles(Args.size(), Type::DoubleTy);
- FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
+ std::vector&lt;const Type*&gt; Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@@ -961,7 +973,7 @@ Function *FunctionAST::Codegen() {
return 0;
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body-&gt;Codegen()) {
@@ -1013,7 +1025,7 @@ static void HandleExtern() {
}
static void HandleTopLevelExpression() {
- // Evaluate a top level expression into an anonymous function.
+ // Evaluate a top-level expression into an anonymous function.
if (FunctionAST *F = ParseTopLevelExpr()) {
if (Function *LF = F-&gt;Codegen()) {
// JIT the function, returning a function pointer.
@@ -1021,7 +1033,7 @@ static void HandleTopLevelExpression() {
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
- double (*FP)() = (double (*)())FPtr;
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
fprintf(stderr, "Evaluated to %f\n", FP());
}
} else {
@@ -1036,7 +1048,7 @@ static void MainLoop() {
fprintf(stderr, "ready&gt; ");
switch (CurTok) {
case tok_eof: return;
- case ';': getNextToken(); break; // ignore top level semicolons.
+ case ';': getNextToken(); break; // ignore top-level semicolons.
case tok_def: HandleDefinition(); break;
case tok_extern: HandleExtern(); break;
default: HandleTopLevelExpression(); break;
@@ -1044,8 +1056,6 @@ static void MainLoop() {
}
}
-
-
//===----------------------------------------------------------------------===//
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
@@ -1062,6 +1072,9 @@ double putchard(double X) {
//===----------------------------------------------------------------------===//
int main() {
+ InitializeNativeTarget();
+ LLVMContext &amp;Context = getGlobalContext();
+
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['&lt;'] = 10;
@@ -1074,39 +1087,41 @@ int main() {
getNextToken();
// Make the module, which holds all the code.
- TheModule = new Module("my cool jit");
-
- // Create the JIT.
- TheExecutionEngine = ExecutionEngine::create(TheModule);
+ TheModule = new Module("my cool jit", Context);
+
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &amp;OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule-&gt;dump();
- {
- ExistingModuleProvider OurModuleProvider(TheModule);
- FunctionPassManager OurFPM(&amp;OurModuleProvider);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
- } // Free module provider (and thus the module) and pass manager.
-
return 0;
}
</pre>
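<p>The one behavioral addition in this listing, easy to miss among the context changes, is <tt>OurFPM.doInitialization()</tt>. The ordering that matters is sketched below; the per-function <tt>run</tt> call lives in <tt>FunctionAST::Codegen</tt>, outside the hunks shown here, so treat that line as an assumption:</p>

<div class="doc_code">
<pre>
  FunctionPassManager OurFPM(OurModuleProvider);
  // ... add(...) calls as above ...
  OurFPM.doInitialization();  // once, before any function is optimized
  TheFPM = &amp;OurFPM;
  // later, per generated function:
  TheFPM-&gt;run(*TheFunction);
</pre>
</div>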
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index bf96b460465f..f93b59be0dca 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -288,8 +288,8 @@ into "t.ll" and run "<tt>llvm-as &lt; t.ll | opt -analyze -view-cfg</tt>", <a
href="../ProgrammersManual.html#ViewGraph">a window will pop up</a> and you'll
see this graph:</p>
-<center><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
-height="315"></center>
+<div style="text-align: center"><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
+height="315"></div>
<p>Another way to get this is to call "<tt>F-&gt;viewCFG()</tt>" or
"<tt>F-&gt;viewCFGOnly()</tt>" (where F is a "<tt>Function*</tt>") either by
@@ -364,7 +364,7 @@ Value *IfExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"ifcond");
</pre>
</div>
@@ -379,9 +379,9 @@ value as a 1-bit (bool) value.</p>
// Create blocks for the then and else cases. Insert the 'then' block at the
// end of the function.
- BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create("else");
- BasicBlock *MergeBB = BasicBlock::Create("ifcont");
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
</pre>
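<p>Note the asymmetry: only <tt>ThenBB</tt> receives a parent function when created. The other two blocks stay detached until the code for each branch has been emitted, at which point they are appended by hand. The <tt>ElseBB</tt> line below comes from the surrounding tutorial code rather than any hunk shown here:</p>

<div class="doc_code">
<pre>
  // After the 'then' branch is emitted:
  TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
  ...
  // And after the 'else' branch:
  TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
</pre>
</div>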
@@ -472,7 +472,8 @@ are emitted, we can finish up with the merge code:</p>
// Emit merge block.
TheFunction->getBasicBlockList().push_back(MergeBB);
Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
PN->addIncoming(ThenV, ThenBB);
PN->addIncoming(ElseV, ElseBB);
@@ -727,7 +728,7 @@ block, but remember that the body code itself could consist of multiple blocks
// block.
Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
// Insert an explicit fall through from the current block to the LoopBB.
Builder.CreateBr(LoopBB);
@@ -745,7 +746,7 @@ create an unconditional branch for the fall-through between the two blocks.</p>
Builder.SetInsertPoint(LoopBB);
// Start the PHI node with an entry for Start.
- PHINode *Variable = Builder.CreatePHI(Type::DoubleTy, VarName.c_str());
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
Variable-&gt;addIncoming(StartVal, PreheaderBB);
</pre>
</div>
@@ -796,7 +797,7 @@ references to it will naturally find it in the symbol table.</p>
if (StepVal == 0) return 0;
} else {
// If not specified, use 1.0.
- StepVal = ConstantFP::get(APFloat(1.0));
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
@@ -815,7 +816,7 @@ will be the value of the loop variable on the next iteration of the loop.</p>
// Convert condition to a bool by comparing equal to 0.0.
EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"loopcond");
</pre>
</div>
@@ -828,7 +829,7 @@ statement.</p>
<pre>
// Create the "after loop" block and insert it.
BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
// Insert the conditional branch into the end of LoopEndBB.
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
@@ -856,7 +857,7 @@ the loop again and exiting the loop. Any future code is emitted in the
NamedValues.erase(VarName);
// for expr always returns 0.0.
- return Constant::getNullValue(Type::DoubleTy);
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
}
</pre>
</div>
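<p>Pulling the loop pieces together: the induction variable is one PHI node whose two incoming edges are wired at different times, the preheader edge up front and the backedge only once the body exists. A condensed sketch assembled from the hunks above; the final <tt>addIncoming</tt> is from the surrounding tutorial code, not a hunk shown here:</p>

<div class="doc_code">
<pre>
  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
                                        VarName.c_str());
  Variable-&gt;addIncoming(StartVal, PreheaderBB);  // edge from before the loop
  // ... loop body and increment are emitted ...
  Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
  Variable-&gt;addIncoming(NextVar, LoopEndBB);     // the backedge
</pre>
</div>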
@@ -901,11 +902,15 @@ if/then/else and for expressions. To build this example, use:
<pre>
#include "llvm/DerivedTypes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/IRBuilder.h"
#include &lt;cstdio&gt;
@@ -1058,7 +1063,8 @@ public:
};
/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
class PrototypeAST {
std::string Name;
std::vector&lt;std::string&gt; Args;
@@ -1085,7 +1091,7 @@ public:
//===----------------------------------------------------------------------===//
/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser it looking at. getNextToken reads another token from the
+/// token the parser is looking at. getNextToken reads another token from the
/// lexer and updates CurTok with its results.
static int CurTok;
static int getNextToken() {
@@ -1133,9 +1139,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -1235,7 +1241,6 @@ static ExprAST *ParseForExpr() {
return new ForExprAST(IdName, Start, End, Step, Body);
}
-
/// primary
/// ::= identifierexpr
/// ::= numberexpr
@@ -1352,14 +1357,14 @@ static PrototypeAST *ParseExtern() {
//===----------------------------------------------------------------------===//
static Module *TheModule;
-static IRBuilder&lt;&gt; Builder;
+static IRBuilder&lt;&gt; Builder(getGlobalContext());
static std::map&lt;std::string, Value*&gt; NamedValues;
static FunctionPassManager *TheFPM;
Value *ErrorV(const char *Str) { Error(Str); return 0; }
Value *NumberExprAST::Codegen() {
- return ConstantFP::get(APFloat(Val));
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
}
Value *VariableExprAST::Codegen() {
@@ -1380,7 +1385,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
default: return ErrorV("invalid binary operator");
}
}
@@ -1410,16 +1416,16 @@ Value *IfExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"ifcond");
Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
// Create blocks for the then and else cases. Insert the 'then' block at the
// end of the function.
- BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create("else");
- BasicBlock *MergeBB = BasicBlock::Create("ifcont");
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
@@ -1447,7 +1453,8 @@ Value *IfExprAST::Codegen() {
// Emit merge block.
TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
PN-&gt;addIncoming(ThenV, ThenBB);
PN-&gt;addIncoming(ElseV, ElseBB);
@@ -1479,7 +1486,7 @@ Value *ForExprAST::Codegen() {
// block.
Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
// Insert an explicit fall through from the current block to the LoopBB.
Builder.CreateBr(LoopBB);
@@ -1488,7 +1495,7 @@ Value *ForExprAST::Codegen() {
Builder.SetInsertPoint(LoopBB);
// Start the PHI node with an entry for Start.
- PHINode *Variable = Builder.CreatePHI(Type::DoubleTy, VarName.c_str());
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
Variable-&gt;addIncoming(StartVal, PreheaderBB);
// Within the loop, the variable is defined equal to the PHI node. If it
@@ -1509,7 +1516,7 @@ Value *ForExprAST::Codegen() {
if (StepVal == 0) return 0;
} else {
// If not specified, use 1.0.
- StepVal = ConstantFP::get(APFloat(1.0));
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
@@ -1520,12 +1527,12 @@ Value *ForExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"loopcond");
// Create the "after loop" block and insert it.
BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
// Insert the conditional branch into the end of LoopEndBB.
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
@@ -1544,13 +1551,15 @@ Value *ForExprAST::Codegen() {
// for expr always returns 0.0.
- return Constant::getNullValue(Type::DoubleTy);
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
}
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector&lt;const Type*&gt; Doubles(Args.size(), Type::DoubleTy);
- FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
+ std::vector&lt;const Type*&gt; Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@@ -1595,7 +1604,7 @@ Function *FunctionAST::Codegen() {
return 0;
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body-&gt;Codegen()) {
@@ -1647,7 +1656,7 @@ static void HandleExtern() {
}
static void HandleTopLevelExpression() {
- // Evaluate a top level expression into an anonymous function.
+ // Evaluate a top-level expression into an anonymous function.
if (FunctionAST *F = ParseTopLevelExpr()) {
if (Function *LF = F-&gt;Codegen()) {
// JIT the function, returning a function pointer.
@@ -1655,7 +1664,7 @@ static void HandleTopLevelExpression() {
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
- double (*FP)() = (double (*)())FPtr;
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
fprintf(stderr, "Evaluated to %f\n", FP());
}
} else {
@@ -1670,7 +1679,7 @@ static void MainLoop() {
fprintf(stderr, "ready&gt; ");
switch (CurTok) {
case tok_eof: return;
- case ';': getNextToken(); break; // ignore top level semicolons.
+ case ';': getNextToken(); break; // ignore top-level semicolons.
case tok_def: HandleDefinition(); break;
case tok_extern: HandleExtern(); break;
default: HandleTopLevelExpression(); break;
@@ -1678,8 +1687,6 @@ static void MainLoop() {
}
}
-
-
//===----------------------------------------------------------------------===//
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
@@ -1696,6 +1703,9 @@ double putchard(double X) {
//===----------------------------------------------------------------------===//
int main() {
+ InitializeNativeTarget();
+ LLVMContext &amp;Context = getGlobalContext();
+
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['&lt;'] = 10;
@@ -1708,38 +1718,41 @@ int main() {
getNextToken();
// Make the module, which holds all the code.
- TheModule = new Module("my cool jit");
-
- // Create the JIT.
- TheExecutionEngine = ExecutionEngine::create(TheModule);
+ TheModule = new Module("my cool jit", Context);
- {
- ExistingModuleProvider OurModuleProvider(TheModule);
- FunctionPassManager OurFPM(&amp;OurModuleProvider);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &amp;OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule-&gt;dump();
- // Print out all of the generated code.
- TheModule-&gt;dump();
- } // Free module provider (and thus the module) and pass manager.
-
return 0;
}
</pre>
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 44ad15b00965..f113e96651e9 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -207,7 +207,7 @@ the prototype for a user-defined operator, we need to parse it:</p>
static PrototypeAST *ParsePrototype() {
std::string FnName;
- <b>int Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ <b>unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
unsigned BinaryPrecedence = 30;</b>
switch (CurTok) {
@@ -283,7 +283,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
<b>default: break;</b>
}
@@ -305,7 +306,7 @@ function call to it. Since user-defined operators are just built as normal
functions (because the "prototype" boils down to a function with the right
name), everything falls into place.</p>
-<p>The final piece of code we are missing, is a bit of top level magic:</p>
+<p>The final piece of code we are missing is a bit of top-level magic:</p>
<div class="doc_code">
<pre>
@@ -321,7 +322,7 @@ Function *FunctionAST::Codegen() {
BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();</b>
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body-&gt;Codegen()) {
@@ -438,7 +439,7 @@ with:</p>
static PrototypeAST *ParsePrototype() {
std::string FnName;
- int Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
unsigned BinaryPrecedence = 30;
switch (CurTok) {
@@ -794,7 +795,6 @@ add variable mutation without building SSA in your front-end.</p>
</div>
-
<!-- *********************************************************************** -->
<div class="doc_section"><a name="code">Full Code Listing</a></div>
<!-- *********************************************************************** -->
@@ -821,11 +821,15 @@ if/then/else and for expressions. To build this example, use:
<pre>
#include "llvm/DerivedTypes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/IRBuilder.h"
#include &lt;cstdio&gt;
@@ -993,7 +997,8 @@ public:
};
/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as whether it is an operator.
class PrototypeAST {
std::string Name;
std::vector&lt;std::string&gt; Args;
@@ -1033,7 +1038,7 @@ public:
//===----------------------------------------------------------------------===//
/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser it looking at. getNextToken reads another token from the
+/// token the parser is looking at. getNextToken reads another token from the
/// lexer and updates CurTok with its results.
static int CurTok;
static int getNextToken() {
@@ -1081,9 +1086,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -1183,7 +1188,6 @@ static ExprAST *ParseForExpr() {
return new ForExprAST(IdName, Start, End, Step, Body);
}
-
/// primary
/// ::= identifierexpr
/// ::= numberexpr
@@ -1267,7 +1271,7 @@ static ExprAST *ParseExpression() {
static PrototypeAST *ParsePrototype() {
std::string FnName;
- int Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
unsigned BinaryPrecedence = 30;
switch (CurTok) {
@@ -1357,14 +1361,14 @@ static PrototypeAST *ParseExtern() {
//===----------------------------------------------------------------------===//
static Module *TheModule;
-static IRBuilder&lt;&gt; Builder;
+static IRBuilder&lt;&gt; Builder(getGlobalContext());
static std::map&lt;std::string, Value*&gt; NamedValues;
static FunctionPassManager *TheFPM;
Value *ErrorV(const char *Str) { Error(Str); return 0; }
Value *NumberExprAST::Codegen() {
- return ConstantFP::get(APFloat(Val));
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
}
Value *VariableExprAST::Codegen() {
@@ -1384,7 +1388,6 @@ Value *UnaryExprAST::Codegen() {
return Builder.CreateCall(F, OperandV, "unop");
}
-
Value *BinaryExprAST::Codegen() {
Value *L = LHS-&gt;Codegen();
Value *R = RHS-&gt;Codegen();
@@ -1397,7 +1400,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
default: break;
}
@@ -1435,16 +1439,16 @@ Value *IfExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"ifcond");
Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
// Create blocks for the then and else cases. Insert the 'then' block at the
// end of the function.
- BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create("else");
- BasicBlock *MergeBB = BasicBlock::Create("ifcont");
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
@@ -1472,7 +1476,8 @@ Value *IfExprAST::Codegen() {
// Emit merge block.
TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
PN-&gt;addIncoming(ThenV, ThenBB);
PN-&gt;addIncoming(ElseV, ElseBB);
@@ -1504,7 +1509,7 @@ Value *ForExprAST::Codegen() {
// block.
Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
// Insert an explicit fall through from the current block to the LoopBB.
Builder.CreateBr(LoopBB);
@@ -1513,7 +1518,7 @@ Value *ForExprAST::Codegen() {
Builder.SetInsertPoint(LoopBB);
// Start the PHI node with an entry for Start.
- PHINode *Variable = Builder.CreatePHI(Type::DoubleTy, VarName.c_str());
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
Variable-&gt;addIncoming(StartVal, PreheaderBB);
// Within the loop, the variable is defined equal to the PHI node. If it
@@ -1534,7 +1539,7 @@ Value *ForExprAST::Codegen() {
if (StepVal == 0) return 0;
} else {
// If not specified, use 1.0.
- StepVal = ConstantFP::get(APFloat(1.0));
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
@@ -1545,12 +1550,12 @@ Value *ForExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"loopcond");
// Create the "after loop" block and insert it.
BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
// Insert the conditional branch into the end of LoopEndBB.
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
@@ -1569,13 +1574,15 @@ Value *ForExprAST::Codegen() {
// for expr always returns 0.0.
- return Constant::getNullValue(Type::DoubleTy);
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
}
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector&lt;const Type*&gt; Doubles(Args.size(), Type::DoubleTy);
- FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
+ std::vector&lt;const Type*&gt; Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@@ -1624,7 +1631,7 @@ Function *FunctionAST::Codegen() {
BinopPrecedence[Proto-&gt;getOperatorName()] = Proto-&gt;getBinaryPrecedence();
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body-&gt;Codegen()) {
@@ -1679,7 +1686,7 @@ static void HandleExtern() {
}
static void HandleTopLevelExpression() {
- // Evaluate a top level expression into an anonymous function.
+ // Evaluate a top-level expression into an anonymous function.
if (FunctionAST *F = ParseTopLevelExpr()) {
if (Function *LF = F-&gt;Codegen()) {
// JIT the function, returning a function pointer.
@@ -1687,7 +1694,7 @@ static void HandleTopLevelExpression() {
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
- double (*FP)() = (double (*)())FPtr;
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
fprintf(stderr, "Evaluated to %f\n", FP());
}
} else {
@@ -1702,7 +1709,7 @@ static void MainLoop() {
fprintf(stderr, "ready&gt; ");
switch (CurTok) {
case tok_eof: return;
- case ';': getNextToken(); break; // ignore top level semicolons.
+ case ';': getNextToken(); break; // ignore top-level semicolons.
case tok_def: HandleDefinition(); break;
case tok_extern: HandleExtern(); break;
default: HandleTopLevelExpression(); break;
@@ -1710,8 +1717,6 @@ static void MainLoop() {
}
}
-
-
//===----------------------------------------------------------------------===//
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
@@ -1735,6 +1740,9 @@ double printd(double X) {
//===----------------------------------------------------------------------===//
int main() {
+ InitializeNativeTarget();
+ LLVMContext &amp;Context = getGlobalContext();
+
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['&lt;'] = 10;
@@ -1747,38 +1755,41 @@ int main() {
getNextToken();
// Make the module, which holds all the code.
- TheModule = new Module("my cool jit");
-
- // Create the JIT.
- TheExecutionEngine = ExecutionEngine::create(TheModule);
+ TheModule = new Module("my cool jit", Context);
+
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &amp;OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule-&gt;dump();
- {
- ExistingModuleProvider OurModuleProvider(TheModule);
- FunctionPassManager OurFPM(&amp;OurModuleProvider);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
- } // Free module provider (and thus the module) and pass manager.
-
return 0;
}
</pre>
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index f5606484eb9f..ec07fa88d4b1 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -424,7 +424,8 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
const std::string &amp;VarName) {
IRBuilder&lt;&gt; TmpB(&amp;TheFunction-&gt;getEntryBlock(),
TheFunction-&gt;getEntryBlock().begin());
- return TmpB.CreateAlloca(Type::DoubleTy, 0, VarName.c_str());
+ return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+ VarName.c_str());
}
</pre>
</div>
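<p>With the helper in place, a mutable variable is nothing but loads and stores on its stack slot; the <tt>mem2reg</tt> pass this patch schedules (<tt>createPromoteMemoryToRegisterPass</tt>) later rewrites those into SSA registers. A minimal usage sketch, with <tt>StartVal</tt> standing in for whatever initializer the caller holds:</p>

<div class="doc_code">
<pre>
  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
  Builder.CreateStore(StartVal, Alloca);                     // write
  Value *Cur = Builder.CreateLoad(Alloca, VarName.c_str());  // read back
</pre>
</div>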
@@ -923,7 +924,7 @@ that we replace in OldBindings.</p>
InitVal = Init-&gt;Codegen();
if (InitVal == 0) return 0;
} else { // If not specified, use 0.0.
- InitVal = ConstantFP::get(APFloat(0.0));
+ InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
}
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
@@ -1003,11 +1004,15 @@ variables and var/in support. To build this example, use:
<pre>
#include "llvm/DerivedTypes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/IRBuilder.h"
#include &lt;cstdio&gt;
@@ -1192,7 +1197,8 @@ public:
};
/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as whether it is an operator.
class PrototypeAST {
std::string Name;
std::vector&lt;std::string&gt; Args;
@@ -1234,7 +1240,7 @@ public:
//===----------------------------------------------------------------------===//
/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser it looking at. getNextToken reads another token from the
+/// token the parser is looking at. getNextToken reads another token from the
/// lexer and updates CurTok with its results.
static int CurTok;
static int getNextToken() {
@@ -1282,9 +1288,9 @@ static ExprAST *ParseIdentifierExpr() {
ExprAST *Arg = ParseExpression();
if (!Arg) return 0;
Args.push_back(Arg);
-
+
if (CurTok == ')') break;
-
+
if (CurTok != ',')
return Error("Expected ')' or ',' in argument list");
getNextToken();
@@ -1429,7 +1435,6 @@ static ExprAST *ParseVarExpr() {
return new VarExprAST(VarNames, Body);
}
-
/// primary
/// ::= identifierexpr
/// ::= numberexpr
@@ -1515,7 +1520,7 @@ static ExprAST *ParseExpression() {
static PrototypeAST *ParsePrototype() {
std::string FnName;
- int Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
unsigned BinaryPrecedence = 30;
switch (CurTok) {
@@ -1605,7 +1610,7 @@ static PrototypeAST *ParseExtern() {
//===----------------------------------------------------------------------===//
static Module *TheModule;
-static IRBuilder&lt;&gt; Builder;
+static IRBuilder&lt;&gt; Builder(getGlobalContext());
static std::map&lt;std::string, AllocaInst*&gt; NamedValues;
static FunctionPassManager *TheFPM;
@@ -1617,12 +1622,12 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
const std::string &amp;VarName) {
IRBuilder&lt;&gt; TmpB(&amp;TheFunction-&gt;getEntryBlock(),
TheFunction-&gt;getEntryBlock().begin());
- return TmpB.CreateAlloca(Type::DoubleTy, 0, VarName.c_str());
+ return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+ VarName.c_str());
}
-
Value *NumberExprAST::Codegen() {
- return ConstantFP::get(APFloat(Val));
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
}
Value *VariableExprAST::Codegen() {
@@ -1645,7 +1650,6 @@ Value *UnaryExprAST::Codegen() {
return Builder.CreateCall(F, OperandV, "unop");
}
-
Value *BinaryExprAST::Codegen() {
// Special case '=' because we don't want to emit the LHS as an expression.
if (Op == '=') {
@@ -1665,7 +1669,6 @@ Value *BinaryExprAST::Codegen() {
return Val;
}
-
Value *L = LHS-&gt;Codegen();
Value *R = RHS-&gt;Codegen();
if (L == 0 || R == 0) return 0;
@@ -1677,7 +1680,8 @@ Value *BinaryExprAST::Codegen() {
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
default: break;
}
@@ -1715,16 +1719,16 @@ Value *IfExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"ifcond");
Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
// Create blocks for the then and else cases. Insert the 'then' block at the
// end of the function.
- BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create("else");
- BasicBlock *MergeBB = BasicBlock::Create("ifcont");
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
@@ -1752,7 +1756,8 @@ Value *IfExprAST::Codegen() {
// Emit merge block.
TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
PN-&gt;addIncoming(ThenV, ThenBB);
PN-&gt;addIncoming(ElseV, ElseBB);
@@ -1794,8 +1799,7 @@ Value *ForExprAST::Codegen() {
// Make the new basic block for the loop header, inserting after current
// block.
- BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
// Insert an explicit fall through from the current block to the LoopBB.
Builder.CreateBr(LoopBB);
@@ -1821,7 +1825,7 @@ Value *ForExprAST::Codegen() {
if (StepVal == 0) return 0;
} else {
// If not specified, use 1.0.
- StepVal = ConstantFP::get(APFloat(1.0));
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
// Compute the end condition.
@@ -1836,12 +1840,11 @@ Value *ForExprAST::Codegen() {
// Convert condition to a bool by comparing equal to 0.0.
EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(APFloat(0.0)),
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
"loopcond");
// Create the "after loop" block and insert it.
- BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
// Insert the conditional branch into the end of LoopEndBB.
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
@@ -1857,7 +1860,7 @@ Value *ForExprAST::Codegen() {
// for expr always returns 0.0.
- return Constant::getNullValue(Type::DoubleTy);
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
}
Value *VarExprAST::Codegen() {
@@ -1880,7 +1883,7 @@ Value *VarExprAST::Codegen() {
InitVal = Init-&gt;Codegen();
if (InitVal == 0) return 0;
} else { // If not specified, use 0.0.
- InitVal = ConstantFP::get(APFloat(0.0));
+ InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
}
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
@@ -1906,11 +1909,12 @@ Value *VarExprAST::Codegen() {
return BodyVal;
}
-
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector&lt;const Type*&gt; Doubles(Args.size(), Type::DoubleTy);
- FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
+ std::vector&lt;const Type*&gt; Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@@ -1959,7 +1963,6 @@ void PrototypeAST::CreateArgumentAllocas(Function *F) {
}
}
-
Function *FunctionAST::Codegen() {
NamedValues.clear();
@@ -1972,12 +1975,12 @@ Function *FunctionAST::Codegen() {
BinopPrecedence[Proto-&gt;getOperatorName()] = Proto-&gt;getBinaryPrecedence();
// Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
Builder.SetInsertPoint(BB);
// Add all arguments to the symbol table and create their allocas.
Proto-&gt;CreateArgumentAllocas(TheFunction);
-
+
if (Value *RetVal = Body-&gt;Codegen()) {
// Finish off the function.
Builder.CreateRet(RetVal);
@@ -2030,7 +2033,7 @@ static void HandleExtern() {
}
static void HandleTopLevelExpression() {
- // Evaluate a top level expression into an anonymous function.
+ // Evaluate a top-level expression into an anonymous function.
if (FunctionAST *F = ParseTopLevelExpr()) {
if (Function *LF = F-&gt;Codegen()) {
// JIT the function, returning a function pointer.
@@ -2038,7 +2041,7 @@ static void HandleTopLevelExpression() {
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
- double (*FP)() = (double (*)())FPtr;
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
fprintf(stderr, "Evaluated to %f\n", FP());
}
} else {
@@ -2053,7 +2056,7 @@ static void MainLoop() {
fprintf(stderr, "ready&gt; ");
switch (CurTok) {
case tok_eof: return;
- case ';': getNextToken(); break; // ignore top level semicolons.
+ case ';': getNextToken(); break; // ignore top-level semicolons.
case tok_def: HandleDefinition(); break;
case tok_extern: HandleExtern(); break;
default: HandleTopLevelExpression(); break;
@@ -2061,8 +2064,6 @@ static void MainLoop() {
}
}
-
-
//===----------------------------------------------------------------------===//
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
@@ -2086,6 +2087,9 @@ double printd(double X) {
//===----------------------------------------------------------------------===//
int main() {
+ InitializeNativeTarget();
+ LLVMContext &amp;Context = getGlobalContext();
+
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['='] = 2;
@@ -2099,42 +2103,43 @@ int main() {
getNextToken();
// Make the module, which holds all the code.
- TheModule = new Module("my cool jit");
-
- // Create the JIT.
- TheExecutionEngine = ExecutionEngine::create(TheModule);
+ TheModule = new Module("my cool jit", Context);
- {
- ExistingModuleProvider OurModuleProvider(TheModule);
- FunctionPassManager OurFPM(&amp;OurModuleProvider);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
- // Promote allocas to registers.
- OurFPM.add(createPromoteMemoryToRegisterPass());
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+ // Promote allocas to registers.
+ OurFPM.add(createPromoteMemoryToRegisterPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &amp;OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule-&gt;dump();
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
-
- } // Free module provider (and thus the module) and pass manager.
-
return 0;
}
</pre>
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
index 9caae435e29e..a59887522ee5 100644
--- a/docs/tutorial/OCamlLangImpl3.html
+++ b/docs/tutorial/OCamlLangImpl3.html
@@ -95,8 +95,8 @@ an undeclared parameter):</p>
<pre>
exception Error of string
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let the_module = create_module (global_context ()) "my cool jit"
+let builder = builder (global_context ())
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
</pre>
</div>
@@ -159,7 +159,7 @@ uses "the foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
</div>
<p>References to variables are also quite simple using LLVM. In the simple
-version of Kaleidoscope, we assume that the variable has already been emited
+version of Kaleidoscope, we assume that the variable has already been emitted
somewhere and its value is available. In practice, the only values that can be
in the <tt>Codegen.named_values</tt> map are function arguments. This code
simply checks to see that the specified name is in the map (if not, an unknown
@@ -323,7 +323,7 @@ code above.</p>
first, we want to allow 'extern'ing a function more than once, as long as the
prototypes for the externs match (since all arguments have the same type, we
just have to check that the number of arguments match). Second, we want to
-allow 'extern'ing a function and then definining a body for it. This is useful
+allow 'extern'ing a function and then defining a body for it. This is useful
when defining mutually recursive functions.</p>
<div class="doc_code">
@@ -899,8 +899,9 @@ open Llvm
exception Error of string
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
let rec codegen_expr = function
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
index ffa85d51dfb7..26f253249bb1 100644
--- a/docs/tutorial/OCamlLangImpl4.html
+++ b/docs/tutorial/OCamlLangImpl4.html
@@ -206,6 +206,8 @@ add a set of optimizations to run. The code looks like this:</p>
(* Simplify the control flow graph (deleting unreachable blocks, etc). *)
add_cfg_simplification the_fpm;
+ ignore (PassManager.initialize the_fpm);
+
(* Run the main "interpreter loop" now. *)
Toplevel.main_loop the_fpm the_execution_engine stream;
</pre>
@@ -222,7 +224,7 @@ requires a pointer to the <tt>the_module</tt> (through the
<tt>the_module_provider</tt>) to construct itself. Once it is set up, we use a
series of "add" calls to add a bunch of LLVM passes. The first pass is
basically boilerplate: it adds a pass so that later optimizations know how the
-data structures in the program are layed out. The
+data structures in the program are laid out. The
"<tt>the_execution_engine</tt>" variable is related to the JIT, which we will
get to in the next section.</p>
@@ -795,8 +797,9 @@ open Llvm
exception Error of string
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
let rec codegen_expr = function
@@ -959,6 +962,8 @@ open Llvm_target
open Llvm_scalar_opts
let main () =
+ ignore (initialize_native_target ());
+
(* Install standard binary operators.
* 1 is the lowest precedence. *)
Hashtbl.add Parser.binop_precedence '&lt;' 10;
@@ -991,6 +996,8 @@ let main () =
(* Simplify the control flow graph (deleting unreachable blocks, etc). *)
add_cfg_simplification the_fpm;
+ ignore (PassManager.initialize the_fpm);
+
(* Run the main "interpreter loop" now. *)
Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
index 594a77d16480..f19e900c00bd 100644
--- a/docs/tutorial/OCamlLangImpl5.html
+++ b/docs/tutorial/OCamlLangImpl5.html
@@ -271,8 +271,8 @@ into "t.ll" and run "<tt>llvm-as &lt; t.ll | opt -analyze -view-cfg</tt>", <a
href="../ProgrammersManual.html#ViewGraph">a window will pop up</a> and you'll
see this graph:</p>
-<center><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
-height="315"></center>
+<div style="text-align: center"><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
+height="315"></div>
<p>Another way to get this is to call "<tt>Llvm_analysis.view_function_cfg
f</tt>" or "<tt>Llvm_analysis.view_function_cfg_only f</tt>" (where <tt>f</tt>
@@ -1200,8 +1200,9 @@ open Llvm
exception Error of string
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
let rec codegen_expr = function
@@ -1486,6 +1487,8 @@ open Llvm_target
open Llvm_scalar_opts
let main () =
+ ignore (initialize_native_target ());
+
(* Install standard binary operators.
* 1 is the lowest precedence. *)
Hashtbl.add Parser.binop_precedence '&lt;' 10;
@@ -1518,6 +1521,8 @@ let main () =
(* Simplify the control flow graph (deleting unreachable blocks, etc). *)
add_cfg_simplification the_fpm;
+ ignore (PassManager.initialize the_fpm);
+
(* Run the main "interpreter loop" now. *)
Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
index 780cab819142..2edb22edf632 100644
--- a/docs/tutorial/OCamlLangImpl6.html
+++ b/docs/tutorial/OCamlLangImpl6.html
@@ -1173,8 +1173,9 @@ open Llvm
exception Error of string
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
let rec codegen_expr = function
@@ -1485,6 +1486,8 @@ open Llvm_target
open Llvm_scalar_opts
let main () =
+ ignore (initialize_native_target ());
+
(* Install standard binary operators.
* 1 is the lowest precedence. *)
Hashtbl.add Parser.binop_precedence '&lt;' 10;
@@ -1517,6 +1520,8 @@ let main () =
(* Simplify the control flow graph (deleting unreachable blocks, etc). *)
add_cfg_simplification the_fpm;
+ ignore (PassManager.initialize the_fpm);
+
(* Run the main "interpreter loop" now. *)
Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
index abda44011cab..07768214b9a5 100644
--- a/docs/tutorial/OCamlLangImpl7.html
+++ b/docs/tutorial/OCamlLangImpl7.html
@@ -1384,14 +1384,15 @@ open Llvm
exception Error of string
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
(* Create an alloca instruction in the entry block of the function. This
* is used for mutable variables etc. *)
let create_entry_block_alloca the_function var_name =
- let builder = builder_at (instr_begin (entry_block the_function)) in
+ let builder = builder_at context (instr_begin (entry_block the_function)) in
build_alloca double_type var_name builder
let rec codegen_expr = function
@@ -1815,6 +1816,8 @@ open Llvm_target
open Llvm_scalar_opts
let main () =
+ ignore (initialize_native_target ());
+
(* Install standard binary operators.
* 1 is the lowest precedence. *)
Hashtbl.add Parser.binop_precedence '=' 2;
@@ -1851,6 +1854,8 @@ let main () =
(* Simplify the control flow graph (deleting unreachable blocks, etc). *)
add_cfg_simplification the_fpm;
+ ignore (PassManager.initialize the_fpm);
+
(* Run the main "interpreter loop" now. *)
Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index d3261d79965c..5cf2b883bc48 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -43,7 +43,7 @@ Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf,
comflag = cf;
header(Context);
- readloop(0, 0, 0);
+ readloop(0, 0, 0, Context);
delete builder;
return module;
}
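The hunk above is the whole patch in miniature: an LLVMContext now travels explicitly to every helper that mints types or constants. A minimal sketch of the idiom, with the module name chosen here purely for illustration:

    // Constructs that used to consult a hidden global now name their context.
    LLVMContext &C = getGlobalContext();
    Module *M = new Module("brainf", C);              // module tied to C
    IRBuilder<> B(C);                                 // builder tied to C
    Value *Zero = ConstantInt::get(C, APInt(32, 0));  // constants likewise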
@@ -54,39 +54,39 @@ void BrainF::header(LLVMContext& C) {
//Function prototypes
//declare void @llvm.memset.i32(i8 *, i8, i32, i32)
- const Type *Tys[] = { Type::Int32Ty };
+ const Type *Tys[] = { Type::getInt32Ty(C) };
Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset,
Tys, 1);
//declare i32 @getchar()
getchar_func = cast<Function>(module->
- getOrInsertFunction("getchar", IntegerType::Int32Ty, NULL));
+ getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL));
//declare i32 @putchar(i32)
putchar_func = cast<Function>(module->
- getOrInsertFunction("putchar", IntegerType::Int32Ty,
- IntegerType::Int32Ty, NULL));
+ getOrInsertFunction("putchar", IntegerType::getInt32Ty(C),
+ IntegerType::getInt32Ty(C), NULL));
//Function header
//define void @brainf()
brainf_func = cast<Function>(module->
- getOrInsertFunction("brainf", Type::VoidTy, NULL));
+ getOrInsertFunction("brainf", Type::getVoidTy(C), NULL));
- builder = new IRBuilder<>(BasicBlock::Create(label, brainf_func));
+ builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func));
//%arr = malloc i8, i32 %d
- ConstantInt *val_mem = ConstantInt::get(APInt(32, memtotal));
- ptr_arr = builder->CreateMalloc(IntegerType::Int8Ty, val_mem, "arr");
+ ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal));
+ ptr_arr = builder->CreateMalloc(IntegerType::getInt8Ty(C), val_mem, "arr");
//call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1)
{
Value *memset_params[] = {
ptr_arr,
- ConstantInt::get(APInt(8, 0)),
+ ConstantInt::get(C, APInt(8, 0)),
val_mem,
- ConstantInt::get(APInt(32, 1))
+ ConstantInt::get(C, APInt(32, 1))
};
CallInst *memset_call = builder->
@@ -97,12 +97,12 @@ void BrainF::header(LLVMContext& C) {
//%arrmax = getelementptr i8 *%arr, i32 %d
if (comflag & flag_arraybounds) {
ptr_arrmax = builder->
- CreateGEP(ptr_arr, ConstantInt::get(APInt(32, memtotal)), "arrmax");
+ CreateGEP(ptr_arr, ConstantInt::get(C, APInt(32, memtotal)), "arrmax");
}
//%head.%d = getelementptr i8 *%arr, i32 %d
curhead = builder->CreateGEP(ptr_arr,
- ConstantInt::get(APInt(32, memtotal/2)),
+ ConstantInt::get(C, APInt(32, memtotal/2)),
headreg);
@@ -110,13 +110,13 @@ void BrainF::header(LLVMContext& C) {
//Function footer
//brainf.end:
- endbb = BasicBlock::Create(label, brainf_func);
+ endbb = BasicBlock::Create(C, label, brainf_func);
//free i8 *%arr
new FreeInst(ptr_arr, endbb);
//ret void
- ReturnInst::Create(endbb);
+ ReturnInst::Create(C, endbb);
@@ -124,28 +124,28 @@ void BrainF::header(LLVMContext& C) {
if (comflag & flag_arraybounds)
{
//@aberrormsg = internal constant [%d x i8] c"\00"
- Constant *msg_0 = ConstantArray::
- get("Error: The head has left the tape.", true);
+ Constant *msg_0 =
+ ConstantArray::get(C, "Error: The head has left the tape.", true);
GlobalVariable *aberrormsg = new GlobalVariable(
+ *module,
msg_0->getType(),
true,
GlobalValue::InternalLinkage,
msg_0,
- "aberrormsg",
- module);
+ "aberrormsg");
//declare i32 @puts(i8 *)
Function *puts_func = cast<Function>(module->
- getOrInsertFunction("puts", IntegerType::Int32Ty,
- PointerType::getUnqual(IntegerType::Int8Ty), NULL));
+ getOrInsertFunction("puts", IntegerType::getInt32Ty(C),
+ PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL));
//brainf.aberror:
- aberrorbb = BasicBlock::Create(label, brainf_func);
+ aberrorbb = BasicBlock::Create(C, label, brainf_func);
//call i32 @puts(i8 *getelementptr([%d x i8] *@aberrormsg, i32 0, i32 0))
{
- Constant *zero_32 = Constant::getNullValue(IntegerType::Int32Ty);
+ Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C));
Constant *gep_params[] = {
zero_32,
@@ -172,7 +172,8 @@ void BrainF::header(LLVMContext& C) {
}
}
-void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
+void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb,
+ LLVMContext &C) {
Symbol cursym = SYM_NONE;
int curvalue = 0;
Symbol nextsym = SYM_NONE;
@@ -197,7 +198,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
//%tape.%d = trunc i32 %tape.%d to i8
Value *tape_1 = builder->
- CreateTrunc(tape_0, IntegerType::Int8Ty, tapereg);
+ CreateTrunc(tape_0, IntegerType::getInt8Ty(C), tapereg);
//store i8 %tape.%d, i8 *%head.%d
builder->CreateStore(tape_1, curhead);
@@ -211,7 +212,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
//%tape.%d = sext i8 %tape.%d to i32
Value *tape_1 = builder->
- CreateSExt(tape_0, IntegerType::Int32Ty, tapereg);
+ CreateSExt(tape_0, IntegerType::getInt32Ty(C), tapereg);
//call i32 @putchar(i32 %tape.%d)
Value *putchar_params[] = {
@@ -228,7 +229,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
{
//%head.%d = getelementptr i8 *%head.%d, i32 %d
curhead = builder->
- CreateGEP(curhead, ConstantInt::get(APInt(32, curvalue)),
+ CreateGEP(curhead, ConstantInt::get(C, APInt(32, curvalue)),
headreg);
//Error block for array out of bounds
@@ -247,7 +248,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
CreateOr(test_0, test_1, testreg);
//br i1 %test.%d, label %main.%d, label %main.%d
- BasicBlock *nextbb = BasicBlock::Create(label, brainf_func);
+ BasicBlock *nextbb = BasicBlock::Create(C, label, brainf_func);
builder->CreateCondBr(test_2, aberrorbb, nextbb);
//main.%d:
@@ -263,7 +264,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
//%tape.%d = add i8 %tape.%d, %d
Value *tape_1 = builder->
- CreateAdd(tape_0, ConstantInt::get(APInt(8, curvalue)), tapereg);
+ CreateAdd(tape_0, ConstantInt::get(C, APInt(8, curvalue)), tapereg);
//store i8 %tape.%d, i8 *%head.%d\n"
builder->CreateStore(tape_1, curhead);
@@ -273,23 +274,23 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
case SYM_LOOP:
{
//br label %main.%d
- BasicBlock *testbb = BasicBlock::Create(label, brainf_func);
+ BasicBlock *testbb = BasicBlock::Create(C, label, brainf_func);
builder->CreateBr(testbb);
//main.%d:
BasicBlock *bb_0 = builder->GetInsertBlock();
- BasicBlock *bb_1 = BasicBlock::Create(label, brainf_func);
+ BasicBlock *bb_1 = BasicBlock::Create(C, label, brainf_func);
builder->SetInsertPoint(bb_1);
// Make part of PHI instruction now, wait until end of loop to finish
PHINode *phi_0 =
- PHINode::Create(PointerType::getUnqual(IntegerType::Int8Ty),
+ PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)),
headreg, testbb);
phi_0->reserveOperandSpace(2);
phi_0->addIncoming(curhead, bb_0);
curhead = phi_0;
- readloop(phi_0, bb_1, testbb);
+ readloop(phi_0, bb_1, testbb, C);
}
break;
@@ -427,12 +428,11 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
LoadInst *tape_0 = new LoadInst(head_0, tapereg, testbb);
//%test.%d = icmp eq i8 %tape.%d, 0
- ICmpInst *test_0 = new ICmpInst(ICmpInst::ICMP_EQ, tape_0,
- ConstantInt::get(APInt(8, 0)), testreg,
- testbb);
+ ICmpInst *test_0 = new ICmpInst(*testbb, ICmpInst::ICMP_EQ, tape_0,
+ ConstantInt::get(C, APInt(8, 0)), testreg);
//br i1 %test.%d, label %main.%d, label %main.%d
- BasicBlock *bb_0 = BasicBlock::Create(label, brainf_func);
+ BasicBlock *bb_0 = BasicBlock::Create(C, label, brainf_func);
BranchInst::Create(bb_0, oldbb, test_0, testbb);
//main.%d:
@@ -440,7 +440,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
//%head.%d = phi i8 *[%head.%d, %main.%d]
PHINode *phi_1 = builder->
- CreatePHI(PointerType::getUnqual(IntegerType::Int8Ty), headreg);
+ CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), headreg);
phi_1->reserveOperandSpace(1);
phi_1->addIncoming(head_0, testbb);
curhead = phi_1;
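All of the BrainF edits above are one mechanical migration: the pre-2.6 type singletons (Type::Int32Ty, IntegerType::Int8Ty, and friends) are gone, and types, constants, basic blocks, and return instructions are now created against an explicit LLVMContext. A minimal standalone illustration of the new idioms (the module and function names are made up for the example):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    using namespace llvm;

    int main() {
      LLVMContext &C = getGlobalContext();
      Module *M = new Module("demo", C);
      // define i32 @f() { ret i32 0 } -- every piece takes the context now.
      Function *F = cast<Function>(
          M->getOrInsertFunction("f", Type::getInt32Ty(C), NULL));
      BasicBlock *BB = BasicBlock::Create(C, "entry", F);
      ReturnInst::Create(C, ConstantInt::get(C, APInt(32, 0)), BB);
      M->dump();
      delete M;
      return 0;
    }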
diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h
index 053ddaa58755..add0687d54a6 100644
--- a/examples/BrainF/BrainF.h
+++ b/examples/BrainF/BrainF.h
@@ -70,7 +70,8 @@ class BrainF {
/// The main loop for parsing. It calls itself recursively
/// to handle the depth of nesting of "[]".
- void readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb);
+ void readloop(PHINode *phi, BasicBlock *oldbb,
+ BasicBlock *testbb, LLVMContext &Context);
/// Constants during parsing
int memtotal;
diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp
index 4eaa4940e70a..6f4ba69927ce 100644
--- a/examples/BrainF/BrainFDriver.cpp
+++ b/examples/BrainF/BrainFDriver.cpp
@@ -32,11 +32,12 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetSelect.h"
-#include <fstream>
+#include "llvm/Support/raw_ostream.h"
#include <iostream>
+#include <fstream>
using namespace llvm;
//Command line options
@@ -58,9 +59,10 @@ JIT("jit", cl::desc("Run program Just-In-Time"));
void addMainFunction(Module *mod) {
//define i32 @main(i32 %argc, i8 **%argv)
Function *main_func = cast<Function>(mod->
- getOrInsertFunction("main", IntegerType::Int32Ty, IntegerType::Int32Ty,
+ getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()),
+ IntegerType::getInt32Ty(mod->getContext()),
PointerType::getUnqual(PointerType::getUnqual(
- IntegerType::Int8Ty)), NULL));
+ IntegerType::getInt8Ty(mod->getContext()))), NULL));
{
Function::arg_iterator args = main_func->arg_begin();
Value *arg_0 = args++;
@@ -70,7 +72,7 @@ void addMainFunction(Module *mod) {
}
//main.0:
- BasicBlock *bb = BasicBlock::Create("main.0", main_func);
+ BasicBlock *bb = BasicBlock::Create(mod->getContext(), "main.0", main_func);
//call void @brainf()
{
@@ -80,59 +82,58 @@ void addMainFunction(Module *mod) {
}
//ret i32 0
- ReturnInst::Create(ConstantInt::get(APInt(32, 0)), bb);
+ ReturnInst::Create(mod->getContext(),
+ ConstantInt::get(mod->getContext(), APInt(32, 0)), bb);
}
int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, " BrainF compiler\n");
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
if (InputFilename == "") {
- std::cerr<<"Error: You must specify the filename of the program to "
+ errs() << "Error: You must specify the filename of the program to "
"be compiled. Use --help to see the options.\n";
abort();
}
//Get the output stream
- std::ostream *out = &std::cout;
+ raw_ostream *out = &outs();
if (!JIT) {
if (OutputFilename == "") {
std::string base = InputFilename;
- if (InputFilename == "-") {base = "a";}
+ if (InputFilename == "-") { base = "a"; }
- //Use default filename
- const char *suffix = ".bc";
- OutputFilename = base+suffix;
+ // Use default filename.
+ OutputFilename = base+".bc";
}
if (OutputFilename != "-") {
- out = new std::
- ofstream(OutputFilename.c_str(),
- std::ios::out | std::ios::trunc | std::ios::binary);
+ std::string ErrInfo;
+ out = new raw_fd_ostream(OutputFilename.c_str(), ErrInfo,
+ raw_fd_ostream::F_Binary);
}
}
//Get the input stream
std::istream *in = &std::cin;
- if (InputFilename != "-") {
+ if (InputFilename != "-")
in = new std::ifstream(InputFilename.c_str());
- }
//Gather the compile flags
BrainF::CompileFlags cf = BrainF::flag_off;
- if (ArrayBoundsChecking) {
+ if (ArrayBoundsChecking)
cf = BrainF::CompileFlags(cf | BrainF::flag_arraybounds);
- }
//Read the BrainF program
BrainF bf;
Module *mod = bf.parse(in, 65536, cf, Context); //64 KiB
- if (in != &std::cin) {delete in;}
+ if (in != &std::cin)
+ delete in;
addMainFunction(mod);
//Verify generated code
if (verifyModule(*mod)) {
- std::cerr<<"Error: module failed verification. This shouldn't happen.\n";
+ errs() << "Error: module failed verification. This shouldn't happen.\n";
abort();
}
@@ -140,9 +141,8 @@ int main(int argc, char **argv) {
if (JIT) {
InitializeNativeTarget();
- std::cout << "------- Running JIT -------\n";
- ExistingModuleProvider *mp = new ExistingModuleProvider(mod);
- ExecutionEngine *ee = ExecutionEngine::create(mp, false);
+ outs() << "------- Running JIT -------\n";
+ ExecutionEngine *ee = EngineBuilder(mod).create();
std::vector<GenericValue> args;
Function *brainf_func = mod->getFunction("brainf");
GenericValue gv = ee->runFunction(brainf_func, args);
@@ -151,7 +151,8 @@ int main(int argc, char **argv) {
}
//Clean up
- if (out != &std::cout) {delete out;}
+ if (out != &outs())
+ delete out;
delete mod;
llvm_shutdown();
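Two separate migrations land in BrainFDriver.cpp: the JIT is now built with EngineBuilder, which takes the Module directly instead of wrapping it in an ExistingModuleProvider, and the output path moves from std::ostream to raw_ostream. A sketch of the new JIT setup under the 2.6 EngineBuilder interface (makeJIT is an illustrative helper, not part of the patch):

    #include "llvm/Module.h"
    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JIT.h"
    #include "llvm/Target/TargetSelect.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    // Illustrative helper: build a JIT for mod, reporting failure on errs().
    static ExecutionEngine *makeJIT(Module *mod) {
      InitializeNativeTarget();    // must precede EngineBuilder::create()
      std::string Err;
      ExecutionEngine *EE = EngineBuilder(mod).setErrorStr(&Err).create();
      if (!EE)
        errs() << "JIT creation failed: " << Err << "\n";
      return EE;                   // on success, EE takes ownership of mod
    }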
diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp
index c3431fc3527e..b1a4691a9f6c 100644
--- a/examples/Fibonacci/fibonacci.cpp
+++ b/examples/Fibonacci/fibonacci.cpp
@@ -36,35 +36,36 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static Function *CreateFibFunction(Module *M) {
+static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
// Create the fib function and insert it into module M. This function is said
// to return an int and take an int parameter.
Function *FibF =
- cast<Function>(M->getOrInsertFunction("fib", Type::Int32Ty, Type::Int32Ty,
+ cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
(Type *)0));
// Add a basic block to the function.
- BasicBlock *BB = BasicBlock::Create("EntryBlock", FibF);
+ BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF);
// Get pointers to the constants.
- Value *One = ConstantInt::get(Type::Int32Ty, 1);
- Value *Two = ConstantInt::get(Type::Int32Ty, 2);
+ Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2);
// Get a pointer to the integer argument of the fib function...
Argument *ArgX = FibF->arg_begin(); // Get the arg.
ArgX->setName("AnArg"); // Give it a nice symbolic name for fun.
// Create the "return" block (the true branch).
- BasicBlock *RetBB = BasicBlock::Create("return", FibF);
+ BasicBlock *RetBB = BasicBlock::Create(Context, "return", FibF);
// Create the "recurse" block (the false branch).
- BasicBlock* RecurseBB = BasicBlock::Create("recurse", FibF);
+ BasicBlock* RecurseBB = BasicBlock::Create(Context, "recurse", FibF);
// Create the "if (arg <= 2) goto exitbb"
- Value *CondInst = new ICmpInst(ICmpInst::ICMP_SLE, ArgX, Two, "cond", BB);
+ Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond");
BranchInst::Create(RetBB, RecurseBB, CondInst, BB);
// Create: ret int 1
- ReturnInst::Create(One, RetBB);
+ ReturnInst::Create(Context, One, RetBB);
// create fib(x-1)
Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB);
@@ -82,7 +83,7 @@ static Function *CreateFibFunction(Module *M) {
"addresult", RecurseBB);
// Create the return instruction and add it to the basic block
- ReturnInst::Create(Sum, RecurseBB);
+ ReturnInst::Create(Context, Sum, RecurseBB);
return FibF;
}
@@ -97,11 +98,10 @@ int main(int argc, char **argv) {
Module *M = new Module("test", Context);
// We are about to create the "fib" function:
- Function *FibF = CreateFibFunction(M);
+ Function *FibF = CreateFibFunction(M, Context);
// Now we are going to create the JIT.
- ExistingModuleProvider *MP = new ExistingModuleProvider(M);
- ExecutionEngine *EE = ExecutionEngine::create(MP, false);
+ ExecutionEngine *EE = EngineBuilder(M).create();
errs() << "verifying... ";
if (verifyModule(*M)) {
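fibonacci.cpp also picks up the new instruction constructors: for insert-at-end creation, ICmpInst now takes the destination block by reference as its first argument, and ReturnInst::Create requires the context. A small sketch showing both forms side by side (emitCompareAndReturn is illustrative only and assumes an enclosing function that returns i1):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static void emitCompareAndReturn(LLVMContext &C, BasicBlock *BB,
                                     Value *X, Value *Two) {
      // 2.5 form was: new ICmpInst(ICmpInst::ICMP_SLE, X, Two, "cond", BB);
      Value *Cond = new ICmpInst(*BB, ICmpInst::ICMP_SLE, X, Two, "cond");
      // 2.5 form was: ReturnInst::Create(Cond, BB);
      ReturnInst::Create(C, Cond, BB);
    }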
diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp
index 673454791624..ec9c2e68541f 100644
--- a/examples/HowToUseJIT/HowToUseJIT.cpp
+++ b/examples/HowToUseJIT/HowToUseJIT.cpp
@@ -61,15 +61,16 @@ int main() {
// function will have a return type of "int" and take an argument of "int".
// The '0' terminates the list of argument types.
Function *Add1F =
- cast<Function>(M->getOrInsertFunction("add1", Type::Int32Ty, Type::Int32Ty,
+ cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
(Type *)0));
// Add a basic block to the function. As before, it automatically inserts
// because of the last argument.
- BasicBlock *BB = BasicBlock::Create("EntryBlock", Add1F);
+ BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", Add1F);
// Get a pointer to the constant `1'.
- Value *One = ConstantInt::get(Type::Int32Ty, 1);
+ Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1);
// Get a pointer to the integer argument of the add1 function...
assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg
@@ -80,7 +81,7 @@ int main() {
Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB);
// Create the return instruction and add it to the basic block
- ReturnInst::Create(Add, BB);
+ ReturnInst::Create(Context, Add, BB);
// Now, function add1 is ready.
@@ -88,24 +89,24 @@ int main() {
// Now we are going to create function `foo', which returns an int and takes no
// arguments.
Function *FooF =
- cast<Function>(M->getOrInsertFunction("foo", Type::Int32Ty, (Type *)0));
+ cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context),
+ (Type *)0));
// Add a basic block to the FooF function.
- BB = BasicBlock::Create("EntryBlock", FooF);
+ BB = BasicBlock::Create(Context, "EntryBlock", FooF);
// Get a pointer to the constant `10'.
- Value *Ten = ConstantInt::get(Type::Int32Ty, 10);
+ Value *Ten = ConstantInt::get(Type::getInt32Ty(Context), 10);
// Pass Ten to the call:
CallInst *Add1CallRes = CallInst::Create(Add1F, Ten, "add1", BB);
Add1CallRes->setTailCall(true);
// Create the return instruction and add it to the basic block.
- ReturnInst::Create(Add1CallRes, BB);
+ ReturnInst::Create(Context, Add1CallRes, BB);
// Now we create the JIT.
- ExistingModuleProvider* MP = new ExistingModuleProvider(M);
- ExecutionEngine* EE = ExecutionEngine::create(MP, false);
+ ExecutionEngine* EE = EngineBuilder(M).create();
outs() << "We just constructed this LLVM module:\n\n" << *M;
outs() << "\n\nRunning foo: ";
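Once the EngineBuilder-created engine exists, calling into JITed code is unchanged: arguments and results still travel through GenericValue. A sketch for a nullary function returning i32, as foo does here (runFoo is an illustrative helper):

    #include "llvm/Function.h"
    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/GenericValue.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>
    using namespace llvm;

    // Invoke a JIT-compiled i32() function and print its result.
    static void runFoo(ExecutionEngine *EE, Function *FooF) {
      std::vector<GenericValue> NoArgs;
      GenericValue GV = EE->runFunction(FooF, NoArgs);
      // Integer results come back in GV.IntVal (an APInt).
      outs() << "Result: " << GV.IntVal.getZExtValue() << "\n";
    }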
diff --git a/examples/Kaleidoscope/CMakeLists.txt b/examples/Kaleidoscope/CMakeLists.txt
index 9a18aae30534..8c87ac50b7a4 100644
--- a/examples/Kaleidoscope/CMakeLists.txt
+++ b/examples/Kaleidoscope/CMakeLists.txt
@@ -1,5 +1,6 @@
-set(LLVM_LINK_COMPONENTS core jit native)
-
-add_llvm_example(Kaleidoscope
- toy.cpp
- )
+add_subdirectory(Chapter2)
+add_subdirectory(Chapter3)
+add_subdirectory(Chapter4)
+add_subdirectory(Chapter5)
+add_subdirectory(Chapter6)
+add_subdirectory(Chapter7)
diff --git a/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/examples/Kaleidoscope/Chapter2/CMakeLists.txt
new file mode 100644
index 000000000000..79f2b172d0df
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter2/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_example(Kaleidoscope-Ch2
+ toy.cpp
+ )
diff --git a/examples/Kaleidoscope/Chapter2/Makefile b/examples/Kaleidoscope/Chapter2/Makefile
new file mode 100644
index 000000000000..1a9b94ce541e
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter2/Makefile
@@ -0,0 +1,13 @@
+##===- examples/Kaleidoscope/Chapter2/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch2
+EXAMPLE_TOOL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp
new file mode 100644
index 000000000000..f4f09d0b351a
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter2/toy.cpp
@@ -0,0 +1,398 @@
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <map>
+#include <vector>
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+}
+
+/// binoprhs
+/// ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
+
+/// expression
+/// ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing
+//===----------------------------------------------------------------------===//
+
+static void HandleDefinition() {
+ if (ParseDefinition()) {
+ fprintf(stderr, "Parsed a function definition.\n");
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleExtern() {
+ if (ParseExtern()) {
+ fprintf(stderr, "Parsed an extern\n");
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (ParseTopLevelExpr()) {
+ fprintf(stderr, "Parsed a top-level expr\n");
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ return 0;
+}
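Chapter 2's ParseBinOpRHS is a standard precedence-climbing parser. To see why the recursion matters, take a+b*c: after parsing a and seeing +, the parser reads b, notices that * binds tighter (40 > 20), and recurses so that b*c becomes the right operand of +. The following self-contained sketch (independent of the tutorial's lexer and AST; single-letter operands only) prints the grouping the algorithm produces:

    #include <cstdio>
    #include <map>
    #include <string>

    static std::string Input;
    static size_t Pos;
    static std::map<char, int> Prec;

    static std::string ParsePrimary() {   // one-letter operands only
      return std::string(1, Input[Pos++]);
    }

    // Precedence climbing, mirroring the tutorial's ParseBinOpRHS.
    static std::string ParseBinOpRHS(int MinPrec, std::string LHS) {
      while (Pos < Input.size()) {
        char Op = Input[Pos];
        int P = Prec.count(Op) ? Prec[Op] : -1;
        if (P < MinPrec) return LHS;      // the caller owns this operator
        ++Pos;                            // eat the operator
        std::string RHS = ParsePrimary();
        // If the next operator binds tighter, it takes RHS as its LHS.
        int Next = Pos < Input.size() && Prec.count(Input[Pos])
                       ? Prec[Input[Pos]] : -1;
        if (P < Next)
          RHS = ParseBinOpRHS(P + 1, RHS);
        LHS = "(" + LHS + Op + RHS + ")";
      }
      return LHS;
    }

    int main() {
      Prec['<'] = 10; Prec['+'] = 20; Prec['-'] = 20; Prec['*'] = 40;
      Input = "a+b*c-d"; Pos = 0;
      std::string Out = ParseBinOpRHS(0, ParsePrimary());
      printf("%s\n", Out.c_str());        // prints ((a+(b*c))-d)
      return 0;
    }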
diff --git a/examples/Kaleidoscope/Chapter3/CMakeLists.txt b/examples/Kaleidoscope/Chapter3/CMakeLists.txt
new file mode 100644
index 000000000000..1af8db00a172
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter3/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core)
+
+add_llvm_example(Kaleidoscope-Ch3
+ toy.cpp
+ )
diff --git a/examples/Kaleidoscope/Chapter3/Makefile b/examples/Kaleidoscope/Chapter3/Makefile
new file mode 100644
index 000000000000..4cc6948d8037
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter3/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter3/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch3
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp
new file mode 100644
index 000000000000..73520d8fa953
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter3/toy.cpp
@@ -0,0 +1,563 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+ Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+}
+
+/// binoprhs
+/// ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
+
+/// expression
+/// ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateAdd(L, R, "addtmp");
+ case '-': return Builder.CreateSub(L, R, "subtmp");
+ case '*': return Builder.CreateMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+}
+
+Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<const Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+}
+
+Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read top-level expression:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+ putchar((char)X);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+}
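One subtlety in PrototypeAST::Codegen above: Function::Create never fails, but if the requested name is already taken the module silently uniques the new function's name, which is why the F->getName() != Name test detects a prior declaration. A standalone sketch of that behavior (the module and function names are illustrative):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>
    using namespace llvm;

    int main() {
      LLVMContext &C = getGlobalContext();
      Module *M = new Module("demo", C);
      std::vector<const Type*> NoArgs;
      FunctionType *FT = FunctionType::get(Type::getDoubleTy(C), NoArgs, false);

      Function *First  = Function::Create(FT, Function::ExternalLinkage, "f", M);
      Function *Second = Function::Create(FT, Function::ExternalLinkage, "f", M);
      // The module renamed the second one, so the tutorial's test fires:
      outs() << First->getName() << " vs " << Second->getName() << "\n";
      if (Second->getName() != "f") {
        Second->eraseFromParent();        // drop the duplicate...
        Function *Existing = M->getFunction("f");
        outs() << "reusing " << Existing->getName() << "\n";  // ...reuse "f"
      }
      delete M;
      return 0;
    }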
diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt
new file mode 100644
index 000000000000..0d1ac533f02d
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch4
+ toy.cpp
+ )
diff --git a/examples/Kaleidoscope/Chapter4/Makefile b/examples/Kaleidoscope/Chapter4/Makefile
new file mode 100644
index 000000000000..7bc742fb1e44
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter4/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter4/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch4
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
new file mode 100644
index 000000000000..d136635e8114
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -0,0 +1,610 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+ Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+}
+
+/// binoprhs
+/// ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
+
+/// expression
+/// ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateAdd(L, R, "addtmp");
+ case '-': return Builder.CreateSub(L, R, "subtmp");
+ case '*': return Builder.CreateMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+}
+
+Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // Reject calls that pass the wrong number of arguments.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<const Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+}
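+
+// Note: Function::Create uniques value names within the module, so if
+// something named 'Name' already existed, the new function is silently
+// renamed (e.g. "foo" becomes "foo1"); the name mismatch checked above is
+// how a conflict with an earlier declaration is detected.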
+
+Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+ putchar((char)X);
+ return 0;
+}
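+
+// A minimal illustrative REPL session using putchard (output is a sketch;
+// exact formatting may differ):
+//   ready> extern putchard(x);
+//   ready> putchard(120);   # prints 'x' (ASCII 120), then "Evaluated to 0.000000"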
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start by registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+ // Do simple "peephole" and bit-twiddling optimizations.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+}
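+
+// A minimal illustrative transcript of the driver loop (IR dump elided):
+//   ready> def foo(x) x+1;
+//   Read function definition: ...
+//   ready> foo(4);
+//   Evaluated to 5.000000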
diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
new file mode 100644
index 000000000000..2d75ad35923f
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch5
+ toy.cpp
+ )
diff --git a/examples/Kaleidoscope/Chapter5/Makefile b/examples/Kaleidoscope/Chapter5/Makefile
new file mode 100644
index 000000000000..5a8355d3153d
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter5/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter5/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch5
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
new file mode 100644
index 000000000000..c2613e36029b
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -0,0 +1,855 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] (the character's ASCII value) for unknown
+// characters, otherwise one of these for known things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
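+
+// Illustrative example: lexing "def fib(x)" yields tok_def, tok_identifier
+// (IdentifierStr == "fib"), '(', tok_identifier (IdentifierStr == "x"), and
+// ')', with unknown characters returned as their ASCII values.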
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+ Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+/// ::= ifexpr
+/// ::= forexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ }
+}
+
+/// binoprhs
+/// ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
+
+/// expression
+/// ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateAdd(L, R, "addtmp");
+ case '-': return Builder.CreateSub(L, R, "subtmp");
+ case '*': return Builder.CreateMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+}
+
+Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // Reject calls that pass the wrong number of arguments.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+}
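+
+// A rough sketch of the CFG this emits for "if c then x else y" (value names
+// are illustrative; LLVM will unique them):
+//   entry:  %ifcond = fcmp one double %c, 0.000000e+00
+//           br i1 %ifcond, label %then, label %else
+//   then:   br label %ifcont
+//   else:   br label %ifcont
+//   ifcont: %iftmp = phi double [ %x, %then ], [ %y, %else ]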
+
+Value *ForExprAST::Codegen() {
+ // Output this as:
+ // ...
+ // start = startexpr
+ // goto loop
+ // loop:
+ // variable = phi [start, loopheader], [nextvariable, loopend]
+ // ...
+ // bodyexpr
+ // ...
+ // loopend:
+ // step = stepexpr
+ // nextvariable = variable + step
+ // endcond = endexpr
+ // br endcond, loop, endloop
+ // outloop:
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+ BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Start the PHI node with an entry for Start.
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+ Variable->addIncoming(StartVal, PreheaderBB);
+
+ // Within the loop, the variable is defined equal to the PHI node. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ Value *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Variable;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of LoopEndBB.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+ // Add a new entry to the PHI node for the backedge.
+ Variable->addIncoming(NextVar, LoopEndBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
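+
+// Illustrative usage (as in the tutorial text), assuming putchard is extern'd:
+//   ready> extern putchard(char);
+//   ready> def printstar(n) for i = 1, i < n, 1.0 in putchard(42);
+//   ready> printstar(5);   # prints "****" (42 is ASCII '*')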
+
+Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<const Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+}
+
+Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+ putchar((char)X);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start by registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+ // Do simple "peephole" and bit-twiddling optimizations.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+}
diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
new file mode 100644
index 000000000000..2e15a5f7dfc6
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch6
+ toy.cpp
+ )
diff --git a/examples/Kaleidoscope/Chapter6/Makefile b/examples/Kaleidoscope/Chapter6/Makefile
new file mode 100644
index 000000000000..de2d758728eb
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter6/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter6/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch6
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
new file mode 100644
index 000000000000..638a340d51ae
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -0,0 +1,973 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] (the character's ASCII value) for unknown
+// characters, otherwise one of these for known things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10,
+
+ // operators
+ tok_binary = -11, tok_unary = -12
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ if (IdentifierStr == "binary") return tok_binary;
+ if (IdentifierStr == "unary") return tok_unary;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+ char Opcode;
+ ExprAST *Operand;
+public:
+ UnaryExprAST(char opcode, ExprAST *operand)
+ : Opcode(opcode), Operand(operand) {}
+ virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as whether it is an operator.
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ bool isOperator;
+ unsigned Precedence; // Precedence if a binary op.
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+ bool isoperator = false, unsigned prec = 0)
+ : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+ bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+ bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+ char getOperatorName() const {
+ assert(isUnaryOp() || isBinaryOp());
+ return Name[Name.size()-1];
+ }
+
+ unsigned getBinaryPrecedence() const { return Precedence; }
+
+ Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+/// ::= ifexpr
+/// ::= forexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ }
+}
+
+/// unary
+/// ::= primary
+/// ::= '!' unary
+static ExprAST *ParseUnary() {
+ // If the current token is not an operator, it must be a primary expr.
+ if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+ return ParsePrimary();
+
+ // If this is a unary operator, read it.
+ int Opc = CurTok;
+ getNextToken();
+ if (ExprAST *Operand = ParseUnary())
+ return new UnaryExprAST(Opc, Operand);
+ return 0;
+}
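+
+// Illustrative example: for "!!x", ParseUnary eats the first '!', recurses,
+// eats the second '!', then parses 'x' as a primary, producing
+// UnaryExprAST('!', UnaryExprAST('!', VariableExprAST("x"))).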
+
+/// binoprhs
+/// ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the unary expression after the binary operator.
+ ExprAST *RHS = ParseUnary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
+
+/// expression
+/// ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParseUnary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+/// ::= binary LETTER number? (id id)
+/// ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+ std::string FnName;
+
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned BinaryPrecedence = 30;
+
+ switch (CurTok) {
+ default:
+ return ErrorP("Expected function name in prototype");
+ case tok_identifier:
+ FnName = IdentifierStr;
+ Kind = 0;
+ getNextToken();
+ break;
+ case tok_unary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected unary operator");
+ FnName = "unary";
+ FnName += (char)CurTok;
+ Kind = 1;
+ getNextToken();
+ break;
+ case tok_binary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected binary operator");
+ FnName = "binary";
+ FnName += (char)CurTok;
+ Kind = 2;
+ getNextToken();
+
+ // Read the precedence if present.
+ if (CurTok == tok_number) {
+ if (NumVal < 1 || NumVal > 100)
+ return ErrorP("Invalid precedecnce: must be 1..100");
+ BinaryPrecedence = (unsigned)NumVal;
+ getNextToken();
+ }
+ break;
+ }
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ // Verify right number of names for operator.
+ if (Kind && ArgNames.size() != Kind)
+ return ErrorP("Invalid number of operands for operator");
+
+ return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
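+
+// Illustrative prototypes this grammar accepts (examples adapted from the
+// Kaleidoscope tutorial text):
+//   def unary!(v) if v then 0 else 1;                  # unary 'not'
+//   def binary> 10 (LHS RHS) RHS < LHS;                # '>' via the builtin '<'
+//   def binary| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0;  # 'or'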
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *UnaryExprAST::Codegen() {
+ Value *OperandV = Operand->Codegen();
+ if (OperandV == 0) return 0;
+
+ Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+ if (F == 0)
+ return ErrorV("Unknown unary operator");
+
+ return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateAdd(L, R, "addtmp");
+ case '-': return Builder.CreateSub(L, R, "subtmp");
+ case '*': return Builder.CreateMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: break;
+ }
+
+ // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+ // a call to it.
+ Function *F = TheModule->getFunction(std::string("binary")+Op);
+ assert(F && "binary operator not found!");
+
+ Value *Ops[] = { L, R };
+ return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
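+
+// Illustrative lowering: with a user-defined "binary|" in scope, "a|b" falls
+// through the switch above and is emitted roughly as
+//   %binop = call double @"binary|"(double %a, double %b)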
+
+Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // Reject calls that pass the wrong number of arguments.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+}
+
+Value *ForExprAST::Codegen() {
+ // Output this as:
+ // ...
+ // start = startexpr
+ // goto loop
+ // loop:
+ // variable = phi [start, loopheader], [nextvariable, loopend]
+ // ...
+ // bodyexpr
+ // ...
+ // loopend:
+ // step = stepexpr
+ // nextvariable = variable + step
+ // endcond = endexpr
+ // br endcond, loop, endloop
+ // outloop:
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+ BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Start the PHI node with an entry for Start.
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+ Variable->addIncoming(StartVal, PreheaderBB);
+
+ // Within the loop, the variable is defined equal to the PHI node. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ Value *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Variable;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of LoopEndBB.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+ // Add a new entry to the PHI node for the backedge.
+ Variable->addIncoming(NextVar, LoopEndBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<const Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+}
+
+Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // If this is an operator, install it.
+ if (Proto->isBinaryOp())
+ BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+
+ if (Proto->isBinaryOp())
+ BinopPrecedence.erase(Proto->getOperatorName());
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
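+
+// A typical session once the JIT driver is wired up (illustrative; the
+// exact formatting comes from the fprintf calls above):
+//   ready> 4+5;
+//   Evaluated to 9.000000
+//   ready> def testfunc(x y) x + y*2;
+//   ready> testfunc(4, 10);
+//   Evaluated to 24.000000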
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+ putchar((char)X);
+ return 0;
+}
+
+/// printd - printf that takes a double, prints it as "%f\n", and returns 0.
+extern "C"
+double printd(double X) {
+ printf("%f\n", X);
+ return 0;
+}
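+
+// Once declared with 'extern', these can be called from Kaleidoscope
+// (illustrative):
+//   ready> extern putchard(x);
+//   ready> putchard(65);   # prints 'A', evaluates to 0.0
+//   ready> extern printd(x);
+//   ready> printd(42);     # prints "42.000000"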
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start by registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+}
diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
new file mode 100644
index 000000000000..9b8227c69340
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch7
+ toy.cpp
+ )
diff --git a/examples/Kaleidoscope/Chapter7/Makefile b/examples/Kaleidoscope/Chapter7/Makefile
new file mode 100644
index 000000000000..9d2df6f02d07
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter7/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter7/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch7
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
new file mode 100644
index 000000000000..8b0c321c06c5
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -0,0 +1,1139 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens in [0-255] (the character's ASCII value) for
+// unknown characters, otherwise one of the negative values below for known
+// things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10,
+
+ // operators
+ tok_binary = -11, tok_unary = -12,
+
+ // var definition
+ tok_var = -13
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ if (IdentifierStr == "binary") return tok_binary;
+ if (IdentifierStr == "unary") return tok_unary;
+ if (IdentifierStr == "var") return tok_var;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ASCII value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
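+
+// For example (illustrative), the input "def fib(x) x < 3" lexes to:
+//   tok_def, tok_identifier("fib"), '(', tok_identifier("x"), ')',
+//   tok_identifier("x"), '<', tok_number(3.0), tok_eof.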
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ const std::string &getName() const { return Name; }
+ virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+ char Opcode;
+ ExprAST *Operand;
+public:
+ UnaryExprAST(char opcode, ExprAST *operand)
+ : Opcode(opcode), Operand(operand) {}
+ virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+};
+
+/// VarExprAST - Expression class for var/in.
+class VarExprAST : public ExprAST {
+ std::vector<std::pair<std::string, ExprAST*> > VarNames;
+ ExprAST *Body;
+public:
+ VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+ ExprAST *body)
+ : VarNames(varnames), Body(body) {}
+
+ virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its argument names as well as if it is an operator.
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ bool isOperator;
+ unsigned Precedence; // Precedence if a binary op.
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+ bool isoperator = false, unsigned prec = 0)
+ : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+ bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+ bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+ char getOperatorName() const {
+ assert(isUnaryOp() || isBinaryOp());
+ return Name[Name.size()-1];
+ }
+
+ unsigned getBinaryPrecedence() const { return Precedence; }
+
+ Function *Codegen();
+
+ void CreateArgumentAllocas(Function *F);
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)?
+///                    (',' identifier ('=' expression)?)* 'in' expression
+static ExprAST *ParseVarExpr() {
+ getNextToken(); // eat the var.
+
+ std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+ // At least one variable name is required.
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after var");
+
+ while (1) {
+ std::string Name = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ // Read the optional initializer.
+ ExprAST *Init = 0;
+ if (CurTok == '=') {
+ getNextToken(); // eat the '='.
+
+ Init = ParseExpression();
+ if (Init == 0) return 0;
+ }
+
+ VarNames.push_back(std::make_pair(Name, Init));
+
+ // End of var list, exit loop.
+ if (CurTok != ',') break;
+ getNextToken(); // eat the ','.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier list after var");
+ }
+
+ // At this point, we have to have 'in'.
+ if (CurTok != tok_in)
+ return Error("expected 'in' keyword after 'var'");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new VarExprAST(VarNames, Body);
+}
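+
+// Inputs this accepts (illustrative):
+//   var a in a                   # 'a' defaults to 0.0
+//   var a = 1, b = a*2 in a + b  # later initializers may use earlier vars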
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+/// ::= ifexpr
+/// ::= forexpr
+/// ::= varexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ case tok_var: return ParseVarExpr();
+ }
+}
+
+/// unary
+/// ::= primary
+/// ::= '!' unary
+static ExprAST *ParseUnary() {
+ // If the current token is not an operator, it must be a primary expr.
+ if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+ return ParsePrimary();
+
+ // If this is a unary operator, read it.
+ int Opc = CurTok;
+ getNextToken();
+ if (ExprAST *Operand = ParseUnary())
+ return new UnaryExprAST(Opc, Operand);
+ return 0;
+}
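+
+// For instance (illustrative), once 'def unary!(v) if v then 0 else 1;' is
+// defined, this routine parses '!1' into UnaryExprAST('!', NumberExprAST(1)).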
+
+/// binoprhs
+/// ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the unary expression after the binary operator.
+ ExprAST *RHS = ParseUnary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
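+
+// Illustrative walk-through for "a + b * c" with '+' at 20 and '*' at 40:
+// after consuming '+' and parsing 'b', the pending '*' binds tighter
+// (40 >= 20+1), so the code recurses via ParseBinOpRHS(21, b) to build
+// (b*c) first; the result therefore associates as a + (b*c).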
+
+/// expression
+/// ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParseUnary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+/// ::= binary LETTER number? (id, id)
+/// ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+ std::string FnName;
+
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned BinaryPrecedence = 30;
+
+ switch (CurTok) {
+ default:
+ return ErrorP("Expected function name in prototype");
+ case tok_identifier:
+ FnName = IdentifierStr;
+ Kind = 0;
+ getNextToken();
+ break;
+ case tok_unary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected unary operator");
+ FnName = "unary";
+ FnName += (char)CurTok;
+ Kind = 1;
+ getNextToken();
+ break;
+ case tok_binary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected binary operator");
+ FnName = "binary";
+ FnName += (char)CurTok;
+ Kind = 2;
+ getNextToken();
+
+ // Read the precedence if present.
+ if (CurTok == tok_number) {
+ if (NumVal < 1 || NumVal > 100)
+ return ErrorP("Invalid precedecnce: must be 1..100");
+ BinaryPrecedence = (unsigned)NumVal;
+ getNextToken();
+ }
+ break;
+ }
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ // Verify right number of names for operator.
+ if (Kind && ArgNames.size() != Kind)
+ return ErrorP("Invalid number of operands for operator");
+
+ return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
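+
+// The three accepted forms in practice (illustrative):
+//   def foo(x y) ...            # Kind 0: ordinary function "foo"
+//   def unary!(v) ...           # Kind 1: FnName becomes "unary!"
+//   def binary| 5 (LHS RHS) ... # Kind 2: FnName "binary|", precedence 5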
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, AllocaInst*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+/// the function. This is used for mutable variables etc.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+ const std::string &VarName) {
+ IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+ TheFunction->getEntryBlock().begin());
+ return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+ VarName.c_str());
+}
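+
+// The entry block matters: the mem2reg pass added in main() only promotes
+// allocas that appear in a function's entry block. Illustrative IR for a
+// variable 'x' before promotion (simplified):
+//   entry:
+//     %x = alloca double
+//     store double 1.0, double* %x
+//     %x1 = load double* %x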
+
+Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ if (V == 0) return ErrorV("Unknown variable name");
+
+ // Load the value.
+ return Builder.CreateLoad(V, Name.c_str());
+}
+
+Value *UnaryExprAST::Codegen() {
+ Value *OperandV = Operand->Codegen();
+ if (OperandV == 0) return 0;
+
+ Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+ if (F == 0)
+ return ErrorV("Unknown unary operator");
+
+ return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+ // Special case '=' because we don't want to emit the LHS as an expression.
+ if (Op == '=') {
+ // Assignment requires the LHS to be an identifier.
+ VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+ if (!LHSE)
+ return ErrorV("destination of '=' must be a variable");
+ // Codegen the RHS.
+ Value *Val = RHS->Codegen();
+ if (Val == 0) return 0;
+
+ // Look up the name.
+ Value *Variable = NamedValues[LHSE->getName()];
+ if (Variable == 0) return ErrorV("Unknown variable name");
+
+ Builder.CreateStore(Val, Variable);
+ return Val;
+ }
+
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateAdd(L, R, "addtmp");
+ case '-': return Builder.CreateSub(L, R, "subtmp");
+ case '*': return Builder.CreateMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: break;
+ }
+
+ // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+ // a call to it.
+ Function *F = TheModule->getFunction(std::string("binary")+Op);
+ assert(F && "binary operator not found!");
+
+ Value *Ops[] = { L, R };
+ return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+}
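+
+// Illustrative IR for 'if c then 1 else 2' (simplified names):
+//   entry:
+//     %ifcond = fcmp one double %c, 0.0
+//     br i1 %ifcond, label %then, label %else
+//   then:                                  ; computes 1.0
+//     br label %ifcont
+//   else:                                  ; computes 2.0
+//     br label %ifcont
+//   ifcont:
+//     %iftmp = phi double [ 1.0, %then ], [ 2.0, %else ]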
+
+Value *ForExprAST::Codegen() {
+ // Output this as:
+ // var = alloca double
+ // ...
+ // start = startexpr
+ // store start -> var
+ // goto loop
+ // loop:
+ // ...
+ // bodyexpr
+ // ...
+ // loopend:
+ // step = stepexpr
+ // endcond = endexpr
+ //
+ // curvar = load var
+ // nextvar = curvar + step
+ // store nextvar -> var
+ // br endcond, loop, endloop
+ // outloop:
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create an alloca for the variable in the entry block.
+ AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Store the value into the alloca.
+ Builder.CreateStore(StartVal, Alloca);
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Within the loop, the variable is defined equal to the alloca. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ AllocaInst *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Alloca;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Reload, increment, and restore the alloca. This handles the case where
+ // the body of the loop mutates the variable.
+ Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
+ Value *NextVar = Builder.CreateAdd(CurVar, StepVal, "nextvar");
+ Builder.CreateStore(NextVar, Alloca);
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of the current block.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
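+
+// Unlike the PHI-based loop of earlier chapters, the induction variable now
+// lives in a stack slot, so the loop body may reassign it; mem2reg rebuilds
+// SSA form afterwards. Illustrative use:
+//   ready> extern putchard(char);
+//   ready> def printstar(n) for i = 1, i < n in putchard(42);  # 42 == '*'
+//   ready> printstar(5);   # prints "*****"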
+
+Value *VarExprAST::Codegen() {
+ std::vector<AllocaInst *> OldBindings;
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Register all variables and emit their initializer.
+ for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+ const std::string &VarName = VarNames[i].first;
+ ExprAST *Init = VarNames[i].second;
+
+ // Emit the initializer before adding the variable to scope; this prevents
+ // the initializer from referencing the variable itself and permits stuff
+ // like this:
+ // var a = 1 in
+ // var a = a in ... # refers to outer 'a'.
+ Value *InitVal;
+ if (Init) {
+ InitVal = Init->Codegen();
+ if (InitVal == 0) return 0;
+ } else { // If not specified, use 0.0.
+ InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+ }
+
+ AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+ Builder.CreateStore(InitVal, Alloca);
+
+ // Remember the old variable binding so that we can restore the binding when
+ // we unrecurse.
+ OldBindings.push_back(NamedValues[VarName]);
+
+ // Remember this binding.
+ NamedValues[VarName] = Alloca;
+ }
+
+ // Codegen the body, now that all vars are in scope.
+ Value *BodyVal = Body->Codegen();
+ if (BodyVal == 0) return 0;
+
+ // Pop all our variables from scope.
+ for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+ NamedValues[VarNames[i].first] = OldBindings[i];
+
+ // Return the body computation.
+ return BodyVal;
+}
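+
+// Scoping in action (illustrative):
+//   ready> var a = 1 in (var a = a + 1 in a) + a;
+//   Evaluated to 3.000000   # inner 'a' is 2, outer 'a' is unchanged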
+
+Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<const Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx)
+ AI->setName(Args[Idx]);
+
+ return F;
+}
+
+/// CreateArgumentAllocas - Create an alloca for each argument and register the
+/// argument in the symbol table so that references to it will succeed.
+void PrototypeAST::CreateArgumentAllocas(Function *F) {
+ Function::arg_iterator AI = F->arg_begin();
+ for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+ // Create an alloca for this variable.
+ AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+ // Store the initial value into the alloca.
+ Builder.CreateStore(AI, Alloca);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = Alloca;
+ }
+}
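+
+// Illustrative IR for 'def id(x) x' before optimization (simplified):
+//   define double @id(double %x) {
+//   entry:
+//     %x1 = alloca double
+//     store double %x, double* %x1
+//     %x2 = load double* %x1
+//     ret double %x2
+//   }
+// mem2reg then folds this back down to a plain 'ret double %x'.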
+
+Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // If this is an operator, install it.
+ if (Proto->isBinaryOp())
+ BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ // Add all arguments to the symbol table and create their allocas.
+ Proto->CreateArgumentAllocas(TheFunction);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+
+ if (Proto->isBinaryOp())
+ BinopPrecedence.erase(Proto->getOperatorName());
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+ putchar((char)X);
+ return 0;
+}
+
+/// printd - printf that takes a double, prints it as "%f\n", and returns 0.
+extern "C"
+double printd(double X) {
+ printf("%f\n", X);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['='] = 2;
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ ExistingModuleProvider *OurModuleProvider =
+ new ExistingModuleProvider(TheModule);
+
+ // Create the JIT. This takes ownership of the module and module provider.
+ TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+ FunctionPassManager OurFPM(OurModuleProvider);
+
+ // Set up the optimizer pipeline. Start by registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+ // Promote allocas to registers.
+ OurFPM.add(createPromoteMemoryToRegisterPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
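+ // Without the mem2reg pass above, every variable reference would remain a
+ // load/store through its alloca; with it, e.g. 'def f(x) x+x' reduces to a
+ // single add of the SSA argument (illustrative; exact IR may vary).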
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+}
diff --git a/examples/Kaleidoscope/Makefile b/examples/Kaleidoscope/Makefile
index c1759a09fe32..bd0c252c2c03 100644
--- a/examples/Kaleidoscope/Makefile
+++ b/examples/Kaleidoscope/Makefile
@@ -6,10 +6,10 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = Kaleidoscope
-EXAMPLE_TOOL = 1
+LEVEL=../..
-LINK_COMPONENTS := core jit native
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7
include $(LEVEL)/Makefile.common
diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp
index 59a86d031d2f..6bc52c12a034 100644
--- a/examples/ModuleMaker/ModuleMaker.cpp
+++ b/examples/ModuleMaker/ModuleMaker.cpp
@@ -19,7 +19,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include <iostream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
int main() {
@@ -30,7 +30,8 @@ int main() {
Module *M = new Module("test", Context);
// Create the main function: first create the type 'int ()'
- FunctionType *FT = FunctionType::get(Type::Int32Ty, /*not vararg*/false);
+ FunctionType *FT =
+ FunctionType::get(Type::getInt32Ty(Context), /*not vararg*/false);
// By passing a module as the last parameter to the Function constructor,
// it automatically gets appended to the Module.
@@ -38,11 +39,11 @@ int main() {
// Add a basic block to the function... again, it automatically inserts
// because of the last argument.
- BasicBlock *BB = BasicBlock::Create("EntryBlock", F);
+ BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", F);
// Get pointers to the constant integers...
- Value *Two = ConstantInt::get(Type::Int32Ty, 2);
- Value *Three = ConstantInt::get(Type::Int32Ty, 3);
+ Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2);
+ Value *Three = ConstantInt::get(Type::getInt32Ty(Context), 3);
// Create the add instruction... does not insert...
Instruction *Add = BinaryOperator::Create(Instruction::Add, Two, Three,
@@ -52,10 +53,10 @@ int main() {
BB->getInstList().push_back(Add);
// Create the return instruction and add it to the basic block
- BB->getInstList().push_back(ReturnInst::Create(Add));
+ BB->getInstList().push_back(ReturnInst::Create(Context, Add));
// Output the bitcode file to stdout
- WriteBitcodeToFile(M, std::cout);
+ WriteBitcodeToFile(M, outs());
// Delete the module and all of its contents.
delete M;
diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt
index d8dd7084c40d..fbdc6e5fc10b 100644
--- a/examples/ParallelJIT/CMakeLists.txt
+++ b/examples/ParallelJIT/CMakeLists.txt
@@ -4,4 +4,6 @@ add_llvm_example(ParallelJIT
ParallelJIT.cpp
)
-target_link_libraries(ParallelJIT pthread)
+if(HAVE_LIBPTHREAD)
+ target_link_libraries(ParallelJIT pthread)
+endif(HAVE_LIBPTHREAD)
diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp
index eadd0f58e5d5..be40a282150d 100644
--- a/examples/ParallelJIT/ParallelJIT.cpp
+++ b/examples/ParallelJIT/ParallelJIT.cpp
@@ -36,15 +36,17 @@ static Function* createAdd1(Module *M) {
// function will have a return type of "int" and take an argument of "int".
// The '0' terminates the list of argument types.
Function *Add1F =
- cast<Function>(M->getOrInsertFunction("add1", Type::Int32Ty, Type::Int32Ty,
+ cast<Function>(M->getOrInsertFunction("add1",
+ Type::getInt32Ty(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
(Type *)0));
// Add a basic block to the function. As before, it automatically inserts
// because of the last argument.
- BasicBlock *BB = BasicBlock::Create("EntryBlock", Add1F);
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", Add1F);
// Get pointers to the constant `1'.
- Value *One = ConstantInt::get(Type::Int32Ty, 1);
+ Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
// Get pointers to the integer argument of the add1 function...
assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg
@@ -55,7 +57,7 @@ static Function* createAdd1(Module *M) {
Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB);
// Create the return instruction and add it to the basic block
- ReturnInst::Create(Add, BB);
+ ReturnInst::Create(M->getContext(), Add, BB);
// Now, function add1 is ready.
return Add1F;
@@ -65,31 +67,33 @@ static Function *CreateFibFunction(Module *M) {
// Create the fib function and insert it into module M. This function is said
// to return an int and take an int parameter.
Function *FibF =
- cast<Function>(M->getOrInsertFunction("fib", Type::Int32Ty, Type::Int32Ty,
+ cast<Function>(M->getOrInsertFunction("fib",
+ Type::getInt32Ty(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
(Type *)0));
// Add a basic block to the function.
- BasicBlock *BB = BasicBlock::Create("EntryBlock", FibF);
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF);
// Get pointers to the constants.
- Value *One = ConstantInt::get(Type::Int32Ty, 1);
- Value *Two = ConstantInt::get(Type::Int32Ty, 2);
+ Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
+ Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2);
// Get pointer to the integer argument of the add1 function...
Argument *ArgX = FibF->arg_begin(); // Get the arg.
ArgX->setName("AnArg"); // Give it a nice symbolic name for fun.
// Create the true_block.
- BasicBlock *RetBB = BasicBlock::Create("return", FibF);
+ BasicBlock *RetBB = BasicBlock::Create(M->getContext(), "return", FibF);
// Create an exit block.
- BasicBlock* RecurseBB = BasicBlock::Create("recurse", FibF);
+ BasicBlock* RecurseBB = BasicBlock::Create(M->getContext(), "recurse", FibF);
// Create the "if (arg < 2) goto exitbb"
- Value *CondInst = new ICmpInst(ICmpInst::ICMP_SLE, ArgX, Two, "cond", BB);
+ Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond");
BranchInst::Create(RetBB, RecurseBB, CondInst, BB);
// Create: ret int 1
- ReturnInst::Create(One, RetBB);
+ ReturnInst::Create(M->getContext(), One, RetBB);
// create fib(x-1)
Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB);
@@ -104,7 +108,7 @@ static Function *CreateFibFunction(Module *M) {
BinaryOperator::CreateAdd(CallFibX1, CallFibX2, "addresult", RecurseBB);
// Create the return instruction and add it to the basic block
- ReturnInst::Create(Sum, RecurseBB);
+ ReturnInst::Create(M->getContext(), Sum, RecurseBB);
return FibF;
}
@@ -242,8 +246,7 @@ int main() {
Function* fibF = CreateFibFunction( M );
// Now we create the JIT.
- ExistingModuleProvider* MP = new ExistingModuleProvider(M);
- ExecutionEngine* EE = ExecutionEngine::create(MP, false);
+ ExecutionEngine* EE = EngineBuilder(M).create();
//~ std::cout << "We just constructed this LLVM module:\n\n" << *M;
//~ std::cout << "\n\nRunning foo: " << std::flush;
diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h
index 5dce27eb23b8..a184f609d6ba 100644
--- a/include/llvm-c/BitReader.h
+++ b/include/llvm-c/BitReader.h
@@ -32,8 +32,8 @@ extern "C" {
int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule, char **OutMessage);
-int LLVMParseBitcodeInContext(LLVMMemoryBufferRef MemBuf,
- LLVMContextRef ContextRef,
+int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule, char **OutMessage);
/* Reads a module from the specified path, returning via the OutMP parameter
@@ -43,8 +43,8 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
LLVMModuleProviderRef *OutMP,
char **OutMessage);
-int LLVMGetBitcodeModuleProviderInContext(LLVMMemoryBufferRef MemBuf,
- LLVMContextRef ContextRef,
+int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
LLVMModuleProviderRef *OutMP,
char **OutMessage);
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 3538c0837187..74c170928f09 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -33,6 +33,8 @@
#ifndef LLVM_C_CORE_H
#define LLVM_C_CORE_H
+#include "llvm/Support/DataTypes.h"
+
#ifdef __cplusplus
/* Need these includes to support the LLVM 'cast' template for the C++ 'wrap'
@@ -49,7 +51,7 @@ extern "C" {
/**
* The top-level container for all LLVM global data. See the LLVMContext class.
*/
-typedef struct LLVMCtxt *LLVMContextRef;
+typedef struct LLVMOpaqueContext *LLVMContextRef;
/**
* The top-level container for all other LLVM Intermediate Representation (IR)
@@ -87,6 +89,12 @@ typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef;
/** See the llvm::PassManagerBase class. */
typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
+/**
+ * Used to iterate through the uses of a Value, allowing access to all Values
+ * that use this Value. See the llvm::Use and llvm::value_use_iterator classes.
+ */
+typedef struct LLVMOpaqueUseIterator *LLVMUseIteratorRef;
+
typedef enum {
LLVMZExtAttribute = 1<<0,
LLVMSExtAttribute = 1<<1,
@@ -98,10 +106,76 @@ typedef enum {
LLVMByValAttribute = 1<<7,
LLVMNestAttribute = 1<<8,
LLVMReadNoneAttribute = 1<<9,
- LLVMReadOnlyAttribute = 1<<10
+ LLVMReadOnlyAttribute = 1<<10,
+ LLVMNoInlineAttribute = 1<<11,
+ LLVMAlwaysInlineAttribute = 1<<12,
+ LLVMOptimizeForSizeAttribute = 1<<13,
+ LLVMStackProtectAttribute = 1<<14,
+ LLVMStackProtectReqAttribute = 1<<15,
+ LLVMNoCaptureAttribute = 1<<21,
+ LLVMNoRedZoneAttribute = 1<<22,
+ LLVMNoImplicitFloatAttribute = 1<<23,
+ LLVMNakedAttribute = 1<<24,
+ LLVMInlineHintAttribute = 1<<25
} LLVMAttribute;
typedef enum {
+ LLVMRet = 1,
+ LLVMBr = 2,
+ LLVMSwitch = 3,
+ LLVMInvoke = 4,
+ LLVMUnwind = 5,
+ LLVMUnreachable = 6,
+ LLVMAdd = 7,
+ LLVMFAdd = 8,
+ LLVMSub = 9,
+ LLVMFSub = 10,
+ LLVMMul = 11,
+ LLVMFMul = 12,
+ LLVMUDiv = 13,
+ LLVMSDiv = 14,
+ LLVMFDiv = 15,
+ LLVMURem = 16,
+ LLVMSRem = 17,
+ LLVMFRem = 18,
+ LLVMShl = 19,
+ LLVMLShr = 20,
+ LLVMAShr = 21,
+ LLVMAnd = 22,
+ LLVMOr = 23,
+ LLVMXor = 24,
+ LLVMMalloc = 25,
+ LLVMFree = 26,
+ LLVMAlloca = 27,
+ LLVMLoad = 28,
+ LLVMStore = 29,
+ LLVMGetElementPtr = 30,
+ LLVMTrunk = 31,
+ LLVMZExt = 32,
+ LLVMSExt = 33,
+ LLVMFPToUI = 34,
+ LLVMFPToSI = 35,
+ LLVMUIToFP = 36,
+ LLVMSIToFP = 37,
+ LLVMFPTrunc = 38,
+ LLVMFPExt = 39,
+ LLVMPtrToInt = 40,
+ LLVMIntToPtr = 41,
+ LLVMBitCast = 42,
+ LLVMICmp = 43,
+ LLVMFCmp = 44,
+ LLVMPHI = 45,
+ LLVMCall = 46,
+ LLVMSelect = 47,
+ LLVMVAArg = 50,
+ LLVMExtractElement = 51,
+ LLVMInsertElement = 52,
+ LLVMShuffleVector = 53,
+ LLVMExtractValue = 54,
+ LLVMInsertValue = 55
+} LLVMOpcode;
+
+typedef enum {
LLVMVoidTypeKind, /**< type with no size */
LLVMFloatTypeKind, /**< 32 bit floating point type */
LLVMDoubleTypeKind, /**< 64 bit floating point type */
@@ -115,7 +189,8 @@ typedef enum {
LLVMArrayTypeKind, /**< Arrays */
LLVMPointerTypeKind, /**< Pointers */
LLVMOpaqueTypeKind, /**< Opaque: type with unknown structure */
- LLVMVectorTypeKind /**< SIMD 'packed' format, or other vector type */
+ LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */
+ LLVMMetadataTypeKind /**< Metadata */
} LLVMTypeKind;
typedef enum {
@@ -136,7 +211,8 @@ typedef enum {
LLVMExternalWeakLinkage,/**< ExternalWeak linkage description */
LLVMGhostLinkage, /**< Stand-in functions for streaming fns from
bitcode */
- LLVMCommonLinkage /**< Tentative definitions */
+ LLVMCommonLinkage, /**< Tentative definitions */
+ LLVMLinkerPrivateLinkage /**< Like Private, but linker removes. */
} LLVMLinkage;
typedef enum {
@@ -194,8 +270,8 @@ void LLVMDisposeMessage(char *Message);
/*===-- Modules -----------------------------------------------------------===*/
/* Create and destroy contexts. */
-LLVMContextRef LLVMContextCreate();
-LLVMContextRef LLVMGetGlobalContext();
+LLVMContextRef LLVMContextCreate(void);
+LLVMContextRef LLVMGetGlobalContext(void);
void LLVMContextDispose(LLVMContextRef C);
/* Create and destroy modules. */
@@ -218,6 +294,7 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple);
/** See Module::addTypeName. */
int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty);
void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name);
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
/** See Module::dump. */
void LLVMDumpModule(LLVMModuleRef M);
@@ -243,7 +320,17 @@ void LLVMDumpModule(LLVMModuleRef M);
/** See llvm::LLVMTypeKind::getTypeID. */
LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty);
+/** See llvm::LLVMType::getContext. */
+LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty);
+
/* Operations on integer types */
+LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits);
+
LLVMTypeRef LLVMInt1Type(void);
LLVMTypeRef LLVMInt8Type(void);
LLVMTypeRef LLVMInt16Type(void);
@@ -253,6 +340,12 @@ LLVMTypeRef LLVMIntType(unsigned NumBits);
unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy);
/* Operations on real types */
+LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C);
+
LLVMTypeRef LLVMFloatType(void);
LLVMTypeRef LLVMDoubleType(void);
LLVMTypeRef LLVMX86FP80Type(void);
@@ -269,6 +362,8 @@ unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy);
void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest);
/* Operations on struct types */
+LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, int Packed);
LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount,
int Packed);
unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
@@ -286,6 +381,10 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy);
unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy);
/* Operations on other types */
+LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C);
+
LLVMTypeRef LLVMVoidType(void);
LLVMTypeRef LLVMLabelType(void);
LLVMTypeRef LLVMOpaqueType(void);
@@ -340,8 +439,6 @@ void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle);
macro(CmpInst) \
macro(FCmpInst) \
macro(ICmpInst) \
- macro(VFCmpInst) \
- macro(VICmpInst) \
macro(ExtractElementInst) \
macro(GetElementPtrInst) \
macro(InsertElementInst) \
@@ -384,6 +481,7 @@ LLVMTypeRef LLVMTypeOf(LLVMValueRef Val);
const char *LLVMGetValueName(LLVMValueRef Val);
void LLVMSetValueName(LLVMValueRef Val, const char *Name);
void LLVMDumpValue(LLVMValueRef Val);
+void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal);
/* Conversion functions. Return the input value if it is an instance of the
specified class, otherwise NULL. See llvm::dyn_cast_or_null<>. */
@@ -391,6 +489,15 @@ void LLVMDumpValue(LLVMValueRef Val);
LLVMValueRef LLVMIsA##name(LLVMValueRef Val);
LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST)
+/* Operations on Uses */
+LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val);
+LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef U);
+LLVMValueRef LLVMGetUser(LLVMUseIteratorRef U);
+LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef U);
+
+/* Operations on Users */
+LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index);
+
/* Operations on constants of any type */
LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */
LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */
@@ -398,31 +505,55 @@ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty);
int LLVMIsConstant(LLVMValueRef Val);
int LLVMIsNull(LLVMValueRef Val);
int LLVMIsUndef(LLVMValueRef Val);
+LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty);
/* Operations on scalar constants */
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
int SignExtend);
+LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text,
+ uint8_t Radix);
+LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text,
+ unsigned SLen, uint8_t Radix);
LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N);
LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text);
+LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text,
+ unsigned SLen);
+unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal);
+long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal);
+
/* Operations on composite constants */
+LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
+ unsigned Length, int DontNullTerminate);
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, int Packed);
+
LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
int DontNullTerminate);
LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length);
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
- int packed);
+ int Packed);
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size);
/* Constant expressions */
+LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal);
+LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty);
LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty);
LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal);
+LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal);
LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal);
LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
+LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
+LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
+LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
+LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
+LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
@@ -439,6 +570,9 @@ LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices);
+LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices);
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
@@ -451,6 +585,17 @@ LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
+LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType);
+LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType);
+LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType);
+LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType);
+LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
+ unsigned isSigned);
+LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
LLVMValueRef ConstantIfTrue,
LLVMValueRef ConstantIfFalse);
@@ -517,6 +662,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC);
const char *LLVMGetGC(LLVMValueRef Fn);
void LLVMSetGC(LLVMValueRef Fn, const char *Name);
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
+LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn);
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
/* Operations on parameters */
@@ -530,6 +676,7 @@ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg);
LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg);
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA);
void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA);
+LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg);
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align);
/* Operations on basic blocks */
@@ -544,6 +691,14 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn);
LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB);
LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
+
+LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
+ LLVMValueRef Fn,
+ const char *Name);
+LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
+ LLVMBasicBlockRef BB,
+ const char *Name);
+
LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name);
LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB,
const char *Name);
@@ -582,6 +737,7 @@ LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index);
* exclusive means of building instructions using the C interface.
*/
+LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C);
LLVMBuilderRef LLVMCreateBuilder(void);
void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
LLVMValueRef Instr);
@@ -590,11 +746,15 @@ void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block);
LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder);
void LLVMClearInsertionPosition(LLVMBuilderRef Builder);
void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr);
+void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
+ const char *Name);
void LLVMDisposeBuilder(LLVMBuilderRef Builder);
/* Terminators */
LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef);
LLVMValueRef LLVMBuildRet(LLVMBuilderRef, LLVMValueRef V);
+LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef, LLVMValueRef *RetVals,
+ unsigned N);
LLVMValueRef LLVMBuildBr(LLVMBuilderRef, LLVMBasicBlockRef Dest);
LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef, LLVMValueRef If,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Else);
@@ -614,14 +774,24 @@ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
/* Arithmetic */
LLVMValueRef LLVMBuildAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
+LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name);
+LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name);
LLVMValueRef LLVMBuildSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
+LLVMValueRef LLVMBuildFSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name);
LLVMValueRef LLVMBuildMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
+LLVMValueRef LLVMBuildFMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name);
LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
+LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name);
LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
LLVMValueRef LLVMBuildURem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
@@ -643,6 +813,7 @@ LLVMValueRef LLVMBuildOr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
LLVMValueRef LLVMBuildXor(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
LLVMValueRef LLVMBuildNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name);
+LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name);
LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name);
/* Memory */
@@ -659,6 +830,15 @@ LLVMValueRef LLVMBuildStore(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef Ptr);
LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name);
+LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name);
+LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ unsigned Idx, const char *Name);
+LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
+ const char *Name);
+LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
+ const char *Name);
/* Casts */
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val,
@@ -685,6 +865,18 @@ LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name);
/* Comparisons */
LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op,
@@ -718,6 +910,13 @@ LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef, LLVMValueRef AggVal,
LLVMValueRef EltVal, unsigned Index,
const char *Name);
+LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val,
+ const char *Name);
+LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
+ const char *Name);
+LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name);
+
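
A rough sketch of how a client might use the new context-scoped constant routines added above (LLVMContextCreate/LLVMContextDispose and LLVMInt32TypeInContext are assumed to exist in this revision; they are not shown in this hunk):

    #include "llvm-c/Core.h"

    /* Parse an integer constant inside a private context. */
    unsigned long long parseHex42(void) {
      LLVMContextRef Ctx = LLVMContextCreate();               /* assumed API */
      LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);          /* assumed API */
      LLVMValueRef C = LLVMConstIntOfString(I32, "2a", 16);   /* added above */
      unsigned long long V = LLVMConstIntGetZExtValue(C);     /* 42 */
      LLVMContextDispose(Ctx);
      return V;
    }
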
/*===-- Module providers --------------------------------------------------===*/
@@ -828,11 +1027,22 @@ namespace llvm {
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ModuleProvider, LLVMModuleProviderRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef )
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext, LLVMContextRef )
+ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseIteratorRef )
DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBase, LLVMPassManagerRef )
#undef DEFINE_STDCXX_CONVERSION_FUNCTIONS
#undef DEFINE_ISA_CONVERSION_FUNCTIONS
#undef DEFINE_SIMPLE_CONVERSION_FUNCTIONS
+
+ /* Specialized opaque context conversions.
+ */
+ inline LLVMContext **unwrap(LLVMContextRef* Tys) {
+ return reinterpret_cast<LLVMContext**>(Tys);
+ }
+
+ inline LLVMContextRef *wrap(const LLVMContext **Tys) {
+ return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
+ }
/* Specialized opaque type conversions.
*/
@@ -853,7 +1063,7 @@ namespace llvm {
template<typename T>
inline T **unwrap(LLVMValueRef *Vals, unsigned Length) {
#if DEBUG
- for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I)
+ for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I)
cast<T>(*I);
#endif
return reinterpret_cast<T**>(Vals);
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index 9877b8d50953..05f2a892e223 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -104,7 +104,7 @@ void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global);
}
namespace llvm {
- class GenericValue;
+ struct GenericValue;
class ExecutionEngine;
#define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index bb423bbaadfc..43388512e875 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -33,9 +33,20 @@ typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
typedef struct LLVMStructLayout *LLVMStructLayoutRef;
/* Declare all of the target-initialization functions that are available. */
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo();
+#include "llvm/Config/Targets.def"
+
#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
+/** LLVMInitializeAllTargetInfos - The main program should call this function if
+ it wants access to all available targets that LLVM is configured to
+ support. */
+static inline void LLVMInitializeAllTargetInfos() {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo();
+#include "llvm/Config/Targets.def"
+}
+
/** LLVMInitializeAllTargets - The main program should call this function if it
wants to link in all available targets that LLVM is configured to
support. */
@@ -50,7 +61,9 @@ static inline void LLVMInitializeAllTargets() {
static inline int LLVMInitializeNativeTarget() {
/* If we have a native target, initialize it to ensure it is linked in. */
#ifdef LLVM_NATIVE_ARCH
-#define DoInit2(TARG) LLVMInitialize ## TARG ()
+#define DoInit2(TARG) \
+ LLVMInitialize ## TARG ## Info (); \
+ LLVMInitialize ## TARG ()
#define DoInit(T) DoInit2(T)
DoInit(LLVM_NATIVE_ARCH);
return 0;
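
A minimal start-up sketch for a JIT client after this change; note that LLVMInitializeNativeTarget() now registers the native TargetInfo as well:

    #include "llvm-c/Target.h"

    int setupTargets(void) {
      LLVMInitializeAllTargetInfos();      /* every configured TargetInfo */
      /* Returns 0 on success, 1 if no native target is configured. */
      return LLVMInitializeNativeTarget();
    }
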
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 1ae8c670dd29..7cafcb29f219 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -16,7 +16,6 @@
#ifndef LTO_H
#define LTO_H 1
-#include "llvm-c/Core.h"
#include <stdbool.h>
#include <stddef.h>
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index 928ecc0c3cf5..4d7e7ae11e8b 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -109,6 +109,7 @@ namespace llvm {
typedef signed short exponent_t;
struct fltSemantics;
+ class StringRef;
/* When bits of a floating point number are truncated, this enum is
used to indicate what fraction of the LSB those bits represented.
@@ -172,7 +173,8 @@ namespace llvm {
};
// Constructors.
- APFloat(const fltSemantics &, const char *);
+ APFloat(const fltSemantics &); // Default construct to 0.0
+ APFloat(const fltSemantics &, const StringRef &);
APFloat(const fltSemantics &, integerPart);
APFloat(const fltSemantics &, fltCategory, bool negative, unsigned type=0);
explicit APFloat(double d);
@@ -234,7 +236,7 @@ namespace llvm {
bool, roundingMode);
opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int,
bool, roundingMode);
- opStatus convertFromString(const char *, roundingMode);
+ opStatus convertFromString(const StringRef&, roundingMode);
APInt bitcastToAPInt() const;
double convertToDouble() const;
float convertToFloat() const;
@@ -312,8 +314,8 @@ namespace llvm {
roundingMode, bool *) const;
opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
roundingMode);
- opStatus convertFromHexadecimalString(const char *, roundingMode);
- opStatus convertFromDecimalString (const char *, roundingMode);
+ opStatus convertFromHexadecimalString(const StringRef&, roundingMode);
+ opStatus convertFromDecimalString (const StringRef&, roundingMode);
char *convertNormalToHexString(char *, unsigned int, bool,
roundingMode) const;
opStatus roundSignificandWithExponent(const integerPart *, unsigned int,
@@ -321,11 +323,13 @@ namespace llvm {
APInt convertFloatAPFloatToAPInt() const;
APInt convertDoubleAPFloatToAPInt() const;
+ APInt convertQuadrupleAPFloatToAPInt() const;
APInt convertF80LongDoubleAPFloatToAPInt() const;
APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
void initFromAPInt(const APInt& api, bool isIEEE = false);
void initFromFloatAPInt(const APInt& api);
void initFromDoubleAPInt(const APInt& api);
+ void initFromQuadrupleAPInt(const APInt &api);
void initFromF80LongDoubleAPInt(const APInt& api);
void initFromPPCDoubleDoubleAPInt(const APInt& api);
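
A small sketch of the new StringRef-based construction; because the length travels with the text, the input need not be NUL-terminated:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/StringRef.h"

    llvm::APFloat parsePrefix(const char *Buf) {
      // E.g. Buf == "1.25abc": only the first four characters, "1.25",
      // are seen by the parser.
      return llvm::APFloat(llvm::APFloat::IEEEdouble, llvm::StringRef(Buf, 4));
    }
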
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 56cd3ccf84e3..88aa9956d932 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -15,7 +15,6 @@
#ifndef LLVM_APINT_H
#define LLVM_APINT_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <climits>
@@ -27,12 +26,13 @@ namespace llvm {
class Deserializer;
class FoldingSetNodeID;
class raw_ostream;
+ class StringRef;
template<typename T>
class SmallVectorImpl;
- /* An unsigned host type used as a single part of a multi-part
- bignum. */
+ // An unsigned host type used as a single part of a multi-part
+ // bignum.
typedef uint64_t integerPart;
const unsigned int host_char_bit = 8;
@@ -152,8 +152,7 @@ class APInt {
/// This is used by the constructors that take string arguments.
/// @brief Convert a char array into an APInt
- void fromString(unsigned numBits, const char *strStart, unsigned slen,
- uint8_t radix);
+ void fromString(unsigned numBits, const StringRef &str, uint8_t radix);
/// This is used by the toString method to divide by the radix. It simply
/// provides a more convenient form of divide for internal use since KnuthDiv
@@ -229,17 +228,17 @@ public:
/// @brief Construct an APInt of numBits width, initialized as bigVal[].
APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
- /// This constructor interprets the slen characters starting at StrStart as
- /// a string in the given radix. The interpretation stops when the first
- /// character that is not suitable for the radix is encountered. Acceptable
- /// radix values are 2, 8, 10 and 16. It is an error for the value implied by
- /// the string to require more bits than numBits.
+ /// This constructor interprets the string \arg str in the given radix. The
+ /// interpretation stops when the first character that is not suitable for the
+ /// radix is encountered, or the end of the string. Acceptable radix values
+ /// are 2, 8, 10 and 16. It is an error for the value implied by the string to
+ /// require more bits than numBits.
+ ///
/// @param numBits the bit width of the constructed APInt
- /// @param strStart the start of the string to be interpreted
- /// @param slen the maximum number of characters to interpret
- /// @param radix the radix to use for the conversion
+ /// @param str the string to be interpreted
+ /// @param radix the radix to use for the conversion
/// @brief Construct an APInt from a string representation.
- APInt(unsigned numBits, const char strStart[], unsigned slen, uint8_t radix);
+ APInt(unsigned numBits, const StringRef &str, uint8_t radix);
/// Simply makes *this a copy of that.
/// @brief Copy Constructor.
@@ -1063,9 +1062,9 @@ public:
}
/// This method determines how many bits are required to hold the APInt
- /// equivalent of the string given by \p str of length \p slen.
+ /// equivalent of the string given by \arg str.
/// @brief Get bits required for string value.
- static unsigned getBitsNeeded(const char* str, unsigned slen, uint8_t radix);
+ static unsigned getBitsNeeded(const StringRef& str, uint8_t radix);
/// countLeadingZeros - This function is an APInt version of the
/// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number
@@ -1235,6 +1234,11 @@ public:
return BitWidth - 1 - countLeadingZeros();
}
+ /// @returns the ceil log base 2 of this APInt.
+ unsigned ceilLogBase2() const {
+ return BitWidth - (*this - 1).countLeadingZeros();
+ }
+
  /// @returns the log base 2 of this APInt if it's an exact power of two, -1
/// otherwise
int32_t exactLogBase2() const {
@@ -1426,8 +1430,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
return OS;
}
-std::ostream &operator<<(std::ostream &o, const APInt &I);
-
namespace APIntOps {
/// @brief Determine the smaller of two APInts considered to be signed.
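
Sketch of the StringRef constructor together with the new ceilLogBase2():

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/StringRef.h"

    unsigned bitsForFF(void) {
      llvm::APInt X(32, llvm::StringRef("ff"), 16); // value 255
      // 2^7 < 255 <= 2^8, so logBase2() == 7 and ceilLogBase2() == 8.
      return X.ceilLogBase2();
    }
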
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index e18be8963d48..0ed2d5a25257 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -16,109 +16,15 @@
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <utility>
+#include "llvm/ADT/DenseMapInfo.h"
+#include <iterator>
#include <new>
+#include <utility>
+#include <cassert>
+#include <cstring>
namespace llvm {
-template<typename T>
-struct DenseMapInfo {
- //static inline T getEmptyKey();
- //static inline T getTombstoneKey();
- //static unsigned getHashValue(const T &Val);
- //static bool isEqual(const T &LHS, const T &RHS);
- //static bool isPod()
-};
-
-// Provide DenseMapInfo for all pointers.
-template<typename T>
-struct DenseMapInfo<T*> {
- static inline T* getEmptyKey() {
- intptr_t Val = -1;
- Val <<= PointerLikeTypeTraits<T*>::NumLowBitsAvailable;
- return reinterpret_cast<T*>(Val);
- }
- static inline T* getTombstoneKey() {
- intptr_t Val = -2;
- Val <<= PointerLikeTypeTraits<T*>::NumLowBitsAvailable;
- return reinterpret_cast<T*>(Val);
- }
- static unsigned getHashValue(const T *PtrVal) {
- return (unsigned((uintptr_t)PtrVal) >> 4) ^
- (unsigned((uintptr_t)PtrVal) >> 9);
- }
- static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; }
- static bool isPod() { return true; }
-};
-
-// Provide DenseMapInfo for chars.
-template<> struct DenseMapInfo<char> {
- static inline char getEmptyKey() { return ~0; }
- static inline char getTombstoneKey() { return ~0 - 1; }
- static unsigned getHashValue(const char& Val) { return Val * 37; }
- static bool isPod() { return true; }
- static bool isEqual(const char &LHS, const char &RHS) {
- return LHS == RHS;
- }
-};
-
-// Provide DenseMapInfo for unsigned ints.
-template<> struct DenseMapInfo<unsigned> {
- static inline unsigned getEmptyKey() { return ~0; }
- static inline unsigned getTombstoneKey() { return ~0 - 1; }
- static unsigned getHashValue(const unsigned& Val) { return Val * 37; }
- static bool isPod() { return true; }
- static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
- return LHS == RHS;
- }
-};
-
-// Provide DenseMapInfo for unsigned longs.
-template<> struct DenseMapInfo<unsigned long> {
- static inline unsigned long getEmptyKey() { return ~0L; }
- static inline unsigned long getTombstoneKey() { return ~0L - 1L; }
- static unsigned getHashValue(const unsigned long& Val) {
- return (unsigned)(Val * 37L);
- }
- static bool isPod() { return true; }
- static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) {
- return LHS == RHS;
- }
-};
-
-// Provide DenseMapInfo for all pairs whose members have info.
-template<typename T, typename U>
-struct DenseMapInfo<std::pair<T, U> > {
- typedef std::pair<T, U> Pair;
- typedef DenseMapInfo<T> FirstInfo;
- typedef DenseMapInfo<U> SecondInfo;
-
- static inline Pair getEmptyKey() {
- return std::make_pair(FirstInfo::getEmptyKey(),
- SecondInfo::getEmptyKey());
- }
- static inline Pair getTombstoneKey() {
- return std::make_pair(FirstInfo::getTombstoneKey(),
- SecondInfo::getEmptyKey());
- }
- static unsigned getHashValue(const Pair& PairVal) {
- uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32
- | (uint64_t)SecondInfo::getHashValue(PairVal.second);
- key += ~(key << 32);
- key ^= (key >> 22);
- key += ~(key << 13);
- key ^= (key >> 8);
- key += (key << 3);
- key ^= (key >> 15);
- key += ~(key << 27);
- key ^= (key >> 31);
- return (unsigned)key;
- }
- static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; }
- static bool isPod() { return FirstInfo::isPod() && SecondInfo::isPod(); }
-};
-
template<typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
typename ValueInfoT = DenseMapInfo<ValueT> >
@@ -160,6 +66,9 @@ public:
P->second.~ValueT();
P->first.~KeyT();
}
+#ifndef NDEBUG
+ memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
+#endif
operator delete(Buckets);
}
@@ -185,6 +94,8 @@ public:
void resize(size_t Size) { grow(Size); }
void clear() {
+ if (NumEntries == 0 && NumTombstones == 0) return;
+
// If the capacity of the array is huge, and the # elements used is small,
// shrink the array.
if (NumEntries * 4 < NumBuckets && NumBuckets > 64) {
@@ -234,6 +145,9 @@ public:
return ValueT();
}
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // If the key is already in the map, it returns false and doesn't update the
+ // value.
std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) {
BucketT *TheBucket;
if (LookupBucketFor(KV.first, TheBucket))
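
Sketch of the documented insert() contract:

    #include "llvm/ADT/DenseMap.h"
    #include <utility>

    void insertDemo(void) {
      llvm::DenseMap<int, int> M;
      std::pair<llvm::DenseMap<int, int>::iterator, bool> R =
          M.insert(std::make_pair(1, 10));  // R.second == true: key was new
      R = M.insert(std::make_pair(1, 99));  // R.second == false...
      // ...and the mapped value is still 10; insert() never overwrites.
    }
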
@@ -318,8 +232,12 @@ private:
NumEntries = other.NumEntries;
NumTombstones = other.NumTombstones;
- if (NumBuckets)
+ if (NumBuckets) {
+#ifndef NDEBUG
+ memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
+#endif
operator delete(Buckets);
+ }
Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) *
other.NumBuckets));
@@ -465,6 +383,9 @@ private:
B->first.~KeyT();
}
+#ifndef NDEBUG
+ memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets);
+#endif
// Free the old table.
operator delete(OldBuckets);
}
@@ -495,6 +416,9 @@ private:
B->first.~KeyT();
}
+#ifndef NDEBUG
+ memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets);
+#endif
// Free the old table.
operator delete(OldBuckets);
@@ -503,12 +427,14 @@ private:
};
template<typename KeyT, typename ValueT, typename KeyInfoT, typename ValueInfoT>
-class DenseMapIterator {
+class DenseMapIterator :
+ public std::iterator<std::forward_iterator_tag, std::pair<KeyT, ValueT>,
+ ptrdiff_t> {
typedef std::pair<KeyT, ValueT> BucketT;
protected:
const BucketT *Ptr, *End;
public:
- DenseMapIterator(void) : Ptr(0), End(0) {}
+ DenseMapIterator() : Ptr(0), End(0) {}
DenseMapIterator(const BucketT *Pos, const BucketT *E) : Ptr(Pos), End(E) {
AdvancePastEmptyBuckets();
@@ -552,7 +478,7 @@ private:
template<typename KeyT, typename ValueT, typename KeyInfoT, typename ValueInfoT>
class DenseMapConstIterator : public DenseMapIterator<KeyT, ValueT, KeyInfoT> {
public:
- DenseMapConstIterator(void) : DenseMapIterator<KeyT, ValueT, KeyInfoT>() {}
+ DenseMapConstIterator() : DenseMapIterator<KeyT, ValueT, KeyInfoT>() {}
DenseMapConstIterator(const std::pair<KeyT, ValueT> *Pos,
const std::pair<KeyT, ValueT> *E)
: DenseMapIterator<KeyT, ValueT, KeyInfoT>(Pos, E) {
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
new file mode 100644
index 000000000000..632728bf0d17
--- /dev/null
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -0,0 +1,135 @@
+//===- llvm/ADT/DenseMapInfo.h - Type traits for DenseMap -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines DenseMapInfo traits for DenseMap.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DENSEMAPINFO_H
+#define LLVM_ADT_DENSEMAPINFO_H
+
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <utility>
+
+namespace llvm {
+
+template<typename T>
+struct DenseMapInfo {
+ //static inline T getEmptyKey();
+ //static inline T getTombstoneKey();
+ //static unsigned getHashValue(const T &Val);
+ //static bool isEqual(const T &LHS, const T &RHS);
+ //static bool isPod()
+};
+
+// Provide DenseMapInfo for all pointers.
+template<typename T>
+struct DenseMapInfo<T*> {
+ static inline T* getEmptyKey() {
+ intptr_t Val = -1;
+ Val <<= PointerLikeTypeTraits<T*>::NumLowBitsAvailable;
+ return reinterpret_cast<T*>(Val);
+ }
+ static inline T* getTombstoneKey() {
+ intptr_t Val = -2;
+ Val <<= PointerLikeTypeTraits<T*>::NumLowBitsAvailable;
+ return reinterpret_cast<T*>(Val);
+ }
+ static unsigned getHashValue(const T *PtrVal) {
+ return (unsigned((uintptr_t)PtrVal) >> 4) ^
+ (unsigned((uintptr_t)PtrVal) >> 9);
+ }
+ static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; }
+ static bool isPod() { return true; }
+};
+
+// Provide DenseMapInfo for chars.
+template<> struct DenseMapInfo<char> {
+ static inline char getEmptyKey() { return ~0; }
+ static inline char getTombstoneKey() { return ~0 - 1; }
+ static unsigned getHashValue(const char& Val) { return Val * 37; }
+ static bool isPod() { return true; }
+ static bool isEqual(const char &LHS, const char &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned ints.
+template<> struct DenseMapInfo<unsigned> {
+ static inline unsigned getEmptyKey() { return ~0; }
+ static inline unsigned getTombstoneKey() { return ~0U - 1; }
+ static unsigned getHashValue(const unsigned& Val) { return Val * 37; }
+ static bool isPod() { return true; }
+ static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned longs.
+template<> struct DenseMapInfo<unsigned long> {
+ static inline unsigned long getEmptyKey() { return ~0UL; }
+ static inline unsigned long getTombstoneKey() { return ~0UL - 1L; }
+ static unsigned getHashValue(const unsigned long& Val) {
+ return Val * 37UL;
+ }
+ static bool isPod() { return true; }
+ static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned long longs.
+template<> struct DenseMapInfo<unsigned long long> {
+ static inline unsigned long long getEmptyKey() { return ~0ULL; }
+ static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }
+ static unsigned getHashValue(const unsigned long long& Val) {
+ return Val * 37ULL;
+ }
+ static bool isPod() { return true; }
+ static bool isEqual(const unsigned long long& LHS,
+ const unsigned long long& RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for all pairs whose members have info.
+template<typename T, typename U>
+struct DenseMapInfo<std::pair<T, U> > {
+ typedef std::pair<T, U> Pair;
+ typedef DenseMapInfo<T> FirstInfo;
+ typedef DenseMapInfo<U> SecondInfo;
+
+ static inline Pair getEmptyKey() {
+ return std::make_pair(FirstInfo::getEmptyKey(),
+ SecondInfo::getEmptyKey());
+ }
+ static inline Pair getTombstoneKey() {
+ return std::make_pair(FirstInfo::getTombstoneKey(),
+ SecondInfo::getEmptyKey());
+ }
+ static unsigned getHashValue(const Pair& PairVal) {
+ uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32
+ | (uint64_t)SecondInfo::getHashValue(PairVal.second);
+ key += ~(key << 32);
+ key ^= (key >> 22);
+ key += ~(key << 13);
+ key ^= (key >> 8);
+ key += (key << 3);
+ key ^= (key >> 15);
+ key += ~(key << 27);
+ key ^= (key >> 31);
+ return (unsigned)key;
+ }
+ static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; }
+ static bool isPod() { return FirstInfo::isPod() && SecondInfo::isPod(); }
+};
+
+} // end namespace llvm
+
+#endif
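
With the traits split into their own header, a client can specialize DenseMapInfo for its own key type without pulling in all of DenseMap. A sketch for a hypothetical FileLoc key (all names illustrative):

    #include "llvm/ADT/DenseMapInfo.h"

    struct FileLoc { unsigned Line, Col; };

    namespace llvm {
      template<> struct DenseMapInfo<FileLoc> {
        static inline FileLoc getEmptyKey() { FileLoc L = { ~0U, 0 }; return L; }
        static inline FileLoc getTombstoneKey() { FileLoc L = { ~0U - 1, 0 }; return L; }
        static unsigned getHashValue(const FileLoc &V) { return V.Line * 37 + V.Col; }
        static bool isEqual(const FileLoc &A, const FileLoc &B) {
          return A.Line == B.Line && A.Col == B.Col;
        }
        static bool isPod() { return true; }
      };
    }
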
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 517768f402df..5f2df2a17e41 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -34,8 +34,8 @@
#define LLVM_ADT_DEPTHFIRSTITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/PointerIntPair.h"
#include <set>
#include <vector>
@@ -62,28 +62,35 @@ public:
template<class GraphT,
class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
bool ExtStorage = false, class GT = GraphTraits<GraphT> >
-class df_iterator : public forward_iterator<typename GT::NodeType, ptrdiff_t>,
+class df_iterator : public std::iterator<std::forward_iterator_tag,
+ typename GT::NodeType, ptrdiff_t>,
public df_iterator_storage<SetType, ExtStorage> {
- typedef forward_iterator<typename GT::NodeType, ptrdiff_t> super;
+ typedef std::iterator<std::forward_iterator_tag,
+ typename GT::NodeType, ptrdiff_t> super;
typedef typename GT::NodeType NodeType;
typedef typename GT::ChildIteratorType ChildItTy;
+ typedef PointerIntPair<NodeType*, 1> PointerIntTy;
// VisitStack - Used to maintain the ordering. Top = current block
// First element is node pointer, second is the 'next child' to visit
- std::vector<std::pair<NodeType *, ChildItTy> > VisitStack;
+ // if the int in PointerIntTy is 0, the 'next child' to visit is invalid
+ std::vector<std::pair<PointerIntTy, ChildItTy> > VisitStack;
private:
inline df_iterator(NodeType *Node) {
this->Visited.insert(Node);
- VisitStack.push_back(std::make_pair(Node, GT::child_begin(Node)));
+ VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0),
+ GT::child_begin(Node)));
+ }
+ inline df_iterator() {
+ // End is when stack is empty
}
- inline df_iterator() { /* End is when stack is empty */ }
-
inline df_iterator(NodeType *Node, SetType &S)
: df_iterator_storage<SetType, ExtStorage>(S) {
if (!S.count(Node)) {
+ VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0),
+ GT::child_begin(Node)));
this->Visited.insert(Node);
- VisitStack.push_back(std::make_pair(Node, GT::child_begin(Node)));
}
}
inline df_iterator(SetType &S)
@@ -91,6 +98,34 @@ private:
// End is when stack is empty
}
+ inline void toNext() {
+ do {
+ std::pair<PointerIntTy, ChildItTy> &Top = VisitStack.back();
+ NodeType *Node = Top.first.getPointer();
+ ChildItTy &It = Top.second;
+ if (!Top.first.getInt()) {
+ // now retrieve the real begin of the children before we dive in
+ It = GT::child_begin(Node);
+ Top.first.setInt(1);
+ }
+
+ while (It != GT::child_end(Node)) {
+ NodeType *Next = *It++;
+ // Has our next sibling been visited?
+ if (Next && !this->Visited.count(Next)) {
+ // No, do it now.
+ this->Visited.insert(Next);
+ VisitStack.push_back(std::make_pair(PointerIntTy(Next, 0),
+ GT::child_begin(Next)));
+ return;
+ }
+ }
+
+ // Oops, ran out of successors... go up a level on the stack.
+ VisitStack.pop_back();
+ } while (!VisitStack.empty());
+ }
+
public:
typedef typename super::pointer pointer;
typedef df_iterator<GraphT, SetType, ExtStorage, GT> _Self;
@@ -114,7 +149,7 @@ public:
inline bool operator!=(const _Self& x) const { return !operator==(x); }
inline pointer operator*() const {
- return VisitStack.back().first;
+ return VisitStack.back().first.getPointer();
}
// This is a nonstandard operator-> that dereferences the pointer an extra
@@ -124,24 +159,16 @@ public:
inline NodeType *operator->() const { return operator*(); }
inline _Self& operator++() { // Preincrement
- do {
- std::pair<NodeType *, ChildItTy> &Top = VisitStack.back();
- NodeType *Node = Top.first;
- ChildItTy &It = Top.second;
-
- while (It != GT::child_end(Node)) {
- NodeType *Next = *It++;
- if (!this->Visited.count(Next)) { // Has our next sibling been visited?
- // No, do it now.
- this->Visited.insert(Next);
- VisitStack.push_back(std::make_pair(Next, GT::child_begin(Next)));
- return *this;
- }
- }
+ toNext();
+ return *this;
+ }
- // Oops, ran out of successors... go up a level on the stack.
- VisitStack.pop_back();
- } while (!VisitStack.empty());
+  // skips all children of the current node and traverses to the next node
+ //
+ inline _Self& skipChildren() {
+ VisitStack.pop_back();
+ if (!VisitStack.empty())
+ toNext();
return *this;
}
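
Sketch of the new skipChildren() in a pruned traversal; GraphT is any type with GraphTraits, and shouldPrune is a caller-supplied predicate (both placeholders):

    #include "llvm/ADT/DepthFirstIterator.h"

    template <class GraphT, class Pred>
    void visitPruned(const GraphT &G, Pred shouldPrune) {
      typedef llvm::df_iterator<GraphT> It;
      for (It I = llvm::df_begin(G), E = llvm::df_end(G); I != E; ) {
        if (shouldPrune(*I))
          I.skipChildren();  // drop this subtree, continue with the next sibling
        else
          ++I;
      }
    }
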
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index 6e00a217bebf..ac9dd4db1fb4 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -15,7 +15,6 @@
#ifndef LLVM_ADT_EQUIVALENCECLASSES_H
#define LLVM_ADT_EQUIVALENCECLASSES_H
-#include "llvm/ADT/iterator.h"
#include "llvm/Support/DataTypes.h"
#include <set>
@@ -234,8 +233,10 @@ public:
return L1;
}
- class member_iterator : public forward_iterator<ElemTy, ptrdiff_t> {
- typedef forward_iterator<const ElemTy, ptrdiff_t> super;
+ class member_iterator : public std::iterator<std::forward_iterator_tag,
+ const ElemTy, ptrdiff_t> {
+ typedef std::iterator<std::forward_iterator_tag,
+ const ElemTy, ptrdiff_t> super;
const ECValue *Node;
friend class EquivalenceClasses;
public:
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 1bcff3dc9eb3..c62c47d27353 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -18,7 +18,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/SmallVector.h"
-#include <string>
+#include "llvm/ADT/StringRef.h"
#include <iterator>
namespace llvm {
@@ -227,9 +227,7 @@ public:
void AddInteger(long long I);
void AddInteger(unsigned long long I);
void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); }
- void AddString(const char* String, const char* End);
- void AddString(const std::string &String);
- void AddString(const char* String);
+ void AddString(StringRef String);
template <typename T>
inline void Add(const T& x) { FoldingSetTrait<T>::Profile(x, *this); }
@@ -439,6 +437,20 @@ public:
};
//===----------------------------------------------------------------------===//
+/// FastFoldingSetNode - This is a subclass of FoldingSetNode which stores
+/// a FoldingSetNodeID value rather than requiring the node to recompute it
+/// each time it is needed. This trades space for speed (which can be
+/// significant if the ID is long), and it also permits nodes to drop
+/// information that would otherwise only be required for recomputing an ID.
+class FastFoldingSetNode : public FoldingSetNode {
+ FoldingSetNodeID FastID;
+protected:
+ explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {}
+public:
+ void Profile(FoldingSetNodeID& ID) { ID = FastID; }
+};
+
+//===----------------------------------------------------------------------===//
// Partial specializations of FoldingSetTrait.
template<typename T> struct FoldingSetTrait<T*> {
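
Sketch of a client node type built on the new FastFoldingSetNode (ExprNode and Opcode are illustrative):

    #include "llvm/ADT/FoldingSet.h"

    class ExprNode : public llvm::FastFoldingSetNode {
      unsigned Opcode;
    public:
      ExprNode(const llvm::FoldingSetNodeID &ID, unsigned Op)
        : FastFoldingSetNode(ID), Opcode(Op) {}
      // Profile() is inherited: it just copies the cached FastID instead of
      // recomputing the ID from Opcode and friends.
    };
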
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index 52708bc8a108..742e2321865d 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -80,22 +80,25 @@ public:
class Factory {
typename TreeTy::Factory F;
+ const bool Canonicalize;
public:
- Factory() {}
-
- Factory(BumpPtrAllocator& Alloc)
- : F(Alloc) {}
+ Factory(bool canonicalize = true)
+ : Canonicalize(canonicalize) {}
+
+ Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
+ : F(Alloc), Canonicalize(canonicalize) {}
ImmutableMap GetEmptyMap() { return ImmutableMap(F.GetEmptyTree()); }
ImmutableMap Add(ImmutableMap Old, key_type_ref K, data_type_ref D) {
- return ImmutableMap(F.Add(Old.Root,
- std::make_pair<key_type,data_type>(K,D)));
+ TreeTy *T = F.Add(Old.Root, std::make_pair<key_type,data_type>(K,D));
+ return ImmutableMap(Canonicalize ? F.GetCanonicalTree(T): T);
}
ImmutableMap Remove(ImmutableMap Old, key_type_ref K) {
- return ImmutableMap(F.Remove(Old.Root,K));
+ TreeTy *T = F.Remove(Old.Root,K);
+ return ImmutableMap(Canonicalize ? F.GetCanonicalTree(T): T);
}
private:
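
Sketch of the new canonicalization switch; passing false trades uniqued (pointer-comparable) trees for cheaper Add/Remove, since no digest lookup is performed:

    #include "llvm/ADT/ImmutableMap.h"

    void mapDemo(void) {
      llvm::ImmutableMap<int, int>::Factory F(/*canonicalize=*/false);
      llvm::ImmutableMap<int, int> M = F.GetEmptyMap();
      M = F.Add(M, 1, 10);  // the resulting tree is not run through the cache
    }
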
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index be274dbe6758..14f4ac8123d6 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -51,10 +51,8 @@ public:
/// getLeft - Returns a pointer to the left subtree. This value
/// is NULL if there is no left subtree.
- ImutAVLTree* getLeft() const {
- assert (!isMutable() && "Node is incorrectly marked mutable.");
-
- return reinterpret_cast<ImutAVLTree*>(Left);
+ ImutAVLTree *getLeft() const {
+ return reinterpret_cast<ImutAVLTree*>(Left & ~LeftFlags);
}
/// getRight - Returns a pointer to the right subtree. This value is
@@ -168,7 +166,7 @@ public:
/// contains - Returns true if this tree contains a subtree (node) that
  /// has a data element that matches the specified key.  Complexity
/// is logarithmic in the size of the tree.
- bool contains(const key_type_ref K) { return (bool) find(K); }
+ bool contains(key_type_ref K) { return (bool) find(K); }
  /// foreach - A member template that invokes operator() on a functor
  ///  object (specified by Callback) for every node/subtree in the tree.
@@ -227,7 +225,7 @@ private:
ImutAVLTree* Right;
unsigned Height;
value_type Value;
- unsigned Digest;
+ uint32_t Digest;
//===----------------------------------------------------===//
// Internal methods (node manipulation; used by Factory).
@@ -235,12 +233,12 @@ private:
private:
- enum { Mutable = 0x1 };
+ enum { Mutable = 0x1, NoCachedDigest = 0x2, LeftFlags = 0x3 };
/// ImutAVLTree - Internal constructor that is only called by
/// ImutAVLFactory.
ImutAVLTree(ImutAVLTree* l, ImutAVLTree* r, value_type_ref v, unsigned height)
- : Left(reinterpret_cast<uintptr_t>(l) | Mutable),
+ : Left(reinterpret_cast<uintptr_t>(l) | (Mutable | NoCachedDigest)),
Right(r), Height(height), Value(v), Digest(0) {}
@@ -251,13 +249,10 @@ private:
/// method returns false for an instance of ImutAVLTree, all subtrees
/// will also have this method return false. The converse is not true.
bool isMutable() const { return Left & Mutable; }
-
- /// getSafeLeft - Returns the pointer to the left tree by always masking
- /// out the mutable bit. This is used internally by ImutAVLFactory,
- /// as no trees returned to the client should have the mutable flag set.
- ImutAVLTree* getSafeLeft() const {
- return reinterpret_cast<ImutAVLTree*>(Left & ~Mutable);
- }
+
+ /// hasCachedDigest - Returns true if the digest for this tree is cached.
+ /// This can only be true if the tree is immutable.
+ bool hasCachedDigest() const { return !(Left & NoCachedDigest); }
//===----------------------------------------------------===//
// Mutating operations. A tree root can be manipulated as
@@ -270,64 +265,73 @@ private:
// immutable.
//===----------------------------------------------------===//
-
/// MarkImmutable - Clears the mutable flag for a tree. After this happens,
- /// it is an error to call setLeft(), setRight(), and setHeight(). It
- /// is also then safe to call getLeft() instead of getSafeLeft().
+ /// it is an error to call setLeft(), setRight(), and setHeight().
void MarkImmutable() {
- assert (isMutable() && "Mutable flag already removed.");
+ assert(isMutable() && "Mutable flag already removed.");
Left &= ~Mutable;
}
+
+ /// MarkedCachedDigest - Clears the NoCachedDigest flag for a tree.
+ void MarkedCachedDigest() {
+ assert(!hasCachedDigest() && "NoCachedDigest flag already removed.");
+ Left &= ~NoCachedDigest;
+ }
/// setLeft - Changes the reference of the left subtree. Used internally
/// by ImutAVLFactory.
void setLeft(ImutAVLTree* NewLeft) {
- assert (isMutable() &&
- "Only a mutable tree can have its left subtree changed.");
-
- Left = reinterpret_cast<uintptr_t>(NewLeft) | Mutable;
+ assert(isMutable() &&
+ "Only a mutable tree can have its left subtree changed.");
+ Left = reinterpret_cast<uintptr_t>(NewLeft) | LeftFlags;
}
/// setRight - Changes the reference of the right subtree. Used internally
/// by ImutAVLFactory.
void setRight(ImutAVLTree* NewRight) {
- assert (isMutable() &&
- "Only a mutable tree can have its right subtree changed.");
+ assert(isMutable() &&
+ "Only a mutable tree can have its right subtree changed.");
Right = NewRight;
+ // Set the NoCachedDigest flag.
+ Left = Left | NoCachedDigest;
+
}
/// setHeight - Changes the height of the tree. Used internally by
/// ImutAVLFactory.
void setHeight(unsigned h) {
- assert (isMutable() && "Only a mutable tree can have its height changed.");
+ assert(isMutable() && "Only a mutable tree can have its height changed.");
Height = h;
}
-
static inline
- unsigned ComputeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) {
- unsigned digest = 0;
+ uint32_t ComputeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) {
+ uint32_t digest = 0;
- if (L) digest += L->ComputeDigest();
+ if (L)
+ digest += L->ComputeDigest();
- { // Compute digest of stored data.
- FoldingSetNodeID ID;
- ImutInfo::Profile(ID,V);
- digest += ID.ComputeHash();
- }
+ // Compute digest of stored data.
+ FoldingSetNodeID ID;
+ ImutInfo::Profile(ID,V);
+ digest += ID.ComputeHash();
- if (R) digest += R->ComputeDigest();
+ if (R)
+ digest += R->ComputeDigest();
return digest;
}
- inline unsigned ComputeDigest() {
- if (Digest) return Digest;
-
- unsigned X = ComputeDigest(getSafeLeft(), getRight(), getValue());
- if (!isMutable()) Digest = X;
+ inline uint32_t ComputeDigest() {
+ // Check the lowest bit to determine if digest has actually been
+ // pre-computed.
+ if (hasCachedDigest())
+ return Digest;
+ uint32_t X = ComputeDigest(getLeft(), getRight(), getValue());
+ Digest = X;
+ MarkedCachedDigest();
return X;
}
};
@@ -394,7 +398,7 @@ private:
bool isEmpty(TreeTy* T) const { return !T; }
unsigned Height(TreeTy* T) const { return T ? T->getHeight() : 0; }
- TreeTy* Left(TreeTy* T) const { return T->getSafeLeft(); }
+ TreeTy* Left(TreeTy* T) const { return T->getLeft(); }
TreeTy* Right(TreeTy* T) const { return T->getRight(); }
value_type_ref Value(TreeTy* T) const { return T->Value; }
@@ -404,7 +408,6 @@ private:
return ( hl > hr ? hl : hr ) + 1;
}
-
static bool CompareTreeWithSection(TreeTy* T,
typename TreeTy::iterator& TI,
typename TreeTy::iterator& TE) {
@@ -428,62 +431,10 @@ private:
// returned to the caller.
//===--------------------------------------------------===//
- TreeTy* CreateNode(TreeTy* L, value_type_ref V, TreeTy* R) {
- // Search the FoldingSet bucket for a Tree with the same digest.
- FoldingSetNodeID ID;
- unsigned digest = TreeTy::ComputeDigest(L, R, V);
- ID.AddInteger(digest);
- unsigned hash = ID.ComputeHash();
-
- typename CacheTy::bucket_iterator I = Cache.bucket_begin(hash);
- typename CacheTy::bucket_iterator E = Cache.bucket_end(hash);
-
- for (; I != E; ++I) {
- TreeTy* T = &*I;
-
- if (T->ComputeDigest() != digest)
- continue;
-
- // We found a collision. Perform a comparison of Contents('T')
- // with Contents('L')+'V'+Contents('R').
-
- typename TreeTy::iterator TI = T->begin(), TE = T->end();
-
- // First compare Contents('L') with the (initial) contents of T.
- if (!CompareTreeWithSection(L, TI, TE))
- continue;
-
- // Now compare the new data element.
- if (TI == TE || !TI->ElementEqual(V))
- continue;
-
- ++TI;
-
- // Now compare the remainder of 'T' with 'R'.
- if (!CompareTreeWithSection(R, TI, TE))
- continue;
-
- if (TI != TE) // Contents('R') did not match suffix of 'T'.
- continue;
-
- // Trees did match! Return 'T'.
- return T;
- }
-
- // No tree with the contents: Contents('L')+'V'+Contents('R').
- // Create it.
-
- // Allocate the new tree node and insert it into the cache.
+ TreeTy* CreateNode(TreeTy* L, value_type_ref V, TreeTy* R) {
BumpPtrAllocator& A = getAllocator();
TreeTy* T = (TreeTy*) A.Allocate<TreeTy>();
new (T) TreeTy(L,R,V,IncrementHeight(L,R));
-
- // We do not insert 'T' into the FoldingSet here. This is because
- // this tree is still mutable and things may get rebalanced.
- // Because our digest is associative and based on the contents of
- // the set, this should hopefully not cause any strange bugs.
- // 'T' is inserted by 'MarkImmutable'.
-
return T;
}
@@ -496,7 +447,8 @@ private:
OldTree->setHeight(IncrementHeight(L,R));
return OldTree;
}
- else return CreateNode(L, Value(OldTree), R);
+ else
+ return CreateNode(L, Value(OldTree), R);
}
/// Balance - Used by Add_internal and Remove_internal to
@@ -615,12 +567,56 @@ private:
T->MarkImmutable();
MarkImmutable(Left(T));
MarkImmutable(Right(T));
+ }
+
+public:
+ TreeTy *GetCanonicalTree(TreeTy *TNew) {
+ if (!TNew)
+ return NULL;
+
+ // Search the FoldingSet bucket for a Tree with the same digest.
+ FoldingSetNodeID ID;
+ unsigned digest = TNew->ComputeDigest();
+ ID.AddInteger(digest);
+ unsigned hash = ID.ComputeHash();
+
+ typename CacheTy::bucket_iterator I = Cache.bucket_begin(hash);
+ typename CacheTy::bucket_iterator E = Cache.bucket_end(hash);
+
+ for (; I != E; ++I) {
+ TreeTy *T = &*I;
+
+ if (T->ComputeDigest() != digest)
+ continue;
+
+ // We found a collision. Perform a comparison of Contents('T')
+ // with Contents('L')+'V'+Contents('R').
+ typename TreeTy::iterator TI = T->begin(), TE = T->end();
+
+ // First compare Contents('L') with the (initial) contents of T.
+ if (!CompareTreeWithSection(TNew->getLeft(), TI, TE))
+ continue;
+
+ // Now compare the new data element.
+ if (TI == TE || !TI->ElementEqual(TNew->getValue()))
+ continue;
+
+ ++TI;
+
+ // Now compare the remainder of 'T' with 'R'.
+ if (!CompareTreeWithSection(TNew->getRight(), TI, TE))
+ continue;
+
+ if (TI != TE)
+ continue; // Contents('R') did not match suffix of 'T'.
+
+ // Trees did match! Return 'T'.
+ return T;
+ }
- // Now that the node is immutable it can safely be inserted
- // into the node cache.
- llvm::FoldingSetNodeID ID;
- ID.AddInteger(T->ComputeDigest());
- Cache.InsertNode(T, (void*) &*Cache.bucket_end(ID.ComputeHash()));
+ // 'TNew' is the only tree of its kind. Return it.
+ Cache.InsertNode(TNew, (void*) &*Cache.bucket_end(hash));
+ return TNew;
}
};
@@ -701,7 +697,7 @@ public:
switch (getVisitState()) {
case VisitedNone:
- if (TreeTy* L = Current->getSafeLeft())
+ if (TreeTy* L = Current->getLeft())
stack.push_back(reinterpret_cast<uintptr_t>(L));
else
stack.back() |= VisitedLeft;
@@ -940,8 +936,8 @@ public:
typedef ImutAVLTree<ValInfo> TreeTy;
private:
- TreeTy* Root;
-
+ TreeTy *Root;
+
public:
/// Constructs a set from a pointer to a tree root. In general one
/// should use a Factory object to create sets instead of directly
@@ -951,15 +947,19 @@ public:
class Factory {
typename TreeTy::Factory F;
+ const bool Canonicalize;
public:
- Factory() {}
+ Factory(bool canonicalize = true)
+ : Canonicalize(canonicalize) {}
- Factory(BumpPtrAllocator& Alloc)
- : F(Alloc) {}
+ Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
+ : F(Alloc), Canonicalize(canonicalize) {}
/// GetEmptySet - Returns an immutable set that contains no elements.
- ImmutableSet GetEmptySet() { return ImmutableSet(F.GetEmptyTree()); }
+ ImmutableSet GetEmptySet() {
+ return ImmutableSet(F.GetEmptyTree());
+ }
/// Add - Creates a new immutable set that contains all of the values
/// of the original set with the addition of the specified value. If
@@ -969,7 +969,8 @@ public:
/// The memory allocated to represent the set is released when the
/// factory object that created the set is destroyed.
ImmutableSet Add(ImmutableSet Old, value_type_ref V) {
- return ImmutableSet(F.Add(Old.Root,V));
+ TreeTy *NewT = F.Add(Old.Root, V);
+ return ImmutableSet(Canonicalize ? F.GetCanonicalTree(NewT) : NewT);
}
/// Remove - Creates a new immutable set that contains all of the values
@@ -980,7 +981,8 @@ public:
/// The memory allocated to represent the set is released when the
/// factory object that created the set is destroyed.
ImmutableSet Remove(ImmutableSet Old, value_type_ref V) {
- return ImmutableSet(F.Remove(Old.Root,V));
+ TreeTy *NewT = F.Remove(Old.Root, V);
+ return ImmutableSet(Canonicalize ? F.GetCanonicalTree(NewT) : NewT);
}
BumpPtrAllocator& getAllocator() { return F.getAllocator(); }
@@ -993,7 +995,7 @@ public:
friend class Factory;
/// contains - Returns true if the set contains the specified value.
- bool contains(const value_type_ref V) const {
+ bool contains(value_type_ref V) const {
return Root ? Root->contains(V) : false;
}
@@ -1005,7 +1007,9 @@ public:
return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root;
}
- TreeTy* getRoot() const { return Root; }
+ TreeTy *getRoot() {
+ return Root;
+ }
/// isEmpty - Return true if the set contains no elements.
bool isEmpty() const { return !Root; }
@@ -1026,11 +1030,10 @@ public:
class iterator {
typename TreeTy::iterator itr;
-
- iterator() {}
iterator(TreeTy* t) : itr(t) {}
friend class ImmutableSet<ValT,ValInfo>;
public:
+ iterator() {}
inline value_type_ref operator*() const { return itr->getValue(); }
inline iterator& operator++() { ++itr; return *this; }
inline iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; }
diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h
index ff5d3a139c70..89f0dfa64e1c 100644
--- a/include/llvm/ADT/IndexedMap.h
+++ b/include/llvm/ADT/IndexedMap.h
@@ -26,7 +26,7 @@
namespace llvm {
- struct IdentityFunctor : std::unary_function<unsigned, unsigned> {
+ struct IdentityFunctor : public std::unary_function<unsigned, unsigned> {
unsigned operator()(unsigned Index) const {
return Index;
}
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 0aa478b1ff61..73ba3c7293de 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -65,7 +65,8 @@ public:
}
PointerTy getPointer() const {
- return reinterpret_cast<PointerTy>(Value & PointerBitMask);
+ return PtrTraits::getFromVoidPointer(
+ reinterpret_cast<void*>(Value & PointerBitMask));
}
IntType getInt() const {
@@ -73,7 +74,8 @@ public:
}
void setPointer(PointerTy Ptr) {
- intptr_t PtrVal = reinterpret_cast<intptr_t>(Ptr);
+ intptr_t PtrVal
+ = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
"Pointer is not sufficiently aligned");
// Preserve all low bits, just update the pointer.
@@ -141,8 +143,7 @@ public:
return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
}
enum {
- NumLowBitsAvailable =
- PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable - IntBits
+ NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits
};
};
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 1b36aeea7934..33f2fcb11395 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -79,7 +79,7 @@ namespace llvm {
Val.setInt(1);
}
- /// isNull - Return true if the pointer help in the union is null,
+ /// isNull - Return true if the pointer held in the union is null,
/// regardless of which type it is.
bool isNull() const { return Val.getPointer() == 0; }
operator bool() const { return !isNull(); }
@@ -176,7 +176,7 @@ namespace llvm {
Val = V;
}
- /// isNull - Return true if the pointer help in the union is null,
+ /// isNull - Return true if the pointer held in the union is null,
/// regardless of which type it is.
bool isNull() const { return Val.isNull(); }
operator bool() const { return !isNull(); }
@@ -254,6 +254,115 @@ namespace llvm {
::NumLowBitsAvailable
};
};
+
+ /// PointerUnion4 - This is a pointer union of four pointer types. See
+ /// documentation for PointerUnion for usage.
+ template <typename PT1, typename PT2, typename PT3, typename PT4>
+ class PointerUnion4 {
+ public:
+ typedef PointerUnion<PT1, PT2> InnerUnion1;
+ typedef PointerUnion<PT3, PT4> InnerUnion2;
+ typedef PointerUnion<InnerUnion1, InnerUnion2> ValTy;
+ private:
+ ValTy Val;
+ public:
+ PointerUnion4() {}
+
+ PointerUnion4(PT1 V) {
+ Val = InnerUnion1(V);
+ }
+ PointerUnion4(PT2 V) {
+ Val = InnerUnion1(V);
+ }
+ PointerUnion4(PT3 V) {
+ Val = InnerUnion2(V);
+ }
+ PointerUnion4(PT4 V) {
+ Val = InnerUnion2(V);
+ }
+
+ /// isNull - Return true if the pointer held in the union is null,
+ /// regardless of which type it is.
+ bool isNull() const { return Val.isNull(); }
+ operator bool() const { return !isNull(); }
+
+ /// is<T>() return true if the Union currently holds the type matching T.
+ template<typename T>
+ int is() const {
+ // Is it PT1/PT2?
+ if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
+ return Val.is<InnerUnion1>() && Val.get<InnerUnion1>().is<T>();
+ return Val.is<InnerUnion2>() && Val.get<InnerUnion2>().is<T>();
+ }
+
+ /// get<T>() - Return the value of the specified pointer type. If the
+ /// specified pointer type is incorrect, assert.
+ template<typename T>
+ T get() const {
+ assert(is<T>() && "Invalid accessor called");
+ // Is it PT1/PT2?
+ if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
+ return Val.get<InnerUnion1>().get<T>();
+
+ return Val.get<InnerUnion2>().get<T>();
+ }
+
+ /// dyn_cast<T>() - If the current value is of the specified pointer type,
+ /// return it, otherwise return null.
+ template<typename T>
+ T dyn_cast() const {
+ if (is<T>()) return get<T>();
+ return T();
+ }
+
+ /// Assignment operators - Allow assigning into this union from either
+ /// pointer type, setting the discriminator to remember what it came from.
+ const PointerUnion4 &operator=(const PT1 &RHS) {
+ Val = InnerUnion1(RHS);
+ return *this;
+ }
+ const PointerUnion4 &operator=(const PT2 &RHS) {
+ Val = InnerUnion1(RHS);
+ return *this;
+ }
+ const PointerUnion4 &operator=(const PT3 &RHS) {
+ Val = InnerUnion2(RHS);
+ return *this;
+ }
+ const PointerUnion4 &operator=(const PT4 &RHS) {
+ Val = InnerUnion2(RHS);
+ return *this;
+ }
+
+ void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+ static PointerUnion4 getFromOpaqueValue(void *VP) {
+ PointerUnion4 V;
+ V.Val = ValTy::getFromOpaqueValue(VP);
+ return V;
+ }
+ };
+
+ // Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has
+  // # low bits available = min(PT1bits,PT2bits,PT3bits,PT4bits)-2.
+ template<typename PT1, typename PT2, typename PT3, typename PT4>
+ class PointerLikeTypeTraits<PointerUnion4<PT1, PT2, PT3, PT4> > {
+ public:
+ static inline void *
+ getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
+ return P.getOpaqueValue();
+ }
+ static inline PointerUnion4<PT1, PT2, PT3, PT4>
+ getFromVoidPointer(void *P) {
+ return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
+ }
+
+    // The number of bits available is the min over the four pointer types.
+ enum {
+ NumLowBitsAvailable =
+ PointerLikeTypeTraits<typename PointerUnion4<PT1, PT2, PT3, PT4>::ValTy>
+ ::NumLowBitsAvailable
+ };
+ };
}
#endif
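
Usage sketch for the new PointerUnion4:

    #include "llvm/ADT/PointerUnion.h"

    void unionDemo(int *IP, double *DP) {
      llvm::PointerUnion4<int*, float*, long*, double*> P;
      P = IP;
      if (P.is<int*>())
        *P.get<int*>() = 1;              // get<T>() asserts if the tag is wrong
      P = DP;                            // now held by the second inner union
      double *D = P.dyn_cast<double*>(); // T() (null) when the tag mismatches
      (void)D;
    }
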
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index b477d0a8f0f5..8315bc9f9ed5 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -17,7 +17,6 @@
#define LLVM_ADT_POSTORDERITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <set>
#include <stack>
@@ -43,9 +42,11 @@ template<class GraphT,
class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
bool ExtStorage = false,
class GT = GraphTraits<GraphT> >
-class po_iterator : public forward_iterator<typename GT::NodeType, ptrdiff_t>,
+class po_iterator : public std::iterator<std::forward_iterator_tag,
+ typename GT::NodeType, ptrdiff_t>,
public po_iterator_storage<SetType, ExtStorage> {
- typedef forward_iterator<typename GT::NodeType, ptrdiff_t> super;
+ typedef std::iterator<std::forward_iterator_tag,
+ typename GT::NodeType, ptrdiff_t> super;
typedef typename GT::NodeType NodeType;
typedef typename GT::ChildIteratorType ChildItTy;
@@ -71,7 +72,7 @@ class po_iterator : public forward_iterator<typename GT::NodeType, ptrdiff_t>,
inline po_iterator() {} // End is when stack is empty.
inline po_iterator(NodeType *BB, SetType &S) :
- po_iterator_storage<SetType, ExtStorage>(&S) {
+ po_iterator_storage<SetType, ExtStorage>(S) {
if(!S.count(BB)) {
this->Visited.insert(BB);
VisitStack.push(std::make_pair(BB, GT::child_begin(BB)));
@@ -80,7 +81,7 @@ class po_iterator : public forward_iterator<typename GT::NodeType, ptrdiff_t>,
}
inline po_iterator(SetType &S) :
- po_iterator_storage<SetType, ExtStorage>(&S) {
+ po_iterator_storage<SetType, ExtStorage>(S) {
} // End is when stack is empty.
public:
typedef typename super::pointer pointer;
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index e28f4caa5d76..db985b5ae1d2 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -22,8 +22,7 @@
#define LLVM_ADT_SCCITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator.h"
-#include <map>
+#include "llvm/ADT/DenseMap.h"
#include <vector>
namespace llvm {
@@ -35,11 +34,13 @@ namespace llvm {
///
template<class GraphT, class GT = GraphTraits<GraphT> >
class scc_iterator
- : public forward_iterator<std::vector<typename GT::NodeType>, ptrdiff_t> {
+ : public std::iterator<std::forward_iterator_tag,
+ std::vector<typename GT::NodeType>, ptrdiff_t> {
typedef typename GT::NodeType NodeType;
typedef typename GT::ChildIteratorType ChildItTy;
typedef std::vector<NodeType*> SccTy;
- typedef forward_iterator<SccTy, ptrdiff_t> super;
+ typedef std::iterator<std::forward_iterator_tag,
+ std::vector<typename GT::NodeType>, ptrdiff_t> super;
typedef typename super::reference reference;
typedef typename super::pointer pointer;
@@ -47,7 +48,7 @@ class scc_iterator
// visitNum is the global counter.
// nodeVisitNumbers are per-node visit numbers, also used as DFS flags.
unsigned visitNum;
- std::map<NodeType *, unsigned> nodeVisitNumbers;
+ DenseMap<NodeType *, unsigned> nodeVisitNumbers;
// SCCNodeStack - Stack holding nodes of the SCC.
std::vector<NodeType *> SCCNodeStack;
@@ -71,7 +72,7 @@ class scc_iterator
SCCNodeStack.push_back(N);
MinVisitNumStack.push_back(visitNum);
VisitStack.push_back(std::make_pair(N, GT::child_begin(N)));
- //DOUT << "TarjanSCC: Node " << N <<
+ //errs() << "TarjanSCC: Node " << N <<
// " : visitNum = " << visitNum << "\n";
}
@@ -106,7 +107,7 @@ class scc_iterator
if (!MinVisitNumStack.empty() && MinVisitNumStack.back() > minVisitNum)
MinVisitNumStack.back() = minVisitNum;
- //DOUT << "TarjanSCC: Popped node " << visitingN <<
+ //errs() << "TarjanSCC: Popped node " << visitingN <<
// " : minVisitNum = " << minVisitNum << "; Node visit num = " <<
// nodeVisitNumbers[visitingN] << "\n";
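
Sketch of the iterator in use (not part of the commit): each position of scc_iterator yields one strongly connected component as a vector of nodes, assuming the usual scc_begin/scc_end helpers from this header and the CFG GraphTraits.

    #include "llvm/ADT/SCCIterator.h"
    #include "llvm/Function.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    unsigned countSCCs(Function &F) {
      unsigned N = 0;
      for (scc_iterator<Function*> I = scc_begin(&F), E = scc_end(&F);
           I != E; ++I) {
        const std::vector<BasicBlock*> &SCC = *I;  // one component per step
        (void)SCC;
        ++N;
      }
      return N;
    }
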
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index 964e7e07ef7d..6f4769260aa9 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -19,8 +19,8 @@
#include <cstddef> // for std::size_t
#include <functional>
+#include <iterator>
#include <utility> // for std::pair
-#include "llvm/ADT/iterator.h"
namespace llvm {
@@ -29,6 +29,13 @@ namespace llvm {
//===----------------------------------------------------------------------===//
template<class Ty>
+struct less_ptr : public std::binary_function<Ty, Ty, bool> {
+ bool operator()(const Ty* left, const Ty* right) const {
+ return *left < *right;
+ }
+};
+
+template<class Ty>
struct greater_ptr : public std::binary_function<Ty, Ty, bool> {
bool operator()(const Ty* left, const Ty* right) const {
return *right < *left;
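
Sketch (not in the commit): the new less_ptr mirrors the existing greater_ptr and orders pointers by the values they point at, e.g. when sorting a container of heap objects.

    #include "llvm/ADT/STLExtras.h"
    #include <algorithm>
    #include <vector>
    using namespace llvm;

    void sortByPointee(std::vector<int*> &V) {
      std::sort(V.begin(), V.end(), less_ptr<int>());  // compares *a < *b
    }
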
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index a189de2c2279..7d00e9a073e7 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -17,6 +17,7 @@
#include <cassert>
#include <cstring>
+#include <iterator>
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
@@ -170,7 +171,14 @@ protected:
template<typename PtrTy>
class SmallPtrSetIterator : public SmallPtrSetIteratorImpl {
typedef PointerLikeTypeTraits<PtrTy> PtrTraits;
+
public:
+ typedef PtrTy value_type;
+ typedef PtrTy reference;
+ typedef PtrTy pointer;
+ typedef std::ptrdiff_t difference_type;
+ typedef std::forward_iterator_tag iterator_category;
+
explicit SmallPtrSetIterator(const void *const *BP)
: SmallPtrSetIteratorImpl(BP) {}
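
The typedefs added above are exactly what std::iterator_traits needs; a sketch (not part of the commit) of the standard-library code they unlock:

    #include "llvm/ADT/SmallPtrSet.h"
    #include <iterator>
    using namespace llvm;

    size_t sizeViaStd(const SmallPtrSet<int*, 8> &S) {
      // std::distance consults iterator_traits, which now finds the
      // forward_iterator_tag declared on SmallPtrSetIterator.
      return std::distance(S.begin(), S.end());
    }
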
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index caaa96c045f7..d03f1bef15b1 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -30,7 +30,7 @@ namespace llvm {
template <typename T, unsigned N>
class SmallSet {
/// Use a SmallVector to hold the elements here (even though it will never
- /// reach it's 'large' stage) to avoid calling the default ctors of elements
+ /// reach its 'large' stage) to avoid calling the default ctors of elements
/// we will never use.
SmallVector<T, N> Vector;
std::set<T> Set;
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index 687fa2d26e24..035462515a83 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -15,8 +15,7 @@
#define LLVM_ADT_SMALLSTRING_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DataTypes.h"
-#include <cstring>
+#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -37,73 +36,30 @@ public:
// Extra methods.
- const char *c_str() const {
- SmallString *This = const_cast<SmallString*>(this);
- // Ensure that there is a \0 at the end of the string.
- This->reserve(this->size()+1);
- This->End[0] = 0;
- return this->begin();
- }
+ StringRef str() const { return StringRef(this->begin(), this->size()); }
+ const char *c_str() {
+ this->push_back(0);
+ this->pop_back();
+ return this->data();
+ }
+
// Extra operators.
- const SmallString &operator=(const char *RHS) {
+ const SmallString &operator=(StringRef RHS) {
this->clear();
return *this += RHS;
}
- SmallString &operator+=(const char *RHS) {
- this->append(RHS, RHS+strlen(RHS));
+ SmallString &operator+=(StringRef RHS) {
+ this->append(RHS.begin(), RHS.end());
return *this;
}
SmallString &operator+=(char C) {
this->push_back(C);
return *this;
}
-
- SmallString &append_uint_32(uint32_t N) {
- char Buffer[20];
- char *BufPtr = Buffer+20;
-
- if (N == 0) *--BufPtr = '0'; // Handle special case.
-
- while (N) {
- *--BufPtr = '0' + char(N % 10);
- N /= 10;
- }
- this->append(BufPtr, Buffer+20);
- return *this;
- }
-
- SmallString &append_uint(uint64_t N) {
- if (N == uint32_t(N))
- return append_uint_32(uint32_t(N));
-
- char Buffer[40];
- char *BufPtr = Buffer+40;
-
- if (N == 0) *--BufPtr = '0'; // Handle special case...
-
- while (N) {
- *--BufPtr = '0' + char(N % 10);
- N /= 10;
- }
-
- this->append(BufPtr, Buffer+40);
- return *this;
- }
-
- SmallString &append_sint(int64_t N) {
- // TODO, wrong for minint64.
- if (N < 0) {
- this->push_back('-');
- N = -N;
- }
- return append_uint(N);
- }
-
};
-
}
#endif
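
Sketch of the reworked SmallString surface, using only members visible in the hunk above: str() hands out a non-owning StringRef, and c_str() guarantees termination via the push_back(0)/pop_back() trick.

    #include "llvm/ADT/SmallString.h"
    #include <cstdio>
    using namespace llvm;

    void printName(StringRef Ext) {
      SmallString<32> Name;
      Name += "module";
      Name += '.';
      Name += Ext;                  // any StringRef-convertible value
      StringRef View = Name.str();  // no copy, just (pointer, length)
      (void)View;
      std::printf("%s\n", Name.c_str());
    }
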
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index f59a438d3eb4..f3b4533b9429 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -14,7 +14,6 @@
#ifndef LLVM_ADT_SMALLVECTOR_H
#define LLVM_ADT_SMALLVECTOR_H
-#include "llvm/ADT/iterator.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cassert>
@@ -122,11 +121,11 @@ public:
reference operator[](unsigned idx) {
- assert (Begin + idx < End);
+ assert(Begin + idx < End);
return Begin[idx];
}
const_reference operator[](unsigned idx) const {
- assert (Begin + idx < End);
+ assert(Begin + idx < End);
return Begin[idx];
}
@@ -399,6 +398,24 @@ public:
RHS.begin(), RHS.end());
}
+ /// capacity - Return the total number of elements in the currently allocated
+ /// buffer.
+ size_t capacity() const { return Capacity - Begin; }
+
+ /// set_size - Set the array size to \arg N, which the current array must have
+ /// enough capacity for.
+ ///
+ /// This does not construct or destroy any elements in the vector.
+ ///
+ /// Clients can use this in conjunction with capacity() to write past the end
+ /// of the buffer when they know that more elements are available, and only
+ /// update the size later. This avoids the cost of value initializing elements
+ /// which will only be overwritten.
+ void set_size(unsigned N) {
+ assert(N <= capacity());
+ End = Begin + N;
+ }
+
private:
/// isSmall - Return true if this is a smallvector which has not had dynamic
/// memory allocated for it.
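
The capacity()/set_size() pairing enables the "write past end(), publish the size later" idiom the comment describes. A minimal sketch, assuming a char element type (set_size() runs no constructors) and a C99 snprintf:

    #include "llvm/ADT/SmallVector.h"
    #include <cstdio>
    using namespace llvm;

    void appendDecimal(SmallVectorImpl<char> &Out, int X) {
      Out.reserve(Out.size() + 16);                // guarantee spare room
      size_t Free = Out.capacity() - Out.size();
      int N = snprintf(Out.end(), Free, "%d", X);  // write into the raw tail
      if (N > 0 && size_t(N) < Free)
        Out.set_size(Out.size() + N);              // publish the digits only
    }
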
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 6230135131a7..b7a6873001e2 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -15,13 +15,14 @@
#ifndef LLVM_ADT_SPARSEBITVECTOR_H
#define LLVM_ADT_SPARSEBITVECTOR_H
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <climits>
#include <cstring>
-#include "llvm/Support/DataTypes.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/ADT/ilist.h"
namespace llvm {
@@ -41,7 +42,7 @@ namespace llvm {
template <unsigned ElementSize = 128>
struct SparseBitVectorElement
- : ilist_node<SparseBitVectorElement<ElementSize> > {
+ : public ilist_node<SparseBitVectorElement<ElementSize> > {
public:
typedef unsigned long BitWord;
enum {
@@ -887,7 +888,7 @@ operator-(const SparseBitVector<ElementSize> &LHS,
// Dump a SparseBitVector to a stream
template <unsigned ElementSize>
-void dump(const SparseBitVector<ElementSize> &LHS, llvm::OStream &out) {
+void dump(const SparseBitVector<ElementSize> &LHS, raw_ostream &out) {
out << "[ ";
typename SparseBitVector<ElementSize>::iterator bi;
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index e40e409802d2..3d1993c6b263 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -103,10 +103,6 @@ static inline std::string itostr(int64_t X) {
return utostr(static_cast<uint64_t>(X));
}
-static inline std::string itohexstr(int64_t X) {
- return utohexstr(static_cast<uint64_t>(X));
-}
-
static inline std::string ftostr(double V) {
char Buffer[200];
sprintf(Buffer, "%20.6e", V);
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index a15d24eeae25..73fd635ee2bc 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ADT_STRINGMAP_H
#define LLVM_ADT_STRINGMAP_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include <cstring>
#include <string>
@@ -95,12 +96,12 @@ protected:
/// specified bucket will be non-null. Otherwise, it will be null. In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
- unsigned LookupBucketFor(const char *KeyStart, const char *KeyEnd);
+ unsigned LookupBucketFor(const StringRef &Key);
/// FindKey - Look up the bucket that contains the specified key. If it exists
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
- int FindKey(const char *KeyStart, const char *KeyEnd) const;
+ int FindKey(const StringRef &Key) const;
/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
/// delete it. This aborts if the value isn't in the table.
@@ -108,7 +109,7 @@ protected:
/// RemoveKey - Remove the StringMapEntry for the specified key from the
/// table, returning it. If the key is not in the table, this returns null.
- StringMapEntryBase *RemoveKey(const char *KeyStart, const char *KeyEnd);
+ StringMapEntryBase *RemoveKey(const StringRef &Key);
private:
void init(unsigned Size);
public:
@@ -136,6 +137,10 @@ public:
StringMapEntry(unsigned strLen, const ValueTy &V)
: StringMapEntryBase(strLen), second(V) {}
+ StringRef getKey() const {
+ return StringRef(getKeyData(), getKeyLength());
+ }
+
const ValueTy &getValue() const { return second; }
ValueTy &getValue() { return second; }
@@ -277,75 +282,40 @@ public:
return const_iterator(TheTable+NumBuckets, true);
}
- iterator find(const char *KeyStart, const char *KeyEnd) {
- int Bucket = FindKey(KeyStart, KeyEnd);
+ iterator find(const StringRef &Key) {
+ int Bucket = FindKey(Key);
if (Bucket == -1) return end();
return iterator(TheTable+Bucket);
}
- iterator find(const char *Key) {
- return find(Key, Key + strlen(Key));
- }
- iterator find(const std::string &Key) {
- return find(Key.data(), Key.data() + Key.size());
- }
- const_iterator find(const char *KeyStart, const char *KeyEnd) const {
- int Bucket = FindKey(KeyStart, KeyEnd);
+ const_iterator find(const StringRef &Key) const {
+ int Bucket = FindKey(Key);
if (Bucket == -1) return end();
return const_iterator(TheTable+Bucket);
}
- const_iterator find(const char *Key) const {
- return find(Key, Key + strlen(Key));
- }
- const_iterator find(const std::string &Key) const {
- return find(Key.data(), Key.data() + Key.size());
- }
/// lookup - Return the entry for the specified key, or a default
/// constructed value if no such entry exists.
- ValueTy lookup(const char *KeyStart, const char *KeyEnd) const {
- const_iterator it = find(KeyStart, KeyEnd);
- if (it != end())
- return it->second;
- return ValueTy();
- }
- ValueTy lookup(const char *Key) const {
- const_iterator it = find(Key);
- if (it != end())
- return it->second;
- return ValueTy();
- }
- ValueTy lookup(const std::string &Key) const {
+ ValueTy lookup(const StringRef &Key) const {
const_iterator it = find(Key);
if (it != end())
return it->second;
return ValueTy();
}
- ValueTy& operator[](const char *Key) {
- return GetOrCreateValue(Key, Key + strlen(Key)).getValue();
- }
- ValueTy& operator[](const std::string &Key) {
- return GetOrCreateValue(Key.data(), Key.data() + Key.size()).getValue();
+ ValueTy& operator[](const StringRef &Key) {
+ return GetOrCreateValue(Key).getValue();
}
- size_type count(const char *KeyStart, const char *KeyEnd) const {
- return find(KeyStart, KeyEnd) == end() ? 0 : 1;
- }
- size_type count(const char *Key) const {
- return count(Key, Key + strlen(Key));
- }
- size_type count(const std::string &Key) const {
- return count(Key.data(), Key.data() + Key.size());
+ size_type count(const StringRef &Key) const {
+ return find(Key) == end() ? 0 : 1;
}
/// insert - Insert the specified key/value pair into the map. If the key
/// already exists in the map, return false and ignore the request, otherwise
/// insert it and return true.
bool insert(MapEntryTy *KeyValue) {
- unsigned BucketNo =
- LookupBucketFor(KeyValue->getKeyData(),
- KeyValue->getKeyData()+KeyValue->getKeyLength());
+ unsigned BucketNo = LookupBucketFor(KeyValue->getKey());
ItemBucket &Bucket = TheTable[BucketNo];
if (Bucket.Item && Bucket.Item != getTombstoneVal())
return false; // Already exists in map.
@@ -380,15 +350,15 @@ public:
/// exists, return it. Otherwise, default construct a value, insert it, and
/// return.
template <typename InitTy>
- StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
- const char *KeyEnd,
+ StringMapEntry<ValueTy> &GetOrCreateValue(const StringRef &Key,
InitTy Val) {
- unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd);
+ unsigned BucketNo = LookupBucketFor(Key);
ItemBucket &Bucket = TheTable[BucketNo];
if (Bucket.Item && Bucket.Item != getTombstoneVal())
return *static_cast<MapEntryTy*>(Bucket.Item);
- MapEntryTy *NewItem = MapEntryTy::Create(KeyStart, KeyEnd, Allocator, Val);
+ MapEntryTy *NewItem =
+ MapEntryTy::Create(Key.begin(), Key.end(), Allocator, Val);
if (Bucket.Item == getTombstoneVal())
--NumTombstones;
@@ -403,9 +373,20 @@ public:
return *NewItem;
}
+ StringMapEntry<ValueTy> &GetOrCreateValue(const StringRef &Key) {
+ return GetOrCreateValue(Key, ValueTy());
+ }
+
+ template <typename InitTy>
+ StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
+ const char *KeyEnd,
+ InitTy Val) {
+ return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart), Val);
+ }
+
StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
const char *KeyEnd) {
- return GetOrCreateValue(KeyStart, KeyEnd, ValueTy());
+ return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart));
}
/// remove - Remove the specified key/value pair from the map, but do not
@@ -420,14 +401,7 @@ public:
V.Destroy(Allocator);
}
- bool erase(const char *Key) {
- iterator I = find(Key);
- if (I == end()) return false;
- erase(I);
- return true;
- }
-
- bool erase(const std::string &Key) {
+ bool erase(const StringRef &Key) {
iterator I = find(Key);
if (I == end()) return false;
erase(I);
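
Sketch of the consolidated interface (every member used below appears in the hunk above): one StringRef overload now serves C strings, std::strings, and pointer ranges alike.

    #include "llvm/ADT/StringMap.h"
    #include <string>
    using namespace llvm;

    unsigned bump(StringMap<unsigned> &Freq, StringRef Word) {
      return ++Freq[Word];          // default-constructs 0 on first use
    }

    unsigned peek(const StringMap<unsigned> &Freq, const std::string &Word) {
      return Freq.lookup(Word);     // std::string converts to StringRef
    }
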
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
new file mode 100644
index 000000000000..aa7d577da75a
--- /dev/null
+++ b/include/llvm/ADT/StringRef.h
@@ -0,0 +1,335 @@
+//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STRINGREF_H
+#define LLVM_ADT_STRINGREF_H
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <string>
+
+namespace llvm {
+
+ /// StringRef - Represent a constant reference to a string, i.e. a character
+ /// array and a length, which need not be null terminated.
+ ///
+ /// This class does not own the string data, it is expected to be used in
+ /// situations where the character data resides in some other buffer, whose
+ /// lifetime extends past that of the StringRef. For this reason, it is not in
+ /// general safe to store a StringRef.
+ class StringRef {
+ public:
+ typedef const char *iterator;
+ static const size_t npos = ~size_t(0);
+ typedef size_t size_type;
+
+ private:
+ /// The start of the string, in an external buffer.
+ const char *Data;
+
+ /// The length of the string.
+ size_t Length;
+
+ public:
+ /// @name Constructors
+ /// @{
+
+ /// Construct an empty string ref.
+ /*implicit*/ StringRef() : Data(0), Length(0) {}
+
+ /// Construct a string ref from a cstring.
+ /*implicit*/ StringRef(const char *Str)
+ : Data(Str) { if (Str) Length = ::strlen(Str); else Length = 0; }
+
+ /// Construct a string ref from a pointer and length.
+ /*implicit*/ StringRef(const char *data, unsigned length)
+ : Data(data), Length(length) {}
+
+ /// Construct a string ref from an std::string.
+ /*implicit*/ StringRef(const std::string &Str)
+ : Data(Str.c_str()), Length(Str.length()) {}
+
+ /// @}
+ /// @name Iterators
+ /// @{
+
+ iterator begin() const { return Data; }
+
+ iterator end() const { return Data + Length; }
+
+ /// @}
+ /// @name String Operations
+ /// @{
+
+ /// data - Get a pointer to the start of the string (which may not be null
+ /// terminated).
+ const char *data() const { return Data; }
+
+ /// empty - Check if the string is empty.
+ bool empty() const { return Length == 0; }
+
+ /// size - Get the string size.
+ size_t size() const { return Length; }
+
+ /// front - Get the first character in the string.
+ char front() const {
+ assert(!empty());
+ return Data[0];
+ }
+
+ /// back - Get the last character in the string.
+ char back() const {
+ assert(!empty());
+ return Data[Length-1];
+ }
+
+ /// equals - Check for string equality; this is more efficient than
+ /// compare() when the relative ordering of unequal strings isn't needed.
+ bool equals(const StringRef &RHS) const {
+ return (Length == RHS.Length &&
+ memcmp(Data, RHS.Data, RHS.Length) == 0);
+ }
+
+ /// compare - Compare two strings; the result is -1, 0, or 1 if this string
+ /// is lexicographically less than, equal to, or greater than the \arg RHS.
+ int compare(const StringRef &RHS) const {
+ // Check the prefix for a mismatch.
+ if (int Res = memcmp(Data, RHS.Data, std::min(Length, RHS.Length)))
+ return Res < 0 ? -1 : 1;
+
+ // Otherwise the prefixes match, so we only need to check the lengths.
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+ }
+
+ /// str - Get the contents as an std::string.
+ std::string str() const { return std::string(Data, Length); }
+
+ /// @}
+ /// @name Operator Overloads
+ /// @{
+
+ char operator[](size_t Index) const {
+ assert(Index < Length && "Invalid index!");
+ return Data[Index];
+ }
+
+ /// @}
+ /// @name Type Conversions
+ /// @{
+
+ operator std::string() const {
+ return str();
+ }
+
+ /// @}
+ /// @name String Predicates
+ /// @{
+
+ /// startswith - Check if this string starts with the given \arg Prefix.
+ bool startswith(const StringRef &Prefix) const {
+ return substr(0, Prefix.Length).equals(Prefix);
+ }
+
+ /// endswith - Check if this string ends with the given \arg Suffix.
+ bool endswith(const StringRef &Suffix) const {
+ return slice(size() - Suffix.Length, size()).equals(Suffix);
+ }
+
+ /// @}
+ /// @name String Searching
+ /// @{
+
+ /// find - Search for the first character \arg C in the string.
+ ///
+ /// \return - The index of the first occurrence of \arg C, or npos if not
+ /// found.
+ size_t find(char C) const {
+ for (size_t i = 0, e = Length; i != e; ++i)
+ if (Data[i] == C)
+ return i;
+ return npos;
+ }
+
+ /// find - Search for the first string \arg Str in the string.
+ ///
+ /// \return - The index of the first occurrence of \arg Str, or npos if not
+ /// found.
+ size_t find(const StringRef &Str) const;
+
+ /// rfind - Search for the last character \arg C in the string.
+ ///
+ /// \return - The index of the last occurrence of \arg C, or npos if not
+ /// found.
+ size_t rfind(char C, size_t From = npos) const {
+ From = std::min(From, Length);
+ size_t i = From;
+ while (i != 0) {
+ --i;
+ if (Data[i] == C)
+ return i;
+ }
+ return npos;
+ }
+
+ /// rfind - Search for the last string \arg Str in the string.
+ ///
+ /// \return - The index of the last occurrence of \arg Str, or npos if not
+ /// found.
+ size_t rfind(const StringRef &Str) const;
+
+ /// find_first_of - Find the first instance of the specified character or
+ /// return npos if not in string. Same as find.
+ size_type find_first_of(char C) const { return find(C); }
+
+ /// find_first_of - Find the first character from the string 'Chars' in the
+ /// current string or return npos if not in string.
+ size_type find_first_of(StringRef Chars) const;
+
+ /// find_first_not_of - Find the first character in the string that is not
+ /// in the string 'Chars', or return npos if every character is in 'Chars'.
+ size_type find_first_not_of(StringRef Chars) const;
+
+ /// @}
+ /// @name Helpful Algorithms
+ /// @{
+
+ /// count - Return the number of occurrences of \arg C in the string.
+ size_t count(char C) const {
+ size_t Count = 0;
+ for (size_t i = 0, e = Length; i != e; ++i)
+ if (Data[i] == C)
+ ++Count;
+ return Count;
+ }
+
+ /// count - Return the number of non-overlapped occurrences of \arg Str in
+ /// the string.
+ size_t count(const StringRef &Str) const;
+
+ /// getAsInteger - Parse the current string as an integer of the specified
+ /// radix. If Radix is specified as zero, this does radix autosensing using
+ /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
+ ///
+ /// If the string is invalid or if only a subset of the string is valid,
+ /// this returns true to signify the error. The string is considered
+ /// erroneous if empty.
+ ///
+ bool getAsInteger(unsigned Radix, long long &Result) const;
+ bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
+ bool getAsInteger(unsigned Radix, int &Result) const;
+ bool getAsInteger(unsigned Radix, unsigned &Result) const;
+
+ // TODO: Provide overloads for int/unsigned that check for overflow.
+
+ /// @}
+ /// @name Substring Operations
+ /// @{
+
+ /// substr - Return a reference to the substring from [Start, Start + N).
+ ///
+ /// \param Start - The index of the starting character in the substring; if
+ /// the index is npos or greater than the length of the string then the
+ /// empty substring will be returned.
+ ///
+ /// \param N - The number of characters to include in the substring. If N
+ /// exceeds the number of characters remaining in the string, the string
+ /// suffix (starting with \arg Start) will be returned.
+ StringRef substr(size_t Start, size_t N = npos) const {
+ Start = std::min(Start, Length);
+ return StringRef(Data + Start, std::min(N, Length - Start));
+ }
+
+ /// slice - Return a reference to the substring from [Start, End).
+ ///
+ /// \param Start - The index of the starting character in the substring; if
+ /// the index is npos or greater than the length of the string then the
+ /// empty substring will be returned.
+ ///
+ /// \param End - The index following the last character to include in the
+ /// substring. If this is npos, or less than \arg Start, or exceeds the
+ /// number of characters remaining in the string, the string suffix
+ /// (starting with \arg Start) will be returned.
+ StringRef slice(size_t Start, size_t End) const {
+ Start = std::min(Start, Length);
+ End = std::min(std::max(Start, End), Length);
+ return StringRef(Data + Start, End - Start);
+ }
+
+ /// split - Split into two substrings around the first occurrence of a
+ /// separator character.
+ ///
+ /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
+ /// such that (*this == LHS + Separator + RHS) is true and RHS is
+ /// maximal. If \arg Separator is not in the string, then the result is a
+ /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
+ ///
+ /// \param Separator - The character to split on.
+ /// \return - The split substrings.
+ std::pair<StringRef, StringRef> split(char Separator) const {
+ size_t Idx = find(Separator);
+ if (Idx == npos)
+ return std::make_pair(*this, StringRef());
+ return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
+ }
+
+ /// rsplit - Split into two substrings around the last occurrence of a
+ /// separator character.
+ ///
+ /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
+ /// such that (*this == LHS + Separator + RHS) is true and RHS is
+ /// minimal. If \arg Separator is not in the string, then the result is a
+ /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
+ ///
+ /// \param Separator - The character to split on.
+ /// \return - The split substrings.
+ std::pair<StringRef, StringRef> rsplit(char Separator) const {
+ size_t Idx = rfind(Separator);
+ if (Idx == npos)
+ return std::make_pair(*this, StringRef());
+ return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
+ }
+
+ /// @}
+ };
+
+ /// @name StringRef Comparison Operators
+ /// @{
+
+ inline bool operator==(const StringRef &LHS, const StringRef &RHS) {
+ return LHS.equals(RHS);
+ }
+
+ inline bool operator!=(const StringRef &LHS, const StringRef &RHS) {
+ return !(LHS == RHS);
+ }
+
+ inline bool operator<(const StringRef &LHS, const StringRef &RHS) {
+ return LHS.compare(RHS) == -1;
+ }
+
+ inline bool operator<=(const StringRef &LHS, const StringRef &RHS) {
+ return LHS.compare(RHS) != 1;
+ }
+
+ inline bool operator>(const StringRef &LHS, const StringRef &RHS) {
+ return LHS.compare(RHS) == 1;
+ }
+
+ inline bool operator>=(const StringRef &LHS, const StringRef &RHS) {
+ return LHS.compare(RHS) != -1;
+ }
+
+ /// @}
+
+}
+
+#endif
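
A short sketch exercising the new class; every StringRef call below is declared in the header above, while parseKeyValue itself is a hypothetical helper, not LLVM API.

    #include "llvm/ADT/StringRef.h"
    #include <utility>
    using namespace llvm;

    // Hypothetical helper: parse "key=123" into its two parts.
    bool parseKeyValue(StringRef Line, StringRef &Key, unsigned &Value) {
      std::pair<StringRef, StringRef> P = Line.split('=');
      if (P.second.empty())
        return false;                 // no '=' (or an empty value)
      Key = P.first;
      return !P.second.getAsInteger(10, Value);  // true means parse error
    }
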
diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h
index ed94f9de2d1c..cf92862c20d9 100644
--- a/include/llvm/ADT/Trie.h
+++ b/include/llvm/ADT/Trie.h
@@ -118,12 +118,12 @@ public:
#if 0
inline void dump() {
- std::cerr << "Node: " << this << "\n"
+ llvm::cerr << "Node: " << this << "\n"
<< "Label: " << Label << "\n"
<< "Children:\n";
for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
- std::cerr << (*I)->Label << "\n";
+ llvm::cerr << (*I)->Label << "\n";
}
#endif
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 96c035795849..89736bcc4565 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -10,9 +10,17 @@
#ifndef LLVM_ADT_TRIPLE_H
#define LLVM_ADT_TRIPLE_H
+#include "llvm/ADT/StringRef.h"
#include <string>
+// Some system headers or GCC predefined macros conflict with identifiers in
+// this file. Undefine them here.
+#undef mips
+#undef sparc
+
namespace llvm {
+class StringRef;
+class Twine;
/// Triple - Helper class for working with target triples.
///
@@ -26,17 +34,44 @@ namespace llvm {
/// behavior for particular targets. This class isolates the mapping
/// from the components of the target triple to well known IDs.
///
-/// See autoconf/config.guess for a glimpse into what they look like
-/// in practice.
+/// At its core the Triple class is designed to be a wrapper for a triple
+/// string; it does not normally change or normalize the triple string, instead
+/// it provides additional APIs to parse normalized parts out of the triple.
+///
+/// One curiosity this implies is that for some odd triples the results of,
+/// e.g., getOSName() can be very different from the result of getOS(). For
+/// example, for 'i386-mingw32', getOS() will return MinGW32, but since
+/// getOSName() is purely based on the string structure that will return the
+/// empty string.
+///
+/// Clients should generally avoid using getOSName() and related APIs unless
+/// they are familiar with the triple format (this is particularly true when
+/// rewriting a triple).
+///
+/// See autoconf/config.guess for a glimpse into what they look like in
+/// practice.
class Triple {
public:
enum ArchType {
UnknownArch,
- x86, // i?86
- ppc, // powerpc
- ppc64, // powerpc64
- x86_64, // amd64, x86_64
+ alpha, // Alpha: alpha
+ arm, // ARM: arm, armv.*, xscale
+ bfin, // Blackfin: bfin
+ cellspu, // CellSPU: spu, cellspu
+ mips, // MIPS: mips, mipsallegrex
+ mipsel, // MIPSEL: mipsel, mipsallegrexel, psp
+ msp430, // MSP430: msp430
+ pic16, // PIC16: pic16
+ ppc, // PPC: powerpc
+ ppc64, // PPC64: powerpc64
+ sparc, // Sparc: sparc
+ systemz, // SystemZ: s390x
+ tce, // TCE (http://tce.cs.tut.fi/): tce
+ thumb, // Thumb: thumb, thumbv.*
+ x86, // X86: i[3-9]86
+ x86_64, // X86-64: amd64, x86_64
+ xcore, // XCore: xcore
InvalidArch
};
@@ -50,11 +85,17 @@ public:
UnknownOS,
AuroraUX,
+ Cygwin,
Darwin,
DragonFly,
FreeBSD,
Linux,
- OpenBSD
+ MinGW32,
+ MinGW64,
+ NetBSD,
+ OpenBSD,
+ Solaris,
+ Win32
};
private:
@@ -76,9 +117,9 @@ public:
/// @name Constructors
/// @{
- Triple() : Data(""), Arch(InvalidArch) {}
- explicit Triple(const char *Str) : Data(Str), Arch(InvalidArch) {}
- explicit Triple(const char *ArchStr, const char *VendorStr, const char *OSStr)
+ Triple() : Data(), Arch(InvalidArch) {}
+ explicit Triple(StringRef Str) : Data(Str), Arch(InvalidArch) {}
+ explicit Triple(StringRef ArchStr, StringRef VendorStr, StringRef OSStr)
: Data(ArchStr), Arch(InvalidArch) {
Data += '-';
Data += VendorStr;
@@ -120,29 +161,41 @@ public:
const std::string &getTriple() const { return Data; }
- // FIXME: Invent a lightweight string representation for these to
- // use.
-
/// getArchName - Get the architecture (first) component of the
/// triple.
- std::string getArchName() const;
+ StringRef getArchName() const;
/// getVendorName - Get the vendor (second) component of the triple.
- std::string getVendorName() const;
+ StringRef getVendorName() const;
/// getOSName - Get the operating system (third) component of the
/// triple.
- std::string getOSName() const;
+ StringRef getOSName() const;
/// getEnvironmentName - Get the optional environment (fourth)
/// component of the triple, or "" if empty.
- std::string getEnvironmentName() const;
+ StringRef getEnvironmentName() const;
/// getOSAndEnvironmentName - Get the operating system and optional
/// environment components as a single string (separated by a '-'
/// if the environment component is present).
- std::string getOSAndEnvironmentName() const;
+ StringRef getOSAndEnvironmentName() const;
+
+ /// getDarwinNumber - Parse the 'darwin number' out of the specific target
+ /// triple. For example, if we have darwin8.5, return 8,5,0. If any entry is
+ /// not defined, return 0's. This requires that the triple have an OSType of
+ /// darwin before it is called.
+ void getDarwinNumber(unsigned &Maj, unsigned &Min, unsigned &Revision) const;
+
+ /// getDarwinMajorNumber - Return just the major version number, this is
+ /// specialized because it is a common query.
+ unsigned getDarwinMajorNumber() const {
+ unsigned Maj, Min, Rev;
+ getDarwinNumber(Maj, Min, Rev);
+ return Maj;
+ }
+
/// @}
/// @name Mutators
/// @{
@@ -160,27 +213,27 @@ public:
void setOS(OSType Kind);
/// setTriple - Set all components to the new triple \arg Str.
- void setTriple(const std::string &Str);
+ void setTriple(const Twine &Str);
/// setArchName - Set the architecture (first) component of the
/// triple by name.
- void setArchName(const std::string &Str);
+ void setArchName(const StringRef &Str);
/// setVendorName - Set the vendor (second) component of the triple
/// by name.
- void setVendorName(const std::string &Str);
+ void setVendorName(const StringRef &Str);
/// setOSName - Set the operating system (third) component of the
/// triple by name.
- void setOSName(const std::string &Str);
+ void setOSName(const StringRef &Str);
/// setEnvironmentName - Set the optional environment (fourth)
/// component of the triple by name.
- void setEnvironmentName(const std::string &Str);
+ void setEnvironmentName(const StringRef &Str);
/// setOSAndEnvironmentName - Set the operating system and optional
/// environment components with a single string.
- void setOSAndEnvironmentName(const std::string &Str);
+ void setOSAndEnvironmentName(const StringRef &Str);
/// @}
/// @name Static helpers for IDs.
@@ -190,6 +243,14 @@ public:
/// architecture.
static const char *getArchTypeName(ArchType Kind);
+ /// getArchTypePrefix - Get the "prefix" canonical name for the \arg Kind
+ /// architecture. This is the prefix used by the architecture specific
+ /// builtins, and is suitable for passing to \see
+ /// Intrinsic::getIntrinsicForGCCBuiltin().
+ ///
+ /// \return - The architecture prefix, or 0 if none is defined.
+ static const char *getArchTypePrefix(ArchType Kind);
+
/// getVendorTypeName - Get the canonical name for the \arg Kind
/// vendor.
static const char *getVendorTypeName(VendorType Kind);
@@ -198,6 +259,19 @@ public:
static const char *getOSTypeName(OSType Kind);
/// @}
+ /// @name Static helpers for converting alternate architecture names.
+ /// @{
+
+ /// getArchTypeForLLVMName - The canonical type for the given LLVM
+ /// architecture name (e.g., "x86").
+ static ArchType getArchTypeForLLVMName(const StringRef &Str);
+
+ /// getArchTypeForDarwinArchName - Get the architecture type for a "Darwin"
+ /// architecture name, for example as accepted by "gcc -arch" (see also
+ /// arch(3)).
+ static ArchType getArchTypeForDarwinArchName(const StringRef &Str);
+
+ /// @}
};
} // End llvm namespace
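
Sketch of the expanded OS enumeration in use; getOS() is the parsed query that the class comment contrasts with getOSName(). isWindowsTarget is a hypothetical helper, not part of the class.

    #include "llvm/ADT/Triple.h"
    using namespace llvm;

    bool isWindowsTarget(StringRef TT) {
      Triple T(TT);
      switch (T.getOS()) {          // normalized view of the OS component
      case Triple::Cygwin:
      case Triple::MinGW32:
      case Triple::MinGW64:
      case Triple::Win32:
        return true;
      default:
        return false;
      }
    }
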
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
new file mode 100644
index 000000000000..88fde0a54ae6
--- /dev/null
+++ b/include/llvm/ADT/Twine.h
@@ -0,0 +1,422 @@
+//===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_TWINE_H
+#define LLVM_ADT_TWINE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <string>
+
+namespace llvm {
+ template <typename T>
+ class SmallVectorImpl;
+ class StringRef;
+ class raw_ostream;
+
+ /// Twine - A lightweight data structure for efficiently representing the
+ /// concatenation of temporary values as strings.
+ ///
+ /// A Twine is a kind of rope; it represents a concatenated string using a
+ /// binary-tree, where the string is the preorder of the nodes. Since the
+ /// Twine can be efficiently rendered into a buffer when its result is used,
+ /// it avoids the cost of generating temporary values for intermediate string
+ /// results -- particularly in cases when the Twine result is never
+ /// required. By explicitly tracking the type of leaf nodes, we can also avoid
+ /// the creation of temporary strings for conversion operations (such as
+ /// appending an integer to a string).
+ ///
+ /// A Twine is not intended for use directly and should not be stored; its
+ /// implementation relies on the ability to store pointers to temporary stack
+ /// objects which may be deallocated at the end of a statement. Twines should
+ /// only be accepted as const references in arguments when an API wishes
+ /// to accept possibly-concatenated strings.
+ ///
+ /// Twines support a special 'null' value, which always concatenates to form
+ /// itself, and renders as an empty string. This can be returned from APIs to
+ /// effectively nullify any concatenations performed on the result.
+ ///
+ /// \b Implementation \n
+ ///
+ /// Given the nature of a Twine, it is not possible for the Twine's
+ /// concatenation method to construct interior nodes; the result must be
+ /// represented inside the returned value. For this reason a Twine object
+ /// actually holds two values, the left- and right-hand sides of a
+ /// concatenation. We also have nullary Twine objects, which are effectively
+ /// sentinel values that represent empty strings.
+ ///
+ /// Thus, a Twine can effectively have zero, one, or two children. The \see
+ /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
+ /// testing the number of children.
+ ///
+ /// We maintain a number of invariants on Twine objects (FIXME: Why):
+ /// - Nullary twines are always represented with their Kind on the left-hand
+ /// side, and the Empty kind on the right-hand side.
+ /// - Unary twines are always represented with the value on the left-hand
+ /// side, and the Empty kind on the right-hand side.
+ /// - If a Twine has another Twine as a child, that child should always be
+ /// binary (otherwise it could have been folded into the parent).
+ ///
+ /// These invariants are checked by \see isValid().
+ ///
+ /// \b Efficiency Considerations \n
+ ///
+ /// The Twine is designed to yield efficient and small code for common
+ /// situations. For this reason, the concat() method is inlined so that
+ /// concatenations of leaf nodes can be optimized into stores directly into a
+ /// single stack allocated object.
+ ///
+ /// In practice, not all compilers can be trusted to optimize concat() fully,
+ /// so we provide two additional methods (and accompanying operator+
+ /// overloads) to guarantee that particularly important cases (cstring plus
+ /// StringRef) codegen as desired.
+ class Twine {
+ /// NodeKind - Represent the type of an argument.
+ enum NodeKind {
+ /// An empty string; the result of concatenating anything with it is also
+ /// empty.
+ NullKind,
+
+ /// The empty string.
+ EmptyKind,
+
+ /// A pointer to a Twine instance.
+ TwineKind,
+
+ /// A pointer to a C string instance.
+ CStringKind,
+
+ /// A pointer to an std::string instance.
+ StdStringKind,
+
+ /// A pointer to a StringRef instance.
+ StringRefKind,
+
+ /// A pointer to an unsigned int value, to render as an unsigned decimal
+ /// integer.
+ DecUIKind,
+
+ /// A pointer to an int value, to render as a signed decimal integer.
+ DecIKind,
+
+ /// A pointer to an unsigned long value, to render as an unsigned decimal
+ /// integer.
+ DecULKind,
+
+ /// A pointer to a long value, to render as a signed decimal integer.
+ DecLKind,
+
+ /// A pointer to an unsigned long long value, to render as an unsigned
+ /// decimal integer.
+ DecULLKind,
+
+ /// A pointer to a long long value, to render as a signed decimal integer.
+ DecLLKind,
+
+ /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
+ /// integer.
+ UHexKind
+ };
+
+ private:
+ /// LHS - The prefix in the concatenation, which may be uninitialized for
+ /// Null or Empty kinds.
+ const void *LHS;
+ /// RHS - The suffix in the concatenation, which may be uninitialized for
+ /// Null or Empty kinds.
+ const void *RHS;
+ /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
+ NodeKind LHSKind : 8;
+ /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
+ NodeKind RHSKind : 8;
+
+ private:
+ /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
+ explicit Twine(NodeKind Kind)
+ : LHSKind(Kind), RHSKind(EmptyKind) {
+ assert(isNullary() && "Invalid kind!");
+ }
+
+ /// Construct a binary twine.
+ explicit Twine(const Twine &_LHS, const Twine &_RHS)
+ : LHS(&_LHS), RHS(&_RHS), LHSKind(TwineKind), RHSKind(TwineKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Construct a twine from explicit values.
+ explicit Twine(const void *_LHS, NodeKind _LHSKind,
+ const void *_RHS, NodeKind _RHSKind)
+ : LHS(_LHS), RHS(_RHS), LHSKind(_LHSKind), RHSKind(_RHSKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// isNull - Check for the null twine.
+ bool isNull() const {
+ return getLHSKind() == NullKind;
+ }
+
+ /// isEmpty - Check for the empty twine.
+ bool isEmpty() const {
+ return getLHSKind() == EmptyKind;
+ }
+
+ /// isNullary - Check if this is a nullary twine (null or empty).
+ bool isNullary() const {
+ return isNull() || isEmpty();
+ }
+
+ /// isUnary - Check if this is a unary twine.
+ bool isUnary() const {
+ return getRHSKind() == EmptyKind && !isNullary();
+ }
+
+ /// isBinary - Check if this is a binary twine.
+ bool isBinary() const {
+ return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
+ }
+
+ /// isValid - Check if this is a valid twine (satisfying the invariants on
+ /// order and number of arguments).
+ bool isValid() const {
+ // Nullary twines always have Empty on the RHS.
+ if (isNullary() && getRHSKind() != EmptyKind)
+ return false;
+
+ // Null should never appear on the RHS.
+ if (getRHSKind() == NullKind)
+ return false;
+
+ // The RHS cannot be non-empty if the LHS is empty.
+ if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
+ return false;
+
+ // A twine child should always be binary.
+ if (getLHSKind() == TwineKind &&
+ !static_cast<const Twine*>(LHS)->isBinary())
+ return false;
+ if (getRHSKind() == TwineKind &&
+ !static_cast<const Twine*>(RHS)->isBinary())
+ return false;
+
+ return true;
+ }
+
+ /// getLHSKind - Get the NodeKind of the left-hand side.
+ NodeKind getLHSKind() const { return LHSKind; }
+
+ /// getRHSKind - Get the NodeKind of the right-hand side.
+ NodeKind getRHSKind() const { return RHSKind; }
+
+ /// printOneChild - Print one child from a twine.
+ void printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const;
+
+ /// printOneChildRepr - Print the representation of one child from a twine.
+ void printOneChildRepr(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const;
+
+ public:
+ /// @name Constructors
+ /// @{
+
+ /// Construct from an empty string.
+ /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Construct from a C string.
+ ///
+ /// We take care here to optimize "" into the empty twine -- this will be
+ /// optimized out for string constants. This allows Twine arguments to have
+ /// default "" values, without introducing unnecessary string constants.
+ /*implicit*/ Twine(const char *Str)
+ : RHSKind(EmptyKind) {
+ if (Str[0] != '\0') {
+ LHS = Str;
+ LHSKind = CStringKind;
+ } else
+ LHSKind = EmptyKind;
+
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Construct from an std::string.
+ /*implicit*/ Twine(const std::string &Str)
+ : LHS(&Str), LHSKind(StdStringKind), RHSKind(EmptyKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Construct from a StringRef.
+ /*implicit*/ Twine(const StringRef &Str)
+ : LHS(&Str), LHSKind(StringRefKind), RHSKind(EmptyKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Construct a twine to print \arg Val as an unsigned decimal integer.
+ explicit Twine(const unsigned int &Val)
+ : LHS(&Val), LHSKind(DecUIKind), RHSKind(EmptyKind) {
+ }
+
+ /// Construct a twine to print \arg Val as a signed decimal integer.
+ explicit Twine(const int &Val)
+ : LHS(&Val), LHSKind(DecIKind), RHSKind(EmptyKind) {
+ }
+
+ /// Construct a twine to print \arg Val as an unsigned decimal integer.
+ explicit Twine(const unsigned long &Val)
+ : LHS(&Val), LHSKind(DecULKind), RHSKind(EmptyKind) {
+ }
+
+ /// Construct a twine to print \arg Val as a signed decimal integer.
+ explicit Twine(const long &Val)
+ : LHS(&Val), LHSKind(DecLKind), RHSKind(EmptyKind) {
+ }
+
+ /// Construct a twine to print \arg Val as an unsigned decimal integer.
+ explicit Twine(const unsigned long long &Val)
+ : LHS(&Val), LHSKind(DecULLKind), RHSKind(EmptyKind) {
+ }
+
+ /// Construct a twine to print \arg Val as a signed decimal integer.
+ explicit Twine(const long long &Val)
+ : LHS(&Val), LHSKind(DecLLKind), RHSKind(EmptyKind) {
+ }
+
+ // FIXME: Unfortunately, to make sure this is as efficient as possible we
+ // need extra binary constructors from particular types. We can't rely on
+ // the compiler to be smart enough to fold operator+()/concat() down to the
+ // right thing. Yet.
+
+ /// Construct as the concatenation of a C string and a StringRef.
+ /*implicit*/ Twine(const char *_LHS, const StringRef &_RHS)
+ : LHS(_LHS), RHS(&_RHS), LHSKind(CStringKind), RHSKind(StringRefKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Construct as the concatenation of a StringRef and a C string.
+ /*implicit*/ Twine(const StringRef &_LHS, const char *_RHS)
+ : LHS(&_LHS), RHS(_RHS), LHSKind(StringRefKind), RHSKind(CStringKind) {
+ assert(isValid() && "Invalid twine!");
+ }
+
+ /// Create a 'null' string, which is an empty string that always
+ /// concatenates to form another empty string.
+ static Twine createNull() {
+ return Twine(NullKind);
+ }
+
+ /// @}
+ /// @name Numeric Conversions
+ /// @{
+
+ /// Construct a twine to print \arg Val as an unsigned hexadecimal integer.
+ static Twine utohexstr(const uint64_t &Val) {
+ return Twine(&Val, UHexKind, 0, EmptyKind);
+ }
+
+ /// @}
+ /// @name Predicate Operations
+ /// @{
+
+ /// isTriviallyEmpty - Check if this twine is trivially empty; a false
+ /// return value does not necessarily mean the twine is empty.
+ bool isTriviallyEmpty() const {
+ return isNullary();
+ }
+
+ /// @}
+ /// @name String Operations
+ /// @{
+
+ Twine concat(const Twine &Suffix) const;
+
+ /// @}
+ /// @name Output & Conversion.
+ /// @{
+
+ /// str - Return the twine contents as a std::string.
+ std::string str() const;
+
+ /// toVector - Write the concatenated string into the given SmallString or
+ /// SmallVector.
+ void toVector(SmallVectorImpl<char> &Out) const;
+
+ /// print - Write the concatenated string represented by this twine to the
+ /// stream \arg OS.
+ void print(raw_ostream &OS) const;
+
+ /// dump - Dump the concatenated string represented by this twine to stderr.
+ void dump() const;
+
+ /// printRepr - Write the representation of this twine to the stream \arg OS.
+ void printRepr(raw_ostream &OS) const;
+
+ /// dumpRepr - Dump the representation of this twine to stderr.
+ void dumpRepr() const;
+
+ /// @}
+ };
+
+ /// @name Twine Inline Implementations
+ /// @{
+
+ inline Twine Twine::concat(const Twine &Suffix) const {
+ // Concatenation with null is null.
+ if (isNull() || Suffix.isNull())
+ return Twine(NullKind);
+
+ // Concatenation with empty yields the other side.
+ if (isEmpty())
+ return Suffix;
+ if (Suffix.isEmpty())
+ return *this;
+
+ // Otherwise we need to create a new node, taking care to fold in unary
+ // twines.
+ const void *NewLHS = this, *NewRHS = &Suffix;
+ NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
+ if (isUnary()) {
+ NewLHS = LHS;
+ NewLHSKind = getLHSKind();
+ }
+ if (Suffix.isUnary()) {
+ NewRHS = Suffix.LHS;
+ NewRHSKind = Suffix.getLHSKind();
+ }
+
+ return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
+ }
+
+ inline Twine operator+(const Twine &LHS, const Twine &RHS) {
+ return LHS.concat(RHS);
+ }
+
+ /// Additional overload to guarantee simplified codegen; this is equivalent to
+ /// concat().
+
+ inline Twine operator+(const char *LHS, const StringRef &RHS) {
+ return Twine(LHS, RHS);
+ }
+
+ /// Additional overload to guarantee simplified codegen; this is equivalent to
+ /// concat().
+
+ inline Twine operator+(const StringRef &LHS, const char *RHS) {
+ return Twine(LHS, RHS);
+ }
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
+ RHS.print(OS);
+ return OS;
+ }
+
+ /// @}
+}
+
+#endif
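
Sketch of the usage pattern the comments above prescribe: build the concatenation and consume it within the same full expression, never storing the Twine, since its leaves point at temporaries.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/Twine.h"
    #include <string>
    using namespace llvm;

    std::string makeLabel(StringRef Base, unsigned N) {
      // The temporaries live to the end of the full expression, which is
      // exactly the window in which the Twine may be rendered.
      return (Base + "." + Twine(N)).str();
    }

    void renderLabel(StringRef Base, unsigned N, SmallVectorImpl<char> &Out) {
      (Base + "." + Twine(N)).toVector(Out);  // skips the std::string temporary
    }
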
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index b95e3e04e81f..b3824a217c74 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -38,8 +38,8 @@
#ifndef LLVM_ADT_ILIST_H
#define LLVM_ADT_ILIST_H
-#include "llvm/ADT/iterator.h"
#include <cassert>
+#include <iterator>
namespace llvm {
@@ -121,15 +121,15 @@ struct ilist_node_traits {
/// for all common operations.
///
template<typename NodeTy>
-struct ilist_default_traits : ilist_nextprev_traits<NodeTy>,
- ilist_sentinel_traits<NodeTy>,
- ilist_node_traits<NodeTy> {
+struct ilist_default_traits : public ilist_nextprev_traits<NodeTy>,
+ public ilist_sentinel_traits<NodeTy>,
+ public ilist_node_traits<NodeTy> {
};
// Template traits for intrusive list. By specializing this template class, you
// can change what next/prev fields are used to store the links...
template<typename NodeTy>
-struct ilist_traits : ilist_default_traits<NodeTy> {};
+struct ilist_traits : public ilist_default_traits<NodeTy> {};
// Const traits are the same as nonconst traits...
template<typename Ty>
@@ -140,11 +140,12 @@ struct ilist_traits<const Ty> : public ilist_traits<Ty> {};
//
template<typename NodeTy>
class ilist_iterator
- : public bidirectional_iterator<NodeTy, ptrdiff_t> {
+ : public std::iterator<std::bidirectional_iterator_tag, NodeTy, ptrdiff_t> {
public:
typedef ilist_traits<NodeTy> Traits;
- typedef bidirectional_iterator<NodeTy, ptrdiff_t> super;
+ typedef std::iterator<std::bidirectional_iterator_tag,
+ NodeTy, ptrdiff_t> super;
typedef typename super::value_type value_type;
typedef typename super::difference_type difference_type;
@@ -189,12 +190,10 @@ public:
// Accessors...
operator pointer() const {
- assert(Traits::getNext(NodePtr) != 0 && "Dereferencing end()!");
return NodePtr;
}
reference operator*() const {
- assert(Traits::getNext(NodePtr) != 0 && "Dereferencing end()!");
return *NodePtr;
}
pointer operator->() const { return &operator*(); }
@@ -215,7 +214,6 @@ public:
}
ilist_iterator &operator++() { // preincrement - Advance
NodePtr = Traits::getNext(NodePtr);
- assert(NodePtr && "++'d off the end of an ilist!");
return *this;
}
ilist_iterator operator--(int) { // postdecrement operators...
@@ -323,13 +321,13 @@ class iplist : public Traits {
/// CreateLazySentinel - This method verifies whether the sentinel for the
/// list has been created and lazily makes it if not.
void CreateLazySentinel() const {
- this->Traits::ensureHead(Head);
+ this->ensureHead(Head);
}
static bool op_less(NodeTy &L, NodeTy &R) { return L < R; }
static bool op_equal(NodeTy &L, NodeTy &R) { return L == R; }
- // No fundamental reason why iplist can't by copyable, but the default
+ // No fundamental reason why iplist can't be copyable, but the default
// copy/copy-assign won't do.
iplist(const iplist &); // do not implement
void operator=(const iplist &); // do not implement
@@ -347,7 +345,7 @@ public:
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
- iplist() : Head(this->Traits::provideInitialHead()) {}
+ iplist() : Head(this->provideInitialHead()) {}
~iplist() {
if (!Head) return;
clear();
diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h
index dae7475ffa01..da25f959e612 100644
--- a/include/llvm/ADT/ilist_node.h
+++ b/include/llvm/ADT/ilist_node.h
@@ -18,28 +18,37 @@
namespace llvm {
template<typename NodeTy>
-struct ilist_nextprev_traits;
+struct ilist_traits;
+/// ilist_half_node - Base class that provides prev services for sentinels.
+///
template<typename NodeTy>
-struct ilist_traits;
+class ilist_half_node {
+ friend struct ilist_traits<NodeTy>;
+ NodeTy *Prev;
+protected:
+ NodeTy *getPrev() { return Prev; }
+ const NodeTy *getPrev() const { return Prev; }
+ void setPrev(NodeTy *P) { Prev = P; }
+ ilist_half_node() : Prev(0) {}
+};
+
+template<typename NodeTy>
+struct ilist_nextprev_traits;
/// ilist_node - Base class that provides next/prev services for nodes
/// that use ilist_nextprev_traits or ilist_default_traits.
///
template<typename NodeTy>
-class ilist_node {
-private:
+class ilist_node : private ilist_half_node<NodeTy> {
friend struct ilist_nextprev_traits<NodeTy>;
friend struct ilist_traits<NodeTy>;
- NodeTy *Prev, *Next;
- NodeTy *getPrev() { return Prev; }
+ NodeTy *Next;
NodeTy *getNext() { return Next; }
- const NodeTy *getPrev() const { return Prev; }
const NodeTy *getNext() const { return Next; }
- void setPrev(NodeTy *N) { Prev = N; }
void setNext(NodeTy *N) { Next = N; }
protected:
- ilist_node() : Prev(0), Next(0) {}
+ ilist_node() : Next(0) {}
};
} // End llvm namespace
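
Sketch (not part of the commit) of a node type built on these bases: ilist_node supplies Next and inherits Prev from ilist_half_node, so a sentinel can embed just the half node. Item and demo are hypothetical; this assumes the default iplist traits, which heap-allocate and own nodes.

    #include "llvm/ADT/ilist.h"
    #include "llvm/ADT/ilist_node.h"
    using namespace llvm;

    struct Item : public ilist_node<Item> {
      int Value;
      Item(int V = 0) : Value(V) {}
    };

    void demo() {
      iplist<Item> L;
      L.push_back(new Item(1));  // iplist takes ownership of heap nodes
      L.push_back(new Item(2));
      L.clear();                 // deleteNode() frees them
    }
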
diff --git a/include/llvm/AbstractTypeUser.h b/include/llvm/AbstractTypeUser.h
index c1216baabf8f..b6cceb4011ad 100644
--- a/include/llvm/AbstractTypeUser.h
+++ b/include/llvm/AbstractTypeUser.h
@@ -31,6 +31,7 @@
namespace llvm {
+class Value;
class Type;
class DerivedType;
template<typename T> struct simplify_type;
@@ -55,6 +56,12 @@ template<typename T> struct simplify_type;
class AbstractTypeUser {
protected:
virtual ~AbstractTypeUser(); // Derive from me
+
+ /// setType - It's normally not possible to change a Value's type in place,
+ /// but an AbstractTypeUser subclass that knows what it's doing can be
+ /// permitted to do so with care.
+ void setType(Value *V, const Type *NewTy);
+
public:
/// refineAbstractType - The callback method invoked when an abstract type is
@@ -65,7 +72,7 @@ public:
const Type *NewTy) = 0;
/// The other case which AbstractTypeUsers must be aware of is when a type
- /// makes the transition from being abstract (where it has clients on it's
+ /// makes the transition from being abstract (where it has clients on its
/// AbstractTypeUsers list) to concrete (where it does not). This method
/// notifies ATU's when this occurs for a type.
///
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index ba040e1393bf..be7d5ee37b80 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -56,8 +56,7 @@ protected:
void InitializeAliasAnalysis(Pass *P);
/// getAnalysisUsage - All alias analysis implementations should invoke this
- /// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that
- /// TargetData is required by the pass.
+ /// directly (using AliasAnalysis::getAnalysisUsage(AU)).
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
@@ -65,11 +64,15 @@ public:
AliasAnalysis() : TD(0), AA(0) {}
virtual ~AliasAnalysis(); // We want to be subclassed
- /// getTargetData - Every alias analysis implementation depends on the size of
- /// data items in the current Target. This provides a uniform way to handle
- /// it.
+ /// getTargetData - Return a pointer to the current TargetData object, or
+ /// null if no TargetData object is available.
///
- const TargetData &getTargetData() const { return *TD; }
+ const TargetData *getTargetData() const { return TD; }
+
+ /// getTypeStoreSize - Return the TargetData store size for the given type,
+ /// if known, or a conservative value otherwise.
+ ///
+ unsigned getTypeStoreSize(const Type *Ty);
//===--------------------------------------------------------------------===//
/// Alias Queries...
@@ -344,7 +347,7 @@ bool isNoAliasCall(const Value *V);
/// isIdentifiedObject - Return true if this pointer refers to a distinct and
/// identifiable object. This returns true for:
-/// Global Variables and Functions
+/// Global Variables and Functions (but not Global Aliases)
/// Allocas and Mallocs
/// ByVal and NoAlias Arguments
/// NoAlias returns
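
Sketch of the revised contract (both members appear in the hunk above): getTargetData() may now legitimately return null, and getTypeStoreSize() is the query that degrades to a conservative answer without it.

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    unsigned storeSize(AliasAnalysis &AA, const Type *Ty) {
      // Exact when TargetData is available, conservative otherwise.
      return AA.getTypeStoreSize(Ty);
    }

    bool hasTargetData(const AliasAnalysis &AA) {
      return AA.getTargetData() != 0;  // null is now a legal result
    }
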
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 786c1d15ba1a..239f30f9384e 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -18,9 +18,8 @@
#define LLVM_ANALYSIS_ALIASSETTRACKER_H
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include <vector>
@@ -155,12 +154,12 @@ public:
iterator end() const { return iterator(); }
bool empty() const { return PtrList == 0; }
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
void dump() const;
/// Define an iterator for alias sets... this is just a forward iterator.
- class iterator : public forward_iterator<PointerRec, ptrdiff_t> {
+ class iterator : public std::iterator<std::forward_iterator_tag,
+ PointerRec, ptrdiff_t> {
PointerRec *CurNode;
public:
explicit iterator(PointerRec *CN = 0) : CurNode(CN) {}
@@ -245,18 +244,38 @@ private:
bool aliasesCallSite(CallSite CS, AliasAnalysis &AA) const;
};
-inline std::ostream& operator<<(std::ostream &OS, const AliasSet &AS) {
+inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) {
AS.print(OS);
return OS;
}
class AliasSetTracker {
+ /// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be
+ /// notified whenever a Value is deleted.
+ class ASTCallbackVH : public CallbackVH {
+ AliasSetTracker *AST;
+ virtual void deleted();
+ public:
+ ASTCallbackVH(Value *V, AliasSetTracker *AST = 0);
+ ASTCallbackVH &operator=(Value *V);
+ };
+ /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that ASTCallbackVH
+ /// is not a POD (it needs its destructor called).
+ struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {
+ static bool isPod() { return false; }
+ };
+
AliasAnalysis &AA;
ilist<AliasSet> AliasSets;
+ typedef DenseMap<ASTCallbackVH, AliasSet::PointerRec*,
+ ASTCallbackVHDenseMapInfo>
+ PointerMapType;
+
// Map from pointers to their node
- DenseMap<Value*, AliasSet::PointerRec*> PointerMap;
+ PointerMapType PointerMap;
+
public:
/// AliasSetTracker ctor - Create an empty collection of AliasSets, and use
/// the specified alias analysis object to disambiguate load and store
@@ -354,8 +373,7 @@ public:
iterator begin() { return AliasSets.begin(); }
iterator end() { return AliasSets.end(); }
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
void dump() const;
private:
@@ -365,7 +383,7 @@ private:
// getEntryFor - Just like operator[] on the map, except that it creates an
// entry for the pointer if it doesn't already exist.
AliasSet::PointerRec &getEntryFor(Value *V) {
- AliasSet::PointerRec *&Entry = PointerMap[V];
+ AliasSet::PointerRec *&Entry = PointerMap[ASTCallbackVH(V, this)];
if (Entry == 0)
Entry = new AliasSet::PointerRec(V);
return *Entry;
@@ -383,7 +401,7 @@ private:
AliasSet *findAliasSetForCallSite(CallSite CS);
};
-inline std::ostream& operator<<(std::ostream &OS, const AliasSetTracker &AST) {
+inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) {
AST.print(OS);
return OS;
}
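For illustration only, the std::ostream printing overloads removed above give way to raw_ostream; a hypothetical debugging helper:

#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: dump all alias sets to stderr.
static void printAliasSets(const AliasSetTracker &AST) {
  // operator<<(raw_ostream&, const AliasSetTracker&) forwards to print().
  errs() << AST;
}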
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index de839694dc8a..bcb6dee033dc 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -55,6 +55,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/System/IncludeFile.h"
#include <map>
@@ -77,7 +78,7 @@ protected:
public:
static char ID; // Class identification, replacement for typeinfo
//===---------------------------------------------------------------------
- // Accessors...
+ // Accessors.
//
typedef FunctionMapTy::iterator iterator;
typedef FunctionMapTy::const_iterator const_iterator;
@@ -107,6 +108,7 @@ public:
/// Returns the CallGraphNode which is used to represent undetermined calls
/// into the callgraph. Override this if you want behavioral inheritance.
virtual CallGraphNode* getExternalCallingNode() const { return 0; }
+ virtual CallGraphNode* getCallsExternalNode() const { return 0; }
/// Return the root/main method in the module, or some other root node, such
/// as the externalcallingnode. Overload these if you want behavioral inheritance.
@@ -130,19 +132,13 @@ public:
return removeFunctionFromModule((*this)[F]);
}
- /// changeFunction - This method changes the function associated with this
- /// CallGraphNode, for use by transformations that need to change the
- /// prototype of a Function (thus they must create a new Function and move the
- /// old code over).
- void changeFunction(Function *OldF, Function *NewF);
-
/// getOrInsertFunction - This method is identical to calling operator[], but
/// it will insert a new CallGraphNode for the specified function if one does
/// not already exist.
CallGraphNode *getOrInsertFunction(const Function *F);
//===---------------------------------------------------------------------
- // Pass infrastructure interface glue code...
+ // Pass infrastructure interface glue code.
//
protected:
CallGraph() {}
@@ -155,35 +151,50 @@ public:
///
void initialize(Module &M);
- virtual void print(std::ostream &o, const Module *M) const;
- void print(std::ostream *o, const Module *M) const { if (o) print(*o, M); }
+ void print(raw_ostream &o, Module *) const;
void dump() const;
-
protected:
// destroy - Release memory for the call graph
virtual void destroy();
};
//===----------------------------------------------------------------------===//
-// CallGraphNode class definition
+// CallGraphNode class definition.
//
class CallGraphNode {
- Function *F;
- typedef std::pair<CallSite,CallGraphNode*> CallRecord;
+ AssertingVH<Function> F;
+
+ // CallRecord - This is a pair of the calling instruction (a call or invoke)
+ // and the callgraph node being called.
+public:
+ typedef std::pair<WeakVH, CallGraphNode*> CallRecord;
+private:
std::vector<CallRecord> CalledFunctions;
-
- CallGraphNode(const CallGraphNode &); // Do not implement
+
+ /// NumReferences - This is the number of times that this CallGraphNode occurs
+ /// in the CalledFunctions array of this or other CallGraphNodes.
+ unsigned NumReferences;
+
+ CallGraphNode(const CallGraphNode &); // DO NOT IMPLEMENT
+ void operator=(const CallGraphNode &); // DO NOT IMPLEMENT
+
+ void DropRef() { --NumReferences; }
+ void AddRef() { ++NumReferences; }
public:
typedef std::vector<CallRecord> CalledFunctionsVector;
+
+ // CallGraphNode ctor - Create a node for the specified function.
+ inline CallGraphNode(Function *f) : F(f), NumReferences(0) {}
+
//===---------------------------------------------------------------------
- // Accessor methods...
+ // Accessor methods.
//
typedef std::vector<CallRecord>::iterator iterator;
typedef std::vector<CallRecord>::const_iterator const_iterator;
- // getFunction - Return the function that this call graph node represents...
+ // getFunction - Return the function that this call graph node represents.
Function *getFunction() const { return F; }
inline iterator begin() { return CalledFunctions.begin(); }
@@ -193,17 +204,21 @@ public:
inline bool empty() const { return CalledFunctions.empty(); }
inline unsigned size() const { return (unsigned)CalledFunctions.size(); }
- // Subscripting operator - Return the i'th called function...
+ /// getNumReferences - Return the number of other CallGraphNodes in this
+ /// CallGraph that reference this node in their callee list.
+ unsigned getNumReferences() const { return NumReferences; }
+
+ // Subscripting operator - Return the i'th called function.
//
CallGraphNode *operator[](unsigned i) const {
+ assert(i < CalledFunctions.size() && "Invalid index");
return CalledFunctions[i].second;
}
/// dump - Print out this call graph node.
///
void dump() const;
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
//===---------------------------------------------------------------------
// Methods to keep a call graph up to date with a function that has been
@@ -213,15 +228,35 @@ public:
/// removeAllCalledFunctions - As the name implies, this removes all edges
/// from this CallGraphNode to any functions it calls.
void removeAllCalledFunctions() {
- CalledFunctions.clear();
+ while (!CalledFunctions.empty()) {
+ CalledFunctions.back().second->DropRef();
+ CalledFunctions.pop_back();
+ }
+ }
+
+ /// stealCalledFunctionsFrom - Move all the callee information from N to this
+ /// node.
+ void stealCalledFunctionsFrom(CallGraphNode *N) {
+ assert(CalledFunctions.empty() &&
+ "Cannot steal callsite information if I already have some");
+ std::swap(CalledFunctions, N->CalledFunctions);
}
+
/// addCalledFunction - Add a function to the list of functions called by this
/// one.
void addCalledFunction(CallSite CS, CallGraphNode *M) {
- CalledFunctions.push_back(std::make_pair(CS, M));
+ CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M));
+ M->AddRef();
}
+ void removeCallEdge(iterator I) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ }
+
+
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
/// should be used sparingly.
@@ -235,16 +270,12 @@ public:
/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
/// from this node to the specified callee function.
void removeOneAbstractEdgeTo(CallGraphNode *Callee);
-
- /// replaceCallSite - Make the edge in the node for Old CallSite be for
- /// New CallSite instead. Note that this method takes linear time, so it
- /// should be used sparingly.
- void replaceCallSite(CallSite Old, CallSite New);
-
- friend class CallGraph;
-
- // CallGraphNode ctor - Create a node for the specified function.
- inline CallGraphNode(Function *f) : F(f) {}
+
+ /// replaceCallEdge - This method replaces the edge in the node for the
+ /// specified call site with a new one. Note that this method takes linear
+ /// time, so it should be used sparingly.
+ void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
+
};
//===----------------------------------------------------------------------===//
@@ -257,7 +288,7 @@ public:
template <> struct GraphTraits<CallGraphNode*> {
typedef CallGraphNode NodeType;
- typedef std::pair<CallSite, CallGraphNode*> CGNPairTy;
+ typedef CallGraphNode::CallRecord CGNPairTy;
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode*> CGNDerefFun;
static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; }
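For illustration only, a sketch of edge rewriting under the new reference counting: callers go through addCalledFunction, removeCallEdge, or stealCalledFunctionsFrom so that NumReferences stays consistent. The helper and its names are hypothetical.

#include "llvm/Analysis/CallGraph.h"

using namespace llvm;

// Hypothetical helper for a transform that rebuilds OldF as NewF:
// move every outgoing edge to the node of the new function.
static void moveOutgoingEdges(CallGraph &CG, Function *OldF, Function *NewF) {
  CallGraphNode *OldNode = CG[OldF];
  CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
  // The swap leaves each callee referenced the same number of times,
  // so every callee's getNumReferences() stays correct.
  NewNode->stealCalledFunctionsFrom(OldNode);
}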
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 5fdf6d2c916c..9805c6cf28d3 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -1,4 +1,4 @@
-//===-- ConstantFolding.h - Analyze constant folding possibilities --------===//
+//===-- ConstantFolding.h - Fold instructions into constants --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This family of functions determines the possibility of performing constant
-// folding.
+// This file declares routines for folding instructions into constants.
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file declares some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
//
//===----------------------------------------------------------------------===//
@@ -22,18 +26,20 @@ namespace llvm {
class TargetData;
class Function;
class Type;
+ class LLVMContext;
/// ConstantFoldInstruction - Attempt to constant fold the specified
/// instruction. If successful, the constant result is returned, if not, null
/// is returned. Note that this function can only fail when attempting to fold
/// instructions like loads and stores, which have no constant expression form.
///
-Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0);
+Constant *ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
+ const TargetData *TD = 0);
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// returned; if not, null is returned.
-Constant *ConstantFoldConstantExpression(ConstantExpr *CE,
+Constant *ConstantFoldConstantExpression(ConstantExpr *CE, LLVMContext &Context,
const TargetData *TD = 0);
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -44,6 +50,7 @@ Constant *ConstantFoldConstantExpression(ConstantExpr *CE,
///
Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant*const * Ops, unsigned NumOps,
+ LLVMContext &Context,
const TargetData *TD = 0);
/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
@@ -52,6 +59,7 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
///
Constant *ConstantFoldCompareInstOperands(unsigned Predicate,
Constant*const * Ops, unsigned NumOps,
+ LLVMContext &Context,
const TargetData *TD = 0);
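For illustration only, a caller updated for the LLVMContext parameter now threaded through the folding entry points; TD stays optional. The wrapper name is hypothetical.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Instruction.h"

using namespace llvm;

// Hypothetical wrapper: fold I to a constant if possible, else null.
static Constant *tryFold(Instruction *I, LLVMContext &Context,
                         const TargetData *TD) {
  // Context is now a required argument; TD may still be null.
  return ConstantFoldInstruction(I, Context, TD);
}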
diff --git a/include/llvm/Analysis/ConstantsScanner.h b/include/llvm/Analysis/ConstantsScanner.h
index bac551f0492a..cdaf68d75a63 100644
--- a/include/llvm/Analysis/ConstantsScanner.h
+++ b/include/llvm/Analysis/ConstantsScanner.h
@@ -17,13 +17,13 @@
#define LLVM_ANALYSIS_CONSTANTSSCANNER_H
#include "llvm/Support/InstIterator.h"
-#include "llvm/ADT/iterator.h"
namespace llvm {
class Constant;
-class constant_iterator : public forward_iterator<const Constant, ptrdiff_t> {
+class constant_iterator : public std::iterator<std::forward_iterator_tag,
+ const Constant, ptrdiff_t> {
const_inst_iterator InstI; // Method instruction iterator
unsigned OpIdx; // Operand index
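For illustration only, constant_iterator keeps its forward-iterator behavior after the base-class change; a hypothetical count over one function:

#include "llvm/Analysis/ConstantsScanner.h"
#include "llvm/Function.h"

using namespace llvm;

// Hypothetical helper: count constant operands visited in F.
static unsigned countConstants(const Function *F) {
  unsigned N = 0;
  for (constant_iterator I = constant_begin(F), E = constant_end(F);
       I != E; ++I)
    ++N;  // each step visits one Constant used by an instruction
  return N;
}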
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 06110d040d62..f76aa46b75d4 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -17,11 +17,16 @@
#ifndef LLVM_ANALYSIS_DEBUGINFO_H
#define LLVM_ANALYSIS_DEBUGINFO_H
+#include "llvm/Metadata.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
+
+#define ATTACH_DEBUG_INFO_TO_AN_INSN 1
namespace llvm {
class BasicBlock;
@@ -37,19 +42,20 @@ namespace llvm {
struct DbgRegionStartInst;
struct DbgRegionEndInst;
class DebugLoc;
- class DebugLocTracker;
+ struct DebugLocTracker;
class Instruction;
+ class LLVMContext;
class DIDescriptor {
- protected:
- GlobalVariable *DbgGV;
+ protected:
+ TrackingVH<MDNode> DbgNode;
- /// DIDescriptor constructor. If the specified GV is non-null, this checks
+ /// DIDescriptor constructor. If the specified node is non-null, check
/// to make sure that the tag in the descriptor matches 'RequiredTag'. If
/// not, the debug info is corrupt and we ignore it.
- DIDescriptor(GlobalVariable *GV, unsigned RequiredTag);
+ DIDescriptor(MDNode *N, unsigned RequiredTag);
- const std::string &getStringField(unsigned Elt, std::string &Result) const;
+ const char *getStringField(unsigned Elt) const;
unsigned getUnsignedField(unsigned Elt) const {
return (unsigned)getUInt64Field(Elt);
}
@@ -58,18 +64,18 @@ namespace llvm {
template <typename DescTy>
DescTy getFieldAs(unsigned Elt) const {
- return DescTy(getDescriptorField(Elt).getGV());
+ return DescTy(getDescriptorField(Elt).getNode());
}
GlobalVariable *getGlobalVariableField(unsigned Elt) const;
public:
- explicit DIDescriptor() : DbgGV(0) {}
- explicit DIDescriptor(GlobalVariable *GV) : DbgGV(GV) {}
+ explicit DIDescriptor() : DbgNode(0) {}
+ explicit DIDescriptor(MDNode *N) : DbgNode(N) {}
- bool isNull() const { return DbgGV == 0; }
+ bool isNull() const { return DbgNode == 0; }
- GlobalVariable *getGV() const { return DbgGV; }
+ MDNode *getNode() const { return DbgNode; }
unsigned getVersion() const {
return getUnsignedField(0) & LLVMDebugVersionMask;
@@ -79,18 +85,32 @@ namespace llvm {
return getUnsignedField(0) & ~LLVMDebugVersionMask;
}
- /// ValidDebugInfo - Return true if V represents valid debug info value.
- static bool ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel);
+ /// ValidDebugInfo - Return true if N represents valid debug info value.
+ static bool ValidDebugInfo(MDNode *N, CodeGenOpt::Level OptLevel);
/// dump - print descriptor.
void dump() const;
+
+ bool isDerivedType() const;
+ bool isCompositeType() const;
+ bool isBasicType() const;
+ bool isVariable() const;
+ bool isSubprogram() const;
+ bool isGlobalVariable() const;
+ bool isScope() const;
+ bool isCompileUnit() const;
+ bool isLexicalBlock() const;
+ bool isSubrange() const;
+ bool isEnumerator() const;
+ bool isType() const;
+ bool isGlobal() const;
};
/// DISubrange - This is used to represent ranges, for array bounds.
class DISubrange : public DIDescriptor {
public:
- explicit DISubrange(GlobalVariable *GV = 0)
- : DIDescriptor(GV, dwarf::DW_TAG_subrange_type) {}
+ explicit DISubrange(MDNode *N = 0)
+ : DIDescriptor(N, dwarf::DW_TAG_subrange_type) {}
int64_t getLo() const { return (int64_t)getUInt64Field(1); }
int64_t getHi() const { return (int64_t)getUInt64Field(2); }
@@ -99,7 +119,8 @@ namespace llvm {
/// DIArray - This descriptor holds an array of descriptors.
class DIArray : public DIDescriptor {
public:
- explicit DIArray(GlobalVariable *GV = 0) : DIDescriptor(GV) {}
+ explicit DIArray(MDNode *N = 0)
+ : DIDescriptor(N) {}
unsigned getNumElements() const;
DIDescriptor getElement(unsigned Idx) const {
@@ -107,37 +128,44 @@ namespace llvm {
}
};
+ /// DIScope - A base class for various scopes.
+ class DIScope : public DIDescriptor {
+ public:
+ explicit DIScope(MDNode *N = 0) : DIDescriptor (N) {
+ if (DbgNode && !isScope())
+ DbgNode = 0;
+ }
+ virtual ~DIScope() {}
+
+ const char *getFilename() const;
+ const char *getDirectory() const;
+ };
+
/// DICompileUnit - A wrapper for a compile unit.
- class DICompileUnit : public DIDescriptor {
+ class DICompileUnit : public DIScope {
public:
- explicit DICompileUnit(GlobalVariable *GV = 0)
- : DIDescriptor(GV, dwarf::DW_TAG_compile_unit) {}
+ explicit DICompileUnit(MDNode *N = 0) : DIScope(N) {
+ if (DbgNode && !isCompileUnit())
+ DbgNode = 0;
+ }
unsigned getLanguage() const { return getUnsignedField(2); }
- const std::string &getFilename(std::string &F) const {
- return getStringField(3, F);
- }
- const std::string &getDirectory(std::string &F) const {
- return getStringField(4, F);
- }
- const std::string &getProducer(std::string &F) const {
- return getStringField(5, F);
- }
-
+ const char *getFilename() const { return getStringField(3); }
+ const char *getDirectory() const { return getStringField(4); }
+ const char *getProducer() const { return getStringField(5); }
+
/// isMain - Each input file is encoded as a separate compile unit in LLVM
/// debugging information output. However, many target specific tool chains
- /// prefer to encode only one compile unit in an object file. In this
+ /// prefer to encode only one compile unit in an object file. In this
/// situation, the LLVM code generator will include debugging information
- /// entities in the compile unit that is marked as main compile unit. The
+ /// entities in the compile unit that is marked as main compile unit. The
/// code generator accepts at most one main compile unit per module. If a
- /// module does not contain any main compile unit then the code generator
+ /// module does not contain any main compile unit then the code generator
/// will emit multiple compile units in the output object file.
bool isMain() const { return getUnsignedField(6); }
bool isOptimized() const { return getUnsignedField(7); }
- const std::string &getFlags(std::string &F) const {
- return getStringField(8, F);
- }
+ const char *getFlags() const { return getStringField(8); }
unsigned getRunTimeVersion() const { return getUnsignedField(9); }
/// Verify - Verify that a compile unit is well formed.
@@ -152,13 +180,11 @@ namespace llvm {
/// type/precision or a file/line pair for location info.
class DIEnumerator : public DIDescriptor {
public:
- explicit DIEnumerator(GlobalVariable *GV = 0)
- : DIDescriptor(GV, dwarf::DW_TAG_enumerator) {}
+ explicit DIEnumerator(MDNode *N = 0)
+ : DIDescriptor(N, dwarf::DW_TAG_enumerator) {}
- const std::string &getName(std::string &F) const {
- return getStringField(1, F);
- }
- uint64_t getEnumValue() const { return getUInt64Field(2); }
+ const char *getName() const { return getStringField(1); }
+ uint64_t getEnumValue() const { return getUInt64Field(2); }
};
/// DIType - This is a wrapper for a type.
@@ -167,43 +193,31 @@ namespace llvm {
class DIType : public DIDescriptor {
public:
enum {
- FlagPrivate = 1 << 0,
- FlagProtected = 1 << 1,
- FlagFwdDecl = 1 << 2
+ FlagPrivate = 1 << 0,
+ FlagProtected = 1 << 1,
+ FlagFwdDecl = 1 << 2,
+ FlagAppleBlock = 1 << 3,
+ FlagBlockByrefStruct = 1 << 4
};
protected:
- DIType(GlobalVariable *GV, unsigned Tag) : DIDescriptor(GV, Tag) {}
+ DIType(MDNode *N, unsigned Tag)
+ : DIDescriptor(N, Tag) {}
// This ctor is used when the Tag has already been validated by a derived
// ctor.
- DIType(GlobalVariable *GV, bool, bool) : DIDescriptor(GV) {}
+ DIType(MDNode *N, bool, bool) : DIDescriptor(N) {}
public:
- /// isDerivedType - Return true if the specified tag is legal for
- /// DIDerivedType.
- static bool isDerivedType(unsigned TAG);
-
- /// isCompositeType - Return true if the specified tag is legal for
- /// DICompositeType.
- static bool isCompositeType(unsigned TAG);
-
- /// isBasicType - Return true if the specified tag is legal for
- /// DIBasicType.
- static bool isBasicType(unsigned TAG) {
- return TAG == dwarf::DW_TAG_base_type;
- }
/// Verify - Verify that a type descriptor is well formed.
bool Verify() const;
public:
- explicit DIType(GlobalVariable *GV);
+ explicit DIType(MDNode *N);
explicit DIType() {}
virtual ~DIType() {}
DIDescriptor getContext() const { return getDescriptorField(1); }
- const std::string &getName(std::string &F) const {
- return getStringField(2, F);
- }
+ const char *getName() const { return getStringField(2); }
DICompileUnit getCompileUnit() const{ return getFieldAs<DICompileUnit>(3); }
unsigned getLineNumber() const { return getUnsignedField(4); }
uint64_t getSizeInBits() const { return getUInt64Field(5); }
@@ -212,9 +226,22 @@ namespace llvm {
// carry this is just plain insane.
uint64_t getOffsetInBits() const { return getUInt64Field(7); }
unsigned getFlags() const { return getUnsignedField(8); }
- bool isPrivate() const { return (getFlags() & FlagPrivate) != 0; }
- bool isProtected() const { return (getFlags() & FlagProtected) != 0; }
- bool isForwardDecl() const { return (getFlags() & FlagFwdDecl) != 0; }
+ bool isPrivate() const {
+ return (getFlags() & FlagPrivate) != 0;
+ }
+ bool isProtected() const {
+ return (getFlags() & FlagProtected) != 0;
+ }
+ bool isForwardDecl() const {
+ return (getFlags() & FlagFwdDecl) != 0;
+ }
+ // isAppleBlockExtension - Return true if this is the Apple Blocks extension.
+ bool isAppleBlockExtension() const {
+ return (getFlags() & FlagAppleBlock) != 0;
+ }
+ bool isBlockByrefStruct() const {
+ return (getFlags() & FlagBlockByrefStruct) != 0;
+ }
/// dump - print type.
void dump() const;
@@ -223,8 +250,8 @@ namespace llvm {
/// DIBasicType - A basic type, like 'int' or 'float'.
class DIBasicType : public DIType {
public:
- explicit DIBasicType(GlobalVariable *GV)
- : DIType(GV, dwarf::DW_TAG_base_type) {}
+ explicit DIBasicType(MDNode *N = 0)
+ : DIType(N, dwarf::DW_TAG_base_type) {}
unsigned getEncoding() const { return getUnsignedField(9); }
@@ -236,13 +263,13 @@ namespace llvm {
/// a typedef, a pointer or reference, etc.
class DIDerivedType : public DIType {
protected:
- explicit DIDerivedType(GlobalVariable *GV, bool, bool)
- : DIType(GV, true, true) {}
+ explicit DIDerivedType(MDNode *N, bool, bool)
+ : DIType(N, true, true) {}
public:
- explicit DIDerivedType(GlobalVariable *GV)
- : DIType(GV, true, true) {
- if (GV && !isDerivedType(getTag()))
- DbgGV = 0;
+ explicit DIDerivedType(MDNode *N = 0)
+ : DIType(N, true, true) {
+ if (DbgNode && !isDerivedType())
+ DbgNode = 0;
}
DIType getTypeDerivedFrom() const { return getFieldAs<DIType>(9); }
@@ -252,6 +279,11 @@ namespace llvm {
uint64_t getOriginalTypeSize() const;
/// dump - print derived type.
void dump() const;
+
+ /// replaceAllUsesWith - Replace all uses of debug info referenced by
+ /// this descriptor. After this completes, the current debug info value
+ /// is erased.
+ void replaceAllUsesWith(DIDescriptor &D);
};
/// DICompositeType - This descriptor holds a type that can refer to multiple
@@ -259,10 +291,10 @@ namespace llvm {
/// FIXME: Why is this a DIDerivedType??
class DICompositeType : public DIDerivedType {
public:
- explicit DICompositeType(GlobalVariable *GV)
- : DIDerivedType(GV, true, true) {
- if (GV && !isCompositeType(getTag()))
- DbgGV = 0;
+ explicit DICompositeType(MDNode *N = 0)
+ : DIDerivedType(N, true, true) {
+ if (N && !isCompositeType())
+ DbgNode = 0;
}
DIArray getTypeArray() const { return getFieldAs<DIArray>(10); }
@@ -278,34 +310,16 @@ namespace llvm {
/// DIGlobal - This is a common class for global variables and subprograms.
class DIGlobal : public DIDescriptor {
protected:
- explicit DIGlobal(GlobalVariable *GV, unsigned RequiredTag)
- : DIDescriptor(GV, RequiredTag) {}
-
- /// isSubprogram - Return true if the specified tag is legal for
- /// DISubprogram.
- static bool isSubprogram(unsigned TAG) {
- return TAG == dwarf::DW_TAG_subprogram;
- }
-
- /// isGlobalVariable - Return true if the specified tag is legal for
- /// DIGlobalVariable.
- static bool isGlobalVariable(unsigned TAG) {
- return TAG == dwarf::DW_TAG_variable;
- }
+ explicit DIGlobal(MDNode *N, unsigned RequiredTag)
+ : DIDescriptor(N, RequiredTag) {}
public:
virtual ~DIGlobal() {}
DIDescriptor getContext() const { return getDescriptorField(2); }
- const std::string &getName(std::string &F) const {
- return getStringField(3, F);
- }
- const std::string &getDisplayName(std::string &F) const {
- return getStringField(4, F);
- }
- const std::string &getLinkageName(std::string &F) const {
- return getStringField(5, F);
- }
+ const char *getName() const { return getStringField(3); }
+ const char *getDisplayName() const { return getStringField(4); }
+ const char *getLinkageName() const { return getStringField(5); }
DICompileUnit getCompileUnit() const{ return getFieldAs<DICompileUnit>(6); }
unsigned getLineNumber() const { return getUnsignedField(7); }
DIType getType() const { return getFieldAs<DIType>(8); }
@@ -320,26 +334,41 @@ namespace llvm {
};
/// DISubprogram - This is a wrapper for a subprogram (e.g. a function).
- class DISubprogram : public DIGlobal {
+ class DISubprogram : public DIScope {
public:
- explicit DISubprogram(GlobalVariable *GV = 0)
- : DIGlobal(GV, dwarf::DW_TAG_subprogram) {}
+ explicit DISubprogram(MDNode *N = 0) : DIScope(N) {
+ if (DbgNode && !isSubprogram())
+ DbgNode = 0;
+ }
+ DIDescriptor getContext() const { return getDescriptorField(2); }
+ const char *getName() const { return getStringField(3); }
+ const char *getDisplayName() const { return getStringField(4); }
+ const char *getLinkageName() const { return getStringField(5); }
+ DICompileUnit getCompileUnit() const{ return getFieldAs<DICompileUnit>(6); }
+ unsigned getLineNumber() const { return getUnsignedField(7); }
DICompositeType getType() const { return getFieldAs<DICompositeType>(8); }
/// getReturnTypeName - Subprogram return types are encoded either as
/// DIType or as DICompositeType.
- const std::string &getReturnTypeName(std::string &F) const {
+ const char *getReturnTypeName() const {
DICompositeType DCT(getFieldAs<DICompositeType>(8));
if (!DCT.isNull()) {
DIArray A = DCT.getTypeArray();
- DIType T(A.getElement(0).getGV());
- return T.getName(F);
+ DIType T(A.getElement(0).getNode());
+ return T.getName();
}
DIType T(getFieldAs<DIType>(8));
- return T.getName(F);
+ return T.getName();
}
+ /// isLocalToUnit - Return true if this subprogram is local to the current
+ /// compile unit, like 'static' in C.
+ unsigned isLocalToUnit() const { return getUnsignedField(9); }
+ unsigned isDefinition() const { return getUnsignedField(10); }
+ const char *getFilename() const { return getCompileUnit().getFilename();}
+ const char *getDirectory() const { return getCompileUnit().getDirectory();}
+
/// Verify - Verify that a subprogram descriptor is well formed.
bool Verify() const;
@@ -354,8 +383,8 @@ namespace llvm {
/// DIGlobalVariable - This is a wrapper for a global variable.
class DIGlobalVariable : public DIGlobal {
public:
- explicit DIGlobalVariable(GlobalVariable *GV = 0)
- : DIGlobal(GV, dwarf::DW_TAG_variable) {}
+ explicit DIGlobalVariable(MDNode *N = 0)
+ : DIGlobal(N, dwarf::DW_TAG_variable) {}
GlobalVariable *getGlobal() const { return getGlobalVariableField(11); }
@@ -370,43 +399,75 @@ namespace llvm {
/// global etc).
class DIVariable : public DIDescriptor {
public:
- explicit DIVariable(GlobalVariable *GV = 0)
- : DIDescriptor(GV) {
- if (GV && !isVariable(getTag()))
- DbgGV = 0;
+ explicit DIVariable(MDNode *N = 0)
+ : DIDescriptor(N) {
+ if (DbgNode && !isVariable())
+ DbgNode = 0;
}
DIDescriptor getContext() const { return getDescriptorField(1); }
- const std::string &getName(std::string &F) const {
- return getStringField(2, F);
- }
+ const char *getName() const { return getStringField(2); }
DICompileUnit getCompileUnit() const{ return getFieldAs<DICompileUnit>(3); }
unsigned getLineNumber() const { return getUnsignedField(4); }
DIType getType() const { return getFieldAs<DIType>(5); }
- /// isVariable - Return true if the specified tag is legal for DIVariable.
- static bool isVariable(unsigned Tag);
/// Verify - Verify that a variable descriptor is well formed.
bool Verify() const;
+ /// hasComplexAddress - Return true if the variable has a complex address.
+ bool hasComplexAddress() const {
+ return getNumAddrElements() > 0;
+ }
+
+ unsigned getNumAddrElements() const { return DbgNode->getNumElements()-6; }
+
+ uint64_t getAddrElement(unsigned Idx) const {
+ return getUInt64Field(Idx+6);
+ }
+
+ /// isBlockByrefVariable - Return true if the variable was declared as
+ /// a "__block" variable (Apple Blocks).
+ bool isBlockByrefVariable() const {
+ return getType().isBlockByrefStruct();
+ }
+
/// dump - print variable.
void dump() const;
};
- /// DIBlock - This is a wrapper for a block (e.g. a function, scope, etc).
- class DIBlock : public DIDescriptor {
+ /// DILexicalBlock - This is a wrapper for a lexical block.
+ class DILexicalBlock : public DIScope {
public:
- explicit DIBlock(GlobalVariable *GV = 0)
- : DIDescriptor(GV, dwarf::DW_TAG_lexical_block) {}
+ explicit DILexicalBlock(MDNode *N = 0) : DIScope(N) {
+ if (DbgNode && !isLexicalBlock())
+ DbgNode = 0;
+ }
+ DIScope getContext() const { return getFieldAs<DIScope>(1); }
+ const char *getDirectory() const { return getContext().getDirectory(); }
+ const char *getFilename() const { return getContext().getFilename(); }
+ };
- DIDescriptor getContext() const { return getDescriptorField(1); }
+ /// DILocation - This object holds location information. This object
+ /// is not associated with any DWARF tag.
+ class DILocation : public DIDescriptor {
+ public:
+ explicit DILocation(MDNode *N) : DIDescriptor(N) { ; }
+
+ unsigned getLineNumber() const { return getUnsignedField(0); }
+ unsigned getColumnNumber() const { return getUnsignedField(1); }
+ DIScope getScope() const { return getFieldAs<DIScope>(2); }
+ DILocation getOrigLocation() const { return getFieldAs<DILocation>(3); }
+ const char *getFilename() const { return getScope().getFilename(); }
+ const char *getDirectory() const { return getScope().getDirectory(); }
};
/// DIFactory - This object assists with the construction of the various
/// descriptors.
class DIFactory {
Module &M;
+ LLVMContext& VMContext;
+
// Cached values for uniquing and faster lookups.
const Type *EmptyStructPtr; // "{}*".
Function *StopPointFn; // llvm.dbg.stoppoint
@@ -420,9 +481,11 @@ namespace llvm {
DIFactory(const DIFactory &); // DO NOT IMPLEMENT
void operator=(const DIFactory&); // DO NOT IMPLEMENT
public:
+ enum ComplexAddrKind { OpPlus=1, OpDeref };
+
explicit DIFactory(Module &m);
- /// GetOrCreateArray - Create an descriptor for an array of descriptors.
+ /// GetOrCreateArray - Create a descriptor for an array of descriptors.
/// This implicitly uniques the arrays created.
DIArray GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys);
@@ -433,19 +496,19 @@ namespace llvm {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit.
DICompileUnit CreateCompileUnit(unsigned LangID,
- const std::string &Filename,
- const std::string &Directory,
- const std::string &Producer,
+ StringRef Filename,
+ StringRef Directory,
+ StringRef Producer,
bool isMain = false,
bool isOptimized = false,
const char *Flags = "",
unsigned RunTimeVer = 0);
/// CreateEnumerator - Create a single enumerator value.
- DIEnumerator CreateEnumerator(const std::string &Name, uint64_t Val);
+ DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val);
/// CreateBasicType - Create a basic type like int, float, etc.
- DIBasicType CreateBasicType(DIDescriptor Context, const std::string &Name,
+ DIBasicType CreateBasicType(DIDescriptor Context, StringRef Name,
DICompileUnit CompileUnit, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
@@ -454,7 +517,7 @@ namespace llvm {
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
@@ -463,7 +526,7 @@ namespace llvm {
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -475,31 +538,43 @@ namespace llvm {
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields.
- DISubprogram CreateSubprogram(DIDescriptor Context, const std::string &Name,
- const std::string &DisplayName,
- const std::string &LinkageName,
+ DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type, bool isLocalToUnit,
bool isDefinition);
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
- CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
- const std::string &DisplayName,
- const std::string &LinkageName,
+ CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type, bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *GV);
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable CreateVariable(unsigned Tag, DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type);
- /// CreateBlock - This creates a descriptor for a lexical block with the
- /// specified parent context.
- DIBlock CreateBlock(DIDescriptor Context);
+ /// CreateComplexVariable - Create a new descriptor for the specified
+ /// variable which has a complex address expression for its address.
+ DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit, unsigned LineNo,
+ DIType Type,
+ SmallVector<Value *, 9> &addr);
+
+ /// CreateLexicalBlock - This creates a descriptor for a lexical block
+ /// with the specified parent context.
+ DILexicalBlock CreateLexicalBlock(DIDescriptor Context);
+
+ /// CreateLocation - Creates a debug info location.
+ DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo,
+ DIScope S, DILocation OrigLoc);
/// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation,
/// inserting it at the end of the specified basic block.
@@ -519,21 +594,22 @@ namespace llvm {
void InsertRegionEnd(DIDescriptor D, BasicBlock *BB);
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
- void InsertDeclare(llvm::Value *Storage, DIVariable D, BasicBlock *BB);
+ void InsertDeclare(llvm::Value *Storage, DIVariable D,
+ BasicBlock *InsertAtEnd);
+
+ /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+ void InsertDeclare(llvm::Value *Storage, DIVariable D,
+ Instruction *InsertBefore);
private:
Constant *GetTagConstant(unsigned TAG);
- Constant *GetStringConstant(const std::string &String);
-
- /// getCastToEmpty - Return the descriptor as a Constant* with type '{}*'.
- Constant *getCastToEmpty(DIDescriptor D);
};
/// Finds the stoppoint corresponding to this instruction, that is the
- /// stoppoint that dominates this instruction
+ /// stoppoint that dominates this instruction
const DbgStopPointInst *findStopPoint(const Instruction *Inst);
- /// Finds the stoppoint corresponding to first real (non-debug intrinsic)
+ /// Finds the stoppoint corresponding to the first real (non-debug intrinsic)
/// instruction in this Basic Block, and returns the stoppoint for it.
const DbgStopPointInst *findBBStopPoint(const BasicBlock *BB);
@@ -544,46 +620,46 @@ namespace llvm {
/// Find the debug info descriptor corresponding to this global variable.
Value *findDbgGlobalDeclare(GlobalVariable *V);
- bool getLocationInfo(const Value *V, std::string &DisplayName, std::string &Type,
- unsigned &LineNo, std::string &File, std::string &Dir);
-
- /// CollectDebugInfoAnchors - Collect debugging information anchors.
- void CollectDebugInfoAnchors(Module &M,
- SmallVector<GlobalVariable *, 2> &CompileUnits,
- SmallVector<GlobalVariable *, 4> &GlobalVars,
- SmallVector<GlobalVariable *, 4> &Subprograms);
+ bool getLocationInfo(const Value *V, std::string &DisplayName,
+                      std::string &Type, unsigned &LineNo, std::string &File,
+                      std::string &Dir);
- /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug
/// info intrinsic.
- bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI,
+ bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI,
CodeGenOpt::Level OptLev);
- /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI,
CodeGenOpt::Level OptLev);
- /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI,
CodeGenOpt::Level OptLev);
- /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI,
CodeGenOpt::Level OptLev);
- /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI,
CodeGenOpt::Level OptLev);
- /// ExtractDebugLocation - Extract debug location information
+ /// ExtractDebugLocation - Extract debug location information
/// from llvm.dbg.stoppoint intrinsic.
DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI,
DebugLocTracker &DebugLocInfo);
- /// ExtractDebugLocation - Extract debug location information
+ /// ExtractDebugLocation - Extract debug location information
+ /// from DILocation.
+ DebugLoc ExtractDebugLocation(DILocation &Loc,
+ DebugLocTracker &DebugLocInfo);
+
+ /// ExtractDebugLocation - Extract debug location information
/// from llvm.dbg.func_start intrinsic.
DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI,
DebugLocTracker &DebugLocInfo);
@@ -593,7 +669,74 @@ namespace llvm {
/// isInlinedFnEnd - Return true if REI is ending an inlined function.
bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn);
+ /// DebugInfoFinder - This object collects DebugInfo from a module.
+ class DebugInfoFinder {
+ public:
+ /// processModule - Process entire module and collect debug info
+ /// anchors.
+ void processModule(Module &M);
+
+ private:
+ /// processType - Process DIType.
+ void processType(DIType DT);
+
+ /// processLexicalBlock - Process DILexicalBlock.
+ void processLexicalBlock(DILexicalBlock LB);
+
+ /// processSubprogram - Process DISubprogram.
+ void processSubprogram(DISubprogram SP);
+
+ /// processStopPoint - Process DbgStopPointInst.
+ void processStopPoint(DbgStopPointInst *SPI);
+
+ /// processFuncStart - Process DbgFuncStartInst.
+ void processFuncStart(DbgFuncStartInst *FSI);
+
+ /// processRegionStart - Process DbgRegionStart.
+ void processRegionStart(DbgRegionStartInst *DRS);
+
+ /// processRegionEnd - Process DbgRegionEnd.
+ void processRegionEnd(DbgRegionEndInst *DRE);
+
+ /// processDeclare - Process DbgDeclareInst.
+ void processDeclare(DbgDeclareInst *DDI);
+
+ /// addCompileUnit - Add compile unit into CUs.
+ bool addCompileUnit(DICompileUnit CU);
+
+ /// addGlobalVariable - Add global variable into GVs.
+ bool addGlobalVariable(DIGlobalVariable DIG);
+
+ /// addSubprogram - Add subprogram into SPs.
+ bool addSubprogram(DISubprogram SP);
+
+ /// addType - Add type into Tys.
+ bool addType(DIType DT);
+
+ public:
+ typedef SmallVector<MDNode *, 8>::iterator iterator;
+ iterator compile_unit_begin() { return CUs.begin(); }
+ iterator compile_unit_end() { return CUs.end(); }
+ iterator subprogram_begin() { return SPs.begin(); }
+ iterator subprogram_end() { return SPs.end(); }
+ iterator global_variable_begin() { return GVs.begin(); }
+ iterator global_variable_end() { return GVs.end(); }
+ iterator type_begin() { return TYs.begin(); }
+ iterator type_end() { return TYs.end(); }
+
+ unsigned compile_unit_count() { return CUs.size(); }
+ unsigned global_variable_count() { return GVs.size(); }
+ unsigned subprogram_count() { return SPs.size(); }
+ unsigned type_count() { return TYs.size(); }
+
+ private:
+ SmallVector<MDNode *, 8> CUs; // Compile Units
+ SmallVector<MDNode *, 8> SPs; // Subprograms
+ SmallVector<MDNode *, 8> GVs; // Global Variables
+ SmallVector<MDNode *, 8> TYs; // Types
+ SmallPtrSet<MDNode *, 64> NodesSeen;
+ };
} // end namespace llvm
#endif
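For illustration only, a sketch against the reworked interface: descriptors now wrap an MDNode*, string getters return const char*, and the new DebugInfoFinder replaces the removed CollectDebugInfoAnchors. The helper name is hypothetical.

#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: print the file name of every compile unit.
static void listCompileUnits(Module &M) {
  DebugInfoFinder Finder;
  Finder.processModule(M);
  for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
       E = Finder.compile_unit_end(); I != E; ++I) {
    DICompileUnit CU(*I);                // wraps an MDNode*, not a GV
    errs() << CU.getFilename() << "\n";  // const char*, no temporary string
  }
}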
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 366d492b1145..f63e31c36bab 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -22,7 +22,6 @@
#define LLVM_ANALYSIS_DOMINATORS_H
#include "llvm/Pass.h"
-#include "llvm/BasicBlock.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
@@ -32,6 +31,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <map>
#include <set>
@@ -82,12 +82,12 @@ public:
typedef typename std::vector<DomTreeNodeBase<NodeT> *>::iterator iterator;
typedef typename std::vector<DomTreeNodeBase<NodeT> *>::const_iterator
const_iterator;
-
+
iterator begin() { return Children.begin(); }
iterator end() { return Children.end(); }
const_iterator begin() const { return Children.begin(); }
const_iterator end() const { return Children.end(); }
-
+
NodeT *getBlock() const { return TheBB; }
DomTreeNodeBase<NodeT> *getIDom() const { return IDom; }
const std::vector<DomTreeNodeBase<NodeT>*> &getChildren() const {
@@ -96,7 +96,7 @@ public:
DomTreeNodeBase(NodeT *BB, DomTreeNodeBase<NodeT> *iDom)
: TheBB(BB), IDom(iDom), DFSNumIn(-1), DFSNumOut(-1) { }
-
+
DomTreeNodeBase<NodeT> *addChild(DomTreeNodeBase<NodeT> *C) {
Children.push_back(C);
return C;
@@ -109,7 +109,7 @@ public:
void clearAllChildren() {
Children.clear();
}
-
+
bool compare(DomTreeNodeBase<NodeT> *Other) {
if (getNumChildren() != Other->getNumChildren())
return true;
@@ -143,7 +143,7 @@ public:
IDom->Children.push_back(this);
}
}
-
+
/// getDFSNumIn/getDFSNumOut - These are an internal implementation detail, do
/// not call them.
unsigned getDFSNumIn() const { return DFSNumIn; }
@@ -161,22 +161,22 @@ EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase<BasicBlock>);
EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
template<class NodeT>
-static std::ostream &operator<<(std::ostream &o,
- const DomTreeNodeBase<NodeT> *Node) {
+static raw_ostream &operator<<(raw_ostream &o,
+ const DomTreeNodeBase<NodeT> *Node) {
if (Node->getBlock())
WriteAsOperand(o, Node->getBlock(), false);
else
o << " <<exit node>>";
-
+
o << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "}";
-
+
return o << "\n";
}
template<class NodeT>
-static void PrintDomTree(const DomTreeNodeBase<NodeT> *N, std::ostream &o,
+static void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &o,
unsigned Lev) {
- o << std::string(2*Lev, ' ') << "[" << Lev << "] " << N;
+ o.indent(2*Lev) << "[" << Lev << "] " << N;
for (typename DomTreeNodeBase<NodeT>::const_iterator I = N->begin(),
E = N->end(); I != E; ++I)
PrintDomTree<NodeT>(*I, o, Lev+1);
@@ -233,7 +233,7 @@ protected:
Vertex.clear();
RootNode = 0;
}
-
+
// NewBB is split and now it has one successor. Update dominator tree to
// reflect this change.
template<class N, class GraphT>
@@ -320,7 +320,7 @@ public:
DomTreeNodeBase<NodeT>* MyNd = I->second;
DomTreeNodeBase<NodeT>* OtherNd = OI->second;
-
+
if (MyNd->compare(OtherNd))
return true;
}
@@ -352,7 +352,7 @@ public:
/// Note that this is not a constant time operation!
///
bool properlyDominates(const DomTreeNodeBase<NodeT> *A,
- DomTreeNodeBase<NodeT> *B) const {
+ const DomTreeNodeBase<NodeT> *B) const {
if (A == 0 || B == 0) return false;
return dominatedBySlowTreeWalk(A, B);
}
@@ -378,12 +378,12 @@ public:
&& "This is not implemented for post dominators");
return dominates(&A->getParent()->front(), A);
}
-
+
/// dominates - Returns true iff A dominates B. Note that this is not a
/// constant time operation!
///
inline bool dominates(const DomTreeNodeBase<NodeT> *A,
- DomTreeNodeBase<NodeT> *B) {
+ const DomTreeNodeBase<NodeT> *B) {
if (B == A)
return true; // A node trivially dominates itself.
@@ -404,13 +404,17 @@ public:
return dominatedBySlowTreeWalk(A, B);
}
- inline bool dominates(NodeT *A, NodeT *B) {
+ inline bool dominates(const NodeT *A, const NodeT *B) {
if (A == B)
return true;
-
- return dominates(getNode(A), getNode(B));
+
+ // Cast away the const qualifiers here. This is ok since
+ // this function doesn't actually return the values returned
+ // from getNode.
+ return dominates(getNode(const_cast<NodeT *>(A)),
+ getNode(const_cast<NodeT *>(B)));
}
-
+
NodeT *getRoot() const {
assert(this->Roots.size() == 1 && "Should always have entry node!");
return this->Roots[0];
@@ -522,7 +526,7 @@ public:
assert(getNode(BB) && "Removing node that isn't in dominator tree.");
DomTreeNodes.erase(BB);
}
-
+
/// splitBlock - BB is split and now it has one successor. Update dominator
/// tree to reflect this change.
void splitBlock(NodeT* NewBB) {
@@ -534,7 +538,7 @@ public:
/// print - Convert to human readable form
///
- virtual void print(std::ostream &o, const Module* ) const {
+ void print(raw_ostream &o) const {
o << "=============================--------------------------------\n";
if (this->isPostDominator())
o << "Inorder PostDominator Tree: ";
@@ -544,17 +548,11 @@ public:
o << "DFSNumbers invalid: " << SlowQueries << " slow queries.";
o << "\n";
- PrintDomTree<NodeT>(getRootNode(), o, 1);
+ // The postdom tree can have a null root if there are no returns.
+ if (getRootNode())
+ PrintDomTree<NodeT>(getRootNode(), o, 1);
}
-
- void print(std::ostream *OS, const Module* M = 0) const {
- if (OS) print(*OS, M);
- }
-
- virtual void dump() {
- print(llvm::cerr);
- }
-
+
protected:
template<class GraphT>
friend void Compress(DominatorTreeBase<typename GraphT::NodeType>& DT,
@@ -569,16 +567,16 @@ protected:
friend void Link(DominatorTreeBase<typename GraphT::NodeType>& DT,
unsigned DFSNumV, typename GraphT::NodeType* W,
typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &WInfo);
-
+
template<class GraphT>
friend unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
typename GraphT::NodeType* V,
unsigned N);
-
+
template<class FuncT, class N>
friend void Calculate(DominatorTreeBase<typename GraphTraits<N>::NodeType>& DT,
FuncT& F);
-
+
/// updateDFSNumbers - Assign In and Out numbers to the nodes while walking
/// dominator tree in dfs order.
void updateDFSNumbers() {
@@ -606,17 +604,17 @@ protected:
// Otherwise, recursively visit this child.
DomTreeNodeBase<NodeT> *Child = *ChildIt;
++WorkStack.back().second;
-
+
WorkStack.push_back(std::make_pair(Child, Child->begin()));
Child->DFSNumIn = DFSNum++;
}
}
}
-
+
SlowQueries = 0;
DFSInfoValid = true;
}
-
+
DomTreeNodeBase<NodeT> *getNodeForBlock(NodeT *BB) {
typename DomTreeNodeMapType::iterator I = this->DomTreeNodes.find(BB);
if (I != this->DomTreeNodes.end() && I->second)
@@ -634,31 +632,31 @@ protected:
DomTreeNodeBase<NodeT> *C = new DomTreeNodeBase<NodeT>(BB, IDomNode);
return this->DomTreeNodes[BB] = IDomNode->addChild(C);
}
-
+
inline NodeT *getIDom(NodeT *BB) const {
typename DenseMap<NodeT*, NodeT*>::const_iterator I = IDoms.find(BB);
return I != IDoms.end() ? I->second : 0;
}
-
+
inline void addRoot(NodeT* BB) {
this->Roots.push_back(BB);
}
-
+
public:
/// recalculate - compute a dominator tree for the given function
template<class FT>
void recalculate(FT& F) {
if (!this->IsPostDominators) {
reset();
-
+
// Initialize roots
this->Roots.push_back(&F.front());
this->IDoms[&F.front()] = 0;
this->DomTreeNodes[&F.front()] = 0;
this->Vertex.push_back(0);
-
+
Calculate<FT, NodeT*>(*this, F);
-
+
updateDFSNumbers();
} else {
reset(); // Reset from the last time we were run...
@@ -675,7 +673,7 @@ public:
}
this->Vertex.push_back(0);
-
+
Calculate<FT, Inverse<NodeT*> >(*this, F);
}
}
@@ -691,18 +689,18 @@ class DominatorTree : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
DominatorTreeBase<BasicBlock>* DT;
-
+
DominatorTree() : FunctionPass(&ID) {
DT = new DominatorTreeBase<BasicBlock>(false);
}
-
+
~DominatorTree() {
DT->releaseMemory();
delete DT;
}
-
+
DominatorTreeBase<BasicBlock>& getBase() { return *DT; }
-
+
/// getRoots - Return the root blocks of the current CFG. This may include
/// multiple blocks if we are computing post dominators. For forward
/// dominators, this will always be a single block (the entry node).
@@ -710,11 +708,11 @@ public:
inline const std::vector<BasicBlock*> &getRoots() const {
return DT->getRoots();
}
-
+
inline BasicBlock *getRoot() const {
return DT->getRoot();
}
-
+
inline DomTreeNode *getRootNode() const {
return DT->getRootNode();
}
@@ -724,10 +722,10 @@ public:
inline bool compare(DominatorTree &Other) const {
DomTreeNode *R = getRootNode();
DomTreeNode *OtherR = Other.getRootNode();
-
+
if (!R || !OtherR || R->getBlock() != OtherR->getBlock())
return true;
-
+
if (DT->compare(Other.getBase()))
return true;
@@ -735,111 +733,91 @@ public:
}
virtual bool runOnFunction(Function &F);
-
+
+ virtual void verifyAnalysis() const;
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-
+
inline bool dominates(DomTreeNode* A, DomTreeNode* B) const {
return DT->dominates(A, B);
}
-
- inline bool dominates(BasicBlock* A, BasicBlock* B) const {
+
+ inline bool dominates(const BasicBlock* A, const BasicBlock* B) const {
return DT->dominates(A, B);
}
-
+
// dominates - Return true if A dominates B. This performs the
// special checks necessary if A and B are in the same basic block.
- bool dominates(Instruction *A, Instruction *B) const {
- BasicBlock *BBA = A->getParent(), *BBB = B->getParent();
- if (BBA != BBB) return DT->dominates(BBA, BBB);
-
- // It is not possible to determine dominance between two PHI nodes
- // based on their ordering.
- if (isa<PHINode>(A) && isa<PHINode>(B))
- return false;
-
- // Loop through the basic block until we find A or B.
- BasicBlock::iterator I = BBA->begin();
- for (; &*I != A && &*I != B; ++I) /*empty*/;
+ bool dominates(const Instruction *A, const Instruction *B) const;
- //if(!DT.IsPostDominators) {
- // A dominates B if it is found first in the basic block.
- return &*I == A;
- //} else {
- // // A post-dominates B if B is found first in the basic block.
- // return &*I == B;
- //}
- }
-
- inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const {
+ bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const {
return DT->properlyDominates(A, B);
}
-
- inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const {
+
+ bool properlyDominates(BasicBlock *A, BasicBlock *B) const {
return DT->properlyDominates(A, B);
}
-
+
/// findNearestCommonDominator - Find nearest common dominator basic block
/// for basic block A and B. If there is no such block then return NULL.
inline BasicBlock *findNearestCommonDominator(BasicBlock *A, BasicBlock *B) {
return DT->findNearestCommonDominator(A, B);
}
-
+
inline DomTreeNode *operator[](BasicBlock *BB) const {
return DT->getNode(BB);
}
-
+
/// getNode - return the (Post)DominatorTree node for the specified basic
/// block. This is the same as using operator[] on this class.
///
inline DomTreeNode *getNode(BasicBlock *BB) const {
return DT->getNode(BB);
}
-
+
/// addNewBlock - Add a new node to the dominator tree information. This
/// creates a new node as a child of DomBB dominator node,linking it into
/// the children list of the immediate dominator.
inline DomTreeNode *addNewBlock(BasicBlock *BB, BasicBlock *DomBB) {
return DT->addNewBlock(BB, DomBB);
}
-
+
/// changeImmediateDominator - This method is used to update the dominator
/// tree information when a node's immediate dominator changes.
///
inline void changeImmediateDominator(BasicBlock *N, BasicBlock* NewIDom) {
DT->changeImmediateDominator(N, NewIDom);
}
-
+
inline void changeImmediateDominator(DomTreeNode *N, DomTreeNode* NewIDom) {
DT->changeImmediateDominator(N, NewIDom);
}
-
+
/// eraseNode - Removes a node from the dominator tree. Block must not
/// dominate any other blocks. Removes node from its immediate dominator's
/// children list. Deletes dominator node associated with basic block BB.
inline void eraseNode(BasicBlock *BB) {
DT->eraseNode(BB);
}
-
+
/// splitBlock - BB is split and now it has one successor. Update dominator
/// tree to reflect this change.
inline void splitBlock(BasicBlock* NewBB) {
DT->splitBlock(NewBB);
}
-
+
bool isReachableFromEntry(BasicBlock* A) {
return DT->isReachableFromEntry(A);
}
-
-
+
+
virtual void releaseMemory() {
DT->releaseMemory();
}
-
- virtual void print(std::ostream &OS, const Module* M= 0) const {
- DT->print(OS, M);
- }
+
+ virtual void print(raw_ostream &OS, const Module* M= 0) const;
};
//===-------------------------------------
@@ -849,7 +827,7 @@ public:
template <> struct GraphTraits<DomTreeNode *> {
typedef DomTreeNode NodeType;
typedef NodeType::iterator ChildIteratorType;
-
+
static NodeType *getEntryNode(NodeType *N) {
return N;
}
@@ -881,7 +859,7 @@ protected:
DomSetMapType Frontiers;
std::vector<BasicBlock*> Roots;
const bool IsPostDominators;
-
+
public:
DominanceFrontierBase(void *ID, bool isPostDom)
: FunctionPass(ID), IsPostDominators(isPostDom) {}
@@ -891,7 +869,7 @@ public:
/// dominators, this will always be a single block (the entry node).
///
inline const std::vector<BasicBlock*> &getRoots() const { return Roots; }
-
+
/// isPostDominator - Returns true if this analysis is based on postdominators
///
bool isPostDominator() const { return IsPostDominators; }
@@ -987,11 +965,7 @@ public:
/// print - Convert to human readable form
///
- virtual void print(std::ostream &OS, const Module* = 0) const;
- void print(std::ostream *OS, const Module* M = 0) const {
- if (OS) print(*OS, M);
- }
- virtual void dump();
+ virtual void print(raw_ostream &OS, const Module* = 0) const;
};
@@ -1019,6 +993,8 @@ public:
return false;
}
+ virtual void verifyAnalysis() const;
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTree>();
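For illustration only, the dominance queries above now take const pointers and the Instruction overload moved out of line; a hypothetical use:

#include "llvm/Analysis/Dominators.h"

using namespace llvm;

// Hypothetical check: does the definition dominate this use site?
static bool defDominatesUse(const DominatorTree &DT,
                            const Instruction *Def, const Instruction *U) {
  // Same-block ordering (including the PHI special case) is handled
  // inside the out-of-line overload.
  return DT.dominates(Def, U);
}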
diff --git a/include/llvm/Analysis/FindUsedTypes.h b/include/llvm/Analysis/FindUsedTypes.h
index c897af3a58a6..1337385848ed 100644
--- a/include/llvm/Analysis/FindUsedTypes.h
+++ b/include/llvm/Analysis/FindUsedTypes.h
@@ -37,8 +37,7 @@ public:
/// passed in, then the types are printed symbolically if possible, using the
/// symbol table from the module.
///
- void print(std::ostream &o, const Module *M) const;
- void print(std::ostream *o, const Module *M) const { if (o) print(*o, M); }
+ void print(raw_ostream &o, const Module *M) const;
private:
/// IncorporateType - Incorporate one type and all of its subtypes into the
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 40396e2fcaf2..948c67592424 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -25,7 +25,7 @@ namespace llvm {
class DominatorTree;
class Instruction;
class Value;
-class IVUsersOfOneStride;
+struct IVUsersOfOneStride;
/// IVStrideUse - Keep track of one use of a strided induction variable, where
/// the stride is stored externally. The Offset member keeps track of the
@@ -34,7 +34,7 @@ class IVUsersOfOneStride;
class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
public:
IVStrideUse(IVUsersOfOneStride *parent,
- const SCEV* offset,
+ const SCEV *offset,
Instruction* U, Value *O)
: CallbackVH(U), Parent(parent), Offset(offset),
OperandValToReplace(O),
@@ -58,10 +58,10 @@ public:
/// getOffset - Return the offset to add to a theoretical induction
/// variable that starts at zero and counts up by the stride to compute
/// the value for the use. This always has the same type as the stride.
- const SCEV* getOffset() const { return Offset; }
+ const SCEV *getOffset() const { return Offset; }
/// setOffset - Assign a new offset to this use.
- void setOffset(const SCEV* Val) {
+ void setOffset(const SCEV *Val) {
Offset = Val;
}
@@ -96,7 +96,7 @@ private:
IVUsersOfOneStride *Parent;
/// Offset - The offset to add to the base induction expression.
- const SCEV* Offset;
+ const SCEV *Offset;
/// OperandValToReplace - The Value of the operand in the user instruction
/// that this IVStrideUse is representing.
@@ -158,7 +158,7 @@ public:
/// initial value and the operand that uses the IV.
ilist<IVStrideUse> Users;
- void addUser(const SCEV* Offset, Instruction *User, Value *Operand) {
+ void addUser(const SCEV *Offset, Instruction *User, Value *Operand) {
Users.push_back(new IVStrideUse(this, Offset, User, Operand));
}
};
@@ -178,12 +178,12 @@ public:
/// IVUsesByStride - A mapping from the strides in StrideOrder to the
/// uses in IVUses.
- std::map<const SCEV*, IVUsersOfOneStride*> IVUsesByStride;
+ std::map<const SCEV *, IVUsersOfOneStride*> IVUsesByStride;
/// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
/// We use this to iterate over the IVUsesByStride collection without being
/// dependent on random ordering of pointers in the process.
- SmallVector<const SCEV*, 16> StrideOrder;
+ SmallVector<const SCEV *, 16> StrideOrder;
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
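The StrideOrder comment above describes a common determinism trick; the following self-contained C++ sketch (toy types, not LLVM code) shows why the side vector matters: a map keyed by pointers iterates in address order, which can change from run to run, so all iteration goes through a vector that records insertion order.

#include <iostream>
#include <map>
#include <vector>

int main() {
  // Analogues of IVUsesByStride and StrideOrder, with toy key/value types.
  std::map<const int *, const char *> ByStride;
  std::vector<const int *> StrideOrder;

  static const int S1 = 0, S2 = 0, S3 = 0;
  const int *Keys[] = {&S3, &S1, &S2};
  for (const int *K : Keys)
    if (ByStride.insert(std::make_pair(K, "users")).second)
      StrideOrder.push_back(K); // remember first-seen order

  // Iterating StrideOrder is deterministic (S3, S1, S2); iterating
  // ByStride directly would follow whatever addresses the objects got.
  for (const int *K : StrideOrder)
    std::cout << K << " -> " << ByStride[K] << '\n';
  return 0;
}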
@@ -203,13 +203,9 @@ public:
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
- const SCEV* getReplacementExpr(const IVStrideUse &U) const;
+ const SCEV *getReplacementExpr(const IVStrideUse &U) const;
void print(raw_ostream &OS, const Module* = 0) const;
- virtual void print(std::ostream &OS, const Module* = 0) const;
- void print(std::ostream *OS, const Module* M = 0) const {
- if (OS) print(*OS, M);
- }
/// dump - This method is used for debugging.
void dump() const;
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
new file mode 100644
index 000000000000..7ce49d7de332
--- /dev/null
+++ b/include/llvm/Analysis/InlineCost.h
@@ -0,0 +1,180 @@
+//===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements heuristics for inlining decisions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INLINECOST_H
+#define LLVM_ANALYSIS_INLINECOST_H
+
+#include <cassert>
+#include <climits>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+ class Value;
+ class Function;
+ class BasicBlock;
+ class CallSite;
+ template<class PtrType, unsigned SmallSize>
+ class SmallPtrSet;
+
+ // CodeMetrics - Calculate size and a few similar metrics for a set of
+ // basic blocks.
+ struct CodeMetrics {
+ /// NeverInline - True if this callee should never be inlined into a
+ /// caller.
+ bool NeverInline;
+
+ /// usesDynamicAlloca - True if this function calls alloca (in the C sense).
+ bool usesDynamicAlloca;
+
+ /// NumInsts, NumBlocks - Keep track of how large each function is, which
+ /// is used to estimate the code size cost of inlining it.
+ unsigned NumInsts, NumBlocks;
+
+ /// NumVectorInsts - Keep track of how many instructions produce vector
+ /// values. The inliner is more aggressive when inlining vector
+ /// kernels.
+ unsigned NumVectorInsts;
+
+ /// NumRets - Keep track of how many Ret instructions the block contains.
+ unsigned NumRets;
+
+ CodeMetrics() : NeverInline(false), usesDynamicAlloca(false), NumInsts(0),
+ NumBlocks(0), NumVectorInsts(0), NumRets(0) {}
+
+ /// analyzeBasicBlock - Add information about the specified basic block
+ /// to the current structure.
+ void analyzeBasicBlock(const BasicBlock *BB);
+
+ /// analyzeFunction - Add information about the specified function
+ /// to the current structure.
+ void analyzeFunction(Function *F);
+ };
+
+ namespace InlineConstants {
+ // Various magic constants used to adjust heuristics.
+ const int CallPenalty = 5;
+ const int LastCallToStaticBonus = -15000;
+ const int ColdccPenalty = 2000;
+ const int NoreturnPenalty = 10000;
+ }
+
+ /// InlineCost - Represent the cost of inlining a function. This
+ /// supports special values for functions which should "always" or
+ /// "never" be inlined. Otherwise, the cost represents a unitless
+ /// amount; smaller values increase the likelihood of the function
+ /// being inlined.
+ class InlineCost {
+ enum Kind {
+ Value,
+ Always,
+ Never
+ };
+
+ // This is a do-it-yourself implementation of
+ // int Cost : 30;
+ // unsigned Type : 2;
+ // We used to use bitfields, but they were sometimes miscompiled (PR3822).
+ enum { TYPE_BITS = 2 };
+ enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS };
+ unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS;
+
+ Kind getType() const {
+ return Kind(TypedCost >> COST_BITS);
+ }
+
+ int getCost() const {
+ // Sign-extend the bottom COST_BITS bits.
+ return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS;
+ }
+
+ InlineCost(int C, int T) {
+ TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS);
+ assert(getCost() == C && "Cost exceeds InlineCost precision");
+ }
+ public:
+ static InlineCost get(int Cost) { return InlineCost(Cost, Value); }
+ static InlineCost getAlways() { return InlineCost(0, Always); }
+ static InlineCost getNever() { return InlineCost(0, Never); }
+
+ bool isVariable() const { return getType() == Value; }
+ bool isAlways() const { return getType() == Always; }
+ bool isNever() const { return getType() == Never; }
+
+ /// getValue() - Return a "variable" inline cost's amount. It is
+ /// an error to call this on an "always" or "never" InlineCost.
+ int getValue() const {
+ assert(getType() == Value && "Invalid access of InlineCost");
+ return getCost();
+ }
+ };
+
+ /// InlineCostAnalyzer - Cost analyzer used by inliner.
+ class InlineCostAnalyzer {
+ struct ArgInfo {
+ public:
+ unsigned ConstantWeight;
+ unsigned AllocaWeight;
+
+ ArgInfo(unsigned CWeight, unsigned AWeight)
+ : ConstantWeight(CWeight), AllocaWeight(AWeight) {}
+ };
+
+ struct FunctionInfo {
+ CodeMetrics Metrics;
+
+ /// ArgumentWeights - Each formal argument of the function is inspected to
+ /// see if it is used in any contexts where making it a constant or alloca
+ /// would reduce the code size. If so, we add some value to the argument
+ /// entry here.
+ std::vector<ArgInfo> ArgumentWeights;
+
+ /// CountCodeReductionForConstant - Figure out an approximation for how
+ /// many instructions will be constant folded if the specified value is
+ /// constant.
+ unsigned CountCodeReductionForConstant(Value *V);
+
+ /// CountCodeReductionForAlloca - Figure out an approximation of how much
+ /// smaller the function will be if it is inlined into a context where an
+ /// argument becomes an alloca.
+ ///
+ unsigned CountCodeReductionForAlloca(Value *V);
+
+ /// analyzeFunction - Add information about the specified function
+ /// to the current structure.
+ void analyzeFunction(Function *F);
+ };
+
+ std::map<const Function *, FunctionInfo> CachedFunctionInfo;
+
+ public:
+
+ /// getInlineCost - The heuristic used to determine if we should inline the
+ /// function call or not.
+ ///
+ InlineCost getInlineCost(CallSite CS,
+ SmallPtrSet<const Function *, 16> &NeverInline);
+
+ /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+ /// higher threshold to determine if the function call should be inlined.
+ float getInlineFudgeFactor(CallSite CS);
+
+ /// resetCachedCostInfo - Erase any cached cost info for this function.
+ void resetCachedCostInfo(Function* Caller) {
+ CachedFunctionInfo[Caller] = FunctionInfo();
+ }
+ };
+}
+
+#endif
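The TypedCost packing in InlineCost above is self-contained enough to demonstrate outside LLVM. A minimal sketch of the encode/decode round-trip, mirroring the shift-based sign extension used in place of the miscompiled bitfields:

#include <cassert>
#include <climits>

// Mirrors the do-it-yourself bitfield above: two tag bits in the high end,
// a signed cost in the low bits, sign-extended on extraction.
enum { TYPE_BITS = 2 };
enum { COST_BITS = sizeof(unsigned) * CHAR_BIT - TYPE_BITS };

unsigned pack(int Cost, unsigned Tag) {
  // Same shift pair as the original; assumes two's complement.
  return (unsigned(Cost << TYPE_BITS) >> TYPE_BITS) | (Tag << COST_BITS);
}
int unpackCost(unsigned P) {
  return int(P << TYPE_BITS) >> TYPE_BITS; // shift pair sign-extends
}
unsigned unpackTag(unsigned P) { return P >> COST_BITS; }

int main() {
  unsigned P = pack(-42, 1);
  assert(unpackCost(P) == -42 && unpackTag(P) == 1);
  P = pack(1 << 20, 2);
  assert(unpackCost(P) == (1 << 20) && unpackTag(P) == 2);
  return 0;
}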
diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h
index 1da2022f6961..ca8ad73131a9 100644
--- a/include/llvm/Analysis/Interval.h
+++ b/include/llvm/Analysis/Interval.h
@@ -22,11 +22,11 @@
#include "llvm/ADT/GraphTraits.h"
#include <vector>
-#include <iosfwd>
namespace llvm {
class BasicBlock;
+class raw_ostream;
//===----------------------------------------------------------------------===//
//
@@ -98,8 +98,7 @@ public:
bool isLoop() const;
/// print - Show contents in human readable format...
- void print(std::ostream &O) const;
- void print(std::ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
};
/// succ_begin/succ_end - define methods so that Intervals may be used
diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h
index 551bb7243798..d842840b66b5 100644
--- a/include/llvm/Analysis/IntervalIterator.h
+++ b/include/llvm/Analysis/IntervalIterator.h
@@ -233,7 +233,8 @@ private:
};
typedef IntervalIterator<BasicBlock, Function> function_interval_iterator;
-typedef IntervalIterator<Interval, IntervalPartition> interval_part_interval_iterator;
+typedef IntervalIterator<Interval, IntervalPartition>
+ interval_part_interval_iterator;
inline function_interval_iterator intervals_begin(Function *F,
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index feae6d82f82f..c1214e7427a4 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -60,10 +60,7 @@ public:
IntervalPartition(IntervalPartition &I, bool);
// print - Show contents in human readable format...
- virtual void print(std::ostream &O, const Module* = 0) const;
- void print(std::ostream *O, const Module* M = 0) const {
- if (O) print(*O, M);
- }
+ virtual void print(raw_ostream &O, const Module* = 0) const;
// getRootInterval() - Return the root interval that contains the starting
// block of the function.
diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h
index ea17a237caaa..7944af3b8a5a 100644
--- a/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -22,7 +22,7 @@ namespace llvm {
struct LibCallFunctionInfo;
/// LibCallAliasAnalysis - Alias analysis driven from LibCallInfo.
- struct LibCallAliasAnalysis : public FunctionPass, AliasAnalysis {
+ struct LibCallAliasAnalysis : public FunctionPass, public AliasAnalysis {
static char ID; // Class identification
LibCallInfo *LCI;
diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h
index 67da2e7fbc1b..1d386ba88804 100644
--- a/include/llvm/Analysis/LoopDependenceAnalysis.h
+++ b/include/llvm/Analysis/LoopDependenceAnalysis.h
@@ -20,43 +20,102 @@
#ifndef LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
#define LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/raw_ostream.h"
-#include <iosfwd>
+#include "llvm/Support/Allocator.h"
namespace llvm {
- class AliasAnalysis;
- class AnalysisUsage;
- class ScalarEvolution;
- class Value;
+class AliasAnalysis;
+class AnalysisUsage;
+class ScalarEvolution;
+class SCEV;
+class Value;
+class raw_ostream;
- class LoopDependenceAnalysis : public LoopPass {
- Loop *L;
- AliasAnalysis *AA;
- ScalarEvolution *SE;
+class LoopDependenceAnalysis : public LoopPass {
+ AliasAnalysis *AA;
+ ScalarEvolution *SE;
- public:
- static char ID; // Class identification, replacement for typeinfo
- LoopDependenceAnalysis() : LoopPass(&ID) {}
+ /// L - The loop we are currently analysing.
+ Loop *L;
- /// TODO: docs
- bool isDependencePair(const Value*, const Value*) const;
- bool depends(Value*, Value*);
+ /// TODO: doc
+ enum DependenceResult { Independent = 0, Dependent = 1, Unknown = 2 };
- bool runOnLoop(Loop*, LPPassManager&);
+ /// TODO: doc
+ struct Subscript {
+ /// TODO: Add distance, direction, breaking conditions, ...
+ };
- virtual void getAnalysisUsage(AnalysisUsage&) const;
+ /// DependencePair - Represents a data dependence relation between two memory
+ /// reference instructions.
+ struct DependencePair : public FastFoldingSetNode {
+ Value *A;
+ Value *B;
+ DependenceResult Result;
+ SmallVector<Subscript, 4> Subscripts;
- void print(raw_ostream&, const Module* = 0) const;
- virtual void print(std::ostream&, const Module* = 0) const;
- }; // class LoopDependenceAnalysis
+ DependencePair(const FoldingSetNodeID &ID, Value *a, Value *b) :
+ FastFoldingSetNode(ID), A(a), B(b), Result(Unknown), Subscripts() {}
+ };
+ /// findOrInsertDependencePair - Return true if a DependencePair for the
+ /// given Values already exists, false if a new DependencePair had to be
+ /// created. The third argument is set to the pair found or created.
+ bool findOrInsertDependencePair(Value*, Value*, DependencePair*&);
- // createLoopDependenceAnalysisPass - This creates an instance of the
- // LoopDependenceAnalysis pass.
- //
- LoopPass *createLoopDependenceAnalysisPass();
+ /// getLoops - Collect all loops of the loop-nest L in which a given SCEV
+ /// is variant.
+ void getLoops(const SCEV*, DenseSet<const Loop*>*) const;
+
+ /// isLoopInvariant - True if a given SCEV is invariant in all loops of the
+ /// loop-nest starting at the innermost loop L.
+ bool isLoopInvariant(const SCEV*) const;
+
+ /// isAffine - An SCEV is affine with respect to the loop-nest starting at
+ /// the innermost loop L if it is of the form A+B*X where A, B are invariant
+ /// in the loop-nest and X is an induction variable in the loop-nest.
+ bool isAffine(const SCEV*) const;
+
+ /// TODO: doc
+ bool isZIVPair(const SCEV*, const SCEV*) const;
+ bool isSIVPair(const SCEV*, const SCEV*) const;
+ DependenceResult analyseZIV(const SCEV*, const SCEV*, Subscript*) const;
+ DependenceResult analyseSIV(const SCEV*, const SCEV*, Subscript*) const;
+ DependenceResult analyseMIV(const SCEV*, const SCEV*, Subscript*) const;
+ DependenceResult analyseSubscript(const SCEV*, const SCEV*, Subscript*) const;
+ DependenceResult analysePair(DependencePair*) const;
+
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ LoopDependenceAnalysis() : LoopPass(&ID) {}
+
+ /// isDependencePair - Check whether two values can possibly give rise to a
+ /// data dependence: that is the case if both are instructions accessing
+ /// memory and at least one of those accesses is a write.
+ bool isDependencePair(const Value*, const Value*) const;
+
+ /// depends - Return a boolean indicating if there is a data dependence
+ /// between two instructions.
+ bool depends(Value*, Value*);
+
+ bool runOnLoop(Loop*, LPPassManager&);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ void print(raw_ostream&, const Module* = 0) const;
+
+private:
+ FoldingSet<DependencePair> Pairs;
+ BumpPtrAllocator PairAllocator;
+}; // class LoopDependenceAnalysis
+
+// createLoopDependenceAnalysisPass - This creates an instance of the
+// LoopDependenceAnalysis pass.
+//
+LoopPass *createLoopDependenceAnalysisPass();
} // namespace llvm
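analyseZIV above is only declared here; as a rough illustration (not the LLVM implementation), a ZIV pair -- "zero index variable" in the dependence-testing literature -- contains no induction variable at all, so two loop-invariant constant subscripts conflict exactly when they are equal:

#include <iostream>

// Toy illustration: in a ZIV pair both subscripts are loop-invariant, so
// A[c1] and A[c2] can only touch the same element when c1 == c2.
enum DependenceResult { Independent, Dependent, Unknown };

DependenceResult analyseZIV(long S1, long S2) {
  return S1 == S2 ? Dependent : Independent; // never Unknown for constants
}

int main() {
  // for (i = ...) { A[4] = ...; ... = A[8]; } -> subscripts 4 and 8
  std::cout << (analyseZIV(4, 8) == Independent) << '\n'; // prints 1
  std::cout << (analyseZIV(4, 4) == Dependent) << '\n';   // prints 1
  return 0;
}

The SIV and MIV cases declared alongside it handle subscripts involving one or more induction variables and are where the Subscript structure's distance and direction information would come from.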
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 8b293cb7b986..763111063d01 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -8,7 +8,8 @@
//===----------------------------------------------------------------------===//
//
// This file defines the LoopInfo class that is used to identify natural loops
-// and determine the loop depth of various nodes of the CFG. Note that natural
+// and determine the loop depth of various nodes of the CFG. A natural loop
+// has exactly one entry-point, which is called the header. Note that natural
// loops may actually be several loops that share the same header node.
//
// This analysis calculates the nesting structure of loops in a function. For
@@ -31,17 +32,13 @@
#define LLVM_ANALYSIS_LOOP_INFO_H
#include "llvm/Pass.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <ostream>
namespace llvm {
@@ -54,26 +51,27 @@ static void RemoveFromVector(std::vector<T*> &V, T *N) {
class DominatorTree;
class LoopInfo;
-template<class N> class LoopInfoBase;
-template<class N> class LoopBase;
-
-typedef LoopBase<BasicBlock> Loop;
+class Loop;
+template<class N, class M> class LoopInfoBase;
+template<class N, class M> class LoopBase;
//===----------------------------------------------------------------------===//
/// LoopBase class - Instances of this class are used to represent loops that
/// are detected in the flow graph
///
-template<class BlockT>
+template<class BlockT, class LoopT>
class LoopBase {
- LoopBase<BlockT> *ParentLoop;
+ LoopT *ParentLoop;
// SubLoops - Loops contained entirely within this one.
- std::vector<LoopBase<BlockT>*> SubLoops;
+ std::vector<LoopT *> SubLoops;
// Blocks - The list of blocks in this loop. First entry is the header node.
std::vector<BlockT*> Blocks;
- LoopBase(const LoopBase<BlockT> &); // DO NOT IMPLEMENT
- const LoopBase<BlockT>&operator=(const LoopBase<BlockT> &);// DO NOT IMPLEMENT
+ // DO NOT IMPLEMENT
+ LoopBase(const LoopBase<BlockT, LoopT> &);
+ // DO NOT IMPLEMENT
+ const LoopBase<BlockT, LoopT> &operator=(const LoopBase<BlockT, LoopT> &);
public:
/// Loop ctor - This creates an empty loop.
LoopBase() : ParentLoop(0) {}
@@ -87,13 +85,13 @@ public:
/// blocks, where depth 0 is used for blocks not inside any loops.
unsigned getLoopDepth() const {
unsigned D = 1;
- for (const LoopBase<BlockT> *CurLoop = ParentLoop; CurLoop;
+ for (const LoopT *CurLoop = ParentLoop; CurLoop;
CurLoop = CurLoop->ParentLoop)
++D;
return D;
}
BlockT *getHeader() const { return Blocks.front(); }
- LoopBase<BlockT> *getParentLoop() const { return ParentLoop; }
+ LoopT *getParentLoop() const { return ParentLoop; }
/// contains - Return true if the specified basic block is in this loop
///
@@ -103,8 +101,8 @@ public:
/// iterator/begin/end - Return the loops contained entirely within this loop.
///
- const std::vector<LoopBase<BlockT>*> &getSubLoops() const { return SubLoops; }
- typedef typename std::vector<LoopBase<BlockT>*>::const_iterator iterator;
+ const std::vector<LoopT *> &getSubLoops() const { return SubLoops; }
+ typedef typename std::vector<LoopT *>::const_iterator iterator;
iterator begin() const { return SubLoops.begin(); }
iterator end() const { return SubLoops.end(); }
bool empty() const { return SubLoops.empty(); }
@@ -146,14 +144,6 @@ public:
return NumBackEdges;
}
- /// isLoopInvariant - Return true if the specified value is loop invariant
- ///
- inline bool isLoopInvariant(Value *V) const {
- if (Instruction *I = dyn_cast<Instruction>(V))
- return !contains(I->getParent());
- return true; // All non-instructions are loop invariant
- }
-
//===--------------------------------------------------------------------===//
// APIs for simple analysis of the loop.
//
@@ -223,72 +213,22 @@ public:
return 0;
}
- /// getUniqueExitBlocks - Return all unique successor blocks of this loop.
- /// These are the blocks _outside of the current loop_ which are branched to.
- /// This assumes that loop is in canonical form.
- ///
- void getUniqueExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const {
+ /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
+ typedef std::pair<const BlockT*,const BlockT*> Edge;
+ void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const {
// Sort the blocks vector so that we can use binary search to do quick
// lookups.
SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
std::sort(LoopBBs.begin(), LoopBBs.end());
- std::vector<BlockT*> switchExitBlocks;
-
- for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
-
- BlockT *current = *BI;
- switchExitBlocks.clear();
-
- typedef GraphTraits<BlockT*> BlockTraits;
- typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+ typedef GraphTraits<BlockT*> BlockTraits;
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
for (typename BlockTraits::ChildIteratorType I =
BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
- I != E; ++I) {
- if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
- // If block is inside the loop then it is not a exit block.
- continue;
-
- typename InvBlockTraits::ChildIteratorType PI =
- InvBlockTraits::child_begin(*I);
- BlockT *firstPred = *PI;
-
- // If current basic block is this exit block's first predecessor
- // then only insert exit block in to the output ExitBlocks vector.
- // This ensures that same exit block is not inserted twice into
- // ExitBlocks vector.
- if (current != firstPred)
- continue;
-
- // If a terminator has more then two successors, for example SwitchInst,
- // then it is possible that there are multiple edges from current block
- // to one exit block.
- if (std::distance(BlockTraits::child_begin(current),
- BlockTraits::child_end(current)) <= 2) {
- ExitBlocks.push_back(*I);
- continue;
- }
-
- // In case of multiple edges from current block to exit block, collect
- // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
- // duplicate edges.
- if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
- == switchExitBlocks.end()) {
- switchExitBlocks.push_back(*I);
- ExitBlocks.push_back(*I);
- }
- }
- }
- }
-
- /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
- /// block, return that block. Otherwise return null.
- BlockT *getUniqueExitBlock() const {
- SmallVector<BlockT*, 8> UniqueExitBlocks;
- getUniqueExitBlocks(UniqueExitBlocks);
- if (UniqueExitBlocks.size() == 1)
- return UniqueExitBlocks[0];
- return 0;
+ I != E; ++I)
+ if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ // Not in current loop? It must be an exit block.
+ ExitEdges.push_back(std::make_pair(*BI, *I));
}
/// getLoopPreheader - If there is a preheader for this loop, return it. A
@@ -355,178 +295,6 @@ public:
return Latch;
}
-
- /// getCanonicalInductionVariable - Check to see if the loop has a canonical
- /// induction variable: an integer recurrence that starts at 0 and increments
- /// by one each time through the loop. If so, return the phi node that
- /// corresponds to it.
- ///
- /// The IndVarSimplify pass transforms loops to have a canonical induction
- /// variable.
- ///
- inline PHINode *getCanonicalInductionVariable() const {
- BlockT *H = getHeader();
-
- BlockT *Incoming = 0, *Backedge = 0;
- typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
- typename InvBlockTraits::ChildIteratorType PI =
- InvBlockTraits::child_begin(H);
- assert(PI != InvBlockTraits::child_end(H) &&
- "Loop must have at least one backedge!");
- Backedge = *PI++;
- if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop
- Incoming = *PI++;
- if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges?
-
- if (contains(Incoming)) {
- if (contains(Backedge))
- return 0;
- std::swap(Incoming, Backedge);
- } else if (!contains(Backedge))
- return 0;
-
- // Loop over all of the PHI nodes, looking for a canonical indvar.
- for (typename BlockT::iterator I = H->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
- if (CI->isNullValue())
- if (Instruction *Inc =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
- if (Inc->getOpcode() == Instruction::Add &&
- Inc->getOperand(0) == PN)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
- if (CI->equalsInt(1))
- return PN;
- }
- return 0;
- }
-
- /// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
- /// the canonical induction variable value for the "next" iteration of the
- /// loop. This always succeeds if getCanonicalInductionVariable succeeds.
- ///
- inline Instruction *getCanonicalInductionVariableIncrement() const {
- if (PHINode *PN = getCanonicalInductionVariable()) {
- bool P1InLoop = contains(PN->getIncomingBlock(1));
- return cast<Instruction>(PN->getIncomingValue(P1InLoop));
- }
- return 0;
- }
-
- /// getTripCount - Return a loop-invariant LLVM value indicating the number of
- /// times the loop will be executed. Note that this means that the backedge
- /// of the loop executes N-1 times. If the trip-count cannot be determined,
- /// this returns null.
- ///
- /// The IndVarSimplify pass transforms loops to have a form that this
- /// function easily understands.
- ///
- inline Value *getTripCount() const {
- // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
- // canonical induction variable and V is the trip count of the loop.
- Instruction *Inc = getCanonicalInductionVariableIncrement();
- if (Inc == 0) return 0;
- PHINode *IV = cast<PHINode>(Inc->getOperand(0));
-
- BlockT *BackedgeBlock =
- IV->getIncomingBlock(contains(IV->getIncomingBlock(1)));
-
- if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
- if (BI->isConditional()) {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getOperand(0) == Inc) {
- if (BI->getSuccessor(0) == getHeader()) {
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ICI->getOperand(1);
- } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
- return ICI->getOperand(1);
- }
- }
- }
- }
-
- return 0;
- }
-
- /// getSmallConstantTripCount - Returns the trip count of this loop as a
- /// normal unsigned value, if possible. Returns 0 if the trip count is unknown
- /// of not constant. Will also return 0 if the trip count is very large
- /// (>= 2^32)
- inline unsigned getSmallConstantTripCount() const {
- Value* TripCount = this->getTripCount();
- if (TripCount) {
- if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
- // Guard against huge trip counts.
- if (TripCountC->getValue().getActiveBits() <= 32) {
- return (unsigned)TripCountC->getZExtValue();
- }
- }
- }
- return 0;
- }
-
- /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
- /// trip count of this loop as a normal unsigned value, if possible. This
- /// means that the actual trip count is always a multiple of the returned
- /// value (don't forget the trip count could very well be zero as well!).
- ///
- /// Returns 1 if the trip count is unknown or not guaranteed to be the
- /// multiple of a constant (which is also the case if the trip count is simply
- /// constant, use getSmallConstantTripCount for that case), Will also return 1
- /// if the trip count is very large (>= 2^32).
- inline unsigned getSmallConstantTripMultiple() const {
- Value* TripCount = this->getTripCount();
- // This will hold the ConstantInt result, if any
- ConstantInt *Result = NULL;
- if (TripCount) {
- // See if the trip count is constant itself
- Result = dyn_cast<ConstantInt>(TripCount);
- // if not, see if it is a multiplication
- if (!Result)
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
- switch (BO->getOpcode()) {
- case BinaryOperator::Mul:
- Result = dyn_cast<ConstantInt>(BO->getOperand(1));
- break;
- default:
- break;
- }
- }
- }
- // Guard against huge trip counts.
- if (Result && Result->getValue().getActiveBits() <= 32) {
- return (unsigned)Result->getZExtValue();
- } else {
- return 1;
- }
- }
-
- /// isLCSSAForm - Return true if the Loop is in LCSSA form
- inline bool isLCSSAForm() const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BlockT*, 16> LoopBBs(block_begin(), block_end());
-
- for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
- BlockT *BB = *BI;
- for (typename BlockT::iterator I = BB->begin(), E = BB->end(); I != E;++I)
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
- ++UI) {
- BlockT *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(*UI)) {
- UserBB = P->getIncomingBlock(UI);
- }
-
- // Check the current block, as a fast-path. Most values are used in
- // the same block they are defined in.
- if (UserBB != BB && !LoopBBs.count(UserBB))
- return false;
- }
- }
-
- return true;
- }
//===--------------------------------------------------------------------===//
// APIs for updating loop information after changing the CFG
@@ -538,39 +306,39 @@ public:
/// to the specified LoopInfo object as being in the current basic block. It
/// is not valid to replace the loop header with this method.
///
- void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT> &LI);
+ void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI);
/// replaceChildLoopWith - This is used when splitting loops up. It replaces
/// the OldChild entry in our children list with NewChild, and updates the
/// parent pointer of OldChild to be null and the NewChild to be this loop.
/// This updates the loop depth of the new child.
- void replaceChildLoopWith(LoopBase<BlockT> *OldChild,
- LoopBase<BlockT> *NewChild) {
+ void replaceChildLoopWith(LoopT *OldChild,
+ LoopT *NewChild) {
assert(OldChild->ParentLoop == this && "This loop is already broken!");
assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!");
- typename std::vector<LoopBase<BlockT>*>::iterator I =
+ typename std::vector<LoopT *>::iterator I =
std::find(SubLoops.begin(), SubLoops.end(), OldChild);
assert(I != SubLoops.end() && "OldChild not in loop!");
*I = NewChild;
OldChild->ParentLoop = 0;
- NewChild->ParentLoop = this;
+ NewChild->ParentLoop = static_cast<LoopT *>(this);
}
/// addChildLoop - Add the specified loop to be a child of this loop. This
/// updates the loop depth of the new child.
///
- void addChildLoop(LoopBase<BlockT> *NewChild) {
+ void addChildLoop(LoopT *NewChild) {
assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!");
- NewChild->ParentLoop = this;
+ NewChild->ParentLoop = static_cast<LoopT *>(this);
SubLoops.push_back(NewChild);
}
/// removeChildLoop - This removes the specified child from being a subloop of
/// this loop. The loop is not deleted, as it will presumably be inserted
/// into another loop.
- LoopBase<BlockT> *removeChildLoop(iterator I) {
+ LoopT *removeChildLoop(iterator I) {
assert(I != SubLoops.end() && "Cannot remove end iterator!");
- LoopBase<BlockT> *Child = *I;
+ LoopT *Child = *I;
assert(Child->ParentLoop == this && "Child is not a child of this loop!");
SubLoops.erase(SubLoops.begin()+(I-begin()));
Child->ParentLoop = 0;
@@ -609,16 +377,86 @@ public:
/// verifyLoop - Verify loop structure
void verifyLoop() const {
#ifndef NDEBUG
- assert (getHeader() && "Loop header is missing");
- assert (getLoopPreheader() && "Loop preheader is missing");
- assert (getLoopLatch() && "Loop latch is missing");
- for (iterator I = SubLoops.begin(), E = SubLoops.end(); I != E; ++I)
- (*I)->verifyLoop();
+ assert(!Blocks.empty() && "Loop header is missing");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ // Check the individual blocks.
+ for (block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
+ BlockT *BB = *I;
+ bool HasInsideLoopSuccs = false;
+ bool HasInsideLoopPreds = false;
+ SmallVector<BlockT *, 2> OutsideLoopPreds;
+
+ typedef GraphTraits<BlockT*> BlockTraits;
+ for (typename BlockTraits::ChildIteratorType SI =
+ BlockTraits::child_begin(BB), SE = BlockTraits::child_end(BB);
+ SI != SE; ++SI)
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *SI)) {
+ HasInsideLoopSuccs = true;
+ break;
+ }
+ typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+ for (typename InvBlockTraits::ChildIteratorType PI =
+ InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB);
+ PI != PE; ++PI) {
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *PI))
+ HasInsideLoopPreds = true;
+ else
+ OutsideLoopPreds.push_back(*PI);
+ }
+
+ if (BB == getHeader()) {
+ assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
+ } else if (!OutsideLoopPreds.empty()) {
+ // A non-header loop block shouldn't be reachable from outside the loop,
+ // though it is permitted if the predecessor is not itself actually
+ // reachable.
+ BlockT *EntryBB = BB->getParent()->begin();
+ for (df_iterator<BlockT *> NI = df_begin(EntryBB),
+ NE = df_end(EntryBB); NI != NE; ++NI)
+ for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i)
+ assert(*NI != OutsideLoopPreds[i] &&
+ "Loop has multiple entry points!");
+ }
+ assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!");
+ assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!");
+ assert(BB != getHeader()->getParent()->begin() &&
+ "Loop contains function entry block!");
+ }
+
+ // Check the subloops.
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ // Each block in each subloop should be contained within this loop.
+ for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end();
+ BI != BE; ++BI) {
+ assert(std::binary_search(LoopBBs.begin(), LoopBBs.end(), *BI) &&
+ "Loop does not contain all the blocks of a subloop!");
+ }
+
+ // Check the parent loop pointer.
+ if (ParentLoop) {
+ assert(std::find(ParentLoop->begin(), ParentLoop->end(), this) !=
+ ParentLoop->end() &&
+ "Loop is not a subloop of its parent!");
+ }
#endif
}
- void print(std::ostream &OS, unsigned Depth = 0) const {
- OS << std::string(Depth*2, ' ') << "Loop at depth " << getLoopDepth()
+ /// verifyLoopNest - Verify loop structure of this loop and all nested loops.
+ void verifyLoopNest() const {
+ // Verify this loop.
+ verifyLoop();
+ // Verify the subloops.
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->verifyLoopNest();
+ }
+
+ void print(raw_ostream &OS, unsigned Depth = 0) const {
+ OS.indent(Depth*2) << "Loop at depth " << getLoopDepth()
<< " containing: ";
for (unsigned i = 0; i < getBlocks().size(); ++i) {
@@ -635,33 +473,131 @@ public:
(*I)->print(OS, Depth+2);
}
- void print(std::ostream *O, unsigned Depth = 0) const {
- if (O) print(*O, Depth);
- }
-
void dump() const {
- print(cerr);
+ print(errs());
}
-private:
- friend class LoopInfoBase<BlockT>;
+protected:
+ friend class LoopInfoBase<BlockT, LoopT>;
explicit LoopBase(BlockT *BB) : ParentLoop(0) {
Blocks.push_back(BB);
}
};
+class Loop : public LoopBase<BasicBlock, Loop> {
+public:
+ Loop() {}
+
+ /// isLoopInvariant - Return true if the specified value is loop invariant
+ ///
+ bool isLoopInvariant(Value *V) const;
+
+ /// isLoopInvariant - Return true if the specified instruction is
+ /// loop-invariant.
+ ///
+ bool isLoopInvariant(Instruction *I) const;
+
+ /// makeLoopInvariant - If the given value is an instruction inside of the
+ /// loop and it can be hoisted, do so to make it trivially loop-invariant.
+ /// Return true if the value after any hoisting is loop invariant. This
+ /// function can be used as a slightly more aggressive replacement for
+ /// isLoopInvariant.
+ ///
+ /// If InsertPt is specified, it is the point to hoist instructions to.
+ /// If null, the terminator of the loop preheader is used.
+ ///
+ bool makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt = 0) const;
+
+ /// makeLoopInvariant - If the given instruction is inside of the
+ /// loop and it can be hoisted, do so to make it trivially loop-invariant.
+ /// Return true if the instruction after any hoisting is loop invariant. This
+ /// function can be used as a slightly more aggressive replacement for
+ /// isLoopInvariant.
+ ///
+ /// If InsertPt is specified, it is the point to hoist instructions to.
+ /// If null, the terminator of the loop preheader is used.
+ ///
+ bool makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt = 0) const;
+
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable: an integer recurrence that starts at 0 and increments
+ /// by one each time through the loop. If so, return the phi node that
+ /// corresponds to it.
+ ///
+ /// The IndVarSimplify pass transforms loops to have a canonical induction
+ /// variable.
+ ///
+ PHINode *getCanonicalInductionVariable() const;
+
+ /// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
+ /// the canonical induction variable value for the "next" iteration of the
+ /// loop. This always succeeds if getCanonicalInductionVariable succeeds.
+ ///
+ Instruction *getCanonicalInductionVariableIncrement() const;
+
+ /// getTripCount - Return a loop-invariant LLVM value indicating the number of
+ /// times the loop will be executed. Note that this means that the backedge
+ /// of the loop executes N-1 times. If the trip-count cannot be determined,
+ /// this returns null.
+ ///
+ /// The IndVarSimplify pass transforms loops to have a form that this
+ /// function easily understands.
+ ///
+ Value *getTripCount() const;
+
+ /// getSmallConstantTripCount - Returns the trip count of this loop as a
+ /// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+ /// or not constant. It will also return 0 if the trip count is very large
+ /// (>= 2^32).
+ unsigned getSmallConstantTripCount() const;
+
+ /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+ /// trip count of this loop as a normal unsigned value, if possible. This
+ /// means that the actual trip count is always a multiple of the returned
+ /// value (don't forget the trip count could very well be zero as well!).
+ ///
+ /// Returns 1 if the trip count is unknown or not guaranteed to be the
+ /// multiple of a constant (which is also the case if the trip count is simply
+ /// constant; use getSmallConstantTripCount for that case). It will also
+ /// return 1 if the trip count is very large (>= 2^32).
+ unsigned getSmallConstantTripMultiple() const;
+
+ /// isLCSSAForm - Return true if the Loop is in LCSSA form
+ bool isLCSSAForm() const;
+
+ /// isLoopSimplifyForm - Return true if the Loop is in the form that
+ /// the LoopSimplify form transforms loops to, which is sometimes called
+ /// normal form.
+ bool isLoopSimplifyForm() const;
+
+ /// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+ /// These are the blocks _outside of the current loop_ which are branched to.
+ /// This assumes that the loop is in canonical form.
+ ///
+ void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const;
+
+ /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+ /// block, return that block. Otherwise return null.
+ BasicBlock *getUniqueExitBlock() const;
+
+private:
+ friend class LoopInfoBase<BasicBlock, Loop>;
+ explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {}
+};
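getSmallConstantTripMultiple is declared above with a fairly involved contract; the following standalone sketch (toy types standing in for SCEV/ConstantInt values; not the LLVM implementation) models the cases its documentation describes:

#include <iostream>

// Toy model of the documented contract. Known marks a computable trip
// count; Value is the constant when the count is itself constant; Factor
// is the constant operand when the count is a multiplication, 0 otherwise.
struct TripCount {
  bool Known;
  bool IsConstant;
  unsigned long long Value;
  unsigned long long Factor;
};

unsigned smallConstantTripMultiple(const TripCount &TC) {
  unsigned long long Result = 0;
  if (TC.Known) {
    if (TC.IsConstant)
      Result = TC.Value;  // trip count is itself constant
    else if (TC.Factor)
      Result = TC.Factor; // trip count = N * Factor
  }
  // Guard against huge values, mirroring the >= 2^32 check above.
  if (Result && Result < (1ULL << 32))
    return unsigned(Result);
  return 1;
}

int main() {
  TripCount Mul = {true, false, 0, 4};      // count = n * 4
  TripCount Unknown = {false, false, 0, 0}; // count not computable
  std::cout << smallConstantTripMultiple(Mul) << '\n';     // 4
  std::cout << smallConstantTripMultiple(Unknown) << '\n'; // 1
  return 0;
}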
//===----------------------------------------------------------------------===//
/// LoopInfo - This class builds and contains all of the top level loop
/// structures in the specified function.
///
-template<class BlockT>
+template<class BlockT, class LoopT>
class LoopInfoBase {
// BBMap - Mapping of basic blocks to the innermost loop they occur in
- std::map<BlockT*, LoopBase<BlockT>*> BBMap;
- std::vector<LoopBase<BlockT>*> TopLevelLoops;
- friend class LoopBase<BlockT>;
+ std::map<BlockT *, LoopT *> BBMap;
+ std::vector<LoopT *> TopLevelLoops;
+ friend class LoopBase<BlockT, LoopT>;
void operator=(const LoopInfoBase &); // do not implement
LoopInfoBase(const LoopInfo &); // do not implement
@@ -670,7 +606,7 @@ public:
~LoopInfoBase() { releaseMemory(); }
void releaseMemory() {
- for (typename std::vector<LoopBase<BlockT>* >::iterator I =
+ for (typename std::vector<LoopT *>::iterator I =
TopLevelLoops.begin(), E = TopLevelLoops.end(); I != E; ++I)
delete *I; // Delete all of the loops...
@@ -681,7 +617,7 @@ public:
/// iterator/begin/end - The interface to the top-level loops in the current
/// function.
///
- typedef typename std::vector<LoopBase<BlockT>*>::const_iterator iterator;
+ typedef typename std::vector<LoopT *>::const_iterator iterator;
iterator begin() const { return TopLevelLoops.begin(); }
iterator end() const { return TopLevelLoops.end(); }
bool empty() const { return TopLevelLoops.empty(); }
@@ -689,15 +625,15 @@ public:
/// getLoopFor - Return the innermost loop that BB lives in. If a basic
/// block is in no loop (for example the entry node), null is returned.
///
- LoopBase<BlockT> *getLoopFor(const BlockT *BB) const {
- typename std::map<BlockT *, LoopBase<BlockT>*>::const_iterator I=
+ LoopT *getLoopFor(const BlockT *BB) const {
+ typename std::map<BlockT *, LoopT *>::const_iterator I=
BBMap.find(const_cast<BlockT*>(BB));
return I != BBMap.end() ? I->second : 0;
}
/// operator[] - same as getLoopFor...
///
- const LoopBase<BlockT> *operator[](const BlockT *BB) const {
+ const LoopT *operator[](const BlockT *BB) const {
return getLoopFor(BB);
}
@@ -705,22 +641,22 @@ public:
/// depth of 0 means the block is not inside any loop.
///
unsigned getLoopDepth(const BlockT *BB) const {
- const LoopBase<BlockT> *L = getLoopFor(BB);
+ const LoopT *L = getLoopFor(BB);
return L ? L->getLoopDepth() : 0;
}
// isLoopHeader - True if the block is a loop header node
bool isLoopHeader(BlockT *BB) const {
- const LoopBase<BlockT> *L = getLoopFor(BB);
+ const LoopT *L = getLoopFor(BB);
return L && L->getHeader() == BB;
}
/// removeLoop - This removes the specified top-level loop from this loop info
/// object. The loop is not deleted, as it will presumably be inserted into
/// another loop.
- LoopBase<BlockT> *removeLoop(iterator I) {
+ LoopT *removeLoop(iterator I) {
assert(I != end() && "Cannot remove end iterator!");
- LoopBase<BlockT> *L = *I;
+ LoopT *L = *I;
assert(L->getParentLoop() == 0 && "Not a top-level loop!");
TopLevelLoops.erase(TopLevelLoops.begin() + (I-begin()));
return L;
@@ -729,17 +665,17 @@ public:
/// changeLoopFor - Change the top-level loop that contains BB to the
/// specified loop. This should be used by transformations that restructure
/// the loop hierarchy tree.
- void changeLoopFor(BlockT *BB, LoopBase<BlockT> *L) {
- LoopBase<BlockT> *&OldLoop = BBMap[BB];
+ void changeLoopFor(BlockT *BB, LoopT *L) {
+ LoopT *&OldLoop = BBMap[BB];
assert(OldLoop && "Block not in a loop yet!");
OldLoop = L;
}
/// changeTopLevelLoop - Replace the specified loop in the top-level loops
/// list with the indicated loop.
- void changeTopLevelLoop(LoopBase<BlockT> *OldLoop,
- LoopBase<BlockT> *NewLoop) {
- typename std::vector<LoopBase<BlockT>*>::iterator I =
+ void changeTopLevelLoop(LoopT *OldLoop,
+ LoopT *NewLoop) {
+ typename std::vector<LoopT *>::iterator I =
std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop);
assert(I != TopLevelLoops.end() && "Old loop not at top level!");
*I = NewLoop;
@@ -749,7 +685,7 @@ public:
/// addTopLevelLoop - This adds the specified loop to the collection of
/// top-level loops.
- void addTopLevelLoop(LoopBase<BlockT> *New) {
+ void addTopLevelLoop(LoopT *New) {
assert(New->getParentLoop() == 0 && "Loop already in subloop!");
TopLevelLoops.push_back(New);
}
@@ -758,9 +694,9 @@ public:
/// including all of the Loop objects it is nested in and our mapping from
/// BasicBlocks to loops.
void removeBlock(BlockT *BB) {
- typename std::map<BlockT *, LoopBase<BlockT>*>::iterator I = BBMap.find(BB);
+ typename std::map<BlockT *, LoopT *>::iterator I = BBMap.find(BB);
if (I != BBMap.end()) {
- for (LoopBase<BlockT> *L = I->second; L; L = L->getParentLoop())
+ for (LoopT *L = I->second; L; L = L->getParentLoop())
L->removeBlockFromLoop(BB);
BBMap.erase(I);
@@ -769,8 +705,8 @@ public:
// Internals
- static bool isNotAlreadyContainedIn(const LoopBase<BlockT> *SubLoop,
- const LoopBase<BlockT> *ParentLoop) {
+ static bool isNotAlreadyContainedIn(const LoopT *SubLoop,
+ const LoopT *ParentLoop) {
if (SubLoop == 0) return true;
if (SubLoop == ParentLoop) return false;
return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
@@ -781,11 +717,11 @@ public:
for (df_iterator<BlockT*> NI = df_begin(RootNode),
NE = df_end(RootNode); NI != NE; ++NI)
- if (LoopBase<BlockT> *L = ConsiderForLoop(*NI, DT))
+ if (LoopT *L = ConsiderForLoop(*NI, DT))
TopLevelLoops.push_back(L);
}
- LoopBase<BlockT> *ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT) {
+ LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT) {
if (BBMap.find(BB) != BBMap.end()) return 0; // Already processed this node.
std::vector<BlockT *> TodoStack;
@@ -796,13 +732,13 @@ public:
for (typename InvBlockTraits::ChildIteratorType I =
InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB);
I != E; ++I)
- if (DT.dominates(BB, *I)) // If BB dominates it's predecessor...
+ if (DT.dominates(BB, *I)) // If BB dominates its predecessor...
TodoStack.push_back(*I);
if (TodoStack.empty()) return 0; // No backedges to this block...
// Create a new loop to represent this basic block...
- LoopBase<BlockT> *L = new LoopBase<BlockT>(BB);
+ LoopT *L = new LoopT(BB);
BBMap[BB] = L;
BlockT *EntryBlock = BB->getParent()->begin();
@@ -819,13 +755,13 @@ public:
// occurs, this child loop gets added to a part of the current loop,
// making it a sibling to the current loop. We have to reparent this
// loop.
- if (LoopBase<BlockT> *SubLoop =
- const_cast<LoopBase<BlockT>*>(getLoopFor(X)))
+ if (LoopT *SubLoop =
+ const_cast<LoopT *>(getLoopFor(X)))
if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){
- // Remove the subloop from it's current parent...
+ // Remove the subloop from its current parent...
assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L);
- LoopBase<BlockT> *SLP = SubLoop->ParentLoop; // SubLoopParent
- typename std::vector<LoopBase<BlockT>*>::iterator I =
+ LoopT *SLP = SubLoop->ParentLoop; // SubLoopParent
+ typename std::vector<LoopT *>::iterator I =
std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop);
assert(I != SLP->SubLoops.end() &&"SubLoop not a child of parent?");
SLP->SubLoops.erase(I); // Remove from parent...
@@ -849,7 +785,7 @@ public:
// If there are any loops nested within this loop, create them now!
for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
E = L->Blocks.end(); I != E; ++I)
- if (LoopBase<BlockT> *NewLoop = ConsiderForLoop(*I, DT)) {
+ if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) {
L->SubLoops.push_back(NewLoop);
NewLoop->ParentLoop = L;
}
@@ -858,25 +794,20 @@ public:
// loop can be found for them.
//
for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
- E = L->Blocks.end(); I != E; ++I) {
- typename std::map<BlockT*, LoopBase<BlockT>*>::iterator BBMI =
- BBMap.find(*I);
- if (BBMI == BBMap.end()) // Not in map yet...
- BBMap.insert(BBMI, std::make_pair(*I, L)); // Must be at this level
- }
+ E = L->Blocks.end(); I != E; ++I)
+ BBMap.insert(std::make_pair(*I, L));
// Now that we have a list of all of the child loops of this loop, check to
// see if any of them should actually be nested inside of each other. We
// can accidentally pull loops out of their parents, so we must make sure to
// organize the loop nests correctly now.
{
- std::map<BlockT*, LoopBase<BlockT>*> ContainingLoops;
+ std::map<BlockT *, LoopT *> ContainingLoops;
for (unsigned i = 0; i != L->SubLoops.size(); ++i) {
- LoopBase<BlockT> *Child = L->SubLoops[i];
+ LoopT *Child = L->SubLoops[i];
assert(Child->getParentLoop() == L && "Not proper child loop?");
- if (LoopBase<BlockT> *ContainingLoop =
- ContainingLoops[Child->getHeader()]) {
+ if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) {
// If there is already a loop which contains this loop, move this loop
// into the containing loop.
MoveSiblingLoopInto(Child, ContainingLoop);
@@ -886,11 +817,11 @@ public:
// if any of the contained blocks are loop headers for subloops we
// have already processed.
for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) {
- LoopBase<BlockT> *&BlockLoop = ContainingLoops[Child->Blocks[b]];
+ LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]];
if (BlockLoop == 0) { // Child block not processed yet...
BlockLoop = Child;
} else if (BlockLoop != Child) {
- LoopBase<BlockT> *SubLoop = BlockLoop;
+ LoopT *SubLoop = BlockLoop;
// Reparent all of the blocks which used to belong to BlockLoops
for (unsigned j = 0, e = SubLoop->Blocks.size(); j != e; ++j)
ContainingLoops[SubLoop->Blocks[j]] = Child;
@@ -911,14 +842,14 @@ public:
/// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
/// of the NewParent Loop, instead of being a sibling of it.
- void MoveSiblingLoopInto(LoopBase<BlockT> *NewChild,
- LoopBase<BlockT> *NewParent) {
- LoopBase<BlockT> *OldParent = NewChild->getParentLoop();
+ void MoveSiblingLoopInto(LoopT *NewChild,
+ LoopT *NewParent) {
+ LoopT *OldParent = NewChild->getParentLoop();
assert(OldParent && OldParent == NewParent->getParentLoop() &&
NewChild != NewParent && "Not sibling loops!");
// Remove NewChild from being a child of OldParent
- typename std::vector<LoopBase<BlockT>*>::iterator I =
+ typename std::vector<LoopT *>::iterator I =
std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(),
NewChild);
assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??");
@@ -931,7 +862,7 @@ public:
/// InsertLoopInto - This inserts loop L into the specified parent loop. If
/// the parent loop contains a loop which should contain L, the loop gets
/// inserted into L instead.
- void InsertLoopInto(LoopBase<BlockT> *L, LoopBase<BlockT> *Parent) {
+ void InsertLoopInto(LoopT *L, LoopT *Parent) {
BlockT *LHeader = L->getHeader();
assert(Parent->contains(LHeader) &&
"This loop should not be inserted here!");
@@ -951,11 +882,11 @@ public:
// Debugging
- void print(std::ostream &OS, const Module* ) const {
+ void print(raw_ostream &OS) const {
for (unsigned i = 0; i < TopLevelLoops.size(); ++i)
TopLevelLoops[i]->print(OS);
#if 0
- for (std::map<BasicBlock*, Loop*>::const_iterator I = BBMap.begin(),
+ for (std::map<BasicBlock*, LoopT*>::const_iterator I = BBMap.begin(),
E = BBMap.end(); I != E; ++I)
OS << "BB '" << I->first->getName() << "' level = "
<< I->second->getLoopDepth() << "\n";
@@ -964,8 +895,8 @@ public:
};
class LoopInfo : public FunctionPass {
- LoopInfoBase<BasicBlock> LI;
- friend class LoopBase<BasicBlock>;
+ LoopInfoBase<BasicBlock, Loop> LI;
+ friend class LoopBase<BasicBlock, Loop>;
void operator=(const LoopInfo &); // do not implement
LoopInfo(const LoopInfo &); // do not implement
@@ -974,12 +905,12 @@ public:
LoopInfo() : FunctionPass(&ID) {}
- LoopInfoBase<BasicBlock>& getBase() { return LI; }
+ LoopInfoBase<BasicBlock, Loop>& getBase() { return LI; }
/// iterator/begin/end - The interface to the top-level loops in the current
/// function.
///
- typedef LoopInfoBase<BasicBlock>::iterator iterator;
+ typedef LoopInfoBase<BasicBlock, Loop>::iterator iterator;
inline iterator begin() const { return LI.begin(); }
inline iterator end() const { return LI.end(); }
bool empty() const { return LI.empty(); }
@@ -1013,12 +944,12 @@ public:
///
virtual bool runOnFunction(Function &F);
- virtual void releaseMemory() { LI.releaseMemory(); }
+ virtual void verifyAnalysis() const;
- virtual void print(std::ostream &O, const Module* M = 0) const {
- LI.print(O, M);
- }
+ virtual void releaseMemory() { LI.releaseMemory(); }
+ virtual void print(raw_ostream &O, const Module* M = 0) const;
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
/// removeLoop - This removes the specified top-level loop from this loop info
@@ -1051,6 +982,13 @@ public:
void removeBlock(BasicBlock *BB) {
LI.removeBlock(BB);
}
+
+ static bool isNotAlreadyContainedIn(const Loop *SubLoop,
+ const Loop *ParentLoop) {
+ return
+ LoopInfoBase<BasicBlock, Loop>::isNotAlreadyContainedIn(SubLoop,
+ ParentLoop);
+ }
};
@@ -1081,19 +1019,21 @@ template <> struct GraphTraits<Loop*> {
}
};
-template<class BlockT>
-void LoopBase<BlockT>::addBasicBlockToLoop(BlockT *NewBB,
- LoopInfoBase<BlockT> &LIB) {
+template<class BlockT, class LoopT>
+void
+LoopBase<BlockT, LoopT>::addBasicBlockToLoop(BlockT *NewBB,
+ LoopInfoBase<BlockT, LoopT> &LIB) {
assert((Blocks.empty() || LIB[getHeader()] == this) &&
"Incorrect LI specified for this loop!");
assert(NewBB && "Cannot add a null basic block to the loop!");
assert(LIB[NewBB] == 0 && "BasicBlock already in the loop!");
+ LoopT *L = static_cast<LoopT *>(this);
+
// Add the loop mapping to the LoopInfo object...
- LIB.BBMap[NewBB] = this;
+ LIB.BBMap[NewBB] = L;
// Add the basic block to this loop and all parent loops...
- LoopBase<BlockT> *L = this;
while (L) {
L->Blocks.push_back(NewBB);
L = L->getParentLoop();
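The new getExitEdges above replaces the old predecessor-scanning logic with a sort-plus-binary-search membership test. A self-contained sketch of that walk on a toy CFG (plain ints standing in for basic blocks; not LLVM code):

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // Toy CFG: blocks 1, 2, 3 form the loop; 4 and 5 are outside. Succs
  // lists only edges leaving loop blocks, matching the real walk, which
  // enumerates the successors of each block in the loop.
  std::vector<int> LoopBBs = {3, 1, 2};
  std::vector<std::pair<int, int> > Succs;
  Succs.push_back(std::make_pair(1, 2));
  Succs.push_back(std::make_pair(2, 3));
  Succs.push_back(std::make_pair(3, 1)); // backedge
  Succs.push_back(std::make_pair(2, 4)); // exit
  Succs.push_back(std::make_pair(3, 5)); // exit

  // Sort once, then test membership by binary search, as in getExitEdges.
  std::sort(LoopBBs.begin(), LoopBBs.end());
  std::vector<std::pair<int, int> > ExitEdges;
  for (size_t i = 0; i != Succs.size(); ++i)
    if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), Succs[i].second))
      ExitEdges.push_back(Succs[i]); // (inside block, outside block)

  for (size_t i = 0; i != ExitEdges.size(); ++i)
    std::cout << ExitEdges[i].first << " -> " << ExitEdges[i].second << '\n';
  return 0; // prints 2 -> 4 and 3 -> 5
}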
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index 7659b5bf458d..2eb329f7f0e3 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -111,9 +111,13 @@ public:
// Delete loop from the loop queue and loop nest (LoopInfo).
void deleteLoopFromQueue(Loop *L);
- // Insert loop into the loop nest(LoopInfo) and loop queue(LQ).
+ // Insert loop into the loop queue and add it as a child of the
+ // given parent.
void insertLoop(Loop *L, Loop *ParentLoop);
+ // Insert a loop into the loop queue.
+ void insertLoopIntoQueue(Loop *L);
+
// Reoptimize this loop. LPPassManager will re-insert this loop into the
// queue. This allows LoopPass to change loop nest for the loop. This
// utility may send LPPassManager into infinite loops so use caution.
diff --git a/include/llvm/Analysis/MallocHelper.h b/include/llvm/Analysis/MallocHelper.h
new file mode 100644
index 000000000000..0588dff08a60
--- /dev/null
+++ b/include/llvm/Analysis/MallocHelper.h
@@ -0,0 +1,86 @@
+//===- llvm/Analysis/MallocHelper.h ---- Identify malloc calls --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to malloc, bitcasts of malloc
+// calls, and the types and array sizes associated with them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MALLOCHELPER_H
+#define LLVM_ANALYSIS_MALLOCHELPER_H
+
+namespace llvm {
+class CallInst;
+class LLVMContext;
+class PointerType;
+class TargetData;
+class Type;
+class Value;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call
+bool isMalloc(const Value* I);
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst* extractMallocCall(const Value* I);
+CallInst* extractMallocCall(Value* I);
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+const CallInst* extractMallocCallFromBitCast(const Value* I);
+CallInst* extractMallocCallFromBitCast(Value* I);
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// matches the malloc call IR generated by CallInst::CreateMalloc(). This
+/// means that it is a malloc call with one bitcast use AND the malloc call's
+/// size argument is:
+/// 1. a constant not equal to the malloc's allocated type
+/// or
+/// 2. the result of a multiplication by the malloc's allocated type
+/// Otherwise it returns NULL.
+/// The unique bitcast is needed to determine the type/size of the array
+/// allocation.
+CallInst* isArrayMalloc(Value* I, LLVMContext &Context, const TargetData* TD);
+const CallInst* isArrayMalloc(const Value* I, LLVMContext &Context,
+ const TargetData* TD);
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType* getMallocType(const CallInst* CI);
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
+const Type* getMallocAllocatedType(const CallInst* CI);
+
+/// getMallocArraySize - Returns the array size of a malloc call. The array
+/// size is computed in 1 of 3 ways:
+/// 1. If the element type is of size 1, then the array size is the argument to
+/// malloc.
+/// 2. Else if the malloc's argument is a constant, the array size is that
+/// argument divided by the element type's size.
+/// 3. Else the malloc argument must be a multiplication and the array size is
+/// the first operand of the multiplication.
+/// This function returns constant 1 if:
+/// 1. The malloc call's allocated type cannot be determined.
+/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL
+/// ArraySize.
+Value* getMallocArraySize(CallInst* CI, LLVMContext &Context,
+ const TargetData* TD);
+
+} // End llvm namespace
+
+#endif
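
For illustration, a minimal sketch of how a client might combine the helpers above; the function name is hypothetical, and TD may be null when no target data is available:

  // Hypothetical client of the MallocHelper API declared above.
  #include "llvm/Analysis/MallocHelper.h"
  using namespace llvm;

  static Value *arraySizeIfMalloc(Value *V, LLVMContext &Ctx,
                                  const TargetData *TD) {
    if (!isMalloc(V))                        // covers calls and their bitcasts
      return 0;
    CallInst *CI = extractMallocCall(V);     // V is the call itself...
    if (!CI)
      CI = extractMallocCallFromBitCast(V);  // ...or a bitcast of its result
    if (!CI)
      return 0;
    // Note: returns constant 1 when the allocated type cannot be determined.
    return getMallocArraySize(CI, Ctx, TD);
  }
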
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index d7d795e08a16..205c34ab5c89 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -95,7 +95,7 @@ namespace llvm {
/// an instruction definition dependency.
bool isDef() const { return Value.getInt() == Def; }
- /// isNonLocal - Return true if this MemDepResult represents an query that
+ /// isNonLocal - Return true if this MemDepResult represents a query that
/// is transparent to the start of the block, but where a non-local hasn't
/// been done.
bool isNonLocal() const { return Value.getInt() == NonLocal; }
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 35bd82199488..66ab3ea5caf1 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -21,6 +21,7 @@ namespace llvm {
class LoopPass;
class ModulePass;
class Pass;
+ class PassInfo;
class LibCallInfo;
//===--------------------------------------------------------------------===//
@@ -73,6 +74,13 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
+ // createScalarEvolutionAliasAnalysisPass - This pass implements a simple
+ // alias analysis using ScalarEvolution queries.
+ //
+ FunctionPass *createScalarEvolutionAliasAnalysisPass();
+
+ //===--------------------------------------------------------------------===//
+ //
// createAndersensPass - This pass implements Andersen's interprocedural alias
// analysis.
//
@@ -93,6 +101,20 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
+ // createProfileEstimatorPass - This pass estimates profiling information
+ // instead of loading it from a previous run.
+ //
+ FunctionPass *createProfileEstimatorPass();
+ extern const PassInfo *ProfileEstimatorPassID;
+
+ //===--------------------------------------------------------------------===//
+ //
+ // createProfileVerifierPass - This pass verifies profiling information.
+ //
+ FunctionPass *createProfileVerifierPass();
+
+ //===--------------------------------------------------------------------===//
+ //
// createDSAAPass - This pass implements simple context sensitive alias
// analysis.
//
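
A sketch of wiring the new pass-creation functions into a pass manager; this assumes the stock PassManager API, and the function name is hypothetical:

  #include "llvm/PassManager.h"
  #include "llvm/Analysis/Passes.h"
  using namespace llvm;

  static void addProfilePasses(Module &M) {
    PassManager PM;
    PM.add(createProfileEstimatorPass());             // synthesize counts
    PM.add(createProfileVerifierPass());              // sanity-check them
    PM.add(createScalarEvolutionAliasAnalysisPass()); // SCEV-based AA
    PM.run(M);
  }
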
diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h
new file mode 100644
index 000000000000..a14bbf0290ea
--- /dev/null
+++ b/include/llvm/Analysis/PointerTracking.h
@@ -0,0 +1,131 @@
+//===- PointerTracking.h - Pointer Bounds Tracking --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tracking of pointer bounds.
+// It knows that the libc functions "calloc" and "realloc" allocate memory,
+// so avoid using this pass if those names mean something else in your
+// language.
+//
+// All methods assume that the pointer is not NULL; if it is, the returned
+// allocation size is wrong, and so is the result from checkLimits. The
+// methods also assume that pointers are valid and that no use-after-free
+// scenario is being analyzed.
+// Because allocations can fail by returning NULL, the "size" returned by
+// these methods should be treated as either 0 or the returned size.
+//
+// Another analysis pass should be used to find use-after-free/NULL dereference
+// bugs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_POINTERTRACKING_H
+#define LLVM_ANALYSIS_POINTERTRACKING_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/PredIteratorCache.h"
+
+namespace llvm {
+ class DominatorTree;
+ class ScalarEvolution;
+ class SCEV;
+ class Loop;
+ class LoopInfo;
+ class TargetData;
+
+  // Result from the solver, assuming the pointer is not NULL
+  // and this is not a use-after-free situation.
+ enum SolverResult {
+    AlwaysFalse, // always false with the above constraints
+    AlwaysTrue,  // always true with the above constraints
+    Unknown      // sometimes true, sometimes false, or undecided
+ };
+
+ class PointerTracking : public FunctionPass {
+ public:
+ typedef ICmpInst::Predicate Predicate;
+ static char ID;
+ PointerTracking();
+
+ virtual bool doInitialization(Module &M);
+
+ // If this pointer directly points to an allocation, return
+ // the number of elements of type Ty allocated.
+ // Otherwise return CouldNotCompute.
+ // Since allocations can fail by returning NULL, the real element count
+ // for every allocation is either 0 or the value returned by this function.
+ const SCEV *getAllocationElementCount(Value *P) const;
+
+    // Same as getAllocationElementCount() but returns the size in bytes.
+ // We consider one byte as 8 bits.
+ const SCEV *getAllocationSizeInBytes(Value *V) const;
+
+ // Given a Pointer, determine a base pointer of known size, and an offset
+ // therefrom.
+    // When unable to determine this, it sets Base to NULL, and BaseSize and
+    // Offset to CouldNotCompute.
+    // BaseSize and Offset are in bytes: Pointer == Base + Offset.
+ void getPointerOffset(Value *Pointer, Value *&Base, const SCEV *& BaseSize,
+ const SCEV *&Offset) const;
+
+    // Compares the two scalar evolution expressions according to Pred, and
+    // if it can prove that the result is always true or always false, returns
+    // AlwaysTrue/AlwaysFalse. Otherwise it returns Unknown.
+ enum SolverResult compareSCEV(const SCEV *A, Predicate Pred, const SCEV *B,
+ const Loop *L);
+
+ // Determines whether the condition LHS <Pred> RHS is sufficient
+ // for the condition A <Pred> B to hold.
+ // Currently only ULT/ULE is supported.
+ // This errs on the side of returning false.
+ bool conditionSufficient(const SCEV *LHS, Predicate Pred1, const SCEV *RHS,
+ const SCEV *A, Predicate Pred2, const SCEV *B,
+ const Loop *L);
+
+ // Determines whether Offset is known to be always in [0, Limit) bounds.
+ // This errs on the side of returning Unknown.
+ enum SolverResult checkLimits(const SCEV *Offset, const SCEV *Limit,
+ BasicBlock *BB);
+
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ void print(raw_ostream &OS, const Module* = 0) const;
+ private:
+ Function *FF;
+ TargetData *TD;
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DominatorTree *DT;
+
+ Function *callocFunc;
+ Function *reallocFunc;
+ PredIteratorCache predCache;
+
+ SmallPtrSet<const SCEV*, 1> analyzing;
+
+ enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred,
+ const SCEV *A, const SCEV *B) const;
+ static bool isMonotonic(const SCEV *S);
+ bool scevPositive(const SCEV *A, const Loop *L, bool strict=true) const;
+ bool conditionSufficient(Value *Cond, bool negated,
+ const SCEV *A, Predicate Pred, const SCEV *B);
+ Value *getConditionToReach(BasicBlock *A,
+ DomTreeNodeBase<BasicBlock> *B,
+ bool &negated);
+ Value *getConditionToReach(BasicBlock *A,
+ BasicBlock *B,
+ bool &negated);
+ const SCEV *computeAllocationCount(Value *P, const Type *&Ty) const;
+ const SCEV *computeAllocationCountForType(Value *P, const Type *Ty) const;
+ };
+}
+#endif
+
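
A sketch of a client pass querying PointerTracking; the pass name and the load-only traversal are hypothetical, and only the interface declared above is used:

  #include "llvm/Analysis/PointerTracking.h"
  using namespace llvm;

  namespace {
    struct BoundsQuery : public FunctionPass {
      static char ID;
      BoundsQuery() : FunctionPass(&ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<PointerTracking>();
        AU.setPreservesAll();
      }

      virtual bool runOnFunction(Function &F) {
        PointerTracking &PT = getAnalysis<PointerTracking>();
        for (Function::iterator BB = F.begin(); BB != F.end(); ++BB)
          for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
            if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
              // Per the comments above, allocations can fail, so treat the
              // result as "either 0 or this count".
              const SCEV *Count =
                PT.getAllocationElementCount(LI->getPointerOperand());
              (void)Count; // a real client would act on it
            }
        return false; // analysis only, nothing modified
      }
    };
    char BoundsQuery::ID = 0;
  }
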
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index cd6af74024a5..171cfdb2eac3 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -49,6 +49,14 @@ struct PostDominatorTree : public FunctionPass {
return DT->getNode(BB);
}
+ inline bool dominates(DomTreeNode* A, DomTreeNode* B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool dominates(const BasicBlock* A, const BasicBlock* B) const {
+ return DT->dominates(A, B);
+ }
+
inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const {
return DT->properlyDominates(A, B);
}
@@ -57,9 +65,11 @@ struct PostDominatorTree : public FunctionPass {
return DT->properlyDominates(A, B);
}
- virtual void print(std::ostream &OS, const Module* M= 0) const {
- DT->print(OS, M);
+ virtual void releaseMemory() {
+ DT->releaseMemory();
}
+
+ virtual void print(raw_ostream &OS, const Module*) const;
};
FunctionPass* createPostDomTree();
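
The added overloads make post-dominance queries read like the ordinary dominator-tree API; a small sketch against the interface above:

  // True if A post-dominates B, i.e. every path from B to the function's
  // exit passes through A.
  static bool postDominates(PostDominatorTree &PDT,
                            const BasicBlock *A, const BasicBlock *B) {
    return PDT.dominates(A, B);
  }
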
diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h
index ff83f97ee042..2a80f3d4c43a 100644
--- a/include/llvm/Analysis/ProfileInfo.h
+++ b/include/llvm/Analysis/ProfileInfo.h
@@ -14,54 +14,123 @@
//
// Note that to be useful, all profile-based optimizations should preserve
// ProfileInfo, which requires that they notify it when changes to the CFG are
-// made.
+// made. (This is not implemented yet.)
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_PROFILEINFO_H
#define LLVM_ANALYSIS_PROFILEINFO_H
+#include "llvm/BasicBlock.h"
+#include <cassert>
#include <string>
#include <map>
namespace llvm {
- class BasicBlock;
+ class Function;
class Pass;
+ class raw_ostream;
- /// ProfileInfo Class - This class holds and maintains edge profiling
+ /// ProfileInfo Class - This class holds and maintains profiling
/// information for some unit of code.
class ProfileInfo {
+ public:
+ // Types for handling profiling information.
+ typedef std::pair<const BasicBlock*, const BasicBlock*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::map<Edge, double> EdgeWeights;
+ typedef std::map<const BasicBlock*, double> BlockCounts;
+
protected:
- // EdgeCounts - Count the number of times a transition between two blocks is
- // executed. As a special case, we also hold an edge from the null
- // BasicBlock to the entry block to indicate how many times the function was
- // entered.
- std::map<std::pair<BasicBlock*, BasicBlock*>, unsigned> EdgeCounts;
+ // EdgeInformation - Count the number of times a transition between two
+ // blocks is executed. As a special case, we also hold an edge from the
+ // null BasicBlock to the entry block to indicate how many times the
+ // function was entered.
+ std::map<const Function*, EdgeWeights> EdgeInformation;
+
+ // BlockInformation - Count the number of times a block is executed.
+ std::map<const Function*, BlockCounts> BlockInformation;
+
+ // FunctionInformation - Count the number of times a function is executed.
+ std::map<const Function*, double> FunctionInformation;
public:
static char ID; // Class identification, replacement for typeinfo
virtual ~ProfileInfo(); // We want to be subclassed
+ // MissingValue - The value that is returned for execution counts in case
+ // no value is available.
+ static const double MissingValue;
+
+ // getFunction() - Returns the Function for an Edge, checking for validity.
+ static const Function* getFunction(Edge e) {
+ if (e.first) {
+ return e.first->getParent();
+ } else if (e.second) {
+ return e.second->getParent();
+ }
+ assert(0 && "Invalid ProfileInfo::Edge");
+ return (const Function*)0;
+ }
+
+ // getEdge() - Creates an Edge from two BasicBlocks.
+ static Edge getEdge(const BasicBlock *Src, const BasicBlock *Dest) {
+ return std::make_pair(Src, Dest);
+ }
+
//===------------------------------------------------------------------===//
/// Profile Information Queries
///
- unsigned getExecutionCount(BasicBlock *BB) const;
+ double getExecutionCount(const Function *F);
+
+ double getExecutionCount(const BasicBlock *BB);
+
+ double getEdgeWeight(Edge e) const {
+ std::map<const Function*, EdgeWeights>::const_iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return MissingValue;
- unsigned getEdgeWeight(BasicBlock *Src, BasicBlock *Dest) const {
- std::map<std::pair<BasicBlock*, BasicBlock*>, unsigned>::const_iterator I=
- EdgeCounts.find(std::make_pair(Src, Dest));
- return I != EdgeCounts.end() ? I->second : 0;
+ EdgeWeights::const_iterator I = J->second.find(e);
+ if (I == J->second.end()) return MissingValue;
+
+ return I->second;
+ }
+
+    EdgeWeights &getEdgeWeights(const Function *F) {
+ return EdgeInformation[F];
}
//===------------------------------------------------------------------===//
/// Analysis Update Methods
///
+ void removeBlock(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J == BlockInformation.end()) return;
+
+ J->second.erase(BB);
+ }
+
+ void removeEdge(Edge e) {
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return;
+ J->second.erase(e);
+ }
+
+ void splitEdge(const BasicBlock *FirstBB, const BasicBlock *SecondBB,
+ const BasicBlock *NewBB, bool MergeIdenticalEdges = false);
+
+ void replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB);
};
/// createProfileLoaderPass - This function returns a Pass that loads the
/// profiling information for the module from the specified filename, making
/// it available to the optimizers.
Pass *createProfileLoaderPass(const std::string &Filename);
+
+ raw_ostream& operator<<(raw_ostream &O, ProfileInfo::Edge E);
+
} // End llvm namespace
#endif
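
A sketch of the new edge-weight queries from client code; recall that the entry edge uses a null source block by convention, and that MissingValue signals absent data. The function name is hypothetical:

  #include "llvm/Analysis/ProfileInfo.h"
  #include "llvm/Function.h"
  using namespace llvm;

  // How often was F entered, according to the profile?
  static double entryWeight(ProfileInfo &PI, const Function *F) {
    ProfileInfo::Edge E = ProfileInfo::getEdge(0, &F->getEntryBlock());
    double W = PI.getEdgeWeight(E);
    if (W == ProfileInfo::MissingValue)
      return 0.0; // no profile data recorded for this edge
    return W;
  }
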
diff --git a/include/llvm/Analysis/ProfileInfoLoader.h b/include/llvm/Analysis/ProfileInfoLoader.h
index 9076fbc4fb95..9e0c393c428f 100644
--- a/include/llvm/Analysis/ProfileInfoLoader.h
+++ b/include/llvm/Analysis/ProfileInfoLoader.h
@@ -27,11 +27,13 @@ class Function;
class BasicBlock;
class ProfileInfoLoader {
+ const std::string &Filename;
Module &M;
std::vector<std::string> CommandLines;
std::vector<unsigned> FunctionCounts;
std::vector<unsigned> BlockCounts;
std::vector<unsigned> EdgeCounts;
+ std::vector<unsigned> OptimalEdgeCounts;
std::vector<unsigned> BBTrace;
bool Warned;
public:
@@ -40,49 +42,41 @@ public:
ProfileInfoLoader(const char *ToolName, const std::string &Filename,
Module &M);
+ static const unsigned Uncounted;
+
unsigned getNumExecutions() const { return CommandLines.size(); }
const std::string &getExecution(unsigned i) const { return CommandLines[i]; }
- // getFunctionCounts - This method is used by consumers of function counting
- // information. If we do not directly have function count information, we
- // compute it from other, more refined, types of profile information.
- //
- void getFunctionCounts(std::vector<std::pair<Function*, unsigned> > &Counts);
+ const std::string &getFileName() const { return Filename; }
- // hasAccurateBlockCounts - Return true if we can synthesize accurate block
- // frequency information from whatever we have.
+ // getRawFunctionCounts - This method is used by consumers of function
+ // counting information.
//
- bool hasAccurateBlockCounts() const {
- return !BlockCounts.empty() || !EdgeCounts.empty();
+ const std::vector<unsigned> &getRawFunctionCounts() const {
+ return FunctionCounts;
}
- // hasAccurateEdgeCounts - Return true if we can synthesize accurate edge
- // frequency information from whatever we have.
+ // getRawBlockCounts - This method is used by consumers of block counting
+ // information.
//
- bool hasAccurateEdgeCounts() const {
- return !EdgeCounts.empty();
+ const std::vector<unsigned> &getRawBlockCounts() const {
+ return BlockCounts;
}
- // getBlockCounts - This method is used by consumers of block counting
- // information. If we do not directly have block count information, we
- // compute it from other, more refined, types of profile information.
- //
- void getBlockCounts(std::vector<std::pair<BasicBlock*, unsigned> > &Counts);
-
// getEdgeCounts - This method is used by consumers of edge counting
- // information. If we do not directly have edge count information, we compute
- // it from other, more refined, types of profile information.
- //
- // Edges are represented as a pair, where the first element is the basic block
- // and the second element is the successor number.
+ // information.
//
- typedef std::pair<BasicBlock*, unsigned> Edge;
- void getEdgeCounts(std::vector<std::pair<Edge, unsigned> > &Counts);
+ const std::vector<unsigned> &getRawEdgeCounts() const {
+ return EdgeCounts;
+ }
- // getBBTrace - This method is used by consumers of basic-block trace
- // information.
+  // getRawOptimalEdgeCounts - This method is used by consumers of optimal
+  // edge counting information.
//
- void getBBTrace(std::vector<BasicBlock *> &Trace);
+ const std::vector<unsigned> &getRawOptimalEdgeCounts() const {
+ return OptimalEdgeCounts;
+ }
+
};
} // End llvm namespace
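
With the convenience accessors gone, mapping raw counts back to functions and blocks is now the caller's job; a sketch, using llvm-prof's conventional output filename and a hypothetical tool name:

  #include "llvm/Analysis/ProfileInfoLoader.h"
  using namespace llvm;

  static unsigned totalFunctionExecutions(Module &M) {
    ProfileInfoLoader PIL("example-tool", "llvmprof.out", M);
    // Raw counts, one entry per profiled function.
    const std::vector<unsigned> &Counts = PIL.getRawFunctionCounts();
    unsigned Total = 0;
    for (unsigned i = 0, e = Counts.size(); i != e; ++i)
      Total += Counts[i];
    return Total;
  }
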
diff --git a/include/llvm/Analysis/ProfileInfoTypes.h b/include/llvm/Analysis/ProfileInfoTypes.h
index f311f8cb90c5..0d531d5c5f88 100644
--- a/include/llvm/Analysis/ProfileInfoTypes.h
+++ b/include/llvm/Analysis/ProfileInfoTypes.h
@@ -22,7 +22,8 @@ enum ProfilingType {
BlockInfo = 3, /* Block profiling information */
EdgeInfo = 4, /* Edge profiling information */
PathInfo = 5, /* Path profiling information */
- BBTraceInfo = 6 /* Basic block trace information */
+ BBTraceInfo = 6, /* Basic block trace information */
+ OptEdgeInfo = 7 /* Edge profiling information, optimal version */
};
#endif /* LLVM_ANALYSIS_PROFILEINFOTYPES_H */
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 9da5c59a5e54..ed5d18eaf981 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -22,37 +22,50 @@
#define LLVM_ANALYSIS_SCALAREVOLUTION_H
#include "llvm/Pass.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/DenseMap.h"
-#include <iosfwd>
+#include <map>
namespace llvm {
class APInt;
+ class Constant;
class ConstantInt;
+ class DominatorTree;
class Type;
class ScalarEvolution;
class TargetData;
+ class LLVMContext;
+ class Loop;
+ class LoopInfo;
+ class Operator;
/// SCEV - This class represents an analyzed expression in the program. These
/// are opaque objects that the client is not allowed to do much with
/// directly.
///
- class SCEV : public FoldingSetNode {
- const unsigned SCEVType; // The SCEV baseclass this node corresponds to
+ class SCEV : public FastFoldingSetNode {
+ // The SCEV baseclass this node corresponds to
+ const unsigned short SCEVType;
+ protected:
+ /// SubclassData - This field is initialized to zero and may be used in
+    /// subclasses to store miscellaneous information.
+ unsigned short SubclassData;
+
+ private:
SCEV(const SCEV &); // DO NOT IMPLEMENT
void operator=(const SCEV &); // DO NOT IMPLEMENT
protected:
virtual ~SCEV();
public:
- explicit SCEV(unsigned SCEVTy) :
- SCEVType(SCEVTy) {}
-
- virtual void Profile(FoldingSetNodeID &ID) const = 0;
+ explicit SCEV(const FoldingSetNodeID &ID, unsigned SCEVTy) :
+ FastFoldingSetNode(ID), SCEVType(SCEVTy), SubclassData(0) {}
unsigned getSCEVType() const { return SCEVType; }
@@ -83,26 +96,22 @@ namespace llvm {
///
bool isAllOnesValue() const;
- /// replaceSymbolicValuesWithConcrete - If this SCEV internally references
- /// the symbolic value "Sym", construct and return a new SCEV that produces
- /// the same value, but which uses the concrete value Conc instead of the
- /// symbolic value. If this SCEV does not use the symbolic value, it
- /// returns itself.
- virtual const SCEV*
- replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const = 0;
+ /// hasOperand - Test whether this SCEV has Op as a direct or
+ /// indirect operand.
+ virtual bool hasOperand(const SCEV *Op) const = 0;
/// dominates - Return true if the elements that make up this SCEV dominate
/// the specified basic block.
virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const = 0;
+    /// properlyDominates - Return true if the elements that make up this SCEV
+ /// properly dominate the specified basic block.
+ virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const = 0;
+
/// print - Print out the internal representation of this scalar to the
/// specified stream. This should really only be used for debugging
/// purposes.
virtual void print(raw_ostream &OS) const = 0;
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
/// dump - This method is used for debugging.
///
@@ -114,11 +123,6 @@ namespace llvm {
return OS;
}
- inline std::ostream &operator<<(std::ostream &OS, const SCEV &S) {
- S.print(OS);
- return OS;
- }
-
/// SCEVCouldNotCompute - An object of this class is returned by queries that
/// could not be answered. For example, if you ask for the number of
/// iterations of a linked-list traversal loop, you will get one of these.
@@ -128,20 +132,20 @@ namespace llvm {
SCEVCouldNotCompute();
// None of these methods are valid for this object.
- virtual void Profile(FoldingSetNodeID &ID) const;
virtual bool isLoopInvariant(const Loop *L) const;
virtual const Type *getType() const;
virtual bool hasComputableLoopEvolution(const Loop *L) const;
virtual void print(raw_ostream &OS) const;
- virtual const SCEV*
- replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const;
+ virtual bool hasOperand(const SCEV *Op) const;
virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const {
return true;
}
+ virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ return true;
+ }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SCEVCouldNotCompute *S) { return true; }
static bool classof(const SCEV *S);
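
Since queries hand back a SCEVCouldNotCompute object rather than failing, the usual client pattern is an isa<> test on the result; a minimal sketch, with a hypothetical helper name:

  // Use the backedge-taken count only when it was actually computable.
  static bool hasKnownTripCount(ScalarEvolution &SE, const Loop *L) {
    const SCEV *BTC = SE.getBackedgeTakenCount(L);
    return !isa<SCEVCouldNotCompute>(BTC);
  }
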
@@ -163,7 +167,7 @@ namespace llvm {
};
friend class SCEVCallbackVH;
- friend class SCEVExpander;
+ friend struct SCEVExpander;
/// F - The function we are analyzing.
///
@@ -183,7 +187,7 @@ namespace llvm {
/// Scalars - This is a cache of the scalars we have analyzed so far.
///
- std::map<SCEVCallbackVH, const SCEV*> Scalars;
+ std::map<SCEVCallbackVH, const SCEV *> Scalars;
/// BackedgeTakenInfo - Information about the backedge-taken count
/// of a loop. This currently includes an exact count and a maximum count.
@@ -191,16 +195,16 @@ namespace llvm {
struct BackedgeTakenInfo {
/// Exact - An expression indicating the exact backedge-taken count of
/// the loop if it is known, or a SCEVCouldNotCompute otherwise.
- const SCEV* Exact;
+ const SCEV *Exact;
- /// Exact - An expression indicating the least maximum backedge-taken
+ /// Max - An expression indicating the least maximum backedge-taken
/// count of the loop that is known, or a SCEVCouldNotCompute.
- const SCEV* Max;
+ const SCEV *Max;
- /*implicit*/ BackedgeTakenInfo(const SCEV* exact) :
+ /*implicit*/ BackedgeTakenInfo(const SCEV *exact) :
Exact(exact), Max(exact) {}
- BackedgeTakenInfo(const SCEV* exact, const SCEV* max) :
+ BackedgeTakenInfo(const SCEV *exact, const SCEV *max) :
Exact(exact), Max(max) {}
/// hasAnyInfo - Test whether this BackedgeTakenInfo contains any
@@ -223,37 +227,42 @@ namespace llvm {
/// exit value.
std::map<PHINode*, Constant*> ConstantEvolutionLoopExitValue;
- /// ValuesAtScopes - This map contains entries for all the instructions
- /// that we attempt to compute getSCEVAtScope information for without
- /// using SCEV techniques, which can be expensive.
- std::map<Instruction *, std::map<const Loop *, Constant *> > ValuesAtScopes;
+ /// ValuesAtScopes - This map contains entries for all the expressions
+ /// that we attempt to compute getSCEVAtScope information for, which can
+ /// be expensive in extreme cases.
+ std::map<const SCEV *,
+ std::map<const Loop *, const SCEV *> > ValuesAtScopes;
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
- const SCEV* createSCEV(Value *V);
+ const SCEV *createSCEV(Value *V);
/// createNodeForPHI - Provide the special handling we need to analyze PHI
/// SCEVs.
- const SCEV* createNodeForPHI(PHINode *PN);
+ const SCEV *createNodeForPHI(PHINode *PN);
/// createNodeForGEP - Provide the special handling we need to analyze GEP
/// SCEVs.
- const SCEV* createNodeForGEP(User *GEP);
+ const SCEV *createNodeForGEP(Operator *GEP);
+
+ /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called
+ /// at most once for each SCEV+Loop pair.
+ ///
+ const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
- /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value
- /// for the specified instruction and replaces any references to the
- /// symbolic value SymName with the specified value. This is used during
- /// PHI resolution.
- void ReplaceSymbolicValueWithConcrete(Instruction *I,
- const SCEV* SymName,
- const SCEV* NewVal);
+    /// ForgetSymbolicName - This looks up computed SCEV values for all
+ /// instructions that depend on the given instruction and removes them from
+ /// the Scalars map if they reference SymName. This is used during PHI
+ /// resolution.
+ void ForgetSymbolicName(Instruction *I, const SCEV *SymName);
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.
- const SCEV* getBECount(const SCEV* Start,
- const SCEV* End,
- const SCEV* Step);
+ const SCEV *getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap);
/// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given
/// loop, lazily computing new values if the loop hasn't been analyzed
@@ -290,31 +299,32 @@ namespace llvm {
BasicBlock *FBB);
/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition
- /// of 'icmp op load X, cst', try to see if we can compute the trip count.
- const SCEV*
+ /// of 'icmp op load X, cst', try to see if we can compute the
+ /// backedge-taken count.
+ const SCEV *
ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI,
Constant *RHS,
const Loop *L,
ICmpInst::Predicate p);
- /// ComputeBackedgeTakenCountExhaustively - If the trip is known to execute
+ /// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute
/// a constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until we get the exit
/// condition gets a value of ExitWhen (true or false). If we cannot
- /// evaluate the trip count of the loop, return CouldNotCompute.
- const SCEV* ComputeBackedgeTakenCountExhaustively(const Loop *L,
+ /// evaluate the backedge-taken count of the loop, return CouldNotCompute.
+ const SCEV *ComputeBackedgeTakenCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen);
/// HowFarToZero - Return the number of times a backedge comparing the
/// specified value to zero will execute. If not computable, return
/// CouldNotCompute.
- const SCEV* HowFarToZero(const SCEV *V, const Loop *L);
+ const SCEV *HowFarToZero(const SCEV *V, const Loop *L);
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute.
- const SCEV* HowFarToNonZero(const SCEV *V, const Loop *L);
+ const SCEV *HowFarToNonZero(const SCEV *V, const Loop *L);
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
@@ -332,11 +342,25 @@ namespace llvm {
/// found.
BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
- /// isNecessaryCond - Test whether the given CondValue value is a condition
- /// which is at least as strict as the one described by Pred, LHS, and RHS.
- bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- bool Inverse);
+ /// isImpliedCond - Test whether the condition described by Pred, LHS,
+ /// and RHS is true whenever the given Cond value evaluates to true.
+ bool isImpliedCond(Value *Cond, ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ bool Inverse);
+
+ /// isImpliedCondOperands - Test whether the condition described by Pred,
+    /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+ /// and FoundRHS is true.
+ bool isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS);
+
+ /// isImpliedCondOperandsHelper - Test whether the condition described by
+    /// Pred, LHS, and RHS is true whenever the condition described by Pred,
+ /// FoundLHS, and FoundRHS is true.
+ bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS);
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
@@ -345,15 +369,12 @@ namespace llvm {
Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs,
const Loop *L);
- /// forgetLoopPHIs - Delete the memoized SCEVs associated with the
- /// PHI nodes in the given loop. This is used when the trip count of
- /// the loop may have changed.
- void forgetLoopPHIs(const Loop *L);
-
public:
static char ID; // Pass identification, replacement for typeid
ScalarEvolution();
+ LLVMContext &getContext() const { return F->getContext(); }
+
/// isSCEVable - Test if values of the given type are analyzable within
/// the SCEV framework. This primarily includes integer types, and it
/// can optionally include pointer types if the ScalarEvolution class
@@ -370,127 +391,129 @@ namespace llvm {
/// this is the pointer-sized integer type.
const Type *getEffectiveSCEVType(const Type *Ty) const;
- /// getSCEV - Return a SCEV expression handle for the full generality of the
+ /// getSCEV - Return a SCEV expression for the full generality of the
/// specified expression.
- const SCEV* getSCEV(Value *V);
-
- const SCEV* getConstant(ConstantInt *V);
- const SCEV* getConstant(const APInt& Val);
- const SCEV* getConstant(const Type *Ty, uint64_t V, bool isSigned = false);
- const SCEV* getTruncateExpr(const SCEV* Op, const Type *Ty);
- const SCEV* getZeroExtendExpr(const SCEV* Op, const Type *Ty);
- const SCEV* getSignExtendExpr(const SCEV* Op, const Type *Ty);
- const SCEV* getAnyExtendExpr(const SCEV* Op, const Type *Ty);
- const SCEV* getAddExpr(SmallVectorImpl<const SCEV*> &Ops);
- const SCEV* getAddExpr(const SCEV* LHS, const SCEV* RHS) {
- SmallVector<const SCEV*, 2> Ops;
+ const SCEV *getSCEV(Value *V);
+
+ const SCEV *getConstant(ConstantInt *V);
+ const SCEV *getConstant(const APInt& Val);
+ const SCEV *getConstant(const Type *Ty, uint64_t V, bool isSigned = false);
+ const SCEV *getTruncateExpr(const SCEV *Op, const Type *Ty);
+ const SCEV *getZeroExtendExpr(const SCEV *Op, const Type *Ty);
+ const SCEV *getSignExtendExpr(const SCEV *Op, const Type *Ty);
+ const SCEV *getAnyExtendExpr(const SCEV *Op, const Type *Ty);
+ const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ bool HasNUW = false, bool HasNSW = false);
+ const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
+ bool HasNUW = false, bool HasNSW = false) {
+ SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, HasNUW, HasNSW);
}
- const SCEV* getAddExpr(const SCEV* Op0, const SCEV* Op1,
- const SCEV* Op2) {
- SmallVector<const SCEV*, 3> Ops;
+ const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1,
+ const SCEV *Op2,
+ bool HasNUW = false, bool HasNSW = false) {
+ SmallVector<const SCEV *, 3> Ops;
Ops.push_back(Op0);
Ops.push_back(Op1);
Ops.push_back(Op2);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, HasNUW, HasNSW);
}
- const SCEV* getMulExpr(SmallVectorImpl<const SCEV*> &Ops);
- const SCEV* getMulExpr(const SCEV* LHS, const SCEV* RHS) {
- SmallVector<const SCEV*, 2> Ops;
+ const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ bool HasNUW = false, bool HasNSW = false);
+ const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
+ bool HasNUW = false, bool HasNSW = false) {
+ SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
- return getMulExpr(Ops);
+ return getMulExpr(Ops, HasNUW, HasNSW);
}
- const SCEV* getUDivExpr(const SCEV* LHS, const SCEV* RHS);
- const SCEV* getAddRecExpr(const SCEV* Start, const SCEV* Step,
- const Loop *L);
- const SCEV* getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
- const Loop *L);
- const SCEV* getAddRecExpr(const SmallVectorImpl<const SCEV*> &Operands,
- const Loop *L) {
- SmallVector<const SCEV*, 4> NewOp(Operands.begin(), Operands.end());
- return getAddRecExpr(NewOp, L);
+ const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step,
+ const Loop *L,
+ bool HasNUW = false, bool HasNSW = false);
+ const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L,
+ bool HasNUW = false, bool HasNSW = false);
+ const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L,
+ bool HasNUW = false, bool HasNSW = false) {
+ SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
+ return getAddRecExpr(NewOp, L, HasNUW, HasNSW);
}
- const SCEV* getSMaxExpr(const SCEV* LHS, const SCEV* RHS);
- const SCEV* getSMaxExpr(SmallVectorImpl<const SCEV*> &Operands);
- const SCEV* getUMaxExpr(const SCEV* LHS, const SCEV* RHS);
- const SCEV* getUMaxExpr(SmallVectorImpl<const SCEV*> &Operands);
- const SCEV* getSMinExpr(const SCEV* LHS, const SCEV* RHS);
- const SCEV* getUMinExpr(const SCEV* LHS, const SCEV* RHS);
- const SCEV* getUnknown(Value *V);
- const SCEV* getCouldNotCompute();
+ const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+ const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+ const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getFieldOffsetExpr(const StructType *STy, unsigned FieldNo);
+ const SCEV *getAllocSizeExpr(const Type *AllocTy);
+ const SCEV *getUnknown(Value *V);
+ const SCEV *getCouldNotCompute();
/// getNegativeSCEV - Return the SCEV object corresponding to -V.
///
- const SCEV* getNegativeSCEV(const SCEV* V);
+ const SCEV *getNegativeSCEV(const SCEV *V);
/// getNotSCEV - Return the SCEV object corresponding to ~V.
///
- const SCEV* getNotSCEV(const SCEV* V);
+ const SCEV *getNotSCEV(const SCEV *V);
/// getMinusSCEV - Return LHS-RHS.
///
- const SCEV* getMinusSCEV(const SCEV* LHS,
- const SCEV* RHS);
+ const SCEV *getMinusSCEV(const SCEV *LHS,
+ const SCEV *RHS);
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion
/// of the input value to the specified type. If the type must be
/// extended, it is zero extended.
- const SCEV* getTruncateOrZeroExtend(const SCEV* V, const Type *Ty);
+ const SCEV *getTruncateOrZeroExtend(const SCEV *V, const Type *Ty);
/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion
/// of the input value to the specified type. If the type must be
/// extended, it is sign extended.
- const SCEV* getTruncateOrSignExtend(const SCEV* V, const Type *Ty);
+ const SCEV *getTruncateOrSignExtend(const SCEV *V, const Type *Ty);
/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of
/// the input value to the specified type. If the type must be extended,
/// it is zero extended. The conversion must not be narrowing.
- const SCEV* getNoopOrZeroExtend(const SCEV* V, const Type *Ty);
+ const SCEV *getNoopOrZeroExtend(const SCEV *V, const Type *Ty);
/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of
/// the input value to the specified type. If the type must be extended,
/// it is sign extended. The conversion must not be narrowing.
- const SCEV* getNoopOrSignExtend(const SCEV* V, const Type *Ty);
+ const SCEV *getNoopOrSignExtend(const SCEV *V, const Type *Ty);
/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
/// the input value to the specified type. If the type must be extended,
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
- const SCEV* getNoopOrAnyExtend(const SCEV* V, const Type *Ty);
+ const SCEV *getNoopOrAnyExtend(const SCEV *V, const Type *Ty);
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be
/// widening.
- const SCEV* getTruncateOrNoop(const SCEV* V, const Type *Ty);
+ const SCEV *getTruncateOrNoop(const SCEV *V, const Type *Ty);
/// getIntegerSCEV - Given a SCEVable type, create a constant for the
/// specified signed integer value and return a SCEV for the constant.
- const SCEV* getIntegerSCEV(int Val, const Type *Ty);
+ const SCEV *getIntegerSCEV(int Val, const Type *Ty);
/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
/// the types using zero-extension, and then perform a umax operation
/// with them.
- const SCEV* getUMaxFromMismatchedTypes(const SCEV* LHS,
- const SCEV* RHS);
+ const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS);
/// getUMinFromMismatchedTypes - Promote the operands to the wider of
/// the types using zero-extension, and then perform a umin operation
/// with them.
- const SCEV* getUMinFromMismatchedTypes(const SCEV* LHS,
- const SCEV* RHS);
-
- /// hasSCEV - Return true if the SCEV for this value has already been
- /// computed.
- bool hasSCEV(Value *V) const;
+ const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS);
- /// setSCEV - Insert the specified SCEV into the map of current SCEVs for
- /// the specified value.
- void setSCEV(Value *V, const SCEV* H);
-
- /// getSCEVAtScope - Return a SCEV expression handle for the specified value
+ /// getSCEVAtScope - Return a SCEV expression for the specified value
/// at the specified scope in the program. The L value specifies a loop
/// nest in which to evaluate the expression: null means the top level, and
/// a non-null loop means immediately inside that loop.
@@ -500,18 +523,24 @@ namespace llvm {
///
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
- const SCEV* getSCEVAtScope(const SCEV *S, const Loop *L);
+ const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L);
/// getSCEVAtScope - This is a convenience function which does
/// getSCEVAtScope(getSCEV(V), L).
- const SCEV* getSCEVAtScope(Value *V, const Loop *L);
+ const SCEV *getSCEVAtScope(Value *V, const Loop *L);
/// isLoopGuardedByCond - Test whether entry to the loop is protected by
/// a conditional between LHS and RHS. This is used to help avoid max
- /// expressions in loop trip counts.
+ /// expressions in loop trip counts, and to eliminate casts.
bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
+ /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+ /// protected by a conditional between LHS and RHS. This is used to
+    /// eliminate casts.
+ bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
/// getBackedgeTakenCount - If the specified loop has a predictable
/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
/// object. The backedge-taken count is the number of times the loop header
@@ -523,12 +552,12 @@ namespace llvm {
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
///
- const SCEV* getBackedgeTakenCount(const Loop *L);
+ const SCEV *getBackedgeTakenCount(const Loop *L);
/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
- const SCEV* getMaxBackedgeTakenCount(const Loop *L);
+ const SCEV *getMaxBackedgeTakenCount(const Loop *L);
/// hasLoopInvariantBackedgeTakenCount - Return true if the specified loop
/// has an analyzable loop-invariant backedge-taken count.
@@ -545,24 +574,49 @@ namespace llvm {
/// time, the minimum number of times S is divisible by 2. For example,
/// given {4,+,8} it returns 2. If S is guaranteed to be 0, it returns the
/// bitwidth of S.
- uint32_t GetMinTrailingZeros(const SCEV* S);
+ uint32_t GetMinTrailingZeros(const SCEV *S);
- /// GetMinLeadingZeros - Determine the minimum number of zero bits that S is
- /// guaranteed to begin with (at every loop iteration).
- uint32_t GetMinLeadingZeros(const SCEV* S);
+ /// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+ ///
+ ConstantRange getUnsignedRange(const SCEV *S);
- /// GetMinSignBits - Determine the minimum number of sign bits that S is
- /// guaranteed to begin with.
- uint32_t GetMinSignBits(const SCEV* S);
+ /// getSignedRange - Determine the signed range for a particular SCEV.
+ ///
+ ConstantRange getSignedRange(const SCEV *S);
+
+ /// isKnownNegative - Test if the given expression is known to be negative.
+ ///
+ bool isKnownNegative(const SCEV *S);
+
+ /// isKnownPositive - Test if the given expression is known to be positive.
+ ///
+ bool isKnownPositive(const SCEV *S);
+
+ /// isKnownNonNegative - Test if the given expression is known to be
+ /// non-negative.
+ ///
+ bool isKnownNonNegative(const SCEV *S);
+
+ /// isKnownNonPositive - Test if the given expression is known to be
+ /// non-positive.
+ ///
+ bool isKnownNonPositive(const SCEV *S);
+
+ /// isKnownNonZero - Test if the given expression is known to be
+ /// non-zero.
+ ///
+ bool isKnownNonZero(const SCEV *S);
+
+    /// isKnownPredicate - Test if the given expression is known to satisfy
+ /// the condition described by Pred, LHS, and RHS.
+ ///
+ bool isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
virtual bool runOnFunction(Function &F);
virtual void releaseMemory();
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- void print(raw_ostream &OS, const Module* = 0) const;
- virtual void print(std::ostream &OS, const Module* = 0) const;
- void print(std::ostream *OS, const Module* M = 0) const {
- if (OS) print(*OS, M);
- }
+ virtual void print(raw_ostream &OS, const Module* = 0) const;
private:
FoldingSet<SCEV> UniqueSCEVs;
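
Taken together, a sketch of the reworked public API: the expression builders now accept NUW/NSW flags, and the leading-zero/sign-bit queries are replaced by range and predicate queries. The function and variable names here are hypothetical:

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;

  static void demo(ScalarEvolution &SE, Value *A, Value *B) {
    const SCEV *SA = SE.getSCEV(A);
    const SCEV *SB = SE.getSCEV(B);

    // An addition known not to wrap in the unsigned sense:
    const SCEV *Sum = SE.getAddExpr(SA, SB, /*HasNUW=*/true);

    // The new range and predicate queries:
    ConstantRange R = SE.getUnsignedRange(Sum);
    (void)R;
    if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, SA, SB)) {
      // ... SA < SB holds on every execution ...
    }
  }
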
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 60a23c504310..915227d77b51 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -17,13 +17,14 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/TargetFolder.h"
+#include <set>
namespace llvm {
/// SCEVExpander - This class uses information about analyzed scalars to
/// rewrite expressions in canonical form.
///
/// Clients should create an instance of this class when rewriting is needed,
- /// and destroy it when finished to allow the release of the associated
+ /// and destroy it when finished to allow the release of the associated
/// memory.
struct SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
ScalarEvolution &SE;
@@ -37,7 +38,8 @@ namespace llvm {
friend struct SCEVVisitor<SCEVExpander, Value*>;
public:
explicit SCEVExpander(ScalarEvolution &se)
- : SE(se), Builder(TargetFolder(se.TD)) {}
+ : SE(se), Builder(se.getContext(),
+ TargetFolder(se.TD, se.getContext())) {}
/// clear - Erase the contents of the InsertedExpressions map so that users
/// trying to expand the same expression into multiple BasicBlocks or
@@ -53,12 +55,14 @@ namespace llvm {
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
/// specified block.
- Value *expandCodeFor(const SCEV* SH, const Type *Ty, Instruction *IP) {
+ Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *IP) {
Builder.SetInsertPoint(IP->getParent(), IP);
return expandCodeFor(SH, Ty);
}
private:
+ LLVMContext &getContext() const { return SE.getContext(); }
+
/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation.
Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
@@ -70,8 +74,8 @@ namespace llvm {
/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
/// instead of using ptrtoint+arithmetic+inttoptr.
- Value *expandAddToGEP(const SCEV* const *op_begin,
- const SCEV* const *op_end,
+ Value *expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
const PointerType *PTy, const Type *Ty, Value *V);
Value *expand(const SCEV *S);
@@ -80,7 +84,7 @@ namespace llvm {
/// expression into the program. The inserted code is inserted into the
/// SCEVExpander's current insertion point. If a type is specified, the
/// result will be expanded to have that type, with a cast if necessary.
- Value *expandCodeFor(const SCEV* SH, const Type *Ty = 0);
+ Value *expandCodeFor(const SCEV *SH, const Type *Ty = 0);
/// isInsertedInstruction - Return true if the specified instruction was
/// inserted by the code rewriter. If so, the client should not modify the
@@ -111,6 +115,10 @@ namespace llvm {
Value *visitUMaxExpr(const SCEVUMaxExpr *S);
+ Value *visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S);
+
+ Value *visitAllocSizeExpr(const SCEVAllocSizeExpr *S);
+
Value *visitUnknown(const SCEVUnknown *S) {
return S->getValue();
}
@@ -118,4 +126,3 @@ namespace llvm {
}
#endif
-
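
A sketch of the expander's intended usage per the comment above: create a short-lived instance, expand, and let it be destroyed. The helper name is hypothetical:

  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  using namespace llvm;

  // Materialize S as IR of type Ty, inserted before IP.
  static Value *materialize(ScalarEvolution &SE, const SCEV *S,
                            const Type *Ty, Instruction *IP) {
    SCEVExpander Expander(SE);
    return Expander.expandCodeFor(S, Ty, IP);
  }
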
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index c54c86556c36..2c503506035e 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class ConstantInt;
@@ -25,8 +26,8 @@ namespace llvm {
// These should be ordered in terms of increasing complexity to make the
// folders simpler.
scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
- scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUnknown,
- scCouldNotCompute
+ scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
+ scFieldOffset, scAllocSize, scUnknown, scCouldNotCompute
};
//===--------------------------------------------------------------------===//
@@ -36,11 +37,9 @@ namespace llvm {
friend class ScalarEvolution;
ConstantInt *V;
- explicit SCEVConstant(ConstantInt *v) :
- SCEV(scConstant), V(v) {}
+ SCEVConstant(const FoldingSetNodeID &ID, ConstantInt *v) :
+ SCEV(ID, scConstant), V(v) {}
public:
- virtual void Profile(FoldingSetNodeID &ID) const;
-
ConstantInt *getValue() const { return V; }
virtual bool isLoopInvariant(const Loop *L) const {
@@ -53,16 +52,18 @@ namespace llvm {
virtual const Type *getType() const;
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const {
- return this;
+ virtual bool hasOperand(const SCEV *) const {
+ return false;
}
bool dominates(BasicBlock *BB, DominatorTree *DT) const {
return true;
}
+ bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ return true;
+ }
+
virtual void print(raw_ostream &OS) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -77,15 +78,14 @@ namespace llvm {
///
class SCEVCastExpr : public SCEV {
protected:
- const SCEV* Op;
+ const SCEV *Op;
const Type *Ty;
- SCEVCastExpr(unsigned SCEVTy, const SCEV* op, const Type *ty);
+ SCEVCastExpr(const FoldingSetNodeID &ID,
+ unsigned SCEVTy, const SCEV *op, const Type *ty);
public:
- virtual void Profile(FoldingSetNodeID &ID) const;
-
- const SCEV* getOperand() const { return Op; }
+ const SCEV *getOperand() const { return Op; }
virtual const Type *getType() const { return Ty; }
virtual bool isLoopInvariant(const Loop *L) const {
@@ -96,8 +96,14 @@ namespace llvm {
return Op->hasComputableLoopEvolution(L);
}
+ virtual bool hasOperand(const SCEV *O) const {
+ return Op == O || Op->hasOperand(O);
+ }
+
virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const;
+ virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SCEVCastExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
@@ -114,18 +120,10 @@ namespace llvm {
class SCEVTruncateExpr : public SCEVCastExpr {
friend class ScalarEvolution;
- SCEVTruncateExpr(const SCEV* op, const Type *ty);
+ SCEVTruncateExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty);
public:
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const {
- const SCEV* H = Op->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H == Op)
- return this;
- return SE.getTruncateExpr(H, Ty);
- }
-
virtual void print(raw_ostream &OS) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -142,18 +140,10 @@ namespace llvm {
class SCEVZeroExtendExpr : public SCEVCastExpr {
friend class ScalarEvolution;
- SCEVZeroExtendExpr(const SCEV* op, const Type *ty);
+ SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty);
public:
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const {
- const SCEV* H = Op->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H == Op)
- return this;
- return SE.getZeroExtendExpr(H, Ty);
- }
-
virtual void print(raw_ostream &OS) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -170,18 +160,10 @@ namespace llvm {
class SCEVSignExtendExpr : public SCEVCastExpr {
friend class ScalarEvolution;
- SCEVSignExtendExpr(const SCEV* op, const Type *ty);
+ SCEVSignExtendExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty);
public:
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const {
- const SCEV* H = Op->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H == Op)
- return this;
- return SE.getSignExtendExpr(H, Ty);
- }
-
virtual void print(raw_ostream &OS) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -198,22 +180,23 @@ namespace llvm {
///
class SCEVNAryExpr : public SCEV {
protected:
- SmallVector<const SCEV*, 8> Operands;
+ SmallVector<const SCEV *, 8> Operands;
- SCEVNAryExpr(enum SCEVTypes T, const SmallVectorImpl<const SCEV*> &ops)
- : SCEV(T), Operands(ops.begin(), ops.end()) {}
+ SCEVNAryExpr(const FoldingSetNodeID &ID,
+ enum SCEVTypes T, const SmallVectorImpl<const SCEV *> &ops)
+ : SCEV(ID, T), Operands(ops.begin(), ops.end()) {}
public:
- virtual void Profile(FoldingSetNodeID &ID) const;
-
unsigned getNumOperands() const { return (unsigned)Operands.size(); }
- const SCEV* getOperand(unsigned i) const {
+ const SCEV *getOperand(unsigned i) const {
assert(i < Operands.size() && "Operand index out of range!");
return Operands[i];
}
- const SmallVectorImpl<const SCEV*> &getOperands() const { return Operands; }
- typedef SmallVectorImpl<const SCEV*>::const_iterator op_iterator;
+ const SmallVectorImpl<const SCEV *> &getOperands() const {
+ return Operands;
+ }
+ typedef SmallVectorImpl<const SCEV *>::const_iterator op_iterator;
op_iterator op_begin() const { return Operands.begin(); }
op_iterator op_end() const { return Operands.end(); }
@@ -238,10 +221,28 @@ namespace llvm {
return HasVarying;
}
+ virtual bool hasOperand(const SCEV *O) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (O == getOperand(i) || getOperand(i)->hasOperand(O))
+ return true;
+ return false;
+ }
+
bool dominates(BasicBlock *BB, DominatorTree *DT) const;
+ bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
+
virtual const Type *getType() const { return getOperand(0)->getType(); }
+ bool hasNoUnsignedWrap() const { return SubclassData & (1 << 0); }
+ void setHasNoUnsignedWrap(bool B) {
+ SubclassData = (SubclassData & ~(1 << 0)) | (B << 0);
+ }
+ bool hasNoSignedWrap() const { return SubclassData & (1 << 1); }
+ void setHasNoSignedWrap(bool B) {
+ SubclassData = (SubclassData & ~(1 << 1)) | (B << 1);
+ }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SCEVNAryExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
@@ -259,15 +260,12 @@ namespace llvm {
///
class SCEVCommutativeExpr : public SCEVNAryExpr {
protected:
- SCEVCommutativeExpr(enum SCEVTypes T,
- const SmallVectorImpl<const SCEV*> &ops)
- : SCEVNAryExpr(T, ops) {}
+ SCEVCommutativeExpr(const FoldingSetNodeID &ID,
+ enum SCEVTypes T,
+ const SmallVectorImpl<const SCEV *> &ops)
+ : SCEVNAryExpr(ID, T, ops) {}
public:
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const;
-
virtual const char *getOperationStr() const = 0;
virtual void print(raw_ostream &OS) const;
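
A sketch of querying the no-wrap flags introduced above; SubclassData packs the two bits, and clients read them through the accessors (the guard shown is hypothetical):

  // E.g. sign extension of an <nsw> add recurrence can be folded into the
  // recurrence itself; a client might guard that with:
  static bool addRecHasNSW(const SCEV *S) {
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      return AR->hasNoSignedWrap();
    return false;
  }
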
@@ -289,8 +287,9 @@ namespace llvm {
class SCEVAddExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVAddExpr(const SmallVectorImpl<const SCEV*> &ops)
- : SCEVCommutativeExpr(scAddExpr, ops) {
+ SCEVAddExpr(const FoldingSetNodeID &ID,
+ const SmallVectorImpl<const SCEV *> &ops)
+ : SCEVCommutativeExpr(ID, scAddExpr, ops) {
}
public:
@@ -309,8 +308,9 @@ namespace llvm {
class SCEVMulExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVMulExpr(const SmallVectorImpl<const SCEV*> &ops)
- : SCEVCommutativeExpr(scMulExpr, ops) {
+ SCEVMulExpr(const FoldingSetNodeID &ID,
+ const SmallVectorImpl<const SCEV *> &ops)
+ : SCEVCommutativeExpr(ID, scMulExpr, ops) {
}
public:
@@ -330,16 +330,14 @@ namespace llvm {
class SCEVUDivExpr : public SCEV {
friend class ScalarEvolution;
- const SCEV* LHS;
- const SCEV* RHS;
- SCEVUDivExpr(const SCEV* lhs, const SCEV* rhs)
- : SCEV(scUDivExpr), LHS(lhs), RHS(rhs) {}
+ const SCEV *LHS;
+ const SCEV *RHS;
+ SCEVUDivExpr(const FoldingSetNodeID &ID, const SCEV *lhs, const SCEV *rhs)
+ : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {}
public:
- virtual void Profile(FoldingSetNodeID &ID) const;
-
- const SCEV* getLHS() const { return LHS; }
- const SCEV* getRHS() const { return RHS; }
+ const SCEV *getLHS() const { return LHS; }
+ const SCEV *getRHS() const { return RHS; }
virtual bool isLoopInvariant(const Loop *L) const {
return LHS->isLoopInvariant(L) && RHS->isLoopInvariant(L);
@@ -350,19 +348,14 @@ namespace llvm {
RHS->hasComputableLoopEvolution(L);
}
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const {
- const SCEV* L = LHS->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- const SCEV* R = RHS->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (L == LHS && R == RHS)
- return this;
- else
- return SE.getUDivExpr(L, R);
+ virtual bool hasOperand(const SCEV *O) const {
+ return O == LHS || O == RHS || LHS->hasOperand(O) || RHS->hasOperand(O);
}
bool dominates(BasicBlock *BB, DominatorTree *DT) const;
+ bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
+
virtual const Type *getType() const;
void print(raw_ostream &OS) const;
@@ -389,25 +382,25 @@ namespace llvm {
const Loop *L;
- SCEVAddRecExpr(const SmallVectorImpl<const SCEV*> &ops, const Loop *l)
- : SCEVNAryExpr(scAddRecExpr, ops), L(l) {
+ SCEVAddRecExpr(const FoldingSetNodeID &ID,
+ const SmallVectorImpl<const SCEV *> &ops, const Loop *l)
+ : SCEVNAryExpr(ID, scAddRecExpr, ops), L(l) {
for (size_t i = 0, e = Operands.size(); i != e; ++i)
assert(Operands[i]->isLoopInvariant(l) &&
"Operands of AddRec must be loop-invariant!");
}
public:
- virtual void Profile(FoldingSetNodeID &ID) const;
-
- const SCEV* getStart() const { return Operands[0]; }
+ const SCEV *getStart() const { return Operands[0]; }
const Loop *getLoop() const { return L; }
/// getStepRecurrence - This method constructs and returns the recurrence
/// indicating how much this expression steps by. If this is a polynomial
/// of degree N, it returns a chrec of degree N-1.
- const SCEV* getStepRecurrence(ScalarEvolution &SE) const {
+ const SCEV *getStepRecurrence(ScalarEvolution &SE) const {
if (isAffine()) return getOperand(1);
- return SE.getAddRecExpr(SmallVector<const SCEV*, 3>(op_begin()+1,op_end()),
+ return SE.getAddRecExpr(SmallVector<const SCEV *, 3>(op_begin()+1,
+ op_end()),
getLoop());
}
@@ -435,7 +428,7 @@ namespace llvm {
/// evaluateAtIteration - Return the value of this chain of recurrences at
/// the specified iteration number.
- const SCEV* evaluateAtIteration(const SCEV* It, ScalarEvolution &SE) const;
+ const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
/// getNumIterationsInRange - Return the number of iterations of this loop
/// that produce values in the specified constant range. Another way of
@@ -443,12 +436,14 @@ namespace llvm {
/// value is not in the condition, thus computing the exit count. If the
/// iteration count can't be computed, an instance of SCEVCouldNotCompute is
/// returned.
- const SCEV* getNumIterationsInRange(ConstantRange Range,
+ const SCEV *getNumIterationsInRange(ConstantRange Range,
ScalarEvolution &SE) const;
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const;
+ /// getPostIncExpr - Return an expression representing the value of
+ /// this expression one iteration of the loop ahead.
+ const SCEVAddRecExpr *getPostIncExpr(ScalarEvolution &SE) const {
+ return cast<SCEVAddRecExpr>(SE.getAddExpr(this, getStepRecurrence(SE)));
+ }
virtual void print(raw_ostream &OS) const;
@@ -466,8 +461,12 @@ namespace llvm {
class SCEVSMaxExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVSMaxExpr(const SmallVectorImpl<const SCEV*> &ops)
- : SCEVCommutativeExpr(scSMaxExpr, ops) {
+ SCEVSMaxExpr(const FoldingSetNodeID &ID,
+ const SmallVectorImpl<const SCEV *> &ops)
+ : SCEVCommutativeExpr(ID, scSMaxExpr, ops) {
+ // Max never overflows.
+ setHasNoUnsignedWrap(true);
+ setHasNoSignedWrap(true);
}
public:
@@ -487,8 +486,12 @@ namespace llvm {
class SCEVUMaxExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVUMaxExpr(const SmallVectorImpl<const SCEV*> &ops)
- : SCEVCommutativeExpr(scUMaxExpr, ops) {
+ SCEVUMaxExpr(const FoldingSetNodeID &ID,
+ const SmallVectorImpl<const SCEV *> &ops)
+ : SCEVCommutativeExpr(ID, scUMaxExpr, ops) {
+ // Max never overflows.
+ setHasNoUnsignedWrap(true);
+ setHasNoSignedWrap(true);
}
public:
@@ -501,22 +504,108 @@ namespace llvm {
}
};
+ //===--------------------------------------------------------------------===//
+ /// SCEVTargetDataConstant - This node is the base class for representing
+ /// target-dependent values in a target-independent way.
+ ///
+ class SCEVTargetDataConstant : public SCEV {
+ protected:
+ const Type *Ty;
+ SCEVTargetDataConstant(const FoldingSetNodeID &ID, enum SCEVTypes T,
+ const Type *ty) :
+ SCEV(ID, T), Ty(ty) {}
+
+ public:
+ virtual bool isLoopInvariant(const Loop *) const { return true; }
+ virtual bool hasComputableLoopEvolution(const Loop *) const {
+ return false; // not computable
+ }
+
+ virtual bool hasOperand(const SCEV *) const {
+ return false;
+ }
+
+ bool dominates(BasicBlock *, DominatorTree *) const {
+ return true;
+ }
+
+ bool properlyDominates(BasicBlock *, DominatorTree *) const {
+ return true;
+ }
+
+ virtual const Type *getType() const { return Ty; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVTargetDataConstant *S) { return true; }
+ static inline bool classof(const SCEV *S) {
+ return S->getSCEVType() == scFieldOffset ||
+ S->getSCEVType() == scAllocSize;
+ }
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// SCEVFieldOffsetExpr - This node represents an offsetof expression.
+ ///
+ class SCEVFieldOffsetExpr : public SCEVTargetDataConstant {
+ friend class ScalarEvolution;
+
+ const StructType *STy;
+ unsigned FieldNo;
+ SCEVFieldOffsetExpr(const FoldingSetNodeID &ID, const Type *ty,
+ const StructType *sty, unsigned fieldno) :
+ SCEVTargetDataConstant(ID, scFieldOffset, ty),
+ STy(sty), FieldNo(fieldno) {}
+
+ public:
+ const StructType *getStructType() const { return STy; }
+ unsigned getFieldNo() const { return FieldNo; }
+
+ virtual void print(raw_ostream &OS) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVFieldOffsetExpr *S) { return true; }
+ static inline bool classof(const SCEV *S) {
+ return S->getSCEVType() == scFieldOffset;
+ }
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// SCEVAllocSizeExpr - This node represents a sizeof expression.
+ ///
+ class SCEVAllocSizeExpr : public SCEVTargetDataConstant {
+ friend class ScalarEvolution;
+
+ const Type *AllocTy;
+ SCEVAllocSizeExpr(const FoldingSetNodeID &ID,
+ const Type *ty, const Type *allocty) :
+ SCEVTargetDataConstant(ID, scAllocSize, ty),
+ AllocTy(allocty) {}
+
+ public:
+ const Type *getAllocType() const { return AllocTy; }
+
+ virtual void print(raw_ostream &OS) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVAllocSizeExpr *S) { return true; }
+ static inline bool classof(const SCEV *S) {
+ return S->getSCEVType() == scAllocSize;
+ }
+ };
//===--------------------------------------------------------------------===//
/// SCEVUnknown - This means that we are dealing with an entirely unknown SCEV
- /// value, and only represent it as it's LLVM Value. This is the "bottom"
+ /// value, and only represent it as its LLVM Value. This is the "bottom"
/// value for the analysis.
///
class SCEVUnknown : public SCEV {
friend class ScalarEvolution;
Value *V;
- explicit SCEVUnknown(Value *v) :
- SCEV(scUnknown), V(v) {}
-
- public:
- virtual void Profile(FoldingSetNodeID &ID) const;
+ SCEVUnknown(const FoldingSetNodeID &ID, Value *v) :
+ SCEV(ID, scUnknown), V(v) {}
+ public:
Value *getValue() const { return V; }
virtual bool isLoopInvariant(const Loop *L) const;
@@ -524,15 +613,14 @@ namespace llvm {
return false; // not computable
}
- const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym,
- const SCEV* Conc,
- ScalarEvolution &SE) const {
- if (&*Sym == this) return Conc;
- return this;
+ virtual bool hasOperand(const SCEV *) const {
+ return false;
}
bool dominates(BasicBlock *BB, DominatorTree *DT) const;
+ bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
+
virtual const Type *getType() const;
virtual void print(raw_ostream &OS) const;
@@ -570,19 +658,21 @@ namespace llvm {
return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
case scUMaxExpr:
return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
+ case scFieldOffset:
+ return ((SC*)this)->visitFieldOffsetExpr((const SCEVFieldOffsetExpr*)S);
+ case scAllocSize:
+ return ((SC*)this)->visitAllocSizeExpr((const SCEVAllocSizeExpr*)S);
case scUnknown:
return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
case scCouldNotCompute:
return ((SC*)this)->visitCouldNotCompute((const SCEVCouldNotCompute*)S);
default:
- assert(0 && "Unknown SCEV type!");
- abort();
+ llvm_unreachable("Unknown SCEV type!");
}
}
RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) {
- assert(0 && "Invalid use of SCEVCouldNotCompute!");
- abort();
+ llvm_unreachable("Invalid use of SCEVCouldNotCompute!");
return RetVal();
}
};
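
The net effect of this hunk is that every SCEV node now receives its FoldingSetNodeID at construction time, replacing the old virtual Profile() methods. A sketch of the uniquing idiom this supports on the ScalarEvolution side (UniqueSCEVs and the surrounding code are assumptions; only the constructors above appear in this diff):

  FoldingSetNodeID ID;
  ID.AddInteger(scAddExpr);                 // node kind
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);                  // operands are already uniqued
  void *IP = 0;
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
    return S;                               // reuse the structurally equal node
  SCEV *S = new SCEVAddExpr(ID, Ops);       // the ID is baked into the node
  UniqueSCEVs.InsertNode(S, IP);
  return S;
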
diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h
index c75531a7e6e0..820e1bd1e436 100644
--- a/include/llvm/Analysis/SparsePropagation.h
+++ b/include/llvm/Analysis/SparsePropagation.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include <iosfwd>
#include <vector>
#include <set>
@@ -31,6 +30,8 @@ namespace llvm {
class BasicBlock;
class Function;
class SparseSolver;
+ class LLVMContext;
+ class raw_ostream;
template<typename T> class SmallVectorImpl;
@@ -71,6 +72,12 @@ public:
virtual LatticeVal ComputeConstant(Constant *C) {
return getOverdefinedVal(); // always safe
}
+
+ /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is
+ /// one that the we want to handle through ComputeInstructionState.
+ virtual bool IsSpecialCasedPHI(PHINode *PN) {
+ return false;
+ }
/// GetConstant - If the specified lattice value is representable as an LLVM
/// constant value, return it. Otherwise return null. The returned value
@@ -99,7 +106,7 @@ public:
}
/// PrintValue - Render the specified lattice value to the specified stream.
- virtual void PrintValue(LatticeVal V, std::ostream &OS);
+ virtual void PrintValue(LatticeVal V, raw_ostream &OS);
};
@@ -113,6 +120,8 @@ class SparseSolver {
/// compute transfer functions.
AbstractLatticeFunction *LatticeFunc;
+ LLVMContext *Context;
+
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
SmallPtrSet<BasicBlock*, 16> BBExecutable; // The bbs that are executable.
@@ -128,8 +137,8 @@ class SparseSolver {
SparseSolver(const SparseSolver&); // DO NOT IMPLEMENT
void operator=(const SparseSolver&); // DO NOT IMPLEMENT
public:
- explicit SparseSolver(AbstractLatticeFunction *Lattice)
- : LatticeFunc(Lattice) {}
+ explicit SparseSolver(AbstractLatticeFunction *Lattice, LLVMContext *C)
+ : LatticeFunc(Lattice), Context(C) {}
~SparseSolver() {
delete LatticeFunc;
}
@@ -138,7 +147,7 @@ public:
///
void Solve(Function &F);
- void Print(Function &F, std::ostream &OS) const;
+ void Print(Function &F, raw_ostream &OS) const;
/// getLatticeState - Return the LatticeVal object that corresponds to the
/// value. If a value is not in the map, it is returned as untracked,
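
With printing moved to raw_ostream and a context threaded through SparseSolver, a client now looks roughly like this (a sketch; the subclass is hypothetical, and its base-class constructor arguments, the lattice sentinel values, are elided):

  struct SketchLattice : public AbstractLatticeFunction {
    // ...constructor passing undefined/overdefined/untracked values elided...
    virtual bool IsSpecialCasedPHI(PHINode *PN) {
      return false;                        // keep the default PHI meet rule
    }
    virtual void PrintValue(LatticeVal V, raw_ostream &OS) {
      OS << (V == getOverdefinedVal() ? "overdefined" : "other");
    }
  };

  void runSolver(Function &F, LLVMContext &Ctx) {
    SparseSolver Solver(new SketchLattice(), &Ctx); // solver owns the lattice
    Solver.Solve(F);
    Solver.Print(F, errs());
  }
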
diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h
index fd615fcdae08..99651e192d3b 100644
--- a/include/llvm/Analysis/Trace.h
+++ b/include/llvm/Analysis/Trace.h
@@ -18,7 +18,6 @@
#ifndef LLVM_ANALYSIS_TRACE_H
#define LLVM_ANALYSIS_TRACE_H
-#include "llvm/Support/Streams.h"
#include <vector>
#include <cassert>
@@ -26,6 +25,7 @@ namespace llvm {
class BasicBlock;
class Function;
class Module;
+ class raw_ostream;
class Trace {
typedef std::vector<BasicBlock *> BasicBlockListType;
@@ -106,13 +106,12 @@ public:
/// print - Write trace to output stream.
///
- void print (std::ostream &O) const;
- void print (std::ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
/// dump - Debugger convenience method; writes trace to standard error
/// output stream.
///
- void dump () const;
+ void dump() const;
};
} // end namespace llvm
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 5f5f77a5c9fe..212b5d1da5f5 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -23,20 +23,33 @@ namespace llvm {
class Instruction;
class APInt;
class TargetData;
+ class LLVMContext;
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
/// processing.
+ ///
+ /// This function is defined on values with integer type, values with pointer
+ /// type (but only if TD is non-null), and vectors of integers. In the case
+ /// where V is a vector, the mask, known zero, and known one values are the
+ /// same width as the vector element, and the bit is set only if it is true
+ /// for all of the elements in the vector.
void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
- APInt &KnownOne, TargetData *TD = 0,
+ APInt &KnownOne, const TargetData *TD = 0,
unsigned Depth = 0);
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be
/// zero for bits that V cannot have.
+ ///
+ /// This function is defined on values with integer type, values with pointer
+ /// type (but only if TD is non-null), and vectors of integers. In the case
+ /// where V is a vector, the mask, known zero, and known one values are the
+ /// same width as the vector element, and the bit is set only if it is true
+ /// for all of the elements in the vector.
bool MaskedValueIsZero(Value *V, const APInt &Mask,
- TargetData *TD = 0, unsigned Depth = 0);
+ const TargetData *TD = 0, unsigned Depth = 0);
/// ComputeNumSignBits - Return the number of times the sign bit of the
@@ -47,7 +60,7 @@ namespace llvm {
///
/// 'Op' must have a scalar integer type.
///
- unsigned ComputeNumSignBits(Value *Op, TargetData *TD = 0,
+ unsigned ComputeNumSignBits(Value *Op, const TargetData *TD = 0,
unsigned Depth = 0);
/// CannotBeNegativeZero - Return true if we can prove that the specified FP
@@ -64,14 +77,16 @@ namespace llvm {
Value *FindInsertedValue(Value *V,
const unsigned *idx_begin,
const unsigned *idx_end,
+ LLVMContext &Context,
Instruction *InsertBefore = 0);
/// This is a convenience wrapper for finding values indexed by a single index
/// only.
inline Value *FindInsertedValue(Value *V, const unsigned Idx,
+ LLVMContext &Context,
Instruction *InsertBefore = 0) {
const unsigned Idxs[1] = { Idx };
- return FindInsertedValue(V, &Idxs[0], &Idxs[1], InsertBefore);
+ return FindInsertedValue(V, &Idxs[0], &Idxs[1], Context, InsertBefore);
}
/// GetConstantStringInfo - This function computes the length of a
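
To illustrate the interface documented above, here is a sketch that tests whether a value is a multiple of 8 (the helper name and the assumed 64-bit width of V are inventions for the example):

  bool isMultipleOf8(Value *V, const TargetData *TD) {
    unsigned BitWidth = 64;                  // assumed width of V
    APInt Mask = APInt::getLowBitsSet(BitWidth, 3);
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
    // Equivalent shortcut: return MaskedValueIsZero(V, Mask, TD);
    return (KnownZero & Mask) == Mask;       // low three bits provably zero
  }
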
diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h
index 9c0636779849..3a846c28994d 100644
--- a/include/llvm/Argument.h
+++ b/include/llvm/Argument.h
@@ -38,8 +38,7 @@ public:
/// Argument ctor - If Function argument is specified, this argument is
/// inserted at the end of the argument list for the function.
///
- explicit Argument(const Type *Ty, const std::string &Name = "",
- Function *F = 0);
+ explicit Argument(const Type *Ty, const Twine &Name = "", Function *F = 0);
inline const Function *getParent() const { return Parent; }
inline Function *getParent() { return Parent; }
diff --git a/include/llvm/Assembly/Parser.h b/include/llvm/Assembly/Parser.h
index 966abaaa2067..82ec6d81367b 100644
--- a/include/llvm/Assembly/Parser.h
+++ b/include/llvm/Assembly/Parser.h
@@ -19,6 +19,7 @@
namespace llvm {
class Module;
+class MemoryBuffer;
class SMDiagnostic;
class raw_ostream;
class LLVMContext;
@@ -48,6 +49,17 @@ Module *ParseAssemblyString(
LLVMContext &Context
);
+/// This function is the low-level interface to the LLVM Assembly Parser.
+/// ParseAssemblyFile and ParseAssemblyString are wrappers around this function.
+/// @brief Parse LLVM Assembly from a MemoryBuffer. This function *always*
+/// takes ownership of the MemoryBuffer.
+Module *ParseAssembly(
+ MemoryBuffer *F, ///< The MemoryBuffer containing assembly
+ Module *M, ///< A module to add the assembly to.
+ SMDiagnostic &Err, ///< Error result info.
+ LLVMContext &Context
+);
+
} // End llvm namespace
#endif
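
A sketch of calling the new entry point (the helper is hypothetical; note the ownership rule stated above):

  #include "llvm/Assembly/Parser.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/SourceMgr.h"
  #include "llvm/Support/raw_ostream.h"

  Module *parseBuffer(MemoryBuffer *Buf, LLVMContext &Context) {
    SMDiagnostic Err;
    // Buf is owned by the parser from here on, even on failure.
    Module *M = ParseAssembly(Buf, 0, Err, Context); // 0: make a new module
    if (!M)
      Err.Print("sketch", errs());
    return M;
  }
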
diff --git a/include/llvm/Assembly/Writer.h b/include/llvm/Assembly/Writer.h
index 5e5fe1560585..c5b239079a0d 100644
--- a/include/llvm/Assembly/Writer.h
+++ b/include/llvm/Assembly/Writer.h
@@ -17,7 +17,6 @@
#ifndef LLVM_ASSEMBLY_WRITER_H
#define LLVM_ASSEMBLY_WRITER_H
-#include <iosfwd>
#include <string>
namespace llvm {
@@ -71,8 +70,6 @@ void WriteTypeSymbolic(raw_ostream &, const Type *, const Module *M);
// then even constants get pretty-printed; for example, the type of a null
// pointer is printed symbolically.
//
-void WriteAsOperand(std::ostream &, const Value *, bool PrintTy = true,
- const Module *Context = 0);
void WriteAsOperand(raw_ostream &, const Value *, bool PrintTy = true,
const Module *Context = 0);
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 134e35020285..0bbdc349b1b4 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -57,6 +57,8 @@ const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer
const Attributes NoRedZone = 1<<22; /// disable redzone
const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
/// instructions.
+const Attributes Naked = 1<<24; ///< Naked function
+const Attributes InlineHint = 1<<25; ///< source said inlining was desirable
/// @brief Attributes that only apply to function parameters.
const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
@@ -65,7 +67,7 @@ const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
/// be used on return values or function parameters.
const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
- NoRedZone | NoImplicitFloat;
+ NoRedZone | NoImplicitFloat | Naked | InlineHint;
/// @brief Parameter attributes that do not apply to vararg call arguments.
const Attributes VarArgsIncompatible = StructRet;
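
Since Attributes is a plain bitmask, the new bits compose with the old ones in the usual way; a small sketch (written as if inside the same Attribute namespace):

  Attributes A = NoUnwind | InlineHint;      // combine attribute bits
  bool Hinted = (A & InlineHint) != 0;       // test one bit
  bool FnOnly = (A & ~FunctionOnly) == 0;    // nothing but function-level bits
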
diff --git a/include/llvm/AutoUpgrade.h b/include/llvm/AutoUpgrade.h
index f61bd1a08d32..0a81c807956f 100644
--- a/include/llvm/AutoUpgrade.h
+++ b/include/llvm/AutoUpgrade.h
@@ -15,6 +15,7 @@
#define LLVM_AUTOUPGRADE_H
namespace llvm {
+ class Module;
class Function;
class CallInst;
@@ -34,6 +35,9 @@ namespace llvm {
/// so that it can update all calls to the old function.
void UpgradeCallsToIntrinsic(Function* F);
+ /// This function checks debug info intrinsics. If an intrinsic is invalid
+ /// then this function simply removes the intrinsic.
+ void CheckDebugInfoIntrinsics(Module *M);
} // End llvm namespace
#endif
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index 072f6152ea7d..b497827fd695 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -22,6 +22,7 @@
namespace llvm {
class TerminatorInst;
+class LLVMContext;
template<> struct ilist_traits<Instruction>
: public SymbolTableListTraits<Instruction, BasicBlock> {
@@ -46,7 +47,7 @@ template<> struct ilist_traits<Instruction>
Instruction *ensureHead(Instruction*) const { return createSentinel(); }
static void noteHead(Instruction*, Instruction*) {}
private:
- mutable ilist_node<Instruction> Sentinel;
+ mutable ilist_half_node<Instruction> Sentinel;
};
/// This represents a single basic block in LLVM. A basic block is simply a
@@ -82,9 +83,12 @@ private:
/// is automatically inserted at either the end of the function (if
/// InsertBefore is null), or before the specified basic block.
///
- explicit BasicBlock(const std::string &Name = "", Function *Parent = 0,
- BasicBlock *InsertBefore = 0);
+ explicit BasicBlock(LLVMContext &C, const Twine &Name = "",
+ Function *Parent = 0, BasicBlock *InsertBefore = 0);
public:
+ /// getContext - Get the context in which this basic block lives.
+ LLVMContext &getContext() const;
+
/// Instruction iterators...
typedef InstListType::iterator iterator;
typedef InstListType::const_iterator const_iterator;
@@ -92,9 +96,9 @@ public:
/// Create - Creates a new BasicBlock. If the Parent parameter is specified,
/// the basic block is automatically inserted at either the end of the
/// function (if InsertBefore is 0), or before the specified basic block.
- static BasicBlock *Create(const std::string &Name = "", Function *Parent = 0,
- BasicBlock *InsertBefore = 0) {
- return new BasicBlock(Name, Parent, InsertBefore);
+ static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "",
+ Function *Parent = 0,BasicBlock *InsertBefore = 0) {
+ return new BasicBlock(Context, Name, Parent, InsertBefore);
}
~BasicBlock();
@@ -227,7 +231,10 @@ public:
/// cause a degenerate basic block to be formed, having a terminator inside of
/// the basic block).
///
- BasicBlock *splitBasicBlock(iterator I, const std::string &BBName = "");
+ /// Also note that this doesn't preserve any passes. To split blocks while
+ /// keeping loop information consistent, use the SplitBlock utility function.
+ ///
+ BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
};
} // End llvm namespace
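
A sketch of block creation under the new signature (F is an assumed Function* whose context is available):

  LLVMContext &Ctx = F->getContext();
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  // Twine assembles composed names without std::string temporaries:
  for (unsigned i = 0; i != 4; ++i)
    BasicBlock::Create(Ctx, "case." + Twine(i), F);
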
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index 13583c0d5ecd..e19e4c09ce32 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -22,7 +22,6 @@
#include "llvm/System/Path.h"
#include <map>
#include <set>
-#include <fstream>
namespace llvm {
class MemoryBuffer;
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 28249eec0b02..779ef5fa2d83 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -17,6 +17,7 @@
#include "llvm/Bitcode/BitCodes.h"
#include <climits>
+#include <string>
#include <vector>
namespace llvm {
@@ -260,6 +261,7 @@ public:
uint32_t Read(unsigned NumBits) {
+ assert(NumBits <= 32 && "Cannot return more than 32 bits!");
// If the field is fully contained by CurWord, return it quickly.
if (BitsInCurWord >= NumBits) {
uint32_t R = CurWord & ((1U << NumBits)-1);
@@ -322,17 +324,19 @@ public:
}
}
+ // ReadVBR64 - Read a VBR that may have a value up to 64 bits in size. The
+ // chunk size of the VBR must still be <= 32 bits though.
uint64_t ReadVBR64(unsigned NumBits) {
- uint64_t Piece = Read(NumBits);
- if ((Piece & (uint64_t(1) << (NumBits-1))) == 0)
- return Piece;
+ uint32_t Piece = Read(NumBits);
+ if ((Piece & (1U << (NumBits-1))) == 0)
+ return uint64_t(Piece);
uint64_t Result = 0;
unsigned NextBit = 0;
while (1) {
- Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+ Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
- if ((Piece & (uint64_t(1) << (NumBits-1))) == 0)
+ if ((Piece & (1U << (NumBits-1))) == 0)
return Result;
NextBit += NumBits-1;
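
To make the chunk layout concrete: each NumBits chunk carries NumBits-1 payload bits plus a high continuation bit, so with NumBits = 6 the value 1000 needs two chunks. A sketch of the matching writer loop, modeled on the BitstreamWriter side and shown only to illustrate the format (Emit is assumed to write the low NumBits bits of its argument):

  void EmitVBR64(uint64_t Val, unsigned NumBits) {
    uint32_t Threshold = 1U << (NumBits-1);    // the continuation bit
    while (Val >= Threshold) {
      // low NumBits-1 payload bits, continuation bit set
      Emit(uint32_t(Val & (Threshold-1)) | Threshold, NumBits);
      Val >>= NumBits-1;
    }
    Emit(uint32_t(Val), NumBits);              // final chunk, bit clear
  }
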
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 55dd4dd49b0f..e48a19083365 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -15,6 +15,7 @@
#ifndef BITSTREAM_WRITER_H
#define BITSTREAM_WRITER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitCodes.h"
#include <vector>
@@ -293,7 +294,9 @@ private:
/// known to exist at the end of the record.
template<typename uintty>
void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- const char *BlobData, unsigned BlobLen) {
+ const StringRef &Blob) {
+ const char *BlobData = Blob.data();
+ unsigned BlobLen = (unsigned) Blob.size();
unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
@@ -409,7 +412,7 @@ public:
/// the first entry.
template<typename uintty>
void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
- EmitRecordWithAbbrevImpl(Abbrev, Vals, 0, 0);
+ EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
}
/// EmitRecordWithBlob - Emit the specified record to the stream, using an
@@ -419,16 +422,27 @@ public:
/// of the record.
template<typename uintty>
void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+ const StringRef &Blob) {
+ EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
+ }
+ template<typename uintty>
+ void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
const char *BlobData, unsigned BlobLen) {
- EmitRecordWithAbbrevImpl(Abbrev, Vals, BlobData, BlobLen);
+ return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
}
/// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
/// that end with an array.
template<typename uintty>
void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+ const StringRef &Array) {
+ EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
+ }
+ template<typename uintty>
+ void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
const char *ArrayData, unsigned ArrayLen) {
- EmitRecordWithAbbrevImpl(Abbrev, Vals, ArrayData, ArrayLen);
+ return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
+ ArrayLen));
}
//===--------------------------------------------------------------------===//
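
A brief usage sketch of the StringRef overloads above (Stream, AbbrevID, and the payload variables are assumed from surrounding code):

  SmallVector<uint64_t, 4> Vals;
  Vals.push_back(MyRecordCode);                // hypothetical record code
  Stream.EmitRecordWithBlob(AbbrevID, Vals, StringRef(Data, Size));
  // The pointer/length form now just forwards to the same implementation:
  Stream.EmitRecordWithBlob(AbbrevID, Vals, Data, Size);
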
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 1ede69dee161..dccd8e035d79 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -33,7 +33,9 @@ namespace bitc {
CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,
TYPE_SYMTAB_BLOCK_ID,
- VALUE_SYMTAB_BLOCK_ID
+ VALUE_SYMTAB_BLOCK_ID,
+ METADATA_BLOCK_ID,
+ METADATA_ATTACHMENT_ID
};
@@ -106,6 +108,14 @@ namespace bitc {
VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N]
};
+ enum MetadataCodes {
+ METADATA_STRING = 1, // MDSTRING: [values]
+ METADATA_NODE = 2, // MDNODE: [n x (type num, value num)]
+ METADATA_NAME = 3, // STRING: [values]
+ METADATA_NAMED_NODE = 4, // NAMEDMDNODE: [n x mdnodes]
+ METADATA_KIND = 5, // [n x [id, name]]
+ METADATA_ATTACHMENT = 6 // [m x [value, [n x [id, mdnode]]]]
+ };
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
// constant and maintains an implicit current type value.
enum ConstantsCodes {
@@ -128,8 +138,7 @@ namespace bitc {
CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred]
CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr]
CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval]
- CST_CODE_MDSTRING = 20, // MDSTRING: [values]
- CST_CODE_MDNODE = 21 // MDNODE: [n x (type num, value num)]
+ CST_CODE_CE_INBOUNDS_GEP = 20 // INBOUNDS_GEP: [n x operands]
};
/// CastOpcodes - These are values used in the bitcode files to encode which
@@ -171,6 +180,18 @@ namespace bitc {
BINOP_XOR = 12
};
+ /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
+ /// OverflowingBinaryOperator's SubclassOptionalData contents.
+ enum OverflowingBinaryOperatorOptionalFlags {
+ OBO_NO_UNSIGNED_WRAP = 0,
+ OBO_NO_SIGNED_WRAP = 1
+ };
+
+ /// SDivOperatorOptionalFlags - Flags for serializing SDivOperator's
+ /// SubclassOptionalData contents.
+ enum SDivOperatorOptionalFlags {
+ SDIV_EXACT = 0
+ };
// The function body block (FUNCTION_BLOCK_ID) describes function bodies. It
// can contain a constant block (CONSTANTS_BLOCK_ID).
@@ -210,10 +231,12 @@ namespace bitc {
FUNC_CODE_INST_GETRESULT = 25, // GETRESULT: [ty, opval, n]
FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands]
FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands]
- // fcmp/icmp returning Int1TY or vector of Int1Ty, NOT for vicmp/vfcmp
+ // fcmp/icmp returning Int1Ty or vector of Int1Ty. Same as CMP, exists to
+ // support legacy vicmp/vfcmp instructions.
FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred]
// new select on i1 or [N x i1]
- FUNC_CODE_INST_VSELECT = 29 // VSELECT: [ty,opval,opval,predty,pred]
+ FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred]
+ FUNC_CODE_INST_INBOUNDS_GEP = 30 // INBOUNDS_GEP: [n x operands]
};
} // End bitc namespace
} // End llvm namespace
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index 3d33d75a06a1..7b74bdf76ba7 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -14,7 +14,6 @@
#ifndef LLVM_BITCODE_H
#define LLVM_BITCODE_H
-#include <iosfwd>
#include <string>
namespace llvm {
@@ -41,10 +40,6 @@ namespace llvm {
Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
std::string *ErrMsg = 0);
- /// WriteBitcodeToFile - Write the specified module to the specified output
- /// stream.
- void WriteBitcodeToFile(const Module *M, std::ostream &Out);
-
/// WriteBitcodeToFile - Write the specified module to the specified
/// raw output stream.
void WriteBitcodeToFile(const Module *M, raw_ostream &Out);
@@ -53,23 +48,48 @@ namespace llvm {
/// raw output stream.
void WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream);
- /// CreateBitcodeWriterPass - Create and return a pass that writes the module
- /// to the specified ostream.
- ModulePass *CreateBitcodeWriterPass(std::ostream &Str);
-
/// createBitcodeWriterPass - Create and return a pass that writes the module
/// to the specified ostream.
ModulePass *createBitcodeWriterPass(raw_ostream &Str);
- /// isBitcodeWrapper - Return true fi this is a wrapper for LLVM IR bitcode
- /// files.
- static bool inline isBitcodeWrapper(unsigned char *BufPtr,
- unsigned char *BufEnd) {
- return (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 &&
- BufPtr[2] == 0x17 && BufPtr[3] == 0x0B);
+ /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
+ /// for an LLVM IR bitcode wrapper.
+ ///
+ static inline bool isBitcodeWrapper(const unsigned char *BufPtr,
+ const unsigned char *BufEnd) {
+ // See if you can find the hidden message in the magic bytes :-).
+ // (Hint: it's a little-endian encoding.)
+ return BufPtr != BufEnd &&
+ BufPtr[0] == 0xDE &&
+ BufPtr[1] == 0xC0 &&
+ BufPtr[2] == 0x17 &&
+ BufPtr[3] == 0x0B;
}
-
+
+ /// isRawBitcode - Return true if the given bytes are the magic bytes for
+ /// raw LLVM IR bitcode (without a wrapper).
+ ///
+ static inline bool isRawBitcode(const unsigned char *BufPtr,
+ const unsigned char *BufEnd) {
+ // These bytes sort of have a hidden message, but it's not in
+ // little-endian this time, and it's a little redundant.
+ return BufPtr != BufEnd &&
+ BufPtr[0] == 'B' &&
+ BufPtr[1] == 'C' &&
+ BufPtr[2] == 0xc0 &&
+ BufPtr[3] == 0xde;
+ }
+
+ /// isBitcode - Return true if the given bytes are the magic bytes for
+ /// LLVM IR bitcode, either with or without a wrapper.
+ ///
+ static bool inline isBitcode(const unsigned char *BufPtr,
+ const unsigned char *BufEnd) {
+ return isBitcodeWrapper(BufPtr, BufEnd) ||
+ isRawBitcode(BufPtr, BufEnd);
+ }
+
/// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
/// header for padding or other reasons. The format of this header is:
///
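
A short sketch of sniffing a buffer with the predicates above (Buf is an assumed MemoryBuffer*):

  const unsigned char *Ptr = (const unsigned char *)Buf->getBufferStart();
  const unsigned char *End = (const unsigned char *)Buf->getBufferEnd();
  if (isBitcode(Ptr, End)) {
    // Accepts both the raw 'B','C',0xc0,0xde stream and the wrapped form,
    // whose magic reads as 0x0B17C0DE ("BITCODE") in little-endian order.
  }
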
diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h
index d5ff17cf247c..fc9feda5bd8c 100644
--- a/include/llvm/CallGraphSCCPass.h
+++ b/include/llvm/CallGraphSCCPass.h
@@ -22,6 +22,7 @@
#define LLVM_CALL_GRAPH_SCC_PASS_H
#include "llvm/Pass.h"
+#include "llvm/Analysis/CallGraph.h"
namespace llvm {
@@ -45,7 +46,10 @@ struct CallGraphSCCPass : public Pass {
/// non-recursive (or only self-recursive) functions will have an SCC size of
/// 1, where recursive portions of the call graph will have SCC size > 1.
///
- virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC) = 0;
+ /// SCC passes that add or delete functions to the SCC are required to update
+ /// the SCC list, otherwise stale pointers may be dereferenced.
+ ///
+ virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC) = 0;
/// doFinalization - This method is called after the SCCs of the program have
/// been processed, allowing the pass to do final cleanup as necessary.
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index ef609e4efefe..62d0679fb738 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -16,30 +16,43 @@
#ifndef LLVM_CODEGEN_ASMPRINTER_H
#define LLVM_CODEGEN_ASMPRINTER_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetMachine.h"
-#include <set>
+#include "llvm/ADT/DenseMap.h"
namespace llvm {
class GCStrategy;
class Constant;
class ConstantArray;
+ class ConstantFP;
class ConstantInt;
class ConstantStruct;
class ConstantVector;
class GCMetadataPrinter;
+ class GlobalValue;
class GlobalVariable;
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineLoopInfo;
+ class MachineLoop;
+ class MachineConstantPool;
class MachineConstantPoolEntry;
class MachineConstantPoolValue;
+ class MachineJumpTableInfo;
class MachineModuleInfo;
+ class MCInst;
+ class MCContext;
+ class MCSection;
+ class MCStreamer;
+ class MCSymbol;
class DwarfWriter;
class Mangler;
- class Section;
- class TargetAsmInfo;
+ class MCAsmInfo;
+ class TargetLoweringObjectFile;
class Type;
- class raw_ostream;
+ class formatted_raw_ostream;
/// AsmPrinter - This class is intended to be used as a driving class for all
/// asm writers.
@@ -57,31 +70,51 @@ namespace llvm {
typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
typedef gcp_map_type::iterator gcp_iterator;
gcp_map_type GCMetadataPrinters;
-
- protected:
+
+ /// If VerboseAsm is set, a pointer to the loop info for this
+ /// function.
+ ///
+ MachineLoopInfo *LI;
+
+ public:
/// MMI - If available, this is a pointer to the current MachineModuleInfo.
MachineModuleInfo *MMI;
+ protected:
/// DW - If available, this is a pointer to the current dwarf writer.
DwarfWriter *DW;
-
+
public:
/// Output stream on which we're printing assembly code.
///
- raw_ostream &O;
+ formatted_raw_ostream &O;
/// Target machine description.
///
TargetMachine &TM;
+ /// getObjFileLowering - Return information about object file lowering.
+ TargetLoweringObjectFile &getObjFileLowering() const;
+
/// Target Asm Printer information.
///
- const TargetAsmInfo *TAI;
+ const MCAsmInfo *MAI;
/// Target Register Information.
///
const TargetRegisterInfo *TRI;
+ /// OutContext - This is the context for the output file that we are
+ /// streaming. This owns all of the global MC-related objects for the
+ /// generated translation unit.
+ MCContext &OutContext;
+
+ /// OutStreamer - This is the MCStreamer object for the file we are
+ /// generating. This contains the transient state for the current
+ /// translation unit that we are generating (such as the current section
+ /// etc).
+ MCStreamer &OutStreamer;
+
/// The current machine function.
const MachineFunction *MF;
@@ -94,14 +127,9 @@ namespace llvm {
///
std::string CurrentFnName;
- /// CurrentSection - The current section we are emitting to. This is
- /// controlled and used by the SwitchSection method.
- std::string CurrentSection;
- const Section* CurrentSection_;
-
- /// IsInTextSection - True if the current section we are emitting to is a
- /// text section.
- bool IsInTextSection;
+ /// getCurrentSection() - Return the current section we are emitting to.
+ const MCSection *getCurrentSection() const;
+
/// VerboseAsm - Emit comments in assembly output if this is true.
///
@@ -113,12 +141,12 @@ namespace llvm {
mutable const Function *LastFn;
mutable unsigned Counter;
- // Private state for processDebugLock()
+ // Private state for processDebugLoc()
mutable DebugLocTuple PrevDLT;
protected:
- explicit AsmPrinter(raw_ostream &o, TargetMachine &TM,
- const TargetAsmInfo *T, bool V);
+ explicit AsmPrinter(formatted_raw_ostream &o, TargetMachine &TM,
+ const MCAsmInfo *T, bool V);
public:
virtual ~AsmPrinter();
@@ -127,54 +155,10 @@ namespace llvm {
///
bool isVerbose() const { return VerboseAsm; }
- /// SwitchToTextSection - Switch to the specified section of the executable
- /// if we are not already in it! If GV is non-null and if the global has an
- /// explicitly requested section, we switch to the section indicated for the
- /// global instead of NewSection.
- ///
- /// If the new section is an empty string, this method forgets what the
- /// current section is, but does not emit a .section directive.
- ///
- /// This method is used when about to emit executable code.
- ///
- void SwitchToTextSection(const char *NewSection,
- const GlobalValue *GV = NULL);
-
- /// SwitchToDataSection - Switch to the specified section of the executable
- /// if we are not already in it! If GV is non-null and if the global has an
- /// explicitly requested section, we switch to the section indicated for the
- /// global instead of NewSection.
- ///
- /// If the new section is an empty string, this method forgets what the
- /// current section is, but does not emit a .section directive.
- ///
- /// This method is used when about to emit data. For most assemblers, this
- /// is the same as the SwitchToTextSection method, but not all assemblers
- /// are the same.
+ /// getFunctionNumber - Return a unique ID for the current function.
///
- void SwitchToDataSection(const char *NewSection,
- const GlobalValue *GV = NULL);
-
- /// SwitchToSection - Switch to the specified section of the executable if
- /// we are not already in it!
- void SwitchToSection(const Section* NS);
-
- /// getGlobalLinkName - Returns the asm/link name of of the specified
- /// global variable. Should be overridden by each target asm printer to
- /// generate the appropriate value.
- virtual const std::string &getGlobalLinkName(const GlobalVariable *GV,
- std::string &LinkName) const;
-
- /// EmitExternalGlobal - Emit the external reference to a global variable.
- /// Should be overridden if an indirect reference should be used.
- virtual void EmitExternalGlobal(const GlobalVariable *GV);
-
- /// getCurrentFunctionEHName - Called to return (and cache) the
- /// CurrentFnEHName.
- ///
- const std::string &getCurrentFunctionEHName(const MachineFunction *MF,
- std::string &FuncEHName) const;
-
+ unsigned getFunctionNumber() const { return FunctionNumber; }
+
protected:
/// getAnalysisUsage - Record analysis usage.
///
@@ -185,6 +169,14 @@ namespace llvm {
/// call this implementation.
bool doInitialization(Module &M);
+ /// EmitStartOfAsmFile - This virtual method can be overridden by targets
+ /// that want to emit something at the start of their file.
+ virtual void EmitStartOfAsmFile(Module &M) {}
+
+ /// EmitEndOfAsmFile - This virtual method can be overridden by targets that
+ /// want to emit something at the end of their file.
+ virtual void EmitEndOfAsmFile(Module &M) {}
+
/// doFinalization - Shut down the asmprinter. If you override this in your
/// pass, you must make sure to call it explicitly.
bool doFinalization(Module &M);
@@ -212,14 +204,14 @@ namespace llvm {
unsigned AsmVariant,
const char *ExtraCode);
+ /// PrintGlobalVariable - Emit the specified global variable and its
+ /// initializer to the output stream.
+ virtual void PrintGlobalVariable(const GlobalVariable *GV) = 0;
+
/// SetupMachineFunction - This should be called when a new MachineFunction
/// is being processed from runOnMachineFunction.
void SetupMachineFunction(MachineFunction &MF);
- /// getFunctionNumber - Return a unique ID for the current function.
- ///
- unsigned getFunctionNumber() const { return FunctionNumber; }
-
/// IncrementFunctionNumber - Increase Function Number. AsmPrinters should
/// not normally call this, as the counter is automatically bumped by
/// SetupMachineFunction.
@@ -241,7 +233,7 @@ namespace llvm {
/// special global used by LLVM. If so, emit it and return true, otherwise
/// do nothing and return false.
bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
-
+
public:
//===------------------------------------------------------------------===//
/// LEB 128 number encoding.
@@ -267,7 +259,8 @@ namespace llvm {
void EOL() const;
void EOL(const std::string &Comment) const;
void EOL(const char* Comment) const;
-
+ void EOL(const char *Comment, unsigned Encoding) const;
+
/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
/// unsigned leb128 value.
void EmitULEB128Bytes(unsigned Value) const;
@@ -332,6 +325,19 @@ namespace llvm {
/// debug tables.
void printDeclare(const MachineInstr *MI) const;
+ /// EmitComments - Pretty-print comments for instructions
+ void EmitComments(const MachineInstr &MI) const;
+ /// EmitComments - Pretty-print comments for basic blocks
+ void EmitComments(const MachineBasicBlock &MBB) const;
+
+ /// GetMBBSymbol - Return the MCSymbol corresponding to the specified basic
+ /// block label.
+ MCSymbol *GetMBBSymbol(unsigned MBBID) const;
+
+ /// EmitBasicBlockStart - This method prints the label for the specified
+ /// MachineBasicBlock, an alignment (if present) and a comment describing
+ /// it if appropriate.
+ void EmitBasicBlockStart(const MachineBasicBlock *MBB) const;
protected:
/// EmitZeros - Emit a block of zeros.
///
@@ -351,8 +357,8 @@ namespace llvm {
virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
/// processDebugLoc - Processes the debug information of each machine
- /// instruction's DebugLoc.
- void processDebugLoc(DebugLoc DL);
+ /// instruction's DebugLoc.
+ void processDebugLoc(const MachineInstr *MI, bool BeforePrintingInsn);
/// printInlineAsm - This method formats and prints the specified machine
/// instruction that is an inline asm.
@@ -362,13 +368,7 @@ namespace llvm {
/// that is an implicit def.
virtual void printImplicitDef(const MachineInstr *MI) const;
- /// printBasicBlockLabel - This method prints the label for the specified
- /// MachineBasicBlock
- virtual void printBasicBlockLabel(const MachineBasicBlock *MBB,
- bool printAlign = false,
- bool printColon = false,
- bool printComment = true) const;
-
+
/// printPICJumpTableSetLabel - This method prints a set label for the
/// specified MachineBasicBlock for a jumptable entry.
virtual void printPICJumpTableSetLabel(unsigned uid,
@@ -383,22 +383,14 @@ namespace llvm {
/// specified type.
void printDataDirective(const Type *type, unsigned AddrSpace = 0);
- /// printSuffixedName - This prints a name with preceding
- /// getPrivateGlobalPrefix and the specified suffix, handling quoted names
- /// correctly.
- void printSuffixedName(const char *Name, const char *Suffix,
- const char *Prefix = 0);
- void printSuffixedName(const std::string &Name, const char* Suffix);
-
/// printVisibility - This prints visibility information about the symbol, if
/// this is supported by the target.
void printVisibility(const std::string& Name, unsigned Visibility) const;
/// printOffset - This is just convenient handler for printing offsets.
void printOffset(int64_t Offset) const;
-
+
private:
- const GlobalValue *findGlobalValue(const Constant* CV);
void EmitLLVMUsedList(Constant *List);
void EmitXXStructorList(Constant *List);
void EmitGlobalConstantStruct(const ConstantStruct* CVS,
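
Putting the new hooks together, a target printer now subclasses roughly like this (a sketch; the class is hypothetical and its remaining pure-virtual and pass methods are elided):

  class SketchAsmPrinter : public AsmPrinter {
  public:
    SketchAsmPrinter(formatted_raw_ostream &OS, TargetMachine &TM,
                     const MCAsmInfo *T, bool V)
      : AsmPrinter(OS, TM, T, V) {}

    virtual void EmitStartOfAsmFile(Module &M) {
      O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n";
    }
    virtual void PrintGlobalVariable(const GlobalVariable *GV) {
      // target-specific data emission goes here
    }
  };
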
diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h
index ce0c07af9515..2d4bd73a82eb 100644
--- a/include/llvm/CodeGen/BinaryObject.h
+++ b/include/llvm/CodeGen/BinaryObject.h
@@ -68,6 +68,13 @@ public:
return !Relocations.empty();
}
+ /// emitZeros - This callback is invoked to emit an arbitrary number
+ /// of zero bytes to the data stream.
+ inline void emitZeros(unsigned Size) {
+ for (unsigned i=0; i < Size; ++i)
+ emitByte(0);
+ }
+
/// emitByte - This callback is invoked when a byte needs to be
/// written to the data stream.
inline void emitByte(uint8_t B) {
@@ -86,15 +93,15 @@ public:
/// emitWord16LE - This callback is invoked when a 16-bit word needs to be
/// written to the data stream in correct endian format and correct size.
inline void emitWord16LE(uint16_t W) {
- Data.push_back((W >> 0) & 255);
- Data.push_back((W >> 8) & 255);
+ Data.push_back((uint8_t)(W >> 0));
+ Data.push_back((uint8_t)(W >> 8));
}
/// emitWord16BE - This callback is invoked when a 16-bit word needs to be
/// written to the data stream in correct endian format and correct size.
inline void emitWord16BE(uint16_t W) {
- Data.push_back((W >> 8) & 255);
- Data.push_back((W >> 0) & 255);
+ Data.push_back((uint8_t)(W >> 8));
+ Data.push_back((uint8_t)(W >> 0));
}
/// emitWord - This callback is invoked when a word needs to be
@@ -124,49 +131,62 @@ public:
emitDWordBE(W);
}
+ /// emitWordFP80 - This callback is invoked when an x86_fp80 needs to be
+ /// written to the data stream in correct endian format.
+ inline void emitWordFP80(const uint64_t *W, unsigned PadSize) {
+ if (IsLittleEndian) {
+ emitWord64(W[0]);
+ emitWord16(W[1]);
+ } else {
+ emitWord16(W[1]);
+ emitWord64(W[0]);
+ }
+ emitZeros(PadSize);
+ }
+
/// emitWordLE - This callback is invoked when a 32-bit word needs to be
/// written to the data stream in little-endian format.
inline void emitWordLE(uint32_t W) {
- Data.push_back((W >> 0) & 255);
- Data.push_back((W >> 8) & 255);
- Data.push_back((W >> 16) & 255);
- Data.push_back((W >> 24) & 255);
+ Data.push_back((uint8_t)(W >> 0));
+ Data.push_back((uint8_t)(W >> 8));
+ Data.push_back((uint8_t)(W >> 16));
+ Data.push_back((uint8_t)(W >> 24));
}
/// emitWordBE - This callback is invoked when a 32-bit word needs to be
/// written to the data stream in big-endian format.
///
inline void emitWordBE(uint32_t W) {
- Data.push_back((W >> 24) & 255);
- Data.push_back((W >> 16) & 255);
- Data.push_back((W >> 8) & 255);
- Data.push_back((W >> 0) & 255);
+ Data.push_back((uint8_t)(W >> 24));
+ Data.push_back((uint8_t)(W >> 16));
+ Data.push_back((uint8_t)(W >> 8));
+ Data.push_back((uint8_t)(W >> 0));
}
/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
/// written to the data stream in little-endian format.
inline void emitDWordLE(uint64_t W) {
- Data.push_back(unsigned(W >> 0) & 255);
- Data.push_back(unsigned(W >> 8) & 255);
- Data.push_back(unsigned(W >> 16) & 255);
- Data.push_back(unsigned(W >> 24) & 255);
- Data.push_back(unsigned(W >> 32) & 255);
- Data.push_back(unsigned(W >> 40) & 255);
- Data.push_back(unsigned(W >> 48) & 255);
- Data.push_back(unsigned(W >> 56) & 255);
+ Data.push_back((uint8_t)(W >> 0));
+ Data.push_back((uint8_t)(W >> 8));
+ Data.push_back((uint8_t)(W >> 16));
+ Data.push_back((uint8_t)(W >> 24));
+ Data.push_back((uint8_t)(W >> 32));
+ Data.push_back((uint8_t)(W >> 40));
+ Data.push_back((uint8_t)(W >> 48));
+ Data.push_back((uint8_t)(W >> 56));
}
/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
/// written to the data stream in big-endian format.
inline void emitDWordBE(uint64_t W) {
- Data.push_back(unsigned(W >> 56) & 255);
- Data.push_back(unsigned(W >> 48) & 255);
- Data.push_back(unsigned(W >> 40) & 255);
- Data.push_back(unsigned(W >> 32) & 255);
- Data.push_back(unsigned(W >> 24) & 255);
- Data.push_back(unsigned(W >> 16) & 255);
- Data.push_back(unsigned(W >> 8) & 255);
- Data.push_back(unsigned(W >> 0) & 255);
+ Data.push_back((uint8_t)(W >> 56));
+ Data.push_back((uint8_t)(W >> 48));
+ Data.push_back((uint8_t)(W >> 40));
+ Data.push_back((uint8_t)(W >> 32));
+ Data.push_back((uint8_t)(W >> 24));
+ Data.push_back((uint8_t)(W >> 16));
+ Data.push_back((uint8_t)(W >> 8));
+ Data.push_back((uint8_t)(W >> 0));
}
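
The casts above are behavior-preserving: narrowing to uint8_t keeps exactly the low eight bits, which is what the old "& 255" masks did. One little-endian word written out:

  uint32_t W = 0x0B17C0DE;
  uint8_t b0 = (uint8_t)(W >> 0);   // 0xDE -- lowest byte emitted first
  uint8_t b1 = (uint8_t)(W >> 8);   // 0xC0
  uint8_t b2 = (uint8_t)(W >> 16);  // 0x17
  uint8_t b3 = (uint8_t)(W >> 24);  // 0x0B -- highest byte last
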
/// fixByte - This callback is invoked when a byte needs to be
@@ -187,15 +207,15 @@ public:
/// fixWord16LE - This callback is invoked when a 16-bit word needs to
/// fixup the data stream in little endian format.
inline void fixWord16LE(uint16_t W, uint32_t offset) {
- Data[offset++] = W & 255;
- Data[offset] = (W >> 8) & 255;
+ Data[offset] = (uint8_t)(W >> 0);
+ Data[++offset] = (uint8_t)(W >> 8);
}
/// fixWord16BE - This callback is invoked when a 16-bit word needs to
/// fixup data stream in big endian format.
inline void fixWord16BE(uint16_t W, uint32_t offset) {
- Data[offset++] = (W >> 8) & 255;
- Data[offset] = W & 255;
+ Data[offset] = (uint8_t)(W >> 8);
+ Data[++offset] = (uint8_t)(W >> 0);
}
/// emitWord - This callback is invoked when a word needs to
@@ -219,19 +239,19 @@ public:
/// fixWord32LE - This callback is invoked when a 32-bit word needs to
/// fixup the data in little endian format.
inline void fixWord32LE(uint32_t W, uint32_t offset) {
- Data[offset++] = W & 255;
- Data[offset++] = (W >> 8) & 255;
- Data[offset++] = (W >> 16) & 255;
- Data[offset] = (W >> 24) & 255;
+ Data[offset] = (uint8_t)(W >> 0);
+ Data[++offset] = (uint8_t)(W >> 8);
+ Data[++offset] = (uint8_t)(W >> 16);
+ Data[++offset] = (uint8_t)(W >> 24);
}
/// fixWord32BE - This callback is invoked when a 32-bit word needs to
/// fixup the data in big endian format.
inline void fixWord32BE(uint32_t W, uint32_t offset) {
- Data[offset++] = (W >> 24) & 255;
- Data[offset++] = (W >> 16) & 255;
- Data[offset++] = (W >> 8) & 255;
- Data[offset] = W & 255;
+ Data[offset] = (uint8_t)(W >> 24);
+ Data[++offset] = (uint8_t)(W >> 16);
+ Data[++offset] = (uint8_t)(W >> 8);
+ Data[++offset] = (uint8_t)(W >> 0);
}
/// fixWord64 - This callback is invoked when a 64-bit word needs to
@@ -246,42 +266,42 @@ public:
/// fixWord64LE - This callback is invoked when a 64-bit word needs to
/// fixup the data in little endian format.
inline void fixWord64LE(uint64_t W, uint32_t offset) {
- Data[offset++] = W & 255;
- Data[offset++] = (W >> 8) & 255;
- Data[offset++] = (W >> 16) & 255;
- Data[offset++] = (W >> 24) & 255;
- Data[offset++] = (W >> 32) & 255;
- Data[offset++] = (W >> 40) & 255;
- Data[offset++] = (W >> 48) & 255;
- Data[offset] = (W >> 56) & 255;
+ Data[offset] = (uint8_t)(W >> 0);
+ Data[++offset] = (uint8_t)(W >> 8);
+ Data[++offset] = (uint8_t)(W >> 16);
+ Data[++offset] = (uint8_t)(W >> 24);
+ Data[++offset] = (uint8_t)(W >> 32);
+ Data[++offset] = (uint8_t)(W >> 40);
+ Data[++offset] = (uint8_t)(W >> 48);
+ Data[++offset] = (uint8_t)(W >> 56);
}
/// fixWord64BE - This callback is invoked when a 64-bit word needs to
/// fixup the data in big endian format.
inline void fixWord64BE(uint64_t W, uint32_t offset) {
- Data[offset++] = (W >> 56) & 255;
- Data[offset++] = (W >> 48) & 255;
- Data[offset++] = (W >> 40) & 255;
- Data[offset++] = (W >> 32) & 255;
- Data[offset++] = (W >> 24) & 255;
- Data[offset++] = (W >> 16) & 255;
- Data[offset++] = (W >> 8) & 255;
- Data[offset] = W & 255;
+ Data[offset] = (uint8_t)(W >> 56);
+ Data[++offset] = (uint8_t)(W >> 48);
+ Data[++offset] = (uint8_t)(W >> 40);
+ Data[++offset] = (uint8_t)(W >> 32);
+ Data[++offset] = (uint8_t)(W >> 24);
+ Data[++offset] = (uint8_t)(W >> 16);
+ Data[++offset] = (uint8_t)(W >> 8);
+ Data[++offset] = (uint8_t)(W >> 0);
}
/// emitAlignment - Pad the data to the specified alignment.
- void emitAlignment(unsigned Alignment) {
+ void emitAlignment(unsigned Alignment, uint8_t fill = 0) {
if (Alignment <= 1) return;
unsigned PadSize = -Data.size() & (Alignment-1);
for (unsigned i = 0; i<PadSize; ++i)
- Data.push_back(0);
+ Data.push_back(fill);
}
/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
/// written to the data stream.
void emitULEB128Bytes(uint64_t Value) {
do {
- unsigned char Byte = Value & 0x7f;
+ uint8_t Byte = (uint8_t)(Value & 0x7f);
Value >>= 7;
if (Value) Byte |= 0x80;
emitByte(Byte);
@@ -295,7 +315,7 @@ public:
bool IsMore;
do {
- unsigned char Byte = Value & 0x7f;
+ uint8_t Byte = (uint8_t)(Value & 0x7f);
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
if (IsMore) Byte |= 0x80;
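
A worked example of the loop above: encoding -2 emits the single byte 0x7E, because after one iteration Value has collapsed to the sign (-1) and bit 6 of the byte already matches it. A sketch of the matching decoder (a hypothetical helper, not part of this header):

  #include <stdint.h>

  int64_t decodeSLEB128(const uint8_t *p) {
    int64_t Result = 0;
    unsigned Shift = 0;
    uint8_t Byte;
    do {
      Byte = *p++;
      Result |= int64_t(Byte & 0x7f) << Shift;
      Shift += 7;
    } while (Byte & 0x80);                  // high bit marks continuation
    if (Shift < 64 && (Byte & 0x40))
      Result |= -(int64_t(1) << Shift);     // sign-extend from bit 6
    return Result;
  }
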
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 7c83e24728c4..5e730fc12cc1 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CallingConv.h"
namespace llvm {
class TargetRegisterInfo;
@@ -33,34 +34,35 @@ public:
SExt, // The value is sign extended in the location.
ZExt, // The value is zero extended in the location.
AExt, // The value is extended with undefined upper bits.
- BCvt // The value is bit-converted in the location.
+ BCvt, // The value is bit-converted in the location.
+ Indirect // The location contains a pointer to the value.
// TODO: a subset of the value is in the location.
};
private:
/// ValNo - This is the value number being assigned (e.g. an argument number).
unsigned ValNo;
-
+
/// Loc is either a stack offset or a register number.
unsigned Loc;
-
+
/// isMem - True if this is a memory loc, false if it is a register loc.
bool isMem : 1;
-
+
/// isCustom - True if this arg/retval requires special handling.
bool isCustom : 1;
/// Information about how the value is assigned.
LocInfo HTP : 6;
-
+
/// ValVT - The type of the value being assigned.
- MVT ValVT;
+ EVT ValVT;
/// LocVT - The type of the location being assigned to.
- MVT LocVT;
+ EVT LocVT;
public:
-
- static CCValAssign getReg(unsigned ValNo, MVT ValVT,
- unsigned RegNo, MVT LocVT,
+
+ static CCValAssign getReg(unsigned ValNo, EVT ValVT,
+ unsigned RegNo, EVT LocVT,
LocInfo HTP) {
CCValAssign Ret;
Ret.ValNo = ValNo;
@@ -73,8 +75,8 @@ public:
return Ret;
}
- static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT,
- unsigned RegNo, MVT LocVT,
+ static CCValAssign getCustomReg(unsigned ValNo, EVT ValVT,
+ unsigned RegNo, EVT LocVT,
LocInfo HTP) {
CCValAssign Ret;
Ret = getReg(ValNo, ValVT, RegNo, LocVT, HTP);
@@ -82,8 +84,8 @@ public:
return Ret;
}
- static CCValAssign getMem(unsigned ValNo, MVT ValVT,
- unsigned Offset, MVT LocVT,
+ static CCValAssign getMem(unsigned ValNo, EVT ValVT,
+ unsigned Offset, EVT LocVT,
LocInfo HTP) {
CCValAssign Ret;
Ret.ValNo = ValNo;
@@ -95,9 +97,9 @@ public:
Ret.LocVT = LocVT;
return Ret;
}
-
- static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT,
- unsigned Offset, MVT LocVT,
+
+ static CCValAssign getCustomMem(unsigned ValNo, EVT ValVT,
+ unsigned Offset, EVT LocVT,
LocInfo HTP) {
CCValAssign Ret;
Ret = getMem(ValNo, ValVT, Offset, LocVT, HTP);
@@ -106,57 +108,63 @@ public:
}
unsigned getValNo() const { return ValNo; }
- MVT getValVT() const { return ValVT; }
+ EVT getValVT() const { return ValVT; }
bool isRegLoc() const { return !isMem; }
bool isMemLoc() const { return isMem; }
-
+
bool needsCustom() const { return isCustom; }
unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
- MVT getLocVT() const { return LocVT; }
-
+ EVT getLocVT() const { return LocVT; }
+
LocInfo getLocInfo() const { return HTP; }
+ bool isExtInLoc() const {
+ return (HTP == AExt || HTP == SExt || HTP == ZExt);
+ }
+
};
/// CCAssignFn - This function assigns a location for Val, updating State to
/// reflect the change.
-typedef bool CCAssignFn(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
+typedef bool CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
/// CCCustomFn - This function assigns a location for Val, possibly updating
/// all args to reflect changes and indicates if it handled it. It must set
/// isCustom if it handles the arg and returns true.
-typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+typedef bool CCCustomFn(unsigned &ValNo, EVT &ValVT,
+ EVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State);
/// CCState - This class holds information needed while lowering arguments and
/// return values. It captures which registers are already assigned and which
/// stack slots are used. It provides accessors to allocate these values.
class CCState {
- unsigned CallingConv;
+ CallingConv::ID CallingConv;
bool IsVarArg;
const TargetMachine &TM;
const TargetRegisterInfo &TRI;
SmallVector<CCValAssign, 16> &Locs;
-
+ LLVMContext &Context;
+
unsigned StackOffset;
SmallVector<uint32_t, 16> UsedRegs;
public:
- CCState(unsigned CC, bool isVarArg, const TargetMachine &TM,
- SmallVector<CCValAssign, 16> &locs);
-
+ CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &C);
+
void addLoc(const CCValAssign &V) {
Locs.push_back(V);
}
-
+
+ LLVMContext &getContext() const { return Context; }
const TargetMachine &getTarget() const { return TM; }
- unsigned getCallingConv() const { return CallingConv; }
+ CallingConv::ID getCallingConv() const { return CallingConv; }
bool isVarArg() const { return IsVarArg; }
-
+
unsigned getNextStackOffset() const { return StackOffset; }
/// isAllocated - Return true if the specified register (or an alias) is
@@ -164,32 +172,36 @@ public:
bool isAllocated(unsigned Reg) const {
return UsedRegs[Reg/32] & (1 << (Reg&31));
}
-
- /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+
+ /// AnalyzeFormalArguments - Analyze an array of argument values,
/// incorporating info about the formals into this state.
- void AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn);
-
- /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn);
+
+ /// AnalyzeReturn - Analyze the returned values of a return,
/// incorporating info about the result values into this state.
- void AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn);
-
- /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
- /// about the passed values into this state.
- void AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn);
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn);
+
+ /// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+ /// incorporating info about the passed values into this state.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn);
/// AnalyzeCallOperands - Same as above except it takes vectors of types
/// and argument flags.
- void AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn);
- /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+ /// AnalyzeCallResult - Analyze the return values of a call,
/// incorporating info about the passed values into this state.
- void AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn);
-
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn);
+
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
- void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
+ void AnalyzeCallResult(EVT VT, CCAssignFn Fn);
/// getFirstUnallocated - Return the first unallocated register in the set, or
/// NumRegs if they are all allocated.
@@ -199,7 +211,7 @@ public:
return i;
return NumRegs;
}
-
+
/// AllocateReg - Attempt to allocate one register. If it is not available,
/// return zero. Otherwise, return the register, marking it and any aliases
/// as allocated.
@@ -258,8 +270,8 @@ public:
// HandleByVal - Allocate a stack slot large enough to pass an argument by
// value. The size and alignment information of the argument is encoded in its
// parameter attribute.
- void HandleByVal(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
+ void HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
private:
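As a hedged sketch (not part of this patch), a target's call-lowering code
would now construct and drive CCState roughly like this — CC_MyTarget stands
for a tablegen-produced CCAssignFn, and CallConv, IsVarArg, Outs, and DAG are
the usual lowering inputs:

    // Sketch only: the LLVMContext argument and the ISD::OutputArg-based
    // AnalyzeCallOperands overload are the new pieces in this patch.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CallConv, IsVarArg, DAG.getTarget(), ArgLocs,
                   *DAG.getContext());
    CCInfo.AnalyzeCallOperands(Outs, CC_MyTarget);
    unsigned NumBytes = CCInfo.getNextStackOffset();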
diff --git a/include/llvm/CodeGen/DwarfWriter.h b/include/llvm/CodeGen/DwarfWriter.h
index facd5f6e6a5f..e7a2f664eb4c 100644
--- a/include/llvm/CodeGen/DwarfWriter.h
+++ b/include/llvm/CodeGen/DwarfWriter.h
@@ -33,8 +33,8 @@ class MachineFunction;
class MachineInstr;
class Value;
class Module;
-class GlobalVariable;
-class TargetAsmInfo;
+class MDNode;
+class MCAsmInfo;
class raw_ostream;
class Instruction;
class DICompileUnit;
@@ -68,7 +68,7 @@ public:
/// BeginModule - Emit all Dwarf sections that should come prior to the
/// content.
void BeginModule(Module *M, MachineModuleInfo *MMI, raw_ostream &OS,
- AsmPrinter *A, const TargetAsmInfo *T);
+ AsmPrinter *A, const MCAsmInfo *T);
/// EndModule - Emit all Dwarf sections that should come after the content.
///
@@ -85,21 +85,20 @@ public:
/// RecordSourceLine - Register a source line with debug info. Returns a
/// unique label ID used to generate a label and provide correspondence to
/// the source line list.
- unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU);
+ unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
/// RecordRegionStart - Indicate the start of a region.
- unsigned RecordRegionStart(GlobalVariable *V);
+ unsigned RecordRegionStart(MDNode *N);
/// RecordRegionEnd - Indicate the end of a region.
- unsigned RecordRegionEnd(GlobalVariable *V);
+ unsigned RecordRegionEnd(MDNode *N);
/// getRecordSourceLineCount - Count source lines.
unsigned getRecordSourceLineCount();
/// RecordVariable - Indicate the declaration of a local variable.
///
- void RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
- const MachineInstr *MI);
+ void RecordVariable(MDNode *N, unsigned FrameIndex);
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
/// be emitted.
@@ -111,13 +110,10 @@ public:
/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
unsigned RecordInlinedFnEnd(DISubprogram SP);
-
- /// RecordVariableScope - Record scope for the variable declared by
- /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE.
- void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI);
+ void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L);
+ void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L);
};
-
} // end llvm namespace
#endif
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index c7b1a42d06b6..1efd1e08acfa 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -91,7 +91,7 @@ public:
///
bool SelectInstruction(Instruction *I);
- /// SelectInstruction - Do "fast" instruction selection for the given
+ /// SelectOperator - Do "fast" instruction selection for the given
/// LLVM IR operator (Instruction or ConstantExpr), and append
/// generated machine instructions to the current block. Return true
/// if selection was successful.
@@ -137,24 +137,24 @@ protected:
/// FastEmit_r - This method is called by target-independent code
/// to request that an instruction with the given type and opcode
/// be emitted.
- virtual unsigned FastEmit_(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode);
/// FastEmit_r - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
/// register operand be emitted.
///
- virtual unsigned FastEmit_r(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_r(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode, unsigned Op0);
/// FastEmit_rr - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
/// register operands be emitted.
///
- virtual unsigned FastEmit_rr(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_rr(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode,
unsigned Op0, unsigned Op1);
@@ -162,8 +162,8 @@ protected:
/// to request that an instruction with the given type, opcode, and
/// register and immediate operands be emitted.
///
- virtual unsigned FastEmit_ri(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_ri(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode,
unsigned Op0, uint64_t Imm);
@@ -171,8 +171,8 @@ protected:
/// to request that an instruction with the given type, opcode, and
/// register and floating-point immediate operands be emitted.
///
- virtual unsigned FastEmit_rf(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_rf(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode,
unsigned Op0, ConstantFP *FPImm);
@@ -180,8 +180,8 @@ protected:
/// to request that an instruction with the given type, opcode, and
/// register and immediate operands be emitted.
///
- virtual unsigned FastEmit_rri(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_rri(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode,
unsigned Op0, unsigned Op1, uint64_t Imm);
@@ -189,33 +189,33 @@ protected:
/// to emit an instruction with an immediate operand using FastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
- unsigned FastEmit_ri_(MVT::SimpleValueType VT,
+ unsigned FastEmit_ri_(MVT VT,
ISD::NodeType Opcode,
unsigned Op0, uint64_t Imm,
- MVT::SimpleValueType ImmType);
+ MVT ImmType);
/// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
/// to emit an instruction with an immediate operand using FastEmit_rf.
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
- unsigned FastEmit_rf_(MVT::SimpleValueType VT,
+ unsigned FastEmit_rf_(MVT VT,
ISD::NodeType Opcode,
unsigned Op0, ConstantFP *FPImm,
- MVT::SimpleValueType ImmType);
+ MVT ImmType);
/// FastEmit_i - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
/// immediate operand be emitted.
- virtual unsigned FastEmit_i(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_i(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode,
uint64_t Imm);
/// FastEmit_f - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
/// floating-point immediate operand be emitted.
- virtual unsigned FastEmit_f(MVT::SimpleValueType VT,
- MVT::SimpleValueType RetVT,
+ virtual unsigned FastEmit_f(MVT VT,
+ MVT RetVT,
ISD::NodeType Opcode,
ConstantFP *FPImm);
@@ -268,12 +268,12 @@ protected:
/// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg
/// from a specified index of a superregister to a specified type.
- unsigned FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT,
+ unsigned FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, uint32_t Idx);
/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
/// with all but the least significant bit set to zero.
- unsigned FastEmitZExtFromI1(MVT::SimpleValueType VT,
+ unsigned FastEmitZExtFromI1(MVT VT,
unsigned Op);
/// FastEmitBranch - Emit an unconditional branch to the given block,
@@ -300,6 +300,8 @@ protected:
private:
bool SelectBinaryOp(User *I, ISD::NodeType ISDOpcode);
+ bool SelectFNeg(User *I);
+
bool SelectGetElementPtr(User *I);
bool SelectCall(User *I);
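As an illustration of the new MVT-based hooks, a hypothetical target override
might look like the following sketch (MyTarget, its opcode, and its register
class are invented names, not part of this patch):

    unsigned MyTargetFastISel::FastEmit_rr(MVT VT, MVT RetVT,
                                           ISD::NodeType Opcode,
                                           unsigned Op0, unsigned Op1) {
      // Handle only 32-bit integer adds; return 0 to signal "not handled".
      if (VT == MVT::i32 && Opcode == ISD::ADD)
        return FastEmitInst_rr(MyTarget::ADD32rr,
                               &MyTarget::GR32RegClass, Op0, Op1);
      return 0;
    }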
diff --git a/include/llvm/CodeGen/FileWriters.h b/include/llvm/CodeGen/FileWriters.h
index b3781e0c6012..a913d21eb206 100644
--- a/include/llvm/CodeGen/FileWriters.h
+++ b/include/llvm/CodeGen/FileWriters.h
@@ -17,14 +17,14 @@
namespace llvm {
class PassManagerBase;
- class MachineCodeEmitter;
+ class ObjectCodeEmitter;
class TargetMachine;
class raw_ostream;
- MachineCodeEmitter *AddELFWriter(PassManagerBase &FPM, raw_ostream &O,
- TargetMachine &TM);
- MachineCodeEmitter *AddMachOWriter(PassManagerBase &FPM, raw_ostream &O,
- TargetMachine &TM);
+ ObjectCodeEmitter *AddELFWriter(PassManagerBase &FPM, raw_ostream &O,
+ TargetMachine &TM);
+ ObjectCodeEmitter *AddMachOWriter(PassManagerBase &FPM, raw_ostream &O,
+ TargetMachine &TM);
} // end llvm namespace
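Callers pick up the stronger return type with no other change; a rough sketch,
where PM, Out, and TM are the caller's existing pass manager, output stream,
and target machine:

    ObjectCodeEmitter *OCE = AddELFWriter(PM, Out, TM); // was MachineCodeEmitter*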
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index e94aba388a4c..04fd8bed9724 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -42,7 +42,7 @@ namespace llvm {
class AsmPrinter;
class GCStrategy;
class Constant;
- class TargetAsmInfo;
+ class MCAsmInfo;
namespace GC {
diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h
index b693b1bbfc7c..ff1a205adbfd 100644
--- a/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -63,10 +63,10 @@ namespace llvm {
/// beginAssembly/finishAssembly - Emit module metadata as assembly code.
virtual void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI);
+ const MCAsmInfo &MAI);
virtual void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI);
+ const MCAsmInfo &MAI);
virtual ~GCMetadataPrinter();
};
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 73197af1afdc..180783a4d624 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -19,7 +19,7 @@
#include <string>
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
using namespace std;
@@ -162,17 +162,26 @@ public:
/// alignment (saturated to BufferEnd of course).
void emitAlignment(unsigned Alignment) {
if (Alignment == 0) Alignment = 1;
+ uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr,
+ Alignment);
+ CurBufferPtr = std::min(NewPtr, BufferEnd);
+ }
- if(Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
- // Move the current buffer ptr up to the specified alignment.
- CurBufferPtr =
- (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
- ~(uintptr_t)(Alignment-1));
- } else {
+ /// emitAlignmentWithFill - Similar to emitAlignment, except that the
+ /// extra bytes are filled with the provided byte.
+ void emitAlignmentWithFill(unsigned Alignment, uint8_t Fill) {
+ if (Alignment == 0) Alignment = 1;
+ uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr,
+ Alignment);
+ // Fail if we don't have room.
+ if (NewPtr > BufferEnd) {
CurBufferPtr = BufferEnd;
+ return;
+ }
+ while (CurBufferPtr < NewPtr) {
+ *CurBufferPtr++ = Fill;
}
}
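Both paths now lean on RoundUpToAlignment from llvm/Support/MathExtras.h
(hence the include swap above). For the power-of-two alignments used here it
is equivalent to this illustrative sketch:

    // RoundUpToAlignment(13, 8) == 16; RoundUpToAlignment(16, 8) == 16.
    uintptr_t roundUp(uintptr_t Value, unsigned Align) { // sketch only
      return (Value + Align - 1) & ~(uintptr_t)(Align - 1);
    }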
-
/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
/// written to the output stream.
@@ -267,6 +276,11 @@ public:
return Result;
}
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0;
+
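A hedged usage sketch (JCE, TD, and GV are illustrative stand-ins for the
emitter, the TargetData, and a GlobalVariable being materialized):

    // Storage obtained here survives past the current function's buffer.
    const Type *Ty = GV->getType()->getElementType();
    void *Addr = JCE.allocateGlobal(TD->getTypeAllocSize(Ty),
                                    TD->getPreferredAlignment(GV));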
/// StartMachineBasicBlock - This should be called by the target when a new
/// basic block is about to be emitted. This way the MCE knows where the
/// start of the block is, and can implement getMachineBasicBlockAddress.
@@ -285,6 +299,13 @@ public:
return CurBufferPtr-BufferBegin;
}
+ /// earlyResolveAddresses - True if the code emitter can use symbol addresses
+ /// during code emission time. The JIT is capable of doing this because it
+ /// creates jump tables or constant pools in memory on the fly while the
+ /// object code emitters rely on a linker to have real addresses and should
+ /// use relocations instead.
+ bool earlyResolveAddresses() const { return true; }
+
/// addRelocation - Whenever a relocatable address is needed, it should be
/// noted with this interface.
virtual void addRelocation(const MachineRelocation &MR) = 0;
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index a231f49d81ec..4d2d0eec52db 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -32,9 +32,7 @@ namespace {
(void) llvm::createDeadMachineInstructionElimPass();
- (void) llvm::createSimpleRegisterAllocator();
(void) llvm::createLocalRegisterAllocator();
- (void) llvm::createBigBlockRegisterAllocator();
(void) llvm::createLinearScanRegisterAllocator();
(void) llvm::createPBQPRegisterAllocator();
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 0cb7e9004387..05bd173dd2ea 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -21,9 +21,10 @@
#ifndef LLVM_CODEGEN_LIVEINTERVAL_H
#define LLVM_CODEGEN_LIVEINTERVAL_H
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
-#include <iosfwd>
+#include "llvm/Support/AlignOf.h"
#include <cassert>
#include <climits>
@@ -31,7 +32,210 @@ namespace llvm {
class MachineInstr;
class MachineRegisterInfo;
class TargetRegisterInfo;
- struct LiveInterval;
+ class raw_ostream;
+
+ /// LiveIndex - An opaque wrapper around machine indexes.
+ class LiveIndex {
+ friend class VNInfo;
+ friend class LiveInterval;
+ friend class LiveIntervals;
+ friend struct DenseMapInfo<LiveIndex>;
+
+ public:
+
+ enum Slot { LOAD, USE, DEF, STORE, NUM };
+
+ private:
+
+ unsigned index;
+
+ static const unsigned PHI_BIT = 1 << 31;
+
+ public:
+
+ /// Construct a default LiveIndex pointing to a reserved index.
+ LiveIndex() : index(0) {}
+
+ /// Construct an index from the given index, pointing to the given slot.
+ LiveIndex(LiveIndex m, Slot s)
+ : index((m.index / NUM) * NUM + s) {}
+
+ /// Print this index to the given raw_ostream.
+ void print(raw_ostream &os) const;
+
+ /// Compare two LiveIndex objects for equality.
+ bool operator==(LiveIndex other) const {
+ return ((index & ~PHI_BIT) == (other.index & ~PHI_BIT));
+ }
+ /// Compare two LiveIndex objects for inequality.
+ bool operator!=(LiveIndex other) const {
+ return ((index & ~PHI_BIT) != (other.index & ~PHI_BIT));
+ }
+
+ /// Compare two LiveIndex objects. Return true if the first index
+ /// is strictly lower than the second.
+ bool operator<(LiveIndex other) const {
+ return ((index & ~PHI_BIT) < (other.index & ~PHI_BIT));
+ }
+ /// Compare two LiveIndex objects. Return true if the first index
+ /// is lower than, or equal to, the second.
+ bool operator<=(LiveIndex other) const {
+ return ((index & ~PHI_BIT) <= (other.index & ~PHI_BIT));
+ }
+
+ /// Compare two LiveIndex objects. Return true if the first index
+ /// is greater than the second.
+ bool operator>(LiveIndex other) const {
+ return ((index & ~PHI_BIT) > (other.index & ~PHI_BIT));
+ }
+
+ /// Compare two LiveIndex objects. Return true if the first index
+ /// is greater than, or equal to, the second.
+ bool operator>=(LiveIndex other) const {
+ return ((index & ~PHI_BIT) >= (other.index & ~PHI_BIT));
+ }
+
+ /// Returns true if this index represents a load.
+ bool isLoad() const {
+ return ((index % NUM) == LOAD);
+ }
+
+ /// Returns true if this index represents a use.
+ bool isUse() const {
+ return ((index % NUM) == USE);
+ }
+
+ /// Returns true if this index represents a def.
+ bool isDef() const {
+ return ((index % NUM) == DEF);
+ }
+
+ /// Returns true if this index represents a store.
+ bool isStore() const {
+ return ((index % NUM) == STORE);
+ }
+
+ /// Returns the slot for this LiveIndex.
+ Slot getSlot() const {
+ return static_cast<Slot>(index % NUM);
+ }
+
+ /// Returns true if this index represents a non-PHI use/def.
+ bool isNonPHIIndex() const {
+ return ((index & PHI_BIT) == 0);
+ }
+
+ /// Returns true if this index represents a PHI use/def.
+ bool isPHIIndex() const {
+ return ((index & PHI_BIT) == PHI_BIT);
+ }
+
+ private:
+
+ /// Construct an index from the given index, with its PHI kill marker set.
+ LiveIndex(bool phi, LiveIndex o) : index(o.index) {
+ if (phi)
+ index |= PHI_BIT;
+ else
+ index &= ~PHI_BIT;
+ }
+
+ explicit LiveIndex(unsigned idx)
+ : index(idx & ~PHI_BIT) {}
+
+ LiveIndex(bool phi, unsigned idx)
+ : index(idx & ~PHI_BIT) {
+ if (phi)
+ index |= PHI_BIT;
+ }
+
+ LiveIndex(bool phi, unsigned idx, Slot slot)
+ : index(((idx / NUM) * NUM + slot) & ~PHI_BIT) {
+ if (phi)
+ index |= PHI_BIT;
+ }
+
+ LiveIndex nextSlot_() const {
+ assert((index & PHI_BIT) == ((index + 1) & PHI_BIT) &&
+ "Index out of bounds.");
+ return LiveIndex(index + 1);
+ }
+
+ LiveIndex nextIndex_() const {
+ assert((index & PHI_BIT) == ((index + NUM) & PHI_BIT) &&
+ "Index out of bounds.");
+ return LiveIndex(index + NUM);
+ }
+
+ LiveIndex prevSlot_() const {
+ assert((index & PHI_BIT) == ((index - 1) & PHI_BIT) &&
+ "Index out of bounds.");
+ return LiveIndex(index - 1);
+ }
+
+ LiveIndex prevIndex_() const {
+ assert((index & PHI_BIT) == ((index - NUM) & PHI_BIT) &&
+ "Index out of bounds.");
+ return LiveIndex(index - NUM);
+ }
+
+ int distance(LiveIndex other) const {
+ return (other.index & ~PHI_BIT) - (index & ~PHI_BIT);
+ }
+
+ /// Returns an unsigned number suitable as an index into a
+ /// vector over all instructions.
+ unsigned getVecIndex() const {
+ return (index & ~PHI_BIT) / NUM;
+ }
+
+ /// Scale this index by the given factor.
+ LiveIndex scale(unsigned factor) const {
+ unsigned i = (index & ~PHI_BIT) / NUM,
+               o = (index & ~PHI_BIT) % NUM;
+ assert(index <= (~0U & ~PHI_BIT) / (factor * NUM) &&
+ "Rescaled interval would overflow");
+ return LiveIndex(i * NUM * factor, o);
+ }
+
+ static LiveIndex emptyKey() {
+ return LiveIndex(true, 0x7fffffff);
+ }
+
+ static LiveIndex tombstoneKey() {
+ return LiveIndex(true, 0x7ffffffe);
+ }
+
+ static unsigned getHashValue(const LiveIndex &v) {
+ return v.index * 37;
+ }
+
+ };
+
+ inline raw_ostream& operator<<(raw_ostream &os, LiveIndex mi) {
+ mi.print(os);
+ return os;
+ }
+
+ /// Densemap specialization for LiveIndex.
+ template <>
+ struct DenseMapInfo<LiveIndex> {
+ static inline LiveIndex getEmptyKey() {
+ return LiveIndex::emptyKey();
+ }
+ static inline LiveIndex getTombstoneKey() {
+ return LiveIndex::tombstoneKey();
+ }
+ static inline unsigned getHashValue(const LiveIndex &v) {
+ return LiveIndex::getHashValue(v);
+ }
+ static inline bool isEqual(const LiveIndex &LHS,
+ const LiveIndex &RHS) {
+ return (LHS == RHS);
+ }
+ static inline bool isPod() { return true; }
+ };
+
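A LiveIndex packs an instruction number and a slot into one word, index =
instrNum * NUM + slot, with bit 31 reserved as the PHI marker; illustratively
(NUM == 4):

    // Instruction 10 at its DEF slot: raw index == 10 * 4 + 2 == 42.
    //   getVecIndex() -> 42 / 4 == 10
    //   getSlot()     -> Slot(42 % 4) == DEF
    //   isPHIIndex()  -> false while bit 31 is clear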
/// VNInfo - Value Number Information.
/// This class holds information about a machine level value, including
@@ -48,7 +252,6 @@ namespace llvm {
/// index of the MBB in which the PHI originally existed. This can be used
/// to insert code (spills or copies) which deals with the value, which will
/// be live in to the block.
-
class VNInfo {
private:
enum {
@@ -60,36 +263,70 @@ namespace llvm {
};
unsigned char flags;
+ union {
+ MachineInstr *copy;
+ unsigned reg;
+ } cr;
public:
+
+ typedef SmallVector<LiveIndex, 4> KillSet;
+
/// The ID number of this value.
unsigned id;
/// The index of the defining instruction (if isDefAccurate() returns true).
- unsigned def;
- MachineInstr *copy;
- SmallVector<unsigned, 4> kills;
+ LiveIndex def;
+
+ KillSet kills;
VNInfo()
- : flags(IS_UNUSED), id(~1U), def(0), copy(0) {}
+ : flags(IS_UNUSED), id(~1U) { cr.copy = 0; }
/// VNInfo constructor.
/// d is presumed to point to the actual defining instr. If it doesn't
/// setIsDefAccurate(false) should be called after construction.
- VNInfo(unsigned i, unsigned d, MachineInstr *c)
- : flags(IS_DEF_ACCURATE), id(i), def(d), copy(c) {}
+ VNInfo(unsigned i, LiveIndex d, MachineInstr *c)
+ : flags(IS_DEF_ACCURATE), id(i), def(d) { cr.copy = c; }
/// VNInfo constructor, copies values from orig, except for the value number.
VNInfo(unsigned i, const VNInfo &orig)
- : flags(orig.flags), id(i), def(orig.def), copy(orig.copy),
- kills(orig.kills) {}
+ : flags(orig.flags), cr(orig.cr), id(i), def(orig.def), kills(orig.kills)
+ { }
+
+ /// Copy from the parameter into this VNInfo.
+ void copyFrom(VNInfo &src) {
+ flags = src.flags;
+ cr = src.cr;
+ def = src.def;
+ kills = src.kills;
+ }
/// Used for copying value number info.
unsigned getFlags() const { return flags; }
void setFlags(unsigned flags) { this->flags = flags; }
+    /// For a register interval, if this VN was defined by a copy instr
+    /// getCopy() returns a pointer to it, otherwise returns 0.
+    /// For a stack interval the behavior of this method is undefined.
+ MachineInstr* getCopy() const { return cr.copy; }
+ /// For a register interval, set the copy member.
+ /// This method should not be called on stack intervals as it may lead to
+ /// undefined behavior.
+ void setCopy(MachineInstr *c) { cr.copy = c; }
+
+ /// For a stack interval, returns the reg which this stack interval was
+ /// defined from.
+    /// For a register interval the behavior of this method is undefined.
+ unsigned getReg() const { return cr.reg; }
+ /// For a stack interval, set the defining register.
+ /// This method should not be called on register intervals as it may lead
+    /// to undefined behavior.
+ void setReg(unsigned reg) { cr.reg = reg; }
+
/// Returns true if one or more kills are PHI nodes.
bool hasPHIKill() const { return flags & HAS_PHI_KILL; }
+ /// Set the PHI kill flag on this value.
void setHasPHIKill(bool hasKill) {
if (hasKill)
flags |= HAS_PHI_KILL;
@@ -100,16 +337,18 @@ namespace llvm {
/// Returns true if this value is re-defined by an early clobber somewhere
/// during the live range.
bool hasRedefByEC() const { return flags & REDEF_BY_EC; }
+ /// Set the "redef by early clobber" flag on this value.
void setHasRedefByEC(bool hasRedef) {
if (hasRedef)
flags |= REDEF_BY_EC;
else
flags &= ~REDEF_BY_EC;
}
-
+
/// Returns true if this value is defined by a PHI instruction (or was,
/// PHI instructions may have been eliminated).
bool isPHIDef() const { return flags & IS_PHI_DEF; }
+ /// Set the "phi def" flag on this value.
void setIsPHIDef(bool phiDef) {
if (phiDef)
flags |= IS_PHI_DEF;
@@ -119,6 +358,7 @@ namespace llvm {
/// Returns true if this value is unused.
bool isUnused() const { return flags & IS_UNUSED; }
+ /// Set the "is unused" flag on this value.
void setIsUnused(bool unused) {
if (unused)
flags |= IS_UNUSED;
@@ -128,6 +368,7 @@ namespace llvm {
/// Returns true if the def is accurate.
bool isDefAccurate() const { return flags & IS_DEF_ACCURATE; }
+ /// Set the "is def accurate" flag on this value.
void setIsDefAccurate(bool defAccurate) {
if (defAccurate)
flags |= IS_DEF_ACCURATE;
@@ -135,26 +376,74 @@ namespace llvm {
flags &= ~IS_DEF_ACCURATE;
}
+ /// Returns true if the given index is a kill of this value.
+ bool isKill(LiveIndex k) const {
+ KillSet::const_iterator
+ i = std::lower_bound(kills.begin(), kills.end(), k);
+ return (i != kills.end() && *i == k);
+ }
+
+ /// addKill - Add a kill instruction index to the specified value
+ /// number.
+ void addKill(LiveIndex k) {
+ if (kills.empty()) {
+ kills.push_back(k);
+ } else {
+ KillSet::iterator
+ i = std::lower_bound(kills.begin(), kills.end(), k);
+ kills.insert(i, k);
+ }
+ }
+
+ /// Remove the specified kill index from this value's kills list.
+ /// Returns true if the value was present, otherwise returns false.
+ bool removeKill(LiveIndex k) {
+ KillSet::iterator i = std::lower_bound(kills.begin(), kills.end(), k);
+ if (i != kills.end() && *i == k) {
+ kills.erase(i);
+ return true;
+ }
+ return false;
+ }
+
+ /// Remove all kills in the range [s, e).
+ void removeKills(LiveIndex s, LiveIndex e) {
+ KillSet::iterator
+ si = std::lower_bound(kills.begin(), kills.end(), s),
+ se = std::upper_bound(kills.begin(), kills.end(), e);
+
+ kills.erase(si, se);
+ }
+
};
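Since kill bookkeeping now lives on VNInfo itself (the corresponding static
LiveInterval helpers are deleted further down), call sites reduce to a sketch
like:

    VNInfo *VNI = Interval.getValNumInfo(0);
    VNI->addKill(KillIdx);        // insertion keeps the kill list sorted
    if (VNI->isKill(KillIdx))
      VNI->removeKill(KillIdx);   // true when the index was present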
/// LiveRange structure - This represents a simple register range in the
/// program, with an inclusive start point and an exclusive end point.
/// These ranges are rendered as [start,end).
struct LiveRange {
- unsigned start; // Start point of the interval (inclusive)
- unsigned end; // End point of the interval (exclusive)
+ LiveIndex start; // Start point of the interval (inclusive)
+ LiveIndex end; // End point of the interval (exclusive)
VNInfo *valno; // identifier for the value contained in this interval.
- LiveRange(unsigned S, unsigned E, VNInfo *V) : start(S), end(E), valno(V) {
+ LiveRange(LiveIndex S, LiveIndex E, VNInfo *V)
+ : start(S), end(E), valno(V) {
+
assert(S < E && "Cannot create empty or backwards range");
}
/// contains - Return true if the index is covered by this range.
///
- bool contains(unsigned I) const {
+ bool contains(LiveIndex I) const {
return start <= I && I < end;
}
+ /// containsRange - Return true if the given range, [S, E), is covered by
+ /// this range.
+ bool containsRange(LiveIndex S, LiveIndex E) const {
+ assert((S < E) && "Backwards interval?");
+ return (start <= S && S < end) && (start < E && E <= end);
+ }
+
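Both predicates treat ranges as half-open, [start, end); using plain numbers
for illustration:

    // For a LiveRange covering [4, 8):
    //   contains(4)         -> true      contains(8)         -> false
    //   containsRange(4, 8) -> true      containsRange(6, 9) -> false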
bool operator<(const LiveRange &LR) const {
return start < LR.start || (start == LR.start && end < LR.end);
}
@@ -163,28 +452,29 @@ namespace llvm {
}
void dump() const;
- void print(std::ostream &os) const;
- void print(std::ostream *os) const { if (os) print(*os); }
+ void print(raw_ostream &os) const;
private:
LiveRange(); // DO NOT IMPLEMENT
};
- std::ostream& operator<<(std::ostream& os, const LiveRange &LR);
+ raw_ostream& operator<<(raw_ostream& os, const LiveRange &LR);
- inline bool operator<(unsigned V, const LiveRange &LR) {
+ inline bool operator<(LiveIndex V, const LiveRange &LR) {
return V < LR.start;
}
- inline bool operator<(const LiveRange &LR, unsigned V) {
+ inline bool operator<(const LiveRange &LR, LiveIndex V) {
return LR.start < V;
}
/// LiveInterval - This class represents some number of live ranges for a
/// register or value. This class also contains a bit of register allocator
/// state.
- struct LiveInterval {
+ class LiveInterval {
+ public:
+
typedef SmallVector<LiveRange,4> Ranges;
typedef SmallVector<VNInfo*,4> VNInfoList;
@@ -193,8 +483,6 @@ namespace llvm {
float weight; // weight of this interval
Ranges ranges; // the ranges in which this register is live
VNInfoList valnos; // value#'s
-
- public:
struct InstrSlots {
enum {
@@ -205,14 +493,6 @@ namespace llvm {
NUM = 4
};
- static unsigned scale(unsigned slot, unsigned factor) {
- unsigned index = slot / NUM,
- offset = slot % NUM;
- assert(index <= ~0U / (factor * NUM) &&
- "Rescaled interval would overflow");
- return index * NUM * factor + offset;
- }
-
};
LiveInterval(unsigned Reg, float Weight, bool IsSS = false)
@@ -242,8 +522,8 @@ namespace llvm {
/// end of the interval. If no LiveRange contains this position, but the
/// position is in a hole, this method returns an iterator pointing to the
/// LiveRange immediately after the hole.
- iterator advanceTo(iterator I, unsigned Pos) {
- if (Pos >= endNumber())
+ iterator advanceTo(iterator I, LiveIndex Pos) {
+ if (Pos >= endIndex())
return end();
while (I->end <= Pos) ++I;
return I;
@@ -286,33 +566,15 @@ namespace llvm {
inline const VNInfo *getValNumInfo(unsigned ValNo) const {
return valnos[ValNo];
}
-
- /// copyValNumInfo - Copy the value number info for one value number to
- /// another.
- void copyValNumInfo(VNInfo *DstValNo, const VNInfo *SrcValNo) {
- DstValNo->def = SrcValNo->def;
- DstValNo->copy = SrcValNo->copy;
- DstValNo->setFlags(SrcValNo->getFlags());
- DstValNo->kills = SrcValNo->kills;
- }
/// getNextValue - Create a new value number and return it.  def specifies
/// the index of the instruction that defines the value number.
- VNInfo *getNextValue(unsigned MIIdx, MachineInstr *CopyMI,
- bool isDefAccurate, BumpPtrAllocator &VNInfoAllocator) {
-
- assert(MIIdx != ~0u && MIIdx != ~1u &&
- "PHI def / unused flags should now be passed explicitly.");
-#ifdef __GNUC__
- unsigned Alignment = (unsigned)__alignof__(VNInfo);
-#else
- // FIXME: ugly.
- unsigned Alignment = 8;
-#endif
+ VNInfo *getNextValue(LiveIndex def, MachineInstr *CopyMI,
+ bool isDefAccurate, BumpPtrAllocator &VNInfoAllocator){
VNInfo *VNI =
static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
- Alignment));
- new (VNI) VNInfo((unsigned)valnos.size(), MIIdx, CopyMI);
+ alignof<VNInfo>()));
+ new (VNI) VNInfo((unsigned)valnos.size(), def, CopyMI);
VNI->setIsDefAccurate(isDefAccurate);
valnos.push_back(VNI);
return VNI;
@@ -320,86 +582,31 @@ namespace llvm {
/// Create a copy of the given value. The new value will be identical except
/// for the Value number.
- VNInfo *createValueCopy(const VNInfo *orig, BumpPtrAllocator &VNInfoAllocator) {
-
-#ifdef __GNUC__
- unsigned Alignment = (unsigned)__alignof__(VNInfo);
-#else
- // FIXME: ugly.
- unsigned Alignment = 8;
-#endif
+ VNInfo *createValueCopy(const VNInfo *orig,
+ BumpPtrAllocator &VNInfoAllocator) {
VNInfo *VNI =
static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
- Alignment));
+ alignof<VNInfo>()));
new (VNI) VNInfo((unsigned)valnos.size(), *orig);
valnos.push_back(VNI);
return VNI;
}
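With the __alignof__/8 fallback gone, both factory methods share the same
portable allocation pattern via alignof<T>() from the newly included
llvm/Support/AlignOf.h; sketched:

    void *Mem = VNInfoAllocator.Allocate(sizeof(VNInfo), alignof<VNInfo>());
    VNInfo *VNI = new (Mem) VNInfo(ID, Def, CopyMI); // placement-new into pool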
- /// addKill - Add a kill instruction index to the specified value
- /// number.
- static void addKill(VNInfo *VNI, unsigned KillIdx) {
- SmallVector<unsigned, 4> &kills = VNI->kills;
- if (kills.empty()) {
- kills.push_back(KillIdx);
- } else {
- SmallVector<unsigned, 4>::iterator
- I = std::lower_bound(kills.begin(), kills.end(), KillIdx);
- kills.insert(I, KillIdx);
- }
- }
-
/// addKills - Add a number of kills into the VNInfo kill vector. If this
/// interval is live at a kill point, then the kill is not added.
- void addKills(VNInfo *VNI, const SmallVector<unsigned, 4> &kills) {
+ void addKills(VNInfo *VNI, const VNInfo::KillSet &kills) {
for (unsigned i = 0, e = static_cast<unsigned>(kills.size());
i != e; ++i) {
- unsigned KillIdx = kills[i];
- if (!liveBeforeAndAt(KillIdx)) {
- SmallVector<unsigned, 4>::iterator
- I = std::lower_bound(VNI->kills.begin(), VNI->kills.end(), KillIdx);
- VNI->kills.insert(I, KillIdx);
+ if (!liveBeforeAndAt(kills[i])) {
+ VNI->addKill(kills[i]);
}
}
}
- /// removeKill - Remove the specified kill from the list of kills of
- /// the specified val#.
- static bool removeKill(VNInfo *VNI, unsigned KillIdx) {
- SmallVector<unsigned, 4> &kills = VNI->kills;
- SmallVector<unsigned, 4>::iterator
- I = std::lower_bound(kills.begin(), kills.end(), KillIdx);
- if (I != kills.end() && *I == KillIdx) {
- kills.erase(I);
- return true;
- }
- return false;
- }
-
- /// removeKills - Remove all the kills in specified range
- /// [Start, End] of the specified val#.
- static void removeKills(VNInfo *VNI, unsigned Start, unsigned End) {
- SmallVector<unsigned, 4> &kills = VNI->kills;
- SmallVector<unsigned, 4>::iterator
- I = std::lower_bound(kills.begin(), kills.end(), Start);
- SmallVector<unsigned, 4>::iterator
- E = std::upper_bound(kills.begin(), kills.end(), End);
- kills.erase(I, E);
- }
-
- /// isKill - Return true if the specified index is a kill of the
- /// specified val#.
- static bool isKill(const VNInfo *VNI, unsigned KillIdx) {
- const SmallVector<unsigned, 4> &kills = VNI->kills;
- SmallVector<unsigned, 4>::const_iterator
- I = std::lower_bound(kills.begin(), kills.end(), KillIdx);
- return I != kills.end() && *I == KillIdx;
- }
-
/// isOnlyLROfValNo - Return true if the specified live range is the only
/// one defined by the its val#.
- bool isOnlyLROfValNo( const LiveRange *LR) {
+ bool isOnlyLROfValNo(const LiveRange *LR) {
for (const_iterator I = begin(), E = end(); I != E; ++I) {
const LiveRange *Tmp = I;
if (Tmp != LR && Tmp->valno == LR->valno)
@@ -423,7 +630,8 @@ namespace llvm {
/// MergeInClobberRange - Same as MergeInClobberRanges except it merges in a
/// single LiveRange only.
- void MergeInClobberRange(unsigned Start, unsigned End,
+ void MergeInClobberRange(LiveIndex Start,
+ LiveIndex End,
BumpPtrAllocator &VNInfoAllocator);
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
@@ -448,51 +656,62 @@ namespace llvm {
bool empty() const { return ranges.empty(); }
- /// beginNumber - Return the lowest numbered slot covered by interval.
- unsigned beginNumber() const {
+ /// beginIndex - Return the lowest numbered slot covered by interval.
+ LiveIndex beginIndex() const {
if (empty())
- return 0;
+ return LiveIndex();
return ranges.front().start;
}
/// endNumber - return the maximum point of the interval of the whole,
/// exclusive.
- unsigned endNumber() const {
+ LiveIndex endIndex() const {
if (empty())
- return 0;
+ return LiveIndex();
return ranges.back().end;
}
- bool expiredAt(unsigned index) const {
- return index >= endNumber();
+ bool expiredAt(LiveIndex index) const {
+ return index >= endIndex();
}
- bool liveAt(unsigned index) const;
+ bool liveAt(LiveIndex index) const;
// liveBeforeAndAt - Check if the interval is live at the index and the
// index just before it. If index is liveAt, check if it starts a new live
// range. If it does, then check if the previous live range ends at index-1.
- bool liveBeforeAndAt(unsigned index) const;
+ bool liveBeforeAndAt(LiveIndex index) const;
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
- const LiveRange *getLiveRangeContaining(unsigned Idx) const {
+ const LiveRange *getLiveRangeContaining(LiveIndex Idx) const {
const_iterator I = FindLiveRangeContaining(Idx);
return I == end() ? 0 : &*I;
}
+ /// getLiveRangeContaining - Return the live range that contains the
+ /// specified index, or null if there is none.
+ LiveRange *getLiveRangeContaining(LiveIndex Idx) {
+ iterator I = FindLiveRangeContaining(Idx);
+ return I == end() ? 0 : &*I;
+ }
+
/// FindLiveRangeContaining - Return an iterator to the live range that
/// contains the specified index, or end() if there is none.
- const_iterator FindLiveRangeContaining(unsigned Idx) const;
+ const_iterator FindLiveRangeContaining(LiveIndex Idx) const;
/// FindLiveRangeContaining - Return an iterator to the live range that
/// contains the specified index, or end() if there is none.
- iterator FindLiveRangeContaining(unsigned Idx);
+ iterator FindLiveRangeContaining(LiveIndex Idx);
+
+    /// findDefinedVNInfoForRegInt - Find the VNInfo that's defined at the
+    /// specified index (register interval only).
+    VNInfo *findDefinedVNInfoForRegInt(LiveIndex Idx) const;
+
+    /// findDefinedVNInfoForStackInt - Find the VNInfo that's defined by the
+    /// specified register (stack interval only).
+ VNInfo *findDefinedVNInfoForStackInt(unsigned Reg) const;
- /// findDefinedVNInfo - Find the VNInfo that's defined at the specified
- /// index (register interval) or defined by the specified register (stack
- /// inteval).
- VNInfo *findDefinedVNInfo(unsigned DefIdxOrReg) const;
/// overlaps - Return true if the intersection of the two live intervals is
/// not empty.
@@ -502,7 +721,7 @@ namespace llvm {
/// overlaps - Return true if the live interval overlaps a range specified
/// by [Start, End).
- bool overlaps(unsigned Start, unsigned End) const;
+ bool overlaps(LiveIndex Start, LiveIndex End) const;
/// overlapsFrom - Return true if the intersection of the two live intervals
/// is not empty. The specified iterator is a hint that we can begin
@@ -526,11 +745,12 @@ namespace llvm {
/// isInOneLiveRange - Return true if the range specified is entirely in the
/// a single LiveRange of the live interval.
- bool isInOneLiveRange(unsigned Start, unsigned End);
+ bool isInOneLiveRange(LiveIndex Start, LiveIndex End);
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be a single LiveRange in its entirety.
- void removeRange(unsigned Start, unsigned End, bool RemoveDeadValNo = false);
+ void removeRange(LiveIndex Start, LiveIndex End,
+ bool RemoveDeadValNo = false);
void removeRange(LiveRange LR, bool RemoveDeadValNo = false) {
removeRange(LR.start, LR.end, RemoveDeadValNo);
@@ -548,24 +768,30 @@ namespace llvm {
///
unsigned getSize() const;
+ /// ComputeJoinedWeight - Set the weight of a live interval after
+ /// Other has been merged into it.
+ void ComputeJoinedWeight(const LiveInterval &Other);
+
bool operator<(const LiveInterval& other) const {
- return beginNumber() < other.beginNumber();
+ const LiveIndex &thisIndex = beginIndex();
+ const LiveIndex &otherIndex = other.beginIndex();
+ return (thisIndex < otherIndex ||
+ (thisIndex == otherIndex && reg < other.reg));
}
- void print(std::ostream &OS, const TargetRegisterInfo *TRI = 0) const;
- void print(std::ostream *OS, const TargetRegisterInfo *TRI = 0) const {
- if (OS) print(*OS, TRI);
- }
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI = 0) const;
void dump() const;
private:
+
Ranges::iterator addRangeFrom(LiveRange LR, Ranges::iterator From);
- void extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd);
- Ranges::iterator extendIntervalStartTo(Ranges::iterator I, unsigned NewStr);
+ void extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd);
+ Ranges::iterator extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStr);
LiveInterval& operator=(const LiveInterval& rhs); // DO NOT IMPLEMENT
+
};
- inline std::ostream &operator<<(std::ostream &OS, const LiveInterval &LI) {
+ inline raw_ostream &operator<<(raw_ostream &OS, const LiveInterval &LI) {
LI.print(OS);
return OS;
}
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 7ae98bb093d6..511db6db10c9 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -20,6 +20,7 @@
#ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
#define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/ADT/BitVector.h"
@@ -39,13 +40,13 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterClass;
class VirtRegMap;
- typedef std::pair<unsigned, MachineBasicBlock*> IdxMBBPair;
+ typedef std::pair<LiveIndex, MachineBasicBlock*> IdxMBBPair;
- inline bool operator<(unsigned V, const IdxMBBPair &IM) {
+ inline bool operator<(LiveIndex V, const IdxMBBPair &IM) {
return V < IM.first;
}
- inline bool operator<(const IdxMBBPair &IM, unsigned V) {
+ inline bool operator<(const IdxMBBPair &IM, LiveIndex V) {
return IM.first < V;
}
@@ -70,7 +71,7 @@ namespace llvm {
/// MBB2IdxMap - The indexes of the first and last instructions in the
/// specified basic block.
- std::vector<std::pair<unsigned, unsigned> > MBB2IdxMap;
+ std::vector<std::pair<LiveIndex, LiveIndex> > MBB2IdxMap;
/// Idx2MBBMap - Sorted list of pairs of index of first instruction
/// and MBB id.
@@ -79,7 +80,7 @@ namespace llvm {
/// FunctionSize - The number of instructions present in the function
uint64_t FunctionSize;
- typedef DenseMap<MachineInstr*, unsigned> Mi2IndexMap;
+ typedef DenseMap<const MachineInstr*, LiveIndex> Mi2IndexMap;
Mi2IndexMap mi2iMap_;
typedef std::vector<MachineInstr*> Index2MiMap;
@@ -88,9 +89,16 @@ namespace llvm {
typedef DenseMap<unsigned, LiveInterval*> Reg2IntervalMap;
Reg2IntervalMap r2iMap_;
+ DenseMap<MachineBasicBlock*, LiveIndex> terminatorGaps;
+
+ /// phiJoinCopies - Copy instructions which are PHI joins.
+ SmallVector<MachineInstr*, 16> phiJoinCopies;
+
+ /// allocatableRegs_ - A bit vector of allocatable registers.
BitVector allocatableRegs_;
- std::vector<MachineInstr*> ClonedMIs;
+ /// CloneMIs - A list of clones as result of re-materialization.
+ std::vector<MachineInstr*> CloneMIs;
typedef LiveInterval::InstrSlots InstrSlots;
@@ -98,23 +106,40 @@ namespace llvm {
static char ID; // Pass identification, replacement for typeid
LiveIntervals() : MachineFunctionPass(&ID) {}
- static unsigned getBaseIndex(unsigned index) {
- return index - (index % InstrSlots::NUM);
+ LiveIndex getBaseIndex(LiveIndex index) {
+ return LiveIndex(index, LiveIndex::LOAD);
+ }
+ LiveIndex getBoundaryIndex(LiveIndex index) {
+ return LiveIndex(index,
+ (LiveIndex::Slot)(LiveIndex::NUM - 1));
+ }
+ LiveIndex getLoadIndex(LiveIndex index) {
+ return LiveIndex(index, LiveIndex::LOAD);
}
- static unsigned getBoundaryIndex(unsigned index) {
- return getBaseIndex(index + InstrSlots::NUM - 1);
+ LiveIndex getUseIndex(LiveIndex index) {
+ return LiveIndex(index, LiveIndex::USE);
}
- static unsigned getLoadIndex(unsigned index) {
- return getBaseIndex(index) + InstrSlots::LOAD;
+ LiveIndex getDefIndex(LiveIndex index) {
+ return LiveIndex(index, LiveIndex::DEF);
}
- static unsigned getUseIndex(unsigned index) {
- return getBaseIndex(index) + InstrSlots::USE;
+ LiveIndex getStoreIndex(LiveIndex index) {
+ return LiveIndex(index, LiveIndex::STORE);
+ }
+
+ LiveIndex getNextSlot(LiveIndex m) const {
+ return m.nextSlot_();
+ }
+
+ LiveIndex getNextIndex(LiveIndex m) const {
+ return m.nextIndex_();
}
- static unsigned getDefIndex(unsigned index) {
- return getBaseIndex(index) + InstrSlots::DEF;
+
+ LiveIndex getPrevSlot(LiveIndex m) const {
+ return m.prevSlot_();
}
- static unsigned getStoreIndex(unsigned index) {
- return getBaseIndex(index) + InstrSlots::STORE;
+
+ LiveIndex getPrevIndex(LiveIndex m) const {
+ return m.prevIndex_();
}
static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
@@ -147,20 +172,20 @@ namespace llvm {
/// getMBBStartIdx - Return the base index of the first instruction in the
/// specified MachineBasicBlock.
- unsigned getMBBStartIdx(MachineBasicBlock *MBB) const {
+ LiveIndex getMBBStartIdx(MachineBasicBlock *MBB) const {
return getMBBStartIdx(MBB->getNumber());
}
- unsigned getMBBStartIdx(unsigned MBBNo) const {
+ LiveIndex getMBBStartIdx(unsigned MBBNo) const {
assert(MBBNo < MBB2IdxMap.size() && "Invalid MBB number!");
return MBB2IdxMap[MBBNo].first;
}
/// getMBBEndIdx - Return the store index of the last instruction in the
/// specified MachineBasicBlock.
- unsigned getMBBEndIdx(MachineBasicBlock *MBB) const {
+ LiveIndex getMBBEndIdx(MachineBasicBlock *MBB) const {
return getMBBEndIdx(MBB->getNumber());
}
- unsigned getMBBEndIdx(unsigned MBBNo) const {
+ LiveIndex getMBBEndIdx(unsigned MBBNo) const {
assert(MBBNo < MBB2IdxMap.size() && "Invalid MBB number!");
return MBB2IdxMap[MBBNo].second;
}
@@ -181,7 +206,7 @@ namespace llvm {
/// getMBBFromIndex - given an index in any instruction of an
/// MBB, return a pointer to the MBB
- MachineBasicBlock* getMBBFromIndex(unsigned index) const {
+ MachineBasicBlock* getMBBFromIndex(LiveIndex index) const {
std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), index);
// Take the pair containing the index
@@ -189,14 +214,14 @@ namespace llvm {
((I != Idx2MBBMap.end() && I->first > index) ||
(I == Idx2MBBMap.end() && Idx2MBBMap.size()>0)) ? (I-1): I;
- assert(J != Idx2MBBMap.end() && J->first < index+1 &&
+ assert(J != Idx2MBBMap.end() && J->first <= index &&
index <= getMBBEndIdx(J->second) &&
"index does not correspond to an MBB");
return J->second;
}
/// getInstructionIndex - returns the base index of instr
- unsigned getInstructionIndex(MachineInstr* instr) const {
+ LiveIndex getInstructionIndex(const MachineInstr* instr) const {
Mi2IndexMap::const_iterator it = mi2iMap_.find(instr);
assert(it != mi2iMap_.end() && "Invalid instruction!");
return it->second;
@@ -204,48 +229,49 @@ namespace llvm {
/// getInstructionFromIndex - given an index in any slot of an
/// instruction, return a pointer to the instruction
- MachineInstr* getInstructionFromIndex(unsigned index) const {
- index /= InstrSlots::NUM; // convert index to vector index
- assert(index < i2miMap_.size() &&
+ MachineInstr* getInstructionFromIndex(LiveIndex index) const {
+ // convert index to vector index
+ unsigned i = index.getVecIndex();
+ assert(i < i2miMap_.size() &&
"index does not correspond to an instruction");
- return i2miMap_[index];
+ return i2miMap_[i];
}
/// hasGapBeforeInstr - Return true if the previous instruction slot,
/// i.e. Index - InstrSlots::NUM, is not occupied.
- bool hasGapBeforeInstr(unsigned Index) {
- Index = getBaseIndex(Index - InstrSlots::NUM);
+ bool hasGapBeforeInstr(LiveIndex Index) {
+ Index = getBaseIndex(getPrevIndex(Index));
return getInstructionFromIndex(Index) == 0;
}
/// hasGapAfterInstr - Return true if the successive instruction slot,
/// i.e. Index + InstrSlots::NUM, is not occupied.
- bool hasGapAfterInstr(unsigned Index) {
- Index = getBaseIndex(Index + InstrSlots::NUM);
+ bool hasGapAfterInstr(LiveIndex Index) {
+ Index = getBaseIndex(getNextIndex(Index));
return getInstructionFromIndex(Index) == 0;
}
/// findGapBeforeInstr - Find an empty instruction slot before the
/// specified index. If "Furthest" is true, find one that's furthest
/// away from the index (but before any index that's occupied).
- unsigned findGapBeforeInstr(unsigned Index, bool Furthest = false) {
- Index = getBaseIndex(Index - InstrSlots::NUM);
+ LiveIndex findGapBeforeInstr(LiveIndex Index, bool Furthest = false) {
+ Index = getBaseIndex(getPrevIndex(Index));
if (getInstructionFromIndex(Index))
- return 0; // No gap!
+ return LiveIndex(); // No gap!
if (!Furthest)
return Index;
- unsigned PrevIndex = getBaseIndex(Index - InstrSlots::NUM);
+ LiveIndex PrevIndex = getBaseIndex(getPrevIndex(Index));
while (getInstructionFromIndex(Index)) {
Index = PrevIndex;
- PrevIndex = getBaseIndex(Index - InstrSlots::NUM);
+ PrevIndex = getBaseIndex(getPrevIndex(Index));
}
return Index;
}
/// InsertMachineInstrInMaps - Insert the specified machine instruction
/// into the instruction index map at the given index.
- void InsertMachineInstrInMaps(MachineInstr *MI, unsigned Index) {
- i2miMap_[Index / InstrSlots::NUM] = MI;
+ void InsertMachineInstrInMaps(MachineInstr *MI, LiveIndex Index) {
+ i2miMap_[Index.getVecIndex()] = MI;
Mi2IndexMap::iterator it = mi2iMap_.find(MI);
assert(it == mi2iMap_.end() && "Already in map!");
mi2iMap_[MI] = Index;
@@ -265,12 +291,12 @@ namespace llvm {
/// findLiveInMBBs - Given a live range, if the value of the range
/// is live in any MBB returns true as well as the list of basic blocks
/// in which the value is live.
- bool findLiveInMBBs(unsigned Start, unsigned End,
+ bool findLiveInMBBs(LiveIndex Start, LiveIndex End,
SmallVectorImpl<MachineBasicBlock*> &MBBs) const;
/// findReachableMBBs - Return a list MBB that can be reached via any
/// branch or fallthroughs. Return true if the list is not empty.
- bool findReachableMBBs(unsigned Start, unsigned End,
+ bool findReachableMBBs(LiveIndex Start, LiveIndex End,
SmallVectorImpl<MachineBasicBlock*> &MBBs) const;
// Interval creation
@@ -289,7 +315,7 @@ namespace llvm {
/// addLiveRangeToEndOfBlock - Given a register and an instruction,
/// adds a live range from that instruction to the end of its MBB.
LiveRange addLiveRangeToEndOfBlock(unsigned reg,
- MachineInstr* startInst);
+ MachineInstr* startInst);
// Interval removal
@@ -312,7 +338,7 @@ namespace llvm {
// MachineInstr -> index mappings
Mi2IndexMap::iterator mi2i = mi2iMap_.find(MI);
if (mi2i != mi2iMap_.end()) {
- i2miMap_[mi2i->second/InstrSlots::NUM] = 0;
+ i2miMap_[mi2i->second.index/InstrSlots::NUM] = 0;
mi2iMap_.erase(mi2i);
}
}
@@ -323,10 +349,10 @@ namespace llvm {
Mi2IndexMap::iterator mi2i = mi2iMap_.find(MI);
if (mi2i == mi2iMap_.end())
return;
- i2miMap_[mi2i->second/InstrSlots::NUM] = NewMI;
+ i2miMap_[mi2i->second.index/InstrSlots::NUM] = NewMI;
Mi2IndexMap::iterator it = mi2iMap_.find(MI);
assert(it != mi2iMap_.end() && "Invalid instruction!");
- unsigned Index = it->second;
+ LiveIndex Index = it->second;
mi2iMap_.erase(it);
mi2iMap_[NewMI] = Index;
}
@@ -344,10 +370,7 @@ namespace llvm {
virtual bool runOnMachineFunction(MachineFunction&);
/// print - Implement the dump method.
- virtual void print(std::ostream &O, const Module* = 0) const;
- void print(std::ostream *O, const Module* M = 0) const {
- if (O) print(*O, M);
- }
+ virtual void print(raw_ostream &O, const Module* = 0) const;
/// addIntervalsForSpills - Create new intervals for spilled defs / uses of
/// the given interval. FIXME: It also returns the weight of the spill slot
@@ -408,32 +431,40 @@ namespace llvm {
private:
/// computeIntervals - Compute live intervals.
void computeIntervals();
-
+
+ bool isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt,
+ SmallVector<MachineInstr*,16> &IdentCopies,
+ SmallVector<MachineInstr*,16> &OtherCopies);
+
+ void performEarlyCoalescing();
+
/// handleRegisterDef - update intervals for a register def
/// (calls handlePhysicalRegisterDef and
/// handleVirtualRegisterDef)
void handleRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI, unsigned MIIdx,
+ MachineBasicBlock::iterator MI,
+ LiveIndex MIIdx,
MachineOperand& MO, unsigned MOIdx);
/// handleVirtualRegisterDef - update intervals for a virtual
/// register def
void handleVirtualRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
- unsigned MIIdx, MachineOperand& MO,
- unsigned MOIdx, LiveInterval& interval);
+ LiveIndex MIIdx, MachineOperand& MO,
+ unsigned MOIdx,
+ LiveInterval& interval);
/// handlePhysicalRegisterDef - update intervals for a physical register
/// def.
void handlePhysicalRegisterDef(MachineBasicBlock* mbb,
MachineBasicBlock::iterator mi,
- unsigned MIIdx, MachineOperand& MO,
+ LiveIndex MIIdx, MachineOperand& MO,
LiveInterval &interval,
MachineInstr *CopyMI);
/// handleLiveInRegister - Create interval for a livein register.
void handleLiveInRegister(MachineBasicBlock* mbb,
- unsigned MIIdx,
+ LiveIndex MIIdx,
LiveInterval &interval, bool isAlias = false);
/// getReMatImplicitUse - If the remat definition MI has one (for now, we
@@ -446,7 +477,7 @@ namespace llvm {
/// which reaches the given instruction also reaches the specified use
/// index.
bool isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
- unsigned UseIdx) const;
+ LiveIndex UseIdx) const;
/// isReMaterializable - Returns true if the definition MI of the specified
/// val# of the specified interval is re-materializable. Also returns true
@@ -461,9 +492,9 @@ namespace llvm {
/// MI. If it is successful, MI is updated with the newly created MI and
/// returns true.
bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
- MachineInstr *DefMI, unsigned InstrIdx,
+ MachineInstr *DefMI, LiveIndex InstrIdx,
SmallVector<unsigned, 2> &Ops,
- bool isSS, int Slot, unsigned Reg);
+ bool isSS, int FrameIndex, unsigned Reg);
/// canFoldMemoryOperand - Return true if the specified load / store
/// folding is possible.
@@ -474,7 +505,8 @@ namespace llvm {
/// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified
/// VNInfo that's after the specified index but is within the basic block.
bool anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI,
- MachineBasicBlock *MBB, unsigned Idx) const;
+ MachineBasicBlock *MBB,
+ LiveIndex Idx) const;
/// hasAllocatableSuperReg - Return true if the specified physical register
/// has any super register that's allocatable.
@@ -482,16 +514,17 @@ namespace llvm {
/// SRInfo - Spill / restore info.
struct SRInfo {
- int index;
+ LiveIndex index;
unsigned vreg;
bool canFold;
- SRInfo(int i, unsigned vr, bool f) : index(i), vreg(vr), canFold(f) {};
+ SRInfo(LiveIndex i, unsigned vr, bool f)
+ : index(i), vreg(vr), canFold(f) {}
};
- bool alsoFoldARestore(int Id, int index, unsigned vr,
+ bool alsoFoldARestore(int Id, LiveIndex index, unsigned vr,
BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> >&RestoreIdxes);
- void eraseRestoreInfo(int Id, int index, unsigned vr,
+ void eraseRestoreInfo(int Id, LiveIndex index, unsigned vr,
BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> >&RestoreIdxes);
@@ -510,8 +543,9 @@ namespace llvm {
/// functions for addIntervalsForSpills to rewrite uses / defs for the given
/// live range.
bool rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, unsigned index, unsigned end, MachineInstr *MI,
- MachineInstr *OrigDefMI, MachineInstr *DefMI, unsigned Slot, int LdSlot,
+ bool TrySplit, LiveIndex index, LiveIndex end,
+ MachineInstr *MI, MachineInstr *OrigDefMI, MachineInstr *DefMI,
+ unsigned Slot, int LdSlot,
bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
VirtRegMap &vrm, const TargetRegisterClass* rc,
SmallVector<int, 4> &ReMatIds, const MachineLoopInfo *loopInfo,
@@ -533,9 +567,9 @@ namespace llvm {
static LiveInterval* createInterval(unsigned Reg);
- void printRegName(unsigned reg) const;
+ void printInstrs(raw_ostream &O) const;
+ void dumpInstrs() const;
};
-
} // End llvm namespace
#endif
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index 27ae1be7f913..d63a222475c6 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -102,10 +102,7 @@ namespace llvm {
virtual bool runOnMachineFunction(MachineFunction&);
/// print - Implement the dump method.
- virtual void print(std::ostream &O, const Module* = 0) const;
- void print(std::ostream *O, const Module* M = 0) const {
- if (O) print(*O, M);
- }
+ virtual void print(raw_ostream &O, const Module* = 0) const;
};
}
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 26c036269d68..172fb750944d 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -29,9 +29,12 @@
#ifndef LLVM_CODEGEN_LIVEVARIABLES_H
#define LLVM_CODEGEN_LIVEVARIABLES_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
@@ -146,16 +149,14 @@ private: // Intermediate data structures
bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI);
void HandlePhysRegUse(unsigned Reg, MachineInstr *MI);
- void HandlePhysRegDef(unsigned Reg, MachineInstr *MI);
+ void HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs);
+ void UpdatePhysRegDefs(MachineInstr *MI, SmallVector<unsigned, 4> &Defs);
/// FindLastPartialDef - Return the last partial def of the specified register.
- /// Also returns the sub-register that's defined.
- MachineInstr *FindLastPartialDef(unsigned Reg, unsigned &PartDefReg);
-
- /// hasRegisterUseBelow - Return true if the specified register is used after
- /// the current instruction and before it's next definition.
- bool hasRegisterUseBelow(unsigned Reg, MachineBasicBlock::iterator I,
- MachineBasicBlock *MBB);
+  /// Also returns the sub-registers that are defined by the instruction.
+ MachineInstr *FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs);
/// analyzePHINodes - Gather information about the PHI nodes in here. In
/// particular, we want to map the variable information of a virtual
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 134d22663b01..2a9e86a04c09 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -16,17 +16,17 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/Support/Streams.h"
namespace llvm {
class BasicBlock;
class MachineFunction;
+class raw_ostream;
template <>
struct ilist_traits<MachineInstr> : public ilist_default_traits<MachineInstr> {
private:
- mutable ilist_node<MachineInstr> Sentinel;
+ mutable ilist_half_node<MachineInstr> Sentinel;
// this is only set by the MachineBasicBlock owning the LiveList
friend class MachineBasicBlock;
@@ -310,8 +310,7 @@ public:
// Debugging methods.
void dump() const;
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
/// getNumber - MachineBasicBlocks are uniquely numbered at the function
/// level, unless they're not in a MachineFunction yet, in which case this
@@ -339,7 +338,7 @@ private: // Methods used to maintain doubly linked list of blocks...
void removePredecessor(MachineBasicBlock *pred);
};
-std::ostream& operator<<(std::ostream &OS, const MachineBasicBlock &MBB);
+raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);
//===--------------------------------------------------------------------===//
// GraphTraits specializations for machine basic block graphs (machine-CFGs)
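
With print moved off std::ostream, debugging helpers write straight to a raw_ostream. A minimal sketch under that assumption (the helper name is hypothetical):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical helper: dump a block through the new raw_ostream interface.
    static void printBlock(const llvm::MachineBasicBlock &MBB) {
      MBB.print(llvm::errs());  // was print(std::ostream&) before this change
      llvm::errs() << MBB;      // operator<< now targets raw_ostream as well
    }
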
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index eb1ea2dc56b1..abb6dd9cd087 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -18,6 +18,7 @@
#define LLVM_CODEGEN_MACHINECODEEMITTER_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
namespace llvm {
@@ -74,24 +75,6 @@ public:
/// false.
///
virtual bool finishFunction(MachineFunction &F) = 0;
-
- /// startGVStub - This callback is invoked when the JIT needs the
- /// address of a GV (e.g. function) that has not been code generated yet.
- /// The StubSize specifies the total size required by the stub.
- ///
- virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
- unsigned Alignment = 1) = 0;
-
- /// startGVStub - This callback is invoked when the JIT needs the address of a
- /// GV (e.g. function) that has not been code generated yet. Buffer points to
- /// memory already allocated for this stub.
- ///
- virtual void startGVStub(const GlobalValue* GV, void *Buffer,
- unsigned StubSize) = 0;
-
- /// finishGVStub - This callback is invoked to terminate a GV stub.
- ///
- virtual void *finishGVStub(const GlobalValue* F) = 0;
/// emitByte - This callback is invoked when a byte needs to be written to the
/// output stream.
@@ -250,7 +233,12 @@ public:
(*(uint64_t*)Addr) = (uint64_t)Value;
}
-
+ /// processDebugLoc - Records debug location information about a
+ /// MachineInstr. This is called before emitting any bytes associated
+ /// with the instruction. Even if successive instructions have the same debug
+ /// location, this method will be called for each one.
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {}
+
/// emitLabel - Emits a label
virtual void emitLabel(uint64_t LabelID) = 0;
@@ -288,14 +276,20 @@ public:
/// getCurrentPCOffset - Return the offset from the start of the emitted
/// buffer that we are currently writing to.
- uintptr_t getCurrentPCOffset() const {
+ virtual uintptr_t getCurrentPCOffset() const {
return CurBufferPtr-BufferBegin;
}
+ /// earlyResolveAddresses - True if the code emitter can use symbol addresses
+ /// during code emission time. The JIT is capable of doing this because it
+ /// creates jump tables or constant pools in memory on the fly while the
+ /// object code emitters rely on a linker to have real addresses and should
+ /// use relocations instead.
+ virtual bool earlyResolveAddresses() const = 0;
+
/// addRelocation - Whenever a relocatable address is needed, it should be
/// noted with this interface.
virtual void addRelocation(const MachineRelocation &MR) = 0;
-
/// FIXME: These should all be handled with relocations!
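
The new earlyResolveAddresses() hook lets emission code choose between resolving an address immediately (the JIT case) and recording a relocation (the object-file case). A sketch of how a target might branch on it; emitBlockAddress and RelocKind are hypothetical names, the MachineRelocation::getBB factory and the emitter callbacks are the existing interfaces:

    #include "llvm/CodeGen/MachineCodeEmitter.h"
    #include "llvm/CodeGen/MachineRelocation.h"

    // Hypothetical target helper: emit the address of a basic block
    // (a 32-bit target is assumed for the emitWordLE truncation).
    static void emitBlockAddress(llvm::MachineCodeEmitter &MCE,
                                 llvm::MachineBasicBlock *MBB,
                                 unsigned RelocKind) {  // target-specific kind
      if (MCE.earlyResolveAddresses()) {
        // JIT path: the block's real address is known during emission.
        MCE.emitWordLE((uint32_t)MCE.getMachineBasicBlockAddress(MBB));
      } else {
        // Object-file path: leave a placeholder and record a relocation.
        MCE.addRelocation(llvm::MachineRelocation::getBB(
            MCE.getCurrentPCOffset(), RelocKind, MBB));
        MCE.emitWordLE(0);
      }
    }
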
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index 99996cf28fb1..8d6c1d1e4ca2 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -41,8 +41,15 @@ public:
/// getType - get type of this MachineConstantPoolValue.
///
- inline const Type *getType() const { return Ty; }
+ const Type *getType() const { return Ty; }
+
+ /// getRelocationInfo - This method classifies the entry according to
+ /// whether or not it may generate a relocation entry. This must be
+ /// conservative, so if it might codegen to a relocatable entry, it should say
+ /// so. The return values are the same as Constant::getRelocationInfo().
+ virtual unsigned getRelocationInfo() const = 0;
+
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) = 0;
@@ -82,7 +89,7 @@ public:
MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
: Alignment(A) {
Val.MachineCPVal = V;
- Alignment |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
+ Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1);
}
bool isMachineConstantPoolEntry() const {
@@ -94,6 +101,19 @@ public:
}
const Type *getType() const;
+
+ /// getRelocationInfo - This method classifies the entry according to
+ /// whether or not it may generate a relocation entry. This must be
+ /// conservative, so if it might codegen to a relocatable entry, it should say
+ /// so. The return values are:
+ ///
+ /// 0: This constant pool entry is guaranteed to never have a relocation
+ /// applied to it (because it holds a simple constant like '4').
+ /// 1: This entry has relocations, but the entries are guaranteed to be
+ /// resolvable by the static linker, so the dynamic linker will never see
+ /// them.
+ /// 2: This entry may have arbitrary relocations.
+ unsigned getRelocationInfo() const;
};
/// The MachineConstantPool class keeps track of constants referenced by a
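
The 0/1/2 contract documented above maps naturally onto section placement. A sketch of a hypothetical consumer (the enum and function are illustrative, not LLVM API):

    #include "llvm/CodeGen/MachineConstantPool.h"

    // Hypothetical classification mirroring the documented 0/1/2 return
    // values; a real target would use this to pick a constant-pool section.
    enum CPKind { CPPlain, CPLocalReloc, CPAnyReloc };

    static CPKind classifyCPEntry(const llvm::MachineConstantPoolEntry &E) {
      switch (E.getRelocationInfo()) {
      case 0:  return CPPlain;      // simple constant, never relocated
      case 1:  return CPLocalReloc; // resolvable by the static linker
      default: return CPAnyReloc;   // may carry arbitrary relocations
      }
    }
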
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index 5981e5a3a589..e56776b1440c 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -15,13 +15,15 @@
#ifndef LLVM_CODEGEN_MACHINEDOMINATORS_H
#define LLVM_CODEGEN_MACHINEDOMINATORS_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/DominatorInternals.h"
namespace llvm {
-inline void WriteAsOperand(std::ostream &, const MachineBasicBlock*, bool t) { }
+inline void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t) { }
template<>
inline void DominatorTreeBase<MachineBasicBlock>::addRoot(MachineBasicBlock* MBB) {
@@ -160,9 +162,7 @@ public:
virtual void releaseMemory();
- virtual void print(std::ostream &OS, const Module* M= 0) const {
- DT->print(OS, M);
- }
+ virtual void print(raw_ostream &OS, const Module*) const;
};
//===-------------------------------------
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 4c981f7caf02..b5479ba09f3c 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -14,17 +14,20 @@
#ifndef LLVM_CODEGEN_MACHINEFRAMEINFO_H
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
-#include <iosfwd>
#include <vector>
namespace llvm {
+class raw_ostream;
class TargetData;
class TargetRegisterClass;
class Type;
class MachineModuleInfo;
class MachineFunction;
+class MachineBasicBlock;
class TargetFrameInfo;
/// The CalleeSavedInfo class tracks the information need to locate where a
@@ -130,11 +133,14 @@ class MachineFrameInfo {
uint64_t StackSize;
/// OffsetAdjustment - The amount that a frame offset needs to be adjusted to
- /// have the actual offset from the stack/frame pointer. The calculation is
- /// MFI->getObjectOffset(Index) + StackSize - TFI.getOffsetOfLocalArea() +
- /// OffsetAdjustment. If OffsetAdjustment is zero (default) then offsets are
- /// away from TOS. If OffsetAdjustment == StackSize then offsets are toward
- /// TOS.
+ /// have the actual offset from the stack/frame pointer. The exact usage of
+ /// this is target-dependent, but it is typically used to adjust between
+ /// SP-relative and FP-relative offsets. E.G., if objects are accessed via
+ /// SP then OffsetAdjustment is zero; if FP is used, OffsetAdjustment is set
+ /// to the distance between the initial SP and the value in FP. For many
+ /// targets, this value is only used when generating debug info (via
+ /// TargetRegisterInfo::getFrameIndexOffset); when generating code, the
+ /// corresponding adjustments are performed directly.
int OffsetAdjustment;
/// MaxAlignment - The prolog/epilog code inserter may process objects
@@ -166,7 +172,10 @@ class MachineFrameInfo {
/// epilog code inserter, this data used for debug info and exception
/// handling.
std::vector<CalleeSavedInfo> CSInfo;
-
+
+ /// CSIValid - Has CSInfo been set yet?
+ bool CSIValid;
+
/// MMI - This field is set (via setMachineModuleInfo) by a module info
/// consumer (ex. DwarfWriter) to indicate that frame layout information
/// should be acquired. Typically, it's the responsibility of the target's
@@ -185,6 +194,7 @@ public:
HasCalls = false;
StackProtectorIdx = -1;
MaxCallFrameSize = 0;
+ CSIValid = false;
MMI = 0;
}
@@ -389,6 +399,22 @@ public:
CSInfo = CSI;
}
+ /// isCalleeSavedInfoValid - Has the callee saved info been calculated yet?
+ bool isCalleeSavedInfoValid() const { return CSIValid; }
+
+ void setCalleeSavedInfoValid(bool v) { CSIValid = v; }
+
+ /// getPristineRegs - Return a set of physical registers that are pristine on
+ /// entry to the MBB.
+ ///
+ /// Pristine registers hold a value that is useless to the current function,
+ /// but that must be preserved - they are callee saved registers that have not
+ /// been saved yet.
+ ///
+ /// Before the PrologueEpilogueInserter has placed the CSR spill code, this
+ /// method always returns an empty set.
+ BitVector getPristineRegs(const MachineBasicBlock *MBB) const;
+
/// getMachineModuleInfo - Used by a prologue/epilogue
/// emitter (TargetRegisterInfo) to provide frame layout information.
MachineModuleInfo *getMachineModuleInfo() const { return MMI; }
@@ -400,9 +426,9 @@ public:
/// print - Used by the MachineFunction printer to print information about
/// stack objects. Implemented in MachineFunction.cpp
///
- void print(const MachineFunction &MF, std::ostream &OS) const;
+ void print(const MachineFunction &MF, raw_ostream &OS) const;
- /// dump - Call print(MF, std::cerr) to be called from the debugger.
+ /// dump - Print the function to stderr.
void dump(const MachineFunction &MF) const;
};
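
A sketch of how a late pass might query the new pristine-register set; before the prologue/epilogue inserter has placed CSR spills the set is simply empty, so the query is conservative by construction (the helper name is hypothetical):

    #include "llvm/ADT/BitVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    // Hypothetical query: is Reg pristine (callee-saved, not yet spilled)
    // on entry to MBB?
    static bool isPristineAt(const llvm::MachineBasicBlock *MBB, unsigned Reg) {
      const llvm::MachineFrameInfo *MFI = MBB->getParent()->getFrameInfo();
      llvm::BitVector Pristine = MFI->getPristineRegs(MBB);
      return Pristine.test(Reg);  // empty set before PEI places CSR spills
    }
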
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index ea6a384d2287..ba831cab1725 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -18,15 +18,16 @@
#ifndef LLVM_CODEGEN_MACHINEFUNCTION_H
#define LLVM_CODEGEN_MACHINEFUNCTION_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/ilist.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Support/Annotation.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Recycler.h"
+#include <map>
namespace llvm {
+class Value;
class Function;
class MachineRegisterInfo;
class MachineFrameInfo;
@@ -38,7 +39,7 @@ class TargetRegisterClass;
template <>
struct ilist_traits<MachineBasicBlock>
: public ilist_default_traits<MachineBasicBlock> {
- mutable ilist_node<MachineBasicBlock> Sentinel;
+ mutable ilist_half_node<MachineBasicBlock> Sentinel;
public:
MachineBasicBlock *createSentinel() const {
return static_cast<MachineBasicBlock*>(&Sentinel);
@@ -63,11 +64,11 @@ private:
/// of this type are accessed/created with MF::getInfo and destroyed when the
/// MachineFunction is destroyed.
struct MachineFunctionInfo {
- virtual ~MachineFunctionInfo() {}
+ virtual ~MachineFunctionInfo();
};
-class MachineFunction : private Annotation {
- const Function *Fn;
+class MachineFunction {
+ Function *Fn;
const TargetMachine &Target;
// RegInfo - Information about each register in use in the function.
@@ -115,12 +116,12 @@ class MachineFunction : private Annotation {
unsigned Alignment;
public:
- MachineFunction(const Function *Fn, const TargetMachine &TM);
+ MachineFunction(Function *Fn, const TargetMachine &TM);
~MachineFunction();
/// getFunction - Return the LLVM function that this machine code represents
///
- const Function *getFunction() const { return Fn; }
+ Function *getFunction() const { return Fn; }
/// getTarget - Return the target machine this machine code is compiled with
///
@@ -159,8 +160,8 @@ public:
///
void setAlignment(unsigned A) { Alignment = A; }
- /// MachineFunctionInfo - Keep track of various per-function pieces of
- /// information for backends that would like to do so.
+ /// getInfo - Keep track of various per-function pieces of information for
+ /// backends that would like to do so.
///
template<typename Ty>
Ty *getInfo() {
@@ -207,8 +208,7 @@ public:
/// print - Print out the MachineFunction in a format suitable for debugging
/// to the specified stream.
///
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
@@ -229,21 +229,6 @@ public:
///
void dump() const;
- /// construct - Allocate and initialize a MachineFunction for a given Function
- /// and Target
- ///
- static MachineFunction& construct(const Function *F, const TargetMachine &TM);
-
- /// destruct - Destroy the MachineFunction corresponding to a given Function
- ///
- static void destruct(const Function *F);
-
- /// get - Return a handle to a MachineFunction corresponding to the given
- /// Function. This should not be called before "construct()" for a given
- /// Function.
- ///
- static MachineFunction& get(const Function *F);
-
// Provide accessors for the MachineBasicBlock list...
typedef BasicBlockListType::iterator iterator;
typedef BasicBlockListType::const_iterator const_iterator;
@@ -336,16 +321,42 @@ public:
///
void DeleteMachineBasicBlock(MachineBasicBlock *MBB);
+ /// getMachineMemOperand - Allocate a new MachineMemOperand.
+ /// MachineMemOperands are owned by the MachineFunction and need not be
+ /// explicitly deallocated.
+ MachineMemOperand *getMachineMemOperand(const Value *v, unsigned f,
+ int64_t o, uint64_t s,
+ unsigned base_alignment);
+
+ /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
+ /// an existing one, adjusting by an offset and using the given size.
+ /// MachineMemOperands are owned by the MachineFunction and need not be
+ /// explicitly deallocated.
+ MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size);
+
+ /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
+ /// pointers. This array is owned by the MachineFunction.
+ MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
+
+ /// extractLoadMemRefs - Allocate an array and populate it with just the
+ /// load information from the given MachineMemOperand sequence.
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator>
+ extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End);
+
+ /// extractStoreMemRefs - Allocate an array and populate it with just the
+ /// store information from the given MachineMemOperand sequence.
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator>
+ extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End);
+
//===--------------------------------------------------------------------===//
// Debug location.
//
- /// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given
- /// source file, line, and column. If none currently exists, create a new
- /// DebugLocTuple, and insert it into the DebugIdMap.
- unsigned getOrCreateDebugLocID(GlobalVariable *CompileUnit,
- unsigned Line, unsigned Col);
-
/// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object.
DebugLocTuple getDebugLocTuple(DebugLoc DL) const;
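
MachineMemOperands are now allocated by the MachineFunction and attached by pointer. A sketch of a spill-store setup under that model (the helper is hypothetical; the factory and PseudoSourceValue calls are the interfaces shown above):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/PseudoSourceValue.h"

    // Hypothetical: attach a function-owned store memoperand for spill slot FI.
    static void addSpillStoreMemOp(llvm::MachineFunction &MF,
                                   llvm::MachineInstr *MI,
                                   int FI, uint64_t Size, unsigned Align) {
      llvm::MachineMemOperand *MMO = MF.getMachineMemOperand(
          llvm::PseudoSourceValue::getFixedStack(FI),
          llvm::MachineMemOperand::MOStore, /*Offset=*/0, Size, Align);
      MI->addMemOperand(MF, MMO);  // owned by MF, no explicit deallocation
    }
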
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
new file mode 100644
index 000000000000..d020a7b4c73b
--- /dev/null
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -0,0 +1,49 @@
+//===-- MachineFunctionAnalysis.h - Owner of MachineFunctions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineFunctionAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H
+#define LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class MachineFunction;
+
+/// MachineFunctionAnalysis - This class is a Pass that manages a
+/// MachineFunction object.
+struct MachineFunctionAnalysis : public FunctionPass {
+private:
+ const TargetMachine &TM;
+ CodeGenOpt::Level OptLevel;
+ MachineFunction *MF;
+
+public:
+ static char ID;
+ explicit MachineFunctionAnalysis(TargetMachine &tm,
+ CodeGenOpt::Level OL = CodeGenOpt::Default);
+ ~MachineFunctionAnalysis();
+
+ MachineFunction &getMF() const { return *MF; }
+ CodeGenOpt::Level getOptLevel() const { return OptLevel; }
+
+private:
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index 6b5e64abc46c..bac110316d4f 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -20,23 +20,34 @@
#define LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H
#include "llvm/Pass.h"
-#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
- // FIXME: This pass should declare that the pass does not invalidate any LLVM
- // passes.
-struct MachineFunctionPass : public FunctionPass {
+class MachineFunction;
+
+/// MachineFunctionPass - This class adapts the FunctionPass interface to
+/// allow convenient creation of passes that operate on the MachineFunction
+/// representation. Instead of overriding runOnFunction, subclasses
+/// override runOnMachineFunction.
+class MachineFunctionPass : public FunctionPass {
+protected:
explicit MachineFunctionPass(intptr_t ID) : FunctionPass(ID) {}
explicit MachineFunctionPass(void *ID) : FunctionPass(ID) {}
-protected:
/// runOnMachineFunction - This method must be overloaded to perform the
/// desired machine code transformation or analysis.
///
virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
-public:
+ /// getAnalysisUsage - Subclasses that override getAnalysisUsage
+ /// must call this.
+ ///
+ /// For MachineFunctionPasses, calling AU.preservesCFG() indicates that
+ /// the pass does not modify the MachineBasicBlock CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+private:
bool runOnFunction(Function &F);
};
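
A minimal sketch of a pass written against the reworked interface; the pass itself is hypothetical and only counts blocks, but it shows the required chaining in getAnalysisUsage noted above:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"

    namespace {
    struct CountMBBs : public llvm::MachineFunctionPass {
      static char ID;
      CountMBBs() : MachineFunctionPass(&ID) {}

      virtual bool runOnMachineFunction(llvm::MachineFunction &MF) {
        unsigned NumBlocks = MF.size();  // number of machine basic blocks
        (void)NumBlocks;
        return false;                    // analysis only, nothing modified
      }

      virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
        AU.setPreservesCFG();                       // MBB CFG untouched
        MachineFunctionPass::getAnalysisUsage(AU);  // overrides must chain
      }
    };
    char CountMBBs::ID = 0;
    }
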
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 2b2f24a88371..de22710fe6e0 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -20,29 +20,34 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Support/DebugLoc.h"
-#include <list>
#include <vector>
namespace llvm {
+class AliasAnalysis;
class TargetInstrDesc;
class TargetInstrInfo;
class TargetRegisterInfo;
class MachineFunction;
+class MachineMemOperand;
//===----------------------------------------------------------------------===//
/// MachineInstr - Representation of each machine instruction.
///
class MachineInstr : public ilist_node<MachineInstr> {
+public:
+ typedef MachineMemOperand **mmo_iterator;
+
+private:
const TargetInstrDesc *TID; // Instruction descriptor.
unsigned short NumImplicitOps; // Number of implicit operands (which
// are determined at construction time).
std::vector<MachineOperand> Operands; // the operands
- std::list<MachineMemOperand> MemOperands; // information on memory references
+ mmo_iterator MemRefs; // information on memory references
+ mmo_iterator MemRefsEnd;
MachineBasicBlock *Parent; // Pointer to the owning basic block.
DebugLoc debugLoc; // Source line information.
@@ -132,21 +137,14 @@ public:
unsigned getNumExplicitOperands() const;
/// Access to memory operands of the instruction
- std::list<MachineMemOperand>::iterator memoperands_begin()
- { return MemOperands.begin(); }
- std::list<MachineMemOperand>::iterator memoperands_end()
- { return MemOperands.end(); }
- std::list<MachineMemOperand>::const_iterator memoperands_begin() const
- { return MemOperands.begin(); }
- std::list<MachineMemOperand>::const_iterator memoperands_end() const
- { return MemOperands.end(); }
- bool memoperands_empty() const { return MemOperands.empty(); }
+ mmo_iterator memoperands_begin() const { return MemRefs; }
+ mmo_iterator memoperands_end() const { return MemRefsEnd; }
+ bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
/// hasOneMemOperand - Return true if this instruction has exactly one
/// MachineMemOperand.
bool hasOneMemOperand() const {
- return !memoperands_empty() &&
- next(memoperands_begin()) == memoperands_end();
+ return MemRefsEnd - MemRefs == 1;
}
/// isIdenticalTo - Return true if this instruction is identical to (same
@@ -208,7 +206,7 @@ public:
}
/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
- /// the specific register or -1 if it is not found. It further tightening
+ /// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false,
const TargetRegisterInfo *TRI = NULL) const;
@@ -277,11 +275,13 @@ public:
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
- bool isSafeToMove(const TargetInstrInfo *TII, bool &SawStore) const;
+ bool isSafeToMove(const TargetInstrInfo *TII, bool &SawStore,
+ AliasAnalysis *AA) const;
/// isSafeToReMat - Return true if it's safe to rematerialize the specified
/// instruction which defined the specified register instead of copying it.
- bool isSafeToReMat(const TargetInstrInfo *TII, unsigned DstReg) const;
+ bool isSafeToReMat(const TargetInstrInfo *TII, unsigned DstReg,
+ AliasAnalysis *AA) const;
/// hasVolatileMemoryRef - Return true if this instruction may have a
/// volatile memory reference, or if the information describing the
@@ -289,19 +289,17 @@ public:
/// have no volatile memory references.
bool hasVolatileMemoryRef() const;
+ /// isInvariantLoad - Return true if this instruction is loading from a
+ /// location whose value is invariant across the function. For example,
+ /// loading a value from the constant pool or from the argument area of
+ /// a function that does not change. This should only return true if *all*
+ /// loads the instruction does are invariant (if it does multiple loads).
+ bool isInvariantLoad(AliasAnalysis *AA) const;
+
//
// Debugging support
//
- void print(std::ostream *OS, const TargetMachine *TM) const {
- if (OS) print(*OS, TM);
- }
- void print(std::ostream &OS, const TargetMachine *TM = 0) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
- void print(raw_ostream *OS, const TargetMachine *TM) const {
- if (OS) print(*OS, TM);
- }
void print(raw_ostream &OS, const TargetMachine *TM = 0) const;
- void print(raw_ostream *OS) const { if (OS) print(*OS); }
void dump() const;
//===--------------------------------------------------------------------===//
@@ -328,13 +326,17 @@ public:
///
void RemoveOperand(unsigned i);
- /// addMemOperand - Add a MachineMemOperand to the machine instruction,
- /// referencing arbitrary storage.
- void addMemOperand(MachineFunction &MF,
- const MachineMemOperand &MO);
+ /// addMemOperand - Add a MachineMemOperand to the machine instruction.
+ /// This function should be used only occasionally. The setMemRefs function
+ /// is the primary method for setting up a MachineInstr's MemRefs list.
+ void addMemOperand(MachineFunction &MF, MachineMemOperand *MO);
- /// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands.
- void clearMemOperands(MachineFunction &MF);
+ /// setMemRefs - Assign this MachineInstr's memory reference descriptor
+ /// list. This does not transfer ownership.
+ void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
+ MemRefs = NewMemRefs;
+ MemRefsEnd = NewMemRefsEnd;
+ }
private:
/// getRegInfo - If this instruction is embedded into a MachineFunction,
@@ -360,11 +362,6 @@ private:
//===----------------------------------------------------------------------===//
// Debugging Support
-inline std::ostream& operator<<(std::ostream &OS, const MachineInstr &MI) {
- MI.print(OS);
- return OS;
-}
-
inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) {
MI.print(OS);
return OS;
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index c6a6679c1b69..7f681d7cea8c 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -39,6 +39,7 @@ namespace RegState {
class MachineInstrBuilder {
MachineInstr *MI;
public:
+ MachineInstrBuilder() : MI(0) {}
explicit MachineInstrBuilder(MachineInstr *mi) : MI(mi) {}
/// Allow automatic conversion to the machine instruction we are working on.
@@ -108,20 +109,19 @@ public:
}
const MachineInstrBuilder &addMetadata(MDNode *N,
- int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ int64_t Offset = 0,
+ unsigned char TargetFlags = 0) const {
MI->addOperand(MachineOperand::CreateMDNode(N, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addExternalSymbol(const char *FnName,
- int64_t Offset = 0,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateES(FnName, Offset, TargetFlags));
+ MI->addOperand(MachineOperand::CreateES(FnName, TargetFlags));
return *this;
}
- const MachineInstrBuilder &addMemOperand(const MachineMemOperand &MMO) const {
+ const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const {
MI->addMemOperand(*MI->getParent()->getParent(), MMO);
return *this;
}
@@ -191,7 +191,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
/// BuildMI - This version of the builder inserts the newly-built
/// instruction at the end of the given MachineBasicBlock, and sets up the first
-/// operand as a destination virtual register.
+/// operand as a destination virtual register.
///
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
DebugLoc DL,
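
A sketch of typical builder usage after these changes; the function and CopyDesc are hypothetical (a real target would fetch the descriptor from TargetInstrInfo), and note that addExternalSymbol no longer carries an offset:

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    // Hypothetical target snippet: append a copy at the end of MBB.
    static void appendCopy(llvm::MachineBasicBlock *MBB, llvm::DebugLoc DL,
                           const llvm::TargetInstrDesc &CopyDesc,
                           unsigned DstReg, unsigned SrcReg) {
      llvm::BuildMI(MBB, DL, CopyDesc, DstReg)       // DstReg is operand 0
          .addReg(SrcReg, llvm::RegState::Kill);     // source dies here
    }
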
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 56e2e5499a7f..3ff2f2e8c7a1 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -21,13 +21,13 @@
#define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
#include <vector>
-#include <iosfwd>
#include <cassert>
namespace llvm {
class MachineBasicBlock;
class TargetData;
+class raw_ostream;
/// MachineJumpTableEntry - One jump table in the jump table info.
///
@@ -79,10 +79,9 @@ public:
/// print - Used by the MachineFunction printer to print information about
/// jump tables. Implemented in MachineFunction.cpp
///
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
- /// dump - Call print(std::cerr) to be called from the debugger.
+ /// dump - Print to stderr.
///
void dump() const;
};
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index 2d19d7a2f803..65ad4e484148 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -35,48 +35,23 @@
namespace llvm {
-// Provide overrides for Loop methods that don't make sense for machine loops.
-template<> inline
-PHINode *LoopBase<MachineBasicBlock>::getCanonicalInductionVariable() const {
- assert(0 && "getCanonicalInductionVariable not supported for machine loops!");
- return 0;
-}
-
-template<> inline Instruction*
-LoopBase<MachineBasicBlock>::getCanonicalInductionVariableIncrement() const {
- assert(0 &&
- "getCanonicalInductionVariableIncrement not supported for machine loops!");
- return 0;
-}
-
-template<>
-inline bool LoopBase<MachineBasicBlock>::isLoopInvariant(Value *V) const {
- assert(0 && "isLoopInvariant not supported for machine loops!");
- return false;
-}
-
-template<>
-inline Value *LoopBase<MachineBasicBlock>::getTripCount() const {
- assert(0 && "getTripCount not supported for machine loops!");
- return 0;
-}
-
-template<>
-inline bool LoopBase<MachineBasicBlock>::isLCSSAForm() const {
- assert(0 && "isLCSSAForm not supported for machine loops");
- return false;
-}
-
-typedef LoopBase<MachineBasicBlock> MachineLoop;
+class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
+public:
+ MachineLoop();
+private:
+ friend class LoopInfoBase<MachineBasicBlock, MachineLoop>;
+ explicit MachineLoop(MachineBasicBlock *MBB)
+ : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {}
+};
class MachineLoopInfo : public MachineFunctionPass {
- LoopInfoBase<MachineBasicBlock> LI;
- friend class LoopBase<MachineBasicBlock>;
+ LoopInfoBase<MachineBasicBlock, MachineLoop> LI;
+ friend class LoopBase<MachineBasicBlock, MachineLoop>;
void operator=(const MachineLoopInfo &); // do not implement
MachineLoopInfo(const MachineLoopInfo &); // do not implement
- LoopInfoBase<MachineBasicBlock>& getBase() { return LI; }
+ LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; }
public:
static char ID; // Pass identification, replacement for typeid
@@ -86,7 +61,7 @@ public:
/// iterator/begin/end - The interface to the top-level loops in the current
/// function.
///
- typedef LoopInfoBase<MachineBasicBlock>::iterator iterator;
+ typedef LoopInfoBase<MachineBasicBlock, MachineLoop>::iterator iterator;
inline iterator begin() const { return LI.begin(); }
inline iterator end() const { return LI.end(); }
bool empty() const { return LI.empty(); }
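
MachineLoop is now a concrete class over the two-parameter LoopBase, but iteration over top-level machine loops is unchanged for clients. A sketch (the counting helper is hypothetical):

    #include "llvm/CodeGen/MachineLoopInfo.h"

    // Hypothetical walk over the top-level machine loops of a function.
    static unsigned countTopLevelLoops(const llvm::MachineLoopInfo &MLI) {
      unsigned N = 0;
      for (llvm::MachineLoopInfo::iterator I = MLI.begin(), E = MLI.end();
           I != E; ++I)
        ++N;
      return N;
    }
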
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 4388c0aab224..b7e267dd1333 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -20,6 +20,7 @@ namespace llvm {
class Value;
class FoldingSetNodeID;
+class raw_ostream;
//===----------------------------------------------------------------------===//
/// MachineMemOperand - A description of a memory reference used in the backend.
@@ -47,14 +48,17 @@ public:
};
/// MachineMemOperand - Construct an MachineMemOperand object with the
- /// specified address Value, flags, offset, size, and alignment.
+ /// specified address Value, flags, offset, size, and base alignment.
MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s,
- unsigned int a);
+ unsigned int base_alignment);
- /// getValue - Return the base address of the memory access.
- /// Special values are PseudoSourceValue::FPRel, PseudoSourceValue::SPRel,
- /// and the other PseudoSourceValue members which indicate references to
- /// frame/stack pointer relative references and other special references.
+ /// getValue - Return the base address of the memory access. This may either
+ /// be a normal LLVM IR Value, or one of the special values used in CodeGen.
+ /// Special values are those obtained via
+ /// PseudoSourceValue::getFixedStack(int), PseudoSourceValue::getStack, and
+ /// other PseudoSourceValue member functions which return objects which stand
+ /// for frame/stack pointer relative references and other special references
+ /// which are not representable in the high-level IR.
const Value *getValue() const { return V; }
/// getFlags - Return the raw flags of the source value, \see MemOperandFlags.
@@ -69,18 +73,34 @@ public:
uint64_t getSize() const { return Size; }
/// getAlignment - Return the minimum known alignment in bytes of the
- /// memory reference.
- unsigned int getAlignment() const { return (1u << (Flags >> 3)) >> 1; }
+ /// actual memory reference.
+ uint64_t getAlignment() const;
+
+ /// getBaseAlignment - Return the minimum known alignment in bytes of the
+ /// base address, without the offset.
+ uint64_t getBaseAlignment() const { return (1u << (Flags >> 3)) >> 1; }
bool isLoad() const { return Flags & MOLoad; }
bool isStore() const { return Flags & MOStore; }
bool isVolatile() const { return Flags & MOVolatile; }
+ /// refineAlignment - Update this MachineMemOperand to reflect the alignment
+ /// of MMO, if it has a greater alignment. This must only be used when the
+ /// new alignment applies to all users of this MachineMemOperand.
+ void refineAlignment(const MachineMemOperand *MMO);
+
+ /// setValue - Change the SourceValue for this MachineMemOperand. This
+ /// should only be used when an object is being relocated and all references
+ /// to it are being updated.
+ void setValue(const Value *NewSV) { V = NewSV; }
+
/// Profile - Gather unique data for the object.
///
void Profile(FoldingSetNodeID &ID) const;
};
+raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO);
+
} // End llvm namespace
#endif
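
getAlignment() is the effective alignment of the access, derived from the base alignment and the offset: a 16-byte-aligned base accessed at offset 4 yields a 4-byte effective alignment. A sketch contrasting the two (the debug helper is hypothetical; the operator<< is the one declared above):

    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical debug helper: base vs. effective alignment.
    static void describeMemOp(const llvm::MachineMemOperand &MMO) {
      // e.g. base alignment 16 with offset 4 gives effective alignment 4.
      llvm::errs() << MMO << " base-align=" << MMO.getBaseAlignment()
                   << " align=" << MMO.getAlignment() << "\n";
    }
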
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 1872bd26d8aa..5878d67b939d 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -42,18 +42,34 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/GlobalValue.h"
#include "llvm/Pass.h"
+#include "llvm/Metadata.h"
+
+#define ATTACH_DEBUG_INFO_TO_AN_INSN 1
namespace llvm {
//===----------------------------------------------------------------------===//
// Forward declarations.
class Constant;
+class MDNode;
class GlobalVariable;
class MachineBasicBlock;
class MachineFunction;
class Module;
class PointerType;
class StructType;
+
+
+/// MachineModuleInfoImpl - This class can be derived from and used by targets
+/// to hold private target-specific information for each Module. Objects of
+/// this type are accessed/created with MMI::getObjFileInfo and destroyed when the
+/// MachineModuleInfo is destroyed.
+class MachineModuleInfoImpl {
+public:
+ virtual ~MachineModuleInfoImpl();
+};
+
+
//===----------------------------------------------------------------------===//
/// LandingPadInfo - This structure is used to retain landing pad info for
@@ -80,7 +96,11 @@ struct LandingPadInfo {
/// schemes and reformated for specific use.
///
class MachineModuleInfo : public ImmutablePass {
-private:
+ /// ObjFileMMI - This is the object-file-format-specific implementation of
+ /// MachineModuleInfoImpl, which lets targets accumulate whatever info they
+ /// want.
+ MachineModuleInfoImpl *ObjFileMMI;
+
// LabelIDList - One entry per assigned label. Normally the entry is equal to
// the list index(+1). If the entry is zero then the label has been deleted.
// Any other value indicates the label has been deleted by is mapped to
@@ -112,8 +132,9 @@ private:
// common EH frames.
std::vector<Function *> Personalities;
- // UsedFunctions - the functions in the llvm.used list in a more easily
- // searchable format.
+ /// UsedFunctions - The functions in the @llvm.used list in a more easily
+ /// searchable format. This does not include the functions in
+ /// llvm.compiler.used.
SmallPtrSet<const Function *, 32> UsedFunctions;
/// UsedDbgLabels - labels are used by debug info entries.
@@ -125,28 +146,45 @@ private:
/// DbgInfoAvailable - True if debugging information is available
/// in this module.
bool DbgInfoAvailable;
+
public:
static char ID; // Pass identification, replacement for typeid
+ typedef SmallVector<std::pair<WeakMetadataVH, unsigned>, 4> VariableDbgInfoMapTy;
+ VariableDbgInfoMapTy VariableDbgInfo;
+
MachineModuleInfo();
~MachineModuleInfo();
- /// doInitialization - Initialize the state for a new module.
- ///
bool doInitialization();
-
- /// doFinalization - Tear down the state after completion of a module.
- ///
bool doFinalization();
-
+
/// BeginFunction - Begin gathering function meta information.
///
- void BeginFunction(MachineFunction *MF);
+ void BeginFunction(MachineFunction *) {}
/// EndFunction - Discard function meta information.
///
void EndFunction();
+ /// getObjFileInfo - Keep track of various per-module pieces of information
+ /// for backends that would like to do so.
+ ///
+ template<typename Ty>
+ Ty &getObjFileInfo() {
+ if (ObjFileMMI == 0)
+ ObjFileMMI = new Ty(*this);
+
+ assert((void*)dynamic_cast<Ty*>(ObjFileMMI) == (void*)ObjFileMMI &&
+ "Invalid concrete type or multiple inheritence for getInfo");
+ return *static_cast<Ty*>(ObjFileMMI);
+ }
+
+ template<typename Ty>
+ const Ty &getObjFileInfo() const {
+ return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>();
+ }
+
/// AnalyzeModule - Scan the module for global debug information.
///
void AnalyzeModule(Module &M);
@@ -240,9 +278,11 @@ public:
return Personalities;
}
- // UsedFunctions - Return set of the functions in the llvm.used list.
- const SmallPtrSet<const Function *, 32>& getUsedFunctions() const {
- return UsedFunctions;
+ /// isUsedFunction - Return true if the function is in the llvm.used list. This
+ /// does not return true for things in llvm.compiler.used unless they are also
+ /// in llvm.used.
+ bool isUsedFunction(const Function *F) {
+ return UsedFunctions.count(F);
}
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
@@ -293,6 +333,14 @@ public:
/// of one is required to emit exception handling info.
Function *getPersonality() const;
+ /// setVariableDbgInfo - Collect information used to emit debugging information
+ /// of a variable.
+ void setVariableDbgInfo(MDNode *N, unsigned S) {
+ VariableDbgInfo.push_back(std::make_pair(N, S));
+ }
+
+ VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfo; }
+
}; // End class MachineModuleInfo
} // End llvm namespace
diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h
new file mode 100644
index 000000000000..44813cbdcd95
--- /dev/null
+++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -0,0 +1,79 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.h -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
+#define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+namespace llvm {
+ class MCSymbol;
+
+ /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation
+ /// for MachO targets.
+ class MachineModuleInfoMachO : public MachineModuleInfoImpl {
+ /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub",
+ /// the value is something like "_foo".
+ DenseMap<const MCSymbol*, const MCSymbol*> FnStubs;
+
+ /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
+ /// "Lfoo$non_lazy_ptr", the value is something like "_foo".
+ DenseMap<const MCSymbol*, const MCSymbol*> GVStubs;
+
+ /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
+ /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs
+ /// these are for things with hidden visibility.
+ DenseMap<const MCSymbol*, const MCSymbol*> HiddenGVStubs;
+
+ virtual void Anchor(); // Out of line virtual method.
+ public:
+ MachineModuleInfoMachO(const MachineModuleInfo &) {}
+
+ const MCSymbol *&getFnStubEntry(const MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return FnStubs[Sym];
+ }
+
+ const MCSymbol *&getGVStubEntry(const MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return GVStubs[Sym];
+ }
+
+ const MCSymbol *&getHiddenGVStubEntry(const MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return HiddenGVStubs[Sym];
+ }
+
+ /// Accessor methods to return the set of stubs in sorted order.
+ typedef std::vector<std::pair<const MCSymbol*, const MCSymbol*> >
+ SymbolListTy;
+
+ SymbolListTy GetFnStubList() const {
+ return GetSortedStubs(FnStubs);
+ }
+ SymbolListTy GetGVStubList() const {
+ return GetSortedStubs(GVStubs);
+ }
+ SymbolListTy GetHiddenGVStubList() const {
+ return GetSortedStubs(HiddenGVStubs);
+ }
+
+ private:
+ static SymbolListTy
+ GetSortedStubs(const DenseMap<const MCSymbol*, const MCSymbol*> &Map);
+ };
+
+} // end namespace llvm
+
+#endif
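
The templated getObjFileInfo accessor on MachineModuleInfo lazily creates the target-flavored impl object and owns it. A sketch of how a MachO-targeting backend might record a stub through it (the helper is hypothetical; the accessors are those declared above):

    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/CodeGen/MachineModuleInfoImpls.h"

    // Hypothetical: note a Darwin '$stub' entry for later emission.
    static void recordFnStub(llvm::MachineModuleInfo &MMI,
                             const llvm::MCSymbol *Stub,
                             const llvm::MCSymbol *Target) {
      llvm::MachineModuleInfoMachO &MachOMMI =
          MMI.getObjFileInfo<llvm::MachineModuleInfoMachO>();
      MachOMMI.getFnStubEntry(Stub) = Target;  // reference into the stub map
    }
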
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 26ec239f4c8b..f715c445f8ab 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -16,7 +16,6 @@
#include "llvm/Support/DataTypes.h"
#include <cassert>
-#include <iosfwd>
namespace llvm {
@@ -111,7 +110,7 @@ private:
GlobalValue *GV; // For MO_GlobalAddress.
MDNode *Node; // For MO_Metadata.
} Val;
- int64_t Offset; // An offset from the object.
+ int64_t Offset; // An offset from the object.
} OffsetedInfo;
} Contents;
@@ -119,12 +118,6 @@ private:
TargetFlags = 0;
}
public:
- MachineOperand(const MachineOperand &M) {
- *this = M;
- }
-
- ~MachineOperand() {}
-
/// getType - Returns the MachineOperandType for this operand.
///
MachineOperandType getType() const { return (MachineOperandType)OpKind; }
@@ -139,7 +132,6 @@ public:
MachineInstr *getParent() { return ParentMI; }
const MachineInstr *getParent() const { return ParentMI; }
- void print(std::ostream &os, const TargetMachine *TM = 0) const;
void print(raw_ostream &os, const TargetMachine *TM = 0) const;
//===--------------------------------------------------------------------===//
@@ -164,6 +156,8 @@ public:
bool isGlobal() const { return OpKind == MO_GlobalAddress; }
/// isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isSymbol() const { return OpKind == MO_ExternalSymbol; }
+ /// isMetadata - Tests if this is a MO_Metadata operand.
+ bool isMetadata() const { return OpKind == MO_Metadata; }
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
@@ -304,6 +298,8 @@ public:
return Contents.OffsetedInfo.Val.Node;
}
+ /// getOffset - Return the offset from the symbol in this operand. This always
+ /// returns 0 for ExternalSymbol operands.
int64_t getOffset() const {
assert((isGlobal() || isSymbol() || isCPI()) &&
"Wrong MachineOperand accessor");
@@ -325,7 +321,7 @@ public:
}
void setOffset(int64_t Offset) {
- assert((isGlobal() || isSymbol() || isCPI()) &&
+ assert((isGlobal() || isSymbol() || isCPI() || isMetadata()) &&
"Wrong MachineOperand accessor");
Contents.OffsetedInfo.Offset = Offset;
}
@@ -438,28 +434,14 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
- static MachineOperand CreateES(const char *SymName, int64_t Offset = 0,
+ static MachineOperand CreateES(const char *SymName,
unsigned char TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_ExternalSymbol);
Op.Contents.OffsetedInfo.Val.SymbolName = SymName;
- Op.setOffset(Offset);
+ Op.setOffset(0); // Offset is always 0.
Op.setTargetFlags(TargetFlags);
return Op;
}
- const MachineOperand &operator=(const MachineOperand &MO) {
- OpKind = MO.OpKind;
- IsDef = MO.IsDef;
- IsImp = MO.IsImp;
- IsKill = MO.IsKill;
- IsDead = MO.IsDead;
- IsUndef = MO.IsUndef;
- IsEarlyClobber = MO.IsEarlyClobber;
- SubReg = MO.SubReg;
- ParentMI = MO.ParentMI;
- Contents = MO.Contents;
- TargetFlags = MO.TargetFlags;
- return *this;
- }
friend class MachineInstr;
friend class MachineRegisterInfo;
@@ -486,11 +468,6 @@ private:
void RemoveRegOperandFromRegInfo();
};
-inline std::ostream &operator<<(std::ostream &OS, const MachineOperand &MO) {
- MO.print(OS, 0);
- return OS;
-}
-
inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) {
MO.print(OS, 0);
return OS;
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 80c37b39ca0c..18e60200b099 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -16,7 +16,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/iterator.h"
#include <vector>
namespace llvm {
@@ -256,7 +255,7 @@ public:
/// returns end().
template<bool ReturnUses, bool ReturnDefs>
class defusechain_iterator
- : public forward_iterator<MachineInstr, ptrdiff_t> {
+ : public std::iterator<std::forward_iterator_tag, MachineInstr, ptrdiff_t> {
MachineOperand *Op;
explicit defusechain_iterator(MachineOperand *op) : Op(op) {
// If the first node isn't one we're interested in, advance to one that
@@ -269,8 +268,10 @@ public:
}
friend class MachineRegisterInfo;
public:
- typedef forward_iterator<MachineInstr, ptrdiff_t>::reference reference;
- typedef forward_iterator<MachineInstr, ptrdiff_t>::pointer pointer;
+ typedef std::iterator<std::forward_iterator_tag,
+ MachineInstr, ptrdiff_t>::reference reference;
+ typedef std::iterator<std::forward_iterator_tag,
+ MachineInstr, ptrdiff_t>::pointer pointer;
defusechain_iterator(const defusechain_iterator &I) : Op(I.Op) {}
defusechain_iterator() : Op(0) {}
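
The def/use chain iterator is now built on std::iterator rather than the removed forward_iterator adapter, but client walks are unaffected. A sketch (the counting helper is hypothetical):

    #include "llvm/CodeGen/MachineRegisterInfo.h"

    // Hypothetical walk over all uses of a register.
    static unsigned countRegUses(llvm::MachineRegisterInfo &MRI, unsigned Reg) {
      unsigned N = 0;
      for (llvm::MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
           E = MRI.use_end(); I != E; ++I)
        ++N;
      return N;
    }
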
diff --git a/include/llvm/CodeGen/ObjectCodeEmitter.h b/include/llvm/CodeGen/ObjectCodeEmitter.h
new file mode 100644
index 000000000000..8252e07d84b1
--- /dev/null
+++ b/include/llvm/CodeGen/ObjectCodeEmitter.h
@@ -0,0 +1,178 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.h - Object Code Emitter -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Generalized Object Code Emitter that works with ObjectModule and BinaryObject.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_OBJECTCODEEMITTER_H
+#define LLVM_CODEGEN_OBJECTCODEEMITTER_H
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+namespace llvm {
+
+class BinaryObject;
+class MachineBasicBlock;
+class MachineCodeEmitter;
+class MachineFunction;
+class MachineConstantPool;
+class MachineJumpTableInfo;
+class MachineModuleInfo;
+
+class ObjectCodeEmitter : public MachineCodeEmitter {
+protected:
+
+ /// Binary Object (Section or Segment) we are emitting to.
+ BinaryObject *BO;
+
+ /// MBBLocations - This vector is a mapping from MBB IDs to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// LabelLocations - This vector is a mapping from Label IDs to their
+ /// address.
+ std::vector<uintptr_t> LabelLocations;
+
+ /// CPLocations - This is a map of constant pool indices to offsets from the
+ /// start of the section for that constant pool index.
+ std::vector<uintptr_t> CPLocations;
+
+ /// CPSections - This is a map of constant pool indices to the Section
+ /// containing the constant pool entry for that index.
+ std::vector<uintptr_t> CPSections;
+
+ /// JTLocations - This is a map of jump table indices to offsets from the
+ /// start of the section for that jump table index.
+ std::vector<uintptr_t> JTLocations;
+
+public:
+ ObjectCodeEmitter();
+ ObjectCodeEmitter(BinaryObject *bo);
+ virtual ~ObjectCodeEmitter();
+
+ /// setBinaryObject - set the BinaryObject we are writing to
+ void setBinaryObject(BinaryObject *bo);
+
+ /// emitByte - This callback is invoked when a byte needs to be
+ /// written to the data stream, without buffer overflow testing.
+ void emitByte(uint8_t B);
+
+ /// emitWordLE - This callback is invoked when a 32-bit word needs to be
+ /// written to the data stream in little-endian format.
+ void emitWordLE(uint32_t W);
+
+ /// emitWordBE - This callback is invoked when a 32-bit word needs to be
+ /// written to the data stream in big-endian format.
+ void emitWordBE(uint32_t W);
+
+ /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+ /// written to the data stream in little-endian format.
+ void emitDWordLE(uint64_t W);
+
+ /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+ /// written to the data stream in big-endian format.
+ void emitDWordBE(uint64_t W);
+
+ /// emitAlignment - Move the CurBufferPtr pointer up to the specified
+ /// alignment (saturated to BufferEnd of course).
+ void emitAlignment(unsigned Alignment = 0, uint8_t fill = 0);
+
+ /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+ /// written to the data stream.
+ void emitULEB128Bytes(uint64_t Value);
+
+ /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+ /// written to the data stream.
+ void emitSLEB128Bytes(uint64_t Value);
+
+ /// emitString - This callback is invoked when a String needs to be
+ /// written to the data stream.
+ void emitString(const std::string &String);
+
+ /// getCurrentPCValue - This returns the address that the next emitted byte
+ /// will be output to.
+ uintptr_t getCurrentPCValue() const;
+
+ /// getCurrentPCOffset - Return the offset from the start of the emitted
+ /// buffer that we are currently writing to.
+ uintptr_t getCurrentPCOffset() const;
+
+ /// addRelocation - Whenever a relocatable address is needed, it should be
+ /// noted with this interface.
+ void addRelocation(const MachineRelocation& relocation);
+
+ /// earlyResolveAddresses - True if the code emitter can use symbol addresses
+ /// during code emission time. The JIT is capable of doing this because it
+ /// creates jump tables or constant pools in memory on the fly while the
+ /// object code emitters rely on a linker to have real addresses and should
+ /// use relocations instead.
+ bool earlyResolveAddresses() const { return false; }
+
+ /// startFunction - This callback is invoked when the specified function is
+ /// about to be code generated. This initializes the BufferBegin/End/Ptr
+ /// fields.
+ virtual void startFunction(MachineFunction &F) = 0;
+
+ /// finishFunction - This callback is invoked when the specified function has
+ /// finished code generation. If a buffer overflow has occurred, this method
+ /// returns true (the callee is required to try again), otherwise it returns
+ /// false.
+ virtual bool finishFunction(MachineFunction &F) = 0;
+
+ /// StartMachineBasicBlock - This should be called by the target when a new
+ /// basic block is about to be emitted. This way the MCE knows where the
+ /// start of the block is, and can implement getMachineBasicBlockAddress.
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB);
+
+ /// getMachineBasicBlockAddress - Return the address of the specified
+ /// MachineBasicBlock, only usable after the label for the MBB has been
+ /// emitted.
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const;
+
+ /// emitLabel - Emits a label
+ virtual void emitLabel(uint64_t LabelID) = 0;
+
+ /// getLabelAddress - Return the address of the specified LabelID, only usable
+ /// after the LabelID has been emitted.
+ virtual uintptr_t getLabelAddress(uint64_t LabelID) const = 0;
+
+ /// emitJumpTables - Emit all the jump tables for a given jump table info
+ /// record to the appropriate section.
+ virtual void emitJumpTables(MachineJumpTableInfo *MJTI) = 0;
+
+ /// getJumpTableEntryAddress - Return the address of the jump table with index
+ /// 'Index' in the function that last called initJumpTableInfo.
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const;
+
+ /// emitConstantPool - For each constant pool entry, figure out which section
+ /// the constant should live in, allocate space for it, and emit it to the
+ /// Section data buffer.
+ virtual void emitConstantPool(MachineConstantPool *MCP) = 0;
+
+ /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+ /// the constant pool that was last emitted with the emitConstantPool method.
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const;
+
+ /// getConstantPoolEntrySection - Return the section of the 'Index' entry in
+ /// the constant pool that was last emitted with the emitConstantPool method.
+ virtual uintptr_t getConstantPoolEntrySection(unsigned Index) const;
+
+ /// Specifies the MachineModuleInfo object. This is used for exception handling
+ /// purposes.
+ virtual void setModuleInfo(MachineModuleInfo* Info) = 0;
+ // to be implemented or deprecated with MachineModuleInfo
+
+}; // end class ObjectCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
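
A sketch of what the little-endian word emit amounts to in terms of the byte-level callback; the free function is hypothetical (the real member writes through the owned BinaryObject):

    #include "llvm/CodeGen/ObjectCodeEmitter.h"

    // Hypothetical: emit a 32-bit word in little-endian byte order.
    static void emitWordLEByBytes(llvm::ObjectCodeEmitter &OCE, uint32_t W) {
      OCE.emitByte((uint8_t)(W >> 0));   // least significant byte first
      OCE.emitByte((uint8_t)(W >> 8));
      OCE.emitByte((uint8_t)(W >> 16));
      OCE.emitByte((uint8_t)(W >> 24));
    }
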
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 7f1c16ff8064..1e7115e090bd 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -15,7 +15,6 @@
#ifndef LLVM_CODEGEN_PASSES_H
#define LLVM_CODEGEN_PASSES_H
-#include <iosfwd>
#include <string>
namespace llvm {
@@ -25,6 +24,7 @@ namespace llvm {
class TargetMachine;
class TargetLowering;
class RegisterCoalescer;
+ class raw_ostream;
/// createUnreachableBlockEliminationPass - The LLVM code generator does not
/// work well with unreachable basic blocks (what live ranges make sense for a
@@ -35,8 +35,8 @@ namespace llvm {
FunctionPass *createUnreachableBlockEliminationPass();
/// MachineFunctionPrinter pass - This pass prints out the machine function to
- /// standard error, as a debugging tool.
- FunctionPass *createMachineFunctionPrinterPass(std::ostream *OS,
+ /// the given stream, as a debugging tool.
+ FunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
const std::string &Banner ="");
/// MachineLoopInfo pass - This pass is a loop analysis pass.
@@ -87,27 +87,12 @@ namespace llvm {
///
FunctionPass *createRegisterAllocator();
- /// SimpleRegisterAllocation Pass - This pass converts the input machine code
- /// from SSA form to use explicit registers by spilling every register. Wow,
- /// great policy huh?
- ///
- FunctionPass *createSimpleRegisterAllocator();
-
/// LocalRegisterAllocation Pass - This pass register allocates the input code
/// a basic block at a time, yielding code better than the simple register
/// allocator, but not as good as a global allocator.
///
FunctionPass *createLocalRegisterAllocator();
- /// BigBlockRegisterAllocation Pass - The BigBlock register allocator
- /// munches single basic blocks at a time, like the local register
- /// allocator. While the BigBlock allocator is a little slower, and uses
- /// somewhat more memory than the local register allocator, it tends to
- /// yield the best allocations (of any of the allocators) for blocks that
- /// have hundreds or thousands of instructions in sequence.
- ///
- FunctionPass *createBigBlockRegisterAllocator();
-
/// LinearScanRegisterAllocation Pass - This pass implements the linear scan
/// register allocation algorithm, a global register allocator.
///
@@ -155,11 +140,6 @@ namespace llvm {
/// by seeing if the labels map to the same reduced label.
FunctionPass *createDebugLabelFoldingPass();
- /// MachineCodeDeletion Pass - This pass deletes all of the machine code for
- /// the current function, which should happen after the function has been
- /// emitted to a .s file or to memory.
- FunctionPass *createMachineCodeDeleter();
-
/// getRegisterAllocator - This creates an instance of the register allocator
/// for the Sparc.
FunctionPass *getRegisterAllocator(TargetMachine &T);
@@ -180,7 +160,7 @@ namespace llvm {
/// Creates a pass to print GC metadata.
///
- FunctionPass *createGCInfoPrinter(std::ostream &OS);
+ FunctionPass *createGCInfoPrinter(raw_ostream &OS);
/// createMachineLICMPass - This pass performs LICM on machine instructions.
///
@@ -207,6 +187,10 @@ namespace llvm {
/// adapted to code generation. Required if using dwarf exception handling.
FunctionPass *createDwarfEHPass(const TargetLowering *tli, bool fast);
+ /// createSjLjEHPass - This pass adapts exception handling code to use
+ /// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow.
+ FunctionPass *createSjLjEHPass(const TargetLowering *tli);
+
} // End llvm namespace
#endif
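
The printer pass now takes a raw_ostream. A sketch of wiring one up for debugging (the wrapper is hypothetical; pass-manager construction is elided):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical: create a printer that dumps each function to stderr.
    static llvm::FunctionPass *createDebugPrinter() {
      return llvm::createMachineFunctionPrinterPass(llvm::errs(),
                                                    "# After my pass:");
    }
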
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index 3ad2502fe08a..c6be645040a8 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -25,18 +25,17 @@ namespace llvm {
/// stack frame (e.g., a spill slot), below the stack frame (e.g., argument
/// space), or constant pool.
class PseudoSourceValue : public Value {
+ private:
+ /// printCustom - Implement printing for PseudoSourceValue. This is called
+ /// from Value::print or Value's operator<<.
+ ///
+ virtual void printCustom(raw_ostream &O) const;
+
public:
PseudoSourceValue();
- /// dump - Support for debugging, callable in GDB: V->dump()
- //
- virtual void dump() const;
-
- /// print - Implement operator<< on PseudoSourceValue.
- ///
- virtual void print(raw_ostream &OS) const;
-
- /// isConstant - Test whether this PseudoSourceValue has a constant value.
+ /// isConstant - Test whether the memory pointed to by this
+ /// PseudoSourceValue has a constant value.
///
virtual bool isConstant(const MachineFrameInfo *) const;
@@ -52,18 +51,21 @@ namespace llvm {
/// e.g., a spill slot.
static const PseudoSourceValue *getFixedStack(int FI);
- /// A source value referencing the area below the stack frame of a function,
- /// e.g., the argument space.
+ /// A pseudo source value referencing the area below the stack frame of
+ /// a function, e.g., the argument space.
static const PseudoSourceValue *getStack();
- /// A source value referencing the global offset table (or something the
- /// like).
+ /// A pseudo source value referencing the global offset table
+ /// (or something like it).
static const PseudoSourceValue *getGOT();
- /// A SV referencing the constant pool
+ /// A pseudo source value referencing the constant pool. Since constant
+ /// pools are constant, this doesn't need to identify a specific constant
+ /// pool entry.
static const PseudoSourceValue *getConstantPool();
- /// A SV referencing the jump table
+ /// A pseudo source value referencing a jump table. Since jump tables are
+ /// constant, this doesn't need to identify a specific jump table.
static const PseudoSourceValue *getJumpTable();
};
} // End llvm namespace
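
The getFixedStack/getStack/getGOT/getConstantPool/getJumpTable factories return shared singletons, and isConstant is the query the new comments describe. A small sketch, assuming a MachineFrameInfo pointer is available from the current function:

    #include "llvm/CodeGen/PseudoSourceValue.h"
    using namespace llvm;

    // Constant-pool memory never changes, so a load from it can be
    // hoisted or reordered freely; isConstant reports exactly that.
    bool constantPoolIsConstant(const MachineFrameInfo *MFI) {
      const PseudoSourceValue *PSV = PseudoSourceValue::getConstantPool();
      return PSV->isConstant(MFI); // expected to be true
    }
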
diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h
index a08e42a5d34b..100e357654fb 100644
--- a/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/include/llvm/CodeGen/RegAllocRegistry.h
@@ -34,7 +34,9 @@ public:
RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
: MachinePassRegistryNode(N, D, (MachinePassCtor)C)
- { Registry.Add(this); }
+ {
+ Registry.Add(this);
+ }
~RegisterRegAlloc() { Registry.Remove(this); }
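
RegisterRegAlloc is meant to be instantiated as a file-static object whose constructor adds a factory to the registry; the in-tree allocators register themselves this way. A sketch with a hypothetical allocator name (here it simply delegates to the local allocator):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/CodeGen/RegAllocRegistry.h"
    using namespace llvm;

    static FunctionPass *createMyRegAlloc() {
      return createLocalRegisterAllocator(); // stand-in for a real allocator
    }

    // Static construction runs Registry.Add(this), making the allocator
    // selectable on the command line, e.g. -regalloc=myra.
    static RegisterRegAlloc
      MyRegAlloc("myra", "toy register allocator", createMyRegAlloc);
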
diff --git a/include/llvm/CodeGen/RegisterCoalescer.h b/include/llvm/CodeGen/RegisterCoalescer.h
index 79dd9db0f11a..1490aa0172fb 100644
--- a/include/llvm/CodeGen/RegisterCoalescer.h
+++ b/include/llvm/CodeGen/RegisterCoalescer.h
@@ -42,7 +42,7 @@ namespace llvm {
/// Reset state. Can be used to allow a coalescer run by
/// PassManager to be run again by the register allocator.
- virtual void reset(MachineFunction &mf) {};
+ virtual void reset(MachineFunction &mf) {}
/// Register allocators must call this from their own
/// getAnalysisUsage to cover the case where the coalescer is not
@@ -51,7 +51,7 @@ namespace llvm {
/// which to invalidate when running the register allocator or any
/// pass that might call coalescing. The long-term solution is to
/// allow hierarchies of PassManagers.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {};
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
};
/// An abstract interface for register allocators to interact with
@@ -68,7 +68,7 @@ namespace llvm {
///
/// public:
/// LinearScanRegallocQuery(LiveIntervals &intervals)
- /// : li(intervals) {};
+ /// : li(intervals) {}
///
/// /// This is pretty slow and conservative, but since linear scan
/// /// allocation doesn't pre-compute interference information it's
@@ -85,14 +85,14 @@ namespace llvm {
/// interferences.insert(&iv->second);
/// }
/// }
- /// };
+ /// }
///
/// /// This is *really* slow and stupid. See above.
/// int getNumberOfInterferences(const LiveInterval &a) const {
/// IntervalSet intervals;
/// getInterferences(intervals, a);
/// return intervals.size();
- /// };
+ /// }
/// };
///
/// In the allocator:
@@ -108,14 +108,14 @@ namespace llvm {
public:
typedef SmallPtrSet<const LiveInterval *, 8> IntervalSet;
- virtual ~RegallocQuery() {};
+ virtual ~RegallocQuery() {}
/// Return whether two live ranges interfere.
virtual bool interfere(const LiveInterval &a,
const LiveInterval &b) const {
// A naive test
return a.overlaps(b);
- };
+ }
/// Return the set of intervals that interfere with this one.
virtual void getInterferences(IntervalSet &interferences,
@@ -129,7 +129,7 @@ namespace llvm {
/// coalescing or other modifications.
virtual void updateDataForMerge(const LiveInterval &a,
const LiveInterval &b,
- const MachineInstr &copy) {};
+ const MachineInstr &copy) {}
/// Allow the register allocator to communicate when it doesn't
/// want a copy coalesced. This may be due to assumptions made by
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 458c2e4487f9..84b726d73fb3 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
namespace llvm {
@@ -69,14 +68,6 @@ class RegScavenger {
/// available, unset means the register is currently being used.
BitVector RegsAvailable;
- /// CurrDist - Distance from MBB entry to the current instruction MBBI.
- ///
- unsigned CurrDist;
-
- /// DistanceMap - Keep track the distance of a MI from the start of the
- /// current basic block.
- DenseMap<MachineInstr*, unsigned> DistanceMap;
-
public:
RegScavenger()
: MBB(NULL), NumPhysRegs(0), Tracking(false),
@@ -86,56 +77,30 @@ public:
/// basic block.
void enterBasicBlock(MachineBasicBlock *mbb);
- /// forward / backward - Move the internal MBB iterator and update register
- /// states.
+ /// initRegState - Allow resetting register state info for multiple
+ /// passes over/within the same function.
+ void initRegState();
+
+ /// forward - Move the internal MBB iterator and update register states.
void forward();
- void backward();
- /// forward / backward - Move the internal MBB iterator and update register
- /// states until it has processed the specific iterator.
+ /// forward - Move the internal MBB iterator and update register states until
+ /// it has processed the specified iterator.
void forward(MachineBasicBlock::iterator I) {
if (!Tracking && MBB->begin() != I) forward();
while (MBBI != I) forward();
}
- void backward(MachineBasicBlock::iterator I) {
- while (MBBI != I) backward();
- }
/// skipTo - Move the internal MBB iterator but do not update register states.
///
void skipTo(MachineBasicBlock::iterator I) { MBBI = I; }
- /// isReserved - Returns true if a register is reserved. It is never "unused".
- bool isReserved(unsigned Reg) const { return ReservedRegs[Reg]; }
-
- /// isUsed / isUsed - Test if a register is currently being used.
- ///
- bool isUsed(unsigned Reg) const { return !RegsAvailable[Reg]; }
- bool isUnused(unsigned Reg) const { return RegsAvailable[Reg]; }
-
/// getRegsUsed - Return all registers currently in use via the used bit vector.
void getRegsUsed(BitVector &used, bool includeReserved);
- /// setUsed / setUnused - Mark the state of one or a number of registers.
- ///
- void setUsed(unsigned Reg);
- void setUsed(BitVector &Regs) {
- RegsAvailable &= ~Regs;
- }
- void setUnused(unsigned Reg, const MachineInstr *MI);
- void setUnused(BitVector &Regs) {
- RegsAvailable |= Regs;
- }
-
- /// FindUnusedReg - Find a unused register of the specified register class
- /// from the specified set of registers. It return 0 is none is found.
- unsigned FindUnusedReg(const TargetRegisterClass *RegClass,
- const BitVector &Candidates) const;
-
/// FindUnusedReg - Find an unused register of the specified register class.
- /// Exclude callee saved registers if directed. It return 0 is none is found.
- unsigned FindUnusedReg(const TargetRegisterClass *RegClass,
- bool ExCalleeSaved = false) const;
+ /// Return 0 if none is found.
+ unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const;
/// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter of
/// ScavengingFrameIndex.
@@ -152,16 +117,43 @@ public:
return scavengeRegister(RegClass, MBBI, SPAdj);
}
+ /// setUsed - Tell the scavenger a register is used.
+ ///
+ void setUsed(unsigned Reg);
private:
- /// restoreScavengedReg - Restore scavenged by loading it back from the
- /// emergency spill slot. Mark it used.
- void restoreScavengedReg();
+ /// isReserved - Returns true if a register is reserved. It is never "unused".
+ bool isReserved(unsigned Reg) const { return ReservedRegs.test(Reg); }
+
+ /// isUsed / isUnused - Test if a register is currently being used.
+ ///
+ bool isUsed(unsigned Reg) const { return !RegsAvailable.test(Reg); }
+ bool isUnused(unsigned Reg) const { return RegsAvailable.test(Reg); }
+
+ /// isAliasUsed - Is Reg or an alias currently in use?
+ bool isAliasUsed(unsigned Reg) const;
+
+ /// setUsed / setUnused - Mark the state of one or a number of registers.
+ ///
+ void setUsed(BitVector &Regs) {
+ RegsAvailable &= ~Regs;
+ }
+ void setUnused(BitVector &Regs) {
+ RegsAvailable |= Regs;
+ }
+
+ /// Add Reg and all its sub-registers to BV.
+ void addRegWithSubRegs(BitVector &BV, unsigned Reg);
+
+ /// Add Reg and its aliases to BV.
+ void addRegWithAliases(BitVector &BV, unsigned Reg);
+
+ unsigned findSurvivorReg(MachineBasicBlock::iterator MI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI);
- MachineInstr *findFirstUse(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I, unsigned Reg,
- unsigned &Dist);
};
-
+
} // End llvm namespace
#endif
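
With backward() and the per-instruction distance map removed, the scavenger is driven by a single forward sweep per block. A sketch of the intended usage, assuming RC names a register class of interest:

    #include "llvm/CodeGen/RegisterScavenging.h"
    using namespace llvm;

    // Advance the scavenger's register state to MI within MBB, then ask
    // for a register that is free at that point (0 if none is).
    unsigned freeRegAt(RegScavenger &RS, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       const TargetRegisterClass *RC) {
      RS.enterBasicBlock(&MBB);
      RS.forward(MI);             // process instructions up to MI
      return RS.FindUnusedReg(RC);
    }
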
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 7f2c8bc36840..7a40f0233d57 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -224,6 +224,11 @@ namespace RTLIB {
O_F32,
O_F64,
+ // MEMORY
+ MEMCPY,
+ MEMSET,
+ MEMMOVE,
+
// EXCEPTION HANDLING
UNWIND_RESUME,
@@ -232,27 +237,27 @@ namespace RTLIB {
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
- Libcall getFPEXT(MVT OpVT, MVT RetVT);
+ Libcall getFPEXT(EVT OpVT, EVT RetVT);
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
- Libcall getFPROUND(MVT OpVT, MVT RetVT);
+ Libcall getFPROUND(EVT OpVT, EVT RetVT);
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
- Libcall getFPTOSINT(MVT OpVT, MVT RetVT);
+ Libcall getFPTOSINT(EVT OpVT, EVT RetVT);
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
- Libcall getFPTOUINT(MVT OpVT, MVT RetVT);
+ Libcall getFPTOUINT(EVT OpVT, EVT RetVT);
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
- Libcall getSINTTOFP(MVT OpVT, MVT RetVT);
+ Libcall getSINTTOFP(EVT OpVT, EVT RetVT);
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
- Libcall getUINTTOFP(MVT OpVT, MVT RetVT);
+ Libcall getUINTTOFP(EVT OpVT, EVT RetVT);
}
}
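
Since EVT converts implicitly from a simple MVT, existing call sites of these helpers keep compiling. For example, a sketch of picking the f32-to-f64 soft-float extension routine:

    #include "llvm/CodeGen/RuntimeLibcalls.h"
    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    // Returns UNKNOWN_LIBCALL when no library routine exists for the pair.
    RTLIB::Libcall extendF32ToF64() {
      return RTLIB::getFPEXT(MVT::f32, MVT::f64);
    }
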
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 237d491e8262..39563f733068 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/PointerIntPair.h"
namespace llvm {
+ class AliasAnalysis;
class SUnit;
class MachineConstantPool;
class MachineFunction;
@@ -145,6 +146,11 @@ namespace llvm {
return Latency;
}
+ /// setLatency - Set the latency for this edge.
+ void setLatency(unsigned Lat) {
+ Latency = Lat;
+ }
+
/// getSUnit - Return the SUnit to which this edge points.
SUnit *getSUnit() const {
return Dep.getPointer();
@@ -238,10 +244,10 @@ namespace llvm {
unsigned NodeNum; // Entry # of node in the node vector.
unsigned NodeQueueId; // Queue id of node.
unsigned short Latency; // Node latency.
- short NumPreds; // # of SDep::Data preds.
- short NumSuccs; // # of SDep::Data sucss.
- short NumPredsLeft; // # of preds not scheduled.
- short NumSuccsLeft; // # of succs not scheduled.
+ unsigned NumPreds; // # of SDep::Data preds.
+ unsigned NumSuccs; // # of SDep::Data succs.
+ unsigned NumPredsLeft; // # of preds not scheduled.
+ unsigned NumSuccsLeft; // # of succs not scheduled.
bool isTwoAddress : 1; // Is a two-address instruction.
bool isCommutable : 1; // Is a commutable instruction.
bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
@@ -429,8 +435,8 @@ namespace llvm {
class ScheduleDAG {
public:
- MachineBasicBlock *BB; // The block in which to insert instructions.
- MachineBasicBlock::iterator InsertPos;// The position to insert instructions.
+ MachineBasicBlock *BB; // The block in which to insert instructions
+ MachineBasicBlock::iterator InsertPos; // The position to insert instructions
const TargetMachine &TM; // Target processor
const TargetInstrInfo *TII; // Target instruction information
const TargetRegisterInfo *TRI; // Target processor register info
@@ -456,7 +462,8 @@ namespace llvm {
/// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
/// according to the order specified in Sequence.
///
- virtual MachineBasicBlock *EmitSchedule() = 0;
+ virtual MachineBasicBlock*
+ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) = 0;
void dumpSchedule() const;
@@ -484,19 +491,25 @@ namespace llvm {
/// BuildSchedGraph - Build SUnits and set up their Preds and Succs
/// to form the scheduling dependency graph.
///
- virtual void BuildSchedGraph() = 0;
+ virtual void BuildSchedGraph(AliasAnalysis *AA) = 0;
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU) = 0;
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information.
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const {}
+
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
virtual void Schedule() = 0;
- /// ForceUnitLatencies - Return true if all scheduling edges should be given a
- /// latency value of one. The default is to return false; schedulers may
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
/// override this as needed.
virtual bool ForceUnitLatencies() const { return false; }
@@ -504,27 +517,11 @@ namespace llvm {
///
void EmitNoop();
- void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
-
void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
-
- private:
- /// EmitLiveInCopy - Emit a copy for a live in physical register. If the
- /// physical register has only a single copy use, then coalesced the copy
- /// if possible.
- void EmitLiveInCopy(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &InsertPos,
- unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterClass *RC,
- DenseMap<MachineInstr*, unsigned> &CopyRegMap);
-
- /// EmitLiveInCopies - If this is the first basic block in the function,
- /// and if it has live ins that need to be copied into vregs, emit the
- /// copies into the top of the block.
- void EmitLiveInCopies(MachineBasicBlock *MBB);
};
- class SUnitIterator : public forward_iterator<SUnit, ptrdiff_t> {
+ class SUnitIterator : public std::iterator<std::forward_iterator_tag,
+ SUnit, ptrdiff_t> {
SUnit *Node;
unsigned Operand;
@@ -536,7 +533,7 @@ namespace llvm {
bool operator!=(const SUnitIterator& x) const { return !operator==(x); }
const SUnitIterator &operator=(const SUnitIterator &I) {
- assert(I.Node == Node && "Cannot assign iterators to two different nodes!");
+ assert(I.Node==Node && "Cannot assign iterators to two different nodes!");
Operand = I.Operand;
return *this;
}
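
setLatency pairs with the new ComputeOperandLatency hook: a scheduler can refine an edge in place rather than rebuilding it. A minimal sketch of such an adjustment (the one-cycle forwarding assumption is purely illustrative):

    #include "llvm/CodeGen/ScheduleDAG.h"
    using namespace llvm;

    // Model a forwarding path by shaving one cycle off a data edge.
    void tightenDataEdge(SDep &dep) {
      if (dep.getKind() == SDep::Data && dep.getLatency() > 0)
        dep.setLatency(dep.getLatency() - 1);
    }
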
diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 369882d258e3..09e3e8861316 100644
--- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -43,6 +43,11 @@ public:
return NoHazard;
}
+ /// Reset - This callback is invoked when a new block of
+ /// instructions is about to be schedule. The hazard state should be
+ /// set to an initialized state.
+ virtual void Reset() {}
+
/// EmitInstruction - This callback is invoked when an instruction is
/// emitted, to advance the hazard state.
virtual void EmitInstruction(SUnit *) {}
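
Reset allows one recognizer object to be reused across blocks instead of being reallocated. A toy subclass, assuming a single-issue model where one instruction occupies the cycle (entirely illustrative):

    #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
    using namespace llvm;

    struct OneSlotHazardRec : public ScheduleHazardRecognizer {
      bool SlotBusy;
      OneSlotHazardRec() : SlotBusy(false) {}

      virtual void Reset() { SlotBusy = false; }      // new per-block hook
      virtual HazardType getHazardType(SUnit *) {
        return SlotBusy ? Hazard : NoHazard;
      }
      virtual void EmitInstruction(SUnit *) { SlotBusy = true; }
      virtual void AdvanceCycle() { SlotBusy = false; }
    };
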
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 8abd78dd2abb..e0198ef2e3f4 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <vector>
@@ -37,7 +38,7 @@ class FunctionLoweringInfo;
template<> struct ilist_traits<SDNode> : public ilist_default_traits<SDNode> {
private:
- mutable ilist_node<SDNode> Sentinel;
+ mutable ilist_half_node<SDNode> Sentinel;
public:
SDNode *createSentinel() const {
return static_cast<SDNode*>(&Sentinel);
@@ -78,6 +79,7 @@ class SelectionDAG {
FunctionLoweringInfo &FLI;
MachineModuleInfo *MMI;
DwarfWriter *DW;
+ LLVMContext *Context;
/// EntryNode - The starting token.
SDNode EntryNode;
@@ -98,7 +100,7 @@ class SelectionDAG {
NodeAllocatorType NodeAllocator;
/// CSEMap - This structure is used to memoize nodes, automatically performing
- /// CSE with existing nodes with a duplicate is requested.
+ /// CSE with existing nodes when a duplicate is requested.
FoldingSet<SDNode> CSEMap;
/// OperandAllocator - Pool allocation for machine-opcode SDNode operands.
@@ -138,6 +140,7 @@ public:
FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; }
MachineModuleInfo *getMachineModuleInfo() const { return MMI; }
DwarfWriter *getDwarfWriter() const { return DW; }
+ LLVMContext *getContext() const { return Context; }
/// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
///
@@ -242,70 +245,70 @@ public:
/// getVTList - Return an SDVTList that represents the list of values
/// specified.
- SDVTList getVTList(MVT VT);
- SDVTList getVTList(MVT VT1, MVT VT2);
- SDVTList getVTList(MVT VT1, MVT VT2, MVT VT3);
- SDVTList getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4);
- SDVTList getVTList(const MVT *VTs, unsigned NumVTs);
+ SDVTList getVTList(EVT VT);
+ SDVTList getVTList(EVT VT1, EVT VT2);
+ SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3);
+ SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4);
+ SDVTList getVTList(const EVT *VTs, unsigned NumVTs);
//===--------------------------------------------------------------------===//
// Node creation methods.
//
- SDValue getConstant(uint64_t Val, MVT VT, bool isTarget = false);
- SDValue getConstant(const APInt &Val, MVT VT, bool isTarget = false);
- SDValue getConstant(const ConstantInt &Val, MVT VT, bool isTarget = false);
+ SDValue getConstant(uint64_t Val, EVT VT, bool isTarget = false);
+ SDValue getConstant(const APInt &Val, EVT VT, bool isTarget = false);
+ SDValue getConstant(const ConstantInt &Val, EVT VT, bool isTarget = false);
SDValue getIntPtrConstant(uint64_t Val, bool isTarget = false);
- SDValue getTargetConstant(uint64_t Val, MVT VT) {
+ SDValue getTargetConstant(uint64_t Val, EVT VT) {
return getConstant(Val, VT, true);
}
- SDValue getTargetConstant(const APInt &Val, MVT VT) {
+ SDValue getTargetConstant(const APInt &Val, EVT VT) {
return getConstant(Val, VT, true);
}
- SDValue getTargetConstant(const ConstantInt &Val, MVT VT) {
+ SDValue getTargetConstant(const ConstantInt &Val, EVT VT) {
return getConstant(Val, VT, true);
}
- SDValue getConstantFP(double Val, MVT VT, bool isTarget = false);
- SDValue getConstantFP(const APFloat& Val, MVT VT, bool isTarget = false);
- SDValue getConstantFP(const ConstantFP &CF, MVT VT, bool isTarget = false);
- SDValue getTargetConstantFP(double Val, MVT VT) {
+ SDValue getConstantFP(double Val, EVT VT, bool isTarget = false);
+ SDValue getConstantFP(const APFloat& Val, EVT VT, bool isTarget = false);
+ SDValue getConstantFP(const ConstantFP &CF, EVT VT, bool isTarget = false);
+ SDValue getTargetConstantFP(double Val, EVT VT) {
return getConstantFP(Val, VT, true);
}
- SDValue getTargetConstantFP(const APFloat& Val, MVT VT) {
+ SDValue getTargetConstantFP(const APFloat& Val, EVT VT) {
return getConstantFP(Val, VT, true);
}
- SDValue getTargetConstantFP(const ConstantFP &Val, MVT VT) {
+ SDValue getTargetConstantFP(const ConstantFP &Val, EVT VT) {
return getConstantFP(Val, VT, true);
}
- SDValue getGlobalAddress(const GlobalValue *GV, MVT VT,
+ SDValue getGlobalAddress(const GlobalValue *GV, EVT VT,
int64_t offset = 0, bool isTargetGA = false,
unsigned char TargetFlags = 0);
- SDValue getTargetGlobalAddress(const GlobalValue *GV, MVT VT,
+ SDValue getTargetGlobalAddress(const GlobalValue *GV, EVT VT,
int64_t offset = 0,
unsigned char TargetFlags = 0) {
return getGlobalAddress(GV, VT, offset, true, TargetFlags);
}
- SDValue getFrameIndex(int FI, MVT VT, bool isTarget = false);
- SDValue getTargetFrameIndex(int FI, MVT VT) {
+ SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false);
+ SDValue getTargetFrameIndex(int FI, EVT VT) {
return getFrameIndex(FI, VT, true);
}
- SDValue getJumpTable(int JTI, MVT VT, bool isTarget = false,
+ SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false,
unsigned char TargetFlags = 0);
- SDValue getTargetJumpTable(int JTI, MVT VT, unsigned char TargetFlags = 0) {
+ SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) {
return getJumpTable(JTI, VT, true, TargetFlags);
}
- SDValue getConstantPool(Constant *C, MVT VT,
+ SDValue getConstantPool(Constant *C, EVT VT,
unsigned Align = 0, int Offs = 0, bool isT=false,
unsigned char TargetFlags = 0);
- SDValue getTargetConstantPool(Constant *C, MVT VT,
+ SDValue getTargetConstantPool(Constant *C, EVT VT,
unsigned Align = 0, int Offset = 0,
unsigned char TargetFlags = 0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
- SDValue getConstantPool(MachineConstantPoolValue *C, MVT VT,
+ SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Align = 0, int Offs = 0, bool isT=false,
unsigned char TargetFlags = 0);
SDValue getTargetConstantPool(MachineConstantPoolValue *C,
- MVT VT, unsigned Align = 0,
+ EVT VT, unsigned Align = 0,
int Offset = 0, unsigned char TargetFlags=0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
@@ -313,15 +316,14 @@ public:
// to provide debug info for the BB at that time, so keep this one around.
SDValue getBasicBlock(MachineBasicBlock *MBB);
SDValue getBasicBlock(MachineBasicBlock *MBB, DebugLoc dl);
- SDValue getExternalSymbol(const char *Sym, MVT VT);
- SDValue getExternalSymbol(const char *Sym, DebugLoc dl, MVT VT);
- SDValue getTargetExternalSymbol(const char *Sym, MVT VT,
+ SDValue getExternalSymbol(const char *Sym, EVT VT);
+ SDValue getExternalSymbol(const char *Sym, DebugLoc dl, EVT VT);
+ SDValue getTargetExternalSymbol(const char *Sym, EVT VT,
unsigned char TargetFlags = 0);
- SDValue getArgFlags(ISD::ArgFlagsTy Flags);
- SDValue getValueType(MVT);
- SDValue getRegister(unsigned Reg, MVT VT);
+ SDValue getValueType(EVT);
+ SDValue getRegister(unsigned Reg, EVT VT);
SDValue getDbgStopPoint(DebugLoc DL, SDValue Root,
- unsigned Line, unsigned Col, Value *CU);
+ unsigned Line, unsigned Col, MDNode *CU);
SDValue getLabel(unsigned Opcode, DebugLoc dl, SDValue Root,
unsigned LabelID);
@@ -348,7 +350,7 @@ public:
return getNode(ISD::CopyToReg, dl, VTs, Ops, Flag.getNode() ? 4 : 3);
}
- SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, MVT VT) {
+ SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, getRegister(Reg, VT) };
return getNode(ISD::CopyFromReg, dl, VTs, Ops, 2);
@@ -357,7 +359,7 @@ public:
// This version of the getCopyFromReg method takes an extra operand, which
// indicates that there is potentially an incoming flag value (if Flag is not
// null) and that there should be a flag result.
- SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, MVT VT,
+ SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT,
SDValue Flag) {
SDVTList VTs = getVTList(VT, MVT::Other, MVT::Flag);
SDValue Ops[] = { Chain, getRegister(Reg, VT), Flag };
@@ -368,7 +370,7 @@ public:
/// Returns the ConvertRndSat Note: Avoid using this node because it may
/// disappear in the future and most targets don't support it.
- SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
+ SDValue getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
SDValue STy,
SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
@@ -376,15 +378,23 @@ public:
/// elements in VT, which must be a vector type, must match the number of
/// mask elements NumElts. An integer mask element equal to -1 is treated as
/// undefined.
- SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
+ SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2,
const int *MaskElts);
+ /// getSExtOrTrunc - Convert Op, which must be of integer type, to the
+ /// integer type VT, by either sign-extending or truncating it.
+ SDValue getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT);
+
+ /// getZExtOrTrunc - Convert Op, which must be of integer type, to the
+ /// integer type VT, by either zero-extending or truncating it.
+ SDValue getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT);
+
/// getZeroExtendInReg - Return the expression required to zero extend the Op
/// value assuming it was the smaller SrcTy value.
- SDValue getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT SrcTy);
+ SDValue getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT SrcTy);
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
- SDValue getNOT(DebugLoc DL, SDValue Val, MVT VT);
+ SDValue getNOT(DebugLoc DL, SDValue Val, EVT VT);
/// getCALLSEQ_START - Return a new CALLSEQ_START node, which always must have
/// a flag result (to ensure it's not CSE'd). CALLSEQ_START does not have a
@@ -413,36 +423,36 @@ public:
}
/// getUNDEF - Return an UNDEF node. UNDEF does not have a useful DebugLoc.
- SDValue getUNDEF(MVT VT) {
+ SDValue getUNDEF(EVT VT) {
return getNode(ISD::UNDEF, DebugLoc::getUnknownLoc(), VT);
}
/// getGLOBAL_OFFSET_TABLE - Return a GLOBAL_OFFSET_TABLE node. This does
/// not have a useful DebugLoc.
- SDValue getGLOBAL_OFFSET_TABLE(MVT VT) {
+ SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
return getNode(ISD::GLOBAL_OFFSET_TABLE, DebugLoc::getUnknownLoc(), VT);
}
/// getNode - Gets or creates the specified node.
///
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, SDValue N);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, SDValue N1, SDValue N2);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT);
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N);
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2);
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3, SDValue N4);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
const SDUse *Ops, unsigned NumOps);
- SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
const SDValue *Ops, unsigned NumOps);
SDValue getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<MVT> &ResultTys,
+ const std::vector<EVT> &ResultTys,
const SDValue *Ops, unsigned NumOps);
- SDValue getNode(unsigned Opcode, DebugLoc DL, const MVT *VTs, unsigned NumVTs,
+ SDValue getNode(unsigned Opcode, DebugLoc DL, const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps);
SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
const SDValue *Ops, unsigned NumOps);
@@ -458,6 +468,12 @@ public:
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5);
+ /// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+ /// the incoming stack arguments to be loaded from the stack. This is
+ /// used in tail call lowering to protect stack arguments from being
+ /// clobbered.
+ SDValue getStackArgumentTokenFactor(SDValue Chain);
+
SDValue getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool AlwaysInline,
const Value *DstSV, uint64_t DstSVOff,
@@ -475,7 +491,7 @@ public:
/// getSetCC - Helper function to make it easier to build SetCC's if you just
/// have an ISD::CondCode instead of an SDValue.
///
- SDValue getSetCC(DebugLoc DL, MVT VT, SDValue LHS, SDValue RHS,
+ SDValue getSetCC(DebugLoc DL, EVT VT, SDValue LHS, SDValue RHS,
ISD::CondCode Cond) {
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
@@ -483,7 +499,7 @@ public:
/// getVSetCC - Helper function to make it easier to build VSetCC's nodes
/// if you just have an ISD::CondCode instead of an SDValue.
///
- SDValue getVSetCC(DebugLoc DL, MVT VT, SDValue LHS, SDValue RHS,
+ SDValue getVSetCC(DebugLoc DL, EVT VT, SDValue LHS, SDValue RHS,
ISD::CondCode Cond) {
return getNode(ISD::VSETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
@@ -499,82 +515,89 @@ public:
/// getVAArg - VAArg produces a result and token chain, and takes a pointer
/// and a source value as input.
- SDValue getVAArg(MVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
+ SDValue getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
SDValue SV);
/// getAtomic - Gets a node for an atomic op, produces result and chain and
/// takes 3 operands
- SDValue getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Chain,
+ SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, const Value* PtrVal,
unsigned Alignment=0);
+ SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp,
+ MachineMemOperand *MMO);
/// getAtomic - Gets a node for an atomic op, produces result and chain and
/// takes 2 operands.
- SDValue getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Chain,
+ SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
SDValue Ptr, SDValue Val, const Value* PtrVal,
unsigned Alignment = 0);
+ SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO);
/// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a
- /// result and takes a list of operands.
+ /// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
+ /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
+ /// less than FIRST_TARGET_MEMORY_OPCODE.
SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
- const MVT *VTs, unsigned NumVTs,
+ const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps,
- MVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, const Value *srcValue, int SVOff,
unsigned Align = 0, bool Vol = false,
bool ReadMem = true, bool WriteMem = true);
SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
const SDValue *Ops, unsigned NumOps,
- MVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, const Value *srcValue, int SVOff,
unsigned Align = 0, bool Vol = false,
bool ReadMem = true, bool WriteMem = true);
+ SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO);
+
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
SDValue getMergeValues(const SDValue *Ops, unsigned NumOps, DebugLoc dl);
- /// getCall - Create a CALL node from the given information.
- ///
- SDValue getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs,
- bool IsTailCall, bool isInreg, SDVTList VTs,
- const SDValue *Operands, unsigned NumOperands,
- unsigned NumFixedArgs);
-
/// getLoad - Loads are not normal binary operators: their result type is not
/// determined by their operands, and they produce a value AND a token chain.
///
- SDValue getLoad(MVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
+ SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
const Value *SV, int SVOffset, bool isVolatile=false,
unsigned Alignment=0);
- SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT,
+ SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr, const Value *SV,
- int SVOffset, MVT EVT, bool isVolatile=false,
+ int SVOffset, EVT MemVT, bool isVolatile=false,
unsigned Alignment=0);
SDValue getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
SDValue getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType,
- MVT VT, SDValue Chain,
- SDValue Ptr, SDValue Offset,
- const Value *SV, int SVOffset, MVT EVT,
- bool isVolatile=false, unsigned Alignment=0);
+ EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset,
+ const Value *SV, int SVOffset, EVT MemVT,
+ bool isVolatile=false, unsigned Alignment=0);
+ SDValue getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType,
+ EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset,
+ EVT MemVT, MachineMemOperand *MMO);
/// getStore - Helper function to build ISD::STORE nodes.
///
SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
const Value *SV, int SVOffset, bool isVolatile=false,
unsigned Alignment=0);
+ SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
+ MachineMemOperand *MMO);
SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
- const Value *SV, int SVOffset, MVT TVT,
+ const Value *SV, int SVOffset, EVT TVT,
bool isVolatile=false, unsigned Alignment=0);
+ SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
+ EVT TVT, MachineMemOperand *MMO);
SDValue getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
/// getSrcValue - Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
- /// getMemOperand - Construct a node to track a memory reference
- /// through the backend.
- SDValue getMemOperand(const MachineMemOperand &MO);
-
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(SDValue Op);
@@ -600,91 +623,104 @@ public:
/// specified node to have the specified return type, Target opcode, and
/// operands. Note that target opcodes are stored as
/// ~TargetOpcode in the node opcode field. The resultant node is returned.
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT, SDValue Op1);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT,
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT,
SDValue Op1, SDValue Op2);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT,
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT,
SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT,
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT,
const SDValue *Ops, unsigned NumOps);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, MVT VT2);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1,
- MVT VT2, const SDValue *Ops, unsigned NumOps);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1,
- MVT VT2, MVT VT3, const SDValue *Ops, unsigned NumOps);
- SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, MVT VT1,
- MVT VT2, MVT VT3, MVT VT4, const SDValue *Ops,
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
+ EVT VT2, const SDValue *Ops, unsigned NumOps);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
+ EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps);
+ SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops,
unsigned NumOps);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1,
- MVT VT2, SDValue Op1);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1,
- MVT VT2, SDValue Op1, SDValue Op2);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1,
- MVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1,
- MVT VT2, MVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
+ EVT VT2, SDValue Op1);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
+ EVT VT2, SDValue Op1, SDValue Op2);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
+ EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
+ EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs,
const SDValue *Ops, unsigned NumOps);
/// MorphNodeTo - These *mutate* the specified node to have the specified
/// return type, opcode, and operands.
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT, SDValue Op1);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT,
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT, SDValue Op1);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT,
SDValue Op1, SDValue Op2);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT,
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT,
SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT,
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT,
const SDValue *Ops, unsigned NumOps);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, MVT VT2);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1,
- MVT VT2, const SDValue *Ops, unsigned NumOps);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1,
- MVT VT2, MVT VT3, const SDValue *Ops, unsigned NumOps);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1,
- MVT VT2, SDValue Op1);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1,
- MVT VT2, SDValue Op1, SDValue Op2);
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1,
- MVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, EVT VT2);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1,
+ EVT VT2, const SDValue *Ops, unsigned NumOps);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1,
+ EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1,
+ EVT VT2, SDValue Op1);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1,
+ EVT VT2, SDValue Op1, SDValue Op2);
+ SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1,
+ EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs,
const SDValue *Ops, unsigned NumOps);
- /// getTargetNode - These are used for target selectors to create a new node
- /// with specified return type(s), target opcode, and operands.
+ /// getMachineNode - These are used for target selectors to create a new node
+ /// with specified return type(s), MachineInstr opcode, and operands.
///
- /// Note that getTargetNode returns the resultant node. If there is already a
- /// node of the specified opcode and operands, it returns that node instead of
- /// the current one.
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, SDValue Op1);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, SDValue Op1,
- SDValue Op2);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
- SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
- const SDValue *Ops, unsigned NumOps);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2,
- SDValue Op1);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
- MVT VT2, SDValue Op1, SDValue Op2);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
- MVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2,
- const SDValue *Ops, unsigned NumOps);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3,
- SDValue Op1, SDValue Op2);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3,
- SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3,
- const SDValue *Ops, unsigned NumOps);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3,
- MVT VT4, const SDValue *Ops, unsigned NumOps);
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl,
- const std::vector<MVT> &ResultTys, const SDValue *Ops,
- unsigned NumOps);
+ /// Note that getMachineNode returns the resultant node. If there is already
+ /// a node of the specified opcode and operands, it returns that node instead
+ /// of the current one.
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+ SDValue Op1);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, SDValue Op1, SDValue Op2);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+ EVT VT3, SDValue Op1, SDValue Op2);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+ EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+ EVT VT3, const SDValue *Ops, unsigned NumOps);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+ EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<EVT> &ResultTys, const SDValue *Ops,
+ unsigned NumOps);
+ MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps);
+
+ /// getTargetExtractSubreg - A convenience function for creating
+ /// TargetInstrInfo::EXTRACT_SUBREG nodes.
+ SDValue getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand);
+
+ /// getTargetInsertSubreg - A convenience function for creating
+ /// TargetInstrInfo::INSERT_SUBREG nodes.
+ SDValue getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg);
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
@@ -792,20 +828,20 @@ public:
/// CreateStackTemporary - Create a stack temporary, suitable for holding the
/// specified value type. If minAlign is specified, the slot size will have
/// at least that alignment.
- SDValue CreateStackTemporary(MVT VT, unsigned minAlign = 1);
+ SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1);
/// CreateStackTemporary - Create a stack temporary suitable for holding
/// either of the specified value types.
- SDValue CreateStackTemporary(MVT VT1, MVT VT2);
+ SDValue CreateStackTemporary(EVT VT1, EVT VT2);
/// FoldConstantArithmetic -
SDValue FoldConstantArithmetic(unsigned Opcode,
- MVT VT,
+ EVT VT,
ConstantSDNode *Cst1,
ConstantSDNode *Cst2);
/// FoldSetCC - Constant fold a setcc to true or false.
- SDValue FoldSetCC(MVT VT, SDValue N1,
+ SDValue FoldSetCC(EVT VT, SDValue N1,
SDValue N2, ISD::CondCode Cond, DebugLoc dl);
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
@@ -835,6 +871,9 @@ public:
/// class to allow target nodes to be understood.
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
+ /// isKnownNeverNaN - Test whether the given SDValue is known to never be NaN.
+ bool isKnownNeverNaN(SDValue Op) const;
+
/// isVerifiedDebugInfoDesc - Returns true if the specified SDValue has
/// been verified as a debug information descriptor.
bool isVerifiedDebugInfoDesc(SDValue Op) const;
@@ -855,7 +894,7 @@ private:
void DeleteNodeNotInCSEMaps(SDNode *N);
void DeallocateNode(SDNode *N);
- unsigned getMVTAlignment(MVT MemoryVT) const;
+ unsigned getEVTAlignment(EVT MemoryVT) const;
void allnodes_clear();
@@ -866,7 +905,7 @@ private:
std::vector<CondCodeSDNode*> CondCodeNodes;
std::vector<SDNode*> ValueTypeNodes;
- std::map<MVT, SDNode*, MVT::compareRawBits> ExtendedValueTypeNodes;
+ std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes;
StringMap<SDNode*> ExternalSymbols;
std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
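
Most of this is a mechanical MVT-to-EVT rename plus the getTargetNode-to-getMachineNode switch. A sketch of both in one place, assuming DAG and dl are in scope at a call site and TargetAddOpc names some target instruction (hypothetical):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    SDValue addOne(SelectionDAG &DAG, DebugLoc dl, SDValue X,
                   unsigned TargetAddOpc) {
      EVT VT = X.getValueType();            // EVT now, not MVT
      SDValue One = DAG.getConstant(1, VT);
      // Pre-isel node construction is unchanged apart from the type.
      SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, X, One);
      // Post-isel construction returns the new MachineSDNode type.
      MachineSDNode *MN = DAG.getMachineNode(TargetAddOpc, dl, VT, X, One);
      (void)MN;
      return Sum;
    }
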
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index d2c0dc420f8a..2b713f10df56 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -19,6 +19,7 @@
#include "llvm/Pass.h"
#include "llvm/Constant.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
class FastISel;
@@ -39,7 +40,7 @@ namespace llvm {
/// SelectionDAGISel - This is the common base class used for SelectionDAG-based
/// pattern-matching instruction selectors.
-class SelectionDAGISel : public FunctionPass {
+class SelectionDAGISel : public MachineFunctionPass {
public:
const TargetMachine &TM;
TargetLowering &TLI;
@@ -62,9 +63,9 @@ public:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnFunction(Function &Fn);
+ virtual bool runOnMachineFunction(MachineFunction &MF);
- unsigned MakeReg(MVT VT);
+ unsigned MakeReg(EVT VT);
virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {}
virtual void InstructionSelect() = 0;
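
Because the base class is now a MachineFunctionPass, the pass boilerplate lives in SelectionDAGISel and a target selector only supplies InstructionSelect. A skeleton, assuming the 2.6-era constructor taking a TargetMachine (everything here is illustrative):

    #include "llvm/CodeGen/SelectionDAGISel.h"
    using namespace llvm;

    namespace {
    struct MyTargetISel : public SelectionDAGISel {
      explicit MyTargetISel(TargetMachine &TM) : SelectionDAGISel(TM) {}

      // Called for each selection DAG during runOnMachineFunction; walk
      // CurDAG and replace ISD nodes with target machine nodes here.
      virtual void InstructionSelect() {}
    };
    } // anonymous namespace
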
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 975253751c8d..d7c8f1ca0096 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -22,18 +22,15 @@
#include "llvm/Constants.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
#include <cassert>
-#include <climits>
namespace llvm {
@@ -52,7 +49,7 @@ template <typename T> struct ilist_traits;
/// SelectionDAG::getVTList(...).
///
struct SDVTList {
- const MVT *VTs;
+ const EVT *VTs;
unsigned int NumVTs;
};
@@ -97,7 +94,7 @@ namespace ISD {
AssertSext, AssertZext,
// Various leaf nodes.
- BasicBlock, VALUETYPE, ARG_FLAGS, CONDCODE, Register,
+ BasicBlock, VALUETYPE, CONDCODE, Register,
Constant, ConstantFP,
GlobalAddress, GlobalTLSAddress, FrameIndex,
JumpTable, ConstantPool, ExternalSymbol,
@@ -121,6 +118,10 @@ namespace ISD {
// address of the exception block on entry to a landing pad block.
EXCEPTIONADDR,
+ // RESULT, OUTCHAIN = LSDAADDR(INCHAIN) - This node represents the
+ // address of the Language Specific Data Area for the enclosing function.
+ LSDAADDR,
+
// RESULT, OUTCHAIN = EHSELECTION(INCHAIN, EXCEPTION) - This node represents
// the selection index of the exception thrown.
EHSELECTION,
@@ -180,38 +181,6 @@ namespace ISD {
// UNDEF - An undefined node
UNDEF,
- /// FORMAL_ARGUMENTS(CHAIN, CC#, ISVARARG, FLAG0, ..., FLAGn) - This node
- /// represents the formal arguments for a function. CC# is a Constant value
- /// indicating the calling convention of the function, and ISVARARG is a
- /// flag that indicates whether the function is varargs or not. This node
- /// has one result value for each incoming argument, plus one for the output
- /// chain. It must be custom legalized. See description of CALL node for
- /// FLAG argument contents explanation.
- ///
- FORMAL_ARGUMENTS,
-
- /// RV1, RV2...RVn, CHAIN = CALL(CHAIN, CALLEE,
- /// ARG0, FLAG0, ARG1, FLAG1, ... ARGn, FLAGn)
- /// This node represents a fully general function call, before the legalizer
- /// runs. This has one result value for each argument / flag pair, plus
- /// a chain result. It must be custom legalized. Flag argument indicates
- /// misc. argument attributes. Currently:
- /// Bit 0 - signness
- /// Bit 1 - 'inreg' attribute
- /// Bit 2 - 'sret' attribute
- /// Bit 4 - 'byval' attribute
- /// Bit 5 - 'nest' attribute
- /// Bit 6-9 - alignment of byval structures
- /// Bit 10-26 - size of byval structures
- /// Bits 31:27 - argument ABI alignment in the first argument piece and
- /// alignment '1' in other argument pieces.
- ///
- /// CALL nodes use the CallSDNode subclass of SDNode, which
- /// additionally carries information about the calling convention,
- /// whether the call is varargs, and if it's marked as a tail call.
- ///
- CALL,
-
// EXTRACT_ELEMENT - This is used to get the lower or upper (determined by
// a Constant, which is required to be operand #1) half of the integer or
// float value specified as operand #0. This is only for use before
@@ -225,9 +194,9 @@ namespace ISD {
// MERGE_VALUES - This node takes multiple discrete operands and returns
// them all as its individual results. This node has exactly the same
- // number of inputs and outputs, and is only valid before legalization.
- // This node is useful for some pieces of the code generator that want to
- // think about a single node with multiple results, not multiple nodes.
+ // number of inputs and outputs. This node is useful for some pieces of the
+ // code generator that want to think about a single node with multiple
+ // results, not multiple nodes.
MERGE_VALUES,
// Simple integer binary arithmetic operators.
@@ -303,7 +272,9 @@ namespace ISD {
INSERT_VECTOR_ELT,
/// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR
- /// identified by the (potentially variable) element number IDX.
+ /// identified by the (potentially variable) element number IDX. If the
+ /// return type is an integer type larger than the element type of the
+ /// vector, the result is extended to the width of the return type.
EXTRACT_VECTOR_ELT,
/// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of
@@ -318,7 +289,7 @@ namespace ISD {
EXTRACT_SUBVECTOR,
/// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
- /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
+ /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
/// values that indicate which value (or undef) each result element will
/// get. These constant ints are accessible through the
/// ShuffleVectorSDNode class. This is quite similar to the Altivec
@@ -363,12 +334,11 @@ namespace ISD {
// them with (op #2) as a CondCodeSDNode.
SETCC,
- // Vector SetCC operator - This evaluates to a vector of integer elements
- // with the high bit in each element set to true if the comparison is true
- // and false if the comparison is false. All other bits in each element
- // are undefined. The operands to this are the left and right operands
- // to compare (ops #0, and #1) and the condition code to compare them with
- // (op #2) as a CondCodeSDNode.
+ // RESULT = VSETCC(LHS, RHS, COND) operator - This evaluates to a vector of
+ // integer elements with all bits of the result elements set to true if the
+ // comparison is true or all cleared if the comparison is false. The
+ // operands to this are the left and right operands to compare (LHS/RHS) and
+ // the condition code to compare them with (COND) as a CondCodeSDNode.
VSETCC,
// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded
@@ -514,12 +484,6 @@ namespace ISD {
// chain, cc, lhs, rhs, block to branch to if condition is true.
BR_CC,
- // RET - Return from function. The first operand is the chain,
- // and any subsequent operands are pairs of return value and return value
- // attributes (see CALL for description of attributes) for the function.
- // This operation can have variable number of operands.
- RET,
-
// INLINEASM - Represents an inline asm block. This node always has two
// return values: a chain and a flag result. The inputs are as follows:
// Operand #0 : Input chain.
@@ -535,12 +499,6 @@ namespace ISD {
DBG_LABEL,
EH_LABEL,
- // DECLARE - Represents a llvm.dbg.declare intrinsic. It's used to track
- // local variable declarations for debugging information. First operand is
- // a chain, while the next two operands are first two arguments (address
- // and variable) of a llvm.dbg.declare instruction.
- DECLARE,
-
// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a
// value, the same type as the pointer type for the system, and an output
// chain.
@@ -575,11 +533,6 @@ namespace ISD {
// make reference to a value in the LLVM IR.
SRCVALUE,
- // MEMOPERAND - This is a node that contains a MachineMemOperand which
- // records information about a memory reference. This is used to make
- // AliasAnalysis queries from the backend.
- MEMOPERAND,
-
// PCMARKER - This corresponds to the pcmarker intrinsic.
PCMARKER,
@@ -656,10 +609,17 @@ namespace ISD {
ATOMIC_LOAD_UMIN,
ATOMIC_LOAD_UMAX,
- // BUILTIN_OP_END - This must be the last enum value in this list.
+ /// BUILTIN_OP_END - This must be the last enum value in this list.
+ /// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END
};
+ /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
+ /// which do not reference a specific memory location should be less than
+ /// this value. Those that do must not be less than this value, and can
+ /// be used with SelectionDAG::getMemIntrinsicNode.
+ static const int FIRST_TARGET_MEMORY_OPCODE = 1 << 14;
+
/// Node predicates
/// isBuildVectorAllOnes - Return true if the specified node is a
@@ -893,7 +853,7 @@ public:
/// getValueType - Return the ValueType of the referenced return value.
///
- inline MVT getValueType() const;
+ inline EVT getValueType() const;
/// getValueSizeInBits - Returns the size of the value in bits.
///
@@ -906,6 +866,7 @@ public:
inline unsigned getNumOperands() const;
inline const SDValue &getOperand(unsigned i) const;
inline uint64_t getConstantOperandVal(unsigned i) const;
+ inline bool isTargetMemoryOpcode() const;
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
inline unsigned getMachineOpcode() const;
@@ -1002,7 +963,7 @@ public:
/// getResNo - Convenience function for get().getResNo().
unsigned getResNo() const { return Val.getResNo(); }
/// getValueType - Convenience function for get().getValueType().
- MVT getValueType() const { return Val.getValueType(); }
+ EVT getValueType() const { return Val.getValueType(); }
/// operator== - Convenience function for get().operator==
bool operator==(const SDValue &V) const {
@@ -1070,17 +1031,17 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
private:
/// NodeType - The operation that this node performs.
///
- short NodeType;
+ int16_t NodeType;
/// OperandsNeedDelete - This is true if OperandList was new[]'d. If true,
/// then they will be delete[]'d when the node is destroyed.
- unsigned short OperandsNeedDelete : 1;
+ uint16_t OperandsNeedDelete : 1;
protected:
/// SubclassData - This member is defined by this class, but is not used for
/// anything. Subclasses can use it to hold whatever state they find useful.
/// This field is initialized to zero by the ctor.
- unsigned short SubclassData : 15;
+ uint16_t SubclassData : 15;
private:
/// NodeId - Unique id per SDNode in the DAG.
@@ -1092,7 +1053,7 @@ private:
/// ValueList - The types of the values this node defines. SDNode's may
/// define multiple values simultaneously.
- const MVT *ValueList;
+ const EVT *ValueList;
/// UseList - List of uses for this SDNode.
SDUse *UseList;
@@ -1104,7 +1065,7 @@ private:
DebugLoc debugLoc;
/// getValueTypeList - Return a pointer to the specified value type.
- static const MVT *getValueTypeList(MVT VT);
+ static const EVT *getValueTypeList(EVT VT);
friend class SelectionDAG;
friend struct ilist_traits<SDNode>;
@@ -1124,6 +1085,13 @@ public:
/// \<target\>ISD namespace).
bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
+ /// isTargetMemoryOpcode - Test if this node has a target-specific
+ /// memory-referencing opcode (in the \<target\>ISD namespace and
+ /// greater than or equal to FIRST_TARGET_MEMORY_OPCODE).
+ bool isTargetMemoryOpcode() const {
+ return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
+ }
+
/// isMachineOpcode - Test if this node has a post-isel opcode, directly
/// corresponding to a MachineInstr opcode.
bool isMachineOpcode() const { return NodeType < 0; }
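// Taken together, the NodeType ranges now partition as follows (a summary of
// the three predicates above, not new behavior):
//
//   NodeType < 0                           -> isMachineOpcode()
//   0 <= NodeType < BUILTIN_OP_END         -> generic ISD opcode
//   NodeType >= BUILTIN_OP_END             -> isTargetOpcode()
//   NodeType >= FIRST_TARGET_MEMORY_OPCODE -> isTargetMemoryOpcode() as well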
@@ -1168,14 +1136,16 @@ public:
/// use_iterator - This class provides iterator support for SDUse
/// operands that use a specific SDNode.
class use_iterator
- : public forward_iterator<SDUse, ptrdiff_t> {
+ : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
SDUse *Op;
explicit use_iterator(SDUse *op) : Op(op) {
}
friend class SDNode;
public:
- typedef forward_iterator<SDUse, ptrdiff_t>::reference reference;
- typedef forward_iterator<SDUse, ptrdiff_t>::pointer pointer;
+ typedef std::iterator<std::forward_iterator_tag,
+ SDUse, ptrdiff_t>::reference reference;
+ typedef std::iterator<std::forward_iterator_tag,
+ SDUse, ptrdiff_t>::pointer pointer;
use_iterator(const use_iterator &I) : Op(I.Op) {}
use_iterator() : Op(0) {}
@@ -1278,7 +1248,7 @@ public:
/// to which the flag operand points. Otherwise return NULL.
SDNode *getFlaggedNode() const {
if (getNumOperands() != 0 &&
- getOperand(getNumOperands()-1).getValueType() == MVT::Flag)
+ getOperand(getNumOperands()-1).getValueType().getSimpleVT() == MVT::Flag)
return getOperand(getNumOperands()-1).getNode();
return 0;
}
@@ -1306,7 +1276,7 @@ public:
/// getValueType - Return the type of a specified result.
///
- MVT getValueType(unsigned ResNo) const {
+ EVT getValueType(unsigned ResNo) const {
assert(ResNo < NumValues && "Illegal result number!");
return ValueList[ResNo];
}
@@ -1317,7 +1287,7 @@ public:
return getValueType(ResNo).getSizeInBits();
}
- typedef const MVT* value_iterator;
+ typedef const EVT* value_iterator;
value_iterator value_begin() const { return ValueList; }
value_iterator value_end() const { return ValueList+NumValues; }
@@ -1332,6 +1302,7 @@ public:
void dump() const;
void dumpr() const;
void dump(const SelectionDAG *G) const;
+ void dumpr(const SelectionDAG *G) const;
static bool classof(const SDNode *) { return true; }
@@ -1344,7 +1315,7 @@ public:
void addUse(SDUse &U) { U.addToList(&UseList); }
protected:
- static SDVTList getSDVTList(MVT VT) {
+ static SDVTList getSDVTList(EVT VT) {
SDVTList Ret = { getValueTypeList(VT), 1 };
return Ret;
}
@@ -1438,7 +1409,7 @@ protected:
inline unsigned SDValue::getOpcode() const {
return Node->getOpcode();
}
-inline MVT SDValue::getValueType() const {
+inline EVT SDValue::getValueType() const {
return Node->getValueType(ResNo);
}
inline unsigned SDValue::getNumOperands() const {
@@ -1453,6 +1424,9 @@ inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
inline bool SDValue::isTargetOpcode() const {
return Node->isTargetOpcode();
}
+inline bool SDValue::isTargetMemoryOpcode() const {
+ return Node->isTargetMemoryOpcode();
+}
inline bool SDValue::isMachineOpcode() const {
return Node->isMachineOpcode();
}
@@ -1549,45 +1523,57 @@ public:
class MemSDNode : public SDNode {
private:
// MemoryVT - VT of in-memory value.
- MVT MemoryVT;
-
- //! SrcValue - Memory location for alias analysis.
- const Value *SrcValue;
+ EVT MemoryVT;
- //! SVOffset - Memory location offset. Note that base is defined in MemSDNode
- int SVOffset;
+protected:
+ /// MMO - Memory reference information.
+ MachineMemOperand *MMO;
public:
- MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT MemoryVT,
- const Value *srcValue, int SVOff,
- unsigned alignment, bool isvolatile);
+ MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT MemoryVT,
+ MachineMemOperand *MMO);
MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, const SDValue *Ops,
- unsigned NumOps, MVT MemoryVT, const Value *srcValue, int SVOff,
- unsigned alignment, bool isvolatile);
+ unsigned NumOps, EVT MemoryVT, MachineMemOperand *MMO);
+
+ bool readMem() const { return MMO->isLoad(); }
+ bool writeMem() const { return MMO->isStore(); }
/// Returns alignment and volatility of the memory access
- unsigned getAlignment() const { return (1u << (SubclassData >> 6)) >> 1; }
- bool isVolatile() const { return (SubclassData >> 5) & 1; }
+ unsigned getOriginalAlignment() const {
+ return MMO->getBaseAlignment();
+ }
+ unsigned getAlignment() const {
+ return MMO->getAlignment();
+ }
/// getRawSubclassData - Return the SubclassData value, which contains an
- /// encoding of the alignment and volatile information, as well as bits
- /// used by subclasses. This function should only be used to compute a
- /// FoldingSetNodeID value.
+ /// encoding of the volatile flag, as well as bits used by subclasses. This
+ /// function should only be used to compute a FoldingSetNodeID value.
unsigned getRawSubclassData() const {
return SubclassData;
}
+ bool isVolatile() const { return (SubclassData >> 5) & 1; }
+
/// Returns the SrcValue and offset that describe the location of the access
- const Value *getSrcValue() const { return SrcValue; }
- int getSrcValueOffset() const { return SVOffset; }
+ const Value *getSrcValue() const { return MMO->getValue(); }
+ int64_t getSrcValueOffset() const { return MMO->getOffset(); }
/// getMemoryVT - Return the type of the in-memory value.
- MVT getMemoryVT() const { return MemoryVT; }
+ EVT getMemoryVT() const { return MemoryVT; }
/// getMemOperand - Return a MachineMemOperand object describing the memory
/// reference performed by operation.
- MachineMemOperand getMemOperand() const;
+ MachineMemOperand *getMemOperand() const { return MMO; }
+
+ /// refineAlignment - Update this MemSDNode's MachineMemOperand information
+ /// to reflect the alignment of NewMMO, if it has a greater alignment.
+ /// This must only be used when the new alignment applies to all users of
+ /// this MachineMemOperand.
+ void refineAlignment(const MachineMemOperand *NewMMO) {
+ MMO->refineAlignment(NewMMO);
+ }
const SDValue &getChain() const { return getOperand(0); }
const SDValue &getBasePtr() const {
@@ -1613,9 +1599,7 @@ public:
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
- N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
- N->getOpcode() == ISD::INTRINSIC_VOID ||
- N->isTargetOpcode();
+ N->isTargetMemoryOpcode();
}
};
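// Usage sketch for the reworked MemSDNode API (the local variable names are
// illustrative; MemSDNode *M is assumed to come from a DAG walk):
//
//   if (MemSDNode *M = dyn_cast<MemSDNode>(N)) {
//     MachineMemOperand *MMO = M->getMemOperand(); // now returned by pointer
//     unsigned Align = M->getAlignment();          // forwarded to the MMO
//     bool Vol = M->isVolatile();                  // still in SubclassData
//     const Value *SV = M->getSrcValue();          // MMO->getValue()
//   }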
@@ -1633,19 +1617,20 @@ public:
// Swp: swap value
// SrcVal: address to update as a Value (used for MemOperand)
// Align: alignment of memory
- AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, MVT MemVT,
+ AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, EVT MemVT,
SDValue Chain, SDValue Ptr,
- SDValue Cmp, SDValue Swp, const Value* SrcVal,
- unsigned Align=0)
- : MemSDNode(Opc, dl, VTL, MemVT, SrcVal, /*SVOffset=*/0,
- Align, /*isVolatile=*/true) {
+ SDValue Cmp, SDValue Swp, MachineMemOperand *MMO)
+ : MemSDNode(Opc, dl, VTL, MemVT, MMO) {
+ assert(readMem() && "Atomic MachineMemOperand is not a load!");
+ assert(writeMem() && "Atomic MachineMemOperand is not a store!");
InitOperands(Ops, Chain, Ptr, Cmp, Swp);
}
- AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, MVT MemVT,
+ AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, EVT MemVT,
SDValue Chain, SDValue Ptr,
- SDValue Val, const Value* SrcVal, unsigned Align=0)
- : MemSDNode(Opc, dl, VTL, MemVT, SrcVal, /*SVOffset=*/0,
- Align, /*isVolatile=*/true) {
+ SDValue Val, MachineMemOperand *MMO)
+ : MemSDNode(Opc, dl, VTL, MemVT, MMO) {
+ assert(readMem() && "Atomic MachineMemOperand is not a load!");
+ assert(writeMem() && "Atomic MachineMemOperand is not a store!");
InitOperands(Ops, Chain, Ptr, Val);
}
@@ -1675,24 +1660,18 @@ public:
}
};
-/// MemIntrinsicSDNode - This SDNode is used for target intrinsic that touches
-/// memory and need an associated memory operand.
-///
+/// MemIntrinsicSDNode - This SDNode is used for target intrinsics that touch
+/// memory and need an associated MachineMemOperand. Its opcode may be
+/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, or a target-specific opcode with a
+/// value not less than FIRST_TARGET_MEMORY_OPCODE.
class MemIntrinsicSDNode : public MemSDNode {
- bool ReadMem; // Intrinsic reads memory
- bool WriteMem; // Intrinsic writes memory
public:
MemIntrinsicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
const SDValue *Ops, unsigned NumOps,
- MVT MemoryVT, const Value *srcValue, int SVO,
- unsigned Align, bool Vol, bool ReadMem, bool WriteMem)
- : MemSDNode(Opc, dl, VTs, Ops, NumOps, MemoryVT, srcValue, SVO, Align, Vol),
- ReadMem(ReadMem), WriteMem(WriteMem) {
+ EVT MemoryVT, MachineMemOperand *MMO)
+ : MemSDNode(Opc, dl, VTs, Ops, NumOps, MemoryVT, MMO) {
}
- bool readMem() const { return ReadMem; }
- bool writeMem() const { return WriteMem; }
-
// Methods to support isa and dyn_cast
static bool classof(const MemIntrinsicSDNode *) { return true; }
static bool classof(const SDNode *N) {
@@ -1700,7 +1679,7 @@ public:
// early a node with a target opcode can be of this class
return N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
N->getOpcode() == ISD::INTRINSIC_VOID ||
- N->isTargetOpcode();
+ N->isTargetMemoryOpcode();
}
};
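// With the ReadMem/WriteMem members gone, an intrinsic's memory behavior is
// read off the MachineMemOperand it was created with; a sketch, assuming the
// MMO was built with the appropriate load/store flags:
//
//   MemIntrinsicSDNode *MI = cast<MemIntrinsicSDNode>(N);
//   bool Reads  = MI->readMem();  // MMO->isLoad(), inherited from MemSDNode
//   bool Writes = MI->writeMem(); // MMO->isStore()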
@@ -1720,7 +1699,7 @@ class ShuffleVectorSDNode : public SDNode {
const int *Mask;
protected:
friend class SelectionDAG;
- ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
+ ShuffleVectorSDNode(EVT VT, DebugLoc dl, SDValue N1, SDValue N2,
const int *M)
: SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) {
InitOperands(Ops, N1, N2);
@@ -1728,7 +1707,7 @@ protected:
public:
void getMask(SmallVectorImpl<int> &M) const {
- MVT VT = getValueType(0);
+ EVT VT = getValueType(0);
M.clear();
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
M.push_back(Mask[i]);
@@ -1743,7 +1722,7 @@ public:
assert(isSplat() && "Cannot get splat index for non-splat!");
return Mask[0];
}
- static bool isSplatMask(const int *Mask, MVT VT);
+ static bool isSplatMask(const int *Mask, EVT VT);
static bool classof(const ShuffleVectorSDNode *) { return true; }
static bool classof(const SDNode *N) {
@@ -1754,7 +1733,7 @@ public:
class ConstantSDNode : public SDNode {
const ConstantInt *Value;
friend class SelectionDAG;
- ConstantSDNode(bool isTarget, const ConstantInt *val, MVT VT)
+ ConstantSDNode(bool isTarget, const ConstantInt *val, EVT VT)
: SDNode(isTarget ? ISD::TargetConstant : ISD::Constant,
DebugLoc::getUnknownLoc(), getSDVTList(VT)), Value(val) {
}
@@ -1778,7 +1757,7 @@ public:
class ConstantFPSDNode : public SDNode {
const ConstantFP *Value;
friend class SelectionDAG;
- ConstantFPSDNode(bool isTarget, const ConstantFP *val, MVT VT)
+ ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
: SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP,
DebugLoc::getUnknownLoc(), getSDVTList(VT)), Value(val) {
}
@@ -1807,7 +1786,7 @@ public:
}
bool isExactlyValue(const APFloat& V) const;
- bool isValueValidForType(MVT VT, const APFloat& Val);
+ bool isValueValidForType(EVT VT, const APFloat& Val);
static bool classof(const ConstantFPSDNode *) { return true; }
static bool classof(const SDNode *N) {
@@ -1821,7 +1800,7 @@ class GlobalAddressSDNode : public SDNode {
int64_t Offset;
unsigned char TargetFlags;
friend class SelectionDAG;
- GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, MVT VT,
+ GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, EVT VT,
int64_t o, unsigned char TargetFlags);
public:
@@ -1843,7 +1822,7 @@ public:
class FrameIndexSDNode : public SDNode {
int FI;
friend class SelectionDAG;
- FrameIndexSDNode(int fi, MVT VT, bool isTarg)
+ FrameIndexSDNode(int fi, EVT VT, bool isTarg)
: SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
DebugLoc::getUnknownLoc(), getSDVTList(VT)), FI(fi) {
}
@@ -1862,7 +1841,7 @@ class JumpTableSDNode : public SDNode {
int JTI;
unsigned char TargetFlags;
friend class SelectionDAG;
- JumpTableSDNode(int jti, MVT VT, bool isTarg, unsigned char TF)
+ JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)
: SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
DebugLoc::getUnknownLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
}
@@ -1887,7 +1866,7 @@ class ConstantPoolSDNode : public SDNode {
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
unsigned char TargetFlags;
friend class SelectionDAG;
- ConstantPoolSDNode(bool isTarget, Constant *c, MVT VT, int o, unsigned Align,
+ ConstantPoolSDNode(bool isTarget, Constant *c, EVT VT, int o, unsigned Align,
unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
DebugLoc::getUnknownLoc(),
@@ -1896,7 +1875,7 @@ class ConstantPoolSDNode : public SDNode {
Val.ConstVal = c;
}
ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
- MVT VT, int o, unsigned Align, unsigned char TF)
+ EVT VT, int o, unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
DebugLoc::getUnknownLoc(),
getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {
@@ -1988,10 +1967,6 @@ public:
/// used when the SelectionDAG needs to make a simple reference to something
/// in the LLVM IR representation.
///
-/// Note that this is not used for carrying alias information; that is done
-/// with MemOperandSDNode, which includes a Value which is required to be a
-/// pointer, and several other fields specific to memory references.
-///
class SrcValueSDNode : public SDNode {
const Value *V;
friend class SelectionDAG;
@@ -2011,32 +1986,10 @@ public:
};
-/// MemOperandSDNode - An SDNode that holds a MachineMemOperand. This is
-/// used to represent a reference to memory after ISD::LOAD
-/// and ISD::STORE have been lowered.
-///
-class MemOperandSDNode : public SDNode {
- friend class SelectionDAG;
- /// Create a MachineMemOperand node
- explicit MemOperandSDNode(const MachineMemOperand &mo)
- : SDNode(ISD::MEMOPERAND, DebugLoc::getUnknownLoc(),
- getSDVTList(MVT::Other)), MO(mo) {}
-
-public:
- /// MO - The contained MachineMemOperand.
- const MachineMemOperand MO;
-
- static bool classof(const MemOperandSDNode *) { return true; }
- static bool classof(const SDNode *N) {
- return N->getOpcode() == ISD::MEMOPERAND;
- }
-};
-
-
class RegisterSDNode : public SDNode {
unsigned Reg;
friend class SelectionDAG;
- RegisterSDNode(unsigned reg, MVT VT)
+ RegisterSDNode(unsigned reg, EVT VT)
: SDNode(ISD::Register, DebugLoc::getUnknownLoc(),
getSDVTList(VT)), Reg(reg) {
}
@@ -2054,10 +2007,10 @@ class DbgStopPointSDNode : public SDNode {
SDUse Chain;
unsigned Line;
unsigned Column;
- Value *CU;
+ MDNode *CU;
friend class SelectionDAG;
DbgStopPointSDNode(SDValue ch, unsigned l, unsigned c,
- Value *cu)
+ MDNode *cu)
: SDNode(ISD::DBG_STOPPOINT, DebugLoc::getUnknownLoc(),
getSDVTList(MVT::Other)), Line(l), Column(c), CU(cu) {
InitOperands(&Chain, ch);
@@ -2065,7 +2018,7 @@ class DbgStopPointSDNode : public SDNode {
public:
unsigned getLine() const { return Line; }
unsigned getColumn() const { return Column; }
- Value *getCompileUnit() const { return CU; }
+ MDNode *getCompileUnit() const { return CU; }
static bool classof(const DbgStopPointSDNode *) { return true; }
static bool classof(const SDNode *N) {
@@ -2096,7 +2049,7 @@ class ExternalSymbolSDNode : public SDNode {
unsigned char TargetFlags;
friend class SelectionDAG;
- ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, MVT VT)
+ ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)
: SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
DebugLoc::getUnknownLoc(),
getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {
@@ -2135,7 +2088,7 @@ public:
class CvtRndSatSDNode : public SDNode {
ISD::CvtCode CvtCode;
friend class SelectionDAG;
- explicit CvtRndSatSDNode(MVT VT, DebugLoc dl, const SDValue *Ops,
+ explicit CvtRndSatSDNode(EVT VT, DebugLoc dl, const SDValue *Ops,
unsigned NumOps, ISD::CvtCode Code)
: SDNode(ISD::CONVERT_RNDSAT, dl, getSDVTList(VT), Ops, NumOps),
CvtCode(Code) {
@@ -2233,93 +2186,54 @@ namespace ISD {
/// getRawBits - Represent the flags as a bunch of bits.
uint64_t getRawBits() const { return Flags; }
};
-}
-
-/// ARG_FLAGSSDNode - Leaf node holding parameter flags.
-class ARG_FLAGSSDNode : public SDNode {
- ISD::ArgFlagsTy TheFlags;
- friend class SelectionDAG;
- explicit ARG_FLAGSSDNode(ISD::ArgFlagsTy Flags)
- : SDNode(ISD::ARG_FLAGS, DebugLoc::getUnknownLoc(),
- getSDVTList(MVT::Other)), TheFlags(Flags) {
- }
-public:
- ISD::ArgFlagsTy getArgFlags() const { return TheFlags; }
-
- static bool classof(const ARG_FLAGSSDNode *) { return true; }
- static bool classof(const SDNode *N) {
- return N->getOpcode() == ISD::ARG_FLAGS;
- }
-};
-
-/// CallSDNode - Node for calls -- ISD::CALL.
-class CallSDNode : public SDNode {
- unsigned CallingConv;
- bool IsVarArg;
- bool IsTailCall;
- unsigned NumFixedArgs;
- // We might eventually want a full-blown Attributes for the result; that
- // will expand the size of the representation. At the moment we only
- // need Inreg.
- bool Inreg;
- friend class SelectionDAG;
- CallSDNode(unsigned cc, DebugLoc dl, bool isvararg, bool istailcall,
- bool isinreg, SDVTList VTs, const SDValue *Operands,
- unsigned numOperands, unsigned numFixedArgs)
- : SDNode(ISD::CALL, dl, VTs, Operands, numOperands),
- CallingConv(cc), IsVarArg(isvararg), IsTailCall(istailcall),
- NumFixedArgs(numFixedArgs), Inreg(isinreg) {}
-public:
- unsigned getCallingConv() const { return CallingConv; }
- unsigned isVarArg() const { return IsVarArg; }
- unsigned isTailCall() const { return IsTailCall; }
- unsigned isInreg() const { return Inreg; }
-
- /// Set this call to not be marked as a tail call. Normally setter
- /// methods in SDNodes are unsafe because it breaks the CSE map,
- /// but we don't include the tail call flag for calls so it's ok
- /// in this case.
- void setNotTailCall() { IsTailCall = false; }
-
- SDValue getChain() const { return getOperand(0); }
- SDValue getCallee() const { return getOperand(1); }
- unsigned getNumArgs() const { return (getNumOperands() - 2) / 2; }
- unsigned getNumFixedArgs() const {
- if (isVarArg())
- return NumFixedArgs;
- else
- return getNumArgs();
- }
- SDValue getArg(unsigned i) const { return getOperand(2+2*i); }
- SDValue getArgFlagsVal(unsigned i) const {
- return getOperand(3+2*i);
- }
- ISD::ArgFlagsTy getArgFlags(unsigned i) const {
- return cast<ARG_FLAGSSDNode>(getArgFlagsVal(i).getNode())->getArgFlags();
- }
-
- unsigned getNumRetVals() const { return getNumValues() - 1; }
- MVT getRetValType(unsigned i) const { return getValueType(i); }
+ /// InputArg - This struct carries flags and type information about a
+ /// single incoming (formal) argument or incoming (from the perspective
+ /// of the caller) return value virtual register.
+ ///
+ struct InputArg {
+ ArgFlagsTy Flags;
+ EVT VT;
+ bool Used;
+
+ InputArg() : VT(MVT::Other), Used(false) {}
+ InputArg(ISD::ArgFlagsTy flags, EVT vt, bool used)
+ : Flags(flags), VT(vt), Used(used) {
+ assert(VT.isSimple() &&
+ "InputArg value type must be Simple!");
+ }
+ };
- static bool classof(const CallSDNode *) { return true; }
- static bool classof(const SDNode *N) {
- return N->getOpcode() == ISD::CALL;
- }
-};
+ /// OutputArg - This struct carries flags and a value for a
+ /// single outgoing (actual) argument or outgoing (from the perspective
+ /// of the caller) return value virtual register.
+ ///
+ struct OutputArg {
+ ArgFlagsTy Flags;
+ SDValue Val;
+ bool IsFixed;
+
+ OutputArg() : IsFixed(false) {}
+ OutputArg(ISD::ArgFlagsTy flags, SDValue val, bool isfixed)
+ : Flags(flags), Val(val), IsFixed(isfixed) {
+ assert(Val.getValueType().isSimple() &&
+ "OutputArg value type must be Simple!");
+ }
+ };
+}
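// A minimal construction sketch for the new argument records (Flags and Val
// are assumed to be in scope; the values are illustrative):
//
//   ISD::InputArg  In(Flags, MVT::i32, /*used=*/true);  // formal argument
//   ISD::OutputArg Out(Flags, Val, /*isfixed=*/true);   // actual argument
//
// Both constructors assert that the value type is simple, so extended EVTs
// must be legalized down to MVTs before reaching this point.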
-/// VTSDNode - This class is used to represent MVT's, which are used
+/// VTSDNode - This class is used to represent EVT's, which are used
/// to parameterize some operations.
class VTSDNode : public SDNode {
- MVT ValueType;
+ EVT ValueType;
friend class SelectionDAG;
- explicit VTSDNode(MVT VT)
+ explicit VTSDNode(EVT VT)
: SDNode(ISD::VALUETYPE, DebugLoc::getUnknownLoc(),
getSDVTList(MVT::Other)), ValueType(VT) {
}
public:
- MVT getVT() const { return ValueType; }
+ EVT getVT() const { return ValueType; }
static bool classof(const VTSDNode *) { return true; }
static bool classof(const SDNode *N) {
@@ -2340,9 +2254,8 @@ class LSBaseSDNode : public MemSDNode {
public:
LSBaseSDNode(ISD::NodeType NodeTy, DebugLoc dl, SDValue *Operands,
unsigned numOperands, SDVTList VTs, ISD::MemIndexedMode AM,
- MVT VT, const Value *SV, int SVO, unsigned Align, bool Vol)
- : MemSDNode(NodeTy, dl, VTs, VT, SV, SVO, Align, Vol) {
- assert(Align != 0 && "Loads and stores should have non-zero aligment");
+ EVT MemVT, MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, dl, VTs, MemVT, MMO) {
SubclassData |= AM << 2;
assert(getAddressingMode() == AM && "MemIndexedMode encoding error!");
InitOperands(Ops, Operands, numOperands);
@@ -2378,12 +2291,14 @@ public:
class LoadSDNode : public LSBaseSDNode {
friend class SelectionDAG;
LoadSDNode(SDValue *ChainPtrOff, DebugLoc dl, SDVTList VTs,
- ISD::MemIndexedMode AM, ISD::LoadExtType ETy, MVT LVT,
- const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
+ ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
+ MachineMemOperand *MMO)
: LSBaseSDNode(ISD::LOAD, dl, ChainPtrOff, 3,
- VTs, AM, LVT, SV, O, Align, Vol) {
+ VTs, AM, MemVT, MMO) {
SubclassData |= (unsigned short)ETy;
assert(getExtensionType() == ETy && "LoadExtType encoding error!");
+ assert(readMem() && "Load MachineMemOperand is not a load!");
+ assert(!writeMem() && "Load MachineMemOperand is a store!");
}
public:
@@ -2407,12 +2322,14 @@ public:
class StoreSDNode : public LSBaseSDNode {
friend class SelectionDAG;
StoreSDNode(SDValue *ChainValuePtrOff, DebugLoc dl, SDVTList VTs,
- ISD::MemIndexedMode AM, bool isTrunc, MVT SVT,
- const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
+ ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
+ MachineMemOperand *MMO)
: LSBaseSDNode(ISD::STORE, dl, ChainValuePtrOff, 4,
- VTs, AM, SVT, SV, O, Align, Vol) {
+ VTs, AM, MemVT, MMO) {
SubclassData |= (unsigned short)isTrunc;
assert(isTruncatingStore() == isTrunc && "isTrunc encoding error!");
+ assert(!readMem() && "Store MachineMemOperand is a load!");
+ assert(writeMem() && "Store MachineMemOperand is not a store!");
}
public:
@@ -2431,8 +2348,47 @@ public:
}
};
+/// MachineSDNode - An SDNode that represents everything that will be needed
+/// to construct a MachineInstr. These nodes are created during the
+/// instruction selection phase proper.
+///
+class MachineSDNode : public SDNode {
+public:
+ typedef MachineMemOperand **mmo_iterator;
+
+private:
+ friend class SelectionDAG;
+ MachineSDNode(unsigned Opc, const DebugLoc DL, SDVTList VTs)
+ : SDNode(Opc, DL, VTs), MemRefs(0), MemRefsEnd(0) {}
+
+ /// LocalOperands - Operands for this instruction, if they fit here. If
+ /// they don't, this field is unused.
+ SDUse LocalOperands[4];
+
+ /// MemRefs - Memory reference descriptions for this instruction.
+ mmo_iterator MemRefs;
+ mmo_iterator MemRefsEnd;
+
+public:
+ mmo_iterator memoperands_begin() const { return MemRefs; }
+ mmo_iterator memoperands_end() const { return MemRefsEnd; }
+ bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+
+ /// setMemRefs - Assign this MachineSDNode's memory reference descriptor
+ /// list. This does not transfer ownership.
+ void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
+ MemRefs = NewMemRefs;
+ MemRefsEnd = NewMemRefsEnd;
+ }
+
+ static bool classof(const MachineSDNode *) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->isMachineOpcode();
+ }
+};
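// Sketch of populating a MachineSDNode's memory references. Since setMemRefs
// does not take ownership, the array must outlive the node; Allocator below is
// assumed to be a BumpPtrAllocator or similar long-lived storage:
//
//   MachineSDNode::mmo_iterator MemRefs =
//     Allocator.Allocate<MachineMemOperand *>(1);
//   MemRefs[0] = MMO;
//   MN->setMemRefs(MemRefs, MemRefs + 1);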
-class SDNodeIterator : public forward_iterator<SDNode, ptrdiff_t> {
+class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
+ SDNode, ptrdiff_t> {
SDNode *Node;
unsigned Operand;
@@ -2490,7 +2446,7 @@ typedef LoadSDNode LargestSDNode;
/// MostAlignedSDNode - The SDNode class with the greatest alignment
/// requirement.
///
-typedef ARG_FLAGSSDNode MostAlignedSDNode;
+typedef GlobalAddressSDNode MostAlignedSDNode;
namespace ISD {
/// isNormalLoad - Returns true if the specified node is a non-extending
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index e661c58940e1..1f0dd2108817 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -23,8 +23,10 @@
namespace llvm {
class Type;
+ class LLVMContext;
+ struct EVT;
- struct MVT { // MVT = Machine Value Type
+ class MVT { // MVT = Machine Value Type
public:
enum SimpleValueType {
// If you change this numbering, you must change the values in
@@ -59,184 +61,368 @@ namespace llvm {
v8i16 = 21, // 8 x i16
v16i16 = 22, // 16 x i16
v2i32 = 23, // 2 x i32
- v3i32 = 24, // 3 x i32
- v4i32 = 25, // 4 x i32
- v8i32 = 26, // 8 x i32
- v1i64 = 27, // 1 x i64
- v2i64 = 28, // 2 x i64
- v4i64 = 29, // 4 x i64
-
- v2f32 = 30, // 2 x f32
- v3f32 = 31, // 3 x f32
- v4f32 = 32, // 4 x f32
- v8f32 = 33, // 8 x f32
- v2f64 = 34, // 2 x f64
- v4f64 = 35, // 4 x f64
-
+ v4i32 = 24, // 4 x i32
+ v8i32 = 25, // 8 x i32
+ v1i64 = 26, // 1 x i64
+ v2i64 = 27, // 2 x i64
+ v4i64 = 28, // 4 x i64
+
+ v2f32 = 29, // 2 x f32
+ v4f32 = 30, // 4 x f32
+ v8f32 = 31, // 8 x f32
+ v2f64 = 32, // 2 x f64
+ v4f64 = 33, // 4 x f64
+
FIRST_VECTOR_VALUETYPE = v2i8,
LAST_VECTOR_VALUETYPE = v4f64,
- LAST_VALUETYPE = 36, // This always remains at the end of the list.
+ LAST_VALUETYPE = 34, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
- // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
+ // EVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
MAX_ALLOWED_VALUETYPE = 64,
+ // Metadata - This is MDNode or MDString.
+ Metadata = 250,
+
// iPTRAny - An int value the size of the pointer of the current
// target to any address space. This must only be used internal to
// tblgen. Other than for overloading, we treat iPTRAny the same as iPTR.
- iPTRAny = 252,
+ iPTRAny = 251,
+
+ // vAny - A vector with any length and element size. This is used
+ // for intrinsics that have overloadings based on vector types.
+ // This is only for tblgen's consumption!
+ vAny = 252,
// fAny - Any floating-point or vector floating-point value. This is used
// for intrinsics that have overloadings based on floating-point types.
// This is only for tblgen's consumption!
- fAny = 253,
+ fAny = 253,
// iAny - An integer or vector integer value of any bit width. This is
// used for intrinsics that have overloadings based on integer bit widths.
// This is only for tblgen's consumption!
- iAny = 254,
+ iAny = 254,
// iPTR - An int value the size of the pointer of the current
// target. This should only be used internal to tblgen!
- iPTR = 255,
+ iPTR = 255,
// LastSimpleValueType - The greatest valid SimpleValueType value.
- LastSimpleValueType = 255
- };
+ LastSimpleValueType = 255,
- private:
- /// This union holds low-level value types. Valid values include any of
- /// the values in the SimpleValueType enum, or any value returned from one
- /// of the MVT methods. Any value type equal to one of the SimpleValueType
- /// enum values is a "simple" value type. All others are "extended".
- ///
- /// Note that simple doesn't necessary mean legal for the target machine.
- /// All legal value types must be simple, but often there are some simple
- /// value types that are not legal.
- ///
- union {
- uintptr_t V;
- const Type *LLVMTy;
+ // INVALID_SIMPLE_VALUE_TYPE - Simple value types greater than or equal
+ // to this are considered extended value types.
+ INVALID_SIMPLE_VALUE_TYPE = LastSimpleValueType + 1
};
- public:
- MVT() {}
- MVT(SimpleValueType S) : V(S) {}
+ SimpleValueType SimpleTy;
+
+ MVT() : SimpleTy((SimpleValueType)(INVALID_SIMPLE_VALUE_TYPE)) {}
+ MVT(SimpleValueType SVT) : SimpleTy(SVT) { }
+
+ bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; }
+ bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; }
+ bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; }
+ bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; }
+ bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; }
+
+ /// isFloatingPoint - Return true if this is a FP, or a vector FP type.
+ bool isFloatingPoint() const {
+ return ((SimpleTy >= MVT::f32 && SimpleTy <= MVT::ppcf128) ||
+ (SimpleTy >= MVT::v2f32 && SimpleTy <= MVT::v4f64));
+ }
+
+ /// isInteger - Return true if this is an integer, or a vector integer type.
+ bool isInteger() const {
+ return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) ||
+ (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v4i64));
+ }
- bool operator==(const MVT VT) const {
- return getRawBits() == VT.getRawBits();
+ /// isVector - Return true if this is a vector value type.
+ bool isVector() const {
+ return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
}
- bool operator!=(const MVT VT) const {
- return getRawBits() != VT.getRawBits();
+
+ /// isPow2VectorType - Returns true if the vector's number of elements is a power of 2.
+ bool isPow2VectorType() const {
+ unsigned NElts = getVectorNumElements();
+ return !(NElts & (NElts - 1));
}
- /// getFloatingPointVT - Returns the MVT that represents a floating point
- /// type with the given number of bits. There are two floating point types
- /// with 128 bits - this returns f128 rather than ppcf128.
+ /// getPow2VectorType - Widens the length of the given vector MVT up to
+ /// the nearest power of 2 and returns that type.
+ MVT getPow2VectorType() const {
+ if (!isPow2VectorType()) {
+ unsigned NElts = getVectorNumElements();
+ unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
+ return MVT::getVectorVT(getVectorElementType(), Pow2NElts);
+ }
+ else {
+ return *this;
+ }
+ }
+
+ MVT getVectorElementType() const {
+ switch (SimpleTy) {
+ default:
+ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ case v2i8 :
+ case v4i8 :
+ case v8i8 :
+ case v16i8:
+ case v32i8: return i8;
+ case v2i16:
+ case v4i16:
+ case v8i16:
+ case v16i16: return i16;
+ case v2i32:
+ case v4i32:
+ case v8i32: return i32;
+ case v1i64:
+ case v2i64:
+ case v4i64: return i64;
+ case v2f32:
+ case v4f32:
+ case v8f32: return f32;
+ case v2f64:
+ case v4f64: return f64;
+ }
+ }
+
+ unsigned getVectorNumElements() const {
+ switch (SimpleTy) {
+ default:
+ return ~0U;
+ case v32i8: return 32;
+ case v16i8:
+ case v16i16: return 16;
+ case v8i8 :
+ case v8i16:
+ case v8i32:
+ case v8f32: return 8;
+ case v4i8:
+ case v4i16:
+ case v4i32:
+ case v4i64:
+ case v4f32:
+ case v4f64: return 4;
+ case v2i8:
+ case v2i16:
+ case v2i32:
+ case v2i64:
+ case v2f32:
+ case v2f64: return 2;
+ case v1i64: return 1;
+ }
+ }
+
+ unsigned getSizeInBits() const {
+ switch (SimpleTy) {
+ case iPTR:
+ assert(0 && "Value type size is target-dependent. Ask TLI.");
+ case iPTRAny:
+ case iAny:
+ case fAny:
+ assert(0 && "Value type is overloaded.");
+ default:
+ assert(0 && "getSizeInBits called on extended MVT.");
+ case i1 : return 1;
+ case i8 : return 8;
+ case i16 :
+ case v2i8: return 16;
+ case f32 :
+ case i32 :
+ case v4i8:
+ case v2i16: return 32;
+ case f64 :
+ case i64 :
+ case v8i8:
+ case v4i16:
+ case v2i32:
+ case v1i64:
+ case v2f32: return 64;
+ case f80 : return 80;
+ case f128:
+ case ppcf128:
+ case i128:
+ case v16i8:
+ case v8i16:
+ case v4i32:
+ case v2i64:
+ case v4f32:
+ case v2f64: return 128;
+ case v32i8:
+ case v16i16:
+ case v8i32:
+ case v4i64:
+ case v8f32:
+ case v4f64: return 256;
+ }
+ }
+
static MVT getFloatingPointVT(unsigned BitWidth) {
switch (BitWidth) {
default:
assert(false && "Bad bit width!");
case 32:
- return f32;
+ return MVT::f32;
case 64:
- return f64;
+ return MVT::f64;
case 80:
- return f80;
+ return MVT::f80;
case 128:
- return f128;
+ return MVT::f128;
}
}
-
- /// getIntegerVT - Returns the MVT that represents an integer with the given
- /// number of bits.
+
static MVT getIntegerVT(unsigned BitWidth) {
switch (BitWidth) {
default:
- break;
+ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
case 1:
- return i1;
+ return MVT::i1;
case 8:
- return i8;
+ return MVT::i8;
case 16:
- return i16;
+ return MVT::i16;
case 32:
- return i32;
+ return MVT::i32;
case 64:
- return i64;
+ return MVT::i64;
case 128:
- return i128;
+ return MVT::i128;
}
- return getExtendedIntegerVT(BitWidth);
}
-
- /// getVectorVT - Returns the MVT that represents a vector NumElements in
- /// length, where each element is of type VT.
+
static MVT getVectorVT(MVT VT, unsigned NumElements) {
- switch (VT.V) {
+ switch (VT.SimpleTy) {
default:
break;
- case i8:
- if (NumElements == 2) return v2i8;
- if (NumElements == 4) return v4i8;
- if (NumElements == 8) return v8i8;
- if (NumElements == 16) return v16i8;
- if (NumElements == 32) return v32i8;
+ case MVT::i8:
+ if (NumElements == 2) return MVT::v2i8;
+ if (NumElements == 4) return MVT::v4i8;
+ if (NumElements == 8) return MVT::v8i8;
+ if (NumElements == 16) return MVT::v16i8;
+ if (NumElements == 32) return MVT::v32i8;
break;
- case i16:
- if (NumElements == 2) return v2i16;
- if (NumElements == 4) return v4i16;
- if (NumElements == 8) return v8i16;
- if (NumElements == 16) return v16i16;
+ case MVT::i16:
+ if (NumElements == 2) return MVT::v2i16;
+ if (NumElements == 4) return MVT::v4i16;
+ if (NumElements == 8) return MVT::v8i16;
+ if (NumElements == 16) return MVT::v16i16;
break;
- case i32:
- if (NumElements == 2) return v2i32;
- if (NumElements == 3) return v3i32;
- if (NumElements == 4) return v4i32;
- if (NumElements == 8) return v8i32;
+ case MVT::i32:
+ if (NumElements == 2) return MVT::v2i32;
+ if (NumElements == 4) return MVT::v4i32;
+ if (NumElements == 8) return MVT::v8i32;
break;
- case i64:
- if (NumElements == 1) return v1i64;
- if (NumElements == 2) return v2i64;
- if (NumElements == 4) return v4i64;
+ case MVT::i64:
+ if (NumElements == 1) return MVT::v1i64;
+ if (NumElements == 2) return MVT::v2i64;
+ if (NumElements == 4) return MVT::v4i64;
break;
- case f32:
- if (NumElements == 2) return v2f32;
- if (NumElements == 3) return v3f32;
- if (NumElements == 4) return v4f32;
- if (NumElements == 8) return v8f32;
+ case MVT::f32:
+ if (NumElements == 2) return MVT::v2f32;
+ if (NumElements == 4) return MVT::v4f32;
+ if (NumElements == 8) return MVT::v8f32;
break;
- case f64:
- if (NumElements == 2) return v2f64;
- if (NumElements == 4) return v4f64;
+ case MVT::f64:
+ if (NumElements == 2) return MVT::v2f64;
+ if (NumElements == 4) return MVT::v4f64;
break;
}
- return getExtendedVectorVT(VT, NumElements);
+ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
-
- /// getIntVectorWithNumElements - Return any integer vector type that has
- /// the specified number of elements.
+
static MVT getIntVectorWithNumElements(unsigned NumElts) {
switch (NumElts) {
- default: return getVectorVT(i8, NumElts);
- case 1: return v1i64;
- case 2: return v2i32;
- case 3: return v3i32;
- case 4: return v4i16;
- case 8: return v8i8;
- case 16: return v16i8;
+ default: return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ case 1: return MVT::v1i64;
+ case 2: return MVT::v2i32;
+ case 4: return MVT::v4i16;
+ case 8: return MVT::v8i8;
+ case 16: return MVT::v16i8;
+ }
+ }
+ };
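// A few worked examples against the new MVT interface, all derivable from the
// switch tables above:
//
//   MVT::getVectorVT(MVT::i32, 4) == MVT(MVT::v4i32)
//   MVT(MVT::v4i32).getVectorElementType() == MVT(MVT::i32)
//   MVT(MVT::v4i32).getVectorNumElements() == 4
//   MVT(MVT::v4i32).getSizeInBits()        == 128
//   MVT::getVectorVT(MVT::i32, 3).SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE
//                                             // v3i32 was removed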
+
+ struct EVT { // EVT = Extended Value Type
+ private:
+ MVT V;
+ const Type *LLVMTy;
+
+ public:
+ EVT() : V((MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE)),
+ LLVMTy(0) {}
+ EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(0) { }
+ EVT(MVT S) : V(S), LLVMTy(0) {}
+
+ bool operator==(const EVT VT) const {
+ if (V.SimpleTy == VT.V.SimpleTy) {
+ if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return LLVMTy == VT.LLVMTy;
+ return true;
+ }
+ return false;
+ }
+ bool operator!=(const EVT VT) const {
+ if (V.SimpleTy == VT.V.SimpleTy) {
+ if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return LLVMTy != VT.LLVMTy;
+ return false;
}
+ return true;
+ }
+
+ /// getFloatingPointVT - Returns the EVT that represents a floating point
+ /// type with the given number of bits. There are two floating point types
+ /// with 128 bits - this returns f128 rather than ppcf128.
+ static EVT getFloatingPointVT(unsigned BitWidth) {
+ return MVT::getFloatingPointVT(BitWidth);
+ }
+
+ /// getIntegerVT - Returns the EVT that represents an integer with the given
+ /// number of bits.
+ static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ MVT M = MVT::getIntegerVT(BitWidth);
+ if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return getExtendedIntegerVT(Context, BitWidth);
+ else
+ return M;
+ }
+
+ /// getVectorVT - Returns the EVT that represents a vector NumElements in
+ /// length, where each element is of type VT.
+ static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) {
+ MVT M = MVT::getVectorVT(VT.V, NumElements);
+ if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return getExtendedVectorVT(Context, VT, NumElements);
+ else
+ return M;
+ }
+
+ /// getIntVectorWithNumElements - Return any integer vector type that has
+ /// the specified number of elements.
+ static EVT getIntVectorWithNumElements(LLVMContext &C, unsigned NumElts) {
+ MVT M = MVT::getIntVectorWithNumElements(NumElts);
+ if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return getVectorVT(C, MVT::i8, NumElts);
+ else
+ return M;
}
- /// isSimple - Test if the given MVT is simple (as opposed to being
+ /// isSimple - Test if the given EVT is simple (as opposed to being
/// extended).
bool isSimple() const {
- return V <= LastSimpleValueType;
+ return V.SimpleTy <= MVT::LastSimpleValueType;
}
- /// isExtended - Test if the given MVT is extended (as opposed to
+ /// isExtended - Test if the given EVT is extended (as opposed to
/// being simple).
bool isExtended() const {
return !isSimple();
@@ -245,44 +431,53 @@ namespace llvm {
/// isFloatingPoint - Return true if this is a FP, or a vector FP type.
bool isFloatingPoint() const {
return isSimple() ?
- ((V >= f32 && V <= ppcf128) ||
- (V >= v2f32 && V <= v4f64)) : isExtendedFloatingPoint();
+ ((V >= MVT::f32 && V <= MVT::ppcf128) ||
+ (V >= MVT::v2f32 && V <= MVT::v4f64)) : isExtendedFloatingPoint();
}
/// isInteger - Return true if this is an integer, or a vector integer type.
bool isInteger() const {
return isSimple() ?
- ((V >= FIRST_INTEGER_VALUETYPE && V <= LAST_INTEGER_VALUETYPE) ||
- (V >= v2i8 && V <= v4i64)) : isExtendedInteger();
+ ((V >= MVT::FIRST_INTEGER_VALUETYPE &&
+ V <= MVT::LAST_INTEGER_VALUETYPE) ||
+ (V >= MVT::v2i8 && V <= MVT::v4i64)) : isExtendedInteger();
}
/// isVector - Return true if this is a vector value type.
bool isVector() const {
return isSimple() ?
- (V >= FIRST_VECTOR_VALUETYPE && V <= LAST_VECTOR_VALUETYPE) :
+ (V >= MVT::FIRST_VECTOR_VALUETYPE && V <=
+ MVT::LAST_VECTOR_VALUETYPE) :
isExtendedVector();
}
/// is64BitVector - Return true if this is a 64-bit vector type.
bool is64BitVector() const {
return isSimple() ?
- (V==v8i8 || V==v4i16 || V==v2i32 || V==v1i64 || V==v2f32) :
+ (V==MVT::v8i8 || V==MVT::v4i16 || V==MVT::v2i32 ||
+ V==MVT::v1i64 || V==MVT::v2f32) :
isExtended64BitVector();
}
/// is128BitVector - Return true if this is a 128-bit vector type.
bool is128BitVector() const {
return isSimple() ?
- (V==v16i8 || V==v8i16 || V==v4i32 ||
- V==v2i64 || V==v4f32 || V==v2f64) :
+ (V==MVT::v16i8 || V==MVT::v8i16 || V==MVT::v4i32 ||
+ V==MVT::v2i64 || V==MVT::v4f32 || V==MVT::v2f64) :
isExtended128BitVector();
}
/// is256BitVector - Return true if this is a 256-bit vector type.
inline bool is256BitVector() const {
- return isSimple() ?
- (V==v8f32 || V==v4f64 || V==v32i8 || V==v16i16 || V==v8i32 ||
- V==v4i64) : isExtended256BitVector();
+ return isSimple() ?
+ (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 ||
+ V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) :
+ isExtended256BitVector();
+ }
+
+ /// isOverloaded - Return true if this is an overloaded type for TableGen.
+ bool isOverloaded() const {
+ return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny);
}
/// isByteSized - Return true if the bit size is a multiple of 8.
@@ -297,165 +492,88 @@ namespace llvm {
}
/// bitsEq - Return true if this has the same number of bits as VT.
- bool bitsEq(MVT VT) const {
+ bool bitsEq(EVT VT) const {
return getSizeInBits() == VT.getSizeInBits();
}
/// bitsGT - Return true if this has more bits than VT.
- bool bitsGT(MVT VT) const {
+ bool bitsGT(EVT VT) const {
return getSizeInBits() > VT.getSizeInBits();
}
/// bitsGE - Return true if this has no fewer bits than VT.
- bool bitsGE(MVT VT) const {
+ bool bitsGE(EVT VT) const {
return getSizeInBits() >= VT.getSizeInBits();
}
/// bitsLT - Return true if this has fewer bits than VT.
- bool bitsLT(MVT VT) const {
+ bool bitsLT(EVT VT) const {
return getSizeInBits() < VT.getSizeInBits();
}
/// bitsLE - Return true if this has no more bits than VT.
- bool bitsLE(MVT VT) const {
+ bool bitsLE(EVT VT) const {
return getSizeInBits() <= VT.getSizeInBits();
}
/// getSimpleVT - Return the SimpleValueType held in the specified
- /// simple MVT.
- SimpleValueType getSimpleVT() const {
+ /// simple EVT.
+ MVT getSimpleVT() const {
assert(isSimple() && "Expected a SimpleValueType!");
- return SimpleValueType(V);
+ return V;
}
/// getVectorElementType - Given a vector type, return the type of
/// each element.
- MVT getVectorElementType() const {
+ EVT getVectorElementType() const {
assert(isVector() && "Invalid vector type!");
- switch (V) {
- default:
+ if (isSimple())
+ return V.getVectorElementType();
+ else
return getExtendedVectorElementType();
- case v2i8 :
- case v4i8 :
- case v8i8 :
- case v16i8:
- case v32i8: return i8;
- case v2i16:
- case v4i16:
- case v8i16:
- case v16i16: return i16;
- case v2i32:
- case v3i32:
- case v4i32:
- case v8i32: return i32;
- case v1i64:
- case v2i64:
- case v4i64: return i64;
- case v2f32:
- case v3f32:
- case v4f32:
- case v8f32: return f32;
- case v2f64:
- case v4f64: return f64;
- }
}
/// getVectorNumElements - Given a vector type, return the number of
/// elements it contains.
unsigned getVectorNumElements() const {
assert(isVector() && "Invalid vector type!");
- switch (V) {
- default:
+ if (isSimple())
+ return V.getVectorNumElements();
+ else
return getExtendedVectorNumElements();
- case v32i8: return 32;
- case v16i8:
- case v16i16: return 16;
- case v8i8 :
- case v8i16:
- case v8i32:
- case v8f32: return 8;
- case v4i8:
- case v4i16:
- case v4i32:
- case v4i64:
- case v4f32:
- case v4f64: return 4;
- case v3i32:
- case v3f32: return 3;
- case v2i8:
- case v2i16:
- case v2i32:
- case v2i64:
- case v2f32:
- case v2f64: return 2;
- case v1i64: return 1;
- }
}
/// getSizeInBits - Return the size of the specified value type in bits.
unsigned getSizeInBits() const {
- switch (V) {
- case iPTR:
- assert(0 && "Value type size is target-dependent. Ask TLI.");
- case iPTRAny:
- case iAny:
- case fAny:
- assert(0 && "Value type is overloaded.");
- default:
+ if (isSimple())
+ return V.getSizeInBits();
+ else
return getExtendedSizeInBits();
- case i1 : return 1;
- case i8 : return 8;
- case i16 :
- case v2i8: return 16;
- case f32 :
- case i32 :
- case v4i8:
- case v2i16: return 32;
- case f64 :
- case i64 :
- case v8i8:
- case v4i16:
- case v2i32:
- case v1i64:
- case v2f32: return 64;
- case f80 : return 80;
- case v3i32:
- case v3f32: return 96;
- case f128:
- case ppcf128:
- case i128:
- case v16i8:
- case v8i16:
- case v4i32:
- case v2i64:
- case v4f32:
- case v2f64: return 128;
- case v32i8:
- case v16i16:
- case v8i32:
- case v4i64:
- case v8f32:
- case v4f64: return 256;
- }
+ }
+
+ /// getStoreSize - Return the number of bytes overwritten by a store
+ /// of the specified value type.
+ unsigned getStoreSize() const {
+ return (getSizeInBits() + 7) / 8;
}
/// getStoreSizeInBits - Return the number of bits overwritten by a store
/// of the specified value type.
unsigned getStoreSizeInBits() const {
- return (getSizeInBits() + 7)/8*8;
+ return getStoreSize() * 8;
}
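// For example, MVT::i1 has getSizeInBits() == 1, so getStoreSize() == 1 byte
// and getStoreSizeInBits() == 8 -- the same rounding the old expression
// (getSizeInBits() + 7)/8*8 performed, now split into two steps.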
- /// getRoundIntegerType - Rounds the bit-width of the given integer MVT up
+ /// getRoundIntegerType - Rounds the bit-width of the given integer EVT up
/// to the nearest power of two (and at least to eight), and returns the
- /// integer MVT with that number of bits.
- MVT getRoundIntegerType() const {
+ /// integer EVT with that number of bits.
+ EVT getRoundIntegerType(LLVMContext &Context) const {
assert(isInteger() && !isVector() && "Invalid integer type!");
unsigned BitWidth = getSizeInBits();
if (BitWidth <= 8)
- return i8;
+ return EVT(MVT::i8);
else
- return getIntegerVT(1 << Log2_32_Ceil(BitWidth));
+ return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth));
}
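// E.g. a 17-bit integer rounds to i32: Log2_32_Ceil(17) == 5 and 1 << 5 == 32,
// while anything of 8 bits or fewer rounds to i8.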
/// isPow2VectorType - Returns true if the vector's number of elements is a power of 2.
@@ -464,41 +582,48 @@ namespace llvm {
return !(NElts & (NElts - 1));
}
- /// getPow2VectorType - Widens the length of the given vector MVT up to
+ /// getPow2VectorType - Widens the length of the given vector EVT up to
/// the nearest power of 2 and returns that type.
- MVT getPow2VectorType() const {
+ EVT getPow2VectorType(LLVMContext &Context) const {
if (!isPow2VectorType()) {
unsigned NElts = getVectorNumElements();
unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
- return MVT::getVectorVT(getVectorElementType(), Pow2NElts);
+ return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts);
}
else {
return *this;
}
}
- /// getMVTString - This function returns value type as a string,
+ /// getEVTString - This function returns value type as a string,
/// e.g. "i32".
- std::string getMVTString() const;
+ std::string getEVTString() const;
- /// getTypeForMVT - This method returns an LLVM type corresponding to the
- /// specified MVT. For integer types, this returns an unsigned type. Note
+ /// getTypeForEVT - This method returns an LLVM type corresponding to the
+ /// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
- const Type *getTypeForMVT() const;
+ const Type *getTypeForEVT(LLVMContext &Context) const;
- /// getMVT - Return the value type corresponding to the specified type.
+ /// getEVT - Return the value type corresponding to the specified type.
/// This returns all pointers as iPTR. If HandleUnknown is true, unknown
/// types are returned as Other, otherwise they are invalid.
- static MVT getMVT(const Type *Ty, bool HandleUnknown = false);
+ static EVT getEVT(const Type *Ty, bool HandleUnknown = false);
- /// getRawBits - Represent the type as a bunch of bits.
- uintptr_t getRawBits() const { return V; }
+ intptr_t getRawBits() {
+ if (V.SimpleTy <= MVT::LastSimpleValueType)
+ return V.SimpleTy;
+ else
+ return (intptr_t)(LLVMTy);
+ }
/// compareRawBits - A meaningless but well-behaved order, useful for
/// constructing containers.
struct compareRawBits {
- bool operator()(MVT L, MVT R) const {
- return L.getRawBits() < R.getRawBits();
+ bool operator()(EVT L, EVT R) const {
+ if (L.V.SimpleTy == R.V.SimpleTy)
+ return L.LLVMTy < R.LLVMTy;
+ else
+ return L.V.SimpleTy < R.V.SimpleTy;
}
};
@@ -506,15 +631,16 @@ namespace llvm {
// Methods for handling the Extended-type case in functions above.
// These are all out-of-line to prevent users of this header file
// from having a dependency on Type.h.
- static MVT getExtendedIntegerVT(unsigned BitWidth);
- static MVT getExtendedVectorVT(MVT VT, unsigned NumElements);
+ static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
+ static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
+ unsigned NumElements);
bool isExtendedFloatingPoint() const;
bool isExtendedInteger() const;
bool isExtendedVector() const;
bool isExtended64BitVector() const;
bool isExtended128BitVector() const;
bool isExtended256BitVector() const;
- MVT getExtendedVectorElementType() const;
+ EVT getExtendedVectorElementType() const;
unsigned getExtendedVectorNumElements() const;
unsigned getExtendedSizeInBits() const;
};
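// Worked examples of the MVT/EVT split (Context is an LLVMContext; a 24-bit
// width is chosen only because it has no simple MVT):
//
//   EVT A = EVT::getIntegerVT(Context, 32); // simple: wraps MVT::i32
//   EVT B = EVT::getIntegerVT(Context, 24); // no MVT::i24, so an extended
//                                           // type backed by an LLVM Type*
//   A.isSimple();    // true
//   B.isExtended();  // true
//   A.getSimpleVT(); // MVT::i32; would assert on B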
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 7f6728bb678e..986555b976e9 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -44,23 +44,26 @@ def v4i16 : ValueType<64 , 20>; // 4 x i16 vector value
def v8i16 : ValueType<128, 21>; // 8 x i16 vector value
def v16i16 : ValueType<256, 22>; // 16 x i16 vector value
def v2i32 : ValueType<64 , 23>; // 2 x i32 vector value
-def v3i32 : ValueType<96 , 24>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 25>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 26>; // 8 x f32 vector value
-def v1i64 : ValueType<64 , 27>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 28>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 29>; // 4 x f64 vector value
+def v4i32 : ValueType<128, 24>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 25>; // 8 x i32 vector value
+def v1i64 : ValueType<64 , 26>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 27>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 28>; // 4 x i64 vector value
+
+def v2f32 : ValueType<64, 29>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 30>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 31>; // 8 x f32 vector value
+def v2f64 : ValueType<128, 32>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 33>; // 4 x f64 vector value
+
+def MetadataVT: ValueType<0, 250>; // Metadata
-def v2f32 : ValueType<64, 30>; // 2 x f32 vector value
-def v3f32 : ValueType<96 , 31>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 32>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 33>; // 8 x f32 vector value
-def v2f64 : ValueType<128, 34>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 35>; // 4 x f64 vector value
-
// Pseudo valuetype mapped to the current pointer size to any address space.
// Should only be used in TableGen.
-def iPTRAny : ValueType<0, 252>;
+def iPTRAny : ValueType<0, 251>;
+
+// Pseudo valuetype to represent "vector of any size"
+def vAny : ValueType<0 , 252>;
// Pseudo valuetype to represent "float of any format"
def fAny : ValueType<0 , 253>;
diff --git a/include/llvm/CompilerDriver/BuiltinOptions.h b/include/llvm/CompilerDriver/BuiltinOptions.h
index 492dffd30725..fe44c30a7ad5 100644
--- a/include/llvm/CompilerDriver/BuiltinOptions.h
+++ b/include/llvm/CompilerDriver/BuiltinOptions.h
@@ -22,6 +22,7 @@ namespace SaveTempsEnum { enum Values { Cwd, Obj, Unset }; }
extern llvm::cl::list<std::string> InputFilenames;
extern llvm::cl::opt<std::string> OutputFilename;
+extern llvm::cl::opt<std::string> TempDirname;
extern llvm::cl::list<std::string> Languages;
extern llvm::cl::opt<bool> DryRun;
extern llvm::cl::opt<bool> VerboseMode;
diff --git a/include/llvm/CompilerDriver/Common.td b/include/llvm/CompilerDriver/Common.td
index 1f6bacc787bc..5b7c543f1c92 100644
--- a/include/llvm/CompilerDriver/Common.td
+++ b/include/llvm/CompilerDriver/Common.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains common definitions used in llvmc2 tool description files.
+// This file contains common definitions used in llvmc tool description files.
//
//===----------------------------------------------------------------------===//
@@ -39,29 +39,35 @@ def prefix_list_option;
def extern;
def help;
def hidden;
+def init;
def multi_val;
def one_or_more;
def really_hidden;
def required;
def zero_or_one;
-// Empty DAG marker.
-def empty;
-
// The 'case' construct.
def case;
+// Boolean constants.
+def true;
+def false;
+
// Boolean operators.
def and;
def or;
+def not;
// Primitive tests.
def switch_on;
def parameter_equals;
def element_in_list;
def input_languages_contain;
+def empty;
def not_empty;
def default;
+def single_input_file;
+def multiple_input_files;
// Possible actions.
@@ -76,6 +82,9 @@ def error;
def inc_weight;
def dec_weight;
+// Empty DAG marker.
+def empty_dag_marker;
+
// Used to specify plugin priority.
class PluginPriority<int p> {
int priority = p;
@@ -105,10 +114,10 @@ class EdgeBase<string t1, string t2, dag d> {
dag weight = d;
}
-class Edge<string t1, string t2> : EdgeBase<t1, t2, (empty)>;
+class Edge<string t1, string t2> : EdgeBase<t1, t2, (empty_dag_marker)>;
// Edge and SimpleEdge are synonyms.
-class SimpleEdge<string t1, string t2> : EdgeBase<t1, t2, (empty)>;
+class SimpleEdge<string t1, string t2> : EdgeBase<t1, t2, (empty_dag_marker)>;
// Optionally enabled edge.
class OptionalEdge<string t1, string t2, dag props> : EdgeBase<t1, t2, props>;
diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h
index 825d4c40f8aa..3daafd58a7c2 100644
--- a/include/llvm/CompilerDriver/CompilationGraph.h
+++ b/include/llvm/CompilerDriver/CompilationGraph.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
@@ -242,7 +241,8 @@ namespace llvmc {
/// NodeChildIterator - Another auxiliary class needed by GraphTraits.
- class NodeChildIterator : public bidirectional_iterator<Node, ptrdiff_t> {
+ class NodeChildIterator : public
+ std::iterator<std::bidirectional_iterator_tag, Node, ptrdiff_t> {
typedef NodeChildIterator ThisType;
typedef Node::container_type::iterator iterator;
diff --git a/include/llvm/CompilerDriver/ForceLinkage.h b/include/llvm/CompilerDriver/ForceLinkage.h
index 58ea16710e49..830c04e2d307 100644
--- a/include/llvm/CompilerDriver/ForceLinkage.h
+++ b/include/llvm/CompilerDriver/ForceLinkage.h
@@ -41,6 +41,26 @@ namespace llvmc {
LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_5);
#endif
+#ifdef LLVMC_BUILTIN_PLUGIN_6
+ LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_6);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_7
+ LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_7);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_8
+ LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_8);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_9
+ LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_9);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_10
+ LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_10);
+#endif
+
namespace force_linkage {
struct LinkageForcer {
@@ -68,6 +88,26 @@ namespace force_linkage {
LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_5);
#endif
+#ifdef LLVMC_BUILTIN_PLUGIN_6
+ LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_6);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_7
+ LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_7);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_8
+ LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_8);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_9
+ LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_9);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_10
+ LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_10);
+#endif
+
}
};
} // End namespace force_linkage.
diff --git a/include/llvm/Config/AsmParsers.def.in b/include/llvm/Config/AsmParsers.def.in
new file mode 100644
index 000000000000..041af837541c
--- /dev/null
+++ b/include/llvm/Config/AsmParsers.def.in
@@ -0,0 +1,29 @@
+//===- llvm/Config/AsmParsers.def - LLVM Assembly Parsers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file enumerates all of the assembly-language parsers
+// supported by this build of LLVM. Clients of this file should define
+// the LLVM_ASM_PARSER macro to be a function-like macro with a
+// single parameter (the name of the target whose assembly can be
+// generated); including this file will then enumerate all of the
+// targets with assembly parsers.
+//
+// The set of targets supported by LLVM is generated at configuration
+// time, at which point this header is generated. Do not modify this
+// header directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASM_PARSER
+# error Please define the macro LLVM_ASM_PARSER(TargetName)
+#endif
+
+@LLVM_ENUM_ASM_PARSERS@
+
+#undef LLVM_ASM_PARSER
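
The new .def file is an X-macro header: a client defines LLVM_ASM_PARSER to expand once per configured target and then includes the file, which #undefs the macro on the way out so it can be included repeatedly. A sketch of client usage, assuming per-target initializer functions named LLVMInitialize<Target>AsmParser (hypothetical names for illustration):

    // Declare one initializer per configured target (hypothetical names).
    #define LLVM_ASM_PARSER(TargetName) \
      extern "C" void LLVMInitialize##TargetName##AsmParser();
    #include "llvm/Config/AsmParsers.def"

    // Call each initializer; a second inclusion works because the .def
    // file undefined LLVM_ASM_PARSER after the first expansion.
    static void InitializeAllAsmParsers() {
    #define LLVM_ASM_PARSER(TargetName) \
      LLVMInitialize##TargetName##AsmParser();
    #include "llvm/Config/AsmParsers.def"
    }
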
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 180e8c523859..fa5d316ef076 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -6,14 +6,6 @@
/* Define if dlopen(0) will open the symbols of the program */
#undef CAN_DLOPEN_SELF
-/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
- systems. This function is required for `alloca.c' support on those systems.
- */
-#undef CRAY_STACKSEG_END
-
-/* Define to 1 if using `alloca.c'. */
-#undef C_ALLOCA
-
/* Define if CBE is enabled for printf %a output */
#undef ENABLE_CBE_PRINTF_A
@@ -23,13 +15,6 @@
/* Define if threads enabled */
#cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
-/* Define to 1 if you have `alloca', as a function or macro. */
-#cmakedefine HAVE_ALLOCA ${HAVE_ALLOCA}
-
-/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
- */
-#cmakedefine HAVE_ALLOCA_H ${HAVE_ALLOCA_H}
-
/* Define to 1 if you have the `argz_append' function. */
#undef HAVE_ARGZ_APPEND
@@ -113,7 +98,7 @@
#cmakedefine HAVE_FCNTL_H ${HAVE_FCNTL_H}
/* Set to 1 if the finite function is found in <ieeefp.h> */
-#undef HAVE_FINITE_IN_IEEEFP_H
+#cmakedefine HAVE_FINITE_IN_IEEEFP_H ${HAVE_FINITE_IN_IEEEFP_H}
/* Define to 1 if you have the `floorf' function. */
#cmakedefine HAVE_FLOORF ${HAVE_FLOORF}
@@ -181,9 +166,6 @@
/* Define if you have the libdl library or equivalent. */
#undef HAVE_LIBDL
-/* Define to 1 if you have the `elf' library (-lelf). */
-#undef HAVE_LIBELF
-
/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
#cmakedefine HAVE_LIBIMAGEHLP ${HAVE_LIBIMAGEHLP}
@@ -240,13 +222,13 @@
#cmakedefine HAVE_MEMORY_H ${HAVE_MEMORY_H}
/* Define to 1 if you have the `mkdtemp' function. */
-#undef HAVE_MKDTEMP
+#cmakedefine HAVE_MKDTEMP ${HAVE_MKDTEMP}
/* Define to 1 if you have the `mkstemp' function. */
-#undef HAVE_MKSTEMP
+#cmakedefine HAVE_MKSTEMP ${HAVE_MKSTEMP}
/* Define to 1 if you have the `mktemp' function. */
-#undef HAVE_MKTEMP
+#cmakedefine HAVE_MKTEMP ${HAVE_MKTEMP}
/* Define to 1 if you have a working `mmap' system call. */
#undef HAVE_MMAP
@@ -307,7 +289,10 @@
#undef HAVE_ROUNDF
/* Define to 1 if you have the `sbrk' function. */
-#undef HAVE_SBRK
+#cmakedefine HAVE_SBRK ${HAVE_SBRK}
+
+/* Define to 1 if you have the `setenv' function. */
+#cmakedefine HAVE_SETENV ${HAVE_SETENV}
/* Define to 1 if you have the `setjmp' function. */
#undef HAVE_SETJMP
@@ -364,13 +349,13 @@
#undef HAVE_STRDUP
/* Define to 1 if you have the `strerror' function. */
-#cmakedefine HAVE_STRERROR
+#cmakedefine HAVE_STRERROR ${HAVE_STRERROR}
/* Define to 1 if you have the `strerror_r' function. */
-#cmakedefine HAVE_STRERROR_R
+#cmakedefine HAVE_STRERROR_R ${HAVE_STRERROR_R}
/* Define to 1 if you have the `strerror_s' function. */
-#cmakedefine HAVE_STRERROR_S
+#cmakedefine HAVE_STRERROR_S ${HAVE_STRERROR_S}
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
@@ -470,6 +455,9 @@
/* Installation directory for man pages */
#undef LLVM_MANDIR
+/* Build multithreading support into LLVM */
+#cmakedefine LLVM_MULTITHREADED ${LLVM_MULTITHREADED}
+
/* Define if this is Unixish platform */
#cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
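
Several hunks above change bare "#cmakedefine HAVE_FOO" lines into "#cmakedefine HAVE_FOO ${HAVE_FOO}". CMake's configure_file() rewrites each such line according to the variable's truth value; carrying ${HAVE_FOO} through means a set variable yields "#define HAVE_FOO 1" rather than a valueless "#define HAVE_FOO", which matters to code testing the macro with #if. A sketch of the two configure-time outcomes for one entry:

    /* If HAVE_MKSTEMP is true at configure time, the line becomes: */
    #define HAVE_MKSTEMP 1

    /* If it is unset or false, the line is commented out instead: */
    /* #undef HAVE_MKSTEMP */
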
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 4dd1345d0e60..5257df97b2b3 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -60,6 +60,9 @@
/* Define to 1 if you have the `ceilf' function. */
#undef HAVE_CEILF
+/* Define if the circo program is available */
+#undef HAVE_CIRCO
+
/* Define to 1 if you have the `closedir' function. */
#undef HAVE_CLOSEDIR
@@ -109,6 +112,9 @@
/* Define to 1 if you have the <fcntl.h> header file. */
#undef HAVE_FCNTL_H
+/* Define if the fdp program is available */
+#undef HAVE_FDP
+
/* Define if libffi is available on this platform. */
#undef HAVE_FFI_CALL
@@ -178,9 +184,6 @@
/* Define if you have the libdl library or equivalent. */
#undef HAVE_LIBDL
-/* Define to 1 if you have the `elf' library (-lelf). */
-#undef HAVE_LIBELF
-
/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
#undef HAVE_LIBIMAGEHLP
@@ -267,6 +270,9 @@
/* Define to 1 if you have the `nearbyintf' function. */
#undef HAVE_NEARBYINTF
+/* Define if the neato program is available */
+#undef HAVE_NEATO
+
/* Define to 1 if you have the `opendir' function. */
#undef HAVE_OPENDIR
@@ -315,6 +321,9 @@
/* Define to 1 if you have the `sbrk' function. */
#undef HAVE_SBRK
+/* Define to 1 if you have the `setenv' function. */
+#undef HAVE_SETENV
+
/* Define to 1 if you have the `setjmp' function. */
#undef HAVE_SETJMP
@@ -431,6 +440,9 @@
/* Define to 1 if you have the <termios.h> header file. */
#undef HAVE_TERMIOS_H
+/* Define if the twopi program is available */
+#undef HAVE_TWOPI
+
/* Define to 1 if the system has the type `uint64_t'. */
#undef HAVE_UINT64_T
@@ -491,18 +503,30 @@
/* Define if this is Win32ish platform */
#undef LLVM_ON_WIN32
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#undef LLVM_PATH_CIRCO
+
/* Define to path to dot program if found or 'echo dot' otherwise */
#undef LLVM_PATH_DOT
/* Define to path to dotty program if found or 'echo dotty' otherwise */
#undef LLVM_PATH_DOTTY
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#undef LLVM_PATH_FDP
+
/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
#undef LLVM_PATH_GRAPHVIZ
/* Define to path to gv program if found or 'echo gv' otherwise */
#undef LLVM_PATH_GV
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#undef LLVM_PATH_NEATO
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#undef LLVM_PATH_TWOPI
+
/* Installation prefix directory */
#undef LLVM_PREFIX
@@ -568,6 +592,9 @@
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
#undef TM_IN_SYS_TIME
+/* Define if we have the oprofile JIT-support library */
+#undef USE_OPROFILE
+
/* Define if use udis86 library */
#undef USE_UDIS86
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index d4949d1a0ffd..a42c7d437171 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -17,21 +17,10 @@
#include "llvm/User.h"
namespace llvm {
- template<typename T> class SmallVectorImpl;
+ class APInt;
- /// If object contains references to other objects, then relocations are
- /// usually required for emission of such object (especially in PIC mode). One
- /// usually distinguishes local and global relocations. Local relocations are
- /// made wrt objects in the same module and these objects have local (internal
- /// or private) linkage. Global relocations are made wrt externally visible
- /// objects. In most cases local relocations can be resolved via so-called
- /// 'pre-link' technique.
- namespace Reloc {
- const unsigned None = 0;
- const unsigned Local = 1 << 0; ///< Local relocations are required
- const unsigned Global = 1 << 1; ///< Global relocations are required
- const unsigned LocalOrGlobal = Local | Global;
- }
+ template<typename T> class SmallVectorImpl;
+ class LLVMContext;
/// This is an important base class in LLVM. It provides the common facilities
/// of all constant values in an LLVM program. A constant is a value that is
@@ -53,35 +42,47 @@ namespace llvm {
class Constant : public User {
void operator=(const Constant &); // Do not implement
Constant(const Constant &); // Do not implement
+
protected:
Constant(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
: User(ty, vty, Ops, NumOps) {}
void destroyConstantImpl();
public:
- /// Static constructor to get a '0' constant of arbitrary type...
- ///
- static Constant *getNullValue(const Type *Ty);
-
- /// Static constructor to get a '-1' constant. This supports integers and
- /// vectors.
- ///
- static Constant *getAllOnesValue(const Type *Ty);
-
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue.
virtual bool isNullValue() const = 0;
+ /// isNegativeZeroValue - Return true if the value is what would be returned
+ /// by getZeroValueForNegation.
+ virtual bool isNegativeZeroValue() const { return isNullValue(); }
+
/// canTrap - Return true if evaluation of this constant could trap. This is
/// true for things like constant expressions that could divide by zero.
bool canTrap() const;
- /// ContainsRelocations - Return true if the constant value contains
- /// relocations which cannot be resolved at compile time. Note that answer is
- /// not exclusive: there can be possibility that relocations of other kind are
- /// required as well.
- bool ContainsRelocations(unsigned Kind = Reloc::LocalOrGlobal) const;
-
+ enum PossibleRelocationsTy {
+ NoRelocation = 0,
+ LocalRelocation = 1,
+ GlobalRelocations = 2
+ };
+
+ /// getRelocationInfo - This method classifies the entry according to
+ /// whether or not it may generate a relocation entry. This must be
+ /// conservative, so if it might codegen to a relocatable entry, it should say
+ /// so. The return values are:
+ ///
+ /// NoRelocation: This constant pool entry is guaranteed to never have a
+ /// relocation applied to it (because it holds a simple constant like
+ /// '4').
+ /// LocalRelocation: This entry has relocations, but the entries are
+ /// guaranteed to be resolvable by the static linker, so the dynamic
+ /// linker will never see them.
+ /// GlobalRelocations: This entry may have arbitrary relocations.
+ ///
+ /// FIXME: This really should not be in VMCore.
+ PossibleRelocationsTy getRelocationInfo() const;
+
// Specialize get/setOperand for Constants as their operands are always
// constants as well.
Constant *getOperand(unsigned i) {
@@ -98,7 +99,8 @@ public:
/// type, returns the elements of the vector in the specified smallvector.
/// This handles breaking down a vector undef into undef elements, etc. For
/// constant exprs and other cases we can't handle, we return an empty vector.
- void getVectorElements(SmallVectorImpl<Constant*> &Elts) const;
+ void getVectorElements(LLVMContext &Context,
+ SmallVectorImpl<Constant*> &Elts) const;
/// destroyConstant - Called if some element of this constant is no longer
/// valid. At this point only other constants may be on the use_list for this
@@ -135,6 +137,17 @@ public:
"implemented for all constants that have operands!");
assert(0 && "Constants that do not have operands cannot be using 'From'!");
}
+
+ static Constant* getNullValue(const Type* Ty);
+
+ /// @returns the value for an integer constant of the given type that has all
+ /// its bits set to true.
+ /// @brief Get the all ones value
+ static Constant* getAllOnesValue(const Type* Ty);
+
+ /// getIntegerValue - Return the value for an integer or pointer constant,
+ /// or a vector thereof, with the given scalar value.
+ static Constant* getIntegerValue(const Type* Ty, const APInt &V);
};
} // End llvm namespace
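
The new getRelocationInfo() folds the old two-flag ContainsRelocations() query into a three-way classification. A sketch of how a codegen client might consume it, assuming only the API shown in this hunk:

    #include "llvm/Constant.h"
    using namespace llvm;

    // Decide whether a constant-pool entry can live in a read-only section
    // shared between processes (i.e. needs no dynamic relocation).
    static bool NeedsDynamicRelocation(const Constant *C) {
      switch (C->getRelocationInfo()) {
      case Constant::NoRelocation:      return false; // plain value, e.g. '4'
      case Constant::LocalRelocation:   return false; // static linker resolves
      case Constant::GlobalRelocations: return true;  // may need runtime fixups
      }
      return true; // be conservative for unexpected values
    }
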
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index da69d25cf621..7715286bbe5c 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -45,7 +45,6 @@ struct ConvertConstantType;
/// represents both boolean and integral constants.
/// @brief Class for constant integers.
class ConstantInt : public Constant {
- static ConstantInt *TheTrueVal, *TheFalseVal;
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
ConstantInt(const ConstantInt &); // DO NOT IMPLEMENT
ConstantInt(const IntegerType *Ty, const APInt& V);
@@ -56,10 +55,47 @@ protected:
return User::operator new(s, 0);
}
public:
+ static ConstantInt *getTrue(LLVMContext &Context);
+ static ConstantInt *getFalse(LLVMContext &Context);
+
+ /// If Ty is a vector type, return a Constant with a splat of the given
+ /// value. Otherwise return a ConstantInt for the given value.
+ static Constant *get(const Type *Ty, uint64_t V, bool isSigned = false);
+
+ /// Return a ConstantInt with the specified integer value for the specified
+ /// type. If the type is wider than 64 bits, the value will be zero-extended
+ /// to fit the type, unless isSigned is true, in which case the value will
+ /// be interpreted as a 64-bit signed integer and sign-extended to fit
+ /// the type.
+ /// @brief Get a ConstantInt for a specific value.
+ static ConstantInt *get(const IntegerType *Ty, uint64_t V,
+ bool isSigned = false);
+
+ /// Return a ConstantInt with the specified value for the specified type. The
+ /// value V will be canonicalized to an unsigned APInt. Accessing it with
+ /// either getSExtValue() or getZExtValue() will yield a correctly sized and
+ /// signed value for the type Ty.
+ /// @brief Get a ConstantInt for a specific signed value.
+ static ConstantInt *getSigned(const IntegerType *Ty, int64_t V);
+ static Constant *getSigned(const Type *Ty, int64_t V);
+
+ /// Return a ConstantInt with the specified value and an implied Type. The
+ /// type is the integer type that corresponds to the bit width of the value.
+ static ConstantInt *get(LLVMContext &Context, const APInt &V);
+
+ /// Return a ConstantInt constructed from the string strStart with the given
+ /// radix.
+ static ConstantInt *get(const IntegerType *Ty, const StringRef &Str,
+ uint8_t radix);
+
+ /// If Ty is a vector type, return a Constant with a splat of the given
+ /// value. Otherwise return a ConstantInt for the given value.
+ static Constant *get(const Type* Ty, const APInt& V);
+
/// Return the constant as an APInt value reference. This allows clients to
/// obtain a copy of the value, with all its precision intact.
/// @brief Return the constant's value.
- inline const APInt& getValue() const {
+ inline const APInt &getValue() const {
return Val;
}
@@ -92,49 +128,6 @@ public:
return Val == V;
}
- /// getTrue/getFalse - Return the singleton true/false values.
- static inline ConstantInt *getTrue() {
- if (TheTrueVal) return TheTrueVal;
- return CreateTrueFalseVals(true);
- }
- static inline ConstantInt *getFalse() {
- if (TheFalseVal) return TheFalseVal;
- return CreateTrueFalseVals(false);
- }
-
- /// Return a ConstantInt with the specified integer value for the specified
- /// type. If the type is wider than 64 bits, the value will be zero-extended
- /// to fit the type, unless isSigned is true, in which case the value will
- /// be interpreted as a 64-bit signed integer and sign-extended to fit
- /// the type.
- /// @brief Get a ConstantInt for a specific value.
- static ConstantInt *get(const IntegerType *Ty,
- uint64_t V, bool isSigned = false);
-
- /// If Ty is a vector type, return a Constant with a splat of the given
- /// value. Otherwise return a ConstantInt for the given value.
- static Constant *get(const Type *Ty, uint64_t V, bool isSigned = false);
-
- /// Return a ConstantInt with the specified value for the specified type. The
- /// value V will be canonicalized to a an unsigned APInt. Accessing it with
- /// either getSExtValue() or getZExtValue() will yield a correctly sized and
- /// signed value for the type Ty.
- /// @brief Get a ConstantInt for a specific signed value.
- static ConstantInt *getSigned(const IntegerType *Ty, int64_t V) {
- return get(Ty, V, true);
- }
- static Constant *getSigned(const Type *Ty, int64_t V) {
- return get(Ty, V, true);
- }
-
- /// Return a ConstantInt with the specified value and an implied Type. The
- /// type is the integer type that corresponds to the bit width of the value.
- static ConstantInt *get(const APInt &V);
-
- /// If Ty is a vector type, return a Constant with a splat of the given
- /// value. Otherwise return a ConstantInt for the given value.
- static Constant *get(const Type *Ty, const APInt &V);
-
/// getType - Specialize the getType() method to always return an IntegerType,
/// which reduces the amount of casting needed in parts of the compiler.
///
@@ -227,19 +220,11 @@ public:
return Val.getLimitedValue(Limit);
}
- /// @returns the value for an integer constant of the given type that has all
- /// its bits set to true.
- /// @brief Get the all ones value
- static ConstantInt *getAllOnesValue(const Type *Ty);
-
/// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const ConstantInt *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantIntVal;
}
- static void ResetTrueFalse() { TheTrueVal = TheFalseVal = 0; }
-private:
- static ConstantInt *CreateTrueFalseVals(bool WhichOne);
};
@@ -250,6 +235,7 @@ class ConstantFP : public Constant {
APFloat Val;
void *operator new(size_t, unsigned);// DO NOT IMPLEMENT
ConstantFP(const ConstantFP &); // DO NOT IMPLEMENT
+ friend class LLVMContextImpl;
protected:
ConstantFP(const Type *Ty, const APFloat& V);
protected:
@@ -258,26 +244,35 @@ protected:
return User::operator new(s, 0);
}
public:
- /// get() - Static factory methods - Return objects of the specified value
- static ConstantFP *get(const APFloat &V);
-
+ /// Floating point negation must be implemented with f(x) = -0.0 - x. This
+ /// method returns the negative zero constant for floating point or vector
+ /// floating point types; for all other types, it returns the null value.
+ static Constant *getZeroValueForNegation(const Type *Ty);
+
/// get() - This returns a ConstantFP, or a vector containing a splat of a
/// ConstantFP, for the specified value in the specified type. This should
/// only be used for simple constant values like 2.0/1.0 etc, that are
/// known-valid both as host double and as the target format.
- static Constant *get(const Type *Ty, double V);
-
+ static Constant *get(const Type* Ty, double V);
+ static Constant *get(const Type* Ty, const StringRef &Str);
+ static ConstantFP *get(LLVMContext &Context, const APFloat &V);
+ static ConstantFP *getNegativeZero(const Type* Ty);
+ static ConstantFP *getInfinity(const Type *Ty, bool Negative = false);
+
/// isValueValidForType - return true if Ty is big enough to represent V.
- static bool isValueValidForType(const Type *Ty, const APFloat& V);
+ static bool isValueValidForType(const Type *Ty, const APFloat &V);
inline const APFloat& getValueAPF() const { return Val; }
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue. Don't depend on == for doubles to tell us it's zero, it
/// considers -0.0 to be null as well as 0.0. :(
virtual bool isNullValue() const;
-
- // Get a negative zero.
- static ConstantFP *getNegativeZero(const Type* Ty);
+
+ /// isNegativeZeroValue - Return true if the value is what would be returned
+ /// by getZeroValueForNegation.
+ virtual bool isNegativeZeroValue() const {
+ return Val.isZero() && Val.isNegative();
+ }
/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
@@ -285,7 +280,7 @@ public:
/// two floating point values. The version with a double operand is retained
/// because it's so convenient to write isExactlyValue(2.0), but please use
/// it only for simple constants.
- bool isExactlyValue(const APFloat& V) const;
+ bool isExactlyValue(const APFloat &V) const;
bool isExactlyValue(double V) const {
bool ignored;
@@ -319,10 +314,8 @@ protected:
return User::operator new(s, 0);
}
public:
- /// get() - static factory method for creating a null aggregate. It is
- /// illegal to call this method with a non-aggregate type.
- static ConstantAggregateZero *get(const Type *Ty);
-
+ static ConstantAggregateZero* get(const Type *Ty);
+
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue.
virtual bool isNullValue() const { return true; }
@@ -348,22 +341,20 @@ class ConstantArray : public Constant {
protected:
ConstantArray(const ArrayType *T, const std::vector<Constant*> &Val);
public:
- /// get() - Static factory methods - Return objects of the specified value
- static Constant *get(const ArrayType *T, const std::vector<Constant*> &);
- static Constant *get(const ArrayType *T,
- Constant*const*Vals, unsigned NumVals) {
- // FIXME: make this the primary ctor method.
- return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
- }
-
+ // ConstantArray accessors
+ static Constant *get(const ArrayType *T, const std::vector<Constant*> &V);
+ static Constant *get(const ArrayType *T, Constant *const *Vals,
+ unsigned NumVals);
+
/// This method constructs a ConstantArray and initializes it with a text
/// string. The default behavior (AddNull==true) causes a null terminator to
/// be placed at the end of the array. This effectively increases the length
/// of the array by one (you've been warned). However, in some situations
/// this is not desired so if AddNull==false then the string is copied without
- /// null termination.
- static Constant *get(const std::string &Initializer, bool AddNull = true);
-
+ /// null termination.
+ static Constant *get(LLVMContext &Context, const StringRef &Initializer,
+ bool AddNull = true);
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -406,7 +397,7 @@ public:
};
template <>
-struct OperandTraits<ConstantArray> : VariadicOperandTraits<> {
+struct OperandTraits<ConstantArray> : public VariadicOperandTraits<> {
};
DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantArray, Constant)
@@ -421,16 +412,13 @@ class ConstantStruct : public Constant {
protected:
ConstantStruct(const StructType *T, const std::vector<Constant*> &Val);
public:
- /// get() - Static factory methods - Return objects of the specified value
- ///
+ // ConstantStruct accessors
static Constant *get(const StructType *T, const std::vector<Constant*> &V);
- static Constant *get(const std::vector<Constant*> &V, bool Packed = false);
- static Constant *get(Constant*const* Vals, unsigned NumVals,
- bool Packed = false) {
- // FIXME: make this the primary ctor method.
- return get(std::vector<Constant*>(Vals, Vals+NumVals), Packed);
- }
-
+ static Constant *get(LLVMContext &Context,
+ const std::vector<Constant*> &V, bool Packed);
+ static Constant *get(LLVMContext &Context,
+ Constant *const *Vals, unsigned NumVals, bool Packed);
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -458,7 +446,7 @@ public:
};
template <>
-struct OperandTraits<ConstantStruct> : VariadicOperandTraits<> {
+struct OperandTraits<ConstantStruct> : public VariadicOperandTraits<> {
};
DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantStruct, Constant)
@@ -473,13 +461,10 @@ class ConstantVector : public Constant {
protected:
ConstantVector(const VectorType *T, const std::vector<Constant*> &Val);
public:
- /// get() - Static factory methods - Return objects of the specified value
- static Constant *get(const VectorType *T, const std::vector<Constant*> &);
+ // ConstantVector accessors
+ static Constant *get(const VectorType *T, const std::vector<Constant*> &V);
static Constant *get(const std::vector<Constant*> &V);
- static Constant *get(Constant*const* Vals, unsigned NumVals) {
- // FIXME: make this the primary ctor method.
- return get(std::vector<Constant*>(Vals, Vals+NumVals));
- }
+ static Constant *get(Constant *const *Vals, unsigned NumVals);
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -490,11 +475,6 @@ public:
inline const VectorType *getType() const {
return reinterpret_cast<const VectorType*>(Value::getType());
}
-
- /// @returns the value for a vector integer constant of the given type that
- /// has all its bits set to true.
- /// @brief Get the all ones value
- static ConstantVector *getAllOnesValue(const VectorType *Ty);
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue. This always returns false because zero vectors are always
@@ -522,7 +502,7 @@ public:
};
template <>
-struct OperandTraits<ConstantVector> : VariadicOperandTraits<> {
+struct OperandTraits<ConstantVector> : public VariadicOperandTraits<> {
};
DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantVector, Constant)
@@ -590,13 +570,17 @@ protected:
// These private methods are used by the type resolution code to create
// ConstantExprs in intermediate forms.
static Constant *getTy(const Type *Ty, unsigned Opcode,
- Constant *C1, Constant *C2);
+ Constant *C1, Constant *C2,
+ unsigned Flags = 0);
static Constant *getCompareTy(unsigned short pred, Constant *C1,
Constant *C2);
static Constant *getSelectTy(const Type *Ty,
Constant *C1, Constant *C2, Constant *C3);
static Constant *getGetElementPtrTy(const Type *Ty, Constant *C,
Value* const *Idxs, unsigned NumIdxs);
+ static Constant *getInBoundsGetElementPtrTy(const Type *Ty, Constant *C,
+ Value* const *Idxs,
+ unsigned NumIdxs);
static Constant *getExtractElementTy(const Type *Ty, Constant *Val,
Constant *Idx);
static Constant *getInsertElementTy(const Type *Ty, Constant *Val,
@@ -617,6 +601,43 @@ public:
/// Cast constant expr
///
+
+ /// getAlignOf constant expr - computes the alignment of a type in a target
+ /// independent way (Note: the return type is an i32; Note: assumes that i8
+ /// is byte aligned).
+ static Constant *getAlignOf(const Type* Ty);
+
+ /// getSizeOf constant expr - computes the size of a type in a target
+ /// independent way (Note: the return type is an i64).
+ ///
+ static Constant *getSizeOf(const Type* Ty);
+
+ /// getOffsetOf constant expr - computes the offset of a field in a target
+ /// independent way (Note: the return type is an i64).
+ ///
+ static Constant *getOffsetOf(const StructType* Ty, unsigned FieldNo);
+
+ static Constant *getNeg(Constant *C);
+ static Constant *getFNeg(Constant *C);
+ static Constant *getNot(Constant *C);
+ static Constant *getAdd(Constant *C1, Constant *C2);
+ static Constant *getFAdd(Constant *C1, Constant *C2);
+ static Constant *getSub(Constant *C1, Constant *C2);
+ static Constant *getFSub(Constant *C1, Constant *C2);
+ static Constant *getMul(Constant *C1, Constant *C2);
+ static Constant *getFMul(Constant *C1, Constant *C2);
+ static Constant *getUDiv(Constant *C1, Constant *C2);
+ static Constant *getSDiv(Constant *C1, Constant *C2);
+ static Constant *getFDiv(Constant *C1, Constant *C2);
+ static Constant *getURem(Constant *C1, Constant *C2);
+ static Constant *getSRem(Constant *C1, Constant *C2);
+ static Constant *getFRem(Constant *C1, Constant *C2);
+ static Constant *getAnd(Constant *C1, Constant *C2);
+ static Constant *getOr(Constant *C1, Constant *C2);
+ static Constant *getXor(Constant *C1, Constant *C2);
+ static Constant *getShl(Constant *C1, Constant *C2);
+ static Constant *getLShr(Constant *C1, Constant *C2);
+ static Constant *getAShr(Constant *C1, Constant *C2);
static Constant *getTrunc (Constant *C, const Type *Ty);
static Constant *getSExt (Constant *C, const Type *Ty);
static Constant *getZExt (Constant *C, const Type *Ty);
@@ -630,6 +651,10 @@ public:
static Constant *getIntToPtr(Constant *C, const Type *Ty);
static Constant *getBitCast (Constant *C, const Type *Ty);
+ static Constant *getNSWAdd(Constant *C1, Constant *C2);
+ static Constant *getNSWSub(Constant *C1, Constant *C2);
+ static Constant *getExactSDiv(Constant *C1, Constant *C2);
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -688,69 +713,51 @@ public:
/// and the getIndices() method may be used.
bool hasIndices() const;
+ /// @brief Return true if this is a getelementptr expression and all
+ /// the index operands are compile-time known integers within the
+ /// corresponding notional static array extents. Note that this is
+ /// not equivalent to, a subset of, or a superset of the "inbounds"
+ /// property.
+ bool isGEPWithNoNotionalOverIndexing() const;
+
/// Select constant expr
///
static Constant *getSelect(Constant *C, Constant *V1, Constant *V2) {
return getSelectTy(V1->getType(), C, V1, V2);
}
- /// getAlignOf constant expr - computes the alignment of a type in a target
- /// independent way (Note: the return type is an i32; Note: assumes that i8
- /// is byte aligned).
- ///
- static Constant *getAlignOf(const Type *Ty);
-
- /// getSizeOf constant expr - computes the size of a type in a target
- /// independent way (Note: the return type is an i64).
- ///
- static Constant *getSizeOf(const Type *Ty);
-
- /// ConstantExpr::get - Return a binary or shift operator constant expression,
+ /// get - Return a binary or shift operator constant expression,
/// folding if possible.
///
- static Constant *get(unsigned Opcode, Constant *C1, Constant *C2);
+ static Constant *get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags = 0);
- /// @brief Return an ICmp, FCmp, VICmp, or VFCmp comparison operator constant
- /// expression.
+ /// @brief Return an ICmp or FCmp comparison operator constant expression.
static Constant *getCompare(unsigned short pred, Constant *C1, Constant *C2);
- /// ConstantExpr::get* - Return some common constants without having to
+ /// get* - Return some common constants without having to
/// specify the full Instruction::OPCODE identifier.
///
- static Constant *getNeg(Constant *C);
- static Constant *getFNeg(Constant *C);
- static Constant *getNot(Constant *C);
- static Constant *getAdd(Constant *C1, Constant *C2);
- static Constant *getFAdd(Constant *C1, Constant *C2);
- static Constant *getSub(Constant *C1, Constant *C2);
- static Constant *getFSub(Constant *C1, Constant *C2);
- static Constant *getMul(Constant *C1, Constant *C2);
- static Constant *getFMul(Constant *C1, Constant *C2);
- static Constant *getUDiv(Constant *C1, Constant *C2);
- static Constant *getSDiv(Constant *C1, Constant *C2);
- static Constant *getFDiv(Constant *C1, Constant *C2);
- static Constant *getURem(Constant *C1, Constant *C2); // unsigned rem
- static Constant *getSRem(Constant *C1, Constant *C2); // signed rem
- static Constant *getFRem(Constant *C1, Constant *C2);
- static Constant *getAnd(Constant *C1, Constant *C2);
- static Constant *getOr(Constant *C1, Constant *C2);
- static Constant *getXor(Constant *C1, Constant *C2);
static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS);
static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
- static Constant *getVICmp(unsigned short pred, Constant *LHS, Constant *RHS);
- static Constant *getVFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
- static Constant *getShl(Constant *C1, Constant *C2);
- static Constant *getLShr(Constant *C1, Constant *C2);
- static Constant *getAShr(Constant *C1, Constant *C2);
/// Getelementptr form. std::vector<Value*> is only accepted for convenience:
/// all elements must be Constant's.
///
static Constant *getGetElementPtr(Constant *C,
- Constant* const *IdxList, unsigned NumIdx);
+ Constant *const *IdxList, unsigned NumIdx);
static Constant *getGetElementPtr(Constant *C,
Value* const *IdxList, unsigned NumIdx);
-
+
+ /// Create an "inbounds" getelementptr. See the documentation for the
+ /// "inbounds" flag in LangRef.html for details.
+ static Constant *getInBoundsGetElementPtr(Constant *C,
+ Constant *const *IdxList,
+ unsigned NumIdx);
+ static Constant *getInBoundsGetElementPtr(Constant *C,
+ Value* const *IdxList,
+ unsigned NumIdx);
+
static Constant *getExtractElement(Constant *Vec, Constant *Idx);
static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx);
static Constant *getShuffleVector(Constant *V1, Constant *V2, Constant *Mask);
@@ -759,11 +766,6 @@ public:
static Constant *getInsertValue(Constant *Agg, Constant *Val,
const unsigned *IdxList, unsigned NumIdx);
- /// Floating point negation must be implemented with f(x) = -0.0 - x. This
- /// method returns the negative zero constant for floating point or vector
- /// floating point types; for all other types, it returns the null value.
- static Constant *getZeroValueForNegationExpr(const Type *Ty);
-
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue.
virtual bool isNullValue() const { return false; }
@@ -792,7 +794,7 @@ public:
Constant *getWithOperands(const std::vector<Constant*> &Ops) const {
return getWithOperands(&Ops[0], (unsigned)Ops.size());
}
- Constant *getWithOperands(Constant* const *Ops, unsigned NumOps) const;
+ Constant *getWithOperands(Constant *const *Ops, unsigned NumOps) const;
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -805,7 +807,7 @@ public:
};
template <>
-struct OperandTraits<ConstantExpr> : VariadicOperandTraits<1> {
+struct OperandTraits<ConstantExpr> : public VariadicOperandTraits<1> {
};
DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantExpr, Constant)
@@ -845,62 +847,6 @@ public:
return V->getValueID() == UndefValueVal;
}
};
-
-//===----------------------------------------------------------------------===//
-/// MDString - a single uniqued string.
-/// These are used to efficiently contain a byte sequence for metadata.
-///
-class MDString : public Constant {
- MDString(const MDString &); // DO NOT IMPLEMENT
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
- MDString(const char *begin, const char *end);
-
- const char *StrBegin, *StrEnd;
-protected:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
- /// get() - Static factory methods - Return objects of the specified value.
- ///
- static MDString *get(const char *StrBegin, const char *StrEnd);
- static MDString *get(const std::string &Str);
-
- /// size() - The length of this string.
- ///
- intptr_t size() const { return StrEnd - StrBegin; }
-
- /// begin() - Pointer to the first byte of the string.
- ///
- const char *begin() const { return StrBegin; }
-
- /// end() - Pointer to one byte past the end of the string.
- ///
- const char *end() const { return StrEnd; }
-
- /// getType() specialization - Type is always MetadataTy.
- ///
- inline const Type *getType() const {
- return Type::MetadataTy;
- }
-
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue. This always returns false because getNullValue will never
- /// produce metadata.
- virtual bool isNullValue() const {
- return false;
- }
-
- virtual void destroyConstant();
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MDString *) { return true; }
- static bool classof(const Value *V) {
- return V->getValueID() == MDStringVal;
- }
-};
-
} // End llvm namespace
#endif
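
The net effect of this hunk is that constant creation is threaded through an LLVMContext instead of global singletons (TheTrueVal/TheFalseVal are gone). A sketch of the new call sites, assuming an LLVMContext obtained from the enclosing Module:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    void MakeConstants(LLVMContext &Ctx) {
      ConstantInt *True = ConstantInt::getTrue(Ctx);     // was getTrue()
      const IntegerType *I32 = IntegerType::get(Ctx, 32);
      Constant *FortyTwo = ConstantInt::get(I32, 42);    // zero-extended to fit
      ConstantInt *Wide = ConstantInt::get(Ctx, APInt(128, 7)); // type implied
    }
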
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 053091b86fe1..fb51430b481e 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -31,12 +31,13 @@ class PointerValType;
class VectorValType;
class IntegerValType;
class APInt;
+class LLVMContext;
class DerivedType : public Type {
friend class Type;
protected:
- explicit DerivedType(TypeID id) : Type(id) {}
+ explicit DerivedType(LLVMContext &C, TypeID id) : Type(C, id) {}
/// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type
/// that the current type has transitioned from being abstract to being
@@ -82,8 +83,11 @@ public:
/// Int64Ty.
/// @brief Integer representation type
class IntegerType : public DerivedType {
+ friend class LLVMContextImpl;
+
protected:
- explicit IntegerType(unsigned NumBits) : DerivedType(IntegerTyID) {
+ explicit IntegerType(LLVMContext &C, unsigned NumBits) :
+ DerivedType(C, IntegerTyID) {
setSubclassData(NumBits);
}
friend class TypeMap<IntegerValType, IntegerType>;
@@ -101,7 +105,7 @@ public:
/// that instance will be returned. Otherwise a new one will be created. Only
/// one instance with a given NumBits value is ever created.
/// @brief Get or create an IntegerType instance.
- static const IntegerType* get(unsigned NumBits);
+ static const IntegerType* get(LLVMContext &C, unsigned NumBits);
/// @brief Get the number of bits in this IntegerType
unsigned getBitWidth() const { return getSubclassData(); }
@@ -207,7 +211,8 @@ public:
/// and VectorType
class CompositeType : public DerivedType {
protected:
- inline explicit CompositeType(TypeID id) : DerivedType(id) { }
+ inline explicit CompositeType(LLVMContext &C, TypeID id) :
+ DerivedType(C, id) { }
public:
/// getTypeAtIndex - Given an index value into the type, return the type of
@@ -235,25 +240,28 @@ class StructType : public CompositeType {
friend class TypeMap<StructValType, StructType>;
StructType(const StructType &); // Do not implement
const StructType &operator=(const StructType &); // Do not implement
- StructType(const std::vector<const Type*> &Types, bool isPacked);
+ StructType(LLVMContext &C,
+ const std::vector<const Type*> &Types, bool isPacked);
public:
/// StructType::get - This static method is the primary way to create a
/// StructType.
///
- static StructType *get(const std::vector<const Type*> &Params,
+ static StructType *get(LLVMContext &Context,
+ const std::vector<const Type*> &Params,
bool isPacked=false);
/// StructType::get - Create an empty structure type.
///
- static StructType *get(bool isPacked=false) {
- return get(std::vector<const Type*>(), isPacked);
+ static StructType *get(LLVMContext &Context, bool isPacked=false) {
+ return get(Context, std::vector<const Type*>(), isPacked);
}
/// StructType::get - This static method is a convenience method for
/// creating structure types by specifying the elements as arguments.
/// Note that this method always returns a non-packed struct. To get
/// an empty struct, pass NULL, NULL.
- static StructType *get(const Type *type, ...) END_WITH_NULL;
+ static StructType *get(LLVMContext &Context,
+ const Type *type, ...) END_WITH_NULL;
/// isValidElementType - Return true if the specified type is valid as a
/// element type.
@@ -310,7 +318,7 @@ class SequentialType : public CompositeType {
SequentialType* this_() { return this; }
protected:
SequentialType(TypeID TID, const Type *ElType)
- : CompositeType(TID), ContainedType(ElType, this_()) {
+ : CompositeType(ElType->getContext(), TID), ContainedType(ElType, this_()) {
ContainedTys = &ContainedType;
NumContainedTys = 1;
}
@@ -396,7 +404,7 @@ public:
///
static VectorType *getInteger(const VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- const Type *EltTy = IntegerType::get(EltBits);
+ const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -406,7 +414,7 @@ public:
///
static VectorType *getExtendedElementVectorType(const VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- const Type *EltTy = IntegerType::get(EltBits * 2);
+ const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -418,7 +426,7 @@ public:
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
assert((EltBits & 1) == 0 &&
"Cannot truncate vector element with odd bit-width");
- const Type *EltTy = IntegerType::get(EltBits / 2);
+ const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -431,7 +439,7 @@ public:
/// @brief Return the number of bits in the Vector type.
inline unsigned getBitWidth() const {
- return NumElements *getElementType()->getPrimitiveSizeInBits();
+ return NumElements * getElementType()->getPrimitiveSizeInBits();
}
// Implement the AbstractTypeUser interface.
@@ -490,12 +498,12 @@ public:
class OpaqueType : public DerivedType {
OpaqueType(const OpaqueType &); // DO NOT IMPLEMENT
const OpaqueType &operator=(const OpaqueType &); // DO NOT IMPLEMENT
- OpaqueType();
+ OpaqueType(LLVMContext &C);
public:
/// OpaqueType::get - Static factory method for the OpaqueType class...
///
- static OpaqueType *get() {
- return new OpaqueType(); // All opaque types are distinct
+ static OpaqueType *get(LLVMContext &C) {
+ return new OpaqueType(C); // All opaque types are distinct
}
// Implement support for type inquiry through isa, cast, and dyn_cast:
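
DerivedTypes.h shows the same context-threading: IntegerType::get, StructType::get, and OpaqueType::get all grow an LLVMContext parameter. A sketch of building a struct type under the new signatures shown above:

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <vector>
    using namespace llvm;

    const StructType *MakePairTy(LLVMContext &Ctx) {
      std::vector<const Type*> Fields;
      Fields.push_back(IntegerType::get(Ctx, 8));   // was IntegerType::get(8)
      Fields.push_back(IntegerType::get(Ctx, 64));
      return StructType::get(Ctx, Fields, /*isPacked=*/false);
    }
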
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 613adb574e3c..b9da0fcfce19 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -19,6 +19,7 @@
#include <map>
#include <string>
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/System/Mutex.h"
#include "llvm/Target/TargetMachine.h"
@@ -26,6 +27,7 @@ namespace llvm {
struct GenericValue;
class Constant;
+class ExecutionEngine;
class Function;
class GlobalVariable;
class GlobalValue;
@@ -39,38 +41,66 @@ class TargetData;
class Type;
class ExecutionEngineState {
+public:
+ class MapUpdatingCVH : public CallbackVH {
+ ExecutionEngineState &EES;
+
+ public:
+ MapUpdatingCVH(ExecutionEngineState &EES, const GlobalValue *GV);
+
+ operator const GlobalValue*() const {
+ return cast<GlobalValue>(getValPtr());
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *new_value);
+ };
+
private:
+ ExecutionEngine &EE;
+
/// GlobalAddressMap - A mapping between LLVM global values and their
/// actualized version...
- std::map<const GlobalValue*, void *> GlobalAddressMap;
+ std::map<MapUpdatingCVH, void *> GlobalAddressMap;
/// GlobalAddressReverseMap - This is the reverse mapping of GlobalAddressMap,
/// used to convert raw addresses into the LLVM global value that is emitted
/// at the address. This map is not computed unless getGlobalValueAtAddress
/// is called at some point.
- std::map<void *, const GlobalValue*> GlobalAddressReverseMap;
+ std::map<void *, AssertingVH<const GlobalValue> > GlobalAddressReverseMap;
public:
- std::map<const GlobalValue*, void *> &
+ ExecutionEngineState(ExecutionEngine &EE) : EE(EE) {}
+
+ MapUpdatingCVH getVH(const GlobalValue *GV) {
+ return MapUpdatingCVH(*this, GV);
+ }
+
+ std::map<MapUpdatingCVH, void *> &
getGlobalAddressMap(const MutexGuard &) {
return GlobalAddressMap;
}
- std::map<void*, const GlobalValue*> &
+ std::map<void*, AssertingVH<const GlobalValue> > &
getGlobalAddressReverseMap(const MutexGuard &) {
return GlobalAddressReverseMap;
}
+
+ // Returns the address ToUnmap was mapped to.
+ void *RemoveMapping(const MutexGuard &, const GlobalValue *ToUnmap);
};
class ExecutionEngine {
const TargetData *TD;
- ExecutionEngineState state;
+ ExecutionEngineState EEState;
bool LazyCompilationDisabled;
bool GVCompilationDisabled;
bool SymbolSearchingDisabled;
bool DlsymStubsEnabled;
+ friend class EngineBuilder; // To allow access to JITCtor and InterpCtor.
+
protected:
/// Modules - This is a list of ModuleProvider's that we are JIT'ing from. We
/// use a smallvector to optimize for the case where there is only one module.
@@ -86,9 +116,13 @@ protected:
// To avoid having libexecutionengine depend on the JIT and interpreter
// libraries, the JIT and Interpreter set these functions to ctor pointers
// at startup time if they are linked in.
- typedef ExecutionEngine *(*EECtorFn)(ModuleProvider*, std::string*,
- CodeGenOpt::Level OptLevel);
- static EECtorFn JITCtor, InterpCtor;
+ static ExecutionEngine *(*JITCtor)(ModuleProvider *MP,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode);
+ static ExecutionEngine *(*InterpCtor)(ModuleProvider *MP,
+ std::string *ErrorStr);
/// LazyFunctionCreator - If an unknown function is needed, this function
/// pointer is invoked to create it. If this returns null, the JIT will abort.
@@ -118,8 +152,18 @@ public:
bool ForceInterpreter = false,
std::string *ErrorStr = 0,
CodeGenOpt::Level OptLevel =
- CodeGenOpt::Default);
-
+ CodeGenOpt::Default,
+ // Allocating globals with code breaks
+ // freeMachineCodeForFunction and is probably
+ // unsafe and bad for performance. However,
+ // we have clients who depend on this
+ // behavior, so we must support it.
+ // Eventually, when we're willing to break
+ // some backwards compatibility, this flag
+ // should be flipped to false, so that by
+ // default freeMachineCodeForFunction works.
+ bool GVsWithCode = true);
+
/// create - This is the factory method for creating an execution engine which
/// is appropriate for the current machine. This takes ownership of the
/// module.
@@ -128,11 +172,15 @@ public:
/// createJIT - This is the factory method for creating a JIT for the current
/// machine, it does not fall back to the interpreter. This takes ownership
/// of the ModuleProvider and JITMemoryManager if successful.
+ ///
+ /// Clients should make sure to initialize targets prior to calling this
+ /// function.
static ExecutionEngine *createJIT(ModuleProvider *MP,
std::string *ErrorStr = 0,
JITMemoryManager *JMM = 0,
CodeGenOpt::Level OptLevel =
- CodeGenOpt::Default);
+ CodeGenOpt::Default,
+ bool GVsWithCode = true);
/// addModuleProvider - Add a ModuleProvider to the list of modules that we
/// can JIT from. Note that this takes ownership of the ModuleProvider: when
@@ -189,8 +237,8 @@ public:
/// at the specified location. This is used internally as functions are JIT'd
/// and as global variables are laid out in memory. It can and should also be
/// used by clients of the EE that want to have an LLVM global overlay
- /// existing data in memory. After adding a mapping for GV, you must not
- /// destroy it until you've removed the mapping.
+ /// existing data in memory. Mappings are automatically removed when their
+ /// GlobalValue is destroyed.
void addGlobalMapping(const GlobalValue *GV, void *Addr);
/// clearAllGlobalMappings - Clear all global mappings and start over again
@@ -214,29 +262,23 @@ public:
void *getPointerToGlobalIfAvailable(const GlobalValue *GV);
/// getPointerToGlobal - This returns the address of the specified global
- /// value. This may involve code generation if it's a function. After
- /// getting a pointer to GV, it and all globals it transitively refers to have
- /// been passed to addGlobalMapping. You must clear the mapping for each
- /// referred-to global before destroying it. If a referred-to global RTG is a
- /// function and this ExecutionEngine is a JIT compiler, calling
- /// updateGlobalMapping(RTG, 0) will leak the function's machine code, so you
- /// should call freeMachineCodeForFunction(RTG) instead. Note that
- /// optimizations can move and delete non-external GlobalValues without
- /// notifying the ExecutionEngine.
+ /// value. This may involve code generation if it's a function.
///
void *getPointerToGlobal(const GlobalValue *GV);
/// getPointerToFunction - The different EE's represent function bodies in
/// different ways. They should each implement this to say what a function
- /// pointer should look like. See getPointerToGlobal for the requirements on
- /// destroying F and any GlobalValues it refers to.
+ /// pointer should look like. When F is destroyed, the ExecutionEngine will
+ /// remove its global mapping but will not yet free its machine code. Call
+ /// freeMachineCodeForFunction(F) explicitly to do that. Note that global
+ /// optimizations can destroy Functions without notifying the ExecutionEngine.
///
virtual void *getPointerToFunction(Function *F) = 0;
/// getPointerToFunctionOrStub - If the specified function has been
/// code-gen'd, return a pointer to the function. If not, compile it, or use
- /// a stub to implement lazy compilation if available. See getPointerToGlobal
- /// for the requirements on destroying F and any GlobalValues it refers to.
+ /// a stub to implement lazy compilation if available. See
+ /// getPointerToFunction for the requirements on destroying F.
///
virtual void *getPointerToFunctionOrStub(Function *F) {
// Default implementation, just codegen the function.
@@ -272,8 +314,7 @@ public:
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
- /// Emitter. See getPointerToGlobal for the requirements on destroying GV and
- /// any GlobalValues it refers to.
+ /// Emitter.
virtual void *getOrEmitGlobalVariable(const GlobalVariable *GV) {
return getPointerToGlobal((GlobalValue*)GV);
}
@@ -282,8 +323,8 @@ public:
/// the JIT. See JITEventListener.h for more details. Does not
/// take ownership of the argument. The argument may be NULL, in
/// which case these functions do nothing.
- virtual void RegisterJITEventListener(JITEventListener *L) {}
- virtual void UnregisterJITEventListener(JITEventListener *L) {}
+ virtual void RegisterJITEventListener(JITEventListener *) {}
+ virtual void UnregisterJITEventListener(JITEventListener *) {}
/// DisableLazyCompilation - If called, the JIT will abort if lazy compilation
/// is ever attempted.
@@ -357,6 +398,102 @@ protected:
const Type *Ty);
};
+namespace EngineKind {
+ // These are actually bitmasks that get or-ed together.
+ enum Kind {
+ JIT = 0x1,
+ Interpreter = 0x2
+ };
+ const static Kind Either = (Kind)(JIT | Interpreter);
+}
+
+/// EngineBuilder - Builder class for ExecutionEngines. Use this by
+/// stack-allocating a builder, chaining the various set* methods, and
+/// terminating it with a .create() call.
+class EngineBuilder {
+
+ private:
+ ModuleProvider *MP;
+ EngineKind::Kind WhichEngine;
+ std::string *ErrorStr;
+ CodeGenOpt::Level OptLevel;
+ JITMemoryManager *JMM;
+ bool AllocateGVsWithCode;
+
+ /// InitEngine - Does the common initialization of default options.
+ ///
+ void InitEngine() {
+ WhichEngine = EngineKind::Either;
+ ErrorStr = NULL;
+ OptLevel = CodeGenOpt::Default;
+ JMM = NULL;
+ AllocateGVsWithCode = false;
+ }
+
+ public:
+ /// EngineBuilder - Constructor for EngineBuilder. If create() is called and
+ /// is successful, the created engine takes ownership of the module
+ /// provider.
+ EngineBuilder(ModuleProvider *mp) : MP(mp) {
+ InitEngine();
+ }
+
+ /// EngineBuilder - Overloaded constructor that automatically creates an
+ /// ExistingModuleProvider for an existing module.
+ EngineBuilder(Module *m);
+
+ /// setEngineKind - Controls whether the user wants the interpreter, the JIT,
+ /// or whichever engine works. This option defaults to EngineKind::Either.
+ EngineBuilder &setEngineKind(EngineKind::Kind w) {
+ WhichEngine = w;
+ return *this;
+ }
+
+ /// setJITMemoryManager - Sets the memory manager to use. This allows
+ /// clients to customize their memory allocation policies. If create() is
+ /// called and is successful, the created engine takes ownership of the
+ /// memory manager. This option defaults to NULL.
+ EngineBuilder &setJITMemoryManager(JITMemoryManager *jmm) {
+ JMM = jmm;
+ return *this;
+ }
+
+ /// setErrorStr - Set the error string to write to on error. This option
+ /// defaults to NULL.
+ EngineBuilder &setErrorStr(std::string *e) {
+ ErrorStr = e;
+ return *this;
+ }
+
+ /// setOptLevel - Set the optimization level for the JIT. This option
+ /// defaults to CodeGenOpt::Default.
+ EngineBuilder &setOptLevel(CodeGenOpt::Level l) {
+ OptLevel = l;
+ return *this;
+ }
+
+ /// setAllocateGVsWithCode - Sets whether global values should be allocated
+ /// into the same buffer as code. For most applications this should be set
+ /// to false. Allocating globals with code breaks freeMachineCodeForFunction
+ /// and is probably unsafe and bad for performance. However, we have clients
+ /// who depend on this behavior, so we must support it. This option defaults
+ /// to false so that users of the new API can safely use the new memory
+ /// manager and free machine code.
+ EngineBuilder &setAllocateGVsWithCode(bool a) {
+ AllocateGVsWithCode = a;
+ return *this;
+ }
+
+ ExecutionEngine *create();
+
+};
+
+inline bool operator<(const ExecutionEngineState::MapUpdatingCVH& lhs,
+ const ExecutionEngineState::MapUpdatingCVH& rhs) {
+ return static_cast<const GlobalValue*>(lhs) <
+ static_cast<const GlobalValue*>(rhs);
+}
+
} // End llvm namespace
#endif
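
EngineBuilder replaces the ever-growing ExecutionEngine::create/createJIT parameter lists with chained setters. A sketch of the usage pattern the class comment describes, assuming a Module the caller owns and already-initialized targets:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"
    #include <string>
    using namespace llvm;

    ExecutionEngine *MakeJIT(Module *M) {
      std::string Err;
      ExecutionEngine *EE = EngineBuilder(M)  // wraps M in a ModuleProvider
          .setEngineKind(EngineKind::JIT)     // no interpreter fallback
          .setErrorStr(&Err)
          .setOptLevel(CodeGenOpt::Default)
          .create();                          // NULL on failure; Err says why
      return EE;                              // owns the module on success
    }
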
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index dd76f26c8776..8d3a1d77f04e 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -16,13 +16,28 @@
#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+#include <vector>
namespace llvm {
class Function;
+class MachineFunction;
/// Empty for now, but this object will contain all details about the
/// generated machine code that a Listener might care about.
struct JITEvent_EmittedFunctionDetails {
+ const MachineFunction *MF;
+
+ struct LineStart {
+ // The address at which the current line changes.
+ uintptr_t Address;
+ // The new location information. These can be translated to
+ // DebugLocTuples using MF->getDebugLocTuple().
+ DebugLoc Loc;
+ };
+ // This holds line boundary information sorted by address.
+ std::vector<LineStart> LineStarts;
};
/// JITEventListener - This interface is used by the JIT to notify clients about
@@ -52,7 +67,9 @@ public:
virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr) {}
};
+// These return NULL if support isn't available.
JITEventListener *createMacOSJITEventListener();
+JITEventListener *createOProfileJITEventListener();
} // end namespace llvm.
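
With the MF pointer and LineStarts vector added above, a listener can map emitted addresses back to source lines. A sketch of a listener consuming them; the NotifyFunctionEmitted signature is assumed from the surrounding header rather than shown in this hunk:

    #include "llvm/ExecutionEngine/JITEventListener.h"
    #include <cstdio>
    using namespace llvm;

    class LineBoundaryPrinter : public JITEventListener {
    public:
      virtual void NotifyFunctionEmitted(
          const Function &F, void *Code, size_t Size,
          const JITEvent_EmittedFunctionDetails &D) {
        // LineStarts is sorted by address; each entry marks where a new
        // source line begins in the emitted machine code.
        for (unsigned i = 0, e = D.LineStarts.size(); i != e; ++i)
          std::printf("line boundary at %p\n", (void*)D.LineStarts[i].Address);
      }
    };
    // Registered via ExecutionEngine::RegisterJITEventListener(&Listener).
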
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 688a1626d2c3..21dee553474c 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -15,9 +15,12 @@
#define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
#include "llvm/Support/DataTypes.h"
+#include <string>
namespace llvm {
+
class Function;
+ class GlobalValue;
/// JITMemoryManager - This interface is used by the JIT to allocate and manage
/// memory for the code generated by the JIT. This can be reimplemented by
@@ -28,6 +31,7 @@ protected:
bool HasGOT;
bool SizeRequired;
public:
+
JITMemoryManager() : HasGOT(false), SizeRequired(false) {}
virtual ~JITMemoryManager();
@@ -37,11 +41,16 @@ public:
/// setMemoryWritable - When code generation is in progress,
/// the code pages may need permissions changed.
- virtual void setMemoryWritable(void) = 0;
+ virtual void setMemoryWritable() = 0;
/// setMemoryExecutable - When code generation is done and we're ready to
/// start execution, the code pages may need permissions changed.
- virtual void setMemoryExecutable(void) = 0;
+ virtual void setMemoryExecutable() = 0;
+
+ /// setPoisonMemory - Setting this flag to true makes the memory manager
+ /// write garbage values over freed memory. This is useful for testing and
+ /// debugging, and is turned on by default in debug mode.
+ virtual void setPoisonMemory(bool poison) = 0;
//===--------------------------------------------------------------------===//
// Global Offset Table Management
@@ -82,16 +91,19 @@ public:
//===--------------------------------------------------------------------===//
// Main Allocation Functions
//===--------------------------------------------------------------------===//
-
- /// startFunctionBody - When we start JITing a function, the JIT calls this
+
+ /// startFunctionBody - When we start JITing a function, the JIT calls this
/// method to allocate a block of free RWX memory, which returns a pointer to
- /// it. The JIT doesn't know ahead of time how much space it will need to
- /// emit the function, so it doesn't pass in the size. Instead, this method
- /// is required to pass back a "valid size". The JIT will be careful to not
- /// write more than the returned ActualSize bytes of memory.
- virtual uint8_t *startFunctionBody(const Function *F,
+ /// it. If the JIT wants to request a block of memory of at least a certain
+ /// size, it passes that value as ActualSize, and this method returns a block
+ /// with at least that much space. If the JIT doesn't know ahead of time how
+ /// much space it will need to emit the function, it passes 0 for the
+ /// ActualSize. In either case, this method is required to pass back the size
+ /// of the allocated block through ActualSize. The JIT will be careful to
+ /// not write more than the returned ActualSize bytes of memory.
+ virtual uint8_t *startFunctionBody(const Function *F,
uintptr_t &ActualSize) = 0;
-
+
/// allocateStub - This method is called by the JIT to allocate space for a
/// function stub (used to handle limited branch displacements) while it is
/// JIT compiling a function. For example, if foo calls bar, and if bar
@@ -112,9 +124,14 @@ public:
virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
uint8_t *FunctionEnd) = 0;
- /// allocateSpace - Allocate a memory block of the given size.
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
-
+
+ /// allocateGlobal - Allocate memory for a global.
+ ///
+ virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0;
+
/// deallocateMemForFunction - Free JIT memory for the specified function.
/// This is never called when the JIT is currently emitting a function.
virtual void deallocateMemForFunction(const Function *F) = 0;
@@ -128,6 +145,49 @@ public:
/// the exception table.
virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
uint8_t *TableEnd, uint8_t* FrameRegister) = 0;
+
+ /// CheckInvariants - For testing only. Return true if all internal
+ /// invariants are preserved, or return false and set ErrorStr to a helpful
+ /// error message.
+ virtual bool CheckInvariants(std::string &ErrorStr) {
+ return true;
+ }
+
+ /// GetDefaultCodeSlabSize - For testing only. Returns DefaultCodeSlabSize
+ /// from DefaultJITMemoryManager.
+ virtual size_t GetDefaultCodeSlabSize() {
+ return 0;
+ }
+
+  /// GetDefaultDataSlabSize - For testing only. Returns the default data
+  /// slab size used by DefaultJITMemoryManager.
+ virtual size_t GetDefaultDataSlabSize() {
+ return 0;
+ }
+
+  /// GetDefaultStubSlabSize - For testing only. Returns the default stub
+  /// slab size used by DefaultJITMemoryManager.
+ virtual size_t GetDefaultStubSlabSize() {
+ return 0;
+ }
+
+ /// GetNumCodeSlabs - For testing only. Returns the number of MemoryBlocks
+ /// allocated for code.
+ virtual unsigned GetNumCodeSlabs() {
+ return 0;
+ }
+
+ /// GetNumDataSlabs - For testing only. Returns the number of MemoryBlocks
+ /// allocated for data.
+ virtual unsigned GetNumDataSlabs() {
+ return 0;
+ }
+
+ /// GetNumStubSlabs - For testing only. Returns the number of MemoryBlocks
+ /// allocated for function stubs.
+ virtual unsigned GetNumStubSlabs() {
+ return 0;
+ }
};
} // end namespace llvm.
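
A sketch of the revised startFunctionBody contract (not part of the patch;
MemMgr is an assumed JITMemoryManager*, F a const Function*, and BytesEmitted
a hypothetical count of bytes actually written):

  uintptr_t ActualSize = 0;  // 0 = the JIT does not know the size up front
  uint8_t *Start = MemMgr->startFunctionBody(F, ActualSize);
  // ... emit at most ActualSize bytes beginning at Start ...
  MemMgr->endFunctionBody(F, Start, Start + BytesEmitted);
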
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 34ced973dd55..088c99952e9f 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -19,9 +19,9 @@
#define LLVM_FUNCTION_H
#include "llvm/GlobalValue.h"
+#include "llvm/CallingConv.h"
#include "llvm/BasicBlock.h"
#include "llvm/Argument.h"
-#include "llvm/Support/Annotation.h"
#include "llvm/Attributes.h"
namespace llvm {
@@ -46,7 +46,7 @@ template<> struct ilist_traits<BasicBlock>
static ValueSymbolTable *getSymTab(Function *ItemParent);
private:
- mutable ilist_node<BasicBlock> Sentinel;
+ mutable ilist_half_node<BasicBlock> Sentinel;
};
template<> struct ilist_traits<Argument>
@@ -63,10 +63,10 @@ template<> struct ilist_traits<Argument>
static ValueSymbolTable *getSymTab(Function *ItemParent);
private:
- mutable ilist_node<Argument> Sentinel;
+ mutable ilist_half_node<Argument> Sentinel;
};
-class Function : public GlobalValue, public Annotable,
+class Function : public GlobalValue,
public ilist_node<Function> {
public:
typedef iplist<Argument> ArgumentListType;
@@ -87,7 +87,7 @@ private:
AttrListPtr AttributeList; ///< Parameter attributes
// The Calling Convention is stored in Value::SubclassData.
- /*unsigned CallingConvention;*/
+ /*CallingConv::ID CallingConvention;*/
friend class SymbolTableListTraits<Function, Module>;
@@ -114,11 +114,11 @@ private:
/// the module.
///
Function(const FunctionType *Ty, LinkageTypes Linkage,
- const std::string &N = "", Module *M = 0);
+ const Twine &N = "", Module *M = 0);
public:
static Function *Create(const FunctionType *Ty, LinkageTypes Linkage,
- const std::string &N = "", Module *M = 0) {
+ const Twine &N = "", Module *M = 0) {
return new(0) Function(Ty, Linkage, N, M);
}
@@ -129,7 +129,7 @@ public:
/// getContext - Return a pointer to the LLVMContext associated with this
/// function, or NULL if this function is not bound to a context yet.
- LLVMContext* getContext();
+ LLVMContext &getContext() const;
/// isVarArg - Return true if this function takes a variable number of
/// arguments.
@@ -151,12 +151,14 @@ public:
unsigned getIntrinsicID() const;
bool isIntrinsic() const { return getIntrinsicID() != 0; }
- /// getCallingConv()/setCallingConv(uint) - These method get and set the
+  /// getCallingConv()/setCallingConv(CC) - These methods get and set the
/// calling convention of this function. The enum values for the known
/// calling conventions are defined in CallingConv.h.
- unsigned getCallingConv() const { return SubclassData >> 1; }
- void setCallingConv(unsigned CC) {
- SubclassData = (SubclassData & 1) | (CC << 1);
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(SubclassData >> 1);
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ SubclassData = (SubclassData & 1) | (static_cast<unsigned>(CC) << 1);
}
/// getAttributes - Return the attribute list for this Function.
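
An illustration (not part of the patch) of the SubclassData packing above:
the low bit is reserved for other per-function state and the calling
convention occupies the remaining bits, so the accessors round-trip. F is an
assumed llvm::Function*.

  F->setCallingConv(llvm::CallingConv::Fast);
  assert(F->getCallingConv() == llvm::CallingConv::Fast);
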
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h
index b106116f3750..9b3f4505697d 100644
--- a/include/llvm/GlobalAlias.h
+++ b/include/llvm/GlobalAlias.h
@@ -40,7 +40,7 @@ public:
}
/// GlobalAlias ctor - If a parent module is specified, the alias is
/// automatically inserted into the end of the specified module's alias list.
- GlobalAlias(const Type *Ty, LinkageTypes Linkage, const std::string &Name = "",
+ GlobalAlias(const Type *Ty, LinkageTypes Linkage, const Twine &Name = "",
Constant* Aliasee = 0, Module *Parent = 0);
/// Provide fast operand accessors
@@ -88,7 +88,7 @@ public:
};
template <>
-struct OperandTraits<GlobalAlias> : FixedNumOperandTraits<1> {
+struct OperandTraits<GlobalAlias> : public FixedNumOperandTraits<1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Value)
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 3b7f67d5d036..7b0de34d9cb6 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -37,13 +37,14 @@ public:
WeakAnyLinkage, ///< Keep one copy of named function when linking (weak)
WeakODRLinkage, ///< Same, but only replaced by something equivalent.
AppendingLinkage, ///< Special purpose, only applies to global arrays
- InternalLinkage, ///< Rename collisions when linking (static functions)
- PrivateLinkage, ///< Like Internal, but omit from symbol table
+ InternalLinkage, ///< Rename collisions when linking (static functions).
+ PrivateLinkage, ///< Like Internal, but omit from symbol table.
+ LinkerPrivateLinkage, ///< Like Private, but linker removes.
DLLImportLinkage, ///< Function to be imported from DLL
- DLLExportLinkage, ///< Function to be accessible from DLL
- ExternalWeakLinkage,///< ExternalWeak linkage description
- GhostLinkage, ///< Stand-in functions for streaming fns from BC files
- CommonLinkage ///< Tentative definitions
+ DLLExportLinkage, ///< Function to be accessible from DLL.
+ ExternalWeakLinkage,///< ExternalWeak linkage description.
+ GhostLinkage, ///< Stand-in functions for streaming fns from BC files.
+ CommonLinkage ///< Tentative definitions.
};
/// @brief An enumeration for the kinds of visibility of global values.
@@ -55,10 +56,10 @@ public:
protected:
GlobalValue(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
- LinkageTypes linkage, const std::string &name = "")
+ LinkageTypes linkage, const Twine &Name = "")
: Constant(ty, vty, Ops, NumOps), Parent(0),
Linkage(linkage), Visibility(DefaultVisibility), Alignment(0) {
- if (!name.empty()) setName(name);
+ setName(Name);
}
Module *Parent;
@@ -80,6 +81,7 @@ public:
}
VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); }
+ bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; }
bool hasHiddenVisibility() const { return Visibility == HiddenVisibility; }
bool hasProtectedVisibility() const {
return Visibility == ProtectedVisibility;
@@ -88,7 +90,7 @@ public:
bool hasSection() const { return !Section.empty(); }
const std::string &getSection() const { return Section; }
- void setSection(const std::string &S) { Section = S; }
+ void setSection(const StringRef &S) { Section = S; }
/// If the usage is empty (except transitively dead constants), then this
/// global value can be safely deleted since the destructor will
@@ -122,8 +124,10 @@ public:
bool hasAppendingLinkage() const { return Linkage == AppendingLinkage; }
bool hasInternalLinkage() const { return Linkage == InternalLinkage; }
bool hasPrivateLinkage() const { return Linkage == PrivateLinkage; }
+ bool hasLinkerPrivateLinkage() const { return Linkage==LinkerPrivateLinkage; }
bool hasLocalLinkage() const {
- return Linkage == InternalLinkage || Linkage == PrivateLinkage;
+ return hasInternalLinkage() || hasPrivateLinkage() ||
+ hasLinkerPrivateLinkage();
}
bool hasDLLImportLinkage() const { return Linkage == DLLImportLinkage; }
bool hasDLLExportLinkage() const { return Linkage == DLLExportLinkage; }
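
A sketch (not part of the patch) of the new linkage kind: linker_private
globals now count as local linkage alongside internal and private. GV is an
assumed llvm::GlobalValue*.

  GV->setLinkage(llvm::GlobalValue::LinkerPrivateLinkage);
  assert(GV->hasLinkerPrivateLinkage() && GV->hasLocalLinkage());
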
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index ae64ccf614be..56b2b9d3ebac 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -28,6 +28,7 @@ namespace llvm {
class Module;
class Constant;
+class LLVMContext;
template<typename ValueSubClass, typename ItemParentClass>
class SymbolTableListTraits;
@@ -49,15 +50,16 @@ public:
}
/// GlobalVariable ctor - If a parent module is specified, the global is
/// automatically inserted into the end of the specified modules global list.
- GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes Linkage,
- Constant *Initializer = 0, const std::string &Name = "",
- Module *Parent = 0, bool ThreadLocal = false,
- unsigned AddressSpace = 0);
+ GlobalVariable(LLVMContext &Context, const Type *Ty, bool isConstant,
+ LinkageTypes Linkage,
+ Constant *Initializer = 0, const Twine &Name = "",
+ bool ThreadLocal = false, unsigned AddressSpace = 0);
/// GlobalVariable ctor - This creates a global and inserts it before the
/// specified other global.
- GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes Linkage,
- Constant *Initializer, const std::string &Name,
- GlobalVariable *InsertBefore, bool ThreadLocal = false,
+ GlobalVariable(Module &M, const Type *Ty, bool isConstant,
+ LinkageTypes Linkage, Constant *Initializer,
+ const Twine &Name,
+ GlobalVariable *InsertBefore = 0, bool ThreadLocal = false,
unsigned AddressSpace = 0);
~GlobalVariable() {
@@ -149,7 +151,7 @@ public:
};
template <>
-struct OperandTraits<GlobalVariable> : OptionalOperandTraits<> {
+struct OperandTraits<GlobalVariable> : public OptionalOperandTraits<> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
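
A sketch (not part of the patch) of the module-inserting constructor above;
the module is now passed by reference up front instead of as a trailing
parent pointer. M is an assumed llvm::Module& and Init an assumed
llvm::Constant*.

  llvm::GlobalVariable *GV =
    new llvm::GlobalVariable(M, llvm::Type::getInt32Ty(M.getContext()),
                             /*isConstant=*/false,
                             llvm::GlobalValue::InternalLinkage,
                             Init, "counter");
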
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index 84292cf19e3d..bc55031b0d42 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -31,18 +31,22 @@ class InlineAsm : public Value {
std::string AsmString, Constraints;
bool HasSideEffects;
+ bool IsMsAsm;
- InlineAsm(const FunctionType *Ty, const std::string &AsmString,
- const std::string &Constraints, bool hasSideEffects);
+ InlineAsm(const FunctionType *Ty, const StringRef &AsmString,
+ const StringRef &Constraints, bool hasSideEffects,
+ bool isMsAsm = false);
virtual ~InlineAsm();
public:
/// InlineAsm::get - Return the specified uniqued inline asm string.
///
- static InlineAsm *get(const FunctionType *Ty, const std::string &AsmString,
- const std::string &Constraints, bool hasSideEffects);
+ static InlineAsm *get(const FunctionType *Ty, const StringRef &AsmString,
+ const StringRef &Constraints, bool hasSideEffects,
+ bool isMsAsm = false);
bool hasSideEffects() const { return HasSideEffects; }
+ bool isMsAsm() const { return IsMsAsm; }
/// getType - InlineAsm's are always pointers.
///
@@ -61,7 +65,7 @@ public:
/// the specified constraint string is legal for the type. This returns true
/// if legal, false if not.
///
- static bool Verify(const FunctionType *Ty, const std::string &Constraints);
+ static bool Verify(const FunctionType *Ty, const StringRef &Constraints);
// Constraint String Parsing
enum ConstraintPrefix {
@@ -106,7 +110,7 @@ public:
/// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
- bool Parse(const std::string &Str,
+ bool Parse(const StringRef &Str,
std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar);
};
@@ -114,7 +118,7 @@ public:
/// constraints and their prefixes. If this returns an empty vector, and if
/// the constraint string itself isn't empty, there was an error parsing.
static std::vector<ConstraintInfo>
- ParseConstraints(const std::string &ConstraintString);
+ ParseConstraints(const StringRef &ConstraintString);
/// ParseConstraints - Parse the constraints of this inlineasm object,
/// returning them the same way that ParseConstraints(str) does.
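
A sketch (not part of the patch) of the StringRef-based factory above, which
lets string literals through without materializing std::string temporaries.
FTy is an assumed llvm::FunctionType*.

  llvm::InlineAsm *IA =
    llvm::InlineAsm::get(FTy, "nop", "", /*hasSideEffects=*/true);
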
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index 1eab983da68a..cc923dec2987 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -18,10 +18,13 @@
#include "llvm/Instruction.h"
#include "llvm/OperandTraits.h"
+#include "llvm/Operator.h"
#include "llvm/DerivedTypes.h"
namespace llvm {
+class LLVMContext;
+
//===----------------------------------------------------------------------===//
// TerminatorInst Class
//===----------------------------------------------------------------------===//
@@ -50,7 +53,7 @@ protected:
virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0;
public:
- virtual Instruction *clone() const = 0;
+ virtual TerminatorInst *clone() const = 0;
/// getNumSuccessors - Return the number of successors that this terminator
/// has.
@@ -87,7 +90,6 @@ public:
class UnaryInstruction : public Instruction {
void *operator new(size_t, unsigned); // Do not implement
- UnaryInstruction(const UnaryInstruction&); // Do not implement
protected:
UnaryInstruction(const Type *Ty, unsigned iType, Value *V,
@@ -128,7 +130,7 @@ public:
};
template <>
-struct OperandTraits<UnaryInstruction> : FixedNumOperandTraits<1> {
+struct OperandTraits<UnaryInstruction> : public FixedNumOperandTraits<1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
@@ -142,9 +144,9 @@ class BinaryOperator : public Instruction {
protected:
void init(BinaryOps iType);
BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty,
- const std::string &Name, Instruction *InsertBefore);
+ const Twine &Name, Instruction *InsertBefore);
BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty,
- const std::string &Name, BasicBlock *InsertAtEnd);
+ const Twine &Name, BasicBlock *InsertAtEnd);
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -160,7 +162,7 @@ public:
/// Instruction is allowed to be a dereferenced end iterator.
///
static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2,
- const std::string &Name = "",
+ const Twine &Name = "",
Instruction *InsertBefore = 0);
/// Create() - Construct a binary instruction, given the opcode and the two
@@ -168,49 +170,111 @@ public:
/// BasicBlock specified.
///
static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2,
- const std::string &Name,
- BasicBlock *InsertAtEnd);
+ const Twine &Name, BasicBlock *InsertAtEnd);
/// Create* - These methods just forward to Create, and are useful when you
/// statically know what type of instruction you're going to create. These
/// helpers just save some typing.
#define HANDLE_BINARY_INST(N, OPC, CLASS) \
static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
- const std::string &Name = "") {\
+ const Twine &Name = "") {\
return Create(Instruction::OPC, V1, V2, Name);\
}
#include "llvm/Instruction.def"
#define HANDLE_BINARY_INST(N, OPC, CLASS) \
static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
- const std::string &Name, BasicBlock *BB) {\
+ const Twine &Name, BasicBlock *BB) {\
return Create(Instruction::OPC, V1, V2, Name, BB);\
}
#include "llvm/Instruction.def"
#define HANDLE_BINARY_INST(N, OPC, CLASS) \
static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
- const std::string &Name, Instruction *I) {\
+ const Twine &Name, Instruction *I) {\
return Create(Instruction::OPC, V1, V2, Name, I);\
}
#include "llvm/Instruction.def"
+ /// CreateNSWAdd - Create an Add operator with the NSW flag set.
+ ///
+ static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = CreateAdd(V1, V2, Name);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = CreateAdd(V1, V2, Name, BB);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = CreateAdd(V1, V2, Name, I);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+
+  /// CreateNSWSub - Create a Sub operator with the NSW flag set.
+ ///
+ static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = CreateSub(V1, V2, Name);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = CreateSub(V1, V2, Name, BB);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = CreateSub(V1, V2, Name, I);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+
+ /// CreateExactSDiv - Create an SDiv operator with the exact flag set.
+ ///
+ static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = CreateSDiv(V1, V2, Name);
+ BO->setIsExact(true);
+ return BO;
+ }
+ static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = CreateSDiv(V1, V2, Name, BB);
+ BO->setIsExact(true);
+ return BO;
+ }
+ static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = CreateSDiv(V1, V2, Name, I);
+ BO->setIsExact(true);
+ return BO;
+ }
+
/// Helper functions to construct and inspect unary operations (NEG and NOT)
/// via binary operators SUB and XOR:
///
/// CreateNeg, CreateNot - Create the NEG and NOT
/// instructions out of SUB and XOR instructions.
///
- static BinaryOperator *CreateNeg(Value *Op, const std::string &Name = "",
+ static BinaryOperator *CreateNeg(Value *Op, const Twine &Name = "",
Instruction *InsertBefore = 0);
- static BinaryOperator *CreateNeg(Value *Op, const std::string &Name,
+ static BinaryOperator *CreateNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd);
- static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name = "",
+ static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name = "",
Instruction *InsertBefore = 0);
- static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name,
+ static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd);
- static BinaryOperator *CreateNot(Value *Op, const std::string &Name = "",
+ static BinaryOperator *CreateNot(Value *Op, const Twine &Name = "",
Instruction *InsertBefore = 0);
- static BinaryOperator *CreateNot(Value *Op, const std::string &Name,
+ static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd);
/// isNeg, isFNeg, isNot - Check if the given Value is a
@@ -244,6 +308,30 @@ public:
///
bool swapOperands();
+  /// setHasNoUnsignedWrap - Set or clear the nuw flag on this instruction,
+ /// which must be an operator which supports this flag. See LangRef.html
+ /// for the meaning of this flag.
+ void setHasNoUnsignedWrap(bool b = true);
+
+ /// setHasNoSignedWrap - Set or clear the nsw flag on this instruction,
+ /// which must be an operator which supports this flag. See LangRef.html
+ /// for the meaning of this flag.
+ void setHasNoSignedWrap(bool b = true);
+
+ /// setIsExact - Set or clear the exact flag on this instruction,
+ /// which must be an operator which supports this flag. See LangRef.html
+ /// for the meaning of this flag.
+ void setIsExact(bool b = true);
+
+ /// hasNoUnsignedWrap - Determine whether the no unsigned wrap flag is set.
+ bool hasNoUnsignedWrap() const;
+
+ /// hasNoSignedWrap - Determine whether the no signed wrap flag is set.
+ bool hasNoSignedWrap() const;
+
+ /// isExact - Determine whether the exact flag is set.
+ bool isExact() const;
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const BinaryOperator *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -255,7 +343,7 @@ public:
};
template <>
-struct OperandTraits<BinaryOperator> : FixedNumOperandTraits<2> {
+struct OperandTraits<BinaryOperator> : public FixedNumOperandTraits<2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
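
A sketch (not part of the patch) of the convenience creators above: each one
forwards to the plain creator and then sets the flag, so the two spellings
below are equivalent. A, B, and InsertPt are assumed values and insertion
point.

  llvm::BinaryOperator *Sum =
    llvm::BinaryOperator::CreateNSWAdd(A, B, "sum", InsertPt);
  assert(Sum->hasNoSignedWrap());
  // same as: CreateAdd(A, B, "sum", InsertPt) then setHasNoSignedWrap(true)
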
@@ -271,22 +359,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
/// if (isa<CastInst>(Instr)) { ... }
/// @brief Base class of casting instructions.
class CastInst : public UnaryInstruction {
- /// @brief Copy constructor
- CastInst(const CastInst &CI)
- : UnaryInstruction(CI.getType(), CI.getOpcode(), CI.getOperand(0)) {
- }
- /// @brief Do not allow default construction
- CastInst();
protected:
/// @brief Constructor with insert-before-instruction semantics for subclasses
CastInst(const Type *Ty, unsigned iType, Value *S,
- const std::string &NameStr = "", Instruction *InsertBefore = 0)
+ const Twine &NameStr = "", Instruction *InsertBefore = 0)
: UnaryInstruction(Ty, iType, S, InsertBefore) {
setName(NameStr);
}
/// @brief Constructor with insert-at-end-of-block semantics for subclasses
CastInst(const Type *Ty, unsigned iType, Value *S,
- const std::string &NameStr, BasicBlock *InsertAtEnd)
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
: UnaryInstruction(Ty, iType, S, InsertAtEnd) {
setName(NameStr);
}
@@ -301,7 +383,7 @@ public:
Instruction::CastOps, ///< The opcode of the cast instruction
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which cast should be made
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
/// Provides a way to construct any of the CastInst subclasses using an
@@ -314,7 +396,7 @@ public:
Instruction::CastOps, ///< The opcode for the cast instruction
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which operand is casted
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -322,7 +404,7 @@ public:
static CastInst *CreateZExtOrBitCast(
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which cast should be made
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
@@ -330,7 +412,7 @@ public:
static CastInst *CreateZExtOrBitCast(
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which operand is casted
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -338,7 +420,7 @@ public:
static CastInst *CreateSExtOrBitCast(
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which cast should be made
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
@@ -346,7 +428,7 @@ public:
static CastInst *CreateSExtOrBitCast(
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which operand is casted
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -354,7 +436,7 @@ public:
static CastInst *CreatePointerCast(
Value *S, ///< The pointer value to be casted (operand 0)
const Type *Ty, ///< The type to which operand is casted
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -362,7 +444,7 @@ public:
static CastInst *CreatePointerCast(
Value *S, ///< The pointer value to be casted (operand 0)
const Type *Ty, ///< The type to which cast should be made
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
@@ -371,7 +453,7 @@ public:
Value *S, ///< The pointer value to be casted (operand 0)
const Type *Ty, ///< The type to which cast should be made
bool isSigned, ///< Whether to regard S as signed or not
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
@@ -380,7 +462,7 @@ public:
Value *S, ///< The integer value to be casted (operand 0)
const Type *Ty, ///< The integer type to which operand is casted
bool isSigned, ///< Whether to regard S as signed or not
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -388,7 +470,7 @@ public:
static CastInst *CreateFPCast(
Value *S, ///< The floating point value to be casted
const Type *Ty, ///< The floating point type to cast to
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
@@ -396,7 +478,7 @@ public:
static CastInst *CreateFPCast(
Value *S, ///< The floating point value to be casted
const Type *Ty, ///< The floating point type to cast to
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -404,7 +486,7 @@ public:
static CastInst *CreateTruncOrBitCast(
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which cast should be made
- const std::string &Name = "", ///< Name for the instruction
+ const Twine &Name = "", ///< Name for the instruction
Instruction *InsertBefore = 0 ///< Place to insert the instruction
);
@@ -412,7 +494,7 @@ public:
static CastInst *CreateTruncOrBitCast(
Value *S, ///< The value to be casted (operand 0)
const Type *Ty, ///< The type to which operand is casted
- const std::string &Name, ///< The name for the instruction
+ const Twine &Name, ///< The name for the instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -471,7 +553,7 @@ public:
const Type *SrcTy, ///< SrcTy of 1st cast
const Type *MidTy, ///< DstTy of 1st cast & SrcTy of 2nd cast
const Type *DstTy, ///< DstTy of 2nd cast
- const Type *IntPtrTy ///< Integer type corresponding to Ptr types
+ const Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null
);
/// @brief Return the opcode of this CastInst
@@ -512,11 +594,11 @@ class CmpInst: public Instruction {
CmpInst(); // do not implement
protected:
CmpInst(const Type *ty, Instruction::OtherOps op, unsigned short pred,
- Value *LHS, Value *RHS, const std::string &Name = "",
+ Value *LHS, Value *RHS, const Twine &Name = "",
Instruction *InsertBefore = 0);
CmpInst(const Type *ty, Instruction::OtherOps op, unsigned short pred,
- Value *LHS, Value *RHS, const std::string &Name,
+ Value *LHS, Value *RHS, const Twine &Name,
BasicBlock *InsertAtEnd);
public:
@@ -569,8 +651,9 @@ public:
/// instruction into a BasicBlock right before the specified instruction.
/// The specified Instruction is allowed to be a dereferenced end iterator.
/// @brief Create a CmpInst
- static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
- Value *S2, const std::string &Name = "",
+ static CmpInst *Create(OtherOps Op,
+ unsigned short predicate, Value *S1,
+ Value *S2, const Twine &Name = "",
Instruction *InsertBefore = 0);
/// Construct a compare instruction, given the opcode, the predicate and the
@@ -578,8 +661,7 @@ public:
/// the BasicBlock specified.
/// @brief Create a CmpInst
static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
- Value *S2, const std::string &Name,
- BasicBlock *InsertAtEnd);
+ Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
/// @brief Get the opcode casted to the right type
OtherOps getOpcode() const {
@@ -655,26 +737,26 @@ public:
static inline bool classof(const CmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp ||
- I->getOpcode() == Instruction::FCmp ||
- I->getOpcode() == Instruction::VICmp ||
- I->getOpcode() == Instruction::VFCmp;
+ I->getOpcode() == Instruction::FCmp;
}
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
- /// @brief Create a result type for fcmp/icmp (but not vicmp/vfcmp)
+
+ /// @brief Create a result type for fcmp/icmp
static const Type* makeCmpResultType(const Type* opnd_type) {
if (const VectorType* vt = dyn_cast<const VectorType>(opnd_type)) {
- return VectorType::get(Type::Int1Ty, vt->getNumElements());
+ return VectorType::get(Type::getInt1Ty(opnd_type->getContext()),
+ vt->getNumElements());
}
- return Type::Int1Ty;
+ return Type::getInt1Ty(opnd_type->getContext());
}
};
// FIXME: these are redundant if CmpInst < BinaryOperator
template <>
-struct OperandTraits<CmpInst> : FixedNumOperandTraits<2> {
+struct OperandTraits<CmpInst> : public FixedNumOperandTraits<2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
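
A sketch (not part of the patch) of the updated makeCmpResultType, which now
recovers the LLVMContext from the operand type: scalars yield i1 and vectors
yield a vector of i1. Ctx is an assumed llvm::LLVMContext&.

  const llvm::Type *Scalar = llvm::Type::getInt32Ty(Ctx);
  const llvm::Type *Vec    = llvm::VectorType::get(Scalar, 4);
  const llvm::Type *R1 = llvm::CmpInst::makeCmpResultType(Scalar); // i1
  const llvm::Type *R2 = llvm::CmpInst::makeCmpResultType(Vec);    // <4 x i1>
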
diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def
index 98fda7770548..e603c1257ef1 100644
--- a/include/llvm/Instruction.def
+++ b/include/llvm/Instruction.def
@@ -169,10 +169,8 @@ HANDLE_OTHER_INST(52, InsertElement, InsertElementInst) // insert into vector
HANDLE_OTHER_INST(53, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
HANDLE_OTHER_INST(54, ExtractValue, ExtractValueInst)// extract from aggregate
HANDLE_OTHER_INST(55, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(56, VICmp , VICmpInst ) // Vec Int comparison instruction.
-HANDLE_OTHER_INST(57, VFCmp , VFCmpInst ) // Vec FP point comparison instr.
- LAST_OTHER_INST(57)
+ LAST_OTHER_INST(55)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 7d946e85a6d0..fdae3d7d7442 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -20,6 +20,8 @@
namespace llvm {
+class LLVMContext;
+
template<typename ValueSubClass, typename ItemParentClass>
class SymbolTableListTraits;
@@ -52,6 +54,11 @@ public:
/// extra information (e.g. load is volatile) agree.
bool isIdenticalTo(const Instruction *I) const;
+ /// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+ /// ignores the SubclassOptionalData flags, which specify conditions
+ /// under which the instruction's result is undefined.
+ bool isIdenticalToWhenDefined(const Instruction *I) const;
+
/// This function determines if the specified instruction executes the same
/// operation as the current one. This means that the opcodes, type, operand
/// types and any other factors affecting the operation must be the same. This
@@ -166,13 +173,6 @@ public:
bool isCommutative() const { return isCommutative(getOpcode()); }
static bool isCommutative(unsigned op);
- /// isTrapping - Return true if the instruction may trap.
- ///
- bool isTrapping() const {
- return isTrapping(getOpcode());
- }
- static bool isTrapping(unsigned op);
-
/// mayWriteToMemory - Return true if this instruction may modify memory.
///
bool mayWriteToMemory() const;
@@ -187,10 +187,34 @@ public:
/// mayHaveSideEffects - Return true if the instruction may have side effects.
///
+ /// Note that this does not consider malloc and alloca to have side
+ /// effects because the newly allocated memory is completely invisible to
+  /// instructions which don't use the returned value. For cases where this
+ /// matters, isSafeToSpeculativelyExecute may be more appropriate.
bool mayHaveSideEffects() const {
return mayWriteToMemory() || mayThrow();
}
+ /// isSafeToSpeculativelyExecute - Return true if the instruction does not
+ /// have any effects besides calculating the result and does not have
+ /// undefined behavior.
+ ///
+ /// This method never returns true for an instruction that returns true for
+ /// mayHaveSideEffects; however, this method also does some other checks in
+ /// addition. It checks for undefined behavior, like dividing by zero or
+ /// loading from an invalid pointer (but not for undefined results, like a
+ /// shift with a shift amount larger than the width of the result). It checks
+ /// for malloc and alloca because speculatively executing them might cause a
+ /// memory leak. It also returns false for instructions related to control
+ /// flow, specifically terminators and PHI nodes.
+ ///
+ /// This method only looks at the instruction itself and its operands, so if
+ /// this method returns true, it is safe to move the instruction as long as
+ /// the correct dominance relationships for the operands and users hold.
+ /// However, this method can return true for instructions that read memory;
+ /// for such instructions, moving them may change the resulting value.
+ bool isSafeToSpeculativelyExecute() const;
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *) { return true; }
static inline bool classof(const Value *V) {
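
A sketch (not part of the patch) of how a transformation might consult the
new query before hoisting; I is an assumed llvm::Instruction* and
PreheaderTerm a hypothetical insertion point whose dominance has already been
checked, per the caveat above.

  if (I->isSafeToSpeculativelyExecute())
    I->moveBefore(PreheaderTerm); // hoist only when speculation is safe
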
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 59ae6100d2cd..b28fcbb9516f 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -20,6 +20,8 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Attributes.h"
#include "llvm/BasicBlock.h"
+#include "llvm/CallingConv.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include <iterator>
@@ -28,6 +30,8 @@ namespace llvm {
class ConstantInt;
class ConstantRange;
class APInt;
+class LLVMContext;
+class DominatorTree;
//===----------------------------------------------------------------------===//
// AllocationInst Class
@@ -38,10 +42,12 @@ class APInt;
///
class AllocationInst : public UnaryInstruction {
protected:
- AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy, unsigned Align,
- const std::string &Name = "", Instruction *InsertBefore = 0);
- AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy, unsigned Align,
- const std::string &Name, BasicBlock *InsertAtEnd);
+ AllocationInst(const Type *Ty, Value *ArraySize,
+ unsigned iTy, unsigned Align, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+ AllocationInst(const Type *Ty, Value *ArraySize,
+ unsigned iTy, unsigned Align, const Twine &Name,
+ BasicBlock *InsertAtEnd);
public:
// Out of line virtual method, so the vtable, etc. has a home.
virtual ~AllocationInst();
@@ -51,7 +57,7 @@ public:
///
bool isArrayAllocation() const;
- /// getArraySize - Get the number of element allocated, for a simple
+ /// getArraySize - Get the number of elements allocated. For a simple
/// allocation of a single element, this will return a constant 1 value.
///
const Value *getArraySize() const { return getOperand(0); }
@@ -74,7 +80,7 @@ public:
unsigned getAlignment() const { return (1u << SubclassData) >> 1; }
void setAlignment(unsigned Align);
- virtual Instruction *clone() const = 0;
+ virtual AllocationInst *clone() const = 0;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const AllocationInst *) { return true; }
@@ -95,30 +101,33 @@ public:
/// MallocInst - an instruction to allocate memory on the heap
///
class MallocInst : public AllocationInst {
- MallocInst(const MallocInst &MI);
public:
explicit MallocInst(const Type *Ty, Value *ArraySize = 0,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0)
- : AllocationInst(Ty, ArraySize, Malloc, 0, NameStr, InsertBefore) {}
- MallocInst(const Type *Ty, Value *ArraySize, const std::string &NameStr,
- BasicBlock *InsertAtEnd)
+ : AllocationInst(Ty, ArraySize, Malloc,
+ 0, NameStr, InsertBefore) {}
+ MallocInst(const Type *Ty, Value *ArraySize,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
: AllocationInst(Ty, ArraySize, Malloc, 0, NameStr, InsertAtEnd) {}
- MallocInst(const Type *Ty, const std::string &NameStr,
+ MallocInst(const Type *Ty, const Twine &NameStr,
Instruction *InsertBefore = 0)
: AllocationInst(Ty, 0, Malloc, 0, NameStr, InsertBefore) {}
- MallocInst(const Type *Ty, const std::string &NameStr,
+ MallocInst(const Type *Ty, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: AllocationInst(Ty, 0, Malloc, 0, NameStr, InsertAtEnd) {}
- MallocInst(const Type *Ty, Value *ArraySize, unsigned Align,
- const std::string &NameStr, BasicBlock *InsertAtEnd)
- : AllocationInst(Ty, ArraySize, Malloc, Align, NameStr, InsertAtEnd) {}
- MallocInst(const Type *Ty, Value *ArraySize, unsigned Align,
- const std::string &NameStr = "",
- Instruction *InsertBefore = 0)
- : AllocationInst(Ty, ArraySize, Malloc, Align, NameStr, InsertBefore) {}
+ MallocInst(const Type *Ty, Value *ArraySize,
+ unsigned Align, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : AllocationInst(Ty, ArraySize, Malloc,
+ Align, NameStr, InsertAtEnd) {}
+ MallocInst(const Type *Ty, Value *ArraySize,
+ unsigned Align, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0)
+ : AllocationInst(Ty, ArraySize,
+ Malloc, Align, NameStr, InsertBefore) {}
virtual MallocInst *clone() const;
@@ -140,29 +149,35 @@ public:
/// AllocaInst - an instruction to allocate memory on the stack
///
class AllocaInst : public AllocationInst {
- AllocaInst(const AllocaInst &);
public:
- explicit AllocaInst(const Type *Ty, Value *ArraySize = 0,
- const std::string &NameStr = "",
+ explicit AllocaInst(const Type *Ty,
+ Value *ArraySize = 0,
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0)
- : AllocationInst(Ty, ArraySize, Alloca, 0, NameStr, InsertBefore) {}
- AllocaInst(const Type *Ty, Value *ArraySize, const std::string &NameStr,
+ : AllocationInst(Ty, ArraySize, Alloca,
+ 0, NameStr, InsertBefore) {}
+ AllocaInst(const Type *Ty,
+ Value *ArraySize, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: AllocationInst(Ty, ArraySize, Alloca, 0, NameStr, InsertAtEnd) {}
- AllocaInst(const Type *Ty, const std::string &NameStr,
+ AllocaInst(const Type *Ty, const Twine &NameStr,
Instruction *InsertBefore = 0)
: AllocationInst(Ty, 0, Alloca, 0, NameStr, InsertBefore) {}
- AllocaInst(const Type *Ty, const std::string &NameStr,
+ AllocaInst(const Type *Ty, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: AllocationInst(Ty, 0, Alloca, 0, NameStr, InsertAtEnd) {}
- AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
- const std::string &NameStr = "", Instruction *InsertBefore = 0)
- : AllocationInst(Ty, ArraySize, Alloca, Align, NameStr, InsertBefore) {}
- AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
- const std::string &NameStr, BasicBlock *InsertAtEnd)
- : AllocationInst(Ty, ArraySize, Alloca, Align, NameStr, InsertAtEnd) {}
+ AllocaInst(const Type *Ty, Value *ArraySize,
+ unsigned Align, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0)
+ : AllocationInst(Ty, ArraySize, Alloca,
+ Align, NameStr, InsertBefore) {}
+ AllocaInst(const Type *Ty, Value *ArraySize,
+ unsigned Align, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : AllocationInst(Ty, ArraySize, Alloca,
+ Align, NameStr, InsertAtEnd) {}
virtual AllocaInst *clone() const;
@@ -219,27 +234,17 @@ public:
/// SubclassData field in Value to store whether or not the load is volatile.
///
class LoadInst : public UnaryInstruction {
-
- LoadInst(const LoadInst &LI)
- : UnaryInstruction(LI.getType(), Load, LI.getOperand(0)) {
- setVolatile(LI.isVolatile());
- setAlignment(LI.getAlignment());
-
-#ifndef NDEBUG
- AssertOK();
-#endif
- }
void AssertOK();
public:
- LoadInst(Value *Ptr, const std::string &NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const std::string &NameStr, BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile = false,
+ LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
+ LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false,
Instruction *InsertBefore = 0);
- LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, Instruction *InsertBefore = 0);
- LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, BasicBlock *InsertAtEnd);
LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore);
@@ -274,6 +279,11 @@ public:
const Value *getPointerOperand() const { return getOperand(0); }
static unsigned getPointerOperandIndex() { return 0U; }
+ unsigned getPointerAddressSpace() const {
+ return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+ }
+
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const LoadInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -293,18 +303,6 @@ public:
///
class StoreInst : public Instruction {
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-
- StoreInst(const StoreInst &SI) : Instruction(SI.getType(), Store,
- &Op<0>(), 2) {
- Op<0>() = SI.Op<0>();
- Op<1>() = SI.Op<1>();
- setVolatile(SI.isVolatile());
- setAlignment(SI.getAlignment());
-
-#ifndef NDEBUG
- AssertOK();
-#endif
- }
void AssertOK();
public:
// allocate space for exactly two operands
@@ -350,6 +348,10 @@ public:
const Value *getPointerOperand() const { return getOperand(1); }
static unsigned getPointerOperandIndex() { return 1U; }
+ unsigned getPointerAddressSpace() const {
+ return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const StoreInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -361,7 +363,7 @@ public:
};
template <>
-struct OperandTraits<StoreInst> : FixedNumOperandTraits<2> {
+struct OperandTraits<StoreInst> : public FixedNumOperandTraits<2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
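
A sketch (not part of the patch): the new getPointerAddressSpace accessors
fold a common cast pattern. For an assumed llvm::LoadInst *LI, the call below
replaces the longhand shown in its definition.

  unsigned AS = LI->getPointerAddressSpace();
  // longhand:
  //   cast<llvm::PointerType>(
  //       LI->getPointerOperand()->getType())->getAddressSpace()
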
@@ -384,12 +386,12 @@ static inline const Type *checkType(const Type *Ty) {
class GetElementPtrInst : public Instruction {
GetElementPtrInst(const GetElementPtrInst &GEPI);
void init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
- const std::string &NameStr);
- void init(Value *Ptr, Value *Idx, const std::string &NameStr);
+ const Twine &NameStr);
+ void init(Value *Ptr, Value *Idx, const Twine &NameStr);
template<typename InputIterator>
void init(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
// This argument ensures that we have an iterator we can
// do arithmetic on in constant time
std::random_access_iterator_tag) {
@@ -436,25 +438,25 @@ class GetElementPtrInst : public Instruction {
inline GetElementPtrInst(Value *Ptr, InputIterator IdxBegin,
InputIterator IdxEnd,
unsigned Values,
- const std::string &NameStr,
+ const Twine &NameStr,
Instruction *InsertBefore);
template<typename InputIterator>
inline GetElementPtrInst(Value *Ptr,
InputIterator IdxBegin, InputIterator IdxEnd,
unsigned Values,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
/// Constructors - These two constructors are convenience methods because
/// one- and two-index getelementptr instructions are so common.
- GetElementPtrInst(Value *Ptr, Value *Idx, const std::string &NameStr = "",
+ GetElementPtrInst(Value *Ptr, Value *Idx, const Twine &NameStr = "",
Instruction *InsertBefore = 0);
GetElementPtrInst(Value *Ptr, Value *Idx,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
public:
template<typename InputIterator>
static GetElementPtrInst *Create(Value *Ptr, InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
typename std::iterator_traits<InputIterator>::difference_type Values =
1 + std::distance(IdxBegin, IdxEnd);
@@ -464,7 +466,7 @@ public:
template<typename InputIterator>
static GetElementPtrInst *Create(Value *Ptr,
InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
typename std::iterator_traits<InputIterator>::difference_type Values =
1 + std::distance(IdxBegin, IdxEnd);
@@ -475,16 +477,54 @@ public:
/// Constructors - These two creators are convenience methods because
/// one-index getelementptr instructions are so common.
static GetElementPtrInst *Create(Value *Ptr, Value *Idx,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new(2) GetElementPtrInst(Ptr, Idx, NameStr, InsertBefore);
}
static GetElementPtrInst *Create(Value *Ptr, Value *Idx,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(2) GetElementPtrInst(Ptr, Idx, NameStr, InsertAtEnd);
}
+ /// Create an "inbounds" getelementptr. See the documentation for the
+ /// "inbounds" flag in LangRef.html for details.
+ template<typename InputIterator>
+ static GetElementPtrInst *CreateInBounds(Value *Ptr, InputIterator IdxBegin,
+ InputIterator IdxEnd,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd,
+ NameStr, InsertBefore);
+ GEP->setIsInBounds(true);
+ return GEP;
+ }
+ template<typename InputIterator>
+ static GetElementPtrInst *CreateInBounds(Value *Ptr,
+ InputIterator IdxBegin,
+ InputIterator IdxEnd,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd,
+ NameStr, InsertAtEnd);
+ GEP->setIsInBounds(true);
+ return GEP;
+ }
+ static GetElementPtrInst *CreateInBounds(Value *Ptr, Value *Idx,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ GetElementPtrInst *GEP = Create(Ptr, Idx, NameStr, InsertBefore);
+ GEP->setIsInBounds(true);
+ return GEP;
+ }
+ static GetElementPtrInst *CreateInBounds(Value *Ptr, Value *Idx,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ GetElementPtrInst *GEP = Create(Ptr, Idx, NameStr, InsertAtEnd);
+ GEP->setIsInBounds(true);
+ return GEP;
+ }
+
virtual GetElementPtrInst *clone() const;
/// Transparently provide more efficient getOperand methods.
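
A sketch (not part of the patch) of the CreateInBounds helpers above, which
build an ordinary GEP and then mark it inbounds. Ptr, Zero, FieldIdx, and
InsertPt are assumed values and insertion point.

  llvm::Value *Idxs[] = { Zero, FieldIdx };
  llvm::GetElementPtrInst *GEP =
    llvm::GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs + 2,
                                            "field.addr", InsertPt);
  assert(GEP->isInBounds());
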
@@ -532,6 +572,10 @@ public:
static unsigned getPointerOperandIndex() {
return 0U; // get index for modifying correct operand
}
+
+ unsigned getPointerAddressSpace() const {
+ return cast<PointerType>(getType())->getAddressSpace();
+ }
/// getPointerOperandType - Method to return the pointer operand as a
/// PointerType.
@@ -558,6 +602,12 @@ public:
/// a constant offset between them.
bool hasAllConstantIndices() const;
+ /// setIsInBounds - Set or clear the inbounds flag on this GEP instruction.
+ /// See LangRef.html for the meaning of inbounds on a getelementptr.
+ void setIsInBounds(bool b = true);
+
+ /// isInBounds - Determine whether the GEP has the inbounds flag.
+ bool isInBounds() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const GetElementPtrInst *) { return true; }
@@ -570,7 +620,7 @@ public:
};
template <>
-struct OperandTraits<GetElementPtrInst> : VariadicOperandTraits<1> {
+struct OperandTraits<GetElementPtrInst> : public VariadicOperandTraits<1> {
};
template<typename InputIterator>
@@ -578,7 +628,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
InputIterator IdxBegin,
InputIterator IdxEnd,
unsigned Values,
- const std::string &NameStr,
+ const Twine &NameStr,
Instruction *InsertBefore)
: Instruction(PointerType::get(checkType(
getIndexedType(Ptr->getType(),
@@ -596,7 +646,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
InputIterator IdxBegin,
InputIterator IdxEnd,
unsigned Values,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd)
: Instruction(PointerType::get(checkType(
getIndexedType(Ptr->getType(),
@@ -626,11 +676,11 @@ class ICmpInst: public CmpInst {
public:
/// @brief Constructor with insert-before-instruction semantics.
ICmpInst(
+ Instruction *InsertBefore, ///< Where to insert
Predicate pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr = "", ///< Name of the instruction
- Instruction *InsertBefore = 0 ///< Where to insert
+ const Twine &NameStr = "" ///< Name of the instruction
) : CmpInst(makeCmpResultType(LHS->getType()),
Instruction::ICmp, pred, LHS, RHS, NameStr,
InsertBefore) {
@@ -645,16 +695,35 @@ public:
"Invalid operand types for ICmp instruction");
}
- /// @brief Constructor with insert-at-block-end semantics.
+ /// @brief Constructor with insert-at-end semantics.
+ ICmpInst(
+ BasicBlock &InsertAtEnd, ///< Block to insert into.
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::ICmp, pred, LHS, RHS, NameStr,
+ &InsertAtEnd) {
+ assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= CmpInst::LAST_ICMP_PREDICATE &&
+ "Invalid ICmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to ICmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert((getOperand(0)->getType()->isIntOrIntVector() ||
+ isa<PointerType>(getOperand(0)->getType())) &&
+ "Invalid operand types for ICmp instruction");
+ }
+
+ /// @brief Constructor with no-insertion semantics
ICmpInst(
Predicate pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr, ///< Name of the instruction
- BasicBlock *InsertAtEnd ///< Block to insert into.
+ const Twine &NameStr = "" ///< Name of the instruction
) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::ICmp, pred, LHS, RHS, NameStr,
- InsertAtEnd) {
+ Instruction::ICmp, pred, LHS, RHS, NameStr) {
assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
pred <= CmpInst::LAST_ICMP_PREDICATE &&
"Invalid ICmp predicate value");
@@ -781,11 +850,11 @@ class FCmpInst: public CmpInst {
public:
/// @brief Constructor with insert-before-instruction semantics.
FCmpInst(
+ Instruction *InsertBefore, ///< Where to insert
Predicate pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr = "", ///< Name of the instruction
- Instruction *InsertBefore = 0 ///< Where to insert
+ const Twine &NameStr = "" ///< Name of the instruction
) : CmpInst(makeCmpResultType(LHS->getType()),
Instruction::FCmp, pred, LHS, RHS, NameStr,
InsertBefore) {
@@ -797,17 +866,34 @@ public:
assert(getOperand(0)->getType()->isFPOrFPVector() &&
"Invalid operand types for FCmp instruction");
}
+
+ /// @brief Constructor with insert-at-end semantics.
+ FCmpInst(
+ BasicBlock &InsertAtEnd, ///< Block to insert into.
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::FCmp, pred, LHS, RHS, NameStr,
+ &InsertAtEnd) {
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+ "Invalid FCmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to FCmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert(getOperand(0)->getType()->isFPOrFPVector() &&
+ "Invalid operand types for FCmp instruction");
+ }
- /// @brief Constructor with insert-at-block-end semantics.
+ /// @brief Constructor with no-insertion semantics
FCmpInst(
Predicate pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr, ///< Name of the instruction
- BasicBlock *InsertAtEnd ///< Block to insert into.
+ const Twine &NameStr = "" ///< Name of the instruction
) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::FCmp, pred, LHS, RHS, NameStr,
- InsertAtEnd) {
+ Instruction::FCmp, pred, LHS, RHS, NameStr) {
assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
"Invalid FCmp predicate value");
assert(getOperand(0)->getType() == getOperand(1)->getType() &&
@@ -858,119 +944,6 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
-
-};
-
-//===----------------------------------------------------------------------===//
-// VICmpInst Class
-//===----------------------------------------------------------------------===//
-
-/// This instruction compares its operands according to the predicate given
-/// to the constructor. It only operates on vectors of integers.
-/// The operands must be identical types.
-/// @brief Represents a vector integer comparison operator.
-class VICmpInst: public CmpInst {
-public:
- /// @brief Constructor with insert-before-instruction semantics.
- VICmpInst(
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr = "", ///< Name of the instruction
- Instruction *InsertBefore = 0 ///< Where to insert
- ) : CmpInst(LHS->getType(), Instruction::VICmp, pred, LHS, RHS, NameStr,
- InsertBefore) {
- assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
- pred <= CmpInst::LAST_ICMP_PREDICATE &&
- "Invalid VICmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to VICmp instruction are not of the same type!");
- }
-
- /// @brief Constructor with insert-at-block-end semantics.
- VICmpInst(
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr, ///< Name of the instruction
- BasicBlock *InsertAtEnd ///< Block to insert into.
- ) : CmpInst(LHS->getType(), Instruction::VICmp, pred, LHS, RHS, NameStr,
- InsertAtEnd) {
- assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
- pred <= CmpInst::LAST_ICMP_PREDICATE &&
- "Invalid VICmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to VICmp instruction are not of the same type!");
- }
-
- /// @brief Return the predicate for this instruction.
- Predicate getPredicate() const { return Predicate(SubclassData); }
-
- virtual VICmpInst *clone() const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const VICmpInst *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::VICmp;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// VFCmpInst Class
-//===----------------------------------------------------------------------===//
-
-/// This instruction compares its operands according to the predicate given
-/// to the constructor. It only operates on vectors of floating point values.
-/// The operands must be identical types.
-/// @brief Represents a vector floating point comparison operator.
-class VFCmpInst: public CmpInst {
-public:
- /// @brief Constructor with insert-before-instruction semantics.
- VFCmpInst(
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr = "", ///< Name of the instruction
- Instruction *InsertBefore = 0 ///< Where to insert
- ) : CmpInst(VectorType::getInteger(cast<VectorType>(LHS->getType())),
- Instruction::VFCmp, pred, LHS, RHS, NameStr, InsertBefore) {
- assert(pred <= CmpInst::LAST_FCMP_PREDICATE &&
- "Invalid VFCmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to VFCmp instruction are not of the same type!");
- }
-
- /// @brief Constructor with insert-at-block-end semantics.
- VFCmpInst(
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const std::string &NameStr, ///< Name of the instruction
- BasicBlock *InsertAtEnd ///< Block to insert into.
- ) : CmpInst(VectorType::getInteger(cast<VectorType>(LHS->getType())),
- Instruction::VFCmp, pred, LHS, RHS, NameStr, InsertAtEnd) {
- assert(pred <= CmpInst::LAST_FCMP_PREDICATE &&
- "Invalid VFCmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to VFCmp instruction are not of the same type!");
- }
-
- /// @brief Return the predicate for this instruction.
- Predicate getPredicate() const { return Predicate(SubclassData); }
-
- virtual VFCmpInst *clone() const;
-
- /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const VFCmpInst *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::VFCmp;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
};
//===----------------------------------------------------------------------===//
@@ -992,7 +965,7 @@ class CallInst : public Instruction {
template<typename InputIterator>
void init(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
// This argument ensures that we have an iterator we can
// do arithmetic on in constant time
std::random_access_iterator_tag) {
@@ -1011,7 +984,7 @@ class CallInst : public Instruction {
/// @brief Construct a CallInst from a range of arguments
template<typename InputIterator>
CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr, Instruction *InsertBefore);
+ const Twine &NameStr, Instruction *InsertBefore);
/// Construct a CallInst given a range of arguments. InputIterator
/// must be a random-access iterator pointing to contiguous storage
@@ -1021,20 +994,20 @@ class CallInst : public Instruction {
/// @brief Construct a CallInst from a range of arguments
template<typename InputIterator>
inline CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
- CallInst(Value *F, Value *Actual, const std::string& NameStr,
+ CallInst(Value *F, Value *Actual, const Twine &NameStr,
Instruction *InsertBefore);
- CallInst(Value *F, Value *Actual, const std::string& NameStr,
+ CallInst(Value *F, Value *Actual, const Twine &NameStr,
BasicBlock *InsertAtEnd);
- explicit CallInst(Value *F, const std::string &NameStr,
+ explicit CallInst(Value *F, const Twine &NameStr,
Instruction *InsertBefore);
- CallInst(Value *F, const std::string &NameStr, BasicBlock *InsertAtEnd);
+ CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
public:
template<typename InputIterator>
static CallInst *Create(Value *Func,
InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new((unsigned)(ArgEnd - ArgBegin + 1))
CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertBefore);
@@ -1042,27 +1015,39 @@ public:
template<typename InputIterator>
static CallInst *Create(Value *Func,
InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr, BasicBlock *InsertAtEnd) {
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
return new((unsigned)(ArgEnd - ArgBegin + 1))
CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertAtEnd);
}
static CallInst *Create(Value *F, Value *Actual,
- const std::string& NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new(2) CallInst(F, Actual, NameStr, InsertBefore);
}
- static CallInst *Create(Value *F, Value *Actual, const std::string& NameStr,
+ static CallInst *Create(Value *F, Value *Actual, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(2) CallInst(F, Actual, NameStr, InsertAtEnd);
}
- static CallInst *Create(Value *F, const std::string &NameStr = "",
+ static CallInst *Create(Value *F, const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new(1) CallInst(F, NameStr, InsertBefore);
}
- static CallInst *Create(Value *F, const std::string &NameStr,
+ static CallInst *Create(Value *F, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(1) CallInst(F, NameStr, InsertAtEnd);
}
+ /// CreateMalloc - Generate the IR for a call to malloc:
+ /// 1. Compute the malloc call's argument as the specified type's size,
+ /// possibly multiplied by the array size if the array size is not
+ /// constant 1.
+ /// 2. Call malloc with that argument.
+ /// 3. Bitcast the result of the malloc call to the specified type.
+ static Value *CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy,
+ const Type *AllocTy, Value *ArraySize = 0,
+ const Twine &Name = "");
+ static Value *CreateMalloc(BasicBlock *InsertAtEnd, const Type *IntPtrTy,
+ const Type *AllocTy, Value *ArraySize = 0,
+ const Twine &Name = "");
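
In practice the three steps reduce to a single call against the signatures
above; a minimal sketch, assuming a hypothetical LLVMContext Ctx and an
existing Instruction InsertPt marking the insertion point:

    const Type *IntPtrTy = Type::getInt64Ty(Ctx); // target's pointer-sized int
    const Type *AllocTy  = Type::getDoubleTy(Ctx);
    // Emits the size computation, the call to malloc, and a bitcast of the
    // result to double*; the returned Value is the bitcast.
    Value *P = CallInst::CreateMalloc(InsertPt, IntPtrTy, AllocTy,
                                      /*ArraySize=*/0, "box");
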
~CallInst();
@@ -1078,9 +1063,11 @@ public:
/// getCallingConv/setCallingConv - Get or set the calling convention of this
/// function call.
- unsigned getCallingConv() const { return SubclassData >> 1; }
- void setCallingConv(unsigned CC) {
- SubclassData = (SubclassData & 1) | (CC << 1);
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(SubclassData >> 1);
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ SubclassData = (SubclassData & 1) | (static_cast<unsigned>(CC) << 1);
}
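
The shift-by-one packing leaves the low bit of SubclassData free, which this
header uses for the tail-call flag. A worked illustration, not part of the
patch itself:

    unsigned Data = 1;                                  // tail-call bit set
    Data = (Data & 1) | (static_cast<unsigned>(CallingConv::Fast) << 1);
    bool Tail = Data & 1;                               // still true
    CallingConv::ID CC =
        static_cast<CallingConv::ID>(Data >> 1);        // CallingConv::Fast
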
/// getAttributes - Return the parameter attributes for this call.
@@ -1176,12 +1163,12 @@ public:
};
template <>
-struct OperandTraits<CallInst> : VariadicOperandTraits<1> {
+struct OperandTraits<CallInst> : public VariadicOperandTraits<1> {
};
template<typename InputIterator>
CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr, BasicBlock *InsertAtEnd)
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Call,
@@ -1193,7 +1180,7 @@ CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
template<typename InputIterator>
CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr, Instruction *InsertBefore)
+ const Twine &NameStr, Instruction *InsertBefore)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Call,
@@ -1219,18 +1206,14 @@ class SelectInst : public Instruction {
Op<2>() = S2;
}
- SelectInst(const SelectInst &SI)
- : Instruction(SI.getType(), SI.getOpcode(), &Op<0>(), 3) {
- init(SI.Op<0>(), SI.Op<1>(), SI.Op<2>());
- }
- SelectInst(Value *C, Value *S1, Value *S2, const std::string &NameStr,
+ SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
Instruction *InsertBefore)
: Instruction(S1->getType(), Instruction::Select,
&Op<0>(), 3, InsertBefore) {
init(C, S1, S2);
setName(NameStr);
}
- SelectInst(Value *C, Value *S1, Value *S2, const std::string &NameStr,
+ SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: Instruction(S1->getType(), Instruction::Select,
&Op<0>(), 3, InsertAtEnd) {
@@ -1239,20 +1222,23 @@ class SelectInst : public Instruction {
}
public:
static SelectInst *Create(Value *C, Value *S1, Value *S2,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new(3) SelectInst(C, S1, S2, NameStr, InsertBefore);
}
static SelectInst *Create(Value *C, Value *S1, Value *S2,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(3) SelectInst(C, S1, S2, NameStr, InsertAtEnd);
}
- Value *getCondition() const { return Op<0>(); }
- Value *getTrueValue() const { return Op<1>(); }
- Value *getFalseValue() const { return Op<2>(); }
-
+ const Value *getCondition() const { return Op<0>(); }
+ const Value *getTrueValue() const { return Op<1>(); }
+ const Value *getFalseValue() const { return Op<2>(); }
+ Value *getCondition() { return Op<0>(); }
+ Value *getTrueValue() { return Op<1>(); }
+ Value *getFalseValue() { return Op<2>(); }
+
/// areInvalidOperands - Return a string if the specified operands are invalid
/// for a select operation, otherwise return null.
static const char *areInvalidOperands(Value *Cond, Value *True, Value *False);
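
A usage sketch against the factories above; Cond (of type i1), A, B (of one
matching type), and InsertPt are hypothetical:

    assert(SelectInst::areInvalidOperands(Cond, A, B) == 0 &&
           "operands do not form a valid select");
    SelectInst *Sel = SelectInst::Create(Cond, A, B, "sel", InsertPt);
    Value *TV = Sel->getTrueValue();   // Op<1>(), i.e. A
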
@@ -1277,7 +1263,7 @@ public:
};
template <>
-struct OperandTraits<SelectInst> : FixedNumOperandTraits<3> {
+struct OperandTraits<SelectInst> : public FixedNumOperandTraits<3> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
@@ -1290,15 +1276,13 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
/// an argument of the specified type given a va_list and increments that list
///
class VAArgInst : public UnaryInstruction {
- VAArgInst(const VAArgInst &VAA)
- : UnaryInstruction(VAA.getType(), VAArg, VAA.getOperand(0)) {}
public:
- VAArgInst(Value *List, const Type *Ty, const std::string &NameStr = "",
+ VAArgInst(Value *List, const Type *Ty, const Twine &NameStr = "",
Instruction *InsertBefore = 0)
: UnaryInstruction(Ty, VAArg, List, InsertBefore) {
setName(NameStr);
}
- VAArgInst(Value *List, const Type *Ty, const std::string &NameStr,
+ VAArgInst(Value *List, const Type *Ty, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: UnaryInstruction(Ty, VAArg, List, InsertAtEnd) {
setName(NameStr);
@@ -1324,25 +1308,21 @@ public:
/// element from a VectorType value
///
class ExtractElementInst : public Instruction {
- ExtractElementInst(const ExtractElementInst &EE) :
- Instruction(EE.getType(), ExtractElement, &Op<0>(), 2) {
- Op<0>() = EE.Op<0>();
- Op<1>() = EE.Op<1>();
- }
-
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2); // FIXME: "unsigned Idx" forms of ctor?
- }
- ExtractElementInst(Value *Vec, Value *Idx, const std::string &NameStr = "",
+ ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "",
Instruction *InsertBefore = 0);
- ExtractElementInst(Value *Vec, unsigned Idx, const std::string &NameStr = "",
- Instruction *InsertBefore = 0);
- ExtractElementInst(Value *Vec, Value *Idx, const std::string &NameStr,
- BasicBlock *InsertAtEnd);
- ExtractElementInst(Value *Vec, unsigned Idx, const std::string &NameStr,
+ ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr,
BasicBlock *InsertAtEnd);
+public:
+ static ExtractElementInst *Create(Value *Vec, Value *Idx,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore);
+ }
+ static ExtractElementInst *Create(Value *Vec, Value *Idx,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd);
+ }
/// isValidOperands - Return true if an extractelement instruction can be
/// formed with the specified operands.
@@ -1350,6 +1330,16 @@ public:
virtual ExtractElementInst *clone() const;
+ Value *getVectorOperand() { return Op<0>(); }
+ Value *getIndexOperand() { return Op<1>(); }
+ const Value *getVectorOperand() const { return Op<0>(); }
+ const Value *getIndexOperand() const { return Op<1>(); }
+
+ const VectorType *getVectorOperandType() const {
+ return reinterpret_cast<const VectorType*>(getVectorOperand()->getType());
+ }
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -1364,7 +1354,7 @@ public:
};
template <>
-struct OperandTraits<ExtractElementInst> : FixedNumOperandTraits<2> {
+struct OperandTraits<ExtractElementInst> : public FixedNumOperandTraits<2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)
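
A sketch of the new factory and operand accessors, assuming a hypothetical
vector value Vec, an i32 index value Idx, and an insertion point InsertPt:

    assert(ExtractElementInst::isValidOperands(Vec, Idx));
    ExtractElementInst *EE =
        ExtractElementInst::Create(Vec, Idx, "elt", InsertPt);
    const VectorType *VTy = EE->getVectorOperandType(); // type of Vec
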
@@ -1377,38 +1367,19 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)
/// element into a VectorType value
///
class InsertElementInst : public Instruction {
- InsertElementInst(const InsertElementInst &IE);
InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
- const std::string &NameStr = "",
- Instruction *InsertBefore = 0);
- InsertElementInst(Value *Vec, Value *NewElt, unsigned Idx,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0);
InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
- InsertElementInst(Value *Vec, Value *NewElt, unsigned Idx,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
public:
- static InsertElementInst *Create(const InsertElementInst &IE) {
- return new(IE.getNumOperands()) InsertElementInst(IE);
- }
static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
- const std::string &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore);
- }
- static InsertElementInst *Create(Value *Vec, Value *NewElt, unsigned Idx,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore);
}
static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
- const std::string &NameStr,
- BasicBlock *InsertAtEnd) {
- return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd);
- }
- static InsertElementInst *Create(Value *Vec, Value *NewElt, unsigned Idx,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd);
}
@@ -1440,7 +1411,7 @@ public:
};
template <>
-struct OperandTraits<InsertElementInst> : FixedNumOperandTraits<3> {
+struct OperandTraits<InsertElementInst> : public FixedNumOperandTraits<3> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
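
With the unsigned-index convenience overloads gone, the index must be
materialized as a Value first; a sketch with hypothetical Vec, NewElt, Ctx,
and InsertPt:

    Value *Idx0 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    InsertElementInst *IE =
        InsertElementInst::Create(Vec, NewElt, Idx0, "ins", InsertPt);
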
@@ -1453,17 +1424,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
/// input vectors.
///
class ShuffleVectorInst : public Instruction {
- ShuffleVectorInst(const ShuffleVectorInst &IE);
public:
// allocate space for exactly three operands
void *operator new(size_t s) {
return User::operator new(s, 3);
}
ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
 Instruction *InsertBefore = 0);
ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
/// isValidOperands - Return true if a shufflevector instruction can be
/// formed with the specified operands.
@@ -1497,7 +1467,7 @@ public:
};
template <>
-struct OperandTraits<ShuffleVectorInst> : FixedNumOperandTraits<3> {
+struct OperandTraits<ShuffleVectorInst> : public FixedNumOperandTraits<3> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
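
A sketch that builds a constant mask reversing a 4-element vector; V1, V2,
Ctx, and InsertPt are assumed:

    std::vector<Constant*> Elts;
    for (unsigned i = 0; i != 4; ++i)
      Elts.push_back(ConstantInt::get(Type::getInt32Ty(Ctx), 3 - i));
    Constant *Mask = ConstantVector::get(Elts);
    ShuffleVectorInst *SV = new ShuffleVectorInst(V1, V2, Mask, "rev",
                                                  InsertPt);
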
@@ -1514,12 +1484,12 @@ class ExtractValueInst : public UnaryInstruction {
ExtractValueInst(const ExtractValueInst &EVI);
void init(const unsigned *Idx, unsigned NumIdx,
- const std::string &NameStr);
- void init(unsigned Idx, const std::string &NameStr);
+ const Twine &NameStr);
+ void init(unsigned Idx, const Twine &NameStr);
template<typename InputIterator>
void init(InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
// This argument ensures that we have an iterator we can
// do arithmetic on in constant time
std::random_access_iterator_tag) {
@@ -1569,12 +1539,12 @@ class ExtractValueInst : public UnaryInstruction {
template<typename InputIterator>
inline ExtractValueInst(Value *Agg, InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
Instruction *InsertBefore);
template<typename InputIterator>
inline ExtractValueInst(Value *Agg,
InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
// allocate space for exactly one operand
void *operator new(size_t s) {
@@ -1585,7 +1555,7 @@ public:
template<typename InputIterator>
static ExtractValueInst *Create(Value *Agg, InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new
ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertBefore);
@@ -1593,7 +1563,7 @@ public:
template<typename InputIterator>
static ExtractValueInst *Create(Value *Agg,
InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertAtEnd);
}
@@ -1602,13 +1572,13 @@ public:
/// index extractvalue instructions are much more common than those with
/// more than one.
static ExtractValueInst *Create(Value *Agg, unsigned Idx,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
unsigned Idxs[1] = { Idx };
return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertBefore);
}
static ExtractValueInst *Create(Value *Agg, unsigned Idx,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
unsigned Idxs[1] = { Idx };
return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertAtEnd);
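
A sketch of the multi-index form, assuming Agg has the hypothetical type
{ i32, [4 x float] }; the index path walks into the nested aggregate much
like a constant GEP:

    unsigned Idxs[] = { 1, 2 };   // second member, third array element
    ExtractValueInst *EV =
        ExtractValueInst::Create(Agg, Idxs, Idxs + 2, "fld", InsertPt);
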
@@ -1668,7 +1638,7 @@ template<typename InputIterator>
ExtractValueInst::ExtractValueInst(Value *Agg,
InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
Instruction *InsertBefore)
: UnaryInstruction(checkType(getIndexedType(Agg->getType(),
IdxBegin, IdxEnd)),
@@ -1680,7 +1650,7 @@ template<typename InputIterator>
ExtractValueInst::ExtractValueInst(Value *Agg,
InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd)
: UnaryInstruction(checkType(getIndexedType(Agg->getType(),
IdxBegin, IdxEnd)),
@@ -1703,13 +1673,13 @@ class InsertValueInst : public Instruction {
void *operator new(size_t, unsigned); // Do not implement
InsertValueInst(const InsertValueInst &IVI);
void init(Value *Agg, Value *Val, const unsigned *Idx, unsigned NumIdx,
- const std::string &NameStr);
- void init(Value *Agg, Value *Val, unsigned Idx, const std::string &NameStr);
+ const Twine &NameStr);
+ void init(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr);
template<typename InputIterator>
void init(Value *Agg, Value *Val,
InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
// This argument ensures that we have an iterator we can
// do arithmetic on in constant time
std::random_access_iterator_tag) {
@@ -1733,20 +1703,20 @@ class InsertValueInst : public Instruction {
template<typename InputIterator>
inline InsertValueInst(Value *Agg, Value *Val, InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
Instruction *InsertBefore);
template<typename InputIterator>
inline InsertValueInst(Value *Agg, Value *Val,
InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
/// Constructors - These two constructors are convenience methods because one
/// and two index insertvalue instructions are so common.
InsertValueInst(Value *Agg, Value *Val,
- unsigned Idx, const std::string &NameStr = "",
+ unsigned Idx, const Twine &NameStr = "",
Instruction *InsertBefore = 0);
InsertValueInst(Value *Agg, Value *Val, unsigned Idx,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -1756,7 +1726,7 @@ public:
template<typename InputIterator>
static InsertValueInst *Create(Value *Agg, Value *Val, InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd,
NameStr, InsertBefore);
@@ -1764,7 +1734,7 @@ public:
template<typename InputIterator>
static InsertValueInst *Create(Value *Agg, Value *Val,
InputIterator IdxBegin, InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd,
NameStr, InsertAtEnd);
@@ -1774,12 +1744,12 @@ public:
/// index insertvalue instructions are much more common than those with
/// more than one.
static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new InsertValueInst(Agg, Val, Idx, NameStr, InsertBefore);
}
static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new InsertValueInst(Agg, Val, Idx, NameStr, InsertAtEnd);
}
@@ -1832,7 +1802,7 @@ public:
};
template <>
-struct OperandTraits<InsertValueInst> : FixedNumOperandTraits<2> {
+struct OperandTraits<InsertValueInst> : public FixedNumOperandTraits<2> {
};
template<typename InputIterator>
@@ -1840,7 +1810,7 @@ InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
Instruction *InsertBefore)
: Instruction(Agg->getType(), InsertValue,
OperandTraits<InsertValueInst>::op_begin(this),
@@ -1853,7 +1823,7 @@ InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
InputIterator IdxBegin,
InputIterator IdxEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd)
: Instruction(Agg->getType(), InsertValue,
OperandTraits<InsertValueInst>::op_begin(this),
@@ -1882,24 +1852,24 @@ class PHINode : public Instruction {
void *operator new(size_t s) {
return User::operator new(s, 0);
}
- explicit PHINode(const Type *Ty, const std::string &NameStr = "",
+ explicit PHINode(const Type *Ty, const Twine &NameStr = "",
Instruction *InsertBefore = 0)
: Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore),
ReservedSpace(0) {
setName(NameStr);
}
- PHINode(const Type *Ty, const std::string &NameStr, BasicBlock *InsertAtEnd)
+ PHINode(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd)
: Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd),
ReservedSpace(0) {
setName(NameStr);
}
public:
- static PHINode *Create(const Type *Ty, const std::string &NameStr = "",
+ static PHINode *Create(const Type *Ty, const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new PHINode(Ty, NameStr, InsertBefore);
}
- static PHINode *Create(const Type *Ty, const std::string &NameStr,
+ static PHINode *Create(const Type *Ty, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new PHINode(Ty, NameStr, InsertAtEnd);
}
@@ -1940,19 +1910,29 @@ public:
return i/2;
}
+ /// getIncomingBlock - Return incoming basic block #i.
+ ///
+ BasicBlock *getIncomingBlock(unsigned i) const {
+ return cast<BasicBlock>(getOperand(i*2+1));
+ }
+
/// getIncomingBlock - Return incoming basic block corresponding
- /// to value use iterator
+ /// to an operand of the PHI.
///
- template <typename U>
- BasicBlock *getIncomingBlock(value_use_iterator<U> I) const {
- assert(this == *I && "Iterator doesn't point to PHI's Uses?");
- return static_cast<BasicBlock*>((&I.getUse() + 1)->get());
+ BasicBlock *getIncomingBlock(const Use &U) const {
+ assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
+ return cast<BasicBlock>((&U + 1)->get());
}
- /// getIncomingBlock - Return incoming basic block number x
+
+ /// getIncomingBlock - Return incoming basic block corresponding
+ /// to value use iterator.
///
- BasicBlock *getIncomingBlock(unsigned i) const {
- return static_cast<BasicBlock*>(getOperand(i*2+1));
+ template <typename U>
+ BasicBlock *getIncomingBlock(value_use_iterator<U> I) const {
+ return getIncomingBlock(I.getUse());
}
+
void setIncomingBlock(unsigned i, BasicBlock *BB) {
setOperand(i*2+1, BB);
}
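
A sketch tying the accessors to the operand layout of (value0, block0,
value1, block1, ...); Start, Next, Preheader, Latch, and FirstInst are all
hypothetical:

    PHINode *IV = PHINode::Create(Type::getInt32Ty(Ctx), "iv", FirstInst);
    IV->addIncoming(Start, Preheader);
    IV->addIncoming(Next,  Latch);
    BasicBlock *BB0 = IV->getIncomingBlock(0);   // Preheader, operand 1
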
@@ -2013,7 +1993,12 @@ public:
/// hasConstantValue - If the specified PHI node always merges together the
/// same value, return the value, otherwise return null.
///
- Value *hasConstantValue(bool AllowNonDominatingInstruction = false) const;
+ /// If the PHI has undef operands, but all the rest of the operands are
+ /// some unique value, return that value if it can be proved that the
+ /// value dominates the PHI. If DT is null, use a conservative check,
+ /// otherwise use DT to test for dominance.
+ ///
+ Value *hasConstantValue(DominatorTree *DT = 0) const;
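
Typical use, sketched with an assumed PHINode *PN and a possibly-null
DominatorTree *DT: fold away a PHI that merges one unique value, possibly
alongside undefs.

    if (Value *V = PN->hasConstantValue(DT)) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }
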
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const PHINode *) { return true; }
@@ -2028,7 +2013,7 @@ public:
};
template <>
-struct OperandTraits<PHINode> : HungoffOperandTraits<2> {
+struct OperandTraits<PHINode> : public HungoffOperandTraits<2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value)
@@ -2057,18 +2042,21 @@ private:
//
 // NOTE: If the Value* passed has void type then the constructor behaves as
 // if it were passed NULL.
- explicit ReturnInst(Value *retVal = 0, Instruction *InsertBefore = 0);
- ReturnInst(Value *retVal, BasicBlock *InsertAtEnd);
- explicit ReturnInst(BasicBlock *InsertAtEnd);
+ explicit ReturnInst(LLVMContext &C, Value *retVal = 0,
+ Instruction *InsertBefore = 0);
+ ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd);
+ explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd);
public:
- static ReturnInst* Create(Value *retVal = 0, Instruction *InsertBefore = 0) {
- return new(!!retVal) ReturnInst(retVal, InsertBefore);
+ static ReturnInst* Create(LLVMContext &C, Value *retVal = 0,
+ Instruction *InsertBefore = 0) {
+ return new(!!retVal) ReturnInst(C, retVal, InsertBefore);
}
- static ReturnInst* Create(Value *retVal, BasicBlock *InsertAtEnd) {
- return new(!!retVal) ReturnInst(retVal, InsertAtEnd);
+ static ReturnInst* Create(LLVMContext &C, Value *retVal,
+ BasicBlock *InsertAtEnd) {
+ return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd);
}
- static ReturnInst* Create(BasicBlock *InsertAtEnd) {
- return new(0) ReturnInst(InsertAtEnd);
+ static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) {
+ return new(0) ReturnInst(C, InsertAtEnd);
}
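
A sketch of the new context-taking form, assuming a hypothetical Function *F
returning i32 and an exit block ExitBB:

    LLVMContext &Ctx = F->getContext();
    ReturnInst::Create(Ctx, ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                       ExitBB);   // appends 'ret i32 0'
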
virtual ~ReturnInst();
@@ -2101,7 +2089,7 @@ public:
};
template <>
-struct OperandTraits<ReturnInst> : OptionalOperandTraits<> {
+struct OperandTraits<ReturnInst> : public OptionalOperandTraits<> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
@@ -2209,7 +2197,7 @@ private:
};
template <>
-struct OperandTraits<BranchInst> : VariadicOperandTraits<1> {};
+struct OperandTraits<BranchInst> : public VariadicOperandTraits<1> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
@@ -2358,7 +2346,7 @@ private:
};
template <>
-struct OperandTraits<SwitchInst> : HungoffOperandTraits<2> {
+struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
@@ -2380,7 +2368,7 @@ class InvokeInst : public TerminatorInst {
template<typename InputIterator>
void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
// This argument ensures that we have an iterator we can
// do arithmetic on in constant time
std::random_access_iterator_tag) {
@@ -2402,7 +2390,7 @@ class InvokeInst : public TerminatorInst {
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
unsigned Values,
- const std::string &NameStr, Instruction *InsertBefore);
+ const Twine &NameStr, Instruction *InsertBefore);
/// Construct an InvokeInst given a range of arguments.
/// InputIterator must be a random-access iterator pointing to
@@ -2415,13 +2403,13 @@ class InvokeInst : public TerminatorInst {
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
unsigned Values,
- const std::string &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
public:
template<typename InputIterator>
static InvokeInst *Create(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr = "",
+ const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
unsigned Values(ArgEnd - ArgBegin + 3);
return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
@@ -2431,7 +2419,7 @@ public:
static InvokeInst *Create(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
- const std::string &NameStr,
+ const Twine &NameStr,
BasicBlock *InsertAtEnd) {
unsigned Values(ArgEnd - ArgBegin + 3);
return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
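
A sketch against the insert-at-end factory; Callee (returning a value and
taking one i8* argument), Ptr, NormalBB, UnwindBB, and CurBB are all
hypothetical:

    Value *Args[] = { Ptr };
    InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB,
                                        Args, Args + 1, "res", CurBB);
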
@@ -2445,9 +2433,11 @@ public:
/// getCallingConv/setCallingConv - Get or set the calling convention of this
/// function call.
- unsigned getCallingConv() const { return SubclassData; }
- void setCallingConv(unsigned CC) {
- SubclassData = CC;
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(SubclassData);
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ SubclassData = static_cast<unsigned>(CC);
}
/// getAttributes - Return the parameter attributes for this invoke.
@@ -2474,7 +2464,7 @@ public:
/// @brief Determine if the call does not access memory.
bool doesNotAccessMemory() const {
- return paramHasAttr(0, Attribute::ReadNone);
+ return paramHasAttr(~0, Attribute::ReadNone);
}
void setDoesNotAccessMemory(bool NotAccessMemory = true) {
if (NotAccessMemory) addAttribute(~0, Attribute::ReadNone);
@@ -2574,7 +2564,7 @@ private:
};
template <>
-struct OperandTraits<InvokeInst> : VariadicOperandTraits<3> {
+struct OperandTraits<InvokeInst> : public VariadicOperandTraits<3> {
};
template<typename InputIterator>
@@ -2582,7 +2572,7 @@ InvokeInst::InvokeInst(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
unsigned Values,
- const std::string &NameStr, Instruction *InsertBefore)
+ const Twine &NameStr, Instruction *InsertBefore)
: TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Invoke,
@@ -2596,7 +2586,7 @@ InvokeInst::InvokeInst(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
InputIterator ArgBegin, InputIterator ArgEnd,
unsigned Values,
- const std::string &NameStr, BasicBlock *InsertAtEnd)
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
: TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Invoke,
@@ -2623,8 +2613,8 @@ public:
void *operator new(size_t s) {
return User::operator new(s, 0);
}
- explicit UnwindInst(Instruction *InsertBefore = 0);
- explicit UnwindInst(BasicBlock *InsertAtEnd);
+ explicit UnwindInst(LLVMContext &C, Instruction *InsertBefore = 0);
+ explicit UnwindInst(LLVMContext &C, BasicBlock *InsertAtEnd);
virtual UnwindInst *clone() const;
@@ -2660,8 +2650,8 @@ public:
void *operator new(size_t s) {
return User::operator new(s, 0);
}
- explicit UnreachableInst(Instruction *InsertBefore = 0);
- explicit UnreachableInst(BasicBlock *InsertAtEnd);
+ explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = 0);
+ explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
virtual UnreachableInst *clone() const;
@@ -2687,16 +2677,12 @@ private:
/// @brief This class represents a truncation of integer types.
class TruncInst : public CastInst {
- /// Private copy constructor
- TruncInst(const TruncInst &CI)
- : CastInst(CI.getType(), Trunc, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
TruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The (smaller) type to truncate to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2704,12 +2690,12 @@ public:
TruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The (smaller) type to truncate to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical TruncInst
- virtual CastInst *clone() const;
+ virtual TruncInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const TruncInst *) { return true; }
@@ -2727,16 +2713,12 @@ public:
/// @brief This class represents zero extension of integer types.
class ZExtInst : public CastInst {
- /// @brief Private copy constructor
- ZExtInst(const ZExtInst &CI)
- : CastInst(CI.getType(), ZExt, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
ZExtInst(
Value *S, ///< The value to be zero extended
const Type *Ty, ///< The type to zero extend to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2744,12 +2726,12 @@ public:
ZExtInst(
Value *S, ///< The value to be zero extended
const Type *Ty, ///< The type to zero extend to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical ZExtInst
- virtual CastInst *clone() const;
+ virtual ZExtInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const ZExtInst *) { return true; }
@@ -2767,16 +2749,12 @@ public:
/// @brief This class represents a sign extension of integer types.
class SExtInst : public CastInst {
- /// @brief Private copy constructor
- SExtInst(const SExtInst &CI)
- : CastInst(CI.getType(), SExt, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
SExtInst(
Value *S, ///< The value to be sign extended
const Type *Ty, ///< The type to sign extend to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2784,12 +2762,12 @@ public:
SExtInst(
Value *S, ///< The value to be sign extended
const Type *Ty, ///< The type to sign extend to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical SExtInst
- virtual CastInst *clone() const;
+ virtual SExtInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SExtInst *) { return true; }
@@ -2807,15 +2785,12 @@ public:
/// @brief This class represents a truncation of floating point types.
class FPTruncInst : public CastInst {
- FPTruncInst(const FPTruncInst &CI)
- : CastInst(CI.getType(), FPTrunc, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
FPTruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The type to truncate to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2823,12 +2798,12 @@ public:
FPTruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The type to truncate to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical FPTruncInst
- virtual CastInst *clone() const;
+ virtual FPTruncInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPTruncInst *) { return true; }
@@ -2846,15 +2821,12 @@ public:
/// @brief This class represents an extension of floating point types.
class FPExtInst : public CastInst {
- FPExtInst(const FPExtInst &CI)
- : CastInst(CI.getType(), FPExt, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
FPExtInst(
Value *S, ///< The value to be extended
const Type *Ty, ///< The type to extend to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2862,12 +2834,12 @@ public:
FPExtInst(
Value *S, ///< The value to be extended
const Type *Ty, ///< The type to extend to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical FPExtInst
- virtual CastInst *clone() const;
+ virtual FPExtInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPExtInst *) { return true; }
@@ -2885,15 +2857,12 @@ public:
/// @brief This class represents a cast unsigned integer to floating point.
class UIToFPInst : public CastInst {
- UIToFPInst(const UIToFPInst &CI)
- : CastInst(CI.getType(), UIToFP, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
UIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2901,12 +2870,12 @@ public:
UIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical UIToFPInst
- virtual CastInst *clone() const;
+ virtual UIToFPInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const UIToFPInst *) { return true; }
@@ -2924,15 +2893,12 @@ public:
/// @brief This class represents a cast from signed integer to floating point.
class SIToFPInst : public CastInst {
- SIToFPInst(const SIToFPInst &CI)
- : CastInst(CI.getType(), SIToFP, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
SIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2940,12 +2906,12 @@ public:
SIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical SIToFPInst
- virtual CastInst *clone() const;
+ virtual SIToFPInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SIToFPInst *) { return true; }
@@ -2963,15 +2929,12 @@ public:
 /// @brief This class represents a cast from floating point to unsigned integer.
class FPToUIInst : public CastInst {
- FPToUIInst(const FPToUIInst &CI)
- : CastInst(CI.getType(), FPToUI, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
FPToUIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2979,12 +2942,12 @@ public:
FPToUIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< Where to insert the new instruction
);
/// @brief Clone an identical FPToUIInst
- virtual CastInst *clone() const;
+ virtual FPToUIInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPToUIInst *) { return true; }
@@ -3002,15 +2965,12 @@ public:
/// @brief This class represents a cast from floating point to signed integer.
class FPToSIInst : public CastInst {
- FPToSIInst(const FPToSIInst &CI)
- : CastInst(CI.getType(), FPToSI, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
FPToSIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3018,12 +2978,12 @@ public:
FPToSIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical FPToSIInst
- virtual CastInst *clone() const;
+ virtual FPToSIInst *clone() const;
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPToSIInst *) { return true; }
@@ -3041,15 +3001,12 @@ public:
/// @brief This class represents a cast from an integer to a pointer.
class IntToPtrInst : public CastInst {
- IntToPtrInst(const IntToPtrInst &CI)
- : CastInst(CI.getType(), IntToPtr, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
IntToPtrInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3057,12 +3014,12 @@ public:
IntToPtrInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical IntToPtrInst
- virtual CastInst *clone() const;
+ virtual IntToPtrInst *clone() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntToPtrInst *) { return true; }
@@ -3080,15 +3037,12 @@ public:
 /// @brief This class represents a cast from a pointer to an integer.
class PtrToIntInst : public CastInst {
- PtrToIntInst(const PtrToIntInst &CI)
- : CastInst(CI.getType(), PtrToInt, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
PtrToIntInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3096,12 +3050,12 @@ public:
PtrToIntInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical PtrToIntInst
- virtual CastInst *clone() const;
+ virtual PtrToIntInst *clone() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const PtrToIntInst *) { return true; }
@@ -3119,15 +3073,12 @@ public:
/// @brief This class represents a no-op cast from one type to another.
class BitCastInst : public CastInst {
- BitCastInst(const BitCastInst &CI)
- : CastInst(CI.getType(), BitCast, CI.getOperand(0)) {
- }
public:
/// @brief Constructor with insert-before-instruction semantics
BitCastInst(
 Value *S, ///< The value to be cast
 const Type *Ty, ///< The type to cast to
- const std::string &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3135,12 +3086,12 @@ public:
BitCastInst(
 Value *S, ///< The value to be cast
 const Type *Ty, ///< The type to cast to
- const std::string &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
/// @brief Clone an identical BitCastInst
- virtual CastInst *clone() const;
+ virtual BitCastInst *clone() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const BitCastInst *) { return true; }
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index 8f5e05f70cd7..6a8f37639261 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -25,6 +25,7 @@
#define LLVM_INTRINSICINST_H
#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
@@ -85,8 +86,8 @@ namespace llvm {
struct DbgStopPointInst : public DbgInfoIntrinsic {
Value *getLineValue() const { return const_cast<Value*>(getOperand(1)); }
Value *getColumnValue() const { return const_cast<Value*>(getOperand(2)); }
- Value *getContext() const {
- return StripCast(getOperand(3));
+ MDNode *getContext() const {
+ return cast<MDNode>(getOperand(3));
}
unsigned getLine() const {
@@ -112,7 +113,7 @@ namespace llvm {
/// DbgFuncStartInst - This represents the llvm.dbg.func.start instruction.
///
struct DbgFuncStartInst : public DbgInfoIntrinsic {
- Value *getSubprogram() const { return StripCast(getOperand(1)); }
+ MDNode *getSubprogram() const { return cast<MDNode>(getOperand(1)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgFuncStartInst *) { return true; }
@@ -127,7 +128,7 @@ namespace llvm {
/// DbgRegionStartInst - This represents the llvm.dbg.region.start
/// instruction.
struct DbgRegionStartInst : public DbgInfoIntrinsic {
- Value *getContext() const { return StripCast(getOperand(1)); }
+ MDNode *getContext() const { return cast<MDNode>(getOperand(1)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgRegionStartInst *) { return true; }
@@ -142,7 +143,7 @@ namespace llvm {
/// DbgRegionEndInst - This represents the llvm.dbg.region.end instruction.
///
struct DbgRegionEndInst : public DbgInfoIntrinsic {
- Value *getContext() const { return StripCast(getOperand(1)); }
+ MDNode *getContext() const { return cast<MDNode>(getOperand(1)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgRegionEndInst *) { return true; }
@@ -158,7 +159,7 @@ namespace llvm {
///
struct DbgDeclareInst : public DbgInfoIntrinsic {
Value *getAddress() const { return getOperand(1); }
- Value *getVariable() const { return StripCast(getOperand(2)); }
+ MDNode *getVariable() const { return cast<MDNode>(getOperand(2)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgDeclareInst *) { return true; }
@@ -202,9 +203,13 @@ namespace llvm {
"setLength called with value of wrong type!");
setOperand(3, L);
}
- void setAlignment(unsigned A) {
- const Type *Int32Ty = getOperand(4)->getType();
- setOperand(4, ConstantInt::get(Int32Ty, A));
+
+ void setAlignment(Constant *A) {
+ setOperand(4, A);
+ }
+
+ const Type *getAlignmentType() const {
+ return getOperand(4)->getType();
}
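
A sketch of the new interface; MI is a hypothetical MemIntrinsic pointer.
The alignment operand's type is queried rather than assumed to be i32:

    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), 16));
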
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -308,8 +313,7 @@ namespace llvm {
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const EHSelectorInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::eh_selector_i32 ||
- I->getIntrinsicID() == Intrinsic::eh_selector_i64;
+ return I->getIntrinsicID() == Intrinsic::eh_selector;
}
static inline bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h
index 227eb5a5b70f..8f1b1aee1f5a 100644
--- a/include/llvm/Intrinsics.h
+++ b/include/llvm/Intrinsics.h
@@ -23,6 +23,7 @@ namespace llvm {
class Type;
class FunctionType;
class Function;
+class LLVMContext;
class Module;
class AttrListPtr;
@@ -47,7 +48,8 @@ namespace Intrinsic {
/// Intrinsic::getType(ID) - Return the function type for an intrinsic.
///
- const FunctionType *getType(ID id, const Type **Tys = 0, unsigned numTys = 0);
+ const FunctionType *getType(LLVMContext &Context, ID id,
+ const Type **Tys = 0, unsigned numTys = 0);
/// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
/// overloaded.
@@ -61,7 +63,7 @@ namespace Intrinsic {
/// declaration for an intrinsic, and return it.
///
/// The Tys and numTys parameters are for intrinsics with overloaded types
- /// (i.e., those using iAny or fAny). For a declaration for an overloaded
+ /// (e.g., those using iAny or fAny). For a declaration for an overloaded
/// intrinsic, Tys should point to an array of numTys pointers to Type,
/// and must provide exactly one type for each overloaded type in the
/// intrinsic.
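
A sketch of declaring one overloaded intrinsic, assuming a hypothetical
Module *M: ctpop is overloaded on iAny, so a single concrete type resolves
it to llvm.ctpop.i32.

    const Type *Tys[] = { Type::getInt32Ty(M->getContext()) };
    Function *Ctpop = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys, 1);
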
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index c036151329c6..38ac4c2927c0 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -1,10 +1,10 @@
//===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines properties of all LLVM intrinsics.
@@ -21,7 +21,7 @@ class IntrinsicProperty;
// Intr*Mem - Memory properties. An intrinsic is allowed to have exactly one of
// these properties set. They are listed from the most aggressive (best to use
-// if correct) to the least aggressive. If no property is set, the worst case
+// if correct) to the least aggressive. If no property is set, the worst case
// is assumed (IntrWriteMem).
// IntrNoMem - The intrinsic does not access memory or have any other side
@@ -42,7 +42,7 @@ def IntrReadMem : IntrinsicProperty;
// and writes may be volatile, but except for this it has no other side effects.
def IntrWriteArgMem : IntrinsicProperty;
-// IntrWriteMem - This intrinsic may read or modify unspecified memory or has
+// IntrWriteMem - This intrinsic may read or modify unspecified memory or has
// other side effects. It cannot be modified by the optimizer. This is the
// default if the intrinsic has no other Intr*Mem property.
def IntrWriteMem : IntrinsicProperty;
@@ -66,12 +66,12 @@ class LLVMType<ValueType vt> {
class LLVMPointerType<LLVMType elty>
: LLVMType<iPTR>{
LLVMType ElTy = elty;
-}
+}
class LLVMAnyPointerType<LLVMType elty>
: LLVMType<iPTRAny>{
LLVMType ElTy = elty;
-}
+}
// Match the type of another intrinsic parameter. Number is an index into the
// list of overloaded types for the intrinsic, excluding all the fixed types.
@@ -84,7 +84,7 @@ class LLVMMatchType<int num>
int Number = num;
}
-// Match the type of another intrinsic parameter that is expected to be
+// Match the type of another intrinsic parameter that is expected to be
// an integral vector type, but change the element size to be twice as wide
// or half as wide as the other type. This is only useful when the intrinsic
// is overloaded, so the matched type should be declared as iAny.
@@ -94,6 +94,7 @@ class LLVMTruncatedElementVectorType<int num> : LLVMMatchType<num>;
def llvm_void_ty : LLVMType<isVoid>;
def llvm_anyint_ty : LLVMType<iAny>;
def llvm_anyfloat_ty : LLVMType<fAny>;
+def llvm_anyvector_ty : LLVMType<vAny>;
def llvm_i1_ty : LLVMType<i1>;
def llvm_i8_ty : LLVMType<i8>;
def llvm_i16_ty : LLVMType<i16>;
@@ -109,6 +110,7 @@ def llvm_ptrptr_ty : LLVMPointerType<llvm_ptr_ty>; // i8**
def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
def llvm_empty_ty : LLVMType<OtherVT>; // { }
def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
+def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
@@ -127,7 +129,6 @@ def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
-def llvm_v3f32_ty : LLVMType<v3f32>; // 3 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
@@ -173,7 +174,7 @@ class GCCBuiltin<string name> {
//===--------------- Variable Argument Handling Intrinsics ----------------===//
-//
+//
def int_vastart : Intrinsic<[llvm_void_ty], [llvm_ptr_ty], [], "llvm.va_start">;
def int_vacopy : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_ptr_ty], [],
@@ -181,7 +182,7 @@ def int_vacopy : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_ptr_ty], [],
def int_vaend : Intrinsic<[llvm_void_ty], [llvm_ptr_ty], [], "llvm.va_end">;
//===------------------- Garbage Collection Intrinsics --------------------===//
-//
+//
def int_gcroot : Intrinsic<[llvm_void_ty],
[llvm_ptrptr_ty, llvm_ptr_ty]>;
def int_gcread : Intrinsic<[llvm_ptr_ty],
@@ -192,7 +193,7 @@ def int_gcwrite : Intrinsic<[llvm_void_ty],
[IntrWriteArgMem, NoCapture<1>, NoCapture<2>]>;
//===--------------------- Code Generator Intrinsics ----------------------===//
-//
+//
def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
@@ -242,7 +243,7 @@ let Properties = [IntrReadMem] in {
def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_pow : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
@@ -267,11 +268,6 @@ let Properties = [IntrNoMem] in {
def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_part_select : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty]>;
- def int_part_set : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_anyint_ty,
- llvm_i32_ty, llvm_i32_ty]>;
}
//===------------------------ Debugger Intrinsics -------------------------===//
@@ -282,25 +278,22 @@ let Properties = [IntrNoMem] in {
// places.
let Properties = [IntrNoMem] in {
def int_dbg_stoppoint : Intrinsic<[llvm_void_ty],
- [llvm_i32_ty, llvm_i32_ty,
- llvm_descriptor_ty]>;
- def int_dbg_region_start : Intrinsic<[llvm_void_ty], [llvm_descriptor_ty]>;
- def int_dbg_region_end : Intrinsic<[llvm_void_ty], [llvm_descriptor_ty]>;
- def int_dbg_func_start : Intrinsic<[llvm_void_ty], [llvm_descriptor_ty]>;
+ [llvm_i32_ty, llvm_i32_ty,
+ llvm_metadata_ty]>;
+ def int_dbg_region_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
+ def int_dbg_region_end : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
+ def int_dbg_func_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
def int_dbg_declare : Intrinsic<[llvm_void_ty],
- [llvm_descriptor_ty, llvm_descriptor_ty]>;
+ [llvm_descriptor_ty, llvm_metadata_ty]>;
}
//===------------------ Exception Handling Intrinsics----------------------===//
//
-def int_eh_exception : Intrinsic<[llvm_ptr_ty]>;
-def int_eh_selector_i32 : Intrinsic<[llvm_i32_ty],
- [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>;
-def int_eh_selector_i64 : Intrinsic<[llvm_i64_ty],
- [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>;
+def int_eh_exception : Intrinsic<[llvm_ptr_ty], [], [IntrReadMem]>;
+def int_eh_selector : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>;
-def int_eh_typeid_for_i32 : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_eh_typeid_for_i64 : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty]>;
+def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_return_i32 : Intrinsic<[llvm_void_ty], [llvm_i32_ty, llvm_ptr_ty]>;
def int_eh_return_i64 : Intrinsic<[llvm_void_ty], [llvm_i64_ty, llvm_ptr_ty]>;
@@ -311,19 +304,20 @@ def int_eh_unwind_init: Intrinsic<[llvm_void_ty]>,
def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
let Properties = [IntrNoMem] in {
-def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_eh_sjlj_longjmp : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_i32_ty]>;
+ def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+ def int_eh_sjlj_longjmp : Intrinsic<[llvm_void_ty], [llvm_ptr_ty]>;
+ def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>;
}
//===---------------- Generic Variable Attribute Intrinsics----------------===//
//
def int_var_annotation : Intrinsic<[llvm_void_ty],
[llvm_ptr_ty, llvm_ptr_ty,
- llvm_ptr_ty, llvm_i32_ty],
+ llvm_ptr_ty, llvm_i32_ty],
[], "llvm.var.annotation">;
def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_anyint_ty>],
[LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty,
- llvm_i32_ty],
+ llvm_i32_ty],
[], "llvm.ptr.annotation">;
def int_annotation : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_ptr_ty,
@@ -423,7 +417,23 @@ def int_atomic_load_umax : Intrinsic<[llvm_anyint_ty],
LLVMMatchType<0>],
[IntrWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_umax">;
-
+
+//===------------------------- Memory Use Markers -------------------------===//
+//
+def int_lifetime_start : Intrinsic<[llvm_void_ty],
+ [llvm_i64_ty, llvm_ptr_ty],
+ [IntrWriteArgMem, NoCapture<1>]>;
+def int_lifetime_end : Intrinsic<[llvm_void_ty],
+ [llvm_i64_ty, llvm_ptr_ty],
+ [IntrWriteArgMem, NoCapture<1>]>;
+def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
+ [llvm_i64_ty, llvm_ptr_ty],
+ [IntrReadArgMem, NoCapture<1>]>;
+def int_invariant_end : Intrinsic<[llvm_void_ty],
+ [llvm_descriptor_ty, llvm_i64_ty,
+ llvm_ptr_ty],
+ [IntrWriteArgMem, NoCapture<2>]>;
+
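
These markers are meant to be generated by front ends and passes rather than written by hand. As a rough, hedged sketch of how a pass might tag an alloca with llvm.lifetime.start through the C++ API of this era (the helper name, header paths, and insertion strategy are illustrative assumptions, not taken from this commit):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    #include "llvm/Support/IRBuilder.h"

    using namespace llvm;

    // Insert "call void @llvm.lifetime.start(i64 Size, i8* AI)" at the
    // builder's current insertion point; Size is the object size in bytes.
    static void markLifetimeStart(IRBuilder<> &B, Module &M,
                                  AllocaInst *AI, uint64_t Size) {
      LLVMContext &Ctx = M.getContext();
      Value *Args[] = {
        ConstantInt::get(Type::getInt64Ty(Ctx), Size),
        B.CreateBitCast(AI, PointerType::getUnqual(Type::getInt8Ty(Ctx)))
      };
      Function *F = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start);
      B.CreateCall(F, Args, Args + 2);
    }
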
//===-------------------------- Other Intrinsics --------------------------===//
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
@@ -464,3 +474,4 @@ include "llvm/IntrinsicsARM.td"
include "llvm/IntrinsicsCellSPU.td"
include "llvm/IntrinsicsAlpha.td"
include "llvm/IntrinsicsXCore.td"
+include "llvm/IntrinsicsBlackfin.td"
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index 4723ffb530d2..c408a2f374ec 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -27,41 +27,36 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
// The following classes do not correspond directly to GCC builtins.
class Neon_1Arg_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class Neon_1Arg_Float_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class Neon_1Arg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
class Neon_1Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedElementVectorType<0>], [IntrNoMem]>;
class Neon_2Arg_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
- class Neon_2Arg_Float_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class Neon_2Arg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMExtendedElementVectorType<0>,
LLVMExtendedElementVectorType<0>],
[IntrNoMem]>;
class Neon_2Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedElementVectorType<0>,
LLVMTruncatedElementVectorType<0>],
[IntrNoMem]>;
class Neon_2Arg_Wide_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMTruncatedElementVectorType<0>],
[IntrNoMem]>;
class Neon_3Arg_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class Neon_3Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMTruncatedElementVectorType<0>,
LLVMTruncatedElementVectorType<0>],
@@ -70,6 +65,28 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
class Neon_CvtFPToFx_Intrinsic
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
+ // Besides the table, VTBL has one other v8i8 argument and VTBX has two.
+ // Overall, the classes range from 2 to 6 v8i8 arguments.
+ class Neon_Tbl2Arg_Intrinsic
+ : Intrinsic<[llvm_v8i8_ty],
+ [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
+ class Neon_Tbl3Arg_Intrinsic
+ : Intrinsic<[llvm_v8i8_ty],
+ [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
+ class Neon_Tbl4Arg_Intrinsic
+ : Intrinsic<[llvm_v8i8_ty],
+ [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
+ [IntrNoMem]>;
+ class Neon_Tbl5Arg_Intrinsic
+ : Intrinsic<[llvm_v8i8_ty],
+ [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
+ llvm_v8i8_ty], [IntrNoMem]>;
+ class Neon_Tbl6Arg_Intrinsic
+ : Intrinsic<[llvm_v8i8_ty],
+ [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
+ llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
}
// Arithmetic ops
@@ -110,18 +127,16 @@ let Properties = [IntrNoMem, Commutative] in {
// Vector Maximum.
def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
- def int_arm_neon_vmaxf : Neon_2Arg_Float_Intrinsic;
// Vector Minimum.
def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
- def int_arm_neon_vminf : Neon_2Arg_Float_Intrinsic;
// Vector Reciprocal Step.
- def int_arm_neon_vrecps : Neon_2Arg_Float_Intrinsic;
+ def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
// Vector Reciprocal Square Root Step.
- def int_arm_neon_vrsqrts : Neon_2Arg_Float_Intrinsic;
+ def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
}
// Vector Subtract.
@@ -155,7 +170,6 @@ let TargetPrefix = "arm" in {
// Vector Absolute Differences.
def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vabdf : Neon_2Arg_Float_Intrinsic;
def int_arm_neon_vabdls : Neon_2Arg_Long_Intrinsic;
def int_arm_neon_vabdlu : Neon_2Arg_Long_Intrinsic;
@@ -166,17 +180,16 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
// Vector Pairwise Add.
-def int_arm_neon_vpaddi : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpaddf : Neon_2Arg_Float_Intrinsic;
+def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
// Vector Pairwise Add Long.
// Note: This is different than the other "long" NEON intrinsics because
// the result vector has half as many elements as the source vector.
// The source and destination vector types must be specified separately.
let TargetPrefix = "arm" in {
- def int_arm_neon_vpaddls : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty],
+ def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
[IntrNoMem]>;
- def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty],
+ def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
[IntrNoMem]>;
}
@@ -184,21 +197,19 @@ let TargetPrefix = "arm" in {
// Note: This is similar to vpaddl but the destination vector also appears
// as the first argument.
let TargetPrefix = "arm" in {
- def int_arm_neon_vpadals : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_anyint_ty],
+ def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;
- def int_arm_neon_vpadalu : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_anyint_ty],
+ def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;
}
// Vector Pairwise Maximum and Minimum.
def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpmaxf : Neon_2Arg_Float_Intrinsic;
def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpminf : Neon_2Arg_Float_Intrinsic;
// Vector Shifts:
//
@@ -253,7 +264,6 @@ def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
// Vector Absolute Value and Saturating Absolute Value.
def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
-def int_arm_neon_vabsf : Neon_1Arg_Float_Intrinsic;
def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;
// Vector Saturating Negate.
@@ -268,11 +278,9 @@ def int_arm_neon_vcnt : Neon_1Arg_Intrinsic;
// Vector Reciprocal Estimate.
def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrecpef : Neon_1Arg_Float_Intrinsic;
// Vector Reciprocal Square Root Estimate.
def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrsqrtef : Neon_1Arg_Float_Intrinsic;
// Vector Conversions Between Floating-point and Fixed-point.
def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
@@ -288,38 +296,81 @@ def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
def int_arm_neon_vmovls : Neon_1Arg_Long_Intrinsic;
def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic;
+// Vector Table Lookup.
+// The first 1-4 arguments are the table.
+def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic;
+def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic;
+def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic;
+def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic;
+
+// Vector Table Extension.
+// Some elements of the destination vector may not be updated, so the original
+// value of that vector is passed as the first argument. The next 1-4
+// arguments after that are the table.
+def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic;
+def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
+def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
+def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
+
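
For context, the C-level counterparts of these operations live in arm_neon.h; a small, hedged example of the one-table form (vtbl1_u8 is the standard NEON intrinsic name, not something defined in this patch):

    #include <arm_neon.h>

    // Each result byte is table[indices[i]]; indices outside 0-7 produce 0.
    // Compiled for a NEON target, this lowers to llvm.arm.neon.vtbl1.
    uint8x8_t shuffle_bytes(uint8x8_t table, uint8x8_t indices) {
      return vtbl1_u8(table, indices);
    }
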
let TargetPrefix = "arm" in {
// De-interleaving vector loads from N-element structures.
- def int_arm_neon_vld3i : Intrinsic<[llvm_anyint_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld3f : Intrinsic<[llvm_anyfloat_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld4i : Intrinsic<[llvm_anyint_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld4f : Intrinsic<[llvm_anyfloat_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+
+ // Vector load N-element structure to one lane.
+ def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_ptr_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, llvm_i32_ty],
+ [IntrReadArgMem]>;
+ def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [llvm_ptr_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_i32_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_ptr_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, llvm_i32_ty],
+ [IntrReadArgMem]>;
// Interleaving vector stores from N-element structures.
- def int_arm_neon_vst3i : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty],
+ def int_arm_neon_vst1 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst2 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
+ def int_arm_neon_vst3 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>],
[IntrWriteArgMem]>;
- def int_arm_neon_vst3f : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty],
- [IntrWriteArgMem]>;
- def int_arm_neon_vst4i : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty],
- [IntrWriteArgMem]>;
- def int_arm_neon_vst4f : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty],
- [IntrWriteArgMem]>;
-
- // Vector Table Lookup
- def int_arm_neon_vtbl : Intrinsic<[llvm_v8i8_ty],
- [llvm_anyint_ty, llvm_v8i8_ty],
- [IntrNoMem]>;
- // Vector Table Extension
- def int_arm_neon_vtbx : Intrinsic<[llvm_v8i8_ty],
- [llvm_v8i8_ty, llvm_anyint_ty,
- llvm_v8i8_ty], [IntrNoMem]>;
+ def int_arm_neon_vst4 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
+
+ // Vector store N-element structure from one lane.
+ def int_arm_neon_vst2lane : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, llvm_i32_ty],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst3lane : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_i32_ty], [IntrWriteArgMem]>;
+ def int_arm_neon_vst4lane : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, llvm_i32_ty],
+ [IntrWriteArgMem]>;
}
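
As a usage sketch of the de-interleaving loads, again via the standard arm_neon.h names (an assumption here, not part of the diff):

    #include <arm_neon.h>

    // Loads 16 bytes from p and de-interleaves them: even-indexed bytes go
    // to .val[0], odd-indexed bytes to .val[1] (lowers to llvm.arm.neon.vld2).
    uint8x8x2_t load_pairs(const uint8_t *p) {
      return vld2_u8(p);
    }
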
diff --git a/include/llvm/IntrinsicsBlackfin.td b/include/llvm/IntrinsicsBlackfin.td
new file mode 100644
index 000000000000..188e18cc91f6
--- /dev/null
+++ b/include/llvm/IntrinsicsBlackfin.td
@@ -0,0 +1,34 @@
+//===- IntrinsicsBlackfin.td - Defines Blackfin intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Blackfin-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Core synchronisation etc.
+//
+// These intrinsics have side effects. Each represents a single instruction,
+// but workarounds are sometimes required depending on the CPU.
+
+let TargetPrefix = "bfin" in {
+
+ // Execute csync instruction with workarounds
+ def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
+ Intrinsic<[llvm_void_ty]>;
+
+ // Execute ssync instruction with workarounds
+ def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
+ Intrinsic<[llvm_void_ty]>;
+
+ // Execute idle instruction with workarounds
+ def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
+ Intrinsic<[llvm_void_ty]>;
+
+}
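
A minimal usage sketch, assuming a compiler that exposes the GCCBuiltin names declared above for a Blackfin target:

    // csync/ssync order memory and system effects; idle halts the core until
    // an interrupt. Each builtin maps to the matching llvm.bfin.* intrinsic.
    void quiesce(void) {
      __builtin_bfin_ssync();
      __builtin_bfin_idle();
    }
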
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 37ba59c92186..5be032bb8204 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1,10 +1,10 @@
//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines all of the X86-specific intrinsics.
@@ -129,7 +129,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
Intrinsic<[llvm_v2i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v2i32_ty], [IntrNoMem]>;
}
@@ -814,9 +814,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pinsrb : GCCBuiltin<"__builtin_ia32_vec_set_v16qi">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
[IntrNoMem]>;
@@ -867,6 +864,105 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
+// Test instruction with bitwise comparison.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.2
+
+// Miscellaneous
+// CRC Instruction
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse42_crc32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+}
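
These map to the hardware crc32 instruction. A hedged, byte-at-a-time example using the GCCBuiltin name declared above (requires an SSE4.2-enabled compile, e.g. -msse4.2):

    // Accumulates a CRC-32C checksum; each call lowers to
    // llvm.x86.sse42.crc32.8 and thus to one crc32b instruction.
    unsigned crc32c_bytes(unsigned crc, const unsigned char *p, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        crc = __builtin_ia32_crc32qi(crc, p[i]);
      return crc;
    }
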
+
+// String/text processing ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+}
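
The usual entry point for these is <nmmintrin.h>; a sketch follows (the _mm_cmpistri/_SIDD_* spellings are the standard SSE4.2 header names, assumed here, and the mode argument must be a compile-time constant):

    #include <nmmintrin.h>

    // Returns the index (0-15) of the first byte of 'text' that occurs in
    // 'set', or 16 if none does; lowers to llvm.x86.sse42.pcmpistri128.
    int find_any(__m128i set, __m128i text) {
      return _mm_cmpistri(set, text, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
    }
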
//===----------------------------------------------------------------------===//
// MMX
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
index efe12cccb6d1..a135f671b771 100644
--- a/include/llvm/LLVMContext.h
+++ b/include/llvm/LLVMContext.h
@@ -15,35 +15,10 @@
#ifndef LLVM_LLVMCONTEXT_H
#define LLVM_LLVMCONTEXT_H
-#include "llvm/Support/DataTypes.h"
-#include <vector>
-#include <string>
-
namespace llvm {
class LLVMContextImpl;
-class Constant;
-class ConstantInt;
-class ConstantPointerNull;
-class ConstantStruct;
-class ConstantAggregateZero;
-class ConstantArray;
-class ConstantFP;
-class ConstantVector;
-class UndefValue;
-class MDNode;
-class MDString;
-class IntegerType;
-class PointerType;
-class StructType;
-class ArrayType;
-class VectorType;
-class OpaqueType;
-class FunctionType;
-class Type;
-class APInt;
-class APFloat;
-class Value;
+class MetadataContext;
/// This is an important class for using LLVM in a threaded context. It
/// (opaquely) owns and manages the core "global" data of LLVM's core
@@ -51,170 +26,16 @@ class Value;
/// LLVMContext itself provides no locking guarantees, so you should be careful
/// to have one context per thread.
class LLVMContext {
- LLVMContextImpl* pImpl;
+ // DO NOT IMPLEMENT
+ LLVMContext(LLVMContext&);
+ void operator=(LLVMContext&);
+
public:
+ LLVMContextImpl* const pImpl;
+ MetadataContext &getMetadata();
+ bool RemoveDeadMetadata();
LLVMContext();
~LLVMContext();
-
- // Constant accessors
- Constant* getNullValue(const Type* Ty);
- Constant* getAllOnesValue(const Type* Ty);
-
- // UndefValue accessors
- UndefValue* getUndef(const Type* Ty);
-
- // ConstantInt accessors
- ConstantInt* getConstantIntTrue();
- ConstantInt* getConstantIntFalse();
- Constant* getConstantInt(const Type* Ty, uint64_t V,
- bool isSigned = false);
- ConstantInt* getConstantInt(const IntegerType* Ty, uint64_t V,
- bool isSigned = false);
- ConstantInt* getConstantIntSigned(const IntegerType* Ty, int64_t V);
- ConstantInt* getConstantInt(const APInt& V);
- Constant* getConstantInt(const Type* Ty, const APInt& V);
- ConstantInt* getConstantIntAllOnesValue(const Type* Ty);
-
- // ConstantPointerNull accessors
- ConstantPointerNull* getConstantPointerNull(const PointerType* T);
-
- // ConstantStruct accessors
- Constant* getConstantStruct(const StructType* T,
- const std::vector<Constant*>& V);
- Constant* getConstantStruct(const std::vector<Constant*>& V,
- bool Packed = false);
- Constant* getConstantStruct(Constant* const *Vals, unsigned NumVals,
- bool Packed = false);
-
- // ConstantAggregateZero accessors
- ConstantAggregateZero* getConstantAggregateZero(const Type* Ty);
-
- // ConstantArray accessors
- Constant* getConstantArray(const ArrayType* T,
- const std::vector<Constant*>& V);
- Constant* getConstantArray(const ArrayType* T, Constant* const* Vals,
- unsigned NumVals);
- Constant* getConstantArray(const std::string& Initializer,
- bool AddNull = false);
-
- // ConstantExpr accessors
- Constant* getConstantExpr(unsigned Opcode, Constant* C1, Constant* C2);
- Constant* getConstantExprTrunc(Constant* C, const Type* Ty);
- Constant* getConstantExprSExt(Constant* C, const Type* Ty);
- Constant* getConstantExprZExt(Constant* C, const Type* Ty);
- Constant* getConstantExprFPTrunc(Constant* C, const Type* Ty);
- Constant* getConstantExprFPExtend(Constant* C, const Type* Ty);
- Constant* getConstantExprUIToFP(Constant* C, const Type* Ty);
- Constant* getConstantExprSIToFP(Constant* C, const Type* Ty);
- Constant* getConstantExprFPToUI(Constant* C, const Type* Ty);
- Constant* getConstantExprFPToSI(Constant* C, const Type* Ty);
- Constant* getConstantExprPtrToInt(Constant* C, const Type* Ty);
- Constant* getConstantExprIntToPtr(Constant* C, const Type* Ty);
- Constant* getConstantExprBitCast(Constant* C, const Type* Ty);
- Constant* getConstantExprCast(unsigned ops, Constant* C, const Type* Ty);
- Constant* getConstantExprZExtOrBitCast(Constant* C, const Type* Ty);
- Constant* getConstantExprSExtOrBitCast(Constant* C, const Type* Ty);
- Constant* getConstantExprTruncOrBitCast(Constant* C, const Type* Ty);
- Constant* getConstantExprPointerCast(Constant* C, const Type* Ty);
- Constant* getConstantExprIntegerCast(Constant* C, const Type* Ty,
- bool isSigned);
- Constant* getConstantExprFPCast(Constant* C, const Type* Ty);
- Constant* getConstantExprSelect(Constant* C, Constant* V1, Constant* V2);
- Constant* getConstantExprAlignOf(const Type* Ty);
- Constant* getConstantExprCompare(unsigned short pred,
- Constant* C1, Constant* C2);
- Constant* getConstantExprNeg(Constant* C);
- Constant* getConstantExprFNeg(Constant* C);
- Constant* getConstantExprNot(Constant* C);
- Constant* getConstantExprAdd(Constant* C1, Constant* C2);
- Constant* getConstantExprFAdd(Constant* C1, Constant* C2);
- Constant* getConstantExprSub(Constant* C1, Constant* C2);
- Constant* getConstantExprFSub(Constant* C1, Constant* C2);
- Constant* getConstantExprMul(Constant* C1, Constant* C2);
- Constant* getConstantExprFMul(Constant* C1, Constant* C2);
- Constant* getConstantExprUDiv(Constant* C1, Constant* C2);
- Constant* getConstantExprSDiv(Constant* C1, Constant* C2);
- Constant* getConstantExprFDiv(Constant* C1, Constant* C2);
- Constant* getConstantExprURem(Constant* C1, Constant* C2);
- Constant* getConstantExprSRem(Constant* C1, Constant* C2);
- Constant* getConstantExprFRem(Constant* C1, Constant* C2);
- Constant* getConstantExprAnd(Constant* C1, Constant* C2);
- Constant* getConstantExprOr(Constant* C1, Constant* C2);
- Constant* getConstantExprXor(Constant* C1, Constant* C2);
- Constant* getConstantExprICmp(unsigned short pred, Constant* LHS,
- Constant* RHS);
- Constant* getConstantExprFCmp(unsigned short pred, Constant* LHS,
- Constant* RHS);
- Constant* getConstantExprVICmp(unsigned short pred, Constant* LHS,
- Constant* RHS);
- Constant* getConstantExprVFCmp(unsigned short pred, Constant* LHS,
- Constant* RHS);
- Constant* getConstantExprShl(Constant* C1, Constant* C2);
- Constant* getConstantExprLShr(Constant* C1, Constant* C2);
- Constant* getConstantExprAShr(Constant* C1, Constant* C2);
- Constant* getConstantExprGetElementPtr(Constant* C, Constant* const* IdxList,
- unsigned NumIdx);
- Constant* getConstantExprGetElementPtr(Constant* C, Value* const* IdxList,
- unsigned NumIdx);
- Constant* getConstantExprExtractElement(Constant* Vec, Constant* Idx);
- Constant* getConstantExprInsertElement(Constant* Vec, Constant* Elt,
- Constant* Idx);
- Constant* getConstantExprShuffleVector(Constant* V1, Constant* V2,
- Constant* Mask);
- Constant* getConstantExprExtractValue(Constant* Agg, const unsigned* IdxList,
- unsigned NumIdx);
- Constant* getConstantExprInsertValue(Constant* Agg, Constant* Val,
- const unsigned* IdxList,
- unsigned NumIdx);
- Constant* getZeroValueForNegation(const Type* Ty);
-
- // ConstantFP accessors
- ConstantFP* getConstantFP(const APFloat& V);
- Constant* getConstantFP(const Type* Ty, double V);
- ConstantFP* getConstantFPNegativeZero(const Type* Ty);
-
- // ConstantVector accessors
- Constant* getConstantVector(const VectorType* T,
- const std::vector<Constant*>& V);
- Constant* getConstantVector(const std::vector<Constant*>& V);
- Constant* getConstantVector(Constant* const* Vals, unsigned NumVals);
- ConstantVector* getConstantVectorAllOnesValue(const VectorType* Ty);
-
- // MDNode accessors
- MDNode* getMDNode(Value* const* Vals, unsigned NumVals);
-
- // MDString accessors
- MDString* getMDString(const char *StrBegin, const char *StrEnd);
- MDString* getMDString(const std::string &Str);
-
- // FunctionType accessors
- FunctionType* getFunctionType(const Type* Result,
- const std::vector<const Type*>& Params,
- bool isVarArg);
-
- // IntegerType accessors
- const IntegerType* getIntegerType(unsigned NumBits);
-
- // OpaqueType accessors
- OpaqueType* getOpaqueType();
-
- // StructType accessors
- StructType* getStructType(bool isPacked=false);
- StructType* getStructType(const std::vector<const Type*>& Params,
- bool isPacked = false);
-
- // ArrayType accessors
- ArrayType* getArrayType(const Type* ElementType, uint64_t NumElements);
-
- // PointerType accessors
- PointerType* getPointerType(const Type* ElementType, unsigned AddressSpace);
- PointerType* getPointerTypeUnqual(const Type* ElementType);
-
- // VectorType accessors
- VectorType* getVectorType(const Type* ElementType, unsigned NumElements);
- VectorType* getVectorTypeInteger(const VectorType* VTy);
- VectorType* getVectorTypeExtendedElement(const VectorType* VTy);
- VectorType* getVectorTypeTruncatedElement(const VectorType* VTy);
};
/// FOR BACKWARDS COMPATIBILITY - Returns a global context.
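
The removed accessors moved back to static factory functions on the value and type classes themselves; a hedged before/after sketch (header paths as of this era):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"

    llvm::Constant *makeFortyTwo(llvm::LLVMContext &Ctx) {
      // Old API (deleted above): Ctx.getConstantInt(Ty, 42);
      return llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 42);
    }
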
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index e199758f0157..e9a0542bf10b 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -18,8 +18,8 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/FindUsedTypes.h"
#include "llvm/Analysis/IntervalPartition.h"
-#include "llvm/Analysis/LoopVR.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Assembly/PrintModulePass.h"
@@ -50,6 +50,7 @@ namespace {
(void) llvm::createStructRetPromotionPass();
(void) llvm::createBasicAliasAnalysisPass();
(void) llvm::createLibCallAliasAnalysisPass(0);
+ (void) llvm::createScalarEvolutionAliasAnalysisPass();
(void) llvm::createBlockPlacementPass();
(void) llvm::createBlockProfilerPass();
(void) llvm::createBreakCriticalEdgesPass();
@@ -62,13 +63,13 @@ namespace {
(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDeadTypeEliminationPass();
(void) llvm::createEdgeProfilerPass();
+ (void) llvm::createOptimalEdgeProfilerPass();
(void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerPass();
(void) llvm::createFunctionProfilerPass();
(void) llvm::createGlobalDCEPass();
(void) llvm::createGlobalOptimizerPass();
(void) llvm::createGlobalsModRefPass();
- (void) llvm::createGVNPREPass();
(void) llvm::createIPConstantPropagationPass();
(void) llvm::createIPSCCPPass();
(void) llvm::createIndVarSimplifyPass();
@@ -91,6 +92,8 @@ namespace {
(void) llvm::createLowerSwitchPass();
(void) llvm::createNoAAPass();
(void) llvm::createNoProfileInfoPass();
+ (void) llvm::createProfileEstimatorPass();
+ (void) llvm::createProfileVerifierPass();
(void) llvm::createProfileLoaderPass();
(void) llvm::createPromoteMemoryToRegisterPass();
(void) llvm::createDemoteRegisterToMemoryPass();
@@ -114,7 +117,7 @@ namespace {
(void) llvm::createRSProfilingPass();
(void) llvm::createIndMemRemPass();
(void) llvm::createInstCountPass();
- (void) llvm::createPredicateSimplifierPass();
+ (void) llvm::createCodeGenLICMPass();
(void) llvm::createCodeGenPreparePass();
(void) llvm::createGVNPass();
(void) llvm::createMemCpyOptPass();
@@ -130,11 +133,12 @@ namespace {
(void) llvm::createDbgInfoPrinterPass();
(void) llvm::createPartialInliningPass();
(void) llvm::createSSIPass();
+ (void) llvm::createSSIEverythingPass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();
(void)new llvm::ScalarEvolution();
- (void)new llvm::LoopVR();
+ (void)new llvm::PointerTracking();
((llvm::Function*)0)->viewCFGOnly();
llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0);
X.add((llvm::Value*)0, 0); // for -print-alias-sets
diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h
index e5a51971f164..0ee18d57a04f 100644
--- a/include/llvm/LinkAllVMCore.h
+++ b/include/llvm/LinkAllVMCore.h
@@ -46,7 +46,7 @@ namespace {
if (std::getenv("bar") != (char*) -1)
return;
llvm::Module* M = new llvm::Module("", llvm::getGlobalContext());
- (void)new llvm::UnreachableInst();
+ (void)new llvm::UnreachableInst(llvm::getGlobalContext());
(void) llvm::createVerifierPass();
(void) new llvm::Mangler(*M,"");
}
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index 2d0c2cd96b76..1e1da867113b 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -14,11 +14,12 @@
#ifndef LLVM_LINKER_H
#define LLVM_LINKER_H
-#include "llvm/System/Path.h"
#include <memory>
#include <vector>
+#include "llvm/ADT/StringRef.h"
namespace llvm {
+ namespace sys { class Path; }
class Module;
class LLVMContext;
@@ -32,7 +33,7 @@ class LLVMContext;
/// The Linker can link Modules from memory, bitcode files, or bitcode
/// archives. It retains a set of search paths in which to find any libraries
/// presented to it. By default, the linker will generate error and warning
-/// messages to std::cerr but this capability can be turned off with the
+/// messages to stderr but this capability can be turned off with the
/// QuietWarnings and QuietErrors flags. It can also be instructed to verbosely
/// print out the linking actions it is taking with the Verbose flag.
/// @brief The LLVM Linker.
@@ -52,9 +53,9 @@ class Linker {
/// This enumeration is used to control various optional features of the
/// linker.
enum ControlFlags {
- Verbose = 1, ///< Print to std::cerr what steps the linker is taking
- QuietWarnings = 2, ///< Don't print warnings to std::cerr.
- QuietErrors = 4 ///< Don't print errors to std::cerr.
+ Verbose = 1, ///< Print to stderr what steps the linker is taking
+ QuietWarnings = 2, ///< Don't print warnings to stderr.
+ QuietErrors = 4 ///< Don't print errors to stderr.
};
/// @}
@@ -64,17 +65,16 @@ class Linker {
/// Construct the Linker with an empty module which will be given the
/// name \p progname. \p progname will also be used for error messages.
/// @brief Construct with empty module
- Linker(
- const std::string& progname, ///< name of tool running linker
- const std::string& modulename, ///< name of linker's end-result module
- LLVMContext& C, ///< Context for global info
- unsigned Flags = 0 ///< ControlFlags (one or more |'d together)
+ Linker(const StringRef &progname, ///< name of tool running linker
+ const StringRef &modulename, ///< name of linker's end-result module
+ LLVMContext &C, ///< Context for global info
+ unsigned Flags = 0 ///< ControlFlags (one or more |'d together)
);
/// Construct the Linker with a previously defined module, \p aModule. Use
/// \p progname for the name of the program in error messages.
/// @brief Construct with existing module
- Linker(const std::string& progname, Module* aModule, unsigned Flags = 0);
+ Linker(const StringRef& progname, Module* aModule, unsigned Flags = 0);
/// Destruct the Linker.
/// @brief Destructor
@@ -114,9 +114,9 @@ class Linker {
/// true, indicating an error occurred. At most one error is retained so
/// this function always returns the last error that occurred. Note that if
/// the Quiet control flag is not set, the error string will have already
- /// been printed to std::cerr.
+ /// been printed to stderr.
/// @brief Get the text of the last error that occurred.
- const std::string& getLastError() const { return Error; }
+ const std::string &getLastError() const { return Error; }
/// @}
/// @name Mutators
@@ -214,7 +214,7 @@ class Linker {
/// @returns true if an error occurs, false otherwise
/// @brief Link one library into the module
bool LinkInLibrary (
- const std::string& Library, ///< The library to link in
+ const StringRef &Library, ///< The library to link in
bool& is_native ///< Indicates if the lib is a native library
);
@@ -267,7 +267,7 @@ class Linker {
/// will be empty (i.e. sys::Path::isEmpty() will return true).
/// @returns A sys::Path to the found library
/// @brief Find a library from its short name.
- sys::Path FindLib(const std::string &Filename);
+ sys::Path FindLib(const StringRef &Filename);
/// @}
/// @name Implementation
@@ -277,9 +277,9 @@ class Linker {
/// Module it contains (wrapped in an auto_ptr), or 0 if an error occurs.
std::auto_ptr<Module> LoadObject(const sys::Path& FN);
- bool warning(const std::string& message);
- bool error(const std::string& message);
- void verbose(const std::string& message);
+ bool warning(const StringRef &message);
+ bool error(const StringRef &message);
+ void verbose(const StringRef &message);
/// @}
/// @name Data
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
new file mode 100644
index 000000000000..fb69630ff52e
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -0,0 +1,472 @@
+//===-- llvm/MC/MCAsmInfo.h - Asm info --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a class to be used as the basis for target specific
+// asm writers. This class primarily takes care of global printing constants,
+// which are used in very similar ways across all targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ASM_INFO_H
+#define LLVM_TARGET_ASM_INFO_H
+
+#include <cassert>
+
+namespace llvm {
+  namespace ExceptionHandling { enum ExceptionsType { None, Dwarf, SjLj }; }
+
+  /// MCAsmInfo - This class is intended to be used as a base class for asm
+  /// properties and features specific to the target.
+  class MCAsmInfo {
+ protected:
+ //===------------------------------------------------------------------===//
+ // Properties to be set by the target writer, used to configure asm printer.
+ //
+
+ /// ZeroFillDirective - Directive for emitting a global to the ZeroFill
+ /// section on this target. Null if this target doesn't support zerofill.
+ const char *ZeroFillDirective; // Default is null.
+
+ /// NonexecutableStackDirective - Directive for declaring to the
+ /// linker and beyond that the emitted code does not require stack
+ /// memory to be executable.
+ const char *NonexecutableStackDirective; // Default is null.
+
+ /// NeedsSet - True if target asm treats expressions in data directives
+ /// as linktime-relocatable. For assembly-time computation, we need to
+ /// use a .set. Thus:
+ /// .set w, x-y
+ /// .long w
+ /// is computed at assembly time, while
+ /// .long x-y
+ /// is relocated if the relative locations of x and y change at linktime.
+ /// We want both these things in different places.
+ bool NeedsSet; // Defaults to false.
+
+ /// MaxInstLength - This is the maximum possible length of an instruction,
+ /// which is needed to compute the size of an inline asm.
+ unsigned MaxInstLength; // Defaults to 4.
+
+ /// PCSymbol - The symbol used to represent the current PC. Used in PC
+ /// relative expressions.
+ const char *PCSymbol; // Defaults to "$".
+
+ /// SeparatorChar - This character, if specified, is used to separate
+ /// instructions from each other when on the same line. This is used to
+ /// measure inline asm instructions.
+ char SeparatorChar; // Defaults to ';'
+
+    /// CommentColumn - This indicates the column (zero-based) at which
+    /// asm comments should be printed.
+ unsigned CommentColumn; // Defaults to 60
+
+ /// CommentString - This indicates the comment character used by the
+ /// assembler.
+ const char *CommentString; // Defaults to "#"
+
+ /// GlobalPrefix - If this is set to a non-empty string, it is prepended
+ /// onto all global symbols. This is often used for "_" or ".".
+ const char *GlobalPrefix; // Defaults to ""
+
+ /// PrivateGlobalPrefix - This prefix is used for globals like constant
+ /// pool entries that are completely private to the .s file and should not
+ /// have names in the .o file. This is often "." or "L".
+ const char *PrivateGlobalPrefix; // Defaults to "."
+
+ /// LinkerPrivateGlobalPrefix - This prefix is used for symbols that should
+ /// be passed through the assembler but be removed by the linker. This
+ /// is "l" on Darwin, currently used for some ObjC metadata.
+ const char *LinkerPrivateGlobalPrefix; // Defaults to ""
+
+ /// InlineAsmStart/End - If these are nonempty, they contain a directive to
+ /// emit before and after an inline assembly statement.
+ const char *InlineAsmStart; // Defaults to "#APP\n"
+ const char *InlineAsmEnd; // Defaults to "#NO_APP\n"
+
+ /// AssemblerDialect - Which dialect of an assembler variant to use.
+ unsigned AssemblerDialect; // Defaults to 0
+
+ /// AllowQuotesInName - This is true if the assembler allows for complex
+ /// symbol names to be surrounded in quotes. This defaults to false.
+ bool AllowQuotesInName;
+
+ /// AllowNameToStartWithDigit - This is true if the assembler allows symbol
+ /// names to start with a digit (e.g., "0x0021"). This defaults to false.
+ bool AllowNameToStartWithDigit;
+
+ //===--- Data Emission Directives -------------------------------------===//
+
+ /// ZeroDirective - this should be set to the directive used to get some
+ /// number of zero bytes emitted to the current section. Common cases are
+ /// "\t.zero\t" and "\t.space\t". If this is set to null, the
+    /// Data*bitsDirectives will be used to emit zero bytes.
+ const char *ZeroDirective; // Defaults to "\t.zero\t"
+ const char *ZeroDirectiveSuffix; // Defaults to ""
+
+ /// AsciiDirective - This directive allows emission of an ascii string with
+ /// the standard C escape characters embedded into it.
+ const char *AsciiDirective; // Defaults to "\t.ascii\t"
+
+ /// AscizDirective - If not null, this allows for special handling of
+ /// zero terminated strings on this target. This is commonly supported as
+ /// ".asciz". If a target doesn't support this, it can be set to null.
+ const char *AscizDirective; // Defaults to "\t.asciz\t"
+
+ /// DataDirectives - These directives are used to output some unit of
+ /// integer data to the current section. If a data directive is set to
+ /// null, smaller data directives will be used to emit the large sizes.
+ const char *Data8bitsDirective; // Defaults to "\t.byte\t"
+ const char *Data16bitsDirective; // Defaults to "\t.short\t"
+ const char *Data32bitsDirective; // Defaults to "\t.long\t"
+ const char *Data64bitsDirective; // Defaults to "\t.quad\t"
+
+ /// getDataASDirective - Return the directive that should be used to emit
+ /// data of the specified size to the specified numeric address space.
+ virtual const char *getDataASDirective(unsigned Size, unsigned AS) const {
+ assert(AS != 0 && "Don't know the directives for default addr space");
+ return 0;
+ }
+
+ /// SunStyleELFSectionSwitchSyntax - This is true if this target uses "Sun
+ /// Style" syntax for section switching ("#alloc,#write" etc) instead of the
+ /// normal ELF syntax (,"a,w") in .section directives.
+ bool SunStyleELFSectionSwitchSyntax; // Defaults to false.
+
+ /// UsesELFSectionDirectiveForBSS - This is true if this target uses ELF
+    /// '.section' directive before the '.bss' one. It's used for PPC/Linux,
+    /// which doesn't support the bare '.bss' directive.
+ bool UsesELFSectionDirectiveForBSS; // Defaults to false.
+
+ //===--- Alignment Information ----------------------------------------===//
+
+    /// AlignDirective - The directive used to round up to an alignment
+ /// boundary.
+ ///
+ const char *AlignDirective; // Defaults to "\t.align\t"
+
+ /// AlignmentIsInBytes - If this is true (the default) then the asmprinter
+ /// emits ".align N" directives, where N is the number of bytes to align to.
+ /// Otherwise, it emits ".align log2(N)", e.g. 3 to align to an 8 byte
+ /// boundary.
+ bool AlignmentIsInBytes; // Defaults to true
+
+ /// TextAlignFillValue - If non-zero, this is used to fill the executable
+    /// space created as the result of an alignment directive.
+ unsigned TextAlignFillValue; // Defaults to 0
+
+ //===--- Section Switching Directives ---------------------------------===//
+
+ /// JumpTableDirective - if non-null, the directive to emit before jump
+ /// table entries. FIXME: REMOVE THIS.
+ const char *JumpTableDirective; // Defaults to NULL.
+ const char *PICJumpTableDirective; // Defaults to NULL.
+
+
+ //===--- Global Variable Emission Directives --------------------------===//
+
+ /// GlobalDirective - This is the directive used to declare a global entity.
+ ///
+ const char *GlobalDirective; // Defaults to NULL.
+
+ /// ExternDirective - This is the directive used to declare external
+ /// globals.
+ ///
+ const char *ExternDirective; // Defaults to NULL.
+
+ /// SetDirective - This is the name of a directive that can be used to tell
+ /// the assembler to set the value of a variable to some expression.
+ const char *SetDirective; // Defaults to null.
+
+ /// LCOMMDirective - This is the name of a directive (if supported) that can
+ /// be used to efficiently declare a local (internal) block of zero
+ /// initialized data in the .bss/.data section. The syntax expected is:
+ /// @verbatim <LCOMMDirective> SYMBOLNAME LENGTHINBYTES, ALIGNMENT
+ /// @endverbatim
+ const char *LCOMMDirective; // Defaults to null.
+
+ const char *COMMDirective; // Defaults to "\t.comm\t".
+
+ /// COMMDirectiveTakesAlignment - True if COMMDirective take a third
+ /// argument that specifies the alignment of the declaration.
+ bool COMMDirectiveTakesAlignment; // Defaults to true.
+
+ /// HasDotTypeDotSizeDirective - True if the target has .type and .size
+ /// directives, this is true for most ELF targets.
+ bool HasDotTypeDotSizeDirective; // Defaults to true.
+
+ /// HasSingleParameterDotFile - True if the target has a single parameter
+ /// .file directive, this is true for ELF targets.
+ bool HasSingleParameterDotFile; // Defaults to true.
+
+ /// UsedDirective - This directive, if non-null, is used to declare a global
+    /// as being used in some way that the assembler can't see. This prevents dead
+ /// code elimination on some targets.
+ const char *UsedDirective; // Defaults to NULL.
+
+ /// WeakRefDirective - This directive, if non-null, is used to declare a
+ /// global as being a weak undefined symbol.
+ const char *WeakRefDirective; // Defaults to NULL.
+
+ /// WeakDefDirective - This directive, if non-null, is used to declare a
+ /// global as being a weak defined symbol.
+ const char *WeakDefDirective; // Defaults to NULL.
+
+ /// HiddenDirective - This directive, if non-null, is used to declare a
+ /// global or function as having hidden visibility.
+ const char *HiddenDirective; // Defaults to "\t.hidden\t".
+
+ /// ProtectedDirective - This directive, if non-null, is used to declare a
+ /// global or function as having protected visibility.
+ const char *ProtectedDirective; // Defaults to "\t.protected\t".
+
+ //===--- Dwarf Emission Directives -----------------------------------===//
+
+    /// AbsoluteDebugSectionOffsets - True if we should emit absolute section
+ /// offsets for debug information.
+ bool AbsoluteDebugSectionOffsets; // Defaults to false.
+
+    /// AbsoluteEHSectionOffsets - True if we should emit absolute section
+ /// offsets for EH information. Defaults to false.
+ bool AbsoluteEHSectionOffsets;
+
+ /// HasLEB128 - True if target asm supports leb128 directives.
+ bool HasLEB128; // Defaults to false.
+
+    /// HasDotLocAndDotFile - True if target asm supports .loc and .file
+ /// directives for emitting debugging information.
+ bool HasDotLocAndDotFile; // Defaults to false.
+
+ /// SupportsDebugInformation - True if target supports emission of debugging
+ /// information.
+ bool SupportsDebugInformation; // Defaults to false.
+
+    /// ExceptionsType - The kind of exception handling, if any, the target
+    /// supports.
+ ExceptionHandling::ExceptionsType ExceptionsType; // Defaults to None
+
+    /// DwarfRequiresFrameSection - True if the Dwarf2 output needs a frame section
+ bool DwarfRequiresFrameSection; // Defaults to true.
+
+ /// DwarfUsesInlineInfoSection - True if DwarfDebugInlineSection is used to
+ /// encode inline subroutine information.
+ bool DwarfUsesInlineInfoSection; // Defaults to false.
+
+    /// Is_EHSymbolPrivate - If set, the "_foo.eh" symbol is made private so
+    /// that it doesn't show up in the symbol table of the object file.
+ bool Is_EHSymbolPrivate; // Defaults to true.
+
+ /// GlobalEHDirective - This is the directive used to make exception frame
+ /// tables globally visible.
+ const char *GlobalEHDirective; // Defaults to NULL.
+
+    /// SupportsWeakOmittedEHFrame - True if target assembler and linker will
+ /// handle a weak_definition of constant 0 for an omitted EH frame.
+ bool SupportsWeakOmittedEHFrame; // Defaults to true.
+
+ /// DwarfSectionOffsetDirective - Special section offset directive.
+ const char* DwarfSectionOffsetDirective; // Defaults to NULL
+
+ //===--- CBE Asm Translation Table -----------------------------------===//
+
+ const char *const *AsmTransCBE; // Defaults to empty
+
+ public:
+ explicit MCAsmInfo();
+ virtual ~MCAsmInfo();
+
+ /// getSLEB128Size - Compute the number of bytes required for a signed
+ /// leb128 value.
+ static unsigned getSLEB128Size(int Value);
+
+ /// getULEB128Size - Compute the number of bytes required for an unsigned
+ /// leb128 value.
+ static unsigned getULEB128Size(unsigned Value);
+
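
For reference, a self-contained sketch of the quantity getULEB128Size computes (standard DWARF LEB128: seven payload bits per output byte, high bit set while more bytes follow; this helper is illustrative, not the library's implementation):

    static unsigned uleb128Size(unsigned Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;        // one output byte carries seven bits of payload
        ++Size;
      } while (Value != 0); // continue until no payload bits remain
      return Size;
    }
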
+ // Data directive accessors.
+ //
+ const char *getData8bitsDirective(unsigned AS = 0) const {
+ return AS == 0 ? Data8bitsDirective : getDataASDirective(8, AS);
+ }
+ const char *getData16bitsDirective(unsigned AS = 0) const {
+ return AS == 0 ? Data16bitsDirective : getDataASDirective(16, AS);
+ }
+ const char *getData32bitsDirective(unsigned AS = 0) const {
+ return AS == 0 ? Data32bitsDirective : getDataASDirective(32, AS);
+ }
+ const char *getData64bitsDirective(unsigned AS = 0) const {
+ return AS == 0 ? Data64bitsDirective : getDataASDirective(64, AS);
+ }
+
+
+ bool usesSunStyleELFSectionSwitchSyntax() const {
+ return SunStyleELFSectionSwitchSyntax;
+ }
+
+ bool usesELFSectionDirectiveForBSS() const {
+ return UsesELFSectionDirectiveForBSS;
+ }
+
+ // Accessors.
+ //
+ const char *getZeroFillDirective() const {
+ return ZeroFillDirective;
+ }
+ const char *getNonexecutableStackDirective() const {
+ return NonexecutableStackDirective;
+ }
+ bool needsSet() const {
+ return NeedsSet;
+ }
+ unsigned getMaxInstLength() const {
+ return MaxInstLength;
+ }
+ const char *getPCSymbol() const {
+ return PCSymbol;
+ }
+ char getSeparatorChar() const {
+ return SeparatorChar;
+ }
+ unsigned getCommentColumn() const {
+ return CommentColumn;
+ }
+ const char *getCommentString() const {
+ return CommentString;
+ }
+ const char *getGlobalPrefix() const {
+ return GlobalPrefix;
+ }
+ const char *getPrivateGlobalPrefix() const {
+ return PrivateGlobalPrefix;
+ }
+ const char *getLinkerPrivateGlobalPrefix() const {
+ return LinkerPrivateGlobalPrefix;
+ }
+ const char *getInlineAsmStart() const {
+ return InlineAsmStart;
+ }
+ const char *getInlineAsmEnd() const {
+ return InlineAsmEnd;
+ }
+ unsigned getAssemblerDialect() const {
+ return AssemblerDialect;
+ }
+ bool doesAllowQuotesInName() const {
+ return AllowQuotesInName;
+ }
+ bool doesAllowNameToStartWithDigit() const {
+ return AllowNameToStartWithDigit;
+ }
+ const char *getZeroDirective() const {
+ return ZeroDirective;
+ }
+ const char *getZeroDirectiveSuffix() const {
+ return ZeroDirectiveSuffix;
+ }
+ const char *getAsciiDirective() const {
+ return AsciiDirective;
+ }
+ const char *getAscizDirective() const {
+ return AscizDirective;
+ }
+ const char *getJumpTableDirective(bool isPIC) const {
+ return isPIC ? PICJumpTableDirective : JumpTableDirective;
+ }
+ const char *getAlignDirective() const {
+ return AlignDirective;
+ }
+ bool getAlignmentIsInBytes() const {
+ return AlignmentIsInBytes;
+ }
+ unsigned getTextAlignFillValue() const {
+ return TextAlignFillValue;
+ }
+ const char *getGlobalDirective() const {
+ return GlobalDirective;
+ }
+ const char *getExternDirective() const {
+ return ExternDirective;
+ }
+ const char *getSetDirective() const {
+ return SetDirective;
+ }
+ const char *getLCOMMDirective() const {
+ return LCOMMDirective;
+ }
+ const char *getCOMMDirective() const {
+ return COMMDirective;
+ }
+ bool getCOMMDirectiveTakesAlignment() const {
+ return COMMDirectiveTakesAlignment;
+ }
+ bool hasDotTypeDotSizeDirective() const {
+ return HasDotTypeDotSizeDirective;
+ }
+ bool hasSingleParameterDotFile() const {
+ return HasSingleParameterDotFile;
+ }
+ const char *getUsedDirective() const {
+ return UsedDirective;
+ }
+ const char *getWeakRefDirective() const {
+ return WeakRefDirective;
+ }
+ const char *getWeakDefDirective() const {
+ return WeakDefDirective;
+ }
+ const char *getHiddenDirective() const {
+ return HiddenDirective;
+ }
+ const char *getProtectedDirective() const {
+ return ProtectedDirective;
+ }
+ bool isAbsoluteDebugSectionOffsets() const {
+ return AbsoluteDebugSectionOffsets;
+ }
+ bool isAbsoluteEHSectionOffsets() const {
+ return AbsoluteEHSectionOffsets;
+ }
+ bool hasLEB128() const {
+ return HasLEB128;
+ }
+ bool hasDotLocAndDotFile() const {
+ return HasDotLocAndDotFile;
+ }
+ bool doesSupportDebugInformation() const {
+ return SupportsDebugInformation;
+ }
+ bool doesSupportExceptionHandling() const {
+ return ExceptionsType != ExceptionHandling::None;
+ }
+ ExceptionHandling::ExceptionsType getExceptionHandlingType() const {
+ return ExceptionsType;
+ }
+ bool doesDwarfRequireFrameSection() const {
+ return DwarfRequiresFrameSection;
+ }
+ bool doesDwarfUsesInlineInfoSection() const {
+ return DwarfUsesInlineInfoSection;
+ }
+ bool is_EHSymbolPrivate() const {
+ return Is_EHSymbolPrivate;
+ }
+ const char *getGlobalEHDirective() const {
+ return GlobalEHDirective;
+ }
+ bool getSupportsWeakOmittedEHFrame() const {
+ return SupportsWeakOmittedEHFrame;
+ }
+ const char *getDwarfSectionOffsetDirective() const {
+ return DwarfSectionOffsetDirective;
+ }
+ const char *const *getAsmCBE() const {
+ return AsmTransCBE;
+ }
+ };
+}
+
+#endif
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
new file mode 100644
index 000000000000..a3ee1593c3ac
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -0,0 +1,24 @@
+//===-- MCAsmInfoCOFF.h - COFF asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_COFF_TARGET_ASM_INFO_H
+#define LLVM_COFF_TARGET_ASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class MCAsmInfoCOFF : public MCAsmInfo {
+ protected:
+ explicit MCAsmInfoCOFF();
+
+ };
+}
+
+
+#endif // LLVM_COFF_TARGET_ASM_INFO_H
diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h
new file mode 100644
index 000000000000..c85aa3da9572
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfoDarwin.h
@@ -0,0 +1,32 @@
+//===---- MCAsmInfoDarwin.h - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DARWIN_TARGET_ASM_INFO_H
+#define LLVM_DARWIN_TARGET_ASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class GlobalValue;
+ class GlobalVariable;
+ class Type;
+ class Mangler;
+
+ struct MCAsmInfoDarwin : public MCAsmInfo {
+ explicit MCAsmInfoDarwin();
+ };
+}
+
+
+#endif // LLVM_DARWIN_TARGET_ASM_INFO_H
diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h
new file mode 100644
index 000000000000..e66425abef6a
--- /dev/null
+++ b/include/llvm/MC/MCAsmLexer.h
@@ -0,0 +1,141 @@
+//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMLEXER_H
+#define LLVM_MC_MCASMLEXER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmLexer;
+class MCInst;
+class SMLoc;
+class Target;
+
+/// AsmToken - Target independent representation for an assembler token.
+struct AsmToken {
+ enum TokenKind {
+ // Markers
+ Eof, Error,
+
+ // String values.
+ Identifier,
+ String,
+
+ // Integer values.
+ Integer,
+
+ // No-value.
+ EndOfStatement,
+ Colon,
+ Plus, Minus, Tilde,
+ Slash, // '/'
+ LParen, RParen, LBrac, RBrac, LCurly, RCurly,
+ Star, Comma, Dollar, Equal, EqualEqual,
+
+ Pipe, PipePipe, Caret,
+ Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
+ Less, LessEqual, LessLess, LessGreater,
+ Greater, GreaterEqual, GreaterGreater
+ };
+
+ TokenKind Kind;
+
+ /// A reference to the entire token contents; this is always a pointer into
+ /// a memory buffer owned by the source manager.
+ StringRef Str;
+
+ int64_t IntVal;
+
+public:
+ AsmToken() {}
+ AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0)
+ : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
+
+ TokenKind getKind() const { return Kind; }
+ bool is(TokenKind K) const { return Kind == K; }
+ bool isNot(TokenKind K) const { return Kind != K; }
+
+ SMLoc getLoc() const;
+
+ /// getStringContents - Get the contents of a string token (without quotes).
+ StringRef getStringContents() const {
+ assert(Kind == String && "This token isn't a string!");
+ return Str.slice(1, Str.size() - 1);
+ }
+
+ /// getIdentifier - Get the identifier string for the current token, which
+ /// should be an identifier or a string. This gets the portion of the string
+ /// which should be used as the identifier, e.g., it does not include the
+ /// quotes on strings.
+ StringRef getIdentifier() const {
+ if (Kind == Identifier)
+ return getString();
+ return getStringContents();
+ }
+
+ /// getString - Get the string for the current token, this includes all
+ /// characters (for example, the quotes on strings) in the token.
+ ///
+ /// The returned StringRef points into the source manager's memory buffer, and
+ /// is safe to store across calls to Lex().
+ StringRef getString() const { return Str; }
+
+ // FIXME: Don't compute this in advance, it makes every token larger, and is
+ // also not generally what we want (it is nicer for recovery etc. to lex 123br
+ // as a single token, then diagnose as an invalid number).
+ int64_t getIntVal() const {
+ assert(Kind == Integer && "This token isn't an integer!");
+ return IntVal;
+ }
+};
+
+/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
+/// assembly lexers.
+class MCAsmLexer {
+ /// The current token, stored in the base class for faster access.
+ AsmToken CurTok;
+
+ MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT
+ void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+ MCAsmLexer();
+
+ virtual AsmToken LexToken() = 0;
+
+public:
+ virtual ~MCAsmLexer();
+
+ /// Lex - Consume the next token from the input stream and return it.
+ ///
+  /// The lexer will continuously return the end-of-file token once the end of
+ /// the main input file has been reached.
+ const AsmToken &Lex() {
+ return CurTok = LexToken();
+ }
+
+ /// getTok - Get the current (last) lexed token.
+ const AsmToken &getTok() {
+ return CurTok;
+ }
+
+ /// getKind - Get the kind of current token.
+ AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+
+ /// is - Check if the current token has kind \arg K.
+ bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+
+ /// isNot - Check if the current token has kind \arg K.
+ bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+};
+
+} // End llvm namespace
+
+#endif
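
Concrete lexers implement LexToken(); clients pull tokens with Lex() until Eof, which the lexer keeps returning once input is exhausted. A sketch of a driving loop over some already-constructed MCAsmLexer subclass:

    #include "llvm/MC/MCAsmLexer.h"

    // Count identifier tokens. CurTok is cached in the base class, so
    // is()/getTok() are cheap between Lex() calls, and the loop is
    // guaranteed to terminate because Lex() pins at Eof.
    static unsigned countIdentifiers(llvm::MCAsmLexer &Lexer) {
      unsigned N = 0;
      while (Lexer.Lex().isNot(llvm::AsmToken::Eof))
        if (Lexer.is(llvm::AsmToken::Identifier))
          ++N;
      return N;
    }
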
diff --git a/include/llvm/MC/MCAsmParser.h b/include/llvm/MC/MCAsmParser.h
new file mode 100644
index 000000000000..c1b5d133cdea
--- /dev/null
+++ b/include/llvm/MC/MCAsmParser.h
@@ -0,0 +1,79 @@
+//===-- llvm/MC/MCAsmParser.h - Abstract Asm Parser Interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMPARSER_H
+#define LLVM_MC_MCASMPARSER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmLexer;
+class MCContext;
+class MCExpr;
+class MCStreamer;
+class MCValue;
+class SMLoc;
+class Twine;
+
+/// MCAsmParser - Generic assembler parser interface, for use by target specific
+/// assembly parsers.
+class MCAsmParser {
+ MCAsmParser(const MCAsmParser &); // DO NOT IMPLEMENT
+ void operator=(const MCAsmParser &); // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+ MCAsmParser();
+
+public:
+ virtual ~MCAsmParser();
+
+ virtual MCAsmLexer &getLexer() = 0;
+
+ virtual MCContext &getContext() = 0;
+
+  /// getStreamer - Return the output streamer for the assembler.
+ virtual MCStreamer &getStreamer() = 0;
+
+ /// Warning - Emit a warning at the location \arg L, with the message \arg
+ /// Msg.
+ virtual void Warning(SMLoc L, const Twine &Msg) = 0;
+
+  /// Error - Emit an error at the location \arg L, with the message \arg
+  /// Msg.
+ ///
+ /// \return The return value is always true, as an idiomatic convenience to
+ /// clients.
+ virtual bool Error(SMLoc L, const Twine &Msg) = 0;
+
+ /// ParseExpression - Parse an arbitrary expression.
+ ///
+ /// @param Res - The value of the expression. The result is undefined
+ /// on error.
+ /// @result - False on success.
+ virtual bool ParseExpression(const MCExpr *&Res) = 0;
+
+ /// ParseParenExpression - Parse an arbitrary expression, assuming that an
+ /// initial '(' has already been consumed.
+ ///
+ /// @param Res - The value of the expression. The result is undefined
+ /// on error.
+ /// @result - False on success.
+ virtual bool ParseParenExpression(const MCExpr *&Res) = 0;
+
+ /// ParseAbsoluteExpression - Parse an expression which must evaluate to an
+ /// absolute value.
+ ///
+ /// @param Res - The value of the absolute expression. The result is undefined
+ /// on error.
+ /// @result - False on success.
+ virtual bool ParseAbsoluteExpression(int64_t &Res) = 0;
+};
+
+} // End llvm namespace
+
+#endif
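
Note the inverted return convention: the Parse* methods return false on success, and Error() always returns true so a handler can report and bail in one statement. A sketch of a directive-argument helper built on that convention (parseDirectiveArg is hypothetical; SMLoc is assumed to come from llvm/Support/SourceMgr.h):

    #include "llvm/ADT/Twine.h"
    #include "llvm/MC/MCAsmParser.h"
    #include "llvm/Support/SourceMgr.h"

    // Parse one absolute integer argument for a directive. Returns true
    // on error, matching the parser's own convention.
    static bool parseDirectiveArg(llvm::MCAsmParser &Parser, llvm::SMLoc Loc,
                                  int64_t &Value) {
      if (Parser.ParseAbsoluteExpression(Value))
        return Parser.Error(Loc, "expected absolute expression");
      return false;
    }
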
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
new file mode 100644
index 000000000000..892f54839db3
--- /dev/null
+++ b/include/llvm/MC/MCAssembler.h
@@ -0,0 +1,661 @@
+//===- MCAssembler.h - Object File Generation -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASSEMBLER_H
+#define LLVM_MC_MCASSEMBLER_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataTypes.h"
+#include <algorithm>
+#include <cassert>
+#include <vector> // FIXME: Shouldn't be needed.
+
+namespace llvm {
+class raw_ostream;
+class MCAssembler;
+class MCContext;
+class MCSection;
+class MCSectionData;
+
+class MCFragment : public ilist_node<MCFragment> {
+ MCFragment(const MCFragment&); // DO NOT IMPLEMENT
+ void operator=(const MCFragment&); // DO NOT IMPLEMENT
+
+public:
+ enum FragmentType {
+ FT_Data,
+ FT_Align,
+ FT_Fill,
+ FT_Org,
+ FT_ZeroFill
+ };
+
+private:
+ FragmentType Kind;
+
+ /// Parent - The data for the section this fragment is in.
+ MCSectionData *Parent;
+
+ /// @name Assembler Backend Data
+ /// @{
+ //
+ // FIXME: This could all be kept private to the assembler implementation.
+
+ /// Offset - The offset of this fragment in its section. This is ~0 until
+ /// initialized.
+ uint64_t Offset;
+
+  /// FileSize - The file size of this fragment. This is ~0 until initialized.
+ uint64_t FileSize;
+
+ /// @}
+
+protected:
+ MCFragment(FragmentType _Kind, MCSectionData *_Parent = 0);
+
+public:
+ // Only for sentinel.
+ MCFragment();
+ virtual ~MCFragment();
+
+ FragmentType getKind() const { return Kind; }
+
+ MCSectionData *getParent() const { return Parent; }
+ void setParent(MCSectionData *Value) { Parent = Value; }
+
+ // FIXME: This should be abstract, fix sentinel.
+ virtual uint64_t getMaxFileSize() const {
+ assert(0 && "Invalid getMaxFileSize call!");
+ return 0;
+  }
+
+ /// @name Assembler Backend Support
+ /// @{
+ //
+ // FIXME: This could all be kept private to the assembler implementation.
+
+ uint64_t getAddress() const;
+
+ uint64_t getFileSize() const {
+ assert(FileSize != ~UINT64_C(0) && "File size not set!");
+ return FileSize;
+ }
+ void setFileSize(uint64_t Value) {
+ assert(Value <= getMaxFileSize() && "Invalid file size!");
+ FileSize = Value;
+ }
+
+ uint64_t getOffset() const {
+ assert(Offset != ~UINT64_C(0) && "File offset not set!");
+ return Offset;
+ }
+ void setOffset(uint64_t Value) { Offset = Value; }
+
+ /// @}
+
+ static bool classof(const MCFragment *O) { return true; }
+};
+
+class MCDataFragment : public MCFragment {
+ SmallString<32> Contents;
+
+public:
+ MCDataFragment(MCSectionData *SD = 0) : MCFragment(FT_Data, SD) {}
+
+ /// @name Accessors
+ /// @{
+
+ uint64_t getMaxFileSize() const {
+ return Contents.size();
+ }
+
+ SmallString<32> &getContents() { return Contents; }
+ const SmallString<32> &getContents() const { return Contents; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Data;
+ }
+ static bool classof(const MCDataFragment *) { return true; }
+};
+
+class MCAlignFragment : public MCFragment {
+ /// Alignment - The alignment to ensure, in bytes.
+ unsigned Alignment;
+
+ /// Value - Value to use for filling padding bytes.
+ int64_t Value;
+
+ /// ValueSize - The size of the integer (in bytes) of \arg Value.
+ unsigned ValueSize;
+
+ /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
+ /// cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit;
+
+public:
+ MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize,
+ unsigned _MaxBytesToEmit, MCSectionData *SD = 0)
+ : MCFragment(FT_Align, SD), Alignment(_Alignment),
+ Value(_Value),ValueSize(_ValueSize),
+ MaxBytesToEmit(_MaxBytesToEmit) {}
+
+ /// @name Accessors
+ /// @{
+
+ uint64_t getMaxFileSize() const {
+ return std::max(Alignment - 1, MaxBytesToEmit);
+ }
+
+ unsigned getAlignment() const { return Alignment; }
+
+ int64_t getValue() const { return Value; }
+
+ unsigned getValueSize() const { return ValueSize; }
+
+ unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Align;
+ }
+ static bool classof(const MCAlignFragment *) { return true; }
+};
+
+class MCFillFragment : public MCFragment {
+ /// Value - Value to use for filling bytes.
+ MCValue Value;
+
+ /// ValueSize - The size (in bytes) of \arg Value to use when filling.
+ unsigned ValueSize;
+
+ /// Count - The number of copies of \arg Value to insert.
+ uint64_t Count;
+
+public:
+ MCFillFragment(MCValue _Value, unsigned _ValueSize, uint64_t _Count,
+ MCSectionData *SD = 0)
+ : MCFragment(FT_Fill, SD),
+ Value(_Value), ValueSize(_ValueSize), Count(_Count) {}
+
+ /// @name Accessors
+ /// @{
+
+ uint64_t getMaxFileSize() const {
+ return ValueSize * Count;
+ }
+
+ MCValue getValue() const { return Value; }
+
+ unsigned getValueSize() const { return ValueSize; }
+
+ uint64_t getCount() const { return Count; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Fill;
+ }
+ static bool classof(const MCFillFragment *) { return true; }
+};
+
+class MCOrgFragment : public MCFragment {
+ /// Offset - The offset this fragment should start at.
+ MCValue Offset;
+
+ /// Value - Value to use for filling bytes.
+ int8_t Value;
+
+public:
+ MCOrgFragment(MCValue _Offset, int8_t _Value, MCSectionData *SD = 0)
+ : MCFragment(FT_Org, SD),
+ Offset(_Offset), Value(_Value) {}
+
+ /// @name Accessors
+ /// @{
+
+ uint64_t getMaxFileSize() const {
+ // FIXME: This doesn't make much sense.
+ return ~UINT64_C(0);
+ }
+
+ MCValue getOffset() const { return Offset; }
+
+ uint8_t getValue() const { return Value; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Org;
+ }
+ static bool classof(const MCOrgFragment *) { return true; }
+};
+
+/// MCZeroFillFragment - Represent data which has a fixed size and alignment,
+/// but requires no physical space in the object file.
+class MCZeroFillFragment : public MCFragment {
+ /// Size - The size of this fragment.
+ uint64_t Size;
+
+ /// Alignment - The alignment for this fragment.
+ unsigned Alignment;
+
+public:
+ MCZeroFillFragment(uint64_t _Size, unsigned _Alignment, MCSectionData *SD = 0)
+ : MCFragment(FT_ZeroFill, SD),
+ Size(_Size), Alignment(_Alignment) {}
+
+ /// @name Accessors
+ /// @{
+
+ uint64_t getMaxFileSize() const {
+ // FIXME: This also doesn't make much sense, this method is misnamed.
+ return ~UINT64_C(0);
+ }
+
+ uint64_t getSize() const { return Size; }
+
+ unsigned getAlignment() const { return Alignment; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_ZeroFill;
+ }
+ static bool classof(const MCZeroFillFragment *) { return true; }
+};
+
+// FIXME: Should this be a separate class, or just merged into MCSection? Since
+// we anticipate the fast path being through an MCAssembler, the only reason to
+// keep it out is for API abstraction.
+class MCSectionData : public ilist_node<MCSectionData> {
+ MCSectionData(const MCSectionData&); // DO NOT IMPLEMENT
+ void operator=(const MCSectionData&); // DO NOT IMPLEMENT
+
+public:
+ /// Fixup - Represent a fixed size region of bytes inside some fragment which
+ /// needs to be rewritten. This region will either be rewritten by the
+ /// assembler or cause a relocation entry to be generated.
+ struct Fixup {
+ /// Fragment - The fragment containing the fixup.
+ MCFragment *Fragment;
+
+ /// Offset - The offset inside the fragment which needs to be rewritten.
+ uint64_t Offset;
+
+ /// Value - The expression to eventually write into the fragment.
+ //
+ // FIXME: We could probably get away with requiring the client to pass in an
+ // owned reference whose lifetime extends past that of the fixup.
+ MCValue Value;
+
+ /// Size - The fixup size.
+ unsigned Size;
+
+ /// FixedValue - The value to replace the fix up by.
+ //
+ // FIXME: This should not be here.
+ uint64_t FixedValue;
+
+ public:
+ Fixup(MCFragment &_Fragment, uint64_t _Offset, const MCValue &_Value,
+ unsigned _Size)
+ : Fragment(&_Fragment), Offset(_Offset), Value(_Value), Size(_Size),
+ FixedValue(0) {}
+ };
+
+ typedef iplist<MCFragment> FragmentListType;
+
+ typedef FragmentListType::const_iterator const_iterator;
+ typedef FragmentListType::iterator iterator;
+
+ typedef std::vector<Fixup>::const_iterator const_fixup_iterator;
+ typedef std::vector<Fixup>::iterator fixup_iterator;
+
+private:
+ iplist<MCFragment> Fragments;
+ const MCSection *Section;
+
+ /// Alignment - The maximum alignment seen in this section.
+ unsigned Alignment;
+
+ /// @name Assembler Backend Data
+ /// @{
+ //
+ // FIXME: This could all be kept private to the assembler implementation.
+
+ /// Address - The computed address of this section. This is ~0 until
+ /// initialized.
+ uint64_t Address;
+
+ /// Size - The content size of this section. This is ~0 until initialized.
+ uint64_t Size;
+
+ /// FileSize - The size of this section in the object file. This is ~0 until
+ /// initialized.
+ uint64_t FileSize;
+
+ /// LastFixupLookup - Cache for the last looked up fixup.
+ mutable unsigned LastFixupLookup;
+
+ /// Fixups - The list of fixups in this section.
+ std::vector<Fixup> Fixups;
+
+ /// @}
+
+public:
+ // Only for use as sentinel.
+ MCSectionData();
+ MCSectionData(const MCSection &Section, MCAssembler *A = 0);
+
+ const MCSection &getSection() const { return *Section; }
+
+ unsigned getAlignment() const { return Alignment; }
+ void setAlignment(unsigned Value) { Alignment = Value; }
+
+ /// @name Fragment Access
+ /// @{
+
+ const FragmentListType &getFragmentList() const { return Fragments; }
+ FragmentListType &getFragmentList() { return Fragments; }
+
+ iterator begin() { return Fragments.begin(); }
+ const_iterator begin() const { return Fragments.begin(); }
+
+ iterator end() { return Fragments.end(); }
+ const_iterator end() const { return Fragments.end(); }
+
+ size_t size() const { return Fragments.size(); }
+
+ bool empty() const { return Fragments.empty(); }
+
+ /// @}
+ /// @name Fixup Access
+ /// @{
+
+ std::vector<Fixup> &getFixups() {
+ return Fixups;
+ }
+
+ fixup_iterator fixup_begin() {
+ return Fixups.begin();
+ }
+
+ fixup_iterator fixup_end() {
+ return Fixups.end();
+ }
+
+ size_t fixup_size() const { return Fixups.size(); }
+
+ /// @}
+ /// @name Assembler Backend Support
+ /// @{
+ //
+ // FIXME: This could all be kept private to the assembler implementation.
+
+ /// LookupFixup - Look up the fixup for the given \arg Fragment and \arg
+ /// Offset.
+ ///
+ /// If multiple fixups exist for the same fragment and offset it is undefined
+ /// which one is returned.
+ //
+ // FIXME: This isn't horribly slow in practice, but there are much nicer
+ // solutions to applying the fixups.
+ const Fixup *LookupFixup(const MCFragment *Fragment, uint64_t Offset) const;
+
+ uint64_t getAddress() const {
+ assert(Address != ~UINT64_C(0) && "Address not set!");
+ return Address;
+ }
+ void setAddress(uint64_t Value) { Address = Value; }
+
+ uint64_t getSize() const {
+    assert(Size != ~UINT64_C(0) && "Size not set!");
+ return Size;
+ }
+ void setSize(uint64_t Value) { Size = Value; }
+
+ uint64_t getFileSize() const {
+ assert(FileSize != ~UINT64_C(0) && "File size not set!");
+ return FileSize;
+ }
+ void setFileSize(uint64_t Value) { FileSize = Value; }
+
+ /// @}
+};
+
+// FIXME: Same concerns as with SectionData.
+class MCSymbolData : public ilist_node<MCSymbolData> {
+public:
+ const MCSymbol *Symbol;
+
+ /// Fragment - The fragment this symbol's value is relative to, if any.
+ MCFragment *Fragment;
+
+ /// Offset - The offset to apply to the fragment address to form this symbol's
+ /// value.
+ uint64_t Offset;
+
+ /// IsExternal - True if this symbol is visible outside this translation
+ /// unit.
+ unsigned IsExternal : 1;
+
+ /// IsPrivateExtern - True if this symbol is private extern.
+ unsigned IsPrivateExtern : 1;
+
+ /// CommonSize - The size of the symbol, if it is 'common', or 0.
+ //
+ // FIXME: Pack this in with other fields? We could put it in offset, since a
+ // common symbol can never get a definition.
+ uint64_t CommonSize;
+
+ /// CommonAlign - The alignment of the symbol, if it is 'common'.
+ //
+ // FIXME: Pack this in with other fields?
+ unsigned CommonAlign;
+
+ /// Flags - The Flags field is used by object file implementations to store
+ /// additional per symbol information which is not easily classified.
+ uint32_t Flags;
+
+ /// Index - Index field, for use by the object file implementation.
+ uint64_t Index;
+
+public:
+ // Only for use as sentinel.
+ MCSymbolData();
+ MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, uint64_t _Offset,
+ MCAssembler *A = 0);
+
+ /// @name Accessors
+ /// @{
+
+ const MCSymbol &getSymbol() const { return *Symbol; }
+
+ MCFragment *getFragment() const { return Fragment; }
+ void setFragment(MCFragment *Value) { Fragment = Value; }
+
+ uint64_t getOffset() const { return Offset; }
+ void setOffset(uint64_t Value) { Offset = Value; }
+
+ /// @}
+ /// @name Symbol Attributes
+ /// @{
+
+ bool isExternal() const { return IsExternal; }
+ void setExternal(bool Value) { IsExternal = Value; }
+
+ bool isPrivateExtern() const { return IsPrivateExtern; }
+ void setPrivateExtern(bool Value) { IsPrivateExtern = Value; }
+
+ /// isCommon - Is this a 'common' symbol.
+ bool isCommon() const { return CommonSize != 0; }
+
+ /// setCommon - Mark this symbol as being 'common'.
+ ///
+ /// \param Size - The size of the symbol.
+ /// \param Align - The alignment of the symbol.
+ void setCommon(uint64_t Size, unsigned Align) {
+ CommonSize = Size;
+ CommonAlign = Align;
+ }
+
+ /// getCommonSize - Return the size of a 'common' symbol.
+ uint64_t getCommonSize() const {
+ assert(isCommon() && "Not a 'common' symbol!");
+ return CommonSize;
+ }
+
+ /// getCommonAlignment - Return the alignment of a 'common' symbol.
+ unsigned getCommonAlignment() const {
+ assert(isCommon() && "Not a 'common' symbol!");
+ return CommonAlign;
+ }
+
+ /// getFlags - Get the (implementation defined) symbol flags.
+ uint32_t getFlags() const { return Flags; }
+
+ /// setFlags - Set the (implementation defined) symbol flags.
+ void setFlags(uint32_t Value) { Flags = Value; }
+
+ /// getIndex - Get the (implementation defined) index.
+ uint64_t getIndex() const { return Index; }
+
+ /// setIndex - Set the (implementation defined) index.
+ void setIndex(uint64_t Value) { Index = Value; }
+
+ /// @}
+};
+
+// FIXME: This really doesn't belong here. See comments below.
+struct IndirectSymbolData {
+ MCSymbol *Symbol;
+ MCSectionData *SectionData;
+};
+
+class MCAssembler {
+public:
+ typedef iplist<MCSectionData> SectionDataListType;
+ typedef iplist<MCSymbolData> SymbolDataListType;
+
+ typedef SectionDataListType::const_iterator const_iterator;
+ typedef SectionDataListType::iterator iterator;
+
+ typedef SymbolDataListType::const_iterator const_symbol_iterator;
+ typedef SymbolDataListType::iterator symbol_iterator;
+
+ typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;
+
+private:
+ MCAssembler(const MCAssembler&); // DO NOT IMPLEMENT
+ void operator=(const MCAssembler&); // DO NOT IMPLEMENT
+
+ MCContext &Context;
+
+ raw_ostream &OS;
+
+ iplist<MCSectionData> Sections;
+
+ iplist<MCSymbolData> Symbols;
+
+ std::vector<IndirectSymbolData> IndirectSymbols;
+
+ unsigned SubsectionsViaSymbols : 1;
+
+private:
+ /// LayoutSection - Assign offsets and sizes to the fragments in the section
+ /// \arg SD, and update the section size. The section file offset should
+ /// already have been computed.
+ void LayoutSection(MCSectionData &SD);
+
+public:
+ /// Construct a new assembler instance.
+ ///
+ /// \arg OS - The stream to output to.
+ //
+ // FIXME: How are we going to parameterize this? Two obvious options are stay
+ // concrete and require clients to pass in a target like object. The other
+ // option is to make this abstract, and have targets provide concrete
+ // implementations as we do with AsmParser.
+ MCAssembler(MCContext &_Context, raw_ostream &OS);
+ ~MCAssembler();
+
+ MCContext &getContext() const { return Context; }
+
+ /// Finish - Do final processing and write the object to the output stream.
+ void Finish();
+
+ // FIXME: This does not belong here.
+ bool getSubsectionsViaSymbols() const {
+ return SubsectionsViaSymbols;
+ }
+ void setSubsectionsViaSymbols(bool Value) {
+ SubsectionsViaSymbols = Value;
+ }
+
+ /// @name Section List Access
+ /// @{
+
+ const SectionDataListType &getSectionList() const { return Sections; }
+ SectionDataListType &getSectionList() { return Sections; }
+
+ iterator begin() { return Sections.begin(); }
+ const_iterator begin() const { return Sections.begin(); }
+
+ iterator end() { return Sections.end(); }
+ const_iterator end() const { return Sections.end(); }
+
+ size_t size() const { return Sections.size(); }
+
+ /// @}
+ /// @name Symbol List Access
+ /// @{
+
+ const SymbolDataListType &getSymbolList() const { return Symbols; }
+ SymbolDataListType &getSymbolList() { return Symbols; }
+
+ symbol_iterator symbol_begin() { return Symbols.begin(); }
+ const_symbol_iterator symbol_begin() const { return Symbols.begin(); }
+
+ symbol_iterator symbol_end() { return Symbols.end(); }
+ const_symbol_iterator symbol_end() const { return Symbols.end(); }
+
+ size_t symbol_size() const { return Symbols.size(); }
+
+ /// @}
+ /// @name Indirect Symbol List Access
+ /// @{
+
+ // FIXME: This is a total hack, this should not be here. Once things are
+ // factored so that the streamer has direct access to the .o writer, it can
+ // disappear.
+ std::vector<IndirectSymbolData> &getIndirectSymbols() {
+ return IndirectSymbols;
+ }
+
+ indirect_symbol_iterator indirect_symbol_begin() {
+ return IndirectSymbols.begin();
+ }
+
+ indirect_symbol_iterator indirect_symbol_end() {
+ return IndirectSymbols.end();
+ }
+
+ size_t indirect_symbol_size() const { return IndirectSymbols.size(); }
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
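
The intended flow is: construct the assembler over an output stream, hang MCSectionData off it, append fragments, then call Finish(). A rough sketch under those assumptions (section creation itself is elided; Text stands for an existing MCSection, and passing the parent pointer is assumed to register each new node in its owner's list, per the ilist-based design above):

    #include "llvm/MC/MCAssembler.h"
    #include "llvm/Support/raw_ostream.h"

    // Append three bytes to a fresh data fragment and write the object.
    static void emitThreeBytes(llvm::MCContext &Ctx, llvm::raw_ostream &OS,
                               const llvm::MCSection &Text) {
      llvm::MCAssembler Asm(Ctx, OS);
      // Ownership transfers to Asm via the parent pointers.
      llvm::MCSectionData *SD = new llvm::MCSectionData(Text, &Asm);
      llvm::MCDataFragment *DF = new llvm::MCDataFragment(SD);
      DF->getContents().push_back(0x90);
      DF->getContents().push_back(0x90);
      DF->getContents().push_back(0x90);
      Asm.Finish(); // lays out the sections and writes the object file
    }
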
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
new file mode 100644
index 000000000000..ad42dc2e5b46
--- /dev/null
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -0,0 +1,34 @@
+//===-- llvm/MC/MCCodeEmitter.h - Instruction Encoding ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCCODEEMITTER_H
+#define LLVM_MC_MCCODEEMITTER_H
+
+namespace llvm {
+class MCInst;
+class raw_ostream;
+
+/// MCCodeEmitter - Generic instruction encoding interface.
+class MCCodeEmitter {
+ MCCodeEmitter(const MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MCCodeEmitter &); // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+ MCCodeEmitter();
+
+public:
+ virtual ~MCCodeEmitter();
+
+ /// EncodeInstruction - Encode the given \arg Inst to bytes on the output
+ /// stream \arg OS.
+ virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS) const = 0;
+};
+
+} // End llvm namespace
+
+#endif
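
Targets subclass this with their encoder. A skeletal sketch; the single-byte "encoding" is obviously fictitious, where a real target consults its TableGen'd encoding tables:

    #include "llvm/MC/MCCodeEmitter.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"

    namespace {
    // Toy emitter: write the low byte of the opcode to the stream.
    class ToyCodeEmitter : public llvm::MCCodeEmitter {
    public:
      void EncodeInstruction(const llvm::MCInst &Inst,
                             llvm::raw_ostream &OS) const {
        OS << char(Inst.getOpcode() & 0xFF);
      }
    };
    } // anonymous namespace
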
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 846e195139de..955aa8b08388 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -18,8 +18,11 @@ namespace llvm {
class MCValue;
class MCSection;
class MCSymbol;
+ class StringRef;
- /// MCContext - Context object for machine code objects.
+ /// MCContext - Context object for machine code objects. This class owns all
+ /// of the sections that it creates.
+ ///
class MCContext {
MCContext(const MCContext&); // DO NOT IMPLEMENT
MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
@@ -33,32 +36,33 @@ namespace llvm {
/// SymbolValues - Bindings of symbols to values.
//
// FIXME: Is there a good reason to not just put this in the MCSymbol?
- DenseMap<MCSymbol*, MCValue> SymbolValues;
+ DenseMap<const MCSymbol*, MCValue> SymbolValues;
/// Allocator - Allocator object used for creating machine code objects.
///
/// We use a bump pointer allocator to avoid the need to track all allocated
/// objects.
BumpPtrAllocator Allocator;
-
public:
MCContext();
~MCContext();
- /// GetSection - Get or create a new section with the given @param Name.
- MCSection *GetSection(const char *Name);
-
+    /// @name Symbol Management
+ /// @{
+
/// CreateSymbol - Create a new symbol with the specified @param Name.
///
/// @param Name - The symbol name, which must be unique across all symbols.
- MCSymbol *CreateSymbol(const char *Name);
+ MCSymbol *CreateSymbol(const StringRef &Name);
/// GetOrCreateSymbol - Lookup the symbol inside with the specified
/// @param Name. If it exists, return it. If not, create a forward
/// reference and return it.
///
/// @param Name - The symbol name, which must be unique across all symbols.
- MCSymbol *GetOrCreateSymbol(const char *Name);
+    MCSymbol *GetOrCreateSymbol(const StringRef &Name);
/// CreateTemporarySymbol - Create a new temporary symbol with the specified
/// @param Name.
@@ -66,22 +70,26 @@ namespace llvm {
/// @param Name - The symbol name, for debugging purposes only, temporary
  /// symbols do not survive assembly. If non-empty the name must be unique
/// across all symbols.
- MCSymbol *CreateTemporarySymbol(const char *Name = "");
+ MCSymbol *CreateTemporarySymbol(const StringRef &Name = "");
/// LookupSymbol - Get the symbol for @param Name, or null.
- MCSymbol *LookupSymbol(const char *Name) const;
+ MCSymbol *LookupSymbol(const StringRef &Name) const;
- /// ClearSymbolValue - Erase a value binding for @param Symbol, if one
- /// exists.
- void ClearSymbolValue(MCSymbol *Symbol);
+ /// @}
+ /// @name Symbol Value Table
+ /// @{
- /// SetSymbolValue - Set the value binding for @param Symbol to @param
- /// Value.
- void SetSymbolValue(MCSymbol *Symbol, const MCValue &Value);
+ /// ClearSymbolValue - Erase a value binding for @arg Symbol, if one exists.
+ void ClearSymbolValue(const MCSymbol *Symbol);
- /// GetSymbolValue - Return the current value for @param Symbol, or null if
+ /// SetSymbolValue - Set the value binding for @arg Symbol to @arg Value.
+ void SetSymbolValue(const MCSymbol *Symbol, const MCValue &Value);
+
+ /// GetSymbolValue - Return the current value for @arg Symbol, or null if
/// none exists.
- const MCValue *GetSymbolValue(MCSymbol *Symbol) const;
+ const MCValue *GetSymbolValue(const MCSymbol *Symbol) const;
+
+ /// @}
void *Allocate(unsigned Size, unsigned Align = 8) {
return Allocator.Allocate(Size, Align);
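
The StringRef migration does not change the uniquing semantics: GetOrCreateSymbol returns one MCSymbol per name, and temporaries stay out of the final symbol table. A small sketch (MCSymbol.h is the assumed home of the MCSymbol definition):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSymbol.h"
    #include <cassert>

    static void symbolDemo(llvm::MCContext &Ctx) {
      llvm::MCSymbol *A = Ctx.GetOrCreateSymbol("common_sym");
      llvm::MCSymbol *B = Ctx.GetOrCreateSymbol("common_sym");
      assert(A == B && "the context uniques symbols by name");
      // Temporaries may be anonymous; the name is for debugging only.
      llvm::MCSymbol *Tmp = Ctx.CreateTemporarySymbol();
      (void)Tmp;
    }
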
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
new file mode 100644
index 000000000000..ef10b8035936
--- /dev/null
+++ b/include/llvm/MC/MCDisassembler.h
@@ -0,0 +1,50 @@
+//===-- llvm/MC/MCDisassembler.h - Disassembler interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MCDISASSEMBLER_H
+#define MCDISASSEMBLER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+/// MCDisassembler - Superclass for all disassemblers. Consumes a memory region
+/// and decodes the machine instructions it contains, one at a time.
+class MCDisassembler {
+public:
+ /// Constructor - Performs initial setup for the disassembler.
+ MCDisassembler() {}
+
+ virtual ~MCDisassembler();
+
+ /// getInstruction - Returns the disassembly of a single instruction.
+ ///
+ /// @param instr - An MCInst to populate with the contents of the
+ /// instruction.
+ /// @param size - A value to populate with the size of the instruction, or
+ /// the number of bytes consumed while attempting to decode
+ /// an invalid instruction.
+ /// @param region - The memory object to use as a source for machine code.
+ /// @param address - The address, in the memory space of region, of the first
+ /// byte of the instruction.
+ /// @param vStream - The stream to print warnings and diagnostic messages on.
+ /// @return - True if the instruction is valid; false otherwise.
+ virtual bool getInstruction(MCInst& instr,
+ uint64_t& size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const = 0;
+};
+
+} // namespace llvm
+
+#endif
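
A client walks a buffer by repeatedly calling getInstruction and advancing by the reported size; on a failed decode, size still says how many bytes were consumed, so the loop can resynchronize. A sketch, assuming a concrete disassembler and a MemoryObject covering [0, Extent) (MemoryObject is assumed to live in llvm/Support/MemoryObject.h):

    #include "llvm/MC/MCDisassembler.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/MemoryObject.h"
    #include "llvm/Support/raw_ostream.h"

    static void disassembleAll(const llvm::MCDisassembler &Dis,
                               const llvm::MemoryObject &Region,
                               uint64_t Extent, llvm::raw_ostream &Errs) {
      uint64_t Addr = 0;
      while (Addr < Extent) {
        llvm::MCInst Inst;
        uint64_t Size = 0;
        if (!Dis.getInstruction(Inst, Size, Region, Addr, Errs))
          Errs << "invalid encoding at offset " << Addr << '\n';
        Addr += Size ? Size : 1; // always make forward progress
      }
    }
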
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
new file mode 100644
index 000000000000..19a32e7added
--- /dev/null
+++ b/include/llvm/MC/MCExpr.h
@@ -0,0 +1,328 @@
+//===- MCExpr.h - Assembly Level Expressions --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCEXPR_H
+#define LLVM_MC_MCEXPR_H
+
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmInfo;
+class MCContext;
+class MCSymbol;
+class MCValue;
+class raw_ostream;
+class StringRef;
+
+/// MCExpr - Base class for the full range of assembler expressions which are
+/// needed for parsing.
+class MCExpr {
+public:
+ enum ExprKind {
+ Binary, ///< Binary expressions.
+ Constant, ///< Constant expressions.
+ SymbolRef, ///< References to labels and assigned expressions.
+ Unary ///< Unary expressions.
+ };
+
+private:
+ ExprKind Kind;
+
+ MCExpr(const MCExpr&); // DO NOT IMPLEMENT
+ void operator=(const MCExpr&); // DO NOT IMPLEMENT
+
+protected:
+ MCExpr(ExprKind _Kind) : Kind(_Kind) {}
+
+public:
+ /// @name Accessors
+ /// @{
+
+ ExprKind getKind() const { return Kind; }
+
+ /// @}
+ /// @name Utility Methods
+ /// @{
+
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+ void dump() const;
+
+ /// @}
+ /// @name Expression Evaluation
+ /// @{
+
+ /// EvaluateAsAbsolute - Try to evaluate the expression to an absolute value.
+ ///
+ /// @param Res - The absolute value, if evaluation succeeds.
+ /// @result - True on success.
+ bool EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const;
+
+ /// EvaluateAsRelocatable - Try to evaluate the expression to a relocatable
+ /// value, i.e. an expression of the fixed form (a - b + constant).
+ ///
+ /// @param Res - The relocatable value, if evaluation succeeds.
+ /// @result - True on success.
+ bool EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const;
+
+ /// @}
+
+ static bool classof(const MCExpr *) { return true; }
+};
+
+//// MCConstantExpr - Represent a constant integer expression.
+class MCConstantExpr : public MCExpr {
+ int64_t Value;
+
+ MCConstantExpr(int64_t _Value)
+ : MCExpr(MCExpr::Constant), Value(_Value) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const MCConstantExpr *Create(int64_t Value, MCContext &Ctx);
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ int64_t getValue() const { return Value; }
+
+ /// @}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Constant;
+ }
+ static bool classof(const MCConstantExpr *) { return true; }
+};
+
+/// MCSymbolRefExpr - Represent a reference to a symbol from inside an
+/// expression.
+///
+/// A symbol reference in an expression may be a use of a label, a use of an
+/// assembler variable (defined constant), or constitute an implicit definition
+/// of the symbol as external.
+class MCSymbolRefExpr : public MCExpr {
+ const MCSymbol *Symbol;
+
+ MCSymbolRefExpr(const MCSymbol *_Symbol)
+ : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, MCContext &Ctx);
+ static const MCSymbolRefExpr *Create(const StringRef &Name, MCContext &Ctx);
+
+
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ const MCSymbol &getSymbol() const { return *Symbol; }
+
+ /// @}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::SymbolRef;
+ }
+ static bool classof(const MCSymbolRefExpr *) { return true; }
+};
+
+/// MCUnaryExpr - Unary assembler expressions.
+class MCUnaryExpr : public MCExpr {
+public:
+ enum Opcode {
+ LNot, ///< Logical negation.
+ Minus, ///< Unary minus.
+ Not, ///< Bitwise negation.
+ Plus ///< Unary plus.
+ };
+
+private:
+ Opcode Op;
+ const MCExpr *Expr;
+
+ MCUnaryExpr(Opcode _Op, const MCExpr *_Expr)
+ : MCExpr(MCExpr::Unary), Op(_Op), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const MCUnaryExpr *Create(Opcode Op, const MCExpr *Expr,
+ MCContext &Ctx);
+ static const MCUnaryExpr *CreateLNot(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(LNot, Expr, Ctx);
+ }
+ static const MCUnaryExpr *CreateMinus(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(Minus, Expr, Ctx);
+ }
+ static const MCUnaryExpr *CreateNot(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(Not, Expr, Ctx);
+ }
+ static const MCUnaryExpr *CreatePlus(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(Plus, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this unary expression.
+ Opcode getOpcode() const { return Op; }
+
+ /// getSubExpr - Get the child of this unary expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Unary;
+ }
+ static bool classof(const MCUnaryExpr *) { return true; }
+};
+
+/// MCBinaryExpr - Binary assembler expressions.
+class MCBinaryExpr : public MCExpr {
+public:
+ enum Opcode {
+ Add, ///< Addition.
+ And, ///< Bitwise and.
+ Div, ///< Division.
+ EQ, ///< Equality comparison.
+ GT, ///< Greater than comparison.
+ GTE, ///< Greater than or equal comparison.
+ LAnd, ///< Logical and.
+ LOr, ///< Logical or.
+ LT, ///< Less than comparison.
+ LTE, ///< Less than or equal comparison.
+ Mod, ///< Modulus.
+ Mul, ///< Multiplication.
+ NE, ///< Inequality comparison.
+ Or, ///< Bitwise or.
+ Shl, ///< Bitwise shift left.
+ Shr, ///< Bitwise shift right.
+ Sub, ///< Subtraction.
+ Xor ///< Bitwise exclusive or.
+ };
+
+private:
+ Opcode Op;
+ const MCExpr *LHS, *RHS;
+
+ MCBinaryExpr(Opcode _Op, const MCExpr *_LHS, const MCExpr *_RHS)
+ : MCExpr(MCExpr::Binary), Op(_Op), LHS(_LHS), RHS(_RHS) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const MCBinaryExpr *Create(Opcode Op, const MCExpr *LHS,
+ const MCExpr *RHS, MCContext &Ctx);
+ static const MCBinaryExpr *CreateAdd(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Add, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateAnd(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(And, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateDiv(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Div, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateEQ(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(EQ, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateGT(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(GT, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateGTE(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(GTE, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateLAnd(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(LAnd, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateLOr(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(LOr, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateLT(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(LT, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateLTE(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(LTE, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateMod(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Mod, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateMul(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Mul, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateNE(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(NE, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateOr(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Or, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateShl(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Shl, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateShr(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Shr, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateSub(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Sub, LHS, RHS, Ctx);
+ }
+ static const MCBinaryExpr *CreateXor(const MCExpr *LHS, const MCExpr *RHS,
+ MCContext &Ctx) {
+ return Create(Xor, LHS, RHS, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this binary expression.
+ Opcode getOpcode() const { return Op; }
+
+ /// getLHS - Get the left-hand side expression of the binary operator.
+ const MCExpr *getLHS() const { return LHS; }
+
+ /// getRHS - Get the right-hand side expression of the binary operator.
+ const MCExpr *getRHS() const { return RHS; }
+
+ /// @}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Binary;
+ }
+ static bool classof(const MCBinaryExpr *) { return true; }
+};
+
+} // end namespace llvm
+
+#endif
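
Expression nodes are immutable and allocated in the MCContext, so the Create factories return const pointers and there is no explicit teardown. Building (a - b) + 4 looks roughly like this (symbols a and b are assumed to exist or be created as forward references):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCExpr.h"

    // Build (a - b) + 4. EvaluateAsAbsolute folds the tree to an
    // integer if both symbols have absolute values bound in Ctx.
    static const llvm::MCExpr *buildDiffPlus4(llvm::MCContext &Ctx) {
      const llvm::MCExpr *A = llvm::MCSymbolRefExpr::Create("a", Ctx);
      const llvm::MCExpr *B = llvm::MCSymbolRefExpr::Create("b", Ctx);
      const llvm::MCExpr *Diff = llvm::MCBinaryExpr::CreateSub(A, B, Ctx);
      return llvm::MCBinaryExpr::CreateAdd(
          Diff, llvm::MCConstantExpr::Create(4, Ctx), Ctx);
    }
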
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index 8b638d4c743e..0fc4d186b975 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -16,12 +16,13 @@
#ifndef LLVM_MC_MCINST_H
#define LLVM_MC_MCINST_H
-#include "llvm/MC/MCValue.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/DebugLoc.h"
namespace llvm {
+class raw_ostream;
+class MCAsmInfo;
+class MCExpr;
/// MCOperand - Instances of this class represent operands of the MCInst class.
/// This is a simple discriminated union.
@@ -30,19 +31,14 @@ class MCOperand {
kInvalid, ///< Uninitialized.
kRegister, ///< Register operand.
kImmediate, ///< Immediate operand.
- kMBBLabel, ///< Basic block label.
- kMCValue ///< Relocatable immediate operand.
+ kExpr ///< Relocatable immediate operand.
};
unsigned char Kind;
union {
unsigned RegVal;
int64_t ImmVal;
- MCValue MCValueVal;
- struct {
- unsigned FunctionNo;
- unsigned BlockNo;
- } MBBLabel;
+ const MCExpr *ExprVal;
};
public:
@@ -52,8 +48,7 @@ public:
bool isValid() const { return Kind != kInvalid; }
bool isReg() const { return Kind == kRegister; }
bool isImm() const { return Kind == kImmediate; }
- bool isMBBLabel() const { return Kind == kMBBLabel; }
- bool isMCValue() const { return Kind == kMCValue; }
+ bool isExpr() const { return Kind == kExpr; }
/// getReg - Returns the register number.
unsigned getReg() const {
@@ -76,41 +71,36 @@ public:
ImmVal = Val;
}
- unsigned getMBBLabelFunction() const {
- assert(isMBBLabel() && "Wrong accessor");
- return MBBLabel.FunctionNo;
+ const MCExpr *getExpr() const {
+ assert(isExpr() && "This is not an expression");
+ return ExprVal;
}
- unsigned getMBBLabelBlock() const {
- assert(isMBBLabel() && "Wrong accessor");
- return MBBLabel.BlockNo;
- }
-
- const MCValue &getMCValue() const {
- assert(isMCValue() && "This is not an MCValue");
- return MCValueVal;
- }
- void setMCValue(const MCValue &Val) {
- assert(isMCValue() && "This is not an MCValue");
- MCValueVal = Val;
+ void setExpr(const MCExpr *Val) {
+ assert(isExpr() && "This is not an expression");
+ ExprVal = Val;
}
- void MakeReg(unsigned Reg) {
- Kind = kRegister;
- RegVal = Reg;
- }
- void MakeImm(int64_t Val) {
- Kind = kImmediate;
- ImmVal = Val;
+ static MCOperand CreateReg(unsigned Reg) {
+ MCOperand Op;
+ Op.Kind = kRegister;
+ Op.RegVal = Reg;
+ return Op;
}
- void MakeMBBLabel(unsigned Fn, unsigned MBB) {
- Kind = kMBBLabel;
- MBBLabel.FunctionNo = Fn;
- MBBLabel.BlockNo = MBB;
+ static MCOperand CreateImm(int64_t Val) {
+ MCOperand Op;
+ Op.Kind = kImmediate;
+ Op.ImmVal = Val;
+ return Op;
}
- void MakeMCValue(const MCValue &Val) {
- Kind = kMCValue;
- MCValueVal = Val;
+ static MCOperand CreateExpr(const MCExpr *Val) {
+ MCOperand Op;
+ Op.Kind = kExpr;
+ Op.ExprVal = Val;
+ return Op;
}
+
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+ void dump() const;
};
@@ -120,13 +110,12 @@ class MCInst {
unsigned Opcode;
SmallVector<MCOperand, 8> Operands;
public:
- MCInst() : Opcode(~0U) {}
+ MCInst() : Opcode(0) {}
void setOpcode(unsigned Op) { Opcode = Op; }
unsigned getOpcode() const { return Opcode; }
- DebugLoc getDebugLoc() const { return DebugLoc(); }
-
+
const MCOperand &getOperand(unsigned i) const { return Operands[i]; }
MCOperand &getOperand(unsigned i) { return Operands[i]; }
unsigned getNumOperands() const { return Operands.size(); }
@@ -134,6 +123,9 @@ public:
void addOperand(const MCOperand &Op) {
Operands.push_back(Op);
}
+
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+ void dump() const;
};
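
With the MakeReg/MakeImm mutators replaced by static factories, operand construction is now value-style. A sketch of building a register-plus-immediate instruction (opcode 42 is a stand-in for a real target opcode):

    #include "llvm/MC/MCInst.h"

    static llvm::MCInst makeToyInst(unsigned Reg) {
      llvm::MCInst Inst;
      Inst.setOpcode(42);
      Inst.addOperand(llvm::MCOperand::CreateReg(Reg));
      Inst.addOperand(llvm::MCOperand::CreateImm(7));
      // CreateExpr would be used here instead for a relocatable operand.
      return Inst;
    }
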
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
new file mode 100644
index 000000000000..d62a9dae7c54
--- /dev/null
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -0,0 +1,37 @@
+//===-- MCInstPrinter.h - Convert an MCInst to target assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINSTPRINTER_H
+#define LLVM_MC_MCINSTPRINTER_H
+
+namespace llvm {
+class MCInst;
+class raw_ostream;
+class MCAsmInfo;
+
+
+/// MCInstPrinter - This is an instance of a target assembly language printer
+/// that converts an MCInst to valid target assembly syntax.
+class MCInstPrinter {
+protected:
+ raw_ostream &O;
+ const MCAsmInfo &MAI;
+public:
+ MCInstPrinter(raw_ostream &o, const MCAsmInfo &mai) : O(o), MAI(mai) {}
+
+ virtual ~MCInstPrinter();
+
+ /// printInst - Print the specified MCInst to the current raw_ostream.
+ ///
+ virtual void printInst(const MCInst *MI) = 0;
+};
+
+} // namespace llvm
+
+#endif
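
Concrete printers override printInst and use the protected stream and asm-info members. A skeletal sketch that prints a placeholder rather than real target syntax:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstPrinter.h"
    #include "llvm/Support/raw_ostream.h"

    namespace {
    class ToyInstPrinter : public llvm::MCInstPrinter {
    public:
      ToyInstPrinter(llvm::raw_ostream &OS, const llvm::MCAsmInfo &Info)
          : MCInstPrinter(OS, Info) {}
      // A real printer maps the opcode to a mnemonic and formats each
      // operand according to the target's syntax, consulting MAI.
      void printInst(const llvm::MCInst *MI) {
        O << "<inst " << MI->getOpcode() << ", "
          << MI->getNumOperands() << " operands>\n";
      }
    };
    } // anonymous namespace
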
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 1b127b52e1c9..9e071864e65f 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -15,25 +15,57 @@
#define LLVM_MC_MCSECTION_H
#include <string>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SectionKind.h"
namespace llvm {
-
+ class MCContext;
+ class MCAsmInfo;
+ class raw_ostream;
+
/// MCSection - Instances of this class represent a uniqued identifier for a
/// section in the current translation unit. The MCContext class uniques and
/// creates these.
class MCSection {
- std::string Name;
- private:
- friend class MCContext;
- MCSection(const char *_Name) : Name(_Name) {}
-
MCSection(const MCSection&); // DO NOT IMPLEMENT
void operator=(const MCSection&); // DO NOT IMPLEMENT
+ protected:
+ MCSection(SectionKind K) : Kind(K) {}
+ SectionKind Kind;
public:
+ virtual ~MCSection();
- const std::string &getName() const { return Name; }
+ SectionKind getKind() const { return Kind; }
+
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const = 0;
};
+ class MCSectionCOFF : public MCSection {
+ std::string Name;
+
+ /// IsDirective - This is true if the section name is a directive, not
+ /// something that should be printed with ".section".
+ ///
+ /// FIXME: This is a hack. Switch to a semantic view of the section instead
+ /// of a syntactic one.
+ bool IsDirective;
+
+ MCSectionCOFF(const StringRef &name, bool isDirective, SectionKind K)
+ : MCSection(K), Name(name), IsDirective(isDirective) {
+ }
+ public:
+
+ static MCSectionCOFF *Create(const StringRef &Name, bool IsDirective,
+ SectionKind K, MCContext &Ctx);
+
+ const std::string &getName() const { return Name; }
+ bool isDirective() const { return IsDirective; }
+
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const;
+ };
+
} // end namespace llvm
#endif
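
Sections are now abstract, kind-tagged, and uniqued through each subclass's Create against the MCContext. For example, a directive-style COFF text section, sketched (SectionKind::getText() is assumed from the contemporaneous SectionKind API this header pulls in):

    #include "llvm/MC/MCSection.h"

    // IsDirective=true means the name is emitted verbatim (".text")
    // instead of going through a ".section" directive.
    static void switchToText(llvm::MCContext &Ctx, const llvm::MCAsmInfo &MAI,
                             llvm::raw_ostream &OS) {
      llvm::MCSectionCOFF *Text = llvm::MCSectionCOFF::Create(
          ".text", /*IsDirective=*/true, llvm::SectionKind::getText(), Ctx);
      Text->PrintSwitchToSection(MAI, OS);
    }
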
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
new file mode 100644
index 000000000000..57fa903f717a
--- /dev/null
+++ b/include/llvm/MC/MCSectionELF.h
@@ -0,0 +1,191 @@
+//===- MCSectionELF.h - ELF Machine Code Sections ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionELF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONELF_H
+#define LLVM_MC_MCSECTIONELF_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+/// MCSectionELF - This represents a section on Linux, most Unix variants,
+/// and some bare-metal systems.
+class MCSectionELF : public MCSection {
+ std::string SectionName;
+
+ /// Type - This is the sh_type field of a section, drawn from the enums below.
+ unsigned Type;
+
+  /// Flags - This is the sh_flags field of a section, drawn from the enums
+  /// below.
+ unsigned Flags;
+
+  /// IsExplicit - Indicates that this section comes from globals with an
+  /// explicit section specified.
+ bool IsExplicit;
+
+protected:
+ MCSectionELF(const StringRef &Section, unsigned type, unsigned flags,
+ SectionKind K, bool isExplicit)
+ : MCSection(K), SectionName(Section.str()), Type(type), Flags(flags),
+ IsExplicit(isExplicit) {}
+public:
+
+ static MCSectionELF *Create(const StringRef &Section, unsigned Type,
+ unsigned Flags, SectionKind K, bool isExplicit,
+ MCContext &Ctx);
+
+ /// ShouldOmitSectionDirective - Decides whether a '.section' directive
+  /// should be printed before the section name.
+ bool ShouldOmitSectionDirective(const char *Name,
+ const MCAsmInfo &MAI) const;
+
+  /// ShouldPrintSectionType - Only prints the section type if supported.
+ bool ShouldPrintSectionType(unsigned Ty) const;
+
+ /// HasCommonSymbols - True if this section holds common symbols, this is
+ /// indicated on the ELF object file by a symbol with SHN_COMMON section
+ /// header index.
+ bool HasCommonSymbols() const;
+
+ /// These are the section type and flags fields. An ELF section can have
+ /// only one Type, but can have more than one of the flags specified.
+ ///
+ /// Valid section types.
+ enum {
+ // This value marks the section header as inactive.
+ SHT_NULL = 0x00U,
+
+ // Holds information defined by the program, with custom format and meaning.
+ SHT_PROGBITS = 0x01U,
+
+ // This section holds a symbol table.
+ SHT_SYMTAB = 0x02U,
+
+ // The section holds a string table.
+ SHT_STRTAB = 0x03U,
+
+ // The section holds relocation entries with explicit addends.
+ SHT_RELA = 0x04U,
+
+ // The section holds a symbol hash table.
+ SHT_HASH = 0x05U,
+
+ // Information for dynamic linking.
+ SHT_DYNAMIC = 0x06U,
+
+ // The section holds information that marks the file in some way.
+ SHT_NOTE = 0x07U,
+
+ // A section of this type occupies no space in the file.
+ SHT_NOBITS = 0x08U,
+
+ // The section holds relocation entries without explicit addends.
+ SHT_REL = 0x09U,
+
+ // This section type is reserved but has unspecified semantics.
+ SHT_SHLIB = 0x0AU,
+
+    // This section holds a minimal set of symbols needed for dynamic linking.
+ SHT_DYNSYM = 0x0BU,
+
+ // This section contains an array of pointers to initialization functions.
+ SHT_INIT_ARRAY = 0x0EU,
+
+ // This section contains an array of pointers to termination functions.
+ SHT_FINI_ARRAY = 0x0FU,
+
+ // This section contains an array of pointers to functions that are invoked
+ // before all other initialization functions.
+ SHT_PREINIT_ARRAY = 0x10U,
+
+ // A section group is a set of sections that are related and that must be
+ // treated specially by the linker.
+ SHT_GROUP = 0x11U,
+
+ // This section is associated with a section of type SHT_SYMTAB, when the
+    // referenced symbol table contains the escape value SHN_XINDEX.
+ SHT_SYMTAB_SHNDX = 0x12U,
+
+ LAST_KNOWN_SECTION_TYPE = SHT_SYMTAB_SHNDX
+ };
+
+ /// Valid section flags.
+ enum {
+ // The section contains data that should be writable.
+ SHF_WRITE = 0x1U,
+
+ // The section occupies memory during execution.
+ SHF_ALLOC = 0x2U,
+
+ // The section contains executable machine instructions.
+ SHF_EXECINSTR = 0x4U,
+
+ // The data in the section may be merged to eliminate duplication.
+ SHF_MERGE = 0x10U,
+
+ // Elements in the section consist of null-terminated character strings.
+ SHF_STRINGS = 0x20U,
+
+ // A field in this section holds a section header table index.
+ SHF_INFO_LINK = 0x40U,
+
+ // Adds special ordering requirements for link editors.
+ SHF_LINK_ORDER = 0x80U,
+
+ // This section requires special OS-specific processing to avoid incorrect
+ // behavior.
+ SHF_OS_NONCONFORMING = 0x100U,
+
+ // This section is a member of a section group.
+ SHF_GROUP = 0x200U,
+
+ // This section holds Thread-Local Storage.
+ SHF_TLS = 0x400U,
+
+ /// FIRST_TARGET_DEP_FLAG - This is the first flag that subclasses are
+ /// allowed to specify.
+ FIRST_TARGET_DEP_FLAG = 0x800U,
+
+ /// TARGET_INDEP_SHF - This is the bitmask for all the target independent
+ /// section flags. Targets can define their own target flags above these.
+ /// If they do that, they should implement their own MCSectionELF subclasses
+ /// and implement the virtual method hooks below to handle printing needs.
+ TARGET_INDEP_SHF = FIRST_TARGET_DEP_FLAG-1U
+ };
+
+ StringRef getSectionName() const {
+ return StringRef(SectionName);
+ }
+
+ unsigned getType() const { return Type; }
+ unsigned getFlags() const { return Flags; }
+
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const;
+
+
+ /// PrintTargetSpecificSectionFlags - Targets that define their own
+ /// MCSectionELF subclasses with target specific section flags should
+ /// implement this method if they end up adding letters to the attributes
+ /// list.
+ virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ }
+
+
+};
+
+} // end namespace llvm
+
+#endif
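
An ELF section carries exactly one sh_type but any OR'd combination of SHF_* flags. For instance, a .data-style section, sketched against the Create factory above:

    #include "llvm/MC/MCSectionELF.h"

    // Allocated, writable progbits: one type, multiple flags OR'd in.
    static llvm::MCSectionELF *createDataSection(llvm::MCContext &Ctx,
                                                 llvm::SectionKind K) {
      return llvm::MCSectionELF::Create(
          ".data", llvm::MCSectionELF::SHT_PROGBITS,
          llvm::MCSectionELF::SHF_ALLOC | llvm::MCSectionELF::SHF_WRITE,
          K, /*isExplicit=*/false, Ctx);
    }
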
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
new file mode 100644
index 000000000000..251c88fa9481
--- /dev/null
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -0,0 +1,175 @@
+//===- MCSectionMachO.h - MachO Machine Code Sections -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionMachO class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONMACHO_H
+#define LLVM_MC_MCSECTIONMACHO_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+/// MCSectionMachO - This represents a section on a Mach-O system (used by
+/// Mac OS X). On a Mac system, these are also described in
+/// /usr/include/mach-o/loader.h.
+class MCSectionMachO : public MCSection {
+ char SegmentName[16]; // Not necessarily null terminated!
+ char SectionName[16]; // Not necessarily null terminated!
+
+ /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES
+ /// field of a section, drawn from the enums below.
+ unsigned TypeAndAttributes;
+
+ /// Reserved2 - The 'reserved2' field of a section, used to represent the
+ /// size of stubs, for example.
+ unsigned Reserved2;
+
+ MCSectionMachO(const StringRef &Segment, const StringRef &Section,
+ unsigned TAA, unsigned reserved2, SectionKind K)
+ : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ assert(Segment.size() <= 16 && Section.size() <= 16 &&
+ "Segment or section string too long");
+ for (unsigned i = 0; i != 16; ++i) {
+ if (i < Segment.size())
+ SegmentName[i] = Segment[i];
+ else
+ SegmentName[i] = 0;
+
+ if (i < Section.size())
+ SectionName[i] = Section[i];
+ else
+ SectionName[i] = 0;
+ }
+ }
+public:
+
+ static MCSectionMachO *Create(const StringRef &Segment,
+ const StringRef &Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2,
+ SectionKind K, MCContext &Ctx);
+
+ /// These are the section type and attributes fields. A Mach-O section can
+ /// have only one Type, but can have any of the attributes specified.
+ enum {
+ // TypeAndAttributes bitmasks.
+ SECTION_TYPE = 0x000000FFU,
+ SECTION_ATTRIBUTES = 0xFFFFFF00U,
+
+ // Valid section types.
+
+ /// S_REGULAR - Regular section.
+ S_REGULAR = 0x00U,
+ /// S_ZEROFILL - Zero fill on demand section.
+ S_ZEROFILL = 0x01U,
+ /// S_CSTRING_LITERALS - Section with literal C strings.
+ S_CSTRING_LITERALS = 0x02U,
+ /// S_4BYTE_LITERALS - Section with 4 byte literals.
+ S_4BYTE_LITERALS = 0x03U,
+ /// S_8BYTE_LITERALS - Section with 8 byte literals.
+ S_8BYTE_LITERALS = 0x04U,
+ /// S_LITERAL_POINTERS - Section with pointers to literals.
+ S_LITERAL_POINTERS = 0x05U,
+ /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers.
+ S_NON_LAZY_SYMBOL_POINTERS = 0x06U,
+ /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers.
+ S_LAZY_SYMBOL_POINTERS = 0x07U,
+ /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in
+ /// the Reserved2 field.
+ S_SYMBOL_STUBS = 0x08U,
+ /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
+ /// initialization.
+ S_MOD_INIT_FUNC_POINTERS = 0x09U,
+ /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for
+ /// termination.
+ S_MOD_TERM_FUNC_POINTERS = 0x0AU,
+ /// S_COALESCED - Section contains symbols that are to be coalesced.
+ S_COALESCED = 0x0BU,
+ /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4
+ /// gigabytes).
+ S_GB_ZEROFILL = 0x0CU,
+ /// S_INTERPOSING - Section with only pairs of function pointers for
+ /// interposing.
+ S_INTERPOSING = 0x0DU,
+ /// S_16BYTE_LITERALS - Section with only 16 byte literals.
+ S_16BYTE_LITERALS = 0x0EU,
+ /// S_DTRACE_DOF - Section contains DTrace Object Format.
+ S_DTRACE_DOF = 0x0FU,
+ /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to
+ /// lazy loaded dylibs.
+ S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10U,
+
+ LAST_KNOWN_SECTION_TYPE = S_LAZY_DYLIB_SYMBOL_POINTERS,
+
+
+ // Valid section attributes.
+
+ /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine
+ /// instructions.
+ S_ATTR_PURE_INSTRUCTIONS = 1U << 31,
+ /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be
+ /// in a ranlib table of contents.
+ S_ATTR_NO_TOC = 1U << 30,
+ /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section
+ /// in files with the MH_DYLDLINK flag.
+ S_ATTR_STRIP_STATIC_SYMS = 1U << 29,
+ /// S_ATTR_NO_DEAD_STRIP - No dead stripping.
+ S_ATTR_NO_DEAD_STRIP = 1U << 28,
+ /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks.
+ S_ATTR_LIVE_SUPPORT = 1U << 27,
+ /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by
+ /// dyld.
+ S_ATTR_SELF_MODIFYING_CODE = 1U << 26,
+ /// S_ATTR_DEBUG - A debug section.
+ S_ATTR_DEBUG = 1U << 25,
+ /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions.
+ S_ATTR_SOME_INSTRUCTIONS = 1U << 10,
+ /// S_ATTR_EXT_RELOC - Section has external relocation entries.
+ S_ATTR_EXT_RELOC = 1U << 9,
+ /// S_ATTR_LOC_RELOC - Section has local relocation entries.
+ S_ATTR_LOC_RELOC = 1U << 8
+ };
+
+ StringRef getSegmentName() const {
+ // SegmentName is not necessarily null terminated!
+ if (SegmentName[15])
+ return StringRef(SegmentName, 16);
+ return StringRef(SegmentName);
+ }
+ StringRef getSectionName() const {
+ // SectionName is not necessarily null terminated!
+ if (SectionName[15])
+ return StringRef(SectionName, 16);
+ return StringRef(SectionName);
+ }
+
+ unsigned getTypeAndAttributes() const { return TypeAndAttributes; }
+ unsigned getStubSize() const { return Reserved2; }
+
+ /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+ /// This is a string that can appear after a .section directive in a mach-o
+ /// flavored .s file. If successful, this fills in the specified Out
+ /// parameters and returns an empty string. When an invalid section
+ /// specifier is present, this returns a string indicating the problem.
+ static std::string ParseSectionSpecifier(StringRef Spec, // In.
+ StringRef &Segment, // Out.
+ StringRef &Section, // Out.
+ unsigned &TAA, // Out.
+ unsigned &StubSize); // Out.
+
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const;
+};
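+
+// Illustrative sketch (editor's addition, not part of the original patch;
+// the function name is hypothetical): parsing a .section specifier and
+// splitting the resulting TypeAndAttributes word with the masks above.
+inline bool exampleParseTextSpec() {
+  StringRef Segment, Section;
+  unsigned TAA = 0, StubSize = 0;
+  std::string Err = MCSectionMachO::ParseSectionSpecifier(
+      "__TEXT,__text,regular,pure_instructions",
+      Segment, Section, TAA, StubSize);
+  if (!Err.empty())
+    return false;  // Err describes what was wrong with the specifier.
+  return (TAA & MCSectionMachO::SECTION_TYPE) == MCSectionMachO::S_REGULAR &&
+         (TAA & MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS) != 0;
+}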
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 54de8a31076f..248e6b0a4481 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -17,17 +17,21 @@
#include "llvm/Support/DataTypes.h"
namespace llvm {
+ class MCAsmInfo;
+ class MCCodeEmitter;
class MCContext;
- class MCValue;
+ class MCExpr;
class MCInst;
+ class MCInstPrinter;
class MCSection;
class MCSymbol;
+ class StringRef;
class raw_ostream;
/// MCStreamer - Streaming machine code generation interface. This interface
/// is intended to provide a programmatic interface that is very similar to the
/// level that an assembler .s file provides. It has callbacks to emit bytes,
- /// "emit directives", etc. The implementation of this interface retains
+ /// handle directives, etc. The implementation of this interface retains
/// state to know what the current section is etc.
///
/// There are multiple implementations of this interface: one for writing out
@@ -53,6 +57,10 @@ namespace llvm {
SymbolAttrLast = WeakReference
};
+ enum AssemblerFlag {
+ SubsectionsViaSymbols ///< .subsections_via_symbols (Apple)
+ };
+
private:
MCContext &Context;
@@ -62,6 +70,10 @@ namespace llvm {
protected:
MCStreamer(MCContext &Ctx);
+ /// CurSection - This is the current section that code is being emitted to;
+ /// it is kept up to date by SwitchSection.
+ const MCSection *CurSection;
+
public:
virtual ~MCStreamer();
@@ -69,13 +81,17 @@ namespace llvm {
/// @name Symbol & Section Management
/// @{
+
+ /// getCurrentSection - Return the current section that the streamer is
+ /// emitting code to.
+ const MCSection *getCurrentSection() const { return CurSection; }
/// SwitchSection - Set the current section where code is being emitted to
- /// @param Section.
+ /// @param Section. Implementations are required to update CurSection.
///
/// This corresponds to assembler directives like .section, .text, etc.
- virtual void SwitchSection(MCSection *Section) = 0;
-
+ virtual void SwitchSection(const MCSection *Section) = 0;
+
/// EmitLabel - Emit a label for @param Symbol into the current section.
///
/// This corresponds to an assembler statement such as:
@@ -84,11 +100,11 @@ namespace llvm {
/// @param Symbol - The symbol to emit. A given symbol should only be
/// emitted as a label once, and symbols emitted as a label should never be
/// used in an assignment.
- //
- // FIXME: What to do about the current section? Should we get rid of the
- // symbol section in the constructor and initialize it here?
virtual void EmitLabel(MCSymbol *Symbol) = 0;
+ /// EmitAssemblerFlag - Record the specified @param Flag in the output.
+ virtual void EmitAssemblerFlag(AssemblerFlag Flag) = 0;
+
/// EmitAssignment - Emit an assignment of @param Value to @param Symbol.
///
/// This corresponds to an assembler statement such as:
@@ -100,31 +116,46 @@ namespace llvm {
///
/// @param Symbol - The symbol being assigned to.
/// @param Value - The value for the symbol.
- /// @param MakeAbsolute - If true, then the symbol should be given the
- /// absolute value of @param Value, even if @param Value would be
- /// relocatable expression. This corresponds to the ".set" directive.
- virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
- bool MakeAbsolute = false) = 0;
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) = 0;
/// EmitSymbolAttribute - Add the given @param Attribute to @param Symbol.
- //
- // FIXME: This doesn't make much sense, could we just have attributes be on
- // the symbol and make the printer smart enough to add the right symbols?
- // This should work as long as the order of attributes in the file doesn't
- // matter.
virtual void EmitSymbolAttribute(MCSymbol *Symbol,
SymbolAttr Attribute) = 0;
+ /// EmitSymbolDesc - Set the @param DescValue for the @param Symbol.
+ ///
+ /// @param Symbol - The symbol to have its n_desc field set.
+ /// @param DescValue - The value to set into the n_desc field.
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) = 0;
+
+ /// EmitCommonSymbol - Emit a common or local common symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ /// @param ByteAlignment - The alignment of the symbol if
+ /// non-zero. This must be a power of 2 on some targets.
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+ unsigned ByteAlignment) = 0;
+
+ /// EmitZerofill - Emit the zerofill section and an optional symbol.
+ ///
+ /// @param Section - The zerofill section to create and/or put the symbol in.
+ /// @param Symbol - The zerofill symbol to emit, if non-NULL.
+ /// @param Size - The size of the zerofill symbol.
+ /// @param ByteAlignment - The alignment of the zerofill symbol if
+ /// non-zero. This must be a power of 2 on some targets.
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0,unsigned ByteAlignment = 0) = 0;
+
/// @}
/// @name Generating Data
/// @{
- /// EmitBytes - Emit @param Length bytes starting at @param Data into the
- /// output.
+ /// EmitBytes - Emit the bytes in \arg Data into the output.
///
/// This is used to implement assembler directives such as .byte, .ascii,
/// etc.
- virtual void EmitBytes(const char *Data, unsigned Length) = 0;
+ virtual void EmitBytes(const StringRef &Data) = 0;
/// EmitValue - Emit the expression @param Value into the output as a native
/// integer of the given @param Size bytes.
@@ -135,7 +166,7 @@ namespace llvm {
/// @param Value - The value to emit.
/// @param Size - The size of the integer (in bytes) to emit. This must
/// match a native machine width.
- virtual void EmitValue(const MCValue &Value, unsigned Size) = 0;
+ virtual void EmitValue(const MCExpr *Value, unsigned Size) = 0;
/// EmitValueToAlignment - Emit some number of copies of @param Value until
/// the byte alignment @param ByteAlignment is reached.
@@ -163,12 +194,10 @@ namespace llvm {
///
/// This is used to implement assembler directives such as .org.
///
- /// @param Offset - The offset to reach.This may be an expression, but the
+ /// @param Offset - The offset to reach. This may be an expression, but the
/// expression must be associated with the current section.
/// @param Value - The value to use when filling bytes.
- //
- // FIXME: How are we going to signal failures out of this?
- virtual void EmitValueToOffset(const MCValue &Offset,
+ virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0) = 0;
/// @}
@@ -181,10 +210,17 @@ namespace llvm {
virtual void Finish() = 0;
};
+ /// createNullStreamer - Create a dummy machine code streamer, which does
+ /// nothing. This is useful for timing the assembler front end.
+ MCStreamer *createNullStreamer(MCContext &Ctx);
+
/// createAsmStreamer - Create a machine code streamer which will print out
/// assembly for the native target, suitable for compiling with a native
/// assembler.
- MCStreamer *createAsmStreamer(MCContext &Ctx, raw_ostream &OS);
+ MCStreamer *createAsmStreamer(MCContext &Ctx, raw_ostream &OS,
+ const MCAsmInfo &MAI,
+ MCInstPrinter *InstPrint = 0,
+ MCCodeEmitter *CE = 0);
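+
+  /// Illustrative sketch (editor's addition, not part of the original patch;
+  /// the function name is hypothetical): driving the streamer much like a
+  /// hand-written .s file, assuming the caller already has a context, a
+  /// section, and the target's MCAsmInfo.
+  inline void exampleEmit(MCContext &Ctx, raw_ostream &OS,
+                          const MCAsmInfo &MAI, const MCSection *Text,
+                          const StringRef &Bytes) {
+    MCStreamer *S = createAsmStreamer(Ctx, OS, MAI);
+    S->SwitchSection(Text);  // like a .text/.section directive
+    S->EmitBytes(Bytes);     // like a .byte/.ascii directive
+    S->Finish();
+    delete S;
+  }
+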
// FIXME: These two may end up getting rolled into a single
// createObjectStreamer interface, which implements the assembler backend, and
@@ -192,7 +228,8 @@ namespace llvm {
/// createMachOStreamer - Create a machine code streamer which will generate
/// Mach-O format object files.
- MCStreamer *createMachOStreamer(MCContext &Ctx, raw_ostream &OS);
+ MCStreamer *createMachOStreamer(MCContext &Ctx, raw_ostream &OS,
+ MCCodeEmitter *CE = 0);
/// createELFStreamer - Create a machine code streamer which will generate
/// ELF format object files.
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 235e6614f973..5dd7d68585c7 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -15,10 +15,14 @@
#define LLVM_MC_MCSYMBOL_H
#include <string>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
+ class MCAsmInfo;
class MCSection;
class MCContext;
+ class raw_ostream;
/// MCSymbol - Instances of this class represent a symbol name in the MC file,
/// and MCSymbols are created and unique'd by the MCContext class.
@@ -28,38 +32,85 @@ namespace llvm {
/// it is a reference to an external entity, it has a null section.
///
class MCSymbol {
+ // Special sentinel value for the absolute pseudo section.
+ //
+ // FIXME: Use a PointerInt wrapper for this?
+ static const MCSection *AbsolutePseudoSection;
+
/// Name - The name of the symbol.
std::string Name;
- /// Section - The section the symbol is defined in, or null if the symbol
- /// has not been defined in the associated translation unit.
- MCSection *Section;
-
+
+ /// Section - The section the symbol is defined in. This is null for
+ /// undefined symbols, and the special AbsolutePseudoSection value for
+ /// absolute symbols.
+ const MCSection *Section;
+
/// IsTemporary - True if this is an assembler temporary label, which
/// typically does not survive in the .o file's symbol table. Usually
/// "Lfoo" or ".foo".
unsigned IsTemporary : 1;
-
- /// IsExternal - True if this symbol has been implicitly defined as an
- /// external, for example by using it in an expression without ever emitting
- /// it as a label. The @var Section for an external symbol is always null.
- unsigned IsExternal : 1;
private: // MCContext creates and uniques these.
friend class MCContext;
- MCSymbol(const char *_Name, bool _IsTemporary)
- : Name(_Name), Section(0), IsTemporary(_IsTemporary), IsExternal(false) {}
+ MCSymbol(const StringRef &_Name, bool _IsTemporary)
+ : Name(_Name), Section(0), IsTemporary(_IsTemporary) {}
MCSymbol(const MCSymbol&); // DO NOT IMPLEMENT
void operator=(const MCSymbol&); // DO NOT IMPLEMENT
public:
-
- MCSection *getSection() const { return Section; }
- void setSection(MCSection *Value) { Section = Value; }
+ /// getName - Get the symbol name.
+ const std::string &getName() const { return Name; }
- bool isExternal() const { return IsExternal; }
- void setExternal(bool Value) { IsExternal = Value; }
+ /// @name Symbol Type
+ /// @{
- const std::string &getName() const { return Name; }
+ /// isTemporary - Check if this is an assembler temporary symbol.
+ bool isTemporary() const {
+ return IsTemporary;
+ }
+
+ /// isDefined - Check if this symbol is defined (i.e., it has an address).
+ ///
+ /// Defined symbols are either absolute or in some section.
+ bool isDefined() const {
+ return Section != 0;
+ }
+
+ /// isUndefined - Check if this symbol is undefined (i.e., implicitly defined).
+ bool isUndefined() const {
+ return !isDefined();
+ }
+
+ /// isAbsolute - Check if this is an absolute symbol.
+ bool isAbsolute() const {
+ return Section == AbsolutePseudoSection;
+ }
+
+ /// getSection - Get the section associated with a defined, non-absolute
+ /// symbol.
+ const MCSection &getSection() const {
+ assert(!isUndefined() && !isAbsolute() && "Invalid accessor!");
+ return *Section;
+ }
+
+ /// setSection - Mark the symbol as defined in the section \arg S.
+ void setSection(const MCSection &S) { Section = &S; }
+
+ /// setUndefined - Mark the symbol as undefined.
+ void setUndefined() {
+ Section = 0;
+ }
+
+ /// setAbsolute - Mark the symbol as absolute.
+ void setAbsolute() { Section = AbsolutePseudoSection; }
+
+ /// @}
+
+ /// print - Print the value to the stream \arg OS.
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+
+ /// dump - Print the value to stderr.
+ void dump() const;
};
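+
+/// Illustrative sketch (editor's addition, not part of the original patch;
+/// the function name is hypothetical): a symbol starts out undefined and
+/// becomes defined by pinning it to a section or marking it absolute.
+inline bool exampleDefineIn(MCSymbol &Sym, const MCSection &Text) {
+  if (Sym.isUndefined())
+    Sym.setSection(Text);
+  return Sym.isDefined() && !Sym.isAbsolute();
+}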
} // end namespace llvm
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index d032f170c3f6..62aca6e3a6f6 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -20,6 +20,7 @@
namespace llvm {
class MCSymbol;
+class raw_ostream;
/// MCValue - This represents an "assembler immediate". In its most general
/// form, this can hold "SymbolA - SymbolB + imm64". Not all targets support
@@ -32,13 +33,13 @@ class MCSymbol;
/// Note that this class must remain a simple POD value class, because we need
/// it to live in unions etc.
class MCValue {
- MCSymbol *SymA, *SymB;
+ const MCSymbol *SymA, *SymB;
int64_t Cst;
public:
int64_t getConstant() const { return Cst; }
- MCSymbol *getSymA() const { return SymA; }
- MCSymbol *getSymB() const { return SymB; }
+ const MCSymbol *getSymA() const { return SymA; }
+ const MCSymbol *getSymB() const { return SymB; }
/// isAbsolute - Is this an absolute (as opposed to relocatable) value.
bool isAbsolute() const { return !SymA && !SymB; }
@@ -48,11 +49,19 @@ public:
///
/// @result - The value's associated section, or null for external or constant
/// values.
- MCSection *getAssociatedSection() const {
- return SymA ? SymA->getSection() : 0;
- }
+ //
+ // FIXME: Switch to a tagged section, so this can return the tagged section
+ // value.
+ const MCSection *getAssociatedSection() const;
+
+ /// print - Print the value to the stream \arg OS.
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+
+ /// dump - Print the value to stderr.
+ void dump() const;
- static MCValue get(MCSymbol *SymA, MCSymbol *SymB = 0, int64_t Val = 0) {
+ static MCValue get(const MCSymbol *SymA, const MCSymbol *SymB = 0,
+ int64_t Val = 0) {
MCValue R;
assert((!SymB || SymA) && "Invalid relocatable MCValue!");
R.Cst = Val;
diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h
new file mode 100644
index 000000000000..945cff790a48
--- /dev/null
+++ b/include/llvm/MC/SectionKind.h
@@ -0,0 +1,221 @@
+//===-- llvm/MC/SectionKind.h - Classification of sections -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SectionKind class, a simple POD value used to
+// classify the contents of a section.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_SECTIONKIND_H
+#define LLVM_MC_SECTIONKIND_H
+
+namespace llvm {
+
+/// SectionKind - This is a simple POD value that classifies the properties of
+/// a section. A section is classified into the deepest possible
+/// classification, and then the target maps them onto their sections based on
+/// what capabilities they have.
+///
+/// The comments below describe these as if they were an inheritance hierarchy
+/// in order to explain the predicates below.
+///
+class SectionKind {
+ enum Kind {
+ /// Metadata - Debug info sections or other metadata.
+ Metadata,
+
+ /// Text - Text section, used for functions and other executable code.
+ Text,
+
+ /// ReadOnly - Data that is never written to at program runtime by the
+ /// program or the dynamic linker. Things in the top-level readonly
+ /// SectionKind are not mergeable.
+ ReadOnly,
+
+ /// MergeableCString - Any null-terminated string which allows merging.
+ /// These values are known to end in a nul value of the specified size,
+ /// not otherwise contain a nul value, and be mergeable. This allows the
+ /// linker to unique the strings if it so desires.
+
+ /// Mergeable1ByteCString - 1-byte mergeable, null-terminated string.
+ Mergeable1ByteCString,
+
+ /// Mergeable2ByteCString - 2-byte mergeable, null-terminated string.
+ Mergeable2ByteCString,
+
+ /// Mergeable4ByteCString - 4-byte mergeable, null-terminated string.
+ Mergeable4ByteCString,
+
+ /// MergeableConst - These are sections for merging fixed-length
+ /// constants together. For example, this can be used to unique
+ /// constant pool entries etc.
+ MergeableConst,
+
+ /// MergeableConst4 - This is a section used by 4-byte constants,
+ /// for example, floats.
+ MergeableConst4,
+
+ /// MergeableConst8 - This is a section used by 8-byte constants,
+ /// for example, doubles.
+ MergeableConst8,
+
+ /// MergeableConst16 - This is a section used by 16-byte constants,
+ /// for example, vectors.
+ MergeableConst16,
+
+ /// Writeable - This is the base of all segments that need to be written
+ /// to during program runtime.
+
+ /// ThreadLocal - This is the base of all TLS segments. All TLS
+ /// objects must be writeable, otherwise there is no reason for them to
+ /// be thread local!
+
+ /// ThreadBSS - Zero-initialized TLS data objects.
+ ThreadBSS,
+
+ /// ThreadData - Initialized TLS data objects.
+ ThreadData,
+
+ /// GlobalWriteableData - Writeable data that is global (not thread
+ /// local).
+
+ /// BSS - Zero initialized writeable data.
+ BSS,
+
+ /// DataRel - This is the most general form of data that is written
+ /// to by the program, it can have random relocations to arbitrary
+ /// globals.
+ DataRel,
+
+ /// DataRelLocal - This is writeable data that has a non-zero
+ /// initializer and has relocations in it, but all of the
+ /// relocations are known to be within the final linked image
+ /// the global is linked into.
+ DataRelLocal,
+
+ /// DataNoRel - This is writeable data that has a non-zero
+ /// initializer, but whose initializer is known to have no
+ /// relocations.
+ DataNoRel,
+
+ /// ReadOnlyWithRel - These are global variables that are never
+ /// written to by the program, but that have relocations, so they
+ /// must be stuck in a writeable section so that the dynamic linker
+ /// can write to them. If it chooses to, the dynamic linker can
+ /// mark the pages these globals end up on as read-only after it is
+ /// done with its relocation phase.
+ ReadOnlyWithRel,
+
+ /// ReadOnlyWithRelLocal - This is data that is readonly by the
+ /// program, but must be writeable so that the dynamic linker
+ /// can perform relocations in it. This is used when we know
+ /// that all the relocations are to globals in this final
+ /// linked image.
+ ReadOnlyWithRelLocal
+
+ } K : 8;
+public:
+
+ bool isMetadata() const { return K == Metadata; }
+ bool isText() const { return K == Text; }
+
+ bool isReadOnly() const {
+ return K == ReadOnly || isMergeableCString() ||
+ isMergeableConst();
+ }
+
+ bool isMergeableCString() const {
+ return K == Mergeable1ByteCString || K == Mergeable2ByteCString ||
+ K == Mergeable4ByteCString;
+ }
+ bool isMergeable1ByteCString() const { return K == Mergeable1ByteCString; }
+ bool isMergeable2ByteCString() const { return K == Mergeable2ByteCString; }
+ bool isMergeable4ByteCString() const { return K == Mergeable4ByteCString; }
+
+ bool isMergeableConst() const {
+ return K == MergeableConst || K == MergeableConst4 ||
+ K == MergeableConst8 || K == MergeableConst16;
+ }
+ bool isMergeableConst4() const { return K == MergeableConst4; }
+ bool isMergeableConst8() const { return K == MergeableConst8; }
+ bool isMergeableConst16() const { return K == MergeableConst16; }
+
+ bool isWriteable() const {
+ return isThreadLocal() || isGlobalWriteableData();
+ }
+
+ bool isThreadLocal() const {
+ return K == ThreadData || K == ThreadBSS;
+ }
+
+ bool isThreadBSS() const { return K == ThreadBSS; }
+ bool isThreadData() const { return K == ThreadData; }
+
+ bool isGlobalWriteableData() const {
+ return isBSS() || isDataRel() || isReadOnlyWithRel();
+ }
+
+ bool isBSS() const { return K == BSS; }
+
+ bool isDataRel() const {
+ return K == DataRel || K == DataRelLocal || K == DataNoRel;
+ }
+
+ bool isDataRelLocal() const {
+ return K == DataRelLocal || K == DataNoRel;
+ }
+
+ bool isDataNoRel() const { return K == DataNoRel; }
+
+ bool isReadOnlyWithRel() const {
+ return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal;
+ }
+
+ bool isReadOnlyWithRelLocal() const {
+ return K == ReadOnlyWithRelLocal;
+ }
+private:
+ static SectionKind get(Kind K) {
+ SectionKind Res;
+ Res.K = K;
+ return Res;
+ }
+public:
+
+ static SectionKind getMetadata() { return get(Metadata); }
+ static SectionKind getText() { return get(Text); }
+ static SectionKind getReadOnly() { return get(ReadOnly); }
+ static SectionKind getMergeable1ByteCString() {
+ return get(Mergeable1ByteCString);
+ }
+ static SectionKind getMergeable2ByteCString() {
+ return get(Mergeable2ByteCString);
+ }
+ static SectionKind getMergeable4ByteCString() {
+ return get(Mergeable4ByteCString);
+ }
+ static SectionKind getMergeableConst() { return get(MergeableConst); }
+ static SectionKind getMergeableConst4() { return get(MergeableConst4); }
+ static SectionKind getMergeableConst8() { return get(MergeableConst8); }
+ static SectionKind getMergeableConst16() { return get(MergeableConst16); }
+ static SectionKind getThreadBSS() { return get(ThreadBSS); }
+ static SectionKind getThreadData() { return get(ThreadData); }
+ static SectionKind getBSS() { return get(BSS); }
+ static SectionKind getDataRel() { return get(DataRel); }
+ static SectionKind getDataRelLocal() { return get(DataRelLocal); }
+ static SectionKind getDataNoRel() { return get(DataNoRel); }
+ static SectionKind getReadOnlyWithRel() { return get(ReadOnlyWithRel); }
+ static SectionKind getReadOnlyWithRelLocal(){
+ return get(ReadOnlyWithRelLocal);
+ }
+};
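+
+// Illustrative sketch (editor's addition, not part of the original patch;
+// the function name is hypothetical): the predicates form a hierarchy, so a
+// mergeable C string also classifies as read-only, while plain ReadOnly does
+// not classify as mergeable.
+inline bool exampleSectionKindHierarchy() {
+  SectionKind CStr = SectionKind::getMergeable1ByteCString();
+  SectionKind RO   = SectionKind::getReadOnly();
+  return CStr.isReadOnly() && CStr.isMergeableCString() &&
+         RO.isReadOnly() && !RO.isMergeableCString();
+}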
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
new file mode 100644
index 000000000000..63c2da2e7dfd
--- /dev/null
+++ b/include/llvm/Metadata.h
@@ -0,0 +1,377 @@
+//===-- llvm/Metadata.h - Metadata definitions ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for metadata subclasses.
+/// They represent the different flavors of metadata that live in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MDNODE_H
+#define LLVM_MDNODE_H
+
+#include "llvm/User.h"
+#include "llvm/Type.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+class Constant;
+class Instruction;
+class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+// MetadataBase - A base class for MDNode, MDString and NamedMDNode.
+class MetadataBase : public User {
+private:
+ /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// the number actually in use.
+ unsigned ReservedSpace;
+
+protected:
+ MetadataBase(const Type *Ty, unsigned scid)
+ : User(Ty, scid, NULL, 0), ReservedSpace(0) {}
+
+ void resizeOperands(unsigned NumOps);
+public:
+ /// isNullValue - Return true if this is the value that would be returned by
+ /// getNullValue. This always returns false because getNullValue will never
+ /// produce metadata.
+ virtual bool isNullValue() const {
+ return false;
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const MetadataBase *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == MDStringVal || V->getValueID() == MDNodeVal
+ || V->getValueID() == NamedMDNodeVal;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// MDString - a single uniqued string.
+/// These are used to efficiently contain a byte sequence for metadata.
+/// MDString is always unnamed.
+class MDString : public MetadataBase {
+ MDString(const MDString &); // DO NOT IMPLEMENT
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ unsigned getNumOperands(); // DO NOT IMPLEMENT
+
+ StringRef Str;
+protected:
+ explicit MDString(LLVMContext &C, const char *begin, unsigned l)
+ : MetadataBase(Type::getMetadataTy(C), Value::MDStringVal), Str(begin, l) {}
+
+public:
+ // Do not allocate any space for operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ static MDString *get(LLVMContext &Context, const StringRef &Str);
+
+ StringRef getString() const { return Str; }
+
+ unsigned length() const { return Str.size(); }
+
+ /// begin() - Pointer to the first byte of the string.
+ ///
+ const char *begin() const { return Str.begin(); }
+
+ /// end() - Pointer to one byte past the end of the string.
+ ///
+ const char *end() const { return Str.end(); }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const MDString *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == MDStringVal;
+ }
+};
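+
+/// Illustrative sketch (editor's addition, not part of the original patch;
+/// the function name is hypothetical): MDStrings are uniqued by the context,
+/// so two gets with equal bytes yield the same object.
+inline bool exampleMDStringUniquing(LLVMContext &Ctx) {
+  MDString *A = MDString::get(Ctx, "banana");
+  MDString *B = MDString::get(Ctx, "banana");
+  return A == B && A->getString() == "banana";
+}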
+
+//===----------------------------------------------------------------------===//
+/// MDNode - a tuple of other values.
+/// These contain a list of the values that represent the metadata.
+/// MDNode is always unnamed.
+class MDNode : public MetadataBase, public FoldingSetNode {
+ MDNode(const MDNode &); // DO NOT IMPLEMENT
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ // getNumOperands - Make this only available for private uses.
+ unsigned getNumOperands() { return User::getNumOperands(); }
+
+ friend class ElementVH;
+ // Use CallbackVH to hold MDNode elements.
+ struct ElementVH : public CallbackVH {
+ MDNode *Parent;
+ ElementVH(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {}
+ ~ElementVH() {}
+
+ virtual void deleted() {
+ Parent->replaceElement(this->operator Value*(), 0);
+ }
+
+ virtual void allUsesReplacedWith(Value *NV) {
+ Parent->replaceElement(this->operator Value*(), NV);
+ }
+ };
+ // Replace each instance of F from the element list of this node with T.
+ void replaceElement(Value *F, Value *T);
+
+ SmallVector<ElementVH, 4> Node;
+
+protected:
+ explicit MDNode(LLVMContext &C, Value*const* Vals, unsigned NumVals);
+public:
+ // Do not allocate any space for operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ // Constructors and destructors.
+ static MDNode *get(LLVMContext &Context,
+ Value* const* Vals, unsigned NumVals);
+
+ /// dropAllReferences - Remove all uses and clear node vector.
+ void dropAllReferences();
+
+ /// ~MDNode - Destroy MDNode.
+ ~MDNode();
+
+ /// getElement - Return specified element.
+ Value *getElement(unsigned i) const {
+ assert (getNumElements() > i && "Invalid element number!");
+ return Node[i];
+ }
+
+ /// getNumElements - Return number of MDNode elements.
+ unsigned getNumElements() const {
+ return Node.size();
+ }
+
+ // Element access
+ typedef SmallVectorImpl<ElementVH>::const_iterator const_elem_iterator;
+ typedef SmallVectorImpl<ElementVH>::iterator elem_iterator;
+ /// elem_empty - Return true if MDNode is empty.
+ bool elem_empty() const { return Node.empty(); }
+ const_elem_iterator elem_begin() const { return Node.begin(); }
+ const_elem_iterator elem_end() const { return Node.end(); }
+ elem_iterator elem_begin() { return Node.begin(); }
+ elem_iterator elem_end() { return Node.end(); }
+
+ /// isNullValue - Return true if this is the value that would be returned by
+ /// getNullValue. This always returns false because getNullValue will never
+ /// produce metadata.
+ virtual bool isNullValue() const {
+ return false;
+ }
+
+ /// Profile - Calculate a unique identifier for this MDNode to collapse
+ /// duplicates.
+ void Profile(FoldingSetNodeID &ID) const;
+
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ llvm_unreachable("This should never be called because MDNodes have no ops");
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const MDNode *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == MDNodeVal;
+ }
+};
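+
+/// Illustrative sketch (editor's addition, not part of the original patch;
+/// the function name is hypothetical): building a two-element node. Elements
+/// are held through callback value handles, so a deleted element is swapped
+/// for null instead of dangling.
+inline MDNode *exampleMakePair(LLVMContext &Ctx, Value *A, Value *B) {
+  Value *Elts[] = { A, B };
+  return MDNode::get(Ctx, Elts, 2);
+}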
+
+//===----------------------------------------------------------------------===//
+/// WeakMetadataVH - a weak value handle for metadata.
+class WeakMetadataVH : public WeakVH {
+public:
+ WeakMetadataVH() : WeakVH() {}
+ WeakMetadataVH(MetadataBase *M) : WeakVH(M) {}
+ WeakMetadataVH(const WeakMetadataVH &RHS) : WeakVH(RHS) {}
+
+ operator Value*() const {
+ llvm_unreachable("WeakMetadataVH only handles Metadata");
+ }
+
+ operator MetadataBase*() const {
+ return dyn_cast_or_null<MetadataBase>(getValPtr());
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// NamedMDNode - a tuple of other metadata.
+/// NamedMDNode is always named. All NamedMDNode elements have metadata type.
+template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+
+class NamedMDNode : public MetadataBase, public ilist_node<NamedMDNode> {
+ friend class SymbolTableListTraits<NamedMDNode, Module>;
+ friend class LLVMContextImpl;
+
+ NamedMDNode(const NamedMDNode &); // DO NOT IMPLEMENT
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ // getNumOperands - Make this only available for private uses.
+ unsigned getNumOperands() { return User::getNumOperands(); }
+
+ Module *Parent;
+ SmallVector<WeakMetadataVH, 4> Node;
+ typedef SmallVectorImpl<WeakMetadataVH>::iterator elem_iterator;
+
+protected:
+ explicit NamedMDNode(LLVMContext &C, const Twine &N, MetadataBase*const* Vals,
+ unsigned NumVals, Module *M = 0);
+public:
+ // Do not allocate any space for operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ static NamedMDNode *Create(LLVMContext &C, const Twine &N,
+ MetadataBase*const*MDs,
+ unsigned NumMDs, Module *M = 0) {
+ return new NamedMDNode(C, N, MDs, NumMDs, M);
+ }
+
+ static NamedMDNode *Create(const NamedMDNode *NMD, Module *M = 0);
+
+ /// eraseFromParent - Drop all references and remove the node from parent
+ /// module.
+ void eraseFromParent();
+
+ /// dropAllReferences - Remove all uses and clear node vector.
+ void dropAllReferences();
+
+ /// ~NamedMDNode - Destroy NamedMDNode.
+ ~NamedMDNode();
+
+ /// getParent - Get the module that holds this named metadata collection.
+ inline Module *getParent() { return Parent; }
+ inline const Module *getParent() const { return Parent; }
+ void setParent(Module *M) { Parent = M; }
+
+ /// getElement - Return specified element.
+ MetadataBase *getElement(unsigned i) const {
+ assert (getNumElements() > i && "Invalid element number!");
+ return Node[i];
+ }
+
+ /// getNumElements - Return number of NamedMDNode elements.
+ unsigned getNumElements() const {
+ return Node.size();
+ }
+
+ /// addElement - Add metadata element.
+ void addElement(MetadataBase *M) {
+ resizeOperands(0);
+ OperandList[NumOperands++] = M;
+ Node.push_back(WeakMetadataVH(M));
+ }
+
+ typedef SmallVectorImpl<WeakMetadataVH>::const_iterator const_elem_iterator;
+ bool elem_empty() const { return Node.empty(); }
+ const_elem_iterator elem_begin() const { return Node.begin(); }
+ const_elem_iterator elem_end() const { return Node.end(); }
+ elem_iterator elem_begin() { return Node.begin(); }
+ elem_iterator elem_end() { return Node.end(); }
+
+ /// isNullValue - Return true if this is the value that would be returned by
+ /// getNullValue. This always returns false because getNullValue will never
+ /// produce metadata.
+ virtual bool isNullValue() const {
+ return false;
+ }
+
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ llvm_unreachable(
+ "This should never be called because NamedMDNodes have no ops");
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const NamedMDNode *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == NamedMDNodeVal;
+ }
+};
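+
+/// Illustrative sketch (editor's addition, not part of the original patch;
+/// the function and metadata names are hypothetical): creating a named node
+/// in a module and appending another element afterwards.
+inline NamedMDNode *exampleNamed(LLVMContext &Ctx, Module *M,
+                                 MDNode *First, MDNode *Second) {
+  MetadataBase *Elts[] = { First };
+  NamedMDNode *NMD = NamedMDNode::Create(Ctx, "llvm.example", Elts, 1, M);
+  NMD->addElement(Second);
+  return NMD;
+}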
+
+//===----------------------------------------------------------------------===//
+/// MetadataContext -
+/// MetadataContext handles uniquing and assignment of IDs for custom metadata
+/// types. Custom metadata handler names must not contain spaces and must
+/// match the regular expression [a-zA-Z$._][a-zA-Z$._0-9]*.
+class MetadataContext {
+public:
+ typedef std::pair<unsigned, WeakVH> MDPairTy;
+ typedef SmallVector<MDPairTy, 2> MDMapTy;
+ typedef DenseMap<const Instruction *, MDMapTy> MDStoreTy;
+ friend class BitcodeReader;
+private:
+
+ /// MetadataStore - Collection of metadata used in this context.
+ MDStoreTy MetadataStore;
+
+ /// MDHandlerNames - Map to hold metadata handler names.
+ StringMap<unsigned> MDHandlerNames;
+
+public:
+ /// RegisterMDKind - Register a new metadata kind and return its ID.
+ /// A metadata kind can be registered only once.
+ unsigned RegisterMDKind(const char *Name);
+
+ /// getMDKind - Return metadata kind. If the requested metadata kind
+ /// is not registered then return 0.
+ unsigned getMDKind(const char *Name);
+
+ /// validName - Return true if Name is a valid custom metadata handler name.
+ bool validName(const char *Name);
+
+ /// getMD - Get the metadata of the given kind attached to an Instruction.
+ /// If the metadata is not found then return 0.
+ MDNode *getMD(unsigned Kind, const Instruction *Inst);
+
+ /// getMDs - Get the metadata attached to an Instruction.
+ const MDMapTy *getMDs(const Instruction *Inst);
+
+ /// addMD - Attach metadata of the given kind to an Instruction.
+ void addMD(unsigned Kind, MDNode *Node, Instruction *Inst);
+
+ /// removeMD - Remove metadata of the given kind attached to an instruction.
+ void removeMD(unsigned Kind, Instruction *Inst);
+
+ /// removeMDs - Remove all metadata attached to an instruction.
+ void removeMDs(const Instruction *Inst);
+
+ /// copyMD - If metadata is attached to Instruction In1 then attach
+ /// the same metadata to In2.
+ void copyMD(Instruction *In1, Instruction *In2);
+
+ /// getHandlerNames - Get handler names. This is used by bitcode
+ /// writer.
+ const StringMap<unsigned> *getHandlerNames();
+
+ /// ValueIsDeleted - This handler is used to update metadata store
+ /// when a value is deleted.
+ void ValueIsDeleted(const Value *V) {}
+ void ValueIsDeleted(const Instruction *Inst) {
+ removeMDs(Inst);
+ }
+ void ValueIsRAUWd(Value *V1, Value *V2);
+
+ /// ValueIsCloned - This handler is used to update metadata store
+ /// when In1 is cloned to create In2.
+ void ValueIsCloned(const Instruction *In1, Instruction *In2);
+};
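+
+/// Illustrative sketch (editor's addition, not part of the original patch;
+/// the function and kind names are hypothetical): a kind is registered once,
+/// then used to attach and query per-instruction metadata.
+inline void exampleAttach(MetadataContext &TheMetadata,
+                          Instruction *I, MDNode *N) {
+  unsigned Kind = TheMetadata.getMDKind("example");
+  if (!Kind)
+    Kind = TheMetadata.RegisterMDKind("example");
+  TheMetadata.addMD(Kind, N, I);
+  assert(TheMetadata.getMD(Kind, I) == N && "round trip failed");
+}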
+
+} // end llvm namespace
+
+#endif
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 7a139cc1e0a9..501625df7a3d 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -18,6 +18,7 @@
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
+#include "llvm/Metadata.h"
#include "llvm/Support/DataTypes.h"
#include <vector>
@@ -56,6 +57,21 @@ template<> struct ilist_traits<GlobalAlias>
static GlobalAlias *createSentinel();
static void destroySentinel(GlobalAlias *GA) { delete GA; }
};
+template<> struct ilist_traits<NamedMDNode>
+ : public SymbolTableListTraits<NamedMDNode, Module> {
+ // createSentinel is used to get hold of a node that marks the end of
+ // the list...
+ NamedMDNode *createSentinel() const {
+ return static_cast<NamedMDNode*>(&Sentinel);
+ }
+ static void destroySentinel(NamedMDNode*) {}
+
+ NamedMDNode *provideInitialHead() const { return createSentinel(); }
+ NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); }
+ static void noteHead(NamedMDNode*, NamedMDNode*) {}
+private:
+ mutable ilist_node<NamedMDNode> Sentinel;
+};
/// A Module instance is used to store all the information related to an
/// LLVM module. Modules are the top level container of all other LLVM
@@ -78,25 +94,31 @@ public:
typedef iplist<Function> FunctionListType;
/// The type for the list of aliases.
typedef iplist<GlobalAlias> AliasListType;
+ /// The type for the list of named metadata.
+ typedef iplist<NamedMDNode> NamedMDListType;
/// The type for the list of dependent libraries.
typedef std::vector<std::string> LibraryListType;
/// The Global Variable iterator.
- typedef GlobalListType::iterator global_iterator;
+ typedef GlobalListType::iterator global_iterator;
/// The Global Variable constant iterator.
- typedef GlobalListType::const_iterator const_global_iterator;
+ typedef GlobalListType::const_iterator const_global_iterator;
/// The Function iterators.
- typedef FunctionListType::iterator iterator;
+ typedef FunctionListType::iterator iterator;
/// The Function constant iterator
- typedef FunctionListType::const_iterator const_iterator;
+ typedef FunctionListType::const_iterator const_iterator;
/// The Global Alias iterators.
- typedef AliasListType::iterator alias_iterator;
+ typedef AliasListType::iterator alias_iterator;
/// The Global Alias constant iterator
- typedef AliasListType::const_iterator const_alias_iterator;
+ typedef AliasListType::const_iterator const_alias_iterator;
+ /// The named metadata iterators.
+ typedef NamedMDListType::iterator named_metadata_iterator;
+ /// The named metadata constant iterators.
+ typedef NamedMDListType::const_iterator const_named_metadata_iterator;
/// The Library list iterator.
typedef LibraryListType::const_iterator lib_iterator;
@@ -110,12 +132,13 @@ public:
/// @name Member Variables
/// @{
private:
- LLVMContext& Context; ///< The LLVMContext from which types and
+ LLVMContext& Context; ///< The LLVMContext from which types and
///< constants are allocated.
GlobalListType GlobalList; ///< The Global Variables in the module
FunctionListType FunctionList; ///< The Functions in the module
AliasListType AliasList; ///< The Aliases in the module
LibraryListType LibraryList; ///< The Libraries needed by the module
+ NamedMDListType NamedMDList; ///< The named metadata in the module
std::string GlobalScopeAsm; ///< Inline Asm at global scope.
ValueSymbolTable *ValSymTab; ///< Symbol table for values
TypeSymbolTable *TypeSymTab; ///< Symbol table for types
@@ -131,7 +154,7 @@ private:
public:
/// The Module constructor. Note that there is no default constructor. You
/// must provide a name for the module upon construction.
- explicit Module(const std::string &ModuleID, LLVMContext& C);
+ explicit Module(const StringRef &ModuleID, LLVMContext& C);
/// The module destructor. This will dropAllReferences.
~Module();
@@ -146,7 +169,7 @@ public:
/// Get the data layout string for the module's target platform. This encodes
/// the type sizes and alignments expected by this module.
/// @returns the data layout as a string
- const std::string& getDataLayout() const { return DataLayout; }
+ const std::string &getDataLayout() const { return DataLayout; }
/// Get the target triple which is a string describing the target host.
/// @returns a string containing the target triple.
@@ -173,20 +196,20 @@ public:
public:
/// Set the module identifier.
- void setModuleIdentifier(const std::string &ID) { ModuleID = ID; }
+ void setModuleIdentifier(const StringRef &ID) { ModuleID = ID; }
/// Set the data layout
- void setDataLayout(const std::string& DL) { DataLayout = DL; }
+ void setDataLayout(const StringRef &DL) { DataLayout = DL; }
/// Set the target triple.
- void setTargetTriple(const std::string &T) { TargetTriple = T; }
+ void setTargetTriple(const StringRef &T) { TargetTriple = T; }
/// Set the module-scope inline assembly blocks.
- void setModuleInlineAsm(const std::string &Asm) { GlobalScopeAsm = Asm; }
+ void setModuleInlineAsm(const StringRef &Asm) { GlobalScopeAsm = Asm; }
/// Append to the module-scope inline assembly blocks, automatically
/// appending a newline to the end.
- void appendModuleInlineAsm(const std::string &Asm) {
+ void appendModuleInlineAsm(const StringRef &Asm) {
GlobalScopeAsm += Asm;
GlobalScopeAsm += '\n';
}
@@ -198,8 +221,7 @@ public:
/// getNamedValue - Return the first global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
- GlobalValue *getNamedValue(const std::string &Name) const;
- GlobalValue *getNamedValue(const char *Name) const;
+ GlobalValue *getNamedValue(const StringRef &Name) const;
/// @}
/// @name Function Accessors
@@ -214,10 +236,10 @@ public:
/// the existing function.
/// 4. Finally, the function exists but has the wrong prototype: return the
/// function with a constantexpr cast to the right prototype.
- Constant *getOrInsertFunction(const std::string &Name, const FunctionType *T,
+ Constant *getOrInsertFunction(const StringRef &Name, const FunctionType *T,
AttrListPtr AttributeList);
- Constant *getOrInsertFunction(const std::string &Name, const FunctionType *T);
+ Constant *getOrInsertFunction(const StringRef &Name, const FunctionType *T);
/// getOrInsertFunction - Look up the specified function in the module symbol
/// table. If it does not exist, add a prototype for the function and return
@@ -226,21 +248,20 @@ public:
/// named function has a different type. This version of the method takes a
/// null terminated list of function arguments, which makes it easier for
/// clients to use.
- Constant *getOrInsertFunction(const std::string &Name,
+ Constant *getOrInsertFunction(const StringRef &Name,
AttrListPtr AttributeList,
const Type *RetTy, ...) END_WITH_NULL;
- Constant *getOrInsertFunction(const std::string &Name, const Type *RetTy, ...)
+ Constant *getOrInsertFunction(const StringRef &Name, const Type *RetTy, ...)
END_WITH_NULL;
- Constant *getOrInsertTargetIntrinsic(const std::string &Name,
+ Constant *getOrInsertTargetIntrinsic(const StringRef &Name,
const FunctionType *Ty,
AttrListPtr AttributeList);
/// getFunction - Look up the specified function in the module symbol table.
/// If it does not exist, return null.
- Function *getFunction(const std::string &Name) const;
- Function *getFunction(const char *Name) const;
+ Function *getFunction(const StringRef &Name) const;
/// @}
/// @name Global Variable Accessors
@@ -250,13 +271,13 @@ public:
/// symbol table. If it does not exist, return null. If AllowInternal is set
/// to true, this function will return types that have InternalLinkage. By
/// default, these types are not returned.
- GlobalVariable *getGlobalVariable(const std::string &Name,
+ GlobalVariable *getGlobalVariable(const StringRef &Name,
bool AllowInternal = false) const;
/// getNamedGlobal - Return the first global variable in the module with the
/// specified name, of arbitrary type. This method returns null if a global
/// with the specified name is not found.
- GlobalVariable *getNamedGlobal(const std::string &Name) const {
+ GlobalVariable *getNamedGlobal(const StringRef &Name) const {
return getGlobalVariable(Name, true);
}
@@ -267,7 +288,7 @@ public:
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return
/// the existing global.
- Constant *getOrInsertGlobal(const std::string &Name, const Type *Ty);
+ Constant *getOrInsertGlobal(const StringRef &Name, const Type *Ty);
/// @}
/// @name Global Alias Accessors
@@ -276,7 +297,21 @@ public:
/// getNamedAlias - Return the first global alias in the module with the
/// specified name, of arbitrary type. This method returns null if a global
/// with the specified name is not found.
- GlobalAlias *getNamedAlias(const std::string &Name) const;
+ GlobalAlias *getNamedAlias(const StringRef &Name) const;
+
+/// @}
+/// @name Named Metadata Accessors
+/// @{
+public:
+ /// getNamedMetadata - Return the first NamedMDNode in the module with the
+ /// specified name. This method returns null if a NamedMDNode with the
+ /// specified name is not found.
+ NamedMDNode *getNamedMetadata(const StringRef &Name) const;
+
+ /// getOrInsertNamedMetadata - Return the first named MDNode in the module
+ /// with the specified name. This method returns a new NamedMDNode if a
+ /// NamedMDNode with the specified name is not found.
+ NamedMDNode *getOrInsertNamedMetadata(const StringRef &Name);
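+
+  // Editor's illustration (not part of the original patch; the metadata name
+  // is hypothetical): a typical lookup-or-create sequence:
+  //   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.example");
+  //   NMD->addElement(SomeMDNode);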
/// @}
/// @name Type Accessors
@@ -285,7 +320,7 @@ public:
/// addTypeName - Insert an entry in the symbol table mapping Str to Type. If
/// there is already an entry for this name, true is returned and the symbol
/// table is not modified.
- bool addTypeName(const std::string &Name, const Type *Ty);
+ bool addTypeName(const StringRef &Name, const Type *Ty);
/// getTypeName - If there is at least one entry in the symbol table for the
/// specified type, return it.
@@ -293,7 +328,7 @@ public:
/// getTypeByName - Return the type with the specified name in this module, or
/// null if there is none by that name.
- const Type *getTypeByName(const std::string &Name) const;
+ const Type *getTypeByName(const StringRef &Name) const;
/// @}
/// @name Direct access to the globals list, functions list, and symbol table
@@ -320,6 +355,13 @@ public:
static iplist<GlobalAlias> Module::*getSublistAccess(GlobalAlias*) {
return &Module::AliasList;
}
+ /// Get the Module's list of named metadata (constant).
+ const NamedMDListType &getNamedMDList() const { return NamedMDList; }
+ /// Get the Module's list of named metadata.
+ NamedMDListType &getNamedMDList() { return NamedMDList; }
+ static iplist<NamedMDNode> Module::*getSublistAccess(NamedMDNode *) {
+ return &Module::NamedMDList;
+ }
/// Get the symbol table of global variable and function identifiers
const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
/// Get the Module's symbol table of global variable and function identifiers.
@@ -372,9 +414,9 @@ public:
/// @brief Returns the number of items in the list of libraries.
inline size_t lib_size() const { return LibraryList.size(); }
/// @brief Add a library to the list of dependent libraries
- void addLibrary(const std::string& Lib);
+ void addLibrary(const StringRef &Lib);
/// @brief Remove a library from the list of dependent libraries
- void removeLibrary(const std::string& Lib);
+ void removeLibrary(const StringRef &Lib);
/// @brief Get all the libraries
inline const LibraryListType& getLibraries() const { return LibraryList; }
@@ -390,18 +432,42 @@ public:
alias_iterator alias_end () { return AliasList.end(); }
/// Get a constant iterator to the last alias.
const_alias_iterator alias_end () const { return AliasList.end(); }
- /// Determine how many functions are in the Module's list of aliases.
+ /// Determine how many aliases are in the Module's list of aliases.
size_t alias_size () const { return AliasList.size(); }
/// Determine if the list of aliases is empty.
bool alias_empty() const { return AliasList.empty(); }
+
+/// @}
+/// @name Named Metadata Iteration
+/// @{
+public:
+ /// Get an iterator to the first named metadata.
+ named_metadata_iterator named_metadata_begin()
+ { return NamedMDList.begin(); }
+ /// Get a constant iterator to the first named metadata.
+ const_named_metadata_iterator named_metadata_begin() const
+ { return NamedMDList.begin(); }
+ /// Get an iterator to the last named metadata.
+ named_metadata_iterator named_metadata_end ()
+ { return NamedMDList.end(); }
+ /// Get a constant iterator to the last named metadata.
+ const_named_metadata_iterator named_metadata_end () const
+ { return NamedMDList.end(); }
+ /// Determine how many NamedMDNodes are in the Module's list of named metadata.
+ size_t named_metadata_size () const
+ { return NamedMDList.size(); }
+ /// Determine if the list of named metadata is empty.
+ bool named_metadata_empty() const
+ { return NamedMDList.empty(); }
+
+
/// @}
/// @name Utility functions for printing and dumping Module objects
/// @{
public:
/// Print the module to an output stream with AssemblyAnnotationWriter.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const;
- void print(std::ostream &OS, AssemblyAnnotationWriter *AAW) const;
/// Dump the module to stderr (for debugging).
void dump() const;
@@ -415,11 +481,7 @@ public:
/// @}
};
-/// An iostream inserter for modules.
-inline std::ostream &operator<<(std::ostream &O, const Module &M) {
- M.print(O, 0);
- return O;
-}
+/// A raw_ostream inserter for modules.
inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
M.print(O, 0);
return O;
diff --git a/include/llvm/OperandTraits.h b/include/llvm/OperandTraits.h
index 83c1025c07b7..7c879c88f13b 100644
--- a/include/llvm/OperandTraits.h
+++ b/include/llvm/OperandTraits.h
@@ -44,11 +44,10 @@ struct FixedNumOperandTraits {
};
template <class U>
struct Layout {
- struct overlay : prefix, U {
+ struct overlay : public prefix, public U {
overlay(); // DO NOT IMPLEMENT
};
};
- static inline void *allocate(unsigned); // FIXME
};
//===----------------------------------------------------------------------===//
@@ -56,7 +55,7 @@ struct FixedNumOperandTraits {
//===----------------------------------------------------------------------===//
template <unsigned ARITY = 1>
-struct OptionalOperandTraits : FixedNumOperandTraits<ARITY> {
+struct OptionalOperandTraits : public FixedNumOperandTraits<ARITY> {
static unsigned operands(const User *U) {
return U->getNumOperands();
}
@@ -81,7 +80,6 @@ struct VariadicOperandTraits {
static unsigned operands(const User *U) {
return U->getNumOperands();
}
- static inline void *allocate(unsigned); // FIXME
};
//===----------------------------------------------------------------------===//
@@ -109,7 +107,6 @@ struct HungoffOperandTraits {
static unsigned operands(const User *U) {
return U->getNumOperands();
}
- static inline void *allocate(unsigned); // FIXME
};
/// Macro for generating in-class operand accessor declarations.
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
new file mode 100644
index 000000000000..2b5cc57e75dd
--- /dev/null
+++ b/include/llvm/Operator.h
@@ -0,0 +1,306 @@
+//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various classes for working with Instructions and
+// ConstantExprs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPERATOR_H
+#define LLVM_OPERATOR_H
+
+#include "llvm/Instruction.h"
+#include "llvm/Constants.h"
+
+namespace llvm {
+
+class GetElementPtrInst;
+class BinaryOperator;
+class ConstantExpr;
+
+/// Operator - This is a utility class that provides an abstraction for the
+/// common functionality between Instructions and ConstantExprs.
+///
+class Operator : public User {
+private:
+ // Do not implement any of these. The Operator class is intended to be used
+ // as a utility, and is never itself instantiated.
+ void *operator new(size_t, unsigned);
+ void *operator new(size_t s);
+ Operator();
+ ~Operator();
+
+public:
+ /// getOpcode - Return the opcode for this Instruction or ConstantExpr.
+ ///
+ unsigned getOpcode() const {
+ if (const Instruction *I = dyn_cast<Instruction>(this))
+ return I->getOpcode();
+ return cast<ConstantExpr>(this)->getOpcode();
+ }
+
+ /// getOpcode - If V is an Instruction or ConstantExpr, return its
+ /// opcode. Otherwise return UserOp1.
+ ///
+ static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ return Instruction::UserOp1;
+ }
+
+ static inline bool classof(const Operator *) { return true; }
+ static inline bool classof(const Instruction *I) { return true; }
+ static inline bool classof(const ConstantExpr *I) { return true; }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) || isa<ConstantExpr>(V);
+ }
+};
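+
+/// Illustrative sketch (editor's addition, not part of the original patch;
+/// the function name is hypothetical): one code path handles an add whether
+/// it appears as an Instruction or as a ConstantExpr.
+inline bool exampleIsAdd(const Value *V) {
+  return Operator::getOpcode(V) == Instruction::Add;
+}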
+
+/// OverflowingBinaryOperator - Utility class for integer arithmetic operators
+/// which may exhibit overflow - Add, Sub, and Mul. It does not include SDiv,
+/// despite that operator having the potential for overflow.
+///
+class OverflowingBinaryOperator : public Operator {
+public:
+ enum {
+ NoUnsignedWrap = (1 << 0),
+ NoSignedWrap = (1 << 1)
+ };
+
+private:
+ ~OverflowingBinaryOperator(); // do not implement
+
+ friend class BinaryOperator;
+ friend class ConstantExpr;
+ void setHasNoUnsignedWrap(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
+ }
+ void setHasNoSignedWrap(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap);
+ }
+
+public:
+ /// hasNoUnsignedWrap - Test whether this operation is known to never
+ /// undergo unsigned overflow, aka the nuw property.
+ bool hasNoUnsignedWrap() const {
+ return SubclassOptionalData & NoUnsignedWrap;
+ }
+
+ /// hasNoSignedWrap - Test whether this operation is known to never
+ /// undergo signed overflow, aka the nsw property.
+ bool hasNoSignedWrap() const {
+ return SubclassOptionalData & NoSignedWrap;
+ }
+
+ static inline bool classof(const OverflowingBinaryOperator *) { return true; }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Add ||
+ I->getOpcode() == Instruction::Sub ||
+ I->getOpcode() == Instruction::Mul;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::Add ||
+ CE->getOpcode() == Instruction::Sub ||
+ CE->getOpcode() == Instruction::Mul;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
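// Illustrative sketch (mayOverflowSigned is a hypothetical helper): the nsw
// bit can be queried uniformly on instructions and constant expressions.
#include "llvm/Operator.h"
using namespace llvm;

static bool mayOverflowSigned(const Value *V) {
  if (const OverflowingBinaryOperator *OBO =
        dyn_cast<OverflowingBinaryOperator>(V))
    return !OBO->hasNoSignedWrap();
  return true; // conservatively assume overflow is possible
}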
+
+/// AddOperator - Utility class for integer addition operators.
+///
+class AddOperator : public OverflowingBinaryOperator {
+ ~AddOperator(); // do not implement
+public:
+ static inline bool classof(const AddOperator *) { return true; }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Add;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::Add;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+/// SubOperator - Utility class for integer subtraction operators.
+///
+class SubOperator : public OverflowingBinaryOperator {
+ ~SubOperator(); // do not implement
+public:
+ static inline bool classof(const SubOperator *) { return true; }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Sub;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::Sub;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+/// MulOperator - Utility class for integer multiplication operators.
+///
+class MulOperator : public OverflowingBinaryOperator {
+ ~MulOperator(); // do not implement
+public:
+ static inline bool classof(const MulOperator *) { return true; }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Mul;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::Mul;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+/// SDivOperator - An Operator with opcode Instruction::SDiv.
+///
+class SDivOperator : public Operator {
+public:
+ enum {
+ IsExact = (1 << 0)
+ };
+
+private:
+ ~SDivOperator(); // do not implement
+
+ friend class BinaryOperator;
+ friend class ConstantExpr;
+ void setIsExact(bool B) {
+ SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
+ }
+
+public:
+ /// isExact - Test whether this division is known to be exact, with
+ /// zero remainder.
+ bool isExact() const {
+ return SubclassOptionalData & IsExact;
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SDivOperator *) { return true; }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::SDiv;
+ }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::SDiv;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+class GEPOperator : public Operator {
+ enum {
+ IsInBounds = (1 << 0)
+ };
+
+ ~GEPOperator(); // do not implement
+
+ friend class GetElementPtrInst;
+ friend class ConstantExpr;
+ void setIsInBounds(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
+ }
+
+public:
+ /// isInBounds - Test whether this is an inbounds GEP, as defined
+ /// by LangRef.html.
+ bool isInBounds() const {
+ return SubclassOptionalData & IsInBounds;
+ }
+
+ inline op_iterator idx_begin() { return op_begin()+1; }
+ inline const_op_iterator idx_begin() const { return op_begin()+1; }
+ inline op_iterator idx_end() { return op_end(); }
+ inline const_op_iterator idx_end() const { return op_end(); }
+
+ Value *getPointerOperand() {
+ return getOperand(0);
+ }
+ const Value *getPointerOperand() const {
+ return getOperand(0);
+ }
+ static unsigned getPointerOperandIndex() {
+ return 0U; // get index for modifying correct operand
+ }
+
+ /// getPointerOperandType - Method to return the pointer operand as a
+ /// PointerType.
+ const PointerType *getPointerOperandType() const {
+ return reinterpret_cast<const PointerType*>(getPointerOperand()->getType());
+ }
+
+ unsigned getNumIndices() const { // Note: always non-negative
+ return getNumOperands() - 1;
+ }
+
+ bool hasIndices() const {
+ return getNumOperands() > 1;
+ }
+
+ /// hasAllZeroIndices - Return true if all of the indices of this GEP are
+ /// zeros. If so, the result pointer and the first operand have the same
+ /// value, just potentially different types.
+ bool hasAllZeroIndices() const {
+ for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
+ if (Constant *C = dyn_cast<Constant>(I))
+ if (C->isNullValue())
+ continue;
+ return false;
+ }
+ return true;
+ }
+
+ /// hasAllConstantIndices - Return true if all of the indices of this GEP are
+ /// constant integers. If so, the result pointer and the first operand have
+ /// a constant offset between them.
+ bool hasAllConstantIndices() const {
+ for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
+ if (!isa<ConstantInt>(I))
+ return false;
+ }
+ return true;
+ }
+
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const GEPOperator *) { return true; }
+ static inline bool classof(const GetElementPtrInst *) { return true; }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::GetElementPtr;
+ }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::GetElementPtr;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
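// Illustrative sketch (isPointerPassthrough is a hypothetical helper):
// GEPOperator gives GetElementPtrInst and GEP constant expressions one shared
// code path in pointer analyses.
#include "llvm/Operator.h"
using namespace llvm;

static bool isPointerPassthrough(const Value *V) {
  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    return GEP->hasAllZeroIndices(); // same value as the base pointer
  return false;
}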
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index eea99e028dd8..eb4c92281c9b 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -29,11 +29,8 @@
#ifndef LLVM_PASS_H
#define LLVM_PASS_H
-#include "llvm/Module.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Streams.h"
#include <cassert>
-#include <iosfwd>
#include <utility>
#include <vector>
@@ -48,7 +45,8 @@ class ImmutablePass;
class PMStack;
class AnalysisResolver;
class PMDataManager;
-class LLVMContext;
+class raw_ostream;
+class StringRef;
// AnalysisID - Use the PassInfo to identify a pass...
typedef const PassInfo* AnalysisID;
@@ -78,9 +76,6 @@ class Pass {
void operator=(const Pass&); // DO NOT IMPLEMENT
Pass(const Pass &); // DO NOT IMPLEMENT
-protected:
- LLVMContext* Context;
-
public:
explicit Pass(intptr_t pid) : Resolver(0), PassID(pid) {
assert(pid && "pid cannot be 0");
@@ -108,9 +103,8 @@ public:
/// provide the Module* in case the analysis doesn't need it; it can just be
/// ignored.
///
- virtual void print(std::ostream &O, const Module *M) const;
- void print(std::ostream *O, const Module *M) const { if (O) print(*O, M); }
- void dump() const; // dump - call print(std::cerr, 0);
+ virtual void print(raw_ostream &O, const Module *M) const;
+ void dump() const; // dump - Print to stderr.
/// Each pass is responsible for assigning a pass manager to itself.
/// PMS is the stack of available pass manager.
@@ -171,6 +165,10 @@ public:
// or null if it is not known.
static const PassInfo *lookupPassInfo(intptr_t TI);
+ // lookupPassInfo - Return the pass info object for the pass with the given
+ // argument string, or null if it is not known.
+ static const PassInfo *lookupPassInfo(const StringRef &Arg);
+
/// getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to
/// get analysis information that might be around, for example to update it.
/// This is different than getAnalysis in that it can fail (if the analysis
@@ -198,7 +196,7 @@ public:
AnalysisType &getAnalysis() const; // Defined in PassAnalysisSupport.h
template<typename AnalysisType>
- AnalysisType &getAnalysis(Function &F); // Defined in PassanalysisSupport.h
+ AnalysisType &getAnalysis(Function &F); // Defined in PassAnalysisSupport.h
template<typename AnalysisType>
AnalysisType &getAnalysisID(const PassInfo *PI) const;
@@ -207,9 +205,6 @@ public:
AnalysisType &getAnalysisID(const PassInfo *PI, Function &F);
};
-inline std::ostream &operator<<(std::ostream &OS, const Pass &P) {
- P.print(OS, 0); return OS;
-}
//===----------------------------------------------------------------------===//
/// ModulePass class - This class is used to implement unstructured
@@ -281,11 +276,8 @@ public:
/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary per-module initialization.
///
- virtual bool doInitialization(Module &M) {
- Context = &M.getContext();
- return false;
- }
-
+ virtual bool doInitialization(Module &M) { return false; }
+
/// runOnFunction - Virtual method overridden by subclasses to do the
/// per-function processing of the pass.
///
@@ -336,10 +328,7 @@ public:
/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary per-module initialization.
///
- virtual bool doInitialization(Module &M) {
- Context = &M.getContext();
- return false;
- }
+ virtual bool doInitialization(Module &M) { return false; }
/// doInitialization - Virtual method overridden by BasicBlockPass subclasses
/// to do any necessary per-function initialization.
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index b09ba45e346d..f339481c1ede 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -24,6 +24,8 @@
namespace llvm {
+class StringRef;
+
// No need to include Pass.h, we are being included by it!
//===----------------------------------------------------------------------===//
@@ -79,6 +81,9 @@ public:
return *this;
}
+ // addPreserved - Add the specified Pass class to the set of analyses
+ // preserved by this pass.
+ //
template<class PassClass>
AnalysisUsage &addPreserved() {
assert(Pass::getClassPassInfo<PassClass>() && "Pass class not registered!");
@@ -86,6 +91,18 @@ public:
return *this;
}
+ // addPreserved - Add the Pass with the specified argument string to the set
+ // of analyses preserved by this pass. If no such Pass exists, do nothing.
+ // This can be useful when a pass is trivially preserved, but may not be
+ // linked in. Be careful about spelling!
+ //
+ AnalysisUsage &addPreserved(const StringRef &Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI);
+ return *this;
+ }
+
// setPreservesAll - Set by analyses that do not transform their input at all
void setPreservesAll() { PreservesAll = true; }
bool getPreservesAll() const { return PreservesAll; }
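// Illustrative sketch of the string-based addPreserved; MyPass is a
// hypothetical pass, and the call is a no-op if nothing is registered under
// the name "domtree".
#include "llvm/Pass.h"
using namespace llvm;

namespace {
  struct MyPass : public FunctionPass {
    static char ID;
    MyPass() : FunctionPass(&ID) {}
    virtual bool runOnFunction(Function &) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addPreserved("domtree"); // by argument string; spelling matters
    }
  };
}
char MyPass::ID = 0;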
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index 1aa0d3a6fa42..5a8f55570a82 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -91,9 +91,11 @@
#include "llvm/Support/PrettyStackTrace.h"
namespace llvm {
+ class Module;
class Pass;
+ class StringRef;
class Value;
- class Module;
+ class Timer;
/// FunctionPassManager and PassManager, two top level managers, serve
/// as the public interface of pass manager infrastructure.
@@ -121,7 +123,7 @@ class PassManagerPrettyStackEntry : public PrettyStackTraceEntry {
Value *V;
Module *M;
public:
- PassManagerPrettyStackEntry(Pass *p)
+ explicit PassManagerPrettyStackEntry(Pass *p)
: P(p), V(0), M(0) {} // When P is releaseMemory'd.
PassManagerPrettyStackEntry(Pass *p, Value &v)
: P(p), V(&v), M(0) {} // When P is run on V
@@ -278,14 +280,16 @@ public:
/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
void verifyPreservedAnalysis(Pass *P);
- /// verifyDomInfo -- Verify dominator information if it is available.
- void verifyDomInfo(Pass &P, Function &F);
-
/// Remove Analysis that is not preserved by the pass
void removeNotPreservedAnalysis(Pass *P);
- /// Remove dead passes
- void removeDeadPasses(Pass *P, const char *Msg, enum PassDebuggingString);
+ /// Remove dead passes used by P.
+ void removeDeadPasses(Pass *P, const StringRef &Msg,
+ enum PassDebuggingString);
+
+ /// Remove P.
+ void freePass(Pass *P, const StringRef &Msg,
+ enum PassDebuggingString);
/// Add pass P into the PassVector. Update
/// AvailableAnalysis appropriately if ProcessAnalysis is true.
@@ -340,7 +344,7 @@ public:
void dumpLastUses(Pass *P, unsigned Offset) const;
void dumpPassArguments() const;
void dumpPassInfo(Pass *P, enum PassDebuggingString S1,
- enum PassDebuggingString S2, const char *Msg);
+ enum PassDebuggingString S2, const StringRef &Msg);
void dumpRequiredSet(const Pass *P) const;
void dumpPreservedSet(const Pass *P) const;
@@ -378,8 +382,13 @@ protected:
// then PMT_Last active pass managers.
std::map<AnalysisID, Pass *> *InheritedAnalysis[PMT_Last];
+
+ /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+ /// or higher is specified.
+ bool isPassDebuggingExecutionsOrMore() const;
+
private:
- void dumpAnalysisUsage(const char *Msg, const Pass *P,
+ void dumpAnalysisUsage(const StringRef &Msg, const Pass *P,
const AnalysisUsage::VectorType &Set) const;
// Set of available Analysis. This information is used while scheduling
@@ -449,9 +458,9 @@ public:
}
};
-}
+extern Timer *StartPassTimer(Pass *);
+extern void StopPassTimer(Pass *, Timer *);
-extern void StartPassTimer(llvm::Pass *);
-extern void StopPassTimer(llvm::Pass *);
+}
#endif
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index fe3ca520659c..b5e581a6f4e7 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -190,14 +190,11 @@ struct RegisterPass : public PassInfo {
/// a nice name with the interface.
///
class RegisterAGBase : public PassInfo {
- PassInfo *InterfaceInfo;
- const PassInfo *ImplementationInfo;
- bool isDefaultImplementation;
protected:
- explicit RegisterAGBase(const char *Name,
- intptr_t InterfaceID,
- intptr_t PassID = 0,
- bool isDefault = false);
+ RegisterAGBase(const char *Name,
+ intptr_t InterfaceID,
+ intptr_t PassID = 0,
+ bool isDefault = false);
};
template<typename Interface, bool Default = false>
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index c0414f970a29..4c848788c73d 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -15,6 +15,8 @@
#define LLVM_SUPPORT_ALLOCATOR_H
#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
#include <cstdlib>
namespace llvm {
@@ -41,21 +43,104 @@ public:
void PrintStats() const {}
};
-/// BumpPtrAllocator - This allocator is useful for containers that need very
-/// simple memory allocation strategies. In particular, this just keeps
+/// MemSlab - This structure lives at the beginning of every slab allocated by
+/// the bump allocator.
+class MemSlab {
+public:
+ size_t Size;
+ MemSlab *NextPtr;
+};
+
+/// SlabAllocator - This class can be used to parameterize the underlying
+/// allocation strategy for the bump allocator. In particular, this is used
+/// by the JIT to allocate contiguous swathes of executable memory. The
+/// interface uses MemSlabs instead of raw void pointers so that the
+/// allocator doesn't have to remember the size of the memory it allocated.
+class SlabAllocator {
+public:
+ virtual ~SlabAllocator();
+ virtual MemSlab *Allocate(size_t Size) = 0;
+ virtual void Deallocate(MemSlab *Slab) = 0;
+};
+
+/// MallocSlabAllocator - The default slab allocator for the bump allocator
+/// is an adapter class for MallocAllocator that just forwards the method
+/// calls and translates the arguments.
+class MallocSlabAllocator : public SlabAllocator {
+ /// Allocator - The underlying allocator that we forward to.
+ ///
+ MallocAllocator Allocator;
+
+public:
+ MallocSlabAllocator() : Allocator() { }
+ virtual ~MallocSlabAllocator();
+ virtual MemSlab *Allocate(size_t Size);
+ virtual void Deallocate(MemSlab *Slab);
+};
+
+/// BumpPtrAllocator - This allocator is useful for containers that need
+/// very simple memory allocation strategies. In particular, this just keeps
/// allocating memory, and never deletes it until the entire block is dead. This
/// makes allocation speedy, but must only be used when the trade-off is ok.
class BumpPtrAllocator {
BumpPtrAllocator(const BumpPtrAllocator &); // do not implement
void operator=(const BumpPtrAllocator &); // do not implement
- void *TheMemory;
+ /// SlabSize - Allocate data into slabs of this size unless we get an
+ /// allocation above SizeThreshold.
+ size_t SlabSize;
+
+ /// SizeThreshold - For any allocation larger than this threshold, we should
+ /// allocate a separate slab.
+ size_t SizeThreshold;
+
+ /// Allocator - The underlying allocator we use to get slabs of memory. This
+ /// defaults to MallocSlabAllocator, which wraps malloc, but it could be
+ /// changed to use a custom allocator.
+ SlabAllocator &Allocator;
+
+ /// CurSlab - The slab that we are currently allocating into.
+ ///
+ MemSlab *CurSlab;
+
+ /// CurPtr - The current pointer into the current slab. This points to the
+ /// next free byte in the slab.
+ char *CurPtr;
+
+ /// End - The end of the current slab.
+ ///
+ char *End;
+
+ /// BytesAllocated - This field tracks how many bytes we've allocated, so
+ /// that we can compute how much space was wasted.
+ size_t BytesAllocated;
+
+ /// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should
+ /// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and
+ /// AlignPtr(8, 4) == 8.
+ static char *AlignPtr(char *Ptr, size_t Alignment);
+
+ /// StartNewSlab - Allocate a new slab and move the bump pointers over into
+ /// the new slab. Modifies CurPtr and End.
+ void StartNewSlab();
+
+ /// DeallocateSlabs - Deallocate all memory slabs after and including this
+ /// one.
+ void DeallocateSlabs(MemSlab *Slab);
+
+ static MallocSlabAllocator DefaultSlabAllocator;
+
public:
- BumpPtrAllocator();
+ BumpPtrAllocator(size_t size = 4096, size_t threshold = 4096,
+ SlabAllocator &allocator = DefaultSlabAllocator);
~BumpPtrAllocator();
+ /// Reset - Deallocate all but the current slab and reset the current pointer
+ /// to the beginning of it, freeing all memory allocated so far.
void Reset();
+ /// Allocate - Allocate space at the specified alignment.
+ ///
void *Allocate(size_t Size, size_t Alignment);
/// Allocate space, but do not construct, one object.
@@ -83,9 +168,11 @@ public:
void Deallocate(const void * /*Ptr*/) {}
+ unsigned GetNumSlabs() const;
+
void PrintStats() const;
};
} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_ALLOCATOR_H
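// Illustrative sketch (Node and buildScratchList are hypothetical):
// BumpPtrAllocator hands out raw, uninitialized memory and reclaims it all at
// once rather than per-object.
#include "llvm/Support/Allocator.h"
using namespace llvm;

struct Node { int Value; Node *Next; };

void buildScratchList() {
  BumpPtrAllocator Alloc;           // 4096-byte slabs by default
  Node *N = Alloc.Allocate<Node>(); // allocates, does not construct
  N->Value = 0;
  N->Next = 0;
  Alloc.Reset(); // keep the current slab, free the rest
}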
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index b0b857bf0280..3a20696f05aa 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -18,17 +18,17 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/Function.h"
#include "llvm/InstrTypes.h"
-#include "llvm/ADT/iterator.h"
namespace llvm {
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
// BasicBlock pred_iterator definition
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
template <class _Ptr, class _USE_iterator> // Predecessor Iterator
-class PredIterator : public forward_iterator<_Ptr, ptrdiff_t> {
- typedef forward_iterator<_Ptr, ptrdiff_t> super;
+class PredIterator : public std::iterator<std::forward_iterator_tag,
+ _Ptr, ptrdiff_t> {
+ typedef std::iterator<std::forward_iterator_tag, _Ptr, ptrdiff_t> super;
_USE_iterator It;
public:
typedef PredIterator<_Ptr,_USE_iterator> _Self;
@@ -80,15 +80,16 @@ inline pred_const_iterator pred_end(const BasicBlock *BB) {
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
// BasicBlock succ_iterator definition
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
template <class Term_, class BB_> // Successor Iterator
-class SuccIterator : public bidirectional_iterator<BB_, ptrdiff_t> {
+class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
+ BB_, ptrdiff_t> {
const Term_ Term;
unsigned idx;
- typedef bidirectional_iterator<BB_, ptrdiff_t> super;
+ typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t> super;
public:
typedef SuccIterator<Term_, BB_> _Self;
typedef typename super::pointer pointer;
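// Illustrative sketch (countPredecessors is a hypothetical helper): the
// switch to std::iterator leaves the usual pred/succ traversal idiom intact.
#include "llvm/Support/CFG.h"
using namespace llvm;

static unsigned countPredecessors(BasicBlock *BB) {
  unsigned N = 0;
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
    ++N; // *PI is a predecessor BasicBlock*
  return N;
}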
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index dc41590fb8a5..285b558afccb 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -26,6 +26,7 @@
#include "llvm/Attributes.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/BasicBlock.h"
+#include "llvm/CallingConv.h"
#include "llvm/Instruction.h"
namespace llvm {
@@ -40,8 +41,6 @@ public:
CallSite(CallInst *CI) : I(reinterpret_cast<Instruction*>(CI), true) {}
CallSite(InvokeInst *II) : I(reinterpret_cast<Instruction*>(II), false) {}
CallSite(Instruction *C);
- CallSite(const CallSite &CS) : I(CS.I) {}
- CallSite &operator=(const CallSite &CS) { I = CS.I; return *this; }
bool operator==(const CallSite &CS) const { return I == CS.I; }
bool operator!=(const CallSite &CS) const { return I != CS.I; }
@@ -63,8 +62,8 @@ public:
/// getCallingConv/setCallingConv - get or set the calling convention of the
/// call.
- unsigned getCallingConv() const;
- void setCallingConv(unsigned CC);
+ CallingConv::ID getCallingConv() const;
+ void setCallingConv(CallingConv::ID CC);
/// getAttributes/setAttributes - get or set the parameter attributes of
/// the call.
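// Illustrative sketch (isFastCall is hypothetical; I must be a CallInst or
// InvokeInst): calling conventions are now the typed CallingConv::ID rather
// than a bare unsigned.
#include "llvm/Support/CallSite.h"
using namespace llvm;

static bool isFastCall(Instruction *I) {
  CallSite CS(I);
  return CS.getCallingConv() == CallingConv::Fast;
}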
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 48988f8a6bb8..35fb29ec6cb3 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -235,7 +235,7 @@ inline typename cast_retty<X, Y>::ret_type dyn_cast_or_null(const Y &Val) {
#ifdef DEBUG_CAST_OPERATORS
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
struct bar {
bar() {}
@@ -251,7 +251,7 @@ struct foo {
};
template <> inline bool isa_impl<foo,bar>(const bar &Val) {
- cerr << "Classof: " << &Val << "\n";
+ errs() << "Classof: " << &Val << "\n";
return true;
}
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 3ae50136e4a9..dc73979bb09b 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -21,18 +21,17 @@
#define LLVM_SUPPORT_COMMANDLINE_H
#include "llvm/Support/type_traits.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include <cassert>
#include <climits>
#include <cstdarg>
-#include <string>
#include <utility>
#include <vector>
namespace llvm {
-
+
/// cl Namespace - This namespace contains all of the command line option
/// processing machinery. It is intentionally a short name to make qualified
/// usage concise.
@@ -68,7 +67,7 @@ void MarkOptionsChanged();
// Flags permitted to be passed to command line arguments
//
-enum NumOccurrences { // Flags for the number of occurrences allowed
+enum NumOccurrencesFlag { // Flags for the number of occurrences allowed
Optional = 0x01, // Zero or One occurrence
ZeroOrMore = 0x02, // Zero or more occurrences allowed
Required = 0x03, // One occurrence required
@@ -143,8 +142,8 @@ class Option {
// an argument. Should return true if there was an error processing the
// argument and the program should exit.
//
- virtual bool handleOccurrence(unsigned pos, const char *ArgName,
- const std::string &Arg) = 0;
+ virtual bool handleOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Arg) = 0;
virtual enum ValueExpected getValueExpectedFlagDefault() const {
return ValueOptional;
@@ -163,8 +162,8 @@ public:
const char *HelpStr; // The descriptive text message for --help
const char *ValueStr; // String describing what the value of this option is
- inline enum NumOccurrences getNumOccurrencesFlag() const {
- return static_cast<enum NumOccurrences>(Flags & OccurrencesMask);
+ inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
+ return static_cast<enum NumOccurrencesFlag>(Flags & OccurrencesMask);
}
inline enum ValueExpected getValueExpectedFlag() const {
int VE = Flags & ValueMask;
@@ -198,7 +197,7 @@ public:
Flags |= Flag;
}
- void setNumOccurrencesFlag(enum NumOccurrences Val) {
+ void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) {
setFlag(Val, OccurrencesMask);
}
void setValueExpectedFlag(enum ValueExpected Val) { setFlag(Val, ValueMask); }
@@ -215,8 +214,7 @@ protected:
getOptionHiddenFlag() != 0 && "Not all default flags specified!");
}
- inline void setNumAdditionalVals(unsigned n)
- { AdditionalVals = n; }
+ inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
public:
// addArgument - Register this argument with the commandline system.
//
@@ -232,15 +230,15 @@ public:
//
virtual void printOptionInfo(size_t GlobalWidth) const = 0;
- virtual void getExtraOptionNames(std::vector<const char*> &) {}
+ virtual void getExtraOptionNames(SmallVectorImpl<const char*> &) {}
- // addOccurrence - Wrapper around handleOccurrence that enforces Flags
+ // addOccurrence - Wrapper around handleOccurrence that enforces Flags.
//
- bool addOccurrence(unsigned pos, const char *ArgName,
- const std::string &Value, bool MultiArg = false);
+ bool addOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Value, bool MultiArg = false);
// Prints option name followed by message. Always returns true.
- bool error(std::string Message, const char *ArgName = 0);
+ bool error(const Twine &Message, StringRef ArgName = StringRef());
public:
inline int getNumOccurrences() const { return NumOccurrences; }
@@ -399,7 +397,7 @@ struct generic_parser_base {
hasArgStr = O.hasArgStr();
}
- void getExtraOptionNames(std::vector<const char*> &OptionNames) {
+ void getExtraOptionNames(SmallVectorImpl<const char*> &OptionNames) {
// If there has been no argstr specified, that means that we need to add an
// argument for every possible option. This ensures that our options are
// vectored to us.
@@ -458,9 +456,8 @@ public:
}
// parse - Return true on error.
- bool parse(Option &O, const char *ArgName, const std::string &Arg,
- DataType &V) {
- std::string ArgVal;
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) {
+ StringRef ArgVal;
if (hasArgStr)
ArgVal = Arg;
else
@@ -468,12 +465,12 @@ public:
for (unsigned i = 0, e = static_cast<unsigned>(Values.size());
i != e; ++i)
- if (ArgVal == Values[i].first) {
+ if (Values[i].first == ArgVal) {
V = Values[i].second.first;
return false;
}
- return O.error(": Cannot find option named '" + ArgVal + "'!");
+ return O.error("Cannot find option named '" + ArgVal + "'!");
}
/// addLiteralOption - Add an entry to the mapping table.
@@ -505,7 +502,7 @@ struct basic_parser_impl { // non-template implementation of basic_parser<t>
return ValueRequired;
}
- void getExtraOptionNames(std::vector<const char*> &) {}
+ void getExtraOptionNames(SmallVectorImpl<const char*> &) {}
void initialize(Option &) {}
@@ -541,7 +538,7 @@ class parser<bool> : public basic_parser<bool> {
public:
// parse - Return true on error.
- bool parse(Option &O, const char *ArgName, const std::string &Arg, bool &Val);
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, bool &Val);
template <class Opt>
void initialize(Opt &O) {
@@ -568,8 +565,7 @@ template<>
class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
public:
// parse - Return true on error.
- bool parse(Option &O, const char *ArgName, const std::string &Arg,
- boolOrDefault &Val);
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, boolOrDefault &Val);
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueOptional;
@@ -591,7 +587,7 @@ template<>
class parser<int> : public basic_parser<int> {
public:
// parse - Return true on error.
- bool parse(Option &O, const char *ArgName, const std::string &Arg, int &Val);
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, int &Val);
// getValueName - Overload in subclass to provide a better default value.
virtual const char *getValueName() const { return "int"; }
@@ -610,7 +606,7 @@ template<>
class parser<unsigned> : public basic_parser<unsigned> {
public:
// parse - Return true on error.
- bool parse(Option &O, const char *AN, const std::string &Arg, unsigned &Val);
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned &Val);
// getValueName - Overload in subclass to provide a better default value.
virtual const char *getValueName() const { return "uint"; }
@@ -628,7 +624,7 @@ template<>
class parser<double> : public basic_parser<double> {
public:
// parse - Return true on error.
- bool parse(Option &O, const char *AN, const std::string &Arg, double &Val);
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, double &Val);
// getValueName - Overload in subclass to provide a better default value.
virtual const char *getValueName() const { return "number"; }
@@ -646,7 +642,7 @@ template<>
class parser<float> : public basic_parser<float> {
public:
// parse - Return true on error.
- bool parse(Option &O, const char *AN, const std::string &Arg, float &Val);
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, float &Val);
// getValueName - Overload in subclass to provide a better default value.
virtual const char *getValueName() const { return "number"; }
@@ -664,9 +660,8 @@ template<>
class parser<std::string> : public basic_parser<std::string> {
public:
// parse - Return true on error.
- bool parse(Option &, const char *, const std::string &Arg,
- std::string &Value) {
- Value = Arg;
+ bool parse(Option &, StringRef ArgName, StringRef Arg, std::string &Value) {
+ Value = Arg.str();
return false;
}
@@ -686,8 +681,7 @@ template<>
class parser<char> : public basic_parser<char> {
public:
// parse - Return true on error.
- bool parse(Option &, const char *, const std::string &Arg,
- char &Value) {
+ bool parse(Option &, StringRef ArgName, StringRef Arg, char &Value) {
Value = Arg[0];
return false;
}
@@ -726,8 +720,10 @@ template<> struct applicator<const char*> {
static void opt(const char *Str, Opt &O) { O.setArgStr(Str); }
};
-template<> struct applicator<NumOccurrences> {
- static void opt(NumOccurrences NO, Option &O) { O.setNumOccurrencesFlag(NO); }
+template<> struct applicator<NumOccurrencesFlag> {
+ static void opt(NumOccurrencesFlag NO, Option &O) {
+ O.setNumOccurrencesFlag(NO);
+ }
};
template<> struct applicator<ValueExpected> {
static void opt(ValueExpected VE, Option &O) { O.setValueExpectedFlag(VE); }
@@ -770,7 +766,7 @@ public:
bool setLocation(Option &O, DataType &L) {
if (Location)
- return O.error(": cl::location(x) specified more than once!");
+ return O.error("cl::location(x) specified more than once!");
Location = &L;
return false;
}
@@ -833,8 +829,8 @@ class opt : public Option,
is_class<DataType>::value> {
ParserClass Parser;
- virtual bool handleOccurrence(unsigned pos, const char *ArgName,
- const std::string &Arg) {
+ virtual bool handleOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Arg) {
typename ParserClass::parser_data_type Val =
typename ParserClass::parser_data_type();
if (Parser.parse(*this, ArgName, Arg, Val))
@@ -847,7 +843,7 @@ class opt : public Option,
virtual enum ValueExpected getValueExpectedFlagDefault() const {
return Parser.getValueExpectedFlagDefault();
}
- virtual void getExtraOptionNames(std::vector<const char*> &OptionNames) {
+ virtual void getExtraOptionNames(SmallVectorImpl<const char*> &OptionNames) {
return Parser.getExtraOptionNames(OptionNames);
}
@@ -964,7 +960,7 @@ public:
bool setLocation(Option &O, StorageClass &L) {
if (Location)
- return O.error(": cl::location(x) specified more than once!");
+ return O.error("cl::location(x) specified more than once!");
Location = &L;
return false;
}
@@ -1002,12 +998,11 @@ class list : public Option, public list_storage<DataType, Storage> {
virtual enum ValueExpected getValueExpectedFlagDefault() const {
return Parser.getValueExpectedFlagDefault();
}
- virtual void getExtraOptionNames(std::vector<const char*> &OptionNames) {
+ virtual void getExtraOptionNames(SmallVectorImpl<const char*> &OptionNames) {
return Parser.getExtraOptionNames(OptionNames);
}
- virtual bool handleOccurrence(unsigned pos, const char *ArgName,
- const std::string &Arg) {
+ virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg){
typename ParserClass::parser_data_type Val =
typename ParserClass::parser_data_type();
if (Parser.parse(*this, ArgName, Arg, Val))
@@ -1139,7 +1134,7 @@ public:
bool setLocation(Option &O, unsigned &L) {
if (Location)
- return O.error(": cl::location(x) specified more than once!");
+ return O.error("cl::location(x) specified more than once!");
Location = &L;
return false;
}
@@ -1202,12 +1197,11 @@ class bits : public Option, public bits_storage<DataType, Storage> {
virtual enum ValueExpected getValueExpectedFlagDefault() const {
return Parser.getValueExpectedFlagDefault();
}
- virtual void getExtraOptionNames(std::vector<const char*> &OptionNames) {
+ virtual void getExtraOptionNames(SmallVectorImpl<const char*> &OptionNames) {
return Parser.getExtraOptionNames(OptionNames);
}
- virtual bool handleOccurrence(unsigned pos, const char *ArgName,
- const std::string &Arg) {
+ virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg){
typename ParserClass::parser_data_type Val =
typename ParserClass::parser_data_type();
if (Parser.parse(*this, ArgName, Arg, Val))
@@ -1307,8 +1301,8 @@ public:
class alias : public Option {
Option *AliasFor;
- virtual bool handleOccurrence(unsigned pos, const char * /*ArgName*/,
- const std::string &Arg) {
+ virtual bool handleOccurrence(unsigned pos, StringRef /*ArgName*/,
+ StringRef Arg) {
return AliasFor->handleOccurrence(pos, AliasFor->ArgStr, Arg);
}
// Handle printing stuff...
@@ -1317,15 +1311,15 @@ class alias : public Option {
void done() {
if (!hasArgStr())
- error(": cl::alias must have argument name specified!");
+ error("cl::alias must have argument name specified!");
if (AliasFor == 0)
- error(": cl::alias must have an cl::aliasopt(option) specified!");
+ error("cl::alias must have an cl::aliasopt(option) specified!");
addArgument();
}
public:
void setAliasFor(Option &O) {
if (AliasFor)
- error(": cl::alias must only have one cl::aliasopt(...) specified!");
+ error("cl::alias must only have one cl::aliasopt(...) specified!");
AliasFor = &O;
}
@@ -1366,7 +1360,7 @@ struct aliasopt {
// extrahelp - provide additional help at the end of the normal help
// output. All occurrences of cl::extrahelp will be accumulated and
-// printed to std::cerr at the end of the regular help, just before
+// printed to stderr at the end of the regular help, just before
// exit is called.
struct extrahelp {
const char * morehelp;
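// Illustrative sketch (the option name is hypothetical): declarations are
// unchanged by the StringRef migration; only the parser internals moved off
// std::string.
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<unsigned>
Threshold("example-threshold",
          cl::desc("A hypothetical tuning knob"),
          cl::init(8), cl::Optional);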
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 90292df38196..342a97d761ee 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -23,7 +23,7 @@
#define VISIBILITY_HIDDEN
#endif
-#if (__GNUC__ >= 4)
+#if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define ATTRIBUTE_USED __attribute__((__used__))
#else
#define ATTRIBUTE_USED
@@ -56,4 +56,10 @@
#define DISABLE_INLINE
#endif
+#ifdef __GNUC__
+#define NORETURN __attribute__((noreturn))
+#else
+#define NORETURN
+#endif
+
#endif
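// Illustrative sketch (fatalExit is hypothetical): NORETURN marks functions
// that never return, improving warnings and codegen under GCC.
#include "llvm/Support/Compiler.h"
#include <cstdlib>

NORETURN void fatalExit() {
  abort();
}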
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index 35065a060866..99cb92078f36 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -21,9 +21,12 @@
namespace llvm {
+class LLVMContext;
+
/// ConstantFolder - Create constants with minimum, target independent, folding.
class ConstantFolder {
public:
+ explicit ConstantFolder(LLVMContext &) {}
//===--------------------------------------------------------------------===//
// Binary Operators
@@ -32,12 +35,18 @@ public:
Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getAdd(LHS, RHS);
}
+ Constant *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
+ return ConstantExpr::getNSWAdd(LHS, RHS);
+ }
Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFAdd(LHS, RHS);
}
Constant *CreateSub(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getSub(LHS, RHS);
}
+ Constant *CreateNSWSub(Constant *LHS, Constant *RHS) const {
+ return ConstantExpr::getNSWSub(LHS, RHS);
+ }
Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFSub(LHS, RHS);
}
@@ -53,6 +62,9 @@ public:
Constant *CreateSDiv(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getSDiv(LHS, RHS);
}
+ Constant *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
+ return ConstantExpr::getExactSDiv(LHS, RHS);
+ }
Constant *CreateFDiv(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFDiv(LHS, RHS);
}
@@ -116,6 +128,15 @@ public:
return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx);
}
+ Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList,
+ unsigned NumIdx) const {
+ return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx);
+ }
+ Constant *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList,
+ unsigned NumIdx) const {
+ return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx);
+ }
+
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
@@ -124,10 +145,16 @@ public:
const Type *DestTy) const {
return ConstantExpr::getCast(Op, C, DestTy);
}
+ Constant *CreatePointerCast(Constant *C, const Type *DestTy) const {
+ return ConstantExpr::getPointerCast(C, DestTy);
+ }
Constant *CreateIntCast(Constant *C, const Type *DestTy,
bool isSigned) const {
return ConstantExpr::getIntegerCast(C, DestTy, isSigned);
}
+ Constant *CreateFPCast(Constant *C, const Type *DestTy) const {
+ return ConstantExpr::getFPCast(C, DestTy);
+ }
Constant *CreateBitCast(Constant *C, const Type *DestTy) const {
return CreateCast(Instruction::BitCast, C, DestTy);
@@ -138,6 +165,13 @@ public:
Constant *CreatePtrToInt(Constant *C, const Type *DestTy) const {
return CreateCast(Instruction::PtrToInt, C, DestTy);
}
+ Constant *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const {
+ return ConstantExpr::getZExtOrBitCast(C, DestTy);
+ }
+ Constant *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const {
+ return ConstantExpr::getSExtOrBitCast(C, DestTy);
+ }
+
Constant *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const {
return ConstantExpr::getTruncOrBitCast(C, DestTy);
}
@@ -154,14 +188,6 @@ public:
Constant *RHS) const {
return ConstantExpr::getCompare(P, LHS, RHS);
}
- Constant *CreateVICmp(CmpInst::Predicate P, Constant *LHS,
- Constant *RHS) const {
- return ConstantExpr::getCompare(P, LHS, RHS);
- }
- Constant *CreateVFCmp(CmpInst::Predicate P, Constant *LHS,
- Constant *RHS) const {
- return ConstantExpr::getCompare(P, LHS, RHS);
- }
//===--------------------------------------------------------------------===//
// Other Instructions
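// Illustrative sketch (foldNSWAdd is hypothetical): the folder now exposes
// the no-wrap and in-bounds constant-expression builders directly.
#include "llvm/Support/ConstantFolder.h"
using namespace llvm;

static Constant *foldNSWAdd(LLVMContext &Ctx, Constant *LHS, Constant *RHS) {
  ConstantFolder Folder(Ctx);
  return Folder.CreateNSWAdd(LHS, RHS); // constant add carrying the nsw flag
}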
diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h
index 098fab5f98dd..e9c8c7cb2e14 100644
--- a/include/llvm/Support/ConstantRange.h
+++ b/include/llvm/Support/ConstantRange.h
@@ -24,7 +24,9 @@
// [0, 0) = {} = Empty set
// [255, 255) = {0..255} = Full Set
//
-// Note that ConstantRange always keeps unsigned values.
+// Note that ConstantRange can be used to represent either signed or
+// unsigned ranges.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_CONSTANT_RANGE_H
@@ -35,11 +37,14 @@
namespace llvm {
+/// ConstantRange - This class represents a range of values.
+///
class ConstantRange {
APInt Lower, Upper;
static ConstantRange intersect1Wrapped(const ConstantRange &LHS,
const ConstantRange &RHS);
- public:
+
+public:
/// Initialize a full (the default) or empty set for the specified bit width.
///
explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true);
@@ -53,6 +58,16 @@ class ConstantRange {
/// assert out if the two APInt's are not the same bit width.
ConstantRange(const APInt& Lower, const APInt& Upper);
+ /// makeICmpRegion - Produce the smallest range that contains all values that
+ /// might satisfy the comparison specified by Pred when compared to any value
+ /// contained within Other.
+ ///
+ /// Solves for range X in 'for all x in X, there exists a y in Y such that
+ /// icmp op x, y is true'. Every value that might make the comparison true
+ /// is included in the resulting range.
+ static ConstantRange makeICmpRegion(unsigned Pred,
+ const ConstantRange &Other);
+
/// getLower - Return the lower value for this range...
///
const APInt &getLower() const { return Lower; }
@@ -83,6 +98,10 @@ class ConstantRange {
///
bool contains(const APInt &Val) const;
+ /// contains - Return true if the other range is a subset of this one.
+ ///
+ bool contains(const ConstantRange &CR) const;
+
/// getSingleElement - If this set contains a single element, return it,
/// otherwise return null.
///
@@ -134,21 +153,13 @@ class ConstantRange {
ConstantRange subtract(const APInt &CI) const;
/// intersectWith - Return the range that results from the intersection of
- /// this range with another range. The resultant range is pruned as much as
- /// possible, but there may be cases where elements are included that are in
- /// one of the sets but not the other. For example: [100, 8) intersect [3,
- /// 120) yields [3, 120)
- ///
- ConstantRange intersectWith(const ConstantRange &CR) const;
-
- /// maximalIntersectWith - Return the range that results from the intersection
- /// of this range with another range. The resultant range is guaranteed to
+ /// this range with another range. The resultant range is guaranteed to
/// include all elements contained in both input ranges, and to have the
/// smallest possible set size that does so. Because there may be two
- /// intersections with the same set size, A.maximalIntersectWith(B) might not
- /// be equal to B.maximalIntersectWith(A).
+ /// intersections with the same set size, A.intersectWith(B) might not
+ /// be equal to B.intersectWith(A).
///
- ConstantRange maximalIntersectWith(const ConstantRange &CR) const;
+ ConstantRange intersectWith(const ConstantRange &CR) const;
/// unionWith - Return the range that results from the union of this range
/// with another range. The resultant range is guaranteed to include the
@@ -176,6 +187,28 @@ class ConstantRange {
/// truncated to the specified type.
ConstantRange truncate(uint32_t BitWidth) const;
+ /// add - Return a new range representing the possible values resulting
+ /// from an addition of a value in this range and a value in Other.
+ ConstantRange add(const ConstantRange &Other) const;
+
+ /// multiply - Return a new range representing the possible values resulting
+ /// from a multiplication of a value in this range and a value in Other.
+ /// TODO: This isn't fully implemented yet.
+ ConstantRange multiply(const ConstantRange &Other) const;
+
+ /// smax - Return a new range representing the possible values resulting
+ /// from a signed maximum of a value in this range and a value in Other.
+ ConstantRange smax(const ConstantRange &Other) const;
+
+ /// umax - Return a new range representing the possible values resulting
+ /// from an unsigned maximum of a value in this range and a value in Other.
+ ConstantRange umax(const ConstantRange &Other) const;
+
+ /// udiv - Return a new range representing the possible values resulting
+ /// from an unsigned division of a value in this range and a value in Other.
+ /// TODO: This isn't fully implemented yet.
+ ConstantRange udiv(const ConstantRange &Other) const;
+
/// print - Print out the bounds to a stream...
///
void print(raw_ostream &OS) const;
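// Illustrative sketch with arbitrary 8-bit bounds: intersectWith now carries
// the old maximalIntersectWith semantics, and add() composes two ranges
// arithmetically.
#include "llvm/Support/ConstantRange.h"
using namespace llvm;

void rangeDemo() {
  ConstantRange A(APInt(8, 4), APInt(8, 10)); // [4, 10)
  ConstantRange B(APInt(8, 6), APInt(8, 20)); // [6, 20)
  ConstantRange I = A.intersectWith(B);       // tightest range with all common elements
  ConstantRange S = A.add(B);                 // possible values of a + b
  (void)I; (void)S;
}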
diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake
index 4d6fcc8e0974..ad210ed3b54d 100644
--- a/include/llvm/Support/DataTypes.h.cmake
+++ b/include/llvm/Support/DataTypes.h.cmake
@@ -1,22 +1,25 @@
-//===-- include/Support/DataTypes.h - Define fixed size types ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains definitions to figure out the size of _HOST_ data types.
-// This file is important because different host OS's define different macros,
-// which makes portability tough. This file exports the following definitions:
-//
-// [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types
-// [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.
-//
-// No library is required when using these functinons.
-//
-//===----------------------------------------------------------------------===//
+/*===-- include/Support/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough. This file exports the following *|
+|* definitions: *|
+|* *|
+|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *|
+|* *|
+|* No library is required when using these functions.                        *|
+|* *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
#ifndef SUPPORT_DATATYPES_H
#define SUPPORT_DATATYPES_H
@@ -24,18 +27,21 @@
#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
-#undef HAVE_UINT64_T
-#undef HAVE_U_INT64_T
+#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
+#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
-// FIXME: UGLY HACK (Added by Kevin)
-#define HAVE_UINT64_T 1
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
#ifndef _MSC_VER
-// Note that this header's correct operation depends on __STDC_LIMIT_MACROS
-// being defined. We would define it here, but in order to prevent Bad Things
-// happening when system headers or C++ STL headers include stdint.h before
-// we define it here, we define it on the g++ command line (in Makefile.rules).
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+ being defined. We would define it here, but in order to prevent Bad Things
+ happening when system headers or C++ STL headers include stdint.h before we
+ define it here, we define it on the g++ command line (in Makefile.rules). */
#if !defined(__STDC_LIMIT_MACROS)
# error "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
#endif
@@ -45,7 +51,7 @@
"#including Support/DataTypes.h"
#endif
-// Note that <inttypes.h> includes <stdint.h>, if this is a C99 system.
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
@@ -58,17 +64,11 @@
#include <stdint.h>
#endif
-#ifdef __cplusplus
-#include <cmath>
-#else
-#include <math.h>
-#endif
-
#ifdef _AIX
#include "llvm/Support/AIXDataTypesFix.h"
#endif
-// Handle incorrect definition of uint64_t as u_int64_t
+/* Handle incorrect definition of uint64_t as u_int64_t */
#ifndef HAVE_UINT64_T
#ifdef HAVE_U_INT64_T
typedef u_int64_t uint64_t;
@@ -90,11 +90,16 @@ typedef u_int64_t uint64_t;
#endif
#else /* _MSC_VER */
-// Visual C++ doesn't provide standard integer headers, but it does provide
-// built-in data types.
+/* Visual C++ doesn't provide standard integer headers, but it does provide
+ built-in data types. */
#include <stdlib.h>
#include <stddef.h>
#include <sys/types.h>
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef signed int int32_t;
diff --git a/include/llvm/Support/DataTypes.h.in b/include/llvm/Support/DataTypes.h.in
index 72063f7d2add..405f4764c8f5 100644
--- a/include/llvm/Support/DataTypes.h.in
+++ b/include/llvm/Support/DataTypes.h.in
@@ -1,22 +1,25 @@
-//===-- include/Support/DataTypes.h - Define fixed size types ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains definitions to figure out the size of _HOST_ data types.
-// This file is important because different host OS's define different macros,
-// which makes portability tough. This file exports the following definitions:
-//
-// [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types
-// [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.
-//
-// No library is required when using these functinons.
-//
-//===----------------------------------------------------------------------===//
+/*===-- include/Support/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough. This file exports the following *|
+|* definitions: *|
+|* *|
+|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *|
+|* *|
+|* No library is required when using these functions.                        *|
+|* *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
#ifndef SUPPORT_DATATYPES_H
#define SUPPORT_DATATYPES_H
@@ -27,12 +30,18 @@
#undef HAVE_UINT64_T
#undef HAVE_U_INT64_T
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
#ifndef _MSC_VER
-// Note that this header's correct operation depends on __STDC_LIMIT_MACROS
-// being defined. We would define it here, but in order to prevent Bad Things
-// happening when system headers or C++ STL headers include stdint.h before
-// we define it here, we define it on the g++ command line (in Makefile.rules).
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+ being defined. We would define it here, but in order to prevent Bad Things
+ happening when system headers or C++ STL headers include stdint.h before we
+ define it here, we define it on the g++ command line (in Makefile.rules). */
#if !defined(__STDC_LIMIT_MACROS)
# error "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
#endif
@@ -42,7 +51,7 @@
"#including Support/DataTypes.h"
#endif
-// Note that <inttypes.h> includes <stdint.h>, if this is a C99 system.
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
@@ -55,17 +64,11 @@
#include <stdint.h>
#endif
-#ifdef __cplusplus
-#include <cmath>
-#else
-#include <math.h>
-#endif
-
#ifdef _AIX
#include "llvm/Support/AIXDataTypesFix.h"
#endif
-// Handle incorrect definition of uint64_t as u_int64_t
+/* Handle incorrect definition of uint64_t as u_int64_t */
#ifndef HAVE_UINT64_T
#ifdef HAVE_U_INT64_T
typedef u_int64_t uint64_t;
@@ -87,8 +90,8 @@ typedef u_int64_t uint64_t;
#endif
#else /* _MSC_VER */
-// Visual C++ doesn't provide standard integer headers, but it does provide
-// built-in data types.
+/* Visual C++ doesn't provide standard integer headers, but it does provide
+ built-in data types. */
#include <stdlib.h>
#include <stddef.h>
#include <sys/types.h>
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index 52d0d3fb4055..6f82ea716f24 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -18,61 +18,65 @@
// can specify '-debug-only=foo' to enable JUST the debug information for the
// foo class.
//
-// When compiling in release mode, the -debug-* options and all code in DEBUG()
-// statements disappears, so it does not effect the runtime of the code.
+// When compiling without assertions, the -debug-* options and all code in
+// DEBUG() statements disappear, so they do not affect the runtime of the code.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_DEBUG_H
#define LLVM_SUPPORT_DEBUG_H
-#include "llvm/Support/Streams.h"
-
namespace llvm {
// DebugFlag - This boolean is set to true if the '-debug' command line option
// is specified. This should probably not be referenced directly, instead, use
// the DEBUG macro below.
//
+#ifndef NDEBUG
extern bool DebugFlag;
+#endif
// isCurrentDebugType - Return true if the specified string is the debug type
// specified on the command line, or if none was specified on the command line
// with the -debug-only=X option.
//
+#ifndef NDEBUG
bool isCurrentDebugType(const char *Type);
+#else
+#define isCurrentDebugType(X) (false)
+#endif
+
+// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug
+// information. If the '-debug' option is specified on the command line, and if
+// this is a debug build, then the code specified as the option to the macro
+// will be executed. Otherwise it will not be. Example:
+//
+// DEBUG_WITH_TYPE("bitset", errs() << "Bitset contains: " << Bitset << "\n");
+//
+// This will emit the debug information if -debug is present, and -debug-only is
+// not specified, or is specified as "bitset".
+
+#ifdef NDEBUG
+#define DEBUG_WITH_TYPE(TYPE, X) do { } while (0)
+#else
+#define DEBUG_WITH_TYPE(TYPE, X) \
+ do { if (DebugFlag && isCurrentDebugType(TYPE)) { X; } } while (0)
+#endif
// DEBUG macro - This macro should be used by passes to emit debug information.
// If the '-debug' option is specified on the command line, and if this is a
// debug build, then the code specified as the option to the macro will be
// executed. Otherwise it will not be. Example:
//
-// DEBUG(cerr << "Bitset contains: " << Bitset << "\n");
+// DEBUG(errs() << "Bitset contains: " << Bitset << "\n");
//
#ifndef DEBUG_TYPE
#define DEBUG_TYPE ""
#endif
-#ifdef NDEBUG
-#define DEBUG(X)
-#else
-#define DEBUG(X) \
- do { if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { X; } } while (0)
-#endif
-
-/// getErrorOutputStream - Returns the error output stream (std::cerr). This
-/// places the std::c* I/O streams into one .cpp file and relieves the whole
-/// program from having to have hundreds of static c'tor/d'tors for them.
-///
-OStream &getErrorOutputStream(const char *DebugType);
-
-#ifdef NDEBUG
-#define DOUT llvm::OStream(0)
-#else
-#define DOUT llvm::getErrorOutputStream(DEBUG_TYPE)
-#endif
-
+#define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
+
} // End llvm namespace
#endif
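
A short usage sketch of the macros above; the "mypass" debug types are illustrative, and DEBUG_TYPE must be defined before the header is included:

#define DEBUG_TYPE "mypass" // illustrative debug type
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

void runSketch() {
  // Emitted for -debug (or -debug-only=mypass) in asserts builds only.
  DEBUG(llvm::errs() << "processing...\n");
  // Emitted for -debug-only=mypass-verbose, independent of DEBUG_TYPE.
  DEBUG_WITH_TYPE("mypass-verbose", llvm::errs() << "extra detail\n");
}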
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 5c089efc98ce..55c3c4ffbd76 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -19,20 +19,25 @@
#include <vector>
namespace llvm {
- class GlobalVariable;
+ class MDNode;
/// DebugLocTuple - Debug location tuple of scope, inlined-at location, line and column.
///
struct DebugLocTuple {
- GlobalVariable *CompileUnit;
+ MDNode *Scope;
+ MDNode *InlinedAtLoc;
unsigned Line, Col;
- DebugLocTuple(GlobalVariable *v, unsigned l, unsigned c)
- : CompileUnit(v), Line(l), Col(c) {};
+ DebugLocTuple()
+ : Scope(0), InlinedAtLoc(0), Line(~0U), Col(~0U) {};
+
+ DebugLocTuple(MDNode *n, MDNode *i, unsigned l, unsigned c)
+ : Scope(n), InlinedAtLoc(i), Line(l), Col(c) {};
bool operator==(const DebugLocTuple &DLT) const {
- return CompileUnit == DLT.CompileUnit &&
- Line == DLT.Line && Col == DLT.Col;
+ return Scope == DLT.Scope &&
+ InlinedAtLoc == DLT.InlinedAtLoc &&
+ Line == DLT.Line && Col == DLT.Col;
}
bool operator!=(const DebugLocTuple &DLT) const {
return !(*this == DLT);
@@ -60,23 +65,25 @@ namespace llvm {
bool operator!=(const DebugLoc &DL) const { return !(*this == DL); }
};
- // Partially specialize DenseMapInfo for DebugLocTyple.
+ // Specialize DenseMapInfo for DebugLocTuple.
template<> struct DenseMapInfo<DebugLocTuple> {
static inline DebugLocTuple getEmptyKey() {
- return DebugLocTuple(0, ~0U, ~0U);
+ return DebugLocTuple(0, 0, ~0U, ~0U);
}
static inline DebugLocTuple getTombstoneKey() {
- return DebugLocTuple((GlobalVariable*)~1U, ~1U, ~1U);
+ return DebugLocTuple((MDNode*)~1U, (MDNode*)~1U, ~1U, ~1U);
}
static unsigned getHashValue(const DebugLocTuple &Val) {
- return DenseMapInfo<GlobalVariable*>::getHashValue(Val.CompileUnit) ^
+ return DenseMapInfo<MDNode*>::getHashValue(Val.Scope) ^
+ DenseMapInfo<MDNode*>::getHashValue(Val.InlinedAtLoc) ^
DenseMapInfo<unsigned>::getHashValue(Val.Line) ^
DenseMapInfo<unsigned>::getHashValue(Val.Col);
}
static bool isEqual(const DebugLocTuple &LHS, const DebugLocTuple &RHS) {
- return LHS.CompileUnit == RHS.CompileUnit &&
- LHS.Line == RHS.Line &&
- LHS.Col == RHS.Col;
+ return LHS.Scope == RHS.Scope &&
+ LHS.InlinedAtLoc == RHS.InlinedAtLoc &&
+ LHS.Line == RHS.Line &&
+ LHS.Col == RHS.Col;
}
static bool isPod() { return true; }
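
With the specialization above, DebugLocTuple can key a DenseMap directly; a hedged sketch of the tuple-to-index interning the DebugLoc machinery builds on (the helper name is illustrative):

#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/DebugLoc.h"
using namespace llvm;

// Map each distinct (scope, inlined-at, line, col) tuple to a small id.
unsigned internDebugLoc(DenseMap<DebugLocTuple, unsigned> &Ids,
                        MDNode *Scope, MDNode *InlinedAt,
                        unsigned Line, unsigned Col) {
  DebugLocTuple Tuple(Scope, InlinedAt, Line, Col);
  unsigned &Id = Ids[Tuple]; // works because DenseMapInfo is specialized
  if (Id == 0)
    Id = Ids.size();         // first occurrence: hand out the next id
  return Id;
}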
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 55838b8144bc..bfccc522b979 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -449,6 +449,7 @@ enum dwarf_constants {
// Call frame instruction encodings
DW_CFA_extended = 0x00,
+ DW_CFA_nop = 0x00,
DW_CFA_advance_loc = 0x40,
DW_CFA_offset = 0x80,
DW_CFA_restore = 0xc0,
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
new file mode 100644
index 000000000000..67bccf09269e
--- /dev/null
+++ b/include/llvm/Support/ErrorHandling.h
@@ -0,0 +1,87 @@
+//===- llvm/Support/ErrorHandling.h - Callbacks for errors ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API used to indicate error conditions.
+// Callbacks can be registered for these errors through this API.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ERRORHANDLING_H
+#define LLVM_SUPPORT_ERRORHANDLING_H
+
+#include "llvm/Support/Compiler.h"
+#include <string>
+
+namespace llvm {
+ class Twine;
+
+ /// An error handler callback.
+ typedef void (*llvm_error_handler_t)(void *user_data,
+ const std::string& reason);
+
+  /// llvm_install_error_handler - Installs a new error handler to be used
+ /// whenever a serious (non-recoverable) error is encountered by LLVM.
+ ///
+ /// If you are using llvm_start_multithreaded, you should register the handler
+ /// before doing that.
+ ///
+ /// If no error handler is installed the default is to print the error message
+ /// to stderr, and call exit(1). If an error handler is installed then it is
+  /// the handler's responsibility to log the message; it will no longer be
+ /// printed to stderr. If the error handler returns, then exit(1) will be
+ /// called.
+ ///
+ /// It is dangerous to naively use an error handler which throws an exception.
+ /// Even though some applications desire to gracefully recover from arbitrary
+ /// faults, blindly throwing exceptions through unfamiliar code isn't a way to
+ /// achieve this.
+ ///
+ /// \param user_data - An argument which will be passed to the install error
+ /// handler.
+ void llvm_install_error_handler(llvm_error_handler_t handler,
+ void *user_data = 0);
+
+ /// Restores default error handling behaviour.
+ /// This must not be called between llvm_start_multithreaded() and
+ /// llvm_stop_multithreaded().
+ void llvm_remove_error_handler();
+
+ /// Reports a serious error, calling any installed error handler. These
+ /// functions are intended to be used for error conditions which are outside
+ /// the control of the compiler (I/O errors, invalid user input, etc.)
+ ///
+ /// If no error handler is installed the default is to print the message to
+ /// standard error, followed by a newline.
+  /// After the error handler is called, this function will call exit(1); it
+  /// does not return.
+ void llvm_report_error(const char *reason) NORETURN;
+ void llvm_report_error(const std::string &reason) NORETURN;
+ void llvm_report_error(const Twine &reason) NORETURN;
+
+ /// This function calls abort(), and prints the optional message to stderr.
+ /// Use the llvm_unreachable macro (that adds location info), instead of
+ /// calling this function directly.
+ void llvm_unreachable_internal(const char *msg=0, const char *file=0,
+ unsigned line=0) NORETURN;
+}
+
+/// Prints the message and location info to stderr in !NDEBUG builds.
+/// This is intended to be used for "impossible" situations that imply
+/// a bug in the compiler.
+///
+/// In NDEBUG mode it only prints "UNREACHABLE executed".
+/// Use this instead of assert(0), so that the compiler knows this path
+/// is not reachable even for NDEBUG builds.
+#ifndef NDEBUG
+#define llvm_unreachable(msg) llvm_unreachable_internal(msg, __FILE__, __LINE__)
+#else
+#define llvm_unreachable(msg) llvm_unreachable_internal()
+#endif
+
+#endif
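
A brief sketch of the intended call pattern; the handler body and the "mytool" user data are illustrative:

#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <cstdlib>

static void HandleLLVMError(void *UserData, const std::string &Reason) {
  std::fprintf(stderr, "[%s] fatal: %s\n",
               static_cast<const char *>(UserData), Reason.c_str());
  std::abort(); // returning would still end in exit(1)
}

int main() {
  static const char ToolName[] = "mytool";
  llvm::llvm_install_error_handler(HandleLLVMError, (void *)ToolName);
  llvm::llvm_report_error("invalid input file"); // does not return
}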
diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h
index 2ab097faf56d..df03f66ddc7d 100644
--- a/include/llvm/Support/Format.h
+++ b/include/llvm/Support/Format.h
@@ -36,6 +36,10 @@ class format_object_base {
protected:
const char *Fmt;
virtual void home(); // Out of line virtual method.
+
+ /// snprint - Call snprintf() for this object, on the given buffer and size.
+ virtual int snprint(char *Buffer, unsigned BufferSize) const = 0;
+
public:
format_object_base(const char *fmt) : Fmt(fmt) {}
virtual ~format_object_base() {}
@@ -43,7 +47,23 @@ public:
/// print - Format the object into the specified buffer. On success, this
/// returns the length of the formatted string. If the buffer is too small,
/// this returns a length to retry with, which will be larger than BufferSize.
- virtual unsigned print(char *Buffer, unsigned BufferSize) const = 0;
+ unsigned print(char *Buffer, unsigned BufferSize) const {
+ assert(BufferSize && "Invalid buffer size!");
+
+ // Print the string, leaving room for the terminating null.
+ int N = snprint(Buffer, BufferSize);
+
+    // VC++ and old GlibC return negative on overflow; just double the size.
+ if (N < 0)
+ return BufferSize*2;
+
+ // Other impls yield number of bytes needed, not including the final '\0'.
+ if (unsigned(N) >= BufferSize)
+ return N+1;
+
+ // Otherwise N is the length of output (not including the final '\0').
+ return N;
+ }
};
/// format_object1 - This is a templated helper class used by the format
@@ -58,17 +78,8 @@ public:
: format_object_base(fmt), Val(val) {
}
- /// print - Format the object into the specified buffer. On success, this
- /// returns the length of the formatted string. If the buffer is too small,
- /// this returns a length to retry with, which will be larger than BufferSize.
- virtual unsigned print(char *Buffer, unsigned BufferSize) const {
- int N = snprintf(Buffer, BufferSize-1, Fmt, Val);
- if (N < 0) // VC++ and old GlibC return negative on overflow.
- return BufferSize*2;
- if (unsigned(N) >= BufferSize-1)// Other impls yield number of bytes needed.
- return N+1;
- // If N is positive and <= BufferSize-1, then the string fit, yay.
- return N;
+ virtual int snprint(char *Buffer, unsigned BufferSize) const {
+ return snprintf(Buffer, BufferSize, Fmt, Val);
}
};
@@ -85,17 +96,8 @@ public:
: format_object_base(fmt), Val1(val1), Val2(val2) {
}
- /// print - Format the object into the specified buffer. On success, this
- /// returns the length of the formatted string. If the buffer is too small,
- /// this returns a length to retry with, which will be larger than BufferSize.
- virtual unsigned print(char *Buffer, unsigned BufferSize) const {
- int N = snprintf(Buffer, BufferSize-1, Fmt, Val1, Val2);
- if (N < 0) // VC++ and old GlibC return negative on overflow.
- return BufferSize*2;
- if (unsigned(N) >= BufferSize-1)// Other impls yield number of bytes needed.
- return N+1;
- // If N is positive and <= BufferSize-1, then the string fit, yay.
- return N;
+ virtual int snprint(char *Buffer, unsigned BufferSize) const {
+ return snprintf(Buffer, BufferSize, Fmt, Val1, Val2);
}
};
@@ -113,17 +115,8 @@ public:
: format_object_base(fmt), Val1(val1), Val2(val2), Val3(val3) {
}
- /// print - Format the object into the specified buffer. On success, this
- /// returns the length of the formatted string. If the buffer is too small,
- /// this returns a length to retry with, which will be larger than BufferSize.
- virtual unsigned print(char *Buffer, unsigned BufferSize) const {
- int N = snprintf(Buffer, BufferSize-1, Fmt, Val1, Val2, Val3);
- if (N < 0) // VC++ and old GlibC return negative on overflow.
- return BufferSize*2;
- if (unsigned(N) >= BufferSize-1)// Other impls yield number of bytes needed.
- return N+1;
- // If N is positive and <= BufferSize-1, then the string fit, yay.
- return N;
+ virtual int snprint(char *Buffer, unsigned BufferSize) const {
+ return snprintf(Buffer, BufferSize, Fmt, Val1, Val2, Val3);
}
};
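
The print() contract above implies a grow-and-retry loop on the caller's side; a sketch of that loop (the helper is illustrative, not part of the header):

#include "llvm/Support/Format.h"
#include <string>
#include <vector>

// FO is any format_object_base; grow the buffer until print() fits.
std::string formatToString(const llvm::format_object_base &FO) {
  std::vector<char> Buf(128);
  for (;;) {
    unsigned Len = FO.print(&Buf[0], unsigned(Buf.size()));
    if (Len < Buf.size())
      return std::string(&Buf[0], Len); // it fit; Len is the output length
    Buf.resize(Len);                    // too small; Len is a size to retry with
  }
}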
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
new file mode 100644
index 000000000000..24a3546200ac
--- /dev/null
+++ b/include/llvm/Support/FormattedStream.h
@@ -0,0 +1,150 @@
+//===-- llvm/CodeGen/FormattedStream.h - Formatted streams ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains raw_ostream implementations for streams that support
+// things like pretty-printing comments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATTEDSTREAM_H
+#define LLVM_SUPPORT_FORMATTEDSTREAM_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm
+{
+  /// formatted_raw_ostream - A raw_ostream that wraps another stream and
+  /// tracks columns to handle asm-specific constructs.
+ ///
+ class formatted_raw_ostream : public raw_ostream {
+ public:
+ /// DELETE_STREAM - Tell the destructor to delete the held stream.
+ ///
+ static const bool DELETE_STREAM = true;
+
+ /// PRESERVE_STREAM - Tell the destructor to not delete the held
+ /// stream.
+ ///
+ static const bool PRESERVE_STREAM = false;
+
+ private:
+ /// TheStream - The real stream we output to. We set it to be
+ /// unbuffered, since we're already doing our own buffering.
+ ///
+ raw_ostream *TheStream;
+
+ /// DeleteStream - Do we need to delete TheStream in the
+ /// destructor?
+ ///
+ bool DeleteStream;
+
+ /// ColumnScanned - The current output column of the data that's
+ /// been flushed and the portion of the buffer that's been
+ /// scanned. The column scheme is zero-based.
+ ///
+ unsigned ColumnScanned;
+
+ /// Scanned - This points to one past the last character in the
+ /// buffer we've scanned.
+ ///
+ const char *Scanned;
+
+ virtual void write_impl(const char *Ptr, size_t Size);
+
+ /// current_pos - Return the current position within the stream,
+ /// not counting the bytes currently in the buffer.
+ virtual uint64_t current_pos() {
+      // This has the same effect as calling TheStream->current_pos(),
+ // but that interface is private.
+ return TheStream->tell() - TheStream->GetNumBytesInBuffer();
+ }
+
+ /// ComputeColumn - Examine the given output buffer and figure out which
+ /// column we end up in after output.
+ ///
+ void ComputeColumn(const char *Ptr, size_t size);
+
+ public:
+    /// formatted_raw_ostream - Wrap the given stream for formatted
+    /// output. If Delete is true, the wrapped stream is deleted when
+    /// this stream is destroyed.
+ ///
+ /// As a side effect, the given Stream is set to be Unbuffered.
+ /// This is because formatted_raw_ostream does its own buffering,
+ /// so it doesn't want another layer of buffering to be happening
+ /// underneath it.
+ ///
+ formatted_raw_ostream(raw_ostream &Stream, bool Delete = false)
+ : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
+ setStream(Stream, Delete);
+ }
+ explicit formatted_raw_ostream()
+ : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
+ Scanned = 0;
+ }
+
+ ~formatted_raw_ostream() {
+ flush();
+ releaseStream();
+ }
+
+ void setStream(raw_ostream &Stream, bool Delete = false) {
+ releaseStream();
+
+ TheStream = &Stream;
+ DeleteStream = Delete;
+
+ // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
+ // own buffering, and it doesn't need or want TheStream to do another
+ // layer of buffering underneath. Resize the buffer to what TheStream
+ // had been using, and tell TheStream not to do its own buffering.
+ if (size_t BufferSize = TheStream->GetBufferSize())
+ SetBufferSize(BufferSize);
+ else
+ SetUnbuffered();
+ TheStream->SetUnbuffered();
+
+ Scanned = 0;
+ }
+
+ /// PadToColumn - Align the output to some column number. If the current
+ /// column is already equal to or more than NewCol, PadToColumn inserts one
+ /// space.
+ ///
+ /// \param NewCol - The column to move to.
+ void PadToColumn(unsigned NewCol);
+
+ private:
+ void releaseStream() {
+ // Delete the stream if needed. Otherwise, transfer the buffer
+ // settings from this raw_ostream back to the underlying stream.
+ if (!TheStream)
+ return;
+ if (DeleteStream)
+ delete TheStream;
+ else if (size_t BufferSize = GetBufferSize())
+ TheStream->SetBufferSize(BufferSize);
+ else
+ TheStream->SetUnbuffered();
+ }
+ };
+
+/// fouts() - This returns a reference to a formatted_raw_ostream for
+/// standard output. Use it like: fouts() << "foo" << "bar";
+formatted_raw_ostream &fouts();
+
+/// ferrs() - This returns a reference to a formatted_raw_ostream for
+/// standard error. Use it like: ferrs() << "foo" << "bar";
+formatted_raw_ostream &ferrs();
+
+} // end llvm namespace
+
+
+#endif
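
A brief sketch of the column padding this stream adds; the assembly text and target column are illustrative:

#include "llvm/Support/FormattedStream.h"

void emitLine() {
  llvm::formatted_raw_ostream &OS = llvm::fouts();
  OS << "  movl %eax, %ebx";
  OS.PadToColumn(40);                // zero-based target column
  OS << "# illustrative asm comment\n";
  OS.flush();
}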
diff --git a/include/llvm/Support/GetElementPtrTypeIterator.h b/include/llvm/Support/GetElementPtrTypeIterator.h
index e1cda75c5f6a..f5915c992cdb 100644
--- a/include/llvm/Support/GetElementPtrTypeIterator.h
+++ b/include/llvm/Support/GetElementPtrTypeIterator.h
@@ -21,8 +21,9 @@
namespace llvm {
template<typename ItTy = User::const_op_iterator>
class generic_gep_type_iterator
- : public forward_iterator<const Type *, ptrdiff_t> {
- typedef forward_iterator<const Type*, ptrdiff_t> super;
+ : public std::iterator<std::forward_iterator_tag, const Type *, ptrdiff_t> {
+ typedef std::iterator<std::forward_iterator_tag,
+ const Type *, ptrdiff_t> super;
ItTy OpIt;
const Type *CurTy;
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 01b44d0b8e2f..bd3fcea11025 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -24,53 +24,33 @@
#define LLVM_SUPPORT_GRAPHWRITER_H
#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/System/Path.h"
-#include <fstream>
#include <vector>
+#include <cassert>
namespace llvm {
namespace DOT { // Private functions...
- inline std::string EscapeString(const std::string &Label) {
- std::string Str(Label);
- for (unsigned i = 0; i != Str.length(); ++i)
- switch (Str[i]) {
- case '\n':
- Str.insert(Str.begin()+i, '\\'); // Escape character...
- ++i;
- Str[i] = 'n';
- break;
- case '\t':
- Str.insert(Str.begin()+i, ' '); // Convert to two spaces
- ++i;
- Str[i] = ' ';
- break;
- case '\\':
- if (i+1 != Str.length())
- switch (Str[i+1]) {
- case 'l': continue; // don't disturb \l
- case '|': case '{': case '}':
- Str.erase(Str.begin()+i); continue;
- default: break;
- }
- case '{': case '}':
- case '<': case '>':
- case '|': case '"':
- Str.insert(Str.begin()+i, '\\'); // Escape character...
- ++i; // don't infinite loop
- break;
- }
- return Str;
- }
+ std::string EscapeString(const std::string &Label);
+}
+
+namespace GraphProgram {
+ enum Name {
+ DOT,
+ FDP,
+ NEATO,
+ TWOPI,
+ CIRCO
+ };
}
-void DisplayGraph(const sys::Path& Filename);
+void DisplayGraph(const sys::Path& Filename, bool wait=true, GraphProgram::Name program = GraphProgram::DOT);
template<typename GraphType>
class GraphWriter {
- std::ostream &O;
+ raw_ostream &O;
const GraphType &G;
bool ShortNames;
@@ -80,7 +60,7 @@ class GraphWriter {
typedef typename GTraits::nodes_iterator node_iterator;
typedef typename GTraits::ChildIteratorType child_iterator;
public:
- GraphWriter(std::ostream &o, const GraphType &g, bool SN) :
+ GraphWriter(raw_ostream &o, const GraphType &g, bool SN) :
O(o), G(g), ShortNames(SN) {}
void writeHeader(const std::string &Name) {
@@ -222,7 +202,7 @@ public:
for (unsigned i = 0; i != NumEdgeSources; ++i) {
if (i) O << "|";
- O << "<g" << i << ">";
+ O << "<s" << i << ">";
if (EdgeSourceLabels) O << (*EdgeSourceLabels)[i];
}
O << "}}";
@@ -241,8 +221,12 @@ public:
if (SrcNodePort >= 0)
O << ":s" << SrcNodePort;
O << " -> Node" << DestNodeID;
- if (DestNodePort >= 0)
- O << ":d" << DestNodePort;
+ if (DestNodePort >= 0) {
+ if (DOTTraits::hasEdgeDestLabels())
+ O << ":d" << DestNodePort;
+ else
+ O << ":s" << DestNodePort;
+ }
if (!Attrs.empty())
O << "[" << Attrs << "]";
@@ -251,10 +235,10 @@ public:
};
template<typename GraphType>
-std::ostream &WriteGraph(std::ostream &O, const GraphType &G,
- bool ShortNames = false,
- const std::string &Name = "",
- const std::string &Title = "") {
+raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G,
+ bool ShortNames = false,
+ const std::string &Name = "",
+ const std::string &Title = "") {
// Start the graph emission process...
GraphWriter<GraphType> W(O, G, ShortNames);
@@ -273,33 +257,30 @@ std::ostream &WriteGraph(std::ostream &O, const GraphType &G,
}
template<typename GraphType>
-sys::Path WriteGraph(const GraphType &G,
- const std::string& Name,
- bool ShortNames = false,
- const std::string& Title = "") {
+sys::Path WriteGraph(const GraphType &G, const std::string &Name,
+ bool ShortNames = false, const std::string &Title = "") {
std::string ErrMsg;
sys::Path Filename = sys::Path::GetTemporaryDirectory(&ErrMsg);
if (Filename.isEmpty()) {
- cerr << "Error: " << ErrMsg << "\n";
+ errs() << "Error: " << ErrMsg << "\n";
return Filename;
}
Filename.appendComponent(Name + ".dot");
if (Filename.makeUnique(true,&ErrMsg)) {
- cerr << "Error: " << ErrMsg << "\n";
+ errs() << "Error: " << ErrMsg << "\n";
return sys::Path();
}
- cerr << "Writing '" << Filename << "'... ";
+ errs() << "Writing '" << Filename.str() << "'... ";
- std::ofstream O(Filename.c_str());
+ std::string ErrorInfo;
+ raw_fd_ostream O(Filename.c_str(), ErrorInfo);
- if (O.good()) {
+ if (ErrorInfo.empty()) {
WriteGraph(O, G, ShortNames, Name, Title);
- cerr << " done. \n";
-
- O.close();
+ errs() << " done. \n";
} else {
- cerr << "error opening file for writing!\n";
+ errs() << "error opening file '" << Filename.str() << "' for writing!\n";
Filename.clear();
}
@@ -310,17 +291,15 @@ sys::Path WriteGraph(const GraphType &G,
/// then cleanup. For use from the debugger.
///
template<typename GraphType>
-void ViewGraph(const GraphType& G,
- const std::string& Name,
- bool ShortNames = false,
- const std::string& Title = "") {
- sys::Path Filename = WriteGraph(G, Name, ShortNames, Title);
+void ViewGraph(const GraphType &G, const std::string &Name,
+ bool ShortNames = false, const std::string &Title = "",
+ GraphProgram::Name Program = GraphProgram::DOT) {
+ sys::Path Filename = WriteGraph(G, Name, ShortNames, Title);
- if (Filename.isEmpty()) {
+ if (Filename.isEmpty())
return;
- }
- DisplayGraph(Filename);
+ DisplayGraph(Filename, true, Program);
}
} // End llvm namespace
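
A hedged call-site sketch: ViewGraph only needs GraphTraits/DOTGraphTraits specializations for the graph type to be in scope (for function CFGs the in-tree specializations are assumed to come from llvm/Analysis/CFGPrinter.h):

#include "llvm/Analysis/CFGPrinter.h" // assumed: DOTGraphTraits for CFGs
#include "llvm/Function.h"
#include "llvm/Support/GraphWriter.h"

void viewCFGWith(llvm::Function *F) {
  llvm::ViewGraph(F, "cfg." + F->getNameStr(), false,
                  "CFG for " + F->getNameStr(),
                  llvm::GraphProgram::NEATO); // any of the programs above
}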
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index ed6a3f19ef7a..1f659787eb79 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -20,37 +20,86 @@
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ConstantFolder.h"
namespace llvm {
+/// IRBuilderDefaultInserter - This provides the default implementation of the
+/// IRBuilder 'InsertHelper' method that is called whenever an instruction is
+/// created by IRBuilder and needs to be inserted. By default, this inserts the
+/// instruction at the insertion point.
+template <bool preserveNames = true>
+class IRBuilderDefaultInserter {
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ if (BB) BB->getInstList().insert(InsertPt, I);
+ if (preserveNames)
+ I->setName(Name);
+ }
+};
+
+
/// IRBuilder - This provides a uniform API for creating instructions and
/// inserting them into a basic block: either at the end of a BasicBlock, or
/// at a specific iterator location in a block.
///
/// Note that the builder does not expose the full generality of LLVM
-/// instructions. For example, it cannot be used to create instructions with
-/// arbitrary names (specifically, names with nul characters in them) - It only
-/// supports nul-terminated C strings. For fully generic names, use
-/// I->setName(). For access to extra instruction properties, use the mutators
+/// instructions. For access to extra instruction properties, use the mutators
/// (e.g. setVolatile) on the instructions after they have been created.
/// The first template argument handles whether or not to preserve names in the
/// final instruction output. This defaults to on. The second template argument
/// specifies a class to use for creating constants. This defaults to creating
-/// minimally folded constants.
-template <bool preserveNames=true, typename T = ConstantFolder> class IRBuilder{
+/// minimally folded constants. The third template argument allows clients to
+/// specify custom insertion hooks that are called on every newly created
+/// instruction.
+template<bool preserveNames = true, typename T = ConstantFolder,
+ typename Inserter = IRBuilderDefaultInserter<preserveNames> >
+class IRBuilder : public Inserter {
BasicBlock *BB;
BasicBlock::iterator InsertPt;
+ unsigned MDKind;
+ MDNode *CurDbgLocation;
+ LLVMContext &Context;
T Folder;
public:
- IRBuilder(const T& F = T()) : Folder(F) { ClearInsertionPoint(); }
- explicit IRBuilder(BasicBlock *TheBB, const T& F = T())
- : Folder(F) { SetInsertPoint(TheBB); }
- IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F = T())
- : Folder(F) { SetInsertPoint(TheBB, IP); }
+ IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter())
+ : Inserter(I), MDKind(0), CurDbgLocation(0), Context(C), Folder(F) {
+ ClearInsertionPoint();
+ }
+
+ explicit IRBuilder(LLVMContext &C)
+ : MDKind(0), CurDbgLocation(0), Context(C), Folder(C) {
+ ClearInsertionPoint();
+ }
+
+ explicit IRBuilder(BasicBlock *TheBB, const T &F)
+ : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()), Folder(F) {
+ SetInsertPoint(TheBB);
+ }
+
+ explicit IRBuilder(BasicBlock *TheBB)
+ : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()),
+ Folder(Context) {
+ SetInsertPoint(TheBB);
+ }
+
+ IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F)
+ : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()), Folder(F) {
+ SetInsertPoint(TheBB, IP);
+ }
+
+ IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP)
+ : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()),
+ Folder(Context) {
+ SetInsertPoint(TheBB, IP);
+ }
/// getFolder - Get the constant folder being used.
- const T& getFolder() { return Folder; }
+ const T &getFolder() { return Folder; }
/// isNamePreserving - Return true if this builder is configured to actually
/// add the requested names to IR created through it.
@@ -84,20 +133,75 @@ public:
InsertPt = IP;
}
+  /// SetCurrentDebugLocation - Set the debug location information to be
+  /// attached to subsequently created instructions.
+ void SetCurrentDebugLocation(MDNode *L) {
+ if (MDKind == 0)
+ MDKind = Context.getMetadata().getMDKind("dbg");
+ if (MDKind == 0)
+ MDKind = Context.getMetadata().RegisterMDKind("dbg");
+ CurDbgLocation = L;
+ }
+
+ MDNode *getCurrentDebugLocation() const { return CurDbgLocation; }
+
+ /// SetDebugLocation - Set location information for the given instruction.
+ void SetDebugLocation(Instruction *I) {
+ if (CurDbgLocation)
+ Context.getMetadata().addMD(MDKind, CurDbgLocation, I);
+ }
+
/// Insert - Insert and return the specified instruction.
template<typename InstTy>
- InstTy *Insert(InstTy *I, const char *Name = "") const {
- InsertHelper(I, Name);
+ InstTy *Insert(InstTy *I, const Twine &Name = "") const {
+ this->InsertHelper(I, Name, BB, InsertPt);
+ if (CurDbgLocation)
+ Context.getMetadata().addMD(MDKind, CurDbgLocation, I);
return I;
}
- /// InsertHelper - Insert the specified instruction at the specified insertion
- /// point. This is split out of Insert so that it isn't duplicated for every
- /// template instantiation.
- void InsertHelper(Instruction *I, const char *Name) const {
- if (BB) BB->getInstList().insert(InsertPt, I);
- if (preserveNames && Name[0])
- I->setName(Name);
+ //===--------------------------------------------------------------------===//
+ // Type creation methods
+ //===--------------------------------------------------------------------===//
+
+  /// getInt1Ty - Fetch the type representing a single bit.
+ const Type *getInt1Ty() {
+ return Type::getInt1Ty(Context);
+ }
+
+ /// getInt8Ty - Fetch the type representing an 8-bit integer.
+ const Type *getInt8Ty() {
+ return Type::getInt8Ty(Context);
+ }
+
+ /// getInt16Ty - Fetch the type representing a 16-bit integer.
+ const Type *getInt16Ty() {
+ return Type::getInt16Ty(Context);
+ }
+
+  /// getInt32Ty - Fetch the type representing a 32-bit integer.
+ const Type *getInt32Ty() {
+ return Type::getInt32Ty(Context);
+ }
+
+ /// getInt64Ty - Fetch the type representing a 64-bit integer.
+ const Type *getInt64Ty() {
+ return Type::getInt64Ty(Context);
+ }
+
+ /// getFloatTy - Fetch the type representing a 32-bit floating point value.
+ const Type *getFloatTy() {
+ return Type::getFloatTy(Context);
+ }
+
+ /// getDoubleTy - Fetch the type representing a 64-bit floating point value.
+ const Type *getDoubleTy() {
+ return Type::getDoubleTy(Context);
+ }
+
+ /// getVoidTy - Fetch the type representing void.
+ const Type *getVoidTy() {
+ return Type::getVoidTy(Context);
}
//===--------------------------------------------------------------------===//
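
Taken together, the context-aware constructors and type getters make short construction sequences self-contained; a minimal sketch (function and value names are illustrative):

#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
#include <vector>
using namespace llvm;

// Builds: define i32 @add_one(i32 %x) { %inc = add i32 %x, 1; ret i32 %inc }
Function *makeAddOne(Module &M) {
  LLVMContext &Ctx = M.getContext();
  IRBuilder<> B(Ctx);                       // no insertion point yet

  std::vector<const Type *> Params(1, B.getInt32Ty());
  const FunctionType *FT = FunctionType::get(B.getInt32Ty(), Params, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "add_one", &M);

  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(Entry);                  // instructions now go here

  Value *X = &*F->arg_begin();
  Value *One = ConstantInt::get(B.getInt32Ty(), 1);
  B.CreateRet(B.CreateAdd(X, One, "inc"));
  return F;
}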
@@ -106,14 +210,14 @@ public:
/// CreateRetVoid - Create a 'ret void' instruction.
ReturnInst *CreateRetVoid() {
- return Insert(ReturnInst::Create());
+ return Insert(ReturnInst::Create(Context));
}
/// @verbatim
/// CreateRet - Create a 'ret <val>' instruction.
/// @endverbatim
ReturnInst *CreateRet(Value *V) {
- return Insert(ReturnInst::Create(V));
+ return Insert(ReturnInst::Create(Context, V));
}
/// CreateAggregateRet - Create a sequence of N insertvalue instructions,
@@ -128,7 +232,7 @@ public:
Value *V = UndefValue::get(RetType);
for (unsigned i = 0; i != N; ++i)
V = CreateInsertValue(V, retVals[i], i, "mrv");
- return Insert(ReturnInst::Create(V));
+ return Insert(ReturnInst::Create(Context, V));
}
/// CreateBr - Create an unconditional 'br label X' instruction.
@@ -153,126 +257,144 @@ public:
template<typename InputIterator>
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, InputIterator ArgBegin,
- InputIterator ArgEnd, const char *Name = "") {
+ InputIterator ArgEnd, const Twine &Name = "") {
return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
ArgBegin, ArgEnd), Name);
}
UnwindInst *CreateUnwind() {
- return Insert(new UnwindInst());
+ return Insert(new UnwindInst(Context));
}
UnreachableInst *CreateUnreachable() {
- return Insert(new UnreachableInst());
+ return Insert(new UnreachableInst(Context));
}
//===--------------------------------------------------------------------===//
// Instruction creation methods: Binary Operators
//===--------------------------------------------------------------------===//
- Value *CreateAdd(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateAdd(LC, RC);
return Insert(BinaryOperator::CreateAdd(LHS, RHS), Name);
}
- Value *CreateFAdd(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Folder.CreateNSWAdd(LC, RC);
+ return Insert(BinaryOperator::CreateNSWAdd(LHS, RHS), Name);
+ }
+ Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateFAdd(LC, RC);
return Insert(BinaryOperator::CreateFAdd(LHS, RHS), Name);
}
- Value *CreateSub(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateSub(LC, RC);
return Insert(BinaryOperator::CreateSub(LHS, RHS), Name);
}
- Value *CreateFSub(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Folder.CreateNSWSub(LC, RC);
+ return Insert(BinaryOperator::CreateNSWSub(LHS, RHS), Name);
+ }
+ Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateFSub(LC, RC);
return Insert(BinaryOperator::CreateFSub(LHS, RHS), Name);
}
- Value *CreateMul(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateMul(LC, RC);
return Insert(BinaryOperator::CreateMul(LHS, RHS), Name);
}
- Value *CreateFMul(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateFMul(LC, RC);
return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name);
}
- Value *CreateUDiv(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateUDiv(LC, RC);
return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
}
- Value *CreateSDiv(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateSDiv(LC, RC);
return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
}
- Value *CreateFDiv(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Folder.CreateExactSDiv(LC, RC);
+ return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
+ }
+ Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateFDiv(LC, RC);
return Insert(BinaryOperator::CreateFDiv(LHS, RHS), Name);
}
- Value *CreateURem(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateURem(LC, RC);
return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
}
- Value *CreateSRem(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateSRem(LC, RC);
return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
}
- Value *CreateFRem(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateFRem(LC, RC);
return Insert(BinaryOperator::CreateFRem(LHS, RHS), Name);
}
- Value *CreateShl(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateShl(LC, RC);
return Insert(BinaryOperator::CreateShl(LHS, RHS), Name);
}
- Value *CreateLShr(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateLShr(LC, RC);
return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
}
- Value *CreateAShr(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateAShr(LC, RC);
return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
}
- Value *CreateAnd(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateAnd(LC, RC);
return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
}
- Value *CreateOr(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateOr(LC, RC);
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
}
- Value *CreateXor(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateXor(LC, RC);
@@ -280,24 +402,24 @@ public:
}
Value *CreateBinOp(Instruction::BinaryOps Opc,
- Value *LHS, Value *RHS, const char *Name = "") {
+ Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateBinOp(Opc, LC, RC);
return Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
}
- Value *CreateNeg(Value *V, const char *Name = "") {
+ Value *CreateNeg(Value *V, const Twine &Name = "") {
if (Constant *VC = dyn_cast<Constant>(V))
return Folder.CreateNeg(VC);
return Insert(BinaryOperator::CreateNeg(V), Name);
}
- Value *CreateFNeg(Value *V, const char *Name = "") {
+ Value *CreateFNeg(Value *V, const Twine &Name = "") {
if (Constant *VC = dyn_cast<Constant>(V))
return Folder.CreateFNeg(VC);
return Insert(BinaryOperator::CreateFNeg(V), Name);
}
- Value *CreateNot(Value *V, const char *Name = "") {
+ Value *CreateNot(Value *V, const Twine &Name = "") {
if (Constant *VC = dyn_cast<Constant>(V))
return Folder.CreateNot(VC);
return Insert(BinaryOperator::CreateNot(V), Name);
@@ -308,20 +430,25 @@ public:
//===--------------------------------------------------------------------===//
MallocInst *CreateMalloc(const Type *Ty, Value *ArraySize = 0,
- const char *Name = "") {
+ const Twine &Name = "") {
return Insert(new MallocInst(Ty, ArraySize), Name);
}
AllocaInst *CreateAlloca(const Type *Ty, Value *ArraySize = 0,
- const char *Name = "") {
+ const Twine &Name = "") {
return Insert(new AllocaInst(Ty, ArraySize), Name);
}
FreeInst *CreateFree(Value *Ptr) {
return Insert(new FreeInst(Ptr));
}
- LoadInst *CreateLoad(Value *Ptr, const char *Name = "") {
+ // Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
+ // converting the string to 'bool' for the isVolatile parameter.
+ LoadInst *CreateLoad(Value *Ptr, const char *Name) {
+ return Insert(new LoadInst(Ptr), Name);
+ }
+ LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
return Insert(new LoadInst(Ptr), Name);
}
- LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const char *Name = "") {
+ LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
return Insert(new LoadInst(Ptr, 0, isVolatile), Name);
}
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
@@ -329,38 +456,69 @@ public:
}
template<typename InputIterator>
Value *CreateGEP(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *PC = dyn_cast<Constant>(Ptr)) {
// Every index must be constant.
InputIterator i;
- for (i = IdxBegin; i < IdxEnd; ++i) {
- if (!dyn_cast<Constant>(*i))
+ for (i = IdxBegin; i < IdxEnd; ++i)
+ if (!isa<Constant>(*i))
break;
- }
if (i == IdxEnd)
return Folder.CreateGetElementPtr(PC, &IdxBegin[0], IdxEnd - IdxBegin);
}
return Insert(GetElementPtrInst::Create(Ptr, IdxBegin, IdxEnd), Name);
}
- Value *CreateGEP(Value *Ptr, Value *Idx, const char *Name = "") {
+ template<typename InputIterator>
+ Value *CreateInBoundsGEP(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd,
+ const Twine &Name = "") {
+ if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+ // Every index must be constant.
+ InputIterator i;
+ for (i = IdxBegin; i < IdxEnd; ++i)
+ if (!isa<Constant>(*i))
+ break;
+ if (i == IdxEnd)
+ return Folder.CreateInBoundsGetElementPtr(PC,
+ &IdxBegin[0],
+ IdxEnd - IdxBegin);
+ }
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, IdxBegin, IdxEnd),
+ Name);
+ }
+ Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
if (Constant *PC = dyn_cast<Constant>(Ptr))
if (Constant *IC = dyn_cast<Constant>(Idx))
return Folder.CreateGetElementPtr(PC, &IC, 1);
return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
}
- Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const char *Name = "") {
- Value *Idx = ConstantInt::get(Type::Int32Ty, Idx0);
+ Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ if (Constant *IC = dyn_cast<Constant>(Idx))
+ return Folder.CreateInBoundsGetElementPtr(PC, &IC, 1);
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
+ }
+ Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
if (Constant *PC = dyn_cast<Constant>(Ptr))
return Folder.CreateGetElementPtr(PC, &Idx, 1);
return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);
}
+ Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0,
+ const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name);
+ }
Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
- const char *Name = "") {
+ const Twine &Name = "") {
Value *Idxs[] = {
- ConstantInt::get(Type::Int32Ty, Idx0),
- ConstantInt::get(Type::Int32Ty, Idx1)
+ ConstantInt::get(Type::getInt32Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx1)
};
if (Constant *PC = dyn_cast<Constant>(Ptr))
@@ -368,19 +526,40 @@ public:
return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);
}
- Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const char *Name = "") {
- Value *Idx = ConstantInt::get(Type::Int64Ty, Idx0);
+ Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
+ const Twine &Name = "") {
+ Value *Idxs[] = {
+ ConstantInt::get(Type::getInt32Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx1)
+ };
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name);
+ }
+ Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (Constant *PC = dyn_cast<Constant>(Ptr))
return Folder.CreateGetElementPtr(PC, &Idx, 1);
return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);
}
- Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const char *Name = "") {
+ Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+ const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name);
+ }
+ Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
Value *Idxs[] = {
- ConstantInt::get(Type::Int64Ty, Idx0),
- ConstantInt::get(Type::Int64Ty, Idx1)
+ ConstantInt::get(Type::getInt64Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt64Ty(Context), Idx1)
};
if (Constant *PC = dyn_cast<Constant>(Ptr))
@@ -388,235 +567,272 @@ public:
return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);
}
- Value *CreateStructGEP(Value *Ptr, unsigned Idx, const char *Name = "") {
- return CreateConstGEP2_32(Ptr, 0, Idx, Name);
+ Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ Value *Idxs[] = {
+ ConstantInt::get(Type::getInt64Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt64Ty(Context), Idx1)
+ };
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name);
+ }
+ Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
+ return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name);
}
- Value *CreateGlobalString(const char *Str = "", const char *Name = "") {
- Constant *StrConstant = ConstantArray::get(Str, true);
- GlobalVariable *gv = new GlobalVariable(StrConstant->getType(),
+ Value *CreateGlobalString(const char *Str = "", const Twine &Name = "") {
+ Constant *StrConstant = ConstantArray::get(Context, Str, true);
+ Module &M = *BB->getParent()->getParent();
+ GlobalVariable *gv = new GlobalVariable(M,
+ StrConstant->getType(),
true,
GlobalValue::InternalLinkage,
StrConstant,
"",
- BB->getParent()->getParent(),
+ 0,
false);
gv->setName(Name);
return gv;
}
- Value *CreateGlobalStringPtr(const char *Str = "", const char *Name = "") {
+ Value *CreateGlobalStringPtr(const char *Str = "", const Twine &Name = "") {
Value *gv = CreateGlobalString(Str, Name);
- Value *zero = ConstantInt::get(Type::Int32Ty, 0);
+ Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
Value *Args[] = { zero, zero };
- return CreateGEP(gv, Args, Args+2, Name);
+ return CreateInBoundsGEP(gv, Args, Args+2, Name);
}
//===--------------------------------------------------------------------===//
// Instruction creation methods: Cast/Conversion Operators
//===--------------------------------------------------------------------===//
- Value *CreateTrunc(Value *V, const Type *DestTy, const char *Name = "") {
+ Value *CreateTrunc(Value *V, const Type *DestTy, const Twine &Name = "") {
return CreateCast(Instruction::Trunc, V, DestTy, Name);
}
- Value *CreateZExt(Value *V, const Type *DestTy, const char *Name = "") {
+ Value *CreateZExt(Value *V, const Type *DestTy, const Twine &Name = "") {
return CreateCast(Instruction::ZExt, V, DestTy, Name);
}
- Value *CreateSExt(Value *V, const Type *DestTy, const char *Name = "") {
+ Value *CreateSExt(Value *V, const Type *DestTy, const Twine &Name = "") {
return CreateCast(Instruction::SExt, V, DestTy, Name);
}
- Value *CreateFPToUI(Value *V, const Type *DestTy, const char *Name = ""){
+ Value *CreateFPToUI(Value *V, const Type *DestTy, const Twine &Name = ""){
return CreateCast(Instruction::FPToUI, V, DestTy, Name);
}
- Value *CreateFPToSI(Value *V, const Type *DestTy, const char *Name = ""){
+ Value *CreateFPToSI(Value *V, const Type *DestTy, const Twine &Name = ""){
return CreateCast(Instruction::FPToSI, V, DestTy, Name);
}
- Value *CreateUIToFP(Value *V, const Type *DestTy, const char *Name = ""){
+ Value *CreateUIToFP(Value *V, const Type *DestTy, const Twine &Name = ""){
return CreateCast(Instruction::UIToFP, V, DestTy, Name);
}
- Value *CreateSIToFP(Value *V, const Type *DestTy, const char *Name = ""){
+ Value *CreateSIToFP(Value *V, const Type *DestTy, const Twine &Name = ""){
return CreateCast(Instruction::SIToFP, V, DestTy, Name);
}
Value *CreateFPTrunc(Value *V, const Type *DestTy,
- const char *Name = "") {
+ const Twine &Name = "") {
return CreateCast(Instruction::FPTrunc, V, DestTy, Name);
}
- Value *CreateFPExt(Value *V, const Type *DestTy, const char *Name = "") {
+ Value *CreateFPExt(Value *V, const Type *DestTy, const Twine &Name = "") {
return CreateCast(Instruction::FPExt, V, DestTy, Name);
}
Value *CreatePtrToInt(Value *V, const Type *DestTy,
- const char *Name = "") {
+ const Twine &Name = "") {
return CreateCast(Instruction::PtrToInt, V, DestTy, Name);
}
Value *CreateIntToPtr(Value *V, const Type *DestTy,
- const char *Name = "") {
+ const Twine &Name = "") {
return CreateCast(Instruction::IntToPtr, V, DestTy, Name);
}
Value *CreateBitCast(Value *V, const Type *DestTy,
- const char *Name = "") {
+ const Twine &Name = "") {
return CreateCast(Instruction::BitCast, V, DestTy, Name);
}
-
+ Value *CreateZExtOrBitCast(Value *V, const Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Folder.CreateZExtOrBitCast(VC, DestTy);
+ return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
+ }
+ Value *CreateSExtOrBitCast(Value *V, const Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Folder.CreateSExtOrBitCast(VC, DestTy);
+ return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
+ }
+ Value *CreateTruncOrBitCast(Value *V, const Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Folder.CreateTruncOrBitCast(VC, DestTy);
+ return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
+ }
Value *CreateCast(Instruction::CastOps Op, Value *V, const Type *DestTy,
- const char *Name = "") {
+ const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
if (Constant *VC = dyn_cast<Constant>(V))
return Folder.CreateCast(Op, VC, DestTy);
return Insert(CastInst::Create(Op, V, DestTy), Name);
}
+ Value *CreatePointerCast(Value *V, const Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Folder.CreatePointerCast(VC, DestTy);
+ return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
+ }
Value *CreateIntCast(Value *V, const Type *DestTy, bool isSigned,
- const char *Name = "") {
+ const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
if (Constant *VC = dyn_cast<Constant>(V))
return Folder.CreateIntCast(VC, DestTy, isSigned);
return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
}
+ Value *CreateFPCast(Value *V, const Type *DestTy, const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Folder.CreateFPCast(VC, DestTy);
+ return Insert(CastInst::CreateFPCast(V, DestTy), Name);
+ }
//===--------------------------------------------------------------------===//
// Instruction creation methods: Compare Instructions
//===--------------------------------------------------------------------===//
- Value *CreateICmpEQ(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_EQ, LHS, RHS, Name);
}
- Value *CreateICmpNE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_NE, LHS, RHS, Name);
}
- Value *CreateICmpUGT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_UGT, LHS, RHS, Name);
}
- Value *CreateICmpUGE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_UGE, LHS, RHS, Name);
}
- Value *CreateICmpULT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_ULT, LHS, RHS, Name);
}
- Value *CreateICmpULE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_ULE, LHS, RHS, Name);
}
- Value *CreateICmpSGT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_SGT, LHS, RHS, Name);
}
- Value *CreateICmpSGE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_SGE, LHS, RHS, Name);
}
- Value *CreateICmpSLT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_SLT, LHS, RHS, Name);
}
- Value *CreateICmpSLE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name);
}
- Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name);
}
- Value *CreateFCmpOGT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name);
}
- Value *CreateFCmpOGE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name);
}
- Value *CreateFCmpOLT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name);
}
- Value *CreateFCmpOLE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name);
}
- Value *CreateFCmpONE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name);
}
- Value *CreateFCmpORD(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name);
}
- Value *CreateFCmpUNO(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name);
}
- Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name);
}
- Value *CreateFCmpUGT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name);
}
- Value *CreateFCmpUGE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name);
}
- Value *CreateFCmpULT(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name);
}
- Value *CreateFCmpULE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name);
}
- Value *CreateFCmpUNE(Value *LHS, Value *RHS, const char *Name = "") {
+ Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "") {
return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name);
}
Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateICmp(P, LC, RC);
return Insert(new ICmpInst(P, LHS, RHS), Name);
}
Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateFCmp(P, LC, RC);
return Insert(new FCmpInst(P, LHS, RHS), Name);
}
- Value *CreateVICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
- const char *Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Folder.CreateVICmp(P, LC, RC);
- return Insert(new VICmpInst(P, LHS, RHS), Name);
- }
- Value *CreateVFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
- const char *Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Folder.CreateVFCmp(P, LC, RC);
- return Insert(new VFCmpInst(P, LHS, RHS), Name);
- }
-
//===--------------------------------------------------------------------===//
// Instruction creation methods: Other Instructions
//===--------------------------------------------------------------------===//
- PHINode *CreatePHI(const Type *Ty, const char *Name = "") {
+ PHINode *CreatePHI(const Type *Ty, const Twine &Name = "") {
return Insert(PHINode::Create(Ty), Name);
}
- CallInst *CreateCall(Value *Callee, const char *Name = "") {
+ CallInst *CreateCall(Value *Callee, const Twine &Name = "") {
return Insert(CallInst::Create(Callee), Name);
}
- CallInst *CreateCall(Value *Callee, Value *Arg, const char *Name = "") {
+ CallInst *CreateCall(Value *Callee, Value *Arg, const Twine &Name = "") {
return Insert(CallInst::Create(Callee, Arg), Name);
}
CallInst *CreateCall2(Value *Callee, Value *Arg1, Value *Arg2,
- const char *Name = "") {
+ const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2 };
return Insert(CallInst::Create(Callee, Args, Args+2), Name);
}
CallInst *CreateCall3(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
- const char *Name = "") {
+ const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2, Arg3 };
return Insert(CallInst::Create(Callee, Args, Args+3), Name);
}
CallInst *CreateCall4(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
- Value *Arg4, const char *Name = "") {
+ Value *Arg4, const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2, Arg3, Arg4 };
return Insert(CallInst::Create(Callee, Args, Args+4), Name);
}
template<typename InputIterator>
CallInst *CreateCall(Value *Callee, InputIterator ArgBegin,
- InputIterator ArgEnd, const char *Name = "") {
+ InputIterator ArgEnd, const Twine &Name = "") {
return Insert(CallInst::Create(Callee, ArgBegin, ArgEnd), Name);
}
Value *CreateSelect(Value *C, Value *True, Value *False,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *CC = dyn_cast<Constant>(C))
if (Constant *TC = dyn_cast<Constant>(True))
if (Constant *FC = dyn_cast<Constant>(False))
@@ -624,20 +840,20 @@ public:
return Insert(SelectInst::Create(C, True, False), Name);
}
- VAArgInst *CreateVAArg(Value *List, const Type *Ty, const char *Name = "") {
+ VAArgInst *CreateVAArg(Value *List, const Type *Ty, const Twine &Name = "") {
return Insert(new VAArgInst(List, Ty), Name);
}
Value *CreateExtractElement(Value *Vec, Value *Idx,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *VC = dyn_cast<Constant>(Vec))
if (Constant *IC = dyn_cast<Constant>(Idx))
return Folder.CreateExtractElement(VC, IC);
- return Insert(new ExtractElementInst(Vec, Idx), Name);
+ return Insert(ExtractElementInst::Create(Vec, Idx), Name);
}
Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *VC = dyn_cast<Constant>(Vec))
if (Constant *NC = dyn_cast<Constant>(NewElt))
if (Constant *IC = dyn_cast<Constant>(Idx))
@@ -646,7 +862,7 @@ public:
}
Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *V1C = dyn_cast<Constant>(V1))
if (Constant *V2C = dyn_cast<Constant>(V2))
if (Constant *MC = dyn_cast<Constant>(Mask))
@@ -655,7 +871,7 @@ public:
}
Value *CreateExtractValue(Value *Agg, unsigned Idx,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *AggC = dyn_cast<Constant>(Agg))
return Folder.CreateExtractValue(AggC, &Idx, 1);
return Insert(ExtractValueInst::Create(Agg, Idx), Name);
@@ -665,14 +881,14 @@ public:
Value *CreateExtractValue(Value *Agg,
InputIterator IdxBegin,
InputIterator IdxEnd,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *AggC = dyn_cast<Constant>(Agg))
return Folder.CreateExtractValue(AggC, IdxBegin, IdxEnd - IdxBegin);
return Insert(ExtractValueInst::Create(Agg, IdxBegin, IdxEnd), Name);
}
Value *CreateInsertValue(Value *Agg, Value *Val, unsigned Idx,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *AggC = dyn_cast<Constant>(Agg))
if (Constant *ValC = dyn_cast<Constant>(Val))
return Folder.CreateInsertValue(AggC, ValC, &Idx, 1);
@@ -683,11 +899,10 @@ public:
Value *CreateInsertValue(Value *Agg, Value *Val,
InputIterator IdxBegin,
InputIterator IdxEnd,
- const char *Name = "") {
+ const Twine &Name = "") {
if (Constant *AggC = dyn_cast<Constant>(Agg))
if (Constant *ValC = dyn_cast<Constant>(Val))
- return Folder.CreateInsertValue(AggC, ValC,
- IdxBegin, IdxEnd - IdxBegin);
+ return Folder.CreateInsertValue(AggC, ValC, IdxBegin, IdxEnd-IdxBegin);
return Insert(InsertValueInst::Create(Agg, Val, IdxBegin, IdxEnd), Name);
}
@@ -696,30 +911,32 @@ public:
//===--------------------------------------------------------------------===//
/// CreateIsNull - Return an i1 value testing if \arg Arg is null.
- Value *CreateIsNull(Value *Arg, const char *Name = "") {
+ Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()),
Name);
}
/// CreateIsNotNull - Return an i1 value testing if \arg Arg is not null.
- Value *CreateIsNotNull(Value *Arg, const char *Name = "") {
+ Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()),
Name);
}
/// CreatePtrDiff - Return the i64 difference between two pointer values,
/// dividing out the size of the pointed-to objects. This is intended to
- /// implement C-style pointer subtraction.
- Value *CreatePtrDiff(Value *LHS, Value *RHS, const char *Name = "") {
+  /// implement C-style pointer subtraction. As such, the pointers must be
+  /// appropriately aligned for their element types and must point into the
+  /// same object.
+ Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") {
assert(LHS->getType() == RHS->getType() &&
"Pointer subtraction operand types must match!");
const PointerType *ArgType = cast<PointerType>(LHS->getType());
- Value *LHS_int = CreatePtrToInt(LHS, Type::Int64Ty);
- Value *RHS_int = CreatePtrToInt(RHS, Type::Int64Ty);
+ Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
+ Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
Value *Difference = CreateSub(LHS_int, RHS_int);
- return CreateSDiv(Difference,
- ConstantExpr::getSizeOf(ArgType->getElementType()),
- Name);
+ return CreateExactSDiv(Difference,
+ ConstantExpr::getSizeOf(ArgType->getElementType()),
+ Name);
}
};
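
A minimal usage sketch (not part of the patch) of the Twine-based overloads above; Builder is assumed to be an existing IRBuilder<> positioned in a basic block, and A and B are pointer Values of the same type:

  Value *Diff = Builder.CreatePtrDiff(A, B, "diff");
  // Twine lets callers concatenate instruction names without allocating a
  // temporary std::string:
  Value *IsZero = Builder.CreateIsNull(Diff, Twine("diff") + ".iszero");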
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
new file mode 100644
index 000000000000..e7780b05d534
--- /dev/null
+++ b/include/llvm/Support/IRReader.h
@@ -0,0 +1,115 @@
+//===---- llvm/Support/IRReader.h - Reader for LLVM IR files ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions for reading LLVM IR. They support both
+// Bitcode and Assembly, automatically detecting the input format.
+//
+// These functions must be defined in a header file in order to avoid
+// library dependencies, since they reference both Bitcode and Assembly
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_IRREADER_H
+#define LLVM_SUPPORT_IRREADER_H
+
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/ModuleProvider.h"
+
+namespace llvm {
+
+ /// If the given MemoryBuffer holds a bitcode image, return a ModuleProvider
+ /// for it which does lazy deserialization of function bodies. Otherwise,
+ /// attempt to parse it as LLVM Assembly and return a fully populated
+ /// ModuleProvider. This function *always* takes ownership of the given
+ /// MemoryBuffer.
+ inline ModuleProvider *getIRModuleProvider(MemoryBuffer *Buffer,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ ModuleProvider *MP = getBitcodeModuleProvider(Buffer, Context, &ErrMsg);
+ if (MP == 0) {
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), -1, -1, ErrMsg, "");
+      // getBitcodeModuleProvider does not take ownership of the Buffer
+      // in the case of an error.
+ delete Buffer;
+ }
+ return MP;
+ }
+
+ Module *M = ParseAssembly(Buffer, 0, Err, Context);
+ if (M == 0)
+ return 0;
+ return new ExistingModuleProvider(M);
+ }
+
+ /// If the given file holds a bitcode image, return a ModuleProvider
+ /// for it which does lazy deserialization of function bodies. Otherwise,
+ /// attempt to parse it as LLVM Assembly and return a fully populated
+ /// ModuleProvider.
+ inline ModuleProvider *getIRFileModuleProvider(const std::string &Filename,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ std::string ErrMsg;
+ MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
+ if (F == 0) {
+ Err = SMDiagnostic(Filename, -1, -1,
+ "Could not open input file '" + Filename + "'", "");
+ return 0;
+ }
+
+ return getIRModuleProvider(F, Err, Context);
+ }
+
+ /// If the given MemoryBuffer holds a bitcode image, return a Module
+ /// for it. Otherwise, attempt to parse it as LLVM Assembly and return
+ /// a Module for it. This function *always* takes ownership of the given
+ /// MemoryBuffer.
+ inline Module *ParseIR(MemoryBuffer *Buffer,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+    Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
+    if (M == 0)
+      Err = SMDiagnostic(Buffer->getBufferIdentifier(), -1, -1, ErrMsg, "");
+    // ParseBitcodeFile does not take ownership of the Buffer, and the
+    // diagnostic must be built before the Buffer is deleted.
+    delete Buffer;
+    return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+ }
+
+ /// If the given file holds a bitcode image, return a Module for it.
+ /// Otherwise, attempt to parse it as LLVM Assembly and return a Module
+ /// for it.
+ inline Module *ParseIRFile(const std::string &Filename,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ std::string ErrMsg;
+ MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
+ if (F == 0) {
+ Err = SMDiagnostic(Filename, -1, -1,
+ "Could not open input file '" + Filename + "'", "");
+ return 0;
+ }
+
+ return ParseIR(F, Err, Context);
+ }
+
+}
+
+#endif
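
A hypothetical driver snippet (tool name and error handling are illustrative only) showing how ParseIRFile is meant to be called; it accepts either bitcode or LLVM assembly:

  LLVMContext Context;
  SMDiagnostic Err;
  Module *M = ParseIRFile("input.bc", Err, Context);
  if (M == 0) {
    Err.Print("mytool", errs());   // report the open/parse failure
    return 1;
  }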
diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h
index 597cc9d90542..5d7c2f72ba7b 100644
--- a/include/llvm/Support/InstVisitor.h
+++ b/include/llvm/Support/InstVisitor.h
@@ -14,6 +14,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -30,13 +31,13 @@ namespace llvm {
/// @brief Base class for instruction visitors
///
-/// Instruction visitors are used when you want to perform different action for
-/// different kinds of instruction without without having to use lots of casts
-/// and a big switch statement (in your code that is).
+/// Instruction visitors are used when you want to perform different actions
+/// for different kinds of instructions without having to use lots of casts
+/// and a big switch statement (in your code, that is).
///
/// To define your own visitor, inherit from this class, specifying your
/// new type for the 'SubClass' template parameter, and "override" visitXXX
-/// functions in your class. I say "overriding" because this class is defined
+/// functions in your class. I say "override" because this class is defined
/// in terms of statically resolved overloading, not virtual functions.
///
/// For example, here is a visitor that counts the number of malloc
@@ -58,12 +59,12 @@ namespace llvm {
/// NumMallocs = CMV.Count;
///
/// This class has 'visit' methods for Instruction, and also for BasicBlock,
-/// Function, and Module, which recursively process all conained instructions.
+/// Function, and Module, which recursively process all contained instructions.
///
/// Note that if you don't implement visitXXX for some instruction type,
/// the visitXXX method for instruction superclass will be invoked. So
/// if instructions are added in the future, they will be automatically
-/// supported, if you handle on of their superclasses.
+/// supported, if you handle one of their superclasses.
///
/// The optional second template argument specifies the type that instruction
/// visitation functions should return. If you specify this, you *MUST* provide
@@ -113,8 +114,7 @@ public:
//
RetTy visit(Instruction &I) {
switch (I.getOpcode()) {
- default: assert(0 && "Unknown instruction type encountered!");
- abort();
+ default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file...
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: return \
@@ -165,8 +165,6 @@ public:
RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitVICmpInst(VICmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitVFCmpInst(VFCmpInst &I) { DELEGATE(CmpInst);}
RetTy visitMallocInst(MallocInst &I) { DELEGATE(AllocationInst);}
RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(AllocationInst);}
RetTy visitFreeInst(FreeInst &I) { DELEGATE(Instruction); }
@@ -195,7 +193,7 @@ public:
RetTy visitExtractValueInst(ExtractValueInst &I) { DELEGATE(Instruction);}
RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }
- // Next level propagators... if the user does not overload a specific
+ // Next level propagators: If the user does not overload a specific
// instruction type, they can overload one of these to get the whole class
// of instructions...
//
@@ -206,7 +204,7 @@ public:
RetTy visitCastInst(CastInst &I) { DELEGATE(Instruction); }
// If the user wants a 'default' case, they can choose to override this
- // function. If this function is not overloaded in the users subclass, then
+ // function. If this function is not overloaded in the user's subclass, then
// this instruction just gets ignored.
//
// Note that you MUST override this function if your return type is not void.
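
A sketch of the malloc-counting visitor described in the comments above (class and member names follow that example; F is assumed to be a Function&):

  struct CountMallocVisitor : public InstVisitor<CountMallocVisitor> {
    unsigned Count;
    CountMallocVisitor() : Count(0) {}
    void visitMallocInst(MallocInst &MI) { ++Count; }  // statically dispatched
  };

  CountMallocVisitor CMV;
  CMV.visit(F);               // recursively visits every contained instruction
  unsigned NumMallocs = CMV.Count;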
diff --git a/include/llvm/Support/LeakDetector.h b/include/llvm/Support/LeakDetector.h
index 8d74ac663b11..7dbfdbf3d52b 100644
--- a/include/llvm/Support/LeakDetector.h
+++ b/include/llvm/Support/LeakDetector.h
@@ -56,9 +56,9 @@ struct LeakDetector {
/// The specified message will be printed indicating when the check was
/// performed.
///
- static void checkForGarbage(const std::string &Message) {
+ static void checkForGarbage(LLVMContext &C, const std::string &Message) {
#ifndef NDEBUG
- checkForGarbageImpl(Message);
+ checkForGarbageImpl(C, Message);
#endif
}
@@ -83,7 +83,7 @@ private:
static void removeGarbageObjectImpl(const Value *Object);
static void addGarbageObjectImpl(void *Object);
static void removeGarbageObjectImpl(void *Object);
- static void checkForGarbageImpl(const std::string &Message);
+ static void checkForGarbageImpl(LLVMContext &C, const std::string &Message);
};
} // End llvm namespace
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index 4fc648319ad4..b8e223587fbd 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -27,10 +27,12 @@ void* object_creator() {
/// object_deleter - Helper method for ManagedStatic.
///
-template<class C>
-void object_deleter(void *Ptr) {
- delete (C*)Ptr;
-}
+template<typename T> struct object_deleter {
+ static void call(void * Ptr) { delete (T*)Ptr; }
+};
+template<typename T, size_t N> struct object_deleter<T[N]> {
+ static void call(void * Ptr) { delete[] (T*)Ptr; }
+};
/// ManagedStaticBase - Common base class for ManagedStatic instances.
class ManagedStaticBase {
@@ -62,28 +64,28 @@ public:
C &operator*() {
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
- if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+ if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
return *static_cast<C*>(Ptr);
}
C *operator->() {
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
- if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+ if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
return static_cast<C*>(Ptr);
}
const C &operator*() const {
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
- if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+ if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
return *static_cast<C*>(Ptr);
}
const C *operator->() const {
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
- if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+ if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
return static_cast<C*>(Ptr);
}
diff --git a/include/llvm/Support/Mangler.h b/include/llvm/Support/Mangler.h
index 8f672bdd6f65..03c564897bb8 100644
--- a/include/llvm/Support/Mangler.h
+++ b/include/llvm/Support/Mangler.h
@@ -23,8 +23,17 @@ class Type;
class Module;
class Value;
class GlobalValue;
+template <typename T> class SmallVectorImpl;
class Mangler {
+public:
+ enum ManglerPrefixTy {
+ Default, ///< Emit default string before each symbol.
+ Private, ///< Emit "private" prefix before each symbol.
+ LinkerPrivate ///< Emit "linker private" prefix before each symbol.
+ };
+
+private:
/// Prefix - This string is added to each symbol that is emitted, unless the
/// symbol is marked as not needing this prefix.
const char *Prefix;
@@ -33,48 +42,50 @@ class Mangler {
/// linkage.
const char *PrivatePrefix;
+ /// LinkerPrivatePrefix - This string is emitted before each symbol with
+ /// "linker_private" linkage.
+ const char *LinkerPrivatePrefix;
+
/// UseQuotes - If this is set, the target accepts global names in quotes,
/// e.g. "foo bar" is a legal name. This syntax is used instead of escaping
/// the space character. By default, this is false.
bool UseQuotes;
- /// PreserveAsmNames - If this is set, the asm escape character is not removed
- /// from names with 'asm' specifiers.
- bool PreserveAsmNames;
+ /// SymbolsCanStartWithDigit - If this is set, the target allows symbols to
+ /// start with digits (e.g., "0x0021"). By default, this is false.
+ bool SymbolsCanStartWithDigit;
- /// Memo - This is used to remember the name that we assign a value.
+ /// AnonGlobalIDs - We need to give global values the same name every time
+ /// they are mangled. This keeps track of the number we give to anonymous
+ /// ones.
///
- DenseMap<const Value*, std::string> Memo;
+ DenseMap<const GlobalValue*, unsigned> AnonGlobalIDs;
- /// Count - This simple counter is used to unique value names.
+ /// NextAnonGlobalID - This simple counter is used to unique value names.
///
- unsigned Count;
-
- /// TypeMap - If the client wants us to unique types, this keeps track of the
- /// current assignments and TypeCounter keeps track of the next id to assign.
- DenseMap<const Type*, unsigned> TypeMap;
- unsigned TypeCounter;
+ unsigned NextAnonGlobalID;
/// AcceptableChars - This bitfield contains a one for each character that is
/// allowed to be part of an unmangled name.
- unsigned AcceptableChars[256/32];
-public:
+ unsigned AcceptableChars[256 / 32];
+public:
// Mangler ctor - if a prefix is specified, it will be prepended onto all
// symbols.
- Mangler(Module &M, const char *Prefix = "", const char *privatePrefix = "");
+ Mangler(Module &M, const char *Prefix = "", const char *privatePrefix = "",
+ const char *linkerPrivatePrefix = "");
/// setUseQuotes - If UseQuotes is set to true, this target accepts quoted
/// strings for assembler labels.
void setUseQuotes(bool Val) { UseQuotes = Val; }
- /// setPreserveAsmNames - If the mangler should not strip off the asm name
- /// @verbatim identifier (\001), this should be set. @endverbatim
- void setPreserveAsmNames(bool Val) { PreserveAsmNames = Val; }
+ /// setSymbolsCanStartWithDigit - If SymbolsCanStartWithDigit is set to true,
+ /// this target allows symbols to start with digits.
+ void setSymbolsCanStartWithDigit(bool Val) { SymbolsCanStartWithDigit = Val; }
/// Acceptable Characters - This allows the target to specify which characters
/// are acceptable to the assembler without being mangled. By default we
- /// allow letters, numbers, '_', '$', and '.', which is what GAS accepts.
+  /// allow letters, numbers, '_', '$', and '.' (which is what GAS accepts),
+  /// plus '@'.
void markCharAcceptable(unsigned char X) {
AcceptableChars[X/32] |= 1 << (X&31);
}
@@ -85,11 +96,13 @@ public:
return (AcceptableChars[X/32] & (1 << (X&31))) != 0;
}
- /// getValueName - Returns the mangled name of V, an LLVM Value,
- /// in the current module.
+ /// getMangledName - Returns the mangled name of V, an LLVM Value,
+  /// in the current module. If 'Suffix' is specified, the name ends with the
+  /// specified suffix. If 'ForcePrivate' is set, the label is given a
+  /// private label prefix.
///
- std::string getValueName(const GlobalValue *V, const char *Suffix = "");
- std::string getValueName(const Value *V);
+ std::string getMangledName(const GlobalValue *V, const char *Suffix = "",
+ bool ForcePrivate = false);
/// makeNameProper - We don't want identifier names with ., space, or
/// - in them, so we mangle these characters into the strings "d_",
@@ -98,13 +111,14 @@ public:
/// does this for you, so there's no point calling it on the result
/// from getMangledName.
///
- std::string makeNameProper(const std::string &x, const char *Prefix = 0,
- const char *PrivatePrefix = 0);
-
-private:
- /// getTypeID - Return a unique ID for the specified LLVM type.
- ///
- unsigned getTypeID(const Type *Ty);
+ std::string makeNameProper(const std::string &x,
+ ManglerPrefixTy PrefixTy = Mangler::Default);
+
+ /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+ /// and the specified global variable's name. If the global variable doesn't
+ /// have a name, this fills in a unique name for the global.
+ void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool isImplicitlyPrivate);
};
} // End llvm namespace
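
Illustrative use of the renamed interface (a sketch only: M is a Module&, GV a GlobalValue* from it, and the prefix strings are made up; SmallString comes from llvm/ADT/SmallString.h):

  Mangler Mang(M, "_", "L", "l");               // default/private/linker-private
  std::string Name = Mang.getMangledName(GV);   // e.g. "_foo" for @foo
  SmallString<128> NameData;                    // any SmallVectorImpl<char> works
  Mang.getNameWithPrefix(NameData, GV, /*isImplicitlyPrivate=*/false);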
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 85e19acd9ea7..6fa618eb1af8 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -52,6 +52,16 @@ inline bool isUInt32(int64_t Value) {
return static_cast<uint32_t>(Value) == Value;
}
+template<unsigned N>
+inline bool isInt(int64_t x) {
+ return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
+}
+
+template<unsigned N>
+inline bool isUint(uint64_t x) {
+ return N >= 64 || x < (UINT64_C(1)<<N);
+}
+
/// isMask_32 - This function returns true if the argument is a sequence of ones
/// starting at the least significant bit with the remainder zero (32 bit
/// version). Ex. isMask_32(0x0000FFFFU) == true.
@@ -108,7 +118,7 @@ inline uint16_t ByteSwap_16(uint16_t Value) {
/// ByteSwap_32 - This function returns a byte-swapped representation of the
/// 32-bit argument, Value.
inline uint32_t ByteSwap_32(uint32_t Value) {
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
return __builtin_bswap32(Value);
#elif defined(_MSC_VER) && !defined(_DEBUG)
return _byteswap_ulong(Value);
@@ -124,7 +134,7 @@ inline uint32_t ByteSwap_32(uint32_t Value) {
/// ByteSwap_64 - This function returns a byte-swapped representation of the
/// 64-bit argument, Value.
inline uint64_t ByteSwap_64(uint64_t Value) {
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
return __builtin_bswap64(Value);
#elif defined(_MSC_VER) && !defined(_DEBUG)
return _byteswap_uint64(Value);
@@ -425,6 +435,13 @@ inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
return ((Value + Align - 1) / Align) * Align;
}
+/// OffsetToAlignment - Return the offset to the next integer (mod 2**64) that
+/// is greater than or equal to \arg Value and is a multiple of \arg
+/// Align. Align must be non-zero.
+inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
+ return RoundUpToAlignment(Value, Align) - Value;
+}
+
/// abs64 - absolute value of a 64-bit int. Not all environments support
/// "abs" on whatever their name for the 64-bit int type is. The absolute
/// value of the largest negative number is undefined, as with "abs".
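
A few concrete values for the new helpers, checked by hand against the definitions above:

  assert(isInt<8>(-128) && !isInt<8>(128));      // signed range is [-2^7, 2^7)
  assert(isUint<12>(4095) && !isUint<12>(4096)); // unsigned range is [0, 2^12)
  assert(OffsetToAlignment(13, 8) == 3);         // 13 + 3 == 16, a multiple of 8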
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 58a217f6c79e..eb4784cbf580 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -14,6 +14,7 @@
#ifndef LLVM_SUPPORT_MEMORYBUFFER_H
#define LLVM_SUPPORT_MEMORYBUFFER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include <string>
@@ -42,6 +43,10 @@ public:
const char *getBufferEnd() const { return BufferEnd; }
size_t getBufferSize() const { return BufferEnd-BufferStart; }
+ StringRef getBuffer() const {
+ return StringRef(BufferStart, getBufferSize());
+ }
+
/// getBufferIdentifier - Return an identifier for this buffer, typically the
/// filename it was read from.
virtual const char *getBufferIdentifier() const {
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
new file mode 100644
index 000000000000..dec0f134b306
--- /dev/null
+++ b/include/llvm/Support/MemoryObject.h
@@ -0,0 +1,70 @@
+//===- MemoryObject.h - Abstract memory interface ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMORYOBJECT_H
+#define MEMORYOBJECT_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+/// MemoryObject - Abstract base class for contiguous addressable memory.
+/// Necessary for cases in which the memory is in another process, in a
+/// file, or on a remote machine.
+/// All size and offset parameters are uint64_ts, to allow 32-bit processes
+/// access to 64-bit address spaces.
+class MemoryObject {
+public:
+ /// Destructor - Override as necessary.
+ virtual ~MemoryObject();
+
+ /// getBase - Returns the lowest valid address in the region.
+ ///
+ /// @result - The lowest valid address.
+ virtual uint64_t getBase() const = 0;
+
+ /// getExtent - Returns the size of the region in bytes. (The region is
+ /// contiguous, so the highest valid address of the region
+ /// is getBase() + getExtent() - 1).
+ ///
+ /// @result - The size of the region.
+ virtual uint64_t getExtent() const = 0;
+
+ /// readByte - Tries to read a single byte from the region.
+ ///
+ /// @param address - The address of the byte, in the same space as getBase().
+ /// @param ptr - A pointer to a byte to be filled in. Must be non-NULL.
+ /// @result - 0 if successful; -1 if not. Failure may be due to a
+ /// bounds violation or an implementation-specific error.
+ virtual int readByte(uint64_t address, uint8_t* ptr) const = 0;
+
+ /// readBytes - Tries to read a contiguous range of bytes from the
+ /// region, up to the end of the region.
+ /// You should override this function if there is a quicker
+ /// way than going back and forth with individual bytes.
+ ///
+ /// @param address - The address of the first byte, in the same space as
+ /// getBase().
+ /// @param size - The maximum number of bytes to copy.
+ /// @param buf - A pointer to a buffer to be filled in. Must be non-NULL
+ /// and large enough to hold size bytes.
+  /// @param copied   - A pointer to a number that is filled in with the number
+ /// of bytes actually read. May be NULL.
+ /// @result - 0 if successful; -1 if not. Failure may be due to a
+ /// bounds violation or an implementation-specific error.
+ virtual int readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const;
+};
+
+}
+
+#endif
+
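A hypothetical concrete implementation backed by an in-process byte buffer, showing what subclasses must provide (readBytes keeps its default implementation):

  class BufferMemoryObject : public MemoryObject {
    const uint8_t *Bytes;
    uint64_t Size;
  public:
    BufferMemoryObject(const uint8_t *B, uint64_t S) : Bytes(B), Size(S) {}
    uint64_t getBase() const { return 0; }
    uint64_t getExtent() const { return Size; }
    int readByte(uint64_t Addr, uint8_t *Ptr) const {
      if (Addr >= Size) return -1;   // bounds violation
      *Ptr = Bytes[Addr];
      return 0;
    }
  };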
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index a49cf8424067..1f671c19250d 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -28,9 +28,12 @@
namespace llvm {
+class LLVMContext;
+
/// NoFolder - Create "constants" (actually, values) with no folding.
class NoFolder {
public:
+ explicit NoFolder(LLVMContext &) {}
//===--------------------------------------------------------------------===//
// Binary Operators
@@ -39,12 +42,18 @@ public:
Value *CreateAdd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateAdd(LHS, RHS);
}
+ Value *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
+ return BinaryOperator::CreateNSWAdd(LHS, RHS);
+ }
Value *CreateFAdd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFAdd(LHS, RHS);
}
Value *CreateSub(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateSub(LHS, RHS);
}
+ Value *CreateNSWSub(Constant *LHS, Constant *RHS) const {
+ return BinaryOperator::CreateNSWSub(LHS, RHS);
+ }
Value *CreateFSub(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFSub(LHS, RHS);
}
@@ -60,6 +69,9 @@ public:
Value *CreateSDiv(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateSDiv(LHS, RHS);
}
+ Value *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
+ return BinaryOperator::CreateExactSDiv(LHS, RHS);
+ }
Value *CreateFDiv(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFDiv(LHS, RHS);
}
@@ -120,6 +132,15 @@ public:
return GetElementPtrInst::Create(C, IdxList, IdxList+NumIdx);
}
+ Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList,
+ unsigned NumIdx) const {
+ return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx);
+ }
+ Value *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList,
+ unsigned NumIdx) const {
+ return GetElementPtrInst::CreateInBounds(C, IdxList, IdxList+NumIdx);
+ }
+
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
@@ -143,12 +164,6 @@ public:
Value *CreateFCmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const {
return new FCmpInst(P, LHS, RHS);
}
- Value *CreateVICmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const {
- return new VICmpInst(P, LHS, RHS);
- }
- Value *CreateVFCmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const {
- return new VFCmpInst(P, LHS, RHS);
- }
//===--------------------------------------------------------------------===//
// Other Instructions
diff --git a/include/llvm/Support/PassNameParser.h b/include/llvm/Support/PassNameParser.h
index e489e0a6f0b5..66ce3f2e2085 100644
--- a/include/llvm/Support/PassNameParser.h
+++ b/include/llvm/Support/PassNameParser.h
@@ -24,6 +24,7 @@
#define LLVM_SUPPORT_PASS_NAME_PARSER_H
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Pass.h"
#include <algorithm>
#include <cstring>
@@ -65,9 +66,9 @@ public:
virtual void passRegistered(const PassInfo *P) {
if (ignorablePass(P) || !Opt) return;
if (findOption(P->getPassArgument()) != getNumOptions()) {
- cerr << "Two passes with the same argument (-"
+ errs() << "Two passes with the same argument (-"
<< P->getPassArgument() << ") attempted to be registered!\n";
- abort();
+ llvm_unreachable(0);
}
addLiteralOption(P->getPassArgument(), P, P->getPassName());
}
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index fda925f5a9a8..c0b6a6b98c09 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -58,7 +58,7 @@ struct constantint_ty {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
const APInt &CIV = CI->getValue();
if (Val >= 0)
- return CIV == Val;
+ return CIV == static_cast<uint64_t>(Val);
// If Val is negative, and CI is shorter than it, truncate to the right
// number of bits. If it is larger, then we have to sign extend. Just
// compare their negated values.
@@ -87,6 +87,18 @@ struct zero_ty {
/// m_Zero() - Match an arbitrary zero/null constant.
inline zero_ty m_Zero() { return zero_ty(); }
+struct one_ty {
+ template<typename ITy>
+ bool match(ITy *V) {
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return C->isOne();
+ return false;
+ }
+};
+
+/// m_One() - Match an integer 1.
+inline one_ty m_One() { return one_ty(); }
+
template<typename Class>
struct bind_ty {
@@ -311,7 +323,8 @@ struct BinaryOpClass_match {
template<typename OpTy>
bool match(OpTy *V) {
if (Class *I = dyn_cast<Class>(V))
- if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
+ if (L.match(I->getOperand(0)) &&
+ R.match(I->getOperand(1))) {
if (Opcode)
*Opcode = I->getOpcode();
return true;
@@ -356,7 +369,8 @@ struct CmpClass_match {
template<typename OpTy>
bool match(OpTy *V) {
if (Class *I = dyn_cast<Class>(V))
- if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
+ if (L.match(I->getOperand(0)) &&
+ R.match(I->getOperand(1))) {
Predicate = I->getPredicate();
return true;
}
@@ -403,7 +417,7 @@ struct SelectClass_match {
};
template<typename Cond, typename LHS, typename RHS>
-inline SelectClass_match<Cond, RHS, LHS>
+inline SelectClass_match<Cond, LHS, RHS>
m_Select(const Cond &C, const LHS &L, const RHS &R) {
return SelectClass_match<Cond, LHS, RHS>(C, L, R);
}
@@ -503,7 +517,7 @@ struct neg_match {
}
private:
bool matchIfNeg(Value *LHS, Value *RHS) {
- return LHS == ConstantExpr::getZeroValueForNegationExpr(LHS->getType()) &&
+ return LHS == ConstantFP::getZeroValueForNegation(LHS->getType()) &&
L.match(RHS);
}
};
@@ -532,7 +546,7 @@ struct fneg_match {
}
private:
bool matchIfFNeg(Value *LHS, Value *RHS) {
- return LHS == ConstantExpr::getZeroValueForNegationExpr(LHS->getType()) &&
+ return LHS == ConstantFP::getZeroValueForNegation(LHS->getType()) &&
L.match(RHS);
}
};
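
A small sketch of the new m_One matcher in use (V is a Value*; match, m_Sub, and m_Value are the existing combinators from this header, reached via "using namespace llvm::PatternMatch"):

  Value *X;
  if (match(V, m_Sub(m_Value(X), m_One()))) {
    // V computes X - 1; X is now bound to the subtraction's first operand.
  }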
diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h
index b0edd3bd09f9..d64993f54d1d 100644
--- a/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/include/llvm/Support/PointerLikeTypeTraits.h
@@ -50,12 +50,16 @@ public:
// Provide PointerLikeTypeTraits for const pointers.
template<typename T>
class PointerLikeTypeTraits<const T*> {
+ typedef PointerLikeTypeTraits<T*> NonConst;
+
public:
- static inline const void *getAsVoidPointer(const T* P) { return P; }
+ static inline const void *getAsVoidPointer(const T* P) {
+ return NonConst::getAsVoidPointer(const_cast<T*>(P));
+ }
static inline const T *getFromVoidPointer(const void *P) {
- return static_cast<const T*>(P);
+ return NonConst::getFromVoidPointer(const_cast<void*>(P));
}
- enum { NumLowBitsAvailable = 2 };
+ enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable };
};
// Provide PointerLikeTypeTraits for uintptr_t.
diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h
index 909d286f28b6..0db84e1a14c6 100644
--- a/include/llvm/Support/PrettyStackTrace.h
+++ b/include/llvm/Support/PrettyStackTrace.h
@@ -18,6 +18,12 @@
namespace llvm {
class raw_ostream;
+
+ /// DisablePrettyStackTrace - Set this to true to disable this module. This
+  /// might be necessary if the host application installs its own signal
+ /// handlers which conflict with the ones installed by this module.
+ /// Defaults to false.
+ extern bool DisablePrettyStackTrace;
/// PrettyStackTraceEntry - This class is used to represent a frame of the
/// "pretty" stack trace that is dumped when a program crashes. You can define
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index 2fa0365d5f50..d8f8c7894142 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -34,7 +34,8 @@ struct RecyclerStruct {
};
template<>
-struct ilist_traits<RecyclerStruct> : ilist_default_traits<RecyclerStruct> {
+struct ilist_traits<RecyclerStruct> :
+ public ilist_default_traits<RecyclerStruct> {
static RecyclerStruct *getPrev(const RecyclerStruct *t) { return t->Prev; }
static RecyclerStruct *getNext(const RecyclerStruct *t) { return t->Next; }
static void setPrev(RecyclerStruct *t, RecyclerStruct *p) { t->Prev = p; }
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
new file mode 100644
index 000000000000..c954c0d31abe
--- /dev/null
+++ b/include/llvm/Support/Regex.h
@@ -0,0 +1,63 @@
+//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string>
+
+struct llvm_regex;
+
+namespace llvm {
+ class StringRef;
+ template<typename T> class SmallVectorImpl;
+
+ class Regex {
+ public:
+ enum {
+ NoFlags=0,
+ /// Compile for matching that ignores upper/lower case distinctions.
+ IgnoreCase=1,
+      /// Compile for newline-sensitive matching. With this flag, '[^' bracket
+      /// expressions and '.' never match newline. A ^ anchor matches the
+ /// null string after any newline in the string in addition to its normal
+ /// function, and the $ anchor matches the null string before any
+ /// newline in the string in addition to its normal function.
+ Newline=2
+ };
+
+ /// Compiles the given POSIX Extended Regular Expression \arg Regex.
+ /// This implementation supports regexes and matching strings with embedded
+ /// NUL characters.
+ Regex(const StringRef &Regex, unsigned Flags = NoFlags);
+ ~Regex();
+
+    /// isValid - returns false and fills in Error if an error was encountered
+    /// during regex compilation or matching; returns true otherwise.
+ bool isValid(std::string &Error);
+
+ /// getNumMatches - In a valid regex, return the number of parenthesized
+ /// matches it contains. The number filled in by match will include this
+ /// many entries plus one for the whole regex (as element 0).
+ unsigned getNumMatches() const;
+
+ /// matches - Match the regex against a given \arg String.
+ ///
+    /// \param Matches - If given, on a successful match this will be filled in
+ /// with references to the matched group expressions (inside \arg String),
+ /// the first group is always the entire pattern.
+ ///
+ /// This returns true on a successful match.
+ bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
+ private:
+ struct llvm_regex *preg;
+ int error;
+ };
+}
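
Illustrative usage (pattern and input chosen arbitrarily; SmallVector comes from llvm/ADT/SmallVector.h):

  Regex R("^([a-z]+)=([0-9]+)$");
  SmallVector<StringRef, 3> Matches;
  if (R.match("width=42", &Matches)) {
    // Matches[0] == "width=42", Matches[1] == "width", Matches[2] == "42"
  }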
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 454679bda834..4db88825afb5 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -77,9 +77,6 @@ namespace llvm {
static listener *ListenerHead, *ListenerTail;
public:
- class iterator;
-
-
/// Node in linked list of entries.
///
class node {
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 7c8a139091bf..5b6f56b43628 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -65,10 +65,14 @@ class SourceMgr {
// include files in.
std::vector<std::string> IncludeDirectories;
+  /// LineNoCache - This is a cache for line number queries; its implementation
+  /// is private to SourceMgr.cpp.
+ mutable void *LineNoCache;
+
SourceMgr(const SourceMgr&); // DO NOT IMPLEMENT
void operator=(const SourceMgr&); // DO NOT IMPLEMENT
public:
- SourceMgr() {}
+ SourceMgr() : LineNoCache(0) {}
~SourceMgr();
void setIncludeDirs(const std::vector<std::string> &Dirs) {
@@ -145,17 +149,6 @@ public:
const std::string &Msg, const std::string &LineStr)
: Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg),
LineContents(LineStr) {}
- SMDiagnostic(const SMDiagnostic &RHS) {
- operator=(RHS);
- }
-
- void operator=(const SMDiagnostic &E) {
- Filename = E.Filename;
- LineNo = E.LineNo;
- ColumnNo = E.ColumnNo;
- Message = E.Message;
- LineContents = E.LineContents;
- }
void Print(const char *ProgName, raw_ostream &S);
};
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index 5c63034a863c..c71e6b94fa27 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -80,6 +80,8 @@ namespace llvm {
}
}
+ /// createStandardModulePasses - Add the standard module passes. This is
+ /// expected to be run after the standard function passes.
static inline void createStandardModulePasses(PassManager *PM,
unsigned OptimizationLevel,
bool OptimizeSize,
@@ -91,71 +93,69 @@ namespace llvm {
if (OptimizationLevel == 0) {
if (InliningPass)
PM->add(InliningPass);
- } else {
- if (UnitAtATime)
- PM->add(createRaiseAllocationsPass()); // call %malloc -> malloc inst
- PM->add(createCFGSimplificationPass()); // Clean up disgusting code
- // Kill useless allocas
- PM->add(createPromoteMemoryToRegisterPass());
- if (UnitAtATime) {
- PM->add(createGlobalOptimizerPass()); // Optimize out global vars
- PM->add(createGlobalDCEPass()); // Remove unused fns and globs
- // IP Constant Propagation
- PM->add(createIPConstantPropagationPass());
- PM->add(createDeadArgEliminationPass()); // Dead argument elimination
- }
- PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
- PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
- if (UnitAtATime) {
- if (HaveExceptions)
- PM->add(createPruneEHPass()); // Remove dead EH info
- PM->add(createFunctionAttrsPass()); // Set readonly/readnone attrs
- }
- if (InliningPass)
- PM->add(InliningPass);
- if (OptimizationLevel > 2)
- PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args
- if (SimplifyLibCalls)
- PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations
- PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
- PM->add(createJumpThreadingPass()); // Thread jumps.
- PM->add(createCFGSimplificationPass()); // Merge & remove BBs
- PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas
- PM->add(createInstructionCombiningPass()); // Combine silly seq's
- PM->add(createCondPropagationPass()); // Propagate conditionals
- PM->add(createTailCallEliminationPass()); // Eliminate tail calls
- PM->add(createCFGSimplificationPass()); // Merge & remove BBs
- PM->add(createReassociatePass()); // Reassociate expressions
- PM->add(createLoopRotatePass()); // Rotate Loop
- PM->add(createLICMPass()); // Hoist loop invariants
- PM->add(createLoopUnswitchPass(OptimizeSize));
- PM->add(createLoopIndexSplitPass()); // Split loop index
- PM->add(createInstructionCombiningPass());
- PM->add(createIndVarSimplifyPass()); // Canonicalize indvars
- PM->add(createLoopDeletionPass()); // Delete dead loops
- if (UnrollLoops)
- PM->add(createLoopUnrollPass()); // Unroll small loops
- PM->add(createInstructionCombiningPass()); // Clean up after the unroller
- PM->add(createGVNPass()); // Remove redundancies
- PM->add(createMemCpyOptPass()); // Remove memcpy / form memset
- PM->add(createSCCPPass()); // Constant prop with SCCP
+ return;
+ }
- // Run instcombine after redundancy elimination to exploit opportunities
- // opened up by them.
- PM->add(createInstructionCombiningPass());
- PM->add(createCondPropagationPass()); // Propagate conditionals
- PM->add(createDeadStoreEliminationPass()); // Delete dead stores
- PM->add(createAggressiveDCEPass()); // Delete dead instructions
- PM->add(createCFGSimplificationPass()); // Merge & remove BBs
-
- if (UnitAtATime) {
- PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
- PM->add(createDeadTypeEliminationPass()); // Eliminate dead types
- }
-
- if (OptimizationLevel > 1 && UnitAtATime)
- PM->add(createConstantMergePass()); // Merge dup global constants
+ if (UnitAtATime)
+ PM->add(createRaiseAllocationsPass()); // call %malloc -> malloc inst
+ PM->add(createCFGSimplificationPass()); // Clean up disgusting code
+ if (UnitAtATime) {
+ PM->add(createGlobalOptimizerPass()); // Optimize out global vars
+ PM->add(createGlobalDCEPass()); // Remove unused fns and globs
+ // IP Constant Propagation
+ PM->add(createIPConstantPropagationPass());
+ PM->add(createDeadArgEliminationPass()); // Dead argument elimination
}
+ PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
+ PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ if (UnitAtATime) {
+ if (HaveExceptions)
+ PM->add(createPruneEHPass()); // Remove dead EH info
+ PM->add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ }
+ if (InliningPass)
+ PM->add(InliningPass);
+ if (OptimizationLevel > 2)
+ PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+ if (SimplifyLibCalls)
+ PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations
+ PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
+ PM->add(createJumpThreadingPass()); // Thread jumps.
+ PM->add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas
+ PM->add(createInstructionCombiningPass()); // Combine silly seq's
+ PM->add(createCondPropagationPass()); // Propagate conditionals
+ PM->add(createTailCallEliminationPass()); // Eliminate tail calls
+ PM->add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM->add(createReassociatePass()); // Reassociate expressions
+ PM->add(createLoopRotatePass()); // Rotate Loop
+ PM->add(createLICMPass()); // Hoist loop invariants
+ PM->add(createLoopUnswitchPass(OptimizeSize));
+ PM->add(createInstructionCombiningPass());
+ PM->add(createIndVarSimplifyPass()); // Canonicalize indvars
+ PM->add(createLoopDeletionPass()); // Delete dead loops
+ if (UnrollLoops)
+ PM->add(createLoopUnrollPass()); // Unroll small loops
+ PM->add(createInstructionCombiningPass()); // Clean up after the unroller
+ PM->add(createGVNPass()); // Remove redundancies
+ PM->add(createMemCpyOptPass()); // Remove memcpy / form memset
+ PM->add(createSCCPPass()); // Constant prop with SCCP
+
+ // Run instcombine after redundancy elimination to exploit opportunities
+ // opened up by them.
+ PM->add(createInstructionCombiningPass());
+ PM->add(createCondPropagationPass()); // Propagate conditionals
+ PM->add(createDeadStoreEliminationPass()); // Delete dead stores
+ PM->add(createAggressiveDCEPass()); // Delete dead instructions
+ PM->add(createCFGSimplificationPass()); // Merge & remove BBs
+
+ if (UnitAtATime) {
+ PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+ PM->add(createDeadTypeEliminationPass()); // Eliminate dead types
+ }
+
+ if (OptimizationLevel > 1 && UnitAtATime)
+ PM->add(createConstantMergePass()); // Merge dup global constants
}
static inline void addOnePass(PassManager *PM, Pass *P, bool AndVerify) {
@@ -230,10 +230,8 @@ namespace llvm {
addOnePass(PM, createInstructionCombiningPass(), VerifyEach);
addOnePass(PM, createJumpThreadingPass(), VerifyEach);
- // Cleanup jump threading.
- addOnePass(PM, createPromoteMemoryToRegisterPass(), VerifyEach);
- // Delete basic blocks, which optimization passes may have killed...
+ // Delete basic blocks, which optimization passes may have killed.
addOnePass(PM, createCFGSimplificationPass(), VerifyEach);
// Now that we have optimized the program, discard unreachable functions.
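
A hedged sketch of a typical -O2-style invocation of createStandardModulePasses (M is a Module*; createFunctionInliningPass comes from llvm/Transforms/IPO.h, and the flag values are illustrative):

  PassManager PM;
  createStandardModulePasses(&PM, /*OptimizationLevel=*/2,
                             /*OptimizeSize=*/false,
                             /*UnitAtATime=*/true,
                             /*UnrollLoops=*/true,
                             /*SimplifyLibCalls=*/true,
                             /*HaveExceptions=*/true,
                             createFunctionInliningPass());
  PM.run(*M);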
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index 98db8e2bf37c..82e46d42c69d 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -1,4 +1,4 @@
-//===-- StringPool.h - Interned string pool -------------------------------===//
+//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -64,12 +64,7 @@ namespace llvm {
/// intern - Adds a string to the pool and returns a reference-counted
/// pointer to it. No additional memory is allocated if the string already
/// exists in the pool.
- PooledStringPtr intern(const char *Begin, const char *End);
-
- /// intern - Adds a null-terminated string to the pool and returns a
- /// reference-counted pointer to it. No additional memory is allocated if
- /// the string already exists in the pool.
- inline PooledStringPtr intern(const char *Str);
+ PooledStringPtr intern(const StringRef &Str);
/// empty - Checks whether the pool is empty. Returns true if so.
///
@@ -139,10 +134,6 @@ namespace llvm {
inline bool operator!=(const PooledStringPtr &That) { return S != That.S; }
};
- PooledStringPtr StringPool::intern(const char *Str) {
- return intern(Str, Str + strlen(Str));
- }
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h
index 9a33fa31f226..b3d83fc24345 100644
--- a/include/llvm/Support/SystemUtils.h
+++ b/include/llvm/Support/SystemUtils.h
@@ -15,27 +15,29 @@
#ifndef LLVM_SUPPORT_SYSTEMUTILS_H
#define LLVM_SUPPORT_SYSTEMUTILS_H
-#include "llvm/System/Program.h"
+#include <string>
namespace llvm {
+ class raw_ostream;
+ namespace sys { class Path; }
-/// Determine if the ostream provided is connected to the std::cout and
+/// Determine if the raw_ostream provided is connected to outs() and
/// displayed or not (to a console window). If so, generate a warning message
/// advising against display of bitcode and return true. Otherwise just return
/// false
/// @brief Check for output written to a console
bool CheckBitcodeOutputToConsole(
- std::ostream* stream_to_check, ///< The stream to be checked
- bool print_warning = true ///< Control whether warnings are printed
+ raw_ostream &stream_to_check, ///< The stream to be checked
+ bool print_warning = true ///< Control whether warnings are printed
);
/// FindExecutable - Find a named executable, giving the argv[0] of program
-/// being executed. This allows us to find another LLVM tool if it is built into
-/// the same directory, but that directory is neither the current directory, nor
-/// in the PATH. If the executable cannot be found, return an empty string.
+/// being executed. This allows us to find another LLVM tool if it is built in
+/// the same directory. If the executable cannot be found, return an
+/// empty string.
/// @brief Find a named executable.
sys::Path FindExecutable(const std::string &ExeName,
- const std::string &ProgramPath);
+ const char *Argv0, void *MainAddr);
} // End llvm namespace
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index b0700c1dadd8..8e28632b7eb8 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -25,21 +25,24 @@
namespace llvm {
class TargetData;
+class LLVMContext;
/// TargetFolder - Create constants with target dependent folding.
class TargetFolder {
const TargetData *TD;
+ LLVMContext &Context;
/// Fold - Fold the constant using target specific information.
Constant *Fold(Constant *C) const {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *CF = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *CF = ConstantFoldConstantExpression(CE, Context, TD))
return CF;
return C;
}
public:
- explicit TargetFolder(const TargetData *TheTD) : TD(TheTD) {}
+ explicit TargetFolder(const TargetData *TheTD, LLVMContext &C) :
+ TD(TheTD), Context(C) {}
//===--------------------------------------------------------------------===//
// Binary Operators
@@ -48,12 +51,18 @@ public:
Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getAdd(LHS, RHS));
}
+ Constant *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
+ return Fold(ConstantExpr::getNSWAdd(LHS, RHS));
+ }
Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getFAdd(LHS, RHS));
}
Constant *CreateSub(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getSub(LHS, RHS));
}
+ Constant *CreateNSWSub(Constant *LHS, Constant *RHS) const {
+ return Fold(ConstantExpr::getNSWSub(LHS, RHS));
+ }
Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getFSub(LHS, RHS));
}
@@ -69,6 +78,9 @@ public:
Constant *CreateSDiv(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getSDiv(LHS, RHS));
}
+ Constant *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
+ return Fold(ConstantExpr::getExactSDiv(LHS, RHS));
+ }
Constant *CreateFDiv(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getFDiv(LHS, RHS));
}
@@ -132,6 +144,15 @@ public:
return Fold(ConstantExpr::getGetElementPtr(C, IdxList, NumIdx));
}
+ Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList,
+ unsigned NumIdx) const {
+ return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx));
+ }
+ Constant *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList,
+ unsigned NumIdx) const {
+ return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx));
+ }
+
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
@@ -176,14 +197,6 @@ public:
Constant *RHS) const {
return Fold(ConstantExpr::getCompare(P, LHS, RHS));
}
- Constant *CreateVICmp(CmpInst::Predicate P, Constant *LHS,
- Constant *RHS) const {
- return Fold(ConstantExpr::getCompare(P, LHS, RHS));
- }
- Constant *CreateVFCmp(CmpInst::Predicate P, Constant *LHS,
- Constant *RHS) const {
- return Fold(ConstantExpr::getCompare(P, LHS, RHS));
- }
//===--------------------------------------------------------------------===//
// Other Instructions
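
A sketch of pairing the updated folder with IRBuilder (assumes TD is a const TargetData*, Ctx an LLVMContext&, and L/R are Constant operands; the template parameters follow IRBuilder's declaration in IRBuilder.h):

  TargetFolder Folder(TD, Ctx);
  IRBuilder<true, TargetFolder> Builder(Ctx, Folder);
  Value *Sum = Builder.CreateAdd(L, R);  // folded with target information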
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index 71b7ee58fd3c..54f1da96cad6 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -19,15 +19,15 @@
#include "llvm/System/Mutex.h"
#include <string>
#include <vector>
-#include <iosfwd>
#include <cassert>
namespace llvm {
class TimerGroup;
+class raw_ostream;
/// Timer - This class is used to track the amount of time spent between
-/// invocations of it's startTimer()/stopTimer() methods. Given appropriate OS
+/// invocations of its startTimer()/stopTimer() methods. Given appropriate OS
/// support it can also keep track of the RSS of the program at various points.
/// By default, the Timer will print the amount of time it has captured to
/// standard error when the last timer is destroyed; otherwise it is printed
@@ -112,7 +112,7 @@ public:
/// print - Print the current timer to standard error, and reset the "Started"
/// flag.
- void print(const Timer &Total, std::ostream &OS);
+ void print(const Timer &Total, raw_ostream &OS);
private:
friend class TimerGroup;
diff --git a/include/llvm/Support/TypeBuilder.h b/include/llvm/Support/TypeBuilder.h
index b0ae516b815c..fb22e3f5241d 100644
--- a/include/llvm/Support/TypeBuilder.h
+++ b/include/llvm/Support/TypeBuilder.h
@@ -16,6 +16,7 @@
#define LLVM_SUPPORT_TYPEBUILDER_H
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include <limits.h>
namespace llvm {
@@ -49,15 +50,14 @@ namespace llvm {
/// namespace llvm {
/// template<bool xcompile> class TypeBuilder<MyType, xcompile> {
/// public:
-/// static const StructType *get() {
-/// // Using the static result variable ensures that the type is
-/// // only looked up once.
-/// static const StructType *const result = StructType::get(
-/// TypeBuilder<types::i<32>, xcompile>::get(),
-/// TypeBuilder<types::i<32>*, xcompile>::get(),
-/// TypeBuilder<types::i<8>*[], xcompile>::get(),
+/// static const StructType *get(LLVMContext &Context) {
+/// // If you cache this result, be sure to cache it separately
+/// // for each LLVMContext.
+/// return StructType::get(
+/// TypeBuilder<types::i<32>, xcompile>::get(Context),
+/// TypeBuilder<types::i<32>*, xcompile>::get(Context),
+/// TypeBuilder<types::i<8>*[], xcompile>::get(Context),
/// NULL);
-/// return result;
/// }
///
/// // You may find this a convenient place to put some constants
@@ -71,9 +71,6 @@ namespace llvm {
/// }
/// } // namespace llvm
///
-/// Using the static result variable ensures that the type is only looked up
-/// once.
-///
/// TypeBuilder cannot handle recursive types or types you only know at runtime.
/// If you try to give it a recursive type, it will deadlock, infinitely
/// recurse, or throw a recursive_init exception.
@@ -104,10 +101,8 @@ template<typename T, bool cross> class TypeBuilder<const volatile T, cross>
// Pointers
template<typename T, bool cross> class TypeBuilder<T*, cross> {
public:
- static const PointerType *get() {
- static const PointerType *const result =
- PointerType::getUnqual(TypeBuilder<T,cross>::get());
- return result;
+ static const PointerType *get(LLVMContext &Context) {
+ return PointerType::getUnqual(TypeBuilder<T,cross>::get(Context));
}
};
@@ -117,19 +112,15 @@ template<typename T, bool cross> class TypeBuilder<T&, cross> {};
// Arrays
template<typename T, size_t N, bool cross> class TypeBuilder<T[N], cross> {
public:
- static const ArrayType *get() {
- static const ArrayType *const result =
- ArrayType::get(TypeBuilder<T, cross>::get(), N);
- return result;
+ static const ArrayType *get(LLVMContext &Context) {
+ return ArrayType::get(TypeBuilder<T, cross>::get(Context), N);
}
};
/// LLVM uses an array of length 0 to represent an unknown-length array.
template<typename T, bool cross> class TypeBuilder<T[], cross> {
public:
- static const ArrayType *get() {
- static const ArrayType *const result =
- ArrayType::get(TypeBuilder<T, cross>::get(), 0);
- return result;
+ static const ArrayType *get(LLVMContext &Context) {
+ return ArrayType::get(TypeBuilder<T, cross>::get(Context), 0);
}
};
@@ -158,10 +149,8 @@ public:
#define DEFINE_INTEGRAL_TYPEBUILDER(T) \
template<> class TypeBuilder<T, false> { \
public: \
- static const IntegerType *get() { \
- static const IntegerType *const result = \
- IntegerType::get(sizeof(T) * CHAR_BIT); \
- return result; \
+ static const IntegerType *get(LLVMContext &Context) { \
+ return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \
} \
}; \
template<> class TypeBuilder<T, true> { \
@@ -189,53 +178,52 @@ DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long);
template<uint32_t num_bits, bool cross>
class TypeBuilder<types::i<num_bits>, cross> {
public:
- static const IntegerType *get() {
- static const IntegerType *const result = IntegerType::get(num_bits);
- return result;
+ static const IntegerType *get(LLVMContext &C) {
+ return IntegerType::get(C, num_bits);
}
};
template<> class TypeBuilder<float, false> {
public:
- static const Type *get() {
- return Type::FloatTy;
+ static const Type *get(LLVMContext& C) {
+ return Type::getFloatTy(C);
}
};
template<> class TypeBuilder<float, true> {};
template<> class TypeBuilder<double, false> {
public:
- static const Type *get() {
- return Type::DoubleTy;
+ static const Type *get(LLVMContext& C) {
+ return Type::getDoubleTy(C);
}
};
template<> class TypeBuilder<double, true> {};
template<bool cross> class TypeBuilder<types::ieee_float, cross> {
public:
- static const Type *get() { return Type::FloatTy; }
+ static const Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
};
template<bool cross> class TypeBuilder<types::ieee_double, cross> {
public:
- static const Type *get() { return Type::DoubleTy; }
+ static const Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
};
template<bool cross> class TypeBuilder<types::x86_fp80, cross> {
public:
- static const Type *get() { return Type::X86_FP80Ty; }
+ static const Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
};
template<bool cross> class TypeBuilder<types::fp128, cross> {
public:
- static const Type *get() { return Type::FP128Ty; }
+ static const Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
};
template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
public:
- static const Type *get() { return Type::PPC_FP128Ty; }
+ static const Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
};
template<bool cross> class TypeBuilder<void, cross> {
public:
- static const Type *get() {
- return Type::VoidTy;
+ static const Type *get(LLVMContext &C) {
+ return Type::getVoidTy(C);
}
};
@@ -246,64 +234,43 @@ template<> class TypeBuilder<void*, false>
template<typename R, bool cross> class TypeBuilder<R(), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
- return FunctionType::get(TypeBuilder<R, cross>::get(), false);
+ static const FunctionType *get(LLVMContext &Context) {
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context), false);
}
};
template<typename R, typename A1, bool cross> class TypeBuilder<R(A1), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(1);
- params.push_back(TypeBuilder<A1, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, false);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
}
};
template<typename R, typename A1, typename A2, bool cross>
class TypeBuilder<R(A1, A2), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(2);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, false);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
}
};
template<typename R, typename A1, typename A2, typename A3, bool cross>
class TypeBuilder<R(A1, A2, A3), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(3);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- params.push_back(TypeBuilder<A3, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, false);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ params.push_back(TypeBuilder<A3, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
}
};
@@ -311,20 +278,15 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
bool cross>
class TypeBuilder<R(A1, A2, A3, A4), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(4);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- params.push_back(TypeBuilder<A3, cross>::get());
- params.push_back(TypeBuilder<A4, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, false);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ params.push_back(TypeBuilder<A3, cross>::get(Context));
+ params.push_back(TypeBuilder<A4, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
}
};
@@ -332,85 +294,58 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
typename A5, bool cross>
class TypeBuilder<R(A1, A2, A3, A4, A5), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(5);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- params.push_back(TypeBuilder<A3, cross>::get());
- params.push_back(TypeBuilder<A4, cross>::get());
- params.push_back(TypeBuilder<A5, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, false);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ params.push_back(TypeBuilder<A3, cross>::get(Context));
+ params.push_back(TypeBuilder<A4, cross>::get(Context));
+ params.push_back(TypeBuilder<A5, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
}
};
template<typename R, bool cross> class TypeBuilder<R(...), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
- return FunctionType::get(TypeBuilder<R, cross>::get(), true);
+ static const FunctionType *get(LLVMContext &Context) {
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context), true);
}
};
template<typename R, typename A1, bool cross>
class TypeBuilder<R(A1, ...), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(1);
- params.push_back(TypeBuilder<A1, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, true);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context), params, true);
}
};
template<typename R, typename A1, typename A2, bool cross>
class TypeBuilder<R(A1, A2, ...), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(2);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, true);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
}
};
template<typename R, typename A1, typename A2, typename A3, bool cross>
class TypeBuilder<R(A1, A2, A3, ...), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(3);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- params.push_back(TypeBuilder<A3, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, true);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ params.push_back(TypeBuilder<A3, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
}
};
@@ -418,20 +353,15 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
bool cross>
class TypeBuilder<R(A1, A2, A3, A4, ...), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(4);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- params.push_back(TypeBuilder<A3, cross>::get());
- params.push_back(TypeBuilder<A4, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, true);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ params.push_back(TypeBuilder<A3, cross>::get(Context));
+ params.push_back(TypeBuilder<A4, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
}
};
@@ -439,21 +369,16 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
typename A5, bool cross>
class TypeBuilder<R(A1, A2, A3, A4, A5, ...), cross> {
public:
- static const FunctionType *get() {
- static const FunctionType *const result = create();
- return result;
- }
-
-private:
- static const FunctionType *create() {
+ static const FunctionType *get(LLVMContext &Context) {
std::vector<const Type*> params;
params.reserve(5);
- params.push_back(TypeBuilder<A1, cross>::get());
- params.push_back(TypeBuilder<A2, cross>::get());
- params.push_back(TypeBuilder<A3, cross>::get());
- params.push_back(TypeBuilder<A4, cross>::get());
- params.push_back(TypeBuilder<A5, cross>::get());
- return FunctionType::get(TypeBuilder<R, cross>::get(), params, true);
+ params.push_back(TypeBuilder<A1, cross>::get(Context));
+ params.push_back(TypeBuilder<A2, cross>::get(Context));
+ params.push_back(TypeBuilder<A3, cross>::get(Context));
+ params.push_back(TypeBuilder<A4, cross>::get(Context));
+ params.push_back(TypeBuilder<A5, cross>::get(Context));
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
}
};
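As a sketch of the interface change above, building an i32(i32, i32) function type now threads the LLVMContext through every get() call (the helper name is hypothetical):

    #include "llvm/LLVMContext.h"
    #include "llvm/Support/TypeBuilder.h"

    const llvm::FunctionType *makeBinOpTy(llvm::LLVMContext &Context) {
      // No function-local static caching any more; the context is passed
      // explicitly into each nested TypeBuilder::get() call.
      return llvm::TypeBuilder<llvm::types::i<32>(llvm::types::i<32>,
                                                  llvm::types::i<32>),
                               false>::get(Context);
    }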
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index a97a5e88142d..e6363ffea980 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -14,6 +14,7 @@
#ifndef LLVM_SUPPORT_VALUEHANDLE_H
#define LLVM_SUPPORT_VALUEHANDLE_H
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Value.h"
@@ -44,73 +45,87 @@ protected:
/// fully general Callback version does have a vtable.
enum HandleBaseKind {
Assert,
- Weak,
- Callback
+ Callback,
+ Tracking,
+ Weak
};
private:
-
+
PointerIntPair<ValueHandleBase**, 2, HandleBaseKind> PrevPair;
ValueHandleBase *Next;
Value *VP;
+
+ explicit ValueHandleBase(const ValueHandleBase&); // DO NOT IMPLEMENT.
public:
explicit ValueHandleBase(HandleBaseKind Kind)
: PrevPair(0, Kind), Next(0), VP(0) {}
ValueHandleBase(HandleBaseKind Kind, Value *V)
: PrevPair(0, Kind), Next(0), VP(V) {
- if (V)
+ if (isValid(VP))
AddToUseList();
}
ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS)
: PrevPair(0, Kind), Next(0), VP(RHS.VP) {
- if (VP)
+ if (isValid(VP))
AddToExistingUseList(RHS.getPrevPtr());
}
~ValueHandleBase() {
- if (VP)
- RemoveFromUseList();
+ if (isValid(VP))
+ RemoveFromUseList();
}
-
+
Value *operator=(Value *RHS) {
if (VP == RHS) return RHS;
- if (VP) RemoveFromUseList();
+ if (isValid(VP)) RemoveFromUseList();
VP = RHS;
- if (VP) AddToUseList();
+ if (isValid(VP)) AddToUseList();
return RHS;
}
Value *operator=(const ValueHandleBase &RHS) {
if (VP == RHS.VP) return RHS.VP;
- if (VP) RemoveFromUseList();
+ if (isValid(VP)) RemoveFromUseList();
VP = RHS.VP;
- if (VP) AddToExistingUseList(RHS.getPrevPtr());
+ if (isValid(VP)) AddToExistingUseList(RHS.getPrevPtr());
return VP;
}
-
+
Value *operator->() const { return getValPtr(); }
Value &operator*() const { return *getValPtr(); }
protected:
Value *getValPtr() const { return VP; }
+ static bool isValid(Value *V) {
+ return V &&
+ V != DenseMapInfo<Value *>::getEmptyKey() &&
+ V != DenseMapInfo<Value *>::getTombstoneKey();
+ }
+
private:
// Callbacks made from Value.
static void ValueIsDeleted(Value *V);
static void ValueIsRAUWd(Value *Old, Value *New);
-
+
// Internal implementation details.
ValueHandleBase **getPrevPtr() const { return PrevPair.getPointer(); }
HandleBaseKind getKind() const { return PrevPair.getInt(); }
void setPrevPtr(ValueHandleBase **Ptr) { PrevPair.setPointer(Ptr); }
-
- /// AddToUseList - Add this ValueHandle to the use list for VP, where List is
- /// known to point into the existing use list.
+
+ /// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
+ /// List is the address of either the head of the list or a Next node within
+ /// the existing use list.
void AddToExistingUseList(ValueHandleBase **List);
-
+
+ /// AddToExistingUseListAfter - Add this ValueHandle to the use list after
+ /// Node.
+ void AddToExistingUseListAfter(ValueHandleBase *Node);
+
/// AddToUseList - Add this ValueHandle to the use list for VP.
void AddToUseList();
/// RemoveFromUseList - Remove this ValueHandle from its current use list.
void RemoveFromUseList();
};
-
+
/// WeakVH - This is a value handle that tries hard to point to a Value, even
/// across RAUW operations, but will null itself out if the value is destroyed.
/// This is useful for advisory sorts of information, but should not be used as
@@ -123,6 +138,13 @@ public:
WeakVH(const WeakVH &RHS)
: ValueHandleBase(Weak, RHS) {}
+ Value *operator=(Value *RHS) {
+ return ValueHandleBase::operator=(RHS);
+ }
+ Value *operator=(const ValueHandleBase &RHS) {
+ return ValueHandleBase::operator=(RHS);
+ }
+
operator Value*() const {
return getValPtr();
}
@@ -153,7 +175,7 @@ template<> struct simplify_type<WeakVH> : public simplify_type<const WeakVH> {};
/// AssertingVH's as it moves. This is required because in non-assert mode this
/// class turns into a trivial wrapper around a pointer.
template <typename ValueTy>
-class AssertingVH
+class AssertingVH
#ifndef NDEBUG
: public ValueHandleBase
#endif
@@ -164,7 +186,7 @@ class AssertingVH
return static_cast<ValueTy*>(ValueHandleBase::getValPtr());
}
void setValPtr(ValueTy *P) {
- ValueHandleBase::operator=(P);
+ ValueHandleBase::operator=(GetAsValue(P));
}
#else
ValueTy *ThePtr;
@@ -172,10 +194,15 @@ class AssertingVH
void setValPtr(ValueTy *P) { ThePtr = P; }
#endif
+ // Convert a ValueTy*, which may be const, to the type the base
+ // class expects.
+ static Value *GetAsValue(Value *V) { return V; }
+ static Value *GetAsValue(const Value *V) { return const_cast<Value*>(V); }
+
public:
#ifndef NDEBUG
AssertingVH() : ValueHandleBase(Assert) {}
- AssertingVH(ValueTy *P) : ValueHandleBase(Assert, P) {}
+ AssertingVH(ValueTy *P) : ValueHandleBase(Assert, GetAsValue(P)) {}
AssertingVH(const AssertingVH &RHS) : ValueHandleBase(Assert, RHS) {}
#else
AssertingVH() : ThePtr(0) {}
@@ -190,7 +217,7 @@ public:
setValPtr(RHS);
return getValPtr();
}
- ValueTy *operator=(AssertingVH<ValueTy> &RHS) {
+ ValueTy *operator=(const AssertingVH<ValueTy> &RHS) {
setValPtr(RHS.getValPtr());
return getValPtr();
}
@@ -211,6 +238,88 @@ template<> struct simplify_type<const AssertingVH<Value> > {
template<> struct simplify_type<AssertingVH<Value> >
: public simplify_type<const AssertingVH<Value> > {};
+/// TrackingVH - This is a value handle that tracks a Value (or Value subclass),
+/// even across RAUW operations.
+///
+/// TrackingVH is designed for situations where a client needs to hold a handle
+/// to a Value (or subclass) across some operations which may move that value,
+/// but should never destroy it or replace it with some unacceptable type.
+///
+/// It is an error to do anything with a TrackingVH whose value has been
+/// destroyed, except to destruct it.
+///
+/// It is an error to attempt to replace a value with one of a type which is
+/// incompatible with any of its outstanding TrackingVHs.
+template<typename ValueTy>
+class TrackingVH : public ValueHandleBase {
+ void CheckValidity() const {
+ Value *VP = ValueHandleBase::getValPtr();
+
+ // Null is always ok.
+ if (!VP)
+ return;
+
+ // Check that this value is valid (i.e., it hasn't been deleted). We
+ // explicitly delay this check until access to avoid requiring clients to be
+ // unnecessarily careful w.r.t. destruction.
+ assert(ValueHandleBase::isValid(VP) && "Tracked Value was deleted!");
+
+ // Check that the value is a member of the correct subclass. We would like
+ // to check this property on assignment for better debugging, but we don't
+ // want to require a virtual interface on this VH. Instead we allow RAUW to
+ // replace this value with a value of an invalid type, and check it here.
+ assert(isa<ValueTy>(VP) &&
+ "Tracked Value was replaced by one with an invalid type!");
+ }
+
+ ValueTy *getValPtr() const {
+ CheckValidity();
+ return static_cast<ValueTy*>(ValueHandleBase::getValPtr());
+ }
+ void setValPtr(ValueTy *P) {
+ CheckValidity();
+ ValueHandleBase::operator=(GetAsValue(P));
+ }
+
+ // Convert a ValueTy*, which may be const, to the type the base
+ // class expects.
+ static Value *GetAsValue(Value *V) { return V; }
+ static Value *GetAsValue(const Value *V) { return const_cast<Value*>(V); }
+
+public:
+ TrackingVH() : ValueHandleBase(Tracking) {}
+ TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, P) {}
+ TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {}
+
+ operator ValueTy*() const {
+ return getValPtr();
+ }
+
+ ValueTy *operator=(ValueTy *RHS) {
+ setValPtr(RHS);
+ return getValPtr();
+ }
+ ValueTy *operator=(const TrackingVH<ValueTy> &RHS) {
+ setValPtr(RHS.getValPtr());
+ return getValPtr();
+ }
+
+ ValueTy *operator->() const { return getValPtr(); }
+ ValueTy &operator*() const { return *getValPtr(); }
+};
+
+// Specialize simplify_type to allow TrackingVH to participate in
+// dyn_cast, isa, etc.
+template<typename From> struct simplify_type;
+template<> struct simplify_type<const TrackingVH<Value> > {
+ typedef Value* SimpleType;
+ static SimpleType getSimplifiedValue(const TrackingVH<Value> &AVH) {
+ return static_cast<Value *>(AVH);
+ }
+};
+template<> struct simplify_type<TrackingVH<Value> >
+ : public simplify_type<const TrackingVH<Value> > {};
+
/// CallbackVH - This is a value handle that allows subclasses to define
/// callbacks that run when the underlying Value has RAUW called on it or is
/// destroyed. This class can be used as the key of a map, as long as the user
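A rough sketch contrasting the new TrackingVH with WeakVH (the function and variable names are hypothetical): WeakVH nulls itself out when the value dies, while TrackingVH follows RAUW and asserts on access if the value was deleted or replaced by an incompatible type.

    #include "llvm/Instruction.h"
    #include "llvm/Support/ValueHandle.h"

    void watch(llvm::Instruction *I) {
      llvm::WeakVH Weak(I);                  // becomes null if I is deleted
      llvm::TrackingVH<llvm::Value> VH(I);   // follows I across RAUW
      // ... run transformations that may RAUW or delete I ...
      if (Weak)                              // WeakVH converts to Value*
        Weak->dump();                        // still safe: Weak is non-null
    }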
diff --git a/include/llvm/Support/raw_os_ostream.h b/include/llvm/Support/raw_os_ostream.h
new file mode 100644
index 000000000000..e0978b238e31
--- /dev/null
+++ b/include/llvm/Support/raw_os_ostream.h
@@ -0,0 +1,42 @@
+//===- raw_os_ostream.h - std::ostream adaptor for raw_ostream --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the raw_os_ostream class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RAW_OS_OSTREAM_H
+#define LLVM_SUPPORT_RAW_OS_OSTREAM_H
+
+#include "llvm/Support/raw_ostream.h"
+#include <iosfwd>
+
+namespace llvm {
+
+/// raw_os_ostream - A raw_ostream that writes to an std::ostream. This is a
+/// simple adaptor class. It does not check for output errors; clients should
+/// use the underlying stream to detect errors.
+class raw_os_ostream : public raw_ostream {
+ std::ostream &OS;
+
+ /// write_impl - See raw_ostream::write_impl.
+ virtual void write_impl(const char *Ptr, size_t Size);
+
+ /// current_pos - Return the current position within the stream, not
+ /// counting the bytes currently in the buffer.
+ virtual uint64_t current_pos();
+
+public:
+ raw_os_ostream(std::ostream &O) : OS(O) {}
+ ~raw_os_ostream();
+};
+
+} // end llvm namespace
+
+#endif
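A minimal usage sketch for the adaptor added above (assuming nothing beyond the class as declared):

    #include "llvm/Support/raw_os_ostream.h"
    #include <iostream>

    void greet() {
      llvm::raw_os_ostream OS(std::cout);   // adapt std::cout to raw_ostream
      OS << "hello, raw_ostream\n";
    }                                       // flushed in ~raw_os_ostream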
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 8242f04e23ce..7827dd83804b 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -14,11 +14,8 @@
#ifndef LLVM_SUPPORT_RAW_OSTREAM_H
#define LLVM_SUPPORT_RAW_OSTREAM_H
-#include "llvm/ADT/StringExtras.h"
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <iosfwd>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class format_object_base;
@@ -31,18 +28,39 @@ namespace llvm {
/// a chunk at a time.
class raw_ostream {
private:
+ // Do not implement. raw_ostream is noncopyable.
+ void operator=(const raw_ostream &);
+ raw_ostream(const raw_ostream &);
+
/// The buffer is handled in such a way that the buffer is
/// uninitialized, unbuffered, or out of space when OutBufCur >=
/// OutBufEnd. Thus a single comparison suffices to determine if we
/// need to take the slow path to write a single character.
///
/// The buffer is in one of three states:
- /// 1. Unbuffered (Unbuffered == true)
- /// 1. Uninitialized (Unbuffered == false && OutBufStart == 0).
- /// 2. Buffered (Unbuffered == false && OutBufStart != 0 &&
- /// OutBufEnd - OutBufStart >= 64).
+ /// 1. Unbuffered (BufferMode == Unbuffered)
+ /// 2. Uninitialized (BufferMode != Unbuffered && OutBufStart == 0).
+ /// 3. Buffered (BufferMode != Unbuffered && OutBufStart != 0 &&
+ /// OutBufEnd - OutBufStart >= 1).
+ ///
+ /// If buffered, then the raw_ostream owns the buffer if (BufferMode ==
+ /// InternalBuffer); otherwise the buffer has been set via SetBuffer and is
+ /// managed by the subclass.
+ ///
+ /// If a subclass installs an external buffer using SetBuffer then it can wait
+ /// for a \see write_impl() call to handle the data which has been put into
+ /// this buffer.
char *OutBufStart, *OutBufEnd, *OutBufCur;
- bool Unbuffered;
+
+ enum BufferKind {
+ Unbuffered = 0,
+ InternalBuffer,
+ ExternalBuffer
+ } BufferMode;
+
+ /// Error - This flag is true if an error of any kind has been detected.
+ ///
+ bool Error;
public:
// color order matches ANSI escape sequence, don't change
@@ -58,49 +76,66 @@ public:
SAVEDCOLOR
};
- explicit raw_ostream(bool unbuffered=false) : Unbuffered(unbuffered) {
+ explicit raw_ostream(bool unbuffered=false)
+ : BufferMode(unbuffered ? Unbuffered : InternalBuffer), Error(false) {
// Start out ready to flush.
OutBufStart = OutBufEnd = OutBufCur = 0;
}
- virtual ~raw_ostream() {
- delete [] OutBufStart;
- }
+ virtual ~raw_ostream();
/// tell - Return the current offset within the file.
uint64_t tell() { return current_pos() + GetNumBytesInBuffer(); }
+ /// has_error - Return the value of the flag in this raw_ostream indicating
+ /// whether an output error has been encountered.
+ bool has_error() const {
+ return Error;
+ }
+
+ /// clear_error - Set the flag read by has_error() to false. If the error
+ /// flag is set at the time when this raw_ostream's destructor is called,
+ /// llvm_report_error is called to report the error. Use clear_error()
+ /// after handling the error to avoid this behavior.
+ void clear_error() {
+ Error = false;
+ }
+
//===--------------------------------------------------------------------===//
// Configuration Interface
//===--------------------------------------------------------------------===//
- /// SetBufferSize - Set the internal buffer size to the specified amount
- /// instead of the default.
- void SetBufferSize(unsigned Size=4096) {
- assert(Size >= 64 &&
- "Buffer size must be somewhat large for invariants to hold");
+ /// SetBuffered - Set the stream to be buffered, with an automatically
+ /// determined buffer size.
+ void SetBuffered();
+
+ /// SetBufferSize - Set the stream to be buffered, using the
+ /// specified buffer size.
+ void SetBufferSize(size_t Size) {
flush();
+ SetBufferAndMode(new char[Size], Size, InternalBuffer);
+ }
- delete [] OutBufStart;
- OutBufStart = new char[Size];
- OutBufEnd = OutBufStart+Size;
- OutBufCur = OutBufStart;
- Unbuffered = false;
+ size_t GetBufferSize() {
+ // If we're supposed to be buffered but haven't actually gotten around
+ // to allocating the buffer yet, return the value that would be used.
+ if (BufferMode != Unbuffered && OutBufStart == 0)
+ return preferred_buffer_size();
+
+ // Otherwise just return the size of the allocated buffer.
+ return OutBufEnd - OutBufStart;
}
- /// SetUnbuffered - Set the streams buffering status. When
- /// unbuffered the stream will flush after every write. This routine
+ /// SetUnbuffered - Set the stream to be unbuffered. When
+ /// unbuffered, the stream will flush after every write. This routine
/// will also flush the buffer immediately when the stream is being
/// set to unbuffered.
void SetUnbuffered() {
flush();
-
- delete [] OutBufStart;
- OutBufStart = OutBufEnd = OutBufCur = 0;
- Unbuffered = true;
+ SetBufferAndMode(0, 0, Unbuffered);
}
- unsigned GetNumBytesInBuffer() const {
+ size_t GetNumBytesInBuffer() const {
return OutBufCur - OutBufStart;
}
@@ -134,22 +169,29 @@ public:
return *this;
}
- raw_ostream &operator<<(const char *Str) {
- // Inline fast path, particulary for constant strings where a
- // sufficiently smart compiler will simplify strlen.
-
- unsigned Size = strlen(Str);
+ raw_ostream &operator<<(const StringRef &Str) {
+ // Inline fast path, particularly for strings with a known length.
+ size_t Size = Str.size();
// Make sure we can use the fast path.
if (OutBufCur+Size > OutBufEnd)
- return write(Str, Size);
+ return write(Str.data(), Size);
- memcpy(OutBufCur, Str, Size);
+ memcpy(OutBufCur, Str.data(), Size);
OutBufCur += Size;
return *this;
}
- raw_ostream &operator<<(const std::string& Str) {
+ raw_ostream &operator<<(const char *Str) {
+ // Inline fast path, particularly for constant strings where a sufficiently
+ // smart compiler will simplify strlen.
+
+ this->operator<<(StringRef(Str));
+ return *this;
+ }
+
+ raw_ostream &operator<<(const std::string &Str) {
+ // Avoid the fast path; it would only increase code size for a marginal win.
write(Str.data(), Str.length());
return *this;
}
@@ -169,17 +211,21 @@ public:
return *this;
}
- raw_ostream &operator<<(double N) {
- this->operator<<(ftostr(N));
- return *this;
- }
+ raw_ostream &operator<<(double N);
+
+ /// write_hex - Output \arg N in hexadecimal, without any prefix or padding.
+ raw_ostream &write_hex(unsigned long long N);
raw_ostream &write(unsigned char C);
- raw_ostream &write(const char *Ptr, unsigned Size);
+ raw_ostream &write(const char *Ptr, size_t Size);
// Formatted output, see the format() function in Support/Format.h.
raw_ostream &operator<<(const format_object_base &Fmt);
+ /// indent - Insert 'NumSpaces' spaces.
+ raw_ostream &indent(unsigned NumSpaces);
+
+
/// Changes the foreground color of text that will be output from this point
/// forward.
/// @param colors ANSI color to use, the special SAVEDCOLOR can be used to
@@ -194,6 +240,11 @@ public:
/// outputting colored text, or before program exit.
virtual raw_ostream &resetColor() { return *this; }
+ /// This function determines if this stream is connected to a "tty" or
+ /// "console" window. That is, the output would be displayed to the user
+ /// rather than being put on a pipe or stored in a file.
+ virtual bool is_displayed() const { return false; }
+
//===--------------------------------------------------------------------===//
// Subclass Interface
//===--------------------------------------------------------------------===//
@@ -203,8 +254,15 @@ private:
/// by subclasses. This writes the \arg Size bytes starting at
/// \arg Ptr to the underlying stream.
///
+ /// This function is guaranteed to only be called at a point at which it is
+ /// safe for the subclass to install a new buffer via SetBuffer.
+ ///
+ /// \arg Ptr - The start of the data to be written. For buffered streams this
+ /// is guaranteed to be the start of the buffer.
+ /// \arg Size - The number of bytes to be written.
+ ///
/// \invariant { Size > 0 }
- virtual void write_impl(const char *Ptr, unsigned Size) = 0;
+ virtual void write_impl(const char *Ptr, size_t Size) = 0;
// An out of line virtual method to provide a home for the class vtable.
virtual void handle();
@@ -213,14 +271,42 @@ private:
/// counting the bytes currently in the buffer.
virtual uint64_t current_pos() = 0;
+protected:
+ /// SetBuffer - Use the provided buffer as the raw_ostream buffer. This is
+ /// intended for use only by subclasses which can arrange for the output to go
+ /// directly into the desired output buffer, instead of being copied on each
+ /// flush.
+ void SetBuffer(char *BufferStart, size_t Size) {
+ SetBufferAndMode(BufferStart, Size, ExternalBuffer);
+ }
+
+ /// preferred_buffer_size - Return an efficient buffer size for the
+ /// underlying output mechanism.
+ virtual size_t preferred_buffer_size();
+
+ /// error_detected - Set the flag indicating that an output error has
+ /// been encountered.
+ void error_detected() { Error = true; }
+
+ /// getBufferStart - Return the beginning of the current stream buffer, or 0
+ /// if the stream is unbuffered.
+ const char *getBufferStart() const { return OutBufStart; }
+
//===--------------------------------------------------------------------===//
// Private Interface
//===--------------------------------------------------------------------===//
private:
+ /// SetBufferAndMode - Install the given buffer and mode.
+ void SetBufferAndMode(char *BufferStart, size_t Size, BufferKind Mode);
+
/// flush_nonempty - Flush the current buffer, which is known to be
/// non-empty. This outputs the currently buffered data and resets
/// the buffer to empty.
void flush_nonempty();
+
+ /// copy_to_buffer - Copy data into the buffer. Size must not be
+ /// greater than the number of unused bytes in the buffer.
+ void copy_to_buffer(const char *Ptr, size_t Size);
};
//===----------------------------------------------------------------------===//
@@ -235,23 +321,41 @@ class raw_fd_ostream : public raw_ostream {
uint64_t pos;
/// write_impl - See raw_ostream::write_impl.
- virtual void write_impl(const char *Ptr, unsigned Size);
+ virtual void write_impl(const char *Ptr, size_t Size);
/// current_pos - Return the current position within the stream, not
/// counting the bytes currently in the buffer.
virtual uint64_t current_pos() { return pos; }
+ /// preferred_buffer_size - Determine an efficient buffer size.
+ virtual size_t preferred_buffer_size();
+
public:
- /// raw_fd_ostream - Open the specified file for writing. If an
- /// error occurs, information about the error is put into ErrorInfo,
- /// and the stream should be immediately destroyed; the string will
- /// be empty if no error occurred.
+
+ enum {
+ /// F_Excl - When opening a file, this flag makes raw_fd_ostream
+ /// report an error if the file already exists.
+ F_Excl = 1,
+
+ /// F_Append - When opening a file, if it already exists append to the
+ /// existing file instead of returning an error. This may not be specified
+ /// with F_Excl.
+ F_Append = 2,
+
+ /// F_Binary - The file should be opened in binary mode on platforms that
+ /// make this distinction.
+ F_Binary = 4
+ };
+
+ /// raw_fd_ostream - Open the specified file for writing. If an error occurs,
+ /// information about the error is put into ErrorInfo, and the stream should
+ /// be immediately destroyed; the string will be empty if no error occurred.
+ /// This allows optional flags to control how the file will be opened.
///
/// \param Filename - The file to open. If this is "-" then the
/// stream will use stdout instead.
- /// \param Binary - The file should be opened in binary mode on
- /// platforms that support this distinction.
- raw_fd_ostream(const char *Filename, bool Binary, std::string &ErrorInfo);
+ raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
+ unsigned Flags = 0);
/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
/// ShouldClose is true, this closes the file when the stream is destroyed.
@@ -264,9 +368,6 @@ public:
/// close - Manually flush the stream and close the file.
void close();
- /// tell - Return the current offset with the file.
- uint64_t tell() { return pos + GetNumBytesInBuffer(); }
-
/// seek - Flushes the stream and repositions the underlying file descriptor
/// position to the offset specified from the beginning of the file.
uint64_t seek(uint64_t off);
@@ -274,6 +375,8 @@ public:
virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
bool bg=false);
virtual raw_ostream &resetColor();
+
+ virtual bool is_displayed() const;
};
/// raw_stdout_ostream - This is a stream that always prints to stdout.
@@ -302,49 +405,29 @@ raw_ostream &outs();
/// Use it like: errs() << "foo" << "bar";
raw_ostream &errs();
+/// nulls() - This returns a reference to a raw_ostream which simply discards
+/// output.
+raw_ostream &nulls();
//===----------------------------------------------------------------------===//
// Output Stream Adaptors
//===----------------------------------------------------------------------===//
-/// raw_os_ostream - A raw_ostream that writes to an std::ostream. This is a
-/// simple adaptor class.
-class raw_os_ostream : public raw_ostream {
- std::ostream &OS;
-
- /// write_impl - See raw_ostream::write_impl.
- virtual void write_impl(const char *Ptr, unsigned Size);
-
- /// current_pos - Return the current position within the stream, not
- /// counting the bytes currently in the buffer.
- virtual uint64_t current_pos();
-
-public:
- raw_os_ostream(std::ostream &O) : OS(O) {}
- ~raw_os_ostream();
-
- /// tell - Return the current offset with the stream.
- uint64_t tell();
-};
-
/// raw_string_ostream - A raw_ostream that writes to an std::string. This is a
-/// simple adaptor class.
+/// simple adaptor class. This class does not encounter output errors.
class raw_string_ostream : public raw_ostream {
std::string &OS;
/// write_impl - See raw_ostream::write_impl.
- virtual void write_impl(const char *Ptr, unsigned Size);
+ virtual void write_impl(const char *Ptr, size_t Size);
/// current_pos - Return the current position within the stream, not
/// counting the bytes currently in the buffer.
virtual uint64_t current_pos() { return OS.size(); }
public:
- raw_string_ostream(std::string &O) : OS(O) {}
+ explicit raw_string_ostream(std::string &O) : OS(O) {}
~raw_string_ostream();
- /// tell - Return the current offset with the stream.
- uint64_t tell() { return OS.size() + GetNumBytesInBuffer(); }
-
/// str - Flushes the stream contents to the target string and returns
/// the string's reference.
std::string& str() {
@@ -354,22 +437,42 @@ public:
};
/// raw_svector_ostream - A raw_ostream that writes to an SmallVector or
-/// SmallString. This is a simple adaptor class.
+/// SmallString. This is a simple adaptor class. This class does not
+/// encounter output errors.
class raw_svector_ostream : public raw_ostream {
SmallVectorImpl<char> &OS;
/// write_impl - See raw_ostream::write_impl.
- virtual void write_impl(const char *Ptr, unsigned Size);
+ virtual void write_impl(const char *Ptr, size_t Size);
/// current_pos - Return the current position within the stream, not
/// counting the bytes currently in the buffer.
virtual uint64_t current_pos();
public:
- raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {}
+ /// Construct a new raw_svector_ostream.
+ ///
+ /// \arg O - The vector to write to; this should generally have at least 128
+ /// bytes free to avoid any extraneous memory overhead.
+ explicit raw_svector_ostream(SmallVectorImpl<char> &O);
~raw_svector_ostream();
- /// tell - Return the current offset with the stream.
- uint64_t tell();
+ /// str - Flushes the stream contents to the target vector and returns a
+ /// StringRef for the vector contents.
+ StringRef str();
+};
+
+/// raw_null_ostream - A raw_ostream that discards all output.
+class raw_null_ostream : public raw_ostream {
+ /// write_impl - See raw_ostream::write_impl.
+ virtual void write_impl(const char *Ptr, size_t size);
+
+ /// current_pos - Return the current position within the stream, not
+ /// counting the bytes currently in the buffer.
+ virtual uint64_t current_pos();
+
+public:
+ explicit raw_null_ostream() {}
+ ~raw_null_ostream();
};
} // end llvm namespace
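To illustrate the reworked raw_fd_ostream interface (the file name and payload are placeholders): open flags are now a bit mask following ErrorInfo, and write errors are latched until clear_error().

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    bool writeBinary(const char *Filename) {
      std::string Err;
      llvm::raw_fd_ostream OS(Filename, Err, llvm::raw_fd_ostream::F_Binary);
      if (!Err.empty())
        return false;             // the ctor reports failure via ErrorInfo
      OS << "payload";
      OS.close();
      return !OS.has_error();     // error flag is sticky until clear_error()
    }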
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index 5000a8b859b8..5f799b850de1 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -35,7 +35,7 @@ namespace dont_use
// important to make the is_class<T>::value idiom zero cost. it
// evaluates to a constant 1 or 0 depending on whether the
// parameter T is a class or not (respectively).
- template<typename T> char is_class_helper(void(T::*)(void));
+ template<typename T> char is_class_helper(void(T::*)());
template<typename T> double is_class_helper(...);
}
@@ -49,6 +49,44 @@ struct is_class
enum { value = sizeof(char) == sizeof(dont_use::is_class_helper<T>(0)) };
};
+/// \brief Metafunction that determines whether the two given types are
+/// equivalent.
+template<typename T, typename U>
+struct is_same {
+ static const bool value = false;
+};
+
+template<typename T>
+struct is_same<T, T> {
+ static const bool value = true;
+};
+
+// enable_if_c - Enable/disable a template based on a metafunction
+template<bool Cond, typename T = void>
+struct enable_if_c {
+ typedef T type;
+};
+
+template<typename T> struct enable_if_c<false, T> { };
+
+// enable_if - Enable/disable a template based on a metafunction
+template<typename Cond, typename T = void>
+struct enable_if : public enable_if_c<Cond::value, T> { };
+
+namespace dont_use {
+ template<typename Base> char base_of_helper(const volatile Base*);
+ template<typename Base> double base_of_helper(...);
+}
+
+/// is_base_of - Metafunction to determine whether one type is a base class of
+/// (or identical to) another type.
+template<typename Base, typename Derived>
+struct is_base_of {
+ static const bool value
+ = is_class<Base>::value && is_class<Derived>::value &&
+ sizeof(char) == sizeof(dont_use::base_of_helper<Base>((Derived*)0));
+};
+
}
#endif
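A small sketch of the new metafunctions in use (the Shape/Circle types and the isShape helper are hypothetical): enable_if plus is_base_of restrict an overload to types derived from a given base.

    #include "llvm/Support/type_traits.h"

    struct Shape {};
    struct Circle : Shape {};

    // Participates in overload resolution only when T derives from Shape.
    template<typename T>
    typename llvm::enable_if<llvm::is_base_of<Shape, T>, bool>::type
    isShape(const T &) { return true; }

    bool test() { return isShape(Circle()); }   // returns true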
diff --git a/include/llvm/SymbolTableListTraits.h b/include/llvm/SymbolTableListTraits.h
index 337b76f76680..39953e1a5809 100644
--- a/include/llvm/SymbolTableListTraits.h
+++ b/include/llvm/SymbolTableListTraits.h
@@ -28,7 +28,8 @@
#include "llvm/ADT/ilist.h"
namespace llvm {
-
+class ValueSymbolTable;
+
template<typename NodeTy> class ilist_iterator;
template<typename NodeTy, typename Traits> class iplist;
template<typename Ty> struct ilist_traits;
diff --git a/include/llvm/System/Alarm.h b/include/llvm/System/Alarm.h
index 9535d23f812c..7c284167c2ce 100644
--- a/include/llvm/System/Alarm.h
+++ b/include/llvm/System/Alarm.h
@@ -39,7 +39,8 @@ namespace sys {
/// @returns -1=cancelled, 0=untriggered, 1=triggered
int AlarmStatus();
- /// Sleep for n seconds.
+ /// Sleep for n seconds. Warning: mixing calls to Sleep() and other *Alarm
+ /// calls may be a bad idea on some platforms (source: Linux man page).
/// @returns nothing.
void Sleep(unsigned n);
diff --git a/include/llvm/System/Disassembler.h b/include/llvm/System/Disassembler.h
index d1d8a81007e3..6d1cc0fdcb50 100644
--- a/include/llvm/System/Disassembler.h
+++ b/include/llvm/System/Disassembler.h
@@ -23,7 +23,7 @@ namespace sys {
/// This function returns true if it is possible to use some external
/// disassembler library, and false otherwise.
-bool hasDisassembler(void);
+bool hasDisassembler();
/// This function provides some "glue" code to call external disassembler
/// libraries.
diff --git a/include/llvm/System/DynamicLibrary.h b/include/llvm/System/DynamicLibrary.h
index 409a9d279c2d..ac58407a194d 100644
--- a/include/llvm/System/DynamicLibrary.h
+++ b/include/llvm/System/DynamicLibrary.h
@@ -14,7 +14,6 @@
#ifndef LLVM_SYSTEM_DYNAMIC_LIBRARY_H
#define LLVM_SYSTEM_DYNAMIC_LIBRARY_H
-#include "llvm/System/Path.h"
#include <string>
namespace llvm {
@@ -30,66 +29,55 @@ namespace sys {
/// but rather the main program itself, useful on Windows where the main
/// executable cannot be searched.
class DynamicLibrary {
- /// @name Constructors
- /// @{
- public:
- /// Construct a DynamicLibrary that represents the currently executing
- /// program. The program must have been linked with -export-dynamic or
- /// -dlopen self for this to work.
- /// @throws std::string indicating why the program couldn't be opened.
- /// @brief Open program as dynamic library.
- DynamicLibrary();
+ DynamicLibrary(); // DO NOT IMPLEMENT
+ public:
+ /// This function allows a library to be loaded without instantiating a
+ /// DynamicLibrary object. Consequently, it is marked as being permanent
+ /// and will only be unloaded when the program terminates. This returns
+ /// false on success or returns true and fills in *ErrMsg on failure.
+ /// @brief Open a dynamic library permanently.
+ ///
+ /// NOTE: This function is not thread safe.
+ ///
+ static bool LoadLibraryPermanently(const char *filename,
+ std::string *ErrMsg = 0);
- /// After destruction, the symbols of the library will no longer be
- /// available to the program.
- /// @brief Closes the DynamicLibrary
- ~DynamicLibrary();
+ /// This function will search through all previously loaded dynamic
+ /// libraries for the symbol \p symbolName. If it is found, the address of
+ /// that symbol is returned. If not, null is returned. Note that this will
+ /// search permanently loaded libraries (LoadLibraryPermanently) as well
+ /// as ephemerally loaded libraries (constructors).
+ /// @throws std::string on error.
+ /// @brief Search through libraries for address of a symbol
+ ///
+ /// NOTE: This function is not thread safe.
+ ///
+ static void *SearchForAddressOfSymbol(const char *symbolName);
- /// @}
- /// @name Functions
- /// @{
- public:
- /// This function allows a library to be loaded without instantiating a
- /// DynamicLibrary object. Consequently, it is marked as being permanent
- /// and will only be unloaded when the program terminates. This returns
- /// false on success or returns true and fills in *ErrMsg on failure.
- /// @brief Open a dynamic library permanently.
- static bool LoadLibraryPermanently(const char* filename,
- std::string *ErrMsg = 0);
+ /// @brief Convenience function for C++ophiles.
+ ///
+ /// NOTE: This function is not thread safe.
+ ///
+ static void *SearchForAddressOfSymbol(const std::string &symbolName) {
+ return SearchForAddressOfSymbol(symbolName.c_str());
+ }
- /// This function will search through all previously loaded dynamic
- /// libraries for the symbol \p symbolName. If it is found, the addressof
- /// that symbol is returned. If not, null is returned. Note that this will
- /// search permanently loaded libraries (LoadLibraryPermanently) as well
- /// as ephemerally loaded libraries (constructors).
- /// @throws std::string on error.
- /// @brief Search through libraries for address of a symbol
- static void* SearchForAddressOfSymbol(const char* symbolName);
+ /// This function permanently adds the symbol \p symbolName with the
+ /// value \p symbolValue. These symbols are searched before any
+ /// libraries.
+ /// @brief Add searchable symbol/value pair.
+ ///
+ /// NOTE: This function is not thread safe.
+ ///
+ static void AddSymbol(const char *symbolName, void *symbolValue);
- /// @brief Convenience function for C++ophiles.
- static void* SearchForAddressOfSymbol(const std::string& symbolName) {
- return SearchForAddressOfSymbol(symbolName.c_str());
- }
-
- /// This functions permanently adds the symbol \p symbolName with the
- /// value \p symbolValue. These symbols are searched before any
- /// libraries.
- /// @brief Add searchable symbol/value pair.
- static void AddSymbol(const char* symbolName, void *symbolValue);
-
- /// @brief Convenience function for C++ophiles.
- static void AddSymbol(const std::string& symbolName, void *symbolValue) {
- AddSymbol(symbolName.c_str(), symbolValue);
- }
-
- /// @}
- /// @name Implementation
- /// @{
- protected:
- void* handle; // Opaque handle for information about the library
- DynamicLibrary(const DynamicLibrary&); ///< Do not implement
- DynamicLibrary& operator=(const DynamicLibrary&); ///< Do not implement
- /// @}
+ /// @brief Convenience function for C++ophiles.
+ ///
+ /// NOTE: This function is not thread safe.
+ ///
+ static void AddSymbol(const std::string &symbolName, void *symbolValue) {
+ AddSymbol(symbolName.c_str(), symbolValue);
+ }
};
} // End sys namespace
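With the instance interface removed, usage reduces to the static functions; a sketch (library and symbol names are caller-supplied placeholders):

    #include "llvm/System/DynamicLibrary.h"
    #include <string>

    void *resolve(const char *Lib, const char *Sym) {
      std::string Err;
      // LoadLibraryPermanently returns false on success; the library then
      // stays loaded until the program exits.
      if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Lib, &Err))
        return 0;
      return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Sym);
    }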
diff --git a/include/llvm/System/Memory.h b/include/llvm/System/Memory.h
index 136dc8a32895..d6300db5a9e2 100644
--- a/include/llvm/System/Memory.h
+++ b/include/llvm/System/Memory.h
@@ -14,6 +14,7 @@
#ifndef LLVM_SYSTEM_MEMORY_H
#define LLVM_SYSTEM_MEMORY_H
+#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
@@ -26,11 +27,13 @@ namespace sys {
/// @brief Memory block abstraction.
class MemoryBlock {
public:
+ MemoryBlock() { }
+ MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
void *base() const { return Address; }
- unsigned size() const { return Size; }
+ size_t size() const { return Size; }
private:
void *Address; ///< Address of first byte of memory area
- unsigned Size; ///< Size, in bytes of the memory area
+ size_t Size; ///< Size, in bytes of the memory area
friend class Memory;
};
@@ -50,7 +53,7 @@ namespace sys {
/// a null memory block and fills in *ErrMsg.
///
/// @brief Allocate Read/Write/Execute memory.
- static MemoryBlock AllocateRWX(unsigned NumBytes,
+ static MemoryBlock AllocateRWX(size_t NumBytes,
const MemoryBlock *NearBlock,
std::string *ErrMsg = 0);
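A sketch of the updated allocation interface (the wrapper function is hypothetical): sizes are size_t now, and failure yields a null block plus an error string.

    #include "llvm/System/Memory.h"
    #include <string>

    llvm::sys::MemoryBlock allocCode(size_t NumBytes) {
      std::string Err;
      llvm::sys::MemoryBlock MB =
          llvm::sys::Memory::AllocateRWX(NumBytes, 0, &Err);
      // On failure MB.base() is null and Err describes the problem.
      return MB;
    }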
diff --git a/include/llvm/System/Mutex.h b/include/llvm/System/Mutex.h
index d2c457dbc91c..71d10067c303 100644
--- a/include/llvm/System/Mutex.h
+++ b/include/llvm/System/Mutex.h
@@ -93,32 +93,36 @@ namespace llvm
MutexImpl(rec), acquired(0), recursive(rec) { }
bool acquire() {
- if (!mt_only || llvm_is_multithreaded())
+ if (!mt_only || llvm_is_multithreaded()) {
return MutexImpl::acquire();
-
- // Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
- assert((recursive || acquired == 0) && "Lock already acquired!!");
- ++acquired;
- return true;
+ } else {
+ // Single-threaded debugging code. This would be racy in
+ // multithreaded mode, but provides no sanity checks in single
+ // threaded mode.
+ assert((recursive || acquired == 0) && "Lock already acquired!!");
+ ++acquired;
+ return true;
+ }
}
bool release() {
- if (!mt_only || llvm_is_multithreaded())
+ if (!mt_only || llvm_is_multithreaded()) {
return MutexImpl::release();
-
- // Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
- assert(((recursive && acquired) || (acquired == 1)) &&
- "Lock not acquired before release!");
- --acquired;
- return true;
+ } else {
+ // Single-threaded debugging code. This would be racy in
+ // multithreaded mode, but provides no sanity checks in single
+ // threaded mode.
+ assert(((recursive && acquired) || (acquired == 1)) &&
+ "Lock not acquired before release!");
+ --acquired;
+ return true;
+ }
}
bool tryacquire() {
if (!mt_only || llvm_is_multithreaded())
return MutexImpl::tryacquire();
- return true;
+ else return true;
}
private:
@@ -131,15 +135,15 @@ namespace llvm
template<bool mt_only>
class SmartScopedLock {
- SmartMutex<mt_only>* mtx;
+ SmartMutex<mt_only>& mtx;
public:
- SmartScopedLock(SmartMutex<mt_only>* m) : mtx(m) {
- mtx->acquire();
+ SmartScopedLock(SmartMutex<mt_only>& m) : mtx(m) {
+ mtx.acquire();
}
~SmartScopedLock() {
- mtx->release();
+ mtx.release();
}
};
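The guard now takes the mutex by reference rather than by pointer; a minimal sketch, assuming the SmartMutex default constructor:

    #include "llvm/System/Mutex.h"

    static llvm::sys::SmartMutex<true> Lock;

    void critical() {
      llvm::sys::SmartScopedLock<true> Guard(Lock);  // reference, not pointer
      // ... protected work; the lock is released when Guard leaves scope ...
    }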
diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h
index 05be2212758b..3b73a128fbd1 100644
--- a/include/llvm/System/Path.h
+++ b/include/llvm/System/Path.h
@@ -18,7 +18,6 @@
#include <set>
#include <string>
#include <vector>
-#include <iosfwd>
namespace llvm {
namespace sys {
@@ -216,7 +215,7 @@ namespace sys {
/// Compares \p this Path with \p that Path for inequality.
/// @returns true if \p this and \p that refer to different things.
/// @brief Inequality Operator
- bool operator!=(const Path &that) const;
+ bool operator!=(const Path &that) const { return !(*this == that); }
/// Determines if \p this Path is less than \p that Path. This is required
/// so that Path objects can be placed into ordered collections (e.g.
@@ -248,13 +247,7 @@ namespace sys {
/// @brief Determines if the path name is empty (invalid).
bool isEmpty() const { return path.empty(); }
- /// This function returns the current contents of the path as a
- /// std::string. This allows the underlying path string to be manipulated.
- /// @returns std::string containing the path name.
- /// @brief Returns the path as a std::string.
- const std::string &toString() const { return path; }
-
- /// This function returns the last component of the path name. The last
+ /// This function returns the last component of the path name. The last
/// component is the file or directory name occurring after the last
/// directory separator. If no directory separator is present, the entire
/// path name is returned (i.e. same as str()).
@@ -285,6 +278,8 @@ namespace sys {
/// @returns a 'C' string containing the path name.
/// @brief Returns the path as a C string.
const char *c_str() const { return path.c_str(); }
+ const std::string &str() const { return path; }
+
/// size - Return the length in bytes of this path name.
size_t size() const { return path.size(); }
@@ -586,6 +581,7 @@ namespace sys {
/// @name Data
/// @{
protected:
+ // Our win32 implementation relies on this string being mutable.
mutable std::string path; ///< Storage for the path name.
@@ -714,13 +710,6 @@ namespace sys {
extern const char PathSeparator;
}
-std::ostream& operator<<(std::ostream& strm, const sys::Path& aPath);
-inline std::ostream& operator<<(std::ostream& strm,
- const sys::PathWithStatus& aPath) {
- strm << static_cast<const sys::Path&>(aPath);
- return strm;
-}
-
}
#endif
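With toString() and the std::ostream inserter gone, printing a Path goes through str() or c_str() and a raw_ostream; a sketch:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/System/Path.h"

    void show(const llvm::sys::Path &P) {
      llvm::errs() << P.str() << "\n";   // str() replaces toString()
    }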
diff --git a/include/llvm/System/Process.h b/include/llvm/System/Process.h
index 11dbf759a6c4..010499acd4bf 100644
--- a/include/llvm/System/Process.h
+++ b/include/llvm/System/Process.h
@@ -94,6 +94,11 @@ namespace sys {
/// the user rather than being put on a pipe or stored in a file.
static bool StandardErrIsDisplayed();
+ /// This function determines if the given file descriptor is connected to
+ /// a "tty" or "console" window. That is, the output would be displayed to
+ /// the user rather than being put on a pipe or stored in a file.
+ static bool FileDescriptorIsDisplayed(int fd);
+
/// This function determines the number of columns in the window
/// if standard output is connected to a "tty" or "console"
/// window. If standard output is not connected to a tty or
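A sketch of the new per-descriptor query (the helper is hypothetical); it generalizes StandardOutIsDisplayed/StandardErrIsDisplayed to arbitrary file descriptors:

    #include "llvm/System/Process.h"

    bool shouldColorize(int FD) {
      // True when FD refers to a tty/console rather than a pipe or file.
      return llvm::sys::Process::FileDescriptorIsDisplayed(FD);
    }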
diff --git a/include/llvm/System/Program.h b/include/llvm/System/Program.h
index 37f55466a50d..679956272609 100644
--- a/include/llvm/System/Program.h
+++ b/include/llvm/System/Program.h
@@ -19,6 +19,9 @@
namespace llvm {
namespace sys {
+ // TODO: Add operations to communicate with the process, redirect its I/O,
+ // etc.
+
/// This class provides an abstraction for programs that are executable by the
/// operating system. It provides a platform generic way to find executable
/// programs from the path and to execute them in various ways. The sys::Path
@@ -26,67 +29,126 @@ namespace sys {
/// @since 1.4
/// @brief An abstraction for finding and executing programs.
class Program {
+ /// Opaque handle for target specific data.
+ void *Data_;
+
+ // Noncopyable.
+ Program(const Program& other);
+ Program& operator=(const Program& other);
+
/// @name Methods
/// @{
- public:
- /// This static constructor (factory) will attempt to locate a program in
- /// the operating system's file system using some pre-determined set of
- /// locations to search (e.g. the PATH on Unix).
- /// @returns A Path object initialized to the path of the program or a
- /// Path object that is empty (invalid) if the program could not be found.
- /// @throws nothing
- /// @brief Construct a Program by finding it by name.
- static Path FindProgramByName(const std::string& name);
-
- /// This function executes the program using the \p arguments provided and
- /// waits for the program to exit. This function will block the current
- /// program until the invoked program exits. The invoked program will
- /// inherit the stdin, stdout, and stderr file descriptors, the
- /// environment and other configuration settings of the invoking program.
- /// If Path::executable() does not return true when this function is
- /// called then a std::string is thrown.
- /// @returns an integer result code indicating the status of the program.
- /// A zero or positive value indicates the result code of the program. A
- /// negative value is the signal number on which it terminated.
- /// @see FindProgrambyName
- /// @brief Executes the program with the given set of \p args.
- static int ExecuteAndWait(
- const Path& path, ///< sys::Path object providing the path of the
- ///< program to be executed. It is presumed this is the result of
- ///< the FindProgramByName method.
- const char** args, ///< A vector of strings that are passed to the
- ///< program. The first element should be the name of the program.
- ///< The list *must* be terminated by a null char* entry.
- const char ** env = 0, ///< An optional vector of strings to use for
- ///< the program's environment. If not provided, the current program's
- ///< environment will be used.
- const sys::Path** redirects = 0, ///< An optional array of pointers to
- ///< Paths. If the array is null, no redirection is done. The array
- ///< should have a size of at least three. If the pointer in the array
- ///< are not null, then the inferior process's stdin(0), stdout(1),
- ///< and stderr(2) will be redirected to the corresponding Paths.
- ///< When an empty Path is passed in, the corresponding file
- ///< descriptor will be disconnected (ie, /dev/null'd) in a portable
- ///< way.
- unsigned secondsToWait = 0, ///< If non-zero, this specifies the amount
- ///< of time to wait for the child process to exit. If the time
- ///< expires, the child is killed and this call returns. If zero,
- ///< this function will wait until the child finishes or forever if
- ///< it doesn't.
- unsigned memoryLimit = 0, ///< If non-zero, this specifies max. amount
- ///< of memory can be allocated by process. If memory usage will be
- ///< higher limit, the child is killed and this call returns. If zero
- ///< - no memory limit.
- std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
- ///< instance in which error messages will be returned. If the string
- ///< is non-empty upon return an error occurred while invoking the
- ///< program.
+ public:
+
+ Program();
+ ~Program();
+
+  /// Return the process ID of this program.
+ unsigned GetPid() const;
+
+ /// This function executes the program using the \p arguments provided. The
+ /// invoked program will inherit the stdin, stdout, and stderr file
+ /// descriptors, the environment and other configuration settings of the
+ /// invoking program. If Path::executable() does not return true when this
+ /// function is called then a std::string is thrown.
+ /// @returns false in case of error, true otherwise.
+ /// @see FindProgramByName
+ /// @brief Executes the program with the given set of \p args.
+ bool Execute
+ ( const Path& path, ///< sys::Path object providing the path of the
+ ///< program to be executed. It is presumed this is the result of
+ ///< the FindProgramByName method.
+ const char** args, ///< A vector of strings that are passed to the
+ ///< program. The first element should be the name of the program.
+ ///< The list *must* be terminated by a null char* entry.
+ const char ** env = 0, ///< An optional vector of strings to use for
+ ///< the program's environment. If not provided, the current program's
+ ///< environment will be used.
+ const sys::Path** redirects = 0, ///< An optional array of pointers to
+ ///< Paths. If the array is null, no redirection is done. The array
+    ///< should have a size of at least three. If the pointers in the array
+ ///< are not null, then the inferior process's stdin(0), stdout(1),
+ ///< and stderr(2) will be redirected to the corresponding Paths.
+ ///< When an empty Path is passed in, the corresponding file
+ ///< descriptor will be disconnected (ie, /dev/null'd) in a portable
+ ///< way.
+    unsigned memoryLimit = 0, ///< If non-zero, this specifies the maximum
+    ///< amount of memory that the process may allocate. If memory usage
+    ///< exceeds this limit, the child is killed and this call returns. If
+    ///< zero, no memory limit is imposed.
+ std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
+ ///< instance in which error messages will be returned. If the string
+ ///< is non-empty upon return an error occurred while invoking the
+ ///< program.
+ );
+
+ /// This function waits for the program to exit. This function will block
+ /// the current program until the invoked program exits.
+ /// @returns an integer result code indicating the status of the program.
+ /// A zero or positive value indicates the result code of the program. A
+ /// negative value is the signal number on which it terminated.
+ /// @see Execute
+ /// @brief Waits for the program to exit.
+ int Wait
+ ( unsigned secondsToWait = 0, ///< If non-zero, this specifies the amount
+ ///< of time to wait for the child process to exit. If the time
+ ///< expires, the child is killed and this call returns. If zero,
+ ///< this function will wait until the child finishes or forever if
+ ///< it doesn't.
+ std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
+ ///< instance in which error messages will be returned. If the string
+    ///< is non-empty upon return, an error occurred while waiting.
);
- // These methods change the specified standard stream (stdin or stdout) to
- // binary mode. They return true if an error occurred
- static bool ChangeStdinToBinary();
- static bool ChangeStdoutToBinary();
+
+ /// This function terminates the program.
+  /// @returns true if an error occurred.
+ /// @see Execute
+ /// @brief Terminates the program.
+ bool Kill
+ ( std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
+ ///< instance in which error messages will be returned. If the string
+    ///< is non-empty upon return, an error occurred while killing the
+ ///< program.
+ );
+
+ /// This static constructor (factory) will attempt to locate a program in
+ /// the operating system's file system using some pre-determined set of
+ /// locations to search (e.g. the PATH on Unix).
+ /// @returns A Path object initialized to the path of the program or a
+ /// Path object that is empty (invalid) if the program could not be found.
+ /// @throws nothing
+ /// @brief Construct a Program by finding it by name.
+ static Path FindProgramByName(const std::string& name);
+
+ // These methods change the specified standard stream (stdin or stdout) to
+  // binary mode. They return true if an error occurred.
+ static bool ChangeStdinToBinary();
+ static bool ChangeStdoutToBinary();
+
+ /// A convenience function equivalent to Program prg; prg.Execute(..);
+ /// prg.Wait(..);
+ /// @throws nothing
+ /// @see Execute, Wait
+ static int ExecuteAndWait(const Path& path,
+ const char** args,
+ const char ** env = 0,
+ const sys::Path** redirects = 0,
+ unsigned secondsToWait = 0,
+ unsigned memoryLimit = 0,
+ std::string* ErrMsg = 0);
+
+ /// A convenience function equivalent to Program prg; prg.Execute(..);
+ /// @throws nothing
+ /// @see Execute
+ static void ExecuteNoWait(const Path& path,
+ const char** args,
+ const char ** env = 0,
+ const sys::Path** redirects = 0,
+ unsigned memoryLimit = 0,
+ std::string* ErrMsg = 0);
+
/// @}
+
};
}
}
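
A sketch of the split Execute/Wait flow, assuming the tool path and
null-terminated argument vector come from the caller; error handling is
abbreviated:

#include "llvm/System/Program.h"
#include <string>

// Launch a child process, then reap it with a ten second watchdog.
// Returns the child's result code, or -1 if it could not be started.
static int runTool(const llvm::sys::Path &Tool, const char **Args) {
  llvm::sys::Program P;
  std::string Err;
  if (!P.Execute(Tool, Args, /*env=*/0, /*redirects=*/0,
                 /*memoryLimit=*/0, &Err))
    return -1;                        // Err describes the failure.
  return P.Wait(/*secondsToWait=*/10, &Err);
}
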
diff --git a/include/llvm/System/RWMutex.h b/include/llvm/System/RWMutex.h
index e577d457afb5..3a288180bf07 100644
--- a/include/llvm/System/RWMutex.h
+++ b/include/llvm/System/RWMutex.h
@@ -141,15 +141,14 @@ namespace llvm
/// ScopedReader - RAII acquisition of a reader lock
template<bool mt_only>
struct SmartScopedReader {
- SmartRWMutex<mt_only>* mutex;
+ SmartRWMutex<mt_only>& mutex;
- explicit SmartScopedReader(SmartRWMutex<mt_only>* m) {
- mutex = m;
- mutex->reader_acquire();
+ explicit SmartScopedReader(SmartRWMutex<mt_only>& m) : mutex(m) {
+ mutex.reader_acquire();
}
~SmartScopedReader() {
- mutex->reader_release();
+ mutex.reader_release();
}
};
typedef SmartScopedReader<false> ScopedReader;
@@ -157,15 +156,14 @@ namespace llvm
/// ScopedWriter - RAII acquisition of a writer lock
template<bool mt_only>
struct SmartScopedWriter {
- SmartRWMutex<mt_only>* mutex;
+ SmartRWMutex<mt_only>& mutex;
- explicit SmartScopedWriter(SmartRWMutex<mt_only>* m) {
- mutex = m;
- mutex->writer_acquire();
+ explicit SmartScopedWriter(SmartRWMutex<mt_only>& m) : mutex(m) {
+ mutex.writer_acquire();
}
~SmartScopedWriter() {
- mutex->writer_release();
+ mutex.writer_release();
}
};
typedef SmartScopedWriter<false> ScopedWriter;
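
With the guards now holding references, callers hand over the mutex itself
rather than its address; a sketch, assuming a mutex shared across threads:

#include "llvm/System/RWMutex.h"

static llvm::sys::SmartRWMutex<true> TableLock;

static void readSharedTable() {
  // The reader lock is taken here and released when Guard leaves scope.
  llvm::sys::SmartScopedReader<true> Guard(TableLock);
  // ... read the shared data ...
}
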
diff --git a/include/llvm/System/TimeValue.h b/include/llvm/System/TimeValue.h
index b9ada0071232..109973042f83 100644
--- a/include/llvm/System/TimeValue.h
+++ b/include/llvm/System/TimeValue.h
@@ -251,7 +251,7 @@ namespace sys {
return seconds_ - PosixZeroTime.seconds_;
}
- /// Converts the TiemValue into the correspodning number of "ticks" for
+ /// Converts the TimeValue into the corresponding number of "ticks" for
/// Win32 platforms, correcting for the difference in Win32 zero time.
/// @brief Convert to windows time (seconds since 12:00:00a Jan 1, 1601)
uint64_t toWin32Time() const {
@@ -271,7 +271,7 @@ namespace sys {
/// Provides conversion of the TimeValue into a readable time & date.
/// @returns std::string containing the readable time value
/// @brief Convert time to a string.
- std::string toString() const;
+ std::string str() const;
/// @}
/// @name Mutators
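
Callers of the old toString() migrate mechanically to the shorter name; a
one-line sketch, assuming the usual TimeValue::now() factory:

#include "llvm/System/TimeValue.h"
#include <string>

static std::string timestamp() {
  return llvm::sys::TimeValue::now().str();  // formerly .toString()
}
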
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h
index 5cfdc023d439..58333e2b424f 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/Target/SubtargetFeature.h
@@ -20,12 +20,12 @@
#include <string>
#include <vector>
-#include <iosfwd>
#include <cstring>
#include "llvm/Support/DataTypes.h"
namespace llvm {
-
+ class raw_ostream;
+
//===----------------------------------------------------------------------===//
///
/// SubtargetFeatureKV - Used to provide key value pairs for feature and
@@ -102,8 +102,7 @@ public:
void *getInfo(const SubtargetInfoKV *Table, size_t TableSize);
/// Print feature string.
- void print(std::ostream &OS) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS) const;
// Dump feature info.
void dump() const;
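
Printing now takes a raw_ostream instead of a std::ostream; a sketch,
assuming the surrounding class is SubtargetFeatures as in this header:

#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Support/raw_ostream.h"

static void dumpFeatures(const llvm::SubtargetFeatures &Features) {
  Features.print(llvm::errs());  // errs() is a raw_ostream
}
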
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index ebd826a6f4a1..4d65b19e2e71 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -203,6 +203,8 @@ class Instruction {
bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains?
bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction?
bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction.
+ bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement?
+ bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement?
// Side effect flags - When set, the flags have these meanings:
//
@@ -221,6 +223,11 @@ class Instruction {
bit mayHaveSideEffects = 0;
bit neverHasSideEffects = 0;
+ // Is this instruction a "real" instruction (with a distinct machine
+ // encoding), or is it a pseudo instruction used for codegen modeling
+ // purposes.
+ bit isCodeGenOnly = 0;
+
InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling.
string Constraints = ""; // OperandConstraint, e.g. $src = $dst.
@@ -258,16 +265,63 @@ def ins;
/// of operands.
def variable_ops;
+
+/// PointerLikeRegClass - Values that are designed to have pointer width are
+/// derived from this. TableGen treats the register class as having a symbolic
+/// type that it doesn't know, and resolves the actual regclass to use by using
+/// the TargetRegisterInfo::getPointerRegClass() hook at codegen time.
+class PointerLikeRegClass<int Kind> {
+ int RegClassKind = Kind;
+}
+
+
/// ptr_rc definition - Mark this operand as being a pointer value whose
/// register class is resolved dynamically via a callback to TargetInstrInfo.
/// FIXME: We should probably change this to a class which contain a list of
/// flags. But currently we have but one flag.
-def ptr_rc;
+def ptr_rc : PointerLikeRegClass<0>;
/// unknown definition - Mark this operand as being of unknown type, causing
/// it to be resolved by inference in the context it is used.
def unknown;
+/// AsmOperandClass - Representation for the kinds of operands which the target
+/// specific parser can create and the assembly matcher may need to distinguish.
+///
+/// Operand classes are used to define the order in which instructions are
+/// matched, to ensure that the instruction which gets matched for any
+/// particular list of operands is deterministic.
+///
+/// The target specific parser must be able to classify a parsed operand into a
+/// unique class which does not partially overlap with any other classes. It can
+/// match a subset of some other class, in which case the super class field
+/// should be defined.
+class AsmOperandClass {
+ /// The name to use for this class, which should be usable as an enum value.
+ string Name = ?;
+
+ /// The super class of this operand.
+ AsmOperandClass SuperClass = ?;
+
+ /// The name of the method on the target specific operand to call to test
+ /// whether the operand is an instance of this class. If not set, this will
+ /// default to "isFoo", where Foo is the AsmOperandClass name. The method
+ /// signature should be:
+ /// bool isFoo() const;
+ string PredicateMethod = ?;
+
+ /// The name of the method on the target specific operand to call to add the
+ /// target specific operand to an MCInst. If not set, this will default to
+ /// "addFooOperands", where Foo is the AsmOperandClass name. The method
+ /// signature should be:
+ /// void addFooOperands(MCInst &Inst, unsigned N) const;
+ string RenderMethod = ?;
+}
+
+def ImmAsmOperand : AsmOperandClass {
+ let Name = "Imm";
+}
+
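
The documented defaults translate into methods on the target's parsed-operand
type. A hypothetical operand matching the "Imm" class above might look like
this (FooOperand and its storage are invented for illustration):

#include "llvm/MC/MCInst.h"
#include <cassert>

class FooOperand {
  int64_t ImmVal;
public:
  explicit FooOperand(int64_t V) : ImmVal(V) {}

  // PredicateMethod default for an AsmOperandClass named "Imm".
  bool isImm() const { return true; }

  // RenderMethod default: append this operand's value to the MCInst.
  void addImmOperands(llvm::MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(llvm::MCOperand::CreateImm(ImmVal));
  }
};
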
/// Operand Types - These provide the built-in operand types that may be used
/// by a target. Targets can optionally provide their own operand types as
/// needed, though this should not be needed for RISC targets.
@@ -276,6 +330,16 @@ class Operand<ValueType ty> {
string PrintMethod = "printOperand";
string AsmOperandLowerMethod = ?;
dag MIOperandInfo = (ops);
+
+ // ParserMatchClass - The "match class" that operands of this type fit
+ // in. Match classes are used to define the order in which instructions are
+  // matched, to ensure that which instruction gets matched is deterministic.
+  //
+  // The target specific parser must be able to classify a parsed operand
+ // into a unique class, which does not partially overlap with any other
+ // classes. It can match a subset of some other class, in which case
+ // ParserMatchSuperClass should be set to the name of that class.
+ AsmOperandClass ParserMatchClass = ImmAsmOperand;
}
def i1imm : Operand<i1>;
@@ -302,8 +366,8 @@ class PredicateOperand<ValueType ty, dag OpTypes, dag AlwaysVal>
}
/// OptionalDefOperand - This is used to define a optional definition operand
-/// for an instruction. DefaultOps is the register the operand represents if none
-/// is supplied, e.g. zero_reg.
+/// for an instruction. DefaultOps is the register the operand represents if
+/// none is supplied, e.g. zero_reg.
class OptionalDefOperand<ValueType ty, dag OpTypes, dag defaultops>
: Operand<ty> {
let MIOperandInfo = OpTypes;
@@ -329,7 +393,8 @@ class InstrInfo {
bit isLittleEndianEncoding = 0;
}
-// Standard Instructions.
+// Standard Pseudo Instructions.
+let isCodeGenOnly = 1 in {
def PHI : Instruction {
let OutOperandList = (ops);
let InOperandList = (ops variable_ops);
@@ -363,12 +428,12 @@ def GC_LABEL : Instruction {
let Namespace = "TargetInstrInfo";
let hasCtrlDep = 1;
}
-def DECLARE : Instruction {
+def KILL : Instruction {
let OutOperandList = (ops);
let InOperandList = (ops variable_ops);
let AsmString = "";
let Namespace = "TargetInstrInfo";
- let hasCtrlDep = 1;
+ let neverHasSideEffects = 1;
}
def EXTRACT_SUBREG : Instruction {
let OutOperandList = (ops unknown:$dst);
@@ -409,6 +474,39 @@ def COPY_TO_REGCLASS : Instruction {
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
}
+}
+
+//===----------------------------------------------------------------------===//
+// AsmParser - This class can be implemented by targets that wish to implement
+// .s file parsing.
+//
+// Subtargets can have multiple different assembly parsers (e.g. AT&T vs Intel
+// syntax on X86 for example).
+//
+class AsmParser {
+ // AsmParserClassName - This specifies the suffix to use for the asmparser
+ // class. Generated AsmParser classes are always prefixed with the target
+ // name.
+ string AsmParserClassName = "AsmParser";
+
+ // Variant - AsmParsers can be of multiple different variants. Variants are
+  // used to support targets that need to parse multiple formats for the
+ // assembly language.
+ int Variant = 0;
+
+ // CommentDelimiter - If given, the delimiter string used to recognize
+ // comments which are hard coded in the .td assembler strings for individual
+ // instructions.
+ string CommentDelimiter = "";
+
+ // RegisterPrefix - If given, the token prefix which indicates a register
+ // token. This is used by the matcher to automatically recognize hard coded
+ // register tokens as constrained registers, instead of tokens, for the
+ // purposes of matching.
+ string RegisterPrefix = "";
+}
+def DefaultAsmParser : AsmParser;
+
//===----------------------------------------------------------------------===//
// AsmWriter - This class can be implemented by targets that need to customize
@@ -434,6 +532,17 @@ class AsmWriter {
// will specify which alternative to use. For example "{x|y|z}" with Variant
// == 1, will expand to "y".
int Variant = 0;
+
+
+ // FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar
+  // layout, the asmwriter can actually generate output in these columns (in
+ // verbose-asm mode). These two values indicate the width of the first column
+ // (the "opcode" area) and the width to reserve for subsequent operands. When
+ // verbose asm mode is enabled, operands will be indented to respect this.
+ int FirstOperandColumn = -1;
+
+ // OperandSpacing - Space between operand columns.
+ int OperandSpacing = -1;
}
def DefaultAsmWriter : AsmWriter;
@@ -445,6 +554,9 @@ class Target {
// InstructionSet - Instruction set description for this target.
InstrInfo InstructionSet;
+ // AssemblyParsers - The AsmParser instances available for this target.
+ list<AsmParser> AssemblyParsers = [DefaultAsmParser];
+
// AssemblyWriters - The AsmWriter instances available for this target.
list<AsmWriter> AssemblyWriters = [DefaultAsmWriter];
}
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
new file mode 100644
index 000000000000..ef1fc49cefee
--- /dev/null
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -0,0 +1,65 @@
+//===-- llvm/Target/TargetAsmParser.h - Target Assembly Parser --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETPARSER_H
+#define LLVM_TARGET_TARGETPARSER_H
+
+#include "llvm/MC/MCAsmLexer.h"
+
+namespace llvm {
+class MCAsmParser;
+class MCInst;
+class StringRef;
+class Target;
+
+/// TargetAsmParser - Generic interface to target specific assembly parsers.
+class TargetAsmParser {
+ TargetAsmParser(const TargetAsmParser &); // DO NOT IMPLEMENT
+ void operator=(const TargetAsmParser &); // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+ TargetAsmParser(const Target &);
+
+ /// TheTarget - The Target that this machine was created for.
+ const Target &TheTarget;
+
+public:
+ virtual ~TargetAsmParser();
+
+ const Target &getTarget() const { return TheTarget; }
+
+ /// ParseInstruction - Parse one assembly instruction.
+ ///
+ /// The parser is positioned following the instruction name. The target
+ /// specific instruction parser should parse the entire instruction and
+ /// construct the appropriate MCInst, or emit an error. On success, the entire
+ /// line should be parsed up to and including the end-of-statement token. On
+ /// failure, the parser is not required to read to the end of the line.
+  ///
+ /// \param AP - The current parser object.
+ /// \param Name - The instruction name.
+ /// \param Inst [out] - On success, the parsed instruction.
+ /// \return True on failure.
+ virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst) = 0;
+
+ /// ParseDirective - Parse a target specific assembler directive
+ ///
+ /// The parser is positioned following the directive name. The target
+ /// specific directive parser should parse the entire directive doing or
+ /// recording any target specific work, or return true and do nothing if the
+ /// directive is not target specific. If the directive is specific for
+ /// the target, the entire line is parsed up to and including the
+ /// end-of-statement token and false is returned.
+ ///
+ /// \param ID - the identifier token of the directive.
+ virtual bool ParseDirective(AsmToken DirectiveID) = 0;
+};
+
+} // End llvm namespace
+
+#endif
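
A skeletal subclass showing the two hooks; "Foo" is a placeholder target and
both bodies are stubs:

#include "llvm/Target/TargetAsmParser.h"

namespace {
class FooAsmParser : public llvm::TargetAsmParser {
public:
  explicit FooAsmParser(const llvm::Target &T) : TargetAsmParser(T) {}

  virtual bool ParseInstruction(const llvm::StringRef &Name,
                                llvm::MCInst &Inst) {
    // A real parser consumes operands up to end-of-statement and fills
    // in Inst; returning true reports failure.
    return true;
  }

  virtual bool ParseDirective(llvm::AsmToken DirectiveID) {
    // Returning true defers: the directive is not target specific.
    return true;
  }
};
} // end anonymous namespace
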
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index da3cbd208867..ceaeb0b5038b 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -58,6 +58,10 @@ class CCIfNest<CCAction A> : CCIf<"ArgFlags.isNest()", A> {}
/// the specified action.
class CCIfSplit<CCAction A> : CCIf<"ArgFlags.isSplit()", A> {}
+/// CCIfSRet - If this argument is marked with the 'sret' attribute, apply
+/// the specified action.
+class CCIfSRet<CCAction A> : CCIf<"ArgFlags.isSRet()", A> {}
+
/// CCIfNotVarArg - If the current function is not vararg - apply the action
class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}
@@ -105,6 +109,12 @@ class CCBitConvertToType<ValueType destTy> : CCAction {
ValueType DestTy = destTy;
}
+/// CCPassIndirect - If applied, this stores the value to the stack and passes
+/// the pointer as a normal argument.
+class CCPassIndirect<ValueType destTy> : CCAction {
+ ValueType DestTy = destTy;
+}
+
/// CCDelegateTo - This action invokes the specified sub-calling-convention. It
/// is successful if the specified CC matches.
class CCDelegateTo<CallingConv cc> : CCAction {
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index 82abfc72864f..f8ea64b4ea66 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -22,6 +22,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
@@ -33,6 +34,7 @@ class IntegerType;
class StructType;
class StructLayout;
class GlobalVariable;
+class LLVMContext;
/// Enum used to categorize the alignment types stored by TargetAlignElem
enum AlignTypeEnum {
@@ -89,6 +91,9 @@ private:
*/
static const TargetAlignElem InvalidAlignmentElem;
+ // Opaque pointer for the StructType -> StructLayout map.
+ mutable void* LayoutMap;
+
//! Set/initialize target alignments
void setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
unsigned char pref_align, uint32_t bit_width);
@@ -111,9 +116,8 @@ public:
/// @note This has to exist, because this is a pass, but it should never be
/// used.
TargetData() : ImmutablePass(&ID) {
- assert(0 && "ERROR: Bad TargetData ctor used. "
- "Tool did not specify a TargetData to use?");
- abort();
+ llvm_report_error("Bad TargetData ctor used. "
+ "Tool did not specify a TargetData to use?");
}
/// Constructs a TargetData from a specification string. See init().
@@ -131,7 +135,8 @@ public:
PointerMemSize(TD.PointerMemSize),
PointerABIAlign(TD.PointerABIAlign),
PointerPrefAlign(TD.PointerPrefAlign),
- Alignments(TD.Alignments)
+ Alignments(TD.Alignments),
+ LayoutMap(0)
{ }
~TargetData(); // Not virtual, do not subclass this class
@@ -229,7 +234,7 @@ public:
/// getIntPtrType - Return an unsigned integer type that is the same size or
/// greater to the host pointer size.
///
- const IntegerType *getIntPtrType() const;
+ const IntegerType *getIntPtrType(LLVMContext &C) const;
/// getIndexedOffset - return the offset from the beginning of the type for
/// the specified indices. This is used to implement getelementptr.
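
The pointer-sized integer type is now minted from an explicit context; a
sketch of the updated call:

#include "llvm/Target/TargetData.h"
#include "llvm/LLVMContext.h"

static const llvm::IntegerType *pointerSizedInt(const llvm::TargetData &TD,
                                                llvm::LLVMContext &Ctx) {
  return TD.getIntPtrType(Ctx);  // previously took no arguments
}
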
diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h
index a5b30c4f6e3d..7cb693155c29 100644
--- a/include/llvm/Target/TargetELFWriterInfo.h
+++ b/include/llvm/Target/TargetELFWriterInfo.h
@@ -97,9 +97,26 @@ namespace llvm {
/// ELF relocation entry.
virtual bool hasRelocationAddend() const = 0;
- /// getAddendForRelTy - Gets the addend value for an ELF relocation entry
- /// based on the target relocation type. If addend is not used returns 0.
- virtual long int getAddendForRelTy(unsigned RelTy) const = 0;
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const = 0;
+
+  /// getRelocationTySize - Returns the size of the relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const = 0;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const = 0;
+
+  /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+  /// used to reference an absolute label, such as a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const = 0;
+
+  /// computeRelocation - Some relocatable fields can be relocated directly,
+  /// avoiding the emission of a relocation symbol. This computes the final
+  /// relocation value for such a field.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const = 0;
};
} // end llvm namespace
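
A sketch of how a writer might combine the new hooks; the offsets are assumed
to be section-relative and the policy shown is illustrative only:

#include "llvm/Target/TargetELFWriterInfo.h"

static long int resolveField(const llvm::TargetELFWriterInfo &TEW,
                             unsigned SymOffset, unsigned RelOffset,
                             unsigned RelTy) {
  if (TEW.isPCRelativeRel(RelTy))
    // Resolve the field directly, avoiding a relocation symbol.
    return TEW.computeRelocation(SymOffset, RelOffset, RelTy);
  return TEW.getDefaultAddendForRelTy(RelTy);
}
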
diff --git a/include/llvm/Target/TargetFrameInfo.h b/include/llvm/Target/TargetFrameInfo.h
index 3e26b9dd01be..975d15659c15 100644
--- a/include/llvm/Target/TargetFrameInfo.h
+++ b/include/llvm/Target/TargetFrameInfo.h
@@ -31,13 +31,22 @@ public:
StackGrowsUp, // Adding to the stack increases the stack address
StackGrowsDown // Adding to the stack decreases the stack address
};
+
+ // Maps a callee saved register to a stack slot with a fixed offset.
+ struct SpillSlot {
+ unsigned Reg;
+ int Offset; // Offset relative to stack pointer on function entry.
+ };
private:
StackDirection StackDir;
unsigned StackAlignment;
+ unsigned TransientStackAlignment;
int LocalAreaOffset;
public:
- TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO)
- : StackDir(D), StackAlignment(StackAl), LocalAreaOffset(LAO) {}
+ TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1)
+ : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
+ LocalAreaOffset(LAO) {}
virtual ~TargetFrameInfo();
@@ -48,12 +57,20 @@ public:
///
StackDirection getStackGrowthDirection() const { return StackDir; }
- /// getStackAlignment - This method returns the number of bytes that the stack
- /// pointer must be aligned to. Typically, this is the largest alignment for
- /// any data object in the target.
+ /// getStackAlignment - This method returns the number of bytes to which the
+ /// stack pointer must be aligned on entry to a function. Typically, this
+ /// is the largest alignment for any data object in the target.
///
unsigned getStackAlignment() const { return StackAlignment; }
+ /// getTransientStackAlignment - This method returns the number of bytes to
+ /// which the stack pointer must be aligned at all times, even between
+ /// calls.
+ ///
+ unsigned getTransientStackAlignment() const {
+ return TransientStackAlignment;
+ }
+
/// getOffsetOfLocalArea - This method returns the offset of the local area
/// from the stack pointer on entrance to a function.
///
@@ -65,10 +82,10 @@ public:
///
/// Each entry in this array contains a <register,offset> pair, indicating the
/// fixed offset from the incoming stack pointer that each register should be
- /// spilled at. If a register is not listed here, the code generator is
+ /// spilled at. If a register is not listed here, the code generator is
/// allowed to spill it anywhere it chooses.
///
- virtual const std::pair<unsigned, int> *
+ virtual const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const {
NumEntries = 0;
return 0;
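
A hypothetical target override using the new struct; the register numbers
and offsets are invented:

#include "llvm/Target/TargetFrameInfo.h"

namespace Foo { enum { FP = 1, LR = 2 }; }  // made-up register numbers

class FooFrameInfo : public llvm::TargetFrameInfo {
public:
  FooFrameInfo() : TargetFrameInfo(StackGrowsDown, 8, 0) {}

  virtual const SpillSlot *
  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
    static const SpillSlot Slots[] = {
      { Foo::LR, -4 },  // link register spilled at [SP-4] on entry
      { Foo::FP, -8 }   // frame pointer at [SP-8]
    };
    NumEntries = sizeof(Slots) / sizeof(Slots[0]);
    return Slots;
  }
};
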
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index 622a216c33c6..d828a236cd88 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -18,7 +18,8 @@
namespace llvm {
class TargetRegisterClass;
-
+class TargetRegisterInfo;
+
//===----------------------------------------------------------------------===//
// Machine Operand Flags and Description
//===----------------------------------------------------------------------===//
@@ -45,14 +46,28 @@ namespace TOI {
class TargetOperandInfo {
public:
/// RegClass - This specifies the register class enumeration of the operand
- /// if the operand is a register. If not, this contains 0.
+ /// if the operand is a register. If isLookupPtrRegClass is set, then this is
+ /// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to
+ /// get a dynamic register class.
+ ///
+ /// NOTE: This member should be considered to be private, all access should go
+ /// through "getRegClass(TRI)" below.
unsigned short RegClass;
+
+ /// Flags - These are flags from the TOI::OperandFlags enum.
unsigned short Flags;
+
/// Lower 16 bits are used to specify which constraints are set. The higher 16
/// bits are used to specify the value of constraints (4 bits each).
- unsigned int Constraints;
+ unsigned Constraints;
/// Currently no other information.
+ /// getRegClass - Get the register class for the operand, handling resolution
+ /// of "symbolic" pointer register classes etc. If this is not a register
+ /// operand, this returns null.
+ const TargetRegisterClass *getRegClass(const TargetRegisterInfo *TRI) const;
+
+
/// isLookupPtrRegClass - Set if this operand is a pointer value and it
/// requires a callback to look up its register class.
bool isLookupPtrRegClass() const { return Flags&(1 <<TOI::LookupPtrRegClass);}
@@ -96,7 +111,9 @@ namespace TID {
ConvertibleTo3Addr,
UsesCustomDAGSchedInserter,
Rematerializable,
- CheapAsAMove
+ CheapAsAMove,
+ ExtraSrcRegAllocReq,
+ ExtraDefRegAllocReq
};
}
@@ -428,6 +445,26 @@ public:
bool isAsCheapAsAMove() const {
return Flags & (1 << TID::CheapAsAMove);
}
+
+  /// hasExtraSrcRegAllocReq - Returns true if this instruction's source operands
+ /// have special register allocation requirements that are not captured by the
+ /// operand register classes. e.g. ARM::STRD's two source registers must be an
+ /// even / odd pair, ARM::STM registers have to be in ascending order.
+ /// Post-register allocation passes should not attempt to change allocations
+ /// for sources of instructions with this flag.
+ bool hasExtraSrcRegAllocReq() const {
+ return Flags & (1 << TID::ExtraSrcRegAllocReq);
+ }
+
+  /// hasExtraDefRegAllocReq - Returns true if this instruction's def operands
+ /// have special register allocation requirements that are not captured by the
+ /// operand register classes. e.g. ARM::LDRD's two def registers must be an
+ /// even / odd pair, ARM::LDM registers have to be in ascending order.
+ /// Post-register allocation passes should not attempt to change allocations
+ /// for definitions of instructions with this flag.
+ bool hasExtraDefRegAllocReq() const {
+ return Flags & (1 << TID::ExtraDefRegAllocReq);
+ }
};
} // end namespace llvm
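
With the free function getInstrOperandRegClass removed (see the
TargetInstrInfo.h hunk below), the lookup goes through the operand info
itself; a sketch:

#include "llvm/Target/TargetInstrDesc.h"

// Resolve the register class of operand OpNum; getRegClass handles
// pointer-like classes via TRI and returns null for non-register operands.
static const llvm::TargetRegisterClass *
operandRegClass(const llvm::TargetInstrDesc &TID, unsigned OpNum,
                const llvm::TargetRegisterInfo *TRI) {
  return TID.OpInfo[OpNum].getRegClass(TRI);
}
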
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index ecdd68258d55..919bef1e7f2b 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -19,6 +19,7 @@
namespace llvm {
+class MCAsmInfo;
class TargetRegisterClass;
class TargetRegisterInfo;
class LiveVariables;
@@ -50,7 +51,10 @@ public:
DBG_LABEL = 2,
EH_LABEL = 3,
GC_LABEL = 4,
- DECLARE = 5,
+
+ /// KILL - This instruction is a noop that is used only to adjust the liveness
+ /// of registers. This can be useful when dealing with sub-registers.
+ KILL = 5,
/// EXTRACT_SUBREG - This instruction takes two operands: a register
/// that has subregisters, and a subregister index. It returns the
@@ -99,24 +103,35 @@ public:
/// isTriviallyReMaterializable - Return true if the instruction is trivially
/// rematerializable, meaning it has no side effects and requires no operands
/// that aren't always available.
- bool isTriviallyReMaterializable(const MachineInstr *MI) const {
- return MI->getDesc().isRematerializable() &&
- isReallyTriviallyReMaterializable(MI);
+ bool isTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA = 0) const {
+ return MI->getOpcode() == IMPLICIT_DEF ||
+ (MI->getDesc().isRematerializable() &&
+ (isReallyTriviallyReMaterializable(MI, AA) ||
+ isReallyTriviallyReMaterializableGeneric(MI, AA)));
}
protected:
/// isReallyTriviallyReMaterializable - For instructions with opcodes for
- /// which the M_REMATERIALIZABLE flag is set, this function tests whether the
- /// instruction itself is actually trivially rematerializable, considering
- /// its operands. This is used for targets that have instructions that are
- /// only trivially rematerializable for specific uses. This predicate must
- /// return false if the instruction has any side effects other than
- /// producing a value, or if it requres any address registers that are not
- /// always available.
- virtual bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
- return true;
+ /// which the M_REMATERIALIZABLE flag is set, this hook lets the target
+ /// specify whether the instruction is actually trivially rematerializable,
+ /// taking into consideration its operands. This predicate must return false
+ /// if the instruction has any side effects other than producing a value, or
+  /// if it requires any address registers that are not always available.
+ virtual bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ return false;
}
+private:
+ /// isReallyTriviallyReMaterializableGeneric - For instructions with opcodes
+ /// for which the M_REMATERIALIZABLE flag is set and the target hook
+ /// isReallyTriviallyReMaterializable returns false, this function does
+ /// target-independent tests to determine if the instruction is really
+ /// trivially rematerializable.
+ bool isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
+
public:
/// Return true if the instruction is a register to register move and return
/// the source and dest operands and their sub-register indices by reference.
@@ -150,19 +165,9 @@ public:
/// specific location targeting a new destination register.
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg,
+ unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig) const = 0;
- /// isInvariantLoad - Return true if the specified instruction (which is
- /// marked mayLoad) is loading from a location whose value is invariant across
- /// the function. For example, loading a value from the constant pool or from
- /// from the argument area of a function if it does not change. This should
- /// only return true of *all* loads the instruction does are invariant (if it
- /// does multiple loads).
- virtual bool isInvariantLoad(const MachineInstr *MI) const {
- return false;
- }
-
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into one or more true
@@ -194,13 +199,11 @@ public:
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const = 0;
- /// CommuteChangesDestination - Return true if commuting the specified
- /// instruction will also changes the destination operand. Also return the
- /// current operand index of the would be new destination register by
- /// reference. This can happen when the commutable instruction is also a
- /// two-address instruction.
- virtual bool CommuteChangesDestination(MachineInstr *MI,
- unsigned &OpIdx) const = 0;
+  /// findCommutedOpIndices - If the specified MI is commutable, return the
+  /// two operand indices whose values would swap. Return true if the
+  /// instruction is not in a form which this routine understands.
+ virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const = 0;
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -212,15 +215,15 @@ public:
/// 2. If this block ends with only an unconditional branch, it sets TBB to be
/// the destination block.
/// 3. If this block ends with an conditional branch and it falls through to
- /// an successor block, it sets TBB to be the branch destination block and
+ /// a successor block, it sets TBB to be the branch destination block and
/// a list of operands that evaluate the condition. These
/// operands can be passed to other TargetInstrInfo methods to create new
/// branches.
- /// 4. If this block ends with an conditional branch and an unconditional
- /// block, it returns the 'true' destination in TBB, the 'false'
- /// destination in FBB, and a list of operands that evaluate the condition.
- /// These operands can be passed to other TargetInstrInfo methods to create
- /// new branches.
+ /// 4. If this block ends with a conditional branch followed by an
+ /// unconditional branch, it returns the 'true' destination in TBB, the
+ /// 'false' destination in FBB, and a list of operands that evaluate the
+ /// condition. These operands can be passed to other TargetInstrInfo
+ /// methods to create new branches.
///
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
@@ -234,7 +237,7 @@ public:
bool AllowModify = false) const {
return true;
}
-
+
/// RemoveBranch - Remove the branching code at the end of the specific MBB.
/// This is only invoked in cases where AnalyzeBranch returns success. It
/// returns the number of instructions that were removed.
@@ -242,13 +245,12 @@ public:
assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!");
return 0;
}
-
- /// InsertBranch - Insert a branch into the end of the specified
- /// MachineBasicBlock. This operands to this method are the same as those
- /// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch
- /// returns success and when an unconditional branch (TBB is non-null, FBB is
- /// null, Cond is empty) needs to be inserted. It returns the number of
- /// instructions inserted.
+
+ /// InsertBranch - Insert branch code into the end of the specified
+ /// MachineBasicBlock. The operands to this method are the same as those
+ /// returned by AnalyzeBranch. This is only invoked in cases where
+ /// AnalyzeBranch returns success. It returns the number of instructions
+ /// inserted.
///
/// It is also invoked by tail merging to add unconditional branches in
/// cases where AnalyzeBranch doesn't apply because there was no original
@@ -285,18 +287,6 @@ public:
assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
}
- /// storeRegToAddr - Store the specified register of the given register class
- /// to the specified address. The store instruction is to be added to the
- /// given machine basic block before the specified machine instruction. If
- /// isKill is true, the register operand is the last use and must be marked
- /// kill.
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- assert(0 && "Target didn't implement TargetInstrInfo::storeRegToAddr!");
- }
-
/// loadRegFromStackSlot - Load the specified register of the given register
/// class from the specified stack frame index. The load instruction is to be
/// added to the given machine basic block before the specified machine
@@ -307,16 +297,6 @@ public:
const TargetRegisterClass *RC) const {
assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
}
-
- /// loadRegFromAddr - Load the specified register of the given register class
- /// class from the specified address. The load instruction is to be added to
- /// the given machine basic block before the specified machine instruction.
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromAddr!");
- }
/// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
/// saved registers and returns true if it isn't possible / profitable to do
@@ -429,11 +409,8 @@ public:
/// insertNoop - Insert a noop into the instruction stream at the specified
/// point.
virtual void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- assert(0 && "Target didn't implement insertNoop!");
- abort();
- }
-
+ MachineBasicBlock::iterator MI) const;
+
/// isPredicated - Returns true if the instruction is already predicated.
///
virtual bool isPredicated(const MachineInstr *MI) const {
@@ -479,9 +456,15 @@ public:
return 0;
}
- /// GetFunctionSizeInBytes - Returns the size of the specified MachineFunction.
+ /// GetFunctionSizeInBytes - Returns the size of the specified
+ /// MachineFunction.
///
virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const = 0;
+
+ /// Measure the specified inline asm to determine an approximation of its
+ /// length.
+ virtual unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const;
};
/// TargetInstrInfoImpl - This is the default implementation of
@@ -495,23 +478,17 @@ protected:
public:
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const;
- virtual bool CommuteChangesDestination(MachineInstr *MI,
- unsigned &OpIdx) const;
+ virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
virtual bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg,
+ unsigned DestReg, unsigned SubReg,
const MachineInstr *Orig) const;
virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
};
-/// getInstrOperandRegClass - Return register class of the operand of an
-/// instruction of the specified TargetInstrDesc.
-const TargetRegisterClass*
-getInstrOperandRegClass(const TargetRegisterInfo *TRI,
- const TargetInstrDesc &II, unsigned Op);
-
} // End llvm namespace
#endif
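
A sketch of a caller migrating from CommuteChangesDestination; per the doc
comment above, the indices are only meaningful when the hook returns false:

#include "llvm/Target/TargetInstrInfo.h"

static void inspectCommute(const llvm::TargetInstrInfo *TII,
                           llvm::MachineInstr *MI) {
  unsigned SrcOpIdx1, SrcOpIdx2;
  if (!TII->findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2)) {
    // SrcOpIdx1 and SrcOpIdx2 name the operands whose values would swap
    // if MI were commuted via TII->commuteInstruction(MI).
  }
}
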
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h
index 18931ea7fb48..420fa94ce76b 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -7,90 +7,160 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the structures used for instruction itineraries and
-// states. This is used by schedulers to determine instruction states and
-// latencies.
+// This file describes the structures used for instruction
+// itineraries, stages, and operand reads/writes. This is used by
+// schedulers to determine instruction stages and latencies.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
#define LLVM_TARGET_TARGETINSTRITINERARIES_H
+#include <algorithm>
+
namespace llvm {
//===----------------------------------------------------------------------===//
-/// Instruction stage - These values represent a step in the execution of an
-/// instruction. The latency represents the number of discrete time slots used
-/// need to complete the stage. Units represent the choice of functional units
-/// that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
+/// Instruction stage - These values represent a non-pipelined step in
+/// the execution of an instruction. Cycles represents the number of
+/// discrete time slots needed to complete the stage. Units represent
+/// the choice of functional units that can be used to complete the
+/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
+/// cycles should elapse from the start of this stage to the start of
+/// the next stage in the itinerary. A value of -1 indicates that the
+/// next stage should start immediately after the current one.
+/// For example:
+///
+/// { 1, x, -1 }
+/// indicates that the stage occupies FU x for 1 cycle and that
+/// the next stage starts immediately after this one.
+///
+/// { 2, x|y, 1 }
+/// indicates that the stage occupies either FU x or FU y for 2
+///       consecutive cycles and that the next stage starts one cycle
+/// after this stage starts. That is, the stage requirements
+/// overlap in time.
+///
+/// { 1, x, 0 }
+/// indicates that the stage occupies FU x for 1 cycle and that
+/// the next stage starts in this same cycle. This can be used to
+/// indicate that the instruction requires multiple stages at the
+/// same time.
///
struct InstrStage {
- unsigned Cycles; ///< Length of stage in machine cycles
- unsigned Units; ///< Choice of functional units
+ unsigned Cycles_; ///< Length of stage in machine cycles
+ unsigned Units_; ///< Choice of functional units
+ int NextCycles_; ///< Number of machine cycles to next stage
+
+ /// getCycles - returns the number of cycles the stage is occupied
+ unsigned getCycles() const {
+ return Cycles_;
+ }
+
+ /// getUnits - returns the choice of FUs
+ unsigned getUnits() const {
+ return Units_;
+ }
+
+ /// getNextCycles - returns the number of cycles from the start of
+ /// this stage to the start of the next stage in the itinerary
+ unsigned getNextCycles() const {
+ return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
+ }
};
//===----------------------------------------------------------------------===//
-/// Instruction itinerary - An itinerary represents a sequential series of steps
-/// required to complete an instruction. Itineraries are represented as
-/// sequences of instruction stages.
+/// Instruction itinerary - An itinerary represents the scheduling
+/// information for an instruction. This includes a set of stages
+/// occupied by the instruction, and the pipeline cycles in which
+/// operands are read and written.
///
struct InstrItinerary {
- unsigned First; ///< Index of first stage in itinerary
- unsigned Last; ///< Index of last + 1 stage in itinerary
+ unsigned FirstStage; ///< Index of first stage in itinerary
+ unsigned LastStage; ///< Index of last + 1 stage in itinerary
+ unsigned FirstOperandCycle; ///< Index of first operand rd/wr
+ unsigned LastOperandCycle; ///< Index of last + 1 operand rd/wr
};
-
//===----------------------------------------------------------------------===//
/// Instruction itinerary Data - Itinerary data supplied by a subtarget to be
/// used by a target.
///
struct InstrItineraryData {
const InstrStage *Stages; ///< Array of stages selected
+ const unsigned *OperandCycles; ///< Array of operand cycles selected
const InstrItinerary *Itineratries; ///< Array of itineraries selected
/// Ctors.
///
- InstrItineraryData() : Stages(0), Itineratries(0) {}
- InstrItineraryData(const InstrStage *S, const InstrItinerary *I)
- : Stages(S), Itineratries(I) {}
+ InstrItineraryData() : Stages(0), OperandCycles(0), Itineratries(0) {}
+ InstrItineraryData(const InstrStage *S, const unsigned *OS,
+ const InstrItinerary *I)
+ : Stages(S), OperandCycles(OS), Itineratries(I) {}
/// isEmpty - Returns true if there are no itineraries.
///
bool isEmpty() const { return Itineratries == 0; }
-
- /// begin - Return the first stage of the itinerary.
+
+ /// isEndMarker - Returns true if the index is for the end marker
+ /// itinerary.
+ ///
+ bool isEndMarker(unsigned ItinClassIndx) const {
+ return ((Itineratries[ItinClassIndx].FirstStage == ~0U) &&
+ (Itineratries[ItinClassIndx].LastStage == ~0U));
+ }
+
+ /// beginStage - Return the first stage of the itinerary.
///
- const InstrStage *begin(unsigned ItinClassIndx) const {
- unsigned StageIdx = Itineratries[ItinClassIndx].First;
+ const InstrStage *beginStage(unsigned ItinClassIndx) const {
+ unsigned StageIdx = Itineratries[ItinClassIndx].FirstStage;
return Stages + StageIdx;
}
- /// end - Return the last+1 stage of the itinerary.
+ /// endStage - Return the last+1 stage of the itinerary.
///
- const InstrStage *end(unsigned ItinClassIndx) const {
- unsigned StageIdx = Itineratries[ItinClassIndx].Last;
+ const InstrStage *endStage(unsigned ItinClassIndx) const {
+ unsigned StageIdx = Itineratries[ItinClassIndx].LastStage;
return Stages + StageIdx;
}
- /// getLatency - Return the scheduling latency of the given class. A
- /// simple latency value for an instruction is an over-simplification
- /// for some architectures, but it's a reasonable first approximation.
+ /// getStageLatency - Return the total stage latency of the given
+ /// class. The latency is the maximum completion time for any stage
+ /// in the itinerary.
///
- unsigned getLatency(unsigned ItinClassIndx) const {
- // If the target doesn't provide latency information, use a simple
- // non-zero default value for all instructions.
+ unsigned getStageLatency(unsigned ItinClassIndx) const {
+ // If the target doesn't provide itinerary information, use a
+ // simple non-zero default value for all instructions.
if (isEmpty())
return 1;
- // Just sum the cycle count for each stage.
- unsigned Latency = 0;
- for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
- IS != E; ++IS)
- Latency += IS->Cycles;
+ // Calculate the maximum completion time for any stage.
+ unsigned Latency = 0, StartCycle = 0;
+ for (const InstrStage *IS = beginStage(ItinClassIndx),
+ *E = endStage(ItinClassIndx); IS != E; ++IS) {
+ Latency = std::max(Latency, StartCycle + IS->getCycles());
+ StartCycle += IS->getNextCycles();
+ }
+
return Latency;
}
+
+ /// getOperandCycle - Return the cycle for the given class and
+ /// operand. Return -1 if no cycle is specified for the operand.
+ ///
+ int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const {
+ if (isEmpty())
+ return -1;
+
+ unsigned FirstIdx = Itineratries[ItinClassIndx].FirstOperandCycle;
+ unsigned LastIdx = Itineratries[ItinClassIndx].LastOperandCycle;
+ if ((FirstIdx + OperandIdx) >= LastIdx)
+ return -1;
+
+ return (int)OperandCycles[FirstIdx + OperandIdx];
+ }
};
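
A worked example of the overlap-aware latency: the stages below mirror the
{ 2, x|y, 1 } case from the comment above, followed by a one-cycle stage;
the unit masks are arbitrary:

#include "llvm/Target/TargetInstrItineraries.h"

static const llvm::InstrStage Stages[] = {
  { 2, 0x3, 1 },   // 2 cycles on unit x or y; next stage starts 1 cycle in
  { 1, 0x4, -1 }   // 1 cycle on unit z
};
static const llvm::InstrItinerary Itins[] = {
  { 0, 2, 0, 0 },          // class 0: stages [0, 2), no operand cycles
  { ~0u, ~0u, ~0u, ~0u }   // end marker
};

static unsigned latencyOfClassZero() {
  llvm::InstrItineraryData Data(Stages, /*OperandCycles=*/0, Itins);
  return Data.getStageLatency(0);  // max(0+2, 1+1) == 2, not the old sum 3
}
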
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index d24ca679ab76..4f567b0b203a 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -22,6 +22,7 @@
#ifndef LLVM_TARGET_TARGETLOWERING_H
#define LLVM_TARGET_TARGETLOWERING_H
+#include "llvm/CallingConv.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -54,6 +55,7 @@ namespace llvm {
class TargetMachine;
class TargetRegisterClass;
class TargetSubtarget;
+ class TargetLoweringObjectFile;
class Value;
// FIXME: should this be here?
@@ -77,6 +79,8 @@ namespace llvm {
/// target-specific constructs to SelectionDAG operators.
///
class TargetLowering {
+ TargetLowering(const TargetLowering&); // DO NOT IMPLEMENT
+ void operator=(const TargetLowering&); // DO NOT IMPLEMENT
public:
/// LegalizeAction - This enum indicates whether operations are valid for a
/// target, and if not, what action should be used to make them valid.
@@ -87,12 +91,6 @@ public:
Custom // Use the LowerOperation hook to implement custom lowering.
};
- enum OutOfRangeShiftAmount {
- Undefined, // Oversized shift amounts are undefined (default).
- Mask, // Shift amounts are auto masked (anded) to value size.
- Extend // Oversized shift pulls in zeros or sign bits.
- };
-
enum BooleanContent { // How the target represents true/false values.
UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
ZeroOrOneBooleanContent, // All bits zero except for bit 0.
@@ -104,17 +102,18 @@ public:
SchedulingForRegPressure // Scheduling for lowest register pressure.
};
- explicit TargetLowering(TargetMachine &TM);
+ /// NOTE: The constructor takes ownership of TLOF.
+ explicit TargetLowering(TargetMachine &TM, TargetLoweringObjectFile *TLOF);
virtual ~TargetLowering();
TargetMachine &getTargetMachine() const { return TM; }
const TargetData *getTargetData() const { return TD; }
+ TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
bool isBigEndian() const { return !IsLittleEndian; }
bool isLittleEndian() const { return IsLittleEndian; }
MVT getPointerTy() const { return PointerTy; }
MVT getShiftAmountTy() const { return ShiftAmountTy; }
- OutOfRangeShiftAmount getShiftAmountFlavor() const {return ShiftAmtHandling; }
/// usesGlobalOffsetTable - Return true if this target uses a GOT for PIC
/// codegen.
@@ -137,7 +136,8 @@ public:
/// the condition operand of SELECT and BRCOND nodes. In the case of
/// BRCOND the argument passed is MVT::Other since there are no other
/// operands to get a type hint from.
- virtual MVT getSetCCResultType(MVT VT) const;
+ virtual
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
/// getBooleanContents - For targets without i1 registers, this gives the
/// nature of the high-bits of boolean values held in types wider than i1.
@@ -153,9 +153,9 @@ public:
/// getRegClassFor - Return the register class that should be used for the
/// specified value type. This may only be called on legal types.
- TargetRegisterClass *getRegClassFor(MVT VT) const {
- assert((unsigned)VT.getSimpleVT() < array_lengthof(RegClassForVT));
- TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT()];
+ TargetRegisterClass *getRegClassFor(EVT VT) const {
+ assert(VT.isSimple() && "getRegClassFor called on illegal type!");
+ TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
assert(RC && "This value type is not natively supported!");
return RC;
}
@@ -163,10 +163,10 @@ public:
/// isTypeLegal - Return true if the target has native support for the
/// specified value type. This means that it has a register that directly
/// holds it without promotions or expansions.
- bool isTypeLegal(MVT VT) const {
+ bool isTypeLegal(EVT VT) const {
assert(!VT.isSimple() ||
- (unsigned)VT.getSimpleVT() < array_lengthof(RegClassForVT));
- return VT.isSimple() && RegClassForVT[VT.getSimpleVT()] != 0;
+ (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
+ return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != 0;
}
class ValueTypeActionImpl {
@@ -187,23 +187,23 @@ public:
ValueTypeActions[3] = RHS.ValueTypeActions[3];
}
- LegalizeAction getTypeAction(MVT VT) const {
+ LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
if (VT.isExtended()) {
if (VT.isVector()) {
return VT.isPow2VectorType() ? Expand : Promote;
}
if (VT.isInteger())
// First promote to a power-of-two size, then expand if necessary.
- return VT == VT.getRoundIntegerType() ? Expand : Promote;
+ return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
assert(0 && "Unsupported extended type!");
return Legal;
}
- unsigned I = VT.getSimpleVT();
+ unsigned I = VT.getSimpleVT().SimpleTy;
assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0]));
return (LegalizeAction)((ValueTypeActions[I>>4] >> ((2*I) & 31)) & 3);
}
- void setTypeAction(MVT VT, LegalizeAction Action) {
- unsigned I = VT.getSimpleVT();
+ void setTypeAction(EVT VT, LegalizeAction Action) {
+ unsigned I = VT.getSimpleVT().SimpleTy;
assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0]));
ValueTypeActions[I>>4] |= Action << ((I*2) & 31);
}
@@ -217,8 +217,8 @@ public:
/// it is already legal (return 'Legal') or we need to promote it to a larger
/// type (return 'Promote'), or we need to expand it into multiple registers
/// of smaller integer type (return 'Expand'). 'Custom' is not an option.
- LegalizeAction getTypeAction(MVT VT) const {
- return ValueTypeActions.getTypeAction(VT);
+ LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
+ return ValueTypeActions.getTypeAction(Context, VT);
}
/// getTypeToTransformTo - For types supported by the target, this is an
@@ -227,33 +227,37 @@ public:
/// than the largest integer register, this contains one step in the expansion
/// to get to the smaller register. For illegal floating point types, this
/// returns the integer type to transform to.
- MVT getTypeToTransformTo(MVT VT) const {
+ EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
if (VT.isSimple()) {
- assert((unsigned)VT.getSimpleVT() < array_lengthof(TransformToType));
- MVT NVT = TransformToType[VT.getSimpleVT()];
- assert(getTypeAction(NVT) != Promote &&
+ assert((unsigned)VT.getSimpleVT().SimpleTy <
+ array_lengthof(TransformToType));
+ EVT NVT = TransformToType[VT.getSimpleVT().SimpleTy];
+ assert(getTypeAction(Context, NVT) != Promote &&
"Promote may not follow Expand or Promote");
return NVT;
}
if (VT.isVector()) {
- MVT NVT = VT.getPow2VectorType();
+ EVT NVT = VT.getPow2VectorType(Context);
if (NVT == VT) {
// Vector length is a power of 2 - split to half the size.
unsigned NumElts = VT.getVectorNumElements();
- MVT EltVT = VT.getVectorElementType();
- return (NumElts == 1) ? EltVT : MVT::getVectorVT(EltVT, NumElts / 2);
+ EVT EltVT = VT.getVectorElementType();
+ return (NumElts == 1) ?
+ EltVT : EVT::getVectorVT(Context, EltVT, NumElts / 2);
}
// Promote to a power of two size, avoiding multi-step promotion.
- return getTypeAction(NVT) == Promote ? getTypeToTransformTo(NVT) : NVT;
+ return getTypeAction(Context, NVT) == Promote ?
+ getTypeToTransformTo(Context, NVT) : NVT;
} else if (VT.isInteger()) {
- MVT NVT = VT.getRoundIntegerType();
+ EVT NVT = VT.getRoundIntegerType(Context);
if (NVT == VT)
// Size is a power of two - expand to half the size.
- return MVT::getIntegerVT(VT.getSizeInBits() / 2);
+ return EVT::getIntegerVT(Context, VT.getSizeInBits() / 2);
else
// Promote to a power of two size, avoiding multi-step promotion.
- return getTypeAction(NVT) == Promote ? getTypeToTransformTo(NVT) : NVT;
+ return getTypeAction(Context, NVT) == Promote ?
+ getTypeToTransformTo(Context, NVT) : NVT;
}
assert(0 && "Unsupported extended type!");
return MVT(MVT::Other); // Not reached
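// For intuition, on a hypothetical 32-bit target the cases above resolve as
// follows (illustrative only, not the output of any particular backend):
//   i64   -> i32     integer with power-of-two size: expand to half the size
//   i17   -> i32     odd-sized integer: promote to the rounded type first
//   v8f32 -> v4f32   vector length already a power of 2: split in half
//   v3i32 -> v4i32   vector length not a power of 2: promote to a pow2 length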
@@ -263,14 +267,14 @@ public:
/// identity function. For types that must be expanded (i.e. integer types
/// that are larger than the largest integer register or illegal floating
/// point types), this returns the largest legal type it will be expanded to.
- MVT getTypeToExpandTo(MVT VT) const {
+ EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
assert(!VT.isVector());
while (true) {
- switch (getTypeAction(VT)) {
+ switch (getTypeAction(Context, VT)) {
case Legal:
return VT;
case Expand:
- VT = getTypeToTransformTo(VT);
+ VT = getTypeToTransformTo(Context, VT);
break;
default:
assert(false && "Type is not legal nor is it to be expanded!");
@@ -281,18 +285,18 @@ public:
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
- /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
- /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
- /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+ /// legal first class types. For example, EVT::v8f32 maps to 2 EVT::v4f32
+ /// with Altivec or SSE1, or 8 promoted EVT::f64 values with the X86 FP stack.
+ /// Similarly, EVT::v2i64 turns into 4 EVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register. It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
///
- unsigned getVectorTypeBreakdown(MVT VT,
- MVT &IntermediateVT,
+ unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
unsigned &NumIntermediates,
- MVT &RegisterVT) const;
+ EVT &RegisterVT) const;
/// getTgtMemIntrinsic: Given an intrinsic, checks if on the target the
/// intrinsic will need to map to a MemIntrinsicNode (touches memory). If
@@ -300,7 +304,7 @@ public:
/// information into the IntrinsicInfo that was passed to the function.
typedef struct IntrinsicInfo {
unsigned opc; // target opcode
- MVT memVT; // memory VT
+ EVT memVT; // memory VT
const Value* ptrVal; // value representing memory location
int offset; // offset off of ptrVal
unsigned align; // alignment
@@ -319,7 +323,7 @@ public:
/// If there is no vector type that we want to widen to, returns MVT::Other.
/// When and where to widen is target dependent, based on the cost of
/// scalarizing vs. using the wider vector type.
- virtual MVT getWidenVectorType(MVT VT) const;
+ virtual EVT getWidenVectorType(EVT VT) const;
typedef std::vector<APFloat>::const_iterator legal_fpimm_iterator;
legal_fpimm_iterator legal_fpimm_begin() const {
@@ -334,7 +338,7 @@ public:
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const {
+ EVT VT) const {
return true;
}
@@ -343,7 +347,7 @@ public:
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const {
+ EVT VT) const {
return false;
}
@@ -351,12 +355,12 @@ public:
/// it is legal, needs to be promoted to a larger size, needs to be
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
- LegalizeAction getOperationAction(unsigned Op, MVT VT) const {
+ LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
if (VT.isExtended()) return Expand;
assert(Op < array_lengthof(OpActions[0]) &&
- (unsigned)VT.getSimpleVT() < sizeof(OpActions[0][0])*8 &&
+ (unsigned)VT.getSimpleVT().SimpleTy < sizeof(OpActions[0][0])*8 &&
"Table isn't big enough!");
- unsigned I = (unsigned) VT.getSimpleVT();
+ unsigned I = (unsigned) VT.getSimpleVT().SimpleTy;
unsigned J = I & 31;
I = I >> 5;
return (LegalizeAction)((OpActions[I][Op] >> (J*2) ) & 3);
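// Same 2-bits-per-entry trick as ValueTypeActions, but with 64-bit words
// holding 32 entries each. A standalone sketch of the index arithmetic, where
// Row stands in for one opcode's column of the table (an assumption made for
// illustration):
#include <cstdint>
inline unsigned decodeAction(const uint64_t *Row, unsigned TypeIdx) {
  unsigned J = TypeIdx & 31;   // position within the 64-bit word
  unsigned I = TypeIdx >> 5;   // which 64-bit word
  return unsigned((Row[I] >> (J * 2)) & 3);
}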
@@ -365,7 +369,7 @@ public:
/// isOperationLegalOrCustom - Return true if the specified operation is
/// legal on this target or can be made legal with custom lowering. This
/// is used to help guide high-level lowering decisions.
- bool isOperationLegalOrCustom(unsigned Op, MVT VT) const {
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Custom);
@@ -373,7 +377,7 @@ public:
/// isOperationLegal - Return true if the specified operation is legal on this
/// target.
- bool isOperationLegal(unsigned Op, MVT VT) const {
+ bool isOperationLegal(unsigned Op, EVT VT) const {
return (VT == MVT::Other || isTypeLegal(VT)) &&
getOperationAction(Op, VT) == Legal;
}
@@ -382,16 +386,17 @@ public:
/// either it is legal, needs to be promoted to a larger size, needs to be
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
- LegalizeAction getLoadExtAction(unsigned LType, MVT VT) const {
+ LegalizeAction getLoadExtAction(unsigned LType, EVT VT) const {
assert(LType < array_lengthof(LoadExtActions) &&
- (unsigned)VT.getSimpleVT() < sizeof(LoadExtActions[0])*4 &&
+ (unsigned)VT.getSimpleVT().SimpleTy < sizeof(LoadExtActions[0])*4 &&
"Table isn't big enough!");
- return (LegalizeAction)((LoadExtActions[LType] >> (2*VT.getSimpleVT())) & 3);
+ return (LegalizeAction)((LoadExtActions[LType] >>
+ (2*VT.getSimpleVT().SimpleTy)) & 3);
}
/// isLoadExtLegal - Return true if the specified load with extension is legal
/// on this target.
- bool isLoadExtLegal(unsigned LType, MVT VT) const {
+ bool isLoadExtLegal(unsigned LType, EVT VT) const {
return VT.isSimple() &&
(getLoadExtAction(LType, VT) == Legal ||
getLoadExtAction(LType, VT) == Custom);
@@ -401,18 +406,20 @@ public:
/// treated: either it is legal, needs to be promoted to a larger size, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
- LegalizeAction getTruncStoreAction(MVT ValVT,
- MVT MemVT) const {
- assert((unsigned)ValVT.getSimpleVT() < array_lengthof(TruncStoreActions) &&
- (unsigned)MemVT.getSimpleVT() < sizeof(TruncStoreActions[0])*4 &&
+ LegalizeAction getTruncStoreAction(EVT ValVT,
+ EVT MemVT) const {
+ assert((unsigned)ValVT.getSimpleVT().SimpleTy <
+ array_lengthof(TruncStoreActions) &&
+ (unsigned)MemVT.getSimpleVT().SimpleTy <
+ sizeof(TruncStoreActions[0])*4 &&
"Table isn't big enough!");
- return (LegalizeAction)((TruncStoreActions[ValVT.getSimpleVT()] >>
- (2*MemVT.getSimpleVT())) & 3);
+ return (LegalizeAction)((TruncStoreActions[ValVT.getSimpleVT().SimpleTy] >>
+ (2*MemVT.getSimpleVT().SimpleTy)) & 3);
}
/// isTruncStoreLegal - Return true if the specified store with truncation is
/// legal on this target.
- bool isTruncStoreLegal(MVT ValVT, MVT MemVT) const {
+ bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
return isTypeLegal(ValVT) && MemVT.isSimple() &&
(getTruncStoreAction(ValVT, MemVT) == Legal ||
getTruncStoreAction(ValVT, MemVT) == Custom);
@@ -423,16 +430,17 @@ public:
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
LegalizeAction
- getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
+ getIndexedLoadAction(unsigned IdxMode, EVT VT) const {
assert( IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
- ((unsigned)VT.getSimpleVT()) < MVT::LAST_VALUETYPE &&
+ ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode]));
+ return (LegalizeAction)((IndexedModeActions[
+ (unsigned)VT.getSimpleVT().SimpleTy][0][IdxMode]));
}
/// isIndexedLoadLegal - Return true if the specified indexed load is legal
/// on this target.
- bool isIndexedLoadLegal(unsigned IdxMode, MVT VT) const {
+ bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
return VT.isSimple() &&
(getIndexedLoadAction(IdxMode, VT) == Legal ||
getIndexedLoadAction(IdxMode, VT) == Custom);
@@ -443,16 +451,17 @@ public:
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
LegalizeAction
- getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
+ getIndexedStoreAction(unsigned IdxMode, EVT VT) const {
assert(IdxMode < array_lengthof(IndexedModeActions[0][1]) &&
- (unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode]));
+ return (LegalizeAction)((IndexedModeActions[
+ (unsigned)VT.getSimpleVT().SimpleTy][1][IdxMode]));
}
/// isIndexedStoreLegal - Return true if the specified indexed store is legal
/// on this target.
- bool isIndexedStoreLegal(unsigned IdxMode, MVT VT) const {
+ bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
return VT.isSimple() &&
(getIndexedStoreAction(IdxMode, VT) == Legal ||
getIndexedStoreAction(IdxMode, VT) == Custom);
@@ -463,17 +472,19 @@ public:
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
LegalizeAction
- getConvertAction(MVT FromVT, MVT ToVT) const {
- assert((unsigned)FromVT.getSimpleVT() < array_lengthof(ConvertActions) &&
- (unsigned)ToVT.getSimpleVT() < sizeof(ConvertActions[0])*4 &&
+ getConvertAction(EVT FromVT, EVT ToVT) const {
+ assert((unsigned)FromVT.getSimpleVT().SimpleTy <
+ array_lengthof(ConvertActions) &&
+ (unsigned)ToVT.getSimpleVT().SimpleTy <
+ sizeof(ConvertActions[0])*4 &&
"Table isn't big enough!");
- return (LegalizeAction)((ConvertActions[FromVT.getSimpleVT()] >>
- (2*ToVT.getSimpleVT())) & 3);
+ return (LegalizeAction)((ConvertActions[FromVT.getSimpleVT().SimpleTy] >>
+ (2*ToVT.getSimpleVT().SimpleTy)) & 3);
}
/// isConvertLegal - Return true if the specified conversion is legal
/// on this target.
- bool isConvertLegal(MVT FromVT, MVT ToVT) const {
+ bool isConvertLegal(EVT FromVT, EVT ToVT) const {
return isTypeLegal(FromVT) && isTypeLegal(ToVT) &&
(getConvertAction(FromVT, ToVT) == Legal ||
getConvertAction(FromVT, ToVT) == Custom);
@@ -483,19 +494,19 @@ public:
/// either it is legal, needs to be expanded to some other code sequence,
/// or the target has a custom expander for it.
LegalizeAction
- getCondCodeAction(ISD::CondCode CC, MVT VT) const {
+ getCondCodeAction(ISD::CondCode CC, EVT VT) const {
assert((unsigned)CC < array_lengthof(CondCodeActions) &&
- (unsigned)VT.getSimpleVT() < sizeof(CondCodeActions[0])*4 &&
+ (unsigned)VT.getSimpleVT().SimpleTy < sizeof(CondCodeActions[0])*4 &&
"Table isn't big enough!");
LegalizeAction Action = (LegalizeAction)
- ((CondCodeActions[CC] >> (2*VT.getSimpleVT())) & 3);
+ ((CondCodeActions[CC] >> (2*VT.getSimpleVT().SimpleTy)) & 3);
assert(Action != Promote && "Can't promote condition code!");
return Action;
}
/// isCondCodeLegal - Return true if the specified condition code is legal
/// on this target.
- bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
+ bool isCondCodeLegal(ISD::CondCode CC, EVT VT) const {
return getCondCodeAction(CC, VT) == Legal ||
getCondCodeAction(CC, VT) == Custom;
}
@@ -503,22 +514,22 @@ public:
/// getTypeToPromoteTo - If the action for this operation is to promote, this
/// method returns the ValueType to promote to.
- MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
+ EVT getTypeToPromoteTo(unsigned Op, EVT VT) const {
assert(getOperationAction(Op, VT) == Promote &&
"This operation isn't promoted!");
// See if this has an explicit type specified.
std::map<std::pair<unsigned, MVT::SimpleValueType>,
MVT::SimpleValueType>::const_iterator PTTI =
- PromoteToType.find(std::make_pair(Op, VT.getSimpleVT()));
+ PromoteToType.find(std::make_pair(Op, VT.getSimpleVT().SimpleTy));
if (PTTI != PromoteToType.end()) return PTTI->second;
assert((VT.isInteger() || VT.isFloatingPoint()) &&
"Cannot autopromote this type, add it with AddPromotedToType.");
- MVT NVT = VT;
+ EVT NVT = VT;
do {
- NVT = (MVT::SimpleValueType)(NVT.getSimpleVT()+1);
+ NVT = (MVT::SimpleValueType)(NVT.getSimpleVT().SimpleTy+1);
assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
"Didn't find type to promote to!");
} while (!isTypeLegal(NVT) ||
@@ -526,13 +537,13 @@ public:
return NVT;
}
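// e.g. promoting an i16 operation walks the SimpleTy enumeration upward
// (i16 -> i32 -> i64 -> ...) until it finds a legal type of the same class
// that is suitable as the promotion target.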
- /// getValueType - Return the MVT corresponding to this LLVM type.
+ /// getValueType - Return the EVT corresponding to this LLVM type.
/// This is fixed by the LLVM operations except for the pointer size. If
- /// AllowUnknown is true, this will return MVT::Other for types with no MVT
+ /// AllowUnknown is true, this will return MVT::Other for types with no EVT
/// counterpart (e.g. structs), otherwise it will assert.
- MVT getValueType(const Type *Ty, bool AllowUnknown = false) const {
- MVT VT = MVT::getMVT(Ty, AllowUnknown);
- return VT == MVT::iPTR ? PointerTy : VT;
+ EVT getValueType(const Type *Ty, bool AllowUnknown = false) const {
+ EVT VT = EVT::getEVT(Ty, AllowUnknown);
+ return VT == MVT::iPTR ? PointerTy : VT;
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -542,22 +553,31 @@ public:
/// getRegisterType - Return the type of registers that this ValueType will
/// eventually require.
- MVT getRegisterType(MVT VT) const {
+ EVT getRegisterType(MVT VT) const {
+ assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
+ return RegisterTypeForVT[VT.SimpleTy];
+ }
+
+ /// getRegisterType - Return the type of registers that this ValueType will
+ /// eventually require.
+ EVT getRegisterType(LLVMContext &Context, EVT VT) const {
if (VT.isSimple()) {
- assert((unsigned)VT.getSimpleVT() < array_lengthof(RegisterTypeForVT));
- return RegisterTypeForVT[VT.getSimpleVT()];
+ assert((unsigned)VT.getSimpleVT().SimpleTy <
+ array_lengthof(RegisterTypeForVT));
+ return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
}
if (VT.isVector()) {
- MVT VT1, RegisterVT;
+ EVT VT1, RegisterVT;
unsigned NumIntermediates;
- (void)getVectorTypeBreakdown(VT, VT1, NumIntermediates, RegisterVT);
+ (void)getVectorTypeBreakdown(Context, VT, VT1,
+ NumIntermediates, RegisterVT);
return RegisterVT;
}
if (VT.isInteger()) {
- return getRegisterType(getTypeToTransformTo(VT));
+ return getRegisterType(Context, getTypeToTransformTo(Context, VT));
}
assert(0 && "Unsupported extended type!");
- return MVT(MVT::Other); // Not reached
+ return EVT(MVT::Other); // Not reached
}
/// getNumRegisters - Return the number of registers that this ValueType will
@@ -566,19 +586,20 @@ public:
/// into pieces. For types like i140, which are first promoted then expanded,
/// it is the number of registers needed to hold all the bits of the original
/// type. For an i140 on a 32 bit machine this means 5 registers.
- unsigned getNumRegisters(MVT VT) const {
+ unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
if (VT.isSimple()) {
- assert((unsigned)VT.getSimpleVT() < array_lengthof(NumRegistersForVT));
- return NumRegistersForVT[VT.getSimpleVT()];
+ assert((unsigned)VT.getSimpleVT().SimpleTy <
+ array_lengthof(NumRegistersForVT));
+ return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
}
if (VT.isVector()) {
- MVT VT1, VT2;
+ EVT VT1, VT2;
unsigned NumIntermediates;
- return getVectorTypeBreakdown(VT, VT1, NumIntermediates, VT2);
+ return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
}
if (VT.isInteger()) {
unsigned BitWidth = VT.getSizeInBits();
- unsigned RegWidth = getRegisterType(VT).getSizeInBits();
+ unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
return (BitWidth + RegWidth - 1) / RegWidth;
}
assert(0 && "Unsupported extended type!");
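// The integer case above is plain ceiling division. A standalone check of the
// i140 example from the comment, assuming 32-bit registers:
constexpr unsigned numRegsFor(unsigned BitWidth, unsigned RegWidth) {
  return (BitWidth + RegWidth - 1) / RegWidth;   // round up
}
static_assert(numRegsFor(140, 32) == 5, "i140 needs five 32-bit registers");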
@@ -588,7 +609,7 @@ public:
/// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
- virtual bool ShouldShrinkFPConstant(MVT VT) const { return true; }
+ virtual bool ShouldShrinkFPConstant(EVT VT) const { return true; }
/// hasTargetDAGCombine - If true, the target has custom DAG combine
/// transformations that it can perform for the specified node.
@@ -616,13 +637,13 @@ public:
unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; }
- /// This function returns true if the target allows unaligned memory accesses.
- /// This is used, for example, in situations where an array copy/move/set is
- /// converted to a sequence of store operations. It's use helps to ensure that
- /// such replacements don't generate code that causes an alignment error
- /// (trap) on the target machine.
+ /// This function returns true if the target allows unaligned memory accesses
+ /// of the specified type. This is used, for example, in situations where an
+ /// array copy/move/set is converted to a sequence of store operations. Its
+ /// use helps to ensure that such replacements don't generate code that causes
+ /// an alignment error (trap) on the target machine.
/// @brief Determine if the target supports unaligned memory accesses.
- bool allowsUnalignedMemoryAccesses() const {
- return allowUnalignedMemoryAccesses;
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+ return false;
}
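// A hypothetical subclass override (sketch only; the 32-bit scalar policy is
// an assumption for illustration, not any real target's behavior):
//   virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
//     // permit unaligned access for scalar types of 32 bits or less
//     return VT.isSimple() && !VT.isVector() && VT.getSizeInBits() <= 32;
//   }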
/// This function returns true if the target would benefit from code placement
@@ -634,9 +655,9 @@ public:
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove lowering.
- /// It returns MVT::iAny if SelectionDAG should be responsible for
+ /// It returns EVT::iAny if SelectionDAG should be responsible for
/// determining it.
- virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const {
return MVT::iAny;
@@ -804,14 +825,17 @@ public:
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
bool BeforeLegalize;
+ bool BeforeLegalizeOps;
bool CalledByLegalizer;
public:
SelectionDAG &DAG;
- DAGCombinerInfo(SelectionDAG &dag, bool bl, bool cl, void *dc)
- : DC(dc), BeforeLegalize(bl), CalledByLegalizer(cl), DAG(dag) {}
+ DAGCombinerInfo(SelectionDAG &dag, bool bl, bool blo, bool cl, void *dc)
+ : DC(dc), BeforeLegalize(bl), BeforeLegalizeOps(blo),
+ CalledByLegalizer(cl), DAG(dag) {}
bool isBeforeLegalize() const { return BeforeLegalize; }
+ bool isBeforeLegalizeOps() const { return BeforeLegalizeOps; }
bool isCalledByLegalizer() const { return CalledByLegalizer; }
void AddToWorklist(SDNode *N);
@@ -825,7 +849,7 @@ public:
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
- SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI, DebugLoc dl) const;
@@ -878,12 +902,6 @@ protected:
SchedPreferenceInfo = Pref;
}
- /// setShiftAmountFlavor - Describe how the target handles out of range shift
- /// amounts.
- void setShiftAmountFlavor(OutOfRangeShiftAmount OORSA) {
- ShiftAmtHandling = OORSA;
- }
-
/// setUseUnderscoreSetJmp - Indicate whether this target prefers to
/// use _setjmp to implement llvm.setjmp or the non _ version.
/// Defaults to false.
@@ -936,10 +954,10 @@ protected:
/// addRegisterClass - Add the specified register class as an available
/// regclass for the specified value type. This indicates the selector can
/// handle values of that class natively.
- void addRegisterClass(MVT VT, TargetRegisterClass *RC) {
- assert((unsigned)VT.getSimpleVT() < array_lengthof(RegClassForVT));
+ void addRegisterClass(EVT VT, TargetRegisterClass *RC) {
+ assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
AvailableRegClasses.push_back(std::make_pair(VT, RC));
- RegClassForVT[VT.getSimpleVT()] = RC;
+ RegClassForVT[VT.getSimpleVT().SimpleTy] = RC;
}
/// computeRegisterProperties - Once all of the register classes are added,
@@ -950,9 +968,7 @@ protected:
/// with the specified type and indicate what to do about it.
void setOperationAction(unsigned Op, MVT VT,
LegalizeAction Action) {
- assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0][0])*8 &&
- Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
- unsigned I = (unsigned) VT.getSimpleVT();
+ unsigned I = (unsigned)VT.SimpleTy;
unsigned J = I & 31;
I = I >> 5;
OpActions[I][Op] &= ~(uint64_t(3UL) << (J*2));
@@ -963,24 +979,22 @@ protected:
/// not work with the specified type and indicate what to do about it.
void setLoadExtAction(unsigned ExtType, MVT VT,
LegalizeAction Action) {
- assert((unsigned)VT.getSimpleVT() < sizeof(LoadExtActions[0])*4 &&
+ assert((unsigned)VT.SimpleTy < sizeof(LoadExtActions[0])*4 &&
ExtType < array_lengthof(LoadExtActions) &&
"Table isn't big enough!");
- LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
- LoadExtActions[ExtType] |= (uint64_t)Action << VT.getSimpleVT()*2;
+ LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
+ LoadExtActions[ExtType] |= (uint64_t)Action << VT.SimpleTy*2;
}
/// setTruncStoreAction - Indicate that the specified truncating store does
/// not work with the specified type and indicate what to do about it.
void setTruncStoreAction(MVT ValVT, MVT MemVT,
LegalizeAction Action) {
- assert((unsigned)ValVT.getSimpleVT() < array_lengthof(TruncStoreActions) &&
- (unsigned)MemVT.getSimpleVT() < sizeof(TruncStoreActions[0])*4 &&
+ assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) &&
+ (unsigned)MemVT.SimpleTy < sizeof(TruncStoreActions[0])*4 &&
"Table isn't big enough!");
- TruncStoreActions[ValVT.getSimpleVT()] &= ~(uint64_t(3UL) <<
- MemVT.getSimpleVT()*2);
- TruncStoreActions[ValVT.getSimpleVT()] |= (uint64_t)Action <<
- MemVT.getSimpleVT()*2;
+ TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL) << MemVT.SimpleTy*2);
+ TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2;
}
/// setIndexedLoadAction - Indicate that the specified indexed load does or
@@ -989,10 +1003,10 @@ protected:
/// TargetLowering.cpp
void setIndexedLoadAction(unsigned IdxMode, MVT VT,
LegalizeAction Action) {
- assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
"Table isn't big enough!");
- IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode] = (uint8_t)Action;
+ IndexedModeActions[(unsigned)VT.SimpleTy][0][IdxMode] = (uint8_t)Action;
}
/// setIndexedStoreAction - Indicate that the specified indexed store does or
@@ -1001,33 +1015,32 @@ protected:
/// TargetLowering.cpp
void setIndexedStoreAction(unsigned IdxMode, MVT VT,
LegalizeAction Action) {
- assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
IdxMode < array_lengthof(IndexedModeActions[0][1] ) &&
"Table isn't big enough!");
- IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode] = (uint8_t)Action;
+ IndexedModeActions[(unsigned)VT.SimpleTy][1][IdxMode] = (uint8_t)Action;
}
/// setConvertAction - Indicate that the specified conversion does or does
/// not work with the specified type and indicate what to do about it.
void setConvertAction(MVT FromVT, MVT ToVT,
LegalizeAction Action) {
- assert((unsigned)FromVT.getSimpleVT() < array_lengthof(ConvertActions) &&
- (unsigned)ToVT.getSimpleVT() < sizeof(ConvertActions[0])*4 &&
+ assert((unsigned)FromVT.SimpleTy < array_lengthof(ConvertActions) &&
+ (unsigned)ToVT.SimpleTy < sizeof(ConvertActions[0])*4 &&
"Table isn't big enough!");
- ConvertActions[FromVT.getSimpleVT()] &= ~(uint64_t(3UL) <<
- ToVT.getSimpleVT()*2);
- ConvertActions[FromVT.getSimpleVT()] |= (uint64_t)Action <<
- ToVT.getSimpleVT()*2;
+ ConvertActions[FromVT.SimpleTy] &= ~(uint64_t(3UL) << ToVT.SimpleTy*2);
+ ConvertActions[FromVT.SimpleTy] |= (uint64_t)Action << ToVT.SimpleTy*2;
}
/// setCondCodeAction - Indicate that the specified condition code is or isn't
/// supported on the target and indicate what to do about it.
- void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action) {
- assert((unsigned)VT.getSimpleVT() < sizeof(CondCodeActions[0])*4 &&
+ void setCondCodeAction(ISD::CondCode CC, MVT VT,
+ LegalizeAction Action) {
+ assert((unsigned)VT.SimpleTy < sizeof(CondCodeActions[0])*4 &&
(unsigned)CC < array_lengthof(CondCodeActions) &&
"Table isn't big enough!");
- CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
- CondCodeActions[(unsigned)CC] |= (uint64_t)Action << VT.getSimpleVT()*2;
+ CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
+ CondCodeActions[(unsigned)CC] |= (uint64_t)Action << VT.SimpleTy*2;
}
/// AddPromotedToType - If Opc/OrigVT is specified as being promoted, the
@@ -1035,8 +1048,7 @@ protected:
/// one that works. If that default is insufficient, this method can be used
/// by the target to override the default.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
- PromoteToType[std::make_pair(Opc, OrigVT.getSimpleVT())] =
- DestVT.getSimpleVT();
+ PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
}
/// addLegalFPImmediate - Indicate that this target can instruction select
@@ -1090,21 +1102,33 @@ public:
assert(0 && "Not Implemented");
return NULL; // this is here to silence compiler errors
}
+
//===--------------------------------------------------------------------===//
// Lowering methods - These methods must be implemented by targets so that
// the SelectionDAGLowering code knows how to lower these.
//
- /// LowerArguments - This hook must be implemented to indicate how we should
- /// lower the arguments for the specified function, into the specified DAG.
- virtual void
- LowerArguments(Function &F, SelectionDAG &DAG,
- SmallVectorImpl<SDValue>& ArgValues, DebugLoc dl);
+ /// LowerFormalArguments - This hook must be implemented to lower the
+ /// incoming (formal) arguments, described by the Ins array, into the
+ /// specified DAG. The implementation should fill in the InVals array
+ /// with legal-type argument values, and return the resulting token
+ /// chain value.
+ ///
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ assert(0 && "Not Implemented");
+ return SDValue(); // this is here to silence compiler errors
+ }
- /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// LowerCallTo - This function lowers an abstract call to a function into an
/// actual call. This returns a pair of operands. The first element is the
/// return value for the function (if RetTy is not VoidTy). The second
- /// element is the outgoing token chain.
+ /// element is the outgoing token chain. It calls LowerCall to do the actual
+ /// lowering.
struct ArgListEntry {
SDValue Node;
const Type* Ty;
@@ -1120,11 +1144,48 @@ public:
isSRet(false), isNest(false), isByVal(false), Alignment(0) { }
};
typedef std::vector<ArgListEntry> ArgListTy;
- virtual std::pair<SDValue, SDValue>
+ std::pair<SDValue, SDValue>
LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
bool isVarArg, bool isInreg, unsigned NumFixedArgs,
- unsigned CallingConv, bool isTailCall, SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl);
+ CallingConv::ID CallConv, bool isTailCall,
+ bool isReturnValueUsed, SDValue Callee, ArgListTy &Args,
+ SelectionDAG &DAG, DebugLoc dl);
+
+ /// LowerCall - This hook must be implemented to lower calls into the
+ /// specified DAG. The outgoing arguments to the call are described
+ /// by the Outs array, and the values to be returned by the call are
+ /// described by the Ins array. The implementation should fill in the
+ /// InVals array with legal-type return values from the call, and return
+ /// the resulting token chain value.
+ ///
+ /// The isTailCall flag here is normative. If it is true, the
+ /// implementation must emit a tail call. The
+ /// IsEligibleForTailCallOptimization hook should be used to catch
+ /// cases that cannot be handled.
+ ///
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ assert(0 && "Not Implemented");
+ return SDValue(); // this is here to silence compiler errors
+ }
+
+ /// LowerReturn - This hook must be implemented to lower outgoing
+ /// return values, described by the Outs array, into the specified
+ /// DAG. The implementation should return the resulting token chain
+ /// value.
+ ///
+ virtual SDValue
+ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+ assert(0 && "Not Implemented");
+ return SDValue(); // this is here to silence compiler errors
+ }
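// Taken together, the hooks above split the old monolithic lowering into
// three paths (as their comments describe):
//   incoming arguments: LowerFormalArguments fills InVals, returns the chain
//   calls:              LowerCallTo (target-independent) -> LowerCall (hook)
//   returns:            LowerReturn builds the return from the Outs array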
/// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
/// memcpy. This can be used by targets to provide code sequences for cases
@@ -1220,19 +1281,17 @@ public:
/// IsEligibleForTailCallOptimization - Check whether the call is eligible for
/// tail call optimization. Targets which want to do tail call optimization
- /// should override this function.
- virtual bool IsEligibleForTailCallOptimization(CallSDNode *Call,
- SDValue Ret,
- SelectionDAG &DAG) const {
+ /// should override this function.
+ virtual bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ // Conservative default: no calls are eligible.
return false;
}
- /// CheckTailCallReturnConstraints - Check whether CALL node immediatly
- /// preceeds the RET node and whether the return uses the result of the node
- /// or is a void return. This function can be used by the target to determine
- /// eligiblity of tail call optimization.
- static bool CheckTailCallReturnConstraints(CallSDNode *TheCall, SDValue Ret);
-
/// GetPossiblePreceedingTailCall - Get preceding TailCallNodeOpCode node if
/// it exists. Skip a possible ISD::TokenFactor.
static SDValue GetPossiblePreceedingTailCall(SDValue Chain,
@@ -1270,6 +1329,14 @@ public:
// Inline Asm Support hooks
//
+ /// ExpandInlineAsm - This hook allows the target to expand an inline asm
+ /// call to be explicit llvm code if it wants to. This is useful for
+ /// turning simple inline asms into LLVM intrinsics, which gives the
+ /// compiler more information about the behavior of the code.
+ virtual bool ExpandInlineAsm(CallInst *CI) const {
+ return false;
+ }
+
enum ConstraintType {
C_Register, // Constraint represents specific register(s).
C_RegisterClass, // Constraint represents any of register(s) in class.
@@ -1296,7 +1363,7 @@ public:
Value *CallOperandVal;
/// ConstraintVT - The ValueType for the operand value.
- MVT ConstraintVT;
+ EVT ConstraintVT;
/// isMatchingInputConstraint - Return true if this is an input operand that
/// is a matching constraint like "4".
@@ -1333,7 +1400,7 @@ public:
/// This should only be used for C_RegisterClass constraints.
virtual std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
/// getRegForInlineAsmConstraint - Given a physical register constraint (e.g.
/// {edx}), return the register number and the register class for the
@@ -1347,13 +1414,13 @@ public:
/// this returns a register number of 0 and a null register class pointer.
virtual std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand. This returns null if there is no replacement to
/// make.
- virtual const char *LowerXConstraint(MVT ConstraintVT) const;
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true
@@ -1373,8 +1440,12 @@ public:
// instructions are special in various ways, which require special support to
// insert. The specified MachineInstr is created but not inserted into any
// basic blocks, and the scheduler passes ownership of it to this method.
+ // When new basic blocks are inserted and the edges from MBB to its successors
+ // are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+ // DenseMap.
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
//===--------------------------------------------------------------------===//
// Addressing mode description hooks (used by LSR etc).
@@ -1410,7 +1481,7 @@ public:
return false;
}
- virtual bool isTruncateFree(MVT VT1, MVT VT2) const {
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const {
return false;
}
@@ -1426,14 +1497,14 @@ public:
return false;
}
- virtual bool isZExtFree(MVT VT1, MVT VT2) const {
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const {
return false;
}
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
- virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const {
return false;
}
@@ -1474,9 +1545,22 @@ public:
return CmpLibcallCCs[Call];
}
+ /// setLibcallCallingConv - Set the CallingConv that should be used for the
+ /// specified libcall.
+ void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
+ LibcallCallingConvs[Call] = CC;
+ }
+
+ /// getLibcallCallingConv - Get the CallingConv that should be used for the
+ /// specified libcall.
+ CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
+ return LibcallCallingConvs[Call];
+ }
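// Usage sketch (the libcall and convention are placeholders, not a
// recommendation): a target constructor could register
//   setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::Fast);
// and code emitting that libcall would then query
//   getLibcallCallingConv(RTLIB::SDIV_I64);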
+
private:
TargetMachine &TM;
const TargetData *TD;
+ TargetLoweringObjectFile &TLOF;
/// PointerTy - The type to use for pointers, usually i32 or i64.
///
@@ -1517,8 +1601,6 @@ private:
/// PointerTy is.
MVT ShiftAmountTy;
- OutOfRangeShiftAmount ShiftAmtHandling;
-
/// BooleanContents - Information about the contents of the high-bits in
/// boolean values held in a type wider than i1. See getBooleanContents.
BooleanContent BooleanContents;
@@ -1565,14 +1647,14 @@ private:
/// each ValueType the target supports natively.
TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
- MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
+ EVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
/// TransformToType - For any value types we are promoting or expanding, this
/// contains the value type that we are changing to. For Expanded types, this
/// contains one step of the expand (e.g. i64 -> i32), even if there are
/// multiple steps required (e.g. i64 -> i16). For types natively supported
/// by the system, this holds the same type (e.g. i32 -> i32).
- MVT TransformToType[MVT::LAST_VALUETYPE];
+ EVT TransformToType[MVT::LAST_VALUETYPE];
/// OpActions - For each operation and each value type, keep a LegalizeAction
/// that indicates how instruction selection should deal with the operation.
@@ -1616,7 +1698,7 @@ private:
std::vector<APFloat> LegalFPImmediates;
- std::vector<std::pair<MVT, TargetRegisterClass*> > AvailableRegClasses;
+ std::vector<std::pair<EVT, TargetRegisterClass*> > AvailableRegClasses;
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
/// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
@@ -1641,6 +1723,10 @@ private:
/// of each of the comparison libcall against zero.
ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
+ /// LibcallCallingConvs - Stores the CallingConv that should be used for each
+ /// libcall.
+ CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
+
protected:
/// When lowering \@llvm.memset this field specifies the maximum number of
/// store operations that may be substituted for the call to memset. Targets
@@ -1676,12 +1762,6 @@ protected:
/// @brief Specify maximum bytes of store instructions per memmove call.
unsigned maxStoresPerMemmove;
- /// This field specifies whether the target machine permits unaligned memory
- /// accesses. This is used, for example, to determine the size of store
- /// operations when copying small arrays and other similar tasks.
- /// @brief Indicate whether the target permits unaligned memory accesses.
- bool allowUnalignedMemoryAccesses;
-
/// This field specifies whether the target can benefit from code placement
/// optimization.
bool benefitFromCodePlacementOpt;
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
new file mode 100644
index 000000000000..821e53783c61
--- /dev/null
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -0,0 +1,361 @@
+//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
+#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
+
+#include "llvm/MC/SectionKind.h"
+
+namespace llvm {
+ class MachineModuleInfo;
+ class Mangler;
+ class MCAsmInfo;
+ class MCExpr;
+ class MCSection;
+ class MCSectionMachO;
+ class MCContext;
+ class GlobalValue;
+ class StringRef;
+ class TargetMachine;
+
+class TargetLoweringObjectFile {
+ MCContext *Ctx;
+
+ TargetLoweringObjectFile(const TargetLoweringObjectFile&); // DO NOT IMPLEMENT
+ void operator=(const TargetLoweringObjectFile&); // DO NOT IMPLEMENT
+protected:
+
+ TargetLoweringObjectFile();
+
+ /// TextSection - Section directive for standard text.
+ ///
+ const MCSection *TextSection;
+
+ /// DataSection - Section directive for standard data.
+ ///
+ const MCSection *DataSection;
+
+ /// BSSSection - Section that is default initialized to zero.
+ const MCSection *BSSSection;
+
+ /// ReadOnlySection - Section that is readonly and can contain arbitrary
+ /// initialized data. Targets are not required to have a readonly section.
+ /// If they don't, various bits of code will fall back to using the data
+ /// section for constants.
+ const MCSection *ReadOnlySection;
+
+ /// StaticCtorSection - This section contains the static constructor pointer
+ /// list.
+ const MCSection *StaticCtorSection;
+
+ /// StaticDtorSection - This section contains the static destructor pointer
+ /// list.
+ const MCSection *StaticDtorSection;
+
+ /// LSDASection - If exception handling is supported by the target, this is
+ /// the section the Language Specific Data Area information is emitted to.
+ const MCSection *LSDASection;
+
+ /// EHFrameSection - If exception handling is supported by the target, this is
+ /// the section the EH Frame is emitted to.
+ const MCSection *EHFrameSection;
+
+ // Dwarf sections for debug info. If a target supports debug info, these must
+ // be set.
+ const MCSection *DwarfAbbrevSection;
+ const MCSection *DwarfInfoSection;
+ const MCSection *DwarfLineSection;
+ const MCSection *DwarfFrameSection;
+ const MCSection *DwarfPubNamesSection;
+ const MCSection *DwarfPubTypesSection;
+ const MCSection *DwarfDebugInlineSection;
+ const MCSection *DwarfStrSection;
+ const MCSection *DwarfLocSection;
+ const MCSection *DwarfARangesSection;
+ const MCSection *DwarfRangesSection;
+ const MCSection *DwarfMacroInfoSection;
+
+public:
+
+ MCContext &getContext() const { return *Ctx; }
+
+
+ virtual ~TargetLoweringObjectFile();
+
+ /// Initialize - this method must be called before any actual lowering is
+ /// done. This specifies the current context for codegen, and gives the
+ /// lowering implementations a chance to set up their default sections.
+ virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+ Ctx = &ctx;
+ }
+
+
+ const MCSection *getTextSection() const { return TextSection; }
+ const MCSection *getDataSection() const { return DataSection; }
+ const MCSection *getBSSSection() const { return BSSSection; }
+ const MCSection *getStaticCtorSection() const { return StaticCtorSection; }
+ const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
+ const MCSection *getLSDASection() const { return LSDASection; }
+ const MCSection *getEHFrameSection() const { return EHFrameSection; }
+ const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
+ const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
+ const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
+ const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; }
+ const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;}
+ const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;}
+ const MCSection *getDwarfDebugInlineSection() const {
+ return DwarfDebugInlineSection;
+ }
+ const MCSection *getDwarfStrSection() const { return DwarfStrSection; }
+ const MCSection *getDwarfLocSection() const { return DwarfLocSection; }
+ const MCSection *getDwarfARangesSection() const { return DwarfARangesSection;}
+ const MCSection *getDwarfRangesSection() const { return DwarfRangesSection; }
+ const MCSection *getDwarfMacroInfoSection() const {
+ return DwarfMacroInfoSection;
+ }
+
+ /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively
+ /// decide not to emit the UsedDirective for some symbols in llvm.used.
+ /// FIXME: REMOVE this (rdar://7071300)
+ virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV,
+ Mangler *) const {
+ return GV != 0;
+ }
+
+ /// getSectionForConstant - Given a constant with the SectionKind, return a
+ /// section that it should be placed in.
+ virtual const MCSection *getSectionForConstant(SectionKind Kind) const;
+
+ /// getKindForGlobal - Classify the specified global variable into a set of
+ /// target independent categories embodied in SectionKind.
+ static SectionKind getKindForGlobal(const GlobalValue *GV,
+ const TargetMachine &TM);
+
+ /// SectionForGlobal - This method computes the appropriate section to emit
+ /// the specified global variable or function definition. This should not
+ /// be passed external (or available externally) globals.
+ const MCSection *SectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler *Mang,
+ const TargetMachine &TM) const;
+
+ /// SectionForGlobal - This method computes the appropriate section to emit
+ /// the specified global variable or function definition. This should not
+ /// be passed external (or available externally) globals.
+ const MCSection *SectionForGlobal(const GlobalValue *GV,
+ Mangler *Mang,
+ const TargetMachine &TM) const {
+ return SectionForGlobal(GV, getKindForGlobal(GV, TM), Mang, TM);
+ }
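// The two-argument form is the convenience entry point. A usage sketch, where
// TLOF names a TargetLoweringObjectFile instance (illustrative only):
//   const MCSection *S = TLOF.SectionForGlobal(GV, Mang, TM);
// which, per the body above, first classifies GV via getKindForGlobal and
// then delegates to the three-argument overload.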
+
+
+
+ /// getExplicitSectionGlobal - Targets should implement this method to assign
+ /// a section to globals with an explicit section specified. The
+ /// implementation of this method can assume that GV->hasSection() is true.
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const = 0;
+
+ /// getSpecialCasedSectionGlobals - Allow the target to completely override
+ /// section assignment of a global.
+ virtual const MCSection *
+ getSpecialCasedSectionGlobals(const GlobalValue *GV, Mangler *Mang,
+ SectionKind Kind) const {
+ return 0;
+ }
+
+ /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a
+ /// pc-relative reference to the specified global variable from exception
+ /// handling information. In addition to the symbol, this returns
+ /// by-reference:
+ ///
+ /// IsIndirect - True if the returned symbol is actually a stub that contains
+ /// the address of the symbol, false if the symbol is the global itself.
+ ///
+ /// IsPCRel - True if the symbol reference is already pc-relative, false if
+ /// the caller needs to subtract off the address of the reference from the
+ /// symbol.
+ ///
+ virtual const MCExpr *
+ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const;
+
+protected:
+ virtual const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+};
+
+
+
+
+class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
+ mutable void *UniquingMap;
+protected:
+ /// TLSDataSection - Section directive for Thread Local data.
+ ///
+ const MCSection *TLSDataSection; // Defaults to ".tdata".
+
+ /// TLSBSSSection - Section directive for Thread Local uninitialized data.
+ /// Null if this target doesn't support a BSS section.
+ ///
+ const MCSection *TLSBSSSection; // Defaults to ".tbss".
+
+ const MCSection *DataRelSection;
+ const MCSection *DataRelLocalSection;
+ const MCSection *DataRelROSection;
+ const MCSection *DataRelROLocalSection;
+
+ const MCSection *MergeableConst4Section;
+ const MCSection *MergeableConst8Section;
+ const MCSection *MergeableConst16Section;
+
+protected:
+ const MCSection *getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind,
+ bool IsExplicit = false) const;
+public:
+ TargetLoweringObjectFileELF() : UniquingMap(0) {}
+ ~TargetLoweringObjectFileELF();
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// getSectionForConstant - Given a constant with the SectionKind, return a
+ /// section that it should be placed in.
+ virtual const MCSection *getSectionForConstant(SectionKind Kind) const;
+
+
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ virtual const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+};
+
+
+
+class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
+ mutable void *UniquingMap;
+
+ const MCSection *CStringSection;
+ const MCSection *UStringSection;
+ const MCSection *TextCoalSection;
+ const MCSection *ConstTextCoalSection;
+ const MCSection *ConstDataCoalSection;
+ const MCSection *ConstDataSection;
+ const MCSection *DataCoalSection;
+ const MCSection *FourByteConstantSection;
+ const MCSection *EightByteConstantSection;
+ const MCSection *SixteenByteConstantSection;
+
+ const MCSection *LazySymbolPointerSection;
+ const MCSection *NonLazySymbolPointerSection;
+public:
+ TargetLoweringObjectFileMachO() : UniquingMap(0) {}
+ ~TargetLoweringObjectFileMachO();
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ virtual const MCSection *getSectionForConstant(SectionKind Kind) const;
+
+ /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively
+ /// decide not to emit the UsedDirective for some symbols in llvm.used.
+ /// FIXME: REMOVE this (rdar://7071300)
+ virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV,
+ Mangler *) const;
+
+ /// getMachOSection - Return the MCSection for the specified mach-o section.
+ /// This requires the operands to be valid.
+ const MCSectionMachO *getMachOSection(const StringRef &Segment,
+ const StringRef &Section,
+ unsigned TypeAndAttributes,
+ SectionKind K) const {
+ return getMachOSection(Segment, Section, TypeAndAttributes, 0, K);
+ }
+ const MCSectionMachO *getMachOSection(const StringRef &Segment,
+ const StringRef &Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2,
+ SectionKind K) const;
+
+ /// getTextCoalSection - Return the "__TEXT,__textcoal_nt" section we put weak
+ /// text symbols into.
+ const MCSection *getTextCoalSection() const {
+ return TextCoalSection;
+ }
+
+ /// getConstTextCoalSection - Return the "__TEXT,__const_coal" section
+ /// we put weak read-only symbols into.
+ const MCSection *getConstTextCoalSection() const {
+ return ConstTextCoalSection;
+ }
+
+ /// getLazySymbolPointerSection - Return the section corresponding to
+ /// the .lazy_symbol_pointer directive.
+ const MCSection *getLazySymbolPointerSection() const {
+ return LazySymbolPointerSection;
+ }
+
+ /// getNonLazySymbolPointerSection - Return the section corresponding to
+ /// the .non_lazy_symbol_pointer directive.
+ const MCSection *getNonLazySymbolPointerSection() const {
+ return NonLazySymbolPointerSection;
+ }
+
+ /// getSymbolForDwarfGlobalReference - The mach-o version of this method
+ /// defaults to returning a stub reference.
+ virtual const MCExpr *
+ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const;
+};
+
+
+
+class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
+ mutable void *UniquingMap;
+public:
+ TargetLoweringObjectFileCOFF() : UniquingMap(0) {}
+ ~TargetLoweringObjectFileCOFF();
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ virtual const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ /// getCOFFSection - Return the MCSection for the specified COFF section.
+ /// FIXME: Switch this to a semantic view eventually.
+ const MCSection *getCOFFSection(const char *Name, bool isDirective,
+ SectionKind K) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 33fc45161a6e..92b648cbb0a9 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -16,10 +16,12 @@
#include "llvm/Target/TargetInstrItineraries.h"
#include <cassert>
+#include <string>
namespace llvm {
-class TargetAsmInfo;
+class Target;
+class MCAsmInfo;
class TargetData;
class TargetSubtarget;
class TargetInstrInfo;
@@ -29,14 +31,14 @@ class TargetLowering;
class TargetFrameInfo;
class MachineCodeEmitter;
class JITCodeEmitter;
+class ObjectCodeEmitter;
class TargetRegisterInfo;
-class Module;
class PassManagerBase;
class PassManager;
class Pass;
class TargetMachOWriterInfo;
class TargetELFWriterInfo;
-class raw_ostream;
+class formatted_raw_ostream;
// Relocation model types.
namespace Reloc {
@@ -79,15 +81,6 @@ namespace CodeGenOpt {
}
-// Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
-namespace FloatABI {
- enum ABIType {
- Default, // Target-specific (either soft of hard depending on triple, etc).
- Soft, // Soft float.
- Hard // Hard float.
- };
-}
-
//===----------------------------------------------------------------------===//
///
/// TargetMachine - Primary interface to the complete machine description for
@@ -98,35 +91,23 @@ class TargetMachine {
TargetMachine(const TargetMachine &); // DO NOT IMPLEMENT
void operator=(const TargetMachine &); // DO NOT IMPLEMENT
protected: // Can only create subclasses.
- TargetMachine();
+ TargetMachine(const Target &);
/// getSubtargetImpl - virtual method implemented by subclasses that returns
/// a reference to that target's TargetSubtarget-derived member variable.
virtual const TargetSubtarget *getSubtargetImpl() const { return 0; }
+
+ /// TheTarget - The Target that this machine was created for.
+ const Target &TheTarget;
/// AsmInfo - Contains target specific asm information.
///
- mutable const TargetAsmInfo *AsmInfo;
+ const MCAsmInfo *AsmInfo;
- /// createTargetAsmInfo - Create a new instance of target specific asm
- /// information.
- virtual const TargetAsmInfo *createTargetAsmInfo() const { return 0; }
-
public:
virtual ~TargetMachine();
- /// getModuleMatchQuality - This static method should be implemented by
- /// targets to indicate how closely they match the specified module. This is
- /// used by the LLC tool to determine which target to use when an explicit
- /// -march option is not specified. If a target returns zero, it will never
- /// be chosen without an explicit -march option.
- static unsigned getModuleMatchQuality(const Module &) { return 0; }
-
- /// getJITMatchQuality - This static method should be implemented by targets
- /// that provide JIT capabilities to indicate how suitable they are for
- /// execution on the current host. If a value of 0 is returned, the target
- /// will not be used unless an explicit -march option is used.
- static unsigned getJITMatchQuality() { return 0; }
+ const Target &getTarget() const { return TheTarget; }
// Interfaces to the major aspects of target machine information:
// -- Instruction opcode and operand information
@@ -139,12 +120,9 @@ public:
virtual TargetLowering *getTargetLowering() const { return 0; }
virtual const TargetData *getTargetData() const { return 0; }
- /// getTargetAsmInfo - Return target specific asm information.
+ /// getMCAsmInfo - Return target specific asm information.
///
- const TargetAsmInfo *getTargetAsmInfo() const {
- if (!AsmInfo) AsmInfo = createTargetAsmInfo();
- return AsmInfo;
- }
+ const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
/// getSubtarget - This method returns a pointer to the specified type of
/// TargetSubtarget. In debug builds, it verifies that the object being
@@ -225,13 +203,12 @@ public:
/// addPassesToEmitFile - Add passes to the specified pass manager to get the
/// specified file emitted. Typically this will involve several steps of code
- /// generation. If Fast is set to true, the code generator should emit code
- /// as fast as possible, though the generated code may be less efficient.
+ /// generation.
/// This method should return FileModel::Error if emission of this file type
/// is not supported.
///
virtual FileModel::Model addPassesToEmitFile(PassManagerBase &,
- raw_ostream &,
+ formatted_raw_ostream &,
CodeGenFileType,
CodeGenOpt::Level) {
return FileModel::None;
@@ -257,6 +234,16 @@ public:
return true;
}
+ /// addPassesToEmitFileFinish - If the passes to emit the specified file had
+ /// to be split up (e.g., to add an object writer pass), this method can be
+ /// used to finish up adding passes to emit the file, if necessary.
+ ///
+ virtual bool addPassesToEmitFileFinish(PassManagerBase &,
+ ObjectCodeEmitter *,
+ CodeGenOpt::Level) {
+ return true;
+ }
+
/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
/// get machine code emitted. This uses a MachineCodeEmitter object to handle
/// actually outputting the machine code and resolving things like the address
@@ -285,7 +272,7 @@ public:
/// require having the entire module at once. This is not recommended, do not
/// use this.
virtual bool WantsWholeFile() const { return false; }
- virtual bool addPassesToEmitWholeFile(PassManager &, raw_ostream &,
+ virtual bool addPassesToEmitWholeFile(PassManager &, formatted_raw_ostream &,
CodeGenFileType,
CodeGenOpt::Level) {
return true;
@@ -297,8 +284,8 @@ public:
///
class LLVMTargetMachine : public TargetMachine {
protected: // Can only create subclasses.
- LLVMTargetMachine() { }
-
+ LLVMTargetMachine(const Target &T, const std::string &TargetTriple);
+
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for
/// both emitting to assembly files or machine code output.
///
@@ -318,7 +305,7 @@ public:
/// target-specific passes in standard locations.
///
virtual FileModel::Model addPassesToEmitFile(PassManagerBase &PM,
- raw_ostream &Out,
+ formatted_raw_ostream &Out,
CodeGenFileType FileType,
CodeGenOpt::Level);
@@ -335,7 +322,15 @@ public:
/// used to finish up adding passes to emit the file, if necessary.
///
virtual bool addPassesToEmitFileFinish(PassManagerBase &PM,
- JITCodeEmitter *MCE,
+ JITCodeEmitter *JCE,
+ CodeGenOpt::Level);
+
+ /// addPassesToEmitFileFinish - If the passes to emit the specified file had
+ /// to be split up (e.g., to add an object writer pass), this method can be
+ /// used to finish up adding passes to emit the file, if necessary.
+ ///
+ virtual bool addPassesToEmitFileFinish(PassManagerBase &PM,
+ ObjectCodeEmitter *OCE,
CodeGenOpt::Level);
/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
@@ -367,20 +362,28 @@ public:
return true;
}
- /// addPreRegAllocPasses - This method may be implemented by targets that want
- /// to run passes immediately before register allocation. This should return
+ /// addPreRegAlloc - This method may be implemented by targets that want to
+ /// run passes immediately before register allocation. This should return
/// true if -print-machineinstrs should print after these passes.
virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level) {
return false;
}
- /// addPostRegAllocPasses - This method may be implemented by targets that
- /// want to run passes after register allocation but before prolog-epilog
+ /// addPostRegAlloc - This method may be implemented by targets that want
+ /// to run passes after register allocation but before prolog-epilog
/// insertion. This should return true if -print-machineinstrs should print
/// after these passes.
virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level) {
return false;
}
+
+ /// addPreSched2 - This method may be implemented by targets that want to
+ /// run passes after prolog-epilog insertion and before the second instruction
+ /// scheduling pass. This should return true if -print-machineinstrs should
+ /// print after these passes.
+ virtual bool addPreSched2(PassManagerBase &, CodeGenOpt::Level) {
+ return false;
+ }
/// addPreEmitPass - This pass may be implemented by targets that want to run
/// passes immediately before machine code is emitted. This should return
@@ -390,51 +393,57 @@ public:
}
- /// addAssemblyEmitter - This pass should be overridden by the target to add
- /// the asmprinter, if asm emission is supported. If this is not supported,
- /// 'true' should be returned.
- virtual bool addAssemblyEmitter(PassManagerBase &, CodeGenOpt::Level,
- bool /* VerboseAsmDefault */, raw_ostream &) {
- return true;
- }
-
/// addCodeEmitter - This pass should be overridden by the target to add a
/// code emitter, if supported. If this is not supported, 'true' should be
- /// returned. If DumpAsm is true, the generated assembly is printed to cerr.
+ /// returned.
virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
- bool /*DumpAsm*/, MachineCodeEmitter &) {
+ MachineCodeEmitter &) {
return true;
}
/// addCodeEmitter - This pass should be overridden by the target to add a
/// code emitter, if supported. If this is not supported, 'true' should be
- /// returned. If DumpAsm is true, the generated assembly is printed to cerr.
+ /// returned.
virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
- bool /*DumpAsm*/, JITCodeEmitter &) {
+ JITCodeEmitter &) {
return true;
}
/// addSimpleCodeEmitter - This pass should be overridden by the target to add
/// a code emitter (without setting flags), if supported. If this is not
- /// supported, 'true' should be returned. If DumpAsm is true, the generated
- /// assembly is printed to cerr.
+ /// supported, 'true' should be returned.
virtual bool addSimpleCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
- bool /*DumpAsm*/, MachineCodeEmitter &) {
+ MachineCodeEmitter &) {
return true;
}
/// addSimpleCodeEmitter - This pass should be overridden by the target to add
/// a code emitter (without setting flags), if supported. If this is not
- /// supported, 'true' should be returned. If DumpAsm is true, the generated
- /// assembly is printed to cerr.
+ /// supported, 'true' should be returned.
virtual bool addSimpleCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
- bool /*DumpAsm*/, JITCodeEmitter &) {
+ JITCodeEmitter &) {
+ return true;
+ }
+
+ /// addSimpleCodeEmitter - This pass should be overridden by the target to add
+ /// a code emitter (without setting flags), if supported. If this is not
+ /// supported, 'true' should be returned.
+ virtual bool addSimpleCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
+ ObjectCodeEmitter &) {
return true;
}
/// getEnableTailMergeDefault - the default setting for -enable-tail-merge
/// on this target. User flag overrides.
virtual bool getEnableTailMergeDefault() const { return true; }
+
+ /// addAssemblyEmitter - Helper function which creates a target specific
+ /// assembly printer, if available.
+ ///
+ /// \return Returns 'false' on success.
+ bool addAssemblyEmitter(PassManagerBase &, CodeGenOpt::Level,
+ bool /* VerboseAsmDefault */,
+ formatted_raw_ostream &);
};
} // End llvm namespace
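
For illustration, a minimal client of the revised emission interface (a sketch, not taken from the patch; TargetMachine::AssemblyFile and CodeGenOpt::Default are assumed from the surrounding 2.6-era headers, and the matching addPassesToEmitFileFinish call is omitted):

    #include "llvm/PassManager.h"
    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Target/TargetMachine.h"

    // Lower a module to assembly. The output stream is now a
    // formatted_raw_ostream, matching the new signatures above.
    bool emitAsm(llvm::TargetMachine &TM, llvm::PassManagerBase &PM,
                 llvm::formatted_raw_ostream &Out) {
      llvm::FileModel::Model FM =
          TM.addPassesToEmitFile(PM, Out, llvm::TargetMachine::AssemblyFile,
                                 llvm::CodeGenOpt::Default);
      return FM != llvm::FileModel::Error;
    }
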
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 377e03f95c47..8d52dadc285f 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -16,6 +16,15 @@
#define LLVM_TARGET_TARGETOPTIONS_H
namespace llvm {
+ // Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
+ namespace FloatABI {
+ enum ABIType {
+ Default, // Target-specific (either soft or hard, depending on triple, etc.).
+ Soft, // Soft float.
+ Hard // Hard float.
+ };
+ }
+
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
/// option is specified on the command line, and should enable debugging
/// output from the code generator.
@@ -85,10 +94,23 @@ namespace llvm {
/// .bss section. This flag disables such behaviour (necessary, e.g. for
/// crt*.o compiling).
extern bool NoZerosInBSS;
-
- /// ExceptionHandling - This flag indicates that exception information should
- /// be emitted.
- extern bool ExceptionHandling;
+
+ /// DwarfExceptionHandling - This flag indicates that Dwarf exception
+ /// information should be emitted.
+ extern bool DwarfExceptionHandling;
+
+ /// SjLjExceptionHandling - This flag indicates that SJLJ exception
+ /// information should be emitted.
+ extern bool SjLjExceptionHandling;
+
+ /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
+ /// debug information and notify a debugger about it.
+ extern bool JITEmitDebugInfo;
+
+ /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write
+ /// the object files generated by the JITEmitDebugInfo flag to disk. This
+ /// flag is hidden and is only for debugging the debug info.
+ extern bool JITEmitDebugInfoToDisk;
/// UnwindTablesMandatory - This flag indicates that unwind tables should
/// be emitted for all functions.
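
A short sketch of how client code reads the split flags (illustrative only; FloatABIType is assumed to be the extern FloatABI::ABIType variable declared elsewhere in this header):

    #include "llvm/Target/TargetOptions.h"

    // True when any exception-table emission is requested; the old single
    // ExceptionHandling flag is now split into Dwarf and SjLj variants.
    bool wantsEHTables() {
      return llvm::DwarfExceptionHandling || llvm::SjLjExceptionHandling ||
             llvm::UnwindTablesMandatory;
    }

    // True when the soft-float ABI has been selected explicitly.
    bool usesSoftFloatABI() {
      return llvm::FloatABIType == llvm::FloatABI::Soft;
    }
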
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 91e8f80fd108..e90fc6cccc3d 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -41,7 +41,6 @@ class RegScavenger;
/// of AX.
///
struct TargetRegisterDesc {
- const char *AsmName; // Assembly language name for the register
const char *Name; // Printable name for the reg (for debugging)
const unsigned *AliasSet; // Register Alias Set, described above
const unsigned *SubRegs; // Sub-register set, described above
@@ -53,7 +52,7 @@ public:
typedef const unsigned* iterator;
typedef const unsigned* const_iterator;
- typedef const MVT* vt_iterator;
+ typedef const EVT* vt_iterator;
typedef const TargetRegisterClass* const * sc_iterator;
private:
unsigned ID;
@@ -70,7 +69,7 @@ private:
public:
TargetRegisterClass(unsigned id,
const char *name,
- const MVT *vts,
+ const EVT *vts,
const TargetRegisterClass * const *subcs,
const TargetRegisterClass * const *supcs,
const TargetRegisterClass * const *subregcs,
@@ -84,7 +83,7 @@ public:
RegSet.insert(*I);
}
virtual ~TargetRegisterClass() {} // Allow subclasses
-
+
/// getID() - Return the register class ID number.
///
unsigned getID() const { return ID; }
@@ -117,13 +116,13 @@ public:
/// hasType - return true if this TargetRegisterClass has the ValueType vt.
///
- bool hasType(MVT vt) const {
- for(int i = 0; VTs[i] != MVT::Other; ++i)
+ bool hasType(EVT vt) const {
+ for(int i = 0; VTs[i].getSimpleVT().SimpleTy != MVT::Other; ++i)
if (VTs[i] == vt)
return true;
return false;
}
-
+
/// vt_begin / vt_end - Loop over all of the value types that can be
/// represented by values in this register class.
vt_iterator vt_begin() const {
@@ -132,7 +131,7 @@ public:
vt_iterator vt_end() const {
vt_iterator I = VTs;
- while (*I != MVT::Other) ++I;
+ while (I->getSimpleVT().SimpleTy != MVT::Other) ++I;
return I;
}
@@ -173,7 +172,7 @@ public:
/// hasSubClass - return true if the specified TargetRegisterClass
/// is a proper subset of this TargetRegisterClass.
bool hasSubClass(const TargetRegisterClass *cs) const {
- for (int i = 0; SubClasses[i] != NULL; ++i)
+ for (int i = 0; SubClasses[i] != NULL; ++i)
if (SubClasses[i] == cs)
return true;
return false;
@@ -184,17 +183,17 @@ public:
sc_iterator subclasses_begin() const {
return SubClasses;
}
-
+
sc_iterator subclasses_end() const {
sc_iterator I = SubClasses;
while (*I != NULL) ++I;
return I;
}
-
+
/// hasSuperClass - return true if the specified TargetRegisterClass is a
/// proper superset of this TargetRegisterClass.
bool hasSuperClass(const TargetRegisterClass *cs) const {
- for (int i = 0; SuperClasses[i] != NULL; ++i)
+ for (int i = 0; SuperClasses[i] != NULL; ++i)
if (SuperClasses[i] == cs)
return true;
return false;
@@ -205,7 +204,7 @@ public:
sc_iterator superclasses_begin() const {
return SuperClasses;
}
-
+
sc_iterator superclasses_end() const {
sc_iterator I = SuperClasses;
while (*I != NULL) ++I;
@@ -217,7 +216,7 @@ public:
bool isASubClass() const {
return SuperClasses[0] != 0;
}
-
+
/// allocation_order_begin/end - These methods define a range of registers
/// which specify the registers in this class that are valid to register
/// allocate, and the preferred order to allocate them in. For example,
@@ -318,15 +317,15 @@ public:
}
/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
- /// register of the given type. If type is MVT::Other, then just return any
+ /// register of the given type. If type is MVT::Other, then just return any
/// register class the register belongs to.
virtual const TargetRegisterClass *
- getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+ getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const;
/// getAllocatableSet - Returns a bitset indexed by register number
/// indicating if a register is allocatable or not. If a register class is
/// specified, returns the subset for the class.
- BitVector getAllocatableSet(MachineFunction &MF,
+ BitVector getAllocatableSet(const MachineFunction &MF,
const TargetRegisterClass *RC = NULL) const;
const TargetRegisterDesc &operator[](unsigned RegNo) const {
@@ -368,12 +367,6 @@ public:
return get(RegNo).SuperRegs;
}
- /// getAsmName - Return the symbolic target-specific name for the
- /// specified physical register.
- const char *getAsmName(unsigned RegNo) const {
- return get(RegNo).AsmName;
- }
-
/// getName - Return the human-readable symbolic target-specific name for the
/// specified physical register.
const char *getName(unsigned RegNo) const {
@@ -386,9 +379,16 @@ public:
return NumRegs;
}
- /// areAliases - Returns true if the two registers alias each other, false
- /// otherwise
- bool areAliases(unsigned regA, unsigned regB) const {
+ /// regsOverlap - Returns true if the two registers are equal or alias each
+ /// other. The registers may be virtual registers.
+ bool regsOverlap(unsigned regA, unsigned regB) const {
+ if (regA == regB)
+ return true;
+
+ if (isVirtualRegister(regA) || isVirtualRegister(regB))
+ return false;
+
+ // regA and regB are distinct physical registers. Do they alias?
size_t index = (regA + regB * 37) & (AliasesHashSize-1);
unsigned ProbeAmt = 0;
while (AliasesHash[index*2] != 0 &&
@@ -403,17 +403,6 @@ public:
return false;
}
- /// regsOverlap - Returns true if the two registers are equal or alias each
- /// other. The registers may be virtual register.
- bool regsOverlap(unsigned regA, unsigned regB) const {
- if (regA == regB)
- return true;
-
- if (isVirtualRegister(regA) || isVirtualRegister(regB))
- return false;
- return areAliases(regA, regB);
- }
-
/// isSubRegister - Returns true if regB is a sub-register of regA.
///
bool isSubRegister(unsigned regA, unsigned regB) const {
@@ -424,11 +413,11 @@ public:
SubregHash[index*2+1] != 0) {
if (SubregHash[index*2] == regA && SubregHash[index*2+1] == regB)
return true;
-
+
index = (index + ProbeAmt) & (SubregHashSize-1);
ProbeAmt += 2;
}
-
+
return false;
}
@@ -442,11 +431,11 @@ public:
SuperregHash[index*2+1] != 0) {
if (SuperregHash[index*2] == regA && SuperregHash[index*2+1] == regB)
return true;
-
+
index = (index + ProbeAmt) & (SuperregHashSize-1);
ProbeAmt += 2;
}
-
+
return false;
}
@@ -476,7 +465,7 @@ public:
/// getMatchingSuperReg - Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
- unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
+ unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
const TargetRegisterClass *RC) const {
for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs)
if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR))
@@ -484,6 +473,15 @@ public:
return 0;
}
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const {
+ return 0;
+ }
+
//===--------------------------------------------------------------------===//
// Register Class Information
//
@@ -496,7 +494,7 @@ public:
unsigned getNumRegClasses() const {
return (unsigned)(regclass_end()-regclass_begin());
}
-
+
/// getRegClass - Returns the register class associated with the enumeration
/// value. See class TargetOperandInfo.
const TargetRegisterClass *getRegClass(unsigned i) const {
@@ -505,8 +503,9 @@ public:
}
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
- /// values.
- virtual const TargetRegisterClass *getPointerRegClass() const {
+ /// values. If a target supports multiple different pointer register classes,
+ /// kind specifies which one is indicated.
+ virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const {
assert(0 && "Target didn't implement getPointerRegClass!");
return 0; // Must return a value in order to compile with VS 2005
}
@@ -561,24 +560,41 @@ public:
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
return false;
}
-
+
+ /// requiresFrameIndexScavenging - returns true if the target requires post
+ /// PEI scavenging of registers for materializing frame index constants.
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return false;
+ }
+
/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
/// has variable sized allocas or if frame pointer elimination is disabled.
virtual bool hasFP(const MachineFunction &MF) const = 0;
- // hasReservedCallFrame - Under normal circumstances, when a frame pointer is
- // not required, we reserve argument space for call sites in the function
- // immediately on entry to the current function. This eliminates the need for
- // add/sub sp brackets around call sites. Returns true if the call frame is
- // included as part of the stack frame.
+ /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+ /// not required, we reserve argument space for call sites in the function
+ /// immediately on entry to the current function. This eliminates the need for
+ /// add/sub sp brackets around call sites. Returns true if the call frame is
+ /// included as part of the stack frame.
virtual bool hasReservedCallFrame(MachineFunction &MF) const {
return !hasFP(MF);
}
- // needsStackRealignment - true if storage within the function requires the
- // stack pointer to be aligned more than the normal calling convention calls
- // for.
+ /// hasReservedSpillSlot - Return true if target has reserved a spill slot in
+ /// the stack frame of the given function for the specified register. e.g. On
+ /// x86, if the frame register is required, the first fixed stack object is
+ /// reserved as its spill slot. This tells PEI not to create a new stack frame
+ /// object for the given register. It should be called only after
+ /// processFunctionBeforeCalleeSavedScan().
+ virtual bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const {
+ return false;
+ }
+
+ /// needsStackRealignment - true if storage within the function requires the
+ /// stack pointer to be aligned more than the normal calling convention calls
+ /// for.
virtual bool needsStackRealignment(const MachineFunction &MF) const {
return false;
}
@@ -625,6 +641,24 @@ public:
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
}
+ /// saveScavengerRegister - Save the register so it can be used by the
+ /// register scavenger. Return true if the register was saved, false
+ /// otherwise. If this function does not save the register, the scavenger
+ /// will instead spill it to the emergency spill slot.
+ ///
+ virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {return false;}
+
+ /// restoreScavengerRegister - Restore a register saved by
+ /// saveScavengerRegister().
+ ///
+ virtual void restoreScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {}
+
/// eliminateFrameIndex - This method must be overriden to eliminate abstract
/// frame indices from instructions which may use them. The instruction
/// referenced by the iterator contains an MO_FrameIndex operand which must be
@@ -632,18 +666,23 @@ public:
/// specified instruction, as long as it keeps the iterator pointing at the
/// finished product. SPAdj is the SP adjustment due to call frame setup
/// instruction.
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS=NULL) const = 0;
+ ///
+ /// When -enable-frame-index-scavenging is enabled, the virtual register
+ /// allocated for this frame index is returned and its value is stored in
+ /// *Value.
+ virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS=NULL) const = 0;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
virtual void emitPrologue(MachineFunction &MF) const = 0;
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const = 0;
-
+
//===--------------------------------------------------------------------===//
/// Debug information queries.
-
+
/// getDwarfRegNum - Map a target register to an equivalent dwarf register
/// number. Returns -1 if there is no equivalent value. The second
/// parameter allows targets to use different numberings for EH info and
@@ -657,11 +696,11 @@ public:
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index.
virtual int getFrameIndexOffset(MachineFunction &MF, int FI) const;
-
+
/// getRARegister - This method should return the register where the return
/// address can be found.
virtual unsigned getRARegister() const = 0;
-
+
/// getInitialFrameState - Returns a list of machine moves that are assumed
/// on entry to all functions. Note that LabelID is ignored (assumed to be
/// the beginning of the function).
@@ -670,7 +709,7 @@ public:
// This is useful when building IndexedMaps keyed on virtual registers
-struct VirtReg2IndexFunctor : std::unary_function<unsigned, unsigned> {
+struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
unsigned operator()(unsigned Reg) const {
return Reg - TargetRegisterInfo::FirstVirtualRegister;
}
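
Since areAliases() is now folded into regsOverlap(), callers get the identity and virtual-register checks for free. A sketch of the typical query (illustrative, not from the patch):

    #include "llvm/Target/TargetRegisterInfo.h"

    // Does a def of DefReg clobber a use of UseReg? regsOverlap() returns
    // true for equal registers and for aliasing physical registers; two
    // distinct virtual registers never overlap.
    static bool clobbers(const llvm::TargetRegisterInfo &TRI,
                         unsigned DefReg, unsigned UseReg) {
      return TRI.regsOverlap(DefReg, UseReg);
    }
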
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
new file mode 100644
index 000000000000..8042d2363677
--- /dev/null
+++ b/include/llvm/Target/TargetRegistry.h
@@ -0,0 +1,560 @@
+//===-- Target/TargetRegistry.h - Target Registration -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes the TargetRegistry interface, which tools can use to access
+// the appropriate target specific classes (TargetMachine, AsmPrinter, etc.)
+// which have been registered.
+//
+// Target specific class implementations should register themselves using the
+// appropriate TargetRegistry interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETREGISTRY_H
+#define LLVM_TARGET_TARGETREGISTRY_H
+
+#include "llvm/ADT/Triple.h"
+#include <string>
+#include <cassert>
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmParser;
+ class MCCodeEmitter;
+ class Module;
+ class MCAsmInfo;
+ class MCDisassembler;
+ class MCInstPrinter;
+ class TargetAsmParser;
+ class TargetMachine;
+ class formatted_raw_ostream;
+ class raw_ostream;
+
+ /// Target - Wrapper for Target specific information.
+ ///
+ /// For registration purposes, this is a POD type so that targets can be
+ /// registered without the use of static constructors.
+ ///
+ /// Targets should implement a single global instance of this class (which
+ /// will be zero initialized), and pass that instance to the TargetRegistry as
+ /// part of their initialization.
+ class Target {
+ public:
+ friend struct TargetRegistry;
+
+ typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT);
+
+ typedef const MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T,
+ const StringRef &TT);
+ typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T,
+ const std::string &TT,
+ const std::string &Features);
+ typedef AsmPrinter *(*AsmPrinterCtorTy)(formatted_raw_ostream &OS,
+ TargetMachine &TM,
+ const MCAsmInfo *MAI,
+ bool VerboseAsm);
+ typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,
+ MCAsmParser &P);
+ typedef const MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T);
+ typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ raw_ostream &O);
+ typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T,
+ TargetMachine &TM);
+
+ private:
+ /// Next - The next registered target in the linked list, maintained by the
+ /// TargetRegistry.
+ Target *Next;
+
+ /// TripleMatchQualityFn - The target function for rating the match quality
+ /// of a triple.
+ TripleMatchQualityFnTy TripleMatchQualityFn;
+
+ /// Name - The target name.
+ const char *Name;
+
+ /// ShortDesc - A short description of the target.
+ const char *ShortDesc;
+
+ /// HasJIT - Whether this target supports the JIT.
+ bool HasJIT;
+
+ AsmInfoCtorFnTy AsmInfoCtorFn;
+
+ /// TargetMachineCtorFn - Construction function for this target's
+ /// TargetMachine, if registered.
+ TargetMachineCtorTy TargetMachineCtorFn;
+
+ /// AsmPrinterCtorFn - Construction function for this target's AsmPrinter,
+ /// if registered.
+ AsmPrinterCtorTy AsmPrinterCtorFn;
+
+ /// AsmParserCtorFn - Construction function for this target's AsmParser,
+ /// if registered.
+ AsmParserCtorTy AsmParserCtorFn;
+
+ /// MCDisassemblerCtorFn - Construction function for this target's
+ /// MCDisassembler, if registered.
+ MCDisassemblerCtorTy MCDisassemblerCtorFn;
+
+
+ /// MCInstPrinterCtorFn - Construction function for this target's
+ /// MCInstPrinter, if registered.
+ MCInstPrinterCtorTy MCInstPrinterCtorFn;
+
+ /// CodeEmitterCtorFn - Construction function for this target's CodeEmitter,
+ /// if registered.
+ CodeEmitterCtorTy CodeEmitterCtorFn;
+
+ public:
+ /// @name Target Information
+ /// @{
+
+ // getNext - Return the next registered target.
+ const Target *getNext() const { return Next; }
+
+ /// getName - Get the target name.
+ const char *getName() const { return Name; }
+
+ /// getShortDescription - Get a short description of the target.
+ const char *getShortDescription() const { return ShortDesc; }
+
+ /// @}
+ /// @name Feature Predicates
+ /// @{
+
+ /// hasJIT - Check if this target supports just-in-time compilation.
+ bool hasJIT() const { return HasJIT; }
+
+ /// hasTargetMachine - Check if this target supports code generation.
+ bool hasTargetMachine() const { return TargetMachineCtorFn != 0; }
+
+ /// hasAsmPrinter - Check if this target supports .s printing.
+ bool hasAsmPrinter() const { return AsmPrinterCtorFn != 0; }
+
+ /// hasAsmParser - Check if this target supports .s parsing.
+ bool hasAsmParser() const { return AsmParserCtorFn != 0; }
+
+ /// hasMCDisassembler - Check if this target has a disassembler.
+ bool hasMCDisassembler() const { return MCDisassemblerCtorFn != 0; }
+
+ /// hasMCInstPrinter - Check if this target has an instruction printer.
+ bool hasMCInstPrinter() const { return MCInstPrinterCtorFn != 0; }
+
+ /// hasCodeEmitter - Check if this target supports instruction encoding.
+ bool hasCodeEmitter() const { return CodeEmitterCtorFn != 0; }
+
+ /// @}
+ /// @name Feature Constructors
+ /// @{
+
+ /// createAsmInfo - Create a MCAsmInfo implementation for the specified
+ /// target triple.
+ ///
+ /// \arg Triple - This argument is used to determine the target machine
+ /// feature set; it should always be provided. Generally this should be
+ /// either the target triple from the module, or the target triple of the
+ /// host if that does not exist.
+ const MCAsmInfo *createAsmInfo(const StringRef &Triple) const {
+ if (!AsmInfoCtorFn)
+ return 0;
+ return AsmInfoCtorFn(*this, Triple);
+ }
+
+ /// createTargetMachine - Create a target specific machine implementation
+ /// for the specified \arg Triple.
+ ///
+ /// \arg Triple - This argument is used to determine the target machine
+ /// feature set; it should always be provided. Generally this should be
+ /// either the target triple from the module, or the target triple of the
+ /// host if that does not exist.
+ TargetMachine *createTargetMachine(const std::string &Triple,
+ const std::string &Features) const {
+ if (!TargetMachineCtorFn)
+ return 0;
+ return TargetMachineCtorFn(*this, Triple, Features);
+ }
+
+ /// createAsmPrinter - Create a target specific assembly printer pass.
+ AsmPrinter *createAsmPrinter(formatted_raw_ostream &OS, TargetMachine &TM,
+ const MCAsmInfo *MAI, bool Verbose) const {
+ if (!AsmPrinterCtorFn)
+ return 0;
+ return AsmPrinterCtorFn(OS, TM, MAI, Verbose);
+ }
+
+ /// createAsmParser - Create a target specific assembly parser.
+ ///
+ /// \arg Parser - The target independent parser implementation to use for
+ /// parsing and lexing.
+ TargetAsmParser *createAsmParser(MCAsmParser &Parser) const {
+ if (!AsmParserCtorFn)
+ return 0;
+ return AsmParserCtorFn(*this, Parser);
+ }
+
+ const MCDisassembler *createMCDisassembler() const {
+ if (!MCDisassemblerCtorFn)
+ return 0;
+ return MCDisassemblerCtorFn(*this);
+ }
+
+ MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ raw_ostream &O) const {
+ if (!MCInstPrinterCtorFn)
+ return 0;
+ return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, O);
+ }
+
+
+ /// createCodeEmitter - Create a target specific code emitter.
+ MCCodeEmitter *createCodeEmitter(TargetMachine &TM) const {
+ if (!CodeEmitterCtorFn)
+ return 0;
+ return CodeEmitterCtorFn(*this, TM);
+ }
+
+ /// @}
+ };
+
+ /// TargetRegistry - Generic interface to target specific features.
+ struct TargetRegistry {
+ class iterator {
+ const Target *Current;
+ explicit iterator(Target *T) : Current(T) {}
+ friend struct TargetRegistry;
+ public:
+ iterator(const iterator &I) : Current(I.Current) {}
+ iterator() : Current(0) {}
+
+ bool operator==(const iterator &x) const {
+ return Current == x.Current;
+ }
+ bool operator!=(const iterator &x) const {
+ return !operator==(x);
+ }
+
+ // Iterator traversal: forward iteration only
+ iterator &operator++() { // Preincrement
+ assert(Current && "Cannot increment end iterator!");
+ Current = Current->getNext();
+ return *this;
+ }
+ iterator operator++(int) { // Postincrement
+ iterator tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ const Target &operator*() const {
+ assert(Current && "Cannot dereference end iterator!");
+ return *Current;
+ }
+
+ const Target *operator->() const {
+ return &operator*();
+ }
+ };
+
+ /// @name Registry Access
+ /// @{
+
+ static iterator begin();
+
+ static iterator end() { return iterator(); }
+
+ /// lookupTarget - Lookup a target based on a target triple.
+ ///
+ /// \param Triple - The triple to use for finding a target.
+ /// \param Error - On failure, an error string describing why no target was
+ /// found.
+ static const Target *lookupTarget(const std::string &Triple,
+ std::string &Error);
+
+ /// getClosestTargetForJIT - Pick the best target that is compatible with
+ /// the current host. If no close target can be found, this returns null
+ /// and sets the Error string to a reason.
+ ///
+ /// Maintained for compatibility through 2.6.
+ static const Target *getClosestTargetForJIT(std::string &Error);
+
+ /// @}
+ /// @name Target Registration
+ /// @{
+
+ /// RegisterTarget - Register the given target. Attempts to register a
+ /// target which has already been registered will be ignored.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Name - The target name. This should be a static string.
+ /// @param ShortDesc - A short target description. This should be a static
+ /// string.
+ /// @param TQualityFn - The triple match quality computation function for
+ /// this target.
+ /// @param HasJIT - Whether the target supports JIT code
+ /// generation.
+ static void RegisterTarget(Target &T,
+ const char *Name,
+ const char *ShortDesc,
+ Target::TripleMatchQualityFnTy TQualityFn,
+ bool HasJIT = false);
+
+ /// RegisterAsmInfo - Register a MCAsmInfo implementation for the
+ /// given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a MCAsmInfo for the target.
+ static void RegisterAsmInfo(Target &T, Target::AsmInfoCtorFnTy Fn) {
+ // Ignore duplicate registration.
+ if (!T.AsmInfoCtorFn)
+ T.AsmInfoCtorFn = Fn;
+ }
+
+ /// RegisterTargetMachine - Register a TargetMachine implementation for the
+ /// given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a TargetMachine for the target.
+ static void RegisterTargetMachine(Target &T,
+ Target::TargetMachineCtorTy Fn) {
+ // Ignore duplicate registration.
+ if (!T.TargetMachineCtorFn)
+ T.TargetMachineCtorFn = Fn;
+ }
+
+ /// RegisterAsmPrinter - Register an AsmPrinter implementation for the given
+ /// target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct an AsmPrinter for the target.
+ static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn) {
+ // Ignore duplicate registration.
+ if (!T.AsmPrinterCtorFn)
+ T.AsmPrinterCtorFn = Fn;
+ }
+
+ /// RegisterAsmParser - Register a TargetAsmParser implementation for the
+ /// given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a TargetAsmParser for the target.
+ static void RegisterAsmParser(Target &T, Target::AsmParserCtorTy Fn) {
+ if (!T.AsmParserCtorFn)
+ T.AsmParserCtorFn = Fn;
+ }
+
+ /// RegisterMCDisassembler - Register a MCDisassembler implementation for
+ /// the given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct an MCDisassembler for the target.
+ static void RegisterMCDisassembler(Target &T,
+ Target::MCDisassemblerCtorTy Fn) {
+ if (!T.MCDisassemblerCtorFn)
+ T.MCDisassemblerCtorFn = Fn;
+ }
+
+ static void RegisterMCInstPrinter(Target &T,
+ Target::MCInstPrinterCtorTy Fn) {
+ if (!T.MCInstPrinterCtorFn)
+ T.MCInstPrinterCtorFn = Fn;
+ }
+
+ /// RegisterCodeEmitter - Register a MCCodeEmitter implementation for the
+ /// given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct an MCCodeEmitter for the target.
+ static void RegisterCodeEmitter(Target &T, Target::CodeEmitterCtorTy Fn) {
+ if (!T.CodeEmitterCtorFn)
+ T.CodeEmitterCtorFn = Fn;
+ }
+
+ /// @}
+ };
+
+
+ //===--------------------------------------------------------------------===//
+
+ /// RegisterTarget - Helper template for registering a target, for use in the
+ /// target's initialization function. Usage:
+ ///
+ /// Target TheFooTarget; // The global target instance.
+ ///
+ /// extern "C" void LLVMInitializeFooTargetInfo() {
+ /// RegisterTarget<Triple::foo> X(TheFooTarget, "foo", "Foo description");
+ /// }
+ template<Triple::ArchType TargetArchType = Triple::InvalidArch,
+ bool HasJIT = false>
+ struct RegisterTarget {
+ RegisterTarget(Target &T, const char *Name, const char *Desc) {
+ TargetRegistry::RegisterTarget(T, Name, Desc,
+ &getTripleMatchQuality,
+ HasJIT);
+ }
+
+ static unsigned getTripleMatchQuality(const std::string &TT) {
+ if (Triple(TT).getArch() == TargetArchType)
+ return 20;
+ return 0;
+ }
+ };
+
+ /// RegisterAsmInfo - Helper template for registering a target assembly info
+ /// implementation. This invokes the static "Create" method on the class to
+ /// actually do the construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterAsmInfo<FooMCAsmInfo> X(TheFooTarget);
+ /// }
+ template<class MCAsmInfoImpl>
+ struct RegisterAsmInfo {
+ RegisterAsmInfo(Target &T) {
+ TargetRegistry::RegisterAsmInfo(T, &Allocator);
+ }
+ private:
+ static const MCAsmInfo *Allocator(const Target &T, const StringRef &TT) {
+ return new MCAsmInfoImpl(T, TT);
+ }
+
+ };
+
+ /// RegisterAsmInfoFn - Helper template for registering a target assembly info
+ /// implementation. This invokes the specified function to do the
+ /// construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterAsmInfoFn X(TheFooTarget, TheFunction);
+ /// }
+ struct RegisterAsmInfoFn {
+ RegisterAsmInfoFn(Target &T, Target::AsmInfoCtorFnTy Fn) {
+ TargetRegistry::RegisterAsmInfo(T, Fn);
+ }
+ };
+
+
+ /// RegisterTargetMachine - Helper template for registering a target machine
+ /// implementation, for use in the target machine initialization
+ /// function. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterTargetMachine<FooTargetMachine> X(TheFooTarget);
+ /// }
+ template<class TargetMachineImpl>
+ struct RegisterTargetMachine {
+ RegisterTargetMachine(Target &T) {
+ TargetRegistry::RegisterTargetMachine(T, &Allocator);
+ }
+
+ private:
+ static TargetMachine *Allocator(const Target &T, const std::string &TT,
+ const std::string &FS) {
+ return new TargetMachineImpl(T, TT, FS);
+ }
+ };
+
+ /// RegisterAsmPrinter - Helper template for registering a target specific
+ /// assembly printer, for use in the target machine initialization
+ /// function. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooAsmPrinter() {
+ /// extern Target TheFooTarget;
+ /// RegisterAsmPrinter<FooAsmPrinter> X(TheFooTarget);
+ /// }
+ template<class AsmPrinterImpl>
+ struct RegisterAsmPrinter {
+ RegisterAsmPrinter(Target &T) {
+ TargetRegistry::RegisterAsmPrinter(T, &Allocator);
+ }
+
+ private:
+ static AsmPrinter *Allocator(formatted_raw_ostream &OS, TargetMachine &TM,
+ const MCAsmInfo *MAI, bool Verbose) {
+ return new AsmPrinterImpl(OS, TM, MAI, Verbose);
+ }
+ };
+
+ /// RegisterAsmParser - Helper template for registering a target specific
+ /// assembly parser, for use in the target machine initialization
+ /// function. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooAsmParser() {
+ /// extern Target TheFooTarget;
+ /// RegisterAsmParser<FooAsmParser> X(TheFooTarget);
+ /// }
+ template<class AsmParserImpl>
+ struct RegisterAsmParser {
+ RegisterAsmParser(Target &T) {
+ TargetRegistry::RegisterAsmParser(T, &Allocator);
+ }
+
+ private:
+ static TargetAsmParser *Allocator(const Target &T, MCAsmParser &P) {
+ return new AsmParserImpl(T, P);
+ }
+ };
+
+ /// RegisterCodeEmitter - Helper template for registering a target specific
+ /// machine code emitter, for use in the target initialization
+ /// function. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooCodeEmitter() {
+ /// extern Target TheFooTarget;
+ /// RegisterCodeEmitter<FooCodeEmitter> X(TheFooTarget);
+ /// }
+ template<class CodeEmitterImpl>
+ struct RegisterCodeEmitter {
+ RegisterCodeEmitter(Target &T) {
+ TargetRegistry::RegisterCodeEmitter(T, &Allocator);
+ }
+
+ private:
+ static MCCodeEmitter *Allocator(const Target &T, TargetMachine &TM) {
+ return new CodeEmitterImpl(T, TM);
+ }
+ };
+
+}
+
+#endif
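
A sketch of the client-side flow this header enables: look a target up by triple, then construct its TargetMachine (the triple argument and the empty feature string are example values):

    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetRegistry.h"
    #include <string>

    llvm::TargetMachine *createTM(const std::string &TripleStr) {
      std::string Error;
      const llvm::Target *T =
          llvm::TargetRegistry::lookupTarget(TripleStr, Error);
      if (!T || !T->hasTargetMachine())
        return 0; // Error holds the reason when lookup fails.
      // Empty feature string; real clients pass CPU/attribute flags here.
      return T->createTargetMachine(TripleStr, "");
    }
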
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 38461c5a380e..dcc09921d994 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -23,14 +23,23 @@
class FuncUnit;
//===----------------------------------------------------------------------===//
-// Instruction stage - These values represent a step in the execution of an
-// instruction. The latency represents the number of discrete time slots used
-// need to complete the stage. Units represent the choice of functional units
-// that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
+// Instruction stage - These values represent a non-pipelined step in
+// the execution of an instruction. Cycles represents the number of
+// discrete time slots needed to complete the stage. Units represent
+// the choice of functional units that can be used to complete the
+// stage, e.g. IntUnit1 or IntUnit2. TimeInc indicates how many
+// cycles should elapse from the start of this stage to the start of
+// the next stage in the itinerary.
//
-class InstrStage<int cycles, list<FuncUnit> units> {
+// A stage is specified in one of two ways:
+//
+// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
+// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
+//
+class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
int Cycles = cycles; // length of stage in machine cycles
list<FuncUnit> Units = units; // choice of functional units
+ int TimeInc = timeinc; // cycles till start of next stage
}
//===----------------------------------------------------------------------===//
@@ -51,11 +60,13 @@ def NoItinerary : InstrItinClass;
//===----------------------------------------------------------------------===//
// Instruction itinerary data - These values provide a runtime map of an
-// instruction itinerary class (name) to it's itinerary data.
+// instruction itinerary class (name) to its itinerary data.
//
-class InstrItinData<InstrItinClass Class, list<InstrStage> stages> {
+class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
+ list<int> operandcycles = []> {
InstrItinClass TheClass = Class;
list<InstrStage> Stages = stages;
+ list<int> OperandCycles = operandcycles;
}
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h
index 002d5fc70e5e..e79f651e5da5 100644
--- a/include/llvm/Target/TargetSelect.h
+++ b/include/llvm/Target/TargetSelect.h
@@ -1,4 +1,4 @@
-//===- TargetSelect.h - Target Selection & Registration -------------------===//
+//===- TargetSelect.h - Target Selection & Registration ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,37 +20,76 @@
extern "C" {
// Declare all of the target-initialization functions that are available.
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo();
+#include "llvm/Config/Targets.def"
+
#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
- // Declare all of the available asm-printer initialization functions.
+ // Declare all of the available assembly printer initialization functions.
#define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter();
#include "llvm/Config/AsmPrinters.def"
+
+ // Declare all of the available assembly parser initialization functions.
+#define LLVM_ASM_PARSER(TargetName) void LLVMInitialize##TargetName##AsmParser();
+#include "llvm/Config/AsmParsers.def"
}
namespace llvm {
+ /// InitializeAllTargetInfos - The main program should call this function if
+ /// it wants access to all available targets that LLVM is configured to
+ /// support, to make them available via the TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
+ inline void InitializeAllTargetInfos() {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo();
+#include "llvm/Config/Targets.def"
+ }
+
/// InitializeAllTargets - The main program should call this function if it
- /// wants to link in all available targets that LLVM is configured to support.
+ /// wants access to all available target machines that LLVM is configured to
+ /// support, to make them available via the TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
inline void InitializeAllTargets() {
+ // FIXME: Remove this, clients should do it.
+ InitializeAllTargetInfos();
+
#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
}
/// InitializeAllAsmPrinters - The main program should call this function if
- /// it wants all asm printers that LLVM is configured to support. This will
- /// cause them to be linked into its executable.
+ /// it wants all asm printers that LLVM is configured to support, to make them
+ /// available via the TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
inline void InitializeAllAsmPrinters() {
#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter();
#include "llvm/Config/AsmPrinters.def"
}
+ /// InitializeAllAsmParsers - The main program should call this function if it
+ /// wants all asm parsers that LLVM is configured to support, to make them
+ /// available via the TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
+ inline void InitializeAllAsmParsers() {
+#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
+#include "llvm/Config/AsmParsers.def"
+ }
+
/// InitializeNativeTarget - The main program should call this function to
/// initialize the native target corresponding to the host. This is useful
/// for JIT applications to ensure that the target gets linked in correctly.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
inline bool InitializeNativeTarget() {
// If we have a native target, initialize it to ensure it is linked in.
#ifdef LLVM_NATIVE_ARCH
-#define DoInit2(TARG) LLVMInitialize ## TARG ()
+#define DoInit2(TARG) \
+ LLVMInitialize ## TARG ## Info (); \
+ LLVMInitialize ## TARG ()
#define DoInit(T) DoInit2(T)
DoInit(LLVM_NATIVE_ARCH);
return false;
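
Typical initialization under the new registration scheme, as a sketch: a JIT host needs only InitializeNativeTarget(), while a cross tool such as llc initializes everything:

    #include "llvm/Target/TargetSelect.h"

    void initForJIT() {
      // Registers just the host target; legal to call more than once.
      llvm::InitializeNativeTarget();
    }

    void initForCrossCodegen() {
      llvm::InitializeAllTargetInfos();
      llvm::InitializeAllTargets();
      llvm::InitializeAllAsmPrinters();
      llvm::InitializeAllAsmParsers();
    }
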
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 364d4d0d3cc0..700c64c8ca53 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -30,12 +30,15 @@ class SDTCisVT<int OpNum, ValueType vt> : SDTypeConstraint<OpNum> {
class SDTCisPtrTy<int OpNum> : SDTypeConstraint<OpNum>;
-// SDTCisInt - The specified operand is has integer type.
+// SDTCisInt - The specified operand has integer type.
class SDTCisInt<int OpNum> : SDTypeConstraint<OpNum>;
-// SDTCisFP - The specified operand is has floating point type.
+// SDTCisFP - The specified operand has floating-point type.
class SDTCisFP<int OpNum> : SDTypeConstraint<OpNum>;
+// SDTCisVec - The specified operand has a vector type.
+class SDTCisVec<int OpNum> : SDTypeConstraint<OpNum>;
+
// SDTCisSameAs - The two specified operands have identical types.
class SDTCisSameAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
int OtherOperandNum = OtherOp;
@@ -345,7 +348,6 @@ def vsetcc : SDNode<"ISD::VSETCC" , SDTSetCC>;
def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>;
def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>;
def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
-def ret : SDNode<"ISD::RET" , SDTNone, [SDNPHasChain]>;
def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h
index eca45eb0d745..ac094f664419 100644
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@@ -16,6 +16,9 @@
namespace llvm {
+class SDep;
+class SUnit;
+
//===----------------------------------------------------------------------===//
///
/// TargetSubtarget - Generic base class for all target subtargets. All
@@ -35,6 +38,15 @@ public:
/// indicating the number of scheduling cycles of backscheduling that
/// should be attempted.
virtual unsigned getSpecialAddressLatency() const { return 0; }
+
+ // enablePostRAScheduler - Return true to enable
+ // post-register-allocation scheduling.
+ virtual bool enablePostRAScheduler() const { return false; }
+
+ // adjustSchedDependency - Perform target specific adjustments to
+ // the latency of a schedule dependency.
+ virtual void adjustSchedDependency(SUnit *def, SUnit *use,
+ SDep& dep) const { }
};
} // End llvm namespace
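
How a backend might opt in to the new hooks (a sketch; FooSubtarget is a made-up target):

    #include "llvm/Target/TargetSubtarget.h"

    class FooSubtarget : public llvm::TargetSubtarget {
    public:
      // Request the post-register-allocation scheduler.
      virtual bool enablePostRAScheduler() const { return true; }
      // adjustSchedDependency could likewise be overridden to tweak the
      // latency recorded on individual SDep edges.
    };
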
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 750969b36ebd..d66ed896d80c 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -19,7 +19,6 @@
namespace llvm {
-class FunctionPass;
class ModulePass;
class Pass;
class Function;
@@ -174,12 +173,12 @@ ModulePass *createIPSCCPPass();
/// createLoopExtractorPass - This pass extracts all natural loops from the
/// program into a function if it can.
///
-FunctionPass *createLoopExtractorPass();
+Pass *createLoopExtractorPass();
/// createSingleLoopExtractorPass - This pass extracts one natural loop from the
/// program into a function if it can. This is used by bugpoint.
///
-FunctionPass *createSingleLoopExtractorPass();
+Pass *createSingleLoopExtractorPass();
/// createBlockExtractorPass - This pass extracts all blocks (except those
/// specified in the argument list) from the functions in the module.
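
Because the extractors are now generic Pass objects rather than FunctionPasses, they can carry loop-pass semantics; from a client's point of view scheduling one is unchanged (a sketch):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"

    void addLoopExtraction(llvm::PassManager &PM) {
      // Now returns Pass* instead of FunctionPass*.
      PM.add(llvm::createLoopExtractorPass());
    }
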
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index b370e964aa59..5d00f4215a83 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -14,16 +14,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef INLINER_H
-#define INLINER_H
+#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
+#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Transforms/Utils/InlineCost.h"
-#include "llvm/Target/TargetData.h"
-
namespace llvm {
class CallSite;
+ class TargetData;
+ class InlineCost;
+ template<class PtrType, unsigned SmallSize>
+ class SmallPtrSet;
/// Inliner - This class contains all of the helper code which is used to
/// perform the inlining operations that do not depend on the policy.
@@ -39,17 +40,12 @@ struct Inliner : public CallGraphSCCPass {
// Main run interface method, this implements the interface required by the
// Pass class.
- virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
// doFinalization - Remove now-dead linkonce functions at the end of
// processing to avoid breaking the SCC traversal.
virtual bool doFinalization(CallGraph &CG);
- // InlineCallIfPossible
- bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
- const SmallPtrSet<Function*, 8> &SCCFunctions,
- const TargetData &TD);
-
/// This method returns the value specified by the -inline-threshold value,
/// specified on the command line. This is typically not directly needed.
///
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 698e248e7e64..9794ffd42998 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -28,6 +28,9 @@ ModulePass *createBlockProfilerPass();
// Insert edge profiling instrumentation
ModulePass *createEdgeProfilerPass();
+// Insert optimal edge profiling instrumentation
+ModulePass *createOptimalEdgeProfilerPass();
+
// Random Sampling Profiling Framework
ModulePass* createNullProfilerRSPass();
FunctionPass* createRSProfilingPass();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 29cd3e3dc6c6..2483768ead57 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -220,12 +220,12 @@ extern const PassInfo *const BreakCriticalEdgesID;
//
// AU.addRequiredID(LoopSimplifyID);
//
-FunctionPass *createLoopSimplifyPass();
+Pass *createLoopSimplifyPass();
extern const PassInfo *const LoopSimplifyID;
//===----------------------------------------------------------------------===//
//
-// LowerAllocations - Turn malloc and free instructions into %malloc and %free
+// LowerAllocations - Turn malloc and free instructions into @malloc and @free
// calls.
//
// AU.addRequiredID(LowerAllocationsID);
@@ -278,20 +278,6 @@ extern const PassInfo *const LCSSAID;
//===----------------------------------------------------------------------===//
//
-// PredicateSimplifier - This pass collapses duplicate variables into one
-// canonical form, and tries to simplify expressions along the way.
-//
-FunctionPass *createPredicateSimplifierPass();
-
-//===----------------------------------------------------------------------===//
-//
-// GVN-PRE - This pass performs global value numbering and partial redundancy
-// elimination.
-//
-FunctionPass *createGVNPREPass();
-
-//===----------------------------------------------------------------------===//
-//
// GVN - This pass performs global value numbering and redundant load
// elimination cotemporaneously.
//
@@ -329,6 +315,11 @@ FunctionPass *createSimplifyHalfPowrLibCallsPass();
//
FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0);
+//===----------------------------------------------------------------------===//
+//
+// CodeGenLICM - This pass performs late LICM, hoisting constants out of loops.
+//
+Pass *createCodeGenLICMPass();
//===----------------------------------------------------------------------===//
//
@@ -339,10 +330,18 @@ extern const PassInfo *const InstructionNamerID;
//===----------------------------------------------------------------------===//
//
-// SSI - This pass converts to Static Single Information form.
+// SSI - This pass converts instructions to Static Single Information form
+// on demand.
//
FunctionPass *createSSIPass();
+//===----------------------------------------------------------------------===//
+//
+// SSI - This pass converts every non-void instruction to Static Single
+// Information form.
+//
+FunctionPass *createSSIEverythingPass();
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Transforms/Utils/AddrModeMatcher.h b/include/llvm/Transforms/Utils/AddrModeMatcher.h
index 913a541f8b5d..be601e257b8c 100644
--- a/include/llvm/Transforms/Utils/AddrModeMatcher.h
+++ b/include/llvm/Transforms/Utils/AddrModeMatcher.h
@@ -20,7 +20,6 @@
#define LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -30,18 +29,19 @@ class Instruction;
class Value;
class Type;
class User;
-
+class raw_ostream;
+
/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg;
Value *ScaledReg;
ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
- void print(OStream &OS) const;
+ void print(raw_ostream &OS) const;
void dump() const;
};
-static inline OStream &operator<<(OStream &OS, const ExtAddrMode &AM) {
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
return OS;
}
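
With the Streams.h dependency gone, ExtAddrMode prints through raw_ostream; a minimal sketch:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/Utils/AddrModeMatcher.h"

    void dumpMode(const llvm::ExtAddrMode &AM) {
      // operator<< now targets raw_ostream rather than the removed OStream.
      llvm::errs() << AM << "\n";
    }
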
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 95ffa4606960..e766d729e1b0 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -126,10 +126,10 @@ bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
/// dest go to one block instead of each going to a different block, but isn't
/// the standard definition of a "critical edge".
///
-bool SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P = 0,
- bool MergeIdenticalEdges = false);
+BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ Pass *P = 0, bool MergeIdenticalEdges = false);
-inline bool SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, Pass *P = 0) {
+inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, Pass *P = 0) {
return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(), P);
}
@@ -143,7 +143,7 @@ inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, Pass *P = 0) {
TerminatorInst *TI = (*PI)->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (TI->getSuccessor(i) == Succ)
- MadeChange |= SplitCriticalEdge(TI, i, P);
+ MadeChange |= !!SplitCriticalEdge(TI, i, P);
return MadeChange;
}
@@ -151,8 +151,9 @@ inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, Pass *P = 0) {
/// and return true, otherwise return false. This method requires that there be
/// an edge between the two blocks. If P is specified, it updates the analyses
/// described above.
-inline bool SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst, Pass *P = 0,
- bool MergeIdenticalEdges = false) {
+inline BasicBlock *SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst,
+ Pass *P = 0,
+ bool MergeIdenticalEdges = false) {
TerminatorInst *TI = Src->getTerminator();
unsigned i = 0;
while (1) {
@@ -180,8 +181,12 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
/// Preds array, which has NumPreds elements in it. The new block is given a
/// suffix of 'Suffix'. This function returns the new block.
///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and
-/// DominanceFrontier, but no other analyses.
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCSSA, but no other analyses.
+/// In particular, it does not preserve LoopSimplify (because it's
+/// complicated to handle the case where one of the edges being split
+/// is an exit of a loop with other exits).
+///
BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
unsigned NumPreds, const char *Suffix,
Pass *P = 0);
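
Since SplitCriticalEdge now hands back the inserted block instead of a bool, a
caller can both detect the change and work with the new block. A hedged sketch
(splitAndName is an invented helper; TI, SuccNum, and P are assumed to come
from the caller):

    #include "llvm/InstrTypes.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    static bool splitAndName(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
      if (BasicBlock *NewBB = SplitCriticalEdge(TI, SuccNum, P)) {
        NewBB->setName("crit.edge.split"); // the new block is directly usable
        return true;                       // matches the old 'true' result
      }
      return false; // edge was not critical; the IR is unchanged
    }
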
diff --git a/include/llvm/Transforms/Utils/BasicInliner.h b/include/llvm/Transforms/Utils/BasicInliner.h
index 6a570552d640..4bca6b8c4417 100644
--- a/include/llvm/Transforms/Utils/BasicInliner.h
+++ b/include/llvm/Transforms/Utils/BasicInliner.h
@@ -15,7 +15,7 @@
#ifndef BASICINLINER_H
#define BASICINLINER_H
-#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/Analysis/InlineCost.h"
namespace llvm {
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 840d9708cbaf..5b15b5b87199 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -18,7 +18,6 @@
#ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
#define LLVM_TRANSFORMS_UTILS_CLONING_H
-#include <vector>
#include "llvm/ADT/DenseMap.h"
namespace llvm {
@@ -36,10 +35,11 @@ class CallSite;
class Trace;
class CallGraph;
class TargetData;
+class Loop;
class LoopInfo;
class LLVMContext;
-template<class N> class LoopBase;
-typedef LoopBase<BasicBlock> Loop;
+class AllocaInst;
+template <typename T> class SmallVectorImpl;
/// CloneModule - Return an exact copy of the specified module
///
@@ -105,9 +105,9 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB,
ClonedCodeInfo *CodeInfo = 0);
-/// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate ValueMap
-/// using old blocks to new blocks mapping.
-Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
+/// CloneLoop - Clone the given loop. Clone the dominator info for the loop's
+/// blocks. Populate ValueMap with the mapping from old blocks to new blocks.
+Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
DenseMap<const Value *, Value *> &ValueMap, Pass *P);
/// CloneFunction - Return a copy of the specified function, but without
@@ -138,7 +138,7 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
///
void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
DenseMap<const Value*, Value*> &ValueMap,
- std::vector<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = 0);
@@ -151,25 +151,11 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
/// used for things like CloneFunction or CloneModule.
void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
DenseMap<const Value*, Value*> &ValueMap,
- std::vector<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = 0,
const TargetData *TD = 0);
-
-/// CloneTraceInto - Clone T into NewFunc. Original<->clone mapping is
-/// saved in ValueMap.
-///
-void CloneTraceInto(Function *NewFunc, Trace &T,
- DenseMap<const Value*, Value*> &ValueMap,
- const char *NameSuffix);
-
-/// CloneTrace - Returns a copy of the specified trace.
-/// It takes a vector of basic blocks clones the basic blocks, removes internal
-/// phi nodes, adds it to the same function as the original (although there is
-/// no jump to it) and returns the new vector of basic blocks.
-std::vector<BasicBlock *> CloneTrace(const std::vector<BasicBlock*> &origTrace);
-
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
@@ -183,9 +169,15 @@ std::vector<BasicBlock *> CloneTrace(const std::vector<BasicBlock*> &origTrace);
/// If a non-null callgraph pointer is provided, these functions update the
/// CallGraph to represent the program after inlining.
///
-bool InlineFunction(CallInst *C, CallGraph *CG = 0, const TargetData *TD = 0);
-bool InlineFunction(InvokeInst *II, CallGraph *CG = 0, const TargetData *TD =0);
-bool InlineFunction(CallSite CS, CallGraph *CG = 0, const TargetData *TD = 0);
+/// If StaticAllocas is non-null, InlineFunction populates it with all of the
+/// static allocas that it inlines into the caller.
+///
+bool InlineFunction(CallInst *C, CallGraph *CG = 0, const TargetData *TD = 0,
+ SmallVectorImpl<AllocaInst*> *StaticAllocas = 0);
+bool InlineFunction(InvokeInst *II, CallGraph *CG = 0, const TargetData *TD = 0,
+ SmallVectorImpl<AllocaInst*> *StaticAllocas = 0);
+bool InlineFunction(CallSite CS, CallGraph *CG = 0, const TargetData *TD = 0,
+ SmallVectorImpl<AllocaInst*> *StaticAllocas = 0);
} // End llvm namespace
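
A hedged usage sketch for the new out-parameter (illustrative only;
inlineAndCollect is an invented name and CS is assumed to come from the
caller):

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static bool inlineAndCollect(CallSite CS) {
      SmallVector<AllocaInst*, 8> StaticAllocas;
      if (!InlineFunction(CS, /*CG=*/0, /*TD=*/0, &StaticAllocas))
        return false; // not inlinable; the program is unchanged
      // StaticAllocas now lists every static alloca copied into the caller,
      // e.g. as a worklist for a follow-on stack optimization.
      return true;
    }
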
diff --git a/include/llvm/Transforms/Utils/FunctionUtils.h b/include/llvm/Transforms/Utils/FunctionUtils.h
index dc7ef238652c..785b08f82917 100644
--- a/include/llvm/Transforms/Utils/FunctionUtils.h
+++ b/include/llvm/Transforms/Utils/FunctionUtils.h
@@ -14,13 +14,13 @@
#ifndef LLVM_TRANSFORMS_UTILS_FUNCTION_H
#define LLVM_TRANSFORMS_UTILS_FUNCTION_H
-#include "llvm/Analysis/LoopInfo.h"
#include <vector>
namespace llvm {
class BasicBlock;
class DominatorTree;
class Function;
+ class Loop;
/// ExtractCodeRegion - rip out a sequence of basic blocks into a new function
///
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index dd423fa3b173..419029f10ee1 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -27,6 +27,7 @@ class PHINode;
class AllocaInst;
class ConstantExpr;
class TargetData;
+class LLVMContext;
struct DbgInfoIntrinsic;
template<typename T> class SmallVectorImpl;
@@ -82,7 +83,7 @@ void RecursivelyDeleteDeadPHINode(PHINode *PN);
/// between them, moving the instructions in the predecessor into BB. This
/// deletes the predecessor block.
///
-void MergeBasicBlockIntoOnlyPred(BasicBlock *BB);
+void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, Pass *P = 0);
/// SimplifyCFG - This function is used to do simplification of a CFG. For
@@ -107,7 +108,8 @@ bool FoldBranchToCommonDest(BranchInst *BI);
/// invalidating the SSA information for the value. It returns the pointer to
/// the alloca inserted to create a stack slot for X.
///
-AllocaInst *DemoteRegToStack(Instruction &X, bool VolatileLoads = false,
+AllocaInst *DemoteRegToStack(Instruction &X,
+ bool VolatileLoads = false,
Instruction *AllocaPoint = 0);
/// DemotePHIToStack - This function takes a virtual register computed by a phi
diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h
index 35cfaddb7379..71a077e8625e 100644
--- a/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -23,6 +23,7 @@ class AllocaInst;
class DominatorTree;
class DominanceFrontier;
class AliasSetTracker;
+class LLVMContext;
/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca...
@@ -39,6 +40,7 @@ bool isAllocaPromotable(const AllocaInst *AI);
///
void PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
DominatorTree &DT, DominanceFrontier &DF,
+ LLVMContext &Context,
AliasSetTracker *AST = 0);
} // End llvm namespace
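
A hypothetical call site for the extended signature (promoteAllocas is an
invented name; DT and DF would normally come from a pass's getAnalysis<>()
calls, and DominanceFrontier is assumed to be declared in Dominators.h as in
LLVM of this era):

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    #include <vector>
    using namespace llvm;

    static void promoteAllocas(std::vector<AllocaInst*> &Allocas,
                               DominatorTree &DT, DominanceFrontier &DF,
                               Function &F) {
      if (!Allocas.empty())
        PromoteMemToReg(Allocas, DT, DF, F.getContext());
    }
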
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
new file mode 100644
index 000000000000..11b90d426778
--- /dev/null
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -0,0 +1,108 @@
+//===-- SSAUpdater.h - Unstructured SSA Update Tool -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
+#define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
+
+namespace llvm {
+ class Value;
+ class BasicBlock;
+ class Use;
+ class PHINode;
+ template<typename T>
+ class SmallVectorImpl;
+
+/// SSAUpdater - This class updates SSA form for a set of values defined in
+/// multiple blocks. This is used when code duplication or another unstructured
+/// transformation wants to rewrite a set of uses of one value with uses of a
+/// set of values.
+class SSAUpdater {
+  /// AvailableVals - This keeps track of which value to use on a per-block
+  /// basis. When we insert PHI nodes, we keep track of them here. We use
+  /// TrackingVH's for the values of the map because we RAUW PHI nodes when we
+  /// eliminate them, and want the TrackingVH to track this.
+ //typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
+ void *AV;
+
+  /// PrototypeValue is an arbitrary representative value, from which we
+  /// derive names and a type for PHI nodes.
+ Value *PrototypeValue;
+
+  /// IncomingPredInfo - We use this as scratch space when doing our recursive
+  /// walk. This should only be used in GetValueAtEndOfBlockInternal; normally
+  /// it should be empty.
+ //std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > IncomingPredInfo;
+ void *IPI;
+
+ /// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that
+ /// it creates to the vector.
+ SmallVectorImpl<PHINode*> *InsertedPHIs;
+public:
+ /// SSAUpdater constructor. If InsertedPHIs is specified, it will be filled
+ /// in with all PHI Nodes created by rewriting.
+ SSAUpdater(SmallVectorImpl<PHINode*> *InsertedPHIs = 0);
+ ~SSAUpdater();
+
+ /// Initialize - Reset this object to get ready for a new set of SSA
+ /// updates. ProtoValue is the value used to name PHI nodes.
+ void Initialize(Value *ProtoValue);
+
+ /// AddAvailableValue - Indicate that a rewritten value is available at the
+ /// end of the specified block with the specified value.
+ void AddAvailableValue(BasicBlock *BB, Value *V);
+
+ /// HasValueForBlock - Return true if the SSAUpdater already has a value for
+ /// the specified block.
+ bool HasValueForBlock(BasicBlock *BB) const;
+
+ /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+ /// live at the end of the specified block.
+ Value *GetValueAtEndOfBlock(BasicBlock *BB);
+
+ /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+ /// is live in the middle of the specified block.
+ ///
+ /// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+ /// important case: if there is a definition of the rewritten value after the
+ /// 'use' in BB. Consider code like this:
+ ///
+ /// X1 = ...
+ /// SomeBB:
+ /// use(X)
+ /// X2 = ...
+ /// br Cond, SomeBB, OutBB
+ ///
+ /// In this case, there are two values (X1 and X2) added to the AvailableVals
+ /// set by the client of the rewriter, and those values are both live out of
+ /// their respective blocks. However, the use of X happens in the *middle* of
+ /// a block. Because of this, we need to insert a new PHI node in SomeBB to
+ /// merge the appropriate values, and this value isn't live out of the block.
+ ///
+ Value *GetValueInMiddleOfBlock(BasicBlock *BB);
+
+ /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+ /// which use their value in the corresponding predecessor. Note that this
+ /// will not work if the use is supposed to be rewritten to a value defined in
+ /// the same block as the use, but above it. Any 'AddAvailableValue's added
+ /// for the use's block will be considered to be below it.
+ void RewriteUse(Use &U);
+
+private:
+ Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
+ void operator=(const SSAUpdater&); // DO NOT IMPLEMENT
+ SSAUpdater(const SSAUpdater&); // DO NOT IMPLEMENT
+};
+
+} // End llvm namespace
+
+#endif
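
To make the intended call sequence concrete, here is a minimal, hypothetical
client of the class above (rewriteAfterDuplication and the block/value names
are invented; the use-iterator idiom assumes the Value API of this era):

    #include "llvm/Value.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static void rewriteAfterDuplication(Value *OrigVal,
                                        BasicBlock *BB1, Value *V1,
                                        BasicBlock *BB2, Value *V2) {
      SmallVector<PHINode*, 8> NewPHIs;
      SSAUpdater SSA(&NewPHIs);       // collect any PHIs the updater creates
      SSA.Initialize(OrigVal);        // new PHIs take OrigVal's name and type
      SSA.AddAvailableValue(BB1, V1); // V1 is the value live out of BB1
      SSA.AddAvailableValue(BB2, V2); // V2 is the value live out of BB2

      // Snapshot the uses first: RewriteUse edits the use list as it goes.
      SmallVector<Use*, 16> Uses;
      for (Value::use_iterator UI = OrigVal->use_begin(),
             E = OrigVal->use_end(); UI != E; ++UI)
        Uses.push_back(&UI.getUse());
      for (unsigned i = 0, e = Uses.size(); i != e; ++i)
        SSA.RewriteUse(*Uses[i]);
    }
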
diff --git a/include/llvm/Transforms/Utils/SSI.h b/include/llvm/Transforms/Utils/SSI.h
index 59dd6d026ce5..ff5bb7b8614d 100644
--- a/include/llvm/Transforms/Utils/SSI.h
+++ b/include/llvm/Transforms/Utils/SSI.h
@@ -23,7 +23,6 @@
#define LLVM_TRANSFORMS_UTILS_SSI_H
#include "llvm/Pass.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -44,7 +43,6 @@ namespace llvm {
void getAnalysisUsage(AnalysisUsage &AU) const;
- /// runOnMachineFunction - pass entry point
bool runOnFunction(Function&);
void createSSI(SmallVectorImpl<Instruction *> &value);
@@ -56,44 +54,36 @@ namespace llvm {
// Stores variables created by SSI
SmallPtrSet<Instruction *, 16> created;
- // These variables are only live for each creation
- unsigned num_values;
-
- // Has a bit for each variable, true if it needs to be created
- // and false otherwise
- BitVector needConstruction;
-
// Phis created by SSI
- DenseMap<PHINode *, unsigned> phis;
+ DenseMap<PHINode *, Instruction*> phis;
// Sigmas created by SSI
- DenseMap<PHINode *, unsigned> sigmas;
+ DenseMap<PHINode *, Instruction*> sigmas;
// Phi nodes that have a phi as operand and has to be fixed
SmallPtrSet<PHINode *, 1> phisToFix;
// List of definition points for every variable
- SmallVector<SmallVector<BasicBlock *, 1>, 0> defsites;
+ DenseMap<Instruction*, SmallVector<BasicBlock*, 4> > defsites;
// Basic Block of the original definition of each variable
- SmallVector<BasicBlock *, 0> value_original;
+ DenseMap<Instruction*, BasicBlock*> value_original;
// Stack of last seen definition of a variable
- SmallVector<SmallVector<Instruction *, 1>, 0> value_stack;
+ DenseMap<Instruction*, SmallVector<Instruction *, 1> > value_stack;
- void insertSigmaFunctions(SmallVectorImpl<Instruction *> &value);
- void insertPhiFunctions(SmallVectorImpl<Instruction *> &value);
- void renameInit(SmallVectorImpl<Instruction *> &value);
+ void insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value);
+ void insertSigma(TerminatorInst *TI, Instruction *I);
+ void insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value);
+ void renameInit(SmallPtrSet<Instruction*, 4> &value);
void rename(BasicBlock *BB);
void substituteUse(Instruction *I);
bool dominateAny(BasicBlock *BB, Instruction *value);
void fixPhis();
- unsigned getPositionPhi(PHINode *PN);
- unsigned getPositionSigma(PHINode *PN);
-
- unsigned isUsedInTerminator(CmpInst *CI);
+ Instruction* getPositionPhi(PHINode *PN);
+ Instruction* getPositionSigma(PHINode *PN);
void init(SmallVectorImpl<Instruction *> &value);
void clean();
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index a9c0bf6968a7..3d5ee1a62b8a 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -16,10 +16,10 @@
#ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
#define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
-#include "llvm/Analysis/LoopInfo.h"
-
namespace llvm {
+class Loop;
+class LoopInfo;
class LPPassManager;
bool UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM);
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index ed3341364181..d31edab5b551 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -20,9 +20,10 @@
namespace llvm {
class Value;
class Instruction;
+ class LLVMContext;
typedef DenseMap<const Value *, Value *> ValueMapTy;
- Value *MapValue(const Value *V, ValueMapTy &VM);
+ Value *MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context);
void RemapInstruction(Instruction *I, ValueMapTy &VM);
} // End llvm namespace
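
The extra parameter threads the context explicitly; a hypothetical call
(remapOperand is an invented helper name):

    #include "llvm/Function.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    static Value *remapOperand(const Value *OldV, ValueMapTy &VM, Function &F) {
      // Value::getContext() is the accessor added elsewhere in this patch.
      return MapValue(OldV, VM, F.getContext());
    }
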
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index d439233d8c05..4a470af9ca5b 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -12,11 +12,11 @@
#define LLVM_TYPE_H
#include "llvm/AbstractTypeUser.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/System/Atomic.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator.h"
#include <string>
#include <vector>
@@ -66,6 +66,7 @@ public:
/// value, you can cast to a "DerivedType" subclass (see DerivedTypes.h)
/// Note: If you add an element to this, you need to add an element to the
/// Type::getPrimitiveType function, or else things will break!
+  /// Also update LLVMTypeKind and LLVMGetTypeKind() in the C binding.
///
enum TypeID {
// PrimitiveTypes .. make sure LastPrimitiveTyID stays up to date
@@ -105,6 +106,10 @@ private:
///
mutable sys::cas_flag RefCount;
+ /// Context - This refers to the LLVMContext in which this type was uniqued.
+ LLVMContext &Context;
+ friend class LLVMContextImpl;
+
const Type *getForwardedTypeInternal() const;
// Some Type instances are allocated as arrays, some aren't. So we provide
@@ -112,8 +117,10 @@ private:
void destroy() const; // const is a lie, this does "delete this"!
protected:
- explicit Type(TypeID id) : ID(id), Abstract(false), SubclassData(0),
- RefCount(0), ForwardType(0), NumContainedTys(0),
+ explicit Type(LLVMContext &C, TypeID id) :
+ ID(id), Abstract(false), SubclassData(0),
+ RefCount(0), Context(C),
+ ForwardType(0), NumContainedTys(0),
ContainedTys(0) {}
virtual ~Type() {
assert(AbstractTypeUsers.empty() && "Abstract types remain");
@@ -160,7 +167,6 @@ protected:
public:
void print(raw_ostream &O) const;
- void print(std::ostream &O) const;
/// @brief Debugging support: print to stderr
void dump() const;
@@ -169,6 +175,9 @@ public:
/// module).
void dump(const Module *Context) const;
+ /// getContext - Fetch the LLVMContext in which this type was uniqued.
+ LLVMContext &getContext() const { return Context; }
+
//===--------------------------------------------------------------------===//
// Property accessors for dealing with types... Some of these virtual methods
// are defined in private classes defined in Type.cpp for primitive types.
@@ -179,6 +188,30 @@ public:
///
inline TypeID getTypeID() const { return ID; }
+ /// isVoidTy - Return true if this is 'void'.
+ bool isVoidTy() const { return ID == VoidTyID; }
+
+ /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
+ bool isFloatTy() const { return ID == FloatTyID; }
+
+ /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
+ bool isDoubleTy() const { return ID == DoubleTyID; }
+
+ /// isX86_FP80Ty - Return true if this is x86 long double.
+ bool isX86_FP80Ty() const { return ID == X86_FP80TyID; }
+
+ /// isFP128Ty - Return true if this is 'fp128'.
+ bool isFP128Ty() const { return ID == FP128TyID; }
+
+ /// isPPC_FP128Ty - Return true if this is powerpc long double.
+ bool isPPC_FP128Ty() const { return ID == PPC_FP128TyID; }
+
+ /// isLabelTy - Return true if this is 'label'.
+ bool isLabelTy() const { return ID == LabelTyID; }
+
+ /// isMetadataTy - Return true if this is 'metadata'.
+ bool isMetadataTy() const { return ID == MetadataTyID; }
+
/// getDescription - Return the string representation of the type.
std::string getDescription() const;
@@ -191,7 +224,7 @@ public:
///
bool isIntOrIntVector() const;
- /// isFloatingPoint - Return true if this is one of the two floating point
+ /// isFloatingPoint - Return true if this is one of the five floating point
/// types
bool isFloatingPoint() const { return ID == FloatTyID || ID == DoubleTyID ||
ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; }
@@ -268,6 +301,11 @@ public:
/// This will return zero if the type does not have a size or is not a
/// primitive type.
///
+ /// Note that this may not reflect the size of memory allocated for an
+ /// instance of the type or the number of bytes that are written when an
+ /// instance of the type is stored to memory. The TargetData class provides
+ /// additional query functions to provide this information.
+ ///
unsigned getPrimitiveSizeInBits() const;
/// getScalarSizeInBits - If this is a vector type, return the
@@ -292,7 +330,7 @@ public:
/// getVAArgsPromotedType - Return the type an argument of this type
/// will be promoted to if passed through a variable argument
/// function.
- const Type *getVAArgsPromotedType() const;
+ const Type *getVAArgsPromotedType(LLVMContext &C) const;
/// getScalarType - If this is a vector type, return the element type,
/// otherwise return this.
@@ -324,14 +362,39 @@ public:
//
/// getPrimitiveType - Return a type based on an identifier.
- static const Type *getPrimitiveType(TypeID IDNumber);
+ static const Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
//===--------------------------------------------------------------------===//
// These are the builtin types that are always available...
//
- static const Type *VoidTy, *LabelTy, *FloatTy, *DoubleTy, *MetadataTy;
- static const Type *X86_FP80Ty, *FP128Ty, *PPC_FP128Ty;
- static const IntegerType *Int1Ty, *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty;
+ static const Type *getVoidTy(LLVMContext &C);
+ static const Type *getLabelTy(LLVMContext &C);
+ static const Type *getFloatTy(LLVMContext &C);
+ static const Type *getDoubleTy(LLVMContext &C);
+ static const Type *getMetadataTy(LLVMContext &C);
+ static const Type *getX86_FP80Ty(LLVMContext &C);
+ static const Type *getFP128Ty(LLVMContext &C);
+ static const Type *getPPC_FP128Ty(LLVMContext &C);
+ static const IntegerType *getInt1Ty(LLVMContext &C);
+ static const IntegerType *getInt8Ty(LLVMContext &C);
+ static const IntegerType *getInt16Ty(LLVMContext &C);
+ static const IntegerType *getInt32Ty(LLVMContext &C);
+ static const IntegerType *getInt64Ty(LLVMContext &C);
+
+ //===--------------------------------------------------------------------===//
+ // Convenience methods for getting pointer types with one of the above builtin
+ // types as pointee.
+ //
+ static const PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Type *) { return true; }
@@ -367,7 +430,7 @@ public:
/// getPointerTo - Return a pointer to the current type. This is equivalent
/// to PointerType::get(Foo, AddrSpace).
- PointerType *getPointerTo(unsigned AddrSpace = 0) const;
+ const PointerType *getPointerTo(unsigned AddrSpace = 0) const;
private:
/// isSizedDerivedType - Derived types like structures and arrays are sized
@@ -459,7 +522,6 @@ template <> inline bool isa_impl<PointerType, Type>(const Type &Ty) {
return Ty.getTypeID() == Type::PointerTyID;
}
-std::ostream &operator<<(std::ostream &OS, const Type &T);
raw_ostream &operator<<(raw_ostream &OS, const Type &T);
} // End llvm namespace
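
The practical effect of the new accessors is that every builtin type request
now names its LLVMContext; a short sketch (makeStrlenType is an invented name,
and the modeled signature is just an example):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <vector>
    using namespace llvm;

    static const FunctionType *makeStrlenType(LLVMContext &C) {
      std::vector<const Type*> Params;
      Params.push_back(Type::getInt8PtrTy(C)); // i8* in address space 0
      return FunctionType::get(Type::getInt64Ty(C), // result: i64
                               Params, /*isVarArg=*/false);
    }
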
diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h
index e1459b057941..4dd3a4af2a48 100644
--- a/include/llvm/TypeSymbolTable.h
+++ b/include/llvm/TypeSymbolTable.h
@@ -19,6 +19,8 @@
namespace llvm {
+class StringRef;
+
/// This class provides a symbol table of name/type pairs with operations to
/// support constructing, searching and iterating over the symbol table. The
/// class derives from AbstractTypeUser so that the contents of the symbol
@@ -55,14 +57,24 @@ public:
/// incrementing an integer and appending it to the name, if necessary
/// @returns the unique name
/// @brief Get a unique name for a type
- std::string getUniqueName(const std::string &BaseName) const;
+ std::string getUniqueName(const StringRef &BaseName) const;
/// This method finds the type with the given \p name in the type map
/// and returns it.
/// @returns null if the name is not found, otherwise the Type
/// associated with the \p name.
/// @brief Lookup a type by name.
- Type* lookup(const std::string& name) const;
+ Type *lookup(const StringRef &name) const;
+
+ /// Lookup the type associated with name.
+ /// @returns end() if the name is not found, or an iterator at the entry for
+ /// Type.
+ iterator find(const StringRef &name);
+
+ /// Lookup the type associated with name.
+ /// @returns end() if the name is not found, or an iterator at the entry for
+ /// Type.
+ const_iterator find(const StringRef &name) const;
/// @returns true iff the symbol table is empty.
/// @brief Determine if the symbol table is empty
@@ -102,7 +114,7 @@ public:
/// a many-to-one mapping between names and types. This method allows a type
/// with an existing entry in the symbol table to get a new name.
/// @brief Insert a type under a new name.
- void insert(const std::string &Name, const Type *Typ);
+ void insert(const StringRef &Name, const Type *Typ);
/// Remove a type at the specified position in the symbol table.
/// @returns the removed Type.
diff --git a/include/llvm/Use.h b/include/llvm/Use.h
index 489dbc50a041..970f69b9da88 100644
--- a/include/llvm/Use.h
+++ b/include/llvm/Use.h
@@ -26,8 +26,8 @@
#define LLVM_USE_H
#include "llvm/Support/Casting.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/PointerIntPair.h"
+#include <iterator>
namespace llvm {
@@ -158,8 +158,9 @@ template<> struct simplify_type<const Use> {
template<typename UserTy> // UserTy == 'User' or 'const User'
-class value_use_iterator : public forward_iterator<UserTy*, ptrdiff_t> {
- typedef forward_iterator<UserTy*, ptrdiff_t> super;
+class value_use_iterator : public std::iterator<std::forward_iterator_tag,
+ UserTy*, ptrdiff_t> {
+ typedef std::iterator<std::forward_iterator_tag, UserTy*, ptrdiff_t> super;
typedef value_use_iterator<UserTy> _Self;
Use *U;
diff --git a/include/llvm/User.h b/include/llvm/User.h
index 8a244068b24a..f8277952ee4b 100644
--- a/include/llvm/User.h
+++ b/include/llvm/User.h
@@ -41,7 +41,6 @@ struct OperandTraits<User> {
struct Layout {
typedef U overlay;
};
- static inline void *allocate(unsigned);
};
class User : public Value {
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index a38d8cb8d984..6b393f603a61 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -16,8 +16,9 @@
#include "llvm/AbstractTypeUser.h"
#include "llvm/Use.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
-#include <iosfwd>
#include <string>
namespace llvm {
@@ -40,6 +41,8 @@ typedef StringMapEntry<Value*> ValueName;
class raw_ostream;
class AssemblyAnnotationWriter;
class ValueHandleBase;
+class LLVMContext;
+class MetadataContext;
//===----------------------------------------------------------------------===//
// Value Class
@@ -61,7 +64,14 @@ class ValueHandleBase;
class Value {
const unsigned char SubclassID; // Subclass identifier (for isa/dyn_cast)
unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
+  unsigned char HasMetadata : 1;     // Has metadata attached to this?
protected:
+  /// SubclassOptionalData - This member is similar to SubclassData; however,
+  /// it holds information which may be used to aid optimization, but which
+  /// may be cleared to zero without affecting conservative interpretation.
+ unsigned char SubclassOptionalData : 7;
+
/// SubclassData - This member is defined by this class, but is not used for
/// anything. Subclasses can use it to hold whatever state they find useful.
/// This field is initialized to zero by the ctor.
@@ -73,57 +83,62 @@ private:
friend class ValueSymbolTable; // Allow ValueSymbolTable to directly mod Name.
friend class SymbolTable; // Allow SymbolTable to directly poke Name.
friend class ValueHandleBase;
+ friend class MetadataContext;
+ friend class AbstractTypeUser;
ValueName *Name;
void operator=(const Value &); // Do not implement
Value(const Value &); // Do not implement
+protected:
+ /// printCustom - Value subclasses can override this to implement custom
+ /// printing behavior.
+ virtual void printCustom(raw_ostream &O) const;
+
public:
Value(const Type *Ty, unsigned scid);
virtual ~Value();
/// dump - Support for debugging, callable in GDB: V->dump()
//
- virtual void dump() const;
+ void dump() const;
/// print - Implement operator<< on Value.
///
- void print(std::ostream &O, AssemblyAnnotationWriter *AAW = 0) const;
void print(raw_ostream &O, AssemblyAnnotationWriter *AAW = 0) const;
/// All values are typed, get the type of this value.
///
inline const Type *getType() const { return VTy; }
+ /// All values hold a context through their type.
+ LLVMContext &getContext() const;
+
// All values can potentially be named...
inline bool hasName() const { return Name != 0; }
ValueName *getValueName() const { return Name; }
-
- /// getNameStart - Return a pointer to a null terminated string for this name.
- /// Note that names can have null characters within the string as well as at
- /// their end. This always returns a non-null pointer.
- const char *getNameStart() const;
- /// getNameEnd - Return a pointer to the end of the name.
- const char *getNameEnd() const { return getNameStart() + getNameLen(); }
-
- /// isName - Return true if this value has the name specified by the provided
- /// nul terminated string.
- bool isName(const char *N) const;
- /// getNameLen - Return the length of the string, correctly handling nul
- /// characters embedded into them.
- unsigned getNameLen() const;
-
- /// getName()/getNameStr() - Return the name of the specified value,
- /// *constructing a string* to hold it. Because these are guaranteed to
- /// construct a string, they are very expensive and should be avoided.
- std::string getName() const { return getNameStr(); }
+ /// getName() - Return a constant reference to the value's name. This is cheap
+ /// and guaranteed to return the same reference as long as the value is not
+ /// modified.
+ ///
+  /// This is currently guaranteed to return a StringRef for which data()
+  /// points to a valid null-terminated string. The use of StringRef.data() is
+  /// deprecated here, however, and clients should not rely on it. If such
+  /// behavior is needed, clients should use the expensive getNameStr(), or
+  /// switch to an interface that does not depend on null termination.
+ StringRef getName() const;
+
+ /// getNameStr() - Return the name of the specified value, *constructing a
+ /// string* to hold it. This is guaranteed to construct a string and is very
+  /// expensive; clients should use getName() unless necessary.
std::string getNameStr() const;
-
- void setName(const std::string &name);
- void setName(const char *Name, unsigned NameLen);
- void setName(const char *Name); // Takes a null-terminated string.
+ /// setName() - Change the name of the value, choosing a new unique name if
+ /// the provided name is taken.
+ ///
+ /// \arg Name - The new name; or "" if the value's name should be removed.
+ void setName(const Twine &Name);
/// takeName - transfer the name from V to this value, setting V's name to
@@ -203,15 +218,16 @@ public:
ConstantStructVal, // This is an instance of ConstantStruct
ConstantVectorVal, // This is an instance of ConstantVector
ConstantPointerNullVal, // This is an instance of ConstantPointerNull
- MDStringVal, // This is an instance of MDString
MDNodeVal, // This is an instance of MDNode
+ MDStringVal, // This is an instance of MDString
+ NamedMDNodeVal, // This is an instance of NamedMDNode
InlineAsmVal, // This is an instance of InlineAsm
PseudoSourceValueVal, // This is an instance of PseudoSourceValue
InstructionVal, // This is an instance of Instruction
// Markers:
ConstantFirstVal = FunctionVal,
- ConstantLastVal = MDNodeVal
+ ConstantLastVal = ConstantPointerNullVal
};
/// getValueID - Return an ID for the concrete type of this object. This is
@@ -227,6 +243,25 @@ public:
return SubclassID;
}
+ /// getRawSubclassOptionalData - Return the raw optional flags value
+ /// contained in this value. This should only be used when testing two
+ /// Values for equivalence.
+ unsigned getRawSubclassOptionalData() const {
+ return SubclassOptionalData;
+ }
+
+ /// hasSameSubclassOptionalData - Test whether the optional flags contained
+ /// in this value are equal to the optional flags in the given value.
+ bool hasSameSubclassOptionalData(const Value *V) const {
+ return SubclassOptionalData == V->SubclassOptionalData;
+ }
+
+ /// intersectOptionalDataWith - Clear any optional flags in this value
+ /// that are not also set in the given value.
+ void intersectOptionalDataWith(const Value *V) {
+ SubclassOptionalData &= V->SubclassOptionalData;
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Value *) {
return true; // Values are always values.
@@ -263,12 +298,11 @@ public:
const BasicBlock *PredBB) const{
return const_cast<Value*>(this)->DoPHITranslation(CurBB, PredBB);
}
+
+  /// hasMetadata - Return true if metadata is attached to this value.
+ bool hasMetadata() const { return HasMetadata; }
};
-inline std::ostream &operator<<(std::ostream &OS, const Value &V) {
- V.print(OS);
- return OS;
-}
inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) {
V.print(OS);
return OS;
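
Taken together, the name APIs now flow through StringRef and Twine; a small,
hypothetical sketch of the new idioms (retagTemporary is an invented name):

    #include "llvm/Value.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    using namespace llvm;

    static void retagTemporary(Value *V, unsigned N) {
      StringRef Name = V->getName();     // cheap: no std::string constructed
      if (Name.startswith("tmp"))
        V->setName("retag." + Twine(N)); // auto-renamed if the name is taken
    }
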
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index 752dd2f24fec..4f8ebe800172 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -23,7 +23,9 @@ namespace llvm {
class SymbolTableListTraits;
class BasicBlock;
class Function;
+ class NamedMDNode;
class Module;
+ class StringRef;
/// This class provides a symbol table of name/value pairs. It is essentially
/// a std::map<std::string,Value*> but has a controlled interface provided by
@@ -37,6 +39,7 @@ class ValueSymbolTable {
friend class SymbolTableListTraits<Function, Module>;
friend class SymbolTableListTraits<GlobalVariable, Module>;
friend class SymbolTableListTraits<GlobalAlias, Module>;
+ friend class SymbolTableListTraits<NamedMDNode, Module>;
/// @name Types
/// @{
public:
@@ -62,12 +65,11 @@ public:
/// @{
public:
- /// This method finds the value with the given \p name in the
+ /// This method finds the value with the given \p Name in the
/// the symbol table.
- /// @returns the value associated with the \p name
+ /// @returns the value associated with the \p Name
/// @brief Lookup a named Value.
- Value *lookup(const std::string &name) const;
- Value *lookup(const char *NameBegin, const char *NameEnd) const;
+ Value *lookup(const StringRef &Name) const { return vmap.lookup(Name); }
/// @returns true iff the symbol table is empty
/// @brief Determine if the symbol table is empty
@@ -110,7 +112,7 @@ private:
/// createValueName - This method attempts to create a value name and insert
/// it into the symbol table with the specified name. If it conflicts, it
/// auto-renames the name and returns that instead.
- ValueName *createValueName(const char *NameStart, unsigned NameLen, Value *V);
+ ValueName *createValueName(const StringRef &Name, Value *V);
/// This method removes a value from the symbol table. It leaves the
/// ValueName attached to the value, but it is no longer inserted in the
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index c5523ec4634d..c456990d8ae2 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -88,7 +88,7 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) {
- return alias(L->getOperand(0), TD->getTypeStoreSize(L->getType()),
+ return alias(L->getOperand(0), getTypeStoreSize(L->getType()),
P, Size) ? Ref : NoModRef;
}
@@ -97,7 +97,7 @@ AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) {
// If the stored address cannot alias the pointer in question, then the
// pointer cannot be modified by the store.
if (!alias(S->getOperand(1),
- TD->getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+ getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have been
@@ -177,18 +177,23 @@ AliasAnalysis::~AliasAnalysis() {}
/// AliasAnalysis interface before any other methods are called.
///
void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
- TD = &P->getAnalysis<TargetData>();
+ TD = P->getAnalysisIfAvailable<TargetData>();
AA = &P->getAnalysis<AliasAnalysis>();
}
// getAnalysisUsage - All alias analysis implementations should invoke this
-// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that
-// TargetData is required by the pass.
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>(); // All AA's need TargetData.
AU.addRequired<AliasAnalysis>(); // All AA's chain
}
+/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : ~0u;
+}
+
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the value pointed to by Ptr.
///
@@ -228,13 +233,15 @@ bool llvm::isNoAliasCall(const Value *V) {
/// isIdentifiedObject - Return true if this pointer refers to a distinct and
/// identifiable object. This returns true for:
-/// Global Variables and Functions
+/// Global Variables and Functions (but not Global Aliases)
/// Allocas and Mallocs
/// ByVal and NoAlias Arguments
/// NoAlias returns
///
bool llvm::isIdentifiedObject(const Value *V) {
- if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isNoAliasCall(V))
+ if (isa<AllocationInst>(V) || isNoAliasCall(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
return true;
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasNoAliasAttr() || A->hasByValAttr();
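
The getTypeStoreSize wrapper added above is what lets these analyses run
without TargetData; a hedged sketch of the caller-side pattern, which the
evaluator changes further below follow as well (querySize is an invented
name):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static unsigned querySize(AliasAnalysis &AA, const Value *Ptr) {
      const Type *ElTy = cast<PointerType>(Ptr->getType())->getElementType();
      // ~0u means "unknown"; getTypeStoreSize returns the same when no
      // TargetData is registered, keeping queries conservative.
      return ElTy->isSized() ? AA.getTypeStoreSize(ElTy) : ~0u;
    }
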
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 4362d7d301a8..272c871ce239 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -18,11 +18,12 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
-PrintAll("count-aa-print-all-queries", cl::ReallyHidden);
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
static cl::opt<bool>
PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
@@ -41,33 +42,33 @@ namespace {
}
void printLine(const char *Desc, unsigned Val, unsigned Sum) {
- cerr << " " << Val << " " << Desc << " responses ("
- << Val*100/Sum << "%)\n";
+ errs() << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
}
~AliasAnalysisCounter() {
unsigned AASum = No+May+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
- cerr << "\n===== Alias Analysis Counter Report =====\n"
- << " Analysis counted: " << Name << "\n"
- << " " << AASum << " Total Alias Queries Performed\n";
+ errs() << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted: " << Name << "\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
if (AASum) {
printLine("no alias", No, AASum);
printLine("may alias", May, AASum);
printLine("must alias", Must, AASum);
- cerr << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
- << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
+ errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
}
- cerr << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
if (MRSum) {
printLine("no mod/ref", NoMR, MRSum);
printLine("ref", JustRef, MRSum);
printLine("mod", JustMod, MRSum);
printLine("mod/ref", MR, MRSum);
- cerr << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum<< "%/"
- << JustRef*100/MRSum << "%/" << JustMod*100/MRSum << "%/"
- << MR*100/MRSum <<"%\n\n";
+ errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
+ << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+ << "%/" << MR*100/MRSum <<"%\n\n";
}
}
}
@@ -89,19 +90,6 @@ namespace {
bool pointsToConstantMemory(const Value *P) {
return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P);
}
- bool doesNotAccessMemory(CallSite CS) {
- return getAnalysis<AliasAnalysis>().doesNotAccessMemory(CS);
- }
- bool doesNotAccessMemory(Function *F) {
- return getAnalysis<AliasAnalysis>().doesNotAccessMemory(F);
- }
- bool onlyReadsMemory(CallSite CS) {
- return getAnalysis<AliasAnalysis>().onlyReadsMemory(CS);
- }
- bool onlyReadsMemory(Function *F) {
- return getAnalysis<AliasAnalysis>().onlyReadsMemory(F);
- }
-
// Forwarding functions: just delegate to a real AA implementation, counting
// the number of responses...
@@ -131,20 +119,20 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
const char *AliasString;
switch (R) {
- default: assert(0 && "Unknown alias type!");
+ default: llvm_unreachable("Unknown alias type!");
case NoAlias: No++; AliasString = "No alias"; break;
case MayAlias: May++; AliasString = "May alias"; break;
case MustAlias: Must++; AliasString = "Must alias"; break;
}
if (PrintAll || (PrintAllFailures && R == MayAlias)) {
- cerr << AliasString << ":\t";
- cerr << "[" << V1Size << "B] ";
- WriteAsOperand(*cerr.stream(), V1, true, M);
- cerr << ", ";
- cerr << "[" << V2Size << "B] ";
- WriteAsOperand(*cerr.stream(), V2, true, M);
- cerr << "\n";
+ errs() << AliasString << ":\t";
+ errs() << "[" << V1Size << "B] ";
+ WriteAsOperand(errs(), V1, true, M);
+ errs() << ", ";
+ errs() << "[" << V2Size << "B] ";
+ WriteAsOperand(errs(), V2, true, M);
+ errs() << "\n";
}
return R;
@@ -156,7 +144,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
const char *MRString;
switch (R) {
- default: assert(0 && "Unknown mod/ref type!");
+ default: llvm_unreachable("Unknown mod/ref type!");
case NoModRef: NoMR++; MRString = "NoModRef"; break;
case Ref: JustRef++; MRString = "JustRef"; break;
case Mod: JustMod++; MRString = "JustMod"; break;
@@ -164,10 +152,10 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
}
if (PrintAll || (PrintAllFailures && R == ModRef)) {
- cerr << MRString << ": Ptr: ";
- cerr << "[" << Size << "B] ";
- WriteAsOperand(*cerr.stream(), P, true, M);
- cerr << "\t<->" << *CS.getInstruction();
+ errs() << MRString << ": Ptr: ";
+ errs() << "[" << Size << "B] ";
+ WriteAsOperand(errs(), P, true, M);
+ errs() << "\t<->" << *CS.getInstruction();
}
return R;
}
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 07820e350681..bb95c01e2ea9 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -29,9 +29,8 @@
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
-#include <set>
-#include <sstream>
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
using namespace llvm;
static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
@@ -81,18 +80,21 @@ X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true);
FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
-static void PrintResults(const char *Msg, bool P, const Value *V1, const Value *V2,
- const Module *M) {
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
if (P) {
- std::stringstream s1, s2;
- WriteAsOperand(s1, V1, true, M);
- WriteAsOperand(s2, V2, true, M);
- std::string o1(s1.str()), o2(s2.str());
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ WriteAsOperand(os1, V1, true, M);
+ WriteAsOperand(os2, V2, true, M);
+ }
+
if (o2 < o1)
- std::swap(o1, o2);
- cerr << " " << Msg << ":\t"
- << o1 << ", "
- << o2 << "\n";
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
}
}
@@ -100,19 +102,17 @@ static inline void
PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
Module *M) {
if (P) {
- cerr << " " << Msg << ": Ptr: ";
- WriteAsOperand(*cerr.stream(), Ptr, true, M);
- cerr << "\t<->" << *I;
+ errs() << " " << Msg << ": Ptr: ";
+ WriteAsOperand(errs(), Ptr, true, M);
+ errs() << "\t<->" << *I << '\n';
}
}
bool AAEval::runOnFunction(Function &F) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- const TargetData &TD = AA.getTargetData();
-
- std::set<Value *> Pointers;
- std::set<CallSite> CallSites;
+ SetVector<Value *> Pointers;
+ SetVector<CallSite> CallSites;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
if (isa<PointerType>(I->getType())) // Add all pointer arguments
@@ -136,20 +136,20 @@ bool AAEval::runOnFunction(Function &F) {
if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
PrintNoModRef || PrintMod || PrintRef || PrintModRef)
- cerr << "Function: " << F.getName() << ": " << Pointers.size()
- << " pointers, " << CallSites.size() << " call sites\n";
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
// iterate over the worklist, and run the full (n^2)/2 disambiguations
- for (std::set<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- unsigned I1Size = 0;
+ unsigned I1Size = ~0u;
const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = TD.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
- for (std::set<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- unsigned I2Size = 0;
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ unsigned I2Size = ~0u;
const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = TD.getTypeStoreSize(I2ElTy);
+ if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
case AliasAnalysis::NoAlias:
@@ -162,21 +162,21 @@ bool AAEval::runOnFunction(Function &F) {
PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
++MustAlias; break;
default:
- cerr << "Unknown alias query result!\n";
+ errs() << "Unknown alias query result!\n";
}
}
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (std::set<CallSite>::iterator C = CallSites.begin(),
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
Ce = CallSites.end(); C != Ce; ++C) {
Instruction *I = C->getInstruction();
- for (std::set<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
- unsigned Size = 0;
+ unsigned Size = ~0u;
const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
- if (ElTy->isSized()) Size = TD.getTypeStoreSize(ElTy);
+ if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(*C, *V, Size)) {
case AliasAnalysis::NoModRef:
@@ -192,7 +192,7 @@ bool AAEval::runOnFunction(Function &F) {
PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent());
++ModRef; break;
default:
- cerr << "Unknown alias query result!\n";
+ errs() << "Unknown alias query result!\n";
}
}
}
@@ -201,45 +201,45 @@ bool AAEval::runOnFunction(Function &F) {
}
static void PrintPercent(unsigned Num, unsigned Sum) {
- cerr << "(" << Num*100ULL/Sum << "."
- << ((Num*1000ULL/Sum) % 10) << "%)\n";
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
}
bool AAEval::doFinalization(Module &M) {
unsigned AliasSum = NoAlias + MayAlias + MustAlias;
- cerr << "===== Alias Analysis Evaluator Report =====\n";
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
if (AliasSum == 0) {
- cerr << " Alias Analysis Evaluator Summary: No pointers!\n";
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
} else {
- cerr << " " << AliasSum << " Total Alias Queries Performed\n";
- cerr << " " << NoAlias << " no alias responses ";
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAlias << " no alias responses ";
PrintPercent(NoAlias, AliasSum);
- cerr << " " << MayAlias << " may alias responses ";
+ errs() << " " << MayAlias << " may alias responses ";
PrintPercent(MayAlias, AliasSum);
- cerr << " " << MustAlias << " must alias responses ";
+ errs() << " " << MustAlias << " must alias responses ";
PrintPercent(MustAlias, AliasSum);
- cerr << " Alias Analysis Evaluator Pointer Alias Summary: "
- << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
- << MustAlias*100/AliasSum << "%\n";
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
}
// Display the summary for mod/ref analysis
unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
if (ModRefSum == 0) {
- cerr << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
} else {
- cerr << " " << ModRefSum << " Total ModRef Queries Performed\n";
- cerr << " " << NoModRef << " no mod/ref responses ";
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRef << " no mod/ref responses ";
PrintPercent(NoModRef, ModRefSum);
- cerr << " " << Mod << " mod responses ";
+ errs() << " " << Mod << " mod responses ";
PrintPercent(Mod, ModRefSum);
- cerr << " " << Ref << " ref responses ";
+ errs() << " " << Ref << " ref responses ";
PrintPercent(Ref, ModRefSum);
- cerr << " " << ModRef << " mod & ref responses ";
+ errs() << " " << ModRef << " mod & ref responses ";
PrintPercent(ModRef, ModRefSum);
- cerr << " Alias Analysis Evaluator Mod/Ref Summary: "
- << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
- << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
}
return false;
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 18c2b66505f6..b056d0091a09 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -20,8 +20,10 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// mergeSetIn - Merge the specified alias set into this alias set.
@@ -186,8 +188,8 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
void AliasSetTracker::clear() {
// Delete all the PointerRec entries.
- for (DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.begin(),
- E = PointerMap.end(); I != E; ++I)
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
I->second->eraseFromList();
PointerMap.clear();
@@ -279,7 +281,7 @@ bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
bool AliasSetTracker::add(LoadInst *LI) {
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
- AA.getTargetData().getTypeStoreSize(LI->getType()),
+ AA.getTypeStoreSize(LI->getType()),
AliasSet::Refs, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -289,7 +291,7 @@ bool AliasSetTracker::add(StoreInst *SI) {
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
- AA.getTargetData().getTypeStoreSize(Val->getType()),
+ AA.getTypeStoreSize(Val->getType()),
AliasSet::Mods, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -411,7 +413,7 @@ bool AliasSetTracker::remove(Value *Ptr, unsigned Size) {
}
bool AliasSetTracker::remove(LoadInst *LI) {
- unsigned Size = AA.getTargetData().getTypeStoreSize(LI->getType());
+ unsigned Size = AA.getTypeStoreSize(LI->getType());
AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size);
if (!AS) return false;
remove(*AS);
@@ -419,8 +421,7 @@ bool AliasSetTracker::remove(LoadInst *LI) {
}
bool AliasSetTracker::remove(StoreInst *SI) {
- unsigned Size =
- AA.getTargetData().getTypeStoreSize(SI->getOperand(0)->getType());
+ unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size);
if (!AS) return false;
remove(*AS);
@@ -485,7 +486,7 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
AS->removeCallSite(CS);
// First, look up the PointerRec for this pointer.
- DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(PtrVal);
+ PointerMapType::iterator I = PointerMap.find(PtrVal);
if (I == PointerMap.end()) return; // Noop
// If we found one, remove the pointer from the alias set it is in.
@@ -511,7 +512,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
AA.copyValue(From, To);
// First, look up the PointerRec for this pointer.
- DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(From);
+ PointerMapType::iterator I = PointerMap.find(From);
if (I == PointerMap.end())
return; // Noop
assert(I->second->hasAliasSet() && "Dead entry?");
@@ -531,15 +532,15 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
// AliasSet/AliasSetTracker Printing Support
//===----------------------------------------------------------------------===//
-void AliasSet::print(std::ostream &OS) const {
- OS << " AliasSet[" << (void*)this << "," << RefCount << "] ";
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] ";
OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
switch (AccessTy) {
case NoModRef: OS << "No access "; break;
case Refs : OS << "Ref "; break;
case Mods : OS << "Mod "; break;
case ModRef : OS << "Mod/Ref "; break;
- default: assert(0 && "Bad value for AccessTy!");
+ default: llvm_unreachable("Bad value for AccessTy!");
}
if (isVolatile()) OS << "[volatile] ";
if (Forward)
@@ -564,7 +565,7 @@ void AliasSet::print(std::ostream &OS) const {
OS << "\n";
}
-void AliasSetTracker::print(std::ostream &OS) const {
+void AliasSetTracker::print(raw_ostream &OS) const {
OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
<< PointerMap.size() << " pointer values.\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
@@ -572,8 +573,26 @@ void AliasSetTracker::print(std::ostream &OS) const {
OS << "\n";
}
-void AliasSet::dump() const { print (cerr); }
-void AliasSetTracker::dump() const { print(cerr); }
+void AliasSet::dump() const { print(errs()); }
+void AliasSetTracker::dump() const { print(errs()); }
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+ // this now dangles!
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
//===----------------------------------------------------------------------===//
// AliasSetPrinter Pass
@@ -596,7 +615,7 @@ namespace {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
- Tracker->print(cerr);
+ Tracker->print(errs());
delete Tracker;
return false;
}
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 493c6e88b3f8..f8cb32321b00 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -9,7 +9,6 @@
#include "llvm-c/Analysis.h"
#include "llvm/Analysis/Verifier.h"
-#include <fstream>
#include <cstring>
using namespace llvm;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f689dcac305a..2c4efc4985b3 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -22,11 +23,15 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include <algorithm>
using namespace llvm;
@@ -35,12 +40,8 @@ using namespace llvm;
// Useful predicates
//===----------------------------------------------------------------------===//
-static const User *isGEP(const Value *V) {
- if (isa<GetElementPtrInst>(V) ||
- (isa<ConstantExpr>(V) &&
- cast<ConstantExpr>(V)->getOpcode() == Instruction::GetElementPtr))
- return cast<User>(V);
- return 0;
+static const GEPOperator *isGEP(const Value *V) {
+ return dyn_cast<GEPOperator>(V);
}
static const Value *GetGEPOperands(const Value *V,
@@ -103,7 +104,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, unsigned Size,
- const TargetData &TD) {
+ LLVMContext &Context, const TargetData &TD) {
const Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
AccessTy = GV->getType()->getElementType();
@@ -112,6 +113,12 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
AccessTy = AI->getType()->getElementType();
else
return false;
+ } else if (const CallInst* CI = extractMallocCall(V)) {
+ if (!isArrayMalloc(V, Context, &TD))
+ // The size is the argument to the malloc call.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ return (C->getZExtValue() < Size);
+ return false;
} else if (const Argument *A = dyn_cast<Argument>(V)) {
if (A->hasByValAttr())
AccessTy = cast<PointerType>(A->getType())->getElementType();
@@ -142,11 +149,10 @@ namespace {
explicit NoAA(void *PID) : ImmutablePass(PID) { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
}
virtual void initializePass() {
- TD = &getAnalysis<TargetData>();
+ TD = getAnalysisIfAvailable<TargetData>();
}
virtual AliasResult alias(const Value *V1, unsigned V1Size,
@@ -156,7 +162,7 @@ namespace {
virtual void getArgumentAccesses(Function *F, CallSite CS,
std::vector<PointerAccessInfo> &Info) {
- assert(0 && "This method may not be called on this function!");
+ llvm_unreachable("This method may not be called on this function!");
}
virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { }
@@ -196,7 +202,12 @@ namespace {
static char ID; // Class identification, replacement for typeinfo
BasicAliasAnalysis() : NoAA(&ID) {}
AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ const Value *V2, unsigned V2Size) {
+ assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!");
+ AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
+ VisitedPHIs.clear();
+ return Alias;
+ }
ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
@@ -210,6 +221,22 @@ namespace {
bool pointsToConstantMemory(const Value *P);
private:
+ // VisitedPHIs - Track PHI nodes visited by an aliasCheck() call.
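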
+ SmallSet<const PHINode*, 16> VisitedPHIs;
+
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+ // against another.
+ AliasResult aliasGEP(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+ // against another.
+ AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
+ const Value *V2, unsigned V2Size);
+
+ AliasResult aliasCheck(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
// CheckGEPInstructions - Check two GEP instructions with known
// must-aliasing base pointers. This checks to see if the index expressions
// preclude the pointers from aliasing...
@@ -279,6 +306,27 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
if (!passedAsArg)
return NoModRef;
}
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (alias(II->getOperand(1), Size, P, Size) == NoAlias)
+ return NoModRef;
+ break;
+ }
+ }
}
// The AliasAnalysis base class has some smarts, lets use them.
@@ -303,71 +351,12 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
return NoAA::getModRefInfo(CS1, CS2);
}
-
-// alias - Provide a bunch of ad-hoc rules to disambiguate in common cases, such
-// as array references.
+// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+// against another.
//
AliasAnalysis::AliasResult
-BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- // Strip off any constant expression casts if they exist
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V1))
- if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
- V1 = CE->getOperand(0);
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V2))
- if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
- V2 = CE->getOperand(0);
-
- // Are we checking for alias of the same value?
- if (V1 == V2) return MustAlias;
-
- if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
- return NoAlias; // Scalars cannot alias each other
-
- // Strip off cast instructions. Since V1 and V2 are pointers, they must be
- // pointer<->pointer bitcasts.
- if (const BitCastInst *I = dyn_cast<BitCastInst>(V1))
- return alias(I->getOperand(0), V1Size, V2, V2Size);
- if (const BitCastInst *I = dyn_cast<BitCastInst>(V2))
- return alias(V1, V1Size, I->getOperand(0), V2Size);
-
- // Figure out what objects these things are pointing to if we can.
- const Value *O1 = V1->getUnderlyingObject();
- const Value *O2 = V2->getUnderlyingObject();
-
- if (O1 != O2) {
- // If V1/V2 point to two different objects we know that we have no alias.
- if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
- return NoAlias;
-
- // Arguments can't alias with local allocations or noalias calls.
- if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
- (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
- return NoAlias;
-
- // Most objects can't alias null.
- if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
- (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
- return NoAlias;
- }
-
- // If the size of one access is larger than the entire object on the other
- // side, then we know such behavior is undefined and can assume no alias.
- const TargetData &TD = getTargetData();
- if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, TD)) ||
- (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, TD)))
- return NoAlias;
-
- // If one pointer is the result of a call/invoke and the other is a
- // non-escaping local object, then we know the object couldn't escape to a
- // point where the call could return it.
- if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
- isNonEscapingLocalObject(O2) && O1 != O2)
- return NoAlias;
- if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
- isNonEscapingLocalObject(O1) && O1 != O2)
- return NoAlias;
-
+BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
// If we have two gep instructions with must-alias'ing base pointers, figure
// out if the indexes to the GEP tell us anything about the derived pointer.
// Note that we also handle chains of getelementptr instructions as well as
@@ -387,8 +376,8 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() &&
// All operands are the same, ignoring the base.
std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1))
- return alias(GEP1->getOperand(0), V1Size, GEP2->getOperand(0), V2Size);
-
+ return aliasCheck(GEP1->getOperand(0), V1Size,
+ GEP2->getOperand(0), V2Size);
// Drill down into the first non-gep value, to test for must-aliasing of
// the base pointers.
@@ -405,7 +394,7 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
const Value *BasePtr2 = GEP2->getOperand(0);
// Do the base pointers alias?
- AliasResult BaseAlias = alias(BasePtr1, ~0U, BasePtr2, ~0U);
+ AliasResult BaseAlias = aliasCheck(BasePtr1, ~0U, BasePtr2, ~0U);
if (BaseAlias == NoAlias) return NoAlias;
if (BaseAlias == MustAlias) {
// If the base pointers alias each other exactly, check to see if we can
@@ -435,79 +424,190 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
// instruction. If one pointer is a GEP with a non-zero index of the other
// pointer, we know they cannot alias.
//
- if (isGEP(V2)) {
- std::swap(V1, V2);
- std::swap(V1Size, V2Size);
- }
+ if (V1Size == ~0U || V2Size == ~0U)
+ return MayAlias;
- if (V1Size != ~0U && V2Size != ~0U)
- if (isGEP(V1)) {
- SmallVector<Value*, 16> GEPOperands;
- const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
-
- AliasResult R = alias(BasePtr, V1Size, V2, V2Size);
- if (R == MustAlias) {
- // If there is at least one non-zero constant index, we know they cannot
- // alias.
- bool ConstantFound = false;
- bool AllZerosFound = true;
- for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
- if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
- if (!C->isNullValue()) {
- ConstantFound = true;
- AllZerosFound = false;
- break;
- }
- } else {
- AllZerosFound = false;
- }
+ SmallVector<Value*, 16> GEPOperands;
+ const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
+
+ AliasResult R = aliasCheck(BasePtr, ~0U, V2, V2Size);
+ if (R != MustAlias)
+ // If V2 may alias the GEP base pointer, conservatively return MayAlias.
+ // If V2 is known not to alias the GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ // If there is at least one non-zero constant index, we know they cannot
+ // alias.
+ bool ConstantFound = false;
+ bool AllZerosFound = true;
+ for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
+ if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
+ if (!C->isNullValue()) {
+ ConstantFound = true;
+ AllZerosFound = false;
+ break;
+ }
+ } else {
+ AllZerosFound = false;
+ }
- // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must aliases
- // the ptr, the end result is a must alias also.
- if (AllZerosFound)
- return MustAlias;
+ // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must-aliases
+ // the ptr, the end result is a must alias also.
+ if (AllZerosFound)
+ return MustAlias;
- if (ConstantFound) {
- if (V2Size <= 1 && V1Size <= 1) // Just pointer check?
- return NoAlias;
+ if (ConstantFound) {
+ if (V2Size <= 1 && V1Size <= 1) // Just pointer check?
+ return NoAlias;
- // Otherwise we have to check to see that the distance is more than
- // the size of the argument... build an index vector that is equal to
- // the arguments provided, except substitute 0's for any variable
- // indexes we find...
- if (cast<PointerType>(
- BasePtr->getType())->getElementType()->isSized()) {
- for (unsigned i = 0; i != GEPOperands.size(); ++i)
- if (!isa<ConstantInt>(GEPOperands[i]))
- GEPOperands[i] =
- Constant::getNullValue(GEPOperands[i]->getType());
- int64_t Offset =
- getTargetData().getIndexedOffset(BasePtr->getType(),
- &GEPOperands[0],
- GEPOperands.size());
-
- if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
- return NoAlias;
- }
- }
- }
+ // Otherwise we have to check to see that the distance is more than
+ // the size of the argument... build an index vector that is equal to
+ // the arguments provided, except substitute 0's for any variable
+ // indexes we find...
+ if (TD &&
+ cast<PointerType>(BasePtr->getType())->getElementType()->isSized()) {
+ for (unsigned i = 0; i != GEPOperands.size(); ++i)
+ if (!isa<ConstantInt>(GEPOperands[i]))
+ GEPOperands[i] = Constant::getNullValue(GEPOperands[i]->getType());
+ int64_t Offset = TD->getIndexedOffset(BasePtr->getType(),
+ &GEPOperands[0],
+ GEPOperands.size());
+
+ if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
+ return NoAlias;
}
+ }
+
+ return MayAlias;
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
+ const Value *V2, unsigned V2Size) {
+ // The PHI node has already been visited, avoid recursion any further.
+ if (!VisitedPHIs.insert(PN))
+ return MayAlias;
+
+ SmallSet<Value*, 4> UniqueSrc;
+ SmallVector<Value*, 4> V1Srcs;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PV1 = PN->getIncomingValue(i);
+ if (isa<PHINode>(PV1))
+ // If any of the sources is itself a PHI, return MayAlias conservatively
+ // to avoid compile-time explosion. The worst case is when both sides
+ // are PHI nodes, in which case this is O(m x n) time, where 'm' and 'n'
+ // are the number of PHI sources.
+ return MayAlias;
+ if (UniqueSrc.insert(PV1))
+ V1Srcs.push_back(PV1);
+ }
+
+ AliasResult Alias = aliasCheck(V1Srcs[0], PNSize, V2, V2Size);
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If all sources of the PHI node NoAlias or MustAlias V2, then return
+ // NoAlias / MustAlias. Otherwise, return MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+ AliasResult ThisAlias = aliasCheck(V, PNSize, V2, V2Size);
+ if (ThisAlias != Alias || ThisAlias == MayAlias)
+ return MayAlias;
+ }
+
+ return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = V1->getUnderlyingObject();
+ const Value *O2 = V2->getUnderlyingObject();
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls.
+ if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ LLVMContext &Context = V1->getContext();
+ if (TD)
+ if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) ||
+ (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke and the other is a
+ // non-escaping local object, then we know the object couldn't escape to a
+ // point where the call could return it.
+ if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
+ isNonEscapingLocalObject(O2) && O1 != O2)
+ return NoAlias;
+ if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
+ isNonEscapingLocalObject(O1) && O1 != O2)
+ return NoAlias;
+
+ if (!isGEP(V1) && isGEP(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (isGEP(V1))
+ return aliasGEP(V1, V1Size, V2, V2Size);
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1))
+ return aliasPHI(PN, V1Size, V2, V2Size);
return MayAlias;
}
// This function is used to determine if the indices of two GEP instructions are
// equal. V1 and V2 are the indices.
-static bool IndexOperandsEqual(Value *V1, Value *V2) {
+static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) {
if (V1->getType() == V2->getType())
return V1 == V2;
if (Constant *C1 = dyn_cast<Constant>(V1))
if (Constant *C2 = dyn_cast<Constant>(V2)) {
// Sign extend the constants to long types, if necessary
- if (C1->getType() != Type::Int64Ty)
- C1 = ConstantExpr::getSExt(C1, Type::Int64Ty);
- if (C2->getType() != Type::Int64Ty)
- C2 = ConstantExpr::getSExt(C2, Type::Int64Ty);
+ if (C1->getType() != Type::getInt64Ty(Context))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
+ if (C2->getType() != Type::getInt64Ty(Context))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
return C1 == C2;
}
return false;
@@ -528,6 +628,8 @@ BasicAliasAnalysis::CheckGEPInstructions(
const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
+ LLVMContext &Context = GEPPointerTy->getContext();
+
// Find the (possibly empty) initial sequence of equal values... which are not
// necessarily constants.
unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
@@ -535,7 +637,8 @@ BasicAliasAnalysis::CheckGEPInstructions(
unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
unsigned UnequalOper = 0;
while (UnequalOper != MinOperands &&
- IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
+ IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper],
+ Context)) {
// Advance through the type as we go...
++UnequalOper;
if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
@@ -599,10 +702,10 @@ BasicAliasAnalysis::CheckGEPInstructions(
if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
if (G1OC->getType() != G2OC->getType()) {
// Sign extend both operands to long.
- if (G1OC->getType() != Type::Int64Ty)
- G1OC = ConstantExpr::getSExt(G1OC, Type::Int64Ty);
- if (G2OC->getType() != Type::Int64Ty)
- G2OC = ConstantExpr::getSExt(G2OC, Type::Int64Ty);
+ if (G1OC->getType() != Type::getInt64Ty(Context))
+ G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context));
+ if (G2OC->getType() != Type::getInt64Ty(Context))
+ G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context));
GEP1Ops[FirstConstantOper] = G1OC;
GEP2Ops[FirstConstantOper] = G2OC;
}
@@ -673,6 +776,10 @@ BasicAliasAnalysis::CheckGEPInstructions(
// However, one GEP may have more operands than the other. If this is the
// case, there may still be hope. Check this now.
if (FirstConstantOper == MinOperands) {
+ // Without TargetData, we won't know what the offsets are.
+ if (!TD)
+ return MayAlias;
+
// Make GEP1Ops be the longer one if there is a longer one.
if (NumGEP1Ops < NumGEP2Ops) {
std::swap(GEP1Ops, GEP2Ops);
@@ -692,13 +799,12 @@ BasicAliasAnalysis::CheckGEPInstructions(
GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType());
// Okay, now get the offset. This is the relative offset for the full
// instruction.
- const TargetData &TD = getTargetData();
- int64_t Offset1 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
- NumGEP1Ops);
+ int64_t Offset1 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops,
+ NumGEP1Ops);
// Now check without any constants at the end.
- int64_t Offset2 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
- MinOperands);
+ int64_t Offset2 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops,
+ MinOperands);
// Make sure we compare the absolute difference.
if (Offset1 > Offset2)
@@ -734,7 +840,8 @@ BasicAliasAnalysis::CheckGEPInstructions(
const Type *ZeroIdxTy = GEPPointerTy;
for (unsigned i = 0; i != FirstConstantOper; ++i) {
if (!isa<StructType>(ZeroIdxTy))
- GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Type::Int32Ty);
+ GEP1Ops[i] = GEP2Ops[i] =
+ Constant::getNullValue(Type::getInt32Ty(Context));
if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
@@ -775,9 +882,13 @@ BasicAliasAnalysis::CheckGEPInstructions(
// value possible.
//
if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
- GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,AT->getNumElements()-1);
+ GEP1Ops[i] =
+ ConstantInt::get(Type::getInt64Ty(Context),
+ AT->getNumElements()-1);
else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
- GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,VT->getNumElements()-1);
+ GEP1Ops[i] =
+ ConstantInt::get(Type::getInt64Ty(Context),
+ VT->getNumElements()-1);
}
}
@@ -812,11 +923,11 @@ BasicAliasAnalysis::CheckGEPInstructions(
}
}
- if (GEPPointerTy->getElementType()->isSized()) {
+ if (TD && GEPPointerTy->getElementType()->isSized()) {
int64_t Offset1 =
- getTargetData().getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
+ TD->getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
int64_t Offset2 =
- getTargetData().getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
+ TD->getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
assert(Offset1 != Offset2 &&
"There is at least one different constant here!");
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 8ada5a3f74cd..6fed4005d193 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -25,38 +25,36 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Config/config.h"
-#include <iosfwd>
-#include <sstream>
-#include <fstream>
using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
static std::string getGraphName(const Function *F) {
- return "CFG for '" + F->getName() + "' function";
+ return "CFG for '" + F->getNameStr() + "' function";
}
static std::string getNodeLabel(const BasicBlock *Node,
const Function *Graph,
bool ShortNames) {
if (ShortNames && !Node->getName().empty())
- return Node->getName() + ":";
+ return Node->getNameStr() + ":";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
- std::ostringstream Out;
if (ShortNames) {
- WriteAsOperand(Out, Node, false);
- return Out.str();
+ WriteAsOperand(OS, Node, false);
+ return OS.str();
}
if (Node->getName().empty()) {
- WriteAsOperand(Out, Node, false);
- Out << ":";
+ WriteAsOperand(OS, Node, false);
+ OS << ":";
}
-
- Out << *Node;
- std::string OutStr = Out.str();
+
+ OS << *Node;
+ std::string OutStr = OS.str();
if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
// Process string output to make it nicer...
@@ -94,7 +92,7 @@ namespace {
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -112,11 +110,11 @@ namespace {
CFGOnlyViewer() : FunctionPass(&ID) {}
virtual bool runOnFunction(Function &F) {
- F.viewCFG();
+ F.viewCFGOnly();
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -136,19 +134,21 @@ namespace {
explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getName() + ".dot";
- cerr << "Writing '" << Filename << "'...";
- std::ofstream File(Filename.c_str());
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
- if (File.good())
+ if (ErrorInfo.empty())
WriteGraph(File, (const Function*)&F);
else
- cerr << " error opening file for writing!";
- cerr << "\n";
+ errs() << " error opening file for writing!";
+ errs() << "\n";
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -166,18 +166,20 @@ namespace {
CFGOnlyPrinter() : FunctionPass(&ID) {}
explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getName() + ".dot";
- cerr << "Writing '" << Filename << "'...";
- std::ofstream File(Filename.c_str());
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
- if (File.good())
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
WriteGraph(File, (const Function*)&F, true);
else
- cerr << " error opening file for writing!";
- cerr << "\n";
+ errs() << " error opening file for writing!";
+ errs() << "\n";
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -196,7 +198,7 @@ P2("dot-cfg-only",
/// being a 'dot' and 'gv' program in your path.
///
void Function::viewCFG() const {
- ViewGraph(this, "cfg" + getName());
+ ViewGraph(this, "cfg" + getNameStr());
}
/// viewCFGOnly - This function is meant for use from the debugger. It works
@@ -205,7 +207,7 @@ void Function::viewCFG() const {
/// this can make the graph smaller.
///
void Function::viewCFGOnly() const {
- ViewGraph(this, "cfg" + getName(), true);
+ ViewGraph(this, "cfg" + getNameStr(), true);
}
FunctionPass *llvm::createCFGPrinterPass () {
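
The printer passes above switch from iostream state checks (File.good()) to an out-parameter error string, the raw_fd_ostream convention. The same error-reporting pattern on plain stdio, with openFile as an illustrative stand-in rather than the LLVM API:

    #include <cstdio>
    #include <string>

    // Open for writing; report failure through ErrorInfo instead of a
    // stream state bit.
    static std::FILE *openFile(const std::string &Name, std::string &ErrorInfo) {
      std::FILE *F = std::fopen(Name.c_str(), "w");
      if (!F)
        ErrorInfo = "could not open '" + Name + "'";
      return F;
    }

    int main() {
      std::string ErrorInfo;
      std::FILE *F = openFile("cfg.main.dot", ErrorInfo);
      if (ErrorInfo.empty()) {
        std::fputs("digraph {}\n", F);    // graph writing would go here
        std::fclose(F);
      } else {
        std::fprintf(stderr, " error opening file for writing!\n");
      }
    }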
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 6f2a06c7ac8f..1d2f118bb446 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,28 +6,33 @@ add_llvm_library(LLVMAnalysis
AliasSetTracker.cpp
Analysis.cpp
BasicAliasAnalysis.cpp
- CaptureTracking.cpp
CFGPrinter.cpp
+ CaptureTracking.cpp
ConstantFolding.cpp
DbgInfoPrinter.cpp
DebugInfo.cpp
+ IVUsers.cpp
+ InlineCost.cpp
InstCount.cpp
Interval.cpp
IntervalPartition.cpp
- IVUsers.cpp
LibCallAliasAnalysis.cpp
LibCallSemantics.cpp
LiveValues.cpp
LoopDependenceAnalysis.cpp
LoopInfo.cpp
LoopPass.cpp
- LoopVR.cpp
+ MallocHelper.cpp
MemoryDependenceAnalysis.cpp
+ PointerTracking.cpp
PostDominators.cpp
+ ProfileEstimatorPass.cpp
ProfileInfo.cpp
ProfileInfoLoader.cpp
ProfileInfoLoaderPass.cpp
+ ProfileVerifierPass.cpp
ScalarEvolution.cpp
+ ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
SparsePropagation.cpp
Trace.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index a19b8e4f94db..b30ac719ae0e 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -54,7 +54,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
if (CS.onlyReadsMemory() && CS.doesNotThrow() &&
- I->getType() == Type::VoidTy)
+ I->getType() == Type::getVoidTy(V->getContext()))
break;
// Not captured if only passed via 'nocapture' arguments. Note that
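
The one-line change here reflects a broader migration in this commit: the global singleton Type::VoidTy becomes a per-LLVMContext lookup, Type::getVoidTy(V->getContext()). A rough sketch of that shift, with Ctx and Ty as illustrative names only:

    #include <memory>

    struct Ty { const char *Name; };

    // Each context owns its own primitive types instead of sharing
    // globals, so two contexts never hand out pointers into each
    // other's type tables.
    struct Ctx {
      std::unique_ptr<Ty> VoidTy = std::make_unique<Ty>(Ty{"void"});
      Ty *getVoidTy() { return VoidTy.get(); }
    };

    // usage: Ctx C; bool IsVoid = (SomeTy == C.getVoidTy());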
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5aa4d56c4e67..0ce1c24bed67 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1,4 +1,4 @@
-//===-- ConstantFolding.cpp - Analyze constant folding possibilities ------===//
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This family of functions determines the possibility of performing constant
-// folding.
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
//
//===----------------------------------------------------------------------===//
@@ -19,9 +23,11 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include <cerrno>
@@ -92,7 +98,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
/// these together. If target data info is available, it is provided as TD,
/// otherwise TD is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const TargetData *TD){
+ Constant *Op1, const TargetData *TD,
+ LLVMContext &Context){
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
@@ -121,40 +128,103 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
const Type *ResultTy,
+ LLVMContext &Context,
const TargetData *TD) {
Constant *Ptr = Ops[0];
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
return 0;
-
- uint64_t BasePtr = 0;
+
+ unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context));
+ APInt BasePtr(BitWidth, 0);
+ bool BaseIsInt = true;
if (!Ptr->isNullValue()) {
// If this is a inttoptr from a constant int, we can fold this as the base,
// otherwise we can't.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
if (CE->getOpcode() == Instruction::IntToPtr)
- if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
- BasePtr = Base->getZExtValue();
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ BasePtr = Base->getValue();
+ BasePtr.zextOrTrunc(BitWidth);
+ }
if (BasePtr == 0)
- return 0;
+ BaseIsInt = false;
}
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
for (unsigned i = 1; i != NumOps; ++i)
if (!isa<ConstantInt>(Ops[i]))
- return false;
+ return 0;
- uint64_t Offset = TD->getIndexedOffset(Ptr->getType(),
- (Value**)Ops+1, NumOps-1);
- Constant *C = ConstantInt::get(TD->getIntPtrType(), Offset+BasePtr);
- return ConstantExpr::getIntToPtr(C, ResultTy);
+ APInt Offset = APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)Ops+1, NumOps-1));
+ // If the base value for this address is a literal integer value, fold the
+ // getelementptr to the resulting integer value casted to the pointer type.
+ if (BaseIsInt) {
+ Constant *C = ConstantInt::get(Context, Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ const Type *Ty = Ptr->getType();
+ SmallVector<Constant*, 32> NewIdxs;
+ do {
+ if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (isa<PointerType>(ATy) && !NewIdxs.empty())
+ break;
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ if (ElemSize == 0)
+ return 0;
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx));
+ Ty = ATy->getElementType();
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is at least as safe as the StructLayout API because we
+ // know the offset is within the struct at this point.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return 0;
+
+ // Create a GEP.
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+ assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ C = ConstantExpr::getBitCast(C, ResultTy);
+
+ return C;
}
/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
/// targetdata. Return 0 if unfoldable.
static Constant *FoldBitCast(Constant *C, const Type *DestTy,
- const TargetData &TD) {
+ const TargetData &TD, LLVMContext &Context) {
// If this is a bitcast from constant vector -> vector, fold it.
if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
@@ -180,10 +250,10 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
if (DstEltTy->isFloatingPoint()) {
// Fold to an vector of integers with same size as our FP type.
unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
- const Type *DestIVTy = VectorType::get(IntegerType::get(FPWidth),
- NumDstElt);
+ const Type *DestIVTy = VectorType::get(
+ IntegerType::get(Context, FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
- C = FoldBitCast(C, DestIVTy, TD);
+ C = FoldBitCast(C, DestIVTy, TD, Context);
if (!C) return 0;
// Finally, VMCore can handle this now that #elts line up.
@@ -194,8 +264,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// it to integer first.
if (SrcEltTy->isFloatingPoint()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- const Type *SrcIVTy = VectorType::get(IntegerType::get(FPWidth),
- NumSrcElt);
+ const Type *SrcIVTy = VectorType::get(
+ IntegerType::get(Context, FPWidth), NumSrcElt);
// Ask VMCore to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
CV = dyn_cast<ConstantVector>(C);
@@ -228,7 +298,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// Shift it to the right place, depending on endianness.
Src = ConstantExpr::getShl(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
+ ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
// Mix it in.
@@ -251,7 +321,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// Shift the piece of the value into the right place, depending on
// endianness.
Constant *Elt = ConstantExpr::getLShr(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
+ ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
// Truncate and remember this piece.
@@ -278,7 +348,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
/// is returned. Note that this function can only fail when attempting to fold
/// instructions like loads and stores, which have no constant expression form.
///
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
+ const TargetData *TD) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
if (PN->getNumIncomingValues() == 0)
return UndefValue::get(PN->getType());
@@ -306,16 +377,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(),
- Ops.data(), Ops.size(), TD);
- else
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Ops.data(), Ops.size(), TD);
+ Ops.data(), Ops.size(),
+ Context, TD);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Ops.data(), Ops.size(), Context, TD);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// result is returned, if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
+ LLVMContext &Context,
const TargetData *TD) {
SmallVector<Constant*, 8> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
@@ -323,10 +396,10 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(),
- Ops.data(), Ops.size(), TD);
- else
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
- Ops.data(), Ops.size(), TD);
+ Ops.data(), Ops.size(),
+ Context, TD);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+ Ops.data(), Ops.size(), Context, TD);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -337,11 +410,13 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant* const* Ops, unsigned NumOps,
+ LLVMContext &Context,
const TargetData *TD) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD,
+ Context))
return C;
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
@@ -356,9 +431,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
return 0;
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::VICmp:
- case Instruction::VFCmp:
- assert(0 &&"This function is invalid for compares: no predicate specified");
+ llvm_unreachable("This function is invalid for compares: no predicate specified");
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
@@ -368,7 +441,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
unsigned InWidth = Input->getType()->getScalarSizeInBits();
if (TD->getPointerSizeInBits() < InWidth) {
Constant *Mask =
- ConstantInt::get(APInt::getLowBitsSet(InWidth,
+ ConstantInt::get(Context, APInt::getLowBitsSet(InWidth,
TD->getPointerSizeInBits()));
Input = ConstantExpr::getAnd(Input, Mask);
}
@@ -387,7 +460,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
CE->getType()->getScalarSizeInBits()) {
if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *Input = CE->getOperand(0);
- Constant *C = FoldBitCast(Input, DestTy, *TD);
+ Constant *C = FoldBitCast(Input, DestTy, *TD, Context);
return C ? C : ConstantExpr::getBitCast(Input, DestTy);
}
// If there's a constant offset added to the integer value before
@@ -412,9 +485,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
AT->getNumElements()))) {
Constant *Index[] = {
Constant::getNullValue(CE->getType()),
- ConstantInt::get(ElemIdx)
+ ConstantInt::get(Context, ElemIdx)
};
- return ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
+ return
+ ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
}
}
}
@@ -434,7 +508,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
- if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD))
+ if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD, Context))
return C;
return ConstantExpr::getBitCast(Ops[0], DestTy);
case Instruction::Select:
@@ -446,7 +520,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
@@ -460,6 +534,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant*const * Ops,
unsigned NumOps,
+ LLVMContext &Context,
const TargetData *TD) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
@@ -470,14 +545,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) {
if (TD && Ops[1]->isNullValue()) {
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = TD->getIntPtrType(Context);
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -487,13 +563,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C = CE0->getOperand(0);
Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
// FIXME!
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = TD->getIntPtrType(Context);
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
@@ -503,7 +580,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
Constant *NewOps[] = { C0, C1 };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -514,7 +592,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *NewOps[] = {
CE0->getOperand(0), CE1->getOperand(0)
};
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
}
}
@@ -597,74 +676,47 @@ llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
return true;
- default: break;
+ default:
+ return false;
+ case 0: break;
}
if (!F->hasName()) return false;
- const char *Str = F->getNameStart();
- unsigned Len = F->getNameLen();
+ StringRef Name = F->getName();
// In these cases, the check of the length is required. We don't want to
// return true for a name like "cos\0blah" which strcmp would return equal to
// "cos", but has length 8.
- switch (Str[0]) {
+ switch (Name[0]) {
default: return false;
case 'a':
- if (Len == 4)
- return !strcmp(Str, "acos") || !strcmp(Str, "asin") ||
- !strcmp(Str, "atan");
- else if (Len == 5)
- return !strcmp(Str, "atan2");
- return false;
+ return Name == "acos" || Name == "asin" ||
+ Name == "atan" || Name == "atan2";
case 'c':
- if (Len == 3)
- return !strcmp(Str, "cos");
- else if (Len == 4)
- return !strcmp(Str, "ceil") || !strcmp(Str, "cosf") ||
- !strcmp(Str, "cosh");
- return false;
+ return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
case 'e':
- if (Len == 3)
- return !strcmp(Str, "exp");
- return false;
+ return Name == "exp";
case 'f':
- if (Len == 4)
- return !strcmp(Str, "fabs") || !strcmp(Str, "fmod");
- else if (Len == 5)
- return !strcmp(Str, "floor");
- return false;
- break;
+ return Name == "fabs" || Name == "fmod" || Name == "floor";
case 'l':
- if (Len == 3 && !strcmp(Str, "log"))
- return true;
- if (Len == 5 && !strcmp(Str, "log10"))
- return true;
- return false;
+ return Name == "log" || Name == "log10";
case 'p':
- if (Len == 3 && !strcmp(Str, "pow"))
- return true;
- return false;
+ return Name == "pow";
case 's':
- if (Len == 3)
- return !strcmp(Str, "sin");
- if (Len == 4)
- return !strcmp(Str, "sinh") || !strcmp(Str, "sqrt") ||
- !strcmp(Str, "sinf");
- if (Len == 5)
- return !strcmp(Str, "sqrtf");
- return false;
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sqrtf";
case 't':
- if (Len == 3 && !strcmp(Str, "tan"))
- return true;
- else if (Len == 4 && !strcmp(Str, "tanh"))
- return true;
- return false;
+ return Name == "tan" || Name == "tanh";
}
}
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
- const Type *Ty) {
+ const Type *Ty, LLVMContext &Context) {
errno = 0;
V = NativeFP(V);
if (errno != 0) {
@@ -672,17 +724,18 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
return 0;
}
- if (Ty == Type::FloatTy)
- return ConstantFP::get(APFloat((float)V));
- if (Ty == Type::DoubleTy)
- return ConstantFP::get(APFloat(V));
- assert(0 && "Can only constant fold float/double");
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Context, APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Context, APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
double V, double W,
- const Type *Ty) {
+ const Type *Ty,
+ LLVMContext &Context) {
errno = 0;
V = NativeFP(V, W);
if (errno != 0) {
@@ -690,137 +743,195 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return 0;
}
- if (Ty == Type::FloatTy)
- return ConstantFP::get(APFloat((float)V));
- if (Ty == Type::DoubleTy)
- return ConstantFP::get(APFloat(V));
- assert(0 && "Can only constant fold float/double");
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Context, APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Context, APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
-
Constant *
llvm::ConstantFoldCall(Function *F,
- Constant* const* Operands, unsigned NumOperands) {
+ Constant *const *Operands, unsigned NumOperands) {
if (!F->hasName()) return 0;
- const char *Str = F->getNameStart();
- unsigned Len = F->getNameLen();
-
+ LLVMContext &Context = F->getContext();
+ StringRef Name = F->getName();
+
const Type *Ty = F->getReturnType();
if (NumOperands == 1) {
if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
- if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy)
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
/// Currently APFloat versions of these functions do not exist, so we use
/// the host native double versions. Float versions are not called
/// directly but for all these it is true (float)(f((double)arg)) ==
/// f(arg). Long double not supported yet.
- double V = Ty==Type::FloatTy ? (double)Op->getValueAPF().convertToFloat():
+ double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
Op->getValueAPF().convertToDouble();
- switch (Str[0]) {
+ switch (Name[0]) {
case 'a':
- if (Len == 4 && !strcmp(Str, "acos"))
- return ConstantFoldFP(acos, V, Ty);
- else if (Len == 4 && !strcmp(Str, "asin"))
- return ConstantFoldFP(asin, V, Ty);
- else if (Len == 4 && !strcmp(Str, "atan"))
- return ConstantFoldFP(atan, V, Ty);
+ if (Name == "acos")
+ return ConstantFoldFP(acos, V, Ty, Context);
+ else if (Name == "asin")
+ return ConstantFoldFP(asin, V, Ty, Context);
+ else if (Name == "atan")
+ return ConstantFoldFP(atan, V, Ty, Context);
break;
case 'c':
- if (Len == 4 && !strcmp(Str, "ceil"))
- return ConstantFoldFP(ceil, V, Ty);
- else if (Len == 3 && !strcmp(Str, "cos"))
- return ConstantFoldFP(cos, V, Ty);
- else if (Len == 4 && !strcmp(Str, "cosh"))
- return ConstantFoldFP(cosh, V, Ty);
- else if (Len == 4 && !strcmp(Str, "cosf"))
- return ConstantFoldFP(cos, V, Ty);
+ if (Name == "ceil")
+ return ConstantFoldFP(ceil, V, Ty, Context);
+ else if (Name == "cos")
+ return ConstantFoldFP(cos, V, Ty, Context);
+ else if (Name == "cosh")
+ return ConstantFoldFP(cosh, V, Ty, Context);
+ else if (Name == "cosf")
+ return ConstantFoldFP(cos, V, Ty, Context);
break;
case 'e':
- if (Len == 3 && !strcmp(Str, "exp"))
- return ConstantFoldFP(exp, V, Ty);
+ if (Name == "exp")
+ return ConstantFoldFP(exp, V, Ty, Context);
break;
case 'f':
- if (Len == 4 && !strcmp(Str, "fabs"))
- return ConstantFoldFP(fabs, V, Ty);
- else if (Len == 5 && !strcmp(Str, "floor"))
- return ConstantFoldFP(floor, V, Ty);
+ if (Name == "fabs")
+ return ConstantFoldFP(fabs, V, Ty, Context);
+ else if (Name == "floor")
+ return ConstantFoldFP(floor, V, Ty, Context);
break;
case 'l':
- if (Len == 3 && !strcmp(Str, "log") && V > 0)
- return ConstantFoldFP(log, V, Ty);
- else if (Len == 5 && !strcmp(Str, "log10") && V > 0)
- return ConstantFoldFP(log10, V, Ty);
- else if (!strcmp(Str, "llvm.sqrt.f32") ||
- !strcmp(Str, "llvm.sqrt.f64")) {
+ if (Name == "log" && V > 0)
+ return ConstantFoldFP(log, V, Ty, Context);
+ else if (Name == "log10" && V > 0)
+ return ConstantFoldFP(log10, V, Ty, Context);
+ else if (Name == "llvm.sqrt.f32" ||
+ Name == "llvm.sqrt.f64") {
if (V >= -0.0)
- return ConstantFoldFP(sqrt, V, Ty);
+ return ConstantFoldFP(sqrt, V, Ty, Context);
else // Undefined
return Constant::getNullValue(Ty);
}
break;
case 's':
- if (Len == 3 && !strcmp(Str, "sin"))
- return ConstantFoldFP(sin, V, Ty);
- else if (Len == 4 && !strcmp(Str, "sinh"))
- return ConstantFoldFP(sinh, V, Ty);
- else if (Len == 4 && !strcmp(Str, "sqrt") && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty);
- else if (Len == 5 && !strcmp(Str, "sqrtf") && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty);
- else if (Len == 4 && !strcmp(Str, "sinf"))
- return ConstantFoldFP(sin, V, Ty);
+ if (Name == "sin")
+ return ConstantFoldFP(sin, V, Ty, Context);
+ else if (Name == "sinh")
+ return ConstantFoldFP(sinh, V, Ty, Context);
+ else if (Name == "sqrt" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty, Context);
+ else if (Name == "sqrtf" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty, Context);
+ else if (Name == "sinf")
+ return ConstantFoldFP(sin, V, Ty, Context);
break;
case 't':
- if (Len == 3 && !strcmp(Str, "tan"))
- return ConstantFoldFP(tan, V, Ty);
- else if (Len == 4 && !strcmp(Str, "tanh"))
- return ConstantFoldFP(tanh, V, Ty);
+ if (Name == "tan")
+ return ConstantFoldFP(tan, V, Ty, Context);
+ else if (Name == "tanh")
+ return ConstantFoldFP(tanh, V, Ty, Context);
break;
default:
break;
}
- } else if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
- if (Len > 11 && !memcmp(Str, "llvm.bswap", 10))
- return ConstantInt::get(Op->getValue().byteSwap());
- else if (Len > 11 && !memcmp(Str, "llvm.ctpop", 10))
+ return 0;
+ }
+
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ if (Name.startswith("llvm.bswap"))
+ return ConstantInt::get(Context, Op->getValue().byteSwap());
+ else if (Name.startswith("llvm.ctpop"))
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- else if (Len > 10 && !memcmp(Str, "llvm.cttz", 9))
+ else if (Name.startswith("llvm.cttz"))
return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- else if (Len > 10 && !memcmp(Str, "llvm.ctlz", 9))
+ else if (Name.startswith("llvm.ctlz"))
return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+ return 0;
}
- } else if (NumOperands == 2) {
+
+ return 0;
+ }
+
+ if (NumOperands == 2) {
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
- if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy)
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
- double Op1V = Ty==Type::FloatTy ?
- (double)Op1->getValueAPF().convertToFloat():
+ double Op1V = Ty->isFloatTy() ?
+ (double)Op1->getValueAPF().convertToFloat() :
Op1->getValueAPF().convertToDouble();
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
- double Op2V = Ty==Type::FloatTy ?
+ if (Op2->getType() != Op1->getType())
+ return 0;
+
+ double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
- if (Len == 3 && !strcmp(Str, "pow")) {
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- } else if (Len == 4 && !strcmp(Str, "fmod")) {
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- } else if (Len == 5 && !strcmp(Str, "atan2")) {
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
- }
+ if (Name == "pow")
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context);
+ if (Name == "fmod")
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context);
+ if (Name == "atan2")
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
- if (!strcmp(Str, "llvm.powi.f32")) {
- return ConstantFP::get(APFloat((float)std::pow((float)Op1V,
+ if (Name == "llvm.powi.f32")
+ return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V,
(int)Op2C->getZExtValue())));
- } else if (!strcmp(Str, "llvm.powi.f64")) {
- return ConstantFP::get(APFloat((double)std::pow((double)Op1V,
+ if (Name == "llvm.powi.f64")
+ return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V,
(int)Op2C->getZExtValue())));
+ }
+ return 0;
+ }
+
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow: {
+ Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result.
+ Constant *Ops[] = {
+ Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow.
+ };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ case Intrinsic::usub_with_overflow: {
+ Constant *Res = ConstantExpr::getSub(Op1, Op2); // result.
+ Constant *Ops[] = {
+ Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow.
+ };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ case Intrinsic::sadd_with_overflow: {
+ Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result.
+ Constant *Overflow = ConstantExpr::getSelect(
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT,
+ ConstantInt::get(Op1->getType(), 0), Op1),
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2),
+ ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow.
+
+ Constant *Ops[] = { Res, Overflow };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ case Intrinsic::ssub_with_overflow: {
+ Constant *Res = ConstantExpr::getSub(Op1, Op2); // result.
+ Constant *Overflow = ConstantExpr::getSelect(
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT,
+ ConstantInt::get(Op2->getType(), 0), Op2),
+ ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1),
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow.
+
+ Constant *Ops[] = { Res, Overflow };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
}
}
+
+ return 0;
}
+ return 0;
}
return 0;
}
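
The uadd/usub overflow tests folded above lean on modular wrap-around: an
unsigned add wraps mod 2^N, and it overflowed exactly when the wrapped sum
compares below an operand — hence the ICMP_ULT(Res, Op1) in the
uadd_with_overflow case (and ICMP_UGT for usub). A minimal stand-alone sketch
of the same check in plain C++ (the function name is illustrative, not LLVM
API):

#include <cstdint>
#include <cstdio>
#include <utility>

// Mirrors the llvm.uadd.with.overflow folding above: the sum wraps modulo
// 2^32, and overflow occurred iff the wrapped result compares below Op1.
static std::pair<uint32_t, bool> uaddWithOverflow(uint32_t Op1, uint32_t Op2) {
  uint32_t Res = Op1 + Op2;           // wraps, like ConstantExpr::getAdd
  return { Res, Res < Op1 };          // like ICMP_ULT(Res, Op1)
}

int main() {
  std::pair<uint32_t, bool> R = uaddWithOverflow(4294967295u, 1u);
  std::printf("{ %u, %d }\n", R.first, (int)R.second);  // prints { 0, 1 }
  return 0;
}
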
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 6c549e6345e6..2bbe2e0ecb4f 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -90,10 +90,9 @@ void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) {
}
void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
- DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram()));
- std::string Res1, Res2;
- Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1)
- << " return type: " << Subprogram.getReturnTypeName(Res2)
+ DISubprogram Subprogram(FS->getSubprogram());
+ Out << "; fully qualified function name: " << Subprogram.getDisplayName()
+ << " return type: " << Subprogram.getReturnTypeName()
<< " at line " << Subprogram.getLineNumber()
<< "\n\n";
}
@@ -152,7 +151,7 @@ bool PrintDbgInfo::runOnFunction(Function &F) {
Printed = true;
}
- Out << *i;
+ Out << *i << '\n';
printVariableDeclaration(i);
if (const User *U = dyn_cast<User>(i)) {
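
The DbgInfoPrinter hunk above also shows a calling-convention change that
recurs through the rest of this patch: descriptor string getters no longer
fill a caller-supplied std::string but return a const char* pointing into an
MDString, which may be null. A compilable sketch of the two idioms, using a
hypothetical stand-in for the DISubprogram accessors (not the real class):

#include <iostream>
#include <string>

struct SubprogramLike {
  // Old style: caller supplies a buffer; the getter fills and returns it.
  const std::string &getDisplayName(std::string &Res) const {
    Res = "main";
    return Res;
  }
  // New style: pointer into backing metadata storage; may be null.
  const char *getDisplayName() const { return "main"; }
};

int main() {
  SubprogramLike SP;
  std::string Res;
  std::cout << SP.getDisplayName(Res) << '\n';  // out-parameter idiom
  if (const char *Name = SP.getDisplayName())   // null-checked pointer idiom
    std::cout << Name << '\n';
  return 0;
}
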
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 9eecc339b483..7bb7e9b4af2d 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -18,12 +18,13 @@
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/Streams.h"
-
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::dwarf;
@@ -32,18 +33,12 @@ using namespace llvm::dwarf;
//===----------------------------------------------------------------------===//
/// ValidDebugInfo - Return true if V represents valid debug info value.
-bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
- if (!V)
- return false;
-
- GlobalVariable *GV = dyn_cast<GlobalVariable>(V->stripPointerCasts());
- if (!GV)
- return false;
-
- if (!GV->hasInternalLinkage () && !GV->hasLinkOnceLinkage())
+/// FIXME: Add DIDescriptor.isValid()
+bool DIDescriptor::ValidDebugInfo(MDNode *N, CodeGenOpt::Level OptLevel) {
+ if (!N)
return false;
- DIDescriptor DI(GV);
+ DIDescriptor DI(N);
// Check current version. Allow Version6 for now.
unsigned Version = DI.getVersion();
@@ -53,13 +48,13 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
unsigned Tag = DI.getTag();
switch (Tag) {
case DW_TAG_variable:
- assert(DIVariable(GV).Verify() && "Invalid DebugInfo value");
+ assert(DIVariable(N).Verify() && "Invalid DebugInfo value");
break;
case DW_TAG_compile_unit:
- assert(DICompileUnit(GV).Verify() && "Invalid DebugInfo value");
+ assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value");
break;
case DW_TAG_subprogram:
- assert(DISubprogram(GV).Verify() && "Invalid DebugInfo value");
+ assert(DISubprogram(N).Verify() && "Invalid DebugInfo value");
break;
case DW_TAG_lexical_block:
// FIXME: This interferes with the quality of generated code during
@@ -74,84 +69,75 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
return true;
}
-DIDescriptor::DIDescriptor(GlobalVariable *GV, unsigned RequiredTag) {
- DbgGV = GV;
-
- // If this is non-null, check to see if the Tag matches. If not, set to null.
- if (GV && getTag() != RequiredTag)
- DbgGV = 0;
-}
+DIDescriptor::DIDescriptor(MDNode *N, unsigned RequiredTag) {
+ DbgNode = N;
-const std::string &
-DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
- if (DbgGV == 0) {
- Result.clear();
- return Result;
+ // If this is non-null, check to see if the Tag matches. If not, set to null.
+ if (N && getTag() != RequiredTag) {
+ DbgNode = 0;
}
+}
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands()) {
- Result.clear();
- return Result;
- }
+const char *
+DIDescriptor::getStringField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return NULL;
- // Fills in the string if it succeeds
- if (!GetConstantStringInfo(C->getOperand(Elt), Result))
- Result.clear();
+ if (Elt < DbgNode->getNumElements())
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt)))
+ return MDS->getString().data();
- return Result;
+ return NULL;
}
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
- if (DbgGV == 0) return 0;
-
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands())
+ if (DbgNode == 0)
return 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C->getOperand(Elt)))
- return CI->getZExtValue();
+ if (Elt < DbgNode->getNumElements())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getElement(Elt)))
+ return CI->getZExtValue();
+
return 0;
}
DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
- if (DbgGV == 0) return DIDescriptor();
-
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands())
+ if (DbgNode == 0)
return DIDescriptor();
- C = C->getOperand(Elt);
- return DIDescriptor(dyn_cast<GlobalVariable>(C->stripPointerCasts()));
+ if (Elt < DbgNode->getNumElements() && DbgNode->getElement(Elt))
+ return DIDescriptor(dyn_cast<MDNode>(DbgNode->getElement(Elt)));
+
+ return DIDescriptor();
}
GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
- if (DbgGV == 0) return 0;
-
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands())
+ if (DbgNode == 0)
return 0;
- C = C->getOperand(Elt);
- return dyn_cast<GlobalVariable>(C->stripPointerCasts());
+ if (Elt < DbgNode->getNumElements())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getElement(Elt));
+ return 0;
}
//===----------------------------------------------------------------------===//
-// Simple Descriptor Constructors and other Methods
+// Predicates
//===----------------------------------------------------------------------===//
-// Needed by DIVariable::getType().
-DIType::DIType(GlobalVariable *GV) : DIDescriptor(GV) {
- if (!GV) return;
- unsigned tag = getTag();
- if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) &&
- !DICompositeType::isCompositeType(tag))
- DbgGV = 0;
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_base_type;
}
-/// isDerivedType - Return true if the specified tag is legal for
-/// DIDerivedType.
-bool DIType::isDerivedType(unsigned Tag) {
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
switch (Tag) {
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_pointer_type:
@@ -163,16 +149,18 @@ bool DIType::isDerivedType(unsigned Tag) {
case dwarf::DW_TAG_inheritance:
return true;
default:
- // FIXME: Even though it doesn't make sense, CompositeTypes are current
- // modelled as DerivedTypes, this should return true for them as well.
- return false;
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
}
}
/// isCompositeType - Return true if the specified tag is legal for
/// DICompositeType.
-bool DIType::isCompositeType(unsigned TAG) {
- switch (TAG) {
+bool DIDescriptor::isCompositeType() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ switch (Tag) {
case dwarf::DW_TAG_array_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
@@ -187,7 +175,10 @@ bool DIType::isCompositeType(unsigned TAG) {
}
/// isVariable - Return true if the specified tag is legal for DIVariable.
-bool DIVariable::isVariable(unsigned Tag) {
+bool DIDescriptor::isVariable() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
switch (Tag) {
case dwarf::DW_TAG_auto_variable:
case dwarf::DW_TAG_arg_variable:
@@ -198,19 +189,126 @@ bool DIVariable::isVariable(unsigned Tag) {
}
}
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_variable;
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isScope - Return true if the specified tag is one of the scope
+/// related tag.
+bool DIDescriptor::isScope() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ switch (Tag) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_compile_unit;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(MDNode *N) : DIDescriptor(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
unsigned DIArray::getNumElements() const {
- assert (DbgGV && "Invalid DIArray");
- Constant *C = DbgGV->getInitializer();
- assert (C && "Invalid DIArray initializer");
- return C->getNumOperands();
+ assert (DbgNode && "Invalid DIArray");
+ return DbgNode->getNumElements();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor. After this completes, the current debug info value
+/// is erased.
+void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
+ if (isNull())
+ return;
+
+ assert (!D.isNull() && "Can not replace with null");
+
+ // Since we use a TrackingVH for the node, it's easy for clients to
+ // manufacture legitimate situations where they want to call
+ // replaceAllUsesWith() on something which, due to uniquing, has merged
+ // with the source. We shield clients from this detail by allowing a
+ // value to be "replaced" with itself (a no-op).
+ if (getNode() != D.getNode()) {
+ MDNode *Node = DbgNode;
+ Node->replaceAllUsesWith(D.getNode());
+ delete Node;
+ }
}
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
- if (isNull())
+ if (isNull())
return false;
- std::string Res;
- if (getFilename(Res).empty())
+ const char *N = getFilename();
+ if (!N)
return false;
// It is possible that the directory and producer strings are empty.
return true;
@@ -218,26 +316,26 @@ bool DICompileUnit::Verify() const {
/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
- if (isNull())
+ if (isNull())
return false;
- if (getContext().isNull())
+ if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.isNull() && !CU.Verify())
+ if (!CU.isNull() && !CU.Verify())
return false;
return true;
}
/// Verify - Verify that a composite type descriptor is well formed.
bool DICompositeType::Verify() const {
- if (isNull())
+ if (isNull())
return false;
- if (getContext().isNull())
+ if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.isNull() && !CU.Verify())
+ if (!CU.isNull() && !CU.Verify())
return false;
return true;
}
@@ -246,12 +344,12 @@ bool DICompositeType::Verify() const {
bool DISubprogram::Verify() const {
if (isNull())
return false;
-
+
if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
+ if (!CU.Verify())
return false;
DICompositeType Ty = getType();
@@ -264,12 +362,12 @@ bool DISubprogram::Verify() const {
bool DIGlobalVariable::Verify() const {
if (isNull())
return false;
-
+
if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.isNull() && !CU.Verify())
+ if (!CU.isNull() && !CU.Verify())
return false;
DIType Ty = getType();
@@ -286,7 +384,7 @@ bool DIGlobalVariable::Verify() const {
bool DIVariable::Verify() const {
if (isNull())
return false;
-
+
if (getContext().isNull())
return false;
@@ -312,15 +410,38 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
/// information for the function F.
bool DISubprogram::describes(const Function *F) {
assert (F && "Invalid function");
- std::string Name;
- getLinkageName(Name);
- if (Name.empty())
- getName(Name);
- if (!Name.empty() && (strcmp(Name.c_str(), F->getNameStart()) == false))
+ const char *Name = getLinkageName();
+ if (!Name)
+ Name = getName();
+ if (strcmp(F->getName().data(), Name) == 0)
return true;
return false;
}
+const char *DIScope::getFilename() const {
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getFilename();
+ else if (isSubprogram())
+ return DISubprogram(DbgNode).getFilename();
+ else if (isCompileUnit())
+ return DICompileUnit(DbgNode).getFilename();
+ else
+ assert (0 && "Invalid DIScope!");
+ return NULL;
+}
+
+const char *DIScope::getDirectory() const {
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getDirectory();
+ else if (isSubprogram())
+ return DISubprogram(DbgNode).getDirectory();
+ else if (isCompileUnit())
+ return DICompileUnit(DbgNode).getDirectory();
+ else
+ assert (0 && "Invalid DIScope!");
+ return NULL;
+}
+
//===----------------------------------------------------------------------===//
// DIDescriptor: dump routines for all descriptors.
//===----------------------------------------------------------------------===//
@@ -328,69 +449,67 @@ bool DISubprogram::describes(const Function *F) {
/// dump - Print descriptor.
void DIDescriptor::dump() const {
- cerr << "[" << dwarf::TagString(getTag()) << "] ";
- cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec;
+ errs() << "[" << dwarf::TagString(getTag()) << "] ";
+ errs().write_hex((intptr_t) &*DbgNode) << ']';
}
/// dump - Print compile unit.
void DICompileUnit::dump() const {
if (getLanguage())
- cerr << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+ errs() << " [" << dwarf::LanguageString(getLanguage()) << "] ";
- std::string Res1, Res2;
- cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]";
+ errs() << " [" << getDirectory() << "/" << getFilename() << " ]";
}
/// dump - Print type.
void DIType::dump() const {
if (isNull()) return;
- std::string Res;
- if (!getName(Res).empty())
- cerr << " [" << Res << "] ";
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
unsigned Tag = getTag();
- cerr << " [" << dwarf::TagString(Tag) << "] ";
+ errs() << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
getCompileUnit().dump();
- cerr << " ["
- << getLineNumber() << ", "
- << getSizeInBits() << ", "
- << getAlignInBits() << ", "
- << getOffsetInBits()
- << "] ";
-
- if (isPrivate())
- cerr << " [private] ";
+ errs() << " ["
+ << getLineNumber() << ", "
+ << getSizeInBits() << ", "
+ << getAlignInBits() << ", "
+ << getOffsetInBits()
+ << "] ";
+
+ if (isPrivate())
+ errs() << " [private] ";
else if (isProtected())
- cerr << " [protected] ";
+ errs() << " [protected] ";
if (isForwardDecl())
- cerr << " [fwd] ";
-
- if (isBasicType(Tag))
- DIBasicType(DbgGV).dump();
- else if (isDerivedType(Tag))
- DIDerivedType(DbgGV).dump();
- else if (isCompositeType(Tag))
- DICompositeType(DbgGV).dump();
+ errs() << " [fwd] ";
+
+ if (isBasicType())
+ DIBasicType(DbgNode).dump();
+ else if (isDerivedType())
+ DIDerivedType(DbgNode).dump();
+ else if (isCompositeType())
+ DICompositeType(DbgNode).dump();
else {
- cerr << "Invalid DIType\n";
+ errs() << "Invalid DIType\n";
return;
}
- cerr << "\n";
+ errs() << "\n";
}
/// dump - Print basic type.
void DIBasicType::dump() const {
- cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+ errs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
}
/// dump - Print derived type.
void DIDerivedType::dump() const {
- cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump();
+ errs() << "\n\t Derived From: "; getTypeDerivedFrom().dump();
}
/// dump - Print composite type.
@@ -398,54 +517,72 @@ void DICompositeType::dump() const {
DIArray A = getTypeArray();
if (A.isNull())
return;
- cerr << " [" << A.getNumElements() << " elements]";
+ errs() << " [" << A.getNumElements() << " elements]";
}
/// dump - Print global.
void DIGlobal::dump() const {
- std::string Res;
- if (!getName(Res).empty())
- cerr << " [" << Res << "] ";
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
unsigned Tag = getTag();
- cerr << " [" << dwarf::TagString(Tag) << "] ";
+ errs() << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
getCompileUnit().dump();
- cerr << " [" << getLineNumber() << "] ";
+ errs() << " [" << getLineNumber() << "] ";
if (isLocalToUnit())
- cerr << " [local] ";
+ errs() << " [local] ";
if (isDefinition())
- cerr << " [def] ";
+ errs() << " [def] ";
- if (isGlobalVariable(Tag))
- DIGlobalVariable(DbgGV).dump();
+ if (isGlobalVariable())
+ DIGlobalVariable(DbgNode).dump();
- cerr << "\n";
+ errs() << "\n";
}
/// dump - Print subprogram.
void DISubprogram::dump() const {
- DIGlobal::dump();
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ errs() << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().dump();
+ errs() << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ errs() << " [local] ";
+
+ if (isDefinition())
+ errs() << " [def] ";
+
+ errs() << "\n";
}
/// dump - Print global variable.
void DIGlobalVariable::dump() const {
- cerr << " ["; getGlobal()->dump(); cerr << "] ";
+ errs() << " [";
+ getGlobal()->dump();
+ errs() << "] ";
}
/// dump - Print variable.
void DIVariable::dump() const {
- std::string Res;
- if (!getName(Res).empty())
- cerr << " [" << Res << "] ";
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
getCompileUnit().dump();
- cerr << " [" << getLineNumber() << "] ";
+ errs() << " [" << getLineNumber() << "] ";
getType().dump();
- cerr << "\n";
+ errs() << "\n";
+
+ // FIXME: Dump complex addresses
}
//===----------------------------------------------------------------------===//
@@ -453,98 +590,46 @@ void DIVariable::dump() const {
//===----------------------------------------------------------------------===//
DIFactory::DIFactory(Module &m)
- : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0),
+ : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0),
+ RegionStartFn(0), RegionEndFn(0),
DeclareFn(0) {
- EmptyStructPtr = PointerType::getUnqual(StructType::get());
-}
-
-/// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'.
-/// This is only valid when the descriptor is non-null.
-Constant *DIFactory::getCastToEmpty(DIDescriptor D) {
- if (D.isNull()) return Constant::getNullValue(EmptyStructPtr);
- return ConstantExpr::getBitCast(D.getGV(), EmptyStructPtr);
+ EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
}
Constant *DIFactory::GetTagConstant(unsigned TAG) {
assert((TAG & LLVMDebugVersionMask) == 0 &&
"Tag too large for debug encoding!");
- return ConstantInt::get(Type::Int32Ty, TAG | LLVMDebugVersion);
-}
-
-Constant *DIFactory::GetStringConstant(const std::string &String) {
- // Check string cache for previous edition.
- Constant *&Slot = StringCache[String];
-
- // Return Constant if previously defined.
- if (Slot) return Slot;
-
- const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty);
-
- // If empty string then use a i8* null instead.
- if (String.empty())
- return Slot = ConstantPointerNull::get(DestTy);
-
- // Construct string as an llvm constant.
- Constant *ConstStr = ConstantArray::get(String);
-
- // Otherwise create and return a new string global.
- GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true,
- GlobalVariable::InternalLinkage,
- ConstStr, ".str", &M);
- StrGV->setSection("llvm.metadata");
- return Slot = ConstantExpr::getBitCast(StrGV, DestTy);
+ return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion);
}
//===----------------------------------------------------------------------===//
// DIFactory: Primary Constructors
//===----------------------------------------------------------------------===//
-/// GetOrCreateArray - Create an descriptor for an array of descriptors.
+/// GetOrCreateArray - Create a descriptor for an array of descriptors.
/// This implicitly uniques the arrays created.
DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
- SmallVector<Constant*, 16> Elts;
-
- for (unsigned i = 0; i != NumTys; ++i)
- Elts.push_back(getCastToEmpty(Tys[i]));
-
- Constant *Init = ConstantArray::get(ArrayType::get(EmptyStructPtr,
- Elts.size()),
- Elts.data(), Elts.size());
- // If we already have this array, just return the uniqued version.
- DIDescriptor &Entry = SimpleConstantCache[Init];
- if (!Entry.isNull()) return DIArray(Entry.getGV());
-
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.array", &M);
- GV->setSection("llvm.metadata");
- Entry = DIDescriptor(GV);
- return DIArray(GV);
+ SmallVector<Value*, 16> Elts;
+
+ if (NumTys == 0)
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ else
+ for (unsigned i = 0; i != NumTys; ++i)
+ Elts.push_back(Tys[i].getNode());
+
+ return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
}
/// GetOrCreateSubrange - Create a descriptor for a value range. This
/// implicitly uniques the values returned.
DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subrange_type),
- ConstantInt::get(Type::Int64Ty, Lo),
- ConstantInt::get(Type::Int64Ty, Hi)
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- // If we already have this range, just return the uniqued version.
- DIDescriptor &Entry = SimpleConstantCache[Init];
- if (!Entry.isNull()) return DISubrange(Entry.getGV());
-
- M.addTypeName("llvm.dbg.subrange.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.subrange", &M);
- GV->setSection("llvm.metadata");
- Entry = DIDescriptor(GV);
- return DISubrange(GV);
+ return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
}
@@ -552,92 +637,69 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
- const std::string &Filename,
- const std::string &Directory,
- const std::string &Producer,
+ StringRef Filename,
+ StringRef Directory,
+ StringRef Producer,
bool isMain,
bool isOptimized,
const char *Flags,
unsigned RunTimeVer) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_compile_unit),
- Constant::getNullValue(EmptyStructPtr),
- ConstantInt::get(Type::Int32Ty, LangID),
- GetStringConstant(Filename),
- GetStringConstant(Directory),
- GetStringConstant(Producer),
- ConstantInt::get(Type::Int1Ty, isMain),
- ConstantInt::get(Type::Int1Ty, isOptimized),
- GetStringConstant(Flags),
- ConstantInt::get(Type::Int32Ty, RunTimeVer)
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LangID),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Producer),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isMain),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.compile_unit.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::LinkOnceAnyLinkage,
- Init, "llvm.dbg.compile_unit", &M);
- GV->setSection("llvm.metadata");
- return DICompileUnit(GV);
+
+ return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10));
}
/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(const std::string &Name, uint64_t Val){
- Constant *Elts[] = {
+DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val) {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_enumerator),
- GetStringConstant(Name),
- ConstantInt::get(Type::Int64Ty, Val)
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.enumerator.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.enumerator", &M);
- GV->setSection("llvm.metadata");
- return DIEnumerator(GV);
+ return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3));
}
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
unsigned Encoding) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_base_type),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNumber),
- ConstantInt::get(Type::Int64Ty, SizeInBits),
- ConstantInt::get(Type::Int64Ty, AlignInBits),
- ConstantInt::get(Type::Int64Ty, OffsetInBits),
- ConstantInt::get(Type::Int32Ty, Flags),
- ConstantInt::get(Type::Int32Ty, Encoding)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.basictype.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.basictype", &M);
- GV->setSection("llvm.metadata");
- return DIBasicType(GV);
+ return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
}
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -645,33 +707,25 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
uint64_t OffsetInBits,
unsigned Flags,
DIType DerivedFrom) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(Tag),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNumber),
- ConstantInt::get(Type::Int64Ty, SizeInBits),
- ConstantInt::get(Type::Int64Ty, AlignInBits),
- ConstantInt::get(Type::Int64Ty, OffsetInBits),
- ConstantInt::get(Type::Int32Ty, Flags),
- getCastToEmpty(DerivedFrom)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom.getNode(),
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.derivedtype.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.derivedtype", &M);
- GV->setSection("llvm.metadata");
- return DIDerivedType(GV);
+ return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
}
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -682,143 +736,143 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIArray Elements,
unsigned RuntimeLang) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(Tag),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNumber),
- ConstantInt::get(Type::Int64Ty, SizeInBits),
- ConstantInt::get(Type::Int64Ty, AlignInBits),
- ConstantInt::get(Type::Int64Ty, OffsetInBits),
- ConstantInt::get(Type::Int32Ty, Flags),
- getCastToEmpty(DerivedFrom),
- getCastToEmpty(Elements),
- ConstantInt::get(Type::Int32Ty, RuntimeLang)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom.getNode(),
+ Elements.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.composite.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.composite", &M);
- GV->setSection("llvm.metadata");
- return DICompositeType(GV);
+ return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
}
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields. This
/// method does not unique the generated descriptors.
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
- const std::string &Name,
- const std::string &DisplayName,
- const std::string &LinkageName,
+DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
+ StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,
bool isLocalToUnit,
bool isDefinition) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
- Constant::getNullValue(EmptyStructPtr),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- GetStringConstant(DisplayName),
- GetStringConstant(LinkageName),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNo),
- getCastToEmpty(Type),
- ConstantInt::get(Type::Int1Ty, isLocalToUnit),
- ConstantInt::get(Type::Int1Ty, isDefinition)
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, DisplayName),
+ MDString::get(VMContext, LinkageName),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Type.getNode(),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.subprogram.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::LinkOnceAnyLinkage,
- Init, "llvm.dbg.subprogram", &M);
- GV->setSection("llvm.metadata");
- return DISubprogram(GV);
+ return DISubprogram(MDNode::get(VMContext, &Elts[0], 11));
}
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
- const std::string &DisplayName,
- const std::string &LinkageName,
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *Val) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_variable),
- Constant::getNullValue(EmptyStructPtr),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- GetStringConstant(DisplayName),
- GetStringConstant(LinkageName),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNo),
- getCastToEmpty(Type),
- ConstantInt::get(Type::Int1Ty, isLocalToUnit),
- ConstantInt::get(Type::Int1Ty, isDefinition),
- ConstantExpr::getBitCast(Val, EmptyStructPtr)
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, DisplayName),
+ MDString::get(VMContext, LinkageName),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Type.getNode(),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ Val
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.global_variable.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::LinkOnceAnyLinkage,
- Init, "llvm.dbg.global_variable", &M);
- GV->setSection("llvm.metadata");
- return DIGlobalVariable(GV);
+
+ Value *const *Vs = &Elts[0];
+ MDNode *Node = MDNode::get(VMContext, Vs, 12);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addElement(Node);
+
+ return DIGlobalVariable(Node);
}
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(Tag),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNo),
- getCastToEmpty(Type)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Type.getNode(),
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.variable.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.variable", &M);
- GV->setSection("llvm.metadata");
- return DIVariable(GV);
+ return DIVariable(MDNode::get(VMContext, &Elts[0], 6));
+}
+
+
+/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNo,
+ DIType Type, SmallVector<Value *, 9> &addr) {
+ SmallVector<Value *, 9> Elts;
+ Elts.push_back(GetTagConstant(Tag));
+ Elts.push_back(Context.getNode());
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(CompileUnit.getNode());
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+ Elts.push_back(Type.getNode());
+ Elts.insert(Elts.end(), addr.begin(), addr.end());
+
+ return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
}
/// CreateBlock - This creates a descriptor for a lexical block with the
-/// specified parent context.
-DIBlock DIFactory::CreateBlock(DIDescriptor Context) {
- Constant *Elts[] = {
+/// specified parent context.
+DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
- getCastToEmpty(Context)
+ Context.getNode()
+ };
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2));
+}
+
+/// CreateLocation - Creates a debug info location.
+DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
+ DIScope S, DILocation OrigLoc) {
+ Value *Elts[] = {
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
+ S.getNode(),
+ OrigLoc.getNode(),
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.block.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.block", &M);
- GV->setSection("llvm.metadata");
- return DIBlock(GV);
+ return DILocation(MDNode::get(VMContext, &Elts[0], 4));
}
@@ -830,17 +884,17 @@ DIBlock DIFactory::CreateBlock(DIDescriptor Context) {
/// inserting it at the end of the specified basic block.
void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo,
unsigned ColNo, BasicBlock *BB) {
-
+
// Lazily construct llvm.dbg.stoppoint function.
if (!StopPointFn)
- StopPointFn = llvm::Intrinsic::getDeclaration(&M,
+ StopPointFn = llvm::Intrinsic::getDeclaration(&M,
llvm::Intrinsic::dbg_stoppoint);
-
+
// Invoke llvm.dbg.stoppoint
Value *Args[] = {
- llvm::ConstantInt::get(llvm::Type::Int32Ty, LineNo),
- llvm::ConstantInt::get(llvm::Type::Int32Ty, ColNo),
- getCastToEmpty(CU)
+ ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo),
+ CU.getNode()
};
CallInst::Create(StopPointFn, Args, Args+3, "", BB);
}
@@ -851,9 +905,9 @@ void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) {
// Lazily construct llvm.dbg.func.start.
if (!FuncStartFn)
FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start);
-
+
// Call llvm.dbg.func.start which also implicitly sets a stoppoint.
- CallInst::Create(FuncStartFn, getCastToEmpty(SP), "", BB);
+ CallInst::Create(FuncStartFn, SP.getNode(), "", BB);
}
/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
@@ -864,7 +918,7 @@ void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) {
RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start);
// Call llvm.dbg.func.start.
- CallInst::Create(RegionStartFn, getCastToEmpty(D), "", BB);
+ CallInst::Create(RegionStartFn, D.getNode(), "", BB);
}
/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
@@ -875,19 +929,220 @@ void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) {
RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end);
// Call llvm.dbg.region.end.
- CallInst::Create(RegionEndFn, getCastToEmpty(D), "", BB);
+ CallInst::Create(RegionEndFn, D.getNode(), "", BB);
}
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-void DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *BB) {
+void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+ Instruction *InsertBefore) {
// Cast the storage to a {}* for the call to llvm.dbg.declare.
- Storage = new BitCastInst(Storage, EmptyStructPtr, "", BB);
-
+ Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
+
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
- Value *Args[] = { Storage, getCastToEmpty(D) };
- CallInst::Create(DeclareFn, Args, Args+2, "", BB);
+ Value *Args[] = { Storage, D.getNode() };
+ CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+ BasicBlock *InsertAtEnd) {
+ // Cast the storage to a {}* for the call to llvm.dbg.declare.
+ Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
+
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { Storage, D.getNode() };
+ CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(Module &M) {
+
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ MetadataContext &TheMetadata = M.getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+#endif
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
+ ++BI) {
+ if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(BI))
+ processStopPoint(SPI);
+ else if (DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI))
+ processFuncStart(FSI);
+ else if (DbgRegionStartInst *DRS = dyn_cast<DbgRegionStartInst>(BI))
+ processRegionStart(DRS);
+ else if (DbgRegionEndInst *DRE = dyn_cast<DbgRegionEndInst>(BI))
+ processRegionEnd(DRE);
+ else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ processDeclare(DDI);
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ else if (MDDbgKind) {
+ if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) {
+ DILocation Loc(L);
+ DIScope S(Loc.getScope().getNode());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S.getNode()));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S.getNode()));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S.getNode()));
+ }
+ }
+#endif
+ }
+
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return;
+
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(cast<MDNode>(NMD->getElement(i)));
+ if (addGlobalVariable(DIG)) {
+ addCompileUnit(DIG.getCompileUnit());
+ processType(DIG.getType());
+ }
+ }
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+
+ addCompileUnit(DT.getCompileUnit());
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT.getNode());
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ if (!DA.isNull())
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ DIType TypeE = DIType(D.getNode());
+ if (!TypeE.isNull())
+ processType(TypeE);
+ else
+ processSubprogram(DISubprogram(D.getNode()));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT.getNode());
+ if (!DDT.isNull())
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock - Process DILexicalBlock.
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ if (LB.isNull())
+ return;
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context.getNode()));
+ else
+ return processSubprogram(DISubprogram(Context.getNode()));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (SP.isNull())
+ return;
+ if (!addSubprogram(SP))
+ return;
+ addCompileUnit(SP.getCompileUnit());
+ processType(SP.getType());
+}
+
+/// processStopPoint - Process DbgStopPointInst.
+void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) {
+ MDNode *Context = dyn_cast<MDNode>(SPI->getContext());
+ addCompileUnit(DICompileUnit(Context));
+}
+
+/// processFuncStart - Process DbgFuncStartInst.
+void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) {
+ MDNode *SP = dyn_cast<MDNode>(FSI->getSubprogram());
+ processSubprogram(DISubprogram(SP));
+}
+
+/// processRegionStart - Process DbgRegionStartInst.
+void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) {
+ MDNode *SP = dyn_cast<MDNode>(DRS->getContext());
+ processSubprogram(DISubprogram(SP));
+}
+
+/// processRegionEnd - Process DbgRegionEndInst.
+void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) {
+ MDNode *SP = dyn_cast<MDNode>(DRE->getContext());
+ processSubprogram(DISubprogram(SP));
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+ DIVariable DV(cast<MDNode>(DDI->getVariable()));
+ if (DV.isNull())
+ return;
+
+ if (!NodesSeen.insert(DV.getNode()))
+ return;
+
+ addCompileUnit(DV.getCompileUnit());
+ processType(DV.getType());
+}
+
+/// addType - Add type into TYs.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (DT.isNull())
+ return false;
+
+ if (!NodesSeen.insert(DT.getNode()))
+ return false;
+
+ TYs.push_back(DT.getNode());
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (CU.isNull())
+ return false;
+
+ if (!NodesSeen.insert(CU.getNode()))
+ return false;
+
+ CUs.push_back(CU.getNode());
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (DIG.isNull())
+ return false;
+
+ if (!NodesSeen.insert(DIG.getNode()))
+ return false;
+
+ GVs.push_back(DIG.getNode());
+ return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (SP.isNull())
+ return false;
+
+ if (!NodesSeen.insert(SP.getNode()))
+ return false;
+
+ SPs.push_back(SP.getNode());
+ return true;
}
namespace llvm {
@@ -939,30 +1194,17 @@ namespace llvm {
Value *findDbgGlobalDeclare(GlobalVariable *V) {
const Module *M = V->getParent();
- const Type *Ty = M->getTypeByName("llvm.dbg.global_variable.type");
- if (!Ty) return 0;
-
- Ty = PointerType::get(Ty, 0);
-
- Value *Val = V->stripPointerCasts();
- for (Value::use_iterator I = Val->use_begin(), E = Val->use_end();
- I != E; ++I) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- Value *VV = CE;
-
- while (VV->hasOneUse())
- VV = *VV->use_begin();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return 0;
- if (VV->getType() == Ty)
- return VV;
- }
- }
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(cast_or_null<MDNode>(NMD->getElement(i)));
+ if (DIG.isNull())
+ continue;
+ if (DIG.getGlobal() == V)
+ return DIG.getNode();
}
-
- if (Val->getType() == Ty)
- return Val;
-
return 0;
}
@@ -990,8 +1232,8 @@ namespace llvm {
return 0;
}
- bool getLocationInfo(const Value *V, std::string &DisplayName,
- std::string &Type, unsigned &LineNo, std::string &File,
+bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo, std::string &File,
std::string &Dir) {
DICompileUnit Unit;
DIType TypeD;
@@ -999,81 +1241,56 @@ namespace llvm {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
Value *DIGV = findDbgGlobalDeclare(GV);
if (!DIGV) return false;
- DIGlobalVariable Var(cast<GlobalVariable>(DIGV));
+ DIGlobalVariable Var(cast<MDNode>(DIGV));
- Var.getDisplayName(DisplayName);
+ if (const char *D = Var.getDisplayName())
+ DisplayName = D;
LineNo = Var.getLineNumber();
Unit = Var.getCompileUnit();
TypeD = Var.getType();
} else {
const DbgDeclareInst *DDI = findDbgDeclare(V);
if (!DDI) return false;
- DIVariable Var(cast<GlobalVariable>(DDI->getVariable()));
+ DIVariable Var(cast<MDNode>(DDI->getVariable()));
- Var.getName(DisplayName);
+ if (const char *D = Var.getName())
+ DisplayName = D;
LineNo = Var.getLineNumber();
Unit = Var.getCompileUnit();
TypeD = Var.getType();
}
- TypeD.getName(Type);
- Unit.getFilename(File);
- Unit.getDirectory(Dir);
+ if (const char *T = TypeD.getName())
+ Type = T;
+ if (const char *F = Unit.getFilename())
+ File = F;
+ if (const char *D = Unit.getDirectory())
+ Dir = D;
return true;
}
- /// CollectDebugInfoAnchors - Collect debugging information anchors.
- void CollectDebugInfoAnchors(Module &M,
- SmallVector<GlobalVariable *, 2> &CUs,
- SmallVector<GlobalVariable *, 4> &GVs,
- SmallVector<GlobalVariable *, 4> &SPs) {
-
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; GVI++) {
- GlobalVariable *GV = GVI;
- if (GV->hasName() && strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0
- && GV->isConstant() && GV->hasInitializer()) {
- DICompileUnit C(GV);
- if (C.isNull() == false) {
- CUs.push_back(GV);
- continue;
- }
- DIGlobalVariable G(GV);
- if (G.isNull() == false) {
- GVs.push_back(GV);
- continue;
- }
- DISubprogram S(GV);
- if (S.isNull() == false) {
- SPs.push_back(GV);
- continue;
- }
- }
- }
- }
-
- /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug
/// info intrinsic.
- bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI,
+ bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLev);
}
- /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(FSI.getSubprogram(), OptLev);
}
- /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLev);
}
- /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI,
CodeGenOpt::Level OptLev) {
@@ -1081,14 +1298,14 @@ namespace llvm {
}
- /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(DI.getVariable(), OptLev);
}
- /// ExtractDebugLocation - Extract debug location information
+ /// ExtractDebugLocation - Extract debug location information
/// from llvm.dbg.stoppoint intrinsic.
DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI,
DebugLocTracker &DebugLocInfo) {
@@ -1096,7 +1313,7 @@ namespace llvm {
Value *Context = SPI.getContext();
// If this location is already tracked then use it.
- DebugLocTuple Tuple(cast<GlobalVariable>(Context), SPI.getLine(),
+ DebugLocTuple Tuple(cast<MDNode>(Context), NULL, SPI.getLine(),
SPI.getColumn());
DenseMap<DebugLocTuple, unsigned>::iterator II
= DebugLocInfo.DebugIdMap.find(Tuple);
@@ -1107,23 +1324,48 @@ namespace llvm {
unsigned Id = DebugLocInfo.DebugLocations.size();
DebugLocInfo.DebugLocations.push_back(Tuple);
DebugLocInfo.DebugIdMap[Tuple] = Id;
-
+
+ return DebugLoc::get(Id);
+ }
+
+ /// ExtractDebugLocation - Extract debug location information
+ /// from DILocation.
+ DebugLoc ExtractDebugLocation(DILocation &Loc,
+ DebugLocTracker &DebugLocInfo) {
+ DebugLoc DL;
+ MDNode *Context = Loc.getScope().getNode();
+ MDNode *InlinedLoc = NULL;
+ if (!Loc.getOrigLocation().isNull())
+ InlinedLoc = Loc.getOrigLocation().getNode();
+ // If this location is already tracked then use it.
+ DebugLocTuple Tuple(Context, InlinedLoc, Loc.getLineNumber(),
+ Loc.getColumnNumber());
+ DenseMap<DebugLocTuple, unsigned>::iterator II
+ = DebugLocInfo.DebugIdMap.find(Tuple);
+ if (II != DebugLocInfo.DebugIdMap.end())
+ return DebugLoc::get(II->second);
+
+ // Add a new location entry.
+ unsigned Id = DebugLocInfo.DebugLocations.size();
+ DebugLocInfo.DebugLocations.push_back(Tuple);
+ DebugLocInfo.DebugIdMap[Tuple] = Id;
+
return DebugLoc::get(Id);
}
- /// ExtractDebugLocation - Extract debug location information
+ /// ExtractDebugLocation - Extract debug location information
/// from llvm.dbg.func_start intrinsic.
DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI,
DebugLocTracker &DebugLocInfo) {
DebugLoc DL;
Value *SP = FSI.getSubprogram();
- DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DISubprogram Subprogram(cast<MDNode>(SP));
unsigned Line = Subprogram.getLineNumber();
DICompileUnit CU(Subprogram.getCompileUnit());
// If this location is already tracked then use it.
- DebugLocTuple Tuple(CU.getGV(), Line, /* Column */ 0);
+ DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0);
DenseMap<DebugLocTuple, unsigned>::iterator II
= DebugLocInfo.DebugIdMap.find(Tuple);
if (II != DebugLocInfo.DebugIdMap.end())
@@ -1133,13 +1375,13 @@ namespace llvm {
unsigned Id = DebugLocInfo.DebugLocations.size();
DebugLocInfo.DebugLocations.push_back(Tuple);
DebugLocInfo.DebugIdMap[Tuple] = Id;
-
+
return DebugLoc::get(Id);
}
/// isInlinedFnStart - Return true if FSI is starting an inlined function.
bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<GlobalVariable>(FSI.getSubprogram()));
+ DISubprogram Subprogram(cast<MDNode>(FSI.getSubprogram()));
if (Subprogram.describes(CurrentFn))
return false;
@@ -1148,11 +1390,10 @@ namespace llvm {
/// isInlinedFnEnd - Return true if REI is ending an inlined function.
bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+ DISubprogram Subprogram(cast<MDNode>(REI.getContext()));
if (Subprogram.isNull() || Subprogram.describes(CurrentFn))
return false;
return true;
}
-
}
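
Taken together, the DebugInfo.cpp hunks above swap one storage scheme for
another: each debug descriptor used to be a ConstantStruct wrapped in a
GlobalVariable placed in the "llvm.metadata" section, and is now a uniqued
MDNode that carries the fields directly. A condensed restatement of the
recurring before/after pattern, abbreviated from the CreateVariable hunk:

// Before: materialize a struct constant and wrap it in a global.
Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
                                        GlobalValue::InternalLinkage,
                                        Init, "llvm.dbg.variable", &M);
GV->setSection("llvm.metadata");
return DIVariable(GV);

// After: hand the same elements to the metadata system.
return DIVariable(MDNode::get(VMContext, &Elts[0], 6));

Because MDNode::get and MDString::get unique their arguments per LLVMContext,
the patch can also drop the StringCache and SimpleConstantCache maps that the
GlobalVariable scheme needed.
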
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 3fb65265472d..1c9159dfbfcc 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -60,9 +60,11 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/System/Atomic.h"
@@ -84,7 +86,9 @@
#define FULL_UNIVERSAL 0
using namespace llvm;
+#ifndef NDEBUG
STATISTIC(NumIters , "Number of iterations to reach convergence");
+#endif
STATISTIC(NumConstraints, "Number of constraints");
STATISTIC(NumNodes , "Number of nodes");
STATISTIC(NumUnified , "Number of variables unified");
@@ -507,7 +511,7 @@ namespace {
#ifndef NDEBUG
V->dump();
#endif
- assert(0 && "Value does not have a node in the points-to graph!");
+ llvm_unreachable("Value does not have a node in the points-to graph!");
}
return I->second;
}
@@ -589,9 +593,12 @@ namespace {
friend class InstVisitor<Andersens>;
void visitReturnInst(ReturnInst &RI);
void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
- void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); }
+ void visitCallInst(CallInst &CI) {
+ if (isMalloc(&CI)) visitAllocationInst(CI);
+ else visitCallSite(CallSite(&CI));
+ }
void visitCallSite(CallSite CS);
- void visitAllocationInst(AllocationInst &AI);
+ void visitAllocationInst(Instruction &I);
void visitLoadInst(LoadInst &LI);
void visitStoreInst(StoreInst &SI);
void visitGetElementPtrInst(GetElementPtrInst &GEP);
@@ -606,7 +613,7 @@ namespace {
//===------------------------------------------------------------------===//
    // Implement Analyze interface
//
- void print(std::ostream &O, const Module* M) const {
+ void print(raw_ostream &O, const Module*) const {
PrintPointsToGraph();
}
};
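The visitCallInst change routes calls recognized by isMalloc() through the allocation path, so a malloc call gets its own points-to object instead of being modeled as an opaque call. A toy standalone sketch of that dispatch, with a hypothetical predicate in place of isMalloc():

    #include <iostream>
    #include <string>

    struct Call { std::string Callee; };

    // Hypothetical stand-in for isMalloc(): a direct call to "malloc".
    static bool looksLikeMalloc(const Call &C) { return C.Callee == "malloc"; }

    static void handleAllocationSite(const Call &) {
      std::cout << "new abstract heap object\n";   // address-of constraint
    }
    static void handleCallSite(const Call &) {
      std::cout << "ordinary interprocedural call\n";
    }

    void visitCall(const Call &C) {
      if (looksLikeMalloc(C))
        handleAllocationSite(C);  // treat it like an AllocationInst
      else
        handleCallSite(C);
    }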
@@ -614,7 +621,8 @@ namespace {
char Andersens::ID = 0;
static RegisterPass<Andersens>
-X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true);
+X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)",
+ false, true);
static RegisterAnalysisGroup<AliasAnalysis> Y(X);
// Initialize Timestamp Counter (static).
@@ -786,6 +794,8 @@ void Andersens::IdentifyObjects(Module &M) {
ValueNodes[&*II] = NumObjects++;
if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II))
ObjectNodes[AI] = NumObjects++;
+ else if (isMalloc(&*II))
+ ObjectNodes[&*II] = NumObjects++;
}
// Calls to inline asm need to be added as well because the callee isn't
@@ -825,11 +835,11 @@ unsigned Andersens::getNodeForConstantPointer(Constant *C) {
case Instruction::BitCast:
return getNodeForConstantPointer(CE->getOperand(0));
default:
- cerr << "Constant Expr not yet handled: " << *CE << "\n";
- assert(0);
+ errs() << "Constant Expr not yet handled: " << *CE << "\n";
+ llvm_unreachable(0);
}
} else {
- assert(0 && "Unknown constant pointer!");
+ llvm_unreachable("Unknown constant pointer!");
}
return 0;
}
@@ -852,11 +862,11 @@ unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) {
case Instruction::BitCast:
return getNodeForConstantPointerTarget(CE->getOperand(0));
default:
- cerr << "Constant Expr not yet handled: " << *CE << "\n";
- assert(0);
+ errs() << "Constant Expr not yet handled: " << *CE << "\n";
+ llvm_unreachable(0);
}
} else {
- assert(0 && "Unknown constant pointer!");
+ llvm_unreachable("Unknown constant pointer!");
}
return 0;
}
@@ -996,7 +1006,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) {
if (!isa<PointerType>(V->getType())) return true;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (dyn_cast<LoadInst>(*UI)) {
+ if (isa<LoadInst>(*UI)) {
return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
if (V == SI->getOperand(1)) {
@@ -1027,7 +1037,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) {
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
- } else if (dyn_cast<FreeInst>(*UI)) {
+ } else if (isa<FreeInst>(*UI)) {
return false;
} else {
return true;
@@ -1060,7 +1070,7 @@ void Andersens::CollectConstraints(Module &M) {
Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I),
ObjectIndex));
- if (I->hasInitializer()) {
+ if (I->hasDefinitiveInitializer()) {
AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer());
} else {
// If it doesn't have an initializer (i.e. it's defined in another
@@ -1152,15 +1162,15 @@ void Andersens::visitInstruction(Instruction &I) {
return;
default:
// Is this something we aren't handling yet?
- cerr << "Unknown instruction: " << I;
- abort();
+ errs() << "Unknown instruction: " << I;
+ llvm_unreachable(0);
}
}
-void Andersens::visitAllocationInst(AllocationInst &AI) {
- unsigned ObjectIndex = getObject(&AI);
- GraphNodes[ObjectIndex].setValue(&AI);
- Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI),
+void Andersens::visitAllocationInst(Instruction &I) {
+ unsigned ObjectIndex = getObject(&I);
+ GraphNodes[ObjectIndex].setValue(&I);
+ Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I),
ObjectIndex));
}
@@ -1243,7 +1253,7 @@ void Andersens::visitSelectInst(SelectInst &SI) {
}
void Andersens::visitVAArg(VAArgInst &I) {
- assert(0 && "vaarg not handled yet!");
+ llvm_unreachable("vaarg not handled yet!");
}
/// AddConstraintsForCall - Add constraints for a call with actual arguments
@@ -1395,12 +1405,6 @@ bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
return Result;
}
-void dumpToDOUT(SparseBitVector<> *bitmap) {
-#ifndef NDEBUG
- dump(*bitmap, DOUT);
-#endif
-}
-
/// Clump together address taken variables so that the points-to sets use up
/// less space and can be operated on faster.
@@ -1424,7 +1428,7 @@ void Andersens::ClumpAddressTaken() {
unsigned Pos = NewPos++;
Translate[i] = Pos;
NewGraphNodes.push_back(GraphNodes[i]);
- DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n");
}
// I believe this ends up being faster than making two vectors and splicing
@@ -1434,7 +1438,7 @@ void Andersens::ClumpAddressTaken() {
unsigned Pos = NewPos++;
Translate[i] = Pos;
NewGraphNodes.push_back(GraphNodes[i]);
- DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n");
}
}
@@ -1443,7 +1447,7 @@ void Andersens::ClumpAddressTaken() {
unsigned Pos = NewPos++;
Translate[i] = Pos;
NewGraphNodes.push_back(GraphNodes[i]);
- DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n");
}
}
@@ -1515,7 +1519,7 @@ void Andersens::ClumpAddressTaken() {
/// receive &D from E anyway.
void Andersens::HVN() {
- DOUT << "Beginning HVN\n";
+ DEBUG(errs() << "Beginning HVN\n");
// Build a predecessor graph. This is like our constraint graph with the
// edges going in the opposite direction, and there are edges for all the
// constraints, instead of just copy constraints. We also build implicit
@@ -1586,7 +1590,7 @@ void Andersens::HVN() {
Node2DFS.clear();
Node2Deleted.clear();
Node2Visited.clear();
- DOUT << "Finished HVN\n";
+ DEBUG(errs() << "Finished HVN\n");
}
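The DOUT-to-DEBUG(errs() << ...) conversion throughout this file relies on the DEBUG macro compiling to nothing in release builds, so the streamed operands are never even evaluated. A standalone sketch of the idea behind that macro (not LLVM's actual definition, which also checks the -debug flag at runtime):

    #include <iostream>

    #ifndef NDEBUG
    #define MY_DEBUG(X) do { X; } while (false)  // debug build: run it
    #else
    #define MY_DEBUG(X) do { } while (false)     // release build: X vanishes
    #endif

    int main() {
      int Node = 7, Pos = 3;
      MY_DEBUG(std::cerr << "Renumbering node " << Node
                         << " to node " << Pos << "\n");
      return 0;
    }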
@@ -1710,7 +1714,7 @@ void Andersens::HVNValNum(unsigned NodeIndex) {
/// and is equivalent to value numbering the collapsed constraint graph
/// including evaluating unions.
void Andersens::HU() {
- DOUT << "Beginning HU\n";
+ DEBUG(errs() << "Beginning HU\n");
// Build a predecessor graph. This is like our constraint graph with the
// edges going in the opposite direction, and there are edges for all the
// constraints, instead of just copy constraints. We also build implicit
@@ -1790,7 +1794,7 @@ void Andersens::HU() {
}
// PEClass nodes will be deleted by the deleting of N->PointsTo in our caller.
Set2PEClass.clear();
- DOUT << "Finished HU\n";
+ DEBUG(errs() << "Finished HU\n");
}
@@ -1968,12 +1972,12 @@ void Andersens::RewriteConstraints() {
// to anything.
if (LHSLabel == 0) {
DEBUG(PrintNode(&GraphNodes[LHSNode]));
- DOUT << " is a non-pointer, ignoring constraint.\n";
+ DEBUG(errs() << " is a non-pointer, ignoring constraint.\n");
continue;
}
if (RHSLabel == 0) {
DEBUG(PrintNode(&GraphNodes[RHSNode]));
- DOUT << " is a non-pointer, ignoring constraint.\n";
+ DEBUG(errs() << " is a non-pointer, ignoring constraint.\n");
continue;
}
// This constraint may be useless, and it may become useless as we translate
@@ -2021,19 +2025,19 @@ void Andersens::PrintLabels() const {
if (i < FirstRefNode) {
PrintNode(&GraphNodes[i]);
} else if (i < FirstAdrNode) {
- DOUT << "REF(";
+ DEBUG(errs() << "REF(");
PrintNode(&GraphNodes[i-FirstRefNode]);
- DOUT <<")";
+ DEBUG(errs() <<")");
} else {
- DOUT << "ADR(";
+ DEBUG(errs() << "ADR(");
PrintNode(&GraphNodes[i-FirstAdrNode]);
- DOUT <<")";
+ DEBUG(errs() <<")");
}
- DOUT << " has pointer label " << GraphNodes[i].PointerEquivLabel
+ DEBUG(errs() << " has pointer label " << GraphNodes[i].PointerEquivLabel
<< " and SCC rep " << VSSCCRep[i]
<< " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct")
- << "\n";
+ << "\n");
}
}
@@ -2047,7 +2051,7 @@ void Andersens::PrintLabels() const {
/// operation are stored in SDT and are later used in SolveConstraints()
/// and UniteNodes().
void Andersens::HCD() {
- DOUT << "Starting HCD.\n";
+ DEBUG(errs() << "Starting HCD.\n");
HCDSCCRep.resize(GraphNodes.size());
for (unsigned i = 0; i < GraphNodes.size(); ++i) {
@@ -2096,7 +2100,7 @@ void Andersens::HCD() {
Node2Visited.clear();
Node2Deleted.clear();
HCDSCCRep.clear();
- DOUT << "HCD complete.\n";
+ DEBUG(errs() << "HCD complete.\n");
}
// Component of HCD:
@@ -2168,7 +2172,7 @@ void Andersens::Search(unsigned Node) {
/// Optimize the constraints by performing offline variable substitution and
/// other optimizations.
void Andersens::OptimizeConstraints() {
- DOUT << "Beginning constraint optimization\n";
+ DEBUG(errs() << "Beginning constraint optimization\n");
SDTActive = false;
@@ -2252,7 +2256,7 @@ void Andersens::OptimizeConstraints() {
// HCD complete.
- DOUT << "Finished constraint optimization\n";
+ DEBUG(errs() << "Finished constraint optimization\n");
FirstRefNode = 0;
FirstAdrNode = 0;
}
@@ -2260,7 +2264,7 @@ void Andersens::OptimizeConstraints() {
/// Unite pointer but not location equivalent variables, now that the constraint
/// graph is built.
void Andersens::UnitePointerEquivalences() {
- DOUT << "Uniting remaining pointer equivalences\n";
+ DEBUG(errs() << "Uniting remaining pointer equivalences\n");
for (unsigned i = 0; i < GraphNodes.size(); ++i) {
if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) {
unsigned Label = GraphNodes[i].PointerEquivLabel;
@@ -2269,7 +2273,7 @@ void Andersens::UnitePointerEquivalences() {
UniteNodes(i, PENLEClass2Node[Label]);
}
}
- DOUT << "Finished remaining pointer equivalences\n";
+ DEBUG(errs() << "Finished remaining pointer equivalences\n");
PENLEClass2Node.clear();
}
@@ -2425,7 +2429,7 @@ void Andersens::SolveConstraints() {
std::vector<unsigned int> RSV;
#endif
while( !CurrWL->empty() ) {
- DOUT << "Starting iteration #" << ++NumIters << "\n";
+ DEBUG(errs() << "Starting iteration #" << ++NumIters << "\n");
Node* CurrNode;
unsigned CurrNodeIndex;
@@ -2728,11 +2732,11 @@ unsigned Andersens::UniteNodes(unsigned First, unsigned Second,
SecondNode->OldPointsTo = NULL;
NumUnified++;
- DOUT << "Unified Node ";
+ DEBUG(errs() << "Unified Node ");
DEBUG(PrintNode(FirstNode));
- DOUT << " and Node ";
+ DEBUG(errs() << " and Node ");
DEBUG(PrintNode(SecondNode));
- DOUT << "\n";
+ DEBUG(errs() << "\n");
if (SDTActive)
if (SDT[Second] >= 0) {
@@ -2777,17 +2781,17 @@ unsigned Andersens::FindNode(unsigned NodeIndex) const {
void Andersens::PrintNode(const Node *N) const {
if (N == &GraphNodes[UniversalSet]) {
- cerr << "<universal>";
+ errs() << "<universal>";
return;
} else if (N == &GraphNodes[NullPtr]) {
- cerr << "<nullptr>";
+ errs() << "<nullptr>";
return;
} else if (N == &GraphNodes[NullObject]) {
- cerr << "<null>";
+ errs() << "<null>";
return;
}
if (!N->getValue()) {
- cerr << "artificial" << (intptr_t) N;
+ errs() << "artificial" << (intptr_t) N;
return;
}
@@ -2796,85 +2800,85 @@ void Andersens::PrintNode(const Node *N) const {
if (Function *F = dyn_cast<Function>(V)) {
if (isa<PointerType>(F->getFunctionType()->getReturnType()) &&
N == &GraphNodes[getReturnNode(F)]) {
- cerr << F->getName() << ":retval";
+ errs() << F->getName() << ":retval";
return;
} else if (F->getFunctionType()->isVarArg() &&
N == &GraphNodes[getVarargNode(F)]) {
- cerr << F->getName() << ":vararg";
+ errs() << F->getName() << ":vararg";
return;
}
}
if (Instruction *I = dyn_cast<Instruction>(V))
- cerr << I->getParent()->getParent()->getName() << ":";
+ errs() << I->getParent()->getParent()->getName() << ":";
else if (Argument *Arg = dyn_cast<Argument>(V))
- cerr << Arg->getParent()->getName() << ":";
+ errs() << Arg->getParent()->getName() << ":";
if (V->hasName())
- cerr << V->getName();
+ errs() << V->getName();
else
- cerr << "(unnamed)";
+ errs() << "(unnamed)";
- if (isa<GlobalValue>(V) || isa<AllocationInst>(V))
+ if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isMalloc(V))
if (N == &GraphNodes[getObject(V)])
- cerr << "<mem>";
+ errs() << "<mem>";
}
void Andersens::PrintConstraint(const Constraint &C) const {
if (C.Type == Constraint::Store) {
- cerr << "*";
+ errs() << "*";
if (C.Offset != 0)
- cerr << "(";
+ errs() << "(";
}
PrintNode(&GraphNodes[C.Dest]);
if (C.Type == Constraint::Store && C.Offset != 0)
- cerr << " + " << C.Offset << ")";
- cerr << " = ";
+ errs() << " + " << C.Offset << ")";
+ errs() << " = ";
if (C.Type == Constraint::Load) {
- cerr << "*";
+ errs() << "*";
if (C.Offset != 0)
- cerr << "(";
+ errs() << "(";
}
else if (C.Type == Constraint::AddressOf)
- cerr << "&";
+ errs() << "&";
PrintNode(&GraphNodes[C.Src]);
if (C.Offset != 0 && C.Type != Constraint::Store)
- cerr << " + " << C.Offset;
+ errs() << " + " << C.Offset;
if (C.Type == Constraint::Load && C.Offset != 0)
- cerr << ")";
- cerr << "\n";
+ errs() << ")";
+ errs() << "\n";
}
void Andersens::PrintConstraints() const {
- cerr << "Constraints:\n";
+ errs() << "Constraints:\n";
for (unsigned i = 0, e = Constraints.size(); i != e; ++i)
PrintConstraint(Constraints[i]);
}
void Andersens::PrintPointsToGraph() const {
- cerr << "Points-to graph:\n";
+ errs() << "Points-to graph:\n";
for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) {
const Node *N = &GraphNodes[i];
if (FindNode(i) != i) {
PrintNode(N);
- cerr << "\t--> same as ";
+ errs() << "\t--> same as ";
PrintNode(&GraphNodes[FindNode(i)]);
- cerr << "\n";
+ errs() << "\n";
} else {
- cerr << "[" << (N->PointsTo->count()) << "] ";
+ errs() << "[" << (N->PointsTo->count()) << "] ";
PrintNode(N);
- cerr << "\t--> ";
+ errs() << "\t--> ";
bool first = true;
for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
bi != N->PointsTo->end();
++bi) {
if (!first)
- cerr << ", ";
+ errs() << ", ";
PrintNode(&GraphNodes[*bi]);
first = false;
}
- cerr << "\n";
+ errs() << "\n";
}
}
}
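FindNode and UniteNodes, used throughout the solver above, amount to a classic union-find over graph node indices. A simplified self-contained sketch (path halving, no union-by-rank; the real implementation also migrates per-node state on union):

    #include <numeric>
    #include <vector>

    struct UnionFind {
      std::vector<unsigned> Parent;

      explicit UnionFind(unsigned N) : Parent(N) {
        std::iota(Parent.begin(), Parent.end(), 0u);  // each node is its own rep
      }

      // Find the representative, compressing the path as we walk it.
      unsigned find(unsigned X) {
        while (Parent[X] != X) {
          Parent[X] = Parent[Parent[X]];  // path halving
          X = Parent[X];
        }
        return X;
      }

      // Merge the classes of A and B; return the surviving representative.
      unsigned unite(unsigned A, unsigned B) {
        A = find(A);
        B = find(B);
        if (A != B)
          Parent[B] = A;
        return A;
      }
    };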
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 6dabcdb94bf1..e2b288d1ba96 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -18,8 +18,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
-#include <ostream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -54,7 +53,7 @@ public:
CallsExternalNode = new CallGraphNode(0);
Root = 0;
- // Add every function to the call graph...
+ // Add every function to the call graph.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
addToCallGraph(I);
@@ -68,30 +67,21 @@ public:
AU.setPreservesAll();
}
- void print(std::ostream *o, const Module *M) const {
- if (o) print(*o, M);
- }
-
- virtual void print(std::ostream &o, const Module *M) const {
- o << "CallGraph Root is: ";
+ virtual void print(raw_ostream &OS, const Module *) const {
+ OS << "CallGraph Root is: ";
if (Function *F = getRoot()->getFunction())
- o << F->getName() << "\n";
- else
- o << "<<null function: 0x" << getRoot() << ">>\n";
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << getRoot() << ">>\n";
+ }
- CallGraph::print(o, M);
+ CallGraph::print(OS, 0);
}
virtual void releaseMemory() {
destroy();
}
- /// dump - Print out this call graph.
- ///
- inline void dump() const {
- print(cerr, Mod);
- }
-
CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
@@ -179,21 +169,20 @@ void CallGraph::initialize(Module &M) {
}
void CallGraph::destroy() {
- if (!FunctionMap.empty()) {
- for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
- I != E; ++I)
- delete I->second;
- FunctionMap.clear();
- }
+ if (FunctionMap.empty()) return;
+
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
}
-void CallGraph::print(std::ostream &OS, const Module *M) const {
+void CallGraph::print(raw_ostream &OS, Module*) const {
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
-
void CallGraph::dump() const {
- print(cerr, 0);
+ print(errs(), 0);
}
//===----------------------------------------------------------------------===//
@@ -207,7 +196,7 @@ void CallGraph::dump() const {
// is to dropAllReferences before calling this.
//
Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
- assert(CGN->CalledFunctions.empty() && "Cannot remove function from call "
+ assert(CGN->empty() && "Cannot remove function from call "
"graph if it references other functions!");
Function *F = CGN->getFunction(); // Get the function for the call graph node
delete CGN; // Delete the call graph node for this func
@@ -217,20 +206,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
return F;
}
-// changeFunction - This method changes the function associated with this
-// CallGraphNode, for use by transformations that need to change the prototype
-// of a Function (thus they must create a new Function and move the old code
-// over).
-void CallGraph::changeFunction(Function *OldF, Function *NewF) {
- iterator I = FunctionMap.find(OldF);
- CallGraphNode *&New = FunctionMap[NewF];
- assert(I != FunctionMap.end() && I->second && !New &&
- "OldF didn't exist in CG or NewF already does!");
- New = I->second;
- New->F = NewF;
- FunctionMap.erase(I);
-}
-
// getOrInsertFunction - This method is identical to calling operator[], but
// it will insert a new CallGraphNode for the specified function if one does
// not already exist.
@@ -242,11 +217,13 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
return CGN = new CallGraphNode(const_cast<Function*>(F));
}
-void CallGraphNode::print(std::ostream &OS) const {
+void CallGraphNode::print(raw_ostream &OS) const {
if (Function *F = getFunction())
- OS << "Call graph node for function: '" << F->getName() <<"'\n";
+ OS << "Call graph node for function: '" << F->getName() << "'";
else
- OS << "Call graph node <<null function: 0x" << this << ">>:\n";
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n';
for (const_iterator I = begin(), E = end(); I != E; ++I)
if (Function *FI = I->second->getFunction())
@@ -256,7 +233,7 @@ void CallGraphNode::print(std::ostream &OS) const {
OS << "\n";
}
-void CallGraphNode::dump() const { print(cerr); }
+void CallGraphNode::dump() const { print(errs()); }
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
@@ -264,8 +241,10 @@ void CallGraphNode::dump() const { print(cerr); }
void CallGraphNode::removeCallEdgeFor(CallSite CS) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
- if (I->first == CS) {
- CalledFunctions.erase(I);
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
return;
}
}
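The rewritten removeCallEdgeFor swaps the doomed entry with the vector's last element and pops, an O(1) unordered erase instead of the O(n) shifting that CalledFunctions.erase(I) would do. The idiom, as a standalone template:

    #include <vector>

    // Remove *I from V in O(1), not preserving element order: overwrite the
    // victim with the last element, then shrink the vector by one.
    template <typename T>
    void swapErase(std::vector<T> &V, typename std::vector<T>::iterator I) {
      *I = V.back();
      V.pop_back();
    }

The trade-off is that iterators past I are invalidated, which is why callers that keep iterating must re-fetch end(), as RefreshCallGraph below is careful to do.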
@@ -278,6 +257,7 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) {
void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
CalledFunctions[i] = CalledFunctions.back();
CalledFunctions.pop_back();
--i; --e;
@@ -290,21 +270,27 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
CallRecord &CR = *I;
- if (CR.second == Callee && !CR.first.getInstruction()) {
- CalledFunctions.erase(I);
+ if (CR.second == Callee && CR.first == 0) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
return;
}
}
}
-/// replaceCallSite - Make the edge in the node for Old CallSite be for
-/// New CallSite instead. Note that this method takes linear time, so it
-/// should be used sparingly.
-void CallGraphNode::replaceCallSite(CallSite Old, CallSite New) {
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+ CallSite NewCS, CallGraphNode *NewNode){
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
- assert(I != CalledFunctions.end() && "Cannot find callsite to replace!");
- if (I->first == Old) {
- I->first = New;
+    assert(I != CalledFunctions.end() && "Cannot find callsite to replace!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
return;
}
}
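replaceCallEdge also keeps the callee nodes' reference counts balanced: the old target is released before the edge is rewritten, and the new one is retained afterward. A minimal sketch of that bookkeeping with a hypothetical Node type:

    struct Node {
      int NumRefs;
      void AddRef() { ++NumRefs; }
      void DropRef() { --NumRefs; }
    };

    struct Edge {
      const void *Site;  // the call instruction, opaque here
      Node *Callee;
    };

    // Retarget E, keeping both endpoints' reference counts consistent.
    void replaceEdge(Edge &E, const void *NewSite, Node *NewCallee) {
      E.Callee->DropRef();  // old target loses a user
      E.Site = NewSite;
      E.Callee = NewCallee;
      NewCallee->AddRef();  // new target gains one
    }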
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 3880d0a10bb6..a96a5c591f83 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -15,22 +15,25 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "cgscc-passmgr"
#include "llvm/CallGraphSCCPass.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/PassManagers.h"
#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// CGPassManager
//
-/// CGPassManager manages FPPassManagers and CalLGraphSCCPasses.
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
namespace {
class CGPassManager : public ModulePass, public PMDataManager {
-
public:
static char ID;
explicit CGPassManager(int Depth)
@@ -56,7 +59,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "Call Graph SCC Pass Manager\n";
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
P->dumpPassStructure(Offset + 1);
@@ -65,56 +68,275 @@ public:
}
Pass *getContainedPass(unsigned N) {
- assert ( N < PassVector.size() && "Pass number out of range!");
- Pass *FP = static_cast<Pass *>(PassVector[N]);
- return FP;
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
}
virtual PassManagerType getPassManagerType() const {
return PMT_CallGraphPassManager;
}
+
+private:
+ bool RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate);
+ void RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, CallGraph &CG,
+ bool IsCheckingMode);
};
-}
+} // end anonymous namespace.
char CGPassManager::ID = 0;
+
+bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate) {
+ bool Changed = false;
+ if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass*>(P)) {
+ if (!CallGraphUpToDate) {
+ RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ Timer *T = StartPassTimer(CGSP);
+ Changed = CGSP->runOnSCC(CurSCC);
+ StopPassTimer(CGSP, T);
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+ FPPassManager *FPP = dynamic_cast<FPPassManager *>(P);
+ assert(FPP && "Invalid CGPassManager member");
+
+ // Run pass P on all functions in the current SCC.
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
+ if (Function *F = CurSCC[i]->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ Timer *T = StartPassTimer(FPP);
+ Changed |= FPP->runOnFunction(*F);
+ StopPassTimer(FPP, T);
+ }
+ }
+
+  // The function pass(es) modified the IR, so they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(errs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// RefreshCallGraph - Scan the functions in the specified SCC and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(errs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i)
+ CurSCC[i]->dump();
+ );
+
+ bool MadeChange = false;
+
+ // Scan all functions in the SCC.
+ for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) {
+ CallGraphNode *CGN = CurSCC[sccidx];
+ Function *F = CGN->getFunction();
+ if (F == 0 || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (I->first == 0 ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, then the function
+ // pass RAUW'd a call with another value. This can happen when
+          // constant folding of well-known functions happens, etc.
+ CallSite::get(I->first).getInstruction() == 0) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // Just remove the edge from the set of callees, keep track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+ CallSites.insert(std::make_pair(I->first, I->second));
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS = CallSite::get(I);
+ if (!CS.getInstruction() || isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == 0)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction())
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ else
+ CalleeNode = CG.getCallsExternalNode();
+
+ // Update the edge target in CGN.
+ for (CallGraphNode::iterator I = CGN->begin(); ; ++I) {
+ assert(I != CGN->end() && "Didn't find call entry");
+ if (I->first == CS.getInstruction()) {
+ I->second = CalleeNode;
+ break;
+ }
+ }
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it. We assume that
+ // newly introduced call sites won't be indirect. This could be fixed
+ // in the future.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction())
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ else
+ CalleeNode = CG.getCallsExternalNode();
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+ // After scanning this function, if we still have entries in callsites, then
+    // they are dangling pointers. WeakVH should save us from this, so abort if
+ // this happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+    // large SCCs.
+ if ((sccidx & 15) == 0)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ errs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i)
+ CurSCC[i]->dump();
+ } else {
+ errs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+}
+
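Stripped of the callgraph types, RefreshCallGraph is a two-pass reconciliation: pass one indexes the node's recorded edges by call instruction and drops stale ones; pass two walks the real calls, checking known sites off and adding edges for new ones. A standalone sketch of that shape (STL containers, hypothetical types; the retargeting of edges whose callee changed is elided):

    #include <string>
    #include <unordered_map>
    #include <vector>

    struct CallEdge { const void *Inst; std::string Callee; };

    void refresh(std::vector<CallEdge> &Recorded,
                 const std::vector<CallEdge> &InFunction) {
      // Pass 1: index surviving edges by instruction; a null Inst models a
      // call the WeakVH saw get deleted.
      std::unordered_map<const void *, std::string> Sites;
      std::vector<CallEdge> Kept;
      for (const CallEdge &E : Recorded)
        if (E.Inst && !Sites.count(E.Inst)) {
          Sites.emplace(E.Inst, E.Callee);
          Kept.push_back(E);
        }
      Recorded.swap(Kept);

      // Pass 2: walk the calls actually in the function body.
      for (const CallEdge &C : InFunction) {
        if (Sites.erase(C.Inst))
          continue;              // edge already recorded and still accurate
        Recorded.push_back(C);   // newly introduced call site
      }
      // Anything left in Sites would be a dangling edge (asserted above).
    }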
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraph>();
bool Changed = doInitialization(CG);
- // Walk SCC
- for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG);
- I != E; ++I) {
-
- // Run all passes on current SCC
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- Pass *P = getContainedPass(Index);
-
- dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, "");
+ std::vector<CallGraphNode*> CurSCC;
+
+ // Walk the callgraph in bottom-up SCC order.
+ for (scc_iterator<CallGraph*> CGI = scc_begin(&CG), E = scc_end(&CG);
+ CGI != E;) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ CurSCC = *CGI;
+ ++CGI;
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+    // up-to-date or not. The CGSCC pass manager runs two types of passes:
+ // CallGraphSCC Passes and other random function passes. Because other
+ // random function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list,
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+#ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ CurSCC[i]->print(OS);
+ }
+ OS.flush();
+#endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
dumpRequiredSet(P);
initializeAnalysisImpl(P);
- StartPassTimer(P);
- if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P))
- Changed |= CGSP->runOnSCC(*I); // TODO : What if CG is changed ?
- else {
- FPPassManager *FPP = dynamic_cast<FPPassManager *>(P);
- assert (FPP && "Invalid CGPassManager member");
-
- // Run pass P on all functions current SCC
- std::vector<CallGraphNode*> &SCC = *I;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
- if (F) {
- dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getNameStart());
- Changed |= FPP->runOnFunction(*F);
- }
- }
- }
- StopPassTimer(P);
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate);
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
@@ -125,6 +347,11 @@ bool CGPassManager::runOnModule(Module &M) {
recordAvailableAnalysis(P);
removeDeadPasses(P, "", ON_CG_MSG);
}
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ RefreshCallGraph(CurSCC, CG, false);
}
Changed |= doFinalization(CG);
return Changed;
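The new driver copies each SCC out of the iterator and advances before running any pass, so passes are free to mutate the call graph without invalidating the walk. The same copy-then-advance discipline over a plain container, as a toy sketch:

    #include <iostream>
    #include <vector>

    static void runPassesOn(const std::vector<int> &SCC) {
      std::cout << "running on SCC of " << SCC.size() << " nodes\n";
    }

    void runAll(std::vector<std::vector<int>> &SCCs) {
      for (unsigned I = 0; I < SCCs.size();) {
        std::vector<int> Cur = SCCs[I];  // copy: a pass may reshape SCCs
        ++I;                             // advance before the pass runs
        runPassesOn(Cur);                // safe even if SCCs is mutated
      }
    }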
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 920ee374555f..c4fb0b9a4e3d 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -92,13 +92,12 @@ bool FindUsedTypes::runOnModule(Module &m) {
// passed in, then the types are printed symbolically if possible, using the
// symbol table from the module.
//
-void FindUsedTypes::print(std::ostream &OS, const Module *M) const {
- raw_os_ostream RO(OS);
- RO << "Types in use by this module:\n";
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+ OS << "Types in use by this module:\n";
for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
E = UsedTypes.end(); I != E; ++I) {
- RO << " ";
- WriteTypeSymbolic(RO, *I, M);
- RO << '\n';
+ OS << " ";
+ WriteTypeSymbolic(OS, *I, M);
+ OS << '\n';
}
}
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 2e9884aa01b4..f5c110841292 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -23,6 +23,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InstIterator.h"
@@ -236,6 +237,9 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
}
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+ return true;
} else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
@@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// Check the value being stored.
Value *Ptr = SI->getOperand(0)->getUnderlyingObject();
- if (isa<MallocInst>(Ptr)) {
+ if (isa<MallocInst>(Ptr) || isMalloc(Ptr)) {
// Okay, easy case.
} else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
Function *F = CI->getCalledFunction();
@@ -435,7 +439,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (cast<StoreInst>(*II).isVolatile())
// Treat volatile stores as reading memory somewhere.
FunctionEffect |= Ref;
- } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II)) {
+ } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II) ||
+ isMalloc(&cast<Instruction>(*II))) {
FunctionEffect |= ModRef;
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index caeb14bef373..543e017fc9dd 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -19,7 +19,6 @@
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,7 +38,7 @@ Pass *llvm::createIVUsersPass() {
/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
+static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
if (isa<SCEVConstant>(S))
return false;
@@ -54,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
if (newLoop == L)
return false;
// if newLoop is an outer loop of L, this is OK.
- if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+ if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
return false;
}
return true;
@@ -80,10 +79,10 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
/// a mix of loop invariant and loop variant expressions. The start cannot,
/// however, contain an AddRec from a different loop, unless that loop is an
/// outer loop of the current loop.
-static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop,
- const SCEV* &Start, const SCEV* &Stride,
+static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
+ const SCEV *&Start, const SCEV *&Stride,
ScalarEvolution *SE, DominatorTree *DT) {
- const SCEV* TheAddRec = Start; // Initialize to zero.
+ const SCEV *TheAddRec = Start; // Initialize to zero.
// If the outer level is an AddExpr, the operands are all start values except
// for a nested AddRecExpr.
@@ -109,9 +108,9 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop,
// Use getSCEVAtScope to attempt to simplify other loops out of
// the picture.
- const SCEV* AddRecStart = AddRec->getStart();
+ const SCEV *AddRecStart = AddRec->getStart();
AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
- const SCEV* AddRecStride = AddRec->getStepRecurrence(*SE);
+ const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE);
// FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
// than an outer loop of the current loop, reject it. LSR has no concept of
@@ -122,15 +121,15 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop,
Start = SE->getAddExpr(Start, AddRecStart);
- // If stride is an instruction, make sure it dominates the loop preheader.
+ // If stride is an instruction, make sure it properly dominates the header.
// Otherwise we could end up with a use before def situation.
if (!isa<SCEVConstant>(AddRecStride)) {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!AddRecStride->dominates(Preheader, DT))
+ BasicBlock *Header = L->getHeader();
+ if (!AddRecStride->properlyDominates(Header, DT))
return false;
- DOUT << "[" << L->getHeader()->getName()
- << "] Variable stride: " << *AddRec << "\n";
+ DEBUG(errs() << "[" << L->getHeader()->getName()
+ << "] Variable stride: " << *AddRec << "\n");
}
Stride = AddRecStride;
@@ -196,13 +195,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true; // Instruction already handled.
// Get the symbolic expression for this instruction.
- const SCEV* ISE = SE->getSCEV(I);
+ const SCEV *ISE = SE->getSCEV(I);
if (isa<SCEVCouldNotCompute>(ISE)) return false;
// Get the start and stride for this expression.
Loop *UseLoop = LI->getLoopFor(I->getParent());
- const SCEV* Start = SE->getIntegerSCEV(0, ISE->getType());
- const SCEV* Stride = Start;
+ const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType());
+ const SCEV *Stride = Start;
if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
@@ -228,14 +227,14 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
!AddUsersIfInteresting(User)) {
- DOUT << "FOUND USER in other loop: " << *User
- << " OF SCEV: " << *ISE << "\n";
+ DEBUG(errs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
} else if (Processed.count(User) ||
!AddUsersIfInteresting(User)) {
- DOUT << "FOUND USER: " << *User
- << " OF SCEV: " << *ISE << "\n";
+ DEBUG(errs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
@@ -254,10 +253,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
- const SCEV* NewStart = SE->getMinusSCEV(Start, Stride);
+ const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
StrideUses->addUser(NewStart, User, I);
StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
- DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
+ DEBUG(errs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n");
} else {
StrideUses->addUser(Start, User, I);
}
@@ -295,9 +294,9 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
-const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const {
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
// Start with zero.
- const SCEV* RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+ const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
// Create the basic add recurrence.
RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
// Add the offset in a separate step, because it may be loop-variant.
@@ -308,7 +307,7 @@ const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const {
RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
// Evaluate the expression out of the loop, if possible.
if (!L->contains(U.getUser()->getParent())) {
- const SCEV* ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
+ const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
if (ExitVal->isLoopInvariant(L))
RetVal = ExitVal;
}
@@ -325,7 +324,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << ":\n";
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
- std::map<const SCEV*, IVUsersOfOneStride*>::const_iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
@@ -340,15 +339,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << " (post-inc)";
OS << " in ";
UI->getUser()->print(OS);
+ OS << '\n';
}
}
}
-void IVUsers::print(std::ostream &o, const Module *M) const {
- raw_os_ostream OS(o);
- print(OS, M);
-}
-
void IVUsers::dump() const {
print(errs());
}
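The post-increment handling above shifts the recorded start down by one stride, so that NewStart + Stride reproduces the original Start once the post-incremented value is substituted in. With a toy affine IV in place of SCEV:

    // Toy affine induction variable: value(i) = Start + i * Stride.
    struct AffineIV { long Start, Stride; };

    // A user of the post-incremented value sees one extra stride, so record
    // it with the start shifted back: NewStart + Stride == Start.
    AffineIV forPostIncUser(AffineIV IV) {
      return AffineIV{IV.Start - IV.Stride, IV.Stride};
    }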
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
new file mode 100644
index 000000000000..3b0d2c90aeb5
--- /dev/null
+++ b/lib/Analysis/InlineCost.cpp
@@ -0,0 +1,338 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned InlineCostAnalyzer::FunctionInfo::
+ CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (isa<BranchInst>(*UI))
+ Reduction += 40; // Eliminating a conditional branch is a big win
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
+ // Eliminating a switch is a big win, proportional to the number of edges
+ // deleted.
+ Reduction += (SI->getNumSuccessors()-1) * 40;
+ else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ // Turning an indirect call into a direct call is a BIG win
+ Reduction += CI->getCalledValue() == V ? 500 : 0;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ // Turning an indirect call into a direct call is a BIG win
+ Reduction += II->getCalledValue() == V ? 500 : 0;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(**UI);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocationInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += 7;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+
+ return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned InlineCostAnalyzer::FunctionInfo::
+ CountCodeReductionForAlloca(Value *V) {
+ if (!isa<PointerType>(V->getType())) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += 10;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (!GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP)+15;
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+ ++NumBlocks;
+
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ continue; // Debug intrinsics don't count as size.
+
+ CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
+
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (Function *F = CS.getCalledFunction())
+ if (F->isDeclaration() &&
+ (F->getName() == "setjmp" || F->getName() == "_setjmp"))
+ NeverInline = true;
+
+ // Calls often compile into many machine instructions. Bump up their
+ // cost to reflect this.
+ if (!isa<IntrinsicInst>(II))
+ NumInsts += InlineConstants::CallPenalty;
+ }
+
+ // These, too, are calls.
+ if (isa<MallocInst>(II) || isa<FreeInst>(II))
+ NumInsts += InlineConstants::CallPenalty;
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
+ ++NumVectorInsts;
+
+ // Noop casts, including ptr <-> int, don't count.
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ } else if (const GetElementPtrInst *GEPI =
+ dyn_cast<GetElementPtrInst>(II)) {
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ if (isa<ReturnInst>(II))
+ ++NumRets;
+
+ ++NumInsts;
+ }
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void CodeMetrics::analyzeFunction(Function *F) {
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB);
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+ Metrics.analyzeFunction(F);
+
+ // A function with exactly one return has it removed during the inlining
+ // process (see InlineFunction), so don't count it.
+ // FIXME: This knowledge should really be encoded outside of FunctionInfo.
+ if (Metrics.NumRets==1)
+ --Metrics.NumInsts;
+
+ // Check out all of the arguments to the function, figuring out how much
+ // code can be eliminated if one of the arguments is a constant.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
+ CountCodeReductionForAlloca(I)));
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function *, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee))
+ return llvm::InlineCost::getNever();
+
+ // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline. A lower inline cost makes it more likely for the call to
+ // be inlined. This value may go negative.
+ //
+ int InlineCost = 0;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // make it almost guaranteed to be inlined.
+ //
+ if (Callee->hasLocalLinkage() && Callee->hasOneUse())
+ InlineCost += InlineConstants::LastCallToStaticBonus;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ InlineCost += InlineConstants::ColdccPenalty;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ InlineCost += InlineConstants::NoreturnPenalty;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+ InlineCost += InlineConstants::NoreturnPenalty;
+
+ // Get information about the callee...
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ // If we should never inline this, return a huge cost.
+ if (CalleeFI.Metrics.NeverInline)
+ return InlineCost::getNever();
+
+ // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
+ // could move this up and avoid computing the FunctionInfo for
+ // things we are going to just return always inline for. This
+ // requires handling setjmp somewhere else, however.
+ if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getAlways();
+
+ if (CalleeFI.Metrics.usesDynamicAlloca) {
+    // Get information about the caller...
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CallerFI.Metrics.NumBlocks == 0)
+ CallerFI.analyzeFunction(Caller);
+
+ // Don't inline a callee with dynamic alloca into a caller without them.
+ // Functions containing dynamic alloca's are inefficient in various ways;
+ // don't create more inefficiency.
+ if (!CallerFI.Metrics.usesDynamicAlloca)
+ return InlineCost::getNever();
+ }
+
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ unsigned ArgNo = 0;
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I, ++ArgNo) {
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. This favors functions that take many arguments over functions
+ // that take few arguments.
+ InlineCost -= 20;
+
+ // If this is a function being passed in, it is very likely that we will be
+ // able to turn an indirect function call into a direct function call.
+ if (isa<Function>(I))
+ InlineCost -= 100;
+
+ // If an alloca is passed in, inlining this function is likely to allow
+    // significant future optimization possibilities (like scalar promotion and
+ // scalarization), so encourage the inlining of the function.
+ //
+ else if (isa<AllocaInst>(I)) {
+ if (ArgNo < CalleeFI.ArgumentWeights.size())
+ InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ } else if (isa<Constant>(I)) {
+ if (ArgNo < CalleeFI.ArgumentWeights.size())
+ InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight;
+ }
+ }
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Don't inline into something too big, which would make it bigger.
+ // "size" here is the number of basic blocks, not instructions.
+ //
+ InlineCost += Caller->size()/15;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI.Metrics.NumInsts*5;
+
+ return llvm::InlineCost::get(InlineCost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee...
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ float Factor = 1.0f;
+ // Single BB functions are often written to be inlined.
+ if (CalleeFI.Metrics.NumBlocks == 1)
+ Factor += 0.5f;
+
+  // Be more aggressive if the function contains a good chunk (if it makes up
+ // at least 10% of the instructions) of vector instructions.
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
+ Factor += 2.0f;
+ else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
+ Factor += 1.5f;
+ return Factor;
+}
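Condensed, getInlineCost computes a signed score in which negative terms are bonuses: per-argument credits (with extra weight for constants, allocas, and function pointers), offset by caller size in blocks and callee size in instructions, with the getNever()/getAlways() short-circuits handled before scoring begins. A toy scorer in the same spirit; the 20, 15, and 5 match the constants visible above, while the constant-argument weight here is an invented stand-in for the per-argument weights the analyzer actually computes:

    // Toy version of the scoring scheme; lower means more attractive to inline.
    struct ToyCalleeInfo { int NumInsts; };

    int toyInlineCost(const ToyCalleeInfo &Callee, int CallerBlocks,
                      int NumArgs, int NumConstantArgs) {
      int Cost = 0;
      Cost -= 20 * NumArgs;          // every argument is a small bonus
      Cost -= 100 * NumConstantArgs; // invented stand-in for argument weights
      Cost += CallerBlocks / 15;     // don't fatten an already-big caller
      Cost += Callee.NumInsts * 5;   // each callee instruction counts as 5
      return Cost;
    }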
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 2b34ad3b070d..83724caf5210 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -16,8 +16,9 @@
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -46,8 +47,8 @@ namespace {
#include "llvm/Instruction.def"
void visitInstruction(Instruction &I) {
- cerr << "Instruction Count does not know about " << I;
- abort();
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(0);
}
public:
static char ID; // Pass identification, replacement for typeid
@@ -58,7 +59,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
- virtual void print(std::ostream &O, const Module *M) const {}
+ virtual void print(raw_ostream &O, const Module *M) const {}
};
}
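
The InstCount hunk above is one instance of a tree-wide migration from
cerr/abort to raw_ostream/llvm_unreachable. A hedged sketch of the
replacement pattern, assuming the LLVM support headers of this vintage:

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"

    // Old style: cerr << Msg; abort();
    // New style: report on the unbuffered error stream, then mark the point
    // unreachable so optimizers may assume it is never executed.
    static void reportUnknownInst(const char *Name) {
      llvm::errs() << "Instruction Count does not know about " << Name;
      llvm_unreachable(0);
    }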
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index 16b194723071..ca9cdcaf2464 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/Interval.h"
#include "llvm/BasicBlock.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -29,29 +30,29 @@ bool Interval::isLoop() const {
// There is a loop in this interval iff one of the predecessors of the header
// node lives in the interval.
for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
- I != E; ++I) {
- if (contains(*I)) return true;
- }
+ I != E; ++I)
+ if (contains(*I))
+ return true;
return false;
}
-void Interval::print(std::ostream &o) const {
- o << "-------------------------------------------------------------\n"
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
<< "Interval Contents:\n";
// Print out all of the basic blocks in the interval...
for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
E = Nodes.end(); I != E; ++I)
- o << **I << "\n";
+ OS << **I << "\n";
- o << "Interval Predecessors:\n";
+ OS << "Interval Predecessors:\n";
for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
E = Predecessors.end(); I != E; ++I)
- o << **I << "\n";
+ OS << **I << "\n";
- o << "Interval Successors:\n";
+ OS << "Interval Successors:\n";
for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
E = Successors.end(); I != E; ++I)
- o << **I << "\n";
+ OS << **I << "\n";
}
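
The same std::ostream to raw_ostream migration recurs in the files below. A
minimal sketch of the new-style print method, assuming only
llvm/Support/raw_ostream.h:

    #include "llvm/Support/raw_ostream.h"

    struct Example {
      int Counter;
      // raw_ostream replaces std::ostream in print signatures.
      void print(llvm::raw_ostream &OS) const {
        OS << "counter: " << Counter << "\n";
      }
    };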
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index cb8a85da552a..1f17b77a5b96 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -32,7 +32,7 @@ void IntervalPartition::releaseMemory() {
RootInterval = 0;
}
-void IntervalPartition::print(std::ostream &O, const Module*) const {
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
Intervals[i]->print(O);
}
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index 971e6e7accb4..741965929890 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -16,7 +16,6 @@
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Function.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
using namespace llvm;
// Register this pass...
@@ -37,7 +36,6 @@ LibCallAliasAnalysis::~LibCallAliasAnalysis() {
void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<TargetData>();
AU.setPreservesAll(); // Does not transform code
}
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 29850471f7dc..e0060c3e89b1 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -57,9 +57,6 @@ const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const {
}
// Look up this function in the string map.
- const char *ValueName = F->getNameStart();
- StringMap<const LibCallFunctionInfo*>::iterator I =
- Map->find(ValueName, ValueName+F->getNameLen());
- return I != Map->end() ? I->second : 0;
+ return Map->lookup(F->getName());
}
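
StringMap::lookup, used in the replacement line above, collapses the manual
find-and-test idiom into one call. A small sketch, assuming the llvm/ADT
headers of this era:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"

    // lookup() returns the mapped value, or a default-constructed one
    // (a null pointer here) when the key is absent.
    const int *findInfo(const llvm::StringMap<const int *> &Map,
                        llvm::StringRef Name) {
      return Map.lookup(Name);
    }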
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index f6057839266f..32d22662c341 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -15,18 +15,33 @@
//
// TODO: adapt as implementation progresses.
//
+// TODO: document lingo (pair, subscript, index)
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lda"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopDependenceAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;
+STATISTIC(NumAnswered, "Number of dependence queries answered");
+STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed");
+STATISTIC(NumDependent, "Number of pairs with dependent accesses");
+STATISTIC(NumIndependent, "Number of pairs with independent accesses");
+STATISTIC(NumUnknown, "Number of pairs with unknown accesses");
+
LoopPass *llvm::createLoopDependenceAnalysisPass() {
return new LoopDependenceAnalysis();
}
@@ -44,14 +59,14 @@ static inline bool IsMemRefInstr(const Value *V) {
return I && (I->mayReadFromMemory() || I->mayWriteToMemory());
}
-static void GetMemRefInstrs(
- const Loop *L, SmallVectorImpl<Instruction*> &memrefs) {
+static void GetMemRefInstrs(const Loop *L,
+ SmallVectorImpl<Instruction*> &Memrefs) {
for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
- b != be; ++b)
+ b != be; ++b)
for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end();
- i != ie; ++i)
+ i != ie; ++i)
if (IsMemRefInstr(i))
- memrefs.push_back(i);
+ Memrefs.push_back(i);
}
static bool IsLoadOrStoreInst(Value *I) {
@@ -63,53 +78,223 @@ static Value *GetPointerOperand(Value *I) {
return i->getPointerOperand();
if (StoreInst *i = dyn_cast<StoreInst>(I))
return i->getPointerOperand();
- assert(0 && "Value is no load or store instruction!");
+ llvm_unreachable("Value is no load or store instruction!");
// Never reached.
return 0;
}
+static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *aObj = A->getUnderlyingObject();
+ const Value *bObj = B->getUnderlyingObject();
+ return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
+ bObj, AA->getTypeStoreSize(bObj->getType()));
+}
+
+static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) {
+ return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L);
+}
+
//===----------------------------------------------------------------------===//
// Dependence Testing
//===----------------------------------------------------------------------===//
-bool LoopDependenceAnalysis::isDependencePair(const Value *x,
- const Value *y) const {
- return IsMemRefInstr(x) &&
- IsMemRefInstr(y) &&
- (cast<const Instruction>(x)->mayWriteToMemory() ||
- cast<const Instruction>(y)->mayWriteToMemory());
+bool LoopDependenceAnalysis::isDependencePair(const Value *A,
+ const Value *B) const {
+ return IsMemRefInstr(A) &&
+ IsMemRefInstr(B) &&
+ (cast<const Instruction>(A)->mayWriteToMemory() ||
+ cast<const Instruction>(B)->mayWriteToMemory());
+}
+
+bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
+ Value *B,
+ DependencePair *&P) {
+ void *insertPos = 0;
+ FoldingSetNodeID id;
+ id.AddPointer(A);
+ id.AddPointer(B);
+
+ P = Pairs.FindNodeOrInsertPos(id, insertPos);
+ if (P) return true;
+
+ P = PairAllocator.Allocate<DependencePair>();
+ new (P) DependencePair(id, A, B);
+ Pairs.InsertNode(P, insertPos);
+ return false;
+}
+
+void LoopDependenceAnalysis::getLoops(const SCEV *S,
+ DenseSet<const Loop*>* Loops) const {
+ // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
+ for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
+ if (!S->isLoopInvariant(L))
+ Loops->insert(L);
+}
+
+bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const {
+ DenseSet<const Loop*> loops;
+ getLoops(S, &loops);
+ return loops.empty();
+}
+
+bool LoopDependenceAnalysis::isAffine(const SCEV *S) const {
+ const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S);
+ return isLoopInvariant(S) || (rec && rec->isAffine());
+}
+
+bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const {
+ return isLoopInvariant(A) && isLoopInvariant(B);
+}
+
+bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const {
+ DenseSet<const Loop*> loops;
+ getLoops(A, &loops);
+ getLoops(B, &loops);
+ return loops.size() == 1;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseZIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!");
+ return A == B ? Dependent : Independent;
}
-bool LoopDependenceAnalysis::depends(Value *src, Value *dst) {
- assert(isDependencePair(src, dst) && "Values form no dependence pair!");
- DOUT << "== LDA test ==\n" << *src << *dst;
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseMIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
- // We only analyse loads and stores; for possible memory accesses by e.g.
- // free, call, or invoke instructions we conservatively assume dependence.
- if (!IsLoadOrStoreInst(src) || !IsLoadOrStoreInst(dst))
- return true;
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ DEBUG(errs() << " Testing subscript: " << *A << ", " << *B << "\n");
- Value *srcPtr = GetPointerOperand(src);
- Value *dstPtr = GetPointerOperand(dst);
- const Value *srcObj = srcPtr->getUnderlyingObject();
- const Value *dstObj = dstPtr->getUnderlyingObject();
- AliasAnalysis::AliasResult alias = AA->alias(
- srcObj, AA->getTargetData().getTypeStoreSize(srcObj->getType()),
- dstObj, AA->getTargetData().getTypeStoreSize(dstObj->getType()));
+ if (A == B) {
+ DEBUG(errs() << " -> [D] same SCEV\n");
+ return Dependent;
+ }
- // If we don't know whether or not the two objects alias, assume dependence.
- if (alias == AliasAnalysis::MayAlias)
- return true;
+ if (!isAffine(A) || !isAffine(B)) {
+ DEBUG(errs() << " -> [?] not affine\n");
+ return Unknown;
+ }
- // If the objects noalias, they are distinct, accesses are independent.
- if (alias == AliasAnalysis::NoAlias)
- return false;
+ if (isZIVPair(A, B))
+ return analyseZIV(A, B, S);
- // TODO: the underlying objects MustAlias, test for dependence
+ if (isSIVPair(A, B))
+ return analyseSIV(A, B, S);
- // We couldn't establish a more precise result, so we have to conservatively
- // assume full dependence.
- return true;
+ return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+ DEBUG(errs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+ // We only analyse loads and stores; possible memory accesses by e.g.
+ // free, call, or invoke instructions are not handled.
+ if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+ DEBUG(errs() << "--> [?] no load/store\n");
+ return Unknown;
+ }
+
+ Value *aPtr = GetPointerOperand(P->A);
+ Value *bPtr = GetPointerOperand(P->B);
+
+ switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+ case AliasAnalysis::MayAlias:
+ // We cannot analyse objects if we do not know about their aliasing.
+ DEBUG(errs() << "---> [?] may alias\n");
+ return Unknown;
+
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(errs() << "---> [I] no alias\n");
+ return Independent;
+
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias, test accesses for dependence.
+ }
+
+ const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+ const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+ if (!aGEP || !bGEP)
+ return Unknown;
+
+ // FIXME: Is filtering coupled subscripts necessary?
+
+ // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+ // trailing zeroes to the smaller GEP, if needed.
+ typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+ GEPOpdPairsTy opds;
+ for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+ aEnd = aGEP->idx_end(),
+ bIdx = bGEP->idx_begin(),
+ bEnd = bGEP->idx_end();
+ aIdx != aEnd && bIdx != bEnd;
+ aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+ const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+ const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+ opds.push_back(std::make_pair(aSCEV, bSCEV));
+ }
+
+ if (!opds.empty() && opds[0].first != opds[0].second) {
+ // We cannot (yet) handle arbitrary GEP pointer offsets, so give up when
+ // the two GEPs do not start from the same first subscript.
+ //
+ // TODO: this could be relaxed by adding the size of the underlying object
+ // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we
+ // know that x is a [100 x i8]*, we could modify the first subscript to be
+ // (i, 200-i) instead of (i, -i).
+ return Unknown;
+ }
+
+ // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+ for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+ i != end; ++i) {
+ Subscript subscript;
+ DependenceResult result = analyseSubscript(i->first, i->second, &subscript);
+ if (result != Dependent) {
+ // We either proved independence or failed to analyse this subscript.
+ // Further subscripts will not improve the situation, so abort early.
+ return result;
+ }
+ P->Subscripts.push_back(subscript);
+ }
+ // We successfully analysed all subscripts but failed to prove independence.
+ return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+ assert(isDependencePair(A, B) && "Values form no dependence pair!");
+ ++NumAnswered;
+
+ DependencePair *p;
+ if (!findOrInsertDependencePair(A, B, p)) {
+ // The pair is not cached, so analyse it.
+ ++NumAnalysed;
+ switch (p->Result = analysePair(p)) {
+ case Dependent: ++NumDependent; break;
+ case Independent: ++NumIndependent; break;
+ case Unknown: ++NumUnknown; break;
+ }
+ }
+ return p->Result != Independent;
}
//===----------------------------------------------------------------------===//
@@ -123,14 +308,19 @@ bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
return false;
}
+void LoopDependenceAnalysis::releaseMemory() {
+ Pairs.clear();
+ PairAllocator.Reset();
+}
+
void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
AU.addRequiredTransitive<ScalarEvolution>();
}
-static void PrintLoopInfo(
- raw_ostream &OS, LoopDependenceAnalysis *LDA, const Loop *L) {
+static void PrintLoopInfo(raw_ostream &OS,
+ LoopDependenceAnalysis *LDA, const Loop *L) {
if (!L->empty()) return; // ignore non-innermost loops
SmallVector<Instruction*, 8> memrefs;
@@ -142,14 +332,14 @@ static void PrintLoopInfo(
OS << " Load/store instructions: " << memrefs.size() << "\n";
for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
- end = memrefs.end(); x != end; ++x)
- OS << "\t" << (x - memrefs.begin()) << ": " << **x;
+ end = memrefs.end(); x != end; ++x)
+ OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n";
OS << " Pairwise dependence results:\n";
for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
- end = memrefs.end(); x != end; ++x)
+ end = memrefs.end(); x != end; ++x)
for (SmallVector<Instruction*, 8>::const_iterator y = x + 1;
- y != end; ++y)
+ y != end; ++y)
if (LDA->isDependencePair(*x, *y))
OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin())
<< ": " << (LDA->depends(*x, *y) ? "dependent" : "independent")
@@ -160,8 +350,3 @@ void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const {
// TODO: doc why const_cast is safe
PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L);
}
-
-void LoopDependenceAnalysis::print(std::ostream &OS, const Module *M) const {
- raw_os_ostream os(OS);
- print(os, M);
-}
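
For orientation, the ZIV/SIV/MIV split above classifies a subscript pair by
how many loops its two SCEVs vary in. An illustrative, non-LLVM sketch of the
same classification:

    #include <set>

    enum PairKind { ZIV, SIV, MIV };

    // ZIV: both sides loop-invariant; SIV: exactly one loop varies across
    // the pair; MIV: anything more involved.
    PairKind classifyPair(const std::set<int> &LoopsA,
                          const std::set<int> &LoopsB) {
      std::set<int> All(LoopsA);
      All.insert(LoopsB.begin(), LoopsB.end());
      if (All.empty()) return ZIV;
      if (All.size() == 1) return SIV;
      return MIV;
    }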
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index bb535894efab..ce2d29f331b6 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -20,12 +20,22 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+bool VerifyLoopInfo = true;
+#else
+bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
char LoopInfo::ID = 0;
static RegisterPass<LoopInfo>
X("loops", "Natural Loop Information", true, true);
@@ -34,6 +44,338 @@ X("loops", "Natural Loop Information", true, true);
// Loop implementation
//
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return isLoopInvariant(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// isLoopInvariant - Return true if the specified instruction is
+/// loop-invariant.
+///
+bool Loop::isLoopInvariant(Instruction *I) const {
+ return !contains(I->getParent());
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+ // Hoist.
+ I->moveBefore(InsertPt);
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = 0, *Backedge = 0;
+ typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits;
+ InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H);
+ assert(PI != InvBlockTraits::child_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop
+ Incoming = *PI++;
+ if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return 0;
+}
+
+/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
+/// the canonical induction variable value for the "next" iteration of the
+/// loop. This always succeeds if getCanonicalInductionVariable succeeds.
+///
+Instruction *Loop::getCanonicalInductionVariableIncrement() const {
+ if (PHINode *PN = getCanonicalInductionVariable()) {
+ bool P1InLoop = contains(PN->getIncomingBlock(1));
+ return cast<Instruction>(PN->getIncomingValue(P1InLoop));
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed. Note that this means that the backedge
+/// of the loop executes N-1 times. If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+ // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+ // canonical induction variable and V is the trip count of the loop.
+ Instruction *Inc = getCanonicalInductionVariableIncrement();
+ if (Inc == 0) return 0;
+ PHINode *IV = cast<PHINode>(Inc->getOperand(0));
+
+ BasicBlock *BackedgeBlock =
+ IV->getIncomingBlock(contains(IV->getIncomingBlock(1)));
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->getOperand(0) == Inc) {
+ if (BI->getSuccessor(0) == getHeader()) {
+ if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+ return ICI->getOperand(1);
+ } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+ return ICI->getOperand(1);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the trip count is very large
+/// (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+ Value* TripCount = this->getTripCount();
+ if (TripCount) {
+ if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+ // Guard against huge trip counts.
+ if (TripCountC->getValue().getActiveBits() <= 32) {
+ return (unsigned)TripCountC->getZExtValue();
+ }
+ }
+ }
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a
+/// multiple of a constant (which is also the case if the trip count is
+/// simply constant; use getSmallConstantTripCount for that case). Will
+/// also return 1 if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+ Value* TripCount = this->getTripCount();
+ // This will hold the ConstantInt result, if any
+ ConstantInt *Result = NULL;
+ if (TripCount) {
+ // See if the trip count is constant itself
+ Result = dyn_cast<ConstantInt>(TripCount);
+ // if not, see if it is a multiplication
+ if (!Result)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+ switch (BO->getOpcode()) {
+ case BinaryOperator::Mul:
+ Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Guard against huge trip counts.
+ if (Result && Result->getValue().getActiveBits() <= 32) {
+ return (unsigned)Result->getZExtValue();
+ } else {
+ return 1;
+ }
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm() const {
+ // Gather the loop's blocks in a set for quick membership lookups.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+ UserBB = P->getIncomingBlock(UI);
+ }
+
+ // Check the current block, as a fast-path. Most values are used in
+ // the same block they are defined in.
+ if (UserBB != BB && !LoopBBs.count(UserBB))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify pass transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader.
+ if (!getLoopPreheader())
+ return false;
+ // Normal-form loops have a single backedge.
+ if (!getLoopLatch())
+ return false;
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!contains(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop is in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(isLoopSimplifyForm() &&
+ "getUniqueExitBlocks assumes the loop is in canonical form!");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ typedef GraphTraits<BasicBlock *> BlockTraits;
+ typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits;
+ for (BlockTraits::ChildIteratorType I =
+ BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
+ I != E; ++I) {
+ // If block is inside the loop then it is not an exit block.
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ continue;
+
+ InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+ // Only insert this exit block into the output ExitBlocks vector if the
+ // current basic block is the exit block's first predecessor. This
+ // ensures that the same exit block is not inserted twice into the
+ // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+ // If a terminator has more than two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from current block
+ // to one exit block.
+ if (std::distance(BlockTraits::child_begin(current),
+ BlockTraits::child_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In case of multiple edges from current block to exit block, collect
+ // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+ // duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return 0;
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
@@ -43,7 +385,29 @@ bool LoopInfo::runOnFunction(Function &) {
return false;
}
+void LoopInfo::verifyAnalysis() const {
+ // LoopInfo is a FunctionPass, but verifying every loop in the function
+ // each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop
+ // manually during loop pass sequences.
+
+ if (!VerifyLoopInfo) return;
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ (*I)->verifyLoopNest();
+ }
+
+ // TODO: check BBMap consistency.
+}
+
void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTree>();
}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+ LI.print(OS);
+}
+
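
A hedged usage sketch of the Loop queries added above, assuming the
LoopInfo.h declarations from this commit:

    #include "llvm/Analysis/LoopInfo.h"
    using namespace llvm;

    // Constant trip count if the loop has a canonical induction variable
    // and the count fits in 32 bits; 0 otherwise.
    unsigned constantTripCount(const Loop *L) {
      if (!L->getCanonicalInductionVariable())
        return 0; // not canonical, getTripCount() would fail anyway
      return L->getSmallConstantTripCount();
    }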
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index ee03556f2741..43463cd8ef1c 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -21,7 +21,6 @@ using namespace llvm;
//
char LPPassManager::ID = 0;
-/// LPPassManager manages FPPassManagers and CalLGraphSCCPasses.
LPPassManager::LPPassManager(int Depth)
: FunctionPass(&ID), PMDataManager(Depth) {
@@ -111,17 +110,21 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
else
LI->addTopLevelLoop(L);
+ insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
// Insert L into loop queue
if (L == CurrentLoop)
redoLoop(L);
- else if (!ParentLoop)
+ else if (!L->getParentLoop())
// This is top level loop.
LQ.push_front(L);
else {
- // Insert L after ParentLoop
+ // Insert L after the parent loop.
for (std::deque<Loop *>::iterator I = LQ.begin(),
E = LQ.end(); I != E; ++I) {
- if (*I == ParentLoop) {
+ if (*I == L->getParentLoop()) {
// deque does not support insert after.
++I;
LQ.insert(I, 1, L);
@@ -217,41 +220,66 @@ bool LPPassManager::runOnFunction(Function &F) {
skipThisLoop = false;
redoThisLoop = false;
- // Run all passes on current SCC
+ // Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
- dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, "");
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getNameStr());
dumpRequiredSet(P);
initializeAnalysisImpl(P);
LoopPass *LP = dynamic_cast<LoopPass *>(P);
+ assert(LP && "Invalid LPPassManager member");
{
PassManagerPrettyStackEntry X(LP, *CurrentLoop->getHeader());
- StartPassTimer(P);
- assert(LP && "Invalid LPPassManager member");
+ Timer *T = StartPassTimer(P);
Changed |= LP->runOnLoop(CurrentLoop, *this);
- StopPassTimer(P);
+ StopPassTimer(P, T);
}
if (Changed)
- dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, "");
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getNameStr());
dumpPreservedSet(P);
- verifyPreservedAnalysis(LP);
+ if (!skipThisLoop) {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ Timer *T = StartPassTimer(LI);
+ CurrentLoop->verifyLoop();
+ StopPassTimer(LI, T);
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(LP);
+ }
+
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P, "", ON_LOOP_MSG);
-
- // If dominator information is available then verify the info if requested.
- verifyDomInfo(*LP, F);
+ removeDeadPasses(P,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getNameStr(),
+ ON_LOOP_MSG);
if (skipThisLoop)
// Do not run other passes on this loop.
break;
}
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisLoop)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+
// Pop the loop from queue after running all passes.
LQ.pop_back();
@@ -272,7 +300,7 @@ bool LPPassManager::runOnFunction(Function &F) {
/// Print passes managed by this manager
void LPPassManager::dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "Loop Pass Manager\n";
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
P->dumpPassStructure(Offset + 1);
diff --git a/lib/Analysis/MallocHelper.cpp b/lib/Analysis/MallocHelper.cpp
new file mode 100644
index 000000000000..89051d178838
--- /dev/null
+++ b/lib/Analysis/MallocHelper.cpp
@@ -0,0 +1,230 @@
+//===-- MallocHelper.cpp - Functions to identify malloc calls -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to malloc, bitcasts of malloc
+// calls, and the types and array sizes associated with them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value* I) {
+ return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+ if (!CI)
+ return false;
+
+ const Module* M = CI->getParent()->getParent()->getParent();
+ Function *MallocFunc = M->getFunction("malloc");
+
+ if (CI->getOperand(0) != MallocFunc)
+ return false;
+
+ // Check malloc prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute will exist.
+ const FunctionType *FTy = MallocFunc->getFunctionType();
+ if (FTy->getNumParams() != 1)
+ return false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+ if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst* llvm::extractMallocCall(const Value* I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst* llvm::extractMallocCall(Value* I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst* BCI) {
+ if (!BCI)
+ return false;
+
+ return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst* llvm::extractMallocCallFromBitCast(Value* I) {
+ BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+const CallInst* llvm::extractMallocCallFromBitCast(const Value* I) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
+ const TargetData* TD) {
+ if (!CI)
+ return false;
+
+ const Type* T = getMallocAllocatedType(CI);
+
+ // We can only identify an array malloc if we know the type of the malloc
+ // call.
+ if (!T) return false;
+
+ Value* MallocArg = CI->getOperand(1);
+ Constant *ElementSize = ConstantExpr::getSizeOf(T);
+ ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
+ MallocArg->getType());
+ Constant *FoldedElementSize = ConstantFoldConstantExpression(
+ cast<ConstantExpr>(ElementSize),
+ Context, TD);
+
+
+ if (isa<ConstantExpr>(MallocArg))
+ return (MallocArg != ElementSize);
+
+ BinaryOperator *BI = dyn_cast<BinaryOperator>(MallocArg);
+ if (!BI)
+ return false;
+
+ if (BI->getOpcode() == Instruction::Mul)
+ // ArraySize * ElementSize
+ if (BI->getOperand(1) == ElementSize ||
+ (FoldedElementSize && BI->getOperand(1) == FoldedElementSize))
+ return true;
+
+ // TODO: Detect case where MallocArg mul has been transformed to shl.
+
+ return false;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// matches the malloc call IR generated by CallInst::CreateMalloc(). This
+/// means that it is a malloc call with one bitcast use AND the malloc call's
+/// size argument is:
+/// 1. a constant not equal to the malloc's allocated type
+/// or
+/// 2. the result of a multiplication by the malloc's allocated type
+/// Otherwise it returns NULL.
+/// The unique bitcast is needed to determine the type/size of the array
+/// allocation.
+CallInst* llvm::isArrayMalloc(Value* I, LLVMContext &Context,
+ const TargetData* TD) {
+ CallInst *CI = extractMallocCall(I);
+ return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL;
+}
+
+const CallInst* llvm::isArrayMalloc(const Value* I, LLVMContext &Context,
+ const TargetData* TD) {
+ const CallInst *CI = extractMallocCall(I);
+ return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType* llvm::getMallocType(const CallInst* CI) {
+ assert(isMalloc(CI) && "GetMallocType and not malloc call");
+
+ const BitCastInst* BCI = NULL;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+ break;
+
+ // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
+ // destination type.
+ if (BCI && CI->hasOneUse())
+ return cast<PointerType>(BCI->getDestTy());
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (!BCI)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
+const Type* llvm::getMallocAllocatedType(const CallInst* CI) {
+ const PointerType* PT = getMallocType(CI);
+ return PT ? PT->getElementType() : NULL;
+}
+
+/// isConstantOne - Return true only if val is constant int 1.
+static bool isConstantOne(Value *val) {
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. The array
+/// size is computed in 1 of 3 ways:
+/// 1. If the element type is of size 1, then the array size is the argument to
+/// malloc.
+/// 2. Else if the malloc's argument is a constant, the array size is that
+/// argument divided by the element type's size.
+/// 3. Else the malloc argument must be a multiplication and the array size is
+/// the first operand of the multiplication.
+/// This function returns constant 1 if:
+/// 1. The malloc call's allocated type cannot be determined.
+/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL
+/// ArraySize.
+Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context,
+ const TargetData* TD) {
+ // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
+ if (!isArrayMalloc(CI, Context, TD))
+ return ConstantInt::get(CI->getOperand(1)->getType(), 1);
+
+ Value* MallocArg = CI->getOperand(1);
+ assert(getMallocAllocatedType(CI) && "getMallocArraySize and no type");
+ Constant *ElementSize = ConstantExpr::getSizeOf(getMallocAllocatedType(CI));
+ ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
+ MallocArg->getType());
+
+ Constant* CO = dyn_cast<Constant>(MallocArg);
+ BinaryOperator* BO = dyn_cast<BinaryOperator>(MallocArg);
+ assert((isConstantOne(ElementSize) || CO || BO) &&
+ "getMallocArraySize and malformed malloc IR");
+
+ if (isConstantOne(ElementSize))
+ return MallocArg;
+
+ if (CO)
+ return CO->getOperand(0);
+
+ // TODO: Detect case where MallocArg mul has been transformed to shl.
+
+ assert(BO && "getMallocArraySize not constant but not multiplication either");
+ return BO->getOperand(0);
+}
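
A hedged usage sketch of the new MallocHelper entry points, with the
signatures exactly as introduced in this file:

    #include "llvm/Analysis/MallocHelper.h"
    using namespace llvm;

    // Recover the array size of a malloc-like value, or null if V is not a
    // recognized malloc call. Non-array mallocs report a constant size of 1.
    Value *mallocArraySize(Value *V, LLVMContext &Context,
                           const TargetData *TD) {
      if (CallInst *CI = extractMallocCall(V))
        return getMallocArraySize(CI, Context, TD);
      return 0;
    }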
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3b2102955f33..d6400757a513 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -16,16 +16,15 @@
#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
using namespace llvm;
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -71,12 +70,10 @@ void MemoryDependenceAnalysis::releaseMemory() {
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
- AU.addRequiredTransitive<TargetData>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
- TD = &getAnalysis<TargetData>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
@@ -112,10 +109,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
uint64_t PointerSize = 0;
if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
Pointer = S->getPointerOperand();
- PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType());
+ PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
} else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Pointer = V->getOperand(0);
- PointerSize = TD->getTypeStoreSize(V->getType());
+ PointerSize = AA->getTypeStoreSize(V->getType());
} else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) {
Pointer = F->getPointerOperand();
@@ -185,7 +182,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// a load depends on another must aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
Value *Pointer = LI->getPointerOperand();
- uint64_t PointerSize = TD->getTypeStoreSize(LI->getType());
+ uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R =
@@ -211,7 +208,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
Value *Pointer = SI->getPointerOperand();
- uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R =
@@ -228,15 +225,19 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// the allocation, return Def. This means that there is no dependence and
// the access can be optimized based on that. For example, a load could
// turn into undef.
- if (AllocationInst *AI = dyn_cast<AllocationInst>(Inst)) {
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocationInst>(Inst) || extractMallocCall(Inst)) {
Value *AccessPtr = MemPtr->getUnderlyingObject();
- if (AccessPtr == AI ||
- AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
- return MemDepResult::getDef(AI);
+ if (AccessPtr == Inst ||
+ AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
continue;
}
-
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
case AliasAnalysis::NoModRef:
@@ -302,7 +303,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
else {
MemPtr = SI->getPointerOperand();
- MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
}
} else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
// If this is a volatile load, don't mess around with it. Just return the
@@ -311,7 +312,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
else {
MemPtr = LI->getPointerOperand();
- MemSize = TD->getTypeStoreSize(LI->getType());
+ MemSize = AA->getTypeStoreSize(LI->getType());
}
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS = CallSite::get(QueryInst);
@@ -513,7 +514,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
 // We know that the pointer value is live into FromBB, so find the
 // def/clobbers from predecessors.
const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
- uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
+ uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
@@ -599,6 +600,42 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
return Dep;
}
+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+ // One new entry, just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
+
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
@@ -731,10 +768,22 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// If we do need to do phi translation, then there are a bunch of different
// cases, because we have to find a Value* live in the predecessor block. We
// know that PtrInst is defined in this block at least.
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
// If this is directly a PHI node, just use the incoming values for each
// pred as the phi translated version.
if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
+ Cache = 0;
+
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
@@ -759,15 +808,6 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
goto PredTranslationFailure;
}
- // We may have added values to the cache list before this PHI
- // translation. If so, we haven't done anything to ensure that the
- // cache remains sorted. Sort it now (if needed) so that recursive
- // invocations of getNonLocalPointerDepFromBB that could reuse the cache
- // value will only see properly sorted cache arrays.
- if (Cache && NumSortedEntries != Cache->size())
- std::sort(Cache->begin(), Cache->end());
- Cache = 0;
-
// FIXME: it is entirely possible that PHI translating will end up with
// the same value. Consider PHI translating something like:
// X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
@@ -779,7 +819,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
Result, Visited))
goto PredTranslationFailure;
}
-
+
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->second;
@@ -806,11 +846,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->second;
NumSortedEntries = Cache->size();
- } else if (NumSortedEntries != Cache->size()) {
- std::sort(Cache->begin(), Cache->end());
- NumSortedEntries = Cache->size();
}
-
+
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
@@ -841,33 +878,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
}
// Okay, we're done now. If we added new values to the cache, re-sort it.
- switch (Cache->size()-NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache->back();
- Cache->pop_back();
- NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache->begin(), Cache->end()-1, Val);
- Cache->insert(Entry, Val);
- // FALL THROUGH.
- }
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache->size() != 1) {
- NonLocalDepEntry Val = Cache->back();
- Cache->pop_back();
- NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache->begin(), Cache->end(), Val);
- Cache->insert(Entry, Val);
- }
- break;
- default:
- // Added many values, do a full scale sort.
- std::sort(Cache->begin(), Cache->end());
- }
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
DEBUG(AssertSorted(*Cache));
return false;
}
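
The factored-out SortNonLocalDepInfoCache is a reusable pattern: when only
the last few entries are out of place, binary-insert them instead of
re-sorting everything. A self-contained sketch over a plain vector:

    #include <algorithm>
    #include <vector>

    // Binary-insert up to two trailing stragglers; otherwise do a full sort.
    void resortTail(std::vector<int> &Cache, unsigned NumSorted) {
      unsigned Added = Cache.size() - NumSorted;
      if (Added == 0) return;
      if (Added > 2) { std::sort(Cache.begin(), Cache.end()); return; }
      while (Added--) {
        int Val = Cache.back();
        Cache.pop_back();
        Cache.insert(std::upper_bound(Cache.begin(),
                                      Cache.begin() + NumSorted, Val), Val);
        ++NumSorted; // the sorted prefix grew by one
      }
    }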
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
new file mode 100644
index 000000000000..43f4af36d81c
--- /dev/null
+++ b/lib/Analysis/PointerTracking.cpp
@@ -0,0 +1,265 @@
+//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tracking of pointer bounds.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/PointerTracking.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Value.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+char PointerTracking::ID = 0;
+PointerTracking::PointerTracking() : FunctionPass(&ID) {}
+
+bool PointerTracking::runOnFunction(Function &F) {
+ predCache.clear();
+ assert(analyzing.empty());
+ FF = &F;
+ TD = getAnalysisIfAvailable<TargetData>();
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ return false;
+}
+
+void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool PointerTracking::doInitialization(Module &M) {
+ const Type *PTy = Type::getInt8PtrTy(M.getContext());
+
+ // Find calloc(i64, i64) or calloc(i32, i32).
+ callocFunc = M.getFunction("calloc");
+ if (callocFunc) {
+ const FunctionType *Ty = callocFunc->getFunctionType();
+
+ std::vector<const Type*> args, args2;
+ args.push_back(Type::getInt64Ty(M.getContext()));
+ args.push_back(Type::getInt64Ty(M.getContext()));
+ args2.push_back(Type::getInt32Ty(M.getContext()));
+ args2.push_back(Type::getInt32Ty(M.getContext()));
+ const FunctionType *Calloc1Type =
+ FunctionType::get(PTy, args, false);
+ const FunctionType *Calloc2Type =
+ FunctionType::get(PTy, args2, false);
+ if (Ty != Calloc1Type && Ty != Calloc2Type)
+ callocFunc = 0; // Give up
+ }
+
+ // Find realloc(i8*, i64) or realloc(i8*, i32).
+ reallocFunc = M.getFunction("realloc");
+ if (reallocFunc) {
+ const FunctionType *Ty = reallocFunc->getFunctionType();
+ std::vector<const Type*> args, args2;
+ args.push_back(PTy);
+ args.push_back(Type::getInt64Ty(M.getContext()));
+ args2.push_back(PTy);
+ args2.push_back(Type::getInt32Ty(M.getContext()));
+
+ const FunctionType *Realloc1Type =
+ FunctionType::get(PTy, args, false);
+ const FunctionType *Realloc2Type =
+ FunctionType::get(PTy, args2, false);
+ if (Ty != Realloc1Type && Ty != Realloc2Type)
+ reallocFunc = 0; // Give up
+ }
+ return false;
+}
+
+// Calculates the number of elements allocated for pointer P,
+// the type of the element is stored in Ty.
+const SCEV *PointerTracking::computeAllocationCount(Value *P,
+ const Type *&Ty) const {
+ Value *V = P->stripPointerCasts();
+ if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ Value *arraySize = AI->getArraySize();
+ Ty = AI->getAllocatedType();
+ // arraySize elements of type Ty.
+ return SE->getSCEV(arraySize);
+ }
+
+ if (CallInst *CI = extractMallocCall(V)) {
+ Value *arraySize = getMallocArraySize(CI, P->getContext(), TD);
+ Ty = getMallocAllocatedType(CI);
+ if (!Ty || !arraySize) return SE->getCouldNotCompute();
+ // arraySize elements of type Ty.
+ return SE->getSCEV(arraySize);
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ Ty = ATy->getElementType();
+ return SE->getConstant(Type::getInt32Ty(P->getContext()),
+ ATy->getNumElements());
+ }
+ }
+ Ty = GV->getType();
+ return SE->getConstant(Type::getInt32Ty(P->getContext()), 1);
+ //TODO: implement more tracking for globals
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(V)) {
+ CallSite CS(CI);
+ Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ const Loop *L = LI->getLoopFor(CI->getParent());
+ if (F == callocFunc) {
+ Ty = Type::getInt8Ty(P->getContext());
+ // calloc allocates arg0*arg1 bytes.
+ return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)),
+ SE->getSCEV(CS.getArgument(1))),
+ L);
+ } else if (F == reallocFunc) {
+ Ty = Type::getInt8Ty(P->getContext());
+ // realloc allocates arg1 bytes.
+ return SE->getSCEVAtScope(CS.getArgument(1), L);
+ }
+ }
+
+ return SE->getCouldNotCompute();
+}
+
+// Calculates the number of elements of type Ty allocated for P.
+const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
+ const Type *Ty)
+ const {
+ const Type *elementTy;
+ const SCEV *Count = computeAllocationCount(P, elementTy);
+ if (isa<SCEVCouldNotCompute>(Count))
+ return Count;
+ if (elementTy == Ty)
+ return Count;
+
+ if (!TD) // need TargetData from this point forward
+ return SE->getCouldNotCompute();
+
+ uint64_t elementSize = TD->getTypeAllocSize(elementTy);
+ uint64_t wantSize = TD->getTypeAllocSize(Ty);
+ if (elementSize == wantSize)
+ return Count;
+  if (elementSize % wantSize) // fractional counts are not possible
+ return SE->getCouldNotCompute();
+ return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
+ elementSize/wantSize));
+}
+
+const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
+ // We only deal with pointers.
+ const PointerType *PTy = cast<PointerType>(V->getType());
+ return computeAllocationCountForType(V, PTy->getElementType());
+}
+
+const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
+ return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext()));
+}
+
+// Checks whether the loop is guarded by Pred(A, B), also trying the swapped
+// and inverted forms of the predicate.
+enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
+ Predicate Pred,
+ const SCEV *A,
+ const SCEV *B) const {
+ if (SE->isLoopGuardedByCond(L, Pred, A, B))
+ return AlwaysTrue;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ if (SE->isLoopGuardedByCond(L, Pred, B, A))
+ return AlwaysTrue;
+
+ Pred = ICmpInst::getInversePredicate(Pred);
+ if (SE->isLoopGuardedByCond(L, Pred, B, A))
+ return AlwaysFalse;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ if (SE->isLoopGuardedByCond(L, Pred, A, B))
+    return AlwaysFalse;
+ return Unknown;
+}
+
+enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
+ const SCEV *Limit,
+ BasicBlock *BB)
+{
+ //FIXME: merge implementation
+ return Unknown;
+}
+
+void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
+ const SCEV *&Limit,
+ const SCEV *&Offset) const
+{
+ Pointer = Pointer->stripPointerCasts();
+ Base = Pointer->getUnderlyingObject();
+ Limit = getAllocationSizeInBytes(Base);
+ if (isa<SCEVCouldNotCompute>(Limit)) {
+ Base = 0;
+ Offset = Limit;
+ return;
+ }
+
+ Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base));
+ if (isa<SCEVCouldNotCompute>(Offset)) {
+ Base = 0;
+ Limit = Offset;
+ }
+}
+
+void PointerTracking::print(raw_ostream &OS, const Module* M) const {
+  // Calling some PT methods may cause caches to be updated; however,
+  // this should be safe for the same reason it's safe for SCEV.
+ PointerTracking &PT = *const_cast<PointerTracking*>(this);
+ for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
+ if (!isa<PointerType>(I->getType()))
+ continue;
+ Value *Base;
+ const SCEV *Limit, *Offset;
+ getPointerOffset(&*I, Base, Limit, Offset);
+ if (!Base)
+ continue;
+
+ if (Base == &*I) {
+ const SCEV *S = getAllocationElementCount(Base);
+ OS << *Base << " ==> " << *S << " elements, ";
+ OS << *Limit << " bytes allocated\n";
+ continue;
+ }
+ OS << &*I << " -- base: " << *Base;
+ OS << " offset: " << *Offset;
+
+ enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
+ switch (res) {
+ case AlwaysTrue:
+ OS << " always safe\n";
+ break;
+ case AlwaysFalse:
+ OS << " always unsafe\n";
+ break;
+ case Unknown:
+ OS << " <<unknown>>\n";
+ break;
+ }
+ }
+}
+
+static RegisterPass<PointerTracking> X("pointertracking",
+ "Track pointer bounds", false, true);
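The count-rescaling step inside computeAllocationCountForType() above is worth seeing in isolation: N elements of size s occupy the same storage as N * (s / w) elements of size w, but only when w divides s evenly. Below is a minimal standalone sketch of that rule; rescaleCount is an illustrative name, not part of the LLVM API:

    #include <cstdint>
    #include <optional>

    // N elements of size `elementSize` cover the same bytes as
    // N * (elementSize / wantSize) elements of size `wantSize`,
    // provided the division is exact; otherwise the count would be
    // fractional and cannot be expressed.
    std::optional<uint64_t> rescaleCount(uint64_t count,
                                         uint64_t elementSize,
                                         uint64_t wantSize) {
      if (elementSize == wantSize)
        return count;
      if (elementSize % wantSize != 0)
        return std::nullopt; // fractional count, give up
      return count * (elementSize / wantSize);
    }

For example, an allocation of 8 elements of a 16-byte struct viewed as 4-byte i32 gives rescaleCount(8, 16, 4) == 32, matching the getTypeAllocSize() arithmetic in the pass.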
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 4853c2ac87b7..69d6b47bbee4 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -33,15 +33,19 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true);
bool PostDominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
- DEBUG(DT->dump());
+ DEBUG(DT->print(errs()));
return false;
}
-PostDominatorTree::~PostDominatorTree()
-{
+PostDominatorTree::~PostDominatorTree() {
delete DT;
}
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+
FunctionPass* llvm::createPostDomTree() {
return new PostDominatorTree();
}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 000000000000..c585c1dced04
--- /dev/null
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,310 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete ProfileInfo provider that estimates the
+// profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+ "profile-estimator-loop-weight", cl::init(10),
+ cl::value_desc("loop-weight"),
+ cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+ class VISIBILITY_HIDDEN ProfileEstimatorPass :
+ public FunctionPass, public ProfileInfo {
+ double ExecCount;
+ LoopInfo *LI;
+ std::set<BasicBlock*> BBToVisit;
+ std::map<Loop*,double> LoopExitWeights;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileEstimatorPass(const double execcount = 0)
+ : FunctionPass(&ID), ExecCount(execcount) {
+ if (execcount == 0) ExecCount = LoopWeight;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information estimator";
+ }
+
+    /// run - Estimate the profile information for the specified function.
+ virtual bool runOnFunction(Function &F);
+
+ virtual void recurseBasicBlock(BasicBlock *BB);
+
+ void inline printEdgeWeight(Edge);
+ };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+static RegisterPass<ProfileEstimatorPass>
+X("profile-estimator", "Estimate profiling information", false, true);
+
+static RegisterAnalysisGroup<ProfileInfo> Y(X);
+
+namespace llvm {
+ const PassInfo *ProfileEstimatorPassID = &X;
+
+ FunctionPass *createProfileEstimatorPass() {
+ return new ProfileEstimatorPass();
+ }
+
+ /// createProfileEstimatorPass - This function returns a Pass that estimates
+ /// profiling information using the given loop execution count.
+ Pass *createProfileEstimatorPass(const unsigned execcount) {
+ return new ProfileEstimatorPass(execcount);
+ }
+}
+
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+ DEBUG(errs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+ DEBUG(errs() << "-- Weight of Edge " << E << ":"
+ << format("%g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must equal the block weight, which must in
+// turn equal the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again;
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+ // Break the recursion if this BasicBlock was already visited.
+ if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+ // Read the LoopInfo for this block.
+ bool BBisHeader = LI->isLoopHeader(BB);
+ Loop* BBLoop = LI->getLoopFor(BB);
+
+ // To get the block weight, read all incoming edges.
+ double BBWeight = 0;
+ std::set<BasicBlock*> ProcessedPreds;
+ for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ // If this block was not considered already, add weight.
+ Edge edge = getEdge(*bbi,BB);
+ double w = getEdgeWeight(edge);
+ if (ProcessedPreds.insert(*bbi).second) {
+ BBWeight += ignoreMissing(w);
+ }
+    // If this block is a loop header and the predecessor is contained in
+    // this loop, the edge is a backedge; continue and do not check whether
+    // the value is valid.
+ if (BBisHeader && BBLoop->contains(*bbi)) {
+ printEdgeError(edge, "but is backedge, continueing");
+ continue;
+ }
+    // If the edge's value is missing (and this is neither a loop header nor
+    // a backedge), return; this block cannot be estimated yet.
+ if (w == MissingValue) {
+ printEdgeError(edge, "returning");
+ return;
+ }
+ }
+ if (getExecutionCount(BB) != MissingValue) {
+ BBWeight = getExecutionCount(BB);
+ }
+
+ // Fetch all necessary information for current block.
+ SmallVector<Edge, 8> ExitEdges;
+ SmallVector<Edge, 8> Edges;
+ if (BBLoop) {
+ BBLoop->getExitEdges(ExitEdges);
+ }
+
+ // If this is a loop header, consider the following:
+  // Exactly the flow that enters this block must also exit it. So do the
+  // following:
+  // *) get all the exit edges, read the flow that is already leaving this
+  // loop, remember the edges that do not have any flow on them right now.
+  // (The edges that already have flow on them are most likely exiting edges
+  // of other loops; do not touch those flows, because the previously
+  // calculated loop headers would not be exact anymore.)
+ // *) In case there is not a single exiting edge left, create one at the loop
+ // latch to prevent the flow from building up in the loop.
+ // *) Take the flow that is not leaving the loop already and distribute it on
+ // the remaining exiting edges.
+ // (This ensures that all flow that enters the loop also leaves it.)
+ // *) Increase the flow into the loop by increasing the weight of this block.
+ // There is at least one incoming backedge that will bring us this flow later
+ // on. (So that the flow condition in this node is valid again.)
+ if (BBisHeader) {
+ double incoming = BBWeight;
+ // Subtract the flow leaving the loop.
+ std::set<Edge> ProcessedExits;
+ for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+ ee = ExitEdges.end(); ei != ee; ++ei) {
+ if (ProcessedExits.insert(*ei).second) {
+ double w = getEdgeWeight(*ei);
+ if (w == MissingValue) {
+ Edges.push_back(*ei);
+ } else {
+ incoming -= w;
+ }
+ }
+ }
+ // If no exit edges, create one:
+ if (Edges.size() == 0) {
+ BasicBlock *Latch = BBLoop->getLoopLatch();
+ if (Latch) {
+ Edge edge = getEdge(Latch,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ edge = getEdge(Latch, BB);
+ EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+ printEdgeWeight(edge);
+ }
+ }
+ // Distribute remaining weight onto the exit edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size();
+ printEdgeWeight(*ei);
+ }
+ // Increase flow into the loop.
+ BBWeight *= (ExecCount+1);
+ }
+
+ BlockInformation[BB->getParent()][BB] = BBWeight;
+  // Up until now we considered only the loop exiting edges; now we have a
+  // definite block weight and must distribute this onto the outgoing edges.
+  // Since there may already be flow attached to some of the edges, read this
+  // flow first and remember the edges that still have no flow attached.
+ Edges.clear();
+ std::set<BasicBlock*> ProcessedSuccs;
+
+ succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // Also check for (BB,0) edges that may already contain some flow. (But only
+ // in case there are no successors.)
+ if (bbi == bbe) {
+ Edge edge = getEdge(BB,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ }
+ for ( ; bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ Edge edge = getEdge(BB,*bbi);
+ double w = getEdgeWeight(edge);
+ if (w != MissingValue) {
+ BBWeight -= getEdgeWeight(edge);
+ } else {
+ Edges.push_back(edge);
+ }
+ }
+ }
+
+ // Finally we know what flow is still not leaving the block, distribute this
+ // flow onto the empty edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size();
+ printEdgeWeight(*ei);
+ }
+
+ // This block is visited, mark this before the recursion.
+ BBToVisit.erase(BB);
+
+ // Recurse into successors.
+ for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Fetch LoopInfo and clear ProfileInfo for this function.
+ LI = &getAnalysis<LoopInfo>();
+ FunctionInformation.erase(&F);
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+
+ // Mark all blocks as to visit.
+ for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+ BBToVisit.insert(bi);
+
+ DEBUG(errs() << "Working on function " << F.getNameStr() << "\n");
+
+ // Since the entry block is the first one and has no predecessors, the edge
+ // (0,entry) is inserted with the starting weight of 1.
+ BasicBlock *entry = &F.getEntryBlock();
+ BlockInformation[&F][entry] = 1;
+ Edge edge = getEdge(0,entry);
+ EdgeInformation[&F][edge] = 1;
+ printEdgeWeight(edge);
+
+  // Since recurseBasicBlock() may return with a block that was not fully
+  // estimated, keep calling recurseBasicBlock() until everything is
+  // calculated.
+ recurseBasicBlock(entry);
+ while (BBToVisit.size() > 0) {
+ // Remember number of open blocks, this is later used to check if progress
+ // was made.
+ unsigned size = BBToVisit.size();
+
+ // Try to calculate all blocks in turn.
+ for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+ be = BBToVisit.end(); bi != be; ++bi) {
+ recurseBasicBlock(*bi);
+ // If at least one block was finished, break because iterator may be
+ // invalid.
+ if (BBToVisit.size() < size) break;
+ }
+
+    // If not a single block was resolved, make some assumptions.
+ if (BBToVisit.size() == size) {
+ BasicBlock *BB = *(BBToVisit.begin());
+ // Since this BB was not calculated because of missing incoming edges,
+ // set these edges to zero.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ EdgeInformation[&F][e] = 0;
+ DEBUG(errs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ }
+ }
+ }
+ }
+
+ return false;
+}
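The invariant the estimator maintains throughout recurseBasicBlock(), namely that a block's weight equals both the sum of its incoming and the sum of its outgoing edge weights, is cheap to check after the fact. A hedged sketch follows, using a plain map keyed by (from, to) name pairs instead of LLVM's ProfileInfo structures (all names illustrative):

    #include <cmath>
    #include <map>
    #include <string>
    #include <utility>

    // Edge endpoints by block name; "" stands for the virtual block 0.
    using Edge = std::pair<std::string, std::string>;

    // Verify the flow condition for block B: sum(in) == weight == sum(out),
    // up to floating-point noise from repeated accumulation.
    bool flowConditionHolds(const std::map<Edge, double> &Weights,
                            const std::string &B, double BlockWeight) {
      double In = 0, Out = 0;
      for (const auto &KV : Weights) {
        if (KV.first.second == B) In += KV.second;
        if (KV.first.first == B)  Out += KV.second;
      }
      return std::fabs(In - BlockWeight) < 1e-9 &&
             std::fabs(Out - BlockWeight) < 1e-9;
    }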
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index a0965b66da81..9efdd23081c4 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -17,6 +17,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
#include <set>
using namespace llvm;
@@ -26,56 +29,149 @@ char ProfileInfo::ID = 0;
ProfileInfo::~ProfileInfo() {}
-unsigned ProfileInfo::getExecutionCount(BasicBlock *BB) const {
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+const double ProfileInfo::MissingValue = -1;
+
+double ProfileInfo::getExecutionCount(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(BB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
// Are there zero predecessors of this block?
if (PI == PE) {
// If this is the entry block, look for the Null -> Entry edge.
if (BB == &BB->getParent()->getEntryBlock())
- return getEdgeWeight(0, BB);
+ return getEdgeWeight(getEdge(0, BB));
else
return 0; // Otherwise, this is a dead block.
}
// Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges. Note that if
- // there are multiple edges from a predecessor to this block that we don't
- // want to count its weight multiple times. For this reason, we keep track of
- // the predecessors we've seen and only count them if we haven't run into them
- // yet.
- //
- // We don't want to create an std::set unless we are dealing with a block that
- // has a LARGE number of in-edges. Handle the common case of having only a
- // few in-edges with special code.
- //
- BasicBlock *FirstPred = *PI;
- unsigned Count = getEdgeWeight(FirstPred, BB);
- ++PI;
- if (PI == PE) return Count; // Quick exit for single predecessor blocks
-
- BasicBlock *SecondPred = *PI;
- if (SecondPred != FirstPred) Count += getEdgeWeight(SecondPred, BB);
- ++PI;
- if (PI == PE) return Count; // Quick exit for two predecessor blocks
-
- BasicBlock *ThirdPred = *PI;
- if (ThirdPred != FirstPred && ThirdPred != SecondPred)
- Count += getEdgeWeight(ThirdPred, BB);
- ++PI;
- if (PI == PE) return Count; // Quick exit for three predecessor blocks
-
- std::set<BasicBlock*> ProcessedPreds;
- ProcessedPreds.insert(FirstPred);
- ProcessedPreds.insert(SecondPred);
- ProcessedPreds.insert(ThirdPred);
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ double Count = 0;
for (; PI != PE; ++PI)
- if (ProcessedPreds.insert(*PI).second)
- Count += getEdgeWeight(*PI, BB);
+ if (ProcessedPreds.insert(*PI).second) {
+ double w = getEdgeWeight(getEdge(*PI, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+
+ if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+ return Count;
+}
+
+double ProfileInfo::getExecutionCount(const Function *F) {
+ std::map<const Function*, double>::iterator J =
+ FunctionInformation.find(F);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+  // isDeclaration() is checked here and not at the start of the function to
+  // allow functions without a body to still have an execution count.
+ if (F->isDeclaration()) return MissingValue;
+
+ double Count = getExecutionCount(&F->getEntryBlock());
+ if (Count != MissingValue) FunctionInformation[F] = Count;
return Count;
}
+/// Replaces all occurrences of RmBB in the ProfileInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces
+/// the occurrences of RmBB with DestBB.
+void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
+ const BasicBlock *DestBB) {
+ DEBUG(errs() << "Replacing " << RmBB->getNameStr()
+ << " with " << DestBB->getNameStr() << "\n");
+ const Function *F = DestBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ I != E; ++I) {
+ Edge e = I->first;
+ Edge newedge; bool foundedge = false;
+ if (e.first == RmBB) {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
+ if (e.second == RmBB) {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
+ if (foundedge) {
+ double w = getEdgeWeight(e);
+ EdgeInformation[F][newedge] = w;
+ DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n");
+ J->second.erase(e);
+ }
+ }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it is possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
+void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
+ const Function *F = FirstBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ // Generate edges and read current weight.
+ Edge e = getEdge(FirstBB, SecondBB);
+ Edge n1 = getEdge(FirstBB, NewBB);
+ Edge n2 = getEdge(NewBB, SecondBB);
+ EdgeWeights &ECs = J->second;
+ double w = ECs[e];
+
+ int succ_count = 0;
+ if (!MergeIdenticalEdges) {
+    // First count the edges from FirstBB to SecondBB; if there is more than
+    // one, only slice out a proportional part for NewBB.
+ for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+ BBI != BBE; ++BBI) {
+ if (*BBI == SecondBB) succ_count++;
+ }
+ // When the NewBB is completely new, increment the count by one so that
+ // the counts are properly distributed.
+ if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+ } else {
+    // When the edges are merged anyway, redirect all flow.
+ succ_count = 1;
+ }
+  // We now know how many edges there are from FirstBB to SecondBB; reroute a
+  // proportional part of the edge weight over NewBB.
+ double neww = w / succ_count;
+ ECs[n1] += neww;
+ ECs[n2] += neww;
+ BlockInformation[F][NewBB] += neww;
+ if (succ_count == 1) {
+ ECs.erase(e);
+ } else {
+ ECs[e] -= neww;
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) {
+ O << "(";
+ O << (E.first ? E.first->getNameStr() : "0");
+ O << ",";
+ O << (E.second ? E.second->getNameStr() : "0");
+ return O << ")";
+}
//===----------------------------------------------------------------------===//
// NoProfile ProfileInfo implementation
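A quick worked example of splitEdge() above: suppose the edge (FirstBB, SecondBB) carries weight w = 6, the CFG contains two parallel FirstBB to SecondBB edges, and NewBB is brand new, so succ_count ends up at 3. Then neww = 2: the edges (FirstBB, NewBB) and (NewBB, SecondBB) each receive 2, NewBB itself gets a block count of 2, and the original edge drops to 4, so the total flow leaving FirstBB stays at 6. A sketch of just that arithmetic, detached from the ProfileInfo bookkeeping (names are illustrative):

    struct SplitResult {
      double OldEdge;  // weight remaining on (FirstBB, SecondBB)
      double NewPart;  // weight placed on each new edge and on NewBB
    };

    // Reroute a proportional share of `w` over the new block, given
    // `parallel` CFG edges from FirstBB to SecondBB and whether NewBB is
    // a freshly created block with no execution count of its own yet.
    SplitResult splitWeights(double w, int parallel, bool newBlockIsFresh) {
      int succ_count = parallel + (newBlockIsFresh ? 1 : 0);
      double neww = w / succ_count;
      return { succ_count == 1 ? 0.0 : w - neww, neww };
    }

    // splitWeights(6, 2, true) -> OldEdge 4, NewPart 2.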
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index adb2bdc42549..25481b2ee671 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -16,7 +16,7 @@
#include "llvm/Analysis/ProfileInfoTypes.h"
#include "llvm/Module.h"
#include "llvm/InstrTypes.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <cstdlib>
#include <map>
@@ -26,10 +26,17 @@ using namespace llvm;
//
static inline unsigned ByteSwap(unsigned Var, bool Really) {
if (!Really) return Var;
- return ((Var & (255<< 0)) << 24) |
- ((Var & (255<< 8)) << 8) |
- ((Var & (255<<16)) >> 8) |
- ((Var & (255<<24)) >> 24);
+ return ((Var & (255U<< 0U)) << 24U) |
+ ((Var & (255U<< 8U)) << 8U) |
+ ((Var & (255U<<16U)) >> 8U) |
+ ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ if (A == ProfileInfoLoader::Uncounted) return B;
+ if (B == ProfileInfoLoader::Uncounted) return A;
+ return A + B;
}
static void ReadProfilingBlock(const char *ToolName, FILE *F,
@@ -38,7 +45,7 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F,
// Read the number of entries...
unsigned NumEntries;
if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
- cerr << ToolName << ": data packet truncated!\n";
+ errs() << ToolName << ": data packet truncated!\n";
perror(0);
exit(1);
}
@@ -49,35 +56,41 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F,
// Read in the block of data...
if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
- cerr << ToolName << ": data packet truncated!\n";
+ errs() << ToolName << ": data packet truncated!\n";
perror(0);
exit(1);
}
- // Make sure we have enough space...
+ // Make sure we have enough space... The space is initialised to -1 to
+  // facilitate the loading of missing values for OptimalEdgeProfiling.
if (Data.size() < NumEntries)
- Data.resize(NumEntries);
+ Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
// Accumulate the data we just read into the data.
if (!ShouldByteSwap) {
- for (unsigned i = 0; i != NumEntries; ++i)
- Data[i] += TempSpace[i];
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(TempSpace[i], Data[i]);
+ }
} else {
- for (unsigned i = 0; i != NumEntries; ++i)
- Data[i] += ByteSwap(TempSpace[i], true);
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+ }
}
}
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
// program if the file is invalid or broken.
//
ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
const std::string &Filename,
- Module &TheModule) :
- M(TheModule), Warned(false) {
- FILE *F = fopen(Filename.c_str(), "r");
+ Module &TheModule) :
+ Filename(Filename),
+ M(TheModule), Warned(false) {
+ FILE *F = fopen(Filename.c_str(), "rb");
if (F == 0) {
- cerr << ToolName << ": Error opening '" << Filename << "': ";
+ errs() << ToolName << ": Error opening '" << Filename << "': ";
perror(0);
exit(1);
}
@@ -95,7 +108,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
case ArgumentInfo: {
unsigned ArgLength;
if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
- cerr << ToolName << ": arguments packet truncated!\n";
+ errs() << ToolName << ": arguments packet truncated!\n";
perror(0);
exit(1);
}
@@ -106,7 +119,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
if (ArgLength)
if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
- cerr << ToolName << ": arguments packet truncated!\n";
+ errs() << ToolName << ": arguments packet truncated!\n";
perror(0);
exit(1);
}
@@ -126,12 +139,16 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
break;
+ case OptEdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
+ break;
+
case BBTraceInfo:
ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
break;
default:
- cerr << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
exit(1);
}
}
@@ -139,139 +156,3 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
fclose(F);
}
-
-// getFunctionCounts - This method is used by consumers of function counting
-// information. If we do not directly have function count information, we
-// compute it from other, more refined, types of profile information.
-//
-void ProfileInfoLoader::getFunctionCounts(std::vector<std::pair<Function*,
- unsigned> > &Counts) {
- if (FunctionCounts.empty()) {
- if (hasAccurateBlockCounts()) {
- // Synthesize function frequency information from the number of times
- // their entry blocks were executed.
- std::vector<std::pair<BasicBlock*, unsigned> > BlockCounts;
- getBlockCounts(BlockCounts);
-
- for (unsigned i = 0, e = BlockCounts.size(); i != e; ++i)
- if (&BlockCounts[i].first->getParent()->getEntryBlock() ==
- BlockCounts[i].first)
- Counts.push_back(std::make_pair(BlockCounts[i].first->getParent(),
- BlockCounts[i].second));
- } else {
- cerr << "Function counts are not available!\n";
- }
- return;
- }
-
- unsigned Counter = 0;
- for (Module::iterator I = M.begin(), E = M.end();
- I != E && Counter != FunctionCounts.size(); ++I)
- if (!I->isDeclaration())
- Counts.push_back(std::make_pair(I, FunctionCounts[Counter++]));
-}
-
-// getBlockCounts - This method is used by consumers of block counting
-// information. If we do not directly have block count information, we
-// compute it from other, more refined, types of profile information.
-//
-void ProfileInfoLoader::getBlockCounts(std::vector<std::pair<BasicBlock*,
- unsigned> > &Counts) {
- if (BlockCounts.empty()) {
- if (hasAccurateEdgeCounts()) {
- // Synthesize block count information from edge frequency information.
- // The block execution frequency is equal to the sum of the execution
- // frequency of all outgoing edges from a block.
- //
- // If a block has no successors, this will not be correct, so we have to
- // special case it. :(
- std::vector<std::pair<Edge, unsigned> > EdgeCounts;
- getEdgeCounts(EdgeCounts);
-
- std::map<BasicBlock*, unsigned> InEdgeFreqs;
-
- BasicBlock *LastBlock = 0;
- TerminatorInst *TI = 0;
- for (unsigned i = 0, e = EdgeCounts.size(); i != e; ++i) {
- if (EdgeCounts[i].first.first != LastBlock) {
- LastBlock = EdgeCounts[i].first.first;
- TI = LastBlock->getTerminator();
- Counts.push_back(std::make_pair(LastBlock, 0));
- }
- Counts.back().second += EdgeCounts[i].second;
- unsigned SuccNum = EdgeCounts[i].first.second;
- if (SuccNum >= TI->getNumSuccessors()) {
- if (!Warned) {
- cerr << "WARNING: profile info doesn't seem to match"
- << " the program!\n";
- Warned = true;
- }
- } else {
- // If this successor has no successors of its own, we will never
- // compute an execution count for that block. Remember the incoming
- // edge frequencies to add later.
- BasicBlock *Succ = TI->getSuccessor(SuccNum);
- if (Succ->getTerminator()->getNumSuccessors() == 0)
- InEdgeFreqs[Succ] += EdgeCounts[i].second;
- }
- }
-
- // Now we have to accumulate information for those blocks without
- // successors into our table.
- for (std::map<BasicBlock*, unsigned>::iterator I = InEdgeFreqs.begin(),
- E = InEdgeFreqs.end(); I != E; ++I) {
- unsigned i = 0;
- for (; i != Counts.size() && Counts[i].first != I->first; ++i)
- /*empty*/;
- if (i == Counts.size()) Counts.push_back(std::make_pair(I->first, 0));
- Counts[i].second += I->second;
- }
-
- } else {
- cerr << "Block counts are not available!\n";
- }
- return;
- }
-
- unsigned Counter = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- Counts.push_back(std::make_pair(BB, BlockCounts[Counter++]));
- if (Counter == BlockCounts.size())
- return;
- }
-}
-
-// getEdgeCounts - This method is used by consumers of edge counting
-// information. If we do not directly have edge count information, we compute
-// it from other, more refined, types of profile information.
-//
-void ProfileInfoLoader::getEdgeCounts(std::vector<std::pair<Edge,
- unsigned> > &Counts) {
- if (EdgeCounts.empty()) {
- cerr << "Edge counts not available, and no synthesis "
- << "is implemented yet!\n";
- return;
- }
-
- unsigned Counter = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (unsigned i = 0, e = BB->getTerminator()->getNumSuccessors();
- i != e; ++i) {
- Counts.push_back(std::make_pair(Edge(BB, i), EdgeCounts[Counter++]));
- if (Counter == EdgeCounts.size())
- return;
- }
-}
-
-// getBBTrace - This method is used by consumers of basic-block trace
-// information.
-//
-void ProfileInfoLoader::getBBTrace(std::vector<BasicBlock *> &Trace) {
- if (BBTrace.empty ()) {
- cerr << "Basic block trace is not available!\n";
- return;
- }
- cerr << "Basic block trace loading is not implemented yet!\n";
-}
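The Uncounted sentinel used above (~0U) is what lets several profiling runs be merged without misreading "never recorded" as a huge real count. A minimal sketch of the merge rule, mirroring AddCounts() but outside the loader (mergeRuns is an illustrative name):

    #include <cstdint>
    #include <vector>

    static const uint32_t Uncounted = ~0U; // sentinel: no value recorded

    // Accumulate one run's counters into Acc; Uncounted entries defer to
    // the other side instead of being summed as real values.
    void mergeRuns(std::vector<uint32_t> &Acc,
                   const std::vector<uint32_t> &Run) {
      if (Acc.size() < Run.size())
        Acc.resize(Run.size(), Uncounted);
      for (size_t i = 0; i != Run.size(); ++i) {
        if (Acc[i] == Uncounted)
          Acc[i] = Run[i];
        else if (Run[i] != Uncounted)
          Acc[i] += Run[i];
      }
    }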
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 0a8a87bd0f97..89d90bca2166 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -11,18 +11,27 @@
// loads the information from a profile dump file.
//
//===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "profile-loader"
#include "llvm/BasicBlock.h"
#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
using namespace llvm;
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
static cl::opt<std::string>
ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
cl::value_desc("filename"),
@@ -31,6 +40,9 @@ ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
namespace {
class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo {
std::string Filename;
+ std::set<Edge> SpanningTree;
+ std::set<const BasicBlock*> BBisUnvisited;
+ unsigned ReadCount;
public:
static char ID; // Class identification, replacement for typeinfo
explicit LoaderPass(const std::string &filename = "")
@@ -46,6 +58,12 @@ namespace {
return "Profiling information loader";
}
+  // recurseBasicBlock() - Calculates the edge weights for as many basic
+  // blocks as possible.
+ virtual void recurseBasicBlock(const BasicBlock *BB);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, unsigned &);
+ virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
/// run - Load the profile information from the specified file.
virtual bool runOnModule(Module &M);
};
@@ -66,25 +84,210 @@ Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
return new LoaderPass(Filename);
}
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+ unsigned &uncalc, unsigned &count) {
+ double w;
+ if ((w = getEdgeWeight(edge)) == MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ } else {
+ count+=w;
+ }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // break recursion if already visited
+ if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+ BBisUnvisited.erase(BB);
+ if (!BB) return;
+
+ for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+ for (pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+  // collect weights of all incoming and outgoing edges, remember edges that
+  // have no value
+ unsigned incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount);
+ }
+ }
+
+ unsigned outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount);
+ }
+ }
+
+  // if exactly one edge weight was missing, calculate it and remove it from
+  // the spanning tree
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%g", getEdgeWeight(edgetocalc)) << "\n");
+ SpanningTree.erase(edgetocalc);
+ }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+ std::vector<unsigned> &ECs) {
+ if (ReadCount < ECs.size()) {
+ double weight = ECs[ReadCount++];
+ if (weight != ProfileInfoLoader::Uncounted) {
+      // Here the data realm changes from the unsigned of the file to the
+      // double of the ProfileInfo. This conversion is safe because we know
+      // that everything that is representable in unsigned is also
+      // representable in double.
+ EdgeInformation[getFunction(e)][e] += (double)weight;
+
+ DEBUG(errs() << "--Read Edge Counter for " << e
+ << " (# "<< (ReadCount-1) << "): "
+ << (unsigned)getEdgeWeight(e) << "\n");
+ } else {
+ // This happens only if reading optimal profiling information, not when
+ // reading regular profiling information.
+ SpanningTree.insert(e);
+ }
+ }
+}
+
bool LoaderPass::runOnModule(Module &M) {
ProfileInfoLoader PIL("profile-loader", Filename, M);
- EdgeCounts.clear();
- bool PrintedWarning = false;
-
- std::vector<std::pair<ProfileInfoLoader::Edge, unsigned> > ECs;
- PIL.getEdgeCounts(ECs);
- for (unsigned i = 0, e = ECs.size(); i != e; ++i) {
- BasicBlock *BB = ECs[i].first.first;
- unsigned SuccNum = ECs[i].first.second;
- TerminatorInst *TI = BB->getTerminator();
- if (SuccNum >= TI->getNumSuccessors()) {
- if (!PrintedWarning) {
- cerr << "WARNING: profile information is inconsistent with "
+
+ EdgeInformation.clear();
+ std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
<< "the current program!\n";
- PrintedWarning = true;
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ Counters = PIL.getRawOptimalEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ readEdge(getEdge(BB,0), Counters);
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
}
- } else {
- EdgeCounts[std::make_pair(BB, TI->getSuccessor(SuccNum))]+= ECs[i].second;
+ while (SpanningTree.size() > 0) {
+#if 0
+ unsigned size = SpanningTree.size();
+#endif
+ BBisUnvisited.clear();
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ BBisUnvisited.insert(ei->first);
+ BBisUnvisited.insert(ei->second);
+ }
+ while (BBisUnvisited.size() > 0) {
+ recurseBasicBlock(*BBisUnvisited.begin());
+ }
+#if 0
+ if (SpanningTree.size() == size) {
+ DEBUG(errs()<<"{");
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<","
+ <<(ei->second?ei->second->getName():"0")<<"),");
+ }
+ assert(0 && "No edge calculated!");
+ }
+#endif
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ BlockInformation.clear();
+ Counters = PIL.getRawBlockCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReadCount < Counters.size())
+        // Here the data realm changes from the unsigned of the file to the
+        // double of the ProfileInfo. This conversion is safe because we know
+        // that everything that is representable in unsigned is also
+        // representable in double.
+ BlockInformation[F][BB] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ FunctionInformation.clear();
+ Counters = PIL.getRawFunctionCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ if (ReadCount < Counters.size())
+      // Here the data realm changes from the unsigned of the file to the
+      // double of the ProfileInfo. This conversion is safe because we know
+      // that everything that is representable in unsigned is also
+      // representable in double.
+ FunctionInformation[F] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
}
}
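The reconstruction in recurseBasicBlock() above rests on flow conservation: when all but one of the edge weights around a block are known, the missing weight is simply the difference between the incoming and outgoing sums. A hedged standalone sketch of that single-unknown solve (solveMissingEdge is an illustrative name):

    #include <cmath>
    #include <optional>

    // Given the sums of the known incoming and outgoing edge weights of a
    // block and the number of edges still unknown, recover the one missing
    // weight; with more than one unknown the system is underdetermined.
    std::optional<double> solveMissingEdge(double InSum, double OutSum,
                                           unsigned UnknownEdges) {
      if (UnknownEdges != 1)
        return std::nullopt;
      return std::fabs(OutSum - InSum);
    }

This is also why the loop in runOnModule() keeps sweeping the spanning tree: each sweep can turn multi-unknown blocks into single-unknown ones as their neighbours get solved.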
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 000000000000..9766da5992df
--- /dev/null
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,343 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to verify profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool,false>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+ cl::desc("Disable assertions"));
+
+namespace {
+ class VISIBILITY_HIDDEN ProfileVerifierPass : public FunctionPass {
+
+ struct DetailedBlockInfo {
+ const BasicBlock *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
+ };
+
+ ProfileInfo *PI;
+ std::set<const BasicBlock*> BBisVisited;
+ std::set<const Function*> FisVisited;
+ bool DisableAssertions;
+
+    // When debugging is enabled, the verifier prints a whole slew of debug
+    // information; otherwise it is just the assert. These are all the helper
+    // functions.
+ bool PrintedDebugTree;
+ std::set<const BasicBlock*> BBisPrinted;
+ void debugEntry(DetailedBlockInfo*);
+ void printDebugInfo(const BasicBlock *BB);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ explicit ProfileVerifierPass () : FunctionPass(&ID) {
+ DisableAssertions = ProfileVerifierDisableAssertions;
+ }
+ explicit ProfileVerifierPass (bool da) : FunctionPass(&ID),
+ DisableAssertions(da) {
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfo>();
+ }
+
+ const char *getPassName() const {
+ return "Profiling information verifier";
+ }
+
+ /// run - Verify the profile information.
+ bool runOnFunction(Function &F);
+ void recurseBasicBlock(const BasicBlock*);
+
+ bool exitReachable(const Function*);
+ double ReadOrAssert(ProfileInfo::Edge);
+ void CheckValue(bool, const char*, DetailedBlockInfo*);
+ };
+} // End of anonymous namespace
+
+char ProfileVerifierPass::ID = 0;
+static RegisterPass<ProfileVerifierPass>
+X("profile-verifier", "Verify profiling information", false, true);
+
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
+}
+
+void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BasicBlock*> ProcessedPreds;
+ for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ ProfileInfo::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
+ errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
+ }
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ ProfileInfo::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
+ errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr()
+ <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount
+ <<",outWeight="<<outWeight<<",outCount"<<outCount<<"\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
+ }
+}
+
+void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) {
+ errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+ << DI->BB->getParent()->getNameStr() << ":";
+ errs() << "BBWeight=" << DI->BBWeight << ",";
+ errs() << "inWeight=" << DI->inWeight << ",";
+ errs() << "inCount=" << DI->inCount << ",";
+ errs() << "outWeight=" << DI->outWeight << ",";
+ errs() << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
+}
+
+// This compares A and B, tolerating small relative differences.
+static bool Equals(double A, double B) {
+ double maxRelativeError = 0.0000001;
+ if (A == B)
+ return true;
+ double relativeError;
+ if (fabs(B) > fabs(A))
+ relativeError = fabs((A - B) / B);
+ else
+ relativeError = fabs((A - B) / A);
+ if (relativeError <= maxRelativeError) return true;
+ return false;
+}
+
+// This checks if the function "exit" is reachable from an given function
+// via calls, this is necessary to check if a profile is valid despite the
+// counts not fitting exactly.
+bool ProfileVerifierPass::exitReachable(const Function *F) {
+ if (!F) return false;
+
+ if (FisVisited.count(F)) return false;
+
+ Function *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
+
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ exits |= exitReachable(CI->getCalledFunction());
+ if (exits) break;
+ }
+ }
+ return exits;
+}
+
+#define ASSERTMESSAGE(M) \
+    do { \
+      errs() << (M) << "\n"; \
+      if (!DisableAssertions) assert(0 && (M)); \
+    } while (0)
+
+double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfo::MissingValue) {
+ errs() << "Edge " << E << " in Function "
+ << ProfileInfo::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("ASSERT:Edge has missing value");
+ return 0;
+ } else {
+ return EdgeWeight;
+ }
+}
+
+void ProfileVerifierPass::CheckValue(bool Error, const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ errs() << "Block " << DI->BB->getNameStr() << " in Function "
+ << DI->BB->getParent()->getNameStr() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
+}
+
+// This calculates the Information for a block and then recurses into the
+// successors.
+void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+ // Use a data structure to store all the information, this can then be handed
+ // to debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BasicBlock*> ProcessedPreds;
+ pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+ DI.inCount++;
+ }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
+
+ // Read successors.
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+  // If there is a (0,BB) edge, consider it too. (This is done not only when
+  // there are no successors, but every time; not every function contains
+  // return blocks with no successors (think of a loop latch used as the
+  // return block)).
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfo::MissingValue) {
+ DI.outWeight += w;
+ DI.outCount++;
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
+
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfo::MissingValue,
+ "ASSERT:BasicBlock has missing value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ Function *F = CI->getCalledFunction();
+ if (F && (F->getNameStr() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
+ }
+ }
+ }
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(CI->getCalledFunction());
+ if (isExitReachable) break;
+ }
+ }
+ }
+
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "ASSERT:inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "ASSERT:BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "ASSERT:inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "ASSERT:inWeight and outWeight do not match", &DI);
+ }
+
+
+  // Mark this block as visited, recurse into successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileVerifierPass::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+  // Reset per-function state.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BasicBlock *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (!DisableAssertions)
+ assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) &&
+ "Function count and entry block count do not match");
+ return false;
+}
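Because the verifier accumulates edge weights as doubles, Equals() above compares with a relative tolerance instead of ==. A small sketch of the same comparison with the tolerance exposed as a parameter (nearlyEqual is an illustrative name, not part of the pass):

    #include <cmath>

    // Relative comparison: a 1e-7 relative error absorbs the rounding
    // introduced by summing many double-valued edge weights.
    bool nearlyEqual(double A, double B, double MaxRel = 0.0000001) {
      if (A == B) return true; // also covers exact zeros
      double Scale = std::fmax(std::fabs(A), std::fabs(B));
      return std::fabs(A - B) <= MaxRel * Scale;
    }

    // nearlyEqual(1000000.0, 1000000.00001) -> true
    // nearlyEqual(1.0, 1.1)                 -> false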
diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt
new file mode 100644
index 000000000000..c40109027299
--- /dev/null
+++ b/lib/Analysis/README.txt
@@ -0,0 +1,18 @@
+Analysis Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the
+ScalarEvolution expression for %r is this:
+
+ {1,+,3,+,2}<loop>
+
+Outside the loop, this could be evaluated simply as (%n * %n); however,
+ScalarEvolution currently evaluates it as
+
+ (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n))
+
+In addition to being much more complicated, it involves i65 arithmetic,
+which is very inefficient when expanded into code.
+
+//===---------------------------------------------------------------------===//
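For the record, the (%n * %n) claim follows from the usual closed form of a chain of recurrences; a sketch of the algebra, assuming the exit value is the one produced in iteration i = n - 1:

    f(i) = 1 + 3\binom{i}{1} + 2\binom{i}{2}
         = 1 + 3i + i(i - 1)
         = (i + 1)^2

so f(n - 1) = n^2, i.e. exactly %n * %n.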
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 408156265d24..62f3aa1dcae4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -14,9 +14,8 @@
// There are several aspects to this library. First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
-// can handle. These classes are reference counted, managed by the const SCEV*
-// class. We only create one SCEV of a particular shape, so pointer-comparisons
-// for equality are legal.
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
@@ -64,7 +63,10 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -74,12 +76,14 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;
@@ -118,11 +122,6 @@ void SCEV::dump() const {
errs() << '\n';
}
-void SCEV::print(std::ostream &o) const {
- raw_os_ostream OS(o);
- print(OS);
-}
-
bool SCEV::isZero() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isZero();
@@ -142,33 +141,26 @@ bool SCEV::isAllOnesValue() const {
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
- SCEV(scCouldNotCompute) {}
-
-void SCEVCouldNotCompute::Profile(FoldingSetNodeID &ID) const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
-}
+ SCEV(FoldingSetNodeID(), scCouldNotCompute) {}
bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
return false;
}
const Type *SCEVCouldNotCompute::getType() const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
return 0;
}
bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
return false;
}
-const SCEV *
-SCEVCouldNotCompute::replaceSymbolicValuesWithConcrete(
- const SCEV *Sym,
- const SCEV *Conc,
- ScalarEvolution &SE) const {
- return this;
+bool SCEVCouldNotCompute::hasOperand(const SCEV *) const {
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return false;
}
void SCEVCouldNotCompute::print(raw_ostream &OS) const {
@@ -179,30 +171,26 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
}
-const SCEV* ScalarEvolution::getConstant(ConstantInt *V) {
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVConstant>();
- new (S) SCEVConstant(V);
+ new (S) SCEVConstant(ID, V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
-const SCEV* ScalarEvolution::getConstant(const APInt& Val) {
- return getConstant(ConstantInt::get(Val));
+const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
}
-const SCEV*
+const SCEV *
ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
- return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
-}
-
-void SCEVConstant::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scConstant);
- ID.AddPointer(V);
+ return getConstant(
+ ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
}
const Type *SCEVConstant::getType() const { return V->getType(); }
@@ -211,22 +199,21 @@ void SCEVConstant::print(raw_ostream &OS) const {
WriteAsOperand(OS, V, false);
}
-SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy,
- const SCEV* op, const Type *ty)
- : SCEV(SCEVTy), Op(op), Ty(ty) {}
-
-void SCEVCastExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getSCEVType());
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
-}
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID,
+ unsigned SCEVTy, const SCEV *op, const Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return Op->dominates(BB, DT);
}
-SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty)
- : SCEVCastExpr(scTruncate, op, ty) {
+bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ return Op->properlyDominates(BB, DT);
+}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate non-integer value!");
@@ -236,8 +223,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}
-SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty)
- : SCEVCastExpr(scZeroExtend, op, ty) {
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot zero extend non-integer value!");
@@ -247,8 +235,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}
-SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty)
- : SCEVCastExpr(scSignExtend, op, ty) {
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot sign extend non-integer value!");
@@ -267,46 +256,6 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
OS << ")";
}
-const SCEV *
-SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete(
- const SCEV *Sym,
- const SCEV *Conc,
- ScalarEvolution &SE) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const SCEV* H =
- getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H != getOperand(i)) {
- SmallVector<const SCEV*, 8> NewOps;
- NewOps.reserve(getNumOperands());
- for (unsigned j = 0; j != i; ++j)
- NewOps.push_back(getOperand(j));
- NewOps.push_back(H);
- for (++i; i != e; ++i)
- NewOps.push_back(getOperand(i)->
- replaceSymbolicValuesWithConcrete(Sym, Conc, SE));
-
- if (isa<SCEVAddExpr>(this))
- return SE.getAddExpr(NewOps);
- else if (isa<SCEVMulExpr>(this))
- return SE.getMulExpr(NewOps);
- else if (isa<SCEVSMaxExpr>(this))
- return SE.getSMaxExpr(NewOps);
- else if (isa<SCEVUMaxExpr>(this))
- return SE.getUMaxExpr(NewOps);
- else
- assert(0 && "Unknown commutative expr!");
- }
- }
- return this;
-}
-
-void SCEVNAryExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getSCEVType());
- ID.AddInteger(Operands.size());
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
-}
-
bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (!getOperand(i)->dominates(BB, DT))
@@ -315,16 +264,22 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return true;
}
-void SCEVUDivExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scUDivExpr);
- ID.AddPointer(LHS);
- ID.AddPointer(RHS);
+bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (!getOperand(i)->properlyDominates(BB, DT))
+ return false;
+ }
+ return true;
}
bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
}
+bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT);
+}
+
void SCEVUDivExpr::print(raw_ostream &OS) const {
OS << "(" << *LHS << " /u " << *RHS << ")";
}
@@ -338,38 +293,6 @@ const Type *SCEVUDivExpr::getType() const {
return RHS->getType();
}
-void SCEVAddRecExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scAddRecExpr);
- ID.AddInteger(Operands.size());
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
- ID.AddPointer(L);
-}
-
-const SCEV *
-SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym,
- const SCEV *Conc,
- ScalarEvolution &SE) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const SCEV* H =
- getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H != getOperand(i)) {
- SmallVector<const SCEV*, 8> NewOps;
- NewOps.reserve(getNumOperands());
- for (unsigned j = 0; j != i; ++j)
- NewOps.push_back(getOperand(j));
- NewOps.push_back(H);
- for (++i; i != e; ++i)
- NewOps.push_back(getOperand(i)->
- replaceSymbolicValuesWithConcrete(Sym, Conc, SE));
-
- return SE.getAddRecExpr(NewOps, L);
- }
- }
- return this;
-}
-
-
bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
// Add recurrences are never invariant in the function-body (null loop).
if (!QueryLoop)
@@ -396,9 +319,13 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
OS << "}<" << L->getHeader()->getName() + ">";
}
-void SCEVUnknown::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scUnknown);
- ID.AddPointer(V);
+void SCEVFieldOffsetExpr::print(raw_ostream &OS) const {
+ // LLVM struct fields don't have names, so just print the field number.
+ OS << "offsetof(" << *STy << ", " << FieldNo << ")";
+}
+
+void SCEVAllocSizeExpr::print(raw_ostream &OS) const {
+ OS << "sizeof(" << *AllocTy << ")";
}
bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
@@ -417,6 +344,12 @@ bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const {
return true;
}
+bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ if (Instruction *I = dyn_cast<Instruction>(getValue()))
+ return DT->properlyDominates(I->getParent(), BB);
+ return true;
+}
+
const Type *SCEVUnknown::getType() const {
return V->getType();
}
@@ -429,6 +362,41 @@ void SCEVUnknown::print(raw_ostream &OS) const {
// SCEV Utilities
//===----------------------------------------------------------------------===//
+static bool CompareTypes(const Type *A, const Type *B) {
+ if (A->getTypeID() != B->getTypeID())
+ return A->getTypeID() < B->getTypeID();
+ if (const IntegerType *AI = dyn_cast<IntegerType>(A)) {
+ const IntegerType *BI = cast<IntegerType>(B);
+ return AI->getBitWidth() < BI->getBitWidth();
+ }
+ if (const PointerType *AI = dyn_cast<PointerType>(A)) {
+ const PointerType *BI = cast<PointerType>(B);
+ return CompareTypes(AI->getElementType(), BI->getElementType());
+ }
+ if (const ArrayType *AI = dyn_cast<ArrayType>(A)) {
+ const ArrayType *BI = cast<ArrayType>(B);
+ if (AI->getNumElements() != BI->getNumElements())
+ return AI->getNumElements() < BI->getNumElements();
+ return CompareTypes(AI->getElementType(), BI->getElementType());
+ }
+ if (const VectorType *AI = dyn_cast<VectorType>(A)) {
+ const VectorType *BI = cast<VectorType>(B);
+ if (AI->getNumElements() != BI->getNumElements())
+ return AI->getNumElements() < BI->getNumElements();
+ return CompareTypes(AI->getElementType(), BI->getElementType());
+ }
+ if (const StructType *AI = dyn_cast<StructType>(A)) {
+ const StructType *BI = cast<StructType>(B);
+ if (AI->getNumElements() != BI->getNumElements())
+ return AI->getNumElements() < BI->getNumElements();
+ for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i)
+ if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) ||
+ CompareTypes(BI->getElementType(i), AI->getElementType(i)))
+ return CompareTypes(AI->getElementType(i), BI->getElementType(i));
+ }
+ return false;
+}
+
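CompareTypes gives the comparator below a deterministic, structural
strict weak ordering over types, so sorted operand lists no longer depend
on where Type objects happen to land in memory. The struct case relies on
a standard idiom: under a strict weak ordering cmp, "cmp(a, b) || cmp(b, a)"
holds exactly when a and b are not equivalent. A standalone miniature of
that idiom (plain C++, not LLVM code):

    #include <cassert>

    static bool cmp(int a, int b) { return a < b; }

    // Lexicographic "less" built from cmp, mirroring the StructType case:
    // order by the first non-equivalent element pair.
    static bool lessLexicographic(const int *A, const int *B, unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        if (cmp(A[i], B[i]) || cmp(B[i], A[i]))  // elements differ
          return cmp(A[i], B[i]);
      return false;  // all elements equivalent, so "not less"
    }

    int main() {
      int X[] = {1, 2, 3}, Y[] = {1, 2, 4};
      assert(lessLexicographic(X, Y, 3) && !lessLexicographic(Y, X, 3));
    }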
namespace {
/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
/// than the complexity of the RHS. This comparator is used to canonicalize
@@ -439,6 +407,10 @@ namespace {
explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return false;
+
// Primarily, sort the SCEVs by their getSCEVType().
if (LHS->getSCEVType() != RHS->getSCEVType())
return LHS->getSCEVType() < RHS->getSCEVType();
@@ -495,6 +467,8 @@ namespace {
// Compare constant values.
if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) {
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth())
+ return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth();
return LC->getValue()->getValue().ult(RC->getValue()->getValue());
}
@@ -539,7 +513,22 @@ namespace {
return operator()(LC->getOperand(), RC->getOperand());
}
- assert(0 && "Unknown SCEV kind!");
+ // Compare offsetof expressions.
+ if (const SCEVFieldOffsetExpr *LA = dyn_cast<SCEVFieldOffsetExpr>(LHS)) {
+ const SCEVFieldOffsetExpr *RA = cast<SCEVFieldOffsetExpr>(RHS);
+ if (CompareTypes(LA->getStructType(), RA->getStructType()) ||
+ CompareTypes(RA->getStructType(), LA->getStructType()))
+ return CompareTypes(LA->getStructType(), RA->getStructType());
+ return LA->getFieldNo() < RA->getFieldNo();
+ }
+
+ // Compare sizeof expressions by the allocation type.
+ if (const SCEVAllocSizeExpr *LA = dyn_cast<SCEVAllocSizeExpr>(LHS)) {
+ const SCEVAllocSizeExpr *RA = cast<SCEVAllocSizeExpr>(RHS);
+ return CompareTypes(LA->getAllocType(), RA->getAllocType());
+ }
+
+ llvm_unreachable("Unknown SCEV kind!");
return false;
}
};
@@ -555,7 +544,7 @@ namespace {
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
-static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops,
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
if (Ops.size() == 2) {
@@ -598,9 +587,9 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops,
/// BinomialCoefficient - Compute BC(It, K). The result has width W.
/// Assume K > 0.
-static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
- ScalarEvolution &SE,
- const Type* ResultTy) {
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
@@ -690,16 +679,17 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
- const IntegerType *CalculationTy = IntegerType::get(CalculationBits);
- const SCEV* Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
- const SCEV* S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType()));
+ const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType()));
Dividend = SE.getMulExpr(Dividend,
SE.getTruncateOrZeroExtend(S, CalculationTy));
}
// Divide by 2^T
- const SCEV* DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
// Truncate the result, and divide by K! / 2^T.
@@ -716,14 +706,14 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
///
/// where BC(It, k) stands for binomial coefficient.
///
-const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It,
- ScalarEvolution &SE) const {
- const SCEV* Result = getStart();
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
// The computation is correct in the face of overflow provided that the
// multiplication is performed _after_ the evaluation of the binomial
// coefficient.
- const SCEV* Coeff = BinomialCoefficient(It, i, SE, getType());
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
if (isa<SCEVCouldNotCompute>(Coeff))
return Coeff;
@@ -736,14 +726,21 @@ const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It,
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
-const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
@@ -763,26 +760,23 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op,
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
return getAddRecExpr(Operands, AddRec->getLoop());
}
- FoldingSetNodeID ID;
- ID.AddInteger(scTruncate);
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
- void *IP = 0;
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>();
- new (S) SCEVTruncateExpr(Op, Ty);
+ new (S) SCEVTruncateExpr(ID, Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
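The rewritten cast builders all share one uniquing discipline, which is
easier to see with the folding logic elided. The sketch below condenses
getTruncateExpr as it stands after this change; it is a reading aid that
reuses the identifiers above, not a drop-in replacement:

    const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
                                                 const Type *Ty) {
      FoldingSetNodeID ID;        // structural identity: kind plus operands
      ID.AddInteger(scTruncate);
      ID.AddPointer(Op);
      ID.AddPointer(Ty);
      void *IP = 0;
      if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
        return S;                 // cheap cache hit before any folding work
      // ... folding attempts; these may create nodes and invalidate IP ...
      if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
        return S;                 // recompute the insertion point
      SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>();
      new (S) SCEVTruncateExpr(ID, Op, Ty);  // placement-new into the pool
      UniqueSCEVs.InsertNode(S, IP);
      return S;
    }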
-const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -801,12 +795,33 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty);
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
// operands (often constants). This allows analysis of something like
// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->hasNoUnsignedWrap())
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L);
+
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
@@ -815,28 +830,25 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
- const SCEV* Start = AR->getStart();
- const SCEV* Step = AR->getStepRecurrence(*this);
// Check whether the backedge-taken count can be losslessly cast to
// the addrec's type. The count is always unsigned.
- const SCEV* CastedMaxBECount =
+ const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV* RecastedMaxBECount =
+ const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy =
- IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV* ZMul =
+ const SCEV *ZMul =
getMulExpr(CastedMaxBECount,
getTruncateOrZeroExtend(Step, Start->getType()));
- const SCEV* Add = getAddExpr(Start, ZMul);
- const SCEV* OperandExtendedAdd =
+ const SCEV *Add = getAddExpr(Start, ZMul);
+ const SCEV *OperandExtendedAdd =
getAddExpr(getZeroExtendExpr(Start, WideTy),
getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
getZeroExtendExpr(Step, WideTy)));
@@ -844,11 +856,11 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
- AR->getLoop());
+ L);
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
- const SCEV* SMul =
+ const SCEV *SMul =
getMulExpr(CastedMaxBECount,
getTruncateOrSignExtend(Step, Start->getType()));
Add = getAddExpr(Start, SMul);
@@ -860,25 +872,50 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
- AR->getLoop());
+ L);
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L);
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
}
}
}
- FoldingSetNodeID ID;
- ID.AddInteger(scZeroExtend);
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
- void *IP = 0;
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVZeroExtendExpr>();
- new (S) SCEVZeroExtendExpr(Op, Ty);
+ new (S) SCEVZeroExtendExpr(ID, Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
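The new guard-based checks are easiest to see on a concrete loop. A hedged
source-level illustration (C++, not taken from the patch): the backedge
test (i <u n) together with the known-positive step 1 proves that i never
wraps in i8, so the zero extension can be pushed inside the recurrence,
i.e. (zext i8 {0,+,1}<loop> to i32) becomes {0,+,1}<loop> over i32.

    unsigned sum(const unsigned char *A, unsigned char n) {
      unsigned Total = 0;
      for (unsigned char i = 0; i < n; ++i)  // {0,+,1}<loop> in i8, no wrap
        Total += A[i];                       // index is zero-extended once
      return Total;
    }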
-const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -897,12 +934,33 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty);
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
// this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->hasNoSignedWrap())
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
+
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
@@ -911,28 +969,25 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
- const SCEV* Start = AR->getStart();
- const SCEV* Step = AR->getStepRecurrence(*this);
// Check whether the backedge-taken count can be losslessly cast to
// the addrec's type. The count is always unsigned.
- const SCEV* CastedMaxBECount =
+ const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV* RecastedMaxBECount =
+ const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy =
- IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV* SMul =
+ const SCEV *SMul =
getMulExpr(CastedMaxBECount,
getTruncateOrSignExtend(Step, Start->getType()));
- const SCEV* Add = getAddExpr(Start, SMul);
- const SCEV* OperandExtendedAdd =
+ const SCEV *Add = getAddExpr(Start, SMul);
+ const SCEV *OperandExtendedAdd =
getAddExpr(getSignExtendExpr(Start, WideTy),
getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
getSignExtendExpr(Step, WideTy)));
@@ -940,19 +995,60 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(getSignExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
- AR->getLoop());
+ L);
+
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ const SCEV *UMul =
+ getMulExpr(CastedMaxBECount,
+ getTruncateOrZeroExtend(Step, Start->getType()));
+ Add = getAddExpr(Start, UMul);
+ OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L);
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
+ getSignedRange(Step).getSignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
}
}
}
- FoldingSetNodeID ID;
- ID.AddInteger(scSignExtend);
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
- void *IP = 0;
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVSignExtendExpr>();
- new (S) SCEVSignExtendExpr(Op, Ty);
+ new (S) SCEVSignExtendExpr(ID, Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -960,8 +1056,8 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
///
-const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -975,19 +1071,19 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op,
// Peel off a truncate cast.
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
- const SCEV* NewOp = T->getOperand();
+ const SCEV *NewOp = T->getOperand();
if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
return getAnyExtendExpr(NewOp, Ty);
return getTruncateOrNoop(NewOp, Ty);
}
// Next try a zext cast. If the cast is folded, use it.
- const SCEV* ZExt = getZeroExtendExpr(Op, Ty);
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
if (!isa<SCEVZeroExtendExpr>(ZExt))
return ZExt;
// Next try a sext cast. If the cast is folded, use it.
- const SCEV* SExt = getSignExtendExpr(Op, Ty);
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
if (!isa<SCEVSignExtendExpr>(SExt))
return SExt;
@@ -1025,10 +1121,10 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op,
/// is also used as a check to avoid infinite recursion.
///
static bool
-CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M,
- SmallVector<const SCEV*, 8> &NewOps,
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVector<const SCEV *, 8> &NewOps,
APInt &AccumulatedConstant,
- const SmallVectorImpl<const SCEV*> &Ops,
+ const SmallVectorImpl<const SCEV *> &Ops,
const APInt &Scale,
ScalarEvolution &SE) {
bool Interesting = false;
@@ -1049,9 +1145,9 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M,
} else {
// A multiplication of a constant with some other value. Update
// the map.
- SmallVector<const SCEV*, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
- const SCEV* Key = SE.getMulExpr(MulOps);
- std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair =
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert(std::make_pair(Key, NewScale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
@@ -1069,7 +1165,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M,
AccumulatedConstant += Scale * C->getValue()->getValue();
} else {
// An ordinary operand. Update the map.
- std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair =
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert(std::make_pair(Ops[i], Scale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
@@ -1095,7 +1191,8 @@ namespace {
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
-const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ bool HasNUW, bool HasNSW) {
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1139,13 +1236,13 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// Found a match, merge the two values into a multiply, and add any
// remaining values to the result.
- const SCEV* Two = getIntegerSCEV(2, Ty);
- const SCEV* Mul = getMulExpr(Ops[i], Two);
+ const SCEV *Two = getIntegerSCEV(2, Ty);
+ const SCEV *Mul = getMulExpr(Ops[i], Two);
if (Ops.size() == 2)
return Mul;
Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
Ops.push_back(Mul);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, HasNUW, HasNSW);
}
// Check for truncates. If all the operands are truncated from the same
@@ -1156,7 +1253,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
const Type *DstType = Trunc->getType();
const Type *SrcType = Trunc->getOperand()->getType();
- SmallVector<const SCEV*, 8> LargeOps;
+ SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
@@ -1172,7 +1269,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
// is much more likely to be foldable here.
LargeOps.push_back(getSignExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
- SmallVector<const SCEV*, 8> LargeMulOps;
+ SmallVector<const SCEV *, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
@@ -1200,7 +1297,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
}
if (Ok) {
// Evaluate the expression in the larger type.
- const SCEV* Fold = getAddExpr(LargeOps);
+ const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, DstType);
@@ -1237,16 +1334,16 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
// operands multiplied by constant values.
if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
uint64_t BitWidth = getTypeSizeInBits(Ty);
- DenseMap<const SCEV*, APInt> M;
- SmallVector<const SCEV*, 8> NewOps;
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
APInt AccumulatedConstant(BitWidth, 0);
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops, APInt(BitWidth, 1), *this)) {
// Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
- std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare> MulOpLists;
- for (SmallVector<const SCEV*, 8>::iterator I = NewOps.begin(),
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
@@ -1276,17 +1373,17 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
- const SCEV* InnerMul = Mul->getOperand(MulOp == 0);
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
- SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end());
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul = getMulExpr(MulOps);
}
- const SCEV* One = getIntegerSCEV(1, Ty);
- const SCEV* AddOne = getAddExpr(InnerMul, One);
- const SCEV* OuterMul = getMulExpr(AddOne, Ops[AddOp]);
+ const SCEV *One = getIntegerSCEV(1, Ty);
+ const SCEV *AddOne = getAddExpr(InnerMul, One);
+ const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
@@ -1310,22 +1407,22 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
OMulOp != e; ++OMulOp)
if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
- const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0);
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul1 = getMulExpr(MulOps);
}
- const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_end());
MulOps.erase(MulOps.begin()+OMulOp);
InnerMul2 = getMulExpr(MulOps);
}
- const SCEV* InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
- const SCEV* OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
@@ -1346,7 +1443,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- SmallVector<const SCEV*, 8> LIOps;
+ SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1360,11 +1457,11 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
- SmallVector<const SCEV*, 4> AddRecOps(AddRec->op_begin(),
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
AddRec->op_end());
AddRecOps[0] = getAddExpr(LIOps);
- const SCEV* NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -1396,7 +1493,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
}
NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
}
- const SCEV* NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
+ const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
if (Ops.size() == 2) return NewAddRec;
@@ -1420,16 +1517,19 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
ID.AddPointer(Ops[i]);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = SCEVAllocator.Allocate<SCEVAddExpr>();
- new (S) SCEVAddExpr(Ops);
+ SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>();
+ new (S) SCEVAddExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
+ if (HasNUW) S->setHasNoUnsignedWrap(true);
+ if (HasNSW) S->setHasNoSignedWrap(true);
return S;
}
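The new HasNUW/HasNSW parameters let a caller record IR-level wrap
guarantees on the uniqued node after insertion. A plausible call site is
sketched below; it is an assumption for illustration, not code from this
patch (OverflowingBinaryOperator comes from llvm/Operator.h, which is
newly included above):

    // Hypothetical caller: thread the IR add's nuw/nsw bits into SCEV.
    if (const OverflowingBinaryOperator *OBO =
          dyn_cast<OverflowingBinaryOperator>(V))
      if (OBO->getOpcode() == Instruction::Add)
        return getAddExpr(getSCEV(OBO->getOperand(0)),
                          getSCEV(OBO->getOperand(1)),
                          OBO->hasNoUnsignedWrap(),
                          OBO->hasNoSignedWrap());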
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
-const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ bool HasNUW, bool HasNSW) {
assert(!Ops.empty() && "Cannot get empty mul!");
#ifndef NDEBUG
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
@@ -1457,7 +1557,8 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() *
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ LHSC->getValue()->getValue() *
RHSC->getValue()->getValue());
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
@@ -1510,7 +1611,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- SmallVector<const SCEV*, 8> LIOps;
+ SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1522,7 +1623,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
- SmallVector<const SCEV*, 4> NewOps;
+ SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
if (LIOps.size() == 1) {
const SCEV *Scale = LIOps[0];
@@ -1530,13 +1631,13 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
} else {
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
- SmallVector<const SCEV*, 4> MulOps(LIOps.begin(), LIOps.end());
+ SmallVector<const SCEV *, 4> MulOps(LIOps.begin(), LIOps.end());
MulOps.push_back(AddRec->getOperand(i));
NewOps.push_back(getMulExpr(MulOps));
}
}
- const SCEV* NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -1560,14 +1661,14 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
if (AddRec->getLoop() == OtherAddRec->getLoop()) {
// F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D}
const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
- const SCEV* NewStart = getMulExpr(F->getStart(),
+ const SCEV *NewStart = getMulExpr(F->getStart(),
G->getStart());
- const SCEV* B = F->getStepRecurrence(*this);
- const SCEV* D = G->getStepRecurrence(*this);
- const SCEV* NewStep = getAddExpr(getMulExpr(F, D),
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
getMulExpr(G, B),
getMulExpr(B, D));
- const SCEV* NewAddRec = getAddRecExpr(NewStart, NewStep,
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
F->getLoop());
if (Ops.size() == 2) return NewAddRec;
@@ -1591,14 +1692,16 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
ID.AddPointer(Ops[i]);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = SCEVAllocator.Allocate<SCEVMulExpr>();
- new (S) SCEVMulExpr(Ops);
+ SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>();
+ new (S) SCEVMulExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
+ if (HasNUW) S->setHasNoUnsignedWrap(true);
+ if (HasNSW) S->setHasNoSignedWrap(true);
return S;
}
-/// getUDivExpr - Get a canonical multiply expression, or something simpler if
-/// possible.
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(getEffectiveSCEVType(LHS->getType()) ==
@@ -1607,7 +1710,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (RHSC->getValue()->equalsInt(1))
- return LHS; // X udiv 1 --> x
+ return LHS; // X udiv 1 --> x
if (RHSC->isZero())
return getIntegerSCEV(0, LHS->getType()); // value is undefined
@@ -1622,7 +1725,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (!RHSC->getValue()->getValue().isPowerOf2())
++MaxShiftAmt;
const IntegerType *ExtTy =
- IntegerType::get(getTypeSizeInBits(Ty) + MaxShiftAmt);
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
@@ -1633,24 +1736,24 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop())) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
return getAddRecExpr(Operands, AR->getLoop());
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
- const SCEV* Op = M->getOperand(i);
- const SCEV* Div = getUDivExpr(Op, RHSC);
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
- const SmallVectorImpl<const SCEV*> &MOperands = M->getOperands();
- Operands = SmallVector<const SCEV*, 4>(MOperands.begin(),
+ const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
+ Operands = SmallVector<const SCEV *, 4>(MOperands.begin(),
MOperands.end());
Operands[i] = Div;
return getMulExpr(Operands);
@@ -1659,13 +1762,13 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
- const SCEV* Op = getUDivExpr(A->getOperand(i), RHS);
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i))
break;
Operands.push_back(Op);
@@ -1691,7 +1794,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVUDivExpr>();
- new (S) SCEVUDivExpr(LHS, RHS);
+ new (S) SCEVUDivExpr(ID, LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
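A quick numeric check of the {X,+,N}/C --> {X/C,+,N/C} rule above,
runnable standalone: the addrec {8,+,4} takes the values 8 + 4*i, and the
division by 2 distributes exactly because every operand is divisible
(which is what the zero-extended round-trip test verifies symbolically):

    #include <cassert>

    int main() {
      // {8,+,4} /u 2 == {4,+,2}: check the first several iterations.
      for (unsigned i = 0; i != 16; ++i)
        assert((8 + 4 * i) / 2 == 4 + 2 * i);
    }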
@@ -1699,9 +1802,10 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
-const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start,
- const SCEV* Step, const Loop *L) {
- SmallVector<const SCEV*, 4> Operands;
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start,
+ const SCEV *Step, const Loop *L,
+ bool HasNUW, bool HasNSW) {
+ SmallVector<const SCEV *, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
@@ -1711,14 +1815,15 @@ const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start,
}
Operands.push_back(Step);
- return getAddRecExpr(Operands, L);
+ return getAddRecExpr(Operands, L, HasNUW, HasNSW);
}
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
const SCEV *
-ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
- const Loop *L) {
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L,
+ bool HasNUW, bool HasNSW) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -1729,14 +1834,14 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
if (Operands.back()->isZero()) {
Operands.pop_back();
- return getAddRecExpr(Operands, L); // {X,+,0} --> X
+ return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X
}
// Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop* NestedLoop = NestedAR->getLoop();
if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
- SmallVector<const SCEV*, 4> NestedOperands(NestedAR->op_begin(),
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
@@ -1758,7 +1863,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
}
if (AllInvariant)
// Ok, both add recurrences are valid after the transformation.
- return getAddRecExpr(NestedOperands, NestedLoop);
+ return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW);
}
// Reset Operands to its original state.
Operands[0] = NestedAR;
@@ -1773,22 +1878,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
ID.AddPointer(L);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
- new (S) SCEVAddRecExpr(Operands, L);
+ SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
+ new (S) SCEVAddRecExpr(ID, Operands, L);
UniqueSCEVs.InsertNode(S, IP);
+ if (HasNUW) S->setHasNoUnsignedWrap(true);
+ if (HasNSW) S->setHasNoSignedWrap(true);
return S;
}
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
- SmallVector<const SCEV*, 2> Ops;
+ SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getSMaxExpr(Ops);
}
-const SCEV*
-ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1808,7 +1915,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(
+ ConstantInt *Fold = ConstantInt::get(getContext(),
APIntOps::smax(LHSC->getValue()->getValue(),
RHSC->getValue()->getValue()));
Ops[0] = getConstant(Fold);
@@ -1871,21 +1978,21 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVSMaxExpr>();
- new (S) SCEVSMaxExpr(Ops);
+ new (S) SCEVSMaxExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
- SmallVector<const SCEV*, 2> Ops;
+ SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getUMaxExpr(Ops);
}
-const SCEV*
-ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1905,7 +2012,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(
+ ConstantInt *Fold = ConstantInt::get(getContext(),
APIntOps::umax(LHSC->getValue()->getValue(),
RHSC->getValue()->getValue()));
Ops[0] = getConstant(Fold);
@@ -1968,7 +2075,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVUMaxExpr>();
- new (S) SCEVUMaxExpr(Ops);
+ new (S) SCEVUMaxExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -1985,7 +2092,77 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV* ScalarEvolution::getUnknown(Value *V) {
+const SCEV *ScalarEvolution::getFieldOffsetExpr(const StructType *STy,
+ unsigned FieldNo) {
+ // If we have TargetData we can determine the constant offset.
+ if (TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(getContext());
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ uint64_t Offset = SL.getElementOffset(FieldNo);
+ return getIntegerSCEV(Offset, IntPtrTy);
+ }
+
+ // Field 0 is always at offset 0.
+ if (FieldNo == 0) {
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ return getIntegerSCEV(0, Ty);
+ }
+
+ // Okay, it looks like we really DO need an offsetof expr. Check to see if we
+ // already have one; otherwise, create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scFieldOffset);
+ ID.AddPointer(STy);
+ ID.AddInteger(FieldNo);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = SCEVAllocator.Allocate<SCEVFieldOffsetExpr>();
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ new (S) SCEVFieldOffsetExpr(ID, Ty, STy, FieldNo);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getAllocSizeExpr(const Type *AllocTy) {
+ // If we have TargetData we can determine the constant size.
+ if (TD && AllocTy->isSized()) {
+ const Type *IntPtrTy = TD->getIntPtrType(getContext());
+ return getIntegerSCEV(TD->getTypeAllocSize(AllocTy), IntPtrTy);
+ }
+
+ // Expand an array size into the element size times the number
+ // of elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(AllocTy)) {
+ const SCEV *E = getAllocSizeExpr(ATy->getElementType());
+ return getMulExpr(
+ E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
+ ATy->getNumElements())));
+ }
+
+ // Expand a vector size into the element size times the number
+ // of elements.
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocTy)) {
+ const SCEV *E = getAllocSizeExpr(VTy->getElementType());
+ return getMulExpr(
+ E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
+ VTy->getNumElements())));
+ }
+
+ // Okay, it looks like we really DO need a sizeof expr. Check to see if we
+ // already have one; otherwise, create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAllocSize);
+ ID.AddPointer(AllocTy);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = SCEVAllocator.Allocate<SCEVAllocSizeExpr>();
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ new (S) SCEVAllocSizeExpr(ID, Ty, AllocTy);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
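Without TargetData, getAllocSizeExpr decomposes aggregates recursively,
and only an irreducible leaf type becomes an explicit sizeof node. A
hedged usage fragment (assumes an existing ScalarEvolution &SE built
without TargetData; API names as of this era of LLVM):

    // sizeof([4 x <2 x i32>]) is built as 4 * (2 * sizeof(i32)), where
    // only the i32 leaf remains a symbolic SCEVAllocSizeExpr.
    const Type *EltTy = Type::getInt32Ty(SE.getContext());
    const Type *VecTy = VectorType::get(EltTy, 2);
    const Type *ArrTy = ArrayType::get(VecTy, 4);
    const SCEV *Size = SE.getAllocSizeExpr(ArrTy);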
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
// Don't attempt to do anything other than create a SCEVUnknown object
// here. createSCEV only calls getUnknown after checking for all other
// interesting possibilities, and any other code that calls getUnknown
@@ -1997,7 +2174,7 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) {
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>();
- new (S) SCEVUnknown(V);
+ new (S) SCEVUnknown(ID, V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -2011,17 +2188,8 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) {
/// includes pointer types as well; without TargetData, a conservative
/// pointer size is assumed for them.
bool ScalarEvolution::isSCEVable(const Type *Ty) const {
- // Integers are always SCEVable.
- if (Ty->isInteger())
- return true;
-
- // Pointers are SCEVable if TargetData information is available
- // to provide pointer size information.
- if (isa<PointerType>(Ty))
- return TD != NULL;
-
- // Otherwise it's not SCEVable.
- return false;
+ // Integers and pointers are always SCEVable.
+ return Ty->isInteger() || isa<PointerType>(Ty);
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2033,9 +2201,14 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
if (TD)
return TD->getTypeSizeInBits(Ty);
- // Otherwise, we support only integer types.
- assert(Ty->isInteger() && "isSCEVable permitted a non-SCEVable type!");
- return Ty->getPrimitiveSizeInBits();
+ // Integer types have fixed sizes.
+ if (Ty->isInteger())
+ return Ty->getPrimitiveSizeInBits();
+
+ // The only other supported type is pointer. Without TargetData, conservatively
+ // assume pointers are 64-bit.
+ assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!");
+ return 64;
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -2048,58 +2221,60 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
if (Ty->isInteger())
return Ty;
+  // The only other supported type is pointer.
assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
- return TD->getIntPtrType();
-}
+ if (TD) return TD->getIntPtrType(getContext());
-const SCEV* ScalarEvolution::getCouldNotCompute() {
- return &CouldNotCompute;
+ // Without TargetData, conservatively assume pointers are 64-bit.
+ return Type::getInt64Ty(getContext());
}
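
// Example (illustrative): getEffectiveSCEVType(i8*) is the target's intptr
// type when TargetData is available, and i64 otherwise; integer types are
// returned unchanged.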
-/// hasSCEV - Return true if the SCEV for this value has already been
-/// computed.
-bool ScalarEvolution::hasSCEV(Value *V) const {
- return Scalars.count(V);
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return &CouldNotCompute;
}
/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
/// expression and create a new one.
-const SCEV* ScalarEvolution::getSCEV(Value *V) {
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- std::map<SCEVCallbackVH, const SCEV*>::iterator I = Scalars.find(V);
+ std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V);
if (I != Scalars.end()) return I->second;
- const SCEV* S = createSCEV(V);
+ const SCEV *S = createSCEV(V);
Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
return S;
}
/// getIntegerSCEV - Given a SCEVable type, create a constant for the
/// specified signed integer value and return a SCEV for the constant.
-const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+const SCEV *ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, Val));
}
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
-const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
- return getConstant(cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
const Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(V, getConstant(ConstantInt::getAllOnesValue(Ty)));
+ return getMulExpr(V,
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
-const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
- return getConstant(cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
const Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- const SCEV* AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty));
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
return getMinusSCEV(AllOnes, V);
}
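
// Example (illustrative): since ~V = -1-V, getNotSCEV applied to the addrec
// {0,+,1} folds to {-1,+,-1}: at iteration n its value is -1-n, i.e. ~n.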
@@ -2114,12 +2289,12 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended.
-const SCEV*
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V,
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2131,12 +2306,12 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V,
/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is sign
/// extended.
-const SCEV*
-ScalarEvolution::getTruncateOrSignExtend(const SCEV* V,
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2148,11 +2323,11 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV* V,
/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended. The conversion must not be narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
@@ -2164,11 +2339,11 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) {
/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is sign
/// extended. The conversion must not be narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
@@ -2181,11 +2356,11 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) {
/// the input value to the specified type. If the type must be extended,
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
@@ -2196,11 +2371,11 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) {
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be widening.
-const SCEV*
-ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
@@ -2214,8 +2389,8 @@ ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
/// with them.
const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
- const SCEV* PromotedLHS = LHS;
- const SCEV* PromotedRHS = RHS;
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
@@ -2230,8 +2405,8 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
/// with them.
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
- const SCEV* PromotedLHS = LHS;
- const SCEV* PromotedRHS = RHS;
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
@@ -2241,34 +2416,60 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
return getUMinExpr(PromotedLHS, PromotedRHS);
}
-/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
-/// the specified instruction and replaces any references to the symbolic value
-/// SymName with the specified value. This is used during PHI resolution.
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(UI));
+}
+
+/// ForgetSymbolicValue - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the Scalars map if they reference SymName. This is used during PHI
+/// resolution.
void
-ScalarEvolution::ReplaceSymbolicValueWithConcrete(Instruction *I,
- const SCEV *SymName,
- const SCEV *NewVal) {
- std::map<SCEVCallbackVH, const SCEV*>::iterator SI =
- Scalars.find(SCEVCallbackVH(I, this));
- if (SI == Scalars.end()) return;
+ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(I, Worklist);
- const SCEV* NV =
- SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this);
- if (NV == SI->second) return; // No change.
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(I);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
- SI->second = NV; // Update the scalars map!
+ std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (!It->second->hasOperand(SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+      // structure, or it's a PHI that's in the process of being computed
+      // by createNodeForPHI. In the former case, additional loop trip
+      // count information isn't going to change anything. In the latter
+      // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ }
+ }
- // Any instruction values that use this instruction might also need to be
- // updated!
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- ReplaceSymbolicValueWithConcrete(cast<Instruction>(*UI), SymName, NewVal);
+ PushDefUseChildren(I, Worklist);
+ }
}
/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
/// a loop header, making it a potential recurrence, or it doesn't.
///
-const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized.
if (const Loop *L = LI->getLoopFor(PN->getParent()))
if (L->getHeader() == PN->getParent()) {
@@ -2278,14 +2479,15 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
unsigned BackEdge = IncomingEdge^1;
// While we are analyzing this PHI node, handle its value symbolically.
- const SCEV* SymbolicName = getUnknown(PN);
+ const SCEV *SymbolicName = getUnknown(PN);
assert(Scalars.find(PN) == Scalars.end() &&
"PHI node already processed?");
Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
- const SCEV* BEValue = getSCEV(PN->getIncomingValue(BackEdge));
+ Value *BEValueV = PN->getIncomingValue(BackEdge);
+ const SCEV *BEValue = getSCEV(BEValueV);
// NOTE: If BEValue is loop invariant, we know that the PHI node just
// has a special value for the first iteration of the loop.
@@ -2305,11 +2507,11 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
- SmallVector<const SCEV*, 8> Ops;
+ SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
- const SCEV* Accum = getAddExpr(Ops);
+ const SCEV *Accum = getAddExpr(Ops);
// This is not a valid addrec if the step amount is varying each
// loop iteration, but is not itself an addrec in this loop.
@@ -2318,15 +2520,35 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
const SCEV *StartVal =
getSCEV(PN->getIncomingValue(IncomingEdge));
- const SCEV *PHISCEV =
- getAddRecExpr(StartVal, Accum, L);
+ const SCEVAddRecExpr *PHISCEV =
+ cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
+
+ // If the increment doesn't overflow, then neither the addrec nor the
+ // post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
+ if (OBO->getOperand(0) == PN &&
+ getSCEV(OBO->getOperand(1)) ==
+ PHISCEV->getStepRecurrence(*this)) {
+ const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
+ if (OBO->hasNoUnsignedWrap()) {
+ const_cast<SCEVAddRecExpr *>(PHISCEV)
+ ->setHasNoUnsignedWrap(true);
+ const_cast<SCEVAddRecExpr *>(PostInc)
+ ->setHasNoUnsignedWrap(true);
+ }
+ if (OBO->hasNoSignedWrap()) {
+ const_cast<SCEVAddRecExpr *>(PHISCEV)
+ ->setHasNoSignedWrap(true);
+ const_cast<SCEVAddRecExpr *>(PostInc)
+ ->setHasNoSignedWrap(true);
+ }
+ }
// Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and update all of the
- // entries for the scalars that use the PHI (except for the PHI
- // itself) to use the new analyzed value instead of the "symbolic"
- // value.
- ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2338,21 +2560,20 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
// Because the other in-value of i (0) fits the evolution of BEValue
// i really is an addrec evolution.
if (AddRec->getLoop() == L && AddRec->isAffine()) {
- const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+ const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
// If StartVal = j.start - j.stride, we can use StartVal as the
// initial step of the addrec evolution.
if (StartVal == getMinusSCEV(AddRec->getOperand(0),
AddRec->getOperand(1))) {
- const SCEV* PHISCEV =
+ const SCEV *PHISCEV =
getAddRecExpr(StartVal, AddRec->getOperand(1), L);
// Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and update all of the
- // entries for the scalars that use the PHI (except for the PHI
- // itself) to use the new analyzed value instead of the "symbolic"
- // value.
- ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2361,6 +2582,10 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
return SymbolicName;
}
+  // It's tempting to recognize PHIs with a unique incoming value; however,
+  // doing so leads passes like indvars to break LCSSA form. Fortunately, such
+ // PHIs are rare, as instcombine zaps them.
+
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
@@ -2368,14 +2593,14 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// createNodeForGEP - Expand GEP instructions into add and multiply
/// operations. This allows them to be analyzed by regular SCEV code.
///
-const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) {
+const SCEV *ScalarEvolution::createNodeForGEP(Operator *GEP) {
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
return getUnknown(GEP);
- const SCEV* TotalOffset = getIntegerSCEV(0, IntPtrTy);
+ const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
E = GEP->op_end();
@@ -2384,22 +2609,16 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
- const StructLayout &SL = *TD->getStructLayout(STy);
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- uint64_t Offset = SL.getElementOffset(FieldNo);
TotalOffset = getAddExpr(TotalOffset,
- getIntegerSCEV(Offset, IntPtrTy));
+ getFieldOffsetExpr(STy, FieldNo));
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV* LocalOffset = getSCEV(Index);
+ const SCEV *LocalOffset = getSCEV(Index);
if (!isa<PointerType>(LocalOffset->getType()))
        // Getelementptr indices are signed.
- LocalOffset = getTruncateOrSignExtend(LocalOffset,
- IntPtrTy);
- LocalOffset =
- getMulExpr(LocalOffset,
- getIntegerSCEV(TD->getTypeAllocSize(*GTI),
- IntPtrTy));
+ LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
+ LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI));
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
}
}
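  // Example (illustrative): for "getelementptr %S* %p, i64 %i, i32 2" with
  // %S = { i32, i32, i32 }, the loop above accumulates the symbolic offset
  // %i * sizeof(%S) + offsetof(%S, 2).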
@@ -2411,7 +2630,7 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) {
/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
uint32_t
-ScalarEvolution::GetMinTrailingZeros(const SCEV* S) {
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getValue()->getValue().countTrailingZeros();
@@ -2487,18 +2706,100 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV* S) {
return 0;
}
-uint32_t
-ScalarEvolution::GetMinLeadingZeros(const SCEV* S) {
- // TODO: Handle other SCEV expression types here.
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return C->getValue()->getValue().countLeadingZeros();
+ return ConstantRange(C->getValue()->getValue());
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getUnsignedRange(Add->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+ return X;
+ }
- if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) {
- // A zero-extension cast adds zero bits.
- return GetMinLeadingZeros(C->getOperand()) +
- (getTypeSizeInBits(C->getType()) -
- getTypeSizeInBits(C->getOperand()->getType()));
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UDiv->getLHS());
+ ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+ return X.udiv(Y);
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(ZExt->getOperand());
+ return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SExt->getOperand());
+ return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Trunc->getOperand());
+ return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+ }
+
+ ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
+ const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
+ if (!Trip) return FullSet;
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+ // Check for overflow.
+ // TODO: This is very conservative.
+ if (!(Step->isOne() &&
+ isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) &&
+ !(Step->isAllOnesValue() &&
+ isKnownPredicate(ICmpInst::ICMP_UGT, Start, End)))
+ return FullSet;
+
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange EndRange = getUnsignedRange(End);
+ APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+ EndRange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+ EndRange.getUnsignedMax());
+ if (Min.isMinValue() && Max.isMaxValue())
+ return FullSet;
+ return ConstantRange(Min, Max+1);
+ }
+ }
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -2507,73 +2808,128 @@ ScalarEvolution::GetMinLeadingZeros(const SCEV* S) {
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
- return Zeros.countLeadingOnes();
+ if (Ones == ~Zeros + 1)
+ return FullSet;
+ return ConstantRange(Ones, ~Zeros + 1);
}
- return 1;
+ return FullSet;
}
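
// Example (illustrative): for an i8 addrec {0,+,1} whose loop has a constant
// backedge-taken count of 9, the addrec case above computes Start = 0 and
// End = 9 and returns the unsigned range [0,10).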
-uint32_t
-ScalarEvolution::GetMinSignBits(const SCEV* S) {
- // TODO: Handle other SCEV expression types here.
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- const APInt &A = C->getValue()->getValue();
- return A.isNegative() ? A.countLeadingOnes() :
- A.countLeadingZeros();
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return ConstantRange(C->getValue()->getValue());
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getSignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getSignedRange(Add->getOperand(i)));
+ return X;
}
- if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) {
- // A sign-extension cast adds sign bits.
- return GetMinSignBits(C->getOperand()) +
- (getTypeSizeInBits(C->getType()) -
- getTypeSizeInBits(C->getOperand()->getType()));
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getSignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getSignedRange(Mul->getOperand(i)));
+ return X;
}
- if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
- unsigned BitWidth = getTypeSizeInBits(A->getType());
-
- // Special case decrementing a value (ADD X, -1):
- if (const SCEVConstant *CRHS = dyn_cast<SCEVConstant>(A->getOperand(0)))
- if (CRHS->isAllOnesValue()) {
- SmallVector<const SCEV *, 4> OtherOps(A->op_begin() + 1, A->op_end());
- const SCEV *OtherOpsAdd = getAddExpr(OtherOps);
- unsigned LZ = GetMinLeadingZeros(OtherOpsAdd);
-
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if (LZ == BitWidth - 1)
- return BitWidth;
-
- // If we are subtracting one from a positive number, there is no carry
- // out of the result.
- if (LZ > 0)
- return GetMinSignBits(OtherOpsAdd);
- }
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getSignedRange(SMax->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getSignedRange(UMax->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getSignedRange(UDiv->getLHS());
+ ConstantRange Y = getSignedRange(UDiv->getRHS());
+ return X.udiv(Y);
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(ZExt->getOperand());
+ return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(SExt->getOperand());
+ return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getSignedRange(Trunc->getOperand());
+ return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+ }
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- unsigned Min = BitWidth;
- for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
- unsigned N = GetMinSignBits(A->getOperand(i));
- Min = std::min(Min, N) - 1;
- if (Min == 0) return 1;
+ ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
+ const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
+ if (!Trip) return FullSet;
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+ // Check for overflow.
+ // TODO: This is very conservative.
+ if (!(Step->isOne() &&
+ isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
+ !(Step->isAllOnesValue() &&
+ isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
+ return FullSet;
+
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange EndRange = getSignedRange(End);
+ APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+ EndRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+ EndRange.getSignedMax());
+ if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+ return FullSet;
+ return ConstantRange(Min, Max+1);
+ }
}
- return 1;
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- return ComputeNumSignBits(U->getValue(), TD);
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+ if (NS == 1)
+ return FullSet;
+ return
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1);
}
- return 1;
+ return FullSet;
}
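
// Example (illustrative): if ValueTracking reports NS sign bits for an i32
// SCEVUnknown, the code above yields the signed range
// [-2^(32-NS), 2^(32-NS)).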
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
-const SCEV* ScalarEvolution::createSCEV(Value *V) {
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
@@ -2588,15 +2944,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
return getIntegerSCEV(0, V->getType());
else if (isa<UndefValue>(V))
return getIntegerSCEV(0, V->getType());
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
else
return getUnknown(V);
- User *U = cast<User>(V);
+ Operator *U = cast<Operator>(V);
switch (Opcode) {
case Instruction::Add:
+ // Don't transfer the NSW and NUW bits from the Add instruction to the
+ // Add expression, because the Instruction may be guarded by control
+ // flow and the no-overflow bits may not be valid for the expression in
+ // any context.
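+      // For instance (illustrative): in "if (x != INT_MAX) y = x + 1;" an
+      // nsw flag on the add is justified only under the guard, while the
+      // SCEV for the add may be reused in contexts where the guard is not
+      // known to hold.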
return getAddExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Mul:
+ // Don't transfer the NSW and NUW bits from the Mul instruction to the
+ // Mul expression, as with Add.
return getMulExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::UDiv:
@@ -2630,7 +2994,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
return
getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
- IntegerType::get(BitWidth - LZ)),
+ IntegerType::get(getContext(), BitWidth - LZ)),
U->getType());
}
break;
@@ -2643,11 +3007,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
// In order for this transformation to be safe, the LHS must be of the
// form X*(2^n) and the Or constant must be less than 2^n.
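    // For instance (illustrative): with LHS = X*8, "| 5" behaves as "+ 5"
    // because the low three bits of X*8 are known zero and 5 < 8, whereas
    // "| 9" would not qualify since 9 >= 2^3.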
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
- const SCEV* LHS = getSCEV(U->getOperand(0));
+ const SCEV *LHS = getSCEV(U->getOperand(0));
const APInt &CIVal = CI->getValue();
if (GetMinTrailingZeros(LHS) >=
- (CIVal.getBitWidth() - CIVal.countLeadingZeros()))
- return getAddExpr(LHS, getSCEV(U->getOperand(1)));
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ if (OldAR->hasNoUnsignedWrap())
+ const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true);
+ if (OldAR->hasNoSignedWrap())
+ const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true);
+ }
+ return S;
+ }
}
break;
case Instruction::Xor:
@@ -2673,7 +3049,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
const Type *UTy = U->getType();
- const SCEV* Z0 = Z->getOperand();
+ const SCEV *Z0 = Z->getOperand();
const Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
@@ -2699,7 +3075,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
// Turn shift left of a constant amount into a multiply.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- Constant *X = ConstantInt::get(
+ Constant *X = ConstantInt::get(getContext(),
APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
@@ -2709,7 +3085,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
// Turn logical shift right of a constant into a unsigned divide.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- Constant *X = ConstantInt::get(
+ Constant *X = ConstantInt::get(getContext(),
APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
@@ -2729,7 +3105,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
return getIntegerSCEV(0, U->getType()); // value is undefined
return
getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
- IntegerType::get(Amt)),
+ IntegerType::get(getContext(), Amt)),
U->getType());
}
break;
@@ -2749,18 +3125,12 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
return getSCEV(U->getOperand(0));
break;
- case Instruction::IntToPtr:
- if (!TD) break; // Without TD we can't analyze pointers.
- return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
- TD->getIntPtrType());
-
- case Instruction::PtrToInt:
- if (!TD) break; // Without TD we can't analyze pointers.
- return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
- U->getType());
+  // It's tempting to handle inttoptr and ptrtoint; however, this can
+ // lead to pointer expressions which cannot be expanded to GEPs
+ // (because they may overflow). For now, the only pointer-typed
+ // expressions we handle are GEPs and address literals.
case Instruction::GetElementPtr:
- if (!TD) break; // Without TD we can't analyze pointers.
return createNodeForGEP(U);
case Instruction::PHI:
@@ -2842,17 +3212,29 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
///
-const SCEV* ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).Exact;
}
/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
-const SCEV* ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).Max;
}
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert a CouldNotCompute for this loop. If the insertion
@@ -2883,10 +3265,39 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
- // conservative estimates made without the benefit
- // of trip count information.
- if (ItCount.hasAnyInfo())
- forgetLoopPHIs(L);
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in
+ // forgetLoopBackedgeTakenCount, except that it handles SCEVUnknown PHI
+ // nodes specially.
+ if (ItCount.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+        // structure, or it's a PHI that's in the process of being computed
+        // by createNodeForPHI. In the former case, additional loop trip
+        // count information isn't going to change anything. In the latter
+        // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
}
return Pair.first->second;
}
@@ -2897,37 +3308,25 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
/// is deleted.
void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) {
BackedgeTakenCounts.erase(L);
- forgetLoopPHIs(L);
-}
-/// forgetLoopPHIs - Delete the memoized SCEVs associated with the
-/// PHI nodes in the given loop. This is used when the trip count of
-/// the loop may have changed.
-void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
- BasicBlock *Header = L->getHeader();
-
- // Push all Loop-header PHIs onto the Worklist stack, except those
- // that are presently represented via a SCEVUnknown. SCEVUnknown for
- // a PHI either means that it has an unrecognized structure, or it's
- // a PHI that's in the progress of being computed by createNodeForPHI.
- // In the former case, additional loop trip count information isn't
- // going to change anything. In the later case, createNodeForPHI will
- // perform the necessary updates on its own when it gets to that point.
SmallVector<Instruction *, 16> Worklist;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- std::map<SCEVCallbackVH, const SCEV*>::iterator It =
- Scalars.find((Value*)I);
- if (It != Scalars.end() && !isa<SCEVUnknown>(It->second))
- Worklist.push_back(PN);
- }
+ PushLoopPHIs(L, Worklist);
+ SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (Scalars.erase(I))
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Worklist.push_back(cast<Instruction>(UI));
+ if (!Visited.insert(I)) continue;
+
+ std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
}
}
@@ -2939,8 +3338,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
L->getExitingBlocks(ExitingBlocks);
// Examine all exits and pick the most conservative values.
- const SCEV* BECount = getCouldNotCompute();
- const SCEV* MaxBECount = getCouldNotCompute();
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
bool CouldNotComputeBECount = false;
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BackedgeTakenInfo NewBTI =
@@ -3049,8 +3448,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
BackedgeTakenInfo BTI1 =
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
- const SCEV* BECount = getCouldNotCompute();
- const SCEV* MaxBECount = getCouldNotCompute();
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
if (L->contains(TBB)) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
@@ -3084,8 +3483,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
BackedgeTakenInfo BTI1 =
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
- const SCEV* BECount = getCouldNotCompute();
- const SCEV* MaxBECount = getCouldNotCompute();
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
if (L->contains(FBB)) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
@@ -3143,7 +3542,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- const SCEV* ItCnt =
+ const SCEV *ItCnt =
ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
if (!isa<SCEVCouldNotCompute>(ItCnt)) {
unsigned BitWidth = getTypeSizeInBits(ItCnt->getType());
@@ -3153,8 +3552,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
}
}
- const SCEV* LHS = getSCEV(ExitCond->getOperand(0));
- const SCEV* RHS = getSCEV(ExitCond->getOperand(1));
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
// Try to evaluate any dependencies out of the loop.
LHS = getSCEVAtScope(LHS, L);
@@ -3177,20 +3576,20 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
ConstantRange CompRange(
ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
- const SCEV* Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- const SCEV* TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
if (!isa<SCEVCouldNotCompute>(TC)) return TC;
break;
}
- case ICmpInst::ICMP_EQ: {
- // Convert to: while (X-Y == 0) // while (X == Y)
- const SCEV* TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
if (!isa<SCEVCouldNotCompute>(TC)) return TC;
break;
}
@@ -3234,8 +3633,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
ScalarEvolution &SE) {
- const SCEV* InVal = SE.getConstant(C);
- const SCEV* Val = AddRec->evaluateAtIteration(InVal, SE);
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
assert(isa<SCEVConstant>(Val) &&
"Evaluation of SCEV at constant didn't fold correctly?");
return cast<SCEVConstant>(Val)->getValue();
@@ -3246,7 +3645,7 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
/// the addressed element of the initializer or null if the index expression is
/// invalid.
static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
+GetAddressedElementFromGlobal(LLVMContext &Context, GlobalVariable *GV,
const std::vector<ConstantInt*> &Indices) {
Constant *Init = GV->getInitializer();
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
@@ -3265,7 +3664,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
if (Idx >= ATy->getNumElements()) return 0; // Bogus program
Init = Constant::getNullValue(ATy->getElementType());
} else {
- assert(0 && "Unknown constant aggregate type!");
+ llvm_unreachable("Unknown constant aggregate type!");
}
return 0;
} else {
@@ -3293,7 +3692,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
// Make sure that it is really a constant global we are gepping, with an
// initializer, and make sure the first IDX is really 0.
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
!cast<Constant>(GEP->getOperand(1))->isNullValue())
return getCouldNotCompute();
@@ -3314,7 +3713,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
- const SCEV* Idx = getSCEV(VarIdx);
+ const SCEV *Idx = getSCEV(VarIdx);
Idx = getSCEVAtScope(Idx, L);
// We can only recognize very limited forms of loop index expressions, in
@@ -3327,14 +3726,14 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
- ConstantInt *ItCst =
- ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
- Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ Constant *Result = GetAddressedElementFromGlobal(getContext(), GV, Indexes);
if (Result == 0) break; // Cannot compute!
// Evaluate the condition for this iteration.
@@ -3418,6 +3817,7 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
if (Constant *C = dyn_cast<Constant>(V)) return C;
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
Instruction *I = cast<Instruction>(V);
+ LLVMContext &Context = I->getParent()->getContext();
std::vector<Constant*> Operands;
Operands.resize(I->getNumOperands());
@@ -3429,10 +3829,12 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size());
+ &Operands[0], Operands.size(),
+ Context);
else
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size());
+ &Operands[0], Operands.size(),
+ Context);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -3487,7 +3889,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
}
}
-/// ComputeBackedgeTakenCountExhaustively - If the trip is known to execute a
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
 /// try to evaluate a few iterations of the loop until the exit
 /// condition gets a value of ExitWhen (true or false). If we cannot
@@ -3526,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
if (CondVal->getValue() == uint64_t(ExitWhen)) {
++NumBruteForceTripCountsComputed;
- return getConstant(Type::Int32Ty, IterationNum);
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
// Compute the value of the PHI node for the next iteration.
@@ -3540,7 +3942,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
return getCouldNotCompute();
}
-/// getSCEVAtScope - Return a SCEV expression handle for the specified value
+/// getSCEVAtScope - Return a SCEV expression for the specified value
/// at the specified scope in the program. The L value specifies a loop
 /// nest to evaluate the expression at, where null means the top-level
 /// scope and a specified loop means the expression is evaluated as if
 /// immediately inside that loop.
@@ -3550,9 +3952,21 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
///
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
-const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
- // FIXME: this should be turned into a virtual method on SCEV!
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // Check to see if we've folded this expression at this loop before.
+ std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+ std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? Pair.first->second : V;
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ ValuesAtScopes[V][L] = C;
+ return C;
+}
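+
+// Illustrative note: inserting the null sentinel before calling
+// computeSCEVAtScope means a re-entrant query for the same (V, L) pair
+// returns V itself instead of recursing, and the computed result then
+// overwrites the sentinel.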
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
// If this instruction is evolved from a constant-evolving PHI, compute the
@@ -3567,7 +3981,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// to see if the loop that contains it has a known backedge-taken
// count. If so, we may be able to force computation of the exit
// value.
- const SCEV* BackedgeTakenCount = getBackedgeTakenCount(LI);
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
if (const SCEVConstant *BTCC =
dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// Okay, we know how many times the containing loop executes. If
@@ -3585,13 +3999,6 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// the arguments into constants, and if so, try to constant propagate the
// result. This is particularly useful for computing loop exit values.
if (CanConstantFold(I)) {
- // Check to see if we've folded this instruction at this loop before.
- std::map<const Loop *, Constant *> &Values = ValuesAtScopes[I];
- std::pair<std::map<const Loop *, Constant *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<Constant *>(0)));
- if (!Pair.second)
- return Pair.first->second ? &*getSCEV(Pair.first->second) : V;
-
std::vector<Constant*> Operands;
Operands.reserve(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -3605,7 +4012,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (!isSCEVable(Op->getType()))
return V;
- const SCEV* OpV = getSCEVAtScope(getSCEV(Op), L);
+      const SCEV *OpV = getSCEVAtScope(Op, L);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) {
Constant *C = SC->getValue();
if (C->getType() != Op->getType())
@@ -3634,11 +4041,12 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size());
+ &Operands[0], Operands.size(),
+ getContext());
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size());
- Pair.first->second = C;
+ &Operands[0], Operands.size(),
+ getContext());
return getSCEV(C);
}
}
@@ -3651,7 +4059,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
- const SCEV* OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
@@ -3671,7 +4079,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
return getSMaxExpr(NewOps);
if (isa<SCEVUMaxExpr>(Comm))
return getUMaxExpr(NewOps);
- assert(0 && "Unknown commutative SCEV type!");
+ llvm_unreachable("Unknown commutative SCEV type!");
}
}
// If we got here, all operands are loop invariant.
@@ -3679,8 +4087,8 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
}
if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
- const SCEV* LHS = getSCEVAtScope(Div->getLHS(), L);
- const SCEV* RHS = getSCEVAtScope(Div->getRHS(), L);
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
if (LHS == Div->getLHS() && RHS == Div->getRHS())
return Div; // must be loop invariant
return getUDivExpr(LHS, RHS);
@@ -3692,7 +4100,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (!L || !AddRec->getLoop()->contains(L->getHeader())) {
// To evaluate this recurrence, we need to know how many times the AddRec
// loop iterates. Compute this now.
- const SCEV* BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
// Then, evaluate the AddRec.
@@ -3702,33 +4110,36 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
}
if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
- const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L);
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getZeroExtendExpr(Op, Cast->getType());
}
if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
- const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L);
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getSignExtendExpr(Op, Cast->getType());
}
if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
- const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L);
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getTruncateExpr(Op, Cast->getType());
}
- assert(0 && "Unknown SCEV type!");
+ if (isa<SCEVTargetDataConstant>(V))
+ return V;
+
+ llvm_unreachable("Unknown SCEV type!");
return 0;
}
/// getSCEVAtScope - This is a convenience function which does
/// getSCEVAtScope(getSCEV(V), L).
-const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
return getSCEVAtScope(getSCEV(V), L);
}
@@ -3741,7 +4152,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
-static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
assert(BW == B.getBitWidth() && "Bit widths must be the same.");
@@ -3784,7 +4195,7 @@ static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
/// might be the same) or two SCEVCouldNotCompute objects.
///
-static std::pair<const SCEV*,const SCEV*>
+static std::pair<const SCEV *,const SCEV *>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
@@ -3833,8 +4244,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
return std::make_pair(CNC, CNC);
}
- ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA));
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
return std::make_pair(SE.getConstant(Solution1),
SE.getConstant(Solution2));
@@ -3843,7 +4258,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute. If not computable, return CouldNotCompute.
-const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -3878,7 +4293,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// First, handle unitary steps.
if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so:
- return getNegativeSCEV(Start); // N = -Start (as unsigned)
+ return getNegativeSCEV(Start); // N = -Start (as unsigned)
if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so:
return Start; // N = Start (as unsigned)
@@ -3891,7 +4306,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
} else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) {
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
- std::pair<const SCEV*,const SCEV*> Roots = SolveQuadraticEquation(AddRec,
+ std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
*this);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
@@ -3910,7 +4325,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// We can only use this value if the chrec ends up with an exact zero
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
- const SCEV* Val = AddRec->evaluateAtIteration(R1, *this);
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
return R1; // We found a quadratic root!
}
@@ -3923,7 +4338,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
-const SCEV* ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
@@ -3984,7 +4399,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
/// more general, since a front-end may have replicated the controlling
/// expression.
///
-static bool HasSameValue(const SCEV* A, const SCEV* B) {
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
@@ -3994,19 +4409,142 @@ static bool HasSameValue(const SCEV* A, const SCEV* B) {
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
- if (AI->isIdenticalTo(BI))
+ if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
return true;
// Otherwise assume they may have a different value.
return false;
}
-/// isLoopGuardedByCond - Test whether entry to the loop is protected by
-/// a conditional between LHS and RHS. This is used to help avoid max
-/// expressions in loop trip counts.
-bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS) {
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ break;
+ case ICmpInst::ICMP_SGT:
+ Pred = ICmpInst::ICMP_SLT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ Pred = ICmpInst::ICMP_SLE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ Pred = ICmpInst::ICMP_ULE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
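
As a minimal sketch of the range logic the signed cases above rely on (knownSLT is a hypothetical helper, not part of this patch; the ConstantRange header path is as of this era of LLVM):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/ConstantRange.h"
    using namespace llvm;

    // If every value X can take is strictly below every value Y can
    // take, X <s Y is known true; if X's minimum is >= Y's maximum, it
    // is known false; otherwise nothing is known and the caller falls
    // through, exactly as the ICMP_SLT case does above.
    static bool knownSLT(const ConstantRange &X, const ConstantRange &Y,
                         bool &Known) {
      if (X.getSignedMax().slt(Y.getSignedMin())) { Known = true; return true; }
      if (X.getSignedMin().sge(Y.getSignedMax())) { Known = true; return false; }
      Known = false;
      return false;
    }
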
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LoopContinuePredicate ||
+ LoopContinuePredicate->isUnconditional())
+ return false;
+
+ return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS,
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+}
+
+/// isLoopGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
+bool
+ScalarEvolution::isLoopGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return false;
@@ -4027,136 +4565,308 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
LoopEntryPredicate->isUnconditional())
continue;
- if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
- LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
+ if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+ LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
return true;
}
return false;
}
-/// isNecessaryCond - Test whether the given CondValue value is a condition
-/// which is at least as strict as the one described by Pred, LHS, and RHS.
-bool ScalarEvolution::isNecessaryCond(Value *CondValue,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- bool Inverse) {
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(Value *CondValue,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ bool Inverse) {
  // Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
- return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+ isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
- return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+ isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
}
}
ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
if (!ICI) return false;
+ // Bail if the ICmp's operands' types are wider than the needed type
+ // before attempting to call getSCEV on them. This avoids infinite
+ // recursion, since the analysis of widening casts can require loop
+ // exit condition information for overflow checking, which would
+ // lead back here.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(ICI->getOperand(0)->getType()))
+ return false;
+
// Now that we found a conditional branch that dominates the loop, check to
// see if it is the comparison we are looking for.
- Value *PreCondLHS = ICI->getOperand(0);
- Value *PreCondRHS = ICI->getOperand(1);
- ICmpInst::Predicate Cond;
+ ICmpInst::Predicate FoundPred;
if (Inverse)
- Cond = ICI->getInversePredicate();
+ FoundPred = ICI->getInversePredicate();
else
- Cond = ICI->getPredicate();
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ // Balance the types. The case where FoundLHS' type is wider than
+ // LHS' type is checked for above.
+ if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
- if (Cond == Pred)
- ; // An exact match.
- else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
- ; // The actual condition is beyond sufficient.
- else
- // Check a few special cases.
- switch (Cond) {
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ // First, put a constant operand on the right.
+ if (isa<SCEVConstant>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ // Then, canonicalize comparisons with boundary cases.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMinValue()) return true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMaxValue()) return true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMinSignedValue()) return true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMaxSignedValue()) return true;
+ break;
case ICmpInst::ICMP_UGT:
- if (Pred == ICmpInst::ICMP_ULT) {
- std::swap(PreCondLHS, PreCondRHS);
- Cond = ICmpInst::ICMP_ULT;
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
break;
}
- return false;
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMaxValue()) return false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMinValue()) return false;
+ break;
case ICmpInst::ICMP_SGT:
- if (Pred == ICmpInst::ICMP_SLT) {
- std::swap(PreCondLHS, PreCondRHS);
- Cond = ICmpInst::ICMP_SLT;
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
break;
}
- return false;
- case ICmpInst::ICMP_NE:
- // Expressions like (x >u 0) are often canonicalized to (x != 0),
- // so check for this case by checking if the NE is comparing against
- // a minimum or maximum constant.
- if (!ICmpInst::isTrueWhenEqual(Pred))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
- const APInt &A = CI->getValue();
- switch (Pred) {
- case ICmpInst::ICMP_SLT:
- if (A.isMaxSignedValue()) break;
- return false;
- case ICmpInst::ICMP_SGT:
- if (A.isMinSignedValue()) break;
- return false;
- case ICmpInst::ICMP_ULT:
- if (A.isMaxValue()) break;
- return false;
- case ICmpInst::ICMP_UGT:
- if (A.isMinValue()) break;
- return false;
- default:
- return false;
- }
- Cond = ICmpInst::ICMP_NE;
- // NE is symmetric but the original comparison may not be. Swap
- // the operands if necessary so that they match below.
- if (isa<SCEVConstant>(LHS))
- std::swap(PreCondLHS, PreCondRHS);
- break;
- }
- return false;
- default:
- // We weren't able to reconcile the condition.
- return false;
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMaxSignedValue()) return false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMinSignedValue()) return false;
+ break;
+ }
+ }
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
}
+ }
- if (!PreCondLHS->getType()->isInteger()) return false;
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
- const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
- const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
- return (HasSameValue(LHS, PreCondLHSSCEV) &&
- HasSameValue(RHS, PreCondRHSSCEV)) ||
- (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
- HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
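
A few concrete instances of the boundary-case canonicalization performed in the switch above (illustrative, using the n-bit unsigned and signed extremes):

    x u>  0            -->  x != 0           (RA is the unsigned minimum)
    x u>= 1            -->  x != 0           (RA - 1 is the unsigned minimum)
    x s<= SINT_MAX - 1 -->  x != SINT_MAX    (RA + 1 is the signed maximum)
    x u>= 0            -->  trivially true
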
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
}
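
The "~x < ~y --> x > y" identity that isImpliedCondOperands applies above holds for all fixed-width values, since getNotSCEV computes ~x as -1 - x, which reverses both the signed and unsigned orders. A minimal check on raw APInts (hypothetical helper, not in the patch):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Bitwise-not maps x to (-1 - x) modulo 2^n, a strictly
    // order-reversing bijection, so the comparison flips exactly.
    static bool notFlipsSignedOrder(const APInt &X, const APInt &Y) {
      return (~X).slt(~Y) == X.sgt(Y); // true for every pair X, Y
    }
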
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.
-const SCEV* ScalarEvolution::getBECount(const SCEV* Start,
- const SCEV* End,
- const SCEV* Step) {
+const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap) {
const Type *Ty = Start->getType();
- const SCEV* NegOne = getIntegerSCEV(-1, Ty);
- const SCEV* Diff = getMinusSCEV(End, Start);
- const SCEV* RoundUp = getAddExpr(Step, NegOne);
+ const SCEV *NegOne = getIntegerSCEV(-1, Ty);
+ const SCEV *Diff = getMinusSCEV(End, Start);
+ const SCEV *RoundUp = getAddExpr(Step, NegOne);
// Add an adjustment to the difference between End and Start so that
// the division will effectively round up.
- const SCEV* Add = getAddExpr(Diff, RoundUp);
-
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1);
- const SCEV* OperandExtendedAdd =
- getAddExpr(getZeroExtendExpr(Diff, WideTy),
- getZeroExtendExpr(RoundUp, WideTy));
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+ const SCEV *Add = getAddExpr(Diff, RoundUp);
+
+ if (!NoWrap) {
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getContext(),
+ getTypeSizeInBits(Ty) + 1);
+ const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
+ const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return getCouldNotCompute();
+ }
return getUDivExpr(Add, Step);
}
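
Setting the overflow check aside, the arithmetic above is the familiar rounding-up division ceil((End - Start) / Step). A plain-integer sketch (hypothetical helper, not in the patch):

    #include <cstdint>

    static uint64_t beCount(uint64_t Start, uint64_t End, uint64_t Step) {
      uint64_t Diff = End - Start;    // getMinusSCEV(End, Start)
      uint64_t RoundUp = Step - 1;    // getAddExpr(Step, NegOne)
      return (Diff + RoundUp) / Step; // getUDivExpr(Add, Step)
    }
    // e.g. beCount(0, 10, 3) == 4, i.e. ceil(10 / 3).
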
@@ -4174,10 +4884,14 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
+ // Check to see if we have a flag which makes analysis easy.
+ bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() :
+ AddRec->hasNoUnsignedWrap();
+
if (AddRec->isAffine()) {
// FORNOW: We only support unit strides.
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV* Step = AddRec->getStepRecurrence(*this);
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
// TODO: handle non-constant strides.
const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
@@ -4186,7 +4900,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (CStep->isOne()) {
// With unit stride, the iteration never steps past the limit value.
} else if (CStep->getValue()->getValue().isStrictlyPositive()) {
- if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
+ if (NoWrap) {
+ // We know the iteration won't step past the maximum value for its type.
+ ;
+ } else if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
        // Test whether a positive iteration can step past the limit
// value and past the maximum value for its type in a single step.
if (isSigned) {
@@ -4213,39 +4930,37 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// treat m-n as signed nor unsigned due to overflow possibility.
// First, we get the value of the LHS in the first iteration: n
- const SCEV* Start = AddRec->getOperand(0);
+ const SCEV *Start = AddRec->getOperand(0);
// Determine the minimum constant start value.
- const SCEV *MinStart = isa<SCEVConstant>(Start) ? Start :
- getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
- APInt::getMinValue(BitWidth));
+ const SCEV *MinStart = getConstant(isSigned ?
+ getSignedRange(Start).getSignedMin() :
+ getUnsignedRange(Start).getUnsignedMin());
// If we know that the condition is true in order to enter the loop,
// then we know that it will run exactly (m-n)/s times. Otherwise, we
// only know that it will execute (max(m,n)-n)/s times. In both cases,
// the division must round up.
- const SCEV* End = RHS;
+ const SCEV *End = RHS;
if (!isLoopGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
getMinusSCEV(Start, Step), RHS))
End = isSigned ? getSMaxExpr(RHS, Start)
: getUMaxExpr(RHS, Start);
// Determine the maximum constant end value.
- const SCEV* MaxEnd =
- isa<SCEVConstant>(End) ? End :
- getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
- .ashr(GetMinSignBits(End) - 1) :
- APInt::getMaxValue(BitWidth)
- .lshr(GetMinLeadingZeros(End)));
+ const SCEV *MaxEnd = getConstant(isSigned ?
+ getSignedRange(End).getSignedMax() :
+ getUnsignedRange(End).getUnsignedMax());
// Finally, we subtract these two values and divide, rounding up, to get
// the number of times the backedge is executed.
- const SCEV* BECount = getBECount(Start, End, Step);
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
// The maximum backedge count is similar, except using the minimum start
// value and the maximum end value.
- const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step);
+ const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap);
return BackedgeTakenInfo(BECount, MaxBECount);
}
@@ -4258,7 +4973,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
/// this is that it returns the first iteration number where the value is not in
/// the condition, thus computing the exit count. If the iteration count can't
/// be computed, an instance of SCEVCouldNotCompute is returned.
-const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
ScalarEvolution &SE) const {
if (Range.isFullSet()) // Infinite loop.
return SE.getCouldNotCompute();
@@ -4266,9 +4981,9 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
- SmallVector<const SCEV*, 4> Operands(op_begin(), op_end());
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
Operands[0] = SE.getIntegerSCEV(0, SC->getType());
- const SCEV* Shifted = SE.getAddRecExpr(Operands, getLoop());
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
if (const SCEVAddRecExpr *ShiftedAddRec =
dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
@@ -4307,7 +5022,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// The exit value should be (End+A)/A.
APInt ExitVal = (End + A).udiv(A);
- ConstantInt *ExitValue = ConstantInt::get(ExitVal);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
// Evaluate at the exit value. If we really did fall out of the valid
// range, then we computed our trip count, otherwise wrap around or other
@@ -4319,7 +5034,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// Ensure that the previous value is in the range. This is a sanity check.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
- ConstantInt::get(ExitVal - One), SE)->getValue()) &&
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
} else if (isQuadratic()) {
@@ -4327,12 +5042,12 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// quadratic equation to solve it. To do this, we must frame our problem in
// terms of figuring out when zero is crossed, instead of when
// Range.getUpper() is crossed.
- SmallVector<const SCEV*, 4> NewOps(op_begin(), op_end());
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV* NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
// Next, solve the constructed addrec
- std::pair<const SCEV*,const SCEV*> Roots =
+ std::pair<const SCEV *,const SCEV *> Roots =
SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
@@ -4340,7 +5055,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// Pick the smallest positive root value.
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
- R1->getValue(), R2->getValue()))) {
+ R1->getValue(), R2->getValue()))) {
if (CB->getZExtValue() == false)
std::swap(R1, R2); // R1 is the minimum root now.
@@ -4352,7 +5067,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
SE);
if (Range.contains(R1Val->getValue())) {
// The next iteration must be out of the range...
- ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()+1);
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (!Range.contains(R1Val->getValue()))
@@ -4362,7 +5078,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If R1 was not in the range, then it is a good return value. Make
// sure that R1-1 WAS in the range though, just in case.
- ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()-1);
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (Range.contains(R1Val->getValue()))
return R1;
@@ -4381,22 +5098,21 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
//===----------------------------------------------------------------------===//
void ScalarEvolution::SCEVCallbackVH::deleted() {
- assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!");
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- if (Instruction *I = dyn_cast<Instruction>(getValPtr()))
- SE->ValuesAtScopes.erase(I);
SE->Scalars.erase(getValPtr());
// this now dangles!
}
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
- assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!");
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
SmallVector<User *, 16> Worklist;
+ SmallPtrSet<User *, 8> Visited;
Value *Old = getValPtr();
bool DeleteOld = false;
for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
@@ -4410,20 +5126,19 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
DeleteOld = true;
continue;
}
+ if (!Visited.insert(U))
+ continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- if (Instruction *I = dyn_cast<Instruction>(U))
- SE->ValuesAtScopes.erase(I);
- if (SE->Scalars.erase(U))
- for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
- UI != UE; ++UI)
- Worklist.push_back(*UI);
+ SE->Scalars.erase(U);
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
}
+ // Delete the Old value if it (indirectly) references itself.
if (DeleteOld) {
if (PHINode *PN = dyn_cast<PHINode>(Old))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- if (Instruction *I = dyn_cast<Instruction>(Old))
- SE->ValuesAtScopes.erase(I);
SE->Scalars.erase(Old);
// this now dangles!
}
@@ -4502,21 +5217,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
- // observable from outside the class though (the hasSCEV function
- // notwithstanding), so casting away the const isn't dangerous.
+ // observable from outside the class though, so casting away the
+ // const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution*>(this);
OS << "Classifying expressions for: " << F->getName() << "\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isSCEVable(I->getType())) {
- OS << *I;
+ OS << *I << '\n';
OS << " --> ";
- const SCEV* SV = SE.getSCEV(&*I);
+ const SCEV *SV = SE.getSCEV(&*I);
SV->print(OS);
const Loop *L = LI->getLoopFor((*I).getParent());
- const SCEV* AtUse = SE.getSCEVAtScope(SV, L);
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
@@ -4524,7 +5239,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
if (L) {
OS << "\t\t" "Exits: ";
- const SCEV* ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!ExitValue->isLoopInvariant(L)) {
OS << "<<Unknown>>";
} else {
@@ -4540,7 +5255,3 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
PrintLoopInfo(OS, &SE, *I);
}
-void ScalarEvolution::print(std::ostream &o, const Module *M) const {
- raw_os_ostream OS(o);
- print(OS, M);
-}
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 000000000000..cc79e6c3b130
--- /dev/null
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,133 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements
+// a simple alias analysis in terms of ScalarEvolution queries.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+ /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses ScalarEvolution to answer queries.
+ class VISIBILITY_HIDDEN ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
+ ScalarEvolution *SE;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {}
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
+ Value *GetUnderlyingIdentifiedObject(const SCEV *S);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+static RegisterPass<ScalarEvolutionAliasAnalysis>
+X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetUnderlyingIdentifiedObject - Given an expression, try to find an
+/// "identified object" (see AliasAnalysis::isIdentifiedObject) base
+/// value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetUnderlyingIdentifiedObject(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetUnderlyingIdentifiedObject(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (isa<PointerType>(Last->getType()))
+ return GetUnderlyingIdentifiedObject(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // Determine if we've found an identified object.
+ Value *V = U->getValue();
+ if (isIdentifiedObject(V))
+ return V;
+ }
+  // No identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
+ const Value *B, unsigned BSize) {
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt AI(BitWidth, ASize);
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+ if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) {
+ APInt BI(BitWidth, BSize);
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+ if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin()))
+ return NoAlias;
+ }
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetUnderlyingIdentifiedObject(AS);
+ Value *BO = GetUnderlyingIdentifiedObject(BS);
+ if ((AO && AO != A) || (BO && BO != B))
+ if (alias(AO ? AO : A, AO ? ~0u : ASize,
+ BO ? BO : B, BO ? ~0u : BSize) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AliasAnalysis::alias(A, ASize, B, BSize);
+}
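
The NoAlias distance test above reduces, when the pointer difference is a known constant, to checking that each access size fits inside the gap in both unsigned directions. A standalone sketch with Delta standing in for the known value of B - A (hypothetical helper, not in the patch):

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    using namespace llvm;

    static bool knownNoOverlap(uint64_t Delta, uint64_t ASize, uint64_t BSize) {
      APInt BA(64, Delta); // B - A
      APInt AB = -BA;      // A - B, modulo 2^64
      // [A, A+ASize) cannot reach B, and [B, B+BSize) cannot reach A.
      return APInt(64, ASize).ule(BA) && APInt(64, BSize).ule(AB);
    }
    // e.g. with Delta == 16, two 8-byte accesses cannot overlap:
    // 8 <= 16 and 8 <= 2^64 - 16.
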
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 729a0c325448..d674ee847f11 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -52,10 +53,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
return CE->getOperand(0);
}
- // FIXME: keep track of the cast instruction.
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getCast(Op, C, Ty);
-
+
if (Argument *A = dyn_cast<Argument>(V)) {
// Check to see if there is already a cast!
for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
@@ -155,55 +155,95 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
-static bool FactorOutConstant(const SCEV* &S,
- const SCEV* &Remainder,
- const APInt &Factor,
- ScalarEvolution &SE) {
+static bool FactorOutConstant(const SCEV *&S,
+ const SCEV *&Remainder,
+ const SCEV *Factor,
+ ScalarEvolution &SE,
+ const TargetData *TD) {
// Everything is divisible by one.
- if (Factor == 1)
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getIntegerSCEV(1, S->getType());
return true;
+ }
// For a Constant, check for a multiple of the given factor.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- ConstantInt *CI =
- ConstantInt::get(C->getValue()->getValue().sdiv(Factor));
- // If the quotient is zero and the remainder is non-zero, reject
- // the value at this scale. It will be considered for subsequent
- // smaller scales.
- if (C->isZero() || !CI->isZero()) {
- const SCEV* Div = SE.getConstant(CI);
- S = Div;
- Remainder =
- SE.getAddExpr(Remainder,
- SE.getConstant(C->getValue()->getValue().srem(Factor)));
+ // 0/x == 0.
+ if (C->isZero())
return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(),
+ C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(
+ FC->getValue()->getValue())));
+ return true;
+ }
}
}
// In a Mul, check if there is a constant operand which is a multiple
// of the given factor.
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(Factor)) {
- const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
- SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
- MOperands.end());
- NewMulOps[0] =
- SE.getConstant(C->getValue()->getValue().sdiv(Factor));
- S = SE.getMulExpr(NewMulOps);
- return true;
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ if (TD) {
+ // With TargetData, the size is known. Check if there is a constant
+ // operand which is a multiple of the given factor. If so, we can
+ // factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
+ SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
+ MOperands.end());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ } else {
+ // Without TargetData, check if Factor can be factored out of any of the
+ // Mul's operands. If so, we can just remove it.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *SOp = M->getOperand(i);
+ const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType());
+ if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+ Remainder->isZero()) {
+ const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
+ SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
+ MOperands.end());
+ NewMulOps[i] = SOp;
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
}
+ }
+ }
// In an AddRec, check if both start and step are divisible.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
- const SCEV* Step = A->getStepRecurrence(SE);
- const SCEV* StepRem = SE.getIntegerSCEV(0, Step->getType());
- if (!FactorOutConstant(Step, StepRem, Factor, SE))
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType());
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
return false;
if (!StepRem->isZero())
return false;
- const SCEV* Start = A->getStart();
- if (!FactorOutConstant(Start, Remainder, Factor, SE))
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
return false;
S = SE.getAddRecExpr(Start, Step, A->getLoop());
return true;
@@ -212,15 +252,81 @@ static bool FactorOutConstant(const SCEV* &S,
return false;
}
-/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
-/// instead of using ptrtoint+arithmetic+inttoptr. This helps
-/// BasicAliasAnalysis analyze the result. However, it suffers from the
-/// underlying bug described in PR2831. Addition in LLVM currently always
-/// has two's complement wrapping guaranteed. However, the semantics for
-/// getelementptr overflow are ambiguous. In the common case though, this
-/// expansion gets used when a GEP in the original code has been converted
-/// into integer arithmetic, in which case the resulting code will be no
-/// more undefined than it was originally.
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getIntegerSCEV(0, Ty) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops = Add->getOperands();
+ else {
+ Ops.clear();
+ if (!Sum->isZero())
+ Ops.push_back(Sum);
+ }
+ // Then append the addrecs.
+ Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getIntegerSCEV(0, Ty);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop()));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
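
A concrete instance of the flattening SplitAddRecs performs, in SCEV notation:

    {(x + y),+,s}<L>  -->  x, y, {0,+,s}<L>

so x and y can be considered for folding into GEP indices independently of the recurrence part.
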
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
@@ -237,92 +343,130 @@ static bool FactorOutConstant(const SCEV* &S,
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
-Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
- const SCEV* const *op_end,
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
const PointerType *PTy,
const Type *Ty,
Value *V) {
const Type *ElTy = PTy->getElementType();
SmallVector<Value *, 4> GepIndices;
- SmallVector<const SCEV*, 8> Ops(op_begin, op_end);
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
  // Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
// the indices index into the element or field type selected by the
// preceding index.
for (;;) {
- APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
- ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
- SmallVector<const SCEV*, 8> NewOps;
- SmallVector<const SCEV*, 8> ScaledOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- // Split AddRecs up into parts as either of the parts may be usable
- // without the other.
- if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i]))
- if (!A->getStart()->isZero()) {
- const SCEV* Start = A->getStart();
- Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()),
- A->getStepRecurrence(SE),
- A->getLoop()));
- Ops[i] = Start;
- ++e;
- }
- // If the scale size is not 0, attempt to factor out a scale.
- if (ElSize != 0) {
- const SCEV* Op = Ops[i];
- const SCEV* Remainder = SE.getIntegerSCEV(0, Op->getType());
- if (FactorOutConstant(Op, Remainder, ElSize, SE)) {
- ScaledOps.push_back(Op); // Op now has ElSize factored out.
- NewOps.push_back(Remainder);
- continue;
+ const SCEV *ElSize = SE.getAllocSizeExpr(ElTy);
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized() && !ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEV *Op = Ops[i];
+ const SCEV *Remainder = SE.getIntegerSCEV(0, Ty);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
}
}
- // If the operand was not divisible, add it to the list of operands
- // we'll scan next iteration.
- NewOps.push_back(Ops[i]);
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
}
- Ops = NewOps;
- AnyNonZeroIndices |= !ScaledOps.empty();
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
Value *Scaled = ScaledOps.empty() ?
Constant::getNullValue(Ty) :
expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
GepIndices.push_back(Scaled);
// Collect struct field index operands.
- if (!Ops.empty())
- while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ if (SE.TD) {
+ // With TargetData, field offsets are known. See if a constant offset
+ // falls within any of the struct fields.
+ if (Ops.empty()) break;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
if (SE.getTypeSizeInBits(C->getType()) <= 64) {
const StructLayout &SL = *SE.TD->getStructLayout(STy);
uint64_t FullOffset = C->getValue()->getZExtValue();
if (FullOffset < SL.getSizeInBytes()) {
unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
ElTy = STy->getTypeAtIndex(ElIdx);
Ops[0] =
SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
AnyNonZeroIndices = true;
- continue;
+ FoundFieldNo = true;
}
}
- break;
+ } else {
+ // Without TargetData, just check for a SCEVFieldOffsetExpr of the
+ // appropriate struct type.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (const SCEVFieldOffsetExpr *FO =
+ dyn_cast<SCEVFieldOffsetExpr>(Ops[i]))
+ if (FO->getStructType() == STy) {
+ unsigned FieldNo = FO->getFieldNo();
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ FieldNo));
+ ElTy = STy->getTypeAtIndex(FieldNo);
+ Ops[i] = SE.getConstant(Ty, 0);
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ break;
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
}
+ }
- if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
ElTy = ATy->getElementType();
- continue;
- }
- break;
+ else
+ break;
}
  // If none of the operands were convertible to proper GEP indices, cast
// the base to i8* and do an ugly getelementptr with that. It's still
// better than ptrtoint+arithmetic+inttoptr at least.
if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
V = InsertNoopCastOfTo(V,
- Type::Int8Ty->getPointerTo(PTy->getAddressSpace()));
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ // Expand the operands for a plain byte offset.
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
// Fold a GEP with constant operands.
@@ -345,12 +489,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
}
}
- Value *GEP = Builder.CreateGEP(V, Idx, "scevgep");
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
InsertedValues.insert(GEP);
return GEP;
}
- // Insert a pretty getelementptr.
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
Value *GEP = Builder.CreateGEP(V,
GepIndices.begin(),
GepIndices.end(),
@@ -361,21 +508,37 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
}
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ int NumOperands = S->getNumOperands();
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expand(S->getOperand(S->getNumOperands()-1));
+
+ // Find the index of an operand to start with. Choose the operand with
+ // pointer type, if there is one, or the last operand otherwise.
+ int PIdx = 0;
+ for (; PIdx != NumOperands - 1; ++PIdx)
+ if (isa<PointerType>(S->getOperand(PIdx)->getType())) break;
+
+ // Expand code for the operand that we chose.
+ Value *V = expand(S->getOperand(PIdx));
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
- if (SE.TD)
- if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
- const SmallVectorImpl<const SCEV*> &Ops = S->getOperands();
- return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V);
- }
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
+ // Take the operand at PIdx out of the list.
+ const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
+ SmallVector<const SCEV *, 8> NewOps;
+ NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx);
+ NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end());
+ // Make a GEP.
+ return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V);
+ }
+ // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer
+ // arithmetic.
V = InsertNoopCastOfTo(V, Ty);
// Emit a bunch of add instructions
- for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ for (int i = NumOperands-1; i >= 0; --i) {
+ if (i == PIdx) continue;
Value *W = expandCodeFor(S->getOperand(i), Ty);
V = InsertBinop(Instruction::Add, V, W);
}
@@ -422,7 +585,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
-static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest,
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
ScalarEvolution &SE) {
while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
Base = A->getStart();
@@ -433,7 +596,7 @@ static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest,
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
- SmallVector<const SCEV*, 8> NewAddOps(A->op_begin(), A->op_end());
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
NewAddOps.back() = Rest;
Rest = SE.getAddExpr(NewAddOps);
ExposePointerBase(Base, Rest, SE);
@@ -457,11 +620,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (CanonicalIV &&
SE.getTypeSizeInBits(CanonicalIV->getType()) >
SE.getTypeSizeInBits(Ty)) {
- const SCEV *Start = SE.getAnyExtendExpr(S->getStart(),
- CanonicalIV->getType());
- const SCEV *Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
- CanonicalIV->getType());
- Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
+ const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
+ SmallVector<const SCEV *, 4> NewOps(Ops.size());
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop()));
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
@@ -475,28 +638,26 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- const SmallVectorImpl<const SCEV*> &SOperands = S->getOperands();
- SmallVector<const SCEV*, 4> NewOps(SOperands.begin(), SOperands.end());
+ const SmallVectorImpl<const SCEV *> &SOperands = S->getOperands();
+ SmallVector<const SCEV *, 4> NewOps(SOperands.begin(), SOperands.end());
NewOps[0] = SE.getIntegerSCEV(0, Ty);
- const SCEV* Rest = SE.getAddRecExpr(NewOps, L);
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L);
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
- if (SE.TD) {
- const SCEV* Base = S->getStart();
- const SCEV* RestArray[1] = { Rest };
- // Dig into the expression to find the pointer base for a GEP.
- ExposePointerBase(Base, RestArray[0], SE);
- // If we found a pointer, expand the AddRec with a GEP.
- if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
- // Make sure the Base isn't something exotic, such as a multiplied
- // or divided pointer value. In those cases, the result type isn't
- // actually a pointer type.
- if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
- Value *StartV = expand(Base);
- assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
- return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
- }
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
}
}
@@ -519,29 +680,22 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// Create and insert the PHI node for the induction variable in the
// specified loop.
BasicBlock *Header = L->getHeader();
- BasicBlock *Preheader = L->getLoopPreheader();
PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
InsertedValues.insert(PN);
- PN->addIncoming(Constant::getNullValue(Ty), Preheader);
- pred_iterator HPI = pred_begin(Header);
- assert(HPI != pred_end(Header) && "Loop with zero preds???");
- if (!L->contains(*HPI)) ++HPI;
- assert(HPI != pred_end(Header) && L->contains(*HPI) &&
- "No backedge in loop?");
-
- // Insert a unit add instruction right before the terminator corresponding
- // to the back-edge.
Constant *One = ConstantInt::get(Ty, 1);
- Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
- (*HPI)->getTerminator());
- InsertedValues.insert(Add);
-
- pred_iterator PI = pred_begin(Header);
- if (*PI == Preheader)
- ++PI;
- PN->addIncoming(Add, *PI);
- return PN;
+ for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
+ HPI != HPE; ++HPI)
+ if (L->contains(*HPI)) {
+ // Insert a unit add instruction right before the terminator corresponding
+ // to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
+ (*HPI)->getTerminator());
+ InsertedValues.insert(Add);
+ PN->addIncoming(Add, *HPI);
+ } else {
+ PN->addIncoming(Constant::getNullValue(Ty), *HPI);
+ }
}
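// Illustrative result of the predecessor loop above, in LLVM assembly with
// invented block names: in-loop predecessors feed the incremented value and
// entry edges feed zero.
//   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %latch ]
//   %indvar.next = add i64 %indvar, 1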
// {0,+,F} --> {0,+,1} * F
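// Illustrative restatement of the rewrite above: with the canonical IV
// I = {0,+,1} taking the values 0, 1, 2, ... per iteration, a zero-start
// recurrence with loop-invariant step F satisfies {0,+,F} == I * F, so the
// expander can emit a multiply of the canonical IV instead of a new PHI.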
@@ -563,19 +717,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// folders, then expandCodeFor the closed form. This allows the folders to
// simplify the expression without having to build a bunch of special code
// into this folder.
- const SCEV* IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
+ const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
// Promote S up to the canonical IV type, if the cast is foldable.
- const SCEV* NewS = S;
- const SCEV* Ext = SE.getNoopOrAnyExtend(S, I->getType());
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType());
if (isa<SCEVAddRecExpr>(Ext))
NewS = Ext;
- const SCEV* V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
//cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
// Truncate the result down to the original type, if needed.
- const SCEV* T = SE.getTruncateOrNoop(V, Ty);
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
return expand(T);
}
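// Illustrative sketch of the closed form used above, outside LLVM: for the
// affine case, {Start,+,Step} evaluated at iteration i is Start + i*Step,
// which is what evaluateAtIteration returns for the symbolic IV.
static long long evalAffineAddRec(long long Start, long long Step,
                                  long long i) {
  return Start + i * Step;
}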
@@ -607,9 +761,15 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
}
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *LHS = expandCodeFor(S->getOperand(0), Ty);
- for (unsigned i = 1; i < S->getNumOperands(); ++i) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
InsertedValues.insert(ICmp);
@@ -617,13 +777,23 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
InsertedValues.insert(Sel);
LHS = Sel;
}
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
return LHS;
}
Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *LHS = expandCodeFor(S->getOperand(0), Ty);
- for (unsigned i = 1; i < S->getNumOperands(); ++i) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
InsertedValues.insert(ICmp);
@@ -631,10 +801,22 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
InsertedValues.insert(Sel);
LHS = Sel;
}
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
return LHS;
}
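// Illustrative sketch of the icmp+select expansion shared by both max
// visitors above, outside LLVM: the operand list folds pairwise, so
// smax(a, b, c) becomes smax2(smax2(a, b), c).
static int smax2(int a, int b) { return a > b ? a : b; }
static unsigned umax2(unsigned a, unsigned b) { return a > b ? a : b; }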
-Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) {
+Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) {
+ return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo());
+}
+
+Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) {
+ return ConstantExpr::getSizeOf(S->getAllocType());
+}
+
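// Illustrative note on the two visitors above: getSizeOf and getOffsetOf
// fold to the classic constant-GEP idioms, roughly
//   sizeof(T)      ~ ptrtoint(getelementptr(T* null, i32 1))
//   offsetof(S, i) ~ ptrtoint(getelementptr(S* null, i32 0, i32 i))
// so these SCEVs expand to constants with no inserted instructions.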
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
// Expand the code for this SCEV.
Value *V = expand(SH);
if (Ty) {
@@ -695,7 +877,7 @@ Value *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
assert(Ty->isInteger() && "Can only insert integer induction variables!");
- const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
+ const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
SE.getIntegerSCEV(1, Ty), L);
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 543306854ced..b7844f022765 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -17,7 +17,9 @@
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -27,7 +29,7 @@ using namespace llvm;
AbstractLatticeFunction::~AbstractLatticeFunction() {}
/// PrintValue - Render the specified lattice value to the specified stream.
-void AbstractLatticeFunction::PrintValue(LatticeVal V, std::ostream &OS) {
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
if (V == UndefVal)
OS << "undefined";
else if (V == OverdefinedVal)
@@ -87,7 +89,7 @@ void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
- DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+ DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
BBExecutable.insert(BB); // Basic block is executable!
BBWorkList.push_back(BB); // Add the block to the work list!
}
@@ -98,8 +100,8 @@ void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
return; // This edge is already known to be executable!
- DOUT << "Marking Edge Executable: " << Source->getNameStart()
- << " -> " << Dest->getNameStart() << "\n";
+ DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
if (BBExecutable.count(Dest)) {
// The destination is already executable, but we just made an edge
@@ -153,7 +155,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
// Constant condition variables mean the branch can only go a single way
- Succs[C == ConstantInt::getFalse()] = true;
+ Succs[C == ConstantInt::getFalse(*Context)] = true;
return;
}
@@ -221,6 +223,16 @@ void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
}
void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
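// Illustrative example of the special case above, with invented names: in
// SSI form a sigma function may appear as a single-incoming PHI such as
//   %x.sigma = phi i32 [ %x, %then ]
// whose lattice state depends on the branch guarding %then, not just on the
// incoming value, so only the lattice function can compute it.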
LatticeVal PNIV = getOrInitValueState(&PN);
LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
@@ -283,7 +295,7 @@ void SparseSolver::Solve(Function &F) {
Instruction *I = InstWorkList.back();
InstWorkList.pop_back();
- DOUT << "\nPopped off I-WL: " << *I;
+ DEBUG(errs() << "\nPopped off I-WL: " << *I << "\n");
// "I" got into the work list because it made a transition. See if any
// users are both live and in need of updating.
@@ -300,7 +312,7 @@ void SparseSolver::Solve(Function &F) {
BasicBlock *BB = BBWorkList.back();
BBWorkList.pop_back();
- DOUT << "\nPopped off BBWL: " << *BB;
+ DEBUG(errs() << "\nPopped off BBWL: " << *BB);
// Notify all instructions in this basic block that they are newly
// executable.
@@ -310,7 +322,7 @@ void SparseSolver::Solve(Function &F) {
}
}
-void SparseSolver::Print(Function &F, std::ostream &OS) const {
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
OS << "\nFUNCTION: " << F.getNameStr() << "\n";
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BBExecutable.count(BB))
@@ -322,7 +334,7 @@ void SparseSolver::Print(Function &F, std::ostream &OS) const {
OS << "; anon bb\n";
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
LatticeFunc->PrintValue(getLatticeState(I), OS);
- OS << *I;
+ OS << *I << "\n";
}
OS << "\n";
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 8f19fda953dd..c9b303b48b28 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -18,7 +18,7 @@
#include "llvm/Analysis/Trace.h"
#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
Function *Trace::getFunction() const {
@@ -31,9 +31,9 @@ Module *Trace::getModule() const {
/// print - Write trace to output stream.
///
-void Trace::print(std::ostream &O) const {
- Function *F = getFunction ();
- O << "; Trace from function " << F->getName() << ", blocks:\n";
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
for (const_iterator i = begin(), e = end(); i != e; ++i) {
O << "; ";
WriteAsOperand(O, *i, true, getModule());
@@ -46,5 +46,5 @@ void Trace::print(std::ostream &O) const {
/// output stream.
///
void Trace::dump() const {
- print(cerr);
+ print(errs());
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 07a18fe4de42..baa347a6638f 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -16,25 +16,16 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>
using namespace llvm;
-/// getOpcode - If this is an Instruction or a ConstantExpr, return the
-/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V))
- return I->getOpcode();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode();
- // Use UserOp1 to mean there's no opcode.
- return Instruction::UserOp1;
-}
-
-
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
@@ -45,9 +36,15 @@ static unsigned getOpcode(const Value *V) {
/// optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt &KnownZero, APInt &KnownOne,
- TargetData *TD, unsigned Depth) {
+ const TargetData *TD, unsigned Depth) {
const unsigned MaxDepth = 6;
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
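// Illustrative sketch of the known-bits bookkeeping, outside LLVM, for the
// And case handled below: a result bit is known one only if known one in
// both operands, and known zero if known zero in either.
struct KnownBitsSketch { unsigned Zero, One; };
static KnownBitsSketch knownAnd(KnownBitsSketch L, KnownBitsSketch R) {
  KnownBitsSketch K;
  K.One = L.One & R.One;    // 1 & 1 == 1
  K.Zero = L.Zero | R.Zero; // 0 & x == 0
  return K;
}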
@@ -91,8 +88,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized())
- Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+ if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+ const Type *ObjectType = GV->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GV->isDeclaration() && !GV->mayBeOverridden())
+ Align = TD->getPrefTypeAlignment(ObjectType);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
if (Align > 0)
KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
CountTrailingZeros_32(Align));
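// Illustrative sketch of the rule above, outside LLVM: an address aligned to
// a power-of-two A has log2(A) trailing zero bits, which is exactly the
// low-bits mask recorded in KnownZero.
static unsigned trailingZerosFromAlign(unsigned Align) {
  unsigned N = 0;
  while (Align > 1) { Align >>= 1; ++N; } // log2 of a power-of-two Align
  return N;
}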
@@ -101,17 +106,28 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownOne.clear();
return;
}
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clear(); KnownOne.clear();
+ } else {
+ ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ }
+ return;
+ }
KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything.
if (Depth == MaxDepth || Mask == 0)
return; // Limit search depth.
- User *I = dyn_cast<User>(V);
+ Operator *I = dyn_cast<Operator>(V);
if (!I) return;
APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
- switch (getOpcode(I)) {
+ switch (I->getOpcode()) {
default: break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
@@ -228,12 +244,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- const Type *SrcTy = I->getOperand(0)->getType();
- unsigned SrcBitWidth = TD ?
- TD->getTypeSizeInBits(SrcTy) :
- SrcTy->getScalarSizeInBits();
+ if (isa<PointerType>(SrcTy))
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
+ else
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+
APInt MaskIn(Mask);
MaskIn.zextOrTrunc(SrcBitWidth);
KnownZero.zextOrTrunc(SrcBitWidth);
@@ -261,8 +281,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
- unsigned SrcBitWidth = SrcTy->getBitWidth();
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
APInt MaskIn(Mask);
MaskIn.trunc(SrcBitWidth);
@@ -382,7 +401,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Determine which operand has more trailing zeros, and use that
// many bits from the other operand.
if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (getOpcode(I) == Instruction::Add) {
+ if (I->getOpcode() == Instruction::Add) {
APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
KnownZero |= KnownZero2 & Mask;
KnownOne |= KnownOne2 & Mask;
@@ -462,10 +481,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
Align =
std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
+ (unsigned)TD->getABITypeAlignment(
+ Type::getDoubleTy(V->getContext())));
Align =
std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
+ (unsigned)TD->getABITypeAlignment(
+ Type::getInt64Ty(V->getContext())));
}
}
@@ -522,10 +543,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
for (unsigned i = 0; i != 2; ++i) {
Value *L = P->getIncomingValue(i);
Value *R = P->getIncomingValue(!i);
- User *LU = dyn_cast<User>(L);
+ Operator *LU = dyn_cast<Operator>(L);
if (!LU)
continue;
- unsigned Opcode = getOpcode(LU);
+ unsigned Opcode = LU->getOpcode();
// Check for operations that have the property that if
// both their operands have low zero bits, the result
// will have low zero bits.
@@ -608,8 +629,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
- TargetData *TD, unsigned Depth) {
+ const TargetData *TD, unsigned Depth) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
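// Illustrative sketch of the predicate, outside LLVM: 'V & Mask' is provably
// zero exactly when every bit of Mask is a known-zero bit of V.
static bool maskedValueIsZeroSketch(unsigned KnownZero, unsigned Mask) {
  return (KnownZero & Mask) == Mask;
}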
@@ -626,7 +653,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
///
/// 'Op' must have a scalar integer type.
///
-unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
+unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
+ unsigned Depth) {
assert((TD || V->getType()->isIntOrIntVector()) &&
"ComputeNumSignBits requires a TargetData object to operate "
"on non-integer values!");
@@ -642,8 +670,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
if (Depth == 6)
return 1; // Limit search depth.
- User *U = dyn_cast<User>(V);
- switch (getOpcode(V)) {
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
@@ -789,7 +817,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (Depth == 6)
return 1; // Limit search depth.
- const Instruction *I = dyn_cast<Instruction>(V);
+ const Operator *I = dyn_cast<Operator>(V);
if (I == 0) return false;
// (add x, 0.0) is guaranteed to return +0.0, not -0.0.
@@ -810,15 +838,15 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction()) {
if (F->isDeclaration()) {
- switch (F->getNameLen()) {
- case 3: // abs(x) != -0.0
- if (!strcmp(F->getNameStart(), "abs")) return true;
- break;
- case 4: // abs[lf](x) != -0.0
- if (!strcmp(F->getNameStart(), "absf")) return true;
- if (!strcmp(F->getNameStart(), "absl")) return true;
- break;
- }
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getOperand(1), Depth+1);
}
}
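// Illustrative check of the library facts relied on above: fabs never
// returns -0.0, while IEEE-754 sqrt(-0.0) is -0.0, so sqrt preserves the
// property only when its operand does (hence the recursion on the operand).
#include <cmath>
#include <cstdio>
static void negZeroDemo() {
  std::printf("%g %g\n", std::fabs(-0.0), std::sqrt(-0.0)); // prints: 0 -0
}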
@@ -831,10 +859,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
// indices from Idxs that should be left out when inserting into the resulting
// struct. To is the result struct built so far, new insertvalue instructions
// build on that.
-Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
- SmallVector<unsigned, 10> &Idxs,
- unsigned IdxSkip,
- Instruction *InsertBefore) {
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ LLVMContext &Context,
+ Instruction *InsertBefore) {
const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
if (STy) {
// Save the original To argument so we can modify it
@@ -845,7 +874,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
Idxs.push_back(i);
Value *PrevTo = To;
To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
- InsertBefore);
+ Context, InsertBefore);
Idxs.pop_back();
if (!To) {
// Couldn't find any inserted value for this index? Cleanup
@@ -868,7 +897,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// we might be able to find the complete struct somewhere.
// Find the value that is at that particular spot
- Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+ Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end(), Context);
if (!V)
return NULL;
@@ -890,8 +919,9 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// insertvalue instruction somewhere).
//
// All inserted insertvalue instructions are inserted before InsertBefore
-Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
- const unsigned *idx_end, Instruction *InsertBefore) {
+static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+ const unsigned *idx_end, LLVMContext &Context,
+ Instruction *InsertBefore) {
assert(InsertBefore && "Must have someplace to insert!");
const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
idx_begin,
@@ -900,7 +930,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
unsigned IdxSkip = Idxs.size();
- return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip,
+ Context, InsertBefore);
}
/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
@@ -910,7 +941,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
- const unsigned *idx_end, Instruction *InsertBefore) {
+ const unsigned *idx_end, LLVMContext &Context,
+ Instruction *InsertBefore) {
// Nothing to index? Just return V then (this is useful at the end of our
// recursion)
if (idx_begin == idx_end)
@@ -921,20 +953,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
&& "Invalid indices for type?");
const CompositeType *PTy = cast<CompositeType>(V->getType());
-
+
if (isa<UndefValue>(V))
return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
idx_begin,
idx_end));
else if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
- idx_begin,
- idx_end));
+ idx_begin,
+ idx_end));
else if (Constant *C = dyn_cast<Constant>(V)) {
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
// Recursively process this constant
- return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, idx_end,
- InsertBefore);
+ return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
+ idx_end, Context, InsertBefore);
} else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
@@ -953,7 +985,8 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// %C = insertvalue {i32, i32 } %A, i32 11, 1
// which allows the unused 0,0 element from the nested struct to be
// removed.
- return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+ return BuildSubAggregate(V, idx_begin, req_idx,
+ Context, InsertBefore);
else
// We can't handle this without inserting insertvalues
return 0;
@@ -964,13 +997,13 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// looking for, then.
if (*req_idx != *i)
return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
- InsertBefore);
+ Context, InsertBefore);
}
// If we end up here, the indices of the insertvalue match with those
// requested (though possibly only partially). Now we recursively look at
// the inserted value, passing any remaining indices.
return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
- InsertBefore);
+ Context, InsertBefore);
} else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
@@ -994,7 +1027,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
&& "Number of indices added not correct?");
return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
- InsertBefore);
+ Context, InsertBefore);
}
// Otherwise, we don't know (such as, extracting from a function return value
// or load instruction)
@@ -1035,7 +1068,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Make sure the index-ee is a pointer to array of i8.
const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
- if (AT == 0 || AT->getElementType() != Type::Int8Ty)
+ if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -1056,11 +1089,16 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
StopAtNul);
}
+ if (MDString *MDStr = dyn_cast<MDString>(V)) {
+ Str = MDStr->getString();
+ return true;
+ }
+
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
- if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
Constant *GlobalInit = GV->getInitializer();
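// Illustrative note on the hasDefinitiveInitializer() change above: any
// global with a body satisfies hasInitializer(), but a weak definition's
// body may be replaced at link time (invented IR):
//   @s = weak constant [4 x i8] c"abc\00"  ; initializer NOT definitive
//   @t = constant [4 x i8] c"abc\00"       ; definitive, safe to fold
// String folding therefore needs the stronger predicate.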
@@ -1074,7 +1112,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 || Array->getType()->getElementType() != Type::Int8Ty)
+ if (Array == 0 ||
+ Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
return false;
// Get the number of elements in the array
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
index bb5726293fe4..00778d998344 100644
--- a/lib/Archive/Archive.cpp
+++ b/lib/Archive/Archive.cpp
@@ -31,7 +31,7 @@ ArchiveMember::getMemberSize() const {
// If it has a long filename, include the name length
if (hasLongFilename())
- result += path.toString().length() + 1;
+ result += path.str().length() + 1;
// If it's now odd-length, include the padding byte
if (result % 2 != 0 )
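// Illustrative sketch of the size rule above, outside LLVM: ar members are
// 2-byte aligned, so odd-length payloads get one pad byte, and a long
// filename contributes its length plus a terminator.
static unsigned long memberSizeSketch(unsigned long DataSize,
                                      unsigned long NameLen, bool LongName) {
  unsigned long Result = DataSize + (LongName ? NameLen + 1 : 0);
  return (Result % 2 != 0) ? Result + 1 : Result;
}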
@@ -76,38 +76,38 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
path = newFile;
// SVR4 symbol tables have an empty name
- if (path.toString() == ARFILE_SVR4_SYMTAB_NAME)
+ if (path.str() == ARFILE_SVR4_SYMTAB_NAME)
flags |= SVR4SymbolTableFlag;
else
flags &= ~SVR4SymbolTableFlag;
// BSD4.4 symbol tables have a special name
- if (path.toString() == ARFILE_BSD4_SYMTAB_NAME)
+ if (path.str() == ARFILE_BSD4_SYMTAB_NAME)
flags |= BSD4SymbolTableFlag;
else
flags &= ~BSD4SymbolTableFlag;
// LLVM symbol tables have a very specific name
- if (path.toString() == ARFILE_LLVM_SYMTAB_NAME)
+ if (path.str() == ARFILE_LLVM_SYMTAB_NAME)
flags |= LLVMSymbolTableFlag;
else
flags &= ~LLVMSymbolTableFlag;
// String table name
- if (path.toString() == ARFILE_STRTAB_NAME)
+ if (path.str() == ARFILE_STRTAB_NAME)
flags |= StringTableFlag;
else
flags &= ~StringTableFlag;
// If it has a slash then it has a path
- bool hasSlash = path.toString().find('/') != std::string::npos;
+ bool hasSlash = path.str().find('/') != std::string::npos;
if (hasSlash)
flags |= HasPathFlag;
else
flags &= ~HasPathFlag;
// If it has a slash or it's over 15 chars then it's a long filename format
- if (hasSlash || path.toString().length() > 15)
+ if (hasSlash || path.str().length() > 15)
flags |= HasLongFilenameFlag;
else
flags &= ~HasLongFilenameFlag;
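// Illustrative sketch of the classification above, outside LLVM: the classic
// ar header holds at most 15 name characters and reserves '/', so either
// condition forces the long-filename format.
#include <string>
static bool needsLongFilename(const std::string &Path) {
  return Path.find('/') != std::string::npos || Path.length() > 15;
}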
@@ -126,8 +126,11 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
return true;
}
- // Determine what kind of file it is
+ // Determine what kind of file it is.
switch (sys::IdentifyFileType(signature,4)) {
+ case sys::Bitcode_FileType:
+ flags |= BitcodeFlag;
+ break;
default:
flags &= ~BitcodeFlag;
break;
@@ -214,7 +217,7 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName,
std::auto_ptr<MemoryBuffer> Buffer(
MemoryBuffer::getFileOrSTDIN(fName.c_str()));
if (!Buffer.get()) {
- if (ErrMsg) *ErrMsg = "Could not open file '" + fName.toString() + "'";
+ if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'";
return true;
}
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index 718d44608b1d..74895d8a6f11 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -344,8 +344,8 @@ Archive::getAllModules(std::vector<Module*>& Modules,
for (iterator I=begin(), E=end(); I != E; ++I) {
if (I->isBitcode()) {
- std::string FullMemberName = archPath.toString() +
- "(" + I->getPath().toString() + ")";
+ std::string FullMemberName = archPath.str() +
+ "(" + I->getPath().str() + ")";
MemoryBuffer *Buffer =
MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
@@ -484,8 +484,8 @@ Archive::findModuleDefiningSymbol(const std::string& symbol,
return 0;
// Now, load the bitcode module to get the ModuleProvider
- std::string FullMemberName = archPath.toString() + "(" +
- mbr->getPath().toString() + ")";
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(),
FullMemberName.c_str());
memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize());
@@ -534,8 +534,8 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
if (mbr->isBitcode()) {
// Get the symbols
std::vector<std::string> symbols;
- std::string FullMemberName = archPath.toString() + "(" +
- mbr->getPath().toString() + ")";
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
ModuleProvider* MP =
GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(),
FullMemberName, Context, symbols, error);
@@ -552,7 +552,7 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
} else {
if (error)
*error = "Can't parse bitcode member: " +
- mbr->getPath().toString() + ": " + *error;
+ mbr->getPath().str() + ": " + *error;
delete mbr;
return false;
}
@@ -612,7 +612,7 @@ bool Archive::isBitcodeArchive() {
continue;
std::string FullMemberName =
- archPath.toString() + "(" + I->getPath().toString() + ")";
+ archPath.str() + "(" + I->getPath().str() + ")";
MemoryBuffer *Buffer =
MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 881d75b3ba8f..d17f6b5036f3 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -95,7 +95,7 @@ Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
memcpy(hdr.date,buffer,12);
// Get rid of trailing blanks in the name
- std::string mbrPath = mbr.getPath().toString();
+ std::string mbrPath = mbr.getPath().str();
size_t mbrLen = mbrPath.length();
while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') {
mbrPath.erase(mbrLen-1,1);
@@ -173,10 +173,10 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
mbr->info = *FSInfo;
unsigned flags = 0;
- bool hasSlash = filePath.toString().find('/') != std::string::npos;
+ bool hasSlash = filePath.str().find('/') != std::string::npos;
if (hasSlash)
flags |= ArchiveMember::HasPathFlag;
- if (hasSlash || filePath.toString().length() > 15)
+ if (hasSlash || filePath.str().length() > 15)
flags |= ArchiveMember::HasLongFilenameFlag;
std::string magic;
mbr->path.getMagicNumber(magic,4);
@@ -223,8 +223,7 @@ Archive::writeMember(
// symbol table if its a bitcode file.
if (CreateSymbolTable && member.isBitcode()) {
std::vector<std::string> symbols;
- std::string FullMemberName = archPath.toString() + "(" +
- member.getPath().toString()
+ std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+ ")";
ModuleProvider* MP =
GetBitcodeSymbols((const unsigned char*)data,fSize,
@@ -249,7 +248,7 @@ Archive::writeMember(
} else {
delete mFile;
if (ErrMsg)
- *ErrMsg = "Can't parse bitcode member: " + member.getPath().toString()
+ *ErrMsg = "Can't parse bitcode member: " + member.getPath().str()
+ ": " + *ErrMsg;
return true;
}
@@ -266,8 +265,8 @@ Archive::writeMember(
// Write the long filename if it's long
if (writeLongName) {
- ARFile.write(member.getPath().toString().data(),
- member.getPath().toString().length());
+ ARFile.write(member.getPath().str().data(),
+ member.getPath().str().length());
}
// Write the (possibly compressed) member's content to the file.
@@ -371,7 +370,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
if (TmpArchive.exists())
TmpArchive.eraseFromDisk();
if (ErrMsg)
- *ErrMsg = "Error opening archive file: " + archPath.toString();
+ *ErrMsg = "Error opening archive file: " + archPath.str();
return true;
}
@@ -425,7 +424,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
if (TmpArchive.exists())
TmpArchive.eraseFromDisk();
if (ErrMsg)
- *ErrMsg = "Error opening archive file: " + FinalFilePath.toString();
+ *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
return true;
}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 741c5381fc78..0e9f1a05fe3e 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -14,11 +14,14 @@
#include "LLLexer.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instruction.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Assembly/Parser.h"
+#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -180,8 +183,9 @@ static const char *isLabelTail(const char *CurPtr) {
// Lexer definition.
//===----------------------------------------------------------------------===//
-LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err)
- : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), APFloatVal(0.0) {
+LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
+ LLVMContext &C)
+ : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
CurPtr = CurBuf->getBufferStart();
}
@@ -250,7 +254,7 @@ lltok::Kind LLLexer::LexToken() {
case ';':
SkipLineComment();
return LexToken();
- case '!': return lltok::Metadata;
+ case '!': return LexMetadata();
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '-':
@@ -418,7 +422,23 @@ static bool JustWhitespaceNewLine(const char *&Ptr) {
return false;
}
+/// LexMetadata:
+/// !{...}
+/// !42
+/// !foo
+lltok::Kind LLLexer::LexMetadata() {
+ if (isalpha(CurPtr[0])) {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_')
+ ++CurPtr;
+ StrVal.assign(TokStart+1, CurPtr); // Skip !
+ return lltok::NamedOrCustomMD;
+ }
+ return lltok::Metadata;
+}
+
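// Illustrative inputs for the productions above: "!{i32 0}" and "!42" both
// yield lltok::Metadata (the structure or node number is parsed afterwards),
// while "!dbg" begins with a letter, so LexMetadata returns
// lltok::NamedOrCustomMD with StrVal == "dbg".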
/// LexIdentifier: Handle several related productions:
/// Label [-a-zA-Z$._0-9]+:
/// IntegerType i[0-9]+
@@ -452,7 +472,7 @@ lltok::Kind LLLexer::LexIdentifier() {
Error("bitwidth for integer type out of range!");
return lltok::Error;
}
- TyVal = IntegerType::get(NumBits);
+ TyVal = IntegerType::get(Context, NumBits);
return lltok::Type;
}
@@ -471,6 +491,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(global); KEYWORD(constant);
KEYWORD(private);
+ KEYWORD(linker_private);
KEYWORD(internal);
KEYWORD(available_externally);
KEYWORD(linkonce);
@@ -497,6 +518,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(deplibs);
KEYWORD(datalayout);
KEYWORD(volatile);
+ KEYWORD(nuw);
+ KEYWORD(nsw);
+ KEYWORD(exact);
+ KEYWORD(inbounds);
KEYWORD(align);
KEYWORD(addrspace);
KEYWORD(section);
@@ -504,6 +529,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(module);
KEYWORD(asm);
KEYWORD(sideeffect);
+ KEYWORD(msasm);
KEYWORD(gc);
KEYWORD(ccc);
@@ -531,6 +557,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(readnone);
KEYWORD(readonly);
+ KEYWORD(inlinehint);
KEYWORD(noinline);
KEYWORD(alwaysinline);
KEYWORD(optsize);
@@ -538,6 +565,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(sspreq);
KEYWORD(noredzone);
KEYWORD(noimplicitfloat);
+ KEYWORD(naked);
KEYWORD(type);
KEYWORD(opaque);
@@ -554,14 +582,14 @@ lltok::Kind LLLexer::LexIdentifier() {
#define TYPEKEYWORD(STR, LLVMTY) \
if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
TyVal = LLVMTY; return lltok::Type; }
- TYPEKEYWORD("void", Type::VoidTy);
- TYPEKEYWORD("float", Type::FloatTy);
- TYPEKEYWORD("double", Type::DoubleTy);
- TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty);
- TYPEKEYWORD("fp128", Type::FP128Ty);
- TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty);
- TYPEKEYWORD("label", Type::LabelTy);
- TYPEKEYWORD("metadata", Type::MetadataTy);
+ TYPEKEYWORD("void", Type::getVoidTy(Context));
+ TYPEKEYWORD("float", Type::getFloatTy(Context));
+ TYPEKEYWORD("double", Type::getDoubleTy(Context));
+ TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
+ TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
+ TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
+ TYPEKEYWORD("label", Type::getLabelTy(Context));
+ TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
#undef TYPEKEYWORD
// Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
@@ -589,7 +617,6 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
- INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp);
INSTKEYWORD(phi, PHI);
INSTKEYWORD(call, Call);
@@ -635,7 +662,7 @@ lltok::Kind LLLexer::LexIdentifier() {
TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
int len = CurPtr-TokStart-3;
uint32_t bits = len * 4;
- APInt Tmp(bits, TokStart+3, len, 16);
+ APInt Tmp(bits, StringRef(TokStart+3, len), 16);
uint32_t activeBits = Tmp.getActiveBits();
if (activeBits > 0 && activeBits < bits)
Tmp.trunc(activeBits);
@@ -698,7 +725,7 @@ lltok::Kind LLLexer::Lex0x() {
uint64_t Pair[2];
switch (Kind) {
- default: assert(0 && "Unknown kind!");
+ default: llvm_unreachable("Unknown kind!");
case 'K':
// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
FP80HexToIntPair(TokStart+3, CurPtr, Pair);
@@ -761,7 +788,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
return Lex0x();
unsigned Len = CurPtr-TokStart;
uint32_t numBits = ((Len * 64) / 19) + 2;
- APInt Tmp(numBits, TokStart, Len, 10);
+ APInt Tmp(numBits, StringRef(TokStart, Len), 10);
if (TokStart[0] == '-') {
uint32_t minBits = Tmp.getMinSignedBits();
if (minBits > 0 && minBits < numBits)
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index b5e58f1418ec..de39272f45e2 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -24,12 +24,14 @@ namespace llvm {
class MemoryBuffer;
class Type;
class SMDiagnostic;
+ class LLVMContext;
class LLLexer {
const char *CurPtr;
MemoryBuffer *CurBuf;
SMDiagnostic &ErrorInfo;
SourceMgr &SM;
+ LLVMContext &Context;
// Information about the current token.
const char *TokStart;
@@ -42,7 +44,8 @@ namespace llvm {
std::string TheError;
public:
- explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &);
+ explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
+ LLVMContext &C);
~LLLexer() {}
lltok::Kind Lex() {
@@ -72,6 +75,7 @@ namespace llvm {
lltok::Kind LexDigitOrNegative();
lltok::Kind LexPositive();
lltok::Kind LexAt();
+ lltok::Kind LexMetadata();
lltok::Kind LexPercent();
lltok::Kind LexQuote();
lltok::Kind Lex0x();
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 3966ab3b5fc6..09bc5f736fc6 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -19,11 +19,13 @@
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
-#include "llvm/MDNode.h"
+#include "llvm/Metadata.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -40,15 +42,17 @@ namespace llvm {
t_Null, t_Undef, t_Zero, // No value.
t_EmptyArray, // No value: []
t_Constant, // Value in ConstantVal.
- t_InlineAsm // Value in StrVal/StrVal2/UIntVal.
+ t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
+ t_Metadata // Value in MetadataVal.
} Kind;
-
+
LLParser::LocTy Loc;
unsigned UIntVal;
std::string StrVal, StrVal2;
APSInt APSIntVal;
APFloat APFloatVal;
Constant *ConstantVal;
+ MetadataBase *MetadataVal;
ValID() : APFloatVal(0.0) {}
};
}
@@ -73,21 +77,29 @@ bool LLParser::ValidateEndOfModule() {
return Error(ForwardRefTypeIDs.begin()->second.second,
"use of undefined type '%" +
utostr(ForwardRefTypeIDs.begin()->first) + "'");
-
+
if (!ForwardRefVals.empty())
return Error(ForwardRefVals.begin()->second.second,
"use of undefined value '@" + ForwardRefVals.begin()->first +
"'");
-
+
if (!ForwardRefValIDs.empty())
return Error(ForwardRefValIDs.begin()->second.second,
"use of undefined value '@" +
utostr(ForwardRefValIDs.begin()->first) + "'");
-
+
+ if (!ForwardRefMDNodes.empty())
+ return Error(ForwardRefMDNodes.begin()->second.second,
+ "use of undefined metadata '!" +
+ utostr(ForwardRefMDNodes.begin()->first) + "'");
+
+
// Look for intrinsic functions and CallInst that need to be upgraded
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
-
+
+ // Check debug info intrinsics.
+ CheckDebugInfoIntrinsics(M);
return false;
}
@@ -107,27 +119,31 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_target: if (ParseTargetDefinition()) return true; break;
case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
case lltok::kw_type: if (ParseUnnamedType()) return true; break;
+ case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
case lltok::LocalVar: if (ParseNamedType()) return true; break;
+ case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
case lltok::Metadata: if (ParseStandaloneMetadata()) return true; break;
+ case lltok::NamedOrCustomMD: if (ParseNamedMetadata()) return true; break;
// The Global variable production with no name can have many different
// optional leading prefixes, the production is:
// GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
// OptionalAddrSpace ('constant'|'global') ...
- case lltok::kw_private: // OptionalLinkage
- case lltok::kw_internal: // OptionalLinkage
- case lltok::kw_weak: // OptionalLinkage
- case lltok::kw_weak_odr: // OptionalLinkage
- case lltok::kw_linkonce: // OptionalLinkage
- case lltok::kw_linkonce_odr: // OptionalLinkage
- case lltok::kw_appending: // OptionalLinkage
- case lltok::kw_dllexport: // OptionalLinkage
- case lltok::kw_common: // OptionalLinkage
- case lltok::kw_dllimport: // OptionalLinkage
- case lltok::kw_extern_weak: // OptionalLinkage
- case lltok::kw_external: { // OptionalLinkage
+ case lltok::kw_private : // OptionalLinkage
+ case lltok::kw_linker_private: // OptionalLinkage
+ case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_weak: // OptionalLinkage
+ case lltok::kw_weak_odr: // OptionalLinkage
+ case lltok::kw_linkonce: // OptionalLinkage
+ case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_appending: // OptionalLinkage
+ case lltok::kw_dllexport: // OptionalLinkage
+ case lltok::kw_common: // OptionalLinkage
+ case lltok::kw_dllimport: // OptionalLinkage
+ case lltok::kw_extern_weak: // OptionalLinkage
+ case lltok::kw_external: { // OptionalLinkage
unsigned Linkage, Visibility;
if (ParseOptionalLinkage(Linkage) ||
ParseOptionalVisibility(Visibility) ||
@@ -144,7 +160,7 @@ bool LLParser::ParseTopLevelEntities() {
return true;
break;
}
-
+
case lltok::kw_thread_local: // OptionalThreadLocal
case lltok::kw_addrspace: // OptionalAddrSpace
case lltok::kw_constant: // GlobalType
@@ -161,11 +177,11 @@ bool LLParser::ParseTopLevelEntities() {
bool LLParser::ParseModuleAsm() {
assert(Lex.getKind() == lltok::kw_module);
Lex.Lex();
-
- std::string AsmStr;
+
+ std::string AsmStr;
if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
ParseStringConstant(AsmStr)) return true;
-
+
const std::string &AsmSoFar = M->getModuleInlineAsm();
if (AsmSoFar.empty())
M->setModuleInlineAsm(AsmStr);
@@ -211,7 +227,7 @@ bool LLParser::ParseDepLibs() {
if (EatIfPresent(lltok::rsquare))
return false;
-
+
std::string Str;
if (ParseStringConstant(Str)) return true;
M->addLibrary(Str);
@@ -224,32 +240,44 @@ bool LLParser::ParseDepLibs() {
return ParseToken(lltok::rsquare, "expected ']' at end of list");
}
-/// toplevelentity
+/// ParseUnnamedType:
/// ::= 'type' type
+/// ::= LocalVarID '=' 'type' type
bool LLParser::ParseUnnamedType() {
+ unsigned TypeID = NumberedTypes.size();
+
+ // Handle the LocalVarID form.
+ if (Lex.getKind() == lltok::LocalVarID) {
+ if (Lex.getUIntVal() != TypeID)
+ return Error(Lex.getLoc(), "type expected to be numbered '%" +
+ utostr(TypeID) + "'");
+ Lex.Lex(); // eat LocalVarID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name"))
+ return true;
+ }
+
assert(Lex.getKind() == lltok::kw_type);
LocTy TypeLoc = Lex.getLoc();
Lex.Lex(); // eat kw_type
- PATypeHolder Ty(Type::VoidTy);
+ PATypeHolder Ty(Type::getVoidTy(Context));
if (ParseType(Ty)) return true;
-
- unsigned TypeID = NumberedTypes.size();
-
+
// See if this type was previously referenced.
std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
FI = ForwardRefTypeIDs.find(TypeID);
if (FI != ForwardRefTypeIDs.end()) {
if (FI->second.first.get() == Ty)
return Error(TypeLoc, "self referential type is invalid");
-
+
cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
Ty = FI->second.first.get();
ForwardRefTypeIDs.erase(FI);
}
-
+
NumberedTypes.push_back(Ty);
-
+
return false;
}
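// Illustrative only, both forms accepted above (invented example); the
// numbered form must carry the next sequential type number:
//   type { i32, i32 }       ; anonymous form, becomes type %0
//   %1 = type opaque        ; numbered form, must be %1 here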
@@ -259,14 +287,14 @@ bool LLParser::ParseNamedType() {
std::string Name = Lex.getStrVal();
LocTy NameLoc = Lex.getLoc();
Lex.Lex(); // eat LocalVar.
-
- PATypeHolder Ty(Type::VoidTy);
-
+
+ PATypeHolder Ty(Type::getVoidTy(Context));
+
if (ParseToken(lltok::equal, "expected '=' after name") ||
ParseToken(lltok::kw_type, "expected 'type' after name") ||
ParseType(Ty))
return true;
-
+
// Set the type name, checking for conflicts as we do so.
bool AlreadyExists = M->addTypeName(Name, Ty);
if (!AlreadyExists) return false;
@@ -283,16 +311,16 @@ bool LLParser::ParseNamedType() {
Ty = FI->second.first.get();
ForwardRefTypes.erase(FI);
}
-
+
// Inserting a name that is already defined, get the existing name.
const Type *Existing = M->getTypeByName(Name);
assert(Existing && "Conflict but no matching type?!");
-
+
// Otherwise, this is an attempt to redefine a type. That's okay if
// the redefinition is identical to the original.
// FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
if (Existing == Ty) return false;
-
+
// Any other kind of (non-equivalent) redefinition is an error.
return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" +
Ty->getDescription() + "'");
@@ -304,7 +332,7 @@ bool LLParser::ParseNamedType() {
bool LLParser::ParseDeclare() {
assert(Lex.getKind() == lltok::kw_declare);
Lex.Lex();
-
+
Function *F;
return ParseFunctionHeader(F, false);
}
@@ -314,7 +342,7 @@ bool LLParser::ParseDeclare() {
bool LLParser::ParseDefine() {
assert(Lex.getKind() == lltok::kw_define);
Lex.Lex();
-
+
Function *F;
return ParseFunctionHeader(F, true) ||
ParseFunctionBody(*F);
@@ -336,6 +364,38 @@ bool LLParser::ParseGlobalType(bool &IsConstant) {
return false;
}
+/// ParseUnnamedGlobal:
+/// OptionalVisibility ALIAS ...
+/// OptionalLinkage OptionalVisibility ... -> global variable
+/// GlobalID '=' OptionalVisibility ALIAS ...
+/// GlobalID '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseUnnamedGlobal() {
+ unsigned VarID = NumberedVals.size();
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+
+ // Handle the GlobalID form.
+ if (Lex.getKind() == lltok::GlobalID) {
+ if (Lex.getUIntVal() != VarID)
+ return Error(Lex.getLoc(), "variable expected to be numbered '%" +
+ utostr(VarID) + "'");
+ Lex.Lex(); // eat GlobalID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name"))
+ return true;
+ }
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
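// Illustrative only, the numbered-global forms accepted above (invented
// example); IDs must be sequential:
//   @0 = global i32 0
//   @1 = alias i32* @0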
/// ParseNamedGlobal:
/// GlobalVar '=' OptionalVisibility ALIAS ...
/// GlobalVar '=' OptionalLinkage OptionalVisibility ... -> global variable
@@ -344,21 +404,96 @@ bool LLParser::ParseNamedGlobal() {
LocTy NameLoc = Lex.getLoc();
std::string Name = Lex.getStrVal();
Lex.Lex();
-
+
bool HasLinkage;
unsigned Linkage, Visibility;
if (ParseToken(lltok::equal, "expected '=' in global variable") ||
ParseOptionalLinkage(Linkage, HasLinkage) ||
ParseOptionalVisibility(Visibility))
return true;
-
+
if (HasLinkage || Lex.getKind() != lltok::kw_alias)
return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
return ParseAlias(Name, NameLoc, Visibility);
}
+// MDString:
+// ::= '!' STRINGCONSTANT
+bool LLParser::ParseMDString(MetadataBase *&MDS) {
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ MDS = MDString::get(Context, Str);
+ return false;
+}
+
+// MDNode:
+// ::= '!' MDNodeNumber
+bool LLParser::ParseMDNode(MetadataBase *&Node) {
+ // !{ ..., !42, ... }
+ unsigned MID = 0;
+ if (ParseUInt32(MID)) return true;
+
+ // Check existing MDNode.
+ std::map<unsigned, MetadataBase *>::iterator I = MetadataCache.find(MID);
+ if (I != MetadataCache.end()) {
+ Node = I->second;
+ return false;
+ }
+
+ // Check known forward references.
+ std::map<unsigned, std::pair<MetadataBase *, LocTy> >::iterator
+ FI = ForwardRefMDNodes.find(MID);
+ if (FI != ForwardRefMDNodes.end()) {
+ Node = FI->second.first;
+ return false;
+ }
+
+ // Create MDNode forward reference
+ SmallVector<Value *, 1> Elts;
+ std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID);
+ Elts.push_back(MDString::get(Context, FwdRefName));
+ MDNode *FwdNode = MDNode::get(Context, Elts.data(), Elts.size());
+ ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
+ Node = FwdNode;
+ return false;
+}
+
+/// ParseNamedMetadata:
+/// !foo = !{ !1, !2 }
+bool LLParser::ParseNamedMetadata() {
+ assert(Lex.getKind() == lltok::NamedOrCustomMD);
+ Lex.Lex();
+ std::string Name = Lex.getStrVal();
+
+ if (ParseToken(lltok::equal, "expected '=' here"))
+ return true;
+
+ if (Lex.getKind() != lltok::Metadata)
+ return TokError("Expected '!' here");
+ Lex.Lex();
+
+ if (Lex.getKind() != lltok::lbrace)
+ return TokError("Expected '{' here");
+ Lex.Lex();
+ SmallVector<MetadataBase *, 8> Elts;
+ do {
+ if (Lex.getKind() != lltok::Metadata)
+ return TokError("Expected '!' here");
+ Lex.Lex();
+ MetadataBase *N = 0;
+ if (ParseMDNode(N)) return true;
+ Elts.push_back(N);
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ NamedMDNode::Create(Context, Name, Elts.data(), Elts.size(), M);
+ return false;
+}
+
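// Illustrative input exercising the productions above (invented example):
//   !foo = !{ !0, !1 }               ; named metadata listing two nodes
//   !0 = metadata !{ i32 7 }         ; standalone node
//   !1 = metadata !{ metadata !"x" } ; uses of !1 parsed before this line
//                                    ; resolve through ForwardRefMDNodes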
/// ParseStandaloneMetadata:
-/// !42 = !{...}
+/// !42 = !{...}
bool LLParser::ParseStandaloneMetadata() {
assert(Lex.getKind() == lltok::Metadata);
Lex.Lex();
@@ -371,17 +506,32 @@ bool LLParser::ParseStandaloneMetadata() {
return true;
LocTy TyLoc;
- bool IsConstant;
- PATypeHolder Ty(Type::VoidTy);
- if (ParseGlobalType(IsConstant) ||
- ParseType(Ty, TyLoc))
+ PATypeHolder Ty(Type::getVoidTy(Context));
+ if (ParseType(Ty, TyLoc))
return true;
-
- Constant *Init = 0;
- if (ParseGlobalValue(Ty, Init))
- return true;
+ if (Lex.getKind() != lltok::Metadata)
+ return TokError("Expected metadata here");
+
+ Lex.Lex();
+ if (Lex.getKind() != lltok::lbrace)
+ return TokError("Expected '{' here");
+
+ SmallVector<Value *, 16> Elts;
+ if (ParseMDNodeVector(Elts)
+ || ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size());
MetadataCache[MetadataID] = Init;
+ std::map<unsigned, std::pair<MetadataBase *, LocTy> >::iterator
+ FI = ForwardRefMDNodes.find(MetadataID);
+ if (FI != ForwardRefMDNodes.end()) {
+ MDNode *FwdNode = cast<MDNode>(FI->second.first);
+ FwdNode->replaceAllUsesWith(Init);
+ ForwardRefMDNodes.erase(FI);
+ }
+
return false;
}
@@ -390,7 +540,7 @@ bool LLParser::ParseStandaloneMetadata() {
/// Aliasee
/// ::= TypeAndValue
/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
-/// ::= 'getelementptr' '(' ... ')'
+/// ::= 'getelementptr' 'inbounds'? '(' ... ')'
///
/// Everything through visibility has already been parsed.
///
@@ -407,9 +557,10 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Linkage != GlobalValue::WeakAnyLinkage &&
Linkage != GlobalValue::WeakODRLinkage &&
Linkage != GlobalValue::InternalLinkage &&
- Linkage != GlobalValue::PrivateLinkage)
+ Linkage != GlobalValue::PrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateLinkage)
return Error(LinkageLoc, "invalid linkage type for alias");
-
+
Constant *Aliasee;
LocTy AliaseeLoc = Lex.getLoc();
if (Lex.getKind() != lltok::kw_bitcast &&
@@ -423,7 +574,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
return Error(AliaseeLoc, "invalid aliasee");
Aliasee = ID.ConstantVal;
}
-
+
if (!isa<PointerType>(Aliasee->getType()))
return Error(AliaseeLoc, "alias must have pointer type");
@@ -432,7 +583,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
(GlobalValue::LinkageTypes)Linkage, Name,
Aliasee);
GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
-
+
// See if this value already exists in the symbol table. If so, it is either
// a redefinition or a definition of a forward reference.
if (GlobalValue *Val =
@@ -449,18 +600,18 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
if (Val->getType() != GA->getType())
return Error(NameLoc,
"forward reference and definition of alias have different types");
-
+
// If they agree, just RAUW the old value with the alias and remove the
// forward ref info.
Val->replaceAllUsesWith(GA);
Val->eraseFromParent();
ForwardRefVals.erase(I);
}
-
+
// Insert into the module, we know its name won't collide now.
M->getAliasList().push_back(GA);
assert(GA->getNameStr() == Name && "Should not be a name conflict!");
-
+
return false;
}
@@ -478,14 +629,14 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
unsigned AddrSpace;
bool ThreadLocal, IsConstant;
LocTy TyLoc;
-
- PATypeHolder Ty(Type::VoidTy);
+
+ PATypeHolder Ty(Type::getVoidTy(Context));
if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
ParseOptionalAddrSpace(AddrSpace) ||
ParseGlobalType(IsConstant) ||
ParseType(Ty, TyLoc))
return true;
-
+
// If the linkage is specified and is external, then no initializer is
// present.
Constant *Init = 0;
@@ -496,9 +647,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
return true;
}
- if (isa<FunctionType>(Ty) || Ty == Type::LabelTy)
+ if (isa<FunctionType>(Ty) || Ty->isLabelTy())
return Error(TyLoc, "invalid type for global variable");
-
+
GlobalVariable *GV = 0;
// See if the global was forward referenced, if so, use the global.
@@ -516,20 +667,20 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
}
if (GV == 0) {
- GV = new GlobalVariable(Ty, false, GlobalValue::ExternalLinkage, 0, Name,
- M, false, AddrSpace);
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0,
+ Name, 0, false, AddrSpace);
} else {
if (GV->getType()->getElementType() != Ty)
return Error(TyLoc,
"forward reference and definition of global have different types");
-
+
// Move the forward-reference to the correct spot in the module.
M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV);
}
if (Name.empty())
NumberedVals.push_back(GV);
-
+
// Set the parsed properties on the global.
if (Init)
GV->setInitializer(Init);
@@ -537,11 +688,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
GV->setThreadLocal(ThreadLocal);
-
+
// Parse attributes on the global.
while (Lex.getKind() == lltok::comma) {
Lex.Lex();
-
+
if (Lex.getKind() == lltok::kw_section) {
Lex.Lex();
GV->setSection(Lex.getStrVal());
@@ -555,7 +706,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
TokError("unknown global variable property!");
}
}
-
+
return false;
}
@@ -574,11 +725,11 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
Error(Loc, "global variable reference must have pointer type");
return 0;
}
-
+
// Look this name up in the normal function symbol table.
GlobalValue *Val =
cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name));
-
+
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (Val == 0) {
@@ -587,7 +738,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
if (I != ForwardRefVals.end())
Val = I->second.first;
}
-
+
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val) {
if (Val->getType() == Ty) return Val;
@@ -595,7 +746,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
Val->getType()->getDescription() + "'");
return 0;
}
-
+
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal;
if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
@@ -604,13 +755,13 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
Error(Loc, "function may not return opaque type");
return 0;
}
-
+
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
} else {
- FwdVal = new GlobalVariable(PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, 0, Name, M);
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, Name);
}
-
+
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
@@ -621,9 +772,9 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
Error(Loc, "global variable reference must have pointer type");
return 0;
}
-
+
GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
-
+
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (Val == 0) {
@@ -632,7 +783,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
if (I != ForwardRefValIDs.end())
Val = I->second.first;
}
-
+
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val) {
if (Val->getType() == Ty) return Val;
@@ -640,7 +791,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
Val->getType()->getDescription() + "'");
return 0;
}
-
+
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal;
if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
@@ -651,10 +802,10 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
}
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
} else {
- FwdVal = new GlobalVariable(PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, 0, "", M);
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, "");
}
-
+
ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
@@ -707,7 +858,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
return ParseToken(lltok::lparen, "expected '(' in address space") ||
ParseUInt32(AddrSpace) ||
ParseToken(lltok::rparen, "expected ')' in address space");
-}
+}
/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
/// indicates what kind of attribute list this is: 0: function arg, 1: result,
@@ -716,7 +867,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
Attrs = Attribute::None;
LocTy AttrLoc = Lex.getLoc();
-
+
while (1) {
switch (Lex.getKind()) {
case lltok::kw_sext:
@@ -737,10 +888,10 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
default: // End of attributes.
if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
return Error(AttrLoc, "invalid use of function-only attribute");
-
+
if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly))
return Error(AttrLoc, "invalid use of parameter-only attribute");
-
+
return false;
case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
case lltok::kw_signext: Attrs |= Attribute::SExt; break;
@@ -756,13 +907,15 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
+ case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break;
case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break;
case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break;
case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break;
case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break;
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
-
+ case lltok::kw_naked: Attrs |= Attribute::Naked; break;
+
case lltok::kw_align: {
unsigned Alignment;
if (ParseOptionalAlignment(Alignment))
@@ -778,6 +931,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
/// ParseOptionalLinkage
/// ::= /*empty*/
/// ::= 'private'
+/// ::= 'linker_private'
/// ::= 'internal'
/// ::= 'weak'
/// ::= 'weak_odr'
@@ -792,22 +946,23 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
HasLinkage = false;
switch (Lex.getKind()) {
- default: Res = GlobalValue::ExternalLinkage; return false;
- case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
- case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
- case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
- case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
- case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
- case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+ default: Res=GlobalValue::ExternalLinkage; return false;
+ case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
+ case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break;
+ case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
+ case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
+ case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
+ case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
+ case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
case lltok::kw_available_externally:
Res = GlobalValue::AvailableExternallyLinkage;
break;
- case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break;
- case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break;
- case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
- case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break;
- case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
- case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
+ case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break;
+ case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break;
+ case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
+ case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break;
+ case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
+ case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
}
Lex.Lex();
HasLinkage = true;
@@ -819,7 +974,7 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
/// ::= 'default'
/// ::= 'hidden'
/// ::= 'protected'
-///
+///
bool LLParser::ParseOptionalVisibility(unsigned &Res) {
switch (Lex.getKind()) {
default: Res = GlobalValue::DefaultVisibility; return false;
@@ -843,7 +998,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'arm_aapcs_vfpcc'
/// ::= 'cc' UINT
///
-bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
+bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
switch (Lex.getKind()) {
default: CC = CallingConv::C; return false;
case lltok::kw_ccc: CC = CallingConv::C; break;
@@ -854,9 +1009,47 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
- case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC);
+ case lltok::kw_cc: {
+ unsigned ArbitraryCC;
+ Lex.Lex();
+ if (ParseUInt32(ArbitraryCC)) {
+ return true;
+ } else
+ CC = static_cast<CallingConv::ID>(ArbitraryCC);
+ return false;
+ }
+ break;
}
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalCustomMetadata
+/// ::= /* empty */
+/// ::= !dbg !42
+bool LLParser::ParseOptionalCustomMetadata() {
+
+ std::string Name;
+ if (Lex.getKind() == lltok::NamedOrCustomMD) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ } else
+ return false;
+
+ if (Lex.getKind() != lltok::Metadata)
+ return TokError("Expected '!' here");
Lex.Lex();
+
+ MetadataBase *Node;
+ if (ParseMDNode(Node)) return true;
+
+ MetadataContext &TheMetadata = M->getContext().getMetadata();
+ unsigned MDK = TheMetadata.getMDKind(Name.c_str());
+ if (!MDK)
+ MDK = TheMetadata.RegisterMDKind(Name.c_str());
+ MDsOnInst.push_back(std::make_pair(MDK, cast<MDNode>(Node)));
+
return false;
}
@@ -874,29 +1067,36 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
return false;
}
-/// ParseOptionalCommaAlignment
-/// ::= /* empty */
-/// ::= ',' 'align' 4
-bool LLParser::ParseOptionalCommaAlignment(unsigned &Alignment) {
- Alignment = 0;
- if (!EatIfPresent(lltok::comma))
- return false;
- return ParseToken(lltok::kw_align, "expected 'align'") ||
- ParseUInt32(Alignment);
+/// ParseOptionalInfo
+/// ::= OptionalInfo (',' OptionalInfo)+
+bool LLParser::ParseOptionalInfo(unsigned &Alignment) {
+
+ // FIXME: Handle customized metadata info attached with an instruction.
+ do {
+ if (Lex.getKind() == lltok::NamedOrCustomMD) {
+ if (ParseOptionalCustomMetadata()) return true;
+ } else if (Lex.getKind() == lltok::kw_align) {
+ if (ParseOptionalAlignment(Alignment)) return true;
+ } else
+ return true;
+ } while (EatIfPresent(lltok::comma));
+
+ return false;
}
+
/// ParseIndexList
/// ::= (',' uint32)+
bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
if (Lex.getKind() != lltok::comma)
return TokError("expected ',' as start of index list");
-
+
while (EatIfPresent(lltok::comma)) {
unsigned Idx;
if (ParseUInt32(Idx)) return true;
Indices.push_back(Idx);
}
-
+
return false;
}
@@ -908,14 +1108,14 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) {
LocTy TypeLoc = Lex.getLoc();
if (ParseTypeRec(Result)) return true;
-
+
// Verify no unresolved uprefs.
if (!UpRefs.empty())
return Error(UpRefs.back().Loc, "invalid unresolved type up reference");
-
- if (!AllowVoid && Result.get() == Type::VoidTy)
+
+ if (!AllowVoid && Result.get()->isVoidTy())
return Error(TypeLoc, "void type only allowed for function results");
-
+
return false;
}
@@ -930,26 +1130,26 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
// If Ty isn't abstract, or if there are no up-references in it, then there is
// nothing to resolve here.
if (!ty->isAbstract() || UpRefs.empty()) return ty;
-
+
PATypeHolder Ty(ty);
#if 0
errs() << "Type '" << Ty->getDescription()
<< "' newly formed. Resolving upreferences.\n"
<< UpRefs.size() << " upreferences active!\n";
#endif
-
+
// If we find any resolvable upreferences (i.e., those whose NestingLevel goes
// to zero), we resolve them all together before we resolve them to Ty. At
// the end of the loop, if there is anything to resolve to Ty, it will be in
// this variable.
OpaqueType *TypeToResolve = 0;
-
+
for (unsigned i = 0; i != UpRefs.size(); ++i) {
// Determine if 'Ty' directly contains this up-references 'LastContainedTy'.
bool ContainsType =
std::find(Ty->subtype_begin(), Ty->subtype_end(),
UpRefs[i].LastContainedTy) != Ty->subtype_end();
-
+
#if 0
errs() << " UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
<< UpRefs[i].LastContainedTy->getDescription() << ") = "
@@ -958,15 +1158,15 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
#endif
if (!ContainsType)
continue;
-
+
// Decrement level of upreference
unsigned Level = --UpRefs[i].NestingLevel;
UpRefs[i].LastContainedTy = Ty;
-
+
// If the Up-reference has a non-zero level, it shouldn't be resolved yet.
if (Level != 0)
continue;
-
+
#if 0
errs() << " * Resolving upreference for " << UpRefs[i].UpRefTy << "\n";
#endif
@@ -977,10 +1177,10 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
UpRefs.erase(UpRefs.begin()+i); // Remove from upreference list.
--i; // Do not skip the next element.
}
-
+
if (TypeToResolve)
TypeToResolve->refineAbstractTypeTo(Ty);
-
+
return Ty;
}
@@ -994,11 +1194,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
case lltok::Type:
// TypeRec ::= 'float' | 'void' (etc)
Result = Lex.getTyVal();
- Lex.Lex();
+ Lex.Lex();
break;
case lltok::kw_opaque:
// TypeRec ::= 'opaque'
- Result = Context.getOpaqueType();
+ Result = OpaqueType::get(Context);
Lex.Lex();
break;
case lltok::lbrace:
@@ -1028,7 +1228,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
Result = T;
} else {
- Result = Context.getOpaqueType();
+ Result = OpaqueType::get(Context);
ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(),
std::make_pair(Result,
Lex.getLoc())));
@@ -1036,7 +1236,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
}
Lex.Lex();
break;
-
+
case lltok::LocalVarID:
// TypeRec ::= %4
if (Lex.getUIntVal() < NumberedTypes.size())
@@ -1047,7 +1247,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
if (I != ForwardRefTypeIDs.end())
Result = I->second.first;
else {
- Result = Context.getOpaqueType();
+ Result = OpaqueType::get(Context);
ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(),
std::make_pair(Result,
Lex.getLoc())));
@@ -1060,36 +1260,36 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
Lex.Lex();
unsigned Val;
if (ParseUInt32(Val)) return true;
- OpaqueType *OT = Context.getOpaqueType(); //Use temporary placeholder.
+ OpaqueType *OT = OpaqueType::get(Context); //Use temporary placeholder.
UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT));
Result = OT;
break;
}
}
-
- // Parse the type suffixes.
+
+ // Parse the type suffixes.
while (1) {
switch (Lex.getKind()) {
// End of type.
- default: return false;
+ default: return false;
// TypeRec ::= TypeRec '*'
case lltok::star:
- if (Result.get() == Type::LabelTy)
+ if (Result.get()->isLabelTy())
return TokError("basic block pointers are invalid");
- if (Result.get() == Type::VoidTy)
+ if (Result.get()->isVoidTy())
return TokError("pointers to void are invalid; use i8* instead");
if (!PointerType::isValidElementType(Result.get()))
return TokError("pointer to this type is invalid");
- Result = HandleUpRefs(Context.getPointerTypeUnqual(Result.get()));
+ Result = HandleUpRefs(PointerType::getUnqual(Result.get()));
Lex.Lex();
break;
// TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*'
case lltok::kw_addrspace: {
- if (Result.get() == Type::LabelTy)
+ if (Result.get()->isLabelTy())
return TokError("basic block pointers are invalid");
- if (Result.get() == Type::VoidTy)
+ if (Result.get()->isVoidTy())
return TokError("pointers to void are invalid; use i8* instead");
if (!PointerType::isValidElementType(Result.get()))
return TokError("pointer to this type is invalid");
@@ -1098,10 +1298,10 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
ParseToken(lltok::star, "expected '*' in address space"))
return true;
- Result = HandleUpRefs(Context.getPointerType(Result.get(), AddrSpace));
+ Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace));
break;
}
-
+
/// Types '(' ArgTypeListI ')' OptFuncAttrs
case lltok::lparen:
if (ParseFunctionType(Result))
@@ -1120,16 +1320,16 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
PerFunctionState &PFS) {
if (ParseToken(lltok::lparen, "expected '(' in call"))
return true;
-
+
while (Lex.getKind() != lltok::rparen) {
// If this isn't the first argument, we need a comma.
if (!ArgList.empty() &&
ParseToken(lltok::comma, "expected ',' in argument list"))
return true;
-
+
// Parse the argument.
LocTy ArgLoc;
- PATypeHolder ArgTy(Type::VoidTy);
+ PATypeHolder ArgTy(Type::getVoidTy(Context));
unsigned ArgAttrs1, ArgAttrs2;
Value *V;
if (ParseType(ArgTy, ArgLoc) ||
@@ -1162,7 +1362,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
isVarArg = false;
assert(Lex.getKind() == lltok::lparen);
Lex.Lex(); // eat the (.
-
+
if (Lex.getKind() == lltok::rparen) {
// empty
} else if (Lex.getKind() == lltok::dotdotdot) {
@@ -1170,19 +1370,19 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
Lex.Lex();
} else {
LocTy TypeLoc = Lex.getLoc();
- PATypeHolder ArgTy(Type::VoidTy);
+ PATypeHolder ArgTy(Type::getVoidTy(Context));
unsigned Attrs;
std::string Name;
-
+
// If we're parsing a type, use ParseTypeRec, because we allow recursive
// types (such as a function returning a pointer to itself). If parsing a
// function prototype, we require fully resolved types.
if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
ParseOptionalAttrs(Attrs, 0)) return true;
-
- if (ArgTy == Type::VoidTy)
+
+ if (ArgTy->isVoidTy())
return Error(TypeLoc, "argument can not have void type");
-
+
if (Lex.getKind() == lltok::LocalVar ||
Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
Name = Lex.getStrVal();
@@ -1191,22 +1391,22 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
if (!FunctionType::isValidArgumentType(ArgTy))
return Error(TypeLoc, "invalid type for function argument");
-
+
ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
-
+
while (EatIfPresent(lltok::comma)) {
// Handle ... at end of arg list.
if (EatIfPresent(lltok::dotdotdot)) {
isVarArg = true;
break;
}
-
+
// Otherwise must be an argument type.
TypeLoc = Lex.getLoc();
if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
ParseOptionalAttrs(Attrs, 0)) return true;
- if (ArgTy == Type::VoidTy)
+ if (ArgTy->isVoidTy())
return Error(TypeLoc, "argument can not have void type");
if (Lex.getKind() == lltok::LocalVar ||
@@ -1219,14 +1419,14 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy))
return Error(TypeLoc, "invalid type for function argument");
-
+
ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
}
}
-
+
return ParseToken(lltok::rparen, "expected ')' at end of argument list");
}
-
+
/// ParseFunctionType
/// ::= Type ArgumentList OptionalAttrs
bool LLParser::ParseFunctionType(PATypeHolder &Result) {
@@ -1234,7 +1434,7 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) {
if (!FunctionType::isValidReturnType(Result))
return TokError("invalid function return type");
-
+
std::vector<ArgInfo> ArgList;
bool isVarArg;
unsigned Attrs;
@@ -1243,7 +1443,7 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) {
// FIXME: Remove in LLVM 3.0
ParseOptionalAttrs(Attrs, 2))
return true;
-
+
// Reject names on the arguments lists.
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
@@ -1254,12 +1454,12 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) {
// FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
}
}
-
+
std::vector<const Type*> ArgListTy;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
ArgListTy.push_back(ArgList[i].Type);
-
- Result = HandleUpRefs(Context.getFunctionType(Result.get(),
+
+ Result = HandleUpRefs(FunctionType::get(Result.get(),
ArgListTy, isVarArg));
return false;
}
@@ -1273,9 +1473,9 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) {
bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
assert(Lex.getKind() == lltok::lbrace);
Lex.Lex(); // Consume the '{'
-
+
if (EatIfPresent(lltok::rbrace)) {
- Result = Context.getStructType(Packed);
+ Result = StructType::get(Context, Packed);
return false;
}
@@ -1283,62 +1483,62 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
LocTy EltTyLoc = Lex.getLoc();
if (ParseTypeRec(Result)) return true;
ParamsList.push_back(Result);
-
- if (Result == Type::VoidTy)
+
+ if (Result->isVoidTy())
return Error(EltTyLoc, "struct element can not have void type");
if (!StructType::isValidElementType(Result))
return Error(EltTyLoc, "invalid element type for struct");
-
+
while (EatIfPresent(lltok::comma)) {
EltTyLoc = Lex.getLoc();
if (ParseTypeRec(Result)) return true;
-
- if (Result == Type::VoidTy)
+
+ if (Result->isVoidTy())
return Error(EltTyLoc, "struct element can not have void type");
if (!StructType::isValidElementType(Result))
return Error(EltTyLoc, "invalid element type for struct");
-
+
ParamsList.push_back(Result);
}
-
+
if (ParseToken(lltok::rbrace, "expected '}' at end of struct"))
return true;
-
+
std::vector<const Type*> ParamsListTy;
for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
ParamsListTy.push_back(ParamsList[i].get());
- Result = HandleUpRefs(Context.getStructType(ParamsListTy, Packed));
+ Result = HandleUpRefs(StructType::get(Context, ParamsListTy, Packed));
return false;
}
/// ParseArrayVectorType - Parse an array or vector type, assuming the first
/// token has already been consumed.
-/// TypeRec
+/// TypeRec
/// ::= '[' APSINTVAL 'x' Types ']'
/// ::= '<' APSINTVAL 'x' Types '>'
bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
Lex.getAPSIntVal().getBitWidth() > 64)
return TokError("expected number in address space");
-
+
LocTy SizeLoc = Lex.getLoc();
uint64_t Size = Lex.getAPSIntVal().getZExtValue();
Lex.Lex();
-
+
if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
return true;
LocTy TypeLoc = Lex.getLoc();
- PATypeHolder EltTy(Type::VoidTy);
+ PATypeHolder EltTy(Type::getVoidTy(Context));
if (ParseTypeRec(EltTy)) return true;
-
- if (EltTy == Type::VoidTy)
+
+ if (EltTy->isVoidTy())
return Error(TypeLoc, "array and vector element type cannot be void");
if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
"expected end of sequential type"))
return true;
-
+
if (isVector) {
if (Size == 0)
return Error(SizeLoc, "zero element vector is illegal");
@@ -1346,11 +1546,11 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
return Error(TypeLoc, "vector element type must be fp or integer");
- Result = Context.getVectorType(EltTy, unsigned(Size));
+ Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
return Error(TypeLoc, "invalid array element type");
- Result = HandleUpRefs(Context.getArrayType(EltTy, Size));
+ Result = HandleUpRefs(ArrayType::get(EltTy, Size));
}
return false;
}
@@ -1375,16 +1575,16 @@ LLParser::PerFunctionState::~PerFunctionState() {
I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
if (!isa<BasicBlock>(I->second.first)) {
I->second.first->replaceAllUsesWith(
- P.getContext().getUndef(I->second.first->getType()));
+ UndefValue::get(I->second.first->getType()));
delete I->second.first;
I->second.first = 0;
}
-
+
for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
if (!isa<BasicBlock>(I->second.first)) {
I->second.first->replaceAllUsesWith(
- P.getContext().getUndef(I->second.first->getType()));
+ UndefValue::get(I->second.first->getType()));
delete I->second.first;
I->second.first = 0;
}
@@ -1410,7 +1610,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
const Type *Ty, LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = F.getValueSymbolTable().lookup(Name);
-
+
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (Val == 0) {
@@ -1419,31 +1619,32 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
if (I != ForwardRefVals.end())
Val = I->second.first;
}
-
+
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val) {
if (Val->getType() == Ty) return Val;
- if (Ty == Type::LabelTy)
+ if (Ty->isLabelTy())
P.Error(Loc, "'%" + Name + "' is not a basic block");
else
P.Error(Loc, "'%" + Name + "' defined with type '" +
Val->getType()->getDescription() + "'");
return 0;
}
-
+
// Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && Ty != Type::LabelTy) {
+ if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
+ Ty != Type::getLabelTy(F.getContext())) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
-
+
// Otherwise, create a new forward reference for this value and remember it.
Value *FwdVal;
- if (Ty == Type::LabelTy)
- FwdVal = BasicBlock::Create(Name, &F);
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
else
FwdVal = new Argument(Ty, Name);
-
+
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
@@ -1452,7 +1653,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
-
+
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (Val == 0) {
@@ -1461,30 +1662,31 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
if (I != ForwardRefValIDs.end())
Val = I->second.first;
}
-
+
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val) {
if (Val->getType() == Ty) return Val;
- if (Ty == Type::LabelTy)
+ if (Ty->isLabelTy())
P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block");
else
P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" +
Val->getType()->getDescription() + "'");
return 0;
}
-
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && Ty != Type::LabelTy) {
+
+ if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
+ Ty != Type::getLabelTy(F.getContext())) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
-
+
// Otherwise, create a new forward reference for this value and remember it.
Value *FwdVal;
- if (Ty == Type::LabelTy)
- FwdVal = BasicBlock::Create("", &F);
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), "", &F);
else
FwdVal = new Argument(Ty);
-
+
ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
@@ -1495,30 +1697,31 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
const std::string &NameStr,
LocTy NameLoc, Instruction *Inst) {
// If this instruction has void type, it cannot have a name or ID specified.
- if (Inst->getType() == Type::VoidTy) {
+ if (Inst->getType()->isVoidTy()) {
if (NameID != -1 || !NameStr.empty())
return P.Error(NameLoc, "instructions returning void cannot have a name");
return false;
}
-
+
// If this was a numbered instruction, verify that the instruction is the
// expected value and resolve any forward references.
if (NameStr.empty()) {
// If neither a name nor an ID was specified, just use the next ID.
if (NameID == -1)
NameID = NumberedVals.size();
-
+
if (unsigned(NameID) != NumberedVals.size())
return P.Error(NameLoc, "instruction expected to be numbered '%" +
utostr(NumberedVals.size()) + "'");
-
+
std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
ForwardRefValIDs.find(NameID);
if (FI != ForwardRefValIDs.end()) {
if (FI->second.first->getType() != Inst->getType())
- return P.Error(NameLoc, "instruction forward referenced with type '" +
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
FI->second.first->getType()->getDescription() + "'");
FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
ForwardRefValIDs.erase(FI);
}
@@ -1531,17 +1734,18 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
FI = ForwardRefVals.find(NameStr);
if (FI != ForwardRefVals.end()) {
if (FI->second.first->getType() != Inst->getType())
- return P.Error(NameLoc, "instruction forward referenced with type '" +
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
FI->second.first->getType()->getDescription() + "'");
FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
ForwardRefVals.erase(FI);
}
-
+
// Set the name on the instruction.
Inst->setName(NameStr);
-
+
if (Inst->getNameStr() != NameStr)
- return P.Error(NameLoc, "multiple definition of local value named '" +
+ return P.Error(NameLoc, "multiple definition of local value named '" +
NameStr + "'");
return false;
}
@@ -1550,11 +1754,13 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
/// forward reference record if needed.
BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
LocTy Loc) {
- return cast_or_null<BasicBlock>(GetVal(Name, Type::LabelTy, Loc));
+ return cast_or_null<BasicBlock>(GetVal(Name,
+ Type::getLabelTy(F.getContext()), Loc));
}
BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
- return cast_or_null<BasicBlock>(GetVal(ID, Type::LabelTy, Loc));
+ return cast_or_null<BasicBlock>(GetVal(ID,
+ Type::getLabelTy(F.getContext()), Loc));
}
/// DefineBB - Define the specified basic block, which is either named or
@@ -1568,11 +1774,11 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
else
BB = GetBB(Name, Loc);
if (BB == 0) return 0; // Already diagnosed error.
-
+
// Move the block to the end of the function. Forward ref'd blocks are
// inserted wherever they happen to be referenced.
F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
-
+
// Remove the block from forward ref sets.
if (Name.empty()) {
ForwardRefValIDs.erase(NumberedVals.size());
@@ -1581,7 +1787,7 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
// BB forward references are already in the function symbol table.
ForwardRefVals.erase(Name);
}
-
+
return BB;
}
@@ -1615,7 +1821,7 @@ bool LLParser::ParseValID(ValID &ID) {
ID.Kind = ValID::t_LocalName;
break;
case lltok::Metadata: { // !{...} MDNode, !"foo" MDString
- ID.Kind = ValID::t_Constant;
+ ID.Kind = ValID::t_Metadata;
Lex.Lex();
if (Lex.getKind() == lltok::lbrace) {
SmallVector<Value*, 16> Elts;
@@ -1623,31 +1829,23 @@ bool LLParser::ParseValID(ValID &ID) {
ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
- ID.ConstantVal = Context.getMDNode(Elts.data(), Elts.size());
+ ID.MetadataVal = MDNode::get(Context, Elts.data(), Elts.size());
return false;
}
// Standalone metadata reference
// !{ ..., !42, ... }
- unsigned MID = 0;
- if (!ParseUInt32(MID)) {
- std::map<unsigned, Constant *>::iterator I = MetadataCache.find(MID);
- if (I == MetadataCache.end())
- return TokError("Unknown metadata reference");
- ID.ConstantVal = I->second;
+ if (!ParseMDNode(ID.MetadataVal))
return false;
- }
-
+
// MDString:
// ::= '!' STRINGCONSTANT
- std::string Str;
- if (ParseStringConstant(Str)) return true;
-
- ID.ConstantVal = Context.getMDString(Str.data(), Str.data() + Str.size());
+ if (ParseMDString(ID.MetadataVal)) return true;
+ ID.Kind = ValID::t_Metadata;
return false;
}
case lltok::APSInt:
- ID.APSIntVal = Lex.getAPSIntVal();
+ ID.APSIntVal = Lex.getAPSIntVal();
ID.Kind = ValID::t_APSInt;
break;
case lltok::APFloat:
@@ -1655,17 +1853,17 @@ bool LLParser::ParseValID(ValID &ID) {
ID.Kind = ValID::t_APFloat;
break;
case lltok::kw_true:
- ID.ConstantVal = Context.getConstantIntTrue();
+ ID.ConstantVal = ConstantInt::getTrue(Context);
ID.Kind = ValID::t_Constant;
break;
case lltok::kw_false:
- ID.ConstantVal = Context.getConstantIntFalse();
+ ID.ConstantVal = ConstantInt::getFalse(Context);
ID.Kind = ValID::t_Constant;
break;
case lltok::kw_null: ID.Kind = ValID::t_Null; break;
case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
-
+
case lltok::lbrace: {
// ValID ::= '{' ConstVector '}'
Lex.Lex();
@@ -1673,8 +1871,9 @@ bool LLParser::ParseValID(ValID &ID) {
if (ParseGlobalValueVector(Elts) ||
ParseToken(lltok::rbrace, "expected end of struct constant"))
return true;
-
- ID.ConstantVal = Context.getConstantStruct(Elts.data(), Elts.size(), false);
+
+ ID.ConstantVal = ConstantStruct::get(Context, Elts.data(),
+ Elts.size(), false);
ID.Kind = ValID::t_Constant;
return false;
}
@@ -1683,7 +1882,7 @@ bool LLParser::ParseValID(ValID &ID) {
// ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
Lex.Lex();
bool isPackedStruct = EatIfPresent(lltok::lbrace);
-
+
SmallVector<Constant*, 16> Elts;
LocTy FirstEltLoc = Lex.getLoc();
if (ParseGlobalValueVector(Elts) ||
@@ -1691,14 +1890,14 @@ bool LLParser::ParseValID(ValID &ID) {
ParseToken(lltok::rbrace, "expected end of packed struct")) ||
ParseToken(lltok::greater, "expected end of constant"))
return true;
-
+
if (isPackedStruct) {
ID.ConstantVal =
- Context.getConstantStruct(Elts.data(), Elts.size(), true);
+ ConstantStruct::get(Context, Elts.data(), Elts.size(), true);
ID.Kind = ValID::t_Constant;
return false;
}
-
+
if (Elts.empty())
return Error(ID.Loc, "constant vector must not be empty");
@@ -1706,15 +1905,15 @@ bool LLParser::ParseValID(ValID &ID) {
!Elts[0]->getType()->isFloatingPoint())
return Error(FirstEltLoc,
"vector elements must have integer or floating point type");
-
+
// Verify that all the vector elements have the same type.
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
if (Elts[i]->getType() != Elts[0]->getType())
return Error(FirstEltLoc,
"vector element #" + utostr(i) +
" is not of type '" + Elts[0]->getType()->getDescription());
-
- ID.ConstantVal = Context.getConstantVector(Elts.data(), Elts.size());
+
+ ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size());
ID.Kind = ValID::t_Constant;
return false;
}
@@ -1733,13 +1932,13 @@ bool LLParser::ParseValID(ValID &ID) {
ID.Kind = ValID::t_EmptyArray;
return false;
}
-
+
if (!Elts[0]->getType()->isFirstClassType())
- return Error(FirstEltLoc, "invalid array element type: " +
+ return Error(FirstEltLoc, "invalid array element type: " +
Elts[0]->getType()->getDescription());
-
- ArrayType *ATy = Context.getArrayType(Elts[0]->getType(), Elts.size());
-
+
+ ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
// Verify all elements are correct type!
for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
if (Elts[i]->getType() != Elts[0]->getType())
@@ -1747,33 +1946,34 @@ bool LLParser::ParseValID(ValID &ID) {
"array element #" + utostr(i) +
" is not of type '" +Elts[0]->getType()->getDescription());
}
-
- ID.ConstantVal = Context.getConstantArray(ATy, Elts.data(), Elts.size());
+
+ ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
ID.Kind = ValID::t_Constant;
return false;
}
case lltok::kw_c: // c "foo"
Lex.Lex();
- ID.ConstantVal = Context.getConstantArray(Lex.getStrVal(), false);
+ ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
if (ParseToken(lltok::StringConstant, "expected string")) return true;
ID.Kind = ValID::t_Constant;
return false;
case lltok::kw_asm: {
- // ValID ::= 'asm' SideEffect? STRINGCONSTANT ',' STRINGCONSTANT
- bool HasSideEffect;
+ // ValID ::= 'asm' SideEffect? MsAsm? STRINGCONSTANT ',' STRINGCONSTANT
+ bool HasSideEffect, MsAsm;
Lex.Lex();
if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+ ParseOptionalToken(lltok::kw_msasm, MsAsm) ||
ParseStringConstant(ID.StrVal) ||
ParseToken(lltok::comma, "expected comma in inline asm expression") ||
ParseToken(lltok::StringConstant, "expected constraint string"))
return true;
ID.StrVal2 = Lex.getStrVal();
- ID.UIntVal = HasSideEffect;
+ ID.UIntVal = HasSideEffect | ((unsigned)MsAsm<<1);
ID.Kind = ValID::t_InlineAsm;
return false;
}
-
+
case lltok::kw_trunc:
case lltok::kw_zext:
case lltok::kw_sext:
@@ -1783,11 +1983,11 @@ bool LLParser::ParseValID(ValID &ID) {
case lltok::kw_uitofp:
case lltok::kw_sitofp:
case lltok::kw_fptoui:
- case lltok::kw_fptosi:
+ case lltok::kw_fptosi:
case lltok::kw_inttoptr:
- case lltok::kw_ptrtoint: {
+ case lltok::kw_ptrtoint: {
unsigned Opc = Lex.getUIntVal();
- PATypeHolder DestTy(Type::VoidTy);
+ PATypeHolder DestTy(Type::getVoidTy(Context));
Constant *SrcVal;
Lex.Lex();
if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
@@ -1800,7 +2000,7 @@ bool LLParser::ParseValID(ValID &ID) {
return Error(ID.Loc, "invalid cast opcode for cast from '" +
SrcVal->getType()->getDescription() + "' to '" +
DestTy->getDescription() + "'");
- ID.ConstantVal = Context.getConstantExprCast((Instruction::CastOps)Opc,
+ ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
SrcVal, DestTy);
ID.Kind = ValID::t_Constant;
return false;
@@ -1820,7 +2020,7 @@ bool LLParser::ParseValID(ValID &ID) {
Indices.end()))
return Error(ID.Loc, "invalid indices for extractvalue");
ID.ConstantVal =
- Context.getConstantExprExtractValue(Val, Indices.data(), Indices.size());
+ ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
ID.Kind = ValID::t_Constant;
return false;
}
@@ -1840,15 +2040,13 @@ bool LLParser::ParseValID(ValID &ID) {
if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
Indices.end()))
return Error(ID.Loc, "invalid indices for insertvalue");
- ID.ConstantVal = Context.getConstantExprInsertValue(Val0, Val1,
+ ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1,
Indices.data(), Indices.size());
ID.Kind = ValID::t_Constant;
return false;
}
case lltok::kw_icmp:
- case lltok::kw_fcmp:
- case lltok::kw_vicmp:
- case lltok::kw_vfcmp: {
+ case lltok::kw_fcmp: {
unsigned PredVal, Opc = Lex.getUIntVal();
Constant *Val0, *Val1;
Lex.Lex();
@@ -1859,38 +2057,27 @@ bool LLParser::ParseValID(ValID &ID) {
ParseGlobalTypeAndValue(Val1) ||
ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
return true;
-
+
if (Val0->getType() != Val1->getType())
return Error(ID.Loc, "compare operands must have the same type");
-
+
CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
-
+
if (Opc == Instruction::FCmp) {
if (!Val0->getType()->isFPOrFPVector())
return Error(ID.Loc, "fcmp requires floating point operands");
- ID.ConstantVal = Context.getConstantExprFCmp(Pred, Val0, Val1);
- } else if (Opc == Instruction::ICmp) {
+ ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVector() &&
!isa<PointerType>(Val0->getType()))
return Error(ID.Loc, "icmp requires pointer or integer operands");
- ID.ConstantVal = Context.getConstantExprICmp(Pred, Val0, Val1);
- } else if (Opc == Instruction::VFCmp) {
- // FIXME: REMOVE VFCMP Support
- if (!Val0->getType()->isFPOrFPVector() ||
- !isa<VectorType>(Val0->getType()))
- return Error(ID.Loc, "vfcmp requires vector floating point operands");
- ID.ConstantVal = Context.getConstantExprVFCmp(Pred, Val0, Val1);
- } else if (Opc == Instruction::VICmp) {
- // FIXME: REMOVE VICMP Support
- if (!Val0->getType()->isIntOrIntVector() ||
- !isa<VectorType>(Val0->getType()))
- return Error(ID.Loc, "vicmp requires vector floating point operands");
- ID.ConstantVal = Context.getConstantExprVICmp(Pred, Val0, Val1);
+ ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
ID.Kind = ValID::t_Constant;
return false;
}
-
+
// Binary Operators.
case lltok::kw_add:
case lltok::kw_fadd:
@@ -1904,9 +2091,27 @@ bool LLParser::ParseValID(ValID &ID) {
case lltok::kw_urem:
case lltok::kw_srem:
case lltok::kw_frem: {
+ bool NUW = false;
+ bool NSW = false;
+ bool Exact = false;
unsigned Opc = Lex.getUIntVal();
Constant *Val0, *Val1;
Lex.Lex();
+ LocTy ModifierLoc = Lex.getLoc();
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul) {
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ if (EatIfPresent(lltok::kw_nsw)) {
+ NSW = true;
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ }
+ } else if (Opc == Instruction::SDiv) {
+ if (EatIfPresent(lltok::kw_exact))
+ Exact = true;
+ }
if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
ParseGlobalTypeAndValue(Val0) ||
ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
@@ -1915,14 +2120,27 @@ bool LLParser::ParseValID(ValID &ID) {
return true;
if (Val0->getType() != Val1->getType())
return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVector()) {
+ if (NUW)
+ return Error(ModifierLoc, "nuw only applies to integer operations");
+ if (NSW)
+ return Error(ModifierLoc, "nsw only applies to integer operations");
+ }
+ // API compatibility: Accept either integer or floating-point types with
+ // add, sub, and mul.
if (!Val0->getType()->isIntOrIntVector() &&
!Val0->getType()->isFPOrFPVector())
return Error(ID.Loc,"constexpr requires integer, fp, or vector operands");
- ID.ConstantVal = Context.getConstantExpr(Opc, Val0, Val1);
+ unsigned Flags = 0;
+ if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Exact) Flags |= SDivOperator::IsExact;
+ Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
+ ID.ConstantVal = C;
ID.Kind = ValID::t_Constant;
return false;
}
-
+
// Logical Operations
case lltok::kw_shl:
case lltok::kw_lshr:
@@ -1944,11 +2162,11 @@ bool LLParser::ParseValID(ValID &ID) {
if (!Val0->getType()->isIntOrIntVector())
return Error(ID.Loc,
"constexpr requires integer or integer vector operands");
- ID.ConstantVal = Context.getConstantExpr(Opc, Val0, Val1);
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
ID.Kind = ValID::t_Constant;
return false;
- }
-
+ }
+
case lltok::kw_getelementptr:
case lltok::kw_shufflevector:
case lltok::kw_insertelement:
@@ -1956,41 +2174,49 @@ bool LLParser::ParseValID(ValID &ID) {
case lltok::kw_select: {
unsigned Opc = Lex.getUIntVal();
SmallVector<Constant*, 16> Elts;
+ bool InBounds = false;
Lex.Lex();
+ if (Opc == Instruction::GetElementPtr)
+ InBounds = EatIfPresent(lltok::kw_inbounds);
if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
ParseGlobalValueVector(Elts) ||
ParseToken(lltok::rparen, "expected ')' in constantexpr"))
return true;
-
+
if (Opc == Instruction::GetElementPtr) {
if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType()))
return Error(ID.Loc, "getelementptr requires pointer operand");
-
+
if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
- (Value**)&Elts[1], Elts.size()-1))
+ (Value**)(Elts.data() + 1),
+ Elts.size() - 1))
return Error(ID.Loc, "invalid indices for getelementptr");
- ID.ConstantVal = Context.getConstantExprGetElementPtr(Elts[0],
- &Elts[1], Elts.size()-1);
+ ID.ConstantVal = InBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(Elts[0],
+ Elts.data() + 1,
+ Elts.size() - 1) :
+ ConstantExpr::getGetElementPtr(Elts[0],
+ Elts.data() + 1, Elts.size() - 1);
} else if (Opc == Instruction::Select) {
if (Elts.size() != 3)
return Error(ID.Loc, "expected three operands to select");
if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
Elts[2]))
return Error(ID.Loc, Reason);
- ID.ConstantVal = Context.getConstantExprSelect(Elts[0], Elts[1], Elts[2]);
+ ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
} else if (Opc == Instruction::ShuffleVector) {
if (Elts.size() != 3)
return Error(ID.Loc, "expected three operands to shufflevector");
if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
return Error(ID.Loc, "invalid operands to shufflevector");
ID.ConstantVal =
- Context.getConstantExprShuffleVector(Elts[0], Elts[1],Elts[2]);
+ ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]);
} else if (Opc == Instruction::ExtractElement) {
if (Elts.size() != 2)
return Error(ID.Loc, "expected two operands to extractelement");
if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
return Error(ID.Loc, "invalid extractelement operands");
- ID.ConstantVal = Context.getConstantExprExtractElement(Elts[0], Elts[1]);
+ ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
} else {
assert(Opc == Instruction::InsertElement && "Unknown opcode");
if (Elts.size() != 3)
@@ -1998,14 +2224,14 @@ bool LLParser::ParseValID(ValID &ID) {
if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
return Error(ID.Loc, "invalid insertelement operands");
ID.ConstantVal =
- Context.getConstantExprInsertElement(Elts[0], Elts[1],Elts[2]);
+ ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
}
-
+
ID.Kind = ValID::t_Constant;
return false;
}
}
-
+
Lex.Lex();
return false;
}
@@ -2024,9 +2250,11 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
Constant *&V) {
if (isa<FunctionType>(Ty))
return Error(ID.Loc, "functions are not values, refer to them as pointers");
-
+
switch (ID.Kind) {
- default: assert(0 && "Unknown ValID!");
+ default: llvm_unreachable("Unknown ValID!");
+ case ValID::t_Metadata:
+ return Error(ID.Loc, "invalid use of metadata");
case ValID::t_LocalID:
case ValID::t_LocalName:
return Error(ID.Loc, "invalid use of function-local name");
@@ -2042,50 +2270,50 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
if (!isa<IntegerType>(Ty))
return Error(ID.Loc, "integer constant must have integer type");
ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
- V = Context.getConstantInt(ID.APSIntVal);
+ V = ConstantInt::get(Context, ID.APSIntVal);
return false;
case ValID::t_APFloat:
if (!Ty->isFloatingPoint() ||
!ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
return Error(ID.Loc, "floating point constant invalid for type");
-
+
// The lexer has no type info, so builds all float and double FP constants
// as double. Fix this here. Long double does not need this.
if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
- Ty == Type::FloatTy) {
+ Ty->isFloatTy()) {
bool Ignored;
ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
&Ignored);
}
- V = Context.getConstantFP(ID.APFloatVal);
-
+ V = ConstantFP::get(Context, ID.APFloatVal);
+
if (V->getType() != Ty)
return Error(ID.Loc, "floating point constant does not have type '" +
Ty->getDescription() + "'");
-
+
return false;
case ValID::t_Null:
if (!isa<PointerType>(Ty))
return Error(ID.Loc, "null must be a pointer type");
- V = Context.getConstantPointerNull(cast<PointerType>(Ty));
+ V = ConstantPointerNull::get(cast<PointerType>(Ty));
return false;
case ValID::t_Undef:
// FIXME: LabelTy should not be a first-class type.
- if ((!Ty->isFirstClassType() || Ty == Type::LabelTy) &&
+ if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
!isa<OpaqueType>(Ty))
return Error(ID.Loc, "invalid type for undef constant");
- V = Context.getUndef(Ty);
+ V = UndefValue::get(Ty);
return false;
case ValID::t_EmptyArray:
if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0)
return Error(ID.Loc, "invalid empty array initializer");
- V = Context.getUndef(Ty);
+ V = UndefValue::get(Ty);
return false;
case ValID::t_Zero:
// FIXME: LabelTy should not be a first-class type.
- if (!Ty->isFirstClassType() || Ty == Type::LabelTy)
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
return Error(ID.Loc, "invalid type for null constant");
- V = Context.getNullValue(Ty);
+ V = Constant::getNullValue(Ty);
return false;
case ValID::t_Constant:
if (ID.ConstantVal->getType() != Ty)
@@ -2094,12 +2322,12 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
return false;
}
}
-
+
bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
- PATypeHolder Type(Type::VoidTy);
+ PATypeHolder Type(Type::getVoidTy(Context));
return ParseType(Type) ||
ParseGlobalValue(Type, V);
-}
+}
/// ParseGlobalValueVector
/// ::= /*empty*/
@@ -2111,16 +2339,16 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
Lex.getKind() == lltok::greater ||
Lex.getKind() == lltok::rparen)
return false;
-
+
Constant *C;
if (ParseGlobalTypeAndValue(C)) return true;
Elts.push_back(C);
-
+
while (EatIfPresent(lltok::comma)) {
if (ParseGlobalTypeAndValue(C)) return true;
Elts.push_back(C);
}
-
+
return false;
}
@@ -2141,8 +2369,10 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
return Error(ID.Loc, "invalid type for inline asm constraint string");
- V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal);
+ V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
return false;
+ } else if (ID.Kind == ValID::t_Metadata) {
+ V = ID.MetadataVal;
} else {
Constant *C;
if (ConvertGlobalValIDToValue(Ty, ID, C)) return true;
@@ -2161,7 +2391,7 @@ bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
}
bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
- PATypeHolder T(Type::VoidTy);
+ PATypeHolder T(Type::getVoidTy(Context));
return ParseType(T) ||
ParseValue(T, V, PFS);
}
@@ -2174,9 +2404,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Parse the linkage.
LocTy LinkageLoc = Lex.getLoc();
unsigned Linkage;
-
- unsigned Visibility, CC, RetAttrs;
- PATypeHolder RetType(Type::VoidTy);
+
+ unsigned Visibility, RetAttrs;
+ CallingConv::ID CC;
+ PATypeHolder RetType(Type::getVoidTy(Context));
LocTy RetTypeLoc = Lex.getLoc();
if (ParseOptionalLinkage(Linkage) ||
ParseOptionalVisibility(Visibility) ||
@@ -2195,6 +2426,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
return Error(LinkageLoc, "invalid linkage for function definition");
break;
case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
@@ -2210,11 +2442,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::CommonLinkage:
return Error(LinkageLoc, "invalid function linkage type");
}
-
+
if (!FunctionType::isValidReturnType(RetType) ||
isa<OpaqueType>(RetType))
return Error(RetTypeLoc, "invalid function return type");
-
+
LocTy NameLoc = Lex.getLoc();
std::string FunctionName;
@@ -2229,12 +2461,12 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
} else {
return TokError("expected function name");
}
-
+
Lex.Lex();
-
+
if (Lex.getKind() != lltok::lparen)
return TokError("expected '(' in function argument list");
-
+
std::vector<ArgInfo> ArgList;
bool isVarArg;
unsigned FuncAttrs;
@@ -2256,22 +2488,22 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
FuncAttrs &= ~Attribute::Alignment;
}
-
+
// Okay, if we got here, the function is syntactically valid. Convert types
// and do semantic checks.
std::vector<const Type*> ParamTypeList;
SmallVector<AttributeWithIndex, 8> Attrs;
- // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
+ // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
// attributes.
unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
if (FuncAttrs & ObsoleteFuncAttrs) {
RetAttrs |= FuncAttrs & ObsoleteFuncAttrs;
FuncAttrs &= ~ObsoleteFuncAttrs;
}
-
+
if (RetAttrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
-
+
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
ParamTypeList.push_back(ArgList[i].Type);
if (ArgList[i].Attrs != Attribute::None)
@@ -2282,14 +2514,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
-
+
if (PAL.paramHasAttr(1, Attribute::StructRet) &&
- RetType != Type::VoidTy)
- return Error(RetTypeLoc, "functions with 'sret' argument must return void");
-
+ RetType != Type::getVoidTy(Context))
+ return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
const FunctionType *FT =
- Context.getFunctionType(RetType, ParamTypeList, isVarArg);
- const PointerType *PFT = Context.getPointerTypeUnqual(FT);
+ FunctionType::get(RetType, ParamTypeList, isVarArg);
+ const PointerType *PFT = PointerType::getUnqual(FT);
Fn = 0;
if (!FunctionName.empty()) {
@@ -2317,8 +2549,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
AI->setName("");
}
}
-
- } else if (FunctionName.empty()) {
+
+ } else {
// If this is a definition of a forward referenced function, make sure the
// types agree.
std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
@@ -2339,7 +2571,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (FunctionName.empty())
NumberedVals.push_back(Fn);
-
+
Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
Fn->setCallingConv(CC);
@@ -2347,21 +2579,21 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setAlignment(Alignment);
Fn->setSection(Section);
if (!GC.empty()) Fn->setGC(GC.c_str());
-
+
// Add all of the arguments we parsed to the function.
Function::arg_iterator ArgIt = Fn->arg_begin();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
// If the argument has a name, insert it into the argument symbol table.
if (ArgList[i].Name.empty()) continue;
-
+
// Set the name, if it conflicted, it will be auto-renamed.
ArgIt->setName(ArgList[i].Name);
-
+
if (ArgIt->getNameStr() != ArgList[i].Name)
return Error(ArgList[i].Loc, "redefinition of argument '%" +
ArgList[i].Name + "'");
}
-
+
return false;
}
@@ -2374,15 +2606,15 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
return TokError("expected '{' in function body");
Lex.Lex(); // eat the {.
-
+
PerFunctionState PFS(*this, Fn);
-
+
while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
if (ParseBasicBlock(PFS)) return true;
-
+
// Eat the }.
Lex.Lex();
-
+
// Verify function is ok.
return PFS.VerifyFunctionComplete();
}
@@ -2397,12 +2629,12 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
Name = Lex.getStrVal();
Lex.Lex();
}
-
+
BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
if (BB == 0) return true;
-
+
std::string NameStr;
-
+
// Parse the instructions in this block until we get a terminator.
Instruction *Inst;
do {
@@ -2411,7 +2643,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
LocTy NameLoc = Lex.getLoc();
int NameID = -1;
NameStr = "";
-
+
if (Lex.getKind() == lltok::LocalVarID) {
NameID = Lex.getUIntVal();
Lex.Lex();
@@ -2425,15 +2657,24 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
if (ParseToken(lltok::equal, "expected '=' after instruction name"))
return true;
}
-
+
if (ParseInstruction(Inst, BB, PFS)) return true;
-
+ if (EatIfPresent(lltok::comma))
+ ParseOptionalCustomMetadata();
+
+ // Set metadata attached with this instruction.
+ MetadataContext &TheMetadata = M->getContext().getMetadata();
+ for (SmallVector<std::pair<unsigned, MDNode *>, 2>::iterator
+ MDI = MDsOnInst.begin(), MDE = MDsOnInst.end(); MDI != MDE; ++MDI)
+ TheMetadata.addMD(MDI->first, MDI->second, Inst);
+ MDsOnInst.clear();
+
BB->getInstList().push_back(Inst);
// Set the name on the instruction.
if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
} while (!isa<TerminatorInst>(Inst));
-
+
return false;
}
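
The new loop above is how custom metadata (such as !dbg) gets attached to each parsed instruction. A condensed sketch of that attachment, using only the MetadataContext calls visible in the patch; attachCustomMD is an illustrative helper name:

    #include "llvm/Instruction.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Associate one MDNode with an instruction under a numbered metadata
    // kind; Kind and Node are assumed to come from earlier parsing.
    void attachCustomMD(Module &M, Instruction *Inst,
                        unsigned Kind, MDNode *Node) {
      MetadataContext &TheMetadata = M.getContext().getMetadata();
      TheMetadata.addMD(Kind, Node, Inst);
    }
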
@@ -2451,12 +2692,12 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
LocTy Loc = Lex.getLoc();
unsigned KeywordVal = Lex.getUIntVal();
Lex.Lex(); // Eat the keyword.
-
+
switch (Token) {
default: return Error(Loc, "expected instruction opcode");
// Terminator Instructions.
- case lltok::kw_unwind: Inst = new UnwindInst(); return false;
- case lltok::kw_unreachable: Inst = new UnreachableInst(); return false;
+ case lltok::kw_unwind: Inst = new UnwindInst(Context); return false;
+ case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
case lltok::kw_br: return ParseBr(Inst, PFS);
case lltok::kw_switch: return ParseSwitch(Inst, PFS);
@@ -2464,15 +2705,49 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
- case lltok::kw_mul:
+ case lltok::kw_mul: {
+ bool NUW = false;
+ bool NSW = false;
+ LocTy ModifierLoc = Lex.getLoc();
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ if (EatIfPresent(lltok::kw_nsw)) {
+ NSW = true;
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ }
// API compatibility: Accept either integer or floating-point types.
- return ParseArithmetic(Inst, PFS, KeywordVal, 0);
+ bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0);
+ if (!Result) {
+ if (!Inst->getType()->isIntOrIntVector()) {
+ if (NUW)
+ return Error(ModifierLoc, "nuw only applies to integer operations");
+ if (NSW)
+ return Error(ModifierLoc, "nsw only applies to integer operations");
+ }
+ if (NUW)
+ cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+ if (NSW)
+ cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+ }
+ return Result;
+ }
case lltok::kw_fadd:
case lltok::kw_fsub:
case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ case lltok::kw_sdiv: {
+ bool Exact = false;
+ if (EatIfPresent(lltok::kw_exact))
+ Exact = true;
+ bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
+ if (!Result)
+ if (Exact)
+ cast<BinaryOperator>(Inst)->setIsExact(true);
+ return Result;
+ }
+
case lltok::kw_udiv:
- case lltok::kw_sdiv:
case lltok::kw_urem:
case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
case lltok::kw_fdiv:
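
The rewritten add/sub/mul and sdiv cases above parse the new 'nuw', 'nsw' and 'exact' keywords and then set the matching flags on the created operator. A hedged sketch of that flag API, using only setters that appear in the hunk; the operands are assumed to be integer-typed and the instructions are left uninserted for brevity:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Equivalent of parsing 'add nuw nsw ...' and 'sdiv exact ...'.
    void makeFlaggedOps(Value *LHS, Value *RHS) {
      BinaryOperator *Add = BinaryOperator::Create(Instruction::Add, LHS, RHS);
      Add->setHasNoUnsignedWrap(true);  // 'nuw'
      Add->setHasNoSignedWrap(true);    // 'nsw'
      BinaryOperator *Div = BinaryOperator::Create(Instruction::SDiv, LHS, RHS);
      Div->setIsExact(true);            // 'exact'
    }
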
@@ -2484,9 +2759,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_or:
case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
case lltok::kw_icmp:
- case lltok::kw_fcmp:
- case lltok::kw_vicmp:
- case lltok::kw_vfcmp: return ParseCompare(Inst, PFS, KeywordVal);
+ case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal);
// Casts.
case lltok::kw_trunc:
case lltok::kw_zext:
@@ -2497,7 +2770,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_uitofp:
case lltok::kw_sitofp:
case lltok::kw_fptoui:
- case lltok::kw_fptosi:
+ case lltok::kw_fptosi:
case lltok::kw_inttoptr:
case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
// Other.
@@ -2531,8 +2804,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
- // FIXME: REMOVE vicmp/vfcmp!
- if (Opc == Instruction::FCmp || Opc == Instruction::VFCmp) {
+ if (Opc == Instruction::FCmp) {
switch (Lex.getKind()) {
default: TokError("expected fcmp predicate (e.g. 'oeq')");
case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
@@ -2576,42 +2848,57 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
//===----------------------------------------------------------------------===//
/// ParseRet - Parse a return instruction.
-/// ::= 'ret' void
-/// ::= 'ret' TypeAndValue
-/// ::= 'ret' TypeAndValue (',' TypeAndValue)+ [[obsolete: LLVM 3.0]]
+/// ::= 'ret' void (',' !dbg, !1)
+/// ::= 'ret' TypeAndValue (',' !dbg, !1)
+/// ::= 'ret' TypeAndValue (',' TypeAndValue)+ (',' !dbg, !1)
+/// [[obsolete: LLVM 3.0]]
bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
PerFunctionState &PFS) {
- PATypeHolder Ty(Type::VoidTy);
+ PATypeHolder Ty(Type::getVoidTy(Context));
if (ParseType(Ty, true /*void allowed*/)) return true;
-
- if (Ty == Type::VoidTy) {
- Inst = ReturnInst::Create();
+
+ if (Ty->isVoidTy()) {
+ if (EatIfPresent(lltok::comma))
+ if (ParseOptionalCustomMetadata()) return true;
+ Inst = ReturnInst::Create(Context);
return false;
}
-
+
Value *RV;
if (ParseValue(Ty, RV, PFS)) return true;
-
- // The normal case is one return value.
- if (Lex.getKind() == lltok::comma) {
- // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring use
- // of 'ret {i32,i32} {i32 1, i32 2}'
- SmallVector<Value*, 8> RVs;
- RVs.push_back(RV);
-
- while (EatIfPresent(lltok::comma)) {
- if (ParseTypeAndValue(RV, PFS)) return true;
+
+ if (EatIfPresent(lltok::comma)) {
+ // Parse optional custom metadata, e.g. !dbg
+ if (Lex.getKind() == lltok::NamedOrCustomMD) {
+ if (ParseOptionalCustomMetadata()) return true;
+ } else {
+ // The normal case is one return value.
+ // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring use
+ // of 'ret {i32,i32} {i32 1, i32 2}'
+ SmallVector<Value*, 8> RVs;
RVs.push_back(RV);
- }
- RV = Context.getUndef(PFS.getFunction().getReturnType());
- for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
- Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
- BB->getInstList().push_back(I);
- RV = I;
+ do {
+        // If optional custom metadata (e.g. !dbg) is seen, then this is
+        // the end of the MRV list.
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ break;
+ if (ParseTypeAndValue(RV, PFS)) return true;
+ RVs.push_back(RV);
+ } while (EatIfPresent(lltok::comma));
+
+ RV = UndefValue::get(PFS.getFunction().getReturnType());
+ for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
+ Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
+ BB->getInstList().push_back(I);
+ RV = I;
+ }
}
}
- Inst = ReturnInst::Create(RV);
+ if (EatIfPresent(lltok::comma))
+ if (ParseOptionalCustomMetadata()) return true;
+
+ Inst = ReturnInst::Create(Context, RV);
return false;
}
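
The obsolete multi-value return form is still lowered the same way as before: every extra operand is folded into an undef of the aggregate return type through insertvalue. A condensed sketch mirroring the loop above; lowerMRV is an illustrative name:

    #include "llvm/BasicBlock.h"
    #include "llvm/Constants.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // 'ret i32 1, i32 2' becomes two insertvalues into undef {i32, i32},
    // followed by a single ret of the final aggregate.
    Value *lowerMRV(Function &F, BasicBlock *BB, SmallVectorImpl<Value*> &RVs) {
      Value *RV = UndefValue::get(F.getReturnType());
      for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
        Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
        BB->getInstList().push_back(I);
        RV = I;
      }
      return RV;
    }
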
@@ -2623,26 +2910,26 @@ bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc, Loc2;
Value *Op0, *Op1, *Op2;
if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
-
+
if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
Inst = BranchInst::Create(BB);
return false;
}
-
- if (Op0->getType() != Type::Int1Ty)
+
+ if (Op0->getType() != Type::getInt1Ty(Context))
return Error(Loc, "branch condition must have 'i1' type");
-
+
if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
ParseTypeAndValue(Op1, Loc, PFS) ||
ParseToken(lltok::comma, "expected ',' after true destination") ||
ParseTypeAndValue(Op2, Loc2, PFS))
return true;
-
+
if (!isa<BasicBlock>(Op1))
return Error(Loc, "true destination of branch must be a basic block");
if (!isa<BasicBlock>(Op2))
return Error(Loc2, "true destination of branch must be a basic block");
-
+
Inst = BranchInst::Create(cast<BasicBlock>(Op1), cast<BasicBlock>(Op2), Op0);
return false;
}
@@ -2665,13 +2952,13 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
return Error(CondLoc, "switch condition must have integer type");
if (!isa<BasicBlock>(DefaultBB))
return Error(BBLoc, "default destination must be a basic block");
-
+
// Parse the jump table pairs.
SmallPtrSet<Value*, 32> SeenCases;
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
while (Lex.getKind() != lltok::rsquare) {
Value *Constant, *DestBB;
-
+
if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
ParseToken(lltok::comma, "expected ',' after case value") ||
ParseTypeAndValue(DestBB, BBLoc, PFS))
@@ -2683,13 +2970,13 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
return Error(CondLoc, "case value is not a constant integer");
if (!isa<BasicBlock>(DestBB))
return Error(BBLoc, "case destination is not a basic block");
-
+
Table.push_back(std::make_pair(cast<ConstantInt>(Constant),
cast<BasicBlock>(DestBB)));
}
-
+
Lex.Lex(); // Eat the ']'.
-
+
SwitchInst *SI = SwitchInst::Create(Cond, cast<BasicBlock>(DefaultBB),
Table.size());
for (unsigned i = 0, e = Table.size(); i != e; ++i)
@@ -2703,8 +2990,9 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
- unsigned CC, RetAttrs, FnAttrs;
- PATypeHolder RetType(Type::VoidTy);
+ unsigned RetAttrs, FnAttrs;
+ CallingConv::ID CC;
+ PATypeHolder RetType(Type::getVoidTy(Context));
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
@@ -2721,12 +3009,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
ParseTypeAndValue(UnwindBB, PFS))
return true;
-
+
if (!isa<BasicBlock>(NormalBB))
return Error(CallLoc, "normal destination is not a basic block");
if (!isa<BasicBlock>(UnwindBB))
return Error(CallLoc, "unwind destination is not a basic block");
-
+
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
@@ -2738,18 +3026,18 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
std::vector<const Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
ParamTypes.push_back(ArgList[i].V->getType());
-
+
if (!FunctionType::isValidReturnType(RetType))
return Error(RetTypeLoc, "Invalid result type for LLVM function");
-
- Ty = Context.getFunctionType(RetType, ParamTypes, false);
- PFTy = Context.getPointerTypeUnqual(Ty);
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
}
-
+
// Look up the callee.
Value *Callee;
if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
-
+
// FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
// function attributes.
unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
@@ -2757,14 +3045,14 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
FnAttrs &= ~ObsoleteFuncAttrs;
}
-
+
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
-
+
SmallVector<Value*, 8> Args;
-
+
// Loop through FunctionType's arguments and ensure they are specified
// correctly. Also, gather any parameter attributes.
FunctionType::param_iterator I = Ty->param_begin();
@@ -2776,7 +3064,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
} else if (!Ty->isVarArg()) {
return Error(ArgList[i].Loc, "too many arguments specified");
}
-
+
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
return Error(ArgList[i].Loc, "argument is not of expected type '" +
ExpectedTy->getDescription() + "'");
@@ -2784,16 +3072,16 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
if (ArgList[i].Attrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
}
-
+
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
-
+
if (FnAttrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
-
+
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
-
+
InvokeInst *II = InvokeInst::Create(Callee, cast<BasicBlock>(NormalBB),
cast<BasicBlock>(UnwindBB),
Args.begin(), Args.end());
@@ -2824,7 +3112,7 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
bool Valid;
switch (OperandType) {
- default: assert(0 && "Unknown operand type!");
+ default: llvm_unreachable("Unknown operand type!");
case 0: // int or FP.
Valid = LHS->getType()->isIntOrIntVector() ||
LHS->getType()->isFPOrFPVector();
@@ -2832,10 +3120,10 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
case 1: Valid = LHS->getType()->isIntOrIntVector(); break;
case 2: Valid = LHS->getType()->isFPOrFPVector(); break;
}
-
+
if (!Valid)
return Error(Loc, "invalid operand type for instruction");
-
+
Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
return false;
}
@@ -2861,8 +3149,6 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
/// ParseCompare
/// ::= 'icmp' IPredicates TypeAndValue ',' Value
/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
-/// ::= 'vicmp' IPredicates TypeAndValue ',' Value
-/// ::= 'vfcmp' FPredicates TypeAndValue ',' Value
bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
// Parse the integer/fp comparison predicate.
@@ -2874,24 +3160,17 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
ParseToken(lltok::comma, "expected ',' after compare value") ||
ParseValue(LHS->getType(), RHS, PFS))
return true;
-
+
if (Opc == Instruction::FCmp) {
if (!LHS->getType()->isFPOrFPVector())
return Error(Loc, "fcmp requires floating point operands");
Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
- } else if (Opc == Instruction::ICmp) {
+ } else {
+ assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVector() &&
!isa<PointerType>(LHS->getType()))
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
- } else if (Opc == Instruction::VFCmp) {
- if (!LHS->getType()->isFPOrFPVector() || !isa<VectorType>(LHS->getType()))
- return Error(Loc, "vfcmp requires vector floating point operands");
- Inst = new VFCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
- } else if (Opc == Instruction::VICmp) {
- if (!LHS->getType()->isIntOrIntVector() || !isa<VectorType>(LHS->getType()))
- return Error(Loc, "vicmp requires vector floating point operands");
- Inst = new VICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
return false;
}
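
With vicmp and vfcmp gone, vector comparisons flow through the same two constructors as scalars and simply produce a vector of i1. A minimal sketch; makeCompare is an illustrative wrapper:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // One path for scalar and vector compares after this change.
    Instruction *makeCompare(bool isFP, unsigned Pred, Value *LHS, Value *RHS) {
      if (isFP)
        return new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
      return new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
    }
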
@@ -2906,12 +3185,12 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
LocTy Loc; Value *Op;
- PATypeHolder DestTy(Type::VoidTy);
+ PATypeHolder DestTy(Type::getVoidTy(Context));
if (ParseTypeAndValue(Op, Loc, PFS) ||
ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
ParseType(DestTy))
return true;
-
+
if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
return Error(Loc, "invalid cast opcode for cast from '" +
@@ -2933,10 +3212,10 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after select value") ||
ParseTypeAndValue(Op2, PFS))
return true;
-
+
if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
return Error(Loc, Reason);
-
+
Inst = SelectInst::Create(Op0, Op1, Op2);
return false;
}
@@ -2945,13 +3224,13 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'va_arg' TypeAndValue ',' Type
bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
Value *Op;
- PATypeHolder EltTy(Type::VoidTy);
+ PATypeHolder EltTy(Type::getVoidTy(Context));
LocTy TypeLoc;
if (ParseTypeAndValue(Op, PFS) ||
ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
ParseType(EltTy, TypeLoc))
return true;
-
+
if (!EltTy->isFirstClassType())
return Error(TypeLoc, "va_arg requires operand with first class type");
@@ -2968,11 +3247,11 @@ bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after extract value") ||
ParseTypeAndValue(Op1, PFS))
return true;
-
+
if (!ExtractElementInst::isValidOperands(Op0, Op1))
return Error(Loc, "invalid extractelement operands");
-
- Inst = new ExtractElementInst(Op0, Op1);
+
+ Inst = ExtractElementInst::Create(Op0, Op1);
return false;
}
@@ -2987,10 +3266,10 @@ bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after insertelement value") ||
ParseTypeAndValue(Op2, PFS))
return true;
-
+
if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
- return Error(Loc, "invalid extractelement operands");
-
+ return Error(Loc, "invalid insertelement operands");
+
Inst = InsertElementInst::Create(Op0, Op1, Op2);
return false;
}
@@ -3006,10 +3285,10 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after shuffle value") ||
ParseTypeAndValue(Op2, PFS))
return true;
-
+
if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
return Error(Loc, "invalid extractelement operands");
-
+
Inst = new ShuffleVectorInst(Op0, Op1, Op2);
return false;
}
@@ -3017,33 +3296,33 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
/// ParsePHI
 /// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
- PATypeHolder Ty(Type::VoidTy);
+ PATypeHolder Ty(Type::getVoidTy(Context));
Value *Op0, *Op1;
LocTy TypeLoc = Lex.getLoc();
-
+
if (ParseType(Ty) ||
ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
ParseValue(Ty, Op0, PFS) ||
ParseToken(lltok::comma, "expected ',' after insertelement value") ||
- ParseValue(Type::LabelTy, Op1, PFS) ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
ParseToken(lltok::rsquare, "expected ']' in phi value list"))
return true;
-
+
SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
while (1) {
PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
-
+
if (!EatIfPresent(lltok::comma))
break;
if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
ParseValue(Ty, Op0, PFS) ||
ParseToken(lltok::comma, "expected ',' after insertelement value") ||
- ParseValue(Type::LabelTy, Op1, PFS) ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
ParseToken(lltok::rsquare, "expected ']' in phi value list"))
return true;
}
-
+
if (!Ty->isFirstClassType())
return Error(TypeLoc, "phi node must have first class type");
@@ -3060,13 +3339,14 @@ bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
/// ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
- unsigned CC, RetAttrs, FnAttrs;
- PATypeHolder RetType(Type::VoidTy);
+ unsigned RetAttrs, FnAttrs;
+ CallingConv::ID CC;
+ PATypeHolder RetType(Type::getVoidTy(Context));
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
LocTy CallLoc = Lex.getLoc();
-
+
if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
ParseOptionalCallingConv(CC) ||
ParseOptionalAttrs(RetAttrs, 1) ||
@@ -3075,7 +3355,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
ParseParameterList(ArgList, PFS) ||
ParseOptionalAttrs(FnAttrs, 2))
return true;
-
+
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
@@ -3087,18 +3367,18 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
std::vector<const Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
ParamTypes.push_back(ArgList[i].V->getType());
-
+
if (!FunctionType::isValidReturnType(RetType))
return Error(RetTypeLoc, "Invalid result type for LLVM function");
-
- Ty = Context.getFunctionType(RetType, ParamTypes, false);
- PFTy = Context.getPointerTypeUnqual(Ty);
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
}
-
+
// Look up the callee.
Value *Callee;
if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
-
+
// FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
// function attributes.
unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
@@ -3111,9 +3391,9 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
-
+
SmallVector<Value*, 8> Args;
-
+
// Loop through FunctionType's arguments and ensure they are specified
// correctly. Also, gather any parameter attributes.
FunctionType::param_iterator I = Ty->param_begin();
@@ -3125,7 +3405,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
} else if (!Ty->isVarArg()) {
return Error(ArgList[i].Loc, "too many arguments specified");
}
-
+
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
return Error(ArgList[i].Loc, "argument is not of expected type '" +
ExpectedTy->getDescription() + "'");
@@ -3133,7 +3413,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if (ArgList[i].Attrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
}
-
+
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
@@ -3142,7 +3422,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
-
+
CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end());
CI->setTailCall(isTail);
CI->setCallingConv(CC);
@@ -3156,26 +3436,28 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
//===----------------------------------------------------------------------===//
/// ParseAlloc
-/// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalAlignment)?
-/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalAlignment)?
+/// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)?
+/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
- PATypeHolder Ty(Type::VoidTy);
+ PATypeHolder Ty(Type::getVoidTy(Context));
Value *Size = 0;
LocTy SizeLoc;
unsigned Alignment = 0;
if (ParseType(Ty)) return true;
if (EatIfPresent(lltok::comma)) {
- if (Lex.getKind() == lltok::kw_align) {
- if (ParseOptionalAlignment(Alignment)) return true;
- } else if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
- ParseOptionalCommaAlignment(Alignment)) {
- return true;
+ if (Lex.getKind() == lltok::kw_align
+ || Lex.getKind() == lltok::NamedOrCustomMD) {
+ if (ParseOptionalInfo(Alignment)) return true;
+ } else {
+ if (ParseTypeAndValue(Size, SizeLoc, PFS)) return true;
+ if (EatIfPresent(lltok::comma))
+ if (ParseOptionalInfo(Alignment)) return true;
}
}
- if (Size && Size->getType() != Type::Int32Ty)
+ if (Size && Size->getType() != Type::getInt32Ty(Context))
return Error(SizeLoc, "element count must be i32");
if (Opc == Instruction::Malloc)
@@ -3197,19 +3479,20 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseLoad
-/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' i32)?
+/// ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)?
bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val; LocTy Loc;
- unsigned Alignment;
- if (ParseTypeAndValue(Val, Loc, PFS) ||
- ParseOptionalCommaAlignment(Alignment))
- return true;
+ unsigned Alignment = 0;
+ if (ParseTypeAndValue(Val, Loc, PFS)) return true;
+
+ if (EatIfPresent(lltok::comma))
+ if (ParseOptionalInfo(Alignment)) return true;
if (!isa<PointerType>(Val->getType()) ||
!cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
return Error(Loc, "load operand must be a pointer to a first class type");
-
+
Inst = new LoadInst(Val, "", isVolatile, Alignment);
return false;
}
@@ -3219,20 +3502,22 @@ bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
- unsigned Alignment;
+ unsigned Alignment = 0;
if (ParseTypeAndValue(Val, Loc, PFS) ||
ParseToken(lltok::comma, "expected ',' after store operand") ||
- ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
- ParseOptionalCommaAlignment(Alignment))
+ ParseTypeAndValue(Ptr, PtrLoc, PFS))
return true;
-
+
+ if (EatIfPresent(lltok::comma))
+ if (ParseOptionalInfo(Alignment)) return true;
+
if (!isa<PointerType>(Ptr->getType()))
return Error(PtrLoc, "store operand must be a pointer");
if (!Val->getType()->isFirstClassType())
return Error(Loc, "store operand must be a first class value");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(Loc, "stored value and pointer type do not match");
-
+
Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
return false;
}
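
Both ParseLoad and ParseStore now default Alignment to 0, meaning "unspecified", when no ', align N' (or trailing metadata) follows. A condensed sketch of the constructors used above; the value and pointee types are assumed to match:

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Alignment == 0 leaves the access at target-default alignment.
    void makeLoadStore(Value *Val, Value *Ptr, bool isVolatile,
                       unsigned Alignment, BasicBlock *BB) {
      BB->getInstList().push_back(new LoadInst(Ptr, "", isVolatile, Alignment));
      BB->getInstList().push_back(new StoreInst(Val, Ptr, isVolatile, Alignment));
    }
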
@@ -3247,7 +3532,7 @@ bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after getresult operand") ||
ParseUInt32(Element, EltLoc))
return true;
-
+
if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
return Error(ValLoc, "getresult inst requires an aggregate operand");
if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
@@ -3257,26 +3542,35 @@ bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseGetElementPtr
-/// ::= 'getelementptr' TypeAndValue (',' TypeAndValue)*
+/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
bool LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Value *Ptr, *Val; LocTy Loc, EltLoc;
+
+ bool InBounds = EatIfPresent(lltok::kw_inbounds);
+
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
-
+
if (!isa<PointerType>(Ptr->getType()))
return Error(Loc, "base of getelementptr must be a pointer");
-
+
SmallVector<Value*, 16> Indices;
while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ break;
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
if (!isa<IntegerType>(Val->getType()))
return Error(EltLoc, "getelementptr index must be an integer");
Indices.push_back(Val);
}
-
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ if (ParseOptionalCustomMetadata()) return true;
+
if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
Indices.begin(), Indices.end()))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+ if (InBounds)
+ cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
return false;
}
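
The new 'inbounds' keyword maps to a single flag on the created instruction, exactly as in the last lines above. A hedged sketch; makeInboundsGEP is an illustrative helper:

    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // 'getelementptr inbounds' is an ordinary GEP plus setIsInBounds(true).
    Instruction *makeInboundsGEP(Value *Ptr, SmallVectorImpl<Value*> &Idx) {
      GetElementPtrInst *GEP =
          GetElementPtrInst::Create(Ptr, Idx.begin(), Idx.end());
      GEP->setIsInBounds(true);
      return GEP;
    }
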
@@ -3309,10 +3603,10 @@ bool LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(Val1, Loc1, PFS) ||
ParseIndexList(Indices))
return true;
-
+
if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
return Error(Loc0, "extractvalue operand must be array or struct");
-
+
if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
Indices.end()))
return Error(Loc0, "invalid indices for insertvalue");
@@ -3332,14 +3626,28 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
assert(Lex.getKind() == lltok::lbrace);
Lex.Lex();
do {
- Value *V;
+ Value *V = 0;
if (Lex.getKind() == lltok::kw_null) {
Lex.Lex();
V = 0;
} else {
- Constant *C;
- if (ParseGlobalTypeAndValue(C)) return true;
- V = C;
+ PATypeHolder Ty(Type::getVoidTy(Context));
+ if (ParseType(Ty)) return true;
+ if (Lex.getKind() == lltok::Metadata) {
+ Lex.Lex();
+ MetadataBase *Node = 0;
+ if (!ParseMDNode(Node))
+ V = Node;
+ else {
+ MetadataBase *MDS = 0;
+ if (ParseMDString(MDS)) return true;
+ V = MDS;
+ }
+ } else {
+ Constant *C;
+ if (ParseGlobalValue(Ty, C)) return true;
+ V = C;
+ }
}
Elts.push_back(V);
} while (EatIfPresent(lltok::comma));
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 6659620e6c93..97bf2f309f6d 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -28,6 +28,7 @@ namespace llvm {
class Instruction;
class Constant;
class GlobalValue;
+ class MetadataBase;
class MDString;
class MDNode;
struct ValID;
@@ -45,7 +46,9 @@ namespace llvm {
std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
std::vector<PATypeHolder> NumberedTypes;
/// MetadataCache - This map keeps track of parsed metadata constants.
- std::map<unsigned, Constant *> MetadataCache;
+ std::map<unsigned, MetadataBase *> MetadataCache;
+ std::map<unsigned, std::pair<MetadataBase *, LocTy> > ForwardRefMDNodes;
+ SmallVector<std::pair<unsigned, MDNode *>, 2> MDsOnInst;
struct UpRefRecord {
/// Loc - This is the location of the upref.
LocTy Loc;
@@ -74,7 +77,7 @@ namespace llvm {
std::vector<GlobalValue*> NumberedVals;
public:
LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
- Context(m->getContext()), Lex(F, SM, Err), M(m) {}
+ Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m) {}
bool Run();
LLVMContext& getContext() { return Context; }
@@ -123,9 +126,10 @@ namespace llvm {
bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
}
bool ParseOptionalVisibility(unsigned &Visibility);
- bool ParseOptionalCallingConv(unsigned &CC);
+ bool ParseOptionalCallingConv(CallingConv::ID &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
- bool ParseOptionalCommaAlignment(unsigned &Alignment);
+ bool ParseOptionalCustomMetadata();
+ bool ParseOptionalInfo(unsigned &Alignment);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices);
// Top-Level Entities
@@ -140,11 +144,15 @@ namespace llvm {
bool ParseDefine();
bool ParseGlobalType(bool &IsConstant);
+ bool ParseUnnamedGlobal();
bool ParseNamedGlobal();
bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
bool HasLinkage, unsigned Visibility);
bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
bool ParseStandaloneMetadata();
+ bool ParseNamedMetadata();
+ bool ParseMDString(MetadataBase *&S);
+ bool ParseMDNode(MetadataBase *&N);
// Type Parsing.
bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index cff89f8e472f..b3c59ee9d360 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -36,8 +36,9 @@ namespace lltok {
kw_declare, kw_define,
kw_global, kw_constant,
- kw_private, kw_internal, kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr,
- kw_appending, kw_dllimport, kw_dllexport, kw_common,kw_available_externally,
+ kw_private, kw_linker_private, kw_internal, kw_linkonce, kw_linkonce_odr,
+ kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common,
+ kw_available_externally,
kw_default, kw_hidden, kw_protected,
kw_extern_weak,
kw_external, kw_thread_local,
@@ -50,6 +51,10 @@ namespace lltok {
kw_deplibs,
kw_datalayout,
kw_volatile,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
+ kw_inbounds,
kw_align,
kw_addrspace,
kw_section,
@@ -57,7 +62,9 @@ namespace lltok {
kw_module,
kw_asm,
kw_sideeffect,
+ kw_msasm,
kw_gc,
+ kw_dbg,
kw_c,
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
@@ -77,6 +84,7 @@ namespace lltok {
kw_readnone,
kw_readonly,
+ kw_inlinehint,
kw_noinline,
kw_alwaysinline,
kw_optsize,
@@ -84,6 +92,7 @@ namespace lltok {
kw_sspreq,
kw_noredzone,
kw_noimplicitfloat,
+ kw_naked,
kw_type,
kw_opaque,
@@ -96,7 +105,7 @@ namespace lltok {
kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul,
kw_udiv, kw_sdiv, kw_fdiv,
kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr,
- kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp,
+ kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp,
kw_phi, kw_call,
kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp,
@@ -119,6 +128,7 @@ namespace lltok {
GlobalVar, // @foo @"foo"
LocalVar, // %foo %"foo"
StringConstant, // "foo"
+ NamedOrCustomMD, // !foo
// Metadata valued tokens.
Metadata, // !"foo" !{i8 42}
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index d66c13d39c09..331a23323b51 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -21,6 +21,24 @@
#include <cstring>
using namespace llvm;
+Module *llvm::ParseAssembly(MemoryBuffer *F,
+ Module *M,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(F, SMLoc());
+
+ // If we are parsing into an existing module, do it.
+ if (M)
+ return LLParser(F, SM, Err, M).Run() ? 0 : M;
+
+ // Otherwise create a new module.
+ OwningPtr<Module> M2(new Module(F->getBufferIdentifier(), Context));
+ if (LLParser(F, SM, Err, M2.get()).Run())
+ return 0;
+ return M2.take();
+}
+
Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
std::string ErrorStr;
@@ -31,13 +49,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
return 0;
}
- SourceMgr SM;
- SM.AddNewSourceBuffer(F, SMLoc());
-
- OwningPtr<Module> M(new Module(Filename, Context));
- if (LLParser(F, SM, Err, M.get()).Run())
- return 0;
- return M.take();
+ return ParseAssembly(F, 0, Err, Context);
}
Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
@@ -45,17 +57,6 @@ Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
MemoryBuffer *F =
MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString),
"<string>");
-
- SourceMgr SM;
- SM.AddNewSourceBuffer(F, SMLoc());
- // If we are parsing into an existing module, do it.
- if (M)
- return LLParser(F, SM, Err, M).Run() ? 0 : M;
-
- // Otherwise create a new module.
- OwningPtr<Module> M2(new Module("<string>", Context));
- if (LLParser(F, SM, Err, M2.get()).Run())
- return 0;
- return M2.take();
+ return ParseAssembly(F, M, Err, Context);
}
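
The refactor hoists the shared buffer-to-module logic into one ParseAssembly entry point, leaving ParseAssemblyFile and ParseAssemblyString as thin wrappers. A hedged usage sketch; the llvm/Assembly/Parser.h location is assumed from where the existing entry points are declared:

    #include "llvm/Assembly/Parser.h"
    #include "llvm/Module.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    // Passing M == 0 makes ParseAssembly allocate a fresh module named
    // after the buffer; a non-null M is parsed into and returned instead.
    Module *parseBuffer(MemoryBuffer *Buf, LLVMContext &Ctx) {
      SMDiagnostic Err;
      return ParseAssembly(Buf, 0, Err, Ctx);
    }
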
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index e5b8f7c7685a..f513d41ce3b4 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -34,12 +34,12 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
return 0;
}
-int LLVMParseBitcodeInContext(LLVMMemoryBufferRef MemBuf,
- LLVMContextRef ContextRef,
+int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule, char **OutMessage) {
std::string Message;
- *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
+ *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
&Message));
if (!*OutModule) {
if (OutMessage)
@@ -70,13 +70,13 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
return 0;
}
-int LLVMGetBitcodeModuleProviderInContext(LLVMMemoryBufferRef MemBuf,
- LLVMContextRef ContextRef,
+int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
LLVMModuleProviderRef *OutMP,
char **OutMessage) {
std::string Message;
- *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef),
+ *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef),
&Message));
if (!*OutMP) {
if (OutMessage)
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 5943de2f8121..4eb12c69eb6e 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -16,9 +16,11 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/MDNode.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/AutoUpgrade.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -32,7 +34,8 @@ void BitcodeReader::FreeState() {
Buffer = 0;
std::vector<PATypeHolder>().swap(TypeList);
ValueList.clear();
-
+ MDValueList.clear();
+
std::vector<AttrListPtr>().swap(MAttributes);
std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
@@ -50,7 +53,7 @@ static bool ConvertToString(SmallVector<uint64_t, 64> &Record, unsigned Idx,
StrTy &Result) {
if (Idx > Record.size())
return true;
-
+
for (unsigned i = Idx, e = Record.size(); i != e; ++i)
Result += (char)Record[i];
return false;
@@ -59,19 +62,20 @@ static bool ConvertToString(SmallVector<uint64_t, 64> &Record, unsigned Idx,
static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
switch (Val) {
default: // Map unknown/new linkages to external
- case 0: return GlobalValue::ExternalLinkage;
- case 1: return GlobalValue::WeakAnyLinkage;
- case 2: return GlobalValue::AppendingLinkage;
- case 3: return GlobalValue::InternalLinkage;
- case 4: return GlobalValue::LinkOnceAnyLinkage;
- case 5: return GlobalValue::DLLImportLinkage;
- case 6: return GlobalValue::DLLExportLinkage;
- case 7: return GlobalValue::ExternalWeakLinkage;
- case 8: return GlobalValue::CommonLinkage;
- case 9: return GlobalValue::PrivateLinkage;
+ case 0: return GlobalValue::ExternalLinkage;
+ case 1: return GlobalValue::WeakAnyLinkage;
+ case 2: return GlobalValue::AppendingLinkage;
+ case 3: return GlobalValue::InternalLinkage;
+ case 4: return GlobalValue::LinkOnceAnyLinkage;
+ case 5: return GlobalValue::DLLImportLinkage;
+ case 6: return GlobalValue::DLLExportLinkage;
+ case 7: return GlobalValue::ExternalWeakLinkage;
+ case 8: return GlobalValue::CommonLinkage;
+ case 9: return GlobalValue::PrivateLinkage;
case 10: return GlobalValue::WeakODRLinkage;
case 11: return GlobalValue::LinkOnceODRLinkage;
case 12: return GlobalValue::AvailableExternallyLinkage;
+ case 13: return GlobalValue::LinkerPrivateLinkage;
}
}
@@ -137,19 +141,19 @@ namespace {
void *operator new(size_t s) {
return User::operator new(s, 1);
}
- explicit ConstantPlaceHolder(const Type *Ty)
+ explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context)
: ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
- Op<0>() = UndefValue::get(Type::Int32Ty);
+ Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
}
-
+
/// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const ConstantPlaceHolder *) { return true; }
static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) &&
+ return isa<ConstantExpr>(V) &&
cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
}
-
-
+
+
/// Provide fast operand accessors
//DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -157,7 +161,7 @@ namespace {
// FIXME: can we inherit this from ConstantExpr?
template <>
-struct OperandTraits<ConstantPlaceHolder> : FixedNumOperandTraits<1> {
+struct OperandTraits<ConstantPlaceHolder> : public FixedNumOperandTraits<1> {
};
}
@@ -167,16 +171,16 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
push_back(V);
return;
}
-
+
if (Idx >= size())
resize(Idx+1);
-
+
WeakVH &OldV = ValuePtrs[Idx];
if (OldV == 0) {
OldV = V;
return;
}
-
+
// Handle constants and non-constants (e.g. instrs) differently for
// efficiency.
if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
@@ -189,7 +193,7 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
delete PrevVal;
}
}
-
+
Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
const Type *Ty) {
@@ -202,7 +206,7 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
}
// Create and return a placeholder, which will later be RAUW'd.
- Constant *C = new ConstantPlaceHolder(Ty);
+ Constant *C = new ConstantPlaceHolder(Ty, Context);
ValuePtrs[Idx] = C;
return C;
}
@@ -210,15 +214,15 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
if (Idx >= size())
resize(Idx + 1);
-
+
if (Value *V = ValuePtrs[Idx]) {
assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
return V;
}
-
+
// No type specified, must be invalid reference.
if (Ty == 0) return 0;
-
+
// Create and return a placeholder, which will later be RAUW'd.
Value *V = new Argument(Ty);
ValuePtrs[Idx] = V;
@@ -233,30 +237,30 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
/// uses and rewrite all the place holders at once for any constant that uses
/// a placeholder.
void BitcodeReaderValueList::ResolveConstantForwardRefs() {
- // Sort the values by-pointer so that they are efficient to look up with a
+ // Sort the values by-pointer so that they are efficient to look up with a
// binary search.
std::sort(ResolveConstants.begin(), ResolveConstants.end());
-
+
SmallVector<Constant*, 64> NewOps;
-
+
while (!ResolveConstants.empty()) {
Value *RealVal = operator[](ResolveConstants.back().second);
Constant *Placeholder = ResolveConstants.back().first;
ResolveConstants.pop_back();
-
+
// Loop over all users of the placeholder, updating them to reference the
// new value. If they reference more than one placeholder, update them all
// at once.
while (!Placeholder->use_empty()) {
Value::use_iterator UI = Placeholder->use_begin();
-
+
// If the using object isn't uniqued, just update the operands. This
// handles instructions and initializers for global variables.
if (!isa<Constant>(*UI) || isa<GlobalValue>(*UI)) {
UI.getUse().set(RealVal);
continue;
}
-
+
// Otherwise, we have a constant that uses the placeholder. Replace that
// constant with a new constant that has *all* placeholder uses updated.
Constant *UserC = cast<Constant>(*UI);
@@ -271,8 +275,8 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
NewOp = RealVal;
} else {
// Otherwise, look up the placeholder in ResolveConstants.
- ResolveConstantsTy::iterator It =
- std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
+ ResolveConstantsTy::iterator It =
+ std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
std::pair<Constant*, unsigned>(cast<Constant>(*I),
0));
assert(It != ResolveConstants.end() && It->first == *I);
@@ -285,10 +289,11 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
// Make the new constant.
Constant *NewC;
if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
- NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], NewOps.size());
+ NewC = ConstantArray::get(UserCA->getType(), &NewOps[0],
+ NewOps.size());
} else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
- NewC = ConstantStruct::get(&NewOps[0], NewOps.size(),
- UserCS->getType()->isPacked());
+ NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
+ UserCS->getType()->isPacked());
} else if (isa<ConstantVector>(UserC)) {
NewC = ConstantVector::get(&NewOps[0], NewOps.size());
} else {
@@ -296,29 +301,67 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0],
NewOps.size());
}
-
+
UserC->replaceAllUsesWith(NewC);
UserC->destroyConstant();
NewOps.clear();
}
-
+
// Update all ValueHandles, they should be the only users at this point.
Placeholder->replaceAllUsesWith(RealVal);
delete Placeholder;
}
}
+void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = MDValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = OldV;
+ OldV->replaceAllUsesWith(V);
+ delete PrevVal;
+ // Deleting PrevVal sets Idx value in MDValuePtrs to null. Set new
+ // value for Idx.
+ MDValuePtrs[Idx] = V;
+}
+
+Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = MDValuePtrs[Idx]) {
+ assert(V->getType()->isMetadataTy() && "Type mismatch in value table!");
+ return V;
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = new Argument(Type::getMetadataTy(Context));
+ MDValuePtrs[Idx] = V;
+ return V;
+}
const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) {
// If the TypeID is in range, return it.
if (ID < TypeList.size())
return TypeList[ID].get();
if (!isTypeTable) return 0;
-
+
// The type table allows forward references. Push as many Opaque types as
// needed to get up to ID.
while (TypeList.size() <= ID)
- TypeList.push_back(OpaqueType::get());
+ TypeList.push_back(OpaqueType::get(Context));
return TypeList.back().get();
}
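
getTypeByID resolves forward references by padding the table with fresh OpaqueTypes, which the type-table reader later replaces via refineAbstractTypeTo. A condensed sketch of the padding step, under the same 2.6-era types:

    #include "llvm/AbstractTypeUser.h"
    #include "llvm/DerivedTypes.h"
    #include <vector>
    using namespace llvm;

    // Grow the table up to ID with placeholders; the concrete type later
    // refines the opaque one in place.
    const Type *getOrCreateTypeSlot(std::vector<PATypeHolder> &TypeList,
                                    unsigned ID, LLVMContext &Context) {
      while (TypeList.size() <= ID)
        TypeList.push_back(OpaqueType::get(Context));
      return TypeList[ID].get();
    }
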
@@ -329,14 +372,14 @@ const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) {
bool BitcodeReader::ParseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
return Error("Malformed block record");
-
+
if (!MAttributes.empty())
return Error("Multiple PARAMATTR blocks found!");
-
+
SmallVector<uint64_t, 64> Record;
-
+
SmallVector<AttributeWithIndex, 8> Attrs;
-
+
// Read all the records.
while (1) {
unsigned Code = Stream.ReadCode();
@@ -345,7 +388,7 @@ bool BitcodeReader::ParseAttributeBlock() {
return Error("Error at end of PARAMATTR block");
return false;
}
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
@@ -353,12 +396,12 @@ bool BitcodeReader::ParseAttributeBlock() {
return Error("Malformed block record");
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
Record.clear();
switch (Stream.ReadRecord(Code, Record)) {
@@ -397,14 +440,14 @@ bool BitcodeReader::ParseAttributeBlock() {
unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
Attribute::ReadOnly|Attribute::ReadNone);
-
+
if (FnAttribute == Attribute::None && RetAttribute != Attribute::None &&
(RetAttribute & OldRetAttrs) != 0) {
if (FnAttribute == Attribute::None) { // add a slot so they get added.
Record.push_back(~0U);
Record.push_back(0);
}
-
+
FnAttribute |= RetAttribute & OldRetAttrs;
RetAttribute &= ~OldRetAttrs;
}
@@ -432,7 +475,7 @@ bool BitcodeReader::ParseAttributeBlock() {
bool BitcodeReader::ParseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID))
return Error("Malformed block record");
-
+
if (!TypeList.empty())
return Error("Multiple TYPE_BLOCKs found!");
@@ -449,7 +492,7 @@ bool BitcodeReader::ParseTypeTable() {
return Error("Error at end of type table block");
return false;
}
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
@@ -457,12 +500,12 @@ bool BitcodeReader::ParseTypeTable() {
return Error("Malformed block record");
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
Record.clear();
const Type *ResultTy = 0;
@@ -478,46 +521,47 @@ bool BitcodeReader::ParseTypeTable() {
TypeList.reserve(Record[0]);
continue;
case bitc::TYPE_CODE_VOID: // VOID
- ResultTy = Type::VoidTy;
+ ResultTy = Type::getVoidTy(Context);
break;
case bitc::TYPE_CODE_FLOAT: // FLOAT
- ResultTy = Type::FloatTy;
+ ResultTy = Type::getFloatTy(Context);
break;
case bitc::TYPE_CODE_DOUBLE: // DOUBLE
- ResultTy = Type::DoubleTy;
+ ResultTy = Type::getDoubleTy(Context);
break;
case bitc::TYPE_CODE_X86_FP80: // X86_FP80
- ResultTy = Type::X86_FP80Ty;
+ ResultTy = Type::getX86_FP80Ty(Context);
break;
case bitc::TYPE_CODE_FP128: // FP128
- ResultTy = Type::FP128Ty;
+ ResultTy = Type::getFP128Ty(Context);
break;
case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
- ResultTy = Type::PPC_FP128Ty;
+ ResultTy = Type::getPPC_FP128Ty(Context);
break;
case bitc::TYPE_CODE_LABEL: // LABEL
- ResultTy = Type::LabelTy;
+ ResultTy = Type::getLabelTy(Context);
break;
case bitc::TYPE_CODE_OPAQUE: // OPAQUE
ResultTy = 0;
break;
case bitc::TYPE_CODE_METADATA: // METADATA
- ResultTy = Type::MetadataTy;
+ ResultTy = Type::getMetadataTy(Context);
break;
case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
if (Record.size() < 1)
return Error("Invalid Integer type record");
-
- ResultTy = IntegerType::get(Record[0]);
+
+ ResultTy = IntegerType::get(Context, Record[0]);
break;
- case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
if (Record.size() < 1)
return Error("Invalid POINTER type record");
unsigned AddressSpace = 0;
if (Record.size() == 2)
AddressSpace = Record[1];
- ResultTy = PointerType::get(getTypeByID(Record[0], true), AddressSpace);
+ ResultTy = PointerType::get(getTypeByID(Record[0], true),
+ AddressSpace);
break;
}
case bitc::TYPE_CODE_FUNCTION: {
@@ -528,7 +572,7 @@ bool BitcodeReader::ParseTypeTable() {
std::vector<const Type*> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i)
ArgTys.push_back(getTypeByID(Record[i], true));
-
+
ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys,
Record[0]);
break;
@@ -539,7 +583,7 @@ bool BitcodeReader::ParseTypeTable() {
std::vector<const Type*> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i)
EltTys.push_back(getTypeByID(Record[i], true));
- ResultTy = StructType::get(EltTys, Record[0]);
+ ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
@@ -553,10 +597,10 @@ bool BitcodeReader::ParseTypeTable() {
ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]);
break;
}
-
+
if (NumRecords == TypeList.size()) {
// If this is a new type slot, just append it.
- TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get());
+ TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get(Context));
++NumRecords;
} else if (ResultTy == 0) {
// Otherwise, this was forward referenced, so an opaque type was created,
@@ -568,14 +612,14 @@ bool BitcodeReader::ParseTypeTable() {
// Resolve the opaque type to the real type now.
assert(NumRecords < TypeList.size() && "Typelist imbalance");
const OpaqueType *OldTy = cast<OpaqueType>(TypeList[NumRecords++].get());
-
+
// Don't directly push the new type on the Tab. Instead we want to replace
// the opaque type we previously inserted with the new concrete value. The
// refinement from the abstract (opaque) type to the new type causes all
// uses of the abstract type to use the concrete type (NewTy). This will
// also cause the opaque type to be deleted.
const_cast<OpaqueType*>(OldTy)->refineAbstractTypeTo(ResultTy);
-
+
// This should have replaced the old opaque type with the new type in the
// value table... or with a preexisting type that was already in the
// system. Let's just make sure it did.
@@ -589,9 +633,9 @@ bool BitcodeReader::ParseTypeTable() {
bool BitcodeReader::ParseTypeSymbolTable() {
if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID))
return Error("Malformed block record");
-
+
SmallVector<uint64_t, 64> Record;
-
+
// Read all the records for this type table.
std::string TypeName;
while (1) {
@@ -601,7 +645,7 @@ bool BitcodeReader::ParseTypeSymbolTable() {
return Error("Error at end of type symbol table block");
return false;
}
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
@@ -609,12 +653,12 @@ bool BitcodeReader::ParseTypeSymbolTable() {
return Error("Malformed block record");
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
Record.clear();
switch (Stream.ReadRecord(Code, Record)) {
@@ -639,7 +683,7 @@ bool BitcodeReader::ParseValueSymbolTable() {
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
-
+
// Read all the records for this value table.
SmallString<128> ValueName;
while (1) {
@@ -648,7 +692,7 @@ bool BitcodeReader::ParseValueSymbolTable() {
if (Stream.ReadBlockEnd())
return Error("Error at end of value symbol table block");
return false;
- }
+ }
if (Code == bitc::ENTER_SUBBLOCK) {
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
@@ -656,12 +700,12 @@ bool BitcodeReader::ParseValueSymbolTable() {
return Error("Malformed block record");
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
Record.clear();
switch (Stream.ReadRecord(Code, Record)) {
@@ -674,8 +718,8 @@ bool BitcodeReader::ParseValueSymbolTable() {
if (ValueID >= ValueList.size())
return Error("Invalid Value ID in VST_ENTRY record");
Value *V = ValueList[ValueID];
-
- V->setName(&ValueName[0], ValueName.size());
+
+ V->setName(StringRef(ValueName.data(), ValueName.size()));
ValueName.clear();
break;
}
@@ -685,8 +729,8 @@ bool BitcodeReader::ParseValueSymbolTable() {
BasicBlock *BB = getBasicBlock(Record[0]);
if (BB == 0)
return Error("Invalid BB ID in VST_BBENTRY record");
-
- BB->setName(&ValueName[0], ValueName.size());
+
+ BB->setName(StringRef(ValueName.data(), ValueName.size()));
ValueName.clear();
break;
}
@@ -694,12 +738,121 @@ bool BitcodeReader::ParseValueSymbolTable() {
}
}
+bool BitcodeReader::ParseMetadata() {
+ unsigned NextValueNo = MDValueList.size();
+
+ if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of PARAMATTR block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_NAME: {
+      // Read the name of the named metadata.
+ unsigned NameLength = Record.size();
+ SmallString<8> Name;
+ Name.resize(NameLength);
+ for (unsigned i = 0; i != NameLength; ++i)
+ Name[i] = Record[i];
+ Record.clear();
+ Code = Stream.ReadCode();
+
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE.
+ if (Stream.ReadRecord(Code, Record) != bitc::METADATA_NAMED_NODE)
+        assert(0 && "Invalid Named Metadata record");
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ SmallVector<MetadataBase*, 8> Elts;
+ for (unsigned i = 0; i != Size; ++i) {
+ Value *MD = MDValueList.getValueFwdRef(Record[i]);
+ if (MetadataBase *B = dyn_cast<MetadataBase>(MD))
+ Elts.push_back(B);
+ }
+ Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(),
+ Elts.size(), TheModule);
+ MDValueList.AssignValue(V, NextValueNo++);
+ break;
+ }
+ case bitc::METADATA_NODE: {
+ if (Record.empty() || Record.size() % 2 == 1)
+ return Error("Invalid METADATA_NODE record");
+
+ unsigned Size = Record.size();
+ SmallVector<Value*, 8> Elts;
+ for (unsigned i = 0; i != Size; i += 2) {
+ const Type *Ty = getTypeByID(Record[i], false);
+ if (Ty->isMetadataTy())
+ Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
+ else if (Ty != Type::getVoidTy(Context))
+ Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
+ else
+ Elts.push_back(NULL);
+ }
+ Value *V = MDNode::get(Context, &Elts[0], Elts.size());
+ MDValueList.AssignValue(V, NextValueNo++);
+ break;
+ }
+ case bitc::METADATA_STRING: {
+ unsigned MDStringLength = Record.size();
+ SmallString<8> String;
+ String.resize(MDStringLength);
+ for (unsigned i = 0; i != MDStringLength; ++i)
+ String[i] = Record[i];
+ Value *V = MDString::get(Context,
+ StringRef(String.data(), String.size()));
+ MDValueList.AssignValue(V, NextValueNo++);
+ break;
+ }
+ case bitc::METADATA_KIND: {
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || RecordLength < 2)
+ return Error("Invalid METADATA_KIND record");
+ SmallString<8> Name;
+ Name.resize(RecordLength-1);
+ unsigned Kind = Record[0];
+ for (unsigned i = 1; i != RecordLength; ++i)
+ Name[i-1] = Record[i];
+ MetadataContext &TheMetadata = Context.getMetadata();
+ TheMetadata.MDHandlerNames[Name.str()] = Kind;
+ break;
+ }
+ }
+ }
+}
+
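Reviewer note: METADATA_NAME and METADATA_STRING both use the same one-character-per-64-bit-element layout as the VST entries above. A minimal standalone sketch of the decode step (hypothetical helper, not part of the patch):

    #include <string>
    #include <vector>
    #include <stdint.h>

    // Illustration only: mirrors the copy loops in ParseMetadata above.
    static std::string decodeMetadataString(const std::vector<uint64_t> &Record) {
      std::string S;
      S.reserve(Record.size());
      for (unsigned i = 0, e = Record.size(); i != e; ++i)
        S.push_back(static_cast<char>(Record[i]));
      return S;
    }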
/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in
/// the LSB for dense VBR encoding.
static uint64_t DecodeSignRotatedValue(uint64_t V) {
if ((V & 1) == 0)
return V >> 1;
- if (V != 1)
+ if (V != 1)
return -(V >> 1);
// There is no such thing as -0 with integers. "-0" really means MININT.
return 1ULL << 63;
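Reviewer note: the writer-side counterpart stores the sign bit in the LSB so small negative values stay small under VBR. A sketch, assuming the writer keeps the same convention (helper name hypothetical, not part of the patch):

    // Illustration only; not valid for INT64_MIN, whose on-disk special
    // case is the encoded value 1 handled by the decoder above.
    static uint64_t EmitSignRotatedValue(int64_t V) {
      if (V >= 0)
        return (uint64_t)V << 1;        // LSB = 0: non-negative
      return ((uint64_t)-V << 1) | 1;   // LSB = 1: negated magnitude
    }
    // Round trip: EmitSignRotatedValue(-5) == 11, and
    // DecodeSignRotatedValue(11) == (uint64_t)-5.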
@@ -710,7 +863,7 @@ static uint64_t DecodeSignRotatedValue(uint64_t V) {
bool BitcodeReader::ResolveGlobalAndAliasInits() {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
-
+
GlobalInitWorklist.swap(GlobalInits);
AliasInitWorklist.swap(AliasInits);
@@ -725,7 +878,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() {
else
return Error("Global variable initializer is not a constant!");
}
- GlobalInitWorklist.pop_back();
+ GlobalInitWorklist.pop_back();
}
while (!AliasInitWorklist.empty()) {
@@ -738,26 +891,25 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() {
else
return Error("Alias initializer is not a constant!");
}
- AliasInitWorklist.pop_back();
+ AliasInitWorklist.pop_back();
}
return false;
}
-
bool BitcodeReader::ParseConstants() {
if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
-
+
// Read all the records for this value table.
- const Type *CurTy = Type::Int32Ty;
+ const Type *CurTy = Type::getInt32Ty(Context);
unsigned NextCstNo = ValueList.size();
while (1) {
unsigned Code = Stream.ReadCode();
if (Code == bitc::END_BLOCK)
break;
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
@@ -765,16 +917,17 @@ bool BitcodeReader::ParseConstants() {
return Error("Malformed block record");
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
Record.clear();
Value *V = 0;
- switch (Stream.ReadRecord(Code, Record)) {
+ unsigned BitCode = Stream.ReadRecord(Code, Record);
+ switch (BitCode) {
default: // Default behavior: unknown constant
case bitc::CST_CODE_UNDEF: // UNDEF
V = UndefValue::get(CurTy);
@@ -797,45 +950,46 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!isa<IntegerType>(CurTy) || Record.empty())
return Error("Invalid WIDE_INTEGER record");
-
+
unsigned NumWords = Record.size();
SmallVector<uint64_t, 8> Words;
Words.resize(NumWords);
for (unsigned i = 0; i != NumWords; ++i)
Words[i] = DecodeSignRotatedValue(Record[i]);
- V = ConstantInt::get(APInt(cast<IntegerType>(CurTy)->getBitWidth(),
- NumWords, &Words[0]));
+ V = ConstantInt::get(Context,
+ APInt(cast<IntegerType>(CurTy)->getBitWidth(),
+ NumWords, &Words[0]));
break;
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
return Error("Invalid FLOAT record");
- if (CurTy == Type::FloatTy)
- V = ConstantFP::get(APFloat(APInt(32, (uint32_t)Record[0])));
- else if (CurTy == Type::DoubleTy)
- V = ConstantFP::get(APFloat(APInt(64, Record[0])));
- else if (CurTy == Type::X86_FP80Ty) {
+ if (CurTy->isFloatTy())
+ V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0])));
+ else if (CurTy->isDoubleTy())
+ V = ConstantFP::get(Context, APFloat(APInt(64, Record[0])));
+ else if (CurTy->isX86_FP80Ty()) {
// Bits are not stored the same way as a normal i80 APInt, compensate.
uint64_t Rearrange[2];
Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
Rearrange[1] = Record[0] >> 48;
- V = ConstantFP::get(APFloat(APInt(80, 2, Rearrange)));
- } else if (CurTy == Type::FP128Ty)
- V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0]), true));
- else if (CurTy == Type::PPC_FP128Ty)
- V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0])));
+ V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange)));
+ } else if (CurTy->isFP128Ty())
+ V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true));
+ else if (CurTy->isPPC_FP128Ty())
+ V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0])));
else
V = UndefValue::get(CurTy);
break;
}
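Reviewer note: the Rearrange step undoes the writer's two-word split of the 80-bit value. A sketch of the inverse, under the assumption that record word 0 carries bits 16..79 and record word 1 carries bits 0..15 (helper name hypothetical):

    // Illustration only. In[0] = APInt word 0 (bits 0..63),
    // In[1] = APInt word 1 (bits 64..79).
    static void SplitX86FP80(const uint64_t In[2], uint64_t Out[2]) {
      Out[0] = (In[0] >> 16) | (In[1] << 48);  // record word 0: bits 16..79
      Out[1] = In[0] & 0xffff;                 // record word 1: bits 0..15
    }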
-
+
case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
if (Record.empty())
return Error("Invalid CST_AGGREGATE record");
-
+
unsigned Size = Record.size();
std::vector<Constant*> Elts;
-
+
if (const StructType *STy = dyn_cast<StructType>(CurTy)) {
for (unsigned i = 0; i != Size; ++i)
Elts.push_back(ValueList.getConstantFwdRef(Record[i],
@@ -862,7 +1016,7 @@ bool BitcodeReader::ParseConstants() {
const ArrayType *ATy = cast<ArrayType>(CurTy);
const Type *EltTy = ATy->getElementType();
-
+
unsigned Size = Record.size();
std::vector<Constant*> Elts;
for (unsigned i = 0; i != Size; ++i)
@@ -873,10 +1027,10 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
return Error("Invalid CST_AGGREGATE record");
-
+
const ArrayType *ATy = cast<ArrayType>(CurTy);
const Type *EltTy = ATy->getElementType();
-
+
unsigned Size = Record.size();
std::vector<Constant*> Elts;
for (unsigned i = 0; i != Size; ++i)
@@ -893,10 +1047,24 @@ bool BitcodeReader::ParseConstants() {
} else {
Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
- V = ConstantExpr::get(Opc, LHS, RHS);
+ unsigned Flags = 0;
+ if (Record.size() >= 4) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul) {
+ if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ } else if (Opc == Instruction::SDiv) {
+ if (Record[3] & (1 << bitc::SDIV_EXACT))
+ Flags |= SDivOperator::IsExact;
+ }
+ }
+ V = ConstantExpr::get(Opc, LHS, RHS, Flags);
}
break;
- }
+    }
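Reviewer note: the optional fourth operand added here is a small bitfield. A standalone sketch of the decode, assuming the bit positions bitc::OBO_NO_UNSIGNED_WRAP == 0, bitc::OBO_NO_SIGNED_WRAP == 1 and bitc::SDIV_EXACT == 0 used in this revision (helper is hypothetical):

    #include <stdint.h>

    struct BinopFlags { bool NUW, NSW, Exact; };

    // Illustration only; IsOverflowing means the opcode is Add/Sub/Mul.
    static BinopFlags DecodeBinopFlags(uint64_t Word, bool IsOverflowing,
                                       bool IsSDiv) {
      BinopFlags F = { false, false, false };
      if (IsOverflowing) {
        F.NUW = (Word >> 0) & 1;   // assumed: bitc::OBO_NO_UNSIGNED_WRAP == 0
        F.NSW = (Word >> 1) & 1;   // assumed: bitc::OBO_NO_SIGNED_WRAP == 1
      } else if (IsSDiv) {
        F.Exact = (Word >> 0) & 1; // assumed: bitc::SDIV_EXACT == 0
      }
      return F;
    }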
case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
if (Record.size() < 3) return Error("Invalid CE_CAST record");
int Opc = GetDecodedCastOpcode(Record[0]);
@@ -909,7 +1077,8 @@ bool BitcodeReader::ParseConstants() {
V = ConstantExpr::getCast(Opc, Op, CurTy);
}
break;
- }
+ }
+ case bitc::CST_CODE_CE_INBOUNDS_GEP:
case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
if (Record.size() & 1) return Error("Invalid CE_GEP record");
SmallVector<Constant*, 16> Elts;
@@ -918,23 +1087,28 @@ bool BitcodeReader::ParseConstants() {
if (!ElTy) return Error("Invalid CE_GEP record");
Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
}
- V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], Elts.size()-1);
+ if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP)
+ V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1],
+ Elts.size()-1);
+ else
+ V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1],
+ Elts.size()-1);
break;
}
case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
if (Record.size() < 3) return Error("Invalid CE_SELECT record");
V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
- Type::Int1Ty),
+ Type::getInt1Ty(Context)),
ValueList.getConstantFwdRef(Record[1],CurTy),
ValueList.getConstantFwdRef(Record[2],CurTy));
break;
case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
- const VectorType *OpTy =
+ const VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
V = ConstantExpr::getExtractElement(Op0, Op1);
break;
}
@@ -945,7 +1119,7 @@ bool BitcodeReader::ParseConstants() {
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
- Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty);
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
break;
}
@@ -955,7 +1129,8 @@ bool BitcodeReader::ParseConstants() {
return Error("Invalid CE_SHUFFLEVEC record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
- const Type *ShufTy=VectorType::get(Type::Int32Ty, OpTy->getNumElements());
+ const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ OpTy->getNumElements());
Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy);
V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
break;
@@ -967,7 +1142,8 @@ bool BitcodeReader::ParseConstants() {
return Error("Invalid CE_SHUFVEC_EX record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
- const Type *ShufTy=VectorType::get(Type::Int32Ty, RTy->getNumElements());
+ const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ RTy->getNumElements());
Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy);
V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
break;
@@ -981,72 +1157,43 @@ bool BitcodeReader::ParseConstants() {
if (OpTy->isFloatingPoint())
V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
- else if (!isa<VectorType>(OpTy))
- V = ConstantExpr::getICmp(Record[3], Op0, Op1);
- else if (OpTy->isFPOrFPVector())
- V = ConstantExpr::getVFCmp(Record[3], Op0, Op1);
else
- V = ConstantExpr::getVICmp(Record[3], Op0, Op1);
+ V = ConstantExpr::getICmp(Record[3], Op0, Op1);
break;
}
case bitc::CST_CODE_INLINEASM: {
if (Record.size() < 2) return Error("Invalid INLINEASM record");
std::string AsmStr, ConstrStr;
- bool HasSideEffects = Record[0];
+ bool HasSideEffects = Record[0] & 1;
+ bool IsMsAsm = Record[0] >> 1;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
return Error("Invalid INLINEASM record");
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
return Error("Invalid INLINEASM record");
-
+
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[3+AsmStrSize+i];
const PointerType *PTy = cast<PointerType>(CurTy);
V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
- AsmStr, ConstrStr, HasSideEffects);
- break;
- }
- case bitc::CST_CODE_MDSTRING: {
- if (Record.size() < 2) return Error("Invalid MDSTRING record");
- unsigned MDStringLength = Record.size();
- SmallString<8> String;
- String.resize(MDStringLength);
- for (unsigned i = 0; i != MDStringLength; ++i)
- String[i] = Record[i];
- V = MDString::get(String.c_str(), String.c_str() + MDStringLength);
- break;
- }
- case bitc::CST_CODE_MDNODE: {
- if (Record.empty() || Record.size() % 2 == 1)
- return Error("Invalid CST_MDNODE record");
-
- unsigned Size = Record.size();
- SmallVector<Value*, 8> Elts;
- for (unsigned i = 0; i != Size; i += 2) {
- const Type *Ty = getTypeByID(Record[i], false);
- if (Ty != Type::VoidTy)
- Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
- else
- Elts.push_back(NULL);
- }
- V = MDNode::get(&Elts[0], Elts.size());
+ AsmStr, ConstrStr, HasSideEffects, IsMsAsm);
break;
}
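Reviewer note: Record[0] used to be a bare boolean; the new msasm flag is packed into bit 1 so bit 0 keeps its old meaning for existing readers. Sketch of the packing, assuming the writer mirrors the reader (helper name hypothetical):

    // Illustration only.
    static uint64_t PackInlineAsmFlags(bool HasSideEffects, bool IsMsAsm) {
      return ((uint64_t)IsMsAsm << 1) | (uint64_t)HasSideEffects;
    }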
}
-
+
ValueList.AssignValue(V, NextCstNo);
++NextCstNo;
}
-
+
if (NextCstNo != ValueList.size())
return Error("Invalid constant reference!");
-
+
if (Stream.ReadBlockEnd())
return Error("Error at end of constants block");
-
+
// Once all the constants have been read, go through and resolve forward
// references.
ValueList.ResolveConstantForwardRefs();
@@ -1060,18 +1207,18 @@ bool BitcodeReader::RememberAndSkipFunctionBody() {
// Get the function we are talking about.
if (FunctionsWithBodies.empty())
return Error("Insufficient function protos");
-
+
Function *Fn = FunctionsWithBodies.back();
FunctionsWithBodies.pop_back();
-
+
// Save the current stream state.
uint64_t CurBit = Stream.GetCurrentBitNo();
DeferredFunctionInfo[Fn] = std::make_pair(CurBit, Fn->getLinkage());
-
+
// Set the functions linkage to GhostLinkage so we know it is lazily
// deserialized.
Fn->setLinkage(GlobalValue::GhostLinkage);
-
+
// Skip over the function block for now.
if (Stream.SkipBlock())
return Error("Malformed block record");
@@ -1082,13 +1229,13 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
// Reject multiple MODULE_BLOCK's in a single bitstream.
if (TheModule)
return Error("Multiple MODULE_BLOCKs in same stream");
-
+
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return Error("Malformed block record");
// Otherwise, create the module.
TheModule = new Module(ModuleID, Context);
-
+
SmallVector<uint64_t, 64> Record;
std::vector<std::string> SectionTable;
std::vector<std::string> GCTable;
@@ -1122,7 +1269,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
std::vector<Function*>().swap(FunctionsWithBodies);
return false;
}
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
switch (Stream.ReadSubBlockID()) {
default: // Skip unknown content.
@@ -1153,6 +1300,10 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
if (ParseConstants() || ResolveGlobalAndAliasInits())
return true;
break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata())
+ return true;
+ break;
case bitc::FUNCTION_BLOCK_ID:
// If this is the first function body we've seen, reverse the
// FunctionsWithBodies list.
@@ -1160,19 +1311,19 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
HasReversedFunctionsWithBodies = true;
}
-
+
if (RememberAndSkipFunctionBody())
return true;
break;
}
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
switch (Stream.ReadRecord(Code, Record)) {
default: break; // Default behavior, ignore unknown content.
@@ -1235,7 +1386,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
return Error("Global not a pointer type!");
unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
Ty = cast<PointerType>(Ty)->getElementType();
-
+
bool isConstant = Record[1];
GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]);
unsigned Alignment = (1 << Record[4]) >> 1;
@@ -1253,16 +1404,16 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
isThreadLocal = Record[7];
GlobalVariable *NewGV =
- new GlobalVariable(Ty, isConstant, Linkage, 0, "", TheModule,
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
isThreadLocal, AddressSpace);
NewGV->setAlignment(Alignment);
if (!Section.empty())
NewGV->setSection(Section);
NewGV->setVisibility(Visibility);
NewGV->setThreadLocal(isThreadLocal);
-
+
ValueList.push_back(NewGV);
-
+
// Remember which value to use for the global initializer.
if (unsigned InitID = Record[2])
GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
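Reviewer note: alignment fields here (and for functions below) are stored as log2(alignment) + 1, with 0 meaning "unspecified"; that is why the reader computes (1 << Record[4]) >> 1. A standalone sketch of the round trip (helper names hypothetical):

    // Illustration only; Align is 0 or a power of two.
    static unsigned EncodeAlignment(unsigned Align) {
      unsigned Log2 = 0;
      while ((1u << Log2) < Align)
        ++Log2;                       // Log2_32(Align) for powers of two
      return Align ? Log2 + 1 : 0;
    }
    static unsigned DecodeAlignment(unsigned Encoded) {
      return (1u << Encoded) >> 1;    // 0 -> 0, k -> 1 << (k - 1)
    }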
@@ -1284,11 +1435,11 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
"", TheModule);
- Func->setCallingConv(Record[1]);
+ Func->setCallingConv(static_cast<CallingConv::ID>(Record[1]));
bool isProto = Record[2];
Func->setLinkage(GetDecodedLinkage(Record[3]));
Func->setAttributes(getAttributes(Record[4]));
-
+
Func->setAlignment((1 << Record[5]) >> 1);
if (Record[6]) {
if (Record[6]-1 >= SectionTable.size())
@@ -1302,7 +1453,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
Func->setGC(GCTable[Record[8]-1].c_str());
}
ValueList.push_back(Func);
-
+
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with them later.
if (!isProto)
@@ -1317,7 +1468,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
const Type *Ty = getTypeByID(Record[0]);
if (!isa<PointerType>(Ty))
return Error("Function not a pointer type!");
-
+
GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
"", 0, TheModule);
// Old bitcode files didn't have visibility field.
@@ -1337,28 +1488,28 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
}
Record.clear();
}
-
+
return Error("Premature end of bitstream");
}
bool BitcodeReader::ParseBitcode() {
TheModule = 0;
-
+
if (Buffer->getBufferSize() & 3)
return Error("Bitcode stream should be a multiple of 4 bytes in length");
-
+
unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
+
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, BufEnd))
if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
return Error("Invalid bitcode wrapper header");
-
+
StreamFile.init(BufPtr, BufEnd);
Stream.init(StreamFile);
-
+
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
Stream.Read(8) != 'C' ||
@@ -1367,17 +1518,17 @@ bool BitcodeReader::ParseBitcode() {
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
return Error("Invalid bitcode signature");
-
+
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (!Stream.AtEndOfStream()) {
unsigned Code = Stream.ReadCode();
-
+
if (Code != bitc::ENTER_SUBBLOCK)
return Error("Invalid record at top-level");
-
+
unsigned BlockID = Stream.ReadSubBlockID();
-
+
// We only know the MODULE subblock ID.
switch (BlockID) {
case bitc::BLOCKINFO_BLOCK_ID:
@@ -1394,22 +1545,61 @@ bool BitcodeReader::ParseBitcode() {
break;
}
}
-
+
return false;
}
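Reviewer note: the wrapper test at the top of ParseBitcode keys off the little-endian magic 0x0B17C0DE, while a plain stream starts with the 'B', 'C', 0xC0DE signature sniffed above. A sketch of the byte-level wrapper check, assuming a little-endian file layout (helper name hypothetical):

    // Illustration only.
    static bool LooksLikeBitcodeWrapper(const unsigned char *Buf,
                                        unsigned Len) {
      return Len >= 4 &&
             Buf[0] == 0xDE && Buf[1] == 0xC0 &&
             Buf[2] == 0x17 && Buf[3] == 0x0B;  // 0x0B17C0DE, little endian
    }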
+/// ParseMetadataAttachment - Parse metadata attachments.
+bool BitcodeReader::ParseMetadataAttachment() {
+ if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+ return Error("Malformed block record");
+
+ MetadataContext &TheMetadata = Context.getMetadata();
+ SmallVector<uint64_t, 64> Record;
+ while(1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+        return Error("Error at end of METADATA_ATTACHMENT block");
+ break;
+ }
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+ // Read a metadata attachment record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_ATTACHMENT: {
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || (RecordLength - 1) % 2 == 1)
+        return Error("Invalid METADATA_ATTACHMENT record!");
+ Instruction *Inst = InstructionList[Record[0]];
+ for (unsigned i = 1; i != RecordLength; i = i+2) {
+ unsigned Kind = Record[i];
+ Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
+ TheMetadata.addMD(Kind, cast<MDNode>(Node), Inst);
+ }
+ break;
+ }
+ }
+ }
+ return false;
+}
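Reviewer note: a METADATA_ATTACHMENT record is laid out as [instruction#, (kind, mdnode#)*], which is what the (RecordLength - 1) % 2 parity check above enforces. The shape test in isolation (hypothetical helper, not part of the patch):

    // Illustration only.
    static bool IsWellFormedAttachment(const SmallVector<uint64_t, 64> &Record) {
      return !Record.empty() && (Record.size() - 1) % 2 == 0;
    }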
/// ParseFunctionBody - Lazily parse the specified function body block.
bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
return Error("Malformed block record");
-
+
unsigned ModuleValueListSize = ValueList.size();
-
+
// Add all the function arguments to the value table.
for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
ValueList.push_back(I);
-
+
unsigned NextValueNo = ValueList.size();
BasicBlock *CurBB = 0;
unsigned CurBBNo = 0;
@@ -1423,7 +1613,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Error at end of function block");
break;
}
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
switch (Stream.ReadSubBlockID()) {
default: // Skip unknown content.
@@ -1437,19 +1627,23 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::VALUE_SYMTAB_BLOCK_ID:
if (ParseValueSymbolTable()) return true;
break;
+ case bitc::METADATA_ATTACHMENT_ID:
+ if (ParseMetadataAttachment()) return true;
+ break;
}
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a record.
Record.clear();
Instruction *I = 0;
- switch (Stream.ReadRecord(Code, Record)) {
+ unsigned BitCode = Stream.ReadRecord(Code, Record);
+ switch (BitCode) {
default: // Default behavior: reject
return Error("Unknown instruction");
case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
@@ -1458,21 +1652,35 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
- FunctionBBs[i] = BasicBlock::Create("", F);
+ FunctionBBs[i] = BasicBlock::Create(Context, "", F);
CurBB = FunctionBBs[0];
continue;
-
+
case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
getValue(Record, OpNum, LHS->getType(), RHS) ||
- OpNum+1 != Record.size())
+ OpNum+1 > Record.size())
return Error("Invalid BINOP record");
-
- int Opc = GetDecodedBinaryOpcode(Record[OpNum], LHS->getType());
+
+ int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
if (Opc == -1) return Error("Invalid BINOP record");
I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ InstructionList.push_back(I);
+ if (OpNum < Record.size()) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul) {
+          if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+            cast<BinaryOperator>(I)->setHasNoSignedWrap(true);
+          if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+            cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true);
+        } else if (Opc == Instruction::SDiv) {
+          if (Record[OpNum] & (1 << bitc::SDIV_EXACT))
+            cast<BinaryOperator>(I)->setIsExact(true);
+ }
+ }
break;
}
case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc]
@@ -1481,14 +1689,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
return Error("Invalid CAST record");
-
+
const Type *ResTy = getTypeByID(Record[OpNum]);
int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
if (Opc == -1 || ResTy == 0)
return Error("Invalid CAST record");
I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ InstructionList.push_back(I);
break;
}
+ case bitc::FUNC_CODE_INST_INBOUNDS_GEP:
case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
unsigned OpNum = 0;
Value *BasePtr;
@@ -1504,9 +1714,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end());
+ InstructionList.push_back(I);
+ if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP)
+ cast<GetElementPtrInst>(I)->setIsInBounds(true);
break;
}
-
+
case bitc::FUNC_CODE_INST_EXTRACTVAL: {
// EXTRACTVAL: [opty, opval, n x indices]
unsigned OpNum = 0;
@@ -1525,9 +1738,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = ExtractValueInst::Create(Agg,
EXTRACTVALIdx.begin(), EXTRACTVALIdx.end());
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_INSERTVAL: {
// INSERTVAL: [opty, opval, opty, opval, n x indices]
unsigned OpNum = 0;
@@ -1549,9 +1763,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = InsertValueInst::Create(Agg, Val,
INSERTVALIdx.begin(), INSERTVALIdx.end());
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval]
// obsolete form of select
// handles select i1 ... in old bitcode
@@ -1559,13 +1774,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *TrueVal, *FalseVal, *Cond;
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
- getValue(Record, OpNum, Type::Int1Ty, Cond))
+ getValue(Record, OpNum, Type::getInt1Ty(Context), Cond))
return Error("Invalid SELECT record");
-
+
I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred]
// new form of select
// handles select i1 or select [N x i1]
@@ -1580,40 +1796,43 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (const VectorType* vector_type =
dyn_cast<const VectorType>(Cond->getType())) {
// expect <n x i1>
- if (vector_type->getElementType() != Type::Int1Ty)
+ if (vector_type->getElementType() != Type::getInt1Ty(Context))
return Error("Invalid SELECT condition type");
} else {
// expect i1
- if (Cond->getType() != Type::Int1Ty)
+ if (Cond->getType() != Type::getInt1Ty(Context))
return Error("Invalid SELECT condition type");
- }
-
+ }
+
I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
unsigned OpNum = 0;
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValue(Record, OpNum, Type::Int32Ty, Idx))
+ getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
return Error("Invalid EXTRACTELT record");
- I = new ExtractElementInst(Vec, Idx);
+ I = ExtractElementInst::Create(Vec, Idx);
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValue(Record, OpNum,
+ getValue(Record, OpNum,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
- getValue(Record, OpNum, Type::Int32Ty, Idx))
+ getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
return Error("Invalid INSERTELT record");
I = InsertElementInst::Create(Vec, Elt, Idx);
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
unsigned OpNum = 0;
Value *Vec1, *Vec2, *Mask;
@@ -1624,44 +1843,32 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
return Error("Invalid SHUFFLEVEC record");
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_CMP: { // CMP: [opty, opval, opval, pred]
- // VFCmp/VICmp
- // or old form of ICmp/FCmp returning bool
- unsigned OpNum = 0;
- Value *LHS, *RHS;
- if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- getValue(Record, OpNum, LHS->getType(), RHS) ||
- OpNum+1 != Record.size())
- return Error("Invalid CMP record");
-
- if (LHS->getType()->isFloatingPoint())
- I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
- else if (!isa<VectorType>(LHS->getType()))
- I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
- else if (LHS->getType()->isFPOrFPVector())
- I = new VFCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
- else
- I = new VICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
- break;
- }
+ case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred]
+ // Old form of ICmp/FCmp returning bool
+ // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were
+ // both legal on vectors but had different behaviour.
case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred]
- // Fcmp/ICmp returning bool or vector of bool
+ // FCmp/ICmp returning bool or vector of bool
+
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
getValue(Record, OpNum, LHS->getType(), RHS) ||
OpNum+1 != Record.size())
- return Error("Invalid CMP2 record");
-
+ return Error("Invalid CMP record");
+
if (LHS->getType()->isFPOrFPVector())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
- else
+ else
I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ InstructionList.push_back(I);
break;
}
+
case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n]
if (Record.size() != 2)
return Error("Invalid GETRESULT record");
@@ -1670,14 +1877,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
getValueTypePair(Record, OpNum, NextValueNo, Op);
unsigned Index = Record[1];
I = ExtractValueInst::Create(Op, Index);
+ InstructionList.push_back(I);
break;
}
-
+
case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
{
unsigned Size = Record.size();
if (Size == 0) {
- I = ReturnInst::Create();
+ I = ReturnInst::Create(Context);
+ InstructionList.push_back(I);
break;
}
@@ -1697,15 +1906,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *RV = UndefValue::get(ReturnType);
for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
I = InsertValueInst::Create(RV, Vs[i], i, "mrv");
+ InstructionList.push_back(I);
CurBB->getInstList().push_back(I);
ValueList.AssignValue(I, NextValueNo++);
RV = I;
}
- I = ReturnInst::Create(RV);
+ I = ReturnInst::Create(Context, RV);
+ InstructionList.push_back(I);
break;
}
- I = ReturnInst::Create(Vs[0]);
+ I = ReturnInst::Create(Context, Vs[0]);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
@@ -1715,14 +1927,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (TrueDest == 0)
return Error("Invalid BR record");
- if (Record.size() == 1)
+ if (Record.size() == 1) {
I = BranchInst::Create(TrueDest);
+ InstructionList.push_back(I);
+ }
else {
BasicBlock *FalseDest = getBasicBlock(Record[1]);
- Value *Cond = getFnValueByID(Record[2], Type::Int1Ty);
+ Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context));
if (FalseDest == 0 || Cond == 0)
return Error("Invalid BR record");
I = BranchInst::Create(TrueDest, FalseDest, Cond);
+ InstructionList.push_back(I);
}
break;
}
@@ -1736,8 +1951,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid SWITCH record");
unsigned NumCases = (Record.size()-3)/2;
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
+ InstructionList.push_back(SI);
for (unsigned i = 0, e = NumCases; i != e; ++i) {
- ConstantInt *CaseVal =
+ ConstantInt *CaseVal =
dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
if (CaseVal == 0 || DestBB == 0) {
@@ -1749,7 +1965,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = SI;
break;
}
-
+
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4) return Error("Invalid INVOKE record");
@@ -1757,12 +1973,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned CCInfo = Record[1];
BasicBlock *NormalBB = getBasicBlock(Record[2]);
BasicBlock *UnwindBB = getBasicBlock(Record[3]);
-
+
unsigned OpNum = 4;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
return Error("Invalid INVOKE record");
-
+
const PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
const FunctionType *FTy = !CalleeTy ? 0 :
dyn_cast<FunctionType>(CalleeTy->getElementType());
@@ -1771,13 +1987,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
Record.size() < OpNum+FTy->getNumParams())
return Error("Invalid INVOKE record");
-
+
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
if (Ops.back() == 0) return Error("Invalid INVOKE record");
}
-
+
if (!FTy->isVarArg()) {
if (Record.size() != OpNum)
return Error("Invalid INVOKE record");
@@ -1790,28 +2006,33 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Ops.push_back(Op);
}
}
-
+
I = InvokeInst::Create(Callee, NormalBB, UnwindBB,
Ops.begin(), Ops.end());
- cast<InvokeInst>(I)->setCallingConv(CCInfo);
+ InstructionList.push_back(I);
+ cast<InvokeInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo));
cast<InvokeInst>(I)->setAttributes(PAL);
break;
}
case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
- I = new UnwindInst();
+ I = new UnwindInst(Context);
+ InstructionList.push_back(I);
break;
case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
- I = new UnreachableInst();
+ I = new UnreachableInst(Context);
+ InstructionList.push_back(I);
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.size() < 1 || ((Record.size()-1)&1))
return Error("Invalid PHI record");
const Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid PHI record");
-
+
PHINode *PN = PHINode::Create(Ty);
+ InstructionList.push_back(PN);
PN->reserveOperandSpace((Record.size()-1)/2);
-
+
for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
Value *V = getFnValueByID(Record[1+i], Ty);
BasicBlock *BB = getBasicBlock(Record[2+i]);
@@ -1821,16 +2042,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = PN;
break;
}
-
+
case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align]
if (Record.size() < 3)
return Error("Invalid MALLOC record");
const PointerType *Ty =
dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
- Value *Size = getFnValueByID(Record[1], Type::Int32Ty);
+ Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context));
unsigned Align = Record[2];
if (!Ty || !Size) return Error("Invalid MALLOC record");
I = new MallocInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty]
@@ -1840,6 +2062,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
OpNum != Record.size())
return Error("Invalid FREE record");
I = new FreeInst(Op);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align]
@@ -1847,10 +2070,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid ALLOCA record");
const PointerType *Ty =
dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
- Value *Size = getFnValueByID(Record[1], Type::Int32Ty);
+ Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context));
unsigned Align = Record[2];
if (!Ty || !Size) return Error("Invalid ALLOCA record");
I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
@@ -1859,20 +2083,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
return Error("Invalid LOAD record");
-
+
I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol]
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ getValue(Record, OpNum,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+2 != Record.size())
return Error("Invalid STORE record");
-
+
I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol]
@@ -1880,32 +2106,34 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Val) ||
- getValue(Record, OpNum, PointerType::getUnqual(Val->getType()), Ptr)||
+ getValue(Record, OpNum,
+ PointerType::getUnqual(Val->getType()), Ptr)||
OpNum+2 != Record.size())
return Error("Invalid STORE record");
-
+
I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
return Error("Invalid CALL record");
-
+
AttrListPtr PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
-
+
unsigned OpNum = 2;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
return Error("Invalid CALL record");
-
+
const PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
const FunctionType *FTy = 0;
if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
return Error("Invalid CALL record");
-
+
SmallVector<Value*, 16> Args;
// Read the fixed params.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
@@ -1915,7 +2143,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
if (Args.back() == 0) return Error("Invalid CALL record");
}
-
+
// Read type/value pairs for varargs params.
if (!FTy->isVarArg()) {
if (OpNum != Record.size())
@@ -1928,9 +2156,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Args.push_back(Op);
}
}
-
+
I = CallInst::Create(Callee, Args.begin(), Args.end());
- cast<CallInst>(I)->setCallingConv(CCInfo>>1);
+ InstructionList.push_back(I);
+ cast<CallInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo>>1));
cast<CallInst>(I)->setTailCall(CCInfo & 1);
cast<CallInst>(I)->setAttributes(PAL);
break;
@@ -1944,6 +2174,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (!OpTy || !Op || !ResTy)
return Error("Invalid VAARG record");
I = new VAArgInst(Op, ResTy);
+ InstructionList.push_back(I);
break;
}
}
@@ -1955,18 +2186,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid instruction with no BB");
}
CurBB->getInstList().push_back(I);
-
+
// If this was a terminator instruction, move to the next block.
if (isa<TerminatorInst>(I)) {
++CurBBNo;
CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
}
-
+
// Non-void values get registered in the value table for future use.
- if (I && I->getType() != Type::VoidTy)
+ if (I && I->getType() != Type::getVoidTy(Context))
ValueList.AssignValue(I, NextValueNo++);
}
-
+
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
if (A->getParent() == 0) {
@@ -1980,11 +2211,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Never resolved value found in function!");
}
}
-
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
std::vector<BasicBlock*>().swap(FunctionBBs);
-
+
return false;
}
@@ -1996,16 +2227,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) {
// If it already is material, ignore the request.
if (!F->hasNotBeenReadFromBitcode()) return false;
-
- DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator DFII =
+
+ DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator DFII =
DeferredFunctionInfo.find(F);
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
-
+
// Move the bit stream to the saved position of the deferred function body and
// restore the real linkage type for the function.
Stream.JumpToBit(DFII->second.first);
F->setLinkage((GlobalValue::LinkageTypes)DFII->second.second);
-
+
if (ParseFunctionBody(F)) {
if (ErrInfo) *ErrInfo = ErrorString;
return true;
@@ -2022,7 +2253,7 @@ bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) {
}
}
}
-
+
return false;
}
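Reviewer note: together with RememberAndSkipFunctionBody this gives lazy deserialization — bodies are skipped at module-parse time and only decoded on request. A usage sketch against the ModuleProvider interface (error handling elided; the function name "main" is an arbitrary example):

    // Illustration only.
    std::string Err;
    ModuleProvider *MP = getBitcodeModuleProvider(Buffer, Context, &Err);
    if (MP) {
      Function *F = MP->getModule()->getFunction("main");
      if (F && MP->materializeFunction(F, &Err)) {
        // Err now holds the parse error from the deferred body.
      }
    }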
@@ -2030,9 +2261,9 @@ void BitcodeReader::dematerializeFunction(Function *F) {
// If this function isn't materialized, or if it is a proto, this is a noop.
if (F->hasNotBeenReadFromBitcode() || F->isDeclaration())
return;
-
+
assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
-
+
// Just forget the function body, we can remat it later.
F->deleteBody();
F->setLinkage(GlobalValue::GhostLinkage);
@@ -2048,9 +2279,9 @@ Module *BitcodeReader::materializeModule(std::string *ErrInfo) {
materializeFunction(F, ErrInfo))
return 0;
- // Upgrade any intrinsic calls that slipped through (should not happen!) and
- // delete the old functions to clean up. We can't do this unless the entire
- // module is materialized because there could always be another function body
+ // Upgrade any intrinsic calls that slipped through (should not happen!) and
+ // delete the old functions to clean up. We can't do this unless the entire
+ // module is materialized because there could always be another function body
// with calls to the old function.
for (std::vector<std::pair<Function*, Function*> >::iterator I =
UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
@@ -2066,7 +2297,10 @@ Module *BitcodeReader::materializeModule(std::string *ErrInfo) {
}
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
-
+
+ // Check debug info intrinsics.
+ CheckDebugInfoIntrinsics(TheModule);
+
return TheModule;
}
@@ -2096,7 +2330,7 @@ ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer,
if (R->ParseBitcode()) {
if (ErrMsg)
*ErrMsg = R->getErrorString();
-
+
// Don't let the BitcodeReader dtor delete 'Buffer'.
R->releaseMemoryBuffer();
delete R;
@@ -2107,25 +2341,25 @@ ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer,
/// ParseBitcodeFile - Read the specified bitcode file, returning the module.
/// If an error occurs, return null and fill in *ErrMsg if non-null.
-Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
std::string *ErrMsg){
BitcodeReader *R;
- R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, Context,
+ R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, Context,
ErrMsg));
if (!R) return 0;
-
+
// Read in the entire module.
Module *M = R->materializeModule(ErrMsg);
// Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether
// there was an error.
R->releaseMemoryBuffer();
-
+
// If there was no error, tell ModuleProvider not to delete it when its dtor
// is run.
if (M)
M = R->releaseModule(ErrMsg);
-
+
delete R;
return M;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 662631bce950..eefc7bdc28a8 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -44,8 +44,9 @@ class BitcodeReaderValueList {
/// number that holds the resolved value.
typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
ResolveConstantsTy ResolveConstants;
+ LLVMContext& Context;
public:
- BitcodeReaderValueList() {}
+ BitcodeReaderValueList(LLVMContext& C) : Context(C) {}
~BitcodeReaderValueList() {
assert(ResolveConstants.empty() && "Constants not resolved?");
}
@@ -85,6 +86,41 @@ public:
void ResolveConstantForwardRefs();
};
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderMDValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderMDValueList {
+ std::vector<WeakVH> MDValuePtrs;
+
+ LLVMContext& Context;
+public:
+ BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MDValuePtrs.size(); }
+ void resize(unsigned N) { MDValuePtrs.resize(N); }
+ void push_back(Value *V) { MDValuePtrs.push_back(V); }
+ void clear() { MDValuePtrs.clear(); }
+ Value *back() const { return MDValuePtrs.back(); }
+ void pop_back() { MDValuePtrs.pop_back(); }
+ bool empty() const { return MDValuePtrs.empty(); }
+
+ Value *operator[](unsigned i) const {
+ assert(i < MDValuePtrs.size());
+ return MDValuePtrs[i];
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ MDValuePtrs.resize(N);
+ }
+
+ Value *getValueFwdRef(unsigned Idx);
+ void AssignValue(Value *V, unsigned Idx);
+};
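Reviewer note: the two non-vector methods (bodies live in the .cpp) implement the usual forward-reference protocol. A sketch of the expected call sequence, with the placeholder behaviour stated as an assumption:

    // Illustration only: reading node 2 may reference the unread node 5.
    BitcodeReaderMDValueList MDValues(Context);
    MDValues.resize(8);
    Value *Fwd = MDValues.getValueFwdRef(5); // assumed: placeholder value
    // ... later, when record 5 is actually parsed:
    // MDValues.AssignValue(RealNode, 5);    // assumed: replaces placeholder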
+
class BitcodeReader : public ModuleProvider {
LLVMContext& Context;
MemoryBuffer *Buffer;
@@ -95,6 +131,9 @@ class BitcodeReader : public ModuleProvider {
std::vector<PATypeHolder> TypeList;
BitcodeReaderValueList ValueList;
+ BitcodeReaderMDValueList MDValueList;
+ SmallVector<Instruction *, 64> InstructionList;
+
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
@@ -126,7 +165,7 @@ class BitcodeReader : public ModuleProvider {
DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo;
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext& C)
- : Context(C), Buffer(buffer), ErrorString(0) {
+ : Context(C), Buffer(buffer), ErrorString(0), ValueList(C), MDValueList(C) {
HasReversedFunctionsWithBodies = false;
}
~BitcodeReader() {
@@ -159,7 +198,10 @@ public:
private:
const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
Value *getFnValueByID(unsigned ID, const Type *Ty) {
- return ValueList.getValueFwdRef(ID, Ty);
+ if (Ty == Type::getMetadataTy(Context))
+ return MDValueList.getValueFwdRef(ID);
+ else
+ return ValueList.getValueFwdRef(ID, Ty);
}
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return 0; // Invalid ID
@@ -209,6 +251,8 @@ private:
bool RememberAndSkipFunctionBody();
bool ParseFunctionBody(Function *F);
bool ResolveGlobalAndAliasInits();
+ bool ParseMetadata();
+ bool ParseMetadataAttachment();
};
} // End llvm namespace
diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp
index 06da6ce72721..67607efae08a 100644
--- a/lib/Bitcode/Reader/Deserialize.cpp
+++ b/lib/Bitcode/Reader/Deserialize.cpp
@@ -12,11 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/Deserialize.h"
-
-#ifdef DEBUG_BACKPATCH
-#include "llvm/Support/Streams.h"
-#endif
-
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
Deserializer::Deserializer(BitstreamReader& stream)
@@ -357,7 +353,7 @@ void Deserializer::RegisterPtr(const SerializedPtrID& PtrId,
assert (!HasFinalPtr(E) && "Pointer already registered.");
#ifdef DEBUG_BACKPATCH
- llvm::cerr << "RegisterPtr: " << PtrId << " => " << Ptr << "\n";
+ errs() << "RegisterPtr: " << PtrId << " => " << Ptr << "\n";
#endif
SetPtr(E,Ptr);
@@ -377,8 +373,8 @@ void Deserializer::ReadUIntPtr(uintptr_t& PtrRef,
PtrRef = GetFinalPtr(E);
#ifdef DEBUG_BACKPATCH
- llvm::cerr << "ReadUintPtr: " << PtrId
- << " <-- " << (void*) GetFinalPtr(E) << '\n';
+ errs() << "ReadUintPtr: " << PtrId
+ << " <-- " << (void*) GetFinalPtr(E) << '\n';
#endif
}
else {
@@ -386,7 +382,7 @@ void Deserializer::ReadUIntPtr(uintptr_t& PtrRef,
"Client forbids backpatching for this pointer.");
#ifdef DEBUG_BACKPATCH
- llvm::cerr << "ReadUintPtr: " << PtrId << " (NO PTR YET)\n";
+ errs() << "ReadUintPtr: " << PtrId << " (NO PTR YET)\n";
#endif
// Register backpatch. Check the freelist for a BPNode.
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 8834964b040c..7ed651b77e2e 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -9,43 +9,31 @@
#include "llvm-c/BitWriter.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include <fstream>
-
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/*===-- Operations on modules ---------------------------------------------===*/
int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
- std::ofstream OS(Path, std::ios_base::out|std::ios::trunc|std::ios::binary);
-
- if (!OS.fail())
- WriteBitcodeToFile(unwrap(M), OS);
+ std::string ErrorInfo;
+ raw_fd_ostream OS(Path, ErrorInfo,
+ raw_fd_ostream::F_Binary);
- if (OS.fail())
+ if (!ErrorInfo.empty())
return -1;
+ WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
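Reviewer note: client-visible behaviour is unchanged — the function still returns 0 on success and -1 when the path cannot be opened. A minimal usage sketch ("out.bc" is an arbitrary example path):

    /* Illustration only. */
    if (LLVMWriteBitcodeToFile(M, "out.bc") != 0) {
      /* raw_fd_ostream could not open the path for writing. */
    }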
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#include <ext/stdio_filebuf.h>
-// FIXME: Control this with configure? Provide some portable abstraction in
-// libSystem? As is, the user will just get a linker error if they use this on
-// non-GCC. Some C++ stdlibs even have ofstream::ofstream(int fd).
int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
- __gnu_cxx::stdio_filebuf<char> Buffer(FileHandle, std::ios_base::out |
- std::ios::trunc |
- std::ios::binary);
- std::ostream OS(&Buffer);
-
- if (!OS.fail())
- WriteBitcodeToFile(unwrap(M), OS);
-
- if (OS.fail())
- return -1;
+ raw_fd_ostream OS(FileHandle, false);
+ WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6dcddedef1ca..12a1f5ea5dc2 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -19,12 +19,13 @@
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
-#include "llvm/MDNode.h"
+#include "llvm/Metadata.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/ValueSymbolTable.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Program.h"
using namespace llvm;
@@ -33,22 +34,23 @@ using namespace llvm;
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
CurVersion = 0,
-
+
// VALUE_SYMTAB_BLOCK abbrev id's.
VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
VST_ENTRY_7_ABBREV,
VST_ENTRY_6_ABBREV,
VST_BBENTRY_6_ABBREV,
-
+
// CONSTANTS_BLOCK abbrev id's.
CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
CONSTANTS_INTEGER_ABBREV,
CONSTANTS_CE_CAST_Abbrev,
CONSTANTS_NULL_Abbrev,
-
+
// FUNCTION_BLOCK abbrev id's.
FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
FUNCTION_INST_BINOP_ABBREV,
+ FUNCTION_INST_BINOP_FLAGS_ABBREV,
FUNCTION_INST_CAST_ABBREV,
FUNCTION_INST_RET_VOID_ABBREV,
FUNCTION_INST_RET_VAL_ABBREV,
@@ -58,7 +60,7 @@ enum {
static unsigned GetEncodedCastOpcode(unsigned Opcode) {
switch (Opcode) {
- default: assert(0 && "Unknown cast instruction!");
+ default: llvm_unreachable("Unknown cast instruction!");
case Instruction::Trunc : return bitc::CAST_TRUNC;
case Instruction::ZExt : return bitc::CAST_ZEXT;
case Instruction::SExt : return bitc::CAST_SEXT;
@@ -76,7 +78,7 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) {
static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
switch (Opcode) {
- default: assert(0 && "Unknown binary instruction!");
+ default: llvm_unreachable("Unknown binary instruction!");
case Instruction::Add:
case Instruction::FAdd: return bitc::BINOP_ADD;
case Instruction::Sub:
@@ -100,24 +102,24 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
-static void WriteStringRecord(unsigned Code, const std::string &Str,
+static void WriteStringRecord(unsigned Code, const std::string &Str,
unsigned AbbrevToUse, BitstreamWriter &Stream) {
SmallVector<unsigned, 64> Vals;
-
+
// Code: [strchar x N]
for (unsigned i = 0, e = Str.size(); i != e; ++i)
Vals.push_back(Str[i]);
-
+
// Emit the finished record.
Stream.EmitRecord(Code, Vals, AbbrevToUse);
}
// Emit information about parameter attributes.
-static void WriteAttributeTable(const ValueEnumerator &VE,
+static void WriteAttributeTable(const ValueEnumerator &VE,
BitstreamWriter &Stream) {
const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
if (Attrs.empty()) return;
-
+
Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
@@ -138,21 +140,21 @@ static void WriteAttributeTable(const ValueEnumerator &VE,
Record.push_back(FauxAttr);
}
-
+
Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
Record.clear();
}
-
+
Stream.ExitBlock();
}
/// WriteTypeTable - Write out the type table for a module.
static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
const ValueEnumerator::TypeList &TypeList = VE.getTypes();
-
+
Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
-
+
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
@@ -160,7 +162,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Log2_32_Ceil(VE.getTypes().size()+1)));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
-
+
// Abbrev for TYPE_CODE_FUNCTION.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
@@ -170,7 +172,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
-
+
// Abbrev for TYPE_CODE_STRUCT.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT));
@@ -179,7 +181,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
-
+
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -187,20 +189,20 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
-
+
// Emit an entry count so the reader can reserve space.
TypeVals.push_back(TypeList.size());
Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
TypeVals.clear();
-
+
// Loop over all of the types, emitting each in turn.
for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
const Type *T = TypeList[i].first;
int AbbrevToUse = 0;
unsigned Code = 0;
-
+
switch (T->getTypeID()) {
- default: assert(0 && "Unknown type!");
+ default: llvm_unreachable("Unknown type!");
case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
@@ -272,33 +274,34 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
TypeVals.clear();
}
-
+
Stream.ExitBlock();
}
static unsigned getEncodedLinkage(const GlobalValue *GV) {
switch (GV->getLinkage()) {
- default: assert(0 && "Invalid linkage!");
+ default: llvm_unreachable("Invalid linkage!");
case GlobalValue::GhostLinkage: // Map ghost linkage onto external.
- case GlobalValue::ExternalLinkage: return 0;
- case GlobalValue::WeakAnyLinkage: return 1;
- case GlobalValue::AppendingLinkage: return 2;
- case GlobalValue::InternalLinkage: return 3;
- case GlobalValue::LinkOnceAnyLinkage: return 4;
- case GlobalValue::DLLImportLinkage: return 5;
- case GlobalValue::DLLExportLinkage: return 6;
- case GlobalValue::ExternalWeakLinkage: return 7;
- case GlobalValue::CommonLinkage: return 8;
- case GlobalValue::PrivateLinkage: return 9;
- case GlobalValue::WeakODRLinkage: return 10;
- case GlobalValue::LinkOnceODRLinkage: return 11;
- case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::LinkerPrivateLinkage: return 13;
}
}
static unsigned getEncodedVisibility(const GlobalValue *GV) {
switch (GV->getVisibility()) {
- default: assert(0 && "Invalid visibility!");
+ default: llvm_unreachable("Invalid visibility!");
case GlobalValue::DefaultVisibility: return 0;
case GlobalValue::HiddenVisibility: return 1;
case GlobalValue::ProtectedVisibility: return 2;
@@ -334,7 +337,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
GV != E; ++GV) {
MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
-
+
if (!GV->hasSection()) continue;
// Give section names unique ID's.
unsigned &Entry = SectionMap[GV->getSection()];
@@ -364,10 +367,10 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
}
}
}
-
+
// Emit abbrev for globals, now that we know # sections and max alignment.
unsigned SimpleGVarAbbrev = 0;
- if (!M->global_empty()) {
+ if (!M->global_empty()) {
// Add an abbrev for common globals with no visibility or thread localness.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
@@ -391,14 +394,14 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Don't bother emitting vis + thread local.
SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
}
-
+
// Emit the global variable information.
SmallVector<unsigned, 64> Vals;
for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
GV != E; ++GV) {
unsigned AbbrevToUse = 0;
- // GLOBALVAR: [type, isconst, initid,
+ // GLOBALVAR: [type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal]
Vals.push_back(VE.getTypeID(GV->getType()));
Vals.push_back(GV->isConstant());
@@ -407,14 +410,14 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(getEncodedLinkage(GV));
Vals.push_back(Log2_32(GV->getAlignment())+1);
Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
- if (GV->isThreadLocal() ||
+ if (GV->isThreadLocal() ||
GV->getVisibility() != GlobalValue::DefaultVisibility) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(GV->isThreadLocal());
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
-
+
Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
Vals.clear();
}
@@ -432,13 +435,13 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
Vals.push_back(getEncodedVisibility(F));
Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
-
+
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
Vals.clear();
}
-
-
+
+
// Emit the alias information.
for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
AI != E; ++AI) {
@@ -452,20 +455,185 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
}
}
+static uint64_t GetOptimizationFlags(const Value *V) {
+ uint64_t Flags = 0;
+
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(V)) {
+ if (OBO->hasNoSignedWrap())
+ Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+ if (OBO->hasNoUnsignedWrap())
+ Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+ } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(V)) {
+ if (Div->isExact())
+ Flags |= 1 << bitc::SDIV_EXACT;
+ }
+
+ return Flags;
+}
+
+static void WriteMDNode(const MDNode *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVector<uint64_t, 64> &Record) {
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
+ if (N->getElement(i)) {
+ Record.push_back(VE.getTypeID(N->getElement(i)->getType()));
+ Record.push_back(VE.getValueID(N->getElement(i)));
+ } else {
+ Record.push_back(VE.getTypeID(Type::getVoidTy(N->getContext())));
+ Record.push_back(0);
+ }
+ }
+ Stream.EmitRecord(bitc::METADATA_NODE, Record, 0);
+ Record.clear();
+}
+
+static void WriteModuleMetadata(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+ bool StartedMetadataBlock = false;
+ unsigned MDSAbbrev = 0;
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+
+ if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ // Abbrev for METADATA_STRING.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ MDSAbbrev = Stream.EmitAbbrev(Abbv);
+ StartedMetadataBlock = true;
+ }
+
+ // Code: [strchar x N]
+ const char *StrBegin = MDS->begin();
+ for (unsigned i = 0, e = MDS->length(); i != e; ++i)
+ Record.push_back(StrBegin[i]);
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
+ Record.clear();
+ } else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(Vals[i].first)) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+
+ // Write name.
+ std::string Str = NMD->getNameStr();
+ const char *StrBegin = Str.c_str();
+ for (unsigned i = 0, e = Str.length(); i != e; ++i)
+ Record.push_back(StrBegin[i]);
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+ Record.clear();
+
+ // Write named metadata elements.
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ if (NMD->getElement(i))
+ Record.push_back(VE.getValueID(NMD->getElement(i)));
+ else
+ Record.push_back(0);
+ }
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
+ Record.clear();
+ }
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteMetadataAttachment(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ bool StartedMetadataBlock = false;
+ SmallVector<uint64_t, 64> Record;
+
+  // Write metadata attachments.
+  // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
+ MetadataContext &TheMetadata = F.getContext().getMetadata();
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ const MetadataContext::MDMapTy *P = TheMetadata.getMDs(I);
+ if (!P) continue;
+ bool RecordedInstruction = false;
+ for (MetadataContext::MDMapTy::const_iterator PI = P->begin(),
+ PE = P->end(); PI != PE; ++PI) {
+ if (MDNode *ND = dyn_cast_or_null<MDNode>(PI->second)) {
+        if (!RecordedInstruction) {
+ Record.push_back(VE.getInstructionID(I));
+ RecordedInstruction = true;
+ }
+ Record.push_back(PI->first);
+ Record.push_back(VE.getValueID(ND));
+ }
+ }
+ if (!Record.empty()) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteModuleMetadataStore(const Module *M,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+
+ bool StartedMetadataBlock = false;
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata kinds
+ // METADATA_KIND - [n x [id, name]]
+ MetadataContext &TheMetadata = M->getContext().getMetadata();
+ const StringMap<unsigned> *Kinds = TheMetadata.getHandlerNames();
+ for (StringMap<unsigned>::const_iterator
+ I = Kinds->begin(), E = Kinds->end(); I != E; ++I) {
+ Record.push_back(I->second);
+ StringRef KName = I->first();
+ for (unsigned i = 0, e = KName.size(); i != e; ++i)
+ Record.push_back(KName[i]);
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+ Record.clear();
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
static void WriteConstants(unsigned FirstVal, unsigned LastVal,
const ValueEnumerator &VE,
BitstreamWriter &Stream, bool isGlobal) {
if (FirstVal == LastVal) return;
-
+
Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
unsigned AggregateAbbrev = 0;
unsigned String8Abbrev = 0;
unsigned CString7Abbrev = 0;
unsigned CString6Abbrev = 0;
- unsigned MDString8Abbrev = 0;
- unsigned MDString6Abbrev = 0;
// If this is a constant pool for the module, emit module-specific abbrevs.
if (isGlobal) {
// Abbrev for CST_CODE_AGGREGATE.
@@ -493,21 +661,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
CString6Abbrev = Stream.EmitAbbrev(Abbv);
+ }
- // Abbrev for CST_CODE_MDSTRING.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- MDString8Abbrev = Stream.EmitAbbrev(Abbv);
- // Abbrev for CST_CODE_MDSTRING.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- MDString6Abbrev = Stream.EmitAbbrev(Abbv);
- }
-
SmallVector<uint64_t, 64> Record;
const ValueEnumerator::ValueList &Vals = VE.getValues();
@@ -522,16 +677,17 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
CONSTANTS_SETTYPE_ABBREV);
Record.clear();
}
-
+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
- Record.push_back(unsigned(IA->hasSideEffects()));
-
+ Record.push_back(unsigned(IA->hasSideEffects()) |
+ unsigned(IA->isMsAsm()) << 1);
+
// Add the asm string.
const std::string &AsmStr = IA->getAsmString();
Record.push_back(AsmStr.size());
for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
Record.push_back(AsmStr[i]);
-
+
// Add the constraint string.
const std::string &ConstraintStr = IA->getConstraintString();
Record.push_back(ConstraintStr.size());
@@ -558,11 +714,11 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Code = bitc::CST_CODE_INTEGER;
AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
} else { // Wide integers, > 64 bits in size.
- // We have an arbitrary precision integer value to write whose
- // bit width is > 64. However, in canonical unsigned integer
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
// format it is likely that the high bits are going to be zero.
// So, we only write the number of active words.
- unsigned NWords = IV->getValue().getActiveWords();
+ unsigned NWords = IV->getValue().getActiveWords();
const uint64_t *RawWords = IV->getValue().getRawData();
for (unsigned i = 0; i != NWords; ++i) {
int64_t V = RawWords[i];
@@ -576,16 +732,16 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
Code = bitc::CST_CODE_FLOAT;
const Type *Ty = CFP->getType();
- if (Ty == Type::FloatTy || Ty == Type::DoubleTy) {
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) {
Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- } else if (Ty == Type::X86_FP80Ty) {
+ } else if (Ty->isX86_FP80Ty()) {
      // A local APInt copy is needed to prevent premature destruction;
// bits are not in the same order as a normal i80 APInt, compensate.
APInt api = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = api.getRawData();
Record.push_back((p[1] << 48) | (p[0] >> 16));
Record.push_back(p[0] & 0xffffLL);
- } else if (Ty == Type::FP128Ty || Ty == Type::PPC_FP128Ty) {
+ } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
APInt api = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = api.getRawData();
Record.push_back(p[0]);
@@ -610,10 +766,10 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
unsigned char V = cast<ConstantInt>(C->getOperand(i))->getZExtValue();
Record.push_back(V);
isCStr7 &= (V & 128) == 0;
- if (isCStrChar6)
+ if (isCStrChar6)
isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
}
-
+
if (isCStrChar6)
AbbrevToUse = CString6Abbrev;
else if (isCStr7)
@@ -639,10 +795,15 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
Record.push_back(VE.getValueID(C->getOperand(0)));
Record.push_back(VE.getValueID(C->getOperand(1)));
+ uint64_t Flags = GetOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
}
break;
case Instruction::GetElementPtr:
Code = bitc::CST_CODE_CE_GEP;
+ if (cast<GEPOperator>(C)->isInBounds())
+ Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
Record.push_back(VE.getValueID(C->getOperand(i)));
@@ -683,45 +844,15 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
break;
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::VICmp:
- case Instruction::VFCmp:
- if (isa<VectorType>(C->getOperand(0)->getType())
- && (CE->getOpcode() == Instruction::ICmp
- || CE->getOpcode() == Instruction::FCmp)) {
- // compare returning vector of Int1Ty
- assert(0 && "Unsupported constant!");
- } else {
- Code = bitc::CST_CODE_CE_CMP;
- }
+ Code = bitc::CST_CODE_CE_CMP;
Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
Record.push_back(VE.getValueID(C->getOperand(0)));
Record.push_back(VE.getValueID(C->getOperand(1)));
Record.push_back(CE->getPredicate());
break;
}
- } else if (const MDString *S = dyn_cast<MDString>(C)) {
- Code = bitc::CST_CODE_MDSTRING;
- AbbrevToUse = MDString6Abbrev;
- for (unsigned i = 0, e = S->size(); i != e; ++i) {
- char V = S->begin()[i];
- Record.push_back(V);
-
- if (!BitCodeAbbrevOp::isChar6(V))
- AbbrevToUse = MDString8Abbrev;
- }
- } else if (const MDNode *N = dyn_cast<MDNode>(C)) {
- Code = bitc::CST_CODE_MDNODE;
- for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
- if (N->getElement(i)) {
- Record.push_back(VE.getTypeID(N->getElement(i)->getType()));
- Record.push_back(VE.getValueID(N->getElement(i)));
- } else {
- Record.push_back(VE.getTypeID(Type::VoidTy));
- Record.push_back(0);
- }
- }
} else {
- assert(0 && "Unknown constant!");
+ llvm_unreachable("Unknown constant!");
}
Stream.EmitRecord(Code, Record, AbbrevToUse);
Record.clear();
@@ -733,7 +864,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
static void WriteModuleConstants(const ValueEnumerator &VE,
BitstreamWriter &Stream) {
const ValueEnumerator::ValueList &Vals = VE.getValues();
-
+
// Find the first constant to emit, which is the first non-globalvalue value.
// We know globalvalues have been emitted by WriteModuleInfo.
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
@@ -753,7 +884,7 @@ static void WriteModuleConstants(const ValueEnumerator &VE,
/// instruction ID, then it is a forward reference, and it also includes the
/// type ID.
static bool PushValueAndType(const Value *V, unsigned InstID,
- SmallVector<unsigned, 64> &Vals,
+ SmallVector<unsigned, 64> &Vals,
ValueEnumerator &VE) {
unsigned ValID = VE.getValueID(V);
Vals.push_back(ValID);
@@ -770,6 +901,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
SmallVector<unsigned, 64> &Vals) {
unsigned Code = 0;
unsigned AbbrevToUse = 0;
+ VE.setInstructionID(&I);
switch (I.getOpcode()) {
default:
if (Instruction::isCast(I.getOpcode())) {
@@ -785,11 +917,19 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
Vals.push_back(VE.getValueID(I.getOperand(1)));
Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+ uint64_t Flags = GetOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV)
+ AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
}
break;
case Instruction::GetElementPtr:
Code = bitc::FUNC_CODE_INST_GEP;
+ if (cast<GEPOperator>(&I)->isInBounds())
+ Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP;
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
PushValueAndType(I.getOperand(i), InstID, Vals, VE);
break;
@@ -835,21 +975,14 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
break;
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::VICmp:
- case Instruction::VFCmp:
- if (I.getOpcode() == Instruction::ICmp
- || I.getOpcode() == Instruction::FCmp) {
- // compare returning Int1Ty or vector of Int1Ty
- Code = bitc::FUNC_CODE_INST_CMP2;
- } else {
- Code = bitc::FUNC_CODE_INST_CMP;
- }
+ // compare returning Int1Ty or vector of Int1Ty
+ Code = bitc::FUNC_CODE_INST_CMP2;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
Vals.push_back(VE.getValueID(I.getOperand(1)));
Vals.push_back(cast<CmpInst>(I).getPredicate());
break;
- case Instruction::Ret:
+ case Instruction::Ret:
{
Code = bitc::FUNC_CODE_INST_RET;
unsigned NumOperands = I.getNumOperands();
@@ -887,13 +1020,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const PointerType *PTy = cast<PointerType>(Callee->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
Code = bitc::FUNC_CODE_INST_INVOKE;
-
+
Vals.push_back(VE.getAttributeID(II->getAttributes()));
Vals.push_back(II->getCallingConv());
Vals.push_back(VE.getValueID(II->getNormalDest()));
Vals.push_back(VE.getValueID(II->getUnwindDest()));
PushValueAndType(Callee, InstID, Vals, VE);
-
+
// Emit value #'s for the fixed parameters.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i+3))); // fixed param.
@@ -913,38 +1046,38 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_UNREACHABLE;
AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
break;
-
+
case Instruction::PHI:
Code = bitc::FUNC_CODE_INST_PHI;
Vals.push_back(VE.getTypeID(I.getType()));
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
-
+
case Instruction::Malloc:
Code = bitc::FUNC_CODE_INST_MALLOC;
Vals.push_back(VE.getTypeID(I.getType()));
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
Vals.push_back(Log2_32(cast<MallocInst>(I).getAlignment())+1);
break;
-
+
case Instruction::Free:
Code = bitc::FUNC_CODE_INST_FREE;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
break;
-
+
case Instruction::Alloca:
Code = bitc::FUNC_CODE_INST_ALLOCA;
Vals.push_back(VE.getTypeID(I.getType()));
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
break;
-
+
case Instruction::Load:
Code = bitc::FUNC_CODE_INST_LOAD;
if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
-
+
Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
Vals.push_back(cast<LoadInst>(I).isVolatile());
break;
@@ -960,16 +1093,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
Code = bitc::FUNC_CODE_INST_CALL;
-
+
const CallInst *CI = cast<CallInst>(&I);
Vals.push_back(VE.getAttributeID(CI->getAttributes()));
Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall()));
PushValueAndType(CI->getOperand(0), InstID, Vals, VE); // Callee
-
+
// Emit value #'s for the fixed parameters.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i+1))); // fixed param.
-
+
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams();
@@ -986,7 +1119,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(VE.getTypeID(I.getType())); // restype.
break;
}
-
+
Stream.EmitRecord(Code, Vals, AbbrevToUse);
Vals.clear();
}
@@ -1001,27 +1134,27 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
SmallVector<unsigned, 64> NameVals;
-
+
for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
SI != SE; ++SI) {
-
+
const ValueName &Name = *SI;
-
+
// Figure out the encoding to use for the name.
bool is7Bit = true;
bool isChar6 = true;
for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
C != E; ++C) {
- if (isChar6)
+ if (isChar6)
isChar6 = BitCodeAbbrevOp::isChar6(*C);
if ((unsigned char)*C & 128) {
is7Bit = false;
break; // don't bother scanning the rest.
}
}
-
+
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
-
+
// VST_ENTRY: [valueid, namechar x N]
// VST_BBENTRY: [bbid, namechar x N]
unsigned Code;
@@ -1036,12 +1169,12 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
else if (is7Bit)
AbbrevToUse = VST_ENTRY_7_ABBREV;
}
-
+
NameVals.push_back(VE.getValueID(SI->getValue()));
for (const char *P = Name.getKeyData(),
*E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
NameVals.push_back((unsigned char)*P);
-
+
// Emit the finished record.
Stream.EmitRecord(Code, NameVals, AbbrevToUse);
NameVals.clear();
@@ -1050,39 +1183,40 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
}
/// WriteFunction - Emit a function body to the module stream.
-static void WriteFunction(const Function &F, ValueEnumerator &VE,
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
VE.incorporateFunction(F);
SmallVector<unsigned, 64> Vals;
-
+
// Emit the number of basic blocks, so the reader can create them ahead of
// time.
Vals.push_back(VE.getBasicBlocks().size());
Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
Vals.clear();
-
+
// If there are function-local constants, emit them now.
unsigned CstStart, CstEnd;
VE.getFunctionConstantRange(CstStart, CstEnd);
WriteConstants(CstStart, CstEnd, VE, Stream, false);
-
- // Keep a running idea of what the instruction ID is.
+
+  // Keep a running count of the current instruction ID.
unsigned InstID = CstEnd;
-
+
// Finally, emit all the instructions, in order.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
WriteInstruction(*I, InstID, VE, Stream, Vals);
- if (I->getType() != Type::VoidTy)
+ if (I->getType() != Type::getVoidTy(F.getContext()))
++InstID;
}
-
+
// Emit names for all the instructions etc.
WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
-
+
+ WriteMetadataAttachment(F, VE, Stream);
VE.purgeFunction();
Stream.ExitBlock();
}
@@ -1092,9 +1226,9 @@ static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
const ValueEnumerator &VE,
BitstreamWriter &Stream) {
if (TST.empty()) return;
-
+
Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3);
-
+
// 7-bit fixed width VST_CODE_ENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
@@ -1103,14 +1237,14 @@ static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
unsigned V7Abbrev = Stream.EmitAbbrev(Abbv);
-
+
SmallVector<unsigned, 64> NameVals;
-
- for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+
+ for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
TI != TE; ++TI) {
// TST_ENTRY: [typeid, namechar x N]
NameVals.push_back(VE.getTypeID(TI->second));
-
+
const std::string &Str = TI->first;
bool is7Bit = true;
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
@@ -1118,12 +1252,12 @@ static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
if (Str[i] & 128)
is7Bit = false;
}
-
+
// Emit the finished record.
Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0);
NameVals.clear();
}
-
+
Stream.ExitBlock();
}
@@ -1133,18 +1267,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
// instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
// blocks can define their abbrevs inline.
Stream.EnterBlockInfoBlock(2);
-
+
{ // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
Abbv) != VST_ENTRY_8_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
+
{ // 7-bit fixed width VST_ENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
@@ -1153,7 +1287,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
Abbv) != VST_ENTRY_7_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_ENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -1163,7 +1297,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
Abbv) != VST_ENTRY_6_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_BBENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -1173,11 +1307,11 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
Abbv) != VST_BBENTRY_6_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
-
-
+
+
+
{ // SETTYPE abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
@@ -1185,18 +1319,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Log2_32_Ceil(VE.getTypes().size()+1)));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
Abbv) != CONSTANTS_SETTYPE_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
+
{ // INTEGER abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
Abbv) != CONSTANTS_INTEGER_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
+
{ // CE_CAST abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
@@ -1207,18 +1341,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
Abbv) != CONSTANTS_CE_CAST_Abbrev)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // NULL abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
Abbv) != CONSTANTS_NULL_Abbrev)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
+
// FIXME: This should only use space for first class types!
-
+
{ // INST_LOAD abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
@@ -1227,7 +1361,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_LOAD_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -1237,7 +1371,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_BINOP_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_CAST abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -1248,15 +1393,15 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_CAST_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
+
{ // INST_RET abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_RET_VOID_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -1264,16 +1409,16 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_RET_VAL_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
- assert(0 && "Unexpected abbrev ordering!");
+ llvm_unreachable("Unexpected abbrev ordering!");
}
-
+
Stream.ExitBlock();
}
@@ -1281,44 +1426,50 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
-
+
// Emit the version number if it is non-zero.
if (CurVersion) {
SmallVector<unsigned, 1> Vals;
Vals.push_back(CurVersion);
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
}
-
+
// Analyze the module, enumerating globals, functions, etc.
ValueEnumerator VE(M);
// Emit blockinfo, which defines the standard abbreviations etc.
WriteBlockInfo(VE, Stream);
-
+
// Emit information about parameter attributes.
WriteAttributeTable(VE, Stream);
-
+
// Emit information describing all of the types in the module.
WriteTypeTable(VE, Stream);
-
+
// Emit top-level description of module, including target triple, inline asm,
// descriptors for global variables, and function prototype info.
WriteModuleInfo(M, VE, Stream);
-
+
// Emit constants.
WriteModuleConstants(VE, Stream);
-
+
+ // Emit metadata.
+ WriteModuleMetadata(VE, Stream);
+
// Emit function bodies.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
if (!I->isDeclaration())
WriteFunction(*I, VE, Stream);
-
+
+ // Emit metadata.
+ WriteModuleMetadataStore(M, VE, Stream);
+
// Emit the type symbol table information.
WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream);
-
+
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
-
+
Stream.ExitBlock();
}
@@ -1326,7 +1477,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
/// header and trailer to make it compatible with the system archiver. To do
/// this we emit the following header, and then emit a trailer that pads the
/// file out to be a multiple of 16 bytes.
-///
+///
/// struct bc_header {
/// uint32_t Magic; // 0x0B17C0DE
/// uint32_t Version; // Version, currently always 0.
@@ -1343,7 +1494,7 @@ enum {
static void EmitDarwinBCHeader(BitstreamWriter &Stream,
const std::string &TT) {
unsigned CPUType = ~0U;
-
+
// Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a
// magic number from /usr/include/mach/machine.h. It is ok to reproduce the
// specific constants here because they are implicitly part of the Darwin ABI.
@@ -1352,7 +1503,7 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream,
DARWIN_CPU_TYPE_X86 = 7,
DARWIN_CPU_TYPE_POWERPC = 18
};
-
+
if (TT.find("x86_64-") == 0)
CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
@@ -1362,10 +1513,10 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream,
CPUType = DARWIN_CPU_TYPE_POWERPC;
else if (TT.find("powerpc64-") == 0)
CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
-
+
// Traditional Bitcode starts after header.
unsigned BCOffset = DarwinBCHeaderSize;
-
+
Stream.Emit(0x0B17C0DE, 32);
Stream.Emit(0 , 32); // Version.
Stream.Emit(BCOffset , 32);
@@ -1378,7 +1529,7 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream,
static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
// Update the size field in the header.
Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
-
+
// If the file is not a multiple of 16 bytes, insert dummy padding.
while (BufferSize & 15) {
Stream.Emit(0, 8);
@@ -1389,31 +1540,21 @@ static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
-void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
- raw_os_ostream RawOut(Out);
- // If writing to stdout, set binary mode.
- if (llvm::cout == Out)
- sys::Program::ChangeStdoutToBinary();
- WriteBitcodeToFile(M, RawOut);
-}
-
-/// WriteBitcodeToFile - Write the specified module to the specified output
-/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
std::vector<unsigned char> Buffer;
BitstreamWriter Stream(Buffer);
-
+
Buffer.reserve(256*1024);
  WriteBitcodeToStream(M, Stream);
-
+
// If writing to stdout, set binary mode.
if (&llvm::outs() == &Out)
sys::Program::ChangeStdoutToBinary();
// Write the generated bitstream to "Out".
Out.write((char*)&Buffer.front(), Buffer.size());
-
+
// Make sure it hits disk now.
Out.flush();
}
@@ -1425,7 +1566,7 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
if (isDarwin)
EmitDarwinBCHeader(Stream, M->getTargetTriple());
-
+
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit((unsigned)'C', 8);
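Editor's note: GetOptimizationFlags() above packs the nsw/nuw/exact attributes
into a single word that is appended to BINOP records and constant exprs. A
minimal reader-side sketch, assuming the bitc::OBO_NO_SIGNED_WRAP,
bitc::OBO_NO_UNSIGNED_WRAP and bitc::SDIV_EXACT bit positions used above; the
decode helper itself is hypothetical, not part of this commit:

  // Hypothetical inverse of GetOptimizationFlags(); each flag is one bit.
  static void DecodeOptimizationFlags(uint64_t Flags, bool &NSW, bool &NUW,
                                      bool &Exact) {
    NSW   = (Flags >> bitc::OBO_NO_SIGNED_WRAP) & 1;
    NUW   = (Flags >> bitc::OBO_NO_UNSIGNED_WRAP) & 1;
    Exact = (Flags >> bitc::SDIV_EXACT) & 1;
  }
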
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 209cf0980d2d..3a0d3ce0be99 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -17,24 +17,16 @@ using namespace llvm;
namespace {
class WriteBitcodePass : public ModulePass {
- // FIXME: Kill off std::ostream
- std::ostream *Out;
- raw_ostream *RawOut; // raw_ostream to print on
+ raw_ostream &OS; // raw_ostream to print on
public:
static char ID; // Pass identification, replacement for typeid
- explicit WriteBitcodePass(std::ostream &o)
- : ModulePass(&ID), Out(&o), RawOut(0) {}
explicit WriteBitcodePass(raw_ostream &o)
- : ModulePass(&ID), Out(0), RawOut(&o) {}
+ : ModulePass(&ID), OS(o) {}
const char *getPassName() const { return "Bitcode Writer"; }
bool runOnModule(Module &M) {
- if (Out) {
- WriteBitcodeToFile(&M, *Out);
- } else {
- WriteBitcodeToFile(&M, *RawOut);
- }
+ WriteBitcodeToFile(&M, OS);
return false;
}
};
@@ -42,13 +34,6 @@ namespace {
char WriteBitcodePass::ID = 0;
-/// CreateBitcodeWriterPass - Create and return a pass that writes the module
-/// to the specified ostream.
-ModulePass *llvm::CreateBitcodeWriterPass(std::ostream &Str) {
- return new WriteBitcodePass(Str);
-}
-
-
/// createBitcodeWriterPass - Create and return a pass that writes the module
/// to the specified ostream.
ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
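Editor's note: with the std::ostream overload of CreateBitcodeWriterPass
removed, clients construct the writer pass from a raw_ostream only. A usage
sketch under the APIs of this revision; the raw_fd_ostream constructor
signature changed several times around this period, so treat the open call as
illustrative:

  // Illustrative driver: serialize a Module to "out.bc" via the pass manager.
  std::string ErrInfo;
  raw_fd_ostream OS("out.bc", /*Binary=*/true, /*Force=*/true, ErrInfo);
  PassManager PM;
  PM.add(createBitcodeWriterPass(OS));
  PM.run(*M);                      // M is an llvm::Module* obtained elsewhere.
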
diff --git a/lib/Bitcode/Writer/Serialize.cpp b/lib/Bitcode/Writer/Serialize.cpp
index 79464a61be46..a6beb1789e1e 100644
--- a/lib/Bitcode/Writer/Serialize.cpp
+++ b/lib/Bitcode/Writer/Serialize.cpp
@@ -12,11 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/Serialize.h"
-#include "string.h"
-
-#ifdef DEBUG_BACKPATCH
-#include "llvm/Support/Streams.h"
-#endif
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
using namespace llvm;
@@ -86,7 +83,7 @@ SerializedPtrID Serializer::getPtrId(const void* ptr) {
if (I == PtrMap.end()) {
unsigned id = PtrMap.size()+1;
#ifdef DEBUG_BACKPATCH
- llvm::cerr << "Registered PTR: " << ptr << " => " << id << "\n";
+ errs() << "Registered PTR: " << ptr << " => " << id << "\n";
#endif
PtrMap[ptr] = id;
return id;
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 32b2819762db..60253ad91e6e 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -14,7 +14,7 @@
#include "ValueEnumerator.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/MDNode.h"
+#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/ValueSymbolTable.h"
@@ -40,6 +40,8 @@ static bool CompareByFrequency(const std::pair<const llvm::Type*,
/// ValueEnumerator - Enumerate module-level information.
ValueEnumerator::ValueEnumerator(const Module *M) {
+ InstructionCount = 0;
+
// Enumerate the global variables.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
@@ -55,10 +57,10 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I)
EnumerateValue(I);
-
+
// Remember what is the cutoff between globalvalue's and other constants.
unsigned FirstConstant = Values.size();
-
+
// Enumerate the global variable initializers.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
@@ -69,24 +71,25 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I)
EnumerateValue(I->getAliasee());
-
+
// Enumerate types used by the type symbol table.
EnumerateTypeSymbolTable(M->getTypeSymbolTable());
// Insert constants that are named at module level into the slot pool so that
// the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
-
+
// Enumerate types used by function bodies and argument lists.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
-
+
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
EnumerateType(I->getType());
-
+
+ MetadataContext &TheMetadata = F->getContext().getMetadata();
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
- for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
OI != E; ++OI)
EnumerateOperandType(*OI);
EnumerateType(I->getType());
@@ -94,16 +97,24 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
EnumerateAttributes(CI->getAttributes());
else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
EnumerateAttributes(II->getAttributes());
+
+ // Enumerate metadata attached with this instruction.
+ const MetadataContext::MDMapTy *MDs = TheMetadata.getMDs(I);
+ if (MDs)
+ for (MetadataContext::MDMapTy::const_iterator MI = MDs->begin(),
+ ME = MDs->end(); MI != ME; ++MI)
+ if (MDNode *MDN = dyn_cast_or_null<MDNode>(MI->second))
+ EnumerateMetadata(MDN);
}
}
-
+
// Optimize constant ordering.
OptimizeConstants(FirstConstant, Values.size());
-
+
// Sort the type table by frequency so that most commonly used types are early
// in the table (have low bit-width).
std::stable_sort(Types.begin(), Types.end(), CompareByFrequency);
-
+
// Partition the Type ID's so that the single-value types occur before the
// aggregate types. This allows the aggregate types to be dropped from the
// type table after parsing the global variable initializers.
@@ -114,6 +125,28 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
TypeMap[Types[i].first] = i+1;
}
+unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
+ InstructionMapType::const_iterator I = InstructionMap.find(Inst);
+  assert(I != InstructionMap.end() && "Instruction is not mapped!");
+ return I->second;
+}
+
+void ValueEnumerator::setInstructionID(const Instruction *I) {
+ InstructionMap[I] = InstructionCount++;
+}
+
+unsigned ValueEnumerator::getValueID(const Value *V) const {
+ if (isa<MetadataBase>(V)) {
+ ValueMapType::const_iterator I = MDValueMap.find(V);
+ assert(I != MDValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+ }
+
+ ValueMapType::const_iterator I = ValueMap.find(V);
+ assert(I != ValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+}
+
// Optimize constant ordering.
namespace {
struct CstSortPredicate {
@@ -123,7 +156,7 @@ namespace {
const std::pair<const Value*, unsigned> &RHS) {
// Sort by plane.
if (LHS.first->getType() != RHS.first->getType())
- return VE.getTypeID(LHS.first->getType()) <
+ return VE.getTypeID(LHS.first->getType()) <
VE.getTypeID(RHS.first->getType());
// Then by frequency.
return LHS.second > RHS.second;
@@ -134,15 +167,15 @@ namespace {
/// OptimizeConstants - Reorder constant pool for denser encoding.
void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
-
+
CstSortPredicate P(*this);
std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
-
+
// Ensure that integer constants are at the start of the constant pool. This
// is important so that GEP structure indices come before gep constant exprs.
std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
isIntegerValue);
-
+
// Rebuild the modified portion of ValueMap.
for (; CstStart != CstEnd; ++CstStart)
ValueMap[Values[CstStart].first] = CstStart+1;
@@ -152,7 +185,7 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol
/// table.
void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
- for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+ for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
TI != TE; ++TI)
EnumerateType(TI->second);
}
@@ -160,14 +193,57 @@ void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
/// table into the values table.
void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
- for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+ for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
VI != VE; ++VI)
EnumerateValue(VI->getValue());
}
+void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[MD];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+
+ // Enumerate the type of this value.
+ EnumerateType(MD->getType());
+
+ if (const MDNode *N = dyn_cast<MDNode>(MD)) {
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueMap[MD] = MDValues.size();
+ MDValueID = MDValues.size();
+ for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end();
+ I != E; ++I) {
+ if (*I)
+ EnumerateValue(*I);
+ else
+ EnumerateType(Type::getVoidTy(MD->getContext()));
+ }
+ return;
+ } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(MD)) {
+    for (NamedMDNode::const_elem_iterator I = N->elem_begin(),
+ E = N->elem_end(); I != E; ++I) {
+ MetadataBase *M = *I;
+ EnumerateValue(M);
+ }
+ MDValues.push_back(std::make_pair(MD, 1U));
+    MDValueMap[MD] = MDValues.size(); // Index into MDValues, not Values.
+ return;
+ }
+
+ // Add the value.
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueID = MDValues.size();
+}
+
void ValueEnumerator::EnumerateValue(const Value *V) {
- assert(V->getType() != Type::VoidTy && "Can't insert void values!");
-
+ assert(V->getType() != Type::getVoidTy(V->getContext()) &&
+ "Can't insert void values!");
+ if (const MetadataBase *MB = dyn_cast<MetadataBase>(V))
+ return EnumerateMetadata(MB);
+
// Check to see if it's already in!
unsigned &ValueID = ValueMap[V];
if (ValueID) {
@@ -178,7 +254,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
// Enumerate the type of this value.
EnumerateType(V->getType());
-
+
if (const Constant *C = dyn_cast<Constant>(V)) {
if (isa<GlobalValue>(C)) {
// Initializers for globals are handled explicitly elsewhere.
@@ -190,7 +266,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
// If a constant has operands, enumerate them. This makes sure that if a
// constant has uses (for example an array of const ints), that they are
// inserted also.
-
+
    // We prefer to enumerate the operands as values before we enumerate the user
// itself. This makes it more likely that we can avoid forward references
// in the reader. We know that there can be no cycles in the constants
@@ -198,27 +274,15 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
I != E; ++I)
EnumerateValue(*I);
-
+
// Finally, add the value. Doing this could make the ValueID reference be
// dangling, don't reuse it.
Values.push_back(std::make_pair(V, 1U));
ValueMap[V] = Values.size();
return;
- } else if (const MDNode *N = dyn_cast<MDNode>(C)) {
- for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end();
- I != E; ++I) {
- if (*I)
- EnumerateValue(*I);
- else
- EnumerateType(Type::VoidTy);
- }
-
- Values.push_back(std::make_pair(V, 1U));
- ValueMap[V] = Values.size();
- return;
}
}
-
+
// Add the value.
Values.push_back(std::make_pair(V, 1U));
ValueID = Values.size();
@@ -227,17 +291,17 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
void ValueEnumerator::EnumerateType(const Type *Ty) {
unsigned &TypeID = TypeMap[Ty];
-
+
if (TypeID) {
// If we've already seen this type, just increase its occurrence count.
Types[TypeID-1].second++;
return;
}
-
+
// First time we saw this type, add it.
Types.push_back(std::make_pair(Ty, 1U));
TypeID = Types.size();
-
+
// Enumerate subtypes.
for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
I != E; ++I)
@@ -259,10 +323,14 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateOperandType(C->getOperand(i));
if (const MDNode *N = dyn_cast<MDNode>(V)) {
- for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
- EnumerateOperandType(N->getElement(i));
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
+ Value *Elem = N->getElement(i);
+ if (Elem)
+ EnumerateOperandType(Elem);
+ }
}
- }
+ } else if (isa<MDString>(V) || isa<MDNode>(V))
+ EnumerateValue(V);
}
void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
@@ -279,18 +347,18 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
void ValueEnumerator::incorporateFunction(const Function &F) {
NumModuleValues = Values.size();
-
+
  // Add the function arguments to the value table.
  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I)
EnumerateValue(I);
FirstFuncConstantID = Values.size();
-
+
// Add all function-level constants to the value table.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
- for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
OI != E; ++OI) {
if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
isa<InlineAsm>(*OI))
@@ -299,20 +367,20 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
BasicBlocks.push_back(BB);
ValueMap[BB] = BasicBlocks.size();
}
-
+
// Optimize the constant layout.
OptimizeConstants(FirstFuncConstantID, Values.size());
-
+
// Add the function's parameter attributes so they are available for use in
// the function's instruction.
EnumerateAttributes(F.getAttributes());
FirstInstID = Values.size();
-
+
// Add all of the instructions.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
- if (I->getType() != Type::VoidTy)
+ if (I->getType() != Type::getVoidTy(F.getContext()))
EnumerateValue(I);
}
}
@@ -324,8 +392,7 @@ void ValueEnumerator::purgeFunction() {
ValueMap.erase(Values[i].first);
for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
ValueMap.erase(BasicBlocks[i]);
-
+
Values.resize(NumModuleValues);
BasicBlocks.clear();
}
-
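Editor's note: metadata values now live in the MDValues/MDValueMap side table
rather than in the main Values list, so getValueID() dispatches on the value's
kind before indexing. Both maps store ID+1 so that a stored zero can mean "not
yet enumerated"; a condensed, assertion-free sketch of the lookup contract
(the helper is illustrative, not code from this commit):

  // Sketch: both maps store ID+1; metadata has its own ID space.
  static unsigned LookupID(const DenseMap<const Value*, unsigned> &Map,
                           const Value *V) {
    DenseMap<const Value*, unsigned>::const_iterator I = Map.find(V);
    return I->second - 1;          // Caller guarantees V was enumerated.
  }
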
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 40eeabb2b6aa..da63dde2a279 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -22,9 +22,11 @@ namespace llvm {
class Type;
class Value;
+class Instruction;
class BasicBlock;
class Function;
class Module;
+class MetadataBase;
class AttrListPtr;
class TypeSymbolTable;
class ValueSymbolTable;
@@ -44,11 +46,17 @@ private:
typedef DenseMap<const Value*, unsigned> ValueMapType;
ValueMapType ValueMap;
ValueList Values;
+ ValueList MDValues;
+ ValueMapType MDValueMap;
typedef DenseMap<void*, unsigned> AttributeMapType;
AttributeMapType AttributeMap;
std::vector<AttrListPtr> Attributes;
+ typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
+ InstructionMapType InstructionMap;
+ unsigned InstructionCount;
+
/// BasicBlocks - This contains all the basic blocks for the currently
/// incorporated function. Their reverse mapping is stored in ValueMap.
std::vector<const BasicBlock*> BasicBlocks;
@@ -64,18 +72,17 @@ private:
public:
ValueEnumerator(const Module *M);
- unsigned getValueID(const Value *V) const {
- ValueMapType::const_iterator I = ValueMap.find(V);
- assert(I != ValueMap.end() && "Value not in slotcalculator!");
- return I->second-1;
- }
-
+ unsigned getValueID(const Value *V) const;
+
unsigned getTypeID(const Type *T) const {
TypeMapType::const_iterator I = TypeMap.find(T);
assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
return I->second-1;
}
-
+
+ unsigned getInstructionID(const Instruction *I) const;
+ void setInstructionID(const Instruction *I);
+
unsigned getAttributeID(const AttrListPtr &PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
@@ -91,6 +98,7 @@ public:
}
const ValueList &getValues() const { return Values; }
+ const ValueList &getMDValues() const { return MDValues; }
const TypeList &getTypes() const { return Types; }
const std::vector<const BasicBlock*> &getBasicBlocks() const {
return BasicBlocks;
@@ -108,6 +116,7 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+ void EnumerateMetadata(const MetadataBase *MD);
void EnumerateValue(const Value *V);
void EnumerateType(const Type *T);
void EnumerateOperandType(const Value *V);
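Editor's note: the instruction-ID map added here is write-once, read-later:
WriteInstruction() assigns IDs in emission order through setInstructionID(),
and WriteMetadataAttachment() recovers the same numbers to key each
METADATA_ATTACHMENT record. In sketch form (hypothetical call sites; the
method names are from this patch):

  VE.setInstructionID(&I);                    // while emitting instruction I
  // ... later, while writing the function's metadata attachments ...
  Record.push_back(VE.getInstructionID(&I));  // record is keyed on this ID
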
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6d125810d927..8bc5ef91cdf4 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,16 +18,25 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -41,12 +50,17 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
cl::init(cl::BOU_UNSET));
char AsmPrinter::ID = 0;
-AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm,
- const TargetAsmInfo *T, bool VDef)
+AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
+ const MCAsmInfo *T, bool VDef)
: MachineFunctionPass(&ID), FunctionNumber(0), O(o),
- TM(tm), TAI(T), TRI(tm.getRegisterInfo()),
- IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U),
- PrevDLT(0, ~0U, ~0U) {
+ TM(tm), MAI(T), TRI(tm.getRegisterInfo()),
+
+ OutContext(*new MCContext()),
+ // FIXME: Pass instprinter to streamer.
+ OutStreamer(*createAsmStreamer(OutContext, O, *T, 0)),
+
+ LastMI(0), LastFn(0), Counter(~0U),
+ PrevDLT(0, 0, ~0U, ~0U) {
DW = 0; MMI = 0;
switch (AsmVerbose) {
case cl::BOU_UNSET: VerboseAsm = VDef; break;
@@ -59,188 +73,124 @@ AsmPrinter::~AsmPrinter() {
for (gcp_iterator I = GCMetadataPrinters.begin(),
E = GCMetadataPrinters.end(); I != E; ++I)
delete I->second;
-}
-
-/// SwitchToTextSection - Switch to the specified text section of the executable
-/// if we are not already in it!
-///
-void AsmPrinter::SwitchToTextSection(const char *NewSection,
- const GlobalValue *GV) {
- std::string NS;
- if (GV && GV->hasSection())
- NS = TAI->getSwitchToSectionDirective() + GV->getSection();
- else
- NS = NewSection;
- // If we're already in this section, we're done.
- if (CurrentSection == NS) return;
-
- // Close the current section, if applicable.
- if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
- O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
-
- CurrentSection = NS;
-
- if (!CurrentSection.empty())
- O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n';
-
- IsInTextSection = true;
+ delete &OutStreamer;
+ delete &OutContext;
}
-/// SwitchToDataSection - Switch to the specified data section of the executable
-/// if we are not already in it!
-///
-void AsmPrinter::SwitchToDataSection(const char *NewSection,
- const GlobalValue *GV) {
- std::string NS;
- if (GV && GV->hasSection())
- NS = TAI->getSwitchToSectionDirective() + GV->getSection();
- else
- NS = NewSection;
-
- // If we're already in this section, we're done.
- if (CurrentSection == NS) return;
-
- // Close the current section, if applicable.
- if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
- O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
-
- CurrentSection = NS;
-
- if (!CurrentSection.empty())
- O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n';
-
- IsInTextSection = false;
+TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
}
-/// SwitchToSection - Switch to the specified section of the executable if we
-/// are not already in it!
-void AsmPrinter::SwitchToSection(const Section* NS) {
- const std::string& NewSection = NS->getName();
-
- // If we're already in this section, we're done.
- if (CurrentSection == NewSection) return;
-
- // Close the current section, if applicable.
- if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
- O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
-
- // FIXME: Make CurrentSection a Section* in the future
- CurrentSection = NewSection;
- CurrentSection_ = NS;
-
- if (!CurrentSection.empty()) {
- // If section is named we need to switch into it via special '.section'
- // directive and also append funky flags. Otherwise - section name is just
- // some magic assembler directive.
- if (NS->isNamed())
- O << TAI->getSwitchToSectionDirective()
- << CurrentSection
- << TAI->getSectionFlags(NS->getFlags());
- else
- O << CurrentSection;
- O << TAI->getDataSectionStartSuffix() << '\n';
- }
-
- IsInTextSection = (NS->getFlags() & SectionFlags::Code);
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
}
+
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<GCModuleInfo>();
+ if (VerboseAsm)
+ AU.addRequired<MachineLoopInfo>();
}
bool AsmPrinter::doInitialization(Module &M) {
- Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix());
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
- if (TAI->doesAllowQuotesInName())
+ Mang = new Mangler(M, MAI->getGlobalPrefix(), MAI->getPrivateGlobalPrefix(),
+ MAI->getLinkerPrivateGlobalPrefix());
+
+ if (MAI->doesAllowQuotesInName())
Mang->setUseQuotes(true);
+
+ if (MAI->doesAllowNameToStartWithDigit())
+ Mang->setSymbolsCanStartWithDigit(true);
- GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
- assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
- if (TAI->hasSingleParameterDotFile()) {
+ if (MAI->hasSingleParameterDotFile()) {
/* Very minimal debug info. It is ignored if we emit actual
- debug info. If we don't, this at helps the user find where
+ debug info. If we don't, this at least helps the user find where
a function came from. */
O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n";
}
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
- MP->beginAssembly(O, *this, *TAI);
+ MP->beginAssembly(O, *this, *MAI);
if (!M.getModuleInlineAsm().empty())
- O << TAI->getCommentString() << " Start of file scope inline assembly\n"
+ O << MAI->getCommentString() << " Start of file scope inline assembly\n"
<< M.getModuleInlineAsm()
- << '\n' << TAI->getCommentString()
+ << '\n' << MAI->getCommentString()
<< " End of file scope inline assembly\n";
- SwitchToDataSection(""); // Reset back to no section.
-
- if (TAI->doesSupportDebugInformation() ||
- TAI->doesSupportExceptionHandling()) {
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- if (MMI)
- MMI->AnalyzeModule(M);
- DW = getAnalysisIfAvailable<DwarfWriter>();
- if (DW)
- DW->BeginModule(&M, MMI, O, this, TAI);
- }
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (MMI)
+ MMI->AnalyzeModule(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ if (DW)
+ DW->BeginModule(&M, MMI, O, this, MAI);
return false;
}
bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ PrintGlobalVariable(I);
+
// Emit final debug information.
- if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
DW->EndModule();
// If the target wants to know about weak references, print them all.
- if (TAI->getWeakRefDirective()) {
+ if (MAI->getWeakRefDirective()) {
    // FIXME: This is not lazy; it would be nice to only print weak references
// to stuff that is actually used. Note that doing so would require targets
// to notice uses in operands (due to constant exprs etc). This should
// happen with the MC stuff eventually.
- SwitchToDataSection("");
// Print out module-level global variables here.
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (I->hasExternalWeakLinkage())
- O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n';
+ O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n';
}
- for (Module::const_iterator I = M.begin(), E = M.end();
- I != E; ++I) {
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (I->hasExternalWeakLinkage())
- O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n';
+ O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n';
}
}
- if (TAI->getSetDirective()) {
- if (!M.alias_empty())
- SwitchToSection(TAI->getTextSection());
-
+ if (MAI->getSetDirective()) {
O << '\n';
for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
- std::string Name = Mang->getValueName(I);
- std::string Target;
+ std::string Name = Mang->getMangledName(I);
const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
- Target = Mang->getValueName(GV);
+ std::string Target = Mang->getMangledName(GV);
- if (I->hasExternalLinkage() || !TAI->getWeakRefDirective())
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
O << "\t.globl\t" << Name << '\n';
else if (I->hasWeakLinkage())
- O << TAI->getWeakRefDirective() << Name << '\n';
+ O << MAI->getWeakRefDirective() << Name << '\n';
else if (!I->hasLocalLinkage())
- assert(0 && "Invalid alias linkage");
+ llvm_unreachable("Invalid alias linkage");
printVisibility(Name, I->getVisibility());
- O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n';
+ O << MAI->getSetDirective() << ' ' << Name << ", " << Target << '\n';
}
}
@@ -248,45 +198,43 @@ bool AsmPrinter::doFinalization(Module &M) {
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
- MP->finishAssembly(O, *this, *TAI);
+ MP->finishAssembly(O, *this, *MAI);
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
- if (TAI->getNonexecutableStackDirective())
- O << TAI->getNonexecutableStackDirective() << '\n';
+ if (MAI->getNonexecutableStackDirective())
+ O << MAI->getNonexecutableStackDirective() << '\n';
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
delete Mang; Mang = 0;
DW = 0; MMI = 0;
+
+ OutStreamer.Finish();
return false;
}
-const std::string &
-AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF,
- std::string &Name) const {
- assert(MF && "No machine function?");
- Name = MF->getFunction()->getName();
- if (Name.empty())
- Name = Mang->getValueName(MF->getFunction());
- Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() +
- Name + ".eh", TAI->getGlobalPrefix());
- return Name;
-}
-
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
// What's my mangled name?
- CurrentFnName = Mang->getValueName(MF.getFunction());
+ CurrentFnName = Mang->getMangledName(MF.getFunction());
IncrementFunctionNumber();
+
+ if (VerboseAsm)
+ LI = &getAnalysis<MachineLoopInfo>();
}
namespace {
  // SectionCPs - Keep track of the alignment and constant-pool entries per Section.
struct SectionCPs {
- const Section *S;
+ const MCSection *S;
unsigned Alignment;
SmallVector<unsigned, 4> CPEs;
- SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {};
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {};
};
}
@@ -303,9 +251,27 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
// the same section together to reduce amount of section switch statements.
SmallVector<SectionCPs, 4> CPSections;
for (unsigned i = 0, e = CP.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = CP[i];
+ const MachineConstantPoolEntry &CPE = CP[i];
unsigned Align = CPE.getAlignment();
- const Section* S = TAI->SelectSectionForMachineConst(CPE.getType());
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+      case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
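+    // getRelocationInfo is assumed here to follow the usual Constant
+    // convention: 0 = no relocations, 1 = relocations that resolve within
+    // this unit, 2 = relocations against global symbols.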
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
    // The number of sections is small, so just do a linear search from the
    // last section to the first.
bool Found = false;
@@ -328,7 +294,7 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
// Now print stuff into the calculated sections.
for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
- SwitchToSection(CPSections[i].S);
+ OutStreamer.SwitchSection(CPSections[i].S);
EmitAlignment(Log2_32(CPSections[i].Alignment));
unsigned Offset = 0;
@@ -344,11 +310,12 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
const Type *Ty = CPE.getType();
Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
- << CPI << ":\t\t\t\t\t";
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << CPI << ':';
if (VerboseAsm) {
- O << TAI->getCommentString() << ' ';
- WriteTypeSymbolic(O, CPE.getType(), 0);
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " constant ";
+ WriteTypeSymbolic(O, CPE.getType(), MF->getFunction()->getParent());
}
O << '\n';
if (CPE.isMachineConstantPoolEntry())
@@ -373,20 +340,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
// the appropriate section.
TargetLowering *LoweringInfo = TM.getTargetLowering();
- const char* JumpTableDataSection = TAI->getJumpTableDataSection();
const Function *F = MF.getFunction();
- unsigned SectionFlags = TAI->SectionFlagsForGlobal(F);
bool JTInDiffSection = false;
- if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
- !JumpTableDataSection ||
- SectionFlags & SectionFlags::Linkonce) {
+ if (F->isWeakForLinker() ||
+ (IsPic && !LoweringInfo->usesGlobalOffsetTable())) {
// In PIC mode, we need to emit the jump table to the same section as the
    // function body itself; otherwise the label differences won't make sense.
    // We should also do this if the section name is NULL or the function is
    // declared in a discardable section.
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang,
+ TM));
} else {
- SwitchToDataSection(JumpTableDataSection);
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
JTInDiffSection = true;
}
@@ -402,21 +370,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
// the number of relocations the assembler will generate for the jump table.
// Set directives are all printed before the jump table itself.
SmallPtrSet<MachineBasicBlock*, 16> EmittedSets;
- if (TAI->getSetDirective() && IsPic)
+ if (MAI->getSetDirective() && IsPic)
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
if (EmittedSets.insert(JTBBs[ii]))
printPICJumpTableSetLabel(i, JTBBs[ii]);
- // On some targets (e.g. darwin) we want to emit two consequtive labels
+    // On some targets (e.g. Darwin) we want to emit two consecutive labels
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
- if (JTInDiffSection) {
- if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
- O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) {
+ O << MAI->getLinkerPrivateGlobalPrefix()
+ << "JTI" << getFunctionNumber() << '_' << i << ":\n";
}
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << i << ":\n";
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
@@ -429,15 +397,15 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid) const {
- bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
  // Use JumpTableDirective if set; otherwise honor the entry size from the
  // jump table info.
- const char *JTEntryDirective = TAI->getJumpTableDirective();
+ const char *JTEntryDirective = MAI->getJumpTableDirective(isPIC);
bool HadJTEntryDirective = JTEntryDirective != NULL;
if (!HadJTEntryDirective) {
JTEntryDirective = MJTI->getEntrySize() == 4 ?
- TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+ MAI->getData32bitsDirective() : MAI->getData64bitsDirective();
}
O << JTEntryDirective << ' ';
@@ -447,20 +415,18 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
// emit the table entries as differences between two text section labels.
// If we're emitting non-PIC code, then emit the entries as direct
// references to the target basic blocks.
- if (IsPic) {
- if (TAI->getSetDirective()) {
- O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
- << '_' << uid << "_set_" << MBB->getNumber();
- } else {
- printBasicBlockLabel(MBB, false, false, false);
- // If the arch uses custom Jump Table directives, don't calc relative to
- // JT
- if (!HadJTEntryDirective)
- O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
- << getFunctionNumber() << '_' << uid;
- }
+ if (!isPIC) {
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ } else if (MAI->getSetDirective()) {
+ O << MAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
} else {
- printBasicBlockLabel(MBB, false, false, false);
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ // If the arch uses custom Jump Table directives, don't calc relative to
+    // If the arch uses custom Jump Table directives, don't calculate the
+    // entry relative to the JT label.
+ O << '-' << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid;
}
}
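
// Taken together with printPICJumpTableSetLabel below, a PIC entry emitted
// through a .set directive comes out roughly as (assuming a private prefix
// of "L"):
//   .set L2_0_set_5, LBB2_5-LJTI2_0
//   .long L2_0_set_5
// so each table slot is a label difference rather than an absolute address,
// which is what keeps the assembler from emitting a relocation per entry.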
@@ -470,12 +436,12 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
/// do nothing and return false.
bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.used") {
- if (TAI->getUsedDirective() != 0) // No need to emit this at all.
+ if (MAI->getUsedDirective() != 0) // No need to emit this at all.
EmitLLVMUsedList(GV->getInitializer());
return true;
}
- // Ignore debug and non-emitted data.
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
if (GV->getSection() == "llvm.metadata" ||
GV->hasAvailableExternallyLinkage())
return true;
@@ -487,14 +453,14 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
const TargetData *TD = TM.getTargetData();
unsigned Align = Log2_32(TD->getPointerPrefAlignment());
if (GV->getName() == "llvm.global_ctors") {
- SwitchToDataSection(TAI->getStaticCtorsSection());
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
EmitAlignment(Align, 0);
EmitXXStructorList(GV->getInitializer());
return true;
}
if (GV->getName() == "llvm.global_dtors") {
- SwitchToDataSection(TAI->getStaticDtorsSection());
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
EmitAlignment(Align, 0);
EmitXXStructorList(GV->getInitializer());
return true;
@@ -503,45 +469,20 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
return false;
}
-/// findGlobalValue - if CV is an expression equivalent to a single
-/// global value, return that value.
-const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return GV;
- else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- const TargetData *TD = TM.getTargetData();
- unsigned Opcode = CE->getOpcode();
- switch (Opcode) {
- case Instruction::GetElementPtr: {
- const Constant *ptrVal = CE->getOperand(0);
- SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
- if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size()))
- return 0;
- return findGlobalValue(ptrVal);
- }
- case Instruction::BitCast:
- return findGlobalValue(CE->getOperand(0));
- default:
- return 0;
- }
- }
- return 0;
-}
-
-/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list for which emitUsedDirectiveFor
/// is true, as being used with this directive.
-
void AsmPrinter::EmitLLVMUsedList(Constant *List) {
- const char *Directive = TAI->getUsedDirective();
+ const char *Directive = MAI->getUsedDirective();
// Should be an array of 'i8*'.
ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (InitList == 0) return;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- const GlobalValue *GV = findGlobalValue(InitList->getOperand(i));
- if (TAI->emitUsedDirectiveFor(GV, Mang)) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) {
O << Directive;
EmitConstantValueOnly(InitList->getOperand(i));
O << '\n';
@@ -567,32 +508,6 @@ void AsmPrinter::EmitXXStructorList(Constant *List) {
}
}
-/// getGlobalLinkName - Returns the asm/link name of of the specified
-/// global variable. Should be overridden by each target asm printer to
-/// generate the appropriate value.
-const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV,
- std::string &LinkName) const {
- if (isa<Function>(GV)) {
- LinkName += TAI->getFunctionAddrPrefix();
- LinkName += Mang->getValueName(GV);
- LinkName += TAI->getFunctionAddrSuffix();
- } else {
- LinkName += TAI->getGlobalVarAddrPrefix();
- LinkName += Mang->getValueName(GV);
- LinkName += TAI->getGlobalVarAddrSuffix();
- }
-
- return LinkName;
-}
-
-/// EmitExternalGlobal - Emit the external reference to a global variable.
-/// Should be overridden if an indirect reference should be used.
-void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
- std::string GLN;
- O << getGlobalLinkName(GV, GLN);
-}
-
-
//===----------------------------------------------------------------------===//
/// LEB 128 number encoding.
@@ -646,8 +561,8 @@ void AsmPrinter::EOL() const {
void AsmPrinter::EOL(const std::string &Comment) const {
if (VerboseAsm && !Comment.empty()) {
- O << '\t'
- << TAI->getCommentString()
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
<< ' '
<< Comment;
}
@@ -656,22 +571,72 @@ void AsmPrinter::EOL(const std::string &Comment) const {
void AsmPrinter::EOL(const char* Comment) const {
if (VerboseAsm && *Comment) {
- O << '\t'
- << TAI->getCommentString()
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
<< ' '
<< Comment;
}
O << '\n';
}
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr:
+ return "absptr";
+ case dwarf::DW_EH_PE_omit:
+ return "omit";
+ case dwarf::DW_EH_PE_pcrel:
+ return "pcrel";
+ case dwarf::DW_EH_PE_udata4:
+ return "udata4";
+ case dwarf::DW_EH_PE_udata8:
+ return "udata8";
+ case dwarf::DW_EH_PE_sdata4:
+ return "sdata4";
+ case dwarf::DW_EH_PE_sdata8:
+ return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return 0;
+}
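+
+// The DW_EH_PE_* values compose by OR-ing a data format with an application
+// modifier, e.g. DW_EH_PE_pcrel (0x10) | DW_EH_PE_sdata4 (0x0b) == 0x1b,
+// which DecodeDWARFEncoding renders as "pcrel sdata4".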
+
+void AsmPrinter::EOL(const char *Comment, unsigned Encoding) const {
+ if (VerboseAsm && *Comment) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << ' '
+ << Comment;
+
+ if (const char *EncStr = DecodeDWARFEncoding(Encoding))
+ O << " (" << EncStr << ')';
+ }
+ O << '\n';
+}
+
/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
/// unsigned leb128 value.
void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
- if (TAI->hasLEB128()) {
+ if (MAI->hasLEB128()) {
O << "\t.uleb128\t"
<< Value;
} else {
- O << TAI->getData8bitsDirective();
+ O << MAI->getData8bitsDirective();
PrintULEB128(Value);
}
}
@@ -679,11 +644,11 @@ void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
/// EmitSLEB128Bytes - print an assembler byte data directive to compose a
/// signed leb128 value.
void AsmPrinter::EmitSLEB128Bytes(int Value) const {
- if (TAI->hasLEB128()) {
+ if (MAI->hasLEB128()) {
O << "\t.sleb128\t"
<< Value;
} else {
- O << TAI->getData8bitsDirective();
+ O << MAI->getData8bitsDirective();
PrintSLEB128(Value);
}
}
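
// A minimal sketch of the byte expansion PrintULEB128 performs behind
// getData8bitsDirective() when the assembler has no native .uleb128 support;
// EncodeULEB128 is a hypothetical standalone helper, not part of this file.
// For example, 624485 encodes as 0xe5 0x8e 0x26.
static void EncodeULEB128(uint64_t Value, SmallVectorImpl<char> &Out) {
  do {
    unsigned char Byte = Value & 0x7f;   // take the low seven bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                      // mark that more bytes follow
    Out.push_back(char(Byte));
  } while (Value != 0);
}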
@@ -691,29 +656,29 @@ void AsmPrinter::EmitSLEB128Bytes(int Value) const {
/// EmitInt8 - Emit a byte directive and value.
///
void AsmPrinter::EmitInt8(int Value) const {
- O << TAI->getData8bitsDirective();
+ O << MAI->getData8bitsDirective();
PrintHex(Value & 0xFF);
}
/// EmitInt16 - Emit a short directive and value.
///
void AsmPrinter::EmitInt16(int Value) const {
- O << TAI->getData16bitsDirective();
+ O << MAI->getData16bitsDirective();
PrintHex(Value & 0xFFFF);
}
/// EmitInt32 - Emit a long directive and value.
///
void AsmPrinter::EmitInt32(int Value) const {
- O << TAI->getData32bitsDirective();
+ O << MAI->getData32bitsDirective();
PrintHex(Value);
}
/// EmitInt64 - Emit a long long directive and value.
///
void AsmPrinter::EmitInt64(uint64_t Value) const {
- if (TAI->getData64bitsDirective()) {
- O << TAI->getData64bitsDirective();
+ if (MAI->getData64bitsDirective()) {
+ O << MAI->getData64bitsDirective();
PrintHex(Value);
} else {
if (TM.getTargetData()->isBigEndian()) {
@@ -734,7 +699,7 @@ static inline char toOctal(int X) {
/// printStringChar - Print a char, escaped if necessary.
///
-static void printStringChar(raw_ostream &O, unsigned char C) {
+static void printStringChar(formatted_raw_ostream &O, unsigned char C) {
if (C == '"') {
O << "\\\"";
} else if (C == '\\') {
@@ -766,11 +731,11 @@ void AsmPrinter::EmitString(const std::string &String) const {
}
void AsmPrinter::EmitString(const char *String, unsigned Size) const {
- const char* AscizDirective = TAI->getAscizDirective();
+ const char* AscizDirective = MAI->getAscizDirective();
if (AscizDirective)
O << AscizDirective;
else
- O << TAI->getAsciiDirective();
+ O << MAI->getAsciiDirective();
O << '\"';
for (unsigned i = 0; i < Size; ++i)
printStringChar(O, String[i]);
@@ -813,31 +778,26 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
NumBits = std::max(NumBits, ForcedAlignBits);
if (NumBits == 0) return; // No need to emit alignment.
- if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
- O << TAI->getAlignDirective() << NumBits;
-
- unsigned FillValue = TAI->getTextAlignFillValue();
- UseFillExpr &= IsInTextSection && FillValue;
- if (UseFillExpr) {
- O << ',';
- PrintHex(FillValue);
- }
- O << '\n';
+
+ unsigned FillValue = 0;
+ if (getCurrentSection()->getKind().isText())
+ FillValue = MAI->getTextAlignFillValue();
+
+ OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0);
}
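
// For example, in a text section whose fill value is 0x90 (the x86 NOP),
// EmitAlignment(4) now lowers to
//   OutStreamer.EmitValueToAlignment(16, 0x90, 1, 0);
// leaving the directive spelling (byte count vs. power of two) to the
// streamer instead of this printer.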
-
/// EmitZeros - Emit a block of zeros.
///
void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const {
if (NumZeros) {
- if (TAI->getZeroDirective()) {
- O << TAI->getZeroDirective() << NumZeros;
- if (TAI->getZeroDirectiveSuffix())
- O << TAI->getZeroDirectiveSuffix();
+ if (MAI->getZeroDirective()) {
+ O << MAI->getZeroDirective() << NumZeros;
+ if (MAI->getZeroDirectiveSuffix())
+ O << MAI->getZeroDirectiveSuffix();
O << '\n';
} else {
for (; NumZeros; --NumZeros)
- O << TAI->getData8bitsDirective(AddrSpace) << "0\n";
+ O << MAI->getData8bitsDirective(AddrSpace) << "0\n";
}
}
}
@@ -851,22 +811,22 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
O << CI->getZExtValue();
} else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
// This is a constant address for a global variable or function. Use the
- // name of the variable or function as the address value, possibly
- // decorating it with GlobalVarAddrPrefix/Suffix or
- // FunctionAddrPrefix/Suffix (these all default to "" )
- if (isa<Function>(GV)) {
- O << TAI->getFunctionAddrPrefix()
- << Mang->getValueName(GV)
- << TAI->getFunctionAddrSuffix();
- } else {
- O << TAI->getGlobalVarAddrPrefix()
- << Mang->getValueName(GV)
- << TAI->getGlobalVarAddrSuffix();
- }
+ // name of the variable or function as the address value.
+ O << Mang->getMangledName(GV);
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
const TargetData *TD = TM.getTargetData();
unsigned Opcode = CE->getOpcode();
switch (Opcode) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ llvm_unreachable("FIXME: Don't support this constant cast expr");
case Instruction::GetElementPtr: {
// generate a symbolic expression for the byte address
const Constant *ptrVal = CE->getOperand(0);
@@ -891,17 +851,6 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
}
break;
}
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
- break;
case Instruction::BitCast:
return EmitConstantValueOnly(CE->getOperand(0));
@@ -909,7 +858,8 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
return EmitConstantValueOnly(Op);
}
@@ -922,16 +872,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
// We can emit the pointer value into this slot if the slot is an
// integer slot greater or equal to the size of the pointer.
- if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType()))
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
return EmitConstantValueOnly(Op);
O << "((";
EmitConstantValueOnly(Op);
- APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty));
+ APInt ptrMask =
+ APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType()));
SmallString<40> S;
ptrMask.toStringUnsigned(S);
- O << ") & " << S.c_str() << ')';
+ O << ") & " << S.str() << ')';
break;
}
case Instruction::Add:
@@ -966,17 +917,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
O << ')';
break;
default:
- assert(0 && "Unsupported operator!");
+ llvm_unreachable("Unsupported operator!");
}
} else {
- assert(0 && "Unknown constant value!");
+ llvm_unreachable("Unknown constant value!");
}
}
/// printAsCString - Print the specified array as a C compatible string, only if
/// the predicate isString is true.
///
-static void printAsCString(raw_ostream &O, const ConstantArray *CVA,
+static void printAsCString(formatted_raw_ostream &O, const ConstantArray *CVA,
unsigned LastElt) {
assert(CVA->isString() && "Array is not string compatible!");
@@ -993,12 +944,12 @@ static void printAsCString(raw_ostream &O, const ConstantArray *CVA,
///
void AsmPrinter::EmitString(const ConstantArray *CVA) const {
unsigned NumElts = CVA->getNumOperands();
- if (TAI->getAscizDirective() && NumElts &&
+ if (MAI->getAscizDirective() && NumElts &&
cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) {
- O << TAI->getAscizDirective();
+ O << MAI->getAscizDirective();
printAsCString(O, CVA, NumElts-1);
} else {
- O << TAI->getAsciiDirective();
+ O << MAI->getAsciiDirective();
printAsCString(O, CVA, NumElts);
}
O << '\n';
@@ -1053,48 +1004,65 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP,
unsigned AddrSpace) {
// FP Constants are printed as integer constants to avoid losing
// precision...
+ LLVMContext &Context = CFP->getContext();
const TargetData *TD = TM.getTargetData();
- if (CFP->getType() == Type::DoubleTy) {
+ if (CFP->getType()->isDoubleTy()) {
double Val = CFP->getValueAPF().convertToDouble(); // for comment only
uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- if (TAI->getData64bitsDirective(AddrSpace)) {
- O << TAI->getData64bitsDirective(AddrSpace) << i;
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString() << " double value: " << Val;
+ if (MAI->getData64bitsDirective(AddrSpace)) {
+ O << MAI->getData64bitsDirective(AddrSpace) << i;
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " double " << Val;
+ }
O << '\n';
} else if (TD->isBigEndian()) {
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " double most significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant word of double " << Val;
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " double least significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant word of double " << Val;
+ }
O << '\n';
} else {
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " double least significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant word of double " << Val;
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " double most significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant word of double " << Val;
+ }
O << '\n';
}
return;
- } else if (CFP->getType() == Type::FloatTy) {
+ }
+
+ if (CFP->getType()->isFloatTy()) {
float Val = CFP->getValueAPF().convertToFloat(); // for comment only
- O << TAI->getData32bitsDirective(AddrSpace)
+ O << MAI->getData32bitsDirective(AddrSpace)
<< CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString() << " float " << Val;
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " float " << Val;
+ }
O << '\n';
return;
- } else if (CFP->getType() == Type::X86_FP80Ty) {
+ }
+
+ if (CFP->getType()->isX86_FP80Ty()) {
    // All long double variants are printed as hex.
    // The APInt is needed to prevent premature destruction of the raw data.
APInt api = CFP->getValueAPF().bitcastToAPInt();
@@ -1105,110 +1073,148 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP,
DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&ignored);
if (TD->isBigEndian()) {
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double most significant halfword of ~"
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant halfword of x86_fp80 ~"
<< DoubleVal.convertToDouble();
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " next halfword";
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " next halfword";
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " next halfword";
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double least significant halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant halfword";
+ }
O << '\n';
} else {
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double least significant halfword of ~"
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant halfword of x86_fp80 ~"
<< DoubleVal.convertToDouble();
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next halfword";
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next halfword";
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next halfword";
+ }
O << '\n';
- O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double most significant halfword";
+ O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant halfword";
+ }
O << '\n';
}
- EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) -
- TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace);
+ EmitZeros(TD->getTypeAllocSize(Type::getX86_FP80Ty(Context)) -
+ TD->getTypeStoreSize(Type::getX86_FP80Ty(Context)), AddrSpace);
return;
- } else if (CFP->getType() == Type::PPC_FP128Ty) {
+ }
+
+ if (CFP->getType()->isPPC_FP128Ty()) {
    // All long double variants are printed as hex.
    // The APInt is needed to prevent premature destruction of the raw data.
APInt api = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = api.getRawData();
if (TD->isBigEndian()) {
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double most significant word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant word of ppc_fp128";
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next word";
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next word";
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double least significant word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant word";
+ }
O << '\n';
} else {
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double least significant word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant word of ppc_fp128";
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next word";
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double next word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " next word";
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " long double most significant word";
+ O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant word";
+ }
O << '\n';
}
return;
- } else assert(0 && "Floating point constant type not handled");
+ } else llvm_unreachable("Floating point constant type not handled");
}
void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
@@ -1229,29 +1235,37 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
else
Val = RawData[i];
- if (TAI->getData64bitsDirective(AddrSpace))
- O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n';
+ if (MAI->getData64bitsDirective(AddrSpace))
+ O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n';
else if (TD->isBigEndian()) {
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " Double-word most significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant half of i64 " << Val;
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " Double-word least significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant half of i64 " << Val;
+ }
O << '\n';
} else {
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " Double-word least significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " least significant half of i64 " << Val;
+ }
O << '\n';
- O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString()
- << " Double-word most significant word " << Val;
+ O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << " most significant half of i64 " << Val;
+ }
O << '\n';
}
}
@@ -1292,7 +1306,8 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
SmallString<40> S;
CI->getValue().toStringUnsigned(S, 16);
- O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str();
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " 0x" << S.str();
}
}
O << '\n';
@@ -1300,7 +1315,7 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
// Target doesn't support this yet!
- abort();
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
}
/// PrintSpecial - Print information related to the specified machine instr
@@ -1311,10 +1326,10 @@ void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
if (!strcmp(Code, "private")) {
- O << TAI->getPrivateGlobalPrefix();
+ O << MAI->getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
if (VerboseAsm)
- O << TAI->getCommentString();
+ O << MAI->getCommentString();
} else if (!strcmp(Code, "uid")) {
// Comparing the address of MI isn't sufficient, because machineinstrs may
// be allocated to the same address across functions.
@@ -1328,23 +1343,38 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
}
O << Counter;
} else {
- cerr << "Unknown special formatter '" << Code
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
- exit(1);
+ llvm_report_error(Msg.str());
}
}
/// processDebugLoc - Processes the debug information of each machine
/// instruction's DebugLoc.
-void AsmPrinter::processDebugLoc(DebugLoc DL) {
- if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
+void AsmPrinter::processDebugLoc(const MachineInstr *MI,
+ bool BeforePrintingInsn) {
+ if (!MAI || !DW)
+ return;
+ DebugLoc DL = MI->getDebugLoc();
+ if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
if (!DL.isUnknown()) {
DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
-
- if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT)
- printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
- DICompileUnit(CurDLT.CompileUnit)));
-
+ if (BeforePrintingInsn) {
+ if (CurDLT.Scope != 0 && PrevDLT != CurDLT) {
+ unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
+ CurDLT.Scope);
+ printLabel(L);
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ DW->SetDbgScopeBeginLabels(MI, L);
+#endif
+ } else {
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ DW->SetDbgScopeEndLabels(MI, 0);
+#endif
+ }
+ }
PrevDLT = CurDLT;
}
}
@@ -1369,14 +1399,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
// If this asmstr is empty, just print the #APP/#NOAPP markers.
  // These are useful to see where empty asm blocks wound up.
if (AsmStr[0] == 0) {
- O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+ O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t";
+ O << MAI->getCommentString() << MAI->getInlineAsmEnd() << '\n';
return;
}
- O << TAI->getInlineAsmStart() << "\n\t";
+ O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t";
// The variant of the current asmprinter.
- int AsmPrinterVariant = TAI->getAssemblerDialect();
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
@@ -1413,9 +1444,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
case '(': // $( -> same as GCC's { character.
++LastEmitted; // Consume '(' character.
if (CurVariant != -1) {
- cerr << "Nested variants found in inline asm string: '"
- << AsmStr << "'\n";
- exit(1);
+ llvm_report_error("Nested variants found in inline asm string: '"
+ + std::string(AsmStr) + "'");
}
CurVariant = 0; // We're in the first variant now.
break;
@@ -1450,9 +1480,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (StrEnd == 0) {
- cerr << "Unterminated ${:foo} operand in inline asm string: '"
- << AsmStr << "'\n";
- exit(1);
+ llvm_report_error("Unterminated ${:foo} operand in inline asm string: '"
+ + std::string(AsmStr) + "'");
}
std::string Val(StrStart, StrEnd);
@@ -1466,9 +1495,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
errno = 0;
long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
- cerr << "Bad $ operand number in inline asm string: '"
- << AsmStr << "'\n";
- exit(1);
+ llvm_report_error("Bad $ operand number in inline asm string: '"
+ + std::string(AsmStr) + "'");
}
LastEmitted = IDEnd;
@@ -1480,9 +1508,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
if (*LastEmitted == ':') {
++LastEmitted; // Consume ':' character.
if (*LastEmitted == 0) {
- cerr << "Bad ${:} expression in inline asm string: '"
- << AsmStr << "'\n";
- exit(1);
+ llvm_report_error("Bad ${:} expression in inline asm string: '"
+ + std::string(AsmStr) + "'");
}
Modifier[0] = *LastEmitted;
@@ -1490,17 +1517,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
}
if (*LastEmitted != '}') {
- cerr << "Bad ${} expression in inline asm string: '"
- << AsmStr << "'\n";
- exit(1);
+ llvm_report_error("Bad ${} expression in inline asm string: '"
+ + std::string(AsmStr) + "'");
}
++LastEmitted; // Consume '}' character.
}
if ((unsigned)Val >= NumOperands-1) {
- cerr << "Invalid $ operand number in inline asm string: '"
- << AsmStr << "'\n";
- exit(1);
+ llvm_report_error("Invalid $ operand number in inline asm string: '"
+ + std::string(AsmStr) + "'");
}
// Okay, we finally have a value number. Ask the target to print this
@@ -1524,8 +1549,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
++OpNo; // Skip over the ID number.
if (Modifier[0]=='l') // labels are target independent
- printBasicBlockLabel(MI->getOperand(OpNo).getMBB(),
- false, false, false);
+ GetMBBSymbol(MI->getOperand(OpNo).getMBB()
+ ->getNumber())->print(O, MAI);
else {
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
if ((OpFlags & 7) == 4) {
@@ -1538,25 +1563,28 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
}
}
if (Error) {
- cerr << "Invalid operand found in inline asm: '"
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Invalid operand found in inline asm: '"
<< AsmStr << "'\n";
- MI->dump();
- exit(1);
+ MI->print(Msg);
+ llvm_report_error(Msg.str());
}
}
break;
}
}
}
- O << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+ O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd();
}
/// printImplicitDef - This method prints the specified machine instruction
/// that is an implicit def.
void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
- if (VerboseAsm)
- O << '\t' << TAI->getCommentString() << " implicit-def: "
- << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n';
+ if (!VerboseAsm) return;
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " implicit-def: "
+ << TRI->getName(MI->getOperand(0).getReg());
}
/// printLabel - This method prints a local label used by debug and
@@ -1566,17 +1594,7 @@ void AsmPrinter::printLabel(const MachineInstr *MI) const {
}
void AsmPrinter::printLabel(unsigned Id) const {
- O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n";
-}
-
-/// printDeclare - This method prints a local variable declaration used by
-/// debug tables.
-/// FIXME: It doesn't really print anything rather it inserts a DebugVariable
-/// entry into dwarf table.
-void AsmPrinter::printDeclare(const MachineInstr *MI) const {
- unsigned FI = MI->getOperand(0).getIndex();
- GlobalValue *GV = MI->getOperand(1).getGlobal();
- DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI);
+ O << MAI->getPrivateGlobalPrefix() << "label" << Id << ':';
}
/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
@@ -1595,51 +1613,69 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return true;
}
-/// printBasicBlockLabel - This method prints the label for the specified
-/// MachineBasicBlock
-void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
- bool printAlign,
- bool printColon,
- bool printComment) const {
- if (printAlign) {
- unsigned Align = MBB->getAlignment();
- if (Align)
- EmitAlignment(Log2_32(Align));
- }
+MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB"
+ << getFunctionNumber() << '_' << MBBID;
+
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
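+// With a private prefix of ".L", block 7 of function 2 becomes the symbol
+// ".LBB2_7"; GetOrCreateSymbol hands back the same MCSymbol* for every
+// query with that name.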
+
- O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_'
- << MBB->getNumber();
- if (printColon)
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Log2_32(Align));
+
+ if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) {
+ if (VerboseAsm)
+ O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':';
+ } else {
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
O << ':';
- if (printComment && MBB->getBasicBlock())
- O << '\t' << TAI->getCommentString() << ' '
- << MBB->getBasicBlock()->getNameStart();
+ if (!VerboseAsm)
+ O << '\n';
+ }
+
+ if (VerboseAsm) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName()) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, BB, /*PrintType=*/false);
+ }
+
+ EmitComments(*MBB);
+ O << '\n';
+ }
}
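
// Blocks with no predecessors, or reachable only by fallthrough, thus get
// just a comment in verbose mode; every other block still emits a real
// LBB label that branches and jump tables can reference.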
/// printPICJumpTableSetLabel - This method prints a set label for the
/// specified MachineBasicBlock for a jumptable entry.
void AsmPrinter::printPICJumpTableSetLabel(unsigned uid,
const MachineBasicBlock *MBB) const {
- if (!TAI->getSetDirective())
+ if (!MAI->getSetDirective())
return;
- O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix()
<< getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
- printBasicBlockLabel(MBB, false, false, false);
- O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << uid << '\n';
}
void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const {
- if (!TAI->getSetDirective())
+ if (!MAI->getSetDirective())
return;
- O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix()
<< getFunctionNumber() << '_' << uid << '_' << uid2
<< "_set_" << MBB->getNumber() << ',';
- printBasicBlockLabel(MBB, false, false, false);
- O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << uid << '_' << uid2 << '\n';
}
@@ -1648,73 +1684,51 @@ void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) {
const TargetData *TD = TM.getTargetData();
switch (type->getTypeID()) {
+ case Type::FloatTyID: case Type::DoubleTyID:
+ case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID:
+ assert(0 && "Should have already output floating point constant.");
+ default:
+ assert(0 && "Can't handle printing this type of thing");
case Type::IntegerTyID: {
unsigned BitWidth = cast<IntegerType>(type)->getBitWidth();
if (BitWidth <= 8)
- O << TAI->getData8bitsDirective(AddrSpace);
+ O << MAI->getData8bitsDirective(AddrSpace);
else if (BitWidth <= 16)
- O << TAI->getData16bitsDirective(AddrSpace);
+ O << MAI->getData16bitsDirective(AddrSpace);
else if (BitWidth <= 32)
- O << TAI->getData32bitsDirective(AddrSpace);
+ O << MAI->getData32bitsDirective(AddrSpace);
else if (BitWidth <= 64) {
- assert(TAI->getData64bitsDirective(AddrSpace) &&
+ assert(MAI->getData64bitsDirective(AddrSpace) &&
"Target cannot handle 64-bit constant exprs!");
- O << TAI->getData64bitsDirective(AddrSpace);
+ O << MAI->getData64bitsDirective(AddrSpace);
} else {
- assert(0 && "Target cannot handle given data directive width!");
+ llvm_unreachable("Target cannot handle given data directive width!");
}
break;
}
case Type::PointerTyID:
if (TD->getPointerSize() == 8) {
- assert(TAI->getData64bitsDirective(AddrSpace) &&
+ assert(MAI->getData64bitsDirective(AddrSpace) &&
"Target cannot handle 64-bit pointer exprs!");
- O << TAI->getData64bitsDirective(AddrSpace);
+ O << MAI->getData64bitsDirective(AddrSpace);
} else if (TD->getPointerSize() == 2) {
- O << TAI->getData16bitsDirective(AddrSpace);
+ O << MAI->getData16bitsDirective(AddrSpace);
} else if (TD->getPointerSize() == 1) {
- O << TAI->getData8bitsDirective(AddrSpace);
+ O << MAI->getData8bitsDirective(AddrSpace);
} else {
- O << TAI->getData32bitsDirective(AddrSpace);
+ O << MAI->getData32bitsDirective(AddrSpace);
}
break;
- case Type::FloatTyID: case Type::DoubleTyID:
- case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID:
- assert (0 && "Should have already output floating point constant.");
- default:
- assert (0 && "Can't handle printing this type of thing");
- break;
}
}
-void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix,
- const char *Prefix) {
- if (Name[0]=='\"')
- O << '\"';
- O << TAI->getPrivateGlobalPrefix();
- if (Prefix) O << Prefix;
- if (Name[0]=='\"')
- O << '\"';
- if (Name[0]=='\"')
- O << Name[1];
- else
- O << Name;
- O << Suffix;
- if (Name[0]=='\"')
- O << '\"';
-}
-
-void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) {
- printSuffixedName(Name.c_str(), Suffix);
-}
-
void AsmPrinter::printVisibility(const std::string& Name,
unsigned Visibility) const {
if (Visibility == GlobalValue::HiddenVisibility) {
- if (const char *Directive = TAI->getHiddenDirective())
+ if (const char *Directive = MAI->getHiddenDirective())
O << Directive << Name << '\n';
} else if (Visibility == GlobalValue::ProtectedVisibility) {
- if (const char *Directive = TAI->getProtectedDirective())
+ if (const char *Directive = MAI->getProtectedDirective())
O << Directive << Name << '\n';
}
}
@@ -1746,6 +1760,104 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
return GMP;
}
- cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n";
- abort();
+ errs() << "no GCMetadataPrinter registered for GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
+
+/// EmitComments - Pretty-print comments for instructions
+void AsmPrinter::EmitComments(const MachineInstr &MI) const {
+ assert(VerboseAsm && !MI.getDebugLoc().isUnknown());
+
+ DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+
+ // Print source line info.
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " SrcLine ";
+ if (DLT.Scope) {
+ DICompileUnit CU(DLT.Scope);
+ if (!CU.isNull())
+ O << CU.getFilename() << " ";
+ }
+ O << DLT.Line;
+ if (DLT.Col != 0)
+ O << ":" << DLT.Col;
+}
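
A freestanding sketch of the comment-column layout EmitComments produces. Column 60 and the '#' comment string are example values standing in for MAI->getCommentColumn() and MAI->getCommentString():

    #include <iostream>
    #include <string>

    // Pad a partially built asm line to a fixed comment column, then
    // append a "# SrcLine file:line" style comment.
    static std::string withSrcLine(std::string Line, unsigned Col,
                                   const std::string &File, unsigned LineNo) {
      if (Line.size() < Col)
        Line.append(Col - Line.size(), ' ');  // PadToColumn equivalent
      return Line + "# SrcLine " + File + " " + std::to_string(LineNo);
    }

    int main() {
      std::cout << withSrcLine("\tmovl\t$0, %eax", 60, "foo.c", 42) << '\n';
    }
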
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+///
+static void PrintChildLoopComment(formatted_raw_ostream &O,
+ const MachineLoop *loop,
+ const MCAsmInfo *MAI,
+ int FunctionNumber) {
+ // Add child loop information
+  for (MachineLoop::iterator cl = loop->begin(),
+ clend = loop->end();
+ cl != clend;
+ ++cl) {
+ MachineBasicBlock *Header = (*cl)->getHeader();
+ assert(Header && "No header for loop");
+
+ O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+
+ O << MAI->getCommentString();
+ O.indent(((*cl)->getLoopDepth()-1)*2)
+ << " Child Loop BB" << FunctionNumber << "_"
+ << Header->getNumber() << " Depth " << (*cl)->getLoopDepth();
+
+ PrintChildLoopComment(O, *cl, MAI, FunctionNumber);
+ }
+}
+
+/// EmitComments - Pretty-print comments for basic blocks
+void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const
+{
+ if (VerboseAsm) {
+ // Add loop depth information
+ const MachineLoop *loop = LI->getLoopFor(&MBB);
+
+ if (loop) {
+ // Print a newline after bb# annotation.
+ O << "\n";
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Loop Depth " << loop->getLoopDepth()
+ << '\n';
+
+ O.PadToColumn(MAI->getCommentColumn());
+
+ MachineBasicBlock *Header = loop->getHeader();
+ assert(Header && "No header for loop");
+
+ if (Header == &MBB) {
+ O << MAI->getCommentString() << " Loop Header";
+ PrintChildLoopComment(O, loop, MAI, getFunctionNumber());
+ }
+ else {
+ O << MAI->getCommentString() << " Loop Header is BB"
+ << getFunctionNumber() << "_" << loop->getHeader()->getNumber();
+ }
+
+ if (loop->empty()) {
+ O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Inner Loop";
+ }
+
+ // Add parent loop information
+ for (const MachineLoop *CurLoop = loop->getParentLoop();
+ CurLoop;
+ CurLoop = CurLoop->getParentLoop()) {
+ MachineBasicBlock *Header = CurLoop->getHeader();
+ assert(Header && "No header for loop");
+
+ O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString();
+ O.indent((CurLoop->getLoopDepth()-1)*2)
+ << " Inside Loop BB" << getFunctionNumber() << "_"
+ << Header->getNumber() << " Depth " << CurLoop->getLoopDepth();
+ }
+ }
+ }
}
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 01c431c849a3..ecf00077fc31 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -14,9 +14,10 @@
#include "DIE.h"
#include "DwarfPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
-#include <ostream>
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -75,24 +76,24 @@ void DIEAbbrev::Emit(const AsmPrinter *Asm) const {
}
#ifndef NDEBUG
-void DIEAbbrev::print(std::ostream &O) {
+void DIEAbbrev::print(raw_ostream &O) {
O << "Abbreviation @"
- << std::hex << (intptr_t)this << std::dec
+ << format("0x%lx", (long)(intptr_t)this)
<< " "
<< dwarf::TagString(Tag)
<< " "
<< dwarf::ChildrenString(ChildrenFlag)
- << "\n";
+ << '\n';
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
O << " "
<< dwarf::AttributeString(Data[i].getAttribute())
<< " "
<< dwarf::FormEncodingString(Data[i].getForm())
- << "\n";
+ << '\n';
}
}
-void DIEAbbrev::dump() { print(cerr); }
+void DIEAbbrev::dump() { print(errs()); }
#endif
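
Why the hunk swaps std::hex for format(): raw_ostream has no iostream manipulators, so hex output goes through a printf-style formatter. A plain-printf equivalent of the old and new spellings:

    #include <cstdint>
    #include <cstdio>

    int main() {
      long Addr = 0x7fff1234;  // stands in for (long)(intptr_t)this
      // old: O << "0x" << std::hex << Addr << std::dec;
      // new: O << format("0x%lx", Addr);
      std::printf("Abbreviation @0x%lx\n", Addr);
    }
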
//===----------------------------------------------------------------------===//
@@ -125,7 +126,7 @@ void DIE::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIE::print(std::ostream &O, unsigned IncIndent) {
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
const std::string Indent(IndentCount, ' ');
bool isBlock = Abbrev.getTag() == 0;
@@ -133,7 +134,7 @@ void DIE::print(std::ostream &O, unsigned IncIndent) {
if (!isBlock) {
O << Indent
<< "Die: "
- << "0x" << std::hex << (intptr_t)this << std::dec
+ << format("0x%lx", (long)(intptr_t)this)
<< ", Offset: " << Offset
<< ", Size: " << Size
<< "\n";
@@ -175,14 +176,14 @@ void DIE::print(std::ostream &O, unsigned IncIndent) {
}
void DIE::dump() {
- print(cerr);
+ print(errs());
}
#endif
#ifndef NDEBUG
void DIEValue::dump() {
- print(cerr);
+ print(errs());
}
#endif
@@ -206,7 +207,7 @@ void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const {
case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break;
case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break;
- default: assert(0 && "DIE Value form not supported yet"); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -223,9 +224,9 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: return sizeof(int64_t);
- case dwarf::DW_FORM_udata: return TargetAsmInfo::getULEB128Size(Integer);
- case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer);
- default: assert(0 && "DIE Value form not supported yet"); break;
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ default: llvm_unreachable("DIE Value form not supported yet"); break;
}
return 0;
}
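
A sketch of the quantity MCAsmInfo::getULEB128Size returns: one byte per 7 bits of payload in the unsigned LEB128 encoding (the function name here is a stand-in):

    #include <cassert>
    #include <cstdint>

    static unsigned uleb128Size(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;  // each encoded byte carries 7 value bits
        ++Size;
      } while (Value);
      return Size;
    }

    int main() {
      assert(uleb128Size(0) == 1);
      assert(uleb128Size(127) == 1);
      assert(uleb128Size(128) == 2);  // needs a second 7-bit group
    }
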
@@ -241,9 +242,9 @@ void DIEInteger::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEInteger::print(std::ostream &O) {
+void DIEInteger::print(raw_ostream &O) {
O << "Int: " << (int64_t)Integer
- << " 0x" << std::hex << Integer << std::dec;
+ << format(" 0x%llx", (unsigned long long)Integer);
}
#endif
@@ -268,7 +269,7 @@ void DIEString::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEString::print(std::ostream &O) {
+void DIEString::print(raw_ostream &O) {
O << "Str: \"" << Str << "\"";
}
#endif
@@ -302,7 +303,7 @@ void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEDwarfLabel::print(std::ostream &O) {
+void DIEDwarfLabel::print(raw_ostream &O) {
O << "Lbl: ";
Label.print(O);
}
@@ -337,7 +338,7 @@ void DIEObjectLabel::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEObjectLabel::print(std::ostream &O) {
+void DIEObjectLabel::print(raw_ostream &O) {
O << "Obj: " << Label;
}
#endif
@@ -377,7 +378,7 @@ void DIESectionOffset::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIESectionOffset::print(std::ostream &O) {
+void DIESectionOffset::print(raw_ostream &O) {
O << "Off: ";
Label.print(O);
O << "-";
@@ -417,7 +418,7 @@ void DIEDelta::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEDelta::print(std::ostream &O) {
+void DIEDelta::print(raw_ostream &O) {
O << "Del: ";
LabelHi.print(O);
O << "-";
@@ -451,8 +452,8 @@ void DIEEntry::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEEntry::print(std::ostream &O) {
- O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
}
#endif
@@ -481,7 +482,7 @@ void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const {
case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break;
- default: assert(0 && "Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block"); break;
}
const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
@@ -498,8 +499,8 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
- case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size);
- default: assert(0 && "Improper form for block"); break;
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block"); break;
}
return 0;
}
@@ -510,7 +511,7 @@ void DIEBlock::Profile(FoldingSetNodeID &ID) {
}
#ifndef NDEBUG
-void DIEBlock::print(std::ostream &O) {
+void DIEBlock::print(raw_ostream &O) {
O << "Blk: ";
DIE::print(O, 5);
}
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 5b60327f9036..62b51ecd18ac 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -19,8 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/raw_ostream.h"
-#include <iosfwd>
+#include <vector>
namespace llvm {
class AsmPrinter;
@@ -103,10 +102,7 @@ namespace llvm {
void Emit(const AsmPrinter *Asm) const;
#ifndef NDEBUG
- void print(std::ostream *O) {
- if (O) print(*O);
- }
- void print(std::ostream &O);
+ void print(raw_ostream &O);
void dump();
#endif
};
@@ -198,10 +194,7 @@ namespace llvm {
void Profile(FoldingSetNodeID &ID) ;
#ifndef NDEBUG
- void print(std::ostream *O, unsigned IncIndent = 0) {
- if (O) print(*O, IncIndent);
- }
- void print(std::ostream &O, unsigned IncIndent = 0);
+ void print(raw_ostream &O, unsigned IncIndent = 0);
void dump();
#endif
};
@@ -248,10 +241,7 @@ namespace llvm {
static bool classof(const DIEValue *) { return true; }
#ifndef NDEBUG
- void print(std::ostream *O) {
- if (O) print(*O);
- }
- virtual void print(std::ostream &O) = 0;
+ virtual void print(raw_ostream &O) = 0;
void dump();
#endif
};
@@ -297,7 +287,7 @@ namespace llvm {
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -329,7 +319,7 @@ namespace llvm {
static bool classof(const DIEValue *S) { return S->getType() == isString; }
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -359,7 +349,7 @@ namespace llvm {
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -392,7 +382,7 @@ namespace llvm {
}
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -431,7 +421,7 @@ namespace llvm {
}
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -464,7 +454,7 @@ namespace llvm {
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -500,7 +490,7 @@ namespace llvm {
static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
@@ -544,7 +534,7 @@ namespace llvm {
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
#ifndef NDEBUG
- virtual void print(std::ostream &O);
+ virtual void print(raw_ostream &O);
#endif
};
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 547140fa5217..4394ec08ef22 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -10,16 +10,24 @@
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "dwarfdebug"
#include "DwarfDebug.h"
#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/System/Path.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
using namespace llvm;
static TimerGroup &getDwarfTimerGroup() {
@@ -51,11 +59,13 @@ class VISIBILITY_HIDDEN CompileUnit {
  /// GVToDieMap - Tracks the mapping of unit level debug information
/// variables to debug information entries.
- std::map<GlobalVariable *, DIE *> GVToDieMap;
+ /// FIXME : Rename GVToDieMap -> NodeToDieMap
+ std::map<MDNode *, DIE *> GVToDieMap;
  /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
/// descriptors to debug information entries using a DIEEntry proxy.
- std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap;
+ /// FIXME : Rename
+ std::map<MDNode *, DIEEntry *> GVToDIEEntryMap;
/// Globals - A map of globally visible named entities for this unit.
///
@@ -84,12 +94,12 @@ public:
/// getDieMapSlotFor - Returns the debug information entry map slot for the
/// specified debug variable.
- DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; }
+ DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; }
- /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for the
- /// specified debug variable.
- DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) {
- return GVToDIEEntryMap[GV];
+ /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for
+ /// the specified debug variable.
+ DIEEntry *&getDIEEntrySlotFor(MDNode *N) {
+ return GVToDIEEntryMap[N];
}
/// AddDie - Adds or interns the DIE to the compile unit.
@@ -138,15 +148,18 @@ class VISIBILITY_HIDDEN DbgScope {
// Either subprogram or block.
unsigned StartLabelID; // Label ID of the beginning of scope.
unsigned EndLabelID; // Label ID of the end of scope.
+ const MachineInstr *LastInsn; // Last instruction of this scope.
+ const MachineInstr *FirstInsn; // First instruction of this scope.
SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
-
+
// Private state for dump()
mutable unsigned IndentLevel;
public:
DbgScope(DbgScope *P, DIDescriptor D)
- : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), IndentLevel(0) {}
+ : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), LastInsn(0),
+ FirstInsn(0), IndentLevel(0) {}
virtual ~DbgScope();
// Accessors.
@@ -159,7 +172,10 @@ public:
SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; }
void setStartLabelID(unsigned S) { StartLabelID = S; }
void setEndLabelID(unsigned E) { EndLabelID = E; }
-
+ void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
+ const MachineInstr *getLastInsn() { return LastInsn; }
+ void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
+ const MachineInstr *getFirstInsn() { return FirstInsn; }
/// AddScope - Add a scope to the scope.
///
void AddScope(DbgScope *S) { Scopes.push_back(S); }
@@ -172,6 +188,21 @@ public:
///
void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
+ void FixInstructionMarkers() {
+ assert (getFirstInsn() && "First instruction is missing!");
+ if (getLastInsn())
+ return;
+
+    // If a scope does not have an instruction marking its end, use
+    // the end of the last child scope.
+ SmallVector<DbgScope *, 4> &Scopes = getScopes();
+    assert (!Scopes.empty() && "Innermost scope does not have last insn!");
+ DbgScope *L = Scopes.back();
+ if (!L->getLastInsn())
+ L->FixInstructionMarkers();
+ setLastInsn(L->getLastInsn());
+ }
+
#ifndef NDEBUG
void dump() const;
#endif
@@ -179,10 +210,10 @@ public:
#ifndef NDEBUG
void DbgScope::dump() const {
- std::string Indent(IndentLevel, ' ');
-
- cerr << Indent; Desc.dump();
- cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+ raw_ostream &err = errs();
+ err.indent(IndentLevel);
+ Desc.dump();
+ err << " [" << StartLabelID << ", " << EndLabelID << "]\n";
IndentLevel += 2;
@@ -220,10 +251,10 @@ DbgScope::~DbgScope() {
} // end llvm namespace
-DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
: Dwarf(OS, A, T, "dbg"), ModuleCU(0),
AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
- ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(),
+ ValuesSet(InitValuesSetSize), Values(), StringPool(),
SectionSourceLines(), didInitial(false), shouldEmit(false),
FunctionDbgScope(0), DebugTimer(0) {
if (TimePassesIsEnabled)
@@ -234,7 +265,7 @@ DwarfDebug::~DwarfDebug() {
for (unsigned j = 0, M = Values.size(); j < M; ++j)
delete Values[j];
- for (DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ for (DenseMap<const MDNode *, DbgScope *>::iterator
I = AbstractInstanceRootMap.begin(),
E = AbstractInstanceRootMap.end(); I != E;++I)
delete I->second;
@@ -479,6 +510,27 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) {
AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) {
+ // If there is no compile unit specified, don't add a line #.
+ if (SP->getCompileUnit().isNull())
+ return;
+ // If the line number is 0, don't add it.
+ if (SP->getLineNumber() == 0)
+ return;
+
+
+ unsigned Line = SP->getLineNumber();
+ unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
// If there is no compile unit specified, don't add a line #.
DICompileUnit CU = Ty->getCompileUnit();
@@ -492,6 +544,270 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
+/* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function AddBlockByrefType does this. */
+
+/// Find the type the programmer originally declared the variable to be
+/// and return that type.
+///
+DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) {
+
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty.getNode());
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType.getNode());
+
+ DIArray Elements = blockStruct.getTypeArray();
+
+ if (Elements.isNull())
+ return Ty;
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element.getNode());
+ if (strcmp(Name.c_str(), DT.getName()) == 0)
+ return (DT.getTypeDerivedFrom());
+ }
+
+ return Ty;
+}
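
The illustrative shape of the compiler-generated struct GetBlockByrefType digs through; the exact field list is ABI-defined, and int stands in for the programmer's SomeType:

    struct __Block_byref_x_VarName {
      void *isa;
      struct __Block_byref_x_VarName *forwarding;
      int flags;
      int size;
      int VarName;  // the field the loop above matches by name
    };

    int main() { return sizeof(struct __Block_byref_x_VarName) ? 0 : 1; }
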
+
+/// AddComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable
+/// given the extra address information encoded in the DIVariable, starting from
+/// the starting location. Add the DWARF information to the die.
+///
+void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ const DIVariable &VD = DV->getVariable();
+ DIType Ty = VD.getType();
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ } else {
+ Reg = Reg - dwarf::DW_OP_reg0;
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
+ uint64_t Element = VD.getAddrElement(i);
+
+ if (Element == DIFactory::OpPlus) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+ } else if (Element == DIFactory::OpDeref) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIFactory Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ AddBlock(Die, Attribute, 0, Block);
+}
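
The DWARF rule the register/offset cases above encode, shown with opcode values from the DWARF spec: base registers 0-31 fit in one byte as DW_OP_breg0+N, while higher numbers need DW_OP_bregx plus a ULEB128 register operand. A freestanding sketch of the memory-location case:

    #include <cstdio>

    enum : unsigned { DW_OP_breg0 = 0x70, DW_OP_bregx = 0x92 };

    static void describeBaseReg(unsigned Reg) {
      if (Reg < 32)
        std::printf("one byte: 0x%02x (DW_OP_breg%u)\n", DW_OP_breg0 + Reg, Reg);
      else
        std::printf("0x%02x (DW_OP_bregx) + ULEB128 %u\n", DW_OP_bregx, Reg);
    }

    int main() {
      describeBaseReg(6);   // e.g. rbp in x86-64 DWARF numbering
      describeBaseReg(40);  // too big for the one-byte form
    }
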
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function GetBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+  2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
+
+/// AddBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read large comment just above here.
+///
+void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ const DIVariable &VD = DV->getVariable();
+ DIType Ty = VD.getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ const char *varName = VD.getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty.getNode());
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy.getNode());
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element.getNode());
+ const char *fieldName = DT.getName();
+ if (strcmp(fieldName, "__forwarding") == 0)
+ forwardingField = Element;
+ else if (strcmp(fieldName, varName) == 0)
+ varField = Element;
+ }
+
+ assert(!varField.isNull() && "Can't find byref variable in Block struct");
+ assert(!forwardingField.isNull()
+ && "Can't find forwarding field in Block struct");
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned int forwardingFieldOffset =
+ DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3;
+ unsigned int varFieldOffset =
+ DIDerivedType(varField.getNode()).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32)
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ else {
+ Reg = Reg - dwarf::DW_OP_reg0;
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ AddBlock(Die, Attribute, 0, Block);
+}
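
The complete expression AddBlockByrefAddress appends for the pointer-to-struct case, spelled out as (opcode, operand) pairs. The opcode values are from the DWARF spec; the offsets 8 and 24 are made-up examples:

    #include <cstdint>

    enum : uint8_t { DW_OP_deref = 0x06, DW_OP_plus_uconst = 0x23 };
    struct Op { uint8_t Opcode; uint64_t Operand; };

    static const Op ByrefSuffix[] = {
      { DW_OP_deref,       0  },  // started with a pointer to the struct
      { DW_OP_plus_uconst, 8  },  // offset of '__forwarding' (example)
      { DW_OP_deref,       0  },  // follow forwarding to the live copy
      { DW_OP_plus_uconst, 24 },  // offset of the variable's field (example)
    };

    int main() { return sizeof(ByrefSuffix) / sizeof(ByrefSuffix[0]) == 4 ? 0 : 1; }
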
+
/// AddAddress - Add an address attribute to a die based on the location
/// provided.
void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute,
@@ -526,7 +842,7 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
return;
// Check for pre-existence.
- DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV());
+ DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode());
// If it exists then use the existing value.
if (Slot) {
@@ -539,20 +855,20 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
// Construct type.
DIE Buffer(dwarf::DW_TAG_base_type);
- if (Ty.isBasicType(Ty.getTag()))
- ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV()));
- else if (Ty.isDerivedType(Ty.getTag()))
- ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV()));
+ if (Ty.isBasicType())
+ ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode()));
+ else if (Ty.isCompositeType())
+ ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode()));
else {
- assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType");
- ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV()));
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode()));
}
// Add debug information entry to entity and appropriate context.
DIE *Die = NULL;
DIDescriptor Context = Ty.getContext();
if (!Context.isNull())
- Die = DW_Unit->getDieMapSlotFor(Context.getGV());
+ Die = DW_Unit->getDieMapSlotFor(Context.getNode());
if (Die) {
DIE *Child = new DIE(Buffer);
@@ -571,14 +887,13 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIBasicType BTy) {
// Get core information.
- std::string Name;
- BTy.getName(Name);
+ const char *Name = BTy.getName();
Buffer.setTag(dwarf::DW_TAG_base_type);
AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy.getEncoding());
// Add name if not anonymous or intermediate type.
- if (!Name.empty())
+ if (Name)
AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
uint64_t Size = BTy.getSizeInBits() >> 3;
AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -588,8 +903,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIDerivedType DTy) {
// Get core information.
- std::string Name;
- DTy.getName(Name);
+ const char *Name = DTy.getName();
uint64_t Size = DTy.getSizeInBits() >> 3;
unsigned Tag = DTy.getTag();
@@ -603,7 +917,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
AddType(DW_Unit, &Buffer, FromTy);
// Add name if not anonymous or intermediate type.
- if (!Name.empty())
+ if (Name)
AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
// Add size if non-zero (derived types might be zero-sized.)
@@ -619,8 +933,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DICompositeType CTy) {
// Get core information.
- std::string Name;
- CTy.getName(Name);
+ const char *Name = CTy.getName();
uint64_t Size = CTy.getSizeInBits() >> 3;
unsigned Tag = CTy.getTag();
@@ -637,9 +950,11 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Add enumerators to enumeration type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIE *ElemDie = NULL;
- DIEnumerator Enum(Elements.getElement(i).getGV());
- ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum);
- Buffer.AddChild(ElemDie);
+ DIEnumerator Enum(Elements.getElement(i).getNode());
+ if (!Enum.isNull()) {
+ ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum);
+ Buffer.AddChild(ElemDie);
+ }
}
}
break;
@@ -647,7 +962,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Add return type.
DIArray Elements = CTy.getTypeArray();
DIDescriptor RTy = Elements.getElement(0);
- AddType(DW_Unit, &Buffer, DIType(RTy.getGV()));
+ AddType(DW_Unit, &Buffer, DIType(RTy.getNode()));
// Add prototype flag.
AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
@@ -656,7 +971,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
DIDescriptor Ty = Elements.getElement(i);
- AddType(DW_Unit, Arg, DIType(Ty.getGV()));
+ AddType(DW_Unit, Arg, DIType(Ty.getNode()));
Buffer.AddChild(Arg);
}
}
@@ -674,20 +989,19 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Add elements to structure type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
+ if (Element.isNull())
+ continue;
DIE *ElemDie = NULL;
if (Element.getTag() == dwarf::DW_TAG_subprogram)
ElemDie = CreateSubprogramDIE(DW_Unit,
- DISubprogram(Element.getGV()));
+ DISubprogram(Element.getNode()));
else
ElemDie = CreateMemberDIE(DW_Unit,
- DIDerivedType(Element.getGV()));
+ DIDerivedType(Element.getNode()));
Buffer.AddChild(ElemDie);
}
- // FIXME: We'd like an API to register additional attributes for the
- // frontend to use while synthesizing, and then we'd use that api in clang
- // instead of this.
- if (Name == "__block_literal_generic")
+ if (CTy.isAppleBlockExtension())
AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
unsigned RLang = CTy.getRunTimeLang();
@@ -701,7 +1015,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
}
// Add name if not anonymous or intermediate type.
- if (!Name.empty())
+ if (Name)
AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
if (Tag == dwarf::DW_TAG_enumeration_type ||
@@ -729,12 +1043,11 @@ void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
int64_t H = SR.getHi();
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
- if (L != H) {
- AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
- if (L)
- AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ if (L)
+ AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ if (H)
AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
- }
Buffer.AddChild(DW_Subrange);
}
@@ -761,15 +1074,14 @@ void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.getTag() == dwarf::DW_TAG_subrange_type)
- ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy);
+ ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy);
}
}
/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
- std::string Name;
- ETy->getName(Name);
+ const char *Name = ETy->getName();
AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
int64_t Value = ETy->getEnumValue();
AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
@@ -780,27 +1092,39 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
const DIGlobalVariable &GV) {
DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
- std::string Name;
- GV.getDisplayName(Name);
- AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- std::string LinkageName;
- GV.getLinkageName(LinkageName);
- if (!LinkageName.empty())
+ AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+
+ const char *LinkageName = GV.getLinkageName();
+ if (LinkageName) {
+    // Skip the special LLVM prefix that tells the asm printer not to
+    // emit the usual symbol prefix before the symbol name. This happens
+    // for Objective-C symbol names and for symbols whose names are
+    // replaced using GCC's __asm__ attribute.
+ if (LinkageName[0] == 1)
+ LinkageName = &LinkageName[1];
AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
LinkageName);
+ }
AddType(DW_Unit, GVDie, GV.getType());
if (!GV.isLocalToUnit())
AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
AddSourceLine(GVDie, &GV);
+
+ // Add address.
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getMangledName(GV.getGlobal()));
+ AddBlock(GVDie, dwarf::DW_AT_location, 0, Block);
+
return GVDie;
}
/// CreateMemberDIE - Create new member DIE.
DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
DIE *MemberDie = new DIE(DT.getTag());
- std::string Name;
- DT.getName(Name);
- if (!Name.empty())
+ if (const char *Name = DT.getName())
AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
@@ -849,17 +1173,19 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
bool IsInlined) {
DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
- std::string Name;
- SP.getName(Name);
+  const char *Name = SP.getName();
AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- std::string LinkageName;
- SP.getLinkageName(LinkageName);
-
- if (!LinkageName.empty())
+ const char *LinkageName = SP.getLinkageName();
+ if (LinkageName) {
+    // Skip the special LLVM prefix that tells the asm printer not to emit the
+    // usual symbol prefix before the symbol name. This happens for Objective-C
+    // symbol names and for symbols whose names are replaced using GCC's __asm__ attribute.
+ if (LinkageName[0] == 1)
+ LinkageName = &LinkageName[1];
AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
LinkageName);
-
+ }
AddSourceLine(SPDie, &SP);
DICompositeType SPTy = SP.getType();
@@ -877,7 +1203,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
AddType(DW_Unit, SPDie, SPTy);
else
- AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV()));
+ AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
}
if (!SP.isDefinition()) {
@@ -888,7 +1214,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV()));
+ AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
SPDie->AddChild(Arg);
}
@@ -898,7 +1224,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
// DW_TAG_inlined_subroutine may refer to this DIE.
- DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV());
+ DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode());
Slot = SPDie;
return SPDie;
}
@@ -907,7 +1233,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
///
CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const {
DenseMap<Value *, CompileUnit *>::const_iterator I =
- CompileUnitMap.find(Unit.getGV());
+ CompileUnitMap.find(Unit.getNode());
assert(I != CompileUnitMap.end() && "Missing compile unit.");
return *I->second;
}
@@ -935,15 +1261,18 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
// Define variable debug information entry.
DIE *VariableDie = new DIE(Tag);
- std::string Name;
- VD.getName(Name);
+ const char *Name = VD.getName();
AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
// Add source line info if available.
AddSourceLine(VariableDie, &VD);
// Add variable type.
- AddType(Unit, VariableDie, VD.getType());
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead.
+ if (VD.isBlockByrefVariable())
+ AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+ else
+ AddType(Unit, VariableDie, VD.getType());
// Add variable address.
if (!DV->isInlinedFnVar()) {
@@ -952,7 +1281,14 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
MachineLocation Location;
Location.set(RI->getFrameRegister(*MF),
RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
- AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
}
return VariableDie;
@@ -960,26 +1296,64 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
/// getOrCreateScope - Returns the scope associated with the given descriptor.
///
-DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) {
- DbgScope *&Slot = DbgScopeMap[V];
+DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI) {
+ DbgScope *&Slot = DbgScopeMap[N];
+ if (Slot) return Slot;
+
+ DbgScope *Parent = NULL;
+
+ DIDescriptor Scope(N);
+ if (Scope.isCompileUnit()) {
+ return NULL;
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(N);
+ DIDescriptor ParentDesc = SP.getContext();
+ if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
+ Parent = getDbgScope(ParentDesc.getNode(), MI);
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ if (!ParentDesc.isNull())
+ Parent = getDbgScope(ParentDesc.getNode(), MI);
+ } else
+ assert (0 && "Unexpected scope info");
+
+ Slot = new DbgScope(Parent, DIDescriptor(N));
+ Slot->setFirstInsn(MI);
+
+ if (Parent)
+ Parent->AddScope(Slot);
+ else
+ // First function is top level function.
+ if (!FunctionDbgScope)
+ FunctionDbgScope = Slot;
+
+ return Slot;
+}
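
A minimal sketch of the memoized parent-chasing pattern above, with stand-in Node/Scope types. std::map is used deliberately in the sketch: unlike a hash map, its references stay valid across the recursive insert, so the cached slot reference can still be assigned after the parent lookup:

    #include <map>

    struct Node { Node *Context = nullptr; };
    struct Scope {
      Scope *Parent;
      explicit Scope(Scope *P) : Parent(P) {}
    };

    static std::map<Node *, Scope *> ScopeMap;

    static Scope *getScope(Node *N) {
      Scope *&Slot = ScopeMap[N];  // one lookup serves both test and insert
      if (Slot) return Slot;
      Scope *Parent = N->Context ? getScope(N->Context) : nullptr;
      Slot = new Scope(Parent);
      return Slot;
    }

    int main() {
      Node CU, Fn;
      Fn.Context = &CU;
      return getScope(&Fn)->Parent == getScope(&CU) ? 0 : 1;
    }
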
+
+
+/// getOrCreateScope - Returns the scope associated with the given descriptor.
+/// FIXME - Remove this method.
+DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) {
+ DbgScope *&Slot = DbgScopeMap[N];
if (Slot) return Slot;
DbgScope *Parent = NULL;
- DIBlock Block(V);
+ DILexicalBlock Block(N);
// Don't create a new scope if we already created one for an inlined function.
- DenseMap<const GlobalVariable *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(V);
+ DenseMap<const MDNode *, DbgScope *>::iterator
+ II = AbstractInstanceRootMap.find(N);
if (II != AbstractInstanceRootMap.end())
return LexicalScopeStack.back();
if (!Block.isNull()) {
DIDescriptor ParentDesc = Block.getContext();
Parent =
- ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getGV());
+ ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getNode());
}
- Slot = new DbgScope(Parent, DIDescriptor(V));
+ Slot = new DbgScope(Parent, DIDescriptor(N));
if (Parent)
Parent->AddScope(Slot);
@@ -1088,10 +1462,14 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
return;
// Get the subprogram debug information entry.
- DISubprogram SPD(Desc.getGV());
+ DISubprogram SPD(Desc.getNode());
// Get the subprogram die.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getGV());
+ DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
+ if (!SPDie) {
+ ConstructSubprogram(SPD.getNode());
+ SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
+ }
assert(SPDie && "Missing subprogram descriptor");
if (!AbstractScope) {
@@ -1105,23 +1483,33 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
}
ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU);
+ // If there are global variables at this scope then add their dies.
+ for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
+ SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
+ MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
+ if (!N) continue;
+ DIGlobalVariable GV(N);
+ if (GV.getContext().getNode() == RootScope->getDesc().getNode()) {
+ DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
+ SPDie->AddChild(ScopedGVDie);
+ }
+ }
}
/// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
///
void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
- const char *FnName = MF->getFunction()->getNameStart();
StringMap<DIE*> &Globals = ModuleCU->getGlobals();
- StringMap<DIE*>::iterator GI = Globals.find(FnName);
+ StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName());
if (GI != Globals.end()) {
DIE *SPDie = GI->second;
-
+
// Add the function bounds.
AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
DWLabel("func_begin", SubprogramCount));
AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
DWLabel("func_end", SubprogramCount));
-
+
MachineLocation Location(RI->getFrameRegister(*MF));
AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
}
@@ -1131,8 +1519,8 @@ void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName,
- const std::string &FileName) {
+unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName,
+ const char *FileName) {
unsigned DId;
StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
if (DI != DirectoryIdMap.end()) {
@@ -1165,30 +1553,28 @@ unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName,
return SrcId;
}
-void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
- DICompileUnit DIUnit(GV);
- std::string Dir, FN, Prod;
- unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir),
- DIUnit.getFilename(FN));
+void DwarfDebug::ConstructCompileUnit(MDNode *N) {
+ DICompileUnit DIUnit(N);
+ const char *FN = DIUnit.getFilename();
+ const char *Dir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(Dir, FN);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
DWLabel("section_line", 0), DWLabel("section_line", 0),
false);
AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
- DIUnit.getProducer(Prod));
+ DIUnit.getProducer());
AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
DIUnit.getLanguage());
AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
- if (!Dir.empty())
+ if (Dir)
AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
if (DIUnit.isOptimized())
AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
- std::string Flags;
- DIUnit.getFlags(Flags);
- if (!Flags.empty())
+ if (const char *Flags = DIUnit.getFlags())
AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
unsigned RVer = DIUnit.getRunTimeVersion();
@@ -1203,28 +1589,24 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
ModuleCU = Unit;
}
- CompileUnitMap[DIUnit.getGV()] = Unit;
+ CompileUnitMap[DIUnit.getNode()] = Unit;
CompileUnits.push_back(Unit);
}
-void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
- DIGlobalVariable DI_GV(GV);
+void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) {
+ DIGlobalVariable DI_GV(N);
+
+ // If debug information is malformed then ignore it.
+ if (DI_GV.Verify() == false)
+ return;
// Check for pre-existence.
- DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getGV());
+ DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode());
if (Slot)
return;
DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV);
- // Add address.
- DIEBlock *Block = new DIEBlock();
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- std::string GLN;
- AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN));
- AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
-
// Add to map.
Slot = VariableDie;
@@ -1232,16 +1614,15 @@ void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
ModuleCU->getDie()->AddChild(VariableDie);
// Expose as global. FIXME - need to check external flag.
- std::string Name;
- ModuleCU->AddGlobal(DI_GV.getName(Name), VariableDie);
+ ModuleCU->AddGlobal(DI_GV.getName(), VariableDie);
return;
}
-void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
- DISubprogram SP(GV);
+void DwarfDebug::ConstructSubprogram(MDNode *N) {
+ DISubprogram SP(N);
// Check for pre-existence.
- DIE *&Slot = ModuleCU->getDieMapSlotFor(GV);
+ DIE *&Slot = ModuleCU->getDieMapSlotFor(N);
if (Slot)
return;
@@ -1259,28 +1640,25 @@ void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
ModuleCU->getDie()->AddChild(SubprogramDie);
// Expose as global.
- std::string Name;
- ModuleCU->AddGlobal(SP.getName(Name), SubprogramDie);
+ ModuleCU->AddGlobal(SP.getName(), SubprogramDie);
return;
}
- /// BeginModule - Emit all Dwarf sections that should come prior to the
- /// content. Create global DIEs and emit initial debug info sections.
- /// This is invoked by the target AsmPrinter.
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
this->M = M;
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- SmallVector<GlobalVariable *, 2> CUs;
- SmallVector<GlobalVariable *, 4> GVs;
- SmallVector<GlobalVariable *, 4> SPs;
- CollectDebugInfoAnchors(*M, CUs, GVs, SPs);
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(*M);
// Create all the compile unit DIEs.
- for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
- E = CUs.end(); I != E; ++I)
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I)
ConstructCompileUnit(*I);
if (CompileUnits.empty()) {
@@ -1295,23 +1673,19 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
if (!ModuleCU)
ModuleCU = CompileUnits[0];
- // If there is not any debug info available for any global variables and any
- // subprograms then there is not any debug info to emit.
- if (GVs.empty() && SPs.empty()) {
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return;
- }
-
// Create DIEs for each of the externally visible global variables.
- for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
- E = GVs.end(); I != E; ++I)
- ConstructGlobalVariableDIE(*I);
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I) {
+ DIGlobalVariable GV(*I);
+ if (GV.getContext().getNode() != GV.getCompileUnit().getNode())
+ ScopedGVs.push_back(*I);
+ else
+ ConstructGlobalVariableDIE(*I);
+ }
// Create DIEs for each of the externally visible subprograms.
- for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
- E = SPs.end(); I != E; ++I)
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I)
ConstructSubprogram(*I);
MMI = mmi;
@@ -1319,11 +1693,11 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
MMI->setDebugInfoAvailability(true);
// Prime section data.
- SectionMap.insert(TAI->getTextSection());
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
// Print out .file directives to specify files for .loc directives. These are
// printed out early so that they precede any .loc directives.
- if (TAI->hasDotLocAndDotFile()) {
+ if (MAI->hasDotLocAndDotFile()) {
for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
// Remember source id starts at 1.
std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
@@ -1332,7 +1706,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
FullPath.appendComponent(getSourceFileName(Id.second));
assert(AppendOk && "Could not append filename to directory!");
AppendOk = false;
- Asm->EmitFile(i, FullPath.toString());
+ Asm->EmitFile(i, FullPath.str());
Asm->EOL();
}
}
@@ -1347,21 +1721,21 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
/// EndModule - Emit all Dwarf sections that should come after the content.
///
void DwarfDebug::EndModule() {
- if (!ShouldEmitDwarfDebug())
+ if (!ModuleCU)
return;
if (TimePassesIsEnabled)
DebugTimer->startTimer();
// Standard sections final addresses.
- Asm->SwitchToSection(TAI->getTextSection());
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
EmitLabel("text_end", 0);
- Asm->SwitchToSection(TAI->getDataSection());
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
EmitLabel("data_end", 0);
// End text sections.
for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
- Asm->SwitchToSection(SectionMap[i]);
+ Asm->OutStreamer.SwitchSection(SectionMap[i]);
EmitLabel("section_end", i);
}
@@ -1410,6 +1784,135 @@ void DwarfDebug::EndModule() {
DebugTimer->stopTimer();
}
+/// CollectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::CollectVariableInfo() {
+ if (!MMI) return;
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ MetadataBase *MB = VI->first;
+ MDNode *Var = dyn_cast_or_null<MDNode>(MB);
+ DIVariable DV (Var);
+ if (DV.isNull()) continue;
+ unsigned VSlot = VI->second;
+ DbgScope *Scope = getDbgScope(DV.getContext().getNode(), NULL);
+ Scope->AddVariable(new DbgVariable(DV, VSlot, false));
+ }
+}
+
+/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
+/// start with this machine instruction.
+void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) {
+ InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
+ if (I == DbgScopeBeginMap.end())
+ return;
+ SmallVector<DbgScope *, 2> &SD = I->second;
+ for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
+ SDI != SDE; ++SDI)
+ (*SDI)->setStartLabelID(Label);
+}
+
+/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
+/// end with this machine instruction.
+void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) {
+ InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
+ if (I == DbgScopeEndMap.end())
+ return;
+ SmallVector<DbgScope *, 2> &SD = I->second;
+ for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
+ SDI != SDE; ++SDI)
+ (*SDI)->setEndLabelID(Label);
+}
+
+/// ExtractScopeInformation - Scan machine instructions in this function
+/// and collect DbgScopes. Return true if at least one scope was found.
+bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
+  // If scope information was extracted using .dbg intrinsics then there is
+  // no need to extract it again by scanning each instruction.
+ if (!DbgScopeMap.empty())
+ return false;
+
+ // Scan each instruction and create scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown())
+ continue;
+ DebugLocTuple DLT = MF->getDebugLocTuple(DL);
+ if (!DLT.Scope)
+ continue;
+      // There is no need to create another DIE for the compile unit. For all
+      // other scopes, create one DbgScope now. This will be translated
+      // into a scope DIE at the end.
+ DIDescriptor D(DLT.Scope);
+ if (!D.isCompileUnit()) {
+ DbgScope *Scope = getDbgScope(DLT.Scope, MInsn);
+ Scope->setLastInsn(MInsn);
+ }
+ }
+ }
+
+  // If a scope's last instruction is not set, use its child scope's
+  // last instruction as this scope's last instruction.
+ for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
+ DE = DbgScopeMap.end(); DI != DE; ++DI) {
+ assert (DI->second->getFirstInsn() && "Invalid first instruction!");
+ DI->second->FixInstructionMarkers();
+ assert (DI->second->getLastInsn() && "Invalid last instruction!");
+ }
+
+  // Each scope has a first and a last instruction marking its beginning
+  // and end. Create an inverse map listing which scopes start (and end)
+  // with a given instruction. One instruction may start (or end)
+  // multiple scopes.
+ for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
+ DE = DbgScopeMap.end(); DI != DE; ++DI) {
+ DbgScope *S = DI->second;
+ assert (S && "DbgScope is missing!");
+ const MachineInstr *MI = S->getFirstInsn();
+ assert (MI && "DbgScope does not have first instruction!");
+
+ InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI);
+ if (IDI != DbgScopeBeginMap.end())
+ IDI->second.push_back(S);
+ else
+ DbgScopeBeginMap.insert(std::make_pair(MI,
+ SmallVector<DbgScope *, 2>(2, S)));
+
+ MI = S->getLastInsn();
+ assert (MI && "DbgScope does not have last instruction!");
+ IDI = DbgScopeEndMap.find(MI);
+ if (IDI != DbgScopeEndMap.end())
+ IDI->second.push_back(S);
+ else
+ DbgScopeEndMap.insert(std::make_pair(MI,
+ SmallVector<DbgScope *, 2>(2, S)));
+ }
+
+ return !DbgScopeMap.empty();
+}
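
The final loop builds the instruction-to-scopes inverse maps consumed by SetDbgScopeBeginLabels/SetDbgScopeEndLabels. With std::map the find-or-insert dance collapses to operator[]; a standalone sketch of the same construction:

    #include <map>
    #include <vector>

    struct Insn;
    struct Scope { const Insn *First, *Last; };
    typedef std::map<const Insn *, std::vector<Scope *> > InsnScopeMap;

    // One instruction may begin (or end) several scopes, hence the vector
    // payload; operator[] default-constructs it on first use.
    void buildInverseMaps(std::vector<Scope> &Scopes,
                          InsnScopeMap &BeginMap, InsnScopeMap &EndMap) {
      for (size_t i = 0; i != Scopes.size(); ++i) {
        BeginMap[Scopes[i].First].push_back(&Scopes[i]);
        EndMap[Scopes[i].Last].push_back(&Scopes[i]);
      }
    }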
+
+static DISubprogram getDISubprogram(MDNode *N) {
+ DIDescriptor D(N);
+ if (D.isNull())
+ return DISubprogram();
+
+ if (D.isCompileUnit())
+ return DISubprogram();
+
+ if (D.isSubprogram())
+ return DISubprogram(N);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(N).getContext().getNode());
+
+ llvm_unreachable("Unexpected Descriptor!");
+}
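
getDISubprogram climbs outward through lexical blocks until it reaches the enclosing subprogram, bottoming out at the compile unit with a null result. The same climb over a simplified tagged descriptor tree:

    enum Tag { CompileUnitTag, SubprogramTag, LexicalBlockTag };
    struct Desc { Tag T; const Desc *Context; };

    // Return the enclosing subprogram, or 0 if the walk hits the compile
    // unit first (mirrors the descriptor dispatch above).
    const Desc *findSubprogram(const Desc *D) {
      if (!D || D->T == CompileUnitTag) return 0;
      if (D->T == SubprogramTag) return D;
      return findSubprogram(D->Context);   // lexical block: recurse outward
    }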
+
/// BeginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::BeginFunction(MachineFunction *MF) {
@@ -1420,6 +1923,12 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ if (!ExtractScopeInformation(MF))
+ return;
+ CollectVariableInfo();
+#endif
+
// Begin accumulating function debug information.
MMI->BeginFunction(MF);
@@ -1428,14 +1937,28 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
// Emit label for the implicitly defined dbg.stoppoint at the start of the
// function.
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
DebugLoc FDL = MF->getDefaultDebugLoc();
if (!FDL.isUnknown()) {
DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
- unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col,
- DICompileUnit(DLT.CompileUnit));
+ unsigned LabelID = 0;
+ DISubprogram SP = getDISubprogram(DLT.Scope);
+ if (!SP.isNull())
+ LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope);
+ else
+ LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
Asm->printLabel(LabelID);
+ O << '\n';
}
-
+#else
+ DebugLoc FDL = MF->getDefaultDebugLoc();
+ if (!FDL.isUnknown()) {
+ DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
+ unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
+ Asm->printLabel(LabelID);
+ O << '\n';
+ }
+#endif
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
@@ -1448,13 +1971,17 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ if (DbgScopeMap.empty())
+ return;
+#endif
// Define end label for subprogram.
EmitLabel("func_end", SubprogramCount);
// Get function line info.
if (!Lines.empty()) {
// Get section line info.
- unsigned ID = SectionMap.insert(Asm->CurrentSection_);
+ unsigned ID = SectionMap.insert(Asm->getCurrentSection());
if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
// Append the function info to section info.
@@ -1489,9 +2016,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
if (FunctionDbgScope) {
delete FunctionDbgScope;
DbgScopeMap.clear();
+ DbgScopeBeginMap.clear();
+ DbgScopeEndMap.clear();
DbgAbstractScopeMap.clear();
DbgConcreteScopeMap.clear();
- InlinedVariableScopes.clear();
FunctionDbgScope = NULL;
LexicalScopeStack.clear();
AbstractInstanceRootList.clear();
@@ -1507,32 +2035,34 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
/// RecordSourceLine - Records location information and associates it with a
/// label. Returns a unique label ID used to generate a label and provide
/// correspondence to the source line list.
-unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- CompileUnit *Unit = CompileUnitMap[V];
- assert(Unit && "Unable to find CompileUnit");
- unsigned ID = MMI->NextLabelID();
- Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID));
+unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
+ MDNode *S) {
+ if (!MMI)
+ return 0;
if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordSourceLine - Records location information and associates it with a
-/// label. Returns a unique label ID used to generate a label and provide
-/// correspondence to the source line list.
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
- DICompileUnit CU) {
- if (TimePassesIsEnabled)
DebugTimer->startTimer();
- std::string Dir, Fn;
- unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir),
- CU.getFilename(Fn));
+ const char *Dir = NULL;
+ const char *Fn = NULL;
+
+ DIDescriptor Scope(S);
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Dir = CU.getDirectory();
+ Fn = CU.getFilename();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Dir = SP.getDirectory();
+ Fn = SP.getFilename();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Dir = DB.getDirectory();
+ Fn = DB.getFilename();
+ } else
+ assert (0 && "Unexpected scope info");
+
+ unsigned Src = GetOrCreateSourceID(Dir, Fn);
unsigned ID = MMI->NextLabelID();
Lines.push_back(SrcLineInfo(Line, Col, Src, ID));
@@ -1552,7 +2082,7 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- unsigned SrcId = GetOrCreateSourceID(DirName, FileName);
+ unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
@@ -1561,11 +2091,11 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
}
/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) {
+unsigned DwarfDebug::RecordRegionStart(MDNode *N) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- DbgScope *Scope = getOrCreateScope(V);
+ DbgScope *Scope = getOrCreateScope(N);
unsigned ID = MMI->NextLabelID();
if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
LexicalScopeStack.push_back(Scope);
@@ -1577,11 +2107,11 @@ unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) {
}
/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
+unsigned DwarfDebug::RecordRegionEnd(MDNode *N) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- DbgScope *Scope = getOrCreateScope(V);
+ DbgScope *Scope = getOrCreateScope(N);
unsigned ID = MMI->NextLabelID();
Scope->setEndLabelID(ID);
// FIXME : region.end() may not be in the last basic block.
@@ -1598,62 +2128,36 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
}
/// RecordVariable - Indicate the declaration of a local variable.
-void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
- const MachineInstr *MI) {
+void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- DIDescriptor Desc(GV);
+ DIDescriptor Desc(N);
DbgScope *Scope = NULL;
bool InlinedFnVar = false;
- if (Desc.getTag() == dwarf::DW_TAG_variable) {
- // GV is a global variable.
- DIGlobalVariable DG(GV);
- Scope = getOrCreateScope(DG.getContext().getGV());
- } else {
- DenseMap<const MachineInstr *, DbgScope *>::iterator
- SI = InlinedVariableScopes.find(MI);
-
- if (SI != InlinedVariableScopes.end()) {
- // or GV is an inlined local variable.
- Scope = SI->second;
- } else {
- DIVariable DV(GV);
- GlobalVariable *V = DV.getContext().getGV();
-
- // FIXME: The code that checks for the inlined local variable is a hack!
- DenseMap<const GlobalVariable *, DbgScope *>::iterator
- AI = AbstractInstanceRootMap.find(V);
-
- if (AI != AbstractInstanceRootMap.end()) {
- // This method is called each time a DECLARE node is encountered. For an
- // inlined function, this could be many, many times. We don't want to
- // re-add variables to that DIE for each time. We just want to add them
- // once. Check to make sure that we haven't added them already.
- DenseMap<const GlobalVariable *,
- SmallSet<const GlobalVariable *, 32> >::iterator
- IP = InlinedParamMap.find(V);
-
- if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) {
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
- return;
- }
-
- // or GV is an inlined local variable.
- Scope = AI->second;
- InlinedParamMap[V].insert(GV);
- InlinedFnVar = true;
- } else {
- // or GV is a local variable.
- Scope = getOrCreateScope(V);
+ if (Desc.getTag() == dwarf::DW_TAG_variable)
+ Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode());
+ else {
+ bool InlinedVar = false;
+ MDNode *Context = DIVariable(N).getContext().getNode();
+ DISubprogram SP(Context);
+ if (!SP.isNull()) {
+ // SP is inserted into DbgAbstractScopeMap when the start of an inlined
+ // function was recorded by RecordInlinedFnStart.
+ DenseMap<MDNode *, DbgScope *>::iterator
+ I = DbgAbstractScopeMap.find(SP.getNode());
+ if (I != DbgAbstractScopeMap.end()) {
+ InlinedVar = true;
+ Scope = I->second;
}
}
+ if (!InlinedVar)
+ Scope = getOrCreateScope(Context);
}
assert(Scope && "Unable to find the variable's scope");
- DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar);
+ DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar);
Scope->AddVariable(DV);
if (TimePassesIsEnabled)
@@ -1665,23 +2169,23 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
unsigned Line, unsigned Col) {
unsigned LabelID = MMI->NextLabelID();
- if (!TAI->doesDwarfUsesInlineInfoSection())
+ if (!MAI->doesDwarfUsesInlineInfoSection())
return LabelID;
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- GlobalVariable *GV = SP.getGV();
- DenseMap<const GlobalVariable *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(GV);
+ MDNode *Node = SP.getNode();
+ DenseMap<const MDNode *, DbgScope *>::iterator
+ II = AbstractInstanceRootMap.find(Node);
if (II == AbstractInstanceRootMap.end()) {
// Create an abstract instance entry for this inlined function if it doesn't
// already exist.
- DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV));
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node));
// Get the compile unit context.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(GV);
+ DIE *SPDie = ModuleCU->getDieMapSlotFor(Node);
if (!SPDie)
SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true);
@@ -1693,18 +2197,18 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
// Keep track of the abstract scope for this function.
- DbgAbstractScopeMap[GV] = Scope;
+ DbgAbstractScopeMap[Node] = Scope;
- AbstractInstanceRootMap[GV] = Scope;
+ AbstractInstanceRootMap[Node] = Scope;
AbstractInstanceRootList.push_back(Scope);
}
// Create a concrete inlined instance for this inlined function.
- DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
+ DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node));
DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
ScopeDie->setAbstractCompileUnit(ModuleCU);
- DIE *Origin = ModuleCU->getDieMapSlotFor(GV);
+ DIE *Origin = ModuleCU->getDieMapSlotFor(Node);
AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, Origin);
AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
@@ -1718,20 +2222,20 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
// Keep track of the concrete scope that's inlined into this function.
- DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
- SI = DbgConcreteScopeMap.find(GV);
+ DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
+ SI = DbgConcreteScopeMap.find(Node);
if (SI == DbgConcreteScopeMap.end())
- DbgConcreteScopeMap[GV].push_back(ConcreteScope);
+ DbgConcreteScopeMap[Node].push_back(ConcreteScope);
else
SI->second.push_back(ConcreteScope);
// Track the start label for this inlined function.
- DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.find(GV);
+ DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator
+ I = InlineInfo.find(Node);
if (I == InlineInfo.end())
- InlineInfo[GV].push_back(LabelID);
+ InlineInfo[Node].push_back(LabelID);
else
I->second.push_back(LabelID);
@@ -1743,15 +2247,15 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
- if (!TAI->doesDwarfUsesInlineInfoSection())
+ if (!MAI->doesDwarfUsesInlineInfoSection())
return 0;
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- GlobalVariable *GV = SP.getGV();
- DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
- I = DbgConcreteScopeMap.find(GV);
+ MDNode *Node = SP.getNode();
+ DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
+ I = DbgConcreteScopeMap.find(Node);
if (I == DbgConcreteScopeMap.end()) {
// FIXME: Can this situation actually happen? And if so, should it?
@@ -1781,33 +2285,6 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
return ID;
}
-/// RecordVariableScope - Record scope for the variable declared by
-/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes
-/// for only inlined subroutine variables. Other variables's scopes are
-/// determined during RecordVariable().
-void DwarfDebug::RecordVariableScope(DIVariable &DV,
- const MachineInstr *DeclareMI) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DISubprogram SP(DV.getContext().getGV());
-
- if (SP.isNull()) {
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return;
- }
-
- DenseMap<GlobalVariable *, DbgScope *>::iterator
- I = DbgAbstractScopeMap.find(SP.getGV());
- if (I != DbgAbstractScopeMap.end())
- InlinedVariableScopes[DeclareMI] = I->second;
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-}
-
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
@@ -1832,7 +2309,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
Die->setOffset(Offset);
// Start the size with the size of abbreviation code.
- Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber);
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
const SmallVector<DIEValue*, 32> &Values = Die->getValues();
const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
@@ -1879,38 +2356,40 @@ void DwarfDebug::EmitInitial() {
if (didInitial) return;
didInitial = true;
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
// Dwarf sections base addresses.
- if (TAI->doesDwarfRequireFrameSection()) {
- Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+ if (MAI->doesDwarfRequireFrameSection()) {
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfFrameSection());
EmitLabel("section_debug_frame", 0);
}
- Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfInfoSection());
EmitLabel("section_info", 0);
- Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfAbbrevSection());
EmitLabel("section_abbrev", 0);
- Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfARangesSection());
EmitLabel("section_aranges", 0);
- if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
- Asm->SwitchToDataSection(LineInfoDirective);
+ if (const MCSection *LineInfoDirective = TLOF.getDwarfMacroInfoSection()) {
+ Asm->OutStreamer.SwitchSection(LineInfoDirective);
EmitLabel("section_macinfo", 0);
}
- Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfLineSection());
EmitLabel("section_line", 0);
- Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfLocSection());
EmitLabel("section_loc", 0);
- Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection());
EmitLabel("section_pubnames", 0);
- Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection());
EmitLabel("section_str", 0);
- Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection());
EmitLabel("section_ranges", 0);
- Asm->SwitchToSection(TAI->getTextSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getTextSection());
EmitLabel("text_begin", 0);
- Asm->SwitchToSection(TAI->getDataSection());
+ Asm->OutStreamer.SwitchSection(TLOF.getDataSection());
EmitLabel("data_begin", 0);
}
@@ -2012,7 +2491,8 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
void DwarfDebug::EmitDebugInfo() {
// Start debug info section.
- Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfInfoSection());
EmitDebugInfoPerCU(ModuleCU);
}
@@ -2023,7 +2503,8 @@ void DwarfDebug::EmitAbbreviations() const {
// Check to see if it is worth the effort.
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
- Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAbbrevSection());
EmitLabel("abbrev_begin", 0);
@@ -2071,7 +2552,7 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
void DwarfDebug::EmitDebugLines() {
// If the target is using .loc/.file, the assembler will be emitting the
// .debug_line table automatically.
- if (TAI->hasDotLocAndDotFile())
+ if (MAI->hasDotLocAndDotFile())
return;
// Minimum line delta, thus ranging from -10..(255-10).
@@ -2080,7 +2561,8 @@ void DwarfDebug::EmitDebugLines() {
const int MaxLineDelta = 255 + MinLineDelta;
// Start the dwarf line section.
- Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLineSection());
// Construct the section header.
EmitDifference("line_end", 0, "line_begin", 0, true);
@@ -2147,13 +2629,12 @@ void DwarfDebug::EmitDebugLines() {
// Isolate current sections line info.
const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
- if (Asm->isVerbose()) {
- const Section* S = SectionMap[j + 1];
- O << '\t' << TAI->getCommentString() << " Section"
+ /*if (Asm->isVerbose()) {
+ const MCSection *S = SectionMap[j + 1];
+ O << '\t' << MAI->getCommentString() << " Section"
<< S->getName() << '\n';
- } else {
- Asm->EOL();
- }
+ }*/
+ Asm->EOL();
// Dwarf assumes we start with first line of first source file.
unsigned Source = 1;
@@ -2165,12 +2646,14 @@ void DwarfDebug::EmitDebugLines() {
unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
if (!LabelID) continue;
+ if (LineInfo.getLine() == 0) continue;
+
if (!Asm->isVerbose())
Asm->EOL();
else {
std::pair<unsigned, unsigned> SourceID =
getSourceDirectoryAndFileIds(LineInfo.getSourceID());
- O << '\t' << TAI->getCommentString() << ' '
+ O << '\t' << MAI->getCommentString() << ' '
<< getSourceDirectoryName(SourceID.first) << ' '
<< getSourceFileName(SourceID.second)
<<" :" << utostr_32(LineInfo.getLine()) << '\n';
@@ -2231,7 +2714,7 @@ void DwarfDebug::EmitDebugLines() {
/// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
///
void DwarfDebug::EmitCommonDebugFrame() {
- if (!TAI->doesDwarfRequireFrameSection())
+ if (!MAI->doesDwarfRequireFrameSection())
return;
int stackGrowth =
@@ -2240,7 +2723,8 @@ void DwarfDebug::EmitCommonDebugFrame() {
TD->getPointerSize() : -TD->getPointerSize();
// Start the dwarf frame section.
- Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfFrameSection());
EmitLabel("debug_frame_common", 0);
EmitDifference("debug_frame_common_end", 0,
@@ -2276,11 +2760,12 @@ void DwarfDebug::EmitCommonDebugFrame() {
/// section.
void
DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
- if (!TAI->doesDwarfRequireFrameSection())
+ if (!MAI->doesDwarfRequireFrameSection())
return;
// Start the dwarf frame section.
- Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfFrameSection());
EmitDifference("debug_frame_end", DebugFrameInfo.Number,
"debug_frame_begin", DebugFrameInfo.Number, true);
@@ -2344,7 +2829,8 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
///
void DwarfDebug::EmitDebugPubNames() {
// Start the dwarf pubnames section.
- Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
EmitDebugPubNamesPerCU(ModuleCU);
}
@@ -2355,7 +2841,8 @@ void DwarfDebug::EmitDebugStr() {
// Check to see if it is worth the effort.
if (!StringPool.empty()) {
// Start the dwarf str section.
- Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfStrSection());
// For each of strings in the string pool.
for (unsigned StringID = 1, N = StringPool.size();
@@ -2376,7 +2863,8 @@ void DwarfDebug::EmitDebugStr() {
///
void DwarfDebug::EmitDebugLoc() {
// Start the dwarf loc section.
- Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
Asm->EOL();
}
@@ -2384,7 +2872,8 @@ void DwarfDebug::EmitDebugLoc() {
///
void DwarfDebug::EmitDebugARanges() {
// Start the dwarf aranges section.
- Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
// FIXME - Mock up
#if 0
@@ -2420,16 +2909,18 @@ void DwarfDebug::EmitDebugARanges() {
///
void DwarfDebug::EmitDebugRanges() {
// Start the dwarf ranges section.
- Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
Asm->EOL();
}
/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
///
void DwarfDebug::EmitDebugMacInfo() {
- if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
// Start the dwarf macinfo section.
- Asm->SwitchToDataSection(LineInfoDirective);
+ Asm->OutStreamer.SwitchSection(LineInfo);
Asm->EOL();
}
}
@@ -2453,13 +2944,14 @@ void DwarfDebug::EmitDebugMacInfo() {
/// __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
void DwarfDebug::EmitDebugInlineInfo() {
- if (!TAI->doesDwarfUsesInlineInfoSection())
+ if (!MAI->doesDwarfUsesInlineInfoSection())
return;
if (!ModuleCU)
return;
- Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
Asm->EOL();
EmitDifference("debug_inlined_end", 1,
"debug_inlined_begin", 1, true);
@@ -2470,18 +2962,25 @@ void DwarfDebug::EmitDebugInlineInfo() {
Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
- for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+ for (DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator
I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
- GlobalVariable *GV = I->first;
+ MDNode *Node = I->first;
SmallVector<unsigned, 4> &Labels = I->second;
- DISubprogram SP(GV);
- std::string Name;
- std::string LName;
-
- SP.getLinkageName(LName);
- SP.getName(Name);
+ DISubprogram SP(Node);
+ const char *LName = SP.getLinkageName();
+ const char *Name = SP.getName();
- Asm->EmitString(LName.empty() ? Name : LName);
+ if (!LName)
+ Asm->EmitString(Name);
+ else {
+ // Skip the special LLVM prefix that informs the asm printer not to emit
+ // the usual symbol prefix before the symbol name. This happens for
+ // Objective-C symbol names and for symbols whose names are replaced using
+ // GCC's __asm__ attribute.
+ if (LName[0] == 1)
+ LName = &LName[1];
+ Asm->EmitString(LName);
+ }
Asm->EOL("MIPS linkage name");
Asm->EmitString(Name); Asm->EOL("Function name");
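
The byte value 1 checked above is LLVM's internal escape on linkage names meaning "do not prepend the target's symbol prefix". As a standalone one-liner, assuming plain C strings as in the code above:

    // Strip the leading \1 "no mangling" marker, if present.
    const char *stripNoMangleMarker(const char *LName) {
      return (LName && LName[0] == '\1') ? LName + 1 : LName;
    }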
@@ -2490,13 +2989,13 @@ void DwarfDebug::EmitDebugInlineInfo() {
for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
- DIE *SP = ModuleCU->getDieMapSlotFor(GV);
+ DIE *SP = ModuleCU->getDieMapSlotFor(Node);
Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
if (TD->getPointerSize() == sizeof(int32_t))
- O << TAI->getData32bitsDirective();
+ O << MAI->getData32bitsDirective();
else
- O << TAI->getData64bitsDirective();
+ O << MAI->getData64bitsDirective();
PrintLabelName("label", *LI); Asm->EOL("low_pc");
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 101dc705d3b0..bd377c5593cc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -35,7 +35,7 @@ class DbgScope;
class DbgConcreteScope;
class MachineFrameInfo;
class MachineModuleInfo;
-class TargetAsmInfo;
+class MCAsmInfo;
class Timer;
//===----------------------------------------------------------------------===//
@@ -120,7 +120,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
/// SectionMap - Provides a unique id per text section.
///
- UniqueVector<const Section*> SectionMap;
+ UniqueVector<const MCSection*> SectionMap;
/// SectionSourceLines - Tracks line numbers per text section.
///
@@ -139,34 +139,38 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
DbgScope *FunctionDbgScope;
/// DbgScopeMap - Tracks the scopes in the current function.
- DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap;
+ DenseMap<MDNode *, DbgScope *> DbgScopeMap;
+
+ /// ScopedGVs - Tracks global variables that are not at file scope.
+ /// For example: void f() { static int b = 42; }
+ SmallVector<WeakVH, 4> ScopedGVs;
+
+ typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> >
+ InsnToDbgScopeMapTy;
+
+ /// DbgScopeBeginMap - Maps an instruction to the list of DbgScopes it starts.
+ InsnToDbgScopeMapTy DbgScopeBeginMap;
+
+ /// DbgScopeEndMap - Maps an instruction to the list of DbgScopes it ends.
+ InsnToDbgScopeMapTy DbgScopeEndMap;
/// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
/// function.
- DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap;
+ DenseMap<MDNode *, DbgScope *> DbgAbstractScopeMap;
/// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
/// function.
- DenseMap<GlobalVariable *,
+ DenseMap<MDNode *,
SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
- DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo;
-
- /// InlinedVariableScopes - Scopes information for the inlined subroutine
- /// variables.
- DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes;
+ DenseMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo;
/// AbstractInstanceRootMap - Map of abstract instance roots of inlined
/// functions. These are subroutine entries that contain a DW_AT_inline
/// attribute.
- DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap;
-
- /// InlinedParamMap - A map keeping track of which parameters are assigned to
- /// which abstract instance.
- DenseMap<const GlobalVariable *,
- SmallSet<const GlobalVariable *, 32> > InlinedParamMap;
+ DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap;
/// AbstractInstanceRootList - List of abstract instance roots of inlined
/// functions. These are subroutine entries that contain a DW_AT_inline
@@ -284,11 +288,8 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
/// AddSourceLine - Add location information to specified debug information
/// entry.
void AddSourceLine(DIE *Die, const DIVariable *V);
-
- /// AddSourceLine - Add location information to specified debug information
- /// entry.
void AddSourceLine(DIE *Die, const DIGlobal *G);
-
+ void AddSourceLine(DIE *Die, const DISubprogram *SP);
void AddSourceLine(DIE *Die, const DIType *Ty);
/// AddAddress - Add an address attribute to a die based on the location
@@ -296,6 +297,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
void AddAddress(DIE *Die, unsigned Attribute,
const MachineLocation &Location);
+ /// AddComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of AddComplexAddress.
+ /// AddBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use AddComplexAddress instead.
+ ///
+ void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
/// AddType - Add a new type attribute to the specified entity.
void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
@@ -342,9 +361,10 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
- /// getOrCreateScope - Returns the scope associated with the given descriptor.
+ /// getDbgScope - Returns the scope associated with the given descriptor.
///
- DbgScope *getOrCreateScope(GlobalVariable *V);
+ DbgScope *getOrCreateScope(MDNode *N);
+ DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI);
/// ConstructDbgScope - Construct the components of a scope.
///
@@ -454,20 +474,26 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps
/// as well.
- unsigned GetOrCreateSourceID(const std::string &DirName,
- const std::string &FileName);
+ unsigned GetOrCreateSourceID(const char *DirName,
+ const char *FileName);
- void ConstructCompileUnit(GlobalVariable *GV);
+ void ConstructCompileUnit(MDNode *N);
- void ConstructGlobalVariableDIE(GlobalVariable *GV);
+ void ConstructGlobalVariableDIE(MDNode *N);
- void ConstructSubprogram(GlobalVariable *GV);
+ void ConstructSubprogram(MDNode *N);
+
+ // FIXME: This should go away in favor of complex addresses.
+ /// Find the type the programmer originally declared the variable to be
+ /// and return that type. Obsolete, use GetComplexAddrType instead.
+ ///
+ DIType GetBlockByrefType(DIType Ty, std::string Name);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
- DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+ DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T);
virtual ~DwarfDebug();
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
@@ -493,12 +519,7 @@ public:
/// RecordSourceLine - Records location information and associates it with a
/// label. Returns a unique label ID used to generate a label and provide
/// correspondence to the source line list.
- unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col);
-
- /// RecordSourceLine - Records location information and associates it with a
- /// label. Returns a unique label ID used to generate a label and provide
- /// correspondence to the source line list.
- unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU);
+ unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
/// getRecordSourceLineCount - Return the number of source lines in the debug
/// info.
@@ -515,14 +536,13 @@ public:
const std::string &FileName);
/// RecordRegionStart - Indicate the start of a region.
- unsigned RecordRegionStart(GlobalVariable *V);
+ unsigned RecordRegionStart(MDNode *N);
/// RecordRegionEnd - Indicate the end of a region.
- unsigned RecordRegionEnd(GlobalVariable *V);
+ unsigned RecordRegionEnd(MDNode *N);
/// RecordVariable - Indicate the declaration of a local variable.
- void RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
- const MachineInstr *MI);
+ void RecordVariable(MDNode *N, unsigned FrameIndex);
//// RecordInlinedFnStart - Indicate the start of inlined subroutine.
unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
@@ -531,11 +551,20 @@ public:
/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
unsigned RecordInlinedFnEnd(DISubprogram &SP);
- /// RecordVariableScope - Record scope for the variable declared by
- /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes
- /// for only inlined subroutine variables. Other variables's scopes are
- /// determined during RecordVariable().
- void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI);
+ /// ExtractScopeInformation - Scan machine instructions in this function
+ /// and collect DbgScopes. Return true if at least one scope was found.
+ bool ExtractScopeInformation(MachineFunction *MF);
+
+ /// CollectVariableInfo - Populate DbgScope entries with variables' info.
+ void CollectVariableInfo();
+
+ /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
+ /// start with this machine instruction.
+ void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label);
+
+ /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
+ /// end with this machine instruction.
+ void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label);
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 37466ab39a23..626523b820f6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing dwarf exception info into asm files.
+// This file contains support for writing DWARF exception info into asm files.
//
//===----------------------------------------------------------------------===//
@@ -15,30 +15,38 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
static TimerGroup &getDwarfTimerGroup() {
- static TimerGroup DwarfTimerGroup("Dwarf Exception");
+ static TimerGroup DwarfTimerGroup("DWARF Exception");
return DwarfTimerGroup;
}
DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A,
- const TargetAsmInfo *T)
+ const MCAsmInfo *T)
: Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false),
shouldEmitTableModule(false), shouldEmitMovesModule(false),
ExceptionTimer(0) {
- if (TimePassesIsEnabled)
- ExceptionTimer = new Timer("Dwarf Exception Writer",
+ if (TimePassesIsEnabled)
+ ExceptionTimer = new Timer("DWARF Exception Writer",
getDwarfTimerGroup());
}
@@ -46,21 +54,45 @@ DwarfException::~DwarfException() {
delete ExceptionTimer;
}
-void DwarfException::EmitCommonEHFrame(const Function *Personality,
- unsigned Index) {
+/// SizeOfEncodedValue - Return the size of the encoded value in bytes.
+unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ case dwarf::DW_EH_PE_absptr:
+ return TD->getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ return 8;
+ }
+
+ assert(0 && "Invalid encoded value.");
+ return 0;
+}
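
Masking with 0x07 is what lets the switch above stay small: it drops both the sign bit (0x08) and the application bits (0x70), folding the signed sdataN encodings and the pc-relative variants onto the plain udataN cases. A worked check with the standard DWARF EH constants:

    #include <cassert>

    enum {
      DW_EH_PE_udata4 = 0x03,
      DW_EH_PE_sdata4 = 0x0B,
      DW_EH_PE_pcrel  = 0x10
    };

    int main() {
      // pcrel sdata4 reduces to udata4, so SizeOfEncodedValue reports 4.
      assert(((DW_EH_PE_pcrel | DW_EH_PE_sdata4) & 0x07) == DW_EH_PE_udata4);
      return 0;
    }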
+
+/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
+/// is shared among many Frame Description Entries. There is at least one CIE
+/// in every non-empty .debug_frame section.
+void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
// Size and sign of stack growth.
int stackGrowth =
Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
TargetFrameInfo::StackGrowsUp ?
TD->getPointerSize() : -TD->getPointerSize();
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
// Begin eh frame section.
- Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
-
- if (!TAI->doesRequireNonLocalEHFrameLabel())
- O << TAI->getEHGlobalPrefix();
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+ if (MAI->is_EHSymbolPrivate())
+ O << MAI->getPrivateGlobalPrefix();
O << "EH_frame" << Index << ":\n";
+
EmitLabel("section_eh_frame", Index);
// Define base labels.
@@ -79,8 +111,53 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality,
Asm->EOL("CIE Version");
// The personality presence indicates that language specific information will
- // show up in the eh frame.
- Asm->EmitString(Personality ? "zPLR" : "zR");
+ // show up in the eh frame. Find out how we are supposed to lower the
+ // personality function reference:
+ const MCExpr *PersonalityRef = 0;
+ bool IsPersonalityIndirect = false, IsPersonalityPCRel = false;
+ if (PersonalityFn) {
+ // FIXME: HANDLE STATIC CODEGEN MODEL HERE.
+
+ // In non-static mode, ask the object file how to represent this reference.
+ PersonalityRef =
+ TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang,
+ Asm->MMI,
+ IsPersonalityIndirect,
+ IsPersonalityPCRel);
+ }
+
+ unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ if (IsPersonalityIndirect)
+ PerEncoding |= dwarf::DW_EH_PE_indirect;
+ unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+ char Augmentation[5] = { 0 };
+ unsigned AugmentationSize = 0;
+ char *APtr = Augmentation + 1;
+
+ if (PersonalityRef) {
+ // There is a personality function.
+ *APtr++ = 'P';
+ AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding);
+ }
+
+ if (UsesLSDA[Index]) {
+ // An LSDA pointer is in the FDE augmentation.
+ *APtr++ = 'L';
+ ++AugmentationSize;
+ }
+
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
+ // A non-default pointer encoding for the FDE.
+ *APtr++ = 'R';
+ ++AugmentationSize;
+ }
+
+ if (APtr != Augmentation + 1)
+ Augmentation[0] = 'z';
+
+ Asm->EmitString(Augmentation);
Asm->EOL("CIE Augmentation");
// Round out reader.
@@ -91,39 +168,41 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality,
Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
Asm->EOL("CIE Return Address Column");
- // If there is a personality, we need to indicate the functions location.
- if (Personality) {
- Asm->EmitULEB128Bytes(7);
- Asm->EOL("Augmentation Size");
-
- if (TAI->getNeedsIndirectEncoding()) {
- Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 |
- dwarf::DW_EH_PE_indirect);
- Asm->EOL("Personality (pcrel sdata4 indirect)");
- } else {
- Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
- Asm->EOL("Personality (pcrel sdata4)");
+ Asm->EmitULEB128Bytes(AugmentationSize);
+ Asm->EOL("Augmentation Size");
+
+ Asm->EmitInt8(PerEncoding);
+ Asm->EOL("Personality", PerEncoding);
+
+ // If there is a personality, we need to indicate the function's location.
+ if (PersonalityRef) {
+ // If the reference to the personality function symbol is not already
+ // pc-relative, then we need to subtract our current address from it. Do
+ // this by emitting a label and subtracting it from the expression we
+ // already have. This is equivalent to emitting "foo - .", but we have to
+ // emit the label for "." directly.
+ if (!IsPersonalityPCRel) {
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index;
+ MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
+ Asm->OutStreamer.EmitLabel(DotSym);
+
+ PersonalityRef =
+ MCBinaryExpr::CreateSub(PersonalityRef,
+ MCSymbolRefExpr::Create(DotSym,Asm->OutContext),
+ Asm->OutContext);
}
-
- PrintRelDirective(true);
- O << TAI->getPersonalityPrefix();
- Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
- O << TAI->getPersonalitySuffix();
- if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL"))
- O << "-" << TAI->getPCSymbol();
+
+ O << MAI->getData32bitsDirective();
+ PersonalityRef->print(O, MAI);
Asm->EOL("Personality");
- Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
- Asm->EOL("LSDA Encoding (pcrel sdata4)");
+ Asm->EmitInt8(LSDAEncoding);
+ Asm->EOL("LSDA Encoding", LSDAEncoding);
- Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
- Asm->EOL("FDE Encoding (pcrel sdata4)");
- } else {
- Asm->EmitULEB128Bytes(1);
- Asm->EOL("Augmentation Size");
-
- Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
- Asm->EOL("FDE Encoding (pcrel sdata4)");
+ Asm->EmitInt8(FDEEncoding);
+ Asm->EOL("FDE Encoding", FDEEncoding);
}
// Indicate locations of general callee saved registers in frame.
@@ -134,55 +213,44 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality,
// On Darwin the linker honors the alignment of eh_frame, which means it must
// be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
// holes which confuse readers of eh_frame.
- Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
- 0, 0, false);
+ Asm->EmitAlignment(TD->getPointerSize() == 4 ? 2 : 3, 0, 0, false);
EmitLabel("eh_frame_common_end", Index);
Asm->EOL();
}
-/// EmitEHFrame - Emit function exception frame information.
-///
-void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
- assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
+ assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
"Should not emit 'available externally' functions at all");
- Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage();
- Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+ const Function *TheFunc = EHFrameInfo.function;
+
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection());
// Externally visible entry into the function's eh frame info. If the
// corresponding function is static, this should not be externally visible.
- if (linkage != Function::InternalLinkage &&
- linkage != Function::PrivateLinkage) {
- if (const char *GlobalEHDirective = TAI->getGlobalEHDirective())
+ if (!TheFunc->hasLocalLinkage())
+ if (const char *GlobalEHDirective = MAI->getGlobalEHDirective())
O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
- }
// If corresponding function is weak definition, this should be too.
- if ((linkage == Function::WeakAnyLinkage ||
- linkage == Function::WeakODRLinkage ||
- linkage == Function::LinkOnceAnyLinkage ||
- linkage == Function::LinkOnceODRLinkage) &&
- TAI->getWeakDefDirective())
- O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+ if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective())
+ O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
// If there are no calls then you can't unwind. This may mean we can omit the
// EH Frame, but some environments do not handle weak absolute symbols. If
// UnwindTablesMandatory is set we cannot do this optimization; the unwind
// info is to be available for non-EH uses.
- if (!EHFrameInfo.hasCalls &&
- !UnwindTablesMandatory &&
- ((linkage != Function::WeakAnyLinkage &&
- linkage != Function::WeakODRLinkage &&
- linkage != Function::LinkOnceAnyLinkage &&
- linkage != Function::LinkOnceODRLinkage) ||
- !TAI->getWeakDefDirective() ||
- TAI->getSupportsWeakOmittedEHFrame())) {
+ if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory &&
+ (!TheFunc->isWeakForLinker() ||
+ !MAI->getWeakDefDirective() ||
+ MAI->getSupportsWeakOmittedEHFrame())) {
O << EHFrameInfo.FnName << " = 0\n";
// This name has no connection to the function, so it might get
// dead-stripped when the function is not, erroneously. Prohibit
// dead-stripping unconditionally.
- if (const char *UsedDirective = TAI->getUsedDirective())
+ if (const char *UsedDirective = MAI->getUsedDirective())
O << UsedDirective << EHFrameInfo.FnName << "\n\n";
} else {
O << EHFrameInfo.FnName << ":\n";
@@ -194,17 +262,9 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
EmitLabel("eh_frame_begin", EHFrameInfo.Number);
- if (TAI->doesRequireNonLocalEHFrameLabel()) {
- PrintRelDirective(true, true);
- PrintLabelName("eh_frame_begin", EHFrameInfo.Number);
-
- if (!TAI->isAbsoluteEHSectionOffsets())
- O << "-EH_frame" << EHFrameInfo.PersonalityIndex;
- } else {
- EmitSectionOffset("eh_frame_begin", "eh_frame_common",
- EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
- true, true, false);
- }
+ EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+ EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+ true, true, false);
Asm->EOL("FDE CIE offset");
@@ -216,14 +276,20 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
// If there is a personality and landing pads then point to the language
// specific data area in the exception table.
- if (EHFrameInfo.PersonalityIndex) {
- Asm->EmitULEB128Bytes(4);
+ if (MMI->getPersonalities()[0] != NULL) {
+ bool is4Byte = TD->getPointerSize() == sizeof(int32_t);
+
+ Asm->EmitULEB128Bytes(is4Byte ? 4 : 8);
Asm->EOL("Augmentation size");
if (EHFrameInfo.hasLandingPads)
- EmitReference("exception", EHFrameInfo.Number, true, true);
- else
- Asm->EmitInt32((int)0);
+ EmitReference("exception", EHFrameInfo.Number, true, false);
+ else {
+ if (is4Byte)
+ Asm->EmitInt32((int)0);
+ else
+ Asm->EmitInt64((int)0);
+ }
Asm->EOL("Language Specific Data Area");
} else {
Asm->EmitULEB128Bytes(0);
@@ -231,7 +297,7 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
}
// Indicate locations of function specific callee saved registers in frame.
- EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
+ EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
true);
// On Darwin the linker honors the alignment of eh_frame, which means it
@@ -246,32 +312,13 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
// retains the function in this case, and there is code around that depends
// on unused functions (calling undefined externals) being dead-stripped to
// link correctly. Yes, there really is.
- if (MMI->getUsedFunctions().count(EHFrameInfo.function))
- if (const char *UsedDirective = TAI->getUsedDirective())
+ if (MMI->isUsedFunction(EHFrameInfo.function))
+ if (const char *UsedDirective = MAI->getUsedDirective())
O << UsedDirective << EHFrameInfo.FnName << "\n\n";
}
-}
-/// EmitExceptionTable - Emit landing pads and actions.
-///
-/// The general organization of the table is complex, but the basic concepts are
-/// easy. First there is a header which describes the location and organization
-/// of the three components that follow.
-///
-/// 1. The landing pad site information describes the range of code covered by
-/// the try. In our case it's an accumulation of the ranges covered by the
-/// invokes in the try. There is also a reference to the landing pad that
-/// handles the exception once processed. Finally an index into the actions
-/// table.
-/// 2. The action table, in our case, is composed of pairs of type ids and next
-/// action offset. Starting with the action index from the landing pad
-/// site, each type Id is checked for a match to the current exception. If
-/// it matches then the exception and type id are passed on to the landing
-/// pad. Otherwise the next action is looked up. This chain is terminated
-/// with a next action of zero. If no type id is found the the frame is
-/// unwound and handling continues.
-/// 3. Type id table contains references to all the C++ typeinfo for all
-/// catches in the function. This tables is reversed indexed base 1.
+ Asm->EOL();
+}
/// SharedTypeIds - How many leading type ids two landing pads have in common.
unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
@@ -301,51 +348,58 @@ bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
return LSize < RSize;
}
-void DwarfException::EmitExceptionTable() {
- const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- if (PadInfos.empty()) return;
-
- // Sort the landing pads in order of their type ids. This is used to fold
- // duplicate actions.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- LandingPads.reserve(PadInfos.size());
- for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
- LandingPads.push_back(&PadInfos[i]);
- std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
-
- // Negative type ids index into FilterIds, positive type ids index into
- // TypeInfos. The value written for a positive type id is just the type id
- // itself. For a negative type id, however, the value written is the
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
// (negative) byte offset of the corresponding FilterIds entry. The byte
- // offset is usually equal to the type id, because the FilterIds entries are
- // written using a variable width encoding which outputs one byte per entry as
- // long as the value written is not too large, but can differ. This kind of
- // complication does not occur for positive type ids because type infos are
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
// output using a fixed width encoding. FilterOffsets[i] holds the byte
// offset corresponding to FilterIds[i].
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
SmallVector<int, 16> FilterOffsets;
FilterOffsets.reserve(FilterIds.size());
int Offset = -1;
- for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
- E = FilterIds.end(); I != E; ++I) {
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
FilterOffsets.push_back(Offset);
- Offset -= TargetAsmInfo::getULEB128Size(*I);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
}
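
The "usually equal to the type ID" caveat above comes from LEB128's one-byte-per-small-value property. A standalone size helper plus the same offset loop makes the bookkeeping concrete (uleb128Size is a hypothetical stand-in for MCAsmInfo::getULEB128Size):

    #include <vector>

    // Bytes a ULEB128 encoding of Value occupies: 7 payload bits per byte.
    static unsigned uleb128Size(unsigned Value) {
      unsigned Size = 0;
      do { Value >>= 7; ++Size; } while (Value);
      return Size;
    }

    // FilterOffsets[i] is the negative byte offset of FilterIds[i], exactly
    // as the loop above computes it; one-byte entries make Offset == -(i+1).
    std::vector<int> filterOffsets(const std::vector<unsigned> &FilterIds) {
      std::vector<int> Offsets;
      int Offset = -1;
      for (size_t i = 0; i != FilterIds.size(); ++i) {
        Offsets.push_back(Offset);
        Offset -= uleb128Size(FilterIds[i]);
      }
      return Offsets;
    }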
- // Compute the actions table and gather the first action index for each
- // landing pad site.
- SmallVector<ActionEntry, 32> Actions;
- SmallVector<unsigned, 64> FirstActions;
FirstActions.reserve(LandingPads.size());
int FirstAction = 0;
unsigned SizeActions = 0;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LP = LandingPads[i];
- const std::vector<int> &TypeIds = LP->TypeIds;
- const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ const unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
unsigned SizeSiteActions = 0;
if (NumShared < TypeIds.size()) {
@@ -353,34 +407,33 @@ void DwarfException::EmitExceptionTable() {
ActionEntry *PrevAction = 0;
if (NumShared) {
- const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ const unsigned SizePrevIds = PrevLPI->TypeIds.size();
assert(Actions.size());
PrevAction = &Actions.back();
- SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
- TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
SizeAction -=
- TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
SizeAction += -PrevAction->NextAction;
PrevAction = PrevAction->Previous;
}
}
// Compute the actions.
- for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
- int TypeID = TypeIds[I];
- assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
- unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
SizeSiteActions += SizeAction;
- ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
Actions.push_back(Action);
-
PrevAction = &Actions.back();
}
@@ -388,35 +441,34 @@ void DwarfException::EmitExceptionTable() {
FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
} // else identical - re-use previous FirstAction
+ // Information used when creating the call-site table. The action record
+ // field of the call-site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
FirstActions.push_back(FirstAction);
+ // Compute this site's contribution to size.
SizeActions += SizeSiteActions;
- }
-
- // Compute the call-site table. The entry for an invoke has a try-range
- // containing the call, a non-zero landing pad and an appropriate action. The
- // entry for an ordinary call has a try-range containing the call and zero for
- // the landing pad and the action. Calls marked 'nounwind' have no entry and
- // must not be contained in the try-range of any entry - they form gaps in the
- // table. Entries must be ordered by try-range address.
- SmallVector<CallSiteEntry, 64> CallSites;
-
- RangeMapType PadMap;
- // Invokes and nounwind calls have entries in PadMap (due to being bracketed
- // by try-range labels when lowered). Ordinary calls do not, so appropriate
- // try-ranges for them need be deduced.
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LandingPad = LandingPads[i];
- for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
- unsigned BeginLabel = LandingPad->BeginLabels[j];
- assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
- PadRange P = { i, j };
- PadMap[BeginLabel] = P;
- }
+ PrevLPI = LPI;
}
+ return SizeActions;
+}
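
For concreteness, the offset arithmetic in ComputeActionsTable can be traced
by hand. The sketch below is illustrative only - the TypeIds values are
hypothetical, and SLEB(x) stands for MCAsmInfo::getSLEB128Size(x):

    // Trace for a landing pad with TypeIds = {1, 2} and NumShared == 0.
    //
    // First iteration (TypeID = 1):
    //   SizeTypeID = SLEB(1) = 1;  NextAction = 0            // chain end
    //   SizeAction = 1 + SLEB(0) = 2
    // Second iteration (TypeID = 2):
    //   SizeTypeID = SLEB(2) = 1;  NextAction = -(2 + 1) = -3
    //   (a self-relative displacement back to the first record)
    //   SizeAction = 1 + SLEB(-3) = 2
    //
    // FirstAction for the site is the 1-biased byte offset of the last
    // record in its chain; 0 in the call-site table means "no actions".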
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
// The end label of the previous invoke or nounwind try-range.
unsigned LastLabel = 0;
@@ -424,7 +476,7 @@ void DwarfException::EmitExceptionTable() {
// an ordinary call) between the end of the previous try-range and now.
bool SawPotentiallyThrowing = false;
- // Whether the last callsite entry was for an invoke.
+ // Whether the last CallSite entry was for an invoke.
bool PreviousIsInvoke = false;
// Visit all instructions in order of address.
@@ -450,17 +502,18 @@ void DwarfException::EmitExceptionTable() {
// Nope, it was just some random label.
continue;
- PadRange P = L->second;
+ const PadRange &P = L->second;
const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
-
assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
"Inconsistent landing pad map!");
- // If some instruction between the previous try-range and this one may
- // throw, create a call-site entry with no landing pad for the region
- // between the try-ranges.
- if (SawPotentiallyThrowing) {
- CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ // For Dwarf exception handling (SjLj handling doesn't use this): if some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
}
@@ -470,12 +523,16 @@ void DwarfException::EmitExceptionTable() {
if (LandingPad->LandingPadLabel) {
// This try-range is for an invoke.
- CallSiteEntry Site = {BeginLabel, LastLabel,
- LandingPad->LandingPadLabel,
- FirstActions[P.PadIndex]};
-
- // Try to merge with the previous call-site.
- if (PreviousIsInvoke) {
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SjLj doesn't do this.
+ if (PreviousIsInvoke &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
CallSiteEntry &Prev = CallSites.back();
if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
@@ -497,128 +554,363 @@ void DwarfException::EmitExceptionTable() {
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing) {
- CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ if (SawPotentiallyThrowing &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
CallSites.push_back(Site);
}
+}
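
To illustrate the shape of the result, consider a function with one invoke,
one ordinary call, and one nounwind call; the label numbers in this sketch
are hypothetical:

    // Illustrative CallSites contents:
    //   { BeginLabel = 1, EndLabel = 2, PadLabel = 9, Action = 1 } // invoke
    //   { BeginLabel = 2, EndLabel = 3, PadLabel = 0, Action = 0 } // call
    //   (the nounwind call gets no entry at all - the resulting gap in the
    //    table marks a region that must not throw)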
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed, and finally an index into the
+/// actions table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offsets. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches, then the exception and type ID are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type ID matches, then the frame is
+/// unwound and handling continues.
+/// 3. The type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
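
PadLT and SharedTypeIds are helpers presumably defined earlier in this file:
the sort groups pads with identical leading TypeIds next to each other, which
is what lets ComputeActionsTable share action-chain prefixes between
neighbors. A minimal sketch of the sharing test, under that assumption:

    // Sketch only: count how many leading TypeIds two pads have in common.
    static unsigned SharedTypeIds(const LandingPadInfo *L,
                                  const LandingPadInfo *R) {
      const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
      unsigned LSize = LIds.size(), RSize = RIds.size();
      unsigned MinSize = LSize < RSize ? LSize : RSize;
      unsigned Count = 0;
      for (; Count != MinSize; ++Count)
        if (LIds[Count] != RIds[Count])
          return Count;
      return Count;
    }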
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions = ComputeActionsTable(LandingPads, Actions,
+ FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
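
As an example of what PadMap ends up holding (label numbers hypothetical):
a landing pad at LandingPads index 3 with BeginLabels {7, 12} contributes

    //   PadMap[7]  == PadRange{ PadIndex = 3, RangeIndex = 0 }
    //   PadMap[12] == PadRange{ PadIndex = 3, RangeIndex = 1 }

so any begin label met while walking the instruction stream maps straight
back to its landing pad and to the particular try-range it opens.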
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
// Final tallies.
// Call sites.
- const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4
- const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4
- const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4
- unsigned SizeSites = CallSites.size() * (SiteStartSize +
- SiteLengthSize +
- LandingPadSize);
- for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
- SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+ const unsigned SiteStartSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
+ const unsigned SiteLengthSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
+ const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
+ bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+ unsigned SizeSites;
+
+ if (IsSJLJ)
+ SizeSites = 0;
+ else
+ SizeSites = CallSites.size() *
+ (SiteStartSize + SiteLengthSize + LandingPadSize);
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ SizeSites += MCAsmInfo::getULEB128Size(i);
+ }
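
The byte counts above come from the LEB128 size helpers. The following is a
minimal sketch of the standard LEB128 length computation - it matches the
definition of the encoding, though the exact MCAsmInfo source may differ:

    static unsigned getULEB128Size(unsigned Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;             // seven payload bits per byte
        ++Size;
      } while (Value);
      return Size;
    }

    static unsigned getSLEB128Size(int Value) {
      unsigned Size = 0;
      bool More;
      do {
        unsigned char Byte = Value & 0x7f;
        Value >>= 7;             // arithmetic shift preserves the sign
        // Stop once the remaining bits are pure sign extension.
        More = !((Value ==  0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      } while (More);
      return Size;
    }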
// Type infos.
- const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr
- unsigned SizeTypes = TypeInfos.size() * TypeInfoSize;
-
- unsigned TypeOffset = sizeof(int8_t) + // Call site format
- TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length
- SizeSites + SizeActions + SizeTypes;
-
- unsigned TotalSize = sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
- TypeOffset;
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeFormat;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeFormat = dwarf::DW_EH_PE_omit;
+ TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr);
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ if (LSDASection->getKind().isWriteable() ||
+ Asm->TM.getRelocationModel() == Reloc::Static)
+ TTypeFormat = dwarf::DW_EH_PE_absptr;
+ else
+ TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
- unsigned SizeAlign = (4 - TotalSize) & 3;
+ TypeFormatSize = SizeOfEncodedValue(TTypeFormat);
+ }
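
SizeOfEncodedValue, declared in DwarfException.h below, only needs the low
three bits of a DW_EH_PE encoding to pick the data size. A plausible sketch,
consistent with its uses in this function (the real body may differ):

    unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
      if (Encoding == dwarf::DW_EH_PE_omit)
        return 0;
      switch (Encoding & 0x07) {  // low bits select the data size
      case dwarf::DW_EH_PE_absptr: return TD->getPointerSize();
      case dwarf::DW_EH_PE_udata2: return 2;
      case dwarf::DW_EH_PE_udata4: return 4;
      case dwarf::DW_EH_PE_udata8: return 8;
      }
      llvm_unreachable("Invalid encoded value.");
      return 0;
    }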
// Begin the exception table.
- Asm->SwitchToDataSection(TAI->getDwarfExceptionSection());
+ Asm->OutStreamer.SwitchSection(LSDASection);
Asm->EmitAlignment(2, 0, 0, false);
+
O << "GCC_except_table" << SubprogramCount << ":\n";
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does, instead output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned TyOffset = sizeof(int8_t) + // Call site format
+ MCAsmInfo::getULEB128Size(SizeSites) + // Call-site table length
+ SizeSites + SizeActions + SizeTypes;
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ?
+ MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset
+ TyOffset;
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
for (unsigned i = 0; i != SizeAlign; ++i) {
Asm->EmitInt8(0);
Asm->EOL("Padding");
- }
+ }
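
The wraparound in SizeAlign is worth spelling out: with TotalSize unsigned,
(4 - TotalSize) & 3 is exactly the number of pad bytes needed to round
TotalSize up to the next multiple of 4:

    //   TotalSize = 5  ->  (4 - 5) & 3 = 3   (5 + 3 = 8)
    //   TotalSize = 8  ->  (4 - 8) & 3 = 0   (already aligned)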
EmitLabel("exception", SubprogramCount);
+ if (IsSJLJ) {
+ SmallString<16> LSDAName;
+ raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
+ "_LSDA_" << Asm->getFunctionNumber();
+ O << LSDAName.str() << ":\n";
+ }
+
// Emit the header.
Asm->EmitInt8(dwarf::DW_EH_PE_omit);
- Asm->EOL("LPStart format (DW_EH_PE_omit)");
- Asm->EmitInt8(dwarf::DW_EH_PE_absptr);
- Asm->EOL("TType format (DW_EH_PE_absptr)");
- Asm->EmitULEB128Bytes(TypeOffset);
- Asm->EOL("TType base offset");
- Asm->EmitInt8(dwarf::DW_EH_PE_udata4);
- Asm->EOL("Call site format (DW_EH_PE_udata4)");
- Asm->EmitULEB128Bytes(SizeSites);
- Asm->EOL("Call-site table length");
-
- // Emit the landing pad site information.
- for (unsigned i = 0; i < CallSites.size(); ++i) {
- CallSiteEntry &S = CallSites[i];
- const char *BeginTag;
- unsigned BeginNumber;
-
- if (!S.BeginLabel) {
- BeginTag = "eh_func_begin";
- BeginNumber = SubprogramCount;
- } else {
- BeginTag = "label";
- BeginNumber = S.BeginLabel;
- }
+ Asm->EOL("@LPStart format", dwarf::DW_EH_PE_omit);
- EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
- true, true);
- Asm->EOL("Region start");
+ Asm->EmitInt8(TTypeFormat);
+ Asm->EOL("@TType format", TTypeFormat);
- if (!S.EndLabel)
- EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
- true);
- else
- EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
+ if (HaveTTData) {
+ Asm->EmitULEB128Bytes(TyOffset);
+ Asm->EOL("@TType base offset");
+ }
- Asm->EOL("Region length");
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitInt8(dwarf::DW_EH_PE_udata4);
+ Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4);
+ Asm->EmitULEB128Bytes(SizeSites);
+ Asm->EOL("Call site table length");
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ Asm->EmitULEB128Bytes(idx);
+ Asm->EOL("Landing pad");
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ Asm->EmitULEB128Bytes(S.Action);
+ Asm->EOL("Action");
+ }
+ } else {
+ // DWARF Exception handling
+ assert(MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an unsigned LEB128
+ // compressed integer. The records immediately follow the record count.
+ // They are sorted in order of increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ Asm->EmitInt8(dwarf::DW_EH_PE_udata4);
+ Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4);
+ Asm->EmitULEB128Bytes(SizeSites);
+ Asm->EOL("Call site table size");
+
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+ const char *BeginTag;
+ unsigned BeginNumber;
+
+ if (!S.BeginLabel) {
+ BeginTag = "eh_func_begin";
+ BeginNumber = SubprogramCount;
+ } else {
+ BeginTag = "label";
+ BeginNumber = S.BeginLabel;
+ }
- if (!S.PadLabel)
- Asm->EmitInt32(0);
- else
- EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
true, true);
+ Asm->EOL("Region start");
+
+ if (!S.EndLabel)
+ EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
+ true);
+ else
+ EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
+
+ Asm->EOL("Region length");
- Asm->EOL("Landing pad");
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel)
+ Asm->EmitInt32(0);
+ else
+ EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
+ true, true);
+
+ Asm->EOL("Landing pad");
- Asm->EmitULEB128Bytes(S.Action);
- Asm->EOL("Action");
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ Asm->EmitULEB128Bytes(S.Action);
+ Asm->EOL("Action");
+ }
}
- // Emit the actions.
- for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
- ActionEntry &Action = Actions[I];
+ // Emit the Action Table.
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
Asm->EmitSLEB128Bytes(Action.ValueForTypeID);
Asm->EOL("TypeInfo index");
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+
Asm->EmitSLEB128Bytes(Action.NextAction);
Asm->EOL("Next action");
}
- // Emit the type ids.
- for (unsigned M = TypeInfos.size(); M; --M) {
- GlobalVariable *GV = TypeInfos[M - 1];
+ // Emit the Catch Clauses. The code for the catch clauses following the same
+ // try is similar to a switch statement. The catch clause action record
+ // informs the runtime about the type of a catch clause and about the
+ // associated switch value.
+ //
+ // Action Record Fields:
+ //
+ // * Filter Value
+ // Positive value, starting at 1. Index in the types table of the
+ // __typeinfo for the catch-clause type. 1 is the first word preceding
+ // TTBase, 2 is the second word, and so on. Used by the runtime to check
+ // if the thrown exception type matches the catch-clause type. Back-end
+ // generated switch statements check against this value.
+ //
+ // * Next
+ // Signed offset, in bytes from the start of this field, to the next
+ // chained action record, or zero if none.
+ //
+ // The action records, chained through the Next field, appear in the same
+ // order as the catch clauses in the source code, and that order must be
+ // preserved: reordering the catch clauses would change the semantics of
+ // the program.
+ for (std::vector<GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
PrintRelDirective();
if (GV) {
- std::string GLN;
- O << Asm->getGlobalLinkName(GV, GLN);
+ O << Asm->Mang->getMangledName(GV);
} else {
- O << "0";
+ O << "0x0";
}
Asm->EOL("TypeInfo");
}
- // Emit the filter typeids.
- for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
- unsigned TypeID = FilterIds[j];
+ // Emit the Type Table.
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
Asm->EmitULEB128Bytes(TypeID);
Asm->EOL("Filter TypeInfo index");
}
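
Taken together, the DWARF-flavored LSDA emitted by this function has the
following overall shape (an illustrative sketch; the sizes and the TType
format byte depend on the target and relocation model):

    // GCC_except_table<N>:
    //   .byte    0xff            // @LPStart format (DW_EH_PE_omit)
    //   .byte    TTypeFormat     // @TType format
    //   .uleb128 TyOffset        // @TType base offset (only if HaveTTData)
    //   .byte    0x03            // call site format (DW_EH_PE_udata4)
    //   .uleb128 SizeSites       // call-site table length
    //   ...call-site records...  // start, length, landing pad, action
    //   ...action records...     // SLEB128 (filter, next) pairs
    //   ...type infos...         // reverse indexed, base 1
    //   ...filter ids...         // ULEB128 TypeInfo indices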
@@ -629,48 +921,53 @@ void DwarfException::EmitExceptionTable() {
/// EndModule - Emit all exception information that should come after the
/// content.
void DwarfException::EndModule() {
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
+ return;
+
+ if (!shouldEmitMovesModule && !shouldEmitTableModule)
+ return;
+
if (TimePassesIsEnabled)
ExceptionTimer->startTimer();
- if (shouldEmitMovesModule || shouldEmitTableModule) {
- const std::vector<Function *> Personalities = MMI->getPersonalities();
- for (unsigned i = 0; i < Personalities.size(); ++i)
- EmitCommonEHFrame(Personalities[i], i);
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
- for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(),
- E = EHFrames.end(); I != E; ++I)
- EmitEHFrame(*I);
- }
+ for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+ EmitCIE(Personalities[I], I);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator
+ I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+ EmitFDE(*I);
if (TimePassesIsEnabled)
ExceptionTimer->stopTimer();
}
-/// BeginFunction - Gather pre-function exception information. Assumes being
-/// emitted immediately after the function entry point.
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(MachineFunction *MF) {
+ if (!MMI || !MAI->doesSupportExceptionHandling()) return;
+
if (TimePassesIsEnabled)
ExceptionTimer->startTimer();
this->MF = MF;
shouldEmitTable = shouldEmitMoves = false;
- if (MMI && TAI->doesSupportExceptionHandling()) {
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
- // If any landing pads survive, we need an EH table.
- if (MMI->getLandingPads().size())
- shouldEmitTable = true;
+ // If any landing pads survive, we need an EH table.
+ if (!MMI->getLandingPads().empty())
+ shouldEmitTable = true;
- // See if we need frame move info.
- if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
- shouldEmitMoves = true;
+ // See if we need frame move info.
+ if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
+ shouldEmitMoves = true;
- if (shouldEmitMoves || shouldEmitTable)
- // Assumes in correct section after the entry point.
- EmitLabel("eh_func_begin", ++SubprogramCount);
- }
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ EmitLabel("eh_func_begin", ++SubprogramCount);
shouldEmitTableModule |= shouldEmitTable;
shouldEmitMovesModule |= shouldEmitMoves;
@@ -682,25 +979,29 @@ void DwarfException::BeginFunction(MachineFunction *MF) {
/// EndFunction - Gather and emit post-function exception information.
///
void DwarfException::EndFunction() {
- if (TimePassesIsEnabled)
+ if (!shouldEmitMoves && !shouldEmitTable) return;
+
+ if (TimePassesIsEnabled)
ExceptionTimer->startTimer();
- if (shouldEmitMoves || shouldEmitTable) {
- EmitLabel("eh_func_end", SubprogramCount);
- EmitExceptionTable();
-
- // Save EH frame information
- std::string Name;
- EHFrames.push_back(
- FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name),
- SubprogramCount,
- MMI->getPersonalityIndex(),
- MF->getFrameInfo()->hasCalls(),
- !MMI->getLandingPads().empty(),
- MMI->getFrameMoves(),
- MF->getFunction()));
- }
+ EmitLabel("eh_func_end", SubprogramCount);
+ EmitExceptionTable();
- if (TimePassesIsEnabled)
+ std::string FunctionEHName =
+ Asm->Mang->getMangledName(MF->getFunction(), ".eh",
+ Asm->MAI->is_EHSymbolPrivate());
+
+ // Save EH frame information
+ EHFrames.push_back(FunctionEHFrameInfo(FunctionEHName, SubprogramCount,
+ MMI->getPersonalityIndex(),
+ MF->getFrameInfo()->hasCalls(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ MF->getFunction()));
+
+ // Record if this personality index uses a landing pad.
+ UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty();
+
+ if (TimePassesIsEnabled)
ExceptionTimer->stopTimer();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f1c3e5642359..f6f50255f2e7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__
-#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
#include "DIE.h"
#include "DwarfPrinter.h"
@@ -24,7 +24,7 @@ namespace llvm {
struct LandingPadInfo;
class MachineModuleInfo;
-class TargetAsmInfo;
+class MCAsmInfo;
class Timer;
class raw_ostream;
@@ -51,6 +51,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
std::vector<FunctionEHFrameInfo> EHFrames;
+ /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+ /// uses an LSDA. If so, then we need to encode that information in the CIE's
+ /// augmentation.
+ DenseMap<unsigned, bool> UsesLSDA;
+
/// shouldEmitTable - Per-function flag to indicate if EH tables should
/// be emitted.
bool shouldEmitTable;
@@ -70,13 +75,16 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
/// ExceptionTimer - Timer for the Dwarf exception writer.
Timer *ExceptionTimer;
- /// EmitCommonEHFrame - Emit the common eh unwind frame.
- ///
- void EmitCommonEHFrame(const Function *Personality, unsigned Index);
+ /// SizeOfEncodedValue - Return the size of the encoding in bytes.
+ unsigned SizeOfEncodedValue(unsigned Encoding);
- /// EmitEHFrame - Emit function exception frame information.
- ///
- void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo);
+ /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+ /// that is shared among many Frame Description Entries. There is at least
+ /// one CIE in every non-empty .debug_frame section.
+ void EmitCIE(const Function *Personality, unsigned Index);
+
+ /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+ void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
/// EmitExceptionTable - Emit landing pads and actions.
///
@@ -113,13 +121,6 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
static bool isPod() { return true; }
};
- /// ActionEntry - Structure describing an entry in the actions table.
- struct ActionEntry {
- int ValueForTypeID; // The value to write - may not be equal to the type id.
- int NextAction;
- struct ActionEntry *Previous;
- };
-
/// PadRange - Structure holding a try-range and the associated landing pad.
struct PadRange {
// The index of the landing pad.
@@ -130,23 +131,48 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+ };
+
/// CallSiteEntry - Structure describing an entry in the call-site table.
struct CallSiteEntry {
// The 'try-range' is BeginLabel .. EndLabel.
unsigned BeginLabel; // zero indicates the start of the function.
unsigned EndLabel; // zero indicates the end of the function.
+
// The landing pad starts at PadLabel.
unsigned PadLabel; // zero indicates that there is no landing pad.
unsigned Action;
};
+ /// ComputeActionsTable - Compute the actions table and gather the first
+ /// action index for each landing pad site.
+ unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
void EmitExceptionTable();
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
- DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+ DwarfException(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T);
virtual ~DwarfException();
/// BeginModule - Emit all exception information that should come prior to the
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
index 8021b7c97bb0..6e9293a03bd5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
@@ -13,7 +13,7 @@
#include "DwarfLabel.h"
#include "llvm/ADT/FoldingSet.h"
-#include <ostream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -25,10 +25,7 @@ void DWLabel::Profile(FoldingSetNodeID &ID) const {
}
#ifndef NDEBUG
-void DWLabel::print(std::ostream *O) const {
- if (O) print(*O);
-}
-void DWLabel::print(std::ostream &O) const {
+void DWLabel::print(raw_ostream &O) const {
O << "." << Tag;
if (Number) O << Number;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h
index b49390334bd2..0c0cc4bdc3c6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfLabel.h
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h
@@ -14,19 +14,16 @@
#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__
#define CODEGEN_ASMPRINTER_DWARFLABEL_H__
-#include "llvm/Support/Compiler.h"
-#include <iosfwd>
-#include <vector>
-
namespace llvm {
class FoldingSetNodeID;
+ class raw_ostream;
//===--------------------------------------------------------------------===//
/// DWLabel - Labels are used to track locations in the assembler file.
/// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim,
/// where the tag is a category of label (Ex. location) and number is a value
/// unique in that category.
- class VISIBILITY_HIDDEN DWLabel {
+ class DWLabel {
/// Tag - Label category tag. Should always be a statically declared C
/// string.
///
@@ -47,8 +44,7 @@ namespace llvm {
void Profile(FoldingSetNodeID &ID) const;
#ifndef NDEBUG
- void print(std::ostream *O) const;
- void print(std::ostream &O) const;
+ void print(raw_ostream &O) const;
#endif
};
} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index a1b97df82afc..20b959b914fc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -15,39 +15,41 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
-Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
const char *flavor)
-: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()),
+: O(OS), Asm(A), MAI(T), TD(Asm->TM.getTargetData()),
RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL),
SubprogramCount(0), Flavor(flavor), SetCounter(1) {}
void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const {
- if (isInSection && TAI->getDwarfSectionOffsetDirective())
- O << TAI->getDwarfSectionOffsetDirective();
+ if (isInSection && MAI->getDwarfSectionOffsetDirective())
+ O << MAI->getDwarfSectionOffsetDirective();
else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t))
- O << TAI->getData32bitsDirective();
+ O << MAI->getData32bitsDirective();
else
- O << TAI->getData64bitsDirective();
+ O << MAI->getData64bitsDirective();
}
/// PrintLabelName - Print label name in form used by Dwarf writer.
///
void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const {
- O << TAI->getPrivateGlobalPrefix() << Tag;
+ O << MAI->getPrivateGlobalPrefix() << Tag;
if (Number) O << Number;
}
void Dwarf::PrintLabelName(const char *Tag, unsigned Number,
const char *Suffix) const {
- O << TAI->getPrivateGlobalPrefix() << Tag;
+ O << MAI->getPrivateGlobalPrefix() << Tag;
if (Number) O << Number;
O << Suffix;
}
@@ -65,13 +67,13 @@ void Dwarf::EmitReference(const char *Tag, unsigned Number,
bool IsPCRelative, bool Force32Bit) const {
PrintRelDirective(Force32Bit);
PrintLabelName(Tag, Number);
- if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+ if (IsPCRelative) O << "-" << MAI->getPCSymbol();
}
void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative,
bool Force32Bit) const {
PrintRelDirective(Force32Bit);
O << Name;
- if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+ if (IsPCRelative) O << "-" << MAI->getPCSymbol();
}
/// EmitDifference - Emit the difference between two labels. Some assemblers do
@@ -80,7 +82,7 @@ void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative,
void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi,
const char *TagLo, unsigned NumberLo,
bool IsSmall) {
- if (TAI->needsSet()) {
+ if (MAI->needsSet()) {
O << "\t.set\t";
PrintLabelName("set", SetCounter, Flavor);
O << ",";
@@ -106,11 +108,11 @@ void Dwarf::EmitSectionOffset(const char* Label, const char* Section,
bool useSet) {
bool printAbsolute = false;
if (isEH)
- printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+ printAbsolute = MAI->isAbsoluteEHSectionOffsets();
else
- printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+ printAbsolute = MAI->isAbsoluteDebugSectionOffsets();
- if (TAI->needsSet() && useSet) {
+ if (MAI->needsSet() && useSet) {
O << "\t.set\t";
PrintLabelName("set", SetCounter, Flavor);
O << ",";
@@ -190,7 +192,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
Asm->EmitULEB128Bytes(Offset);
Asm->EOL("Offset");
} else {
- assert(0 && "Machine move not supported yet.");
+ llvm_unreachable("Machine move not supported yet.");
}
} else if (Src.isReg() &&
Src.getReg() == MachineLocation::VirtualFP) {
@@ -200,7 +202,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
Asm->EOL("Register");
} else {
- assert(0 && "Machine move not supported yet.");
+ llvm_unreachable("Machine move not supported yet.");
}
} else {
unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 6e75992cb07c..33ebb3bd0eb5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -25,7 +25,7 @@ namespace llvm {
class MachineFunction;
class MachineModuleInfo;
class Module;
- class TargetAsmInfo;
+ class MCAsmInfo;
class TargetData;
class TargetRegisterInfo;
@@ -43,9 +43,9 @@ namespace llvm {
///
AsmPrinter *Asm;
- /// TAI - Target asm information.
+ /// MAI - Target asm information.
///
- const TargetAsmInfo *TAI;
+ const MCAsmInfo *MAI;
/// TD - Target data.
///
@@ -80,7 +80,7 @@ namespace llvm {
///
unsigned SetCounter;
- Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+ Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
const char *flavor);
public:
//===------------------------------------------------------------------===//
@@ -88,7 +88,7 @@ namespace llvm {
//
const AsmPrinter *getAsm() const { return Asm; }
MachineModuleInfo *getMMI() const { return MMI; }
- const TargetAsmInfo *getTargetAsmInfo() const { return TAI; }
+ const MCAsmInfo *getMCAsmInfo() const { return MAI; }
const TargetData *getTargetData() const { return TD; }
void PrintRelDirective(bool Force32Bit = false,
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 89084989b875..0638d3568549 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -39,7 +39,7 @@ DwarfWriter::~DwarfWriter() {
void DwarfWriter::BeginModule(Module *M,
MachineModuleInfo *MMI,
raw_ostream &OS, AsmPrinter *A,
- const TargetAsmInfo *T) {
+ const MCAsmInfo *T) {
DE = new DwarfException(OS, A, T);
DD = new DwarfDebug(OS, A, T);
DE->BeginModule(M, MMI);
@@ -51,6 +51,8 @@ void DwarfWriter::BeginModule(Module *M,
void DwarfWriter::EndModule() {
DE->EndModule();
DD->EndModule();
+ delete DD; DD = 0;
+ delete DE; DE = 0;
}
/// BeginFunction - Gather pre-function debug information. Assumes being
@@ -75,18 +77,18 @@ void DwarfWriter::EndFunction(MachineFunction *MF) {
/// label. Returns a unique label ID used to generate a label and provide
/// correspondence to the source line list.
unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
- DICompileUnit CU) {
- return DD->RecordSourceLine(Line, Col, CU);
+ MDNode *Scope) {
+ return DD->RecordSourceLine(Line, Col, Scope);
}
/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) {
- return DD->RecordRegionStart(V);
+unsigned DwarfWriter::RecordRegionStart(MDNode *N) {
+ return DD->RecordRegionStart(N);
}
/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) {
- return DD->RecordRegionEnd(V);
+unsigned DwarfWriter::RecordRegionEnd(MDNode *N) {
+ return DD->RecordRegionEnd(N);
}
/// getRecordSourceLineCount - Count source lines.
@@ -96,9 +98,8 @@ unsigned DwarfWriter::getRecordSourceLineCount() {
/// RecordVariable - Indicate the declaration of a local variable.
///
-void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
- const MachineInstr *MI) {
- DD->RecordVariable(GV, FrameIndex, MI);
+void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) {
+ DD->RecordVariable(N, FrameIndex);
}
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
@@ -107,8 +108,7 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const {
return DD && DD->ShouldEmitDwarfDebug();
}
-//// RecordInlinedFnStart - Global variable GV is inlined at the location marked
-//// by LabelID label.
+//// RecordInlinedFnStart
unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
unsigned Line, unsigned Col) {
return DD->RecordInlinedFnStart(SP, CU, Line, Col);
@@ -119,9 +119,9 @@ unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
return DD->RecordInlinedFnEnd(SP);
}
-/// RecordVariableScope - Record scope for the variable declared by
-/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE.
-void DwarfWriter::RecordVariableScope(DIVariable &DV,
- const MachineInstr *DeclareMI) {
- DD->RecordVariableScope(DV, DeclareMI);
+void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) {
+  DD->SetDbgScopeBeginLabels(MI, L);
+}
+void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) {
+  DD->SetDbgScopeEndLabels(MI, L);
+}
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 8ba903a65d79..06b92b7294b6 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -15,12 +15,14 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/Module.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -28,10 +30,10 @@ namespace {
class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter {
public:
void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI);
+ const MCAsmInfo &MAI);
void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI);
+ const MCAsmInfo &MAI);
};
}
@@ -42,11 +44,11 @@ Y("ocaml", "ocaml 3.10-compatible collector");
void llvm::linkOcamlGCPrinter() { }
static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI, const char *Id) {
+ const MCAsmInfo &MAI, const char *Id) {
const std::string &MId = M.getModuleIdentifier();
std::string Mangled;
- Mangled += TAI.getGlobalPrefix();
+ Mangled += MAI.getGlobalPrefix();
Mangled += "caml";
size_t Letter = Mangled.size();
Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
@@ -56,18 +58,18 @@ static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
// Capitalize the first letter of the module name.
Mangled[Letter] = toupper(Mangled[Letter]);
- if (const char *GlobalDirective = TAI.getGlobalDirective())
+ if (const char *GlobalDirective = MAI.getGlobalDirective())
OS << GlobalDirective << Mangled << "\n";
OS << Mangled << ":\n";
}
void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) {
- AP.SwitchToSection(TAI.getTextSection());
- EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin");
+ const MCAsmInfo &MAI) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "code_begin");
- AP.SwitchToSection(TAI.getDataSection());
- EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin");
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "data_begin");
}
/// emitAssembly - Print the frametable. The ocaml frametable format is thus:
@@ -87,55 +89,59 @@ void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) {
+ const MCAsmInfo &MAI) {
const char *AddressDirective;
int AddressAlignLog;
if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
- AddressDirective = TAI.getData32bitsDirective();
+ AddressDirective = MAI.getData32bitsDirective();
AddressAlignLog = 2;
} else {
- AddressDirective = TAI.getData64bitsDirective();
+ AddressDirective = MAI.getData64bitsDirective();
AddressAlignLog = 3;
}
- AP.SwitchToSection(TAI.getTextSection());
- EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end");
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "code_end");
- AP.SwitchToSection(TAI.getDataSection());
- EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end");
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "data_end");
OS << AddressDirective << 0; // FIXME: Why does ocaml emit this??
AP.EOL();
- AP.SwitchToSection(TAI.getDataSection());
- EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable");
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "frametable");
for (iterator I = begin(), IE = end(); I != IE; ++I) {
GCFunctionInfo &FI = **I;
uint64_t FrameSize = FI.getFrameSize();
if (FrameSize >= 1<<16) {
- cerr << "Function '" << FI.getFunction().getNameStart()
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Function '" << FI.getFunction().getName()
<< "' is too large for the ocaml GC! "
<< "Frame size " << FrameSize << " >= 65536.\n";
- cerr << "(" << uintptr_t(&FI) << ")\n";
- abort(); // Very rude!
+ Msg << "(" << uintptr_t(&FI) << ")";
+ llvm_report_error(Msg.str()); // Very rude!
}
- OS << "\t" << TAI.getCommentString() << " live roots for "
- << FI.getFunction().getNameStart() << "\n";
+ OS << "\t" << MAI.getCommentString() << " live roots for "
+ << FI.getFunction().getName() << "\n";
for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
size_t LiveCount = FI.live_size(J);
if (LiveCount >= 1<<16) {
- cerr << "Function '" << FI.getFunction().getNameStart()
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Function '" << FI.getFunction().getName()
<< "' is too large for the ocaml GC! "
- << "Live root count " << LiveCount << " >= 65536.\n";
- abort(); // Very rude!
+ << "Live root count " << LiveCount << " >= 65536.";
+ llvm_report_error(Msg.str()); // Very rude!
}
OS << AddressDirective
- << TAI.getPrivateGlobalPrefix() << "label" << J->Num;
+ << MAI.getPrivateGlobalPrefix() << "label" << J->Num;
AP.EOL("call return address");
AP.EmitInt16(FrameSize);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 26353035ae2f..f9abeacbdbb3 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -27,6 +28,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -44,70 +47,35 @@ TailMergeThreshold("tail-merge-threshold",
cl::desc("Max number of predecessors to consider tail merging"),
cl::init(150), cl::Hidden);
-namespace {
- struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass {
- static char ID;
- explicit BranchFolder(bool defaultEnableTailMerge) :
- MachineFunctionPass(&ID) {
- switch (FlagEnableTailMerge) {
- case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
- case cl::BOU_TRUE: EnableTailMerge = true; break;
- case cl::BOU_FALSE: EnableTailMerge = false; break;
- }
- }
- virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
- const TargetInstrInfo *TII;
- MachineModuleInfo *MMI;
- bool MadeChange;
- private:
- // Tail Merging.
- bool EnableTailMerge;
- bool TailMergeBlocks(MachineFunction &MF);
- bool TryMergeBlocks(MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
- void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
- MachineBasicBlock *NewDest);
- MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
- MachineBasicBlock::iterator BBI1);
- unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
- void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
- unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
- unsigned maxCommonTailLength);
-
- typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
- typedef std::vector<MergePotentialsElt>::iterator MPIterator;
- std::vector<MergePotentialsElt> MergePotentials;
-
- typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
- std::vector<SameTailElt> SameTails;
-
- const TargetRegisterInfo *RegInfo;
- RegScavenger *RS;
- // Branch optzn.
- bool OptimizeBranches(MachineFunction &MF);
- void OptimizeBlock(MachineBasicBlock *MBB);
- void RemoveDeadBlock(MachineBasicBlock *MBB);
- bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
-
- bool CanFallThrough(MachineBasicBlock *CurBB);
- bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
- MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond);
- };
- char BranchFolder::ID = 0;
-}
+char BranchFolderPass::ID = 0;
FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
- return new BranchFolder(DefaultEnableTailMerge); }
+ return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ return OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+}
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
/// function, updating the CFG.
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
- DOUT << "\nRemoving MBB: " << *MBB;
+ DEBUG(errs() << "\nRemoving MBB: " << *MBB);
MachineFunction *MF = MBB->getParent();
// drop all successors.
@@ -146,7 +114,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
- for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
ImpDefRegs.insert(SubReg);
++I;
@@ -180,32 +148,37 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
return true;
}
-bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- if (!TII) return false;
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi) {
+ if (!tii) return false;
+
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
- RegInfo = MF.getTarget().getRegisterInfo();
+ RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
// Fix CFG. The later algorithms expect it to be right.
- bool EverMadeChange = false;
+ bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
- EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
- EverMadeChange |= OptimizeImpDefsBlock(MBB);
+ MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(MBB);
}
- RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
-
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = false;
MadeChangeThisIteration |= TailMergeBlocks(MF);
MadeChangeThisIteration |= OptimizeBranches(MF);
- EverMadeChange |= MadeChangeThisIteration;
+ MadeChange |= MadeChangeThisIteration;
}
// See if any jump tables have become mergable or dead as the code generator
@@ -222,8 +195,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
// Scan the jump tables, seeing if there are any duplicates. Note that this
// is N^2, which should be fixed someday.
- for (unsigned i = 1, e = JTs.size(); i != e; ++i)
- JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+ for (unsigned i = 1, e = JTs.size(); i != e; ++i) {
+ if (JTs[i].MBBs.empty())
+ JTMapping.push_back(i);
+ else
+ JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+ }
// If a jump table was merge with another one, walk the function rewriting
// references to jump tables to reference the new JT ID's. Keep track of
@@ -250,12 +227,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
if (!JTIsLive.test(i)) {
JTI->RemoveJumpTable(i);
- EverMadeChange = true;
+ MadeChange = true;
}
}
-
+
delete RS;
- return EverMadeChange;
+ return MadeChange;
}
//===----------------------------------------------------------------------===//
@@ -395,9 +372,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
RS->enterBasicBlock(&CurMBB);
if (!CurMBB.empty())
RS->forward(prior(CurMBB.end()));
- BitVector RegsLiveAtExit(RegInfo->getNumRegs());
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++)
+ for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++)
if (RegsLiveAtExit[i])
NewMBB->addLiveIn(i);
}
@@ -461,7 +438,7 @@ static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
// _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
// an object with itself.
#ifndef _GLIBCXX_DEBUG
- assert(0 && "Predecessor appears twice");
+ llvm_unreachable("Predecessor appears twice");
#endif
return false;
}
@@ -567,8 +544,8 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
- DOUT << "\nSplitting " << MBB->getNumber() << ", size " <<
- maxCommonTailLength;
+ DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
SameTails[commonTailIndex].first->second = newMBB;
@@ -590,13 +567,14 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
MachineBasicBlock* PredBB) {
+ bool MadeChange = false;
+
// It doesn't make sense to save a single instruction since tail merging
// will add a jump.
// FIXME: Ask the target to provide the threshold?
unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
- MadeChange = false;
- DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n';
+ DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n');
// Sort by hash value so that blocks with identical end sequences sort
// together.
@@ -643,17 +621,17 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
- DOUT << "\nUsing common tail " << MBB->getNumber() << " for ";
+ DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for ");
for (unsigned int i=0; i<SameTails.size(); ++i) {
if (commonTailIndex==i)
continue;
- DOUT << SameTails[i].first->second->getNumber() << ",";
+ DEBUG(errs() << SameTails[i].first->second->getNumber() << ",");
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].second, MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
MergePotentials.erase(SameTails[i].first);
}
- DOUT << "\n";
+ DEBUG(errs() << "\n");
// We leave commonTailIndex in the worklist in case there are other blocks
// that match it with a smaller number of instructions.
MadeChange = true;
@@ -665,7 +643,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return false;
- MadeChange = false;
+ bool MadeChange = false;
// First find blocks with no successors.
MergePotentials.clear();
@@ -699,6 +677,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
@@ -709,6 +688,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Skip blocks that loop to themselves, can't tail merge these.
if (PBB==IBB)
continue;
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
@@ -772,14 +754,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
//===----------------------------------------------------------------------===//
bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
- MadeChange = false;
+ bool MadeChange = false;
// Make sure blocks are numbered in order
MF.RenumberBlocks();
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
- OptimizeBlock(MBB);
+ MadeChange |= OptimizeBlock(MBB);
// If it is dead, remove it.
if (MBB->pred_empty()) {
@@ -873,7 +855,9 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
/// OptimizeBlock - Analyze and optimize control flow related to the specified
/// block. This is never called on the entry block.
-void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+
MachineFunction::iterator FallThrough = MBB;
++FallThrough;
@@ -882,7 +866,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// points to this block.
if (MBB->empty() && !MBB->isLandingPad()) {
// Dead block? Leave for cleanup later.
- if (MBB->pred_empty()) return;
+ if (MBB->pred_empty()) return MadeChange;
if (FallThrough == MBB->getParent()->end()) {
// TODO: Simplify preds to not branch here if possible!
@@ -893,14 +877,13 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineBasicBlock *Pred = *(MBB->pred_end()-1);
Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
}
-
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
MBB->getParent()->getJumpTableInfo()->
ReplaceMBBInJumpTables(MBB, FallThrough);
MadeChange = true;
}
- return;
+ return MadeChange;
}
// Check to see if we can simplify the terminator of the block before this
@@ -1004,8 +987,8 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
- DOUT << "\nMoving MBB: " << *MBB;
- DOUT << "To make fallthrough to: " << *PriorTBB << "\n";
+ DEBUG(errs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
@@ -1014,7 +997,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MBB->moveAfter(--MBB->getParent()->end());
MadeChange = true;
++NumBranchOpts;
- return;
+ return MadeChange;
}
}
}
@@ -1116,7 +1099,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (DidChange) {
++NumBranchOpts;
MadeChange = true;
- if (!HasBranchToSelf) return;
+ if (!HasBranchToSelf) return MadeChange;
}
}
}
@@ -1197,8 +1180,10 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
PrevBB.isSuccessor(FallThrough)) {
MBB->moveAfter(--MBB->getParent()->end());
MadeChange = true;
- return;
+ return MadeChange;
}
}
}
+
+ return MadeChange;
}
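The BranchFolding.cpp hunks above replace the class-level MadeChange flag with locals that each helper returns and the caller accumulates. A minimal, self-contained sketch of that pattern (the stand-in function names are illustrative, not the real call graph):

  #include <cstdio>

  static bool tailMergeBlocks() { return true; }   // stand-in for TailMergeBlocks
  static bool optimizeBranches() { return false; } // stand-in for OptimizeBranches

  static bool optimizeFunction() {
    bool MadeChange = false;            // local, not a shared class member
    MadeChange |= tailMergeBlocks();    // each phase reports its own result
    MadeChange |= optimizeBranches();
    return MadeChange;                  // callers see one combined answer
  }

  int main() { std::printf("changed: %d\n", optimizeFunction()); return 0; }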
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
new file mode 100644
index 000000000000..9763e3339a20
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,84 @@
+//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <vector>
+
+namespace llvm {
+ class MachineFunction;
+ class MachineModuleInfo;
+ class RegScavenger;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class BranchFolder {
+ public:
+ explicit BranchFolder(bool defaultEnableTailMerge);
+
+ bool OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi);
+ private:
+ typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+
+ typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+ std::vector<SameTailElt> SameTails;
+
+ bool EnableTailMerge;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ RegScavenger *RS;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool CanFallThrough(MachineBasicBlock *CurBB);
+ bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond);
+ };
+
+
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID;
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
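The new header splits the folding logic (BranchFolder) from its pass wrapper (BranchFolderPass), so the optimizer can be driven outside the pass manager. A hedged sketch of such a driver, assuming the target objects are already in hand; only the two BranchFolder calls come from the header above:

  #include "BranchFolding.h"   // the header introduced above
  using namespace llvm;

  // Hypothetical helper: run branch folding on one function directly.
  static bool foldBranches(MachineFunction &MF, const TargetInstrInfo *TII,
                           const TargetRegisterInfo *TRI,
                           MachineModuleInfo *MMI) {
    BranchFolder Folder(/*defaultEnableTailMerge=*/true);
    return Folder.OptimizeFunction(MF, TII, TRI, MMI);
  }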
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 48f17d0d04c1..713c30c7d4ab 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMCodeGen
DwarfEHPrepare.cpp
ELFCodeEmitter.cpp
ELFWriter.cpp
+ ExactHazardRecognizer.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
@@ -12,7 +13,6 @@ add_llvm_library(LLVMCodeGen
IntrinsicLowering.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
- LazyLiveness.cpp
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveStackAnalysis.cpp
@@ -23,27 +23,28 @@ add_llvm_library(LLVMCodeGen
MachineBasicBlock.cpp
MachineDominators.cpp
MachineFunction.cpp
+ MachineFunctionAnalysis.cpp
+ MachineFunctionPass.cpp
MachineInstr.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineModuleInfo.cpp
+ MachineModuleInfoImpls.cpp
MachinePassRegistry.cpp
MachineRegisterInfo.cpp
MachineSink.cpp
MachineVerifier.cpp
+ ObjectCodeEmitter.cpp
OcamlGC.cpp
- PBQP.cpp
PHIElimination.cpp
Passes.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
- RegAllocBigBlock.cpp
RegAllocLinearScan.cpp
RegAllocLocal.cpp
RegAllocPBQP.cpp
- RegAllocSimple.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
ScheduleDAG.cpp
@@ -53,6 +54,7 @@ add_llvm_library(LLVMCodeGen
ShadowStackGC.cpp
ShrinkWrapping.cpp
SimpleRegisterCoalescing.cpp
+ SjLjEHPrepare.cpp
Spiller.cpp
StackProtector.cpp
StackSlotColoring.cpp
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 383098e11efd..932fae4f316c 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -95,11 +95,11 @@ FunctionPass *llvm::createCodePlacementOptPass() {
/// ...
/// jmp B
///
-/// C: --> new loop header
+/// C:
/// ...
/// <fallthough to B>
///
-/// B:
+/// B: --> loop header
/// ...
/// jcc <cond> C, [exit]
///
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 4832a5ee9ae0..078ed3d31b1c 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -36,7 +37,7 @@ namespace {
DeadMachineInstructionElim() : MachineFunctionPass(&ID) {}
private:
- bool isDead(MachineInstr *MI) const;
+ bool isDead(const MachineInstr *MI) const;
};
}
char DeadMachineInstructionElim::ID = 0;
@@ -49,10 +50,10 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() {
return new DeadMachineInstructionElim();
}
-bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const {
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Don't delete instructions with side effects.
bool SawStore = false;
- if (!MI->isSafeToMove(TII, SawStore))
+ if (!MI->isSafeToMove(TII, SawStore, 0))
return false;
// Examine each operand.
@@ -110,7 +111,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// If the instruction is dead, delete it!
if (isDead(MI)) {
- DOUT << "DeadMachineInstructionElim: DELETING: " << *MI;
+ DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI);
AnyChanges = true;
MI->eraseFromParent();
MIE = MBB->rend();
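The DeadMachineInstructionElim change makes isDead const-correct and passes a third argument to isSafeToMove; judging from contemporary LLVM sources that slot is an alias-analysis pointer, but treat that as an assumption. A condensed sketch of the gate the pass applies before deleting anything:

  // Sketch only: an instruction is a deletion candidate only if it can
  // be moved freely, i.e. it has no stores or other side effects.
  static bool isDeadCandidate(const llvm::MachineInstr *MI,
                              const llvm::TargetInstrInfo *TII) {
    bool SawStore = false;
    if (!MI->isSafeToMove(TII, SawStore, 0))  // 0: no alias info (assumed)
      return false;                           // side effects keep it alive
    return true;  // the real pass also checks each operand's liveness
  }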
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 720e3d19b759..72b3f92d326e 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -107,7 +107,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
/// NormalizeLandingPads - Normalize and discover landing pads, noting them
/// in the LandingPads set. A landing pad is normal if the only CFG edges
-/// that end at it are unwind edges from invoke instructions.
+/// that end at it are unwind edges from invoke instructions. If we inlined
+/// through an invoke, we could have a normal branch from the previous
+/// unwind block through to the landing pad for the original invoke.
/// Abnormal landing pads are fixed up by redirecting all unwind edges to
/// a new basic block which falls through to the original.
bool DwarfEHPrepare::NormalizeLandingPads() {
@@ -132,6 +134,7 @@ bool DwarfEHPrepare::NormalizeLandingPads() {
break;
}
}
+
if (OnlyUnwoundTo) {
// Only unwind edges lead to the landing pad. Remember the landing pad.
LandingPads.insert(LPad);
@@ -142,7 +145,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() {
// edges to a new basic block which falls through into this one.
// Create the new basic block.
- BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge");
+ BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
+ LPad->getName() + "_unwind_edge");
// Insert it into the function right before the original landing pad.
LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
@@ -218,28 +222,43 @@ bool DwarfEHPrepare::NormalizeLandingPads() {
/// at runtime if there is no such exception: using unwind to throw a new
/// exception is currently not supported.
bool DwarfEHPrepare::LowerUnwinds() {
- bool Changed = false;
+ SmallVector<TerminatorInst*, 16> UnwindInsts;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
- if (!isa<UnwindInst>(TI))
- continue;
+ if (isa<UnwindInst>(TI))
+ UnwindInsts.push_back(TI);
+ }
+
+ if (UnwindInsts.empty()) return false;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = UnwindInsts[0]->getContext();
+ std::vector<const Type*>
+ Params(1, Type::getInt8PtrTy(Ctx));
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Params, false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ bool Changed = false;
+
+ for (SmallVectorImpl<TerminatorInst*>::iterator
+ I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) {
+ TerminatorInst *TI = *I;
// Replace the unwind instruction with a call to _Unwind_Resume (or the
// appropriate target equivalent) followed by an UnreachableInst.
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- std::vector<const Type*> Params(1, PointerType::getUnqual(Type::Int8Ty));
- FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false);
- const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
- }
-
// Create the call...
- CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI);
+ CallInst *CI = CallInst::Create(RewindFunction,
+ CreateReadOfExceptionValue(TI->getParent()),
+ "", TI);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// ...followed by an UnreachableInst.
- new UnreachableInst(TI);
+ new UnreachableInst(TI->getContext(), TI);
// Nuke the unwind instruction.
TI->eraseFromParent();
@@ -314,7 +333,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() {
if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
// Turn the exception temporary into registers and phi nodes if possible.
std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
- PromoteMemToReg(Allocas, *DT, *DF);
+ PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext());
return true;
}
return false;
@@ -354,8 +373,8 @@ Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
// Create the temporary if we didn't already.
if (!ExceptionValueVar) {
- ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty),
- "eh.value", F->begin()->begin());
+ ExceptionValueVar = new AllocaInst(PointerType::getUnqual(
+ Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin());
++NumStackTempsIntroduced;
}
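LowerUnwinds now runs in two phases: collect every UnwindInst first, create the rewind function once, then rewrite. That ordering hoists the one-time setup out of the loop and avoids mutating what is being traversed. A self-contained sketch of the same collect-then-rewrite shape (plain ints stand in for terminators):

  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<int> terms;
    terms.push_back(1); terms.push_back(-2);   // negatives stand in
    terms.push_back(3); terms.push_back(-4);   // for unwind terminators
    std::vector<int*> unwinds;                 // phase 1: collect only
    for (size_t i = 0; i != terms.size(); ++i)
      if (terms[i] < 0)
        unwinds.push_back(&terms[i]);
    if (unwinds.empty())
      return 0;                                // early exit: nothing to lower
    // one-time setup (the RewindFunction lookup) would happen here
    for (size_t i = 0; i != unwinds.size(); ++i)
      *unwinds[i] = 0;                         // phase 2: rewrite safely
    for (size_t i = 0; i != terms.size(); ++i)
      std::printf("%d ", terms[i]);
    return 0;
  }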
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index 7e983a4d0512..b466e89cb261 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -52,6 +52,159 @@ namespace llvm {
EV_CURRENT = 1
};
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+
+ // An ELF symbol originates from one of the two llvm types below; for
+ // the other kinds (section, file, func) a null pointer is assumed by
+ // default.
+ union {
+ const GlobalValue *GV; // If this is a pointer to a GV
+ const char *Ext; // If this is a pointer to a named symbol
+ } Source;
+
+ // Describes the source type this ELF symbol comes from: it can be a
+ // GlobalValue, an ExternalSymbol, or neither.
+ enum {
+ isGV, // The Source.GV field is valid.
+ isExtSym, // The Source.ExtSym field is valid.
+ isOther // Not a GlobalValue or External Symbol
+ };
+ unsigned SourceType;
+
+ bool isGlobalValue() const { return SourceType == isGV; }
+ bool isExternalSym() const { return SourceType == isExtSym; }
+
+ // getGlobalValue - If this is a global value which originated the
+ // elf symbol, return a reference to it.
+ const GlobalValue *getGlobalValue() const {
+ assert(SourceType == isGV && "This is not a global value");
+ return Source.GV;
+ };
+
+ // getExternalSym - If this is an external symbol which originated the
+ // elf symbol, return a reference to it.
+ const char *getExternalSymbol() const {
+ assert(SourceType == isExtSym && "This is not an external symbol");
+ return Source.Ext;
+ };
+
+ // getGV - From a global value, return an ELF symbol to represent it.
+ static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
+ unsigned Type, unsigned Visibility) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(Type);
+ Sym->setVisibility(Visibility);
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // getExtSym - Create and return an ELF symbol to represent an
+ // external symbol
+ static ELFSym *getExtSym(const char *Ext) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.Ext = Ext;
+ Sym->setBind(STB_GLOBAL);
+ Sym->setType(STT_NOTYPE);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SourceType = isExtSym;
+ return Sym;
+ }
+
+ // getSectionSym - Returns an ELF symbol to represent an ELF section.
+ static ELFSym *getSectionSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(STB_LOCAL);
+ Sym->setType(STT_SECTION);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getFileSym - Returns an ELF symbol to represent the module identifier.
+ static ELFSym *getFileSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(STB_LOCAL);
+ Sym->setType(STT_FILE);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getUndefGV - Returns an STT_NOTYPE symbol pointing to SHN_UNDEF.
+ static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(STT_NOTYPE);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // ELF specific fields
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ uint8_t Info;
+ uint8_t Other;
+ unsigned short SectionIdx;
+
+ // Symbol index into the Symbol table
+ unsigned SymTabIdx;
+
+ enum {
+ STB_LOCAL = 0, // Local sym, not visible outside obj file containing def
+ STB_GLOBAL = 1, // Global sym, visible to all object files being combined
+ STB_WEAK = 2 // Weak symbol, like global but lower-precedence
+ };
+
+ enum {
+ STT_NOTYPE = 0, // Symbol's type is not specified
+ STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.)
+ STT_FUNC = 2, // Symbol is executable code (function, etc.)
+ STT_SECTION = 3, // Symbol refers to a section
+ STT_FILE = 4 // Local, absolute symbol that refers to a file
+ };
+
+ enum {
+ STV_DEFAULT = 0, // Visibility is specified by binding type
+ STV_INTERNAL = 1, // Defined by processor supplements
+ STV_HIDDEN = 2, // Not visible to other components
+ STV_PROTECTED = 3 // Visible in other components but not preemptable
+ };
+
+ ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
+ Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0),
+ SymTabIdx(0) {}
+
+ unsigned getBind() const { return (Info >> 4) & 0xf; }
+ unsigned getType() const { return Info & 0xf; }
+ bool isLocalBind() const { return getBind() == STB_LOCAL; }
+ bool isFileType() const { return getType() == STT_FILE; }
+
+ void setBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+
+ void setType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
+
+ void setVisibility(unsigned V) {
+ assert(V == (V & 0x3) && "Visibility value out of range!");
+ Other = V;
+ }
+ };
+
/// ELFSection - This struct contains information about each section that is
/// emitted to the file. This is eventually turned into the section header
/// table at the end of the file.
@@ -117,78 +270,19 @@ namespace llvm {
/// SectionIdx - The number of the section in the Section Table.
unsigned short SectionIdx;
- ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
- : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
- Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {}
- };
-
- /// ELFSym - This struct contains information about each symbol that is
- /// added to logical symbol table for the module. This is eventually
- /// turned into a real symbol table in the file.
- struct ELFSym {
- // The global value this corresponds to. Global symbols can be on of the
- // 3 types : if this symbol has a zero initializer, it is common or should
- // be placed in bss section otherwise it's a constant.
- const GlobalValue *GV;
- bool IsCommon;
- bool IsBss;
- bool IsConstant;
-
- // ELF specific fields
- unsigned NameIdx; // Index in .strtab of name, once emitted.
- uint64_t Value;
- unsigned Size;
- uint8_t Info;
- uint8_t Other;
- unsigned short SectionIdx;
+ /// Sym - The symbol to represent this section if it has one.
+ ELFSym *Sym;
- // Symbol index into the Symbol table
- unsigned SymTabIdx;
-
- enum {
- STB_LOCAL = 0,
- STB_GLOBAL = 1,
- STB_WEAK = 2
- };
-
- enum {
- STT_NOTYPE = 0,
- STT_OBJECT = 1,
- STT_FUNC = 2,
- STT_SECTION = 3,
- STT_FILE = 4
- };
-
- enum {
- STV_DEFAULT = 0, // Visibility is specified by binding type
- STV_INTERNAL = 1, // Defined by processor supplements
- STV_HIDDEN = 2, // Not visible to other components
- STV_PROTECTED = 3 // Visible in other components but not preemptable
- };
-
- ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false),
- IsConstant(false), NameIdx(0), Value(0),
- Size(0), Info(0), Other(STV_DEFAULT),
- SectionIdx(ELFSection::SHN_UNDEF),
- SymTabIdx(0) {}
-
- unsigned getBind() { return (Info >> 4) & 0xf; }
- unsigned getType() { return Info & 0xf; }
-
- void setBind(unsigned X) {
- assert(X == (X & 0xF) && "Bind value out of range!");
- Info = (Info & 0x0F) | (X << 4);
+ /// getSymbolTableIndex - Returns the symbol table index of the symbol
+ /// representing this section.
+ unsigned getSymbolTableIndex() const {
+ assert(Sym && "section not present in the symbol table");
+ return Sym->SymTabIdx;
}
- void setType(unsigned X) {
- assert(X == (X & 0xF) && "Type value out of range!");
- Info = (Info & 0xF0) | X;
- }
-
- void setVisibility(unsigned V) {
- assert(V == (V & 0x3) && "Type value out of range!");
- Other = V;
- }
+ ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
+ : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
+ Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
};
/// ELFRelocation - This class contains all the information necessary to
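ELFSym packs binding and type into the single Info byte: binding in the high nibble, type in the low one, exactly as setBind/setType above encode and getBind/getType decode. A self-contained check of that arithmetic (the constants mirror the enums above):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t Info = 0;
    unsigned Bind = 1;                    // STB_GLOBAL
    unsigned Type = 2;                    // STT_FUNC
    Info = (Info & 0x0F) | (Bind << 4);   // setBind
    Info = (Info & 0xF0) | Type;          // setType
    assert(Info == 0x12);                 // 0x1_ bind nibble, 0x_2 type nibble
    assert(((Info >> 4) & 0xf) == Bind);  // getBind
    assert((Info & 0xf) == Type);         // getType
    return 0;
  }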
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 691f19408d47..a6429f70001a 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -17,12 +17,16 @@
#include "llvm/Function.h"
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
//===----------------------------------------------------------------------===//
// ELFCodeEmitter Implementation
@@ -33,84 +37,75 @@ namespace llvm {
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.
void ELFCodeEmitter::startFunction(MachineFunction &MF) {
- // Get the ELF Section that this function belongs in.
- ES = &EW.getTextSection();
+ DEBUG(errs() << "processing function: "
+ << MF.getFunction()->getName() << "\n");
- DOUT << "processing function: " << MF.getFunction()->getName() << "\n";
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getTextSection(MF.getFunction());
- // FIXME: better memory management, this will be replaced by BinaryObjects
- BinaryData &BD = ES->getData();
- BD.reserve(4096);
- BufferBegin = &BD[0];
- BufferEnd = BufferBegin + BD.capacity();
+ // Set the desired binary object to be used by the code emitters
+ setBinaryObject(ES);
// Get the function alignment in bytes
unsigned Align = (1 << MF.getAlignment());
- // Align the section size with the function alignment, so the function can
- // start in a aligned offset, also update the section alignment if needed.
- if (ES->Align < Align) ES->Align = Align;
- ES->Size = (ES->Size + (Align-1)) & (-Align);
-
- // Snaity check on allocated space for text section
- assert( ES->Size < 4096 && "no more space in TextSection" );
-
- // FIXME: Using ES->Size directly here instead of calculating it from the
- // output buffer size (impossible because the code emitter deals only in raw
- // bytes) forces us to manually synchronize size and write padding zero bytes
- // to the output buffer for all non-text sections. For text sections, we do
- // not synchonize the output buffer, and we just blow up if anyone tries to
- // write non-code to it. An assert should probably be added to
- // AddSymbolToSection to prevent calling it on the text section.
- CurBufferPtr = BufferBegin + ES->Size;
-
- // Record function start address relative to BufferBegin
- FnStartPtr = CurBufferPtr;
+ // The function must start on its required alignment
+ ES->emitAlignment(Align);
+
+ // Update the section alignment if needed.
+ ES->Align = std::max(ES->Align, Align);
+
+ // Record the function start offset
+ FnStartOff = ES->getCurrentPCOffset();
+
+ // Emit constant pool and jump tables to their appropriate sections.
+ // They need to be emitted before the function because in some targets
+ // the later may reference JT or CP entry address.
+ emitConstantPool(MF.getConstantPool());
+ emitJumpTables(MF.getJumpTableInfo());
}
/// finishFunction - This callback is invoked after the function is completely
/// finished.
bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
- // Update Section Size
- ES->Size = CurBufferPtr - BufferBegin;
-
// Add a symbol to represent the function.
const Function *F = MF.getFunction();
- ELFSym FnSym(F);
- FnSym.setType(ELFSym::STT_FUNC);
- FnSym.setBind(EW.getGlobalELFLinkage(F));
- FnSym.setVisibility(EW.getGlobalELFVisibility(F));
- FnSym.SectionIdx = ES->SectionIdx;
- FnSym.Size = CurBufferPtr-FnStartPtr;
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC,
+ EW.getGlobalELFVisibility(F));
+ FnSym->SectionIdx = ES->SectionIdx;
+ FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
+ EW.AddPendingGlobalSymbol(F, true);
// Offset from start of Section
- FnSym.Value = FnStartPtr-BufferBegin;
-
- // Locals should go on the symbol list front
- if (!F->hasPrivateLinkage()) {
- if (FnSym.getBind() == ELFSym::STB_LOCAL)
- EW.SymbolList.push_front(FnSym);
- else
- EW.SymbolList.push_back(FnSym);
+ FnSym->Value = FnStartOff;
+
+ if (!F->hasPrivateLinkage())
+ EW.SymbolList.push_back(FnSym);
+
+ // Patch up Jump Table Section relocations to use the real MBBs offsets
+ // now that the MBB label offsets inside the function are known.
+ if (!MF.getJumpTableInfo()->isEmpty()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+ MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+ unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setResultPointer((void*)MBBOffset);
+ MR.setConstantVal(ES->SectionIdx);
+ JTSection.addRelocation(MR);
+ }
}
- // Emit constant pool to appropriate section(s)
- emitConstantPool(MF.getConstantPool());
-
- // Emit jump tables to appropriate section
- emitJumpTables(MF.getJumpTableInfo());
-
- // Relocations
- // -----------
// If we have emitted any relocations to function-specific objects such as
// basic blocks, constant pools entries, or jump tables, record their
- // addresses now so that we can rewrite them with the correct addresses
- // later.
+ // addresses now so that we can rewrite them with the correct addresses later
for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
MachineRelocation &MR = Relocations[i];
intptr_t Addr;
if (MR.isGlobalValue()) {
- EW.PendingGlobals.insert(MR.getGlobalValue());
+ EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+ } else if (MR.isExternalSymbol()) {
+ EW.AddPendingExternalSymbol(MR.getExternalSymbol());
} else if (MR.isBasicBlock()) {
Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
MR.setConstantVal(ES->SectionIdx);
@@ -120,16 +115,18 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
MR.setResultPointer((void*)Addr);
} else if (MR.isJumpTableIndex()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(JTSection.SectionIdx);
MR.setResultPointer((void*)Addr);
- MR.setConstantVal(JumpTableSectionIdx);
} else {
- assert(0 && "Unhandled relocation type");
+ llvm_unreachable("Unhandled relocation type");
}
ES->addRelocation(MR);
}
// Clear per-function data structures.
+ JTRelocations.clear();
Relocations.clear();
CPLocations.clear();
CPSections.clear();
@@ -148,25 +145,19 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
assert(TM.getRelocationModel() != Reloc::PIC_ &&
"PIC codegen not yet handled for elf constant pools!");
- const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
for (unsigned i = 0, e = CP.size(); i != e; ++i) {
MachineConstantPoolEntry CPE = CP[i];
- // Get the right ELF Section for this constant pool entry
- std::string CstPoolName =
- TAI->SelectSectionForMachineConst(CPE.getType())->getName();
- ELFSection &CstPoolSection =
- EW.getConstantPoolSection(CstPoolName, CPE.getAlignment());
-
// Record the constant pool location and the section index
- CPLocations.push_back(CstPoolSection.size());
- CPSections.push_back(CstPoolSection.SectionIdx);
+ ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+ CPLocations.push_back(CstPool.size());
+ CPSections.push_back(CstPool.SectionIdx);
if (CPE.isMachineConstantPoolEntry())
assert("CPE.isMachineConstantPoolEntry not supported yet");
// Emit the constant to constant pool section
- EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPoolSection);
+ EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
}
}
@@ -180,44 +171,32 @@ void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
assert(TM.getRelocationModel() != Reloc::PIC_ &&
"PIC codegen not yet handled for elf jump tables!");
- const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+ const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+ unsigned EntrySize = MJTI->getEntrySize();
// Get the ELF Section to emit the jump table
- unsigned Align = TM.getTargetData()->getPointerABIAlignment();
- std::string JTName(TAI->getJumpTableDataSection());
- ELFSection &JTSection = EW.getJumpTableSection(JTName, Align);
- JumpTableSectionIdx = JTSection.SectionIdx;
-
- // Entries in the JT Section are relocated against the text section
- ELFSection &TextSection = EW.getTextSection();
+ ELFSection &JTSection = EW.getJumpTableSection();
// For each JT, record its offset from the start of the section
for (unsigned i = 0, e = JT.size(); i != e; ++i) {
const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
- DOUT << "JTSection.size(): " << JTSection.size() << "\n";
- DOUT << "JTLocations.size: " << JTLocations.size() << "\n";
-
// Record JT 'i' offset in the JT section
JTLocations.push_back(JTSection.size());
// Each MBB entry in the Jump table section has a relocation entry
// against the current text section.
for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
MachineRelocation MR =
- MachineRelocation::getBB(JTSection.size(),
- MachineRelocation::VANILLA,
- MBBs[mi]);
-
- // Offset of JT 'i' in JT section
- MR.setResultPointer((void*)getMachineBasicBlockAddress(MBBs[mi]));
- MR.setConstantVal(TextSection.SectionIdx);
+ MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
// Add the relocation to the Jump Table section
- JTSection.addRelocation(MR);
+ JTRelocations.push_back(MR);
// Output placeholder for MBB in the JT section
- JTSection.emitWord(0);
+ for (unsigned s=0; s < EntrySize; ++s)
+ JTSection.emitByte(0);
}
}
}
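emitJumpTables now writes an EntrySize-byte zero placeholder per MBB entry and queues a relocation in JTRelocations; finishFunction patches those with real MBB offsets once they are known. A simplified sketch of that loop (plain vectors stand in for the section and relocation types):

  #include <vector>

  struct Reloc { unsigned Offset; };  // stand-in for MachineRelocation

  static void emitJumpTableEntries(std::vector<unsigned char> &JTSection,
                                   std::vector<Reloc> &JTRelocations,
                                   unsigned NumMBBs, unsigned EntrySize) {
    for (unsigned mi = 0; mi != NumMBBs; ++mi) {
      Reloc MR = { (unsigned)JTSection.size() }; // fixed up in finishFunction
      JTRelocations.push_back(MR);
      for (unsigned s = 0; s < EntrySize; ++s)
        JTSection.push_back(0);                  // placeholder bytes
    }
  }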
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index 982aebf8fcc0..b5e9c844ec69 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -10,7 +10,7 @@
#ifndef ELFCODEEMITTER_H
#define ELFCODEEMITTER_H
-#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
#include <vector>
namespace llvm {
@@ -19,7 +19,7 @@ namespace llvm {
/// ELFCodeEmitter - This class is used by the ELFWriter to
/// emit the code for functions to the ELF file.
- class ELFCodeEmitter : public MachineCodeEmitter {
+ class ELFCodeEmitter : public ObjectCodeEmitter {
ELFWriter &EW;
/// Target machine description
@@ -28,102 +28,48 @@ namespace llvm {
/// Section containing code for functions
ELFSection *ES;
- /// Relocations - These are the relocations that the function needs, as
- /// emitted.
+ /// Relocations - Record relocations needed by the current function
std::vector<MachineRelocation> Relocations;
- /// CPLocations - This is a map of constant pool indices to offsets from the
- /// start of the section for that constant pool index.
- std::vector<uintptr_t> CPLocations;
+ /// JTRelocations - Record relocations needed by the jump table
+ /// entries; they are patched up once the MBB offsets are known.
+ std::vector<MachineRelocation> JTRelocations;
- /// CPSections - This is a map of constant pool indices to the MachOSection
- /// containing the constant pool entry for that index.
- std::vector<unsigned> CPSections;
-
- /// JTLocations - This is a map of jump table indices to offsets from the
- /// start of the section for that jump table index.
- std::vector<uintptr_t> JTLocations;
-
- /// MBBLocations - This vector is a mapping from MBB ID's to their address.
- /// It is filled in by the StartMachineBasicBlock callback and queried by
- /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations;
-
- /// FnStartPtr - Pointer to the start location of the current function
- /// in the buffer
- uint8_t *FnStartPtr;
-
- /// JumpTableSectionIdx - Holds the index of the Jump Table Section
- unsigned JumpTableSectionIdx;
+ /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+ uintptr_t FnStartOff;
public:
- explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM),
- JumpTableSectionIdx(0) {}
-
- void startFunction(MachineFunction &F);
- bool finishFunction(MachineFunction &F);
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+ /// addRelocation - Register new relocations for this function
void addRelocation(const MachineRelocation &MR) {
Relocations.push_back(MR);
}
- virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
- }
+ /// emitConstantPool - For each constant pool entry, figure out which
+ /// section the constant should live in and emit data to it
+ void emitConstantPool(MachineConstantPool *MCP);
- virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
- assert(CPLocations.size() > Index && "CP not emitted!");
- return CPLocations[Index];
- }
+ /// emitJumpTables - Emit all the jump tables for a given jump table
+ /// info and record them to the appropriate section.
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
- virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
- assert(JTLocations.size() > Index && "JT not emitted!");
- return JTLocations[Index];
- }
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
+ /// emitLabel - Emits a label
+ virtual void emitLabel(uint64_t LabelID) {
+ assert("emitLabel not implemented");
}
+ /// getLabelAddress - Return the address of the specified LabelID,
+ /// only usable after the LabelID has been emitted.
virtual uintptr_t getLabelAddress(uint64_t Label) const {
- assert(0 && "Label address not implementated yet!");
- abort();
+ assert("getLabelAddress not implemented");
return 0;
}
- virtual void emitLabel(uint64_t LabelID) {
- assert(0 && "emit Label not implementated yet!");
- abort();
- }
-
- /// emitConstantPool - For each constant pool entry, figure out which section
- /// the constant should live in and emit the constant.
- void emitConstantPool(MachineConstantPool *MCP);
-
- /// emitJumpTables - Emit all the jump tables for a given jump table info
- /// record to the appropriate section.
- void emitJumpTables(MachineJumpTableInfo *MJTI);
-
virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
- /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
- void startGVStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment = 1) {
- assert(0 && "JIT specific function called!");
- abort();
- }
- void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) {
- assert(0 && "JIT specific function called!");
- abort();
- }
- void *finishGVStub(const GlobalValue *F) {
- assert(0 && "JIT specific function called!");
- abort();
- return 0;
- }
}; // end class ELFCodeEmitter
} // end namespace llvm
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 9e915245525a..3e1ee11b2166 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -29,7 +29,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "elfwriter"
-
#include "ELF.h"
#include "ELFWriter.h"
#include "ELFCodeEmitter.h"
@@ -40,26 +39,33 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/FileWriters.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
+
using namespace llvm;
char ELFWriter::ID = 0;
-/// AddELFWriter - Concrete function to add the ELF writer to the function pass
-/// manager.
-MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
- raw_ostream &O,
- TargetMachine &TM) {
+
+/// AddELFWriter - Add the ELF writer to the function pass manager
+ObjectCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
+ raw_ostream &O,
+ TargetMachine &TM) {
ELFWriter *EW = new ELFWriter(O, TM);
PM.add(EW);
- return &EW->getMachineCodeEmitter();
+ return EW->getObjectCodeEmitter();
}
//===----------------------------------------------------------------------===//
@@ -68,27 +74,51 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(&ID), O(o), TM(tm),
+ OutContext(*new MCContext()),
+ TLOF(TM.getTargetLowering()->getObjFileLowering()),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
isLittleEndian(TM.getTargetData()->isLittleEndian()),
ElfHdr(isLittleEndian, is64Bit) {
- TAI = TM.getTargetAsmInfo();
+ MAI = TM.getMCAsmInfo();
TEW = TM.getELFWriterInfo();
- // Create the machine code emitter object for this target.
- MCE = new ELFCodeEmitter(*this);
+ // Create the object code emitter object for this target.
+ ElfCE = new ELFCodeEmitter(*this);
// Inital number of sections
NumSections = 0;
}
ELFWriter::~ELFWriter() {
- delete MCE;
+ delete ElfCE;
+ delete &OutContext;
+
+ while(!SymbolList.empty()) {
+ delete SymbolList.back();
+ SymbolList.pop_back();
+ }
+
+ while(!PrivateSyms.empty()) {
+ delete PrivateSyms.back();
+ PrivateSyms.pop_back();
+ }
+
+ while(!SectionList.empty()) {
+ delete SectionList.back();
+ SectionList.pop_back();
+ }
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
}
// doInitialization - Emit the file header and all of the global variables for
// the module to the ELF file.
bool ELFWriter::doInitialization(Module &M) {
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
Mang = new Mangler(M);
// ELF Header
@@ -138,13 +168,115 @@ bool ELFWriter::doInitialization(Module &M) {
// Add the null section, which is required to be first in the file.
getNullSection();
+ // The first entry in the symtab is the null symbol and the second
+ // is a local symbol containing the module/file name
+ SymbolList.push_back(new ELFSym());
+ SymbolList.push_back(ELFSym::getFileSym());
+
return false;
}
+// AddPendingGlobalSymbol - Add a global to be processed and, optionally,
+// to the global symbol lookup. A zero index is used because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup /* = false */) {
+ PendingGlobals.insert(GV);
+ if (AddToLookup)
+ GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external symbol to be processed
+// and to the external symbol lookup. A zero index is used because the
+// symbol table index will be determined later.
+void ELFWriter::AddPendingExternalSymbol(const char *External) {
+ PendingExternals.insert(External);
+ ExtSymLookup[External] = 0;
+}
+
+ELFSection &ELFWriter::getDataSection() {
+ const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
+ return getSection(Data->getSectionName(), Data->getType(),
+ Data->getFlags(), 4);
+}
+
+ELFSection &ELFWriter::getBSSSection() {
+ const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
+ return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
+}
+
+// getCtorSection - Get the static constructor section
+ELFSection &ELFWriter::getCtorSection() {
+ const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
+ return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
+}
+
+// getDtorSection - Get the static destructor section
+ELFSection &ELFWriter::getDtorSection() {
+ const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
+ return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
+}
+
+// getTextSection - Get the text section for the specified function
+ELFSection &ELFWriter::getTextSection(Function *F) {
+ const MCSectionELF *Text =
+ (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
+ return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
+}
+
+// getJumpTableSection - Get a read only section for constants when
+// emitting jump tables. TODO: add PIC support
+ELFSection &ELFWriter::getJumpTableSection() {
+ const MCSectionELF *JT =
+ (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
+ return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
+ TM.getTargetData()->getPointerABIAlignment());
+}
+
+// getConstantPoolSection - Get a constant pool section based on the machine
+// constant pool entry type and relocation info.
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSectionELF *CPSect =
+ (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
+ return getSection(CPSect->getSectionName(), CPSect->getType(),
+ CPSect->getFlags(), CPE.getAlignment());
+}
+
+// getRelocSection - Return the relocation section of section 'S'. The
+// section gets an SHT_RELA type and an extra "a" in its name when the
+// target's relocation entries carry addends.
+ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
+ unsigned SectionType = TEW->hasRelocationAddend() ?
+ ELFSection::SHT_RELA : ELFSection::SHT_REL;
+
+ std::string SectionName(".rel");
+ if (TEW->hasRelocationAddend())
+ SectionName.append("a");
+ SectionName.append(S.getName());
+
+ return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
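getRelocSection derives the relocation section's name from its target section: ".rel" plus the section name, with an "a" inserted when the target uses addend-style (SHT_RELA) relocations. A self-contained check of that naming rule:

  #include <cassert>
  #include <string>

  static std::string relocSectionName(const std::string &S, bool HasAddend) {
    std::string Name(".rel");
    if (HasAddend)
      Name.append("a");   // SHT_RELA targets get ".rela<name>"
    Name.append(S);
    return Name;
  }

  int main() {
    assert(relocSectionName(".text", false) == ".rel.text");
    assert(relocSectionName(".text", true) == ".rela.text");
    return 0;
  }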
+// getGlobalELFVisibility - Returns the ELF specific visibility type
unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
switch (GV->getVisibility()) {
default:
- assert(0 && "unknown visibility type");
+ llvm_unreachable("unknown visibility type");
case GlobalValue::DefaultVisibility:
return ELFSym::STV_DEFAULT;
case GlobalValue::HiddenVisibility:
@@ -152,134 +284,132 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
case GlobalValue::ProtectedVisibility:
return ELFSym::STV_PROTECTED;
}
-
return 0;
}
-unsigned ELFWriter::getGlobalELFLinkage(const GlobalValue *GV) {
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
if (GV->hasInternalLinkage())
return ELFSym::STB_LOCAL;
- if (GV->hasWeakLinkage())
+ if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
return ELFSym::STB_WEAK;
return ELFSym::STB_GLOBAL;
}
-// getElfSectionFlags - Get the ELF Section Header based on the
-// flags defined in ELFTargetAsmInfo.
-unsigned ELFWriter::getElfSectionFlags(unsigned Flags) {
- unsigned ElfSectionFlags = ELFSection::SHF_ALLOC;
-
- if (Flags & SectionFlags::Code)
- ElfSectionFlags |= ELFSection::SHF_EXECINSTR;
- if (Flags & SectionFlags::Writeable)
- ElfSectionFlags |= ELFSection::SHF_WRITE;
- if (Flags & SectionFlags::Mergeable)
- ElfSectionFlags |= ELFSection::SHF_MERGE;
- if (Flags & SectionFlags::TLS)
- ElfSectionFlags |= ELFSection::SHF_TLS;
- if (Flags & SectionFlags::Strings)
- ElfSectionFlags |= ELFSection::SHF_STRINGS;
-
- return ElfSectionFlags;
-}
-
-// For global symbols without a section, return the Null section as a
-// placeholder
-ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV,
- ELFSym &Sym) {
- // If this is a declaration, the symbol does not have a section.
- if (!GV->hasInitializer()) {
- Sym.SectionIdx = ELFSection::SHN_UNDEF;
- return getNullSection();
- }
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+ if (GV->isDeclaration())
+ return ELFSym::STT_NOTYPE;
- // Get the name and flags of the section for the global
- const Section *S = TAI->SectionForGlobal(GV);
- unsigned SectionType = ELFSection::SHT_PROGBITS;
- unsigned SectionFlags = getElfSectionFlags(S->getFlags());
- DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n";
+ if (isa<Function>(GV))
+ return ELFSym::STT_FUNC;
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPreferredAlignment(GV);
- Constant *CV = GV->getInitializer();
-
- // If this global has a zero initializer, go to .bss or common section.
- // Variables are part of the common block if they are zero initialized
- // and allowed to be merged with other symbols.
- if (CV->isNullValue() || isa<UndefValue>(CV)) {
- SectionType = ELFSection::SHT_NOBITS;
- ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags);
- if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
- GV->hasCommonLinkage()) {
- Sym.SectionIdx = ELFSection::SHN_COMMON;
- Sym.IsCommon = true;
- ElfS.Align = 1;
- return ElfS;
- }
- Sym.IsBss = true;
- Sym.SectionIdx = ElfS.SectionIdx;
- if (Align) ElfS.Size = (ElfS.Size + Align-1) & ~(Align-1);
- ElfS.Align = std::max(ElfS.Align, Align);
- return ElfS;
- }
-
- Sym.IsConstant = true;
- ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags);
- Sym.SectionIdx = ElfS.SectionIdx;
- ElfS.Align = std::max(ElfS.Align, Align);
- return ElfS;
+ return ELFSym::STT_OBJECT;
}
-void ELFWriter::EmitFunctionDeclaration(const Function *F) {
- ELFSym GblSym(F);
- GblSym.setBind(ELFSym::STB_GLOBAL);
- GblSym.setType(ELFSym::STT_NOTYPE);
- GblSym.setVisibility(ELFSym::STV_DEFAULT);
- GblSym.SectionIdx = ELFSection::SHN_UNDEF;
- SymbolList.push_back(GblSym);
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+// no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+ return GV->isDeclaration() || (isa<Function>(GV));
}
-void ELFWriter::EmitGlobalVar(const GlobalVariable *GV) {
- unsigned SymBind = getGlobalELFLinkage(GV);
- unsigned Align=0, Size=0;
- ELFSym GblSym(GV);
- GblSym.setBind(SymBind);
- GblSym.setVisibility(getGlobalELFVisibility(GV));
-
- if (GV->hasInitializer()) {
- GblSym.setType(ELFSym::STT_OBJECT);
- const TargetData *TD = TM.getTargetData();
- Align = TD->getPreferredAlignment(GV);
- Size = TD->getTypeAllocSize(GV->getInitializer()->getType());
- GblSym.Size = Size;
+// AddToSymbolList - Update the symbol lookup and, if the symbol is
+// private, add it to the PrivateSyms list, otherwise to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+ assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+ const GlobalValue *GV = GblSym->getGlobalValue();
+ if (GV->hasPrivateLinkage()) {
+ // For private symbols, keep track of the index inside the private
+ // list since it will never go to the symbol table and won't be
+ // patched up later.
+ PrivateSyms.push_back(GblSym);
+ GblSymLookup[GV] = PrivateSyms.size()-1;
} else {
- GblSym.setType(ELFSym::STT_NOTYPE);
+ // Non-private symbols are left with zero indices until they are
+ // patched up during symbol table emission (where the indices are
+ // created).
+ SymbolList.push_back(GblSym);
+ GblSymLookup[GV] = 0;
}
+}
- ELFSection &GblSection = getGlobalSymELFSection(GV, GblSym);
-
- if (GblSym.IsCommon) {
- GblSym.Value = Align;
- } else if (GblSym.IsBss) {
- GblSym.Value = GblSection.Size;
- GblSection.Size += Size;
- } else if (GblSym.IsConstant){
- // GblSym.Value should contain the symbol index inside the section,
- // and all symbols should start on their required alignment boundary
- GblSym.Value = (GblSection.size() + (Align-1)) & (-Align);
- GblSection.emitAlignment(Align);
- EmitGlobalConstant(GV->getInitializer(), GblSection);
- }
+// EmitGlobal - Choose the right section for the global and emit it.
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
- // Local symbols should come first on the symbol table.
- if (!GV->hasPrivateLinkage()) {
- if (SymBind == ELFSym::STB_LOCAL)
- SymbolList.push_front(GblSym);
- else
- SymbolList.push_back(GblSym);
+ // Check if the referenced symbol is already emitted
+ if (GblSymLookup.find(GV) != GblSymLookup.end())
+ return;
+
+ // Handle ELF Bind, Visibility and Type for the current symbol
+ unsigned SymBind = getGlobalELFBinding(GV);
+ unsigned SymType = getGlobalELFType(GV);
+ bool IsUndefSym = IsELFUndefSym(GV);
+
+ ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
+ : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+ if (!IsUndefSym) {
+ assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ // Handle special llvm globals
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ // Get the ELF section where this global belongs from TLOF
+ const MCSectionELF *S =
+ (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+ ELFSection &ES =
+ getSection(S->getSectionName(), S->getType(), S->getFlags());
+ SectionKind Kind = S->getKind();
+
+ // The symbol alignment should update the section alignment if needed
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPreferredAlignment(GVar);
+ unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+ GblSym->Size = Size;
+
+ if (S->HasCommonSymbols()) { // Symbol must go to a common section
+ GblSym->SectionIdx = ELFSection::SHN_COMMON;
+
+ // A new linkonce section is created for each global in the common
+ // section; the default alignment is 1 and the symbol value contains
+ // its alignment.
+ ES.Align = 1;
+ GblSym->Value = Align;
+
+ } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // Update the size with the alignment so that the next object can
+ // start at the right offset in the section.
+ if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+ ES.Align = std::max(ES.Align, Align);
+
+ // GblSym->Value should contain the virtual offset inside the section.
+ // Virtual because the BSS space is not allocated in ELF objects.
+ GblSym->Value = ES.Size;
+ ES.Size += Size;
+
+ } else { // The symbol must go to some kind of data section
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // GblSym->Value should contain the symbol offset inside the section,
+ // and all symbols should start on their required alignment boundary
+ ES.Align = std::max(ES.Align, Align);
+ ES.emitAlignment(Align);
+ GblSym->Value = ES.size();
+
+ // Emit the global to the data section 'ES'
+ EmitGlobalConstant(GVar->getInitializer(), ES);
+ }
}
+
+ AddToSymbolList(GblSym);
}
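EmitGlobal's three branches above give ELFSym::Value three different meanings: for common symbols it carries the required alignment, for BSS the running (virtual) section size, and for data the aligned offset of the emitted bytes. An illustrative condensation of just that decision, not the LLVM API:

  #include <cstdint>

  enum GlobalKind { Common, BSS, Data };

  static uint64_t symbolValueFor(GlobalKind K, uint64_t Align,
                                 uint64_t SectionSize,
                                 uint64_t AlignedOffset) {
    switch (K) {
    case Common: return Align;         // alignment travels in Value
    case BSS:    return SectionSize;   // virtual offset, no bytes emitted
    case Data:   return AlignedOffset; // real offset of the emitted data
    }
    return 0;
  }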
void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
@@ -305,8 +435,7 @@ void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
- for (unsigned p=0; p < padSize; p++)
- GblS.emitByte(0);
+ GblS.emitZeros(padSize);
}
assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
"Layout of constant struct may be incorrect!");
@@ -317,65 +446,242 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
unsigned Size = TD->getTypeAllocSize(CV->getType());
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
- if (CVA->isString()) {
- std::string GblStr = CVA->getAsString();
- GblStr.resize(GblStr.size()-1);
- GblS.emitString(GblStr);
- } else { // Not a string. Print the values in successive locations
- for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
- EmitGlobalConstant(CVA->getOperand(i), GblS);
- }
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), GblS);
+ return;
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ GblS.emitZeros(Size);
return;
} else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
EmitGlobalConstantStruct(CVS, GblS);
return;
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- if (CFP->getType() == Type::DoubleTy)
- GblS.emitWord64(Val);
- else if (CFP->getType() == Type::FloatTy)
- GblS.emitWord32(Val);
- else if (CFP->getType() == Type::X86_FP80Ty) {
- assert(0 && "X86_FP80Ty global emission not implemented");
- } else if (CFP->getType() == Type::PPC_FP128Ty)
- assert(0 && "PPC_FP128Ty global emission not implemented");
+ APInt Val = CFP->getValueAPF().bitcastToAPInt();
+ if (CFP->getType()->isDoubleTy())
+ GblS.emitWord64(Val.getZExtValue());
+ else if (CFP->getType()->isFloatTy())
+ GblS.emitWord32(Val.getZExtValue());
+ else if (CFP->getType()->isX86_FP80Ty()) {
+ unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+ TD->getTypeStoreSize(CFP->getType());
+ GblS.emitWordFP80(Val.getRawData(), PadSize);
+ } else if (CFP->getType()->isPPC_FP128Ty())
+ llvm_unreachable("PPC_FP128Ty global emission not implemented");
return;
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (Size == 4)
+ if (Size == 1)
+ GblS.emitByte(CI->getZExtValue());
+ else if (Size == 2)
+ GblS.emitWord16(CI->getZExtValue());
+ else if (Size == 4)
GblS.emitWord32(CI->getZExtValue());
- else if (Size == 8)
- GblS.emitWord64(CI->getZExtValue());
- else
- assert(0 && "LargeInt global emission not implemented");
+ else
+ EmitGlobalConstantLargeInt(CI, GblS);
return;
} else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
const VectorType *PTy = CP->getType();
for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
EmitGlobalConstant(CP->getOperand(I), GblS);
return;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Resolve the constant expression into a (Constant, Offset) pair.
+ // If 'Res.first' is a GlobalValue, emit a relocation with the
+ // offset 'Res.second'; otherwise emit a global constant as is
+ // always done for non-constant-expression types.
+ CstExprResTy Res = ResolveConstantExpr(CE);
+ const Constant *Op = Res.first;
+
+ if (isa<GlobalValue>(Op))
+ EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
+ TD->getTypeAllocSize(Op->getType()),
+ GblS, Res.second);
+ else
+ EmitGlobalConstant(Op, GblS);
+
+ return;
+ } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+ // Fill the data entry with zeros or emit a relocation entry
+ if (isa<ConstantPointerNull>(CV))
+ GblS.emitZeros(Size);
+ else
+ EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
+ Size, GblS);
+ return;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function and
+ // therefore must be referenced using a relocation entry.
+ EmitGlobalDataRelocation(GV, Size, GblS);
+ return;
+ }
+
+ std::string msg;
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << "Constant unimp for type: " << *CV->getType();
+ llvm_report_error(ErrorMsg.str());
+}
+
+// ResolveConstantExpr - Resolve the constant expression until it stops
+// yielding other constant expressions.
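+// For example (names illustrative), an initializer such as
+//   getelementptr ([10 x i8]* @buf, i32 0, i32 4)
+// resolves to the pair (@buf, 4), which is then emitted as a relocation
+// against @buf with offset 4.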
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+ const TargetData *TD = TM.getTargetData();
+
+  // There are no constant expressions nested inside others anymore
+ if (!isa<ConstantExpr>(CV))
+ return std::make_pair(CV, 0);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ return ResolveConstantExpr(CE->getOperand(0));
+
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size());
+ return std::make_pair(ptrVal, Offset);
+ }
+ case Instruction::IntToPtr: {
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return ResolveConstantExpr(Op);
+ }
+ case Instruction::PtrToInt: {
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+    // We can emit the pointer value into this slot if the slot is an
+    // integer slot whose size is greater than or equal to the pointer size.
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+ return ResolveConstantExpr(Op);
+
+ llvm_unreachable("Integer size less then pointer size");
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ // Only handle cases where there's a constant expression with GlobalValue
+ // as first operand and ConstantInt as second, which are the cases we can
+    // resolve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+ // 1) Instruction::Add => (global) + CstInt
+ // 2) Instruction::Sub => (global) + -CstInt
+ const Constant *Op0 = CE->getOperand(0);
+ const Constant *Op1 = CE->getOperand(1);
+ assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+ CstExprResTy Res = ResolveConstantExpr(Op0);
+ assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+ const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ return std::make_pair(Res.first, RHS.getSExtValue());
+ case Instruction::Sub:
+ return std::make_pair(Res.first, (-RHS).getSExtValue());
+ }
+ }
+ }
+
+ std::string msg(CE->getOpcodeName());
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << ": Unsupported ConstantExpr type";
+ llvm_report_error(ErrorMsg.str());
+
+ return std::make_pair(CV, 0); // silence warning
+}
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset) {
+ // Create the relocation entry for the global value
+ MachineRelocation MR =
+ MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+ TEW->getAbsoluteLabelMachineRelTy(),
+ const_cast<GlobalValue*>(GV),
+ Offset);
+
+ // Fill the data entry with zeros
+ GblS.emitZeros(Size);
+
+ // Add the relocation entry for the current data section
+ GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ ELFSection &S) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
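+  // e.g. an i128 constant is emitted as two 64-bit words: little-endian
+  // targets emit the low word (RawData[0]) first, while big-endian
+  // targets emit the high word first.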
+ const uint64_t *RawData = CI->getValue().getRawData();
+ uint64_t Val = 0;
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+ S.emitWord64(Val);
}
- assert(0 && "unknown global constant");
}
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used")
+ llvm_unreachable("not implemented yet");
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPointerPrefAlignment();
+ if (GV->getName() == "llvm.global_ctors") {
+ ELFSection &Ctor = getCtorSection();
+ Ctor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Ctor);
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ ELFSection &Dtor = getDtorSection();
+ Dtor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Dtor);
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the
+/// function pointers, ignoring the init priority.
+void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
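+  // A typical list looks like (names illustrative):
+  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+  //     [ { i32, void ()* } { i32 65535, void ()* @init } ]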
+ if (!isa<ConstantArray>(List)) return;
+ ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+        return; // Found a null terminator, stop emitting.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1), Xtor);
+ }
+}
bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
- // Nothing to do here, this is all done through the MCE object above.
+ // Nothing to do here, this is all done through the ElfCE object above.
return false;
}
/// doFinalization - Now that the module has been completely processed, emit
/// the ELF file to 'O'.
bool ELFWriter::doFinalization(Module &M) {
- /// FIXME: This should be removed when moving to ObjectCodeEmiter. Since the
- /// current ELFCodeEmiter uses CurrBuff, ... it doesn't update S.Data
- /// vector size for .text sections, so this is a quick dirty fix
- ELFSection &TS = getTextSection();
- if (TS.Size) {
- BinaryData &BD = TS.getData();
- for (unsigned e=0; e<TS.Size; ++e)
- BD.push_back(BD[e]);
- }
-
// Emit .data section placeholder
getDataSection();
@@ -384,57 +690,34 @@ bool ELFWriter::doFinalization(Module &M) {
// Build and emit data, bss and "common" sections.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- EmitGlobalVar(I);
- GblSymLookup[I] = 0;
- }
+ I != E; ++I)
+ EmitGlobal(I);
// Emit all pending globals
- // TODO: this should be done only for referenced symbols
- for (SetVector<GlobalValue*>::const_iterator I = PendingGlobals.begin(),
- E = PendingGlobals.end(); I != E; ++I) {
+ for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
+ I != E; ++I)
+ EmitGlobal(*I);
- // No need to emit the symbol again
- if (GblSymLookup.find(*I) != GblSymLookup.end())
- continue;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) {
- EmitGlobalVar(GV);
- } else if (Function *F = dyn_cast<Function>(*I)) {
- // If function is not in GblSymLookup, it doesn't have a body,
- // so emit the symbol as a function declaration (no section associated)
- EmitFunctionDeclaration(F);
- } else {
- assert("unknown howto handle pending global");
- }
- GblSymLookup[*I] = 0;
- }
+ // Emit all pending externals
+ for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
+ I != E; ++I)
+ SymbolList.push_back(ELFSym::getExtSym(*I));
// Emit non-executable stack note
- if (TAI->getNonexecutableStackDirective())
+ if (MAI->getNonexecutableStackDirective())
getNonExecStackSection();
- // Emit a symbol for each section created until now
- for (std::map<std::string, ELFSection*>::iterator I = SectionLookup.begin(),
- E = SectionLookup.end(); I != E; ++I) {
- ELFSection *ES = I->second;
-
- // Skip null section
- if (ES->SectionIdx == 0) continue;
-
- ELFSym SectionSym(0);
- SectionSym.SectionIdx = ES->SectionIdx;
- SectionSym.Size = 0;
- SectionSym.setBind(ELFSym::STB_LOCAL);
- SectionSym.setType(ELFSym::STT_SECTION);
- SectionSym.setVisibility(ELFSym::STV_DEFAULT);
-
- // Local symbols go in the list front
- SymbolList.push_front(SectionSym);
+  // Emit a symbol for each section created until now; skip the null section.
+ for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ ELFSym *SectionSym = ELFSym::getSectionSym();
+ SectionSym->SectionIdx = ES.SectionIdx;
+ SymbolList.push_back(SectionSym);
+ ES.Sym = SymbolList.back();
}
// Emit string table
- EmitStringTable();
+ EmitStringTable(M.getModuleIdentifier());
// Emit the symbol table now, if non-empty.
EmitSymbolTable();
@@ -448,77 +731,106 @@ bool ELFWriter::doFinalization(Module &M) {
// Dump the sections and section table to the .o file.
OutputSectionsAndSectionTable();
- // We are done with the abstract symbols.
- SectionList.clear();
- NumSections = 0;
-
- // Release the name mangler object.
- delete Mang; Mang = 0;
return false;
}
+// RelocateField - Patch the relocatable field at 'Offset' in 'BO'
+// using a 'Value' of the given bit 'Size'
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+ int64_t Value, unsigned Size) {
+ if (Size == 32)
+ BO.fixWord32(Value, Offset);
+ else if (Size == 64)
+ BO.fixWord64(Value, Offset);
+ else
+ llvm_unreachable("don't know howto patch relocatable field");
+}
+
/// EmitRelocations - Emit relocations
void ELFWriter::EmitRelocations() {
+ // True if the target uses the relocation entry to hold the addend,
+ // otherwise the addend is written directly to the relocatable field.
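+  // (e.g. x86-64 uses .rela sections, which carry the addend in the
+  // relocation entry, while i386 uses .rel sections and stores the
+  // addend in the relocated field itself.)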
+ bool HasRelA = TEW->hasRelocationAddend();
+
// Create Relocation sections for each section which needs it.
- for (std::list<ELFSection>::iterator I = SectionList.begin(),
- E = SectionList.end(); I != E; ++I) {
+ for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+ ELFSection &S = *SectionList[i];
// This section does not have relocations
- if (!I->hasRelocations()) continue;
-
- // Get the relocation section for section 'I'
- bool HasRelA = TEW->hasRelocationAddend();
- ELFSection &RelSec = getRelocSection(I->getName(), HasRelA,
- TEW->getPrefELFAlignment());
+ if (!S.hasRelocations()) continue;
+ ELFSection &RelSec = getRelocSection(S);
// 'Link' - Section hdr idx of the associated symbol table
// 'Info' - Section hdr idx of the section to which the relocation applies
ELFSection &SymTab = getSymbolTableSection();
RelSec.Link = SymTab.SectionIdx;
- RelSec.Info = I->SectionIdx;
+ RelSec.Info = S.SectionIdx;
RelSec.EntSize = TEW->getRelocationEntrySize();
// Get the relocations from Section
- std::vector<MachineRelocation> Relos = I->getRelocations();
+ std::vector<MachineRelocation> Relos = S.getRelocations();
for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
MRE = Relos.end(); MRI != MRE; ++MRI) {
MachineRelocation &MR = *MRI;
- // Offset from the start of the section containing the symbol
- unsigned Offset = MR.getMachineCodeOffset();
+ // Relocatable field offset from the section start
+ unsigned RelOffset = MR.getMachineCodeOffset();
// Symbol index in the symbol table
unsigned SymIdx = 0;
- // Target specific ELF relocation type
+ // Target specific relocation field type and size
unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
-
- // Constant addend used to compute the value to be stored
- // into the relocatable field
+ unsigned RelTySize = TEW->getRelocationTySize(RelType);
int64_t Addend = 0;
      // There are several machine relocation types, and each one of
// them needs a different approach to retrieve the symbol table index.
if (MR.isGlobalValue()) {
const GlobalValue *G = MR.getGlobalValue();
+ int64_t GlobalOffset = MR.getConstantVal();
SymIdx = GblSymLookup[G];
- Addend = TEW->getAddendForRelTy(RelType);
+ if (G->hasPrivateLinkage()) {
+ // If the target uses a section offset in the relocation:
+ // SymIdx + Addend = section sym for global + section offset
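+        // e.g. a private global placed 16 bytes into .rodata is
+        // referenced through the .rodata section symbol with an addend
+        // of 16 plus any constant offset from the relocation itself.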
+ unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+ Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+ } else {
+ Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+ }
+ } else if (MR.isExternalSymbol()) {
+ const char *ExtSym = MR.getExternalSymbol();
+ SymIdx = ExtSymLookup[ExtSym];
+ Addend = TEW->getDefaultAddendForRelTy(RelType);
} else {
+ // Get the symbol index for the section symbol
unsigned SectionIdx = MR.getConstantVal();
- // TODO: use a map for this.
- for (std::list<ELFSym>::iterator I = SymbolList.begin(),
- E = SymbolList.end(); I != E; ++I)
- if ((SectionIdx == I->SectionIdx) &&
- (I->getType() == ELFSym::STT_SECTION)) {
- SymIdx = I->SymTabIdx;
- break;
- }
- Addend = (uint64_t)MR.getResultPointer();
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+ // The symbol offset inside the section
+ int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+ // For pc relative relocations where symbols are defined in the same
+ // section they are referenced, ignore the relocation entry and patch
+ // the relocatable field with the symbol offset directly.
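+      // e.g. for a simple 32-bit pc-relative relocation, the patched
+      // value is typically SymOffset - RelOffset.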
+ if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
+ int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
+ RelocateField(S, RelOffset, Value, RelTySize);
+ continue;
+ }
+
+ Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
}
+      // If the target does not store the addend in the relocation entry,
+      // the relocatable field itself must be patched to contain it;
+      // otherwise write zeros to make sure there is no garbage there.
+ RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
+
// Get the relocation entry and emit to the relocation section
- ELFRelocation Rel(Offset, SymIdx, RelType, HasRelA, Addend);
+ ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
EmitRelocation(RelSec, Rel, HasRelA);
}
}
@@ -554,7 +866,7 @@ void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab'
/// Section Header Table
-void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
const ELFSection &SHdr) {
SHdrTab.emitWord32(SHdr.NameIdx);
SHdrTab.emitWord32(SHdr.Type);
@@ -581,27 +893,30 @@ void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
/// EmitStringTable - If the current symbol table is non-empty, emit the string
/// table for it
-void ELFWriter::EmitStringTable() {
+void ELFWriter::EmitStringTable(const std::string &ModuleName) {
if (!SymbolList.size()) return; // Empty symbol table.
ELFSection &StrTab = getStringTableSection();
// Set the zero'th symbol to a null byte, as required.
StrTab.emitByte(0);
- // Walk on the symbol list and write symbol names into the
- // string table.
+ // Walk on the symbol list and write symbol names into the string table.
unsigned Index = 1;
- for (std::list<ELFSym>::iterator I = SymbolList.begin(),
- E = SymbolList.end(); I != E; ++I) {
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ ELFSym &Sym = *(*I);
- // Use the name mangler to uniquify the LLVM symbol.
std::string Name;
- if (I->GV) Name.append(Mang->getValueName(I->GV));
+ if (Sym.isGlobalValue())
+ Name.append(Mang->getMangledName(Sym.getGlobalValue()));
+ else if (Sym.isExternalSym())
+ Name.append(Sym.getExternalSymbol());
+ else if (Sym.isFileType())
+ Name.append(ModuleName);
if (Name.empty()) {
- I->NameIdx = 0;
+ Sym.NameIdx = 0;
} else {
- I->NameIdx = Index;
+ Sym.NameIdx = Index;
StrTab.emitString(Name);
// Keep track of the number of bytes emitted to this section.
@@ -612,11 +927,38 @@ void ELFWriter::EmitStringTable() {
StrTab.Size = Index;
}
+// SortSymbols - In the symbol table, local symbols must come before
+// all symbols with non-local bindings. The return value is the
+// position of the first non-local symbol.
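+// (This ordering is required by the ELF spec: a symbol table section's
+// sh_info field holds the index of the first non-local symbol.)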
+unsigned ELFWriter::SortSymbols() {
+ unsigned FirstNonLocalSymbol;
+ std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ if ((*I)->isLocalBind())
+ LocalSyms.push_back(*I);
+ else
+ OtherSyms.push_back(*I);
+ }
+ SymbolList.clear();
+ FirstNonLocalSymbol = LocalSyms.size();
+
+ for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+ SymbolList.push_back(LocalSyms[i]);
+
+ for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+ SymbolList.push_back(*I);
+
+ LocalSyms.clear();
+ OtherSyms.clear();
+
+ return FirstNonLocalSymbol;
+}
+
/// EmitSymbolTable - Emit the symbol table itself.
void ELFWriter::EmitSymbolTable() {
if (!SymbolList.size()) return; // Empty symbol table.
- unsigned FirstNonLocalSymbol = 1;
// Now that we have emitted the string table and know the offset into the
// string table of each symbol, emit the symbol table itself.
ELFSection &SymTab = getSymbolTableSection();
@@ -628,30 +970,27 @@ void ELFWriter::EmitSymbolTable() {
// Size of each symtab entry.
SymTab.EntSize = TEW->getSymTabEntrySize();
- // The first entry in the symtab is the null symbol
- ELFSym NullSym = ELFSym(0);
- EmitSymbol(SymTab, NullSym);
+ // Reorder the symbol table with local symbols first!
+ unsigned FirstNonLocalSymbol = SortSymbols();
- // Emit all the symbols to the symbol table. Skip the null
- // symbol, cause it's emitted already
- unsigned Index = 1;
- for (std::list<ELFSym>::iterator I = SymbolList.begin(),
- E = SymbolList.end(); I != E; ++I, ++Index) {
- // Keep track of the first non-local symbol
- if (I->getBind() == ELFSym::STB_LOCAL)
- FirstNonLocalSymbol++;
+ // Emit all the symbols to the symbol table.
+ for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+ ELFSym &Sym = *SymbolList[i];
// Emit symbol to the symbol table
- EmitSymbol(SymTab, *I);
+ EmitSymbol(SymTab, Sym);
- // Record the symbol table index for each global value
- if (I->GV)
- GblSymLookup[I->GV] = Index;
+ // Record the symbol table index for each symbol
+ if (Sym.isGlobalValue())
+ GblSymLookup[Sym.getGlobalValue()] = i;
+ else if (Sym.isExternalSym())
+ ExtSymLookup[Sym.getExternalSymbol()] = i;
// Keep track on the symbol index into the symbol table
- I->SymTabIdx = Index;
+ Sym.SymTabIdx = i;
}
+ // One greater than the symbol table index of the last local symbol
SymTab.Info = FirstNonLocalSymbol;
SymTab.Size = SymTab.size();
}
@@ -671,15 +1010,15 @@ void ELFWriter::EmitSectionTableStringTable() {
// the string table.
unsigned Index = 0;
- for (std::list<ELFSection>::iterator I = SectionList.begin(),
- E = SectionList.end(); I != E; ++I) {
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
// Set the index into the table. Note if we have lots of entries with
// common suffixes, we could memoize them here if we cared.
- I->NameIdx = Index;
- SHStrTab.emitString(I->getName());
+ S.NameIdx = Index;
+ SHStrTab.emitString(S.getName());
// Keep track of the number of bytes emitted to this section.
- Index += I->getName().size()+1;
+ Index += S.getName().size()+1;
}
// Set the size of .shstrtab now that we know what it is.
@@ -694,29 +1033,24 @@ void ELFWriter::OutputSectionsAndSectionTable() {
// Pass #1: Compute the file offset for each section.
size_t FileOff = ElfHdr.size(); // File header first.
- // Adjust alignment of all section if needed.
- for (std::list<ELFSection>::iterator I = SectionList.begin(),
- E = SectionList.end(); I != E; ++I) {
-
- // Section idx 0 has 0 offset
- if (!I->SectionIdx)
- continue;
-
- if (!I->size()) {
- I->Offset = FileOff;
+  // Adjust the alignment of all sections if needed; skip the null section.
+ for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ if (!ES.size()) {
+ ES.Offset = FileOff;
continue;
}
// Update Section size
- if (!I->Size)
- I->Size = I->size();
+ if (!ES.Size)
+ ES.Size = ES.size();
// Align FileOff to whatever the alignment restrictions of the section are.
- if (I->Align)
- FileOff = (FileOff+I->Align-1) & ~(I->Align-1);
+ if (ES.Align)
+ FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
- I->Offset = FileOff;
- FileOff += I->Size;
+ ES.Offset = FileOff;
+ FileOff += ES.Size;
}
// Align Section Header.
@@ -740,11 +1074,11 @@ void ELFWriter::OutputSectionsAndSectionTable() {
BinaryObject SHdrTable(isLittleEndian, is64Bit);
// Emit all of sections to the file and build the section header table.
- while (!SectionList.empty()) {
- ELFSection &S = *SectionList.begin();
- DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
- << ", Size: " << S.Size << ", Offset: " << S.Offset
- << ", SectionData Size: " << S.size() << "\n";
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+ << ", Size: " << S.Size << ", Offset: " << S.Offset
+ << ", SectionData Size: " << S.size() << "\n");
// Align FileOff to whatever the alignment restrictions of the section are.
if (S.size()) {
@@ -758,7 +1092,6 @@ void ELFWriter::OutputSectionsAndSectionTable() {
}
EmitSectionHeader(SHdrTable, S);
- SectionList.pop_front();
}
// Align output for the section table.
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index bab118c6e356..b61b4848b654 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -16,23 +16,35 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include <list>
#include <map>
namespace llvm {
class BinaryObject;
class Constant;
+ class ConstantInt;
class ConstantStruct;
class ELFCodeEmitter;
+ class ELFRelocation;
+ class ELFSection;
+ struct ELFSym;
class GlobalVariable;
+ class JITDebugRegisterer;
class Mangler;
class MachineCodeEmitter;
- class TargetAsmInfo;
+ class MachineConstantPoolEntry;
+ class ObjectCodeEmitter;
+ class MCAsmInfo;
class TargetELFWriterInfo;
+ class TargetLoweringObjectFile;
class raw_ostream;
- class ELFSection;
- class ELFSym;
- class ELFRelocation;
+ class SectionKind;
+ class MCContext;
+
+ typedef std::vector<ELFSym*>::iterator ELFSymIter;
+ typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+ typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+ typedef SetVector<const char *>::const_iterator PendingExtsIter;
+ typedef std::pair<const Constant *, int64_t> CstExprResTy;
/// ELFWriter - This class implements the common target-independent code for
/// writing ELF files. Targets should derive a class from this to
@@ -40,18 +52,18 @@ namespace llvm {
///
class ELFWriter : public MachineFunctionPass {
friend class ELFCodeEmitter;
+ friend class JITDebugRegisterer;
public:
static char ID;
- MachineCodeEmitter &getMachineCodeEmitter() const {
- return *(MachineCodeEmitter*)MCE;
+ /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+ ObjectCodeEmitter *getObjectCodeEmitter() {
+ return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
}
ELFWriter(raw_ostream &O, TargetMachine &TM);
~ELFWriter();
- typedef std::vector<unsigned char> DataBuffer;
-
protected:
/// Output stream to send the resultant object file to.
raw_ostream &O;
@@ -59,6 +71,9 @@ namespace llvm {
/// Target machine description.
TargetMachine &TM;
+ /// Context object for machine code objects.
+ MCContext &OutContext;
+
/// Target Elf Writer description.
const TargetELFWriterInfo *TEW;
@@ -67,11 +82,15 @@ namespace llvm {
/// MCE - The MachineCodeEmitter object that we are exposing to emit machine
/// code for functions to the .o file.
- ELFCodeEmitter *MCE;
+ ELFCodeEmitter *ElfCE;
- /// TAI - Target Asm Info, provide information about section names for
+    /// TLOF - Target Lowering Object File, provides section names for globals
+ /// and other object file specific stuff
+ const TargetLoweringObjectFile &TLOF;
+
+    /// MAI - Target Asm Info, provides information about section names for
/// globals and other target specific stuff.
- const TargetAsmInfo *TAI;
+ const MCAsmInfo *MAI;
//===------------------------------------------------------------------===//
// Properties inferred automatically from the target machine.
@@ -95,59 +114,49 @@ namespace llvm {
BinaryObject ElfHdr;
/// SectionList - This is the list of sections that we have emitted to the
- /// file. Once the file has been completely built, the section header table
+ /// file. Once the file has been completely built, the section header table
/// is constructed from this info.
- std::list<ELFSection> SectionList;
+ std::vector<ELFSection*> SectionList;
unsigned NumSections; // Always = SectionList.size()
/// SectionLookup - This is a mapping from section name to section number in
- /// the SectionList.
+    /// the SectionList. Used to quickly look up the section index for names
+    /// provided by MAI.
std::map<std::string, ELFSection*> SectionLookup;
+ /// PendingGlobals - Globals not processed as symbols yet.
+ SetVector<const GlobalValue*> PendingGlobals;
+
/// GblSymLookup - This is a mapping from global value to a symbol index
- /// in the symbol table. This is useful since relocations symbol references
- /// must be quickly mapped to a symbol table index
+ /// in the symbol table or private symbols list. This is useful since reloc
+    /// symbol references must be quickly mapped to their indices in the lists.
std::map<const GlobalValue*, uint32_t> GblSymLookup;
- /// SymbolList - This is the list of symbols emitted to the symbol table
- /// Local symbols go to the front and Globals to the back.
- std::list<ELFSym> SymbolList;
-
- /// PendingGlobals - List of externally defined symbols that we have been
- /// asked to emit, but have not seen a reference to. When a reference
- /// is seen, the symbol will move from this list to the SymbolList.
- SetVector<GlobalValue*> PendingGlobals;
-
- // Remove tab from section name prefix. This is necessary becase TAI
- // sometimes return a section name prefixed with a "\t" char. This is
- // a little bit dirty. FIXME: find a better approach, maybe add more
- // methods to TAI to get the clean name?
- void fixNameForSection(std::string &Name) {
- size_t Pos = Name.find("\t");
- if (Pos != std::string::npos)
- Name.erase(Pos, 1);
-
- Pos = Name.find(".section ");
- if (Pos != std::string::npos)
- Name.erase(Pos, 9);
-
- Pos = Name.find("\n");
- if (Pos != std::string::npos)
- Name.erase(Pos, 1);
- }
+ /// PendingExternals - Externals not processed as symbols yet.
+ SetVector<const char *> PendingExternals;
+
+ /// ExtSymLookup - This is a mapping from externals to a symbol index
+ /// in the symbol table list. This is useful since reloc symbol references
+ /// must be quickly mapped to their symbol table indices.
+ std::map<const char *, uint32_t> ExtSymLookup;
+
+ /// SymbolList - This is the list of symbols emitted to the symbol table.
+    /// When the SymbolList is finally built, local symbols must be placed
+    /// at the beginning and non-locals at the end.
+ std::vector<ELFSym*> SymbolList;
+
+    /// PrivateSyms - Record private symbols; a symbol recorded here must
+    /// never be present in the SymbolList.
+ std::vector<ELFSym*> PrivateSyms;
/// getSection - Return the section with the specified name, creating a new
/// section if one does not already exist.
ELFSection &getSection(const std::string &Name, unsigned Type,
unsigned Flags = 0, unsigned Align = 0) {
- std::string SectionName(Name);
- fixNameForSection(SectionName);
-
- ELFSection *&SN = SectionLookup[SectionName];
+ ELFSection *&SN = SectionLookup[Name];
if (SN) return *SN;
- SectionList.push_back(ELFSection(SectionName, isLittleEndian, is64Bit));
- SN = &SectionList.back();
+ SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
+ SN = SectionList.back();
SN->SectionIdx = NumSections++;
SN->Type = Type;
SN->Flags = Flags;
@@ -156,37 +165,6 @@ namespace llvm {
return *SN;
}
- /// TODO: support mangled names here to emit the right .text section
- /// for c++ object files.
- ELFSection &getTextSection() {
- return getSection(".text", ELFSection::SHT_PROGBITS,
- ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
- }
-
- /// Get jump table section on the section name returned by TAI
- ELFSection &getJumpTableSection(std::string SName, unsigned Align) {
- return getSection(SName, ELFSection::SHT_PROGBITS,
- ELFSection::SHF_ALLOC, Align);
- }
-
- /// Get a constant pool section based on the section name returned by TAI
- ELFSection &getConstantPoolSection(std::string SName, unsigned Align) {
- return getSection(SName, ELFSection::SHT_PROGBITS,
- ELFSection::SHF_MERGE | ELFSection::SHF_ALLOC, Align);
- }
-
- /// Return the relocation section of section 'S'. 'RelA' is true
- /// if the relocation section contains entries with addends.
- ELFSection &getRelocSection(std::string SName, bool RelA, unsigned Align) {
- std::string RelSName(".rel");
- unsigned SHdrTy = RelA ? ELFSection::SHT_RELA : ELFSection::SHT_REL;
-
- if (RelA) RelSName.append("a");
- RelSName.append(SName);
-
- return getSection(RelSName, SHdrTy, 0, Align);
- }
-
ELFSection &getNonExecStackSection() {
return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1);
}
@@ -203,24 +181,38 @@ namespace llvm {
return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1);
}
- ELFSection &getDataSection() {
- return getSection(".data", ELFSection::SHT_PROGBITS,
- ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4);
- }
-
- ELFSection &getBSSSection() {
- return getSection(".bss", ELFSection::SHT_NOBITS,
- ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4);
- }
-
ELFSection &getNullSection() {
return getSection("", ELFSection::SHT_NULL, 0);
}
+ ELFSection &getDataSection();
+ ELFSection &getBSSSection();
+ ELFSection &getCtorSection();
+ ELFSection &getDtorSection();
+ ELFSection &getJumpTableSection();
+ ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
+ ELFSection &getTextSection(Function *F);
+ ELFSection &getRelocSection(ELFSection &S);
+
// Helpers for obtaining ELF specific info.
- unsigned getGlobalELFLinkage(const GlobalValue *GV);
+ unsigned getGlobalELFBinding(const GlobalValue *GV);
+ unsigned getGlobalELFType(const GlobalValue *GV);
unsigned getGlobalELFVisibility(const GlobalValue *GV);
- unsigned getElfSectionFlags(unsigned Flags);
+
+ // AddPendingGlobalSymbol - Add a global to be processed and to
+      // the global symbol lookup; use a zero index because the table
+ // index will be determined later.
+ void AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup = false);
+
+ // AddPendingExternalSymbol - Add the external to be processed
+      // and to the external symbol lookup; use a zero index because
+ // the symbol table index will be determined later.
+ void AddPendingExternalSymbol(const char *External);
+
+      // AddToSymbolList - Update the symbol lookup and, if the symbol is
+      // private, add it to the PrivateSyms list, otherwise to SymbolList.
+ void AddToSymbolList(ELFSym *GblSym);
// As we complete the ELF file, we need to update fields in the ELF header
// (e.g. the location of the section table). These members keep track of
@@ -231,20 +223,27 @@ namespace llvm {
unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
private:
- void EmitFunctionDeclaration(const Function *F);
- void EmitGlobalVar(const GlobalVariable *GV);
+ void EmitGlobal(const GlobalValue *GV);
void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
void EmitGlobalConstantStruct(const ConstantStruct *CVS,
ELFSection &GblS);
- ELFSection &getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym);
+ void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+ void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset = 0);
+ bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+ void EmitXXStructorList(Constant *List, ELFSection &Xtor);
void EmitRelocations();
void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
void EmitSectionTableStringTable();
void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
void EmitSymbolTable();
- void EmitStringTable();
+ void EmitStringTable(const std::string &ModuleName);
void OutputSectionsAndSectionTable();
+ void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+ unsigned Size);
+ unsigned SortSymbols();
+ CstExprResTy ResolveConstantExpr(const Constant *CV);
};
}
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
new file mode 100644
index 000000000000..4f32c2b78b1f
--- /dev/null
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -0,0 +1,160 @@
+//===----- ExactHazardRecognizer.cpp - hazard recognizer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a hazard recognizer using the instruction itineraries
+// defined for the current target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "exact-hazards"
+#include "ExactHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+
+using namespace llvm;
+
+ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData) :
+ ScheduleHazardRecognizer(), ItinData(LItinData)
+{
+ // Determine the maximum depth of any itinerary. This determines the
+ // depth of the scoreboard. We always make the scoreboard at least 1
+ // cycle deep to avoid dealing with the boundary condition.
+ ScoreboardDepth = 1;
+ if (!ItinData.isEmpty()) {
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData.isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData.beginStage(idx);
+ const InstrStage *E = ItinData.endStage(idx);
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS)
+ ItinDepth += IS->getCycles();
+
+ ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
+ }
+ }
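+  // e.g. an itinerary whose stages take 2 and 3 cycles yields
+  // ItinDepth = 5, so the scoreboard must be at least 5 cycles deep.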
+
+ Scoreboard = new unsigned[ScoreboardDepth];
+ ScoreboardHead = 0;
+
+ DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = "
+ << ScoreboardDepth << '\n');
+}
+
+ExactHazardRecognizer::~ExactHazardRecognizer() {
+ delete [] Scoreboard;
+}
+
+void ExactHazardRecognizer::Reset() {
+ memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned));
+ ScoreboardHead = 0;
+}
+
+unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
+ return (ScoreboardHead + offset) % ScoreboardDepth;
+}
+
+void ExactHazardRecognizer::dumpScoreboard() {
+ errs() << "Scoreboard:\n";
+
+ unsigned last = ScoreboardDepth - 1;
+ while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = Scoreboard[getFutureIndex(i)];
+ errs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ errs() << ((FUs & (1 << j)) ? '1' : '0');
+ errs() << '\n';
+ }
+}
+
+ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) {
+ if (ItinData.isEmpty())
+ return NoHazard;
+
+ unsigned cycle = 0;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+ unsigned idx = SU->getInstr()->getDesc().getSchedClass();
+ for (const InstrStage *IS = ItinData.beginStage(idx),
+ *E = ItinData.endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+ if (!freeUnits) {
+ DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(SU->getInstr()->dump());
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (ItinData.isEmpty())
+ return;
+
+ unsigned cycle = 0;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ unsigned idx = SU->getInstr()->getDesc().getSchedClass();
+ for (const InstrStage *IS = ItinData.beginStage(idx),
+ *E = ItinData.endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME it would be more accurate to reserve
+    // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+
+ // reduce to a single unit
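+      // (each iteration clears the lowest set bit of freeUnit, so the
+      // loop exits with freeUnit holding only its highest set bit)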
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
+ assert(freeUnit && "No function unit available!");
+ Scoreboard[index] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(dumpScoreboard());
+}
+
+void ExactHazardRecognizer::AdvanceCycle() {
+ Scoreboard[ScoreboardHead] = 0;
+ ScoreboardHead = getFutureIndex(1);
+}
diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h
new file mode 100644
index 000000000000..71ac979e6cd8
--- /dev/null
+++ b/lib/CodeGen/ExactHazardRecognizer.h
@@ -0,0 +1,61 @@
+//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ExactHazardRecognizer class, which
+// implements hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+
+namespace llvm {
+ class ExactHazardRecognizer : public ScheduleHazardRecognizer {
+ // Itinerary data for the target.
+ const InstrItineraryData &ItinData;
+
+ // Scoreboard to track function unit usage. Scoreboard[0] is a
+ // mask of the FUs in use in the cycle currently being
+    // scheduled. Scoreboard[1] is a mask for the next cycle. The
+ // Scoreboard is used as a circular buffer with the current cycle
+ // indicated by ScoreboardHead.
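+    // e.g. with ScoreboardDepth = 4 and ScoreboardHead = 2, cycle
+    // offsets 0..3 map to Scoreboard indices 2, 3, 0, 1.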
+ unsigned *Scoreboard;
+
+ // The maximum number of cycles monitored by the Scoreboard. This
+ // value is determined based on the target itineraries to ensure
+ // that all hazards can be tracked.
+ unsigned ScoreboardDepth;
+
+    // Index into the Scoreboard that represents the current cycle.
+ unsigned ScoreboardHead;
+
+ // Return the scoreboard index to use for 'offset' cycles in the
+ // future. 'offset' of 0 returns ScoreboardHead.
+ unsigned getFutureIndex(unsigned offset);
+
+ // Print the scoreboard.
+ void dumpScoreboard();
+
+ public:
+ ExactHazardRecognizer(const InstrItineraryData &ItinData);
+ ~ExactHazardRecognizer();
+
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index cf2ebb39ad82..a57296c2a67f 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -18,17 +18,20 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
-
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
class VISIBILITY_HIDDEN Printer : public FunctionPass {
static char ID;
- std::ostream &OS;
+ raw_ostream &OS;
public:
- explicit Printer(std::ostream &OS = *cerr);
+ Printer() : FunctionPass(&ID), OS(errs()) {}
+ explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {}
+
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -74,27 +77,24 @@ GCModuleInfo::~GCModuleInfo() {
GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
const std::string &Name) {
- const char *Start = Name.c_str();
-
- strategy_map_type::iterator NMI =
- StrategyMap.find(Start, Start + Name.size());
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
if (NMI != StrategyMap.end())
return NMI->getValue();
for (GCRegistry::iterator I = GCRegistry::begin(),
E = GCRegistry::end(); I != E; ++I) {
- if (strcmp(Start, I->getName()) == 0) {
+ if (Name == I->getName()) {
GCStrategy *S = I->instantiate();
S->M = M;
S->Name = Name;
- StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S);
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
StrategyList.push_back(S);
return S;
}
}
-
- cerr << "unsupported GC: " << Name << "\n";
- abort();
+
+ errs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
}
GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
@@ -124,12 +124,10 @@ void GCModuleInfo::clear() {
char Printer::ID = 0;
-FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) {
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
return new Printer(OS);
}
-Printer::Printer(std::ostream &OS)
- : FunctionPass(&ID), OS(OS) {}
const char *Printer::getPassName() const {
return "Print Garbage Collector Information";
@@ -143,7 +141,7 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- default: assert(0 && "Unknown GC point kind");
+ default: llvm_unreachable("Unknown GC point kind");
case GC::Loop: return "loop";
case GC::Return: return "return";
case GC::PreCall: return "pre-call";
@@ -155,12 +153,12 @@ bool Printer::runOnFunction(Function &F) {
if (!F.hasGC()) {
GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
- OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n";
+ OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
RE = FD->roots_end(); RI != RE; ++RI)
OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
- OS << "GC safe points for " << FD->getFunction().getNameStart() << ":\n";
+ OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
for (GCFunctionInfo::iterator PI = FD->begin(),
PE = FD->end(); PI != PE; ++PI) {
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index 5a5ef84fa4eb..9cd2925e2d28 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -20,11 +20,11 @@ GCMetadataPrinter::GCMetadataPrinter() { }
GCMetadataPrinter::~GCMetadataPrinter() { }
void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) {
+ const MCAsmInfo &MAI) {
// Default is no action.
}
void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) {
+ const MCAsmInfo &MAI) {
// Default is no action.
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index ad7421abc211..6d0de41e2c31 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -28,6 +28,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -70,7 +72,8 @@ namespace {
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator MI);
unsigned InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
void FindStackOffsets(MachineFunction &MF);
@@ -107,8 +110,8 @@ GCStrategy::~GCStrategy() {
bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
bool GCStrategy::performCustomLowering(Function &F) {
- cerr << "gc " << getName() << " must override performCustomLowering.\n";
- abort();
+ errs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable(0);
return 0;
}
@@ -327,11 +330,13 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
}
unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
unsigned Label = MMI->NextLabelID();
- // N.B. we assume that MI is *not* equal to the "end()" iterator.
- BuildMI(MBB, MI, MI->getDebugLoc(),
+
+ BuildMI(MBB, MI, DL,
TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label);
+
return Label;
}
@@ -342,10 +347,12 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
++RAI;
if (FI->getStrategy().needsSafePoint(GC::PreCall))
- FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI));
+ FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI,
+ CI->getDebugLoc()));
if (FI->getStrategy().needsSafePoint(GC::PostCall))
- FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI));
+ FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI,
+ CI->getDebugLoc()));
}
void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index d5e7ea59a745..7b613ff25013 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ifcvt"
+#include "BranchFolding.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -21,6 +22,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -226,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
if (!TII) return false;
- DOUT << "\nIfcvt: function (" << ++FnNum << ") \'"
- << MF.getFunction()->getName() << "\'";
+ DEBUG(errs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'");
if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
- DOUT << " skipped\n";
+ DEBUG(errs() << " skipped\n");
return false;
}
- DOUT << "\n";
+ DEBUG(errs() << "\n");
MF.RenumberBlocks();
BBAnalysis.resize(MF.getNumBlockIDs());
@@ -278,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
- << "): BB#" << BBI.BB->getNumber() << " ("
- << ((Kind == ICSimpleFalse)
- ? BBI.FalseBB->getNumber()
- : BBI.TrueBB->getNumber()) << ") ";
+ DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
RetVal = IfConvertSimple(BBI, Kind);
- DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) NumSimpleFalse++;
else NumSimple++;
@@ -301,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (DisableTriangleR && !isFalse && isRev) break;
if (DisableTriangleF && isFalse && !isRev) break;
if (DisableTriangleFR && isFalse && isRev) break;
- DOUT << "Ifcvt (Triangle";
+ DEBUG(errs() << "Ifcvt (Triangle");
if (isFalse)
- DOUT << " false";
+ DEBUG(errs() << " false");
if (isRev)
- DOUT << " rev";
- DOUT << "): BB#" << BBI.BB->getNumber() << " (T:"
- << BBI.TrueBB->getNumber() << ",F:"
- << BBI.FalseBB->getNumber() << ") ";
+ DEBUG(errs() << " rev");
+ DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertTriangle(BBI, Kind);
- DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) {
if (isRev) NumTriangleFRev++;
@@ -324,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
}
case ICDiamond: {
if (DisableDiamond) break;
- DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
- << BBI.TrueBB->getNumber() << ",F:"
- << BBI.FalseBB->getNumber() << ") ";
+ DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
- DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) NumDiamonds++;
break;
}
@@ -358,6 +361,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
Roots.clear();
BBAnalysis.clear();
+ if (MadeChange) {
+ BranchFolder BF(false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
return MadeChange;
}
@@ -1130,8 +1140,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
if (TII->isPredicated(I))
continue;
if (!TII->PredicateInstruction(I, Cond)) {
- cerr << "Unable to predicate " << *I << "!\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
}
}
@@ -1164,8 +1176,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
if (!isPredicated)
if (!TII->PredicateInstruction(MI, Cond)) {
- cerr << "Unable to predicate " << *MI << "!\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
}
}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 052334a05ba5..3e3b28a8109b 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -16,7 +16,9 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
@@ -39,11 +41,11 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
switch((int)Fn->arg_begin()->getType()->getTypeID()) {
case Type::FloatTyID:
EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
- Type::FloatTy);
+ Type::getFloatTy(M.getContext()));
break;
case Type::DoubleTyID:
EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
- Type::DoubleTy);
+ Type::getDoubleTy(M.getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
@@ -82,39 +84,43 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
}
void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->isDeclaration() && !I->use_empty())
switch (I->getIntrinsicID()) {
default: break;
case Intrinsic::setjmp:
EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
- Type::Int32Ty);
+ Type::getInt32Ty(M.getContext()));
break;
case Intrinsic::longjmp:
EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
- Type::VoidTy);
+ Type::getVoidTy(M.getContext()));
break;
case Intrinsic::siglongjmp:
EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
- Type::VoidTy);
+ Type::getVoidTy(M.getContext()));
break;
case Intrinsic::memcpy:
- M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- TD.getIntPtrType(), (Type *)0);
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
break;
case Intrinsic::memmove:
- M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- TD.getIntPtrType(), (Type *)0);
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
break;
case Intrinsic::memset:
- M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- Type::Int32Ty,
- TD.getIntPtrType(), (Type *)0);
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
break;
case Intrinsic::sqrt:
EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
@@ -148,7 +154,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
/// LowerBSWAP - Emit the code to lower bswap of V before the specified
/// instruction IP.
-static Value *LowerBSWAP(Value *V, Instruction *IP) {
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
@@ -156,7 +162,7 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) {
IRBuilder<> Builder(IP->getParent(), IP);
switch(BitSize) {
- default: assert(0 && "Unhandled type size of value to byteswap!");
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
case 16: {
Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
"bswap.2");
@@ -172,11 +178,13 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) {
"bswap.3");
Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
"bswap.2");
- Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
"bswap.1");
- Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000),
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
"bswap.and3");
- Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00),
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
"bswap.and2");
Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
@@ -194,31 +202,38 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) {
"bswap.5");
Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
"bswap.4");
- Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
"bswap.3");
- Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40),
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
"bswap.2");
- Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56),
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
"bswap.1");
Tmp7 = Builder.CreateAnd(Tmp7,
- ConstantInt::get(Type::Int64Ty,
+ ConstantInt::get(Type::getInt64Ty(Context),
0xFF000000000000ULL),
"bswap.and7");
Tmp6 = Builder.CreateAnd(Tmp6,
- ConstantInt::get(Type::Int64Ty,
+ ConstantInt::get(Type::getInt64Ty(Context),
0xFF0000000000ULL),
"bswap.and6");
Tmp5 = Builder.CreateAnd(Tmp5,
- ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
"bswap.and5");
Tmp4 = Builder.CreateAnd(Tmp4,
- ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
"bswap.and4");
Tmp3 = Builder.CreateAnd(Tmp3,
- ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
"bswap.and3");
Tmp2 = Builder.CreateAnd(Tmp2,
- ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
"bswap.and2");
Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
@@ -235,7 +250,7 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) {
/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
/// instruction IP.
-static Value *LowerCTPOP(Value *V, Instruction *IP) {
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
static const uint64_t MaskValues[6] = {
@@ -257,7 +272,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) {
Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
Value *VShift = Builder.CreateLShr(PartValue,
- ConstantInt::get(V->getType(), i),
+ ConstantInt::get(V->getType(), i),
"ctpop.sh");
Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
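This loop is the standard SWAR population count: on step ct it sums adjacent 2^ct-bit fields into fields twice as wide, so six steps cover 64 bits. The MaskValues table itself sits outside this hunk; assuming the conventional constants, a scalar model:

    #include <cstdint>

    unsigned ctpop64(uint64_t v) {
      static const uint64_t Mask[6] = {
        0x5555555555555555ULL, 0x3333333333333333ULL,
        0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
        0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
      };
      for (unsigned i = 1, ct = 0; i <= 32; i <<= 1, ++ct)  // i doubles each step
        v = (v & Mask[ct]) + ((v >> i) & Mask[ct]);         // and1/and2, then ctpop.step
      return (unsigned)v;
    }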
@@ -275,7 +290,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) {
/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
/// instruction IP.
-static Value *LowerCTLZ(Value *V, Instruction *IP) {
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
IRBuilder<> Builder(IP->getParent(), IP);
@@ -287,353 +302,21 @@ static Value *LowerCTLZ(Value *V, Instruction *IP) {
}
V = Builder.CreateNot(V);
- return LowerCTPOP(V, IP);
-}
-
-/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
-/// three integer arguments. The first argument is the Value from which the
-/// bits will be selected. It may be of any bit width. The second and third
-/// arguments specify a range of bits to select with the second argument
-/// specifying the low bit and the third argument specifying the high bit. Both
-/// must be type i32. The result is the corresponding selected bits from the
-/// Value in the same width as the Value (first argument). If the low bit index
-/// is higher than the high bit index then the inverse selection is done and
-/// the bits are returned in inverse order.
-/// @brief Lowering of llvm.part.select intrinsic.
-static Instruction *LowerPartSelect(CallInst *CI) {
- IRBuilder<> Builder;
-
- // Make sure we're dealing with a part select intrinsic here
- Function *F = CI->getCalledFunction();
- const FunctionType *FT = F->getFunctionType();
- if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
- FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
- !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
- return CI;
-
- // Get the intrinsic implementation function by converting all the . to _
- // in the intrinsic's function name and then reconstructing the function
- // declaration.
- std::string Name(F->getName());
- for (unsigned i = 4; i < Name.length(); ++i)
- if (Name[i] == '.')
- Name[i] = '_';
- Module* M = F->getParent();
- F = cast<Function>(M->getOrInsertFunction(Name, FT));
- F->setLinkage(GlobalValue::WeakAnyLinkage);
-
- // If we haven't defined the impl function yet, do so now
- if (F->isDeclaration()) {
-
- // Get the arguments to the function
- Function::arg_iterator args = F->arg_begin();
- Value* Val = args++; Val->setName("Val");
- Value* Lo = args++; Lo->setName("Lo");
- Value* Hi = args++; Hi->setName("High");
-
- // We want to select a range of bits here such that [Hi, Lo] is shifted
- // down to the low bits. However, it is quite possible that Hi is smaller
- // than Lo in which case the bits have to be reversed.
-
- // Create the blocks we will need for the two cases (forward, reverse)
- BasicBlock* CurBB = BasicBlock::Create("entry", F);
- BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
- BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
- BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
- BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
- BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent());
-
- Builder.SetInsertPoint(CurBB);
-
- // Cast Hi and Lo to the size of Val so the widths are all the same
- if (Hi->getType() != Val->getType())
- Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false,
- "tmp");
- if (Lo->getType() != Val->getType())
- Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false,
- "tmp");
-
- // Compute a few things that both cases will need, up front.
- Constant* Zero = ConstantInt::get(Val->getType(), 0);
- Constant* One = ConstantInt::get(Val->getType(), 1);
- Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
-
- // Compare the Hi and Lo bit positions. This is used to determine
- // which case we have (forward or reverse)
- Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less");
- Builder.CreateCondBr(Cmp, RevSize, FwdSize);
-
- // First, compute the number of bits in the forward case.
- Builder.SetInsertPoint(FwdSize);
- Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits");
- Builder.CreateBr(Compute);
-
- // Second, compute the number of bits in the reverse case.
- Builder.SetInsertPoint(RevSize);
- Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits");
- Builder.CreateBr(Compute);
-
- // Now, compute the bit range. Start by getting the bitsize and the shift
- // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
- // the number of bits we want in the range. We shift the bits down to the
- // least significant bits, apply the mask to zero out unwanted high bits,
- // and we have computed the "forward" result. It may still need to be
- // reversed.
- Builder.SetInsertPoint(Compute);
-
- // Get the BitSize from one of the two subtractions
- PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits");
- BitSize->reserveOperandSpace(2);
- BitSize->addIncoming(FBitSize, FwdSize);
- BitSize->addIncoming(RBitSize, RevSize);
-
- // Get the ShiftAmount as the smaller of Hi/Lo
- PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt");
- ShiftAmt->reserveOperandSpace(2);
- ShiftAmt->addIncoming(Lo, FwdSize);
- ShiftAmt->addIncoming(Hi, RevSize);
-
- // Increment the bit size
- Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits");
-
- // Create a Mask to zero out the high order bits.
- Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask");
- Mask = Builder.CreateNot(Mask, "mask");
-
- // Shift the bits down and apply the mask
- Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres");
- FRes = Builder.CreateAnd(FRes, Mask, "fres");
- Builder.CreateCondBr(Cmp, Reverse, RsltBlk);
-
- // In the Reverse block we have the mask already in FRes but we must reverse
- // it by shifting FRes bits right and putting them in RRes by shifting them
- // in from left.
- Builder.SetInsertPoint(Reverse);
-
- // First set up our loop counters
- PHINode *Count = Builder.CreatePHI(Val->getType(), "count");
- Count->reserveOperandSpace(2);
- Count->addIncoming(BitSizePlusOne, Compute);
-
- // Next, get the value that we are shifting.
- PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
- BitsToShift->reserveOperandSpace(2);
- BitsToShift->addIncoming(FRes, Compute);
-
- // Finally, get the result of the last computation
- PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
- RRes->reserveOperandSpace(2);
- RRes->addIncoming(Zero, Compute);
-
- // Decrement the counter
- Value *Decr = Builder.CreateSub(Count, One, "decr");
- Count->addIncoming(Decr, Reverse);
-
- // Compute the Bit that we want to move
- Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit");
-
- // Compute the new value for next iteration.
- Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift");
- BitsToShift->addIncoming(NewVal, Reverse);
-
- // Shift the bit into the low bits of the result.
- Value *NewRes = Builder.CreateShl(RRes, One, "lshift");
- NewRes = Builder.CreateOr(NewRes, Bit, "addbit");
- RRes->addIncoming(NewRes, Reverse);
-
- // Terminate loop if we've moved all the bits.
- Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond");
- Builder.CreateCondBr(Cond, RsltBlk, Reverse);
-
- // Finally, in the result block, select one of the two results with a PHI
- // node and return the result;
- Builder.SetInsertPoint(RsltBlk);
- PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select");
- BitSelect->reserveOperandSpace(2);
- BitSelect->addIncoming(FRes, Compute);
- BitSelect->addIncoming(NewRes, Reverse);
- Builder.CreateRet(BitSelect);
- }
-
- // Return a call to the implementation function
- Builder.SetInsertPoint(CI->getParent(), CI);
- CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1),
- CI->getOperand(2), CI->getOperand(3));
- NewCI->setName(CI->getName());
- return NewCI;
-}
-
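For reference, the forward case of the intrinsic deleted here reduces to a shift plus a mask; only the Lo > Hi case needs the bit-by-bit Reverse loop above, which mirrors the selected field. A scalar sketch of the forward path (name illustrative):

    #include <cstdint>

    uint64_t part_select_fwd(uint64_t Val, unsigned Lo, unsigned Hi) {
      unsigned Bits = Hi - Lo + 1;                             // fbits + 1
      uint64_t Mask = Bits >= 64 ? ~0ULL : (1ULL << Bits) - 1;
      return (Val >> Lo) & Mask;                               // shift down, clear high bits
    }

The llvm.part.set lowering removed just below composes this same selection with a masked insert of the replacement bits.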
-/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
-/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
-/// The first two arguments can be any bit width. The result is the same width
-/// as %Value. The operation replaces bits between %Low and %High with the value
-/// in %Replacement. If %Replacement is not the same width, it is truncated or
-/// zero extended as appropriate to fit the bits being replaced. If %Low is
-/// greater than %High then the inverse set of bits are replaced.
-/// @brief Lowering of llvm.bit.part.set intrinsic.
-static Instruction *LowerPartSet(CallInst *CI) {
- IRBuilder<> Builder;
-
- // Make sure we're dealing with a part select intrinsic here
- Function *F = CI->getCalledFunction();
- const FunctionType *FT = F->getFunctionType();
- if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
- FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
- !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
- !FT->getParamType(3)->isInteger())
- return CI;
-
- // Get the intrinsic implementation function by converting all the . to _
- // in the intrinsic's function name and then reconstructing the function
- // declaration.
- std::string Name(F->getName());
- for (unsigned i = 4; i < Name.length(); ++i)
- if (Name[i] == '.')
- Name[i] = '_';
- Module* M = F->getParent();
- F = cast<Function>(M->getOrInsertFunction(Name, FT));
- F->setLinkage(GlobalValue::WeakAnyLinkage);
-
- // If we haven't defined the impl function yet, do so now
- if (F->isDeclaration()) {
- // Get the arguments for the function.
- Function::arg_iterator args = F->arg_begin();
- Value* Val = args++; Val->setName("Val");
- Value* Rep = args++; Rep->setName("Rep");
- Value* Lo = args++; Lo->setName("Lo");
- Value* Hi = args++; Hi->setName("Hi");
-
- // Get some types we need
- const IntegerType* ValTy = cast<IntegerType>(Val->getType());
- const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
- uint32_t RepBits = RepTy->getBitWidth();
-
- // Constant Definitions
- ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
- ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
- ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
- ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
- ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
- ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
- ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
-
- // Basic blocks we fill in below.
- BasicBlock* entry = BasicBlock::Create("entry", F, 0);
- BasicBlock* large = BasicBlock::Create("large", F, 0);
- BasicBlock* small = BasicBlock::Create("small", F, 0);
- BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
- BasicBlock* result = BasicBlock::Create("result", F, 0);
-
- // BASIC BLOCK: entry
- Builder.SetInsertPoint(entry);
- // First, get the number of bits that we're placing as an i32
- Value* is_forward = Builder.CreateICmpULT(Lo, Hi);
- Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo);
- Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi);
- Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn);
- NumBits = Builder.CreateAdd(NumBits, One);
- // Now, convert Lo and Hi to ValTy bit width
- Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false);
- // Determine if the replacement bits are larger than the number of bits we
- // are replacing and deal with it.
- Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth);
- Builder.CreateCondBr(is_large, large, small);
-
- // BASIC BLOCK: large
- Builder.SetInsertPoint(large);
- Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits);
- MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(),
- /* isSigned */ false);
- Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits);
- Value* Rep2 = Builder.CreateAnd(Mask1, Rep);
- Builder.CreateBr(small);
-
- // BASIC BLOCK: small
- Builder.SetInsertPoint(small);
- PHINode* Rep3 = Builder.CreatePHI(RepTy);
- Rep3->reserveOperandSpace(2);
- Rep3->addIncoming(Rep2, large);
- Rep3->addIncoming(Rep, entry);
- Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false);
- Builder.CreateCondBr(is_forward, result, reverse);
-
- // BASIC BLOCK: reverse (reverses the bits of the replacement)
- Builder.SetInsertPoint(reverse);
- // Set up our loop counter as a PHI so we can decrement on each iteration.
- // We will loop for the number of bits in the replacement value.
- PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count");
- Count->reserveOperandSpace(2);
- Count->addIncoming(NumBits, small);
-
- // Get the value that we are shifting bits out of as a PHI because
- // we'll change this with each iteration.
- PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
- BitsToShift->reserveOperandSpace(2);
- BitsToShift->addIncoming(Rep4, small);
-
- // Get the result of the last computation or zero on first iteration
- PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
- RRes->reserveOperandSpace(2);
- RRes->addIncoming(ValZero, small);
-
- // Decrement the loop counter by one
- Value *Decr = Builder.CreateSub(Count, One);
- Count->addIncoming(Decr, reverse);
-
- // Get the bit that we want to move into the result
- Value *Bit = Builder.CreateAnd(BitsToShift, ValOne);
-
- // Compute the new value of the bits to shift for the next iteration.
- Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne);
- BitsToShift->addIncoming(NewVal, reverse);
-
- // Shift the bit we extracted into the low bit of the result.
- Value *NewRes = Builder.CreateShl(RRes, ValOne);
- NewRes = Builder.CreateOr(NewRes, Bit);
- RRes->addIncoming(NewRes, reverse);
-
- // Terminate loop if we've moved all the bits.
- Value *Cond = Builder.CreateICmpEQ(Decr, Zero);
- Builder.CreateCondBr(Cond, result, reverse);
-
- // BASIC BLOCK: result
- Builder.SetInsertPoint(result);
- PHINode *Rplcmnt = Builder.CreatePHI(Val->getType());
- Rplcmnt->reserveOperandSpace(2);
- Rplcmnt->addIncoming(NewRes, reverse);
- Rplcmnt->addIncoming(Rep4, small);
- Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false);
- Value* t1 = Builder.CreateShl(ValMask, Lo);
- Value* t2 = Builder.CreateNot(t1);
- Value* t3 = Builder.CreateShl(t1, t0);
- Value* t4 = Builder.CreateOr(t2, t3);
- Value* t5 = Builder.CreateAnd(t4, Val);
- Value* t6 = Builder.CreateShl(Rplcmnt, Lo);
- Value* Rslt = Builder.CreateOr(t5, t6, "part_set");
- Builder.CreateRet(Rslt);
- }
-
- // Return a call to the implementation function
- Builder.SetInsertPoint(CI->getParent(), CI);
- CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1),
- CI->getOperand(2), CI->getOperand(3),
- CI->getOperand(4));
- NewCI->setName(CI->getName());
- return NewCI;
+ return LowerCTPOP(Context, V, IP);
}
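The portion of LowerCTLZ above this hunk (elided by the diff) ORs V with successively larger logical right-shifts of itself, smearing the most significant set bit into every lower position; the leading-zero count then equals the population count of the complement, which is why it ends by calling LowerCTPOP. Scalar model:

    #include <bitset>
    #include <cstdint>

    unsigned ctlz32(uint32_t v) {
      for (unsigned sh = 1; sh < 32; sh <<= 1)
        v |= v >> sh;                       // smear the top set bit downward
      return std::bitset<32>(~v).count();   // only the original leading zeros survive
    }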
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
const char *Dname,
const char *LDname) {
switch (CI->getOperand(1)->getType()->getTypeID()) {
- default: assert(0 && "Invalid type in intrinsic"); abort();
+ default: llvm_unreachable("Invalid type in intrinsic");
case Type::FloatTyID:
ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
- Type::FloatTy);
+ Type::getFloatTy(CI->getContext()));
break;
case Type::DoubleTyID:
ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
- Type::DoubleTy);
+ Type::getDoubleTy(CI->getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
@@ -646,19 +329,18 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
Function *Callee = CI->getCalledFunction();
assert(Callee && "Cannot lower an indirect call!");
switch (Callee->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
- cerr << "Cannot lower a call to a non-intrinsic function '"
- << Callee->getName() << "'!\n";
- abort();
+ llvm_report_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
default:
- cerr << "Error: Code generator does not support intrinsic function '"
- << Callee->getName() << "'!\n";
- abort();
+ llvm_report_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
// The setjmp/longjmp intrinsics should only exist in the code if it was
// never optimized (ie, right out of the CFE), or if it has been hacked on
@@ -666,38 +348,38 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
// convert the call to an explicit setjmp or longjmp call.
case Intrinsic::setjmp: {
Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
- Type::Int32Ty);
- if (CI->getType() != Type::VoidTy)
+ Type::getInt32Ty(Context));
+ if (CI->getType() != Type::getVoidTy(Context))
CI->replaceAllUsesWith(V);
break;
}
case Intrinsic::sigsetjmp:
- if (CI->getType() != Type::VoidTy)
+ if (CI->getType() != Type::getVoidTy(Context))
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
case Intrinsic::longjmp: {
ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
- Type::VoidTy);
+ Type::getVoidTy(Context));
break;
}
case Intrinsic::siglongjmp: {
// Insert the call to abort
ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
- Type::VoidTy);
+ Type::getVoidTy(Context));
break;
}
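In C terms these cases map as follows: llvm.setjmp becomes a call to setjmp with an i32 result, llvm.longjmp a call to longjmp returning void, llvm.sigsetjmp folds to a constant null result, and llvm.siglongjmp is lowered to a plain abort. A sketch of the resulting behavior (not the emitted IR):

    #include <setjmp.h>
    #include <stdlib.h>

    static jmp_buf env;
    void sketch(void) {
      if (setjmp(env) == 0)   /* llvm.setjmp  -> setjmp, i32 result   */
        longjmp(env, 1);      /* llvm.longjmp -> longjmp, void result */
      abort();                /* llvm.siglongjmp -> abort()           */
    }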
case Intrinsic::ctpop:
- CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI));
break;
case Intrinsic::bswap:
- CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI));
break;
case Intrinsic::ctlz:
- CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI));
break;
case Intrinsic::cttz: {
@@ -707,24 +389,16 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
NotSrc->setName(Src->getName() + ".not");
Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
SrcM1 = Builder.CreateSub(Src, SrcM1);
- Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
CI->replaceAllUsesWith(Src);
break;
}
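The cttz case uses the identity visible above: ~Src & (Src - 1) is all-ones in exactly the trailing-zero positions of Src, so its population count is the trailing-zero count. Scalar check:

    #include <bitset>
    #include <cstdint>

    unsigned cttz32(uint32_t x) {
      return std::bitset<32>(~x & (x - 1)).count();  // x = 8 -> 0b0111 -> 3
    }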
- case Intrinsic::part_select:
- CI->replaceAllUsesWith(LowerPartSelect(CI));
- break;
-
- case Intrinsic::part_set:
- CI->replaceAllUsesWith(LowerPartSet(CI));
- break;
-
case Intrinsic::stacksave:
case Intrinsic::stackrestore: {
if (!Warned)
- cerr << "WARNING: this target does not support the llvm.stack"
- << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
"save" : "restore") << " intrinsic.\n";
Warned = true;
if (Callee->getIntrinsicID() == Intrinsic::stacksave)
@@ -734,8 +408,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::returnaddress:
case Intrinsic::frameaddress:
- cerr << "WARNING: this target does not support the llvm."
- << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
"return" : "frame") << "address intrinsic.\n";
CI->replaceAllUsesWith(ConstantPointerNull::get(
cast<PointerType>(CI->getType())));
@@ -747,9 +421,9 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::pcmarker:
break; // Simply strip out pcmarker on unsupported architectures
case Intrinsic::readcyclecounter: {
- cerr << "WARNING: this target does not support the llvm.readcyclecoun"
- << "ter intrinsic. It is being lowered to a constant 0\n";
- CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
@@ -761,13 +435,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break; // Simply strip out debugging intrinsics
case Intrinsic::eh_exception:
- case Intrinsic::eh_selector_i32:
- case Intrinsic::eh_selector_i64:
+ case Intrinsic::eh_selector:
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
- case Intrinsic::eh_typeid_for_i32:
- case Intrinsic::eh_typeid_for_i64:
+ case Intrinsic::eh_typeid_for:
// Return something different to eh_selector.
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
@@ -776,7 +448,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break; // Strip out annotate intrinsic
case Intrinsic::memcpy: {
- const IntegerType *IntPtr = TD.getIntPtrType();
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -787,7 +459,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memmove: {
- const IntegerType *IntPtr = TD.getIntPtrType();
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -798,13 +470,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memset: {
- const IntegerType *IntPtr = TD.getIntPtrType();
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
/* isSigned */ false);
Value *Ops[3];
Ops[0] = CI->getOperand(1);
// Extend the amount to i32.
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty,
+ Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context),
/* isSigned */ false);
Ops[2] = Size;
ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
@@ -840,7 +512,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
}
case Intrinsic::flt_rounds:
// Lower to "round to the nearest"
- if (CI->getType() != Type::VoidTy)
+ if (CI->getType() != Type::getVoidTy(Context))
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a163cac75b0f..4e713a6ed316 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -15,14 +15,16 @@
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
using namespace llvm;
namespace llvm {
@@ -37,26 +39,31 @@ static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden,
cl::desc("Dump emitter generated instructions as assembly"));
static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
+static cl::opt<bool> HoistConstants("hoist-constants", cl::Hidden,
+ cl::desc("Hoist constants out of loops"));
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
-// When this works it will be on by default.
-static cl::opt<bool>
-DisablePostRAScheduler("disable-post-RA-scheduler",
- cl::desc("Disable scheduling after register allocation"),
- cl::init(true));
-
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
-// able to enable or disable fast-isel independently from -fast.
+// able to enable or disable fast-isel independently from -O0.
static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
- cl::desc("Enable the experimental \"fast\" instruction selector"));
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ const std::string &TargetTriple)
+ : TargetMachine(T) {
+ AsmInfo = T.createAsmInfo(TargetTriple);
+}
+
+
FileModel::Model
LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- raw_ostream &Out,
+ formatted_raw_ostream &Out,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel) {
// Add common CodeGen passes.
@@ -67,10 +74,10 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
PM.add(createDebugLabelFoldingPass());
if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(cerr));
+ PM.add(createMachineFunctionPrinterPass(errs()));
if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(cerr));
+ PM.add(createMachineFunctionPrinterPass(errs()));
if (OptLevel != CodeGenOpt::None)
PM.add(createCodePlacementOptPass());
@@ -92,6 +99,19 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
return FileModel::Error;
}
+bool LLVMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ formatted_raw_ostream &Out) {
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(Out, *this, getMCAsmInfo(), Verbose);
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+ return false;
+}
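As with the other hooks in this file, a true return signals failure, here meaning the target registry has no AsmPrinter registered for this target. A hypothetical call site (sketch, following the addPassesToEmitFile error convention):

    // Ask for verbose assembly; propagate failure upward.
    if (addAssemblyEmitter(PM, OptLevel, /*Verbose=*/true, Out))
      return FileModel::Error;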
+
/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
/// be split up (e.g., to add an object writer pass), this method can be used to
/// finish up adding passes to emit the file, if necessary.
@@ -99,13 +119,12 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
MachineCodeEmitter *MCE,
CodeGenOpt::Level OptLevel) {
if (MCE)
- addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE);
+ addSimpleCodeEmitter(PM, OptLevel, *MCE);
+ if (PrintEmittedAsm)
+ addAssemblyEmitter(PM, OptLevel, true, ferrs());
PM.add(createGCInfoDeleter());
- // Delete machine code for this function
- PM.add(createMachineCodeDeleter());
-
return false; // success!
}
@@ -116,12 +135,27 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
JITCodeEmitter *JCE,
CodeGenOpt::Level OptLevel) {
if (JCE)
- addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE);
+ addSimpleCodeEmitter(PM, OptLevel, *JCE);
+ if (PrintEmittedAsm)
+ addAssemblyEmitter(PM, OptLevel, true, ferrs());
PM.add(createGCInfoDeleter());
- // Delete machine code for this function
- PM.add(createMachineCodeDeleter());
+ return false; // success!
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+ ObjectCodeEmitter *OCE,
+ CodeGenOpt::Level OptLevel) {
+ if (OCE)
+ addSimpleCodeEmitter(PM, OptLevel, *OCE);
+ if (PrintEmittedAsm)
+ addAssemblyEmitter(PM, OptLevel, true, ferrs());
+
+ PM.add(createGCInfoDeleter());
return false; // success!
}
@@ -140,15 +174,14 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return true;
if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(cerr));
+ PM.add(createMachineFunctionPrinterPass(errs()));
- addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE);
+ addCodeEmitter(PM, OptLevel, MCE);
+ if (PrintEmittedAsm)
+ addAssemblyEmitter(PM, OptLevel, true, ferrs());
PM.add(createGCInfoDeleter());
- // Delete machine code for this function
- PM.add(createMachineCodeDeleter());
-
return false; // success!
}
@@ -166,22 +199,21 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return true;
if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(cerr));
+ PM.add(createMachineFunctionPrinterPass(errs()));
- addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE);
+ addCodeEmitter(PM, OptLevel, JCE);
+ if (PrintEmittedAsm)
+ addAssemblyEmitter(PM, OptLevel, true, ferrs());
PM.add(createGCInfoDeleter());
- // Delete machine code for this function
- PM.add(createMachineCodeDeleter());
-
return false; // success!
}
static void printAndVerify(PassManagerBase &PM,
bool allowDoubleDefs = false) {
if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(cerr));
+ PM.add(createMachineFunctionPrinterPass(errs()));
if (VerifyMachineCode)
PM.add(createMachineVerifierPass(allowDoubleDefs));
@@ -203,18 +235,31 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Turn exception handling constructs into something the code generators can
// handle.
- if (!getTargetAsmInfo()->doesSupportExceptionHandling())
- PM.add(createLowerInvokePass(getTargetLowering()));
- else
+ switch (getMCAsmInfo()->getExceptionHandlingType())
+ {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ PM.add(createSjLjEHPass(getTargetLowering()));
+ break;
+ case ExceptionHandling::Dwarf:
+ PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ break;
+ }
PM.add(createGCLoweringPass());
// Make sure that no unreachable blocks are instruction selected.
PM.add(createUnreachableBlockEliminationPass());
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
+ if (HoistConstants)
+ PM.add(createCodeGenLICMPass());
PM.add(createCodeGenPreparePass(getTargetLowering()));
+ }
PM.add(createStackProtectorPass(getTargetLowering()));
@@ -225,6 +270,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Standard Lower-Level Passes.
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
// Enable FastISel with -fast, but allow that to be overridden.
if (EnableFastISelOption == cl::BOU_TRUE ||
(OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
@@ -240,19 +288,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None) {
PM.add(createMachineLICMPass());
PM.add(createMachineSinkingPass());
- printAndVerify(PM, /* allowDoubleDefs= */ false);
+ printAndVerify(PM, /* allowDoubleDefs= */ true);
}
// Run pre-ra passes.
if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM);
+ printAndVerify(PM, /* allowDoubleDefs= */ true);
// Perform register allocation.
PM.add(createRegisterAllocator());
// Perform stack slot coloring.
if (OptLevel != CodeGenOpt::None)
- PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive));
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
printAndVerify(PM); // Print the register-allocated code
@@ -267,8 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createPrologEpilogCodeInserter());
printAndVerify(PM);
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM);
+
// Second pass scheduler.
- if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) {
+ if (OptLevel != CodeGenOpt::None) {
PM.add(createPostRAScheduler());
printAndVerify(PM);
}
@@ -283,7 +337,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM);
if (PrintGCInfo)
- PM.add(createGCInfoPrinter(*cerr));
+ PM.add(createGCInfoPrinter(errs()));
return false;
}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 26722a3ca11a..a02a4a6c83a1 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -23,12 +23,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
-#include <ostream>
using namespace llvm;
+// Print a LiveIndex to a raw_ostream.
+void LiveIndex::print(raw_ostream &os) const {
+ os << (index & ~PHI_BIT);
+}
+
// An example for liveAt():
//
// this = [1,4), liveAt(0) will return false. The instruction defining this
@@ -36,7 +40,7 @@ using namespace llvm;
// variable it represents. This is because slot 1 is used (def slot) and spans
// up to slot 3 (store slot).
//
-bool LiveInterval::liveAt(unsigned I) const {
+bool LiveInterval::liveAt(LiveIndex I) const {
Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
if (r == ranges.begin())
@@ -49,7 +53,7 @@ bool LiveInterval::liveAt(unsigned I) const {
// liveBeforeAndAt - Check if the interval is live at the index and the index
// just before it. If index is liveAt, check if it starts a new live range.
// If it does, then check if the previous live range ends at index-1.
-bool LiveInterval::liveBeforeAndAt(unsigned I) const {
+bool LiveInterval::liveBeforeAndAt(LiveIndex I) const {
Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
if (r == ranges.begin())
@@ -127,7 +131,7 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
/// overlaps - Return true if the live interval overlaps a range specified
/// by [Start, End).
-bool LiveInterval::overlaps(unsigned Start, unsigned End) const {
+bool LiveInterval::overlaps(LiveIndex Start, LiveIndex End) const {
assert(Start < End && "Invalid range");
const_iterator I = begin();
const_iterator E = end();
@@ -145,10 +149,10 @@ bool LiveInterval::overlaps(unsigned Start, unsigned End) const {
/// specified by I to end at the specified endpoint. To do this, we should
/// merge and eliminate all ranges that this will overlap with. The iterator is
/// not invalidated.
-void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) {
assert(I != ranges.end() && "Not a valid interval!");
VNInfo *ValNo = I->valno;
- unsigned OldEnd = I->end;
+ LiveIndex OldEnd = I->end;
// Search for the first interval that we can't merge with.
Ranges::iterator MergeTo = next(I);
@@ -163,7 +167,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
ranges.erase(next(I), MergeTo);
// Update kill info.
- removeKills(ValNo, OldEnd, I->end-1);
+ ValNo->removeKills(OldEnd, I->end.prevSlot_());
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
@@ -179,7 +183,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
/// specified by I to start at the specified endpoint. To do this, we should
/// merge and eliminate all ranges that this will overlap with.
LiveInterval::Ranges::iterator
-LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) {
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStart) {
assert(I != ranges.end() && "Not a valid interval!");
VNInfo *ValNo = I->valno;
@@ -212,7 +216,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) {
LiveInterval::iterator
LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
- unsigned Start = LR.start, End = LR.end;
+ LiveIndex Start = LR.start, End = LR.end;
iterator it = std::upper_bound(From, ranges.end(), Start);
// If the inserted interval starts in the middle or right at the end of
@@ -246,7 +250,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
extendIntervalEndTo(it, End);
else if (End < it->end)
// Overlapping intervals, there might have been a kill here.
- removeKill(it->valno, End);
+ it->valno->removeKill(End);
return it;
}
} else {
@@ -262,33 +266,32 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
return ranges.insert(it, LR);
}
-/// isInOneLiveRange - Return true if the range specified is entirely in the
+/// isInOneLiveRange - Return true if the range specified is entirely in
/// a single LiveRange of the live interval.
-bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) {
+bool LiveInterval::isInOneLiveRange(LiveIndex Start, LiveIndex End) {
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
if (I == ranges.begin())
return false;
--I;
- return I->contains(Start) && I->contains(End-1);
+ return I->containsRange(Start, End);
}
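Like liveAt and FindLiveRangeContaining, this test relies on the range list being sorted by start point: upper_bound locates the first range starting after the query index, so the only candidate container is the range immediately before it. A self-contained sketch of the idiom, with plain unsigneds standing in for LiveIndex:

    #include <algorithm>
    #include <vector>

    struct Range { unsigned start, end; };             // half-open [start, end)
    bool operator<(unsigned Idx, const Range &R) { return Idx < R.start; }

    bool contains(const std::vector<Range> &Rs, unsigned Idx) {
      std::vector<Range>::const_iterator I =
        std::upper_bound(Rs.begin(), Rs.end(), Idx);
      if (I == Rs.begin()) return false;               // Idx precedes every range
      --I;                                             // last range with start <= Idx
      return Idx < I->end;
    }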
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be in a single LiveRange in its entirety.
-void LiveInterval::removeRange(unsigned Start, unsigned End,
+void LiveInterval::removeRange(LiveIndex Start, LiveIndex End,
bool RemoveDeadValNo) {
// Find the LiveRange containing this span.
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
assert(I != ranges.begin() && "Range is not in interval!");
--I;
- assert(I->contains(Start) && I->contains(End-1) &&
- "Range is not entirely in interval!");
+ assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
// If the span we are removing is at the start of the LiveRange, adjust it.
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
- removeKills(I->valno, Start, End);
+ ValNo->removeKills(Start, End);
if (RemoveDeadValNo) {
// Check if val# is dead.
bool isDead = true;
@@ -322,13 +325,13 @@ void LiveInterval::removeRange(unsigned Start, unsigned End,
// Otherwise if the span we are removing is at the end of the LiveRange,
// adjust the other way.
if (I->end == End) {
- removeKills(ValNo, Start, End);
+ ValNo->removeKills(Start, End);
I->end = Start;
return;
}
// Otherwise, we are splitting the LiveRange into two pieces.
- unsigned OldEnd = I->end;
+ LiveIndex OldEnd = I->end;
I->end = Start; // Trim the old interval.
// Insert the new one.
@@ -362,11 +365,12 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
/// scaleNumbering - Renumber VNI and ranges to provide gaps for new
/// instructions.
+
void LiveInterval::scaleNumbering(unsigned factor) {
// Scale ranges.
for (iterator RI = begin(), RE = end(); RI != RE; ++RI) {
- RI->start = InstrSlots::scale(RI->start, factor);
- RI->end = InstrSlots::scale(RI->end, factor);
+ RI->start = RI->start.scale(factor);
+ RI->end = RI->end.scale(factor);
}
// Scale VNI info.
@@ -374,19 +378,20 @@ void LiveInterval::scaleNumbering(unsigned factor) {
VNInfo *vni = *VNI;
if (vni->isDefAccurate())
- vni->def = InstrSlots::scale(vni->def, factor);
+ vni->def = vni->def.scale(factor);
for (unsigned i = 0; i < vni->kills.size(); ++i) {
- if (vni->kills[i] != 0)
- vni->kills[i] = InstrSlots::scale(vni->kills[i], factor);
+ if (!vni->kills[i].isPHIIndex())
+ vni->kills[i] = vni->kills[i].scale(factor);
}
}
}
+
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
LiveInterval::const_iterator
-LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+LiveInterval::FindLiveRangeContaining(LiveIndex Idx) const {
const_iterator It = std::upper_bound(begin(), end(), Idx);
if (It != ranges.begin()) {
--It;
@@ -398,7 +403,7 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
}
LiveInterval::iterator
-LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+LiveInterval::FindLiveRangeContaining(LiveIndex Idx) {
iterator It = std::upper_bound(begin(), end(), Idx);
if (It != begin()) {
--It;
@@ -409,17 +414,27 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) {
return end();
}
-/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index
-/// (register interval) or defined by the specified register (stack inteval).
-VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
- VNInfo *VNI = NULL;
+/// findDefinedVNInfo - Find the VNInfo defined by the specified
+/// index (register interval).
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(LiveIndex Idx) const {
for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
- i != e; ++i)
- if ((*i)->def == DefIdxOrReg) {
- VNI = *i;
- break;
- }
- return VNI;
+ i != e; ++i) {
+ if ((*i)->def == Idx)
+ return *i;
+ }
+
+ return 0;
+}
+
+/// findDefinedVNInfo - Find the VNInfo defined by the specified
+/// register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->getReg() == reg)
+ return *i;
+ }
+ return 0;
}
/// join - Join two live intervals (this, and other) together. This applies
@@ -502,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
InsertPos = addRangeFrom(*I, InsertPos);
}
- weight += Other.weight;
+ ComputeJoinedWeight(Other);
// Update regalloc hint if currently there isn't one.
if (TargetRegisterInfo::isVirtualRegister(reg) &&
@@ -546,7 +561,7 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
if (I->valno != RHSValNo)
continue;
- unsigned Start = I->start, End = I->end;
+ LiveIndex Start = I->start, End = I->end;
IP = std::upper_bound(IP, end(), Start);
// If the start of this range overlaps with an existing liverange, trim it.
if (IP != begin() && IP[-1].end > Start) {
@@ -622,20 +637,21 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
else if (UnusedValNo)
ClobberValNo = UnusedValNo;
else {
- UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator);
+ UnusedValNo = ClobberValNo =
+ getNextValue(LiveIndex(), 0, false, VNInfoAllocator);
ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
}
bool Done = false;
- unsigned Start = I->start, End = I->end;
+ LiveIndex Start = I->start, End = I->end;
// If a clobber range starts before an existing range and ends after
// it, the clobber range will need to be split into multiple ranges.
// Loop until the entire clobber range is handled.
while (!Done) {
Done = true;
IP = std::upper_bound(IP, end(), Start);
- unsigned SubRangeStart = Start;
- unsigned SubRangeEnd = End;
+ LiveIndex SubRangeStart = Start;
+ LiveIndex SubRangeEnd = End;
// If the start of this range overlaps with an existing liverange, trim it.
if (IP != begin() && IP[-1].end > SubRangeStart) {
@@ -671,11 +687,13 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
}
}
-void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
+void LiveInterval::MergeInClobberRange(LiveIndex Start,
+ LiveIndex End,
BumpPtrAllocator &VNInfoAllocator) {
// Find a value # to use for the clobber ranges. If there is already a value#
// for unknown values, use it.
- VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator);
+ VNInfo *ClobberValNo =
+ getNextValue(LiveIndex(), 0, false, VNInfoAllocator);
iterator IP = begin();
IP = std::upper_bound(IP, end(), Start);
@@ -711,7 +729,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
// Make sure V2 is smaller than V1.
if (V1->id < V2->id) {
- copyValNumInfo(V1, V2);
+ V1->copyFrom(*V2);
std::swap(V1, V2);
}
@@ -788,20 +806,42 @@ void LiveInterval::Copy(const LiveInterval &RHS,
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const_iterator I = begin(), E = end(); I != E; ++I)
- Sum += I->end - I->start;
+ Sum += I->start.distance(I->end);
return Sum;
}
-std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) {
+/// ComputeJoinedWeight - Set the weight of a live interval Joined
+/// after Other has been merged into it.
+void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
+ // If either of these intervals was spilled, the weight is the
+ // weight of the non-spilled interval. This can only happen with
+ // iterative coalescers.
+
+ if (Other.weight != HUGE_VALF) {
+ weight += Other.weight;
+ }
+ else if (weight == HUGE_VALF &&
+ !TargetRegisterInfo::isPhysicalRegister(reg)) {
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining to spilled interval");
+ weight = Other.weight;
+ }
+ else {
+ // Otherwise the weight stays the same
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining from spilled interval");
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
}
void LiveRange::dump() const {
- cerr << *this << "\n";
+ errs() << *this << "\n";
}
-void LiveInterval::print(std::ostream &OS,
- const TargetRegisterInfo *TRI) const {
+void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
if (isStackSlot())
OS << "SS#" << getStackSlotIndex();
else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
@@ -841,6 +881,8 @@ void LiveInterval::print(std::ostream &OS,
OS << "-(";
for (unsigned j = 0; j != ee; ++j) {
OS << vni->kills[j];
+ if (vni->kills[j].isPHIIndex())
+ OS << "*";
if (j != ee-1)
OS << " ";
}
@@ -857,10 +899,10 @@ void LiveInterval::print(std::ostream &OS,
}
void LiveInterval::dump() const {
- cerr << *this << "\n";
+ errs() << *this << "\n";
}
-void LiveRange::print(std::ostream &os) const {
+void LiveRange::print(raw_ostream &os) const {
os << *this;
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 52a30bc06795..93d3d4c83896 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -34,6 +35,8 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -47,24 +50,24 @@ using namespace llvm;
static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
-static cl::opt<bool> SplitAtBB("split-intervals-at-bb",
- cl::init(true), cl::Hidden);
-static cl::opt<int> SplitLimit("split-limit",
- cl::init(-1), cl::Hidden);
-
-static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden);
-
static cl::opt<bool> EnableFastSpilling("fast-spill",
cl::init(false), cl::Hidden);
-STATISTIC(numIntervals, "Number of original intervals");
-STATISTIC(numFolds , "Number of loads/stores folded into instructions");
-STATISTIC(numSplits , "Number of intervals split");
+static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false));
+
+static cl::opt<int> CoalescingLimit("early-coalescing-limit",
+ cl::init(-1), cl::Hidden);
+
+STATISTIC(numIntervals , "Number of original intervals");
+STATISTIC(numFolds , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits , "Number of intervals split");
+STATISTIC(numCoalescing, "Number of early coalescing performed");
char LiveIntervals::ID = 0;
static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
@@ -92,15 +95,32 @@ void LiveIntervals::releaseMemory() {
mi2iMap_.clear();
i2miMap_.clear();
r2iMap_.clear();
+ terminatorGaps.clear();
+ phiJoinCopies.clear();
+
// Release VNInfo memory regions after all VNInfo objects are dtor'd.
VNInfoAllocator.Reset();
- while (!ClonedMIs.empty()) {
- MachineInstr *MI = ClonedMIs.back();
- ClonedMIs.pop_back();
+ while (!CloneMIs.empty()) {
+ MachineInstr *MI = CloneMIs.back();
+ CloneMIs.pop_back();
mf_->DeleteMachineInstr(MI);
}
}
+static bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
+ unsigned OpIdx, const TargetInstrInfo *tii_){
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg)
+ return true;
+
+ if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+ return true;
+ if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)
+ return true;
+ return false;
+}
+
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
/// there is one implicit_def for each use. Add isUndef marker to
/// implicit_def defs and their uses.
@@ -119,16 +139,33 @@ void LiveIntervals::processImplicitDefs() {
++I;
if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
unsigned Reg = MI->getOperand(0).getReg();
- MI->getOperand(0).setIsUndef();
ImpDefRegs.insert(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
+ ImpDefRegs.insert(*SS);
+ }
ImpDefMIs.push_back(MI);
continue;
}
+ if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ MachineOperand &MO = MI->getOperand(2);
+ if (ImpDefRegs.count(MO.getReg())) {
+ // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2
+ // This is an identity copy, eliminate it now.
+ if (MO.isKill()) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
+ vi.removeKill(MI);
+ }
+ MI->eraseFromParent();
+ continue;
+ }
+ }
+
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -136,22 +173,30 @@ void LiveIntervals::processImplicitDefs() {
if (!ImpDefRegs.count(Reg))
continue;
// Use is a copy, just turn it into an implicit_def.
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg) {
+ if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
bool isKill = MO.isKill();
MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
MI->RemoveOperand(j);
- if (isKill)
+ if (isKill) {
ImpDefRegs.erase(Reg);
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
ChangedToImpDef = true;
break;
}
MO.setIsUndef();
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+ // Make sure other uses of the same register are also marked undef.
+ for (unsigned j = i+1; j != e; ++j) {
+ MachineOperand &MOJ = MI->getOperand(j);
+ if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ MOJ.setIsUndef();
+ }
ImpDefRegs.erase(Reg);
+ }
}
if (ChangedToImpDef) {
@@ -171,11 +216,13 @@ void LiveIntervals::processImplicitDefs() {
for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
MachineInstr *MI = ImpDefMIs[i];
unsigned Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- // Physical registers are not liveout (yet).
- continue;
- if (!ImpDefRegs.count(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !ImpDefRegs.count(Reg)) {
+ // Delete all "local" implicit_def's. That includes those which define
+ // physical registers since they cannot be liveout.
+ MI->eraseFromParent();
continue;
+ }
// If there are multiple defs of the same register and at least one
// is not an implicit_def, do not insert implicit_def's before the
@@ -191,6 +238,10 @@ void LiveIntervals::processImplicitDefs() {
if (Skip)
continue;
+ // The only implicit_defs which we want to keep are those that are live
+ // out of their blocks.
+ MI->eraseFromParent();
+
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
UE = mri_->use_end(); UI != UE; ) {
MachineOperand &RMO = UI.getOperand();
@@ -199,12 +250,19 @@ void LiveIntervals::processImplicitDefs() {
MachineBasicBlock *RMBB = RMI->getParent();
if (RMBB == MBB)
continue;
+
+ // Turn a copy use into an implicit_def.
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg) {
+ RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ RMI->RemoveOperand(j);
+ continue;
+ }
+
const TargetRegisterClass* RC = mri_->getRegClass(Reg);
unsigned NewVReg = mri_->createVirtualRegister(RC);
- MachineInstrBuilder MIB =
- BuildMI(*RMBB, RMI, RMI->getDebugLoc(),
- tii_->get(TargetInstrInfo::IMPLICIT_DEF), NewVReg);
- (*MIB).getOperand(0).setIsUndef();
RMO.setReg(NewVReg);
RMO.setIsUndef();
RMO.setIsKill();
@@ -215,6 +273,7 @@ void LiveIntervals::processImplicitDefs() {
}
}
+
void LiveIntervals::computeNumbering() {
Index2MiMap OldI2MI = i2miMap_;
std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
@@ -223,44 +282,79 @@ void LiveIntervals::computeNumbering() {
MBB2IdxMap.clear();
mi2iMap_.clear();
i2miMap_.clear();
+ terminatorGaps.clear();
+ phiJoinCopies.clear();
FunctionSize = 0;
// Number MachineInstrs and MachineBasicBlocks.
// Initialize MBB indexes to a sentinel.
- MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U));
+ MBB2IdxMap.resize(mf_->getNumBlockIDs(),
+ std::make_pair(LiveIndex(),LiveIndex()));
- unsigned MIIndex = 0;
+ LiveIndex MIIndex;
for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
MBB != E; ++MBB) {
- unsigned StartIdx = MIIndex;
+ LiveIndex StartIdx = MIIndex;
// Insert an empty slot at the beginning of each block.
- MIIndex += InstrSlots::NUM;
+ MIIndex = getNextIndex(MIIndex);
i2miMap_.push_back(0);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
+
+ if (I == MBB->getFirstTerminator()) {
+ // Leave a gap before terminators; this is where we will point
+ // PHI kills.
+ LiveIndex tGap(true, MIIndex);
+ bool inserted =
+ terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second;
+ assert(inserted &&
+ "Multiple 'first' terminators encountered during numbering.");
+ inserted = inserted; // Avoid compiler warning if assertions turned off.
+ i2miMap_.push_back(0);
+
+ MIIndex = getNextIndex(MIIndex);
+ }
+
bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
assert(inserted && "multiple MachineInstr -> index mappings");
inserted = true;
i2miMap_.push_back(I);
- MIIndex += InstrSlots::NUM;
+ MIIndex = getNextIndex(MIIndex);
FunctionSize++;
// Insert max(1, numdefs) empty slots after every instruction.
unsigned Slots = I->getDesc().getNumDefs();
if (Slots == 0)
Slots = 1;
- MIIndex += InstrSlots::NUM * Slots;
- while (Slots--)
+ while (Slots--) {
+ MIIndex = getNextIndex(MIIndex);
i2miMap_.push_back(0);
+ }
+
+ }
+
+ if (MBB->getFirstTerminator() == MBB->end()) {
+ // Leave a gap before terminators; this is where we will point
+ // PHI kills.
+ LiveIndex tGap(true, MIIndex);
+ bool inserted =
+ terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second;
+ assert(inserted &&
+ "Multiple 'first' terminators encountered during numbering.");
+ inserted = inserted; // Avoid compiler warning if assertions turned off.
+ i2miMap_.push_back(0);
+
+ MIIndex = getNextIndex(MIIndex);
}
// Set the MBB2IdxMap entry for this MBB.
- MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1);
+ MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, getPrevSlot(MIIndex));
Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB));
}
+
std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
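The numbering scheme after this change: one empty slot at the head of each block, one slot per instruction plus max(1, numdefs) trailing slots, and one extra gap slot before the first terminator (or at the block end when there is none) for PHI kills to point at. A toy model, with unsigned indices standing in for LiveIndex:

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Each instruction: (number of defs, is-first-terminator).
    // Returns the index assigned to each instruction; gap and empty
    // slots receive no entry, mirroring the null i2miMap_ entries.
    std::vector<unsigned>
    numberBlock(const std::vector<std::pair<unsigned, bool> > &Instrs) {
      std::vector<unsigned> Idx;
      unsigned MIIndex = 1;                        // empty slot at block start
      for (unsigned n = 0; n < Instrs.size(); ++n) {
        if (Instrs[n].second) ++MIIndex;           // terminator gap (PHI kills)
        Idx.push_back(MIIndex++);                  // the instruction itself
        MIIndex += std::max(1u, Instrs[n].first);  // max(1, numdefs) empty slots
      }
      return Idx;
    }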
if (!OldI2MI.empty())
@@ -272,9 +366,9 @@ void LiveIntervals::computeNumbering() {
// number, or our best guess at what it _should_ correspond to if the
// original instruction has been erased. This is either the following
// instruction or its predecessor.
- unsigned index = LI->start / InstrSlots::NUM;
- unsigned offset = LI->start % InstrSlots::NUM;
- if (offset == InstrSlots::LOAD) {
+ unsigned index = LI->start.getVecIndex();
+ LiveIndex::Slot offset = LI->start.getSlot();
+ if (LI->start.isLoad()) {
std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start);
// Take the pair containing the index
@@ -283,29 +377,34 @@ void LiveIntervals::computeNumbering() {
LI->start = getMBBStartIdx(J->second);
} else {
- LI->start = mi2iMap_[OldI2MI[index]] + offset;
+ LI->start = LiveIndex(
+ LiveIndex(mi2iMap_[OldI2MI[index]]),
+ (LiveIndex::Slot)offset);
}
// Remap the ending index in the same way that we remapped the start,
// except for the final step where we always map to the immediately
// following instruction.
- index = (LI->end - 1) / InstrSlots::NUM;
- offset = LI->end % InstrSlots::NUM;
- if (offset == InstrSlots::LOAD) {
+ index = (getPrevSlot(LI->end)).getVecIndex();
+ offset = LI->end.getSlot();
+ if (LI->end.isLoad()) {
// VReg dies at end of block.
std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end);
--I;
- LI->end = getMBBEndIdx(I->second) + 1;
+ LI->end = getNextSlot(getMBBEndIdx(I->second));
} else {
unsigned idx = index;
while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
if (index != OldI2MI.size())
- LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0);
+ LI->end =
+ LiveIndex(mi2iMap_[OldI2MI[index]],
+ (idx == index ? offset : LiveIndex::LOAD));
else
- LI->end = InstrSlots::NUM * i2miMap_.size();
+ LI->end =
+ LiveIndex(LiveIndex::NUM * i2miMap_.size());
}
}
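The remapping above trades the old arithmetic encoding, index * InstrSlots::NUM + offset with offset one of LOAD/USE/DEF/STORE, for a LiveIndex that carries the vector index and the slot as separate fields. A sketch of the correspondence, assuming the four-slot layout is otherwise unchanged:

    unsigned old = vec * InstrSlots::NUM + InstrSlots::USE;  // old scheme
    LiveIndex now(mi2iMap_[MI], LiveIndex::USE);             // new scheme, same position

Load-slot endpoints still denote block boundaries, so they are remapped through OldI2MBB rather than through the instruction maps.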
@@ -317,9 +416,9 @@ void LiveIntervals::computeNumbering() {
// start indices above. VN's with special sentinel defs
// don't need to be remapped.
if (vni->isDefAccurate() && !vni->isUnused()) {
- unsigned index = vni->def / InstrSlots::NUM;
- unsigned offset = vni->def % InstrSlots::NUM;
- if (offset == InstrSlots::LOAD) {
+ unsigned index = vni->def.getVecIndex();
+ LiveIndex::Slot offset = vni->def.getSlot();
+ if (vni->def.isLoad()) {
std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def);
// Take the pair containing the index
@@ -328,25 +427,36 @@ void LiveIntervals::computeNumbering() {
vni->def = getMBBStartIdx(J->second);
} else {
- vni->def = mi2iMap_[OldI2MI[index]] + offset;
+ vni->def = LiveIndex(mi2iMap_[OldI2MI[index]], offset);
}
}
// Remap the VNInfo kill indices, which works the same as
// the end indices above.
for (size_t i = 0; i < vni->kills.size(); ++i) {
- // PHI kills don't need to be remapped.
- if (!vni->kills[i]) continue;
-
- unsigned index = (vni->kills[i]-1) / InstrSlots::NUM;
- unsigned offset = vni->kills[i] % InstrSlots::NUM;
- if (offset == InstrSlots::LOAD) {
- std::vector<IdxMBBPair>::const_iterator I =
+ unsigned index = getPrevSlot(vni->kills[i]).getVecIndex();
+ LiveIndex::Slot offset = vni->kills[i].getSlot();
+
+ if (vni->kills[i].isLoad()) {
+ assert("Value killed at a load slot.");
+ /*std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
--I;
- vni->kills[i] = getMBBEndIdx(I->second);
+ vni->kills[i] = getMBBEndIdx(I->second);*/
} else {
+ if (vni->kills[i].isPHIIndex()) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
+ --I;
+ vni->kills[i] = terminatorGaps[I->second];
+ } else {
+ assert(OldI2MI[index] != 0 &&
+ "Kill refers to instruction not present in index maps.");
+ vni->kills[i] = LiveIndex(mi2iMap_[OldI2MI[index]], offset);
+ }
+
+ /*
unsigned idx = index;
while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
@@ -355,6 +465,7 @@ void LiveIntervals::computeNumbering() {
(idx == index ? offset : 0);
else
vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
+ */
}
}
}
@@ -372,13 +483,20 @@ void LiveIntervals::scaleNumbering(int factor) {
Idx2MBBMap.clear();
for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
MBB != MBBE; ++MBB) {
- std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
- mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor);
- mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor);
+ std::pair<LiveIndex, LiveIndex> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
+ mbbIndices.first = mbbIndices.first.scale(factor);
+ mbbIndices.second = mbbIndices.second.scale(factor);
Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB));
}
std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+ // Scale terminator gaps.
+ for (DenseMap<MachineBasicBlock*, LiveIndex>::iterator
+ TGI = terminatorGaps.begin(), TGE = terminatorGaps.end();
+ TGI != TGE; ++TGI) {
+ terminatorGaps[TGI->first] = TGI->second.scale(factor);
+ }
+
// Scale the intervals.
for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
LI->second->scaleNumbering(factor);
@@ -386,19 +504,20 @@ void LiveIntervals::scaleNumbering(int factor) {
// Scale MachineInstrs.
Mi2IndexMap oldmi2iMap = mi2iMap_;
- unsigned highestSlot = 0;
+ LiveIndex highestSlot;
for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end();
MI != ME; ++MI) {
- unsigned newSlot = InstrSlots::scale(MI->second, factor);
+ LiveIndex newSlot = MI->second.scale(factor);
mi2iMap_[MI->first] = newSlot;
highestSlot = std::max(highestSlot, newSlot);
}
+ unsigned highestVIndex = highestSlot.getVecIndex();
i2miMap_.clear();
- i2miMap_.resize(highestSlot + 1);
+ i2miMap_.resize(highestVIndex + 1);
for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end();
MI != ME; ++MI) {
- i2miMap_[MI->second] = MI->first;
+ i2miMap_[MI->second.getVecIndex()] = const_cast<MachineInstr *>(MI->first);
}
}
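scaleNumbering now routes every position through LiveIndex::scale instead of InstrSlots::scale, and the terminator gaps introduced above must be scaled along with everything else, which is what the new loop does. A sketch of the assumed semantics, multiplying the instruction index while preserving the slot:

    LiveIndex idx = getInstructionIndex(MI);
    LiveIndex scaled = idx.scale(2);  // twice the spacing, same LOAD/USE/DEF/STORE slot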
@@ -419,6 +538,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
processImplicitDefs();
computeNumbering();
computeIntervals();
+ performEarlyCoalescing();
numIntervals += getNumIntervals();
@@ -427,36 +547,45 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
}
/// print - Implement the dump method.
-void LiveIntervals::print(std::ostream &O, const Module* ) const {
- O << "********** INTERVALS **********\n";
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
for (const_iterator I = begin(), E = end(); I != E; ++I) {
- I->second->print(O, tri_);
- O << "\n";
+ I->second->print(OS, tri_);
+ OS << "\n";
}
- O << "********** MACHINEINSTRS **********\n";
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
mbbi != mbbe; ++mbbi) {
- O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+ OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
for (MachineBasicBlock::iterator mii = mbbi->begin(),
mie = mbbi->end(); mii != mie; ++mii) {
- O << getInstructionIndex(mii) << '\t' << *mii;
+ OS << getInstructionIndex(mii) << '\t' << *mii;
}
}
}
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(errs());
+}
+
/// conflictsWithPhysRegDef - Returns true if the specified register
/// is defined during the duration of the specified interval.
bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
VirtRegMap &vrm, unsigned reg) {
for (LiveInterval::Ranges::const_iterator
I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (unsigned index = getBaseIndex(I->start),
- end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
- index += InstrSlots::NUM) {
+ for (LiveIndex index = getBaseIndex(I->start),
+ end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end;
+ index = getNextIndex(index)) {
// skip deleted instructions
while (index != end && !getInstructionFromIndex(index))
- index += InstrSlots::NUM;
+ index = getNextIndex(index);
if (index == end) break;
MachineInstr *MI = getInstructionFromIndex(index);
@@ -492,16 +621,16 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
for (LiveInterval::Ranges::const_iterator
I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (unsigned index = getBaseIndex(I->start),
- end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
- index += InstrSlots::NUM) {
+ for (LiveIndex index = getBaseIndex(I->start),
+ end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end;
+ index = getNextIndex(index)) {
// Skip deleted instructions.
MachineInstr *MI = 0;
while (index != end) {
MI = getInstructionFromIndex(index);
if (MI)
break;
- index += InstrSlots::NUM;
+ index = getNextIndex(index);
}
if (index == end) break;
@@ -525,35 +654,36 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
return false;
}
-
-void LiveIntervals::printRegName(unsigned reg) const {
+#ifndef NDEBUG
+static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
if (TargetRegisterInfo::isPhysicalRegister(reg))
- cerr << tri_->getName(reg);
+ errs() << tri_->getName(reg);
else
- cerr << "%reg" << reg;
+ errs() << "%reg" << reg;
}
+#endif
void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineBasicBlock::iterator mi,
- unsigned MIIdx, MachineOperand& MO,
+ LiveIndex MIIdx,
+ MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
- DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
- LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
-
- if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
- DOUT << "is a implicit_def\n";
- return;
- }
+ DEBUG({
+ errs() << "\t\tregister: ";
+ printRegName(interval.reg, tri_);
+ });
// Virtual registers may be defined multiple times (due to phi
// elimination and 2-addr elimination). Much of what we do only has to be
// done once for the vreg. We use an empty interval to detect the first
// time we see a vreg.
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
- unsigned defIndex = getDefIndex(MIIdx);
- // Earlyclobbers move back one.
+ LiveIndex defIndex = getDefIndex(MIIdx);
+ // Earlyclobbers move back one, so that they overlap the live range
+ // of inputs.
if (MO.isEarlyClobber())
defIndex = getUseIndex(MIIdx);
VNInfo *ValNo;
@@ -575,11 +705,16 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// will be a single kill, in MBB, which comes after the definition.
if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
// FIXME: what about dead vars?
- unsigned killIdx;
+ LiveIndex killIdx;
if (vi.Kills[0] != mi)
- killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+ killIdx = getNextSlot(getUseIndex(getInstructionIndex(vi.Kills[0])));
+ else if (MO.isEarlyClobber())
+ // Earlyclobbers that die in this instruction move up one extra, to
+ // compensate for having the starting point moved back one. This
+ // gets them to overlap the live range of other outputs.
+ killIdx = getNextSlot(getNextSlot(defIndex));
else
- killIdx = defIndex+1;
+ killIdx = getNextSlot(defIndex);
// If the kill happens after the definition, we have an intra-block
// live range.
@@ -588,8 +723,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
"Shouldn't be alive across any blocks!");
LiveRange LR(defIndex, killIdx, ValNo);
interval.addRange(LR);
- DOUT << " +" << LR << "\n";
- interval.addKill(ValNo, killIdx);
+ DEBUG(errs() << " +" << LR << "\n");
+ ValNo->addKill(killIdx);
return;
}
}
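For this single-kill, intra-block case the range runs from the def slot to one past the kill's use slot. A worked example with hypothetical indices in the usual LOAD/USE/DEF/STORE order: a def landing at index 8 (the DEF slot of its instruction) killed by a use at index 12 yields the range [8,13) with 13 recorded as the kill point; an earlyclobber that dies in its own instruction instead ends two slots past the moved-back def, so it still overlaps the instruction's other outputs.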
@@ -598,8 +733,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// of the defining block, potentially live across some blocks, then is
// live into some number of blocks, but gets killed. Start by adding a
// range that goes from this definition to the end of the defining block.
- LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo);
- DOUT << " +" << NewLR;
+ LiveRange NewLR(defIndex, getNextSlot(getMBBEndIdx(mbb)), ValNo);
+ DEBUG(errs() << " +" << NewLR);
interval.addRange(NewLR);
// Iterate over all of the blocks that the variable is completely
@@ -608,22 +743,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
E = vi.AliveBlocks.end(); I != E; ++I) {
LiveRange LR(getMBBStartIdx(*I),
- getMBBEndIdx(*I)+1, // MBB ends at -1.
+ getNextSlot(getMBBEndIdx(*I)), // MBB ends at -1.
ValNo);
interval.addRange(LR);
- DOUT << " +" << LR;
+ DEBUG(errs() << " +" << LR);
}
// Finally, this virtual register is live from the start of any killing
// block to the 'use' slot of the killing instruction.
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
- unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1;
- LiveRange LR(getMBBStartIdx(Kill->getParent()),
- killIdx, ValNo);
+ LiveIndex killIdx =
+ getNextSlot(getUseIndex(getInstructionIndex(Kill)));
+ LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
interval.addRange(LR);
- interval.addKill(ValNo, killIdx);
- DOUT << " +" << LR;
+ ValNo->addKill(killIdx);
+ DEBUG(errs() << " +" << LR);
}
} else {
@@ -638,12 +773,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// need to take the LiveRegion that defines this register and split it
// into two values.
assert(interval.containsOneValue());
- unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def);
- unsigned RedefIndex = getDefIndex(MIIdx);
+ LiveIndex DefIndex = getDefIndex(interval.getValNumInfo(0)->def);
+ LiveIndex RedefIndex = getDefIndex(MIIdx);
if (MO.isEarlyClobber())
RedefIndex = getUseIndex(MIIdx);
- const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1);
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(getPrevSlot(RedefIndex));
VNInfo *OldValNo = OldLR->valno;
// Delete the initial value, which should be short and continuous,
@@ -656,68 +792,85 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
- VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy,
+ VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
false, // update at *
VNInfoAllocator);
ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
- OldValNo->copy = 0;
+ OldValNo->setCopy(0);
if (MO.isEarlyClobber())
OldValNo->setHasRedefByEC(true);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
- DOUT << " replace range with " << LR;
+ DEBUG(errs() << " replace range with " << LR);
interval.addRange(LR);
- interval.addKill(ValNo, RedefIndex);
+ ValNo->addKill(RedefIndex);
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
if (MO.isDead())
- interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo));
-
- DOUT << " RESULT: ";
- interval.print(DOUT, tri_);
-
+ interval.addRange(
+ LiveRange(RedefIndex, MO.isEarlyClobber() ?
+ getNextSlot(getNextSlot(RedefIndex)) :
+ getNextSlot(RedefIndex), OldValNo));
+
+ DEBUG({
+ errs() << " RESULT: ";
+ interval.print(errs(), tri_);
+ });
} else {
// Otherwise, this must be because of phi elimination. If this is the
// first redefinition of the vreg that we have seen, go back and change
// the live range in the PHI block to be a different value number.
if (interval.containsOneValue()) {
- assert(vi.Kills.size() == 1 &&
- "PHI elimination vreg should have one kill, the PHI itself!");
-
// Remove the old range that we now know has an incorrect number.
VNInfo *VNI = interval.getValNumInfo(0);
MachineInstr *Killer = vi.Kills[0];
- unsigned Start = getMBBStartIdx(Killer->getParent());
- unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
- DOUT << " Removing [" << Start << "," << End << "] from: ";
- interval.print(DOUT, tri_); DOUT << "\n";
- interval.removeRange(Start, End);
+ phiJoinCopies.push_back(Killer);
+ LiveIndex Start = getMBBStartIdx(Killer->getParent());
+ LiveIndex End =
+ getNextSlot(getUseIndex(getInstructionIndex(Killer)));
+ DEBUG({
+ errs() << " Removing [" << Start << "," << End << "] from: ";
+ interval.print(errs(), tri_);
+ errs() << "\n";
+ });
+ interval.removeRange(Start, End);
+ assert(interval.ranges.size() == 1 &&
+ "Newly discovered PHI interval has >1 ranges.");
+ MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex());
+ VNI->addKill(terminatorGaps[killMBB]);
VNI->setHasPHIKill(true);
- DOUT << " RESULT: "; interval.print(DOUT, tri_);
+ DEBUG({
+ errs() << " RESULT: ";
+ interval.print(errs(), tri_);
+ });
// Replace the interval with one of a NEW value number. Note that this
// value number isn't actually defined by an instruction, weird huh? :)
LiveRange LR(Start, End,
- interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator));
+ interval.getNextValue(LiveIndex(mbb->getNumber()),
+ 0, false, VNInfoAllocator));
LR.valno->setIsPHIDef(true);
- DOUT << " replace range with " << LR;
+ DEBUG(errs() << " replace range with " << LR);
interval.addRange(LR);
- interval.addKill(LR.valno, End);
- DOUT << " RESULT: "; interval.print(DOUT, tri_);
+ LR.valno->addKill(End);
+ DEBUG({
+ errs() << " RESULT: ";
+ interval.print(errs(), tri_);
+ });
}
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
- unsigned defIndex = getDefIndex(MIIdx);
+ LiveIndex defIndex = getDefIndex(MIIdx);
if (MO.isEarlyClobber())
defIndex = getUseIndex(MIIdx);
-
+
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
@@ -728,55 +881,63 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
CopyMI = mi;
ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
- unsigned killIndex = getMBBEndIdx(mbb) + 1;
+ LiveIndex killIndex = getNextSlot(getMBBEndIdx(mbb));
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- interval.addKill(ValNo, killIndex);
+ ValNo->addKill(terminatorGaps[mbb]);
ValNo->setHasPHIKill(true);
- DOUT << " +" << LR;
+ DEBUG(errs() << " +" << LR);
}
}
- DOUT << '\n';
+ DEBUG(errs() << '\n');
}
void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator mi,
- unsigned MIIdx,
+ LiveIndex MIIdx,
MachineOperand& MO,
LiveInterval &interval,
MachineInstr *CopyMI) {
// A physical register cannot be live across basic blocks, so its
// lifetime must end somewhere in its defining basic block.
- DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+ DEBUG({
+ errs() << "\t\tregister: ";
+ printRegName(interval.reg, tri_);
+ });
- unsigned baseIndex = MIIdx;
- unsigned start = getDefIndex(baseIndex);
+ LiveIndex baseIndex = MIIdx;
+ LiveIndex start = getDefIndex(baseIndex);
// Earlyclobbers move back one.
if (MO.isEarlyClobber())
start = getUseIndex(MIIdx);
- unsigned end = start;
+ LiveIndex end = start;
// If it is not used after definition, it is considered dead at
// the instruction defining it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
+ // For earlyclobbers, the defSlot was pushed back one; the extra
+ // advance below compensates.
if (MO.isDead()) {
- DOUT << " dead";
- end = start + 1;
+ DEBUG(errs() << " dead");
+ if (MO.isEarlyClobber())
+ end = getNextSlot(getNextSlot(start));
+ else
+ end = getNextSlot(start);
goto exit;
}
// If it is not dead on definition, it must be killed by a
// subsequent instruction. Hence its interval is:
// [defSlot(def), useSlot(kill)+1)
- baseIndex += InstrSlots::NUM;
+ baseIndex = getNextIndex(baseIndex);
while (++mi != MBB->end()) {
- while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ while (baseIndex.getVecIndex() < i2miMap_.size() &&
getInstructionFromIndex(baseIndex) == 0)
- baseIndex += InstrSlots::NUM;
+ baseIndex = getNextIndex(baseIndex);
if (mi->killsRegister(interval.reg, tri_)) {
- DOUT << " killed";
- end = getUseIndex(baseIndex) + 1;
+ DEBUG(errs() << " killed");
+ end = getNextSlot(getUseIndex(baseIndex));
goto exit;
} else {
int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
@@ -791,21 +952,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
- DOUT << " dead";
- end = start + 1;
+ DEBUG(errs() << " dead");
+ end = getNextSlot(start);
}
goto exit;
}
}
- baseIndex += InstrSlots::NUM;
+ baseIndex = getNextIndex(baseIndex);
}
// The only case where we should have a dead physreg here without a killing
// instruction is if it is live-in to the function and never used. Another
// possible case is that the implicit use of the physical register has been
// deleted by the two-address pass.
- end = start + 1;
+ end = getNextSlot(start);
exit:
assert(start < end && "did not find end of interval?");
@@ -819,13 +980,13 @@ exit:
ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
- interval.addKill(LR.valno, end);
- DOUT << " +" << LR << '\n';
+ LR.valno->addKill(end);
+ DEBUG(errs() << " +" << LR << '\n');
}
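A dead physical-register def therefore always gets a one-slot range. With hypothetical numbers: a dead def whose DEF slot is 20 receives [20,21), while a dead earlyclobber starts at the USE slot 19 and ends at 21, keeping it overlapping both the inputs and the other outputs it clobbers.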
void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
- unsigned MIIdx,
+ LiveIndex MIIdx,
MachineOperand& MO,
unsigned MOIdx) {
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
@@ -852,25 +1013,28 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
}
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
- unsigned MIIdx,
+ LiveIndex MIIdx,
LiveInterval &interval, bool isAlias) {
- DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg));
+ DEBUG({
+ errs() << "\t\tlivein register: ";
+ printRegName(interval.reg, tri_);
+ });
// Look for kills; if the register reaches a def before it's killed, then it
// shouldn't be considered a live-in.
MachineBasicBlock::iterator mi = MBB->begin();
- unsigned baseIndex = MIIdx;
- unsigned start = baseIndex;
- while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ LiveIndex baseIndex = MIIdx;
+ LiveIndex start = baseIndex;
+ while (baseIndex.getVecIndex() < i2miMap_.size() &&
getInstructionFromIndex(baseIndex) == 0)
- baseIndex += InstrSlots::NUM;
- unsigned end = baseIndex;
+ baseIndex = getNextIndex(baseIndex);
+ LiveIndex end = baseIndex;
bool SeenDefUse = false;
while (mi != MBB->end()) {
if (mi->killsRegister(interval.reg, tri_)) {
- DOUT << " killed";
- end = getUseIndex(baseIndex) + 1;
+ DEBUG(errs() << " killed");
+ end = getNextSlot(getUseIndex(baseIndex));
SeenDefUse = true;
break;
} else if (mi->modifiesRegister(interval.reg, tri_)) {
@@ -878,40 +1042,167 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
- DOUT << " dead";
- end = getDefIndex(start) + 1;
+ DEBUG(errs() << " dead");
+ end = getNextSlot(getDefIndex(start));
SeenDefUse = true;
break;
}
- baseIndex += InstrSlots::NUM;
+ baseIndex = getNextIndex(baseIndex);
++mi;
if (mi != MBB->end()) {
- while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ while (baseIndex.getVecIndex() < i2miMap_.size() &&
getInstructionFromIndex(baseIndex) == 0)
- baseIndex += InstrSlots::NUM;
+ baseIndex = getNextIndex(baseIndex);
}
}
// Live-in register might not be used at all.
if (!SeenDefUse) {
if (isAlias) {
- DOUT << " dead";
- end = getDefIndex(MIIdx) + 1;
+ DEBUG(errs() << " dead");
+ end = getNextSlot(getDefIndex(MIIdx));
} else {
- DOUT << " live through";
+ DEBUG(errs() << " live through");
end = baseIndex;
}
}
VNInfo *vni =
- interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator);
+ interval.getNextValue(LiveIndex(MBB->getNumber()),
+ 0, false, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
interval.addRange(LR);
- interval.addKill(LR.valno, end);
- DOUT << " +" << LR << '\n';
+ LR.valno->addKill(end);
+ DEBUG(errs() << " +" << LR << '\n');
+}
+
+bool
+LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt,
+ SmallVector<MachineInstr*,16> &IdentCopies,
+ SmallVector<MachineInstr*,16> &OtherCopies) {
+ bool HaveConflict = false;
+ unsigned NumIdent = 0;
+ for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg),
+ re = mri_->def_end(); ri != re; ++ri) {
+ MachineInstr *MI = &*ri;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return false;
+ if (SrcReg != DstInt.reg) {
+ OtherCopies.push_back(MI);
+ HaveConflict |= DstInt.liveAt(getInstructionIndex(MI));
+ } else {
+ IdentCopies.push_back(MI);
+ ++NumIdent;
+ }
+ }
+
+ if (!HaveConflict)
+ return false; // Let coalescer handle it
+ return IdentCopies.size() > OtherCopies.size();
+}
+
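isProfitableToCoalesce walks every def of the phi source: a copy whose source is already the phi destination counts as an identity copy, anything else is a conflicting copy, and a conflict only matters if the destination interval is live at that copy. Early coalescing proceeds only when a conflict exists and the identity copies outnumber the rest; otherwise the regular coalescer is left to handle the join. A minimal usage sketch of the helper introduced above:

    SmallVector<MachineInstr*, 16> Ident, Other;
    if (isProfitableToCoalesce(DstInt, SrcInt, Ident, Other)) {
      // retarget the copies in Other to DstInt.reg, then erase the Ident copies
    }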
+void LiveIntervals::performEarlyCoalescing() {
+ if (!EarlyCoalescing)
+ return;
+
+ /// Perform early coalescing: eliminate copies which feed into phi joins
+ /// and whose sources are defined by the phi joins.
+ for (unsigned i = 0, e = phiJoinCopies.size(); i != e; ++i) {
+ MachineInstr *Join = phiJoinCopies[i];
+ if (CoalescingLimit != -1 && (int)numCoalescing == CoalescingLimit)
+ break;
+
+ unsigned PHISrc, PHIDst, SrcSubReg, DstSubReg;
+ bool isMove = tii_->isMoveInstr(*Join, PHISrc, PHIDst, SrcSubReg, DstSubReg);
+#ifndef NDEBUG
+ assert(isMove && "PHI join instruction must be a move!");
+#else
+ isMove = isMove;
+#endif
+
+ LiveInterval &DstInt = getInterval(PHIDst);
+ LiveInterval &SrcInt = getInterval(PHISrc);
+ SmallVector<MachineInstr*, 16> IdentCopies;
+ SmallVector<MachineInstr*, 16> OtherCopies;
+ if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies))
+ continue;
+
+ DEBUG(errs() << "PHI Join: " << *Join);
+ assert(DstInt.containsOneValue() && "PHI join should have just one val#!");
+ VNInfo *VNI = DstInt.getValNumInfo(0);
+
+ // Change the non-identity copies to directly target the phi destination.
+ for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) {
+ MachineInstr *PHICopy = OtherCopies[i];
+ DEBUG(errs() << "Moving: " << *PHICopy);
+
+ LiveIndex MIIndex = getInstructionIndex(PHICopy);
+ LiveIndex DefIndex = getDefIndex(MIIndex);
+ LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
+ LiveIndex StartIndex = SLR->start;
+ LiveIndex EndIndex = SLR->end;
+
+ // Delete the val# defined by the now-identity copy and add the range from
+ // the beginning of the mbb to the end of the range.
+ SrcInt.removeValNo(SLR->valno);
+ DEBUG(errs() << " added range [" << StartIndex << ','
+ << EndIndex << "] to reg" << DstInt.reg << '\n');
+ if (DstInt.liveAt(StartIndex))
+ DstInt.removeRange(StartIndex, EndIndex);
+ VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true,
+ VNInfoAllocator);
+ NewVNI->setHasPHIKill(true);
+ DstInt.addRange(LiveRange(StartIndex, EndIndex, NewVNI));
+ for (unsigned j = 0, ee = PHICopy->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = PHICopy->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != PHISrc)
+ continue;
+ MO.setReg(PHIDst);
+ }
+ }
+
+ // Now let's eliminate all the would-be identity copies.
+ for (unsigned i = 0, e = IdentCopies.size(); i != e; ++i) {
+ MachineInstr *PHICopy = IdentCopies[i];
+ DEBUG(errs() << "Coalescing: " << *PHICopy);
+
+ LiveIndex MIIndex = getInstructionIndex(PHICopy);
+ LiveIndex DefIndex = getDefIndex(MIIndex);
+ LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
+ LiveIndex StartIndex = SLR->start;
+ LiveIndex EndIndex = SLR->end;
+
+ // Delete the val# defined by the now-identity copy and add the range from
+ // the beginning of the mbb to the end of the range.
+ SrcInt.removeValNo(SLR->valno);
+ RemoveMachineInstrFromMaps(PHICopy);
+ PHICopy->eraseFromParent();
+ DEBUG(errs() << " added range [" << StartIndex << ','
+ << EndIndex << "] to reg" << DstInt.reg << '\n');
+ DstInt.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ }
+
+ // Remove the phi join and update the phi block liveness.
+ LiveIndex MIIndex = getInstructionIndex(Join);
+ LiveIndex UseIndex = getUseIndex(MIIndex);
+ LiveIndex DefIndex = getDefIndex(MIIndex);
+ LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex);
+ LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex);
+ DLR->valno->setCopy(0);
+ DLR->valno->setIsDefAccurate(false);
+ DstInt.addRange(LiveRange(SLR->start, SLR->end, DLR->valno));
+ SrcInt.removeRange(SLR->start, SLR->end);
+ assert(SrcInt.empty());
+ removeInterval(PHISrc);
+ RemoveMachineInstrFromMaps(Join);
+ Join->eraseFromParent();
+
+ ++numCoalescing;
+ }
}
/// computeIntervals - computes the live intervals for virtual
@@ -919,17 +1210,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
/// live interval is an interval [i, j) where 1 <= i <= j < N for
/// which a variable is live
void LiveIntervals::computeIntervals() {
+ DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
- DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
- << "********** Function: "
- << ((Value*)mf_->getFunction())->getName() << '\n';
-
+ SmallVector<unsigned, 8> UndefUses;
for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
// Track the index of the current machine instr.
- unsigned MIIndex = getMBBStartIdx(MBB);
- DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+ LiveIndex MIIndex = getMBBStartIdx(MBB);
+ DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
@@ -945,37 +1236,52 @@ void LiveIntervals::computeIntervals() {
}
// Skip over empty initial indices.
- while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+ while (MIIndex.getVecIndex() < i2miMap_.size() &&
getInstructionFromIndex(MIIndex) == 0)
- MIIndex += InstrSlots::NUM;
+ MIIndex = getNextIndex(MIIndex);
for (; MI != miEnd; ++MI) {
- DOUT << MIIndex << "\t" << *MI;
+ DEBUG(errs() << MIIndex << "\t" << *MI);
// Handle defs.
for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
// handle register defs - build intervals
- if (MO.isReg() && MO.getReg() && MO.isDef()) {
+ if (MO.isDef())
handleRegisterDef(MBB, MI, MIIndex, MO, i);
- }
+ else if (MO.isUndef())
+ UndefUses.push_back(MO.getReg());
}
// Skip over the empty slots after each instruction.
unsigned Slots = MI->getDesc().getNumDefs();
if (Slots == 0)
Slots = 1;
- MIIndex += InstrSlots::NUM * Slots;
+
+ while (Slots--)
+ MIIndex = getNextIndex(MIIndex);
// Skip over empty indices.
- while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+ while (MIIndex.getVecIndex() < i2miMap_.size() &&
getInstructionFromIndex(MIIndex) == 0)
- MIIndex += InstrSlots::NUM;
+ MIIndex = getNextIndex(MIIndex);
}
}
+
+ // Create empty intervals for registers defined by implicit_def's (except
+ // for those implicit_defs that define values which are live out of their
+ // blocks).
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
+ unsigned UndefReg = UndefUses[i];
+ (void)getOrCreateInterval(UndefReg);
+ }
}
-bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End,
+bool LiveIntervals::findLiveInMBBs(
+ LiveIndex Start, LiveIndex End,
SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
@@ -991,7 +1297,8 @@ bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End,
return ResVal;
}
-bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End,
+bool LiveIntervals::findReachableMBBs(
+ LiveIndex Start, LiveIndex End,
SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
std::vector<IdxMBBPair>::const_iterator I =
std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
@@ -1028,23 +1335,23 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
/// copy field and returns the source register that defines it.
unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
- if (!VNI->copy)
+ if (!VNI->getCopy())
return 0;
- if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
// If it's extracting out of a physical register, return the sub-register.
- unsigned Reg = VNI->copy->getOperand(1).getReg();
+ unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm());
+ Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
return Reg;
- } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
- VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
- return VNI->copy->getOperand(2).getReg();
+ } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+ return VNI->getCopy()->getOperand(2).getReg();
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg))
return SrcReg;
- assert(0 && "Unrecognized copy instruction!");
+ llvm_unreachable("Unrecognized copy instruction!");
return 0;
}
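getVNInfoSourceReg relies on the standard operand layout of these pseudo-instructions: EXTRACT_SUBREG reads its source from operand 1 (operand 2 is the subregister index, applied when the source is physical), INSERT_SUBREG and SUBREG_TO_REG carry the inserted value in operand 2, and plain moves are decoded by isMoveInstr. Hypothetical examples:

    %reg1030<def> = EXTRACT_SUBREG %EAX, 3               ; source: subreg 3 of EAX
    %reg1031<def> = INSERT_SUBREG %reg1031, %reg1032, 2  ; source: %reg1032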
@@ -1083,8 +1390,8 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
/// isValNoAvailableAt - Return true if the val# of the specified interval
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
- unsigned UseIdx) const {
- unsigned Index = getInstructionIndex(MI);
+ LiveIndex UseIdx) const {
+ LiveIndex Index = getInstructionIndex(MI);
VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
return UI != li.end() && UI->valno == ValNo;
@@ -1099,102 +1406,19 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
if (DisableReMat)
return false;
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
- return true;
-
- int FrameIdx = 0;
- if (tii_->isLoadFromStackSlot(MI, FrameIdx) &&
- mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx))
- // FIXME: Let target specific isReallyTriviallyReMaterializable determines
- // this but remember this is not safe to fold into a two-address
- // instruction.
- // This is a load from fixed stack slot. It can be rematerialized.
- return true;
-
- // If the target-specific rules don't identify an instruction as
- // being trivially rematerializable, use some target-independent
- // rules.
- if (!MI->getDesc().isRematerializable() ||
- !tii_->isTriviallyReMaterializable(MI)) {
- if (!EnableAggressiveRemat)
- return false;
-
- // If the instruction accesses memory but the memoperands have been lost,
- // we can't analyze it.
- const TargetInstrDesc &TID = MI->getDesc();
- if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty())
- return false;
-
- // Avoid instructions obviously unsafe for remat.
- if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable())
- return false;
-
- // If the instruction accesses memory and the memory could be non-constant,
- // assume the instruction is not rematerializable.
- for (std::list<MachineMemOperand>::const_iterator
- I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){
- const MachineMemOperand &MMO = *I;
- if (MMO.isVolatile() || MMO.isStore())
- return false;
- const Value *V = MMO.getValue();
- if (!V)
- return false;
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- if (!PSV->isConstant(mf_->getFrameInfo()))
- return false;
- } else if (!aa_->pointsToConstantMemory(V))
- return false;
- }
-
- // If any of the registers accessed are non-constant, conservatively assume
- // the instruction is not rematerializable.
- unsigned ImpUse = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return false;
-
- // Only allow one def, and that in the first operand.
- if (MO.isDef() != (i == 0))
- return false;
-
- // Only allow constant-valued registers.
- bool IsLiveIn = mri_->isLiveIn(Reg);
- MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg),
- E = mri_->def_end();
-
- // For the def, it should be the only def of that register.
- if (MO.isDef() && (next(I) != E || IsLiveIn))
- return false;
-
- if (MO.isUse()) {
- // Only allow one use other register use, as that's all the
- // remat mechanisms support currently.
- if (Reg != li.reg) {
- if (ImpUse == 0)
- ImpUse = Reg;
- else if (Reg != ImpUse)
- return false;
- }
- // For the use, there should be only one associated def.
- if (I != E && (next(I) != E || IsLiveIn))
- return false;
- }
- }
- }
- }
+ if (!tii_->isTriviallyReMaterializable(MI, aa_))
+ return false;
+ // Target-specific code can mark an instruction as being rematerializable
+ // if it has one virtual reg use, though it had better be something like
+ // a PIC base register which is likely to be live everywhere.
unsigned ImpUse = getReMatImplicitUse(li, MI);
if (ImpUse) {
const LiveInterval &ImpLi = getInterval(ImpUse);
for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
re = mri_->use_end(); ri != re; ++ri) {
MachineInstr *UseMI = &*ri;
- unsigned UseIdx = getInstructionIndex(UseMI);
+ LiveIndex UseIdx = getInstructionIndex(UseMI);
if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
continue;
if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
@@ -1279,7 +1503,7 @@ static bool FilterFoldedOps(MachineInstr *MI,
/// returns true.
bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
VirtRegMap &vrm, MachineInstr *DefMI,
- unsigned InstrIdx,
+ LiveIndex InstrIdx,
SmallVector<unsigned, 2> &Ops,
bool isSS, int Slot, unsigned Reg) {
// If it is an implicit def instruction, just delete it.
@@ -1318,7 +1542,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
vrm.transferRestorePts(MI, fmi);
vrm.transferEmergencySpills(MI, fmi);
mi2iMap_.erase(MI);
- i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+ i2miMap_[InstrIdx.getVecIndex()] = fmi;
mi2iMap_[fmi] = InstrIdx;
MI = MBB.insert(MBB.erase(MI), fmi);
++numFolds;
@@ -1391,7 +1615,8 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
bool LiveIntervals::
rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, unsigned index, unsigned end, MachineInstr *MI,
+ bool TrySplit, LiveIndex index, LiveIndex end,
+ MachineInstr *MI,
MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
unsigned Slot, int LdSlot,
bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
@@ -1422,8 +1647,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
// If this is the rematerializable definition MI itself and
// all of its uses are rematerialized, simply delete it.
if (MI == ReMatOrigDefMI && CanDelete) {
- DOUT << "\t\t\t\tErasing re-materlizable def: ";
- DOUT << MI << '\n';
+ DEBUG(errs() << "\t\t\t\tErasing re-materlizable def: "
+ << MI << '\n');
RemoveMachineInstrFromMaps(MI);
vrm.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
@@ -1465,23 +1690,13 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
continue;
if (RegJ == RegI) {
Ops.push_back(j);
- HasUse |= MOj.isUse();
- HasDef |= MOj.isDef();
+ if (!MOj.isUndef()) {
+ HasUse |= MOj.isUse();
+ HasDef |= MOj.isDef();
+ }
}
}
- if (HasUse && !li.liveAt(getUseIndex(index)))
- // Must be defined by an implicit def. It should not be spilled. Note,
- // this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- HasUse = false;
-
// Create a new virtual register for the spill interval.
// Create the new register now so we can map the fold instruction
// to the new register so when it is unfolded we get the correct
@@ -1537,7 +1752,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (CreatedNewVReg) {
if (DefIsReMat) {
- vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
+ vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
// Each valnum may have its own remat id.
ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
@@ -1577,38 +1792,46 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (HasUse) {
if (CreatedNewVReg) {
- LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
- nI.getNextValue(0, 0, false, VNInfoAllocator));
- DOUT << " +" << LR;
+ LiveRange LR(getLoadIndex(index), getNextSlot(getUseIndex(index)),
+ nI.getNextValue(LiveIndex(), 0, false,
+ VNInfoAllocator));
+ DEBUG(errs() << " +" << LR);
nI.addRange(LR);
} else {
// Extend the split live interval to this def / use.
- unsigned End = getUseIndex(index)+1;
+ LiveIndex End = getNextSlot(getUseIndex(index));
LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
nI.getValNumInfo(nI.getNumValNums()-1));
- DOUT << " +" << LR;
+ DEBUG(errs() << " +" << LR);
nI.addRange(LR);
}
}
if (HasDef) {
LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(0, 0, false, VNInfoAllocator));
- DOUT << " +" << LR;
+ nI.getNextValue(LiveIndex(), 0, false,
+ VNInfoAllocator));
+ DEBUG(errs() << " +" << LR);
nI.addRange(LR);
}
- DOUT << "\t\t\t\tAdded new interval: ";
- nI.print(DOUT, tri_);
- DOUT << '\n';
+ DEBUG({
+ errs() << "\t\t\t\tAdded new interval: ";
+ nI.print(errs(), tri_);
+ errs() << '\n';
+ });
}
return CanFold;
}
bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
const VNInfo *VNI,
- MachineBasicBlock *MBB, unsigned Idx) const {
- unsigned End = getMBBEndIdx(MBB);
+ MachineBasicBlock *MBB,
+ LiveIndex Idx) const {
+ LiveIndex End = getMBBEndIdx(MBB);
for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
- unsigned KillIdx = VNI->kills[j];
+ if (VNI->kills[j].isPHIIndex())
+ continue;
+
+ LiveIndex KillIdx = VNI->kills[j];
if (KillIdx > Idx && KillIdx < End)
return true;
}
@@ -1619,11 +1842,11 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
/// during spilling.
namespace {
struct RewriteInfo {
- unsigned Index;
+ LiveIndex Index;
MachineInstr *MI;
bool HasUse;
bool HasDef;
- RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d)
+ RewriteInfo(LiveIndex i, MachineInstr *mi, bool u, bool d)
: Index(i), MI(mi), HasUse(u), HasDef(d) {}
};
@@ -1652,8 +1875,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
std::vector<LiveInterval*> &NewLIs) {
bool AllCanFold = true;
unsigned NewVReg = 0;
- unsigned start = getBaseIndex(I->start);
- unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM;
+ LiveIndex start = getBaseIndex(I->start);
+ LiveIndex end = getNextIndex(getBaseIndex(getPrevSlot(I->end)));
// First collect all the def / use in this live range that will be rewritten.
// Make sure they are sorted according to instruction index.
@@ -1664,10 +1887,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
MachineOperand &O = ri.getOperand();
++ri;
assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
- unsigned index = getInstructionIndex(MI);
+ LiveIndex index = getInstructionIndex(MI);
if (index < start || index >= end)
continue;
- if (O.isUse() && !li.liveAt(getUseIndex(index)))
+
+ if (O.isUndef())
// Must be defined by an implicit def. It should not be spilled. Note,
// this is for correctness reasons, e.g.
// 8 %reg1024<def> = IMPLICIT_DEF
@@ -1687,7 +1911,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
RewriteInfo &rwi = RewriteMIs[i];
++i;
- unsigned index = rwi.Index;
+ LiveIndex index = rwi.Index;
bool MIHasUse = rwi.HasUse;
bool MIHasDef = rwi.HasDef;
MachineInstr *MI = rwi.MI;
@@ -1773,7 +1997,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index));
else {
// If this is a two-address code, then this index starts a new VNInfo.
- const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index));
+ const VNInfo *VNI = li.findDefinedVNInfoForRegInt(getDefIndex(index));
if (VNI)
HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index));
}
@@ -1786,7 +2010,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
SpillIdxes.insert(std::make_pair(MBBId, S));
} else if (SII->second.back().vreg != NewVReg) {
SII->second.push_back(SRInfo(index, NewVReg, true));
- } else if ((int)index > SII->second.back().index) {
+ } else if (index > SII->second.back().index) {
// If there is an earlier def and this is a two-address
// instruction, then it's not possible to fold the store (which
// would also fold the load).
@@ -1797,7 +2021,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
SpillMBBs.set(MBBId);
} else if (SII != SpillIdxes.end() &&
SII->second.back().vreg == NewVReg &&
- (int)index > SII->second.back().index) {
+ index > SII->second.back().index) {
// There is an earlier def that's not killed (must be two-address).
// The spill is no longer needed.
SII->second.pop_back();
@@ -1814,7 +2038,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
SpillIdxes.find(MBBId);
if (SII != SpillIdxes.end() &&
SII->second.back().vreg == NewVReg &&
- (int)index > SII->second.back().index)
+ index > SII->second.back().index)
// Use(s) following the last def, it's not safe to fold the spill.
SII->second.back().canFold = false;
DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
@@ -1848,8 +2072,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
}
}
-bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr,
- BitVector &RestoreMBBs,
+bool LiveIntervals::alsoFoldARestore(int Id, LiveIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
if (!RestoreMBBs[Id])
return false;
@@ -1862,15 +2086,15 @@ bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr,
return false;
}
-void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr,
- BitVector &RestoreMBBs,
+void LiveIntervals::eraseRestoreInfo(int Id, LiveIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
if (!RestoreMBBs[Id])
return;
std::vector<SRInfo> &Restores = RestoreIdxes[Id];
for (unsigned i = 0, e = Restores.size(); i != e; ++i)
if (Restores[i].index == index && Restores[i].vreg)
- Restores[i].index = -1;
+ Restores[i].index = LiveIndex();
}
/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
@@ -1920,9 +2144,11 @@ addIntervalsForSpillsFast(const LiveInterval &li,
assert(li.weight != HUGE_VALF &&
"attempt to spill already spilled interval!");
- DOUT << "\t\t\t\tadding intervals for spills for interval: ";
- DEBUG(li.dump());
- DOUT << '\n';
+ DEBUG({
+ errs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.dump();
+ errs() << '\n';
+ });
const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
@@ -1967,27 +2193,31 @@ addIntervalsForSpillsFast(const LiveInterval &li,
}
// Fill in the new live interval.
- unsigned index = getInstructionIndex(MI);
+ LiveIndex index = getInstructionIndex(MI);
if (HasUse) {
LiveRange LR(getLoadIndex(index), getUseIndex(index),
- nI.getNextValue(0, 0, false, getVNInfoAllocator()));
- DOUT << " +" << LR;
+ nI.getNextValue(LiveIndex(), 0, false,
+ getVNInfoAllocator()));
+ DEBUG(errs() << " +" << LR);
nI.addRange(LR);
vrm.addRestorePoint(NewVReg, MI);
}
if (HasDef) {
LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(0, 0, false, getVNInfoAllocator()));
- DOUT << " +" << LR;
+ nI.getNextValue(LiveIndex(), 0, false,
+ getVNInfoAllocator()));
+ DEBUG(errs() << " +" << LR);
nI.addRange(LR);
vrm.addSpillPoint(NewVReg, true, MI);
}
added.push_back(&nI);
- DOUT << "\t\t\t\tadded new interval: ";
- DEBUG(nI.dump());
- DOUT << '\n';
+ DEBUG({
+ errs() << "\t\t\t\tadded new interval: ";
+ nI.dump();
+ errs() << '\n';
+ });
}
@@ -2008,9 +2238,11 @@ addIntervalsForSpills(const LiveInterval &li,
assert(li.weight != HUGE_VALF &&
"attempt to spill already spilled interval!");
- DOUT << "\t\t\t\tadding intervals for spills for interval: ";
- li.print(DOUT, tri_);
- DOUT << '\n';
+ DEBUG({
+ errs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(errs(), tri_);
+ errs() << '\n';
+ });
// Each bit specifies whether a spill is required in the MBB.
BitVector SpillMBBs(mf_->getNumBlockIDs());
@@ -2036,8 +2268,8 @@ addIntervalsForSpills(const LiveInterval &li,
if (vrm.getPreSplitReg(li.reg)) {
vrm.setIsSplitFromReg(li.reg, 0);
// Unset the split kill marker on the last use.
- unsigned KillIdx = vrm.getKillPoint(li.reg);
- if (KillIdx) {
+ LiveIndex KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx != LiveIndex()) {
MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
assert(KillMI && "Last use disappeared?");
int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
@@ -2081,9 +2313,7 @@ addIntervalsForSpills(const LiveInterval &li,
return NewLIs;
}
- bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li);
- if (SplitLimit != -1 && (int)numSplits >= SplitLimit)
- TrySplit = false;
+ bool TrySplit = !intervalIsInOneMBB(li);
if (TrySplit)
++numSplits;
bool NeedStackSlot = false;
@@ -2102,7 +2332,7 @@ addIntervalsForSpills(const LiveInterval &li,
ReMatOrigDefs[VN] = ReMatDefMI;
// Original def may be modified so we have to make a copy here.
MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
- ClonedMIs.push_back(Clone);
+ CloneMIs.push_back(Clone);
ReMatDefs[VN] = Clone;
bool CanDelete = true;
@@ -2165,7 +2395,7 @@ addIntervalsForSpills(const LiveInterval &li,
while (Id != -1) {
std::vector<SRInfo> &spills = SpillIdxes[Id];
for (unsigned i = 0, e = spills.size(); i != e; ++i) {
- int index = spills[i].index;
+ LiveIndex index = spills[i].index;
unsigned VReg = spills[i].vreg;
LiveInterval &nI = getOrCreateInterval(VReg);
bool isReMat = vrm.isReMaterialized(VReg);
@@ -2203,7 +2433,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (FoundUse) {
// Also folded uses, do not issue a load.
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
- nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+ nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index)));
}
nI.removeRange(getDefIndex(index), getStoreIndex(index));
}
@@ -2228,8 +2458,8 @@ addIntervalsForSpills(const LiveInterval &li,
while (Id != -1) {
std::vector<SRInfo> &restores = RestoreIdxes[Id];
for (unsigned i = 0, e = restores.size(); i != e; ++i) {
- int index = restores[i].index;
- if (index == -1)
+ LiveIndex index = restores[i].index;
+ if (index == LiveIndex())
continue;
unsigned VReg = restores[i].vreg;
LiveInterval &nI = getOrCreateInterval(VReg);
@@ -2284,7 +2514,7 @@ addIntervalsForSpills(const LiveInterval &li,
// If folding is not possible / failed, then tell the spiller to issue a
// load / rematerialization for us.
if (Folded)
- nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+ nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index)));
else
vrm.addRestorePoint(VReg, MI);
}
@@ -2300,7 +2530,7 @@ addIntervalsForSpills(const LiveInterval &li,
LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI);
if (!AddedKill.count(LI)) {
LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
- unsigned LastUseIdx = getBaseIndex(LR->end);
+ LiveIndex LastUseIdx = getBaseIndex(LR->end);
MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
assert(UseIdx != -1);
@@ -2351,7 +2581,7 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
E = mri_->reg_end(); I != E; ++I) {
MachineOperand &O = I.getOperand();
MachineInstr *MI = O.getParent();
- unsigned Index = getInstructionIndex(MI);
+ LiveIndex Index = getInstructionIndex(MI);
if (pli.liveAt(Index))
++NumConflicts;
}
@@ -2382,29 +2612,31 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
if (SeenMIs.count(MI))
continue;
SeenMIs.insert(MI);
- unsigned Index = getInstructionIndex(MI);
+ LiveIndex Index = getInstructionIndex(MI);
if (pli.liveAt(Index)) {
vrm.addEmergencySpill(SpillReg, MI);
- unsigned StartIdx = getLoadIndex(Index);
- unsigned EndIdx = getStoreIndex(Index)+1;
+ LiveIndex StartIdx = getLoadIndex(Index);
+ LiveIndex EndIdx = getNextSlot(getStoreIndex(Index));
if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
pli.removeRange(StartIdx, EndIdx);
Cut = true;
} else {
- cerr << "Ran out of registers during register allocation!\n";
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
- cerr << "Please check your inline asm statement for invalid "
+ Msg << "\nPlease check your inline asm statement for invalid "
<< "constraints:\n";
- MI->print(cerr.stream(), tm_);
+ MI->print(Msg, tm_);
}
- exit(1);
+ llvm_report_error(Msg.str());
}
for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) {
if (!hasInterval(*AS))
continue;
LiveInterval &spli = getInterval(*AS);
if (spli.liveAt(Index))
- spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1);
+ spli.removeRange(getLoadIndex(Index), getNextSlot(getStoreIndex(Index)));
}
}
}
@@ -2412,16 +2644,18 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
}
LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
- MachineInstr* startInst) {
+ MachineInstr* startInst) {
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
- getInstructionIndex(startInst) + InstrSlots::DEF,
- startInst, true, getVNInfoAllocator());
+ LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF),
+ startInst, true, getVNInfoAllocator());
VN->setHasPHIKill(true);
- VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
- LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
- getMBBEndIdx(startInst->getParent()) + 1, VN);
+ VN->kills.push_back(terminatorGaps[startInst->getParent()]);
+ LiveRange LR(
+ LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF),
+ getNextSlot(getMBBEndIdx(startInst->getParent())), VN);
Interval.addRange(LR);
return LR;
}
+
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 86f7ea20c9be..a7bea1fd4f98 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include <limits>
using namespace llvm;
@@ -52,15 +53,16 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) {
}
/// print - Implement the dump method.
-void LiveStacks::print(std::ostream &O, const Module*) const {
- O << "********** INTERVALS **********\n";
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
for (const_iterator I = begin(), E = end(); I != E; ++I) {
- I->second.print(O);
+ I->second.print(OS);
int Slot = I->first;
const TargetRegisterClass *RC = getIntervalRegClass(Slot);
if (RC)
- O << " [" << RC->getName() << "]\n";
+ OS << " [" << RC->getName() << "]\n";
else
- O << " [Unknown]\n";
+ OS << " [Unknown]\n";
}
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index bd845085bbf5..139e0291ea7a 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -37,7 +37,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Config/alloca.h"
#include <algorithm>
using namespace llvm;
@@ -48,20 +47,21 @@ static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(UnreachableMachineBlockElimID);
AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
void LiveVariables::VarInfo::dump() const {
- cerr << " Alive in blocks: ";
+ errs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
- cerr << *I << ", ";
- cerr << "\n Killed by:";
+ errs() << *I << ", ";
+ errs() << "\n Killed by:";
if (Kills.empty())
- cerr << " No instructions.\n";
+ errs() << " No instructions.\n";
else {
for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- cerr << "\n #" << i << ": " << *Kills[i];
- cerr << "\n";
+ errs() << "\n #" << i << ": " << *Kills[i];
+ errs() << "\n";
}
}
@@ -180,9 +180,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
}
/// FindLastPartialDef - Return the last partial def of the specified register.
-/// Also returns the sub-register that's defined.
+/// Also returns the sub-registers that are defined by the instruction.
MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
- unsigned &PartDefReg) {
+ SmallSet<unsigned,4> &PartDefRegs) {
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = NULL;
@@ -198,7 +198,23 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
LastDefDist = Dist;
}
}
- PartDefReg = LastDefReg;
+
+ if (!LastDef)
+ return 0;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ PartDefRegs.insert(DefReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PartDefRegs.insert(SubReg);
+ }
+ }
return LastDef;
}
@@ -216,8 +232,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
// ...
// = EAX
// All of the sub-registers must have been defined before the use of Reg!
- unsigned PartDefReg = 0;
- MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg);
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
// If LastPartialDef is NULL, it must be using a livein register.
if (LastPartialDef) {
LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
@@ -228,7 +244,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
unsigned SubReg = *SubRegs; ++SubRegs) {
if (Processed.count(SubReg))
continue;
- if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg))
+ if (PartDefRegs.count(SubReg))
continue;
// This part of Reg was defined before the last partial def. It's killed
// here.
@@ -249,78 +265,13 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
PhysRegUse[SubReg] = MI;
}
-/// hasRegisterUseBelow - Return true if the specified register is used after
-/// the current instruction and before it's next definition.
-bool LiveVariables::hasRegisterUseBelow(unsigned Reg,
- MachineBasicBlock::iterator I,
- MachineBasicBlock *MBB) {
- if (I == MBB->end())
- return false;
-
- // First find out if there are any uses / defs below.
- bool hasDistInfo = true;
- unsigned CurDist = DistanceMap[I];
- SmallVector<MachineInstr*, 4> Uses;
- SmallVector<MachineInstr*, 4> Defs;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineOperand &UDO = RI.getOperand();
- MachineInstr *UDMI = &*RI;
- if (UDMI->getParent() != MBB)
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
- bool isBelow = false;
- if (DI == DistanceMap.end()) {
- // Must be below if it hasn't been assigned a distance yet.
- isBelow = true;
- hasDistInfo = false;
- } else if (DI->second > CurDist)
- isBelow = true;
- if (isBelow) {
- if (UDO.isUse())
- Uses.push_back(UDMI);
- if (UDO.isDef())
- Defs.push_back(UDMI);
- }
- }
-
- if (Uses.empty())
- // No uses below.
- return false;
- else if (!Uses.empty() && Defs.empty())
- // There are uses below but no defs below.
- return true;
- // There are both uses and defs below. We need to know which comes first.
- if (!hasDistInfo) {
- // Complete DistanceMap for this MBB. This information is computed only
- // once per MBB.
- ++I;
- ++CurDist;
- for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist)
- DistanceMap.insert(std::make_pair(I, CurDist));
- }
-
- unsigned EarliestUse = DistanceMap[Uses[0]];
- for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
- unsigned Dist = DistanceMap[Uses[i]];
- if (Dist < EarliestUse)
- EarliestUse = Dist;
- }
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Dist = DistanceMap[Defs[i]];
- if (Dist < EarliestUse)
- // The register is defined before its first use below.
- return false;
- }
- return true;
-}
-
bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
- if (!PhysRegUse[Reg] && !PhysRegDef[Reg])
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
return false;
- MachineInstr *LastRefOrPartRef = PhysRegUse[Reg]
- ? PhysRegUse[Reg] : PhysRegDef[Reg];
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
// The whole register is used.
// AL =
@@ -339,9 +290,22 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// AX<dead> = AL<imp-def>
// = AL<kill>
// AX =
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+        // def; keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
if (MachineInstr *Use = PhysRegUse[SubReg]) {
PartUses.insert(SubReg);
for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
@@ -354,35 +318,47 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
}
- if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI)
- // If the last reference is the last def, then it's not used at all.
- // That is, unless we are currently processing the last reference itself.
- LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
-
- // Partial uses. Mark register def dead and add implicit def of
- // sub-registers which are used.
- // EAX<dead> = op AL<imp-def>
- // That is, EAX def is dead but AL def extends pass it.
- // Enable this after live interval analysis is fixed to improve codegen!
- else if (!PhysRegUse[Reg]) {
+ if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ } else if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+    // That is, EAX def is dead but AL def extends past it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
- if (PartUses.count(SubReg)) {
- PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
- true, true));
- LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
- PartUses.erase(*SS);
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
}
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.erase(*SS);
}
- }
- else
+ } else
LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
return true;
}
-void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
@@ -398,6 +374,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
// AL =
// AH =
// = AX
+ if (Live.count(SubReg))
+ continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
Live.insert(SubReg);
for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
@@ -408,68 +386,25 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
// Start from the largest piece, find the last time any part of the register
// is referenced.
- if (!HandlePhysRegKill(Reg, MI)) {
- // Only some of the sub-registers are used.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
- if (!Live.count(SubReg))
- // Skip if this sub-register isn't defined.
- continue;
- if (HandlePhysRegKill(SubReg, MI)) {
- Live.erase(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
- Live.erase(*SS);
- }
- }
- assert(Live.empty() && "Not all defined registers are killed / dead?");
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
}
- if (MI) {
- // Does this extend the live range of a super-register?
- SmallSet<unsigned, 8> Processed;
- for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg);
- unsigned SuperReg = *SuperRegs; ++SuperRegs) {
- if (Processed.count(SuperReg))
- continue;
- MachineInstr *LastRef = PhysRegUse[SuperReg]
- ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg];
- if (LastRef && LastRef != MI) {
- // The larger register is previously defined. Now a smaller part is
- // being re-defined. Treat it as read/mod/write if there are uses
- // below.
- // EAX =
- // AX = EAX<imp-use,kill>, EAX<imp-def>
- // ...
- /// = EAX
- if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) {
- MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/,
- true/*IsImp*/,true/*IsKill*/));
- MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/,
- true/*IsImp*/));
- PhysRegDef[SuperReg] = MI;
- PhysRegUse[SuperReg] = NULL;
- Processed.insert(SuperReg);
- for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
- PhysRegDef[*SS] = MI;
- PhysRegUse[*SS] = NULL;
- Processed.insert(*SS);
- }
- } else {
- // Otherwise, the super register is killed.
- if (HandlePhysRegKill(SuperReg, MI)) {
- PhysRegDef[SuperReg] = NULL;
- PhysRegUse[SuperReg] = NULL;
- for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
- PhysRegDef[*SS] = NULL;
- PhysRegUse[*SS] = NULL;
- Processed.insert(*SS);
- }
- }
- }
- }
- }
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
- // Remember this def.
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
PhysRegDef[Reg] = MI;
PhysRegUse[Reg] = NULL;
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
@@ -480,6 +415,21 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
}
}
+namespace {
+ struct RegSorter {
+ const TargetRegisterInfo *TRI;
+
+ RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { }
+ bool operator()(unsigned A, unsigned B) {
+ if (TRI->isSubRegister(A, B))
+ return true;
+ else if (TRI->isSubRegister(B, A))
+ return false;
+ return A < B;
+ }
+ };
+}
+
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
@@ -512,11 +462,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *MBB = *DFI;
// Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
EE = MBB->livein_end(); II != EE; ++II) {
assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
"Cannot have a live-in virtual register!");
- HandlePhysRegDef(*II, 0);
+ HandlePhysRegDef(*II, 0, Defs);
}
// Loop over all of the instructions, processing them.
@@ -563,8 +514,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
else if (!ReservedRegisters[MOReg])
- HandlePhysRegDef(MOReg, MI);
+ HandlePhysRegDef(MOReg, MI, Defs);
}
+ UpdatePhysRegDefs(MI, Defs);
}
// Handle any virtual assignments from PHI nodes which might be at the
@@ -603,7 +555,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// available at the end of the basic block.
for (unsigned i = 0; i != NumRegs; ++i)
if (PhysRegDef[i] || PhysRegUse[i])
- HandlePhysRegDef(i, 0);
+ HandlePhysRegDef(i, 0, Defs);
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
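Many hunks above walk sub-registers via the null-terminated array that getSubRegisters returns, declaring the element in the loop condition. A self-contained sketch of that idiom; collectRegAndSubRegs is a hypothetical helper, not part of this commit:

    #include "llvm/ADT/SmallSet.h"
    #include "llvm/Target/TargetRegisterInfo.h"

    // The declaration-in-condition form reads one entry per iteration and
    // terminates when it hits the array's 0 sentinel, as in the loops above.
    static void collectRegAndSubRegs(unsigned Reg,
                                     const llvm::TargetRegisterInfo *TRI,
                                     llvm::SmallSet<unsigned, 8> &Set) {
      Set.insert(Reg);
      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
           unsigned SubReg = *SubRegs; ++SubRegs)
        Set.insert(SubReg);
    }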
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 14acb71eeb40..8486bb084fe9 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -19,12 +19,14 @@
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -38,6 +40,7 @@ namespace {
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -53,7 +56,8 @@ namespace {
void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
const TargetRegisterInfo &TRI);
void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
- const TargetRegisterInfo &TRI);
+ const TargetRegisterInfo &TRI,
+ bool AddIfNotFound = false);
};
char LowerSubregsInstructionPass::ID = 0;
@@ -85,10 +89,11 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
void
LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
unsigned SrcReg,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ bool AddIfNotFound) {
for (MachineBasicBlock::iterator MII =
prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterKilled(SrcReg, &TRI))
+ if (MII->addRegisterKilled(SrcReg, &TRI, AddIfNotFound))
break;
assert(MII != MI->getParent()->begin() &&
"copyRegToReg output doesn't reference source register!");
@@ -100,7 +105,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
MachineFunction &MF = *MBB->getParent();
const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-
+
assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
MI->getOperand(2).isImm() && "Malformed extract_subreg");
@@ -114,41 +119,41 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
"Extract supperg source must be a physical register");
assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
"Extract destination must be in a physical register");
-
- DOUT << "subreg: CONVERTING: " << *MI;
+ assert(SrcReg && "invalid subregister index for register");
+
+ DEBUG(errs() << "subreg: CONVERTING: " << *MI);
if (SrcReg == DstReg) {
- // No need to insert an identify copy instruction.
- DOUT << "subreg: eliminated!";
- // Find the kill of the destination register's live range, and insert
- // a kill of the source register at that point.
- if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead())
- for (MachineBasicBlock::iterator MII =
- next(MachineBasicBlock::iterator(MI));
- MII != MBB->end(); ++MII)
- if (MII->killsRegister(DstReg, &TRI)) {
- MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true);
- break;
- }
+ // No need to insert an identity copy instruction.
+ if (MI->getOperand(1).isKill()) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII.get(TargetInstrInfo::KILL));
+ MI->RemoveOperand(2); // SubIdx
+ DEBUG(errs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+
+ DEBUG(errs() << "subreg: eliminated!");
} else {
// Insert copy
- const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg);
- assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) &&
- "Extract subreg and Dst must be of same register class");
- TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC);
+ const TargetRegisterClass *TRCS = TRI.getPhysicalRegisterRegClass(DstReg);
+ const TargetRegisterClass *TRCD = TRI.getPhysicalRegisterRegClass(SrcReg);
+ bool Emitted = TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS);
+ (void)Emitted;
+ assert(Emitted && "Subreg and Dst must be of compatible register class");
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstReg, TRI);
if (MI->getOperand(1).isKill())
- TransferKillFlag(MI, SrcReg, TRI);
-
-#ifndef NDEBUG
- MachineBasicBlock::iterator dMI = MI;
- DOUT << "subreg: " << *(--dMI);
-#endif
+ TransferKillFlag(MI, SuperReg, TRI, true);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ errs() << "subreg: " << *(--dMI);
+ });
}
- DOUT << "\n";
+ DEBUG(errs() << '\n');
MBB->erase(MI);
return true;
}
@@ -176,7 +181,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
- DOUT << "subreg: CONVERTING: " << *MI;
+ DEBUG(errs() << "subreg: CONVERTING: " << *MI);
if (DstSubReg == InsReg && InsSIdx == 0) {
     // No need to insert an identity copy instruction.
@@ -185,7 +190,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
// %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
// The first def is defining RAX, not EAX so the top bits were not
// zero extended.
- DOUT << "subreg: eliminated!";
+ DEBUG(errs() << "subreg: eliminated!");
} else {
// Insert sub-register copy
const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
@@ -196,14 +201,13 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
TransferDeadFlag(MI, DstSubReg, TRI);
if (MI->getOperand(2).isKill())
TransferKillFlag(MI, InsReg, TRI);
-
-#ifndef NDEBUG
- MachineBasicBlock::iterator dMI = MI;
- DOUT << "subreg: " << *(--dMI);
-#endif
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ errs() << "subreg: " << *(--dMI);
+ });
}
- DOUT << "\n";
+ DEBUG(errs() << '\n');
MBB->erase(MI);
return true;
}
@@ -228,49 +232,79 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
assert(SubIdx != 0 && "Invalid index for insert_subreg");
unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
-
+ assert(DstSubReg && "invalid subregister index for register");
assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
"Insert superreg source must be in a physical register");
assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
- DOUT << "subreg: CONVERTING: " << *MI;
+ DEBUG(errs() << "subreg: CONVERTING: " << *MI);
if (DstSubReg == InsReg) {
- // No need to insert an identify copy instruction.
- DOUT << "subreg: eliminated!";
+ // No need to insert an identity copy instruction. If the SrcReg was
+    // <undef>, we need to make sure it is alive by inserting a KILL.
+ if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII.get(TargetInstrInfo::KILL), DstReg);
+ if (MI->getOperand(2).isUndef())
+ MIB.addReg(InsReg, RegState::Undef);
+ else
+ MIB.addReg(InsReg, RegState::Kill);
+ } else {
+ DEBUG(errs() << "subreg: eliminated!\n");
+ MBB->erase(MI);
+ return true;
+ }
} else {
// Insert sub-register copy
const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
- TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ if (MI->getOperand(2).isUndef())
+ // If the source register being inserted is undef, then this becomes a
+ // KILL.
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII.get(TargetInstrInfo::KILL), DstSubReg);
+ else
+ TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg.
+ if (!MI->getOperand(1).isUndef())
+ CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true));
+
// Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead())
+ if (MI->getOperand(0).isDead()) {
TransferDeadFlag(MI, DstSubReg, TRI);
- if (MI->getOperand(1).isKill())
- TransferKillFlag(MI, InsReg, TRI);
+ } else {
+ // Make sure the full DstReg is live after this replacement.
+ CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true));
+ }
-#ifndef NDEBUG
- MachineBasicBlock::iterator dMI = MI;
- DOUT << "subreg: " << *(--dMI);
-#endif
+ // Make sure the inserted register gets killed
+ if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef())
+ TransferKillFlag(MI, InsReg, TRI);
}
- DOUT << "\n";
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ errs() << "subreg: " << *(--dMI) << "\n";
+ });
+
MBB->erase(MI);
- return true;
+ return true;
}
/// runOnMachineFunction - Reduce subregister inserts and extracts to register
/// copies.
///
bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DOUT << "Machine Function\n";
-
- bool MadeChange = false;
+ DEBUG(errs() << "Machine Function\n"
+ << "********** LOWERING SUBREG INSTRS **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
- DOUT << "********** LOWERING SUBREG INSTRS **********\n";
- DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ bool MadeChange = false;
for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
mbbi != mbbe; ++mbbi) {
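This file's DOUT statements become DEBUG(errs() << ...) above. A minimal sketch of the macro's usual setup, assuming the standard llvm/Support/Debug.h behavior where DEBUG_TYPE is defined before the include:

    #define DEBUG_TYPE "lowersubregs"   // enables -debug-only=lowersubregs
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    static void traceSubregStep(const char *What) {
      // Compiled out entirely in NDEBUG builds; otherwise gated at runtime
      // by -debug / -debug-only=lowersubregs.
      DEBUG(llvm::errs() << "subreg: " << What << '\n');
    }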
diff --git a/lib/CodeGen/MachO.h b/lib/CodeGen/MachO.h
index bd9bd61e9ede..f2b40fe58e21 100644
--- a/lib/CodeGen/MachO.h
+++ b/lib/CodeGen/MachO.h
@@ -14,17 +14,15 @@
#ifndef MACHO_H
#define MACHO_H
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/CodeGen/BinaryObject.h"
#include <string>
#include <vector>
namespace llvm {
-typedef std::vector<unsigned char> DataBuffer;
-
+class GlobalValue;
+class MCAsmInfo;
+
/// MachOSym - This struct contains information about each symbol that is
/// added to logical symbol table for the module. This is eventually
/// turned into a real symbol table in the file.
@@ -70,7 +68,7 @@ struct MachOSym {
};
MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
- const TargetAsmInfo *TAI);
+ const MCAsmInfo *MAI);
struct SymCmp {
// FIXME: this does not appear to be sorting 'f' after 'F'
@@ -110,7 +108,7 @@ struct MachOHeader {
/// HeaderData - The actual data for the header which we are building
/// up for emission to the file.
- DataBuffer HeaderData;
+ std::vector<unsigned char> HeaderData;
// Constants for the filetype field
// see <mach-o/loader.h> for additional info on the various types
@@ -180,8 +178,8 @@ struct MachOHeader {
};
MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
- reserved(0) { }
-
+ reserved(0) {}
+
/// cmdSize - This routine returns the size of the MachOSection as written
/// to disk, depending on whether the destination is a 64 bit Mach-O file.
unsigned cmdSize(bool is64Bit) const {
@@ -203,7 +201,7 @@ struct MachOHeader {
}
}; // end struct MachOHeader
-
+
/// MachOSegment - This struct contains the necessary information to
/// emit the load commands for each section in the file.
struct MachOSegment {
@@ -245,13 +243,13 @@ struct MachOSegment {
SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
SEG_VM_PROT_ALL = VM_PROT_ALL
};
-
+
// Constants for the cmd field
// see <mach-o/loader.h>
enum { LC_SEGMENT = 0x01, // segment of this file to be mapped
LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped
};
-
+
/// cmdSize - This routine returns the size of the MachOSection as written
/// to disk, depending on whether the destination is a 64 bit Mach-O file.
unsigned cmdSize(bool is64Bit) const {
@@ -272,11 +270,10 @@ struct MachOSegment {
 /// turned into the SectionCommand in the load command for a particular
/// segment.
-struct MachOSection {
+struct MachOSection : public BinaryObject {
std::string sectname; // name of this section,
std::string segname; // segment this section goes in
uint64_t addr; // memory address of this section
- uint64_t size; // size in bytes of this section
uint32_t offset; // file offset of this section
uint32_t align; // section alignment (power of 2)
uint32_t reloff; // file offset of relocation entries
@@ -285,24 +282,15 @@ struct MachOSection {
uint32_t reserved1; // reserved (for offset or index)
uint32_t reserved2; // reserved (for count or sizeof)
uint32_t reserved3; // reserved (64 bit only)
-
+
/// A unique number for this section, which will be used to match symbols
/// to the correct section.
uint32_t Index;
-
- /// SectionData - The actual data for this section which we are building
- /// up for emission to the file.
- DataBuffer SectionData;
/// RelocBuffer - A buffer to hold the mach-o relocations before we write
/// them out at the appropriate location in the file.
- DataBuffer RelocBuffer;
-
- /// Relocations - The relocations that we have encountered so far in this
- /// section that we will need to convert to MachORelocation entries when
- /// the file is written.
- std::vector<MachineRelocation> Relocations;
-
+ std::vector<unsigned char> RelocBuffer;
+
// Constants for the section types (low 8 bits of flags field)
// see <mach-o/loader.h>
enum { S_REGULAR = 0,
@@ -374,48 +362,49 @@ struct MachOSection {
}
MachOSection(const std::string &seg, const std::string &sect)
- : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
- reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+ : BinaryObject(), sectname(sect), segname(seg), addr(0), offset(0),
+ align(2), reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
reserved3(0) { }
}; // end struct MachOSection
- /// MachOSymTab - This struct contains information about the offsets and
- /// size of symbol table information.
- /// segment.
- struct MachODySymTab {
- uint32_t cmd; // LC_DYSYMTAB
- uint32_t cmdsize; // sizeof( MachODySymTab )
- uint32_t ilocalsym; // index to local symbols
- uint32_t nlocalsym; // number of local symbols
- uint32_t iextdefsym; // index to externally defined symbols
- uint32_t nextdefsym; // number of externally defined symbols
- uint32_t iundefsym; // index to undefined symbols
- uint32_t nundefsym; // number of undefined symbols
- uint32_t tocoff; // file offset to table of contents
- uint32_t ntoc; // number of entries in table of contents
- uint32_t modtaboff; // file offset to module table
- uint32_t nmodtab; // number of module table entries
- uint32_t extrefsymoff; // offset to referenced symbol table
- uint32_t nextrefsyms; // number of referenced symbol table entries
- uint32_t indirectsymoff; // file offset to the indirect symbol table
- uint32_t nindirectsyms; // number of indirect symbol table entries
- uint32_t extreloff; // offset to external relocation entries
- uint32_t nextrel; // number of external relocation entries
- uint32_t locreloff; // offset to local relocation entries
- uint32_t nlocrel; // number of local relocation entries
-
- // Constants for the cmd field
- // see <mach-o/loader.h>
- enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
- };
-
- MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
- ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
- iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
- nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
- nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
- };
+/// MachODySymTab - This struct contains information about the offsets and
+/// sizes of the dynamic symbol table information.
+struct MachODySymTab {
+ uint32_t cmd; // LC_DYSYMTAB
+ uint32_t cmdsize; // sizeof(MachODySymTab)
+ uint32_t ilocalsym; // index to local symbols
+ uint32_t nlocalsym; // number of local symbols
+ uint32_t iextdefsym; // index to externally defined symbols
+ uint32_t nextdefsym; // number of externally defined symbols
+ uint32_t iundefsym; // index to undefined symbols
+ uint32_t nundefsym; // number of undefined symbols
+ uint32_t tocoff; // file offset to table of contents
+ uint32_t ntoc; // number of entries in table of contents
+ uint32_t modtaboff; // file offset to module table
+ uint32_t nmodtab; // number of module table entries
+ uint32_t extrefsymoff; // offset to referenced symbol table
+ uint32_t nextrefsyms; // number of referenced symbol table entries
+ uint32_t indirectsymoff; // file offset to the indirect symbol table
+ uint32_t nindirectsyms; // number of indirect symbol table entries
+ uint32_t extreloff; // offset to external relocation entries
+ uint32_t nextrel; // number of external relocation entries
+ uint32_t locreloff; // offset to local relocation entries
+ uint32_t nlocrel; // number of local relocation entries
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
+ };
+
+ MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+ ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+ iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+ nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+ nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) {}
+
+}; // end struct MachODySymTab
} // end namespace llvm
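MachOSection now inherits its data buffer, size, and relocation list from BinaryObject rather than carrying SectionData, size, and Relocations itself. The interface below is inferred from the calls made elsewhere in this commit (getData, size, emitAlignment, addRelocation); it is a sketch, not the verbatim BinaryObject header:

    #include "llvm/CodeGen/MachineRelocation.h"
    #include <stdint.h>
    #include <vector>

    class BinaryObjectSketch {
      std::vector<unsigned char> Data;
      std::vector<llvm::MachineRelocation> Relocs;
    public:
      std::vector<unsigned char> &getData() { return Data; }
      uint64_t size() const { return Data.size(); }
      void addRelocation(const llvm::MachineRelocation &MR) {
        Relocs.push_back(MR);
      }
      void emitAlignment(unsigned Align) {  // zero-pad to a 1<<Align boundary
        uint64_t Mask = (1ULL << Align) - 1;
        while (Data.size() & Mask)
          Data.push_back(0);
      }
    };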
diff --git a/lib/CodeGen/MachOCodeEmitter.cpp b/lib/CodeGen/MachOCodeEmitter.cpp
index 02b02de9ec36..13184772cdb4 100644
--- a/lib/CodeGen/MachOCodeEmitter.cpp
+++ b/lib/CodeGen/MachOCodeEmitter.cpp
@@ -7,22 +7,37 @@
//
//===----------------------------------------------------------------------===//
+#include "MachO.h"
+#include "MachOWriter.h"
#include "MachOCodeEmitter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/OutputBuffer.h"
+#include <vector>
//===----------------------------------------------------------------------===//
// MachOCodeEmitter Implementation
//===----------------------------------------------------------------------===//
namespace llvm {
-
+
+MachOCodeEmitter::MachOCodeEmitter(MachOWriter &mow, MachOSection &mos) :
+ ObjectCodeEmitter(&mos), MOW(mow), TM(MOW.TM) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+ MAI = TM.getMCAsmInfo();
+}
+
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.
@@ -39,28 +54,18 @@ void MachOCodeEmitter::startFunction(MachineFunction &MF) {
// Get the Mach-O Section that this function belongs in.
MachOSection *MOS = MOW.getTextSection();
- // FIXME: better memory management
- MOS->SectionData.reserve(4096);
- BufferBegin = &MOS->SectionData[0];
- BufferEnd = BufferBegin + MOS->SectionData.capacity();
-
// Upgrade the section alignment if required.
if (MOS->align < Align) MOS->align = Align;
- // Round the size up to the correct alignment for starting the new function.
- if ((MOS->size & ((1 << Align) - 1)) != 0) {
- MOS->size += (1 << Align);
- MOS->size &= ~((1 << Align) - 1);
- }
+ MOS->emitAlignment(Align);
+
+ // Create symbol for function entry
+ const GlobalValue *FuncV = MF.getFunction();
+ MachOSym FnSym(FuncV, MOW.Mang->getMangledName(FuncV), MOS->Index, MAI);
+ FnSym.n_value = getCurrentPCOffset();
- // FIXME: Using MOS->size directly here instead of calculating it from the
- // output buffer size (impossible because the code emitter deals only in raw
- // bytes) forces us to manually synchronize size and write padding zero bytes
- // to the output buffer for all non-text sections. For text sections, we do
- // not synchonize the output buffer, and we just blow up if anyone tries to
- // write non-code to it. An assert should probably be added to
- // AddSymbolToSection to prevent calling it on the text section.
- CurBufferPtr = BufferBegin + MOS->size;
+ // add it to the symtab.
+ MOW.SymbolTable.push_back(FnSym);
}
/// finishFunction - This callback is invoked after the function is completely
@@ -71,15 +76,6 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
// Get the Mach-O Section that this function belongs in.
MachOSection *MOS = MOW.getTextSection();
- // Get a symbol for the function to add to the symbol table
- // FIXME: it seems like we should call something like AddSymbolToSection
- // in startFunction rather than changing the section size and symbol n_value
- // here.
- const GlobalValue *FuncV = MF.getFunction();
- MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TAI);
- FnSym.n_value = MOS->size;
- MOS->size = CurBufferPtr - BufferBegin;
-
// Emit constant pool to appropriate section(s)
emitConstantPool(MF.getConstantPool());
@@ -110,14 +106,11 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
// FIXME: This should be a set or something that uniques
MOW.PendingGlobals.push_back(MR.getGlobalValue());
} else {
- assert(0 && "Unhandled relocation type");
+ llvm_unreachable("Unhandled relocation type");
}
- MOS->Relocations.push_back(MR);
+ MOS->addRelocation(MR);
}
Relocations.clear();
-
- // Finally, add it to the symtab.
- MOW.SymbolTable.push_back(FnSym);
// Clear per-function data structures.
CPLocations.clear();
@@ -151,13 +144,10 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
- OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+ OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian);
- CPLocations.push_back(Sec->SectionData.size());
+ CPLocations.push_back(Sec->size());
CPSections.push_back(Sec->Index);
-
- // FIXME: remove when we have unified size + output buffer
- Sec->size += Size;
// Allocate space in the section for the global.
// FIXME: need alignment?
@@ -165,14 +155,13 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
for (unsigned j = 0; j < Size; ++j)
SecDataOut.outbyte(0);
- MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
- TM.getTargetData(), Sec->Relocations);
+ MachOWriter::InitMem(CP[i].Val.ConstVal, CPLocations[i],
+ TM.getTargetData(), Sec);
}
}
/// emitJumpTables - Emit all the jump tables for a given jump table info
/// record to the appropriate section.
-
void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
@@ -183,24 +172,21 @@ void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
MachOSection *Sec = MOW.getJumpTableSection();
unsigned TextSecIndex = MOW.getTextSection()->Index;
- OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+ OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian);
for (unsigned i = 0, e = JT.size(); i != e; ++i) {
// For each jump table, record its offset from the start of the section,
// reserve space for the relocations to the MBBs, and add the relocations.
const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
- JTLocations.push_back(Sec->SectionData.size());
+ JTLocations.push_back(Sec->size());
for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
- MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
- MBBs[mi]));
+ MachineRelocation MR(MOW.GetJTRelocation(Sec->size(), MBBs[mi]));
MR.setResultPointer((void *)JTLocations[i]);
MR.setConstantVal(TextSecIndex);
- Sec->Relocations.push_back(MR);
+ Sec->addRelocation(MR);
SecDataOut.outaddr(0);
}
}
- // FIXME: remove when we have unified size + output buffer
- Sec->size = Sec->SectionData.size();
}
} // end namespace llvm
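emitJumpTables above applies a fixed per-entry sequence: record the entry's offset within the section, queue a relocation carrying that offset, then write a zeroed placeholder address for the relocator to patch later. A condensed sketch; emitJTEntrySketch and its Buffer/Relocs parameters are illustrative stand-ins for the section's BinaryObject storage:

    #include "llvm/CodeGen/MachineRelocation.h"
    #include <stdint.h>
    #include <vector>

    static void emitJTEntrySketch(std::vector<unsigned char> &Buffer,
                                  std::vector<llvm::MachineRelocation> &Relocs,
                                  llvm::MachineRelocation MR,
                                  unsigned AddrSize) {
      MR.setResultPointer((void*)(uintptr_t)Buffer.size()); // offset, not a pointer
      Relocs.push_back(MR);                                 // patched at write-out
      Buffer.insert(Buffer.end(), AddrSize, 0);             // placeholder address
    }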
diff --git a/lib/CodeGen/MachOCodeEmitter.h b/lib/CodeGen/MachOCodeEmitter.h
index 0a6e4e4d19ec..475244646bd1 100644
--- a/lib/CodeGen/MachOCodeEmitter.h
+++ b/lib/CodeGen/MachOCodeEmitter.h
@@ -10,16 +10,17 @@
#ifndef MACHOCODEEMITTER_H
#define MACHOCODEEMITTER_H
-#include "MachOWriter.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include <vector>
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <map>
namespace llvm {
+class MachOWriter;
+
/// MachOCodeEmitter - This class is used by the MachOWriter to emit the code
/// for functions to the Mach-O file.
-class MachOCodeEmitter : public MachineCodeEmitter {
+class MachOCodeEmitter : public ObjectCodeEmitter {
MachOWriter &MOW;
/// Target machine description.
@@ -29,36 +30,16 @@ class MachOCodeEmitter : public MachineCodeEmitter {
/// machine directly, indicating what header values and flags to set.
bool is64Bit, isLittleEndian;
- const TargetAsmInfo *TAI;
+ const MCAsmInfo *MAI;
/// Relocations - These are the relocations that the function needs, as
/// emitted.
std::vector<MachineRelocation> Relocations;
-
- /// CPLocations - This is a map of constant pool indices to offsets from the
- /// start of the section for that constant pool index.
- std::vector<uintptr_t> CPLocations;
-
- /// CPSections - This is a map of constant pool indices to the MachOSection
- /// containing the constant pool entry for that index.
- std::vector<unsigned> CPSections;
-
- /// JTLocations - This is a map of jump table indices to offsets from the
- /// start of the section for that jump table index.
- std::vector<uintptr_t> JTLocations;
-
- /// MBBLocations - This vector is a mapping from MBB ID's to their address.
- /// It is filled in by the StartMachineBasicBlock callback and queried by
- /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations;
-
+
+ std::map<uint64_t, uintptr_t> Labels;
+
public:
- MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM)
- {
- is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
- isLittleEndian = TM.getTargetData()->isLittleEndian();
- TAI = TM.getTargetAsmInfo();
- }
+ MachOCodeEmitter(MachOWriter &mow, MachOSection &mos);
virtual void startFunction(MachineFunction &MF);
virtual bool finishFunction(MachineFunction &MF);
@@ -66,61 +47,20 @@ public:
virtual void addRelocation(const MachineRelocation &MR) {
Relocations.push_back(MR);
}
-
+
void emitConstantPool(MachineConstantPool *MCP);
void emitJumpTables(MachineJumpTableInfo *MJTI);
-
- virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
- assert(CPLocations.size() > Index && "CP not emitted!");
- return CPLocations[Index];
- }
- virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
- assert(JTLocations.size() > Index && "JT not emitted!");
- return JTLocations[Index];
- }
-
- virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
- }
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
+ virtual void emitLabel(uint64_t LabelID) {
+ Labels[LabelID] = getCurrentPCOffset();
}
virtual uintptr_t getLabelAddress(uint64_t Label) const {
- assert(0 && "get Label not implemented");
- abort();
- return 0;
- }
-
- virtual void emitLabel(uint64_t LabelID) {
- assert(0 && "emit Label not implemented");
- abort();
+ return Labels.find(Label)->second;
}
virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
- /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
- virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment = 1) {
- assert(0 && "JIT specific function called!");
- abort();
- }
- virtual void startGVStub(const GlobalValue* F, void *Buffer,
- unsigned StubSize) {
- assert(0 && "JIT specific function called!");
- abort();
- }
- virtual void *finishGVStub(const GlobalValue* F) {
- assert(0 && "JIT specific function called!");
- abort();
- return 0;
- }
-
}; // end class MachOCodeEmitter
} // end namespace llvm
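The new emitLabel/getLabelAddress pair in this header is plain map bookkeeping: store the current PC offset under the label ID, read it back later. A self-contained sketch (LabelTableSketch is hypothetical; note that the committed getLabelAddress assumes the label was already emitted):

    #include <map>
    #include <stdint.h>

    struct LabelTableSketch {
      std::map<uint64_t, uintptr_t> Labels;
      void emitLabel(uint64_t ID, uintptr_t PCOffset) {
        Labels[ID] = PCOffset;              // record where the label landed
      }
      uintptr_t getLabelAddress(uint64_t ID) const {
        std::map<uint64_t, uintptr_t>::const_iterator I = Labels.find(ID);
        return I == Labels.end() ? 0 : I->second;  // guarded, unlike the diff
      }
    };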
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
index 163df6994aa6..73b15edba37f 100644
--- a/lib/CodeGen/MachOWriter.cpp
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -22,36 +22,32 @@
//
//===----------------------------------------------------------------------===//
+#include "MachO.h"
#include "MachOWriter.h"
#include "MachOCodeEmitter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/CodeGen/FileWriters.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
#include "llvm/Support/Mangler.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OutputBuffer.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cstring>
namespace llvm {
/// AddMachOWriter - Concrete function to add the Mach-O writer to the function
/// pass manager.
-MachineCodeEmitter *AddMachOWriter(PassManagerBase &PM,
+ObjectCodeEmitter *AddMachOWriter(PassManagerBase &PM,
raw_ostream &O,
TargetMachine &TM) {
MachOWriter *MOW = new MachOWriter(O, TM);
PM.add(MOW);
- return &MOW->getMachineCodeEmitter();
+ return MOW->getObjectCodeEmitter();
}
//===----------------------------------------------------------------------===//
@@ -65,15 +61,14 @@ MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm)
is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
isLittleEndian = TM.getTargetData()->isLittleEndian();
- TAI = TM.getTargetAsmInfo();
+ MAI = TM.getMCAsmInfo();
// Create the machine code emitter object for this target.
-
- MCE = new MachOCodeEmitter(*this);
+ MachOCE = new MachOCodeEmitter(*this, *getTextSection(true));
}
MachOWriter::~MachOWriter() {
- delete MCE;
+ delete MachOCE;
}
bool MachOWriter::doInitialization(Module &M) {
@@ -97,9 +92,9 @@ bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
/// the Mach-O file to 'O'.
bool MachOWriter::doFinalization(Module &M) {
  // FIXME: we don't handle debug info yet; we should probably do that.
-
  // Okay, the .text section has been completed, build the .data, .bss, and
// "common" sections next.
+
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
EmitGlobal(I);
@@ -125,6 +120,89 @@ bool MachOWriter::doFinalization(Module &M) {
return false;
}
+// getConstSection - Get constant section for Constant 'C'
+MachOSection *MachOWriter::getConstSection(Constant *C) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (CVA && CVA->isCString())
+ return getSection("__TEXT", "__cstring",
+ MachOSection::S_CSTRING_LITERALS);
+
+ const Type *Ty = C->getType();
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ switch(Size) {
+ default: break; // Fall through to __TEXT,__const
+ case 4:
+ return getSection("__TEXT", "__literal4",
+ MachOSection::S_4BYTE_LITERALS);
+ case 8:
+ return getSection("__TEXT", "__literal8",
+ MachOSection::S_8BYTE_LITERALS);
+ case 16:
+ return getSection("__TEXT", "__literal16",
+ MachOSection::S_16BYTE_LITERALS);
+ }
+ }
+ return getSection("__TEXT", "__const");
+}
+
+// getJumpTableSection - Select the Jump Table section
+MachOSection *MachOWriter::getJumpTableSection() {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return getTextSection(false);
+ else
+ return getSection("__TEXT", "__const");
+}
+
+// getSection - Return the section with the specified name, creating a new
+// section if one does not already exist.
+MachOSection *MachOWriter::getSection(const std::string &seg,
+ const std::string &sect,
+ unsigned Flags /* = 0 */ ) {
+ MachOSection *MOS = SectionLookup[seg+sect];
+ if (MOS) return MOS;
+
+ MOS = new MachOSection(seg, sect);
+ SectionList.push_back(MOS);
+ MOS->Index = SectionList.size();
+ MOS->flags = MachOSection::S_REGULAR | Flags;
+ SectionLookup[seg+sect] = MOS;
+ return MOS;
+}
+
+// getTextSection - Return text section with different flags for code/data
+MachOSection *MachOWriter::getTextSection(bool isCode /* = true */ ) {
+ if (isCode)
+ return getSection("__TEXT", "__text",
+ MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+ MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+ else
+ return getSection("__TEXT", "__text");
+}
+
+MachOSection *MachOWriter::getBSSSection() {
+ return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+}
+
+// GetJTRelocation - Get a new jump-table BB relocation based
+// on target information.
+MachineRelocation MachOWriter::GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+}
+
+// GetTargetRelocation - Emit the target-specific relocation entries for MR;
+// returns the number of relocations emitted.
+unsigned MachOWriter::GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx, unsigned ToAddr,
+ unsigned ToIndex, OutputBuffer &RelocOut,
+ OutputBuffer &SecOut, bool Scattered,
+ bool Extern) {
+ return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+ ToIndex, RelocOut,
+ SecOut, Scattered,
+ Extern);
+}
+
void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
const Type *Ty = GV->getType()->getElementType();
unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
@@ -133,37 +211,31 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
// Reserve space in the .bss section for this symbol while maintaining the
// desired section alignment, which must be at least as much as required by
// this symbol.
- OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+ OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian);
if (Align) {
- uint64_t OrigSize = Sec->size;
Align = Log2_32(Align);
Sec->align = std::max(unsigned(Sec->align), Align);
- Sec->size = (Sec->size + Align - 1) & ~(Align-1);
- // Add alignment padding to buffer as well.
- // FIXME: remove when we have unified size + output buffer
- unsigned AlignedSize = Sec->size - OrigSize;
- for (unsigned i = 0; i < AlignedSize; ++i)
- SecDataOut.outbyte(0);
+ Sec->emitAlignment(Sec->align);
}
// Globals without external linkage apparently do not go in the symbol table.
if (!GV->hasLocalLinkage()) {
- MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TAI);
- Sym.n_value = Sec->size;
+ MachOSym Sym(GV, Mang->getMangledName(GV), Sec->Index, MAI);
+ Sym.n_value = Sec->size();
SymbolTable.push_back(Sym);
}
// Record the offset of the symbol, and then allocate space for it.
// FIXME: remove when we have unified size + output buffer
- Sec->size += Size;
  // Now that we know what section the GlobalVariable is going to be emitted
// into, update our mappings.
// FIXME: We may also need to update this when outputting non-GlobalVariable
// GlobalValues such as functions.
+
GVSection[GV] = Sec;
- GVOffset[GV] = Sec->SectionData.size();
+ GVOffset[GV] = Sec->size();
// Allocate space in the section for the global.
for (unsigned i = 0; i < Size; ++i)
@@ -183,8 +255,8 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
// merged with other symbols.
if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
GV->hasCommonLinkage()) {
- MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV),
- MachOSym::NO_SECT, TAI);
+ MachOSym ExtOrCommonSym(GV, Mang->getMangledName(GV),
+ MachOSym::NO_SECT, MAI);
// For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
// bytes of the symbol.
ExtOrCommonSym.n_value = Size;
@@ -205,8 +277,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) :
getDataSection();
AddSymbolToSection(Sec, GV);
- InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
- TM.getTargetData(), Sec->Relocations);
+ InitMem(GV->getInitializer(), GVOffset[GV], TM.getTargetData(), Sec);
}
@@ -214,6 +285,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
void MachOWriter::EmitHeaderAndLoadCommands() {
// Step #0: Fill in the segment load command size, since we need it to figure
// out the rest of the header fields
+
MachOSegment SEG("", is64Bit);
SEG.nsects = SectionList.size();
SEG.cmdsize = SEG.cmdSize(is64Bit) +
@@ -231,7 +303,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
// Step #3: write the header to the file
  // Local alias to shorten the following code.
- DataBuffer &FH = Header.HeaderData;
+ std::vector<unsigned char> &FH = Header.HeaderData;
OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
FHOut.outword(Header.magic);
@@ -247,7 +319,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
// Step #4: Finish filling in the segment load command and write it out
for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I)
- SEG.filesize += (*I)->size;
+ SEG.filesize += (*I)->size();
SEG.vmsize = SEG.filesize;
SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
@@ -271,9 +343,8 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
MachOSection *MOS = *I;
MOS->addr = currentAddr;
MOS->offset = currentAddr + SEG.fileoff;
-
// FIXME: do we need to do something with alignment here?
- currentAddr += MOS->size;
+ currentAddr += MOS->size();
}
// Step #6: Emit the symbol table to temporary buffers, so that we know the
@@ -288,6 +359,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) {
MachOSection *MOS = *I;
+
// Convert the relocations to target-specific relocations, and fill in the
// relocation offset for this section.
CalculateRelocations(*MOS);
@@ -298,7 +370,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
FHOut.outstring(MOS->sectname, 16);
FHOut.outstring(MOS->segname, 16);
FHOut.outaddr(MOS->addr);
- FHOut.outaddr(MOS->size);
+ FHOut.outaddr(MOS->size());
FHOut.outword(MOS->offset);
FHOut.outword(MOS->align);
FHOut.outword(MOS->reloff);
@@ -351,24 +423,26 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
/// EmitSections - Now that we have constructed the file header and load
/// commands, emit the data for each section to the file.
-
void MachOWriter::EmitSections() {
for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I)
// Emit the contents of each section
- O.write((char*)&(*I)->SectionData[0], (*I)->size);
+ if ((*I)->size())
+ O.write((char*)&(*I)->getData()[0], (*I)->size());
}
+
+/// EmitRelocations - Emit the buffered relocation data for each section.
void MachOWriter::EmitRelocations() {
for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I)
// Emit the relocation entry data for each section.
- O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+ if ((*I)->RelocBuffer.size())
+ O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
}
/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
/// each a string table index so that they appear in the correct order in the
/// output file.
-
void MachOWriter::BufferSymbolAndStringTable() {
// The order of the symbol table is:
// 1. local symbols
@@ -377,11 +451,10 @@ void MachOWriter::BufferSymbolAndStringTable() {
// Before sorting the symbols, check the PendingGlobals for any undefined
// globals that need to be put in the symbol table.
-
for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
E = PendingGlobals.end(); I != E; ++I) {
if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
- MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TAI);
+ MachOSym UndfSym(*I, Mang->getMangledName(*I), MachOSym::NO_SECT, MAI);
SymbolTable.push_back(UndfSym);
GVOffset[*I] = -1;
}
@@ -389,19 +462,16 @@ void MachOWriter::BufferSymbolAndStringTable() {
// Sort the symbols by name, so that when we partition the symbols by scope
// of definition, we won't have to sort by name within each partition.
-
std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSym::SymCmp());
  // Partition the symbol table entries so that all local symbols come before
// all symbols with external linkage. { 1 | 2 3 }
-
std::partition(SymbolTable.begin(), SymbolTable.end(),
MachOSym::PartitionByLocal);
// Advance iterator to beginning of external symbols and partition so that
// all external symbols defined in this module come before all external
// symbols defined elsewhere. { 1 | 2 | 3 }
-
for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
E = SymbolTable.end(); I != E; ++I) {
if (!MachOSym::PartitionByLocal(*I)) {
@@ -413,7 +483,6 @@ void MachOWriter::BufferSymbolAndStringTable() {
// Calculate the starting index for each of the local, extern defined, and
// undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
// load command.
-
for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
E = SymbolTable.end(); I != E; ++I) {
if (MachOSym::PartitionByLocal(*I)) {
@@ -430,7 +499,6 @@ void MachOWriter::BufferSymbolAndStringTable() {
// Write out a leading zero byte when emitting string table, for n_strx == 0
// which means an empty string.
-
OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
StrTOut.outbyte(0);
@@ -439,7 +507,6 @@ void MachOWriter::BufferSymbolAndStringTable() {
// 2. strings for local symbols
// Since this is the opposite order from the symbol table, which we have just
// sorted, we can walk the symbol table backwards to output the string table.
-
for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
E = SymbolTable.rend(); I != E; ++I) {
if (I->GVName == "") {
@@ -478,24 +545,22 @@ void MachOWriter::BufferSymbolAndStringTable() {
/// and the offset into that section. From this information, create the
/// appropriate target-specific MachORelocation type and add buffer it to be
/// written out after we are finished writing out sections.
-
void MachOWriter::CalculateRelocations(MachOSection &MOS) {
- for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
- MachineRelocation &MR = MOS.Relocations[i];
+ std::vector<MachineRelocation> Relocations = MOS.getRelocations();
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
unsigned TargetSection = MR.getConstantVal();
unsigned TargetAddr = 0;
unsigned TargetIndex = 0;
// This is a scattered relocation entry if it points to a global value with
// a non-zero offset.
-
bool Scattered = false;
bool Extern = false;
// Since we may not have seen the GlobalValue we were interested in yet at
// the time we emitted the relocation for it, fix it up now so that it
// points to the offset into the correct section.
-
if (MR.isGlobalValue()) {
GlobalValue *GV = MR.getGlobalValue();
MachOSection *MOSPtr = GVSection[GV];
@@ -503,7 +568,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
// If we have never seen the global before, it must be to a symbol
// defined in another module (N_UNDF).
-
if (!MOSPtr) {
// FIXME: need to append stub suffix
Extern = true;
@@ -518,7 +582,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
// If the symbol is locally defined, pass in the address of the section and
// the section index to the code which will generate the target relocation.
-
if (!Extern) {
MachOSection &To = *SectionList[TargetSection - 1];
TargetAddr = To.addr;
@@ -526,7 +589,7 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
}
OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
- OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+ OutputBuffer SecOut(MOS.getData(), is64Bit, isLittleEndian);
MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
RelocOut, SecOut, Scattered, Extern);
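To make the Scattered/Extern distinction used above explicit: a relocation
against a global that lives in a known section at a non-zero offset becomes a
scattered entry, while a global never seen in this module is assumed external
(N_UNDF). A hedged distillation of just that decision, with simplified
stand-in types rather than the real MachOWriter members:

    struct RelocKind { bool Scattered; bool Extern; };

    // SectionIndex == 0 means "never seen": the symbol must be defined
    // in another module. A non-zero offset into a known section needs a
    // scattered relocation entry so the offset survives linking.
    static RelocKind classify(unsigned SectionIndex, long Offset) {
      RelocKind K = { false, false };
      if (SectionIndex == 0)
        K.Extern = true;
      else if (Offset != 0)
        K.Scattered = true;
      return K;
    }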
@@ -535,12 +598,11 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
// InitMem - Write the value of a Constant to the specified memory location,
// converting it into bytes and relocations.
-
-void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
- const TargetData *TD,
- std::vector<MachineRelocation> &MRs) {
+void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
+ const TargetData *TD, MachOSection* mos) {
typedef std::pair<const Constant*, intptr_t> CPair;
std::vector<CPair> WorkList;
+ uint8_t *Addr = &mos->getData()[0];
WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
@@ -572,9 +634,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
}
case Instruction::Add:
default:
- cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
- abort();
- break;
+ errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
+ llvm_unreachable(0);
}
} else if (PC->getType()->isSingleValueType()) {
unsigned char *ptr = (unsigned char *)PA;
@@ -608,7 +669,7 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
ptr[6] = val >> 48;
ptr[7] = val >> 56;
} else {
- assert(0 && "Not implemented: bit widths > 64");
+ llvm_unreachable("Not implemented: bit widths > 64");
}
break;
}
@@ -643,17 +704,19 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
memset(ptr, 0, TD->getPointerSize());
else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) {
// FIXME: what about function stubs?
- MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr,
+ mos->addRelocation(MachineRelocation::getGV(PA-(intptr_t)Addr,
MachineRelocation::VANILLA,
const_cast<GlobalValue*>(GV),
ScatteredOffset));
ScatteredOffset = 0;
} else
- assert(0 && "Unknown constant pointer type!");
+ llvm_unreachable("Unknown constant pointer type!");
break;
default:
- cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "ERROR: Constant unimp for type: " << *PC->getType();
+ llvm_report_error(Msg.str());
}
} else if (isa<ConstantAggregateZero>(PC)) {
memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType()));
@@ -669,8 +732,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
WorkList.push_back(CPair(CPS->getOperand(i),
PA+SL->getElementOffset(i)));
} else {
- cerr << "Bad Type: " << *PC->getType() << "\n";
- assert(0 && "Unknown constant type to initialize memory with!");
+ errs() << "Bad Type: " << *PC->getType() << "\n";
+ llvm_unreachable("Unknown constant type to initialize memory with!");
}
}
}
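The worklist scheme in InitMem generalizes nicely: instead of recursing
through nested aggregates, each (constant, address) pair is pushed, and leaves
emit bytes when popped. A minimal sketch of that traversal shape, with a toy
Node type standing in for Constant and one byte per leaf (an assumption of the
sketch, not of the real code):

    #include <utility>
    #include <vector>

    struct Node {                      // stand-in for a nested Constant
      std::vector<Node> Elems;         // aggregate members; empty for leaves
      unsigned char Byte;              // leaf payload
    };

    static size_t sizeOf(const Node &N) {
      if (N.Elems.empty()) return 1;
      size_t S = 0;
      for (size_t i = 0, e = N.Elems.size(); i != e; ++i)
        S += sizeOf(N.Elems[i]);
      return S;
    }

    // Flatten a nested value into Buf without recursion, mirroring the
    // WorkList loop in InitMem. Buf must already be sized to hold Root.
    static void flatten(const Node &Root, std::vector<unsigned char> &Buf,
                        size_t Offset) {
      typedef std::pair<const Node*, size_t> Item;
      std::vector<Item> WorkList;
      WorkList.push_back(Item(&Root, Offset));
      while (!WorkList.empty()) {
        Item It = WorkList.back();
        WorkList.pop_back();
        if (It.first->Elems.empty()) {
          Buf[It.second] = It.first->Byte;      // leaf: emit one byte
        } else {
          size_t Off = It.second;               // members laid out in order
          for (size_t i = 0, e = It.first->Elems.size(); i != e; ++i) {
            WorkList.push_back(Item(&It.first->Elems[i], Off));
            Off += sizeOf(It.first->Elems[i]);
          }
        }
      }
    }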
@@ -680,13 +743,14 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
//===----------------------------------------------------------------------===//
MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
- const TargetAsmInfo *TAI) :
+ const MCAsmInfo *MAI) :
GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect),
n_desc(0), n_value(0) {
+ // FIXME: This is completely broken, it should use the mangler interface.
switch (GV->getLinkage()) {
default:
- assert(0 && "Unexpected linkage type!");
+ llvm_unreachable("Unexpected linkage type!");
break;
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
@@ -695,17 +759,19 @@ MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
case GlobalValue::CommonLinkage:
assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
case GlobalValue::ExternalLinkage:
- GVName = TAI->getGlobalPrefix() + name;
+ GVName = MAI->getGlobalPrefix() + name;
n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT;
break;
case GlobalValue::PrivateLinkage:
- GVName = TAI->getPrivateGlobalPrefix() + name;
+ GVName = MAI->getPrivateGlobalPrefix() + name;
+ break;
+ case GlobalValue::LinkerPrivateLinkage:
+ GVName = MAI->getLinkerPrivateGlobalPrefix() + name;
break;
case GlobalValue::InternalLinkage:
- GVName = TAI->getGlobalPrefix() + name;
+ GVName = MAI->getGlobalPrefix() + name;
break;
}
}
} // end namespace llvm
-
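The MachOSym constructor just replaced boils down to a linkage-to-prefix
mapping plus a visibility bit. A hedged distillation of that decision table;
the enum and the Darwin prefixes ("L" is assembler-local, "l" survives to the
linker, "_" is the ordinary global prefix) are illustrative, since the real
values come from GlobalValue and MCAsmInfo:

    #include <string>

    enum Linkage { External, Internal, Private, LinkerPrivate, Weak, Common };

    // Mirror of the switch in MachOSym's constructor: private-ish
    // linkages get a private prefix, everything else the global prefix.
    static std::string mangledName(Linkage L, const std::string &Name) {
      switch (L) {
      case Private:        return "L" + Name;  // stripped by the assembler
      case LinkerPrivate:  return "l" + Name;  // visible to the linker only
      default:             return "_" + Name;  // ordinary global prefix
      }
    }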
diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h
index 3af2b0af4b78..9273f3854863 100644
--- a/lib/CodeGen/MachOWriter.h
+++ b/lib/CodeGen/MachOWriter.h
@@ -14,22 +14,28 @@
#ifndef MACHOWRITER_H
#define MACHOWRITER_H
-#include "MachO.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachOWriterInfo.h"
+#include <vector>
#include <map>
namespace llvm {
+ class Constant;
class GlobalVariable;
class Mangler;
- class MachineCodeEmitter;
+ class MachineBasicBlock;
+ class MachineRelocation;
class MachOCodeEmitter;
+ struct MachODySymTab;
+ struct MachOHeader;
+ struct MachOSection;
+ struct MachOSym;
+ class TargetData;
+ class TargetMachine;
+ class MCAsmInfo;
+ class ObjectCodeEmitter;
class OutputBuffer;
class raw_ostream;
-
/// MachOWriter - This class implements the common target-independent code for
/// writing Mach-O files. Targets should derive a class from this to
/// parameterize the output format.
@@ -38,8 +44,9 @@ namespace llvm {
friend class MachOCodeEmitter;
public:
static char ID;
- MachineCodeEmitter &getMachineCodeEmitter() const {
- return *(MachineCodeEmitter*)MCE;
+
+ ObjectCodeEmitter *getObjectCodeEmitter() {
+ return reinterpret_cast<ObjectCodeEmitter*>(MachOCE);
}
MachOWriter(raw_ostream &O, TargetMachine &TM);
@@ -61,36 +68,30 @@ namespace llvm {
/// Mang - The object used to perform name mangling for this module.
///
Mangler *Mang;
-
- /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
- /// code for functions to the .o file.
- MachOCodeEmitter *MCE;
+ /// MachOCE - The MachineCodeEmitter object that we are exposing to emit
+ /// machine code for functions to the .o file.
+ MachOCodeEmitter *MachOCE;
/// is64Bit/isLittleEndian - This information is inferred from the target
/// machine directly, indicating what header values and flags to set.
-
bool is64Bit, isLittleEndian;
// Target Asm Info
-
- const TargetAsmInfo *TAI;
+ const MCAsmInfo *MAI;
/// Header - An instance of MachOHeader that we will update while we build
/// the file, and then emit during finalization.
-
MachOHeader Header;
/// doInitialization - Emit the file header and all of the global variables
/// for the module to the Mach-O file.
-
bool doInitialization(Module &M);
bool runOnMachineFunction(MachineFunction &MF);
/// doFinalization - Now that the module has been completely processed, emit
/// the Mach-O file to 'O'.
-
bool doFinalization(Module &M);
private:
@@ -98,85 +99,37 @@ namespace llvm {
/// SectionList - This is the list of sections that we have emitted to the
/// file. Once the file has been completely built, the segment load command
/// SectionCommands are constructed from this info.
-
std::vector<MachOSection*> SectionList;
/// SectionLookup - This is a mapping from section name to SectionList entry
-
std::map<std::string, MachOSection*> SectionLookup;
-
+
/// GVSection - This is a mapping from a GlobalValue to a MachOSection,
/// to aid in emitting relocations.
-
std::map<GlobalValue*, MachOSection*> GVSection;
- /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+ /// GVOffset - This is a mapping from a GlobalValue to an offset from the
/// start of the section in which the GV resides, to aid in emitting
/// relocations.
-
std::map<GlobalValue*, intptr_t> GVOffset;
/// getSection - Return the section with the specified name, creating a new
/// section if one does not already exist.
-
MachOSection *getSection(const std::string &seg, const std::string &sect,
- unsigned Flags = 0) {
- MachOSection *MOS = SectionLookup[seg+sect];
- if (MOS) return MOS;
-
- MOS = new MachOSection(seg, sect);
- SectionList.push_back(MOS);
- MOS->Index = SectionList.size();
- MOS->flags = MachOSection::S_REGULAR | Flags;
- SectionLookup[seg+sect] = MOS;
- return MOS;
- }
- MachOSection *getTextSection(bool isCode = true) {
- if (isCode)
- return getSection("__TEXT", "__text",
- MachOSection::S_ATTR_PURE_INSTRUCTIONS |
- MachOSection::S_ATTR_SOME_INSTRUCTIONS);
- else
- return getSection("__TEXT", "__text");
- }
- MachOSection *getBSSSection() {
- return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
- }
+ unsigned Flags = 0);
+
+ /// getTextSection - Return text section with different flags for code/data
+ MachOSection *getTextSection(bool isCode = true);
+
MachOSection *getDataSection() {
return getSection("__DATA", "__data");
}
- MachOSection *getConstSection(Constant *C) {
- const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
- if (CVA && CVA->isCString())
- return getSection("__TEXT", "__cstring",
- MachOSection::S_CSTRING_LITERALS);
-
- const Type *Ty = C->getType();
- if (Ty->isPrimitiveType() || Ty->isInteger()) {
- unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
- switch(Size) {
- default: break; // Fall through to __TEXT,__const
- case 4:
- return getSection("__TEXT", "__literal4",
- MachOSection::S_4BYTE_LITERALS);
- case 8:
- return getSection("__TEXT", "__literal8",
- MachOSection::S_8BYTE_LITERALS);
- case 16:
- return getSection("__TEXT", "__literal16",
- MachOSection::S_16BYTE_LITERALS);
- }
- }
- return getSection("__TEXT", "__const");
- }
- MachOSection *getJumpTableSection() {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return getTextSection(false);
- else
- return getSection("__TEXT", "__const");
- }
-
- /// MachOSymTab - This struct contains information about the offsets and
+
+ MachOSection *getBSSSection();
+ MachOSection *getConstSection(Constant *C);
+ MachOSection *getJumpTableSection();
+
+ /// MachOSymTab - This struct contains information about the offsets and
    /// size of the symbol table information in the link-edit segment.
struct MachOSymTab {
@@ -191,43 +144,42 @@ namespace llvm {
// see <mach-o/loader.h>
enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info
};
-
+
MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
nsyms(0), stroff(0), strsize(0) { }
};
-
+
/// SymTab - The "stab" style symbol table information
- MachOSymTab SymTab;
+ MachOSymTab SymTab;
/// DySymTab - symbol table info for the dynamic link editor
MachODySymTab DySymTab;
protected:
-
+
/// SymbolTable - This is the list of symbols we have emitted to the file.
/// This actually gets rearranged before emission to the file (to put the
/// local symbols first in the list).
std::vector<MachOSym> SymbolTable;
-
+
/// SymT - A buffer to hold the symbol table before we write it out at the
/// appropriate location in the file.
- DataBuffer SymT;
-
+ std::vector<unsigned char> SymT;
+
/// StrT - A buffer to hold the string table before we write it out at the
/// appropriate location in the file.
- DataBuffer StrT;
-
+ std::vector<unsigned char> StrT;
+
    /// PendingGlobals - This is a list of externally defined symbols that we have
/// been asked to emit, but have not seen a reference to. When a reference
/// is seen, the symbol will move from this list to the SymbolTable.
std::vector<GlobalValue*> PendingGlobals;
-
+
/// DynamicSymbolTable - This is just a vector of indices into
/// SymbolTable to aid in emitting the DYSYMTAB load command.
std::vector<unsigned> DynamicSymbolTable;
-
- static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
- const TargetData *TD,
- std::vector<MachineRelocation> &MRs);
+
+ static void InitMem(const Constant *C, uintptr_t Offset,
+ const TargetData *TD, MachOSection* mos);
private:
void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
@@ -238,25 +190,16 @@ namespace llvm {
void BufferSymbolAndStringTable();
void CalculateRelocations(MachOSection &MOS);
+    // GetJTRelocation - Get a new BB relocation based
+    // on target information.
MachineRelocation GetJTRelocation(unsigned Offset,
- MachineBasicBlock *MBB) const {
- return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
- }
+ MachineBasicBlock *MBB) const;
/// GetTargetRelocation - Returns the number of relocations.
- unsigned GetTargetRelocation(MachineRelocation &MR,
- unsigned FromIdx,
- unsigned ToAddr,
- unsigned ToIndex,
- OutputBuffer &RelocOut,
- OutputBuffer &SecOut,
- bool Scattered,
- bool Extern) {
- return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
- ToIndex, RelocOut,
- SecOut, Scattered,
- Extern);
- }
+ unsigned GetTargetRelocation(MachineRelocation &MR, unsigned FromIdx,
+ unsigned ToAddr, unsigned ToIndex,
+ OutputBuffer &RelocOut, OutputBuffer &SecOut,
+ bool Scattered, bool Extern);
};
}
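The getSection body moved out of the header above is a classic
lookup-or-create over a name-keyed map plus an index-fixing vector. A hedged
sketch of the same idiom with stand-in types (one small variation: it takes a
reference into the map so the probe happens once instead of twice):

    #include <map>
    #include <string>
    #include <vector>

    struct Section { std::string Seg, Sect; unsigned Index, Flags; };

    // Return the section for (Seg, Sect), creating and registering it on
    // first use. The vector fixes the 1-based index used by relocations.
    static Section *getOrCreate(std::map<std::string, Section*> &Lookup,
                                std::vector<Section*> &List,
                                const std::string &Seg,
                                const std::string &Sect, unsigned Flags) {
      Section *&S = Lookup[Seg + Sect];
      if (!S) {
        S = new Section();
        S->Seg = Seg; S->Sect = Sect; S->Flags = Flags;
        List.push_back(S);
        S->Index = List.size();        // Mach-O section indices start at 1
      }
      return S;
    }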
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 71e6b3e4d0f8..b3eb2da76281 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -19,6 +19,7 @@
#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -31,7 +32,7 @@ MachineBasicBlock::~MachineBasicBlock() {
LeakDetector::removeGarbageObject(this);
}
-std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
MBB.print(OS);
return OS;
}
@@ -43,7 +44,7 @@ std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
/// gets the next available unique MBB number. If it is removed from a
/// MachineFunction, it goes back to being #-1.
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) {
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
MachineFunction &MF = *N->getParent();
N->Number = MF.addToMBBNumbering(N);
@@ -55,7 +56,7 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) {
LeakDetector::removeGarbageObject(N);
}
-void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) {
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
N->getParent()->removeFromMBBNumbering(N->Number);
N->Number = -1;
LeakDetector::addGarbageObject(N);
@@ -65,7 +66,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) {
/// addNodeToList (MI) - When we add an instruction to a basic block
/// list, we update its parent pointer and add its operands from reg use/def
/// lists if appropriate.
-void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) {
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
assert(N->getParent() == 0 && "machine instruction already in a basic block");
N->setParent(Parent);
@@ -80,7 +81,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) {
/// removeNodeFromList (MI) - When we remove an instruction from a basic block
/// list, we update its parent pointer and remove its operands from reg use/def
/// lists if appropriate.
-void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) {
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() != 0 && "machine instruction not in a basic block");
// Remove from the use/def lists.
@@ -94,10 +95,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) {
/// transferNodesFromList (MI) - When moving a range of instructions from one
/// MBB list to another, we need to update the parent pointers and the use/def
/// lists.
-void ilist_traits<MachineInstr>::transferNodesFromList(
- ilist_traits<MachineInstr>& fromList,
- MachineBasicBlock::iterator first,
- MachineBasicBlock::iterator last) {
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &fromList,
+ MachineBasicBlock::iterator first,
+ MachineBasicBlock::iterator last) {
assert(Parent->getParent() == fromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
@@ -123,21 +124,41 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
return I;
}
-bool
-MachineBasicBlock::isOnlyReachableByFallthrough() const {
- return !isLandingPad() &&
- !pred_empty() &&
- next(pred_begin()) == pred_end() &&
- (*pred_begin())->isLayoutSuccessor(this) &&
- ((*pred_begin())->empty() ||
- !(*pred_begin())->back().getDesc().isBarrier());
+/// isOnlyReachableByFallthrough - Return true if this basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (isLandingPad() || pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ const_pred_iterator PI = pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(this))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ const MachineInstr &LastInst = Pred->back();
+ return !LastInst.getDesc().isBarrier();
}
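The rewrite above trades one dense boolean expression for early returns; the
only subtle step is the "exactly one predecessor" test, done by advancing a
copy of the begin iterator (PI2) rather than computing a distance. That trick
works for any forward-iterator range, as in this small stand-alone sketch:

    // True iff [First, Last) contains exactly one element. Works where
    // Last - First is unavailable, which is the same situation as
    // MachineBasicBlock's predecessor list.
    template <class Iter>
    static bool hasSingleElement(Iter First, Iter Last) {
      if (First == Last)
        return false;        // empty range
      ++First;
      return First == Last;  // a second element means "more than one"
    }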
void MachineBasicBlock::dump() const {
- print(*cerr.stream());
+ print(errs());
}
-static inline void OutputReg(std::ostream &os, unsigned RegNo,
+static inline void OutputReg(raw_ostream &os, unsigned RegNo,
const TargetRegisterInfo *TRI = 0) {
if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) {
if (TRI)
@@ -148,16 +169,16 @@ static inline void OutputReg(std::ostream &os, unsigned RegNo,
os << " %reg" << RegNo;
}
-void MachineBasicBlock::print(std::ostream &OS) const {
+void MachineBasicBlock::print(raw_ostream &OS) const {
const MachineFunction *MF = getParent();
- if(!MF) {
+ if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
<< " is null\n";
return;
}
const BasicBlock *LBB = getBasicBlock();
- OS << "\n";
+ OS << '\n';
if (LBB) OS << LBB->getName() << ": ";
OS << (const void*)this
<< ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
@@ -170,18 +191,18 @@ void MachineBasicBlock::print(std::ostream &OS) const {
OS << "Live Ins:";
for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
OutputReg(OS, *I, TRI);
- OS << "\n";
+ OS << '\n';
}
// Print the preds of this block according to the CFG.
if (!pred_empty()) {
OS << " Predecessors according to CFG:";
for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
- OS << " " << *PI << " (#" << (*PI)->getNumber() << ")";
- OS << "\n";
+ OS << ' ' << *PI << " (#" << (*PI)->getNumber() << ')';
+ OS << '\n';
}
for (const_iterator I = begin(); I != end(); ++I) {
- OS << "\t";
+ OS << '\t';
I->print(OS, &getParent()->getTarget());
}
@@ -189,8 +210,8 @@ void MachineBasicBlock::print(std::ostream &OS) const {
if (!succ_empty()) {
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
- OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
- OS << "\n";
+ OS << ' ' << *SI << " (#" << (*SI)->getNumber() << ')';
+ OS << '\n';
}
}
@@ -245,16 +266,15 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
Predecessors.erase(I);
}
-void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB)
-{
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
- for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
- end = fromMBB->succ_end(); iter != end; ++iter) {
- addSuccessor(*iter);
- }
- while(!fromMBB->succ_empty())
+ for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(),
+ E = fromMBB->succ_end(); I != E; ++I)
+ addSuccessor(*I);
+
+ while (!fromMBB->succ_empty())
fromMBB->removeSuccessor(fromMBB->succ_begin());
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 37c86019d4a2..0f796f3952c3 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -51,3 +51,7 @@ MachineDominatorTree::~MachineDominatorTree() {
void MachineDominatorTree::releaseMemory() {
DT->releaseMemory();
}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 599efb8bd276..b0ec809c6929 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -32,89 +33,56 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include <fstream>
-#include <sstream>
using namespace llvm;
-bool MachineFunctionPass::runOnFunction(Function &F) {
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- return false;
-
- return runOnMachineFunction(MachineFunction::get(&F));
-}
-
namespace {
struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
static char ID;
- std::ostream *OS;
+ raw_ostream &OS;
const std::string Banner;
- Printer (std::ostream *os, const std::string &banner)
+ Printer(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(&ID), OS(os), Banner(banner) {}
const char *getPassName() const { return "MachineFunction Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) {
- (*OS) << Banner;
- MF.print (*OS);
+ OS << Banner;
+ MF.print(OS);
return false;
}
};
char Printer::ID = 0;
}
-/// Returns a newly-created MachineFunction Printer pass. The default output
-/// stream is std::cerr; the default banner is empty.
+/// Returns a newly-created MachineFunction Printer pass. The default banner is
+/// empty.
///
-FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS,
+FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS,
const std::string &Banner){
return new Printer(OS, Banner);
}
-namespace {
- struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
- static char ID;
- Deleter() : MachineFunctionPass(&ID) {}
-
- const char *getPassName() const { return "Machine Code Deleter"; }
-
- bool runOnMachineFunction(MachineFunction &MF) {
- // Delete the annotation from the function now.
- MachineFunction::destruct(MF.getFunction());
- return true;
- }
- };
- char Deleter::ID = 0;
-}
-
-/// MachineCodeDeletion Pass - This pass deletes all of the machine code for
-/// the current function, which should happen after the function has been
-/// emitted to a .s file or to memory.
-FunctionPass *llvm::createMachineCodeDeleter() {
- return new Deleter();
-}
-
-
-
//===---------------------------------------------------------------------===//
// MachineFunction implementation
//===---------------------------------------------------------------------===//
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->DeleteMachineBasicBlock(MBB);
}
-MachineFunction::MachineFunction(const Function *F,
+MachineFunction::MachineFunction(Function *F,
const TargetMachine &TM)
- : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")),
- Fn(F), Target(TM) {
+ : Fn(F), Target(TM) {
if (TM.getRegisterInfo())
RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
MachineRegisterInfo(*TM.getRegisterInfo());
@@ -131,7 +99,8 @@ MachineFunction::MachineFunction(const Function *F,
const TargetData &TD = *TM.getTargetData();
bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
- unsigned TyAlignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+ unsigned TyAlignment = IsPic ?
+ TD.getABITypeAlignment(Type::getInt32Ty(F->getContext()))
: TD.getPointerABIAlignment();
JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
MachineJumpTableInfo(EntrySize, TyAlignment);
@@ -221,11 +190,6 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
///
void
MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
- // Clear the instructions memoperands. This must be done manually because
- // the instruction's parent pointer is now null, so it can't properly
- // deallocate them on its own.
- MI->clearMemOperands(*this);
-
MI->~MachineInstr();
InstructionRecycler.Deallocate(Allocator, MI);
}
@@ -248,12 +212,99 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
BasicBlockRecycler.Deallocate(Allocator, MBB);
}
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
+ int64_t o, uint64_t s,
+ unsigned base_alignment) {
+ return new (Allocator.Allocate<MachineMemOperand>())
+ MachineMemOperand(v, f, o, s, base_alignment);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ return new (Allocator.Allocate<MachineMemOperand>())
+ MachineMemOperand(MMO->getValue(), MMO->getFlags(),
+ int64_t(uint64_t(MMO->getOffset()) +
+ uint64_t(Offset)),
+ Size, MMO->getBaseAlignment());
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getValue(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getOffset(), (*I)->getSize(),
+ (*I)->getBaseAlignment());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+  // Count the number of store mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getValue(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getOffset(), (*I)->getSize(),
+ (*I)->getBaseAlignment());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
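extractLoadMemRefs and its store twin are split/clone helpers: an operand that
is both a load and a store (e.g. from a folded read-modify-write) is cloned
with the opposite flag cleared, while pure loads or stores are shared. A
hedged usage sketch for unfolding one instruction into separate halves; it
assumes the setMemRefs method that the MachineInstr comments in this patch
name as the primary way to install a MemRefs list:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Hand each half of an unfolded load-and-store instruction only the
    // memory references that apply to it.
    static void splitMemRefs(MachineFunction &MF, MachineInstr &Orig,
                             MachineInstr &LoadMI, MachineInstr &StoreMI) {
      std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> L =
        MF.extractLoadMemRefs(Orig.memoperands_begin(),
                              Orig.memoperands_end());
      std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> S =
        MF.extractStoreMemRefs(Orig.memoperands_begin(),
                               Orig.memoperands_end());
      LoadMI.setMemRefs(L.first, L.second);
      StoreMI.setMemRefs(S.first, S.second);
    }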
void MachineFunction::dump() const {
- print(*cerr.stream());
+ print(errs());
}
-void MachineFunction::print(std::ostream &OS) const {
- OS << "# Machine code for " << Fn->getName () << "():\n";
+void MachineFunction::print(raw_ostream &OS) const {
+ OS << "# Machine code for " << Fn->getName() << "():\n";
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -262,10 +313,7 @@ void MachineFunction::print(std::ostream &OS) const {
JumpTableInfo->print(OS);
// Print Constant Pool
- {
- raw_os_ostream OSS(OS);
- ConstantPool->print(OSS);
- }
+ ConstantPool->print(OS);
const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
@@ -279,32 +327,32 @@ void MachineFunction::print(std::ostream &OS) const {
OS << " Reg #" << I->first;
if (I->second)
- OS << " in VR#" << I->second << " ";
+ OS << " in VR#" << I->second << ' ';
}
- OS << "\n";
+ OS << '\n';
}
if (RegInfo && !RegInfo->liveout_empty()) {
OS << "Live Outs:";
for (MachineRegisterInfo::liveout_iterator
I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
if (TRI)
- OS << " " << TRI->getName(*I);
+ OS << ' ' << TRI->getName(*I);
else
OS << " Reg #" << *I;
- OS << "\n";
+ OS << '\n';
}
- for (const_iterator BB = begin(); BB != end(); ++BB)
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB)
BB->print(OS);
- OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
+ OS << "\n# End machine code for " << Fn->getName() << "().\n\n";
}
namespace llvm {
template<>
struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getFunction()->getName() + "' function";
+ return "CFG for '" + F->getFunction()->getNameStr() + "' function";
}
static std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -312,17 +360,18 @@ namespace llvm {
bool ShortNames) {
if (ShortNames && Node->getBasicBlock() &&
!Node->getBasicBlock()->getName().empty())
- return Node->getBasicBlock()->getName() + ":";
-
- std::ostringstream Out;
- if (ShortNames) {
- Out << Node->getNumber() << ':';
- return Out.str();
+ return Node->getBasicBlock()->getNameStr() + ":";
+
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (ShortNames)
+ OSS << Node->getNumber() << ':';
+ else
+ Node->print(OSS);
}
- Node->print(Out);
-
- std::string OutStr = Out.str();
if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
// Process string output to make it nicer...
@@ -339,59 +388,23 @@ namespace llvm {
void MachineFunction::viewCFG() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getName());
+ ViewGraph(this, "mf" + getFunction()->getNameStr());
#else
- cerr << "SelectionDAG::viewGraph is only available in debug builds on "
- << "systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
void MachineFunction::viewCFGOnly() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getName(), true);
+ ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
#else
- cerr << "SelectionDAG::viewGraph is only available in debug builds on "
- << "systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
-// The next two methods are used to construct and to retrieve
-// the MachineCodeForFunction object for the given function.
-// construct() -- Allocates and initializes for a given function and target
-// get() -- Returns a handle to the object.
-// This should not be called before "construct()"
-// for a given Function.
-//
-MachineFunction&
-MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
-{
- AnnotationID MF_AID =
- AnnotationManager::getID("CodeGen::MachineCodeForFunction");
- assert(Fn->getAnnotation(MF_AID) == 0 &&
- "Object already exists for this function!");
- MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
- Fn->addAnnotation(mcInfo);
- return *mcInfo;
-}
-
-void MachineFunction::destruct(const Function *Fn) {
- AnnotationID MF_AID =
- AnnotationManager::getID("CodeGen::MachineCodeForFunction");
- bool Deleted = Fn->deleteAnnotation(MF_AID);
- assert(Deleted && "Machine code did not exist for function!");
- Deleted = Deleted; // silence warning when no assertions.
-}
-
-MachineFunction& MachineFunction::get(const Function *F)
-{
- AnnotationID MF_AID =
- AnnotationManager::getID("CodeGen::MachineCodeForFunction");
- MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
- assert(mc && "Call construct() method first to allocate the object");
- return *mc;
-}
-
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
unsigned MachineFunction::addLiveIn(unsigned PReg,
@@ -402,23 +415,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
return VReg;
}
-/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given
-/// source file, line, and column. If none currently exists, create a new
-/// DebugLocTuple, and insert it into the DebugIdMap.
-unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit,
- unsigned Line, unsigned Col) {
- DebugLocTuple Tuple(CompileUnit, Line, Col);
- DenseMap<DebugLocTuple, unsigned>::iterator II
- = DebugLocInfo.DebugIdMap.find(Tuple);
- if (II != DebugLocInfo.DebugIdMap.end())
- return II->second;
- // Add a new tuple.
- unsigned Id = DebugLocInfo.DebugLocations.size();
- DebugLocInfo.DebugLocations.push_back(Tuple);
- DebugLocInfo.DebugIdMap[Tuple] = Id;
- return Id;
-}
-
/// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object.
DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
unsigned Idx = DL.getIndex();
@@ -444,7 +440,38 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
}
-void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+ assert(MBB && "MBB must be valid");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB must be part of a MachineFunction");
+ const TargetMachine &TM = MF->getTarget();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // The entry MBB always has all CSRs pristine.
+ if (MBB == &MF->front())
+ return BV;
+
+ // On other MBBs the saved CSRs are not pristine.
+ const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I)
+ BV.reset(I->getReg());
+
+ return BV;
+}
+
+
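getPristineRegs answers "which callee-saved registers still hold the caller's
values at this block". A typical consumer tests the returned BitVector before
clobbering a register; a hedged sketch under the headers of this vintage:

    #include "llvm/ADT/BitVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    // Return true if Reg must not be clobbered in MBB because it still
    // holds the caller's value (a "pristine" callee-saved register).
    static bool isPristineIn(const MachineBasicBlock *MBB, unsigned Reg) {
      const MachineFrameInfo *MFI = MBB->getParent()->getFrameInfo();
      BitVector Pristine = MFI->getPristineRegs(MBB);
      return Reg < Pristine.size() && Pristine.test(Reg);
    }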
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
@@ -481,10 +508,9 @@ void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
}
void MachineFrameInfo::dump(const MachineFunction &MF) const {
- print(MF, *cerr.stream());
+ print(MF, errs());
}
-
//===----------------------------------------------------------------------===//
// MachineJumpTableInfo implementation
//===----------------------------------------------------------------------===//
@@ -521,7 +547,7 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
return MadeChange;
}
-void MachineJumpTableInfo::print(std::ostream &OS) const {
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
// FIXME: this is lame, maybe we could print out the MBB numbers or something
// like {1, 2, 4, 5, 3, 0}
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
@@ -530,7 +556,7 @@ void MachineJumpTableInfo::print(std::ostream &OS) const {
}
}
-void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+void MachineJumpTableInfo::dump() const { print(errs()); }
//===----------------------------------------------------------------------===//
@@ -539,10 +565,17 @@ void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
const Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
- return Val.MachineCPVal->getType();
+ return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
}
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getRelocationInfo();
+ return Val.ConstVal->getRelocationInfo();
+}
+
MachineConstantPool::~MachineConstantPool() {
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
if (Constants[i].isMachineConstantPoolEntry())
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 000000000000..56294d90398f
--- /dev/null
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,50 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+using namespace llvm;
+
+// Register this pass with PassInfo directly to avoid having to define
+// a default constructor.
+static PassInfo
+X("Machine Function Analysis", "machine-function-analysis",
+ intptr_t(&MachineFunctionAnalysis::ID), 0,
+ /*CFGOnly=*/false, /*is_analysis=*/true);
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM);
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = 0;
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 000000000000..2f8d4c9e7aa4
--- /dev/null
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,50 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("memdep");
+ AU.addPreserved("live-values");
+ AU.addPreserved("domtree");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("loops");
+ AU.addPreserved("lda");
+
+ FunctionPass::getAnalysisUsage(AU);
+}
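Any pass deriving from MachineFunctionPass now inherits this preservation set,
but it must chain to the base class when it overrides getAnalysisUsage,
exactly as the Printer pass earlier in this patch does. A minimal sketch of a
conforming pass; the pass name and struct name are placeholders:

    #include "llvm/CodeGen/MachineFunctionPass.h"
    using namespace llvm;

    namespace {
      // Skeleton pass that plays by the new rules: it forwards to the
      // base class so the MachineFunctionAnalysis requirement and the
      // preserved-pass list are registered.
      struct ExampleMFPass : public MachineFunctionPass {
        static char ID;
        ExampleMFPass() : MachineFunctionPass(&ID) {}
        const char *getPassName() const { return "Example MF Pass"; }
        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.setPreservesAll();
          MachineFunctionPass::getAnalysisUsage(AU);  // do not drop this
        }
        bool runOnMachineFunction(MachineFunction &MF) { return false; }
      };
      char ExampleMFPass::ID = 0;
    }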
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index d44305f33338..cbe5c7cb51e3 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,17 +15,20 @@
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
#include "llvm/Value.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
using namespace llvm;
@@ -156,7 +159,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return false;
switch (getType()) {
- default: assert(0 && "Unrecognized operand type");
+ default: llvm_unreachable("Unrecognized operand type");
case MachineOperand::MO_Register:
return getReg() == Other.getReg() && isDef() == Other.isDef() &&
getSubReg() == Other.getSubReg();
@@ -182,11 +185,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
/// print - Print the specified machine operand.
///
-void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const {
- raw_os_ostream RawOS(OS);
- print(RawOS, TM);
-}
-
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
switch (getType()) {
case MachineOperand::MO_Register:
@@ -242,7 +240,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << getImm();
break;
case MachineOperand::MO_FPImmediate:
- if (getFPImm()->getType() == Type::FloatTy)
+ if (getFPImm()->getType()->isFloatTy())
OS << getFPImm()->getValueAPF().convertToFloat();
else
OS << getFPImm()->getValueAPF().convertToDouble();
@@ -274,7 +272,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << '>';
break;
default:
- assert(0 && "Unrecognized operand type");
+ llvm_unreachable("Unrecognized operand type");
}
if (unsigned TF = getTargetFlags())
@@ -289,7 +287,7 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
int64_t o, uint64_t s, unsigned int a)
: Offset(o), Size(s), V(v),
Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
- assert(isPowerOf2_32(a) && "Alignment is not a power of 2!");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
}
@@ -302,6 +300,66 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
ID.AddInteger(Flags);
}
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3);
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ V = MMO->getValue();
+ Offset = MMO->getOffset();
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+ assert((MMO.isLoad() || MMO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MMO.isVolatile())
+ OS << "Volatile ";
+
+ if (MMO.isLoad())
+ OS << "LD";
+ if (MMO.isStore())
+ OS << "ST";
+ OS << MMO.getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (!MMO.getValue())
+ OS << "<unknown>";
+ else
+ WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (MMO.getBaseAlignment() != MMO.getAlignment())
+ OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+ if (MMO.getOffset() != 0)
+ OS << "+" << MMO.getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+ MMO.getBaseAlignment() != MMO.getSize())
+ OS << "(align=" << MMO.getAlignment() << ")";
+
+ return OS;
+}
+
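Two details above deserve a worked example: the alignment is stored as
log2(a)+1 in the upper bits of Flags, and the effective alignment of a (base,
offset) pair is MinAlign, the largest power of two dividing both. A hedged
sketch of both computations as free functions, not the real class methods:

    #include <stdint.h>

    // Encode alignment A (a power of two, A >= 1) into bits [3..] as
    // log2(A)+1, mirroring MachineMemOperand's Flags layout.
    static unsigned encodeAlign(unsigned Flags, uint64_t A) {
      unsigned Log = 0;
      while ((uint64_t(1) << (Log + 1)) <= A)
        ++Log;                              // floor(log2(A))
      return (Flags & 7) | ((Log + 1) << 3);
    }

    // MinAlign: the largest power of two dividing both A and B, i.e. the
    // lowest set bit of (A | B). For example, base alignment 16 with
    // offset 4 yields an effective alignment of 4.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & -(A | B);
    }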
//===----------------------------------------------------------------------===//
// MachineInstr Implementation
//===----------------------------------------------------------------------===//
@@ -309,7 +367,8 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
/// TID NULL and no operands.
MachineInstr::MachineInstr()
- : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+ : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
}
@@ -328,7 +387,7 @@ void MachineInstr::addImplicitDefUseOperands() {
/// TargetInstrDesc or the numOperands if it is not zero. (for
/// instructions with variable number of operands).
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
- : TID(&tid), NumImplicitOps(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -346,7 +405,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
/// MachineInstr ctor - As above, but with a DebugLoc.
MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+ : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ Parent(0), debugLoc(dl) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
NumImplicitOps++;
@@ -365,7 +425,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
/// basic block.
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -385,7 +445,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+ : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -403,8 +464,9 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0),
- debugLoc(MI.getDebugLoc()) {
+ : TID(&MI.getDesc()), NumImplicitOps(0),
+ MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
// Add operands
@@ -412,11 +474,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
addOperand(MI.getOperand(i));
NumImplicitOps = MI.NumImplicitOps;
- // Add memory operands.
- for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(),
- j = MI.memoperands_end(); i != j; ++i)
- addMemOperand(MF, *i);
-
// Set parent to null.
Parent = 0;
@@ -425,8 +482,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
MachineInstr::~MachineInstr() {
LeakDetector::removeGarbageObject(this);
- assert(MemOperands.empty() &&
- "MachineInstr being deleted with live memoperands!");
#ifndef NDEBUG
for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
@@ -587,18 +642,24 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
}
}
-/// addMemOperand - Add a MachineMemOperand to the machine instruction,
-/// referencing arbitrary storage.
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
void MachineInstr::addMemOperand(MachineFunction &MF,
- const MachineMemOperand &MO) {
- MemOperands.push_back(MO);
-}
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ mmo_iterator OldMemRefsEnd = MemRefsEnd;
-/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands.
-void MachineInstr::clearMemOperands(MachineFunction &MF) {
- MemOperands.clear();
-}
+ size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+ mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
+
+ std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+ MemRefs = NewMemRefs;
+ MemRefsEnd = NewMemRefsEnd;
+}
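addMemOperand grows an immutable, allocator-backed array by copying: allocate
N+1 slots, copy the old refs, append the new one. The same idiom stripped of
the MachineFunction allocator (plain new[] stands in here, so this sketch
deliberately never frees the old array, just as the arena would own it):

    #include <algorithm>
    #include <cstddef>

    // Append Elem to an array of T* held as a [Begin, End) pair, by
    // allocating a fresh array one slot larger, as addMemOperand does
    // with the MachineFunction allocator (which owns the memory).
    template <class T>
    static void appendPtr(T **&Begin, T **&End, T *Elem) {
      size_t NewNum = (End - Begin) + 1;
      T **NewArr = new T*[NewNum];          // arena-allocated in the real code
      std::copy(Begin, End, NewArr);
      NewArr[NewNum - 1] = Elem;
      Begin = NewArr;
      End = NewArr + NewNum;
    }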
/// removeFromParent - This method unlinks 'this' from the containing basic
/// block, and returns it, but does not delete it.
@@ -657,7 +718,7 @@ bool MachineInstr::isDebugLabel() const {
}
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
-/// the specific register or -1 if it is not found. It further tightening
+/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
const TargetRegisterInfo *TRI) const {
@@ -731,7 +792,9 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
unsigned DefPart = 0;
for (unsigned i = 1, e = getNumOperands(); i < e; ) {
const MachineOperand &FMO = getOperand(i);
- assert(FMO.isImm());
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!FMO.isImm())
+ return false;
// Skip over this def.
unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
unsigned PrevDef = i + 1;
@@ -782,16 +845,22 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
const MachineOperand &MO = getOperand(UseOpIdx);
if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
return false;
- int FlagIdx = UseOpIdx - 1;
- if (FlagIdx < 1)
- return false;
- while (!getOperand(FlagIdx).isImm()) {
- if (--FlagIdx == 0)
+
+ // Find the flag operand corresponding to UseOpIdx
+ unsigned FlagIdx, NumOps=0;
+ for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!UFMO.isImm())
return false;
+ NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm());
+ assert(NumOps < getNumOperands() && "Invalid inline asm flag");
+ if (UseOpIdx < FlagIdx+NumOps+1)
+ break;
}
- const MachineOperand &UFMO = getOperand(FlagIdx);
- if (FlagIdx + InlineAsm::getNumOperandRegisters(UFMO.getImm()) < UseOpIdx)
+ if (FlagIdx >= UseOpIdx)
return false;
+ const MachineOperand &UFMO = getOperand(FlagIdx);
unsigned DefNo;
if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
if (!DefOpIdx)
@@ -864,7 +933,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) {
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
- bool &SawStore) const {
+ bool &SawStore,
+ AliasAnalysis *AA) const {
// Ignore stuff that we obviously can't move.
if (TID->mayStore() || TID->isCall()) {
SawStore = true;
@@ -878,9 +948,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
  // destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (TID->mayLoad() && !TII->isInvariantLoad(this))
+ if (TID->mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
- // end of block, or if the laod is volatile, we can't move it.
+ // end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
return true;
@@ -889,11 +959,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
/// isSafeToReMat - Return true if it's safe to rematerialize the specified
/// instruction which defined the specified register instead of copying it.
bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
- unsigned DstReg) const {
+ unsigned DstReg,
+ AliasAnalysis *AA) const {
bool SawStore = false;
- if (!getDesc().isRematerializable() ||
- !TII->isTriviallyReMaterializable(this) ||
- !isSafeToMove(TII, SawStore))
+ if (!TII->isTriviallyReMaterializable(this, AA) ||
+ !isSafeToMove(TII, SawStore, AA))
return false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -930,21 +1000,55 @@ bool MachineInstr::hasVolatileMemoryRef() const {
return true;
// Check the memory reference information for volatile references.
- for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(),
- E = memoperands_end(); I != E; ++I)
- if (I->isVolatile())
+ for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+ if ((*I)->isVolatile())
return true;
return false;
}
-void MachineInstr::dump() const {
- cerr << " " << *this;
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!TID->mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (mmo_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I) {
+ if ((*I)->isVolatile()) return false;
+ if ((*I)->isStore()) return false;
+
+ if (const Value *V = (*I)->getValue()) {
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV->isConstant(MFI))
+ continue;
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA && AA->pointsToConstantMemory(V))
+ continue;
+ }
+
+ // Otherwise, be conservative and assume the load is not invariant.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
}
-void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const {
- raw_os_ostream RawOS(OS);
- print(RawOS, TM);
+void MachineInstr::dump() const {
+ errs() << " " << *this;
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -967,46 +1071,23 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (!memoperands_empty()) {
OS << ", Mem:";
- for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(),
- e = memoperands_end(); i != e; ++i) {
- const MachineMemOperand &MRO = *i;
- const Value *V = MRO.getValue();
-
- assert((MRO.isLoad() || MRO.isStore()) &&
- "SV has to be a load, store or both.");
-
- if (MRO.isVolatile())
- OS << "Volatile ";
-
- if (MRO.isLoad())
- OS << "LD";
- if (MRO.isStore())
- OS << "ST";
-
- OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") [";
-
- if (!V)
- OS << "<unknown>";
- else if (!V->getName().empty())
- OS << V->getName();
- else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- PSV->print(OS);
- } else
- OS << V;
-
- OS << " + " << MRO.getOffset() << "]";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ OS << **i;
+ if (next(i) != e)
+ OS << " ";
}
}
if (!debugLoc.isUnknown()) {
const MachineFunction *MF = getParent()->getParent();
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
- DICompileUnit CU(DLT.CompileUnit);
- std::string Dir, Fn;
- OS << " [dbg: "
- << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << ","
- << DLT.Line << ","
- << DLT.Col << "]";
+ DICompileUnit CU(DLT.Scope);
+ if (!CU.isNull())
+ OS << " [dbg: "
+ << CU.getDirectory() << '/' << CU.getFilename() << ","
+ << DLT.Line << ","
+ << DLT.Col << "]";
}
OS << "\n";
@@ -1021,7 +1102,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
SmallVector<unsigned,4> DeadOps;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
MachineOperand &MO = getOperand(i);
- if (!MO.isReg() || !MO.isUse())
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -1032,6 +1113,9 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
if (MO.isKill())
// The register is already marked kill.
return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
MO.setIsKill();
Found = true;
}
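
The isInvariantLoad logic above is all-or-nothing: a single volatile, storing, or unproven memory reference disqualifies the whole instruction, and missing memoperands count as unproven. A standalone sketch of that shape, with hypothetical MemRef/AliasOracle stand-ins for MachineMemOperand and AliasAnalysis:

    #include <vector>

    // Hypothetical stand-ins for MachineMemOperand and AliasAnalysis.
    struct MemRef {
      bool Volatile, Store, KnownConstant;
    };

    struct AliasOracle {
      // True when the queried location is provably constant memory.
      bool pointsToConstantMemory(const MemRef &M) const {
        return M.KnownConstant;
      }
    };

    // All-or-nothing: every reference must pass, and an instruction whose
    // memory info has been dropped is conservatively rejected.
    bool isInvariantLoad(const std::vector<MemRef> &Refs,
                         const AliasOracle *AA) {
      if (Refs.empty())
        return false;              // lost memoperands: assume the worst
      for (const MemRef &M : Refs) {
        if (M.Volatile || M.Store)
          return false;            // volatile or writing: never invariant
        if (AA && AA->pointsToConstantMemory(M))
          continue;                // provably constant, keep checking
        return false;              // anything unproven disqualifies the load
      }
      return true;
    }
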
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index aaa4de4b2c15..f92ddb2b908a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -28,11 +28,12 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -43,8 +44,11 @@ namespace {
class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass {
const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ BitVector AllocatableSet;
// Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
MachineLoopInfo *LI; // Current MachineLoopInfo
MachineDominatorTree *DT; // Machine dominator tree for the cur loop
MachineRegisterInfo *RegInfo; // Machine register information
@@ -70,6 +74,7 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AliasAnalysis>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -126,20 +131,19 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
/// loop.
///
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- const Function *F = MF.getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- return false;
-
- DOUT << "******** Machine LICM ********\n";
+ DEBUG(errs() << "******** Machine LICM ********\n");
Changed = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
RegInfo = &MF.getRegInfo();
+ AllocatableSet = TRI->getAllocatableSet(MF);
// Get our Loop information...
LI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
for (MachineLoopInfo::iterator
I = LI->begin(), E = LI->end(); I != E; ++I) {
@@ -210,7 +214,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
- if (!TII->isInvariantLoad(&I))
+ if (!I.isInvariantLoad(AA))
// FIXME: we should be able to sink loads with no other side effects if
// there is nothing that can change memory from here until the end of
// block. This is a trivial form of alias analysis.
@@ -218,28 +222,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
DEBUG({
- DOUT << "--- Checking if we can hoist " << I;
+ errs() << "--- Checking if we can hoist " << I;
if (I.getDesc().getImplicitUses()) {
- DOUT << " * Instruction has implicit uses:\n";
+ errs() << " * Instruction has implicit uses:\n";
const TargetRegisterInfo *TRI = TM->getRegisterInfo();
for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
*ImpUses; ++ImpUses)
- DOUT << " -> " << TRI->getName(*ImpUses) << "\n";
+ errs() << " -> " << TRI->getName(*ImpUses) << "\n";
}
if (I.getDesc().getImplicitDefs()) {
- DOUT << " * Instruction has implicit defines:\n";
+ errs() << " * Instruction has implicit defines:\n";
const TargetRegisterInfo *TRI = TM->getRegisterInfo();
for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
*ImpDefs; ++ImpDefs)
- DOUT << " -> " << TRI->getName(*ImpDefs) << "\n";
+ errs() << " -> " << TRI->getName(*ImpDefs) << "\n";
}
});
if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
- DOUT << "Cannot hoist with implicit defines or uses\n";
+ DEBUG(errs() << "Cannot hoist with implicit defines or uses\n");
return false;
}
@@ -254,8 +258,30 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (Reg == 0) continue;
// Don't hoist an instruction that uses or defines a physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!RegInfo->def_empty(Reg))
+ return false;
+ if (AllocatableSet.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!RegInfo->def_empty(AliasReg))
+ return false;
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ }
+ }
if (!MO.isUse())
continue;
@@ -291,13 +317,10 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
return false;
- const TargetInstrDesc &TID = MI.getDesc();
-
// FIXME: For now, only hoist re-materializable instructions. LICM will
// increase register pressure. We want to make sure it doesn't increase
// spilling.
- if (!TID.mayLoad() && (!TID.isRematerializable() ||
- !TII->isTriviallyReMaterializable(&MI)))
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
return false;
// If result(s) of this instruction is used by PHIs, then don't hoist it.
@@ -355,14 +378,14 @@ void MachineLICM::Hoist(MachineInstr &MI) {
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
DEBUG({
- DOUT << "Hoisting " << MI;
+ errs() << "Hoisting " << MI;
if (CurPreheader->getBasicBlock())
- DOUT << " to MachineBasicBlock "
- << CurPreheader->getBasicBlock()->getName();
+ errs() << " to MachineBasicBlock "
+ << CurPreheader->getBasicBlock()->getName();
if (MI.getParent()->getBasicBlock())
- DOUT << " from MachineBasicBlock "
- << MI.getParent()->getBasicBlock()->getName();
- DOUT << "\n";
+ errs() << " from MachineBasicBlock "
+ << MI.getParent()->getBasicBlock()->getName();
+ errs() << "\n";
});
// Look for opportunity to CSE the hoisted instruction.
@@ -374,8 +397,7 @@ void MachineLICM::Hoist(MachineInstr &MI) {
if (CI != CSEMap.end()) {
const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo);
if (Dup) {
- DOUT << "CSEing " << MI;
- DOUT << " with " << *Dup;
+ DEBUG(errs() << "CSEing " << MI << " with " << *Dup);
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef())
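
The physreg test added to IsLoopInvariantInst hoists a use only when the register is purely ambient: no defs anywhere, not allocatable, and the same holds for every alias. The rule in isolation, with a hypothetical RegModel in place of MachineRegisterInfo, TargetRegisterInfo::getAliasSet, and the AllocatableSet BitVector:

    #include <set>
    #include <vector>

    // Hypothetical register model. Aliases must be sized so that
    // Aliases[Reg] is valid for every register that gets queried.
    struct RegModel {
      std::set<unsigned> HasDef;                   // regs with at least one def
      std::set<unsigned> Allocatable;              // regs the allocator may use
      std::vector<std::vector<unsigned>> Aliases;  // Aliases[Reg] = alias regs

      bool defEmpty(unsigned R) const { return !HasDef.count(R); }
      bool allocatable(unsigned R) const { return Allocatable.count(R) != 0; }
    };

    // A physreg use may be hoisted only if the register is purely ambient:
    // no defs anywhere, not allocatable, and the same for every alias.
    bool isAmbientUse(const RegModel &M, unsigned Reg) {
      if (!M.defEmpty(Reg) || M.allocatable(Reg))
        return false;
      for (unsigned A : M.Aliases[Reg])
        if (!M.defEmpty(A) || M.allocatable(A))
          return false;
      return true;
    }
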
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index ff56f4de5906..2da8e3760e9a 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -19,8 +19,12 @@
#include "llvm/CodeGen/Passes.h"
using namespace llvm;
-TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock>);
-TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock>);
+#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLB);
+#undef MLB
+#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLIB);
+#undef MLIB
char MachineLoopInfo::ID = 0;
static RegisterPass<MachineLoopInfo>
@@ -37,4 +41,5 @@ bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
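
The MLB/MLIB defines above exist because the comma inside LoopBase<MachineBasicBlock, MachineLoop> would otherwise be parsed as an argument separator by the single-argument TEMPLATE_INSTANTIATION macro. The same workaround in miniature (INSTANTIATE and Pair are made up for illustration):

    // A hypothetical single-argument macro, like TEMPLATE_INSTANTIATION.
    #define INSTANTIATE(X) template X;

    template <typename A, typename B> class Pair { A a; B b; };

    // INSTANTIATE(class Pair<int, long>)  // error: the comma splits the args
    #define PIL class Pair<int, long>      // hide the comma behind one name
    INSTANTIATE(PIL)                // expands to: template class Pair<int, long>;
    #undef PIL
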
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 1d8109eb8d99..b62803f105e4 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -23,7 +23,7 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
using namespace llvm::dwarf;
@@ -32,23 +32,23 @@ static RegisterPass<MachineModuleInfo>
X("machinemoduleinfo", "Module Information");
char MachineModuleInfo::ID = 0;
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
//===----------------------------------------------------------------------===//
-
+
MachineModuleInfo::MachineModuleInfo()
: ImmutablePass(&ID)
-, LabelIDList()
-, FrameMoves()
-, LandingPads()
-, Personalities()
+, ObjFileMMI(0)
, CallsEHReturn(0)
, CallsUnwindInit(0)
-, DbgInfoAvailable(false)
-{
- // Always emit "no personality" info
+, DbgInfoAvailable(false) {
+ // Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
}
-MachineModuleInfo::~MachineModuleInfo() {
+MachineModuleInfo::~MachineModuleInfo() {
+ delete ObjFileMMI;
}
/// doInitialization - Initialize the state for a new module.
@@ -63,18 +63,12 @@ bool MachineModuleInfo::doFinalization() {
return false;
}
-/// BeginFunction - Begin gathering function meta information.
-///
-void MachineModuleInfo::BeginFunction(MachineFunction *MF) {
- // Coming soon.
-}
-
/// EndFunction - Discard function meta information.
///
void MachineModuleInfo::EndFunction() {
// Clean up frame info.
FrameMoves.clear();
-
+
// Clean up exception info.
LandingPads.clear();
TypeInfos.clear();
@@ -82,12 +76,16 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ VariableDbgInfo.clear();
+#endif
}
/// AnalyzeModule - Scan the module for global debug information.
///
void MachineModuleInfo::AnalyzeModule(Module &M) {
- // Insert functions in the llvm.used array into UsedFunctions.
+ // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+ // UsedFunctions.
GlobalVariable *GV = M.getGlobalVariable("llvm.used");
if (!GV || !GV->hasInitializer()) return;
@@ -95,12 +93,10 @@ void MachineModuleInfo::AnalyzeModule(Module &M) {
ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
if (InitList == 0) return;
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i)))
- if (CE->getOpcode() == Instruction::BitCast)
- if (Function *F = dyn_cast<Function>(CE->getOperand(0)))
- UsedFunctions.insert(F);
- }
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (Function *F =
+ dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+ UsedFunctions.insert(F);
}
//===-EH-------------------------------------------------------------------===//
@@ -115,7 +111,7 @@ LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
if (LP.LandingPadBlock == LandingPad)
return LP;
}
-
+
LandingPads.push_back(LandingPadInfo(LandingPad));
return LandingPads[N];
}
@@ -134,7 +130,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
unsigned LandingPadLabel = NextLabelID();
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.LandingPadLabel = LandingPadLabel;
+ LP.LandingPadLabel = LandingPadLabel;
return LandingPadLabel;
}
@@ -148,8 +144,13 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
for (unsigned i = 0; i < Personalities.size(); ++i)
if (Personalities[i] == Personality)
return;
-
- Personalities.push_back(Personality);
+
+ // If this is the first personality we're adding, go
+ // ahead and add it at the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
}
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
@@ -224,7 +225,7 @@ void MachineModuleInfo::TidyLandingPads() {
}
}
-/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
@@ -273,24 +274,24 @@ Function *MachineModuleInfo::getPersonality() const {
}
/// getPersonalityIndex - Return unique index for current personality
-/// function. NULL personality function should always get zero index.
+/// function. NULL/first personality function should always get zero index.
unsigned MachineModuleInfo::getPersonalityIndex() const {
const Function* Personality = NULL;
-
+
// Scan landing pads. If there is at least one non-NULL personality - use it.
for (unsigned i = 0; i != LandingPads.size(); ++i)
if (LandingPads[i].Personality) {
Personality = LandingPads[i].Personality;
break;
}
-
+
for (unsigned i = 0; i < Personalities.size(); ++i) {
if (Personalities[i] == Personality)
return i;
}
- // This should never happen
- assert(0 && "Personality function should be set!");
+ // This will happen when the current personality function is
+ // stored at index zero.
return 0;
}
@@ -306,6 +307,7 @@ struct DebugLabelFolder : public MachineFunctionPass {
DebugLabelFolder() : MachineFunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -321,12 +323,12 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
// Get machine module info.
MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
if (!MMI) return false;
-
+
// Track if change is made.
bool MadeChange = false;
// No prior label to begin.
unsigned PriorLabel = 0;
-
+
// Iterate through basic blocks.
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
BB != E; ++BB) {
@@ -336,7 +338,7 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){
// The label ID # is always operand #0, an immediate.
unsigned NextLabel = I->getOperand(0).getImm();
-
+
// If there was an immediate prior label.
if (PriorLabel) {
// Remap the current label to prior label.
@@ -354,15 +356,14 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
// No consecutive labels.
PriorLabel = 0;
}
-
+
++I;
}
}
-
+
return MadeChange;
}
FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
}
-
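
The addPersonality/getPersonalityIndex changes keep slot zero reserved for either NULL or the first personality registered, which is why getPersonalityIndex may now legitimately return 0 instead of asserting. A toy table mirroring that behavior (PersonalityTable is hypothetical):

    #include <vector>

    // Hypothetical mirror of the Personalities table: slot 0 is reserved
    // for "no personality" (NULL) or the first personality registered.
    struct PersonalityTable {
      std::vector<const void*> Slots{nullptr};   // always starts with NULL

      void add(const void *P) {
        if (!P) return;                          // NULL already owns slot 0
        for (const void *Q : Slots)
          if (Q == P) return;                    // already registered
        if (Slots[0] == nullptr)
          Slots[0] = P;                          // first real one takes slot 0
        else
          Slots.push_back(P);
      }

      // Returns 0 both for NULL and for a personality stored at index
      // zero, so a zero result is no longer an error.
      unsigned indexOf(const void *P) const {
        for (unsigned i = 0; i < Slots.size(); ++i)
          if (Slots[i] == P) return i;
        return 0;
      }
    };
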
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 000000000000..7a6292910f4b
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::Anchor() {}
+
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ const MCSymbol *LHSS =
+ ((const std::pair<const MCSymbol*, const MCSymbol*>*)LHS)->first;
+ const MCSymbol *RHSS =
+ ((const std::pair<const MCSymbol*, const MCSymbol*>*)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoMachO::SymbolListTy
+MachineModuleInfoMachO::GetSortedStubs(const DenseMap<const MCSymbol*,
+ const MCSymbol*> &Map) {
+ MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end());
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ return List;
+}
+
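
GetSortedStubs exists because DenseMap iteration order is not deterministic from run to run; sorting by symbol name keeps the emitted stub lists stable. The same idea with standard containers, using std::sort over an unordered_map instead of qsort over a DenseMap:

    #include <algorithm>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    using SymbolPair = std::pair<std::string, std::string>;

    // Copy the hash map (unordered, like DenseMap) into a vector and sort
    // by key so the emitted stub list is deterministic across runs.
    std::vector<SymbolPair>
    sortedStubs(const std::unordered_map<std::string, std::string> &Map) {
      std::vector<SymbolPair> List(Map.begin(), Map.end());
      std::sort(List.begin(), List.end(),
                [](const SymbolPair &L, const SymbolPair &R) {
                  return L.first < R.first;
                });
      return List;
    }
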
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 544d83a33f7f..b31973e04fd9 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -110,11 +110,9 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
"Invalid vreg!");
- for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
- // Since we are in SSA form, we can stop at the first definition.
- if (I.getOperand().isDef())
- return &*I;
- }
+ // Since we are in SSA form, we can use the first definition.
+ if (!def_empty(Reg))
+ return &*def_begin(Reg);
return 0;
}
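
The getVRegDef rewrite leans on the SSA invariant that a virtual register has at most one definition, so the first entry of the def list is the answer. Reduced to its essentials (Instr and DefList are stand-ins):

    #include <vector>

    struct Instr;                        // opaque instruction handle

    // Hypothetical per-vreg def list; under SSA it has zero or one entry.
    using DefList = std::vector<Instr*>;

    // No scan over all register operands needed: the unique definition,
    // if any, is simply the first element.
    Instr *getVRegDef(const DefList &Defs) {
      return Defs.empty() ? nullptr : Defs.front();
    }
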
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 0e18fa742f5b..0f3b33f54d46 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -7,7 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//
@@ -15,12 +20,14 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumSunk, "Number of machine instructions sunk");
@@ -29,9 +36,12 @@ namespace {
class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass {
const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineFunction *CurMF; // Current MachineFunction
MachineRegisterInfo *RegInfo; // Machine register information
- MachineDominatorTree *DT; // Machine dominator tree for the current Loop
+ MachineDominatorTree *DT; // Machine dominator tree
+ AliasAnalysis *AA;
+ BitVector AllocatableSet; // Which physregs are allocatable?
public:
static char ID; // Pass identification
@@ -40,7 +50,9 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
@@ -63,10 +75,8 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
MachineBasicBlock *MBB) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
- for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg),
- E = RegInfo->reg_end(); I != E; ++I) {
- if (I.getOperand().isDef()) continue; // ignore def.
-
+ for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg),
+ E = RegInfo->use_end(); I != E; ++I) {
// Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
@@ -85,13 +95,16 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
- DOUT << "******** Machine Sinking ********\n";
+ DEBUG(errs() << "******** Machine Sinking ********\n");
CurMF = &MF;
TM = &CurMF->getTarget();
TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
RegInfo = &CurMF->getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+ AllocatableSet = TRI->getAllocatableSet(*CurMF);
bool EverMadeChange = false;
@@ -142,7 +155,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, SawStore))
+ if (!MI->isSafeToMove(TII, SawStore, AA))
return false;
// FIXME: This should include support for sinking instructions within the
@@ -151,7 +164,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// also sink them down before their first use in the block. This xform has to
// be careful not to *increase* register pressure though, e.g. sinking
// "x = y + z" down if it kills y and z would increase the live ranges of y
- // and z only the shrink the live range of x.
+ // and z and only shrink the live range of x.
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
@@ -169,10 +182,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (Reg == 0) continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- // If this is a physical register use, we can't move it. If it is a def,
- // we can move it, but only if the def is dead.
- if (MO.isUse() || !MO.isDead())
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!RegInfo->def_empty(Reg))
+ return false;
+ if (AllocatableSet.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!RegInfo->def_empty(AliasReg))
+ return false;
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
return false;
+ }
} else {
// Virtual register uses are always safe to sink.
if (MO.isUse()) continue;
@@ -232,15 +261,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (MI->getParent() == SuccToSinkTo)
return false;
- DEBUG(cerr << "Sink instr " << *MI);
- DEBUG(cerr << "to block " << *SuccToSinkTo);
+ DEBUG(errs() << "Sink instr " << *MI);
+ DEBUG(errs() << "to block " << *SuccToSinkTo);
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
// FIXME: Split critical edges if not backedges.
if (SuccToSinkTo->pred_size() > 1) {
- DEBUG(cerr << " *** PUNTING: Critical edge found\n");
+ DEBUG(errs() << " *** PUNTING: Critical edge found\n");
return false;
}
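
The sinking decision above declines two cases worth calling out: sinking into the instruction's own block is a no-op, and sinking into a block with multiple predecessors would put work on paths that never needed it. A sketch of that gate (BlockInfo is a hypothetical summary):

    #include <cstddef>

    // Hypothetical block summary for the sinking decision.
    struct BlockInfo {
      std::size_t NumPreds;
      bool IsCurrentBlock;
    };

    // Splitting the critical edge would be the precise fix for the
    // multiple-predecessor case; like the pass, this punts instead.
    bool canSinkTo(const BlockInfo &SuccToSinkTo) {
      if (SuccToSinkTo.IsCurrentBlock)
        return false;            // nothing to do: already there
      if (SuccToSinkTo.NumPreds > 1)
        return false;            // critical edge -- punt rather than split
      return true;
    }
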
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index be1396c7a810..18a3ead3bc18 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,21 +23,23 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include <fstream>
-
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -53,6 +55,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF);
@@ -61,7 +64,7 @@ namespace {
const bool allowPhysDoubleDefs;
const char *const OutFileName;
- std::ostream *OS;
+ raw_ostream *OS;
const MachineFunction *MF;
const TargetMachine *TM;
const TargetRegisterInfo *TRI;
@@ -75,7 +78,8 @@ namespace {
BitVector regsReserved;
RegSet regsLive;
- RegVector regsDefined, regsImpDefined, regsDead, regsKilled;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegSet regsLiveInButUnused;
// Add Reg and any sub-registers to RV
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
@@ -85,14 +89,6 @@ namespace {
RV.push_back(*R);
}
- // Does RS contain any super-registers of Reg?
- bool anySuperRegisters(const RegSet &RS, unsigned Reg) {
- for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++)
- if (RS.count(*R))
- return true;
- return false;
- }
-
struct BBInfo {
// Is this MBB reachable from the MF entry point?
bool reachable;
@@ -148,7 +144,7 @@ namespace {
DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
bool isReserved(unsigned Reg) {
- return Reg < regsReserved.size() && regsReserved[Reg];
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
}
void visitMachineFunctionBefore();
@@ -176,21 +172,24 @@ static RegisterPass<MachineVerifier>
MachineVer("machineverifier", "Verify generated machine code");
static const PassInfo *const MachineVerifyID = &MachineVer;
-FunctionPass *
-llvm::createMachineVerifierPass(bool allowPhysDoubleDefs)
-{
+FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
return new MachineVerifier(allowPhysDoubleDefs);
}
-bool
-MachineVerifier::runOnMachineFunction(MachineFunction &MF)
-{
- std::ofstream OutFile;
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
if (OutFileName) {
- OutFile.open(OutFileName, std::ios::out | std::ios::app);
- OS = &OutFile;
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
} else {
- OS = cerr.stream();
+ OS = &errs();
}
foundErrors = 0;
@@ -215,51 +214,48 @@ MachineVerifier::runOnMachineFunction(MachineFunction &MF)
}
visitMachineFunctionAfter();
- if (OutFileName)
- OutFile.close();
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ llvm_report_error("Found "+Twine(foundErrors)+" machine code errors.");
- if (foundErrors) {
- cerr << "\nStopping with " << foundErrors << " machine code errors.\n";
- exit(1);
- }
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
return false; // no changes
}
-void
-MachineVerifier::report(const char *msg, const MachineFunction *MF)
-{
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
assert(MF);
- *OS << "\n";
+ *OS << '\n';
if (!foundErrors++)
- MF->print(OS);
+ MF->print(*OS);
*OS << "*** Bad machine code: " << msg << " ***\n"
- << "- function: " << MF->getFunction()->getName() << "\n";
+ << "- function: " << MF->getFunction()->getNameStr() << "\n";
}
-void
-MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB)
-{
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: " << MBB->getBasicBlock()->getName()
+ *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr()
<< " " << (void*)MBB
<< " (#" << MBB->getNumber() << ")\n";
}
-void
-MachineVerifier::report(const char *msg, const MachineInstr *MI)
-{
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
assert(MI);
report(msg, MI->getParent());
*OS << "- instruction: ";
- MI->print(OS, TM);
+ MI->print(*OS, TM);
}
-void
-MachineVerifier::report(const char *msg,
- const MachineOperand *MO, unsigned MONum)
-{
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
assert(MO);
report(msg, MO->getParent());
*OS << "- operand " << MONum << ": ";
@@ -267,9 +263,7 @@ MachineVerifier::report(const char *msg,
*OS << "\n";
}
-void
-MachineVerifier::markReachable(const MachineBasicBlock *MBB)
-{
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
MInfo.reachable = true;
@@ -279,16 +273,158 @@ MachineVerifier::markReachable(const MachineBasicBlock *MBB)
}
}
-void
-MachineVerifier::visitMachineFunctionBefore()
-{
+void MachineVerifier::visitMachineFunctionBefore() {
regsReserved = TRI->getReservedRegs(*MF);
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+ regsReserved.set(*Sub);
+ }
+ }
markReachable(&MF->front());
}
-void
-MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
-{
+void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ // Start with minimal CFG sanity checks.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI != MF->end()) {
+ // Block is not last in function.
+ if (!MBB->isSuccessor(MBBI)) {
+ // Block does not fall through.
+ if (MBB->empty()) {
+ report("MBB doesn't fall through but is empty!", MBB);
+ }
+ }
+ if (TII->BlockHasNoFallThrough(*MBB)) {
+ if (MBB->empty()) {
+ report("TargetInstrInfo says the block has no fall through, but the "
+ "block is empty!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("TargetInstrInfo says the block has no fall through, but the "
+ "block does not end in a barrier!", MBB);
+ }
+ }
+ } else {
+ // Block is last in function.
+ if (MBB->empty()) {
+ report("MBB is last in function but is empty!", MBB);
+ }
+ }
+
+ // Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_empty()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (MBB->succ_begin()[0] != MBBI) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (MBB->succ_begin()[0] != TBB) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!((MBB->succ_begin()[0] == TBB && MBB->succ_begin()[1] == MBBI) ||
+ (MBB->succ_begin()[1] == TBB && MBB->succ_begin()[0] == MBBI))) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!((MBB->succ_begin()[0] == TBB && MBB->succ_begin()[1] == FBB) ||
+ (MBB->succ_begin()[1] == TBB && MBB->succ_begin()[0] == FBB))) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
regsLive.clear();
for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I) {
@@ -300,32 +436,41 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
regsLive.insert(*R);
}
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ regsLive.insert(*R);
+ }
+
regsKilled.clear();
regsDefined.clear();
- regsImpDefined.clear();
}
-void
-MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI)
-{
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
const TargetInstrDesc &TI = MI->getDesc();
- if (MI->getNumExplicitOperands() < TI.getNumOperands()) {
+ if (MI->getNumOperands() < TI.getNumOperands()) {
report("Too few operands", MI);
*OS << TI.getNumOperands() << " operands expected, but "
<< MI->getNumExplicitOperands() << " given.\n";
}
- if (!TI.isVariadic()) {
- if (MI->getNumExplicitOperands() > TI.getNumOperands()) {
- report("Too many operands", MI);
- *OS << TI.getNumOperands() << " operands expected, but "
- << MI->getNumExplicitOperands() << " given.\n";
- }
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !TI.mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !TI.mayStore())
+ report("Missing mayStore flag", MI);
}
}
void
-MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
-{
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const TargetInstrDesc &TI = MI->getDesc();
@@ -337,6 +482,16 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
report("Explicit definition marked as use", MO, MONum);
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < TI.getNumOperands()) {
+ if (MO->isReg()) {
+ if (MO->isDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+ } else {
+ if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
switch (MO->getType()) {
@@ -346,18 +501,26 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
return;
// Check Live Variables.
- if (MO->isUse()) {
+ if (MO->isUndef()) {
+ // An <undef> doesn't refer to any register, so just skip it.
+ } else if (MO->isUse()) {
+ regsLiveInButUnused.erase(Reg);
+
if (MO->isKill()) {
addRegWithSubRegs(regsKilled, Reg);
+ // Tied operands on two-address instructions MUST NOT have a <kill> flag.
+ if (MI->isRegTiedToDefOperand(MONum))
+ report("Illegal kill flag on two-address instruction operand",
+ MO, MONum);
} else {
- // TwoAddress instr modyfying a reg is treated as kill+def.
+ // TwoAddress instr modifying a reg is treated as kill+def.
unsigned defIdx;
if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
MI->getOperand(defIdx).getReg() == Reg)
addRegWithSubRegs(regsKilled, Reg);
}
- // Explicit use of a dead register.
- if (!MO->isImplicit() && !regsLive.count(Reg)) {
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Reserved registers may be used even when 'dead'.
if (!isReserved(Reg))
@@ -374,15 +537,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
}
}
} else {
+ assert(MO->isDef());
// Register defined.
// TODO: verify that earlyclobber ops are not used.
- if (MO->isImplicit())
- addRegWithSubRegs(regsImpDefined, Reg);
- else
- addRegWithSubRegs(regsDefined, Reg);
-
if (MO->isDead())
addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
}
// Check register classes.
@@ -401,8 +562,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
}
sr = s;
}
- if (TOI.RegClass) {
- const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass);
+ if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
if (!DRC->contains(sr)) {
report("Illegal physical register for instruction", MO, MONum);
*OS << TRI->getName(sr) << " is not a "
@@ -419,8 +579,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
}
RC = *(RC->subregclasses_begin()+SubIdx);
}
- if (TOI.RegClass) {
- const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass);
+ if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
if (RC != DRC && !RC->hasSuperClass(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
*OS << "Expected a " << DRC->getName() << " register, but got a "
@@ -431,34 +590,35 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
}
break;
}
- // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do.
- // case MachineOperand::MO_MachineBasicBlock:
- // if (MI->getOpcode() == TargetInstrInfo::PHI) {
- // if (!MO->getMBB()->isSuccessor(MI->getParent()))
- // report("PHI operand is not in the CFG", MO, MONum);
- // }
- // break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->getOpcode() == TargetInstrInfo::PHI) {
+ if (!MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ }
+ break;
+
default:
break;
}
}
-void
-MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI)
-{
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
set_subtract(regsLive, regsKilled);
regsKilled.clear();
- for (RegVector::const_iterator I = regsDefined.begin(),
- E = regsDefined.end(); I != E; ++I) {
+ // Verify that both <def> and <def,dead> operands refer to dead registers.
+ RegVector defs(regsDefined);
+ defs.append(regsDead.begin(), regsDead.end());
+
+ for (RegVector::const_iterator I = defs.begin(), E = defs.end();
+ I != E; ++I) {
if (regsLive.count(*I)) {
if (TargetRegisterInfo::isPhysicalRegister(*I)) {
- // We allow double defines to physical registers with live
- // super-registers.
if (!allowPhysDoubleDefs && !isReserved(*I) &&
- !anySuperRegisters(regsLive, *I)) {
+ !regsLiveInButUnused.count(*I)) {
report("Redefining a live physical register", MI);
*OS << "Register " << TRI->getName(*I)
<< " was defined but already live.\n";
@@ -478,14 +638,12 @@ MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI)
}
}
- set_union(regsLive, regsDefined); regsDefined.clear();
- set_union(regsLive, regsImpDefined); regsImpDefined.clear();
set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
}
void
-MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB)
-{
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
MBBInfoMap[MBB].regsLiveOut = regsLive;
regsLive.clear();
}
@@ -493,9 +651,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB)
// Calculate the largest possible vregsPassed sets. These are the registers that
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
-void
-MachineVerifier::calcMaxRegsPassed()
-{
+void MachineVerifier::calcMaxRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
DenseSet<const MachineBasicBlock*> todo;
@@ -533,9 +689,7 @@ MachineVerifier::calcMaxRegsPassed()
// Calculate the minimum vregsPassed set. These are the registers that always
// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has
// been called earlier.
-void
-MachineVerifier::calcMinRegsPassed()
-{
+void MachineVerifier::calcMinRegsPassed() {
DenseSet<const MachineBasicBlock*> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI)
@@ -570,9 +724,7 @@ MachineVerifier::calcMinRegsPassed()
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid.
-void
-MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB)
-{
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
DenseSet<const MachineBasicBlock*> seen;
@@ -601,9 +753,7 @@ MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB)
}
}
-void
-MachineVerifier::visitMachineFunctionAfter()
-{
+void MachineVerifier::visitMachineFunctionAfter() {
calcMaxRegsPassed();
// With the maximal set of vregsPassed we can verify dead-in registers.
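
The reordered set algebra in visitMachineInstrAfter now erases dead defs before inserting live defs, so a register that appears both as a dead def and as a live def in the same instruction correctly ends up live. Per instruction: live := (live \ killed \ dead) ∪ defined. In miniature with std::set:

    #include <set>

    using RegSet = std::set<unsigned>;

    // Per-instruction liveness step, mirroring the reordered set algebra:
    // kills and dead defs leave the live set first, live defs enter last,
    // so a register in both Dead and Defined stays live.
    void stepLiveness(RegSet &Live, const RegSet &Killed,
                      const RegSet &Dead, const RegSet &Defined) {
      for (unsigned R : Killed)  Live.erase(R);   // subtract regsKilled
      for (unsigned R : Dead)    Live.erase(R);   // subtract regsDead
      for (unsigned R : Defined) Live.insert(R);  // union with regsDefined
    }
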
diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp
new file mode 100644
index 000000000000..cf05275d7a31
--- /dev/null
+++ b/lib/CodeGen/ObjectCodeEmitter.cpp
@@ -0,0 +1,141 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+
+//===----------------------------------------------------------------------===//
+// ObjectCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
+ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
+ObjectCodeEmitter::~ObjectCodeEmitter() {}
+
+/// setBinaryObject - set the BinaryObject we are writing to
+void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
+
+/// emitByte - This callback is invoked when a byte needs to be
+/// written to the data stream, without buffer overflow testing.
+void ObjectCodeEmitter::emitByte(uint8_t B) {
+ BO->emitByte(B);
+}
+
+/// emitWordLE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitWordLE(uint32_t W) {
+ BO->emitWordLE(W);
+}
+
+/// emitWordBE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitWordBE(uint32_t W) {
+ BO->emitWordBE(W);
+}
+
+/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
+ BO->emitDWordLE(W);
+}
+
+/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
+ BO->emitDWordBE(W);
+}
+
+/// emitAlignment - Align 'BO' to the necessary alignment boundary.
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
+ uint8_t fill /* 0 */) {
+ BO->emitAlignment(Alignment, fill);
+}
+
+/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
+ BO->emitULEB128Bytes(Value);
+}
+
+/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
+ BO->emitSLEB128Bytes(Value);
+}
+
+/// emitString - This callback is invoked when a String needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitString(const std::string &String) {
+ BO->emitString(String);
+}
+
+/// getCurrentPCValue - This returns the address that the next emitted byte
+/// will be output to.
+uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// getCurrentPCOffset - Return the offset from the start of the emitted
+/// buffer that we are currently writing to.
+uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// addRelocation - Whenever a relocatable address is needed, it should be
+/// noted with this interface.
+void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
+ BO->addRelocation(relocation);
+}
+
+/// StartMachineBasicBlock - This should be called by the target when a new
+/// basic block is about to be emitted. This way the MCE knows where the
+/// start of the block is, and can implement getMachineBasicBlockAddress.
+void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+}
+
+/// getMachineBasicBlockAddress - Return the address of the specified
+/// MachineBasicBlock, only usable after the label for the MBB has been
+/// emitted.
+uintptr_t
+ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+}
+
+/// getJumpTableEntryAddress - Return the address of the jump table with index
+/// 'Index' in the function that last called initJumpTableInfo.
+uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+}
+
+/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+}
+
+/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
+ assert(CPSections.size() > Index && "CP not emitted!");
+ return CPSections[Index];
+}
+
+} // end namespace llvm
+
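
StartMachineBasicBlock and getMachineBasicBlockAddress amount to an offset table indexed by block number, grown on demand and queried only after emission. The pattern in isolation (BlockOffsets is a stand-in; the real code resizes to (Number+1)*2):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Minimal offset table in the spirit of MBBLocations: record the
    // stream offset at which each numbered block starts, query it later.
    class BlockOffsets {
      std::vector<std::uintptr_t> Locations;  // by block number, 0 = unset
    public:
      void startBlock(unsigned Number, std::uintptr_t CurrentOffset) {
        if (Locations.size() <= Number)
          Locations.resize(Number + 1);       // grow on demand
        Locations[Number] = CurrentOffset;
      }
      std::uintptr_t addressOf(unsigned Number) const {
        assert(Number < Locations.size() && Locations[Number] &&
               "block not emitted!");
        return Locations[Number];
      }
    };
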
diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h
new file mode 100644
index 000000000000..904061ca4fbc
--- /dev/null
+++ b/lib/CodeGen/PBQP/AnnotatedGraph.h
@@ -0,0 +1,184 @@
+//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Annotated PBQP Graph class. This class is used internally by the PBQP solver
+// to cache information to speed up reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H
+#define LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H
+
+#include "GraphBase.h"
+
+namespace PBQP {
+
+
+template <typename NodeData, typename EdgeData> class AnnotatedEdge;
+
+template <typename NodeData, typename EdgeData>
+class AnnotatedNode : public NodeBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> > {
+private:
+
+ NodeData nodeData;
+
+public:
+
+ AnnotatedNode(const Vector &costs, const NodeData &nodeData) :
+ NodeBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> >(costs),
+ nodeData(nodeData) {}
+
+ NodeData& getNodeData() { return nodeData; }
+ const NodeData& getNodeData() const { return nodeData; }
+
+};
+
+template <typename NodeData, typename EdgeData>
+class AnnotatedEdge : public EdgeBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> > {
+private:
+
+ typedef typename GraphBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> >::NodeIterator
+ NodeIterator;
+
+ EdgeData edgeData;
+
+public:
+
+
+ AnnotatedEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
+ const Matrix &costs, const EdgeData &edgeData) :
+ EdgeBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> >(node1Itr, node2Itr, costs),
+ edgeData(edgeData) {}
+
+ EdgeData& getEdgeData() { return edgeData; }
+ const EdgeData& getEdgeData() const { return edgeData; }
+
+};
+
+template <typename NodeData, typename EdgeData>
+class AnnotatedGraph : public GraphBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> > {
+private:
+
+ typedef GraphBase<AnnotatedNode<NodeData, EdgeData>,
+ AnnotatedEdge<NodeData, EdgeData> > PGraph;
+
+ typedef AnnotatedNode<NodeData, EdgeData> NodeEntry;
+ typedef AnnotatedEdge<NodeData, EdgeData> EdgeEntry;
+
+
+ void copyFrom(const AnnotatedGraph &other) {
+ if (!other.areNodeIDsValid()) {
+ other.assignNodeIDs();
+ }
+ std::vector<NodeIterator> newNodeItrs(other.getNumNodes());
+
+ for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd();
+ nItr != nEnd; ++nItr) {
+ newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr));
+ }
+
+ for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd();
+ eItr != eEnd; ++eItr) {
+
+ unsigned node1ID = other.getNodeID(other.getEdgeNode1(eItr)),
+ node2ID = other.getNodeID(other.getEdgeNode2(eItr));
+
+ addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID],
+ other.getEdgeCosts(eItr), other.getEdgeData(eItr));
+ }
+
+ }
+
+public:
+
+ typedef typename PGraph::NodeIterator NodeIterator;
+ typedef typename PGraph::ConstNodeIterator ConstNodeIterator;
+ typedef typename PGraph::EdgeIterator EdgeIterator;
+ typedef typename PGraph::ConstEdgeIterator ConstEdgeIterator;
+
+ AnnotatedGraph() {}
+
+ AnnotatedGraph(const AnnotatedGraph &other) {
+ copyFrom(other);
+ }
+
+ AnnotatedGraph& operator=(const AnnotatedGraph &other) {
+ PGraph::clear();
+ copyFrom(other);
+ return *this;
+ }
+
+ NodeIterator addNode(const Vector &costs, const NodeData &data) {
+ return PGraph::addConstructedNode(NodeEntry(costs, data));
+ }
+
+ EdgeIterator addEdge(const NodeIterator &node1Itr,
+ const NodeIterator &node2Itr,
+ const Matrix &costs, const EdgeData &data) {
+ return PGraph::addConstructedEdge(EdgeEntry(node1Itr, node2Itr,
+ costs, data));
+ }
+
+  NodeData& getNodeData(const NodeIterator &nodeItr) {
+    return PGraph::getNodeEntry(nodeItr).getNodeData();
+  }
+
+  const NodeData& getNodeData(const ConstNodeIterator &nodeItr) const {
+    return PGraph::getNodeEntry(nodeItr).getNodeData();
+  }
+
+  EdgeData& getEdgeData(const EdgeIterator &edgeItr) {
+    return PGraph::getEdgeEntry(edgeItr).getEdgeData();
+  }
+
+  const EdgeData& getEdgeData(const ConstEdgeIterator &edgeItr) const {
+    return PGraph::getEdgeEntry(edgeItr).getEdgeData();
+  }
+
+  SimpleGraph toSimpleGraph() const {
+    SimpleGraph g;
+
+    if (!PGraph::areNodeIDsValid()) {
+      // IDs are a cache; (re)assigning them is logically const.
+      const_cast<AnnotatedGraph*>(this)->assignNodeIDs();
+    }
+    std::vector<SimpleGraph::NodeIterator> newNodeItrs(PGraph::getNumNodes());
+
+    for (ConstNodeIterator nItr = PGraph::nodesBegin(),
+         nEnd = PGraph::nodesEnd();
+         nItr != nEnd; ++nItr) {
+
+      newNodeItrs[PGraph::getNodeID(nItr)] =
+        g.addNode(PGraph::getNodeCosts(nItr));
+    }
+
+    for (ConstEdgeIterator
+         eItr = PGraph::edgesBegin(), eEnd = PGraph::edgesEnd();
+         eItr != eEnd; ++eItr) {
+
+      unsigned node1ID = PGraph::getNodeID(PGraph::getEdgeNode1Itr(eItr)),
+               node2ID = PGraph::getNodeID(PGraph::getEdgeNode2Itr(eItr));
+
+      g.addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID],
+                PGraph::getEdgeCosts(eItr));
+    }
+
+    return g;
+  }
+
+};
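+
+// Illustrative sketch (not part of this header): attaching arbitrary
+// payloads to nodes and edges. NodeMD/EdgeMD are hypothetical POD types.
+//
+//   AnnotatedGraph<NodeMD, EdgeMD> ag;
+//   AnnotatedGraph<NodeMD, EdgeMD>::NodeIterator
+//     n1 = ag.addNode(Vector(2, 0.0), NodeMD()),
+//     n2 = ag.addNode(Vector(2, 0.0), NodeMD());
+//   ag.addEdge(n1, n2, Matrix(2, 2, 0.0), EdgeMD());
+//   ag.getNodeData(n1); // access the node payload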
+
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H
diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h
new file mode 100644
index 000000000000..b2f2e6f620fd
--- /dev/null
+++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h
@@ -0,0 +1,110 @@
+//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Uses a trivial brute force algorithm to solve a PBQP problem.
+// PBQP is NP-hard, so this solver should only be used for debugging
+// small problems.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H
+#define LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H
+
+#include "Solver.h"
+
+namespace PBQP {
+
+/// A brute force PBQP solver. This solver takes exponential time. It should
+/// only be used for debugging purposes.
+class ExhaustiveSolverImpl {
+private:
+
+ const SimpleGraph &g;
+
+ PBQPNum getSolutionCost(const Solution &solution) const {
+ PBQPNum cost = 0.0;
+
+ for (SimpleGraph::ConstNodeIterator
+ nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+
+ unsigned nodeId = g.getNodeID(nodeItr);
+
+ cost += g.getNodeCosts(nodeItr)[solution.getSelection(nodeId)];
+ }
+
+ for (SimpleGraph::ConstEdgeIterator
+ edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+
+ SimpleGraph::ConstNodeIterator n1 = g.getEdgeNode1Itr(edgeItr),
+ n2 = g.getEdgeNode2Itr(edgeItr);
+ unsigned sol1 = solution.getSelection(g.getNodeID(n1)),
+ sol2 = solution.getSelection(g.getNodeID(n2));
+
+ cost += g.getEdgeCosts(edgeItr)[sol1][sol2];
+ }
+
+ return cost;
+ }
+
+public:
+
+ ExhaustiveSolverImpl(const SimpleGraph &g) : g(g) {}
+
+ Solution solve() const {
+ Solution current(g.getNumNodes(), true), optimal(current);
+
+ PBQPNum bestCost = std::numeric_limits<PBQPNum>::infinity();
+ bool finished = false;
+
+ while (!finished) {
+ PBQPNum currentCost = getSolutionCost(current);
+
+ if (currentCost < bestCost) {
+ optimal = current;
+ bestCost = currentCost;
+ }
+
+ // assume we're done.
+ finished = true;
+
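+      // Advance 'current' like a mixed-radix odometer: bump the first node
+      // whose selection is not yet at its last option, resetting earlier
+      // nodes to zero. Once every digit wraps we have seen all solutions.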
+ for (unsigned i = 0; i < g.getNumNodes(); ++i) {
+ if (current.getSelection(i) ==
+ (g.getNodeCosts(g.getNodeItr(i)).getLength() - 1)) {
+ current.setSelection(i, 0);
+ }
+ else {
+ current.setSelection(i, current.getSelection(i) + 1);
+ finished = false;
+ break;
+ }
+ }
+
+ }
+
+ optimal.setSolutionCost(bestCost);
+
+ return optimal;
+ }
+
+};
+
+class ExhaustiveSolver : public Solver {
+public:
+ ~ExhaustiveSolver() {}
+ Solution solve(const SimpleGraph &g) const {
+ ExhaustiveSolverImpl solver(g);
+ return solver.solve();
+ }
+};
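+
+// Illustrative use (a sketch, assuming a populated SimpleGraph 'g' with
+// valid node IDs):
+//
+//   ExhaustiveSolver solver;
+//   Solution s = solver.solve(g);
+//   unsigned choice0 = s.getSelection(0); // optimal option for node 0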
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
new file mode 100644
index 000000000000..cc3e017adda1
--- /dev/null
+++ b/lib/CodeGen/PBQP/GraphBase.h
@@ -0,0 +1,582 @@
+//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Base class for PBQP Graphs.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H
+#define LLVM_CODEGEN_PBQP_GRAPHBASE_H
+
+#include "PBQPMath.h"
+
+#include <cstdlib>
+#include <iterator>
+#include <list>
+#include <vector>
+
+namespace PBQP {
+
+// UGLY, but I'm not sure there's a good way around this: We need to be able to
+// look up a Node's "adjacent edge list" structure type before the Node type is
+// fully constructed. We can enable this by pushing the choice of data type
+// out into this traits class.
+template <typename Graph>
+class NodeBaseTraits {
+ public:
+ typedef std::list<typename Graph::EdgeIterator> AdjEdgeList;
+ typedef typename AdjEdgeList::iterator AdjEdgeIterator;
+ typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator;
+};
+
+/// \brief Base for concrete graph classes. Provides a basic set of graph
+/// operations which are useful for PBQP solvers.
+template <typename NodeEntry, typename EdgeEntry>
+class GraphBase {
+private:
+
+ typedef GraphBase<NodeEntry, EdgeEntry> ThisGraphT;
+
+ typedef std::list<NodeEntry> NodeList;
+ typedef std::list<EdgeEntry> EdgeList;
+
+ NodeList nodeList;
+ unsigned nodeListSize;
+
+ EdgeList edgeList;
+ unsigned edgeListSize;
+
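+  // Not copyable: concrete graphs (e.g. AnnotatedGraph) implement their own
+  // copy logic on top of the public interface.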
+ GraphBase(const ThisGraphT &other) { abort(); }
+ void operator=(const ThisGraphT &other) { abort(); }
+
+public:
+
+ /// \brief Iterates over the nodes of a graph.
+ typedef typename NodeList::iterator NodeIterator;
+ /// \brief Iterates over the nodes of a const graph.
+ typedef typename NodeList::const_iterator ConstNodeIterator;
+ /// \brief Iterates over the edges of a graph.
+ typedef typename EdgeList::iterator EdgeIterator;
+ /// \brief Iterates over the edges of a const graph.
+ typedef typename EdgeList::const_iterator ConstEdgeIterator;
+
+ /// \brief Iterates over the edges attached to a node.
+ typedef typename NodeBaseTraits<ThisGraphT>::AdjEdgeIterator
+ AdjEdgeIterator;
+
+ /// \brief Iterates over the edges attached to a node in a const graph.
+ typedef typename NodeBaseTraits<ThisGraphT>::ConstAdjEdgeIterator
+ ConstAdjEdgeIterator;
+
+private:
+
+ typedef std::vector<NodeIterator> IDToNodeMap;
+
+ IDToNodeMap idToNodeMap;
+ bool nodeIDsValid;
+
+ void invalidateNodeIDs() {
+ if (nodeIDsValid) {
+ idToNodeMap.clear();
+ nodeIDsValid = false;
+ }
+ }
+
+ template <typename ItrT>
+ bool iteratorInRange(ItrT itr, const ItrT &begin, const ItrT &end) {
+ for (ItrT t = begin; t != end; ++t) {
+ if (itr == t)
+ return true;
+ }
+
+ return false;
+ }
+
+protected:
+
+ GraphBase() : nodeListSize(0), edgeListSize(0), nodeIDsValid(false) {}
+
+ NodeEntry& getNodeEntry(const NodeIterator &nodeItr) { return *nodeItr; }
+ const NodeEntry& getNodeEntry(const ConstNodeIterator &nodeItr) const {
+ return *nodeItr;
+ }
+
+ EdgeEntry& getEdgeEntry(const EdgeIterator &edgeItr) { return *edgeItr; }
+ const EdgeEntry& getEdgeEntry(const ConstEdgeIterator &edgeItr) const {
+ return *edgeItr;
+ }
+
+ NodeIterator addConstructedNode(const NodeEntry &nodeEntry) {
+ ++nodeListSize;
+
+ invalidateNodeIDs();
+
+ NodeIterator newNodeItr = nodeList.insert(nodeList.end(), nodeEntry);
+
+ return newNodeItr;
+ }
+
+ EdgeIterator addConstructedEdge(const EdgeEntry &edgeEntry) {
+
+ assert((findEdge(edgeEntry.getNode1Itr(), edgeEntry.getNode2Itr())
+ == edgeList.end()) && "Attempt to add duplicate edge.");
+
+ ++edgeListSize;
+
+ // Add the edge to the graph.
+ EdgeIterator edgeItr = edgeList.insert(edgeList.end(), edgeEntry);
+
+ // Get a reference to the version in the graph.
+ EdgeEntry &newEdgeEntry = getEdgeEntry(edgeItr);
+
+ // Node entries:
+ NodeEntry &node1Entry = getNodeEntry(newEdgeEntry.getNode1Itr()),
+ &node2Entry = getNodeEntry(newEdgeEntry.getNode2Itr());
+
+ // Sanity check on matrix dimensions.
+ assert((node1Entry.getCosts().getLength() ==
+ newEdgeEntry.getCosts().getRows()) &&
+ (node2Entry.getCosts().getLength() ==
+ newEdgeEntry.getCosts().getCols()) &&
+ "Matrix dimensions do not match cost vector dimensions.");
+
+ // Create links between nodes and edges.
+ newEdgeEntry.setNode1ThisEdgeItr(
+ node1Entry.addAdjEdge(edgeItr));
+ newEdgeEntry.setNode2ThisEdgeItr(
+ node2Entry.addAdjEdge(edgeItr));
+
+ return edgeItr;
+ }
+
+public:
+
+ /// \brief Returns the number of nodes in this graph.
+ unsigned getNumNodes() const { return nodeListSize; }
+
+ /// \brief Returns the number of edges in this graph.
+ unsigned getNumEdges() const { return edgeListSize; }
+
+ /// \brief Return the cost vector for the given node.
+ Vector& getNodeCosts(const NodeIterator &nodeItr) {
+ return getNodeEntry(nodeItr).getCosts();
+ }
+
+  /// \brief Return the cost vector for the given node.
+ const Vector& getNodeCosts(const ConstNodeIterator &nodeItr) const {
+ return getNodeEntry(nodeItr).getCosts();
+ }
+
+ /// \brief Return the degree of the given node.
+ unsigned getNodeDegree(const NodeIterator &nodeItr) const {
+ return getNodeEntry(nodeItr).getDegree();
+ }
+
+ /// \brief Assigns sequential IDs to the nodes, starting at 0, which
+ /// remain valid until the next addition or removal of a node.
+ void assignNodeIDs() {
+ unsigned curID = 0;
+ idToNodeMap.resize(getNumNodes());
+ for (NodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr, ++curID) {
+ getNodeEntry(nodeItr).setID(curID);
+ idToNodeMap[curID] = nodeItr;
+ }
+ nodeIDsValid = true;
+ }
+
+ /// \brief Assigns sequential IDs to the nodes using the ordering of the
+ /// given vector.
+ void assignNodeIDs(const std::vector<NodeIterator> &nodeOrdering) {
+ assert((getNumNodes() == nodeOrdering.size()) &&
+ "Wrong number of nodes in node ordering.");
+ idToNodeMap = nodeOrdering;
+ for (unsigned nodeID = 0; nodeID < idToNodeMap.size(); ++nodeID) {
+ getNodeEntry(idToNodeMap[nodeID]).setID(nodeID);
+ }
+ nodeIDsValid = true;
+ }
+
+ /// \brief Returns true if valid node IDs are assigned, false otherwise.
+ bool areNodeIDsValid() const { return nodeIDsValid; }
+
+ /// \brief Return the numeric ID of the given node.
+ ///
+ /// Calls to this method will result in an assertion failure if there have
+ /// been any node additions or removals since the last call to
+ /// assignNodeIDs().
+ unsigned getNodeID(const ConstNodeIterator &nodeItr) const {
+ assert(nodeIDsValid && "Attempt to retrieve invalid ID.");
+ return getNodeEntry(nodeItr).getID();
+ }
+
+ /// \brief Returns the iterator associated with the given node ID.
+ NodeIterator getNodeItr(unsigned nodeID) {
+ assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID.");
+ return idToNodeMap[nodeID];
+ }
+
+ /// \brief Returns the iterator associated with the given node ID.
+ ConstNodeIterator getNodeItr(unsigned nodeID) const {
+ assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID.");
+ return idToNodeMap[nodeID];
+ }
+
+ /// \brief Removes the given node (and all attached edges) from the graph.
+ void removeNode(const NodeIterator &nodeItr) {
+ assert(iteratorInRange(nodeItr, nodeList.begin(), nodeList.end()) &&
+ "Iterator does not belong to this graph!");
+
+ invalidateNodeIDs();
+
+ NodeEntry &nodeEntry = getNodeEntry(nodeItr);
+
+ // We need to copy this out because it will be destroyed as the edges are
+ // removed.
+ typedef std::vector<EdgeIterator> AdjEdgeList;
+ typedef typename AdjEdgeList::iterator AdjEdgeListItr;
+
+ AdjEdgeList adjEdges;
+ adjEdges.reserve(nodeEntry.getDegree());
+ std::copy(nodeEntry.adjEdgesBegin(), nodeEntry.adjEdgesEnd(),
+ std::back_inserter(adjEdges));
+
+ // Iterate over the copied out edges and remove them from the graph.
+ for (AdjEdgeListItr itr = adjEdges.begin(), end = adjEdges.end();
+ itr != end; ++itr) {
+ removeEdge(*itr);
+ }
+
+ // Erase the node from the nodelist.
+ nodeList.erase(nodeItr);
+ --nodeListSize;
+ }
+
+ NodeIterator nodesBegin() { return nodeList.begin(); }
+ ConstNodeIterator nodesBegin() const { return nodeList.begin(); }
+ NodeIterator nodesEnd() { return nodeList.end(); }
+ ConstNodeIterator nodesEnd() const { return nodeList.end(); }
+
+ AdjEdgeIterator adjEdgesBegin(const NodeIterator &nodeItr) {
+ return getNodeEntry(nodeItr).adjEdgesBegin();
+ }
+
+ ConstAdjEdgeIterator adjEdgesBegin(const ConstNodeIterator &nodeItr) const {
+ return getNodeEntry(nodeItr).adjEdgesBegin();
+ }
+
+ AdjEdgeIterator adjEdgesEnd(const NodeIterator &nodeItr) {
+ return getNodeEntry(nodeItr).adjEdgesEnd();
+ }
+
+  ConstAdjEdgeIterator adjEdgesEnd(const ConstNodeIterator &nodeItr) const {
+    return getNodeEntry(nodeItr).adjEdgesEnd();
+  }
+
+ EdgeIterator findEdge(const NodeIterator &node1Itr,
+ const NodeIterator &node2Itr) {
+
+ for (AdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr),
+ adjEdgeEnd = adjEdgesEnd(node1Itr);
+ adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) {
+ if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) ||
+ (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) {
+ return *adjEdgeItr;
+ }
+ }
+
+ return edgeList.end();
+ }
+
+ ConstEdgeIterator findEdge(const ConstNodeIterator &node1Itr,
+ const ConstNodeIterator &node2Itr) const {
+
+ for (ConstAdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr),
+ adjEdgeEnd = adjEdgesEnd(node1Itr);
+         adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) {
+ if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) ||
+ (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) {
+ return *adjEdgeItr;
+ }
+ }
+
+ return edgeList.end();
+ }
+
+ Matrix& getEdgeCosts(const EdgeIterator &edgeItr) {
+ return getEdgeEntry(edgeItr).getCosts();
+ }
+
+ const Matrix& getEdgeCosts(const ConstEdgeIterator &edgeItr) const {
+ return getEdgeEntry(edgeItr).getCosts();
+ }
+
+ NodeIterator getEdgeNode1Itr(const EdgeIterator &edgeItr) {
+ return getEdgeEntry(edgeItr).getNode1Itr();
+ }
+
+ ConstNodeIterator getEdgeNode1Itr(const ConstEdgeIterator &edgeItr) const {
+ return getEdgeEntry(edgeItr).getNode1Itr();
+ }
+
+ NodeIterator getEdgeNode2Itr(const EdgeIterator &edgeItr) {
+ return getEdgeEntry(edgeItr).getNode2Itr();
+ }
+
+ ConstNodeIterator getEdgeNode2Itr(const ConstEdgeIterator &edgeItr) const {
+ return getEdgeEntry(edgeItr).getNode2Itr();
+ }
+
+ NodeIterator getEdgeOtherNode(const EdgeIterator &edgeItr,
+ const NodeIterator &nodeItr) {
+
+ EdgeEntry &edgeEntry = getEdgeEntry(edgeItr);
+ if (nodeItr == edgeEntry.getNode1Itr()) {
+ return edgeEntry.getNode2Itr();
+ }
+ //else
+ return edgeEntry.getNode1Itr();
+ }
+
+ ConstNodeIterator getEdgeOtherNode(const ConstEdgeIterator &edgeItr,
+ const ConstNodeIterator &nodeItr) const {
+
+ const EdgeEntry &edgeEntry = getEdgeEntry(edgeItr);
+ if (nodeItr == edgeEntry.getNode1Itr()) {
+ return edgeEntry.getNode2Itr();
+ }
+ //else
+ return edgeEntry.getNode1Itr();
+ }
+
+ void removeEdge(const EdgeIterator &edgeItr) {
+ assert(iteratorInRange(edgeItr, edgeList.begin(), edgeList.end()) &&
+ "Iterator does not belong to this graph!");
+
+ --edgeListSize;
+
+ // Get the edge entry.
+ EdgeEntry &edgeEntry = getEdgeEntry(edgeItr);
+
+ // Get the nodes entry.
+ NodeEntry &node1Entry(getNodeEntry(edgeEntry.getNode1Itr())),
+ &node2Entry(getNodeEntry(edgeEntry.getNode2Itr()));
+
+ // Disconnect the edge from the nodes.
+ node1Entry.removeAdjEdge(edgeEntry.getNode1ThisEdgeItr());
+ node2Entry.removeAdjEdge(edgeEntry.getNode2ThisEdgeItr());
+
+ // Remove the edge from the graph.
+ edgeList.erase(edgeItr);
+ }
+
+ EdgeIterator edgesBegin() { return edgeList.begin(); }
+ ConstEdgeIterator edgesBegin() const { return edgeList.begin(); }
+ EdgeIterator edgesEnd() { return edgeList.end(); }
+ ConstEdgeIterator edgesEnd() const { return edgeList.end(); }
+
+  void clear() {
+    nodeList.clear();
+    nodeListSize = 0;
+    edgeList.clear();
+    edgeListSize = 0;
+    idToNodeMap.clear();
+    nodeIDsValid = false;
+  }
+
+ template <typename OStream>
+ void printDot(OStream &os) const {
+
+ assert(areNodeIDsValid() &&
+ "Cannot print a .dot of a graph unless IDs have been assigned.");
+
+ os << "graph {\n";
+
+ for (ConstNodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+
+ os << " node" << getNodeID(nodeItr) << " [ label=\""
+ << getNodeID(nodeItr) << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+ }
+
+ os << " edge [ len=" << getNumNodes() << " ]\n";
+
+ for (ConstEdgeIterator edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+
+ os << " node" << getNodeID(getEdgeNode1Itr(edgeItr))
+ << " -- node" << getNodeID(getEdgeNode2Itr(edgeItr))
+ << " [ label=\"";
+
+ const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+ for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+ os << edgeCosts.getRowAsVector(i) << "\\n";
+ }
+
+ os << "\" ]\n";
+ }
+
+ os << "}\n";
+ }
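+
+  // For a two-node graph the emitted text looks roughly like (illustrative):
+  //
+  //   graph {
+  //     node0 [ label="0: [ 1, 2 ]" ]
+  //     node1 [ label="1: [ 3, 4 ]" ]
+  //     edge [ len=2 ]
+  //     node0 -- node1 [ label="[ 0, 1 ]\n[ 1, 0 ]" ]
+  //   }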
+
+ template <typename OStream>
+ void printDot(OStream &os) {
+ if (!areNodeIDsValid()) {
+ assignNodeIDs();
+ }
+
+ const_cast<const ThisGraphT*>(this)->printDot(os);
+ }
+
+ template <typename OStream>
+ void dumpTo(OStream &os) const {
+ typedef ConstNodeIterator ConstNodeID;
+
+ assert(areNodeIDsValid() &&
+ "Cannot dump a graph unless IDs have been assigned.");
+
+ for (ConstNodeIterator nItr = nodesBegin(), nEnd = nodesEnd();
+ nItr != nEnd; ++nItr) {
+ os << getNodeID(nItr) << "\n";
+ }
+
+ unsigned edgeNumber = 1;
+ for (ConstEdgeIterator eItr = edgesBegin(), eEnd = edgesEnd();
+ eItr != eEnd; ++eItr) {
+
+ os << edgeNumber++ << ": { "
+ << getNodeID(getEdgeNode1Itr(eItr)) << ", "
+ << getNodeID(getEdgeNode2Itr(eItr)) << " }\n";
+ }
+
+ }
+
+ template <typename OStream>
+ void dumpTo(OStream &os) {
+ if (!areNodeIDsValid()) {
+ assignNodeIDs();
+ }
+
+ const_cast<const ThisGraphT*>(this)->dumpTo(os);
+ }
+
+};
+
+/// \brief Provides a base from which to derive nodes for GraphBase.
+template <typename NodeImpl, typename EdgeImpl>
+class NodeBase {
+private:
+
+ typedef GraphBase<NodeImpl, EdgeImpl> GraphBaseT;
+ typedef NodeBaseTraits<GraphBaseT> ThisNodeBaseTraits;
+
+public:
+ typedef typename GraphBaseT::EdgeIterator EdgeIterator;
+
+private:
+ typedef typename ThisNodeBaseTraits::AdjEdgeList AdjEdgeList;
+
+ unsigned degree, id;
+ Vector costs;
+ AdjEdgeList adjEdges;
+
+  void operator=(const NodeBase& other) {
+    assert(false && "Can't assign NodeEntries.");
+  }
+
+public:
+
+ typedef typename ThisNodeBaseTraits::AdjEdgeIterator AdjEdgeIterator;
+ typedef typename ThisNodeBaseTraits::ConstAdjEdgeIterator
+ ConstAdjEdgeIterator;
+
+ NodeBase(const Vector &costs) : degree(0), costs(costs) {
+ assert((costs.getLength() > 0) && "Can't have zero-length cost vector.");
+ }
+
+ Vector& getCosts() { return costs; }
+ const Vector& getCosts() const { return costs; }
+
+ unsigned getDegree() const { return degree; }
+
+ void setID(unsigned id) { this->id = id; }
+ unsigned getID() const { return id; }
+
+ AdjEdgeIterator addAdjEdge(const EdgeIterator &edgeItr) {
+ ++degree;
+ return adjEdges.insert(adjEdges.end(), edgeItr);
+ }
+
+ void removeAdjEdge(const AdjEdgeIterator &adjEdgeItr) {
+ --degree;
+ adjEdges.erase(adjEdgeItr);
+ }
+
+ AdjEdgeIterator adjEdgesBegin() { return adjEdges.begin(); }
+ ConstAdjEdgeIterator adjEdgesBegin() const { return adjEdges.begin(); }
+ AdjEdgeIterator adjEdgesEnd() { return adjEdges.end(); }
+ ConstAdjEdgeIterator adjEdgesEnd() const { return adjEdges.end(); }
+
+};
+
+template <typename NodeImpl, typename EdgeImpl>
+class EdgeBase {
+public:
+ typedef typename GraphBase<NodeImpl, EdgeImpl>::NodeIterator NodeIterator;
+ typedef typename GraphBase<NodeImpl, EdgeImpl>::EdgeIterator EdgeIterator;
+
+ typedef typename NodeImpl::AdjEdgeIterator NodeAdjEdgeIterator;
+
+private:
+
+ NodeIterator node1Itr, node2Itr;
+ NodeAdjEdgeIterator node1ThisEdgeItr, node2ThisEdgeItr;
+ Matrix costs;
+
+  void operator=(const EdgeBase &other) {
+    assert(false && "Can't assign EdgeEntries.");
+  }
+
+public:
+
+ EdgeBase(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
+ const Matrix &costs) :
+ node1Itr(node1Itr), node2Itr(node2Itr), costs(costs) {
+
+ assert((costs.getRows() > 0) && (costs.getCols() > 0) &&
+ "Can't have zero-dimensioned cost matrices");
+ }
+
+ Matrix& getCosts() { return costs; }
+ const Matrix& getCosts() const { return costs; }
+
+ const NodeIterator& getNode1Itr() const { return node1Itr; }
+ const NodeIterator& getNode2Itr() const { return node2Itr; }
+
+ void setNode1ThisEdgeItr(const NodeAdjEdgeIterator &node1ThisEdgeItr) {
+ this->node1ThisEdgeItr = node1ThisEdgeItr;
+ }
+
+ const NodeAdjEdgeIterator& getNode1ThisEdgeItr() const {
+ return node1ThisEdgeItr;
+ }
+
+ void setNode2ThisEdgeItr(const NodeAdjEdgeIterator &node2ThisEdgeItr) {
+ this->node2ThisEdgeItr = node2ThisEdgeItr;
+ }
+
+ const NodeAdjEdgeIterator& getNode2ThisEdgeItr() const {
+ return node2ThisEdgeItr;
+ }
+
+};
+
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPHBASE_H
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
new file mode 100644
index 000000000000..e786246b4e05
--- /dev/null
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -0,0 +1,789 @@
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Heuristic PBQP solver. This solver is able to perform optimal reductions
+// for nodes of degree 0, 1 or 2. For nodes of degree >2 a pluggable
+// heuristic is used to select a node for reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+
+#include "Solver.h"
+#include "AnnotatedGraph.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+
+namespace PBQP {
+
+/// \brief Important types for the HeuristicSolverImpl.
+///
+/// Declared separately to allow access to heuristic classes before the solver
+/// is fully constructed.
+template <typename HeuristicNodeData, typename HeuristicEdgeData>
+class HSITypes {
+public:
+
+ class NodeData;
+ class EdgeData;
+
+ typedef AnnotatedGraph<NodeData, EdgeData> SolverGraph;
+ typedef typename SolverGraph::NodeIterator GraphNodeIterator;
+ typedef typename SolverGraph::EdgeIterator GraphEdgeIterator;
+ typedef typename SolverGraph::AdjEdgeIterator GraphAdjEdgeIterator;
+
+ typedef std::list<GraphNodeIterator> NodeList;
+ typedef typename NodeList::iterator NodeListIterator;
+
+ typedef std::vector<GraphNodeIterator> NodeStack;
+ typedef typename NodeStack::iterator NodeStackIterator;
+
+ class NodeData {
+ friend class EdgeData;
+
+ private:
+
+ typedef std::list<GraphEdgeIterator> LinksList;
+
+ unsigned numLinks;
+ LinksList links, solvedLinks;
+ NodeListIterator bucketItr;
+ HeuristicNodeData heuristicData;
+
+ public:
+
+ typedef typename LinksList::iterator AdjLinkIterator;
+
+ private:
+
+ AdjLinkIterator addLink(const GraphEdgeIterator &edgeItr) {
+ ++numLinks;
+ return links.insert(links.end(), edgeItr);
+ }
+
+ void delLink(const AdjLinkIterator &adjLinkItr) {
+ --numLinks;
+ links.erase(adjLinkItr);
+ }
+
+ public:
+
+ NodeData() : numLinks(0) {}
+
+ unsigned getLinkDegree() const { return numLinks; }
+
+ HeuristicNodeData& getHeuristicData() { return heuristicData; }
+ const HeuristicNodeData& getHeuristicData() const {
+ return heuristicData;
+ }
+
+ void setBucketItr(const NodeListIterator &bucketItr) {
+ this->bucketItr = bucketItr;
+ }
+
+ const NodeListIterator& getBucketItr() const {
+ return bucketItr;
+ }
+
+ AdjLinkIterator adjLinksBegin() {
+ return links.begin();
+ }
+
+ AdjLinkIterator adjLinksEnd() {
+ return links.end();
+ }
+
+ void addSolvedLink(const GraphEdgeIterator &solvedLinkItr) {
+ solvedLinks.push_back(solvedLinkItr);
+ }
+
+ AdjLinkIterator solvedLinksBegin() {
+ return solvedLinks.begin();
+ }
+
+ AdjLinkIterator solvedLinksEnd() {
+ return solvedLinks.end();
+ }
+
+ };
+
+ class EdgeData {
+ private:
+
+ SolverGraph &g;
+ GraphNodeIterator node1Itr, node2Itr;
+ HeuristicEdgeData heuristicData;
+ typename NodeData::AdjLinkIterator node1ThisEdgeItr, node2ThisEdgeItr;
+
+ public:
+
+ EdgeData(SolverGraph &g) : g(g) {}
+
+ HeuristicEdgeData& getHeuristicData() { return heuristicData; }
+ const HeuristicEdgeData& getHeuristicData() const {
+ return heuristicData;
+ }
+
+ void setup(const GraphEdgeIterator &thisEdgeItr) {
+ node1Itr = g.getEdgeNode1Itr(thisEdgeItr);
+ node2Itr = g.getEdgeNode2Itr(thisEdgeItr);
+
+ node1ThisEdgeItr = g.getNodeData(node1Itr).addLink(thisEdgeItr);
+ node2ThisEdgeItr = g.getNodeData(node2Itr).addLink(thisEdgeItr);
+ }
+
+ void unlink() {
+ g.getNodeData(node1Itr).delLink(node1ThisEdgeItr);
+ g.getNodeData(node2Itr).delLink(node2ThisEdgeItr);
+ }
+
+ };
+
+};
+
+template <typename Heuristic>
+class HeuristicSolverImpl {
+public:
+ // Typedefs to make life easier:
+ typedef HSITypes<typename Heuristic::NodeData,
+ typename Heuristic::EdgeData> HSIT;
+ typedef typename HSIT::SolverGraph SolverGraph;
+ typedef typename HSIT::NodeData NodeData;
+ typedef typename HSIT::EdgeData EdgeData;
+ typedef typename HSIT::GraphNodeIterator GraphNodeIterator;
+ typedef typename HSIT::GraphEdgeIterator GraphEdgeIterator;
+ typedef typename HSIT::GraphAdjEdgeIterator GraphAdjEdgeIterator;
+
+ typedef typename HSIT::NodeList NodeList;
+ typedef typename HSIT::NodeListIterator NodeListIterator;
+
+ typedef std::vector<GraphNodeIterator> NodeStack;
+ typedef typename NodeStack::iterator NodeStackIterator;
+
+ /// \brief Constructor, which performs all the actual solver work.
+ HeuristicSolverImpl(const SimpleGraph &orig) :
+ solution(orig.getNumNodes(), true)
+ {
+ copyGraph(orig);
+ simplify();
+ setup();
+ computeSolution();
+ computeSolutionCost(orig);
+ }
+
+ /// \brief Returns the graph for this solver.
+ SolverGraph& getGraph() { return g; }
+
+ /// \brief Return the solution found by this solver.
+ const Solution& getSolution() const { return solution; }
+
+private:
+
+ /// \brief Add the given node to the appropriate bucket for its link
+ /// degree.
+ void addToBucket(const GraphNodeIterator &nodeItr) {
+ NodeData &nodeData = g.getNodeData(nodeItr);
+
+ switch (nodeData.getLinkDegree()) {
+ case 0: nodeData.setBucketItr(
+ r0Bucket.insert(r0Bucket.end(), nodeItr));
+ break;
+ case 1: nodeData.setBucketItr(
+ r1Bucket.insert(r1Bucket.end(), nodeItr));
+ break;
+ case 2: nodeData.setBucketItr(
+ r2Bucket.insert(r2Bucket.end(), nodeItr));
+ break;
+ default: heuristic.addToRNBucket(nodeItr);
+ break;
+ }
+ }
+
+ /// \brief Remove the given node from the appropriate bucket for its link
+ /// degree.
+ void removeFromBucket(const GraphNodeIterator &nodeItr) {
+ NodeData &nodeData = g.getNodeData(nodeItr);
+
+ switch (nodeData.getLinkDegree()) {
+ case 0: r0Bucket.erase(nodeData.getBucketItr()); break;
+ case 1: r1Bucket.erase(nodeData.getBucketItr()); break;
+ case 2: r2Bucket.erase(nodeData.getBucketItr()); break;
+ default: heuristic.removeFromRNBucket(nodeItr); break;
+ }
+ }
+
+public:
+
+ /// \brief Add a link.
+ void addLink(const GraphEdgeIterator &edgeItr) {
+ g.getEdgeData(edgeItr).setup(edgeItr);
+
+ if ((g.getNodeData(g.getEdgeNode1Itr(edgeItr)).getLinkDegree() > 2) ||
+ (g.getNodeData(g.getEdgeNode2Itr(edgeItr)).getLinkDegree() > 2)) {
+ heuristic.handleAddLink(edgeItr);
+ }
+ }
+
+ /// \brief Remove link, update info for node.
+ ///
+ /// Only updates information for the given node, since usually the other
+ /// is about to be removed.
+ void removeLink(const GraphEdgeIterator &edgeItr,
+ const GraphNodeIterator &nodeItr) {
+
+ if (g.getNodeData(nodeItr).getLinkDegree() > 2) {
+ heuristic.handleRemoveLink(edgeItr, nodeItr);
+ }
+ g.getEdgeData(edgeItr).unlink();
+ }
+
+ /// \brief Remove link, update info for both nodes. Useful for R2 only.
+ void removeLinkR2(const GraphEdgeIterator &edgeItr) {
+ GraphNodeIterator node1Itr = g.getEdgeNode1Itr(edgeItr);
+
+ if (g.getNodeData(node1Itr).getLinkDegree() > 2) {
+ heuristic.handleRemoveLink(edgeItr, node1Itr);
+ }
+ removeLink(edgeItr, g.getEdgeNode2Itr(edgeItr));
+ }
+
+ /// \brief Removes all links connected to the given node.
+ void unlinkNode(const GraphNodeIterator &nodeItr) {
+ NodeData &nodeData = g.getNodeData(nodeItr);
+
+ typedef std::vector<GraphEdgeIterator> TempEdgeList;
+
+ TempEdgeList edgesToUnlink;
+ edgesToUnlink.reserve(nodeData.getLinkDegree());
+
+ // Copy adj edges into a temp vector. We want to destroy them during
+ // the unlink, and we can't do that while we're iterating over them.
+ std::copy(nodeData.adjLinksBegin(), nodeData.adjLinksEnd(),
+ std::back_inserter(edgesToUnlink));
+
+ for (typename TempEdgeList::iterator
+ edgeItr = edgesToUnlink.begin(), edgeEnd = edgesToUnlink.end();
+ edgeItr != edgeEnd; ++edgeItr) {
+
+ GraphNodeIterator otherNode = g.getEdgeOtherNode(*edgeItr, nodeItr);
+
+ removeFromBucket(otherNode);
+ removeLink(*edgeItr, otherNode);
+ addToBucket(otherNode);
+ }
+ }
+
+ /// \brief Push the given node onto the stack to be solved with
+ /// backpropagation.
+ void pushStack(const GraphNodeIterator &nodeItr) {
+ stack.push_back(nodeItr);
+ }
+
+ /// \brief Set the solution of the given node.
+ void setSolution(const GraphNodeIterator &nodeItr, unsigned solIndex) {
+ solution.setSelection(g.getNodeID(nodeItr), solIndex);
+
+ for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr),
+ adjEdgeEnd = g.adjEdgesEnd(nodeItr);
+ adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) {
+ GraphEdgeIterator edgeItr(*adjEdgeItr);
+ GraphNodeIterator adjNodeItr(g.getEdgeOtherNode(edgeItr, nodeItr));
+ g.getNodeData(adjNodeItr).addSolvedLink(edgeItr);
+ }
+ }
+
+private:
+
+ SolverGraph g;
+ Heuristic heuristic;
+ Solution solution;
+
+ NodeList r0Bucket,
+ r1Bucket,
+ r2Bucket;
+
+ NodeStack stack;
+
+  // Copy the SimpleGraph into an annotated graph which we can use for
+  // reduction.
+ void copyGraph(const SimpleGraph &orig) {
+
+ assert((g.getNumEdges() == 0) && (g.getNumNodes() == 0) &&
+ "Graph should be empty prior to solver setup.");
+
+ assert(orig.areNodeIDsValid() &&
+ "Cannot copy from a graph with invalid node IDs.");
+
+ std::vector<GraphNodeIterator> newNodeItrs;
+
+ for (unsigned nodeID = 0; nodeID < orig.getNumNodes(); ++nodeID) {
+ newNodeItrs.push_back(
+ g.addNode(orig.getNodeCosts(orig.getNodeItr(nodeID)), NodeData()));
+ }
+
+ for (SimpleGraph::ConstEdgeIterator
+ origEdgeItr = orig.edgesBegin(), origEdgeEnd = orig.edgesEnd();
+ origEdgeItr != origEdgeEnd; ++origEdgeItr) {
+
+ unsigned id1 = orig.getNodeID(orig.getEdgeNode1Itr(origEdgeItr)),
+ id2 = orig.getNodeID(orig.getEdgeNode2Itr(origEdgeItr));
+
+ g.addEdge(newNodeItrs[id1], newNodeItrs[id2],
+ orig.getEdgeCosts(origEdgeItr), EdgeData(g));
+ }
+
+ // Assign IDs to the new nodes using the ordering from the old graph,
+ // this will lead to nodes in the new graph getting the same ID as the
+ // corresponding node in the old graph.
+ g.assignNodeIDs(newNodeItrs);
+ }
+
+ // Simplify the annotated graph by eliminating independent edges and trivial
+ // nodes.
+ void simplify() {
+ disconnectTrivialNodes();
+ eliminateIndependentEdges();
+ }
+
+  // Disconnect trivial nodes (those with single-entry cost vectors) by
+  // folding the corresponding row/column of each incident edge into the
+  // neighbouring node's cost vector.
+ void disconnectTrivialNodes() {
+ for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+
+ if (g.getNodeCosts(nodeItr).getLength() == 1) {
+
+ std::vector<GraphEdgeIterator> edgesToRemove;
+
+ for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr),
+ adjEdgeEnd = g.adjEdgesEnd(nodeItr);
+ adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) {
+
+ GraphEdgeIterator edgeItr = *adjEdgeItr;
+
+ if (g.getEdgeNode1Itr(edgeItr) == nodeItr) {
+ GraphNodeIterator otherNodeItr = g.getEdgeNode2Itr(edgeItr);
+ g.getNodeCosts(otherNodeItr) +=
+ g.getEdgeCosts(edgeItr).getRowAsVector(0);
+ }
+ else {
+ GraphNodeIterator otherNodeItr = g.getEdgeNode1Itr(edgeItr);
+ g.getNodeCosts(otherNodeItr) +=
+ g.getEdgeCosts(edgeItr).getColAsVector(0);
+ }
+
+ edgesToRemove.push_back(edgeItr);
+ }
+
+ while (!edgesToRemove.empty()) {
+ g.removeEdge(edgesToRemove.back());
+ edgesToRemove.pop_back();
+ }
+ }
+ }
+ }
+
+ void eliminateIndependentEdges() {
+ std::vector<GraphEdgeIterator> edgesToProcess;
+
+ for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+ edgesToProcess.push_back(edgeItr);
+ }
+
+ while (!edgesToProcess.empty()) {
+ tryToEliminateEdge(edgesToProcess.back());
+ edgesToProcess.pop_back();
+ }
+ }
+
+ void tryToEliminateEdge(const GraphEdgeIterator &edgeItr) {
+ if (tryNormaliseEdgeMatrix(edgeItr)) {
+ g.removeEdge(edgeItr);
+ }
+ }
+
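+  // Normalisation pushes each row minimum of the edge cost matrix into the
+  // first node's cost vector, then each column minimum into the second's.
+  // Worked sketch: [ 3 4 ]            [ 0 1 ]
+  //                [ 5 5 ]  becomes   [ 0 0 ]  with 3 and 5 added to uCosts.
+  // If the matrix is reduced to all zeroes the edge is independent and can
+  // be removed from the graph entirely.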
+ bool tryNormaliseEdgeMatrix(const GraphEdgeIterator &edgeItr) {
+
+ Matrix &edgeCosts = g.getEdgeCosts(edgeItr);
+ Vector &uCosts = g.getNodeCosts(g.getEdgeNode1Itr(edgeItr)),
+ &vCosts = g.getNodeCosts(g.getEdgeNode2Itr(edgeItr));
+
+ for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+ PBQPNum rowMin = edgeCosts.getRowMin(r);
+ uCosts[r] += rowMin;
+ if (rowMin != std::numeric_limits<PBQPNum>::infinity()) {
+ edgeCosts.subFromRow(r, rowMin);
+ }
+ else {
+ edgeCosts.setRow(r, 0);
+ }
+ }
+
+ for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+ PBQPNum colMin = edgeCosts.getColMin(c);
+ vCosts[c] += colMin;
+ if (colMin != std::numeric_limits<PBQPNum>::infinity()) {
+ edgeCosts.subFromCol(c, colMin);
+ }
+ else {
+ edgeCosts.setCol(c, 0);
+ }
+ }
+
+ return edgeCosts.isZero();
+ }
+
+ void setup() {
+ setupLinks();
+ heuristic.initialise(*this);
+ setupBuckets();
+ }
+
+ void setupLinks() {
+ for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+ g.getEdgeData(edgeItr).setup(edgeItr);
+ }
+ }
+
+ void setupBuckets() {
+ for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+ addToBucket(nodeItr);
+ }
+ }
+
+ void computeSolution() {
+ assert(g.areNodeIDsValid() &&
+ "Nodes cannot be added/removed during reduction.");
+
+ reduce();
+ computeTrivialSolutions();
+ backpropagate();
+ }
+
+ void printNode(const GraphNodeIterator &nodeItr) {
+ llvm::errs() << "Node " << g.getNodeID(nodeItr) << " (" << &*nodeItr << "):\n"
+ << " costs = " << g.getNodeCosts(nodeItr) << "\n"
+ << " link degree = " << g.getNodeData(nodeItr).getLinkDegree() << "\n"
+ << " links = [ ";
+
+ for (typename HSIT::NodeData::AdjLinkIterator
+ aeItr = g.getNodeData(nodeItr).adjLinksBegin(),
+ aeEnd = g.getNodeData(nodeItr).adjLinksEnd();
+ aeItr != aeEnd; ++aeItr) {
+ llvm::errs() << "(" << g.getNodeID(g.getEdgeNode1Itr(*aeItr))
+ << ", " << g.getNodeID(g.getEdgeNode2Itr(*aeItr))
+ << ") ";
+ }
+ llvm::errs() << "]\n";
+ }
+
+ void dumpState() {
+ llvm::errs() << "\n";
+
+ for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+ printNode(nodeItr);
+ }
+
+ NodeList* buckets[] = { &r0Bucket, &r1Bucket, &r2Bucket };
+
+ for (unsigned b = 0; b < 3; ++b) {
+ NodeList &bucket = *buckets[b];
+
+ llvm::errs() << "Bucket " << b << ": [ ";
+
+ for (NodeListIterator nItr = bucket.begin(), nEnd = bucket.end();
+ nItr != nEnd; ++nItr) {
+ llvm::errs() << g.getNodeID(*nItr) << " ";
+ }
+
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "Stack: [ ";
+ for (NodeStackIterator nsItr = stack.begin(), nsEnd = stack.end();
+ nsItr != nsEnd; ++nsItr) {
+ llvm::errs() << g.getNodeID(*nsItr) << " ";
+ }
+ llvm::errs() << "]\n";
+ }
+
+ void reduce() {
+ bool reductionFinished = r1Bucket.empty() && r2Bucket.empty() &&
+ heuristic.rNBucketEmpty();
+
+ while (!reductionFinished) {
+
+ if (!r1Bucket.empty()) {
+ processR1();
+ }
+ else if (!r2Bucket.empty()) {
+ processR2();
+ }
+ else if (!heuristic.rNBucketEmpty()) {
+ solution.setProvedOptimal(false);
+ solution.incRNReductions();
+ heuristic.processRN();
+ }
+ else reductionFinished = true;
+ }
+
+  }
+
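+  // R1 reduction: a degree-1 node x linked to y by cost matrix M can be
+  // folded away by setting y[j] += min_i (M[i][j] + x[i]) for each j; x is
+  // then pushed on the stack and solved later by backpropagation.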
+ void processR1() {
+
+    // Remove the first node in the R1 bucket:
+ GraphNodeIterator xNodeItr = r1Bucket.front();
+ r1Bucket.pop_front();
+
+ solution.incR1Reductions();
+
+ //llvm::errs() << "Applying R1 to " << g.getNodeID(xNodeItr) << "\n";
+
+ assert((g.getNodeData(xNodeItr).getLinkDegree() == 1) &&
+ "Node in R1 bucket has degree != 1");
+
+ GraphEdgeIterator edgeItr = *g.getNodeData(xNodeItr).adjLinksBegin();
+
+ const Matrix &edgeCosts = g.getEdgeCosts(edgeItr);
+
+ const Vector &xCosts = g.getNodeCosts(xNodeItr);
+ unsigned xLen = xCosts.getLength();
+
+ // Duplicate a little code to avoid transposing matrices:
+ if (xNodeItr == g.getEdgeNode1Itr(edgeItr)) {
+ GraphNodeIterator yNodeItr = g.getEdgeNode2Itr(edgeItr);
+ Vector &yCosts = g.getNodeCosts(yNodeItr);
+ unsigned yLen = yCosts.getLength();
+
+ for (unsigned j = 0; j < yLen; ++j) {
+ PBQPNum min = edgeCosts[0][j] + xCosts[0];
+ for (unsigned i = 1; i < xLen; ++i) {
+ PBQPNum c = edgeCosts[i][j] + xCosts[i];
+ if (c < min)
+ min = c;
+ }
+ yCosts[j] += min;
+ }
+ }
+ else {
+ GraphNodeIterator yNodeItr = g.getEdgeNode1Itr(edgeItr);
+ Vector &yCosts = g.getNodeCosts(yNodeItr);
+ unsigned yLen = yCosts.getLength();
+
+ for (unsigned i = 0; i < yLen; ++i) {
+ PBQPNum min = edgeCosts[i][0] + xCosts[0];
+
+ for (unsigned j = 1; j < xLen; ++j) {
+ PBQPNum c = edgeCosts[i][j] + xCosts[j];
+ if (c < min)
+ min = c;
+ }
+ yCosts[i] += min;
+ }
+ }
+
+ unlinkNode(xNodeItr);
+ pushStack(xNodeItr);
+ }
+
+ void processR2() {
+
+ GraphNodeIterator xNodeItr = r2Bucket.front();
+ r2Bucket.pop_front();
+
+ solution.incR2Reductions();
+
+    // Unlink is unsafe here. At some point it may optimistically move a node
+    // to a lower-degree list when its degree will later rise, or vice versa,
+    // violating the assumption that node degrees monotonically decrease
+    // during the reduction phase. Instead we'll bucket shuffle manually.
+ pushStack(xNodeItr);
+
+ assert((g.getNodeData(xNodeItr).getLinkDegree() == 2) &&
+ "Node in R2 bucket has degree != 2");
+
+ const Vector &xCosts = g.getNodeCosts(xNodeItr);
+
+ typename NodeData::AdjLinkIterator tempItr =
+ g.getNodeData(xNodeItr).adjLinksBegin();
+
+ GraphEdgeIterator yxEdgeItr = *tempItr,
+ zxEdgeItr = *(++tempItr);
+
+ GraphNodeIterator yNodeItr = g.getEdgeOtherNode(yxEdgeItr, xNodeItr),
+ zNodeItr = g.getEdgeOtherNode(zxEdgeItr, xNodeItr);
+
+ removeFromBucket(yNodeItr);
+ removeFromBucket(zNodeItr);
+
+ removeLink(yxEdgeItr, yNodeItr);
+ removeLink(zxEdgeItr, zNodeItr);
+
+    // Grab some of the costs:
+ bool flipEdge1 = (g.getEdgeNode1Itr(yxEdgeItr) == xNodeItr),
+ flipEdge2 = (g.getEdgeNode1Itr(zxEdgeItr) == xNodeItr);
+
+ const Matrix *yxCosts = flipEdge1 ?
+ new Matrix(g.getEdgeCosts(yxEdgeItr).transpose()) :
+ &g.getEdgeCosts(yxEdgeItr),
+ *zxCosts = flipEdge2 ?
+ new Matrix(g.getEdgeCosts(zxEdgeItr).transpose()) :
+ &g.getEdgeCosts(zxEdgeItr);
+
+ unsigned xLen = xCosts.getLength(),
+ yLen = yxCosts->getRows(),
+ zLen = zxCosts->getRows();
+
+ // Compute delta:
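+    // delta[i][j] = min_k ( yx[i][k] + zx[j][k] + x[k] ), i.e. the cost of
+    // the best choice for x given that y selects i and z selects j.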
+ Matrix delta(yLen, zLen);
+
+ for (unsigned i = 0; i < yLen; ++i) {
+ for (unsigned j = 0; j < zLen; ++j) {
+ PBQPNum min = (*yxCosts)[i][0] + (*zxCosts)[j][0] + xCosts[0];
+ for (unsigned k = 1; k < xLen; ++k) {
+ PBQPNum c = (*yxCosts)[i][k] + (*zxCosts)[j][k] + xCosts[k];
+ if (c < min) {
+ min = c;
+ }
+ }
+ delta[i][j] = min;
+ }
+ }
+
+ if (flipEdge1)
+ delete yxCosts;
+
+ if (flipEdge2)
+ delete zxCosts;
+
+ // Deal with the potentially induced yz edge.
+ GraphEdgeIterator yzEdgeItr = g.findEdge(yNodeItr, zNodeItr);
+ if (yzEdgeItr == g.edgesEnd()) {
+ yzEdgeItr = g.addEdge(yNodeItr, zNodeItr, delta, EdgeData(g));
+ }
+ else {
+ // There was an edge, but we're going to screw with it. Delete the old
+ // link, update the costs. We'll re-link it later.
+ removeLinkR2(yzEdgeItr);
+ g.getEdgeCosts(yzEdgeItr) +=
+ (yNodeItr == g.getEdgeNode1Itr(yzEdgeItr)) ?
+ delta : delta.transpose();
+ }
+
+ bool nullCostEdge = tryNormaliseEdgeMatrix(yzEdgeItr);
+
+ // Nulled the edge, remove it entirely.
+ if (nullCostEdge) {
+ g.removeEdge(yzEdgeItr);
+ }
+ else {
+ // Edge remains - re-link it.
+ addLink(yzEdgeItr);
+ }
+
+ addToBucket(yNodeItr);
+ addToBucket(zNodeItr);
+ }
+
+ void computeTrivialSolutions() {
+
+ for (NodeListIterator r0Itr = r0Bucket.begin(), r0End = r0Bucket.end();
+ r0Itr != r0End; ++r0Itr) {
+ GraphNodeIterator nodeItr = *r0Itr;
+
+ solution.incR0Reductions();
+ setSolution(nodeItr, g.getNodeCosts(nodeItr).minIndex());
+ }
+
+ }
+
+ void backpropagate() {
+ while (!stack.empty()) {
+ computeSolution(stack.back());
+ stack.pop_back();
+ }
+ }
+
+ void computeSolution(const GraphNodeIterator &nodeItr) {
+
+ NodeData &nodeData = g.getNodeData(nodeItr);
+
+ Vector v(g.getNodeCosts(nodeItr));
+
+ // Solve based on existing links.
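+    // For each already-solved neighbour with selection s, add the matching
+    // row/column of the connecting edge matrix to v; the node's optimal
+    // selection is then v.minIndex().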
+ for (typename NodeData::AdjLinkIterator
+ solvedLinkItr = nodeData.solvedLinksBegin(),
+ solvedLinkEnd = nodeData.solvedLinksEnd();
+ solvedLinkItr != solvedLinkEnd; ++solvedLinkItr) {
+
+ GraphEdgeIterator solvedEdgeItr(*solvedLinkItr);
+ Matrix &edgeCosts = g.getEdgeCosts(solvedEdgeItr);
+
+ if (nodeItr == g.getEdgeNode1Itr(solvedEdgeItr)) {
+ GraphNodeIterator adjNode(g.getEdgeNode2Itr(solvedEdgeItr));
+ unsigned adjSolution =
+ solution.getSelection(g.getNodeID(adjNode));
+ v += edgeCosts.getColAsVector(adjSolution);
+ }
+ else {
+ GraphNodeIterator adjNode(g.getEdgeNode1Itr(solvedEdgeItr));
+ unsigned adjSolution =
+ solution.getSelection(g.getNodeID(adjNode));
+ v += edgeCosts.getRowAsVector(adjSolution);
+ }
+
+ }
+
+ setSolution(nodeItr, v.minIndex());
+ }
+
+ void computeSolutionCost(const SimpleGraph &orig) {
+ PBQPNum cost = 0.0;
+
+ for (SimpleGraph::ConstNodeIterator
+ nodeItr = orig.nodesBegin(), nodeEnd = orig.nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+
+ unsigned nodeId = orig.getNodeID(nodeItr);
+
+ cost += orig.getNodeCosts(nodeItr)[solution.getSelection(nodeId)];
+ }
+
+ for (SimpleGraph::ConstEdgeIterator
+ edgeItr = orig.edgesBegin(), edgeEnd = orig.edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+
+ SimpleGraph::ConstNodeIterator n1 = orig.getEdgeNode1Itr(edgeItr),
+ n2 = orig.getEdgeNode2Itr(edgeItr);
+ unsigned sol1 = solution.getSelection(orig.getNodeID(n1)),
+ sol2 = solution.getSelection(orig.getNodeID(n2));
+
+ cost += orig.getEdgeCosts(edgeItr)[sol1][sol2];
+ }
+
+ solution.setSolutionCost(cost);
+ }
+
+};
+
+template <typename Heuristic>
+class HeuristicSolver : public Solver {
+public:
+ Solution solve(const SimpleGraph &g) const {
+ HeuristicSolverImpl<Heuristic> solverImpl(g);
+ return solverImpl.getSolution();
+ }
+};
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
new file mode 100644
index 000000000000..3ac9e707bab4
--- /dev/null
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -0,0 +1,383 @@
+//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the Briggs test for "allocability" of nodes in a
+// PBQP graph representing a register allocation problem. Nodes which can be
+// proven allocable (by a safe and relatively accurate test) are removed from
+// the PBQP graph first. If no provably allocable node is present in the graph
+// then the node with the minimal spill-cost to degree ratio is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+
+#include "../HeuristicSolver.h"
+
+#include <set>
+
+namespace PBQP {
+namespace Heuristics {
+
+class Briggs {
+ public:
+
+ class NodeData;
+ class EdgeData;
+
+ private:
+
+ typedef HeuristicSolverImpl<Briggs> Solver;
+ typedef HSITypes<NodeData, EdgeData> HSIT;
+ typedef HSIT::SolverGraph SolverGraph;
+ typedef HSIT::GraphNodeIterator GraphNodeIterator;
+ typedef HSIT::GraphEdgeIterator GraphEdgeIterator;
+
+ class LinkDegreeComparator {
+ public:
+ LinkDegreeComparator() : g(0) {}
+ LinkDegreeComparator(SolverGraph *g) : g(g) {}
+
+ bool operator()(const GraphNodeIterator &node1Itr,
+ const GraphNodeIterator &node2Itr) const {
+ assert((g != 0) && "Graph object not set, cannot access node data.");
+ unsigned n1Degree = g->getNodeData(node1Itr).getLinkDegree(),
+ n2Degree = g->getNodeData(node2Itr).getLinkDegree();
+ if (n1Degree > n2Degree) {
+ return true;
+ }
+ else if (n1Degree < n2Degree) {
+ return false;
+ }
+ // else they're "equal" by degree, differentiate based on ID.
+ return g->getNodeID(node1Itr) < g->getNodeID(node2Itr);
+ }
+
+ private:
+ SolverGraph *g;
+ };
+
+ class SpillPriorityComparator {
+ public:
+ SpillPriorityComparator() : g(0) {}
+ SpillPriorityComparator(SolverGraph *g) : g(g) {}
+
+ bool operator()(const GraphNodeIterator &node1Itr,
+ const GraphNodeIterator &node2Itr) const {
+ assert((g != 0) && "Graph object not set, cannot access node data.");
+ PBQPNum cost1 =
+ g->getNodeCosts(node1Itr)[0] /
+ g->getNodeData(node1Itr).getLinkDegree(),
+ cost2 =
+ g->getNodeCosts(node2Itr)[0] /
+ g->getNodeData(node2Itr).getLinkDegree();
+
+ if (cost1 < cost2) {
+ return true;
+ }
+ else if (cost1 > cost2) {
+ return false;
+ }
+        // else they're "equal" again, differentiate based on ID again.
+ return g->getNodeID(node1Itr) < g->getNodeID(node2Itr);
+ }
+
+ private:
+ SolverGraph *g;
+ };
+
+ typedef std::set<GraphNodeIterator, LinkDegreeComparator>
+ RNAllocableNodeList;
+ typedef RNAllocableNodeList::iterator RNAllocableNodeListIterator;
+
+ typedef std::set<GraphNodeIterator, SpillPriorityComparator>
+ RNUnallocableNodeList;
+ typedef RNUnallocableNodeList::iterator RNUnallocableNodeListIterator;
+
+ public:
+
+ class NodeData {
+ private:
+ RNAllocableNodeListIterator rNAllocableNodeListItr;
+ RNUnallocableNodeListIterator rNUnallocableNodeListItr;
+ unsigned numRegOptions, numDenied, numSafe;
+ std::vector<unsigned> unsafeDegrees;
+ bool allocable;
+
+ void addRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr,
+ const GraphEdgeIterator &edgeItr, bool add) {
+
+        // Assume we're adding; for removal flip the unsafe-degree target and
+        // use dir = ~0, which acts as -1 under unsigned (modular) arithmetic
+        // so the updates below subtract instead of add.
+        unsigned udTarget = 0, dir = 1;
+
+        if (!add) {
+          udTarget = 1;
+          dir = ~0;
+        }
+
+ EdgeData &linkEdgeData = g.getEdgeData(edgeItr).getHeuristicData();
+
+ EdgeData::ConstUnsafeIterator edgeUnsafeBegin, edgeUnsafeEnd;
+
+ if (nodeItr == g.getEdgeNode1Itr(edgeItr)) {
+ numDenied += (dir * linkEdgeData.getWorstDegree());
+ edgeUnsafeBegin = linkEdgeData.unsafeBegin();
+ edgeUnsafeEnd = linkEdgeData.unsafeEnd();
+ }
+ else {
+ numDenied += (dir * linkEdgeData.getReverseWorstDegree());
+ edgeUnsafeBegin = linkEdgeData.reverseUnsafeBegin();
+ edgeUnsafeEnd = linkEdgeData.reverseUnsafeEnd();
+ }
+
+ assert((unsafeDegrees.size() ==
+ static_cast<unsigned>(
+ std::distance(edgeUnsafeBegin, edgeUnsafeEnd)))
+ && "Unsafe array size mismatch.");
+
+ std::vector<unsigned>::iterator unsafeDegreesItr =
+ unsafeDegrees.begin();
+
+ for (EdgeData::ConstUnsafeIterator edgeUnsafeItr = edgeUnsafeBegin;
+ edgeUnsafeItr != edgeUnsafeEnd;
+ ++edgeUnsafeItr, ++unsafeDegreesItr) {
+
+ if ((*edgeUnsafeItr == 1) && (*unsafeDegreesItr == udTarget)) {
+ numSafe -= dir;
+ }
+ *unsafeDegreesItr += (dir * (*edgeUnsafeItr));
+ }
+
+ allocable = (numDenied < numRegOptions) || (numSafe > 0);
+ }
+
+ public:
+
+ void setup(SolverGraph &g, const GraphNodeIterator &nodeItr) {
+
+ numRegOptions = g.getNodeCosts(nodeItr).getLength() - 1;
+
+ numSafe = numRegOptions; // Optimistic, correct below.
+ numDenied = 0; // Also optimistic.
+ unsafeDegrees.resize(numRegOptions, 0);
+
+ HSIT::NodeData &nodeData = g.getNodeData(nodeItr);
+
+ for (HSIT::NodeData::AdjLinkIterator
+ adjLinkItr = nodeData.adjLinksBegin(),
+ adjLinkEnd = nodeData.adjLinksEnd();
+ adjLinkItr != adjLinkEnd; ++adjLinkItr) {
+
+ addRemoveLink(g, nodeItr, *adjLinkItr, true);
+ }
+ }
+
+ bool isAllocable() const { return allocable; }
+
+ void handleAddLink(SolverGraph &g, const GraphNodeIterator &nodeItr,
+ const GraphEdgeIterator &adjEdge) {
+ addRemoveLink(g, nodeItr, adjEdge, true);
+ }
+
+ void handleRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr,
+ const GraphEdgeIterator &adjEdge) {
+ addRemoveLink(g, nodeItr, adjEdge, false);
+ }
+
+ void setRNAllocableNodeListItr(
+ const RNAllocableNodeListIterator &rNAllocableNodeListItr) {
+
+ this->rNAllocableNodeListItr = rNAllocableNodeListItr;
+ }
+
+ RNAllocableNodeListIterator getRNAllocableNodeListItr() const {
+ return rNAllocableNodeListItr;
+ }
+
+ void setRNUnallocableNodeListItr(
+ const RNUnallocableNodeListIterator &rNUnallocableNodeListItr) {
+
+ this->rNUnallocableNodeListItr = rNUnallocableNodeListItr;
+ }
+
+ RNUnallocableNodeListIterator getRNUnallocableNodeListItr() const {
+ return rNUnallocableNodeListItr;
+ }
+
+
+ };
+
+ class EdgeData {
+ private:
+
+ typedef std::vector<unsigned> UnsafeArray;
+
+ unsigned worstDegree,
+ reverseWorstDegree;
+ UnsafeArray unsafe, reverseUnsafe;
+
+ public:
+
+ EdgeData() : worstDegree(0), reverseWorstDegree(0) {}
+
+ typedef UnsafeArray::const_iterator ConstUnsafeIterator;
+
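+      // setup() scans the edge cost matrix, skipping row/column 0 (option 0,
+      // presumably the spill choice): unsafe[i] records whether register
+      // option i of node1 is denied (infinite cost) by some choice at node2,
+      // and worstDegree is the largest number of node1's options denied by
+      // any single node2 choice (reverse* mirrors this for node2).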
+ void setup(SolverGraph &g, const GraphEdgeIterator &edgeItr) {
+ const Matrix &edgeCosts = g.getEdgeCosts(edgeItr);
+ unsigned numRegs = edgeCosts.getRows() - 1,
+ numReverseRegs = edgeCosts.getCols() - 1;
+
+ unsafe.resize(numRegs, 0);
+ reverseUnsafe.resize(numReverseRegs, 0);
+
+ std::vector<unsigned> rowInfCounts(numRegs, 0),
+ colInfCounts(numReverseRegs, 0);
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ for (unsigned j = 0; j < numReverseRegs; ++j) {
+ if (edgeCosts[i + 1][j + 1] ==
+ std::numeric_limits<PBQPNum>::infinity()) {
+ unsafe[i] = 1;
+ reverseUnsafe[j] = 1;
+ ++rowInfCounts[i];
+ ++colInfCounts[j];
+
+ if (colInfCounts[j] > worstDegree) {
+ worstDegree = colInfCounts[j];
+ }
+
+ if (rowInfCounts[i] > reverseWorstDegree) {
+ reverseWorstDegree = rowInfCounts[i];
+ }
+ }
+ }
+ }
+ }
+
+ unsigned getWorstDegree() const { return worstDegree; }
+ unsigned getReverseWorstDegree() const { return reverseWorstDegree; }
+ ConstUnsafeIterator unsafeBegin() const { return unsafe.begin(); }
+ ConstUnsafeIterator unsafeEnd() const { return unsafe.end(); }
+ ConstUnsafeIterator reverseUnsafeBegin() const {
+ return reverseUnsafe.begin();
+ }
+ ConstUnsafeIterator reverseUnsafeEnd() const {
+ return reverseUnsafe.end();
+ }
+ };
+
+ void initialise(Solver &solver) {
+ this->s = &solver;
+ g = &s->getGraph();
+ rNAllocableBucket = RNAllocableNodeList(LinkDegreeComparator(g));
+ rNUnallocableBucket =
+ RNUnallocableNodeList(SpillPriorityComparator(g));
+
+ for (GraphEdgeIterator
+ edgeItr = g->edgesBegin(), edgeEnd = g->edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+
+ g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr);
+ }
+
+ for (GraphNodeIterator
+ nodeItr = g->nodesBegin(), nodeEnd = g->nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+
+ g->getNodeData(nodeItr).getHeuristicData().setup(*g, nodeItr);
+ }
+ }
+
+ void addToRNBucket(const GraphNodeIterator &nodeItr) {
+ NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData();
+
+ if (nodeData.isAllocable()) {
+ nodeData.setRNAllocableNodeListItr(
+ rNAllocableBucket.insert(rNAllocableBucket.begin(), nodeItr));
+ }
+ else {
+ nodeData.setRNUnallocableNodeListItr(
+ rNUnallocableBucket.insert(rNUnallocableBucket.begin(), nodeItr));
+ }
+ }
+
+ void removeFromRNBucket(const GraphNodeIterator &nodeItr) {
+ NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData();
+
+ if (nodeData.isAllocable()) {
+ rNAllocableBucket.erase(nodeData.getRNAllocableNodeListItr());
+ }
+ else {
+ rNUnallocableBucket.erase(nodeData.getRNUnallocableNodeListItr());
+ }
+ }
+
+ void handleAddLink(const GraphEdgeIterator &edgeItr) {
+ // We assume that if we got here this edge is attached to at least
+ // one high degree node.
+ g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr);
+
+ GraphNodeIterator n1Itr = g->getEdgeNode1Itr(edgeItr),
+ n2Itr = g->getEdgeNode2Itr(edgeItr);
+
+ HSIT::NodeData &n1Data = g->getNodeData(n1Itr),
+ &n2Data = g->getNodeData(n2Itr);
+
+ if (n1Data.getLinkDegree() > 2) {
+ n1Data.getHeuristicData().handleAddLink(*g, n1Itr, edgeItr);
+ }
+ if (n2Data.getLinkDegree() > 2) {
+ n2Data.getHeuristicData().handleAddLink(*g, n2Itr, edgeItr);
+ }
+ }
+
+ void handleRemoveLink(const GraphEdgeIterator &edgeItr,
+ const GraphNodeIterator &nodeItr) {
+ NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData();
+ nodeData.handleRemoveLink(*g, nodeItr, edgeItr);
+ }
+
+ void processRN() {
+
+ if (!rNAllocableBucket.empty()) {
+ GraphNodeIterator selectedNodeItr = *rNAllocableBucket.begin();
+ //std::cerr << "RN safely pushing " << g->getNodeID(selectedNodeItr) << "\n";
+ rNAllocableBucket.erase(rNAllocableBucket.begin());
+ s->pushStack(selectedNodeItr);
+ s->unlinkNode(selectedNodeItr);
+ }
+ else {
+ GraphNodeIterator selectedNodeItr = *rNUnallocableBucket.begin();
+ //std::cerr << "RN optimistically pushing " << g->getNodeID(selectedNodeItr) << "\n";
+ rNUnallocableBucket.erase(rNUnallocableBucket.begin());
+ s->pushStack(selectedNodeItr);
+ s->unlinkNode(selectedNodeItr);
+ }
+
+ }
+
+ bool rNBucketEmpty() const {
+ return (rNAllocableBucket.empty() && rNUnallocableBucket.empty());
+ }
+
+private:
+
+ Solver *s;
+ SolverGraph *g;
+ RNAllocableNodeList rNAllocableBucket;
+ RNUnallocableNodeList rNUnallocableBucket;
+};
+
+
+
+}
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h
new file mode 100644
index 000000000000..11f4b4b4e34c
--- /dev/null
+++ b/lib/CodeGen/PBQP/PBQPMath.h
@@ -0,0 +1,288 @@
+//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_PBQPMATH_H
+#define LLVM_CODEGEN_PBQP_PBQPMATH_H
+
+#include <cassert>
+#include <algorithm>
+#include <functional>
+
+namespace PBQP {
+
+typedef double PBQPNum;
+
+/// \brief PBQP Vector class.
+class Vector {
+ public:
+
+ /// \brief Construct a PBQP vector of the given size.
+ explicit Vector(unsigned length) :
+ length(length), data(new PBQPNum[length]) {
+ }
+
+ /// \brief Construct a PBQP vector with initializer.
+ Vector(unsigned length, PBQPNum initVal) :
+ length(length), data(new PBQPNum[length]) {
+ std::fill(data, data + length, initVal);
+ }
+
+ /// \brief Copy construct a PBQP vector.
+ Vector(const Vector &v) :
+ length(v.length), data(new PBQPNum[length]) {
+ std::copy(v.data, v.data + length, data);
+ }
+
+ /// \brief Destroy this vector, return its memory.
+ ~Vector() { delete[] data; }
+
+ /// \brief Assignment operator.
+  Vector& operator=(const Vector &v) {
+    if (this == &v)
+      return *this;
+    delete[] data;
+    length = v.length;
+    data = new PBQPNum[length];
+    std::copy(v.data, v.data + length, data);
+    return *this;
+  }
+
+ /// \brief Return the length of the vector
+ unsigned getLength() const {
+ return length;
+ }
+
+ /// \brief Element access.
+ PBQPNum& operator[](unsigned index) {
+ assert(index < length && "Vector element access out of bounds.");
+ return data[index];
+ }
+
+ /// \brief Const element access.
+ const PBQPNum& operator[](unsigned index) const {
+ assert(index < length && "Vector element access out of bounds.");
+ return data[index];
+ }
+
+ /// \brief Add another vector to this one.
+ Vector& operator+=(const Vector &v) {
+ assert(length == v.length && "Vector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
+ return *this;
+ }
+
+ /// \brief Subtract another vector from this one.
+ Vector& operator-=(const Vector &v) {
+ assert(length == v.length && "Vector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
+ return *this;
+ }
+
+ /// \brief Returns the index of the minimum value in this vector
+ unsigned minIndex() const {
+ return std::min_element(data, data + length) - data;
+ }
+
+ private:
+ unsigned length;
+ PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given vector on the given
+/// output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Vector &v) {
+ assert((v.getLength() != 0) && "Zero-length vector badness.");
+
+ os << "[ " << v[0];
+ for (unsigned i = 1; i < v.getLength(); ++i) {
+ os << ", " << v[i];
+ }
+ os << " ]";
+
+ return os;
+}
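+
+// Illustrative example (a sketch, not part of this header):
+//
+//   Vector v(3, 1.0), w(3, 2.0); // [ 1, 1, 1 ], [ 2, 2, 2 ]
+//   v += w;                      // v is now [ 3, 3, 3 ]
+//   unsigned i = v.minIndex();   // 0 (index of first minimal element)
+//   std::cout << v;              // prints "[ 3, 3, 3 ]"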
+
+
+/// \brief PBQP Matrix class
+class Matrix {
+ public:
+
+ /// \brief Construct a PBQP Matrix with the given dimensions.
+ Matrix(unsigned rows, unsigned cols) :
+ rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+ }
+
+ /// \brief Construct a PBQP Matrix with the given dimensions and initial
+ /// value.
+ Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
+ rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+ std::fill(data, data + (rows * cols), initVal);
+ }
+
+ /// \brief Copy construct a PBQP matrix.
+ Matrix(const Matrix &m) :
+ rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+ std::copy(m.data, m.data + (rows * cols), data);
+ }
+
+  /// \brief Destroy this matrix, returning its memory.
+ ~Matrix() { delete[] data; }
+
+ /// \brief Assignment operator.
+ Matrix& operator=(const Matrix &m) {
+ delete[] data;
+ rows = m.rows; cols = m.cols;
+ data = new PBQPNum[rows * cols];
+ std::copy(m.data, m.data + (rows * cols), data);
+ return *this;
+ }
+
+ /// \brief Return the number of rows in this matrix.
+ unsigned getRows() const { return rows; }
+
+ /// \brief Return the number of cols in this matrix.
+ unsigned getCols() const { return cols; }
+
+ /// \brief Matrix element access.
+ PBQPNum* operator[](unsigned r) {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ /// \brief Matrix element access.
+ const PBQPNum* operator[](unsigned r) const {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ /// \brief Returns the given row as a vector.
+ Vector getRowAsVector(unsigned r) const {
+ Vector v(cols);
+ for (unsigned c = 0; c < cols; ++c)
+ v[c] = (*this)[r][c];
+ return v;
+ }
+
+ /// \brief Returns the given column as a vector.
+ Vector getColAsVector(unsigned c) const {
+ Vector v(rows);
+ for (unsigned r = 0; r < rows; ++r)
+ v[r] = (*this)[r][c];
+ return v;
+ }
+
+ /// \brief Reset the matrix to the given value.
+ Matrix& reset(PBQPNum val = 0) {
+ std::fill(data, data + (rows * cols), val);
+ return *this;
+ }
+
+ /// \brief Set a single row of this matrix to the given value.
+ Matrix& setRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds.");
+ std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+ return *this;
+ }
+
+ /// \brief Set a single column of this matrix to the given value.
+ Matrix& setCol(unsigned c, PBQPNum val) {
+ assert(c < cols && "Column out of bounds.");
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] = val;
+ return *this;
+ }
+
+ /// \brief Matrix transpose.
+ Matrix transpose() const {
+ Matrix m(cols, rows);
+ for (unsigned r = 0; r < rows; ++r)
+ for (unsigned c = 0; c < cols; ++c)
+ m[c][r] = (*this)[r][c];
+ return m;
+ }
+
+ /// \brief Returns the diagonal of the matrix as a vector.
+ ///
+ /// Matrix must be square.
+ Vector diagonalize() const {
+ assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+ Vector v(rows);
+ for (unsigned r = 0; r < rows; ++r)
+ v[r] = (*this)[r][r];
+ return v;
+ }
+
+ /// \brief Add the given matrix to this one.
+ Matrix& operator+=(const Matrix &m) {
+ assert(rows == m.rows && cols == m.cols &&
+ "Matrix dimensions mismatch.");
+ std::transform(data, data + (rows * cols), m.data, data,
+ std::plus<PBQPNum>());
+ return *this;
+ }
+
+ /// \brief Returns the minimum of the given row
+ PBQPNum getRowMin(unsigned r) const {
+ assert(r < rows && "Row out of bounds");
+ return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+ }
+
+ /// \brief Returns the minimum of the given column
+ PBQPNum getColMin(unsigned c) const {
+ PBQPNum minElem = (*this)[0][c];
+ for (unsigned r = 1; r < rows; ++r)
+ if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+ return minElem;
+ }
+
+ /// \brief Subtracts the given scalar from the elements of the given row.
+ Matrix& subFromRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds");
+ std::transform(data + (r * cols), data + ((r + 1) * cols),
+ data + (r * cols),
+ std::bind2nd(std::minus<PBQPNum>(), val));
+ return *this;
+ }
+
+ /// \brief Subtracts the given scalar from the elements of the given column.
+ Matrix& subFromCol(unsigned c, PBQPNum val) {
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] -= val;
+ return *this;
+ }
+
+ /// \brief Returns true if this is a zero matrix.
+ bool isZero() const {
+    return std::find_if(data, data + (rows * cols),
+ std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+ data + (rows * cols);
+ }
+
+ private:
+ unsigned rows, cols;
+ PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given matrix on the given
+/// output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Matrix &m) {
+
+ assert((m.getRows() != 0) && "Zero-row matrix badness.");
+
+ for (unsigned i = 0; i < m.getRows(); ++i) {
+ os << m.getRowAsVector(i);
+ }
+
+ return os;
+}
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_PBQPMATH_H
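The Vector and Matrix classes above are self-contained, so they can be exercised standalone. A minimal sketch (not part of the patch, using only the API shown above):

#include "PBQPMath.h"
#include <iostream>

int main() {
  // A three-option cost vector; option 2 is cheapest.
  PBQP::Vector v(3, 3.0);
  v[1] = 2.5;
  v[2] = 1.0;
  std::cout << v << " min at index " << v.minIndex() << "\n";

  // A 2x3 cost matrix, zero-filled, with row 0 penalized.
  PBQP::Matrix m(2, 3, 0.0);
  m.setRow(0, 5.0);
  std::cout << m << " row 0 min: " << m.getRowMin(0) << "\n";
  std::cout << "zero matrix? " << (m.isZero() ? "yes" : "no") << "\n";
  return 0;
}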
diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h
new file mode 100644
index 000000000000..1ca9caee3467
--- /dev/null
+++ b/lib/CodeGen/PBQP/SimpleGraph.h
@@ -0,0 +1,100 @@
+//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple PBQP graph class representing a PBQP problem. Graphs of this type
+// can be passed to a PBQP::Solver instance to solve the PBQP problem.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H
+#define LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H
+
+#include "GraphBase.h"
+
+namespace PBQP {
+
+class SimpleEdge;
+
+class SimpleNode : public NodeBase<SimpleNode, SimpleEdge> {
+public:
+ SimpleNode(const Vector &costs) :
+ NodeBase<SimpleNode, SimpleEdge>(costs) {}
+};
+
+class SimpleEdge : public EdgeBase<SimpleNode, SimpleEdge> {
+public:
+ SimpleEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
+ const Matrix &costs) :
+ EdgeBase<SimpleNode, SimpleEdge>(node1Itr, node2Itr, costs) {}
+};
+
+class SimpleGraph : public GraphBase<SimpleNode, SimpleEdge> {
+private:
+
+ typedef GraphBase<SimpleNode, SimpleEdge> PGraph;
+
+ void copyFrom(const SimpleGraph &other) {
+ assert(other.areNodeIDsValid() &&
+ "Cannot copy from another graph unless IDs have been assigned.");
+
+ std::vector<NodeIterator> newNodeItrs(other.getNumNodes());
+
+ for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd();
+ nItr != nEnd; ++nItr) {
+ newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr));
+ }
+
+ for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd();
+ eItr != eEnd; ++eItr) {
+
+ unsigned node1ID = other.getNodeID(other.getEdgeNode1Itr(eItr)),
+ node2ID = other.getNodeID(other.getEdgeNode2Itr(eItr));
+
+ addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID],
+ other.getEdgeCosts(eItr));
+ }
+ }
+
+ void copyFrom(SimpleGraph &other) {
+ if (!other.areNodeIDsValid()) {
+ other.assignNodeIDs();
+ }
+ copyFrom(const_cast<const SimpleGraph&>(other));
+ }
+
+public:
+
+ SimpleGraph() {}
+
+
+ SimpleGraph(const SimpleGraph &other) : PGraph() {
+ copyFrom(other);
+ }
+
+ SimpleGraph& operator=(const SimpleGraph &other) {
+ clear();
+ copyFrom(other);
+ return *this;
+ }
+
+ NodeIterator addNode(const Vector &costs) {
+ return PGraph::addConstructedNode(SimpleNode(costs));
+ }
+
+ EdgeIterator addEdge(const NodeIterator &node1Itr,
+ const NodeIterator &node2Itr,
+ const Matrix &costs) {
+ return PGraph::addConstructedEdge(SimpleEdge(node1Itr, node2Itr, costs));
+ }
+
+};
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H
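A hedged construction sketch for this class. NodeIterator and the inherited helpers come from GraphBase.h, which is not shown in this hunk, so their exact spellings are inferred from their use in SimpleGraph above:

#include "SimpleGraph.h"

// Populate g with a two-node interference fragment (illustration only).
void buildFragment(PBQP::SimpleGraph &g) {
  // Two nodes, each with two allocation options of equal base cost.
  PBQP::SimpleGraph::NodeIterator n1 = g.addNode(PBQP::Vector(2, 0.0));
  PBQP::SimpleGraph::NodeIterator n2 = g.addNode(PBQP::Vector(2, 0.0));
  // Edge costs: heavily penalize choosing the same option at both ends,
  // modeling interference between the two nodes.
  PBQP::Matrix costs(2, 2, 0.0);
  costs[0][0] = costs[1][1] = 1000.0;
  g.addEdge(n1, n2, costs);
}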
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
new file mode 100644
index 000000000000..c91e2fa560a0
--- /dev/null
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -0,0 +1,88 @@
+//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Solution class. Represents a solution to a PBQP problem: the option
+// selected for each node, the total solution cost, and reduction statistics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
+#define LLVM_CODEGEN_PBQP_SOLUTION_H
+
+#include "PBQPMath.h"
+
+namespace PBQP {
+
+class Solution {
+
+ friend class SolverImplementation;
+
+private:
+
+ std::vector<unsigned> selections;
+ PBQPNum solutionCost;
+ bool provedOptimal;
+ unsigned r0Reductions, r1Reductions,
+ r2Reductions, rNReductions;
+
+public:
+
+ Solution() :
+ solutionCost(0.0), provedOptimal(false),
+ r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
+
+ Solution(unsigned length, bool assumeOptimal) :
+ selections(length), solutionCost(0.0), provedOptimal(assumeOptimal),
+ r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
+
+ void setProvedOptimal(bool provedOptimal) {
+ this->provedOptimal = provedOptimal;
+ }
+
+ void setSelection(unsigned nodeID, unsigned selection) {
+ selections[nodeID] = selection;
+ }
+
+ void setSolutionCost(PBQPNum solutionCost) {
+ this->solutionCost = solutionCost;
+ }
+
+ void incR0Reductions() { ++r0Reductions; }
+ void incR1Reductions() { ++r1Reductions; }
+ void incR2Reductions() { ++r2Reductions; }
+ void incRNReductions() { ++rNReductions; }
+
+ unsigned numNodes() const { return selections.size(); }
+
+ unsigned getSelection(unsigned nodeID) const {
+ return selections[nodeID];
+ }
+
+ PBQPNum getCost() const { return solutionCost; }
+
+ bool isProvedOptimal() const { return provedOptimal; }
+
+ unsigned getR0Reductions() const { return r0Reductions; }
+ unsigned getR1Reductions() const { return r1Reductions; }
+ unsigned getR2Reductions() const { return r2Reductions; }
+ unsigned getRNReductions() const { return rNReductions; }
+
+ bool operator==(const Solution &other) const {
+ return (selections == other.selections);
+ }
+
+ bool operator!=(const Solution &other) const {
+ return !(*this == other);
+ }
+
+};
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
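Since the accessors above are the whole public surface, a consumer might report a computed solution as in this sketch (the Solution instance itself would come from a Solver, defined in the next file):

#include "Solution.h"
#include <iostream>

void report(const PBQP::Solution &sol) {
  std::cout << "cost " << sol.getCost()
            << (sol.isProvedOptimal() ? " (proved optimal)" : "") << "\n";
  for (unsigned n = 0; n < sol.numNodes(); ++n)
    std::cout << "  node " << n << " -> option " << sol.getSelection(n) << "\n";
  std::cout << "  reductions: R0=" << sol.getR0Reductions()
            << " R1=" << sol.getR1Reductions()
            << " R2=" << sol.getR2Reductions()
            << " RN=" << sol.getRNReductions() << "\n";
}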
diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h
new file mode 100644
index 000000000000..a9c5f837c453
--- /dev/null
+++ b/lib/CodeGen/PBQP/Solver.h
@@ -0,0 +1,31 @@
+//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PBQP_SOLVER_H
+#define LLVM_CODEGEN_PBQP_SOLVER_H
+
+#include "SimpleGraph.h"
+#include "Solution.h"
+
+namespace PBQP {
+
+/// \brief Interface for solver classes.
+class Solver {
+public:
+
+ virtual ~Solver() = 0;
+ virtual Solution solve(const SimpleGraph &orig) const = 0;
+};
+
+inline Solver::~Solver() {}
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLVER_H
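To make the contract concrete, here is a hypothetical trivial solver against this interface. It ignores edge costs entirely and exists only to illustrate the shape of a conforming implementation; the GraphBase accessors it uses are inferred from their use in SimpleGraph.h:

#include "Solver.h"

// Hypothetical greedy solver: picks each node's cheapest option,
// ignoring edge costs. Assumes node IDs have already been assigned.
class GreedySolver : public PBQP::Solver {
public:
  virtual PBQP::Solution solve(const PBQP::SimpleGraph &orig) const {
    PBQP::Solution sol(orig.getNumNodes(), false);
    PBQP::PBQPNum cost = 0;
    for (PBQP::SimpleGraph::ConstNodeIterator n = orig.nodesBegin(),
         e = orig.nodesEnd(); n != e; ++n) {
      unsigned sel = orig.getNodeCosts(n).minIndex();
      sol.setSelection(orig.getNodeID(n), sel);
      cost += orig.getNodeCosts(n)[sel];
    }
    sol.setSolutionCost(cost);
    return sol;
  }
};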
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index c5c76fc79467..8071b0a81a89 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "phielim"
+#include "PHIElimination.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/LiveVariables.h"
@@ -22,7 +23,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,78 +34,25 @@ using namespace llvm;
STATISTIC(NumAtomic, "Number of atomic phis lowered");
-namespace {
- class VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PNE() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
- /// in predecessor basic blocks.
- ///
- bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
- void LowerAtomicPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt);
-
- /// analyzePHINodes - Gather information about the PHI nodes in
- /// here. In particular, we want to map the number of uses of a virtual
- /// register which is used in a PHI node. We map that to the BB the
- /// vreg is coming from. This is used later to determine when the vreg
- /// is killed in the BB.
- ///
- void analyzePHINodes(const MachineFunction& Fn);
-
- // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
- // SrcReg. This needs to be after any def or uses of SrcReg, but before
- // any subsequent point where control flow might jump out of the basic
- // block.
- MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
- unsigned SrcReg);
-
- // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
- // also after any exception handling labels: in landing pads execution
- // starts at the label, so any copies placed before it won't be executed!
- MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- // Rather than assuming that EH labels come before other kinds of labels,
- // just skip all labels.
- while (I != MBB.end() &&
- (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
- ++I;
- return I;
- }
-
- typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
- typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
-
- VRegPHIUse VRegPHIUseCount;
-
- // Defs of PHI sources which are implicit_def.
- SmallPtrSet<MachineInstr*, 4> ImpDefs;
- };
-}
-
-char PNE::ID = 0;
-static RegisterPass<PNE>
+char PHIElimination::ID = 0;
+static RegisterPass<PHIElimination>
X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
const PassInfo *const llvm::PHIEliminationID = &X;
-bool PNE::runOnMachineFunction(MachineFunction &Fn) {
+void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
+ PHIDefs.clear();
+ PHIKills.clear();
analyzePHINodes(Fn);
bool Changed = false;
@@ -132,7 +79,8 @@ bool PNE::runOnMachineFunction(MachineFunction &Fn) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
-bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
return false; // Quick exit for basic blocks without PHIs.
@@ -162,8 +110,9 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
// This needs to be after any def or uses of SrcReg, but before any subsequent
// point where control flow might jump out of the basic block.
-MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
- unsigned SrcReg) {
+MachineBasicBlock::iterator
+llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ unsigned SrcReg) {
// Handle the trivial case trivially.
if (MBB.empty())
return MBB.begin();
@@ -206,9 +155,10 @@ MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
 /// under the assumption that it needs to be lowered in a way that supports
 /// atomic execution of PHIs. This lowering method is always correct.
-///
-void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt) {
+///
+void llvm::PHIElimination::LowerAtomicPHINode(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
// Unlink the PHI node from the basic block, but don't delete the PHI yet.
MachineInstr *MPhi = MBB.remove(MBB.begin());
@@ -235,6 +185,10 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
}
+ // Record PHI def.
+ assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
+ PHIDefs[DestReg] = &MBB;
+
// Update live variable information if there is any.
LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
if (LV) {
@@ -276,6 +230,13 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source of this PHI operand.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Record the kill.
+ PHIKills[SrcReg].insert(&opBlock);
+
// If source is defined by an implicit def, there is no need to insert a
// copy.
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
@@ -284,10 +245,6 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
continue;
}
- // Get the MachineBasicBlock equivalent of the BasicBlock that is the source
- // path the PHI.
- MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
-
// Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same
// basic block.
@@ -420,7 +377,7 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
///
-void PNE::analyzePHINodes(const MachineFunction& Fn) {
+void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
I != E; ++I)
for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
new file mode 100644
index 000000000000..3d02dfdcddba
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.h
@@ -0,0 +1,125 @@
+//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
+#define LLVM_CODEGEN_PHIELIMINATION_HPP
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+ /// Lower PHI instructions to copies.
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+ private:
+
+ typedef SmallSet<MachineBasicBlock*, 4> PHIKillList;
+ typedef DenseMap<unsigned, PHIKillList> PHIKillMap;
+ typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap;
+
+ public:
+
+ typedef PHIKillList::iterator phi_kill_iterator;
+ typedef PHIKillList::const_iterator const_phi_kill_iterator;
+
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    /// Return true if the given vreg was defined by a PHI instr prior to
+ /// lowering.
+ bool hasPHIDef(unsigned vreg) const {
+ return PHIDefs.count(vreg);
+ }
+
+ /// Returns the block in which the PHI instruction which defined the
+ /// given vreg used to reside.
+ MachineBasicBlock* getPHIDefBlock(unsigned vreg) {
+ PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg);
+ assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def.");
+ return phiDefItr->second;
+ }
+
+ /// Returns true if the given vreg was killed by a PHI instr.
+ bool hasPHIKills(unsigned vreg) const {
+ return PHIKills.count(vreg);
+ }
+
+    /// Returns an iterator over the MachineBasicBlocks which contained
+    /// PHI kills of this register prior to lowering.
+ phi_kill_iterator phiKillsBegin(unsigned vreg) {
+ PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
+ assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
+ return phiKillItr->second.begin();
+ }
+ phi_kill_iterator phiKillsEnd(unsigned vreg) {
+ PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
+ assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
+ return phiKillItr->second.end();
+ }
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in the
+    /// function. In particular, we want to map the number of uses of a virtual
+ /// register which is used in a PHI node. We map that to the BB the
+ /// vreg is coming from. This is used later to determine when the vreg
+ /// is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ // SrcReg. This needs to be after any def or uses of SrcReg, but before
+ // any subsequent point where control flow might jump out of the basic
+ // block.
+ MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ unsigned SrcReg);
+
+ // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
+ // also after any exception handling labels: in landing pads execution
+ // starts at the label, so any copies placed before it won't be executed!
+ MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // Rather than assuming that EH labels come before other kinds of labels,
+ // just skip all labels.
+ while (I != MBB.end() &&
+ (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
+ ++I;
+ return I;
+ }
+
+ typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
+ typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+ PHIDefMap PHIDefs;
+ PHIKillMap PHIKills;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+ };
+
+}
+
+#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */
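The new def/kill maps give later passes a view of the pre-lowering PHI structure. A hedged consumer sketch, assuming the pass is reachable through the usual getAnalysis<> mechanism:

#include "PHIElimination.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: print where a vreg's PHI lived and where it was killed.
static void dumpPHIInfo(PHIElimination &PE, unsigned VReg) {
  if (PE.hasPHIDef(VReg))
    errs() << "phi-def in BB#" << PE.getPHIDefBlock(VReg)->getNumber() << "\n";
  if (PE.hasPHIKills(VReg))
    for (PHIElimination::phi_kill_iterator I = PE.phiKillsBegin(VReg),
         E = PE.phiKillsEnd(VReg); I != E; ++I)
      errs() << "phi-kill in BB#" << (*I)->getNumber() << "\n";
}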
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index de7746855b3f..e52158cfeb4e 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -19,45 +19,73 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
+#include "ExactHazardRecognizer.h"
+#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include <map>
+#include <set>
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
+// Post-RA scheduling is enabled with
+// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
cl::desc("Break post-RA scheduling anti-dependencies"),
cl::init(true), cl::Hidden);
-
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
- cl::desc("Enable simple hazard-avoidance"),
+ cl::desc("Enable exact hazard avoidance"),
cl::init(true), cl::Hidden);
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
namespace {
class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+ AliasAnalysis *AA;
+
public:
static char ID;
PostRAScheduler() : MachineFunctionPass(&ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -95,6 +123,9 @@ namespace {
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
/// Classes - For live regs that are only used in one register class in a
/// live range, the register class. If the register is not live, the
/// corresponding value is null. If the register is live but used in
@@ -106,22 +137,27 @@ namespace {
/// RegRegs - Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
- /// The index of the most recent kill (proceding bottom-up), or ~0u if
- /// the register is not live.
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
- /// The index of the most recent complete def (proceding bottom up), or ~0u
- /// if the register is live.
+    /// DefIndices - The index of the most recent complete def (proceeding bottom
+ /// up), or ~0u if the register is live.
unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ SmallSet<unsigned, 4> KeepRegs;
+
public:
SchedulePostRATDList(MachineFunction &MF,
const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
- ScheduleHazardRecognizer *HR)
+ ScheduleHazardRecognizer *HR,
+ AliasAnalysis *aa)
: ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
AllocatableSet(TRI->getAllocatableSet(MF)),
- HazardRec(HR) {}
+ HazardRec(HR), AA(aa) {}
~SchedulePostRATDList() {
delete HazardRec;
@@ -135,6 +171,11 @@ namespace {
/// Schedule - Schedule the instruction range using list scheduling.
///
void Schedule();
+
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
@@ -153,62 +194,15 @@ namespace {
void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
void ListScheduleTopDown();
bool BreakAntiDependencies();
- };
-
- /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
- /// a coarse classification and attempts to avoid that instructions of
- /// a given class aren't grouped too densely together.
- class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
- /// Class - A simple classification for SUnits.
- enum Class {
- Other, Load, Store
- };
-
- /// Window - The Class values of the most recently issued
- /// instructions.
- Class Window[8];
-
- /// getClass - Classify the given SUnit.
- Class getClass(const SUnit *SU) {
- const MachineInstr *MI = SU->getInstr();
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayLoad())
- return Load;
- if (TID.mayStore())
- return Store;
- return Other;
- }
-
- /// Step - Rotate the existing entries in Window and insert the
- /// given class value in position as the most recent.
- void Step(Class C) {
- std::copy(Window+1, array_endof(Window), Window);
- Window[array_lengthof(Window)-1] = C;
- }
-
- public:
- SimpleHazardRecognizer() : Window() {}
-
- virtual HazardType getHazardType(SUnit *SU) {
- Class C = getClass(SU);
- if (C == Other)
- return NoHazard;
- unsigned Score = 0;
- for (unsigned i = 0; i != array_lengthof(Window); ++i)
- if (Window[i] == C)
- Score += i + 1;
- if (Score > array_lengthof(Window) * 2)
- return Hazard;
- return NoHazard;
- }
-
- virtual void EmitInstruction(SUnit *SU) {
- Step(getClass(SU));
- }
-
- virtual void AdvanceCycle() {
- Step(Other);
- }
+ unsigned findSuitableFreeRegister(unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *);
+ void StartBlockForKills(MachineBasicBlock *BB);
+
+ // ToggleKillFlag - Toggle a register operand kill flag. Other
+ // adjustments may be made to the instruction if necessary. Return
+ // true if the operand has been deleted, false if not.
+ bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
};
}
@@ -235,19 +229,44 @@ static bool isSchedulingBoundary(const MachineInstr *MI,
}
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
- DOUT << "PostRAScheduler\n";
+ AA = &getAnalysis<AliasAnalysis>();
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ if (EnablePostRAScheduler.getPosition() > 0) {
+ if (!EnablePostRAScheduler)
+ return true;
+ } else {
+ // Check that post-RA scheduling is enabled for this function
+ const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+ if (!ST.enablePostRAScheduler())
+ return true;
+ }
+
+ DEBUG(errs() << "PostRAScheduler\n");
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
- new SimpleHazardRecognizer :
- new ScheduleHazardRecognizer();
+ (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
+ (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+ ":MBB ID#" << MBB->getNumber() << " ***\n";
+ }
+#endif
+
// Initialize register live-range state for scheduling in this block.
Scheduler.StartBlock(MBB);
@@ -259,7 +278,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *MI = prior(I);
if (isSchedulingBoundary(MI, Fn)) {
Scheduler.Run(MBB, I, Current, CurrentCount);
- Scheduler.EmitSchedule();
+ Scheduler.EmitSchedule(0);
Current = MI;
CurrentCount = Count - 1;
Scheduler.Observe(MI, CurrentCount);
@@ -271,10 +290,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
- Scheduler.EmitSchedule();
+ Scheduler.EmitSchedule(0);
// Clean up register live-range state.
Scheduler.FinishBlock();
+
+ // Update register kills
+ Scheduler.FixupKills(MBB);
}
return true;
@@ -287,6 +309,9 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
// Call the superclass.
ScheduleDAGInstrs::StartBlock(BB);
+ // Reset the hazard recognizer.
+ HazardRec->Reset();
+
// Clear out the register class data.
std::fill(Classes, array_endof(Classes),
static_cast<const TargetRegisterClass *>(0));
@@ -295,8 +320,13 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
std::fill(KillIndices, array_endof(KillIndices), ~0u);
std::fill(DefIndices, array_endof(DefIndices), BB->size());
+ // Clear "do not change" set.
+ KeepRegs.clear();
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
// Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().getDesc().isReturn())
+ if (IsReturnBlock) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
@@ -312,7 +342,7 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
DefIndices[AliasReg] = ~0u;
}
}
- else
+ } else {
// In a non-return block, examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
@@ -330,18 +360,16 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
DefIndices[AliasReg] = ~0u;
}
}
+ }
- // Consider callee-saved registers as live-out, since we're running after
- // prologue/epilogue insertion so there's no way to add additional
- // saved registers.
- //
- // TODO: If the callee saves and restores these, then we can potentially
- // use them between the save and the restore. To do that, we could scan
- // the exit blocks to see which of these registers are defined.
- // Alternatively, callee-saved registers that aren't saved and restored
- // could be marked live-in in every block.
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
@@ -358,10 +386,10 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
- DOUT << "********** List Scheduling **********\n";
+ DEBUG(errs() << "********** List Scheduling **********\n");
// Build the scheduling graph.
- BuildSchedGraph();
+ BuildSchedGraph(AA);
if (EnableAntiDepBreaking) {
if (BreakAntiDependencies()) {
@@ -374,10 +402,13 @@ void SchedulePostRATDList::Schedule() {
SUnits.clear();
EntrySU = SUnit();
ExitSU = SUnit();
- BuildSchedGraph();
+ BuildSchedGraph(AA);
}
}
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
@@ -448,8 +479,10 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- const TargetRegisterClass *NewRC =
- getInstrOperandRegClass(TRI, MI->getDesc(), i);
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -473,6 +506,16 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
// If we're still willing to consider this register, note the reference.
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
+
+      // It's not safe to change register allocation for source operands of
+      // instructions that have special allocation requirements.
+ if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (KeepRegs.insert(Reg)) {
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ KeepRegs.insert(*Subreg);
+ }
+ }
}
}
@@ -492,9 +535,10 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
DefIndices[Reg] = Count;
KillIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
@@ -503,6 +547,7 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
unsigned SubregReg = *Subreg;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
@@ -520,8 +565,9 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
if (Reg == 0) continue;
if (!MO.isUse()) continue;
- const TargetRegisterClass *NewRC =
- getInstrOperandRegClass(TRI, MI->getDesc(), i);
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -551,6 +597,36 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
}
}
+unsigned
+SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC) {
+ for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+ RE = RC->allocation_order_end(MF); R != RE; ++R) {
+ unsigned NewReg = *R;
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
/// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
@@ -567,8 +643,18 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
Max = SU;
}
- DOUT << "Critical path has total latency "
- << (Max->getDepth() + Max->Latency) << "\n";
+#ifndef NDEBUG
+ {
+ DEBUG(errs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(errs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(errs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(errs() << '\n');
+ }
+#endif
// Track progress along the critical path through the SUnit graph as we walk
// the instructions.
@@ -598,7 +684,7 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
// isn't A which is free. This re-introduces anti-dependencies
// at all but one of the original anti-dependencies that we were
// trying to break. To avoid this, keep track of the most recent
- // register that each register was replaced with, avoid avoid
+    // register that each register was replaced with, and avoid
// using it to repair an anti-dependence on the same register.
// This lets us produce this:
// A = ...
@@ -627,13 +713,6 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
I != E; --Count) {
MachineInstr *MI = --I;
- // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
- // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
- // is left behind appearing to clobber the super-register, while the
- // subregister needs to remain live. So we just ignore them.
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
- continue;
-
// Check if this instruction has a dependence on the critical path that
// is an anti-dependence that we may be able to break. If it is, set
// AntiDepReg to the non-zero register associated with the anti-dependence.
@@ -656,8 +735,12 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- // Don't break anti-dependencies on non-allocatable registers.
if (!AllocatableSet.test(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.count(AntiDepReg))
+          // Don't break anti-dependencies if a use down below requires
+ // this exact register.
AntiDepReg = 0;
else {
// If the SUnit has other dependencies on the SUnit that it
@@ -689,16 +772,22 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
PrescanInstruction(MI);
- // If this instruction has a use of AntiDepReg, breaking it
- // is invalid.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (MO.isUse() && AntiDepReg == Reg) {
- AntiDepReg = 0;
- break;
+ if (MI->getDesc().hasExtraDefRegAllocReq())
+ // If this instruction's defs have special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && AntiDepReg == Reg) {
+ AntiDepReg = 0;
+ break;
+ }
}
}
@@ -715,60 +804,43 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
// TODO: Instead of picking the first free register, consider which might
// be the best.
if (AntiDepReg != 0) {
- for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
- RE = RC->allocation_order_end(MF); R != RE; ++R) {
- unsigned NewReg = *R;
- // Don't replace a register with itself.
- if (NewReg == AntiDepReg) continue;
- // Don't replace a register with one that was recently used to repair
- // an anti-dependence with this AntiDepReg, because that would
- // re-introduce that anti-dependence.
- if (NewReg == LastNewReg[AntiDepReg]) continue;
- // If NewReg is dead and NewReg's most recent def is not before
- // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
- assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for AntiDepReg!");
- assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for NewReg!");
- if (KillIndices[NewReg] == ~0u &&
- Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
- KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
- DOUT << "Breaking anti-dependence edge on "
- << TRI->getName(AntiDepReg)
- << " with " << RegRefs.count(AntiDepReg) << " references"
- << " using " << TRI->getName(NewReg) << "!\n";
-
- // Update the references to the old register to refer to the new
- // register.
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
- std::multimap<unsigned, MachineOperand *>::iterator>
- Range = RegRefs.equal_range(AntiDepReg);
- for (std::multimap<unsigned, MachineOperand *>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q)
- Q->second->setReg(NewReg);
-
- // We just went back in time and modified history; the
- // liveness information for the anti-depenence reg is now
- // inconsistent. Set the state as if it were dead.
- Classes[NewReg] = Classes[AntiDepReg];
- DefIndices[NewReg] = DefIndices[AntiDepReg];
- KillIndices[NewReg] = KillIndices[AntiDepReg];
- assert(((KillIndices[NewReg] == ~0u) !=
- (DefIndices[NewReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for NewReg!");
-
- Classes[AntiDepReg] = 0;
- DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
- KillIndices[AntiDepReg] = ~0u;
- assert(((KillIndices[AntiDepReg] == ~0u) !=
- (DefIndices[AntiDepReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for AntiDepReg!");
-
- RegRefs.erase(AntiDepReg);
- Changed = true;
- LastNewReg[AntiDepReg] = NewReg;
- break;
- }
+ if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC)) {
+ DEBUG(errs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q->second->setReg(NewReg);
+
+ // We just went back in time and modified history; the
+        // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ Changed = true;
+ LastNewReg[AntiDepReg] = NewReg;
}
}
@@ -778,6 +850,177 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
return Changed;
}
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+ // Initialize the indices to indicate that no registers are live.
+ std::fill(KillIndices, array_endof(KillIndices), ~0u);
+
+ // Determine the live-out physregs for this block.
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+ MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (KillIndices[MO.getReg()] != ~0u) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and keep MO marked as killed.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ MI->addOperand(MachineOperand::CreateReg(*Subreg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ false /*IsDead*/));
+ AllDead = false;
+ }
+ }
+
+  if (AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+/// FixupKills - Fix the register kill flags, which may have been made
+/// incorrect by instruction reordering.
+///
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+ DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');
+
+ std::set<unsigned> killedRegs;
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+ StartBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+      // Update liveness. Registers that are def'd but not used in this
+      // instruction are now dead. Mark the register and all its subregs
+      // as dead, since they are completely defined here.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ KillIndices[Reg] = ~0u;
+
+ // Repeat for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = ~0u;
+ }
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use.
+ killedRegs.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ bool kill = false;
+ if (killedRegs.find(Reg) == killedRegs.end()) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ kill = false;
+ break;
+ }
+ }
+
+          // If no subreg is live, then this use is a kill only if the
+          // register itself is not live below this instruction.
+ if (kill)
+ kill = (KillIndices[Reg] == ~0u);
+ }
+
+ if (MO.isKill() != kill) {
+ bool removed = ToggleKillFlag(MI, MO);
+ if (removed) {
+ DEBUG(errs() << "Fixed <removed> in ");
+ } else {
+ DEBUG(errs() << "Fixed " << MO << " in ");
+ }
+ DEBUG(MI->dump());
+ }
+
+ killedRegs.insert(Reg);
+ }
+
+      // Mark any register used by a non-undef operand, along with its
+      // subregs, as now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ KillIndices[Reg] = Count;
+
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = Count;
+ }
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Top-Down Scheduling
//===----------------------------------------------------------------------===//
@@ -786,17 +1029,17 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
- --SuccSU->NumPredsLeft;
-
+
#ifndef NDEBUG
- if (SuccSU->NumPredsLeft < 0) {
- cerr << "*** Scheduling failed! ***\n";
+ if (SuccSU->NumPredsLeft == 0) {
+ errs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
- cerr << " has been released too many times!\n";
- assert(0);
+ errs() << " has been released too many times!\n";
+ llvm_unreachable(0);
}
#endif
-
+ --SuccSU->NumPredsLeft;
+
// Compute how many cycles it will be before this actually becomes
// available. This is the max of the start time of all predecessors plus
// their latencies.
@@ -819,7 +1062,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
@@ -848,6 +1091,10 @@ void SchedulePostRATDList::ListScheduleTopDown() {
}
}
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
@@ -866,13 +1113,14 @@ void SchedulePostRATDList::ListScheduleTopDown() {
} else if (PendingQueue[i]->getDepth() < MinDepth)
MinDepth = PendingQueue[i]->getDepth();
}
-
- // If there are no instructions available, don't try to issue anything, and
- // don't advance the hazard recognizer.
- if (AvailableQueue.empty()) {
- CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
- continue;
- }
+
+ DEBUG(errs() << "\n*** Examining Available\n";
+ LatencyPriorityQueue q = AvailableQueue;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ errs() << "Height " << su->getHeight() << ": ";
+ su->dump(this);
+ });
SUnit *FoundSUnit = 0;
@@ -903,27 +1151,38 @@ void SchedulePostRATDList::ListScheduleTopDown() {
if (FoundSUnit) {
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
-
- // If this is a pseudo-op node, we don't want to increment the current
- // cycle.
- if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
- } else if (!HasNoopHazards) {
- // Otherwise, we have a pipeline stall, but no other problem, just advance
- // the current cycle and try again.
- DOUT << "*** Advancing cycle, no work to do\n";
- HazardRec->AdvanceCycle();
- ++NumStalls;
- ++CurCycle;
+ CycleHasInsts = true;
+
+ // If we are using the target-specific hazards, then don't
+ // advance the cycle time just because we schedule a node. If
+ // the target allows it we can schedule multiple nodes in the
+ // same cycle.
+ if (!EnablePostRAHazardAvoidance) {
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ }
} else {
- // Otherwise, we have no instructions to issue and we have instructions
- // that will fault if we don't do this right. This is the case for
- // processors without pipeline interlocks and other cases.
- DOUT << "*** Emitting noop\n";
- HazardRec->EmitNoop();
- Sequence.push_back(0); // NULL here means noop
- ++NumNoops;
+ if (CycleHasInsts) {
+ DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ }
+
++CurCycle;
+ CycleHasInsts = false;
}
}
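The new command-line knobs above can be driven from llc. The flag names are taken verbatim from the cl::opt declarations in this file; the invocations themselves are illustrative sketches, and the debugdiv/debugmod filter is compiled in only when NDEBUG is not defined:

  # Force post-RA scheduling on, with the simple hazard recognizer.
  llc -post-RA-scheduler -avoid-hazards=false foo.bc -o foo.s
  # Debug builds: schedule only every 8th block (counter % 8 == 3).
  llc -post-RA-scheduler -postra-sched-debugdiv=8 -postra-sched-debugmod=3 foo.bc -o foo.s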
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index ae60c86c3d7c..8fa07d4d9afc 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -67,7 +68,7 @@ namespace {
MachineBasicBlock *BarrierMBB;
// Barrier - Current barrier index.
- unsigned BarrierIdx;
+ LiveIndex BarrierIdx;
// CurrLI - Current live interval being split.
LiveInterval *CurrLI;
@@ -82,7 +83,7 @@ namespace {
DenseMap<unsigned, int> IntervalSSMap;
// Def2SpillMap - A map from a def instruction index to spill index.
- DenseMap<unsigned, unsigned> Def2SpillMap;
+ DenseMap<LiveIndex, LiveIndex> Def2SpillMap;
public:
static char ID;
@@ -91,6 +92,7 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<LiveStacks>();
@@ -119,33 +121,31 @@ namespace {
}
/// print - Implement the dump method.
- virtual void print(std::ostream &O, const Module* M = 0) const {
+ virtual void print(raw_ostream &O, const Module* M = 0) const {
LIs->print(O, M);
}
- void print(std::ostream *O, const Module* M = 0) const {
- if (O) print(*O, M);
- }
private:
MachineBasicBlock::iterator
findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
- unsigned&);
+ LiveIndex&);
MachineBasicBlock::iterator
findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
- SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+ SmallPtrSet<MachineInstr*, 4>&, LiveIndex&);
MachineBasicBlock::iterator
- findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned,
- SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+ findRestorePoint(MachineBasicBlock*, MachineInstr*, LiveIndex,
+ SmallPtrSet<MachineInstr*, 4>&, LiveIndex&);
int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
- bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned,
- unsigned&, int&) const;
+ bool IsAvailableInStack(MachineBasicBlock*, unsigned,
+ LiveIndex, LiveIndex,
+ LiveIndex&, int&) const;
- void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned);
+ void UpdateSpillSlotInterval(VNInfo*, LiveIndex, LiveIndex);
bool SplitRegLiveInterval(LiveInterval*);
@@ -157,7 +157,7 @@ namespace {
bool Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- unsigned RestoreIdx,
+ LiveIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
MachineInstr* DefMI,
@@ -209,11 +209,12 @@ const PassInfo *const llvm::PreAllocSplittingID = &X;
/// instruction index map. If there isn't one, return end().
MachineBasicBlock::iterator
PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
- unsigned &SpotIndex) {
+ LiveIndex &SpotIndex) {
MachineBasicBlock::iterator MII = MI;
if (++MII != MBB->end()) {
- unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
- if (Index) {
+ LiveIndex Index =
+ LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
+ if (Index != LiveIndex()) {
SpotIndex = Index;
return MII;
}
@@ -229,7 +230,7 @@ MachineBasicBlock::iterator
PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineInstr *DefMI,
SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- unsigned &SpillIndex) {
+ LiveIndex &SpillIndex) {
MachineBasicBlock::iterator Pt = MBB->begin();
MachineBasicBlock::iterator MII = MI;
@@ -242,7 +243,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
while (MII != EndPt && !RefsInMBB.count(MII)) {
- unsigned Index = LIs->getInstructionIndex(MII);
+ LiveIndex Index = LIs->getInstructionIndex(MII);
// We can't insert the spill between the barrier (a call), and its
// corresponding call frame setup.
@@ -275,9 +276,9 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
/// found.
MachineBasicBlock::iterator
PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
- unsigned LastIdx,
+ LiveIndex LastIdx,
SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- unsigned &RestoreIndex) {
+ LiveIndex &RestoreIndex) {
// FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
// begin index accordingly.
MachineBasicBlock::iterator Pt = MBB->end();
@@ -298,10 +299,10 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
// FIXME: Limit the number of instructions to examine to reduce
// compile time?
while (MII != EndPt) {
- unsigned Index = LIs->getInstructionIndex(MII);
+ LiveIndex Index = LIs->getInstructionIndex(MII);
if (Index > LastIdx)
break;
- unsigned Gap = LIs->findGapBeforeInstr(Index);
+ LiveIndex Gap = LIs->findGapBeforeInstr(Index);
// We can't insert a restore between the barrier (a call) and its
// corresponding call frame teardown.
@@ -310,7 +311,7 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
++MII;
} while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
- } else if (Gap) {
+ } else if (Gap != LiveIndex()) {
Pt = MII;
RestoreIndex = Gap;
}
@@ -343,7 +344,8 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false,
+ LSs->getVNInfoAllocator());
return SS;
}
@@ -351,8 +353,9 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
/// slot at the specified index.
bool
PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
- unsigned Reg, unsigned DefIndex,
- unsigned RestoreIndex, unsigned &SpillIndex,
+ unsigned Reg, LiveIndex DefIndex,
+ LiveIndex RestoreIndex,
+ LiveIndex &SpillIndex,
int& SS) const {
if (!DefMBB)
return false;
@@ -360,7 +363,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
if (I == IntervalSSMap.end())
return false;
- DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex);
+ DenseMap<LiveIndex, LiveIndex>::iterator
+ II = Def2SpillMap.find(DefIndex);
if (II == Def2SpillMap.end())
return false;
@@ -380,8 +384,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
/// interval being split, and the spill and restore indicies, update the live
/// interval of the spill stack slot.
void
-PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
- unsigned RestoreIndex) {
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex,
+ LiveIndex RestoreIndex) {
assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
"Expect restore in the barrier mbb");
@@ -394,8 +398,8 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
}
SmallPtrSet<MachineBasicBlock*, 4> Processed;
- unsigned EndIdx = LIs->getMBBEndIdx(MBB);
- LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo);
+ LiveIndex EndIdx = LIs->getMBBEndIdx(MBB);
+ LiveRange SLR(SpillIndex, LIs->getNextSlot(EndIdx), CurrSValNo);
CurrSLI->addRange(SLR);
Processed.insert(MBB);
@@ -414,7 +418,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
WorkList.pop_back();
if (Processed.count(MBB))
continue;
- unsigned Idx = LIs->getMBBStartIdx(MBB);
+ LiveIndex Idx = LIs->getMBBStartIdx(MBB);
LR = CurrLI->getLiveRangeContaining(Idx);
if (LR && LR->valno == ValNo) {
EndIdx = LIs->getMBBEndIdx(MBB);
@@ -424,7 +428,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
CurrSLI->addRange(SLR);
} else if (LR->end > EndIdx) {
// Live range extends beyond end of mbb, process successors.
- LiveRange SLR(Idx, EndIdx+1, CurrSValNo);
+ LiveRange SLR(Idx, LIs->getNextIndex(EndIdx), CurrSValNo);
CurrSLI->addRange(SLR);
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI)
@@ -487,12 +491,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
}
// Once we've found it, extend its VNInfo to our instruction.
- unsigned DefIndex = LIs->getInstructionIndex(Walker);
- DefIndex = LiveIntervals::getDefIndex(DefIndex);
- unsigned EndIndex = LIs->getMBBEndIdx(MBB);
+ LiveIndex DefIndex = LIs->getInstructionIndex(Walker);
+ DefIndex = LIs->getDefIndex(DefIndex);
+ LiveIndex EndIndex = LIs->getMBBEndIdx(MBB);
RetVNI = NewVNs[Walker];
- LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI));
+ LI->addRange(LiveRange(DefIndex, LIs->getNextSlot(EndIndex), RetVNI));
} else if (!ContainsDefs && ContainsUses) {
SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
@@ -524,12 +528,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
IsTopLevel, IsIntraBlock);
}
- unsigned UseIndex = LIs->getInstructionIndex(Walker);
- UseIndex = LiveIntervals::getUseIndex(UseIndex);
- unsigned EndIndex = 0;
+ LiveIndex UseIndex = LIs->getInstructionIndex(Walker);
+ UseIndex = LIs->getUseIndex(UseIndex);
+ LiveIndex EndIndex;
if (IsIntraBlock) {
EndIndex = LIs->getInstructionIndex(UseI);
- EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ EndIndex = LIs->getUseIndex(EndIndex);
} else
EndIndex = LIs->getMBBEndIdx(MBB);
@@ -538,12 +542,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
NewVNs, LiveOut, Phis, false, true);
- LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI));
+ LI->addRange(LiveRange(UseIndex, LIs->getNextSlot(EndIndex), RetVNI));
// FIXME: Need to set kills properly for inter-block stuff.
- if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex);
+ if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex);
if (IsIntraBlock)
- LI->addKill(RetVNI, EndIndex);
+ RetVNI->addKill(EndIndex);
} else if (ContainsDefs && ContainsUses) {
SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
@@ -584,13 +588,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
IsTopLevel, IsIntraBlock);
}
- unsigned StartIndex = LIs->getInstructionIndex(Walker);
- StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) :
- LiveIntervals::getUseIndex(StartIndex);
- unsigned EndIndex = 0;
+ LiveIndex StartIndex = LIs->getInstructionIndex(Walker);
+ StartIndex = foundDef ? LIs->getDefIndex(StartIndex) :
+ LIs->getUseIndex(StartIndex);
+ LiveIndex EndIndex;
if (IsIntraBlock) {
EndIndex = LIs->getInstructionIndex(UseI);
- EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ EndIndex = LIs->getUseIndex(EndIndex);
} else
EndIndex = LIs->getMBBEndIdx(MBB);
@@ -600,12 +604,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
NewVNs, LiveOut, Phis, false, true);
- LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+ LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI));
- if (foundUse && LI->isKill(RetVNI, StartIndex))
- LI->removeKill(RetVNI, StartIndex);
+ if (foundUse && RetVNI->isKill(StartIndex))
+ RetVNI->removeKill(StartIndex);
if (IsIntraBlock) {
- LI->addKill(RetVNI, EndIndex);
+ RetVNI->addKill(EndIndex);
}
}
@@ -636,9 +640,10 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
// assume that we are not intrablock here.
if (Phis.count(MBB)) return Phis[MBB];
- unsigned StartIndex = LIs->getMBBStartIdx(MBB);
+ LiveIndex StartIndex = LIs->getMBBStartIdx(MBB);
VNInfo *RetVNI = Phis[MBB] =
- LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator());
+ LI->getNextValue(LiveIndex(), /*FIXME*/ 0, false,
+ LIs->getVNInfoAllocator());
if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
@@ -680,21 +685,21 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
I->second->setHasPHIKill(true);
- unsigned KillIndex = LIs->getMBBEndIdx(I->first);
- if (!LiveInterval::isKill(I->second, KillIndex))
- LI->addKill(I->second, KillIndex);
+ LiveIndex KillIndex = LIs->getMBBEndIdx(I->first);
+ if (!I->second->isKill(KillIndex))
+ I->second->addKill(KillIndex);
}
}
- unsigned EndIndex = 0;
+ LiveIndex EndIndex;
if (IsIntraBlock) {
EndIndex = LIs->getInstructionIndex(UseI);
- EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ EndIndex = LIs->getUseIndex(EndIndex);
} else
EndIndex = LIs->getMBBEndIdx(MBB);
- LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+ LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI));
if (IsIntraBlock)
- LI->addKill(RetVNI, EndIndex);
+ RetVNI->addKill(EndIndex);
// Memoize results so we don't have to recompute them.
if (!IsIntraBlock)
@@ -728,8 +733,8 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DE = MRI->def_end(); DI != DE; ++DI) {
Defs[(*DI).getParent()].insert(&*DI);
- unsigned DefIdx = LIs->getInstructionIndex(&*DI);
- DefIdx = LiveIntervals::getDefIndex(DefIdx);
+ LiveIndex DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = LIs->getDefIndex(DefIdx);
assert(DI->getOpcode() != TargetInstrInfo::PHI &&
"Following NewVN isPHIDef flag incorrect. Fix me!");
@@ -739,7 +744,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
if (DstReg == LI->reg)
- NewVN->copy = &*DI;
+ NewVN->setCopy(&*DI);
NewVNs[&*DI] = NewVN;
}
@@ -764,14 +769,32 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
// Add ranges for dead defs
for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
DE = MRI->def_end(); DI != DE; ++DI) {
- unsigned DefIdx = LIs->getInstructionIndex(&*DI);
- DefIdx = LiveIntervals::getDefIndex(DefIdx);
+ LiveIndex DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = LIs->getDefIndex(DefIdx);
if (LI->liveAt(DefIdx)) continue;
VNInfo* DeadVN = NewVNs[&*DI];
- LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN));
- LI->addKill(DeadVN, DefIdx);
+ LI->addRange(LiveRange(DefIdx, LIs->getNextSlot(DefIdx), DeadVN));
+ DeadVN->addKill(DefIdx);
+ }
+
+ // Update kill markers.
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo* VNI = *VI;
+ for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) {
+ LiveIndex KillIdx = VNI->kills[i];
+ if (KillIdx.isPHIIndex())
+ continue;
+ MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx);
+ if (KillMI) {
+ MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg);
+ if (KillMO)
+ // It could be a dead def.
+ KillMO->setIsKill();
+ }
+ }
}
}
@@ -801,14 +824,16 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
VNsToCopy.push_back(OldVN);
// Locate two-address redefinitions
- for (SmallVector<unsigned, 4>::iterator KI = OldVN->kills.begin(),
+ for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(),
KE = OldVN->kills.end(); KI != KE; ++KI) {
+ assert(!KI->isPHIIndex() &&
+ "VN previously reported having no PHI kills.");
MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
if (DefIdx == ~0U) continue;
if (MI->isRegTiedToUseOperand(DefIdx)) {
VNInfo* NextVN =
- CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI));
+ CurrLI->findDefinedVNInfoForRegInt(LIs->getDefIndex(*KI));
if (NextVN == OldVN) continue;
Stack.push_back(NextVN);
}
@@ -840,10 +865,10 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
E = MRI->reg_end(); I != E; ++I) {
MachineOperand& MO = I.getOperand();
- unsigned InstrIdx = LIs->getInstructionIndex(&*I);
+ LiveIndex InstrIdx = LIs->getInstructionIndex(&*I);
- if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) ||
- (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx))))
+ if ((MO.isUse() && NewLI.liveAt(LIs->getUseIndex(InstrIdx))) ||
+ (MO.isDef() && NewLI.liveAt(LIs->getDefIndex(InstrIdx))))
OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
}
@@ -865,15 +890,15 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
NumRenumbers++;
}
-bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
+bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- unsigned RestoreIdx,
+ LiveIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- unsigned KillIdx = 0;
+ LiveIndex KillIdx;
if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
else
@@ -882,13 +907,13 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
if (KillPt == DefMI->getParent()->end())
return false;
- TII->reMaterialize(MBB, RestorePt, vreg, DefMI);
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI);
LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
ReconstructLiveInterval(CurrLI);
- unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt));
- RematIdx = LiveIntervals::getDefIndex(RematIdx);
- RenumberValno(CurrLI->findDefinedVNInfo(RematIdx));
+ LiveIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RematIdx = LIs->getDefIndex(RematIdx);
+ RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
++NumSplits;
++NumRemats;
@@ -943,7 +968,8 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false,
+ LSs->getVNInfoAllocator());
}
return FMI;
@@ -1033,11 +1059,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
VNInfo *ValNo = LR->valno;
- if (ValNo->isUnused()) {
- // Defined by a dead def? How can this be?
- assert(0 && "Val# is defined by a dead def?");
- abort();
- }
+ assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
MachineInstr *DefMI = ValNo->isDefAccurate()
? LIs->getInstructionFromIndex(ValNo->def) : NULL;
@@ -1056,7 +1078,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Find a point to restore the value after the barrier.
- unsigned RestoreIndex = 0;
+ LiveIndex RestoreIndex;
MachineBasicBlock::iterator RestorePt =
findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
if (RestorePt == BarrierMBB->end())
@@ -1070,7 +1092,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// Add a spill either before the barrier or after the definition.
MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
- unsigned SpillIndex = 0;
+ LiveIndex SpillIndex;
MachineInstr *SpillMI = NULL;
int SS = -1;
if (!ValNo->isDefAccurate()) {
@@ -1098,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
return false; // Def is dead. Do nothing.
if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
- BarrierMBB, SS, RefsInMBB))) {
+ BarrierMBB, SS, RefsInMBB))) {
SpillIndex = LIs->getInstructionIndex(SpillMI);
} else {
// Check if it's possible to insert a spill after the def MI.
@@ -1114,11 +1136,9 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
if (SpillPt == DefMBB->end())
return false; // No gap to insert spill.
}
- // Add spill. The store instruction kills the register if def is before
- // the barrier in the barrier block.
+ // Add spill.
SS = CreateSpillStackSlot(CurrLI->reg, RC);
- TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg,
- DefMBB == BarrierMBB, SS, RC);
+ TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
SpillMI = prior(SpillPt);
LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
}
@@ -1142,15 +1162,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Update spill stack slot live interval.
- UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1,
+ UpdateSpillSlotInterval(ValNo, LIs->getNextSlot(LIs->getUseIndex(SpillIndex)),
LIs->getDefIndex(RestoreIndex));
ReconstructLiveInterval(CurrLI);
-
+
if (!FoldedRestore) {
- unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
- RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx);
- RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx));
+ LiveIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RestoreIdx = LIs->getDefIndex(RestoreIdx);
+ RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx));
}
++NumSplits;
@@ -1189,8 +1209,6 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
while (!Intervals.empty()) {
if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
break;
- else if (NumSplits == 4)
- Change |= Change;
LiveInterval *LI = Intervals.back();
Intervals.pop_back();
bool result = SplitRegLiveInterval(LI);
@@ -1236,8 +1254,8 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// reaching definition (VNInfo).
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
UE = MRI->use_end(); UI != UE; ++UI) {
- unsigned index = LIs->getInstructionIndex(&*UI);
- index = LiveIntervals::getUseIndex(index);
+ LiveIndex index = LIs->getInstructionIndex(&*UI);
+ index = LIs->getUseIndex(index);
const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
VNUseCount[LR->valno].insert(&*UI);
@@ -1386,7 +1404,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
if (LR->valno->hasPHIKill())
return false;
- unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+ LiveIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
if (LR->end < MBBEnd)
return false;
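
Most of the churn in PreAllocSplitting.cpp above is the mechanical conversion from raw unsigned indices to the LiveIndex type. The payoff is a typed sentinel in place of magic values (compare `if (Index)` becoming `if (Index != LiveIndex())`) and accessor calls like LIs->getNextSlot(EndIdx) in place of raw `EndIdx+1` arithmetic. A hedged sketch of such a wrapper, simplified from whatever the real class does (the real LiveIndex carries more machinery, such as use/def slots and PHI markers):

    // Illustrative index wrapper; names and the zero sentinel are assumptions.
    #include <cassert>

    class Index {
      unsigned Value;
    public:
      Index() : Value(0) {}                     // default-constructed = "no index"
      explicit Index(unsigned V) : Value(V) {}
      bool operator==(Index RHS) const { return Value == RHS.Value; }
      bool operator!=(Index RHS) const { return Value != RHS.Value; }
      bool operator<(Index RHS) const { return Value < RHS.Value; }
    };

    int main() {
      Index Gap;                    // no gap found yet
      assert(Gap == Index());       // the old `if (Index)` becomes this test
      Gap = Index(42);
      assert(Gap != Index());
      assert(Index(40) < Gap);      // ordering still works for range checks
      return 0;
    }
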
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 9e7ad6752a73..8793df7705fa 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -31,7 +31,9 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
@@ -51,22 +53,26 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
// Get MachineModuleInfo so that we can track the construction of the
// frame.
if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+ // Calculate the MaxCallFrameSize and HasCalls variables for the function's
+ // frame information. Also eliminates call frame pseudo instructions.
+ calculateCallsInformation(Fn);
+
// Allow the target machine to make some adjustments to the function
// e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
- // Scan the function for modified callee saved registers and insert spill
- // code for any callee saved registers that are modified. Also calculate
- // the MaxCallFrameSize and HasCalls variables for the function's frame
- // information and eliminates call frame pseudo instructions.
+ // Scan the function for modified callee saved registers and insert spill code
+ // for any callee saved registers that are modified.
calculateCalleeSavedRegisters(Fn);
// Determine placement of CSR spill/restore code:
@@ -78,7 +84,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
placeCSRSpillsAndRestores(Fn);
// Add the code to save and restore the callee saved registers
- insertCSRSpillsAndRestores(Fn);
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
@@ -92,13 +99,20 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters
// must be called before this function in order to set the HasCalls
// and MaxCallFrameSize variables.
- insertPrologEpilogCode(Fn);
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
//
replaceFrameIndices(Fn);
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn);
+
delete RS;
clearAllSets();
return true;
@@ -117,35 +131,24 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
}
#endif
-/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
-/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
-/// the function's frame information and eliminates call frame pseudo
-/// instructions.
-///
-void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
- // Get the callee saved register list...
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+ unsigned MaxCallFrameSize = 0;
+ bool HasCalls = false;
// Get the function call frame set-up and tear-down instruction opcode
int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
- // These are used to keep track the callee-save area. Initialize them.
- MinCSFrameIndex = INT_MAX;
- MaxCSFrameIndex = 0;
-
- // Early exit for targets which have no callee saved registers and no call
- // frame setup/destroy pseudo instructions.
- if ((CSRegs == 0 || CSRegs[0] == 0) &&
- FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
return;
- unsigned MaxCallFrameSize = 0;
- bool HasCalls = false;
-
std::vector<MachineBasicBlock::iterator> FrameSDOps;
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
@@ -157,31 +160,57 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
HasCalls = true;
FrameSDOps.push_back(I);
+ } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) {
+ // An InlineAsm might be a call; assume it is to get the stack frame
+ // aligned correctly for calls.
+ HasCalls = true;
}
MachineFrameInfo *FFI = Fn.getFrameInfo();
FFI->setHasCalls(HasCalls);
FFI->setMaxCallFrameSize(MaxCallFrameSize);
- for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
- MachineBasicBlock::iterator I = FrameSDOps[i];
- // If call frames are not being included as part of the stack frame,
- // and there is no dynamic allocation (therefore referencing frame slots
- // off sp), leave the pseudo ops alone. We'll eliminate them later.
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // there is no dynamic allocation (therefore referencing frame slots off
+ // sp), leave the pseudo ops alone. We'll eliminate them later.
if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
+}
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers.
+ if (CSRegs == 0 || CSRegs[0] == 0)
+ return;
- // Now figure out which *callee saved* registers are modified by the current
+ // Figure out which *callee saved* registers are modified by the current
// function, thus needing to be saved and restored in the prolog/epilog.
- //
- const TargetRegisterClass* const *CSRegClasses =
+ const TargetRegisterClass * const *CSRegClasses =
RegInfo->getCalleeSavedRegClasses(&Fn);
+
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
- // If the reg is modified, save it!
+ // If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
} else {
for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
@@ -198,39 +227,47 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return; // Early exit if no callee saved registers are modified!
unsigned NumFixedSpillSlots;
- const std::pair<unsigned,int> *FixedSpillSlots =
+ const TargetFrameInfo::SpillSlot *FixedSpillSlots =
TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = CSI[i].getRegClass();
+ for (std::vector<CalleeSavedInfo>::iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = I->getRegClass();
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
// Check to see if this physreg must be spilled to a particular stack slot
// on this target.
- const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+ const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
- FixedSlot->first != Reg)
+ FixedSlot->Reg != Reg)
++FixedSlot;
- int FrameIdx;
- if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
// Nope, just spill it anywhere convenient.
unsigned Align = RC->getAlignment();
unsigned StackAlign = TFI->getStackAlignment();
- // We may not be able to sastify the desired alignment specification of
- // the TargetRegisterClass if the stack alignment is smaller.
- // Use the min.
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
Align = std::min(Align, StackAlign);
FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset);
}
- CSI[i].setFrameIdx(FrameIdx);
+
+ I->setFrameIdx(FrameIdx);
}
FFI->setCalleeSavedInfo(CSI);
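
The hunk above also makes the spill-slot assignment for callee-saved registers a three-step decision: a slot the target has already reserved wins, then a slot pinned by the fixed-spill-slot table for that physreg, and only otherwise a freshly created slot aligned to min(register-class alignment, stack alignment). A toy sketch of that order, assuming simplified stand-in types rather than the LLVM API:

    // Toy decision-order sketch; SpillSlot is a simplified stand-in.
    #include <vector>

    struct SpillSlot { unsigned Reg; int Offset; }; // cf. TargetFrameInfo::SpillSlot

    enum SlotKind { ReservedSlot, FixedSlot, AnySlot };

    SlotKind classifySpillSlot(unsigned Reg,
                               const std::vector<SpillSlot> &FixedSlots,
                               bool HasReservedSlot) {
      if (HasReservedSlot)
        return ReservedSlot;             // the target already picked a slot
      for (std::vector<SpillSlot>::const_iterator I = FixedSlots.begin(),
           E = FixedSlots.end(); I != E; ++I)
        if (I->Reg == Reg)
          return FixedSlot;              // must be spilled at I->Offset
      // Otherwise spill anywhere convenient, aligned to
      // min(register-class alignment, stack alignment).
      return AnySlot;
    }

    int main() {
      std::vector<SpillSlot> Fixed(1);
      Fixed[0].Reg = 7;
      Fixed[0].Offset = -8;
      return classifySpillSlot(7, Fixed, false) == FixedSlot ? 0 : 1;
    }
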
@@ -244,6 +281,8 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
MachineFrameInfo *FFI = Fn.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+ FFI->setCalleeSavedInfoValid(true);
+
// Early exit if no callee saved registers are modified!
if (CSI.empty())
return;
@@ -403,8 +442,7 @@ static inline void
AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign) {
- // If stack grows down, we need to add size of find the lowest address of the
- // object.
+ // If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += FFI->getObjectSize(FrameIdx);
@@ -437,16 +475,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *FFI = Fn.getFrameInfo();
- unsigned MaxAlign = FFI->getMaxAlignment();
+ unsigned MaxAlign = 1;
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
// of stack growth -- so it's always nonnegative.
- int64_t Offset = TFI.getOffsetOfLocalArea();
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
if (StackGrowsDown)
- Offset = -Offset;
- assert(Offset >= 0
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
&& "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
@@ -538,32 +577,38 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
- // Round up the size to a multiple of the alignment, but only if there are
- // calls or alloca's in the function. This ensures that any calls to
- // subroutines have their stack frames suitable aligned.
- // Also do this if we need runtime alignment of the stack. In this case
- // offsets will be relative to SP not FP; round up the stack size so this
- // works.
- if (!RegInfo->targetHandlesStackFrameRounding() &&
- (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(Fn) &&
- FFI->getObjectIndexEnd() != 0))) {
+ if (!RegInfo->targetHandlesStackFrameRounding()) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (RegInfo->hasReservedCallFrame(Fn))
+ if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
Offset += FFI->getMaxCallFrameSize();
- unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+ // If the frame pointer is eliminated, all frame offsets will be relative
+ // to SP not FP; align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
}
// Update frame info to pretend that this is part of the stack...
- FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
+ FFI->setStackSize(Offset - LocalAreaOffset);
// Remember the required stack alignment in case targets need it to perform
// dynamic stack alignment.
- FFI->setMaxAlignment(MaxAlign);
+ if (MaxAlign > FFI->getMaxAlignment())
+ FFI->setMaxAlignment(MaxAlign);
}
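
The rounding near the end of the hunk above is the standard power-of-two align-up: with StackAlign a power of two, (Offset + AlignMask) & ~AlignMask bumps Offset to the next multiple of StackAlign and leaves already aligned values untouched. A quick self-checking sketch:

    // Self-checking sketch of the align-up idiom; assumes Align is a
    // power of two, as stack alignments are.
    #include <cassert>
    #include <stdint.h>

    uint64_t alignUp(uint64_t Offset, uint64_t Align) {
      uint64_t AlignMask = Align - 1;
      return (Offset + AlignMask) & ~AlignMask;
    }

    int main() {
      assert(alignUp(0, 16) == 0);
      assert(alignUp(1, 16) == 16);
      assert(alignUp(40, 16) == 48);   // e.g. a 40-byte frame, 16-byte alignment
      assert(alignUp(48, 16) == 48);   // already aligned: unchanged
      return 0;
    }
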
@@ -604,14 +649,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
int SPAdj = 0; // SP offset due to call frame setup / destroy.
- if (RS) RS->enterBasicBlock(BB);
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
- if (I->getOpcode() == TargetInstrInfo::DECLARE) {
- // Ignore it.
- ++I;
- continue;
- }
if (I->getOpcode() == FrameSetupOpcode ||
I->getOpcode() == FrameDestroyOpcode) {
@@ -654,8 +694,16 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
-
- TRI.eliminateFrameIndex(MI, SPAdj, RS);
+ int Value;
+ unsigned VReg =
+ TRI.eliminateFrameIndex(MI, SPAdj, &Value,
+ FrameIndexVirtualScavenging ? NULL : RS);
+ if (VReg) {
+ assert (FrameIndexVirtualScavenging &&
+ "Not scavenging, but virtual returned from "
+ "eliminateFrameIndex()!");
+ FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
+ }
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -670,10 +718,170 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (DoIncr && I != BB->end()) ++I;
// Update register states.
- if (RS && MI) RS->forward(MI);
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
}
assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
}
}
+/// findLastUseReg - find the killing use of the specified register within
+/// the instruction range. Return the operand number of the kill in Operand.
+static MachineBasicBlock::iterator
+findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME,
+ unsigned Reg, unsigned *Operand) {
+ // Scan forward to find the last use of this virtual register
+ for (++I; I != ME; ++I) {
+ MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isReg()) {
+ unsigned OpReg = MI->getOperand(i).getReg();
+ if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg))
+ continue;
+ assert (OpReg == Reg
+ && "overlapping use of scavenged index register!");
+ // If this is the killing use, we're done
+ if (MI->getOperand(i).isKill()) {
+ if (Operand)
+ *Operand = i;
+ return I;
+ }
+ }
+ }
+ // If we hit the end of the basic block, there was no kill of
+ // the virtual register, which is wrong.
+ assert (0 && "scavenged index register never killed!");
+ return ME;
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(BB);
+
+ unsigned CurrentVirtReg = 0;
+ unsigned CurrentScratchReg = 0;
+ bool havePrevValue = false;
+ unsigned PrevScratchReg = 0;
+ int PrevValue;
+ MachineInstr *PrevLastUseMI = NULL;
+ unsigned PrevLastUseOp = 0;
+ bool trackingCurrentValue = false;
+ int SPAdj = 0;
+ int Value = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+ // Likewise, call getNumOperands() each iteration, as the MI may change
+ // inside the loop (with 'i' updated accordingly).
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // If we have an active scavenged register, we shouldn't be
+ // seeing any references to it.
+ assert (Reg != CurrentScratchReg
+ && "overlapping use of scavenged frame index register!");
+
+ // If we have a previous scratch reg, check and see if anything
+ // here kills whatever value is in there.
+ if (Reg == PrevScratchReg) {
+ if (MO.isUse()) {
+ // Two-address operands implicitly kill
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+ havePrevValue = false;
+ PrevScratchReg = 0;
+ }
+ } else {
+ assert (MO.isDef());
+ havePrevValue = false;
+ PrevScratchReg = 0;
+ }
+ }
+ continue;
+ }
+
+ // Have we already allocated a scratch register for this virtual?
+ if (Reg != CurrentVirtReg) {
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // We can't have nested virtual register live ranges because
+ // there's only a guarantee of one scavenged register at a time.
+ assert (CurrentVirtReg == 0 &&
+ "overlapping frame index virtual registers!");
+
+ // If the target gave us information about what's in the register,
+ // we can use that to re-use scratch regs.
+ DenseMap<unsigned, FrameConstantEntry>::iterator Entry =
+ FrameConstantRegMap.find(Reg);
+ trackingCurrentValue = Entry != FrameConstantRegMap.end();
+ if (trackingCurrentValue) {
+ SPAdj = (*Entry).second.second;
+ Value = (*Entry).second.first;
+ } else
+ SPAdj = Value = 0;
+
+ // If the scratch register from the last allocation is still
+ // available, see if the value matches. If it does, just re-use it.
+ if (trackingCurrentValue && havePrevValue && PrevValue == Value) {
+ // FIXME: This assumes that the instructions in the live range
+ // for the virtual register are exclusively for the purpose
+ // of populating the value in the register. That's reasonable
+ // for these frame index registers, but it's still a very, very
+ // strong assumption. Perhaps this implies that the frame index
+ // elimination should be before register allocation, with
+ // conservative heuristics since we'll know less then, and
+ // the reuse calculations done directly when doing the code-gen?
+
+ // Find the last use of the new virtual register. Remove all
+ // instructions between here and there, and update the current
+ // instruction to reference the last use insn instead.
+ MachineBasicBlock::iterator LastUseMI =
+ findLastUseReg(I, BB->end(), Reg, &i);
+ // Remove all instructions up 'til the last use, since they're
+ // just calculating the value we already have.
+ BB->erase(I, LastUseMI);
+ MI = I = LastUseMI;
+
+ CurrentScratchReg = PrevScratchReg;
+ // Extend the live range of the register
+ PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false);
+ RS->setUsed(CurrentScratchReg);
+ } else {
+ CurrentVirtReg = Reg;
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ CurrentScratchReg = RS->FindUnusedReg(RC);
+ if (CurrentScratchReg == 0)
+ // No register is "free". Scavenge a register.
+ CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+
+ PrevValue = Value;
+ }
+ }
+ assert (CurrentScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(CurrentScratchReg);
+
+ // If this is the last use of the register, stop tracking it.
+ if (MI->getOperand(i).isKill()) {
+ PrevScratchReg = CurrentScratchReg;
+ PrevLastUseMI = MI;
+ PrevLastUseOp = i;
+ CurrentScratchReg = CurrentVirtReg = 0;
+ havePrevValue = trackingCurrentValue;
+ }
+ }
+ RS->forward(MI);
+ }
+ }
+}
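
The new scavengeFrameVirtualRegs post-pass keys scratch-register reuse off FrameConstantRegMap, which records, per frame-index virtual register, the (constant value, SP adjustment) pair reported back from eliminateFrameIndex. The sketch below isolates just the reuse test; std::map stands in for llvm::DenseMap and the free function is a hypothetical simplification:

    // Hedged sketch of the scratch-register reuse test only.
    #include <map>
    #include <utility>

    typedef std::pair<int, int> FrameConstantEntry; // (Value, SPAdj), as in the patch

    bool canReuseScratch(const std::map<unsigned, FrameConstantEntry> &RegMap,
                         unsigned VReg, bool HavePrevValue, int PrevValue,
                         int &SPAdj, int &Value) {
      std::map<unsigned, FrameConstantEntry>::const_iterator I = RegMap.find(VReg);
      bool Tracking = (I != RegMap.end());
      if (Tracking) {
        Value = I->second.first;    // constant materialized into the register
        SPAdj = I->second.second;   // SP adjustment at the definition point
      } else {
        SPAdj = Value = 0;
      }
      // Reuse the previous scratch register only when it provably still
      // holds the exact value this virtual register would recompute.
      return Tracking && HavePrevValue && PrevValue == Value;
    }

    int main() {
      std::map<unsigned, FrameConstantEntry> M;
      M[1] = FrameConstantEntry(16, 0);   // vreg 1 materializes constant 16
      int SPAdj, Value;
      return canReuseScratch(M, 1, true, 16, SPAdj, Value) ? 0 : 1;
    }
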
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index c158dd8ac232..931f1eb231b2 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class RegScavenger;
@@ -93,6 +94,17 @@ namespace llvm {
// functions.
bool ShrinkWrapThisFunction;
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ // When using the scavenger post-pass to resolve frame reference
+ // materialization registers, maintain a map of the registers to
+ // the constant value and SP adjustment associated with it.
+ typedef std::pair<int, int> FrameConstantEntry;
+ DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap;
+
#ifndef NDEBUG
// Machine function handle.
MachineFunction* MF;
@@ -118,10 +130,12 @@ namespace llvm {
CSRegBlockMap &prevRestores);
void placeSpillsAndRestores(MachineFunction &Fn);
void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
void calculateCalleeSavedRegisters(MachineFunction &Fn);
void insertCSRSpillsAndRestores(MachineFunction &Fn);
void calculateFrameObjectOffsets(MachineFunction &Fn);
void replaceFrameIndices(MachineFunction &Fn);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn);
// Initialize DFA sets, called before iterations.
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index b4c20e6bfd31..00c5d46d21a1 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
@@ -38,15 +39,16 @@ static const char *const PSVNames[] = {
"ConstantPool"
};
+// FIXME: THIS IS A HACK!!!!
+// Eventually these should be uniqued on LLVMContext rather than in a managed
+// static. For now, we can safely use the global context for the time being to
+// squeak by.
PseudoSourceValue::PseudoSourceValue() :
- Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {}
+ Value(Type::getInt8PtrTy(getGlobalContext()),
+ PseudoSourceValueVal) {}
-void PseudoSourceValue::dump() const {
- print(errs()); errs() << '\n';
-}
-
-void PseudoSourceValue::print(raw_ostream &OS) const {
- OS << PSVNames[this - *PSVs];
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[this - *PSVs];
}
namespace {
@@ -61,7 +63,7 @@ namespace {
virtual bool isConstant(const MachineFrameInfo *MFI) const;
- virtual void print(raw_ostream &OS) const {
+ virtual void printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
};
@@ -83,7 +85,7 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
this == getConstantPool() ||
this == getJumpTable())
return true;
- assert(0 && "Unknown PseudoSourceValue!");
+ llvm_unreachable("Unknown PseudoSourceValue!");
return false;
}
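
A small but recurring cleanup in this commit, visible above, is replacing assert(0) with llvm_unreachable. The difference shows up in release builds, where assert compiles away and execution would silently fall through. A minimal stand-in illustrating the pattern; MY_UNREACHABLE is a hypothetical macro, not the real one:

    #include <cstdio>
    #include <cstdlib>

    // Unlike assert(0), this still prints and aborts in release builds
    // instead of silently running off the end of the function.
    #define MY_UNREACHABLE(msg) \
      (std::fprintf(stderr, "UNREACHABLE: %s\n", msg), std::abort())

    bool isConstantKind(int Kind) {
      switch (Kind) {
      case 0: return true;     // e.g. a constant-pool-like pseudo value
      case 1: return false;
      }
      MY_UNREACHABLE("Unknown kind!");
      return false;            // never reached; kept to satisfy -Wreturn-type
    }

    int main() { return isConstantKind(0) ? 0 : 1; }
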
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index 64374ce137fd..b655dda41153 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
It also increases the likelihood the store may become dead.
//===---------------------------------------------------------------------===//
-I think we should have a "hasSideEffects" flag (which is automatically set for
-stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
-to remat any instruction that has no side effects, if it can handle it and if
-profitable.
-
-For now, I'd suggest having the remat stuff work like this:
-
-1. I need to spill/reload this thing.
-2. Check to see if it has side effects.
-3. Check to see if it is simple enough: e.g. it only has one register
-destination and no register input.
-4. If so, clone the instruction, do the xform, etc.
-
-Advantages of this are:
-
-1. the .td file describes the behavior of the instructions, not the way the
- algorithm should work.
-2. as remat gets smarter in the future, we shouldn't have to be changing the .td
- files.
-3. it is easier to explain what the flag means in the .td file, because you
- don't have to pull in the explanation of how the current remat algo works.
-
-Some potential added complexities:
-
-1. Some instructions have to be glued to it's predecessor or successor. All of
- the PC relative instructions and condition code setting instruction. We could
- mark them as hasSideEffects, but that's not quite right. PC relative loads
- from constantpools can be remat'ed, for example. But it requires more than
- just cloning the instruction. Some instructions can be remat'ed but it
- expands to more than one instruction. But allocator will have to make a
- decision.
-
-4. As stated in 3, not as simple as cloning in some cases. The target will have
- to decide how to remat it. For example, an ARM 2-piece constant generation
- instruction is remat'ed as a load from constantpool.
-
-//===---------------------------------------------------------------------===//
-
bb27 ...
...
%reg1037 = ADDri %reg1039, 1
@@ -206,3 +168,32 @@ Stack coloring improvements:
not spill slots.
2. Reorder objects to fill in gaps between objects.
e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
+
+//===---------------------------------------------------------------------===//
+
+The scheduler should be able to sort nearby instructions by their address. For
+example, in an expanded memset sequence it's not uncommon to see code like this:
+
+ movl $0, 4(%rdi)
+ movl $0, 8(%rdi)
+ movl $0, 12(%rdi)
+ movl $0, 0(%rdi)
+
+Each of the stores is independent, and the scheduler is currently making an
+arbitrary decision about the order.
+
+//===---------------------------------------------------------------------===//
+
+Another opportunity in this code is that the $0 could be moved to a register:
+
+ movl $0, %eax
+ movl %eax, 4(%rdi)
+ movl %eax, 8(%rdi)
+ movl %eax, 12(%rdi)
+ movl %eax, 0(%rdi)
+
+This would save substantial code size, especially for longer sequences like
+this. It would be easy to have a rule telling isel to avoid matching MOV32mi
+if the immediate has more than some fixed number of uses. It's more involved
+to teach the register allocator how to do late folding to recover from
+excessive register pressure.
+
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 904b4cb2a46f..5d58ea984f21 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -33,8 +33,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>
#include <queue>
@@ -142,6 +144,7 @@ namespace {
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
if (StrongPHIElim)
AU.addRequiredID(StrongPHIEliminationID);
@@ -173,11 +176,11 @@ namespace {
/// processActiveIntervals - expire old intervals and move non-overlapping
/// ones to the inactive list.
- void processActiveIntervals(unsigned CurPoint);
+ void processActiveIntervals(LiveIndex CurPoint);
/// processInactiveIntervals - expire old intervals and move overlapping
/// ones to the active list.
- void processInactiveIntervals(unsigned CurPoint);
+ void processInactiveIntervals(LiveIndex CurPoint);
/// hasNextReloadInterval - Return the next liveinterval that's being
/// defined by a reload from the same SS as the specified one.
@@ -230,12 +233,12 @@ namespace {
bool Error = false;
for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
if (regUse_[i] != 0) {
- cerr << tri_->getName(i) << " is still in use!\n";
+ errs() << tri_->getName(i) << " is still in use!\n";
Error = true;
}
}
if (Error)
- abort();
+ llvm_unreachable(0);
#endif
regUse_.clear();
regUseBackUp_.clear();
@@ -295,15 +298,20 @@ namespace {
template <typename ItTy>
void printIntervals(const char* const str, ItTy i, ItTy e) const {
- if (str) DOUT << str << " intervals:\n";
- for (; i != e; ++i) {
- DOUT << "\t" << *i->first << " -> ";
- unsigned reg = i->first->reg;
- if (TargetRegisterInfo::isVirtualRegister(reg)) {
- reg = vrm_->getPhys(reg);
- }
- DOUT << tri_->getName(reg) << '\n';
- }
+ DEBUG({
+ if (str)
+ errs() << str << " intervals:\n";
+
+ for (; i != e; ++i) {
+ errs() << "\t" << *i->first << " -> ";
+
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg))
+ reg = vrm_->getPhys(reg);
+
+ errs() << tri_->getName(reg) << '\n';
+ }
+ });
}
};
char RALinScan::ID = 0;
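
The pervasive DOUT to DEBUG(errs() << ...) conversion in this file, and throughout the commit, moves debug printing behind a macro that compiles out of release builds, with a braced DEBUG({ ... }) form for multi-statement dumps. A self-contained imitation of the mechanism; MY_DEBUG only mimics the compile-out part, while the real llvm::DEBUG also gates on the -debug flag and DEBUG_TYPE at run time:

    #include <iostream>

    #ifndef NDEBUG
    #define MY_DEBUG(X) do { X; } while (0)
    #else
    #define MY_DEBUG(X) do { } while (0)
    #endif

    int main() {
      unsigned CurCycle = 3;
      // Single statement, like DEBUG(errs() << ...):
      MY_DEBUG(std::cerr << "*** CURRENT ***: cycle " << CurCycle << '\n');
      // Multi-statement block, like the DEBUG({ ... }) uses in this file:
      MY_DEBUG({
        for (unsigned i = 0; i != CurCycle; ++i)
          std::cerr << "\tinterval " << i << " expired\n";
      });
      return 0;
    }
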
@@ -358,7 +366,8 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
return Reg;
VNInfo *vni = cur.begin()->valno;
- if (!vni->def || vni->isUnused() || !vni->isDefAccurate())
+ if ((vni->def == LiveIndex()) ||
+ vni->isUnused() || !vni->isDefAccurate())
return Reg;
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
@@ -380,18 +389,18 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
// Try to coalesce.
if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) {
- DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
- << '\n';
+ DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
+ << '\n');
vrm_->clearVirt(cur.reg);
vrm_->assignVirt2Phys(cur.reg, PhysReg);
// Remove unnecessary kills since a copy does not clobber the register.
if (li_->hasInterval(SrcReg)) {
LiveInterval &SrcLI = li_->getInterval(SrcReg);
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg),
- E = mri_->reg_end(); I != E; ++I) {
+ for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg),
+ E = mri_->use_end(); I != E; ++I) {
MachineOperand &O = I.getOperand();
- if (!O.isUse() || !O.isKill())
+ if (!O.isKill())
continue;
MachineInstr *MI = &*I;
if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI))))
@@ -478,24 +487,25 @@ void RALinScan::initIntervalSets()
}
}
-void RALinScan::linearScan()
-{
+void RALinScan::linearScan() {
// linear scan algorithm
- DOUT << "********** LINEAR SCAN **********\n";
- DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n';
-
- DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end()));
+ DEBUG({
+ errs() << "********** LINEAR SCAN **********\n"
+ << "********** Function: "
+ << mf_->getFunction()->getName() << '\n';
+ printIntervals("fixed", fixed_.begin(), fixed_.end());
+ });
while (!unhandled_.empty()) {
// pick the interval with the earliest start point
LiveInterval* cur = unhandled_.top();
unhandled_.pop();
++NumIters;
- DOUT << "\n*** CURRENT ***: " << *cur << '\n';
+ DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n');
if (!cur->empty()) {
- processActiveIntervals(cur->beginNumber());
- processInactiveIntervals(cur->beginNumber());
+ processActiveIntervals(cur->beginIndex());
+ processInactiveIntervals(cur->beginIndex());
assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
"Can only allocate virtual registers!");
@@ -506,15 +516,17 @@ void RALinScan::linearScan()
// assign it one.
assignRegOrStackSlotAtInterval(cur);
- DEBUG(printIntervals("active", active_.begin(), active_.end()));
- DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+ DEBUG({
+ printIntervals("active", active_.begin(), active_.end());
+ printIntervals("inactive", inactive_.begin(), inactive_.end());
+ });
}
// Expire any remaining active intervals
while (!active_.empty()) {
IntervalPtr &IP = active_.back();
unsigned reg = IP.first->reg;
- DOUT << "\tinterval " << *IP.first << " expired\n";
+ DEBUG(errs() << "\tinterval " << *IP.first << " expired\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -523,9 +535,11 @@ void RALinScan::linearScan()
}
// Expire any remaining inactive intervals
- DEBUG(for (IntervalPtrs::reverse_iterator
- i = inactive_.rbegin(); i != inactive_.rend(); ++i)
- DOUT << "\tinterval " << *i->first << " expired\n");
+ DEBUG({
+ for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ errs() << "\tinterval " << *i->first << " expired\n";
+ });
inactive_.clear();
// Add live-ins to every BB except for entry. Also perform trivial coalescing.
@@ -560,7 +574,7 @@ void RALinScan::linearScan()
}
}
- DOUT << *vrm_;
+ DEBUG(errs() << *vrm_);
// Look for physical registers that end up not being allocated even though
// register allocator had to spill other registers in its register class.
@@ -572,9 +586,9 @@ void RALinScan::linearScan()
/// processActiveIntervals - expire old intervals and move non-overlapping ones
/// to the inactive list.
-void RALinScan::processActiveIntervals(unsigned CurPoint)
+void RALinScan::processActiveIntervals(LiveIndex CurPoint)
{
- DOUT << "\tprocessing active intervals:\n";
+ DEBUG(errs() << "\tprocessing active intervals:\n");
for (unsigned i = 0, e = active_.size(); i != e; ++i) {
LiveInterval *Interval = active_[i].first;
@@ -584,7 +598,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint)
IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
if (IntervalPos == Interval->end()) { // Remove expired intervals.
- DOUT << "\t\tinterval " << *Interval << " expired\n";
+ DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -597,7 +611,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint)
} else if (IntervalPos->start > CurPoint) {
// Move inactive intervals to inactive list.
- DOUT << "\t\tinterval " << *Interval << " inactive\n";
+ DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -618,9 +632,9 @@ void RALinScan::processActiveIntervals(unsigned CurPoint)
/// processInactiveIntervals - expire old intervals and move overlapping
/// ones to the active list.
-void RALinScan::processInactiveIntervals(unsigned CurPoint)
+void RALinScan::processInactiveIntervals(LiveIndex CurPoint)
{
- DOUT << "\tprocessing inactive intervals:\n";
+ DEBUG(errs() << "\tprocessing inactive intervals:\n");
for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
LiveInterval *Interval = inactive_[i].first;
@@ -630,7 +644,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint)
IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
if (IntervalPos == Interval->end()) { // remove expired intervals.
- DOUT << "\t\tinterval " << *Interval << " expired\n";
+ DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n");
// Pop off the end of the list.
inactive_[i] = inactive_.back();
@@ -638,7 +652,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint)
--i; --e;
} else if (IntervalPos->start <= CurPoint) {
// move re-activated intervals in active list
- DOUT << "\t\tinterval " << *Interval << " active\n";
+ DEBUG(errs() << "\t\tinterval " << *Interval << " active\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -699,7 +713,7 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
return IP.end();
}
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, LiveIndex Point){
for (unsigned i = 0, e = V.size(); i != e; ++i) {
RALinScan::IntervalPtr &IP = V[i];
LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
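RevertVectorIteratorsTo rewinds each cached iterator with upper_bound and one step back, so the cache again covers the rollback point. A simplified standalone rendering over plain vectors; these are assumptions, not LLVM's LiveInterval:

#include <algorithm>
#include <iostream>
#include <vector>

struct Range { unsigned Start, End; };

int main() {
  std::vector<Range> Ranges = {{0, 4}, {6, 9}, {12, 20}};
  unsigned Point = 7;

  // upper_bound lands on the first range starting after Point...
  auto It = std::upper_bound(
      Ranges.begin(), Ranges.end(), Point,
      [](unsigned P, const Range &R) { return P < R.Start; });
  // ...then back up one so the cached iterator covers, or immediately
  // precedes, Point itself.
  if (It != Ranges.begin())
    --It;
  std::cout << "rewound to [" << It->Start << ',' << It->End << ")\n";
  return 0;
}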
@@ -725,7 +739,8 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
if (SI.hasAtLeastOneValue())
VNI = SI.getValNumInfo(0);
else
- VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator());
+ VNI = SI.getNextValue(LiveIndex(), 0, false,
+ ls_->getVNInfoAllocator());
LiveInterval &RI = li_->getInterval(cur->reg);
// FIXME: This may be overly conservative.
@@ -764,10 +779,12 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
SmallVector<LiveInterval*, 8> SLIs[3];
- DOUT << "\tConsidering " << NumCands << " candidates: ";
- DEBUG(for (unsigned i = 0; i != NumCands; ++i)
- DOUT << tri_->getName(Candidates[i].first) << " ";
- DOUT << "\n";);
+ DEBUG({
+ errs() << "\tConsidering " << NumCands << " candidates: ";
+ for (unsigned i = 0; i != NumCands; ++i)
+ errs() << tri_->getName(Candidates[i].first) << " ";
+ errs() << "\n";
+ });
// Calculate the number of conflicts of each candidate.
for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
@@ -865,16 +882,15 @@ void RALinScan::UpgradeRegister(unsigned Reg) {
namespace {
struct LISorter {
bool operator()(LiveInterval* A, LiveInterval* B) {
- return A->beginNumber() < B->beginNumber();
+ return A->beginIndex() < B->beginIndex();
}
};
}
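LISorter above is a plain strict-weak-ordering functor over start indices; an equivalent minimal version, usable with std::sort:

#include <algorithm>
#include <vector>

struct LI { unsigned Begin; };

struct LISorter {
  bool operator()(const LI *A, const LI *B) const {
    return A->Begin < B->Begin; // order by start index, earliest first
  }
};

int main() {
  LI A{5}, B{1}, C{3};
  std::vector<LI *> Work = {&A, &B, &C};
  std::sort(Work.begin(), Work.end(), LISorter());
  return Work.front()->Begin == 1 ? 0 : 1;
}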
/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
/// spill.
-void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
-{
- DOUT << "\tallocating current interval: ";
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
+ DEBUG(errs() << "\tallocating current interval: ");
// This is an implicitly defined live interval, just assign any register.
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
@@ -882,7 +898,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
physReg = *RC->allocation_order_begin(*mf_);
- DOUT << tri_->getName(physReg) << '\n';
+ DEBUG(errs() << tri_->getName(physReg) << '\n');
// Note the register is not really in use.
vrm_->assignVirt2Phys(cur->reg, physReg);
return;
@@ -891,7 +907,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
backUpRegUses();
std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
- unsigned StartPosition = cur->beginNumber();
+ LiveIndex StartPosition = cur->beginIndex();
const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
// If start of this live interval is defined by a move instruction and its
@@ -901,7 +917,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if (vni->def && !vni->isUnused() && vni->isDefAccurate()) {
+ if ((vni->def != LiveIndex()) && !vni->isUnused() &&
+ vni->isDefAccurate()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (CopyMI &&
@@ -963,7 +980,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// Okay, this reg is on the fixed list. Check to see if we actually
// conflict.
LiveInterval *I = IP.first;
- if (I->endNumber() > StartPosition) {
+ if (I->endIndex() > StartPosition) {
LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
IP.second = II;
if (II != I->begin() && II->start > StartPosition)
@@ -988,7 +1005,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- I->endNumber() > StartPosition) {
+ I->endIndex() > StartPosition) {
LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
IP.second = II;
if (II != I->begin() && II->start > StartPosition)
@@ -1015,7 +1032,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// the free physical register and add this interval to the active
// list.
if (physReg) {
- DOUT << tri_->getName(physReg) << '\n';
+ DEBUG(errs() << tri_->getName(physReg) << '\n');
vrm_->assignVirt2Phys(cur->reg, physReg);
addRegUse(physReg);
active_.push_back(std::make_pair(cur, cur->begin()));
@@ -1031,7 +1048,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
}
return;
}
- DOUT << "no free registers\n";
+ DEBUG(errs() << "no free registers\n");
// Compile the spill weights into an array that is better for scanning.
std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
@@ -1049,7 +1066,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
}
- DOUT << "\tassigning stack slot at interval "<< *cur << ":\n";
+ DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n");
// Find a register to spill.
float minWeight = HUGE_VALF;
@@ -1102,8 +1119,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
DowngradedRegs.clear();
assignRegOrStackSlotAtInterval(cur);
} else {
- cerr << "Ran out of registers during register allocation!\n";
- exit(1);
+ llvm_report_error("Ran out of registers during register allocation!");
}
return;
}
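Replacing cerr + exit(1) with llvm_report_error lets an embedding client install a handler instead of having the process die unconditionally. A rough model of that shape, not the real API's code:

#include <cstdlib>
#include <iostream>
#include <string>

typedef void (*ErrorHandler)(const std::string &Reason);
static ErrorHandler Handler = 0;

void install_error_handler(ErrorHandler H) { Handler = H; }

void report_error(const std::string &Reason) {
  if (Handler)
    Handler(Reason);    // give the embedder a chance to log or clean up
  std::cerr << "error: " << Reason << '\n';
  std::exit(1);         // in the real API the handler may abort differently
}

int main() {
  install_error_handler([](const std::string &R) {
    std::cerr << "handled: " << R << '\n';
  });
  report_error("Ran out of registers during register allocation!");
}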
@@ -1117,16 +1133,19 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
--LastCandidate;
}
- DOUT << "\t\tregister(s) with min weight(s): ";
- DEBUG(for (unsigned i = 0; i != LastCandidate; ++i)
- DOUT << tri_->getName(RegsWeights[i].first)
- << " (" << RegsWeights[i].second << ")\n");
+ DEBUG({
+ errs() << "\t\tregister(s) with min weight(s): ";
+
+ for (unsigned i = 0; i != LastCandidate; ++i)
+ errs() << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n";
+ });
// If the current has the minimum weight, we need to spill it and
// add any added intervals back to unhandled, and restart
// linearscan.
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
- DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+ DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n');
SmallVector<LiveInterval*, 8> spillIs;
std::vector<LiveInterval*> added;
@@ -1154,14 +1173,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
LiveInterval *ReloadLi = added[i];
if (ReloadLi->weight == HUGE_VALF &&
li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- unsigned ReloadIdx = ReloadLi->beginNumber();
+ LiveIndex ReloadIdx = ReloadLi->beginIndex();
MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
// Last reload of same SS is in the same MBB. We want to try to
// allocate both reloads the same register and make sure the reg
// isn't clobbered in between if at all possible.
- assert(LastReload->beginNumber() < ReloadIdx);
+ assert(LastReload->beginIndex() < ReloadIdx);
NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
}
LastReloadMBB = ReloadMBB;
@@ -1206,12 +1225,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// mark our rollback point.
std::vector<LiveInterval*> added;
while (!spillIs.empty()) {
- bool epicFail = false;
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
- DOUT << "\t\t\tspilling(a): " << *sli << '\n';
+ DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n');
earliestStartInterval =
- (earliestStartInterval->beginNumber() < sli->beginNumber()) ?
+ (earliestStartInterval->beginIndex() < sli->beginIndex()) ?
earliestStartInterval : sli;
std::vector<LiveInterval*> newIs;
@@ -1223,15 +1241,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
addStackInterval(sli, ls_, li_, mri_, *vrm_);
std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
spilled.insert(sli->reg);
-
- if (epicFail) {
- //abort();
- }
}
- unsigned earliestStart = earliestStartInterval->beginNumber();
+ LiveIndex earliestStart = earliestStartInterval->beginIndex();
- DOUT << "\t\trolling back to: " << earliestStart << '\n';
+ DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n');
// Scan handled in reverse order up to the earliest start of a
// spilled live interval and undo each one, restoring the state of
@@ -1239,9 +1253,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
while (!handled_.empty()) {
LiveInterval* i = handled_.back();
// If this interval starts before t we are done.
- if (i->beginNumber() < earliestStart)
+ if (i->beginIndex() < earliestStart)
break;
- DOUT << "\t\t\tundo changes for: " << *i << '\n';
+ DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n');
handled_.pop_back();
// When undoing a live interval allocation we must know if it is active or
@@ -1290,8 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
LiveInterval *HI = handled_[i];
if (!HI->expiredAt(earliestStart) &&
- HI->expiredAt(cur->beginNumber())) {
- DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+ HI->expiredAt(cur->beginIndex())) {
+ DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n');
active_.push_back(std::make_pair(HI, HI->begin()));
assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
addRegUse(vrm_->getPhys(HI->reg));
@@ -1310,14 +1324,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
LiveInterval *ReloadLi = added[i];
if (ReloadLi->weight == HUGE_VALF &&
li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- unsigned ReloadIdx = ReloadLi->beginNumber();
+ LiveIndex ReloadIdx = ReloadLi->beginIndex();
MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
// Last reload of same SS is in the same MBB. We want to try to
// allocate both reloads the same register and make sure the reg
// isn't clobbered in between if at all possible.
- assert(LastReload->beginNumber() < ReloadIdx);
+ assert(LastReload->beginIndex() < ReloadIdx);
NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
}
LastReloadMBB = ReloadMBB;
@@ -1420,7 +1434,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
// available first.
unsigned Preference = vrm_->getRegAllocPref(cur->reg);
if (Preference) {
- DOUT << "(preferred: " << tri_->getName(Preference) << ") ";
+ DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") ");
if (isRegAvail(Preference) &&
RC->contains(Preference))
return Preference;
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index e1cc20cf4fb1..6caa2d3b824f 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -25,6 +25,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
@@ -151,6 +153,7 @@ namespace {
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addRequiredID(PHIEliminationID);
AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -291,11 +294,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
assert(VirtReg && "Spilling a physical register is illegal!"
" Must not have appropriate kill for the register or use exists beyond"
" the intended one.");
- DOUT << " Spilling register " << TRI->getName(PhysReg)
- << " containing %reg" << VirtReg;
+ DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg)
+ << " containing %reg" << VirtReg);
if (!isVirtRegModified(VirtReg)) {
- DOUT << " which has not been modified, so no store necessary!";
+ DEBUG(errs() << " which has not been modified, so no store necessary!");
std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
if (LastUse.first)
LastUse.first->getOperand(LastUse.second).setIsKill();
@@ -305,7 +308,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
// modified.
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DOUT << " to stack slot #" << FrameIndex;
+ DEBUG(errs() << " to stack slot #" << FrameIndex);
// If the instruction reads the register that's spilled, (e.g. this can
// happen if it is a move to a physical register), then the spill
// instruction is not a kill.
@@ -316,7 +319,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
- DOUT << "\n";
+ DEBUG(errs() << '\n');
removePhysReg(PhysReg);
}
@@ -505,8 +508,8 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
- DOUT << " Reloading %reg" << VirtReg << " into "
- << TRI->getName(PhysReg) << "\n";
+ DEBUG(errs() << " Reloading %reg" << VirtReg << " into "
+ << TRI->getName(PhysReg) << "\n");
// Add move instruction(s)
TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
@@ -517,24 +520,28 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
if (!ReloadedRegs.insert(PhysReg)) {
- cerr << "Ran out of registers during register allocation!\n";
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
- cerr << "Please check your inline asm statement for invalid "
+ Msg << "\nPlease check your inline asm statement for invalid "
<< "constraints:\n";
- MI->print(cerr.stream(), TM);
+ MI->print(Msg, TM);
}
- exit(1);
+ llvm_report_error(Msg.str());
}
for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
*SubRegs; ++SubRegs) {
if (!ReloadedRegs.insert(*SubRegs)) {
- cerr << "Ran out of registers during register allocation!\n";
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
- cerr << "Please check your inline asm statement for invalid "
+ Msg << "\nPlease check your inline asm statement for invalid "
<< "constraints:\n";
- MI->print(cerr.stream(), TM);
+ MI->print(Msg, TM);
}
- exit(1);
+ llvm_report_error(Msg.str());
}
}
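The reload-failure path now composes its diagnostic into a raw_string_ostream so the whole message, including any offending inline asm, can be handed to llvm_report_error in one call. The same pattern with standard streams:

#include <iostream>
#include <sstream>
#include <string>

int main() {
  std::ostringstream Msg;
  Msg << "Ran out of registers during register allocation!";
  bool IsInlineAsm = true; // stand-in for MI->getOpcode() == INLINEASM
  if (IsInlineAsm)
    Msg << "\nPlease check your inline asm statement for invalid constraints";
  // The patch passes the accumulated string to llvm_report_error(Msg.str()).
  std::cerr << Msg.str() << '\n';
  return 0;
}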
@@ -707,8 +714,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
// loop over each instruction
MachineBasicBlock::iterator MII = MBB.begin();
- DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
- if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+ DEBUG({
+ const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB)
+ errs() << "\nStarting RegAlloc of BB: " << LBB->getName();
+ });
// Add live-in registers as active.
for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
@@ -733,13 +743,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
while (MII != MBB.end()) {
MachineInstr *MI = MII++;
const TargetInstrDesc &TID = MI->getDesc();
- DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
- DOUT << " Regs have values: ";
- for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
- DOUT << "[" << TRI->getName(i)
- << ",%reg" << PhysRegsUsed[i] << "] ";
- DOUT << "\n");
+ DEBUG({
+ errs() << "\nStarting RegAlloc of: " << *MI;
+ errs() << " Regs have values: ";
+ for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ errs() << "[" << TRI->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ errs() << '\n';
+ });
// Loop over the implicit uses, making sure that they are at the head of the
// use order list, so they don't get reallocated.
@@ -783,8 +795,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
markVirtRegModified(DestVirtReg);
getVirtRegLastUse(DestVirtReg) =
std::make_pair((MachineInstr*)0, 0);
- DOUT << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n";
+ DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
MO.setReg(DestPhysReg); // Assign the earlyclobber register
} else {
unsigned Reg = MO.getReg();
@@ -849,15 +861,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
}
if (PhysReg) {
- DOUT << " Last use of " << TRI->getName(PhysReg)
- << "[%reg" << VirtReg <<"], removing it from live set\n";
+ DEBUG(errs() << " Last use of " << TRI->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n");
removePhysReg(PhysReg);
for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
*SubRegs; ++SubRegs) {
if (PhysRegsUsed[*SubRegs] != -2) {
- DOUT << " Last use of "
- << TRI->getName(*SubRegs)
- << "[%reg" << VirtReg <<"], removing it from live set\n";
+ DEBUG(errs() << " Last use of "
+ << TRI->getName(*SubRegs) << "[%reg" << VirtReg
+ <<"], removing it from live set\n");
removePhysReg(*SubRegs);
}
}
@@ -942,8 +954,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
MF->getRegInfo().setPhysRegUsed(DestPhysReg);
markVirtRegModified(DestVirtReg);
getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
- DOUT << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n";
+ DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
MO.setReg(DestPhysReg); // Assign the output register
}
}
@@ -965,16 +977,16 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
}
if (PhysReg) {
- DOUT << " Register " << TRI->getName(PhysReg)
- << " [%reg" << VirtReg
- << "] is never used, removing it from live set\n";
+ DEBUG(errs() << " Register " << TRI->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n");
removePhysReg(PhysReg);
for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
*AliasSet; ++AliasSet) {
if (PhysRegsUsed[*AliasSet] != -2) {
- DOUT << " Register " << TRI->getName(*AliasSet)
- << " [%reg" << *AliasSet
- << "] is never used, removing it from live set\n";
+ DEBUG(errs() << " Register " << TRI->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n");
removePhysReg(*AliasSet);
}
}
@@ -1022,7 +1034,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
/// runOnMachineFunction - Register allocate the whole function
///
bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
- DOUT << "Machine Function " << "\n";
+ DEBUG(errs() << "Machine Function\n");
MF = &Fn;
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 89e2c59fe805..bee5d931319e 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,7 +31,9 @@
#define DEBUG_TYPE "regalloc"
-#include "PBQP.h"
+#include "PBQP/HeuristicSolver.h"
+#include "PBQP/SimpleGraph.h"
+#include "PBQP/Heuristics/Briggs.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -42,6 +44,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <limits>
@@ -53,32 +56,38 @@
using namespace llvm;
static RegisterRegAlloc
-registerPBQPRepAlloc("pbqp", "PBQP register allocator",
- createPBQPRegisterAllocator);
+registerPBQPRepAlloc("pbqp", "PBQP register allocator.",
+ llvm::createPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
namespace {
- //!
- //! PBQP based allocators solve the register allocation problem by mapping
- //! register allocation problems to Partitioned Boolean Quadratic
- //! Programming problems.
+ ///
+ /// PBQP based allocators solve the register allocation problem by mapping
+ /// register allocation problems to Partitioned Boolean Quadratic
+ /// Programming problems.
class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass {
public:
static char ID;
- //! Construct a PBQP register allocator.
- PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {}
+ /// Construct a PBQP register allocator.
+ PBQPRegAlloc() : MachineFunctionPass(&ID) {}
- //! Return the pass name.
- virtual const char* getPassName() const throw() {
+ /// Return the pass name.
+ virtual const char* getPassName() const {
return "PBQP Register Allocator";
}
- //! PBQP analysis usage.
+ /// PBQP analysis usage.
virtual void getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
- au.addRequiredTransitive<RegisterCoalescer>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequired<RegisterCoalescer>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
au.addRequired<MachineLoopInfo>();
@@ -87,7 +96,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(au);
}
- //! Perform register allocation
+ /// Perform register allocation
virtual bool runOnMachineFunction(MachineFunction &MF);
private:
@@ -97,7 +106,7 @@ namespace {
typedef std::vector<AllowedSet> AllowedSetMap;
typedef std::set<unsigned> RegSet;
typedef std::pair<unsigned, unsigned> RegPair;
- typedef std::map<RegPair, PBQPNum> CoalesceMap;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
typedef std::set<LiveInterval*> LiveIntervalSet;
@@ -119,60 +128,60 @@ namespace {
emptyVRegIntervals;
- //! Builds a PBQP cost vector.
+ /// Builds a PBQP cost vector.
template <typename RegContainer>
- PBQPVector* buildCostVector(unsigned vReg,
- const RegContainer &allowed,
- const CoalesceMap &cealesces,
- PBQPNum spillCost) const;
-
- //! \brief Builds a PBQP interference matrix.
- //!
- //! @return Either a pointer to a non-zero PBQP matrix representing the
- //! allocation option costs, or a null pointer for a zero matrix.
- //!
- //! Expects allowed sets for two interfering LiveIntervals. These allowed
- //! sets should contain only allocable registers from the LiveInterval's
- //! register class, with any interfering pre-colored registers removed.
+ PBQP::Vector buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+ const CoalesceMap &cealesces,
+ PBQP::PBQPNum spillCost) const;
+
+ /// \brief Builds a PBQP interference matrix.
+ ///
+ /// @return Either a pointer to a non-zero PBQP matrix representing the
+ /// allocation option costs, or a null pointer for a zero matrix.
+ ///
+ /// Expects allowed sets for two interfering LiveIntervals. These allowed
+ /// sets should contain only allocable registers from the LiveInterval's
+ /// register class, with any interfering pre-colored registers removed.
template <typename RegContainer>
- PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1,
- const RegContainer &allowed2) const;
-
- //!
- //! Expects allowed sets for two potentially coalescable LiveIntervals,
- //! and an estimated benefit due to coalescing. The allowed sets should
- //! contain only allocable registers from the LiveInterval's register
- //! classes, with any interfering pre-colored registers removed.
+ PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2) const;
+
+ ///
+ /// Expects allowed sets for two potentially coalescable LiveIntervals,
+ /// and an estimated benefit due to coalescing. The allowed sets should
+ /// contain only allocable registers from the LiveInterval's register
+ /// classes, with any interfering pre-colored registers removed.
template <typename RegContainer>
- PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1,
- const RegContainer &allowed2,
- PBQPNum cBenefit) const;
-
- //! \brief Finds coalescing opportunities and returns them as a map.
- //!
- //! Any entries in the map are guaranteed coalescable, even if their
- //! corresponding live intervals overlap.
+ PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2,
+ PBQP::PBQPNum cBenefit) const;
+
+ /// \brief Finds coalescing opportunities and returns them as a map.
+ ///
+ /// Any entries in the map are guaranteed coalescable, even if their
+ /// corresponding live intervals overlap.
CoalesceMap findCoalesces();
- //! \brief Finds the initial set of vreg intervals to allocate.
+ /// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc();
- //! \brief Constructs a PBQP problem representation of the register
- //! allocation problem for this function.
- //!
- //! @return a PBQP solver object for the register allocation problem.
- pbqp* constructPBQPProblem();
+ /// \brief Constructs a PBQP problem representation of the register
+ /// allocation problem for this function.
+ ///
+ /// @return a PBQP solver object for the register allocation problem.
+ PBQP::SimpleGraph constructPBQPProblem();
- //! \brief Adds a stack interval if the given live interval has been
- //! spilled. Used to support stack slot coloring.
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
- //! \brief Given a solved PBQP problem maps this solution back to a register
- //! assignment.
- bool mapPBQPToRegAlloc(pbqp *problem);
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
- //! \brief Postprocessing before final spilling. Sets basic block "live in"
- //! variables.
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
void finalizeAlloc() const;
};
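The PBQP formulation sketched in these comments gives each virtual register a node whose options are {spill, phys reg 1, phys reg 2, ...}; option costs come from per-node vectors and pairwise costs from edge matrices. The real solver applies graph reductions plus the Briggs heuristic; this exhaustive two-node toy is only for intuition:

#include <iostream>
#include <limits>
#include <vector>

int main() {
  // Two virtual registers, each with options {spill, R0, R1}.
  std::vector<double> V1 = {1.0, 0.0, 0.0};
  std::vector<double> V2 = {1.5, 0.0, 0.0};
  double Inf = std::numeric_limits<double>::infinity();
  // Interference matrix: both picking the same physical register is banned.
  double M[3][3] = {{0, 0, 0}, {0, Inf, 0}, {0, 0, Inf}};

  double Best = Inf;
  unsigned BestI = 0, BestJ = 0;
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < 3; ++j) {
      double Cost = V1[i] + V2[j] + M[i][j];
      if (Cost < Best) { Best = Cost; BestI = i; BestJ = j; }
    }
  std::cout << "solution: (" << BestI << ", " << BestJ << ") cost " << Best
            << '\n'; // expect (1, 2): different registers, zero cost
  return 0;
}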
@@ -182,17 +191,17 @@ namespace {
template <typename RegContainer>
-PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg,
- const RegContainer &allowed,
- const CoalesceMap &coalesces,
- PBQPNum spillCost) const {
+PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+ const CoalesceMap &coalesces,
+ PBQP::PBQPNum spillCost) const {
typedef typename RegContainer::const_iterator AllowedItr;
// Allocate vector. Additional element (0th) used for spill option
- PBQPVector *v = new PBQPVector(allowed.size() + 1);
+ PBQP::Vector v(allowed.size() + 1, 0);
- (*v)[0] = spillCost;
+ v[0] = spillCost;
// Iterate over the allowed registers inserting coalesce benefits if there
// are any.
@@ -210,14 +219,14 @@ PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg,
continue;
// We have a coalesce - insert the benefit.
- (*v)[ai + 1] = -cmItr->second;
+ v[ai + 1] = -cmItr->second;
}
return v;
}
template <typename RegContainer>
-PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
+PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
const RegContainer &allowed1, const RegContainer &allowed2) const {
typedef typename RegContainer::const_iterator RegContainerIterator;
@@ -230,7 +239,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
// that the spill option (element 0,0) has zero cost, since we can allocate
// both intervals to memory safely (the cost for each individual allocation
// to memory is accounted for by the cost vectors for each live interval).
- PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+ PBQP::Matrix *m =
+ new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
// Assume this is a zero matrix until proven otherwise. Zero matrices occur
// between interfering live ranges with non-overlapping register sets (e.g.
@@ -259,8 +269,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
unsigned reg2 = *a2Itr;
// If the row/column regs are identical or alias insert an infinity.
- if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) {
- (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity();
+ if (tri->regsOverlap(reg1, reg2)) {
+ (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
isZeroMatrix = false;
}
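Switching to tri->regsOverlap(reg1, reg2) folds the identity check into the alias query and also covers sub/super-register overlap. A toy model with an explicit alias set; the real query is table-driven:

#include <set>
#include <utility>

static bool regsOverlap(unsigned A, unsigned B,
                        const std::set<std::pair<unsigned, unsigned> > &Alias) {
  if (A == B)
    return true; // identity counts as overlap
  return Alias.count(std::make_pair(A, B)) || Alias.count(std::make_pair(B, A));
}

int main() {
  std::set<std::pair<unsigned, unsigned> > Alias;
  Alias.insert(std::make_pair(1u, 2u)); // e.g. AX aliasing EAX
  return (regsOverlap(1, 1, Alias) && regsOverlap(1, 2, Alias) &&
          !regsOverlap(1, 3, Alias)) ? 0 : 1;
}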
@@ -282,9 +292,9 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
}
template <typename RegContainer>
-PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix(
+PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
const RegContainer &allowed1, const RegContainer &allowed2,
- PBQPNum cBenefit) const {
+ PBQP::PBQPNum cBenefit) const {
typedef typename RegContainer::const_iterator RegContainerIterator;
@@ -293,7 +303,8 @@ PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix(
// for the LiveIntervals which are (potentially) to be coalesced. The amount
// -cBenefit will be placed in any element representing the same register
// for both intervals.
- PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+ PBQP::Matrix *m =
+ new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
// Reset costs to zero.
m->reset(0);
@@ -442,7 +453,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
vniItr != vniEnd; ++vniItr) {
// We want to make sure we skip the copy instruction itself.
- if ((*vniItr)->copy == instr)
+ if ((*vniItr)->getCopy() == instr)
continue;
if (srcLI->liveAt((*vniItr)->def)) {
@@ -495,10 +506,11 @@ void PBQPRegAlloc::findVRegIntervalsToAlloc() {
}
}
-pbqp* PBQPRegAlloc::constructPBQPProblem() {
+PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() {
typedef std::vector<const LiveInterval*> LIVector;
typedef std::vector<unsigned> RegVector;
+ typedef std::vector<PBQP::SimpleGraph::NodeIterator> NodeVector;
// This will store the physical intervals for easy reference.
LIVector physIntervals;
@@ -530,10 +542,15 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() {
}
// Get the set of potential coalesces.
- CoalesceMap coalesces(findCoalesces());
+ CoalesceMap coalesces;
+
+ if (pbqpCoalescing) {
+ coalesces = findCoalesces();
+ }
// Construct a PBQP solver for this problem
- pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size());
+ PBQP::SimpleGraph problem;
+ NodeVector problemNodes(vregIntervalsToAlloc.size());
// Resize allowedSets container appropriately.
allowedSets.resize(vregIntervalsToAlloc.size());
@@ -594,13 +611,13 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() {
// Set the spill cost to the interval weight, or epsilon if the
// interval weight is zero
- PBQPNum spillCost = (li->weight != 0.0) ?
- li->weight : std::numeric_limits<PBQPNum>::min();
+ PBQP::PBQPNum spillCost = (li->weight != 0.0) ?
+ li->weight : std::numeric_limits<PBQP::PBQPNum>::min();
// Build a cost vector for this interval.
- add_pbqp_nodecosts(solver, node,
- buildCostVector(li->reg, allowedSets[node], coalesces,
- spillCost));
+ problemNodes[node] =
+ problem.addNode(
+ buildCostVector(li->reg, allowedSets[node], coalesces, spillCost));
}
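The spill-cost selection in this hunk gives zero-weight intervals the smallest positive value rather than zero, so spilling is never free to the solver. A quick sketch of the epsilon trick:

#include <cassert>
#include <limits>

int main() {
  double Weight = 0.0;
  double SpillCost = (Weight != 0.0)
                         ? Weight
                         // smallest positive normal double, not zero or -inf
                         : std::numeric_limits<double>::min();
  assert(SpillCost > 0.0 && "spilling must never look free");
  return 0;
}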
@@ -616,7 +633,7 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() {
CoalesceMap::const_iterator cmItr =
coalesces.find(RegPair(li->reg, li2->reg));
- PBQPMatrix *m = 0;
+ PBQP::Matrix *m = 0;
if (cmItr != coalesces.end()) {
m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
@@ -627,14 +644,29 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() {
}
if (m != 0) {
- add_pbqp_edgecosts(solver, node1, node2, m);
+ problem.addEdge(problemNodes[node1],
+ problemNodes[node2],
+ *m);
+
delete m;
}
}
}
+ problem.assignNodeIDs();
+
+ assert(problem.getNumNodes() == allowedSets.size());
+ for (unsigned i = 0; i < allowedSets.size(); ++i) {
+ assert(problem.getNodeItr(i) == problemNodes[i]);
+ }
+/*
+ std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, "
+ << problem.getNumEdges() << " edges.\n";
+
+ problem.printDot(std::cerr);
+*/
// We're done, PBQP problem constructed - return it.
- return solver;
+ return problem;
}
void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
@@ -651,14 +683,14 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
if (stackInterval.getNumValNums() != 0)
vni = stackInterval.getValNumInfo(0);
else
- vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator());
+ vni = stackInterval.getNextValue(
+ LiveIndex(), 0, false, lss->getVNInfoAllocator());
LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
stackInterval.MergeRangesInAsValue(rhsInterval, vni);
}
-bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
-
+bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
// Set to true if we have any spills
bool anotherRoundNeeded = false;
@@ -668,14 +700,16 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
// Iterate over the nodes mapping the PBQP solution to a register assignment.
for (unsigned node = 0; node < node2LI.size(); ++node) {
unsigned virtReg = node2LI[node]->reg,
- allocSelection = get_pbqp_solution(problem, node);
+ allocSelection = solution.getSelection(node);
+
// If the PBQP solution is non-zero it's a physical register...
if (allocSelection != 0) {
// Get the physical reg, subtracting 1 to account for the spill option.
unsigned physReg = allowedSets[node][allocSelection - 1];
- DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n";
+ DEBUG(errs() << "VREG " << virtReg << " -> "
+ << tri->getName(physReg) << "\n");
assert(physReg != 0);
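Decoding the solution follows the convention above: selection 0 means spill, otherwise selection - 1 indexes the node's allowed physical-register set. A sketch with made-up register numbers:

#include <iostream>
#include <vector>

int main() {
  std::vector<unsigned> AllowedSet = {16, 17, 18}; // hypothetical phys regs
  unsigned Selection = 2;        // as if returned by solution.getSelection(node)
  if (Selection == 0) {
    std::cout << "spill\n";
  } else {
    // Subtract 1 because index 0 of the cost vector is the spill option.
    unsigned PhysReg = AllowedSet[Selection - 1];
    std::cout << "assign phys reg " << PhysReg << '\n';
  }
  return 0;
}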
@@ -697,8 +731,9 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
addStackInterval(spillInterval, mri);
- DOUT << "VREG " << virtReg << " -> SPILLED (Cost: "
- << oldSpillWeight << ", New vregs: ";
+ (void) oldSpillWeight;
+ DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: "
+ << oldSpillWeight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
@@ -708,12 +743,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
assert(!(*itr)->empty() && "Empty spill range.");
- DOUT << (*itr)->reg << " ";
+ DEBUG(errs() << (*itr)->reg << " ");
vregIntervalsToAlloc.insert(*itr);
}
- DOUT << ")\n";
+ DEBUG(errs() << ")\n");
// We need another round if spill intervals were added.
anotherRoundNeeded |= !newSpills.empty();
@@ -734,6 +769,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
LiveInterval *li = *itr;
unsigned physReg = vrm->getRegAllocPref(li->reg);
+
if (physReg == 0) {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
physReg = *liRC->allocation_order_begin(*mf);
@@ -764,8 +800,8 @@ void PBQPRegAlloc::finalizeAlloc() const {
continue;
}
- // Ignore unallocated vregs:
if (reg == 0) {
+ // Filter out zero regs - they're for intervals that were spilled.
continue;
}
@@ -804,7 +840,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
vrm = &getAnalysis<VirtRegMap>();
- DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n";
+ DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
// Allocator main loop:
//
@@ -829,15 +865,14 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
unsigned round = 0;
while (!pbqpAllocComplete) {
- DOUT << " PBQP Regalloc round " << round << ":\n";
-
- pbqp *problem = constructPBQPProblem();
-
- solve_pbqp(problem);
+ DEBUG(errs() << " PBQP Regalloc round " << round << ":\n");
- pbqpAllocComplete = mapPBQPToRegAlloc(problem);
+ PBQP::SimpleGraph problem = constructPBQPProblem();
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver;
+ problem.assignNodeIDs();
+ PBQP::Solution solution = solver.solve(problem);
- free_pbqp(problem);
+ pbqpAllocComplete = mapPBQPToRegAlloc(solution);
++round;
}
@@ -852,7 +887,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
node2LI.clear();
allowedSets.clear();
- DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n";
+ DEBUG(errs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
// Run rewriter
std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index d7fe7a2d5454..5f1c4e2594c2 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -16,46 +16,21 @@
#define DEBUG_TYPE "reg-scavenging"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
-/// RedefinesSuperRegPart - Return true if the specified register is redefining
-/// part of a super-register.
-static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg,
- const TargetRegisterInfo *TRI) {
- bool SeenSuperUse = false;
- bool SeenSuperDef = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isUndef())
- continue;
- if (TRI->isSuperRegister(SubReg, MO.getReg())) {
- if (MO.isUse())
- SeenSuperUse = true;
- else if (MO.isImplicit())
- SeenSuperDef = true;
- }
- }
-
- return SeenSuperDef && SeenSuperUse;
-}
-
-static bool RedefinesSuperRegPart(const MachineInstr *MI,
- const MachineOperand &MO,
- const TargetRegisterInfo *TRI) {
- assert(MO.isReg() && MO.isDef() && "Not a register def!");
- return RedefinesSuperRegPart(MI, MO.getReg(), TRI);
-}
-
/// setUsed - Set the register and its sub-registers as being used.
void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(Reg);
@@ -65,14 +40,38 @@ void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(SubReg);
}
-/// setUnused - Set the register and its sub-registers as being unused.
-void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) {
- RegsAvailable.set(Reg);
+bool RegScavenger::isAliasUsed(unsigned Reg) const {
+ if (isUsed(Reg))
+ return true;
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ if (isUsed(*R))
+ return true;
+ return false;
+}
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- if (!RedefinesSuperRegPart(MI, Reg, TRI))
- RegsAvailable.set(SubReg);
+void RegScavenger::initRegState() {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+
+ // All registers started out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
+
+ if (!MBB)
+ return;
+
+ // Live-in registers are in use.
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ // Pristine CSRs are also unavailable.
+ BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setUsed(I);
}
void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
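The new initRegState resets the availability mask in three steps: everything starts free, reserved registers are carved out, then block live-ins (and pristine callee-saved registers) are marked used. A std::bitset model with invented register numbers:

#include <bitset>
#include <iostream>
#include <vector>

int main() {
  const unsigned NumPhysRegs = 16;
  std::bitset<16> RegsAvailable, ReservedRegs;
  ReservedRegs.set(13); // e.g. the stack pointer
  ReservedRegs.set(15);

  RegsAvailable.set();           // all registers start out unused
  RegsAvailable ^= ReservedRegs; // reserved registers are always in use

  std::vector<unsigned> LiveIns = {0, 1};
  for (unsigned R : LiveIns)
    RegsAvailable.reset(R);      // live-in registers are in use too

  std::cout << "free regs: " << RegsAvailable.count() << " of " << NumPhysRegs
            << '\n'; // 16 - 2 reserved - 2 live-in = 12
  return 0;
}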
@@ -85,6 +84,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
"Target changed?");
+ // Self-initialize.
if (!MBB) {
NumPhysRegs = TRI->getNumRegs();
RegsAvailable.resize(NumPhysRegs);
@@ -100,73 +100,26 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
CalleeSavedRegs.set(CSRegs[i]);
}
- MBB = mbb;
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
- CurrDist = 0;
- DistanceMap.clear();
-
- // All registers started out unused.
- RegsAvailable.set();
-
- // Reserved registers are always used.
- RegsAvailable ^= ReservedRegs;
-
- // Live-in registers are in use.
- if (!MBB->livein_empty())
- for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I)
- setUsed(*I);
+ // RS used within emit{Pro,Epi}logue()
+ if (mbb != MBB) {
+ MBB = mbb;
+ initRegState();
+ }
Tracking = false;
}
-void RegScavenger::restoreScavengedReg() {
- TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
- ScavengingFrameIndex, ScavengedRC);
- MachineBasicBlock::iterator II = prior(MBBI);
- TRI->eliminateFrameIndex(II, 0, this);
- setUsed(ScavengedReg);
- ScavengedReg = 0;
- ScavengedRC = NULL;
+void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ BV.set(*R);
}
-#ifndef NDEBUG
-/// isLiveInButUnusedBefore - Return true if register is livein the MBB not
-/// not used before it reaches the MI that defines register.
-static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
- MachineBasicBlock *MBB,
- const TargetRegisterInfo *TRI,
- MachineRegisterInfo* MRI) {
- // First check if register is livein.
- bool isLiveIn = false;
- for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I)
- if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
- isLiveIn = true;
- break;
- }
- if (!isLiveIn)
- return false;
-
- // Is there any use of it before the specified MI?
- SmallPtrSet<MachineInstr*, 4> UsesInMBB;
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI->getParent() == MBB)
- UsesInMBB.insert(UseMI);
- }
- if (UsesInMBB.empty())
- return true;
-
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
- if (UsesInMBB.count(&*I))
- return false;
- return true;
+void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ BV.set(*R);
}
-#endif
void RegScavenger::forward() {
// Move ptr forward.
@@ -179,7 +132,6 @@ void RegScavenger::forward() {
}
MachineInstr *MI = MBBI;
- DistanceMap.insert(std::make_pair(MI, CurrDist++));
if (MI == ScavengeRestore) {
ScavengedReg = 0;
@@ -187,153 +139,63 @@ void RegScavenger::forward() {
ScavengeRestore = NULL;
}
-#if 0
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
- return;
-#endif
-
- // Separate register operands into 3 classes: uses, defs, earlyclobbers.
- SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
- SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
- SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ BitVector EarlyClobberRegs(NumPhysRegs);
+ BitVector KillRegs(NumPhysRegs);
+ BitVector DefRegs(NumPhysRegs);
+ BitVector DeadRegs(NumPhysRegs);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef())
+ if (!MO.isReg() || MO.isUndef())
continue;
- if (MO.isUse())
- UseMOs.push_back(std::make_pair(&MO,i));
- else if (MO.isEarlyClobber())
- EarlyClobberMOs.push_back(std::make_pair(&MO,i));
- else
- DefMOs.push_back(std::make_pair(&MO,i));
- }
-
- // Process uses first.
- BitVector KillRegs(NumPhysRegs);
- for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
- const MachineOperand MO = *UseMOs[i].first;
unsigned Reg = MO.getReg();
-
- assert(isUsed(Reg) && "Using an undefined register!");
-
- if (MO.isKill() && !isReserved(Reg)) {
- KillRegs.set(Reg);
-
- // Mark sub-registers as used.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- KillRegs.set(SubReg);
- }
- }
-
- // Change states of all registers after all the uses are processed to guard
- // against multiple uses.
- setUnused(KillRegs);
-
- // Process early clobber defs then process defs. We can have a early clobber
- // that is dead, it should not conflict with a def that happens one "slot"
- // (see InstrSlots in LiveIntervalAnalysis.h) later.
- unsigned NumECs = EarlyClobberMOs.size();
- unsigned NumDefs = DefMOs.size();
-
- for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
- const MachineOperand &MO = (i < NumECs)
- ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first;
- unsigned Idx = (i < NumECs)
- ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second;
- unsigned Reg = MO.getReg();
- if (MO.isUndef())
+ if (!Reg || isReserved(Reg))
continue;
- // If it's dead upon def, then it is now free.
- if (MO.isDead()) {
- setUnused(Reg, MI);
- continue;
- }
-
- // Skip two-address destination operand.
- unsigned UseIdx;
- if (MI->isRegTiedToUseOperand(Idx, &UseIdx) &&
- !MI->getOperand(UseIdx).isUndef()) {
- assert(isUsed(Reg) && "Using an undefined register!");
- continue;
+ if (MO.isUse()) {
+ // Two-address operands implicitly kill.
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (MO.isDead())
+ addRegWithSubRegs(DeadRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ if (MO.isEarlyClobber())
+ addRegWithAliases(EarlyClobberRegs, Reg);
}
-
- // Skip if this is merely redefining part of a super-register.
- if (RedefinesSuperRegPart(MI, MO, TRI))
- continue;
-
- // Implicit def is allowed to "re-define" any register. Similarly,
- // implicitly defined registers can be clobbered.
- assert((isReserved(Reg) || isUnused(Reg) ||
- isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
- "Re-defining a live register!");
- setUsed(Reg);
}
-}
-
-void RegScavenger::backward() {
- assert(Tracking && "Not tracking states!");
- assert(MBBI != MBB->begin() && "Already at start of basic block!");
- // Move ptr backward.
- MBBI = prior(MBBI);
-
- MachineInstr *MI = MBBI;
- DistanceMap.erase(MI);
- --CurrDist;
- // Separate register operands into 3 classes: uses, defs, earlyclobbers.
- SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
- SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
- SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+ // Verify uses and defs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef())
- continue;
- if (MO.isUse())
- UseMOs.push_back(std::make_pair(&MO,i));
- else if (MO.isEarlyClobber())
- EarlyClobberMOs.push_back(std::make_pair(&MO,i));
- else
- DefMOs.push_back(std::make_pair(&MO,i));
- }
-
-
- // Process defs first.
- unsigned NumECs = EarlyClobberMOs.size();
- unsigned NumDefs = DefMOs.size();
- for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
- const MachineOperand &MO = (i < NumDefs)
- ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first;
- unsigned Idx = (i < NumECs)
- ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second;
- if (MO.isUndef())
- continue;
-
- // Skip two-address destination operand.
- if (MI->isRegTiedToUseOperand(Idx))
+ if (!MO.isReg() || MO.isUndef())
continue;
-
unsigned Reg = MO.getReg();
- assert(isUsed(Reg));
- if (!isReserved(Reg))
- setUnused(Reg, MI);
+ if (!Reg || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ assert(isUsed(Reg) && "Using an undefined register!");
+ assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
+ "Using an early clobbered register!");
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
}
- // Process uses.
- BitVector UseRegs(NumPhysRegs);
- for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
- const MachineOperand MO = *UseMOs[i].first;
- unsigned Reg = MO.getReg();
- assert(isUnused(Reg) || isReserved(Reg));
- UseRegs.set(Reg);
-
- // Set the sub-registers as "used".
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- UseRegs.set(SubReg);
- }
- setUsed(UseRegs);
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUnused(DeadRegs);
+ setUsed(DefRegs);
}
void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
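The rewritten forward() gathers kill/dead/def effects into bit vectors during the operand scan and commits them only afterwards, so a register mentioned twice by one instruction cannot flip state twice. A reduced model of that two-phase update:

#include <bitset>
#include <vector>

struct Operand { unsigned Reg; bool IsUse, IsKill, IsDead; };

int main() {
  std::bitset<16> Used;
  Used.set(3); Used.set(4);

  // One instruction: %r3 is used and killed, %r4 is redefined.
  std::vector<Operand> MI = {{3, true, true, false}, {4, false, false, false}};

  std::bitset<16> KillRegs, DeadRegs, DefRegs;
  for (const Operand &MO : MI) {
    if (MO.IsUse) {
      if (MO.IsKill) KillRegs.set(MO.Reg);
    } else {
      if (MO.IsDead) DeadRegs.set(MO.Reg);
      else           DefRegs.set(MO.Reg);
    }
  }
  // Commit in one step, mirroring the setUnused/setUsed calls at the end.
  Used &= ~KillRegs;
  Used &= ~DeadRegs;
  Used |= DefRegs;
  return (Used.test(4) && !Used.test(3)) ? 0 : 1;
}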
@@ -351,129 +213,110 @@ static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
Mask.set(*I);
}
-unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
- const BitVector &Candidates) const {
- // Mask off the registers which are not in the TargetRegisterClass.
- BitVector RegsAvailableCopy(NumPhysRegs, false);
- CreateRegClassMask(RegClass, RegsAvailableCopy);
- RegsAvailableCopy &= RegsAvailable;
-
- // Restrict the search to candidates.
- RegsAvailableCopy &= Candidates;
-
- // Returns the first unused (bit is set) register, or 0 is none is found.
- int Reg = RegsAvailableCopy.find_first();
- return (Reg == -1) ? 0 : Reg;
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ return *I;
+ return 0;
}
-unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
- bool ExCalleeSaved) const {
- // Mask off the registers which are not in the TargetRegisterClass.
- BitVector RegsAvailableCopy(NumPhysRegs, false);
- CreateRegClassMask(RegClass, RegsAvailableCopy);
- RegsAvailableCopy &= RegsAvailable;
-
- // If looking for a non-callee-saved register, mask off all the callee-saved
- // registers.
- if (ExCalleeSaved)
- RegsAvailableCopy &= ~CalleeSavedRegs;
-
- // Returns the first unused (bit is set) register, or 0 is none is found.
- int Reg = RegsAvailableCopy.find_first();
- return (Reg == -1) ? 0 : Reg;
-}
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after MBBI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(MI != ME && "MI already at terminator");
+
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ Candidates.reset(MO.getReg());
+ for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ Candidates.reset(*R);
+ }
-/// findFirstUse - Calculate the distance to the first use of the
-/// specified register.
-MachineInstr*
-RegScavenger::findFirstUse(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I, unsigned Reg,
- unsigned &Dist) {
- MachineInstr *UseMI = 0;
- Dist = ~0U;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineInstr *UDMI = &*RI;
- if (UDMI->getParent() != MBB)
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
- if (DI == DistanceMap.end()) {
- // If it's not in map, it's below current MI, let's initialize the
- // map.
- I = next(I);
- unsigned Dist = CurrDist + 1;
- while (I != MBB->end()) {
- DistanceMap.insert(std::make_pair(I, Dist++));
- I = next(I);
- }
- }
- DI = DistanceMap.find(UDMI);
- if (DI->second > CurrDist && DI->second < Dist) {
- Dist = DI->second;
- UseMI = UDMI;
- }
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
}
- return UseMI;
+
+ // We ran out of candidates, so stop the search.
+ UseMI = MI;
+ return Survivor;
}
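findSurvivorReg marches forward up to InstrLimit instructions, knocking out every candidate an instruction touches and keeping the first remaining one; the walk stops early if the pool empties. A standalone rendering of that search:

#include <bitset>
#include <vector>

int main() {
  std::bitset<8> Candidates;
  Candidates.set(1); Candidates.set(2); Candidates.set(5);

  // Registers touched by each of the next few instructions.
  std::vector<std::vector<unsigned> > Touched = {{1}, {5}, {2}};

  int Survivor = 1;   // stands in for Candidates.find_first()
  unsigned InstrLimit = 25;
  for (unsigned i = 0; i < Touched.size() && InstrLimit > 0; ++i, --InstrLimit) {
    for (unsigned R : Touched[i])
      Candidates.reset(R);       // drop candidates this instruction touches
    if (Candidates.test(Survivor))
      continue;                  // survivor untouched: keep going
    if (Candidates.none())
      break;                     // all candidates gone: stop the search here
    for (int R = 0; R < 8; ++R)  // pick the next surviving candidate
      if (Candidates.test(R)) { Survivor = R; break; }
  }
  return Survivor == 2 ? 0 : 1;  // %r2 stays untouched longest in this toy
}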
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
- assert(ScavengingFrameIndex >= 0 &&
- "Cannot scavenge a register without an emergency spill slot!");
-
// Mask off the registers which are not in the TargetRegisterClass.
BitVector Candidates(NumPhysRegs, false);
CreateRegClassMask(RC, Candidates);
- Candidates ^= ReservedRegs; // Do not include reserved registers.
+ // Do not include reserved registers.
+ Candidates ^= ReservedRegs & Candidates;
// Exclude all the registers being used by the instruction.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
MachineOperand &MO = I->getOperand(i);
- if (MO.isReg())
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
Candidates.reset(MO.getReg());
}
// Find the register whose use is furthest away.
- unsigned SReg = 0;
- unsigned MaxDist = 0;
- MachineInstr *MaxUseMI = 0;
- int Reg = Candidates.find_first();
- while (Reg != -1) {
- unsigned Dist;
- MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- unsigned AsDist;
- MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist);
- if (AsDist < Dist) {
- Dist = AsDist;
- UseMI = AsUseMI;
- }
- }
- if (Dist >= MaxDist) {
- MaxDist = Dist;
- MaxUseMI = UseMI;
- SReg = Reg;
- }
- Reg = Candidates.find_next(Reg);
- }
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
- if (ScavengedReg != 0) {
- assert(0 && "Scavenger slot is live, unable to scavenge another register!");
- abort();
- }
+ // If we found an unused register there is no reason to spill it. We have
+ // probably found a callee-saved register that has been saved in the
+ // prologue, but happens to be unused at this point.
+ if (!isAliasUsed(SReg))
+ return SReg;
- // Spill the scavenged register before I.
- TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
- MachineBasicBlock::iterator II = prior(I);
- TRI->eliminateFrameIndex(II, SPAdj, this);
+ assert(ScavengedReg == 0 &&
+ "Scavenger slot is live, unable to scavenge another register!");
- // Restore the scavenged register before its use (or first terminator).
- II = MaxUseMI
- ? MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator();
- TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC);
- ScavengeRestore = prior(II);
+ // Avoid infinite regress
ScavengedReg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+ MachineBasicBlock::iterator II = prior(I);
+ TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC);
+ } else
+ TRI->restoreScavengerRegister(*MBB, UseMI, RC, SReg);
+
+ ScavengeRestore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // ScavengedReg = SReg;
ScavengedRC = RC;
return SReg;
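The survivor search driving the scavenger above can be read in isolation: walk forward a bounded number of instructions, knock out every candidate register an instruction touches, and keep the most recent register still standing. A self-contained sketch of that loop, using a toy bitset in place of BitVector and an invented per-instruction use set (none of these names come from the LLVM sources):

    #include <bitset>
    #include <vector>

    static const unsigned NumRegs = 8;     // toy register file size
    typedef std::bitset<NumRegs> RegSet;

    // Toy analogue of RegScavenger::findSurvivorReg: UsedByInstr[i] holds
    // the registers read, written, or clobbered by instruction i.
    unsigned findSurvivor(const std::vector<RegSet> &UsedByInstr,
                          RegSet Candidates) {
      unsigned Survivor = 0;
      for (unsigned r = 0; r != NumRegs; ++r)
        if (Candidates[r]) { Survivor = r; break; }

      // Bound the search, as the diff does with a SearchLimit of 25.
      unsigned Limit = UsedByInstr.size() < 25 ? UsedByInstr.size() : 25;
      for (unsigned i = 0; i != Limit; ++i) {
        Candidates &= ~UsedByInstr[i];     // this instruction kills these
        if (Candidates[Survivor])
          continue;                        // survivor untouched; keep it
        if (Candidates.none())
          break;                           // all candidates gone; stop
        for (unsigned r = 0; r != NumRegs; ++r)
          if (Candidates[r]) { Survivor = r; break; }
      }
      return Survivor;
    }

A spill around the scavenged register is then only needed when the chosen survivor or one of its aliases is actually live, which is what the isAliasUsed early return above avoids.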
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index a8452dff272b..5a59862090b1 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -19,6 +19,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
@@ -40,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const {
if (SUnit *SU = Sequence[i])
SU->dump(this);
else
- cerr << "**** NOOP ****\n";
+ errs() << "**** NOOP ****\n";
}
}
@@ -59,9 +60,11 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
Schedule();
- DOUT << "*** Final schedule ***\n";
- DEBUG(dumpSchedule());
- DOUT << "\n";
+ DEBUG({
+ errs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ errs() << '\n';
+ });
}
/// addPred - This adds the specified edge as a pred of the current node if
@@ -79,13 +82,19 @@ void SUnit::addPred(const SDep &D) {
SUnit *N = D.getSUnit();
// Update the bookkeeping.
if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
++NumPreds;
++N->NumSuccs;
}
- if (!N->isScheduled)
+ if (!N->isScheduled) {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
++NumPredsLeft;
- if (!isScheduled)
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
++N->NumSuccsLeft;
+ }
Preds.push_back(D);
N->Succs.push_back(P);
if (P.getLatency() != 0) {
@@ -118,13 +127,19 @@ void SUnit::removePred(const SDep &D) {
Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
--NumPreds;
--N->NumSuccs;
}
- if (!N->isScheduled)
+ if (!N->isScheduled) {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
--NumPredsLeft;
- if (!isScheduled)
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
--N->NumSuccsLeft;
+ }
if (P.getLatency() != 0) {
this->setDepthDirty();
N->setHeightDirty();
@@ -256,56 +271,58 @@ void SUnit::ComputeHeight() {
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
- cerr << "SU(" << NodeNum << "): ";
+ errs() << "SU(" << NodeNum << "): ";
G->dumpNode(this);
}
void SUnit::dumpAll(const ScheduleDAG *G) const {
dump(G);
- cerr << " # preds left : " << NumPredsLeft << "\n";
- cerr << " # succs left : " << NumSuccsLeft << "\n";
- cerr << " Latency : " << Latency << "\n";
- cerr << " Depth : " << Depth << "\n";
- cerr << " Height : " << Height << "\n";
+ errs() << " # preds left : " << NumPredsLeft << "\n";
+ errs() << " # succs left : " << NumSuccsLeft << "\n";
+ errs() << " Latency : " << Latency << "\n";
+ errs() << " Depth : " << Depth << "\n";
+ errs() << " Height : " << Height << "\n";
if (Preds.size() != 0) {
- cerr << " Predecessors:\n";
+ errs() << " Predecessors:\n";
for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
- cerr << " ";
+ errs() << " ";
switch (I->getKind()) {
- case SDep::Data: cerr << "val "; break;
- case SDep::Anti: cerr << "anti"; break;
- case SDep::Output: cerr << "out "; break;
- case SDep::Order: cerr << "ch "; break;
+ case SDep::Data: errs() << "val "; break;
+ case SDep::Anti: errs() << "anti"; break;
+ case SDep::Output: errs() << "out "; break;
+ case SDep::Order: errs() << "ch "; break;
}
- cerr << "#";
- cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ errs() << "#";
+ errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
- cerr << " *";
- cerr << "\n";
+ errs() << " *";
+ errs() << ": Latency=" << I->getLatency();
+ errs() << "\n";
}
}
if (Succs.size() != 0) {
- cerr << " Successors:\n";
+ errs() << " Successors:\n";
for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
I != E; ++I) {
- cerr << " ";
+ errs() << " ";
switch (I->getKind()) {
- case SDep::Data: cerr << "val "; break;
- case SDep::Anti: cerr << "anti"; break;
- case SDep::Output: cerr << "out "; break;
- case SDep::Order: cerr << "ch "; break;
+ case SDep::Data: errs() << "val "; break;
+ case SDep::Anti: errs() << "anti"; break;
+ case SDep::Output: errs() << "out "; break;
+ case SDep::Order: errs() << "ch "; break;
}
- cerr << "#";
- cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ errs() << "#";
+ errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
- cerr << " *";
- cerr << "\n";
+ errs() << " *";
+ errs() << ": Latency=" << I->getLatency();
+ errs() << "\n";
}
}
- cerr << "\n";
+ errs() << "\n";
}
#ifndef NDEBUG
@@ -323,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
continue;
}
if (!AnyNotSched)
- cerr << "*** Scheduling failed! ***\n";
+ errs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- cerr << "has not been scheduled!\n";
+ errs() << "has not been scheduled!\n";
AnyNotSched = true;
}
if (SUnits[i].isScheduled &&
(isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
unsigned(INT_MAX)) {
if (!AnyNotSched)
- cerr << "*** Scheduling failed! ***\n";
+ errs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- cerr << "has an unexpected "
+ errs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
}
if (isBottomUp) {
if (SUnits[i].NumSuccsLeft != 0) {
if (!AnyNotSched)
- cerr << "*** Scheduling failed! ***\n";
+ errs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- cerr << "has successors left!\n";
+ errs() << "has successors left!\n";
AnyNotSched = true;
}
} else {
if (SUnits[i].NumPredsLeft != 0) {
if (!AnyNotSched)
- cerr << "*** Scheduling failed! ***\n";
+ errs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- cerr << "has predecessors left!\n";
+ errs() << "has predecessors left!\n";
AnyNotSched = true;
}
}
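Two mechanical patterns recur throughout the ScheduleDAG.cpp changes above: cerr is replaced by the raw_ostream errs(), and multi-statement DOUT sequences are folded into a single DEBUG({ ... }) block so the whole group compiles away together in release builds. A minimal sketch of the block form, assuming only the standard llvm/Support/Debug.h machinery (the pass name is made up):

    #define DEBUG_TYPE "sched-sketch"      // hypothetical -debug-only name
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printSchedule(const char *const *Names, unsigned N) {
      // One DEBUG block groups the banner, the loop, and the trailing
      // newline; with NDEBUG (or without -debug) none of it runs.
      DEBUG({
        errs() << "*** Final schedule ***\n";
        for (unsigned i = 0; i != N; ++i)
          errs() << Names[i] << '\n';
        errs() << '\n';
      });
    }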
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 770f5bbbdbb1..0d15c0214125 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -28,10 +28,6 @@
#include "llvm/Support/MathExtras.h"
using namespace llvm;
-void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) {
- MI->addMemOperand(MF, MO);
-}
-
void ScheduleDAG::EmitNoop() {
TII->insertNoop(*BB, InsertPos);
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 8e18b3d17fda..44e9296661aa 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,8 +14,10 @@
#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
+#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -45,35 +47,24 @@ void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
ScheduleDAG::Run(bb, end);
}
-/// getOpcode - If this is an Instruction or a ConstantExpr, return the
-/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V))
- return I->getOpcode();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode();
- // Use UserOp1 to mean there's no opcode.
- return Instruction::UserOp1;
-}
-
/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
do {
- if (const User *U = dyn_cast<User>(V)) {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
// If we find a ptrtoint, we can transfer control back to the
// regular getUnderlyingObjectFromInt.
- if (getOpcode(U) == Instruction::PtrToInt)
+ if (U->getOpcode() == Instruction::PtrToInt)
return U->getOperand(0);
// If we find an add of a constant or a multiplied value, it's
// likely that the other operand will lead us to the base
// object. We don't have to worry about the case where the
- // object address is somehow being computed bt the multiply,
+ // object address is somehow being computed by the multiply,
// because our callers only care when the result is an
// identifiable object.
- if (getOpcode(U) != Instruction::Add ||
+ if (U->getOpcode() != Instruction::Add ||
(!isa<ConstantInt>(U->getOperand(1)) &&
- getOpcode(U->getOperand(1)) != Instruction::Mul))
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
return V;
V = U->getOperand(0);
} else {
@@ -90,7 +81,7 @@ static const Value *getUnderlyingObject(const Value *V) {
do {
V = V->getUnderlyingObject();
// If it found an inttoptr, use special code to continue climbing.
- if (getOpcode(V) != Instruction::IntToPtr)
+ if (Operator::getOpcode(V) != Instruction::IntToPtr)
break;
const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
// If that succeeded in finding a pointer, continue the search.
@@ -106,11 +97,11 @@ static const Value *getUnderlyingObject(const Value *V) {
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) {
if (!MI->hasOneMemOperand() ||
- !MI->memoperands_begin()->getValue() ||
- MI->memoperands_begin()->isVolatile())
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
return 0;
- const Value *V = MI->memoperands_begin()->getValue();
+ const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
return 0;
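The (*MI->memoperands_begin())-> spelling here and below reflects an interface change: the memory-operand iterator now yields MachineMemOperand pointers rather than references, so each element must be dereferenced before its fields are reached. The shape of the change, reduced to a standalone toy (MemOp and the helper are invented for illustration):

    #include <cstddef>
    #include <vector>

    struct MemOp { bool Volatile; const void *Value; };   // stand-in
    typedef std::vector<MemOp*>::const_iterator mmo_iterator;

    // Analogous to getUnderlyingObjectForInstr: only a single,
    // non-volatile operand with a known value is usable.
    const void *valueOfOnlyMemOp(mmo_iterator B, mmo_iterator E) {
      if (E - B != 1 || (*B)->Volatile || !(*B)->Value)
        return 0;      // note (*B)->, not B->, now that *B is a pointer
      return (*B)->Value;
    }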
@@ -132,7 +123,7 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
}
}
-void ScheduleDAGInstrs::BuildSchedGraph() {
+void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We'll be allocating one SUnit for each instruction, plus one for
// the region exit node.
SUnits.reserve(BB->size());
@@ -155,8 +146,8 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
bool UnitLatencies = ForceUnitLatencies();
// Ask the target if address-backscheduling is desirable, and if so how much.
- unsigned SpecialAddressLatency =
- TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
+ const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
// Walk the list of instructions, from bottom moving up.
for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
@@ -184,16 +175,20 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
std::vector<SUnit *> &UseList = Uses[Reg];
std::vector<SUnit *> &DefList = Defs[Reg];
- // Optionally add output and anti dependencies.
- // TODO: Using a latency of 1 here assumes there's no cost for
- // reusing registers.
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &DefList = Defs[*Alias];
@@ -202,7 +197,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
}
}
@@ -216,6 +211,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
// Optionally add in a special extra latency for nodes that
// feed addresses.
// TODO: Do this for register aliases too.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about
+ // it.
if (SpecialAddressLatency != 0 && !UnitLatencies) {
MachineInstr *UseMI = UseSU->getInstr();
const TargetInstrDesc &UseTID = UseMI->getDesc();
@@ -226,15 +225,29 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
}
- UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+ ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+ }
+ UseSU->addPred(dep);
}
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &UseList = Uses[*Alias];
for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
SUnit *UseSU = UseList[i];
- if (UseSU != SU)
- UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+ if (UseSU != SU) {
+ const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+ ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+ }
+ UseSU->addPred(dep);
+ }
}
}
@@ -323,10 +336,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
if (!ChainTID.isCall() &&
!ChainTID.hasUnmodeledSideEffects() &&
ChainMI->hasOneMemOperand() &&
- !ChainMI->memoperands_begin()->isVolatile() &&
- ChainMI->memoperands_begin()->getValue())
+ !(*ChainMI->memoperands_begin())->isVolatile() &&
+ (*ChainMI->memoperands_begin())->getValue())
// We know that the Chain accesses one specific memory location.
- ChainMMO = &*ChainMI->memoperands_begin();
+ ChainMMO = *ChainMI->memoperands_begin();
else
// Unknown memory accesses. Assume the worst.
ChainMMO = 0;
@@ -362,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
// Treat all other stores conservatively.
goto new_chain;
} else if (TID.mayLoad()) {
- if (TII->isInvariantLoad(MI)) {
+ if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else if (const Value *V = getUnderlyingObjectForInstr(MI)) {
// A load from a specific PseudoSourceValue. Add precise dependencies.
@@ -409,10 +422,9 @@ void ScheduleDAGInstrs::FinishBlock() {
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- // Compute the latency for the node. We use the sum of the latencies for
- // all nodes flagged together into this SUnit.
+ // Compute the latency for the node.
SU->Latency =
- InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+ InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
// Simplistic target-independent heuristic: assume that loads take
// extra time.
@@ -421,6 +433,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
SU->Latency += 2;
}
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty())
+ return;
+
+ // For a data dependency with a known register...
+ if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+ return;
+
+ const unsigned Reg = dep.getReg();
+
+ // ... find the definition of the register in the defining
+ // instruction
+ MachineInstr *DefMI = Def->getInstr();
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
+ if (DefCycle >= 0) {
+ MachineInstr *UseMI = Use->getInstr();
+ const unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+ // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = InstrItins.getOperandCycle(UseClass, i);
+ if (UseCycle >= 0)
+ Latency = std::max(Latency, DefCycle - UseCycle + 1);
+ }
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+ }
+}
+
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
SU->getInstr()->dump();
}
@@ -438,7 +494,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
}
// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+MachineBasicBlock *ScheduleDAGInstrs::
+EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
// For MachineInstr-based scheduling, we're rescheduling the instructions in
// the block, so start by removing them from the block.
while (Begin != InsertPos) {
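The new ComputeOperandLatency above boils down to one arithmetic rule: if the itinerary says the def operand is produced at cycle DefCycle and a matching use operand is read at cycle UseCycle, the edge needs latency DefCycle - UseCycle + 1, maximized over all matching use operands. A standalone sketch of just that computation (the cycle numbers would really come from InstrItineraryData::getOperandCycle):

    #include <algorithm>
    #include <vector>

    // Returns the override latency, or -1 if no operand has cycle data,
    // in which case the existing dependence latency should be kept.
    int operandLatency(int DefCycle, const std::vector<int> &UseCycles) {
      int Latency = -1;
      for (unsigned i = 0, e = UseCycles.size(); i != e; ++i)
        if (UseCycles[i] >= 0)
          Latency = std::max(Latency, DefCycle - UseCycles[i] + 1);
      return Latency;
    }

For example, a def completing at cycle 3 against a use operand read at cycle 1 yields a latency of 3.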
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 00d6268d1a14..29e1c98cb31f 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -15,12 +15,13 @@
#ifndef SCHEDULEDAGINSTRS_H
#define SCHEDULEDAGINSTRS_H
-#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
@@ -120,7 +121,6 @@ namespace llvm {
SmallSet<unsigned, 8> LoopLiveInRegs;
public:
- MachineBasicBlock *BB; // Current basic block
MachineBasicBlock::iterator Begin; // The beginning of the range to
// be scheduled. The range extends
// to InsertPos.
@@ -154,13 +154,20 @@ namespace llvm {
/// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
/// given as input.
- virtual void BuildSchedGraph();
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
- virtual MachineBasicBlock *EmitSchedule();
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const;
+
+ virtual MachineBasicBlock*
+ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*);
/// StartBlock - Prepare to perform scheduling in the given block.
///
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 5efd274eea50..95ad05e7d784 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -86,14 +86,14 @@ void ScheduleDAG::viewGraph() {
// This code is only for debugging!
#ifndef NDEBUG
if (BB->getBasicBlock())
- ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
- BB->getBasicBlock()->getName());
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
+ ":" + BB->getBasicBlock()->getNameStr());
else
- ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getName());
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
#else
- cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
- << "systems with Graphviz or gv!\n";
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 4ffe88fda5a5..c766859ae9c8 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG
CallingConvLower.cpp
DAGCombiner.cpp
FastISel.cpp
+ InstrEmitter.cpp
LegalizeDAG.cpp
LegalizeFloatTypes.cpp
LegalizeIntegerTypes.cpp
@@ -9,13 +10,12 @@ add_llvm_library(LLVMSelectionDAG
LegalizeTypesGeneric.cpp
LegalizeVectorOps.cpp
LegalizeVectorTypes.cpp
- ScheduleDAGSDNodes.cpp
- ScheduleDAGSDNodesEmit.cpp
ScheduleDAGFast.cpp
ScheduleDAGList.cpp
ScheduleDAGRRList.cpp
- SelectionDAGBuild.cpp
+ ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
+ SelectionDAGBuild.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
TargetLowering.cpp
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index 7cd2b73e8704..fbe40b678639 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -13,15 +13,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
- SmallVector<CCValAssign, 16> &locs)
+CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &C)
: CallingConv(CC), IsVarArg(isVarArg), TM(tm),
- TRI(*TM.getRegisterInfo()), Locs(locs) {
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) {
// No stack is used.
StackOffset = 0;
@@ -31,8 +33,8 @@ CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
// HandleByVal - Allocate a stack slot large enough to pass an argument by
// value. The size and alignment information of the argument is encoded in its
// parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
+void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign,
ISD::ArgFlagsTy ArgFlags) {
unsigned Align = ArgFlags.getByValAlign();
@@ -55,94 +57,107 @@ void CCState::MarkAllocated(unsigned Reg) {
UsedRegs[Reg/32] |= 1 << (Reg&31);
}
-/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// AnalyzeFormalArguments - Analyze an array of argument values,
/// incorporating info about the formals into this state.
-void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) {
- unsigned NumArgs = TheArgs->getNumValues()-1;
-
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ArgVT = TheArgs->getValueType(i);
- ISD::ArgFlagsTy ArgFlags =
- cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags();
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
- cerr << "Formal argument #" << i << " has unhandled type "
- << ArgVT.getMVTString() << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString();
+#endif
+ llvm_unreachable(0);
}
}
}
-/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// AnalyzeReturn - Analyze the returned values of a return,
/// incorporating info about the result values into this state.
-void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) {
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
// Determine which register each value should be copied into.
- for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) {
- MVT VT = TheRet->getOperand(i*2+1).getValueType();
- ISD::ArgFlagsTy ArgFlags =
- cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags();
- if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){
- cerr << "Return operand #" << i << " has unhandled type "
- << VT.getMVTString() << "\n";
- abort();
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].Val.getValueType();
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ errs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString();
+#endif
+ llvm_unreachable(0);
}
}
}
-/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
-/// about the passed values into this state.
-void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) {
- unsigned NumOps = TheCall->getNumArgs();
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- MVT ArgVT = TheCall->getArg(i).getValueType();
- ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i);
+ EVT ArgVT = Outs[i].Val.getValueType();
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
- cerr << "Call operand #" << i << " has unhandled type "
- << ArgVT.getMVTString() << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString();
+#endif
+ llvm_unreachable(0);
}
}
}
/// AnalyzeCallOperands - Same as above except it takes vectors of types
/// and argument flags.
-void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn) {
unsigned NumOps = ArgVTs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- MVT ArgVT = ArgVTs[i];
+ EVT ArgVT = ArgVTs[i];
ISD::ArgFlagsTy ArgFlags = Flags[i];
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
- cerr << "Call operand #" << i << " has unhandled type "
- << ArgVT.getMVTString() << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString();
+#endif
+ llvm_unreachable(0);
}
}
}
-/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// AnalyzeCallResult - Analyze the return values of a call,
/// incorporating info about the passed values into this state.
-void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) {
- for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) {
- MVT VT = TheCall->getRetValType(i);
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (TheCall->isInreg())
- Flags.setInReg();
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
- cerr << "Call result #" << i << " has unhandled type "
- << VT.getMVTString() << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString();
+#endif
+ llvm_unreachable(0);
}
}
}
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
-void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
- cerr << "Call result has unhandled type "
- << VT.getMVTString() << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Call result has unhandled type "
+ << VT.getEVTString();
+#endif
+ llvm_unreachable(0);
}
}
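Each of the Analyze* failure paths above moves from cerr plus abort() to the same idiom: print the diagnostic only when assertions are enabled, then call llvm_unreachable. Reduced to a sketch (checkHandled is an invented wrapper; the headers are the ones this file now includes):

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void checkHandled(bool Handled, unsigned ArgNo) {
      if (Handled)
        return;
    #ifndef NDEBUG
      // The message is compiled out of release builds entirely;
      // llvm_unreachable still marks the path as impossible.
      errs() << "Argument #" << ArgNo << " has unhandled type";
    #endif
      llvm_unreachable(0);
    }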
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 609ec82c5ad1..1ed308215201 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19,6 +19,7 @@
#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -33,7 +34,9 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>
using namespace llvm;
@@ -213,12 +216,12 @@ namespace {
SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
- SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans = true);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
- SDValue CombineConsecutiveLoads(SDNode *N, MVT VT);
- SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
@@ -236,14 +239,17 @@ namespace {
/// overlap.
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
SDValue Ptr2, int64_t Size2,
- const Value *SrcValue2, int SrcValueOffset2) const;
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue, int &SrcValueOffset) const;
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
@@ -251,7 +257,7 @@ namespace {
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
- MVT getShiftAmountTy() {
+ EVT getShiftAmountTy() {
return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
}
@@ -392,7 +398,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
switch (Op.getOpcode()) {
- default: assert(0 && "Unknown code");
+ default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
@@ -495,7 +501,7 @@ static bool isOneUseSetCC(SDValue N) {
SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
SDValue N0, SDValue N1) {
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
if (isa<ConstantSDNode>(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
@@ -537,10 +543,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
- DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG));
- DOUT << " and " << NumTo-1 << " other values\n";
- DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i)
+ DEBUG(errs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ errs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
assert(N->getValueType(i) == To[i].getValueType() &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
@@ -612,9 +620,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(errs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ errs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ errs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
@@ -680,9 +690,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(errs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ errs() << '\n');
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -800,7 +812,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
@@ -877,7 +889,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
break;
case ISD::TokenFactor:
- if ((CombinerAA || Op.hasOneUse()) &&
+ if (Op.hasOneUse() &&
std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
// Queue up for processing.
TFs.push_back(Op.getNode());
@@ -898,7 +910,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
}
-
+
SDValue Result;
// If we've change things around then replace token factor.
@@ -922,9 +934,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorkListRemover DeadNodes(*this);
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
- &DeadNodes);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ } while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -933,7 +950,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
static
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
SelectionDAG &DAG) {
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
@@ -957,7 +974,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
@@ -1080,7 +1097,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
if (N->hasNUsesOfValue(0, 1))
@@ -1142,7 +1159,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
@@ -1215,7 +1232,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
@@ -1308,7 +1325,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
@@ -1395,7 +1412,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
@@ -1415,7 +1432,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
- MVT ADDVT = N1.getOperand(1).getValueType();
+ EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
@@ -1447,7 +1464,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
@@ -1489,7 +1506,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
@@ -1541,7 +1558,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1562,7 +1579,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1665,7 +1682,7 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
// For each of OP in AND/OR/XOR:
@@ -1677,7 +1694,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
N0.getOpcode() == ISD::SIGN_EXTEND ||
(N0.getOpcode() == ISD::TRUNCATE &&
!TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
- N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
@@ -1709,7 +1728,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue LL, LR, RL, RR, CC0, CC1;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N1.getValueType();
+ EVT VT = N1.getValueType();
unsigned BitWidth = VT.getSizeInBits();
// fold vector ops
@@ -1820,18 +1839,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (zext_inreg (extload x)) -> (zextload x)
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
unsigned BitWidth = N1.getValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getSizeInBits())) &&
+ BitWidth - MemVT.getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->getAlignment());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
@@ -1842,18 +1861,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
unsigned BitWidth = N1.getValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getSizeInBits())) &&
+ BitWidth - MemVT.getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->getAlignment());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
@@ -1869,24 +1888,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
LN0->isUnindexed() && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!LN0->isVolatile()) {
- MVT EVT = MVT::Other;
+ EVT ExtVT = MVT::Other;
uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
- EVT = MVT::getIntegerVT(ActiveBits);
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- MVT LoadedVT = LN0->getMemoryVT();
+ EVT LoadedVT = LN0->getMemoryVT();
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
- if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
- MVT PtrType = N0.getOperand(1).getValueType();
+ if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = N0.getOperand(1).getValueType();
// For big endian targets, we need to add an offset to the pointer to
// load the correct bytes. For little endian systems, we merely need to
// read fewer bytes from the same pointer.
- unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8;
- unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8;
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
unsigned Alignment = LN0->getAlignment();
SDValue NewPtr = LN0->getBasePtr();
@@ -1901,7 +1920,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue Load =
DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
- EVT, LN0->isVolatile(), Alignment);
+ ExtVT, LN0->isVolatile(), Alignment);
AddToWorkList(N);
CombineTo(N0.getNode(), Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1918,7 +1937,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue LL, LR, RL, RR, CC0, CC1;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N1.getValueType();
+ EVT VT = N1.getValueType();
// fold vector ops
if (VT.isVector()) {
@@ -1928,7 +1947,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, undef) -> -1
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(~0ULL, VT);
+ return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
// fold (or c1, c2) -> c1|c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
@@ -2058,7 +2077,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
- MVT VT = LHS.getValueType();
+ EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT)) return 0;
// The target must have at least one rotate flavor.
@@ -2219,7 +2238,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue LHS, RHS, CC;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
@@ -2258,8 +2277,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
switch (N0.getOpcode()) {
default:
- assert(0 && "Unhandled SetCC Equivalent!");
- abort();
+ llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
@@ -2388,7 +2406,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
!isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// If this is a signed shift right, and the high bit is modified by the
// logical operation, do not perform the transformation. The highBitSet
@@ -2418,7 +2436,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getSizeInBits();
// fold (shl c1, c2) -> c1<<c2
@@ -2443,7 +2461,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- MVT TruncVT = N1.getValueType();
+ EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC.trunc(TruncVT.getSizeInBits());
@@ -2474,20 +2492,33 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::SRL &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL << c1, VT));
- if (c2 > c1)
- return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c2-c1, N1.getValueType()));
- else
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c1-c2, N1.getValueType()));
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1),
+ VT);
+ SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+ N0.getOperand(0),
+ HiBitsMask);
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
- if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL << N1C->getZExtValue(), VT));
+ HiBitsMask);
+ }
return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
}
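The rewritten SHL folds above replace ~0ULL << c with APInt::getHighBitsSet(Width, Width - c) and guard c1 < VT.getSizeInBits(). Both points matter: a 64-bit shift cannot form the mask for integer types wider than 64 bits, and shifting by an amount greater than or equal to the word size is undefined behavior. A small check of the equivalence on narrow widths (assuming only llvm/ADT/APInt.h):

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    #include <stdint.h>
    using namespace llvm;

    // For Width <= 64 the APInt mask matches the shift-based one;
    // getHighBitsSet keeps working for i128 and friends where it would not.
    void checkHighBitsMask(unsigned Width, unsigned c) {
      assert(c < Width && Width <= 64);
      APInt HiBits = APInt::getHighBitsSet(Width, Width - c);
      uint64_t Truncate = Width == 64 ? ~0ULL : ((1ULL << Width) - 1);
      assert(HiBits.getZExtValue() == ((~0ULL << c) & Truncate));
    }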
@@ -2497,7 +2528,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold (sra c1, c2) -> (sra c1, c2)
if (N0C && N1C)
@@ -2518,7 +2549,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
- MVT EVT = MVT::getIntegerVT(LowBits);
+ EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
N0.getOperand(0), DAG.getValueType(EVT));
@@ -2545,8 +2576,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
unsigned VTValSize = VT.getSizeInBits();
- MVT TruncVT =
- MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -2576,7 +2607,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- MVT TruncVT = N1.getValueType();
+ EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC.trunc(TruncVT.getSizeInBits());
@@ -2607,7 +2638,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
@@ -2641,7 +2672,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
- MVT SmallVT = N0.getOperand(0).getValueType();
+ EVT SmallVT = N0.getOperand(0).getValueType();
if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
return DAG.getUNDEF(VT);
@@ -2700,7 +2731,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- MVT TruncVT = N1.getValueType();
+ EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC.trunc(TruncVT.getSizeInBits());
@@ -2724,7 +2755,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
if (isa<ConstantSDNode>(N0))
@@ -2734,7 +2765,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
if (isa<ConstantSDNode>(N0))
@@ -2744,7 +2775,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
if (isa<ConstantSDNode>(N0))
@@ -2759,8 +2790,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
- MVT VT = N->getValueType(0);
- MVT VT0 = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
// fold (select C, X, X) -> X
if (N1 == N2)
@@ -2825,7 +2856,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other))
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
@@ -2945,7 +2977,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (sext c1) -> c1
if (isa<ConstantSDNode>(N0))
@@ -3054,13 +3086,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
@@ -3071,14 +3103,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
- // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ if (VT.isVector() &&
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
+
+ // Only do this before legalize for now.
+ !LegalOperations) {
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+ NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
}
+
+
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3090,7 +3142,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (zext c1) -> c1
if (isa<ConstantSDNode>(N0))
@@ -3194,13 +3246,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
@@ -3225,7 +3277,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
@@ -3330,11 +3382,11 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
!ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
@@ -3400,8 +3452,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
- MVT EVT = VT;
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
// This transformation isn't valid for vector loads.
if (VT.isVector())
@@ -3411,20 +3463,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
- EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
return SDValue();
}
- unsigned EVTBits = EVT.getSizeInBits();
+ unsigned EVTBits = ExtVT.getSizeInBits();
unsigned ShAmt = 0;
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of the size of ExtVT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
- if (N0.getValueType().getSizeInBits() <= EVTBits)
+      // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
}
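
The reworked ReduceLoadWidth check above narrows (srl (load x), ShAmt) only when the shift amount and the original load width are both multiples of the narrow width. A sketch of that predicate, under the assumption (established by the ExtVT.isRound() guard) that the narrow width is a power of two; the names are hypothetical, not LLVM API:

// Whether an (srl (load x), ShAmt) may be narrowed to a NarrowBits-wide
// load. NarrowBits must be a power of two for the mask trick, which is
// exactly what ExtVT.isRound() guarantees in the code above.
bool canNarrowLoad(unsigned LoadBits, unsigned NarrowBits, unsigned ShAmt) {
  if (ShAmt & (NarrowBits - 1))
    return false;                        // shift not element-aligned
  if (LoadBits & (NarrowBits - 1))
    return false;                        // load width not a whole multiple
  return ShAmt + NarrowBits <= LoadBits; // slice stays inside the load
}
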
@@ -3432,18 +3485,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
- if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+ if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT PtrType = N0.getOperand(1).getValueType();
+ EVT PtrType = N0.getOperand(1).getValueType();
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
}
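
The big-endian branch above recomputes the shift amount because the bytes of the narrow value sit at the opposite end of the in-memory wide value. A sketch of the store-bit offset under each byte order (hypothetical names; this mirrors the LVTStoreBits - EVTStoreBits - ShAmt line):

// Offset, in store bits, of the NarrowStoreBits-wide slice that the shift
// exposed within a WideStoreBits-wide in-memory value.
unsigned sliceStart(unsigned WideStoreBits, unsigned NarrowStoreBits,
                    unsigned ShAmt, bool BigEndian) {
  return BigEndian ? WideStoreBits - NarrowStoreBits - ShAmt : ShAmt;
}
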
@@ -3460,7 +3513,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LN0->isVolatile(), NewAlign)
: DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- EVT, LN0->isVolatile(), NewAlign);
+ ExtVT, LN0->isVolatile(), NewAlign);
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
@@ -3477,8 +3530,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- MVT VT = N->getValueType(0);
- MVT EVT = cast<VTSDNode>(N1)->getVT();
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
@@ -3573,7 +3626,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -3623,14 +3676,14 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
/// CombineConsecutiveLoads - build_pair (load, load) -> load
/// if load locations are consecutive.
-SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
- MVT LD1VT = LD1->getValueType(0);
+ EVT LD1VT = LD1->getValueType(0);
const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (ISD::isNON_EXTLoad(LD2) &&
@@ -3642,7 +3695,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
- getABITypeAlignment(VT.getTypeForMVT());
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
@@ -3656,7 +3709,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
@@ -3674,7 +3727,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
break;
}
- MVT DestEltVT = N->getValueType(0).getVectorElementType();
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
@@ -3684,7 +3737,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
- if (Res.getNode() != N) return Res;
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
}
// (conv (conv x, t1), t2) -> (conv x, t2)
@@ -3700,7 +3764,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned Align = TLI.getTargetData()->
- getABITypeAlignment(VT.getTypeForMVT());
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
if (Align <= OrigAlign) {
@@ -3743,7 +3807,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
- MVT IntXVT = MVT::getIntegerVT(OrigXWidth);
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
@@ -3791,7 +3855,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
}
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
}
@@ -3799,8 +3863,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
- MVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
@@ -3822,7 +3886,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
- MVT VT = MVT::getVectorVT(DstEltVT,
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
BV->getValueType(0).getVectorNumElements());
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
&Ops[0], Ops.size());
@@ -3835,7 +3899,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
// Convert the input float vector to a int vector where the elements are the
// same sizes.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
- MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits());
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
@@ -3844,7 +3908,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
- MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits());
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
@@ -3880,7 +3944,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
}
- MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
&Ops[0], Ops.size());
}
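
For the element-count-changing cases of ConstantFoldBIT_CONVERTofBUILD_VECTOR (this hunk and the next), the fold is pure bit shuffling over the vector's constant elements. A sketch of the splitting direction for a little-endian target, converting i32 elements into four i8 elements each (a hypothetical helper, not the LLVM code itself):

#include <cstdint>
#include <vector>

// Bit-convert a vector of i32 constants into a vector of i8 constants on a
// little-endian target: each wide element yields four narrow elements, low
// piece first. A big-endian target emits the pieces in the opposite order.
std::vector<uint8_t> splitI32ToI8(const std::vector<uint32_t> &Src) {
  std::vector<uint8_t> Out;
  Out.reserve(Src.size() * 4);
  for (uint32_t V : Src)
    for (unsigned Piece = 0; Piece != 4; ++Piece)
      Out.push_back(uint8_t(V >> (8 * Piece)));
  return Out;
}
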
@@ -3889,7 +3953,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
// turns into multiple outputs.
bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
@@ -3926,7 +3991,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
@@ -3967,7 +4032,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
@@ -4001,7 +4066,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
@@ -4024,7 +4089,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
- // fold (fmul X, (fneg 1.0)) -> (fneg X)
+ // fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
@@ -4056,7 +4121,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
@@ -4089,7 +4154,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP && VT != MVT::ppcf128)
@@ -4103,7 +4168,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
@@ -4151,8 +4216,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- MVT VT = N->getValueType(0);
- MVT OpVT = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128)
@@ -4173,8 +4238,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- MVT VT = N->getValueType(0);
- MVT OpVT = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128)
@@ -4195,7 +4260,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fp_to_sint c1fp) -> c1
if (N0CFP)
@@ -4207,7 +4272,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
if (N0CFP && VT != MVT::ppcf128)
@@ -4220,7 +4285,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
if (N0CFP && N0.getValueType() != MVT::ppcf128)
@@ -4253,8 +4318,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
- MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
// fold (fp_round_inreg c1fp) -> c1fp
@@ -4269,7 +4334,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() &&
@@ -4326,7 +4391,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
- MVT IntVT = Int.getValueType();
+ EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
@@ -4342,7 +4407,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
if (N0CFP && VT != MVT::ppcf128)
@@ -4361,7 +4426,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
- MVT IntVT = Int.getValueType();
+ EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
@@ -4419,7 +4484,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (Op0.getOpcode() == ISD::AND &&
Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant) {
- SDValue AndOp0 = Op0.getOperand(0);
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
@@ -4491,7 +4555,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
bool isLoad = true;
SDValue Ptr;
- MVT VT;
+ EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
@@ -4579,9 +4643,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PreIndexedNodes;
++NodesCombined;
- DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(errs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ errs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4616,7 +4682,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
bool isLoad = true;
SDValue Ptr;
- MVT VT;
+ EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
@@ -4652,7 +4718,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- if (Ptr == Offset)
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
std::swap(BasePtr, Offset);
if (Ptr != BasePtr)
continue;
@@ -4711,9 +4777,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(errs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ errs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4815,9 +4883,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// v3 = add v2, c
    // Now we replace the use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
- DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG));
- DOUT << "\n";
+ DEBUG(errs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ errs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ errs() << "\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
@@ -4833,9 +4903,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG));
- DOUT << " and 2 other values\n";
+ DEBUG(errs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ errs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
@@ -4890,7 +4962,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplLoad.getValue(1));
-
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -4917,7 +4992,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
- MVT VT = Value.getValueType();
+ EVT VT = Value.getValueType();
if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
return SDValue();
@@ -4944,12 +5019,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
- MVT NewVT = MVT::getIntegerVT(NewBW);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
while (NewBW < BitWidth &&
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
- NewVT = MVT::getIntegerVT(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
if (NewBW >= BitWidth)
return SDValue();
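
ReduceLoadOpStoreWidth picks the narrow store width by rounding the span of modified bits up to a power of two with NextPowerOf2, then widening further until the target reports the type legal and profitable. A sketch of that helper, mirroring llvm::NextPowerOf2 (the next power of two strictly greater than the argument):

#include <cstdint>

// Next power of two strictly greater than V (valid for V < 2^63), the
// contract of llvm::NextPowerOf2 used above to choose the narrow width.
uint64_t nextPowerOf2(uint64_t V) {
  V |= V >> 1;  V |= V >> 2;  V |= V >> 4;
  V |= V >> 8;  V |= V >> 16; V |= V >> 32;
  return V + 1;
}
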
@@ -4971,7 +5046,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
if (NewAlign <
- TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT()))
+ TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -5024,9 +5099,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
- MVT SVT = Value.getOperand(0).getValueType();
+ EVT SVT = Value.getOperand(0).getValueType();
unsigned Align = TLI.getTargetData()->
- getABITypeAlignment(SVT.getTypeForMVT());
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
@@ -5043,8 +5118,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// transform should not be done in this case.
if (Value.getOpcode() != ISD::TargetConstantFP) {
SDValue Tmp;
- switch (CFP->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unknown FP type");
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
case MVT::f80: // We don't do this for these yet.
case MVT::f128:
case MVT::ppcf128:
@@ -5111,8 +5186,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If there is a better chain.
if (Chain != BetterChain) {
- // Replace the chain to avoid dependency.
SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
ST->getSrcValue(),ST->getSrcValueOffset(),
@@ -5128,6 +5204,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplStore);
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
// Don't add users to work list.
return CombineTo(N, Token, false);
}
@@ -5211,10 +5290,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// BUILD_VECTOR with undef elements and the inserted element.
if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
- MVT VT = InVec.getValueType();
- MVT EVT = VT.getVectorElementType();
+ EVT VT = InVec.getValueType();
+ EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
if (Elt < Ops.size())
@@ -5232,7 +5311,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
- MVT EltVT = InVec.getValueType().getVectorElementType();
+ EVT EltVT = InVec.getValueType().getVectorElementType();
SDValue InOp = InVec.getOperand(0);
if (InOp.getValueType() != EltVT)
return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
@@ -5252,18 +5331,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- MVT VT = InVec.getValueType();
- MVT EVT = VT.getVectorElementType();
- MVT LVT = EVT;
+ EVT VT = InVec.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
if (InVec.getOpcode() == ISD::BIT_CONVERT) {
- MVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType()))
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
InVec = InVec.getOperand(0);
- EVT = BCVT.getVectorElementType();
+ ExtVT = BCVT.getVectorElementType();
NewLoad = true;
}
@@ -5272,7 +5351,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- InVec.getOperand(0).getValueType() == EVT &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
@@ -5306,7 +5385,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT());
+ TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
@@ -5317,7 +5396,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue NewPtr = LN0->getBasePtr();
if (Elt) {
unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
- MVT PtrType = NewPtr.getValueType();
+ EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
@@ -5334,8 +5413,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
- MVT VT = N->getValueType(0);
- MVT EltType = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
@@ -5432,11 +5510,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
assert(N0.getValueType().getVectorNumElements() == NumElts &&
"Vector shuffle must be normalized in DAG");
@@ -5494,7 +5571,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -5517,14 +5594,14 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
}
// Let's see if the target supports this vector_shuffle.
- MVT RVT = RHS.getValueType();
+ EVT RVT = RHS.getValueType();
if (!TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
- MVT EVT = RVT.getVectorElementType();
+ EVT EltVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EVT));
+ DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
@@ -5543,10 +5620,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// things. Simplifying them may result in a loss of legality.
if (LegalOperations) return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
- MVT EltType = VT.getVectorElementType();
+ EVT EltType = VT.getVectorElementType();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
@@ -5589,7 +5666,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
if (Ops.size() == LHS.getNumOperands()) {
- MVT VT = LHS.getValueType();
+ EVT VT = LHS.getValueType();
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
&Ops[0], Ops.size());
}
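
When every paired element of the two BUILD_VECTOR operands folds to a constant, SimplifyVBinOp rebuilds the result vector from the folded scalars, as the hunk above shows. A scalar sketch of that shape (hypothetical names, int elements standing in for constant SDNodes):

#include <cstddef>
#include <vector>

// Element-wise fold of a binary operation across two equal-length vectors
// of constants, the shape of the BUILD_VECTOR rebuild above.
std::vector<int> foldVectorBinOp(const std::vector<int> &L,
                                 const std::vector<int> &R,
                                 int (*Op)(int, int)) {
  std::vector<int> Out;
  Out.reserve(L.size());
  for (std::size_t I = 0; I != L.size(); ++I)
    Out.push_back(Op(L[I], R[I]));
  return Out;
}
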
@@ -5728,7 +5805,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
// (x ? y : y) -> y.
if (N2 == N3) return N2;
- MVT VT = N2.getValueType();
+ EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
@@ -5820,8 +5897,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
N2.getValueType().isInteger() &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
- MVT XType = N0.getValueType();
- MVT AType = N2.getValueType();
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
if (XType.bitsGE(AType)) {
// and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
// single-bit constant.
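
These SimplifySelectCC folds all rest on the sign-mask idiom: arithmetic-shifting a signed value right by size(X)-1 produces all-ones when it is negative and zero otherwise. A standalone 32-bit sketch (right shift of a negative value is implementation-defined before C++20, but is an arithmetic shift on the hosts and targets LLVM supports):

#include <cstdint>

// (X < 0) ? A : 0 without a branch: X >> 31 is all-ones exactly when X is
// negative, so the mask either passes A through or clears it.
int32_t selectLtZero(int32_t X, int32_t A) {
  int32_t Mask = X >> 31; // 0 or -1
  return Mask & A;
}
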
@@ -5900,7 +5977,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
  // FIXME: Turn all of these into setcc if setcc is legal;
// otherwise, go ahead with the folds.
if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
- MVT XType = N0.getValueType();
+ EVT XType = N0.getValueType();
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
@@ -5942,7 +6019,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
- MVT XType = N0.getValueType();
+ EVT XType = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy()));
@@ -5957,7 +6034,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
- MVT XType = N0.getValueType();
+ EVT XType = N0.getValueType();
if (SubC->isNullValue() && XType.isInteger()) {
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
N0,
@@ -5976,11 +6053,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
}
/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
-SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans) {
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
@@ -6012,11 +6089,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
-/// FindBaseOffset - Return true if base is known not to alias with anything
-/// but itself. Provides base object and offset as results.
-static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
- Base = Ptr; Offset = 0;
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
// If it's an adding a simple constant then integrate the offset.
if (Base.getOpcode() == ISD::ADD) {
@@ -6025,36 +6103,73 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
Offset += C->getZExtValue();
}
}
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+ : (void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
// If it's any of the following then it can't alias with anything but itself.
- return isa<FrameIndexSDNode>(Base) ||
- isa<ConstantPoolSDNode>(Base) ||
- isa<GlobalAddressSDNode>(Base);
+ return isa<FrameIndexSDNode>(Base);
}
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
SDValue Ptr2, int64_t Size2,
- const Value *SrcValue2, int SrcValueOffset2) const {
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
- bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
- bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+ GlobalValue *GV1, *GV2;
+ void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
- // If they have a same base address then...
- if (Base1 == Base2)
- // Check to see if the addresses overlap.
+  // If they have the same base address, check to see if they overlap.
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
- // If we know both bases then they can't alias.
- if (KnownBase1 && KnownBase2) return false;
+ // If we know what the bases are, and they aren't identical, then we know they
+ // cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+  // If we know SrcValue1 and SrcValue2 have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove they
+ // do not alias. This check is conservative for now to catch cases created by
+ // splitting vector types.
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+ // There is no overlap between these relatively aligned accesses of similar
+    // size, so report no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
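
The new alignment-based disambiguation reduces both offsets modulo the shared alignment and then applies the usual interval-overlap test. A sketch under the same preconditions (equal access sizes, equal alignments, alignment strictly larger than the size, nonnegative offsets; hypothetical names):

#include <cstdint>

// Conservative may-alias test for two same-size accesses whose underlying
// IR objects share an alignment larger than the access size: reduce each
// offset modulo the alignment and check the reduced intervals for overlap.
bool mayAlias(int64_t Off1, int64_t Off2, int64_t Size, int64_t Align) {
  if (Align <= Size || Off1 == Off2)
    return true; // cannot disambiguate this way; stay conservative
  int64_t A1 = Off1 % Align, A2 = Off2 % Align;
  return !((A1 + Size) <= A2 || (A2 + Size) <= A1);
}
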
+
if (CombinerGlobalAA) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
@@ -6074,20 +6189,24 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue, int &SrcValueOffset) const {
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign) const {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
Size = LD->getMemoryVT().getSizeInBits() >> 3;
SrcValue = LD->getSrcValue();
SrcValueOffset = LD->getSrcValueOffset();
+ SrcValueAlign = LD->getOriginalAlignment();
return true;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
Size = ST->getMemoryVT().getSizeInBits() >> 3;
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
+ SrcValueAlign = ST->getOriginalAlignment();
} else {
- assert(0 && "FindAliasInfo expected a memory operand");
+ llvm_unreachable("FindAliasInfo expected a memory operand");
}
return false;
@@ -6098,28 +6217,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVector<SDValue, 8> &Aliases) {
SmallVector<SDValue, 8> Chains; // List of chains to visit.
- std::set<SDNode *> Visited; // Visited node set.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
SDValue Ptr;
- int64_t Size = 0;
- const Value *SrcValue = 0;
- int SrcValueOffset = 0;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign);
// Starting off.
Chains.push_back(OriginalChain);
-
+ unsigned Depth = 0;
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+ // find more and revert to original chain since the xform is unlikely to be
+ // profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
- // Don't bother if we've been before.
- if (Visited.find(Chain.getNode()) != Visited.end()) continue;
- Visited.insert(Chain.getNode());
+    // Don't bother if we've been here before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
switch (Chain.getOpcode()) {
case ISD::EntryToken:
@@ -6130,35 +6266,40 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::STORE: {
// Get alias information for Chain.
SDValue OpPtr;
- int64_t OpSize = 0;
- const Value *OpSrcValue = 0;
- int OpSrcValueOffset = 0;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
- OpSrcValue, OpSrcValueOffset);
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
- isAlias(Ptr, Size, SrcValue, SrcValueOffset,
- OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
Chains.push_back(Chain.getOperand(0));
- // Clean up old chain.
- AddToWorkList(Chain.getNode());
+ ++Depth;
}
break;
}
case ISD::TokenFactor:
- // We have to check each of the operands of the token factor, so we queue
- // then up. Adding the operands to the queue (stack) in reverse order
- // maintains the original order and increases the likelihood that getNode
- // will find a matching token factor (CSE.)
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+ // likelihood that getNode will find a matching token factor (CSE.)
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
- // Eliminate the token factor if we can.
- AddToWorkList(Chain.getNode());
+ ++Depth;
break;
default:
@@ -6184,15 +6325,10 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// If a single operand then chain to it. We don't need to revisit it.
return Aliases[0];
}
-
+
// Construct a custom tailored token factor.
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
- &Aliases[0], Aliases.size());
-
- // Make sure the old chain gets cleaned up.
- if (NewChain != OldChain) AddToWorkList(OldChain.getNode());
-
- return NewChain;
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
}
// SelectionDAG::Combine - This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index cd2d5ac8ec23..8e955aff98fe 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -57,7 +57,7 @@
using namespace llvm;
unsigned FastISel::getRegForValue(Value *V) {
- MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
if (!RealVT.isSimple())
return 0;
@@ -65,11 +65,11 @@ unsigned FastISel::getRegForValue(Value *V) {
// Ignore illegal types. We must do this before looking up the value
// in ValueMap because Arguments are given virtual registers regardless
// of whether FastISel can handle them.
- MVT::SimpleValueType VT = RealVT.getSimpleVT();
+ MVT VT = RealVT.getSimpleVT();
if (!TLI.isTypeLegal(VT)) {
// Promote MVT::i1 to a legal type though, because it's common and easy.
if (VT == MVT::i1)
- VT = TLI.getTypeToTransformTo(VT).getSimpleVT();
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
else
return 0;
}
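
The mechanical MVT-to-EVT rewrites running through this whole patch encode one distinction: MVT is the closed enum of simple machine value types, while EVT can also describe extended IR types and so must be checked with isSimple() before getSimpleVT() hands a plain MVT to target hooks, as getRegForValue does above. A sketch of that relationship (not the real LLVM classes):

// Toy model of the EVT/MVT split, not the LLVM definitions.
enum class Simple { i1, i8, i16, i32, i64, f32, f64, INVALID };

struct MVTSketch { Simple Ty; };

struct EVTSketch {
  Simple Ty;             // Simple::INVALID marks an extended type
  unsigned ExtendedBits; // meaningful only when the type is extended
  bool isSimple() const { return Ty != Simple::INVALID; }
  MVTSketch getSimpleVT() const { return MVTSketch{Ty}; } // requires isSimple()
};
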
@@ -92,13 +92,14 @@ unsigned FastISel::getRegForValue(Value *V) {
} else if (isa<ConstantPointerNull>(V)) {
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
- Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType()));
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
if (!Reg) {
const APFloat &Flt = CF->getValueAPF();
- MVT IntVT = TLI.getPointerTy();
+ EVT IntVT = TLI.getPointerTy();
uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
@@ -108,7 +109,8 @@ unsigned FastISel::getRegForValue(Value *V) {
if (isExact) {
APInt IntVal(IntBitWidth, 2, x);
- unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal));
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
if (IntegerReg != 0)
Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
}
@@ -174,13 +176,11 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) {
// If the index is smaller or larger than intptr_t, truncate or extend it.
MVT PtrVT = TLI.getPointerTy();
- MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false);
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT))
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
- ISD::SIGN_EXTEND, IdxN);
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
else if (IdxVT.bitsGT(PtrVT))
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
- ISD::TRUNCATE, IdxN);
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
return IdxN;
}
@@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) {
/// which has an opcode which directly corresponds to the given ISD opcode.
///
bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
- MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
@@ -203,7 +203,7 @@ bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
if (VT == MVT::i1 &&
(ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
ISDOpcode == ISD::XOR))
- VT = TLI.getTypeToTransformTo(VT);
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
else
return false;
}
@@ -260,7 +260,7 @@ bool FastISel::SelectGetElementPtr(User *I) {
return false;
const Type *Ty = I->getOperand(0)->getType();
- MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT();
+ MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
OI != E; ++OI) {
Value *Idx = *OI;
@@ -335,7 +335,7 @@ bool FastISel::SelectCall(User *I) {
if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW
&& DW->ShouldEmitDwarfDebug()) {
unsigned ID =
- DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext()));
+ DW->RecordRegionStart(RSI->getContext());
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
BuildMI(MBB, DL, II).addImm(ID);
}
@@ -346,7 +346,7 @@ bool FastISel::SelectCall(User *I) {
if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW
&& DW->ShouldEmitDwarfDebug()) {
unsigned ID = 0;
- DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext()));
+ DISubprogram Subprogram(REI->getContext());
if (isInlinedFnEnd(*REI, MF.getFunction())) {
// This is end of an inlined function.
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
@@ -359,7 +359,7 @@ bool FastISel::SelectCall(User *I) {
BuildMI(MBB, DL, II).addImm(ID);
} else {
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext()));
+ ID = DW->RecordRegionEnd(REI->getContext());
BuildMI(MBB, DL, II).addImm(ID);
}
}
@@ -384,11 +384,10 @@ bool FastISel::SelectCall(User *I) {
setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram()));
- unsigned LabelID = DW->RecordInlinedFnStart(SP,
- DICompileUnit(PrevLocTpl.CompileUnit),
- PrevLocTpl.Line,
- PrevLocTpl.Col);
+ DISubprogram SP(FSI->getSubprogram());
+ unsigned LabelID =
+ DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope),
+ PrevLocTpl.Line, PrevLocTpl.Col);
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
BuildMI(MBB, DL, II).addImm(LabelID);
return true;
@@ -398,7 +397,7 @@ bool FastISel::SelectCall(User *I) {
MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
// llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram()));
+ DW->RecordRegionStart(FSI->getSubprogram());
return true;
}
case Intrinsic::dbg_declare: {
@@ -407,7 +406,6 @@ bool FastISel::SelectCall(User *I) {
|| !DW->ShouldEmitDwarfDebug())
return true;
- Value *Variable = DI->getVariable();
Value *Address = DI->getAddress();
if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
@@ -418,20 +416,15 @@ bool FastISel::SelectCall(User *I) {
StaticAllocaMap.find(AI);
if (SI == StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
-
- // Determine the debug globalvariable.
- GlobalValue *GV = cast<GlobalVariable>(Variable);
-
- // Build the DECLARE instruction.
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE);
- MachineInstr *DeclareMI
- = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV);
- DIVariable DV(cast<GlobalVariable>(GV));
- DW->RecordVariableScope(DV, DeclareMI);
+ if (MMI)
+ MMI->setVariableDbgInfo(DI->getVariable(), FI);
+#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN
+ DW->RecordVariable(DI->getVariable(), FI);
+#endif
return true;
}
case Intrinsic::eh_exception: {
- MVT VT = TLI.getValueType(I->getType());
+ EVT VT = TLI.getValueType(I->getType());
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
default: break;
case TargetLowering::Expand: {
@@ -449,15 +442,11 @@ bool FastISel::SelectCall(User *I) {
}
break;
}
- case Intrinsic::eh_selector_i32:
- case Intrinsic::eh_selector_i64: {
- MVT VT = TLI.getValueType(I->getType());
+ case Intrinsic::eh_selector: {
+ EVT VT = TLI.getValueType(I->getType());
switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
default: break;
case TargetLowering::Expand: {
- MVT VT = (IID == Intrinsic::eh_selector_i32 ?
- MVT::i32 : MVT::i64);
-
if (MMI) {
if (MBB->isLandingPad())
AddCatchInfo(*cast<CallInst>(I), MMI, MBB);
@@ -471,12 +460,25 @@ bool FastISel::SelectCall(User *I) {
}
unsigned Reg = TLI.getExceptionSelectorRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Reg, RC, RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
+ RC, RC);
assert(InsertedCopy && "Can't copy address registers!");
InsertedCopy = InsertedCopy;
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
UpdateValueMap(I, ResultReg);
} else {
unsigned ResultReg =
@@ -493,8 +495,8 @@ bool FastISel::SelectCall(User *I) {
}
bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple())
@@ -524,14 +526,14 @@ bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
// If the operand is i1, arrange for the high bits in the register to be zero.
if (SrcVT == MVT::i1) {
- SrcVT = TLI.getTypeToTransformTo(SrcVT);
+ SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
if (!InputReg)
return false;
}
// If the result is i1, truncate to the target's type for i1 first.
if (DstVT == MVT::i1)
- DstVT = TLI.getTypeToTransformTo(DstVT);
+ DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
DstVT.getSimpleVT(),
@@ -555,8 +557,8 @@ bool FastISel::SelectBitCast(User *I) {
}
// Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple() ||
@@ -616,6 +618,49 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
MBB->addSuccessor(MSucc);
}
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BIT_CONVERT, OpReg);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BIT_CONVERT, IntResultReg);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
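
SelectFNeg's fallback path is plain sign-bit manipulation. A self-contained f64 version, with std::memcpy standing in for the two ISD::BIT_CONVERT nodes and the XOR mirroring the FastEmit_ri_ of 1 << (bits - 1):

#include <cstdint>
#include <cstring>

// Negate a double by flipping the IEEE sign bit: bitcast to integer, XOR
// the top bit, bitcast back. This is what the fallback above emits when the
// target has no native FNEG.
double fnegViaBits(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof Bits);
  Bits ^= UINT64_C(1) << 63;
  std::memcpy(&X, &Bits, sizeof X);
  return X;
}
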
+
bool
FastISel::SelectOperator(User *I, unsigned Opcode) {
switch (Opcode) {
@@ -626,6 +671,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
case Instruction::Sub:
return SelectBinaryOp(I, ISD::SUB);
case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
return SelectBinaryOp(I, ISD::FSUB);
case Instruction::Mul:
return SelectBinaryOp(I, ISD::MUL);
@@ -709,8 +757,8 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
if (DstVT.bitsGT(SrcVT))
return SelectCast(I, ISD::ZERO_EXTEND);
if (DstVT.bitsLT(SrcVT))
@@ -758,45 +806,44 @@ FastISel::FastISel(MachineFunction &mf,
FastISel::~FastISel() {}
-unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_(MVT, MVT,
ISD::NodeType) {
return 0;
}
-unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_r(MVT, MVT,
ISD::NodeType, unsigned /*Op0*/) {
return 0;
}
-unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_rr(MVT, MVT,
ISD::NodeType, unsigned /*Op0*/,
unsigned /*Op0*/) {
return 0;
}
-unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType,
- ISD::NodeType, uint64_t /*Imm*/) {
+unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) {
return 0;
}
-unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_f(MVT, MVT,
ISD::NodeType, ConstantFP * /*FPImm*/) {
return 0;
}
-unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_ri(MVT, MVT,
ISD::NodeType, unsigned /*Op0*/,
uint64_t /*Imm*/) {
return 0;
}
-unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_rf(MVT, MVT,
ISD::NodeType, unsigned /*Op0*/,
ConstantFP * /*FPImm*/) {
return 0;
}
-unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType,
+unsigned FastISel::FastEmit_rri(MVT, MVT,
ISD::NodeType,
unsigned /*Op0*/, unsigned /*Op1*/,
uint64_t /*Imm*/) {
@@ -807,9 +854,9 @@ unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType,
/// to emit an instruction with an immediate operand using FastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
-unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
unsigned Op0, uint64_t Imm,
- MVT::SimpleValueType ImmType) {
+ MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the ri form.
unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
if (ResultReg != 0)
@@ -824,9 +871,9 @@ unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
/// to emit an instruction with a floating-point immediate operand using
/// FastEmit_rf. If that fails, it materializes the immediate into a register
/// and tries FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode,
unsigned Op0, ConstantFP *FPImm,
- MVT::SimpleValueType ImmType) {
+ MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the rf form.
unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
if (ResultReg != 0)
@@ -842,7 +889,7 @@ unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
// be replaced by code that creates a load from a constant-pool entry,
// which will require some target-specific work.
const APFloat &Flt = FPImm->getValueAPF();
- MVT IntVT = TLI.getPointerTy();
+ EVT IntVT = TLI.getPointerTy();
uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
@@ -987,7 +1034,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT,
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, uint32_t Idx) {
const TargetRegisterClass* RC = MRI.getRegClass(Op0);
@@ -1008,6 +1055,6 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT,
/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
/// with all but the least significant bit set to zero.
-unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) {
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 000000000000..d3ffb2a22d93
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,693 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The value list of a target node has the register or
+/// immediate results first, then an optional chain, then optional flag
+/// values (which do not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Flag)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The operand list of a target node has the actual inputs
+/// first, followed by an optional chain operand, then an optional flag operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
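+
+// For a hypothetical target node with value list (i32, ch, flag) and
+// operand list (lhs, rhs, ch, flag), CountResults returns 1 and
+// CountOperands returns 2: trailing flags are stripped first, then a
+// trailing chain if one is present.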
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
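+ // (This saves a copy: the value would otherwise land in a fresh vreg
+ // and then be copied again by the CopyToReg.)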
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ EVT VT = Node->getValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Flag)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = 0;
+ if (i+II.getNumDefs() < II.getNumOperands())
+ RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ EVT VT = Node->getValueType(ResNo);
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
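+ // (A negative getCopyCost() is the convention for "don't copy this
+ // class"; status-register classes such as x86's EFLAGS are the classic
+ // example.)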
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
+ DstRC, SrcRC);
+
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+/// of the node is a CopyToReg, return its destination register. Return 0
+/// otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(MBB, Op.getDebugLoc(),
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ bool isOptDef = IIOpNum < TID.getNumOperands() &&
+ TID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it.
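+ // (The copy is skipped when the source class already satisfies the
+ // constraint, i.e. it equals the destination class or is a subclass of
+ // it; e.g. a value in a general 32-bit class feeding an instruction
+ // that demands a more constrained subclass does get copied.)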
+ if (II) {
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *DstRC = 0;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
+ assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+ "Don't have operand info for this instruction!");
+ if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ VReg = NewVReg;
+ }
+ }
+
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ const ConstantFP *CFP = F->getConstantFPValue();
+ MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags()));
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+ JT->getTargetFlags()));
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM->getTargetData()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+ CP->getTargetFlags()));
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+ ES->getTargetFlags()));
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+ unsigned SubIdx, EVT VT) {
+ // Pick the register class of the superregister for this type.
+ for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+ E = TRC->superregclasses_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
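+
+// E.g. on an x86-64-like target where GR32 is the SubIdx'th sub-register
+// class of GR64, calling this with (GR32, SubIdx, MVT::i64) walks GR32's
+// super-register classes and returns GR64 (class names illustrative).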
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap){
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG) {
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetInstrInfo::EXTRACT_SUBREG));
+
+ // Figure out the register class to create for the destreg.
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Add def, source, and subreg index
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ MBB->insert(InsertPos, MI);
+ } else if (Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubReg = getVR(N1, VRBaseMap);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx,
+ Node->getValueType(0));
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate; otherwise it's a register.
+ if (Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ } else
+ AddOperand(MI, N0, 0, 0, VRBaseMap);
+ // Add the subregister being inserted.
+ AddOperand(MI, N1, 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ MBB->insert(InsertPos, MI);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
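+
+// The machine instructions built above look roughly like this (virtual
+// register numbers invented):
+//   %reg1 = EXTRACT_SUBREG %reg0, subidx
+//   %reg2 = INSERT_SUBREG  %reg0, %reg1, subidx
+//   %reg2 = SUBREG_TO_REG  imm,   %reg1, subidx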
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted &&
+ "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+ (void) Emitted;
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ // If this node is a machine instruction, emit it directly.
+ if (Node->isMachineOpcode()) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+ II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ assert((II.getNumOperands() == NumMIOperands ||
+ HasPhysRegOuts || II.isVariadic()) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ if (II.usesCustomDAGSchedInsertionHook()) {
+ // Insert this instruction into the basic block using a target-specific
+ // inserter, which may return a new basic block.
+ MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+ InsertPos = MBB->end();
+ } else {
+ MBB->insert(InsertPos, MI);
+ }
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+ return;
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ break;
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
+ // Get the register classes of the src/dst.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ SrcTRC = MRI->getRegClass(SrcReg);
+ else
+ SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
+
+ if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ DstTRC = MRI->getRegClass(DestReg);
+ else
+ DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
+ Node->getOperand(1).getValueType());
+
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
+ DstTRC, SrcTRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetInstrInfo::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ const char *AsmStr =
+ cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
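+ // The flag word packs the operand kind into its low three bits with the
+ // register count above them (Kind | NumRegs << 3); that is what
+ // getNumOperandRegisters decoded above and why the dispatch below is
+ // on Flags & 7.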
+ switch (Flags & 7) {
+ default: llvm_unreachable("Bad flags!");
+ case 2: // Def of register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ }
+ break;
+ case 6: // Def of earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
+ false, false, true));
+ }
+ break;
+ case 1: // Use of register.
+ case 3: // Immediate.
+ case 4: // Addressing mode.
+ // The addressing mode has been selected; just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ break;
+ }
+ }
+ MBB->insert(InsertPos, MI);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()),
+ MRI(&MF->getRegInfo()),
+ TM(&MF->getTarget()),
+ TII(TM->getInstrInfo()),
+ TRI(TM->getRegisterInfo()),
+ TLI(TM->getTargetLowering()),
+ MBB(mbb), InsertPos(insertpos) {
+}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 000000000000..bb4634d04b2a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,119 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class TargetInstrDesc;
+
+class InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfOnlyCopyToRegUse - If the only use of the specified result
+ /// number of the node is a CopyToReg, return its destination register.
+ /// Return 0 otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+public:
+ /// CountResults - The value list of a target node has the register or
+ /// immediate results first, then an optional chain, then optional flag
+ /// values (which do not go into the machine instrs).
+ static unsigned CountResults(SDNode *Node);
+
+ /// CountOperands - The operand list of a target node has the actual inputs
+ /// first, followed by an optional chain operand, then flag operands. Compute
+ /// the number of actual operands that will go into the resulting
+ /// MachineInstr.
+ static unsigned CountOperands(SDNode *Node);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1413d9552d0e..fc01b07f65a2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -30,9 +30,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -98,13 +101,14 @@ public:
/// getTypeAction - Return how we should legalize values of this type: it is
/// either already legal, needs to be expanded into multiple registers of a
/// smaller integer type, or needs to be promoted to a larger type.
- LegalizeAction getTypeAction(MVT VT) const {
- return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ LegalizeAction getTypeAction(EVT VT) const {
+ return
+ (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
}
/// isTypeLegal - Return true if this type is legal on this target.
///
- bool isTypeLegal(MVT VT) const {
+ bool isTypeLegal(EVT VT) const {
return getTypeAction(VT) == Legal;
}
@@ -131,14 +135,14 @@ private:
/// performs the same shuffle in terms of order of result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
- SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const;
bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
- void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
@@ -149,18 +153,18 @@ private:
RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
- SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
SDValue ExpandDBG_STOPPOINT(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
SDValue ExpandFCOPYSIGN(SDNode *Node);
- SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
DebugLoc dl);
- SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
DebugLoc dl);
- SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
DebugLoc dl);
SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
@@ -179,10 +183,10 @@ private:
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const {
- MVT EltVT = NVT.getVectorElementType();
+ EVT EltVT = NVT.getVectorElementType();
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -342,7 +346,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
// double. This shrinks FP constants and canonicalizes them for targets where
// an FP extending load is the same cost as a normal load (such as on the x87
// fp stack or PPC FP unit).
- MVT VT = CFP->getValueType(0);
+ EVT VT = CFP->getValueType(0);
ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
if (!UseCP) {
assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
@@ -350,16 +354,16 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
(VT == MVT::f64) ? MVT::i64 : MVT::i32);
}
- MVT OrigVT = VT;
- MVT SVT = VT;
+ EVT OrigVT = VT;
+ EVT SVT = VT;
while (SVT != MVT::f32) {
- SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1);
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
// smaller type.
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
- const Type *SType = SVT.getTypeForMVT();
+ const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
VT = SVT;
Extend = true;
@@ -384,13 +388,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
- MVT VT = Val.getValueType();
+ EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
int SVOffset = ST->getSrcValueOffset();
DebugLoc dl = ST->getDebugLoc();
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
- MVT intVT = MVT::getIntegerVT(VT.getSizeInBits());
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (TLI.isTypeLegal(intVT)) {
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
@@ -401,9 +405,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
} else {
// Do an (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
- MVT StoredVT = ST->getMemoryVT();
- MVT RegVT =
- TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits()));
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits()));
unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
@@ -437,7 +441,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// The last store may be partial. Do a truncating store. On big-endian
// machines this requires an extending load from the stack slot to ensure
// that the bits are in the right place.
- MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset));
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
@@ -456,8 +460,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
!ST->getMemoryVT().isVector() &&
"Unaligned store of unknown type.");
// Get the half-size VT
- MVT NewStoredVT =
- (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1);
+ EVT NewStoredVT =
+ (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT().SimpleTy - 1);
int NumBits = NewStoredVT.getSizeInBits();
int IncrementSize = NumBits / 8;
@@ -488,11 +492,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
int SVOffset = LD->getSrcValueOffset();
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- MVT VT = LD->getValueType(0);
- MVT LoadedVT = LD->getMemoryVT();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
DebugLoc dl = LD->getDebugLoc();
if (VT.isFloatingPoint() || VT.isVector()) {
- MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits());
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (TLI.isTypeLegal(intVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
@@ -508,7 +512,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
} else {
// Copy the value to an (aligned) stack slot using (unaligned) integer
// loads and stores, then do an (aligned) load from the stack slot.
- MVT RegVT = TLI.getRegisterType(intVT);
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
@@ -538,7 +542,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
}
// The last copy may be partial. Do an extending load.
- MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset));
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset));
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getSrcValue(), SVOffset + Offset,
MemVT, LD->isVolatile(),
@@ -568,8 +572,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Compute the new VT that is half the size of the old one. This is an
// integer EVT.
unsigned NumBits = LoadedVT.getSizeInBits();
- MVT NewLoadedVT;
- NewLoadedVT = MVT::getIntegerVT(NumBits/2);
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
NumBits >>= 1;
unsigned Alignment = LD->getAlignment();
@@ -629,10 +633,10 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
// with a "move to register" or "extload into register" instruction, then
// permute it into place, if the idx is a constant and if the idx is
// supported by the target.
- MVT VT = Tmp1.getValueType();
- MVT EltVT = VT.getVectorElementType();
- MVT IdxVT = Tmp3.getValueType();
- MVT PtrVT = TLI.getPointerTy();
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
SDValue StackPtr = DAG.CreateStackTemporary(VT);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -663,7 +667,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
// SCALAR_TO_VECTOR requires that the type of the value being inserted
// match the element type of the vector being created, except for
// integers in which case the inserted value can be over width.
- MVT EltVT = Vec.getValueType().getVectorElementType();
+ EVT EltVT = Vec.getValueType().getVectorElementType();
if (Val.getValueType() == EltVT ||
(EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
@@ -785,7 +789,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG: {
- MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
break;
}
@@ -795,7 +799,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
Node->getOpcode() == ISD::SETCC ? 2 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
- MVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ EVT OpVT = Node->getOperand(CompareOperand).getValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
Action = TLI.getCondCodeAction(CCCode, OpVT);
@@ -821,11 +825,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// special case should be done as part of making LegalizeDAG non-recursive.
SimpleFinishLegalizing = false;
break;
- case ISD::CALL:
- // FIXME: Legalization for calls requires custom-lowering the call before
- // legalizing the operands! (I haven't looked into precisely why.)
- SimpleFinishLegalizing = false;
- break;
case ISD::EXTRACT_ELEMENT:
case ISD::FLT_ROUNDS_:
case ISD::SADDO:
@@ -847,7 +846,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
- case ISD::FORMAL_ARGUMENTS:
// These operations lie about being legal: when they claim to be legal,
// they should actually be custom-lowered.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -885,7 +883,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::BR_JT:
case ISD::BR_CC:
case ISD::BRCOND:
- case ISD::RET:
// Branches tweak the chain to include LastCALLSEQ_END
Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
LastCALLSEQ_END);
@@ -902,6 +899,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (!Ops[1].getValueType().isVector())
Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
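+ // (For the *_PARTS nodes the shift amount is operand 2; operands 0
+ // and 1 are the low and high halves of the value being shifted.)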
+ if (!Ops[2].getValueType().isVector())
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+ break;
}
Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
@@ -946,44 +951,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
switch (Node->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+ errs() << "NODE: ";
+ Node->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to legalize this operator!");
- abort();
- case ISD::CALL:
- // The only option for this is to custom lower it.
- Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
- assert(Tmp3.getNode() && "Target didn't custom lower this node!");
- // A call within a calling sequence must be legalized to something
- // other than the normal CALLSEQ_END. Violating this gets Legalize
- // into an infinite loop.
- assert ((!IsLegalizingCall ||
- Node->getOpcode() != ISD::CALL ||
- Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) &&
- "Nested CALLSEQ_START..CALLSEQ_END not supported.");
-
- // The number of incoming and outgoing values should match; unless the final
- // outgoing value is a flag.
- assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() ||
- (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 &&
- Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) ==
- MVT::Flag)) &&
- "Lowering call/formal_arguments produced unexpected # results!");
-
- // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
- // remember that we legalized all of them, so it doesn't get relegalized.
- for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) {
- if (Tmp3.getNode()->getValueType(i) == MVT::Flag)
- continue;
- Tmp1 = LegalizeOp(Tmp3.getValue(i));
- if (Op.getResNo() == i)
- Tmp2 = Tmp1;
- AddLegalizedOperand(SDValue(Node, i), Tmp1);
- }
- return Tmp2;
+ llvm_unreachable("Do not know how to legalize this operator!");
+
case ISD::BUILD_VECTOR:
switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
Tmp3 = TLI.LowerOperation(Result, DAG);
if (Tmp3.getNode()) {
@@ -1094,22 +1070,22 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
Tmp3 = Result.getValue(0);
Tmp4 = Result.getValue(1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses()) {
- unsigned ABIAlignment = TLI.getTargetData()->
- getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG,
- TLI);
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
Tmp3 = Result.getOperand(0);
Tmp4 = Result.getOperand(1);
Tmp3 = LegalizeOp(Tmp3);
@@ -1128,7 +1104,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Only promote a load of vector type to another.
assert(VT.isVector() && "Cannot promote this load!");
// Change base type to a different vector type.
- MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(),
@@ -1144,7 +1120,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
AddLegalizedOperand(SDValue(Node, 1), Tmp4);
return Op.getResNo() ? Tmp4 : Tmp3;
} else {
- MVT SrcVT = LD->getMemoryVT();
+ EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
@@ -1163,7 +1139,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
unsigned NewWidth = SrcVT.getStoreSizeInBits();
- MVT NVT = MVT::getIntegerVT(NewWidth);
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
SDValue Ch;
// The extra bits are guaranteed to be zero, since we stored them that
@@ -1201,8 +1177,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
assert(ExtraWidth < RoundWidth);
assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
"Load size not an integral number of bytes!");
- MVT RoundVT = MVT::getIntegerVT(RoundWidth);
- MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
SDValue Lo, Hi, Ch;
unsigned IncrementSize;
@@ -1269,7 +1245,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = LegalizeOp(Ch);
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
@@ -1287,12 +1263,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
} else {
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses()) {
- unsigned ABIAlignment = TLI.getTargetData()->
- getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG,
- TLI);
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
Tmp1 = Result.getOperand(0);
Tmp2 = Result.getOperand(1);
Tmp1 = LegalizeOp(Tmp1);
@@ -1303,10 +1279,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case TargetLowering::Expand:
// f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
- if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+ // f128 = EXTLOAD {f32,f64} too
+ if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 ||
+ Node->getValueType(0) == MVT::f128)) ||
+ (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
Result = DAG.getNode(ISD::FP_EXTEND, dl,
Node->getValueType(0), Load);
Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
@@ -1359,18 +1338,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
ST->getOffset());
- MVT VT = Tmp3.getValueType();
+ EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses()) {
- unsigned ABIAlignment = TLI.getTargetData()->
- getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
- TLI);
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
}
break;
case TargetLowering::Custom:
@@ -1391,14 +1370,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
} else {
Tmp3 = LegalizeOp(ST->getValue());
- MVT StVT = ST->getMemoryVT();
+ EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
if (StWidth != StVT.getStoreSizeInBits()) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
SVOffset, NVT, isVolatile, Alignment);
@@ -1412,8 +1391,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
assert(ExtraWidth < RoundWidth);
assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
"Store size not an integral number of bytes!");
- MVT RoundVT = MVT::getIntegerVT(RoundWidth);
- MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
SDValue Lo, Hi;
unsigned IncrementSize;
@@ -1460,16 +1439,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ST->getOffset());
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses()) {
- unsigned ABIAlignment = TLI.getTargetData()->
- getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
- TLI);
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
}
break;
case TargetLowering::Custom:
@@ -1522,7 +1501,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
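+ // A scalar extract may have been promoted to a type wider than the
+ // element actually stored in the stack slot, so the scalar case loads
+ // it with an any-extending load of the element type instead.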
+ if (Op.getValueType().isVector())
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+ else
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ NULL, 0, Vec.getValueType().getVectorElementType());
}
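The expansion above spills the vector to a stack temporary, offsets the pointer by the element index, and loads a single element back, now using an extending load when the element type is narrower than the requested scalar result. A minimal scalar sketch of the same idea in plain C++, with a <8 x i16> vector and an i32 result assumed, illustrative only:

#include <cstdint>
#include <cstring>

// Hypothetical scalar model of extract-through-stack: spill the vector,
// read one element back, and widen it the way the EXTLOAD above would.
int32_t extract_through_stack(const int16_t *vec, unsigned idx) {
  int16_t slot[8];                            // the stack temporary
  std::memcpy(slot, vec, sizeof(slot));       // store the whole vector
  int16_t elt;
  std::memcpy(&elt, &slot[idx], sizeof(elt)); // indexed element load
  return elt;                                 // implicit widening = EXTLOAD
}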
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1530,8 +1513,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// aligned object on the stack, store each element into it, then load
// the result as a vector.
// Create the stack frame object.
- MVT VT = Node->getValueType(0);
- MVT OpVT = Node->getOperand(0).getValueType();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1574,7 +1557,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
"Ugly special-cased code!");
// Get the sign bit of the RHS.
SDValue SignBit;
- MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+ EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
if (isTypeLegal(IVT)) {
SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
} else {
@@ -1613,9 +1596,8 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
- GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit());
- if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) {
- DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit()));
+ MDNode *CU_Node = DSP->getCompileUnit();
+ if (DW && (useDEBUG_LOC || useLABEL)) {
unsigned Line = DSP->getLine();
unsigned Col = DSP->getColumn();
@@ -1627,9 +1609,9 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
DAG.getConstant(Line, MVT::i32),
DAG.getConstant(Col, MVT::i32),
- DAG.getSrcValue(CU.getGV()));
+ DAG.getSrcValue(CU_Node));
} else {
- unsigned ID = DW->RecordSourceLine(Line, Col, CU);
+ unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node);
return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
}
}
@@ -1643,7 +1625,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
DebugLoc dl = Node->getDebugLoc();
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDValue Tmp1 = SDValue(Node, 0);
SDValue Tmp2 = SDValue(Node, 1);
SDValue Tmp3 = Node->getOperand(2);
@@ -1676,14 +1658,14 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
/// condition code CC on the current target. This routine assumes LHS and RHS
/// have already been legalized by LegalizeSetCCOperands. It expands SETCC with
/// illegal condition code into AND / OR of multiple SETCC values.
-void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC,
DebugLoc dl) {
- MVT OpVT = LHS.getValueType();
+ EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: assert(0 && "Unknown condition code action!");
+ default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1691,7 +1673,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: assert(0 && "Don't know how to expand this condition!"); abort();
+ default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
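Each ordered predicate expands to the plain predicate AND'ed with SETO, which is true only when neither operand is NaN. A scalar sketch of the SETOEQ row in plain C++, illustrative only:

#include <cmath>

// SETO: the operands are "ordered", i.e. neither one is NaN.
static bool set_o(float a, float b) { return !std::isnan(a) && !std::isnan(b); }

// SETOEQ = AND(SETEQ, SETO), matching the first row of the table above.
static bool set_oeq(float a, float b) { return a == b && set_o(a, b); }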
@@ -1722,13 +1704,13 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
/// a load from the stack slot to DestVT, extending it if needed.
/// The resultant code need not be legal.
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
- MVT SlotVT,
- MVT DestVT,
+ EVT SlotVT,
+ EVT DestVT,
DebugLoc dl) {
// Create the stack frame object.
unsigned SrcAlign =
TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
- getTypeForMVT());
+ getTypeForEVT(*DAG.getContext()));
SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
@@ -1739,7 +1721,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
unsigned DestAlign =
- TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+ TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext()));
// Emit a store to the stack slot. Use a truncstore if the input value is
    // larger than DestVT.
@@ -1787,9 +1769,9 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned NumElems = Node->getNumOperands();
SDValue Value1, Value2;
DebugLoc dl = Node->getDebugLoc();
- MVT VT = Node->getValueType(0);
- MVT OpVT = Node->getOperand(0).getValueType();
- MVT EltVT = VT.getVectorElementType();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
// If the only non-undef value is the low element, turn this into a
// SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
@@ -1833,7 +1815,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = OpVT.getTypeForMVT();
+ const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
@@ -1886,8 +1868,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
- MVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForMVT();
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
@@ -1897,10 +1879,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForMVT();
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, CallingConv::C, false, Callee, Args, DAG,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG,
Node->getDebugLoc());
// Legalize the call sequence, starting with the chain. This will advance
@@ -1916,8 +1900,8 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unexpected request for libcall!");
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -1932,8 +1916,8 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unexpected request for libcall!");
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
case MVT::i64: LC = Call_I64; break;
@@ -1948,7 +1932,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
/// legal for the target.
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0,
- MVT DestVT,
+ EVT DestVT,
DebugLoc dl) {
if (Op0.getValueType() == MVT::i32) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
@@ -2018,15 +2002,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// as a negative number. To counteract this, the dynamic code adds an
// offset depending on the data type.
uint64_t FF;
- switch (Op0.getValueType().getSimpleVT()) {
- default: assert(0 && "Unsupported integer type!");
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
}
if (TLI.isLittleEndian()) FF <<= 32;
- Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
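The fudge-factor constants follow directly from the IEEE-754 single-precision layout: 2^N is sign 0, biased exponent N+127, zero mantissa, i.e. the 32-bit word (N+127) << 23. A compile-time check of the table above (standalone C++11 sketch, not LLVM code):

#include <cstdint>

// 2^N as an IEEE-754 single: sign 0, biased exponent N+127, mantissa 0.
constexpr uint32_t pow2_float_bits(unsigned N) { return (N + 127u) << 23; }

static_assert(pow2_float_bits(8)  == 0x43800000u, "2^8 as a float");
static_assert(pow2_float_bits(16) == 0x47800000u, "2^16 as a float");
static_assert(pow2_float_bits(32) == 0x4F800000u, "2^32 as a float");
static_assert(pow2_float_bits(64) == 0x5F800000u, "2^64 as a float");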
@@ -2054,17 +2039,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
- MVT DestVT,
+ EVT DestVT,
bool isSigned,
DebugLoc dl) {
// First step, figure out the appropriate *INT_TO_FP operation to use.
- MVT NewInTy = LegalOp.getValueType();
+ EVT NewInTy = LegalOp.getValueType();
unsigned OpToUse = 0;
// Scan for the appropriate larger type to use.
while (1) {
- NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
assert(NewInTy.isInteger() && "Ran out of possibilities!");
// If the target supports SINT_TO_FP of this type, use it.
@@ -2096,17 +2081,17 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
/// operation that returns a larger result.
SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
- MVT DestVT,
+ EVT DestVT,
bool isSigned,
DebugLoc dl) {
// First step, figure out the appropriate FP_TO*INT operation to use.
- MVT NewOutTy = DestVT;
+ EVT NewOutTy = DestVT;
unsigned OpToUse = 0;
// Scan for the appropriate larger type to use.
while (1) {
- NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1);
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
assert(NewOutTy.isInteger() && "Ran out of possibilities!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
@@ -2134,11 +2119,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
///
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
- MVT VT = Op.getValueType();
- MVT SHVT = TLI.getShiftAmountTy();
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy();
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
- switch (VT.getSimpleVT()) {
- default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
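The hunk cuts off before the final OR that recombines Tmp1 and Tmp2; the complete i16 pattern is the usual two-shift byte swap. A scalar sketch, illustrative only:

#include <cstdint>

// Scalar equivalent of the i16 BSWAP expansion: the two shifted copies
// built into Tmp1/Tmp2 above are OR'd back together.
uint16_t bswap16(uint16_t x) {
  return static_cast<uint16_t>((x << 8) | (x >> 8));
}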
@@ -2183,15 +2168,15 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: assert(0 && "Cannot expand this yet!");
+ default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
static const uint64_t mask[6] = {
0x5555555555555555ULL, 0x3333333333333333ULL,
0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
};
- MVT VT = Op.getValueType();
- MVT ShVT = TLI.getShiftAmountTy();
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy();
unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
//x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
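The loop implements the classic tree reduction: at step i, adjacent bit-groups of width 2^i are masked and added pairwise until a single count remains. A scalar sketch of the same expansion for a 64-bit value, illustrative only:

#include <cstdint>

// Scalar model of the CTPOP expansion, using the same mask table.
unsigned popcount64(uint64_t x) {
  static const uint64_t mask[6] = {
    0x5555555555555555ULL, 0x3333333333333333ULL,
    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
  };
  for (unsigned i = 0; (1U << i) <= 32; ++i)          // len/2 == 32
    x = (x & mask[i]) + ((x >> (1U << i)) & mask[i]); // pairwise group add
  return static_cast<unsigned>(x);
}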
@@ -2217,8 +2202,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// return popcount(~x);
//
// but see also: http://www.hackersdelight.org/HDcode/nlz.cc
- MVT VT = Op.getValueType();
- MVT ShVT = TLI.getShiftAmountTy();
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy();
unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
@@ -2233,7 +2218,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
// see also http://www.hackersdelight.org/HDcode/ntz.cc
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNOT(dl, Op, VT),
DAG.getNode(ISD::SUB, dl, VT, Op,
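The identity works because ~x & (x - 1) keeps exactly the bits below the lowest set bit of x (and is all-ones for x == 0), so counting its population gives cttz with the width-on-zero convention used here. A scalar sketch, illustrative only; __builtin_popcount is the GCC/Clang intrinsic:

#include <cstdint>

// Scalar model of the CTTZ-via-CTPOP expansion.
unsigned cttz32(uint32_t x) {
  return __builtin_popcount(~x & (x - 1)); // yields 32 when x == 0
}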
@@ -2272,7 +2257,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
break;
case ISD::EH_RETURN:
- case ISD::DECLARE:
case ISD::DBG_LABEL:
case ISD::EH_LABEL:
case ISD::PREFETCH:
@@ -2291,21 +2275,22 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Node->getOperand(i));
break;
case ISD::UNDEF: {
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
if (VT.isInteger())
Results.push_back(DAG.getConstant(0, VT));
else if (VT.isFloatingPoint())
Results.push_back(DAG.getConstantFP(0, VT));
else
- assert(0 && "Unknown value type!");
+ llvm_unreachable("Unknown value type!");
break;
}
case ISD::TRAP: {
// If this operation is not supported, lower it to 'abort()' call
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
- TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy,
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
Results.push_back(CallResult.second);
@@ -2326,7 +2311,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::SIGN_EXTEND_INREG: {
// NOTE: we could fall back on load/store here too for targets without
// SAR. However, it is doubtful that any exist.
- MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
ExtraVT.getSizeInBits();
SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
@@ -2343,7 +2328,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// NOTE: there is a choice here between constantly creating new stack
// slots and always reusing the same one. We currently always create
// new ones, as reuse may inhibit scheduling.
- MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -2357,8 +2342,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
case ISD::FP_TO_UINT: {
SDValue True, False;
- MVT VT = Node->getOperand(0).getValueType();
- MVT NVT = Node->getValueType(0);
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
const uint64_t zero[] = {0, 0};
APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
@@ -2379,14 +2364,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
}
case ISD::VAARG: {
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
DAG.getConstant(TLI.getTargetData()->
- getTypeAllocSize(VT.getTypeForMVT()),
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
@@ -2434,8 +2419,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SmallVector<int, 8> Mask;
cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
- MVT VT = Node->getValueType(0);
- MVT EltVT = VT.getVectorElementType();
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
@@ -2458,7 +2443,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::EXTRACT_ELEMENT: {
- MVT OpTy = Node->getOperand(0).getValueType();
+ EVT OpTy = Node->getOperand(0).getValueType();
if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
@@ -2507,7 +2492,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
case ISD::FABS: {
// Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = DAG.getConstantFP(0.0, VT);
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
@@ -2622,7 +2607,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::SUB: {
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
"Don't know how to expand this subtraction!");
@@ -2634,7 +2619,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
}
case ISD::UREM:
case ISD::SREM: {
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
bool isSigned = Node->getOpcode() == ISD::SREM;
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
@@ -2662,7 +2647,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::SDIV: {
bool isSigned = Node->getOpcode() == ISD::SDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
@@ -2680,7 +2665,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::MULHS: {
unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
ISD::SMUL_LOHI;
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
"If this wasn't legal, it shouldn't have been created!");
@@ -2690,7 +2675,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::MUL: {
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
// See if multiply or divide can be lowered using two-result operations.
// We just need the low half of the multiply; try both the signed
@@ -2729,7 +2714,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
Results.push_back(Sum);
- MVT OType = Node->getValueType(1);
+ EVT OType = Node->getValueType(1);
SDValue Zero = DAG.getConstant(0, LHS.getValueType());
@@ -2770,7 +2755,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
}
case ISD::UMULO:
case ISD::SMULO: {
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue BottomHalf;
@@ -2786,8 +2771,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) {
- MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) {
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
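The widened-multiply path detects overflow by multiplying at twice the width and checking whether the top half carries anything beyond the extension of the bottom half; for the unsigned case that means checking whether it is nonzero. A scalar sketch of the unsigned case, illustrative only:

#include <cstdint>

// Scalar model of UMULO via a multiply at twice the width.
bool umul32_overflows(uint32_t a, uint32_t b, uint32_t &lo) {
  uint64_t wide = static_cast<uint64_t>(a) * b; // both operands ZERO_EXTENDed
  lo = static_cast<uint32_t>(wide);             // BottomHalf
  return (wide >> 32) != 0;                     // TopHalf != 0 => overflow
}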
@@ -2800,7 +2785,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
    // type in some cases.
// Also, we can fall back to a division in some cases, but that's a big
// performance hit in the general case.
- assert(0 && "Don't know how to expand this operation yet!");
+ llvm_unreachable("Don't know how to expand this operation yet!");
}
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
@@ -2816,7 +2801,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::BUILD_PAIR: {
- MVT PairTy = Node->getValueType(0);
+ EVT PairTy = Node->getValueType(0);
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
@@ -2845,14 +2830,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue Table = Node->getOperand(1);
SDValue Index = Node->getOperand(2);
- MVT PTy = TLI.getPointerTy();
+ EVT PTy = TLI.getPointerTy();
MachineFunction &MF = DAG.getMachineFunction();
unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
Index= DAG.getNode(ISD::MUL, dl, PTy,
Index, DAG.getConstant(EntrySize, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
- MVT MemVT = MVT::getIntegerVT(EntrySize * 8);
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
PseudoSourceValue::getJumpTable(), 0, MemVT);
Addr = LD;
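The BR_JT lowering scales the case index by the table entry size, adds the table base, and sign-extends the loaded entry to pointer width (the SEXTLOAD). A scalar sketch assuming 4-byte absolute entries; any PIC-relative adjustment that may follow this hunk is omitted:

#include <cstdint>

// Scalar model of the jump-table load: Addr = Table + Index * EntrySize,
// then sign-extend the 32-bit entry to pointer width like SEXTLOAD.
intptr_t jump_table_target(const int32_t *table, intptr_t index) {
  int32_t entry = table[index];         // EntrySize == sizeof(int32_t)
  return static_cast<intptr_t>(entry);  // sign extension to pointer width
}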
@@ -2899,7 +2884,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
Results.push_back(Tmp1);
@@ -2958,12 +2943,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
}
void SelectionDAGLegalize::PromoteNode(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
- MVT OVT = Node->getValueType(0);
+ EVT OVT = Node->getValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
- Node->getOpcode() == ISD::SINT_TO_FP) {
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC) {
OVT = Node->getOperand(0).getValueType();
}
- MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
@@ -2973,10 +2959,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
// Perform the larger operation.
- Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1);
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
//if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
- Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
@@ -2987,7 +2973,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
DAG.getConstant(NVT.getSizeInBits() -
OVT.getSizeInBits(), NVT));
}
- Results.push_back(Tmp1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
case ISD::BSWAP: {
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
@@ -3012,16 +2998,26 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
break;
case ISD::AND:
case ISD::OR:
- case ISD::XOR:
- assert(OVT.isVector() && "Don't know how to promote scalar logic ops");
- // Bit convert each of the values to the new type.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BIT_CONVERT;
+ TruncOp = ISD::BIT_CONVERT;
+ } else if (OVT.isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ llvm_report_error("Cannot promote logic operation");
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
- // Bit convert the result back the original type.
- Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1));
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
break;
- case ISD::SELECT:
+ }
+ case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector()) {
ExtOp = ISD::BIT_CONVERT;
@@ -3046,6 +3042,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
DAG.getIntPtrConstant(0));
Results.push_back(Tmp1);
break;
+ }
case ISD::VECTOR_SHUFFLE: {
SmallVector<int, 8> Mask;
cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
@@ -3061,31 +3058,14 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
break;
}
case ISD::SETCC: {
- // First step, figure out the appropriate operation to use.
- // Allow SETCC to not be supported for all legal data types
- // Mostly this targets FP
- MVT NewInTy = Node->getOperand(0).getValueType();
- MVT OldVT = NewInTy; OldVT = OldVT;
-
- // Scan for the appropriate larger type to use.
- while (1) {
- NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
-
- assert(NewInTy.isInteger() == OldVT.isInteger() &&
- "Fell off of the edge of the integer world");
- assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() &&
- "Fell off of the edge of the floating point world");
-
- // If the target supports SETCC of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy))
- break;
- }
- if (NewInTy.isInteger())
- assert(0 && "Cannot promote Legal Integer SETCC yet");
- else {
- Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp1);
- Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp2);
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
}
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
Tmp1, Tmp2, Node->getOperand(2)));
break;
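Promoting a SETCC is sound only if both operands are extended the way the predicate will read them: sign-extension for signed predicates, zero-extension otherwise, which is the choice isSignedIntSetCC makes above. A scalar sketch for a signed i8 compare done at i32, illustrative only:

#include <cstdint>

// Scalar model of a promoted SETCC: SETLT is signed, so both sides
// are SIGN_EXTENDed before comparing at the wider width.
bool setlt_i8_at_i32(int8_t a, int8_t b) {
  int32_t wa = a, wb = b; // SIGN_EXTEND
  return wa < wb;
}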
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index c3c1beabd5f0..84e39b480396 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -20,10 +20,12 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// GetFPLibCall - Return the right libcall for the given floating point type.
-static RTLIB::Libcall GetFPLibCall(MVT VT,
+static RTLIB::Libcall GetFPLibCall(EVT VT,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
@@ -41,18 +43,17 @@ static RTLIB::Libcall GetFPLibCall(MVT VT,
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG);
- cerr << "\n");
+ DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ errs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "SoftenFloatResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to soften the result of this operator!");
- abort();
+ llvm_unreachable("Do not know how to soften the result of this operator!");
case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
@@ -107,14 +108,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
// Convert the inputs to integers, and build a new pair out of them.
return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
- TLI.getTypeToTransformTo(N->getValueType(0)),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
BitConvertToInteger(N->getOperand(0)),
BitConvertToInteger(N->getOperand(1)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
- TLI.getTypeToTransformTo(N->getValueType(0)));
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -125,7 +126,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
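Soft-float FABS reduces to integer masking: clear the sign bit on the integer image of the value. A scalar sketch for f32, illustrative only:

#include <cstdint>

// Scalar model of soft-float FABS: Mask = ~(1 << (Size - 1)).
uint32_t soft_fabs_bits_f32(uint32_t bits) {
  return bits & ~(1u << 31); // drop the IEEE sign bit
}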
@@ -136,7 +137,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
@@ -148,7 +149,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32,
@@ -163,8 +164,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
SDValue RHS = BitConvertToInteger(N->getOperand(1));
DebugLoc dl = N->getDebugLoc();
- MVT LVT = LHS.getValueType();
- MVT RVT = RHS.getValueType();
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
unsigned LSize = LVT.getSizeInBits();
unsigned RSize = RVT.getSizeInBits();
@@ -199,7 +200,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32,
@@ -210,7 +211,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
@@ -222,7 +223,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32,
@@ -233,7 +234,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32,
@@ -244,7 +245,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::FLOOR_F32,
@@ -255,7 +256,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32,
@@ -266,7 +267,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32,
@@ -277,7 +278,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::LOG10_F32,
@@ -288,7 +289,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
@@ -300,7 +301,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::NEARBYINT_F32,
@@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
@@ -324,7 +325,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
@@ -332,7 +333,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
@@ -340,7 +341,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
@@ -354,7 +355,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
assert(N->getOperand(1).getValueType() == MVT::i32 &&
"Unsupported power type!");
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32,
@@ -365,7 +366,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
@@ -377,7 +378,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32,
@@ -388,7 +389,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32,
@@ -399,7 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32,
@@ -410,7 +411,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
return MakeLibCall(GetFPLibCall(N->getValueType(0),
@@ -422,7 +423,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return MakeLibCall(GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
@@ -434,8 +435,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
LoadSDNode *L = cast<LoadSDNode>(N);
- MVT VT = N->getValueType(0);
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
DebugLoc dl = N->getDebugLoc();
SDValue NewL;
@@ -479,19 +480,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
- return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
SDValue Chain = N->getOperand(0); // Get the chain.
SDValue Ptr = N->getOperand(1); // Get the pointer.
- MVT VT = N->getValueType(0);
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
DebugLoc dl = N->getDebugLoc();
SDValue NewVAARG;
NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
-
+
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
@@ -500,9 +501,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
- MVT SVT = N->getOperand(0).getValueType();
- MVT RVT = N->getValueType(0);
- MVT NVT = MVT();
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
DebugLoc dl = N->getDebugLoc();
// If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
@@ -521,7 +522,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
- return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl);
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl);
}
@@ -530,18 +531,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
- cerr << "\n");
+ DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ errs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "SoftenFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to soften this operator's operand!");
- abort();
+ llvm_unreachable("Do not know how to soften this operator's operand!");
case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
@@ -574,7 +574,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, DebugLoc dl) {
SDValue LHSInt = GetSoftenedFloat(NewLHS);
SDValue RHSInt = GetSoftenedFloat(NewRHS);
- MVT VT = NewLHS.getValueType();
+ EVT VT = NewLHS.getValueType();
assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
@@ -637,7 +637,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
}
}
- MVT RetVT = MVT::i32; // FIXME: is this the correct return type?
+ EVT RetVT = MVT::i32; // FIXME: is this the correct return type?
SDValue Ops[2] = { LHSInt, RHSInt };
NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
NewRHS = DAG.getConstant(0, RetVT);
@@ -659,8 +659,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
- MVT SVT = N->getOperand(0).getValueType();
- MVT RVT = N->getValueType(0);
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
@@ -688,7 +688,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
- MVT RVT = N->getValueType(0);
+ EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -696,7 +696,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
- MVT RVT = N->getValueType(0);
+ EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -767,7 +767,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -778,11 +778,10 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "ExpandFloatResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to expand the result of this operator!");
- abort();
+ llvm_unreachable("Do not know how to expand the result of this operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
@@ -830,7 +829,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
assert(NVT.getSizeInBits() == integerPartWidth &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
@@ -982,7 +981,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
}
@@ -1067,7 +1066,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
SDValue Ptr = LD->getBasePtr();
DebugLoc dl = N->getDebugLoc();
- MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
@@ -1090,10 +1089,10 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
- MVT VT = N->getValueType(0);
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Src = N->getOperand(0);
- MVT SrcVT = Src.getValueType();
+ EVT SrcVT = Src.getValueType();
bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
DebugLoc dl = N->getDebugLoc();
@@ -1135,7 +1134,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
const uint64_t *Parts = 0;
- switch (SrcVT.getSimpleVT()) {
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default:
assert(false && "Unsupported UINT_TO_FP!");
case MVT::i32:
@@ -1167,7 +1166,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
/// types of the node are known to be legal, but other operands of the node may
/// need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n");
SDValue Res = SDValue();
if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
@@ -1178,11 +1177,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "ExpandFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to expand this operator's operand!");
- abort();
+ llvm_unreachable("Do not know how to expand this operator's operand!");
case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
@@ -1224,7 +1222,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
GetExpandedFloat(NewLHS, LHSLo, LHSHi);
GetExpandedFloat(NewRHS, RHSLo, RHSHi);
- MVT VT = NewLHS.getValueType();
+ EVT VT = NewLHS.getValueType();
assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
// FIXME: This generated code sucks. We want to generate
@@ -1276,7 +1274,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
- MVT RVT = N->getValueType(0);
+ EVT RVT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
@@ -1297,7 +1295,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
- MVT RVT = N->getValueType(0);
+ EVT RVT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
@@ -1374,7 +1372,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
- MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType());
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0c826f67c24a..8ac8063be9ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,6 +20,8 @@
#include "LegalizeTypes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -31,7 +33,7 @@ using namespace llvm;
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -41,11 +43,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "PromoteIntegerResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to promote this operator!");
- abort();
+ llvm_unreachable("Do not know how to promote this operator!");
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
@@ -161,10 +162,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
SDValue InOp = N->getOperand(0);
- MVT InVT = InOp.getValueType();
- MVT NInVT = TLI.getTypeToTransformTo(InVT);
- MVT OutVT = N->getValueType(0);
- MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
@@ -201,7 +202,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
std::swap(Lo, Hi);
InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
- MVT::getIntegerVT(NOutVT.getSizeInBits()),
+ EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
}
@@ -211,24 +212,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
}
- // Otherwise, lower the bit-convert to a store/load from the stack.
- // Create the stack frame object. Make sure it is aligned for both
- // the source and destination types.
- SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT);
- int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
-
- // Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
-
- // Result is an extending load from the stack slot.
- return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
- MVT OVT = N->getValueType(0);
- MVT NVT = Op.getValueType();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
@@ -240,18 +231,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
- TLI.getTypeToTransformTo(N->getValueType(0)),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
JoinIntegers(N->getOperand(0), N->getOperand(1)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// FIXME there is no actual debug info here
DebugLoc dl = N->getDebugLoc();
// Zero extend things like i1, sign extend everything else. It shouldn't
// matter in theory which one we pick, but this tends to give better code?
unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT),
+ SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT),
SDValue(N, 0));
assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
return Result;
@@ -263,7 +254,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
"can only promote integers");
- MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
N->getOperand(1), N->getOperand(2),
N->getOperand(3), N->getOperand(4), CvtCode);
@@ -273,8 +264,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
DebugLoc dl = N->getDebugLoc();
- MVT OVT = N->getValueType(0);
- MVT NVT = Op.getValueType();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
@@ -290,8 +281,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
- MVT OVT = N->getValueType(0);
- MVT NVT = Op.getValueType();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
@@ -303,63 +294,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
}
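[Editor's note: the two count adjustments above reduce to simple arithmetic; for i8 promoted to i32:]

    // CTLZ: ctlz32(zext x) sees 32 - 8 = 24 extra leading zeros, so the
    //       SUB above removes NVT.bits - OVT.bits = 24 from the result.
    // CTTZ: OR in the bit just past the original width, x | 0x100, so a
    //       zero i8 input yields cttz32 = 8 instead of 32.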
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
- MVT OldVT = N->getValueType(0);
- SDValue OldVec = N->getOperand(0);
- if (getTypeAction(OldVec.getValueType()) == WidenVector)
- OldVec = GetWidenedVector(N->getOperand(0));
- unsigned OldElts = OldVec.getValueType().getVectorNumElements();
DebugLoc dl = N->getDebugLoc();
-
- if (OldElts == 1) {
- assert(!isTypeLegal(OldVec.getValueType()) &&
- "Legal one-element vector of a type needing promotion!");
- // It is tempting to follow GetScalarizedVector by a call to
- // GetPromotedInteger, but this would be wrong because the
- // scalarized value may not yet have been processed.
- return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT),
- GetScalarizedVector(OldVec));
- }
-
- // Convert to a vector half as long with an element type of twice the width,
- // for example <4 x i16> -> <2 x i32>.
- assert(!(OldElts & 1) && "Odd length vectors not supported!");
- MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits());
- assert(OldVT.isSimple() && NewVT.isSimple());
-
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
- MVT::getVectorVT(NewVT, OldElts / 2),
- OldVec);
-
- // Extract the element at OldIdx / 2 from the new vector.
- SDValue OldIdx = N->getOperand(1);
- SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx,
- DAG.getConstant(1, TLI.getPointerTy()));
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx);
-
- // Select the appropriate half of the element: Lo if OldIdx was even,
- // Hi if it was odd.
- SDValue Lo = Elt;
- SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt,
- DAG.getConstant(OldVT.getSizeInBits(),
- TLI.getPointerTy()));
- if (TLI.isBigEndian())
- std::swap(Lo, Hi);
-
- // Extend to the promoted type.
- SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx);
- SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo);
- return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
}
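[Editor's note: for reference, the deleted fallback handled an illegal element type by pairing adjacent elements. Roughly, for <4 x i16> on a target where i16 needs promotion:]

    // NewVec = bitcast <4 x i16> to <2 x i32>
    // Elt    = extractelement NewVec, OldIdx >> 1
    // Res    = (OldIdx & 1) ? (Elt >> 16) : Elt  // big-endian swaps halves
    // The rewrite instead extracts directly in the promoted type and lets
    // the operand's own legalization handle the vector.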
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NewOpc = N->getOpcode();
DebugLoc dl = N->getDebugLoc();
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
// and SINT conversions are Custom, there is no way to tell which is preferable.
- // We choose SINT because that's the right thing on PPC.)
+ // We choose SINT because that's the right thing on PPC.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
@@ -376,7 +325,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
@@ -403,7 +352,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
@@ -421,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1));
- MVT ValueVTs[] = { N->getValueType(0), NVT };
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
DAG.getVTList(ValueVTs, 2), Ops, 2);
@@ -442,8 +391,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
// sign extension of its truncation to the original type.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- MVT OVT = N->getOperand(0).getValueType();
- MVT NVT = LHS.getValueType();
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
DebugLoc dl = N->getDebugLoc();
// Do the arithmetic in the larger type.
@@ -487,7 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
- MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
assert(isTypeLegal(SVT) && "Illegal SetCC type!");
DebugLoc dl = N->getDebugLoc();
@@ -496,14 +445,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
return DAG.getNode(ISD::SHL, N->getDebugLoc(),
- TLI.getTypeToTransformTo(N->getValueType(0)),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
}
@@ -532,18 +481,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
- MVT VT = N->getValueType(0);
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Res = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
switch (getTypeAction(N->getOperand(0).getValueType())) {
- default: assert(0 && "Unknown type action!");
+ default: llvm_unreachable("Unknown type action!");
case Legal:
case ExpandInteger:
Res = N->getOperand(0);
@@ -565,8 +514,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
// zero extension of its truncation to the original type.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- MVT OVT = N->getOperand(0).getValueType();
- MVT NVT = LHS.getValueType();
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
DebugLoc dl = N->getDebugLoc();
// Do the arithmetic in the larger type.
@@ -594,17 +543,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
- return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Chain = N->getOperand(0); // Get the chain.
SDValue Ptr = N->getOperand(1); // Get the pointer.
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
- MVT RegVT = TLI.getRegisterType(VT);
- unsigned NumRegs = TLI.getNumRegisters(VT);
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
// The argument is passed as NumRegs registers of type RegVT.
SmallVector<SDValue, 8> Parts(NumRegs);
@@ -618,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
std::reverse(Parts.begin(), Parts.end());
// Assemble the parts in the promoted type.
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
for (unsigned i = 1; i < NumRegs; ++i) {
SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
@@ -650,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -659,11 +608,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "PromoteIntegerOperand Op #" << OpNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to promote this operator's operand!");
- abort();
+ llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
@@ -719,7 +667,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// insert sign extends for ALL conditions, but zero extend is cheaper on
// many machines (an AND instead of two shifts), so prefer it.
switch (CCCode) {
- default: assert(0 && "Unknown integer comparison!");
+ default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
case ISD::SETNE:
case ISD::SETUGE:
@@ -770,7 +718,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "only know how to promote condition");
// Promote all the way up to the canonical SetCC type.
- MVT SVT = TLI.getSetCCResultType(MVT::Other);
+ EVT SVT = TLI.getSetCCResultType(MVT::Other);
SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
@@ -780,7 +728,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
// Since the result type is legal, the operands must promote to it.
- MVT OVT = N->getOperand(0).getValueType();
+ EVT OVT = N->getOperand(0).getValueType();
SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
SDValue Hi = GetPromotedInteger(N->getOperand(1));
assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
@@ -795,7 +743,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// The vector type is legal but the element type is not. This implies
// that the vector is a power-of-two in length and that the element
// type does not have a strange size (eg: it is not i1).
- MVT VecVT = N->getValueType(0);
+ EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
assert(!(NumElts & 1) && "Legal vector of one illegal element?");
@@ -871,7 +819,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote condition");
// Promote all the way up to the canonical SetCC type.
- MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
@@ -962,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -973,11 +921,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "ExpandIntegerResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to expand the result of this operator!");
- abort();
+ llvm_unreachable("Do not know how to expand the result of this operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
@@ -1043,10 +990,10 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
- MVT NVT = InL.getValueType();
+ EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
- MVT ShTy = N->getOperand(1).getValueType();
+ EVT ShTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::SHL) {
if (Amt > VTBits) {
@@ -1060,7 +1007,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Hi = InL;
} else if (Amt == 1 &&
TLI.isOperationLegalOrCustom(ISD::ADDC,
- TLI.getTypeToExpandTo(NVT))) {
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
SDValue LoOps[2] = { InL, InL };
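[Editor's note: the X+X expansion threads the carry between the halves; presumably the lines after this hunk complete it as:]

    // Lo = ADDC(InL, InL)                 // carry out in the Flag result
    // Hi = ADDE(InH, InH, Lo.getValue(1)) // carried-out bit becomes the
    //                                     // low bit of the high half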
@@ -1130,8 +1077,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
bool DAGTypeLegalizer::
ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
- MVT ShTy = Amt.getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
unsigned ShBits = ShTy.getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
@@ -1158,7 +1105,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
DAG.getConstant(~HighBitMask, ShTy));
switch (N->getOpcode()) {
- default: assert(0 && "Unknown shift");
+ default: llvm_unreachable("Unknown shift");
case ISD::SHL:
Lo = DAG.getConstant(0, NVT); // Low part is zero.
Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
@@ -1186,7 +1133,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Amt);
unsigned Op1, Op2;
switch (N->getOpcode()) {
- default: assert(0 && "Unknown shift");
+ default: llvm_unreachable("Unknown shift");
case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
case ISD::SRL:
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
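[Editor's note: the Op1/Op2 pairing encodes the usual two-shift recombination for an amount known to be below NVTBits; e.g. for SRL of an i64 held in two i32 parts:]

    // Lo = (InL >> Amt) | (InH << (32 - Amt));  // Op1 = SRL, Op2 = SHL
    // Hi =  InH >> Amt;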
@@ -1208,8 +1155,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
bool DAGTypeLegalizer::
ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
- MVT ShTy = Amt.getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
unsigned NVTBits = NVT.getSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
@@ -1226,7 +1173,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Lo1, Hi1, Lo2, Hi2;
switch (N->getOpcode()) {
- default: assert(0 && "Unknown shift");
+ default: llvm_unreachable("Unknown shift");
case ISD::SHL:
// ShAmt < NVTBits
Lo1 = DAG.getConstant(0, NVT); // Low part is zero.
@@ -1283,7 +1230,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- MVT NVT = LHSL.getValueType();
+ EVT NVT = LHSL.getValueType();
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1295,7 +1242,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::ADDC : ISD::SUBC,
- TLI.getTypeToExpandTo(NVT));
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
@@ -1384,7 +1331,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
@@ -1408,14 +1355,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
SDValue &Lo, SDValue &Hi) {
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- MVT NVT = Lo.getValueType();
- MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned NVTBits = NVT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
- DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part replicates the sign bit of Lo, make it explicit.
@@ -1428,14 +1375,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
SDValue &Lo, SDValue &Hi) {
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- MVT NVT = Lo.getValueType();
- MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned NVTBits = NVT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
- DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part must be zero, make it explicit.
@@ -1453,7 +1400,7 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
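[Editor's note: the Hi half, outside this hunk, is presumably built the same way from the right-shifted constant. Concretely, for an i64 constant in i32 parts:]

    // Cst = 0x1122334455667788
    // Lo  = trunc(Cst)           = 0x55667788
    // Hi  = trunc(Cst.lshr(32))  = 0x11223344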
@@ -1465,7 +1412,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
DebugLoc dl = N->getDebugLoc();
// ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- MVT NVT = Lo.getValueType();
+ EVT NVT = Lo.getValueType();
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
@@ -1484,7 +1431,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
DebugLoc dl = N->getDebugLoc();
// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- MVT NVT = Lo.getValueType();
+ EVT NVT = Lo.getValueType();
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
Hi = DAG.getConstant(0, NVT);
@@ -1495,7 +1442,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
DebugLoc dl = N->getDebugLoc();
// cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- MVT NVT = Lo.getValueType();
+ EVT NVT = Lo.getValueType();
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
@@ -1512,7 +1459,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
DebugLoc dl = N->getDebugLoc();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
@@ -1522,7 +1469,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
DebugLoc dl = N->getDebugLoc();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
@@ -1538,8 +1485,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
- MVT VT = N->getValueType(0);
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
@@ -1551,10 +1498,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
assert(NVT.isByteSized() && "Expanded type not byte sized!");
if (N->getMemoryVT().bitsLE(NVT)) {
- MVT EVT = N->getMemoryVT();
+ EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
- EVT, isVolatile, Alignment);
+ MemVT, isVolatile, Alignment);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -1580,7 +1527,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
- MVT NEVT = MVT::getIntegerVT(ExcessBits);
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
@@ -1597,14 +1544,15 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else {
// Big-endian - high bits are at low addresses. Favor aligned loads at
// the cost of some bit-fiddling.
- MVT EVT = N->getMemoryVT();
- unsigned EBytes = EVT.getStoreSizeInBits()/8;
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
- MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
isVolatile, Alignment);
// Increment the pointer to the other half.
@@ -1613,7 +1561,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize,
- MVT::getIntegerVT(ExcessBits),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
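[Editor's note: a worked example of the big-endian bookkeeping above, assuming the fix-up after this hunk mirrors the store path shown later in this patch.]

    // i40 extending load, NVT = i32: EBytes = 5, IncrementSize = 4,
    // ExcessBits = (5 - 4) * 8 = 8
    //   Hi = ext-load of the top 40 - 8 = 32 bits at Ptr
    //   Lo = zext-load of the low 8 bits at Ptr + 4
    //   then, after this hunk:  Lo |= Hi << 8;  Hi >>= 24;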
@@ -1652,8 +1600,8 @@ void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT VT = N->getValueType(0);
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
DebugLoc dl = N->getDebugLoc();
bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
@@ -1742,7 +1690,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1762,7 +1710,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
// If we can emit an efficient shift operation, do so now. Check to see if
@@ -1788,7 +1736,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// Next check to see if the target supports this SHL_PARTS operation or if it
// will custom expand it.
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
Action == TargetLowering::Custom) {
@@ -1797,7 +1745,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
- MVT VT = LHSL.getValueType();
+ EVT VT = LHSL.getValueType();
Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
Hi = Lo.getValue(1);
return;
@@ -1838,7 +1786,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
else if (VT == MVT::i128)
LC = RTLIB::SRA_I128;
}
-
+
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
@@ -1846,12 +1794,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
- assert(0 && "Unsupported shift!");
+ llvm_unreachable("Unsupported shift!");
}
void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
@@ -1874,7 +1822,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
unsigned ExcessBits =
Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
- DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
}
}
@@ -1882,7 +1830,7 @@ void DAGTypeLegalizer::
ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (EVT.bitsLE(Lo.getValueType())) {
// sext_inreg the low part if needed.
@@ -1900,13 +1848,13 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
unsigned ExcessBits =
EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
- DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
}
}
void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1926,7 +1874,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
Hi = DAG.getNode(ISD::SRL, dl,
@@ -1937,7 +1885,7 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1957,7 +1905,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1977,7 +1925,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
@@ -1996,7 +1944,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
SplitInteger(Res, Lo, Hi);
unsigned ExcessBits =
Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
- Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits));
+ Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
}
}
@@ -2010,7 +1958,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -2019,11 +1967,10 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "ExpandIntegerOperand Op #" << OpNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); errs() << "\n";
#endif
- assert(0 && "Do not know how to expand this operator's operand!");
- abort();
+ llvm_unreachable("Do not know how to expand this operator's operand!");
case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
@@ -2070,7 +2017,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
GetExpandedInteger(NewLHS, LHSLo, LHSHi);
GetExpandedInteger(NewRHS, RHSLo, RHSHi);
- MVT VT = NewLHS.getValueType();
+ EVT VT = NewLHS.getValueType();
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
@@ -2105,7 +2052,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// FIXME: This generated code sucks.
ISD::CondCode LowCC;
switch (CCCode) {
- default: assert(0 && "Unknown integer setcc!");
+ default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETLT:
case ISD::SETULT: LowCC = ISD::SETULT; break;
case ISD::SETGT:
@@ -2122,7 +2069,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
- TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
SDValue Tmp1, Tmp2;
Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
@@ -2228,7 +2175,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
- MVT DstVT = N->getValueType(0);
+ EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
@@ -2242,8 +2189,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
assert(OpNo == 1 && "Can only expand the stored value so far");
- MVT VT = N->getOperand(1).getValueType();
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
int SVOffset = N->getSrcValueOffset();
@@ -2267,7 +2214,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
- MVT NEVT = MVT::getIntegerVT(ExcessBits);
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
@@ -2282,11 +2229,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// the cost of some bit-fiddling.
GetExpandedInteger(N->getValue(), Lo, Hi);
- MVT EVT = N->getMemoryVT();
- unsigned EBytes = EVT.getStoreSizeInBits()/8;
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
- MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits);
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits);
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer high bits from the top of Lo to the bottom of Hi.
@@ -2309,7 +2256,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
SVOffset+IncrementSize,
- MVT::getIntegerVT(ExcessBits),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2324,8 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
- MVT SrcVT = Op.getValueType();
- MVT DstVT = N->getValueType(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
@@ -2360,7 +2307,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
- SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)),
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
TLI.getPointerTy());
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
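[Editor's note: this is the standard unsigned-to-float fudge: convert as signed, then add back 2^SrcBits when the value was "negative". FF presumably holds the f32 bit pattern of 2^SrcBits (e.g. 0x4F800000 for i32), paired with 0 so the sign-dependent pointer offset selects between no correction and the correction.]

    // signed = sint_to_fp(x)          // off by 2^N iff x's sign bit is set
    // fudge  = f32 loaded from the pool pair: 0.0 or 2^N, chosen by sign
    // result = signed + fudge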
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 3135a445431e..5992f5d534da 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -15,9 +15,11 @@
#include "LegalizeTypes.h"
#include "llvm/CallingConv.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
@@ -113,43 +115,43 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
if (I->getNodeId() != Processed) {
if (Mapped != 0) {
- cerr << "Unprocessed value in a map!";
+ errs() << "Unprocessed value in a map!";
Failed = true;
}
} else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
if (Mapped > 1) {
- cerr << "Value with legal type was transformed!";
+ errs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
if (Mapped == 0) {
- cerr << "Processed value not in any map!";
+ errs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
- cerr << "Value in multiple maps!";
+ errs() << "Value in multiple maps!";
Failed = true;
}
}
if (Failed) {
if (Mapped & 1)
- cerr << " ReplacedValues";
+ errs() << " ReplacedValues";
if (Mapped & 2)
- cerr << " PromotedIntegers";
+ errs() << " PromotedIntegers";
if (Mapped & 4)
- cerr << " SoftenedFloats";
+ errs() << " SoftenedFloats";
if (Mapped & 8)
- cerr << " ScalarizedVectors";
+ errs() << " ScalarizedVectors";
if (Mapped & 16)
- cerr << " ExpandedIntegers";
+ errs() << " ExpandedIntegers";
if (Mapped & 32)
- cerr << " ExpandedFloats";
+ errs() << " ExpandedFloats";
if (Mapped & 64)
- cerr << " SplitVectors";
+ errs() << " SplitVectors";
if (Mapped & 128)
- cerr << " WidenedVectors";
- cerr << "\n";
- abort();
+ errs() << " WidenedVectors";
+ errs() << "\n";
+ llvm_unreachable(0);
}
}
}
@@ -210,7 +212,7 @@ bool DAGTypeLegalizer::run() {
// Scan the values produced by the node, checking to see if any result
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
- MVT ResultVT = N->getValueType(i);
+ EVT ResultVT = N->getValueType(i);
switch (getTypeAction(ResultVT)) {
default:
assert(false && "Unknown action!");
@@ -263,7 +265,7 @@ ScanOperands:
if (IgnoreNodeResults(N->getOperand(i).getNode()))
continue;
- MVT OpVT = N->getOperand(i).getValueType();
+ EVT OpVT = N->getOperand(i).getValueType();
switch (getTypeAction(OpVT)) {
default:
assert(false && "Unknown action!");
@@ -336,7 +338,7 @@ ScanOperands:
}
if (i == NumOperands) {
- DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n");
}
}
NodeDone:
@@ -405,7 +407,7 @@ NodeDone:
if (!IgnoreNodeResults(I))
for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
if (!isTypeLegal(I->getValueType(i))) {
- cerr << "Result type " << i << " illegal!\n";
+ errs() << "Result type " << i << " illegal!\n";
Failed = true;
}
@@ -413,25 +415,25 @@ NodeDone:
for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
!isTypeLegal(I->getOperand(i).getValueType())) {
- cerr << "Operand type " << i << " illegal!\n";
+ errs() << "Operand type " << i << " illegal!\n";
Failed = true;
}
if (I->getNodeId() != Processed) {
if (I->getNodeId() == NewNode)
- cerr << "New node not analyzed?\n";
+ errs() << "New node not analyzed?\n";
else if (I->getNodeId() == Unanalyzed)
- cerr << "Unanalyzed node not noticed?\n";
+ errs() << "Unanalyzed node not noticed?\n";
else if (I->getNodeId() > 0)
- cerr << "Operand not processed?\n";
+ errs() << "Operand not processed?\n";
else if (I->getNodeId() == ReadyToProcess)
- cerr << "Not added to worklist?\n";
+ errs() << "Not added to worklist?\n";
Failed = true;
}
if (Failed) {
- I->dump(&DAG); cerr << "\n";
- abort();
+ I->dump(&DAG); errs() << "\n";
+ llvm_unreachable(0);
}
}
#endif
@@ -479,8 +481,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
NewOps.push_back(Op);
} else if (Op != OrigOp) {
// This is the first operand to change - add all operands so far.
- for (unsigned j = 0; j < i; ++j)
- NewOps.push_back(N->getOperand(j));
+ NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
NewOps.push_back(Op);
}
}
@@ -732,6 +733,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
}
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
AnalyzeNewValue(Result);
SDValue &OpEntry = PromotedIntegers[Op];
@@ -740,6 +743,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
AnalyzeNewValue(Result);
SDValue &OpEntry = SoftenedFloats[Op];
@@ -748,6 +753,8 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ "Invalid type for scalarized vector");
AnalyzeNewValue(Result);
SDValue &OpEntry = ScalarizedVectors[Op];
@@ -767,6 +774,9 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
SDValue Hi) {
+ assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
@@ -790,6 +800,9 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
SDValue Hi) {
+ assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
@@ -813,6 +826,12 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
@@ -825,6 +844,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
AnalyzeNewValue(Result);
SDValue &OpEntry = WidenedVectors[Op];
@@ -841,7 +862,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
unsigned BitWidth = Op.getValueType().getSizeInBits();
return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
- MVT::getIntegerVT(BitWidth), Op);
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
@@ -849,14 +870,14 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
assert(Op.getValueType().isVector() && "Only applies to vectors!");
unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
- MVT EltNVT = MVT::getIntegerVT(EltWidth);
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
- MVT::getVectorVT(EltNVT, NumElts), Op);
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
}
SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
- MVT DestVT) {
+ EVT DestVT) {
DebugLoc dl = Op.getDebugLoc();
// Create the stack frame object. Make sure it is aligned for both
// the source and destination types.
@@ -875,7 +896,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
/// The last parameter being TRUE means we are dealing with a
/// node with illegal result types. The second parameter denotes the type of
/// illegal ResNo in that case.
-bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) {
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// See if the target wants to custom lower this node.
if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
return false;
@@ -900,21 +921,14 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) {
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split into two not necessarily identical pieces.
-void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) {
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+ // Currently all types are split in half.
if (!InVT.isVector()) {
- LoVT = HiVT = TLI.getTypeToTransformTo(InVT);
+ LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
} else {
- MVT NewEltVT = InVT.getVectorElementType();
unsigned NumElements = InVT.getVectorNumElements();
- if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector.
- NumElements >>= 1;
- LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements);
- } else { // Non-power-of-two vectors.
- unsigned NewNumElts_Lo = 1 << Log2_32(NumElements);
- unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo;
- LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo);
- HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi);
- }
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2);
}
}
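[Editor's note: the simplification is easiest to see by example.]

    // v8i32 -> v4i32 + v4i32      (only remaining case: split in half)
    // v7i32 -> v4i32 + v3i32      (old non-power-of-two path, deleted
    //                              above; odd lengths now hit the assert)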
@@ -923,14 +937,14 @@ void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) {
void DAGTypeLegalizer::GetPairElements(SDValue Pair,
SDValue &Lo, SDValue &Hi) {
DebugLoc dl = Pair.getDebugLoc();
- MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
DAG.getIntPtrConstant(1));
}
-SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT,
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
SDValue Index) {
DebugLoc dl = Index.getDebugLoc();
// Make sure the index type is big enough to compute in.
@@ -952,9 +966,9 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
// Arbitrarily use dlHi for result DebugLoc
DebugLoc dlHi = Hi.getDebugLoc();
DebugLoc dlLo = Lo.getDebugLoc();
- MVT LVT = Lo.getValueType();
- MVT HVT = Hi.getValueType();
- MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits());
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits());
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
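[Editor's note: the tail of JoinIntegers, outside this hunk, presumably shifts and ORs the two extended halves together:]

    // Hi = Hi << LVT.getSizeInBits();
    // return Hi | Lo;              // e.g. i32 Lo/Hi -> one i64 value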
@@ -986,7 +1000,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
/// MakeLibCall - Generate a libcall taking the given operands as arguments and
/// returning a result of type RetVT.
-SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps,
bool isSigned, DebugLoc dl) {
TargetLowering::ArgListTy Args;
@@ -995,7 +1009,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForMVT();
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Args.push_back(Entry);
@@ -1003,17 +1017,19 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- const Type *RetTy = RetVT.getTypeForMVT();
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, CallingConv::C, false, Callee, Args, DAG, dl);
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
return CallInfo.first;
}
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
-SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) {
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
DebugLoc dl = Bool.getDebugLoc();
ISD::NodeType ExtendCode;
switch (TLI.getBooleanContents()) {
@@ -1039,7 +1055,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) {
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
/// bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
- MVT LoVT, MVT HiVT,
+ EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi) {
DebugLoc dl = Op.getDebugLoc();
assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
@@ -1054,7 +1070,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
/// type half the size of Op's.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
- MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2);
SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
}
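[Editor's note: so splitting an i64 value x goes through HalfVT = i32 and, per the three-argument overload above, produces:]

    // Lo = trunc x to i32
    // Hi = trunc (x lshr 32) to i32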
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 02b073221f6a..859c65668da4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -64,7 +64,7 @@ private:
SoftenFloat, // Convert this float type to a same size integer type.
ExpandFloat, // Split this float type into two of half the size.
ScalarizeVector, // Replace this one-element vector with its element type.
- SplitVector, // This vector type should be split into smaller vectors.
+ SplitVector, // Split this vector type into two of half the size.
WidenVector // This vector type should be widened into a larger vector.
};
@@ -74,8 +74,8 @@ private:
TargetLowering::ValueTypeActionImpl ValueTypeActions;
/// getTypeAction - Return how we should legalize values of this type.
- LegalizeAction getTypeAction(MVT VT) const {
- switch (ValueTypeActions.getTypeAction(VT)) {
+ LegalizeAction getTypeAction(EVT VT) const {
+ switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) {
default:
assert(false && "Unknown legalize action!");
case TargetLowering::Legal:
@@ -96,7 +96,7 @@ private:
if (VT.isInteger())
return ExpandInteger;
else if (VT.getSizeInBits() ==
- TLI.getTypeToTransformTo(VT).getSizeInBits())
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
return SoftenFloat;
else
return ExpandFloat;
@@ -109,8 +109,9 @@ private:
}
/// isTypeLegal - Return true if this type is legal on this target.
- bool isTypeLegal(MVT VT) const {
- return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
+ bool isTypeLegal(EVT VT) const {
+ return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) ==
+ TargetLowering::Legal);
}
/// IgnoreNodeResults - Pretend all of this node's results are legal.
@@ -185,19 +186,19 @@ private:
// Common routines.
SDValue BitConvertToInteger(SDValue Op);
SDValue BitConvertVectorToIntegerVector(SDValue Op);
- SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT);
- bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult);
- SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
- SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
+ SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
DebugLoc dl);
- SDValue PromoteTargetBoolean(SDValue Bool, MVT VT);
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
void ReplaceValueWithHelper(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
- void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT,
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
//===--------------------------------------------------------------------===//
@@ -224,7 +225,7 @@ private:
/// SExtPromotedInteger - Get a promoted operand and sign extend it to the
/// final size.
SDValue SExtPromotedInteger(SDValue Op) {
- MVT OldVT = Op.getValueType();
+ EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
Op = GetPromotedInteger(Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
@@ -234,7 +235,7 @@ private:
/// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
/// final size.
SDValue ZExtPromotedInteger(SDValue Op) {
- MVT OldVT = Op.getValueType();
+ EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
Op = GetPromotedInteger(Op);
return DAG.getZeroExtendInReg(Op, dl, OldVT);
@@ -506,7 +507,6 @@ private:
// Vector Result Scalarization: <1 x ty> -> ty.
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
- SDValue ScalarizeVecRes_ShiftOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
@@ -518,6 +518,7 @@ private:
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
SDValue ScalarizeVecRes_VSETCC(SDNode *N);
@@ -533,8 +534,8 @@ private:
// Vector Splitting Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetSplitVector - Given a processed vector Op which was split into smaller
- /// vectors, this method returns the smaller vectors. The first elements of
+ /// GetSplitVector - Given a processed vector Op which was split into vectors
+ /// of half the size, this method returns the halves. The first elements of
/// Op coincide with the elements of Lo; the remaining elements of Op coincide
/// with the elements of Hi: Op is what you would get by concatenating Lo and
/// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
@@ -558,10 +559,10 @@ private:
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
- void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
@@ -641,7 +642,7 @@ private:
SDValue BasePtr, const Value *SV,
int SVOffset, unsigned Alignment,
bool isVolatile, unsigned LdWidth,
- MVT ResType, DebugLoc dl);
+ EVT ResType, DebugLoc dl);
/// Helper genWidenVectorStores - Helper function to generate a set of
/// stores to store a widen vector into non widen memory
@@ -664,7 +665,7 @@ private:
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
- SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT);
//===--------------------------------------------------------------------===//
@@ -686,7 +687,7 @@ private:
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
- void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+ void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
/// high parts of the given value.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 6e5adee84c34..0eafe62b8576 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -11,9 +11,11 @@
// The routines here perform legalization when the details of the type (such as
// whether it is an integer or a float) do not matter.
// Expansion is the act of changing a computation in an illegal type to be a
-// computation in two identical registers of a smaller type.
+// computation in two identical registers of a smaller type. On little-endian
+// machines the Lo part is stored first in memory; on big-endian, the Hi part.
// Splitting is the act of changing a computation in an illegal type to be a
// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
//
//===----------------------------------------------------------------------===//
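A hedged illustration of the two actions in type terms, assuming a 32-bit target where i32 and <4 x i32> are legal:

  // Expand: i64       -> Lo: i32,       Hi: i32        (identical halves; Lo is
  //         stored first in memory on little-endian targets, Hi on big-endian)
  // Split:  <8 x i32> -> Lo: <4 x i32>, Hi: <4 x i32>  (the pieces need not be
  //         identical types, and no memory-layout requirement applies)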
@@ -32,10 +34,10 @@ using namespace llvm;
void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- MVT OutVT = N->getValueType(0);
- MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDValue InOp = N->getOperand(0);
- MVT InVT = InOp.getValueType();
+ EVT InVT = InOp.getValueType();
DebugLoc dl = N->getDebugLoc();
// Handle some special cases efficiently.
@@ -59,16 +61,12 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
return;
case SplitVector:
- // Convert the split parts of the input if it was split in two.
GetSplitVector(InOp, Lo, Hi);
- if (Lo.getValueType() == Hi.getValueType()) {
- if (TLI.isBigEndian())
- std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
- return;
- }
- break;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
case ScalarizeVector:
// Convert the element instead.
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
@@ -78,7 +76,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
case WidenVector: {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
InOp = GetWidenedVector(InOp);
- MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
InVT.getVectorNumElements()/2);
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
DAG.getIntPtrConstant(0));
@@ -95,7 +93,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (InVT.isVector() && OutVT.isInteger()) {
// Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
// is legal but the result is not.
- MVT NVT = MVT::getVectorVT(NOutVT, 2);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
if (isTypeLegal(NVT)) {
SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
@@ -106,7 +104,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (TLI.isBigEndian())
std::swap(Lo, Hi);
-
+
return;
}
}
@@ -117,7 +115,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
// Create the stack frame object. Make sure it is aligned for both
// the source and expanded destination types.
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT());
+ TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
@@ -169,11 +167,11 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Convert to a vector of the expanded element type, for example
// <3 x i64> -> <6 x i32>.
- MVT OldVT = N->getValueType(0);
- MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
- MVT::getVectorVT(NewVT, 2*OldElts),
+ EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts),
OldVec);
// Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
@@ -200,7 +198,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DebugLoc dl = N->getDebugLoc();
LoadSDNode *LD = cast<LoadSDNode>(N);
- MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
int SVOffset = LD->getSrcValueOffset();
@@ -235,7 +233,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
}
void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Chain = N->getOperand(0);
SDValue Ptr = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
@@ -265,8 +263,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
// x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
- MVT OVT = N->getOperand(0).getValueType();
- MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2);
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2);
if (isTypeLegal(NVT)) {
SDValue Parts[2];
@@ -286,10 +284,10 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
// The vector type is legal but the element type needs expansion.
- MVT VecVT = N->getValueType(0);
+ EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
- MVT OldVT = N->getOperand(0).getValueType();
- MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
DebugLoc dl = N->getDebugLoc();
assert(OldVT == VecVT.getVectorElementType() &&
@@ -310,7 +308,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
}
SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::getVectorVT(NewVT, NewElts.size()),
+ EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()),
&NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
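A sketch of the effect for a BUILD_VECTOR of two illegal i64 elements on an assumed 32-bit little-endian target (e0lo/e0hi/e1lo/e1hi are pseudo-names for the expanded halves of each element):

  //   t = BUILD_VECTOR v4i32, e0lo, e0hi, e1lo, e1hi   (halves swapped per
  //       element on big-endian targets)
  //   r = BIT_CONVERT v2i64, t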
@@ -325,20 +323,20 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
// The vector type is legal but the element type needs expansion.
- MVT VecVT = N->getValueType(0);
+ EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
DebugLoc dl = N->getDebugLoc();
SDValue Val = N->getOperand(1);
- MVT OldEVT = Val.getValueType();
- MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
assert(OldEVT == VecVT.getVectorElementType() &&
"Inserted element type doesn't match vector element type!");
// Bitconvert to a vector of twice the length with elements of the expanded
// type, insert the expanded vector elements, and then convert back.
- MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
NewVecVT, N->getOperand(0));
@@ -360,7 +358,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
"SCALAR_TO_VECTOR operand type doesn't match vector element type!");
unsigned NumElts = VT.getVectorNumElements();
@@ -378,7 +376,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *St = cast<StoreSDNode>(N);
- MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
int SVOffset = St->getSrcValueOffset();
@@ -464,7 +462,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
}
void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
Lo = DAG.getUNDEF(LoVT);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 335c73cd5964..ca194305d989 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -20,8 +20,8 @@
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
-// or operations that happen to take a vector which are custom-lowered like
-// ISD::CALL; the legalization for such operations never produces nodes
+// or operations that happen to take a vector and are custom-lowered;
+// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
@@ -129,7 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (!HasVectorValue)
return TranslateLegalizeResults(Op, Result);
- MVT QueryType;
+ EVT QueryType;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
@@ -231,10 +231,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
// Vector "promotion" is basically just bitcasting and doing the operation
// in a different type. For example, x86 promotes ISD::AND on v2i32 to
// v1i64.
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
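A concrete sketch of the bitcast/operate/bitcast pattern: the helper name and the v2i32 -> v1i64 mapping are assumptions (x86-like), while the DAG calls mirror the ones used above.

  SDValue PromoteAndSketch(SelectionDAG &DAG, SDValue A, SDValue B, DebugLoc dl) {
    EVT NVT = MVT::v1i64;                                     // assumed promote-to type
    A = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, A);            // v2i32 -> v1i64
    B = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, B);
    SDValue R = DAG.getNode(ISD::AND, dl, NVT, A, B);         // bitwise, so the bit
                                                              // layout is preserved
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i32, R);  // back to the source type
  }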
@@ -260,11 +260,11 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
- MVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
@@ -287,11 +287,11 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
/// the operation be expanded. "Unroll" the vector, splitting out the scalars
/// and operating on each element individually.
SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
unsigned NE = VT.getVectorNumElements();
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 8> Scalars;
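A sketch of what the loop that follows builds for an FADD on <2 x f32> (value names assumed):

  //   e0 = EXTRACT_VECTOR_ELT A, 0    f0 = EXTRACT_VECTOR_ELT B, 0
  //   e1 = EXTRACT_VECTOR_ELT A, 1    f1 = EXTRACT_VECTOR_ELT B, 1
  //   r0 = FADD e0, f0                r1 = FADD e1, f1
  //   result = BUILD_VECTOR <2 x f32>, r0, r1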
@@ -299,10 +299,10 @@ SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
for (unsigned i = 0; i != NE; ++i) {
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
SDValue Operand = Op.getOperand(j);
- MVT OperandVT = Operand.getValueType();
+ EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
- MVT OperandEltVT = OperandVT.getVectorElementType();
+ EVT OperandEltVT = OperandVT.getVectorElementType();
Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OperandEltVT,
Operand,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 68967cc638fd..a03f825a9f04 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -15,14 +15,16 @@
// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
// types.
// Splitting is the act of changing a computation in an invalid vector type to
-// be a computation in multiple vectors of a smaller type. For example,
-// implementing <128 x f32> operations in terms of two <64 x f32> operations.
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
//
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -30,18 +32,19 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
- cerr << "\n");
+ DEBUG(errs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "ScalarizeVectorResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to scalarize the result of this operator!");
- abort();
+ llvm_unreachable("Do not know how to scalarize the result of this operator!");
case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
@@ -53,6 +56,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
@@ -72,9 +76,14 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCEIL:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
case ISD::ADD:
case ISD::AND:
@@ -91,11 +100,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SUB:
case ISD::UDIV:
case ISD::UREM:
- case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break;
-
+ case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break;
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -110,21 +120,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) {
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
- LHS.getValueType(), LHS, ShiftAmt);
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
- MVT NewVT = N->getValueType(0).getVectorElementType();
+ EVT NewVT = N->getValueType(0).getVectorElementType();
return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
NewVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
- MVT NewVT = N->getValueType(0).getVectorElementType();
+ EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
Op0, DAG.getValueType(NewVT),
@@ -150,7 +153,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
// The value to insert may have a wider type than the vector element type,
// so be sure to truncate it to the element type if necessary.
SDValue Op = N->getOperand(1);
- MVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
if (Op.getValueType() != EltVT)
// FIXME: Can this happen for floating point types?
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
@@ -167,7 +170,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getSrcValue(), N->getSrcValueOffset(),
N->getMemoryVT().getVectorElementType(),
- N->isVolatile(), N->getAlignment());
+ N->isVolatile(), N->getOriginalAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -177,7 +180,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
- MVT DestVT = N->getValueType(0).getVectorElementType();
+ EVT DestVT = N->getValueType(0).getVectorElementType();
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
}
@@ -185,7 +188,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
- MVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
SDValue InOp = N->getOperand(0);
if (InOp.getValueType() != EltVT)
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
@@ -207,6 +210,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
N->getOperand(4));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
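A sketch of the resulting nodes for <1 x i32> operands (values assumed):

  //   lhs = GetScalarizedVector(LHS)   ; the lone i32 element
  //   rhs = GetScalarizedVector(RHS)
  //   res = SETCC i1, lhs, rhs, cc     ; a plain scalar compare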
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
}
@@ -223,12 +235,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
- MVT NVT = N->getValueType(0).getVectorElementType();
- MVT SVT = TLI.getSetCCResultType(LHS.getValueType());
- DebugLoc dl = N->getDebugLoc();
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+ DebugLoc DL = N->getDebugLoc();
// Turn it into a scalar SETCC.
- SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2));
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
// VSETCC always returns a sign-extended value, while SETCC may not. The
// SETCC result type may not match the vector element type. Correct these.
@@ -237,19 +249,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
// Ensure the SETCC result is sign-extended.
if (TLI.getBooleanContents() !=
TargetLowering::ZeroOrNegativeOneBooleanContent)
- Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res,
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
DAG.getValueType(MVT::i1));
// Truncate to the final type.
- return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
- } else {
- // The SETCC result type is smaller than the vector element type.
- // If the SetCC result is not sign-extended, chop it down to MVT::i1.
- if (TLI.getBooleanContents() !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
- Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res);
- // Sign extend to the final type.
- return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res);
+ return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
}
+
+ // The SETCC result type is smaller than the vector element type.
+ // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
+ // Sign extend to the final type.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
}
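A worked sketch of the two corrections, assuming the target uses zero-or-one booleans:

  //   SVT >= NVT (e.g. SVT=i32, NVT=i8):
  //     res = SIGN_EXTEND_INREG res, i1   ; 1 -> 0xFFFFFFFF
  //     res = TRUNCATE res to i8          ; -> 0xFF
  //   SVT <  NVT (e.g. SVT=i8, NVT=i32):
  //     res = TRUNCATE res to i1          ; keep only the boolean bit
  //     res = SIGN_EXTEND res to i32      ; 1 -> 0xFFFFFFFF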
@@ -258,31 +270,32 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
- cerr << "\n");
+ DEBUG(errs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n");
SDValue Res = SDValue();
if (Res.getNode() == 0) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "ScalarizeVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to scalarize this operator's operand!");
- abort();
-
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
case ISD::BIT_CONVERT:
- Res = ScalarizeVecOp_BIT_CONVERT(N); break;
-
+ Res = ScalarizeVecOp_BIT_CONVERT(N);
+ break;
case ISD::CONCAT_VECTORS:
- Res = ScalarizeVecOp_CONCAT_VECTORS(N); break;
-
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
case ISD::EXTRACT_VECTOR_ELT:
- Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break;
-
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
case ISD::STORE:
- Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
}
}
@@ -323,7 +336,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
/// index.
SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
- return GetScalarizedVector(N->getOperand(0));
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+ Res);
+ return Res;
}
/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
@@ -343,7 +360,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
- N->isVolatile(), N->getAlignment());
+ N->isVolatile(), N->getOriginalAlignment());
}
@@ -357,17 +374,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
/// legalization, we just know that (at least) one result needs vector
/// splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Split node result: ";
+ N->dump(&DAG);
+ errs() << "\n");
SDValue Lo, Hi;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "SplitVectorResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to split the result of this operator!");
- abort();
+ llvm_unreachable("Do not know how to split the result of this operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
@@ -382,10 +401,16 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
- case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ case ISD::VSETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
case ISD::VECTOR_SHUFFLE:
- SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break;
- case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break;
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
case ISD::CTTZ:
case ISD::CTLZ:
@@ -403,8 +428,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
case ISD::ADD:
case ISD::SUB:
@@ -424,7 +454,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SRL:
case ISD::UREM:
case ISD::SREM:
- case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break;
+ case ISD::FREM:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -448,12 +480,12 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
DebugLoc dl = N->getDebugLoc();
SDValue InOp = N->getOperand(0);
- MVT InVT = InOp.getValueType();
+ EVT InVT = InOp.getValueType();
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
@@ -488,8 +520,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
}
// In the general case, convert the input to an integer and split it by hand.
- MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits());
- MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits());
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
if (TLI.isBigEndian())
std::swap(LoIntVT, HiIntVT);
@@ -503,7 +535,7 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
unsigned LoNumElts = LoVT.getVectorNumElements();
@@ -525,7 +557,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
return;
}
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
@@ -537,7 +569,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
@@ -550,12 +582,11 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
// Split the input.
SDValue VLo, VHi;
- MVT InVT = N->getOperand(0).getValueType();
+ EVT InVT = N->getOperand(0).getValueType();
switch (getTypeAction(InVT)) {
- default: assert(0 && "Unexpected type action!");
+ default: llvm_unreachable("Unexpected type action!");
case Legal: {
- assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
- MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(0));
@@ -570,9 +601,8 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
// If the result needs to be split and the input needs to be widened,
// the two types must have different lengths. Use the widened result
// and extract from it to do the split.
- assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
SDValue InOp = GetWidenedVector(N->getOperand(0));
- MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
DAG.getIntPtrConstant(0));
@@ -595,14 +625,11 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
- MVT IdxVT = Idx.getValueType();
+ EVT IdxVT = Idx.getValueType();
DebugLoc dl = N->getDebugLoc();
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
- // The indices are not guaranteed to be a multiple of the new vector
- // size unless the original vector type was split in two.
- assert(LoVT == HiVT && "Non power-of-two vectors not supported!");
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
@@ -639,8 +666,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
}
// Spill the vector to the stack.
- MVT VecVT = Vec.getValueType();
- MVT EltVT = VecVT.getVectorElementType();
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
@@ -648,7 +675,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT());
+ TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
// Load the Lo part from the stack slot.
@@ -666,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
@@ -676,7 +703,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
DebugLoc dl = LD->getDebugLoc();
GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
@@ -686,11 +713,11 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
const Value *SV = LD->getSrcValue();
int SVOffset = LD->getSrcValueOffset();
- MVT MemoryVT = LD->getMemoryVT();
- unsigned Alignment = LD->getAlignment();
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
- MVT LoMemVT, HiMemVT;
+ EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
@@ -700,7 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
- Alignment = MinAlign(Alignment, IncrementSize);
Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
SV, SVOffset, HiMemVT, isVolatile, Alignment);
@@ -714,20 +740,43 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ SDValue LL, LH, RL, RH;
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(0));
+ RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
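A sketch of the nodes this emits for v8i32 inputs (subvector indices as computed above):

  //   LL = EXTRACT_SUBVECTOR LHS, 0    LH = EXTRACT_SUBVECTOR LHS, 4
  //   RL = EXTRACT_SUBVECTOR RHS, 0    RH = EXTRACT_SUBVECTOR RHS, 4
  //   Lo = SETCC/VSETCC LoVT, LL, RL, cc
  //   Hi = SETCC/VSETCC HiVT, LH, RH, cc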
+
void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// Get the dest types - they may not match the input types, e.g. int_to_fp.
- MVT LoVT, HiVT;
+ EVT LoVT, HiVT;
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
// Split the input.
- MVT InVT = N->getOperand(0).getValueType();
+ EVT InVT = N->getOperand(0).getValueType();
switch (getTypeAction(InVT)) {
- default: assert(0 && "Unexpected type action!");
+ default: llvm_unreachable("Unexpected type action!");
case Legal: {
- assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
- MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(0));
@@ -742,9 +791,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// If the result needs to be split and the input needs to be widened,
// the two types must have different lengths. Use the widened result
// and extract from it to do the split.
- assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
SDValue InOp = GetWidenedVector(N->getOperand(0));
- MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
DAG.getIntPtrConstant(0));
@@ -765,10 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
DebugLoc dl = N->getDebugLoc();
GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
- MVT NewVT = Inputs[0].getValueType();
+ EVT NewVT = Inputs[0].getValueType();
unsigned NewElts = NewVT.getVectorNumElements();
- assert(NewVT == Inputs[1].getValueType() &&
- "Non power-of-two vectors not supported!");
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
@@ -825,7 +871,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
if (useBuildVector) {
- MVT EltVT = NewVT.getVectorElementType();
+ EVT EltVT = NewVT.getVectorElementType();
SmallVector<SDValue, 16> SVOps;
// Extract the input elements by hand.
@@ -868,20 +914,6 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
}
-void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- MVT LoVT, HiVT;
- DebugLoc dl = N->getDebugLoc();
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
-
- SDValue LL, LH, RL, RH;
- GetSplitVector(N->getOperand(0), LL, LH);
- GetSplitVector(N->getOperand(1), RL, RH);
-
- Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2));
-}
-
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
@@ -892,24 +924,27 @@ void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
/// result types of the node are known to be legal, but other operands of the
/// node may need legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n");
+ DEBUG(errs() << "Split node operand: ";
+ N->dump(&DAG);
+ errs() << "\n");
SDValue Res = SDValue();
if (Res.getNode() == 0) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "SplitVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to split this operator's operand!");
- abort();
+ llvm_unreachable("Do not know how to split this operator's operand!");
case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
- case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N),
- OpNo); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
case ISD::CTTZ:
case ISD::CTLZ:
@@ -917,8 +952,13 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
}
}
@@ -939,15 +979,13 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
- MVT ResVT = N->getValueType(0);
+ EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
DebugLoc dl = N->getDebugLoc();
GetSplitVector(N->getOperand(0), Lo, Hi);
- assert(Lo.getValueType() == Hi.getValueType() &&
- "Returns legal non-power-of-two vector type?");
- MVT InVT = Lo.getValueType();
+ EVT InVT = Lo.getValueType();
- MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(),
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
@@ -975,7 +1013,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal. For now, assume the index
// is a constant.
- MVT SubVT = N->getValueType(0);
+ EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
SDValue Lo, Hi;
@@ -997,7 +1035,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
- MVT VecVT = Vec.getValueType();
+ EVT VecVT = Vec.getValueType();
if (isa<ConstantSDNode>(Idx)) {
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
@@ -1010,14 +1048,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
if (IdxVal < LoElts)
return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
- else
- return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
- DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType()));
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType()));
}
// Store the vector to the stack.
- MVT EltVT = VecVT.getVectorElementType();
+ EVT EltVT = VecVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -1026,7 +1063,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ SV, 0, EltVT);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1038,13 +1076,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
int SVOffset = N->getSrcValueOffset();
- MVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getAlignment();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
- MVT LoMemVT, HiMemVT;
+ EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
@@ -1059,15 +1097,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
- N->getSrcValue(), SVOffset+IncrementSize,
- HiMemVT,
- isVol, MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ HiMemVT, isVol, Alignment);
else
- Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize,
- isVol, MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ isVol, Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
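A hedged sketch of the split for a <4 x i32> store (so IncrementSize = 8 bytes):

  //   lo    = STORE <2 x i32> Lo, Ptr          ; at SVOffset
  //   hi    = STORE <2 x i32> Hi, Ptr + 8      ; at SVOffset + 8, keeping the
  //                                            ; original alignment
  //   chain = TokenFactor lo, hi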
@@ -1078,18 +1115,19 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG);
- cerr << "\n");
+ DEBUG(errs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "WidenVectorResult #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to widen the result of this operator!");
- abort();
+ llvm_unreachable("Do not know how to widen the result of this operator!");
case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
@@ -1102,9 +1140,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
- case ISD::VECTOR_SHUFFLE:
- Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break;
- case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::VSETCC:
+ Res = WidenVecRes_VSETCC(N);
+ break;
case ISD::ADD:
case ISD::AND:
@@ -1126,21 +1167,27 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIV:
case ISD::UREM:
case ISD::SUB:
- case ISD::XOR: Res = WidenVecRes_Binary(N); break;
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: Res = WidenVecRes_Shift(N); break;
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
- case ISD::ANY_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break;
+ case ISD::ANY_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
case ISD::CTLZ:
case ISD::CTPOP:
@@ -1149,7 +1196,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FNEG:
case ISD::FSIN:
- case ISD::FSQRT: Res = WidenVecRes_Unary(N); break;
+ case ISD::FSQRT:
+ Res = WidenVecRes_Unary(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -1159,7 +1208,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
@@ -1169,12 +1218,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
DebugLoc dl = N->getDebugLoc();
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
- MVT InVT = InOp.getValueType();
- MVT InEltVT = InVT.getVectorElementType();
- MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
@@ -1216,7 +1265,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- MVT EltVT = WidenVT.getVectorElementType();
+ EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
for (i=0; i < MinElts; ++i)
@@ -1232,16 +1281,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue ShOp = N->getOperand(1);
- MVT ShVT = ShOp.getValueType();
+ EVT ShVT = ShOp.getValueType();
if (getTypeAction(ShVT) == WidenVector) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
- MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(),
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(),
WidenVT.getVectorNumElements());
if (ShVT != ShWidenVT)
ShOp = ModifyToType(ShOp, ShWidenVT);
@@ -1251,16 +1300,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
SDValue InOp = N->getOperand(0);
- MVT InVT = InOp.getValueType();
- MVT VT = N->getValueType(0);
- MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
@@ -1300,13 +1349,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
// Determine new input vector type. The new input vector type will use
// the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
- MVT NewInVT;
+ EVT NewInVT;
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
- MVT InEltVT = InVT.getVectorElementType();
- NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits());
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits());
} else {
- NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
if (TLI.isTypeLegal(NewInVT)) {
@@ -1332,28 +1381,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
}
}
- // This should occur rarely. Lower the bit-convert to a store/load
- // from the stack. Create the stack frame object. Make sure it is aligned
- // for both the source and destination types.
- SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT);
- int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
-
- // Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
-
- // Result is a load from the stack slot.
- return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0);
+ return CreateStackStoreLoad(InOp, WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
// Build a vector with undefined for the new nodes.
- MVT VT = N->getValueType(0);
- MVT EltVT = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
- MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
@@ -1365,8 +1403,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
- MVT InVT = N->getOperand(0).getValueType();
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
@@ -1387,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
}
} else {
InputWidened = true;
- if (WidenVT == TLI.getTypeToTransformTo(InVT)) {
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
// The inputs and the result are widened to the same type.
unsigned i;
for (i=1; i < NumOperands; ++i)
@@ -1406,7 +1444,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
MaskOps[i] = i;
MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
}
- return DAG.getVectorShuffle(WidenVT, dl,
+ return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)),
&MaskOps[0]);
@@ -1415,7 +1453,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
}
// Fall back to use extracts and build vector.
- MVT EltVT = WidenVT.getVectorElementType();
+ EVT EltVT = WidenVT.getVectorElementType();
unsigned NumInElts = InVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Idx = 0;
@@ -1439,12 +1477,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue RndOp = N->getOperand(3);
SDValue SatOp = N->getOperand(4);
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
- MVT InVT = InOp.getValueType();
- MVT InEltVT = InVT.getVectorElementType();
- MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
SDValue DTyOp = DAG.getValueType(WidenVT);
SDValue STyOp = DAG.getValueType(InWidenVT);
@@ -1491,7 +1529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- MVT EltVT = WidenVT.getVectorElementType();
+ EVT EltVT = WidenVT.getVectorElementType();
DTyOp = DAG.getValueType(EltVT);
STyOp = DAG.getValueType(InEltVT);
@@ -1512,8 +1550,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
- MVT VT = N->getValueType(0);
- MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
@@ -1522,7 +1560,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (getTypeAction(InOp.getValueType()) == WidenVector)
InOp = GetWidenedVector(InOp);
- MVT InVT = InOp.getValueType();
+ EVT InVT = InOp.getValueType();
ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
if (CIdx) {
@@ -1540,8 +1578,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- MVT EltVT = VT.getVectorElementType();
- MVT IdxVT = Idx.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Idx.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
if (CIdx) {
@@ -1573,8 +1611,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
- MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0));
- MVT LdVT = LD->getMemoryVT();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
DebugLoc dl = N->getDebugLoc();
assert(LdVT.isVector() && WidenVT.isVector());
@@ -1593,8 +1631,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
// For extension loads, we cannot play the trick of chopping into legal
// vector types and bitcasting to the right type. Instead, we unroll
// the load and build a vector.
- MVT EltVT = WidenVT.getVectorElementType();
- MVT LdEltVT = LdVT.getVectorElementType();
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();
// Load each element and widen
@@ -1638,26 +1676,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
// Modified the chain - switch anything that used the old chain to use
// the new one.
- ReplaceValueWith(SDValue(N, 1), Chain);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
return Result;
}
SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
WidenVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue Cond1 = N->getOperand(0);
- MVT CondVT = Cond1.getValueType();
+ EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
- MVT CondEltVT = CondVT.getVectorElementType();
- MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts);
if (getTypeAction(CondVT) == WidenVector)
Cond1 = GetWidenedVector(Cond1);
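The dominant mechanical change in this patch is the MVT-to-EVT migration: extended value types are now uniqued in an LLVMContext, so the old static MVT::getVectorVT/getIntegerVT factories become EVT factories that take the context, reached here through DAG.getContext(). A minimal sketch of the new call shape (the helper name is ours, not in-tree):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Builds the widened condition type exactly as the hunk above does, with
// the LLVMContext threaded through explicitly.
static EVT getWidenedCondVT(SelectionDAG &DAG, EVT CondEltVT,
                            unsigned WidenNumElts) {
  return EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts);
}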
@@ -1681,15 +1719,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getUNDEF(WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
- MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned NumElts = VT.getVectorNumElements();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1711,13 +1749,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp1 = N->getOperand(0);
- MVT InVT = InOp1.getValueType();
+ EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "can not widen non vector type");
- MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts);
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -1735,18 +1773,19 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
- DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG);
- cerr << "\n");
+ DEBUG(errs() << "Widen node operand " << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- cerr << "WidenVectorOperand op #" << ResNo << ": ";
- N->dump(&DAG); cerr << "\n";
+ errs() << "WidenVectorOperand op #" << ResNo << ": ";
+ N->dump(&DAG);
+ errs() << "\n";
#endif
- assert(0 && "Do not know how to widen this operator's operand!");
- abort();
+ llvm_unreachable("Do not know how to widen this operator's operand!");
case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
@@ -1757,8 +1796,13 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecOp_Convert(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
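SIGN_EXTEND, ZERO_EXTEND, and ANY_EXTEND are now routed through WidenVecOp_Convert alongside the FP/int conversions, sharing its unroll-and-rebuild fallback. A self-contained sketch of that pattern (our illustration; the in-tree body appears only partially in the next hunk):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Apply N's scalar opcode to each element of the already-widened input,
// then reassemble the legal result vector element by element.
static SDValue UnrollConvertSketch(SelectionDAG &DAG, SDNode *N,
                                   SDValue InOp) {
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  EVT InEltVT = InOp.getValueType().getVectorElementType();
  DebugLoc dl = N->getDebugLoc();
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
                              DAG.getIntPtrConstant(i));
    Ops[i] = DAG.getNode(N->getOpcode(), dl, EltVT, Elt);
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}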
@@ -1781,15 +1825,15 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type, so unroll the convert
// into scalar code and create a nasty build vector.
- MVT VT = N->getValueType(0);
- MVT EltVT = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
if (getTypeAction(InOp.getValueType()) == WidenVector)
InOp = GetWidenedVector(InOp);
- MVT InVT = InOp.getValueType();
- MVT InEltVT = InVT.getVectorElementType();
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
unsigned Opcode = N->getOpcode();
SmallVector<SDValue, 16> Ops(NumElts);
@@ -1802,9 +1846,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
- MVT InWidenVT = InOp.getValueType();
+ EVT InWidenVT = InOp.getValueType();
DebugLoc dl = N->getDebugLoc();
// Check if we can convert between two legal vector types and extract.
@@ -1812,7 +1856,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
unsigned Size = VT.getSizeInBits();
if (InWidenSize % Size == 0 && !VT.isVector()) {
unsigned NewNumElts = InWidenSize / Size;
- MVT NewVT = MVT::getVectorVT(VT, NewNumElts);
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
@@ -1820,31 +1864,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
}
}
- // Lower the bit-convert to a store/load from the stack. Create the stack
- // frame object. Make sure it is aligned for both the source and destination
- // types.
- SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT);
- int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
-
- // Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
-
- // Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0);
+ return CreateStackStoreLoad(InOp, VT);
}
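The deleted store/load sequence moves into the existing CreateStackStoreLoad helper. Assuming the helper mirrors the lines removed above, it amounts to:

// Hedged reconstruction from the deleted lines: spill InOp to a stack
// temporary aligned for both types, then reload it with the new type.
SDValue StackStoreLoadSketch(SelectionDAG &DAG, SDValue InOp, EVT DestVT) {
  DebugLoc dl = InOp.getDebugLoc();
  SDValue FIPtr = DAG.CreateStackTemporary(InOp.getValueType(), DestVT);
  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
  const Value *SV = PseudoSourceValue::getFixedStack(FI);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
  return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0);
}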
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
// If the input vector is not legal, it is likely that we will not find a
// legal vector of the same size. Replace the concatenated vector with a
// nasty build vector.
- MVT VT = N->getValueType(0);
- MVT EltVT = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
- MVT InVT = N->getOperand(0).getValueType();
+ EVT InVT = N->getOperand(0).getValueType();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned Idx = 0;
@@ -1862,9 +1895,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
- MVT EltVT = InOp.getValueType().getVectorElementType();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
- EltVT, InOp, N->getOperand(1));
+ N->getValueType(0), InOp, N->getOperand(1));
}
SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
@@ -1880,8 +1912,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = N->getDebugLoc();
- MVT StVT = ST->getMemoryVT();
- MVT ValVT = ValOp.getValueType();
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
// It must be true that the widened vector type is bigger than what
// we need to store.
assert(StVT.isVector() && ValOp.getValueType().isVector());
@@ -1892,8 +1924,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// For truncating stores, we cannot play the trick of chopping legal
// vector types and bit-casting them to the right type. Instead, we unroll
// the store.
- MVT StEltVT = StVT.getVectorElementType();
- MVT ValEltVT = ValVT.getVectorElementType();
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
@@ -1938,9 +1970,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// VecVT: Vector value type whose size we must match.
// Returns NewVecVT and NewEltVT - the vector type and its associated
// element type.
-static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
- MVT VecVT,
- MVT& NewEltVT, MVT& NewVecVT) {
+static void FindAssocWidenVecType(SelectionDAG& DAG,
+ const TargetLowering &TLI, unsigned Width,
+ EVT VecVT,
+ EVT& NewEltVT, EVT& NewVecVT) {
unsigned EltWidth = Width + 1;
if (TLI.isTypeLegal(VecVT)) {
// We start with the preferred width, making it a power of 2, and find a
@@ -1950,9 +1983,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
do {
assert(EltWidth > 0);
EltWidth = 1 << Log2_32(EltWidth - 1);
- NewEltVT = MVT::getIntegerVT(EltWidth);
+ NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
- NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts);
} while (!TLI.isTypeLegal(NewVecVT) ||
VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
} else {
@@ -1965,9 +1998,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
do {
assert(EltWidth > 0);
EltWidth = 1 << Log2_32(EltWidth - 1);
- NewEltVT = MVT::getIntegerVT(EltWidth);
+ NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
- NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts);
} while (!TLI.isTypeLegal(NewEltVT) ||
VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
}
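Both branches of FindAssocWidenVecType run the same search: round the candidate element width down to the next power of two until a legal type of matching total size is found. Distilled to plain integers (our sketch, with an assumed isLegal predicate standing in for the TLI queries):

#include "llvm/Support/MathExtras.h"  // Log2_32
#include <cassert>

static unsigned FindWidthSketch(unsigned Width, unsigned VecBits,
                                bool (*isLegal)(unsigned EltBits,
                                                unsigned NumElts)) {
  unsigned EltWidth = Width + 1;
  do {
    assert(EltWidth > 1 && "ran out of candidate widths");
    // Largest power of 2 strictly below EltWidth: 49 -> 32 -> 16 -> 8 ...
    EltWidth = 1u << llvm::Log2_32(EltWidth - 1);
  } while (!isLegal(EltWidth, VecBits / EltWidth));
  return EltWidth;
}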
@@ -1981,7 +2014,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
unsigned Alignment,
bool isVolatile,
unsigned LdWidth,
- MVT ResType,
+ EVT ResType,
DebugLoc dl) {
// The strategy assumes that we can efficiently load powers of two widths.
// The routine chops the vector into the largest power of 2 load and
@@ -1992,9 +2025,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// the load is nonvolatile, we can use a wider load for the value.
// Find the vector type that can load from.
- MVT NewEltVT, NewVecVT;
+ EVT NewEltVT, NewVecVT;
unsigned NewEltVTWidth;
- FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+ FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT);
NewEltVTWidth = NewEltVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
@@ -2021,7 +2054,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Our current type we are using is too large, use a smaller size by
// using a smaller power of 2
unsigned oNewEltVTWidth = NewEltVTWidth;
- FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+ FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT);
NewEltVTWidth = NewEltVT.getSizeInBits();
// Readjust position and vector position based on the new load type
Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
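The loop above realizes the strategy the function's comment promises: emit the widest legal power-of-2 piece, then progressively smaller pieces for the remainder, rescaling the vector index whenever the piece width shrinks. Reduced to widths only (our simplification; the legality constraints are omitted):

#include "llvm/Support/MathExtras.h"

// Chop LdWidth bits into power-of-2 loads, e.g. 96 -> 64 + 32.
static void ChopLoadWidths(unsigned LdWidth) {
  while (LdWidth > 0) {
    unsigned Piece = 1u << llvm::Log2_32(LdWidth);  // largest pow2 <= LdWidth
    // ... emit one load of Piece bits at the current offset ...
    LdWidth -= Piece;
  }
}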
@@ -2056,10 +2089,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// want to store. This avoids requiring a stack convert.
// Find a width of the element type we can store with
- MVT WidenVT = ValOp.getValueType();
- MVT NewEltVT, NewVecVT;
+ EVT WidenVT = ValOp.getValueType();
+ EVT NewEltVT, NewVecVT;
- FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+ FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
@@ -2088,7 +2121,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// Our current type we are using is too large, use a smaller size by
// using a smaller power of 2
unsigned oNewEltVTWidth = NewEltVTWidth;
- FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+ FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
NewEltVTWidth = NewEltVT.getSizeInBits();
// Readjust position and vector position based on the new store type
Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
@@ -2106,10 +2139,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
-SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) {
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
// Note that InOp might have been widened, so it might already have
// the right width or it might need to be narrowed.
- MVT InVT = InOp.getValueType();
+ EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
DebugLoc dl = InOp.getDebugLoc();
@@ -2137,7 +2170,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) {
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- MVT EltVT = NVT.getVectorElementType();
+ EVT EltVT = NVT.getVectorElementType();
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index af73b28fae93..e0f93d85c751 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -24,6 +24,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumUnfolds, "Number of nodes unfolded");
@@ -108,14 +110,14 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
- DOUT << "********** List Scheduling **********\n";
+ DEBUG(errs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
- BuildSchedGraph();
+ BuildSchedGraph(NULL);
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
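A change repeated throughout these scheduler files: the old DOUT macro is replaced by DEBUG(errs() << ...). A standalone illustration of the new idiom (file and helper are ours):

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// DEBUG(...) compiles away when NDEBUG is defined and otherwise prints to
// errs() only under -debug (or -debug-only=pre-RA-sched).
static void noteCycle(unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
}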
@@ -132,17 +134,17 @@ void ScheduleDAGFast::Schedule() {
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
- --PredSU->NumSuccsLeft;
-
+
#ifndef NDEBUG
- if (PredSU->NumSuccsLeft < 0) {
- cerr << "*** Scheduling failed! ***\n";
+ if (PredSU->NumSuccsLeft == 0) {
+ errs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
- cerr << " has been released too many times!\n";
- assert(0);
+ errs() << " has been released too many times!\n";
+ llvm_unreachable(0);
}
#endif
-
+ --PredSU->NumSuccsLeft;
+
// If all the node's successors are scheduled, this node is ready
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
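The same reordering recurs in every ReleasePred/ReleaseSucc below: NumSuccsLeft and NumPredsLeft are unsigned, so the old post-decrement `< 0` check could never fire; the guard now runs before the decrement and tests for zero. A plain-C++ sketch of the failure mode (not the in-tree types):

#include <cassert>

// Decrementing an unsigned counter that is already 0 wraps to UINT_MAX,
// and "x < 0" is always false for unsigned x -- so check first, then drop.
static void releaseOnce(unsigned &SuccsLeft) {
  assert(SuccsLeft != 0 && "has been released too many times!");
  --SuccsLeft;
}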
@@ -174,7 +176,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -214,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- MVT VT = N->getValueType(i);
+ EVT VT = N->getValueType(i);
if (VT == MVT::Flag)
return NULL;
else if (VT == MVT::Other)
@@ -222,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
- MVT VT = Op.getNode()->getValueType(Op.getResNo());
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
if (VT == MVT::Flag)
return NULL;
}
@@ -232,7 +234,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
- DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
+ DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -342,7 +344,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
+ DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = Clone(SU);
// New SUnit has the exact same predecessors.
@@ -419,7 +421,7 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
-static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
@@ -533,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
@@ -549,16 +551,16 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DOUT << "Adding an edge from SU # " << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n";
+ DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false, /*isArtificial=*/true));
NewDef = Copies.back();
}
- DOUT << "Adding an edge from SU # " << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n";
+ DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -568,8 +570,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
}
if (!CurSU) {
- assert(false && "Unable to resolve live physical register dependencies!");
- abort();
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
}
}
@@ -587,41 +588,11 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
++CurCycle;
}
- // Reverse the order if it is bottom up.
+ // Reverse the order since it is bottom up.
std::reverse(Sequence.begin(), Sequence.end());
-
-
+
#ifndef NDEBUG
- // Verify that all SUnits were scheduled.
- bool AnyNotSched = false;
- unsigned DeadNodes = 0;
- unsigned Noops = 0;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- if (!SUnits[i].isScheduled) {
- if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
- ++DeadNodes;
- continue;
- }
- if (!AnyNotSched)
- cerr << "*** List scheduling failed! ***\n";
- SUnits[i].dump(this);
- cerr << "has not been scheduled!\n";
- AnyNotSched = true;
- }
- if (SUnits[i].NumSuccsLeft != 0) {
- if (!AnyNotSched)
- cerr << "*** List scheduling failed! ***\n";
- SUnits[i].dump(this);
- cerr << "has successors left!\n";
- AnyNotSched = true;
- }
- }
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
- assert(!AnyNotSched);
- assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
- "The number of nodes scheduled doesn't match the expected number!");
+ VerifySchedule(/*isBottomUp=*/true);
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index c4325349990d..c8d21584616a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -29,6 +29,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
@@ -86,10 +88,10 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
- DOUT << "********** List Scheduling **********\n";
+ DEBUG(errs() << "********** List Scheduling **********\n");
// Build the scheduling graph.
- BuildSchedGraph();
+ BuildSchedGraph(NULL);
AvailableQueue->initNodes(SUnits);
@@ -106,17 +108,17 @@ void ScheduleDAGList::Schedule() {
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
SUnit *SuccSU = D.getSUnit();
- --SuccSU->NumPredsLeft;
-
+
#ifndef NDEBUG
- if (SuccSU->NumPredsLeft < 0) {
- cerr << "*** Scheduling failed! ***\n";
+ if (SuccSU->NumPredsLeft == 0) {
+ errs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
- cerr << " has been released too many times!\n";
- assert(0);
+ errs() << " has been released too many times!\n";
+ llvm_unreachable(0);
}
#endif
-
+ --SuccSU->NumPredsLeft;
+
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
// If all the node's predecessors are scheduled, this node is ready
@@ -140,7 +142,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
@@ -232,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
- DOUT << "*** Advancing cycle, no work to do\n";
+ DEBUG(errs() << "*** Advancing cycle, no work to do\n");
HazardRec->AdvanceCycle();
++NumStalls;
++CurCycle;
@@ -240,7 +242,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
// processors without pipeline interlocks and other cases.
- DOUT << "*** Emitting noop\n";
+ DEBUG(errs() << "*** Emitting noop\n");
HazardRec->EmitNoop();
Sequence.push_back(0); // NULL here means noop
++NumNoops;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c97e2a8c86bf..cec24e606f99 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -25,10 +25,12 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
@@ -163,14 +165,14 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DOUT << "********** List Scheduling **********\n";
+ DEBUG(errs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
- BuildSchedGraph();
+ BuildSchedGraph(NULL);
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
@@ -195,17 +197,17 @@ void ScheduleDAGRRList::Schedule() {
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
- --PredSU->NumSuccsLeft;
-
+
#ifndef NDEBUG
- if (PredSU->NumSuccsLeft < 0) {
- cerr << "*** Scheduling failed! ***\n";
+ if (PredSU->NumSuccsLeft == 0) {
+ errs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
- cerr << " has been released too many times!\n";
- assert(0);
+ errs() << " has been released too many times!\n";
+ llvm_unreachable(0);
}
#endif
-
+ --PredSU->NumSuccsLeft;
+
// If all the node's successors are scheduled, this node is ready
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
@@ -237,7 +239,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -276,13 +278,14 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
AvailableQueue->remove(PredSU);
}
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
++PredSU->NumSuccsLeft;
}
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
- DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
+ DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this));
AvailableQueue->UnscheduledNode(SU);
@@ -351,7 +354,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- MVT VT = N->getValueType(i);
+ EVT VT = N->getValueType(i);
if (VT == MVT::Flag)
return NULL;
else if (VT == MVT::Other)
@@ -359,7 +362,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
- MVT VT = Op.getNode()->getValueType(Op.getResNo());
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
if (VT == MVT::Flag)
return NULL;
}
@@ -369,7 +372,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
- DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
+ DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -488,7 +491,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
+ DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
@@ -570,7 +573,7 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
-static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
@@ -753,7 +756,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
@@ -769,8 +772,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DOUT << "Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n";
+ DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
@@ -778,8 +781,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
NewDef = Copies.back();
}
- DOUT << "Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n";
+ DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -822,17 +825,17 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
- --SuccSU->NumPredsLeft;
-
+
#ifndef NDEBUG
- if (SuccSU->NumPredsLeft < 0) {
- cerr << "*** Scheduling failed! ***\n";
+ if (SuccSU->NumPredsLeft == 0) {
+ errs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
- cerr << " has been released too many times!\n";
- assert(0);
+ errs() << " has been released too many times!\n";
+ llvm_unreachable(0);
}
#endif
-
+ --SuccSU->NumPredsLeft;
+
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
@@ -856,7 +859,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
@@ -1215,7 +1218,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!SUImpDefs)
return false;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- MVT VT = N->getValueType(i);
+ EVT VT = N->getValueType(i);
if (VT == MVT::Flag || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
@@ -1328,9 +1331,9 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DOUT << "Prescheduling SU # " << SU->NodeNum
- << " next to PredSU # " << PredSU->NodeNum
- << " to guide scheduling in the presence of multiple uses\n";
+ DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum
+ << " next to PredSU # " << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
assert(!Edge.isAssignedRegDep());
@@ -1418,8 +1421,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
(hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
- DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
- << " to SU #" << SuccSU->NodeNum << "\n";
+ DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # "
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 7aa15bcc6862..d53de347a556 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -14,10 +14,12 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -152,6 +154,11 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
}
void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
SUnit *SU = &SUnits[su];
@@ -175,7 +182,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
SU->hasPhysRegClobbers = true;
- unsigned NumUsed = CountResults(N);
+ unsigned NumUsed = InstrEmitter::CountResults(N);
while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
--NumUsed; // Skip over unused values at the end.
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
@@ -189,7 +196,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
assert(OpSU && "Node has no SUnit!");
if (OpSU == SU) continue; // In the same group.
- MVT OpVT = N->getOperand(i).getValueType();
+ EVT OpVT = N->getOperand(i).getValueType();
assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
@@ -206,8 +213,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// dependency. This may change in the future though.
if (Cost >= 0)
PhysReg = 0;
- SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
- OpSU->Latency, PhysReg));
+
+ const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpSU->Latency, PhysReg);
+ if (!isChain && !UnitLatencies) {
+ ComputeOperandLatency(OpSU, SU, (SDep &)dep);
+ ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+ }
+
+ SU->addPred(dep);
}
}
}
@@ -217,7 +231,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
-void ScheduleDAGSDNodes::BuildSchedGraph() {
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Populate the SUnits array.
BuildSchedUnits();
// Compute all the scheduling dependencies between nodes.
@@ -230,65 +244,68 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
// Compute the latency for the node. We use the sum of the latencies for
// all nodes flagged together into this SUnit.
SU->Latency = 0;
- bool SawMachineOpcode = false;
for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
if (N->isMachineOpcode()) {
- SawMachineOpcode = true;
- SU->Latency +=
- InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+ SU->Latency += InstrItins.
+ getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
}
}
-/// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
-/// not go into the resulting MachineInstr).
-unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) {
- unsigned N = Node->getNumValues();
- while (N && Node->getValueType(N - 1) == MVT::Flag)
- --N;
- if (N && Node->getValueType(N - 1) == MVT::Other)
- --N; // Skip over chain result.
- return N;
-}
-
-/// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by special operands that describe memory references, then an
-/// optional chain operand, then an optional flag operand. Compute the number
-/// of actual operands that will go into the resulting MachineInstr.
-unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) {
- unsigned N = ComputeMemOperandsEnd(Node);
- while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode()))
- --N; // Ignore MEMOPERAND nodes
- return N;
-}
-
-/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode
-/// operand
-unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
- unsigned N = Node->getNumOperands();
- while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
- --N;
- if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
- --N; // Ignore chain if it exists.
- return N;
-}
-
-
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (!SU->getNode()) {
- cerr << "PHYS REG COPY\n";
+ errs() << "PHYS REG COPY\n";
return;
}
SU->getNode()->dump(DAG);
- cerr << "\n";
+ errs() << "\n";
SmallVector<SDNode *, 4> FlaggedNodes;
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
FlaggedNodes.push_back(N);
while (!FlaggedNodes.empty()) {
- cerr << " ";
+ errs() << " ";
FlaggedNodes.back()->dump(DAG);
- cerr << "\n";
+ errs() << "\n";
FlaggedNodes.pop_back();
}
}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any flagged SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
+ N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap, EM);
+ FlaggedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap, EM);
+ }
+
+ BB = Emitter.getBlock();
+ InsertPos = Emitter.getInsertPos();
+ return BB;
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 2a278b749a8c..c9c36f7e42e7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -58,7 +58,6 @@ namespace llvm {
if (isa<ConstantPoolSDNode>(Node)) return true;
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
- if (isa<MemOperandSDNode>(Node)) return true;
if (Node->getOpcode() == ISD::EntryToken) return true;
return false;
}
@@ -87,35 +86,14 @@ namespace llvm {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- virtual void BuildSchedGraph();
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
- /// CountResults - The results of target nodes have register or immediate
- /// operands first, then an optional chain, and optional flag operands
- /// (which do not go into the machine instrs.)
- static unsigned CountResults(SDNode *Node);
-
- /// CountOperands - The inputs to target nodes have any actual inputs first,
- /// followed by special operands that describe memory references, then an
- /// optional chain operand, then flag operands. Compute the number of
- /// actual operands that will go into the resulting MachineInstr.
- static unsigned CountOperands(SDNode *Node);
-
- /// ComputeMemOperandsEnd - Find the index one past the last
- /// MemOperandSDNode operand
- static unsigned ComputeMemOperandsEnd(SDNode *Node);
-
- /// EmitNode - Generate machine code for an node and needed dependencies.
- /// VRBaseMap contains, for each already emitted node, the first virtual
- /// register number for the results of the node.
- ///
- void EmitNode(SDNode *Node, bool IsClone, bool HasClone,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
- virtual MachineBasicBlock *EmitSchedule();
+ virtual MachineBasicBlock *
+ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
@@ -129,47 +107,6 @@ namespace llvm {
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
private:
- /// EmitSubregNode - Generate machine code for subreg nodes.
- ///
- void EmitSubregNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
- /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS
- /// nodes.
- ///
- void EmitCopyToRegClassNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
- /// getVR - Return the virtual register corresponding to the specified result
- /// of the specified node.
- unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap);
-
- /// getDstOfCopyToRegUse - If the only use of the specified result number of
- /// node is a CopyToReg, return its destination register. Return 0 otherwise.
- unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const;
-
- void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum,
- const TargetInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
- /// AddRegisterOperand - Add the specified register as an operand to the
- /// specified machine instr. Insert register copies if the register is
- /// not in the required register class.
- void AddRegisterOperand(MachineInstr *MI, SDValue Op,
- unsigned IIOpNum, const TargetInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
- /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
- /// implicit physical register output.
- void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
- bool IsCloned, unsigned SrcReg,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
- void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
- const TargetInstrDesc &II, bool IsClone,
- bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
-
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
void AddSchedEdges();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c8f4b520ff18..542bf647eb0f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
@@ -31,6 +32,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -46,14 +48,14 @@ using namespace llvm;
/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
-static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) {
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
SDVTList Res = {VTs, NumVTs};
return Res;
}
-static const fltSemantics *MVTToAPFloatSemantics(MVT VT) {
- switch (VT.getSimpleVT()) {
- default: assert(0 && "Unknown FP format");
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
case MVT::f32: return &APFloat::IEEEsingle;
case MVT::f64: return &APFloat::IEEEdouble;
case MVT::f80: return &APFloat::x87DoubleExtended;
@@ -76,7 +78,7 @@ bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
return getValueAPF().bitwiseIsEqual(V);
}
-bool ConstantFPSDNode::isValueValidForType(MVT VT,
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
const APFloat& Val) {
assert(VT.isFloatingPoint() && "Can only convert between FP types");
@@ -88,7 +90,7 @@ bool ConstantFPSDNode::isValueValidForType(MVT VT,
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
- (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
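The convert-and-check dance in isValueValidForType is the standard APFloat idiom for exact-representability tests. A hedged usage sketch (helper name ours):

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// True if Val is exactly representable as an IEEE single; convert()
// modifies in place, so work on a copy.
static bool fitsInFloat(const APFloat &Val) {
  APFloat Tmp(Val);
  bool losesInfo;
  Tmp.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
  return !losesInfo;
}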
@@ -243,7 +245,7 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
/// if the operation does not depend on the sign of the input (setne and seteq).
static int isSignedOp(ISD::CondCode Opcode) {
switch (Opcode) {
- default: assert(0 && "Illegal integer setcc operation!");
+ default: llvm_unreachable("Illegal integer setcc operation!");
case ISD::SETEQ:
case ISD::SETNE: return 0;
case ISD::SETLT:
@@ -363,11 +365,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
switch (N->getOpcode()) {
case ISD::TargetExternalSymbol:
case ISD::ExternalSymbol:
- assert(0 && "Should only be used on nodes with operands");
+ llvm_unreachable("Should only be used on nodes with operands");
default: break; // Normal nodes don't need extra info.
- case ISD::ARG_FLAGS:
- ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits());
- break;
case ISD::TargetConstant:
case ISD::Constant:
ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
@@ -403,11 +402,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
- case ISD::MEMOPERAND: {
- const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO;
- MO.Profile(ID);
- break;
- }
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
@@ -429,12 +423,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(CP->getTargetFlags());
break;
}
- case ISD::CALL: {
- const CallSDNode *Call = cast<CallSDNode>(N);
- ID.AddInteger(Call->getCallingConv());
- ID.AddInteger(Call->isVarArg());
- break;
- }
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
@@ -466,7 +454,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
}
case ISD::VECTOR_SHUFFLE: {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
i != e; ++i)
ID.AddInteger(SVN->getMaskElt(i));
break;
@@ -488,20 +476,18 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
}
/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
-/// the CSE map that carries alignment, volatility, indexing mode, and
+/// the CSE map that carries volatility, indexing mode, and
/// extension/truncation information.
///
static inline unsigned
-encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM,
- bool isVolatile, unsigned Alignment) {
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
"AM may not require more than 3 bits!");
return ConvType |
(AM << 2) |
- (isVolatile << 5) |
- ((Log2_32(Alignment) + 1) << 6);
+ (isVolatile << 5);
}
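With the alignment term dropped, the encoded flags fit in six bits. A compact restatement of the layout the function above produces:

// bits 0-1: ConvType   (extension/truncation kind)
// bits 2-4: AM         (indexing mode)
// bit  5  : isVolatile
static inline unsigned encodeSketch(int ConvType, int AM, bool isVolatile) {
  return unsigned(ConvType) | (unsigned(AM) << 2) |
         (unsigned(isVolatile) << 5);
}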
//===----------------------------------------------------------------------===//
@@ -519,7 +505,6 @@ static bool doNotCSE(SDNode *N) {
case ISD::DBG_LABEL:
case ISD::DBG_STOPPOINT:
case ISD::EH_LABEL:
- case ISD::DECLARE:
return true; // Never CSE these nodes.
}
@@ -626,7 +611,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
case ISD::EntryToken:
- assert(0 && "EntryToken should not be in CSEMaps!");
+ llvm_unreachable("EntryToken should not be in CSEMaps!");
return false;
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
@@ -646,12 +631,12 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
break;
}
case ISD::VALUETYPE: {
- MVT VT = cast<VTSDNode>(N)->getVT();
+ EVT VT = cast<VTSDNode>(N)->getVT();
if (VT.isExtended()) {
Erased = ExtendedValueTypeNodes.erase(VT);
} else {
- Erased = ValueTypeNodes[VT.getSimpleVT()] != 0;
- ValueTypeNodes[VT.getSimpleVT()] = 0;
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
}
break;
}
@@ -667,8 +652,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
- cerr << "\n";
- assert(0 && "Node is not in map!");
+ errs() << "\n";
+ llvm_unreachable("Node is not in map!");
}
#endif
return Erased;
@@ -762,7 +747,7 @@ void SelectionDAG::VerifyNode(SDNode *N) {
default:
break;
case ISD::BUILD_PAIR: {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
assert(N->getNumValues() == 1 && "Too many results!");
assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
"Wrong return type!");
@@ -780,7 +765,7 @@ void SelectionDAG::VerifyNode(SDNode *N) {
assert(N->getValueType(0).isVector() && "Wrong return type!");
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!");
- MVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
@@ -791,13 +776,13 @@ void SelectionDAG::VerifyNode(SDNode *N) {
}
}
-/// getMVTAlignment - Compute the default alignment value for the
+/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
-unsigned SelectionDAG::getMVTAlignment(MVT VT) const {
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
const Type *Ty = VT == MVT::iPTR ?
- PointerType::get(Type::Int8Ty, 0) :
- VT.getTypeForMVT();
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
return TLI.getTargetData()->getABITypeAlignment(Ty);
}
@@ -815,6 +800,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
MF = &mf;
MMI = mmi;
DW = dw;
+ Context = &mf.getFunction()->getContext();
}
SelectionDAG::~SelectionDAG() {
@@ -846,7 +832,19 @@ void SelectionDAG::clear() {
Root = getEntryNode();
}
-SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) {
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
if (Op.getValueType() == VT) return Op;
APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(),
VT.getSizeInBits());
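The two helpers added above fold a branch that call sites previously open-coded. A hedged usage sketch (function name ours):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Picks SIGN_EXTEND or TRUNCATE from a width comparison, replacing the old
// per-call-site pattern:
//   VT.bitsGT(Idx.getValueType()) ? SIGN_EXTEND : TRUNCATE
static SDValue widenIndex(SelectionDAG &DAG, DebugLoc dl,
                          SDValue Idx, EVT PtrVT) {
  return DAG.getSExtOrTrunc(Idx, dl, PtrVT);
}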
@@ -856,29 +854,29 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
-SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) {
- MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
SDValue NegOne =
getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
-SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) {
- MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
}
-SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) {
- return getConstant(*ConstantInt::get(Val), VT, isT);
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
}
-SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) {
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
- MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
@@ -913,14 +911,14 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
}
-SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) {
- return getConstantFP(*ConstantFP::get(V), VT, isTarget);
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
}
-SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
- MVT EltVT =
+ EVT EltVT =
VT.isVector() ? VT.getVectorElementType() : VT;
// Do the map lookup using the actual bit pattern for the floating point
@@ -953,8 +951,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){
return Result;
}
-SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
- MVT EltVT =
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+ EVT EltVT =
VT.isVector() ? VT.getVectorElementType() : VT;
if (EltVT==MVT::f32)
return getConstantFP(APFloat((float)Val), VT, isTarget);
@@ -963,14 +961,15 @@ SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
- MVT VT, int64_t Offset,
+ EVT VT, int64_t Offset,
bool isTargetGA,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
-
+
// Truncate (with sign-extension) the offset value to the pointer size.
- unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
+ EVT PTy = TLI.getPointerTy();
+ unsigned BitWidth = PTy.getSizeInBits();
if (BitWidth < 64)
Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
@@ -1002,7 +1001,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1017,7 +1016,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
return SDValue(N, 0);
}
-SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget,
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
@@ -1036,9 +1035,9 @@ SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
+SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
unsigned Alignment, int Offset,
- bool isTarget,
+ bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
@@ -1062,7 +1061,7 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
}
-SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
@@ -1101,26 +1100,13 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
return SDValue(N, 0);
}
-SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) {
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0);
- ID.AddInteger(Flags.getRawBits());
- void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return SDValue(E, 0);
- SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>();
- new (N) ARG_FLAGSSDNode(Flags);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
-}
-
-SDValue SelectionDAG::getValueType(MVT VT) {
- if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size())
- ValueTypeNodes.resize(VT.getSimpleVT()+1);
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
SDNode *&N = VT.isExtended() ?
- ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()];
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
if (N) return SDValue(N, 0);
N = NodeAllocator.Allocate<VTSDNode>();
@@ -1129,7 +1115,7 @@ SDValue SelectionDAG::getValueType(MVT VT) {
return SDValue(N, 0);
}
-SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
@@ -1138,7 +1124,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
return SDValue(N, 0);
}
-SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT,
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
unsigned char TargetFlags) {
SDNode *&N =
TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
@@ -1177,19 +1163,19 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
}
}
-SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
SDValue N2, const int *Mask) {
assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
- assert(VT.isVector() && N1.getValueType().isVector() &&
+ assert(VT.isVector() && N1.getValueType().isVector() &&
"Vector Shuffle VTs must be a vectors");
assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
&& "Vector Shuffle VTs must have same element type");
// Canonicalize shuffle undef, undef -> undef
if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
- return N1;
+ return getUNDEF(VT);
- // Validate that all indices in Mask are within the range of the elements
+ // Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
unsigned NElts = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -1197,18 +1183,18 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
MaskVec.push_back(Mask[i]);
}
-
+
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
N2 = getUNDEF(VT);
for (unsigned i = 0; i != NElts; ++i)
if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
}
-
+
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
-
+
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
@@ -1231,7 +1217,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
N1 = getUNDEF(VT);
commuteShuffle(N1, N2, MaskVec);
}
-
+
// If Identity shuffle, or all shuffle in to undef, return that node.
bool AllUndef = true;
bool Identity = true;
@@ -1239,7 +1225,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
if (MaskVec[i] >= 0) AllUndef = false;
}
- if (Identity)
+ if (Identity && NElts == N1.getValueType().getVectorNumElements())
return N1;
if (AllUndef)
return getUNDEF(VT);
@@ -1249,17 +1235,17 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
for (unsigned i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
-
+
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
// Allocate the mask array for the node out of the BumpPtrAllocator, since
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
-
+
ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
CSEMap.InsertNode(N, IP);
@@ -1267,7 +1253,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
return SDValue(N, 0);
}
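// A small worked example of the canonicalizations above (a sketch, not
// exhaustive): for a <4 x i32> shuffle of (V, V) with mask <4,1,6,3>,
// the N1 == N2 case drops the RHS, rewriting the mask to <0,1,2,3>;
// that mask is the identity, so V itself is returned, provided V has
// the same element count as VT.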
-SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
SDValue Val, SDValue DTy,
SDValue STy, SDValue Rnd, SDValue Sat,
ISD::CvtCode Code) {
@@ -1289,7 +1275,7 @@ SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) {
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
ID.AddInteger(RegNo);
@@ -1305,7 +1291,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) {
SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
unsigned Line, unsigned Col,
- Value *CU) {
+ MDNode *CU) {
SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
new (N) DbgStopPointSDNode(Root, Line, Col, CU);
N->setDebugLoc(DL);
@@ -1349,32 +1335,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
return SDValue(N, 0);
}
-SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) {
-#ifndef NDEBUG
- const Value *v = MO.getValue();
- assert((!v || isa<PointerType>(v->getType())) &&
- "SrcValue is not a pointer?");
-#endif
-
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0);
- MO.Profile(ID);
-
- void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return SDValue(E, 0);
-
- SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>();
- new (N) MemOperandSDNode(MO);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
-}
-
/// getShiftAmountOperand - Return the specified value cast to the
/// target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
- MVT OpTy = Op.getValueType();
+ EVT OpTy = Op.getValueType();
MVT ShTy = TLI.getShiftAmountTy();
if (OpTy == ShTy || OpTy.isVector()) return Op;
@@ -1384,10 +1348,10 @@ SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
/// CreateStackTemporary - Create a stack temporary, suitable for holding the
/// specified value type.
-SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) {
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
- unsigned ByteSize = VT.getStoreSizeInBits()/8;
- const Type *Ty = VT.getTypeForMVT();
+ unsigned ByteSize = VT.getStoreSize();
+ const Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
@@ -1397,11 +1361,11 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) {
/// CreateStackTemporary - Create a stack temporary suitable for holding
/// either of the specified value types.
-SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) {
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
VT2.getStoreSizeInBits())/8;
- const Type *Ty1 = VT1.getTypeForMVT();
- const Type *Ty2 = VT2.getTypeForMVT();
+ const Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ const Type *Ty2 = VT2.getTypeForEVT(*getContext());
const TargetData *TD = TLI.getTargetData();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1411,7 +1375,7 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) {
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
-SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1,
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
// These setcc operations always fold.
switch (Cond) {
@@ -1441,7 +1405,7 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1,
const APInt &C1 = N1C->getAPIntValue();
switch (Cond) {
- default: assert(0 && "Unknown integer setcc!");
+ default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: return getConstant(C1 == C2, VT);
case ISD::SETNE: return getConstant(C1 != C2, VT);
case ISD::SETULT: return getConstant(C1.ult(C2), VT);
@@ -1516,6 +1480,10 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1,
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
unsigned BitWidth = Op.getValueSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
}
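// Usage sketch: if Op is "AND X, 0x7F" at i32, the top 25 bits
// (including the sign bit) are known zero, so SignBitIsZero(Op)
// returns true; a combine could then, for example, treat a signed
// divide of Op as an unsigned one.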
@@ -1743,7 +1711,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
return;
case ISD::SIGN_EXTEND_INREG: {
- MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getSizeInBits();
// Sign extension. Compute the demanded bits in the result that are not
@@ -1788,14 +1756,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::LOAD: {
if (ISD::isZEXTLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- MVT VT = LD->getMemoryVT();
+ EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
}
return;
}
case ISD::ZERO_EXTEND: {
- MVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
APInt InMask = Mask;
@@ -1809,7 +1777,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::SIGN_EXTEND: {
- MVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
@@ -1850,7 +1818,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::ANY_EXTEND: {
- MVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getSizeInBits();
APInt InMask = Mask;
InMask.trunc(InBits);
@@ -1862,7 +1830,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::TRUNCATE: {
- MVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getSizeInBits();
APInt InMask = Mask;
InMask.zext(InBits);
@@ -1875,7 +1843,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
break;
}
case ISD::AssertZext: {
- MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
KnownOne, Depth+1);
@@ -1981,7 +1949,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+ Depth);
}
return;
}
@@ -1993,7 +1962,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
/// information. For example, immediately after an "SRA X, 2", we know that
/// the top 3 bits are all equal to each other, so we return 3.
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getSizeInBits();
unsigned Tmp, Tmp2;
@@ -2212,6 +2181,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
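// Worked example for i8: ComputeNumSignBits(SRA X, 6) == 7, since the
// shift leaves bits 7..1 all equal to X's sign bit and only bit 0
// still carries payload; in general an SRA by N guarantees N+1 sign
// bits (capped at the bit width).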
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (FiniteOnlyFPMath())
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
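// Usage sketch: folds such as "setcc X, X, seto -> true" are only safe
// when X cannot be NaN; a combine can guard them with
// isKnownNeverNaN(X), which currently succeeds only for non-NaN FP
// constants or when FiniteOnlyFPMath() promises NaNs never occur.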
bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
@@ -2228,7 +2210,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
/// element of the result of the vector shuffle.
SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
unsigned i) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
if (N->getMaskElt(i) < 0)
return getUNDEF(VT.getVectorElementType());
@@ -2239,7 +2221,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
if (V.getOpcode() == ISD::BIT_CONVERT) {
V = V.getOperand(0);
- MVT VVT = V.getValueType();
+ EVT VVT = V.getValueType();
if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
return SDValue();
}
@@ -2256,7 +2238,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
/// getNode - Gets or creates the specified node.
///
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) {
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
void *IP = 0;
@@ -2274,7 +2256,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- MVT VT, SDValue Operand) {
+ EVT VT, SDValue Operand) {
// Constant fold unary operations with an integer constant operand.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
const APInt &Val = C->getAPIntValue();
@@ -2332,7 +2314,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*MVTToAPFloatSemantics(VT),
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -2366,7 +2348,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
return Operand; // Factor, merge or concat of one node? No need.
- case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node");
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
assert(VT.isFloatingPoint() &&
Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
@@ -2487,7 +2469,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
- MVT VT,
+ EVT VT,
ConstantSDNode *Cst1,
ConstantSDNode *Cst2) {
const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
@@ -2522,7 +2504,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
return SDValue();
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -2624,7 +2606,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
return N1;
break;
case ISD::FP_ROUND_INREG: {
- MVT EVT = cast<VTSDNode>(N2)->getVT();
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg round!");
assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
"Cannot FP_ROUND_INREG integer types");
@@ -2641,7 +2623,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
break;
case ISD::AssertSext:
case ISD::AssertZext: {
- MVT EVT = cast<VTSDNode>(N2)->getVT();
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
@@ -2650,7 +2632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
break;
}
case ISD::SIGN_EXTEND_INREG: {
- MVT EVT = cast<VTSDNode>(N2)->getVT();
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
@@ -2688,13 +2670,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
// expanding large vector constants.
if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt = N1.getOperand(N2C->getZExtValue());
- if (Elt.getValueType() != VT) {
+ EVT VEltTy = N1.getValueType().getVectorElementType();
+ if (Elt.getValueType() != VEltTy) {
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated. Make that explicit here.
- assert(VT.isInteger() && Elt.getValueType().isInteger() &&
- VT.bitsLE(Elt.getValueType()) &&
- "Bad type for BUILD_VECTOR operand");
- Elt = getNode(ISD::TRUNCATE, DL, VT, Elt);
+ Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
+ }
+ if (VT != VEltTy) {
+ // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
+ // result is implicitly extended.
+ Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
}
return Elt;
}
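// Example of the two-step fix-up above: extracting from an illegal
// <4 x i8> BUILD_VECTOR whose operands were promoted to i32, with an
// i32 result type, first TRUNCATEs the chosen operand back to the true
// element type i8 and then ANY_EXTENDs it to the requested i32, making
// both implicit conversions explicit.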
@@ -2895,7 +2880,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
@@ -2938,7 +2923,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
}
break;
case ISD::VECTOR_SHUFFLE:
- assert(0 && "should use getVectorShuffle constructor!");
+ llvm_unreachable("should use getVectorShuffle constructor!");
break;
case ISD::BIT_CONVERT:
// Fold bit_convert nodes from a type to themselves.
@@ -2971,23 +2956,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VT, Ops, 4);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
SDValue N4, SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VT, Ops, 5);
}
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
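// Usage sketch (hypothetical target code): a LowerCall implementation
// emitting a tail-call-like sequence might insert
//   Chain = DAG.getStackArgumentTokenFactor(Chain);
// before storing outgoing arguments, so every load of an incoming
// stack argument (negative frame index) is chained in and cannot be
// reordered past stores that overwrite the same slots.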
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
-static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG,
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
DebugLoc dl) {
unsigned NumBits = VT.isVector() ?
VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
@@ -3021,9 +3029,9 @@ static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG,
/// getMemsetStringVal - Similar to getMemsetValue, except this is only
/// used when a memcpy is turned into a memset because the source is a
/// constant string pointer.
-static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
- const TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
@@ -3031,7 +3039,8 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts)));
+ DAG.getConstant(0,
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
}
assert(!VT.isVector() && "Can't handle vector type here!");
@@ -3051,7 +3060,7 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
///
static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
SelectionDAG &DAG) {
- MVT VT = Base.getValueType();
+ EVT VT = Base.getValueType();
return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
VT, Base, DAG.getConstant(Offset, VT));
}
@@ -3083,7 +3092,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
/// to replace the memset / memcpy is below the threshold. It also returns the
/// types of the sequence of memory ops to perform memset / memcpy.
static
-bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
+bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
SDValue Dst, SDValue Src,
unsigned Limit, uint64_t Size, unsigned &Align,
std::string &Str, bool &isSrcStr,
@@ -3091,11 +3100,11 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
const TargetLowering &TLI) {
isSrcStr = isMemSrcFromString(Src, Str);
bool isSrcConst = isa<ConstantSDNode>(Src);
- bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
- MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
+ EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
+ bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
if (VT != MVT::iAny) {
- unsigned NewAlign = (unsigned)
- TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
+ const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
// If source is a string constant, this will require an unaligned load.
if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
if (Dst.getOpcode() != ISD::FrameIndex) {
@@ -3120,7 +3129,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
}
if (VT == MVT::iAny) {
- if (AllowUnalign) {
+ if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
VT = MVT::i64;
} else {
switch (Align & 7) {
@@ -3133,7 +3142,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT))
- LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1);
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
if (VT.bitsGT(LVT))
@@ -3148,12 +3157,12 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
if (VT.isVector()) {
VT = MVT::i64;
while (!TLI.isTypeLegal(VT))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
VTSize = VT.getSizeInBits() / 8;
} else {
// This can result in a type that is not legal on the target, e.g.
// 1 or 2 bytes on PPC.
- VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
VTSize >>= 1;
}
}
@@ -3177,7 +3186,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
- std::vector<MVT> MemOps;
+ std::vector<EVT> MemOps;
uint64_t Limit = -1ULL;
if (!AlwaysInline)
Limit = TLI.getMaxStoresPerMemcpy();
@@ -3193,8 +3202,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
- for (unsigned i = 0; i < NumMemOps; i++) {
- MVT VT = MemOps[i];
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
@@ -3214,7 +3223,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME does the case above also need this?
- MVT NVT = TLI.getTypeToTransformTo(VT);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
@@ -3242,7 +3251,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
- std::vector<MVT> MemOps;
+ std::vector<EVT> MemOps;
uint64_t Limit = -1ULL;
if (!AlwaysInline)
Limit = TLI.getMaxStoresPerMemmove();
@@ -3260,7 +3269,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
- MVT VT = MemOps[i];
+ EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
@@ -3275,7 +3284,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
&LoadChains[0], LoadChains.size());
OutChains.clear();
for (unsigned i = 0; i < NumMemOps; i++) {
- MVT VT = MemOps[i];
+ EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
@@ -3299,7 +3308,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
// Expand memset to a series of load/store ops if the size operand
// falls below a certain threshold.
- std::vector<MVT> MemOps;
+ std::vector<EVT> MemOps;
std::string Str;
bool CopyFromStr;
if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
@@ -3311,7 +3320,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
- MVT VT = MemOps[i];
+ EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value = getMemsetValue(Src, VT, DAG, dl);
SDValue Store = DAG.getStore(Chain, dl, Value,
@@ -3368,15 +3377,18 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in DebugLoc
std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::VoidTy,
- false, false, false, false, 0, CallingConv::C, false,
- getExternalSymbol("memcpy", TLI.getPointerTy()),
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+ TLI.getPointerTy()),
Args, *this, dl);
return CallResult.second;
}
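// Usage sketch (Dst, Src, DstSV and SrcSV are hypothetical values, and
// the parameter list is assumed from this getMemcpy's declaration:
// chain, debug loc, dst, src, size, align, AlwaysInline, src-value info):
//   Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
//                         DAG.getConstant(16, TLI.getPointerTy()),
//                         /*Align=*/4, /*AlwaysInline=*/false,
//                         DstSV, 0, SrcSV, 0);
// A small copy like this is normally expanded inline by
// getMemcpyLoadsAndStores; the RTLIB::MEMCPY libcall above is only the
// fallback when inline expansion and the target hook both decline.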
@@ -3414,15 +3426,18 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in DebugLoc
std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::VoidTy,
- false, false, false, false, 0, CallingConv::C, false,
- getExternalSymbol("memmove", TLI.getPointerTy()),
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
Args, *this, dl);
return CallResult.second;
}
@@ -3456,7 +3471,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
return Result;
// Emit a library call.
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -3466,31 +3481,61 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
else
Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
- Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
Args.push_back(Entry);
- Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
Args.push_back(Entry);
// FIXME: pass in DebugLoc
std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::VoidTy,
- false, false, false, false, 0, CallingConv::C, false,
- getExternalSymbol("memset", TLI.getPointerTy()),
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
Args, *this, dl);
return CallResult.second;
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Cmp,
SDValue Swp, const Value* PtrVal,
unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+  // Check if the memory reference refers to a frame index
+ if (!PtrVal)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+}
+
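// Sketch of the MachineMemOperand built above for an i32 atomic with
// no explicit alignment (values illustrative): Flags ends up as
// MOLoad | MOStore | MOVolatile, the offset is 0, the size is 4 bytes
// (MemVT.getStoreSize()), and the alignment is getEVTAlignment(MVT::i32).
// The MMO-taking overload below then owns node creation and CSE.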
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
- MVT VT = Cmp.getValueType();
-
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getMVTAlignment(MemVT);
+ EVT VT = Cmp.getValueType();
SDVTList VTs = getVTList(VT, MVT::Other);
FoldingSetNodeID ID;
@@ -3498,21 +3543,48 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
+ }
SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
- new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
- Chain, Ptr, Cmp, Swp, PtrVal, Alignment);
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Val,
const Value* PtrVal,
unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+  // Check if the memory reference refers to a frame index
+ if (!PtrVal)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
@@ -3526,10 +3598,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
Opcode == ISD::ATOMIC_SWAP) &&
"Invalid Atomic Op");
- MVT VT = Val.getValueType();
-
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getMVTAlignment(MemVT);
+ EVT VT = Val.getValueType();
SDVTList VTs = getVTList(VT, MVT::Other);
FoldingSetNodeID ID;
@@ -3537,11 +3606,12 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
SDValue Ops[] = {Chain, Ptr, Val};
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
+ }
SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
- new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
- Chain, Ptr, Val, PtrVal, Alignment);
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -3554,7 +3624,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
if (NumOps == 1)
return Ops[0];
- SmallVector<MVT, 4> VTs;
+ SmallVector<EVT, 4> VTs;
VTs.reserve(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
VTs.push_back(Ops[i].getValueType());
@@ -3564,9 +3634,9 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
- const MVT *VTs, unsigned NumVTs,
+ const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps,
- MVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, const Value *srcValue, int SVOff,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
@@ -3577,81 +3647,104 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
const SDValue *Ops, unsigned NumOps,
- MVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, const Value *srcValue, int SVOff,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = 0;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(srcValue, Flags, SVOff,
+ MemVT.getStoreSize(), Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
+ }
N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
- new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT,
- srcValue, SVOff, Align, Vol, ReadMem, WriteMem);
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
CSEMap.InsertNode(N, IP);
} else {
N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
- new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT,
- srcValue, SVOff, Align, Vol, ReadMem, WriteMem);
- }
- AllNodes.push_back(N);
- return SDValue(N, 0);
-}
-
-SDValue
-SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs,
- bool IsTailCall, bool IsInreg, SDVTList VTs,
- const SDValue *Operands, unsigned NumOperands,
- unsigned NumFixedArgs) {
- // Do not include isTailCall in the folding set profile.
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands);
- ID.AddInteger(CallingConv);
- ID.AddInteger(IsVarArgs);
- void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- // Instead of including isTailCall in the folding set, we just
- // set the flag of the existing node.
- if (!IsTailCall)
- cast<CallSDNode>(E)->setNotTailCall();
- return SDValue(E, 0);
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
}
- SDNode *N = NodeAllocator.Allocate<CallSDNode>();
- new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg,
- VTs, Operands, NumOperands, NumFixedArgs);
- CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
SDValue
SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
- ISD::LoadExtType ExtType, MVT VT, SDValue Chain,
+ ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
SDValue Ptr, SDValue Offset,
- const Value *SV, int SVOffset, MVT EVT,
+ const Value *SV, int SVOffset, EVT MemVT,
bool isVolatile, unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getMVTAlignment(VT);
+ Alignment = getEVTAlignment(VT);
+
+  // Check if the memory reference refers to a frame index
+ if (!SV)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(SV, Flags, SVOffset,
+ MemVT.getStoreSize(), Alignment);
+ return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO);
+}
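// As with the atomics above, the SrcValue-based getLoad is now a thin
// wrapper: a null SV on a frame-index pointer is replaced with a fixed
// stack PseudoSourceValue, an MOLoad (plus MOVolatile when requested)
// memory operand is built, and the MMO-based overload below does the
// actual lookup and node construction.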
- if (VT == EVT) {
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+ ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
ExtType = ISD::NON_EXTLOAD;
} else if (ExtType == ISD::NON_EXTLOAD) {
- assert(VT == EVT && "Non-extending load from different memory type!");
+ assert(VT == MemVT && "Non-extending load from different memory type!");
} else {
// Extending load.
if (VT.isVector())
- assert(EVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Invalid vector extload!");
else
- assert(EVT.bitsLT(VT) &&
+ assert(MemVT.bitsLT(VT) &&
"Should only be an extending load, not truncating!");
assert((ExtType == ISD::EXTLOAD || VT.isInteger()) &&
"Cannot sign/zero extend a FP/Vector load!");
- assert(VT.isInteger() == EVT.isInteger() &&
+ assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
}
@@ -3664,20 +3757,21 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
SDValue Ops[] = { Chain, Ptr, Offset };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
- ID.AddInteger(EVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment));
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile()));
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
- new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset,
- Alignment, isVolatile);
+ new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl,
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
const Value *SV, int SVOffset,
bool isVolatile, unsigned Alignment) {
@@ -3686,14 +3780,14 @@ SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl,
SV, SVOffset, VT, isVolatile, Alignment);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
const Value *SV,
- int SVOffset, MVT EVT,
+ int SVOffset, EVT MemVT,
bool isVolatile, unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
- SV, SVOffset, EVT, isVolatile, Alignment);
+ SV, SVOffset, MemVT, isVolatile, Alignment);
}
SDValue
@@ -3711,25 +3805,43 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, const Value *SV, int SVOffset,
bool isVolatile, unsigned Alignment) {
- MVT VT = Val.getValueType();
-
if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getMVTAlignment(VT);
+ Alignment = getEVTAlignment(Val.getValueType());
+  // Check if the memory reference refers to a frame index
+ if (!SV)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(SV, Flags, SVOffset,
+ Val.getValueType().getStoreSize(), Alignment);
+
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED,
- isVolatile, Alignment));
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile()));
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
- new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false,
- VT, SV, SVOffset, Alignment, isVolatile);
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -3737,19 +3849,39 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, const Value *SV,
- int SVOffset, MVT SVT,
+ int SVOffset, EVT SVT,
bool isVolatile, unsigned Alignment) {
- MVT VT = Val.getValueType();
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+  // Check if the memory reference refers to a frame index
+ if (!SV)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
if (VT == SVT)
- return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment);
+ return getStore(Chain, dl, Val, Ptr, MMO);
assert(VT.bitsGT(SVT) && "Not a truncation?");
assert(VT.isInteger() == SVT.isInteger() &&
"Can't do FP-INT conversion!");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getMVTAlignment(VT);
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -3757,14 +3889,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED,
- isVolatile, Alignment));
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile()));
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
- new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true,
- SVT, SV, SVOffset, Alignment, isVolatile);
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -3788,21 +3920,20 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
new (N) StoreSDNode(Ops, dl, VTs, AM,
ST->isTruncatingStore(), ST->getMemoryVT(),
- ST->getSrcValue(), ST->getSrcValueOffset(),
- ST->getAlignment(), ST->isVolatile());
+ ST->getMemOperand());
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl,
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV) {
SDValue Ops[] = { Chain, Ptr, SV };
return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
const SDUse *Ops, unsigned NumOps) {
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
@@ -3818,7 +3949,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
const SDValue *Ops, unsigned NumOps) {
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
@@ -3876,14 +4007,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<MVT> &ResultTys,
+ const std::vector<EVT> &ResultTys,
const SDValue *Ops, unsigned NumOps) {
return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
Ops, NumOps);
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- const MVT *VTs, unsigned NumVTs,
+ const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps) {
if (NumVTs == 1)
return getNode(Opcode, DL, VTs[0], Ops, NumOps);
@@ -3895,11 +4026,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+#if 0
switch (Opcode) {
// FIXME: figure out how to safely handle things like
// int foo(int x) { return 1 << (x & 255); }
// int bar() { return foo(256); }
-#if 0
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
case ISD::SHL_PARTS:
@@ -3915,8 +4046,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
break;
-#endif
}
+#endif
// Memoize the node unless it returns a flag.
SDNode *N;
@@ -3998,17 +4129,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
return getNode(Opcode, DL, VTList, Ops, 5);
}
-SDVTList SelectionDAG::getVTList(MVT VT) {
+SDVTList SelectionDAG::getVTList(EVT VT) {
return makeVTList(SDNode::getValueTypeList(VT), 1);
}
-SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) {
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
E = VTList.rend(); I != E; ++I)
if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
return *I;
- MVT *Array = Allocator.Allocate<MVT>(2);
+ EVT *Array = Allocator.Allocate<EVT>(2);
Array[0] = VT1;
Array[1] = VT2;
SDVTList Result = makeVTList(Array, 2);
@@ -4016,14 +4147,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) {
return Result;
}
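// Cache behavior sketch: getVTList(MVT::i32, MVT::Other) walks the
// VTList cache backwards (recently created lists are the likeliest
// hits) and reuses an existing SDVTList for the same pair; only on a
// miss does it allocate a permanent two-element EVT array from the
// Allocator and record it for later calls.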
-SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) {
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
E = VTList.rend(); I != E; ++I)
if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
I->VTs[2] == VT3)
return *I;
- MVT *Array = Allocator.Allocate<MVT>(3);
+ EVT *Array = Allocator.Allocate<EVT>(3);
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
@@ -4032,14 +4163,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) {
return Result;
}
-SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) {
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
E = VTList.rend(); I != E; ++I)
if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
I->VTs[2] == VT3 && I->VTs[3] == VT4)
return *I;
- MVT *Array = Allocator.Allocate<MVT>(3);
+  EVT *Array = Allocator.Allocate<EVT>(4); // room for all four VTs
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
@@ -4049,9 +4180,9 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) {
return Result;
}
-SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
switch (NumVTs) {
- case 0: assert(0 && "Cannot have nodes without results!");
+ case 0: llvm_unreachable("Cannot have nodes without results!");
case 1: return getVTList(VTs[0]);
case 2: return getVTList(VTs[0], VTs[1]);
case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
@@ -4073,7 +4204,7 @@ SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
return *I;
}
- MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
std::copy(VTs, VTs+NumVTs, Array);
SDVTList Result = makeVTList(Array, NumVTs);
VTList.push_back(Result);
@@ -4215,20 +4346,20 @@ void SDNode::DropOperands() {
/// machine opcode.
///
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT) {
+ EVT VT) {
SDVTList VTs = getVTList(VT);
return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT, SDValue Op1) {
+ EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT, SDValue Op1,
+ EVT VT, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
@@ -4236,7 +4367,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT, SDValue Op1,
+ EVT VT, SDValue Op1,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
@@ -4244,41 +4375,41 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT, const SDValue *Ops,
+ EVT VT, const SDValue *Ops,
unsigned NumOps) {
SDVTList VTs = getVTList(VT);
return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2, const SDValue *Ops,
+ EVT VT1, EVT VT2, const SDValue *Ops,
unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2);
return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2) {
+ EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2, MVT VT3,
+ EVT VT1, EVT VT2, EVT VT3,
const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2, MVT VT3, MVT VT4,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2,
+ EVT VT1, EVT VT2,
SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1 };
@@ -4286,7 +4417,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2,
+ EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
@@ -4294,7 +4425,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2,
+ EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
@@ -4303,7 +4434,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- MVT VT1, MVT VT2, MVT VT3,
+ EVT VT1, EVT VT2, EVT VT3,
SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
@@ -4318,20 +4449,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT) {
+ EVT VT) {
SDVTList VTs = getVTList(VT);
return MorphNodeTo(N, Opc, VTs, 0, 0);
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT, SDValue Op1) {
+ EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return MorphNodeTo(N, Opc, VTs, Ops, 1);
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT, SDValue Op1,
+ EVT VT, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
@@ -4339,7 +4470,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT, SDValue Op1,
+ EVT VT, SDValue Op1,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
@@ -4347,34 +4478,34 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT, const SDValue *Ops,
+ EVT VT, const SDValue *Ops,
unsigned NumOps) {
SDVTList VTs = getVTList(VT);
return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT1, MVT VT2, const SDValue *Ops,
+ EVT VT1, EVT VT2, const SDValue *Ops,
unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2);
return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT1, MVT VT2) {
+ EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT1, MVT VT2, MVT VT3,
+ EVT VT1, EVT VT2, EVT VT3,
const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT1, MVT VT2,
+ EVT VT1, EVT VT2,
SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1 };
@@ -4382,7 +4513,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT1, MVT VT2,
+ EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
@@ -4390,7 +4521,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
}
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- MVT VT1, MVT VT2,
+ EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
@@ -4441,29 +4572,35 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
DeadNodeSet.insert(Used);
}
- // If NumOps is larger than the # of operands we currently have, reallocate
- // the operand list.
- if (NumOps > N->NumOperands) {
- if (N->OperandsNeedDelete)
- delete[] N->OperandList;
-
- if (N->isMachineOpcode()) {
- // We're creating a final node that will live unmorphed for the
- // remainder of the current SelectionDAG iteration, so we can allocate
- // the operands directly out of a pool with no recycling metadata.
- N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps);
- N->OperandsNeedDelete = false;
- } else {
- N->OperandList = new SDUse[NumOps];
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+ // Initialize the memory references information.
+ MN->setMemRefs(0, 0);
+ // If NumOps is larger than the # of operands we can have in a
+ // MachineSDNode, reallocate the operand list.
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+ if (MN->OperandsNeedDelete)
+ delete[] MN->OperandList;
+ if (NumOps > array_lengthof(MN->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->OperandsNeedDelete = false;
+ } else
+ MN->InitOperands(MN->OperandList, Ops, NumOps);
+ } else {
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->InitOperands(new SDUse[NumOps], Ops, NumOps);
N->OperandsNeedDelete = true;
- }
- }
-
- // Assign the new operands.
- N->NumOperands = NumOps;
- for (unsigned i = 0, e = NumOps; i != e; ++i) {
- N->OperandList[i].setUser(N);
- N->OperandList[i].setInitial(Ops[i]);
+    } else
+      N->InitOperands(N->OperandList, Ops, NumOps);
}
// Delete any nodes that are still dead after adding the uses for the
@@ -4481,108 +4618,189 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
}
-/// getTargetNode - These are used for target selectors to create a new node
-/// with specified return type(s), target opcode, and operands.
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
///
-/// Note that getTargetNode returns the resultant node. If there is already a
+/// Note that getMachineNode returns the resultant node. If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one.
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) {
- return getNode(~Opcode, dl, VT).getNode();
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
- SDValue Op1) {
- return getNode(~Opcode, dl, VT, Op1).getNode();
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
- SDValue Op1, SDValue Op2) {
- return getNode(~Opcode, dl, VT, Op1, Op2).getNode();
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode();
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
- const SDValue *Ops, unsigned NumOps) {
- return getNode(~Opcode, dl, VT, Ops, NumOps).getNode();
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
- MVT VT1, MVT VT2) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
- SDValue Op;
- return getNode(~Opcode, dl, VTs, &Op, 0).getNode();
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
- MVT VT2, SDValue Op1) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
- return getNode(~Opcode, dl, VTs, &Op1, 1).getNode();
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
- MVT VT2, SDValue Op1,
- SDValue Op2) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
- return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
- MVT VT2, SDValue Op1,
- SDValue Op2, SDValue Op3) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
- MVT VT1, MVT VT2,
- const SDValue *Ops, unsigned NumOps) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2);
- return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
- MVT VT1, MVT VT2, MVT VT3,
- SDValue Op1, SDValue Op2) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2 };
- return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
- MVT VT1, MVT VT2, MVT VT3,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
- MVT VT1, MVT VT2, MVT VT3,
- const SDValue *Ops, unsigned NumOps) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
- return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
- MVT VT2, MVT VT3, MVT VT4,
- const SDValue *Ops, unsigned NumOps) {
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
}
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
- const std::vector<MVT> &ResultTys,
- const SDValue *Ops, unsigned NumOps) {
- return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode();
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+ MachineSDNode *N;
+ void *IP;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+ IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return cast<MachineSDNode>(E);
+ }
+
+ // Allocate a new MachineSDNode.
+ N = NodeAllocator.Allocate<MachineSDNode>();
+ new (N) MachineSDNode(~Opcode, DL, VTs);
+
+ // Initialize the operands list.
+ if (NumOps > array_lengthof(N->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ N->InitOperands(N->LocalOperands, Ops, NumOps);
+ N->OperandsNeedDelete = false;
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return N;
+}
+
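All of the fixed-arity getMachineNode overloads above funnel into this SDVTList form, which CSEs machine nodes the same way getNode does for target-independent nodes; nodes producing MVT::Flag are deliberately kept out of the CSE map. A minimal selector-side sketch, with X86::ADD32rr standing in as an illustrative machine opcode (not part of this patch):

// Sketch only: LHS/RHS are previously selected i32 values. If an
// identical machine node already exists, that node is returned.
MachineSDNode *Add =
  DAG.getMachineNode(X86::ADD32rr, dl, MVT::i32, LHS, RHS);
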
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetInstrInfo::EXTRACT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetInstrInfo::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
}
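These two helpers wrap the boilerplate of materializing the subregister index as a target constant operand. A hedged usage sketch, with X86::SUBREG_32BIT as an illustrative subregister index rather than anything this patch defines:

// Sketch: grab the low 32 bits of an i64 value during selection.
SDValue Lo = DAG.getTargetExtractSubreg(X86::SUBREG_32BIT, DL,
                                        MVT::i32, Val64);
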
/// getNodeIfExists - Get the specified node if it's already available, or
@@ -4937,64 +5155,28 @@ HandleSDNode::~HandleSDNode() {
}
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
- MVT VT, int64_t o, unsigned char TF)
+ EVT VT, int64_t o, unsigned char TF)
: SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)),
Offset(o), TargetFlags(TF) {
TheGlobal = const_cast<GlobalValue*>(GA);
}
-MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt,
- const Value *srcValue, int SVO,
- unsigned alignment, bool vol)
- : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
- assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
- assert(getAlignment() == alignment && "Alignment representation error!");
- assert(isVolatile() == vol && "Volatile representation error!");
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
- const SDValue *Ops,
- unsigned NumOps, MVT memvt, const Value *srcValue,
- int SVO, unsigned alignment, bool vol)
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
+ MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
- MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
- assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
- assert(getAlignment() == alignment && "Alignment representation error!");
- assert(isVolatile() == vol && "Volatile representation error!");
-}
-
-/// getMemOperand - Return a MachineMemOperand object describing the memory
-/// reference performed by this memory reference.
-MachineMemOperand MemSDNode::getMemOperand() const {
- int Flags = 0;
- if (isa<LoadSDNode>(this))
- Flags = MachineMemOperand::MOLoad;
- else if (isa<StoreSDNode>(this))
- Flags = MachineMemOperand::MOStore;
- else if (isa<AtomicSDNode>(this)) {
- Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
- }
- else {
- const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this);
- assert(MemIntrinNode && "Unknown MemSDNode opcode!");
- if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad;
- if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore;
- }
-
- int Size = (getMemoryVT().getSizeInBits() + 7) >> 3;
- if (isVolatile()) Flags |= MachineMemOperand::MOVolatile;
-
- // Check if the memory reference references a frame index
- const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode());
- if (!getSrcValue() && FI)
- return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()),
- Flags, 0, Size, getAlignment());
- else
- return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(),
- Size, getAlignment());
+ MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
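MemSDNode now points at a MachineFunction-allocated MachineMemOperand instead of carrying its own SrcValue/offset/alignment/volatile fields, which is what lets MachineSDNode::setMemRefs share the same objects with MachineInstrs later. A sketch of building one for the frame-index case the removed getMemOperand used to handle, assuming the MachineFunction allocator from the same refactoring effort:

// Sketch: a 4-byte, 4-aligned, non-volatile load from fixed stack
// slot FI; MF owns the allocation, so nodes may share the pointer.
MachineMemOperand *MMO =
  MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
                          MachineMemOperand::MOLoad,
                          /*Offset=*/0, /*Size=*/4, /*Alignment=*/4);
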
/// Profile - Gather unique data for the node.
@@ -5003,19 +5185,30 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
AddNodeIDNode(ID, this);
}
-static ManagedStatic<std::set<MVT, MVT::compareRawBits> > EVTs;
-static MVT VTs[MVT::LAST_VALUETYPE];
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
static ManagedStatic<sys::SmartMutex<true> > VTMutex;
/// getValueTypeList - Return a pointer to the specified value type.
///
-const MVT *SDNode::getValueTypeList(MVT VT) {
- sys::SmartScopedLock<true> Lock(&*VTMutex);
+const EVT *SDNode::getValueTypeList(EVT VT) {
if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
- VTs[VT.getSimpleVT()] = VT;
- return &VTs[VT.getSimpleVT()];
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
}
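The rewrite makes the common case lock-free: simple value types index into a table built once at startup, and only extended types take the mutex. Illustrative calls (getValueTypeList is an internal SDNode helper, and Context is assumed in scope):

// Simple type: straight table lookup, no locking.
const EVT *Simple = SDNode::getValueTypeList(MVT::i32);
// Extended type with no MVT equivalent: mutex-guarded set insertion.
const EVT *Ext =
  SDNode::getValueTypeList(EVT::getIntegerVT(Context, 2048));
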
@@ -5186,14 +5379,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
- case ISD::MEMOPERAND: return "MemOperand";
case ISD::EntryToken: return "EntryToken";
case ISD::TokenFactor: return "TokenFactor";
case ISD::AssertSext: return "AssertSext";
case ISD::AssertZext: return "AssertZext";
case ISD::BasicBlock: return "BasicBlock";
- case ISD::ARG_FLAGS: return "ArgFlags";
case ISD::VALUETYPE: return "ValueType";
case ISD::Register: return "Register";
@@ -5208,6 +5399,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FRAMEADDR: return "FRAMEADDR";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
case ISD::EHSELECTION: return "EHSELECTION";
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::ConstantPool: return "ConstantPool";
@@ -5239,10 +5431,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::INLINEASM: return "inlineasm";
case ISD::DBG_LABEL: return "dbg_label";
case ISD::EH_LABEL: return "eh_label";
- case ISD::DECLARE: return "declare";
case ISD::HANDLENODE: return "handlenode";
- case ISD::FORMAL_ARGUMENTS: return "formal_arguments";
- case ISD::CALL: return "call";
// Unary operators
case ISD::FABS: return "fabs";
@@ -5332,7 +5521,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CONVERT_RNDSAT: {
switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: assert(0 && "Unknown cvt code!");
+ default: llvm_unreachable("Unknown cvt code!");
case ISD::CVT_FF: return "cvt_ff";
case ISD::CVT_FS: return "cvt_fs";
case ISD::CVT_FU: return "cvt_fu";
@@ -5351,7 +5540,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::BR_JT: return "br_jt";
case ISD::BRCOND: return "brcond";
case ISD::BR_CC: return "br_cc";
- case ISD::RET: return "ret";
case ISD::CALLSEQ_START: return "callseq_start";
case ISD::CALLSEQ_END: return "callseq_end";
@@ -5384,7 +5572,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
- default: assert(0 && "Unknown setcc condition!");
+ default: llvm_unreachable("Unknown setcc condition!");
case ISD::SETOEQ: return "setoeq";
case ISD::SETOGT: return "setogt";
case ISD::SETOGE: return "setoge";
@@ -5463,14 +5651,26 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
if (getValueType(i) == MVT::Other)
OS << "ch";
else
- OS << getValueType(i).getMVTString();
+ OS << getValueType(i).getEVTString();
}
OS << " = " << getOperationName(G);
}
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
- if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this);
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
OS << "<";
for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
int Idx = SVN->getMaskElt(i);
@@ -5481,9 +5681,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << Idx;
}
OS << ">";
- }
-
- if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
OS << '<' << CSDN->getAPIntValue() << '>';
} else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
@@ -5505,13 +5703,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " + " << offset;
else
OS << " " << offset;
- if (unsigned char TF = GADN->getTargetFlags())
+ if (unsigned int TF = GADN->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
OS << "<" << FIDN->getIndex() << ">";
} else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
OS << "<" << JTDN->getIndex() << ">";
- if (unsigned char TF = JTDN->getTargetFlags())
+ if (unsigned int TF = JTDN->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
int offset = CP->getOffset();
@@ -5523,7 +5721,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " + " << offset;
else
OS << " " << offset;
- if (unsigned char TF = CP->getTargetFlags())
+ if (unsigned int TF = CP->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
OS << "<";
@@ -5541,80 +5739,47 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
- if (unsigned char TF = ES->getTargetFlags())
+ if (unsigned int TF = ES->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
if (M->getValue())
OS << "<" << M->getValue() << ">";
else
OS << "<null>";
- } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(this)) {
- if (M->MO.getValue())
- OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">";
- else
- OS << "<null:" << M->MO.getOffset() << ">";
- } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) {
- OS << N->getArgFlags().getArgFlagsString();
} else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getMVTString();
+ OS << ":" << N->getVT().getEVTString();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- const Value *SrcValue = LD->getSrcValue();
- int SrcOffset = LD->getSrcValueOffset();
- OS << " <";
- if (SrcValue)
- OS << SrcValue;
- else
- OS << "null";
- OS << ":" << SrcOffset << ">";
+ OS << " <" << *LD->getMemOperand();
bool doExt = true;
switch (LD->getExtensionType()) {
default: doExt = false; break;
- case ISD::EXTLOAD: OS << " <anyext "; break;
- case ISD::SEXTLOAD: OS << " <sext "; break;
- case ISD::ZEXTLOAD: OS << " <zext "; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << LD->getMemoryVT().getMVTString() << ">";
+ OS << " from " << LD->getMemoryVT().getEVTString();
const char *AM = getIndexedModeName(LD->getAddressingMode());
if (*AM)
- OS << " " << AM;
- if (LD->isVolatile())
- OS << " <volatile>";
- OS << " alignment=" << LD->getAlignment();
+ OS << ", " << AM;
+
+ OS << ">";
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- const Value *SrcValue = ST->getSrcValue();
- int SrcOffset = ST->getSrcValueOffset();
- OS << " <";
- if (SrcValue)
- OS << SrcValue;
- else
- OS << "null";
- OS << ":" << SrcOffset << ">";
+ OS << " <" << *ST->getMemOperand();
if (ST->isTruncatingStore())
- OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">";
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
- OS << " " << AM;
- if (ST->isVolatile())
- OS << " <volatile>";
- OS << " alignment=" << ST->getAlignment();
- } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) {
- const Value *SrcValue = AT->getSrcValue();
- int SrcOffset = AT->getSrcValueOffset();
- OS << " <";
- if (SrcValue)
- OS << SrcValue;
- else
- OS << "null";
- OS << ":" << SrcOffset << ">";
- if (AT->isVolatile())
- OS << " <volatile>";
- OS << " alignment=" << AT->getAlignment();
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << " <" << *M->getMemOperand() << ">";
}
}
@@ -5635,16 +5800,17 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
if (N->getOperand(i).getNode()->hasOneUse())
DumpNodes(N->getOperand(i).getNode(), indent+2, G);
else
- cerr << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+ errs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
- cerr << "\n" << std::string(indent, ' ');
+ errs() << "\n";
+ errs().indent(indent);
N->dump(G);
}
void SelectionDAG::dump() const {
- cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
I != E; ++I) {
@@ -5655,7 +5821,7 @@ void SelectionDAG::dump() const {
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
- cerr << "\n\n";
+ errs() << "\n\n";
}
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
@@ -5699,6 +5865,11 @@ void SDNode::dumpr() const {
DumpNodesr(errs(), this, 0, 0, once);
}
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(errs(), this, 0, G, once);
+}
+
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
@@ -5717,7 +5888,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
unsigned MinSplatBits) {
- MVT VT = getValueType(0);
+ EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned sz = VT.getSizeInBits();
if (MinSplatBits > sz)
@@ -5767,7 +5938,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
SplatValue = HighValue | LowValue;
SplatUndef = HighUndef & LowUndef;
-
+
sz = HalfSize;
}
@@ -5775,14 +5946,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
return true;
}
-bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) {
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Find the first non-undef value in the shuffle mask.
unsigned i, e;
for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
/* search */;
assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
-
+
// Make sure all remaining elements are either undef or the same as the first
// non-undef value.
for (int Idx = Mask[i]; i != e; ++i)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 260911e3b994..9017e435962b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Constants.h"

#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -49,6 +50,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -104,14 +106,14 @@ static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
}
/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// MVTs that represent all the individual underlying
+/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
///
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
- SmallVectorImpl<MVT> &ValueVTs,
+ SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets = 0,
uint64_t StartingOffset = 0) {
// Given a struct type, recursively traverse the elements.
@@ -135,9 +137,9 @@ static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
return;
}
// Interpret void as zero return values.
- if (Ty == Type::VoidTy)
+ if (Ty == Type::getVoidTy(Ty->getContext()))
return;
- // Base case: we can get an MVT for this LLVM IR type.
+ // Base case: we can get an EVT for this LLVM IR type.
ValueVTs.push_back(TLI.getValueType(Ty));
if (Offsets)
Offsets->push_back(StartingOffset);
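For a concrete feel, a sketch with a hypothetical aggregate type (the offsets assume the usual 8-byte alignment of double):

// For StructTy = { i32, double } this yields
//   ValueVTs = { MVT::i32, MVT::f64 } and Offsets = { 0, 8 }.
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, StructTy, ValueVTs, &Offsets);
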
@@ -161,7 +163,7 @@ namespace llvm {
/// ValueVTs - The value types of the values, which may not be legal, and
/// may need be promoted or synthesized from one or more registers.
///
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
/// RegVTs - The value types of the registers. This is the same size as
/// ValueVTs and it records, for each value, what the type of the assigned
@@ -172,7 +174,7 @@ namespace llvm {
/// getRegisterType member function, however when with physical registers
/// it is necessary to have a separate record of the types.
///
- SmallVector<MVT, 4> RegVTs;
+ SmallVector<EVT, 4> RegVTs;
/// Regs - This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
@@ -184,21 +186,21 @@ namespace llvm {
RegsForValue(const TargetLowering &tli,
const SmallVector<unsigned, 4> &regs,
- MVT regvt, MVT valuevt)
+ EVT regvt, EVT valuevt)
: TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
RegsForValue(const TargetLowering &tli,
const SmallVector<unsigned, 4> &regs,
- const SmallVector<MVT, 4> &regvts,
- const SmallVector<MVT, 4> &valuevts)
+ const SmallVector<EVT, 4> &regvts,
+ const SmallVector<EVT, 4> &valuevts)
: TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
- RegsForValue(const TargetLowering &tli,
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
unsigned Reg, const Type *Ty) : TLI(&tli) {
ComputeValueVTs(tli, Ty, ValueVTs);
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- MVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI->getNumRegisters(ValueVT);
- MVT RegisterVT = TLI->getRegisterType(ValueVT);
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
+ EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
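A sketch of what this constructor records on a 32-bit target where i64 is not legal (the numbers illustrate such a target, they are not mandated by this patch):

// getNumRegisters(Context, MVT::i64) == 2 and
// getRegisterType(Context, MVT::i64) == MVT::i32, so the value's
// single ValueVT entry maps to Regs = {Reg, Reg+1} with RegVT i32.
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
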
@@ -352,11 +354,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
unsigned PHIReg = ValueMap[PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- MVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI.getNumRegisters(VT);
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
for (unsigned i = 0; i != NumRegisters; ++i)
BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
@@ -366,7 +368,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
}
}
-unsigned FunctionLoweringInfo::MakeReg(MVT VT) {
+unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
}
@@ -378,15 +380,15 @@ unsigned FunctionLoweringInfo::MakeReg(MVT VT) {
/// will assign registers for each member or element.
///
unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, V->getType(), ValueVTs);
unsigned FirstReg = 0;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- MVT ValueVT = ValueVTs[Value];
- MVT RegisterVT = TLI.getRegisterType(ValueVT);
+ EVT ValueVT = ValueVTs[Value];
+ EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
- unsigned NumRegs = TLI.getNumRegisters(ValueVT);
+ unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
unsigned R = MakeReg(RegisterVT);
if (!FirstReg) FirstReg = R;
@@ -402,7 +404,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
const SDValue *Parts,
- unsigned NumParts, MVT PartVT, MVT ValueVT,
+ unsigned NumParts, EVT PartVT, EVT ValueVT,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -418,11 +420,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
unsigned RoundParts = NumParts & (NumParts - 1) ?
1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
- MVT RoundVT = RoundBits == ValueBits ?
- ValueVT : MVT::getIntegerVT(RoundBits);
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
SDValue Lo, Hi;
- MVT HalfVT = MVT::getIntegerVT(RoundBits/2);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
@@ -439,7 +441,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
- MVT OddVT = MVT::getIntegerVT(OddParts * PartBits);
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, dl,
Parts+RoundParts, OddParts, PartVT, OddVT);
@@ -447,7 +449,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
Lo = Val;
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
DAG.getConstant(Lo.getValueType().getSizeInBits(),
@@ -457,11 +459,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
}
} else if (ValueVT.isVector()) {
// Handle a multi-element vector.
- MVT IntermediateVT, RegisterVT;
+ EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs =
- TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
- RegisterVT);
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@@ -494,11 +496,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
ValueVT, &Ops[0], NumIntermediates);
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
- assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) &&
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
@@ -506,7 +508,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
- MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits());
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
}
}
@@ -555,7 +557,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
- assert(0 && "Unknown mismatch!");
+ llvm_unreachable("Unknown mismatch!");
return SDValue();
}
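A sketch of the common case this routine serves: a 32-bit target that passes an i64 around as two legal i32 parts.

// Lo32/Hi32 come from an earlier getCopyToParts split; the halves
// are recombined via BUILD_PAIR, swapped first on big-endian targets.
SDValue Parts[] = { Lo32, Hi32 };
SDValue Val64 = getCopyFromParts(DAG, dl, Parts, 2, MVT::i32, MVT::i64);
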
@@ -563,11 +565,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
- SDValue *Parts, unsigned NumParts, MVT PartVT,
+ SDValue *Parts, unsigned NumParts, EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy();
- MVT ValueVT = Val.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
+ EVT ValueVT = Val.getValueType();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
@@ -588,10 +590,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
} else if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
} else {
- assert(0 && "Unknown mismatch!");
+ llvm_unreachable("Unknown mismatch!");
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
@@ -600,10 +602,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
} else {
- assert(0 && "Unknown mismatch!");
+ llvm_unreachable("Unknown mismatch!");
}
}
@@ -634,19 +636,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
// The odd parts were reversed by getCopyToParts - unreverse them.
std::reverse(Parts + RoundParts, Parts + NumParts);
NumParts = RoundParts;
- ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
}
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
- MVT::getIntegerVT(ValueVT.getSizeInBits()),
+ EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()),
Val);
for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
for (unsigned i = 0; i < NumParts; i += StepSize) {
unsigned ThisBits = StepSize * PartBits / 2;
- MVT ThisVT = MVT::getIntegerVT (ThisBits);
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
SDValue &Part0 = Parts[i];
SDValue &Part1 = Parts[i+StepSize/2];
@@ -692,11 +694,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
}
// Handle a multi-element vector.
- MVT IntermediateVT, RegisterVT;
+ EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
- unsigned NumRegs = TLI
- .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
- RegisterVT);
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT, NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
@@ -750,8 +751,10 @@ void SelectionDAGLowering::clear() {
NodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
+ EdgeMapping.clear();
DAG.clear();
CurDebugLoc = DebugLoc::getUnknownLoc();
+ HasTailCall = false;
}
/// getRoot - Return the current virtual root of the Selection DAG,
@@ -817,8 +820,7 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
- default: assert(0 && "Unknown instruction type encountered!");
- abort();
+ default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
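Instruction.def stamps out one case per IR opcode; the Add entry, for instance, expands to:

case Instruction::Add: return visitAdd((BinaryOperator&)I);
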
@@ -831,7 +833,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
if (N.getNode()) return N;
if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
- MVT VT = TLI.getValueType(V->getType(), true);
+ EVT VT = TLI.getValueType(V->getType(), true);
if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
return N = DAG.getConstant(*CI, VT);
@@ -860,6 +862,10 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
OI != OE; ++OI) {
SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Constants.push_back(SDValue(Val, i));
}
@@ -871,14 +877,14 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
SmallVector<SDValue, 4> Constants(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
- MVT EltVT = ValueVTs[i];
+ EVT EltVT = ValueVTs[i];
if (isa<UndefValue>(C))
Constants[i] = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
@@ -900,7 +906,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
Ops.push_back(getValue(CP->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
- MVT EltVT = TLI.getValueType(VecTy->getElementType());
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
@@ -927,30 +933,24 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
unsigned InReg = FuncInfo.ValueMap[V];
assert(InReg && "Value not in map!");
- RegsForValue RFV(TLI, InReg, V->getType());
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
}
void SelectionDAGLowering::visitRet(ReturnInst &I) {
- if (I.getNumOperands() == 0) {
- DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(),
- MVT::Other, getControlRoot()));
- return;
- }
-
- SmallVector<SDValue, 8> NewValues;
- NewValues.push_back(getControlRoot());
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) continue;
SDValue RetOp = getValue(I.getOperand(i));
for (unsigned j = 0, f = NumValues; j != f; ++j) {
- MVT VT = ValueVTs[j];
+ EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -965,13 +965,13 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) {
// conventions. The frontend should mark functions whose return values
// require promoting with signext or zeroext attributes.
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- MVT MinVT = TLI.getRegisterType(MVT::i32);
+ EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
if (VT.bitsLT(MinVT))
VT = MinVT;
}
- unsigned NumParts = TLI.getNumRegisters(VT);
- MVT PartVT = TLI.getRegisterType(VT);
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
@@ -981,14 +981,30 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) {
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
if (F->paramHasAttr(0, Attribute::InReg))
Flags.setInReg();
- for (unsigned i = 0; i < NumParts; ++i) {
- NewValues.push_back(Parts[i]);
- NewValues.push_back(DAG.getArgFlags(Flags));
- }
+
+ // Propagate extension type if any
+ if (F->paramHasAttr(0, Attribute::SExt))
+ Flags.setSExt();
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
}
- DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other,
- &NewValues[0], NewValues.size()));
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+ Outs, getCurDebugLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
}
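The hook invoked above has the following shape, reconstructed from this call site (later LLVM releases changed the signature):

// Outs carries one ISD::OutputArg per legal register-sized part of
// the return value; the hook returns the new chain.
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                            bool isVarArg,
                            const SmallVectorImpl<ISD::OutputArg> &Outs,
                            DebugLoc dl, SelectionDAG &DAG);
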
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
@@ -1073,7 +1089,7 @@ static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
default:
- assert(0 && "Invalid FCmp predicate opcode!");
+ llvm_unreachable("Invalid FCmp predicate opcode!");
FOC = FPC = ISD::SETFALSE;
break;
}
@@ -1099,7 +1115,7 @@ static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
case ICmpInst::ICMP_SGT: return ISD::SETGT;
case ICmpInst::ICMP_UGT: return ISD::SETUGT;
default:
- assert(0 && "Invalid ICmp predicate opcode!");
+ llvm_unreachable("Invalid ICmp predicate opcode!");
return ISD::SETNE;
}
}
@@ -1131,7 +1147,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
Condition = getFCmpCondCode(FC->getPredicate());
} else {
Condition = ISD::SETEQ; // silence warning.
- assert(0 && "Unknown compare instruction");
+ llvm_unreachable("Unknown compare instruction");
}
CaseBlock CB(Condition, BOp->getOperand(0),
@@ -1142,7 +1158,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
}
// Create a CaseBlock record representing this branch.
- CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
NULL, TBB, FBB, CurBB);
SwitchCases.push_back(CB);
}
@@ -1229,7 +1245,7 @@ void SelectionDAGLowering::visitBr(BranchInst &I) {
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CurMBB;
- if (++BBI != CurMBB->getParent()->end())
+ if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
if (I.isUnconditional()) {
@@ -1290,14 +1306,14 @@ void SelectionDAGLowering::visitBr(BranchInst &I) {
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
- CurMBB->getParent()->erase(SwitchCases[i].ThisBB);
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
SwitchCases.clear();
}
}
// Create a CaseBlock record representing this branch.
- CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
NULL, Succ0MBB, Succ1MBB, CurMBB);
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
@@ -1315,9 +1331,11 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
if (CB.CmpMHS == NULL) {
// Fold "(X == true)" to X and "(X == false)" to !X to
// handle common cases produced by branch lowering.
- if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
Cond = CondLHS;
- else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
} else
@@ -1329,7 +1347,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
- MVT VT = CmpOp.getValueType();
+ EVT VT = CmpOp.getValueType();
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
@@ -1350,7 +1368,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CurMBB;
- if (++BBI != CurMBB->getParent()->end())
+ if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
// If the lhs block is the next block, invert the condition so that we can
@@ -1385,7 +1403,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
- MVT PTy = TLI.getPointerTy();
+ EVT PTy = TLI.getPointerTy();
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
@@ -1402,7 +1420,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
// conditional branch to default mbb if the result is greater than the
// difference between smallest and largest cases.
SDValue SwitchOp = getValue(JTH.SValue);
- MVT VT = SwitchOp.getValueType();
+ EVT VT = SwitchOp.getValueType();
SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
DAG.getConstant(JTH.First, VT));
@@ -1411,12 +1429,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncating.
- if (VT.bitsGT(TLI.getPointerTy()))
- SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getPointerTy(), SUB);
- else
- SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
- TLI.getPointerTy(), SUB);
+ SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
@@ -1435,7 +1448,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CurMBB;
- if (++BBI != CurMBB->getParent()->end())
+ if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
@@ -1454,7 +1467,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
// Subtract the minimum value
SDValue SwitchOp = getValue(B.SValue);
- MVT VT = SwitchOp.getValueType();
+ EVT VT = SwitchOp.getValueType();
SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
DAG.getConstant(B.First, VT));
@@ -1464,13 +1477,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
SUB, DAG.getConstant(B.Range, VT),
ISD::SETUGT);
- SDValue ShiftOp;
- if (VT.bitsGT(TLI.getPointerTy()))
- ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getPointerTy(), SUB);
- else
- ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
- TLI.getPointerTy(), SUB);
+ SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
@@ -1480,7 +1487,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CurMBB;
- if (++BBI != CurMBB->getParent()->end())
+ if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
@@ -1531,7 +1538,7 @@ void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CurMBB;
- if (++BBI != CurMBB->getParent()->end())
+ if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
if (NextMBB == NextBlock)
@@ -1584,13 +1591,13 @@ bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = CurMBB->getParent();
+ MachineFunction *CurMF = FuncInfo.MF;
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CR.CaseBB;
- if (++BBI != CurMBB->getParent()->end())
+ if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
// TODO: If any two of the cases has the same destination, and if one value
@@ -1698,14 +1705,11 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = CurMBB->getParent();
+ MachineFunction *CurMF = FuncInfo.MF;
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CR.CaseBB;
-
- if (++BBI != CurMBB->getParent()->end())
- NextBlock = BBI;
+ ++BBI;
const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
@@ -1771,14 +1775,11 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
MachineBasicBlock* Default) {
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = CurMBB->getParent();
+ MachineFunction *CurMF = FuncInfo.MF;
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = 0;
MachineFunction::iterator BBI = CR.CaseBB;
-
- if (++BBI != CurMBB->getParent()->end())
- NextBlock = BBI;
+ ++BBI;
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
@@ -1898,14 +1899,15 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
Value* SV,
MachineBasicBlock* Default){
- unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits();
+ EVT PTy = TLI.getPointerTy();
+ unsigned IntPtrBits = PTy.getSizeInBits();
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = CurMBB->getParent();
+ MachineFunction *CurMF = FuncInfo.MF;
// If target does not have legal shift left, do not emit bit tests at all.
if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
@@ -2069,7 +2071,6 @@ size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -2174,24 +2175,26 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
if (!isa<VectorType>(I.getType()) &&
Op2.getValueType() != TLI.getShiftAmountTy()) {
// If the operand is smaller than the shift count type, promote it.
- if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
+ EVT PTy = TLI.getPointerTy();
+ EVT STy = TLI.getShiftAmountTy();
+ if (STy.bitsGT(Op2.getValueType()))
Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
TLI.getShiftAmountTy(), Op2);
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (TLI.getShiftAmountTy().getSizeInBits() >=
+ else if (STy.getSizeInBits() >=
Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
TLI.getShiftAmountTy(), Op2);
// Otherwise we'll need to temporarily settle for some other
// convenient type; type legalization will make adjustments as
// needed.
- else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
+ else if (PTy.bitsLT(Op2.getValueType()))
Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
TLI.getPointerTy(), Op2);
- else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))
+ else if (PTy.bitsGT(Op2.getValueType()))
Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
TLI.getPointerTy(), Op2);
}
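The comparison chain above picks one of three adjustments for a shift amount whose type disagrees with the target's shift-count type. A compact restatement of the same decision over plain bit widths (classifyShiftAmountFix and log2Ceil are illustrative helpers, assuming the widths are already known):

static unsigned log2Ceil(unsigned N) {  // smallest L with (1u << L) >= N
  unsigned L = 0;
  while ((1u << L) < N) ++L;
  return L;
}

enum ShiftAmountFix { ExtendToShiftTy, TruncateToShiftTy,
                      TruncateToPtrTy, ExtendToPtrTy, LeaveForLegalizer };

// Sketch: mirrors the if/else-if ladder in visitShift on widths alone.
static ShiftAmountFix classifyShiftAmountFix(unsigned OpBits,
                                             unsigned ShiftTyBits,
                                             unsigned PtrBits) {
  if (ShiftTyBits > OpBits)
    return ExtendToShiftTy;        // operand narrower than the count type
  if (ShiftTyBits >= log2Ceil(OpBits))
    return TruncateToShiftTy;      // count type can still encode every shift
  if (PtrBits < OpBits)
    return TruncateToPtrTy;        // settle for pointer width for now
  if (PtrBits > OpBits)
    return ExtendToPtrTy;
  return LeaveForLegalizer;        // type legalization adjusts the rest
}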
@@ -2209,7 +2212,9 @@ void SelectionDAGLowering::visitICmp(User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
- setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode));
+
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
}
void SelectionDAGLowering::visitFCmp(User &I) {
@@ -2221,38 +2226,12 @@ void SelectionDAGLowering::visitFCmp(User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
- setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition));
-}
-
-void SelectionDAGLowering::visitVICmp(User &I) {
- ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
- if (VICmpInst *IC = dyn_cast<VICmpInst>(&I))
- predicate = IC->getPredicate();
- else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
- predicate = ICmpInst::Predicate(IC->getPredicate());
- SDValue Op1 = getValue(I.getOperand(0));
- SDValue Op2 = getValue(I.getOperand(1));
- ISD::CondCode Opcode = getICmpCondCode(predicate);
- setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(),
- Op1, Op2, Opcode));
-}
-
-void SelectionDAGLowering::visitVFCmp(User &I) {
- FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
- if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I))
- predicate = FC->getPredicate();
- else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
- predicate = FCmpInst::Predicate(FC->getPredicate());
- SDValue Op1 = getValue(I.getOperand(0));
- SDValue Op2 = getValue(I.getOperand(1));
- ISD::CondCode Condition = getFCmpCondCode(predicate);
- MVT DestVT = TLI.getValueType(I.getType());
-
- setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
void SelectionDAGLowering::visitSelect(User &I) {
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues != 0) {
@@ -2277,7 +2256,7 @@ void SelectionDAGLowering::visitSelect(User &I) {
void SelectionDAGLowering::visitTrunc(User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
}
@@ -2285,7 +2264,7 @@ void SelectionDAGLowering::visitZExt(User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
  // ZExt also can't be a cast to bool for the same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
}
@@ -2293,14 +2272,14 @@ void SelectionDAGLowering::visitSExt(User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
  // SExt also can't be a cast to bool for the same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
}
void SelectionDAGLowering::visitFPTrunc(User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
DestVT, N, DAG.getIntPtrConstant(0)));
}
@@ -2308,35 +2287,35 @@ void SelectionDAGLowering::visitFPTrunc(User &I) {
void SelectionDAGLowering::visitFPExt(User &I){
  // FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
}
void SelectionDAGLowering::visitFPToUI(User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
}
void SelectionDAGLowering::visitFPToSI(User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
}
void SelectionDAGLowering::visitUIToFP(User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
}
void SelectionDAGLowering::visitSIToFP(User &I){
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
}
@@ -2344,14 +2323,9 @@ void SelectionDAGLowering::visitPtrToInt(User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- MVT SrcVT = N.getValueType();
- MVT DestVT = TLI.getValueType(I.getType());
- SDValue Result;
- if (DestVT.bitsLT(SrcVT))
- Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N);
- else
- // Note: ZERO_EXTEND can handle cases where the sizes are equal too
- Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N);
+ EVT SrcVT = N.getValueType();
+ EVT DestVT = TLI.getValueType(I.getType());
+ SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT);
setValue(&I, Result);
}
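Several hunks in this change collapse hand-written truncate-or-extend ladders into DAG.getZExtOrTrunc / DAG.getSExtOrTrunc. A hedged model of what the zero-extending variant means on concrete integers (zextOrTrunc is an illustrative name; the DAG helper rewrites node types rather than values, and equal widths are a no-op, as the deleted comment noted):

#include <cstdint>

// Sketch: assumes V already fits in SrcBits and 0 < DestBits <= 64.
static uint64_t zextOrTrunc(uint64_t V, unsigned SrcBits, unsigned DestBits) {
  if (DestBits < SrcBits)                        // truncate: keep the low bits
    return V & ((UINT64_C(1) << DestBits) - 1);
  return V;                                      // zero-extend, or no-op
}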
@@ -2359,19 +2333,14 @@ void SelectionDAGLowering::visitIntToPtr(User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- MVT SrcVT = N.getValueType();
- MVT DestVT = TLI.getValueType(I.getType());
- if (DestVT.bitsLT(SrcVT))
- setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
- else
- // Note: ZERO_EXTEND can handle cases where the sizes are equal too
- setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
- DestVT, N));
+ EVT SrcVT = N.getValueType();
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
void SelectionDAGLowering::visitBitCast(User &I) {
SDValue N = getValue(I.getOperand(0));
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this
// is either a BIT_CONVERT or a no-op.
@@ -2422,7 +2391,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
// Convert the ConstantVector mask operand into an array of ints, with -1
// representing undef values.
SmallVector<Constant*, 8> MaskElts;
- cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+ cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(),
+ MaskElts);
unsigned MaskNumElts = MaskElts.size();
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (isa<UndefValue>(MaskElts[i]))
@@ -2431,8 +2401,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
}
- MVT VT = TLI.getValueType(I.getType());
- MVT SrcVT = Src1.getValueType();
+ EVT VT = TLI.getValueType(I.getType());
+ EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
if (SrcNumElts == MaskNumElts) {
@@ -2531,7 +2501,7 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
}
}
- if (RangeUse[0] == 0 && RangeUse[0] == 0) {
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
@@ -2566,8 +2536,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
// We can't use either concat vectors or extract subvectors so fall back to
  // replacing the shuffle with extract and build vector.
- MVT EltVT = VT.getVectorElementType();
- MVT PtrVT = TLI.getPointerTy();
+ EVT EltVT = VT.getVectorElementType();
+ EVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Mask[i] < 0) {
@@ -2598,9 +2568,9 @@ void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
I.idx_begin(), I.idx_end());
- SmallVector<MVT, 4> AggValueVTs;
+ SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
- SmallVector<MVT, 4> ValValueVTs;
+ SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
@@ -2637,7 +2607,7 @@ void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
I.idx_begin(), I.idx_end());
- SmallVector<MVT, 4> ValValueVTs;
+ SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
@@ -2682,7 +2652,8 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
uint64_t Offs =
TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
- unsigned PtrBits = TLI.getPointerTy().getSizeInBits();
+ EVT PTy = TLI.getPointerTy();
+ unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64) {
OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
TLI.getPointerTy(),
@@ -2700,12 +2671,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
- if (IdxN.getValueType().bitsLT(N.getValueType()))
- IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(),
- N.getValueType(), IdxN);
- else if (IdxN.getValueType().bitsGT(N.getValueType()))
- IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- N.getValueType(), IdxN);
+ IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
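The comment above refers to strength-reducing the index scaling in GEP lowering: when the element size is a power of two, the multiply becomes a shift. A standalone sketch of that test and rewrite (isPowerOf2 and scaleIndex are illustrative names; the lowering itself emits an ISD::SHL node with a constant amount):

#include <cstdint>

static bool isPowerOf2(uint64_t X) { return X != 0 && (X & (X - 1)) == 0; }

// Sketch: Idx * ElemSize becomes Idx << log2(ElemSize) when possible.
static uint64_t scaleIndex(uint64_t Idx, uint64_t ElemSize) {
  if (!isPowerOf2(ElemSize))
    return Idx * ElemSize;           // general case: keep the multiply
  unsigned Log2 = 0;
  while ((UINT64_C(1) << Log2) != ElemSize)
    ++Log2;                          // find the shift amount
  return Idx << Log2;
}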
@@ -2749,13 +2715,8 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
- MVT IntPtr = TLI.getPointerTy();
- if (IntPtr.bitsLT(AllocSize.getValueType()))
- AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- IntPtr, AllocSize);
- else if (IntPtr.bitsGT(AllocSize.getValueType()))
- AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
- IntPtr, AllocSize);
+ EVT IntPtr = TLI.getPointerTy();
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -2784,7 +2745,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
// Inform the Frame Information that we have just allocated a variable-sized
// object.
- CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
}
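The alignment handling mentioned a few lines up reduces to rounding the allocation size up to the stack alignment whenever the requested alignment exceeds it. A small model of the rounding step (roundUpToStackAlign is an illustrative name; StackAlign is assumed to be a power of two):

#include <cstdint>

// Sketch: round Size up to a multiple of StackAlign for a dynamic alloca.
static uint64_t roundUpToStackAlign(uint64_t Size, uint64_t StackAlign) {
  return (Size + StackAlign - 1) & ~(StackAlign - 1);
}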
void SelectionDAGLowering::visitLoad(LoadInst &I) {
@@ -2795,7 +2756,7 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) {
bool isVolatile = I.isVolatile();
unsigned Alignment = I.getAlignment();
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
@@ -2818,14 +2779,13 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) {
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
- MVT PtrVT = Ptr.getValueType();
+ EVT PtrVT = Ptr.getValueType();
for (unsigned i = 0; i != NumValues; ++i) {
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
- DAG.getNode(ISD::ADD, getCurDebugLoc(),
- PtrVT, Ptr,
- DAG.getConstant(Offsets[i], PtrVT)),
- SV, Offsets[i],
- isVolatile, Alignment);
+ DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ SV, Offsets[i], isVolatile, Alignment);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -2850,7 +2810,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) {
Value *SrcV = I.getOperand(0);
Value *PtrV = I.getOperand(1);
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
@@ -2865,7 +2825,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) {
SDValue Root = getRoot();
SmallVector<SDValue, 4> Chains(NumValues);
- MVT PtrVT = Ptr.getValueType();
+ EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
unsigned Alignment = I.getAlignment();
for (unsigned i = 0; i != NumValues; ++i)
@@ -2874,8 +2834,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) {
DAG.getNode(ISD::ADD, getCurDebugLoc(),
PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT)),
- PtrV, Offsets[i],
- isVolatile, Alignment);
+ PtrV, Offsets[i], isVolatile, Alignment);
DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues));
@@ -2915,24 +2874,18 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
Ops.push_back(Op);
}
- std::vector<MVT> VTArray;
- if (I.getType() != Type::VoidTy) {
- MVT VT = TLI.getValueType(I.getType());
- if (VT.isVector()) {
- const VectorType *DestTy = cast<VectorType>(I.getType());
- MVT EltVT = TLI.getValueType(DestTy->getElementType());
-
- VT = MVT::getVectorVT(EltVT, DestTy->getNumElements());
- assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
- }
-
- assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
- VTArray.push_back(VT);
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+ for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+ assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+ "Intrinsic uses a non-legal type?");
}
+#endif // NDEBUG
if (HasChain)
- VTArray.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size());
+ SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
// Create the node.
SDValue Result;
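The rewritten intrinsic path above leans on ComputeValueVTs to flatten a possibly aggregate result type into a flat list of value types, then asserts each one is legal. A toy analogue of the flattening over a made-up type representation (ToyType and flattenValueTypes are illustrative, not LLVM API):

#include <cstddef>
#include <vector>

struct ToyType {
  bool IsAggregate;
  std::vector<ToyType> Members;  // used when IsAggregate is true
  unsigned ScalarBits;           // used otherwise
};

// Sketch: depth-first flattening, one entry per scalar leaf.
static void flattenValueTypes(const ToyType &T,
                              std::vector<unsigned> &BitWidths) {
  if (!T.IsAggregate) {
    BitWidths.push_back(T.ScalarBits);
    return;
  }
  for (std::size_t I = 0; I != T.Members.size(); ++I)
    flattenValueTypes(T.Members[I], BitWidths);
}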
@@ -2947,7 +2900,7 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
else if (!HasChain)
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
VTs, &Ops[0], Ops.size());
- else if (I.getType() != Type::VoidTy)
+ else if (I.getType() != Type::getVoidTy(*DAG.getContext()))
Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
VTs, &Ops[0], Ops.size());
else
@@ -2961,9 +2914,9 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
else
DAG.setRoot(Chain);
}
- if (I.getType() != Type::VoidTy) {
+ if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
- MVT VT = TLI.getValueType(PTy);
+ EVT VT = TLI.getValueType(PTy);
Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
}
setValue(&I, Result);
@@ -3890,7 +3843,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW
&& DW->ShouldEmitDwarfDebug()) {
unsigned LabelID =
- DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext()));
+ DW->RecordRegionStart(RSI.getContext());
DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
getRoot(), LabelID));
}
@@ -3905,7 +3858,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return 0;
MachineFunction &MF = DAG.getMachineFunction();
- DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+ DISubprogram Subprogram(REI.getContext());
if (isInlinedFnEnd(REI, MF.getFunction())) {
// This is end of inlined function. Debugging information for inlined
@@ -3924,7 +3877,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
}
unsigned LabelID =
- DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
+ DW->RecordRegionEnd(REI.getContext());
DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
getRoot(), LabelID));
return 0;
@@ -3932,8 +3885,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
case Intrinsic::dbg_func_start: {
DwarfWriter *DW = DAG.getDwarfWriter();
DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None) || !DW
- || !DW->ShouldEmitDwarfDebug())
+ if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None))
return 0;
MachineFunction &MF = DAG.getMachineFunction();
@@ -3954,9 +3906,11 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
// Record the source line.
setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
+ if (!DW || !DW->ShouldEmitDwarfDebug())
+ return 0;
DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(cast<GlobalVariable>(FSI.getSubprogram()));
- DICompileUnit CU(PrevLocTpl.CompileUnit);
+ DISubprogram SP(FSI.getSubprogram());
+ DICompileUnit CU(PrevLocTpl.Scope);
unsigned LabelID = DW->RecordInlinedFnStart(SP, CU,
PrevLocTpl.Line,
PrevLocTpl.Col);
@@ -3967,23 +3921,44 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
// This is a beginning of a new function.
MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
+
+ if (!DW || !DW->ShouldEmitDwarfDebug())
+ return 0;
// llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram()));
+ DW->RecordRegionStart(FSI.getSubprogram());
return 0;
}
case Intrinsic::dbg_declare: {
if (OptLevel != CodeGenOpt::None)
// FIXME: Variable debug info is not supported here.
return 0;
-
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ if (!DW)
+ return 0;
DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None))
return 0;
- Value *Variable = DI.getVariable();
- DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(),
- getValue(DI.getAddress()), getValue(Variable)));
+ MDNode *Variable = DI.getVariable();
+ Value *Address = DI.getAddress();
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI)
+ return 0;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ if (MMI)
+ MMI->setVariableDbgInfo(Variable, FI);
+#else
+ DW->RecordVariable(Variable, FI);
+#endif
return 0;
}
case Intrinsic::eh_exception: {
@@ -3998,54 +3973,45 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return 0;
}
- case Intrinsic::eh_selector_i32:
- case Intrinsic::eh_selector_i64: {
+ case Intrinsic::eh_selector: {
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
- MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ?
- MVT::i32 : MVT::i64);
- if (MMI) {
- if (CurMBB->isLandingPad())
- AddCatchInfo(I, MMI, CurMBB);
- else {
+ if (CurMBB->isLandingPad())
+ AddCatchInfo(I, MMI, CurMBB);
+ else {
#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(&I);
+ FuncInfo.CatchInfoLost.insert(&I);
#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) CurMBB->addLiveIn(Reg);
- }
-
- // Insert the EHSELECTION instruction.
- SDVTList VTs = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[2];
- Ops[0] = getValue(I.getOperand(1));
- Ops[1] = getRoot();
- SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
- setValue(&I, Op);
- DAG.setRoot(Op.getValue(1));
- } else {
- setValue(&I, DAG.getConstant(0, VT));
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) CurMBB->addLiveIn(Reg);
}
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getOperand(1));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+
+ DAG.setRoot(Op.getValue(1));
+
+ setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
return 0;
}
- case Intrinsic::eh_typeid_for_i32:
- case Intrinsic::eh_typeid_for_i64: {
+ case Intrinsic::eh_typeid_for: {
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
- MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ?
- MVT::i32 : MVT::i64);
if (MMI) {
// Find the type id for the given typeinfo.
GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
unsigned TypeID = MMI->getTypeIDFor(GV);
- setValue(&I, DAG.getConstant(TypeID, VT));
+ setValue(&I, DAG.getConstant(TypeID, MVT::i32));
} else {
// Return something different to eh_selector.
- setValue(&I, DAG.getConstant(1, VT));
+ setValue(&I, DAG.getConstant(1, MVT::i32));
}
return 0;
@@ -4073,14 +4039,9 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::eh_dwarf_cfa: {
- MVT VT = getValue(I.getOperand(1)).getValueType();
- SDValue CfaArg;
- if (VT.bitsGT(TLI.getPointerTy()))
- CfaArg = DAG.getNode(ISD::TRUNCATE, dl,
- TLI.getPointerTy(), getValue(I.getOperand(1)));
- else
- CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl,
- TLI.getPointerTy(), getValue(I.getOperand(1)));
+ EVT VT = getValue(I.getOperand(1)).getValueType();
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
+ TLI.getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, dl,
TLI.getPointerTy(),
@@ -4096,7 +4057,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
Offset));
return 0;
}
-
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4118,7 +4078,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertus: Code = ISD::CVT_US; break;
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
- MVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
Value* Op1 = I.getOperand(1);
setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
DAG.getValueType(DestVT),
@@ -4182,16 +4142,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Tmp.getValue(1));
return 0;
}
- case Intrinsic::part_select: {
- // Currently not implemented: just abort
- assert(0 && "part_select intrinsic not implemented");
- abort();
- }
- case Intrinsic::part_set: {
- // Currently not implemented: just abort
- assert(0 && "part_set intrinsic not implemented");
- abort();
- }
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, dl,
getValue(I.getOperand(1)).getValueType(),
@@ -4199,21 +4149,21 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getOperand(1));
- MVT Ty = Arg.getValueType();
+ EVT Ty = Arg.getValueType();
SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
setValue(&I, result);
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getOperand(1));
- MVT Ty = Arg.getValueType();
+ EVT Ty = Arg.getValueType();
SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
setValue(&I, result);
return 0;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getOperand(1));
- MVT Ty = Arg.getValueType();
+ EVT Ty = Arg.getValueType();
SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
setValue(&I, result);
return 0;
@@ -4235,7 +4185,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- MVT PtrTy = TLI.getPointerTy();
+ EVT PtrTy = TLI.getPointerTy();
SDValue Src = getValue(I.getOperand(1)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
@@ -4289,7 +4239,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
case Intrinsic::gcread:
case Intrinsic::gcwrite:
- assert(0 && "GC failed to lower gcread/gcwrite intrinsics!");
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
return 0;
case Intrinsic::flt_rounds: {
@@ -4373,9 +4323,76 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
}
}
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+/// For target-dependent requirements, a target should override
+/// TargetLowering::IsEligibleForTailCallOptimization.
+///
+static bool
+isInTailCallPosition(const Instruction *I, Attributes RetAttr,
+ const TargetLowering &TLI) {
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+ const Function *F = ExitBB->getParent();
+
+ // The block must end in a return statement or an unreachable.
+ if (!Ret && !isa<UnreachableInst>(Term)) return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !I->isSafeToSpeculativelyExecute())
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !BBI->isSafeToSpeculativelyExecute())
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return.
+ if (F->getAttributes().getRetAttributes() != RetAttr)
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+ U = dyn_cast<Instruction>(U->getOperand(0))) {
+ if (!U)
+ return false;
+ if (!U->hasOneUse())
+ return false;
+ if (U == I)
+ break;
+ // Check for a truly no-op truncate.
+ if (isa<TruncInst>(U) &&
+ TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+ continue;
+ // Check for a truly no-op bitcast.
+ if (isa<BitCastInst>(U) &&
+ (U->getOperand(0)->getType() == U->getType() ||
+ (isa<PointerType>(U->getOperand(0)->getType()) &&
+ isa<PointerType>(U->getType()))))
+ continue;
+ // Otherwise it's not a true no-op.
+ return false;
+ }
+
+ return true;
+}
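At the source level, the test added above separates calls whose unmodified result feeds the return directly from calls whose result is used afterwards. Two C-style functions illustrating the distinction (callee is a stand-in declaration):

int callee(int);

int inTailPosition(int X) {
  return callee(X);    // result flows straight into the return: tail position
}

int notInTailPosition(int X) {
  int R = callee(X);
  return R + 1;        // result is modified before returning: not eligible
}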
void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
- bool IsTailCall,
+ bool isTailCall,
MachineBasicBlock *LandingPad) {
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
@@ -4385,8 +4402,9 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
+ unsigned j = 1;
for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
+ i != e; ++i, ++j) {
SDValue ArgNode = getValue(*i);
Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
@@ -4405,6 +4423,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI->NextLabelID();
+
// Both PendingLoads and PendingExports must be flushed here;
// this call might not return.
(void)getRoot();
@@ -4412,17 +4431,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
getControlRoot(), BeginLabel));
}
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI.LowerCallTo.
+ if (isTailCall &&
+ !isInTailCallPosition(CS.getInstruction(),
+ CS.getAttributes().getRetAttributes(),
+ TLI))
+ isTailCall = false;
+
std::pair<SDValue,SDValue> Result =
TLI.LowerCallTo(getRoot(), CS.getType(),
CS.paramHasAttr(0, Attribute::SExt),
CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
CS.getCallingConv(),
- IsTailCall && PerformTailCallOpt,
+ isTailCall,
+ !CS.getInstruction()->use_empty(),
Callee, Args, DAG, getCurDebugLoc());
- if (CS.getType() != Type::VoidTy)
+ assert((isTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+ if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
- DAG.setRoot(Result.second);
+ // As a special case, a null chain means that a tail call has
+ // been emitted and the DAG root is already updated.
+ if (Result.second.getNode())
+ DAG.setRoot(Result.second);
+ else
+ HasTailCall = true;
if (LandingPad && MMI) {
// Insert a label at the end of the invoke call to mark the try range. This
@@ -4458,12 +4495,9 @@ void SelectionDAGLowering::visitCall(CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call.
- unsigned NameLen = F->getNameLen();
- if (!F->hasLocalLinkage() && NameLen) {
- const char *NameStr = F->getNameStart();
- if (NameStr[0] == 'c' &&
- ((NameLen == 8 && !strcmp(NameStr, "copysign")) ||
- (NameLen == 9 && !strcmp(NameStr, "copysignf")))) {
+ if (!F->hasLocalLinkage() && F->hasName()) {
+ StringRef Name = F->getName();
+ if (Name == "copysign" || Name == "copysignf") {
if (I.getNumOperands() == 3 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
I.getType() == I.getOperand(1)->getType() &&
@@ -4474,10 +4508,7 @@ void SelectionDAGLowering::visitCall(CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (NameStr[0] == 'f' &&
- ((NameLen == 4 && !strcmp(NameStr, "fabs")) ||
- (NameLen == 5 && !strcmp(NameStr, "fabsf")) ||
- (NameLen == 5 && !strcmp(NameStr, "fabsl")))) {
+ } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
I.getType() == I.getOperand(1)->getType()) {
@@ -4486,30 +4517,36 @@ void SelectionDAGLowering::visitCall(CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (NameStr[0] == 's' &&
- ((NameLen == 3 && !strcmp(NameStr, "sin")) ||
- (NameLen == 4 && !strcmp(NameStr, "sinf")) ||
- (NameLen == 4 && !strcmp(NameStr, "sinl")))) {
+ } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
- I.getType() == I.getOperand(1)->getType()) {
+ I.getType() == I.getOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
- } else if (NameStr[0] == 'c' &&
- ((NameLen == 3 && !strcmp(NameStr, "cos")) ||
- (NameLen == 4 && !strcmp(NameStr, "cosf")) ||
- (NameLen == 4 && !strcmp(NameStr, "cosl")))) {
+ } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
- I.getType() == I.getOperand(1)->getType()) {
+ I.getType() == I.getOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
+ } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
}
}
} else if (isa<InlineAsm>(I.getOperand(0))) {
@@ -4523,7 +4560,12 @@ void SelectionDAGLowering::visitCall(CallInst &I) {
else
Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
- LowerCallTo(&I, Callee, I.isTailCall());
+ // Check if we can potentially perform a tail call. More detailed
+  // checking is done within LowerCallTo, after more information
+ // about the call is known.
+ bool isTailCall = PerformTailCallOpt && I.isTailCall();
+
+ LowerCallTo(&I, Callee, isTailCall);
}
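The libcall-recognition hunks above replace length-plus-strcmp matching with StringRef equality and, for sin/cos/sqrt, additionally require I.onlyReadsMemory(): a call that may set errno cannot be replaced by a bare FSIN/FCOS/FSQRT node. A standalone model of the combined guard using std::string (isSinLikeName and canLowerToFSIN are illustrative names):

#include <string>

static bool isSinLikeName(const std::string &Name) {
  return Name == "sin" || Name == "sinf" || Name == "sinl";
}

// Sketch: all conditions the lowering checks, reduced to booleans.
static bool canLowerToFSIN(const std::string &Name, unsigned NumOperands,
                           bool OperandIsFP, bool TypesMatch,
                           bool OnlyReadsMemory) {
  return isSinLikeName(Name) && NumOperands == 2 &&
         OperandIsFP && TypesMatch && OnlyReadsMemory;  // errno-safe only
}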
@@ -4539,9 +4581,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> Parts;
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
- MVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI->getNumRegisters(ValueVT);
- MVT RegisterVT = RegVTs[Value];
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -4570,7 +4612,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
bool isSExt = true;
- MVT FromVT(MVT::Other);
+ EVT FromVT(MVT::Other);
if (NumSignBits == RegSize)
isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
else if (NumZeroBits >= RegSize-1)
@@ -4620,9 +4662,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- MVT ValueVT = ValueVTs[Value];
- unsigned NumParts = TLI->getNumRegisters(ValueVT);
- MVT RegisterVT = RegVTs[Value];
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT);
@@ -4665,15 +4707,15 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code,
bool HasMatching,unsigned MatchingIdx,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
- MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
unsigned Flag = Code | (Regs.size() << 3);
if (HasMatching)
Flag |= 0x80000000 | (MatchingIdx << 16);
Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
- unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]);
- MVT RegisterVT = RegVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ EVT RegisterVT = RegVTs[Value];
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
@@ -4688,11 +4730,11 @@ static const TargetRegisterClass *
isAllocatableRegister(unsigned Reg, MachineFunction &MF,
const TargetLowering &TLI,
const TargetRegisterInfo *TRI) {
- MVT FoundVT = MVT::Other;
+ EVT FoundVT = MVT::Other;
const TargetRegisterClass *FoundRC = 0;
for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
E = TRI->regclass_end(); RCI != E; ++RCI) {
- MVT ThisVT = MVT::Other;
+ EVT ThisVT = MVT::Other;
const TargetRegisterClass *RC = *RCI;
    // If none of the value types for this register class are valid, we
@@ -4765,10 +4807,11 @@ public:
}
}
- /// getCallOperandValMVT - Return the MVT of the Value* that this operand
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
- MVT getCallOperandValMVT(const TargetLowering &TLI,
+ EVT getCallOperandValEVT(LLVMContext &Context,
+ const TargetLowering &TLI,
const TargetData *TD) const {
if (CallOperandVal == 0) return MVT::Other;
@@ -4794,7 +4837,7 @@ public:
case 32:
case 64:
case 128:
- OpTy = IntegerType::get(BitSize);
+ OpTy = IntegerType::get(Context, BitSize);
break;
}
}
@@ -4830,6 +4873,8 @@ void SelectionDAGLowering::
GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
std::set<unsigned> &OutputRegs,
std::set<unsigned> &InputRegs) {
+ LLVMContext &Context = FuncInfo.Fn->getContext();
+
// Compute whether this value requires an input register, an output register,
// or both.
bool isOutReg = false;
@@ -4869,10 +4914,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// value disagrees with the register class we plan to stick this in.
if (OpInfo.Type == InlineAsm::isInput &&
PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
- // Try to convert to the first MVT that the reg class contains. If the
+ // Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
- MVT RegVT = *PhysReg.second->vt_begin();
+ EVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
@@ -4882,18 +4927,19 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// bitcast to the corresponding integer type. This turns an f64 value
// into i64, which can be passed with two i32 values on a 32-bit
// machine.
- RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ RegVT = EVT::getIntegerVT(Context,
+ OpInfo.ConstraintVT.getSizeInBits());
OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
}
- NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
- MVT RegVT;
- MVT ValueVT = OpInfo.ConstraintVT;
+ EVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
@@ -5047,7 +5093,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- MVT OpVT = MVT::Other;
+ EVT OpVT = MVT::Other;
// Compute the value type for each operand.
switch (OpInfo.Type) {
@@ -5060,7 +5106,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+ assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+ "Bad inline asm!");
if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
OpVT = TLI.getValueType(STy->getElementType(ResNo));
} else {
@@ -5080,13 +5127,16 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
// If this is an input or an indirect output, process the call argument.
// BasicBlocks are labels, currently appearing only in asm's.
if (OpInfo.CallOperandVal) {
+ // Strip bitcasts, if any. This mostly comes up for functions.
+ OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
+
if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValMVT(TLI, TD);
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
}
OpInfo.ConstraintVT = OpVT;
@@ -5108,9 +5158,9 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
Input.ConstraintVT.isInteger()) ||
(OpInfo.ConstraintVT.getSizeInBits() !=
Input.ConstraintVT.getSizeInBits())) {
- cerr << "llvm: error: Unsupported asm: input constraint with a "
- << "matching output constraint of incompatible type!\n";
- exit(1);
+ llvm_report_error("Unsupported asm: input constraint"
+ " with a matching output constraint of incompatible"
+ " type!");
}
Input.ConstraintVT = OpInfo.ConstraintVT;
}
@@ -5213,9 +5263,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
- cerr << "llvm: error: Couldn't allocate output reg for constraint '"
- << OpInfo.ConstraintCode << "'!\n";
- exit(1);
+ llvm_report_error("Couldn't allocate output reg for"
+ " constraint '" + OpInfo.ConstraintCode + "'!");
}
// If this is an indirect operand, store through the pointer after the
@@ -5225,7 +5274,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
OpInfo.CallOperandVal));
} else {
// This is the result value of the call.
- assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+ assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+ "Bad inline asm!");
// Concatenate this output onto the outputs list.
RetValRegs.append(OpInfo.AssignedRegs);
}
@@ -5268,15 +5318,13 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
|| (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
- cerr << "llvm: error: "
- "Don't know how to handle tied indirect "
- "register inputs yet!\n";
- exit(1);
+ llvm_report_error("Don't know how to handle tied indirect "
+ "register inputs yet!");
}
RegsForValue MatchedRegs;
MatchedRegs.TLI = &TLI;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
- MVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
MatchedRegs.RegVTs.push_back(RegVT);
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
@@ -5313,9 +5361,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
hasMemory, Ops, DAG);
if (Ops.empty()) {
- cerr << "llvm: error: Invalid operand for inline asm constraint '"
- << OpInfo.ConstraintCode << "'!\n";
- exit(1);
+ llvm_report_error("Invalid operand for inline asm"
+ " constraint '" + OpInfo.ConstraintCode + "'!");
}
// Add information to the INLINEASM node to know about this input.
@@ -5345,9 +5392,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
- cerr << "llvm: error: Couldn't allocate output reg for constraint '"
- << OpInfo.ConstraintCode << "'!\n";
- exit(1);
+ llvm_report_error("Couldn't allocate input reg for"
+ " constraint '"+ OpInfo.ConstraintCode +"'!");
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
@@ -5385,7 +5431,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
- MVT ResultType = TLI.getValueType(CS.getType());
+ EVT ResultType = TLI.getValueType(CS.getType());
// If any of the results of the inline asm is a vector, it may have the
// wrong width/num elts. This can happen for register classes that can
@@ -5449,45 +5495,56 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) {
// multiply on 64-bit targets.
// FIXME: Malloc inst should go away: PR715.
uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType());
- if (ElementSize != 1)
+ if (ElementSize != 1) {
+ // Src is always 32-bits, make sure the constant fits.
+ assert(Src.getValueType() == MVT::i32);
+ ElementSize = (uint32_t)ElementSize;
Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
Src, DAG.getConstant(ElementSize, Src.getValueType()));
+ }
- MVT IntPtr = TLI.getPointerTy();
+ EVT IntPtr = TLI.getPointerTy();
- if (IntPtr.bitsLT(Src.getValueType()))
- Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src);
- else if (IntPtr.bitsGT(Src.getValueType()))
- Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src);
+ Src = DAG.getZExtOrTrunc(Src, getCurDebugLoc(), IntPtr);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Src;
- Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
Args.push_back(Entry);
+ bool isTailCall = PerformTailCallOpt &&
+ isInTailCallPosition(&I, Attribute::None, TLI);
std::pair<SDValue,SDValue> Result =
TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false,
- 0, CallingConv::C, PerformTailCallOpt,
+ 0, CallingConv::C, isTailCall,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("malloc", IntPtr),
Args, DAG, getCurDebugLoc());
- setValue(&I, Result.first); // Pointers always fit in registers
- DAG.setRoot(Result.second);
+ if (Result.first.getNode())
+ setValue(&I, Result.first); // Pointers always fit in registers
+ if (Result.second.getNode())
+ DAG.setRoot(Result.second);
}
void SelectionDAGLowering::visitFree(FreeInst &I) {
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(I.getOperand(0));
- Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
Args.push_back(Entry);
- MVT IntPtr = TLI.getPointerTy();
+ EVT IntPtr = TLI.getPointerTy();
+ bool isTailCall = PerformTailCallOpt &&
+ isInTailCallPosition(&I, Attribute::None, TLI);
std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false,
- 0, CallingConv::C, PerformTailCallOpt,
+ TLI.LowerCallTo(getRoot(), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, isTailCall,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("free", IntPtr), Args, DAG,
getCurDebugLoc());
- DAG.setRoot(Result.second);
+ if (Result.second.getNode())
+ DAG.setRoot(Result.second);
}
void SelectionDAGLowering::visitVAStart(CallInst &I) {
@@ -5521,161 +5578,31 @@ void SelectionDAGLowering::visitVACopy(CallInst &I) {
DAG.getSrcValue(I.getOperand(2))));
}
-/// TargetLowering::LowerArguments - This is the default LowerArguments
-/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all
-/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be
-/// integrated into SDISel.
-void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &ArgValues,
- DebugLoc dl) {
- // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
- SmallVector<SDValue, 3+16> Ops;
- Ops.push_back(DAG.getRoot());
- Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
- Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
-
- // Add one result value for each formal argument.
- SmallVector<MVT, 16> RetVals;
- unsigned j = 1;
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I, ++j) {
- SmallVector<MVT, 4> ValueVTs;
- ComputeValueVTs(*this, I->getType(), ValueVTs);
- for (unsigned Value = 0, NumValues = ValueVTs.size();
- Value != NumValues; ++Value) {
- MVT VT = ValueVTs[Value];
- const Type *ArgTy = VT.getTypeForMVT();
- ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment =
- getTargetData()->getABITypeAlignment(ArgTy);
-
- if (F.paramHasAttr(j, Attribute::ZExt))
- Flags.setZExt();
- if (F.paramHasAttr(j, Attribute::SExt))
- Flags.setSExt();
- if (F.paramHasAttr(j, Attribute::InReg))
- Flags.setInReg();
- if (F.paramHasAttr(j, Attribute::StructRet))
- Flags.setSRet();
- if (F.paramHasAttr(j, Attribute::ByVal)) {
- Flags.setByVal();
- const PointerType *Ty = cast<PointerType>(I->getType());
- const Type *ElementTy = Ty->getElementType();
- unsigned FrameAlign = getByValTypeAlignment(ElementTy);
- unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
- // For ByVal, alignment should be passed from FE. BE will guess if
- // this info is not there but there are cases it cannot get right.
- if (F.getParamAlignment(j))
- FrameAlign = F.getParamAlignment(j);
- Flags.setByValAlign(FrameAlign);
- Flags.setByValSize(FrameSize);
- }
- if (F.paramHasAttr(j, Attribute::Nest))
- Flags.setNest();
- Flags.setOrigAlign(OriginalAlignment);
-
- MVT RegisterVT = getRegisterType(VT);
- unsigned NumRegs = getNumRegisters(VT);
- for (unsigned i = 0; i != NumRegs; ++i) {
- RetVals.push_back(RegisterVT);
- ISD::ArgFlagsTy MyFlags = Flags;
- if (NumRegs > 1 && i == 0)
- MyFlags.setSplit();
- // if it isn't first piece, alignment must be 1
- else if (i > 0)
- MyFlags.setOrigAlign(1);
- Ops.push_back(DAG.getArgFlags(MyFlags));
- }
- }
- }
-
- RetVals.push_back(MVT::Other);
-
- // Create the node.
- SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl,
- DAG.getVTList(&RetVals[0], RetVals.size()),
- &Ops[0], Ops.size()).getNode();
-
- // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but
- // allows exposing the loads that may be part of the argument access to the
- // first DAGCombiner pass.
- SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG);
-
- // The number of results should match up, except that the lowered one may have
- // an extra flag result.
- assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() ||
- (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() &&
- TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag))
- && "Lowering produced unexpected number of results!");
-
- // The FORMAL_ARGUMENTS node itself is likely no longer needed.
- if (Result != TmpRes.getNode() && Result->use_empty()) {
- HandleSDNode Dummy(DAG.getRoot());
- DAG.RemoveDeadNode(Result);
- }
-
- Result = TmpRes.getNode();
-
- unsigned NumArgRegs = Result->getNumValues() - 1;
- DAG.setRoot(SDValue(Result, NumArgRegs));
-
- // Set up the return result vector.
- unsigned i = 0;
- unsigned Idx = 1;
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
- ++I, ++Idx) {
- SmallVector<MVT, 4> ValueVTs;
- ComputeValueVTs(*this, I->getType(), ValueVTs);
- for (unsigned Value = 0, NumValues = ValueVTs.size();
- Value != NumValues; ++Value) {
- MVT VT = ValueVTs[Value];
- MVT PartVT = getRegisterType(VT);
-
- unsigned NumParts = getNumRegisters(VT);
- SmallVector<SDValue, 4> Parts(NumParts);
- for (unsigned j = 0; j != NumParts; ++j)
- Parts[j] = SDValue(Result, i++);
-
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (F.paramHasAttr(Idx, Attribute::SExt))
- AssertOp = ISD::AssertSext;
- else if (F.paramHasAttr(Idx, Attribute::ZExt))
- AssertOp = ISD::AssertZext;
-
- ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts,
- PartVT, VT, AssertOp));
- }
- }
- assert(i == NumArgRegs && "Argument register count mismatch!");
-}
-
-
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
-/// implementation, which just inserts an ISD::CALL node, which is later custom
-/// lowered by the target to something concrete. FIXME: When all targets are
-/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
- unsigned CallingConv, bool isTailCall,
+ CallingConv::ID CallConv, bool isTailCall,
+ bool isReturnValueUsed,
SDValue Callee,
ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+
assert((!isTailCall || PerformTailCallOpt) &&
"isTailCall set when tail-call optimizations are disabled!");
- SmallVector<SDValue, 32> Ops;
- Ops.push_back(Chain); // Op#0 - Chain
- Ops.push_back(Callee);
-
// Handle all of the outgoing arguments.
+ SmallVector<ISD::OutputArg, 32> Outs;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
- MVT VT = ValueVTs[Value];
- const Type *ArgTy = VT.getTypeForMVT();
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
@@ -5707,8 +5634,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- MVT PartVT = getRegisterType(VT);
- unsigned NumParts = getNumRegisters(VT);
+ EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -5719,75 +5646,105 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
- for (unsigned i = 0; i != NumParts; ++i) {
+ for (unsigned j = 0; j != NumParts; ++j) {
        // if it isn't the first piece, alignment must be 1
- ISD::ArgFlagsTy MyFlags = Flags;
- if (NumParts > 1 && i == 0)
- MyFlags.setSplit();
- else if (i != 0)
- MyFlags.setOrigAlign(1);
-
- Ops.push_back(Parts[i]);
- Ops.push_back(DAG.getArgFlags(MyFlags));
+ ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0)
+ MyFlags.Flags.setOrigAlign(1);
+
+ Outs.push_back(MyFlags);
}
}
}
- // Figure out the result value types. We start by making a list of
- // the potentially illegal return value types.
- SmallVector<MVT, 4> LoweredRetTys;
- SmallVector<MVT, 4> RetTys;
+ // Handle the incoming return values from the call.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
ComputeValueVTs(*this, RetTy, RetTys);
-
- // Then we translate that to a list of legal types.
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- MVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(VT);
- unsigned NumRegs = getNumRegisters(VT);
- for (unsigned i = 0; i != NumRegs; ++i)
- LoweredRetTys.push_back(RegisterVT);
- }
-
- LoweredRetTys.push_back(MVT::Other); // Always has a chain.
-
- // Create the CALL node.
- SDValue Res = DAG.getCall(CallingConv, dl,
- isVarArg, isTailCall, isInreg,
- DAG.getVTList(&LoweredRetTys[0],
- LoweredRetTys.size()),
- &Ops[0], Ops.size(), NumFixedArgs
- );
- Chain = Res.getValue(LoweredRetTys.size() - 1);
-
- // Gather up the call result into a single value.
- if (RetTy != Type::VoidTy && !RetTys.empty()) {
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
-
- if (RetSExt)
- AssertOp = ISD::AssertSext;
- else if (RetZExt)
- AssertOp = ISD::AssertZext;
-
- SmallVector<SDValue, 4> ReturnValues;
- unsigned RegNo = 0;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- MVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(VT);
- unsigned NumRegs = getNumRegisters(VT);
- unsigned RegNoEnd = NumRegs + RegNo;
- SmallVector<SDValue, 4> Results;
- for (; RegNo != RegNoEnd; ++RegNo)
- Results.push_back(Res.getValue(RegNo));
- SDValue ReturnValue =
- getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT,
- AssertOp);
- ReturnValues.push_back(ReturnValue);
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = isReturnValueUsed;
+ if (RetSExt)
+ MyFlags.Flags.setSExt();
+ if (RetZExt)
+ MyFlags.Flags.setZExt();
+ if (isInreg)
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
}
- Res = DAG.getNode(ISD::MERGE_VALUES, dl,
- DAG.getVTList(&RetTys[0], RetTys.size()),
- &ReturnValues[0], ReturnValues.size());
}
+ // Check if target-dependent constraints permit a tail call here.
+ // Target-independent constraints should be checked by the caller.
+ if (isTailCall &&
+ !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG))
+ isTailCall = false;
+
+ SmallVector<SDValue, 4> InVals;
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!isTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((isTailCall || InVals.size() == Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+ DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerCall emitted a null value!");
+ assert(Ins[i].VT == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ });
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (isTailCall) {
+ DAG.setRoot(Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (RetZExt)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+ SDValue ReturnValue =
+ getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT,
+ AssertOp);
+ ReturnValues.push_back(ReturnValue);
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case; nothing
+ // will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), Chain);
+
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+
return std::make_pair(Res, Chain);
}
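
The (SDValue(), SDValue()) pair above is a sentinel, not an error. A minimal caller-side sketch of how it is meant to be consumed — the visitor context, the instruction `I`, and the argument list are illustrative assumptions, not lines from this patch:

// Hypothetical caller in the DAG builder (names assumed for illustration).
std::pair<SDValue, SDValue> Result =
    TLI.LowerCallTo(getRoot(), RetTy, RetSExt, RetZExt, isVarArg, isInreg,
                    NumFixedArgs, CallConv, isTailCall,
                    /*isReturnValueUsed=*/!I.use_empty(),
                    Callee, Args, DAG, getCurDebugLoc());
if (Result.first.getNode())
  setValue(&I, Result.first);   // ordinary call: record the merged result
if (Result.second.getNode())
  DAG.setRoot(Result.second);   // ordinary call: the chain continues
else
  HasTailCall = true;           // sentinel: LowerCallTo already set the root
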
@@ -5800,8 +5757,7 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
- assert(0 && "LowerOperation not implemented for this target!");
- abort();
+ llvm_unreachable("LowerOperation not implemented for this target!");
return SDValue();
}
@@ -5813,7 +5769,7 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
"Copy from a reg to the same reg!");
assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
- RegsForValue RFV(TLI, Reg, V->getType());
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
PendingExports.push_back(Chain);
@@ -5825,25 +5781,122 @@ void SelectionDAGISel::
LowerArguments(BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
Function &F = *LLVMBB->getParent();
- SDValue OldRoot = SDL->DAG.getRoot();
- SmallVector<SDValue, 16> Args;
- TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc());
-
- unsigned a = 0;
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
- AI != E; ++AI) {
- SmallVector<MVT, 4> ValueVTs;
- ComputeValueVTs(TLI, AI->getType(), ValueVTs);
+ SelectionDAG &DAG = SDL->DAG;
+ SDValue OldRoot = DAG.getRoot();
+ DebugLoc dl = SDL->getCurDebugLoc();
+ const TargetData *TD = TLI.getTargetData();
+
+ // Set up the incoming argument description vector.
+ SmallVector<ISD::InputArg, 16> Ins;
+ unsigned Idx = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ TD->getABITypeAlignment(ArgTy);
+
+ if (F.paramHasAttr(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.paramHasAttr(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.paramHasAttr(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(I->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = TD->getTypeAllocSize(ElementTy);
+ // For ByVal arguments, the alignment should be passed from the front end;
+ // the back end will guess if this info is not present, but there are
+ // cases it cannot get right.
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (F.paramHasAttr(Idx, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+ // if it isn't the first piece, the alignment must be 1
+ else if (i > 0)
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+ F.isVarArg(), Ins,
+ dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(Ins[i].VT == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
- if (!AI->use_empty()) {
- SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues,
- SDL->getCurDebugLoc()));
+ for (unsigned Value = 0; Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, AssertOp));
+ }
+ i += NumParts;
+ }
+ if (!I->use_empty()) {
+ SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDL->getCurDebugLoc()));
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BBs will reference it as.
- SDL->CopyToExportRegsIfNeeded(AI);
+ SDL->CopyToExportRegsIfNeeded(I);
}
- a += NumValues;
}
+ assert(i == InVals.size() && "Argument register count mismatch!");
// Finally, if the target has anything special to do, allow it to do so.
// FIXME: this should insert code into the DAG!
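
To make the decomposition above concrete, a worked example under the assumption of a 32-bit target where i32 is the widest legal integer type (the aggregate argument type is invented for illustration):

// For an argument of LLVM type {i64, float}:
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, ArgTy, ValueVTs);  // ValueVTs == {MVT::i64, MVT::f32}
// getNumRegisters(Context, MVT::i64) == 2 and getRegisterType(...) == i32,
// so three ISD::InputArg entries are pushed: two i32 parts for the i64
// (the first flagged with setSplit(), the second with OrigAlign == 1) and
// one f32. LowerFormalArguments must return exactly three InVals, which
// getCopyFromParts then reassembles into the original i64 and f32 values.
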
@@ -5908,11 +5961,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
// Remember that this register needs to added to the machine PHI node as
// the input for this MBB.
- SmallVector<MVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- MVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI.getNumRegisters(VT);
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
Reg += NumRegisters;
@@ -5962,11 +6015,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
// own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegForValue to create registers, so it always creates
// exactly one register for each non-void instruction.
- MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Promote MVT::i1.
if (VT == MVT::i1)
- VT = TLI.getTypeToTransformTo(VT);
+ VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
else {
SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index 057c8410da0e..06acc8a6bfac 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -15,6 +15,7 @@
#define SELECTIONDAGBUILD_H
#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#ifndef NDEBUG
@@ -23,6 +24,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <vector>
#include <set>
@@ -75,8 +77,6 @@ class TruncInst;
class UIToFPInst;
class UnreachableInst;
class UnwindInst;
-class VICmpInst;
-class VFCmpInst;
class VAArgInst;
class ZExtInst;
@@ -117,7 +117,7 @@ public:
SmallSet<Instruction*, 8> CatchInfoFound;
#endif
- unsigned MakeReg(MVT VT);
+ unsigned MakeReg(EVT VT);
/// isExportedInst - Return true if the specified value is an instruction
/// exported from its block.
@@ -345,9 +345,15 @@ public:
/// BitTestCases - Vector of BitTestBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<BitTestBlock> BitTestCases;
-
+
+ /// PHINodesToUpdate - A list of phi instructions whose operand list will
+ /// be updated after processing the current basic block.
std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+ /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
+ /// scheduler custom lowering), track the change here.
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
+
// Emit PHI-node-operand constants only once even if used by multiple
// PHI nodes.
DenseMap<Constant*, unsigned> ConstantsOut;
@@ -363,11 +369,21 @@ public:
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurDebugLoc(DebugLoc::getUnknownLoc()),
- TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) {
+ TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ HasTailCall(false),
+ Context(dag.getContext()) {
}
void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
@@ -489,8 +505,6 @@ private:
void visitAShr(User &I) { visitShift(I, ISD::SRA); }
void visitICmp(User &I);
void visitFCmp(User &I);
- void visitVICmp(User &I);
- void visitVFCmp(User &I);
// Visit the conversion instructions
void visitTrunc(User &I);
void visitZExt(User &I);
@@ -539,12 +553,10 @@ private:
void visitVACopy(CallInst &I);
void visitUserOp1(Instruction &I) {
- assert(0 && "UserOp1 should not exist at instruction selection time!");
- abort();
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
void visitUserOp2(Instruction &I) {
- assert(0 && "UserOp2 should not exist at instruction selection time!");
- abort();
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 9d72a128d18b..ae98da5ef8b8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -16,6 +16,7 @@
#include "SelectionDAGBuild.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
@@ -29,6 +30,7 @@
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -47,8 +49,10 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -150,12 +154,15 @@ namespace llvm {
// insert. The specified MachineInstr is created but not inserted into any
// basic blocks, and the scheduler passes ownership of it to this method.
MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- cerr << "If a target marks an instruction with "
- << "'usesCustomDAGSchedInserter', it must implement "
- << "TargetLowering::EmitInstrWithCustomInserter!\n";
- abort();
- return 0;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+#ifndef NDEBUG
+ errs() << "If a target marks an instruction with "
+ "'usesCustomDAGSchedInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(0);
+ return 0;
}
/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
@@ -215,8 +222,11 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB,
--Pos;
}
- TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
- CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+ bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+ assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+ (void) Emitted;
+
+ CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
if (Coalesced) {
if (&*InsertPos == UseMI) ++InsertPos;
MBB->erase(UseMI);
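
The `(void) Emitted` cast is the usual idiom for checking a result only in asserting builds; a standalone illustration (DoFallibleThing is a made-up placeholder):

bool OK = DoFallibleThing();              // hypothetical fallible call
assert(OK && "DoFallibleThing failed!");  // checked in debug builds only
(void) OK; // assert() compiles away under NDEBUG; the cast keeps
           // -Wunused-variable quiet in release builds
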
@@ -247,8 +257,10 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
E = MRI.livein_end(); LI != E; ++LI)
if (LI->second) {
const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
- TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
- LI->second, LI->first, RC, RC);
+ bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+ LI->second, LI->first, RC, RC);
+ assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+ (void) Emitted;
}
}
}
@@ -258,7 +270,7 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
//===----------------------------------------------------------------------===//
SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
- FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+ MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(TLI, *FuncInfo)),
SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)),
@@ -273,44 +285,42 @@ SelectionDAGISel::~SelectionDAGISel() {
delete FuncInfo;
}
-unsigned SelectionDAGISel::MakeReg(MVT VT) {
+unsigned SelectionDAGISel::MakeReg(EVT VT) {
return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
AU.addRequired<DwarfWriter>();
- AU.setPreservesAll();
+ AU.addPreserved<DwarfWriter>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
-bool SelectionDAGISel::runOnFunction(Function &Fn) {
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ Function &Fn = *mf.getFunction();
+
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (Fn.hasAvailableExternallyLinkage())
- return false;
-
-
// Get alias analysis for load/store combining.
AA = &getAnalysis<AliasAnalysis>();
- TargetMachine &TM = TLI.getTargetMachine();
- MF = &MachineFunction::construct(&Fn, TM);
+ MF = &mf;
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- if (MF->getFunction()->hasGC())
- GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction());
+ if (Fn.hasGC())
+ GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn);
else
GFI = 0;
RegInfo = &MF->getRegInfo();
- DOUT << "\n\n\n=== " << Fn.getName() << "\n";
+ DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n");
MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
@@ -358,140 +368,50 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
}
}
-/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and
-/// whether object offset >= 0.
-static bool
-IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) {
- if (!isa<FrameIndexSDNode>(Op)) return false;
-
- FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
- int FrameIdx = FrameIdxNode->getIndex();
- return MFI->isFixedObjectIndex(FrameIdx) &&
- MFI->getObjectOffset(FrameIdx) >= 0;
-}
-
-/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
-/// possibly be overwritten when lowering the outgoing arguments in a tail
-/// call. Currently the implementation of this call is very conservative and
-/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
-/// virtual registers would be overwritten by direct lowering.
-static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op,
- MachineFrameInfo *MFI) {
- RegisterSDNode * OpReg = NULL;
- if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
- (Op.getOpcode()== ISD::CopyFromReg &&
- (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
- (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
- (Op.getOpcode() == ISD::LOAD &&
- IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
- (Op.getOpcode() == ISD::MERGE_VALUES &&
- Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD &&
- IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()).
- getOperand(1))))
- return true;
- return false;
-}
-
-/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the
-/// DAG and fixes their tailcall attribute operand.
-static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
- const TargetLowering& TLI) {
- SDNode * Ret = NULL;
- SDValue Terminator = DAG.getRoot();
-
- // Find RET node.
- if (Terminator.getOpcode() == ISD::RET) {
- Ret = Terminator.getNode();
- }
-
- // Fix tail call attribute of CALL nodes.
- for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(),
- BI = DAG.allnodes_end(); BI != BE; ) {
- --BI;
- if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) {
- SDValue OpRet(Ret, 0);
- SDValue OpCall(BI, 0);
- bool isMarkedTailCall = TheCall->isTailCall();
- // If CALL node has tail call attribute set to true and the call is not
- // eligible (no RET or the target rejects) the attribute is fixed to
- // false. The TargetLowering::IsEligibleForTailCallOptimization function
- // must correctly identify tail call optimizable calls.
- if (!isMarkedTailCall) continue;
- if (Ret==NULL ||
- !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) {
- // Not eligible. Mark CALL node as non tail call. Note that we
- // can modify the call node in place since calls are not CSE'd.
- TheCall->setNotTailCall();
- } else {
- // Look for tail call clobbered arguments. Emit a series of
- // copyto/copyfrom virtual register nodes to protect them.
- SmallVector<SDValue, 32> Ops;
- SDValue Chain = TheCall->getChain(), InFlag;
- Ops.push_back(Chain);
- Ops.push_back(TheCall->getCallee());
- for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
- SDValue Arg = TheCall->getArg(i);
- bool isByVal = TheCall->getArgFlags(i).isByVal();
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- if (!isByVal &&
- IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
- MVT VT = Arg.getValueType();
- unsigned VReg = MF.getRegInfo().
- createVirtualRegister(TLI.getRegClassFor(VT));
- Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(),
- VReg, Arg, InFlag);
- InFlag = Chain.getValue(1);
- Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(),
- VReg, VT, InFlag);
- Chain = Arg.getValue(1);
- InFlag = Arg.getValue(2);
- }
- Ops.push_back(Arg);
- Ops.push_back(TheCall->getArgFlagsVal(i));
- }
- // Link in chain of CopyTo/CopyFromReg.
- Ops[0] = Chain;
- DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
- }
- }
- }
-}
-
void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
BasicBlock::iterator Begin,
BasicBlock::iterator End) {
SDL->setCurrentBasicBlock(BB);
-
- // Lower all of the non-terminator instructions.
- for (BasicBlock::iterator I = Begin; I != End; ++I)
+ MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+
+ // Lower all of the non-terminator instructions. If a call is emitted
+ // as a tail call, cease emitting nodes for this block.
+ for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) {
+ if (MDDbgKind) {
+ // Update DebugLoc if debug information is attached to this
+ // instruction.
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+ SDL->setCurDebugLoc(Loc);
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
+ }
+ }
if (!isa<TerminatorInst>(I))
SDL->visit(*I);
+ }
- // Ensure that all instructions which are used outside of their defining
- // blocks are available as virtual registers. Invoke is handled elsewhere.
- for (BasicBlock::iterator I = Begin; I != End; ++I)
- if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
- SDL->CopyToExportRegsIfNeeded(I);
+ if (!SDL->HasTailCall) {
+ // Ensure that all instructions which are used outside of their defining
+ // blocks are available as virtual registers. Invoke is handled elsewhere.
+ for (BasicBlock::iterator I = Begin; I != End; ++I)
+ if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
+ SDL->CopyToExportRegsIfNeeded(I);
- // Handle PHI nodes in successor blocks.
- if (End == LLVMBB->end()) {
- HandlePHINodesInSuccessorBlocks(LLVMBB);
+ // Handle PHI nodes in successor blocks.
+ if (End == LLVMBB->end()) {
+ HandlePHINodesInSuccessorBlocks(LLVMBB);
- // Lower the terminator after the copies are emitted.
- SDL->visit(*LLVMBB->getTerminator());
+ // Lower the terminator after the copies are emitted.
+ SDL->visit(*LLVMBB->getTerminator());
+ }
}
-
+
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDL->getControlRoot());
- // Check whether calls in this block are real tail calls. Fix up CALL nodes
- // with correct tailcall attribute so that the target can rely on the tailcall
- // attribute indicating whether the call is really eligible for tail call
- // optimization.
- if (PerformTailCallOpt)
- CheckDAGForTailCallsAndFixThem(*CurDAG, TLI);
-
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
SDL->clear();
@@ -500,51 +420,51 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
-
+
Worklist.push_back(CurDAG->getRoot().getNode());
-
+
APInt Mask;
APInt KnownZero;
APInt KnownOne;
-
+
while (!Worklist.empty()) {
SDNode *N = Worklist.back();
Worklist.pop_back();
-
+
// If we've already seen this node, ignore it.
if (!VisitedNodes.insert(N))
continue;
-
+
// Otherwise, add all chain operands to the worklist.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
if (N->getOperand(i).getValueType() == MVT::Other)
Worklist.push_back(N->getOperand(i).getNode());
-
+
// If this is a CopyToReg with a vreg dest, process it.
if (N->getOpcode() != ISD::CopyToReg)
continue;
-
+
unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(DestReg))
continue;
-
+
// Ignore non-scalar or non-integer values.
SDValue Src = N->getOperand(2);
- MVT SrcVT = Src.getValueType();
+ EVT SrcVT = Src.getValueType();
if (!SrcVT.isInteger() || SrcVT.isVector())
continue;
-
+
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
-
+
// Only install this information if it tells us something.
if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
DestReg -= TargetRegisterInfo::FirstVirtualRegister;
- FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo();
- if (DestReg >= FLI.LiveOutRegInfo.size())
- FLI.LiveOutRegInfo.resize(DestReg+1);
- FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg];
+ if (DestReg >= FuncInfo->LiveOutRegInfo.size())
+ FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+ FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo->LiveOutRegInfo[DestReg];
LOI.NumSignBits = NumSignBits;
LOI.KnownOne = KnownOne;
LOI.KnownZero = KnownZero;
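
A small worked case for the bookkeeping above (the node shape and constants are illustrative, not taken from this patch):

// Suppose Src is (and X, 255) with SrcVT == MVT::i32:
APInt Mask = APInt::getAllOnesValue(32);
APInt KnownZero, KnownOne;
CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
// KnownZero == 0xFFFFFF00 and KnownOne == 0, so the destination vreg's
// LiveOutInfo records 24 known-zero high bits; a zero-extension of that
// vreg in a later block can then be proven redundant.
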
@@ -560,10 +480,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
- BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' +
- BB->getBasicBlock()->getName();
+ BlockName = MF->getFunction()->getNameStr() + ":" +
+ BB->getBasicBlock()->getNameStr();
- DOUT << "Initial selection DAG:\n";
+ DEBUG(errs() << "Initial selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -575,10 +495,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
} else {
CurDAG->Combine(Unrestricted, *AA, OptLevel);
}
-
- DOUT << "Optimized lowered selection DAG:\n";
+
+ DEBUG(errs() << "Optimized lowered selection DAG:\n");
DEBUG(CurDAG->dump());
-
+
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
if (!DisableLegalizeTypes) {
@@ -593,7 +513,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DOUT << "Type-legalized selection DAG:\n";
+ DEBUG(errs() << "Type-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
if (Changed) {
@@ -608,7 +528,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DOUT << "Optimized type-legalized selection DAG:\n";
+ DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
}
@@ -638,11 +558,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DOUT << "Optimized vector-legalized selection DAG:\n";
+ DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
}
}
-
+
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
if (TimePassesIsEnabled) {
@@ -651,10 +571,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
} else {
CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
}
-
- DOUT << "Legalized selection DAG:\n";
+
+ DEBUG(errs() << "Legalized selection DAG:\n");
DEBUG(CurDAG->dump());
-
+
if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
// Run the DAG combiner in post-legalize mode.
@@ -664,12 +584,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
} else {
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
-
- DOUT << "Optimized legalized selection DAG:\n";
+
+ DEBUG(errs() << "Optimized legalized selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
-
+
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -682,7 +602,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
InstructionSelect();
}
- DOUT << "Selected selection DAG:\n";
+ DEBUG(errs() << "Selected selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -698,13 +618,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
if (ViewSUnitDAGs) Scheduler->viewGraph();
- // Emit machine code to BB. This can change 'BB' to the last block being
+ // Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
if (TimePassesIsEnabled) {
NamedRegionTimer T("Instruction Creation", GroupName);
- BB = Scheduler->EmitSchedule();
+ BB = Scheduler->EmitSchedule(&SDL->EdgeMapping);
} else {
- BB = Scheduler->EmitSchedule();
+ BB = Scheduler->EmitSchedule(&SDL->EdgeMapping);
}
// Free the scheduler state.
@@ -715,9 +635,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
delete Scheduler;
}
- DOUT << "Selected machine code:\n";
+ DEBUG(errs() << "Selected machine code:\n");
DEBUG(BB->dump());
-}
+}
void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
MachineFunction &MF,
@@ -736,6 +656,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
#endif
);
+ MetadataContext &TheMetadata = Fn.getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+
// Iterate over all basic blocks in the function.
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
BasicBlock *LLVMBB = &*I;
@@ -758,7 +681,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
I != E; ++I, ++j)
if (Fn.paramHasAttr(j, Attribute::ByVal)) {
if (EnableFastISelVerbose || EnableFastISelAbort)
- cerr << "FastISel skips entry block due to byval argument\n";
+ errs() << "FastISel skips entry block due to byval argument\n";
SuppressFastISel = true;
break;
}
@@ -818,16 +741,29 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
FastIS->startNewBlock(BB);
// Do FastISel on as many instructions as possible.
for (; BI != End; ++BI) {
+ if (MDDbgKind) {
+ // Update DebugLoc if debug information is attached to this
+ // instruction.
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc,
+ MF.getDebugLocInfo());
+ FastIS->setCurDebugLoc(Loc);
+ if (MF.getDefaultDebugLoc().isUnknown())
+ MF.setDefaultDebugLoc(Loc);
+ }
+ }
+
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(BI))
if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
if (EnableFastISelVerbose || EnableFastISelAbort) {
- cerr << "FastISel miss: ";
+ errs() << "FastISel miss: ";
BI->dump();
}
- if (EnableFastISelAbort)
- assert(0 && "FastISel didn't handle a PHI in a successor");
+ assert(!EnableFastISelAbort &&
+ "FastISel didn't handle a PHI in a successor");
break;
}
@@ -842,11 +778,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(BI)) {
if (EnableFastISelVerbose || EnableFastISelAbort) {
- cerr << "FastISel missed call: ";
+ errs() << "FastISel missed call: ";
BI->dump();
}
- if (BI->getType() != Type::VoidTy) {
+ if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) {
unsigned &R = FuncInfo->ValueMap[BI];
if (!R)
R = FuncInfo->CreateRegForValue(BI);
@@ -864,13 +800,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// For now, be a little lenient about non-branch terminators.
if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
if (EnableFastISelVerbose || EnableFastISelAbort) {
- cerr << "FastISel miss: ";
+ errs() << "FastISel miss: ";
BI->dump();
}
if (EnableFastISelAbort)
// The "fast" selector couldn't handle something and bailed.
// For the purpose of debugging, just abort.
- assert(0 && "FastISel didn't select the entire block");
+ llvm_unreachable("FastISel didn't select the entire block");
}
break;
}
@@ -895,15 +831,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
void
SelectionDAGISel::FinishBasicBlock() {
- DOUT << "Target-post-processed machine code:\n";
+ DEBUG(errs() << "Target-post-processed machine code:\n");
DEBUG(BB->dump());
- DOUT << "Total amount of phi nodes to update: "
- << SDL->PHINodesToUpdate.size() << "\n";
+ DEBUG(errs() << "Total amount of phi nodes to update: "
+ << SDL->PHINodesToUpdate.size() << "\n");
DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i)
- DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first
- << ", " << SDL->PHINodesToUpdate[i].second << ")\n";);
-
+ errs() << "Node " << i << " : ("
+ << SDL->PHINodesToUpdate[i].first
+ << ", " << SDL->PHINodesToUpdate[i].second << ")\n");
+
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
if (SDL->SwitchCases.empty() &&
@@ -932,7 +869,7 @@ SelectionDAGISel::FinishBasicBlock() {
CurDAG->setRoot(SDL->getRoot());
CodeGenAndEmitDAG();
SDL->clear();
- }
+ }
for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) {
// Set the current basic block to the mbb we wish to insert the code into
@@ -947,8 +884,8 @@ SelectionDAGISel::FinishBasicBlock() {
SDL->visitBitTestCase(SDL->BitTestCases[i].Default,
SDL->BitTestCases[i].Reg,
SDL->BitTestCases[i].Cases[j]);
-
-
+
+
CurDAG->setRoot(SDL->getRoot());
CodeGenAndEmitDAG();
SDL->clear();
@@ -1001,7 +938,7 @@ SelectionDAGISel::FinishBasicBlock() {
CodeGenAndEmitDAG();
SDL->clear();
}
-
+
// Set the current basic block to the mbb we wish to insert the code into
BB = SDL->JTCases[i].second.MBB;
SDL->setCurrentBasicBlock(BB);
@@ -1010,7 +947,7 @@ SelectionDAGISel::FinishBasicBlock() {
CurDAG->setRoot(SDL->getRoot());
CodeGenAndEmitDAG();
SDL->clear();
-
+
// Update PHI Nodes
for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
@@ -1019,20 +956,21 @@ SelectionDAGISel::FinishBasicBlock() {
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
if (PHIBB == SDL->JTCases[i].second.Default) {
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB));
+ PHI->addOperand
+ (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false));
+ PHI->addOperand
+ (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB));
}
// JT BB. Just iterate over successors here
if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
- false));
+ PHI->addOperand
+ (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
}
}
SDL->JTCases.clear();
-
+
// If the switch block involved a branch to one of the actual successors, we
// need to update PHI nodes in that block.
for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
@@ -1045,25 +983,31 @@ SelectionDAGISel::FinishBasicBlock() {
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
}
-
+
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDL->SwitchCases[i].ThisBB;
+ MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB;
SDL->setCurrentBasicBlock(BB);
-
+
// Emit the code
SDL->visitSwitchCase(SDL->SwitchCases[i]);
CurDAG->setRoot(SDL->getRoot());
CodeGenAndEmitDAG();
- SDL->clear();
-
+
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
+ // If new BBs are created during scheduling, the edges may have been
+ // updated. That is, the edge from ThisBB to BB may have been split and
+ // BB's predecessor is now another block.
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI =
+ SDL->EdgeMapping.find(BB);
+ if (EI != SDL->EdgeMapping.end())
+ ThisBB = EI->second;
for (MachineBasicBlock::iterator Phi = BB->begin();
Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
// This value for this PHI node is recorded in PHINodesToUpdate, get it.
@@ -1073,21 +1017,22 @@ SelectionDAGISel::FinishBasicBlock() {
if (SDL->PHINodesToUpdate[pn].first == Phi) {
Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn].
second, false));
- Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB));
+ Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
break;
}
}
}
-
+
// Don't process RHS if same block as LHS.
if (BB == SDL->SwitchCases[i].FalseBB)
SDL->SwitchCases[i].FalseBB = 0;
-
+
// If we haven't handled the RHS, do so now. Otherwise, we're done.
SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB;
SDL->SwitchCases[i].FalseBB = 0;
}
assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0);
+ SDL->clear();
}
SDL->SwitchCases.clear();
@@ -1101,12 +1046,12 @@ SelectionDAGISel::FinishBasicBlock() {
///
ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
-
+
if (!Ctor) {
Ctor = ISHeuristic;
RegisterScheduler::setDefault(Ctor);
}
-
+
return Ctor(this, OptLevel);
}
@@ -1123,25 +1068,25 @@ ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
/// the dag combiner simplified the 255, we still want to match. RHS is the
/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
/// specified in the .td file (e.g. 255).
-bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const {
const APInt &ActualMask = RHS->getAPIntValue();
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
-
+
// If the actual mask exactly matches, success!
if (ActualMask == DesiredMask)
return true;
-
+
// If the actual AND mask is allowing unallowed bits, this doesn't match.
if (ActualMask.intersects(~DesiredMask))
return false;
-
+
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
return true;
-
+
// TODO: check to see if missing bits are just not demanded.
// Otherwise, this pattern doesn't match.
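
A concrete instance of the reasoning above, with numbers chosen for illustration:

// A .td pattern asks for (and LHS, 255), but the combiner shrank the
// constant to 127 because bit 7 of LHS is already known zero:
//   ActualMask = 0x7F, DesiredMask = 0xFF
APInt NeededMask = DesiredMask & ~ActualMask;   // == 0x80
if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) // bit 7 is known zero
  return true;                                  // the pattern still matches
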
@@ -1152,32 +1097,32 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
/// the dag combiner simplified the 255, we still want to match. RHS is the
/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
/// specified in the .td file (e.g. 255).
-bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const {
const APInt &ActualMask = RHS->getAPIntValue();
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
-
+
// If the actual mask exactly matches, success!
if (ActualMask == DesiredMask)
return true;
-
+
// If the actual AND mask is allowing unallowed bits, this doesn't match.
if (ActualMask.intersects(~DesiredMask))
return false;
-
+
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
-
+
APInt KnownZero, KnownOne;
CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
-
+
// If all the missing bits in the or are already known to be set, match!
if ((NeededMask & KnownOne) == NeededMask)
return true;
-
+
// TODO: check to see if missing bits are just not demanded.
-
+
// Otherwise, this pattern doesn't match.
return false;
}
@@ -1196,7 +1141,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
unsigned i = 2, e = InOps.size();
if (InOps[e-1].getValueType() == MVT::Flag)
--e; // Don't process a flag operand if it is here.
-
+
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
if ((Flags & 7) != 4 /*MEM*/) {
@@ -1210,25 +1155,25 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
- cerr << "Could not match memory address. Inline asm failure!\n";
- exit(1);
+ llvm_report_error("Could not match memory address. Inline asm"
+ " failure!");
}
-
+
// Add this to the output node.
- MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy();
+ EVT IntPtrTy = TLI.getPointerTy();
Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
IntPtrTy));
Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
i += 2;
}
}
-
+
// Add the flag input back if present.
if (e != InOps.size())
Ops.push_back(InOps.back());
}
-/// findFlagUse - Return use of MVT::Flag value produced by the specified
+/// findFlagUse - Return use of EVT::Flag value produced by the specified
/// SDNode.
///
static SDNode *findFlagUse(SDNode *N) {
@@ -1331,7 +1276,7 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
// Fold. But since Fold and FU are flagged together, this will create
// a cycle in the scheduling graph.
- MVT VT = Root->getValueType(Root->getNumValues()-1);
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
while (VT == MVT::Flag) {
SDNode *FU = findFlagUse(Root);
if (FU == NULL)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 6fd5df2b937d..ccc5e3c75c99 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -44,7 +44,7 @@ namespace llvm {
}
static std::string getEdgeDestLabel(const void *Node, unsigned i) {
- return ((const SDNode *) Node)->getValueType(i).getMVTString();
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
}
/// edgeTargetsEdgeSource - This method returns true if this outgoing edge
@@ -84,7 +84,7 @@ namespace llvm {
template<typename EdgeIter>
static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
SDValue Op = EI.getNode()->getOperand(EI.getOperand());
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
if (VT == MVT::Flag)
return "color=red,style=bold";
else if (VT == MVT::Other)
@@ -138,11 +138,11 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false,
- Title);
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ false, Title);
#else
- cerr << "SelectionDAG::viewGraph is only available in debug builds on "
- << "systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
@@ -158,8 +158,8 @@ void SelectionDAG::clearGraphAttrs() {
#ifndef NDEBUG
NodeGraphAttrs.clear();
#else
- cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
#endif
}
@@ -170,8 +170,8 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
#ifndef NDEBUG
NodeGraphAttrs[N] = Attrs;
#else
- cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
#endif
}
@@ -188,8 +188,8 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
else
return "";
#else
- cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
return std::string("");
#endif
}
@@ -200,8 +200,8 @@ void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
#ifndef NDEBUG
NodeGraphAttrs[N] = std::string("color=") + Color;
#else
- cerr << "SelectionDAG::setGraphColor is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
#endif
}
@@ -216,7 +216,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet
if (level >= 20) {
if (!printed) {
printed = true;
- DOUT << "setSubgraphColor hit max level\n";
+ DEBUG(errs() << "setSubgraphColor hit max level\n");
}
return true;
}
@@ -232,8 +232,8 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet
}
}
#else
- cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
#endif
return hit_limit;
}
@@ -255,8 +255,8 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
}
#else
- cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 83357e066009..a2baee42310a 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,18 +11,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -239,12 +241,23 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::UO_F64] = "__unorddf2";
Names[RTLIB::O_F32] = "__unordsf2";
Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
}
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) {
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
@@ -254,7 +267,7 @@ RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) {
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::f32) {
if (OpVT == MVT::f64)
return FPROUND_F64_F32;
@@ -273,7 +286,7 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
if (RetVT == MVT::i8)
return FPTOSINT_F32_I8;
@@ -312,7 +325,7 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
if (RetVT == MVT::i8)
return FPTOUINT_F32_I8;
@@ -351,7 +364,7 @@ RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) {
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
if (RetVT == MVT::f32)
return SINTTOFP_I32_F32;
@@ -385,7 +398,7 @@ RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) {
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) {
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
if (RetVT == MVT::f32)
return UINTTOFP_I32_F32;
@@ -439,8 +452,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
CCs[RTLIB::O_F64] = ISD::SETEQ;
}
-TargetLowering::TargetLowering(TargetMachine &tm)
- : TM(tm), TD(TM.getTargetData()) {
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -490,12 +504,10 @@ TargetLowering::TargetLowering(TargetMachine &tm)
IsLittleEndian = TD->isLittleEndian();
UsesGlobalOffsetTable = false;
- ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType());
- ShiftAmtHandling = Undefined;
+ ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
- allowUnalignedMemoryAccesses = false;
benefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
@@ -515,14 +527,62 @@ TargetLowering::TargetLowering(TargetMachine &tm)
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
// Tell Legalize whether the assembler supports DEBUG_LOC.
- const TargetAsmInfo *TASM = TM.getTargetAsmInfo();
+ const MCAsmInfo *TASM = TM.getMCAsmInfo();
if (!TASM || !TASM->hasDotLocAndDotFile())
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
}
-TargetLowering::~TargetLowering() {}
+TargetLowering::~TargetLowering() {
+ delete &TLOF;
+}
+
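
Given the ownership note on the constructor, a target hands the base class a heap-allocated object-file-lowering instance; a sketch with a hypothetical target (MyTargetLowering is not part of this patch):

// The base-class destructor above deletes TLOF, so the instance must be
// heap-allocated and must not be shared between TargetLowering objects.
MyTargetLowering::MyTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {}
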
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT,
+ TargetLowering* TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ EVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+ }
+
+ return 1;
+}
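
The halving loop above can be traced with the v8f32 case cited in the getVectorTypeBreakdown comment below, assuming a target (e.g. one with SSE1) where v4f32 is legal but v8f32 is not:

// VT = v8f32: the first iteration halves NumElts 8 -> 4 and doubles
// NumVectorRegs 1 -> 2; v4f32 is legal, so the loop stops. Result:
//   NumIntermediates == 2, IntermediateVT == v4f32, RegisterVT == v4f32,
// and the function returns 2 (two v4f32 registers, no expansion).
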
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
@@ -546,13 +606,13 @@ void TargetLowering::computeRegisterProperties() {
// Every integer value type larger than this largest register takes twice as
// many registers to represent as the previous ValueType.
for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
- MVT EVT = (MVT::SimpleValueType)ExpandedReg;
- if (!EVT.isInteger())
+ EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
+ if (!ExpandedVT.isInteger())
break;
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction(EVT, Expand);
+ ValueTypeActions.setTypeAction(ExpandedVT, Expand);
}
// Inspect all of the ValueType's smaller than the largest integer
@@ -560,7 +620,7 @@ void TargetLowering::computeRegisterProperties() {
unsigned LegalIntReg = LargestIntReg;
for (unsigned IntReg = LargestIntReg - 1;
IntReg >= (unsigned)MVT::i1; --IntReg) {
- MVT IVT = (MVT::SimpleValueType)IntReg;
+ EVT IVT = (MVT::SimpleValueType)IntReg;
if (isTypeLegal(IVT)) {
LegalIntReg = IntReg;
} else {
@@ -608,20 +668,20 @@ void TargetLowering::computeRegisterProperties() {
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (!isTypeLegal(VT)) {
- MVT IntermediateVT, RegisterVT;
+ MVT IntermediateVT;
+ EVT RegisterVT;
unsigned NumIntermediates;
NumRegistersForVT[i] =
- getVectorTypeBreakdown(VT,
- IntermediateVT, NumIntermediates,
- RegisterVT);
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
// Determine if there is a legal wider type.
bool IsLegalWiderType = false;
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
+ EVT SVT = (MVT::SimpleValueType)nVT;
if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
SVT.getVectorNumElements() > NElts) {
TransformToType[i] = SVT;
@@ -631,7 +691,7 @@ void TargetLowering::computeRegisterProperties() {
}
}
if (!IsLegalWiderType) {
- MVT NVT = VT.getPow2VectorType();
+ EVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
@@ -650,11 +710,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
}
-MVT TargetLowering::getSetCCResultType(MVT VT) const {
- return getValueType(TD->getIntPtrType());
+MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+ return PointerTy.SimpleTy;
}
-
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
@@ -664,13 +723,13 @@ MVT TargetLowering::getSetCCResultType(MVT VT) const {
/// register. It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
///
-unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
- MVT &IntermediateVT,
+unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
unsigned &NumIntermediates,
- MVT &RegisterVT) const {
+ EVT &RegisterVT) const {
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType();
+ EVT EltTy = VT.getVectorElementType();
unsigned NumVectorRegs = 1;
@@ -683,19 +742,20 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
NumElts >>= 1;
NumVectorRegs <<= 1;
}
NumIntermediates = NumVectorRegs;
- MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
IntermediateVT = NewVT;
- MVT DestVT = getRegisterType(NewVT);
+ EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
if (DestVT.bitsLT(NewVT)) {
// Value is expanded, e.g. i64 -> i16.
@@ -714,7 +774,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
/// If there is no vector type that we want to widen to, returns MVT::Other
/// When and where to widen is target dependent based on the cost of
/// scalarizing vs using the wider vector type.
-MVT TargetLowering::getWidenVectorType(MVT VT) const {
+EVT TargetLowering::getWidenVectorType(EVT VT) const {
assert(VT.isVector());
if (isTypeLegal(VT))
return VT;
@@ -781,7 +841,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
// if we can expand it to have all bits set, do it
if (C->getAPIntValue().intersects(~Demanded)) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
DAG.getConstant(Demanded &
C->getAPIntValue(),
@@ -822,7 +882,7 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
- MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, Op.getValueType())) {
// We found a type with free casts.
@@ -1008,7 +1068,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
if ((KnownOne & KnownOne2) == KnownOne) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
@@ -1023,7 +1083,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// if we can expand it to have all bits set, do it
if (Expanded.isAllOnesValue()) {
if (Expanded != C->getAPIntValue()) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
TLO.DAG.getConstant(Expanded, VT));
return TLO.CombineTo(Op, New);
@@ -1099,7 +1159,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
@@ -1116,7 +1176,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
case ISD::SRL:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned ShAmt = SA->getZExtValue();
unsigned VTSize = VT.getSizeInBits();
SDValue InOp = Op.getOperand(0);
@@ -1168,7 +1228,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0), Op.getOperand(1)));
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned ShAmt = SA->getZExtValue();
// If the shift count is an invalid immediate, don't do anything.
@@ -1205,7 +1265,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
@@ -1272,7 +1332,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::SIGN_EXTEND: {
- MVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
@@ -1371,7 +1431,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::AssertZext: {
- MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
@@ -1385,7 +1445,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
#if 0
// If this is an FP->Int bitcast and if the sign bit is the only thing that
// is demanded, turn this into a FGETSIGN.
- if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) &&
+ if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
!MVT::isVector(Op.getOperand(0).getValueType())) {
// Only do this xform if FGETSIGN is valid or if before legalize.
@@ -1492,7 +1552,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// to handle some common cases.
// Fall back to ComputeMaskedBits to catch other known cases.
- MVT OpVT = Val.getValueType();
+ EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getSizeInBits();
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
@@ -1504,10 +1564,11 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
-TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI, DebugLoc dl) const {
SelectionDAG &DAG = DCI.DAG;
+ LLVMContext &Context = *DAG.getContext();
// These setcc operations always fold.
switch (Cond) {
@@ -1518,316 +1579,321 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
case ISD::SETTRUE2: return DAG.getConstant(1, VT);
}
+ if (isa<ConstantSDNode>(N0.getNode())) {
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ }
+
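Hoisting this canonicalization to the top means every later pattern match in this function can assume any constant sits on the RHS. Swapping the operands also swaps the asymmetric predicates; a small self-contained model of what ISD::getSetCCSwappedOperands does for the integer conditions (the enum is local to the example):

    #include <cassert>

    enum Cond { SETLT, SETGT, SETLE, SETGE, SETEQ, SETNE };

    // Model of ISD::getSetCCSwappedOperands: the predicate that holds
    // after the two operands of a setcc trade places.
    static Cond swapOperands(Cond C) {
      switch (C) {
      case SETLT: return SETGT;
      case SETGT: return SETLT;
      case SETLE: return SETGE;
      case SETGE: return SETLE;
      default:    return C;   // EQ/NE are symmetric.
      }
    }

    int main() {
      // (4 < x) becomes (x > 4) once the constant moves to the RHS.
      assert(swapOperands(SETLT) == SETGT);
      assert(swapOperands(SETEQ) == SETEQ);
      return 0;
    }
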
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
- if (isa<ConstantSDNode>(N0.getNode())) {
- return DAG.FoldSetCC(VT, N0, N1, Cond, dl);
- } else {
- // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
- // equality comparison, then we're just comparing whether X itself is
- // zero.
- if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
- N0.getOperand(0).getOpcode() == ISD::CTLZ &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
- if ((C1 == 0) == (Cond == ISD::SETEQ)) {
- // (srl (ctlz x), 5) == 0 -> X != 0
- // (srl (ctlz x), 5) != 1 -> X != 0
- Cond = ISD::SETNE;
- } else {
- // (srl (ctlz x), 5) != 0 -> X == 0
- // (srl (ctlz x), 5) == 1 -> X == 0
- Cond = ISD::SETEQ;
- }
- SDValue Zero = DAG.getConstant(0, N0.getValueType());
- return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
- Zero, Cond);
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
}
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
}
+ }
- // If the LHS is '(and load, const)', the RHS is 0,
- // the test is for equality or unsigned, and all 1 bits of the const are
- // in the same partial word, see if we can shorten the load.
- if (DCI.isBeforeLegalize() &&
- N0.getOpcode() == ISD::AND && C1 == 0 &&
- N0.getNode()->hasOneUse() &&
- isa<LoadSDNode>(N0.getOperand(0)) &&
- N0.getOperand(0).getNode()->hasOneUse() &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
- LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
- uint64_t bestMask = 0;
- unsigned bestWidth = 0, bestOffset = 0;
- if (!Lod->isVolatile() && Lod->isUnindexed() &&
- // FIXME: This uses getZExtValue() below so it only works on i64 and
- // below.
- N0.getValueType().getSizeInBits() <= 64) {
- unsigned origWidth = N0.getValueType().getSizeInBits();
- // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
- // 8 bits, but have to be careful...
- if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
- origWidth = Lod->getMemoryVT().getSizeInBits();
- uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- for (unsigned width = origWidth / 2; width>=8; width /= 2) {
- uint64_t newMask = (1ULL << width) - 1;
- for (unsigned offset=0; offset<origWidth/width; offset++) {
- if ((newMask & Mask) == Mask) {
- if (!TD->isLittleEndian())
- bestOffset = (origWidth/width - offset - 1) * (width/8);
- else
- bestOffset = (uint64_t)offset * (width/8);
- bestMask = Mask >> (offset * (width/8) * 8);
- bestWidth = width;
- break;
- }
- newMask = newMask << width;
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ uint64_t bestMask = 0;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed() &&
+ // FIXME: This uses getZExtValue() below so it only works on i64 and
+ // below.
+ N0.getValueType().getSizeInBits() <= 64) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ uint64_t newMask = (1ULL << width) - 1;
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!TD->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask >> (offset * (width/8) * 8);
+ bestWidth = width;
+ break;
}
+ newMask = newMask << width;
}
}
- if (bestWidth) {
- MVT newVT = MVT::getIntegerVT(bestWidth);
- if (newVT.isRound()) {
- MVT PtrType = Lod->getOperand(1).getValueType();
- SDValue Ptr = Lod->getBasePtr();
- if (bestOffset != 0)
- Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
- DAG.getConstant(bestOffset, PtrType));
- unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
- SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getSrcValue(),
- Lod->getSrcValueOffset() + bestOffset,
- false, NewAlign);
- return DAG.getSetCC(dl, VT,
- DAG.getNode(ISD::AND, dl, newVT, NewLoad,
- DAG.getConstant(bestMask, newVT)),
- DAG.getConstant(0LL, newVT), Cond);
- }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getSrcValue(),
+ Lod->getSrcValueOffset() + bestOffset,
+ false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask, newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
}
}
+ }
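
To see what this search finds, consider (and (load i32 p), 0xFF00) == 0 on a little-endian target: the smallest aligned chunk covering the mask is the byte at offset 1, so the test can use an i8 load instead of the full i32 load. A simplified standalone model of the width/offset search (it drops the big-endian branch and the bestMask bookkeeping of the real code):

    #include <cassert>
    #include <cstdint>

    // Find the narrowest power-of-two width whose aligned chunk covers
    // every set bit of Mask, as the loop above does (little-endian only).
    static bool narrow(uint64_t Mask, unsigned origWidth,
                       unsigned &bestWidth, unsigned &bestOffset) {
      bestWidth = 0;
      for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
        uint64_t newMask = (1ULL << width) - 1;
        for (unsigned offset = 0; offset < origWidth / width; ++offset) {
          if ((newMask & Mask) == Mask) {
            bestOffset = offset * (width / 8);
            bestWidth = width;
            break;
          }
          newMask <<= width;
        }
      }
      return bestWidth != 0;
    }

    int main() {
      unsigned w, off;
      // (and (load i32 p), 0xFF00) == 0 can use an i8 load at offset 1.
      assert(narrow(0xFF00, 32, w, off) && w == 8 && off == 1);
      return 0;
    }
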
- // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
- if (N0.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
-
- // If the comparison constant has bits in the upper part, the
- // zero-extended value could never match.
- if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
- C1.getBitWidth() - InSize))) {
- switch (Cond) {
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETEQ: return DAG.getConstant(0, VT);
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETNE: return DAG.getConstant(1, VT);
- case ISD::SETGT:
- case ISD::SETGE:
- // True if the sign bit of C1 is set.
- return DAG.getConstant(C1.isNegative(), VT);
- case ISD::SETLT:
- case ISD::SETLE:
- // True if the sign bit of C1 isn't set.
- return DAG.getConstant(C1.isNonNegative(), VT);
- default:
- break;
- }
- }
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
- // Otherwise, we can perform the comparison with the low bits.
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
switch (Cond) {
- case ISD::SETEQ:
- case ISD::SETNE:
case ISD::SETUGT:
case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
case ISD::SETULT:
case ISD::SETULE:
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(APInt(C1).trunc(InSize),
- N0.getOperand(0).getValueType()),
- Cond);
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
default:
- break; // todo, be more careful with signed comparisons
- }
- } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
- MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
- unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
- MVT ExtDstTy = N0.getValueType();
- unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
-
- // If the extended part has any inconsistent bits, it cannot ever
- // compare equal. In other words, they have to be all ones or all
- // zeros.
- APInt ExtBits =
- APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
- if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
- return DAG.getConstant(Cond == ISD::SETNE, VT);
-
- SDValue ZextOp;
- MVT Op0Ty = N0.getOperand(0).getValueType();
- if (Op0Ty == ExtSrcTy) {
- ZextOp = N0.getOperand(0);
- } else {
- APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
- ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
- DAG.getConstant(Imm, Op0Ty));
- }
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(ZextOp.getNode());
- // Otherwise, make this a use of a zext.
- return DAG.getSetCC(dl, VT, ZextOp,
- DAG.getConstant(C1 & APInt::getLowBitsSet(
- ExtDstTyBits,
- ExtSrcTyBits),
- ExtDstTy),
- Cond);
- } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
-
- // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
- if (N0.getOpcode() == ISD::SETCC) {
- bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
- if (TrueWhenTrue)
- return N0;
-
- // Invert the condition.
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
- N0.getOperand(0).getValueType().isInteger());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
- }
-
- if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
- N0.getOperand(0).getOpcode() == ISD::XOR &&
- N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
- // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
- // can only do this if the top bits are known zero.
- unsigned BitWidth = N0.getValueSizeInBits();
- if (DAG.MaskedValueIsZero(N0,
- APInt::getHighBitsSet(BitWidth,
- BitWidth-1))) {
- // Okay, get the un-inverted input value.
- SDValue Val;
- if (N0.getOpcode() == ISD::XOR)
- Val = N0.getOperand(0);
- else {
- assert(N0.getOpcode() == ISD::AND &&
- N0.getOperand(0).getOpcode() == ISD::XOR);
- // ((X^1)&1)^1 -> X & 1
- Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
- N0.getOperand(0).getOperand(0),
- N0.getOperand(1));
- }
- return DAG.getSetCC(dl, VT, Val, N1,
- Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
- }
+ break;
}
}
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT)==Legal))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+ Cond);
+ break;
+ }
+ default:
+ break; // TODO: be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the extended part has any inconsistent bits, it cannot ever
+ // compare equal. In other words, they have to be all ones or all
+ // zeros.
+ APInt ExtBits =
+ APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
+ if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
- APInt MinVal, MaxVal;
- unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
- if (ISD::isSignedIntSetCC(Cond)) {
- MinVal = APInt::getSignedMinValue(OperandBitSize);
- MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
} else {
- MinVal = APInt::getMinValue(OperandBitSize);
- MaxVal = APInt::getMaxValue(OperandBitSize);
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
}
-
- // Canonicalize GE/LE comparisons to use GT/LT comparisons.
- if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
- if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
- // X >= C0 --> X > (C0-1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C1-1, N1.getValueType()),
- (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
+ if (TrueWhenTrue)
+ return N0;
+
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
-
- if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
- if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
- // X <= C0 --> X < (C0+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C1+1, N1.getValueType()),
- (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
}
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
- return DAG.getConstant(0, VT); // X < MIN --> false
- if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
- return DAG.getConstant(1, VT); // X >= MIN --> true
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
- return DAG.getConstant(0, VT); // X > MAX --> false
- if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
- return DAG.getConstant(1, VT); // X <= MAX --> true
-
- // Canonicalize setgt X, Min --> setne X, Min
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
- // Canonicalize setlt X, Max --> setne X, Max
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
-
- // If we have setult X, 1, turn it into seteq X, 0
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, N0.getValueType()),
- ISD::SETEQ);
- // If we have setugt X, Max-1, turn it into seteq X, Max
- else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MaxVal, N0.getValueType()),
- ISD::SETEQ);
-
- // If we have "setcc X, C0", check to see if we can shrink the immediate
- // by changing cc.
-
- // SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
- C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(0, N1.getValueType()),
- ISD::SETLT);
-
- // SETULT X, SINTMIN -> SETGT X, -1
- if (Cond == ISD::SETULT &&
- C1 == APInt::getSignedMinValue(OperandBitSize)) {
- SDValue ConstMinusOne =
- DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
- N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
- }
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
- // Fold bit comparisons when we can.
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- MVT ShiftTy = DCI.isBeforeLegalize() ?
- getPointerTy() : getShiftAmountTy();
- if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
- // Perform the xform if the AND RHS is a single bit.
- if (isPowerOf2_64(AndRHS->getZExtValue())) {
- return DAG.getNode(ISD::SRL, dl, VT, N0,
- DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
- ShiftTy));
- }
- } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
- // (X & 8) == 8 --> (X & 8) >> 3
- // Perform the xform if C1 is a single bit.
- if (C1.isPowerOf2()) {
- return DAG.getNode(ISD::SRL, dl, VT, N0,
- DAG.getConstant(C1.logBase2(), ShiftTy));
- }
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy();
+ if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (isPowerOf2_64(AndRHS->getZExtValue())) {
+ return DAG.getNode(ISD::SRL, dl, VT, N0,
+ DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
+ ShiftTy));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::SRL, dl, VT, N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy));
}
}
- }
- } else if (isa<ConstantSDNode>(N0.getNode())) {
- // Ensure that the constant occurs on the RHS.
- return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ }
}
if (isa<ConstantFPSDNode>(N0.getNode())) {
@@ -1840,7 +1906,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
if (CFP->getValueAPF().isNaN()) {
// If an operand is known to be a nan, we can fold it.
switch (ISD::getUnorderedFlavor(Cond)) {
- default: assert(0 && "Unknown flavor!");
+ default: llvm_unreachable("Unknown flavor!");
case 0: // Known false.
return DAG.getConstant(0, VT);
case 1: // Known true.
@@ -1856,6 +1922,43 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
// materialize 0.0.
if (Cond == ISD::SETO || Cond == ISD::SETUO)
return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
}
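
The rewrite above leans on a simple fact about the ordering of floats: no ordered value lies below -inf (or above +inf), so an awkward equality against an infinity can become a one-sided bound when only the latter's condition code is legal on the target. A quick standalone check of the -inf case:

    #include <cassert>
    #include <limits>

    int main() {
      // Why "x == -inf" can become "x <= -inf": nothing ordered is below
      // -inf, so the predicates agree for every x (including NaN, where
      // both ordered comparisons are false).
      double NegInf = -std::numeric_limits<double>::infinity();
      double vals[] = { NegInf, -1.0, 0.0,
                        std::numeric_limits<double>::infinity() };
      for (double x : vals)
        assert((x == NegInf) == (x <= NegInf));
      return 0;
    }
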
if (N0 == N1) {
@@ -2000,7 +2103,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
SDValue Temp;
if (N0.getValueType() == MVT::i1 && foldBooleans) {
switch (Cond) {
- default: assert(0 && "Unknown integer setcc!");
+ default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
N0 = DAG.getNOT(dl, Temp, MVT::i1);
@@ -2090,7 +2193,7 @@ bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
const MachineFrameInfo *MFI) const {
if (LD->getChain() != Base->getChain())
return false;
- MVT VT = LD->getValueType(0);
+ EVT VT = LD->getValueType(0);
if (VT.getSizeInBits() / 8 != Bytes)
return false;
@@ -2171,7 +2274,7 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
-const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
if (ConstraintVT.isInteger())
return "r";
if (ConstraintVT.isFloatingPoint())
@@ -2244,14 +2347,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::vector<unsigned> TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
return std::vector<unsigned>();
}
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint[0] != '{')
return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2280,7 +2383,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
+ if (StringsEqualNoCase(RegName, RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
@@ -2310,7 +2413,7 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
- default: assert(0 && "Unknown constraint type!");
+ default: llvm_unreachable("Unknown constraint type!");
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -2406,10 +2509,13 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
- // that matches labels).
- if (isa<BasicBlock>(OpInfo.CallOperandVal) ||
- isa<ConstantInt>(OpInfo.CallOperandVal))
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
return;
+ }
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
@@ -2464,7 +2570,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
// Check to see if we can do this.
@@ -2521,7 +2627,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
// Check to see if we can do this.
@@ -2569,45 +2675,3 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(magics.s-1, getShiftAmountTy()));
}
}
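
For context on the two builders touched above: both replace a division by a constant with a multiply-high and a shift, with magic numbers derived from the divisor (the APInt magic helpers used by this code). A hedged standalone sketch of the unsigned 32-bit divide-by-3 case, using the classic magic constant 0xAAAAAAAB with a shift of 33:

    #include <cassert>
    #include <cstdint>

    // Unsigned divide-by-3 as multiply-high plus shift -- the shape of
    // code BuildUDIV emits. 0xAAAAAAAB and the shift of 33 are the
    // standard magic values for d == 3; other divisors get theirs from
    // the magic-number computation.
    static uint32_t udiv3(uint32_t n) {
      return (uint32_t)(((uint64_t)n * 0xAAAAAAABULL) >> 33);
    }

    int main() {
      const uint32_t tests[] = { 0, 1, 2, 3, 299, 0xFFFFFFFFu };
      for (uint32_t n : tests)
        assert(udiv3(n) == n / 3);
      return 0;
    }
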
-
-/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
-/// node that don't prevent tail call optimization.
-static SDValue IgnoreHarmlessInstructions(SDValue node) {
- // Found call return.
- if (node.getOpcode() == ISD::CALL) return node;
- // Ignore MERGE_VALUES. Will have at least one operand.
- if (node.getOpcode() == ISD::MERGE_VALUES)
- return IgnoreHarmlessInstructions(node.getOperand(0));
- // Ignore ANY_EXTEND node.
- if (node.getOpcode() == ISD::ANY_EXTEND)
- return IgnoreHarmlessInstructions(node.getOperand(0));
- if (node.getOpcode() == ISD::TRUNCATE)
- return IgnoreHarmlessInstructions(node.getOperand(0));
- // Any other node type.
- return node;
-}
-
-bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
- SDValue Ret) {
- unsigned NumOps = Ret.getNumOperands();
- // ISD::CALL results:(value0, ..., valuen, chain)
- // ISD::RET operands:(chain, value0, flag0, ..., valuen, flagn)
- // Value return:
- // Check that operand of the RET node sources from the CALL node. The RET node
- // has at least two operands. Operand 0 holds the chain. Operand 1 holds the
- // value.
- // Also we need to check that there is no code in between the call and the
- // return. Hence we also check that the incoming chain to the return sources
- // from the outgoing chain of the call.
- if (NumOps > 1 &&
- IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) &&
- Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
- return true;
- // void return: The RET node has the chain result value of the CALL node as
- // input.
- if (NumOps == 1 &&
- Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
- return true;
-
- return false;
-}
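
The helper deleted above peeled "harmless" wrapper nodes (MERGE_VALUES, ANY_EXTEND, TRUNCATE) sitting between a RET and the CALL feeding it, so such wrappers would not defeat tail-call detection. A tiny standalone model of that unwrapping, with a toy node type standing in for SDValue:

    #include <cassert>
    #include <string>

    // Toy stand-in for an SDValue: an opcode name and one operand.
    struct Node { std::string Op; Node *Operand0; };

    // Model of the deleted IgnoreHarmlessInstructions: peel wrapper
    // opcodes until something else (ideally the CALL) is reached.
    static Node *ignoreHarmless(Node *N) {
      if (N->Op == "MERGE_VALUES" || N->Op == "ANY_EXTEND" ||
          N->Op == "TRUNCATE")
        return ignoreHarmless(N->Operand0);
      return N;
    }

    int main() {
      Node Call = { "CALL", nullptr };
      Node Ext  = { "ANY_EXTEND", &Call };
      Node Mv   = { "MERGE_VALUES", &Ext };
      assert(ignoreHarmless(&Mv) == &Call); // wrappers don't block the match
      return 0;
    }
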
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 2402f81bb04f..25a499b88968 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -62,9 +62,11 @@ namespace {
Constant *GetFrameMap(Function &F);
const Type* GetConcreteStackEntryType(Function &F);
void CollectRoots(Function &F);
- static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
int Idx1, const char *Name);
- static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
int Idx1, int Idx2, const char *Name);
};
@@ -93,7 +95,7 @@ namespace {
public:
EscapeEnumerator(Function &F, const char *N = "cleanup")
- : F(F), CleanupBBName(N), State(0) {}
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
IRBuilder<> *Next() {
switch (State) {
@@ -136,8 +138,9 @@ namespace {
return 0;
// Create a cleanup block.
- BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F);
- UnwindInst *UI = new UnwindInst(CleanupBB);
+ BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(),
+ CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB);
// Transform the 'call' instructions into 'invoke's branching to the
// cleanup block. Go in reverse order to make prettier BB names.
@@ -186,8 +189,7 @@ ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
Constant *ShadowStackGC::GetFrameMap(Function &F) {
// doInitialization creates the abstract type of this value.
-
- Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
+ const Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
// Truncate the ShadowStackDescriptor if some metadata is null.
unsigned NumMeta = 0;
@@ -200,17 +202,18 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
}
Constant *BaseElts[] = {
- ConstantInt::get(Type::Int32Ty, Roots.size(), false),
- ConstantInt::get(Type::Int32Ty, NumMeta, false),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false),
};
Constant *DescriptorElts[] = {
- ConstantStruct::get(BaseElts, 2),
+ ConstantStruct::get(F.getContext(), BaseElts, 2, false),
ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
Metadata.begin(), NumMeta)
};
- Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
+ Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2,
+ false);
std::string TypeName("gc_map.");
TypeName += utostr(NumMeta);
@@ -229,13 +232,14 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
// to be a ModulePass (which means it cannot be in the 'llc' pipeline
// (which uses a FunctionPassManager (which segfaults (not asserts) if
// provided a ModulePass))).
- Constant *GV = new GlobalVariable(FrameMap->getType(), true,
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
GlobalVariable::InternalLinkage,
- FrameMap, "__gc_" + F.getName(),
- F.getParent());
+ FrameMap, "__gc_" + F.getName());
- Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
- ConstantInt::get(Type::Int32Ty, 0) };
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
}
@@ -245,7 +249,7 @@ const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
EltTys.push_back(StackEntryTy);
for (size_t I = 0; I != Roots.size(); I++)
EltTys.push_back(Roots[I].second->getAllocatedType());
- Type *Ty = StructType::get(EltTys);
+ Type *Ty = StructType::get(F.getContext(), EltTys);
std::string TypeName("gc_stackentry.");
TypeName += F.getName();
@@ -263,9 +267,11 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
// void *Meta[]; // May be absent for roots without metadata.
// };
std::vector<const Type*> EltTys;
- EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :)
- EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array.
- StructType *FrameMapTy = StructType::get(EltTys);
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ StructType *FrameMapTy = StructType::get(M.getContext(), EltTys);
M.addTypeName("gc_map", FrameMapTy);
PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
@@ -274,12 +280,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
// FrameMap *Map; // Pointer to constant FrameMap.
// void *Roots[]; // Stack roots (in-place array, so we pretend).
// };
- OpaqueType *RecursiveTy = OpaqueType::get();
+ OpaqueType *RecursiveTy = OpaqueType::get(M.getContext());
EltTys.clear();
EltTys.push_back(PointerType::getUnqual(RecursiveTy));
EltTys.push_back(FrameMapPtrTy);
- PATypeHolder LinkTyH = StructType::get(EltTys);
+ PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys);
RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
StackEntryTy = cast<StructType>(LinkTyH.get());
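
Concretely, the IR types built in this hunk correspond to the C-style layout sketched below, as documented for the shadow-stack collector (the zero-length arrays model the in-place variable-length tails and are a compiler extension):

    #include <stdint.h>

    // C-level picture of the types initializeCustomLowering constructs.
    struct FrameMap {
      int32_t NumRoots;     // Number of roots in the stack frame.
      int32_t NumMeta;      // Number of metadata entries; may be < NumRoots.
      const void *Meta[0];  // Metadata for each root (variable-length tail).
    };

    struct StackEntry {
      struct StackEntry *Next;    // Caller's entry: the recursive link above.
      const struct FrameMap *Map; // Pointer to the constant FrameMap.
      void *Roots[0];             // Stack roots (in-place array).
    };

    // Head of the chain, created below with linkonce linkage.
    extern struct StackEntry *llvm_gc_root_chain;
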
@@ -292,10 +298,10 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
if (!Head) {
// If the root chain does not exist, insert a new one with linkonce
// linkage!
- Head = new GlobalVariable(StackEntryPtrTy, false,
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
GlobalValue::LinkOnceAnyLinkage,
Constant::getNullValue(StackEntryPtrTy),
- "llvm_gc_root_chain", &M);
+ "llvm_gc_root_chain");
} else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
@@ -338,11 +344,11 @@ void ShadowStackGC::CollectRoots(Function &F) {
}
GetElementPtrInst *
-ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
int Idx, int Idx2, const char *Name) {
- Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
- ConstantInt::get(Type::Int32Ty, Idx),
- ConstantInt::get(Type::Int32Ty, Idx2) };
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -351,10 +357,10 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
}
GetElementPtrInst *
-ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
int Idx, const char *Name) {
- Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
- ConstantInt::get(Type::Int32Ty, Idx) };
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -364,6 +370,8 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
/// runOnFunction - Insert code to maintain the shadow stack.
bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
// Find calls to llvm.gcroot.
CollectRoots(F);
@@ -388,13 +396,14 @@ bool ShadowStackGC::performCustomLowering(Function &F) {
// Initialize the map pointer and load the current head of the shadow stack.
Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
- Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0,1,"gc_frame.map");
AtEntry.CreateStore(FrameMap, EntryMapPtr);
// After all the allocas...
for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
// For each root, find the corresponding slot in the aggregate...
- Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root");
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
// And use it in lieu of the alloca.
AllocaInst *OriginalAlloca = Roots[I].second;
@@ -410,17 +419,19 @@ bool ShadowStackGC::performCustomLowering(Function &F) {
AtEntry.SetInsertPoint(IP->getParent(), IP);
// Push the entry onto the shadow stack.
- Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next");
- Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead");
- AtEntry.CreateStore(CurrentHead, EntryNextPtr);
- AtEntry.CreateStore(NewHeadVal, Head);
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
// For each instruction that escapes...
EscapeEnumerator EE(F, "gc_cleanup");
while (IRBuilder<> *AtExit = EE.Next()) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
- Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
"gc_frame.next");
Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
AtExit->CreateStore(SavedHead, Head);
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index e44a138cf925..8070570cb84b 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -158,7 +158,7 @@ void PEI::initShrinkWrappingInfo() {
// via --shrink-wrap-func=<funcname>.
#ifndef NDEBUG
if (ShrinkWrapFunc != "") {
- std::string MFName = MF->getFunction()->getName();
+ std::string MFName = MF->getFunction()->getNameStr();
ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
}
#endif
@@ -185,8 +185,8 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
initShrinkWrappingInfo();
DEBUG(if (ShrinkWrapThisFunction) {
- DOUT << "Place CSR spills/restores for "
- << MF->getFunction()->getName() << "\n";
+ errs() << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
});
if (calculateSets(Fn))
@@ -297,20 +297,26 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) {
}
}
- DEBUG(if (ShrinkWrapDebugging >= Details) {
- DOUT << "-----------------------------------------------------------\n";
- DOUT << " Antic/Avail Sets:\n";
- DOUT << "-----------------------------------------------------------\n";
- DOUT << "iterations = " << iterations << "\n";
- DOUT << "-----------------------------------------------------------\n";
- DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
- DOUT << "-----------------------------------------------------------\n";
- for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- dumpSets(MBB);
+ DEBUG({
+ if (ShrinkWrapDebugging >= Details) {
+ errs()
+ << "-----------------------------------------------------------\n"
+ << " Antic/Avail Sets:\n"
+ << "-----------------------------------------------------------\n"
+ << "iterations = " << iterations << "\n"
+ << "-----------------------------------------------------------\n"
+ << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+ << "-----------------------------------------------------------\n";
+
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+
+ errs()
+ << "-----------------------------------------------------------\n";
}
- DOUT << "-----------------------------------------------------------\n";
});
}
@@ -357,8 +363,8 @@ bool PEI::calculateSets(MachineFunction &Fn) {
// If no CSRs used, we are done.
if (CSI.empty()) {
DEBUG(if (ShrinkWrapThisFunction)
- DOUT << "DISABLED: " << Fn.getFunction()->getName()
- << ": uses no callee-saved registers\n");
+ errs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": uses no callee-saved registers\n");
return false;
}
@@ -377,8 +383,8 @@ bool PEI::calculateSets(MachineFunction &Fn) {
// implementation to functions with <= 500 MBBs.
if (Fn.size() > 500) {
DEBUG(if (ShrinkWrapThisFunction)
- DOUT << "DISABLED: " << Fn.getFunction()->getName()
- << ": too large (" << Fn.size() << " MBBs)\n");
+ errs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
ShrinkWrapThisFunction = false;
}
@@ -459,8 +465,8 @@ bool PEI::calculateSets(MachineFunction &Fn) {
}
if (allCSRUsesInEntryBlock) {
- DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
- << ": all CSRs used in EntryBlock\n");
+ DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in EntryBlock\n");
ShrinkWrapThisFunction = false;
} else {
bool allCSRsUsedInEntryFanout = true;
@@ -471,8 +477,8 @@ bool PEI::calculateSets(MachineFunction &Fn) {
allCSRsUsedInEntryFanout = false;
}
if (allCSRsUsedInEntryFanout) {
- DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
- << ": all CSRs used in imm successors of EntryBlock\n");
+ DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
ShrinkWrapThisFunction = false;
}
}
@@ -498,9 +504,9 @@ bool PEI::calculateSets(MachineFunction &Fn) {
if (dominatesExitNodes) {
CSRUsedInChokePoints |= CSRUsed[MBB];
if (CSRUsedInChokePoints == UsedCSRegs) {
- DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
- << ": all CSRs used in choke point(s) at "
- << getBasicBlockName(MBB) << "\n");
+ DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
ShrinkWrapThisFunction = false;
break;
}
@@ -514,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) {
return false;
DEBUG({
- DOUT << "ENABLED: " << Fn.getFunction()->getName();
+ errs() << "ENABLED: " << Fn.getFunction()->getName();
if (HasFastExitPath)
- DOUT << " (fast exit path)";
- DOUT << "\n";
+ errs() << " (fast exit path)";
+ errs() << "\n";
if (ShrinkWrapDebugging >= BasicInfo) {
- DOUT << "------------------------------"
+ errs() << "------------------------------"
<< "-----------------------------\n";
- DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
if (ShrinkWrapDebugging >= Details) {
- DOUT << "------------------------------"
+ errs() << "------------------------------"
<< "-----------------------------\n";
dumpAllUsed();
}
@@ -596,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
addedUses = true;
blks.push_back(SUCC);
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- DOUT << getBasicBlockName(MBB)
+ errs() << getBasicBlockName(MBB)
<< "(" << stringifyCSRegSet(prop) << ")->"
<< "successor " << getBasicBlockName(SUCC) << "\n");
}
@@ -612,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
addedUses = true;
blks.push_back(PRED);
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- DOUT << getBasicBlockName(MBB)
+ errs() << getBasicBlockName(MBB)
<< "(" << stringifyCSRegSet(prop) << ")->"
<< "predecessor " << getBasicBlockName(PRED) << "\n");
}
@@ -650,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
CSRUsed[EXB] |= loopSpills;
addedUses = true;
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- DOUT << "LOOP " << getBasicBlockName(MBB)
+ errs() << "LOOP " << getBasicBlockName(MBB)
<< "(" << stringifyCSRegSet(loopSpills) << ")->"
<< getBasicBlockName(EXB) << "\n");
if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
@@ -717,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
blks.push_back(MBB);
DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
- DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRSave[MBB]) << "\n");
return placedSpills;
@@ -778,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
blks.push_back(MBB);
DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
- DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRRestore[MBB]) << "\n");
return placedRestores;
@@ -802,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
++iterations;
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- DOUT << "iter " << iterations
+ errs() << "iter " << iterations
<< " --------------------------------------------------\n");
// Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
@@ -852,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
numSRReduced += numSRReducedThisFunc;
DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
- DOUT << "-----------------------------------------------------------\n";
- DOUT << "total iterations = " << iterations << " ( "
+ errs() << "-----------------------------------------------------------\n";
+ errs() << "total iterations = " << iterations << " ( "
<< Fn.getFunction()->getName()
<< " " << numSRReducedThisFunc
<< " " << Fn.size()
<< " )\n";
- DOUT << "-----------------------------------------------------------\n";
+ errs() << "-----------------------------------------------------------\n";
dumpSRSets();
- DOUT << "-----------------------------------------------------------\n";
+ errs() << "-----------------------------------------------------------\n";
if (numSRReducedThisFunc)
verifySpillRestorePlacement();
});
@@ -893,7 +899,7 @@ void PEI::findFastExitPath() {
// Check the immediate successors.
if (isReturnBlock(SUCC)) {
if (ShrinkWrapDebugging >= BasicInfo)
- DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ errs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
<< "->" << getBasicBlockName(SUCC) << "\n";
break;
}
@@ -911,7 +917,7 @@ void PEI::findFastExitPath() {
}
if (HasFastExitPath) {
if (ShrinkWrapDebugging >= BasicInfo)
- DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ errs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
<< "->" << exitPath << "\n";
break;
}
@@ -945,10 +951,10 @@ void PEI::verifySpillRestorePlacement() {
if (spilled.empty())
continue;
- DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(spilled)
- << " RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
if (CSRRestore[MBB].intersects(spilled)) {
restored |= (CSRRestore[MBB] & spilled);
@@ -977,11 +983,11 @@ void PEI::verifySpillRestorePlacement() {
if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
if (restored != spilled) {
CSRegSet notRestored = (spilled - restored);
- DOUT << MF->getFunction()->getName() << ": "
- << stringifyCSRegSet(notRestored)
- << " spilled at " << getBasicBlockName(MBB)
- << " are never restored on path to return "
- << getBasicBlockName(SBB) << "\n";
+ DEBUG(errs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n");
}
restored.clear();
}
@@ -998,10 +1004,10 @@ void PEI::verifySpillRestorePlacement() {
if (restored.empty())
continue;
- DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRSave[MBB])
- << " RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(restored) << "\n";
+ DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n");
if (CSRSave[MBB].intersects(restored)) {
spilled |= (CSRSave[MBB] & restored);
@@ -1025,23 +1031,24 @@ void PEI::verifySpillRestorePlacement() {
}
if (spilled != restored) {
CSRegSet notSpilled = (restored - spilled);
- DOUT << MF->getFunction()->getName() << ": "
- << stringifyCSRegSet(notSpilled)
- << " restored at " << getBasicBlockName(MBB)
- << " are never spilled\n";
+ DEBUG(errs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n");
}
}
}
// Debugging print methods.
std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ if (!MBB)
+ return "";
+
+ if (MBB->getBasicBlock())
+ return MBB->getBasicBlock()->getNameStr();
+
std::ostringstream name;
- if (MBB) {
- if (MBB->getBasicBlock())
- name << MBB->getBasicBlock()->getName();
- else
- name << "_MBB_" << MBB->getNumber();
- }
+ name << "_MBB_" << MBB->getNumber();
return name.str();
}
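The refactored getBasicBlockName now returns the IR block's name via
getNameStr() when one exists and only builds the synthesized fallback for
unnamed blocks; a null MBB short-circuits to an empty string. Expected
outputs, with illustrative inputs:

    getBasicBlockName(NULL)                 // -> ""
    getBasicBlockName(MBB of IR "entry")    // -> "entry"
    getBasicBlockName(unnamed MBB number 7) // -> "_MBB_7"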
@@ -1071,14 +1078,15 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
}
void PEI::dumpSet(const CSRegSet& s) {
- DOUT << stringifyCSRegSet(s) << "\n";
+ DEBUG(errs() << stringifyCSRegSet(s) << "\n");
}
void PEI::dumpUsed(MachineBasicBlock* MBB) {
- if (MBB) {
- DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
- }
+ DEBUG({
+ if (MBB)
+ errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ });
}
void PEI::dumpAllUsed() {
@@ -1090,27 +1098,29 @@ void PEI::dumpAllUsed() {
}
void PEI::dumpSets(MachineBasicBlock* MBB) {
- if (MBB) {
- DOUT << getBasicBlockName(MBB) << " | "
- << stringifyCSRegSet(CSRUsed[MBB]) << " | "
- << stringifyCSRegSet(AnticIn[MBB]) << " | "
- << stringifyCSRegSet(AnticOut[MBB]) << " | "
- << stringifyCSRegSet(AvailIn[MBB]) << " | "
- << stringifyCSRegSet(AvailOut[MBB]) << "\n";
- }
+ DEBUG({
+ if (MBB)
+ errs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ });
}
void PEI::dumpSets1(MachineBasicBlock* MBB) {
- if (MBB) {
- DOUT << getBasicBlockName(MBB) << " | "
- << stringifyCSRegSet(CSRUsed[MBB]) << " | "
- << stringifyCSRegSet(AnticIn[MBB]) << " | "
- << stringifyCSRegSet(AnticOut[MBB]) << " | "
- << stringifyCSRegSet(AvailIn[MBB]) << " | "
- << stringifyCSRegSet(AvailOut[MBB]) << " | "
- << stringifyCSRegSet(CSRSave[MBB]) << " | "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
- }
+ DEBUG({
+ if (MBB)
+ errs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ });
}
void PEI::dumpAllSets() {
@@ -1122,20 +1132,21 @@ void PEI::dumpAllSets() {
}
void PEI::dumpSRSets() {
- for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
- MBB != E; ++MBB) {
- if (! CSRSave[MBB].empty()) {
- DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRSave[MBB]);
- if (CSRRestore[MBB].empty())
- DOUT << "\n";
- }
- if (! CSRRestore[MBB].empty()) {
- if (! CSRSave[MBB].empty())
- DOUT << " ";
- DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
- }
- }
+ DEBUG({
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (!CSRSave[MBB].empty()) {
+ errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ errs() << '\n';
+ }
+
+      if (!CSRRestore[MBB].empty()) {
+        if (!CSRSave[MBB].empty())
+          errs() << " ";
+        errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+               << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+      }
+ }
+ });
}
#endif
diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h
new file mode 100644
index 000000000000..f69feaf9e570
--- /dev/null
+++ b/lib/CodeGen/SimpleHazardRecognizer.h
@@ -0,0 +1,89 @@
+//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SimpleHazardRecognizer class, which implements
+// a simple hazard-avoidance heuristic for scheduling, based on a coarse
+// classification of recently issued instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+ /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
+  /// a coarse classification and attempts to avoid grouping instructions
+  /// of a given class too densely together.
+ class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
+ /// Class - A simple classification for SUnits.
+ enum Class {
+ Other, Load, Store
+ };
+
+ /// Window - The Class values of the most recently issued
+ /// instructions.
+ Class Window[8];
+
+ /// getClass - Classify the given SUnit.
+ Class getClass(const SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayLoad())
+ return Load;
+ if (TID.mayStore())
+ return Store;
+ return Other;
+ }
+
+ /// Step - Rotate the existing entries in Window and insert the
+ /// given class value in position as the most recent.
+ void Step(Class C) {
+ std::copy(Window+1, array_endof(Window), Window);
+ Window[array_lengthof(Window)-1] = C;
+ }
+
+ public:
+ SimpleHazardRecognizer() : Window() {
+ Reset();
+ }
+
+ virtual HazardType getHazardType(SUnit *SU) {
+ Class C = getClass(SU);
+ if (C == Other)
+ return NoHazard;
+ unsigned Score = 0;
+ for (unsigned i = 0; i != array_lengthof(Window); ++i)
+ if (Window[i] == C)
+ Score += i + 1;
+ if (Score > array_lengthof(Window) * 2)
+ return Hazard;
+ return NoHazard;
+ }
+
+ virtual void Reset() {
+ for (unsigned i = 0; i != array_lengthof(Window); ++i)
+ Window[i] = Other;
+ }
+
+ virtual void EmitInstruction(SUnit *SU) {
+ Step(getClass(SU));
+ }
+
+ virtual void AdvanceCycle() {
+ Step(Other);
+ }
+ };
+}
+
+#endif
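To make the window scoring above concrete, here is a minimal standalone sketch
of the same rule in plain C++; this is not the LLVM API, and the free function
is made up for illustration:

    enum Class { Other, Load, Store };

    // Slot i of the 8-entry window contributes weight i+1 on a match,
    // so the most recently issued instructions dominate the score.
    bool wouldHazard(const Class (&Window)[8], Class C) {
      unsigned Score = 0;
      for (unsigned i = 0; i != 8; ++i)
        if (Window[i] == C)
          Score += i + 1;
      return Score > 8 * 2; // same threshold as getHazardType above
    }

    // Five most recent slots holding loads: 4+5+6+7+8 = 30 > 16, hazard.
    // Only the three oldest slots matching:  1+2+3    =  6, no hazard.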
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 7e7d6b8f68f1..9c283b0f0234 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -17,6 +17,7 @@
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -28,6 +29,8 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -51,13 +54,8 @@ EnableJoining("join-liveintervals",
cl::init(true));
static cl::opt<bool>
-NewHeuristic("new-coalescer-heuristic",
- cl::desc("Use new coalescer heuristic"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-CrossClassJoin("join-cross-class-copies",
- cl::desc("Coalesce cross register class copies"),
+DisableCrossClassJoin("disable-cross-class-join",
+ cl::desc("Avoid coalescing cross register class copies"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
@@ -65,7 +63,7 @@ PhysJoinTweak("tweak-phys-join-heuristics",
cl::desc("Tweak heuristics for joining phys reg with vr"),
cl::init(false), cl::Hidden);
-static RegisterPass<SimpleRegisterCoalescing>
+static RegisterPass<SimpleRegisterCoalescing>
X("simple-register-coalescing", "Simple Register Coalescing");
// Declare that we implement the RegisterCoalescer interface
@@ -74,6 +72,8 @@ static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<MachineLoopInfo>();
@@ -105,22 +105,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
LiveInterval &IntB,
MachineInstr *CopyMI) {
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ LiveIndex CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
assert(BLR != IntB.end() && "Live range not found!");
VNInfo *BValNo = BLR->valno;
-
+
// Get the location that B is defined at. Two options: either this value has
- // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->copy) return false;
+ if (!BValNo->getCopy()) return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
+
// AValNo is the value number in A that defines the copy, A3 in the example.
- LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ LiveIndex CopyUseIdx = li_->getUseIndex(CopyIdx);
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
assert(ALR != IntA.end() && "Live range not found!");
VNInfo *AValNo = ALR->valno;
// If it's re-defined by an early clobber somewhere in the live range, then
@@ -143,26 +144,28 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// The coalescer has no idea there was a def in the middle of [174,230].
if (AValNo->hasRedefByEC())
return false;
-
- // If AValNo is defined as a copy from IntB, we can potentially process this.
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
if (!SrcReg) return false; // Not defined by a copy.
-
+
// If the value number is not defined by a copy instruction, ignore it.
// If the source register comes from an interval other than IntB, we can't
// handle this.
if (SrcReg != IntB.reg) return false;
-
+
// Get the LiveRange in IntB that this value number starts with.
- LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1);
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(li_->getPrevSlot(AValNo->def));
assert(ValLR != IntB.end() && "Live range not found!");
-
+
// Make sure that the end of the live range is inside the same block as
// CopyMI.
- MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
- if (!ValLREndInst ||
+ MachineInstr *ValLREndInst =
+ li_->getInstructionFromIndex(li_->getPrevSlot(ValLR->end));
+ if (!ValLREndInst ||
ValLREndInst->getParent() != CopyMI->getParent()) return false;
// Okay, we now know that ValLR ends in the same block that the CopyMI
@@ -177,28 +180,33 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
*tri_->getSubRegisters(IntB.reg)) {
for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
- DOUT << "Interfere with sub-register ";
- DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(errs(), tri_);
+ });
return false;
}
}
-
- DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
-
- unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+
+ DEBUG({
+ errs() << "\nExtending: ";
+ IntB.print(errs(), tri_);
+ });
+
+ LiveIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
// that defines this value #'. Update the valnum with the new defining
// instruction #.
BValNo->def = FillerStart;
- BValNo->copy = NULL;
-
+ BValNo->setCopy(0);
+
// Okay, we can merge them. We need to insert a new liverange:
// [ValLR.end, BLR.begin) of either value number, then we merge the
// two value numbers.
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
// If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
+ // physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
LiveInterval &SRLI = li_->getInterval(*SR);
@@ -213,17 +221,26 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
IntB.addKills(ValLR->valno, BValNo->kills);
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
}
- DOUT << " result = "; IntB.print(DOUT, tri_);
- DOUT << "\n";
+ DEBUG({
+ errs() << " result = ";
+ IntB.print(errs(), tri_);
+ errs() << "\n";
+ });
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
if (UIdx != -1) {
ValLREndInst->getOperand(UIdx).setIsKill(false);
- IntB.removeKill(ValLR->valno, FillerStart);
+ ValLR->valno->removeKill(FillerStart);
}
+ // If the copy instruction was killing the destination register before the
+ // merge, find the last use and trim the live range. That will also add the
+ // isKill marker.
+ if (CopyMI->killsRegister(IntA.reg))
+ TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR);
+
++numExtends;
return true;
}
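The "example above" that AdjustCopiesBackFrom's comments cite lives in the
file's earlier commentary, outside this diff's context lines. Paraphrased from
the function's logic, with illustrative value-number labels, the pattern is:

    A3 = B0      // AValNo: A is defined by a copy from B
      ...
    B1 = A3      // CopyMI: the value is copied straight back into B

Because the value merely round-trips through A, B0's live range can be
extended across the gap (the "filler" range added above) and B1 merged into
B0, so CopyMI can be deleted. The newly added TrimLiveIntervalToLastUse call
also shortens A's interval when the copy was the last use of A3.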
@@ -253,6 +270,16 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
return false;
}
+static void
+TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ }
+}
+
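This helper exists so rematerialization does not silently drop implicit
register operands attached to the copy beyond its fixed operand list; its call
site appears in ReMaterializeTrivialDef below. An illustrative, target-neutral
sketch of what gets preserved (the opcodes are made up):

    %r1 = COPY %r2, %r3<imp-use>   // implicit use tacked onto the copy
    // after the copy is replaced by a rematerialized def:
    %r1 = MOVi 42, %r3<imp-use>    // imp-use carried over by the helper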
/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA
/// being the source and IntB being the dest, thus this defines a value number
/// in IntB. If the source value number (in IntA) is defined by a commutable
@@ -279,7 +306,8 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
LiveInterval &IntB,
MachineInstr *CopyMI) {
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ LiveIndex CopyIdx =
+ li_->getDefIndex(li_->getInstructionIndex(CopyMI));
// FIXME: For now, only eliminate the copy by commuting its def when the
// source register is a virtual register. We want to guard against cases
@@ -293,15 +321,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
assert(BLR != IntB.end() && "Live range not found!");
VNInfo *BValNo = BLR->valno;
-
+
// Get the location that B is defined at. Two options: either this value has
- // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->copy) return false;
+ if (!BValNo->getCopy()) return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
+
// AValNo is the value number in A that defines the copy, A3 in the example.
- LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ LiveInterval::iterator ALR =
+ IntA.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx));
+
assert(ALR != IntA.end() && "Live range not found!");
VNInfo *AValNo = ALR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
@@ -312,9 +342,23 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
const TargetInstrDesc &TID = DefMI->getDesc();
- unsigned NewDstIdx;
- if (!TID.isCommutable() ||
- !tii_->CommuteChangesDestination(DefMI, NewDstIdx))
+ if (!TID.isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
return false;
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
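The expanded logic above spells out what the removed CommuteChangesDestination
query used to hide: for a two-address def, commuting swaps the tied use with
the other commutable operand, so the candidate new destination is whichever
commuted index is not the tied one. In illustrative pseudo machine code:

    %A = OR %A<tied>, %B   // DefIdx: def of %A; UseOpIdx: the tied use
    // findCommutedOpIndices yields the swappable pair (Op1, Op2);
    // the index that is not UseOpIdx becomes NewDstIdx:
    %B = OR %B<tied>, %A   // after commuting, the def tracks %B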
@@ -332,7 +376,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
UE = mri_->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
- unsigned UseIdx = li_->getInstructionIndex(UseMI);
+ LiveIndex UseIdx = li_->getInstructionIndex(UseMI);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end())
continue;
@@ -356,8 +400,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
bool BHasPHIKill = BValNo->hasPHIKill();
SmallVector<VNInfo*, 4> BDeadValNos;
- SmallVector<unsigned, 4> BKills;
- std::map<unsigned, unsigned> BExtend;
+ VNInfo::KillSet BKills;
+ std::map<LiveIndex, LiveIndex> BExtend;
// If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
// A = or A, B
@@ -384,7 +428,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
++UI;
if (JoinedCopies.count(UseMI))
continue;
- unsigned UseIdx = li_->getInstructionIndex(UseMI);
+      LiveIndex UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
@@ -395,7 +439,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (Extended)
UseMO.setIsKill(false);
else
- BKills.push_back(li_->getUseIndex(UseIdx)+1);
+ BKills.push_back(li_->getNextSlot(UseIdx));
}
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
@@ -404,7 +448,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// This copy will become a noop. If it's defining a new val#,
// remove that val# as well. However this live range is being
// extended to the end of the existing live range defined by the copy.
- unsigned DefIdx = li_->getDefIndex(UseIdx);
+ LiveIndex DefIdx = li_->getDefIndex(UseIdx);
const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
BHasPHIKill |= DLR->valno->hasPHIKill();
assert(DLR->valno->def == DefIdx);
@@ -420,7 +464,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// We need to insert a new liverange: [ALR.start, LastUse). It may be we can
// simply extend BLR if CopyMI doesn't end the range.
- DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+ DEBUG({
+ errs() << "\nExtending: ";
+ IntB.print(errs(), tri_);
+ });
// Remove val#'s defined by copies that will be coalesced away.
for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
@@ -439,24 +486,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// is updated. Kills are also updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
- ValNo->copy = NULL;
+ ValNo->setCopy(0);
for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
- unsigned Kill = ValNo->kills[j];
- if (Kill != BLR->end)
- BKills.push_back(Kill);
+ if (ValNo->kills[j] != BLR->end)
+ BKills.push_back(ValNo->kills[j]);
}
ValNo->kills.clear();
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
- unsigned End = AI->end;
- std::map<unsigned, unsigned>::iterator EI = BExtend.find(End);
+ LiveIndex End = AI->end;
+ std::map<LiveIndex, LiveIndex>::iterator
+ EI = BExtend.find(End);
if (EI != BExtend.end())
End = EI->second;
IntB.addRange(LiveRange(AI->start, End, ValNo));
// If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
+ // physreg has sub-registers, update their live intervals as well.
if (BHasSubRegs) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
LiveInterval &SRLI = li_->getInterval(*SR);
@@ -467,13 +514,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
IntB.addKills(ValNo, BKills);
ValNo->setHasPHIKill(BHasPHIKill);
- DOUT << " result = "; IntB.print(DOUT, tri_);
- DOUT << "\n";
+ DEBUG({
+ errs() << " result = ";
+ IntB.print(errs(), tri_);
+ errs() << '\n';
+ errs() << "\nShortening: ";
+ IntA.print(errs(), tri_);
+ });
- DOUT << "\nShortening: "; IntA.print(DOUT, tri_);
IntA.removeValNo(AValNo);
- DOUT << " result = "; IntA.print(DOUT, tri_);
- DOUT << "\n";
+
+ DEBUG({
+ errs() << " result = ";
+ IntA.print(errs(), tri_);
+ errs() << '\n';
+ });
++numCommutes;
return true;
@@ -495,7 +550,8 @@ static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
/// from a physical register live interval as well as from the live intervals
/// of its sub-registers.
-static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
+static void removeRange(LiveInterval &li,
+ LiveIndex Start, LiveIndex End,
LiveIntervals *li_, const TargetRegisterInfo *tri_) {
li.removeRange(Start, End, true);
if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
@@ -503,14 +559,15 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
if (!li_->hasInterval(*SR))
continue;
LiveInterval &sli = li_->getInterval(*SR);
- unsigned RemoveEnd = Start;
+ LiveIndex RemoveStart = Start;
+ LiveIndex RemoveEnd = Start;
while (RemoveEnd != End) {
- LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start);
+ LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart);
if (LR == sli.end())
break;
RemoveEnd = (LR->end < End) ? LR->end : End;
- sli.removeRange(Start, RemoveEnd, true);
- Start = RemoveEnd;
+ sli.removeRange(RemoveStart, RemoveEnd, true);
+ RemoveStart = RemoveEnd;
}
}
}
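The RemoveStart cursor is more than a rename: the old loop advanced the Start
parameter itself, so once one sub-register's walk finished, the next
sub-register's walk resumed from the previous end point rather than the
original Start, skipping ranges. The fixed shape, sketched:

    for (const unsigned *SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
      LiveIndex RemoveStart = Start; // fresh cursor per sub-register
      LiveIndex RemoveEnd = Start;
      while (RemoveEnd != End) {
        // ... find the range at RemoveStart, remove it, advance RemoveStart
      }
    }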
@@ -520,14 +577,14 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
/// as the copy instruction, trim the live interval to the last use and return
/// true.
bool
-SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx,
MachineBasicBlock *CopyMBB,
LiveInterval &li,
const LiveRange *LR) {
- unsigned MBBStart = li_->getMBBStartIdx(CopyMBB);
- unsigned LastUseIdx;
- MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg,
- LastUseIdx);
+ LiveIndex MBBStart = li_->getMBBStartIdx(CopyMBB);
+ LiveIndex LastUseIdx;
+ MachineOperand *LastUse =
+ lastRegisterUse(LR->start, li_->getPrevSlot(CopyIdx), li.reg, LastUseIdx);
if (LastUse) {
MachineInstr *LastUseMI = LastUse->getParent();
if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
@@ -547,7 +604,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
// of last use.
LastUse->setIsKill();
removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_);
- li.addKill(LR->valno, LastUseIdx+1);
+ LR->valno->addKill(li_->getNextSlot(LastUseIdx));
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
DstReg == li.reg) {
@@ -560,7 +617,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
// Is it livein?
if (LR->start <= MBBStart && LR->end > MBBStart) {
- if (LR->start == 0) {
+ if (LR->start == LiveIndex()) {
assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
// Live-in to the function but dead. Remove it from entry live-in set.
mf_->begin()->removeLiveIn(li.reg);
@@ -575,8 +632,9 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
/// computation, replace the copy by rematerialize the definition.
bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
unsigned DstReg,
+ unsigned DstSubIdx,
MachineInstr *CopyMI) {
- unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
+ LiveIndex CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
@@ -590,24 +648,52 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isAsCheapAsAMove())
return false;
- if (!DefMI->getDesc().isRematerializable() ||
- !tii_->isTriviallyReMaterializable(DefMI))
+ if (!tii_->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
- if (!DefMI->isSafeToMove(tii_, SawStore))
+ if (!DefMI->isSafeToMove(tii_, SawStore, AA))
+ return false;
+ if (TID.getNumDefs() != 1)
return false;
+ if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (mri_->getRegClass(DstReg) != RC)
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
- unsigned DefIdx = li_->getDefIndex(CopyIdx);
+  // If the destination register has a sub-register index on it, make sure it
+  // matches the instruction register class.
+ if (DstSubIdx) {
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ if (TID.getNumDefs() != 1)
+ return false;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC =
+ DstRC->getSubRegisterRegClass(DstSubIdx);
+ const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_);
+ if (DefRC == DstRC)
+ DstSubIdx = 0;
+ else if (DefRC != DstSubRC)
+ return false;
+ }
+
+ LiveIndex DefIdx = li_->getDefIndex(CopyIdx);
const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
- DLR->valno->copy = NULL;
+ DLR->valno->setCopy(0);
// Don't forget to update sub-register intervals.
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
if (!li_->hasInterval(*SR))
continue;
DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->copy == CopyMI)
- DLR->valno->copy = NULL;
+ if (DLR && DLR->valno->getCopy() == CopyMI)
+ DLR->valno->setCopy(0);
}
}
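The DstSubIdx block above encodes one rule: rematerializing directly into a
sub-register is allowed when the instruction's definition class equals the
sub-register class, while a definition class equal to the full destination
class means the index can simply be dropped. Illustrative, with made-up
register classes and sub-register index:

    %v = MOVri 42              // DefRC, say GR32
    %w<sub_32> = COPY %v       // DstRC = GR64, DstSubIdx = sub_32
    // DstSubRC (GR64's sub_32 class) == GR32 == DefRC: rematerialize
    // into the sub-register. Were DefRC == GR64 instead, DstSubIdx
    // would be cleared and the full register defined.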
@@ -621,7 +707,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DefMI);
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
@@ -630,7 +716,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// should mark it dead:
if (DefMI->getParent() == MBB) {
DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = SrcLR->start + 1;
+ SrcLR->end = li_->getNextSlot(SrcLR->start);
}
}
@@ -644,11 +730,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (MO.isDef() && li_->hasInterval(MO.getReg())) {
unsigned Reg = MO.getReg();
DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->copy == CopyMI)
- DLR->valno->copy = NULL;
+ if (DLR && DLR->valno->getCopy() == CopyMI)
+ DLR->valno->setCopy(0);
}
}
+ TransferImplicitOps(CopyMI, NewMI);
li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
@@ -657,30 +744,6 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return true;
}
-/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy.
-///
-bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
- unsigned DstReg) const {
- MachineBasicBlock *MBB = CopyMI->getParent();
- const MachineLoop *L = loopInfo->getLoopFor(MBB);
- if (!L)
- return false;
- if (MBB != L->getLoopLatch())
- return false;
-
- LiveInterval &LI = li_->getInterval(DstReg);
- unsigned DefIdx = li_->getInstructionIndex(CopyMI);
- LiveInterval::const_iterator DstLR =
- LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx));
- if (DstLR == LI.end())
- return false;
- unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1;
- if (DstLR->valno->kills.size() == 1 &&
- DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill())
- return true;
- return false;
-}
-
/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
@@ -714,7 +777,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
// If the use is a copy and it won't be coalesced away, and its source
// is defined by a trivial computation, try to rematerialize it instead.
- if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI))
+ if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
+ CopyDstSubIdx, UseMI))
continue;
}
@@ -751,44 +815,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
(TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
allocatableRegs_[CopyDstReg])) {
LiveInterval &LI = li_->getInterval(CopyDstReg);
- unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI));
+ LiveIndex DefIdx =
+ li_->getDefIndex(li_->getInstructionIndex(UseMI));
if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) {
if (DLR->valno->def == DefIdx)
- DLR->valno->copy = UseMI;
+ DLR->valno->setCopy(UseMI);
}
}
}
}
-/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining"
-/// registers due to insert_subreg coalescing. e.g.
-/// r1024 = op
-/// r1025 = implicit_def
-/// r1025 = insert_subreg r1025, r1024
-/// = op r1025
-/// =>
-/// r1025 = op
-/// r1025 = implicit_def
-/// r1025 = insert_subreg r1025, r1025
-/// = op r1025
-void
-SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) {
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
- E = mri_->reg_end(); I != E; ) {
- MachineOperand &O = I.getOperand();
- MachineInstr *DefMI = &*I;
- ++I;
- if (!O.isDef())
- continue;
- if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
- continue;
- if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI)))
- continue;
- li_->RemoveMachineInstrFromMaps(DefMI);
- DefMI->eraseFromParent();
- }
-}
-
/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
/// due to live range lengthening as the result of coalescing.
void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
@@ -796,12 +832,27 @@ void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
UE = mri_->use_end(); UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
- if (UseMO.isKill()) {
- MachineInstr *UseMI = UseMO.getParent();
- unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
- const LiveRange *UI = LI.getLiveRangeContaining(UseIdx);
- if (!UI || !LI.isKill(UI->valno, UseIdx+1))
- UseMO.setIsKill(false);
+ if (!UseMO.isKill())
+ continue;
+ MachineInstr *UseMI = UseMO.getParent();
+ LiveIndex UseIdx =
+ li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
+ if (!LR ||
+ (!LR->valno->isKill(li_->getNextSlot(UseIdx)) &&
+ LR->valno->def != li_->getNextSlot(UseIdx))) {
+ // Interesting problem. After coalescing reg1027's def and kill are both
+ // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814)
+ //
+ // bb5:
+ // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0
+ // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0
+ // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def>
+ // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0
+ // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill>
+ //
+ // Do not remove the kill marker on t2LDRi12.
+ UseMO.setIsKill(false);
}
}
}
@@ -830,15 +881,16 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
/// Return true if live interval is removed.
bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
MachineInstr *CopyMI) {
- unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI);
LiveInterval::iterator MLR =
li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx));
if (MLR == li.end())
return false; // Already removed by ShortenDeadCopySrcLiveRange.
- unsigned RemoveStart = MLR->start;
- unsigned RemoveEnd = MLR->end;
+ LiveIndex RemoveStart = MLR->start;
+ LiveIndex RemoveEnd = MLR->end;
+ LiveIndex DefIdx = li_->getDefIndex(CopyIdx);
// Remove the liverange that's defined by this.
- if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) {
+ if (RemoveStart == DefIdx && RemoveEnd == li_->getNextSlot(DefIdx)) {
removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
return removeIntervalIfEmpty(li, li_, tri_);
}
@@ -849,7 +901,7 @@ bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
/// the val# it defines. If the live interval becomes empty, remove it as well.
bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
MachineInstr *DefMI) {
- unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
+ LiveIndex DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
if (DefIdx != MLR->valno->def)
return false;
@@ -860,17 +912,18 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
/// PropagateDeadness - Propagate the dead marker to the instruction which
/// defines the val#.
static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
- unsigned &LRStart, LiveIntervals *li_,
+ LiveIndex &LRStart, LiveIntervals *li_,
const TargetRegisterInfo* tri_) {
MachineInstr *DefMI =
li_->getInstructionFromIndex(li_->getDefIndex(LRStart));
if (DefMI && DefMI != CopyMI) {
- int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_);
- if (DeadIdx != -1) {
+ int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false);
+ if (DeadIdx != -1)
DefMI->getOperand(DeadIdx).setIsDead();
- // A dead def should have a single cycle interval.
- ++LRStart;
- }
+ else
+ DefMI->addOperand(MachineOperand::CreateReg(li.reg,
+ true, true, false, true));
+ LRStart = li_->getNextSlot(LRStart);
}
}
@@ -881,8 +934,8 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
bool
SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
MachineInstr *CopyMI) {
- unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
- if (CopyIdx == 0) {
+ LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI);
+ if (CopyIdx == LiveIndex()) {
// FIXME: special case: function live in. It can be a general case if the
// first instruction index starts at > 0 value.
assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
@@ -894,13 +947,14 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
return removeIntervalIfEmpty(li, li_, tri_);
}
- LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1);
+ LiveInterval::iterator LR =
+ li.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx));
if (LR == li.end())
// Livein but defined by a phi.
return false;
- unsigned RemoveStart = LR->start;
- unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1;
+ LiveIndex RemoveStart = LR->start;
+ LiveIndex RemoveEnd = li_->getNextSlot(li_->getDefIndex(CopyIdx));
if (LR->end > RemoveEnd)
// More uses past this copy? Nothing to do.
return false;
@@ -911,22 +965,25 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
return false;
+ // There are other kills of the val#. Nothing to do.
+ if (!li.isOnlyLROfValNo(LR))
+ return false;
+
MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
// If the live range starts in another mbb and the copy mbb is not a fall
// through mbb, then we can only cut the range from the beginning of the
// copy mbb.
- RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1;
+ RemoveStart = li_->getNextSlot(li_->getMBBStartIdx(CopyMBB));
if (LR->valno->def == RemoveStart) {
// If the def MI defines the val# and this copy is the only kill of the
// val#, then propagate the dead marker.
- if (li.isOnlyLROfValNo(LR)) {
- PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
- ++numDeadValNo;
- }
- if (li.isKill(LR->valno, RemoveEnd))
- li.removeKill(LR->valno, RemoveEnd);
+ PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+ ++numDeadValNo;
+
+ if (LR->valno->isKill(RemoveEnd))
+ LR->valno->removeKill(RemoveEnd);
}
removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
@@ -940,97 +997,19 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
LiveInterval &ImpLi) const{
if (!CopyMI->killsRegister(ImpLi.reg))
return false;
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
- LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
- if (LR == li.end())
- return false;
- if (LR->valno->hasPHIKill())
- return false;
- if (LR->valno->def != CopyIdx)
- return false;
- // Make sure all of val# uses are copies.
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg),
+ // Make sure this is the only use.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg),
UE = mri_->use_end(); UI != UE;) {
MachineInstr *UseMI = &*UI;
++UI;
- if (JoinedCopies.count(UseMI))
- continue;
- unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
- LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
- if (ULR == li.end() || ULR->valno != LR->valno)
+ if (CopyMI == UseMI || JoinedCopies.count(UseMI))
continue;
- // If the use is not a use, then it's not safe to coalesce the move.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG &&
- UseMI->getOperand(1).getReg() == li.reg)
- continue;
- return false;
- }
+ return false;
}
return true;
}
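The rewrite collapses CanCoalesceWithImpDef's old per-val# analysis into a
single ownership test: a copy from an implicit_def interval is coalescable
only if it kills the register and every other user is a copy already marked
joined. As a predicate sketch:

    ok(CopyMI, ImpLi) :=
        CopyMI->killsRegister(ImpLi.reg)
        && for every UseMI of ImpLi.reg:
             UseMI == CopyMI || JoinedCopies.count(UseMI)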
-/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
-/// implicit_def and it is being removed. Turn all copies from this value#
-/// into implicit_defs.
-void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li,
- VNInfo *VNI) {
- SmallVector<MachineInstr*, 4> ImpDefs;
- MachineOperand *LastUse = NULL;
- unsigned LastUseIdx = li_->getUseIndex(VNI->def);
- for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg),
- RE = mri_->reg_end(); RI != RE;) {
- MachineOperand *MO = &RI.getOperand();
- MachineInstr *MI = &*RI;
- ++RI;
- if (MO->isDef()) {
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
- ImpDefs.push_back(MI);
- continue;
- }
- if (JoinedCopies.count(MI))
- continue;
- unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI));
- LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
- if (ULR == li.end() || ULR->valno != VNI)
- continue;
- // If the use is a copy, turn it into an identity copy.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == li.reg) {
- // Change it to an implicit_def.
- MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
- for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i)
- MI->RemoveOperand(i);
- // It's no longer a copy, update the valno it defines.
- unsigned DefIdx = li_->getDefIndex(UseIdx);
- LiveInterval &DstInt = li_->getInterval(DstReg);
- LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx);
- assert(DLR != DstInt.end() && "Live range not found!");
- assert(DLR->valno->copy == MI);
- DLR->valno->copy = NULL;
- ReMatCopies.insert(MI);
- } else if (UseIdx > LastUseIdx) {
- LastUseIdx = UseIdx;
- LastUse = MO;
- }
- }
- if (LastUse) {
- LastUse->setIsKill();
- li.addKill(VNI, LastUseIdx+1);
- } else {
- // Remove dead implicit_def's.
- while (!ImpDefs.empty()) {
- MachineInstr *ImpDef = ImpDefs.back();
- ImpDefs.pop_back();
- li_->RemoveMachineInstrFromMaps(ImpDef);
- ImpDef->eraseFromParent();
- }
- }
-}
-
/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join
/// a virtual destination register with a physical source register.
bool
@@ -1051,13 +1030,14 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
// If the virtual register live interval extends into a loop, turn down
// aggressiveness.
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ LiveIndex CopyIdx =
+ li_->getDefIndex(li_->getInstructionIndex(CopyMI));
const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
if (!L) {
// Let's see if the virtual register live interval extends into the loop.
LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
assert(DLR != DstInt.end() && "Live range not found!");
- DLR = DstInt.FindLiveRangeContaining(DLR->end+1);
+ DLR = DstInt.FindLiveRangeContaining(li_->getNextSlot(DLR->end));
if (DLR != DstInt.end()) {
CopyMBB = li_->getMBBFromIndex(DLR->start);
L = loopInfo->getLoopFor(CopyMBB);
@@ -1067,7 +1047,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
if (!L || Length <= Threshold)
return true;
- unsigned UseIdx = li_->getUseIndex(CopyIdx);
+ LiveIndex UseIdx = li_->getUseIndex(CopyIdx);
LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
if (loopInfo->getLoopFor(SMBB) != L) {
@@ -1080,7 +1060,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
if (SuccMBB == CopyMBB)
continue;
if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
- li_->getMBBEndIdx(SuccMBB)+1))
+ li_->getNextSlot(li_->getMBBEndIdx(SuccMBB))))
return false;
}
}
@@ -1111,11 +1091,12 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
// If the virtual register live interval is defined or cross a loop, turn
// down aggressiveness.
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
- unsigned UseIdx = li_->getUseIndex(CopyIdx);
+ LiveIndex CopyIdx =
+ li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ LiveIndex UseIdx = li_->getUseIndex(CopyIdx);
LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
assert(SLR != SrcInt.end() && "Live range not found!");
- SLR = SrcInt.FindLiveRangeContaining(SLR->start-1);
+ SLR = SrcInt.FindLiveRangeContaining(li_->getPrevSlot(SLR->start));
if (SLR == SrcInt.end())
return true;
MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
@@ -1135,7 +1116,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
if (PredMBB == SMBB)
continue;
if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
- li_->getMBBEndIdx(PredMBB)+1))
+ li_->getNextSlot(li_->getMBBEndIdx(PredMBB))))
return false;
}
}
@@ -1236,14 +1217,18 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
LiveInterval &RHS = li_->getInterval(SrcReg);
if (li_->hasInterval(RealDstReg) &&
RHS.overlaps(li_->getInterval(RealDstReg))) {
- DOUT << "Interfere with register ";
- DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with register ";
+ li_->getInterval(RealDstReg).print(errs(), tri_);
+ });
return false; // Not coalescable
}
for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DOUT << "Interfere with sub-register ";
- DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(errs(), tri_);
+ });
return false; // Not coalescable
}
return true;
@@ -1263,14 +1248,18 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
LiveInterval &RHS = li_->getInterval(DstReg);
if (li_->hasInterval(RealSrcReg) &&
RHS.overlaps(li_->getInterval(RealSrcReg))) {
- DOUT << "Interfere with register ";
- DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with register ";
+ li_->getInterval(RealSrcReg).print(errs(), tri_);
+ });
return false; // Not coalescable
}
for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DOUT << "Interfere with sub-register ";
- DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(errs(), tri_);
+ });
return false; // Not coalescable
}
return true;
@@ -1299,7 +1288,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
return false; // Already done.
- DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+ DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
@@ -1312,41 +1301,43 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
SrcReg = CopyMI->getOperand(1).getReg();
SrcSubIdx = CopyMI->getOperand(2).getImm();
} else if (isInsSubReg || isSubRegToReg) {
- if (CopyMI->getOperand(2).getSubReg()) {
- DOUT << "\tSource of insert_subreg is already coalesced "
- << "to another register.\n";
- return false; // Not coalescable.
- }
DstReg = CopyMI->getOperand(0).getReg();
DstSubIdx = CopyMI->getOperand(3).getImm();
SrcReg = CopyMI->getOperand(2).getReg();
+ SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+ if (SrcSubIdx && SrcSubIdx != DstSubIdx) {
+      // r1025 = INSERT_SUBREG r1025, r1024<2>, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already "
+ "coalesced to another register.\n");
+ return false; // Not coalescable.
+ }
} else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
- assert(0 && "Unrecognized copy instruction!");
- return false;
+ llvm_unreachable("Unrecognized copy instruction!");
}
// If they are already joined we continue.
if (SrcReg == DstReg) {
- DOUT << "\tCopy already coalesced.\n";
+ DEBUG(errs() << "\tCopy already coalesced.\n");
return false; // Not coalescable.
}
-
+
bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
// If they are both physical registers, we cannot join them.
if (SrcIsPhys && DstIsPhys) {
- DOUT << "\tCan not coalesce physregs.\n";
+ DEBUG(errs() << "\tCan not coalesce physregs.\n");
return false; // Not coalescable.
}
-
+
// We only join virtual registers with allocatable physical registers.
if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
- DOUT << "\tSrc reg is unallocatable physreg.\n";
+ DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n");
return false; // Not coalescable.
}
if (DstIsPhys && !allocatableRegs_[DstReg]) {
- DOUT << "\tDst reg is unallocatable physreg.\n";
+ DEBUG(errs() << "\tDst reg is unallocatable physreg.\n");
return false; // Not coalescable.
}
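Taken together, the checks above pin down which register pairings the
coalescer will even consider; as a decision table:

    virtual <-> virtual               : candidate for joining
    virtual <-> allocatable physreg   : candidate for joining
    virtual <-> unallocatable physreg : rejected
    physreg <-> physreg               : rejected outright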
@@ -1360,9 +1351,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
assert(DstSubRC && "Illegal subregister index");
if (!DstSubRC->contains(SrcSubReg)) {
- DOUT << "\tIncompatible destination regclass: "
- << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName()
- << ".\n";
+ DEBUG(errs() << "\tIncompatible destination regclass: "
+ << tri_->getName(SrcSubReg) << " not in "
+ << DstSubRC->getName() << ".\n");
return false; // Not coalescable.
}
}
@@ -1377,15 +1368,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
assert(SrcSubRC && "Illegal subregister index");
if (!SrcSubRC->contains(DstReg)) {
- DOUT << "\tIncompatible source regclass: "
- << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName()
- << ".\n";
+ DEBUG(errs() << "\tIncompatible source regclass: "
+ << tri_->getName(DstSubReg) << " not in "
+ << SrcSubRC->getName() << ".\n");
+ (void)DstSubReg;
return false; // Not coalescable.
}
}
// Should be non-null only when coalescing to a sub-register class.
bool CrossRC = false;
+  const TargetRegisterClass *SrcRC = SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
+  const TargetRegisterClass *DstRC = DstIsPhys ? 0 : mri_->getRegClass(DstReg);
const TargetRegisterClass *NewRC = NULL;
MachineBasicBlock *CopyMBB = CopyMI->getParent();
unsigned RealDstReg = 0;
@@ -1400,7 +1394,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
// coalesced to a larger register so the subreg indices cancel out.
if (DstSubIdx != SubIdx) {
- DOUT << "\t Sub-register indices mismatch.\n";
+ DEBUG(errs() << "\t Sub-register indices mismatch.\n");
return false; // Not coalescable.
}
} else
@@ -1413,7 +1407,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
// coalesced to a larger register so the subreg indices cancel out.
if (SrcSubIdx != SubIdx) {
- DOUT << "\t Sub-register indices mismatch.\n";
+ DEBUG(errs() << "\t Sub-register indices mismatch.\n");
return false; // Not coalescable.
}
} else
@@ -1422,8 +1416,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
} else if ((DstIsPhys && isExtSubReg) ||
(SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
- DOUT << "\tSrc of extract_subreg already coalesced with reg"
- << " of a super-class.\n";
+ DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg"
+ << " of a super-class.\n");
return false; // Not coalescable.
}
@@ -1446,11 +1440,22 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// class as the would be resulting register.
SubIdx = 0;
else {
- DOUT << "\t Sub-register indices mismatch.\n";
+ DEBUG(errs() << "\t Sub-register indices mismatch.\n");
return false; // Not coalescable.
}
}
if (SubIdx) {
+ if (!DstIsPhys && !SrcIsPhys) {
+        if (isInsSubReg || isSubRegToReg)
+          NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
+        else // extract_subreg
+          NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
+      }
+ if (!NewRC) {
+ DEBUG(errs() << "\t Conflicting sub-register indices.\n");
+ return false; // Not coalescable
+ }
+
unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
unsigned Limit = allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
@@ -1461,7 +1466,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
}
} else if (differingRegisterClasses(SrcReg, DstReg)) {
- if (!CrossClassJoin)
+ if (DisableCrossClassJoin)
return false;
CrossRC = true;
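A behavioral note falls out of the option rename near the top of this file:
cross-register-class coalescing used to be opt-in through
-join-cross-class-copies (default false) and is now on by default, with
-disable-cross-class-join as the opt-out. The old default can be restored
with the hidden flag; the invocation below is illustrative:

    llc -disable-cross-class-join foo.bc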
@@ -1502,11 +1507,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
}
- const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
- const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
unsigned LargeReg = SrcReg;
unsigned SmallReg = DstReg;
- unsigned Limit = 0;
// Now determine the register class of the joined register.
if (isExtSubReg) {
@@ -1517,13 +1519,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
Again = true;
return false;
}
- Limit = allocatableRCRegs_[DstRC].count();
+ if (!DstIsPhys && !SrcIsPhys)
+ NewRC = SrcRC;
} else if (!SrcIsPhys && !DstIsPhys) {
NewRC = getCommonSubClass(SrcRC, DstRC);
if (!NewRC) {
- DOUT << "\tDisjoint regclasses: "
- << SrcRC->getName() << ", "
- << DstRC->getName() << ".\n";
+ DEBUG(errs() << "\tDisjoint regclasses: "
+ << SrcRC->getName() << ", "
+ << DstRC->getName() << ".\n");
return false; // Not coalescable.
}
if (DstRC->getSize() > SrcRC->getSize())
@@ -1537,7 +1540,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
(isExtSubReg || DstRC->isASubClass()) &&
!isWinToJoinCrossClass(LargeReg, SmallReg,
allocatableRCRegs_[NewRC].count())) {
- DOUT << "\tSrc/Dest are different register classes.\n";
+ DEBUG(errs() << "\tSrc/Dest are different register classes.\n");
// Allow the coalescer to try again in case either side gets coalesced to
// a physical register that's compatible with the other side. e.g.
// r1024 = MOV32to32_ r1025
@@ -1552,15 +1555,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false;
if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
return false;
-
+
LiveInterval &SrcInt = li_->getInterval(SrcReg);
LiveInterval &DstInt = li_->getInterval(DstReg);
assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
"Register mapping is horribly broken!");
- DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_);
- DOUT << " and "; DstInt.print(DOUT, tri_);
- DOUT << ": ";
+ DEBUG({
+ errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_);
+ errs() << " and "; DstInt.print(errs(), tri_);
+ errs() << ": ";
+ });
// Save a copy of the virtual register live interval. We'll manually
// merge this into the "real" physical register live interval this is
@@ -1590,7 +1595,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
++numAborts;
- DOUT << "\tMay tie down a physical register, abort!\n";
+ DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1598,7 +1603,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
++numAborts;
- DOUT << "\tMay tie down a physical register, abort!\n";
+ DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1612,9 +1617,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- if (TheCopy.isBackEdge)
- Threshold *= 2; // Favors back edge copies.
-
unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
float Ratio = 1.0 / Threshold;
if (Length > Threshold &&
@@ -1622,7 +1624,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
mri_->use_end()) / Length) < Ratio)) {
mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
++numAborts;
- DOUT << "\tMay tie down a physical register, abort!\n";
+ DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1641,7 +1643,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Only coalesce an empty interval (defined by implicit_def) with
// another interval which has a valno defined by the CopyMI and the CopyMI
// is a kill of the implicit def.
- DOUT << "Not profitable!\n";
+ DEBUG(errs() << "Not profitable!\n");
return false;
}
@@ -1651,9 +1653,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If definition of source is defined by trivial computation, try
// rematerializing it.
if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
- ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI))
+ ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
return true;
-
+
// If we can eliminate the copy without merging the live ranges, do so now.
if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
(AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
@@ -1661,9 +1663,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
JoinedCopies.insert(CopyMI);
return true;
}
-
+
// Otherwise, we are unable to join the intervals.
- DOUT << "Interference!\n";
+ DEBUG(errs() << "Interference!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1676,7 +1678,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"LiveInterval::join didn't work right!");
-
+
// If we're about to merge live ranges into a physical register live interval,
// we have to update any aliased register's live ranges to indicate that they
// have clobbered values for this range.
@@ -1690,14 +1692,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
E = SavedLI->vni_end(); I != E; ++I) {
const VNInfo *ValNo = *I;
- VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+ VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(),
false, // updated at *
li_->getVNInfoAllocator());
NewValNo->setFlags(ValNo->getFlags()); // * updated here.
RealInt.addKills(NewValNo, ValNo->kills);
RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
}
- RealInt.weight += SavedLI->weight;
+ RealInt.weight += SavedLI->weight;
DstReg = RealDstReg ? RealDstReg : RealSrcReg;
}
@@ -1721,32 +1723,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Coalescing to a virtual register that is of a sub-register class of the
// other. Make sure the resulting register is set to the right register class.
- if (CrossRC) {
- ++numCrossRCs;
- if (NewRC)
- mri_->setRegClass(DstReg, NewRC);
- }
-
- if (NewHeuristic) {
- // Add all copies that define val# in the source interval into the queue.
- for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
- e = ResSrcInt->vni_end(); i != e; ++i) {
- const VNInfo *vni = *i;
- // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
- if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate())
- continue;
- MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
- if (CopyMI &&
- JoinedCopies.count(CopyMI) == 0 &&
- tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg,
- NewSrcSubIdx, NewDstSubIdx)) {
- unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB);
- JoinQueue->push(CopyRec(CopyMI, LoopDepth,
- isBackEdgeCopy(CopyMI, DstReg)));
- }
- }
- }
+ if (CrossRC)
+ ++numCrossRCs;
+
+ // This may happen even if it's cross-rc coalescing. e.g.
+ // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4
+ // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have
+ // to be allocated from GR64_ABCD.
+ if (NewRC)
+ mri_->setRegClass(DstReg, NewRC);
// Remember to delete the copy instruction.
JoinedCopies.insert(CopyMI);
@@ -1757,13 +1742,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (TargetRegisterInfo::isVirtualRegister(DstReg))
RemoveUnnecessaryKills(DstReg, *ResDstInt);
- if (isInsSubReg)
- // Avoid:
- // r1024 = op
- // r1024 = implicit_def
- // ...
- // = r1024
- RemoveDeadImpDef(DstReg, *ResDstInt);
UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
// SrcReg is guaranteed to be the register whose live interval is
@@ -1779,29 +1757,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
delete SavedLI;
}
- if (isEmpty) {
- // Now the copy is being coalesced away, the val# previously defined
- // by the copy is being defined by an IMPLICIT_DEF which defines a zero
- // length interval. Remove the val#.
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
- const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx);
- VNInfo *ImpVal = LR->valno;
- assert(ImpVal->def == CopyIdx);
- unsigned NextDef = LR->end;
- TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal);
- ResDstInt->removeValNo(ImpVal);
- LR = ResDstInt->FindLiveRangeContaining(NextDef);
- if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
- // Special case: vr1024 = implicit_def
- // vr1024 = insert_subreg vr1024, vr1025, c
- // The insert_subreg becomes a "copy" that defines a val# which can itself
- // be coalesced away.
- MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef);
- if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
- LR->valno->copy = DefMI;
- }
- }
-
// If the resulting interval has a preference that no longer fits because of subreg
// coalescing, just clear the preference.
unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
@@ -1812,8 +1767,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
}
- DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
- DOUT << "\n";
+ DEBUG({
+ errs() << "\n\t\tJoined. Result = ";
+ ResDstInt->print(errs(), tri_);
+ errs() << "\n";
+ });
++numJoins;
return true;
@@ -1860,7 +1818,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
// been computed, return it.
if (OtherValNoAssignments[OtherValNo->id] >= 0)
return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
+
// Mark this value number as currently being computed, then ask what the
// ultimate value # of the other value is.
ThisValNoAssignments[VN] = -2;
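
The sentinel protocol here is easier to see in isolation: -1 means unassigned, -2 means currently being computed, and a value recorded as a copy of the other side collapses to that side's ultimate number. A minimal standalone sketch in plain C++ (illustrative names, not the LLVM code):

// Standalone sketch of ComputeUltimateVN's sentinel protocol.
#include <cassert>
#include <cstdio>
#include <vector>

static int resolve(int VN,
                   const std::vector<int> &ThisFromOther,
                   const std::vector<int> &OtherFromThis,
                   std::vector<int> &ThisAssign,
                   std::vector<int> &OtherAssign,
                   int &NextUltimate) {
  if (ThisAssign[VN] >= 0)
    return ThisAssign[VN];                  // Already computed.
  assert(ThisAssign[VN] != -2 && "Cyclic copies?");
  int Other = ThisFromOther[VN];            // -1: not a copy from there.
  if (Other < 0)
    return ThisAssign[VN] = NextUltimate++; // Genuinely new value.
  if (OtherAssign[Other] >= 0)              // Other side already resolved.
    return ThisAssign[VN] = OtherAssign[Other];
  ThisAssign[VN] = -2;                      // Mark as being computed.
  return ThisAssign[VN] = resolve(Other, OtherFromThis, ThisFromOther,
                                  OtherAssign, ThisAssign, NextUltimate);
}

int main() {
  // LHS vn0 is a copy of RHS vn0; RHS vn0 is defined locally.
  std::vector<int> LFromR(1, 0), RFromL(1, -1);
  std::vector<int> LAssign(1, -1), RAssign(1, -1);
  int Next = 0;
  int L0 = resolve(0, LFromR, RFromL, LAssign, RAssign, Next);
  std::printf("LHS vn0 -> %d, RHS vn0 -> %d\n", L0, RAssign[0]); // 0 and 0
}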
@@ -1896,7 +1854,7 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
DstReg == li.reg && SrcReg == Reg) {
// Cache computed info.
LR->valno->def = LR->start;
- LR->valno->copy = DefMI;
+ LR->valno->setCopy(DefMI);
return true;
}
}
@@ -1910,16 +1868,16 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
/// joins them and returns true.
bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
assert(RHS.containsOneValue());
-
+
// Some number (potentially more than one) value numbers in the current
// interval may be defined as copies from the RHS. Scan the overlapping
// portions of the LHS and RHS, keeping track of this and looking for
// overlapping live ranges that are NOT defined as copies. If these exist, we
// cannot coalesce.
-
+
LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
-
+
if (LHSIt->start < RHSIt->start) {
LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
if (LHSIt != LHS.begin()) --LHSIt;
@@ -1927,9 +1885,9 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
if (RHSIt != RHS.begin()) --RHSIt;
}
-
+
SmallVector<VNInfo*, 8> EliminatedLHSVals;
-
+
while (1) {
// Determine if these live intervals overlap.
bool Overlaps = false;
@@ -1937,7 +1895,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
Overlaps = LHSIt->end > RHSIt->start;
else
Overlaps = RHSIt->end > LHSIt->start;
-
+
// If the live intervals overlap, there are two interesting cases: if the
// LHS interval is defined by a copy from the RHS, it's ok and we record
// that the LHS value # is the same as the RHS. If it's not, then we cannot
@@ -1955,7 +1913,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// vr1025 = copy vr1024
// ..
// BB2:
- // vr1024 = op
+ // vr1024 = op
// = vr1025
// Even though vr1025 is copied from vr1024, it's not safe to
// coalesce them since the live range of vr1025 intersects the
@@ -1964,12 +1922,12 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
return false;
EliminatedLHSVals.push_back(LHSIt->valno);
}
-
+
// We know this entire LHS live range is okay, so skip it now.
if (++LHSIt == LHSEnd) break;
continue;
}
-
+
if (LHSIt->end < RHSIt->end) {
if (++LHSIt == LHSEnd) break;
} else {
@@ -1993,7 +1951,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// vr1025 = copy vr1024
// ..
// BB2:
- // vr1024 = op
+ // vr1024 = op
// = vr1025
// Even though vr1025 is copied from vr1024, it's not safe to
// coalesce them since the live range of vr1025 intersects the
@@ -2007,11 +1965,11 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
}
}
}
-
+
if (++RHSIt == RHSEnd) break;
}
}
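
The scan above, like the interval walk in JoinIntervals further down, is a two-pointer sweep over sorted, internally disjoint range lists. The core overlap test, standalone (half-open ranges, illustrative types):

// Standalone sketch of the two-pointer overlap sweep.
#include <cstdio>
#include <utility>
#include <vector>

using Range = std::pair<int, int>; // [start, end)

static bool anyOverlap(const std::vector<Range> &A,
                       const std::vector<Range> &B) {
  size_t i = 0, j = 0;
  while (i < A.size() && j < B.size()) {
    // Two half-open ranges overlap iff each starts before the other ends.
    if (A[i].first < B[j].second && B[j].first < A[i].second)
      return true;
    // Advance whichever range ends first.
    if (A[i].second < B[j].second) ++i; else ++j;
  }
  return false;
}

int main() {
  std::vector<Range> A = {{0, 4}, {10, 14}};
  std::vector<Range> B = {{4, 10}, {12, 20}};
  std::printf("%s\n", anyOverlap(A, B) ? "overlap" : "disjoint"); // overlap
}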
-
+
// If we got here, we know that the coalescing will be successful and that
// the value numbers in EliminatedLHSVals will all be merged together. Since
// the most common case is that EliminatedLHSVals has a single number, we
@@ -2039,28 +1997,29 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
*tri_->getSuperRegisters(LHS.reg))
// Imprecise sub-register information. Can't handle it.
return false;
- assert(0 && "No copies from the RHS?");
+ llvm_unreachable("No copies from the RHS?");
} else {
LHSValNo = EliminatedLHSVals[0];
}
-
+
// Okay, now that there is a single LHS value number that we're merging the
// RHS into, update the value number info for the LHS to indicate that the
// value number is defined where the RHS value number was.
const VNInfo *VNI = RHS.getValNumInfo(0);
LHSValNo->def = VNI->def;
- LHSValNo->copy = VNI->copy;
-
+ LHSValNo->setCopy(VNI->getCopy());
+
// Okay, the final step is to loop over the RHS live intervals, adding them to
// the LHS.
if (VNI->hasPHIKill())
LHSValNo->setHasPHIKill(true);
LHS.addKills(LHSValNo, VNI->kills);
LHS.MergeRangesInAsValue(RHS, LHSValNo);
- LHS.weight += RHS.weight;
+
+ LHS.ComputeJoinedWeight(RHS);
// Update regalloc hint if both are virtual registers.
- if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
+ if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
TargetRegisterInfo::isVirtualRegister(RHS.reg)) {
std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg);
std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg);
@@ -2122,8 +2081,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
} else {
for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DOUT << "Interfere with sub-register ";
- DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(errs(), tri_);
+ });
return false;
}
}
@@ -2137,19 +2098,21 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
} else {
for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
- DOUT << "Interfere with sub-register ";
- DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ DEBUG({
+ errs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(errs(), tri_);
+ });
return false;
}
}
}
-
+
// Compute ultimate value numbers for the LHS and RHS values.
if (RHS.containsOneValue()) {
// Copies from a liveinterval with a single value are simple to handle and
// very common, handle the special case here. This is important, because
// often RHS is small and LHS is large (e.g. a physreg).
-
+
// Find out if the RHS is defined as a copy from some value in the LHS.
int RHSVal0DefinedFromLHS = -1;
int RHSValID = -1;
@@ -2167,15 +2130,16 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
}
} else {
// It was defined as a copy from the LHS, find out what value # it is.
- RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno;
+ RHSValNoInfo =
+ LHS.getLiveRangeContaining(li_->getPrevSlot(RHSValNoInfo0->def))->valno;
RHSValID = RHSValNoInfo->id;
RHSVal0DefinedFromLHS = RHSValID;
}
-
+
LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
NewVNInfo.resize(LHS.getNumValNums(), NULL);
-
+
// Okay, *all* of the values in LHS that are defined as a copy from RHS
// should now get updated.
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
@@ -2207,7 +2171,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
LHSValNoAssignments[VN] = VN;
}
}
-
+
assert(RHSValID != -1 && "Didn't find value #?");
RHSValNoAssignments[0] = RHSValID;
if (RHSVal0DefinedFromLHS != -1) {
@@ -2221,44 +2185,46 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
continue;
-
+
// DstReg is known to be a register in the LHS interval. If the src is
// from the RHS interval, we can use its value #.
if (li_->getVNInfoSourceReg(VNI) != RHS.reg)
continue;
-
+
// Figure out the value # from the RHS.
- LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno;
+ LHSValsDefinedFromRHS[VNI]=
+ RHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno;
}
-
+
// Loop over the value numbers of the RHS, seeing if any are defined from
// the LHS.
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
continue;
-
+
// DstReg is known to be a register in the RHS interval. If the src is
// from the LHS interval, we can use its value #.
if (li_->getVNInfoSourceReg(VNI) != LHS.reg)
continue;
-
+
// Figure out the value # from the LHS.
- RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno;
+ RHSValsDefinedFromLHS[VNI]=
+ LHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno;
}
-
+
LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
-
+
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
continue;
ComputeUltimateVN(VNI, NewVNInfo,
LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
@@ -2276,20 +2242,20 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
RHSValNoAssignments[VN] = NewVNInfo.size()-1;
continue;
}
-
+
ComputeUltimateVN(VNI, NewVNInfo,
RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
RHSValNoAssignments, LHSValNoAssignments);
}
}
-
+
// Armed with the mappings of LHS/RHS values to ultimate values, walk the
// interval lists to see if these intervals are coalescable.
LiveInterval::const_iterator I = LHS.begin();
LiveInterval::const_iterator IE = LHS.end();
LiveInterval::const_iterator J = RHS.begin();
LiveInterval::const_iterator JE = RHS.end();
-
+
// Skip ahead until the first place of potential sharing.
if (I->start < J->start) {
I = std::upper_bound(I, IE, J->start);
@@ -2298,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
J = std::upper_bound(J, JE, I->start);
if (J != RHS.begin()) --J;
}
-
+
while (1) {
// Determine if these two live ranges overlap.
bool Overlaps;
@@ -2316,7 +2282,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
RHSValNoAssignments[J->valno->id])
return false;
}
-
+
if (I->end < J->end) {
++I;
if (I == IE) break;
@@ -2331,7 +2297,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
VNInfo *VNI = I->first;
unsigned LHSValID = LHSValNoAssignments[VNI->id];
- LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
+ NewVNInfo[LHSValID]->removeKill(VNI->def);
if (VNI->hasPHIKill())
NewVNInfo[LHSValID]->setHasPHIKill(true);
RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
@@ -2342,7 +2308,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
VNInfo *VNI = I->first;
unsigned RHSValID = RHSValNoAssignments[VNI->id];
- LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
+ NewVNInfo[RHSValID]->removeKill(VNI->def);
if (VNI->hasPHIKill())
NewVNInfo[RHSValID]->setHasPHIKill(true);
LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
@@ -2377,37 +2343,17 @@ namespace {
};
}
-/// getRepIntervalSize - Returns the size of the interval that represents the
-/// specified register.
-template<class SF>
-unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) {
- return Rc->getRepIntervalSize(Reg);
-}
-
-/// CopyRecSort::operator - Join priority queue sorting function.
-///
-bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
- // Inner loops first.
- if (left.LoopDepth > right.LoopDepth)
- return false;
- else if (left.LoopDepth == right.LoopDepth)
- if (left.isBackEdge && !right.isBackEdge)
- return false;
- return true;
-}
-
void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
std::vector<CopyRec> &TryAgain) {
- DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+ DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
std::vector<CopyRec> VirtCopies;
std::vector<CopyRec> PhysCopies;
std::vector<CopyRec> ImpDefCopies;
- unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E;) {
MachineInstr *Inst = MII++;
-
+
// If this isn't a copy or an extract_subreg, we can't join intervals.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
@@ -2422,21 +2368,14 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (NewHeuristic) {
- JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg)));
- } else {
- if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
- ImpDefCopies.push_back(CopyRec(Inst, 0, false));
- else if (SrcIsPhys || DstIsPhys)
- PhysCopies.push_back(CopyRec(Inst, 0, false));
- else
- VirtCopies.push_back(CopyRec(Inst, 0, false));
- }
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(CopyRec(Inst, 0));
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(CopyRec(Inst, 0));
+ else
+ VirtCopies.push_back(CopyRec(Inst, 0));
}
- if (NewHeuristic)
- return;
-
// Try coalescing implicit copies first, followed by copies to / from
// physical registers, then finally copies from virtual registers to
// virtual registers.
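
The ordering this comment describes is plain bucketing followed by concatenation. A standalone sketch, where the two flags stand in for the real tests on the copy instructions and are purely illustrative:

// Standalone sketch of the three-bucket processing order.
#include <vector>

struct Copy { bool SrcIntervalEmpty; bool EitherSidePhys; };

static std::vector<Copy> order(const std::vector<Copy> &In) {
  std::vector<Copy> Imp, Phys, Virt, Out;
  for (const Copy &C : In) {
    if (C.SrcIntervalEmpty)    Imp.push_back(C);  // implicit_def source
    else if (C.EitherSidePhys) Phys.push_back(C); // phys <-> virt
    else                       Virt.push_back(C); // virt <-> virt
  }
  Out.insert(Out.end(), Imp.begin(), Imp.end());
  Out.insert(Out.end(), Phys.begin(), Phys.end());
  Out.insert(Out.end(), Virt.begin(), Virt.end());
  return Out;
}

int main() {
  std::vector<Copy> In = {{false, false}, {true, false}, {false, true}};
  std::vector<Copy> Out = order(In); // implicit, then phys, then virt
  (void)Out;
}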
@@ -2464,10 +2403,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
}
void SimpleRegisterCoalescing::joinIntervals() {
- DOUT << "********** JOINING INTERVALS ***********\n";
-
- if (NewHeuristic)
- JoinQueue = new JoinPriorityQueue<CopyRecSort>(this);
+ DEBUG(errs() << "********** JOINING INTERVALS ***********\n");
std::vector<CopyRec> TryAgainList;
if (loopInfo->empty()) {
@@ -2495,52 +2431,26 @@ void SimpleRegisterCoalescing::joinIntervals() {
for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
}
-
+
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
- if (NewHeuristic) {
- SmallVector<CopyRec, 16> TryAgain;
- bool ProgressMade = true;
- while (ProgressMade) {
- ProgressMade = false;
- while (!JoinQueue->empty()) {
- CopyRec R = JoinQueue->pop();
- bool Again = false;
- bool Success = JoinCopy(R, Again);
- if (Success)
- ProgressMade = true;
- else if (Again)
- TryAgain.push_back(R);
- }
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
- if (ProgressMade) {
- while (!TryAgain.empty()) {
- JoinQueue->push(TryAgain.back());
- TryAgain.pop_back();
- }
- }
- }
- } else {
- bool ProgressMade = true;
- while (ProgressMade) {
- ProgressMade = false;
-
- for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
- CopyRec &TheCopy = TryAgainList[i];
- if (TheCopy.MI) {
- bool Again = false;
- bool Success = JoinCopy(TheCopy, Again);
- if (Success || !Again) {
- TheCopy.MI = 0; // Mark this one as done.
- ProgressMade = true;
- }
- }
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ CopyRec &TheCopy = TryAgainList[i];
+ if (!TheCopy.MI)
+ continue;
+
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy.MI = 0; // Mark this one as done.
+ ProgressMade = true;
}
}
}
-
- if (NewHeuristic)
- delete JoinQueue;
}
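
The try-again list makes this a fixed-point iteration: a join that fails now may succeed after another join changes register classes or live ranges. The control flow in miniature, with a toy dependency standing in for that interaction:

// Standalone sketch of the iterate-until-no-progress retry loop.
#include <cstdio>
#include <vector>

int main() {
  // Toy: copy i can join only after copy Dep[i] has (-1 = no dependency).
  std::vector<int> Dep = {1, 2, -1};
  std::vector<bool> Done(Dep.size(), false);
  bool ProgressMade = true;
  while (ProgressMade) {
    ProgressMade = false;
    for (size_t i = 0; i < Dep.size(); ++i) {
      if (Done[i])
        continue;
      if (Dep[i] == -1 || Done[Dep[i]]) {
        Done[i] = true;      // mark this one as done
        ProgressMade = true;
      }
    }
  }
  std::printf("joined all %zu copies\n", Dep.size());
}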
/// Return true if the two specified registers belong to different register
@@ -2567,9 +2477,11 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
/// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses.
MachineOperand *
-SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
- unsigned Reg, unsigned &UseIdx) const{
- UseIdx = 0;
+SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start,
+ LiveIndex End,
+ unsigned Reg,
+ LiveIndex &UseIdx) const{
+ UseIdx = LiveIndex();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineOperand *LastUse = NULL;
for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg),
@@ -2581,7 +2493,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
SrcReg == DstReg)
// Ignore identity copies.
continue;
- unsigned Idx = li_->getInstructionIndex(UseMI);
+ LiveIndex Idx = li_->getInstructionIndex(UseMI);
if (Idx >= Start && Idx < End && Idx >= UseIdx) {
LastUse = &Use;
UseIdx = li_->getUseIndex(Idx);
@@ -2590,13 +2502,13 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
return LastUse;
}
- int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
- int s = Start;
+ LiveIndex s = Start;
+ LiveIndex e = li_->getBaseIndex(li_->getPrevSlot(End));
while (e >= s) {
// Skip deleted instructions
MachineInstr *MI = li_->getInstructionFromIndex(e);
- while ((e - InstrSlots::NUM) >= s && !MI) {
- e -= InstrSlots::NUM;
+ while (e != LiveIndex() && li_->getPrevIndex(e) >= s && !MI) {
+ e = li_->getPrevIndex(e);
MI = li_->getInstructionFromIndex(e);
}
if (e < s || MI == NULL)
@@ -2615,7 +2527,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
}
}
- e -= InstrSlots::NUM;
+ e = li_->getPrevIndex(e);
}
return NULL;
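
The shape of that backwards scan, standalone: missing map entries model deleted instructions. The real routine additionally checks that the instruction found actually reads Reg and ignores identity copies.

// Standalone sketch of the backwards index scan over surviving slots.
#include <cstdio>
#include <map>
#include <string>

static const std::string *lastAt(const std::map<int, std::string> &ByIdx,
                                 int Start, int End) {
  for (int i = End - 1; i >= Start; --i) {
    auto It = ByIdx.find(i);
    if (It != ByIdx.end())
      return &It->second;  // nearest surviving instruction below End
  }
  return nullptr;          // nothing but deleted slots in [Start, End)
}

int main() {
  std::map<int, std::string> ByIdx = {{0, "def"}, {8, "use"}};
  const std::string *MI = lastAt(ByIdx, 0, 12);   // slots 9..11 deleted
  std::printf("%s\n", MI ? MI->c_str() : "none"); // prints "use"
}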
@@ -2624,9 +2536,9 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
if (TargetRegisterInfo::isPhysicalRegister(reg))
- cerr << tri_->getName(reg);
+ errs() << tri_->getName(reg);
else
- cerr << "%reg" << reg;
+ errs() << "%reg" << reg;
}
void SimpleRegisterCoalescing::releaseMemory() {
@@ -2635,64 +2547,106 @@ void SimpleRegisterCoalescing::releaseMemory() {
ReMatDefs.clear();
}
-static bool isZeroLengthInterval(LiveInterval *li) {
+/// Returns true if the given live interval is zero length.
+static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
for (LiveInterval::Ranges::const_iterator
i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
- if (i->end - i->start > LiveInterval::InstrSlots::NUM)
+ if (li_->getPrevIndex(i->end) > i->start)
return false;
return true;
}
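
Both forms of the predicate say the same thing: no range may span more than one instruction, so the use follows the def immediately everywhere. Standalone, assuming the old fixed spacing of InstrSlots::NUM index slots per instruction:

// Standalone sketch of the zero-length-interval test.
#include <cstdio>
#include <utility>
#include <vector>

using Range = std::pair<int, int>; // [start, end) in index slots

static bool isZeroLength(const std::vector<Range> &Ranges,
                         int SlotsPerInstr) {
  for (const Range &R : Ranges)
    if (R.second - R.first > SlotsPerInstr)
      return false;  // some range outlives its defining instruction
  return true;
}

int main() {
  std::vector<Range> LI = {{0, 4}, {20, 24}};  // each use right after a def
  std::printf("%d\n", isZeroLength(LI, 4));    // 1: not worth spilling
}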
-/// TurnCopyIntoImpDef - If source of the specified copy is an implicit def,
-/// turn the copy into an implicit def.
-bool
-SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
- MachineBasicBlock *MBB,
- unsigned DstReg, unsigned SrcReg) {
- MachineInstr *CopyMI = &*I;
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
- if (!li_->hasInterval(SrcReg))
- return false;
- LiveInterval &SrcInt = li_->getInterval(SrcReg);
- if (!SrcInt.empty())
- return false;
- if (!li_->hasInterval(DstReg))
- return false;
- LiveInterval &DstInt = li_->getInterval(DstReg);
- const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx);
- // If the valno extends beyond this basic block, then it's not safe to delete
- // the val# or else livein information won't be correct.
- MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end);
- if (EndMBB != MBB)
- return false;
- DstInt.removeValNo(DstLR->valno);
- CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
- for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i)
- CopyMI->RemoveOperand(i);
- CopyMI->getOperand(0).setIsUndef();
- bool NoUse = mri_->use_empty(SrcReg);
- if (NoUse) {
- for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(SrcReg),
- RE = mri_->reg_end(); RI != RE; ) {
- assert(RI.getOperand().isDef());
- MachineInstr *DefMI = &*RI;
- ++RI;
- // The implicit_def source has no other uses, delete it.
- assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF);
- li_->RemoveMachineInstrFromMaps(DefMI);
- DefMI->eraseFromParent();
+void SimpleRegisterCoalescing::CalculateSpillWeights() {
+ SmallSet<unsigned, 4> Processed;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* MBB = mbbi;
+ LiveIndex MBBEnd = li_->getMBBEndIdx(MBB);
+ MachineLoop* loop = loopInfo->getLoopFor(MBB);
+ unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
+ bool isExit = loop ? loop->isLoopExit(MBB) : false;
+
+ for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end();
+ mii != mie; ++mii) {
+ MachineInstr *MI = mii;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &mopi = MI->getOperand(i);
+ if (!mopi.isReg() || mopi.getReg() == 0)
+ continue;
+ unsigned Reg = mopi.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
+ continue;
+ // Multiple uses of reg by the same instruction. It should not
+ // contribute to spill weight again.
+ if (!Processed.insert(Reg))
+ continue;
+
+ bool HasDef = mopi.isDef();
+ bool HasUse = !HasDef;
+ for (unsigned j = i+1; j != e; ++j) {
+ const MachineOperand &mopj = MI->getOperand(j);
+ if (!mopj.isReg() || mopj.getReg() != Reg)
+ continue;
+ HasDef |= mopj.isDef();
+ HasUse |= mopj.isUse();
+ if (HasDef && HasUse)
+ break;
+ }
+
+ LiveInterval &RegInt = li_->getInterval(Reg);
+ float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth);
+ if (HasDef && isExit) {
+ // Looks like this is a loop count variable update.
+ LiveIndex DefIdx =
+ li_->getDefIndex(li_->getInstructionIndex(MI));
+ const LiveRange *DLR =
+ li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+ if (DLR->end > MBBEnd)
+ Weight *= 3.0F;
+ }
+ RegInt.weight += Weight;
+ }
+ Processed.clear();
}
}
- // Mark uses of implicit_def isUndef.
- for (MachineRegisterInfo::use_iterator RI = mri_->use_begin(DstReg),
- RE = mri_->use_end(); RI != RE; ++RI) {
- assert((*RI).getParent() == MBB);
- RI.getOperand().setIsUndef();
- }
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+ LiveInterval &LI = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // If the live interval length is essentially zero, i.e. in every live
+ // range the use follows def immediately, it doesn't make sense to spill
+ // it and hope it will be easier to allocate for this li.
+ if (isZeroLengthInterval(&LI, li_)) {
+ LI.weight = HUGE_VALF;
+ continue;
+ }
- ++I;
- return true;
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> SpillIs;
+ if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isLoad)
+ LI.weight *= 0.9F;
+ else
+ LI.weight *= 0.5F;
+ }
+
+ // Slightly prefer live interval that has been assigned a preferred reg.
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
+ if (Hint.first || Hint.second)
+ LI.weight *= 1.01F;
+
+ // Divide the weight of the interval by its size. This encourages
+ // spilling of intervals that are large and have few uses, and
+ // discourages spilling of small intervals with many uses.
+ LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
+ }
+ }
}
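
A worked example of the shaping above. The base weight formula, (isDef + isUse) * 10^loopDepth, lives in LiveIntervals and is assumed here rather than shown in this hunk:

// Standalone arithmetic mirroring CalculateSpillWeights' shaping steps.
#include <cmath>
#include <cstdio>

int main() {
  float W = 0.0f;
  float WI = 2 * std::pow(10.0f, 1.0f); // def+use at loop depth 1 -> 20
  WI *= 3.0f;                           // def live out of the loop exit
  W += WI;                              // 60
  W += 1 * std::pow(10.0f, 0.0f);       // plain use at depth 0 -> 61
  // Post-pass shaping from the loop over intervals:
  W *= 0.5f;                            // all defs remat, none are loads
  W *= 1.01f;                           // has a register-allocation hint
  W /= 3 * 4;                           // ~3 instructions * InstrSlots::NUM
  std::printf("weight = %.4f\n", W);    // 2.5671
  return 0;
}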
@@ -2703,11 +2657,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
tri_ = tm_->getRegisterInfo();
tii_ = tm_->getInstrInfo();
li_ = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
loopInfo = &getAnalysis<MachineLoopInfo>();
- DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
- << "********** Function: "
- << ((Value*)mf_->getFunction())->getName() << '\n';
+ DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
allocatableRegs_ = tri_->getAllocatableSet(fn);
for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
@@ -2719,10 +2674,10 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
if (EnableJoining) {
joinIntervals();
DEBUG({
- DOUT << "********** INTERVALS POST JOINING **********\n";
+ errs() << "********** INTERVALS POST JOINING **********\n";
for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){
- I->second->print(DOUT, tri_);
- DOUT << "\n";
+ I->second->print(errs(), tri_);
+ errs() << "\n";
}
});
}
@@ -2733,29 +2688,40 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
mbbi != mbbe; ++mbbi) {
MachineBasicBlock* mbb = mbbi;
- unsigned loopDepth = loopInfo->getLoopDepth(mbb);
-
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
mii != mie; ) {
MachineInstr *MI = mii;
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (JoinedCopies.count(MI)) {
// Delete all coalesced copies.
+ bool DoDelete = true;
if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) &&
"Unrecognized copy instruction");
DstReg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+ // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // change this to an IMPLICIT_DEF later.
+ DoDelete = false;
}
if (MI->registerDefIsDead(DstReg)) {
LiveInterval &li = li_->getInterval(DstReg);
if (!ShortenDeadCopySrcLiveRange(li, MI))
ShortenDeadCopyLiveRange(li, MI);
+ DoDelete = true;
+ }
+ if (!DoDelete)
+ mii = next(mii);
+ else {
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
}
- li_->RemoveMachineInstrFromMaps(MI);
- mii = mbbi->erase(mii);
- ++numPeep;
continue;
}
@@ -2807,70 +2773,20 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
li_->RemoveMachineInstrFromMaps(MI);
mii = mbbi->erase(mii);
++numPeep;
- } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) {
- SmallSet<unsigned, 4> UniqueUses;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &mop = MI->getOperand(i);
- if (mop.isReg() && mop.getReg() &&
- TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
- unsigned reg = mop.getReg();
- // Multiple uses of reg by the same instruction. It should not
- // contribute to spill weight again.
- if (UniqueUses.count(reg) != 0)
- continue;
- LiveInterval &RegInt = li_->getInterval(reg);
- RegInt.weight +=
- li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
- UniqueUses.insert(reg);
- }
- }
+ } else {
++mii;
}
}
}
- for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
- LiveInterval &LI = *I->second;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // If the live interval length is essentially zero, i.e. in every live
- // range the use follows def immediately, it doesn't make sense to spill
- // it and hope it will be easier to allocate for this li.
- if (isZeroLengthInterval(&LI))
- LI.weight = HUGE_VALF;
- else {
- bool isLoad = false;
- SmallVector<LiveInterval*, 4> SpillIs;
- if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
- // If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If non of the defs are
- // loads, then it's potentially very cheap to re-materialize.
- // FIXME: this gets much more complicated once we support non-trivial
- // re-materialization.
- if (isLoad)
- LI.weight *= 0.9F;
- else
- LI.weight *= 0.5F;
- }
- }
-
- // Slightly prefer live interval that has been assigned a preferred reg.
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
- if (Hint.first || Hint.second)
- LI.weight *= 1.01F;
-
- // Divide the weight of the interval by its size. This encourages
- // spilling of intervals that are large and have few uses, and
- // discourages spilling of small intervals with many uses.
- LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
- }
- }
+ CalculateSpillWeights();
DEBUG(dump());
return true;
}
/// print - Implement the dump method.
-void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const {
+void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const {
li_->print(O, m);
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index d2c55810f60c..3ebe3a1f7de4 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/ADT/BitVector.h"
-#include <queue>
namespace llvm {
class SimpleRegisterCoalescing;
@@ -33,44 +32,8 @@ namespace llvm {
struct CopyRec {
MachineInstr *MI;
unsigned LoopDepth;
- bool isBackEdge;
- CopyRec(MachineInstr *mi, unsigned depth, bool be)
- : MI(mi), LoopDepth(depth), isBackEdge(be) {};
- };
-
- template<class SF> class JoinPriorityQueue;
-
- /// CopyRecSort - Sorting function for coalescer queue.
- ///
- struct CopyRecSort : public std::binary_function<CopyRec,CopyRec,bool> {
- JoinPriorityQueue<CopyRecSort> *JPQ;
- explicit CopyRecSort(JoinPriorityQueue<CopyRecSort> *jpq) : JPQ(jpq) {}
- CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {}
- bool operator()(CopyRec left, CopyRec right) const;
- };
-
- /// JoinQueue - A priority queue of copy instructions the coalescer is
- /// going to process.
- template<class SF>
- class JoinPriorityQueue {
- SimpleRegisterCoalescing *Rc;
- std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue;
-
- public:
- explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc)
- : Rc(rc), Queue(SF(this)) {}
-
- bool empty() const { return Queue.empty(); }
- void push(CopyRec R) { Queue.push(R); }
- CopyRec pop() {
- if (empty()) return CopyRec(0, 0, false);
- CopyRec R = Queue.top();
- Queue.pop();
- return R;
- }
-
- // Callbacks to SimpleRegisterCoalescing.
- unsigned getRepIntervalSize(unsigned Reg);
+ CopyRec(MachineInstr *mi, unsigned depth)
+ : MI(mi), LoopDepth(depth) {}
};
class SimpleRegisterCoalescing : public MachineFunctionPass,
@@ -82,14 +45,11 @@ namespace llvm {
const TargetInstrInfo* tii_;
LiveIntervals *li_;
const MachineLoopInfo* loopInfo;
+ AliasAnalysis *AA;
BitVector allocatableRegs_;
DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
- /// JoinQueue - A priority queue of copy instructions the coalescer is
- /// going to process.
- JoinPriorityQueue<CopyRecSort> *JoinQueue;
-
/// JoinedCopies - Keep track of copies eliminated due to coalescing.
///
SmallPtrSet<MachineInstr*, 32> JoinedCopies;
@@ -127,20 +87,8 @@ namespace llvm {
return false;
};
- /// getRepIntervalSize - Called from join priority queue sorting function.
- /// It returns the size of the interval that represent the given register.
- unsigned getRepIntervalSize(unsigned Reg) {
- if (!li_->hasInterval(Reg))
- return 0;
- return li_->getApproximateInstructionCount(li_->getInterval(Reg)) *
- LiveInterval::InstrSlots::NUM;
- }
-
/// print - Implement the dump method.
- virtual void print(std::ostream &O, const Module* = 0) const;
- void print(std::ostream *O, const Module* M = 0) const {
- if (O) print(*O, M);
- }
+ virtual void print(raw_ostream &O, const Module* = 0) const;
private:
/// joinIntervals - join compatible live intervals
@@ -176,7 +124,6 @@ namespace llvm {
/// classes. The registers may be either phys or virt regs.
bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
-
/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
@@ -199,20 +146,14 @@ namespace llvm {
/// TrimLiveIntervalToLastUse - If there is a last use in the same basic
/// block as the copy instruction, trim the live interval to the last use
/// and return true.
- bool TrimLiveIntervalToLastUse(unsigned CopyIdx,
+ bool TrimLiveIntervalToLastUse(LiveIndex CopyIdx,
MachineBasicBlock *CopyMBB,
LiveInterval &li, const LiveRange *LR);
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- MachineInstr *CopyMI);
-
- /// TurnCopyIntoImpDef - If source of the specified copy is an implicit def,
- /// turn the copy into an implicit def.
- bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
- MachineBasicBlock *MBB,
- unsigned DstReg, unsigned SrcReg);
+ unsigned DstSubIdx, MachineInstr *CopyMI);
/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
/// from an implicit def to another register can be coalesced away.
@@ -266,10 +207,6 @@ namespace llvm {
bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
unsigned Reg);
- /// isBackEdgeCopy - Return true if CopyMI is a back edge copy.
- ///
- bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const;
-
/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
@@ -277,10 +214,6 @@ namespace llvm {
/// subregister.
void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
- /// RemoveDeadImpDef - Remove implicit_def instructions which are
- /// "re-defining" registers due to insert_subreg coalescing. e.g.
- void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI);
-
/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
/// due to live range lengthening as the result of coalescing.
void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
@@ -302,8 +235,13 @@ namespace llvm {
/// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses.
- MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg,
- unsigned &LastUseIdx) const;
+ MachineOperand *lastRegisterUse(LiveIndex Start,
+ LiveIndex End, unsigned Reg,
+ LiveIndex &LastUseIdx) const;
+
+ /// CalculateSpillWeights - Compute spill weights for all virtual register
+ /// live intervals.
+ void CalculateSpillWeights();
void printRegName(unsigned reg) const;
};
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 000000000000..e987fa2fbc8e
--- /dev/null
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,520 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class VISIBILITY_HIDDEN SjLjEHPass : public FunctionPass {
+
+ const TargetLowering *TLI;
+
+ const Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *ResumeFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *SelectorFn;
+ Constant *ExceptionFn;
+
+ Value *CallSite;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ : FunctionPass(&ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch);
+ void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ bool insertSjLjEHSupport(Function &F);
+ };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public Interface To the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+ return new SjLjEHPass(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
+ const Type *VoidPtrTy =
+ Type::getInt8PtrTy(M.getContext());
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(M.getContext(),
+ VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ ResumeFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Resume",
+ Type::getVoidTy(M.getContext()),
+ VoidPtrTy,
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+ ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+ PersonalityFn = 0;
+
+ return true;
+}
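
For orientation, the context allocated from FunctionContextTy has this shape. This is a sketch of the layout implied by the field comments above, not the SjLj runtime's authoritative definition:

// Sketch only; field meanings follow the comments in doInitialization.
struct SjLjFunctionContext {
  void *Prev;         // __prev: previous entry in the unwind stack
  int CallSite;       // call_site: which invoke is active, for dispatch
  int Data[4];        // __data: exception ptr in [0], selector in [1]
  void *Personality;  // __personality: personality routine
  void *LSDA;         // __lsda: language-specific data area
  void *JBuf[5];      // __jbuf: builtin_setjmp's five-word jump buffer
};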
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+ // The runtime comes back to the dispatcher with the call_site - 1 in
+ // the context. Odd, but there it is.
+ ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo - 1);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(CallSiteNoC, CallSite, true, II); // volatile
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ // We still want this to look like an invoke so we emit the LSDA properly
+ // FIXME: ??? Or will this cause strangeness with mis-matched IDs like
+ // when it was in the front end?
+}
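
The numbering contract in miniature: invoke N stores N before the call, the runtime reports N - 1, and that is the value the dispatch switch sees. A standalone model, names illustrative:

// Standalone sketch of the call_site dispatch contract.
#include <cstdio>

// The runtime reports call_site - 1, so case K maps to invoke K + 1.
static void dispatch(int Reported) {
  switch (Reported) {
  case 0:  std::puts("landing pad of invoke #1"); break;
  case 1:  std::puts("landing pad of invoke #2"); break;
  default: std::puts("no local handler: keep unwinding"); break;
  }
}

int main() {
  dispatch(1); // invoke #2 threw: selects its landing pad
}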
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
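
The same walk written iteratively with an explicit worklist, as a standalone sketch over a generic predecessor map; the recursion above is fine for typical CFG depths, this is just the equivalent shape:

// Standalone worklist form of the backwards flood fill.
#include <map>
#include <set>
#include <vector>

using Block = int;
using PredMap = std::map<Block, std::vector<Block>>;

static void markLiveIn(Block BB, const PredMap &Preds,
                       std::set<Block> &LiveBBs) {
  std::vector<Block> Work(1, BB);
  while (!Work.empty()) {
    Block B = Work.back();
    Work.pop_back();
    if (!LiveBBs.insert(B).second)
      continue;  // already been here
    auto It = Preds.find(B);
    if (It != Preds.end())
      Work.insert(Work.end(), It->second.begin(), It->second.end());
  }
}

int main() {
  PredMap Preds = {{2, {1}}, {1, {0}}};
  std::set<Block> Live;
  markLiveIn(2, Preds, Live); // Live = {0, 1, 2}
}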
+
+/// splitLiveRangesLiveAcrossInvokes - Each value that is live across an unwind
+/// edge is spilled into a stack location, guaranteeing that there is nothing
+/// live across the unwind edge. This process also splits all critical edges
+/// coming out of invokes.
+void SjLjEHPass::
+splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ // This is always a no-op cast because we're casting AI to AI->getType() so
+ // src and destination types are identical. BitCast is the only possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Normally it is forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with to simply
+ // make NC its user.
+ NC->setOperand(0, AI);
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ // Look through the terminators of the basic blocks to find invokes, returns
+ // and unwinds
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+ // If we don't have any invokes or unwinds, there's nothing to do.
+ if (Unwinds.empty() && Invokes.empty()) return false;
+
+ // Find the eh.selector.* and eh.exception calls. We'll use the first
+ // eh.selector to determine the right personality function to use. For
+ // SJLJ, we always use the same personality for the whole function,
+ // not on a per-selector basis.
+ // FIXME: That's a bit ugly. Better way?
+ SmallVector<CallInst*,16> EH_Selectors;
+ SmallVector<CallInst*,16> EH_Exceptions;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() == SelectorFn) {
+ if (!PersonalityFn) PersonalityFn = CI->getOperand(2);
+ EH_Selectors.push_back(CI);
+ } else if (CI->getCalledFunction() == ExceptionFn) {
+ EH_Exceptions.push_back(CI);
+ }
+ }
+ }
+ }
+ // If we don't have any eh.selector calls, we can't determine the personality
+ // function. Without a personality function, we can't process exceptions.
+ if (!PersonalityFn) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ if (!Invokes.empty()) {
+ // We have invokes, so we need to add register/unregister calls to get
+ // this function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invokes.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ // We need to also keep around a reference to the call_site field
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The result of the eh.selector call will be replaced with a
+ // reference to the selector value returned in the function
+ // context. We leave the selector itself so the EH analysis later
+ // can use it.
+ for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
+ // eh.exception calls are replaced with references to the proper
+ // location in the context. Unlike eh.selector, the eh.exception
+ // calls are removed entirely.
+ for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+ // Possible for there to be duplicates, so check to make sure
+ // the instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
+
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context
+ // and continue.
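Stepping back, the shape being constructed here is the textbook setjmp-based dispatch. A minimal standalone C++ sketch of that pattern, assuming nothing from LLVM (Buf, CallSite, and the handler cases below are illustrative stand-ins for the function-context fields, not the real runtime ABI):

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;          // stand-in for the jmpbuf in the function context
static volatile int CallSite = 0; // stand-in for the call_site field

static void mayThrow() {
  CallSite = 1;                   // invoke #1 is active
  std::longjmp(Buf, 1);           // simulate the unwinder jumping back
}

int main() {
  if (setjmp(Buf) == 0) {         // zero: normal path, register context and go
    mayThrow();
    std::puts("normal return");
  } else {                        // non-zero: dispatch on the recorded call site
    switch (CallSite) {
    case 1:  std::puts("landing pad for invoke #1"); break;
    default: std::puts("not one of ours: keep unwinding"); break;
    }
  }
  return 0;
}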
+
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call).
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *FieldPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "jbuf_gep",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *ElemPtr =
+ GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator());
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, FieldPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "dispatch",
+ EntryBB->getTerminator());
+ // Check the return value of the setjmp. Non-zero goes to the dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+ // Register the function context and make sure it's known not to throw.
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions
+ // to mark their call_site values, and fill in the dispatch switch
+ // accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+ // The front end has likely added calls to _Unwind_Resume. We need
+ // to find those calls and mark the call_site as -1 immediately prior.
+ // resume is a noreturn function, so any block that has a call to it
+ // should end in an 'unreachable' instruction with the call immediately
+ // prior. That's how we'll search.
+ // ??? There's got to be a better way. This is fugly.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) {
+ BasicBlock::iterator I = BB->getTerminator();
+ // Check the previous instruction and see if it's a resume call
+ if (I == BB->begin()) continue;
+ if (CallInst *CI = dyn_cast<CallInst>(--I)) {
+ if (CI->getCalledFunction() == ResumeFn) {
+ Value *NegativeOne = Constant::getAllOnesValue(Int32Ty);
+ new StoreInst(NegativeOne, CallSite, true, I); // volatile
+ }
+ }
+ }
+
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+ }
+
+ return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+ bool Res = insertSjLjEHSupport(F);
+ return Res;
+}
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 405cd8087ac5..0277d64cdd96 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -13,12 +13,13 @@
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -50,13 +51,13 @@ protected:
/// Ensures there is space before the given machine instruction and returns
/// the instruction's new number.
- unsigned makeSpaceBefore(MachineInstr *mi) {
+ LiveIndex makeSpaceBefore(MachineInstr *mi) {
if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
lis->scaleNumbering(2);
ls->scaleNumbering(2);
}
- unsigned miIdx = lis->getInstructionIndex(mi);
+ LiveIndex miIdx = lis->getInstructionIndex(mi);
assert(lis->hasGapBeforeInstr(miIdx));
@@ -65,13 +66,13 @@ protected:
/// Ensures there is space after the given machine instruction and returns
/// the instruction's new number.
- unsigned makeSpaceAfter(MachineInstr *mi) {
+ LiveIndex makeSpaceAfter(MachineInstr *mi) {
if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
lis->scaleNumbering(2);
ls->scaleNumbering(2);
}
- unsigned miIdx = lis->getInstructionIndex(mi);
+ LiveIndex miIdx = lis->getInstructionIndex(mi);
assert(lis->hasGapAfterInstr(miIdx));
@@ -82,19 +83,19 @@ protected:
/// after the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding an appropriate
/// LiveInterval to the LiveIntervals analysis.
- unsigned insertStoreAfter(MachineInstr *mi, unsigned ss,
- unsigned vreg,
- const TargetRegisterClass *trc) {
+ LiveIndex insertStoreAfter(MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
MachineBasicBlock::iterator nextInstItr(next(mi));
- unsigned miIdx = makeSpaceAfter(mi);
+ LiveIndex miIdx = makeSpaceAfter(mi);
tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(next(mi));
MachineInstr *storeInst = &*storeInstItr;
- unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+ LiveIndex storeInstIdx = lis->getNextIndex(miIdx);
assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
"Store inst index already in use.");
@@ -107,15 +108,15 @@ protected:
/// Insert a store of the given vreg to the given stack slot immediately
/// before the given instruction. Returns the base index of the inserted
/// instruction.
- unsigned insertStoreBefore(MachineInstr *mi, unsigned ss,
- unsigned vreg,
- const TargetRegisterClass *trc) {
- unsigned miIdx = makeSpaceBefore(mi);
+ LiveIndex insertStoreBefore(MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+ LiveIndex miIdx = makeSpaceBefore(mi);
tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
MachineBasicBlock::iterator storeInstItr(prior(mi));
MachineInstr *storeInst = &*storeInstItr;
- unsigned storeInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+ LiveIndex storeInstIdx = lis->getPrevIndex(miIdx);
assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
"Store inst index already in use.");
@@ -130,14 +131,15 @@ protected:
unsigned vreg,
const TargetRegisterClass *trc) {
- unsigned storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
- unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
- end = lis->getUseIndex(storeInstIdx);
+ LiveIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
+ LiveIndex start = lis->getDefIndex(lis->getInstructionIndex(mi)),
+ end = lis->getUseIndex(storeInstIdx);
VNInfo *vni =
li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
- vni->kills.push_back(storeInstIdx);
- DOUT << " Inserting store range: [" << start << ", " << end << ")\n";
+ vni->addKill(storeInstIdx);
+ DEBUG(errs() << " Inserting store range: [" << start
+ << ", " << end << ")\n");
LiveRange lr(start, end, vni);
li->addRange(lr);
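The DOUT-to-DEBUG(errs() << ...) rewrites that recur throughout this patch swap the old global debug-stream macro for an explicit raw_ostream call guarded by the DEBUG macro. A minimal sketch of the new idiom, assuming the usual convention that DEBUG_TYPE is defined before including Debug.h (the output appears only in asserts builds run with -debug):

#define DEBUG_TYPE "spiller"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void noteRange(unsigned start, unsigned end) {
  // The whole statement compiles away in NDEBUG builds.
  DEBUG(errs() << "  Inserting store range: [" << start
               << ", " << end << ")\n");
}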
@@ -147,18 +149,18 @@ protected:
/// after the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding/removing an appropriate
/// range to the vreg's LiveInterval.
- unsigned insertLoadAfter(MachineInstr *mi, unsigned ss,
- unsigned vreg,
- const TargetRegisterClass *trc) {
+ LiveIndex insertLoadAfter(MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
MachineBasicBlock::iterator nextInstItr(next(mi));
- unsigned miIdx = makeSpaceAfter(mi);
+ LiveIndex miIdx = makeSpaceAfter(mi);
tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(next(mi));
MachineInstr *loadInst = &*loadInstItr;
- unsigned loadInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+ LiveIndex loadInstIdx = lis->getNextIndex(miIdx);
assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
"Store inst index already in use.");
@@ -172,15 +174,15 @@ protected:
/// before the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding an appropriate
/// LiveInterval to the LiveIntervals analysis.
- unsigned insertLoadBefore(MachineInstr *mi, unsigned ss,
- unsigned vreg,
- const TargetRegisterClass *trc) {
- unsigned miIdx = makeSpaceBefore(mi);
+ LiveIndex insertLoadBefore(MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+ LiveIndex miIdx = makeSpaceBefore(mi);
tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(prior(mi));
MachineInstr *loadInst = &*loadInstItr;
- unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+ LiveIndex loadInstIdx = lis->getPrevIndex(miIdx);
assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
"Load inst index already in use.");
@@ -195,14 +197,15 @@ protected:
unsigned vreg,
const TargetRegisterClass *trc) {
- unsigned loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
- unsigned start = lis->getDefIndex(loadInstIdx),
- end = lis->getUseIndex(lis->getInstructionIndex(mi));
+ LiveIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
+ LiveIndex start = lis->getDefIndex(loadInstIdx),
+ end = lis->getUseIndex(lis->getInstructionIndex(mi));
VNInfo *vni =
li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
- vni->kills.push_back(lis->getInstructionIndex(mi));
- DOUT << " Intserting load range: [" << start << ", " << end << ")\n";
+ vni->addKill(lis->getInstructionIndex(mi));
+ DEBUG(errs() << " Intserting load range: [" << start
+ << ", " << end << ")\n");
LiveRange lr(start, end, vni);
li->addRange(lr);
@@ -214,7 +217,7 @@ protected:
/// immediately before each use, and stores after each def. No folding is
/// attempted.
std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
- DOUT << "Spilling everywhere " << *li << "\n";
+ DEBUG(errs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
"Attempting to spill already spilled value.");
@@ -222,7 +225,7 @@ protected:
assert(!li->isStackSlot() &&
"Trying to spill a stack slot.");
- DOUT << "Trivial spill everywhere of reg" << li->reg << "\n";
+ DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n");
std::vector<LiveInterval*> added;
@@ -234,7 +237,7 @@ protected:
MachineInstr *mi = &*regItr;
- DOUT << " Processing " << *mi;
+ DEBUG(errs() << " Processing " << *mi);
do {
++regItr;
@@ -318,23 +321,21 @@ public:
vrm->assignVirt2StackSlot(li->reg, ss);
MachineInstr *mi = 0;
- unsigned storeIdx = 0;
+ LiveIndex storeIdx = LiveIndex();
if (valno->isDefAccurate()) {
// If we have an accurate def we can just grab an iterator to the instr
// after the def.
mi = lis->getInstructionFromIndex(valno->def);
- storeIdx = insertStoreAfter(mi, ss, li->reg, trc) +
- LiveInterval::InstrSlots::DEF;
+ storeIdx = lis->getDefIndex(insertStoreAfter(mi, ss, li->reg, trc));
} else {
// if we get here we have a PHI def.
mi = &lis->getMBBFromIndex(valno->def)->front();
- storeIdx = insertStoreBefore(mi, ss, li->reg, trc) +
- LiveInterval::InstrSlots::DEF;
+ storeIdx = lis->getDefIndex(insertStoreBefore(mi, ss, li->reg, trc));
}
MachineBasicBlock *defBlock = mi->getParent();
- unsigned loadIdx = 0;
+ LiveIndex loadIdx = LiveIndex();
// Now we need to find the load...
MachineBasicBlock::iterator useItr(mi);
@@ -342,13 +343,11 @@ public:
if (useItr != defBlock->end()) {
MachineInstr *loadInst = useItr;
- loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc) +
- LiveInterval::InstrSlots::USE;
+ loadIdx = lis->getUseIndex(insertLoadBefore(loadInst, ss, li->reg, trc));
}
else {
MachineInstr *loadInst = &defBlock->back();
- loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc) +
- LiveInterval::InstrSlots::USE;
+ loadIdx = lis->getUseIndex(insertLoadAfter(loadInst, ss, li->reg, trc));
}
li->removeRange(storeIdx, loadIdx, true);
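The unsigned-to-LiveIndex migration in this file trades raw index arithmetic (miIdx + LiveInterval::InstrSlots::NUM) for accessors such as getNextIndex(). A rough sketch of that design idea, with invented names (LiveIndexSketch, IndexerSketch, SlotsPerInstr) rather than LLVM's actual definitions:

// An opaque index type: clients may compare indices but cannot do
// arithmetic on them, so the slot encoding stays private to the analysis.
class LiveIndexSketch {
  unsigned Idx;
  explicit LiveIndexSketch(unsigned I) : Idx(I) {}
  friend class IndexerSketch;
public:
  LiveIndexSketch() : Idx(0) {}
  bool operator==(LiveIndexSketch RHS) const { return Idx == RHS.Idx; }
};

class IndexerSketch {
  static const unsigned SlotsPerInstr = 4; // assumed slot granularity
public:
  LiveIndexSketch getNextIndex(LiveIndexSketch I) const {
    return LiveIndexSketch(I.Idx + SlotsPerInstr);
  }
  LiveIndexSketch getPrevIndex(LiveIndexSketch I) const {
    return LiveIndexSketch(I.Idx - SlotsPerInstr);
  }
};

With the arithmetic hidden behind the analysis, renumbering (the scaleNumbering calls above) cannot silently invalidate hand-computed offsets.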
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index c179f1e3df97..350bc6e1ade7 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -148,7 +148,8 @@ bool StackProtector::InsertStackProtectors() {
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
//
- PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty);
+ PointerType *PtrTy = PointerType::getUnqual(
+ Type::getInt8Ty(RI->getContext()));
StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
BasicBlock &Entry = F->getEntryBlock();
@@ -201,7 +202,7 @@ bool StackProtector::InsertStackProtectors() {
// Generate the stack protector instructions in the old basic block.
LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
LoadInst *LI2 = new LoadInst(AI, "", true, BB);
- ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
BranchInst::Create(NewBB, FailBB, Cmp, BB);
}
@@ -215,10 +216,12 @@ bool StackProtector::InsertStackProtectors() {
/// CreateFailBB - Create a basic block to jump to when the stack protector
/// check fails.
BasicBlock *StackProtector::CreateFailBB() {
- BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F);
+ BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+ "CallStackCheckFailBlk", F);
Constant *StackChkFail =
- M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL);
+ M->getOrInsertFunction("__stack_chk_fail",
+ Type::getVoidTy(F->getContext()), NULL);
CallInst::Create(StackChkFail, "", FailBB);
- new UnreachableInst(FailBB);
+ new UnreachableInst(F->getContext(), FailBB);
return FailBB;
}
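As a toy model of the guard logic this pass emits (plain C++, not the real __stack_chk_guard/__stack_chk_fail ABI; GuardModel and the abort call are stand-ins):

#include <cstdio>
#include <cstdlib>

static unsigned long GuardModel = 0xdeadbeefUL; // stand-in for __stack_chk_guard

int main() {
  unsigned long Slot = GuardModel;  // prologue: copy the guard into a stack slot
  // ... function body that might overflow a local buffer ...
  if (Slot != GuardModel) {         // epilogue: compare before returning
    std::fputs("stack smashing detected\n", stderr);
    std::abort();                   // stand-in for the __stack_chk_fail path
  }
  return 0;
}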
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 582464478cfc..fad0808c8931 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
@@ -97,6 +98,7 @@ namespace {
MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addRequired<LiveStacks>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
@@ -197,7 +199,7 @@ void StackSlotColoring::InitializeSlots() {
Assignments.resize(LastFI);
// Gather all spill slots into a list.
- DOUT << "Spill slot intervals:\n";
+ DEBUG(errs() << "Spill slot intervals:\n");
for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
LiveInterval &li = i->second;
DEBUG(li.dump());
@@ -209,7 +211,7 @@ void StackSlotColoring::InitializeSlots() {
OrigSizes[FI] = MFI->getObjectSize(FI);
AllColors.set(FI);
}
- DOUT << '\n';
+ DEBUG(errs() << '\n');
// Sort them by weight.
std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
@@ -241,7 +243,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
return false;
bool Changed = false;
- DOUT << "Assigning unused registers to spill slots:\n";
+ DEBUG(errs() << "Assigning unused registers to spill slots:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
int SS = li->getStackSlotIndex();
@@ -271,7 +273,8 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
AllColored = false;
continue;
} else {
- DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n';
+ DEBUG(errs() << "Assigning fi#" << RSS << " to "
+ << TRI->getName(Reg) << '\n');
ColoredRegs.push_back(Reg);
SlotMapping[RSS] = Reg;
SlotIsReg.set(RSS);
@@ -298,7 +301,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
++NumEliminated;
}
}
- DOUT << '\n';
+ DEBUG(errs() << '\n');
return Changed;
}
@@ -333,7 +336,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Record the assignment.
Assignments[Color].push_back(li);
int FI = li->getStackSlotIndex();
- DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n";
+ DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
// objects sharing the same slot, then make sure the size and alignment
@@ -357,7 +360,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
BitVector SlotIsReg(NumObjs);
BitVector UsedColors(NumObjs);
- DOUT << "Color spill slot intervals:\n";
+ DEBUG(errs() << "Color spill slot intervals:\n");
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
@@ -371,7 +374,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
Changed |= (SS != NewSS);
}
- DOUT << "\nSpill slots after coloring:\n";
+ DEBUG(errs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
int SS = li->getStackSlotIndex();
@@ -383,7 +386,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
#ifndef NDEBUG
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
DEBUG(SSIntervals[i]->dump());
- DOUT << '\n';
+ DEBUG(errs() << '\n');
#endif
// Can we "color" a stack slot with a unused register?
@@ -415,7 +418,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
// Delete unused stack slots.
while (NextColor != -1) {
- DOUT << "Removing unused stack object fi#" << NextColor << "\n";
+ DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n");
MFI->RemoveStackObject(NextColor);
NextColor = AllColors.find_next(NextColor);
}
@@ -449,6 +452,7 @@ bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
/// to old frame index with new one.
void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
int NewFI, MachineFunction &MF) {
+ // Update the operands.
for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isFI())
@@ -459,22 +463,15 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
MO.setIndex(NewFI);
}
- // Update the MachineMemOperand for the new memory location.
- // FIXME: We need a better method of managing these too.
- SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(),
- MI->memoperands_end());
- MI->clearMemOperands(MF);
+ // Update the memory references. This changes the MachineMemOperands
+ // directly. They may be in use by multiple instructions; however, all
+ // instructions using OldFI are being rewritten to use NewFI.
const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
- for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) {
- if (MMOs[i].getValue() != OldSV)
- MI->addMemOperand(MF, MMOs[i]);
- else {
- MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI),
- MMOs[i].getFlags(), MMOs[i].getOffset(),
- MMOs[i].getSize(), MMOs[i].getAlignment());
- MI->addMemOperand(MF, MMO);
- }
- }
+ const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I)
+ if ((*I)->getValue() == OldSV)
+ (*I)->setValue(NewSV);
}
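The new RewriteInstruction body retargets shared memory-operand objects in place rather than rebuilding each instruction's list. A simplified standalone sketch of that approach (MemRefSketch is an invented stand-in for MachineMemOperand):

#include <vector>

struct MemRefSketch { const void *SV; }; // stand-in for MachineMemOperand

// Because the MemRefSketch objects are shared rather than copied per
// instruction, every instruction that referenced OldSV sees NewSV afterwards.
static void retarget(std::vector<MemRefSketch*> &MMOs,
                     const void *OldSV, const void *NewSV) {
  for (size_t i = 0, e = MMOs.size(); i != e; ++i)
    if (MMOs[i]->SV == OldSV)
      MMOs[i]->SV = NewSV;
}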
/// PropagateBackward - Traverse backward and look for the definition of
@@ -503,7 +500,16 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
if (Reg == OldReg) {
if (MO.isImplicit())
return false;
- const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+
+ // Abort if the use is actually a sub-register def. We don't have enough
+ // information to figure out if it is really legal.
+ if (MO.getSubReg() ||
+ TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ TID.getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ TID.getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+ return false;
+
+ const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
if (RC && !RC->contains(NewReg))
return false;
@@ -547,7 +553,6 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
SmallVector<MachineOperand*, 4> Uses;
while (++MII != MBB->end()) {
- bool FoundUse = false;
bool FoundKill = false;
const TargetInstrDesc &TID = MII->getDesc();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
@@ -561,12 +566,18 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
if (MO.isDef() || MO.isImplicit())
return false;
- const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+ // Abort if the use is actually a sub-register use. We don't have enough
+ // information to figure out if it is really legal.
+ if (MO.getSubReg() ||
+ TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)
+ return false;
+
+ const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
if (RC && !RC->contains(NewReg))
return false;
- FoundUse = true;
if (MO.isKill())
FoundKill = true;
+
Uses.push_back(&MO);
} else if (TRI->regsOverlap(Reg, NewReg) ||
TRI->regsOverlap(Reg, OldReg))
@@ -593,7 +604,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
MachineBasicBlock *MBB = MI->getParent();
if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
if (PropagateForward(MI, MBB, DstReg, Reg)) {
- DOUT << "Eliminated load: ";
+ DEBUG(errs() << "Eliminated load: ");
DEBUG(MI->dump());
++NumLoadElim;
} else {
@@ -609,7 +620,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
}
} else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
- DOUT << "Eliminated store: ";
+ DEBUG(errs() << "Eliminated store: ");
DEBUG(MI->dump());
++NumStoreElim;
} else {
@@ -687,7 +698,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
- DOUT << "********** Stack Slot Coloring **********\n";
+ DEBUG(errs() << "********** Stack Slot Coloring **********\n");
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index ca9952863b7c..48d6dc1db4c6 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -71,6 +71,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<LiveIntervals>();
@@ -294,7 +295,7 @@ StrongPHIElimination::computeDomForest(
static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
LiveIntervals& LI) {
LiveInterval& I = LI.getOrCreateInterval(r);
- unsigned idx = LI.getMBBStartIdx(MBB);
+ LiveIndex idx = LI.getMBBStartIdx(MBB);
return I.liveAt(idx);
}
@@ -427,7 +428,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
}
LiveInterval& PI = LI.getOrCreateInterval(DestReg);
- unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
+ LiveIndex pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
@@ -553,8 +554,8 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
// Add the renaming set for this PHI node to our overall renaming information
for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
QE = PHIUnion.end(); QI != QE; ++QI) {
- DOUT << "Adding Renaming: " << QI->first << " -> "
- << P->getOperand(0).getReg() << "\n";
+ DEBUG(errs() << "Adding Renaming: " << QI->first << " -> "
+ << P->getOperand(0).getReg() << "\n");
}
RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
@@ -696,7 +697,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
TII->copyRegToReg(*PI->getParent(), PI, t,
curr.second, RC, RC);
- DOUT << "Inserted copy from " << curr.second << " to " << t << "\n";
+ DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t
+ << "\n");
// Push temporary on Stacks
Stacks[curr.second].push_back(t);
@@ -712,8 +714,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
map[curr.first], RC, RC);
map[curr.first] = curr.second;
- DOUT << "Inserted copy from " << curr.first << " to "
- << curr.second << "\n";
+ DEBUG(errs() << "Inserted copy from " << curr.first << " to "
+ << curr.second << "\n");
// Push this copy onto InsertedPHICopies so we can
// update LiveIntervals with it.
@@ -746,7 +748,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
LiveInterval& I = LI.getInterval(curr.second);
MachineBasicBlock::iterator term = MBB->getFirstTerminator();
- unsigned endIdx = 0;
+ LiveIndex endIdx = LiveIndex();
if (term != MBB->end())
endIdx = LI.getInstructionIndex(term);
else
@@ -782,16 +784,15 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
if (RegHandled.insert(I->first).second) {
LiveInterval& Int = LI.getOrCreateInterval(I->first);
- unsigned instrIdx = LI.getInstructionIndex(I->second);
- if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
- Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
- LI.getMBBEndIdx(I->second->getParent())+1,
+ LiveIndex instrIdx = LI.getInstructionIndex(I->second);
+ if (Int.liveAt(LI.getDefIndex(instrIdx)))
+ Int.removeRange(LI.getDefIndex(instrIdx),
+ LI.getNextSlot(LI.getMBBEndIdx(I->second->getParent())),
true);
LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
- R.valno->copy = I->second;
- R.valno->def =
- LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second));
+ R.valno->setCopy(I->second);
+ R.valno->def = LI.getDefIndex(LI.getInstructionIndex(I->second));
}
}
}
@@ -817,7 +818,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
// Remove the live range for the old vreg.
LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining(
- LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+ LI.getUseIndex(LI.getInstructionIndex(I)));
if (OldLR != OldInt.end())
OldInt.removeRange(*OldLR, true);
@@ -829,11 +830,11 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
VNInfo* FirstVN = *Int.vni_begin();
FirstVN->setHasPHIKill(false);
if (I->getOperand(i).isKill())
- FirstVN->kills.push_back(
- LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+ FirstVN->addKill(
+ LI.getUseIndex(LI.getInstructionIndex(I)));
LiveRange LR (LI.getMBBStartIdx(I->getParent()),
- LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1,
+ LI.getNextSlot(LI.getUseIndex(LI.getInstructionIndex(I))),
FirstVN);
Int.addRange(LR);
@@ -868,8 +869,8 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
LiveRange R = *I;
- unsigned Start = R.start;
- unsigned End = R.end;
+ LiveIndex Start = R.start;
+ LiveIndex End = R.end;
if (LHS.getLiveRangeContaining(Start))
return false;
@@ -927,7 +928,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
unsigned reg = OI->first;
++OI;
I->second.erase(reg);
- DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n";
+ DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first
+ << "\n");
}
}
}
@@ -944,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
while (I->second.size()) {
std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
- DOUT << "Renaming: " << SI->first << " -> " << I->first << "\n";
+ DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
if (SI->first != I->first) {
if (mergeLiveIntervals(I->first, SI->first)) {
@@ -965,19 +967,19 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
LI.computeNumbering();
LiveInterval& Int = LI.getOrCreateInterval(I->first);
- unsigned instrIdx =
+ LiveIndex instrIdx =
LI.getInstructionIndex(--SI->second->getFirstTerminator());
- if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
- Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
- LI.getMBBEndIdx(SI->second)+1, true);
+ if (Int.liveAt(LI.getDefIndex(instrIdx)))
+ Int.removeRange(LI.getDefIndex(instrIdx),
+ LI.getNextSlot(LI.getMBBEndIdx(SI->second)), true);
LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
--SI->second->getFirstTerminator());
- R.valno->copy = --SI->second->getFirstTerminator();
- R.valno->def = LiveIntervals::getDefIndex(instrIdx);
+ R.valno->setCopy(--SI->second->getFirstTerminator());
+ R.valno->def = LI.getDefIndex(instrIdx);
- DOUT << "Renaming failed: " << SI->first << " -> "
- << I->first << "\n";
+ DEBUG(errs() << "Renaming failed: " << SI->first << " -> "
+ << I->first << "\n");
}
}
@@ -1009,7 +1011,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
if (PI.containsOneValue()) {
LI.removeInterval(DestReg);
} else {
- unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
PI.removeRange(*PI.getLiveRangeContaining(idx), true);
}
} else {
@@ -1023,8 +1025,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
LiveInterval& InputI = LI.getInterval(reg);
if (MBB != PInstr->getParent() &&
InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
- InputI.expiredAt(LI.getInstructionIndex(PInstr) +
- LiveInterval::InstrSlots::NUM))
+ InputI.expiredAt(LI.getNextIndex(LI.getInstructionIndex(PInstr))))
InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
LI.getInstructionIndex(PInstr),
true);
@@ -1032,7 +1033,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
// If the PHI is not dead, then the valno defined by the PHI
// now has an unknown def.
- unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
const LiveRange* PLR = PI.getLiveRangeContaining(idx);
PLR->valno->setIsPHIDef(true);
LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index b7595990de74..c646869e8a73 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -13,21 +13,35 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// commuteInstruction - The default implementation of this method just exchanges
-// operand 1 and 2.
+// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
const TargetInstrDesc &TID = MI->getDesc();
bool HasDef = TID.getNumDefs();
- unsigned Idx1 = HasDef ? 1 : 0;
- unsigned Idx2 = HasDef ? 2 : 1;
+ if (HasDef && !MI->getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ llvm_report_error(Msg.str());
+ }
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
@@ -70,26 +84,24 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
return MI;
}
-/// CommuteChangesDestination - Return true if commuting the specified
-/// instruction will also changes the destination operand. Also return the
-/// current operand index of the would be new destination register by
-/// reference. This can happen when the commutable instruction is also a
-/// two-address instruction.
-bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI,
- unsigned &OpIdx) const{
+/// findCommutedOpIndices - If the specified MI is commutable, return the two
+/// operand indices whose values would swap. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.getNumDefs())
+ if (!TID.isCommutable())
return false;
- assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
- "This only knows how to commute register operands so far");
- if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
- // Must be two address instruction!
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
- "Expecting a two-address instruction!");
- OpIdx = 2;
- return true;
- }
- return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = TID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
}
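The default convention implemented above places the two commutable sources immediately after the defs. A tiny sketch of just that index computation, assuming the v0 = op v1, v2 shape the comment describes (defaultCommuteIndices is an invented name):

#include <utility>

// For "v0 = op v1, v2", the swappable sources are the first two operands
// after the defs; targets with other operand shapes must override the hook.
static std::pair<unsigned, unsigned> defaultCommuteIndices(unsigned NumDefs) {
  return std::make_pair(NumDefs, NumDefs + 1);
}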
@@ -122,9 +134,12 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
+ unsigned SubIdx,
const MachineInstr *Orig) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
+ MachineOperand &MO = MI->getOperand(0);
+ MO.setReg(DestReg);
+ MO.setSubReg(SubIdx);
MBB.insert(I, MI);
}
@@ -171,11 +186,11 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FrameIndex) != -1);
- MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex),
- Flags,
- MFI.getObjectOffset(FrameIndex),
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex),
+ Flags, /*Offset=*/0,
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
NewMI->addMemOperand(MF, MMO);
return NewMI;
@@ -200,9 +215,93 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
if (!NewMI) return 0;
// Copy the memoperands from the load to the folded instruction.
- for (std::list<MachineMemOperand>::iterator I = LoadMI->memoperands_begin(),
- E = LoadMI->memoperands_end(); I != E; ++I)
- NewMI->addMemOperand(MF, *I);
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
return NewMI;
}
+
+bool
+TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
+ MI,
+ AliasAnalysis *
+ AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+
+ // Avoid instructions obviously unsafe for remat.
+ if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
+ TID.mayStore())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.def_empty(Reg))
+ return false;
+ BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
+ if (AllocatableRegs.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI.def_empty(AliasReg))
+ return false;
+ if (AllocatableRegs.test(AliasReg))
+ return false;
+ }
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def, and that in the first operand.
+ if (MO.isDef() != (i == 0))
+ return false;
+
+ // For the def, it should be the only def of that register.
+ if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() ||
+ MRI.isLiveIn(Reg)))
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 3c404046f15e..a5a0f5bdcc22 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -62,6 +63,7 @@ namespace {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveVariables *LV;
+ AliasAnalysis *AA;
// DistanceMap - Keep track the distance of a MI from the start of the
// current basic block.
@@ -106,13 +108,31 @@ namespace {
MachineFunction::iterator &mbbi,
unsigned RegB, unsigned Dist);
+ typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
+ bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist);
+ bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned regB, unsigned regBIdx, unsigned Dist);
+
+ bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist);
+
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+
public:
static char ID; // Pass identification, replacement for typeid
TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
@@ -143,7 +163,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MachineBasicBlock::iterator OldPos) {
// Check if it's safe to move this instruction.
bool SeenStore = true; // Be conservative.
- if (!MI->isSafeToMove(TII, SeenStore))
+ if (!MI->isSafeToMove(TII, SeenStore, AA))
return false;
unsigned DefReg = 0;
@@ -556,15 +576,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
MachineFunction::iterator &mbbi,
unsigned RegB, unsigned RegC, unsigned Dist) {
MachineInstr *MI = mi;
- DOUT << "2addr: COMMUTING : " << *MI;
+ DEBUG(errs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI == 0) {
- DOUT << "2addr: COMMUTING FAILED!\n";
+ DEBUG(errs() << "2addr: COMMUTING FAILED!\n");
return false;
}
- DOUT << "2addr: COMMUTED TO: " << *NewMI;
+ DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI);
// If the instruction changed to commute it, update livevar.
if (NewMI != MI) {
if (LV)
@@ -611,8 +631,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
unsigned RegB, unsigned Dist) {
MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
if (NewMI) {
- DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
- DOUT << "2addr: TO 3-ADDR: " << *NewMI;
+ DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
@@ -734,25 +754,174 @@ static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
return true;
}
+/// canUpdateDeletedKills - Check if all the registers listed in Kills are
+/// killed by instructions in MBB preceding the current instruction at
+/// position Dist. If so, return true and record information about the
+/// preceding kills in NewKills.
+bool TwoAddressInstructionPass::
+canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist) {
+ while (!Kills.empty()) {
+ unsigned Kill = Kills.back();
+ Kills.pop_back();
+ if (TargetRegisterInfo::isPhysicalRegister(Kill))
+ return false;
+
+ MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
+ if (!LastKill)
+ return false;
+
+ bool isModRef = LastKill->modifiesRegister(Kill);
+ NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
+ LastKill));
+ }
+ return true;
+}
+
+/// DeleteUnusedInstr - If an instruction with a tied register operand can
+/// be safely deleted, just delete it.
+bool
+TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned regB, unsigned regBIdx,
+ unsigned Dist) {
+ // Check if the instruction has no side effects and if all its defs are dead.
+ SmallVector<unsigned, 4> Kills;
+ if (!isSafeToDelete(mi, regB, TII, Kills))
+ return false;
+
+ // If this instruction kills some virtual registers, we need to
+ // update the kill information. If it's not possible to do so,
+ // then bail out.
+ SmallVector<NewKill, 4> NewKills;
+ if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
+ return false;
+
+ if (LV) {
+ while (!NewKills.empty()) {
+ MachineInstr *NewKill = NewKills.back().second;
+ unsigned Kill = NewKills.back().first.first;
+ bool isDead = NewKills.back().first.second;
+ NewKills.pop_back();
+ if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+ if (isDead)
+ LV->addVirtualRegisterDead(Kill, NewKill);
+ else
+ LV->addVirtualRegisterKilled(Kill, NewKill);
+ }
+ }
+
+ // If regB was marked as a kill, update its Kills list.
+ if (mi->getOperand(regBIdx).isKill())
+ LV->removeVirtualRegisterKilled(regB, mi);
+ }
+
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = nmi;
+ return true;
+}
+
+/// TryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if the tied operands
+/// are eliminated altogether.
+bool TwoAddressInstructionPass::
+TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+ const TargetInstrDesc &TID = mi->getDesc();
+ unsigned regA = mi->getOperand(DstIdx).getReg();
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+ // If regA is dead and the instruction can be deleted, just delete
+ // it so it doesn't clobber regB.
+ bool regBKilled = isKilled(*mi, regB, MRI, TII);
+ if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ DeleteUnusedInstr(mi, nmi, mbbi, regB, SrcIdx, Dist)) {
+ ++NumDeletes;
+ return true; // Done with this instruction.
+ }
+
+ // Check if it is profitable to commute the operands.
+ unsigned SrcOp1, SrcOp2;
+ unsigned regC = 0;
+ unsigned regCIdx = ~0U;
+ bool TryCommute = false;
+ bool AggressiveCommute = false;
+ if (TID.isCommutable() && mi->getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (SrcIdx == SrcOp1)
+ regCIdx = SrcOp2;
+ else if (SrcIdx == SrcOp2)
+ regCIdx = SrcOp1;
+
+ if (regCIdx != ~0U) {
+ regC = mi->getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ // If C dies but B does not, swap the B and C operands.
+ // This makes the live ranges of A and C joinable.
+ TryCommute = true;
+ else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ TryCommute = true;
+ AggressiveCommute = true;
+ }
+ }
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return false;
+ }
+
+ if (TID.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA)) {
+ // Try to convert it.
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+ return false;
+}
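When none of these transformations eliminates the tied pair, runOnMachineFunction below falls back to the classic two-address lowering. A toy of that a = b op c to a = b; a = a op c rewrite in plain C++ (not MachineInstrs):

#include <cstdio>

int main() {
  int b = 6, c = 7;
  // Three-address form would be: a = b * c (all three registers distinct).
  // Two-address form after the pass:
  int a = b;  // prepended copy:  a = b
  a = a * c;  // tied op: the destination doubles as the first source
  std::printf("%d\n", a); // prints 42
  return 0;
}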
+
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DOUT << "Machine Function\n";
+ DEBUG(errs() << "Machine Function\n");
const TargetMachine &TM = MF.getTarget();
MRI = &MF.getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
LV = getAnalysisIfAvailable<LiveVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
bool MadeChange = false;
- DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
- DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(errs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
// ReMatRegs - Keep track of the registers whose def's are remat'ed.
BitVector ReMatRegs;
ReMatRegs.resize(MRI->getLastVirtReg()+1);
+ typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
+ TiedOperandMap;
+ TiedOperandMap TiedOperands(4);
+
SmallPtrSet<MachineInstr*, 8> Processed;
for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
mbbi != mbbe; ++mbbi) {
@@ -771,175 +940,102 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
ProcessCopy(&*mi, &*mbbi, Processed);
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
? mi->getNumOperands() : TID.getNumOperands();
- for (unsigned si = 0; si < NumOps; ++si) {
- unsigned ti = 0;
- if (!mi->isRegTiedToDefOperand(si, &ti))
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
continue;
if (FirstTied) {
+ FirstTied = false;
++NumTwoAddressInstrs;
- DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+ DEBUG(errs() << '\t' << *mi);
}
- FirstTied = false;
+ assert(mi->getOperand(SrcIdx).isReg() &&
+ mi->getOperand(SrcIdx).getReg() &&
+ mi->getOperand(SrcIdx).isUse() &&
+ "two address instruction invalid");
- assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() &&
- mi->getOperand(si).isUse() && "two address instruction invalid");
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+ TiedOperandMap::iterator OI = TiedOperands.find(regB);
+ if (OI == TiedOperands.end()) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
+ OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
+ }
+ OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
+ }
- // If the two operands are the same we just remove the use
- // and mark the def as def&use, otherwise we have to insert a copy.
- if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
- // Rewrite:
- // a = b op c
- // to:
- // a = b
- // a = a op c
- unsigned regA = mi->getOperand(ti).getReg();
- unsigned regB = mi->getOperand(si).getReg();
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+
+ // If the registers are already equal, nothing needs to be done.
+ if (mi->getOperand(SrcIdx).getReg() ==
+ mi->getOperand(DstIdx).getReg())
+ break; // Done with this instruction.
+
+ if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
+ break; // The tied operands have been eliminated.
+ }
+
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned regB = OI->first;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+ unsigned regA = mi->getOperand(DstIdx).getReg();
+ // Grab regB from the instruction because it may have changed if the
+ // instruction was commuted.
+ regB = mi->getOperand(SrcIdx).getReg();
+
+ if (regA == regB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = regA;
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
- "cannot update physical register live information");
+ "cannot make instruction into two-address form");
#ifndef NDEBUG
- // First, verify that we don't have a use of a in the instruction (a =
- // b + a for example) because our transformation will not work. This
- // should never occur because we are in SSA form.
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
for (unsigned i = 0; i != mi->getNumOperands(); ++i)
- assert(i == ti ||
+ assert(i == DstIdx ||
!mi->getOperand(i).isReg() ||
mi->getOperand(i).getReg() != regA);
#endif
- // If this instruction is not the killing user of B, see if we can
- // rearrange the code to make it so. Making it the killing user will
- // allow us to coalesce A and B together, eliminating the copy we are
- // about to insert.
- if (!isKilled(*mi, regB, MRI, TII)) {
- // If regA is dead and the instruction can be deleted, just delete
- // it so it doesn't clobber regB.
- SmallVector<unsigned, 4> Kills;
- if (mi->getOperand(ti).isDead() &&
- isSafeToDelete(mi, regB, TII, Kills)) {
- SmallVector<std::pair<std::pair<unsigned, bool>
- ,MachineInstr*>, 4> NewKills;
- bool ReallySafe = true;
- // If this instruction kills some virtual registers, we need
- // update the kill information. If it's not possible to do so,
- // then bail out.
- while (!Kills.empty()) {
- unsigned Kill = Kills.back();
- Kills.pop_back();
- if (TargetRegisterInfo::isPhysicalRegister(Kill)) {
- ReallySafe = false;
- break;
- }
- MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist);
- if (LastKill) {
- bool isModRef = LastKill->modifiesRegister(Kill);
- NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef),
- LastKill));
- } else {
- ReallySafe = false;
- break;
- }
- }
-
- if (ReallySafe) {
- if (LV) {
- while (!NewKills.empty()) {
- MachineInstr *NewKill = NewKills.back().second;
- unsigned Kill = NewKills.back().first.first;
- bool isDead = NewKills.back().first.second;
- NewKills.pop_back();
- if (LV->removeVirtualRegisterKilled(Kill, mi)) {
- if (isDead)
- LV->addVirtualRegisterDead(Kill, NewKill);
- else
- LV->addVirtualRegisterKilled(Kill, NewKill);
- }
- }
- }
-
- // We're really going to nuke the old inst. If regB was marked
- // as a kill we need to update its Kills list.
- if (mi->getOperand(si).isKill())
- LV->removeVirtualRegisterKilled(regB, mi);
-
- mbbi->erase(mi); // Nuke the old inst.
- mi = nmi;
- ++NumDeletes;
- break; // Done with this instruction.
- }
- }
-
- // If this instruction is commutative, check to see if C dies. If
- // so, swap the B and C operands. This makes the live ranges of A
- // and C joinable.
- // FIXME: This code also works for A := B op C instructions.
- if (TID.isCommutable() && mi->getNumOperands() >= 3) {
- assert(mi->getOperand(3-si).isReg() &&
- "Not a proper commutative instruction!");
- unsigned regC = mi->getOperand(3-si).getReg();
- if (isKilled(*mi, regC, MRI, TII)) {
- if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
- ++NumCommuted;
- regB = regC;
- goto InstructionRearranged;
- }
- }
- }
-
- // If this instruction is potentially convertible to a true
- // three-address instruction,
- if (TID.isConvertibleTo3Addr()) {
- // FIXME: This assumes there are no more operands which are tied
- // to another register.
-#ifndef NDEBUG
- for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i)
- assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1);
-#endif
-
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
- ++NumConvertedTo3Addr;
- break; // Done with this instruction.
- }
- }
- }
-
- // If it's profitable to commute the instruction, do so.
- if (TID.isCommutable() && mi->getNumOperands() >= 3) {
- unsigned regC = mi->getOperand(3-si).getReg();
- if (isProfitableToCommute(regB, regC, mi, mbbi, Dist))
- if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
- ++NumAggrCommuted;
- ++NumCommuted;
- regB = regC;
- goto InstructionRearranged;
- }
- }
-
- // If it's profitable to convert the 2-address instruction to a
- // 3-address one, do so.
- if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) {
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
- ++NumConvertedTo3Addr;
- break; // Done with this instruction.
- }
- }
-
- InstructionRearranged:
- const TargetRegisterClass* rc = MRI->getRegClass(regB);
+ // Emit a copy or rematerialize the definition.
+ const TargetRegisterClass *rc = MRI->getRegClass(regB);
MachineInstr *DefMI = MRI->getVRegDef(regB);
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
DefMI->getDesc().isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, regB) &&
+ DefMI->isSafeToReMat(TII, regB, AA) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
- DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n");
- TII->reMaterialize(*mbbi, mi, regA, DefMI);
+ DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
+ unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI);
ReMatRegs.set(regB);
++NumReMats;
} else {
@@ -953,32 +1049,57 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DistanceMap.insert(std::make_pair(prevMI, Dist));
DistanceMap[mi] = ++Dist;
- // Update live variables for regB.
- if (LV) {
- if (LV->removeVirtualRegisterKilled(regB, mi))
- LV->addVirtualRegisterKilled(regB, prevMI);
+ DEBUG(errs() << "\t\tprepend:\t" << *prevMI);
- if (LV->removeVirtualRegisterDead(regB, mi))
- LV->addVirtualRegisterDead(regB, prevMI);
+ MachineOperand &MO = mi->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
}
+ MO.setReg(regA);
+ }
- DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM));
-
- // Replace all occurences of regB with regA.
+ if (AllUsesCopied) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- if (mi->getOperand(i).isReg() &&
- mi->getOperand(i).getReg() == regB)
- mi->getOperand(i).setReg(regA);
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
}
- }
- assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
- mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
+ LV->addVirtualRegisterKilled(regB, prior(mi));
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+
MadeChange = true;
- DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+ DEBUG(errs() << "\t\trewrite to:\t" << *mi);
}
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
mi = nmi;
}
}
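The rewrite loop above completes the classic two-address lowering: a tied instruction a = b op c becomes a = COPY b; a = a op c, with any kill flag on b migrating to the new copy. A standalone sketch of that transformation, using toy instruction types and hypothetical register numbers rather than the real MachineInstr API:

    #include <cstdio>
    #include <vector>

    struct Operand { unsigned Reg; bool IsDef; bool IsKill; };
    struct Instr   { const char *Op; std::vector<Operand> Ops; };

    // Lower "a = b op c" (operand DstIdx tied to operand SrcIdx) into
    // "a = COPY b; a = a op c". Any kill flag on b migrates to the copy,
    // mirroring the RemovedKillFlag handling in the pass.
    static void lowerTwoAddress(std::vector<Instr> &MBB, unsigned Idx,
                                unsigned DstIdx, unsigned SrcIdx) {
      unsigned regA = MBB[Idx].Ops[DstIdx].Reg;
      unsigned regB = MBB[Idx].Ops[SrcIdx].Reg;
      bool wasKill  = MBB[Idx].Ops[SrcIdx].IsKill;
      Instr Copy = {"COPY", {{regA, true, false}, {regB, false, wasKill}}};
      MBB.insert(MBB.begin() + Idx, Copy);
      Instr &MI = MBB[Idx + 1];
      MI.Ops[SrcIdx].Reg    = regA;   // the tied use now reads the destination
      MI.Ops[SrcIdx].IsKill = false;  // the copy is the killing use of regB now
    }

    int main() {
      Operand a = {1, true, false}, b = {2, false, true}, c = {3, false, false};
      std::vector<Instr> MBB;
      Instr Add = {"ADD", {a, b, c}};
      MBB.push_back(Add);
      lowerTwoAddress(MBB, 0, 0, 1);
      for (unsigned i = 0; i != MBB.size(); ++i)
        std::printf("%s\n", MBB[i].Op);   // prints COPY, then ADD
    }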
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index c3b213cebe95..e7c34129268e 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,8 +26,11 @@
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
@@ -42,6 +45,10 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
UnreachableBlockElim() : FunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<ProfileInfo>();
+ }
};
}
char UnreachableBlockElim::ID = 0;
@@ -77,8 +84,11 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
}
// Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ if (PI) PI->removeBlock(DeadBlocks[i]);
DeadBlocks[i]->eraseFromParent();
+ }
return DeadBlocks.size();
}
@@ -88,6 +98,7 @@ namespace {
class VISIBILITY_HIDDEN UnreachableMachineBlockElim :
public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
MachineModuleInfo *MMI;
public:
static char ID; // Pass identification, replacement for typeid
@@ -102,10 +113,18 @@ Y("unreachable-mbb-elimination",
const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
SmallPtrSet<MachineBasicBlock*, 8> Reachable;
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
// Mark all reachable blocks.
for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
@@ -123,6 +142,10 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
if (!Reachable.count(BB)) {
DeadBlocks.push_back(BB);
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
while (BB->succ_begin() != BB->succ_end()) {
MachineBasicBlock* succ = *BB->succ_begin();
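UnreachableMachineBlockElim now informs surviving analyses about each block before erasing it, so ProfileInfo, MachineLoopInfo, and MachineDominatorTree never hold dangling pointers. The pattern in miniature, with a hypothetical stand-in analysis (not the LLVM classes):

    #include <cstddef>
    #include <map>
    #include <set>
    #include <vector>

    typedef int Block;

    struct LoopInfo {
      std::map<Block, int> Depth;
      void removeBlock(Block B) { Depth.erase(B); }  // forget the dying block
    };

    // Erase unreachable blocks, updating the analysis *before* deletion so
    // it never holds a stale entry; this is the ordering used above.
    static void eraseDeadBlocks(std::vector<Block> &Blocks,
                                const std::set<Block> &Reachable,
                                LoopInfo *LI) {
      std::vector<Block> Live;
      for (size_t i = 0; i != Blocks.size(); ++i) {
        if (Reachable.count(Blocks[i])) { Live.push_back(Blocks[i]); continue; }
        if (LI) LI->removeBlock(Blocks[i]);
      }
      Blocks.swap(Live);
    }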
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 4d3417fdff51..c78f35bdb136 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -258,7 +259,7 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
return AnyUnused;
}
-void VirtRegMap::print(std::ostream &OS, const Module* M) const {
+void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
OS << "********** REGISTER MAP **********\n";
@@ -277,5 +278,5 @@ void VirtRegMap::print(std::ostream &OS, const Module* M) const {
}
void VirtRegMap::dump() const {
- print(cerr);
+ print(errs());
}
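The dump/print change follows the raw_ostream migration idiom: a single print(raw_ostream&) overload carries all the formatting, and dump() simply forwards to the standard-error stream. Sketched with local stand-ins (Stream and errs here are not the LLVM ones) so the snippet compiles on its own:

    #include <iostream>

    struct Stream {
      template <class T> Stream &operator<<(const T &V) {
        std::cerr << V;
        return *this;
      }
    };

    static Stream &errs() { static Stream S; return S; }  // stderr stream

    struct RegMap {
      void print(Stream &OS) const {        // all formatting lives here
        OS << "********** REGISTER MAP **********\n";
      }
      void dump() const { print(errs()); }  // dump() is print-to-stderr
    };

    int main() { RegMap().dump(); }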
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index fe767b7671e1..bdc2d1f71276 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -18,13 +18,13 @@
#define LLVM_CODEGEN_VIRTREGMAP_H
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Streams.h"
#include <map>
namespace llvm {
@@ -34,6 +34,7 @@ namespace llvm {
class MachineRegisterInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
+ class raw_ostream;
class VirtRegMap : public MachineFunctionPass {
public:
@@ -79,7 +80,7 @@ namespace llvm {
/// Virt2SplitKillMap - This maps a split virtual register to its last
/// use (kill) index.
- IndexedMap<unsigned> Virt2SplitKillMap;
+ IndexedMap<LiveIndex> Virt2SplitKillMap;
/// ReMatMap - This is virtual register to re-materialized instruction
/// mapping. Each virtual register whose definition is going to be
@@ -141,7 +142,7 @@ namespace llvm {
VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
Virt2StackSlotMap(NO_STACK_SLOT),
Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
- Virt2SplitKillMap(0), ReMatMap(NULL),
+ Virt2SplitKillMap(LiveIndex()), ReMatMap(NULL),
ReMatId(MAX_STACK_SLOT+1),
LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -265,17 +266,17 @@ namespace llvm {
}
/// @brief record the last use (kill) of a split virtual register.
- void addKillPoint(unsigned virtReg, unsigned index) {
+ void addKillPoint(unsigned virtReg, LiveIndex index) {
Virt2SplitKillMap[virtReg] = index;
}
- unsigned getKillPoint(unsigned virtReg) const {
+ LiveIndex getKillPoint(unsigned virtReg) const {
return Virt2SplitKillMap[virtReg];
}
/// @brief remove the last use (kill) of a split virtual register.
void removeKillPoint(unsigned virtReg) {
- Virt2SplitKillMap[virtReg] = 0;
+ Virt2SplitKillMap[virtReg] = LiveIndex();
}
/// @brief returns true if the specified MachineInstr is a spill point.
@@ -481,16 +482,11 @@ namespace llvm {
return 0;
}
- void print(std::ostream &OS, const Module* M = 0) const;
- void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void print(raw_ostream &OS, const Module* M = 0) const;
void dump() const;
};
- inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
- VRM.print(OS);
- return OS;
- }
- inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+ inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
VRM.print(OS);
return OS;
}
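Switching Virt2SplitKillMap from unsigned to LiveIndex replaces the magic sentinel 0 with a default-constructed invalid index. A minimal sketch of that typed-index idea; this LiveIndex is a stand-in, not the real LLVM type:

    #include <cassert>

    class LiveIndex {
      unsigned Value;
    public:
      LiveIndex() : Value(~0u) {}                  // default == invalid sentinel
      explicit LiveIndex(unsigned V) : Value(V) {}
      bool isValid() const { return Value != ~0u; }
    };

    int main() {
      LiveIndex KillPoint;           // "no kill recorded", as in removeKillPoint
      assert(!KillPoint.isValid());
      KillPoint = LiveIndex(42);     // record a kill at slot index 42
      assert(KillPoint.isValid());
    }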
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index be0b016b669c..401bcb618e42 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -9,10 +9,19 @@
#define DEBUG_TYPE "virtregrewriter"
#include "VirtRegRewriter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -45,10 +54,15 @@ RewriterOpt("rewriter",
clEnumValEnd),
cl::init(local));
+static cl::opt<bool>
+ScheduleSpills("schedule-spills",
+ cl::desc("Schedule spill code"),
+ cl::init(false));
+
VirtRegRewriter::~VirtRegRewriter() {}
+namespace {
-
/// This class is intended for use with the new spilling framework only. It
/// rewrites vreg def/uses to use the assigned preg, but does not insert any
/// spill code.
@@ -56,8 +70,13 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
LiveIntervals* LIs) {
- DOUT << "********** REWRITE MACHINE CODE **********\n";
- DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n");
+ DEBUG(errs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ DEBUG(errs() << "**** Machine Instrs"
+ << "(NOTE! Does not include spills and reloads!) ****\n");
+ DEBUG(MF.dump());
+
MachineRegisterInfo *mri = &MF.getRegInfo();
bool changed = false;
@@ -79,14 +98,22 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
}
}
}
+
+
+ DEBUG(errs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.dump());
return changed;
}
};
+}
+
// ************************************************************************ //
+namespace {
+
/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
/// from top down, keep track of which spill slots or remat are available in
/// each register.
@@ -154,10 +181,11 @@ public:
(unsigned)CanClobber;
if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1;
+ DEBUG(errs() << "Remembering RM#"
+ << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
else
- DOUT << "Remembering SS#" << SlotOrReMat;
- DOUT << " in physreg " << TRI->getName(Reg) << "\n";
+ DEBUG(errs() << "Remembering SS#" << SlotOrReMat);
+ DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n");
}
/// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -209,8 +237,82 @@ public:
std::vector<MachineOperand*> &KillOps);
};
+}
+
// ************************************************************************ //
+// Given a location where a reload of a spilled register or a remat of
+// a constant is to be inserted, attempt to find a safe location to
+// insert the load at an earlier point in the basic-block, to hide
+// latency of the load and to avoid address-generation interlock
+// issues.
+static MachineBasicBlock::iterator
+ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
+ MachineBasicBlock::iterator const Begin,
+ unsigned PhysReg,
+ const TargetRegisterInfo *TRI,
+ bool DoReMat,
+ int SSorRMId,
+ const TargetInstrInfo *TII,
+ const MachineFunction &MF)
+{
+ if (!ScheduleSpills)
+ return InsertLoc;
+
+ // Spill backscheduling is of primary interest to addresses, so
+ // don't do anything if the register isn't in the register class
+ // used for pointers.
+
+ const TargetLowering *TL = MF.getTarget().getTargetLowering();
+
+ if (!TL->isTypeLegal(TL->getPointerTy()))
+ // Believe it or not, this is true on PIC16.
+ return InsertLoc;
+
+ const TargetRegisterClass *ptrRegClass =
+ TL->getRegClassFor(TL->getPointerTy());
+ if (!ptrRegClass->contains(PhysReg))
+ return InsertLoc;
+
+ // Scan upwards through the preceding instructions. If an instruction doesn't
+ // reference the stack slot or the register we're loading, we can
+ // backschedule the reload up past it.
+ MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
+ while (NewInsertLoc != Begin) {
+ MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
+ for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
+ MachineOperand &Op = Prev->getOperand(i);
+ if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
+ goto stop;
+ }
+ if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
+ Prev->findRegisterDefOperand(PhysReg))
+ goto stop;
+ for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
+ if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
+ Prev->findRegisterDefOperand(*Alias))
+ goto stop;
+ NewInsertLoc = Prev;
+ }
+stop:;
+
+ // If we made it to the beginning of the block, turn around and move back
+ // down just past any existing reloads. They're likely to be reloads/remats
+ // for instructions earlier than what our current reload/remat is for, so
+ // they should be scheduled earlier.
+ if (NewInsertLoc == Begin) {
+ int FrameIdx;
+ while (InsertLoc != NewInsertLoc &&
+ (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
+ TII->isTriviallyReMaterializable(NewInsertLoc)))
+ ++NewInsertLoc;
+ }
+
+ return NewInsertLoc;
+}
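In short, ComputeReloadLoc hoists the reload upward while the preceding instruction touches neither the spill slot nor the register being loaded. The core scan as a standalone toy; the MI fields and the -1 sentinels are assumptions for illustration:

    #include <cstddef>
    #include <vector>

    struct MI { int DefReg, UseReg, FrameIdx; };  // -1 means "none"

    static size_t computeReloadLoc(const std::vector<MI> &MBB, size_t InsertLoc,
                                   int PhysReg, int SS) {
      size_t Loc = InsertLoc;
      while (Loc > 0) {
        const MI &Prev = MBB[Loc - 1];
        if (Prev.FrameIdx == SS)
          break;                                  // references the spill slot
        if (Prev.DefReg == PhysReg || Prev.UseReg == PhysReg)
          break;                                  // touches the reloaded register
        --Loc;                                    // safe to back-schedule past it
      }
      return Loc;
    }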
+
+namespace {
+
// ReusedOp - For each reused operand, we keep track of a bit of information,
// in case we need to rollback upon processing a new operand. See comments
// below.
@@ -276,7 +378,8 @@ public:
/// GetRegForReload - We are about to emit a reload into PhysReg. If there
/// is some other operand that is using the specified register, either pick
/// a new register to use, or evict the previous reload and use this reg.
- unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
+ MachineFunction &MF, MachineInstr *MI,
AvailableSpills &Spills,
std::vector<MachineInstr*> &MaybeDeadStores,
SmallSet<unsigned, 8> &Rejected,
@@ -295,18 +398,21 @@ public:
/// sees r1 is taken by t2, tries t2's reload register r0
/// sees r0 is taken by t3, tries t3's reload register r1
/// sees r1 is taken by t2, tries t2's reload register r0 ...
- unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
AvailableSpills &Spills,
std::vector<MachineInstr*> &MaybeDeadStores,
BitVector &RegKills,
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM) {
SmallSet<unsigned, 8> Rejected;
- return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
- RegKills, KillOps, VRM);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+ return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
}
};
+}
// ****************** //
// Utility Functions //
@@ -489,7 +595,14 @@ static void ReMaterialize(MachineBasicBlock &MBB,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI,
VirtRegMap &VRM) {
- TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg));
+ MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
+#ifndef NDEBUG
+ const TargetInstrDesc &TID = ReMatDefMI->getDesc();
+ assert(TID.getNumDefs() == 1 &&
+ "Don't know how to remat instructions that define > 1 values!");
+#endif
+ TII->reMaterialize(MBB, MII, DestReg,
+ ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI);
MachineInstr *NewMI = prior(MII);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -538,8 +651,8 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
"Bidirectional map mismatch!");
SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
- DOUT << "PhysReg " << TRI->getName(PhysReg)
- << " copied, it is available for use but can no longer be modified\n";
+ DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n");
}
}
@@ -563,12 +676,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
"Bidirectional map mismatch!");
SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
- DOUT << "PhysReg " << TRI->getName(PhysReg)
- << " clobbered, invalidating ";
+ DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ");
if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+ DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
else
- DOUT << "SS#" << SlotOrReMat << "\n";
+ DEBUG(errs() << "SS#" << SlotOrReMat << "\n");
}
}
@@ -650,15 +763,17 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
/// GetRegForReload - We are about to emit a reload into PhysReg. If there
/// is some other operand that is using the specified register, either pick
/// a new register to use, or evict the previous reload and use this reg.
-unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI,
- AvailableSpills &Spills,
+unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
+ unsigned PhysReg,
+ MachineFunction &MF,
+ MachineInstr *MI, AvailableSpills &Spills,
std::vector<MachineInstr*> &MaybeDeadStores,
SmallSet<unsigned, 8> &Rejected,
BitVector &RegKills,
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM) {
- const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget()
- .getInstrInfo();
+ const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
if (Reuses.empty()) return PhysReg; // This is most often empty.
@@ -670,19 +785,19 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI,
// considered and subsequently rejected because it has also been reused
// by another operand.
if (Op.PhysRegReused == PhysReg &&
- Rejected.count(Op.AssignedPhysReg) == 0) {
+ Rejected.count(Op.AssignedPhysReg) == 0 &&
+ RC->contains(Op.AssignedPhysReg)) {
// Yup, use the reload register that we didn't use before.
unsigned NewReg = Op.AssignedPhysReg;
Rejected.insert(PhysReg);
- return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+ return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected,
RegKills, KillOps, VRM);
} else {
// Otherwise, we might also have a problem if a previously reused
- // value aliases the new register. If so, codegen the previous reload
+ // value aliases the new register. If so, codegen the previous reload
// and use this one.
unsigned PRRU = Op.PhysRegReused;
- const TargetRegisterInfo *TRI = Spills.getRegInfo();
- if (TRI->areAliases(PRRU, PhysReg)) {
+ if (TRI->regsOverlap(PRRU, PhysReg)) {
// Okay, we found out that an alias of a reused register
// was used. This isn't good because it means we have
// to undo a previous reuse.
@@ -695,21 +810,45 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI,
ReusedOp NewOp = Op;
Reuses.erase(Reuses.begin()+ro);
+ // MI may be using only a sub-register of PhysRegUsed.
+ unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
+ unsigned SubIdx = 0;
+ assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
+ "A reuse cannot be a virtual register");
+ if (PRRU != RealPhysRegUsed) {
+ // What was the sub-register index?
+ unsigned SubReg;
+ for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++)
+ if (SubReg == RealPhysRegUsed)
+ break;
+ assert(SubReg == RealPhysRegUsed &&
+ "Operand physreg is not a sub-register of PhysRegUsed");
+ }
+
// Ok, we're going to try to reload the assigned physreg into the
// slot that we were supposed to in the first place. However, that
// register could hold a reuse. Check to see if it conflicts or
// would prefer us to use a different register.
- unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
- MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
-
- MachineBasicBlock::iterator MII = MI;
- if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) {
- ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM);
- } else {
- TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg,
+ unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
+ MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat;
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
+ DoReMat, SSorRMId, TII, MF);
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
+ TRI, VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
NewOp.StackSlotOrReMat, AliasRC);
- MachineInstr *LoadMI = prior(MII);
+ MachineInstr *LoadMI = prior(InsertLoc);
VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
// Any stores to this stack slot are not dead anymore.
MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
@@ -718,17 +857,15 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI,
Spills.ClobberPhysReg(NewPhysReg);
Spills.ClobberPhysReg(NewOp.PhysRegReused);
- unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg();
unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
MI->getOperand(NewOp.Operand).setReg(RReg);
MI->getOperand(NewOp.Operand).setSubReg(0);
Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
- --MII;
- UpdateKills(*MII, TRI, RegKills, KillOps);
- DOUT << '\t' << *MII;
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(errs() << '\t' << *prior(InsertLoc));
- DOUT << "Reuse undone!\n";
+ DEBUG(errs() << "Reuse undone!\n");
--NumReused;
// Finally, PhysReg is now available, go ahead and use it.
@@ -856,6 +993,8 @@ namespace {
// Local Spiller Implementation //
// ***************************** //
+namespace {
+
class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter {
MachineRegisterInfo *RegInfo;
const TargetRegisterInfo *TRI;
@@ -870,10 +1009,10 @@ public:
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
AllocatableRegs = TRI->getAllocatableSet(MF);
- DOUT << "\n**** Local spiller rewriting function '"
- << MF.getFunction()->getName() << "':\n";
- DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
- " ****\n";
+ DEBUG(errs() << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n");
+ DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and"
+ " reloads!) ****\n");
DEBUG(MF.dump());
// Spills - Keep track of which spilled values are available in physregs
@@ -924,7 +1063,7 @@ public:
Spills.clear();
}
- DOUT << "**** Post Machine Instrs ****\n";
+ DEBUG(errs() << "**** Post Machine Instrs ****\n");
DEBUG(MF.dump());
// Mark unused spill slots.
@@ -988,6 +1127,9 @@ private:
if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
return false;
+ // Back-schedule reloads and remats.
+ ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF);
+
// Load from SS to the spare physical register.
TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
// This invalidates Phys.
@@ -999,7 +1141,7 @@ private:
// Unfold current MI.
SmallVector<MachineInstr*, 4> NewMIs;
if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
- assert(0 && "Unable unfold the load / store folding instruction!");
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
assert(NewMIs.size() == 1);
AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
VRM.transferRestorePts(&MI, NewMIs[0]);
@@ -1015,7 +1157,7 @@ private:
NextMII = next(NextMII);
NewMIs.clear();
if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
- assert(0 && "Unable unfold the load / store folding instruction!");
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
assert(NewMIs.size() == 1);
AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
VRM.transferRestorePts(&NextMI, NewMIs[0]);
@@ -1157,6 +1299,32 @@ private:
return false;
}
+ /// CommuteChangesDestination - We are looking for r0 = op r1, r2, where
+ /// SrcReg is r1 and is tied to r0. Return true if, after commuting this
+ /// instruction, it will be r0 = op r2, r1.
+ static bool CommuteChangesDestination(MachineInstr *DefMI,
+ const TargetInstrDesc &TID,
+ unsigned SrcReg,
+ const TargetInstrInfo *TII,
+ unsigned &DstIdx) {
+ if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+ return false;
+ if (!DefMI->getOperand(1).isReg() ||
+ DefMI->getOperand(1).getReg() != SrcReg)
+ return false;
+ unsigned DefIdx;
+ if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+ return false;
+ unsigned SrcIdx1, SrcIdx2;
+ if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+ return false;
+ if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+ DstIdx = 2;
+ return true;
+ }
+ return false;
+ }
+
/// CommuteToFoldReload -
/// Look for
/// r1 = load fi#1
@@ -1185,7 +1353,7 @@ private:
unsigned NewDstIdx;
if (DefMII != MBB.begin() &&
TID.isCommutable() &&
- TII->CommuteChangesDestination(DefMI, NewDstIdx)) {
+ CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
@@ -1266,11 +1434,11 @@ private:
TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
MachineInstr *StoreMI = next(MII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
- DOUT << "Store:\t" << *StoreMI;
+ DEBUG(errs() << "Store:\t" << *StoreMI);
// If there is a dead store to this stack slot, nuke it now.
if (LastStore) {
- DOUT << "Removed dead store:\t" << *LastStore;
+ DEBUG(errs() << "Removed dead store:\t" << *LastStore);
++NumDSE;
SmallVector<unsigned, 2> KillRegs;
InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
@@ -1310,6 +1478,29 @@ private:
++NumStores;
}
+ /// isSafeToDelete - Return true if this instruction doesn't produce any
+ /// side effects and all of its defs are dead.
+ static bool isSafeToDelete(MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.isBarrier() || TID.isReturn() ||
+ TID.hasUnmodeledSideEffects())
+ return false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ // FIXME: We can't remove kill markers or else the scavenger will assert.
+ // An alternative is to add an ADD pseudo instruction to replace kill
+ // markers.
+ return false;
+ }
+ return true;
+ }
+
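This predicate feeds the dead-instruction sweep added at ProcessNextInst further down. A self-contained toy of that sweep; Inst and its two flags stand in for the real operand checks:

    #include <list>

    struct Inst { bool HasSideEffects; bool AllDefsDead; };

    static bool isSafeToDeleteToy(const Inst &I) {
      return !I.HasSideEffects && I.AllDefsDead;
    }

    static void sweep(std::list<Inst> &MBB) {
      for (std::list<Inst>::iterator I = MBB.begin(); I != MBB.end();) {
        if (isSafeToDeleteToy(*I))
          I = MBB.erase(I);   // dead and side-effect free: drop it
        else
          ++I;
      }
    }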
/// TransferDeadness - An identity copy definition is dead and is being
/// removed. Find the last def or use and mark it as dead / kill.
void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
@@ -1351,9 +1542,7 @@ private:
if (LastUD->isDef()) {
// If the instruction has no side effect, delete it and propagate
// backward further. Otherwise, mark it dead and we are done.
- const TargetInstrDesc &TID = LastUDMI->getDesc();
- if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.hasUnmodeledSideEffects()) {
+ if (!isSafeToDelete(*LastUDMI)) {
LastUD->setIsDead();
break;
}
@@ -1375,8 +1564,8 @@ private:
AvailableSpills &Spills, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps) {
- DOUT << "\n**** Local spiller rewriting MBB '"
- << MBB.getBasicBlock()->getName() << "':\n";
+ DEBUG(errs() << "\n**** Local spiller rewriting MBB '"
+ << MBB.getBasicBlock()->getName() << "':\n");
MachineFunction &MF = *MBB.getParent();
@@ -1425,15 +1614,23 @@ private:
assert(RC && "Unable to determine register class!");
int SS = VRM.getEmergencySpillSlot(RC);
if (UsedSS.count(SS))
- assert(0 && "Need to spill more than one physical registers!");
+ llvm_unreachable("Need to spill more than one physical registers!");
UsedSS.insert(SS);
TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
MachineInstr *StoreMI = prior(MII);
VRM.addSpillSlotUse(SS, StoreMI);
- TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC);
- MachineInstr *LoadMI = next(MII);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false,
+ SS, TII, MF);
+
+ TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC);
+
+ MachineInstr *LoadMI = prior(InsertLoc);
VRM.addSpillSlotUse(SS, LoadMI);
++NumPSpills;
+ DistanceMap.insert(std::make_pair(LoadMI, Dist++));
}
NextMII = next(MII);
}
@@ -1467,28 +1664,36 @@ private:
// If the value is already available in the expected register, save
// a reload / remat.
if (SSorRMId)
- DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ DEBUG(errs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
else
- DOUT << "Reusing SS#" << SSorRMId;
- DOUT << " from physreg "
- << TRI->getName(InReg) << " for vreg"
- << VirtReg <<" instead of reloading into physreg "
- << TRI->getName(Phys) << "\n";
+ DEBUG(errs() << "Reusing SS#" << SSorRMId);
+ DEBUG(errs() << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << '\n');
++NumOmitted;
continue;
} else if (InReg && InReg != Phys) {
if (SSorRMId)
- DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ DEBUG(errs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
else
- DOUT << "Reusing SS#" << SSorRMId;
- DOUT << " from physreg "
- << TRI->getName(InReg) << " for vreg"
- << VirtReg <<" by copying it into physreg "
- << TRI->getName(Phys) << "\n";
+ DEBUG(errs() << "Reusing SS#" << SSorRMId);
+ DEBUG(errs() << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" by copying it into physreg "
+ << TRI->getName(Phys) << '\n');
// If the reloaded / remat value is available in another register,
// copy it to the desired register.
- TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
+ TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC);
// This invalidates Phys.
Spills.ClobberPhysReg(Phys);
@@ -1496,24 +1701,30 @@ private:
Spills.addAvailable(SSorRMId, Phys);
// Mark it as killed.
- MachineInstr *CopyMI = prior(MII);
+ MachineInstr *CopyMI = prior(InsertLoc);
MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
- DOUT << '\t' << *CopyMI;
+ DEBUG(errs() << '\t' << *CopyMI);
++NumCopified;
continue;
}
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
if (VRM.isReMaterialized(VirtReg)) {
- ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM);
+ ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM);
} else {
const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC);
- MachineInstr *LoadMI = prior(MII);
+ TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(InsertLoc);
VRM.addSpillSlotUse(SSorRMId, LoadMI);
++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, Dist++));
}
// This invalidates Phys.
@@ -1521,8 +1732,8 @@ private:
// Remember it's available.
Spills.addAvailable(SSorRMId, Phys);
- UpdateKills(*prior(MII), TRI, RegKills, KillOps);
- DOUT << '\t' << *prior(MII);
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(errs() << '\t' << *prior(MII));
}
}
@@ -1541,7 +1752,7 @@ private:
TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
MachineInstr *StoreMI = next(MII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
- DOUT << "Store:\t" << *StoreMI;
+ DEBUG(errs() << "Store:\t" << *StoreMI);
VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
}
NextMII = next(MII);
@@ -1660,13 +1871,14 @@ private:
if (CanReuse) {
// If this stack slot value is already available, reuse it!
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ DEBUG(errs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
else
- DOUT << "Reusing SS#" << ReuseSlot;
- DOUT << " from physreg "
- << TRI->getName(PhysReg) << " for vreg"
- << VirtReg <<" instead of reloading into physreg "
- << TRI->getName(VRM.getPhys(VirtReg)) << "\n";
+ DEBUG(errs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(errs() << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM.getPhys(VirtReg)) << '\n');
unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
MI.getOperand(i).setReg(RReg);
MI.getOperand(i).setSubReg(0);
@@ -1733,20 +1945,22 @@ private:
// available. If this occurs, use the register indicated by the
// reuser.
if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+ DesignatedReg = ReusedOperands.GetRegForReload(VirtReg,
+ DesignatedReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
// If the mapped designated register is actually the physreg we have
// incoming, we don't need to insert a dead copy.
if (DesignatedReg == PhysReg) {
// If this stack slot value is already available, reuse it!
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ DEBUG(errs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
else
- DOUT << "Reusing SS#" << ReuseSlot;
- DOUT << " from physreg " << TRI->getName(PhysReg)
- << " for vreg" << VirtReg
- << " instead of reloading into same physreg.\n";
+ DEBUG(errs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(errs() << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n");
unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
MI.getOperand(i).setReg(RReg);
MI.getOperand(i).setSubReg(0);
@@ -1758,9 +1972,15 @@ private:
const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
RegInfo->setPhysRegUsed(DesignatedReg);
ReusedOperands.markClobbered(DesignatedReg);
- TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC);
- MachineInstr *CopyMI = prior(MII);
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
+ TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
+
+ MachineInstr *CopyMI = prior(InsertLoc);
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
// This invalidates DesignatedReg.
@@ -1771,7 +1991,7 @@ private:
SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
MI.getOperand(i).setReg(RReg);
MI.getOperand(i).setSubReg(0);
- DOUT << '\t' << *prior(MII);
+ DEBUG(errs() << '\t' << *prior(MII));
++NumReused;
continue;
} // if (PhysReg)
@@ -1785,22 +2005,28 @@ private:
// available. If this occurs, use the register indicated by the
// reuser.
if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
RegInfo->setPhysRegUsed(PhysReg);
ReusedOperands.markClobbered(PhysReg);
if (AvoidReload)
++NumAvoided;
else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
if (DoReMat) {
- ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM);
+ ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM);
} else {
const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC);
- MachineInstr *LoadMI = prior(MII);
+ TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(InsertLoc);
VRM.addSpillSlotUse(SSorRMId, LoadMI);
++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, Dist++));
}
// This invalidates PhysReg.
Spills.ClobberPhysReg(PhysReg);
@@ -1817,8 +2043,8 @@ private:
KilledMIRegs.insert(VirtReg);
}
- UpdateKills(*prior(MII), TRI, RegKills, KillOps);
- DOUT << '\t' << *prior(MII);
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(errs() << '\t' << *prior(InsertLoc));
}
unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
MI.getOperand(i).setReg(RReg);
@@ -1832,7 +2058,7 @@ private:
int PDSSlot = PotentialDeadStoreSlots[j];
MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
if (DeadStore) {
- DOUT << "Removed dead store:\t" << *DeadStore;
+ DEBUG(errs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
VRM.RemoveMachineInstrFromMaps(DeadStore);
MBB.erase(DeadStore);
@@ -1842,7 +2068,7 @@ private:
}
- DOUT << '\t' << MI;
+ DEBUG(errs() << '\t' << MI);
// If we have folded references to memory operands, make sure we clear all
@@ -1852,7 +2078,7 @@ private:
for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
unsigned VirtReg = I->second.first;
VirtRegMap::ModRef MR = I->second.second;
- DOUT << "Folded vreg: " << VirtReg << " MR: " << MR;
+ DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR);
// MI2VirtMap can be updated, which invalidates the iterator.
// Increment the iterator first.
@@ -1861,7 +2087,7 @@ private:
if (SS == VirtRegMap::NO_STACK_SLOT)
continue;
FoldedSS.insert(SS);
- DOUT << " - StackSlot: " << SS << "\n";
+ DEBUG(errs() << " - StackSlot: " << SS << "\n");
// If this folded instruction is just a use, check to see if it's a
// straight load from the virt reg slot.
@@ -1872,7 +2098,7 @@ private:
// If this spill slot is available, turn it into a copy (or nothing)
// instead of leaving it as a load!
if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
- DOUT << "Promoted Load To Copy: " << MI;
+ DEBUG(errs() << "Promoted Load To Copy: " << MI);
if (DestReg != InReg) {
const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
@@ -1895,7 +2121,7 @@ private:
BackTracked = true;
} else {
- DOUT << "Removing now-noop copy: " << MI;
+ DEBUG(errs() << "Removing now-noop copy: " << MI);
// Unset last kill since it's being reused.
InvalidateKill(InReg, TRI, RegKills, KillOps);
Spills.disallowClobberPhysReg(InReg);
@@ -1965,7 +2191,7 @@ private:
if (isDead) { // Previous store is dead.
// If we get here, the store is dead, nuke it now.
- DOUT << "Removed dead store:\t" << *DeadStore;
+ DEBUG(errs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
VRM.RemoveMachineInstrFromMaps(DeadStore);
MBB.erase(DeadStore);
@@ -2036,7 +2262,7 @@ private:
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
!MI.findRegisterUseOperand(Src)->isUndef()) {
++NumDCE;
- DOUT << "Removing now-noop copy: " << MI;
+ DEBUG(errs() << "Removing now-noop copy: " << MI);
SmallVector<unsigned, 2> KillRegs;
InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
if (MO.isDead() && !KillRegs.empty()) {
@@ -2100,8 +2326,8 @@ private:
if (ReusedOperands.isClobbered(PhysReg)) {
// Another def has taken the assigned physreg. It must have been a
// use&def which got it due to reuse. Undo the reuse!
- PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
}
}
@@ -2124,7 +2350,7 @@ private:
unsigned Src, Dst, SrcSR, DstSR;
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
++NumDCE;
- DOUT << "Removing now-noop copy: " << MI;
+ DEBUG(errs() << "Removing now-noop copy: " << MI);
InvalidateKills(MI, TRI, RegKills, KillOps);
VRM.RemoveMachineInstrFromMaps(&MI);
MBB.erase(&MI);
@@ -2136,7 +2362,15 @@ private:
}
}
ProcessNextInst:
- DistanceMap.insert(std::make_pair(&MI, Dist++));
+ // Delete dead instructions without side effects.
+ if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ }
+ if (!Erased)
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
if (!Erased && !BackTracked) {
for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
UpdateKills(*II, TRI, RegKills, KillOps);
@@ -2148,9 +2382,11 @@ private:
};
+}
+
llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
switch (RewriterOpt) {
- default: assert(0 && "Unreachable!");
+ default: llvm_unreachable("Unreachable!");
case local:
return new LocalRewriter();
case trivial:
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
index f9d7fbbfa1d3..44f9df659c81 100644
--- a/lib/CodeGen/VirtRegRewriter.h
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -10,27 +10,9 @@
#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
#define LLVM_CODEGEN_VIRTREGREWRITER_H
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Streams.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
#include "VirtRegMap.h"
-#include <map>
namespace llvm {
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
index a3364e8a72f0..d90c50d67d92 100644
--- a/lib/CompilerDriver/BuiltinOptions.cpp
+++ b/lib/CompilerDriver/BuiltinOptions.cpp
@@ -25,6 +25,8 @@ cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
cl::ZeroOrMore);
cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"),
cl::value_desc("file"), cl::Prefix);
+cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
+ cl::value_desc("<directory>"), cl::Prefix);
cl::list<std::string> Languages("x",
cl::desc("Specify the language of the following input files"),
cl::ZeroOrMore);
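The new TempDirname option is consumed through the usual CommandLine flow. A minimal driver showing that flow; the declaration matches the one above, the rest is a plausible sketch:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static cl::opt<std::string>
    TempDirname("temp-dir", cl::desc("Temp dir name"),
                cl::value_desc("<directory>"), cl::Prefix);

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      if (!TempDirname.empty())              // holds the --temp-dir value
        errs() << "temp dir: " << TempDirname << '\n';
      return 0;
    }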
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index f3039433b031..bb0eb7bcf197 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -514,13 +514,13 @@ namespace llvm {
}
void CompilationGraph::writeGraph(const std::string& OutputFilename) {
- std::ofstream O(OutputFilename.c_str());
+ std::string ErrorInfo;
+ raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
- if (O.good()) {
+ if (ErrorInfo.empty()) {
errs() << "Writing '"<< OutputFilename << "' file...";
llvm::WriteGraph(O, this);
errs() << "done.\n";
- O.close();
}
else {
throw std::runtime_error("Error opening file '" + OutputFilename
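writeGraph now uses this era's raw_fd_ostream error convention: pass an std::string and check that it stayed empty after construction. The same idiom in isolation:

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Returns true on success; on failure ErrorInfo holds the reason.
    static bool writeFile(const std::string &Path, std::string &ErrorInfo) {
      llvm::raw_fd_ostream OS(Path.c_str(), ErrorInfo);
      if (!ErrorInfo.empty())
        return false;          // could not open the file
      OS << "hello\n";         // stream flushes and closes on destruction
      return true;
    }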
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index c9c0413028d8..3e1fc9f124e6 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -31,20 +31,29 @@ namespace {
sys::Path getTempDir() {
sys::Path tempDir;
+ // The --temp-dir option.
+ if (!TempDirname.empty()) {
+ tempDir = TempDirname;
+ }
// GCC 4.5-style -save-temps handling.
- if (SaveTemps == SaveTempsEnum::Unset) {
+ else if (SaveTemps == SaveTempsEnum::Unset) {
tempDir = sys::Path::GetTemporaryDirectory();
+ return tempDir;
}
else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
tempDir = OutputFilename;
+ tempDir = tempDir.getDirname();
+ }
+ else {
+ // SaveTemps == Cwd --> use current dir (leave tempDir empty).
+ return tempDir;
+ }
- if (!tempDir.exists()) {
- std::string ErrMsg;
- if (tempDir.createDirectoryOnDisk(true, &ErrMsg))
- throw std::runtime_error(ErrMsg);
- }
+ if (!tempDir.exists()) {
+ std::string ErrMsg;
+ if (tempDir.createDirectoryOnDisk(true, &ErrMsg))
+ throw std::runtime_error(ErrMsg);
}
- // else if (SaveTemps == Cwd) -> use current dir (leave tempDir empty)
return tempDir;
}
@@ -53,17 +62,18 @@ namespace {
int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
int ret;
const sys::Path& tempDir = getTempDir();
+ bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
try {
ret = graph.Build(tempDir, langMap);
}
catch(...) {
- if (SaveTemps == SaveTempsEnum::Unset)
+ if (toDelete)
tempDir.eraseFromDisk(true);
throw;
}
- if (SaveTemps == SaveTempsEnum::Unset)
+ if (toDelete)
tempDir.eraseFromDisk(true);
return ret;
}
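getTempDir now resolves the directory with an explicit precedence: --temp-dir wins, then the -save-temps mode, then the default. The decision tree as a pure function; the modes and paths here are simplified stand-ins:

    #include <string>

    enum SaveTempsMode { Unset, Obj, Cwd };

    static std::string chooseTempDir(const std::string &TempDirname,
                                     SaveTempsMode SaveTemps,
                                     const std::string &OutputFilename) {
      if (!TempDirname.empty())
        return TempDirname;                    // --temp-dir always wins
      if (SaveTemps == Unset)
        return "/tmp/llvmc-XXXXXX";            // fresh system temp directory
      if (SaveTemps == Obj && !OutputFilename.empty()) {
        std::string::size_type Slash = OutputFilename.rfind('/');
        return Slash == std::string::npos ? std::string(".")
                                          : OutputFilename.substr(0, Slash);
      }
      return "";                               // Cwd: use the current directory
    }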
diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp
index cb3c7be39dd3..7310d120bff5 100644
--- a/lib/CompilerDriver/Plugin.cpp
+++ b/lib/CompilerDriver/Plugin.cpp
@@ -42,7 +42,7 @@ namespace {
namespace llvmc {
PluginLoader::PluginLoader() {
- llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
+ llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
if (!pluginListInitialized) {
for (PluginRegistry::iterator B = PluginRegistry::begin(),
E = PluginRegistry::end(); B != E; ++B)
@@ -53,7 +53,7 @@ namespace llvmc {
}
PluginLoader::~PluginLoader() {
- llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
+ llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
if (pluginListInitialized) {
for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
B != E; ++B)
@@ -63,14 +63,14 @@ namespace llvmc {
}
void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) {
- llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
+ llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
B != E; ++B)
(*B)->PopulateLanguageMap(langMap);
}
void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) {
- llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
+ llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
B != E; ++B)
(*B)->PopulateCompilationGraph(graph);
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 7953dd24934b..5a32fd33c3ee 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -56,7 +56,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
sys::Path Out;
if (StopCompilation) {
- if (!OutputFilename.empty() && SaveTemps != SaveTempsEnum::Obj ) {
+ if (!OutputFilename.empty()) {
Out.set(OutputFilename);
}
else if (IsJoin()) {
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index a80513f3df9e..053d96020d37 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -13,16 +13,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Config/alloca.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/System/Host.h"
#include "llvm/Target/TargetData.h"
@@ -33,12 +36,19 @@ using namespace llvm;
STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
STATISTIC(NumGlobals , "Number of global vars initialized");
-ExecutionEngine::EECtorFn ExecutionEngine::JITCtor = 0;
-ExecutionEngine::EECtorFn ExecutionEngine::InterpCtor = 0;
+ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) = 0;
+ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP,
+ std::string *ErrorStr) = 0;
ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
-ExecutionEngine::ExecutionEngine(ModuleProvider *P) : LazyFunctionCreator(0) {
+ExecutionEngine::ExecutionEngine(ModuleProvider *P)
+ : EEState(*this),
+ LazyFunctionCreator(0) {
LazyCompilationDisabled = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
@@ -105,6 +115,22 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
}
+void *ExecutionEngineState::RemoveMapping(
+ const MutexGuard &, const GlobalValue *ToUnmap) {
+ std::map<MapUpdatingCVH, void *>::iterator I =
+ GlobalAddressMap.find(getVH(ToUnmap));
+ void *OldVal;
+ if (I == GlobalAddressMap.end())
+ OldVal = 0;
+ else {
+ OldVal = I->second;
+ GlobalAddressMap.erase(I);
+ }
+
+ GlobalAddressReverseMap.erase(OldVal);
+ return OldVal;
+}
+
/// addGlobalMapping - Tell the execution engine that the specified global is
/// at the specified location. This is used internally as functions are JIT'd
/// and as global variables are laid out in memory. It can and should also be
@@ -113,14 +139,16 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
- DOUT << "JIT: Map \'" << GV->getNameStart() << "\' to [" << Addr << "]\n";
- void *&CurVal = state.getGlobalAddressMap(locked)[GV];
+ DEBUG(errs() << "JIT: Map \'" << GV->getName()
+ << "\' to [" << Addr << "]\n";);
+ void *&CurVal = EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)];
assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
CurVal = Addr;
// If we are using the reverse mapping, add it too
- if (!state.getGlobalAddressReverseMap(locked).empty()) {
- const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr];
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
assert((V == 0 || GV == 0) && "GlobalMapping already established!");
V = GV;
}
@@ -131,8 +159,8 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
void ExecutionEngine::clearAllGlobalMappings() {
MutexGuard locked(lock);
- state.getGlobalAddressMap(locked).clear();
- state.getGlobalAddressReverseMap(locked).clear();
+ EEState.getGlobalAddressMap(locked).clear();
+ EEState.getGlobalAddressReverseMap(locked).clear();
}
/// clearGlobalMappingsFromModule - Clear all global mappings that came from a
@@ -141,13 +169,11 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
MutexGuard locked(lock);
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
- state.getGlobalAddressMap(locked).erase(FI);
- state.getGlobalAddressReverseMap(locked).erase(FI);
+ EEState.RemoveMapping(locked, FI);
}
for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
GI != GE; ++GI) {
- state.getGlobalAddressMap(locked).erase(GI);
- state.getGlobalAddressReverseMap(locked).erase(GI);
+ EEState.RemoveMapping(locked, GI);
}
}
@@ -157,34 +183,25 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
- std::map<const GlobalValue*, void *> &Map = state.getGlobalAddressMap(locked);
+ std::map<ExecutionEngineState::MapUpdatingCVH, void *> &Map =
+ EEState.getGlobalAddressMap(locked);
// Deleting from the mapping?
if (Addr == 0) {
- std::map<const GlobalValue*, void *>::iterator I = Map.find(GV);
- void *OldVal;
- if (I == Map.end())
- OldVal = 0;
- else {
- OldVal = I->second;
- Map.erase(I);
- }
-
- if (!state.getGlobalAddressReverseMap(locked).empty())
- state.getGlobalAddressReverseMap(locked).erase(Addr);
- return OldVal;
+ return EEState.RemoveMapping(locked, GV);
}
- void *&CurVal = Map[GV];
+ void *&CurVal = Map[EEState.getVH(GV)];
void *OldVal = CurVal;
- if (CurVal && !state.getGlobalAddressReverseMap(locked).empty())
- state.getGlobalAddressReverseMap(locked).erase(CurVal);
+ if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
+ EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
CurVal = Addr;
// If we are using the reverse mapping, add it too
- if (!state.getGlobalAddressReverseMap(locked).empty()) {
- const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr];
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
assert((V == 0 || GV == 0) && "GlobalMapping already established!");
V = GV;
}
@@ -197,9 +214,9 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
MutexGuard locked(lock);
- std::map<const GlobalValue*, void*>::iterator I =
- state.getGlobalAddressMap(locked).find(GV);
- return I != state.getGlobalAddressMap(locked).end() ? I->second : 0;
+ std::map<ExecutionEngineState::MapUpdatingCVH, void*>::iterator I =
+ EEState.getGlobalAddressMap(locked).find(EEState.getVH(GV));
+ return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
}
/// getGlobalValueAtAddress - Return the LLVM global value object that starts
@@ -209,34 +226,34 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
MutexGuard locked(lock);
// If we haven't computed the reverse mapping yet, do so first.
- if (state.getGlobalAddressReverseMap(locked).empty()) {
- for (std::map<const GlobalValue*, void *>::iterator
- I = state.getGlobalAddressMap(locked).begin(),
- E = state.getGlobalAddressMap(locked).end(); I != E; ++I)
- state.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second,
+ if (EEState.getGlobalAddressReverseMap(locked).empty()) {
+ for (std::map<ExecutionEngineState::MapUpdatingCVH, void *>::iterator
+ I = EEState.getGlobalAddressMap(locked).begin(),
+ E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
+ EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second,
I->first));
}
- std::map<void *, const GlobalValue*>::iterator I =
- state.getGlobalAddressReverseMap(locked).find(Addr);
- return I != state.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
+ std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
+ EEState.getGlobalAddressReverseMap(locked).find(Addr);
+ return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
}
// CreateArgv - Turn a vector of strings into a nice argv style array of
// pointers to null terminated strings.
//
-static void *CreateArgv(ExecutionEngine *EE,
+static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE,
const std::vector<std::string> &InputArgv) {
unsigned PtrSize = EE->getTargetData()->getPointerSize();
char *Result = new char[(InputArgv.size()+1)*PtrSize];
- DOUT << "JIT: ARGV = " << (void*)Result << "\n";
- const Type *SBytePtr = PointerType::getUnqual(Type::Int8Ty);
+ DEBUG(errs() << "JIT: ARGV = " << (void*)Result << "\n");
+ const Type *SBytePtr = Type::getInt8PtrTy(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
char *Dest = new char[Size];
- DOUT << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n";
+ DEBUG(errs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
Dest[Size-1] = 0;
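// For context: this commit retires the DOUT stream in favor of the DEBUG()
// macro writing to errs(). A minimal sketch of the replacement pattern,
// assuming the Debug.h/raw_ostream.h of this revision (output appears only
// in asserts builds run with -debug):
#define DEBUG_TYPE "jit"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
void traceArgv(void *Result) {
  DEBUG(llvm::errs() << "JIT: ARGV = " << Result << "\n"); // compiled out in NDEBUG
}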
@@ -257,7 +274,8 @@ static void *CreateArgv(ExecutionEngine *EE,
/// runStaticConstructorsDestructors - This method is used to execute all of
/// the static constructors or destructors for a module, depending on the
/// value of isDtors.
-void ExecutionEngine::runStaticConstructorsDestructors(Module *module, bool isDtors) {
+void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
+ bool isDtors) {
const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
// Execute global ctors/dtors for each module in the program.
@@ -327,49 +345,47 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
unsigned NumArgs = Fn->getFunctionType()->getNumParams();
const FunctionType *FTy = Fn->getFunctionType();
const Type* PPInt8Ty =
- PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty));
+ PointerType::getUnqual(PointerType::getUnqual(
+ Type::getInt8Ty(Fn->getContext())));
switch (NumArgs) {
case 3:
if (FTy->getParamType(2) != PPInt8Ty) {
- cerr << "Invalid type for third argument of main() supplied\n";
- abort();
+ llvm_report_error("Invalid type for third argument of main() supplied");
}
// FALLS THROUGH
case 2:
if (FTy->getParamType(1) != PPInt8Ty) {
- cerr << "Invalid type for second argument of main() supplied\n";
- abort();
+ llvm_report_error("Invalid type for second argument of main() supplied");
}
// FALLS THROUGH
case 1:
- if (FTy->getParamType(0) != Type::Int32Ty) {
- cerr << "Invalid type for first argument of main() supplied\n";
- abort();
+ if (FTy->getParamType(0) != Type::getInt32Ty(Fn->getContext())) {
+ llvm_report_error("Invalid type for first argument of main() supplied");
}
// FALLS THROUGH
case 0:
if (!isa<IntegerType>(FTy->getReturnType()) &&
- FTy->getReturnType() != Type::VoidTy) {
- cerr << "Invalid return type of main() supplied\n";
- abort();
+ FTy->getReturnType() != Type::getVoidTy(FTy->getContext())) {
+ llvm_report_error("Invalid return type of main() supplied");
}
break;
default:
- cerr << "Invalid number of arguments of main() supplied\n";
- abort();
+ llvm_report_error("Invalid number of arguments of main() supplied");
}
if (NumArgs) {
GVArgs.push_back(GVArgc); // Arg #0 = argc.
if (NumArgs > 1) {
- GVArgs.push_back(PTOGV(CreateArgv(this, argv))); // Arg #1 = argv.
+ // Arg #1 = argv.
+ GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, argv)));
assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) &&
"argv[0] was null after CreateArgv");
if (NumArgs > 2) {
std::vector<std::string> EnvVars;
for (unsigned i = 0; envp[i]; ++i)
EnvVars.push_back(envp[i]);
- GVArgs.push_back(PTOGV(CreateArgv(this, EnvVars))); // Arg #2 = envp.
+ // Arg #2 = envp.
+ GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, EnvVars)));
}
}
}
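// For context: the static Type::Int32Ty-style singletons used above are
// being replaced by per-LLVMContext getters. A minimal sketch, assuming the
// DerivedTypes.h/LLVMContext.h of this revision:
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
const llvm::Type *mainArgTypes(llvm::LLVMContext &C) {
  const llvm::Type *I32 = llvm::Type::getInt32Ty(C);  // was Type::Int32Ty
  const llvm::Type *I8PP =                            // i8** for argv/envp
      llvm::PointerType::getUnqual(llvm::Type::getInt8PtrTy(C));
  (void)I8PP;
  return I32;
}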
@@ -383,27 +399,73 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP,
bool ForceInterpreter,
std::string *ErrorStr,
- CodeGenOpt::Level OptLevel) {
- ExecutionEngine *EE = 0;
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) {
+ return EngineBuilder(MP)
+ .setEngineKind(ForceInterpreter
+ ? EngineKind::Interpreter
+ : EngineKind::JIT)
+ .setErrorStr(ErrorStr)
+ .setOptLevel(OptLevel)
+ .setAllocateGVsWithCode(GVsWithCode)
+ .create();
+}
+ExecutionEngine *ExecutionEngine::create(Module *M) {
+ return EngineBuilder(M).create();
+}
+
+/// EngineBuilder - Overloaded constructor that automatically creates an
+/// ExistingModuleProvider for an existing module.
+EngineBuilder::EngineBuilder(Module *m) : MP(new ExistingModuleProvider(m)) {
+ InitEngine();
+}
+
+ExecutionEngine *EngineBuilder::create() {
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
return 0;
- // Unless the interpreter was explicitly selected, try making a JIT.
- if (!ForceInterpreter && JITCtor)
- EE = JITCtor(MP, ErrorStr, OptLevel);
+ // If the user specified a memory manager but didn't specify which engine to
+ // create, we assume they only want the JIT, and we fail if they only want
+ // the interpreter.
+ if (JMM) {
+ if (WhichEngine & EngineKind::JIT)
+ WhichEngine = EngineKind::JIT;
+ else {
+ if (ErrorStr)
+ *ErrorStr = "Cannot create an interpreter with a memory manager.";
+ return 0;
+ }
+ }
- // If we can't make a JIT, make an interpreter instead.
- if (EE == 0 && InterpCtor)
- EE = InterpCtor(MP, ErrorStr, OptLevel);
+ // Unless the interpreter was explicitly selected or the JIT is not linked,
+ // try making a JIT.
+ if (WhichEngine & EngineKind::JIT) {
+ if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel,
+ AllocateGVsWithCode);
+ if (EE) return EE;
+ }
+ }
- return EE;
-}
+ // If we can't make a JIT and we didn't request one specifically, try making
+ // an interpreter instead.
+ if (WhichEngine & EngineKind::Interpreter) {
+ if (ExecutionEngine::InterpCtor)
+ return ExecutionEngine::InterpCtor(MP, ErrorStr);
+ if (ErrorStr)
+ *ErrorStr = "Interpreter has not been linked in.";
+ return 0;
+ }
-ExecutionEngine *ExecutionEngine::create(Module *M) {
- return create(new ExistingModuleProvider(M));
+ if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ }
+ return 0;
}
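// A hedged usage sketch of the EngineBuilder API introduced above, assuming
// the ExecutionEngine.h of this revision; error handling kept minimal:
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Module.h"
llvm::ExecutionEngine *makeEngine(llvm::Module *M) {
  std::string Err;
  llvm::ExecutionEngine *EE = llvm::EngineBuilder(M)
      .setEngineKind(llvm::EngineKind::JIT) // or Interpreter, or Either
      .setErrorStr(&Err)
      .setOptLevel(llvm::CodeGenOpt::Default)
      .create();                            // 0 on failure; Err says why
  return EE;
}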
/// getPointerToGlobal - This returns the address of the specified global
@@ -414,7 +476,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
return getPointerToFunction(F);
MutexGuard locked(lock);
- void *p = state.getGlobalAddressMap(locked)[GV];
+ void *p = EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)];
if (p)
return p;
@@ -423,8 +485,8 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
const_cast<GlobalVariable *>(dyn_cast<GlobalVariable>(GV)))
EmitGlobalVariable(GVar);
else
- assert(0 && "Global hasn't had an address allocated yet!");
- return state.getGlobalAddressMap(locked)[GV];
+ llvm_unreachable("Global hasn't had an address allocated yet!");
+ return EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)];
}
/// This function converts a Constant* into a GenericValue. The interesting
@@ -482,11 +544,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::UIToFP: {
GenericValue GV = getConstantValue(Op0);
- if (CE->getType() == Type::FloatTy)
+ if (CE->getType()->isFloatTy())
GV.FloatVal = float(GV.IntVal.roundToDouble());
- else if (CE->getType() == Type::DoubleTy)
+ else if (CE->getType()->isDoubleTy())
GV.DoubleVal = GV.IntVal.roundToDouble();
- else if (CE->getType() == Type::X86_FP80Ty) {
+ else if (CE->getType()->isX86_FP80Ty()) {
const uint64_t zero[] = {0, 0};
APFloat apf = APFloat(APInt(80, 2, zero));
(void)apf.convertFromAPInt(GV.IntVal,
@@ -498,11 +560,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::SIToFP: {
GenericValue GV = getConstantValue(Op0);
- if (CE->getType() == Type::FloatTy)
+ if (CE->getType()->isFloatTy())
GV.FloatVal = float(GV.IntVal.signedRoundToDouble());
- else if (CE->getType() == Type::DoubleTy)
+ else if (CE->getType()->isDoubleTy())
GV.DoubleVal = GV.IntVal.signedRoundToDouble();
- else if (CE->getType() == Type::X86_FP80Ty) {
+ else if (CE->getType()->isX86_FP80Ty()) {
const uint64_t zero[] = { 0, 0};
APFloat apf = APFloat(APInt(80, 2, zero));
(void)apf.convertFromAPInt(GV.IntVal,
@@ -516,11 +578,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Instruction::FPToSI: {
GenericValue GV = getConstantValue(Op0);
uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
- if (Op0->getType() == Type::FloatTy)
+ if (Op0->getType()->isFloatTy())
GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth);
- else if (Op0->getType() == Type::DoubleTy)
+ else if (Op0->getType()->isDoubleTy())
GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
- else if (Op0->getType() == Type::X86_FP80Ty) {
+ else if (Op0->getType()->isX86_FP80Ty()) {
APFloat apf = APFloat(GV.IntVal);
uint64_t v;
bool ignored;
@@ -550,20 +612,22 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GenericValue GV = getConstantValue(Op0);
const Type* DestTy = CE->getType();
switch (Op0->getType()->getTypeID()) {
- default: assert(0 && "Invalid bitcast operand");
+ default: llvm_unreachable("Invalid bitcast operand");
case Type::IntegerTyID:
assert(DestTy->isFloatingPoint() && "invalid bitcast");
- if (DestTy == Type::FloatTy)
+ if (DestTy->isFloatTy())
GV.FloatVal = GV.IntVal.bitsToFloat();
- else if (DestTy == Type::DoubleTy)
+ else if (DestTy->isDoubleTy())
GV.DoubleVal = GV.IntVal.bitsToDouble();
break;
case Type::FloatTyID:
- assert(DestTy == Type::Int32Ty && "Invalid bitcast");
+ assert(DestTy == Type::getInt32Ty(DestTy->getContext()) &&
+ "Invalid bitcast");
GV.IntVal.floatToBits(GV.FloatVal);
break;
case Type::DoubleTyID:
- assert(DestTy == Type::Int64Ty && "Invalid bitcast");
+ assert(DestTy == Type::getInt64Ty(DestTy->getContext()) &&
+ "Invalid bitcast");
GV.IntVal.doubleToBits(GV.DoubleVal);
break;
case Type::PointerTyID:
@@ -589,10 +653,10 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GenericValue RHS = getConstantValue(CE->getOperand(1));
GenericValue GV;
switch (CE->getOperand(0)->getType()->getTypeID()) {
- default: assert(0 && "Bad add type!"); abort();
+ default: llvm_unreachable("Bad add type!");
case Type::IntegerTyID:
switch (CE->getOpcode()) {
- default: assert(0 && "Invalid integer opcode");
+ default: llvm_unreachable("Invalid integer opcode");
case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break;
case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break;
case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break;
@@ -607,7 +671,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
break;
case Type::FloatTyID:
switch (CE->getOpcode()) {
- default: assert(0 && "Invalid float opcode"); abort();
+ default: llvm_unreachable("Invalid float opcode");
case Instruction::FAdd:
GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
case Instruction::FSub:
@@ -622,7 +686,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
break;
case Type::DoubleTyID:
switch (CE->getOpcode()) {
- default: assert(0 && "Invalid double opcode"); abort();
+ default: llvm_unreachable("Invalid double opcode");
case Instruction::FAdd:
GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
case Instruction::FSub:
@@ -640,7 +704,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Type::FP128TyID: {
APFloat apfLHS = APFloat(LHS.IntVal);
switch (CE->getOpcode()) {
- default: assert(0 && "Invalid long double opcode"); abort();
+ default: llvm_unreachable("Invalid long double opcode");llvm_unreachable(0);
case Instruction::FAdd:
apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
@@ -670,8 +734,10 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
default:
break;
}
- cerr << "ConstantExpr not handled: " << *CE << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "ConstantExpr not handled: " << *CE;
+ llvm_report_error(Msg.str());
}
GenericValue Result;
@@ -698,11 +764,13 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
else
- assert(0 && "Unknown constant pointer type!");
+ llvm_unreachable("Unknown constant pointer type!");
break;
default:
- cerr << "ERROR: Constant unimplemented for type: " << *C->getType() << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "ERROR: Constant unimplemented for type: " << *C->getType();
+ llvm_report_error(Msg.str());
}
return Result;
}
@@ -762,7 +830,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
*((PointerTy*)Ptr) = Val.PointerVal;
break;
default:
- cerr << "Cannot store value of type " << *Ty << "!\n";
+ errs() << "Cannot store value of type " << *Ty << "!\n";
}
if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian())
@@ -803,15 +871,6 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
const Type *Ty) {
const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty);
- if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) {
- // Host and target are different endian - reverse copy the stored
- // bytes into a buffer, and load from that.
- uint8_t *Src = (uint8_t*)Ptr;
- uint8_t *Buf = (uint8_t*)alloca(LoadBytes);
- std::reverse_copy(Src, Src + LoadBytes, Buf);
- Ptr = (GenericValue*)Buf;
- }
-
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
// An APInt with all words initially zero.
@@ -836,8 +895,10 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
break;
}
default:
- cerr << "Cannot load value of type " << *Ty << "!\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot load value of type " << *Ty << "!";
+ llvm_report_error(Msg.str());
}
}
@@ -845,7 +906,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
// specified memory location...
//
void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
- DOUT << "JIT: Initializing " << Addr << " ";
+ DEBUG(errs() << "JIT: Initializing " << Addr << " ");
DEBUG(Init->dump());
if (isa<UndefValue>(Init)) {
return;
@@ -876,8 +937,8 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
return;
}
- cerr << "Bad Type: " << *Init->getType() << "\n";
- assert(0 && "Unknown constant type to initialize memory with!");
+ errs() << "Bad Type: " << *Init->getType() << "\n";
+ llvm_unreachable("Unknown constant type to initialize memory with!");
}
/// EmitGlobals - Emit all of the global variables to memory, storing their
@@ -950,12 +1011,11 @@ void ExecutionEngine::emitGlobals() {
// External variable reference. Try to use the dynamic loader to
// get a pointer to it.
if (void *SymAddr =
- sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName().c_str()))
+ sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName()))
addGlobalMapping(I, SymAddr);
else {
- cerr << "Could not resolve external global address: "
- << I->getName() << "\n";
- abort();
+ llvm_report_error("Could not resolve external global address: "
+ +I->getName());
}
}
}
@@ -1011,3 +1071,18 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
NumInitBytes += (unsigned)GVSize;
++NumGlobals;
}
+
+ExecutionEngineState::MapUpdatingCVH::MapUpdatingCVH(
+ ExecutionEngineState &EES, const GlobalValue *GV)
+ : CallbackVH(const_cast<GlobalValue*>(GV)), EES(EES) {}
+
+void ExecutionEngineState::MapUpdatingCVH::deleted() {
+ MutexGuard locked(EES.EE.lock);
+ EES.RemoveMapping(locked, *this); // Destroys *this.
+}
+
+void ExecutionEngineState::MapUpdatingCVH::allUsesReplacedWith(
+ Value *new_value) {
+ assert(false && "The ExecutionEngine doesn't know how to handle a"
+ " RAUW on a value it has a global mapping for.");
+}
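// For context: MapUpdatingCVH builds on CallbackVH, the value handle that is
// notified when its Value is destroyed or RAUW'd. A hedged sketch of the
// general shape, assuming the ValueHandle.h of this revision; MyHandle is
// hypothetical:
#include "llvm/Support/ValueHandle.h"
class MyHandle : public llvm::CallbackVH {
public:
  explicit MyHandle(llvm::Value *V) : llvm::CallbackVH(V) {}
  virtual void deleted() {
    // The Value died: drop any cached state keyed on it.
  }
  virtual void allUsesReplacedWith(llvm::Value *New) {
    // replaceAllUsesWith() ran: re-key cached state onto New.
  }
};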
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 401a22647e1d..5901cd757dc1 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -15,6 +15,7 @@
#include "llvm-c/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
@@ -45,8 +46,7 @@ LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) {
GenVal->DoubleVal = N;
break;
default:
- assert(0 && "LLVMGenericValueToFloat supports only float and double.");
- break;
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
}
return wrap(GenVal);
}
@@ -75,7 +75,7 @@ double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
case Type::DoubleTyID:
return unwrap(GenVal)->DoubleVal;
default:
- assert(0 && "LLVMGenericValueToFloat supports only float and double.");
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
break;
}
return 0; // Not reached
@@ -91,7 +91,10 @@ int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
LLVMModuleProviderRef MP,
char **OutError) {
std::string Error;
- if (ExecutionEngine *EE = ExecutionEngine::create(unwrap(MP), false, &Error)){
+ EngineBuilder builder(unwrap(MP));
+ builder.setEngineKind(EngineKind::Either)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *EE = builder.create()){
*OutEE = wrap(EE);
return 0;
}
@@ -103,8 +106,10 @@ int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
LLVMModuleProviderRef MP,
char **OutError) {
std::string Error;
- if (ExecutionEngine *Interp =
- ExecutionEngine::create(unwrap(MP), true, &Error)) {
+ EngineBuilder builder(unwrap(MP));
+ builder.setEngineKind(EngineKind::Interpreter)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *Interp = builder.create()) {
*OutInterp = wrap(Interp);
return 0;
}
@@ -117,9 +122,11 @@ int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
unsigned OptLevel,
char **OutError) {
std::string Error;
- if (ExecutionEngine *JIT =
- ExecutionEngine::create(unwrap(MP), false, &Error,
- (CodeGenOpt::Level)OptLevel)) {
+ EngineBuilder builder(unwrap(MP));
+ builder.setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setOptLevel((CodeGenOpt::Level)OptLevel);
+ if (ExecutionEngine *JIT = builder.create()) {
*OutJIT = wrap(JIT);
return 0;
}
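// A hedged sketch of driving the rewritten C bindings above, assuming the
// llvm-c headers of this revision and an already-built module provider:
#include <stdio.h>
#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"
int makeJIT(LLVMModuleProviderRef MP, LLVMExecutionEngineRef *OutEE) {
  char *Error = 0;
  if (LLVMCreateJITCompiler(OutEE, MP, /*OptLevel=*/2, &Error)) { // nonzero on failure
    fprintf(stderr, "JIT creation failed: %s\n", Error);
    LLVMDisposeMessage(Error); // C API strings are caller-freed
    return 1;
  }
  return 0;
}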
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index bb3f64e626f0..f8c775ee7c18 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -22,10 +22,10 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cmath>
-#include <cstring>
using namespace llvm;
STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
@@ -37,15 +37,6 @@ static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
// Various Helper Functions
//===----------------------------------------------------------------------===//
-static inline uint64_t doSignExtension(uint64_t Val, const IntegerType* ITy) {
- // Determine if the value is signed or not
- bool isSigned = (Val & (1 << (ITy->getBitWidth()-1))) != 0;
- // If its signed, extend the sign bits
- if (isSigned)
- Val |= ~ITy->getBitMask();
- return Val;
-}
-
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
SF.Values[V] = Val;
}
@@ -65,8 +56,8 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(+, Float);
IMPLEMENT_BINARY_OPERATOR(+, Double);
default:
- cerr << "Unhandled type for FAdd instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
}
@@ -76,8 +67,8 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(-, Float);
IMPLEMENT_BINARY_OPERATOR(-, Double);
default:
- cerr << "Unhandled type for FSub instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
}
@@ -87,8 +78,8 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(*, Float);
IMPLEMENT_BINARY_OPERATOR(*, Double);
default:
- cerr << "Unhandled type for FMul instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
}
@@ -98,8 +89,8 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(/, Float);
IMPLEMENT_BINARY_OPERATOR(/, Double);
default:
- cerr << "Unhandled type for FDiv instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
}
@@ -113,8 +104,8 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal);
break;
default:
- cerr << "Unhandled type for Rem instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
}
@@ -140,8 +131,8 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(eq,Ty);
IMPLEMENT_POINTER_ICMP(==);
default:
- cerr << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -153,8 +144,8 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ne,Ty);
IMPLEMENT_POINTER_ICMP(!=);
default:
- cerr << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -166,8 +157,8 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ult,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
- cerr << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -179,8 +170,8 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(slt,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
- cerr << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -192,8 +183,8 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ugt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
- cerr << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -205,8 +196,8 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(sgt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
- cerr << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -218,8 +209,8 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ule,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
- cerr << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -231,8 +222,8 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(sle,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
- cerr << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -244,8 +235,8 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(uge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
- cerr << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -257,8 +248,8 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(sge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
- cerr << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
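// For context: each executeICMP_* helper expands, via IMPLEMENT_INTEGER_ICMP,
// to one APInt comparison producing a one-bit result. A hedged illustration
// of the sge case, assuming the APInt/GenericValue headers of this revision:
#include "llvm/ADT/APInt.h"
#include "llvm/ExecutionEngine/GenericValue.h"
llvm::GenericValue icmpSGE(const llvm::GenericValue &A,
                           const llvm::GenericValue &B) {
  llvm::GenericValue Dest;
  Dest.IntVal = llvm::APInt(1, A.IntVal.sge(B.IntVal)); // i1 true/false
  return Dest;
}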
@@ -282,8 +273,8 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break;
case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
default:
- cerr << "Don't know how to handle this ICmp predicate!\n-->" << I;
- abort();
+ errs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
+ llvm_unreachable(0);
}
SetValue(&I, R, SF);
@@ -301,8 +292,8 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(==, Float);
IMPLEMENT_FCMP(==, Double);
default:
- cerr << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -315,8 +306,8 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(!=, Double);
default:
- cerr << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -328,8 +319,8 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(<=, Float);
IMPLEMENT_FCMP(<=, Double);
default:
- cerr << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -341,8 +332,8 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(>=, Float);
IMPLEMENT_FCMP(>=, Double);
default:
- cerr << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -354,8 +345,8 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(<, Float);
IMPLEMENT_FCMP(<, Double);
default:
- cerr << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
@@ -367,14 +358,14 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(>, Float);
IMPLEMENT_FCMP(>, Double);
default:
- cerr << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
return Dest;
}
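// For context: equality tests against the old type singletons become
// predicate calls, as in the macro below. A tiny sketch, assuming the
// Type.h of this revision:
#include "llvm/Type.h"
bool isSingleFloat(const llvm::Type *Ty) {
  return Ty->isFloatTy(); // was: Ty == Type::FloatTy
}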
#define IMPLEMENT_UNORDERED(TY, X,Y) \
- if (TY == Type::FloatTy) { \
+ if (TY->isFloatTy()) { \
if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
Dest.IntVal = APInt(1,true); \
return Dest; \
@@ -430,7 +421,7 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
const Type *Ty) {
GenericValue Dest;
- if (Ty == Type::FloatTy)
+ if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
Src2.FloatVal == Src2.FloatVal));
else
@@ -442,7 +433,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
const Type *Ty) {
GenericValue Dest;
- if (Ty == Type::FloatTy)
+ if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
Src2.FloatVal != Src2.FloatVal));
else
@@ -476,8 +467,8 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
default:
- cerr << "Don't know how to handle this FCmp predicate!\n-->" << I;
- abort();
+ errs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ llvm_unreachable(0);
}
SetValue(&I, R, SF);
@@ -522,8 +513,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
return Result;
}
default:
- cerr << "Unhandled Cmp predicate\n";
- abort();
+ errs() << "Unhandled Cmp predicate\n";
+ llvm_unreachable(0);
}
}
@@ -551,8 +542,8 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
default:
- cerr << "Don't know how to handle this binary operator!\n-->" << I;
- abort();
+ errs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
}
SetValue(&I, R, SF);
@@ -610,7 +601,8 @@ void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy,
// fill in the return value...
ExecutionContext &CallingSF = ECStack.back();
if (Instruction *I = CallingSF.Caller.getInstruction()) {
- if (CallingSF.Caller.getType() != Type::VoidTy) // Save result...
+ // Save result...
+ if (CallingSF.Caller.getType() != Type::getVoidTy(RetTy->getContext()))
SetValue(I, Result, CallingSF);
if (InvokeInst *II = dyn_cast<InvokeInst> (I))
SwitchToNewBasicBlock (II->getNormalDest (), CallingSF);
@@ -621,7 +613,7 @@ void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy,
void Interpreter::visitReturnInst(ReturnInst &I) {
ExecutionContext &SF = ECStack.back();
- const Type *RetTy = Type::VoidTy;
+ const Type *RetTy = Type::getVoidTy(I.getContext());
GenericValue Result;
// Save away the return value... (if we are not 'ret void')
@@ -639,7 +631,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) {
do {
ECStack.pop_back ();
if (ECStack.empty ())
- abort ();
+ llvm_report_error("Empty stack during unwind!");
Inst = ECStack.back ().Caller.getInstruction ();
} while (!(Inst && isa<InvokeInst> (Inst)));
@@ -652,8 +644,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) {
}
void Interpreter::visitUnreachableInst(UnreachableInst &I) {
- cerr << "ERROR: Program executed an 'unreachable' instruction!\n";
- abort();
+ llvm_report_error("Program executed an 'unreachable' instruction!");
}
void Interpreter::visitBranchInst(BranchInst &I) {
@@ -746,9 +737,9 @@ void Interpreter::visitAllocationInst(AllocationInst &I) {
// Allocate enough memory to hold the type...
void *Memory = malloc(MemToAlloc);
- DOUT << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
- << NumElements << " (Total: " << MemToAlloc << ") at "
- << uintptr_t(Memory) << '\n';
+ DEBUG(errs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
+ << NumElements << " (Total: " << MemToAlloc << ") at "
+ << uintptr_t(Memory) << '\n');
GenericValue Result = PTOGV(Memory);
assert(Result.PointerVal != 0 && "Null pointer returned by malloc!");
@@ -804,7 +795,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
GenericValue Result;
Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total;
- DOUT << "GEP Index " << Total << " bytes.\n";
+ DEBUG(errs() << "GEP Index " << Total << " bytes.\n");
return Result;
}
@@ -822,7 +813,7 @@ void Interpreter::visitLoadInst(LoadInst &I) {
LoadValueFromMemory(Result, Ptr, I.getType());
SetValue(&I, Result, SF);
if (I.isVolatile() && PrintVolatile)
- cerr << "Volatile load " << I;
+ errs() << "Volatile load " << I;
}
void Interpreter::visitStoreInst(StoreInst &I) {
@@ -832,7 +823,7 @@ void Interpreter::visitStoreInst(StoreInst &I) {
StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
I.getOperand(0)->getType());
if (I.isVolatile() && PrintVolatile)
- cerr << "Volatile store: " << I;
+ errs() << "Volatile store: " << I;
}
//===----------------------------------------------------------------------===//
@@ -979,7 +970,7 @@ GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy,
GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(SrcVal->getType() == Type::DoubleTy && DstTy == Type::FloatTy &&
+ assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
"Invalid FPTrunc instruction");
Dest.FloatVal = (float) Src.DoubleVal;
return Dest;
@@ -988,7 +979,7 @@ GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(SrcVal->getType() == Type::FloatTy && DstTy == Type::DoubleTy &&
+ assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
"Invalid FPTrunc instruction");
Dest.DoubleVal = (double) Src.FloatVal;
return Dest;
@@ -1079,28 +1070,28 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
assert(isa<PointerType>(SrcTy) && "Invalid BitCast");
Dest.PointerVal = Src.PointerVal;
} else if (DstTy->isInteger()) {
- if (SrcTy == Type::FloatTy) {
+ if (SrcTy->isFloatTy()) {
Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT);
Dest.IntVal.floatToBits(Src.FloatVal);
- } else if (SrcTy == Type::DoubleTy) {
+ } else if (SrcTy->isDoubleTy()) {
Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT);
Dest.IntVal.doubleToBits(Src.DoubleVal);
} else if (SrcTy->isInteger()) {
Dest.IntVal = Src.IntVal;
} else
- assert(0 && "Invalid BitCast");
- } else if (DstTy == Type::FloatTy) {
+ llvm_unreachable("Invalid BitCast");
+ } else if (DstTy->isFloatTy()) {
if (SrcTy->isInteger())
Dest.FloatVal = Src.IntVal.bitsToFloat();
else
Dest.FloatVal = Src.FloatVal;
- } else if (DstTy == Type::DoubleTy) {
+ } else if (DstTy->isDoubleTy()) {
if (SrcTy->isInteger())
Dest.DoubleVal = Src.IntVal.bitsToDouble();
else
Dest.DoubleVal = Src.DoubleVal;
} else
- assert(0 && "Invalid Bitcast");
+ llvm_unreachable("Invalid Bitcast");
return Dest;
}
@@ -1184,8 +1175,8 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
IMPLEMENT_VAARG(Float);
IMPLEMENT_VAARG(Double);
default:
- cerr << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
- abort();
+ errs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
// Set the Value of this Instruction.
@@ -1271,8 +1262,8 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
break;
default:
- cerr << "Unhandled ConstantExpr: " << *CE << "\n";
- abort();
+ errs() << "Unhandled ConstantExpr: " << *CE << "\n";
+ llvm_unreachable(0);
return GenericValue();
}
return Dest;
@@ -1344,30 +1335,29 @@ void Interpreter::run() {
// Track the number of dynamic instructions executed.
++NumDynamicInsts;
- DOUT << "About to interpret: " << I;
+ DEBUG(errs() << "About to interpret: " << I);
visit(I); // Dispatch to one of the visit* methods...
#if 0
// This is not safe, as visiting the instruction could lower it and free I.
-#ifndef NDEBUG
+DEBUG(
if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
I.getType() != Type::VoidTy) {
- DOUT << " --> ";
+ errs() << " --> ";
const GenericValue &Val = SF.Values[&I];
switch (I.getType()->getTypeID()) {
- default: assert(0 && "Invalid GenericValue Type");
- case Type::VoidTyID: DOUT << "void"; break;
- case Type::FloatTyID: DOUT << "float " << Val.FloatVal; break;
- case Type::DoubleTyID: DOUT << "double " << Val.DoubleVal; break;
- case Type::PointerTyID: DOUT << "void* " << intptr_t(Val.PointerVal);
+ default: llvm_unreachable("Invalid GenericValue Type");
+ case Type::VoidTyID: errs() << "void"; break;
+ case Type::FloatTyID: errs() << "float " << Val.FloatVal; break;
+ case Type::DoubleTyID: errs() << "double " << Val.DoubleVal; break;
+ case Type::PointerTyID: errs() << "void* " << intptr_t(Val.PointerVal);
break;
case Type::IntegerTyID:
- DOUT << "i" << Val.IntVal.getBitWidth() << " "
- << Val.IntVal.toStringUnsigned(10)
- << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
+ errs() << "i" << Val.IntVal.getBitWidth() << " "
+ << Val.IntVal.toStringUnsigned(10)
+ << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
break;
}
- }
-#endif
+ });
#endif
}
}
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index b8525a30ecad..8c45a36b56a1 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -23,7 +23,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Config/config.h" // Detect libffi
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/ManagedStatic.h"
@@ -54,7 +54,7 @@ static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
static std::map<std::string, ExFunc> FuncNames;
#ifdef USE_LIBFFI
-typedef void (*RawFunc)(void);
+typedef void (*RawFunc)();
static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
#endif
@@ -95,15 +95,15 @@ static ExFunc lookupFunction(const Function *F) {
const FunctionType *FT = F->getFunctionType();
for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
ExtName += getTypeID(FT->getContainedType(i));
- ExtName += "_" + F->getName();
+ ExtName + "_" + F->getNameStr();
- sys::ScopedLock Writer(&*FunctionsLock);
+ sys::ScopedLock Writer(*FunctionsLock);
ExFunc FnPtr = FuncNames[ExtName];
if (FnPtr == 0)
- FnPtr = FuncNames["lle_X_"+F->getName()];
+ FnPtr = FuncNames["lle_X_" + F->getNameStr()];
if (FnPtr == 0) // Try calling a generic function... if it exists...
- FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
- ("lle_X_"+F->getName()).c_str());
+ FnPtr = (ExFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr());
if (FnPtr != 0)
ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
return FnPtr;
@@ -126,8 +126,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) {
default: break;
}
// TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
- cerr << "Type could not be mapped for use with libffi.\n";
- abort();
+ llvm_report_error("Type could not be mapped for use with libffi.");
return NULL;
}
@@ -175,8 +174,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
default: break;
}
// TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
- cerr << "Type value could not be mapped for use with libffi.\n";
- abort();
+ llvm_report_error("Type value could not be mapped for use with libffi.");
return NULL;
}
@@ -190,9 +188,8 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
// TODO: We don't have type information about the remaining arguments, because
// this information is never passed into ExecutionEngine::runFunction().
if (ArgVals.size() > NumArgs && F->isVarArg()) {
- cerr << "Calling external var arg function '" << F->getName()
- << "' is not supported by the Interpreter.\n";
- abort();
+ llvm_report_error("Calling external var arg function '" + F->getName()
+ + "' is not supported by the Interpreter.");
}
unsigned ArgBytes = 0;
@@ -206,9 +203,10 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
ArgBytes += TD->getTypeStoreSize(ArgTy);
}
- uint8_t *ArgData = (uint8_t*) alloca(ArgBytes);
- uint8_t *ArgDataPtr = ArgData;
- std::vector<void*> values(NumArgs);
+ SmallVector<uint8_t, 128> ArgData;
+ ArgData.resize(ArgBytes);
+ uint8_t *ArgDataPtr = ArgData.data();
+ SmallVector<void*, 16> values(NumArgs);
for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
const unsigned ArgNo = A->getArgNo();
@@ -221,22 +219,22 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
ffi_type *rtype = ffiTypeFor(RetTy);
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
- void *ret = NULL;
+ SmallVector<uint8_t, 128> ret;
if (RetTy->getTypeID() != Type::VoidTyID)
- ret = alloca(TD->getTypeStoreSize(RetTy));
- ffi_call(&cif, Fn, ret, &values[0]);
+ ret.resize(TD->getTypeStoreSize(RetTy));
+ ffi_call(&cif, Fn, ret.data(), values.data());
switch (RetTy->getTypeID()) {
case Type::IntegerTyID:
switch (cast<IntegerType>(RetTy)->getBitWidth()) {
- case 8: Result.IntVal = APInt(8 , *(int8_t *) ret); break;
- case 16: Result.IntVal = APInt(16, *(int16_t*) ret); break;
- case 32: Result.IntVal = APInt(32, *(int32_t*) ret); break;
- case 64: Result.IntVal = APInt(64, *(int64_t*) ret); break;
+ case 8: Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break;
+ case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break;
+ case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break;
+ case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break;
}
break;
- case Type::FloatTyID: Result.FloatVal = *(float *) ret; break;
- case Type::DoubleTyID: Result.DoubleVal = *(double*) ret; break;
- case Type::PointerTyID: Result.PointerVal = *(void **) ret; break;
+ case Type::FloatTyID: Result.FloatVal = *(float *) ret.data(); break;
+ case Type::DoubleTyID: Result.DoubleVal = *(double*) ret.data(); break;
+ case Type::PointerTyID: Result.PointerVal = *(void **) ret.data(); break;
default: break;
}
return true;
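// For context: the hunk above trades raw alloca() buffers for SmallVector,
// which keeps small allocations in inline stack storage and heap-allocates
// only past that capacity. A minimal sketch of the idiom:
#include <stdint.h>
#include "llvm/ADT/SmallVector.h"
void fillBuffer(unsigned Bytes) {
  llvm::SmallVector<uint8_t, 128> Buf; // inline storage for 128 bytes
  Buf.resize(Bytes);                   // spills to the heap only if Bytes > 128
  // pass Buf.data() wherever a raw uint8_t* is expected
}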
@@ -272,7 +270,7 @@ GenericValue Interpreter::callExternalFunction(Function *F,
} else {
RawFn = RF->second;
}
-
+
FunctionsLock->release();
GenericValue Result;
@@ -280,10 +278,12 @@ GenericValue Interpreter::callExternalFunction(Function *F,
return Result;
#endif // USE_LIBFFI
- cerr << "Tried to execute an unknown external function: "
- << F->getType()->getDescription() << " " << F->getName() << "\n";
- if (F->getName() != "__main")
- abort();
+ if (F->getName() == "__main")
+ errs() << "Tried to execute an unknown external function: "
+ << F->getType()->getDescription() << " __main\n";
+ else
+ llvm_report_error("Tried to execute an unknown external function: " +
+ F->getType()->getDescription() + " " +F->getName());
return GenericValue();
}
@@ -291,6 +291,12 @@ GenericValue Interpreter::callExternalFunction(Function *F,
//===----------------------------------------------------------------------===//
// Functions "exported" to the running application...
//
+
+// Visual Studio warns about returning GenericValue in extern "C" linkage
+#ifdef _MSC_VER
+ #pragma warning(disable : 4190)
+#endif
+
extern "C" { // Don't add C++ manglings to llvm mangling :)
// void atexit(Function*)
@@ -313,6 +319,8 @@ GenericValue lle_X_exit(const FunctionType *FT,
// void abort(void)
GenericValue lle_X_abort(const FunctionType *FT,
const std::vector<GenericValue> &Args) {
+ //FIXME: should we report or raise here?
+ //llvm_report_error("Interpreted program raised SIGABRT");
raise (SIGABRT);
return GenericValue();
}
@@ -327,7 +335,7 @@ GenericValue lle_X_sprintf(const FunctionType *FT,
// printf should return # chars printed. This is completely incorrect, but
// close enough for now.
- GenericValue GV;
+ GenericValue GV;
GV.IntVal = APInt(32, strlen(FmtStr));
while (1) {
switch (*FmtStr) {
@@ -385,7 +393,8 @@ GenericValue lle_X_sprintf(const FunctionType *FT,
sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break;
case 's':
sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break;
- default: cerr << "<unknown printf code '" << *FmtStr << "'!>";
+ default:
+ errs() << "<unknown printf code '" << *FmtStr << "'!>";
ArgNo++; break;
}
strcpy(OutputBuffer, Buffer);
@@ -406,11 +415,12 @@ GenericValue lle_X_printf(const FunctionType *FT,
NewArgs.push_back(PTOGV((void*)&Buffer[0]));
NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
GenericValue GV = lle_X_sprintf(FT, NewArgs);
- cout << Buffer;
+ outs() << Buffer;
return GV;
}
-static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1,
+static void ByteswapSCANFResults(LLVMContext &C,
+ const char *Fmt, void *Arg0, void *Arg1,
void *Arg2, void *Arg3, void *Arg4, void *Arg5,
void *Arg6, void *Arg7, void *Arg8) {
void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 };
@@ -450,26 +460,26 @@ static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1,
case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p':
case 'd':
if (Long || LongLong) {
- Size = 8; Ty = Type::Int64Ty;
+ Size = 8; Ty = Type::getInt64Ty(C);
} else if (Half) {
- Size = 4; Ty = Type::Int16Ty;
+ Size = 4; Ty = Type::getInt16Ty(C);
} else {
- Size = 4; Ty = Type::Int32Ty;
+ Size = 4; Ty = Type::getInt32Ty(C);
}
break;
case 'e': case 'g': case 'E':
case 'f':
if (Long || LongLong) {
- Size = 8; Ty = Type::DoubleTy;
+ Size = 8; Ty = Type::getDoubleTy(C);
} else {
- Size = 4; Ty = Type::FloatTy;
+ Size = 4; Ty = Type::getFloatTy(C);
}
break;
case 's': case 'c': case '[': // No byteswap needed
Size = 1;
- Ty = Type::Int8Ty;
+ Ty = Type::getInt8Ty(C);
break;
default: break;
@@ -498,7 +508,8 @@ GenericValue lle_X_sscanf(const FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]));
- ByteswapSCANFResults(Args[1], Args[2], Args[3], Args[4],
+ ByteswapSCANFResults(FT->getContext(),
+ Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9], 0);
return GV;
}
@@ -515,7 +526,8 @@ GenericValue lle_X_scanf(const FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]));
- ByteswapSCANFResults(Args[0], Args[1], Args[2], Args[3], Args[4],
+ ByteswapSCANFResults(FT->getContext(),
+ Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]);
return GV;
}
@@ -537,9 +549,14 @@ GenericValue lle_X_fprintf(const FunctionType *FT,
} // End extern "C"
+// Done with externals; turn the warning back on
+#ifdef _MSC_VER
+ #pragma warning(default: 4190)
+#endif
+
void Interpreter::initializeExternalFunctions() {
- sys::ScopedLock Writer(&*FunctionsLock);
+ sys::ScopedLock Writer(*FunctionsLock);
FuncNames["lle_X_atexit"] = lle_X_atexit;
FuncNames["lle_X_exit"] = lle_X_exit;
FuncNames["lle_X_abort"] = lle_X_abort;
@@ -550,4 +567,3 @@ void Interpreter::initializeExternalFunctions() {
FuncNames["lle_X_scanf"] = lle_X_scanf;
FuncNames["lle_X_fprintf"] = lle_X_fprintf;
}
-
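// For context: "lle_X_" is the interpreter's lookup prefix for built-in
// externals, so a call to printf() resolves to lle_X_printf through the
// FuncNames map above. A hedged sketch of adding one more built-in inside
// this file; lle_X_myhook is hypothetical:
#include <vector>
#include "llvm/DerivedTypes.h"
#include "llvm/ExecutionEngine/GenericValue.h"
extern "C" llvm::GenericValue lle_X_myhook(const llvm::FunctionType *FT,
    const std::vector<llvm::GenericValue> &Args) {
  return llvm::GenericValue(); // observe Args, return "void"
}
// ...and in initializeExternalFunctions(): FuncNames["lle_X_myhook"] = lle_X_myhook;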
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index d7f38ef548f2..9be6a9265d61 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -33,8 +33,7 @@ extern "C" void LLVMLinkInInterpreter() { }
/// create - Create a new interpreter object. This can never fail.
///
-ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr,
- CodeGenOpt::Level OptLevel /*unused*/) {
+ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr) {
// Tell this ModuleProvide to materialize and release the module
if (!MP->materializeModule(ErrStr))
// We got an error, just return 0
@@ -98,4 +97,3 @@ Interpreter::runFunction(Function *F,
return ExitValue;
}
-
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 6b13c90f6671..e026287bb559 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -17,11 +17,12 @@
#include "llvm/Function.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/DataTypes.h"
-
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
class IntrinsicLowering;
@@ -107,8 +108,7 @@ public:
/// create - Create an interpreter ExecutionEngine. This can never fail.
///
- static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0,
- CodeGenOpt::Level = CodeGenOpt::Default);
+ static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0);
/// run - Start execution with the specified function and arguments.
///
@@ -144,7 +144,9 @@ public:
void visitLoadInst(LoadInst &I);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitPHINode(PHINode &PN) { assert(0 && "PHI nodes already handled!"); }
+ void visitPHINode(PHINode &PN) {
+ llvm_unreachable("PHI nodes already handled!");
+ }
void visitTruncInst(TruncInst &I);
void visitZExtInst(ZExtInst &I);
void visitSExtInst(SExtInst &I);
@@ -172,8 +174,8 @@ public:
void visitVAArgInst(VAArgInst &I);
void visitInstruction(Instruction &I) {
- cerr << I;
- assert(0 && "Instruction not interpretable yet!");
+ errs() << I;
+ llvm_unreachable("Instruction not interpretable yet!");
}
GenericValue callExternalFunction(Function *F,
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index bf915f7c4ca1..41b3b4e99cb1 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -4,9 +4,11 @@ add_definitions(-DENABLE_X86_JIT)
add_llvm_library(LLVMJIT
Intercept.cpp
JIT.cpp
+ JITDebugRegisterer.cpp
JITDwarfEmitter.cpp
JITEmitter.cpp
JITMemoryManager.cpp
MacOSJITEventListener.cpp
+ OProfileJITEventListener.cpp
TargetSelect.cpp
)
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index 3dcc4626a1fa..c00b60a276c2 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -16,7 +16,7 @@
//===----------------------------------------------------------------------===//
#include "JIT.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/Config/config.h"
using namespace llvm;
@@ -56,6 +56,7 @@ static void runAtExitHandlers() {
* linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
* available as an exported symbol, so we have to add it explicitly.
*/
+namespace {
class StatSymbols {
public:
StatSymbols() {
@@ -72,6 +73,7 @@ public:
sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
}
};
+}
static StatSymbols initStatSymbols;
#endif // __linux__
@@ -82,7 +84,7 @@ static void jit_exit(int Status) {
}
// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)(void)) {
+static int jit_atexit(void (*Fn)()) {
AtExitHandlers.push_back(Fn); // Take note of atexit handler...
return 0; // Always successful
}
@@ -140,9 +142,8 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
return RP;
if (AbortOnFailure) {
- cerr << "ERROR: Program used external function '" << Name
- << "' which could not be resolved!\n";
- abort();
+ llvm_report_error("Program used external function '"+Name+
+ "' which could not be resolved!");
}
return 0;
}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 1d8312f76298..b2a268bce8b7 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetJITInfo.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/Config/config.h"
@@ -196,25 +197,44 @@ void DarwinRegisterFrame(void* FrameBegin) {
ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel) {
- ExecutionEngine *EE = JIT::createJIT(MP, ErrorStr, JMM, OptLevel);
- if (!EE) return 0;
-
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) {
+ return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode);
+}
+
+ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) {
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
- sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr);
- return EE;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
+ return 0;
+
+ // Pick a target either via -march or by guessing the native arch.
+ TargetMachine *TM = JIT::selectTarget(MP, ErrorStr);
+ if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo()) {
+ return new JIT(MP, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ } else {
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+ }
}
JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel)
- : ExecutionEngine(MP), TM(tm), TJI(tji) {
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+ : ExecutionEngine(MP), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) {
setTargetData(TM.getTargetData());
jitstate = new JITState(MP);
// Initialize JCE
- JCE = createEmitter(*this, JMM);
+ JCE = createEmitter(*this, JMM, TM);
// Add target data
MutexGuard locked(lock);
@@ -224,8 +244,7 @@ JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
- cerr << "Target does not support machine code emission!\n";
- abort();
+ llvm_report_error("Target does not support machine code emission!");
}
// Register routine for informing unwinding runtime about new EH frames
@@ -273,8 +292,7 @@ void JIT::addModuleProvider(ModuleProvider *MP) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- cerr << "Target does not support machine code emission!\n";
- abort();
+ llvm_report_error("Target does not support machine code emission!");
}
// Initialize passes.
@@ -306,8 +324,7 @@ Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- cerr << "Target does not support machine code emission!\n";
- abort();
+ llvm_report_error("Target does not support machine code emission!");
}
// Initialize passes.
@@ -338,8 +355,7 @@ void JIT::deleteModuleProvider(ModuleProvider *MP, std::string *E) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- cerr << "Target does not support machine code emission!\n";
- abort();
+ llvm_report_error("Target does not support machine code emission!");
}
// Initialize passes.
@@ -366,10 +382,11 @@ GenericValue JIT::runFunction(Function *F,
// Handle some common cases first. These cases correspond to common `main'
// prototypes.
- if (RetTy == Type::Int32Ty || RetTy == Type::VoidTy) {
+ if (RetTy == Type::getInt32Ty(F->getContext()) ||
+ RetTy == Type::getVoidTy(F->getContext())) {
switch (ArgValues.size()) {
case 3:
- if (FTy->getParamType(0) == Type::Int32Ty &&
+ if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) &&
isa<PointerType>(FTy->getParamType(1)) &&
isa<PointerType>(FTy->getParamType(2))) {
int (*PF)(int, char **, const char **) =
@@ -384,7 +401,7 @@ GenericValue JIT::runFunction(Function *F,
}
break;
case 2:
- if (FTy->getParamType(0) == Type::Int32Ty &&
+ if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) &&
isa<PointerType>(FTy->getParamType(1))) {
int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
@@ -397,7 +414,7 @@ GenericValue JIT::runFunction(Function *F,
break;
case 1:
if (FTy->getNumParams() == 1 &&
- FTy->getParamType(0) == Type::Int32Ty) {
+ FTy->getParamType(0) == Type::getInt32Ty(F->getContext())) {
GenericValue rv;
int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
@@ -411,7 +428,7 @@ GenericValue JIT::runFunction(Function *F,
if (ArgValues.empty()) {
GenericValue rv;
switch (RetTy->getTypeID()) {
- default: assert(0 && "Unknown return type for function call!");
+ default: llvm_unreachable("Unknown return type for function call!");
case Type::IntegerTyID: {
unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
if (BitWidth == 1)
@@ -425,7 +442,7 @@ GenericValue JIT::runFunction(Function *F,
else if (BitWidth <= 64)
rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
else
- assert(0 && "Integer types > 64 bits not supported");
+ llvm_unreachable("Integer types > 64 bits not supported");
return rv;
}
case Type::VoidTyID:
@@ -440,7 +457,7 @@ GenericValue JIT::runFunction(Function *F,
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
- assert(0 && "long double not supported yet");
+ llvm_unreachable("long double not supported yet");
return rv;
case Type::PointerTyID:
return PTOGV(((void*(*)())(intptr_t)FPtr)());
@@ -458,7 +475,7 @@ GenericValue JIT::runFunction(Function *F,
F->getParent());
// Insert a basic block.
- BasicBlock *StubBB = BasicBlock::Create("", Stub);
+ BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);
// Convert all of the GenericValue arguments over to constants. Note that we
// currently don't support varargs.
@@ -468,28 +485,31 @@ GenericValue JIT::runFunction(Function *F,
const Type *ArgTy = FTy->getParamType(i);
const GenericValue &AV = ArgValues[i];
switch (ArgTy->getTypeID()) {
- default: assert(0 && "Unknown argument type for function call!");
+ default: llvm_unreachable("Unknown argument type for function call!");
case Type::IntegerTyID:
- C = ConstantInt::get(AV.IntVal);
+ C = ConstantInt::get(F->getContext(), AV.IntVal);
break;
case Type::FloatTyID:
- C = ConstantFP::get(APFloat(AV.FloatVal));
+ C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
break;
case Type::DoubleTyID:
- C = ConstantFP::get(APFloat(AV.DoubleVal));
+ C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
break;
case Type::PPC_FP128TyID:
case Type::X86_FP80TyID:
case Type::FP128TyID:
- C = ConstantFP::get(APFloat(AV.IntVal));
+ C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
break;
case Type::PointerTyID:
void *ArgPtr = GVTOP(AV);
if (sizeof(void*) == 4)
- C = ConstantInt::get(Type::Int32Ty, (int)(intptr_t)ArgPtr);
+ C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
+ (int)(intptr_t)ArgPtr);
else
- C = ConstantInt::get(Type::Int64Ty, (intptr_t)ArgPtr);
- C = ConstantExpr::getIntToPtr(C, ArgTy); // Cast the integer to pointer
+ C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
+ (intptr_t)ArgPtr);
+ // Cast the integer to pointer
+ C = ConstantExpr::getIntToPtr(C, ArgTy);
break;
}
Args.push_back(C);
@@ -499,10 +519,11 @@ GenericValue JIT::runFunction(Function *F,
"", StubBB);
TheCall->setCallingConv(F->getCallingConv());
TheCall->setTailCall();
- if (TheCall->getType() != Type::VoidTy)
- ReturnInst::Create(TheCall, StubBB); // Return result of the call.
+ if (TheCall->getType() != Type::getVoidTy(F->getContext()))
+ // Return result of the call.
+ ReturnInst::Create(F->getContext(), TheCall, StubBB);
else
- ReturnInst::Create(StubBB); // Just return void.
+ ReturnInst::Create(F->getContext(), StubBB); // Just return void.
// Finally, return the value returned by our nullary stub function.
return runFunction(Stub, std::vector<GenericValue>());
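
A hedged usage sketch of the fast paths above: packing a 32-bit argument into a GenericValue takes the `case 1:' branch, so no nullary stub needs to be synthesized. The wrapper name is hypothetical; runFunction and GenericValue are the real APIs used here.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ADT/APInt.h"
#include <vector>

// Call a JITed `int f(int)' through ExecutionEngine::runFunction.
int callIntInt(llvm::ExecutionEngine *EE, llvm::Function *F, int Arg) {
  std::vector<llvm::GenericValue> Args(1);
  Args[0].IntVal = llvm::APInt(32, Arg);   // matches the case-1 fast path
  llvm::GenericValue RV = EE->runFunction(F, Args);
  return (int)RV.IntVal.getSExtValue();
}
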
@@ -629,9 +650,8 @@ void *JIT::getPointerToFunction(Function *F) {
std::string ErrorMsg;
if (MP->materializeFunction(F, &ErrorMsg)) {
- cerr << "Error reading function '" << F->getName()
- << "' from bitcode file: " << ErrorMsg << "\n";
- abort();
+ llvm_report_error("Error reading function '" + F->getName()+
+ "' from bitcode file: " + ErrorMsg);
}
// Now retry to get the address.
@@ -669,45 +689,18 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
if (GV->getName() == "__dso_handle")
return (void*)&__dso_handle;
#endif
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName().c_str());
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
if (Ptr == 0 && !areDlsymStubsEnabled()) {
- cerr << "Could not resolve external global address: "
- << GV->getName() << "\n";
- abort();
+ llvm_report_error("Could not resolve external global address: "
+ +GV->getName());
}
addGlobalMapping(GV, Ptr);
} else {
- // GlobalVariable's which are not "constant" will cause trouble in a server
- // situation. It's returned in the same block of memory as code which may
- // not be writable.
- if (isGVCompilationDisabled() && !GV->isConstant()) {
- cerr << "Compilation of non-internal GlobalValue is disabled!\n";
- abort();
- }
// If the global hasn't been emitted to memory yet, allocate space and
- // emit it into memory. It goes in the same array as the generated
- // code, jump tables, etc.
- const Type *GlobalType = GV->getType()->getElementType();
- size_t S = getTargetData()->getTypeAllocSize(GlobalType);
- size_t A = getTargetData()->getPreferredAlignment(GV);
- if (GV->isThreadLocal()) {
- MutexGuard locked(lock);
- Ptr = TJI.allocateThreadLocalMemory(S);
- } else if (TJI.allocateSeparateGVMemory()) {
- if (A <= 8) {
- Ptr = malloc(S);
- } else {
- // Allocate S+A bytes of memory, then use an aligned pointer within that
- // space.
- Ptr = malloc(S+A);
- unsigned MisAligned = ((intptr_t)Ptr & (A-1));
- Ptr = (char*)Ptr + (MisAligned ? (A-MisAligned) : 0);
- }
- } else {
- Ptr = JCE->allocateSpace(S, A);
- }
+ // emit it into memory.
+ Ptr = getMemoryForGV(GV);
addGlobalMapping(GV, Ptr);
- EmitGlobalVariable(GV);
+ EmitGlobalVariable(GV); // Initialize the variable.
}
return Ptr;
}
@@ -742,14 +735,41 @@ void *JIT::recompileAndRelinkFunction(Function *F) {
/// on the target.
///
char* JIT::getMemoryForGV(const GlobalVariable* GV) {
- const Type *ElTy = GV->getType()->getElementType();
- size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+ char *Ptr;
+
+  // GlobalVariables which are not "constant" will cause trouble in a server
+  // situation. They are returned in the same block of memory as code, which
+  // may not be writable.
+ if (isGVCompilationDisabled() && !GV->isConstant()) {
+ llvm_report_error("Compilation of non-internal GlobalValue is disabled!");
+ }
+
+ // Some applications require globals and code to live together, so they may
+ // be allocated into the same buffer, but in general globals are allocated
+ // through the memory manager which puts them near the code but not in the
+ // same buffer.
+ const Type *GlobalType = GV->getType()->getElementType();
+ size_t S = getTargetData()->getTypeAllocSize(GlobalType);
+ size_t A = getTargetData()->getPreferredAlignment(GV);
if (GV->isThreadLocal()) {
MutexGuard locked(lock);
- return TJI.allocateThreadLocalMemory(GVSize);
+ Ptr = TJI.allocateThreadLocalMemory(S);
+ } else if (TJI.allocateSeparateGVMemory()) {
+ if (A <= 8) {
+ Ptr = (char*)malloc(S);
+ } else {
+ // Allocate S+A bytes of memory, then use an aligned pointer within that
+ // space.
+ Ptr = (char*)malloc(S+A);
+ unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+ Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
+ }
+ } else if (AllocateGVsWithCode) {
+ Ptr = (char*)JCE->allocateSpace(S, A);
} else {
- return new char[GVSize];
+ Ptr = (char*)JCE->allocateGlobal(S, A);
}
+ return Ptr;
}
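
The malloc branch above uses the classic over-allocate-and-align trick. A standalone sketch of the same arithmetic (hypothetical helper; Align must be a power of two, and, as in the code above, the unaligned pointer is not kept for a later free()):

#include <cstdlib>
#include <stdint.h>

char *allocAligned(size_t Size, size_t Align) {
  char *Ptr = (char*)malloc(Size + Align);       // reserve worst-case slack
  unsigned MisAligned = ((intptr_t)Ptr & (Align - 1));
  return Ptr + (MisAligned ? (Align - MisAligned) : 0);  // bump to boundary
}
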
void JIT::addPendingFunction(Function *F) {
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 66417a71b2c8..525cc84f945c 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -16,11 +16,12 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/PassManager.h"
+#include "llvm/Support/ValueHandle.h"
namespace llvm {
class Function;
-class JITEvent_EmittedFunctionDetails;
+struct JITEvent_EmittedFunctionDetails;
class MachineCodeEmitter;
class MachineCodeInfo;
class TargetJITInfo;
@@ -33,7 +34,7 @@ private:
/// PendingFunctions - Functions which have not been code generated yet, but
/// were called from a function being code generated.
- std::vector<Function*> PendingFunctions;
+ std::vector<AssertingVH<Function> > PendingFunctions;
public:
explicit JITState(ModuleProvider *MP) : PM(MP), MP(MP) {}
@@ -43,7 +44,7 @@ public:
}
ModuleProvider *getMP() const { return MP; }
- std::vector<Function*> &getPendingFunctions(const MutexGuard &L) {
+ std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
return PendingFunctions;
}
};
@@ -55,10 +56,16 @@ class JIT : public ExecutionEngine {
JITCodeEmitter *JCE; // JCE object
std::vector<JITEventListener*> EventListeners;
+ /// AllocateGVsWithCode - Some applications require that global variables and
+ /// code be allocated into the same region of memory, in which case this flag
+ /// should be set to true. Doing so breaks freeMachineCodeForFunction.
+ bool AllocateGVsWithCode;
+
JITState *jitstate;
- JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel);
+ JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode);
public:
~JIT();
@@ -73,10 +80,13 @@ public:
/// create - Create and return a new JIT compiler if there is one available
/// for the current target. Otherwise, return null.
///
- static ExecutionEngine *create(ModuleProvider *MP, std::string *Err,
+ static ExecutionEngine *create(ModuleProvider *MP,
+ std::string *Err,
+ JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel =
- CodeGenOpt::Default) {
- return createJIT(MP, Err, 0, OptLevel);
+ CodeGenOpt::Default,
+ bool GVsWithCode = true) {
+ return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode);
}
virtual void addModuleProvider(ModuleProvider *MP);
@@ -145,16 +155,22 @@ public:
/// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
/// function was encountered. Add it to a pending list to be processed after
/// the current function.
- ///
+ ///
void addPendingFunction(Function *F);
-
+
/// getCodeEmitter - Return the code emitter this JIT is emitting into.
+ ///
JITCodeEmitter *getCodeEmitter() const { return JCE; }
-
- static ExecutionEngine *createJIT(ModuleProvider *MP, std::string *Err,
- JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel);
+ /// selectTarget - Pick a target either via -march or by guessing the native
+ /// arch. Add any CPU features specified via -mcpu or -mattr.
+ static TargetMachine *selectTarget(ModuleProvider *MP, std::string *Err);
+
+ static ExecutionEngine *createJIT(ModuleProvider *MP,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode);
// Run the JIT on F and return information about the generated code
void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
@@ -170,7 +186,8 @@ public:
void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
private:
- static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM);
+ static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+ TargetMachine &tm);
void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
void updateFunctionStub(Function *F);
void updateDlsymStubTable();
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
new file mode 100644
index 000000000000..fa640103c28b
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -0,0 +1,208 @@
+//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITDebugRegisterer.h"
+#include "../../CodeGen/ELF.h"
+#include "../../CodeGen/ELFWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+// This must be kept in sync with gdb/gdb/jit.h.
+extern "C" {
+
+  // Debuggers put a breakpoint in this function.
+ void DISABLE_INLINE __jit_debug_register_code() { }
+
+ // We put information about the JITed function in this global, which the
+ // debugger reads. Make sure to specify the version statically, because the
+  // debugger may check the version before our runtime code can set it.
+ struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+}
+
+namespace {
+
+ /// JITDebugLock - Used to serialize all code registration events, since they
+ /// modify global variables.
+ sys::Mutex JITDebugLock;
+
+}
+
+JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
+
+JITDebugRegisterer::~JITDebugRegisterer() {
+ // Free all ELF memory.
+ for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
+ I != E; ++I) {
+ // Call the private method that doesn't update the map so our iterator
+ // doesn't break.
+ UnregisterFunctionInternal(I);
+ }
+ FnMap.clear();
+}
+
+std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
+ // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
+ // API. We don't use the real module because then the ELFWriter would write
+ // out unnecessary GlobalValues during finalization.
+ LLVMContext Context;
+ Module M("", Context);
+
+ // Make a buffer for the ELF in memory.
+ std::string Buffer;
+ raw_string_ostream O(Buffer);
+ ELFWriter EW(O, TM);
+ EW.doInitialization(M);
+
+ // Copy the binary into the .text section. This isn't necessary, but it's
+ // useful to be able to disassemble the ELF by hand.
+ ELFSection &Text = EW.getTextSection((Function *)F);
+ Text.Addr = (uint64_t)I.FnStart;
+ // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+ // instead of a vector.
+ Text.getData().assign(I.FnStart, I.FnEnd);
+
+ // Copy the exception handling call frame information into the .eh_frame
+ // section. This allows GDB to get a good stack trace, particularly on
+ // linux x86_64. Mark this as a PROGBITS section that needs to be loaded
+ // into memory at runtime.
+ ELFSection &EH = EW.getSection(".eh_frame", ELFSection::SHT_PROGBITS,
+ ELFSection::SHF_ALLOC);
+ // Pointers in the DWARF EH info are all relative to the EH frame start,
+ // which is stored here.
+ EH.Addr = (uint64_t)I.EhStart;
+ // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+ // instead of a vector.
+ EH.getData().assign(I.EhStart, I.EhEnd);
+
+ // Add this single function to the symbol table, so the debugger prints the
+ // name instead of '???'. We give the symbol default global visibility.
+ ELFSym *FnSym = ELFSym::getGV(F,
+ ELFSym::STB_GLOBAL,
+ ELFSym::STT_FUNC,
+ ELFSym::STV_DEFAULT);
+ FnSym->SectionIdx = Text.SectionIdx;
+ FnSym->Size = I.FnEnd - I.FnStart;
+ FnSym->Value = 0; // Offset from start of section.
+ EW.SymbolList.push_back(FnSym);
+
+ EW.doFinalization(M);
+ O.flush();
+
+ // When trying to debug why GDB isn't getting the debug info right, it's
+ // awfully helpful to write the object file to disk so that it can be
+ // inspected with readelf and objdump.
+ if (JITEmitDebugInfoToDisk) {
+ std::string Filename;
+ raw_string_ostream O2(Filename);
+ O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
+ O2.flush();
+ std::string Errors;
+ raw_fd_ostream O3(Filename.c_str(), Errors);
+ O3 << Buffer;
+ O3.close();
+ }
+
+ return Buffer;
+}
+
+void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
+ // TODO: Support non-ELF platforms.
+ if (!TM.getELFWriterInfo())
+ return;
+
+ std::string Buffer = MakeELF(F, I);
+
+ jit_code_entry *JITCodeEntry = new jit_code_entry();
+ JITCodeEntry->symfile_addr = Buffer.c_str();
+ JITCodeEntry->symfile_size = Buffer.size();
+
+ // Add a mapping from F to the entry and buffer, so we can delete this
+ // info later.
+ FnMap[F] = std::make_pair<std::string, jit_code_entry*>(Buffer, JITCodeEntry);
+
+ // Acquire the lock and do the registration.
+ {
+ MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+
+ // Insert this entry at the head of the list.
+ JITCodeEntry->prev_entry = NULL;
+ jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
+ JITCodeEntry->next_entry = NextEntry;
+ if (NextEntry != NULL) {
+ NextEntry->prev_entry = JITCodeEntry;
+ }
+ __jit_debug_descriptor.first_entry = JITCodeEntry;
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+}
+
+void JITDebugRegisterer::UnregisterFunctionInternal(
+ RegisteredFunctionsMap::iterator I) {
+ jit_code_entry *JITCodeEntry = I->second.second;
+
+ // Acquire the lock and do the unregistration.
+ {
+ MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
+
+ // Remove the jit_code_entry from the linked list.
+ jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
+ jit_code_entry *NextEntry = JITCodeEntry->next_entry;
+ if (NextEntry) {
+ NextEntry->prev_entry = PrevEntry;
+ }
+ if (PrevEntry) {
+ PrevEntry->next_entry = NextEntry;
+ } else {
+ assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
+ __jit_debug_descriptor.first_entry = NextEntry;
+ }
+
+ // Tell GDB which entry we removed, and unregister the code.
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+
+ // Free the ELF file in memory.
+ std::string &Buffer = I->second.first;
+ Buffer.clear();
+}
+
+void JITDebugRegisterer::UnregisterFunction(const Function *F) {
+ // TODO: Support non-ELF platforms.
+ if (!TM.getELFWriterInfo())
+ return;
+
+ RegisteredFunctionsMap::iterator I = FnMap.find(F);
+ if (I == FnMap.end()) return;
+ UnregisterFunctionInternal(I);
+ FnMap.erase(I);
+}
+
+} // end namespace llvm
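
A sketch of the intended calling sequence for the class defined above; the wrapper is hypothetical, but DebugInfo, RegisterFunction, and UnregisterFunction are exactly the interface this file introduces.

#include "JITDebugRegisterer.h"
using namespace llvm;

// After a function is emitted, describe its machine-code and .eh_frame
// ranges and hand them to the registerer, which notifies GDB through
// __jit_debug_register_code.
void registerWithDebugger(JITDebugRegisterer &DR, const Function *F,
                          uint8_t *FnStart, uint8_t *FnEnd,
                          uint8_t *EhStart, uint8_t *EhEnd) {
  DebugInfo I;
  I.FnStart = FnStart;
  I.FnEnd   = FnEnd;
  I.EhStart = EhStart;
  I.EhEnd   = EhEnd;
  DR.RegisterFunction(F, I);
  // Before freeing the machine code later: DR.UnregisterFunction(F);
}
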
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
new file mode 100644
index 000000000000..dce506bbfefd
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -0,0 +1,116 @@
+//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+// This must be kept in sync with gdb/gdb/jit.h.
+extern "C" {
+
+ typedef enum {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN
+ } jit_actions_t;
+
+ struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+ };
+
+ struct jit_descriptor {
+ uint32_t version;
+ // This should be jit_actions_t, but we want to be specific about the
+ // bit-width.
+ uint32_t action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+ };
+
+}
+
+namespace llvm {
+
+class ELFSection;
+class Function;
+class TargetMachine;
+
+
+/// This class encapsulates information we want to send to the debugger.
+///
+struct DebugInfo {
+ uint8_t *FnStart;
+ uint8_t *FnEnd;
+ uint8_t *EhStart;
+ uint8_t *EhEnd;
+
+ DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
+};
+
+typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
+ RegisteredFunctionsMap;
+
+/// This class registers debug info for JITed code with an attached debugger.
+/// Without proper debug info, GDB can't do things like source level debugging
+/// or even produce a proper stack trace on linux-x86_64. To use this class,
+/// whenever a function is JITed, create a DebugInfo struct and pass it to the
+/// RegisterFunction method. The method will then do whatever is necessary to
+/// inform the debugger about the JITed function.
+class JITDebugRegisterer {
+
+ TargetMachine &TM;
+
+  /// FnMap - A map from registered functions to their in-memory symbol file
+  /// buffer and jit_code_entry. Used for cleanup.
+ RegisteredFunctionsMap FnMap;
+
+ /// MakeELF - Builds the ELF file in memory and returns a std::string that
+ /// contains the ELF.
+ std::string MakeELF(const Function *F, DebugInfo &I);
+
+public:
+ JITDebugRegisterer(TargetMachine &tm);
+
+ /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
+ ///
+ ~JITDebugRegisterer();
+
+ /// RegisterFunction - Register debug info for the given function with an
+ /// attached debugger. Clients must call UnregisterFunction on all
+ /// registered functions before deleting them to free the associated symbol
+ /// file and unregister it from the debugger.
+ void RegisterFunction(const Function *F, DebugInfo &I);
+
+ /// UnregisterFunction - Unregister the debug info for the given function
+ /// from the debugger and free associated memory.
+ void UnregisterFunction(const Function *F);
+
+private:
+ /// UnregisterFunctionInternal - Unregister the debug info for the given
+  /// function from the debugger and free the in-memory symbol file. The private
+ /// version of this method does not remove the function from FnMap so that it
+ /// can be called while iterating over FnMap.
+ void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index e101ef371ed0..f2b28ad326e5 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -21,25 +21,27 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
using namespace llvm;
-JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : Jit(theJit) {}
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}
unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
JITCodeEmitter& jce,
unsigned char* StartFunction,
- unsigned char* EndFunction) {
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr) {
+ assert(MMI && "MachineModuleInfo not registered!");
+
const TargetMachine& TM = F.getTarget();
TD = TM.getTargetData();
- needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding();
stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
RI = TM.getRegisterInfo();
JCE = &jce;
@@ -48,14 +50,13 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
EndFunction);
unsigned char* Result = 0;
- unsigned char* EHFramePtr = 0;
const std::vector<Function *> Personalities = MMI->getPersonalities();
EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
StartFunction, EndFunction, ExceptionTable);
-
+
return Result;
}
@@ -106,11 +107,9 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
}
- int Offset = -Src.getOffset();
-
- JCE->emitULEB128Bytes(Offset);
+ JCE->emitULEB128Bytes(-Src.getOffset());
} else {
- assert(0 && "Machine move no supported yet.");
+ llvm_unreachable("Machine move not supported yet.");
}
} else if (Src.isReg() &&
Src.getReg() == MachineLocation::VirtualFP) {
@@ -118,7 +117,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
JCE->emitByte(dwarf::DW_CFA_def_cfa_register);
JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true));
} else {
- assert(0 && "Machine move no supported yet.");
+ llvm_unreachable("Machine move not supported yet.");
}
} else {
unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
@@ -209,6 +208,8 @@ struct CallSiteEntry {
unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
unsigned char* StartFunction,
unsigned char* EndFunction) const {
+ assert(MMI && "MachineModuleInfo not registered!");
+
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
@@ -241,7 +242,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
E = FilterIds.end(); I != E; ++I) {
FilterOffsets.push_back(Offset);
- Offset -= TargetAsmInfo::getULEB128Size(*I);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
}
// Compute the actions table and gather the first action index for each
@@ -266,10 +267,10 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
assert(Actions.size());
PrevAction = &Actions.back();
- SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
- TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
- SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
SizeAction += -PrevAction->NextAction;
PrevAction = PrevAction->Previous;
}
@@ -280,10 +281,10 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
int TypeID = TypeIds[I];
assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
- unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
SizeSiteActions += SizeAction;
ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
@@ -386,29 +387,19 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
sizeof(int32_t) + // Site length.
sizeof(int32_t)); // Landing pad.
for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
- SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
unsigned TypeOffset = sizeof(int8_t) + // Call site format
// Call-site table length
- TargetAsmInfo::getULEB128Size(SizeSites) +
+ MCAsmInfo::getULEB128Size(SizeSites) +
SizeSites + SizeActions + SizeTypes;
- unsigned TotalSize = sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
- TypeOffset;
-
- unsigned SizeAlign = (4 - TotalSize) & 3;
-
// Begin the exception table.
- JCE->emitAlignment(4);
- for (unsigned i = 0; i != SizeAlign; ++i) {
- JCE->emitByte(0);
- // Asm->EOL("Padding");
- }
-
+ JCE->emitAlignmentWithFill(4, 0);
+ // Asm->EOL("Padding");
+
unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue();
// Emit the header.
@@ -475,11 +466,10 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
GlobalVariable *GV = TypeInfos[M - 1];
if (GV) {
- if (TD->getPointerSize() == sizeof(int32_t)) {
+ if (TD->getPointerSize() == sizeof(int32_t))
JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
- } else {
+ else
JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
- }
} else {
if (TD->getPointerSize() == sizeof(int32_t))
JCE->emitInt32(0);
@@ -495,8 +485,8 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
JCE->emitULEB128Bytes(TypeID);
//Asm->EOL("Filter TypeInfo index");
}
-
- JCE->emitAlignment(4);
+
+ JCE->emitAlignmentWithFill(4, 0);
return DwarfExceptionTable;
}
@@ -517,7 +507,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
JCE->emitULEB128Bytes(1);
JCE->emitSLEB128Bytes(stackGrowth);
JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
-
+
if (Personality) {
// Augmentation Size: 3 small ULEBs of one byte each, and the personality
// function which size is PointerSize.
@@ -533,10 +523,9 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
JCE->emitByte(dwarf::DW_EH_PE_sdata8);
JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
}
-
+
JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
-
} else {
JCE->emitULEB128Bytes(1);
JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
@@ -545,11 +534,12 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
std::vector<MachineMove> Moves;
RI->getInitialFrameState(Moves);
EmitFrameMoves(0, Moves);
- JCE->emitAlignment(PointerSize);
-
- JCE->emitInt32At((uintptr_t*)StartCommonPtr,
- (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
- FrameCommonBeginPtr));
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ FrameCommonBeginPtr));
return StartCommonPtr;
}
@@ -574,13 +564,19 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
// If there is a personality and landing pads then point to the language
// specific data area in the exception table.
- if (MMI->getPersonalityIndex()) {
- JCE->emitULEB128Bytes(4);
+ if (Personality) {
+ JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);
- if (!MMI->getLandingPads().empty()) {
- JCE->emitInt32(ExceptionTable - (unsigned char*)JCE->getCurrentPCValue());
+ if (PointerSize == 4) {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt32((int)0);
} else {
- JCE->emitInt32((int)0);
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt64((int)0);
}
} else {
JCE->emitULEB128Bytes(0);
@@ -589,14 +585,14 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
// Indicate locations of function specific callee saved registers in
// frame.
EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
-
- JCE->emitAlignment(PointerSize);
-
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
// Indicate the size of the table
- JCE->emitInt32At((uintptr_t*)StartEHPtr,
- (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
- StartEHPtr));
-
+ JCE->emitInt32At((uintptr_t*)StartEHPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ StartEHPtr));
+
// Double zeroes for the unwind runtime
if (PointerSize == 8) {
JCE->emitInt64(0);
@@ -605,7 +601,6 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
JCE->emitInt32(0);
JCE->emitInt32(0);
}
-
return StartEHPtr;
}
@@ -616,7 +611,6 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
unsigned char* EndFunction) {
const TargetMachine& TM = F.getTarget();
TD = TM.getTargetData();
- needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding();
stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
RI = TM.getRegisterInfo();
JCE = &jce;
@@ -630,7 +624,7 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()],
StartFunction);
-
+
return FinalSize;
}
@@ -653,11 +647,11 @@ JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality,
FinalSize += 3 * PointerSize;
// If there is a personality and landing pads then point to the language
// specific data area in the exception table.
- if (MMI->getPersonalityIndex()) {
- FinalSize += TargetAsmInfo::getULEB128Size(4);
+ if (Personality) {
+ FinalSize += MCAsmInfo::getULEB128Size(4);
FinalSize += PointerSize;
} else {
- FinalSize += TargetAsmInfo::getULEB128Size(0);
+ FinalSize += MCAsmInfo::getULEB128Size(0);
}
// Indicate locations of function specific callee saved registers in
@@ -685,24 +679,24 @@ unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personalit
FinalSize += 4;
FinalSize += 1;
FinalSize += Personality ? 5 : 3; // "zPLR" or "zR"
- FinalSize += TargetAsmInfo::getULEB128Size(1);
- FinalSize += TargetAsmInfo::getSLEB128Size(stackGrowth);
+ FinalSize += MCAsmInfo::getULEB128Size(1);
+ FinalSize += MCAsmInfo::getSLEB128Size(stackGrowth);
FinalSize += 1;
if (Personality) {
- FinalSize += TargetAsmInfo::getULEB128Size(7);
+ FinalSize += MCAsmInfo::getULEB128Size(7);
// Encoding
FinalSize+= 1;
//Personality
FinalSize += PointerSize;
- FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
- FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+ FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+ FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
} else {
- FinalSize += TargetAsmInfo::getULEB128Size(1);
- FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+ FinalSize += MCAsmInfo::getULEB128Size(1);
+ FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
}
std::vector<MachineMove> Moves;
@@ -754,23 +748,23 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
} else {
++FinalSize;
unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true);
- FinalSize += TargetAsmInfo::getULEB128Size(RegNum);
+ FinalSize += MCAsmInfo::getULEB128Size(RegNum);
}
int Offset = -Src.getOffset();
- FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+ FinalSize += MCAsmInfo::getULEB128Size(Offset);
} else {
- assert(0 && "Machine move no supported yet.");
+ llvm_unreachable("Machine move no supported yet.");
}
} else if (Src.isReg() &&
Src.getReg() == MachineLocation::VirtualFP) {
if (Dst.isReg()) {
++FinalSize;
unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true);
- FinalSize += TargetAsmInfo::getULEB128Size(RegNum);
+ FinalSize += MCAsmInfo::getULEB128Size(RegNum);
} else {
- assert(0 && "Machine move no supported yet.");
+ llvm_unreachable("Machine move no supported yet.");
}
} else {
unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
@@ -778,15 +772,15 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
if (Offset < 0) {
++FinalSize;
- FinalSize += TargetAsmInfo::getULEB128Size(Reg);
- FinalSize += TargetAsmInfo::getSLEB128Size(Offset);
+ FinalSize += MCAsmInfo::getULEB128Size(Reg);
+ FinalSize += MCAsmInfo::getSLEB128Size(Offset);
} else if (Reg < 64) {
++FinalSize;
- FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+ FinalSize += MCAsmInfo::getULEB128Size(Offset);
} else {
++FinalSize;
- FinalSize += TargetAsmInfo::getULEB128Size(Reg);
- FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+ FinalSize += MCAsmInfo::getULEB128Size(Reg);
+ FinalSize += MCAsmInfo::getULEB128Size(Offset);
}
}
}
@@ -829,7 +823,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
E = FilterIds.end(); I != E; ++I) {
FilterOffsets.push_back(Offset);
- Offset -= TargetAsmInfo::getULEB128Size(*I);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
}
// Compute the actions table and gather the first action index for each
@@ -854,10 +848,10 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
assert(Actions.size());
PrevAction = &Actions.back();
- SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
- TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
- SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
SizeAction += -PrevAction->NextAction;
PrevAction = PrevAction->Previous;
}
@@ -868,10 +862,10 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
int TypeID = TypeIds[I];
assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
- unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
SizeSiteActions += SizeAction;
ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
@@ -974,18 +968,18 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
sizeof(int32_t) + // Site length.
sizeof(int32_t)); // Landing pad.
for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
- SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
unsigned TypeOffset = sizeof(int8_t) + // Call site format
// Call-site table length
- TargetAsmInfo::getULEB128Size(SizeSites) +
+ MCAsmInfo::getULEB128Size(SizeSites) +
SizeSites + SizeActions + SizeTypes;
unsigned TotalSize = sizeof(int8_t) + // LPStart format
sizeof(int8_t) + // TType format
- TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
+ MCAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
TypeOffset;
unsigned SizeAlign = (4 - TotalSize) & 3;
@@ -1023,7 +1017,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
// Asm->EOL("Landing pad");
FinalSize += PointerSize;
- FinalSize += TargetAsmInfo::getULEB128Size(S.Action);
+ FinalSize += MCAsmInfo::getULEB128Size(S.Action);
// Asm->EOL("Action");
}
@@ -1032,9 +1026,9 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
ActionEntry &Action = Actions[I];
//Asm->EOL("TypeInfo index");
- FinalSize += TargetAsmInfo::getSLEB128Size(Action.ValueForTypeID);
+ FinalSize += MCAsmInfo::getSLEB128Size(Action.ValueForTypeID);
//Asm->EOL("Next action");
- FinalSize += TargetAsmInfo::getSLEB128Size(Action.NextAction);
+ FinalSize += MCAsmInfo::getSLEB128Size(Action.NextAction);
}
// Emit the type ids.
@@ -1046,7 +1040,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
// Emit the filter typeids.
for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
unsigned TypeID = FilterIds[j];
- FinalSize += TargetAsmInfo::getULEB128Size(TypeID);
+ FinalSize += MCAsmInfo::getULEB128Size(TypeID);
//Asm->EOL("Filter TypeInfo index");
}
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 9120ed44e6a6..e627550d6d0e 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -32,7 +32,6 @@ class JITDwarfEmitter {
const TargetRegisterInfo* RI;
MachineModuleInfo* MMI;
JIT& Jit;
- bool needsIndirectEncoding;
bool stackGrowthDirection;
unsigned char* EmitExceptionTable(MachineFunction* MF,
@@ -68,7 +67,8 @@ public:
unsigned char* EmitDwarfTable(MachineFunction& F,
JITCodeEmitter& JCE,
unsigned char* StartFunction,
- unsigned char* EndFunction);
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr);
unsigned GetDwarfTableSizeInBytes(MachineFunction& F,
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 8fe7ab848b73..eacd9f972058 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -14,7 +14,9 @@
#define DEBUG_TYPE "jit"
#include "JIT.h"
+#include "JITDebugRegisterer.h"
#include "JITDwarfEmitter.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
@@ -33,8 +35,10 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Disassembler.h"
#include "llvm/System/Memory.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -49,6 +53,7 @@ using namespace llvm;
STATISTIC(NumBytes, "Number of bytes of machine code compiled");
STATISTIC(NumRelos, "Number of relocations applied");
+STATISTIC(NumRetries, "Number of retries with more memory");
static JIT *TheJIT = 0;
@@ -59,7 +64,7 @@ namespace {
class JITResolverState {
public:
typedef std::map<AssertingVH<Function>, void*> FunctionToStubMapTy;
- typedef std::map<void*, Function*> StubToFunctionMapTy;
+ typedef std::map<void*, AssertingVH<Function> > StubToFunctionMapTy;
typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
private:
/// FunctionToStubMap - Keep track of the stub created for a particular
@@ -193,9 +198,9 @@ void *JITResolver::getFunctionStub(Function *F) {
// Call the lazy resolver function unless we are JIT'ing non-lazily, in which
// case we must resolve the symbol now.
- void *Actual = TheJIT->isLazyCompilationDisabled()
+ void *Actual = TheJIT->isLazyCompilationDisabled()
? (void *)0 : (void *)(intptr_t)LazyResolverFn;
-
+
// If this is an external declaration, attempt to resolve the address now
// to place in the stub.
if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) {
@@ -220,20 +225,20 @@ void *JITResolver::getFunctionStub(Function *F) {
TheJIT->updateGlobalMapping(F, Stub);
}
- DOUT << "JIT: Stub emitted at [" << Stub << "] for function '"
- << F->getName() << "'\n";
+ DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for function '"
+ << F->getName() << "'\n");
// Finally, keep track of the stub-to-Function mapping so that the
// JITCompilerFn knows which function to compile!
state.getStubToFunctionMap(locked)[Stub] = F;
-
+
// If we are JIT'ing non-lazily but need to call a function that does not
// exist yet, add it to the JIT's work list so that we can fill in the stub
// address later.
if (!Actual && TheJIT->isLazyCompilationDisabled())
if (!F->isDeclaration() || F->hasNotBeenReadFromBitcode())
TheJIT->addPendingFunction(F);
-
+
return Stub;
}
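
The Actual-pointer logic above is driven by the lazy-compilation flag. A hedged sketch of the eager configuration (the wrapper is illustrative; DisableLazyCompilation and getPointerToFunction are existing ExecutionEngine methods):

#include "llvm/ExecutionEngine/ExecutionEngine.h"
using namespace llvm;

// With lazy compilation disabled, stubs are not pointed at the lazy
// resolver; not-yet-codegen'd callees are queued via addPendingFunction.
void *compileEagerly(ExecutionEngine *EE, Function *F) {
  EE->DisableLazyCompilation(true);
  return EE->getPointerToFunction(F);
}
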
@@ -250,8 +255,8 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
*TheJIT->getCodeEmitter());
- DOUT << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '"
- << GV->getName() << "'\n";
+ DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ << "] for GV '" << GV->getName() << "'\n");
return IndirectSym;
}
@@ -266,8 +271,8 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr,
*TheJIT->getCodeEmitter());
- DOUT << "JIT: Stub emitted at [" << Stub
- << "] for external function at '" << FnAddr << "'\n";
+ DEBUG(errs() << "JIT: Stub emitted at [" << Stub
+ << "] for external function at '" << FnAddr << "'\n");
return Stub;
}
@@ -276,7 +281,8 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
if (!idx) {
idx = ++nextGOTIndex;
revGOTMap[addr] = idx;
- DOUT << "JIT: Adding GOT entry " << idx << " for addr [" << addr << "]\n";
+ DEBUG(errs() << "JIT: Adding GOT entry " << idx << " for addr ["
+ << addr << "]\n");
}
return idx;
}
@@ -373,9 +379,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// If lazy compilation is disabled, emit a useful error message and abort.
if (TheJIT->isLazyCompilationDisabled()) {
- cerr << "LLVM JIT requested to do lazy compilation of function '"
- << F->getName() << "' when lazy compiles are disabled!\n";
- abort();
+ llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+ + F->getName() + "' when lazy compiles are disabled!");
}
// We might like to remove the stub from the StubToFunction map.
@@ -385,9 +390,9 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// it needs to call.
//JR.state.getStubToFunctionMap(locked).erase(I);
- DOUT << "JIT: Lazily resolving function '" << F->getName()
- << "' In stub ptr = " << Stub << " actual ptr = "
- << ActualPtr << "\n";
+ DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName()
+ << "' In stub ptr = " << Stub << " actual ptr = "
+ << ActualPtr << "\n");
Result = TheJIT->getPointerToFunction(F);
}
@@ -424,6 +429,12 @@ namespace {
// save BufferBegin/BufferEnd/CurBufferPtr here.
uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+ // When reattempting to JIT a function after running out of space, we store
+ // the estimated size of the function we're trying to JIT here, so we can
+ // ask the memory manager for at least this much space. When we
+ // successfully emit the function, we reset this back to zero.
+ uintptr_t SizeEstimate;
+
/// Relocations - These are the relocations that the function needs, as
/// emitted.
std::vector<MachineRelocation> Relocations;
@@ -455,9 +466,12 @@ namespace {
/// Resolver - This contains info about the currently resolved functions.
JITResolver Resolver;
-
+
/// DE - The dwarf emitter for the jit.
- JITDwarfEmitter *DE;
+ OwningPtr<JITDwarfEmitter> DE;
+
+ /// DR - The debug registerer for the jit.
+ OwningPtr<JITDebugRegisterer> DR;
/// LabelLocations - This vector is a mapping from Label ID's to their
/// address.
@@ -472,7 +486,12 @@ namespace {
// CurFn - The llvm function being emitted. Only valid during
// finishFunction().
const Function *CurFn;
-
+
+ /// Information about emitted code, which is passed to the
+ /// JITEventListeners. This is reset in startFunction and used in
+ /// finishFunction.
+ JITEvent_EmittedFunctionDetails EmissionDetails;
+
// CurFnStubUses - For a given Function, a vector of stubs that it
// references. This facilitates the JIT detecting that a stub is no
// longer used, so that it may be deallocated.
@@ -487,19 +506,26 @@ namespace {
// in the JITResolver's ExternalFnToStubMap.
StringMap<void *> ExtFnStubs;
+ DebugLocTuple PrevDLT;
+
public:
- JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0) {
+ JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+ : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
- DOUT << "JIT is managing a GOT\n";
+ DEBUG(errs() << "JIT is managing a GOT\n");
}
- if (ExceptionHandling) DE = new JITDwarfEmitter(jit);
+ if (DwarfExceptionHandling || JITEmitDebugInfo) {
+ DE.reset(new JITDwarfEmitter(jit));
+ }
+ if (JITEmitDebugInfo) {
+ DR.reset(new JITDebugRegisterer(TM));
+ }
}
~JITEmitter() {
delete MemMgr;
- if (ExceptionHandling) delete DE;
}
/// classof - Methods for support type inquiry through isa, cast, and
@@ -527,6 +553,11 @@ namespace {
/// allocate a new one of the given size.
virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
virtual void addRelocation(const MachineRelocation &MR) {
Relocations.push_back(MR);
}
@@ -535,8 +566,8 @@ namespace {
if (MBBLocations.size() <= (unsigned)MBB->getNumber())
MBBLocations.resize((MBB->getNumber()+1)*2);
MBBLocations[MBB->getNumber()] = getCurrentPCValue();
- DOUT << "JIT: Emitting BB" << MBB->getNumber() << " at ["
- << (void*) getCurrentPCValue() << "]\n";
+ DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n");
}
virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
@@ -548,9 +579,14 @@ namespace {
return MBBLocations[MBB->getNumber()];
}
+ /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+ /// given function. Increase the minimum allocation size so that we get
+ /// more memory next time.
+ void retryWithMoreMemory(MachineFunction &F);
+
/// deallocateMemForFunction - Deallocate all memory for the specified
/// function body.
- void deallocateMemForFunction(Function *F);
+ void deallocateMemForFunction(const Function *F);
/// AddStubToCurrentFunction - Mark the current function being JIT'd as
/// using the stub at the specified address. Allows
@@ -561,6 +597,8 @@ namespace {
/// MachineRelocations that reference external functions by name.
const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; }
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
virtual void emitLabel(uint64_t LabelID) {
if (LabelLocations.size() <= LabelID)
LabelLocations.resize((LabelID+1)*2);
@@ -575,14 +613,14 @@ namespace {
virtual void setModuleInfo(MachineModuleInfo* Info) {
MMI = Info;
- if (ExceptionHandling) DE->setModuleInfo(Info);
+ if (DE.get()) DE->setModuleInfo(Info);
}
- void setMemoryExecutable(void) {
+ void setMemoryExecutable() {
MemMgr->setMemoryExecutable();
}
- JITMemoryManager *getMemMgr(void) const { return MemMgr; }
+ JITMemoryManager *getMemMgr() const { return MemMgr; }
private:
void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
@@ -606,7 +644,7 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
// If we have already compiled the function, return a pointer to its body.
Function *F = cast<Function>(V);
void *ResultPtr;
- if (!DoesntNeedStub && !TheJIT->isLazyCompilationDisabled()) {
+ if (!DoesntNeedStub) {
// Return the function stub if it's already created.
ResultPtr = Resolver.getFunctionStubIfAvailable(F);
if (ResultPtr)
@@ -658,11 +696,8 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
}
void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
- if (!TheJIT->areDlsymStubsEnabled())
- return;
-
assert(CurFn && "Stub added to current function, but current function is 0!");
-
+
SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn];
StubsUsed.push_back(StubAddr);
@@ -670,6 +705,23 @@ void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
FnRefs.insert(CurFn);
}
+void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
+ if (!DL.isUnknown()) {
+ DebugLocTuple CurDLT = EmissionDetails.MF->getDebugLocTuple(DL);
+
+ if (BeforePrintingInsn) {
+ if (CurDLT.Scope != 0 && PrevDLT != CurDLT) {
+ JITEvent_EmittedFunctionDetails::LineStart NextLine;
+ NextLine.Address = getCurrentPCValue();
+ NextLine.Loc = DL;
+ EmissionDetails.LineStarts.push_back(NextLine);
+ }
+
+ PrevDLT = CurDLT;
+ }
+ }
+}
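
The line starts recorded here surface to clients through JITEventListener. A sketch of a listener that consumes them (the class name is hypothetical; the hook signature and the EmittedFunctionDetails fields follow this patch):

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

class LineStartDumper : public JITEventListener {
public:
  // Called by JITEmitter::finishFunction with the EmissionDetails that
  // processDebugLoc filled in above.
  virtual void NotifyFunctionEmitted(const Function &F, void *Code,
                                     size_t Size,
                                     const EmittedFunctionDetails &Details) {
    for (unsigned i = 0, e = Details.LineStarts.size(); i != e; ++i)
      errs() << "JIT: line starts at address "
             << Details.LineStarts[i].Address << "\n";
  }
};
// A listener would be attached with ExecutionEngine::RegisterJITEventListener.
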
+
static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
const TargetData *TD) {
const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
@@ -713,7 +765,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
size_t GVAlign =
(size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
- DOUT << "JIT: Adding in size " << GVSize << " alignment " << GVAlign;
+ DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
DEBUG(GV->dump());
// Assume code section ends with worst possible alignment, so first
// variable needs maximal padding.
@@ -772,8 +824,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
break;
}
default: {
- cerr << "ConstantExpr not handled: " << *CE << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "ConstantExpr not handled: " << *CE;
+ llvm_report_error(Msg.str());
}
}
}
@@ -839,7 +893,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
}
}
}
- DOUT << "JIT: About to look through initializers\n";
+ DEBUG(errs() << "JIT: About to look through initializers\n");
// Look for more globals that are referenced only from initializers.
// GVSet.end is computed each time because the set can grow as we go.
for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
@@ -853,14 +907,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
}
void JITEmitter::startFunction(MachineFunction &F) {
- DOUT << "JIT: Starting CodeGen of Function "
- << F.getFunction()->getName() << "\n";
+ DEBUG(errs() << "JIT: Starting CodeGen of Function "
+ << F.getFunction()->getName() << "\n");
uintptr_t ActualSize = 0;
// Set the memory writable, if it's not already
MemMgr->setMemoryWritable();
if (MemMgr->NeedsExactSize()) {
- DOUT << "JIT: ExactSize\n";
+ DEBUG(errs() << "JIT: ExactSize\n");
const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
MachineConstantPool *MCP = F.getConstantPool();
@@ -887,12 +941,15 @@ void JITEmitter::startFunction(MachineFunction &F) {
// Add the function size
ActualSize += TII->GetFunctionSizeInBytes(F);
- DOUT << "JIT: ActualSize before globals " << ActualSize << "\n";
+ DEBUG(errs() << "JIT: ActualSize before globals " << ActualSize << "\n");
// Add the size of the globals that will be allocated after this function.
// These are all the ones referenced from this function that were not
// previously allocated.
ActualSize += GetSizeOfGlobalsInBytes(F);
- DOUT << "JIT: ActualSize after globals " << ActualSize << "\n";
+ DEBUG(errs() << "JIT: ActualSize after globals " << ActualSize << "\n");
+ } else if (SizeEstimate > 0) {
+ // SizeEstimate will be non-zero on reallocation attempts.
+ ActualSize = SizeEstimate;
}
BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
@@ -910,17 +967,22 @@ void JITEmitter::startFunction(MachineFunction &F) {
TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
MBBLocations.clear();
+
+ EmissionDetails.MF = &F;
+ EmissionDetails.LineStarts.clear();
}
bool JITEmitter::finishFunction(MachineFunction &F) {
if (CurBufferPtr == BufferEnd) {
- // FIXME: Allocate more space, then try again.
- cerr << "JIT: Ran out of space for generated machine code!\n";
- abort();
+ // We must call endFunctionBody before retrying, because
+ // deallocateMemForFunction requires it.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+ retryWithMoreMemory(F);
+ return true;
}
-
+
emitJumpTableInfo(F.getJumpTableInfo());
-
+
// FnStart is the start of the text, not the start of the constant pool and
// other per-function data.
uint8_t *FnStart =
@@ -941,8 +1003,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (MR.isExternalSymbol()) {
ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
false);
- DOUT << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
- << ResultPtr << "]\n";
+ DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
+ << ResultPtr << "]\n");
// If the target REALLY wants a stub for this function, emit it now.
if (!MR.doesntNeedStub()) {
@@ -983,9 +1045,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
MR.setGOTIndex(idx);
if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
- DOUT << "JIT: GOT was out of date for " << ResultPtr
- << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
- << "\n";
+ DEBUG(errs() << "JIT: GOT was out of date for " << ResultPtr
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
}
}
@@ -1000,8 +1062,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (MemMgr->isManagingGOT()) {
unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
- DOUT << "JIT: GOT was out of date for " << (void*)BufferBegin
- << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n";
+ DEBUG(errs() << "JIT: GOT was out of date for " << (void*)BufferBegin
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
}
}
@@ -1011,9 +1074,12 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
if (CurBufferPtr == BufferEnd) {
- // FIXME: Allocate more space, then try again.
- cerr << "JIT: Ran out of space for generated machine code!\n";
- abort();
+ retryWithMoreMemory(F);
+ return true;
+ } else {
+ // Now that we've succeeded in emitting the function, reset the
+ // SizeEstimate back down to zero.
+ SizeEstimate = 0;
}
BufferBegin = CurBufferPtr = 0;
@@ -1022,14 +1088,13 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
// Invalidate the icache if necessary.
sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
- JITEvent_EmittedFunctionDetails Details;
TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
- Details);
+ EmissionDetails);
- DOUT << "JIT: Finished CodeGen of [" << (void*)FnStart
- << "] Function: " << F.getFunction()->getName()
- << ": " << (FnEnd-FnStart) << " bytes of text, "
- << Relocations.size() << " relocations\n";
+ DEBUG(errs() << "JIT: Finished CodeGen of [" << (void*)FnStart
+ << "] Function: " << F.getFunction()->getName()
+ << ": " << (FnEnd-FnStart) << " bytes of text, "
+ << Relocations.size() << " relocations\n");
Relocations.clear();
ConstPoolAddresses.clear();
@@ -1037,45 +1102,42 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
// Mark code region readable and executable if it's not so already.
MemMgr->setMemoryExecutable();
-#ifndef NDEBUG
- {
+ DEBUG(
if (sys::hasDisassembler()) {
- DOUT << "JIT: Disassembled code:\n";
- DOUT << sys::disassembleBuffer(FnStart, FnEnd-FnStart, (uintptr_t)FnStart);
+ errs() << "JIT: Disassembled code:\n";
+ errs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
+ (uintptr_t)FnStart);
} else {
- DOUT << "JIT: Binary code:\n";
- DOUT << std::hex;
+ errs() << "JIT: Binary code:\n";
uint8_t* q = FnStart;
for (int i = 0; q < FnEnd; q += 4, ++i) {
if (i == 4)
i = 0;
if (i == 0)
- DOUT << "JIT: " << std::setw(8) << std::setfill('0')
- << (long)(q - FnStart) << ": ";
+ errs() << "JIT: " << (long)(q - FnStart) << ": ";
bool Done = false;
for (int j = 3; j >= 0; --j) {
if (q + j >= FnEnd)
Done = true;
else
- DOUT << std::setw(2) << std::setfill('0') << (unsigned short)q[j];
+ errs() << (unsigned short)q[j];
}
if (Done)
break;
- DOUT << ' ';
+ errs() << ' ';
if (i == 3)
- DOUT << '\n';
+ errs() << '\n';
}
- DOUT << std::dec;
- DOUT<< '\n';
+ errs() << '\n';
}
- }
-#endif
- if (ExceptionHandling) {
+ );
+
+ if (DwarfExceptionHandling || JITEmitDebugInfo) {
uintptr_t ActualSize = 0;
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
-
+
if (MemMgr->NeedsExactSize()) {
ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd);
}
@@ -1083,14 +1145,28 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
ActualSize);
BufferEnd = BufferBegin+ActualSize;
- uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd);
+ uint8_t *EhStart;
+ uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd,
+ EhStart);
MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
FrameRegister);
+ uint8_t *EhEnd = CurBufferPtr;
BufferBegin = SavedBufferBegin;
BufferEnd = SavedBufferEnd;
CurBufferPtr = SavedCurBufferPtr;
- TheJIT->RegisterTable(FrameRegister);
+ if (DwarfExceptionHandling) {
+ TheJIT->RegisterTable(FrameRegister);
+ }
+
+ if (JITEmitDebugInfo) {
+ DebugInfo I;
+ I.FnStart = FnStart;
+ I.FnEnd = FnEnd;
+ I.EhStart = EhStart;
+ I.EhEnd = EhEnd;
+ DR->RegisterFunction(F.getFunction(), I);
+ }
}
if (MMI)
@@ -1099,11 +1175,28 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
return false;
}
+void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
+ DEBUG(errs() << "JIT: Ran out of space for native code. Reattempting.\n");
+ Relocations.clear(); // Clear the old relocations or we'll reapply them.
+ ConstPoolAddresses.clear();
+ ++NumRetries;
+ deallocateMemForFunction(F.getFunction());
+ // Try again with at least twice as much free space.
+ SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
+}
+
/// deallocateMemForFunction - Deallocate all memory for the specified
/// function body. Also drop any references the function has to stubs.
-void JITEmitter::deallocateMemForFunction(Function *F) {
+void JITEmitter::deallocateMemForFunction(const Function *F) {
MemMgr->deallocateMemForFunction(F);
+ // TODO: Do we need to unregister exception handling information from libgcc
+ // here?
+
+ if (JITEmitDebugInfo) {
+ DR->UnregisterFunction(F);
+ }
+
// If the function did not reference any stubs, return.
if (CurFnStubUses.find(F) == CurFnStubUses.end())
return;
@@ -1125,7 +1218,7 @@ void JITEmitter::deallocateMemForFunction(Function *F) {
// in the JITResolver. Were there a memory manager deallocateStub routine,
// we could call that at this point too.
if (FnRefs.empty()) {
- DOUT << "\nJIT: Invalidated Stub at [" << Stub << "]\n";
+ DEBUG(errs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n");
StubFnRefs.erase(Stub);
// Invalidate the stub. If it is a GV stub, update the JIT's global
@@ -1161,6 +1254,11 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
return CurBufferPtr;
}
+void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ // Delegate this call through the memory manager.
+ return MemMgr->allocateGlobal(Size, Alignment);
+}
+
void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
if (TheJIT->getJITInfo().hasCustomConstantPool())
return;
@@ -1175,8 +1273,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
if (ConstantPoolBase == 0) return; // Buffer overflow.
- DOUT << "JIT: Emitted constant pool at [" << ConstantPoolBase
- << "] (size: " << Size << ", alignment: " << Align << ")\n";
+ DEBUG(errs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
+ << "] (size: " << Size << ", alignment: " << Align << ")\n");
// Initialize the memory for all of the constant pool entries.
unsigned Offset = 0;
@@ -1189,13 +1287,12 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
ConstPoolAddresses.push_back(CAddr);
if (CPE.isMachineConstantPoolEntry()) {
// FIXME: add support to lower machine constant pool values into bytes!
- cerr << "Initialize memory with machine specific constant pool entry"
- << " has not been implemented!\n";
- abort();
+ llvm_report_error("Initialize memory with machine specific constant pool"
+ "entry has not been implemented!");
}
TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
- DOUT << "JIT: CP" << i << " at [0x"
- << std::hex << CAddr << std::dec << "]\n";
+ DEBUG(errs() << "JIT: CP" << i << " at [0x";
+ errs().write_hex(CAddr) << "]\n");
const Type *Ty = CPE.Val.ConstVal->getType();
Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
@@ -1322,8 +1419,9 @@ uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
// Public interface to this file
//===----------------------------------------------------------------------===//
-JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM) {
- return new JITEmitter(jit, JMM);
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
+ TargetMachine &tm) {
+ return new JITEmitter(jit, JMM, tm);
}
// getPointerToNamedFunction - This function is used as a global wrapper to
@@ -1396,7 +1494,7 @@ void JIT::updateDlsymStubTable() {
SmallVector<unsigned, 8> Offsets;
for (unsigned i = 0; i != GVs.size(); ++i) {
Offsets.push_back(offset);
- offset += GVs[i]->getName().length() + 1;
+ offset += GVs[i]->getName().size() + 1;
}
for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
i != e; ++i) {
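The out-of-space handling added above follows a simple protocol: finishFunction returns true to request a re-run, and retryWithMoreMemory clears per-function state and doubles the size estimate. The following is a minimal illustrative sketch, not code from the patch; emitWithRetry is a hypothetical wrapper over the real member names.

    // Sketch only: the retry protocol introduced by this patch.
    bool JITEmitter::emitWithRetry(MachineFunction &F) {  // hypothetical
      if (CurBufferPtr == BufferEnd) {
        // Buffer overflowed. endFunctionBody must run before
        // deallocateMemForFunction, so close out the partial body first.
        MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
        retryWithMoreMemory(F); // clears relocations, doubles SizeEstimate
        return true;            // tells the caller to re-run codegen for F
      }
      SizeEstimate = 0;         // success: reset the estimate
      return false;
    }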
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 70ccdccb8049..474843f06624 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -11,9 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/GlobalValue.h"
+#define DEBUG_TYPE "jit"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Memory.h"
#include <map>
#include <vector>
@@ -24,6 +31,7 @@
#include <cstring>
using namespace llvm;
+STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
JITMemoryManager::~JITMemoryManager() {}
@@ -140,7 +148,7 @@ FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
/// FreeRangeHeader to allocate from.
FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
MemoryRangeHeader *FollowingBlock = &getBlockAfter();
- assert(ThisAllocated && "This block is already allocated!");
+ assert(ThisAllocated && "This block is already free!");
assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
FreeRangeHeader *FreeListToReturn = FreeList;
@@ -243,67 +251,160 @@ TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
// Memory Block Implementation.
//===----------------------------------------------------------------------===//
-namespace {
+namespace {
+
+ class DefaultJITMemoryManager;
+
+ class JITSlabAllocator : public SlabAllocator {
+ DefaultJITMemoryManager &JMM;
+ public:
+ JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
+ virtual ~JITSlabAllocator() { }
+ virtual MemSlab *Allocate(size_t Size);
+ virtual void Deallocate(MemSlab *Slab);
+ };
+
/// DefaultJITMemoryManager - Manage memory for the JIT code generation.
/// This splits a large block of MAP_NORESERVE'd memory into two
/// sections, one for function stubs, one for the functions themselves. We
/// have to do this because we may need to emit a function stub while in the
/// middle of emitting a function, and we don't know how large the function we
/// are emitting is.
- class VISIBILITY_HIDDEN DefaultJITMemoryManager : public JITMemoryManager {
- std::vector<sys::MemoryBlock> Blocks; // Memory blocks allocated by the JIT
- FreeRangeHeader *FreeMemoryList; // Circular list of free blocks.
-
+ class DefaultJITMemoryManager : public JITMemoryManager {
+
+ // Whether to poison freed memory.
+ bool PoisonMemory;
+
+ /// LastSlab - This points to the last slab allocated and is used as the
+ /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all
+ /// stubs, data, and code contiguously in memory. In general, however, this
+ /// is not possible because the NearBlock parameter is ignored on Windows
+ /// platforms and even on Unix it works on a best-effort basis.
+ sys::MemoryBlock LastSlab;
+
+ // Memory slabs allocated by the JIT. We refer to them as slabs so we don't
+ // confuse them with the blocks of memory described above.
+ std::vector<sys::MemoryBlock> CodeSlabs;
+ JITSlabAllocator BumpSlabAllocator;
+ BumpPtrAllocator StubAllocator;
+ BumpPtrAllocator DataAllocator;
+
+ // Circular list of free blocks.
+ FreeRangeHeader *FreeMemoryList;
+
// When emitting code into a memory block, this is the block.
MemoryRangeHeader *CurBlock;
-
- uint8_t *CurStubPtr, *StubBase;
+
uint8_t *GOTBase; // Target Specific reserved memory
void *DlsymTable; // Stub external symbol information
- // Centralize memory block allocation.
- sys::MemoryBlock getNewMemoryBlock(unsigned size);
-
std::map<const Function*, MemoryRangeHeader*> FunctionBlocks;
std::map<const Function*, MemoryRangeHeader*> TableBlocks;
public:
DefaultJITMemoryManager();
~DefaultJITMemoryManager();
+ /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the
+ /// last slab it allocated, so that subsequent allocations follow it.
+ sys::MemoryBlock allocateNewSlab(size_t size);
+
+ /// DefaultCodeSlabSize - When we have to go map more memory, we allocate at
+ /// least this much unless more is requested.
+ static const size_t DefaultCodeSlabSize;
+
+ /// DefaultSlabSize - Allocate data into slabs of this size unless we get
+ /// an allocation above SizeThreshold.
+ static const size_t DefaultSlabSize;
+
+ /// DefaultSizeThreshold - For any allocation larger than this threshold, we
+ /// should allocate a separate slab.
+ static const size_t DefaultSizeThreshold;
+
void AllocateGOT();
void SetDlsymTable(void *);
-
- uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment);
-
+
+ // Testing methods.
+ virtual bool CheckInvariants(std::string &ErrorStr);
+ size_t GetDefaultCodeSlabSize() { return DefaultCodeSlabSize; }
+ size_t GetDefaultDataSlabSize() { return DefaultSlabSize; }
+ size_t GetDefaultStubSlabSize() { return DefaultSlabSize; }
+ unsigned GetNumCodeSlabs() { return CodeSlabs.size(); }
+ unsigned GetNumDataSlabs() { return DataAllocator.GetNumSlabs(); }
+ unsigned GetNumStubSlabs() { return StubAllocator.GetNumSlabs(); }
+
/// startFunctionBody - When a function starts, allocate a block of free
/// executable memory, returning a pointer to it and its actual size.
uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
-
+
FreeRangeHeader* candidateBlock = FreeMemoryList;
FreeRangeHeader* head = FreeMemoryList;
FreeRangeHeader* iter = head->Next;
uintptr_t largest = candidateBlock->BlockSize;
-
+
// Search for the largest free block
while (iter != head) {
- if (iter->BlockSize > largest) {
- largest = iter->BlockSize;
- candidateBlock = iter;
- }
- iter = iter->Next;
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
}
+
+ largest = largest - sizeof(MemoryRangeHeader);
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < ActualSize ||
+ largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(errs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)ActualSize);
+ }
+
// Select this candidate block for allocation
CurBlock = candidateBlock;
// Allocate the entire memory block.
FreeMemoryList = candidateBlock->AllocateBlock();
- ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader);
- return (uint8_t *)(CurBlock+1);
+ ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader);
+ return (uint8_t *)(CurBlock + 1);
}
-
+
+ /// allocateNewCodeSlab - Helper method to allocate a new slab of code
+ /// memory from the OS and add it to the free list. Returns the new
+ /// FreeRangeHeader at the base of the slab.
+ FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) {
+ // If the user needs at least MinSize free memory, then we account for
+ // two MemoryRangeHeaders: the one in the user's block, and the one at the
+ // end of the slab.
+ size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader);
+ size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin);
+ sys::MemoryBlock B = allocateNewSlab(SlabSize);
+ CodeSlabs.push_back(B);
+ char *MemBase = (char*)(B.base());
+
+ // Put a tiny allocated block at the end of the memory chunk, so when
+ // FreeBlock calls getBlockAfter it doesn't fall off the end.
+ MemoryRangeHeader *EndBlock =
+ (MemoryRangeHeader*)(MemBase + B.size()) - 1;
+ EndBlock->ThisAllocated = 1;
+ EndBlock->PrevAllocated = 0;
+ EndBlock->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Start out with a vast new block of free memory.
+ FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase;
+ NewBlock->ThisAllocated = 0;
+ // Make sure getFreeBlockBefore doesn't look into unmapped memory.
+ NewBlock->PrevAllocated = 1;
+ NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock;
+ NewBlock->SetEndOfBlockSizeMarker();
+ NewBlock->AddToFreeList(FreeMemoryList);
+
+ assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize &&
+ "The block was too small!");
+ return NewBlock;
+ }
+
/// endFunctionBody - The function F is now allocated, and takes the memory
/// in the range [FunctionStart,FunctionEnd).
void endFunctionBody(const Function *F, uint8_t *FunctionStart,
@@ -319,12 +420,13 @@ namespace {
FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
}
- /// allocateSpace - Allocate a memory block of the given size.
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
CurBlock = FreeMemoryList;
FreeMemoryList = FreeMemoryList->AllocateBlock();
- uint8_t *result = (uint8_t *)CurBlock+1;
+ uint8_t *result = (uint8_t *)(CurBlock + 1);
if (Alignment == 0) Alignment = 1;
result = (uint8_t*)(((intptr_t)result+Alignment-1) &
@@ -336,6 +438,17 @@ namespace {
return result;
}
+ /// allocateStub - Allocate memory for a function stub.
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) {
+ return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment);
+ }
+
+ /// allocateGlobal - Allocate memory for a global.
+ uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
/// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
@@ -375,12 +488,12 @@ namespace {
// Find the block that is allocated for this function.
MemoryRangeHeader *MemRange = I->second;
assert(MemRange->ThisAllocated && "Block isn't allocated!");
-
+
// Fill the buffer with garbage!
-#ifndef NDEBUG
- memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
-#endif
-
+ if (PoisonMemory) {
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+ }
+
// Free the memory.
FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
@@ -393,12 +506,12 @@ namespace {
// Find the block that is allocated for this function.
MemRange = I->second;
assert(MemRange->ThisAllocated && "Block isn't allocated!");
-
+
// Fill the buffer with garbage!
-#ifndef NDEBUG
- memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
-#endif
-
+ if (PoisonMemory) {
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+ }
+
// Free the memory.
FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
@@ -408,36 +521,57 @@ namespace {
/// setMemoryWritable - When code generation is in progress,
/// the code pages may need permissions changed.
- void setMemoryWritable(void)
+ void setMemoryWritable()
{
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
- sys::Memory::setWritable(Blocks[i]);
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setWritable(CodeSlabs[i]);
}
/// setMemoryExecutable - When code generation is done and we're ready to
/// start execution, the code pages may need permissions changed.
- void setMemoryExecutable(void)
+ void setMemoryExecutable()
{
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
- sys::Memory::setExecutable(Blocks[i]);
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setExecutable(CodeSlabs[i]);
+ }
+
+ /// setPoisonMemory - Controls whether we write garbage over freed memory.
+ ///
+ void setPoisonMemory(bool poison) {
+ PoisonMemory = poison;
}
};
}
-DefaultJITMemoryManager::DefaultJITMemoryManager() {
- // Allocate a 16M block of memory for functions.
-#if defined(__APPLE__) && defined(__arm__)
- sys::MemoryBlock MemBlock = getNewMemoryBlock(4 << 20);
+MemSlab *JITSlabAllocator::Allocate(size_t Size) {
+ sys::MemoryBlock B = JMM.allocateNewSlab(Size);
+ MemSlab *Slab = (MemSlab*)B.base();
+ Slab->Size = B.size();
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void JITSlabAllocator::Deallocate(MemSlab *Slab) {
+ sys::MemoryBlock B(Slab, Slab->Size);
+ sys::Memory::ReleaseRWX(B);
+}
+
+DefaultJITMemoryManager::DefaultJITMemoryManager()
+ :
+#ifdef NDEBUG
+ PoisonMemory(false),
#else
- sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20);
+ PoisonMemory(true),
#endif
+ LastSlab(0, 0),
+ BumpSlabAllocator(*this),
+ StubAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator),
+ DataAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator) {
- uint8_t *MemBase = static_cast<uint8_t*>(MemBlock.base());
+ // Allocate space for code.
+ sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize);
+ CodeSlabs.push_back(MemBlock);
+ uint8_t *MemBase = (uint8_t*)MemBlock.base();
- // Allocate stubs backwards from the base, allocate functions forward
- // from the base.
- StubBase = MemBase;
- CurStubPtr = MemBase + 512*1024; // Use 512k for stubs, working backwards.
-
// We set up the memory chunk with 4 mem regions, like this:
// [ START
// [ Free #0 ] -> Large space to allocate functions from.
@@ -453,7 +587,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
Mem3->ThisAllocated = 1;
Mem3->PrevAllocated = 0;
- Mem3->BlockSize = 0;
+ Mem3->BlockSize = sizeof(MemoryRangeHeader);
/// Add a tiny free region so that the free list always has one entry.
FreeRangeHeader *Mem2 =
@@ -469,12 +603,12 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1;
Mem1->ThisAllocated = 1;
Mem1->PrevAllocated = 0;
- Mem1->BlockSize = (char*)Mem2 - (char*)Mem1;
+ Mem1->BlockSize = sizeof(MemoryRangeHeader);
// Add a FreeRangeHeader to the start of the function body region, indicating
// that the space is free. Mark the previous block allocated so we never look
// at it.
- FreeRangeHeader *Mem0 = (FreeRangeHeader*)CurStubPtr;
+ FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase;
Mem0->ThisAllocated = 0;
Mem0->PrevAllocated = 1;
Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
@@ -499,43 +633,128 @@ void DefaultJITMemoryManager::SetDlsymTable(void *ptr) {
}
DefaultJITMemoryManager::~DefaultJITMemoryManager() {
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
- sys::Memory::ReleaseRWX(Blocks[i]);
-
- delete[] GOTBase;
- Blocks.clear();
-}
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::ReleaseRWX(CodeSlabs[i]);
-uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
- unsigned StubSize,
- unsigned Alignment) {
- CurStubPtr -= StubSize;
- CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) &
- ~(intptr_t)(Alignment-1));
- if (CurStubPtr < StubBase) {
- // FIXME: allocate a new block
- fprintf(stderr, "JIT ran out of memory for function stubs!\n");
- abort();
- }
- return CurStubPtr;
+ delete[] GOTBase;
}
-sys::MemoryBlock DefaultJITMemoryManager::getNewMemoryBlock(unsigned size) {
+sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
// Allocate a new block close to the last one.
- const sys::MemoryBlock *BOld = Blocks.empty() ? 0 : &Blocks.front();
std::string ErrMsg;
- sys::MemoryBlock B = sys::Memory::AllocateRWX(size, BOld, &ErrMsg);
+ sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
+ sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
if (B.base() == 0) {
- fprintf(stderr,
- "Allocation failed when allocating new memory in the JIT\n%s\n",
- ErrMsg.c_str());
- abort();
+ llvm_report_error("Allocation failed when allocating new memory in the"
+ " JIT\n" + ErrMsg);
+ }
+ LastSlab = B;
+ ++NumSlabs;
+ // Initialize the slab to garbage when debugging.
+ if (PoisonMemory) {
+ memset(B.base(), 0xCD, B.size());
}
- Blocks.push_back(B);
return B;
}
+/// CheckInvariants - For testing only. Return "" if all internal invariants
+/// are preserved, and a helpful error message otherwise. For free and
+/// allocated blocks, make sure that adding BlockSize gives a valid block.
+/// For free blocks, make sure they're in the free list and that their end of
+/// block size marker is correct. This function should return an error before
+/// accessing bad memory. This function is defined here instead of in
+/// JITMemoryManagerTest.cpp so that we don't have to expose all of the
+/// implementation details of DefaultJITMemoryManager.
+bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
+ raw_string_ostream Err(ErrorStr);
+
+ // Construct the set of FreeRangeHeader pointers so we can query it
+ // efficiently.
+ llvm::SmallPtrSet<MemoryRangeHeader*, 16> FreeHdrSet;
+ FreeRangeHeader* FreeHead = FreeMemoryList;
+ FreeRangeHeader* FreeRange = FreeHead;
+
+ do {
+ // Check that the free range pointer is in the blocks we've allocated.
+ bool Found = false;
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E && !Found; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+ Found = (Start <= (char*)FreeRange && (char*)FreeRange < End);
+ }
+ if (!Found) {
+ Err << "Corrupt free list; points to " << FreeRange;
+ return false;
+ }
+
+ if (FreeRange->Next->Prev != FreeRange) {
+ Err << "Next and Prev pointers do not match.";
+ return false;
+ }
+
+ // Otherwise, add it to the set.
+ FreeHdrSet.insert(FreeRange);
+ FreeRange = FreeRange->Next;
+ } while (FreeRange != FreeHead);
+
+ // Go over each block, and look at each MemoryRangeHeader.
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+
+ // Check each memory range.
+ for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL;
+ Start <= (char*)Hdr && (char*)Hdr < End;
+ Hdr = &Hdr->getBlockAfter()) {
+ if (Hdr->ThisAllocated == 0) {
+ // Check that this range is in the free list.
+ if (!FreeHdrSet.count(Hdr)) {
+ Err << "Found free header at " << Hdr << " that is not in free list.";
+ return false;
+ }
+
+ // Now make sure the size marker at the end of the block is correct.
+ uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1;
+ if (!(Start <= (char*)Marker && (char*)Marker < End)) {
+ Err << "Block size in header points out of current MemoryBlock.";
+ return false;
+ }
+ if (Hdr->BlockSize != *Marker) {
+ Err << "End of block size marker (" << *Marker << ") "
+ << "and BlockSize (" << Hdr->BlockSize << ") don't match.";
+ return false;
+ }
+ }
+
+ if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) {
+ Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != "
+ << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")";
+ return false;
+ } else if (!LastHdr && !Hdr->PrevAllocated) {
+ Err << "The first header should have PrevAllocated true.";
+ return false;
+ }
+
+ // Remember the last header.
+ LastHdr = Hdr;
+ }
+ }
+
+ // All invariants are preserved.
+ return true;
+}
JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
return new DefaultJITMemoryManager();
}
+
+// Allocate memory for code in 512K slabs.
+const size_t DefaultJITMemoryManager::DefaultCodeSlabSize = 512 * 1024;
+
+// Allocate globals and stubs in slabs of 64K. (probably 16 pages)
+const size_t DefaultJITMemoryManager::DefaultSlabSize = 64 * 1024;
+
+// Waste at most 16K at the end of each bump slab. (probably 4 pages)
+const size_t DefaultJITMemoryManager::DefaultSizeThreshold = 16 * 1024;
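For reference, allocateNewCodeSlab above leaves a fresh slab in the following state; the concrete byte figures assume a 64-bit host where sizeof(MemoryRangeHeader) is 16 bytes, which is illustrative rather than guaranteed.

    // Layout of a newly mapped 512 KB code slab (sketch):
    //
    //   base                                              base + 512 KB
    //   [ NewBlock (FreeRangeHeader) | ... free space ... | EndBlock ]
    //
    //   NewBlock.ThisAllocated = 0;  NewBlock.PrevAllocated = 1;
    //   NewBlock.BlockSize = 512*1024 - 16;  // everything but the sentinel
    //   EndBlock.ThisAllocated = 1;          // sentinel so getBlockAfter()
    //   EndBlock.BlockSize = 16;             // never walks off the slab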
diff --git a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
index 3b8b84ce5bcb..53585b877b19 100644
--- a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
@@ -84,8 +84,7 @@ JITEventListener *createMacOSJITEventListener() {
void MacOSJITEventListener::NotifyFunctionEmitted(
const Function &F, void *FnStart, size_t FnSize,
const EmittedFunctionDetails &) {
- const char *const FnName = F.getNameStart();
- assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
+ assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
JITSymbolTable **SymTabPtrPtr = 0;
SymTabPtrPtr = &__jitSymbolTable;
@@ -120,7 +119,7 @@ void MacOSJITEventListener::NotifyFunctionEmitted(
// Otherwise, we have enough space, just tack it onto the end of the array.
JITSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
- Entry.FnName = strdup(FnName);
+ Entry.FnName = strdup(F.getName().data());
Entry.FnStart = FnStart;
Entry.FnSize = FnSize;
++SymTabPtr->NumSymbols;
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
new file mode 100644
index 000000000000..69398be5080c
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -0,0 +1,178 @@
+//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that calls into OProfile to tell
+// it about JITted functions. For now, we only record function names and sizes,
+// but eventually we'll also record line number information.
+//
+// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
+// definition of the interface we're using.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Errno.h"
+#include "llvm/Config/config.h"
+#include <stddef.h>
+using namespace llvm;
+
+#if USE_OPROFILE
+
+#include <opagent.h>
+
+namespace {
+
+class OProfileJITEventListener : public JITEventListener {
+ op_agent_t Agent;
+public:
+ OProfileJITEventListener();
+ ~OProfileJITEventListener();
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+ virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+};
+
+OProfileJITEventListener::OProfileJITEventListener()
+ : Agent(op_open_agent()) {
+ if (Agent == NULL) {
+ const std::string err_str = sys::StrError();
+ DEBUG(errs() << "Failed to connect to OProfile agent: " << err_str << "\n");
+ } else {
+ DEBUG(errs() << "Connected to OProfile agent.\n");
+ }
+}
+
+OProfileJITEventListener::~OProfileJITEventListener() {
+ if (Agent != NULL) {
+ if (op_close_agent(Agent) == -1) {
+ const std::string err_str = sys::StrError();
+ DEBUG(errs() << "Failed to disconnect from OProfile agent: "
+ << err_str << "\n");
+ } else {
+ DEBUG(errs() << "Disconnected from OProfile agent.\n");
+ }
+ }
+}
+
+class FilenameCache {
+ // Holds the filename of each CompileUnit, so that we can pass the
+ // pointer into oprofile. These char*s are freed in the destructor.
+ DenseMap<MDNode*, char*> Filenames;
+
+ public:
+ const char *getFilename(MDNode *CompileUnit) {
+ char *&Filename = Filenames[CompileUnit];
+ if (Filename == NULL) {
+ DICompileUnit CU(CompileUnit);
+ Filename = strdup(CU.getFilename());
+ }
+ return Filename;
+ }
+ ~FilenameCache() {
+ for (DenseMap<MDNode*, char*>::iterator
+ I = Filenames.begin(), E = Filenames.end(); I != E; ++I) {
+ free(I->second);
+ }
+ }
+};
+
+static debug_line_info LineStartToOProfileFormat(
+ const MachineFunction &MF, FilenameCache &Filenames,
+ uintptr_t Address, DebugLoc Loc) {
+ debug_line_info Result;
+ Result.vma = Address;
+ const DebugLocTuple &tuple = MF.getDebugLocTuple(Loc);
+ Result.lineno = tuple.Line;
+ Result.filename = Filenames.getFilename(tuple.CompileUnit);
+ DEBUG(errs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+ << Result.filename << ":" << Result.lineno << "\n");
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void OProfileJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
+ if (op_write_native_code(Agent, F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnStart, FnSize) == -1) {
+ DEBUG(errs() << "Failed to tell OProfile about native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ return;
+ }
+
+ // Now we convert the line number information from the address/DebugLoc format
+ // in Details to the address/filename/lineno format that OProfile expects.
+ // OProfile 0.9.4 (and maybe later versions) has a bug that causes it to
+ // ignore line numbers for addresses above 4G.
+ FilenameCache Filenames;
+ std::vector<debug_line_info> LineInfo;
+ LineInfo.reserve(1 + Details.LineStarts.size());
+ if (!Details.MF->getDefaultDebugLoc().isUnknown()) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames,
+ reinterpret_cast<uintptr_t>(FnStart),
+ Details.MF->getDefaultDebugLoc()));
+ }
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+ I = Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames, I->Address, I->Loc));
+ }
+ if (!LineInfo.empty()) {
+ if (op_write_debug_line_info(Agent, FnStart,
+ LineInfo.size(), &*LineInfo.begin()) == -1) {
+ DEBUG(errs()
+ << "Failed to tell OProfile about line numbers for native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ }
+ }
+}
+
+// Removes the to-be-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(
+ const Function &F, void *FnStart) {
+ assert(FnStart && "Invalid function pointer");
+ if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ DEBUG(errs() << "Failed to tell OProfile about unload of native function "
+ << F.getName() << " at " << FnStart << "\n");
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *createOProfileJITEventListener() {
+ return new OProfileJITEventListener;
+}
+}
+
+#else // USE_OPROFILE
+
+namespace llvm {
+// By defining this to return NULL, we can let clients call it unconditionally,
+// even if they haven't configured with the OProfile libraries.
+JITEventListener *createOProfileJITEventListener() {
+ return NULL;
+}
+} // namespace llvm
+
+#endif // USE_OPROFILE
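A minimal usage sketch for the new listener; attachOProfile is a hypothetical helper, and it assumes the ExecutionEngine::RegisterJITEventListener hook of this era.

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JITEventListener.h"

    void attachOProfile(llvm::ExecutionEngine *EE) {
      // Safe to call unconditionally: without OProfile support the factory
      // returns NULL, which the registration hook is expected to ignore.
      EE->RegisterJITEventListener(llvm::createOProfileJITEventListener());
    }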
diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp
index 0f208193075b..8bed33bb7d42 100644
--- a/lib/ExecutionEngine/JIT/TargetSelect.cpp
+++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -7,24 +7,27 @@
//
//===----------------------------------------------------------------------===//
//
-// This just asks the TargetMachineRegistry for the appropriate JIT to use, and
-// allows the user to specify a specific one on the commandline with -march=x.
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x. Clients
+// should initialize targets prior to calling createJIT.
//
//===----------------------------------------------------------------------===//
#include "JIT.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
-#include "llvm/Support/RegistryParser.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Host.h"
#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static cl::opt<const TargetMachineRegistry::entry*, false,
- RegistryParser<TargetMachine> >
-MArch("march", cl::desc("Architecture to generate assembly for:"));
+static cl::opt<std::string>
+MArch("march",
+ cl::desc("Architecture to generate assembly for (see --version)"));
static cl::opt<std::string>
MCPU("mcpu",
@@ -38,25 +41,51 @@ MAttrs("mattr",
cl::desc("Target specific attributes (-mattr=help for details)"),
cl::value_desc("a1,+a2,-a3,..."));
-/// createInternal - Create an return a new JIT compiler if there is one
-/// available for the current target. Otherwise, return null.
-///
-ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr,
- JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel) {
- const TargetMachineRegistry::entry *TheArch = MArch;
- if (TheArch == 0) {
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch. Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *JIT::selectTarget(ModuleProvider *MP, std::string *ErrorStr) {
+ Module &Mod = *MP->getModule();
+
+ Triple TheTriple(Mod.getTargetTriple());
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getHostTriple());
+
+ // Adjust the triple to match what the user requested.
+ const Target *TheTarget = 0;
+ if (!MArch.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (MArch == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ *ErrorStr = "No available targets are compatible with this -march, "
+ "see -version for the available targets.\n";
+ return 0;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // module/host triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
std::string Error;
- TheArch = TargetMachineRegistry::getClosestTargetForJIT(Error);
- if (TheArch == 0) {
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+ if (TheTarget == 0) {
if (ErrorStr)
*ErrorStr = Error;
return 0;
}
- } else if (TheArch->JITMatchQualityFn() == 0) {
- cerr << "WARNING: This target JIT is not designed for the host you are"
- << " running. If bad things happen, please choose a different "
- << "-march switch.\n";
+ }
+
+ if (!TheTarget->hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host you are"
+ << " running. If bad things happen, please choose a different "
+ << "-march switch.\n";
}
// Package up features to be passed to target/subtarget
@@ -70,14 +99,8 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr,
}
// Allocate a target...
- TargetMachine *Target = TheArch->CtorFn(*MP->getModule(), FeaturesStr);
+ TargetMachine *Target =
+ TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
assert(Target && "Could not allocate target machine!");
-
- // If the target supports JIT code generation, return a new JIT now.
- if (TargetJITInfo *TJ = Target->getJITInfo())
- return new JIT(MP, *Target, *TJ, JMM, OptLevel);
-
- if (ErrorStr)
- *ErrorStr = "target does not support JIT code generation";
- return 0;
+ return Target;
}
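When -march is not given, selectTarget reduces to the lookup below; a sketch assuming targets were initialized beforehand, as the new header comment requires. lookupNativeTarget is a hypothetical helper.

    #include "llvm/ADT/Triple.h"
    #include "llvm/System/Host.h"
    #include "llvm/Target/TargetRegistry.h"

    const llvm::Target *lookupNativeTarget(std::string &Error) {
      // An empty module triple falls back to the host triple.
      llvm::Triple TheTriple(llvm::sys::getHostTriple());
      return llvm::TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
    }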
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index faf01af127e9..76d81c219426 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -96,10 +96,10 @@ bool
Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
// Make sure this is an archive file we're dealing with
if (!Filename.isArchive())
- return error("File '" + Filename.toString() + "' is not an archive.");
+ return error("File '" + Filename.str() + "' is not an archive.");
// Open the archive file
- verbose("Linking archive file '" + Filename.toString() + "'");
+ verbose("Linking archive file '" + Filename.str() + "'");
// Find all of the symbols currently undefined in the bitcode program.
// If all the symbols are defined, the program is complete, and there is
@@ -108,8 +108,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
GetAllUndefinedSymbols(Composite, UndefinedSymbols);
if (UndefinedSymbols.empty()) {
- verbose("No symbols undefined, skipping library '" +
- Filename.toString() + "'");
+ verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
return false; // No need to link anything in!
}
@@ -120,7 +119,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
Archive* arch = AutoArch.get();
if (!arch)
- return error("Cannot read archive '" + Filename.toString() +
+ return error("Cannot read archive '" + Filename.str() +
"': " + ErrMsg);
if (!arch->isBitcodeArchive()) {
is_native = true;
@@ -143,7 +142,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
// Find the modules we need to link into the target module
std::set<ModuleProvider*> Modules;
if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
- return error("Cannot find symbols in '" + Filename.toString() +
+ return error("Cannot find symbols in '" + Filename.str() +
"': " + ErrMsg);
// If we didn't find any more modules to link this time, we are done
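For orientation, the hunks above sit inside a fixpoint loop; the summary below is a comment-only sketch of LinkInArchive's overall shape, not code from the patch.

    // LinkInArchive, in outline:
    //   compute the composite module's undefined symbols;
    //   repeat:
    //     ask the archive which members define any of them;
    //     link each such member module in;
    //     recompute the undefined-symbol set;
    //   until no new members are found (or nothing is undefined).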
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
index dc0f7c17bf42..61f3c26c6a1c 100644
--- a/lib/Linker/LinkItems.cpp
+++ b/lib/Linker/LinkItems.cpp
@@ -14,9 +14,10 @@
#include "llvm/Linker.h"
#include "llvm/Module.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Bitcode/ReaderWriter.h"
-
+#include "llvm/System/Path.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
// LinkItems - This function is the main entry point into linking. It takes a
@@ -69,20 +70,20 @@ Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
/// LinkInLibrary - links one library into the HeadModule.
///
-bool Linker::LinkInLibrary(const std::string& Lib, bool& is_native) {
+bool Linker::LinkInLibrary(const StringRef &Lib, bool& is_native) {
is_native = false;
// Determine where this library lives.
sys::Path Pathname = FindLib(Lib);
if (Pathname.isEmpty())
- return error("Cannot find library '" + Lib + "'");
+ return error("Cannot find library '" + Lib.str() + "'");
// If its an archive, try to link it in
std::string Magic;
Pathname.getMagicNumber(Magic, 64);
switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
- default: assert(0 && "Bad file type identification");
+ default: llvm_unreachable("Bad file type identification");
case sys::Unknown_FileType:
- return warning("Supposed library '" + Lib + "' isn't a library.");
+ return warning("Supposed library '" + Lib.str() + "' isn't a library.");
case sys::Bitcode_FileType:
// LLVM ".so" file.
@@ -92,7 +93,7 @@ bool Linker::LinkInLibrary(const std::string& Lib, bool& is_native) {
case sys::Archive_FileType:
if (LinkInArchive(Pathname, is_native))
- return error("Cannot link archive '" + Pathname.toString() + "'");
+ return error("Cannot link archive '" + Pathname.str() + "'");
break;
case sys::ELF_Relocatable_FileType:
@@ -157,7 +158,7 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
is_native = false;
// Check for a file of name "-", which means "read standard input"
- if (File.toString() == "-") {
+ if (File.str() == "-") {
std::auto_ptr<Module> M;
if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) {
M.reset(ParseBitcodeFile(Buffer, Context, &Error));
@@ -172,34 +173,34 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
// Make sure we can at least read the file
if (!File.canRead())
- return error("Cannot find linker input '" + File.toString() + "'");
+ return error("Cannot find linker input '" + File.str() + "'");
// If its an archive, try to link it in
std::string Magic;
File.getMagicNumber(Magic, 64);
switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
- default: assert(0 && "Bad file type identification");
+ default: llvm_unreachable("Bad file type identification");
case sys::Unknown_FileType:
- return warning("Ignoring file '" + File.toString() +
+ return warning("Ignoring file '" + File.str() +
"' because does not contain bitcode.");
case sys::Archive_FileType:
// A user may specify an ar archive without -l, perhaps because it
// is not installed as a library. Detect that and link the archive.
- verbose("Linking archive file '" + File.toString() + "'");
+ verbose("Linking archive file '" + File.str() + "'");
if (LinkInArchive(File, is_native))
return true;
break;
case sys::Bitcode_FileType: {
- verbose("Linking bitcode file '" + File.toString() + "'");
+ verbose("Linking bitcode file '" + File.str() + "'");
std::auto_ptr<Module> M(LoadObject(File));
if (M.get() == 0)
- return error("Cannot load file '" + File.toString() + "': " + Error);
+ return error("Cannot load file '" + File.str() + "': " + Error);
if (LinkInModule(M.get(), &Error))
- return error("Cannot link file '" + File.toString() + "': " + Error);
+ return error("Cannot link file '" + File.str() + "': " + Error);
- verbose("Linked in file '" + File.toString() + "'");
+ verbose("Linked in file '" + File.str() + "'");
break;
}
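Several hunks in this file replace assert(0 && ...) with llvm_unreachable(...). A small sketch of the idiom: in the LLVM of this era, llvm_unreachable still traps in NDEBUG builds instead of silently falling through. fileKindName is a hypothetical example.

    #include "llvm/Support/ErrorHandling.h"

    static const char *fileKindName(int Kind) {
      switch (Kind) {
      case 0: return "unknown";
      case 1: return "bitcode";
      case 2: return "archive";
      }
      // Documents the impossible path and aborts even when asserts are off.
      llvm_unreachable("fileKindName: unhandled file type");
    }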
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 4a15d88d8f36..e64c200cf632 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -19,21 +19,22 @@
#include "llvm/Linker.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
#include "llvm/ADT/DenseMap.h"
-#include <sstream>
using namespace llvm;
// Error - Simple wrapper function to conditionally assign to E and return true.
// This just makes error return conditions a little bit simpler...
-static inline bool Error(std::string *E, const std::string &Message) {
- if (E) *E = Message;
+static inline bool Error(std::string *E, const Twine &Message) {
+ if (E) *E = Message.str();
return true;
}
@@ -143,7 +144,7 @@ protected:
// for debugging...
virtual void dump() const {
- cerr << "AbstractTypeSet!\n";
+ errs() << "AbstractTypeSet!\n";
}
};
}
@@ -336,11 +337,11 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
static void PrintMap(const std::map<const Value*, Value*> &M) {
for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end();
I != E; ++I) {
- cerr << " Fr: " << (void*)I->first << " ";
+ errs() << " Fr: " << (void*)I->first << " ";
I->first->dump();
- cerr << " To: " << (void*)I->second << " ";
+ errs() << " To: " << (void*)I->second << " ";
I->second->dump();
- cerr << "\n";
+ errs() << "\n";
}
}
#endif
@@ -348,7 +349,8 @@ static void PrintMap(const std::map<const Value*, Value*> &M) {
// RemapOperand - Use ValueMap to convert constants from one module to another.
static Value *RemapOperand(const Value *In,
- std::map<const Value*, Value*> &ValueMap) {
+ std::map<const Value*, Value*> &ValueMap,
+ LLVMContext &Context) {
std::map<const Value*,Value*>::const_iterator I = ValueMap.find(In);
if (I != ValueMap.end())
return I->second;
@@ -363,29 +365,37 @@ static Value *RemapOperand(const Value *In,
if (const ConstantArray *CPA = dyn_cast<ConstantArray>(CPV)) {
std::vector<Constant*> Operands(CPA->getNumOperands());
for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap));
- Result = ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
+ Operands[i] = cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap,
+ Context));
+ Result =
+ ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
} else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(CPV)) {
std::vector<Constant*> Operands(CPS->getNumOperands());
for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap));
- Result = ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
+ Operands[i] = cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap,
+ Context));
+ Result =
+ ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
} else if (isa<ConstantPointerNull>(CPV) || isa<UndefValue>(CPV)) {
Result = const_cast<Constant*>(CPV);
} else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CPV)) {
std::vector<Constant*> Operands(CP->getNumOperands());
for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap));
+ Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap,
+ Context));
Result = ConstantVector::get(Operands);
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
std::vector<Constant*> Ops;
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i),ValueMap)));
+ Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i), ValueMap,
+ Context)));
Result = CE->getWithOperands(Ops);
} else {
assert(!isa<GlobalValue>(CPV) && "Unmapped global?");
- assert(0 && "Unknown type of derived type constant value!");
+ llvm_unreachable("Unknown type of derived type constant value!");
}
+ } else if (isa<MetadataBase>(In)) {
+ Result = const_cast<Value*>(In);
} else if (isa<InlineAsm>(In)) {
Result = const_cast<Value*>(In);
}
@@ -397,11 +407,11 @@ static Value *RemapOperand(const Value *In,
}
#ifndef NDEBUG
- cerr << "LinkModules ValueMap: \n";
+ errs() << "LinkModules ValueMap: \n";
PrintMap(ValueMap);
- cerr << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
- assert(0 && "Couldn't remap value!");
+ errs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
+ llvm_unreachable("Couldn't remap value!");
#endif
return 0;
}
@@ -521,6 +531,22 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
return false;
}
+// Insert all of the named mdnodes in Src into the Dest module.
+static void LinkNamedMDNodes(Module *Dest, Module *Src) {
+ for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
+ E = Src->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *SrcNMD = I;
+ NamedMDNode *DestNMD = Dest->getNamedMetadata(SrcNMD->getName());
+ if (!DestNMD)
+ NamedMDNode::Create(SrcNMD, Dest);
+ else {
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = SrcNMD->getNumElements(); i != e; ++i)
+ DestNMD->addElement(SrcNMD->getElement(i));
+ }
+ }
+}
+
// LinkGlobals - Loop through the global variables in the src module and merge
// them into the dest module.
static bool LinkGlobals(Module *Dest, const Module *Src,
@@ -538,8 +564,7 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
// Check to see if we may have to link the global with the global, alias or
// function.
if (SGV->hasName() && !SGV->hasLocalLinkage())
- DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SGV->getNameStart(),
- SGV->getNameEnd()));
+ DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SGV->getName()));
// If we found a global with the same name in the dest module, but it has
// internal linkage, we are really not doing any linkage here.
@@ -564,9 +589,9 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
// symbol over in the dest module... the initializer will be filled in
// later by LinkGlobalInits.
GlobalVariable *NewDGV =
- new GlobalVariable(SGV->getType()->getElementType(),
+ new GlobalVariable(*Dest, SGV->getType()->getElementType(),
SGV->isConstant(), SGV->getLinkage(), /*init*/0,
- SGV->getName(), Dest, false,
+ SGV->getName(), 0, false,
SGV->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
CopyGVAttributes(NewDGV, SGV);
@@ -597,9 +622,9 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
// AppendingVars map. The name is cleared out so that no linkage is
// performed.
GlobalVariable *NewDGV =
- new GlobalVariable(SGV->getType()->getElementType(),
+ new GlobalVariable(*Dest, SGV->getType()->getElementType(),
SGV->isConstant(), SGV->getLinkage(), /*init*/0,
- "", Dest, false,
+ "", 0, false,
SGV->getType()->getAddressSpace());
// Set alignment allowing CopyGVAttributes merge it with alignment of SGV.
@@ -625,13 +650,15 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
// we are replacing may be a function (if a prototype, weak, etc) or a
// global variable.
GlobalVariable *NewDGV =
- new GlobalVariable(SGV->getType()->getElementType(), SGV->isConstant(),
- NewLinkage, /*init*/0, DGV->getName(), Dest, false,
+ new GlobalVariable(*Dest, SGV->getType()->getElementType(),
+ SGV->isConstant(), NewLinkage, /*init*/0,
+ DGV->getName(), 0, false,
SGV->getType()->getAddressSpace());
// Propagate alignment, section, and visibility info.
CopyGVAttributes(NewDGV, SGV);
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV,
+ DGV->getType()));
// DGV will conflict with NewDGV because they both had the same
// name. We must erase this now so ForceRenaming doesn't assert
@@ -697,6 +724,9 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
else if (SL == GlobalValue::InternalLinkage &&
DL == GlobalValue::InternalLinkage)
return GlobalValue::InternalLinkage;
+ else if (SL == GlobalValue::LinkerPrivateLinkage &&
+ DL == GlobalValue::LinkerPrivateLinkage)
+ return GlobalValue::LinkerPrivateLinkage;
else {
assert (SL == GlobalValue::PrivateLinkage &&
DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
@@ -866,7 +896,8 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
if (SGV->hasInitializer()) { // Only process initialized GV's
// Figure out what the initializer looks like in the dest module...
Constant *SInit =
- cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap));
+ cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap,
+ Dest->getContext()));
// Grab destination global variable or alias.
GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
@@ -885,9 +916,9 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
// Nothing is required, mapped values will take the new global
// automatically.
} else if (DGVar->hasAppendingLinkage()) {
- assert(0 && "Appending linkage unimplemented!");
+ llvm_unreachable("Appending linkage unimplemented!");
} else {
- assert(0 && "Unknown linkage!");
+ llvm_unreachable("Unknown linkage!");
}
} else {
// Copy the initializer over now...
@@ -923,8 +954,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
// Check to see if we may have to link the function with the global, alias or
// function.
if (SF->hasName() && !SF->hasLocalLinkage())
- DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SF->getNameStart(),
- SF->getNameEnd()));
+ DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SF->getName()));
// If we found a global with the same name in the dest module, but it has
// internal linkage, we are really not doing any linkage here.
@@ -979,7 +1009,8 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
CopyGVAttributes(NewDF, SF);
// Any uses of DF need to change to NewDF, with cast
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF,
+ DGV->getType()));
// DF will conflict with NewDF because they both had the same name. We must
// erase this now so ForceRenaming doesn't assert because DF might
@@ -1053,7 +1084,7 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
- *OI = RemapOperand(*OI, ValueMap);
+ *OI = RemapOperand(*OI, ValueMap, Dest->getContext());
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
@@ -1132,14 +1163,15 @@ static bool LinkAppendingVars(Module *M,
"Appending variables with different section name need to be linked!");
unsigned NewSize = T1->getNumElements() + T2->getNumElements();
- ArrayType *NewType = ArrayType::get(T1->getElementType(), NewSize);
+ ArrayType *NewType = ArrayType::get(T1->getElementType(),
+ NewSize);
G1->setName(""); // Clear G1's name in case of a conflict!
// Create the new global variable...
GlobalVariable *NG =
- new GlobalVariable(NewType, G1->isConstant(), G1->getLinkage(),
- /*init*/0, First->first, M, G1->isThreadLocal(),
+ new GlobalVariable(*M, NewType, G1->isConstant(), G1->getLinkage(),
+ /*init*/0, First->first, 0, G1->isThreadLocal(),
G1->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
@@ -1173,8 +1205,10 @@ static bool LinkAppendingVars(Module *M,
  // FIXME: This should rewrite simple/straightforward uses such as
// getelementptr instructions to not use the Cast!
- G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G1->getType()));
- G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G2->getType()));
+ G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
+ G1->getType()));
+ G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
+ G2->getType()));
// Remove the two globals from the module now...
M->getGlobalList().erase(G1);
@@ -1239,10 +1273,10 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() &&
Src->getDataLayout() != Dest->getDataLayout())
- cerr << "WARNING: Linking two modules of different data layouts!\n";
+ errs() << "WARNING: Linking two modules of different data layouts!\n";
if (!Src->getTargetTriple().empty() &&
Dest->getTargetTriple() != Src->getTargetTriple())
- cerr << "WARNING: Linking two modules of different target triples!\n";
+ errs() << "WARNING: Linking two modules of different target triples!\n";
// Append the module inline asm string.
if (!Src->getModuleInlineAsm().empty()) {
@@ -1282,6 +1316,9 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
AppendingVars.insert(std::make_pair(I->getName(), I));
}
+ // Insert all of the named mdnodes in Src into the Dest module.
+ LinkNamedMDNodes(Dest, Src);
+
// Insert all of the globals in src into the Dest module... without linking
// initializers (which could refer to functions not yet mapped over).
if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
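For context, a minimal sketch of how a client drives this path, assuming the 2.6-era API used by this patch; mergeModules and its error handling are hypothetical, not part of the change:

    #include "llvm/Linker.h"
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Merge Src into Dest; with this patch, named metadata nodes are now
    // copied over as well, via LinkNamedMDNodes().
    static bool mergeModules(llvm::Module *Dest, llvm::Module *Src) {
      std::string Err;
      if (llvm::Linker::LinkModules(Dest, Src, &Err)) { // true == failure
        llvm::errs() << "link error: " << Err << "\n";
        return false;
      }
      return true;
    }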
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index 6e0b760b85de..aef79d08f423 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -14,12 +14,13 @@
#include "llvm/Linker.h"
#include "llvm/Module.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Config/config.h"
+#include "llvm/System/Path.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
using namespace llvm;
-Linker::Linker(const std::string& progname, const std::string& modname,
+Linker::Linker(const StringRef &progname, const StringRef &modname,
LLVMContext& C, unsigned flags):
Context(C),
Composite(new Module(modname, C)),
@@ -28,7 +29,7 @@ Linker::Linker(const std::string& progname, const std::string& modname,
Error(),
ProgramName(progname) { }
-Linker::Linker(const std::string& progname, Module* aModule, unsigned flags) :
+Linker::Linker(const StringRef &progname, Module* aModule, unsigned flags) :
Context(aModule->getContext()),
Composite(aModule),
LibPaths(),
@@ -41,25 +42,25 @@ Linker::~Linker() {
}
bool
-Linker::error(const std::string& message) {
+Linker::error(const StringRef &message) {
Error = message;
if (!(Flags&QuietErrors))
- cerr << ProgramName << ": error: " << message << "\n";
+ errs() << ProgramName << ": error: " << message << "\n";
return true;
}
bool
-Linker::warning(const std::string& message) {
+Linker::warning(const StringRef &message) {
Error = message;
if (!(Flags&QuietWarnings))
- cerr << ProgramName << ": warning: " << message << "\n";
+ errs() << ProgramName << ": warning: " << message << "\n";
return false;
}
void
-Linker::verbose(const std::string& message) {
+Linker::verbose(const StringRef &message) {
if (Flags&Verbose)
- cerr << " " << message << "\n";
+ errs() << " " << message << "\n";
}
void
@@ -69,11 +70,8 @@ Linker::addPath(const sys::Path& path) {
void
Linker::addPaths(const std::vector<std::string>& paths) {
- for (unsigned i = 0; i != paths.size(); ++i) {
- sys::Path aPath;
- aPath.set(paths[i]);
- LibPaths.push_back(aPath);
- }
+ for (unsigned i = 0, e = paths.size(); i != e; ++i)
+ LibPaths.push_back(sys::Path(paths[i]));
}
void
@@ -100,16 +98,15 @@ Linker::LoadObject(const sys::Path &FN) {
std::string ParseErrorMessage;
Module *Result = 0;
- const std::string &FNS = FN.toString();
- std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FNS.c_str()));
+ std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FN.c_str()));
if (Buffer.get())
Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
else
- ParseErrorMessage = "Error reading file '" + FNS + "'";
+ ParseErrorMessage = "Error reading file '" + FN.str() + "'";
if (Result)
return std::auto_ptr<Module>(Result);
- Error = "Bitcode file '" + FN.toString() + "' could not be loaded";
+ Error = "Bitcode file '" + FN.str() + "' could not be loaded";
if (ParseErrorMessage.size())
Error += ": " + ParseErrorMessage;
return std::auto_ptr<Module>();
@@ -117,13 +114,13 @@ Linker::LoadObject(const sys::Path &FN) {
// IsLibrary - Determine if "Name" is a library in "Directory". Return
// a non-empty sys::Path if it's found, an empty one otherwise.
-static inline sys::Path IsLibrary(const std::string& Name,
- const sys::Path& Directory) {
+static inline sys::Path IsLibrary(const StringRef &Name,
+ const sys::Path &Directory) {
sys::Path FullPath(Directory);
// Try the libX.a form
- FullPath.appendComponent("lib" + Name);
+ FullPath.appendComponent(("lib" + Name).str());
FullPath.appendSuffix("a");
if (FullPath.isArchive())
return FullPath;
@@ -156,7 +153,7 @@ static inline sys::Path IsLibrary(const std::string& Name,
/// Path if no matching file can be found.
///
sys::Path
-Linker::FindLib(const std::string &Filename) {
+Linker::FindLib(const StringRef &Filename) {
// Determine if the pathname can be found as it stands.
sys::Path FilePath(Filename);
if (FilePath.canRead() &&
@@ -167,7 +164,7 @@ Linker::FindLib(const std::string &Filename) {
// there.
for (unsigned Index = 0; Index != LibPaths.size(); ++Index) {
sys::Path Directory(LibPaths[Index]);
- sys::Path FullPath = IsLibrary(Filename,Directory);
+ sys::Path FullPath = IsLibrary(Filename, Directory);
if (!FullPath.isEmpty())
return FullPath;
}
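Worth noting: the ("lib" + Name).str() form above is forced by the StringRef migration, since operator+ on a string literal and a StringRef yields a lazy llvm::Twine rather than a std::string. A small standalone illustration (hypothetical, not part of the patch):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include <cassert>
    #include <string>

    int main() {
      llvm::StringRef Name("m");
      // "lib" + Name builds a Twine (a deferred concatenation); .str()
      // materializes it for APIs that still take std::string.
      std::string LibName = ("lib" + Name).str();
      assert(LibName == "libm");
      return 0;
    }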
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 6307ffe32c8a..8a1a05863746 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -1,5 +1,24 @@
add_llvm_library(LLVMMC
+ MCAsmInfo.cpp
+ MCAsmInfoCOFF.cpp
+ MCAsmInfoDarwin.cpp
+ MCAsmLexer.cpp
+ MCAsmParser.cpp
MCAsmStreamer.cpp
+ MCAssembler.cpp
+ MCCodeEmitter.cpp
MCContext.cpp
+ MCDisassembler.cpp
+ MCExpr.cpp
+ MCInst.cpp
+ MCInstPrinter.cpp
+ MCMachOStreamer.cpp
+ MCNullStreamer.cpp
+ MCSection.cpp
+ MCSectionELF.cpp
+ MCSectionMachO.cpp
MCStreamer.cpp
+ MCSymbol.cpp
+ MCValue.cpp
+ TargetAsmParser.cpp
)
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
new file mode 100644
index 000000000000..74fb930fbc14
--- /dev/null
+++ b/lib/MC/MCAsmInfo.cpp
@@ -0,0 +1,107 @@
+//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/DataTypes.h"
+#include <cctype>
+#include <cstring>
+using namespace llvm;
+
+MCAsmInfo::MCAsmInfo() {
+ ZeroFillDirective = 0;
+ NonexecutableStackDirective = 0;
+ NeedsSet = false;
+ MaxInstLength = 4;
+ PCSymbol = "$";
+ SeparatorChar = ';';
+ CommentColumn = 60;
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".";
+ LinkerPrivateGlobalPrefix = "";
+ InlineAsmStart = "APP";
+ InlineAsmEnd = "NO_APP";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ ZeroDirective = "\t.zero\t";
+ ZeroDirectiveSuffix = 0;
+ AsciiDirective = "\t.ascii\t";
+ AscizDirective = "\t.asciz\t";
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = "\t.quad\t";
+ SunStyleELFSectionSwitchSyntax = false;
+ UsesELFSectionDirectiveForBSS = false;
+ AlignDirective = "\t.align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+ JumpTableDirective = 0;
+ PICJumpTableDirective = 0;
+ GlobalDirective = "\t.globl\t";
+ SetDirective = 0;
+ LCOMMDirective = 0;
+ COMMDirective = "\t.comm\t";
+ COMMDirectiveTakesAlignment = true;
+ HasDotTypeDotSizeDirective = true;
+ HasSingleParameterDotFile = true;
+ UsedDirective = 0;
+ WeakRefDirective = 0;
+ WeakDefDirective = 0;
+ // FIXME: These are ELFish - move to ELFMAI.
+ HiddenDirective = "\t.hidden\t";
+ ProtectedDirective = "\t.protected\t";
+ AbsoluteDebugSectionOffsets = false;
+ AbsoluteEHSectionOffsets = false;
+ HasLEB128 = false;
+ HasDotLocAndDotFile = false;
+ SupportsDebugInformation = false;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfRequiresFrameSection = true;
+ DwarfUsesInlineInfoSection = false;
+ Is_EHSymbolPrivate = true;
+ GlobalEHDirective = 0;
+ SupportsWeakOmittedEHFrame = true;
+ DwarfSectionOffsetDirective = 0;
+
+ AsmTransCBE = 0;
+}
+
+MCAsmInfo::~MCAsmInfo() {
+}
+
+
+unsigned MCAsmInfo::getULEB128Size(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
+
+unsigned MCAsmInfo::getSLEB128Size(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
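Each LEB128 byte carries seven payload bits, so the two size routines above just count 7-bit groups (with sign handling in the signed case). A hypothetical sanity check against well-known encodings, assuming the helpers are static members as the qualified definitions suggest:

    #include "llvm/MC/MCAsmInfo.h"
    #include <cassert>

    int main() {
      using llvm::MCAsmInfo;
      assert(MCAsmInfo::getULEB128Size(0)      == 1); // 0x00
      assert(MCAsmInfo::getULEB128Size(127)    == 1); // 0x7f
      assert(MCAsmInfo::getULEB128Size(128)    == 2); // 0x80 0x01
      assert(MCAsmInfo::getULEB128Size(624485) == 3); // 0xe5 0x8e 0x26
      // Signed: -1 fits in one byte, but +64 needs two, because bit 6 of a
      // final byte is read back as the sign bit on decode.
      assert(MCAsmInfo::getSLEB128Size(-1) == 1); // 0x7f
      assert(MCAsmInfo::getSLEB128Size(64) == 2); // 0xc0 0x00
      return 0;
    }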
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
new file mode 100644
index 000000000000..23b0dd77916b
--- /dev/null
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -0,0 +1,37 @@
+//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on COFF-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+MCAsmInfoCOFF::MCAsmInfoCOFF() {
+ GlobalPrefix = "_";
+ LCOMMDirective = "\t.lcomm\t";
+ COMMDirectiveTakesAlignment = false;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ HiddenDirective = NULL;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ AbsoluteDebugSectionOffsets = true;
+ AbsoluteEHSectionOffsets = false;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+}
+
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
new file mode 100644
index 000000000000..d99120d4d788
--- /dev/null
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -0,0 +1,52 @@
+//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+using namespace llvm;
+
+MCAsmInfoDarwin::MCAsmInfoDarwin() {
+ // Common settings for all Darwin targets.
+ // Syntax:
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LinkerPrivateGlobalPrefix = "l";
+ NeedsSet = true;
+ AllowQuotesInName = true;
+ HasSingleParameterDotFile = false;
+
+ AlignmentIsInBytes = false;
+ InlineAsmStart = " InlineAsm Start";
+ InlineAsmEnd = " InlineAsm End";
+
+ // Directives:
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+ LCOMMDirective = "\t.lcomm\t";
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ SetDirective = "\t.set";
+ ProtectedDirective = "\t.globl\t";
+ HasDotTypeDotSizeDirective = false;
+ UsedDirective = "\t.no_dead_strip\t";
+
+ // _foo.eh symbols are currently always exported so that the linker knows
+ // about them. This is not necessary on 10.6 and later, but it
+ // doesn't hurt anything.
+ // FIXME: I need to get this from Triple.
+ Is_EHSymbolPrivate = false;
+ GlobalEHDirective = "\t.globl\t";
+ SupportsWeakOmittedEHFrame = false;
+}
+
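One practical consequence of AlignmentIsInBytes above: the operand of the generic align directive changes meaning per target. Based on the EmitValueToAlignment logic added later in this patch, a request for 16-byte alignment would print roughly as follows (illustrative only, not emitted by this file itself):

    // Default MCAsmInfo   (AlignmentIsInBytes = true):   .align 16
    // MCAsmInfoDarwin     (AlignmentIsInBytes = false):  .align 4   // 2^4 = 16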
diff --git a/lib/MC/MCAsmLexer.cpp b/lib/MC/MCAsmLexer.cpp
new file mode 100644
index 000000000000..1e34ed6f7900
--- /dev/null
+++ b/lib/MC/MCAsmLexer.cpp
@@ -0,0 +1,23 @@
+//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()) {
+}
+
+MCAsmLexer::~MCAsmLexer() {
+}
+
+SMLoc AsmToken::getLoc() const {
+ return SMLoc::getFromPointer(Str.data());
+}
diff --git a/lib/MC/MCAsmParser.cpp b/lib/MC/MCAsmParser.cpp
new file mode 100644
index 000000000000..2287e8965d7b
--- /dev/null
+++ b/lib/MC/MCAsmParser.cpp
@@ -0,0 +1,18 @@
+//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmParser.h"
+
+using namespace llvm;
+
+MCAsmParser::MCAsmParser() {
+}
+
+MCAsmParser::~MCAsmParser() {
+}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 7d9446444889..e56e968380f4 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -8,118 +8,121 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCStreamer.h"
-
+#include "llvm/ADT/SmallString.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- class MCAsmStreamer : public MCStreamer {
- raw_ostream &OS;
+class MCAsmStreamer : public MCStreamer {
+ raw_ostream &OS;
+ const MCAsmInfo &MAI;
+ MCInstPrinter *InstPrinter;
+ MCCodeEmitter *Emitter;
+public:
+ MCAsmStreamer(MCContext &Context, raw_ostream &_OS, const MCAsmInfo &tai,
+ MCInstPrinter *_Printer, MCCodeEmitter *_Emitter)
+ : MCStreamer(Context), OS(_OS), MAI(tai), InstPrinter(_Printer),
+ Emitter(_Emitter) {}
+ ~MCAsmStreamer() {}
- MCSection *CurSection;
+ /// @name MCStreamer Interface
+ /// @{
- public:
- MCAsmStreamer(MCContext &Context, raw_ostream &_OS)
- : MCStreamer(Context), OS(_OS), CurSection(0) {}
- ~MCAsmStreamer() {}
+ virtual void SwitchSection(const MCSection *Section);
- /// @name MCStreamer Interface
- /// @{
+ virtual void EmitLabel(MCSymbol *Symbol);
- virtual void SwitchSection(MCSection *Section);
+ virtual void EmitAssemblerFlag(AssemblerFlag Flag);
- virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
- bool MakeAbsolute = false);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
- virtual void EmitBytes(const char *Data, unsigned Length);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+ unsigned ByteAlignment);
- virtual void EmitValue(const MCValue &Value, unsigned Size);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0);
+ virtual void EmitBytes(const StringRef &Data);
- virtual void EmitValueToOffset(const MCValue &Offset,
- unsigned char Value = 0);
-
- virtual void EmitInstruction(const MCInst &Inst);
+ virtual void EmitValue(const MCExpr *Value, unsigned Size);
- virtual void Finish();
-
- /// @}
- };
-
-}
-
-/// Allow printing values directly to a raw_ostream.
-static inline raw_ostream &operator<<(raw_ostream &os, const MCValue &Value) {
- if (Value.getSymA()) {
- os << Value.getSymA()->getName();
- if (Value.getSymB())
- os << " - " << Value.getSymB()->getName();
- if (Value.getConstant())
- os << " + " << Value.getConstant();
- } else {
- assert(!Value.getSymB() && "Invalid machine code value!");
- os << Value.getConstant();
- }
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
- return os;
-}
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
assert(Bytes && "Invalid size!");
return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
}
-static inline MCValue truncateToSize(const MCValue &Value, unsigned Bytes) {
- return MCValue::get(Value.getSymA(), Value.getSymB(),
- truncateToSize(Value.getConstant(), Bytes));
+static inline const MCExpr *truncateToSize(const MCExpr *Value,
+ unsigned Bytes) {
+ // FIXME: Do we really need this routine?
+ return Value;
}
-void MCAsmStreamer::SwitchSection(MCSection *Section) {
+void MCAsmStreamer::SwitchSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
if (Section != CurSection) {
CurSection = Section;
-
- // FIXME: Really we would like the segment, flags, etc. to be separate
- // values instead of embedded in the name. Not all assemblers understand all
- // this stuff though.
- OS << ".section " << Section->getName() << "\n";
+ Section->PrintSwitchToSection(MAI, OS);
}
}
void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
- assert(Symbol->getSection() == 0 && "Cannot emit a symbol twice!");
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(CurSection && "Cannot emit before setting section!");
- assert(!getContext().GetSymbolValue(Symbol) &&
- "Cannot emit symbol which was directly assigned to!");
- OS << Symbol->getName() << ":\n";
- Symbol->setSection(CurSection);
- Symbol->setExternal(false);
+ Symbol->print(OS, &MAI);
+ OS << ":\n";
+ Symbol->setSection(*CurSection);
}
-void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
- bool MakeAbsolute) {
- assert(!Symbol->getSection() && "Cannot assign to a label!");
-
- if (MakeAbsolute) {
- OS << ".set " << Symbol->getName() << ", " << Value << '\n';
- } else {
- OS << Symbol->getName() << " = " << Value << '\n';
+void MCAsmStreamer::EmitAssemblerFlag(AssemblerFlag Flag) {
+ switch (Flag) {
+ default: assert(0 && "Invalid flag!");
+ case SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
}
+ OS << '\n';
+}
- getContext().SetSymbolValue(Symbol, Value);
+void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // Only absolute symbols can be redefined.
+ assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
+ "Cannot define a symbol twice!");
+
+ Symbol->print(OS, &MAI);
+ OS << " = ";
+ Value->print(OS, &MAI);
+ OS << '\n';
}
void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -139,93 +142,165 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case WeakReference: OS << ".weak_reference"; break;
}
- OS << ' ' << Symbol->getName() << '\n';
+ OS << ' ';
+ Symbol->print(OS, &MAI);
+ OS << '\n';
+}
+
+void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ OS << ".desc" << ' ';
+ Symbol->print(OS, &MAI);
+ OS << ',' << DescValue << '\n';
+}
+
+void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+ unsigned ByteAlignment) {
+ OS << ".comm ";
+ Symbol->print(OS, &MAI);
+ OS << ',' << Size;
+ if (ByteAlignment != 0)
+ OS << ',' << Log2_32(ByteAlignment);
+ OS << '\n';
}
-void MCAsmStreamer::EmitBytes(const char *Data, unsigned Length) {
+void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ // Note: a .zerofill directive does not switch sections.
+ OS << ".zerofill ";
+
+ // This is a mach-o specific directive.
+ const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
+ OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
+
+ if (Symbol != NULL) {
+ OS << ',';
+ Symbol->print(OS, &MAI);
+ OS << ',' << Size;
+ if (ByteAlignment != 0)
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ OS << '\n';
+}
+
+void MCAsmStreamer::EmitBytes(const StringRef &Data) {
assert(CurSection && "Cannot emit contents before setting section!");
- for (unsigned i = 0; i != Length; ++i)
- OS << ".byte " << (unsigned) Data[i] << '\n';
+ for (unsigned i = 0, e = Data.size(); i != e; ++i)
+ OS << ".byte " << (unsigned) (unsigned char) Data[i] << '\n';
}
-void MCAsmStreamer::EmitValue(const MCValue &Value, unsigned Size) {
+void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size) {
assert(CurSection && "Cannot emit contents before setting section!");
// Need target hooks to know how to print this.
switch (Size) {
default:
- assert(0 && "Invalid size for machine code value!");
+ llvm_unreachable("Invalid size for machine code value!");
case 1: OS << ".byte"; break;
case 2: OS << ".short"; break;
case 4: OS << ".long"; break;
case 8: OS << ".quad"; break;
}
- OS << ' ' << truncateToSize(Value, Size) << '\n';
+ OS << ' ';
+ truncateToSize(Value, Size)->print(OS, &MAI);
+ OS << '\n';
}
void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
- // Some assemblers don't support .balign, so we always emit as .p2align if
- // this is a power of two. Otherwise we assume the client knows the target
- // supports .balign and use that.
- unsigned Pow2 = Log2_32(ByteAlignment);
- bool IsPow2 = (1U << Pow2) == ByteAlignment;
-
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ OS << '\n';
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
switch (ValueSize) {
- default:
- assert(0 && "Invalid size for machine code value!");
- case 8:
- assert(0 && "Unsupported alignment size!");
- case 1: OS << (IsPow2 ? ".p2align" : ".balign"); break;
- case 2: OS << (IsPow2 ? ".p2alignw" : ".balignw"); break;
- case 4: OS << (IsPow2 ? ".p2alignl" : ".balignl"); break;
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
}
- OS << ' ' << (IsPow2 ? Pow2 : ByteAlignment);
-
+ OS << ' ' << ByteAlignment;
OS << ", " << truncateToSize(Value, ValueSize);
if (MaxBytesToEmit)
OS << ", " << MaxBytesToEmit;
OS << '\n';
}
-void MCAsmStreamer::EmitValueToOffset(const MCValue &Offset,
+void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
- OS << ".org " << Offset << ", " << (unsigned) Value << '\n';
-}
-
-static raw_ostream &operator<<(raw_ostream &OS, const MCOperand &Op) {
- if (Op.isReg())
- return OS << "reg:" << Op.getReg();
- if (Op.isImm())
- return OS << "imm:" << Op.getImm();
- if (Op.isMBBLabel())
- return OS << "mbblabel:("
- << Op.getMBBLabelFunction() << ", " << Op.getMBBLabelBlock();
- assert(Op.isMCValue() && "Invalid operand!");
- return OS << "val:" << Op.getMCValue();
+ OS << ".org ";
+ Offset->print(OS, &MAI);
+ OS << ", " << (unsigned) Value << '\n';
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
assert(CurSection && "Cannot emit contents before setting section!");
- // FIXME: Implement proper printing.
- OS << "MCInst("
- << "opcode=" << Inst.getOpcode() << ", "
- << "operands=[";
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
- if (i)
- OS << ", ";
- OS << Inst.getOperand(i);
+
+ // If we have an AsmPrinter, use that to print.
+ if (InstPrinter) {
+ InstPrinter->printInst(&Inst);
+ OS << '\n';
+
+ // Show the encoding if we have a code emitter.
+ if (Emitter) {
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter->EncodeInstruction(Inst, VecOS);
+ VecOS.flush();
+
+ OS.indent(20);
+ OS << " # encoding: [";
+ for (unsigned i = 0, e = Code.size(); i != e; ++i) {
+ if (i)
+ OS << ',';
+ OS << format("%#04x", uint8_t(Code[i]));
+ }
+ OS << "]\n";
+ }
+
+ return;
}
- OS << "])\n";
+
+  // Otherwise fall back to structural printing for now. Eventually we should
+ // always have access to the target specific printer.
+ Inst.print(OS, &MAI);
+ OS << '\n';
}
void MCAsmStreamer::Finish() {
OS.flush();
}
-MCStreamer *llvm::createAsmStreamer(MCContext &Context, raw_ostream &OS) {
- return new MCAsmStreamer(Context, OS);
+MCStreamer *llvm::createAsmStreamer(MCContext &Context, raw_ostream &OS,
+ const MCAsmInfo &MAI, MCInstPrinter *IP,
+ MCCodeEmitter *CE) {
+ return new MCAsmStreamer(Context, OS, MAI, IP, CE);
}
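The printer and emitter arguments of the new factory are optional; passing null for both keeps plain textual output. A hedged sketch of a caller, assuming the 2.6-era default-constructible MCContext; the driver itself is hypothetical:

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::MCContext Ctx;
      llvm::MCAsmInfo MAI;  // a real tool would use a target's subclass
      // With no MCInstPrinter, EmitInstruction falls back to MCInst::print;
      // with no MCCodeEmitter, no "# encoding: [...]" comments appear.
      llvm::MCStreamer *S =
          llvm::createAsmStreamer(Ctx, llvm::outs(), MAI,
                                  /*InstPrinter=*/0, /*CodeEmitter=*/0);
      S->Finish();  // flushes the underlying stream
      delete S;
      return 0;
    }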
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
new file mode 100644
index 000000000000..0afdf98cbe79
--- /dev/null
+++ b/lib/MC/MCAssembler.cpp
@@ -0,0 +1,1190 @@
+//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "assembler"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+using namespace llvm;
+
+class MachObjectWriter;
+
+STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
+
+// FIXME FIXME FIXME: There are a number of places in this file where we
+// convert what is a 64-bit assembler value used for computation into a value
+// in the object file, which may truncate it. We should detect where that
+// truncation would be invalid and report errors back.
+
+static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
+ MachObjectWriter &MOW);
+
+/// isVirtualSection - Check if this is a section which does not actually exist
+/// in the object file.
+static bool isVirtualSection(const MCSection &Section) {
+ // FIXME: Lame.
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ unsigned Type = SMO.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
+ return (Type == MCSectionMachO::S_ZEROFILL);
+}
+
+class MachObjectWriter {
+ // See <mach-o/loader.h>.
+ enum {
+ Header_Magic32 = 0xFEEDFACE,
+ Header_Magic64 = 0xFEEDFACF
+ };
+
+ static const unsigned Header32Size = 28;
+ static const unsigned Header64Size = 32;
+ static const unsigned SegmentLoadCommand32Size = 56;
+ static const unsigned Section32Size = 68;
+ static const unsigned SymtabLoadCommandSize = 24;
+ static const unsigned DysymtabLoadCommandSize = 80;
+ static const unsigned Nlist32Size = 12;
+ static const unsigned RelocationInfoSize = 8;
+
+ enum HeaderFileType {
+ HFT_Object = 0x1
+ };
+
+ enum HeaderFlags {
+ HF_SubsectionsViaSymbols = 0x2000
+ };
+
+ enum LoadCommandType {
+ LCT_Segment = 0x1,
+ LCT_Symtab = 0x2,
+ LCT_Dysymtab = 0xb
+ };
+
+ // See <mach-o/nlist.h>.
+ enum SymbolTypeType {
+ STT_Undefined = 0x00,
+ STT_Absolute = 0x02,
+ STT_Section = 0x0e
+ };
+
+ enum SymbolTypeFlags {
+ // If any of these bits are set, then the entry is a stab entry number (see
+    // <mach-o/stab.h>). Otherwise the other masks apply.
+ STF_StabsEntryMask = 0xe0,
+
+ STF_TypeMask = 0x0e,
+ STF_External = 0x01,
+ STF_PrivateExtern = 0x10
+ };
+
+ /// IndirectSymbolFlags - Flags for encoding special values in the indirect
+ /// symbol entry.
+ enum IndirectSymbolFlags {
+ ISF_Local = 0x80000000,
+ ISF_Absolute = 0x40000000
+ };
+
+ /// RelocationFlags - Special flags for addresses.
+ enum RelocationFlags {
+ RF_Scattered = 0x80000000
+ };
+
+ enum RelocationInfoType {
+ RIT_Vanilla = 0,
+ RIT_Pair = 1,
+ RIT_Difference = 2,
+ RIT_PreboundLazyPointer = 3,
+ RIT_LocalDifference = 4
+ };
+
+  /// MachSymbolData - Helper struct holding some precomputed information
+  /// on symbols.
+ struct MachSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint8_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const MachSymbolData &RHS) const {
+ const std::string &Name = SymbolData->getSymbol().getName();
+ return Name < RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ raw_ostream &OS;
+ bool IsLSB;
+
+public:
+ MachObjectWriter(raw_ostream &_OS, bool _IsLSB = true)
+ : OS(_OS), IsLSB(_IsLSB) {
+ }
+
+ /// @name Helper Methods
+ /// @{
+
+ void Write8(uint8_t Value) {
+ OS << char(Value);
+ }
+
+ void Write16(uint16_t Value) {
+ if (IsLSB) {
+ Write8(uint8_t(Value >> 0));
+ Write8(uint8_t(Value >> 8));
+ } else {
+ Write8(uint8_t(Value >> 8));
+ Write8(uint8_t(Value >> 0));
+ }
+ }
+
+ void Write32(uint32_t Value) {
+ if (IsLSB) {
+ Write16(uint16_t(Value >> 0));
+ Write16(uint16_t(Value >> 16));
+ } else {
+ Write16(uint16_t(Value >> 16));
+ Write16(uint16_t(Value >> 0));
+ }
+ }
+
+ void Write64(uint64_t Value) {
+ if (IsLSB) {
+ Write32(uint32_t(Value >> 0));
+ Write32(uint32_t(Value >> 32));
+ } else {
+ Write32(uint32_t(Value >> 32));
+ Write32(uint32_t(Value >> 0));
+ }
+ }
+
+ void WriteZeros(unsigned N) {
+ const char Zeros[16] = { 0 };
+
+ for (unsigned i = 0, e = N / 16; i != e; ++i)
+ OS << StringRef(Zeros, 16);
+
+ OS << StringRef(Zeros, N % 16);
+ }
+
+ void WriteString(const StringRef &Str, unsigned ZeroFillSize = 0) {
+ OS << Str;
+ if (ZeroFillSize)
+ WriteZeros(ZeroFillSize - Str.size());
+ }
+
+ /// @}
+
+ void WriteHeader32(unsigned NumLoadCommands, unsigned LoadCommandsSize,
+ bool SubsectionsViaSymbols) {
+ uint32_t Flags = 0;
+
+ if (SubsectionsViaSymbols)
+ Flags |= HF_SubsectionsViaSymbols;
+
+ // struct mach_header (28 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(Header_Magic32);
+
+ // FIXME: Support cputype.
+ Write32(TargetMachOWriterInfo::HDR_CPU_TYPE_I386);
+ // FIXME: Support cpusubtype.
+ Write32(TargetMachOWriterInfo::HDR_CPU_SUBTYPE_I386_ALL);
+ Write32(HFT_Object);
+    Write32(NumLoadCommands); // Load command count: the segment command,
+                              // plus symtab/dysymtab when symbols exist.
+ Write32(LoadCommandsSize);
+ Write32(Flags);
+
+ assert(OS.tell() - Start == Header32Size);
+ }
+
+ /// WriteSegmentLoadCommand32 - Write a 32-bit segment load command.
+ ///
+ /// \arg NumSections - The number of sections in this segment.
+ /// \arg SectionDataSize - The total size of the sections.
+ void WriteSegmentLoadCommand32(unsigned NumSections,
+ uint64_t VMSize,
+ uint64_t SectionDataStartOffset,
+ uint64_t SectionDataSize) {
+ // struct segment_command (56 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(LCT_Segment);
+ Write32(SegmentLoadCommand32Size + NumSections * Section32Size);
+
+ WriteString("", 16);
+ Write32(0); // vmaddr
+ Write32(VMSize); // vmsize
+ Write32(SectionDataStartOffset); // file offset
+ Write32(SectionDataSize); // file size
+ Write32(0x7); // maxprot
+ Write32(0x7); // initprot
+ Write32(NumSections);
+ Write32(0); // flags
+
+ assert(OS.tell() - Start == SegmentLoadCommand32Size);
+ }
+
+ void WriteSection32(const MCSectionData &SD, uint64_t FileOffset,
+ uint64_t RelocationsStart, unsigned NumRelocations) {
+ // The offset is unused for virtual sections.
+ if (isVirtualSection(SD.getSection())) {
+ assert(SD.getFileSize() == 0 && "Invalid file size!");
+ FileOffset = 0;
+ }
+
+ // struct section (68 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ // FIXME: cast<> support!
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(SD.getSection());
+ WriteString(Section.getSectionName(), 16);
+ WriteString(Section.getSegmentName(), 16);
+ Write32(SD.getAddress()); // address
+ Write32(SD.getSize()); // size
+ Write32(FileOffset);
+
+ assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+ Write32(Log2_32(SD.getAlignment()));
+ Write32(NumRelocations ? RelocationsStart : 0);
+ Write32(NumRelocations);
+ Write32(Section.getTypeAndAttributes());
+ Write32(0); // reserved1
+ Write32(Section.getStubSize()); // reserved2
+
+ assert(OS.tell() - Start == Section32Size);
+ }
+
+ void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
+ uint32_t StringTableOffset,
+ uint32_t StringTableSize) {
+ // struct symtab_command (24 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(LCT_Symtab);
+ Write32(SymtabLoadCommandSize);
+ Write32(SymbolOffset);
+ Write32(NumSymbols);
+ Write32(StringTableOffset);
+ Write32(StringTableSize);
+
+ assert(OS.tell() - Start == SymtabLoadCommandSize);
+ }
+
+ void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+ uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol,
+ uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol,
+ uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset,
+ uint32_t NumIndirectSymbols) {
+ // struct dysymtab_command (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(LCT_Dysymtab);
+ Write32(DysymtabLoadCommandSize);
+ Write32(FirstLocalSymbol);
+ Write32(NumLocalSymbols);
+ Write32(FirstExternalSymbol);
+ Write32(NumExternalSymbols);
+ Write32(FirstUndefinedSymbol);
+ Write32(NumUndefinedSymbols);
+ Write32(0); // tocoff
+ Write32(0); // ntoc
+ Write32(0); // modtaboff
+ Write32(0); // nmodtab
+ Write32(0); // extrefsymoff
+ Write32(0); // nextrefsyms
+ Write32(IndirectSymbolOffset);
+ Write32(NumIndirectSymbols);
+ Write32(0); // extreloff
+ Write32(0); // nextrel
+ Write32(0); // locreloff
+ Write32(0); // nlocrel
+
+ assert(OS.tell() - Start == DysymtabLoadCommandSize);
+ }
+
+ void WriteNlist32(MachSymbolData &MSD) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ const MCSymbol &Symbol = Data.getSymbol();
+ uint8_t Type = 0;
+ uint16_t Flags = Data.getFlags();
+ uint32_t Address = 0;
+
+ // Set the N_TYPE bits. See <mach-o/nlist.h>.
+ //
+ // FIXME: Are the prebound or indirect fields possible here?
+ if (Symbol.isUndefined())
+ Type = STT_Undefined;
+ else if (Symbol.isAbsolute())
+ Type = STT_Absolute;
+ else
+ Type = STT_Section;
+
+ // FIXME: Set STAB bits.
+
+ if (Data.isPrivateExtern())
+ Type |= STF_PrivateExtern;
+
+ // Set external bit.
+ if (Data.isExternal() || Symbol.isUndefined())
+ Type |= STF_External;
+
+ // Compute the symbol address.
+ if (Symbol.isDefined()) {
+ if (Symbol.isAbsolute()) {
+ llvm_unreachable("FIXME: Not yet implemented!");
+ } else {
+ Address = Data.getFragment()->getAddress() + Data.getOffset();
+ }
+ } else if (Data.isCommon()) {
+ // Common symbols are encoded with the size in the address
+ // field, and their alignment in the flags.
+ Address = Data.getCommonSize();
+
+ // Common alignment is packed into the 'desc' bits.
+ if (unsigned Align = Data.getCommonAlignment()) {
+ unsigned Log2Size = Log2_32(Align);
+ assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+ if (Log2Size > 15)
+ llvm_report_error("invalid 'common' alignment '" +
+ Twine(Align) + "'");
+ // FIXME: Keep this mask with the SymbolFlags enumeration.
+ Flags = (Flags & 0xF0FF) | (Log2Size << 8);
+ }
+ }
+
+ // struct nlist (12 bytes)
+
+ Write32(MSD.StringIndex);
+ Write8(Type);
+ Write8(MSD.SectionIndex);
+
+    // The Mach-O streamer uses the lowest 16 bits of the flags for the 'desc'
+ // value.
+ Write16(Flags);
+ Write32(Address);
+ }
+
+ struct MachRelocationEntry {
+ uint32_t Word0;
+ uint32_t Word1;
+ };
+ void ComputeScatteredRelocationInfo(MCAssembler &Asm,
+ MCSectionData::Fixup &Fixup,
+ DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap,
+ std::vector<MachRelocationEntry> &Relocs) {
+ uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset;
+ unsigned IsPCRel = 0;
+ unsigned Type = RIT_Vanilla;
+
+ // See <reloc.h>.
+
+ const MCSymbol *A = Fixup.Value.getSymA();
+ MCSymbolData *SD = SymbolMap.lookup(A);
+ uint32_t Value = SD->getFragment()->getAddress() + SD->getOffset();
+ uint32_t Value2 = 0;
+
+ if (const MCSymbol *B = Fixup.Value.getSymB()) {
+ Type = RIT_LocalDifference;
+
+ MCSymbolData *SD = SymbolMap.lookup(B);
+ Value2 = SD->getFragment()->getAddress() + SD->getOffset();
+ }
+
+ unsigned Log2Size = Log2_32(Fixup.Size);
+ assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!");
+
+    // The value which goes in the fixup is the current value of the expression.
+ Fixup.FixedValue = Value - Value2 + Fixup.Value.getConstant();
+
+ MachRelocationEntry MRE;
+ MRE.Word0 = ((Address << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ RF_Scattered);
+ MRE.Word1 = Value;
+ Relocs.push_back(MRE);
+
+ if (Type == RIT_LocalDifference) {
+ Type = RIT_Pair;
+
+ MachRelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (0 << 30) |
+ RF_Scattered);
+ MRE.Word1 = Value2;
+ Relocs.push_back(MRE);
+ }
+ }
+
+ void ComputeRelocationInfo(MCAssembler &Asm,
+ MCSectionData::Fixup &Fixup,
+ DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap,
+ std::vector<MachRelocationEntry> &Relocs) {
+ // If this is a local symbol plus an offset or a difference, then we need a
+ // scattered relocation entry.
+ if (Fixup.Value.getSymB()) // a - b
+ return ComputeScatteredRelocationInfo(Asm, Fixup, SymbolMap, Relocs);
+ if (Fixup.Value.getSymA() && Fixup.Value.getConstant())
+ if (!Fixup.Value.getSymA()->isUndefined())
+ return ComputeScatteredRelocationInfo(Asm, Fixup, SymbolMap, Relocs);
+
+ // See <reloc.h>.
+ uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset;
+ uint32_t Value = 0;
+ unsigned Index = 0;
+ unsigned IsPCRel = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Fixup.Value.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ Type = RIT_Vanilla;
+ Value = 0;
+ llvm_unreachable("FIXME: Not yet implemented!");
+ } else {
+ const MCSymbol *Symbol = Fixup.Value.getSymA();
+ MCSymbolData *SD = SymbolMap.lookup(Symbol);
+
+ if (Symbol->isUndefined()) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ Value = 0;
+ } else {
+ // The index is the section ordinal.
+ //
+ // FIXME: O(N)
+ Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ if (&*it == SD->getFragment()->getParent())
+ break;
+ Value = SD->getFragment()->getAddress() + SD->getOffset();
+ }
+
+ Type = RIT_Vanilla;
+ }
+
+    // The value which goes in the fixup is the current value of the expression.
+ Fixup.FixedValue = Value + Fixup.Value.getConstant();
+
+ unsigned Log2Size = Log2_32(Fixup.Size);
+ assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!");
+
+ // struct relocation_info (8 bytes)
+ MachRelocationEntry MRE;
+ MRE.Word0 = Address;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Relocs.push_back(MRE);
+ }
+
+ void BindIndirectSymbols(MCAssembler &Asm,
+ DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap) {
+ // This is the point where 'as' creates actual symbols for indirect symbols
+ // (in the following two passes). It would be easier for us to do this
+ // sooner when we see the attribute, but that makes getting the order in the
+ // symbol table much more complicated than it is worth.
+ //
+ // FIXME: Revisit this when the dust settles.
+
+    // Bind non-lazy symbol pointers first.
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+ // FIXME: cast<> support!
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+
+ unsigned Type =
+ Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
+ if (Type != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+ continue;
+
+ MCSymbolData *&Entry = SymbolMap[it->Symbol];
+ if (!Entry)
+ Entry = new MCSymbolData(*it->Symbol, 0, 0, &Asm);
+ }
+
+ // Then lazy symbol pointers and symbol stubs.
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+ // FIXME: cast<> support!
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+
+ unsigned Type =
+ Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
+ if (Type != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ Type != MCSectionMachO::S_SYMBOL_STUBS)
+ continue;
+
+ MCSymbolData *&Entry = SymbolMap[it->Symbol];
+ if (!Entry) {
+ Entry = new MCSymbolData(*it->Symbol, 0, 0, &Asm);
+
+ // Set the symbol type to undefined lazy, but only on construction.
+ //
+ // FIXME: Do not hardcode.
+ Entry->setFlags(Entry->getFlags() | 0x0001);
+ }
+ }
+ }
+
+ /// ComputeSymbolTable - Compute the symbol table data
+ ///
+ /// \param StringTable [out] - The string table data.
+  /// \param LocalSymbolData [out] - Local symbols, in collection order.
+  /// \param ExternalSymbolData [out] - External symbols, sorted by name.
+  /// \param UndefinedSymbolData [out] - Undefined symbols, sorted by name.
+ void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+ std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+ assert(Index <= 256 && "Too many sections!");
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Build the symbol arrays and the string table, but only for non-local
+ // symbols.
+ //
+ // The particular order that we collect the symbols and create the string
+ // table, then sort the symbols is chosen to match 'as'. Even though it
+ // doesn't matter for correctness, this is important for letting us diff .o
+ // files.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore assembler temporaries.
+ if (it->getSymbol().isTemporary())
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = 0;
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore assembler temporaries.
+ if (it->getSymbol().isTemporary())
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // External and undefined symbols are required to be in lexicographic order.
+ std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ // The string table is padded to a multiple of 4.
+ //
+ // FIXME: Check to see if this varies per arch.
+ while (StringTable.size() % 4)
+ StringTable += '\x00';
+ }
+
+ void WriteObject(MCAssembler &Asm) {
+ unsigned NumSections = Asm.size();
+
+ // Compute the symbol -> symbol data map.
+ //
+ // FIXME: This should not be here.
+ DenseMap<const MCSymbol*, MCSymbolData *> SymbolMap;
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it)
+ SymbolMap[&it->getSymbol()] = it;
+
+ // Create symbol data for any indirect symbols.
+ BindIndirectSymbols(Asm, SymbolMap);
+
+ // Compute symbol table information.
+ SmallString<256> StringTable;
+ std::vector<MachSymbolData> LocalSymbolData;
+ std::vector<MachSymbolData> ExternalSymbolData;
+ std::vector<MachSymbolData> UndefinedSymbolData;
+ unsigned NumSymbols = Asm.symbol_size();
+
+ // No symbol table command is written if there are no symbols.
+ if (NumSymbols)
+ ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+ UndefinedSymbolData);
+
+    // The section data starts after the header, the segment load command (and
+    // its section headers), and the symbol table load commands.
+ unsigned NumLoadCommands = 1;
+ uint64_t LoadCommandsSize =
+ SegmentLoadCommand32Size + NumSections * Section32Size;
+
+ // Add the symbol table load command sizes, if used.
+ if (NumSymbols) {
+ NumLoadCommands += 2;
+ LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
+ }
+
+ // Compute the total size of the section data, as well as its file size and
+ // vm size.
+ uint64_t SectionDataStart = Header32Size + LoadCommandsSize;
+ uint64_t SectionDataSize = 0;
+ uint64_t SectionDataFileSize = 0;
+ uint64_t VMSize = 0;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+
+ VMSize = std::max(VMSize, SD.getAddress() + SD.getSize());
+
+ if (isVirtualSection(SD.getSection()))
+ continue;
+
+ SectionDataSize = std::max(SectionDataSize,
+ SD.getAddress() + SD.getSize());
+ SectionDataFileSize = std::max(SectionDataFileSize,
+ SD.getAddress() + SD.getFileSize());
+ }
+
+    // The section data is padded to 4 bytes.
+ //
+ // FIXME: Is this machine dependent?
+ unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+ SectionDataFileSize += SectionDataPadding;
+
+ // Write the prolog, starting with the header and load command...
+ WriteHeader32(NumLoadCommands, LoadCommandsSize,
+ Asm.getSubsectionsViaSymbols());
+ WriteSegmentLoadCommand32(NumSections, VMSize,
+ SectionDataStart, SectionDataSize);
+
+ // ... and then the section headers.
+ //
+ // We also compute the section relocations while we do this. Note that
+    // computing relocation info will also update the fixup to have the correct
+    // value; this will overwrite the appropriate data in the fragment when
+ // it is written.
+ std::vector<MachRelocationEntry> RelocInfos;
+ uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie;
+ ++it) {
+ MCSectionData &SD = *it;
+
+ // The assembler writes relocations in the reverse order they were seen.
+ //
+ // FIXME: It is probably more complicated than this.
+ unsigned NumRelocsStart = RelocInfos.size();
+ for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i)
+ ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap,
+ RelocInfos);
+
+ unsigned NumRelocs = RelocInfos.size() - NumRelocsStart;
+ uint64_t SectionStart = SectionDataStart + SD.getAddress();
+ WriteSection32(SD, SectionStart, RelocTableEnd, NumRelocs);
+ RelocTableEnd += NumRelocs * RelocationInfoSize;
+ }
+
+ // Write the symbol table load command, if used.
+ if (NumSymbols) {
+ unsigned FirstLocalSymbol = 0;
+ unsigned NumLocalSymbols = LocalSymbolData.size();
+ unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+ unsigned NumExternalSymbols = ExternalSymbolData.size();
+ unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+ unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+ unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+ unsigned NumSymTabSymbols =
+ NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+ uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+ uint64_t IndirectSymbolOffset = 0;
+
+ // If used, the indirect symbols are written after the section data.
+ if (NumIndirectSymbols)
+ IndirectSymbolOffset = RelocTableEnd;
+
+ // The symbol table is written after the indirect symbol data.
+ uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+      // The string table is written after the symbol table.
+ uint64_t StringTableOffset =
+ SymbolTableOffset + NumSymTabSymbols * Nlist32Size;
+ WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+ StringTableOffset, StringTable.size());
+
+ WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+ FirstExternalSymbol, NumExternalSymbols,
+ FirstUndefinedSymbol, NumUndefinedSymbols,
+ IndirectSymbolOffset, NumIndirectSymbols);
+ }
+
+ // Write the actual section data.
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ WriteFileData(OS, *it, *this);
+
+ // Write the extra padding.
+ WriteZeros(SectionDataPadding);
+
+ // Write the relocation entries.
+ for (unsigned i = 0, e = RelocInfos.size(); i != e; ++i) {
+ Write32(RelocInfos[i].Word0);
+ Write32(RelocInfos[i].Word1);
+ }
+
+ // Write the symbol table data, if used.
+ if (NumSymbols) {
+ // Write the indirect symbol entries.
+ for (MCAssembler::indirect_symbol_iterator
+ it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+        // Indirect symbols in the non-lazy symbol pointer section have some
+ // special handling.
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ unsigned Type =
+ Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
+ if (Type == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+ // If this symbol is defined and internal, mark it as such.
+ if (it->Symbol->isDefined() &&
+ !SymbolMap.lookup(it->Symbol)->isExternal()) {
+ uint32_t Flags = ISF_Local;
+ if (it->Symbol->isAbsolute())
+ Flags |= ISF_Absolute;
+ Write32(Flags);
+ continue;
+ }
+ }
+
+ Write32(SymbolMap[it->Symbol]->getIndex());
+ }
+
+ // FIXME: Check that offsets match computed ones.
+
+ // Write the symbol table entries.
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ WriteNlist32(LocalSymbolData[i]);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ WriteNlist32(ExternalSymbolData[i]);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ WriteNlist32(UndefinedSymbolData[i]);
+
+ // Write the string table.
+ OS << StringTable.str();
+ }
+ }
+};
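A note on the Write* helpers in MachObjectWriter above: each width is composed from the next smaller one, so the single IsLSB flag fixes the byte order at every width. Tracing a hypothetical Write32(0x11223344) with IsLSB = true:

    // Write32(0x11223344) -> Write16(0x3344), Write16(0x1122)
    //                     -> Write8(0x44), Write8(0x33),
    //                        Write8(0x22), Write8(0x11)
    // so the stream receives 44 33 22 11, i.e. little-endian order.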
+
+/* *** */
+
+MCFragment::MCFragment() : Kind(FragmentType(~0)) {
+}
+
+MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
+ : Kind(_Kind),
+ Parent(_Parent),
+ FileSize(~UINT64_C(0))
+{
+ if (Parent)
+ Parent->getFragmentList().push_back(this);
+}
+
+MCFragment::~MCFragment() {
+}
+
+uint64_t MCFragment::getAddress() const {
+ assert(getParent() && "Missing Section!");
+ return getParent()->getAddress() + Offset;
+}
+
+/* *** */
+
+MCSectionData::MCSectionData() : Section(0) {}
+
+MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
+ : Section(&_Section),
+ Alignment(1),
+ Address(~UINT64_C(0)),
+ Size(~UINT64_C(0)),
+ FileSize(~UINT64_C(0)),
+ LastFixupLookup(~0)
+{
+ if (A)
+ A->getSectionList().push_back(this);
+}
+
+const MCSectionData::Fixup *
+MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const {
+  // Use a one-level cache to turn the common case of accessing the fixups in
+ // order into O(1) instead of O(N).
+ unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size();
+ if (i >= End)
+ i = 0;
+ while (Count--) {
+ const Fixup &F = Fixups[i];
+ if (F.Fragment == Fragment && F.Offset == Offset) {
+ LastFixupLookup = i;
+ return &F;
+ }
+
+ ++i;
+ if (i == End)
+ i = 0;
+ }
+
+ return 0;
+}
+
+/* *** */
+
+MCSymbolData::MCSymbolData() : Symbol(0) {}
+
+MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
+ uint64_t _Offset, MCAssembler *A)
+ : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
+ IsExternal(false), IsPrivateExtern(false),
+ CommonSize(0), CommonAlign(0), Flags(0), Index(0)
+{
+ if (A)
+ A->getSymbolList().push_back(this);
+}
+
+/* *** */
+
+MCAssembler::MCAssembler(MCContext &_Context, raw_ostream &_OS)
+ : Context(_Context), OS(_OS), SubsectionsViaSymbols(false)
+{
+}
+
+MCAssembler::~MCAssembler() {
+}
+
+void MCAssembler::LayoutSection(MCSectionData &SD) {
+ uint64_t Address = SD.getAddress();
+
+ for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) {
+ MCFragment &F = *it;
+
+ F.setOffset(Address - SD.getAddress());
+
+ // Evaluate fragment size.
+ switch (F.getKind()) {
+ case MCFragment::FT_Align: {
+ MCAlignFragment &AF = cast<MCAlignFragment>(F);
+
+ uint64_t Size = OffsetToAlignment(Address, AF.getAlignment());
+ if (Size > AF.getMaxBytesToEmit())
+ AF.setFileSize(0);
+ else
+ AF.setFileSize(Size);
+ break;
+ }
+
+ case MCFragment::FT_Data:
+ F.setFileSize(F.getMaxFileSize());
+ break;
+
+ case MCFragment::FT_Fill: {
+ MCFillFragment &FF = cast<MCFillFragment>(F);
+
+ F.setFileSize(F.getMaxFileSize());
+
+      // If the fill value is constant, that's it.
+ if (FF.getValue().isAbsolute())
+ break;
+
+ // Otherwise, add fixups for the values.
+ for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
+ MCSectionData::Fixup Fix(F, i * FF.getValueSize(),
+ FF.getValue(),FF.getValueSize());
+ SD.getFixups().push_back(Fix);
+ }
+ break;
+ }
+
+ case MCFragment::FT_Org: {
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+ if (!OF.getOffset().isAbsolute())
+ llvm_unreachable("FIXME: Not yet implemented!");
+ uint64_t OrgOffset = OF.getOffset().getConstant();
+ uint64_t Offset = Address - SD.getAddress();
+
+ // FIXME: We need a way to communicate this error.
+ if (OrgOffset < Offset)
+ llvm_report_error("invalid .org offset '" + Twine(OrgOffset) +
+                        "' (at offset '" + Twine(Offset) + "')");
+
+ F.setFileSize(OrgOffset - Offset);
+ break;
+ }
+
+ case MCFragment::FT_ZeroFill: {
+ MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F);
+
+ // Align the fragment offset; it is safe to adjust the offset freely since
+ // this is only in virtual sections.
+ uint64_t Aligned = RoundUpToAlignment(Address, ZFF.getAlignment());
+ F.setOffset(Aligned - SD.getAddress());
+
+ // FIXME: This is misnamed.
+ F.setFileSize(ZFF.getSize());
+ break;
+ }
+ }
+
+ Address += F.getFileSize();
+ }
+
+ // Set the section sizes.
+ SD.setSize(Address - SD.getAddress());
+ if (isVirtualSection(SD.getSection()))
+ SD.setFileSize(0);
+ else
+ SD.setFileSize(Address - SD.getAddress());
+}
+
+/// WriteFileData - Write the \arg F data to the output file.
+static void WriteFileData(raw_ostream &OS, const MCFragment &F,
+ MachObjectWriter &MOW) {
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ ++EmittedFragments;
+
+ // FIXME: Embed in fragments instead?
+ switch (F.getKind()) {
+ case MCFragment::FT_Align: {
+ MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ uint64_t Count = AF.getFileSize() / AF.getValueSize();
+
+ // FIXME: This error shouldn't actually occur (the front end should emit
+ // multiple .align directives to enforce the semantics it wants), but is
+ // severe enough that we want to report it. How to handle this?
+ if (Count * AF.getValueSize() != AF.getFileSize())
+ llvm_report_error("undefined .align directive, value size '" +
+ Twine(AF.getValueSize()) +
+ "' is not a divisor of padding size '" +
+ Twine(AF.getFileSize()) + "'");
+
+ for (uint64_t i = 0; i != Count; ++i) {
+ switch (AF.getValueSize()) {
+ default:
+ assert(0 && "Invalid size!");
+ case 1: MOW.Write8 (uint8_t (AF.getValue())); break;
+ case 2: MOW.Write16(uint16_t(AF.getValue())); break;
+ case 4: MOW.Write32(uint32_t(AF.getValue())); break;
+ case 8: MOW.Write64(uint64_t(AF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Data:
+ OS << cast<MCDataFragment>(F).getContents().str();
+ break;
+
+ case MCFragment::FT_Fill: {
+ MCFillFragment &FF = cast<MCFillFragment>(F);
+
+ int64_t Value = 0;
+ if (FF.getValue().isAbsolute())
+ Value = FF.getValue().getConstant();
+ for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
+ if (!FF.getValue().isAbsolute()) {
+ // Find the fixup.
+ //
+ // FIXME: Find a better way to write in the fixes.
+ const MCSectionData::Fixup *Fixup =
+ F.getParent()->LookupFixup(&F, i * FF.getValueSize());
+ assert(Fixup && "Missing fixup for fill value!");
+ Value = Fixup->FixedValue;
+ }
+
+ switch (FF.getValueSize()) {
+ default:
+ assert(0 && "Invalid size!");
+ case 1: MOW.Write8 (uint8_t (Value)); break;
+ case 2: MOW.Write16(uint16_t(Value)); break;
+ case 4: MOW.Write32(uint32_t(Value)); break;
+ case 8: MOW.Write64(uint64_t(Value)); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Org: {
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+ for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i)
+ MOW.Write8(uint8_t(OF.getValue()));
+
+ break;
+ }
+
+ case MCFragment::FT_ZeroFill: {
+ assert(0 && "Invalid zero fill fragment in concrete section!");
+ break;
+ }
+ }
+
+ assert(OS.tell() - Start == F.getFileSize());
+}
+
+/// WriteFileData - Write the \arg SD data to the output file.
+static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
+ MachObjectWriter &MOW) {
+ // Ignore virtual sections.
+ if (isVirtualSection(SD.getSection())) {
+ assert(SD.getFileSize() == 0);
+ return;
+ }
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ for (MCSectionData::const_iterator it = SD.begin(),
+ ie = SD.end(); it != ie; ++it)
+ WriteFileData(OS, *it, MOW);
+
+ // Add section padding.
+ assert(SD.getFileSize() >= SD.getSize() && "Invalid section sizes!");
+ MOW.WriteZeros(SD.getFileSize() - SD.getSize());
+
+ assert(OS.tell() - Start == SD.getFileSize());
+}
+
+void MCAssembler::Finish() {
+ // Layout the concrete sections and fragments.
+ uint64_t Address = 0;
+ MCSectionData *Prev = 0;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+
+ // Skip virtual sections.
+ if (isVirtualSection(SD.getSection()))
+ continue;
+
+ // Align this section if necessary by adding padding bytes to the previous
+ // section.
+ if (uint64_t Pad = OffsetToAlignment(Address, it->getAlignment())) {
+ assert(Prev && "Missing prev section!");
+ Prev->setFileSize(Prev->getFileSize() + Pad);
+ Address += Pad;
+ }
+
+ // Layout the section fragments and its size.
+ SD.setAddress(Address);
+ LayoutSection(SD);
+ Address += SD.getFileSize();
+
+ Prev = &SD;
+ }
+
+ // Layout the virtual sections.
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+
+ if (!isVirtualSection(SD.getSection()))
+ continue;
+
+ SD.setAddress(Address);
+ LayoutSection(SD);
+ Address += SD.getSize();
+ }
+
+ // Write the object file.
+ MachObjectWriter MOW(OS);
+ MOW.WriteObject(*this);
+
+ OS.flush();
+}
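// Editor's note (not part of the patch): a simplified model of the two-pass
// layout performed by MCAssembler::Finish and LayoutSection above. The Sect
// struct and the function names are hypothetical.
#include <stdint.h>
#include <stddef.h>
#include <vector>

struct Sect { uint64_t Align, Size, FileSize, Address; bool Virtual; };

static uint64_t offsetToAlignment(uint64_t Addr, uint64_t Align) {
  return (Align - (Addr % Align)) % Align;
}

static void layout(std::vector<Sect> &Sections) {
  uint64_t Address = 0;
  Sect *Prev = 0;
  // Pass 1: concrete sections. Alignment padding is charged to the previous
  // section's file size (the original asserts Prev is non-null when padding
  // is needed, i.e. the first concrete section must already be aligned).
  for (size_t i = 0, e = Sections.size(); i != e; ++i) {
    Sect &S = Sections[i];
    if (S.Virtual)
      continue;
    if (uint64_t Pad = offsetToAlignment(Address, S.Align)) {
      Prev->FileSize += Pad;
      Address += Pad;
    }
    S.Address = Address;
    Address += S.FileSize;
    Prev = &S;
  }
  // Pass 2: virtual (zero-fill) sections consume address space but occupy no
  // bytes in the file, so only Size advances the address.
  for (size_t i = 0, e = Sections.size(); i != e; ++i) {
    Sect &S = Sections[i];
    if (!S.Virtual)
      continue;
    S.Address = Address;
    Address += S.Size;
  }
}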
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
new file mode 100644
index 000000000000..c122763b2fe5
--- /dev/null
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -0,0 +1,18 @@
+//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCCodeEmitter.h"
+
+using namespace llvm;
+
+MCCodeEmitter::MCCodeEmitter() {
+}
+
+MCCodeEmitter::~MCCodeEmitter() {
+}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 6c6019c76ffd..f36564a6afae 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -14,23 +14,15 @@
#include "llvm/MC/MCValue.h"
using namespace llvm;
-MCContext::MCContext()
-{
+MCContext::MCContext() {
}
MCContext::~MCContext() {
+ // NOTE: The sections are all allocated out of a bump pointer allocator,
+ // we don't need to free them here.
}
-MCSection *MCContext::GetSection(const char *Name) {
- MCSection *&Entry = Sections[Name];
-
- if (!Entry)
- Entry = new (*this) MCSection(Name);
-
- return Entry;
-}
-
-MCSymbol *MCContext::CreateSymbol(const char *Name) {
+MCSymbol *MCContext::CreateSymbol(const StringRef &Name) {
assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!");
// Create and bind the symbol, and ensure that names are unique.
@@ -39,17 +31,16 @@ MCSymbol *MCContext::CreateSymbol(const char *Name) {
return Entry = new (*this) MCSymbol(Name, false);
}
-MCSymbol *MCContext::GetOrCreateSymbol(const char *Name) {
+MCSymbol *MCContext::GetOrCreateSymbol(const StringRef &Name) {
MCSymbol *&Entry = Symbols[Name];
if (Entry) return Entry;
return Entry = new (*this) MCSymbol(Name, false);
}
-
-MCSymbol *MCContext::CreateTemporarySymbol(const char *Name) {
+MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) {
// If unnamed, just create a symbol.
- if (Name[0] == '\0')
+ if (Name.empty())
    return new (*this) MCSymbol("", true);
// Otherwise create as usual.
@@ -58,20 +49,20 @@ MCSymbol *MCContext::CreateTemporarySymbol(const char *Name) {
return Entry = new (*this) MCSymbol(Name, true);
}
-MCSymbol *MCContext::LookupSymbol(const char *Name) const {
+MCSymbol *MCContext::LookupSymbol(const StringRef &Name) const {
return Symbols.lookup(Name);
}
-void MCContext::ClearSymbolValue(MCSymbol *Sym) {
+void MCContext::ClearSymbolValue(const MCSymbol *Sym) {
SymbolValues.erase(Sym);
}
-void MCContext::SetSymbolValue(MCSymbol *Sym, const MCValue &Value) {
+void MCContext::SetSymbolValue(const MCSymbol *Sym, const MCValue &Value) {
SymbolValues[Sym] = Value;
}
-const MCValue *MCContext::GetSymbolValue(MCSymbol *Sym) const {
- DenseMap<MCSymbol*, MCValue>::iterator it = SymbolValues.find(Sym);
+const MCValue *MCContext::GetSymbolValue(const MCSymbol *Sym) const {
+ DenseMap<const MCSymbol*, MCValue>::iterator it = SymbolValues.find(Sym);
if (it == SymbolValues.end())
return 0;
diff --git a/lib/MC/MCDisassembler.cpp b/lib/MC/MCDisassembler.cpp
new file mode 100644
index 000000000000..08096906462f
--- /dev/null
+++ b/lib/MC/MCDisassembler.cpp
@@ -0,0 +1,14 @@
+//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+using namespace llvm;
+
+MCDisassembler::~MCDisassembler() {
+}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
new file mode 100644
index 000000000000..0f3e053de8ec
--- /dev/null
+++ b/lib/MC/MCExpr.cpp
@@ -0,0 +1,286 @@
+//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ switch (getKind()) {
+ case MCExpr::Constant:
+ OS << cast<MCConstantExpr>(*this).getValue();
+ return;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbol &Sym = cast<MCSymbolRefExpr>(*this).getSymbol();
+
+ // Parenthesize names that start with $ so that they don't look like
+ // absolute names.
+ if (Sym.getName()[0] == '$') {
+ OS << '(';
+ Sym.print(OS, MAI);
+ OS << ')';
+ } else {
+ Sym.print(OS, MAI);
+ }
+ return;
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
+ switch (UE.getOpcode()) {
+ default: assert(0 && "Invalid opcode!");
+ case MCUnaryExpr::LNot: OS << '!'; break;
+ case MCUnaryExpr::Minus: OS << '-'; break;
+ case MCUnaryExpr::Not: OS << '~'; break;
+ case MCUnaryExpr::Plus: OS << '+'; break;
+ }
+ UE.getSubExpr()->print(OS, MAI);
+ return;
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
+ BE.getLHS()->print(OS, MAI);
+ } else {
+ OS << '(';
+ BE.getLHS()->print(OS, MAI);
+ OS << ')';
+ }
+
+ switch (BE.getOpcode()) {
+ default: assert(0 && "Invalid opcode!");
+ case MCBinaryExpr::Add:
+ // Print "X-42" instead of "X+-42".
+ if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+ if (RHSC->getValue() < 0) {
+ OS << RHSC->getValue();
+ return;
+ }
+ }
+
+ OS << '+';
+ break;
+ case MCBinaryExpr::And: OS << '&'; break;
+ case MCBinaryExpr::Div: OS << '/'; break;
+ case MCBinaryExpr::EQ: OS << "=="; break;
+ case MCBinaryExpr::GT: OS << '>'; break;
+ case MCBinaryExpr::GTE: OS << ">="; break;
+ case MCBinaryExpr::LAnd: OS << "&&"; break;
+ case MCBinaryExpr::LOr: OS << "||"; break;
+ case MCBinaryExpr::LT: OS << '<'; break;
+ case MCBinaryExpr::LTE: OS << "<="; break;
+ case MCBinaryExpr::Mod: OS << '%'; break;
+ case MCBinaryExpr::Mul: OS << '*'; break;
+ case MCBinaryExpr::NE: OS << "!="; break;
+ case MCBinaryExpr::Or: OS << '|'; break;
+ case MCBinaryExpr::Shl: OS << "<<"; break;
+ case MCBinaryExpr::Shr: OS << ">>"; break;
+ case MCBinaryExpr::Sub: OS << '-'; break;
+ case MCBinaryExpr::Xor: OS << '^'; break;
+ }
+
+    // Only print parens around the RHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+ BE.getRHS()->print(OS, MAI);
+ } else {
+ OS << '(';
+ BE.getRHS()->print(OS, MAI);
+ OS << ')';
+ }
+ return;
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+}
+
+void MCExpr::dump() const {
+ print(errs(), 0);
+ errs() << '\n';
+}
+
+/* *** */
+
+const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+ const MCExpr *RHS, MCContext &Ctx) {
+ return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+}
+
+const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) MCUnaryExpr(Opc, Expr);
+}
+
+const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+ return new (Ctx) MCConstantExpr(Value);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+ MCContext &Ctx) {
+ return new (Ctx) MCSymbolRefExpr(Sym);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const StringRef &Name,
+ MCContext &Ctx) {
+ return Create(Ctx.GetOrCreateSymbol(Name), Ctx);
+}
+
+
+/* *** */
+
+bool MCExpr::EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const {
+ MCValue Value;
+
+ if (!EvaluateAsRelocatable(Ctx, Value) || !Value.isAbsolute())
+ return false;
+
+ Res = Value.getConstant();
+ return true;
+}
+
+static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A,
+ const MCSymbol *RHS_B, int64_t RHS_Cst,
+ MCValue &Res) {
+ // We can't add or subtract two symbols.
+ if ((LHS.getSymA() && RHS_A) ||
+ (LHS.getSymB() && RHS_B))
+ return false;
+
+ const MCSymbol *A = LHS.getSymA() ? LHS.getSymA() : RHS_A;
+ const MCSymbol *B = LHS.getSymB() ? LHS.getSymB() : RHS_B;
+ if (B) {
+    // If we have a negated symbol, then we must also have a non-negated
+ // symbol in order to encode the expression. We can do this check later to
+ // permit expressions which eventually fold to a representable form -- such
+ // as (a + (0 - b)) -- if necessary.
+ if (!A)
+ return false;
+ }
+ Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst);
+ return true;
+}
+
+bool MCExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const {
+ switch (getKind()) {
+ case Constant:
+ Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
+ return true;
+
+ case SymbolRef: {
+ const MCSymbol &Sym = cast<MCSymbolRefExpr>(this)->getSymbol();
+ if (const MCValue *Value = Ctx.GetSymbolValue(&Sym))
+ Res = *Value;
+ else
+ Res = MCValue::get(&Sym, 0, 0);
+ return true;
+ }
+
+ case Unary: {
+ const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
+ MCValue Value;
+
+ if (!AUE->getSubExpr()->EvaluateAsRelocatable(Ctx, Value))
+ return false;
+
+ switch (AUE->getOpcode()) {
+ case MCUnaryExpr::LNot:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(!Value.getConstant());
+ break;
+ case MCUnaryExpr::Minus:
+ /// -(a - b + const) ==> (b - a - const)
+ if (Value.getSymA() && !Value.getSymB())
+ return false;
+ Res = MCValue::get(Value.getSymB(), Value.getSymA(),
+ -Value.getConstant());
+ break;
+ case MCUnaryExpr::Not:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(~Value.getConstant());
+ break;
+ case MCUnaryExpr::Plus:
+ Res = Value;
+ break;
+ }
+
+ return true;
+ }
+
+ case Binary: {
+ const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
+ MCValue LHSValue, RHSValue;
+
+ if (!ABE->getLHS()->EvaluateAsRelocatable(Ctx, LHSValue) ||
+ !ABE->getRHS()->EvaluateAsRelocatable(Ctx, RHSValue))
+ return false;
+
+    // We only support a few operations on non-constant expressions; handle
+ // those first.
+ if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
+ switch (ABE->getOpcode()) {
+ default:
+ return false;
+ case MCBinaryExpr::Sub:
+ // Negate RHS and add.
+ return EvaluateSymbolicAdd(LHSValue,
+ RHSValue.getSymB(), RHSValue.getSymA(),
+ -RHSValue.getConstant(),
+ Res);
+
+ case MCBinaryExpr::Add:
+ return EvaluateSymbolicAdd(LHSValue,
+ RHSValue.getSymA(), RHSValue.getSymB(),
+ RHSValue.getConstant(),
+ Res);
+ }
+ }
+
+ // FIXME: We need target hooks for the evaluation. It may be limited in
+    // width, and gas defines the result of comparisons differently from
+    // Apple's 'as' (where the result is sign extended).
+ int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
+ int64_t Result = 0;
+ switch (ABE->getOpcode()) {
+ case MCBinaryExpr::Add: Result = LHS + RHS; break;
+ case MCBinaryExpr::And: Result = LHS & RHS; break;
+ case MCBinaryExpr::Div: Result = LHS / RHS; break;
+ case MCBinaryExpr::EQ: Result = LHS == RHS; break;
+ case MCBinaryExpr::GT: Result = LHS > RHS; break;
+ case MCBinaryExpr::GTE: Result = LHS >= RHS; break;
+ case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
+ case MCBinaryExpr::LOr: Result = LHS || RHS; break;
+ case MCBinaryExpr::LT: Result = LHS < RHS; break;
+ case MCBinaryExpr::LTE: Result = LHS <= RHS; break;
+ case MCBinaryExpr::Mod: Result = LHS % RHS; break;
+ case MCBinaryExpr::Mul: Result = LHS * RHS; break;
+ case MCBinaryExpr::NE: Result = LHS != RHS; break;
+ case MCBinaryExpr::Or: Result = LHS | RHS; break;
+ case MCBinaryExpr::Shl: Result = LHS << RHS; break;
+ case MCBinaryExpr::Shr: Result = LHS >> RHS; break;
+ case MCBinaryExpr::Sub: Result = LHS - RHS; break;
+ case MCBinaryExpr::Xor: Result = LHS ^ RHS; break;
+ }
+
+ Res = MCValue::get(Result);
+ return true;
+ }
+ }
+
+ assert(0 && "Invalid assembly expression kind!");
+ return false;
+}
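// Editor's note (not part of the patch): a hedged usage sketch for the
// expression API above; it relies only on interfaces visible in this patch.
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"

using namespace llvm;

void exprExamples() {
  MCContext Ctx;

  // Constant folding: (2 + 3) * 4 evaluates absolutely to 20.
  const MCExpr *Sum = MCBinaryExpr::Create(
      MCBinaryExpr::Add, MCConstantExpr::Create(2, Ctx),
      MCConstantExpr::Create(3, Ctx), Ctx);
  const MCExpr *Prod = MCBinaryExpr::Create(
      MCBinaryExpr::Mul, Sum, MCConstantExpr::Create(4, Ctx), Ctx);
  int64_t Res = 0;
  bool IsAbs = Prod->EvaluateAsAbsolute(Ctx, Res); // IsAbs == true, Res == 20.
  (void) IsAbs;

  // Relocatable folding: "foo + 4" stays symbolic and evaluates to the
  // MCValue (foo, 0, 4) -- symbol A, no subtracted symbol, constant 4.
  const MCExpr *FooPlus4 = MCBinaryExpr::Create(
      MCBinaryExpr::Add, MCSymbolRefExpr::Create("foo", Ctx),
      MCConstantExpr::Create(4, Ctx), Ctx);
  MCValue Value;
  (void) FooPlus4->EvaluateAsRelocatable(Ctx, Value); // Constant part is 4.
}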
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
new file mode 100644
index 000000000000..d05031870add
--- /dev/null
+++ b/lib/MC/MCInst.cpp
@@ -0,0 +1,50 @@
+//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCOperand ";
+ if (!isValid())
+ OS << "INVALID";
+ else if (isReg())
+ OS << "Reg:" << getReg();
+ else if (isImm())
+ OS << "Imm:" << getImm();
+ else if (isExpr()) {
+ OS << "Expr:(";
+ getExpr()->print(OS, MAI);
+ OS << ")";
+ } else
+ OS << "UNDEFINED";
+ OS << ">";
+}
+
+void MCOperand::dump() const {
+ print(errs(), 0);
+ errs() << "\n";
+}
+
+void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCInst " << getOpcode();
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << " ";
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+void MCInst::dump() const {
+ print(errs(), 0);
+ errs() << "\n";
+}
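// Editor's note (not part of the patch): a small usage sketch for the printing
// helpers above. The factory names (MCOperand::CreateReg/CreateImm) are
// assumed from the era's MCInst.h, which is not part of this diff.
#include "llvm/MC/MCInst.h"

using namespace llvm;

void dumpExample() {
  MCInst Inst;
  Inst.setOpcode(1);                         // Target-specific opcode number.
  Inst.addOperand(MCOperand::CreateReg(7));  // Prints as "Reg:7".
  Inst.addOperand(MCOperand::CreateImm(42)); // Prints as "Imm:42".
  Inst.dump(); // "<MCInst 1 <MCOperand Reg:7> <MCOperand Imm:42>>"
}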
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
new file mode 100644
index 000000000000..e90c03c0cf42
--- /dev/null
+++ b/lib/MC/MCInstPrinter.cpp
@@ -0,0 +1,14 @@
+//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstPrinter.h"
+using namespace llvm;
+
+MCInstPrinter::~MCInstPrinter() {
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
new file mode 100644
index 000000000000..e04bd1fd1cb8
--- /dev/null
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -0,0 +1,379 @@
+//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class MCMachOStreamer : public MCStreamer {
+ /// SymbolFlags - We store the value for the 'desc' symbol field in the lowest
+  /// 16 bits of the implementation-defined flags.
+ enum SymbolFlags { // See <mach-o/nlist.h>.
+ SF_DescFlagsMask = 0xFFFF,
+
+ // Reference type flags.
+ SF_ReferenceTypeMask = 0x0007,
+ SF_ReferenceTypeUndefinedNonLazy = 0x0000,
+ SF_ReferenceTypeUndefinedLazy = 0x0001,
+ SF_ReferenceTypeDefined = 0x0002,
+ SF_ReferenceTypePrivateDefined = 0x0003,
+ SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
+ SF_ReferenceTypePrivateUndefinedLazy = 0x0005,
+
+ // Other 'desc' flags.
+ SF_NoDeadStrip = 0x0020,
+ SF_WeakReference = 0x0040,
+ SF_WeakDefinition = 0x0080
+ };
+
+private:
+ MCAssembler Assembler;
+
+ MCCodeEmitter *Emitter;
+
+ MCSectionData *CurSectionData;
+
+ DenseMap<const MCSection*, MCSectionData*> SectionMap;
+
+ DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;
+
+private:
+ MCFragment *getCurrentFragment() const {
+ assert(CurSectionData && "No current section!");
+
+ if (!CurSectionData->empty())
+ return &CurSectionData->getFragmentList().back();
+
+ return 0;
+ }
+
+ MCSectionData &getSectionData(const MCSection &Section) {
+ MCSectionData *&Entry = SectionMap[&Section];
+
+ if (!Entry)
+ Entry = new MCSectionData(Section, &Assembler);
+
+ return *Entry;
+ }
+
+ MCSymbolData &getSymbolData(const MCSymbol &Symbol) {
+ MCSymbolData *&Entry = SymbolMap[&Symbol];
+
+ if (!Entry)
+ Entry = new MCSymbolData(Symbol, 0, 0, &Assembler);
+
+ return *Entry;
+ }
+
+public:
+ MCMachOStreamer(MCContext &Context, raw_ostream &_OS, MCCodeEmitter *_Emitter)
+ : MCStreamer(Context), Assembler(Context, _OS), Emitter(_Emitter),
+ CurSectionData(0) {}
+ ~MCMachOStreamer() {}
+
+ const MCExpr *AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ getSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+
+ return Value;
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void SwitchSection(const MCSection *Section);
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+
+ virtual void EmitAssemblerFlag(AssemblerFlag Flag);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(const StringRef &Data);
+
+ virtual void EmitValue(const MCExpr *Value, unsigned Size);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCMachOStreamer::SwitchSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+
+ // If already in this section, then this is a noop.
+ if (Section == CurSection) return;
+
+ CurSection = Section;
+ CurSectionData = &getSectionData(*Section);
+}
+
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ // FIXME: We should also use offsets into Fill fragments.
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!F)
+ F = new MCDataFragment(CurSectionData);
+
+ MCSymbolData &SD = getSymbolData(*Symbol);
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+
+ // This causes the reference type and weak reference flags to be cleared.
+ SD.setFlags(SD.getFlags() & ~(SF_WeakReference | SF_ReferenceTypeMask));
+
+ Symbol->setSection(*CurSection);
+}
+
+void MCMachOStreamer::EmitAssemblerFlag(AssemblerFlag Flag) {
+ switch (Flag) {
+ case SubsectionsViaSymbols:
+ Assembler.setSubsectionsViaSymbols(true);
+ return;
+ }
+
+ assert(0 && "invalid assembler flag!");
+}
+
+void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // Only absolute symbols can be redefined.
+ assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
+ "Cannot define a symbol twice!");
+
+ llvm_unreachable("FIXME: Not yet implemented!");
+}
+
+void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ SymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCStreamer::IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = CurSectionData;
+ Assembler.getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+  // Adding a symbol attribute always introduces the symbol; note that an
+ // important side effect of calling getSymbolData here is to register the
+ // symbol with the assembler.
+ MCSymbolData &SD = getSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+  // leaves much to be desired. It doesn't really make sense to arbitrarily add
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more well
+ // defined.
+ switch (Attribute) {
+ case MCStreamer::IndirectSymbol:
+ case MCStreamer::Hidden:
+ case MCStreamer::Internal:
+ case MCStreamer::Protected:
+ case MCStreamer::Weak:
+ assert(0 && "Invalid symbol attribute for Mach-O!");
+ break;
+
+ case MCStreamer::Global:
+ SD.setExternal(true);
+ break;
+
+ case MCStreamer::LazyReference:
+ // FIXME: This requires -dynamic.
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ // Since .reference sets the no dead strip bit, it is equivalent to
+ // .no_dead_strip in practice.
+ case MCStreamer::Reference:
+ case MCStreamer::NoDeadStrip:
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ break;
+
+ case MCStreamer::PrivateExtern:
+ SD.setExternal(true);
+ SD.setPrivateExtern(true);
+ break;
+
+ case MCStreamer::WeakReference:
+ // FIXME: This requires -dynamic.
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_WeakReference);
+ break;
+
+ case MCStreamer::WeakDefinition:
+ // FIXME: 'as' enforces that this is defined and global. The manual claims
+ // it has to be in a coalesced section, but this isn't enforced.
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+ break;
+ }
+}
+
+void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ // Encode the 'desc' value into the lowest implementation defined bits.
+ assert(DescValue == (DescValue & SF_DescFlagsMask) &&
+ "Invalid .desc value!");
+ getSymbolData(*Symbol).setFlags(DescValue & SF_DescFlagsMask);
+}
+
+void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+ unsigned ByteAlignment) {
+ // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getSymbolData(*Symbol);
+ SD.setExternal(true);
+ SD.setCommon(Size, ByteAlignment);
+}
+
+void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ MCSectionData &SectData = getSectionData(*Section);
+
+  // The symbol may be absent; in that case the directive only creates the
+  // section.
+ if (!Symbol)
+ return;
+
+ // FIXME: Assert that this section has the zerofill type.
+
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getSymbolData(*Symbol);
+
+ MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData);
+ SD.setFragment(F);
+
+ Symbol->setSection(*Section);
+
+ // Update the maximum alignment on the zero fill section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitBytes(const StringRef &Data) {
+ MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!DF)
+ DF = new MCDataFragment(CurSectionData);
+ DF->getContents().append(Data.begin(), Data.end());
+}
+
+void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size) {
+ MCValue RelocValue;
+
+ if (!AddValueSymbols(Value)->EvaluateAsRelocatable(getContext(), RelocValue))
+ return llvm_report_error("expected relocatable expression");
+
+ new MCFillFragment(RelocValue, Size, 1, CurSectionData);
+}
+
+void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ CurSectionData);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > CurSectionData->getAlignment())
+ CurSectionData->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ MCValue RelocOffset;
+
+ if (!AddValueSymbols(Offset)->EvaluateAsRelocatable(getContext(),
+ RelocOffset))
+ return llvm_report_error("expected relocatable expression");
+
+ new MCOrgFragment(RelocOffset, Value, CurSectionData);
+}
+
+void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ if (!Emitter)
+ llvm_unreachable("no code emitter available!");
+
+ // FIXME: Relocations!
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter->EncodeInstruction(Inst, VecOS);
+ EmitBytes(VecOS.str());
+}
+
+void MCMachOStreamer::Finish() {
+ Assembler.Finish();
+}
+
+MCStreamer *llvm::createMachOStreamer(MCContext &Context, raw_ostream &OS,
+ MCCodeEmitter *CE) {
+ return new MCMachOStreamer(Context, OS, CE);
+}
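// Editor's note (not part of the patch): a hedged end-to-end sketch of the
// streamer added above, using only entry points visible in this patch. No
// code emitter is supplied, so EmitInstruction is not exercised.
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void writeObject(raw_ostream &OS) {
  MCContext Ctx;
  MCStreamer *S = createMachOStreamer(Ctx, OS, /*CodeEmitter=*/0);
  const MCSection *Text =
      MCSectionMachO::Create("__TEXT", "__text",
                             MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                             /*Reserved2=*/0, SectionKind::getText(), Ctx);
  S->SwitchSection(Text);
  S->EmitLabel(Ctx.GetOrCreateSymbol("_main"));
  S->EmitBytes(StringRef("\xc3", 1)); // x86 'ret', as raw bytes.
  S->Finish();                        // Lays out and writes the Mach-O file.
  delete S;
}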
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
new file mode 100644
index 000000000000..3cd22ca6f009
--- /dev/null
+++ b/lib/MC/MCNullStreamer.cpp
@@ -0,0 +1,70 @@
+//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+namespace {
+
+ class MCNullStreamer : public MCStreamer {
+ public:
+ MCNullStreamer(MCContext &Context) : MCStreamer(Context) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void SwitchSection(const MCSection *Section) {
+ CurSection = Section;
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {}
+
+ virtual void EmitAssemblerFlag(AssemblerFlag Flag) {}
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute) {}
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+ unsigned ByteAlignment) {}
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {}
+
+ virtual void EmitBytes(const StringRef &Data) {}
+
+ virtual void EmitValue(const MCExpr *Value, unsigned Size) {}
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {}
+
+ virtual void EmitInstruction(const MCInst &Inst) {}
+
+ virtual void Finish() {}
+
+ /// @}
+ };
+
+}
+
+MCStreamer *llvm::createNullStreamer(MCContext &Context) {
+ return new MCNullStreamer(Context);
+}
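// Editor's note (not part of the patch): minimal sketch. The null streamer
// accepts the full MCStreamer interface and discards everything, which is
// useful, for example, when only the side effects of parsing are wanted.
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"

using namespace llvm;

void parseOnly() {
  MCContext Ctx;
  MCStreamer *S = createNullStreamer(Ctx);
  S->EmitBytes("ignored"); // No-op: nothing is recorded or written.
  S->Finish();
  delete S;
}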
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
new file mode 100644
index 000000000000..333a4710f962
--- /dev/null
+++ b/lib/MC/MCSection.cpp
@@ -0,0 +1,45 @@
+//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MCSection
+//===----------------------------------------------------------------------===//
+
+MCSection::~MCSection() {
+}
+
+//===----------------------------------------------------------------------===//
+// MCSectionCOFF
+//===----------------------------------------------------------------------===//
+
+MCSectionCOFF *MCSectionCOFF::
+Create(const StringRef &Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
+ return new (Ctx) MCSectionCOFF(Name, IsDirective, K);
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ if (isDirective()) {
+ OS << getName() << '\n';
+ return;
+ }
+ OS << "\t.section\t" << getName() << ",\"";
+ if (getKind().isText())
+ OS << 'x';
+ if (getKind().isWriteable())
+ OS << 'w';
+ OS << "\"\n";
+}
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
new file mode 100644
index 000000000000..660a8c9489f0
--- /dev/null
+++ b/lib/MC/MCSectionELF.cpp
@@ -0,0 +1,138 @@
+//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+using namespace llvm;
+
+MCSectionELF *MCSectionELF::
+Create(const StringRef &Section, unsigned Type, unsigned Flags,
+ SectionKind K, bool isExplicit, MCContext &Ctx) {
+ return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit);
+}
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionELF::ShouldOmitSectionDirective(const char *Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (strcmp(Name, ".text") == 0 ||
+ strcmp(Name, ".data") == 0 ||
+ (strcmp(Name, ".bss") == 0 &&
+ !MAI.usesELFSectionDirectiveForBSS()))
+ return true;
+
+ return false;
+}
+
+// ShouldPrintSectionType - Returns true if the section type should be printed
+bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const {
+
+ if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS))
+ return false;
+
+ return true;
+}
+
+void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ if (ShouldOmitSectionDirective(SectionName.c_str(), MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << getSectionName();
+
+ // Handle the weird solaris syntax if desired.
+ if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
+ !(Flags & MCSectionELF::SHF_MERGE)) {
+ if (Flags & MCSectionELF::SHF_ALLOC)
+ OS << ",#alloc";
+ if (Flags & MCSectionELF::SHF_EXECINSTR)
+ OS << ",#execinstr";
+ if (Flags & MCSectionELF::SHF_WRITE)
+ OS << ",#write";
+ if (Flags & MCSectionELF::SHF_TLS)
+ OS << ",#tls";
+ } else {
+ OS << ",\"";
+ if (Flags & MCSectionELF::SHF_ALLOC)
+ OS << 'a';
+ if (Flags & MCSectionELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Flags & MCSectionELF::SHF_WRITE)
+ OS << 'w';
+ if (Flags & MCSectionELF::SHF_MERGE)
+ OS << 'M';
+ if (Flags & MCSectionELF::SHF_STRINGS)
+ OS << 'S';
+ if (Flags & MCSectionELF::SHF_TLS)
+ OS << 'T';
+
+ // If there are target-specific flags, print them.
+ if (Flags & ~MCSectionELF::TARGET_INDEP_SHF)
+ PrintTargetSpecificSectionFlags(MAI, OS);
+
+ OS << '"';
+
+ if (ShouldPrintSectionType(Type)) {
+ OS << ',';
+
+      // If the comment string is '@' (as on ARM, for example), use '%' instead
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Type == MCSectionELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Type == MCSectionELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Type == MCSectionELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Type == MCSectionELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Type == MCSectionELF::SHT_PROGBITS)
+ OS << "progbits";
+
+ if (getKind().isMergeable1ByteCString()) {
+ OS << ",1";
+ } else if (getKind().isMergeable2ByteCString()) {
+ OS << ",2";
+ } else if (getKind().isMergeable4ByteCString() ||
+ getKind().isMergeableConst4()) {
+ OS << ",4";
+ } else if (getKind().isMergeableConst8()) {
+ OS << ",8";
+ } else if (getKind().isMergeableConst16()) {
+ OS << ",16";
+ }
+ }
+ }
+
+ OS << '\n';
+}
+
+// HasCommonSymbols - True if this section holds common symbols; this is
+// indicated in the ELF object file by a symbol with the SHN_COMMON section
+// header index.
+bool MCSectionELF::HasCommonSymbols() const {
+
+ if (strncmp(SectionName.c_str(), ".gnu.linkonce.", 14) == 0)
+ return true;
+
+ return false;
+}
+
+
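// Editor's note (not part of the patch): a hedged sketch of what
// PrintSwitchToSection above produces for a mergeable string section on a
// GNU-style target. A default-constructed MCAsmInfo is assumed here.
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void printRodataStr(raw_ostream &OS) {
  MCContext Ctx;
  MCAsmInfo MAI; // Generic AsmInfo: not Sun-style, '#' comment string.
  const MCSectionELF *Sec = MCSectionELF::Create(
      ".rodata.str1.1", MCSectionELF::SHT_PROGBITS,
      MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
          MCSectionELF::SHF_STRINGS,
      SectionKind::getMergeable1ByteCString(), /*isExplicit=*/false, Ctx);
  // Expected output:  .section  .rodata.str1.1,"aMS",@progbits,1
  Sec->PrintSwitchToSection(MAI, OS);
}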
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
new file mode 100644
index 000000000000..b3aeb9c1789f
--- /dev/null
+++ b/lib/MC/MCSectionMachO.cpp
@@ -0,0 +1,271 @@
+//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// SectionTypeDescriptors - These are strings that describe the various section
+/// types. This *must* be kept in the same order as, and stay synchronized
+/// with, the section type list.
+static const struct {
+ const char *AssemblerName, *EnumName;
+} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = {
+ { "regular", "S_REGULAR" }, // 0x00
+ { 0, "S_ZEROFILL" }, // 0x01
+ { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
+ { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
+ { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
+ { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05
+ { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06
+ { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07
+ { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08
+ { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
+ { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
+ { "coalesced", "S_COALESCED" }, // 0x0B
+ { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
+ { "interposing", "S_INTERPOSING" }, // 0x0D
+ { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
+ { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
+ { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" } // 0x10
+};
+
+
+/// SectionAttrDescriptors - This is an array of descriptors for section
+/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed
+/// by attribute; instead it is searched. The last entry has an AttrFlagEnd
+/// AttrFlag value.
+static const struct {
+ unsigned AttrFlag;
+ const char *AssemblerName, *EnumName;
+} SectionAttrDescriptors[] = {
+#define ENTRY(ASMNAME, ENUM) \
+ { MCSectionMachO::ENUM, ASMNAME, #ENUM },
+ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS)
+ENTRY("no_toc", S_ATTR_NO_TOC)
+ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS)
+ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
+ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
+ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
+ENTRY("debug", S_ATTR_DEBUG)
+ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
+#undef ENTRY
+ { 0, "none", 0 }, // used if section has no attributes but has a stub size
+#define AttrFlagEnd 0xffffffff // not a legal value; multiple attribute bits set
+ { AttrFlagEnd, 0, 0 }
+};
+
+
+MCSectionMachO *MCSectionMachO::
+Create(const StringRef &Segment, const StringRef &Section,
+ unsigned TypeAndAttributes, unsigned Reserved2,
+ SectionKind K, MCContext &Ctx) {
+ // S_SYMBOL_STUBS must be set for Reserved2 to be non-zero.
+ return new (Ctx) MCSectionMachO(Segment, Section, TypeAndAttributes,
+ Reserved2, K);
+}
+
+void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
+
+ // Get the section type and attributes.
+ unsigned TAA = getTypeAndAttributes();
+ if (TAA == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << ',';
+
+ unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
+ assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
+ "Invalid SectionType specified!");
+
+ if (SectionTypeDescriptors[SectionType].AssemblerName)
+ OS << SectionTypeDescriptors[SectionType].AssemblerName;
+ else
+ OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>";
+
+ // If we don't have any attributes, we're done.
+ unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
+ if (SectionAttrs == 0) {
+ // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
+ // the attribute specifier.
+ if (Reserved2 != 0)
+ OS << ",none," << Reserved2;
+ OS << '\n';
+ return;
+ }
+
+ // Check each attribute to see if we have it.
+ char Separator = ',';
+ for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) {
+ // Check to see if we have this attribute.
+ if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
+ continue;
+
+ // Yep, clear it and print it.
+ SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
+
+ OS << Separator;
+ if (SectionAttrDescriptors[i].AssemblerName)
+ OS << SectionAttrDescriptors[i].AssemblerName;
+ else
+ OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
+ Separator = '+';
+ }
+
+ assert(SectionAttrs == 0 && "Unknown section attributes!");
+
+ // If we have a S_SYMBOL_STUBS size specified, print it.
+ if (Reserved2 != 0)
+ OS << ',' << Reserved2;
+ OS << '\n';
+}
+
+/// StripSpaces - This removes leading and trailing spaces from the StringRef.
+static void StripSpaces(StringRef &Str) {
+ while (!Str.empty() && isspace(Str[0]))
+ Str = Str.substr(1);
+ while (!Str.empty() && isspace(Str.back()))
+ Str = Str.substr(0, Str.size()-1);
+}
+
+/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+/// This is a string that can appear after a .section directive in a mach-o
+/// flavored .s file. If successful, this fills in the specified Out
+/// parameters and returns an empty string. When an invalid section
+/// specifier is present, this returns a string indicating the problem.
+std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
+ StringRef &Segment, // Out.
+ StringRef &Section, // Out.
+ unsigned &TAA, // Out.
+ unsigned &StubSize) { // Out.
+ // Find the first comma.
+ std::pair<StringRef, StringRef> Comma = Spec.split(',');
+
+ // If there is no comma, we fail.
+ if (Comma.second.empty())
+ return "mach-o section specifier requires a segment and section "
+ "separated by a comma";
+
+ // Capture segment, remove leading and trailing whitespace.
+ Segment = Comma.first;
+ StripSpaces(Segment);
+
+ // Verify that the segment is present and not too long.
+ if (Segment.empty() || Segment.size() > 16)
+ return "mach-o section specifier requires a segment whose length is "
+ "between 1 and 16 characters";
+
+ // Split the section name off from any attributes if present.
+ Comma = Comma.second.split(',');
+
+ // Capture section, remove leading and trailing whitespace.
+ Section = Comma.first;
+ StripSpaces(Section);
+
+ // Verify that the section is present and not too long.
+ if (Section.empty() || Section.size() > 16)
+ return "mach-o section specifier requires a section whose length is "
+ "between 1 and 16 characters";
+
+ // If there is no comma after the section, we're done.
+ TAA = 0;
+ StubSize = 0;
+ if (Comma.second.empty())
+ return "";
+
+ // Otherwise, we need to parse the section type and attributes.
+ Comma = Comma.second.split(',');
+
+ // Get the section type.
+ StringRef SectionType = Comma.first;
+ StripSpaces(SectionType);
+
+ // Figure out which section type it is.
+ unsigned TypeID;
+  for (TypeID = 0; TypeID != MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
+ if (SectionTypeDescriptors[TypeID].AssemblerName &&
+ SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
+ break;
+
+ // If we didn't find the section type, reject it.
+ if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
+ return "mach-o section specifier uses an unknown section type";
+
+ // Remember the TypeID.
+ TAA = TypeID;
+
+ // If we have no comma after the section type, there are no attributes.
+ if (Comma.second.empty()) {
+    // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // Otherwise, we do have some attributes. Split off the size specifier if
+ // present.
+ Comma = Comma.second.split(',');
+ StringRef Attrs = Comma.first;
+
+ // The attribute list is a '+' separated list of attributes.
+ std::pair<StringRef, StringRef> Plus = Attrs.split('+');
+
+ while (1) {
+ StringRef Attr = Plus.first;
+ StripSpaces(Attr);
+
+ // Look up the attribute.
+ for (unsigned i = 0; ; ++i) {
+ if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
+ return "mach-o section specifier has invalid attribute";
+
+ if (SectionAttrDescriptors[i].AssemblerName &&
+ Attr == SectionAttrDescriptors[i].AssemblerName) {
+ TAA |= SectionAttrDescriptors[i].AttrFlag;
+ break;
+ }
+ }
+
+ if (Plus.second.empty()) break;
+ Plus = Plus.second.split('+');
+  }
+
+  // Okay, we've parsed the section attributes; see if we have a stub size spec.
+ if (Comma.second.empty()) {
+    // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+  // If we have a stub size spec, we must have a section type of S_SYMBOL_STUBS.
+ if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier cannot have a stub size specified because "
+ "it does not have type 'symbol_stubs'";
+
+ // Okay, if we do, it must be a number.
+ StringRef StubSizeStr = Comma.second;
+ StripSpaces(StubSizeStr);
+
+ // Convert the stub size from a string to an integer.
+ if (StubSizeStr.getAsInteger(0, StubSize))
+ return "mach-o section specifier has a malformed stub size";
+
+ return "";
+}
+
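// Editor's note (not part of the patch): a hedged sketch of the parser above
// on a typical Mach-O section specifier.
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSectionMachO.h"
#include <string>

using namespace llvm;

bool parseTextSpec() {
  StringRef Segment, Section;
  unsigned TAA = 0, StubSize = 0;
  std::string Err = MCSectionMachO::ParseSectionSpecifier(
      "__TEXT,__text,regular,pure_instructions",
      Segment, Section, TAA, StubSize);
  // On success Err is empty, Segment == "__TEXT", Section == "__text",
  // TAA holds S_REGULAR plus S_ATTR_PURE_INSTRUCTIONS, StubSize == 0.
  return Err.empty();
}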
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index a634f33ad34a..8a6dcdae7a40 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -11,7 +11,7 @@
using namespace llvm;
-MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context) {
+MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context), CurSection(0) {
}
MCStreamer::~MCStreamer() {
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
new file mode 100644
index 000000000000..86ff3f3bddb1
--- /dev/null
+++ b/lib/MC/MCSymbol.cpp
@@ -0,0 +1,110 @@
+//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Sentinel value for the absolute pseudo section.
+const MCSection *MCSymbol::AbsolutePseudoSection =
+ reinterpret_cast<const MCSection *>(1);
+
+static bool isAcceptableChar(char C) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '.' && C != '@')
+ return false;
+ return true;
+}
+
+static char HexDigit(int V) {
+ return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static void MangleLetter(raw_ostream &OS, unsigned char C) {
+ OS << '_' << HexDigit(C >> 4) << HexDigit(C & 15) << '_';
+}
+
+/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
+/// for this assembler.
+static bool NameNeedsEscaping(const StringRef &Str, const MCAsmInfo &MAI) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If the first character is a number and the target does not allow this, we
+ // need quotes.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
+ return true;
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i]))
+ return true;
+ return false;
+}
+
+static void PrintMangledName(raw_ostream &OS, StringRef Str,
+ const MCAsmInfo &MAI) {
+ // The first character is not allowed to be a number unless the target
+ // explicitly allows it.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
+ MangleLetter(OS, Str[0]);
+ Str = Str.substr(1);
+ }
+
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (!isAcceptableChar(Str[i]))
+ MangleLetter(OS, Str[i]);
+ else
+ OS << Str[i];
+ }
+}
+
+/// PrintMangledQuotedName - On systems that support quoted symbols, we still
+/// have to escape some (obscure) characters like " and \n which would break the
+/// assembler's lexing.
+static void PrintMangledQuotedName(raw_ostream &OS, StringRef Str) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] == '"')
+ OS << "_QQ_";
+ else if (Str[i] == '\n')
+ OS << "_NL_";
+ else
+ OS << Str[i];
+ }
+ OS << '"';
+}
+
+
+void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ if (MAI == 0 || !NameNeedsEscaping(getName(), *MAI)) {
+ OS << getName();
+ return;
+ }
+
+ // On systems that do not allow quoted names, print with mangling.
+ if (!MAI->doesAllowQuotesInName())
+ return PrintMangledName(OS, getName(), *MAI);
+
+ // If the string contains a double quote or newline, we still have to mangle
+ // it.
+ if (getName().find('"') != std::string::npos ||
+ getName().find('\n') != std::string::npos)
+ return PrintMangledQuotedName(OS, getName());
+
+ OS << '"' << getName() << '"';
+}
+
+void MCSymbol::dump() const {
+ print(errs(), 0);
+}
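// Editor's note (not part of the patch): a hedged sketch of the escaping rules
// above. Symbols are obtained from an MCContext because MCSymbol construction
// is context-managed; a default-constructed MCAsmInfo is assumed.
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void printSymbols(raw_ostream &OS) {
  MCContext Ctx;
  MCAsmInfo MAI; // Generic target properties assumed.
  // All characters acceptable, so this prints unescaped.
  Ctx.GetOrCreateSymbol("plain$sym.1")->print(OS, &MAI);
  OS << '\n';
  // Contains a space, so it is quoted -- or mangled, if this target's
  // assembler does not allow quoted names.
  Ctx.GetOrCreateSymbol("odd name")->print(OS, &MAI);
  OS << '\n';
}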
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
new file mode 100644
index 000000000000..69bd10c8e699
--- /dev/null
+++ b/lib/MC/MCValue.cpp
@@ -0,0 +1,34 @@
+//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ if (isAbsolute()) {
+ OS << getConstant();
+ return;
+ }
+
+ getSymA()->print(OS, MAI);
+
+ if (getSymB()) {
+ OS << " - ";
+ getSymB()->print(OS, MAI);
+ }
+
+ if (getConstant())
+ OS << " + " << getConstant();
+}
+
+void MCValue::dump() const {
+ print(errs(), 0);
+}
diff --git a/lib/MC/TargetAsmParser.cpp b/lib/MC/TargetAsmParser.cpp
new file mode 100644
index 000000000000..05760c96cc65
--- /dev/null
+++ b/lib/MC/TargetAsmParser.cpp
@@ -0,0 +1,19 @@
+//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+TargetAsmParser::TargetAsmParser(const Target &T)
+ : TheTarget(T)
+{
+}
+
+TargetAsmParser::~TargetAsmParser() {
+}
diff --git a/lib/Makefile b/lib/Makefile
index 1e87d9ebfd10..3807f31c7037 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ LEVEL = ..
include $(LEVEL)/Makefile.config
PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
- Target ExecutionEngine Debugger Linker MC CompilerDriver
+ Target ExecutionEngine Linker MC CompilerDriver
include $(LEVEL)/Makefile.common
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 3b03c54e9764..e431d2790239 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>
@@ -122,27 +124,30 @@ assertArithmeticOK(const llvm::fltSemantics &semantics) {
If the exponent overflows, returns a large exponent with the
appropriate sign. */
static int
-readExponent(const char *p)
+readExponent(StringRef::iterator begin, StringRef::iterator end)
{
bool isNegative;
unsigned int absExponent;
const unsigned int overlargeExponent = 24000; /* FIXME. */
+ StringRef::iterator p = begin;
+
+ assert(p != end && "Exponent has no digits");
isNegative = (*p == '-');
- if (*p == '-' || *p == '+')
+ if (*p == '-' || *p == '+') {
p++;
+ assert(p != end && "Exponent has no digits");
+ }
absExponent = decDigitValue(*p++);
- assert (absExponent < 10U);
+ assert(absExponent < 10U && "Invalid character in exponent");
- for (;;) {
+ for (; p != end; ++p) {
unsigned int value;
value = decDigitValue(*p);
- if (value >= 10U)
- break;
+ assert(value < 10U && "Invalid character in exponent");
- p++;
value += absExponent * 10;
if (absExponent >= overlargeExponent) {
absExponent = overlargeExponent;
@@ -151,6 +156,8 @@ readExponent(const char *p)
absExponent = value;
}
+ assert(p == end && "Invalid exponent in exponent");
+
if (isNegative)
return -(int) absExponent;
else
@@ -160,28 +167,29 @@ readExponent(const char *p)
/* This is ugly and needs cleaning up, but I don't immediately see
how whilst remaining safe. */
static int
-totalExponent(const char *p, int exponentAdjustment)
+totalExponent(StringRef::iterator p, StringRef::iterator end,
+ int exponentAdjustment)
{
int unsignedExponent;
bool negative, overflow;
int exponent;
- /* Move past the exponent letter and sign to the digits. */
- p++;
+ assert(p != end && "Exponent has no digits");
+
negative = *p == '-';
- if(*p == '-' || *p == '+')
+ if(*p == '-' || *p == '+') {
p++;
+ assert(p != end && "Exponent has no digits");
+ }
unsignedExponent = 0;
overflow = false;
- for(;;) {
+ for(; p != end; ++p) {
unsigned int value;
value = decDigitValue(*p);
- if(value >= 10U)
- break;
+ assert(value < 10U && "Invalid character in exponent");
- p++;
unsignedExponent = unsignedExponent * 10 + value;
if(unsignedExponent > 65535)
overflow = true;
@@ -205,16 +213,21 @@ totalExponent(const char *p, int exponentAdjustment)
return exponent;
}
-static const char *
-skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
+static StringRef::iterator
+skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
+ StringRef::iterator *dot)
{
- *dot = 0;
- while(*p == '0')
+ StringRef::iterator p = begin;
+ *dot = end;
+  while(p != end && *p == '0')
p++;
if(*p == '.') {
*dot = p++;
- while(*p == '0')
+
+ assert(end - begin != 1 && "Significand has no digits");
+
+    while(p != end && *p == '0')
p++;
}
@@ -242,41 +255,50 @@ struct decimalInfo {
};
static void
-interpretDecimal(const char *p, decimalInfo *D)
+interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
+ decimalInfo *D)
{
- const char *dot;
-
- p = skipLeadingZeroesAndAnyDot (p, &dot);
+ StringRef::iterator dot = end;
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
D->firstSigDigit = p;
D->exponent = 0;
D->normalizedExponent = 0;
- for (;;) {
+ for (; p != end; ++p) {
if (*p == '.') {
- assert(dot == 0);
+ assert(dot == end && "String contains multiple dots");
dot = p++;
+ if (p == end)
+ break;
}
if (decDigitValue(*p) >= 10U)
break;
- p++;
}
- /* If number is all zerooes accept any exponent. */
- if (p != D->firstSigDigit) {
- if (*p == 'e' || *p == 'E')
- D->exponent = readExponent(p + 1);
+ if (p != end) {
+ assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* p points to the first non-digit in the string */
+ D->exponent = readExponent(p + 1, end);
/* Implied decimal point? */
- if (!dot)
+ if (dot == end)
dot = p;
+ }
+  /* If number is all zeroes, accept any exponent. */
+ if (p != D->firstSigDigit) {
/* Drop insignificant trailing zeroes. */
- do
+ if (p != begin) {
do
- p--;
- while (*p == '0');
- while (*p == '.');
+ do
+ p--;
+ while (p != begin && *p == '0');
+ while (p != begin && *p == '.');
+ }
/* Adjust the exponents for any decimal point. */
D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
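
The hunks above convert APFloat's number scanner from NUL-terminated
const char* walks to explicit [begin, end) iterator ranges, so every loop is
bounded by `end` and malformed input trips an assert instead of reading past
the buffer. A minimal standalone sketch of the same pattern, using a
hypothetical helper rather than code from this commit:

    #include <cassert>
    #include <string>

    // Parse an optionally signed decimal exponent from [p, end), in the
    // bounded-iterator style readExponent() adopts above. Asserts on
    // malformed input, as the library code does.
    static int parseExponent(const char *p, const char *end) {
      assert(p != end && "Exponent has no digits");
      bool negative = (*p == '-');
      if (*p == '-' || *p == '+') {
        ++p;
        assert(p != end && "Exponent has no digits");
      }
      int value = 0;
      for (; p != end; ++p) {
        unsigned digit = static_cast<unsigned>(*p - '0');
        assert(digit < 10u && "Invalid character in exponent");
        value = value * 10 + static_cast<int>(digit);
      }
      return negative ? -value : value;
    }

    int main() {
      std::string s = "-42";
      assert(parseExponent(s.data(), s.data() + s.size()) == -42);
    }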
@@ -292,7 +314,8 @@ interpretDecimal(const char *p, decimalInfo *D)
DIGITVALUE is the first hex digit of the fraction, P points to
the next digit. */
static lostFraction
-trailingHexadecimalFraction(const char *p, unsigned int digitValue)
+trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
+ unsigned int digitValue)
{
unsigned int hexDigit;
@@ -307,6 +330,8 @@ trailingHexadecimalFraction(const char *p, unsigned int digitValue)
while(*p == '0')
p++;
+ assert(p != end && "Invalid trailing hexadecimal fraction!");
+
hexDigit = hexDigitValue(*p);
/* If we ran off the end it is exactly zero or one-half, otherwise
@@ -667,6 +692,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
normalize(rmNearestTiesToEven, lfExactlyZero);
}
+APFloat::APFloat(const fltSemantics &ourSemantics) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ category = fcZero;
+ sign = false;
+}
+
+
APFloat::APFloat(const fltSemantics &ourSemantics,
fltCategory ourCategory, bool negative, unsigned type)
{
@@ -680,7 +713,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
makeNaN(type);
}
-APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
+APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
{
assertArithmeticOK(ourSemantics);
initialize(&ourSemantics);
@@ -1068,7 +1101,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
switch (rounding_mode) {
default:
- assert(0);
+ llvm_unreachable(0);
case rmNearestTiesToAway:
return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
@@ -1207,7 +1240,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
{
switch (convolve(category, rhs.category)) {
default:
- assert(0);
+ llvm_unreachable(0);
case convolve(fcNaN, fcZero):
case convolve(fcNaN, fcNormal):
@@ -1331,7 +1364,7 @@ APFloat::multiplySpecials(const APFloat &rhs)
{
switch (convolve(category, rhs.category)) {
default:
- assert(0);
+ llvm_unreachable(0);
case convolve(fcNaN, fcZero):
case convolve(fcNaN, fcNormal):
@@ -1373,7 +1406,7 @@ APFloat::divideSpecials(const APFloat &rhs)
{
switch (convolve(category, rhs.category)) {
default:
- assert(0);
+ llvm_unreachable(0);
case convolve(fcNaN, fcZero):
case convolve(fcNaN, fcNormal):
@@ -1415,7 +1448,7 @@ APFloat::modSpecials(const APFloat &rhs)
{
switch (convolve(category, rhs.category)) {
default:
- assert(0);
+ llvm_unreachable(0);
case convolve(fcNaN, fcZero):
case convolve(fcNaN, fcNormal):
@@ -1692,7 +1725,7 @@ APFloat::compare(const APFloat &rhs) const
switch (convolve(category, rhs.category)) {
default:
- assert(0);
+ llvm_unreachable(0);
case convolve(fcNaN, fcZero):
case convolve(fcNaN, fcNormal):
@@ -2106,13 +2139,13 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
}
APFloat::opStatus
-APFloat::convertFromHexadecimalString(const char *p,
+APFloat::convertFromHexadecimalString(const StringRef &s,
roundingMode rounding_mode)
{
- lostFraction lost_fraction;
+ lostFraction lost_fraction = lfExactlyZero;
integerPart *significand;
unsigned int bitPos, partsCount;
- const char *dot, *firstSignificantDigit;
+ StringRef::iterator dot, firstSignificantDigit;
zeroSignificand();
exponent = 0;
@@ -2123,47 +2156,58 @@ APFloat::convertFromHexadecimalString(const char *p,
bitPos = partsCount * integerPartWidth;
/* Skip leading zeroes and any (hexa)decimal point. */
- p = skipLeadingZeroesAndAnyDot(p, &dot);
+ StringRef::iterator begin = s.begin();
+ StringRef::iterator end = s.end();
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
firstSignificantDigit = p;
- for(;;) {
+ for(; p != end;) {
integerPart hex_value;
if(*p == '.') {
- assert(dot == 0);
+ assert(dot == end && "String contains multiple dots");
dot = p++;
+ if (p == end) {
+ break;
+ }
}
hex_value = hexDigitValue(*p);
if(hex_value == -1U) {
- lost_fraction = lfExactlyZero;
break;
}
p++;
- /* Store the number whilst 4-bit nibbles remain. */
- if(bitPos) {
- bitPos -= 4;
- hex_value <<= bitPos % integerPartWidth;
- significand[bitPos / integerPartWidth] |= hex_value;
- } else {
- lost_fraction = trailingHexadecimalFraction(p, hex_value);
- while(hexDigitValue(*p) != -1U)
- p++;
+ if (p == end) {
break;
+ } else {
+ /* Store the number whilst 4-bit nibbles remain. */
+ if(bitPos) {
+ bitPos -= 4;
+ hex_value <<= bitPos % integerPartWidth;
+ significand[bitPos / integerPartWidth] |= hex_value;
+ } else {
+ lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
+ while(p != end && hexDigitValue(*p) != -1U)
+ p++;
+ break;
+ }
}
}
/* Hex floats require an exponent but not a hexadecimal point. */
- assert(*p == 'p' || *p == 'P');
+ assert(p != end && "Hex strings require an exponent");
+ assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
/* Ignore the exponent if we are zero. */
if(p != firstSignificantDigit) {
int expAdjustment;
/* Implicit hexadecimal point? */
- if(!dot)
+ if (dot == end)
dot = p;
/* Calculate the exponent adjustment implicit in the number of
@@ -2179,7 +2223,7 @@ APFloat::convertFromHexadecimalString(const char *p,
expAdjustment -= partsCount * integerPartWidth;
/* Adjust for the given exponent. */
- exponent = totalExponent(p, expAdjustment);
+ exponent = totalExponent(p + 1, end, expAdjustment);
}
return normalize(rounding_mode, lost_fraction);
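
In the hex path above, each digit is packed into the significand four bits at
a time from the most significant end: `bitPos` starts at
`partsCount * integerPartWidth` and steps down by 4, with
`bitPos / integerPartWidth` selecting the 64-bit part and
`bitPos % integerPartWidth` the shift within it. A hand-checked sketch of
that packing for a two-part significand (my values, not from the commit):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Sketch of the nibble-packing loop from convertFromHexadecimalString(),
      // assuming two 64-bit parts (integerPartWidth == 64, partsCount == 2).
      const unsigned integerPartWidth = 64, partsCount = 2;
      uint64_t significand[2] = {0, 0};
      unsigned bitPos = partsCount * integerPartWidth; // 128

      const char digits[] = "18";                      // hex digits of "1.8p0"
      for (const char *p = digits; *p; ++p) {
        uint64_t hex_value = (*p >= 'a') ? *p - 'a' + 10 : *p - '0';
        bitPos -= 4;
        significand[bitPos / integerPartWidth] |=
            hex_value << (bitPos % integerPartWidth);
      }
      // '1' lands in bits [124,127] and '8' in bits [120,123] of the high part.
      assert(significand[1] == 0x1800000000000000ULL);
      assert(significand[0] == 0);
    }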
@@ -2271,13 +2315,14 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
}
APFloat::opStatus
-APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
+APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
{
decimalInfo D;
opStatus fs;
/* Scan the text. */
- interpretDecimal(p, &D);
+ StringRef::iterator p = str.begin();
+ interpretDecimal(p, str.end(), &D);
/* Handle the quick cases. First the case of no significant digits,
i.e. zero, and then exponents that are obviously too large or too
@@ -2332,10 +2377,14 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
multiplier = 1;
do {
- if (*p == '.')
+ if (*p == '.') {
p++;
-
+ if (p == str.end()) {
+ break;
+ }
+ }
decValue = decDigitValue(*p++);
+ assert(decValue < 10U && "Invalid character in significand");
multiplier *= 10;
val = val * 10 + decValue;
/* The maximum number that can be multiplied by ten with any
@@ -2363,20 +2412,28 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
}
APFloat::opStatus
-APFloat::convertFromString(const char *p, roundingMode rounding_mode)
+APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
{
assertArithmeticOK(*semantics);
+ assert(!str.empty() && "Invalid string length");
/* Handle a leading minus sign. */
- if(*p == '-')
- sign = 1, p++;
- else
- sign = 0;
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ sign = *p == '-' ? 1 : 0;
+ if(*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String has no digits");
+ }
- if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
- return convertFromHexadecimalString(p + 2, rounding_mode);
+ if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ assert(slen - 2 && "Invalid string");
+ return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
+ rounding_mode);
+ }
- return convertFromDecimalString(p, rounding_mode);
+ return convertFromDecimalString(StringRef(p, slen), rounding_mode);
}
/* Write out a hexadecimal representation of the floating point value
@@ -2661,6 +2718,42 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
}
APInt
+APFloat::convertQuadrupleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
+ assert (partCount()==2);
+
+ uint64_t myexponent, mysignificand, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7fff) << 48) |
+ (mysignificand2 & 0xffffffffffffLL);
+
+ return APInt(128, 2, words);
+}
+
+APInt
APFloat::convertDoubleAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
@@ -2728,10 +2821,13 @@ APFloat::bitcastToAPInt() const
{
if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
return convertFloatAPFloatToAPInt();
-
+
if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
return convertDoubleAPFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics*)&IEEEquad)
+ return convertQuadrupleAPFloatToAPInt();
+
if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
return convertPPCDoubleDoubleAPFloatToAPInt();
@@ -2743,7 +2839,8 @@ APFloat::bitcastToAPInt() const
float
APFloat::convertToFloat() const
{
- assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
+ "Float semantics are not IEEEsingle");
APInt api = bitcastToAPInt();
return api.bitsToFloat();
}
@@ -2751,7 +2848,8 @@ APFloat::convertToFloat() const
double
APFloat::convertToDouble() const
{
- assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
+ "Float semantics are not IEEEdouble");
APInt api = bitcastToAPInt();
return api.bitsToDouble();
}
@@ -2848,6 +2946,46 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
}
void
+APFloat::initFromQuadrupleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 >> 48) & 0x7fff;
+ uint64_t mysignificand = i1;
+ uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+
+ initialize(&APFloat::IEEEquad);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff &&
+ (mysignificand!=0 || mysignificand2 !=0)) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ else
+ significandParts()[1] |= 0x1000000000000LL; // integer bit
+ }
+}
+
+void
APFloat::initFromDoubleAPInt(const APInt &api)
{
assert(api.getBitWidth()==64);
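
Both new quad routines agree on one IEEE-754 binary128 layout: the high word
carries the sign in bit 63, a 15-bit exponent biased by 16383 in bits 48-62,
and the top 48 explicit significand bits; the low word holds the remaining 64
significand bits, and the implicit integer bit is re-materialized as
0x1000000000000LL in the high word. A hand-checked packing of the value 1.0
(my arithmetic, not output from this commit):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hand-rolled binary128 packing matching convertQuadrupleAPFloatToAPInt().
      int exponent = 0;                // 1.0 => unbiased exponent 0
      uint64_t sig_lo = 0, sig_hi = 0; // fraction bits all zero
      uint64_t myexponent = uint64_t(exponent + 16383); // bias

      uint64_t words[2];
      words[0] = sig_lo;
      words[1] = (uint64_t(0) << 63)                // sign
                 | ((myexponent & 0x7fff) << 48)    // biased exponent
                 | (sig_hi & 0xffffffffffffULL);    // top 48 fraction bits
      assert(words[1] == 0x3fff000000000000ULL && words[0] == 0);

      // Decoding inverts the same fields, as initFromQuadrupleAPInt() does.
      assert(((words[1] >> 48) & 0x7fff) == 16383);
      assert((words[1] >> 63) == 0);
    }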
@@ -2926,10 +3064,11 @@ APFloat::initFromAPInt(const APInt& api, bool isIEEE)
return initFromDoubleAPInt(api);
else if (api.getBitWidth()==80)
return initFromF80LongDoubleAPInt(api);
- else if (api.getBitWidth()==128 && !isIEEE)
- return initFromPPCDoubleDoubleAPInt(api);
+ else if (api.getBitWidth()==128)
+ return (isIEEE ?
+ initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
else
- assert(0);
+ llvm_unreachable(0);
}
APFloat::APFloat(const APInt& api, bool isIEEE)
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 30dc3526abd4..56d47736eaba 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -14,9 +14,11 @@
#define DEBUG_TYPE "apint"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
@@ -34,7 +36,7 @@ inline static uint64_t* getClearedMemory(unsigned numWords) {
return result;
}
-/// A utility function for allocating memory and checking for allocation
+/// A utility function for allocating memory and checking for allocation
/// failure. The content is not zeroed.
inline static uint64_t* getMemory(unsigned numWords) {
uint64_t * result = new uint64_t[numWords];
@@ -42,10 +44,36 @@ inline static uint64_t* getMemory(unsigned numWords) {
return result;
}
+/// A utility function that converts a character to a digit.
+inline static unsigned getDigit(char cdigit, uint8_t radix) {
+ unsigned r;
+
+ if (radix == 16) {
+ r = cdigit - '0';
+ if (r <= 9)
+ return r;
+
+ r = cdigit - 'A';
+ if (r <= 5)
+ return r + 10;
+
+ r = cdigit - 'a';
+ if (r <= 5)
+ return r + 10;
+ }
+
+ r = cdigit - '0';
+ if (r < radix)
+ return r;
+
+ return -1U;
+}
+
+
void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
pVal = getClearedMemory(getNumWords());
pVal[0] = val;
- if (isSigned && int64_t(val) < 0)
+ if (isSigned && int64_t(val) < 0)
for (unsigned i = 1; i < getNumWords(); ++i)
pVal[i] = -1ULL;
}
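
getDigit collapses the old isxdigit/isdigit cascade in fromString into
unsigned range checks: subtracting the range base makes any out-of-range
character wrap to a huge unsigned value, so a single compare covers each
range and -1U signals failure. A quick standalone sanity check (the helper
is file-static, so it is duplicated here for illustration):

    #include <cassert>
    #include <cstdint>

    // Copy of the file-static helper above, for illustration only.
    static unsigned getDigit(char cdigit, uint8_t radix) {
      unsigned r;
      if (radix == 16) {
        r = cdigit - '0';
        if (r <= 9) return r;
        r = cdigit - 'A';
        if (r <= 5) return r + 10;
        r = cdigit - 'a';
        if (r <= 5) return r + 10;
      }
      r = cdigit - '0';
      if (r < radix) return r;
      return -1U;
    }

    int main() {
      assert(getDigit('7', 10) == 7);
      assert(getDigit('f', 16) == 15);
      assert(getDigit('8', 8) == -1U);  // out of range for octal
      assert(getDigit('z', 16) == -1U); // rejected; the caller asserts
    }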
@@ -58,7 +86,7 @@ void APInt::initSlowCase(const APInt& that) {
APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
: BitWidth(numBits), VAL(0) {
- assert(BitWidth && "bitwidth too small");
+ assert(BitWidth && "Bitwidth too small");
assert(bigVal && "Null pointer detected!");
if (isSingleWord())
VAL = bigVal[0];
@@ -74,11 +102,10 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
clearUnusedBits();
}
-APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen,
- uint8_t radix)
+APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
: BitWidth(numbits), VAL(0) {
- assert(BitWidth && "bitwidth too small");
- fromString(numbits, StrStart, slen, radix);
+ assert(BitWidth && "Bitwidth too small");
+ fromString(numbits, Str, radix);
}
APInt& APInt::AssignSlowCase(const APInt& RHS) {
@@ -99,7 +126,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) {
VAL = 0;
pVal = getMemory(RHS.getNumWords());
memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
- } else if (getNumWords() == RHS.getNumWords())
+ } else if (getNumWords() == RHS.getNumWords())
memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
else if (RHS.isSingleWord()) {
delete [] pVal;
@@ -114,7 +141,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) {
}
APInt& APInt::operator=(uint64_t RHS) {
- if (isSingleWord())
+ if (isSingleWord())
VAL = RHS;
else {
pVal[0] = RHS;
@@ -126,7 +153,7 @@ APInt& APInt::operator=(uint64_t RHS) {
/// Profile - This method 'profiles' an APInt for use with FoldingSet.
void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(BitWidth);
-
+
if (isSingleWord()) {
ID.AddInteger(VAL);
return;
@@ -137,7 +164,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(pVal[i]);
}
-/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// add_1 - This function adds a single "digit" integer, y, to the multiple
/// "digit" integer array, x[]. x[] is modified to reflect the addition and
/// 1 is returned if there is a carry out, otherwise 0 is returned.
/// @returns the carry of the addition.
@@ -156,15 +183,15 @@ static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
/// @brief Prefix increment operator. Increments the APInt by one.
APInt& APInt::operator++() {
- if (isSingleWord())
+ if (isSingleWord())
++VAL;
else
add_1(pVal, pVal, getNumWords(), 1);
return clearUnusedBits();
}
-/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
-/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
/// no further borrowing is needed or it runs out of "digits" in x. The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
/// In other words, if y > x then this function returns 1, otherwise 0.
@@ -173,7 +200,7 @@ static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
for (unsigned i = 0; i < len; ++i) {
uint64_t X = x[i];
x[i] -= y;
- if (y > X)
+ if (y > X)
y = 1; // We have to "borrow 1" from next "digit"
else {
y = 0; // No need to borrow
@@ -185,7 +212,7 @@ static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
/// @brief Prefix decrement operator. Decrements the APInt by one.
APInt& APInt::operator--() {
- if (isSingleWord())
+ if (isSingleWord())
--VAL;
else
sub_1(pVal, getNumWords(), 1);
@@ -193,10 +220,10 @@ APInt& APInt::operator--() {
}
/// add - This function adds the integer array x to the integer array Y and
-/// places the result in dest.
+/// places the result in dest.
/// @returns the carry out from the addition
/// @brief General addition of 64-bit integer arrays
-static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
unsigned len) {
bool carry = false;
for (unsigned i = 0; i< len; ++i) {
@@ -209,10 +236,10 @@ static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
/// Adds the RHS APint to this APInt.
/// @returns this, after addition of RHS.
-/// @brief Addition assignment operator.
+/// @brief Addition assignment operator.
APInt& APInt::operator+=(const APInt& RHS) {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord())
+ if (isSingleWord())
VAL += RHS.VAL;
else {
add(pVal, pVal, RHS.pVal, getNumWords());
@@ -220,10 +247,10 @@ APInt& APInt::operator+=(const APInt& RHS) {
return clearUnusedBits();
}
-/// Subtracts the integer array y from the integer array x
+/// Subtracts the integer array y from the integer array x
/// @returns returns the borrow out.
/// @brief Generalized subtraction of 64-bit integer arrays.
-static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
unsigned len) {
bool borrow = false;
for (unsigned i = 0; i < len; ++i) {
@@ -236,10 +263,10 @@ static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
/// Subtracts the RHS APInt from this APInt
/// @returns this, after subtraction
-/// @brief Subtraction assignment operator.
+/// @brief Subtraction assignment operator.
APInt& APInt::operator-=(const APInt& RHS) {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord())
+ if (isSingleWord())
VAL -= RHS.VAL;
else
sub(pVal, pVal, RHS.pVal, getNumWords());
@@ -247,7 +274,7 @@ APInt& APInt::operator-=(const APInt& RHS) {
}
/// Multiplies an integer array, x by a a uint64_t integer and places the result
-/// into dest.
+/// into dest.
/// @returns the carry out of the multiplication.
/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
@@ -269,19 +296,19 @@ static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
// Determine if the add above introduces carry.
hasCarry = (dest[i] < carry) ? 1 : 0;
carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
- // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+ // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
// (2^32 - 1) + 2^32 = 2^64.
hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
carry += (lx * hy) & 0xffffffffULL;
dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
- carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
(carry >> 32) + ((lx * hy) >> 32) + hx * hy;
}
return carry;
}
-/// Multiplies integer array x by integer array y and stores the result into
+/// Multiplies integer array x by integer array y and stores the result into
/// the integer array dest. Note that dest's size must be >= xlen + ylen.
/// @brief Generalized multiplicate of integer arrays.
static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
@@ -307,7 +334,7 @@ static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
resul = (carry << 32) | (resul & 0xffffffffULL);
dest[i+j] += resul;
carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
- (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
+ (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
((lx * hy) >> 32) + hx * hy;
}
dest[i+xlen] = carry;
@@ -325,7 +352,7 @@ APInt& APInt::operator*=(const APInt& RHS) {
// Get some bit facts about LHS and check for zero
unsigned lhsBits = getActiveBits();
unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
- if (!lhsWords)
+ if (!lhsWords)
// 0 * X ===> 0
return *this;
@@ -385,7 +412,7 @@ APInt& APInt::operator^=(const APInt& RHS) {
VAL ^= RHS.VAL;
this->clearUnusedBits();
return *this;
- }
+ }
unsigned numWords = getNumWords();
for (unsigned i = 0; i < numWords; ++i)
pVal[i] ^= RHS.pVal[i];
@@ -423,7 +450,7 @@ bool APInt::operator !() const {
return !VAL;
for (unsigned i = 0; i < getNumWords(); ++i)
- if (pVal[i])
+ if (pVal[i])
return false;
return true;
}
@@ -456,7 +483,7 @@ APInt APInt::operator-(const APInt& RHS) const {
}
bool APInt::operator[](unsigned bitPosition) const {
- return (maskBit(bitPosition) &
+ return (maskBit(bitPosition) &
(isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
}
@@ -466,7 +493,7 @@ bool APInt::EqualSlowCase(const APInt& RHS) const {
unsigned n2 = RHS.getActiveBits();
// If the number of bits isn't the same, they aren't equal
- if (n1 != n2)
+ if (n1 != n2)
return false;
// If the number of bits fits in a word, we only need to compare the low word.
@@ -475,7 +502,7 @@ bool APInt::EqualSlowCase(const APInt& RHS) const {
// Otherwise, compare everything
for (int i = whichWord(n1 - 1); i >= 0; --i)
- if (pVal[i] != RHS.pVal[i])
+ if (pVal[i] != RHS.pVal[i])
return false;
return true;
}
@@ -512,9 +539,9 @@ bool APInt::ult(const APInt& RHS) const {
// Otherwise, compare all words
unsigned topWord = whichWord(std::max(n1,n2)-1);
for (int i = topWord; i >= 0; --i) {
- if (pVal[i] > RHS.pVal[i])
+ if (pVal[i] > RHS.pVal[i])
return false;
- if (pVal[i] < RHS.pVal[i])
+ if (pVal[i] < RHS.pVal[i])
return true;
}
return false;
@@ -552,14 +579,14 @@ bool APInt::slt(const APInt& RHS) const {
return true;
else if (rhsNeg)
return false;
- else
+ else
return lhs.ult(rhs);
}
APInt& APInt::set(unsigned bitPosition) {
- if (isSingleWord())
+ if (isSingleWord())
VAL |= maskBit(bitPosition);
- else
+ else
pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
return *this;
}
@@ -567,16 +594,16 @@ APInt& APInt::set(unsigned bitPosition) {
/// Set the given bit to 0 whose position is given as "bitPosition".
/// @brief Set a given bit to 0.
APInt& APInt::clear(unsigned bitPosition) {
- if (isSingleWord())
+ if (isSingleWord())
VAL &= ~maskBit(bitPosition);
- else
+ else
pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
return *this;
}
/// @brief Toggle every bit to its opposite value.
-/// Toggle a given bit to its opposite value whose position is given
+/// Toggle a given bit to its opposite value whose position is given
/// as "bitPosition".
/// @brief Toggles a given bit to its opposite value.
APInt& APInt::flip(unsigned bitPosition) {
@@ -586,16 +613,22 @@ APInt& APInt::flip(unsigned bitPosition) {
return *this;
}
-unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
- assert(str != 0 && "Invalid value string");
- assert(slen > 0 && "Invalid string length");
+unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ size_t slen = str.size();
- // Each computation below needs to know if its negative
- unsigned isNegative = str[0] == '-';
- if (isNegative) {
+ // Each computation below needs to know if it's negative.
+ StringRef::iterator p = str.begin();
+ unsigned isNegative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
slen--;
- str++;
+ assert(slen && "String is only a sign, needs a value.");
}
+
// For radixes of power-of-two values, the bits required is accurately and
// easily computed
if (radix == 2)
@@ -605,22 +638,27 @@ unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
if (radix == 16)
return slen * 4 + isNegative;
- // Otherwise it must be radix == 10, the hard case
- assert(radix == 10 && "Invalid radix");
-
// This is grossly inefficient but accurate. We could probably do something
// with a computation of roughly slen*64/20 and then adjust by the value of
// the first few digits. But, I'm not sure how accurate that could be.
// Compute a sufficient number of bits that is always large enough but might
- // be too large. This avoids the assertion in the constructor.
- unsigned sufficient = slen*64/18;
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
+ unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
// Convert to the actual binary value.
- APInt tmp(sufficient, str, slen, radix);
+ APInt tmp(sufficient, StringRef(p, slen), radix);
- // Compute how many bits are required.
- return isNegative + tmp.logBase2() + 1;
+ // Compute how many bits are required. If the log is infinite, assume we need
+  // just one bit.
+ unsigned log = tmp.logBase2();
+ if (log == (unsigned)-1) {
+ return isNegative + 1;
+ } else {
+ return isNegative + log + 1;
+ }
}
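
The slen * 64/18 bound is safe because a decimal digit carries
log2(10) ~= 3.322 bits while 64/18 ~= 3.556, so the integer estimate never
under-allocates for two or more digits; a lone digit would get only 3 bits
(too few for 8 or 9), which is exactly what the new `slen == 1 ? 4` case
covers. A small check of that margin (my arithmetic, not from the commit):

    #include <cassert>
    #include <cmath>

    int main() {
      // getBitsNeeded() allots 64/18 bits per decimal digit; a digit really
      // needs log2(10), so the estimate suffices for slen >= 2.
      for (unsigned slen = 2; slen < 64; ++slen) {
        unsigned sufficient = slen * 64 / 18;
        assert((double)sufficient >= std::ceil(slen * std::log2(10.0)));
      }
      // A single digit would get 64/18 == 3 bits -- too few for 8 or 9 --
      // hence the special case above.
    }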
// From http://www.burtleburtle.net, by Bob Jenkins.
@@ -720,7 +758,7 @@ APInt APInt::getHiBits(unsigned numBits) const {
/// LoBits - This function returns the low "numBits" bits of this APInt.
APInt APInt::getLoBits(unsigned numBits) const {
- return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
+ return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
BitWidth - numBits);
}
@@ -837,7 +875,7 @@ APInt APInt::byteSwap() const {
}
}
-APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
+APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
const APInt& API2) {
APInt A = API1, B = API2;
while (!!B) {
@@ -870,7 +908,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
// If the exponent doesn't shift all bits out of the mantissa
if (exp < 52)
- return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
APInt(width, mantissa >> (52 - exp));
// If the client didn't provide enough bits for us to shift the mantissa into
@@ -884,22 +922,23 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
return isNeg ? -Tmp : Tmp;
}
-/// RoundToDouble - This function convert this APInt to a double.
+/// RoundToDouble - This function converts this APInt to a double.
/// The layout for double is as following (IEEE Standard 754):
/// --------------------------------------
/// | Sign Exponent Fraction Bias |
/// |-------------------------------------- |
/// | 1[63] 11[62-52] 52[51-00] 1023 |
-/// --------------------------------------
+/// --------------------------------------
double APInt::roundToDouble(bool isSigned) const {
// Handle the simple case where the value is contained in one uint64_t.
+ // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
if (isSigned) {
- int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+ int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
return double(sext);
} else
- return double(VAL);
+ return double(getWord(0));
}
// Determine if the value is negative.
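
The switch to getWord(0) fixes the latent bug the new comment warns about:
in this era's APInt, VAL and pVal overlap in a union, so a multi-word APInt
whose active bits happen to fit in 64 stores its value behind pVal, and
reading VAL there reinterprets the heap pointer as the value. A sketch of
the trigger (hypothetical values, API spellings per this era's APInt.h):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    int main() {
      // 128 bits wide, but the value 42 is "active" in only 6 bits. The
      // object is heap-backed (pVal), so the old double(VAL) read the wrong
      // union member; getWord(0) reads pVal[0] here and VAL for single-word
      // APInts.
      APInt wide(128, 42);
      return wide.roundToDouble(false) == 42.0 ? 0 : 1;
    }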
@@ -920,7 +959,7 @@ double APInt::roundToDouble(bool isSigned) const {
if (exp > 1023) {
if (!isSigned || !isNeg)
return std::numeric_limits<double>::infinity();
- else
+ else
return -std::numeric_limits<double>::infinity();
}
exp += 1023; // Increment for 1023 bias
@@ -1030,7 +1069,7 @@ APInt &APInt::zext(unsigned width) {
uint64_t *newVal = getClearedMemory(wordsAfter);
if (wordsBefore == 1)
newVal[0] = VAL;
- else
+ else
for (unsigned i = 0; i < wordsBefore; ++i)
newVal[i] = pVal[i];
if (wordsBefore != 1)
@@ -1076,7 +1115,7 @@ APInt APInt::ashr(unsigned shiftAmt) const {
return APInt(BitWidth, 0); // undefined
else {
unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
- return APInt(BitWidth,
+ return APInt(BitWidth,
(((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
}
}
@@ -1113,11 +1152,11 @@ APInt APInt::ashr(unsigned shiftAmt) const {
if (bitsInWord < APINT_BITS_PER_WORD)
val[breakWord] |= ~0ULL << bitsInWord; // set high bits
} else {
- // Shift the low order words
+ // Shift the low order words
for (unsigned i = 0; i < breakWord; ++i) {
// This combines the shifted corresponding word with the low bits from
// the next word (shifted into this word's high bits).
- val[i] = (pVal[i+offset] >> wordShift) |
+ val[i] = (pVal[i+offset] >> wordShift) |
(pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
}
@@ -1130,10 +1169,10 @@ APInt APInt::ashr(unsigned shiftAmt) const {
if (isNegative()) {
if (wordShift > bitsInWord) {
if (breakWord > 0)
- val[breakWord-1] |=
+ val[breakWord-1] |=
~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
val[breakWord] |= ~0ULL;
- } else
+ } else
val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
}
}
@@ -1157,7 +1196,7 @@ APInt APInt::lshr(unsigned shiftAmt) const {
if (isSingleWord()) {
if (shiftAmt == BitWidth)
return APInt(BitWidth, 0);
- else
+ else
return APInt(BitWidth, this->VAL >> shiftAmt);
}
@@ -1168,7 +1207,7 @@ APInt APInt::lshr(unsigned shiftAmt) const {
return APInt(BitWidth, 0);
// If none of the bits are shifted out, the result is *this. This avoids
- // issues with shifting by the size of the integer type, which produces
+ // issues with shifting by the size of the integer type, which produces
// undefined results in the code below. This is also an optimization.
if (shiftAmt == 0)
return *this;
@@ -1199,7 +1238,7 @@ APInt APInt::lshr(unsigned shiftAmt) const {
return APInt(val,BitWidth).clearUnusedBits();
}
- // Shift the low order words
+ // Shift the low order words
unsigned breakWord = getNumWords() - offset -1;
for (unsigned i = 0; i < breakWord; ++i)
val[i] = (pVal[i+offset] >> wordShift) |
@@ -1306,7 +1345,7 @@ APInt APInt::rotr(unsigned rotateAmt) const {
// values using less than 52 bits, the value is converted to double and then
// the libc sqrt function is called. The result is rounded and then converted
// back to a uint64_t which is then used to construct the result. Finally,
-// the Babylonian method for computing square roots is used.
+// the Babylonian method for computing square roots is used.
APInt APInt::sqrt() const {
// Determine the magnitude of the value.
@@ -1318,7 +1357,7 @@ APInt APInt::sqrt() const {
static const uint8_t results[32] = {
/* 0 */ 0,
/* 1- 2 */ 1, 1,
- /* 3- 6 */ 2, 2, 2, 2,
+ /* 3- 6 */ 2, 2, 2, 2,
/* 7-12 */ 3, 3, 3, 3, 3, 3,
/* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
/* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -1334,10 +1373,10 @@ APInt APInt::sqrt() const {
if (magnitude < 52) {
#ifdef _MSC_VER
// Amazingly, VC++ doesn't have round().
- return APInt(BitWidth,
+ return APInt(BitWidth,
                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
#else
- return APInt(BitWidth,
+ return APInt(BitWidth,
uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
#endif
}
@@ -1346,7 +1385,7 @@ APInt APInt::sqrt() const {
// is a classical Babylonian method for computing the square root. This code
// was adapted to APINt from a wikipedia article on such computations.
// See http://www.wikipedia.org/ and go to the page named
- // Calculate_an_integer_square_root.
+ // Calculate_an_integer_square_root.
unsigned nbits = BitWidth, i = 4;
APInt testy(BitWidth, 16);
APInt x_old(BitWidth, 1);
@@ -1354,13 +1393,13 @@ APInt APInt::sqrt() const {
APInt two(BitWidth, 2);
// Select a good starting value using binary logarithms.
- for (;; i += 2, testy = testy.shl(2))
+ for (;; i += 2, testy = testy.shl(2))
if (i >= nbits || this->ule(testy)) {
x_old = x_old.shl(i / 2);
break;
}
- // Use the Babylonian method to arrive at the integer square root:
+ // Use the Babylonian method to arrive at the integer square root:
for (;;) {
x_new = (this->udiv(x_old) + x_old).udiv(two);
if (x_old.ule(x_new))
@@ -1369,9 +1408,9 @@ APInt APInt::sqrt() const {
}
// Make sure we return the closest approximation
- // NOTE: The rounding calculation below is correct. It will produce an
+ // NOTE: The rounding calculation below is correct. It will produce an
// off-by-one discrepancy with results from pari/gp. That discrepancy has been
- // determined to be a rounding issue with pari/gp as it begins to use a
+ // determined to be a rounding issue with pari/gp as it begins to use a
// floating point representation after 192 bits. There are no discrepancies
// between this algorithm and pari/gp for bit widths < 192 bits.
APInt square(x_old * x_old);
@@ -1386,7 +1425,7 @@ APInt APInt::sqrt() const {
else
return x_old + 1;
} else
- assert(0 && "Error in APInt::sqrt computation");
+ llvm_unreachable("Error in APInt::sqrt computation");
return x_old + 1;
}
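
The loop above is the classical Babylonian (Newton) iteration
x' = (v/x + x)/2 carried out in APInt arithmetic, followed by a correction
pass that returns the nearest rather than the floored root. The same idea on
plain 64-bit integers (a minimal sketch, not the APInt implementation; valid
while (x+1)^2 fits in 64 bits):

    #include <cassert>
    #include <cstdint>

    // Integer square root by the Babylonian method: iterate
    // x' = (v/x + x)/2 until it stops decreasing, then round to nearest,
    // mirroring the correction at the end of APInt::sqrt().
    static uint64_t isqrt(uint64_t v) {
      if (v < 2) return v;
      uint64_t x_old = v, x_new = (v / x_old + x_old) / 2;
      while (x_new < x_old) {
        x_old = x_new;
        x_new = (v / x_old + x_old) / 2;
      }
      uint64_t sq = x_old * x_old, next = (x_old + 1) * (x_old + 1);
      return (next - v <= v - sq) ? x_old + 1 : x_old;
    }

    int main() {
      assert(isqrt(0) == 0 && isqrt(1) == 1 && isqrt(2) == 1);
      assert(isqrt(15) == 4 && isqrt(16) == 4 && isqrt(17) == 4);
      assert(isqrt(24) == 5 && isqrt(25) == 5);
    }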
@@ -1409,7 +1448,7 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const {
APInt r[2] = { modulo, *this };
APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
APInt q(BitWidth, 0);
-
+
unsigned i;
for (i = 0; r[i^1] != 0; i ^= 1) {
// An overview of the math without the confusing bit-flipping:
@@ -1442,11 +1481,9 @@ APInt::ms APInt::magic() const {
const APInt& d = *this;
unsigned p;
APInt ad, anc, delta, q1, r1, q2, r2, t;
- APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
- APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
struct ms mag;
-
+
ad = d.abs();
t = signedMin + (d.lshr(d.getBitWidth() - 1));
anc = t - 1 - t.urem(ad); // absolute value of nc
@@ -1471,7 +1508,7 @@ APInt::ms APInt::magic() const {
}
delta = ad - r2;
} while (q1.ule(delta) || (q1 == delta && r1 == 0));
-
+
mag.m = q2 + 1;
if (d.isNegative()) mag.m = -mag.m; // resulting magic number
mag.s = p - d.getBitWidth(); // resulting shift
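
magic() is the Hacker's Delight routine for turning signed division by a
constant into a multiply-high plus shifts; the deleted allOnes/signedMax
locals were computed but never used. For flavor, the textbook 32-bit pair
for d = 7 is m = 0x92492493 with s = 2; a hand-checked sketch of how a code
generator would consume such a pair (my constants, not produced by this
commit; assumes arithmetic >> on negative values, as LLVM hosts provide):

    #include <cassert>
    #include <cstdint>

    // Divide by 7 without a divide, using the magic pair (m, s) that
    // APInt::magic() would compute for d == 7 at 32 bits.
    static int32_t divideBy7(int32_t n) {
      const int32_t m = int32_t(0x92492493);       // magic multiplier (< 0)
      int32_t q = int32_t((int64_t(n) * m) >> 32); // multiply-high
      q += n;                                      // adjust because m < 0
      q >>= 2;                                     // shift s == 2
      q += uint32_t(n) >> 31;                      // add 1 for negative n
      return q;
    }

    int main() {
      for (int32_t n = -1000; n <= 1000; ++n)
        assert(divideBy7(n) == n / 7);
    }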
@@ -1543,17 +1580,17 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
uint64_t b = uint64_t(1) << 32;
#if 0
- DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
- DEBUG(cerr << "KnuthDiv: original:");
- DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
- DEBUG(cerr << " by");
- DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
- DEBUG(cerr << '\n');
+ DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG(errs() << "KnuthDiv: original:");
+ DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
+ DEBUG(errs() << " by");
+ DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
+ DEBUG(errs() << '\n');
#endif
- // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
- // u and v by d. Note that we have taken Knuth's advice here to use a power
- // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
- // 2 allows us to shift instead of multiply and it is easy to determine the
+ // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+ // u and v by d. Note that we have taken Knuth's advice here to use a power
+ // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+ // 2 allows us to shift instead of multiply and it is easy to determine the
// shift amount from the leading zeros. We are basically normalizing the u
// and v so that its high bits are shifted to the top of v's range without
// overflow. Note that this can require an extra word in u so that u must
@@ -1575,27 +1612,27 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
u[m+n] = u_carry;
#if 0
- DEBUG(cerr << "KnuthDiv: normal:");
- DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
- DEBUG(cerr << " by");
- DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
- DEBUG(cerr << '\n');
+ DEBUG(errs() << "KnuthDiv: normal:");
+ DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
+ DEBUG(errs() << " by");
+ DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
+ DEBUG(errs() << '\n');
#endif
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
do {
- DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
- // D3. [Calculate q'.].
+ DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n');
+ // D3. [Calculate q'.].
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
// Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
// on v[n-2] determines at high speed most of the cases in which the trial
- // value qp is one too large, and it eliminates all cases where qp is two
- // too large.
+ // value qp is one too large, and it eliminates all cases where qp is two
+ // too large.
uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
- DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
+ DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n');
uint64_t qp = dividend / v[n-1];
uint64_t rp = dividend % v[n-1];
if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
@@ -1604,20 +1641,20 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
qp--;
}
- DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+ DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
// consists of a simple multiplication by a one-place number, combined with
- // a subtraction.
+ // a subtraction.
bool isNeg = false;
for (unsigned i = 0; i < n; ++i) {
uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
bool borrow = subtrahend > u_tmp;
- DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
- << ", subtrahend == " << subtrahend
- << ", borrow = " << borrow << '\n');
+ DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp
+ << ", subtrahend == " << subtrahend
+ << ", borrow = " << borrow << '\n');
uint64_t result = u_tmp - subtrahend;
unsigned k = j + i;
@@ -1629,14 +1666,14 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
k++;
}
isNeg |= borrow;
- DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
- u[j+i+1] << '\n');
+ DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
+ u[j+i+1] << '\n');
}
- DEBUG(cerr << "KnuthDiv: after subtraction:");
- DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
- DEBUG(cerr << '\n');
- // The digits (u[j+n]...u[j]) should be kept positive; if the result of
- // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ DEBUG(errs() << "KnuthDiv: after subtraction:");
+ DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
+ DEBUG(errs() << '\n');
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
// true value plus b**(n+1), namely as the b's complement of
// the true value, and a "borrow" to the left should be remembered.
//
@@ -1647,20 +1684,20 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
carry = carry && u[i] == 0;
}
}
- DEBUG(cerr << "KnuthDiv: after complement:");
- DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
- DEBUG(cerr << '\n');
+ DEBUG(errs() << "KnuthDiv: after complement:");
+ DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
+ DEBUG(errs() << '\n');
- // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+ // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
q[j] = (unsigned)qp;
if (isNeg) {
- // D6. [Add back]. The probability that this step is necessary is very
+ // D6. [Add back]. The probability that this step is necessary is very
// small, on the order of only 2/b. Make sure that test data accounts for
- // this possibility. Decrease q[j] by 1
+ // this possibility. Decrease q[j] by 1
q[j]--;
- // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
- // A carry will occur to the left of u[j+n], and it should be ignored
+ // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+ // A carry will occur to the left of u[j+n], and it should be ignored
// since it cancels with the borrow that occurred in D4.
bool carry = false;
for (unsigned i = 0; i < n; i++) {
@@ -1670,16 +1707,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
u[j+n] += carry;
}
- DEBUG(cerr << "KnuthDiv: after correction:");
- DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
- DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
+ DEBUG(errs() << "KnuthDiv: after correction:");
+ DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]);
+ DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
} while (--j >= 0);
- DEBUG(cerr << "KnuthDiv: quotient:");
- DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
- DEBUG(cerr << '\n');
+ DEBUG(errs() << "KnuthDiv: quotient:");
+ DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]);
+ DEBUG(errs() << '\n');
// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
// remainder may be obtained by dividing u[...] by d. If r is non-null we
@@ -1690,22 +1727,22 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
    // shift right here.
if (shift) {
unsigned carry = 0;
- DEBUG(cerr << "KnuthDiv: remainder:");
+ DEBUG(errs() << "KnuthDiv: remainder:");
for (int i = n-1; i >= 0; i--) {
r[i] = (u[i] >> shift) | carry;
carry = u[i] << (32 - shift);
- DEBUG(cerr << " " << r[i]);
+ DEBUG(errs() << " " << r[i]);
}
} else {
for (int i = n-1; i >= 0; i--) {
r[i] = u[i];
- DEBUG(cerr << " " << r[i]);
+ DEBUG(errs() << " " << r[i]);
}
}
- DEBUG(cerr << '\n');
+ DEBUG(errs() << '\n');
}
#if 0
- DEBUG(cerr << std::setbase(10) << '\n');
+ DEBUG(errs() << '\n');
#endif
}
@@ -1715,12 +1752,12 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
{
assert(lhsWords >= rhsWords && "Fractional result");
- // First, compose the values into an array of 32-bit words instead of
+ // First, compose the values into an array of 32-bit words instead of
// 64-bit words. This is a necessity of both the "short division" algorithm
- // and the the Knuth "classical algorithm" which requires there to be native
- // operations for +, -, and * on an m bit value with an m*2 bit result. We
- // can't use 64-bit operands here because we don't have native results of
- // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+  // and the Knuth "classical algorithm" which requires there to be native
+ // operations for +, -, and * on an m bit value with an m*2 bit result. We
+ // can't use 64-bit operands here because we don't have native results of
+ // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
  // work on big-endian machines.
uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
unsigned n = rhsWords * 2;
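
divide() re-expresses the operands in 32-bit digits because Algorithm D
needs the product of two digits to fit in a native double-width register,
and 64-bit operands would require a portable 128-bit result that does not
exist. The split, and the reassembly used when reconstituting the quotient
and remainder, in isolation (hypothetical word value):

    #include <cassert>
    #include <climits>
    #include <cstdint>

    int main() {
      // Split one 64-bit word into two 32-bit digits, as divide() does
      // before handing the arrays to KnuthDiv().
      uint64_t mask = ~0ull >> (sizeof(unsigned) * CHAR_BIT); // 0xffffffff
      uint64_t word = 0x123456789abcdef0ULL;
      unsigned lo = unsigned(word & mask);
      unsigned hi = unsigned(word >> 32);
      assert(lo == 0x9abcdef0u && hi == 0x12345678u);

      // Reassembly is the inverse.
      assert((uint64_t(lo) | (uint64_t(hi) << 32)) == word);
    }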
@@ -1769,9 +1806,9 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
if (Remainder)
memset(R, 0, n * sizeof(unsigned));
- // Now, adjust m and n for the Knuth division. n is the number of words in
+ // Now, adjust m and n for the Knuth division. n is the number of words in
// the divisor. m is the number of words by which the dividend exceeds the
- // divisor (i.e. m+n is the length of the dividend). These sizes must not
+ // divisor (i.e. m+n is the length of the dividend). These sizes must not
// contain any zero words or the Knuth algorithm fails.
for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
n--;
@@ -1828,10 +1865,10 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
} else
Quotient->clear();
- // The quotient is in Q. Reconstitute the quotient into Quotient's low
+ // The quotient is in Q. Reconstitute the quotient into Quotient's low
// order words.
if (lhsWords == 1) {
- uint64_t tmp =
+ uint64_t tmp =
uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
if (Quotient->isSingleWord())
Quotient->VAL = tmp;
@@ -1840,7 +1877,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
} else {
assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
for (unsigned i = 0; i < lhsWords; ++i)
- Quotient->pVal[i] =
+ Quotient->pVal[i] =
uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
}
}
@@ -1862,7 +1899,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
// The remainder is in R. Reconstitute the remainder into Remainder's low
// order words.
if (rhsWords == 1) {
- uint64_t tmp =
+ uint64_t tmp =
uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
if (Remainder->isSingleWord())
Remainder->VAL = tmp;
@@ -1871,7 +1908,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
} else {
assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
for (unsigned i = 0; i < rhsWords; ++i)
- Remainder->pVal[i] =
+ Remainder->pVal[i] =
uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
}
}
@@ -1902,9 +1939,9 @@ APInt APInt::udiv(const APInt& RHS) const {
unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
// Deal with some degenerate cases
- if (!lhsWords)
+ if (!lhsWords)
// 0 / X ===> 0
- return APInt(BitWidth, 0);
+ return APInt(BitWidth, 0);
else if (lhsWords < rhsWords || this->ult(RHS)) {
// X / Y ===> 0, iff X < Y
return APInt(BitWidth, 0);
@@ -1959,7 +1996,7 @@ APInt APInt::urem(const APInt& RHS) const {
return Remainder;
}
-void APInt::udivrem(const APInt &LHS, const APInt &RHS,
+void APInt::udivrem(const APInt &LHS, const APInt &RHS,
APInt &Quotient, APInt &Remainder) {
// Get some size facts about the dividend and divisor
unsigned lhsBits = LHS.getActiveBits();
@@ -1968,24 +2005,24 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
// Check the degenerate cases
- if (lhsWords == 0) {
+ if (lhsWords == 0) {
Quotient = 0; // 0 / Y ===> 0
Remainder = 0; // 0 % Y ===> 0
return;
- }
-
- if (lhsWords < rhsWords || LHS.ult(RHS)) {
+ }
+
+ if (lhsWords < rhsWords || LHS.ult(RHS)) {
Quotient = 0; // X / Y ===> 0, iff X < Y
Remainder = LHS; // X % Y ===> X, iff X < Y
return;
- }
-
+ }
+
if (LHS == RHS) {
Quotient = 1; // X / X ===> 1
Remainder = 0; // X % X ===> 0;
return;
- }
-
+ }
+
if (lhsWords == 1 && rhsWords == 1) {
// There is only one word to consider so use the native versions.
uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
@@ -1999,19 +2036,25 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
}
-void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
- uint8_t radix) {
+void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
// Check our assumptions here
+ assert(!str.empty() && "Invalid string length");
assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
"Radix should be 2, 8, 10, or 16!");
- assert(str && "String is null?");
- bool isNeg = str[0] == '-';
- if (isNeg)
- str++, slen--;
+
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ bool isNeg = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
assert((slen <= numbits || radix != 2) && "Insufficient bit width");
assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
- assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10)
+ && "Insufficient bit width");
// Allocate memory
if (!isSingleWord())
@@ -2026,30 +2069,9 @@ void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
APInt apradix(getBitWidth(), radix);
// Enter digit traversal loop
- for (unsigned i = 0; i < slen; i++) {
- // Get a digit
- unsigned digit = 0;
- char cdigit = str[i];
- if (radix == 16) {
- if (!isxdigit(cdigit))
- assert(0 && "Invalid hex digit in string");
- if (isdigit(cdigit))
- digit = cdigit - '0';
- else if (cdigit >= 'a')
- digit = cdigit - 'a' + 10;
- else if (cdigit >= 'A')
- digit = cdigit - 'A' + 10;
- else
- assert(0 && "huh? we shouldn't get here");
- } else if (isdigit(cdigit)) {
- digit = cdigit - '0';
- assert((radix == 10 ||
- (radix == 8 && digit != 8 && digit != 9) ||
- (radix == 2 && (digit == 0 || digit == 1))) &&
- "Invalid digit in string for given radix");
- } else {
- assert(0 && "Invalid character in digit string");
- }
+ for (StringRef::iterator e = str.end(); p != e; ++p) {
+ unsigned digit = getDigit(*p, radix);
+ assert(digit < radix && "Invalid character in digit string");
// Shift or multiply the value by the radix
if (slen > 1) {
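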
@@ -2077,19 +2099,19 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
bool Signed) const {
assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
"Radix should be 2, 8, 10, or 16!");
-
+
// First, check for a zero value and just short circuit the logic below.
if (*this == 0) {
Str.push_back('0');
return;
}
-
+
static const char Digits[] = "0123456789ABCDEF";
-
+
if (isSingleWord()) {
char Buffer[65];
char *BufPtr = Buffer+65;
-
+
uint64_t N;
if (Signed) {
int64_t I = getSExtValue();
@@ -2101,7 +2123,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
} else {
N = getZExtValue();
}
-
+
while (N) {
*--BufPtr = Digits[N % Radix];
N /= Radix;
@@ -2111,7 +2133,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
}
APInt Tmp(*this);
-
+
if (Signed && isNegative()) {
// They want to print the signed version and it is a negative value
// Flip the bits and add one to turn it into the equivalent positive
@@ -2120,18 +2142,18 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
Tmp++;
Str.push_back('-');
}
-
+
// We insert the digits backward, then reverse them to get the right order.
unsigned StartDig = Str.size();
-
- // For the 2, 8 and 16 bit cases, we can just shift instead of divide
- // because the number of bits per digit (1, 3 and 4 respectively) divides
+
+ // For the 2, 8 and 16 bit cases, we can just shift instead of divide
+ // because the number of bits per digit (1, 3 and 4 respectively) divides
// equaly. We just shift until the value is zero.
if (Radix != 10) {
// Just shift tmp right for each digit width until it becomes zero
unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
unsigned MaskAmt = Radix - 1;
-
+
while (Tmp != 0) {
unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
Str.push_back(Digits[Digit]);
@@ -2142,7 +2164,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
while (Tmp != 0) {
APInt APdigit(1, 0);
APInt tmp2(Tmp.getBitWidth(), 0);
- divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
+ divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
&APdigit);
unsigned Digit = (unsigned)APdigit.getZExtValue();
assert(Digit < Radix && "divide failed");
@@ -2150,7 +2172,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
Tmp = tmp2;
}
}
-
+
// Reverse the digits before returning.
std::reverse(Str.begin()+StartDig, Str.end());
}
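
For radix 2, 8, or 16 every output digit is an exact bit field, so the code
masks and shifts instead of paying an APInt divide() per digit; only radix
10 takes the division loop. The same strategy on a bare uint64_t (a sketch
only, without the APInt machinery):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <string>

    // Emit digits low-to-high by masking and shifting, then reverse --
    // the strategy toString() uses for power-of-two radixes.
    static std::string toHex(uint64_t v) {
      static const char Digits[] = "0123456789ABCDEF";
      if (v == 0) return "0";
      std::string s;
      while (v != 0) {
        s.push_back(Digits[v & 0xF]); // MaskAmt == Radix - 1
        v >>= 4;                      // ShiftAmt == 4 for radix 16
      }
      std::reverse(s.begin(), s.end());
      return s;
    }

    int main() {
      assert(toHex(0) == "0");
      assert(toHex(0xDEADBEEFULL) == "DEADBEEF");
    }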
@@ -2161,7 +2183,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
SmallString<40> S;
toString(S, Radix, Signed);
- return S.c_str();
+ return S.str();
}
@@ -2169,26 +2191,21 @@ void APInt::dump() const {
SmallString<40> S, U;
this->toStringUnsigned(U);
this->toStringSigned(S);
- fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
+ errs() << "APInt(" << BitWidth << "b, "
+ << U.str() << "u " << S.str() << "s)";
}
void APInt::print(raw_ostream &OS, bool isSigned) const {
SmallString<40> S;
this->toString(S, 10, isSigned);
- OS << S.c_str();
-}
-
-std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) {
- raw_os_ostream OS(o);
- OS << I;
- return o;
+ OS << S.str();
}
// This implements a variety of operations on a representation of
// arbitrary precision, two's-complement, bignum integer values.
-/* Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
- and unrestricting assumption. */
+// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
+// and unrestricting assumption.
#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index db0d8f31e55d..7a3fd87c17ee 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -12,130 +12,160 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Recycler.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Streams.h"
-#include <ostream>
-using namespace llvm;
+#include "llvm/Support/Recycler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Memory.h"
+#include <cstring>
-//===----------------------------------------------------------------------===//
-// MemRegion class implementation
-//===----------------------------------------------------------------------===//
+namespace llvm {
-namespace {
-/// MemRegion - This is one chunk of the BumpPtrAllocator.
-class MemRegion {
- unsigned RegionSize;
- MemRegion *Next;
- char *NextPtr;
-public:
- void Init(unsigned size, unsigned Alignment, MemRegion *next) {
- RegionSize = size;
- Next = next;
- NextPtr = (char*)(this+1);
-
- // Align NextPtr.
- NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
- ~(intptr_t)(Alignment-1));
- }
-
- const MemRegion *getNext() const { return Next; }
- unsigned getNumBytesAllocated() const {
- return NextPtr-(const char*)this;
- }
-
- /// Allocate - Allocate and return at least the specified number of bytes.
- ///
- void *Allocate(size_t AllocSize, size_t Alignment, MemRegion **RegPtr) {
-
- char* Result = (char*) (((uintptr_t) (NextPtr+Alignment-1))
- & ~((uintptr_t) Alignment-1));
-
- // Speculate the new value of NextPtr.
- char* NextPtrTmp = Result + AllocSize;
-
- // If we are still within the current region, return Result.
- if (unsigned (NextPtrTmp - (char*) this) <= RegionSize) {
- NextPtr = NextPtrTmp;
- return Result;
- }
-
- // Otherwise, we have to allocate a new chunk. Create one twice as big as
- // this one.
- MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
- NewRegion->Init(RegionSize*2, Alignment, this);
-
- // Update the current "first region" pointer to point to the new region.
- *RegPtr = NewRegion;
-
- // Try allocating from it now.
- return NewRegion->Allocate(AllocSize, Alignment, RegPtr);
- }
-
- /// Deallocate - Recursively release all memory for this and its next regions
- /// to the system.
- void Deallocate() {
- MemRegion *next = Next;
- free(this);
- if (next)
- next->Deallocate();
- }
+BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
+ SlabAllocator &allocator)
+ : SlabSize(size), SizeThreshold(threshold), Allocator(allocator),
+ CurSlab(0), BytesAllocated(0) {
+ StartNewSlab();
+}
- /// DeallocateAllButLast - Recursively release all memory for this and its
- /// next regions to the system stopping at the last region in the list.
- /// Returns the pointer to the last region.
- MemRegion *DeallocateAllButLast() {
- MemRegion *next = Next;
- if (!next)
- return this;
- free(this);
- return next->DeallocateAllButLast();
- }
-};
+BumpPtrAllocator::~BumpPtrAllocator() {
+ DeallocateSlabs(CurSlab);
}
-//===----------------------------------------------------------------------===//
-// BumpPtrAllocator class implementation
-//===----------------------------------------------------------------------===//
+/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should
+/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and
+/// AlignPtr(8, 4) == 8.
+char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) {
+ assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
+ "Alignment is not a power of two!");
-BumpPtrAllocator::BumpPtrAllocator() {
- TheMemory = malloc(4096);
- ((MemRegion*)TheMemory)->Init(4096, 1, 0);
+ // Do the alignment.
+ return (char*)(((uintptr_t)Ptr + Alignment - 1) &
+ ~(uintptr_t)(Alignment - 1));
}
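The bit-masking expression above is the standard power-of-two align-up idiom.
A standalone sketch of the same computation (illustrative names, not LLVM
API):

    #include <cassert>
    #include <cstddef>
    #include <stdint.h>

    // Round Ptr up to the next multiple of Alignment (a power of two).
    static char *alignUp(char *Ptr, size_t Alignment) {
      return (char*)(((uintptr_t)Ptr + Alignment - 1) &
                     ~(uintptr_t)(Alignment - 1));
    }

    int main() {
      char Buf[64];
      char *P = alignUp(Buf + 1, 16);
      assert(((uintptr_t)P & 15) == 0 && P >= Buf + 1);  // aligned, rounded up
      return 0;
    }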
-BumpPtrAllocator::~BumpPtrAllocator() {
- ((MemRegion*)TheMemory)->Deallocate();
+/// StartNewSlab - Allocate a new slab and move the bump pointers over into
+/// the new slab. Modifies CurPtr and End.
+void BumpPtrAllocator::StartNewSlab() {
+ MemSlab *NewSlab = Allocator.Allocate(SlabSize);
+ NewSlab->NextPtr = CurSlab;
+ CurSlab = NewSlab;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+}
+
+/// DeallocateSlabs - Deallocate all memory slabs after and including this
+/// one.
+void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) {
+ while (Slab) {
+ MemSlab *NextSlab = Slab->NextPtr;
+#ifndef NDEBUG
+ // Poison the memory so stale pointers crash sooner. Note we must
+ // preserve the Size and NextPtr fields at the beginning.
+ sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab));
+ memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab));
+#endif
+ Allocator.Deallocate(Slab);
+ Slab = NextSlab;
+ }
}
+/// Reset - Deallocate all but the current slab and reset the current pointer
+/// to the beginning of it, freeing all memory allocated so far.
void BumpPtrAllocator::Reset() {
- MemRegion *MRP = (MemRegion*)TheMemory;
- MRP = MRP->DeallocateAllButLast();
- MRP->Init(4096, 1, 0);
- TheMemory = MRP;
+ DeallocateSlabs(CurSlab->NextPtr);
+ CurSlab->NextPtr = 0;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
}
-void *BumpPtrAllocator::Allocate(size_t Size, size_t Align) {
- MemRegion *MRP = (MemRegion*)TheMemory;
- void *Ptr = MRP->Allocate(Size, Align, &MRP);
- TheMemory = MRP;
+/// Allocate - Allocate space at the specified alignment.
+///
+void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
+ // Keep track of how many bytes we've allocated.
+ BytesAllocated += Size;
+
+ // 0-byte alignment means 1-byte alignment.
+ if (Alignment == 0) Alignment = 1;
+
+ // Allocate the aligned space, going forwards from CurPtr.
+ char *Ptr = AlignPtr(CurPtr, Alignment);
+
+ // Check if we can hold it.
+ if (Ptr + Size <= End) {
+ CurPtr = Ptr + Size;
+ return Ptr;
+ }
+
+ // If Size is really big, allocate a separate slab for it.
+ size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1;
+ if (PaddedSize > SizeThreshold) {
+ MemSlab *NewSlab = Allocator.Allocate(PaddedSize);
+
+ // Put the new slab after the current slab, since we are not allocating
+ // into it.
+ NewSlab->NextPtr = CurSlab->NextPtr;
+ CurSlab->NextPtr = NewSlab;
+
+ Ptr = AlignPtr((char*)(NewSlab + 1), Alignment);
+ assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size);
+ return Ptr;
+ }
+
+ // Otherwise, start a new slab and try again.
+ StartNewSlab();
+ Ptr = AlignPtr(CurPtr, Alignment);
+ CurPtr = Ptr + Size;
+ assert(CurPtr <= End && "Unable to allocate memory!");
return Ptr;
}
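Taken together, Allocate() has three paths: bump the pointer within the
current slab, give an oversized request its own dedicated slab, or start a
fresh slab and retry. A hedged usage sketch, assuming the header supplies
default arguments for the constructor parameters shown above:

    #include "llvm/Support/Allocator.h"
    using namespace llvm;

    void demo() {
      BumpPtrAllocator Alloc;                   // default slab size/threshold
      void *Small = Alloc.Allocate(16, 8);      // bump-allocated in a slab
      void *Big   = Alloc.Allocate(1 << 20, 8); // over threshold: own slab
      Alloc.Reset();  // frees all but the current slab; Small/Big are dead
      (void)Small; (void)Big;
    }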
+unsigned BumpPtrAllocator::GetNumSlabs() const {
+ unsigned NumSlabs = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ ++NumSlabs;
+ }
+ return NumSlabs;
+}
+
void BumpPtrAllocator::PrintStats() const {
- unsigned BytesUsed = 0;
- unsigned NumRegions = 0;
- const MemRegion *R = (MemRegion*)TheMemory;
- for (; R; R = R->getNext(), ++NumRegions)
- BytesUsed += R->getNumBytesAllocated();
-
- cerr << "\nNumber of memory regions: " << NumRegions << "\n";
- cerr << "Bytes allocated: " << BytesUsed << "\n";
+ unsigned NumSlabs = 0;
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ ++NumSlabs;
+ }
+
+ errs() << "\nNumber of memory regions: " << NumSlabs << '\n'
+ << "Bytes used: " << BytesAllocated << '\n'
+ << "Bytes allocated: " << TotalMemory << '\n'
+ << "Bytes wasted: " << (TotalMemory - BytesAllocated)
+ << " (includes alignment, etc)\n";
+}
+
+MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator =
+ MallocSlabAllocator();
+
+SlabAllocator::~SlabAllocator() { }
+
+MallocSlabAllocator::~MallocSlabAllocator() { }
+
+MemSlab *MallocSlabAllocator::Allocate(size_t Size) {
+ MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0);
+ Slab->Size = Size;
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void MallocSlabAllocator::Deallocate(MemSlab *Slab) {
+ Allocator.Deallocate(Slab);
+}
+
+void PrintRecyclerStats(size_t Size,
+ size_t Align,
+ size_t FreeListSize) {
+ errs() << "Recycler element size: " << Size << '\n'
+ << "Recycler element alignment: " << Align << '\n'
+ << "Number of elements free for recycling: " << FreeListSize << '\n';
}
-void llvm::PrintRecyclerStats(size_t Size,
- size_t Align,
- size_t FreeListSize) {
- cerr << "Recycler element size: " << Size << '\n';
- cerr << "Recycler element alignment: " << Align << '\n';
- cerr << "Number of elements free for recycling: " << FreeListSize << '\n';
}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index f26c2c0f4eca..cd355ffe3604 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -3,32 +3,43 @@ add_llvm_library(LLVMSupport
APInt.cpp
APSInt.cpp
Allocator.cpp
- Annotation.cpp
CommandLine.cpp
ConstantRange.cpp
Debug.cpp
Dwarf.cpp
+ ErrorHandling.cpp
FileUtilities.cpp
FoldingSet.cpp
+ FormattedStream.cpp
GraphWriter.cpp
IsInf.cpp
IsNAN.cpp
ManagedStatic.cpp
MemoryBuffer.cpp
+ MemoryObject.cpp
PluginLoader.cpp
PrettyStackTrace.cpp
+ Regex.cpp
SlowOperationInformer.cpp
SmallPtrSet.cpp
SourceMgr.cpp
Statistic.cpp
- Streams.cpp
StringExtras.cpp
StringMap.cpp
StringPool.cpp
+ StringRef.cpp
SystemUtils.cpp
+ TargetRegistry.cpp
Timer.cpp
Triple.cpp
+ Twine.cpp
+ raw_os_ostream.cpp
raw_ostream.cpp
+ regcomp.c
+ regerror.c
+ regexec.c
+ regfree.c
+ regstrlcpy.c
)
target_link_libraries (LLVMSupport LLVMSystem)
diff --git a/lib/Support/COPYRIGHT.regex b/lib/Support/COPYRIGHT.regex
new file mode 100644
index 000000000000..a6392fd37c3d
--- /dev/null
+++ b/lib/Support/COPYRIGHT.regex
@@ -0,0 +1,54 @@
+$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $
+
+Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
+This software is not subject to any license of the American Telephone
+and Telegraph Company or of the Regents of the University of California.
+
+Permission is granted to anyone to use this software for any purpose on
+any computer system, and to alter it and redistribute it, subject
+to the following restrictions:
+
+1. The author is not responsible for the consequences of use of this
+ software, no matter how awful, even if they arise from flaws in it.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission. Since few users ever read sources,
+ credits must appear in the documentation.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software. Since few users
+ ever read sources, credits must appear in the documentation.
+
+4. This notice may not be removed or altered.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94
+ */
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 4922560200a0..626daa254dd7 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -16,22 +16,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/System/Host.h"
#include "llvm/System/Path.h"
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <ostream>
-#include <set>
-#include <cstdlib>
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
#include <cerrno>
-#include <cstring>
-#include <climits>
+#include <cstdlib>
using namespace llvm;
using namespace cl;
@@ -105,10 +105,10 @@ void Option::addArgument() {
/// GetOptionInfo - Scan the list of registered options, turning them into data
/// structures that are easier to handle.
-static void GetOptionInfo(std::vector<Option*> &PositionalOpts,
- std::vector<Option*> &SinkOpts,
- std::map<std::string, Option*> &OptionsMap) {
- std::vector<const char*> OptionNames;
+static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
+ SmallVectorImpl<Option*> &SinkOpts,
+ StringMap<Option*> &OptionsMap) {
+ SmallVector<const char*, 16> OptionNames;
Option *CAOpt = 0; // The ConsumeAfter option if it exists.
for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
// If this option wants to handle multiple option names, get the full set.
@@ -120,9 +120,8 @@ static void GetOptionInfo(std::vector<Option*> &PositionalOpts,
// Handle named options.
for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
// Add argument to the argument map!
- if (!OptionsMap.insert(std::pair<std::string,Option*>(OptionNames[i],
- O)).second) {
- cerr << ProgramName << ": CommandLine Error: Argument '"
+ if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
+ errs() << ProgramName << ": CommandLine Error: Argument '"
<< OptionNames[i] << "' defined more than once!\n";
}
}
@@ -151,29 +150,39 @@ static void GetOptionInfo(std::vector<Option*> &PositionalOpts,
/// LookupOption - Lookup the option specified by the specified option on the
/// command line. If there is a value specified (after an equal sign) return
-/// that as well.
-static Option *LookupOption(const char *&Arg, const char *&Value,
- std::map<std::string, Option*> &OptionsMap) {
- while (*Arg == '-') ++Arg; // Eat leading dashes
-
- const char *ArgEnd = Arg;
- while (*ArgEnd && *ArgEnd != '=')
- ++ArgEnd; // Scan till end of argument name.
+/// that as well. This assumes that leading dashes have already been stripped.
+static Option *LookupOption(StringRef &Arg, StringRef &Value,
+ const StringMap<Option*> &OptionsMap) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ size_t EqualPos = Arg.find('=');
+
+  // If there is no equals sign, just look up the whole argument.
+ if (EqualPos == StringRef::npos) {
+ // Look up the option.
+ StringMap<Option*>::const_iterator I = OptionsMap.find(Arg);
+ return I != OptionsMap.end() ? I->second : 0;
+ }
- if (*ArgEnd == '=') // If we have an equals sign...
- Value = ArgEnd+1; // Get the value, not the equals
+ // If the argument before the = is a valid option name, we match. If not,
+ // return Arg unmolested.
+ StringMap<Option*>::const_iterator I =
+ OptionsMap.find(Arg.substr(0, EqualPos));
+ if (I == OptionsMap.end()) return 0;
+
+ Value = Arg.substr(EqualPos+1);
+ Arg = Arg.substr(0, EqualPos);
+ return I->second;
+}
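For illustration, the same Arg/Value split expressed with only the standard
library (a sketch, not the LLVM code):

    #include <string>

    // Split "name=value"; HasValue distinguishes "-foo" (no '=') from
    // "-foo=" (empty value), mirroring the null/empty StringRef distinction.
    static void splitOption(const std::string &In, std::string &Name,
                            std::string &Value, bool &HasValue) {
      std::string::size_type EqualPos = In.find('=');
      HasValue = (EqualPos != std::string::npos);
      Name = In.substr(0, EqualPos);            // whole string when no '='
      Value = HasValue ? In.substr(EqualPos + 1) : std::string();
    }
    // splitOption("opt-level=2", ...) gives Name == "opt-level", Value == "2".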
- if (*Arg == 0) return 0;
- // Look up the option.
- std::map<std::string, Option*>::iterator I =
- OptionsMap.find(std::string(Arg, ArgEnd));
- return I != OptionsMap.end() ? I->second : 0;
-}
-
-static inline bool ProvideOption(Option *Handler, const char *ArgName,
- const char *Value, int argc, char **argv,
+/// ProvideOption - For Value, this differentiates between an empty value ("")
+/// and a null value (StringRef()). The latter is accepted for arguments that
+/// don't allow a value (-foo); the former is rejected (-foo=).
+static inline bool ProvideOption(Option *Handler, StringRef ArgName,
+ StringRef Value, int argc, char **argv,
int &i) {
// Is this a multi-argument option?
unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
@@ -181,68 +190,62 @@ static inline bool ProvideOption(Option *Handler, const char *ArgName,
// Enforce value requirements
switch (Handler->getValueExpectedFlag()) {
case ValueRequired:
- if (Value == 0) { // No value specified?
- if (i+1 < argc) { // Steal the next argument, like for '-o filename'
- Value = argv[++i];
- } else {
- return Handler->error(" requires a value!");
- }
+ if (Value.data() == 0) { // No value specified?
+ if (i+1 >= argc)
+ return Handler->error("requires a value!");
+ // Steal the next argument, like for '-o filename'
+ Value = argv[++i];
}
break;
case ValueDisallowed:
if (NumAdditionalVals > 0)
- return Handler->error(": multi-valued option specified"
- " with ValueDisallowed modifier!");
+ return Handler->error("multi-valued option specified"
+ " with ValueDisallowed modifier!");
- if (Value)
- return Handler->error(" does not allow a value! '" +
- std::string(Value) + "' specified.");
+ if (Value.data())
+ return Handler->error("does not allow a value! '" +
+ Twine(Value) + "' specified.");
break;
case ValueOptional:
break;
+
default:
- cerr << ProgramName
+ errs() << ProgramName
<< ": Bad ValueMask flag! CommandLine usage error:"
<< Handler->getValueExpectedFlag() << "\n";
- abort();
- break;
+ llvm_unreachable(0);
}
// If this isn't a multi-arg option, just run the handler.
- if (NumAdditionalVals == 0) {
- return Handler->addOccurrence(i, ArgName, Value ? Value : "");
- }
+ if (NumAdditionalVals == 0)
+ return Handler->addOccurrence(i, ArgName, Value);
+
// If it is, run the handle several times.
- else {
- bool MultiArg = false;
-
- if (Value) {
- if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
- return true;
- --NumAdditionalVals;
- MultiArg = true;
- }
+ bool MultiArg = false;
- while (NumAdditionalVals > 0) {
+ if (Value.data()) {
+ if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
+ return true;
+ --NumAdditionalVals;
+ MultiArg = true;
+ }
- if (i+1 < argc) {
- Value = argv[++i];
- } else {
- return Handler->error(": not enough values!");
- }
- if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
- return true;
- MultiArg = true;
- --NumAdditionalVals;
- }
- return false;
+ while (NumAdditionalVals > 0) {
+ if (i+1 >= argc)
+ return Handler->error("not enough values!");
+ Value = argv[++i];
+
+ if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
+ return true;
+ MultiArg = true;
+ --NumAdditionalVals;
}
+ return false;
}
-static bool ProvidePositionalOption(Option *Handler, const std::string &Arg,
- int i) {
+static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
int Dummy = i;
- return ProvideOption(Handler, Handler->ArgStr, Arg.c_str(), 0, 0, Dummy);
+ return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy);
}
@@ -260,33 +263,78 @@ static inline bool isPrefixedOrGrouping(const Option *O) {
// see if there options that satisfy the predicate. If we find one, return it,
// otherwise return null.
//
-static Option *getOptionPred(std::string Name, size_t &Length,
+static Option *getOptionPred(StringRef Name, size_t &Length,
bool (*Pred)(const Option*),
- std::map<std::string, Option*> &OptionsMap) {
+ const StringMap<Option*> &OptionsMap) {
- std::map<std::string, Option*>::iterator OMI = OptionsMap.find(Name);
- if (OMI != OptionsMap.end() && Pred(OMI->second)) {
- Length = Name.length();
- return OMI->second;
- }
+ StringMap<Option*>::const_iterator OMI = OptionsMap.find(Name);
- if (Name.size() == 1) return 0;
- do {
- Name.erase(Name.end()-1, Name.end()); // Chop off the last character...
+ // Loop while we haven't found an option and Name still has at least two
+ // characters in it (so that the next iteration will not be the empty
+  // string).
+ while (OMI == OptionsMap.end() && Name.size() > 1) {
+ Name = Name.substr(0, Name.size()-1); // Chop off the last character.
OMI = OptionsMap.find(Name);
-
- // Loop while we haven't found an option and Name still has at least two
- // characters in it (so that the next iteration will not be the empty
- // string...
- } while ((OMI == OptionsMap.end() || !Pred(OMI->second)) && Name.size() > 1);
+ }
if (OMI != OptionsMap.end() && Pred(OMI->second)) {
- Length = Name.length();
+ Length = Name.size();
return OMI->second; // Found one!
}
return 0; // No option found!
}
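The loop above is a longest-match lookup: keep chopping the final character
until a registered name is found. A standalone sketch over a plain std::map
(illustrative only):

    #include <map>
    #include <string>

    // Return the longest prefix of Name registered in Opts, or "" if none.
    static std::string longestOptionPrefix(std::string Name,
                                           const std::map<std::string, int> &Opts) {
      while (Name.size() > 1 && Opts.find(Name) == Opts.end())
        Name.erase(Name.size() - 1);   // chop off the last character
      return Opts.count(Name) ? Name : std::string();
    }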
+/// HandlePrefixedOrGroupedOption - The specified argument string (which started
+/// with at least one '-') does not fully match an available option. Check to
+/// see if this is a prefix or grouped option. If so, split Arg into an output
+/// Arg/Value pair and return the Option to parse it with.
+static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
+ bool &ErrorParsing,
+ const StringMap<Option*> &OptionsMap) {
+ if (Arg.size() == 1) return 0;
+
+ // Do the lookup!
+ size_t Length = 0;
+ Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap);
+ if (PGOpt == 0) return 0;
+
+ // If the option is a prefixed option, then the value is simply the
+ // rest of the name... so fall through to later processing, by
+ // setting up the argument name flags and value fields.
+ if (PGOpt->getFormattingFlag() == cl::Prefix) {
+ Value = Arg.substr(Length);
+ Arg = Arg.substr(0, Length);
+ assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
+ return PGOpt;
+ }
+
+  // This must be a grouped option... handle it now. Grouping options can't
+ // have values.
+ assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+ do {
+ // Move current arg name out of Arg into OneArgName.
+ StringRef OneArgName = Arg.substr(0, Length);
+ Arg = Arg.substr(Length);
+
+ // Because ValueRequired is an invalid flag for grouped arguments,
+ // we don't need to pass argc/argv in.
+ assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
+ "Option can not be cl::Grouping AND cl::ValueRequired!");
+ int Dummy;
+ ErrorParsing |= ProvideOption(PGOpt, OneArgName,
+ StringRef(), 0, 0, Dummy);
+
+ // Get the next grouping option.
+ PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
+ } while (PGOpt && Length != Arg.size());
+
+ // Return the last option with Arg cut down to just the last one.
+ return PGOpt;
+}
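Concretely, assuming hypothetical registrations of a cl::Prefix option "o"
and cl::Grouping flags "a", "b", "c":

    //   "-ofile" -> Arg = "o", Value = "file"     (prefix: rest of the name)
    //   "-abc"   -> occurrences of "a", "b", "c"  (grouping: values disallowed)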
+
+
+
static bool RequiresValue(const Option *O) {
return O->getNumOccurrencesFlag() == cl::Required ||
O->getNumOccurrencesFlag() == cl::OneOrMore;
@@ -300,45 +348,35 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
/// ParseCStringVector - Break INPUT up wherever one or more
/// whitespace characters are found, and store the resulting tokens in
/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated
-/// using strdup (), so it is the caller's responsibility to free ()
+/// using strdup(), so it is the caller's responsibility to free()
/// them later.
///
-static void ParseCStringVector(std::vector<char *> &output,
- const char *input) {
+static void ParseCStringVector(std::vector<char *> &OutputVector,
+ const char *Input) {
// Characters which will be treated as token separators:
- static const char *const delims = " \v\f\t\r\n";
-
- std::string work (input);
- // Skip past any delims at head of input string.
- size_t pos = work.find_first_not_of (delims);
- // If the string consists entirely of delims, then exit early.
- if (pos == std::string::npos) return;
- // Otherwise, jump forward to beginning of first word.
- work = work.substr (pos);
- // Find position of first delimiter.
- pos = work.find_first_of (delims);
-
- while (!work.empty() && pos != std::string::npos) {
- // Everything from 0 to POS is the next word to copy.
- output.push_back (strdup (work.substr (0,pos).c_str ()));
- // Is there another word in the string?
- size_t nextpos = work.find_first_not_of (delims, pos + 1);
- if (nextpos != std::string::npos) {
- // Yes? Then remove delims from beginning ...
- work = work.substr (work.find_first_not_of (delims, pos + 1));
- // and find the end of the word.
- pos = work.find_first_of (delims);
- } else {
- // No? (Remainder of string is delims.) End the loop.
- work = "";
- pos = std::string::npos;
+ StringRef Delims = " \v\f\t\r\n";
+
+ StringRef WorkStr(Input);
+ while (!WorkStr.empty()) {
+ // If the first character is a delimiter, strip them off.
+ if (Delims.find(WorkStr[0]) != StringRef::npos) {
+ size_t Pos = WorkStr.find_first_not_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+ WorkStr = WorkStr.substr(Pos);
+ continue;
}
- }
-
- // If `input' ended with non-delim char, then we'll get here with
- // the last word of `input' in `work'; copy it now.
- if (!work.empty ()) {
- output.push_back (strdup (work.c_str ()));
+
+ // Find position of first delimiter.
+ size_t Pos = WorkStr.find_first_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+
+ // Everything from 0 to Pos is the next word to copy.
+ char *NewStr = (char*)malloc(Pos+1);
+ memcpy(NewStr, WorkStr.data(), Pos);
+ NewStr[Pos] = 0;
+ OutputVector.push_back(NewStr);
+
+ WorkStr = WorkStr.substr(Pos);
}
}
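For example, a hypothetical call tokenizing an environment-style string:

    //   std::vector<char *> Out;
    //   ParseCStringVector(Out, "  -O2\t-o out.bc ");
    //   // Out now holds {"-O2", "-o", "out.bc"}, each heap-allocated;
    //   // the caller must free() every element, as noted above.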
@@ -372,20 +410,19 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
// Free all the strdup()ed strings.
for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
i != e; ++i)
- free (*i);
+ free(*i);
}
/// ExpandResponseFiles - Copy the contents of argv into newArgv,
/// substituting the contents of the response files for the arguments
/// of type @file.
-static void ExpandResponseFiles(int argc, char** argv,
+static void ExpandResponseFiles(unsigned argc, char** argv,
std::vector<char*>& newArgv) {
- for (int i = 1; i != argc; ++i) {
- char* arg = argv[i];
+ for (unsigned i = 1; i != argc; ++i) {
+ char *arg = argv[i];
if (arg[0] == '@') {
-
sys::PathWithStatus respFile(++arg);
// Check that the response file is not empty (mmap'ing empty
@@ -418,9 +455,9 @@ static void ExpandResponseFiles(int argc, char** argv,
void cl::ParseCommandLineOptions(int argc, char **argv,
const char *Overview, bool ReadResponseFiles) {
// Process all registered options.
- std::vector<Option*> PositionalOpts;
- std::vector<Option*> SinkOpts;
- std::map<std::string, Option*> Opts;
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> Opts;
GetOptionInfo(PositionalOpts, SinkOpts, Opts);
assert((!Opts.empty() || !PositionalOpts.empty()) &&
@@ -469,7 +506,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// unless there is only one positional argument...
if (PositionalOpts.size() > 2)
ErrorParsing |=
- Opt->error(" error - this positional option will never be matched, "
+ Opt->error("error - this positional option will never be matched, "
"because it does not Require a value, and a "
"cl::ConsumeAfter option is active!");
} else if (UnboundedFound && !Opt->ArgStr[0]) {
@@ -477,7 +514,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// not specified after an option that eats all extra arguments, or this
// one will never get any!
//
- ErrorParsing |= Opt->error(" error - option can never match, because "
+ ErrorParsing |= Opt->error("error - option can never match, because "
"another positional argument will match an "
"unbounded number of values, and this option"
" does not require a value!");
@@ -488,9 +525,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
}
// PositionalVals - A vector of "positional" arguments we accumulate into
- // the process at the end...
+ // the process at the end.
//
- std::vector<std::pair<std::string,unsigned> > PositionalVals;
+ SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals;
// If the program has named positional arguments, and the name has been run
// across, keep track of which positional argument was named. Otherwise put
@@ -501,8 +538,8 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
bool DashDashFound = false; // Have we read '--'?
for (int i = 1; i < argc; ++i) {
Option *Handler = 0;
- const char *Value = 0;
- const char *ArgName = "";
+ StringRef Value;
+ StringRef ArgName = "";
// If the option list changed, this means that some command line
// option has just been registered or deregistered. This can occur in
@@ -524,7 +561,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
if (ActivePositionalArg) {
ProvidePositionalOption(ActivePositionalArg, argv[i], i);
continue; // We are done!
- } else if (!PositionalOpts.empty()) {
+ }
+
+ if (!PositionalOpts.empty()) {
PositionalVals.push_back(std::make_pair(argv[i],i));
// All of the positional arguments have been fulfilled, give the rest to
@@ -550,69 +589,37 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// option is another positional argument. If so, treat it as an argument,
// otherwise feed it to the eating positional.
ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
Handler = LookupOption(ArgName, Value, Opts);
if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
ProvidePositionalOption(ActivePositionalArg, argv[i], i);
continue; // We are done!
}
- } else { // We start with a '-', must be an argument...
+ } else { // We start with a '-', must be an argument.
ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
Handler = LookupOption(ArgName, Value, Opts);
// Check to see if this "option" is really a prefixed or grouped argument.
- if (Handler == 0) {
- std::string RealName(ArgName);
- if (RealName.size() > 1) {
- size_t Length = 0;
- Option *PGOpt = getOptionPred(RealName, Length, isPrefixedOrGrouping,
- Opts);
-
- // If the option is a prefixed option, then the value is simply the
- // rest of the name... so fall through to later processing, by
- // setting up the argument name flags and value fields.
- //
- if (PGOpt && PGOpt->getFormattingFlag() == cl::Prefix) {
- Value = ArgName+Length;
- assert(Opts.find(std::string(ArgName, Value)) != Opts.end() &&
- Opts.find(std::string(ArgName, Value))->second == PGOpt);
- Handler = PGOpt;
- } else if (PGOpt) {
- // This must be a grouped option... handle them now.
- assert(isGrouping(PGOpt) && "Broken getOptionPred!");
-
- do {
- // Move current arg name out of RealName into RealArgName...
- std::string RealArgName(RealName.begin(),
- RealName.begin() + Length);
- RealName.erase(RealName.begin(), RealName.begin() + Length);
-
- // Because ValueRequired is an invalid flag for grouped arguments,
- // we don't need to pass argc/argv in...
- //
- assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
- "Option can not be cl::Grouping AND cl::ValueRequired!");
- int Dummy;
- ErrorParsing |= ProvideOption(PGOpt, RealArgName.c_str(),
- 0, 0, 0, Dummy);
-
- // Get the next grouping option...
- PGOpt = getOptionPred(RealName, Length, isGrouping, Opts);
- } while (PGOpt && Length != RealName.size());
-
- Handler = PGOpt; // Ate all of the options.
- }
- }
- }
+ if (Handler == 0)
+ Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
+ ErrorParsing, Opts);
}
if (Handler == 0) {
if (SinkOpts.empty()) {
- cerr << ProgramName << ": Unknown command line argument '"
+ errs() << ProgramName << ": Unknown command line argument '"
<< argv[i] << "'. Try: '" << argv[0] << " --help'\n";
ErrorParsing = true;
} else {
- for (std::vector<Option*>::iterator I = SinkOpts.begin(),
+ for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
E = SinkOpts.end(); I != E ; ++I)
(*I)->addOccurrence(i, "", argv[i]);
}
@@ -620,24 +627,23 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
}
// Check to see if this option accepts a comma separated list of values. If
- // it does, we have to split up the value into multiple values...
- if (Value && Handler->getMiscFlags() & CommaSeparated) {
- std::string Val(Value);
- std::string::size_type Pos = Val.find(',');
-
- while (Pos != std::string::npos) {
- // Process the portion before the comma...
- ErrorParsing |= ProvideOption(Handler, ArgName,
- std::string(Val.begin(),
- Val.begin()+Pos).c_str(),
+ // it does, we have to split up the value into multiple values.
+ if (Handler->getMiscFlags() & CommaSeparated) {
+ StringRef Val(Value);
+ StringRef::size_type Pos = Val.find(',');
+
+ while (Pos != StringRef::npos) {
+ // Process the portion before the comma.
+ ErrorParsing |= ProvideOption(Handler, ArgName, Val.substr(0, Pos),
argc, argv, i);
- // Erase the portion before the comma, AND the comma...
- Val.erase(Val.begin(), Val.begin()+Pos+1);
- Value += Pos+1; // Increment the original value pointer as well...
+ // Erase the portion before the comma, AND the comma.
+ Val = Val.substr(Pos+1);
+      Value = Value.substr(Pos+1); // Advance the original value as well.
- // Check for another comma...
+ // Check for another comma.
Pos = Val.find(',');
}
+ Value = Val;
}
// If this is a named positional argument, just remember that it is the
@@ -650,7 +656,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// Check and handle positional arguments now...
if (NumPositionalRequired > PositionalVals.size()) {
- cerr << ProgramName
+ errs() << ProgramName
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
<< " positional arguments: See: " << argv[0] << " --help\n";
@@ -658,14 +664,14 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
ErrorParsing = true;
} else if (!HasUnlimitedPositionals
&& PositionalVals.size() > PositionalOpts.size()) {
- cerr << ProgramName
+ errs() << ProgramName
<< ": Too many positional arguments specified!\n"
<< "Can specify at most " << PositionalOpts.size()
<< " positional arguments: See: " << argv[0] << " --help\n";
ErrorParsing = true;
} else if (ConsumeAfterOpt == 0) {
- // Positional args have already been handled if ConsumeAfter is specified...
+ // Positional args have already been handled if ConsumeAfter is specified.
unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
if (RequiresValue(PositionalOpts[i])) {
@@ -693,7 +699,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
ValNo++;
break;
default:
- assert(0 && "Internal error, unexpected NumOccurrences flag in "
+ llvm_unreachable("Internal error, unexpected NumOccurrences flag in "
"positional argument processing!");
}
}
@@ -730,13 +736,13 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
}
// Loop over args and make sure all required args are specified!
- for (std::map<std::string, Option*>::iterator I = Opts.begin(),
+ for (StringMap<Option*>::iterator I = Opts.begin(),
E = Opts.end(); I != E; ++I) {
switch (I->second->getNumOccurrencesFlag()) {
case Required:
case OneOrMore:
if (I->second->getNumOccurrences() == 0) {
- I->second->error(" must be specified at least once!");
+ I->second->error("must be specified at least once!");
ErrorParsing = true;
}
// Fall through
@@ -756,7 +762,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// Free all the strdup()ed strings.
for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
i != e; ++i)
- free (*i);
+ free(*i);
}
// If we had an error processing our arguments, don't let the program execute
@@ -767,36 +773,35 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// Option Base class implementation
//
-bool Option::error(std::string Message, const char *ArgName) {
- if (ArgName == 0) ArgName = ArgStr;
- if (ArgName[0] == 0)
- cerr << HelpStr; // Be nice for positional arguments
+bool Option::error(const Twine &Message, StringRef ArgName) {
+ if (ArgName.data() == 0) ArgName = ArgStr;
+ if (ArgName.empty())
+ errs() << HelpStr; // Be nice for positional arguments
else
- cerr << ProgramName << ": for the -" << ArgName;
+ errs() << ProgramName << ": for the -" << ArgName;
- cerr << " option: " << Message << "\n";
+ errs() << " option: " << Message << "\n";
return true;
}
-bool Option::addOccurrence(unsigned pos, const char *ArgName,
- const std::string &Value,
- bool MultiArg) {
+bool Option::addOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Value, bool MultiArg) {
if (!MultiArg)
NumOccurrences++; // Increment the number of times we have been seen
switch (getNumOccurrencesFlag()) {
case Optional:
if (NumOccurrences > 1)
- return error(": may only occur zero or one times!", ArgName);
+ return error("may only occur zero or one times!", ArgName);
break;
case Required:
if (NumOccurrences > 1)
- return error(": must occur exactly one time!", ArgName);
+ return error("must occur exactly one time!", ArgName);
// Fall through
case OneOrMore:
case ZeroOrMore:
case ConsumeAfter: break;
- default: return error(": bad num occurrences flag value!");
+ default: return error("bad num occurrences flag value!");
}
return handleOccurrence(pos, ArgName, Value);
@@ -823,8 +828,8 @@ size_t alias::getOptionWidth() const {
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
size_t L = std::strlen(ArgStr);
- cout << " -" << ArgStr << std::string(GlobalWidth-L-6, ' ') << " - "
- << HelpStr << "\n";
+ errs() << " -" << ArgStr;
+ errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
}
@@ -850,13 +855,12 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const {
//
void basic_parser_impl::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
- cout << " -" << O.ArgStr;
+ outs() << " -" << O.ArgStr;
if (const char *ValName = getValueName())
- cout << "=<" << getValueStr(O, ValName) << ">";
+ outs() << "=<" << getValueStr(O, ValName) << '>';
- cout << std::string(GlobalWidth-getOptionWidth(O), ' ') << " - "
- << O.HelpStr << "\n";
+ outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n';
}
@@ -864,81 +868,78 @@ void basic_parser_impl::printOptionInfo(const Option &O,
// parser<bool> implementation
//
-bool parser<bool>::parse(Option &O, const char *ArgName,
- const std::string &Arg, bool &Value) {
+bool parser<bool>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, bool &Value) {
if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
Arg == "1") {
Value = true;
- } else if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ return false;
+ }
+
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
Value = false;
- } else {
- return O.error(": '" + Arg +
- "' is invalid value for boolean argument! Try 0 or 1");
+ return false;
}
- return false;
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
}
// parser<boolOrDefault> implementation
//
-bool parser<boolOrDefault>::parse(Option &O, const char *ArgName,
- const std::string &Arg, boolOrDefault &Value) {
+bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, boolOrDefault &Value) {
if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
Arg == "1") {
Value = BOU_TRUE;
- } else if (Arg == "false" || Arg == "FALSE"
- || Arg == "False" || Arg == "0") {
+ return false;
+ }
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
Value = BOU_FALSE;
- } else {
- return O.error(": '" + Arg +
- "' is invalid value for boolean argument! Try 0 or 1");
+ return false;
}
- return false;
+
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
}
// parser<int> implementation
//
-bool parser<int>::parse(Option &O, const char *ArgName,
- const std::string &Arg, int &Value) {
- char *End;
- Value = (int)strtol(Arg.c_str(), &End, 0);
- if (*End != 0)
- return O.error(": '" + Arg + "' value invalid for integer argument!");
+bool parser<int>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, int &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for integer argument!");
return false;
}
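Note the inverted-looking test: StringRef::getAsInteger() returns true on
failure, and a radix of 0 auto-detects a 0x/0 prefix. A brief sketch
(hypothetical values):

    //   int V;
    //   StringRef("0x1A").getAsInteger(0, V);  // false (success), V == 26
    //   StringRef("26x").getAsInteger(0, V);   // true (parse failure)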
// parser<unsigned> implementation
//
-bool parser<unsigned>::parse(Option &O, const char *ArgName,
- const std::string &Arg, unsigned &Value) {
- char *End;
- errno = 0;
- unsigned long V = strtoul(Arg.c_str(), &End, 0);
- Value = (unsigned)V;
- if (((V == ULONG_MAX) && (errno == ERANGE))
- || (*End != 0)
- || (Value != V))
- return O.error(": '" + Arg + "' value invalid for uint argument!");
+bool parser<unsigned>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, unsigned &Value) {
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
return false;
}
// parser<double>/parser<float> implementation
//
-static bool parseDouble(Option &O, const std::string &Arg, double &Value) {
- const char *ArgStart = Arg.c_str();
+static bool parseDouble(Option &O, StringRef Arg, double &Value) {
+ SmallString<32> TmpStr(Arg.begin(), Arg.end());
+ const char *ArgStart = TmpStr.c_str();
char *End;
Value = strtod(ArgStart, &End);
if (*End != 0)
- return O.error(": '" +Arg+ "' value invalid for floating point argument!");
+ return O.error("'" + Arg + "' value invalid for floating point argument!");
return false;
}
-bool parser<double>::parse(Option &O, const char *AN,
- const std::string &Arg, double &Val) {
+bool parser<double>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, double &Val) {
return parseDouble(O, Arg, Val);
}
-bool parser<float>::parse(Option &O, const char *AN,
- const std::string &Arg, float &Val) {
+bool parser<float>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, float &Val) {
double dVal;
if (parseDouble(O, Arg, dVal))
return true;
@@ -955,14 +956,12 @@ bool parser<float>::parse(Option &O, const char *AN,
// argument string. If the option is not found, getNumOptions() is returned.
//
unsigned generic_parser_base::findOption(const char *Name) {
- unsigned i = 0, e = getNumOptions();
- std::string N(Name);
+ unsigned e = getNumOptions();
- while (i != e)
- if (getOption(i) == N)
+ for (unsigned i = 0; i != e; ++i) {
+ if (strcmp(getOption(i), Name) == 0)
return i;
- else
- ++i;
+ }
return e;
}
@@ -989,21 +988,21 @@ void generic_parser_base::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
if (O.hasArgStr()) {
size_t L = std::strlen(O.ArgStr);
- cout << " -" << O.ArgStr << std::string(GlobalWidth-L-6, ' ')
- << " - " << O.HelpStr << "\n";
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n';
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8;
- cout << " =" << getOption(i) << std::string(NumSpaces, ' ')
- << " - " << getDescription(i) << "\n";
+ outs() << " =" << getOption(i);
+ outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
}
} else {
if (O.HelpStr[0])
- cout << " " << O.HelpStr << "\n";
+ outs() << " " << O.HelpStr << '\n';
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
size_t L = std::strlen(getOption(i));
- cout << " -" << getOption(i) << std::string(GlobalWidth-L-8, ' ')
- << " - " << getDescription(i) << "\n";
+ outs() << " -" << getOption(i);
+ outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n';
}
}
}
@@ -1013,6 +1012,12 @@ void generic_parser_base::printOptionInfo(const Option &O,
// --help and --help-hidden option implementation
//
+static int OptNameCompare(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, Option*> pair_ty;
+
+ return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+}
+
namespace {
class HelpPrinter {
@@ -1020,14 +1025,6 @@ class HelpPrinter {
const Option *EmptyArg;
const bool ShowHidden;
- // isHidden/isReallyHidden - Predicates to be used to filter down arg lists.
- inline static bool isHidden(std::pair<std::string, Option *> &OptPair) {
- return OptPair.second->getOptionHiddenFlag() >= Hidden;
- }
- inline static bool isReallyHidden(std::pair<std::string, Option *> &OptPair) {
- return OptPair.second->getOptionHiddenFlag() == ReallyHidden;
- }
-
public:
explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {
EmptyArg = 0;
@@ -1037,34 +1034,40 @@ public:
if (Value == false) return;
// Get all the options.
- std::vector<Option*> PositionalOpts;
- std::vector<Option*> SinkOpts;
- std::map<std::string, Option*> OptMap;
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
- // Copy Options into a vector so we can sort them as we like...
- std::vector<std::pair<std::string, Option*> > Opts;
- copy(OptMap.begin(), OptMap.end(), std::back_inserter(Opts));
-
- // Eliminate Hidden or ReallyHidden arguments, depending on ShowHidden
- Opts.erase(std::remove_if(Opts.begin(), Opts.end(),
- std::ptr_fun(ShowHidden ? isReallyHidden : isHidden)),
- Opts.end());
-
- // Eliminate duplicate entries in table (from enum flags options, f.e.)
- { // Give OptionSet a scope
- std::set<Option*> OptionSet;
- for (unsigned i = 0; i != Opts.size(); ++i)
- if (OptionSet.count(Opts[i].second) == 0)
- OptionSet.insert(Opts[i].second); // Add new entry to set
- else
- Opts.erase(Opts.begin()+i--); // Erase duplicate
+ // Copy Options into a vector so we can sort them as we like.
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection.
+
+ for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end();
+ I != E; ++I) {
+ // Ignore really-hidden options.
+ if (I->second->getOptionHiddenFlag() == ReallyHidden)
+ continue;
+
+ // Unless showhidden is set, ignore hidden flags.
+ if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
+ continue;
+
+ // If we've already seen this option, don't add it to the list again.
+ if (!OptionSet.insert(I->second))
+ continue;
+
+ Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
+ I->second));
}
+
+ // Sort the options list alphabetically.
+ qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
if (ProgramOverview)
- cout << "OVERVIEW: " << ProgramOverview << "\n";
+ outs() << "OVERVIEW: " << ProgramOverview << "\n";
- cout << "USAGE: " << ProgramName << " [options]";
+ outs() << "USAGE: " << ProgramName << " [options]";
// Print out the positional options.
Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists...
@@ -1074,28 +1077,28 @@ public:
for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) {
if (PositionalOpts[i]->ArgStr[0])
- cout << " --" << PositionalOpts[i]->ArgStr;
- cout << " " << PositionalOpts[i]->HelpStr;
+ outs() << " --" << PositionalOpts[i]->ArgStr;
+ outs() << " " << PositionalOpts[i]->HelpStr;
}
// Print the consume after option info if it exists...
- if (CAOpt) cout << " " << CAOpt->HelpStr;
+ if (CAOpt) outs() << " " << CAOpt->HelpStr;
- cout << "\n\n";
+ outs() << "\n\n";
// Compute the maximum argument length...
MaxArgLen = 0;
for (size_t i = 0, e = Opts.size(); i != e; ++i)
MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
- cout << "OPTIONS:\n";
+ outs() << "OPTIONS:\n";
for (size_t i = 0, e = Opts.size(); i != e; ++i)
Opts[i].second->printOptionInfo(MaxArgLen);
// Print any extra help the user has declared.
for (std::vector<const char *>::iterator I = MoreHelp->begin(),
E = MoreHelp->end(); I != E; ++I)
- cout << *I;
+ outs() << *I;
MoreHelp->clear();
// Halt the program since help information was printed
@@ -1120,37 +1123,64 @@ HHOp("help-hidden", cl::desc("Display all available options"),
static void (*OverrideVersionPrinter)() = 0;
+static int TargetArraySortFn(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, const Target*> pair_ty;
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
+}
+
namespace {
class VersionPrinter {
public:
void print() {
- cout << "Low Level Virtual Machine (http://llvm.org/):\n";
- cout << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
+ raw_ostream &OS = outs();
+ OS << "Low Level Virtual Machine (http://llvm.org/):\n"
+ << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
#ifdef LLVM_VERSION_INFO
- cout << LLVM_VERSION_INFO;
+ OS << LLVM_VERSION_INFO;
#endif
- cout << "\n ";
+ OS << "\n ";
#ifndef __OPTIMIZE__
- cout << "DEBUG build";
+ OS << "DEBUG build";
#else
- cout << "Optimized build";
+ OS << "Optimized build";
#endif
#ifndef NDEBUG
- cout << " with assertions";
+ OS << " with assertions";
#endif
- cout << ".\n";
- cout << " Built " << __DATE__ << "(" << __TIME__ << ").\n";
+ OS << ".\n"
+ << " Built " << __DATE__ << " (" << __TIME__ << ").\n"
+ << " Host: " << sys::getHostTriple() << '\n'
+ << '\n'
+ << " Registered Targets:\n";
+
+ std::vector<std::pair<const char *, const Target*> > Targets;
+ size_t Width = 0;
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ Targets.push_back(std::make_pair(it->getName(), &*it));
+ Width = std::max(Width, strlen(Targets.back().first));
+ }
+ if (!Targets.empty())
+ qsort(&Targets[0], Targets.size(), sizeof(Targets[0]),
+ TargetArraySortFn);
+
+ for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+ OS << " " << Targets[i].first;
+ OS.indent(Width - strlen(Targets[i].first)) << " - "
+ << Targets[i].second->getShortDescription() << '\n';
+ }
+ if (Targets.empty())
+ OS << " (none)\n";
}
void operator=(bool OptionWasSpecified) {
- if (OptionWasSpecified) {
- if (OverrideVersionPrinter == 0) {
- print();
- exit(1);
- } else {
- (*OverrideVersionPrinter)();
- exit(1);
- }
+ if (!OptionWasSpecified) return;
+
+ if (OverrideVersionPrinter == 0) {
+ print();
+ exit(1);
}
+ (*OverrideVersionPrinter)();
+ exit(1);
}
};
} // End anonymous namespace
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index cb8c4b013c32..423e90d99352 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -23,12 +23,12 @@
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Instructions.h"
using namespace llvm;
/// Initialize a full (the default) or empty set for the specified type.
///
-ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) :
- Lower(BitWidth, 0), Upper(BitWidth, 0) {
+ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
if (Full)
Lower = Upper = APInt::getMaxValue(BitWidth);
else
@@ -37,16 +37,63 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) :
/// Initialize a range to hold the single specified value.
///
-ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) { }
+ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) {}
ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
Lower(L), Upper(U) {
- assert(L.getBitWidth() == U.getBitWidth() &&
+ assert(L.getBitWidth() == U.getBitWidth() &&
"ConstantRange with unequal bit widths");
assert((L != U || (L.isMaxValue() || L.isMinValue())) &&
"Lower == Upper, but they aren't min or max value!");
}
+ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
+ const ConstantRange &CR) {
+ uint32_t W = CR.getBitWidth();
+ switch (Pred) {
+ default: assert(!"Invalid ICmp predicate to makeICmpRegion()");
+ case ICmpInst::ICMP_EQ:
+ return CR;
+ case ICmpInst::ICMP_NE:
+ if (CR.isSingleElement())
+ return ConstantRange(CR.getUpper(), CR.getLower());
+ return ConstantRange(W);
+ case ICmpInst::ICMP_ULT:
+ return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax());
+ case ICmpInst::ICMP_SLT:
+ return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax());
+ case ICmpInst::ICMP_ULE: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMaxValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getMinValue(W), UMax + 1);
+ }
+ case ICmpInst::ICMP_SLE: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+ }
+ case ICmpInst::ICMP_UGT:
+ return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W));
+ case ICmpInst::ICMP_SGT:
+ return ConstantRange(CR.getSignedMin() + 1,
+ APInt::getSignedMinValue(W));
+ case ICmpInst::ICMP_UGE: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMinValue())
+ return ConstantRange(W);
+ return ConstantRange(UMin, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGE: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMinSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(SMin, APInt::getSignedMinValue(W));
+ }
+ }
+}
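A worked example of one region above (8 bits): for CR = [10, 20) the unsigned
max is 19, so the ICMP_ULT region is [0, 19): exactly the X with X <u Y for
some Y in CR. As a sketch:

    //   ConstantRange CR(APInt(8, 10), APInt(8, 20));      // {10..19}
    //   ConstantRange R =
    //       ConstantRange::makeICmpRegion(ICmpInst::ICMP_ULT, CR);
    //   // R == [0, 19), i.e. {0..18}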
+
/// isFullSet - Return true if this set contains all of the elements possible
/// for this data-type
bool ConstantRange::isFullSet() const {
@@ -112,14 +159,10 @@ APInt ConstantRange::getSignedMax() const {
else
return SignedMax;
} else {
- if ((getUpper() - 1).slt(getLower())) {
- if (getLower() != SignedMax)
- return SignedMax;
- else
- return getUpper() - 1;
- } else {
+ if (getLower().isNegative() == getUpper().isNegative())
+ return SignedMax;
+ else
return getUpper() - 1;
- }
}
}
@@ -157,6 +200,30 @@ bool ConstantRange::contains(const APInt &V) const {
return Lower.ule(V) || V.ult(Upper);
}
+/// contains - Return true if the argument is a subset of this range.
+/// Two equal sets contain each other. The empty set is considered to be
+/// contained by all other sets.
+///
+bool ConstantRange::contains(const ConstantRange &Other) const {
+ if (isFullSet()) return true;
+ if (Other.isFullSet()) return false;
+ if (Other.isEmptySet()) return true;
+ if (isEmptySet()) return false;
+
+ if (!isWrappedSet()) {
+ if (Other.isWrappedSet())
+ return false;
+
+ return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper);
+ }
+
+ if (!Other.isWrappedSet())
+ return Other.getUpper().ule(Upper) ||
+ Lower.ule(Other.getLower());
+
+ return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower());
+}
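For instance (8 bits, both sets wrapped): [250, 5) covers {250..255, 0..4}
and contains [252, 2) = {252..255, 0..1}; the final clause above checks
exactly this (2 ule 5 and 250 ule 252). As a sketch:

    //   ConstantRange Big(APInt(8, 250), APInt(8, 5));
    //   ConstantRange Small(APInt(8, 252), APInt(8, 2));
    //   assert(Big.contains(Small));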
+
/// subtract - Subtract the specified constant from the endpoints of this
/// constant range.
ConstantRange ConstantRange::subtract(const APInt &Val) const {
@@ -208,59 +275,20 @@ ConstantRange::intersect1Wrapped(const ConstantRange &LHS,
}
/// intersectWith - Return the range that results from the intersection of this
-/// range with another range.
-///
+/// range with another range. The resultant range is guaranteed to include all
+/// elements contained in both input ranges, and to have the smallest possible
+/// set size that does so. Because there may be two intersections with the
+/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A).
ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
assert(getBitWidth() == CR.getBitWidth() &&
"ConstantRange types don't agree!");
- // Handle common special cases
- if (isEmptySet() || CR.isFullSet())
- return *this;
- if (isFullSet() || CR.isEmptySet())
- return CR;
-
- if (!isWrappedSet()) {
- if (!CR.isWrappedSet()) {
- using namespace APIntOps;
- APInt L = umax(Lower, CR.Lower);
- APInt U = umin(Upper, CR.Upper);
-
- if (L.ult(U)) // If range isn't empty...
- return ConstantRange(L, U);
- else
- return ConstantRange(getBitWidth(), false);// Otherwise, empty set
- } else
- return intersect1Wrapped(CR, *this);
- } else { // We know "this" is wrapped...
- if (!CR.isWrappedSet())
- return intersect1Wrapped(*this, CR);
- else {
- // Both ranges are wrapped...
- using namespace APIntOps;
- APInt L = umax(Lower, CR.Lower);
- APInt U = umin(Upper, CR.Upper);
- return ConstantRange(L, U);
- }
- }
- return *this;
-}
-
-/// maximalIntersectWith - Return the range that results from the intersection
-/// of this range with another range. The resultant range is guaranteed to
-/// include all elements contained in both input ranges, and to have the
-/// smallest possible set size that does so. Because there may be two
-/// intersections with the same set size, A.maximalIntersectWith(B) might not
-/// be equal to B.maximalIntersect(A).
-ConstantRange ConstantRange::maximalIntersectWith(const ConstantRange &CR) const {
- assert(getBitWidth() == CR.getBitWidth() &&
- "ConstantRange types don't agree!");
// Handle common cases.
if ( isEmptySet() || CR.isFullSet()) return *this;
if (CR.isEmptySet() || isFullSet()) return CR;
if (!isWrappedSet() && CR.isWrappedSet())
- return CR.maximalIntersectWith(*this);
+ return CR.intersectWith(*this);
if (!isWrappedSet() && !CR.isWrappedSet()) {
if (Lower.ult(CR.Lower)) {
@@ -343,69 +371,74 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
- APInt L = Lower, U = Upper;
-
if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) {
+ // If the two ranges are disjoint, find the smaller gap and bridge it.
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ else
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ APInt L = Lower, U = Upper;
if (CR.Lower.ult(L))
L = CR.Lower;
-
- if (CR.Upper.ugt(U))
+ if ((CR.Upper - 1).ugt(U - 1))
U = CR.Upper;
+
+ if (L == 0 && U == 0)
+ return ConstantRange(getBitWidth());
+
+ return ConstantRange(L, U);
}
- if (isWrappedSet() && !CR.isWrappedSet()) {
- if ((CR.Lower.ult(Upper) && CR.Upper.ult(Upper)) ||
- (CR.Lower.ugt(Lower) && CR.Upper.ugt(Lower))) {
+ if (!CR.isWrappedSet()) {
+ // ------U L----- and ------U L----- : this
+ // L--U L--U : CR
+ if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower))
return *this;
- }
- if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) {
+ // ------U L----- : this
+ // L---------U : CR
+ if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper))
return ConstantRange(getBitWidth());
- }
-
- if (CR.Lower.ule(Upper) && CR.Upper.ule(Lower)) {
- APInt d1 = CR.Upper - Upper, d2 = Lower - CR.Upper;
- if (d1.ult(d2)) {
- U = CR.Upper;
- } else {
- L = CR.Upper;
- }
- }
- if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower)) {
+ // ----U L---- : this
+ // L---U : CR
+ // <d1> <d2>
+ if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) {
APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
- if (d1.ult(d2)) {
- U = CR.Lower + 1;
- } else {
- L = CR.Upper - 1;
- }
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ else
+ return ConstantRange(CR.Lower, Upper);
}
- if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) {
- APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Lower;
+ // ----U L----- : this
+ // L----U : CR
+ if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
- if (d1.ult(d2)) {
- U = CR.Lower + 1;
- } else {
- L = CR.Lower;
- }
- }
+ // ------U L---- : this
+ // L-----U : CR
+ if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
}
- if (isWrappedSet() && CR.isWrappedSet()) {
- if (Lower.ult(CR.Upper) || CR.Lower.ult(Upper))
- return ConstantRange(getBitWidth());
+ assert(isWrappedSet() && CR.isWrappedSet() &&
+ "ConstantRange::unionWith missed wrapped union unwrapped case");
- if (CR.Upper.ugt(U)) {
- U = CR.Upper;
- }
-
- if (CR.Lower.ult(L)) {
- L = CR.Lower;
- }
+ // ------U L---- and ------U L---- : this
+ // -U L----------- and ------------U L : CR
+ if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
- if (L == U) return ConstantRange(getBitWidth());
- }
+ APInt L = Lower, U = Upper;
+ if (CR.Upper.ugt(U))
+ U = CR.Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
return ConstantRange(L, U);
}
@@ -435,7 +468,7 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
assert(SrcTySize < DstTySize && "Not a value extension");
if (isFullSet()) {
return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
- APInt::getLowBitsSet(DstTySize, SrcTySize-1));
+ APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
}
APInt L = Lower; L.sext(DstTySize);
@@ -459,6 +492,99 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
return ConstantRange(L, U);
}
+ConstantRange
+ConstantRange::add(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() + Other.getLower();
+ APInt NewUpper = getUpper() + Other.getUpper() - 1;
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
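A hypothetical sketch of the semantics (illustrative i8 values, not part of the patch): lower bounds add, the half-open upper bounds add minus one, and a result narrower than either input signals wraparound.

    ConstantRange A(APInt(8, 1), APInt(8, 5));    // {1,2,3,4}
    ConstantRange B(APInt(8, 10), APInt(8, 20));  // {10,...,19}
    ConstantRange Sum = A.add(B);                 // [11, 24) = {11,...,23}
    // Had [11, 24) been smaller than either spread, add() would have
    // conservatively returned the full set.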
+
+ConstantRange
+ConstantRange::multiply(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt this_min = getUnsignedMin().zext(getBitWidth() * 2);
+ APInt this_max = getUnsignedMax().zext(getBitWidth() * 2);
+ APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2);
+ APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2);
+
+ ConstantRange Result_zext = ConstantRange(this_min * Other_min,
+ this_max * Other_max + 1);
+ return Result_zext.truncate(getBitWidth());
+}
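A hypothetical sketch of the approach (illustrative values): the bounds are multiplied at double width, so an overflowing product is caught by truncate() instead of silently wrapping.

    ConstantRange A(APInt(8, 2), APInt(8, 4));   // umin 2, umax 3
    ConstantRange B(APInt(8, 3), APInt(8, 5));   // umin 3, umax 4
    ConstantRange P = A.multiply(B);             // [6, 13) in i8
    // 3*4 = 12 fits in the doubled width i16; truncate(8) maps the
    // i16 range [6, 13) back down to i8 unchanged.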
+
+ConstantRange
+ConstantRange::smax(const ConstantRange &Other) const {
+ // X smax Y is: range(smax(X_smin, Y_smin),
+ // smax(X_smax, Y_smax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin());
+ APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::umax(const ConstantRange &Other) const {
+ // X umax Y is: range(umax(X_umin, Y_umin),
+ // umax(X_umax, Y_umax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
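A hypothetical sketch for the unsigned case (illustrative values): because max is monotone in both operands, each result bound is just the umax of the corresponding input bounds.

    ConstantRange A(APInt(8, 2), APInt(8, 5));   // umin 2, umax 4
    ConstantRange B(APInt(8, 3), APInt(8, 10));  // umin 3, umax 9
    ConstantRange M = A.umax(B);                 // [3, 10) = {3,...,9}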
+
+ConstantRange
+ConstantRange::udiv(const ConstantRange &RHS) const {
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (RHS.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
+
+ APInt RHS_umin = RHS.getUnsignedMin();
+ if (RHS_umin == 0) {
+ // We want the lowest value in RHS excluding zero. Usually that would be 1
+ // except for a range in the form of [X, 1) in which case it would be X.
+ if (RHS.getUpper() == 1)
+ RHS_umin = RHS.getLower();
+ else
+ RHS_umin = APInt(getBitWidth(), 1);
+ }
+
+ APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
+
+  // Lower can only equal Upper here if the LHS is full and the RHS is a
+  // wrapped interval containing 1; in that case the result is the full set.
+ if (Lower == Upper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(Lower, Upper);
+}
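A hypothetical sketch (illustrative values): the smallest quotient pairs the LHS minimum with the RHS maximum, and the largest pairs the LHS maximum with the smallest nonzero RHS value.

    ConstantRange A(APInt(8, 10), APInt(8, 20)); // {10,...,19}
    ConstantRange B(APInt(8, 2), APInt(8, 5));   // {2,3,4}
    ConstantRange Q = A.udiv(B);                 // [2, 10): 10/4 up to 19/2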
+
/// print - Print out the bounds to a stream...
///
void ConstantRange::print(raw_ostream &OS) const {
@@ -470,3 +596,5 @@ void ConstantRange::print(raw_ostream &OS) const {
void ConstantRange::dump() const {
print(errs());
}
+
+
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index a09cddf9022a..71ff411def2b 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -27,51 +27,37 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
+// All Debug.h functionality is a no-op in NDEBUG mode.
+#ifndef NDEBUG
bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option
-namespace {
-#ifndef NDEBUG
- // -debug - Command line option to enable the DEBUG statements in the passes.
- // This flag may only be enabled in debug builds.
- static cl::opt<bool, true>
- Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
- cl::location(DebugFlag));
+// -debug - Command line option to enable the DEBUG statements in the passes.
+// This flag may only be enabled in debug builds.
+static cl::opt<bool, true>
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
+ cl::location(DebugFlag));
- static std::string CurrentDebugType;
- static struct DebugOnlyOpt {
- void operator=(const std::string &Val) const {
- DebugFlag |= !Val.empty();
- CurrentDebugType = Val;
- }
- } DebugOnlyOptLoc;
+static std::string CurrentDebugType;
+static struct DebugOnlyOpt {
+ void operator=(const std::string &Val) const {
+ DebugFlag |= !Val.empty();
+ CurrentDebugType = Val;
+ }
+} DebugOnlyOptLoc;
- static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
- DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
- cl::Hidden, cl::value_desc("debug string"),
- cl::location(DebugOnlyOptLoc), cl::ValueRequired);
-#endif
-}
+static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+ cl::Hidden, cl::value_desc("debug string"),
+ cl::location(DebugOnlyOptLoc), cl::ValueRequired);
// isCurrentDebugType - Return true if the specified string is the debug type
// specified on the command line, or if none was specified on the command line
// with the -debug-only=X option.
//
bool llvm::isCurrentDebugType(const char *DebugType) {
-#ifndef NDEBUG
return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+}
#else
- return false;
+// Avoid "has no symbols" warning.
+int Debug_dummy = 0;
#endif
-}
-
-// getErrorOutputStream - Returns the error output stream (std::cerr). This
-// places the std::c* I/O streams into one .cpp file and relieves the whole
-// program from having to have hundreds of static c'tor/d'tors for them.
-//
-OStream &llvm::getErrorOutputStream(const char *DebugType) {
- static OStream cnoout(0);
- if (DebugFlag && isCurrentDebugType(DebugType))
- return cerr;
- else
- return cnoout;
-}
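For context, a hypothetical pass-side sketch of how these flags are consumed (the pass name and message are illustrative, not part of the patch):

    #define DEBUG_TYPE "mypass"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void visitBlock() {
      // Prints only under `opt -debug` or `opt -debug-only=mypass`,
      // and compiles away entirely in NDEBUG builds.
      DEBUG(errs() << "mypass: visiting block\n");
    }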
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index fa99035b679f..8b688cae2a63 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -83,7 +84,7 @@ const char *TagString(unsigned Tag) {
case DW_TAG_lo_user: return "DW_TAG_lo_user";
case DW_TAG_hi_user: return "DW_TAG_hi_user";
}
- assert(0 && "Unknown Dwarf Tag");
+ llvm_unreachable("Unknown Dwarf Tag");
return "";
}
@@ -94,7 +95,7 @@ const char *ChildrenString(unsigned Children) {
case DW_CHILDREN_no: return "CHILDREN_no";
case DW_CHILDREN_yes: return "CHILDREN_yes";
}
- assert(0 && "Unknown Dwarf ChildrenFlag");
+ llvm_unreachable("Unknown Dwarf ChildrenFlag");
return "";
}
@@ -205,7 +206,7 @@ const char *AttributeString(unsigned Attribute) {
case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
}
- assert(0 && "Unknown Dwarf Attribute");
+ llvm_unreachable("Unknown Dwarf Attribute");
return "";
}
@@ -235,7 +236,7 @@ const char *FormEncodingString(unsigned Encoding) {
case DW_FORM_ref_udata: return "FORM_ref_udata";
case DW_FORM_indirect: return "FORM_indirect";
}
- assert(0 && "Unknown Dwarf Form Encoding");
+ llvm_unreachable("Unknown Dwarf Form Encoding");
return "";
}
@@ -310,7 +311,7 @@ const char *OperationEncodingString(unsigned Encoding) {
case DW_OP_lo_user: return "OP_lo_user";
case DW_OP_hi_user: return "OP_hi_user";
}
- assert(0 && "Unknown Dwarf Operation Encoding");
+ llvm_unreachable("Unknown Dwarf Operation Encoding");
return "";
}
@@ -336,7 +337,7 @@ const char *AttributeEncodingString(unsigned Encoding) {
case DW_ATE_lo_user: return "ATE_lo_user";
case DW_ATE_hi_user: return "ATE_hi_user";
}
- assert(0 && "Unknown Dwarf Attribute Encoding");
+ llvm_unreachable("Unknown Dwarf Attribute Encoding");
return "";
}
@@ -350,7 +351,7 @@ const char *DecimalSignString(unsigned Sign) {
case DW_DS_leading_separate: return "DS_leading_separate";
case DW_DS_trailing_separate: return "DS_trailing_separate";
}
- assert(0 && "Unknown Dwarf Decimal Sign Attribute");
+ llvm_unreachable("Unknown Dwarf Decimal Sign Attribute");
return "";
}
@@ -364,7 +365,7 @@ const char *EndianityString(unsigned Endian) {
case DW_END_lo_user: return "END_lo_user";
case DW_END_hi_user: return "END_hi_user";
}
- assert(0 && "Unknown Dwarf Endianity");
+ llvm_unreachable("Unknown Dwarf Endianity");
return "";
}
@@ -377,7 +378,7 @@ const char *AccessibilityString(unsigned Access) {
case DW_ACCESS_protected: return "ACCESS_protected";
case DW_ACCESS_private: return "ACCESS_private";
}
- assert(0 && "Unknown Dwarf Accessibility");
+ llvm_unreachable("Unknown Dwarf Accessibility");
return "";
}
@@ -389,7 +390,7 @@ const char *VisibilityString(unsigned Visibility) {
case DW_VIS_exported: return "VIS_exported";
case DW_VIS_qualified: return "VIS_qualified";
}
- assert(0 && "Unknown Dwarf Visibility");
+ llvm_unreachable("Unknown Dwarf Visibility");
return "";
}
@@ -401,7 +402,7 @@ const char *VirtualityString(unsigned Virtuality) {
case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual";
case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual";
}
- assert(0 && "Unknown Dwarf Virtuality");
+ llvm_unreachable("Unknown Dwarf Virtuality");
return "";
}
@@ -431,7 +432,7 @@ const char *LanguageString(unsigned Language) {
case DW_LANG_lo_user: return "LANG_lo_user";
case DW_LANG_hi_user: return "LANG_hi_user";
}
- assert(0 && "Unknown Dwarf Language");
+ llvm_unreachable("Unknown Dwarf Language");
return "";
}
@@ -444,7 +445,7 @@ const char *CaseString(unsigned Case) {
case DW_ID_down_case: return "ID_down_case";
case DW_ID_case_insensitive: return "ID_case_insensitive";
}
- assert(0 && "Unknown Dwarf Identifier Case");
+ llvm_unreachable("Unknown Dwarf Identifier Case");
return "";
}
@@ -458,7 +459,7 @@ const char *ConventionString(unsigned Convention) {
case DW_CC_lo_user: return "CC_lo_user";
case DW_CC_hi_user: return "CC_hi_user";
}
- assert(0 && "Unknown Dwarf Calling Convention");
+ llvm_unreachable("Unknown Dwarf Calling Convention");
return "";
}
@@ -471,7 +472,7 @@ const char *InlineCodeString(unsigned Code) {
case DW_INL_declared_not_inlined: return "INL_declared_not_inlined";
case DW_INL_declared_inlined: return "INL_declared_inlined";
}
- assert(0 && "Unknown Dwarf Inline Code");
+ llvm_unreachable("Unknown Dwarf Inline Code");
return "";
}
@@ -482,7 +483,7 @@ const char *ArrayOrderString(unsigned Order) {
case DW_ORD_row_major: return "ORD_row_major";
case DW_ORD_col_major: return "ORD_col_major";
}
- assert(0 && "Unknown Dwarf Array Order");
+ llvm_unreachable("Unknown Dwarf Array Order");
return "";
}
@@ -493,7 +494,7 @@ const char *DiscriminantString(unsigned Discriminant) {
case DW_DSC_label: return "DSC_label";
case DW_DSC_range: return "DSC_range";
}
- assert(0 && "Unknown Dwarf Discriminant Descriptor");
+ llvm_unreachable("Unknown Dwarf Discriminant Descriptor");
return "";
}
@@ -514,7 +515,7 @@ const char *LNStandardString(unsigned Standard) {
case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin";
case DW_LNS_set_isa: return "LNS_set_isa";
}
- assert(0 && "Unknown Dwarf Line Number Standard");
+ llvm_unreachable("Unknown Dwarf Line Number Standard");
return "";
}
@@ -529,7 +530,7 @@ const char *LNExtendedString(unsigned Encoding) {
case DW_LNE_lo_user: return "LNE_lo_user";
case DW_LNE_hi_user: return "LNE_hi_user";
}
- assert(0 && "Unknown Dwarf Line Number Extended Opcode Encoding");
+ llvm_unreachable("Unknown Dwarf Line Number Extended Opcode Encoding");
return "";
}
@@ -544,7 +545,7 @@ const char *MacinfoString(unsigned Encoding) {
case DW_MACINFO_end_file: return "MACINFO_end_file";
case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext";
}
- assert(0 && "Unknown Dwarf Macinfo Type Encodings");
+ llvm_unreachable("Unknown Dwarf Macinfo Type Encodings");
return "";
}
@@ -580,7 +581,7 @@ const char *CallFrameString(unsigned Encoding) {
case DW_CFA_lo_user: return "CFA_lo_user";
case DW_CFA_hi_user: return "CFA_hi_user";
}
- assert(0 && "Unknown Dwarf Call Frame Instruction Encodings");
+ llvm_unreachable("Unknown Dwarf Call Frame Instruction Encodings");
return "";
}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
new file mode 100644
index 000000000000..dff4f030fefe
--- /dev/null
+++ b/lib/Support/ErrorHandling.cpp
@@ -0,0 +1,73 @@
+//===- lib/Support/ErrorHandling.cpp - Callbacks for errors -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API for error handling; it supersedes the cerr+abort()
+// and cerr+exit() styles of error handling.
+// Callbacks can be registered for these errors through this API.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Threading.h"
+#include <cassert>
+#include <cstdlib>
+
+using namespace llvm;
+using namespace std;
+
+static llvm_error_handler_t ErrorHandler = 0;
+static void *ErrorHandlerUserData = 0;
+
+namespace llvm {
+void llvm_install_error_handler(llvm_error_handler_t handler,
+ void *user_data) {
+ assert(!llvm_is_multithreaded() &&
+ "Cannot register error handlers after starting multithreaded mode!\n");
+ assert(!ErrorHandler && "Error handler already registered!\n");
+ ErrorHandler = handler;
+ ErrorHandlerUserData = user_data;
+}
+
+void llvm_remove_error_handler() {
+ ErrorHandler = 0;
+}
+
+void llvm_report_error(const char *reason) {
+ llvm_report_error(Twine(reason));
+}
+
+void llvm_report_error(const std::string &reason) {
+ llvm_report_error(Twine(reason));
+}
+
+void llvm_report_error(const Twine &reason) {
+ if (!ErrorHandler) {
+ errs() << "LLVM ERROR: " << reason << "\n";
+ } else {
+ ErrorHandler(ErrorHandlerUserData, reason.str());
+ }
+ exit(1);
+}
+
+void llvm_unreachable_internal(const char *msg, const char *file,
+ unsigned line) {
+ // This code intentionally doesn't call the ErrorHandler callback, because
+ // llvm_unreachable is intended to be used to indicate "impossible"
+ // situations, and not legitimate runtime errors.
+ if (msg)
+ errs() << msg << "\n";
+ errs() << "UNREACHABLE executed";
+ if (file)
+ errs() << " at " << file << ":" << line;
+ errs() << "!\n";
+ abort();
+}
+}
+
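A hypothetical client sketch of the new API (the handler name, message, and logging channel are illustrative):

    #include "llvm/Support/ErrorHandling.h"
    #include <cstdio>
    using namespace llvm;

    static void MyFatalHandler(void *UserData, const std::string &Reason) {
      // Route the message somewhere useful before the process exits.
      std::fprintf(stderr, "embedder: %s\n", Reason.c_str());
    }

    // At startup, before enabling threads:
    //   llvm_install_error_handler(MyFatalHandler, /*user_data=*/0);
    // Any later llvm_report_error("...") then calls the handler and exit(1)s.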
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 41c730e3e1e6..954dc77dff1e 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstring>
@@ -50,7 +51,7 @@ void FoldingSetNodeID::AddInteger(unsigned long I) {
else if (sizeof(long) == sizeof(long long)) {
AddInteger((unsigned long long)I);
} else {
- assert(0 && "unexpected sizeof(long)");
+ llvm_unreachable("unexpected sizeof(long)");
}
}
void FoldingSetNodeID::AddInteger(long long I) {
@@ -62,14 +63,14 @@ void FoldingSetNodeID::AddInteger(unsigned long long I) {
Bits.push_back(unsigned(I >> 32));
}
-void FoldingSetNodeID::AddString(const char *String, const char *End) {
- unsigned Size = static_cast<unsigned>(End - String);
+void FoldingSetNodeID::AddString(StringRef String) {
+ unsigned Size = String.size();
Bits.push_back(Size);
if (!Size) return;
unsigned Units = Size / 4;
unsigned Pos = 0;
- const unsigned *Base = (const unsigned *)String;
+ const unsigned *Base = (const unsigned*) String.data();
// If the string is aligned do a bulk transfer.
if (!((intptr_t)Base & 3)) {
@@ -99,14 +100,6 @@ void FoldingSetNodeID::AddString(const char *String, const char *End) {
Bits.push_back(V);
}
-void FoldingSetNodeID::AddString(const char *String) {
- AddString(String, String + strlen(String));
-}
-
-void FoldingSetNodeID::AddString(const std::string &String) {
- AddString(&*String.begin(), &*String.end());
-}
-
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
/// lookup the node in the FoldingSetImpl.
unsigned FoldingSetNodeID::ComputeHash() const {
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
new file mode 100644
index 000000000000..70f2cfa6ae88
--- /dev/null
+++ b/lib/Support/FormattedStream.cpp
@@ -0,0 +1,93 @@
+//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of formatted_raw_ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+/// CountColumns - Examine the given char sequence and figure out which
+/// column we end up in after output.
+///
+static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) {
+ // Keep track of the current column by scanning the string for
+ // special characters
+
+ for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {
+ ++Column;
+ if (*Ptr == '\n' || *Ptr == '\r')
+ Column = 0;
+ else if (*Ptr == '\t')
+ // Assumes tab stop = 8 characters.
+ Column += (8 - (Column & 0x7)) & 0x7;
+ }
+
+ return Column;
+}
+
+/// ComputeColumn - Examine the current output and figure out which
+/// column we end up in after output.
+void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
+ // If our previous scan pointer is inside the buffer, assume we already
+ // scanned those bytes. This depends on raw_ostream to not change our buffer
+ // in unexpected ways.
+ if (Ptr <= Scanned && Scanned <= Ptr + Size) {
+ // Scan all characters added since our last scan to determine the new
+ // column.
+ ColumnScanned = CountColumns(ColumnScanned, Scanned,
+ Size - (Scanned - Ptr));
+ } else
+ ColumnScanned = CountColumns(ColumnScanned, Ptr, Size);
+
+ // Update the scanning pointer.
+ Scanned = Ptr + Size;
+}
+
+/// PadToColumn - Align the output to some column number.
+///
+/// \param NewCol - The column to move to. At least one space is always
+/// emitted, even if the current column is already at or past NewCol.
+///
+void formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(getBufferStart(), GetNumBytesInBuffer());
+
+ // Output spaces until we reach the desired column.
+ indent(std::max(int(NewCol - ColumnScanned), 1));
+}
+
+void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(Ptr, Size);
+
+ // Write the data to the underlying stream (which is unbuffered, so
+ // the data will be immediately written out).
+ TheStream->write(Ptr, Size);
+
+ // Reset the scanning pointer.
+ Scanned = 0;
+}
+
+/// fouts() - This returns a reference to a formatted_raw_ostream for
+/// standard output. Use it like: fouts() << "foo" << "bar";
+formatted_raw_ostream &llvm::fouts() {
+ static formatted_raw_ostream S(outs());
+ return S;
+}
+
+/// ferrs() - This returns a reference to a formatted_raw_ostream for
+/// standard error. Use it like: ferrs() << "foo" << "bar";
+formatted_raw_ostream &llvm::ferrs() {
+ static formatted_raw_ostream S(errs());
+ return S;
+}
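A hypothetical usage sketch (mnemonics and column are illustrative): the stream tracks the current output column across writes, so printers can align fields.

    formatted_raw_ostream &OS = fouts();
    OS << "movl";
    OS.PadToColumn(8);      // pad with spaces up to column 8 (at least one)
    OS << "%eax, %ebx\n";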
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index c359dfb82ea7..c8bca6ef887c 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -12,13 +12,47 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/Streams.h"
#include "llvm/System/Path.h"
#include "llvm/System/Program.h"
#include "llvm/Config/config.h"
using namespace llvm;
-void llvm::DisplayGraph(const sys::Path &Filename) {
+std::string llvm::DOT::EscapeString(const std::string &Label) {
+ std::string Str(Label);
+ for (unsigned i = 0; i != Str.length(); ++i)
+ switch (Str[i]) {
+ case '\n':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i;
+ Str[i] = 'n';
+ break;
+ case '\t':
+ Str.insert(Str.begin()+i, ' '); // Convert to two spaces
+ ++i;
+ Str[i] = ' ';
+ break;
+ case '\\':
+ if (i+1 != Str.length())
+ switch (Str[i+1]) {
+ case 'l': continue; // don't disturb \l
+ case '|': case '{': case '}':
+ Str.erase(Str.begin()+i); continue;
+ default: break;
+ }
+ case '{': case '}':
+ case '<': case '>':
+ case '|': case '"':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i; // don't infinite loop
+ break;
+ }
+ return Str;
+}
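A hypothetical sketch of the escaper's effect (input chosen for illustration):

    // '|' is a DOT record separator and '\n' would end the label line;
    // both come back escaped with literal backslashes.
    std::string S = llvm::DOT::EscapeString("a|b\n"); // S == "a\\|b\\n"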
+
+void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
+ GraphProgram::Name program) {
std::string ErrMsg;
#if HAVE_GRAPHVIZ
sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
@@ -28,18 +62,61 @@ void llvm::DisplayGraph(const sys::Path &Filename) {
args.push_back(Filename.c_str());
args.push_back(0);
- cerr << "Running 'Graphviz' program... " << std::flush;
- if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
- cerr << "Error viewing graph: " << ErrMsg << "\n";
- }
-#elif (HAVE_GV && HAVE_DOT)
+ errs() << "Running 'Graphviz' program... ";
+ if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg))
+ errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg
+ << "\n";
+ else
+ Filename.eraseFromDisk();
+
+#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
+ HAVE_TWOPI || HAVE_CIRCO))
sys::Path PSFilename = Filename;
PSFilename.appendSuffix("ps");
-
- sys::Path dot(LLVM_PATH_DOT);
+
+ sys::Path prog;
+
+ // Set default grapher
+#if HAVE_CIRCO
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+#if HAVE_TWOPI
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if HAVE_NEATO
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if HAVE_FDP
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if HAVE_DOT
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+
+ // Find which program the user wants
+#if HAVE_DOT
+ if (program == GraphProgram::DOT)
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+#if (HAVE_FDP)
+ if (program == GraphProgram::FDP)
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if (HAVE_NEATO)
+ if (program == GraphProgram::NEATO)
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if (HAVE_TWOPI)
+ if (program == GraphProgram::TWOPI)
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if (HAVE_CIRCO)
+ if (program == GraphProgram::CIRCO)
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
std::vector<const char*> args;
- args.push_back(dot.c_str());
+ args.push_back(prog.c_str());
args.push_back("-Tps");
args.push_back("-Nfontname=Courier");
args.push_back("-Gsize=7.5,10");
@@ -48,11 +125,13 @@ void llvm::DisplayGraph(const sys::Path &Filename) {
args.push_back(PSFilename.c_str());
args.push_back(0);
- cerr << "Running 'dot' program... " << std::flush;
- if (sys::Program::ExecuteAndWait(dot, &args[0],0,0,0,0,&ErrMsg)) {
- cerr << "Error viewing graph: '" << ErrMsg << "\n";
+ errs() << "Running '" << prog.str() << "' program... ";
+
+ if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
+ errs() << "Error viewing graph " << Filename.str() << ": '"
+ << ErrMsg << "\n";
} else {
- cerr << " done. \n";
+ errs() << " done. \n";
sys::Path gv(LLVM_PATH_GV);
args.clear();
@@ -62,11 +141,18 @@ void llvm::DisplayGraph(const sys::Path &Filename) {
args.push_back(0);
ErrMsg.clear();
- if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) {
- cerr << "Error viewing graph: " << ErrMsg << "\n";
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
+ errs() << "Error viewing graph: " << ErrMsg << "\n";
+ Filename.eraseFromDisk();
+ PSFilename.eraseFromDisk();
+ }
+ else {
+ sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph files: " << Filename.str() << " "
+ << PSFilename.str() << "\n";
}
}
- PSFilename.eraseFromDisk();
#elif HAVE_DOTTY
sys::Path dotty(LLVM_PATH_DOTTY);
@@ -75,15 +161,15 @@ void llvm::DisplayGraph(const sys::Path &Filename) {
args.push_back(Filename.c_str());
args.push_back(0);
- cerr << "Running 'dotty' program... " << std::flush;
+ errs() << "Running 'dotty' program... ";
if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
- cerr << "Error viewing graph: " << ErrMsg << "\n";
+ errs() << "Error viewing graph " << Filename.str() << ": "
+ << ErrMsg << "\n";
} else {
#ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns
return;
#endif
+ Filename.eraseFromDisk();
}
#endif
-
- Filename.eraseFromDisk();
}
diff --git a/lib/Support/MemoryObject.cpp b/lib/Support/MemoryObject.cpp
new file mode 100644
index 000000000000..91e3ecd23a2e
--- /dev/null
+++ b/lib/Support/MemoryObject.cpp
@@ -0,0 +1,34 @@
+//===- MemoryObject.cpp - Abstract memory interface -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryObject.h"
+using namespace llvm;
+
+MemoryObject::~MemoryObject() {
+}
+
+int MemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ uint64_t current = address;
+ uint64_t limit = getBase() + getExtent();
+
+ while (current - address < size && current < limit) {
+ if (readByte(current, &buf[(current - address)]))
+ return -1;
+
+ current++;
+ }
+
+ if (copied)
+ *copied = current - address;
+
+ return 0;
+}
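A minimal hypothetical implementation of the interface (the class name is illustrative); note the default readBytes above pays one virtual readByte call per byte, which subclasses can override away:

    #include "llvm/Support/MemoryObject.h"

    class BufferMemoryObject : public llvm::MemoryObject {
      const uint8_t *Data;
      uint64_t Size;
    public:
      BufferMemoryObject(const uint8_t *D, uint64_t S) : Data(D), Size(S) {}
      uint64_t getBase() const { return 0; }
      uint64_t getExtent() const { return Size; }
      int readByte(uint64_t Addr, uint8_t *Byte) const {
        if (Addr >= Size) return -1; // out of range
        *Byte = Data[Addr];
        return 0;
      }
    };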
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
index ef32af4b3f38..36caecffeede 100644
--- a/lib/Support/PluginLoader.cpp
+++ b/lib/Support/PluginLoader.cpp
@@ -14,10 +14,9 @@
#define DONT_GET_PLUGIN_LOADER_OPTION
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/System/Mutex.h"
-#include <ostream>
#include <vector>
using namespace llvm;
@@ -25,23 +24,23 @@ static ManagedStatic<std::vector<std::string> > Plugins;
static ManagedStatic<sys::SmartMutex<true> > PluginsLock;
void PluginLoader::operator=(const std::string &Filename) {
- sys::SmartScopedLock<true> Lock(&*PluginsLock);
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
std::string Error;
if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
- cerr << "Error opening '" << Filename << "': " << Error
- << "\n -load request ignored.\n";
+ errs() << "Error opening '" << Filename << "': " << Error
+ << "\n -load request ignored.\n";
} else {
Plugins->push_back(Filename);
}
}
unsigned PluginLoader::getNumPlugins() {
- sys::SmartScopedLock<true> Lock(&*PluginsLock);
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
return Plugins.isConstructed() ? Plugins->size() : 0;
}
std::string &PluginLoader::getPlugin(unsigned num) {
- sys::SmartScopedLock<true> Lock(&*PluginsLock);
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
assert(Plugins.isConstructed() && num < Plugins->size() &&
"Asking for an out of bounds plugin");
return (*Plugins)[num];
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 14290a1284fe..68b41a7f0942 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -19,6 +19,10 @@
#include "llvm/ADT/SmallString.h"
using namespace llvm;
+namespace llvm {
+ bool DisablePrettyStackTrace = false;
+}
+
// FIXME: This should be thread local when llvm supports threads.
static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
@@ -67,15 +71,16 @@ static void CrashHandler(void *Cookie) {
}
if (!TmpStr.empty()) {
- __crashreporter_info__ = strdup(TmpStr.c_str());
- errs() << __crashreporter_info__;
+ __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
+ errs() << TmpStr.str();
}
#endif
}
static bool RegisterCrashPrinter() {
- sys::AddSignalHandler(CrashHandler, 0);
+ if (!DisablePrettyStackTrace)
+ sys::AddSignalHandler(CrashHandler, 0);
return false;
}
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
new file mode 100644
index 000000000000..618ca0524a04
--- /dev/null
+++ b/lib/Support/Regex.cpp
@@ -0,0 +1,92 @@
+//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "regex_impl.h"
+#include <string>
+using namespace llvm;
+
+Regex::Regex(const StringRef &regex, unsigned Flags) {
+ unsigned flags = 0;
+ preg = new llvm_regex();
+ preg->re_endp = regex.end();
+ if (Flags & IgnoreCase)
+ flags |= REG_ICASE;
+ if (Flags & Newline)
+ flags |= REG_NEWLINE;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+}
+
+Regex::~Regex() {
+ llvm_regfree(preg);
+ delete preg;
+}
+
+bool Regex::isValid(std::string &Error) {
+ if (!error)
+ return true;
+
+ size_t len = llvm_regerror(error, preg, NULL, 0);
+
+ Error.resize(len);
+ llvm_regerror(error, preg, &Error[0], len);
+ return false;
+}
+
+/// getNumMatches - In a valid regex, return the number of parenthesized
+/// matches it contains.
+unsigned Regex::getNumMatches() const {
+ return preg->re_nsub;
+}
+
+bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
+ unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+
+ // pmatch needs to have at least one element.
+ SmallVector<llvm_regmatch_t, 8> pm;
+ pm.resize(nmatch > 0 ? nmatch : 1);
+ pm[0].rm_so = 0;
+ pm[0].rm_eo = String.size();
+
+ int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
+
+ if (rc == REG_NOMATCH)
+ return false;
+ if (rc != 0) {
+ // regexec can fail due to invalid pattern or running out of memory.
+ error = rc;
+ return false;
+ }
+
+ // There was a match.
+
+ if (Matches) { // match position requested
+ Matches->clear();
+
+ for (unsigned i = 0; i != nmatch; ++i) {
+ if (pm[i].rm_so == -1) {
+ // this group didn't match
+ Matches->push_back(StringRef());
+ continue;
+ }
+      // rm_eo == rm_so is legal: the group matched the empty string.
+      assert(pm[i].rm_eo >= pm[i].rm_so);
+ Matches->push_back(StringRef(String.data()+pm[i].rm_so,
+ pm[i].rm_eo-pm[i].rm_so));
+ }
+ }
+
+ return true;
+}
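A hypothetical usage sketch (pattern and input are illustrative; flags passed explicitly as 0): Matches[0] is the whole match, followed by one entry per parenthesized group.

    Regex R(StringRef("^([a-z]+)=([0-9]+)$"), 0);
    SmallVector<StringRef, 4> Groups;
    if (R.match("width=80", &Groups)) {
      // Groups[0] == "width=80", Groups[1] == "width", Groups[2] == "80"
    }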
diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp
index d5ffff9d937f..b4e9430e5fdf 100644
--- a/lib/Support/SlowOperationInformer.cpp
+++ b/lib/Support/SlowOperationInformer.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SlowOperationInformer.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Alarm.h"
#include <sstream>
#include <cassert>
@@ -28,8 +28,8 @@ SlowOperationInformer::~SlowOperationInformer() {
if (LastPrintAmount) {
// If we have printed something, make _sure_ we print the 100% amount, and
// also print a newline.
- cout << std::string(LastPrintAmount, '\b') << "Progress "
- << OperationName << ": 100% \n";
+ outs() << std::string(LastPrintAmount, '\b') << "Progress "
+ << OperationName << ": 100% \n";
}
}
@@ -40,7 +40,7 @@ SlowOperationInformer::~SlowOperationInformer() {
bool SlowOperationInformer::progress(unsigned Amount) {
int status = sys::AlarmStatus();
if (status == -1) {
- cout << "\n";
+ outs() << "\n";
LastPrintAmount = 0;
return true;
}
@@ -61,6 +61,7 @@ bool SlowOperationInformer::progress(unsigned Amount) {
OS << "% ";
LastPrintAmount = OS.str().size();
- cout << ToPrint+OS.str() << std::flush;
+ outs() << ToPrint+OS.str();
+ outs().flush();
return false;
}
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 6b0d55c19f22..4b93f7f99a24 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -18,7 +18,24 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+namespace {
+ struct LineNoCacheTy {
+ int LastQueryBufferID;
+ const char *LastQuery;
+ unsigned LineNoOfQuery;
+ };
+}
+
+static LineNoCacheTy *getCache(void *Ptr) {
+ return (LineNoCacheTy*)Ptr;
+}
+
+
SourceMgr::~SourceMgr() {
+ // Delete the line # cache if allocated.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ delete Cache;
+
while (!Buffers.empty()) {
delete Buffers.back().Buffer;
Buffers.pop_back();
@@ -71,8 +88,31 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const {
const char *Ptr = Buff->getBufferStart();
+ // If we have a line number cache, and if the query is to a later point in the
+ // same file, start searching from the last query location. This optimizes
+ // for the case when multiple diagnostics come out of one file in order.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ if (Cache->LastQueryBufferID == BufferID &&
+ Cache->LastQuery <= Loc.getPointer()) {
+ Ptr = Cache->LastQuery;
+ LineNo = Cache->LineNoOfQuery;
+ }
+
+ // Scan for the location being queried, keeping track of the number of lines
+ // we see.
for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
if (*Ptr == '\n') ++LineNo;
+
+
+ // Allocate the line number cache if it doesn't exist.
+ if (LineNoCache == 0)
+ LineNoCache = new LineNoCacheTy();
+
+ // Update the line # cache.
+ LineNoCacheTy &Cache = *getCache(LineNoCache);
+ Cache.LastQueryBufferID = BufferID;
+ Cache.LastQuery = Ptr;
+ Cache.LineNoOfQuery = LineNo;
return LineNo;
}
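A hypothetical sketch of the access pattern this cache targets (names are illustrative): diagnostics emitted in source order let the second lookup resume where the first stopped.

    unsigned L1 = SM.FindLineNumber(LocA, BufID); // cold: scans from buffer start
    unsigned L2 = SM.FindLineNumber(LocB, BufID); // warm if LocB >= LocA: resumes at LocA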
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 33570b0ee534..14f94bc28447 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -24,16 +24,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
-#include <ostream>
#include <cstring>
using namespace llvm;
// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); }
+namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); }
/// -stats - Command line option to cause transformations to emit stats about
/// what they did.
@@ -58,14 +57,14 @@ public:
}
static ManagedStatic<StatisticInfo> StatInfo;
-static ManagedStatic<sys::Mutex> StatLock;
+static ManagedStatic<sys::SmartMutex<true> > StatLock;
/// RegisterStatistic - The first time a statistic is bumped, this method is
/// called.
void Statistic::RegisterStatistic() {
// If stats are enabled, inform StatInfo that this statistic should be
// printed.
- sys::ScopedLock Writer(&*StatLock);
+ sys::SmartScopedLock<true> Writer(*StatLock);
if (!Initialized) {
if (Enabled)
StatInfo->addStatistic(this);
@@ -96,7 +95,7 @@ StatisticInfo::~StatisticInfo() {
if (Stats.empty()) return;
// Get the stream to write to.
- std::ostream &OutStream = *GetLibSupportInfoOutputFile();
+ raw_ostream &OutStream = *GetLibSupportInfoOutputFile();
// Figure out how long the biggest Value and Name fields are.
unsigned MaxNameLen = 0, MaxValLen = 0;
@@ -125,8 +124,9 @@ StatisticInfo::~StatisticInfo() {
}
- OutStream << std::endl; // Flush the output stream...
+ OutStream << '\n'; // Flush the output stream...
+ OutStream.flush();
- if (&OutStream != cerr.stream() && &OutStream != cout.stream())
+ if (&OutStream != &outs() && &OutStream != &errs())
delete &OutStream; // Close the file.
}
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index 0c61732a61b3..040308bbfd48 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -65,14 +65,13 @@ static unsigned HashString(const char *Start, const char *End) {
/// specified bucket will be non-null. Otherwise, it will be null. In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
-unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
- const char *NameEnd) {
+unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) {
unsigned HTSize = NumBuckets;
if (HTSize == 0) { // Hash table unallocated so far?
init(16);
HTSize = NumBuckets;
}
- unsigned FullHashValue = HashString(NameStart, NameEnd);
+ unsigned FullHashValue = HashString(Name.begin(), Name.end());
unsigned BucketNo = FullHashValue & (HTSize-1);
unsigned ProbeAmt = 1;
@@ -102,12 +101,10 @@ unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
// being non-null and for the full hash value) not at the items. This
// is important for cache locality.
- // Do the comparison like this because NameStart isn't necessarily
+ // Do the comparison like this because Name isn't necessarily
// null-terminated!
char *ItemStr = (char*)BucketItem+ItemSize;
- unsigned ItemStrLen = BucketItem->getKeyLength();
- if (unsigned(NameEnd-NameStart) == ItemStrLen &&
- memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
+ if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
// We found a match!
return BucketNo;
}
@@ -126,10 +123,10 @@ unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
/// FindKey - Look up the bucket that contains the specified key. If it exists
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
-int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
+int StringMapImpl::FindKey(const StringRef &Key) const {
unsigned HTSize = NumBuckets;
if (HTSize == 0) return -1; // Really empty table?
- unsigned FullHashValue = HashString(KeyStart, KeyEnd);
+ unsigned FullHashValue = HashString(Key.begin(), Key.end());
unsigned BucketNo = FullHashValue & (HTSize-1);
unsigned ProbeAmt = 1;
@@ -151,9 +148,7 @@ int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
// Do the comparison like this because NameStart isn't necessarily
// null-terminated!
char *ItemStr = (char*)BucketItem+ItemSize;
- unsigned ItemStrLen = BucketItem->getKeyLength();
- if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
- memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
+ if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
// We found a match!
return BucketNo;
}
@@ -172,16 +167,15 @@ int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
/// delete it. This aborts if the value isn't in the table.
void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
const char *VStr = (char*)V + ItemSize;
- StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
+ StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
V2 = V2;
assert(V == V2 && "Didn't find key?");
}
/// RemoveKey - Remove the StringMapEntry for the specified key from the
/// table, returning it. If the key is not in the table, this returns null.
-StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
- const char *KeyEnd) {
- int Bucket = FindKey(KeyStart, KeyEnd);
+StringMapEntryBase *StringMapImpl::RemoveKey(const StringRef &Key) {
+ int Bucket = FindKey(Key);
if (Bucket == -1) return 0;
StringMapEntryBase *Result = TheTable[Bucket].Item;
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index b9c1fd0465bd..1ee917f119f7 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/StringPool.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
@@ -22,12 +22,12 @@ StringPool::~StringPool() {
assert(InternTable.empty() && "PooledStringPtr leaked!");
}
-PooledStringPtr StringPool::intern(const char *Begin, const char *End) {
- table_t::iterator I = InternTable.find(Begin, End);
+PooledStringPtr StringPool::intern(const StringRef &Key) {
+ table_t::iterator I = InternTable.find(Key);
if (I != InternTable.end())
return PooledStringPtr(&*I);
- entry_t *S = entry_t::Create(Begin, End);
+ entry_t *S = entry_t::Create(Key.begin(), Key.end());
S->getValue().Pool = this;
InternTable.insert(S);
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
new file mode 100644
index 000000000000..deaa19efe998
--- /dev/null
+++ b/lib/Support/StringRef.cpp
@@ -0,0 +1,188 @@
+//===-- StringRef.cpp - Lightweight String References ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+// MSVC emits references to this into the translation units which reference it.
+#ifndef _MSC_VER
+const size_t StringRef::npos;
+#endif
+
+//===----------------------------------------------------------------------===//
+// String Searching
+//===----------------------------------------------------------------------===//
+
+
+/// find - Search for the first string \arg Str in the string.
+///
+/// \return - The index of the first occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::find(const StringRef &Str) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t i = 0, e = Length - N + 1; i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+}
+
+/// rfind - Search for the last string \arg Str in the string.
+///
+/// \return - The index of the last occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::rfind(const StringRef &Str) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t i = Length - N + 1, e = 0; i != e;) {
+ --i;
+ if (substr(i, N).equals(Str))
+ return i;
+ }
+ return npos;
+}
+
+/// find_first_of - Find the first character from the string 'Chars' in the
+/// current string or return npos if not in string.
+StringRef::size_type StringRef::find_first_of(StringRef Chars) const {
+ for (size_type i = 0, e = Length; i != e; ++i)
+ if (Chars.find(Data[i]) != npos)
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// in the string 'Chars', or return npos if every character is in 'Chars'.
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const {
+ for (size_type i = 0, e = Length; i != e; ++i)
+ if (Chars.find(Data[i]) == npos)
+ return i;
+ return npos;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helpful Algorithms
+//===----------------------------------------------------------------------===//
+
+/// count - Return the number of non-overlapped occurrences of \arg Str in
+/// the string.
+size_t StringRef::count(const StringRef &Str) const {
+ size_t Count = 0;
+ size_t N = Str.size();
+  if (N == 0 || N > Length)
+    return 0;
+  for (size_t i = 0, e = Length - N + 1; i < e;)
+    if (substr(i, N).equals(Str)) {
+      ++Count;
+      i += N; // non-overlapped: skip past this occurrence
+    } else
+      ++i;
+ return Count;
+}
+
+/// GetAsUnsignedInteger - Workhorse method that converts an integer character
+/// sequence of radix up to 36 to an unsigned long long value.
+static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
+ // Autosense radix if not specified.
+ if (Radix == 0) {
+ if (Str.startswith("0x")) {
+ Str = Str.substr(2);
+ Radix = 16;
+ } else if (Str.startswith("0b")) {
+ Str = Str.substr(2);
+ Radix = 2;
+ } else if (Str.startswith("0"))
+ Radix = 8;
+ else
+ Radix = 10;
+ }
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Parse all the bytes of the string given this radix. Watch for overflow.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ unsigned long long PrevResult = Result;
+ Result = Result*Radix+CharVal;
+
+ // Check for overflow.
+ if (Result < PrevResult)
+ return true;
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
+ return GetAsUnsignedInteger(*this, Radix, Result);
+}
+
+
+bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+ unsigned long long ULLVal;
+
+ // Handle positive strings first.
+ if (empty() || front() != '-') {
+ if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+ // Check for value so large it overflows a signed value.
+ (long long)ULLVal < 0)
+ return true;
+ Result = ULLVal;
+ return false;
+ }
+
+ // Get the positive part of the value.
+ if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+      // Reject values too large to fit as a negative signed value, but allow
+      // "-0". The negation is performed on the unsigned value so that it
+      // cannot trigger undefined behavior from signed overflow.
+ (long long)-ULLVal > 0)
+ return true;
+
+ Result = -ULLVal;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
+ long long Val;
+ if (getAsInteger(Radix, Val) ||
+ (int)Val != Val)
+ return true;
+ Result = Val;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
+ unsigned long long Val;
+ if (getAsInteger(Radix, Val) ||
+ (unsigned)Val != Val)
+ return true;
+ Result = Val;
+ return false;
+}
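A hypothetical usage sketch (the literal is illustrative): radix 0 autosenses the 0x/0b/0 prefixes, and a true return signals failure.

    unsigned Val;
    if (!StringRef("0x1f").getAsInteger(0, Val)) {
      // Val == 31; a bad digit or overflow would have returned true.
    }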
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index c8c323876bfb..299032f18715 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -12,22 +12,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Streams.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/System/Process.h"
#include "llvm/System/Program.h"
-#include <ostream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check,
+bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check,
bool print_warning) {
- if (stream_to_check == cout.stream() &&
- sys::Process::StandardOutIsDisplayed()) {
+ if (stream_to_check.is_displayed()) {
if (print_warning) {
- cerr << "WARNING: You're attempting to print out a bitcode file.\n"
- << "This is inadvisable as it may cause display problems. If\n"
- << "you REALLY want to taste LLVM bitcode first-hand, you\n"
- << "can force output with the `-f' option.\n\n";
+ errs() << "WARNING: You're attempting to print out a bitcode file.\n"
+ << "This is inadvisable as it may cause display problems. If\n"
+ << "you REALLY want to taste LLVM bitcode first-hand, you\n"
+ << "can force output with the `-f' option.\n\n";
}
return true;
}
@@ -35,24 +33,17 @@ bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check,
}
/// FindExecutable - Find a named executable, giving the argv[0] of program
-/// being executed. This allows us to find another LLVM tool if it is built
-/// into the same directory, but that directory is neither the current
-/// directory, nor in the PATH. If the executable cannot be found, return an
-/// empty string. Return the input string if given a full path to an executable.
-///
+/// being executed. This allows us to find another LLVM tool if it is built in
+/// the same directory. If the executable cannot be found, return an
+/// empty string.
+/// @brief Find a named executable.
#undef FindExecutable // needed on windows :(
sys::Path llvm::FindExecutable(const std::string &ExeName,
- const std::string &ProgramPath) {
- // First check if the given name is already a valid path to an executable.
- sys::Path Result(ExeName);
- Result.makeAbsolute();
- if (Result.canExecute())
- return Result;
-
- // Otherwise check the directory that the calling program is in. We can do
+ const char *Argv0, void *MainAddr) {
+ // Check the directory that the calling program is in. We can do
// this if ProgramPath contains at least one / character, indicating that it
- // is a relative path to bugpoint itself.
- Result = ProgramPath;
+ // is a relative path to the executable itself.
+ sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr);
Result.eraseComponent();
if (!Result.isEmpty()) {
Result.appendComponent(ExeName);
@@ -60,5 +51,5 @@ sys::Path llvm::FindExecutable(const std::string &ExeName,
return Result;
}
- return sys::Program::FindProgramByName(ExeName);
+ return sys::Path();
}
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
new file mode 100644
index 000000000000..5896447f5ea5
--- /dev/null
+++ b/lib/Support/TargetRegistry.cpp
@@ -0,0 +1,92 @@
+//===--- TargetRegistry.cpp - Target registration -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/System/Host.h"
+#include <cassert>
+using namespace llvm;
+
+// Clients are responsible for avoiding race conditions in registration.
+static Target *FirstTarget = 0;
+
+TargetRegistry::iterator TargetRegistry::begin() {
+ return iterator(FirstTarget);
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &TT,
+ std::string &Error) {
+ // Provide special warning when no targets are initialized.
+ if (begin() == end()) {
+ Error = "Unable to find target for this triple (no targets are registered)";
+ return 0;
+ }
+ const Target *Best = 0, *EquallyBest = 0;
+ unsigned BestQuality = 0;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (unsigned Qual = it->TripleMatchQualityFn(TT)) {
+ if (!Best || Qual > BestQuality) {
+ Best = &*it;
+ EquallyBest = 0;
+ BestQuality = Qual;
+ } else if (Qual == BestQuality)
+ EquallyBest = &*it;
+ }
+ }
+
+ if (!Best) {
+ Error = "No available targets are compatible with this triple, "
+ "see -version for the available targets.";
+ return 0;
+ }
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ if (EquallyBest) {
+ Error = std::string("Cannot choose between targets \"") +
+ Best->Name + "\" and \"" + EquallyBest->Name + "\"";
+ return 0;
+ }
+
+ return Best;
+}
+
+void TargetRegistry::RegisterTarget(Target &T,
+ const char *Name,
+ const char *ShortDesc,
+ Target::TripleMatchQualityFnTy TQualityFn,
+ bool HasJIT) {
+ assert(Name && ShortDesc && TQualityFn &&
+ "Missing required target information!");
+
+ // Check if this target has already been initialized, we allow this as a
+ // convenience to some clients.
+ if (T.Name)
+ return;
+
+ // Add to the list of targets.
+ T.Next = FirstTarget;
+ FirstTarget = &T;
+
+ T.Name = Name;
+ T.ShortDesc = ShortDesc;
+ T.TripleMatchQualityFn = TQualityFn;
+ T.HasJIT = HasJIT;
+}
+
+const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
+ const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error);
+
+ if (TheTarget && !TheTarget->hasJIT()) {
+ Error = "No JIT compatible target available for this host";
+ return 0;
+ }
+
+ return TheTarget;
+}
+
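A hypothetical client sketch (the triple is illustrative), assuming the relevant targets were registered at startup:

    std::string Err;
    const Target *T =
        TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu", Err);
    if (!T)
      errs() << "lookup failed: " << Err << "\n";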
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index ede1dc96e827..dd58d1f68b4d 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -14,16 +14,16 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
#include "llvm/System/Process.h"
#include <algorithm>
-#include <fstream>
#include <functional>
#include <map>
using namespace llvm;
// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); }
+namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); }
// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
// of constructor/destructor ordering being unspecified by C++. Basically the
@@ -145,7 +145,7 @@ static TimeRecord getTimeRecord(bool Start) {
static ManagedStatic<std::vector<Timer*> > ActiveTimers;
void Timer::startTimer() {
- sys::SmartScopedLock<true> L(&Lock);
+ sys::SmartScopedLock<true> L(Lock);
Started = true;
ActiveTimers->push_back(this);
TimeRecord TR = getTimeRecord(true);
@@ -157,7 +157,7 @@ void Timer::startTimer() {
}
void Timer::stopTimer() {
- sys::SmartScopedLock<true> L(&Lock);
+ sys::SmartScopedLock<true> L(Lock);
TimeRecord TR = getTimeRecord(false);
Elapsed += TR.Elapsed;
UserTime += TR.UserTime;
@@ -229,7 +229,7 @@ static ManagedStatic<Name2Timer> NamedTimers;
static ManagedStatic<Name2Pair> NamedGroupedTimers;
static Timer &getNamedRegionTimer(const std::string &Name) {
- sys::SmartScopedLock<true> L(&*TimerLock);
+ sys::SmartScopedLock<true> L(*TimerLock);
Name2Timer::iterator I = NamedTimers->find(Name);
if (I != NamedTimers->end())
return I->second;
@@ -239,7 +239,7 @@ static Timer &getNamedRegionTimer(const std::string &Name) {
static Timer &getNamedRegionTimer(const std::string &Name,
const std::string &GroupName) {
- sys::SmartScopedLock<true> L(&*TimerLock);
+ sys::SmartScopedLock<true> L(*TimerLock);
Name2Pair::iterator I = NamedGroupedTimers->find(GroupName);
if (I == NamedGroupedTimers->end()) {
@@ -269,38 +269,17 @@ NamedRegionTimer::NamedRegionTimer(const std::string &Name,
// TimerGroup Implementation
//===----------------------------------------------------------------------===//
-// printAlignedFP - Simulate the printf "%A.Bf" format, where A is the
-// TotalWidth size, and B is the AfterDec size.
-//
-static void printAlignedFP(double Val, unsigned AfterDec, unsigned TotalWidth,
- std::ostream &OS) {
- assert(TotalWidth >= AfterDec+1 && "Bad FP Format!");
- OS.width(TotalWidth-AfterDec-1);
- char OldFill = OS.fill();
- OS.fill(' ');
- OS << (int)Val; // Integer part;
- OS << ".";
- OS.width(AfterDec);
- OS.fill('0');
- unsigned ResultFieldSize = 1;
- while (AfterDec--) ResultFieldSize *= 10;
- OS << (int)(Val*ResultFieldSize) % ResultFieldSize;
- OS.fill(OldFill);
-}
-static void printVal(double Val, double Total, std::ostream &OS) {
+static void printVal(double Val, double Total, raw_ostream &OS) {
if (Total < 1e-7) // Avoid dividing by zero...
OS << " ----- ";
else {
- OS << " ";
- printAlignedFP(Val, 4, 7, OS);
- OS << " (";
- printAlignedFP(Val*100/Total, 1, 5, OS);
- OS << "%)";
+ OS << " " << format("%7.4f", Val) << " (";
+ OS << format("%5.1f", Val*100/Total) << "%)";
}
}
-void Timer::print(const Timer &Total, std::ostream &OS) {
+void Timer::print(const Timer &Total, raw_ostream &OS) {
if (&Total < this) {
Total.Lock.acquire();
Lock.acquire();
@@ -320,13 +299,11 @@ void Timer::print(const Timer &Total, std::ostream &OS) {
OS << " ";
if (Total.MemUsed) {
- OS.width(9);
- OS << MemUsed << " ";
+ OS << format("%9lld", (long long)MemUsed) << " ";
}
if (Total.PeakMem) {
if (PeakMem) {
- OS.width(9);
- OS << PeakMem << " ";
+ OS << format("%9lld", (long long)PeakMem) << " ";
} else
OS << " ";
}
@@ -344,28 +321,30 @@ void Timer::print(const Timer &Total, std::ostream &OS) {
}
// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
-std::ostream *
+raw_ostream *
llvm::GetLibSupportInfoOutputFile() {
std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename();
if (LibSupportInfoOutputFilename.empty())
- return cerr.stream();
+ return &errs();
if (LibSupportInfoOutputFilename == "-")
- return cout.stream();
+ return &outs();
- std::ostream *Result = new std::ofstream(LibSupportInfoOutputFilename.c_str(),
- std::ios::app);
- if (!Result->good()) {
- cerr << "Error opening info-output-file '"
+
+ std::string Error;
+ raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(),
+ Error, raw_fd_ostream::F_Append);
+ if (Error.empty())
+ return Result;
+
+ errs() << "Error opening info-output-file '"
               << LibSupportInfoOutputFilename << "' for appending!\n";
- delete Result;
- return cerr.stream();
- }
- return Result;
+ delete Result;
+ return &errs();
}
void TimerGroup::removeTimer() {
- sys::SmartScopedLock<true> L(&*TimerLock);
+ sys::SmartScopedLock<true> L(*TimerLock);
if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report...
// Sort the timers in descending order by amount of time taken...
std::sort(TimersToPrint.begin(), TimersToPrint.end(),
@@ -375,7 +354,7 @@ void TimerGroup::removeTimer() {
unsigned Padding = (80-Name.length())/2;
if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
- std::ostream *OutStream = GetLibSupportInfoOutputFile();
+ raw_ostream *OutStream = GetLibSupportInfoOutputFile();
++NumTimers;
{ // Scope to contain Total timer... don't allow total timer to drop us to
@@ -397,10 +376,8 @@ void TimerGroup::removeTimer() {
if (this != DefaultTimerGroup) {
*OutStream << " Total Execution Time: ";
- printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream);
- *OutStream << " seconds (";
- printAlignedFP(Total.getWallTime(), 4, 5, *OutStream);
- *OutStream << " wall clock)\n";
+ *OutStream << format("%5.4f", Total.getProcessTime()) << " seconds (";
+ *OutStream << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
}
*OutStream << "\n";
@@ -422,24 +399,25 @@ void TimerGroup::removeTimer() {
TimersToPrint[i].print(Total, *OutStream);
Total.print(Total, *OutStream);
- *OutStream << std::endl; // Flush output
+ *OutStream << '\n';
+ OutStream->flush();
}
--NumTimers;
TimersToPrint.clear();
- if (OutStream != cerr.stream() && OutStream != cout.stream())
+ if (OutStream != &errs() && OutStream != &outs())
delete OutStream; // Close the file...
}
}
void TimerGroup::addTimer() {
- sys::SmartScopedLock<true> L(&*TimerLock);
+ sys::SmartScopedLock<true> L(*TimerLock);
++NumTimers;
}
void TimerGroup::addTimerToPrint(const Timer &T) {
- sys::SmartScopedLock<true> L(&*TimerLock);
+ sys::SmartScopedLock<true> L(*TimerLock);
TimersToPrint.push_back(Timer(true, T));
}
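The format() calls that replace printAlignedFP above come from the newly included llvm/Support/Format.h. A minimal standalone sketch of the same idiom, mirroring the printVal() rewrite in this hunk:

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

void printVal(llvm::raw_ostream &OS, double Val, double Total) {
  // format() carries a printf-style spec that raw_ostream expands when the
  // object is streamed, replacing the old width()/fill() dance on
  // std::ostream.
  OS << "  " << llvm::format("%7.4f", Val) << " ("
     << llvm::format("%5.1f", Val * 100 / Total) << "%)";
}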
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 279bd43ac5a8..6f805da33299 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -8,6 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include <cassert>
#include <cstring>
using namespace llvm;
@@ -18,22 +21,60 @@ const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
case InvalidArch: return "<invalid>";
case UnknownArch: return "unknown";
-
- case x86: return "i386";
- case x86_64: return "x86_64";
- case ppc: return "powerpc";
- case ppc64: return "powerpc64";
+
+ case alpha: return "alpha";
+ case arm: return "arm";
+ case bfin: return "bfin";
+ case cellspu: return "cellspu";
+ case mips: return "mips";
+ case mipsel: return "mipsel";
+ case msp430: return "msp430";
+ case pic16: return "pic16";
+ case ppc64: return "powerpc64";
+ case ppc: return "powerpc";
+ case sparc: return "sparc";
+ case systemz: return "s390x";
+ case tce: return "tce";
+ case thumb: return "thumb";
+ case x86: return "i386";
+ case x86_64: return "x86_64";
+ case xcore: return "xcore";
}
return "<invalid>";
}
+const char *Triple::getArchTypePrefix(ArchType Kind) {
+ switch (Kind) {
+ default:
+ return 0;
+
+ case alpha: return "alpha";
+
+ case arm:
+ case thumb: return "arm";
+
+ case bfin: return "bfin";
+
+ case cellspu: return "spu";
+
+ case ppc64:
+ case ppc: return "ppc";
+
+ case sparc: return "sparc";
+
+ case x86:
+ case x86_64: return "x86";
+ case xcore: return "xcore";
+ }
+}
+
const char *Triple::getVendorTypeName(VendorType Kind) {
switch (Kind) {
case UnknownVendor: return "unknown";
case Apple: return "apple";
- case PC: return "PC";
+ case PC: return "pc";
}
return "<invalid>";
@@ -44,35 +85,166 @@ const char *Triple::getOSTypeName(OSType Kind) {
case UnknownOS: return "unknown";
case AuroraUX: return "auroraux";
+ case Cygwin: return "cygwin";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
case Linux: return "linux";
+ case MinGW32: return "mingw32";
+ case MinGW64: return "mingw64";
+ case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
+ case Solaris: return "solaris";
+ case Win32: return "win32";
}
return "<invalid>";
}
+Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) {
+ if (Name == "alpha")
+ return alpha;
+ if (Name == "arm")
+ return arm;
+ if (Name == "bfin")
+ return bfin;
+ if (Name == "cellspu")
+ return cellspu;
+ if (Name == "mips")
+ return mips;
+ if (Name == "mipsel")
+ return mipsel;
+ if (Name == "msp430")
+ return msp430;
+ if (Name == "pic16")
+ return pic16;
+ if (Name == "ppc64")
+ return ppc64;
+ if (Name == "ppc")
+ return ppc;
+ if (Name == "sparc")
+ return sparc;
+ if (Name == "systemz")
+ return systemz;
+ if (Name == "tce")
+ return tce;
+ if (Name == "thumb")
+ return thumb;
+ if (Name == "x86")
+ return x86;
+ if (Name == "x86-64")
+ return x86_64;
+ if (Name == "xcore")
+ return xcore;
+
+ return UnknownArch;
+}
+
+Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) {
+ // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
+ // archs which Darwin doesn't use.
+
+ // The matching this routine does is fairly pointless, since it is neither the
+  // complete architecture list nor a reasonable subset. The problem is that
+ // historically the driver driver accepts this and also ties its -march=
+ // handling to the architecture name, so we need to be careful before removing
+ // support for it.
+
+ // This code must be kept in sync with Clang's Darwin specific argument
+ // translation.
+
+ if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" ||
+ Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" ||
+ Str == "ppc7450" || Str == "ppc970")
+ return Triple::ppc;
+
+ if (Str == "ppc64")
+ return Triple::ppc64;
+
+ if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" ||
+ Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" ||
+ Str == "pentIIm5" || Str == "pentium4")
+ return Triple::x86;
+
+ if (Str == "x86_64")
+ return Triple::x86_64;
+
+ // This is derived from the driver driver.
+ if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" ||
+ Str == "armv6" || Str == "armv7")
+ return Triple::arm;
+
+ return Triple::UnknownArch;
+}
+
//
void Triple::Parse() const {
assert(!isInitialized() && "Invalid parse call.");
- std::string ArchName = getArchName();
+ StringRef ArchName = getArchName();
+ StringRef VendorName = getVendorName();
+ StringRef OSName = getOSName();
+
if (ArchName.size() == 4 && ArchName[0] == 'i' &&
- ArchName[2] == '8' && ArchName[3] == '6')
+ ArchName[2] == '8' && ArchName[3] == '6' &&
+      ArchName[1] >= '3' && ArchName[1] <= '9') // i[3-9]86
Arch = x86;
else if (ArchName == "amd64" || ArchName == "x86_64")
Arch = x86_64;
+ else if (ArchName == "bfin")
+ Arch = bfin;
+ else if (ArchName == "pic16")
+ Arch = pic16;
else if (ArchName == "powerpc")
Arch = ppc;
else if (ArchName == "powerpc64")
Arch = ppc64;
+ else if (ArchName == "arm" ||
+ ArchName.startswith("armv") ||
+ ArchName == "xscale")
+ Arch = arm;
+ else if (ArchName == "thumb" ||
+ ArchName.startswith("thumbv"))
+ Arch = thumb;
+ else if (ArchName.startswith("alpha"))
+ Arch = alpha;
+ else if (ArchName == "spu" || ArchName == "cellspu")
+ Arch = cellspu;
+ else if (ArchName == "msp430")
+ Arch = msp430;
+ else if (ArchName == "mips" || ArchName == "mipsallegrex")
+ Arch = mips;
+ else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
+ ArchName == "psp")
+ Arch = mipsel;
+ else if (ArchName == "sparc")
+ Arch = sparc;
+ else if (ArchName == "s390x")
+ Arch = systemz;
+ else if (ArchName == "tce")
+ Arch = tce;
+ else if (ArchName == "xcore")
+ Arch = xcore;
else
Arch = UnknownArch;
- std::string VendorName = getVendorName();
+
+ // Handle some exceptional cases where the OS / environment components are
+ // stuck into the vendor field.
+ if (StringRef(getTriple()).count('-') == 1) {
+ if (VendorName.startswith("mingw32")) { // 'i386-mingw32', etc.
+ Vendor = PC;
+ OS = MinGW32;
+ return;
+ }
+
+ // arm-elf is another example, but we don't currently parse anything about
+ // the environment.
+ }
+
if (VendorName == "apple")
Vendor = Apple;
else if (VendorName == "pc")
@@ -80,78 +252,129 @@ void Triple::Parse() const {
else
Vendor = UnknownVendor;
- std::string OSName = getOSName();
- if (memcmp(&OSName[0], "auroraux", 8) == 0)
+ if (OSName.startswith("auroraux"))
OS = AuroraUX;
- else if (memcmp(&OSName[0], "darwin", 6) == 0)
+ else if (OSName.startswith("cygwin"))
+ OS = Cygwin;
+ else if (OSName.startswith("darwin"))
OS = Darwin;
- else if (memcmp(&OSName[0], "dragonfly", 9) == 0)
+ else if (OSName.startswith("dragonfly"))
OS = DragonFly;
- else if (memcmp(&OSName[0], "freebsd", 7) == 0)
+ else if (OSName.startswith("freebsd"))
OS = FreeBSD;
- else if (memcmp(&OSName[0], "linux", 5) == 0)
+ else if (OSName.startswith("linux"))
OS = Linux;
- else if (memcmp(&OSName[0], "openbsd", 7) == 0)
+ else if (OSName.startswith("mingw32"))
+ OS = MinGW32;
+ else if (OSName.startswith("mingw64"))
+ OS = MinGW64;
+ else if (OSName.startswith("netbsd"))
+ OS = NetBSD;
+ else if (OSName.startswith("openbsd"))
OS = OpenBSD;
+ else if (OSName.startswith("solaris"))
+ OS = Solaris;
+ else if (OSName.startswith("win32"))
+ OS = Win32;
else
OS = UnknownOS;
assert(isInitialized() && "Failed to initialize!");
}
-static std::string extract(const std::string &A,
- std::string::size_type begin,
- std::string::size_type end) {
- if (begin == std::string::npos)
- return "";
- if (end == std::string::npos)
- return A.substr(begin);
- return A.substr(begin, end - begin);
+StringRef Triple::getArchName() const {
+ return StringRef(Data).split('-').first; // Isolate first component
}
-static std::string extract1(const std::string &A,
- std::string::size_type begin,
- std::string::size_type end) {
- if (begin == std::string::npos || begin == end)
- return "";
- return extract(A, begin + 1, end);
+StringRef Triple::getVendorName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').first; // Isolate second component
}
-std::string Triple::getArchName() const {
- std::string Tmp = Data;
- return extract(Tmp, 0, Tmp.find('-'));
+StringRef Triple::getOSName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').first; // Isolate third component
}
-std::string Triple::getVendorName() const {
- std::string Tmp = Data;
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- return extract(Tmp, 0, Tmp.find('-'));
+StringRef Triple::getEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').second; // Strip third component
}
-std::string Triple::getOSName() const {
- std::string Tmp = Data;
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- return extract(Tmp, 0, Tmp.find('-'));
+StringRef Triple::getOSAndEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').second; // Strip second component
}
-std::string Triple::getEnvironmentName() const {
- std::string Tmp = Data;
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- return extract(Tmp, 0, std::string::npos);
+static unsigned EatNumber(StringRef &Str) {
+ assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
+ unsigned Result = Str[0]-'0';
+
+ // Eat the digit.
+ Str = Str.substr(1);
+
+ // Handle "darwin11".
+ if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') {
+ Result = Result*10 + (Str[0] - '0');
+ // Eat the digit.
+ Str = Str.substr(1);
+ }
+
+ return Result;
}
-std::string Triple::getOSAndEnvironmentName() const {
- std::string Tmp = Data;
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
- return extract(Tmp, 0, std::string::npos);
+/// getDarwinNumber - Parse the 'darwin number' out of the specified target
+/// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is
+/// not defined, return 0's. This requires that the triple have an OSType of
+/// darwin before it is called.
+void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min,
+ unsigned &Revision) const {
+ assert(getOS() == Darwin && "Not a darwin target triple!");
+ StringRef OSName = getOSName();
+ assert(OSName.startswith("darwin") && "Unknown darwin target triple!");
+
+ // Strip off "darwin".
+ OSName = OSName.substr(6);
+
+ Maj = Min = Revision = 0;
+
+ if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ return;
+
+ // The major version is the first digit.
+ Maj = EatNumber(OSName);
+ if (OSName.empty()) return;
+
+ // Handle minor version: 10.4.9 -> darwin8.9.
+ if (OSName[0] != '.')
+ return;
+
+ // Eat the '.'.
+ OSName = OSName.substr(1);
+
+ if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ return;
+
+ Min = EatNumber(OSName);
+ if (OSName.empty()) return;
+
+ // Handle revision darwin8.9.1
+ if (OSName[0] != '.')
+ return;
+
+ // Eat the '.'.
+ OSName = OSName.substr(1);
+
+ if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ return;
+
+ Revision = EatNumber(OSName);
}
-void Triple::setTriple(const std::string &Str) {
- Data = Str;
+void Triple::setTriple(const Twine &Str) {
+ Data = Str.str();
Arch = InvalidArch;
}
@@ -167,15 +390,22 @@ void Triple::setOS(OSType Kind) {
setOSName(getOSTypeName(Kind));
}
-void Triple::setArchName(const std::string &Str) {
- setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName());
+void Triple::setArchName(const StringRef &Str) {
+ // Work around a miscompilation bug for Twines in gcc 4.0.3.
+ SmallString<64> Triple;
+ Triple += Str;
+ Triple += "-";
+ Triple += getVendorName();
+ Triple += "-";
+ Triple += getOSAndEnvironmentName();
+ setTriple(Triple.str());
}
-void Triple::setVendorName(const std::string &Str) {
+void Triple::setVendorName(const StringRef &Str) {
setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
}
-void Triple::setOSName(const std::string &Str) {
+void Triple::setOSName(const StringRef &Str) {
if (hasEnvironment())
setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
"-" + getEnvironmentName());
@@ -183,11 +413,11 @@ void Triple::setOSName(const std::string &Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
-void Triple::setEnvironmentName(const std::string &Str) {
+void Triple::setEnvironmentName(const StringRef &Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
"-" + Str);
}
-void Triple::setOSAndEnvironmentName(const std::string &Str) {
+void Triple::setOSAndEnvironmentName(const StringRef &Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
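A minimal sketch of the reworked StringRef-based Triple API in use (the accessors are assumed to be declared in llvm/ADT/Triple.h as used above; the expected values in the comments follow from the parsing rules in this hunk):

#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"

void demoTriple() {
  llvm::Triple T;
  T.setTriple("x86_64-pc-linux-gnu");      // setTriple() now takes a Twine
  // Components are split lazily on '-'; Parse() runs on the first enum query.
  llvm::errs() << T.getArchName() << ' '        // "x86_64"
               << T.getVendorName() << ' '      // "pc"
               << T.getOSName() << ' '          // "linux"
               << T.getEnvironmentName() << '\n'; // "gnu"
  if (T.getArch() == llvm::Triple::x86_64 && T.getOS() == llvm::Triple::Linux)
    T.setOSName("freebsd"); // rebuilds the stored string around the new OS
}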
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
new file mode 100644
index 000000000000..292c0c2b9e5e
--- /dev/null
+++ b/lib/Support/Twine.cpp
@@ -0,0 +1,133 @@
+//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+std::string Twine::str() const {
+ SmallString<256> Vec;
+ toVector(Vec);
+ return std::string(Vec.begin(), Vec.end());
+}
+
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+ raw_svector_ostream OS(Out);
+ print(OS);
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind: break;
+ case Twine::EmptyKind: break;
+ case Twine::TwineKind:
+ static_cast<const Twine*>(Ptr)->print(OS);
+ break;
+ case Twine::CStringKind:
+ OS << static_cast<const char*>(Ptr);
+ break;
+ case Twine::StdStringKind:
+ OS << *static_cast<const std::string*>(Ptr);
+ break;
+ case Twine::StringRefKind:
+ OS << *static_cast<const StringRef*>(Ptr);
+ break;
+ case Twine::DecUIKind:
+ OS << *static_cast<const unsigned int*>(Ptr);
+ break;
+ case Twine::DecIKind:
+ OS << *static_cast<const int*>(Ptr);
+ break;
+ case Twine::DecULKind:
+ OS << *static_cast<const unsigned long*>(Ptr);
+ break;
+ case Twine::DecLKind:
+ OS << *static_cast<const long*>(Ptr);
+ break;
+ case Twine::DecULLKind:
+ OS << *static_cast<const unsigned long long*>(Ptr);
+ break;
+ case Twine::DecLLKind:
+ OS << *static_cast<const long long*>(Ptr);
+ break;
+ case Twine::UHexKind:
+ OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+ break;
+ }
+}
+
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind:
+ OS << "null"; break;
+ case Twine::EmptyKind:
+ OS << "empty"; break;
+ case Twine::TwineKind:
+ OS << "rope:";
+ static_cast<const Twine*>(Ptr)->printRepr(OS);
+ break;
+ case Twine::CStringKind:
+ OS << "cstring:\""
+ << static_cast<const char*>(Ptr) << "\"";
+ break;
+ case Twine::StdStringKind:
+ OS << "std::string:\""
+       << *static_cast<const std::string*>(Ptr) << "\"";
+ break;
+ case Twine::StringRefKind:
+ OS << "stringref:\""
+       << *static_cast<const StringRef*>(Ptr) << "\"";
+ break;
+ case Twine::DecUIKind:
+ OS << "decUI:\"" << *static_cast<const unsigned int*>(Ptr) << "\"";
+ break;
+ case Twine::DecIKind:
+ OS << "decI:\"" << *static_cast<const int*>(Ptr) << "\"";
+ break;
+ case Twine::DecULKind:
+ OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+ break;
+ case Twine::DecLKind:
+ OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+ break;
+ case Twine::DecULLKind:
+ OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+ break;
+ case Twine::DecLLKind:
+ OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+ break;
+ case Twine::UHexKind:
+ OS << "uhex:\"" << static_cast<const uint64_t*>(Ptr) << "\"";
+ break;
+ }
+}
+
+void Twine::print(raw_ostream &OS) const {
+ printOneChild(OS, LHS, getLHSKind());
+ printOneChild(OS, RHS, getRHSKind());
+}
+
+void Twine::printRepr(raw_ostream &OS) const {
+ OS << "(Twine ";
+ printOneChildRepr(OS, LHS, getLHSKind());
+ OS << " ";
+ printOneChildRepr(OS, RHS, getRHSKind());
+ OS << ")";
+}
+
+void Twine::dump() const {
+ print(llvm::errs());
+}
+
+void Twine::dumpRepr() const {
+ printRepr(llvm::errs());
+}
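One usage note: a Twine records references to its operands instead of concatenating eagerly, so it should be rendered inside the same full-expression that builds it. A minimal sketch, assuming the numeric constructors declared alongside the kinds above in llvm/ADT/Twine.h:

#include "llvm/ADT/Twine.h"
#include <string>

std::string makeLabel(const std::string &Base, unsigned N) {
  // The rope Base + "." + N is rendered only when str() runs; every
  // referenced temporary is still alive within this full-expression.
  return (llvm::Twine(Base) + "." + llvm::Twine(N)).str();
}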
diff --git a/lib/Support/raw_os_ostream.cpp b/lib/Support/raw_os_ostream.cpp
new file mode 100644
index 000000000000..3374dd7a66a0
--- /dev/null
+++ b/lib/Support/raw_os_ostream.cpp
@@ -0,0 +1,30 @@
+//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support adapting raw_ostream to std::ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_os_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// raw_os_ostream
+//===----------------------------------------------------------------------===//
+
+raw_os_ostream::~raw_os_ostream() {
+ flush();
+}
+
+void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.write(Ptr, Size);
+}
+
+uint64_t raw_os_ostream::current_pos() { return OS.tellp(); }
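A minimal sketch of the adapter in use; the scoping matters because the destructor performs the final flush():

#include "llvm/Support/raw_os_ostream.h"
#include <sstream>
#include <string>

std::string demoAdapter() {
  std::ostringstream SS;
  {
    llvm::raw_os_ostream OS(SS); // write_impl() forwards to SS.write()
    OS << "value = " << 42 << '\n';
  } // ~raw_os_ostream() flushes any buffered bytes into SS
  return SS.str(); // "value = 42\n"
}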
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 42e6fda97baf..0a82cc1d10c3 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -18,7 +18,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
-#include <ostream>
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include <sys/stat.h>
+#include <sys/types.h>
#if defined(HAVE_UNISTD_H)
# include <unistd.h>
@@ -43,10 +47,59 @@
using namespace llvm;
+raw_ostream::~raw_ostream() {
+ // raw_ostream's subclasses should take care to flush the buffer
+ // in their destructors.
+ assert(OutBufCur == OutBufStart &&
+ "raw_ostream destructor called with non-empty buffer!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+
+ // If there are any pending errors, report them now. Clients wishing
+ // to avoid llvm_report_error calls should check for errors with
+ // has_error() and clear the error flag with clear_error() before
+ // destructing raw_ostream objects which may have errors.
+ if (Error)
+ llvm_report_error("IO failure on output stream.");
+}
// An out of line virtual method to provide a home for the class vtable.
void raw_ostream::handle() {}
+size_t raw_ostream::preferred_buffer_size() {
+ // BUFSIZ is intended to be a reasonable default.
+ return BUFSIZ;
+}
+
+void raw_ostream::SetBuffered() {
+ // Ask the subclass to determine an appropriate buffer size.
+ if (size_t Size = preferred_buffer_size())
+ SetBufferSize(Size);
+ else
+ // It may return 0, meaning this stream should be unbuffered.
+ SetUnbuffered();
+}
+
+void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
+ BufferKind Mode) {
+ assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
+ (Mode != Unbuffered && BufferStart && Size)) &&
+ "stream must be unbuffered or have at least one byte");
+ // Make sure the current buffer is free of content (we can't flush here; the
+ // child buffer management logic will be in write_impl).
+ assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+ OutBufStart = BufferStart;
+ OutBufEnd = OutBufStart+Size;
+ OutBufCur = OutBufStart;
+ BufferMode = Mode;
+
+ assert(OutBufStart <= OutBufEnd && "Invalid size!");
+}
+
raw_ostream &raw_ostream::operator<<(unsigned long N) {
// Zero is a special case.
if (N == 0)
@@ -73,10 +126,10 @@ raw_ostream &raw_ostream::operator<<(long N) {
}
raw_ostream &raw_ostream::operator<<(unsigned long long N) {
- // Zero is a special case.
- if (N == 0)
- return *this << '0';
-
+ // Output using 32-bit div/mod when possible.
+ if (N == static_cast<unsigned long>(N))
+ return this->operator<<(static_cast<unsigned long>(N));
+
char NumberBuffer[20];
char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
char *CurPtr = EndPtr;
@@ -97,10 +150,7 @@ raw_ostream &raw_ostream::operator<<(long long N) {
return this->operator<<(static_cast<unsigned long long>(N));
}
-raw_ostream &raw_ostream::operator<<(const void *P) {
- uintptr_t N = (uintptr_t) P;
- *this << '0' << 'x';
-
+raw_ostream &raw_ostream::write_hex(unsigned long long N) {
// Zero is a special case.
if (N == 0)
return *this << '0';
@@ -110,7 +160,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) {
char *CurPtr = EndPtr;
while (N) {
- unsigned x = N % 16;
+ uintptr_t x = N % 16;
*--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
N /= 16;
}
@@ -118,44 +168,78 @@ raw_ostream &raw_ostream::operator<<(const void *P) {
return write(CurPtr, EndPtr-CurPtr);
}
+raw_ostream &raw_ostream::operator<<(const void *P) {
+ *this << '0' << 'x';
+
+ return write_hex((uintptr_t) P);
+}
+
+raw_ostream &raw_ostream::operator<<(double N) {
+ this->operator<<(ftostr(N));
+ return *this;
+}
+
+
+
void raw_ostream::flush_nonempty() {
assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
- write_impl(OutBufStart, OutBufCur - OutBufStart);
- OutBufCur = OutBufStart;
+ size_t Length = OutBufCur - OutBufStart;
+ OutBufCur = OutBufStart;
+ write_impl(OutBufStart, Length);
}
raw_ostream &raw_ostream::write(unsigned char C) {
// Group exceptional cases into a single branch.
- if (OutBufCur >= OutBufEnd) {
- if (Unbuffered) {
- write_impl(reinterpret_cast<char*>(&C), 1);
- return *this;
+ if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) {
+ if (BUILTIN_EXPECT(!OutBufStart, false)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(reinterpret_cast<char*>(&C), 1);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(C);
}
-
- if (!OutBufStart)
- SetBufferSize();
- else
- flush_nonempty();
+
+ flush_nonempty();
}
*OutBufCur++ = C;
return *this;
}
-raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) {
+raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
// Group exceptional cases into a single branch.
if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) {
- if (Unbuffered) {
- write_impl(Ptr, Size);
- return *this;
+ if (BUILTIN_EXPECT(!OutBufStart, false)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(Ptr, Size);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(Ptr, Size);
}
-
- if (!OutBufStart)
- SetBufferSize();
- else
+
+ // Write out the data in buffer-sized blocks until the remainder
+ // fits within the buffer.
+ do {
+ size_t NumBytes = OutBufEnd - OutBufCur;
+ copy_to_buffer(Ptr, NumBytes);
flush_nonempty();
+ Ptr += NumBytes;
+ Size -= NumBytes;
+ } while (OutBufCur+Size > OutBufEnd);
}
-
+
+ copy_to_buffer(Ptr, Size);
+
+ return *this;
+}
+
+void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
+ assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");
+
// Handle short strings specially, memcpy isn't very good at very short
// strings.
switch (Size) {
@@ -165,40 +249,24 @@ raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) {
case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
case 0: break;
default:
- // Normally the string to emit is shorter than the buffer.
- if (Size <= unsigned(OutBufEnd-OutBufStart)) {
- memcpy(OutBufCur, Ptr, Size);
- break;
- }
-
- // Otherwise we are emitting a string larger than our buffer. We
- // know we already flushed, so just write it out directly.
- write_impl(Ptr, Size);
- Size = 0;
+ memcpy(OutBufCur, Ptr, Size);
break;
}
- OutBufCur += Size;
- return *this;
+ OutBufCur += Size;
}
// Formatted output.
raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
// If we have more than a few bytes left in our output buffer, try
// formatting directly onto its end.
- //
- // FIXME: This test is a bit silly, since if we don't have enough
- // space in the buffer we will have to flush the formatted output
- // anyway. We should just flush upfront in such cases, and use the
- // whole buffer as our scratch pad. Note, however, that this case is
- // also necessary for correctness on unbuffered streams.
- unsigned NextBufferSize = 127;
- if (OutBufEnd-OutBufCur > 3) {
- unsigned BufferBytesLeft = OutBufEnd-OutBufCur;
- unsigned BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
+ size_t NextBufferSize = 127;
+ size_t BufferBytesLeft = OutBufEnd - OutBufCur;
+ if (BufferBytesLeft > 3) {
+ size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
// Common case is that we have plenty of space.
- if (BytesUsed < BufferBytesLeft) {
+ if (BytesUsed <= BufferBytesLeft) {
OutBufCur += BytesUsed;
return *this;
}
@@ -217,11 +285,11 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
V.resize(NextBufferSize);
// Try formatting into the SmallVector.
- unsigned BytesUsed = Fmt.print(&V[0], NextBufferSize);
+ size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
// If BytesUsed fit into the vector, we win.
if (BytesUsed <= NextBufferSize)
- return write(&V[0], BytesUsed);
+ return write(V.data(), BytesUsed);
// Otherwise, try again with a new size.
assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
@@ -229,6 +297,26 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
}
}
+/// indent - Insert 'NumSpaces' spaces.
+raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
+  static const char Spaces[] = "                                        "
+                               "                                        "
+                               "                                        ";
+
+ // Usually the indentation is small, handle it with a fastpath.
+ if (NumSpaces < array_lengthof(Spaces))
+ return write(Spaces, NumSpaces);
+
+ while (NumSpaces) {
+ unsigned NumToWrite = std::min(NumSpaces,
+ (unsigned)array_lengthof(Spaces)-1);
+ write(Spaces, NumToWrite);
+ NumSpaces -= NumToWrite;
+ }
+ return *this;
+}
+
+
//===----------------------------------------------------------------------===//
// Formatted Output
//===----------------------------------------------------------------------===//
@@ -245,8 +333,12 @@ void format_object_base::home() {
/// occurs, information about the error is put into ErrorInfo, and the
/// stream should be immediately destroyed; the string will be empty
/// if no error occurred.
-raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary,
- std::string &ErrorInfo) : pos(0) {
+raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
+ unsigned Flags) : pos(0) {
+ // Verify that we don't have both "append" and "excl".
+ assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
+ "Cannot specify both 'excl' and 'append' file creation flags!");
+
ErrorInfo.clear();
// Handle "-" as stdout.
@@ -254,18 +346,26 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary,
FD = STDOUT_FILENO;
// If user requested binary then put stdout into binary mode if
// possible.
- if (Binary)
+ if (Flags & F_Binary)
sys::Program::ChangeStdoutToBinary();
ShouldClose = false;
return;
}
- int Flags = O_WRONLY|O_CREAT|O_TRUNC;
+ int OpenFlags = O_WRONLY|O_CREAT;
#ifdef O_BINARY
- if (Binary)
- Flags |= O_BINARY;
+ if (Flags & F_Binary)
+ OpenFlags |= O_BINARY;
#endif
- FD = open(Filename, Flags, 0644);
+
+ if (Flags & F_Append)
+ OpenFlags |= O_APPEND;
+ else
+ OpenFlags |= O_TRUNC;
+ if (Flags & F_Excl)
+ OpenFlags |= O_EXCL;
+
+ FD = open(Filename, OpenFlags, 0664);
if (FD < 0) {
ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
ShouldClose = false;
@@ -275,33 +375,56 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary,
}
raw_fd_ostream::~raw_fd_ostream() {
- if (FD >= 0) {
- flush();
- if (ShouldClose)
- ::close(FD);
- }
+ if (FD < 0) return;
+ flush();
+ if (ShouldClose)
+ if (::close(FD) != 0)
+ error_detected();
}
-void raw_fd_ostream::write_impl(const char *Ptr, unsigned Size) {
+
+void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert (FD >= 0 && "File already closed.");
pos += Size;
- ::write(FD, Ptr, Size);
+ if (::write(FD, Ptr, Size) != (ssize_t) Size)
+ error_detected();
}
void raw_fd_ostream::close() {
assert (ShouldClose);
ShouldClose = false;
flush();
- ::close(FD);
+ if (::close(FD) != 0)
+ error_detected();
FD = -1;
}
uint64_t raw_fd_ostream::seek(uint64_t off) {
flush();
- pos = lseek(FD, off, SEEK_SET);
+ pos = ::lseek(FD, off, SEEK_SET);
+ if (pos != off)
+ error_detected();
return pos;
}
+size_t raw_fd_ostream::preferred_buffer_size() {
+#if !defined(_MSC_VER) && !defined(__MINGW32__) // Windows has no st_blksize.
+ assert(FD >= 0 && "File not yet open!");
+ struct stat statbuf;
+ if (fstat(FD, &statbuf) == 0) {
+ // If this is a terminal, don't use buffering. Line buffering
+ // would be a more traditional thing to do, but it's not worth
+ // the complexity.
+ if (S_ISCHR(statbuf.st_mode) && isatty(FD))
+ return 0;
+ // Return the preferred block size.
+ return statbuf.st_blksize;
+ }
+ error_detected();
+#endif
+ return raw_ostream::preferred_buffer_size();
+}
+
raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
bool bg) {
if (sys::Process::ColorNeedsFlush())
@@ -310,7 +433,7 @@ raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
(colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
: sys::Process::OutputColor(colors, bold, bg);
if (colorcode) {
- unsigned len = strlen(colorcode);
+ size_t len = strlen(colorcode);
write(colorcode, len);
// don't account colors towards output characters
pos -= len;
@@ -323,7 +446,7 @@ raw_ostream &raw_fd_ostream::resetColor() {
flush();
const char *colorcode = sys::Process::ResetColor();
if (colorcode) {
- unsigned len = strlen(colorcode);
+ size_t len = strlen(colorcode);
write(colorcode, len);
// don't account colors towards output characters
pos -= len;
@@ -331,12 +454,18 @@ raw_ostream &raw_fd_ostream::resetColor() {
return *this;
}
+bool raw_fd_ostream::is_displayed() const {
+ return sys::Process::FileDescriptorIsDisplayed(FD);
+}
+
//===----------------------------------------------------------------------===//
// raw_stdout/err_ostream
//===----------------------------------------------------------------------===//
+// Set buffer settings to model stdout and stderr behavior.
+// Set standard error to be unbuffered by default.
raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {}
-raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
+raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
true) {}
// An out of line virtual method to provide a home for the class vtable.
@@ -357,23 +486,12 @@ raw_ostream &llvm::errs() {
return S;
}
-//===----------------------------------------------------------------------===//
-// raw_os_ostream
-//===----------------------------------------------------------------------===//
-
-raw_os_ostream::~raw_os_ostream() {
- flush();
-}
-
-void raw_os_ostream::write_impl(const char *Ptr, unsigned Size) {
- OS.write(Ptr, Size);
+/// nulls() - This returns a reference to a raw_ostream which discards output.
+raw_ostream &llvm::nulls() {
+ static raw_null_ostream S;
+ return S;
}
-uint64_t raw_os_ostream::current_pos() { return OS.tellp(); }
-
-uint64_t raw_os_ostream::tell() {
- return (uint64_t)OS.tellp() + GetNumBytesInBuffer();
-}
//===----------------------------------------------------------------------===//
// raw_string_ostream
@@ -383,7 +501,7 @@ raw_string_ostream::~raw_string_ostream() {
flush();
}
-void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) {
+void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
OS.append(Ptr, Size);
}
@@ -391,16 +509,65 @@ void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) {
// raw_svector_ostream
//===----------------------------------------------------------------------===//
+// The raw_svector_ostream implementation uses the SmallVector itself as the
+// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is
+// always pointing past the end of the vector, but within the vector
+// capacity. This allows raw_ostream to write directly into the correct place,
+// and we only need to set the vector size when the data is flushed.
+
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ // Set up the initial external buffer. We make sure that the buffer has at
+ // least 128 bytes free; raw_ostream itself only requires 64, but we want to
+ // make sure that we don't grow the buffer unnecessarily on destruction (when
+ // the data is flushed). See the FIXME below.
+ OS.reserve(OS.size() + 128);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
raw_svector_ostream::~raw_svector_ostream() {
+ // FIXME: Prevent resizing during this flush().
flush();
}
-void raw_svector_ostream::write_impl(const char *Ptr, unsigned Size) {
- OS.append(Ptr, Ptr + Size);
+void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
+ assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() &&
+ "Invalid write_impl() call!");
+
+ // We don't need to copy the bytes, just commit the bytes to the
+ // SmallVector.
+ OS.set_size(OS.size() + Size);
+
+ // Grow the vector if necessary.
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+
+ // Update the buffer position.
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
}
uint64_t raw_svector_ostream::current_pos() { return OS.size(); }
-uint64_t raw_svector_ostream::tell() {
- return OS.size() + GetNumBytesInBuffer();
+StringRef raw_svector_ostream::str() {
+ flush();
+ return StringRef(OS.begin(), OS.size());
+}
+
+//===----------------------------------------------------------------------===//
+// raw_null_ostream
+//===----------------------------------------------------------------------===//
+
+raw_null_ostream::~raw_null_ostream() {
+#ifndef NDEBUG
+ // ~raw_ostream asserts that the buffer is empty. This isn't necessary
+ // with raw_null_ostream, but it's better to have raw_null_ostream follow
+ // the rules than to change the rules just for raw_null_ostream.
+ flush();
+#endif
+}
+
+void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
+}
+
+uint64_t raw_null_ostream::current_pos() {
+ return 0;
}
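To round out the stream variants introduced in this hunk, a minimal usage sketch of raw_svector_ostream and nulls():

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

void demoStreams() {
  llvm::SmallString<128> Buf;
  llvm::raw_svector_ostream OS(Buf);
  OS << "pos=" << 42;
  // str() flushes and returns a StringRef over the vector's own storage;
  // per the notes above, the stream writes straight into the SmallVector's
  // spare capacity rather than an internal buffer.
  llvm::StringRef S = OS.str();
  llvm::nulls() << S; // raw_null_ostream discards everything written to it
}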
diff --git a/lib/Support/regcclass.h b/lib/Support/regcclass.h
new file mode 100644
index 000000000000..2cea3e4e5406
--- /dev/null
+++ b/lib/Support/regcclass.h
@@ -0,0 +1,70 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cclass.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-class table */
+static struct cclass {
+ const char *name;
+ const char *chars;
+ const char *multis;
+} cclasses[] = {
+ { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", ""} ,
+ { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "blank", " \t", ""} ,
+ { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
+ { "digit", "0123456789", ""} ,
+ { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "lower", "abcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+ ""} ,
+ { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "space", "\t\n\v\f\r ", ""} ,
+ { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ ""} ,
+ { "xdigit", "0123456789ABCDEFabcdef",
+ ""} ,
+ { NULL, 0, "" }
+};
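The parser consults this table from p_b_cclass() (in regcomp.c, below). A hypothetical standalone sketch of that lookup, using a struct of the same shape as cclass:

#include <cstring>

struct CClassEntry {
  const char *name;
  const char *chars;
  const char *multis;
};

// Scan to the NULL-name sentinel; return the member characters for a POSIX
// class name such as "digit", or NULL if the name is unknown.
static const char *classChars(const CClassEntry *Table, const char *Name) {
  for (const CClassEntry *cp = Table; cp->name != NULL; ++cp)
    if (std::strcmp(cp->name, Name) == 0)
      return cp->chars;
  return NULL;
}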
diff --git a/lib/Support/regcname.h b/lib/Support/regcname.h
new file mode 100644
index 000000000000..3c0bb248ffa7
--- /dev/null
+++ b/lib/Support/regcname.h
@@ -0,0 +1,139 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cname.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-name table */
+static struct cname {
+ const char *name;
+ char code;
+} cnames[] = {
+ { "NUL", '\0' },
+ { "SOH", '\001' },
+ { "STX", '\002' },
+ { "ETX", '\003' },
+ { "EOT", '\004' },
+ { "ENQ", '\005' },
+ { "ACK", '\006' },
+ { "BEL", '\007' },
+ { "alert", '\007' },
+ { "BS", '\010' },
+ { "backspace", '\b' },
+ { "HT", '\011' },
+ { "tab", '\t' },
+ { "LF", '\012' },
+ { "newline", '\n' },
+ { "VT", '\013' },
+ { "vertical-tab", '\v' },
+ { "FF", '\014' },
+ { "form-feed", '\f' },
+ { "CR", '\015' },
+ { "carriage-return", '\r' },
+ { "SO", '\016' },
+ { "SI", '\017' },
+ { "DLE", '\020' },
+ { "DC1", '\021' },
+ { "DC2", '\022' },
+ { "DC3", '\023' },
+ { "DC4", '\024' },
+ { "NAK", '\025' },
+ { "SYN", '\026' },
+ { "ETB", '\027' },
+ { "CAN", '\030' },
+ { "EM", '\031' },
+ { "SUB", '\032' },
+ { "ESC", '\033' },
+ { "IS4", '\034' },
+ { "FS", '\034' },
+ { "IS3", '\035' },
+ { "GS", '\035' },
+ { "IS2", '\036' },
+ { "RS", '\036' },
+ { "IS1", '\037' },
+ { "US", '\037' },
+ { "space", ' ' },
+ { "exclamation-mark", '!' },
+ { "quotation-mark", '"' },
+ { "number-sign", '#' },
+ { "dollar-sign", '$' },
+ { "percent-sign", '%' },
+ { "ampersand", '&' },
+ { "apostrophe", '\'' },
+ { "left-parenthesis", '(' },
+ { "right-parenthesis", ')' },
+ { "asterisk", '*' },
+ { "plus-sign", '+' },
+ { "comma", ',' },
+ { "hyphen", '-' },
+ { "hyphen-minus", '-' },
+ { "period", '.' },
+ { "full-stop", '.' },
+ { "slash", '/' },
+ { "solidus", '/' },
+ { "zero", '0' },
+ { "one", '1' },
+ { "two", '2' },
+ { "three", '3' },
+ { "four", '4' },
+ { "five", '5' },
+ { "six", '6' },
+ { "seven", '7' },
+ { "eight", '8' },
+ { "nine", '9' },
+ { "colon", ':' },
+ { "semicolon", ';' },
+ { "less-than-sign", '<' },
+ { "equals-sign", '=' },
+ { "greater-than-sign", '>' },
+ { "question-mark", '?' },
+ { "commercial-at", '@' },
+ { "left-square-bracket", '[' },
+ { "backslash", '\\' },
+ { "reverse-solidus", '\\' },
+ { "right-square-bracket", ']' },
+ { "circumflex", '^' },
+ { "circumflex-accent", '^' },
+ { "underscore", '_' },
+ { "low-line", '_' },
+ { "grave-accent", '`' },
+ { "left-brace", '{' },
+ { "left-curly-bracket", '{' },
+ { "vertical-line", '|' },
+ { "right-brace", '}' },
+ { "right-curly-bracket", '}' },
+ { "tilde", '~' },
+ { "DEL", '\177' },
+ { NULL, 0 }
+};
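Likewise, a hypothetical sketch of resolving a collating-element name from a bracket expression such as [[.comma.]] against a table shaped like cnames (the real lookup lives in p_b_coll_elem() in regcomp.c):

#include <cstring>

struct CNameEntry {
  const char *name;
  char code;
};

// Scan to the NULL-name sentinel; '\0' doubles as the not-found result,
// matching the table's own terminator convention.
static char lookupCName(const CNameEntry *Table, const char *Name) {
  for (const CNameEntry *cn = Table; cn->name != NULL; ++cn)
    if (std::strcmp(cn->name, Name) == 0)
      return cn->code;
  return '\0';
}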
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
new file mode 100644
index 000000000000..cd018d5dc5bc
--- /dev/null
+++ b/lib/Support/regcomp.c
@@ -0,0 +1,1525 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+#include "regcclass.h"
+#include "regcname.h"
+
+/*
+ * parse structure, passed up and down to avoid global variables and
+ * other clumsinesses
+ */
+struct parse {
+ char *next; /* next character in RE */
+ char *end; /* end of string (-> NUL normally) */
+ int error; /* has an error been seen? */
+ sop *strip; /* malloced strip */
+ sopno ssize; /* malloced strip size (allocated) */
+ sopno slen; /* malloced strip length (used) */
+ int ncsalloc; /* number of csets allocated */
+ struct re_guts *g;
+# define NPAREN 10 /* we need to remember () 1-9 for back refs */
+ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
+ sopno pend[NPAREN]; /* -> ) ([0] unused) */
+};
+
+static void p_ere(struct parse *, int);
+static void p_ere_exp(struct parse *);
+static void p_str(struct parse *);
+static void p_bre(struct parse *, int, int);
+static int p_simp_re(struct parse *, int);
+static int p_count(struct parse *);
+static void p_bracket(struct parse *);
+static void p_b_term(struct parse *, cset *);
+static void p_b_cclass(struct parse *, cset *);
+static void p_b_eclass(struct parse *, cset *);
+static char p_b_symbol(struct parse *);
+static char p_b_coll_elem(struct parse *, int);
+static char othercase(int);
+static void bothcases(struct parse *, int);
+static void ordinary(struct parse *, int);
+static void nonnewline(struct parse *);
+static void repeat(struct parse *, sopno, int, int);
+static int seterr(struct parse *, int);
+static cset *allocset(struct parse *);
+static void freeset(struct parse *, cset *);
+static int freezeset(struct parse *, cset *);
+static int firstch(struct parse *, cset *);
+static int nch(struct parse *, cset *);
+static void mcadd(struct parse *, cset *, const char *);
+static void mcinvert(struct parse *, cset *);
+static void mccase(struct parse *, cset *);
+static int isinsets(struct re_guts *, int);
+static int samesets(struct re_guts *, int, int);
+static void categorize(struct parse *, struct re_guts *);
+static sopno dupl(struct parse *, sopno, sopno);
+static void doemit(struct parse *, sop, size_t);
+static void doinsert(struct parse *, sop, size_t, sopno);
+static void dofwd(struct parse *, sopno, sop);
+static void enlarge(struct parse *, sopno);
+static void stripsnug(struct parse *, struct re_guts *);
+static void findmust(struct parse *, struct re_guts *);
+static sopno pluscount(struct parse *, struct re_guts *);
+
+static char nuls[10]; /* place to point scanner in event of error */
+
+/*
+ * macros for use with parse structure
+ * BEWARE: these know that the parse structure is named `p' !!!
+ */
+#define PEEK() (*p->next)
+#define PEEK2() (*(p->next+1))
+#define MORE() (p->next < p->end)
+#define MORE2() (p->next+1 < p->end)
+#define SEE(c) (MORE() && PEEK() == (c))
+#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT() (p->next++)
+#define NEXT2() (p->next += 2)
+#define NEXTn(n) (p->next += (n))
+#define GETNEXT() (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e) (void)((co) || SETERROR(e))
+#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
+#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
+#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
+#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
+#define HERE() (p->slen)
+#define THERE() (p->slen - 1)
+#define THERETHERE() (p->slen - 2)
+#define DROP(n) (p->slen -= (n))
+
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
+#endif
+#define INFINITY (DUPMAX + 1)
+
+#ifndef NDEBUG
+static int never = 0; /* for use in asserts; shuts lint up */
+#else
+#define never 0 /* some <assert.h>s have bugs too */
+#endif
+
+/*
+ - llvm_regcomp - interface for parser and compilation
+ */
+int /* 0 success, otherwise REG_something */
+llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
+{
+ struct parse pa;
+ struct re_guts *g;
+ struct parse *p = &pa;
+ int i;
+ size_t len;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#endif
+
+ cflags = GOODFLAGS(cflags);
+ if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
+ return(REG_INVARG);
+
+ if (cflags&REG_PEND) {
+ if (preg->re_endp < pattern)
+ return(REG_INVARG);
+ len = preg->re_endp - pattern;
+ } else
+ len = strlen((const char *)pattern);
+
+ /* do the mallocs early so failure handling is easy */
+ g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+ (NC-1)*sizeof(cat_t));
+ if (g == NULL)
+ return(REG_ESPACE);
+ p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
+ p->strip = (sop *)calloc(p->ssize, sizeof(sop));
+ p->slen = 0;
+ if (p->strip == NULL) {
+ free((char *)g);
+ return(REG_ESPACE);
+ }
+
+ /* set things up */
+ p->g = g;
+ p->next = (char *)pattern; /* convenience; we do not modify it */
+ p->end = p->next + len;
+ p->error = 0;
+ p->ncsalloc = 0;
+ for (i = 0; i < NPAREN; i++) {
+ p->pbegin[i] = 0;
+ p->pend[i] = 0;
+ }
+ g->csetsize = NC;
+ g->sets = NULL;
+ g->setbits = NULL;
+ g->ncsets = 0;
+ g->cflags = cflags;
+ g->iflags = 0;
+ g->nbol = 0;
+ g->neol = 0;
+ g->must = NULL;
+ g->mlen = 0;
+ g->nsub = 0;
+ g->ncategories = 1; /* category 0 is "everything else" */
+ g->categories = &g->catspace[-(CHAR_MIN)];
+ (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
+ g->backrefs = 0;
+
+ /* do it */
+ EMIT(OEND, 0);
+ g->firststate = THERE();
+ if (cflags&REG_EXTENDED)
+ p_ere(p, OUT);
+ else if (cflags&REG_NOSPEC)
+ p_str(p);
+ else
+ p_bre(p, OUT, OUT);
+ EMIT(OEND, 0);
+ g->laststate = THERE();
+
+ /* tidy up loose ends and fill things in */
+ categorize(p, g);
+ stripsnug(p, g);
+ findmust(p, g);
+ g->nplus = pluscount(p, g);
+ g->magic = MAGIC2;
+ preg->re_nsub = g->nsub;
+ preg->re_g = g;
+ preg->re_magic = MAGIC1;
+#ifndef REDEBUG
+ /* not debugging, so can't rely on the assert() in llvm_regexec() */
+ if (g->iflags&REGEX_BAD)
+ SETERROR(REG_ASSERT);
+#endif
+
+ /* win or lose, we're done */
+ if (p->error != 0) /* lose */
+ llvm_regfree(preg);
+ return(p->error);
+}
+
+/*
+ - p_ere - ERE parser top level, concatenation and alternation
+ */
+static void
+p_ere(struct parse *p, int stop) /* character this ERE should end at */
+{
+ char c;
+ sopno prevback = 0;
+ sopno prevfwd = 0;
+ sopno conc;
+ int first = 1; /* is this the first alternative? */
+
+ for (;;) {
+ /* do a bunch of concatenated expressions */
+ conc = HERE();
+ while (MORE() && (c = PEEK()) != '|' && c != stop)
+ p_ere_exp(p);
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
+
+ if (!EAT('|'))
+ break; /* NOTE BREAK OUT */
+
+ if (first) {
+ INSERT(OCH_, conc); /* offset is wrong */
+ prevfwd = conc;
+ prevback = conc;
+ first = 0;
+ }
+ ASTERN(OOR1, prevback);
+ prevback = THERE();
+ AHEAD(prevfwd); /* fix previous offset */
+ prevfwd = HERE();
+ EMIT(OOR2, 0); /* offset is very wrong */
+ }
+
+ if (!first) { /* tail-end fixups */
+ AHEAD(prevfwd);
+ ASTERN(O_CH, prevback);
+ }
+
+ assert(!MORE() || SEE(stop));
+}
+
+/*
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ */
+static void
+p_ere_exp(struct parse *p)
+{
+ char c;
+ sopno pos;
+ int count;
+ int count2;
+ sopno subno;
+ int wascaret = 0;
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+
+ pos = HERE();
+ switch (c) {
+ case '(':
+ REQUIRE(MORE(), REG_EPAREN);
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ if (!SEE(')'))
+ p_ere(p, ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ MUSTEAT(')', REG_EPAREN);
+ break;
+#ifndef POSIX_MISTAKE
+ case ')': /* happens only if no current unmatched ( */
+ /*
+ * You may ask, why the ifndef? Because I didn't notice
+ * this until slightly too late for 1003.2, and none of the
+ * other 1003.2 regular-expression reviewers noticed it at
+ * all. So an unmatched ) is legal POSIX, at least until
+ * we can get it fixed.
+ */
+ SETERROR(REG_EPAREN);
+ break;
+#endif
+ case '^':
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ wascaret = 1;
+ break;
+ case '$':
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ break;
+ case '|':
+ SETERROR(REG_EMPTY);
+ break;
+ case '*':
+ case '+':
+ case '?':
+ SETERROR(REG_BADRPT);
+ break;
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case '\\':
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = GETNEXT();
+ ordinary(p, c);
+ break;
+ case '{': /* okay as ordinary except if digit follows */
+ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, c);
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ /* we call { a repetition if followed by a digit */
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
+ return; /* no repetition, we're done */
+ NEXT();
+
+ REQUIRE(!wascaret, REG_BADRPT);
+ switch (c) {
+ case '*': /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ break;
+ case '+':
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ break;
+ case '?':
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, pos); /* offset slightly wrong */
+ ASTERN(OOR1, pos); /* this one's right */
+ AHEAD(pos); /* fix the OCH_ */
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case '{':
+ count = p_count(p);
+ if (EAT(',')) {
+ if (isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EAT('}')) { /* error heuristics */
+ while (MORE() && PEEK() != '}')
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
+ return;
+ SETERROR(REG_BADRPT);
+}
+
+/*
+ - p_str - string (no metacharacters) "parser"
+ */
+static void
+p_str(struct parse *p)
+{
+ REQUIRE(MORE(), REG_EMPTY);
+ while (MORE())
+ ordinary(p, GETNEXT());
+}
+
+/*
+ - p_bre - BRE parser top level, anchoring and concatenation
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor. The
+ * only undesirable side effect is that '$' gets included as a character
+ * category in such cases. This is fairly harmless; not worth fixing.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_bre(struct parse *p,
+ int end1, /* first terminating character */
+ int end2) /* second terminating character */
+{
+ sopno start = HERE();
+ int first = 1; /* first subexpression? */
+ int wasdollar = 0;
+
+ if (EAT('^')) {
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ }
+ while (MORE() && !SEETWO(end1, end2)) {
+ wasdollar = p_simp_re(p, first);
+ first = 0;
+ }
+ if (wasdollar) { /* oops, that was a trailing anchor */
+ DROP(1);
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ }
+
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
+}
+
+/*
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ */
+static int /* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p,
+ int starordinary) /* is a leading * an ordinary character? */
+{
+ int c;
+ int count;
+ int count2;
+ sopno pos;
+ int i;
+ sopno subno;
+# define BACKSL (1<<CHAR_BIT)
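+ /*
+  * BACKSL sets a flag bit just above the highest character bit, so a
+  * backslash-escaped character reaches the switch below as BACKSL|c
+  * without colliding with any plain character value.
+  */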
+
+ pos = HERE(); /* repetition op, if any, covers from here */
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+ if (c == '\\') {
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = BACKSL | GETNEXT();
+ }
+ switch (c) {
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case BACKSL|'{':
+ SETERROR(REG_BADRPT);
+ break;
+ case BACKSL|'(':
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ /* the MORE here is an error heuristic */
+ if (MORE() && !SEETWO('\\', ')'))
+ p_bre(p, '\\', ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+ break;
+ case BACKSL|')': /* should not get here -- must be user */
+ case BACKSL|'}':
+ SETERROR(REG_EPAREN);
+ break;
+ case BACKSL|'1':
+ case BACKSL|'2':
+ case BACKSL|'3':
+ case BACKSL|'4':
+ case BACKSL|'5':
+ case BACKSL|'6':
+ case BACKSL|'7':
+ case BACKSL|'8':
+ case BACKSL|'9':
+ i = (c&~BACKSL) - '0';
+ assert(i < NPAREN);
+ if (p->pend[i] != 0) {
+ assert(i <= p->g->nsub);
+ EMIT(OBACK_, i);
+ assert(p->pbegin[i] != 0);
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+ (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+ EMIT(O_BACK, i);
+ } else
+ SETERROR(REG_ESUBREG);
+ p->g->backrefs = 1;
+ break;
+ case '*':
+ REQUIRE(starordinary, REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, (char)c);
+ break;
+ }
+
+ if (EAT('*')) { /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ } else if (EATTWO('\\', '{')) {
+ count = p_count(p);
+ if (EAT(',')) {
+ if (MORE() && isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EATTWO('\\', '}')) { /* error heuristics */
+ while (MORE() && !SEETWO('\\', '}'))
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ } else if (c == '$') /* $ (but not \$) ends it */
+ return(1);
+
+ return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ */
+static int /* the value */
+p_count(struct parse *p)
+{
+ int count = 0;
+ int ndigits = 0;
+
+ while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+ count = count*10 + (GETNEXT() - '0');
+ ndigits++;
+ }
+
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+ return(count);
+}
+
+/*
+ - p_bracket - parse a bracketed character list
+ *
+ * Note a significant property of this code: if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(struct parse *p)
+{
+ cset *cs;
+ int invert = 0;
+
+ /* Dept of Truly Sickening Special-Case Kludges */
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
+
+ if ((cs = allocset(p)) == NULL) {
+ /* allocset did set error status in p */
+ return;
+ }
+
+ if (EAT('^'))
+ invert++; /* make note to invert set at end */
+ if (EAT(']'))
+ CHadd(cs, ']');
+ else if (EAT('-'))
+ CHadd(cs, '-');
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+ p_b_term(p, cs);
+ if (EAT('-'))
+ CHadd(cs, '-');
+ MUSTEAT(']', REG_EBRACK);
+
+ if (p->error != 0) { /* don't mess things up further */
+ freeset(p, cs);
+ return;
+ }
+
+ if (p->g->cflags&REG_ICASE) {
+ int i;
+ int ci;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i) && isalpha(i)) {
+ ci = othercase(i);
+ if (ci != i)
+ CHadd(cs, ci);
+ }
+ if (cs->multis != NULL)
+ mccase(p, cs);
+ }
+ if (invert) {
+ int i;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i))
+ CHsub(cs, i);
+ else
+ CHadd(cs, i);
+ if (p->g->cflags&REG_NEWLINE)
+ CHsub(cs, '\n');
+ if (cs->multis != NULL)
+ mcinvert(p, cs);
+ }
+
+ assert(cs->multis == NULL); /* xxx */
+
+ if (nch(p, cs) == 1) { /* optimize singleton sets */
+ ordinary(p, firstch(p, cs));
+ freeset(p, cs);
+ } else
+ EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ */
+static void
+p_b_term(struct parse *p, cset *cs)
+{
+ char c;
+ char start, finish;
+ int i;
+
+ /* classify what we've got */
+ switch ((MORE()) ? PEEK() : '\0') {
+ case '[':
+ c = (MORE2()) ? PEEK2() : '\0';
+ break;
+ case '-':
+ SETERROR(REG_ERANGE);
+ return; /* NOTE RETURN */
+ break;
+ default:
+ c = '\0';
+ break;
+ }
+
+ switch (c) {
+ case ':': /* character class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+ p_b_cclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+ break;
+ case '=': /* equivalence class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+ p_b_eclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+ break;
+ default: /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+ start = p_b_symbol(p);
+ if (SEE('-') && MORE2() && PEEK2() != ']') {
+ /* range */
+ NEXT();
+ if (EAT('-'))
+ finish = '-';
+ else
+ finish = p_b_symbol(p);
+ } else
+ finish = start;
+/* xxx what about signed chars here... */
+ REQUIRE(start <= finish, REG_ERANGE);
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ break;
+ }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ */
+static void
+p_b_cclass(struct parse *p, cset *cs)
+{
+ char *sp = p->next;
+ struct cclass *cp;
+ size_t len;
+ const char *u;
+ char c;
+
+ while (MORE() && isalpha(PEEK()))
+ NEXT();
+ len = p->next - sp;
+ for (cp = cclasses; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ break;
+ if (cp->name == NULL) {
+ /* oops, didn't find it */
+ SETERROR(REG_ECTYPE);
+ return;
+ }
+
+ u = cp->chars;
+ while ((c = *u++) != '\0')
+ CHadd(cs, c);
+ for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+ MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(struct parse *p, cset *cs)
+{
+ char c;
+
+ c = p_b_coll_elem(p, '=');
+ CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ */
+static char /* value of symbol */
+p_b_symbol(struct parse *p)
+{
+ char value;
+
+ REQUIRE(MORE(), REG_EBRACK);
+ if (!EATTWO('[', '.'))
+ return(GETNEXT());
+
+ /* collating symbol */
+ value = p_b_coll_elem(p, '.');
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+ return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ */
+static char /* value of collating element */
+p_b_coll_elem(struct parse *p,
+ int endc) /* name ended by the two chars endc ']' */
+{
+ char *sp = p->next;
+ struct cname *cp;
+ int len;
+
+ while (MORE() && !SEETWO(endc, ']'))
+ NEXT();
+ if (!MORE()) {
+ SETERROR(REG_EBRACK);
+ return(0);
+ }
+ len = p->next - sp;
+ for (cp = cnames; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ return(cp->code); /* known name */
+ if (len == 1)
+ return(*sp); /* single character */
+ SETERROR(REG_ECOLLATE); /* neither */
+ return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ */
+static char /* if no counterpart, return ch */
+othercase(int ch)
+{
+ ch = (uch)ch;
+ assert(isalpha(ch));
+ if (isupper(ch))
+ return ((uch)tolower(ch));
+ else if (islower(ch))
+ return ((uch)toupper(ch));
+ else /* peculiar, but could happen */
+ return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ *
+ * Boy, is this implementation ever a kludge...
+ */
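+/*
+ * The trick, spelled out: p_bracket() assumes the opening '[' has already
+ * been eaten, so it is handed the two-character tail "c]" of "[c]" from a
+ * local buffer, and the REG_ICASE handling inside p_bracket() adds the
+ * other-case counterpart to the resulting set.
+ */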
+static void
+bothcases(struct parse *p, int ch)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[3];
+
+ ch = (uch)ch;
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */
+ p->next = bracket;
+ p->end = bracket+2;
+ bracket[0] = ch;
+ bracket[1] = ']';
+ bracket[2] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+2);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ */
+static void
+ordinary(struct parse *p, int ch)
+{
+ cat_t *cap = p->g->categories;
+
+ if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+ bothcases(p, ch);
+ else {
+ EMIT(OCHAR, (uch)ch);
+ if (cap[ch] == 0)
+ cap[ch] = p->g->ncategories++;
+ }
+}
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ *
+ * Boy, is this implementation ever a kludge...
+ */
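+/*
+ * Same device as bothcases(): because p_bracket() expects the '[' to have
+ * been consumed already, the synthetic pattern is the tail "^\n]" of
+ * "[^\n]" -- i.e. "any character except newline".
+ */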
+static void
+nonnewline(struct parse *p)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[4];
+
+ p->next = bracket;
+ p->end = bracket+3;
+ bracket[0] = '^';
+ bracket[1] = '\n';
+ bracket[2] = ']';
+ bracket[3] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+3);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - repeat - generate code for a bounded repetition, recursively if needed
+ */
+static void
+repeat(struct parse *p,
+ sopno start, /* operand from here to end of strip */
+ int from, /* repeated from this number */
+ int to) /* to this number of times (maybe INFINITY) */
+{
+ sopno finish = HERE();
+# define N 2
+# define INF 3
+# define REP(f, t) ((f)*8 + (t))
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
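+ /*
+  * REP(MAP(from), MAP(to)) packs the bound pair into one small integer
+  * for the switch below; MAP() first collapses each count to 0, 1,
+  * N (any finite count above one), or INF, which is all the rewriting
+  * rules need to distinguish.
+  */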
+ sopno copy;
+
+ if (p->error != 0) /* head off possible runaway recursion */
+ return;
+
+ assert(from <= to);
+
+ switch (REP(MAP(from), MAP(to))) {
+ case REP(0, 0): /* must be user doing this */
+ DROP(finish-start); /* drop the operand */
+ break;
+ case REP(0, 1): /* as x{1,1}? */
+ case REP(0, N): /* as x{1,n}? */
+ case REP(0, INF): /* as x{1,}? */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start); /* offset is wrong... */
+ repeat(p, start+1, 1, to);
+ ASTERN(OOR1, start);
+ AHEAD(start); /* ... fix it */
+ EMIT(OOR2, 0);
+ AHEAD(THERE());
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case REP(1, 1): /* trivial case */
+ /* done */
+ break;
+ case REP(1, N): /* as x?x{1,n-1} */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start);
+ ASTERN(OOR1, start);
+ AHEAD(start);
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ copy = dupl(p, start+1, finish+1);
+ assert(copy == finish+4);
+ repeat(p, copy, 1, to-1);
+ break;
+ case REP(1, INF): /* as x+ */
+ INSERT(OPLUS_, start);
+ ASTERN(O_PLUS, start);
+ break;
+ case REP(N, N): /* as xx{m-1,n-1} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to-1);
+ break;
+ case REP(N, INF): /* as xx{n-1,INF} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to);
+ break;
+ default: /* "can't happen" */
+ SETERROR(REG_ASSERT); /* just in case */
+ break;
+ }
+}
+
+/*
+ - seterr - set an error condition
+ */
+static int /* useless but makes type checking happy */
+seterr(struct parse *p, int e)
+{
+ if (p->error == 0) /* keep earliest error condition */
+ p->error = e;
+ p->next = nuls; /* try to bring things to a halt */
+ p->end = nuls;
+ return(0); /* make the return value well-defined */
+}
+
+/*
+ - allocset - allocate a set of characters for []
+ */
+static cset *
+allocset(struct parse *p)
+{
+ int no = p->g->ncsets++;
+ size_t nc;
+ size_t nbytes;
+ cset *cs;
+ size_t css = (size_t)p->g->csetsize;
+ int i;
+
+ if (no >= p->ncsalloc) { /* need another column of space */
+ void *ptr;
+
+ p->ncsalloc += CHAR_BIT;
+ nc = p->ncsalloc;
+ assert(nc % CHAR_BIT == 0);
+ nbytes = nc / CHAR_BIT * css;
+
+ ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
+ if (ptr == NULL)
+ goto nomem;
+ p->g->sets = ptr;
+
+ ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
+ if (ptr == NULL)
+ goto nomem;
+ p->g->setbits = ptr;
+
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+
+ (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+ }
+ /* XXX should not happen */
+ if (p->g->sets == NULL || p->g->setbits == NULL)
+ goto nomem;
+
+ cs = &p->g->sets[no];
+ cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->hash = 0;
+ cs->smultis = 0;
+ cs->multis = NULL;
+
+ return(cs);
+nomem:
+ free(p->g->sets);
+ p->g->sets = NULL;
+ free(p->g->setbits);
+ p->g->setbits = NULL;
+
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ return(NULL);
+}
+
+/*
+ - freeset - free a now-unused set
+ */
+static void
+freeset(struct parse *p, cset *cs)
+{
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, i);
+ if (cs == top-1) /* recover only the easy case */
+ p->g->ncsets--;
+}
+
+/*
+ - freezeset - final processing on a set of characters
+ *
+ * The main task here is merging identical sets. This is usually a waste
+ * of time (although the hash code minimizes the overhead), but can win
+ * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to
+ * the same value!
+ */
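+/*
+ * Concretely: 'A' is 0x41 and 'a' is 0x61, so 'A'^'a' == 0x20, and
+ * likewise for every upper/lower pair; an xor-built hash would give each
+ * two-character case pair the same value, while addition keeps them
+ * apart (0xa2 for {A,a}, 0xa4 for {B,b}, and so on, mod 256).
+ */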
+static int /* set number */
+freezeset(struct parse *p, cset *cs)
+{
+ uch h = cs->hash;
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ cset *cs2;
+ size_t css = (size_t)p->g->csetsize;
+
+ /* look for an earlier one which is the same */
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
+ if (cs2->hash == h && cs2 != cs) {
+ /* maybe */
+ for (i = 0; i < css; i++)
+ if (!!CHIN(cs2, i) != !!CHIN(cs, i))
+ break; /* no */
+ if (i == css)
+ break; /* yes */
+ }
+
+ if (cs2 < top) { /* found one */
+ freeset(p, cs);
+ cs = cs2;
+ }
+
+ return((int)(cs - p->g->sets));
+}
+
+/*
+ - firstch - return first character in a set (which must have at least one)
+ */
+static int /* character; there is no "none" value */
+firstch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ return((char)i);
+ assert(never);
+ return(0); /* arbitrary */
+}
+
+/*
+ - nch - number of characters in a set
+ */
+static int
+nch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+ int n = 0;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ n++;
+ return(n);
+}
+
+/*
+ - mcadd - add a collating element to a cset
+ */
+static void
+mcadd(struct parse *p, cset *cs, const char *cp)
+{
+ size_t oldend = cs->smultis;
+ void *np;
+
+ cs->smultis += strlen(cp) + 1;
+ np = realloc(cs->multis, cs->smultis);
+ if (np == NULL) {
+ if (cs->multis)
+ free(cs->multis);
+ cs->multis = NULL;
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ cs->multis = np;
+
+ llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
+}
+
+/*
+ - mcinvert - invert the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mcinvert(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - mccase - add case counterparts of the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mccase(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - isinsets - is this character in any sets?
+ */
+static int /* predicate */
+isinsets(struct re_guts *g, int c)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc = (uch)c;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc] != 0)
+ return(1);
+ return(0);
+}
+
+/*
+ - samesets - are these two characters in exactly the same sets?
+ */
+static int /* predicate */
+samesets(struct re_guts *g, int c1, int c2)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc1 = (uch)c1;
+ unsigned uc2 = (uch)c2;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc1] != col[uc2])
+ return(0);
+ return(1);
+}
+
+/*
+ - categorize - sort out character categories
+ */
+static void
+categorize(struct parse *p, struct re_guts *g)
+{
+ cat_t *cats = g->categories;
+ int c;
+ int c2;
+ cat_t cat;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (cats[c] == 0 && isinsets(g, c)) {
+ cat = g->ncategories++;
+ cats[c] = cat;
+ for (c2 = c+1; c2 <= CHAR_MAX; c2++)
+ if (cats[c2] == 0 && samesets(g, c, c2))
+ cats[c2] = cat;
+ }
+}
+
+/*
+ - dupl - emit a duplicate of a bunch of sops
+ */
+static sopno /* start of duplicate */
+dupl(struct parse *p,
+ sopno start, /* from here */
+ sopno finish) /* to this less one */
+{
+ sopno ret = HERE();
+ sopno len = finish - start;
+
+ assert(finish >= start);
+ if (len == 0)
+ return(ret);
+ enlarge(p, p->ssize + len); /* this many unexpected additions */
+ assert(p->ssize >= p->slen + len);
+ (void) memmove((char *)(p->strip + p->slen),
+ (char *)(p->strip + start), (size_t)len*sizeof(sop));
+ p->slen += len;
+ return(ret);
+}
+
+/*
+ - doemit - emit a strip operator
+ *
+ * It might seem better to implement this as a macro with a function as
+ * hard-case backup, but it's just too big and messy unless there are
+ * some changes to the data structures. Maybe later.
+ */
+static void
+doemit(struct parse *p, sop op, size_t opnd)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* deal with oversize operands ("can't happen", more or less) */
+ assert(opnd < 1<<OPSHIFT);
+
+ /* deal with undersized strip */
+ if (p->slen >= p->ssize)
+ enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
+ assert(p->slen < p->ssize);
+
+ /* finally, it's all reduced to the easy case */
+ p->strip[p->slen++] = SOP(op, opnd);
+}
+
+/*
+ - doinsert - insert a sop into the strip
+ */
+static void
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+{
+ sopno sn;
+ sop s;
+ int i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ sn = HERE();
+ EMIT(op, opnd); /* do checks, ensure space */
+ assert(HERE() == sn+1);
+ s = p->strip[sn];
+
+ /* adjust paren pointers */
+ assert(pos > 0);
+ for (i = 1; i < NPAREN; i++) {
+ if (p->pbegin[i] >= pos) {
+ p->pbegin[i]++;
+ }
+ if (p->pend[i] >= pos) {
+ p->pend[i]++;
+ }
+ }
+
+ memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+ (HERE()-pos-1)*sizeof(sop));
+ p->strip[pos] = s;
+}
+
+/*
+ - dofwd - complete a forward reference
+ */
+static void
+dofwd(struct parse *p, sopno pos, sop value)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ assert(value < 1<<OPSHIFT);
+ p->strip[pos] = OP(p->strip[pos]) | value;
+}
+
+/*
+ - enlarge - enlarge the strip
+ */
+static void
+enlarge(struct parse *p, sopno size)
+{
+ sop *sp;
+
+ if (p->ssize >= size)
+ return;
+
+ sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ if (sp == NULL) {
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ p->strip = sp;
+ p->ssize = size;
+}
+
+/*
+ - stripsnug - compact the strip
+ */
+static void
+stripsnug(struct parse *p, struct re_guts *g)
+{
+ g->nstates = p->slen;
+ g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+ if (g->strip == NULL) {
+ SETERROR(REG_ESPACE);
+ g->strip = p->strip;
+ }
+}
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ *
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences. Someday. This code is simple and finds most
+ * of the interesting cases.
+ *
+ * Note that must and mlen got initialized during setup.
+ */
+static void
+findmust(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop *start = 0; /* set in the default case below; 0 only quiets warnings */
+ sop *newstart = 0; /* set in the OCHAR case; 0 only quiets warnings */
+ sopno newlen;
+ sop s;
+ char *cp;
+ sopno i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* find the longest OCHAR sequence in strip */
+ newlen = 0;
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OCHAR: /* sequence member */
+ if (newlen == 0) /* new sequence */
+ newstart = scan - 1;
+ newlen++;
+ break;
+ case OPLUS_: /* things that don't break one */
+ case OLPAREN:
+ case ORPAREN:
+ break;
+ case OQUEST_: /* things that must be skipped */
+ case OCH_:
+ scan--;
+ do {
+ scan += OPND(s);
+ s = *scan;
+ /* assert() interferes with debug printouts */
+ if (OP(s) != O_QUEST && OP(s) != O_CH &&
+ OP(s) != OOR2) {
+ g->iflags |= REGEX_BAD;
+ return;
+ }
+ } while (OP(s) != O_QUEST && OP(s) != O_CH);
+ /* fallthrough */
+ default: /* things that break a sequence */
+ if (newlen > g->mlen) { /* ends one */
+ start = newstart;
+ g->mlen = newlen;
+ }
+ newlen = 0;
+ break;
+ }
+ } while (OP(s) != OEND);
+
+ if (g->mlen == 0) /* there isn't one */
+ return;
+
+ /* turn it into a character string */
+ g->must = malloc((size_t)g->mlen + 1);
+ if (g->must == NULL) { /* argh; just forget it */
+ g->mlen = 0;
+ return;
+ }
+ cp = g->must;
+ scan = start;
+ for (i = g->mlen; i > 0; i--) {
+ while (OP(s = *scan++) != OCHAR)
+ continue;
+ assert(cp < g->must + g->mlen);
+ *cp++ = (char)OPND(s);
+ }
+ assert(cp == g->must + g->mlen);
+ *cp++ = '\0'; /* just on general principles */
+}
+
+/*
+ - pluscount - count + nesting
+ */
+static sopno /* nesting depth */
+pluscount(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop s;
+ sopno plusnest = 0;
+ sopno maxnest = 0;
+
+ if (p->error != 0)
+ return(0); /* there may not be an OEND */
+
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OPLUS_:
+ plusnest++;
+ break;
+ case O_PLUS:
+ if (plusnest > maxnest)
+ maxnest = plusnest;
+ plusnest--;
+ break;
+ }
+ } while (OP(s) != OEND);
+ if (plusnest != 0)
+ g->iflags |= REGEX_BAD;
+ return(maxnest);
+}
diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc
new file mode 100644
index 000000000000..0f27cfd4b5b8
--- /dev/null
+++ b/lib/Support/regengine.inc
@@ -0,0 +1,1027 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#define nope snope
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#define nope lnope
+#endif
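+
+/*
+ * Why two name sets: regexec.c is expected (per the header comment above,
+ * and on the assumption it follows the usual Spencer arrangement) to
+ * #include this file twice -- once with SNAMES and a compact one-word
+ * states representation for small patterns, once with LNAMES and a
+ * multi-word representation for large ones -- without the two copies'
+ * symbols colliding.
+ */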
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+ struct re_guts *g;
+ int eflags;
+ llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+ char *offp; /* offsets work from here */
+ char *beginp; /* start of string -- virtual NUL precedes */
+ char *endp; /* end of string -- virtual NUL here */
+ char *coldp; /* can be no match starting before here */
+ char **lastpos; /* [nplus+1] */
+ STATEVARS;
+ states st; /* current states */
+ states fresh; /* states for a fresh start */
+ states tmp; /* temporary */
+ states empty; /* empty set of states */
+};
+
+static int matcher(struct re_guts *, char *, size_t, llvm_regmatch_t[], int);
+static char *dissect(struct match *, char *, char *, sopno, sopno);
+static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
+static char *fast(struct match *, char *, char *, sopno, sopno);
+static char *slow(struct match *, char *, char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION 100
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5) /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+static char *pchar(int);
+#endif
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ */
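+/*
+ * Shape of the work below: fast() makes one quick pass to decide whether
+ * a match can end at all (recording coldp, before which no match can
+ * start); slow() is then run from coldp, advancing it until the true
+ * start is found; dissect() or backref() finally assigns subexpression
+ * boundaries when the caller asked for them.
+ */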
+static int /* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, char *string, size_t nmatch, llvm_regmatch_t pmatch[],
+ int eflags)
+{
+ char *endp;
+ size_t i;
+ struct match mv;
+ struct match *m = &mv;
+ char *dp;
+ const sopno gf = g->firststate+1; /* +1 for OEND */
+ const sopno gl = g->laststate;
+ char *start;
+ char *stop;
+
+ /* simplify the situation where possible */
+ if (g->cflags&REG_NOSUB)
+ nmatch = 0;
+ if (eflags&REG_STARTEND) {
+ start = string + pmatch[0].rm_so;
+ stop = string + pmatch[0].rm_eo;
+ } else {
+ start = string;
+ stop = start + strlen(start);
+ }
+ if (stop < start)
+ return(REG_INVARG);
+
+ /* prescreening; this does wonders for this rather slow code */
+ if (g->must != NULL) {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
+
+ /* match struct setup */
+ m->g = g;
+ m->eflags = eflags;
+ m->pmatch = NULL;
+ m->lastpos = NULL;
+ m->offp = string;
+ m->beginp = start;
+ m->endp = stop;
+ STATESETUP(m, 4);
+ SETUP(m->st);
+ SETUP(m->fresh);
+ SETUP(m->tmp);
+ SETUP(m->empty);
+ CLEAR(m->empty);
+
+ /* this loop does only one repetition except for backrefs */
+ for (;;) {
+ endp = fast(m, start, stop, gf, gl);
+ if (endp == NULL) { /* a miss */
+ free(m->pmatch);
+ free(m->lastpos);
+ STATETEARDOWN(m);
+ return(REG_NOMATCH);
+ }
+ if (nmatch == 0 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* where? */
+ assert(m->coldp != NULL);
+ for (;;) {
+ NOTE("finding start");
+ endp = slow(m, m->coldp, stop, gf, gl);
+ if (endp != NULL)
+ break;
+ assert(m->coldp < m->endp);
+ m->coldp++;
+ }
+ if (nmatch == 1 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* oh my, he wants the subexpressions... */
+ if (m->pmatch == NULL)
+ m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+ sizeof(llvm_regmatch_t));
+ if (m->pmatch == NULL) {
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+ NOTE("dissecting");
+ dp = dissect(m, m->coldp, endp, gf, gl);
+ } else {
+ if (g->nplus > 0 && m->lastpos == NULL)
+ m->lastpos = (char **)malloc((g->nplus+1) *
+ sizeof(char *));
+ if (g->nplus > 0 && m->lastpos == NULL) {
+ free(m->pmatch);
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ NOTE("backref dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ if (dp != NULL)
+ break;
+
+ /* uh-oh... we couldn't find a subexpression-level match */
+ assert(g->backrefs); /* must be back references doing it */
+ assert(g->nplus == 0 || m->lastpos != NULL);
+ for (;;) {
+ if (dp != NULL || endp <= m->coldp)
+ break; /* defeat */
+ NOTE("backoff");
+ endp = slow(m, m->coldp, endp-1, gf, gl);
+ if (endp == NULL)
+ break; /* defeat */
+ /* try it on a shorter possibility */
+#ifndef NDEBUG
+ for (i = 1; i <= m->g->nsub; i++) {
+ assert(m->pmatch[i].rm_so == -1);
+ assert(m->pmatch[i].rm_eo == -1);
+ }
+#endif
+ NOTE("backoff dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ assert(dp == NULL || dp == endp);
+ if (dp != NULL) /* found a shorter one */
+ break;
+
+ /* despite initial appearances, there is no match here */
+ NOTE("false alarm");
+ if (m->coldp == stop)
+ break;
+ start = m->coldp + 1; /* recycle starting later */
+ }
+
+ /* fill in the details if requested */
+ if (nmatch > 0) {
+ pmatch[0].rm_so = m->coldp - m->offp;
+ pmatch[0].rm_eo = endp - m->offp;
+ }
+ if (nmatch > 1) {
+ assert(m->pmatch != NULL);
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
+ pmatch[i] = m->pmatch[i];
+ else {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+ }
+
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
+ STATETEARDOWN(m);
+ return(0);
+}
+
+/*
+ - dissect - figure out what matched what, no back references
+ */
+static char * /* == stop (success) always */
+dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ sopno es; /* end sop of current subRE */
+ char *sp; /* start of string matched by it */
+ char *stp; /* string matched by it cannot pass here */
+ char *rest; /* start of rest of string */
+ char *tail; /* string unmatched by rest of RE */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ char *ssp; /* start of string matched by subsubRE */
+ char *sep; /* end of string matched by subsubRE */
+ char *oldssp; /* previous ssp */
+
+ AT("diss", start, stop, startst, stopst);
+ sp = start;
+ for (ss = startst; ss < stopst; ss = es) {
+ /* identify end of subRE */
+ es = ss;
+ switch (OP(m->g->strip[es])) {
+ case OPLUS_:
+ case OQUEST_:
+ es += OPND(m->g->strip[es]);
+ break;
+ case OCH_:
+ while (OP(m->g->strip[es]) != O_CH)
+ es += OPND(m->g->strip[es]);
+ break;
+ }
+ es++;
+
+ /* figure out what it matched */
+ switch (OP(m->g->strip[ss])) {
+ case OEND:
+ assert(nope);
+ break;
+ case OCHAR:
+ sp++;
+ break;
+ case OBOL:
+ case OEOL:
+ case OBOW:
+ case OEOW:
+ break;
+ case OANY:
+ case OANYOF:
+ sp++;
+ break;
+ case OBACK_:
+ case O_BACK:
+ assert(nope);
+ break;
+ /* cases where length of match is hard to find */
+ case OQUEST_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ /* did innards match? */
+ if (slow(m, sp, rest, ssub, esub) != NULL) {
+ char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ } else /* no */
+ assert(sp == rest);
+ sp = rest;
+ break;
+ case OPLUS_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ ssp = sp;
+ oldssp = ssp;
+ for (;;) { /* find last match of innards */
+ sep = slow(m, ssp, rest, ssub, esub);
+ if (sep == NULL || sep == ssp)
+ break; /* failed or matched null */
+ oldssp = ssp; /* on to next try */
+ ssp = sep;
+ }
+ if (sep == NULL) {
+ /* last successful match */
+ sep = ssp;
+ ssp = oldssp;
+ }
+ assert(sep == rest); /* must exhaust substring */
+ assert(slow(m, ssp, sep, ssub, esub) == rest);
+ {
+ char *dp = dissect(m, ssp, sep, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == sep);
+ }
+ sp = rest;
+ break;
+ case OCH_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = ss + OPND(m->g->strip[ss]) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ if (slow(m, sp, rest, ssub, esub) == rest)
+ break; /* it matched all of it */
+ /* that one missed, try next one */
+ assert(OP(m->g->strip[esub]) == OOR1);
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ {
+ char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ }
+ sp = rest;
+ break;
+ case O_PLUS:
+ case O_QUEST:
+ case OOR1:
+ case OOR2:
+ case O_CH:
+ assert(nope);
+ break;
+ case OLPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_so = sp - m->offp;
+ break;
+ case ORPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_eo = sp - m->offp;
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+ }
+
+ assert(sp == stop);
+ return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ */
+static char * /* == stop (success) or NULL (failure) */
+backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
+ sopno lev, int rec) /* PLUS nesting level */
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ char *sp; /* start of string matched by it */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ char *ssp; /* start of string matched by subsubRE */
+ char *dp;
+ size_t len;
+ int hard;
+ sop s;
+ llvm_regoff_t offsave;
+ cset *cs;
+
+ AT("back", start, stop, startst, stopst);
+ sp = start;
+
+ /* get as far as we can with easy stuff */
+ hard = 0;
+ for (ss = startst; !hard && ss < stopst; ss++)
+ switch (OP(s = m->g->strip[ss])) {
+ case OCHAR:
+ if (sp == stop || *sp++ != (char)OPND(s))
+ return(NULL);
+ break;
+ case OANY:
+ if (sp == stop)
+ return(NULL);
+ sp++;
+ break;
+ case OANYOF:
+ cs = &m->g->sets[OPND(s)];
+ if (sp == stop || !CHIN(cs, *sp++))
+ return(NULL);
+ break;
+ case OBOL:
+ if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOL:
+ if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OBOW:
+ if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp > m->beginp &&
+ !ISWORD(*(sp-1))) ) &&
+ (sp < m->endp && ISWORD(*sp)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOW:
+ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp < m->endp && !ISWORD(*sp)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1))) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case O_QUEST:
+ break;
+ case OOR1: /* matches null but needs to skip */
+ ss++;
+ s = m->g->strip[ss];
+ do {
+ assert(OP(s) == OOR2);
+ ss += OPND(s);
+ } while (OP(s = m->g->strip[ss]) != O_CH);
+ /* note that the ss++ gets us past the O_CH */
+ break;
+ default: /* have to make a choice */
+ hard = 1;
+ break;
+ }
+ if (!hard) { /* that was it! */
+ if (sp != stop)
+ return(NULL);
+ return(sp);
+ }
+ ss--; /* adjust for the for's final increment */
+
+ /* the hard stuff */
+ AT("hard", sp, stop, ss, stopst);
+ s = m->g->strip[ss];
+ switch (OP(s)) {
+ case OBACK_: /* the vilest depths */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ if (m->pmatch[i].rm_eo == -1)
+ return(NULL);
+ assert(m->pmatch[i].rm_so != -1);
+ len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ if (len == 0 && rec++ > MAX_RECURSION)
+ return(NULL);
+ assert(stop - m->beginp >= len);
+ if (sp > stop - len)
+ return(NULL); /* not enough left to match */
+ ssp = m->offp + m->pmatch[i].rm_so;
+ if (memcmp(sp, ssp, len) != 0)
+ return(NULL);
+ while (m->g->strip[ss] != SOP(O_BACK, i))
+ ss++;
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+ break;
+ case OQUEST_: /* to null or not */
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp); /* not */
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+ break;
+ case OPLUS_:
+ assert(m->lastpos != NULL);
+ assert(lev+1 <= m->g->nplus);
+ m->lastpos[lev+1] = sp;
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+ break;
+ case O_PLUS:
+ if (sp == m->lastpos[lev]) /* last pass matched null */
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ /* try another pass */
+ m->lastpos[lev] = sp;
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+ if (dp == NULL)
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ else
+ return(dp);
+ break;
+ case OCH_: /* find the right one, if any */
+ ssub = ss + 1;
+ esub = ss + OPND(s) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ dp = backref(m, sp, stop, ssub, esub, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ /* that one missed, try next one */
+ if (OP(m->g->strip[esub]) == O_CH)
+ return(NULL); /* there is none */
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ break;
+ case OLPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_so;
+ m->pmatch[i].rm_so = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_so = offsave;
+ return(NULL);
+ break;
+ case ORPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_eo;
+ m->pmatch[i].rm_eo = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_eo = offsave;
+ return(NULL);
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+
+ /* "can't happen" */
+ assert(nope);
+ /* NOTREACHED */
+ return NULL;
+}
+
+/*
+ - fast - step through the string at top speed
+ */
+static char * /* where tentative match ended, or NULL */
+fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+ states st = m->st;
+ states fresh = m->fresh;
+ states tmp = m->tmp;
+ char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ char *coldp; /* last p after which no match was underway */
+
+ CLEAR(st);
+ SET1(st, startst);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ ASSIGN(fresh, st);
+ SP("start", st, *p);
+ coldp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+ if (EQ(st, fresh))
+ coldp = p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, fresh);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("aft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ assert(coldp != NULL);
+ m->coldp = coldp;
+ if (ISSET(st, stopst))
+ return(p+1);
+ else
+ return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ */
+static char * /* where it ended */
+slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+ states st = m->st;
+ states empty = m->empty;
+ states tmp = m->tmp;
+ char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ char *matchp; /* last p at which a match ended */
+
+ AT("slow", start, stop, startst, stopst);
+ CLEAR(st);
+ SET1(st, startst);
+ SP("sstart", st, *p);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ matchp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst))
+ matchp = p;
+ if (EQ(st, empty) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, empty);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("saft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ */
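+/*
+ * FWD and BACK come from the including file; in the usual definitions
+ * (an assumption -- regexec.c supplies them per state representation),
+ * FWD(dst, src, n) marks the state n positions ahead of the current one
+ * in dst when it is live in src, and BACK marks the one n positions
+ * behind, which is all the strip's forward and backward links require.
+ */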
+static states
+step(struct re_guts *g,
+ sopno start, /* start state within strip */
+ sopno stop, /* state after stop state within strip */
+ states bef, /* states reachable before */
+ int ch, /* character or NONCHAR code */
+ states aft) /* states already known reachable after */
+{
+ cset *cs;
+ sop s;
+ sopno pc;
+ onestate here; /* note, macros know this name */
+ sopno look;
+ int i;
+
+ for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+ s = g->strip[pc];
+ switch (OP(s)) {
+ case OEND:
+ assert(pc == stop-1);
+ break;
+ case OCHAR:
+ /* only characters can match */
+ assert(!NONCHAR(ch) || ch != (char)OPND(s));
+ if (ch == (char)OPND(s))
+ FWD(aft, bef, 1);
+ break;
+ case OBOL:
+ if (ch == BOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OEOL:
+ if (ch == EOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OBOW:
+ if (ch == BOW)
+ FWD(aft, bef, 1);
+ break;
+ case OEOW:
+ if (ch == EOW)
+ FWD(aft, bef, 1);
+ break;
+ case OANY:
+ if (!NONCHAR(ch))
+ FWD(aft, bef, 1);
+ break;
+ case OANYOF:
+ cs = &g->sets[OPND(s)];
+ if (!NONCHAR(ch) && CHIN(cs, ch))
+ FWD(aft, bef, 1);
+ break;
+ case OBACK_: /* ignored here */
+ case O_BACK:
+ FWD(aft, aft, 1);
+ break;
+ case OPLUS_: /* forward, this is just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case O_PLUS: /* both forward and back */
+ FWD(aft, aft, 1);
+ i = ISSETBACK(aft, OPND(s));
+ BACK(aft, aft, OPND(s));
+ if (!i && ISSETBACK(aft, OPND(s))) {
+ /* oho, must reconsider loop body */
+ pc -= OPND(s) + 1;
+ INIT(here, pc);
+ }
+ break;
+ case OQUEST_: /* two branches, both forward */
+ FWD(aft, aft, 1);
+ FWD(aft, aft, OPND(s));
+ break;
+ case O_QUEST: /* just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case OLPAREN: /* not significant here */
+ case ORPAREN:
+ FWD(aft, aft, 1);
+ break;
+ case OCH_: /* mark the first two branches */
+ FWD(aft, aft, 1);
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ break;
+ case OOR1: /* done a branch, find the O_CH */
+ if (ISSTATEIN(aft, here)) {
+ for (look = 1;
+ OP(s = g->strip[pc+look]) != O_CH;
+ look += OPND(s))
+ assert(OP(s) == OOR2);
+ FWD(aft, aft, look);
+ }
+ break;
+ case OOR2: /* propagate OCH_'s marking */
+ FWD(aft, aft, 1);
+ if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ }
+ break;
+ case O_CH: /* just empty */
+ FWD(aft, aft, 1);
+ break;
+ default: /* ooooops... */
+ assert(nope);
+ break;
+ }
+ }
+
+ return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+ struct re_guts *g = m->g;
+ int i;
+ int first = 1;
+
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)fprintf(d, "%s", caption);
+ if (ch != '\0')
+ (void)fprintf(d, " %s", pchar(ch));
+ for (i = 0; i < g->nstates; i++)
+ if (ISSET(st, i)) {
+ (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+ first = 0;
+ }
+ (void)fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+ sopno stopst)
+{
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)printf("%s %s-", title, pchar(*start));
+ (void)printf("%s ", pchar(*stop));
+ (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE /* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c? Well, yes. But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient. It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char * /* -> representation */
+pchar(int ch)
+{
+ static char pbuf[10];
+
+ if (isprint(ch) || ch == ' ')
+ (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+ else
+ (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+ return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
+#undef nope
diff --git a/lib/Support/regerror.c b/lib/Support/regerror.c
new file mode 100644
index 000000000000..1d67c9a2b03b
--- /dev/null
+++ b/lib/Support/regerror.c
@@ -0,0 +1,135 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+static const char *regatoi(const llvm_regex_t *, char *, int);
+
+static struct rerr {
+ int code;
+ const char *name;
+ const char *explain;
+} rerrs[] = {
+ { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+ { REG_ESPACE, "REG_ESPACE", "out of memory" },
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+ { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ - llvm_regerror - the interface to error numbers
+ = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+ */
+/* ARGSUSED */
+size_t
+llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+ struct rerr *r;
+ size_t len;
+ int target = errcode &~ REG_ITOA;
+ const char *s;
+ char convbuf[50];
+
+ if (errcode == REG_ATOI)
+ s = regatoi(preg, convbuf, sizeof convbuf);
+ else {
+ for (r = rerrs; r->code != 0; r++)
+ if (r->code == target)
+ break;
+
+ if (errcode&REG_ITOA) {
+ if (r->code != 0) {
+ assert(strlen(r->name) < sizeof(convbuf));
+ (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
+ } else
+ (void)snprintf(convbuf, sizeof convbuf,
+ "REG_0x%x", target);
+ s = convbuf;
+ } else
+ s = r->explain;
+ }
+
+ len = strlen(s) + 1;
+ if (errbuf_size > 0) {
+ llvm_strlcpy(errbuf, s, errbuf_size);
+ }
+
+ return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ */
+static const char *
+regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
+{
+ struct rerr *r;
+
+ for (r = rerrs; r->code != 0; r++)
+ if (strcmp(r->name, preg->re_endp) == 0)
+ break;
+ if (r->code == 0)
+ return("0");
+
+ (void)snprintf(localbuf, localbufsize, "%d", r->code);
+ return(localbuf);
+}
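
The REG_ITOA and REG_ATOI extensions above cut both ways: OR'ing REG_ITOA into an error code makes llvm_regerror() return the symbolic name, while passing REG_ATOI makes it parse a name out of preg->re_endp and format the number. A minimal round-trip sketch, assuming only the declarations from regex_impl.h introduced later in this patch:

    #include <stdio.h>
    #include "regex_impl.h"

    int main(void) {
        char buf[64];
        llvm_regex_t re;

        /* number -> name: prints "REG_EBRACK" */
        llvm_regerror(REG_EBRACK | REG_ITOA, NULL, buf, sizeof buf);
        printf("%s\n", buf);

        /* name -> number: regatoi() reads the name from re_endp; prints "7" */
        re.re_endp = "REG_EBRACK";
        llvm_regerror(REG_ATOI, &re, buf, sizeof buf);
        printf("%s\n", buf);
        return 0;
    }

Both flags are marked "(!)" in the header for a reason: they are nonstandard conveniences, not part of POSIX regerror().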
diff --git a/lib/Support/regex2.h b/lib/Support/regex2.h
new file mode 100644
index 000000000000..21659c34449a
--- /dev/null
+++ b/lib/Support/regex2.h
@@ -0,0 +1,157 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex2.h 8.4 (Berkeley) 3/20/94
+ */
+
+/*
+ * internals of regex_t
+ */
+#define MAGIC1 ((('r'^0200)<<8) | 'e')
+
+/*
+ * The internal representation is a *strip*, a sequence of
+ * operators ending with an endmarker. (Some terminology etc. is a
+ * historical relic of earlier versions which used multiple strips.)
+ * Certain oddities in the representation are there to permit running
+ * the machinery backwards; in particular, any deviation from sequential
+ * flow must be marked at both its source and its destination. Some
+ * fine points:
+ *
+ * - OPLUS_ and O_PLUS are *inside* the loop they create.
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create.
+ * - OCH_ and O_CH are *outside* the multi-way branch they create, while
+ * OOR1 and OOR2 are respectively the end and the beginning of one of
+ * the branches. Note that there is an implicit OOR2 following OCH_
+ * and an implicit OOR1 preceding O_CH.
+ *
+ * In state representations, an operator's bit is on to signify a state
+ * immediately *preceding* "execution" of that operator.
+ */
+typedef unsigned long sop; /* strip operator */
+typedef long sopno;
+#define OPRMASK 0xf8000000LU
+#define OPDMASK 0x07ffffffLU
+#define OPSHIFT ((unsigned)27)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND (1LU<<OPSHIFT) /* endmarker - */
+#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
+#define OBOL (3LU<<OPSHIFT) /* left anchor - */
+#define OEOL (4LU<<OPSHIFT) /* right anchor - */
+#define OANY (5LU<<OPSHIFT) /* . - */
+#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
+#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
+#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
+#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
+#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
+#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
+#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
+#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
+#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
+#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
+#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19LU<<OPSHIFT) /* begin word - */
+#define OEOW (20LU<<OPSHIFT) /* end word - */
+
+/*
+ * Structure for [] character-set representation. Character sets are
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
+ * The individual set therefore has both a pointer to the byte vector
+ * and a mask to pick out the relevant bit of each byte. A hash code
+ * simplifies testing whether two sets could be identical.
+ *
+ * This will get trickier for multicharacter collating elements. As
+ * preliminary hooks for dealing with such things, we also carry along
+ * a string of multi-character elements, and decide the size of the
+ * vectors at run time.
+ */
+typedef struct {
+ uch *ptr; /* -> uch [csetsize] */
+ uch mask; /* bit within array */
+ uch hash; /* hash code */
+ size_t smultis;
+	char *multis;		/* -> char[smultis]  ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+ int magic;
+# define MAGIC2 ((('R'^0200)<<8)|'E')
+ sop *strip; /* malloced area for strip */
+ int csetsize; /* number of bits in a cset vector */
+ int ncsets; /* number of csets in use */
+ cset *sets; /* -> cset [ncsets] */
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
+ int cflags; /* copy of llvm_regcomp() cflags argument */
+ sopno nstates; /* = number of sops */
+ sopno firststate; /* the initial OEND (normally 0) */
+ sopno laststate; /* the final OEND */
+ int iflags; /* internal flags */
+# define USEBOL 01 /* used ^ */
+# define USEEOL 02 /* used $ */
+# define REGEX_BAD 04 /* something wrong */
+ int nbol; /* number of ^ used */
+ int neol; /* number of $ used */
+ int ncategories; /* how many character categories */
+ cat_t *categories; /* ->catspace[-CHAR_MIN] */
+ char *must; /* match must contain this string */
+ int mlen; /* length of must */
+ size_t nsub; /* copy of re_nsub */
+ int backrefs; /* does it use back references? */
+ sopno nplus; /* how deep does it nest +s? */
+ /* catspace must be last */
+ cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1) /* a non-character value */
+#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
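
Each strip element packs a 5-bit opcode above a 27-bit operand, which is all the OP/OPND/SOP macros above do. A standalone sketch of the same packing, with the constants copied verbatim from this header:

    #include <stdio.h>

    typedef unsigned long sop;          /* as in regex2.h above */
    #define OPRMASK 0xf8000000LU        /* top 5 bits: opcode */
    #define OPDMASK 0x07ffffffLU        /* low 27 bits: operand */
    #define OPSHIFT ((unsigned)27)
    #define OP(n)   ((n)&OPRMASK)
    #define OPND(n) ((n)&OPDMASK)
    #define SOP(op, opnd) ((op)|(opnd))
    #define OCHAR   (2LU<<OPSHIFT)      /* literal character */

    int main(void) {
        sop s = SOP(OCHAR, 'a');        /* pack */
        printf("%d\n", OP(s) == OCHAR); /* 1: opcode survives */
        printf("%c\n", (char)OPND(s));  /* a: operand survives */
        return 0;
    }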
diff --git a/lib/Support/regex_impl.h b/lib/Support/regex_impl.h
new file mode 100644
index 000000000000..f8296c9ff75e
--- /dev/null
+++ b/lib/Support/regex_impl.h
@@ -0,0 +1,108 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992 Henry Spencer.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer of the University of Toronto.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+#include <sys/types.h>
+typedef off_t llvm_regoff_t;
+typedef struct {
+ llvm_regoff_t rm_so; /* start of match */
+ llvm_regoff_t rm_eo; /* end of match */
+} llvm_regmatch_t;
+
+typedef struct llvm_regex {
+ int re_magic;
+ size_t re_nsub; /* number of parenthesized subexpressions */
+ const char *re_endp; /* end pointer for REG_PEND */
+ struct re_guts *re_g; /* none of your business :-) */
+} llvm_regex_t;
+
+/* llvm_regcomp() flags */
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
+
+/* llvm_regerror() flags */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255 /* convert name to number (!) */
+#define REG_ITOA 0400 /* convert number to name (!) */
+
+/* llvm_regexec() flags */
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400 /* tracing of execution */
+#define REG_LARGE 01000 /* force large representation */
+#define REG_BACKR 02000 /* force use of backref code */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int llvm_regcomp(llvm_regex_t *, const char *, int);
+size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+ llvm_regmatch_t [], int);
+void llvm_regfree(llvm_regex_t *);
+size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_REGEX_H_ */
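
Taken together, the four entry points declared above give the usual compile/match/free lifecycle. A minimal caller, assuming nothing beyond this header:

    #include <stdio.h>
    #include "regex_impl.h"

    int main(void) {
        llvm_regex_t re;
        llvm_regmatch_t m[2];   /* m[0] = whole match, m[1] = group 1 */
        char buf[128];

        int err = llvm_regcomp(&re, "a(b+)c", REG_EXTENDED);
        if (err != 0) {
            llvm_regerror(err, &re, buf, sizeof buf);
            fprintf(stderr, "llvm_regcomp: %s\n", buf);
            return 1;
        }
        if (llvm_regexec(&re, "xabbbcy", 2, m, 0) == 0)
            printf("group 1 at [%ld, %ld)\n",   /* [2, 5) */
                   (long)m[1].rm_so, (long)m[1].rm_eo);
        llvm_regfree(&re);
        return 0;
    }

The llvm_ prefixes on every entry point presumably keep these symbols from colliding with whatever regex library is already linked into the host process.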
diff --git a/lib/Support/regexec.c b/lib/Support/regexec.c
new file mode 100644
index 000000000000..7d70f6e16c78
--- /dev/null
+++ b/lib/Support/regexec.c
@@ -0,0 +1,161 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * the outer shell of llvm_regexec()
+ *
+ * This file includes engine.inc *twice*, after muchos fiddling with the
+ * macros that code uses. This lets the same code operate on two different
+ * representations for state sets.
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/* macros for manipulating states, small version */
+#define states long
+#define states1 states /* for later use in llvm_regexec() decision */
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
+#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS long dummy /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate long
+#define INIT(o, n) ((o) = (unsigned long)1 << (n))
+#define INC(o) ((o) <<= 1)
+#define ISSTATEIN(v, o) (((v) & (o)) != 0)
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
+#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
+#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
+/* function names */
+#define SNAMES /* engine.inc looks after details */
+
+#include "regengine.inc"
+
+/* now undo things */
+#undef states
+#undef CLEAR
+#undef SET0
+#undef SET1
+#undef ISSET
+#undef ASSIGN
+#undef EQ
+#undef STATEVARS
+#undef STATESETUP
+#undef STATETEARDOWN
+#undef SETUP
+#undef onestate
+#undef INIT
+#undef INC
+#undef ISSTATEIN
+#undef FWD
+#undef BACK
+#undef ISSETBACK
+#undef SNAMES
+
+/* macros for manipulating states, large version */
+#define states char *
+#define CLEAR(v) memset(v, 0, m->g->nstates)
+#define SET0(v, n) ((v)[n] = 0)
+#define SET1(v, n) ((v)[n] = 1)
+#define ISSET(v, n) ((v)[n])
+#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
+#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
+#define STATEVARS long vn; char *space
+#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
+ if ((m)->space == NULL) return(REG_ESPACE); \
+ (m)->vn = 0; }
+#define STATETEARDOWN(m) { free((m)->space); }
+#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
+#define onestate long
+#define INIT(o, n) ((o) = (n))
+#define INC(o) ((o)++)
+#define ISSTATEIN(v, o) ((v)[o])
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
+#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
+#define ISSETBACK(v, n) ((v)[here - (n)])
+/* function names */
+#define LNAMES /* flag */
+
+#include "regengine.inc"
+
+/*
+ - llvm_regexec - interface for matching
+ *
+ * We put this here so we can exploit knowledge of the state representation
+ * when choosing which matcher to call. Also, by this point the matchers
+ * have been prototyped.
+ */
+int /* 0 success, REG_NOMATCH failure */
+llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[], int eflags)
+{
+ struct re_guts *g = preg->re_g;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
+#endif
+
+ if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
+ return(REG_BADPAT);
+ assert(!(g->iflags&REGEX_BAD));
+ if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
+ return(REG_BADPAT);
+ eflags = GOODFLAGS(eflags);
+
+ if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
+ return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
+ else
+ return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
+}
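
In the small representation each NFA state is one bit of a long, so the "if I'm here, I can also be there" step really is branch-free: FWD() masks out the current position's bits and ORs them in n positions ahead. A standalone illustration of that macro (copied from above; note that `here` is a variable the macro knows by name):

    #include <stdio.h>

    #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src) & (here)) << (n))

    int main(void) {
        unsigned long here = 1UL << 3;      /* mask selecting state 3 */
        unsigned long now  = 1UL << 3;      /* we are in state 3 */
        unsigned long next = 0;
        FWD(next, now, 2);                  /* so we can also be in state 5 */
        printf("%d\n", next == (1UL << 5)); /* 1 */
        return 0;
    }

The large representation swaps these macros for byte-per-state arrays driven by memset/memmove/memcmp, and llvm_regexec() picks between the two by comparing g->nstates against the bit width of states1.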
diff --git a/lib/Support/regfree.c b/lib/Support/regfree.c
new file mode 100644
index 000000000000..dc2b4af90fa7
--- /dev/null
+++ b/lib/Support/regfree.c
@@ -0,0 +1,72 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/*
+ - llvm_regfree - free everything
+ */
+void
+llvm_regfree(llvm_regex_t *preg)
+{
+ struct re_guts *g;
+
+ if (preg->re_magic != MAGIC1) /* oops */
+ return; /* nice to complain, but hard */
+
+ g = preg->re_g;
+ if (g == NULL || g->magic != MAGIC2) /* oops again */
+ return;
+ preg->re_magic = 0; /* mark it invalid */
+ g->magic = 0; /* mark it invalid */
+
+ if (g->strip != NULL)
+ free((char *)g->strip);
+ if (g->sets != NULL)
+ free((char *)g->sets);
+ if (g->setbits != NULL)
+ free((char *)g->setbits);
+ if (g->must != NULL)
+ free(g->must);
+ free((char *)g);
+}
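
Because llvm_regfree() zeroes both magic numbers before freeing, a second call on the same object fails the MAGIC1 check and returns early instead of double-freeing. A quick sketch of that property:

    #include "regex_impl.h"

    int main(void) {
        llvm_regex_t re;
        if (llvm_regcomp(&re, "abc", REG_EXTENDED) != 0)
            return 1;
        llvm_regfree(&re);   /* frees re_g, clears re_magic */
        llvm_regfree(&re);   /* magic check fails: harmless no-op */
        return 0;
    }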
diff --git a/lib/Support/regstrlcpy.c b/lib/Support/regstrlcpy.c
new file mode 100644
index 000000000000..8b68afdf75f1
--- /dev/null
+++ b/lib/Support/regstrlcpy.c
@@ -0,0 +1,52 @@
+/*
+ * This code is derived from OpenBSD's libc, original license follows:
+ *
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "regex_impl.h"
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+llvm_strlcpy(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
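
The return value is strlen(src) whether or not everything fit, which gives the standard one-line truncation check:

    #include <stdio.h>
    #include "regex_impl.h"   /* declares llvm_strlcpy() */

    int main(void) {
        char buf[8];
        if (llvm_strlcpy(buf, "hello, world", sizeof buf) >= sizeof buf)
            fprintf(stderr, "truncated to \"%s\"\n", buf);  /* "hello, " */
        return 0;
    }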
diff --git a/lib/Support/regutils.h b/lib/Support/regutils.h
new file mode 100644
index 000000000000..d0ee100a382b
--- /dev/null
+++ b/lib/Support/regutils.h
@@ -0,0 +1,53 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* utility definitions */
+#define NC (CHAR_MAX - CHAR_MIN + 1)
+typedef unsigned char uch;
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif
+#endif
+#include <assert.h>
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c)
+#endif
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index bf7a0c601e85..2945e33d5b1c 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -13,9 +13,32 @@ add_llvm_library(LLVMSystem
Program.cpp
RWMutex.cpp
Signals.cpp
+ ThreadLocal.cpp
Threading.cpp
TimeValue.cpp
- ThreadLocal.cpp
+ Unix/Alarm.inc
+ Unix/Host.inc
+ Unix/Memory.inc
+ Unix/Mutex.inc
+ Unix/Path.inc
+ Unix/Process.inc
+ Unix/Program.inc
+ Unix/RWMutex.inc
+ Unix/Signals.inc
+ Unix/ThreadLocal.inc
+ Unix/TimeValue.inc
+ Win32/Alarm.inc
+ Win32/DynamicLibrary.inc
+ Win32/Host.inc
+ Win32/Memory.inc
+ Win32/Mutex.inc
+ Win32/Path.inc
+ Win32/Process.inc
+ Win32/Program.inc
+ Win32/RWMutex.inc
+ Win32/Signals.inc
+ Win32/ThreadLocal.inc
+ Win32/TimeValue.inc
)
if( BUILD_SHARED_LIBS AND NOT WIN32 )
diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp
index 378fe262bc10..bad427a58d8c 100644
--- a/lib/System/Disassembler.cpp
+++ b/lib/System/Disassembler.cpp
@@ -26,7 +26,7 @@
using namespace llvm;
-bool llvm::sys::hasDisassembler(void)
+bool llvm::sys::hasDisassembler()
{
#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
// We have option to enable udis86 library.
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index ef5c9e632991..6efab948fa76 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -9,42 +9,43 @@
//
// This header file implements the operating system DynamicLibrary concept.
//
+// FIXME: This file leaks the ExplicitSymbols map and the OpenedHandles
+// vector, and is not thread safe!
+//
//===----------------------------------------------------------------------===//
#include "llvm/System/DynamicLibrary.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/RWMutex.h"
#include "llvm/Config/config.h"
#include <cstdio>
#include <cstring>
#include <map>
+#include <vector>
// Collection of symbol name/value pairs to be searched prior to any libraries.
-static std::map<std::string, void*> symbols;
-static llvm::sys::SmartRWMutex<true> SymbolsLock;
+static std::map<std::string, void*> *ExplicitSymbols = 0;
+static struct ExplicitSymbolsDeleter {
+ ~ExplicitSymbolsDeleter() {
+ if (ExplicitSymbols)
+ delete ExplicitSymbols;
+ }
+} Dummy;
void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
void *symbolValue) {
- llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
- symbols[symbolName] = symbolValue;
+ if (ExplicitSymbols == 0)
+ ExplicitSymbols = new std::map<std::string, void*>();
+ (*ExplicitSymbols)[symbolName] = symbolValue;
}
-// It is not possible to use ltdl.c on VC++ builds as the terms of its LGPL
-// license and special exception would cause all of LLVM to be placed under
-// the LGPL. This is because the exception applies only when libtool is
-// used, and obviously libtool is not used with Visual Studio. An entirely
-// separate implementation is provided in win32/DynamicLibrary.cpp.
-
#ifdef LLVM_ON_WIN32
#include "Win32/DynamicLibrary.inc"
#else
-//#include "ltdl.h"
#include <dlfcn.h>
-#include <cassert>
using namespace llvm;
using namespace llvm::sys;
@@ -53,56 +54,44 @@ using namespace llvm::sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-//static std::vector<lt_dlhandle> OpenedHandles;
-static std::vector<void *> OpenedHandles;
-
-DynamicLibrary::DynamicLibrary() {}
+static std::vector<void *> *OpenedHandles = 0;
-DynamicLibrary::~DynamicLibrary() {
- SmartScopedWriter<true> Writer(&SymbolsLock);
- while(!OpenedHandles.empty()) {
- void *H = OpenedHandles.back(); OpenedHandles.pop_back();
- dlclose(H);
- }
-}
bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
std::string *ErrMsg) {
- SmartScopedWriter<true> Writer(&SymbolsLock);
void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
if (H == 0) {
- if (ErrMsg)
- *ErrMsg = dlerror();
+ if (ErrMsg) *ErrMsg = dlerror();
return true;
}
- OpenedHandles.push_back(H);
+ if (OpenedHandles == 0)
+ OpenedHandles = new std::vector<void *>();
+ OpenedHandles->push_back(H);
return false;
}
void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
- // check_ltdl_initialization();
-
// First check symbols added via AddSymbol().
- SymbolsLock.reader_acquire();
- std::map<std::string, void *>::iterator I = symbols.find(symbolName);
- std::map<std::string, void *>::iterator E = symbols.end();
- SymbolsLock.reader_release();
+ if (ExplicitSymbols) {
+ std::map<std::string, void *>::iterator I =
+ ExplicitSymbols->find(symbolName);
+ std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
- if (I != E)
- return I->second;
+ if (I != E)
+ return I->second;
+ }
- SymbolsLock.writer_acquire();
// Now search the libraries.
- for (std::vector<void *>::iterator I = OpenedHandles.begin(),
- E = OpenedHandles.end(); I != E; ++I) {
- //lt_ptr ptr = lt_dlsym(*I, symbolName);
- void *ptr = dlsym(*I, symbolName);
- if (ptr) {
- SymbolsLock.writer_release();
- return ptr;
+ if (OpenedHandles) {
+ for (std::vector<void *>::iterator I = OpenedHandles->begin(),
+ E = OpenedHandles->end(); I != E; ++I) {
+ //lt_ptr ptr = lt_dlsym(*I, symbolName);
+ void *ptr = dlsym(*I, symbolName);
+ if (ptr) {
+ return ptr;
+ }
}
}
- SymbolsLock.writer_release();
#define EXPLICIT_SYMBOL(SYM) \
extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
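
The pattern replacing the locked globals here is construct-on-first-use plus a best-effort exit-time delete (the static Dummy above); the FIXME is candid that this is not thread safe. A C analog of the same idiom, with atexit() standing in for the static object's destructor (the names below are illustrative, not from the patch):

    #include <stdlib.h>

    static int *Table = NULL;          /* lazily created, like ExplicitSymbols */

    static void FreeTable(void) { free(Table); }

    static int *GetTable(void) {
        if (Table == NULL) {           /* construct on first use; not thread safe */
            Table = calloc(256, sizeof *Table);
            if (Table != NULL)
                atexit(FreeTable);     /* stand-in for ExplicitSymbolsDeleter */
        }
        return Table;
    }

    int main(void) {
        int *t = GetTable();           /* first use triggers construction */
        if (t) t[42] = 1;
        return 0;
    }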
diff --git a/lib/System/Errno.cpp b/lib/System/Errno.cpp
index d046aba04dd5..68f66f6e439b 100644
--- a/lib/System/Errno.cpp
+++ b/lib/System/Errno.cpp
@@ -17,6 +17,10 @@
#if HAVE_STRING_H
#include <string.h>
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
//=== independent code.
@@ -26,7 +30,6 @@ namespace llvm {
namespace sys {
#if HAVE_ERRNO_H
-#include <errno.h>
std::string StrError() {
return StrError(errno);
}
diff --git a/lib/System/Makefile b/lib/System/Makefile
index 49704c3c625a..d4fd60eee5f6 100644
--- a/lib/System/Makefile
+++ b/lib/System/Makefile
@@ -11,6 +11,12 @@ LEVEL = ../..
LIBRARYNAME = LLVMSystem
BUILD_ARCHIVE = 1
+include $(LEVEL)/Makefile.config
+
+ifeq ($(HOST_OS),MingW)
+ REQUIRES_EH := 1
+endif
+
EXTRA_DIST = Unix Win32 README.txt
include $(LEVEL)/Makefile.common
diff --git a/lib/System/Memory.cpp b/lib/System/Memory.cpp
index 375c73cf0204..e2d838dce026 100644
--- a/lib/System/Memory.cpp
+++ b/lib/System/Memory.cpp
@@ -37,13 +37,16 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
// icache invalidation for PPC and ARM.
#if defined(__APPLE__)
-#if (defined(__POWERPC__) || defined (__ppc__) || \
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
sys_icache_invalidate(Addr, Len);
-#endif
+# endif
+
#else
-#if (defined(__POWERPC__) || defined (__ppc__) || \
- defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
const size_t LineSize = 32;
const intptr_t Mask = ~(LineSize - 1);
@@ -57,6 +60,12 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
-#endif
+# elif defined(__arm__) && defined(__GNUC__)
+ // FIXME: Can we safely always call this for __GNUC__ everywhere?
+ char *Start = (char*) Addr;
+ char *End = Start + Len;
+ __clear_cache(Start, End);
+# endif
+
#endif // end apple
}
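
The new __arm__ branch relies on GCC's __clear_cache() builtin, which is what makes freshly written machine code safe to execute on targets with split instruction/data caches. A hedged sketch of the calling pattern (ARM/GCC only; Buf, Code, and Size are placeholders, not names from the patch):

    #include <string.h>

    /* Copy generated machine code into Buf, then make the range coherent
       between d-cache and i-cache before anything jumps to it. */
    static void CopyAndFlush(char *Buf, const char *Code, unsigned Size) {
        memcpy(Buf, Code, Size);
    #if defined(__arm__) && defined(__GNUC__)
        __clear_cache(Buf, Buf + Size);   /* GCC builtin, as used above */
    #endif
    }

    int main(void) {
        static char Buf[16];
        const char Nops[4] = {0};
        CopyAndFlush(Buf, Nops, sizeof Nops);   /* illustrative only */
        return 0;
    }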
diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp
index a5e9920ae3f0..8ccd6e52c4d5 100644
--- a/lib/System/Mutex.cpp
+++ b/lib/System/Mutex.cpp
@@ -115,8 +115,7 @@ MutexImpl::acquire()
int errorcode = pthread_mutex_lock(mutex);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
bool
@@ -129,8 +128,7 @@ MutexImpl::release()
int errorcode = pthread_mutex_unlock(mutex);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
bool
@@ -143,8 +141,7 @@ MutexImpl::tryacquire()
int errorcode = pthread_mutex_trylock(mutex);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
}
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index 72bd7ad6f046..df3357480937 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -13,6 +13,7 @@
#include "llvm/System/Path.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstring>
#include <ostream>
@@ -28,19 +29,10 @@ bool Path::operator==(const Path &that) const {
return path == that.path;
}
-bool Path::operator!=(const Path &that) const {
- return path != that.path;
-}
-
bool Path::operator<(const Path& that) const {
return path < that.path;
}
-std::ostream& llvm::operator<<(std::ostream &strm, const sys::Path &aPath) {
- strm << aPath.toString();
- return strm;
-}
-
Path
Path::GetLLVMConfigDir() {
Path result;
@@ -207,18 +199,6 @@ bool Path::hasMagicNumber(const std::string &Magic) const {
return false;
}
-void Path::makeAbsolute() {
- if (isAbsolute())
- return;
-
- Path CWD = Path::GetCurrentDirectory();
- assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!");
-
- CWD.appendComponent(path);
-
- path = CWD.toString();
-}
-
static void getPathList(const char*path, std::vector<Path>& Paths) {
const char* at = path;
const char* delim = strchr(at, PathSeparator);
diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp
index eb289d81b2e1..a3049d46fd65 100644
--- a/lib/System/Program.cpp
+++ b/lib/System/Program.cpp
@@ -22,6 +22,33 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
+int
+Program::ExecuteAndWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned secondsToWait,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+ return prg.Wait(secondsToWait, ErrMsg);
+ else
+ return -1;
+}
+
+void
+Program::ExecuteNoWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
+}
+
+
}
// Include the platform-specific parts of this class.
diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp
index 15d98cb8f418..5faf220eb916 100644
--- a/lib/System/RWMutex.cpp
+++ b/lib/System/RWMutex.cpp
@@ -117,8 +117,7 @@ RWMutexImpl::reader_acquire()
int errorcode = pthread_rwlock_rdlock(rwlock);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
bool
@@ -131,8 +130,7 @@ RWMutexImpl::reader_release()
int errorcode = pthread_rwlock_unlock(rwlock);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
bool
@@ -145,8 +143,7 @@ RWMutexImpl::writer_acquire()
int errorcode = pthread_rwlock_wrlock(rwlock);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
bool
@@ -159,8 +156,7 @@ RWMutexImpl::writer_release()
int errorcode = pthread_rwlock_unlock(rwlock);
return errorcode == 0;
- }
- return false;
+ } else return false;
}
}
diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp
index a2d7f82715d9..466c46802647 100644
--- a/lib/System/Threading.cpp
+++ b/lib/System/Threading.cpp
@@ -14,6 +14,7 @@
#include "llvm/System/Threading.h"
#include "llvm/System/Atomic.h"
#include "llvm/System/Mutex.h"
+#include "llvm/Config/config.h"
#include <cassert>
using namespace llvm;
diff --git a/lib/System/Unix/Alarm.inc b/lib/System/Unix/Alarm.inc
index 28ff1b8a6368..fb42b6c65da1 100644
--- a/lib/System/Unix/Alarm.inc
+++ b/lib/System/Unix/Alarm.inc
@@ -67,6 +67,6 @@ int sys::AlarmStatus() {
return 0;
}
-void Sleep(unsigned n) {
+void sys::Sleep(unsigned n) {
::sleep(n);
}
diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc
index fb319fd09e1c..c76d6a4e18f1 100644
--- a/lib/System/Unix/Host.inc
+++ b/lib/System/Unix/Host.inc
@@ -16,7 +16,8 @@
//=== is guaranteed to work on *all* UNIX variants.
//===----------------------------------------------------------------------===//
-#include <llvm/Config/config.h>
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
#include "Unix.h"
#include <sys/utsname.h>
#include <string>
@@ -33,10 +34,47 @@ static std::string getOSVersion() {
}
std::string sys::getHostTriple() {
- // FIXME: Derive more directly instead of relying on the autoconf
- // generated variable.
+ // FIXME: Derive directly instead of relying on the autoconf generated
+ // variable.
- std::string Triple = LLVM_HOSTTRIPLE;
+ StringRef HostTripleString(LLVM_HOSTTRIPLE);
+ std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
+
+ // Normalize the arch, since the host triple may not actually match the host.
+ std::string Arch = ArchSplit.first;
+
+ // It would be nice to do this in terms of llvm::Triple, but that is in
+ // Support which is layered above us.
+#if defined(__x86_64__)
+ Arch = "x86_64";
+#elif defined(__i386__)
+ Arch = "i386";
+#elif defined(__ppc64__)
+ Arch = "powerpc64";
+#elif defined(__ppc__)
+ Arch = "powerpc";
+#elif defined(__arm__)
+
+ // FIXME: We need to pick the right ARM triple (which involves querying the
+ // chip). However, for now this is most important for LLVM arch selection, so
+ // we only need to make sure to distinguish ARM and Thumb.
+# if defined(__thumb__)
+ Arch = "thumb";
+# else
+ Arch = "arm";
+# endif
+
+#else
+
+ // FIXME: When enough auto-detection is in place, this should just
+ // #error. Then at least the arch selection is done, and we only need the OS
+ // etc selection to kill off the use of LLVM_HOSTTRIPLE.
+
+#endif
+
+ std::string Triple(Arch);
+ Triple += '-';
+ Triple += ArchSplit.second;
// Force i<N>86 to i386.
if (Triple[0] == 'i' && isdigit(Triple[1]) &&
diff --git a/lib/System/Unix/Memory.inc b/lib/System/Unix/Memory.inc
index b7a70135bcb7..a80f56fbc144 100644
--- a/lib/System/Unix/Memory.inc
+++ b/lib/System/Unix/Memory.inc
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Unix.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/System/Process.h"
#ifdef HAVE_SYS_MMAN_H
@@ -28,12 +29,12 @@
/// is very OS specific.
///
llvm::sys::MemoryBlock
-llvm::sys::Memory::AllocateRWX(unsigned NumBytes, const MemoryBlock* NearBlock,
+llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
std::string *ErrMsg) {
if (NumBytes == 0) return MemoryBlock();
- unsigned pageSize = Process::GetPageSize();
- unsigned NumPages = (NumBytes+pageSize-1)/pageSize;
+ size_t pageSize = Process::GetPageSize();
+ size_t NumPages = (NumBytes+pageSize-1)/pageSize;
int fd = -1;
#ifdef NEED_DEV_ZERO_FOR_MMAP
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index 1f73571cf140..89285b48132f 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -16,7 +16,7 @@
//=== is guaranteed to work on *all* UNIX variants.
//===----------------------------------------------------------------------===//
-#include "llvm/Config/alloca.h"
+#include "llvm/ADT/SmallVector.h"
#include "Unix.h"
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
@@ -57,6 +57,10 @@
#include <dlfcn.h>
#endif
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
// Put in a hack for Cygwin which falsely reports that the mkdtemp function
// is available when it is not.
#ifdef __CYGWIN__
@@ -92,15 +96,7 @@ Path::isValid() const {
// Check some obvious things
if (path.empty())
return false;
- else if (path.length() >= MAXPATHLEN)
- return false;
-
- // Check that the characters are ascii chars
- size_t len = path.length();
- unsigned i = 0;
- while (i < len && isascii(path[i]))
- ++i;
- return i >= len;
+ return path.length() < MAXPATHLEN;
}
bool
@@ -117,6 +113,19 @@ Path::isAbsolute() const {
return false;
return path[0] == '/';
}
+
+void Path::makeAbsolute() {
+ if (isAbsolute())
+ return;
+
+ Path CWD = Path::GetCurrentDirectory();
+ assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!");
+
+ CWD.appendComponent(path);
+
+ path = CWD.str();
+}
+
Path
Path::GetRootDirectory() {
Path result;
@@ -331,7 +340,17 @@ getprogpath(char ret[PATH_MAX], const char *bin)
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
-#if defined(__FreeBSD__)
+#if defined(__APPLE__)
+ // On OS X the executable path is saved to the stack by dyld. Reading it
+ // from there is much faster than calling dladdr, especially for large
+ // binaries with symbols.
+ char exe_path[MAXPATHLEN];
+ uint32_t size = sizeof(exe_path);
+ if (_NSGetExecutablePath(exe_path, &size) == 0) {
+ char link_path[MAXPATHLEN];
+ return Path(std::string(realpath(exe_path, link_path)));
+ }
+#elif defined(__FreeBSD__)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
@@ -339,10 +358,8 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
#elif defined(__linux__) || defined(__CYGWIN__)
char exe_path[MAXPATHLEN];
ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
- if (len > 0 && len < MAXPATHLEN - 1) {
- exe_path[len] = '\0';
- return Path(std::string(exe_path));
- }
+ if (len >= 0)
+ return Path(std::string(exe_path, len));
#elif defined(HAVE_DLFCN_H)
// Use dladdr to get executable path if available.
Dl_info DLInfo;
@@ -397,7 +414,9 @@ Path::getSuffix() const {
bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
assert(len < 1024 && "Request for magic string too long");
- char* buf = (char*) alloca(1 + len);
+ SmallVector<char, 128> Buf;
+ Buf.resize(1 + len);
+ char* buf = Buf.data();
int fd = ::open(path.c_str(), O_RDONLY);
if (fd < 0)
return false;
@@ -426,12 +445,12 @@ Path::isDirectory() const {
bool
Path::canRead() const {
- return 0 == access(path.c_str(), F_OK | R_OK );
+ return 0 == access(path.c_str(), R_OK);
}
bool
Path::canWrite() const {
- return 0 == access(path.c_str(), F_OK | W_OK );
+ return 0 == access(path.c_str(), W_OK);
}
bool
@@ -499,7 +518,7 @@ static bool AddPermissionBits(const Path &File, int bits) {
// Get the file's current mode.
struct stat buf;
- if (0 != stat(File.toString().c_str(), &buf))
+ if (0 != stat(File.c_str(), &buf))
return false;
// Change the file to have whichever permissions bits from 'bits'
// that the umask would not disable.
@@ -631,7 +650,7 @@ Path::eraseSuffix() {
static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
- if (access(beg, F_OK | R_OK | W_OK) == 0)
+ if (access(beg, R_OK | W_OK) == 0)
return false;
if (create_parents) {
@@ -756,7 +775,7 @@ bool
Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
if (0 != ::rename(path.c_str(), newName.c_str()))
return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
- newName.toString() + "'");
+ newName.str() + "'");
return false;
}
@@ -778,13 +797,13 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
int outFile = -1;
inFile = ::open(Src.c_str(), O_RDONLY);
if (inFile == -1)
- return MakeErrMsg(ErrMsg, Src.toString() +
+ return MakeErrMsg(ErrMsg, Src.str() +
": can't open source file to copy");
outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
if (outFile == -1) {
::close(inFile);
- return MakeErrMsg(ErrMsg, Dest.toString() +
+ return MakeErrMsg(ErrMsg, Dest.str() +
": can't create destination file for copy");
}
@@ -794,7 +813,7 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
if (errno != EINTR && errno != EAGAIN) {
::close(inFile);
::close(outFile);
- return MakeErrMsg(ErrMsg, Src.toString()+": can't read source file");
+ return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
}
} else {
char *BufPtr = Buffer;
@@ -804,7 +823,7 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
if (errno != EINTR && errno != EAGAIN) {
::close(inFile);
::close(outFile);
- return MakeErrMsg(ErrMsg, Dest.toString() +
+ return MakeErrMsg(ErrMsg, Dest.str() +
": can't write destination file");
}
} else {
@@ -826,7 +845,9 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
// Append an XXXXXX pattern to the end of the file for use with mkstemp,
// mktemp or our own implementation.
- char *FNBuffer = (char*) alloca(path.size()+8);
+ SmallVector<char, 128> Buf;
+ Buf.resize(path.size()+8);
+ char *FNBuffer = Buf.data();
path.copy(FNBuffer,path.size());
if (isDirectory())
strcpy(FNBuffer+path.size(), "/XXXXXX");
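
makeUnique() builds the template that the mkstemp family expects: the path plus a trailing run of Xs that libc rewrites in place. The same idiom in isolation:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void) {
        char tmpl[] = "/tmp/llvm-XXXXXX";   /* trailing XXXXXX, as above */
        int fd = mkstemp(tmpl);             /* rewrites the Xs in place */
        if (fd < 0) return 1;
        printf("%s\n", tmpl);               /* e.g. /tmp/llvm-a1B2c3 */
        close(fd);
        unlink(tmpl);
        return 0;
    }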
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
index 2da31c9f215b..94e4c1bde25c 100644
--- a/lib/System/Unix/Process.inc
+++ b/lib/System/Unix/Process.inc
@@ -46,11 +46,11 @@ Process::GetPageSize()
// On Cygwin, getpagesize() returns 64k but the page size for the purposes of
// memory protection and mmap() is 4k.
// See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
- static const int page_size = 0x1000;
+ const int page_size = 0x1000;
#elif defined(HAVE_GETPAGESIZE)
- static const int page_size = ::getpagesize();
+ const int page_size = ::getpagesize();
#elif defined(HAVE_SYSCONF)
- static long page_size = ::sysconf(_SC_PAGE_SIZE);
+ long page_size = ::sysconf(_SC_PAGE_SIZE);
#else
#warning Cannot get the page size on this machine
#endif
@@ -91,7 +91,7 @@ Process::GetTotalMemoryUsage()
malloc_statistics_t Stats;
malloc_zone_statistics(malloc_default_zone(), &Stats);
return Stats.size_allocated; // darwin
-#elif defined(HAVE_GETRUSAGE)
+#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
struct rusage usage;
::getrusage(RUSAGE_SELF, &usage);
return usage.ru_maxrss;
@@ -179,27 +179,24 @@ void Process::PreventCoreFiles() {
}
bool Process::StandardInIsUserInput() {
-#if HAVE_ISATTY
- return isatty(0);
-#endif
- // If we don't have isatty, just return false.
- return false;
+ return FileDescriptorIsDisplayed(STDIN_FILENO);
}
bool Process::StandardOutIsDisplayed() {
-#if HAVE_ISATTY
- return isatty(1);
-#endif
- // If we don't have isatty, just return false.
- return false;
+ return FileDescriptorIsDisplayed(STDOUT_FILENO);
}
bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDERR_FILENO);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
#if HAVE_ISATTY
- return isatty(2);
-#endif
+ return isatty(fd);
+#else
// If we don't have isatty, just return false.
return false;
+#endif
}
static unsigned getColumns(int FileID) {
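
All three Standard*IsDisplayed() entry points now funnel into FileDescriptorIsDisplayed(), which is just an isatty() probe when HAVE_ISATTY. The typical caller-side use is choosing terminal-only output; the same test in plain C:

    #include <stdio.h>
    #include <unistd.h>

    int main(void) {
        /* same probe FileDescriptorIsDisplayed() performs under HAVE_ISATTY */
        if (isatty(STDOUT_FILENO))
            printf("\033[1mstdout is a terminal\033[0m\n");
        else
            printf("stdout is piped or redirected\n");
        return 0;
    }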
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index cdc6fee60949..56dea250a779 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -1,10 +1,10 @@
//===- llvm/System/Unix/Program.cpp -----------------------------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Unix specific portion of the Program class.
@@ -18,7 +18,6 @@
#include <llvm/Config/config.h>
#include "Unix.h"
-#include <iostream>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
@@ -35,6 +34,15 @@
namespace llvm {
using namespace sys;
+Program::Program() : Data_(0) {}
+
+Program::~Program() {}
+
+unsigned Program::GetPid() const {
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ return static_cast<unsigned>(pid);
+}
+
// This function just uses the PATH environment variable to find the program.
Path
Program::FindProgramByName(const std::string& progName) {
@@ -45,16 +53,17 @@ Program::FindProgramByName(const std::string& progName) {
Path temp;
if (!temp.set(progName)) // invalid name
return Path();
- // FIXME: have to check for absolute filename - we cannot assume anything
- // about "." being in $PATH
- if (temp.canExecute()) // already executable as is
+ // Use the given path verbatim if it contains any slashes; this matches
+ // the behavior of sh(1) and friends.
+ if (progName.find('/') != std::string::npos)
return temp;
- // At this point, the file name is valid and its not executable
-
+ // At this point, the file name does not contain slashes. Search for it
+ // through the directories specified in the PATH environment variable.
+
// Get the path. If its empty, we can't do anything to find it.
const char *PathStr = getenv("PATH");
- if (PathStr == 0)
+ if (PathStr == 0)
return Path();
// Now we have a colon separated list of directories to search; try them.
@@ -93,7 +102,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
// Redirect empty paths to /dev/null
File = "/dev/null";
else
- File = Path->toString();
+ File = Path->str();
// Open the file
int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
@@ -112,11 +121,6 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
return false;
}
-static bool Timeout = false;
-static void TimeOutHandler(int Sig) {
- Timeout = true;
-}
-
static void SetMemoryLimits (unsigned size)
{
#if HAVE_SYS_RESOURCE_H
@@ -142,49 +146,47 @@ static void SetMemoryLimits (unsigned size)
#endif
}
-int
-Program::ExecuteAndWait(const Path& path,
- const char** args,
- const char** envp,
- const Path** redirects,
- unsigned secondsToWait,
- unsigned memoryLimit,
- std::string* ErrMsg)
+bool
+Program::Execute(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg)
{
if (!path.canExecute()) {
if (ErrMsg)
- *ErrMsg = path.toString() + " is not executable";
- return -1;
+ *ErrMsg = path.str() + " is not executable";
+ return false;
}
-#ifdef HAVE_SYS_WAIT_H
// Create a child process.
int child = fork();
switch (child) {
  // An error occurred: Return to the caller.
case -1:
MakeErrMsg(ErrMsg, "Couldn't fork");
- return -1;
+ return false;
// Child process: Execute the program.
case 0: {
// Redirect file descriptors...
if (redirects) {
// Redirect stdin
- if (RedirectIO(redirects[0], 0, ErrMsg)) { return -1; }
+ if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
// Redirect stdout
- if (RedirectIO(redirects[1], 1, ErrMsg)) { return -1; }
- if (redirects[1] && redirects[2] &&
+ if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
+ if (redirects[1] && redirects[2] &&
*(redirects[1]) == *(redirects[2])) {
// If stdout and stderr should go to the same place, redirect stderr
// to the FD already open for stdout.
if (-1 == dup2(1,2)) {
MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
- return -1;
+ return false;
}
} else {
// Just redirect stderr
- if (RedirectIO(redirects[2], 2, ErrMsg)) { return -1; }
+ if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
}
}
@@ -192,15 +194,19 @@ Program::ExecuteAndWait(const Path& path,
if (memoryLimit!=0) {
SetMemoryLimits(memoryLimit);
}
-
+
// Execute!
if (envp != 0)
- execve (path.c_str(), (char**)args, (char**)envp);
+ execve(path.c_str(), (char**)args, (char**)envp);
else
- execv (path.c_str(), (char**)args);
- // If the execve() failed, we should exit and let the parent pick up
- // our non-zero exit status.
- exit (errno);
+ execv(path.c_str(), (char**)args);
+ // If the execve() failed, we should exit. Follow Unix protocol and
+ // return 127 if the executable was not found, and 126 otherwise.
+ // Use _exit rather than exit so that atexit functions and static
+ // object destructors cloned from the parent process aren't
+ // redundantly run, and so that any data buffered in stdio buffers
+ // cloned from the parent aren't redundantly written out.
+ _exit(errno == ENOENT ? 127 : 126);
}
// Parent process: Break out of the switch to do our processing.
@@ -208,32 +214,41 @@ Program::ExecuteAndWait(const Path& path,
break;
}
- // Make sure stderr and stdout have been flushed
- std::cerr << std::flush;
- std::cout << std::flush;
- fsync(1);
- fsync(2);
+ Data_ = reinterpret_cast<void*>(child);
+
+ return true;
+}
+int
+Program::Wait(unsigned secondsToWait,
+ std::string* ErrMsg)
+{
+#ifdef HAVE_SYS_WAIT_H
struct sigaction Act, Old;
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
// Install a timeout handler.
if (secondsToWait) {
- Timeout = false;
- Act.sa_sigaction = 0;
- Act.sa_handler = TimeOutHandler;
+ memset(&Act, 0, sizeof(Act));
+ Act.sa_handler = SIG_IGN;
sigemptyset(&Act.sa_mask);
- Act.sa_flags = 0;
sigaction(SIGALRM, &Act, &Old);
alarm(secondsToWait);
}
// Parent process: Wait for the child process to terminate.
int status;
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ pid_t child = static_cast<pid_t>(pid);
while (wait(&status) != child)
if (secondsToWait && errno == EINTR) {
// Kill the child.
kill(child, SIGKILL);
-
+
// Turn off the alarm and restore the signal handler
alarm(0);
sigaction(SIGALRM, &Old, 0);
@@ -271,7 +286,25 @@ Program::ExecuteAndWait(const Path& path,
#else
return -99;
#endif
-
+
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return true;
+ }
+
+ uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
+ pid_t pid = static_cast<pid_t>(pid64);
+
+ if (kill(pid, SIGKILL) != 0) {
+ MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+ return true;
+ }
+
+ return false;
}
bool Program::ChangeStdinToBinary(){
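
The child-side change above adopts the shell convention: _exit(127) when the executable was not found, _exit(126) for any other exec failure, and _exit() rather than exit() so inherited atexit handlers and stdio buffers are not replayed in the child. The convention is observable from any parent; a sketch:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void) {
        pid_t pid = fork();
        if (pid < 0) return 1;
        if (pid == 0) {
            execl("/no/such/binary", "x", (char *)0);
            /* exec failed: follow the convention used in the patch */
            _exit(errno == ENOENT ? 127 : 126);
        }
        int status;
        waitpid(pid, &status, 0);
        if (WIFEXITED(status) && WEXITSTATUS(status) == 127)
            printf("command not found\n");
        return 0;
    }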
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index e385e0c55662..d39e1e99a0c5 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -14,6 +14,7 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/System/Mutex.h"
#include <vector>
#include <algorithm>
#if HAVE_EXECINFO_H
@@ -33,6 +34,8 @@ using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
+static SmartMutex<true> SignalsMutex;
+
/// InterruptFunction - The function to call if ctrl-c is pressed.
static void (*InterruptFunction)() = 0;
@@ -113,6 +116,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
sigfillset(&SigMask);
sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+ SignalsMutex.acquire();
if (FilesToRemove != 0)
while (!FilesToRemove->empty()) {
FilesToRemove->back().eraseFromDisk(true);
@@ -122,14 +126,19 @@ static RETSIGTYPE SignalHandler(int Sig) {
if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
if (InterruptFunction) {
void (*IF)() = InterruptFunction;
+ SignalsMutex.release();
InterruptFunction = 0;
IF(); // run the interrupt function.
return;
}
+
+ SignalsMutex.release();
raise(Sig); // Execute the default handler.
return;
}
+ SignalsMutex.release();
+
// Otherwise if it is a fault (like SEGV) run any handler.
if (CallBacksToRun)
for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
@@ -139,18 +148,23 @@ static RETSIGTYPE SignalHandler(int Sig) {
void llvm::sys::SetInterruptFunction(void (*IF)()) {
+ SignalsMutex.acquire();
InterruptFunction = IF;
+ SignalsMutex.release();
RegisterHandlers();
}
// RemoveFileOnSignal - The public API
bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
std::string* ErrMsg) {
+ SignalsMutex.acquire();
if (FilesToRemove == 0)
FilesToRemove = new std::vector<sys::Path>();
FilesToRemove->push_back(Filename);
+ SignalsMutex.release();
+
RegisterHandlers();
return false;
}
diff --git a/lib/System/Unix/TimeValue.inc b/lib/System/Unix/TimeValue.inc
index 8dd30b9322f9..1ae8c7184d55 100644
--- a/lib/System/Unix/TimeValue.inc
+++ b/lib/System/Unix/TimeValue.inc
@@ -21,7 +21,7 @@
namespace llvm {
using namespace sys;
-std::string TimeValue::toString() const {
+std::string TimeValue::str() const {
char buffer[32];
time_t ourTime = time_t(this->toEpochTime());
diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc
index aa04268406d4..10e64aa990c9 100644
--- a/lib/System/Win32/DynamicLibrary.inc
+++ b/lib/System/Win32/DynamicLibrary.inc
@@ -67,7 +67,6 @@ extern "C" {
PVOID UserContext)
#endif
{
- llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
// Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded
// into the process.
if (stricmp(ModuleName, "msvci70") != 0 &&
@@ -89,36 +88,9 @@ extern "C" {
}
}
-DynamicLibrary::DynamicLibrary() : handle(0) {
- SmartScopedWriter<true> Writer(&SymbolsLock);
- handle = GetModuleHandle(NULL);
- OpenedHandles.push_back((HMODULE)handle);
-}
-
-DynamicLibrary::~DynamicLibrary() {
- llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
- if (handle == 0)
- return;
-
- // GetModuleHandle() does not increment the ref count, so we must not free
- // the handle to the executable.
- if (handle != GetModuleHandle(NULL))
- FreeLibrary((HMODULE)handle);
- handle = 0;
-
- for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
- E = OpenedHandles.end(); I != E; ++I) {
- if (*I == handle) {
- // Note: don't use the swap/pop_back trick here. Order is important.
- OpenedHandles.erase(I);
- }
- }
-}
-
bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
std::string *ErrMsg) {
if (filename) {
- llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
HMODULE a_handle = LoadLibrary(filename);
if (a_handle == 0)
@@ -170,24 +142,22 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
// First check symbols added via AddSymbol().
- SymbolsLock.reader_acquire();
- std::map<std::string, void *>::iterator I = symbols.find(symbolName);
- std::map<std::string, void *>::iterator E = symbols.end();
- SymbolsLock.reader_release();
- if (I != E)
- return I->second;
+ if (ExplicitSymbols) {
+ std::map<std::string, void *>::iterator I =
+ ExplicitSymbols->find(symbolName);
+ std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+ if (I != E)
+ return I->second;
+ }
// Now search the libraries.
- SymbolsLock.writer_acquire();
for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
E = OpenedHandles.end(); I != E; ++I) {
FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
if (ptr) {
- SymbolsLock.writer_release();
return (void *) ptr;
}
}
- SymbolsLock.writer_release();
#if defined(__MINGW32__)
{
diff --git a/lib/System/Win32/Memory.inc b/lib/System/Win32/Memory.inc
index 5e5cf7a6762d..7611ecdb929a 100644
--- a/lib/System/Win32/Memory.inc
+++ b/lib/System/Win32/Memory.inc
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "Win32.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/System/Process.h"
namespace llvm {
@@ -23,13 +24,13 @@ using namespace sys;
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
-MemoryBlock Memory::AllocateRWX(unsigned NumBytes,
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
const MemoryBlock *NearBlock,
std::string *ErrMsg) {
if (NumBytes == 0) return MemoryBlock();
- static const long pageSize = Process::GetPageSize();
- unsigned NumPages = (NumBytes+pageSize-1)/pageSize;
+ static const size_t pageSize = Process::GetPageSize();
+ size_t NumPages = (NumBytes+pageSize-1)/pageSize;
//FIXME: support NearBlock if ever needed on Win64.
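
Widening NumBytes and pageSize to size_t keeps the page count from being
truncated on Win64; the expression itself is ordinary round-up division.
A tiny illustration (PagesFor is a hypothetical helper):

    #include <cstddef>

    // ceil(NumBytes / PageSize) without floating point.
    inline size_t PagesFor(size_t NumBytes, size_t PageSize) {
      return (NumBytes + PageSize - 1) / PageSize;
    }
    // PagesFor(1, 4096) == 1, PagesFor(4096, 4096) == 1,
    // PagesFor(4097, 4096) == 2.
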
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index 683c94bba44e..46b965f4b052 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -125,9 +125,30 @@ Path::isValid() const {
return true;
}
+void Path::makeAbsolute() {
+ TCHAR FullPath[MAX_PATH + 1] = {0};
+ LPTSTR FilePart = NULL;
+
+ DWORD RetLength = ::GetFullPathNameA(path.c_str(),
+ sizeof(FullPath)/sizeof(FullPath[0]),
+ FullPath, &FilePart);
+
+ if (0 == RetLength) {
+ // FIXME: Report the error GetLastError()
+ assert(0 && "Unable to make absolute path!");
+ } else if (RetLength > MAX_PATH) {
+ // FIXME: Report too small buffer (needed RetLength bytes).
+ assert(0 && "Unable to make absolute path!");
+ } else {
+ path = FullPath;
+ }
+}
+
bool
Path::isAbsolute(const char *NameStart, unsigned NameLen) {
assert(NameStart);
+  // FIXME: This does not correctly handle an absolute path that starts
+  // with a drive letter or is in UNC format.
switch (NameLen) {
case 0:
return false;
@@ -135,12 +156,15 @@ Path::isAbsolute(const char *NameStart, unsigned NameLen) {
case 2:
return NameStart[0] == '/';
default:
- return NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/');
+ return (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+ (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
}
}
bool
Path::isAbsolute() const {
+  // FIXME: This does not correctly handle an absolute path that starts
+  // with a drive letter or is in UNC format.
switch (path.length()) {
case 0:
return false;
@@ -784,8 +808,8 @@ CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
// Can't use CopyFile macro defined in Windows.h because it would mess up the
// above line. We use the expansion it would have in a non-UNICODE build.
if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
- return MakeErrMsg(ErrMsg, "Can't copy '" + Src.toString() +
- "' to '" + Dest.toString() + "': ");
+ return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() +
+ "' to '" + Dest.str() + "': ");
return false;
}
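
The widened test above accepts both separator styles, but as the new FIXMEs
note it still misclassifies drive-relative ("C:foo") and UNC ("\\server\share")
paths. A sketch of a fuller check, under the assumption that rooted paths such
as "\foo" should keep counting as absolute the way they do today
(IsAbsoluteWin32 is a hypothetical helper, not the patch's code):

    #include <cctype>

    static bool IsSep(char C) { return C == '/' || C == '\\'; }

    bool IsAbsoluteWin32(const char *P, unsigned Len) {
      if (Len >= 2 && IsSep(P[0]) && IsSep(P[1]))
        return true;                               // UNC: \\server\share
      if (Len >= 3 && std::isalpha((unsigned char)P[0]) &&
          P[1] == ':' && IsSep(P[2]))
        return true;                               // drive: C:\dir
      // Strictly, "\foo" is relative to the current drive, but it is treated
      // as absolute here to match the existing behavior.
      return Len >= 1 && IsSep(P[0]);
    }
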
diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc
index cfbe33c85a2f..feb0806116e4 100644
--- a/lib/System/Win32/Process.inc
+++ b/lib/System/Win32/Process.inc
@@ -120,15 +120,19 @@ void Process::PreventCoreFiles() {
}
bool Process::StandardInIsUserInput() {
- return GetFileType((HANDLE)_get_osfhandle(0)) == FILE_TYPE_CHAR;
+ return FileDescriptorIsDisplayed(0);
}
bool Process::StandardOutIsDisplayed() {
- return GetFileType((HANDLE)_get_osfhandle(1)) == FILE_TYPE_CHAR;
+ return FileDescriptorIsDisplayed(1);
}
bool Process::StandardErrIsDisplayed() {
- return GetFileType((HANDLE)_get_osfhandle(2)) == FILE_TYPE_CHAR;
+ return FileDescriptorIsDisplayed(2);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+ return GetFileType((HANDLE)_get_osfhandle(fd)) == FILE_TYPE_CHAR;
}
unsigned Process::StandardOutColumns() {
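
The three standard-stream predicates now share one fd-parameterized helper,
which also becomes usable for arbitrary descriptors. A hypothetical call site:

    #include "llvm/System/Process.h"  // this era's Process API

    // Colorize only when both stdout (1) and stderr (2) are consoles.
    bool ShouldColorize() {
      return llvm::sys::Process::FileDescriptorIsDisplayed(1) &&
             llvm::sys::Process::FileDescriptorIsDisplayed(2);
    }
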
diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc
index 49086b8348e6..a69826fdcef4 100644
--- a/lib/System/Win32/Program.inc
+++ b/lib/System/Win32/Program.inc
@@ -22,9 +22,32 @@
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
+namespace {
+ struct Win32ProcessInfo {
+ HANDLE hProcess;
+ DWORD dwProcessId;
+ };
+}
+
namespace llvm {
using namespace sys;
+Program::Program() : Data_(0) {}
+
+Program::~Program() {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+}
+
+unsigned Program::GetPid() const {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ return wpi->dwProcessId;
+}
+
// This function just uses the PATH environment variable to find the program.
Path
Program::FindProgramByName(const std::string& progName) {
@@ -82,7 +105,7 @@ static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
if (path->isEmpty())
fname = "NUL";
else
- fname = path->toString().c_str();
+ fname = path->c_str();
SECURITY_ATTRIBUTES sa;
sa.nLength = sizeof(sa);
@@ -109,29 +132,41 @@ static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
DWORD cbJobObjectInfoLength);
#endif
-int
-Program::ExecuteAndWait(const Path& path,
- const char** args,
- const char** envp,
- const Path** redirects,
- unsigned secondsToWait,
- unsigned memoryLimit,
- std::string* ErrMsg) {
+/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess.
+static bool ArgNeedsQuotes(const char *Str) {
+ return Str[0] == '\0' || strchr(Str, ' ') != 0;
+}
+
+bool
+Program::Execute(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+
if (!path.canExecute()) {
if (ErrMsg)
*ErrMsg = "program not executable";
- return -1;
+ return false;
}
// Windows wants a command line, not an array of args, to pass to the new
// process. We have to concatenate them all, while quoting the args that
- // have embedded spaces.
+ // have embedded spaces (or are empty).
// First, determine the length of the command line.
unsigned len = 0;
for (unsigned i = 0; args[i]; i++) {
len += strlen(args[i]) + 1;
- if (strchr(args[i], ' '))
+ if (ArgNeedsQuotes(args[i]))
len += 2;
}
@@ -142,7 +177,7 @@ Program::ExecuteAndWait(const Path& path,
for (unsigned i = 0; args[i]; i++) {
const char *arg = args[i];
size_t len = strlen(arg);
- bool needsQuoting = strchr(arg, ' ') != 0;
+ bool needsQuoting = ArgNeedsQuotes(arg);
if (needsQuoting)
*p++ = '"';
memcpy(p, arg, len);
@@ -195,13 +230,13 @@ Program::ExecuteAndWait(const Path& path,
si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
if (si.hStdInput == INVALID_HANDLE_VALUE) {
MakeErrMsg(ErrMsg, "can't redirect stdin");
- return -1;
+ return false;
}
si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
if (si.hStdOutput == INVALID_HANDLE_VALUE) {
CloseHandle(si.hStdInput);
MakeErrMsg(ErrMsg, "can't redirect stdout");
- return -1;
+ return false;
}
if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
// If stdout and stderr should go to the same place, redirect stderr
@@ -216,7 +251,7 @@ Program::ExecuteAndWait(const Path& path,
CloseHandle(si.hStdInput);
CloseHandle(si.hStdOutput);
MakeErrMsg(ErrMsg, "can't redirect stderr");
- return -1;
+ return false;
}
}
}
@@ -237,16 +272,18 @@ Program::ExecuteAndWait(const Path& path,
CloseHandle(si.hStdError);
// Now return an error if the process didn't get created.
- if (!rc)
- {
+ if (!rc) {
SetLastError(err);
MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
- path.toString() + "'");
- return -1;
+ path.str() + "'");
+ return false;
}
+ Win32ProcessInfo* wpi = new Win32ProcessInfo;
+ wpi->hProcess = pi.hProcess;
+ wpi->dwProcessId = pi.dwProcessId;
+ Data_ = wpi;
// Make sure these get closed no matter what.
- AutoHandle hProcess(pi.hProcess);
AutoHandle hThread(pi.hThread);
// Assign the process to a job if a memory limit is defined.
@@ -270,39 +307,68 @@ Program::ExecuteAndWait(const Path& path,
MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
TerminateProcess(pi.hProcess, 1);
WaitForSingleObject(pi.hProcess, INFINITE);
- return -1;
+ return false;
}
}
- // Wait for it to terminate.
+ return true;
+}
+
+int
+Program::Wait(unsigned secondsToWait,
+ std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+
+ // Wait for the process to terminate.
DWORD millisecondsToWait = INFINITE;
if (secondsToWait > 0)
millisecondsToWait = secondsToWait * 1000;
- if (WaitForSingleObject(pi.hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
- if (!TerminateProcess(pi.hProcess, 1)) {
- MakeErrMsg(ErrMsg, std::string("Failed to terminate timed-out program '")
- + path.toString() + "'");
+ if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+ if (!TerminateProcess(hProcess, 1)) {
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
return -1;
}
- WaitForSingleObject(pi.hProcess, INFINITE);
+ WaitForSingleObject(hProcess, INFINITE);
}
// Get its exit status.
DWORD status;
- rc = GetExitCodeProcess(pi.hProcess, &status);
- err = GetLastError();
+ BOOL rc = GetExitCodeProcess(hProcess, &status);
+ DWORD err = GetLastError();
if (!rc) {
SetLastError(err);
- MakeErrMsg(ErrMsg, std::string("Failed getting status for program '") +
- path.toString() + "'");
+ MakeErrMsg(ErrMsg, "Failed getting status for program.");
return -1;
}
return status;
}
+bool
+Program::Kill(std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return true;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+ if (TerminateProcess(hProcess, 1) == 0) {
+ MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+ return true;
+ }
+
+ return false;
+}
+
bool Program::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
return result == -1;
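
CreateProcess takes one flat command line rather than an argv array, so
Execute must rebuild it, and the new ArgNeedsQuotes also quotes empty
arguments, which would otherwise disappear entirely. A compact sketch of the
same two concerns using std::string (matching the patch in that embedded
double quotes are still not escaped; FlattenCommandLine is a made-up name):

    #include <cstring>
    #include <string>

    static bool ArgNeedsQuotes(const char *Str) {
      return Str[0] == '\0' || std::strchr(Str, ' ') != 0;
    }

    std::string FlattenCommandLine(const char **Args) {
      std::string Cmd;
      for (unsigned i = 0; Args[i]; ++i) {
        if (i) Cmd += ' ';
        if (ArgNeedsQuotes(Args[i])) {
          Cmd += '"';
          Cmd += Args[i];
          Cmd += '"';
        } else {
          Cmd += Args[i];
        }
      }
      return Cmd;
    }
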
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index 3a8f77e3cdb9..dba22185ac7f 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -43,6 +43,9 @@ static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
static bool RegisteredUnhandledExceptionFilter = false;
static bool CleanupExecuted = false;
+#ifdef _MSC_VER
+static bool ExitOnUnhandledExceptions = false;
+#endif
static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
// Windows creates a new thread to execute the console handler when an event
@@ -57,8 +60,38 @@ namespace llvm {
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
+#ifdef _MSC_VER
+/// CRTReportHook - Function called on a CRT debugging event.
+static int CRTReportHook(int ReportType, char *Message, int *Return) {
+ // Don't cause a DebugBreak() on return.
+ if (Return)
+ *Return = 0;
+
+ switch (ReportType) {
+ default:
+ case _CRT_ASSERT:
+ fprintf(stderr, "CRT assert: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_ERROR:
+ fprintf(stderr, "CRT error: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_WARN:
+ fprintf(stderr, "CRT warn: %s\n", Message);
+ break;
+ }
+
+ // Don't call _CrtDbgReport.
+ return TRUE;
+}
+#endif
-static void RegisterHandler() {
+static void RegisterHandler() {
if (RegisteredUnhandledExceptionFilter) {
EnterCriticalSection(&CriticalSection);
return;
@@ -76,6 +109,14 @@ static void RegisterHandler() {
OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+ // Environment variable to disable any kind of crash dialog.
+#ifdef _MSC_VER
+ if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+ _CrtSetReportHook(CRTReportHook);
+ ExitOnUnhandledExceptions = true;
+ }
+#endif
+
// IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
// else multi-threading problems will ensue.
}
@@ -136,10 +177,7 @@ static void Cleanup() {
if (FilesToRemove != NULL)
while (!FilesToRemove->empty()) {
- try {
- FilesToRemove->back().eraseFromDisk();
- } catch (...) {
- }
+ FilesToRemove->back().eraseFromDisk();
FilesToRemove->pop_back();
}
@@ -238,6 +276,11 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
assert(0 && "Crashed in LLVMUnhandledExceptionFilter");
}
+#ifdef _MSC_VER
+ if (ExitOnUnhandledExceptions)
+ _exit(-3);
+#endif
+
// Allow dialog box to pop up allowing choice to start debugger.
if (OldFilter)
return (*OldFilter)(ep);
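
With the report hook above, MSVC builds can run unattended: when
LLVM_DISABLE_CRT_DEBUG is set, CRT asserts and errors go to stderr and the
process exits instead of popping a dialog box. A hypothetical harness showing
the switch; the variable must be set before the handlers are registered:

    #include <cstdlib>

    int main() {
      // MSVC CRT only. Must happen before RegisterHandler() runs, i.e.
      // before any LLVM signal/handler registration in the hosted tool.
      _putenv("LLVM_DISABLE_CRT_DEBUG=1");
      // ... run the LLVM-based tool; a CRT assert now prints and _exit(3)s.
      return 0;
    }
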
diff --git a/lib/System/Win32/TimeValue.inc b/lib/System/Win32/TimeValue.inc
index 0ca87d423325..e37f111fc77c 100644
--- a/lib/System/Win32/TimeValue.inc
+++ b/lib/System/Win32/TimeValue.inc
@@ -30,7 +30,7 @@ TimeValue TimeValue::now() {
return t;
}
-std::string TimeValue::toString() const {
+std::string TimeValue::str() const {
#ifdef __MINGW32__
// This ban may be lifted by either:
  // (i) a future MinGW version other than 1.0 inherits the __time64_t type, or
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 08dc07c64152..487ce1dd434b 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -15,6 +15,7 @@
#ifndef TARGET_ARM_H
#define TARGET_ARM_H
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -24,7 +25,8 @@ class ARMBaseTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
class JITCodeEmitter;
-class raw_ostream;
+class ObjectCodeEmitter;
+class formatted_raw_ostream;
// Enums corresponding to ARM condition codes
namespace ARMCC {
@@ -50,7 +52,7 @@ namespace ARMCC {
inline static CondCodes getOppositeCondition(CondCodes CC){
switch (CC) {
- default: assert(0 && "Unknown condition code");
+ default: llvm_unreachable("Unknown condition code");
case EQ: return NE;
case NE: return EQ;
case HS: return LO;
@@ -71,7 +73,7 @@ namespace ARMCC {
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
- default: assert(0 && "Unknown condition code");
+ default: llvm_unreachable("Unknown condition code");
case ARMCC::EQ: return "eq";
case ARMCC::NE: return "ne";
case ARMCC::HS: return "hs";
@@ -90,20 +92,23 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
-FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM);
-FunctionPass *createARMCodePrinterPass(raw_ostream &O,
- ARMBaseTargetMachine &TM,
- bool Verbose);
-FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
- MachineCodeEmitter &MCE);
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
+ ObjectCodeEmitter &OCE);
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMConstantIslandPass();
+FunctionPass *createNEONPreAllocPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+extern Target TheARMTarget, TheThumbTarget;
} // end namespace llvm;
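
Replacing assert(0 && "...") with llvm_unreachable("...") is more than style:
under NDEBUG the assert compiles away and control falls off the end of the
switch, returning garbage, while llvm_unreachable reports the message and
aborts in every build mode (as of this revision). A contrast sketch; CondName
is a made-up function:

    #include "llvm/Support/ErrorHandling.h"

    const char *CondName(int CC) {
      switch (CC) {
      case 0: return "eq";
      case 1: return "ne";
      default:
        // assert(0 && "Unknown condition code") would be a no-op in a
        // release build and fall through; llvm_unreachable always traps.
        llvm_unreachable("Unknown condition code");
      }
    }
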
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 9001e5033c7d..8851fbbf2481 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,27 +89,20 @@ def : ProcNoItin<"xscale", [ArchV5TE]>;
def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
-def : Processor<"arm1136j-s", V6Itineraries,
- [ArchV6]>;
-def : Processor<"arm1136jf-s", V6Itineraries,
- [ArchV6, FeatureVFP2]>;
-def : Processor<"arm1176jz-s", V6Itineraries,
- [ArchV6]>;
-def : Processor<"arm1176jzf-s", V6Itineraries,
- [ArchV6, FeatureVFP2]>;
-def : Processor<"mpcorenovfp", V6Itineraries,
- [ArchV6]>;
-def : Processor<"mpcore", V6Itineraries,
- [ArchV6, FeatureVFP2]>;
+def : ProcNoItin<"arm1136j-s", [ArchV6]>;
+def : ProcNoItin<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
+def : ProcNoItin<"arm1176jz-s", [ArchV6]>;
+def : ProcNoItin<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
+def : ProcNoItin<"mpcorenovfp", [ArchV6]>;
+def : ProcNoItin<"mpcore", [ArchV6, FeatureVFP2]>;
// V6T2 Processors.
-def : Processor<"arm1156t2-s", V6Itineraries,
- [ArchV6T2, FeatureThumb2]>;
-def : Processor<"arm1156t2f-s", V6Itineraries,
- [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : ProcNoItin<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>;
+def : ProcNoItin<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
// V7 Processors.
-def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : Processor<"cortex-a8", CortexA8Itineraries,
+ [ArchV7A, FeatureThumb2, FeatureNEON]>;
def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
@@ -131,13 +124,13 @@ def ARMInstrInfo : InstrInfo {
let TSFlagsFields = ["AddrModeBits",
"SizeFlag",
"IndexModeBits",
- "isUnaryDataProc",
- "Form"];
+ "Form",
+ "isUnaryDataProc"];
let TSFlagsShifts = [0,
4,
7,
9,
- 10];
+ 15];
}
//===----------------------------------------------------------------------===//
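
The field reorder above is not cosmetic: with shifts 0, 4, 7, 9 and 15, Form
now occupies bits 9-14 (six bits, leaving room for more instruction formats)
and isUnaryDataProc moves up to bit 15. A sketch of how the C++ side would
decode such a packing; the mask widths are inferred from the shift list, so
treat them as assumptions rather than the backend's literal enum:

    #include <cstdint>

    enum {
      AddrModeShift  = 0,  AddrModeMask  = 0xf  << AddrModeShift,
      SizeShift      = 4,  SizeMask      = 0x7  << SizeShift,
      IndexModeShift = 7,  IndexModeMask = 0x3  << IndexModeShift,
      FormShift      = 9,  FormMask      = 0x3f << FormShift,
      UnaryDPShift   = 15, UnaryDPMask   = 0x1  << UnaryDPShift
    };

    inline unsigned getForm(uint32_t TSFlags) {
      return (TSFlags & FormMask) >> FormShift;
    }
    inline bool isUnaryDataProc(uint32_t TSFlags) {
      return TSFlags & UnaryDPMask;
    }
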
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 15c9ec1fc23c..183915335192 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -15,11 +15,12 @@
#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
namespace llvm {
-
+
/// ARM_AM - ARM Addressing Mode Stuff
namespace ARM_AM {
enum ShiftOpc {
@@ -30,14 +31,14 @@ namespace ARM_AM {
ror,
rrx
};
-
+
enum AddrOpc {
add = '+', sub = '-'
};
-
+
static inline const char *getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return "asr";
case ARM_AM::lsl: return "lsl";
case ARM_AM::lsr: return "lsr";
@@ -45,7 +46,7 @@ namespace ARM_AM {
case ARM_AM::rrx: return "rrx";
}
}
-
+
static inline ShiftOpc getShiftOpcForNode(SDValue N) {
switch (N.getOpcode()) {
default: return ARM_AM::no_shift;
@@ -70,7 +71,7 @@ namespace ARM_AM {
static inline const char *getAMSubModeStr(AMSubMode Mode) {
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::ia: return "ia";
case ARM_AM::ib: return "ib";
case ARM_AM::da: return "da";
@@ -80,7 +81,7 @@ namespace ARM_AM {
static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::ia: return isLD ? "fd" : "ea";
case ARM_AM::ib: return isLD ? "ed" : "fa";
case ARM_AM::da: return isLD ? "fa" : "ed";
@@ -94,14 +95,14 @@ namespace ARM_AM {
assert(Amt < 32 && "Invalid rotate amount");
return (Val >> Amt) | (Val << ((32-Amt)&31));
}
-
+
/// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
///
static inline unsigned rotl32(unsigned Val, unsigned Amt) {
assert(Amt < 32 && "Invalid rotate amount");
return (Val << Amt) | (Val >> ((32-Amt)&31));
}
-
+
//===--------------------------------------------------------------------===//
// Addressing Mode #1: shift_operand with registers
//===--------------------------------------------------------------------===//
@@ -136,7 +137,7 @@ namespace ARM_AM {
static inline unsigned getSOImmValRot(unsigned Imm) {
return (Imm >> 8) * 2;
}
-
+
/// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
/// computing the rotate amount to use. If this immediate value cannot be
/// handled with a single shifter-op, determine a good rotate amount that will
@@ -145,14 +146,14 @@ namespace ARM_AM {
// 8-bit (or less) immediates are trivially shifter_operands with a rotate
// of zero.
if ((Imm & ~255U) == 0) return 0;
-
+
// Use CTZ to compute the rotate amount.
unsigned TZ = CountTrailingZeros_32(Imm);
-
+
// Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
// not 9.
unsigned RotAmt = TZ & ~1;
-
+
// If we can handle this spread, return it.
if ((rotr32(Imm, RotAmt) & ~255U) == 0)
return (32-RotAmt)&31; // HW rotates right, not left.
@@ -165,16 +166,16 @@ namespace ARM_AM {
    // Restart the search for a high-order bit after the initial sequence of
// ones.
unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
-
+
// Rotate amount must be even.
unsigned RotAmt2 = TZ2 & ~1;
-
+
// If this fits, use it.
if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
return (32-RotAmt2)&31; // HW rotates right, not left.
}
}
-
+
// Otherwise, we have no way to cover this span of bits with a single
// shifter_op immediate. Return a chunk of bits that will be useful to
// handle.
@@ -188,17 +189,17 @@ namespace ARM_AM {
// 8-bit (or less) immediates are trivially shifter_operands with a rotate
// of zero.
if ((Arg & ~255U) == 0) return Arg;
-
+
unsigned RotAmt = getSOImmValRotate(Arg);
// If this cannot be handled with a single shifter_op, bail out.
if (rotr32(~255U, RotAmt) & Arg)
return -1;
-
+
// Encode this correctly.
return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
}
-
+
/// isSOImmTwoPartVal - Return true if the specified value can be obtained by
/// or'ing together two SOImmVal's.
static inline bool isSOImmTwoPartVal(unsigned V) {
@@ -206,12 +207,12 @@ namespace ARM_AM {
V = rotr32(~255U, getSOImmValRotate(V)) & V;
if (V == 0)
return false;
-
+
// If this can be handled with two shifter_op's, accept.
V = rotr32(~255U, getSOImmValRotate(V)) & V;
return V == 0;
}
-
+
/// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
/// return the first chunk of it.
static inline unsigned getSOImmTwoPartFirst(unsigned V) {
@@ -221,14 +222,14 @@ namespace ARM_AM {
/// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
/// return the second chunk of it.
static inline unsigned getSOImmTwoPartSecond(unsigned V) {
- // Mask out the first hunk.
+ // Mask out the first hunk.
V = rotr32(~255U, getSOImmValRotate(V)) & V;
-
+
// Take what's left.
assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
return V;
}
-
+
  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
/// by a left shift. Returns the shift amount to use.
static inline unsigned getThumbImmValShift(unsigned Imm) {
@@ -243,7 +244,7 @@ namespace ARM_AM {
/// isThumbImmShiftedVal - Return true if the specified value can be obtained
/// by left shifting a 8-bit immediate.
static inline bool isThumbImmShiftedVal(unsigned V) {
- // If this can be handled with
+ // If this can be handled with
V = (~255U << getThumbImmValShift(V)) & V;
return V == 0;
}
@@ -259,10 +260,10 @@ namespace ARM_AM {
return CountTrailingZeros_32(Imm);
}
- /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
/// obtained by left shifting a 16-bit immediate.
static inline bool isThumbImm16ShiftedVal(unsigned V) {
- // If this can be handled with
+ // If this can be handled with
V = (~65535U << getThumbImm16ValShift(V)) & V;
return V == 0;
}
@@ -273,28 +274,6 @@ namespace ARM_AM {
return V >> getThumbImmValShift(V);
}
- /// getT2SOImmValDecode - Given a 12-bit encoded Thumb-2 modified immediate,
- /// return the corresponding 32-bit immediate value.
- /// See ARM Reference Manual A6.3.2.
- static inline unsigned getT2SOImmValDecode(unsigned Imm) {
- unsigned Base = Imm & 0xff;
- switch ((Imm >> 8) & 0xf) {
- case 0:
- return Base;
- case 1:
- return Base | (Base << 16);
- case 2:
- return (Base << 8) | (Base << 24);
- case 3:
- return Base | (Base << 8) | (Base << 16) | (Base << 24);
- default:
- break;
- }
-
- // shifted immediate
- unsigned RotAmount = ((Imm >> 7) & 0x1f) - 8;
- return (Base | 0x80) << (24 - RotAmount);
- }
/// getT2SOImmValSplat - Return the 12-bit encoded representation
/// if the specified value can be obtained by splatting the low 8 bits
@@ -305,12 +284,12 @@ namespace ARM_AM {
/// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
/// Return -1 if none of the above apply.
/// See ARM Reference Manual A6.3.2.
- static inline int getT2SOImmValSplat(unsigned V) {
+ static inline int getT2SOImmValSplatVal(unsigned V) {
unsigned u, Vs, Imm;
// control = 0
- if ((V & 0xffffff00) == 0)
+ if ((V & 0xffffff00) == 0)
return V;
-
+
    // If the low byte of the value is all zeroes, just shift it off
Vs = ((V & 0xff) == 0) ? V >> 8 : V;
// Any passing value only has 8 bits of payload, splatted across the word
@@ -329,11 +308,11 @@ namespace ARM_AM {
return -1;
}
- /// getT2SOImmValRotate - Return the 12-bit encoded representation if the
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
/// specified value is a rotated 8-bit value. Return -1 if no rotation
/// encoding is possible.
/// See ARM Reference Manual A6.3.2.
- static inline int getT2SOImmValRotate (unsigned V) {
+ static inline int getT2SOImmValRotateVal(unsigned V) {
unsigned RotAmt = CountLeadingZeros_32(V);
if (RotAmt >= 24)
return -1;
@@ -346,23 +325,23 @@ namespace ARM_AM {
}
/// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
- /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
/// encoding for it. If not, return -1.
/// See ARM Reference Manual A6.3.2.
static inline int getT2SOImmVal(unsigned Arg) {
// If 'Arg' is an 8-bit splat, then get the encoded value.
- int Splat = getT2SOImmValSplat(Arg);
+ int Splat = getT2SOImmValSplatVal(Arg);
if (Splat != -1)
return Splat;
-
+
// If 'Arg' can be handled with a single shifter_op return the value.
- int Rot = getT2SOImmValRotate(Arg);
+ int Rot = getT2SOImmValRotateVal(Arg);
if (Rot != -1)
return Rot;
return -1;
}
-
+
//===--------------------------------------------------------------------===//
// Addressing Mode #2
@@ -380,7 +359,7 @@ namespace ARM_AM {
// If this addressing mode is a frame index (before prolog/epilog insertion
// and code rewriting), this operand will have the form: FI#, reg0, <offs>
// with no shift amount for the frame offset.
- //
+ //
static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
assert(Imm12 < (1 << 12) && "Imm too large!");
bool isSub = Opc == sub;
@@ -395,8 +374,8 @@ namespace ARM_AM {
static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
return (ShiftOpc)(AM2Opc >> 13);
}
-
-
+
+
//===--------------------------------------------------------------------===//
// Addressing Mode #3
//===--------------------------------------------------------------------===//
@@ -409,7 +388,7 @@ namespace ARM_AM {
// The first operand is always a Reg. The second operand is a reg if in
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
// in bit 8, the immediate in bits 0-7.
-
+
/// getAM3Opc - This function encodes the addrmode3 opc field.
static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
bool isSub = Opc == sub;
@@ -421,7 +400,7 @@ namespace ARM_AM {
static inline AddrOpc getAM3Op(unsigned AM3Opc) {
return ((AM3Opc >> 8) & 1) ? sub : add;
}
-
+
//===--------------------------------------------------------------------===//
// Addressing Mode #4
//===--------------------------------------------------------------------===//
@@ -469,7 +448,7 @@ namespace ARM_AM {
//
// IA - Increment after
// DB - Decrement before
-
+
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
bool isSub = Opc == sub;
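
Most of this file's churn is whitespace plus the getT2SOImmVal* renames, but
the helpers around it encode the core ARM rule: a shifter-operand immediate
is an 8-bit value rotated right by an even amount, and getSOImmValRotate
finds that amount with count-trailing-zeros. A small self-checking example of
the rule, assuming nothing beyond the standard headers:

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      return (V >> Amt) | (V << ((32 - Amt) & 31));
    }

    int main() {
      // 0x00AB0000: 16 trailing zeros (already even), and rotating right by
      // 16 leaves 0xAB, which fits in 8 bits -- encodable as a so_imm.
      assert((rotr32(0x00AB0000u, 16) & ~255u) == 0);

      // 0x00AB00AB: its two set-bit clusters are 16 bits apart, so no single
      // even rotate maps it onto 8 bits; it needs the two-part handling
      // (isSOImmTwoPartVal) above. Trailing zeros here are 0, so the
      // algorithm's candidate rotate is 0, which fails:
      assert((rotr32(0x00AB00ABu, 0) & ~255u) != 0);
      return 0;
    }
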
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 000000000000..ecdf5a0be643
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,1060 @@
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+ARMBaseInstrInfo::ARMBaseInstrInfo()
+ : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) {
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+  // Try splitting an indexed load/store into an un-indexed one plus an add/sub
+ // operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ bool isLoad = !TID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+      // Immediate is 8 bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if (isJumpTableBranchOpcode(SecondLastOpc) &&
+ isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // FIXME: This confuses implicit_def with optional CPSR def.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ // Basic size info comes from the TSFlags field.
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned TSFlags = TID.TSFlags;
+
+ unsigned Opc = MI->getOpcode();
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: {
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown or unset size field for instr!");
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::KILL:
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ return 0;
+ }
+ break;
+ }
+ case ARMII::Size8Bytes: return 8; // ARM instruction x 2.
+ case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
+ case ARMII::SizeSpecial: {
+ switch (Opc) {
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_setjmp:
+ return 24;
+ case ARM::t2Int_eh_sjlj_setjmp:
+ return 20;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB:
+ case ARM::t2TBH: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+      // entry is one byte; for TBH, two bytes each.
+ unsigned EntrySize = (Opc == ARM::t2TBB)
+ ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
+ unsigned NumOps = TID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+      // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+ return 0; // Not reached
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool
+ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ switch (MI.getOpcode()) {
+ default: break;
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ case ARM::VMOVD:
+ case ARM::VMOVQ: {
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ case ARM::MOVr:
+ case ARM::tMOVr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::t2MOVr: {
+ assert(MI.getDesc().getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "Invalid ARM MOV instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDR:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::t2LDRi12:
+ case ARM::tRestore:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FLDD:
+ case ARM::FLDS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STR:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::t2STRi12:
+ case ARM::tSpill:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FSTD:
+ case ARM::FSTS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+bool
+ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC != SrcRC) {
+ // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies
+ // Allow QPR / QPR_VFP2 cross-class copies
+ if (DestRC == ARM::DPRRegisterClass) {
+ if (SrcRC == ARM::DPR_VFP2RegisterClass ||
+ SrcRC == ARM::DPR_8RegisterClass) {
+ } else
+ return false;
+ } else if (DestRC == ARM::DPR_VFP2RegisterClass) {
+ if (SrcRC == ARM::DPRRegisterClass ||
+ SrcRC == ARM::DPR_8RegisterClass) {
+ } else
+ return false;
+ } else if (DestRC == ARM::DPR_8RegisterClass) {
+ if (SrcRC == ARM::DPRRegisterClass ||
+ SrcRC == ARM::DPR_VFP2RegisterClass) {
+ } else
+ return false;
+ } else if ((DestRC == ARM::QPRRegisterClass &&
+ SrcRC == ARM::QPR_VFP2RegisterClass) ||
+ (DestRC == ARM::QPR_VFP2RegisterClass &&
+ SrcRC == ARM::QPRRegisterClass)) {
+ } else
+ return false;
+ }
+
+ if (DestRC == ARM::GPRRegisterClass) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
+ DestReg).addReg(SrcReg)));
+ } else if (DestRC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
+ .addReg(SrcReg));
+ } else if ((DestRC == ARM::DPRRegisterClass) ||
+ (DestRC == ARM::DPR_VFP2RegisterClass) ||
+ (DestRC == ARM::DPR_8RegisterClass)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+ .addReg(SrcReg));
+ } else if (DestRC == ARM::QPRRegisterClass ||
+ DestRC == ARM::QPR_VFP2RegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ MachineMemOperand::MOStore, 0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+
+ if (RC == ARM::GPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::DPRRegisterClass ||
+ RC == ARM::DPR_VFP2RegisterClass ||
+ RC == ARM::DPR_8RegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else {
+ assert((RC == ARM::QPRRegisterClass ||
+ RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+ // FIXME: Neon instructions should support predicates
+ BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ }
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ MachineMemOperand::MOLoad, 0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+
+ if (RC == ARM::GPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
+ .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::DPRRegisterClass ||
+ RC == ARM::DPR_VFP2RegisterClass ||
+ RC == ARM::DPR_8RegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else {
+ assert((RC == ARM::QPRRegisterClass ||
+ RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+ // FIXME: Neon instructions should support predicates
+ BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ }
+}
+
+MachineInstr *ARMBaseInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = NULL;
+ if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
+ // If it is updating CPSR, then it cannot be folded.
+ if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead())
+ return NULL;
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ if (Opc == ARM::MOVr)
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ else // ARM::t2MOVr
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ if (Opc == ARM::MOVr)
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ else // ARM::t2MOVr
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ } else if (Opc == ARM::tMOVgpr2gpr ||
+ Opc == ARM::tMOVtgpr2gpr ||
+ Opc == ARM::tMOVgpr2tgpr) {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
+ }
+ } else if (Opc == ARM::FCPYS) {
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ } else if (Opc == ARM::FCPYD) {
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ }
+
+ return NewMI;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ // FIXME
+ return 0;
+}
+
+bool
+ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
+ // If it is updating CPSR, then it cannot be folded.
+ return MI->getOperand(4).getReg() != ARM::CPSR ||
+ MI->getOperand(4).isDead();
+ } else if (Opc == ARM::tMOVgpr2gpr ||
+ Opc == ARM::tMOVtgpr2gpr ||
+ Opc == ARM::tMOVgpr2tgpr) {
+ return true;
+ } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) {
+ return true;
+ } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) {
+ return false; // FIXME
+ }
+
+ return false;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+int llvm::getMatchingCondBranchOpcode(int Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ else if (Opc == ARM::tB)
+ return ARM::tBcc;
+ else if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+ return 0;
+}
+
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
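+ // The loop below greedily splits NumBytes into chunks that each fit an
+ // ARM so_imm encoding (an 8-bit value rotated right by an even amount).
+ // For example, NumBytes = 0x10004 is emitted as two ADDri's, one adding
+ // 0x4 and one adding 0x10000, both of which are valid so_imm values.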
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ BaseReg = DestReg;
+ }
+}
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immediate into this ADDri/SUBri
+ // as possible.
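+ // e.g. with Offset = 0x1004 (not a valid so_imm), this instruction keeps
+ // the encodable low chunk 0x4, and the remaining 0x1000 is left in Offset
+ // and returned by reference for the caller to materialize.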
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold address comp. if opcode has offset bits
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 000000000000..a13155b9fd0d
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,333 @@
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINSTRUCTIONINFO_H
+#define ARMBASEINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+/// ARMII - This namespace holds all of the target-specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+
+ AddrModeMask = 0xf,
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+
+ // Size* - Flags to keep track of the size of an instruction.
+ SizeShift = 4,
+ SizeMask = 7 << SizeShift,
+ SizeSpecial = 1, // 0 byte pseudo or special case.
+ Size8Bytes = 2,
+ Size4Bytes = 3,
+ Size2Bytes = 4,
+
+ // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for load
+ // and store ops.
+ IndexModeShift = 7,
+ IndexModeMask = 3 << IndexModeShift,
+ IndexModePre = 1,
+ IndexModePost = 2,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 9,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 11 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 12 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 13 << FormShift,
+ VFPBinaryFrm = 14 << FormShift,
+ VFPConv1Frm = 15 << FormShift,
+ VFPConv2Frm = 16 << FormShift,
+ VFPConv3Frm = 17 << FormShift,
+ VFPConv4Frm = 18 << FormShift,
+ VFPConv5Frm = 19 << FormShift,
+ VFPLdStFrm = 20 << FormShift,
+ VFPLdStMulFrm = 21 << FormShift,
+ VFPMiscFrm = 22 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 23 << FormShift,
+
+ // NEON format
+ NEONFrm = 24 << FormShift,
+ NEONGetLnFrm = 25 << FormShift,
+ NEONSetLnFrm = 26 << FormShift,
+ NEONDupFrm = 27 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 15,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 16,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set fields while generating
+ // machine instructions.
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+}
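+// These fields are packed into TargetInstrDesc::TSFlags and are decoded with
+// the masks above, for example (as done in ARMBaseInstrInfo.cpp):
+// unsigned AddrMode = Desc.TSFlags & ARMII::AddrModeMask;
+// unsigned Form = Desc.TSFlags & ARMII::FormMask;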
+
+class ARMBaseInstrInfo : public TargetInstrInfoImpl {
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo();
+public:
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ // Return true if the block does not fall through.
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+ }
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+};
+
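+// The helpers below append the standard trailing operands that most ARM
+// instructions expect. e.g. AddDefaultPred adds an "always execute"
+// predicate (ARMCC::AL plus a zero condition-code register):
+// AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg).addFrameIndex(FI));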
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI; the left-over portion is
+/// returned by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 000000000000..42ef183e5261
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,1360 @@
+//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
+ bool *isSPVFP) {
+ if (isSPVFP)
+ *isSPVFP = false;
+
+ using namespace ARM;
+ switch (RegEnum) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case D0: case Q0: return 0;
+ case R1: case D1: case Q1: return 1;
+ case R2: case D2: case Q2: return 2;
+ case R3: case D3: case Q3: return 3;
+ case R4: case D4: case Q4: return 4;
+ case R5: case D5: case Q5: return 5;
+ case R6: case D6: case Q6: return 6;
+ case R7: case D7: case Q7: return 7;
+ case R8: case D8: case Q8: return 8;
+ case R9: case D9: case Q9: return 9;
+ case R10: case D10: case Q10: return 10;
+ case R11: case D11: case Q11: return 11;
+ case R12: case D12: case Q12: return 12;
+ case SP: case D13: case Q13: return 13;
+ case LR: case D14: case Q14: return 14;
+ case PC: case D15: case Q15: return 15;
+
+ case D16: return 16;
+ case D17: return 17;
+ case D18: return 18;
+ case D19: return 19;
+ case D20: return 20;
+ case D21: return 21;
+ case D22: return 22;
+ case D23: return 23;
+ case D24: return 24;
+ case D25: return 25;
+ case D26: return 26;
+ case D27: return 27;
+ case D28: return 28;
+ case D29: return 29;
+ case D30: return 30;
+ case D31: return 31;
+
+ case S0: case S1: case S2: case S3:
+ case S4: case S5: case S6: case S7:
+ case S8: case S9: case S10: case S11:
+ case S12: case S13: case S14: case S15:
+ case S16: case S17: case S18: case S19:
+ case S20: case S21: case S22: case S23:
+ case S24: case S25: case S26: case S27:
+ case S28: case S29: case S30: case S31: {
+ if (isSPVFP)
+ *isSPVFP = true;
+ switch (RegEnum) {
+ default: return 0; // Avoid compile time warning.
+ case S0: return 0;
+ case S1: return 1;
+ case S2: return 2;
+ case S3: return 3;
+ case S4: return 4;
+ case S5: return 5;
+ case S6: return 6;
+ case S7: return 7;
+ case S8: return 8;
+ case S9: return 9;
+ case S10: return 10;
+ case S11: return 11;
+ case S12: return 12;
+ case S13: return 13;
+ case S14: return 14;
+ case S15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ }
+ }
+ }
+}
+
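+// Darwin and Thumb targets use R7 as the frame pointer; other ARM targets
+// use R11 (see the FramePtr initialization below).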
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ TII(tii), STI(sti),
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+}
+
+const unsigned*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
+ // register.
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const *
+ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+
+ static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
+ &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+
+ static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+
+ static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={
+ &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass,
+ &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+
+ if (STI.isThumb1Only()) {
+ return STI.isTargetDarwin()
+ ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
+ }
+ return STI.isTargetDarwin()
+ ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
+}
+
+BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(FramePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+ unsigned Reg) const {
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::GPRRegisterClass;
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven1,
+ GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven4,
+ GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven2,
+ GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven5,
+ GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven3,
+ GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven6,
+ GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd1,
+ GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd4,
+ GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd2,
+ GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd5,
+ GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd3,
+ GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd6,
+ GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+ }
+ }
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of an even / odd register pair and it has now been
+ // changed (e.g. coalesced) into a different register, the allocation hint
+ // of the other register of the pair must be updated to reflect the
+ // change in the relationship.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return (NoFramePointerElim ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (NoFramePointerElim && MFI->hasCalls())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
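+ // e.g. Offset = 13 with Align = 8 rounds up to 16.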
+ Offset = (Offset+Align-1)/Align*Align;
+ }
+ return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+unsigned
+ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ unsigned Limit = (1 << 12) - 1;
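+ // Start from the full range of a 12-bit offset (4095 bytes); the loop
+ // below tightens this for instructions with smaller offset fields.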
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ const TargetInstrDesc &Desc = TII.get(I->getOpcode());
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ if (AddrMode == ARMII::AddrMode3 ||
+ AddrMode == ARMII::AddrModeT2_i8)
+ return (1 << 8) - 1;
+
+ if (AddrMode == ARMII::AddrMode5 ||
+ AddrMode == ARMII::AddrModeT2_i8s4)
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+
+ if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF))
+ // When the stack offset is negative, we will end up using
+ // the i8 instructions instead.
+ return (1 << 8) - 1;
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void
+ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage of the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ AFI->setCSRegisterIsSpilled(Reg);
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (CSRegClasses[i] == ARM::GPRRegisterClass ||
+ CSRegClasses[i] == ARM::tGPRRegisterClass) {
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // use of BL to implement a far jump. If it turns out that it's not needed,
+ // then the branch fix-up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ bool ExtraCSSpill = false;
+ if (!CanEliminateFrame || cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
+ // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If the stack and doubles are 8-byte aligned and we are spilling an odd
+ // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+ // between the integer and double callee-save areas.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill a high register if the function is Thumb1.
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() &&
+ !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (RS && !ExtraCSSpill) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // If any of the stack slot references may be out of range of an
+ // immediate offset, make sure a register (or a spill slot) is
+ // available for the register scavenger. Note that if we're indexing
+ // off the frame pointer, the effective stack size is 4 bytes larger
+ // since the FP points to the stack slot of the previous FP.
+ if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0)
+ >= estimateRSStackSizeLimit(MF)) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ AFI->setCSRegisterIsSpilled(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack.
+ // Reserve a slot closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
+
+unsigned ARMBaseRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (STI.isTargetDarwin() || hasFP(MF))
+ return FramePtr;
+ return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1:
+ return ARM::R0;
+ case ARM::R3:
+ // FIXME!
+ return STI.isThumb1Only() ? 0 : ARM::R2;
+ case ARM::R5:
+ return ARM::R4;
+ case ARM::R7:
+ return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ case ARM::R9:
+ return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
+ case ARM::R11:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1:
+ return ARM::S0;
+ case ARM::S3:
+ return ARM::S2;
+ case ARM::S5:
+ return ARM::S4;
+ case ARM::S7:
+ return ARM::S6;
+ case ARM::S9:
+ return ARM::S8;
+ case ARM::S11:
+ return ARM::S10;
+ case ARM::S13:
+ return ARM::S12;
+ case ARM::S15:
+ return ARM::S14;
+ case ARM::S17:
+ return ARM::S16;
+ case ARM::S19:
+ return ARM::S18;
+ case ARM::S21:
+ return ARM::S20;
+ case ARM::S23:
+ return ARM::S22;
+ case ARM::S25:
+ return ARM::S24;
+ case ARM::S27:
+ return ARM::S26;
+ case ARM::S29:
+ return ARM::S28;
+ case ARM::S31:
+ return ARM::S30;
+
+ case ARM::D1:
+ return ARM::D0;
+ case ARM::D3:
+ return ARM::D2;
+ case ARM::D5:
+ return ARM::D4;
+ case ARM::D7:
+ return ARM::D6;
+ case ARM::D9:
+ return ARM::D8;
+ case ARM::D11:
+ return ARM::D10;
+ case ARM::D13:
+ return ARM::D12;
+ case ARM::D15:
+ return ARM::D14;
+ case ARM::D17:
+ return ARM::D16;
+ case ARM::D19:
+ return ARM::D18;
+ case ARM::D21:
+ return ARM::D20;
+ case ARM::D23:
+ return ARM::D22;
+ case ARM::D25:
+ return ARM::D24;
+ case ARM::D27:
+ return ARM::D26;
+ case ARM::D29:
+ return ARM::D28;
+ case ARM::D31:
+ return ARM::D30;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0:
+ return ARM::R1;
+ case ARM::R2:
+ // FIXME!
+ return STI.isThumb1Only() ? 0 : ARM::R3;
+ case ARM::R4:
+ return ARM::R5;
+ case ARM::R6:
+ return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ case ARM::R8:
+ return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
+ case ARM::R10:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0:
+ return ARM::S1;
+ case ARM::S2:
+ return ARM::S3;
+ case ARM::S4:
+ return ARM::S5;
+ case ARM::S6:
+ return ARM::S7;
+ case ARM::S8:
+ return ARM::S9;
+ case ARM::S10:
+ return ARM::S11;
+ case ARM::S12:
+ return ARM::S13;
+ case ARM::S14:
+ return ARM::S15;
+ case ARM::S16:
+ return ARM::S17;
+ case ARM::S18:
+ return ARM::S19;
+ case ARM::S20:
+ return ARM::S21;
+ case ARM::S22:
+ return ARM::S23;
+ case ARM::S24:
+ return ARM::S25;
+ case ARM::S26:
+ return ARM::S27;
+ case ARM::S28:
+ return ARM::S29;
+ case ARM::S30:
+ return ARM::S31;
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D14:
+ return ARM::D15;
+ case ARM::D16:
+ return ARM::D17;
+ case ARM::D18:
+ return ARM::D19;
+ case ARM::D20:
+ return ARM::D21;
+ case ARM::D22:
+ return ARM::D23;
+ case ARM::D24:
+ return ARM::D25;
+ case ARM::D26:
+ return ARM::D27;
+ case ARM::D28:
+ return ARM::D29;
+ case ARM::D30:
+ return ARM::D31;
+ }
+
+ return 0;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+// not required, we reserve argument space for call sites in the function
+// immediately on entry to the current function. This eliminates the need for
+// add/sub sp brackets around call sites. Returns true if the call frame is
+// included as part of the stack frame.
+bool ARMBaseRegisterInfo::
+hasReservedCallFrame(MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only a small immediate offset
+ // range for addressing the stack frame, so a large call frame can cause
+ // poor codegen and may even make it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
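+ // i.e. call frames of 2047 bytes or more are not reserved up front.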
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not suppor Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm();
+ // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN?
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' ARM register. If the register scavenger
+/// is not being used, R12 is available. Otherwise, try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
+ ARMFunctionInfo *AFI) {
+ unsigned Reg = RS ? RS->FindUnusedReg(RC) : (unsigned) ARM::R12;
+ assert(!AFI->isThumb1OnlyFunction());
+ return Reg;
+}
+
+unsigned
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (hasFP(MF) && AFI->hasStackFrame()) {
+ assert(SPAdj == 0 && "Unexpected stack offset!");
+ // Use the frame pointer to reference fixed objects unless this is a
+ // frameless function.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible.
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return 0;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) &&
+ "This code isn't needed if offset already handled!");
+
+ // Insert a set of r12 with the full address: r12 = sp + offset
+ // If the offset we have is too large to fit into the instruction, we need
+ // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+ // out of 'Offset'.
+ unsigned ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI);
+ if (ScratchReg == 0)
+ // No register is "free". Scavenge a register.
+ ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj);
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+ return 0;
+}
+
+/// Move the iterator past the next group of callee-saved load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Opc1, int Opc2, unsigned Area,
+ const ARMSubtarget &STI) {
+ while (MBBI != MBB.end() &&
+ ((MBBI->getOpcode() == Opc1) || (MBBI->getOpcode() == Opc2)) &&
+ MBBI->getOperand(1).isFI()) {
+ if (Area != 0) {
+ bool Done = false;
+ unsigned Category = 0;
+ switch (MBBI->getOperand(0).getReg()) {
+ case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ Category = 1;
+ break;
+ case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+ Category = STI.isTargetDarwin() ? 2 : 1;
+ break;
+ case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+ case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+ Category = 3;
+ break;
+ default:
+ Done = true;
+ break;
+ }
+ if (Done || Category != Area)
+ break;
+ }
+
+ ++MBBI;
+ }
+}
+
+void ARMBaseRegisterInfo::
+emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not suppor Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Determine the size of each callee-save spill area and record which
+ // frame index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS1Size);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI);
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For Darwin, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS2Size);
+
+ // Build the new SUBri to adjust SP for FP callee-save spill area.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 2, STI);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRCSSize);
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
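+ // Worked example (with the hypothetical sizes above): if NumBytes = 40,
+ // then DPRCSOffset = 40 - (16 + 4 + 8) = 12, GPRCS2Offset = 20 and
+ // GPRCS1Offset = 24, leaving 12 bytes of locals below the spill areas.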
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Insert it after all the callee-save spills.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF)) {
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+ }
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const unsigned *CSRegs) {
+ return ((MI->getOpcode() == (int)ARM::FLDD ||
+ MI->getOpcode() == (int)ARM::LDR ||
+ MI->getOpcode() == (int)ARM::t2LDRi12) &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
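+
+// e.g. an "ldr r4, [sp, #imm]" whose second operand is a frame index counts
+// as a callee-save restore here, while a reload of a scratch register such
+// as r12 does not.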
+
+void ARMBaseRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not suppor Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / FLDD.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ bool HasFP = hasFP(MF);
+ if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on the frame pointer only if the stack frame extends
+ // beyond the frame pointer's stack slot, or the target is ELF and the
+ // function has a frame pointer.
+ if (HasFP ||
+ AFI->getGPRCalleeSavedArea2Size() ||
+ AFI->getDPRCalleeSavedAreaSize() ||
+ AFI->getDPRCalleeSavedAreaOffset()) {
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
+ }
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Move SP to start of integer callee save spill area 2.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize());
+
+ // Move SP to start of integer callee save spill area 1.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 2, STI);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea2Size());
+
+ // Restore SP to its value upon entry to the function.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 1, STI);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size());
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 000000000000..da703fbc8c19
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,148 @@
+//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEREGISTERINFO_H
+#define ARMBASEREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
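+
+// The register allocator can use these hints to request the other half of
+// an even/odd register pair (e.g. r1 to go with r0), which is presumably
+// what allows paired instructions such as LDRD / STRD to be formed; see
+// getRegisterPairEven / getRegisterPairOdd below.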
+
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
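+// e.g. isARMLowRegister(ARM::R5) returns true, while ARM::R8 and ARM::SP
+// fall through to the default case and return false.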
+
+struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ const ARMBaseInstrInfo &TII;
+ const ARMSubtarget &STI;
+
+ /// FramePtr - ARM physical register used as the frame pointer.
+ unsigned FramePtr;
+
+ // Can only be subclassed.
+ explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &STI);
+
+ // Return the opcode that implements 'Op', or 0 if no opcode does.
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// ARM::LR, return the number that it corresponds to (e.g. 14). It
+ /// also returns true in isSPVFP if the register is a single precision
+ /// VFP register.
+ static unsigned getRegisterNumbering(unsigned RegEnum, bool *isSPVFP = 0);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) const;
+
+ /// Code Generation virtual methods...
+ virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
+
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+private:
+ unsigned estimateRSStackSizeLimit(MachineFunction &MF) const;
+
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8a4c741faf95..716163958d9c 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -111,6 +111,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
@@ -122,6 +123,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f29576148b32..6f1c624cbf52 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -26,14 +26,18 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#ifndef NDEBUG
#include <iomanip>
#endif
@@ -57,12 +61,18 @@ namespace {
ARMJITInfo *JTI;
const ARMInstrInfo *II;
const TargetData *TD;
+ const ARMSubtarget *Subtarget;
TargetMachine &TM;
CodeEmitter &MCE;
const std::vector<MachineConstantPoolEntry> *MCPEs;
const std::vector<MachineJumpTableEntry> *MJTEs;
bool IsPIC;
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
public:
static char ID;
explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
@@ -160,7 +170,7 @@ namespace {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, intptr_t ACPV = 0);
+ bool NeedStub, bool Indirect, intptr_t ACPV = 0);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
@@ -174,36 +184,39 @@ namespace {
/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code
/// to the specified MCE object.
-namespace llvm {
-
-FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
- MachineCodeEmitter &MCE) {
+FunctionPass *llvm::createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
return new Emitter<MachineCodeEmitter>(TM, MCE);
}
-FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
- JITCodeEmitter &JCE) {
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
}
-
-} // end namespace llvm
+FunctionPass *llvm::createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
+ ObjectCodeEmitter &OCE) {
+ return new Emitter<ObjectCodeEmitter>(TM, OCE);
+}
template<class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
+ JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo();
II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo();
TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData();
- JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo();
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = &MF.getJumpTableInfo()->getJumpTables();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
JTI->Initialize(MF, IsPIC);
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
do {
- DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
MCE.startFunction(MF);
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
@@ -220,7 +233,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
template<class CodeEmitter>
unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const {
switch (ARM_AM::getAM2ShiftOpc(Imm)) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return 2;
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -240,7 +253,7 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true);
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
else if (MO.isCPI()) {
@@ -254,8 +267,10 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
else if (MO.isMBB())
emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
else {
- cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable(0);
}
return 0;
}
@@ -264,9 +279,14 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
///
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, intptr_t ACPV) {
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub));
+ bool NeedStub, bool Indirect,
+ intptr_t ACPV) {
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ GV, ACPV, NeedStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ GV, ACPV, NeedStub);
+ MCE.addRelocation(MR);
}
/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
@@ -294,7 +314,7 @@ void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI,
/// be emitted to the current location in the function, and allow it to be PC
/// relative.
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex,
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex,
unsigned Reloc) {
MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
Reloc, JTIndex, 0, true));
@@ -310,32 +330,28 @@ void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB,
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) {
-#ifndef NDEBUG
- DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
- << Binary << std::dec << "\n";
-#endif
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
MCE.emitWordLE(Binary);
}
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) {
-#ifndef NDEBUG
- DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
- << (unsigned)Binary << std::dec << "\n";
- DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
- << (unsigned)(Binary >> 32) << std::dec << "\n";
-#endif
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
MCE.emitDWordLE(Binary);
}
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) {
- DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI;
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
NumEmitted++; // Keep track of the # of mi's emitted
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
- assert(0 && "Unhandled instruction encoding format!");
+ llvm_unreachable("Unhandled instruction encoding format!");
break;
}
case ARMII::Pseudo:
@@ -393,6 +409,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) {
emitMiscInstruction(MI);
break;
}
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
}
template<class CodeEmitter>
@@ -400,7 +417,7 @@ void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) {
unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
-
+
// Remember the CONSTPOOL_ENTRY address for later relocation.
JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
@@ -410,55 +427,49 @@ void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) {
ARMConstantPoolValue *ACPV =
static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
- DOUT << " ** ARM constant pool #" << CPI << " @ "
- << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n';
+ DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
GlobalValue *GV = ACPV->getGV();
if (GV) {
- assert(!ACPV->isStub() && "Don't know how to deal this yet!");
- if (ACPV->isNonLazyPointer())
- MCE.addRelocation(MachineRelocation::getIndirectSymbol(
- MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV,
- (intptr_t)ACPV, false));
- else
- emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
- ACPV->isStub() || isa<Function>(GV), (intptr_t)ACPV);
+ Reloc::Model RelocM = TM.getRelocationModel();
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ isa<Function>(GV),
+ Subtarget->GVIsIndirectSymbol(GV, RelocM),
+ (intptr_t)ACPV);
} else {
- assert(!ACPV->isNonLazyPointer() && "Don't know how to deal this yet!");
emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
}
emitWordLE(0);
} else {
Constant *CV = MCPE.Val.ConstVal;
-#ifndef NDEBUG
- DOUT << " ** Constant pool #" << CPI << " @ "
- << (void*)MCE.getCurrentPCValue() << " ";
- if (const Function *F = dyn_cast<Function>(CV))
- DOUT << F->getName();
- else
- DOUT << *CV;
- DOUT << '\n';
-#endif
+ DEBUG({
+ errs() << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ errs() << F->getName();
+ else
+ errs() << *CV;
+ errs() << '\n';
+ });
if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV));
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
emitWordLE(0);
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
uint32_t Val = *(uint32_t*)CI->getValue().getRawData();
emitWordLE(Val);
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (CFP->getType() == Type::FloatTy)
+ if (CFP->getType()->isFloatTy())
emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- else if (CFP->getType() == Type::DoubleTy)
+ else if (CFP->getType()->isDoubleTy())
emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
else {
- assert(0 && "Unable to handle this constantpool entry!");
- abort();
+ llvm_unreachable("Unable to handle this constantpool entry!");
}
} else {
- assert(0 && "Unable to handle this constantpool entry!");
- abort();
+ llvm_unreachable("Unable to handle this constantpool entry!");
}
}
}
@@ -467,7 +478,8 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) {
const MachineOperand &MO0 = MI.getOperand(0);
const MachineOperand &MO1 = MI.getOperand(1);
- assert(MO1.isImm() && "Not a valid so_imm value!");
+ assert(MO1.isImm() && ARM_AM::getSOImmVal(MO1.isImm()) != -1 &&
+ "Not a valid so_imm value!");
unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
@@ -483,7 +495,7 @@ void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) {
// Encode so_imm.
// Set bit I(25) to identify this is the immediate form of <shifter_op>
Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1));
+ Binary |= getMachineSoImmOpValue(V1);
emitWordLE(Binary);
// Now the 'orr' instruction.
@@ -501,14 +513,14 @@ void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) {
// Encode so_imm.
// Set bit I(25) to identify this is the immediate form of <shifter_op>
Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2));
+ Binary |= getMachineSoImmOpValue(V2);
emitWordLE(Binary);
}
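
// Illustrative split (hypothetical immediate): 0xFF0000FF is not a valid
// single so_imm, so it is built as a MOV of one rotated 8-bit piece
// (0xFF000000) followed by an ORR of the remainder (0x000000FF).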
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) {
// It's basically add r, pc, (LJTI - $+8)
-
+
const TargetInstrDesc &TID = MI.getDesc();
// Emit the 'add' instruction.
@@ -527,7 +539,6 @@ void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement.
- // Set bit I(25) to identify this is the immediate form of <shifter_op>.
Binary |= 1 << ARMII::I_BitShift;
emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
@@ -576,8 +587,8 @@ void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) {
template<class CodeEmitter>
void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) {
- DOUT << " ** LPC" << LabelID << " @ "
- << (void*)MCE.getCurrentPCValue() << '\n';
+ DEBUG(errs() << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n');
JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
}
@@ -586,13 +597,13 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
unsigned Opcode = MI.getDesc().Opcode;
switch (Opcode) {
default:
- abort(); // FIXME:
+ llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ // FIXME: Add support for MOVimm32.
case TargetInstrInfo::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
if (MI.getOperand(0).getSymbolName()[0]) {
- assert(0 && "JIT does not support inline asm!\n");
- abort();
+ llvm_report_error("JIT does not support inline asm!");
}
break;
}
@@ -601,7 +612,7 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
MCE.emitLabel(MI.getOperand(0).getImm());
break;
case TargetInstrInfo::IMPLICIT_DEF:
- case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::KILL:
case ARM::DWARF_LOC:
// Do nothing.
break;
@@ -674,7 +685,7 @@ unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue(
// ROR - 0111
// RRX - 0110 and bit[11:8] clear.
switch (SOpc) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::lsl: SBits = 0x1; break;
case ARM_AM::lsr: SBits = 0x3; break;
case ARM_AM::asr: SBits = 0x5; break;
@@ -688,7 +699,7 @@ unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue(
// ASR - 100
// ROR - 110
switch (SOpc) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::lsl: SBits = 0x0; break;
case ARM_AM::lsr: SBits = 0x2; break;
case ARM_AM::asr: SBits = 0x4; break;
@@ -713,12 +724,15 @@ unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue(
template<class CodeEmitter>
unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) {
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
// Encode rotate_imm.
- unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1)
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
<< ARMII::SoRotImmShift;
// Encode immed_8.
- Binary |= ARM_AM::getSOImmValImm(SoImm);
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
return Binary;
}
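
// Worked encoding (illustrative): SoImm = 0x3FC is 0xFF rotated right by 30,
// so rotate_imm = 30/2 = 15 goes in bits 11:8 and immed_8 = 0xFF in bits
// 7:0, producing the 12-bit field 0xFFF.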
@@ -740,6 +754,10 @@ void Emitter<CodeEmitter>::emitDataProcessingInstruction(
unsigned ImplicitRn) {
const TargetInstrDesc &TID = MI.getDesc();
+ if (TID.Opcode == ARM::BFC) {
+ llvm_report_error("ARMv6t2 JIT is not yet supported.");
+ }
+
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -791,9 +809,7 @@ void Emitter<CodeEmitter>::emitDataProcessingInstruction(
}
// Encode so_imm.
- // Set bit I(25) to identify this is the immediate form of <shifter_op>.
- Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(MO.getImm());
+ Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
emitWordLE(Binary);
}
@@ -952,8 +968,8 @@ static unsigned getAddrModeUPBits(unsigned Mode) {
// DA - Decrement after - bit U = 0 and bit P = 0
// DB - Decrement before - bit U = 0 and bit P = 1
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
- case ARM_AM::da: break;
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
@@ -983,7 +999,7 @@ void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction(
Binary |= 0x1 << ARMII::W_BitShift;
// Set registers
- for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) {
+ for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
@@ -1107,7 +1123,7 @@ void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) {
unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
Binary |= ShiftAmt << ARMII::ShiftShift;
-
+
emitWordLE(Binary);
}
@@ -1115,8 +1131,9 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.Opcode == ARM::TPsoft)
- abort(); // FIXME
+ if (TID.Opcode == ARM::TPsoft) {
+ llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+ }
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1135,7 +1152,8 @@ void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) {
// Remember the base address of the inline jump table.
uintptr_t JTBase = MCE.getCurrentPCValue();
JTI->addJumpTableBaseAddr(JTIndex, JTBase);
- DOUT << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n';
+ DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+ << '\n');
// Now emit the jump table entries.
const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
@@ -1155,17 +1173,17 @@ void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
// Handle jump tables.
- if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd ||
- TID.Opcode == ARM::t2BR_JTr || TID.Opcode == ARM::t2BR_JTadd) {
+ if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
// First emit a ldr pc, [] instruction.
emitDataProcessingInstruction(MI, ARM::PC);
// Then emit the inline jump table.
- unsigned JTIndex = (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::t2BR_JTr)
+ unsigned JTIndex =
+ (TID.Opcode == ARM::BR_JTr)
? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
emitInlineJumpTable(JTIndex);
return;
- } else if (TID.Opcode == ARM::BR_JTm || TID.Opcode == ARM::t2BR_JTm) {
+ } else if (TID.Opcode == ARM::BR_JTm) {
// First emit a ldr pc, [] instruction.
emitLoadStoreInstruction(MI, ARM::PC);
@@ -1183,7 +1201,7 @@ void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) {
if (TID.Opcode == ARM::BX_RET)
// The return register is LR.
Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
- else
+ else
// otherwise, set the return register
Binary |= getMachineOpValue(MI, 0);
@@ -1194,7 +1212,7 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = false;
- RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP);
+ RegD = ARMRegisterInfo::getRegisterNumbering(RegD, &isSPVFP);
if (!isSPVFP)
Binary |= RegD << ARMII::RegRdShift;
else {
@@ -1208,7 +1226,7 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = false;
- RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP);
+ RegN = ARMRegisterInfo::getRegisterNumbering(RegN, &isSPVFP);
if (!isSPVFP)
Binary |= RegN << ARMII::RegRnShift;
else {
@@ -1222,7 +1240,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = false;
- RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP);
+ RegM = ARMRegisterInfo::getRegisterNumbering(RegM, &isSPVFP);
if (!isSPVFP)
Binary |= RegM;
else {
@@ -1268,7 +1286,7 @@ void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) {
// Encode Dm / Sm.
Binary |= encodeVFPRm(MI, OpIdx);
-
+
emitWordLE(Binary);
}
@@ -1386,11 +1404,11 @@ void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction(
Binary |= 0x1 << ARMII::W_BitShift;
// First register is encoded in Dd.
- Binary |= encodeVFPRd(MI, 4);
+ Binary |= encodeVFPRd(MI, 5);
// Number of registers are encoded in offset field.
unsigned NumRegs = 1;
- for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) {
+ for (unsigned i = 6, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
@@ -1413,4 +1431,3 @@ void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) {
}
#include "ARMGenCodeEmitter.inc"
-
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 9fedaa465434..309e3ba2ac25 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -15,24 +15,31 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
+#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumCPEs, "Number of constpool entries");
-STATISTIC(NumSplit, "Number of uncond branches inserted");
-STATISTIC(NumCBrFixed, "Number of cond branches fixed");
-STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -63,6 +70,8 @@ namespace {
/// to a return, unreachable, or unconditional branch).
std::vector<MachineBasicBlock*> WaterList;
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
/// CPUser - One user of a constant pool, keeping the machine instruction
/// pointer, the constant pool being referenced, and the max displacement
/// allowed from the instruction to the CP.
@@ -70,8 +79,11 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
unsigned MaxDisp;
- CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
- : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+ bool NegOk;
+ bool IsSoImm;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {}
};
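
// For instance (illustrative): a Thumb1 tLDRpci user gets MaxDisp =
// 255 * 4 = 1020 with NegOk = false, while an ARM LDR user gets
// MaxDisp = 4095 with NegOk = true; see the per-opcode cases in
// InitialFunctionScan below.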
/// CPUsers - Keep track of all of the machine instructions that use various
@@ -117,29 +129,34 @@ namespace {
///
SmallVector<MachineInstr*, 4> PushPopMIs;
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
/// HasFarJump - True if any far jump instruction has been emitted during
/// the branch fix up pass.
bool HasFarJump;
const TargetInstrInfo *TII;
+ const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
+ bool isThumb1;
bool isThumb2;
public:
static char ID;
ARMConstantIslands() : MachineFunctionPass(&ID) {}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "ARM constant island placement and branch shortening pass";
}
private:
- void DoInitialPlacement(MachineFunction &Fn,
+ void DoInitialPlacement(MachineFunction &MF,
std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void InitialFunctionScan(MachineFunction &Fn,
+ void InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
@@ -147,58 +164,62 @@ namespace {
bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
bool LookForWater(CPUser&U, unsigned UserOffset,
- MachineBasicBlock** NewMBB);
- MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB,
- std::vector<MachineBasicBlock*>::iterator IP);
+ MachineBasicBlock *&NewMBB);
+ MachineBasicBlock *AcceptWater(water_iterator IP);
void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
- MachineBasicBlock** NewMBB);
- bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex);
+ MachineBasicBlock *&NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
void RemoveDeadCPEMI(MachineInstr *CPEMI);
bool RemoveUnusedCPEntries();
bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
- MachineInstr *CPEMI, unsigned Disp,
- bool DoDump);
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
CPUser &U);
bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK);
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
bool UndoLRSpillRestore();
+ bool OptimizeThumb2Instructions(MachineFunction &MF);
+ bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool OptimizeThumb2JumpTables(MachineFunction &MF);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
- void verify(MachineFunction &Fn);
+ void verify(MachineFunction &MF);
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &Fn) {
+void ARMConstantIslands::verify(MachineFunction &MF) {
assert(BBOffsets.size() == BBSizes.size());
for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (isThumb) {
- for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY)
- assert((BBOffsets[MBB->getNumber()]%4 == 0 &&
- BBSizes[MBB->getNumber()]%4 == 0) ||
- (BBOffsets[MBB->getNumber()]%4 != 0 &&
- BBSizes[MBB->getNumber()]%4 != 0));
+ if (!isThumb)
+ return;
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned MBBId = MBB->getNumber();
+ assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
}
}
+#endif
}
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DOUT << "block " << J << " offset " << BBOffsets[J] <<
- " size " << BBSizes[J] << "\n";
+ DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
+ << " size " << BBSizes[J] << "\n");
}
}
@@ -208,31 +229,36 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
- MachineConstantPool &MCP = *Fn.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
+ MachineConstantPool &MCP = *MF.getConstantPool();
+
+ TII = MF.getTarget().getInstrInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
- TII = Fn.getTarget().getInstrInfo();
- AFI = Fn.getInfo<ARMFunctionInfo>();
isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- Fn.RenumberBlocks();
+ MF.RenumberBlocks();
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
- /// Thumb functions containing constant pools get 2-byte alignment.
- /// This is so we can keep exact track of where the alignment padding goes.
- /// Set default.
- AFI->setAlign(isThumb ? 1U : 2U);
+ // Set the default. Thumb1 functions are 2-byte aligned; ARM and Thumb2
+ // functions are 4-byte aligned.
+ AFI->setAlign(isThumb1 ? 1U : 2U);
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
if (!MCP.isEmpty()) {
- DoInitialPlacement(Fn, CPEMIs);
- if (isThumb)
+ DoInitialPlacement(MF, CPEMIs);
+ if (isThumb1)
AFI->setAlign(2U);
}
@@ -242,7 +268,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(Fn, CPEMIs);
+ InitialFunctionScan(MF, CPEMIs);
CPEMIs.clear();
/// Remove dead constant pool entries.
@@ -251,25 +277,37 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
// Iteratively place constant pool entries and fix up branches until there
// is no change.
bool MadeChange = false;
+ unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
- bool Change = false;
+ bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- Change |= HandleConstantPoolUser(Fn, i);
+ CPChange |= HandleConstantPoolUser(MF, i);
+ if (CPChange && ++NoCPIters > 30)
+ llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
+
+ bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- Change |= FixUpImmediateBr(Fn, ImmBranches[i]);
+ BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ llvm_unreachable("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
- if (!Change)
+
+ if (!CPChange && !BRChange)
break;
MadeChange = true;
}
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2)
+ MadeChange |= OptimizeThumb2Instructions(MF);
+
// After a while, this might be made debug-only, but it is not expensive.
- verify(Fn);
+ verify(MF);
// If LR has been forced spilled and no far jumps (i.e. BL) has been issued.
// Undo the spill / restore of LR if possible.
- if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb)
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
BBSizes.clear();
@@ -279,24 +317,25 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
CPEntries.clear();
ImmBranches.clear();
PushPopMIs.clear();
+ T2JumpTables.clear();
return MadeChange;
}
/// DoInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = Fn.CreateMachineBasicBlock();
- Fn.push_back(BB);
+ MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
+ MF.push_back(BB);
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs =
- Fn.getConstantPool()->getConstants();
+ MF.getConstantPool()->getConstants();
- const TargetData &TD = *Fn.getTarget().getTargetData();
+ const TargetData &TD = *MF.getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
// Verify that all constant pool entries are a multiple of 4 bytes. If not,
@@ -313,7 +352,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
NumCPEs++;
- DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n";
+ DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
+ << "\n");
}
}
@@ -352,10 +392,10 @@ ARMConstantIslands::CPEntry
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs) {
unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -377,18 +417,19 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
unsigned Scale = 1;
int UOpc = Opc;
switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
case ARM::tBR_JTr:
- case ARM::t2BR_JTr:
- case ARM::t2BR_JTm:
- case ARM::t2BR_JTadd:
- // A Thumb table jump may involve padding; for the offsets to
+ // A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
AFI->setAlign(2U);
if ((Offset+MBBSize)%4 != 0)
+ // FIXME: Add a pseudo ALIGN instruction instead.
MBBSize += 2; // padding
continue; // Does not get an entry in ImmBranches
- default:
- continue; // Ignore other JT branches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
isCond = true;
UOpc = ARM::B;
@@ -427,6 +468,9 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
PushPopMIs.push_back(I);
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
if (I->getOperand(op).isCPI()) {
@@ -436,50 +480,52 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
// Basic size info comes from the TSFlags field.
unsigned Bits = 0;
unsigned Scale = 1;
- unsigned TSFlags = I->getDesc().TSFlags;
- switch (TSFlags & ARMII::AddrModeMask) {
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
default:
- // Constant pool entries can reach anything.
- if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
- continue;
- if (I->getOpcode() == ARM::tLEApcrel) {
- Bits = 8; // Taking the address of a CP entry.
- break;
- }
- assert(0 && "Unknown addressing mode for CP reference!");
- case ARMII::AddrMode1: // AM1: 8 bits << 2
- Bits = 8;
- Scale = 4; // Taking the address of a CP entry.
- break;
- case ARMII::AddrMode2:
- Bits = 12; // +-offset_12
- break;
- case ARMII::AddrMode3:
- Bits = 8; // +-offset_8
+ llvm_unreachable("Unknown addressing mode for CP reference!");
break;
- // addrmode4 has no immediate offset.
- case ARMII::AddrMode5:
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is an 8-bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction is
+ // 4 bytes wide, this is always correct. We'll check for other
+ // displacements that fit in a SoImm as well.
Bits = 8;
- Scale = 4; // +-(offset_8*4)
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
break;
- // addrmode6 has no immediate offset.
- case ARMII::AddrModeT1_1:
- Bits = 5; // +offset_5
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
break;
- case ARMII::AddrModeT1_2:
- Bits = 5;
- Scale = 2; // +(offset_5*2)
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
break;
- case ARMII::AddrModeT1_4:
- Bits = 5;
- Scale = 4; // +(offset_5*4)
+
+ case ARM::LDR:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
break;
- case ARMII::AddrModeT1_s:
+
+ case ARM::tLDRpci:
+ case ARM::tLDRcp:
Bits = 8;
Scale = 4; // +(offset_8*4)
break;
- case ARMII::AddrModeT2_pc:
- Bits = 12; // +-offset_12
+
+ case ARM::FLDD:
+ case ARM::FLDS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
break;
}
@@ -487,7 +533,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
unsigned CPI = I->getOperand(op).getIndex();
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
- CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
// Increment corresponding CPEntry reference count.
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
@@ -563,7 +609,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
- std::vector<MachineBasicBlock*>::iterator IP =
+ water_iterator IP =
std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
CompareMBBNumbers);
WaterList.insert(IP, NewBB);
@@ -590,8 +636,8 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// Note the new unconditional branch is not being recorded.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond to anything in the source.
- BuildMI(OrigBB, DebugLoc::getUnknownLoc(),
- TII->get(isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B)).addMBB(NewBB);
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(OrigBB, DebugLoc::getUnknownLoc(), TII->get(Opc)).addMBB(NewBB);
NumSplit++;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
@@ -625,7 +671,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// available water after it (but not if it's already there, which happens
// when splitting before a conditional branch that is followed by an
// unconditional branch - in that case we want to insert NewBB).
- std::vector<MachineBasicBlock*>::iterator IP =
+ water_iterator IP =
std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
@@ -648,7 +694,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// We removed instructions from UserMBB, subtract that off from its size.
// Add 2 or 4 to the block to count the unconditional branch we added to it.
- unsigned delta = isThumb ? 2 : 4;
+ int delta = isThumb1 ? 2 : 4;
BBSizes[OrigBBI] -= NewBBSize - delta;
// ...and adjust BBOffsets for NewBB accordingly.
@@ -664,24 +710,39 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
- unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) {
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
// On Thumb offsets==2 mod 4 are rounded down by the hardware for
// purposes of the displacement computation; compensate for that here.
// Effectively, the valid range of displacements is 2 bytes smaller for such
// references.
- if (isThumb && UserOffset%4 !=0)
+ unsigned TotalAdj = 0;
+ if (isThumb && UserOffset%4 !=0) {
UserOffset -= 2;
+ TotalAdj = 2;
+ }
// CPEs will be rounded up to a multiple of 4.
- if (isThumb && TrialOffset%4 != 0)
+ if (isThumb && TrialOffset%4 != 0) {
TrialOffset += 2;
+ TotalAdj += 2;
+ }
+
+ // In Thumb2 mode, later branch adjustments can shift instructions up and
+ // change their alignment. In the worst case this can decrease the user's
+ // effective address by 2 and increase the CPE's address by 2.
+ if (isThumb2 && TotalAdj != 4)
+ MaxDisp -= (4 - TotalAdj);
if (UserOffset <= TrialOffset) {
// User before the Trial.
- if (TrialOffset-UserOffset <= MaxDisp)
+ if (TrialOffset - UserOffset <= MaxDisp)
return true;
+ // FIXME: Make use of the full range of soimm values.
} else if (NegativeOK) {
- if (UserOffset-TrialOffset <= MaxDisp)
+ if (UserOffset - TrialOffset <= MaxDisp)
return true;
+ // FIXME: Make use of the full range of soimm values.
}
return false;
}
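
// Example (illustrative): a Thumb user at offset 6 is treated as offset 4
// and a trial CPE offset of 6 is rounded up to 8, so TotalAdj = 4 and the
// Thumb2 worst-case slack above is not subtracted from MaxDisp.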
@@ -690,39 +751,36 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
/// Water (a basic block) will be in range for the specific MI.
bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U)
-{
+ MachineBasicBlock* Water, CPUser &U) {
unsigned MaxDisp = U.MaxDisp;
- MachineFunction::iterator I = next(MachineFunction::iterator(Water));
unsigned CPEOffset = BBOffsets[Water->getNumber()] +
BBSizes[Water->getNumber()];
// If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction. (Currently applies only to ARM, so
- // no alignment compensation attempted here.)
+ // the offset of the instruction.
if (CPEOffset < UserOffset)
UserOffset += U.CPEMI->getOperand(2).getImm();
- return OffsetIsInRange (UserOffset, CPEOffset, MaxDisp, !isThumb);
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
}
/// CPEIsInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
- MachineInstr *CPEMI,
- unsigned MaxDisp, bool DoDump) {
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
assert(CPEOffset%4 == 0 && "Misaligned CPE");
if (DoDump) {
- DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI;
+ DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
}
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}
#ifndef NDEBUG
@@ -745,52 +803,48 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
int delta) {
MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
- for(unsigned i=BB->getNumber()+1; i<BB->getParent()->getNumBlockIDs(); i++) {
+ for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
+ i < e; ++i) {
BBOffsets[i] += delta;
// If some existing blocks have padding, adjust the padding as needed, a
// bit tricky. delta can be negative so don't use % on that.
- if (isThumb) {
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty()) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned oldOffset = BBOffsets[i] - delta;
- if (oldOffset%4==0 && BBOffsets[i]%4!=0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) {
- // remove existing padding
- BBSizes[i] -=2;
- delta -= 2;
- }
+ if (!isThumb)
+ continue;
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty()) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned OldOffset = BBOffsets[i] - delta;
+ if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
}
- // Thumb jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- MachineInstr *ThumbJTMI = NULL;
- if ((prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
- || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTr)
- || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTm)
- || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTadd))
- ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI) {
- unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned oldMIOffset = newMIOffset - delta;
- if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
+ }
+ // Thumb1 jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ MachineInstr *ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned OldMIOffset = NewMIOffset - delta;
+ if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
}
- if (delta==0)
- return;
}
- MBBI = next(MBBI);
+ if (delta==0)
+ return;
}
+ MBBI = next(MBBI);
}
}
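The padding bookkeeping above is easier to follow in isolation. A hypothetical distillation of the rule (the helper name is illustrative, not from the pass):

    // Given a block that must stay 4-byte aligned and whose start moves by
    // `delta`, return the extra padding adjustment: +2, -2, or 0.
    static int paddingDelta(unsigned OldOffset, int delta) {
      unsigned NewOffset = OldOffset + delta;
      if (OldOffset % 4 == 0 && NewOffset % 4 != 0) return 2;  // add padding
      if (OldOffset % 4 != 0 && NewOffset % 4 == 0) return -2; // drop padding
      return 0;
    }
    // E.g. paddingDelta(104, 2) == 2: the entry lands at 106, so two bytes of
    // padding restore alignment and later blocks shift by 4 in total.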
@@ -824,8 +878,8 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
MachineInstr *CPEMI = U.CPEMI;
// Check to see if the CPE is already in-range.
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) {
- DOUT << "In range\n";
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
+ DEBUG(errs() << "In range\n");
return 1;
}
@@ -839,8 +893,9 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Removing CPEs can leave empty entries, skip
if (CPEs[i].CPEMI == NULL)
continue;
- if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) {
- DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n";
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
+ DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
@@ -870,15 +925,15 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
default:
break;
}
-
+
return ((1<<23)-1)*4;
}
/// AcceptWater - Small amount of common code factored out of the following.
-
-MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
- std::vector<MachineBasicBlock*>::iterator IP) {
- DOUT << "found water in range\n";
+///
+MachineBasicBlock *ARMConstantIslands::AcceptWater(water_iterator IP) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
// Remove the original WaterList entry; we want subsequent
// insertions in this vicinity to go after the one we're
// about to insert. This considerably reduces the number
@@ -890,41 +945,44 @@ MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
/// LookForWater - look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
-/// Returns true if found, false if not. If it returns true, *NewMBB
-/// is set to the WaterList entry.
-/// For ARM, we prefer the water that's farthest away. For Thumb, prefer
-/// water that will not introduce padding to water that will; within each
-/// group, prefer the water that's farthest away.
-
+/// Returns true if found, false if not. If it returns true, NewMBB
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
- MachineBasicBlock** NewMBB) {
- std::vector<MachineBasicBlock*>::iterator IPThatWouldPad;
- MachineBasicBlock* WaterBBThatWouldPad = NULL;
- if (!WaterList.empty()) {
- for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
- MachineBasicBlock* WaterBB = *IP;
- if (WaterIsInRange(UserOffset, WaterBB, U)) {
- if (isThumb &&
- (BBOffsets[WaterBB->getNumber()] +
- BBSizes[WaterBB->getNumber()])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!WaterBBThatWouldPad) {
- WaterBBThatWouldPad = WaterBB;
- IPThatWouldPad = IP;
- }
- } else {
- *NewMBB = AcceptWater(WaterBB, IP);
- return true;
+ MachineBasicBlock *&NewMBB) {
+ if (WaterList.empty())
+ return false;
+
+ bool FoundWaterThatWouldPad = false;
+ water_iterator IPThatWouldPad;
+ for (water_iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and at a lower address than the current one.
+ if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ WaterBB->getNumber() < U.CPEMI->getParent()->getNumber()) {
+ unsigned WBBId = WaterBB->getNumber();
+ if (isThumb &&
+ (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!FoundWaterThatWouldPad) {
+ FoundWaterThatWouldPad = true;
+ IPThatWouldPad = IP;
}
+ } else {
+ NewMBB = AcceptWater(IP);
+ return true;
+ }
}
- if (IP == B)
- break;
- }
+ if (IP == B)
+ break;
}
- if (isThumb && WaterBBThatWouldPad) {
- *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
+ if (FoundWaterThatWouldPad) {
+ NewMBB = AcceptWater(IPThatWouldPad);
return true;
}
return false;
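Stripped of the machinery, the search is: walk the water list from the end, take the first in-range entry that needs no padding, and remember the first padding entry as a fallback. A standalone sketch over plain offsets (hypothetical helper types, not the pass's own):

    #include <vector>
    // inRange() and wouldPad() stand in for WaterIsInRange and the Thumb
    // alignment test; returns the chosen index, or -1 if nothing fits.
    static int findWaterSketch(const std::vector<unsigned> &Water,
                               bool (*inRange)(unsigned),
                               bool (*wouldPad)(unsigned)) {
      int PadFallback = -1;
      for (int i = (int)Water.size() - 1; i >= 0; --i) {
        if (!inRange(Water[i]))
          continue;
        if (!wouldPad(Water[i]))
          return i;            // best case: in range, no padding needed
        if (PadFallback < 0)
          PadFallback = i;     // remember the first padding candidate
      }
      return PadFallback;
    }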
@@ -934,12 +992,12 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct. Otherwise the block is split to create a hole with an
-/// unconditional branch around it. In either case *NewMBB is set to a
+/// unconditional branch around it. In either case NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).
-
void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
- unsigned UserOffset, MachineBasicBlock** NewMBB) {
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -950,18 +1008,18 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
// If the use is at the end of the block, or the end of the block
// is within range, make new water there. (The addition below is
- // for the unconditional branch we will be adding: 4 bytes on ARM,
- // 2 on Thumb. Possible Thumb alignment padding is allowed for
+ // for the unconditional branch we will be adding: 4 bytes on ARM + Thumb2,
+ // 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
// inside OffsetIsInRange.
// If the block ends in an unconditional branch already, it is water,
// and is known to be out of range, so we'll always be adding a branch.)
if (&UserMBB->back() == UserMI ||
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2: 4),
- U.MaxDisp, !isThumb)) {
- DOUT << "Split at end of block\n";
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ DEBUG(errs() << "Split at end of block\n");
if (&UserMBB->back() == UserMI)
assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- *NewMBB = next(MachineFunction::iterator(UserMBB));
+ NewMBB = next(MachineFunction::iterator(UserMBB));
// Add an unconditional branch from UserMBB to fallthrough block.
// Record it for branch lengthening; this new branch will not get out of
// range, but if the preceding conditional branch is out of range, the
@@ -969,16 +1027,16 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
// range, so the machinery has to know about it.
int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
- TII->get(UncondBr)).addMBB(*NewMBB);
+ TII->get(UncondBr)).addMBB(NewMBB);
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- int delta = isThumb ? 2 : 4;
+ int delta = isThumb1 ? 2 : 4;
BBSizes[UserMBB->getNumber()] += delta;
AdjustBBOffsetsAfter(UserMBB, delta);
} else {
// What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb since instructions are 2 bytes
+ // This is a little tricky on Thumb1 since instructions are 2 bytes
// and constant pool entries are 4 bytes: if instruction I references
// island CPE, and instruction I+1 references CPE', it will
// not work well to put CPE as far forward as possible, since then
@@ -991,7 +1049,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
// if not, we back up the insertion point.
// The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb). Alignment of the
+ // inserting (allows for long branch on Thumb1). Alignment of the
// island is handled inside OffsetIsInRange.
unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
// This could point off the end of the block if we've already got
@@ -1000,7 +1058,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
// conditional and a maximally long unconditional).
if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb ? 6 : 8);
+ (isThumb1 ? 6 : 8);
unsigned EndInsertOffset = BaseInsertOffset +
CPEMI->getOperand(2).getImm();
MachineBasicBlock::iterator MI = UserMI;
@@ -1011,10 +1069,11 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
Offset += TII->GetInstSizeInBytes(MI),
MI = next(MI)) {
if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
- CPUsers[CPUIndex].MaxDisp, !isThumb)) {
- BaseInsertOffset -= (isThumb ? 2 : 4);
- EndInsertOffset -= (isThumb ? 2 : 4);
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ BaseInsertOffset -= (isThumb1 ? 2 : 4);
+ EndInsertOffset -= (isThumb1 ? 2 : 4);
}
// This is overly conservative, as we don't account for CPEMIs
// being reused within the block, but it doesn't matter much.
@@ -1022,8 +1081,8 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
CPUIndex++;
}
}
- DOUT << "Split in middle of big block\n";
- *NewMBB = SplitBlockBeforeInstr(prior(MI));
+ DEBUG(errs() << "Split in middle of big block\n");
+ NewMBB = SplitBlockBeforeInstr(prior(MI));
}
}
@@ -1031,7 +1090,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
@@ -1040,14 +1099,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
unsigned Size = CPEMI->getOperand(2).getImm();
MachineBasicBlock *NewMBB;
// Compute this only once; it's expensive. The 4 or 8 is the value the
- // hardware keeps in the PC (2 insns ahead of the reference).
+ // hardware keeps in the PC.
unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
- // Special case: tLEApcrel are two instructions MI's. The actual user is the
- // second instruction.
- if (UserMI->getOpcode() == ARM::tLEApcrel)
- UserOffset += 2;
-
// See if the current entry is within range, or there is a clone of it
// in range.
int result = LookForExistingCPEntry(U, UserOffset);
@@ -1058,19 +1112,16 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
// We will be generating a new clone. Get a UID for it.
unsigned ID = AFI->createConstPoolEntryUId();
- // Look for water where we can place this CPE. We look for the farthest one
- // away that will work. Forward references only for now (although later
- // we might find some that are backwards).
-
- if (!LookForWater(U, UserOffset, &NewMBB)) {
+ // Look for water where we can place this CPE.
+ if (!LookForWater(U, UserOffset, NewMBB)) {
// No water found.
- DOUT << "No water found\n";
- CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
+ DEBUG(errs() << "No water found\n");
+ CreateNewWater(CPUserIndex, UserOffset, NewMBB);
}
// Okay, we know we can put an island before NewMBB now, do it!
- MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock();
- Fn.insert(NewMBB, NewIsland);
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MF.insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewIsland);
@@ -1101,7 +1152,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
break;
}
- DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI;
+ DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << '\t' << *UserMI);
return true;
}
@@ -1115,7 +1167,7 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
BBSizes[CPEBB->getNumber()] -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb mode, the size of island may be padded by two to compensate for
+ // In Thumb1 mode, the size of the island may be padded by two to compensate for
// the alignment requirement. The size will then be 2 when the block is
// empty, so fix this.
@@ -1157,11 +1209,11 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
unsigned DestOffset = BBOffsets[DestBB->getNumber()];
- DOUT << "Branch of destination BB#" << DestBB->getNumber()
- << " from BB#" << MI->getParent()->getNumber()
- << " max delta=" << MaxDisp
- << " from " << GetOffsetOf(MI) << " to " << DestOffset
- << " offset " << int(DestOffset-BrOffset) << "\t" << *MI;
+ DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
if (BrOffset <= DestOffset) {
// Branch before the Dest.
@@ -1176,7 +1228,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1185,8 +1237,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(Fn, Br);
- return FixUpConditionalBr(Fn, Br);
+ return FixUpUnconditionalBr(MF, Br);
+ return FixUpConditionalBr(MF, Br);
}
/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
@@ -1194,10 +1246,11 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
- assert(isThumb && !isThumb2 && "Expected a Thumb-1 function!");
+ if (!isThumb1)
+ llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
@@ -1207,7 +1260,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
HasFarJump = true;
NumUBrFixed++;
- DOUT << " Changed B to long jump " << *MI;
+ DEBUG(errs() << " Changed B to long jump " << *MI);
return true;
}
@@ -1216,7 +1269,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1251,7 +1304,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DOUT << " Invert Bcc condition and swap its destination with " << *BMI;
+ DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
MI->getOperand(1).setImm(CC);
@@ -1273,9 +1327,9 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
}
MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
- DOUT << " Insert B to BB#" << DestBB->getNumber()
- << " also invert condition and change dest. to BB#"
- << NextBB->getNumber() << "\n";
+ DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
// Insert a new conditional branch and a new unconditional branch.
// Also update the ImmBranch as well as adding a new entry for the new branch.
@@ -1300,14 +1354,17 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
}
/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
-/// LR / restores LR to pc.
+/// LR / restore LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
bool ARMConstantIslands::UndoLRSpillRestore() {
bool MadeChange = false;
for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates, the third is a zero since there
+ // is no writeback.
if (MI->getOpcode() == ARM::tPOP_RET &&
- MI->getOperand(0).getReg() == ARM::PC &&
- MI->getNumExplicitOperands() == 1) {
+ MI->getOperand(3).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 4) {
BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
MI->eraseFromParent();
MadeChange = true;
@@ -1315,3 +1372,201 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
}
return MadeChange;
}
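For reference, the operand layout that the new tPOP_RET check relies on, reconstructed from the comment above (illustrative sketch, not taken from the instruction definition):

    // tPOP_RET  pred-imm, pred-reg, 0 /* no writeback */, reglist...
    //   index:     0         1      2                     3, 4, ...
    // Hence getOperand(3) is the first popped register, and the transform
    // fires only when that register is PC and it is the sole list entry
    // (4 explicit operands in total).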
+
+bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= OptimizeThumb2Branches(MF);
+ MadeChange |= OptimizeThumb2JumpTables(MF);
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
+ ImmBranch &Br = ImmBranches[i];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ if (!NewOpc)
+ continue;
+
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
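The ranges used above follow directly from the Thumb1 encodings: a signed, halfword-scaled displacement of Bits bits reaches ((1 << (Bits-1)) - 1) * 2 bytes. Worked out for the two cases:

    unsigned MaxOffsB   = ((1 << (11 - 1)) - 1) * 2; // tB:   1023 * 2 == 2046
    unsigned MaxOffsBcc = ((1 << (8 - 1))  - 1) * 2; // tBcc:  127 * 2 ==  254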
+
+
+/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when possible.
+bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid of some of the
+ // constantpool tables?
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = GetOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ // Negative offsets are not OK. FIXME: We should change the BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (ByteOk || HalfWordOk) {
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ bool BaseRegKill = MI->getOperand(0).isKill();
+ if (!BaseRegKill)
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+ MachineBasicBlock::iterator PrevI = MI;
+ if (PrevI == MBB->begin())
+ continue;
+
+ MachineInstr *AddrMI = --PrevI;
+ bool OptOk = true;
+ // Examine the instruction that calculates the jumptable entry address.
+ // If it's not the one just before the t2BR_JT, we won't be able to delete
+ // it, so the optimization is not worth doing.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg) {
+ OptOk = false;
+ break;
+ }
+ if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
+ OptOk = false;
+ break;
+ }
+ }
+ if (!OptOk)
+ continue;
+
+ // The previous instruction should be a tLEApcrelJT or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = --PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != BaseReg)
+ OptOk = false;
+
+ if (!OptOk)
+ continue;
+
+ unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH;
+ MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
+ // is 2-byte aligned. For now, the asm printer will fix it up.
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
+ OrigSize += TII->GetInstSizeInBytes(LeaMI);
+ OrigSize += TII->GetInstSizeInBytes(MI);
+
+ AddrMI->eraseFromParent();
+ LeaMI->eraseFromParent();
+ MI->eraseFromParent();
+
+ int delta = OrigSize - NewSize;
+ BBSizes[MBB->getNumber()] -= delta;
+ AdjustBBOffsetsAfter(MBB, -delta);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
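The shrinking decision reduces to two range tests on the largest forward distance in the table: tbb stores byte offsets and tbh halfword offsets, both implicitly scaled by 2. A minimal sketch, assuming all targets lie forward of the branch as the FIXME above requires:

    #include <cstddef>
    static const char *pickTableKind(unsigned MaxFwdDelta) {
      if (MaxFwdDelta <= ((1u << 8) - 1) * 2)   // 510 bytes
        return "t2TBB";
      if (MaxFwdDelta <= ((1u << 16) - 1) * 2)  // 131070 bytes
        return "t2TBH";
      return NULL;  // keep the original full-width jump table
    }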
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index a75ed3bd5339..71700893a3e8 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -15,33 +15,31 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/GlobalValue.h"
#include "llvm/Type.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
using namespace llvm;
ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
- ARMCP::ARMCPKind k,
+ ARMCP::ARMCPKind K,
unsigned char PCAdj,
const char *Modif,
bool AddCA)
: MachineConstantPoolValue((const Type*)gv->getType()),
- GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ GV(gv), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
Modifier(Modif), AddCurrentAddress(AddCA) {}
-ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id,
- ARMCP::ARMCPKind k,
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
+ const char *s, unsigned id,
unsigned char PCAdj,
const char *Modif,
bool AddCA)
- : MachineConstantPoolValue((const Type*)Type::Int32Ty),
- GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
+ GV(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPValue), PCAdjust(PCAdj),
Modifier(Modif), AddCurrentAddress(AddCA) {}
-ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,
- ARMCP::ARMCPKind k,
- const char *Modif)
- : MachineConstantPoolValue((const Type*)Type::Int32Ty),
- GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0),
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
+ GV(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
Modifier(Modif) {}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
@@ -56,7 +54,6 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
if (CPV->GV == GV &&
CPV->S == S &&
CPV->LabelId == LabelId &&
- CPV->Kind == Kind &&
CPV->PCAdjust == PCAdjust)
return i;
}
@@ -65,31 +62,28 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
return -1;
}
+ARMConstantPoolValue::~ARMConstantPoolValue() {
+ free((void*)S);
+}
+
void
ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddPointer(GV);
ID.AddPointer(S);
ID.AddInteger(LabelId);
- ID.AddInteger((unsigned)Kind);
ID.AddInteger(PCAdjust);
}
void ARMConstantPoolValue::dump() const {
- cerr << " " << *this;
+ errs() << " " << *this;
}
-void ARMConstantPoolValue::print(std::ostream &O) const {
- raw_os_ostream RawOS(O);
- print(RawOS);
-}
void ARMConstantPoolValue::print(raw_ostream &O) const {
if (GV)
O << GV->getName();
else
O << S;
- if (isNonLazyPointer()) O << "$non_lazy_ptr";
- else if (isStub()) O << "$stub";
if (Modifier) O << "(" << Modifier << ")";
if (PCAdjust != 0) {
O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index d2b9066dcc97..00c48086aef6 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,17 +15,16 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
-#include <iosfwd>
namespace llvm {
class GlobalValue;
+class LLVMContext;
namespace ARMCP {
enum ARMCPKind {
CPValue,
- CPNonLazyPtr,
- CPStub
+ CPLSDA
};
}
@@ -36,7 +35,7 @@ class ARMConstantPoolValue : public MachineConstantPoolValue {
GlobalValue *GV; // GlobalValue being loaded.
const char *S; // ExtSymbol being loaded.
unsigned LabelId; // Label id of the load.
- ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub?
+ ARMCP::ARMCPKind Kind; // Value or LSDA?
unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
// 8 for ARM, 4 for Thumb.
const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
@@ -47,12 +46,12 @@ public:
ARMCP::ARMCPKind Kind = ARMCP::CPValue,
unsigned char PCAdj = 0, const char *Modifier = NULL,
bool AddCurrentAddress = false);
- ARMConstantPoolValue(const char *s, unsigned id,
- ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
unsigned char PCAdj = 0, const char *Modifier = NULL,
bool AddCurrentAddress = false);
- ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind,
- const char *Modifier);
+ ARMConstantPoolValue(GlobalValue *GV, const char *Modifier);
+ ARMConstantPoolValue();
+ ~ARMConstantPoolValue();
GlobalValue *getGV() const { return GV; }
@@ -61,27 +60,27 @@ public:
bool hasModifier() const { return Modifier != NULL; }
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
unsigned getLabelId() const { return LabelId; }
- bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; }
- bool isStub() const { return Kind == ARMCP::CPStub; }
unsigned char getPCAdjustment() const { return PCAdjust; }
+ bool isLSDA() { return Kind == ARMCP::CPLSDA; }
+
+ virtual unsigned getRelocationInfo() const {
+ // FIXME: This is conservatively claiming that these entries require a
+ // relocation, we may be able to do better than this.
+ return 2;
+ }
+
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
- void print(std::ostream *O) const { if (O) print(*O); }
- void print(std::ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
void print(raw_ostream &O) const;
void dump() const;
};
- inline std::ostream &operator<<(std::ostream &O, const ARMConstantPoolValue &V) {
- V.print(O);
- return O;
-}
-
+
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
V.print(O);
return O;
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
index 405b8f2b9f28..d5dae2442499 100644
--- a/lib/Target/ARM/ARMFrameInfo.h
+++ b/lib/Target/ARM/ARMFrameInfo.h
@@ -15,15 +15,15 @@
#define ARM_FRAMEINFO_H
#include "ARM.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameInfo.h"
namespace llvm {
class ARMFrameInfo : public TargetFrameInfo {
public:
explicit ARMFrameInfo(const ARMSubtarget &ST)
- : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
+ : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0, 4) {
}
};
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6485fc1d3600..bebf4e839994 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -21,6 +21,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -30,10 +31,10 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
-static const unsigned arm_dsubreg_0 = 5;
-static const unsigned arm_dsubreg_1 = 6;
+using namespace llvm;
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -48,8 +49,9 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
const ARMSubtarget *Subtarget;
public:
- explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
@@ -57,7 +59,8 @@ public:
return "ARM Instruction Selection";
}
- /// getI32Imm - Return a target constant with the specified value, of type i32.
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
@@ -74,6 +77,8 @@ public:
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDValue Op, SDValue N,
SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode4(SDValue Op, SDValue N, SDValue &Addr,
+ SDValue &Mode);
bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
@@ -118,15 +123,63 @@ private:
SDNode *SelectARMIndexedLoad(SDValue Op);
SDNode *SelectT2IndexedLoad(SDValue Op);
+ /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
+ SDNode *SelectDYN_ALLOC(SDValue Op);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers, and of the even and odd subregs of Q registers.
+ /// For NumVecs == 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers, and of the even and odd subregs of Q registers.
+ SDNode *SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
+
+ /// PairDRegs - Insert a pair of double registers into an implicit def to
+ /// form a quad register.
+ SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
};
}
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node has the
+// specified opcode and an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
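These helpers let the shift-pattern matching later in this file read declaratively. A typical use, mirroring the bitfield-extract matching below (fragment, assumes a node N as in Select):

    // Recognize (srl (shl X, C1), C2) with both shift amounts constant:
    unsigned Shl = 0, Srl = 0;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl) &&
        isInt32Immediate(N->getOperand(1), Srl)) {
      // X's bitfield at LSB = Srl - Shl, of width 32 - Srl, can be
      // extracted directly with SBFX/UBFX.
    }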
+
+
void ARMDAGToDAGISel::InstructionSelect() {
DEBUG(BB->dump());
@@ -144,7 +197,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
if (ShOpcVal == ARM_AM::no_shift) return false;
-
+
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -198,7 +251,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
MVT::i32);
return true;
}
-
+
// Match simple R +/- imm12 operands.
if (N.getOpcode() == ISD::ADD)
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -223,15 +276,15 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
return true;
}
}
-
+
// Otherwise this is R +/- [possibly shifted] R
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
unsigned ShAmt = 0;
-
+
Base = N.getOperand(0);
Offset = N.getOperand(1);
-
+
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
// it.
@@ -243,7 +296,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
ShOpcVal = ARM_AM::no_shift;
}
}
-
+
// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
@@ -260,7 +313,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
}
}
}
-
+
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
MVT::i32);
return true;
@@ -315,7 +368,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
return true;
}
-
+
if (N.getOpcode() != ISD::ADD) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
@@ -326,7 +379,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
return true;
}
-
+
// If the RHS is +/- imm8, fold into addr mode.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
@@ -348,7 +401,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
return true;
}
}
-
+
Base = N.getOperand(0);
Offset = N.getOperand(1);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
@@ -377,6 +430,12 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N,
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode4(SDValue Op, SDValue N,
+ SDValue &Addr, SDValue &Mode) {
+ Addr = N;
+ Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
SDValue &Base, SDValue &Offset) {
@@ -392,7 +451,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
MVT::i32);
return true;
}
-
+
// If the RHS is +/- imm8, fold into addr mode.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
@@ -417,7 +476,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
}
}
}
-
+
Base = N;
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
MVT::i32);
@@ -428,14 +487,14 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
SDValue &Addr, SDValue &Update,
SDValue &Opc) {
Addr = N;
- // The optional writeback is handled in ARMLoadStoreOpt.
+ // Default to no writeback.
Update = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N,
- SDValue &Offset, SDValue &Label) {
+ SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
SDValue N1 = N.getOperand(1);
@@ -451,13 +510,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N,
// FIXME dl should come from the parent load or store, not the address
DebugLoc dl = Op.getDebugLoc();
if (N.getOpcode() != ISD::ADD) {
- Base = N;
- // We must materialize a zero in a reg! Returning a constant here
- // wouldn't work without additional code to position the node within
- // ISel's topological ordering in a place where ISel will process it
- // normally. Instead, just explicitly issue a tMOVri8 node!
- Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || NC->getZExtValue() != 0)
+ return false;
+
+ Base = Offset = N;
return true;
}
@@ -567,7 +624,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
}
}
}
-
+
return false;
}
@@ -594,41 +651,70 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
- if (N.getOpcode() != ISD::ADD)
- return false;
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index...
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(Op, N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
int RHSC = (int)RHS->getZExtValue();
- if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits.
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
- return false;
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N,
SDValue &Base, SDValue &OffImm) {
- if (N.getOpcode() == ISD::ADD) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
- Base = N.getOperand(0);
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
- } else if (N.getOpcode() == ISD::SUB) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if (RHSC >= 0 && RHSC < 0x100) { // 8 bits.
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
- return true;
- }
- }
}
return false;
@@ -643,7 +729,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N,
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC >= 0 && RHSC < 0x100) { // 8 bits.
- OffImm = (AM == ISD::PRE_INC)
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
? CurDAG->getTargetConstant(RHSC, MVT::i32)
: CurDAG->getTargetConstant(-RHSC, MVT::i32);
return true;
@@ -658,7 +744,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N,
if (N.getOpcode() == ISD::ADD) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
- if (((RHSC & 0x3) == 0) && (RHSC < 0 && RHSC > -0x400)) { // 8 bits.
+ if (((RHSC & 0x3) == 0) &&
+ ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits, scaled by 4.
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -681,20 +768,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N,
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
- // Base only.
- if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
- Base = N;
- if (N.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- } else if (N.getOpcode() == ARMISD::Wrapper) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select t2LDRpci instead.
- }
- OffReg = CurDAG->getRegister(0, MVT::i32);
- ShImm = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
}
// Look for (R + R) or (R + (R << [1,2,3])).
@@ -708,8 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N,
ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
if (ShOpcVal == ARM_AM::lsl)
std::swap(Base, OffReg);
- }
-
+ }
+
if (ShOpcVal == ARM_AM::lsl) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
// it.
@@ -723,11 +807,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N,
} else {
ShOpcVal = ARM_AM::no_shift;
}
- } else if (SelectT2AddrModeImm12(Op, N, Base, ShImm) ||
- SelectT2AddrModeImm8 (Op, N, Base, ShImm))
- // Don't match if it's possible to match to one of the r +/- imm cases.
- return false;
-
+ }
+
ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
return true;
@@ -746,7 +827,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) {
if (AM == ISD::UNINDEXED)
return NULL;
- MVT LoadedVT = LD->getMemoryVT();
+ EVT LoadedVT = LD->getMemoryVT();
SDValue Offset, AMOpc;
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
@@ -780,8 +861,8 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 6);
+ return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
}
return NULL;
@@ -793,14 +874,14 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
if (AM == ISD::UNINDEXED)
return NULL;
- MVT LoadedVT = LD->getMemoryVT();
+ EVT LoadedVT = LD->getMemoryVT();
bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
SDValue Offset;
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
bool Match = false;
if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) {
- switch (LoadedVT.getSimpleVT()) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
case MVT::i32:
Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
break;
@@ -828,13 +909,300 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 5);
+ return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 5);
+ }
+
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+ SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
+ int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
+ if (AlignVal < 0)
+ // We need to align the stack. Use the Thumb1 tAND, which is the only
+ // Thumb instruction that can read and write SP. This matches a pseudo
+ // instruction that has a chain to ensure the result is written back to
+ // the stack pointer.
+ SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0);
+
+ bool isC = isa<ConstantSDNode>(Size);
+ uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
+ // Handle the most common case for both Thumb1 and Thumb2:
+ // tSUBspi - immediate is between 0 ... 508 inclusive.
+ if (C <= 508 && ((C & 3) == 0))
+ // FIXME: tSUBspi encodes the scale of 4 implicitly.
+ return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
+ CurDAG->getTargetConstant(C/4, MVT::i32),
+ Chain);
+
+ if (Subtarget->isThumb1Only()) {
+ // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
+ // should have negated the size operand already. FIXME: We can't insert a
+ // new target-independent node at this stage, so we are forced to negate
+ // it earlier. Is there a better solution?
+ return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
+ Chain);
+ } else if (Subtarget->isThumb2()) {
+ if (isC && Predicate_t2_so_imm(Size.getNode())) {
+ // t2SUBrSPi
+ SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
+ } else if (isC && Predicate_imm0_4095(Size.getNode())) {
+ // t2SUBrSPi12
+ SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
+ } else {
+ // t2SUBrSPs
+ SDValue Ops[] = { SP, Size,
+ getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
+ }
+ }
+
+ // FIXME: Add ADD / SUB sp instructions for ARM.
+ return 0;
+}
+
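Worked example of the tSUBspi fast path above, with a hypothetical 64-byte dynamic alloca:

    unsigned C = 64;                              // requested size in bytes
    bool FastPath = (C <= 508) && ((C & 3) == 0); // true
    unsigned EncodedImm = C / 4;                  // 16; the x4 scale is implicit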
+/// PairDRegs - Insert a pair of double registers into an implicit def to
+/// form a quad register.
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue Undef =
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
+ SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+ VT, Undef, V0, SubReg0);
+ return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+ VT, SDValue(Pair, 0), V1, SubReg1);
+}
+
+/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
+/// for a 64-bit subregister of the vector.
+static EVT GetNEONSubregVT(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled NEON type");
+ case MVT::v16i8: return MVT::v8i8;
+ case MVT::v8i16: return MVT::v4i16;
+ case MVT::v4f32: return MVT::v2f32;
+ case MVT::v4i32: return MVT::v2i32;
+ case MVT::v2i64: return MVT::v1i64;
+ }
+}
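A quick usage sketch: a quad-register NEON operation on v4i32 is carried out on two v2i32 D subregisters, which PairDRegs then reassembles into the Q result:

    EVT QVT = MVT::v4i32;
    EVT DVT = GetNEONSubregVT(QVT); // MVT::v2i32, one 64-bit half of the Q reg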
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, MemUpdate, MemOpc;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ }
+
+ if (is64BitVector) {
+ unsigned Opc = DOpcodes[OpcodeIndex];
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ std::vector<EVT> ResTys(NumVecs, VT);
+ ResTys.push_back(MVT::Other);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ }
+
+ EVT RegVT = GetNEONSubregVT(VT);
+ if (NumVecs == 2) {
+ // Quad registers are directly supported for VLD2,
+ // loading 2 pairs of D regs.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ std::vector<EVT> ResTys(4, VT);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ Chain = SDValue(VLd, 4);
+
+ // Combine the even and odd subregs to produce the result.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
+ ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ }
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+
+ // Enable writeback to the address register.
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+
+ std::vector<EVT> ResTys(NumVecs, RegVT);
+ ResTys.push_back(MemAddr.getValueType());
+ ResTys.push_back(MVT::Other);
+
+ // Load the even subreg.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
+ Chain = SDValue(VLdA, NumVecs+1);
+
+ // Load the odd subreg.
+ Opc = QOpcodes1[OpcodeIndex];
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
+ Chain = SDValue(VLdB, NumVecs+1);
+
+ // Combine the even and odd subregs to produce the result.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
+ ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ }
+ }
+ ReplaceUses(SDValue(N, NumVecs), Chain);
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
+ unsigned NumVecs, unsigned *DOpcodes,
+ unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, MemUpdate, MemOpc;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
+ EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ // Quad registers are handled by load/store of subregs. Find the subreg info.
+ unsigned NumElts = 0;
+ int SubregIdx = 0;
+ EVT RegVT = VT;
+ if (!is64BitVector) {
+ RegVT = GetNEONSubregVT(VT);
+ NumElts = RegVT.getVectorNumElements();
+ SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
+ }
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ SmallVector<SDValue, 9> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(MemUpdate);
+ Ops.push_back(MemOpc);
+
+ unsigned Opc = 0;
+ if (is64BitVector) {
+ Opc = DOpcodes[OpcodeIndex];
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ } else {
+ // Check if this is loading the even or odd subreg of a Q register.
+ if (Lane < NumElts) {
+ Opc = QOpcodes0[OpcodeIndex];
+ } else {
+ Lane -= NumElts;
+ Opc = QOpcodes1[OpcodeIndex];
+ }
+ // Extract the subregs of the input vector.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
+ Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Chain);
+
+ if (!IsLoad)
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+
+ std::vector<EVT> ResTys(NumVecs, RegVT);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdLn =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5);
+ // For a 64-bit vector load to D registers, nothing more needs to be done.
+ if (is64BitVector)
+ return VLdLn;
+
+ // For 128-bit vectors, take the 64-bit results of the load and insert them
+ // as subregs into the result.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
+ N->getOperand(Vec+3),
+ SDValue(VLdLn, Vec));
+ ReplaceUses(SDValue(N, Vec), QuadVec);
}
+ Chain = SDValue(VLdLn, NumVecs);
+ ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
+ unsigned Opc) {
+ if (!Subtarget->hasV6T2Ops())
+ return NULL;
+
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(Op.getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ unsigned Width = 32 - Srl_imm;
+ int LSB = Srl_imm - Shl_imm;
+ if ((LSB + Width) > 32)
+ return NULL;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { Op.getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+}
SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
@@ -848,44 +1216,50 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
- if (Subtarget->isThumb()) {
- if (Subtarget->hasThumb2())
- // Thumb2 has the MOVT instruction, so all immediates can
- // be done with MOV + MOVT, at worst.
- UseCP = 0;
- else
+ if (Subtarget->hasThumb2())
+ // Thumb2-aware targets have the MOVT instruction, so all immediates can
+ // be done with MOV + MOVT, at worst.
+    UseCP = false;
+ else {
+ if (Subtarget->isThumb()) {
UseCP = (Val > 255 && // MOV
~Val > 255 && // MOV + MVN
!ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
- } else
- UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
- ARM_AM::getSOImmVal(~Val) == -1 && // MVN
- !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ } else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ }
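+
+      // For example, in ARM mode a value such as 0x10101010 is neither a
+      // rotated immediate nor a two-part value, so it is loaded from the
+      // constant pool; 0xFF000000 is a single rotated immediate and is not.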
+
if (UseCP) {
SDValue CPIdx =
- CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI.getPointerTy());
SDNode *ResNode;
- if (Subtarget->isThumb())
- ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
- CPIdx, CurDAG->getEntryNode());
- else {
+ if (Subtarget->isThumb1Only()) {
+      SDValue Pred = getAL(CurDAG);  // predicate: AL (always execute)
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 4);
+ } else {
SDValue Ops[] = {
- CPIdx,
+ CPIdx,
CurDAG->getRegister(0, MVT::i32),
CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
- ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops, 6);
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 6);
}
ReplaceUses(Op, SDValue(ResNode, 0));
return NULL;
}
-
+
// Other cases are autogenerated.
break;
}
@@ -893,80 +1267,106 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb1Only()) {
return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
CurDAG->getTargetConstant(0, MVT::i32));
} else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5);
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
- case ISD::ADD: {
- if (!Subtarget->isThumb())
- break;
- // Select add sp, c to tADDhirr.
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
- if (LHSR && LHSR->getReg() == ARM::SP) {
- std::swap(N0, N1);
- std::swap(LHSR, RHSR);
- }
- if (RHSR && RHSR->getReg() == ARM::SP) {
- SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl,
- Op.getValueType(), N0, N0), 0);
- return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1);
- }
+ case ARMISD::DYN_ALLOC:
+ return SelectDYN_ALLOC(Op);
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(Op,
+ Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX))
+ return I;
+ break;
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(Op,
+ Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX))
+ return I;
break;
- }
case ISD::MUL:
- if (Subtarget->isThumb())
+ if (Subtarget->isThumb1Only())
break;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned RHSV = C->getZExtValue();
if (!RHSV) break;
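+      // Strength-reduce multiplies: x * (2^n + 1) becomes x + (x << n)
+      // (ADDrs / t2ADDrs) and x * (2^n - 1) becomes (x << n) - x
+      // (RSBrs / t2RSBrs); e.g. x * 9 uses an LSL #3 shifter operand.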
if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
SDValue V = Op.getOperand(0);
- unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1));
- SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
- CurDAG->getTargetConstant(ShImm, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
SDValue V = Op.getOperand(0);
- unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1));
- SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
- CurDAG->getTargetConstant(ShImm, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
}
}
break;
case ARMISD::FMRRD:
- return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
- Op.getOperand(0), getAL(CurDAG),
- CurDAG->getRegister(0, MVT::i32));
+ return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
+ Op.getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+      SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,
+                                    Ops, 4);
+ } else {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ }
}
case ISD::SMUL_LOHI: {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,
+                                    Ops, 4);
+ } else {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ }
}
case ISD::LOAD: {
SDNode *ResNode = 0;
- if (Subtarget->isThumb2())
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
ResNode = SelectT2IndexedLoad(Op);
else
ResNode = SelectARMIndexedLoad(Op);
@@ -988,7 +1388,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
// Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
- unsigned Opc = Subtarget->isThumb() ?
+ unsigned Opc = Subtarget->isThumb() ?
((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
SDValue Chain = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
@@ -1003,8 +1403,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
cast<ConstantSDNode>(N2)->getZExtValue()),
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
- SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other,
- MVT::Flag, Ops, 5);
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Flag, Ops, 5);
Chain = SDValue(ResNode, 0);
if (Op.getNode()->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
@@ -1014,8 +1414,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return NULL;
}
case ARMISD::CMOV: {
- bool isThumb = Subtarget->isThumb();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
@@ -1024,39 +1423,79 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
- // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (!isThumb && VT == MVT::i32 &&
- SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
- }
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ if (SelectT2ShifterOperandReg(Op, N1, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, CPTmp0, SOShImm, Tmp2, N3, InFlag };
+          return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 6);
+ }
+ } else {
+ if (SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+ }
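+      // In both forms the shift is folded into the conditional move itself,
+      // e.g. (ARMcmov a, (shl b, 2), cc) selects to MOVCCs a, b, lsl #2.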
- // Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true,
- // (imm:i32):$cc)
- // Emits: (MOVCCi:i32 GPR:i32:$false,
- // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc)
- // Pattern complexity = 10 cost = 1 size = 0
- if (VT == MVT::i32 &&
- N3.getOpcode() == ISD::Constant &&
- Predicate_so_imm(N3.getNode())) {
- SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N1)->getZExtValue()),
- MVT::i32);
- Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode());
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5);
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (N3.getOpcode() == ISD::Constant) {
+ if (Subtarget->isThumb()) {
+ if (Predicate_t2_so_imm(N3.getNode())) {
+ SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N1)->getZExtValue()),
+ MVT::i32);
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::t2MOVCCi, MVT::i32, Ops, 5);
+ }
+ } else {
+ if (Predicate_so_imm(N3.getNode())) {
+ SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N1)->getZExtValue()),
+ MVT::i32);
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+ }
+ }
}
// Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
@@ -1073,23 +1512,25 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
MVT::i32);
SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
unsigned Opc = 0;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Illegal conditional move type!");
break;
case MVT::i32:
- Opc = isThumb ? ARM::tMOVCCr : ARM::MOVCCr;
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
break;
case MVT::f32:
Opc = ARM::FCPYScc;
break;
case MVT::f64:
Opc = ARM::FCPYDcc;
- break;
+ break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
}
case ARMISD::CNEG: {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
@@ -1103,7 +1544,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
MVT::i32);
SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
unsigned Opc = 0;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Illegal conditional move type!");
break;
case MVT::f32:
@@ -1116,104 +1557,308 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
}
- case ISD::DECLARE: {
- SDValue Chain = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
- // FIXME: handle VLAs.
- if (!FINode) {
- ReplaceUses(Op.getValue(0), Chain);
- return NULL;
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- if (N2.getOpcode() == ARMISD::PIC_ADD && isa<LoadSDNode>(N2.getOperand(0)))
- N2 = N2.getOperand(0);
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(N2);
- if (!Ld) {
- ReplaceUses(Op.getValue(0), Chain);
- return NULL;
+ return CurDAG->getMachineNode(Opc, dl, VT, VT,
+ N->getOperand(0), N->getOperand(1));
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- SDValue BasePtr = Ld->getBasePtr();
- assert(BasePtr.getOpcode() == ARMISD::Wrapper &&
- isa<ConstantPoolSDNode>(BasePtr.getOperand(0)) &&
- "llvm.dbg.variable should be a constantpool node");
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(BasePtr.getOperand(0));
- GlobalValue *GV = 0;
- if (CP->isMachineConstantPoolEntry()) {
- ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal();
- GV = ACPV->getGV();
- } else
- GV = dyn_cast<GlobalValue>(CP->getConstVal());
- if (!GV) {
- ReplaceUses(Op.getValue(0), Chain);
- return NULL;
+ return CurDAG->getMachineNode(Opc, dl, VT, VT,
+ N->getOperand(0), N->getOperand(1));
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
-
- SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
- TLI.getPointerTy());
- SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
- SDValue Ops[] = { Tmp1, Tmp2, Chain };
- return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
- MVT::Other, Ops, 3);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT,
+ N->getOperand(0), N->getOperand(1));
}
- case ISD::CONCAT_VECTORS: {
- MVT VT = Op.getValueType();
- assert(VT.is128BitVector() && Op.getNumOperands() == 2 &&
- "unexpected CONCAT_VECTORS");
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDNode *Result =
- CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT);
- if (N0.getOpcode() != ISD::UNDEF)
- Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT,
- SDValue(Result, 0), N0,
- CurDAG->getTargetConstant(arm_dsubreg_0,
- MVT::i32));
- if (N1.getOpcode() != ISD::UNDEF)
- Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT,
- SDValue(Result, 0), N1,
- CurDAG->getTargetConstant(arm_dsubreg_1,
- MVT::i32));
- return Result;
- }
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ EVT VT = N->getValueType(0);
+ unsigned Opc = 0;
+
+ switch (IntNo) {
+ default:
+ break;
+
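+    // The opcode tables below are indexed by vector element size (8, 16, 32,
+    // and, for the whole-register forms, 64), matching the OpcodeIndex
+    // computed in SelectVLD and SelectVLDSTLane.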
+ case Intrinsic::arm_neon_vld2: {
+ unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD2d64 };
+ unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ return SelectVLD(Op, 2, DOpcodes, QOpcodes, 0);
+ }
- case ISD::VECTOR_SHUFFLE: {
- MVT VT = Op.getValueType();
-
- // Match 128-bit splat to VDUPLANEQ. (This could be done with a Pat in
- // ARMInstrNEON.td but it is awkward because the shuffle mask needs to be
- // transformed first into a lane number and then to both a subregister
- // index and an adjusted lane number.) If the source operand is a
- // SCALAR_TO_VECTOR, leave it so it will be matched later as a VDUP.
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- if (VT.is128BitVector() && SVOp->isSplat() &&
- Op.getOperand(0).getOpcode() != ISD::SCALAR_TO_VECTOR &&
- Op.getOperand(1).getOpcode() == ISD::UNDEF) {
- unsigned LaneVal = SVOp->getSplatIndex();
-
- MVT HalfVT;
- unsigned Opc = 0;
- switch (VT.getVectorElementType().getSimpleVT()) {
- default: assert(false && "unhandled VDUP splat type");
- case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break;
- case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break;
- case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break;
- case MVT::f32: Opc = ARM::VDUPLNfq; HalfVT = MVT::v2f32; break;
+ case Intrinsic::arm_neon_vld3: {
+ unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
+ ARM::VLD3d32, ARM::VLD3d64 };
+ unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a };
+ unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b };
+ return SelectVLD(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
+ ARM::VLD4d32, ARM::VLD4d64 };
+ unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a };
+ unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b };
+ return SelectVLD(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
+ unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
+ unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
+ return SelectVLDSTLane(Op, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
+ unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
+ unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
+ return SelectVLDSTLane(Op, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
+ unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
+ unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
+ return SelectVLDSTLane(Op, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ SDValue MemAddr, MemUpdate, MemOpc;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+ SDValue Chain = N->getOperand(0);
+ VT = N->getOperand(3).getValueType();
+ if (VT.is64BitVector()) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst2 type");
+ case MVT::v8i8: Opc = ARM::VST2d8; break;
+ case MVT::v4i16: Opc = ARM::VST2d16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VST2d32; break;
+ case MVT::v1i64: Opc = ARM::VST2d64; break;
+ }
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ N->getOperand(3), N->getOperand(4), Chain };
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6);
+ }
+ // Quad registers are stored as pairs of double registers.
+ EVT RegVT;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst2 type");
+ case MVT::v16i8: Opc = ARM::VST2q8; RegVT = MVT::v8i8; break;
+ case MVT::v8i16: Opc = ARM::VST2q16; RegVT = MVT::v4i16; break;
+ case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break;
+ case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break;
+ }
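+      // e.g. a v4i32 vst2 of two Q registers is emitted as a single VST2q32
+      // of their four D subregs, extracted below.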
+ SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(3));
+ SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(3));
+ SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(4));
+ SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(4));
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ D0, D1, D2, D3, Chain };
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ SDValue MemAddr, MemUpdate, MemOpc;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+ SDValue Chain = N->getOperand(0);
+ VT = N->getOperand(3).getValueType();
+ if (VT.is64BitVector()) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst3 type");
+ case MVT::v8i8: Opc = ARM::VST3d8; break;
+ case MVT::v4i16: Opc = ARM::VST3d16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VST3d32; break;
+ case MVT::v1i64: Opc = ARM::VST3d64; break;
+ }
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ N->getOperand(3), N->getOperand(4),
+ N->getOperand(5), Chain };
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7);
}
+ // Quad registers are stored with two separate instructions, where one
+ // stores the even registers and the other stores the odd registers.
+ EVT RegVT;
+ unsigned Opc2 = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst3 type");
+ case MVT::v16i8:
+ Opc = ARM::VST3q8a; Opc2 = ARM::VST3q8b; RegVT = MVT::v8i8; break;
+ case MVT::v8i16:
+ Opc = ARM::VST3q16a; Opc2 = ARM::VST3q16b; RegVT = MVT::v4i16; break;
+ case MVT::v4f32:
+ Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2f32; break;
+ case MVT::v4i32:
+ Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break;
+ }
+ // Enable writeback to the address register.
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+
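+      // The "a" store returns the post-incremented address as its first
+      // result; it becomes the base address of the "b" store below.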
+ SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(3));
+ SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(4));
+ SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(5));
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, D0, D2, D4, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
+ Chain = SDValue(VStA, 1);
+
+ SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(3));
+ SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(4));
+ SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(5));
+ MemAddr = SDValue(VStA, 0);
+ const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, D1, D3, D5, Chain };
+ SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(),
+ MVT::Other, OpsB, 7);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+ }
- // The source operand needs to be changed to a subreg of the original
- // 128-bit operand, and the lane number needs to be adjusted accordingly.
- unsigned NumElts = VT.getVectorNumElements() / 2;
- unsigned SRVal = (LaneVal < NumElts ? arm_dsubreg_0 : arm_dsubreg_1);
- SDValue SR = CurDAG->getTargetConstant(SRVal, MVT::i32);
- SDValue NewLane = CurDAG->getTargetConstant(LaneVal % NumElts, MVT::i32);
- SDNode *SubReg = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
- dl, HalfVT, N->getOperand(0), SR);
- return CurDAG->SelectNodeTo(N, Opc, VT, SDValue(SubReg, 0), NewLane);
+ case Intrinsic::arm_neon_vst4: {
+ SDValue MemAddr, MemUpdate, MemOpc;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+ SDValue Chain = N->getOperand(0);
+ VT = N->getOperand(3).getValueType();
+ if (VT.is64BitVector()) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst4 type");
+ case MVT::v8i8: Opc = ARM::VST4d8; break;
+ case MVT::v4i16: Opc = ARM::VST4d16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VST4d32; break;
+ case MVT::v1i64: Opc = ARM::VST4d64; break;
+ }
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+ N->getOperand(3), N->getOperand(4),
+ N->getOperand(5), N->getOperand(6), Chain };
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
+ }
+ // Quad registers are stored with two separate instructions, where one
+ // stores the even registers and the other stores the odd registers.
+ EVT RegVT;
+ unsigned Opc2 = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst4 type");
+ case MVT::v16i8:
+ Opc = ARM::VST4q8a; Opc2 = ARM::VST4q8b; RegVT = MVT::v8i8; break;
+ case MVT::v8i16:
+ Opc = ARM::VST4q16a; Opc2 = ARM::VST4q16b; RegVT = MVT::v4i16; break;
+ case MVT::v4f32:
+ Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2f32; break;
+ case MVT::v4i32:
+ Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break;
+ }
+ // Enable writeback to the address register.
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+
+ SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(3));
+ SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(4));
+ SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(5));
+ SDValue D6 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(6));
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc,
+ D0, D2, D4, D6, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, OpsA, 8);
+ Chain = SDValue(VStA, 1);
+
+ SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(3));
+ SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(4));
+ SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(5));
+ SDValue D7 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(6));
+ MemAddr = SDValue(VStA, 0);
+ const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc,
+ D1, D3, D5, D7, Chain };
+ SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(),
+ MVT::Other, OpsB, 8);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
}
- break;
+ case Intrinsic::arm_neon_vst2lane: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
+ unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
+ unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
+ return SelectVLDSTLane(Op, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
+ unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
+ unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
+ return SelectVLDSTLane(Op, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
+ unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
+ unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
+ return SelectVLDSTLane(Op, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+ }
}
}
@@ -1224,20 +1869,17 @@ bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) {
assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
-
- SDValue Base, Offset, Opc;
- if (!SelectAddrMode2(Op, Op, Base, Offset, Opc))
- return true;
-
- OutOps.push_back(Base);
- OutOps.push_back(Offset);
- OutOps.push_back(Opc);
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
return false;
}
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM) {
- return new ARMDAGToDAGISel(TM);
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 41c9ecc43a9f..426cecb28eb7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -17,9 +17,11 @@
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -36,74 +38,101 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <sstream>
using namespace llvm;
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
- MVT PromotedBitwiseVT) {
+void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
+ EVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
- setOperationAction(ISD::STORE, VT, Promote);
- AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
}
- MVT ElemTy = VT.getVectorElementType();
+ EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::VSETCC, VT, Custom);
+ setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ if (ElemTy != MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
- }
-}
-
-void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
+ setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::AND, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::OR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
addRegisterClass(VT, ARM::DPRRegisterClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
-void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
+void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
addRegisterClass(VT, ARM::QPRRegisterClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ return new TargetLoweringObjectFileMachO();
+ return new ARMElfTargetObjectFile();
+}
+
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM), ARMPCLabelIndex(0) {
+ : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isTargetDarwin()) {
@@ -188,11 +217,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isThumb())
+ // Libcalls should use the AAPCS base standard ABI, even if hard float
+ // is in effect, as per the ARM RTABI specification, section 4.1.2.
+ if (Subtarget->isAAPCS_ABI()) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
+ CallingConv::ARM_AAPCS);
+ }
+ }
+
+ if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -213,6 +251,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP support any arithmetic operations on it.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
@@ -246,7 +317,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// i64 operation support.
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
@@ -287,7 +358,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::RET, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
@@ -300,7 +370,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // FIXME: Shouldn't need this, since no register is used, but the legalizer
+ // doesn't yet know how to not do that for SjLj.
+ setExceptionSelectorRegister(ARM::R0);
+ if (Subtarget->isThumb())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
@@ -309,7 +386,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb())
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
// Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
@@ -339,7 +416,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
@@ -347,7 +424,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FPOW, MVT::f32, Expand);
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
@@ -361,26 +438,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
setSchedulingPreference(SchedulingForRegPressure);
- setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
- setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
-
- if (!Subtarget->isThumb()) {
- // Use branch latency information to determine if-conversion limits.
- // FIXME: If-converter should use instruction latency of the branch being
- // eliminated to compute the threshold. For ARMv6, the branch "latency"
- // varies depending on whether it's dynamically or statically predicted
- // and on whether the destination is in the prefetch buffer.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
- unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
- if (Latency > 1) {
- setIfCvtBlockSizeLimit(Latency-1);
- if (Latency > 2)
- setIfCvtDupBlockSizeLimit(Latency-2);
- } else {
- setIfCvtBlockSizeLimit(10);
- setIfCvtDupBlockSizeLimit(2);
- }
+
+ // FIXME: If-converter should use instruction latency to determine
+ // profitability rather than relying on fixed limits.
+ if (Subtarget->getCPUString() == "generic") {
+ // Generic (and overly aggressive) if-conversion limits.
+ setIfCvtBlockSizeLimit(10);
+ setIfCvtDupBlockSizeLimit(2);
+ } else if (Subtarget->hasV6Ops()) {
+ setIfCvtBlockSizeLimit(2);
+ setIfCvtDupBlockSizeLimit(1);
+ } else {
+ setIfCvtBlockSizeLimit(3);
+ setIfCvtDupBlockSizeLimit(2);
}
  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type
@@ -401,6 +471,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::tCALL: return "ARMISD::tCALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
@@ -425,6 +496,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@@ -453,13 +526,21 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
- case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
}
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
- return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
+ return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
}
//===----------------------------------------------------------------------===//
@@ -469,7 +550,7 @@ unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
- default: assert(0 && "Unknown condition code!");
+ default: llvm_unreachable("Unknown condition code!");
case ISD::SETNE: return ARMCC::NE;
case ISD::SETEQ: return ARMCC::EQ;
case ISD::SETGT: return ARMCC::GT;
@@ -483,15 +564,12 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
}
}
-/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
-/// returns true if the operands should be inverted to form the proper
-/// comparison.
-static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
ARMCC::CondCodes &CondCode2) {
- bool Invert = false;
CondCode2 = ARMCC::AL;
switch (CC) {
- default: assert(0 && "Unknown FP condition!");
+ default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
case ISD::SETGT:
@@ -499,7 +577,7 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
- case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
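+  // Note: with VFP flags, LS ("lower or same") is exactly ordered <=, so
+  // SETOLE no longer needs the operand inversion the old GT mapping required.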
case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
@@ -513,24 +591,16 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETNE:
case ISD::SETUNE: CondCode = ARMCC::NE; break;
}
- return Invert;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
-//
-// The lower operations present on calling convention works on this order:
-// LowerCALL (virt regs --> phys regs, virt regs --> stack)
-// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
-// LowerRET (virt regs --> phys regs)
-// LowerCALL (phys regs --> virt regs)
-//
//===----------------------------------------------------------------------===//
#include "ARMGenCallingConv.inc"
// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
@@ -560,7 +630,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -573,7 +643,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
@@ -603,7 +673,7 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -615,7 +685,7 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true; // we handled it
}
-static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
@@ -635,7 +705,7 @@ static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -646,7 +716,7 @@ static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true; // we handled it
}
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -656,49 +726,48 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
- bool Return) const {
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
switch (CC) {
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
- // Use target triple & subtarget features to do actual dispatch.
- if (Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
- else
- return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
- } else
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
}
}
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. The returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *ARMTargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG) {
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
- DebugLoc dl = TheCall->getDebugLoc();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- bool isVarArg = TheCall->isVarArg();
- CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeCallResult(TheCall,
- CCAssignFnForNode(CallingConv, /* Return*/ true));
-
- SmallVector<SDValue, 8> ResultVals;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -743,20 +812,17 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
}
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
break;
}
- ResultVals.push_back(Val);
+ InVals.push_back(Val);
}
- // Merge everything together with a MERGE_VALUES node.
- ResultVals.push_back(Chain);
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).getNode();
+ return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
@@ -776,11 +842,11 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
-ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
- const SDValue &StackPtr,
- const CCValAssign &VA, SDValue Chain,
- SDValue Arg, ISD::ArgFlagsTy Flags) {
- DebugLoc dl = TheCall->getDebugLoc();
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
@@ -791,14 +857,13 @@ ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
PseudoSourceValue::getStack(), LocMemOffset);
}
-void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVector<SDValue, 8> &MemOpChains,
ISD::ArgFlagsTy Flags) {
- DebugLoc dl = TheCall->getDebugLoc();
SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
@@ -811,27 +876,31 @@ void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG,
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, NextVA,
- Chain, fmrrd.getValue(1), Flags));
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ dl, DAG, NextVA,
+ Flags));
}
}
-/// LowerCALL - Lowering a ISD::CALL node into a callseq_start <-
+/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
-SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- MVT RetVT = TheCall->getRetValType(0);
- SDValue Chain = TheCall->getChain();
- unsigned CC = TheCall->getCallingConv();
- bool isVarArg = TheCall->isVarArg();
- SDValue Callee = TheCall->getCallee();
- DebugLoc dl = TheCall->getDebugLoc();
+SDValue
+ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false));
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -851,12 +920,12 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = TheCall->getArg(realArgIdx);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx);
+ SDValue Arg = Outs[realArgIdx].Val;
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
@@ -872,7 +941,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
break;
}
- // f64 and v2f64 are passed in i32 pairs and must be split into pieces
+    // f64 and v2f64 might be passed in i32 pairs and, if so, must be
+    // split into pieces.
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -880,23 +949,23 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, MVT::i32));
- PassF64ArgInRegs(TheCall, DAG, Chain, Op0, RegsToPass,
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
VA = ArgLocs[++i]; // skip ahead to next loc
if (VA.isRegLoc()) {
- PassF64ArgInRegs(TheCall, DAG, Chain, Op1, RegsToPass,
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
} else {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
- Chain, Op1, Flags));
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
}
} else {
- PassF64ArgInRegs(TheCall, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
@@ -906,8 +975,8 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
- Chain, Arg, Flags));
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
}
}
@@ -933,17 +1002,17 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
isDirect = true;
- bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
- GV->hasLinkOnceLinkage());
+ bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && !isExt;
// tBX takes a register source operand.
- if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
- ARMCP::CPStub, 4);
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
@@ -960,9 +1029,9 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
isARMFunc = !Subtarget->isThumb() || isStub;
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
- if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
- ARMCP::CPStub, 4);
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
@@ -977,7 +1046,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
- if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
@@ -986,7 +1055,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
: ARMISD::CALL_NOLINK;
}
- if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) {
+ if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
InFlag = Chain.getValue(1);
@@ -1011,30 +1080,31 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
- if (RetVT != MVT::Other)
+ if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
- Op.getResNo());
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
}
-SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
- // The chain is always operand #0
- SDValue Chain = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
// CCState - Info about the registers and stack slots.
- CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
- // Analyze return values of ISD::RET.
- CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true));
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
// If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
@@ -1053,12 +1123,10 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- // ISD::RET => ret chain, (regnum1,val1), ...
- // So i*2+1 index only the regnums
- SDValue Arg = Op.getOperand(realRVLocIdx*2+1);
+ SDValue Arg = Outs[realRVLocIdx].Val;
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
@@ -1112,13 +1180,13 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
-// their target countpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
DebugLoc dl = Op.getDebugLoc();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -1137,11 +1205,11 @@ SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) {
DebugLoc dl = GA->getDebugLoc();
- MVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj, "tlsgd", true);
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, "tlsgd", true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
@@ -1154,12 +1222,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (const Type *) Type::Int32Ty;
+ Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false,
- 0, CallingConv::C, false,
+ LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
@@ -1173,16 +1242,16 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
DebugLoc dl = GA->getDebugLoc();
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
- MVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy();
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
- if (GV->isDeclaration()){
+ if (GV->isDeclaration()) {
// initial exec model
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj, "gottpoff", true);
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, "gottpoff", true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
@@ -1194,8 +1263,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
} else {
// local exec model
- ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
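
Both exec models above compute the same final address, thread pointer plus offset; they differ only in whether the offset is loaded from the GOT at run time ("gottpoff") or fixed at link time ("tpoff"). A sketch of the arithmetic, names illustrative:

#include <cstdint>

// Illustration only: either TLS exec model ends with
// thread pointer + per-variable offset.
char *tlsAddress(char *threadPointer, uintptr_t offset) {
  return threadPointer + offset;
}
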
@@ -1222,59 +1290,47 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) {
- MVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
+ new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- CPAddr, NULL, 0);
+ CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
- Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ PseudoSourceValue::getGOT(), 0);
return Result;
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
}
}
-/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
-/// even in non-static mode.
-static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
- // If symbol visibility is hidden, the extra load is not needed if
- // the symbol is definitely defined in the current translation unit.
- bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
- if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage()))
- return false;
- return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker());
-}
-
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) {
- MVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
SDValue CPAddr;
if (RelocM == Reloc::Static)
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
else {
- unsigned PCAdj = (RelocM != Reloc::PIC_)
- ? 0 : (Subtarget->isThumb() ? 4 : 8);
- ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
- : ARMCP::CPValue;
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
- Kind, PCAdj);
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1286,7 +1342,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
- if (IsIndirect)
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
return Result;
@@ -1296,32 +1353,55 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG){
assert(Subtarget->isTargetELF() &&
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
- MVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj);
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
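
The PIC_ADD idiom used here (and for Darwin globals above) relies on the pool entry holding Symbol - (LabelAddr + PCAdj), so adding the pc observed at the label recovers the symbol; PCAdj is 8 in ARM mode and 4 in Thumb because that is how far the architectural pc runs ahead of the executing instruction. A sketch with illustrative names:

#include <cstdint>

// Illustrative only. The constant-pool entry was written as
//   entry = Symbol - (labelAddr + pcAdj)
// so adding the pc seen at the label recovers the symbol's address.
uintptr_t picMaterialize(uintptr_t labelAddr, intptr_t entry, bool thumb) {
  unsigned pcAdj = thumb ? 4 : 8;  // pipeline offset of the visible pc
  return labelAddr + pcAdj + entry;
}
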
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::arm_thread_pointer:
- return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
case Intrinsic::eh_sjlj_setjmp:
- SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32,
- Op.getOperand(1));
- return Res;
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1));
}
}
@@ -1330,13 +1410,60 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
DebugLoc dl = Op.getDebugLoc();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
}
SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (AlignVal > StackAlign)
+    // Do this now since the selection pass cannot introduce new
+    // target-independent nodes.
+ Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
+
+  // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, so we end up
+  // using an "add r, sp, r" instead. Negate the size now so we don't have
+  // to do an even more horrible hack later.
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumb1OnlyFunction()) {
+ bool Negate = true;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
+ if (C) {
+ uint32_t Val = C->getZExtValue();
+ if (Val <= 508 && ((Val & 3) == 0))
+ Negate = false;
+ }
+ if (Negate)
+ Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
+ }
+
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops1[] = { Chain, Size, Align };
+ SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
+ Chain = Res.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ SDValue Ops2[] = { Res, Chain };
+ return DAG.getMergeValues(Ops2, 2, dl);
+}
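
The Val <= 508 / multiple-of-4 test mirrors the encoding of Thumb1's SUB sp, #imm, whose 7-bit immediate is scaled by 4; only sizes that fit it avoid the negate-and-add detour. A standalone restatement (helper name hypothetical):

#include <cassert>
#include <cstdint>

// Hypothetical helper: Thumb1 SUB sp, #imm holds imm7 << 2, i.e. 0..508
// in steps of 4, matching the (Val <= 508 && (Val & 3) == 0) test above.
static bool fitsThumb1SubSp(uint32_t val) {
  return val <= 508 && (val & 3) == 0;
}

int main() {
  assert(fitsThumb1SubSp(508) && !fitsThumb1SubSp(512) &&
         !fitsThumb1SubSp(6));
}
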
+
+SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) {
@@ -1344,7 +1471,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
TargetRegisterClass *RC;
- if (AFI->isThumbFunction())
+ if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
RC = ARM::GPRRegisterClass;
@@ -1371,21 +1498,25 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
}
SDValue
-ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- SDValue Root = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- unsigned CC = MF.getFunction()->getCallingConv();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.getNode(),
- CCAssignFnForNode(CC, /* Return*/ false));
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
SmallVector<SDValue, 16> ArgValues;
@@ -1394,7 +1525,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Arguments stored in registers.
if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
+ EVT RegVT = VA.getLocVT();
SDValue ArgValue;
if (VA.needsCustom()) {
@@ -1404,43 +1535,43 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
if (VA.getLocVT() == MVT::v2f64) {
SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
- Root, DAG, dl);
+ Chain, DAG, dl);
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
- Root, DAG, dl);
+ Chain, DAG, dl);
ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
} else
- ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Root, DAG, dl);
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
TargetRegisterClass *RC;
- if (FloatABIType == FloatABI::Hard && RegVT == MVT::f32)
+
+ if (RegVT == MVT::f32)
RC = ARM::SPRRegisterClass;
- else if (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)
+ else if (RegVT == MVT::f64)
RC = ARM::DPRRegisterClass;
- else if (AFI->isThumbFunction())
- RC = ARM::tGPRRegisterClass;
+ else if (RegVT == MVT::v2f64)
+ RC = ARM::QPRRegisterClass;
+ else if (RegVT == MVT::i32)
+ RC = (AFI->isThumb1OnlyFunction() ?
+ ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
else
- RC = ARM::GPRRegisterClass;
-
- assert((RegVT == MVT::i32 || RegVT == MVT::f32 ||
- (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)) &&
- "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
@@ -1457,7 +1588,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
break;
}
- ArgValues.push_back(ArgValue);
+ InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
@@ -1470,7 +1601,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
}
}
@@ -1500,31 +1631,27 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
SmallVector<SDValue, 4> MemOps;
for (; NumGPRs < 4; ++NumGPRs) {
TargetRegisterClass *RC;
- if (AFI->isThumbFunction())
+ if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
RC = ARM::GPRRegisterClass;
unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
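
The register save area built above spills the remaining r0-r3 arguments immediately below the stack-passed ones, so a va_list can walk one contiguous region. Ordinary varargs code, for context:

#include <cstdarg>

// Standard varargs usage; with the GPR save area in place, va_arg steps
// from the register-passed ints straight into the stack-passed ones.
int sumInts(int count, ...) {
  va_list ap;
  va_start(ap, count);
  int total = 0;
  for (int i = 0; i < count; ++i)
    total += va_arg(ap, int);
  va_end(ap);
  return total;
}
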
/// isFloatingPointZero - Return true if this is +0.0.
@@ -1543,46 +1670,46 @@ static bool isFloatingPointZero(SDValue Op) {
return false;
}
-static bool isLegalCmpImmediate(unsigned C, bool isThumb) {
- return ( isThumb && (C & ~255U) == 0) ||
- (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
+ return ( isThumb1Only && (C & ~255U) == 0) ||
+ (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
}
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, bool isThumb,
+ SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only,
DebugLoc dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
- if (!isLegalCmpImmediate(C, isThumb)) {
+ if (!isLegalCmpImmediate(C, isThumb1Only)) {
// Constant does not fit, try adjusting it by one?
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalCmpImmediate(C-1, isThumb)) {
+ if (isLegalCmpImmediate(C-1, isThumb1Only)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) {
+ if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalCmpImmediate(C+1, isThumb)) {
+ if (isLegalCmpImmediate(C+1, isThumb1Only)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) {
+ if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
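
The adjust-by-one rewrites above only pay off because of what each mode can encode: Thumb1 CMP takes a plain 8-bit immediate, while ARM mode takes an 8-bit value rotated right by an even amount (what ARM_AM::getSOImmVal tests). A standalone sketch of the ARM rule with one worked rewrite (isSOImm is an illustrative re-statement, not the LLVM helper):

#include <cassert>
#include <cstdint>

// Legal ARM-mode compare immediate: some even rotation of C fits in 8 bits.
static bool isSOImm(uint32_t c) {
  for (unsigned rot = 0; rot < 32; rot += 2) {
    uint32_t v = rot ? ((c << rot) | (c >> (32 - rot))) : c;
    if (v <= 0xFF)
      return true;
  }
  return false;
}

int main() {
  // "x < 257" has no encodable immediate, but the equivalent "x <= 256"
  // does (256 is 1 rotated right by 24). That is exactly the
  // SETLT -> SETLE, C -> C-1 rewrite performed above.
  assert(!isSOImm(257) && isSOImm(256));
}
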
@@ -1620,7 +1747,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -1631,13 +1758,12 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
- if (FPCCToARMCC(CC, CondCode, CondCode2))
- std::swap(TrueVal, FalseVal);
+ FPCCToARMCC(CC, CondCode, CondCode2);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
@@ -1666,16 +1792,14 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMCC, CCR,Cmp);
}
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
ARMCC::CondCodes CondCode, CondCode2;
- if (FPCCToARMCC(CC, CondCode, CondCode2))
- // Swap the LHS/RHS of the comparison if needed.
- std::swap(LHS, RHS);
+ FPCCToARMCC(CC, CondCode, CondCode2);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
@@ -1697,21 +1821,32 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
SDValue Index = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- MVT PTy = getPointerTy();
+ EVT PTy = getPointerTy();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
- SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
- bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
- Chain, Addr, NULL, 0);
- Chain = Addr.getValue(1);
- if (isPIC)
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI, UId);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, NULL, 0);
+ Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
- return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ } else {
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ }
}
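
In the PIC case the table entries are offsets relative to the table itself, which is why the loaded value gets the table address added back before the branch. The address arithmetic, assuming 32-bit entries and pointers (illustrative names):

#include <cstdint>

// Illustration only: deriving the branch target from the jump table.
uintptr_t jumpTableTarget(const int32_t *table, unsigned index, bool pic) {
  int32_t entry = table[index];
  return pic ? uintptr_t(table) + entry      // entry is table-relative
             : uintptr_t(uint32_t(entry));   // entry is the address itself
}
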
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
@@ -1723,7 +1858,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned Opc =
Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
@@ -1737,8 +1872,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT SrcVT = Tmp1.getValueType();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
@@ -1749,7 +1884,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
@@ -1784,7 +1919,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
unsigned BytesLeft = SizeVal & 3;
unsigned NumMemOps = SizeVal >> 2;
unsigned EmittedNumMemOps = 0;
- MVT VT = MVT::i32;
+ EVT VT = MVT::i32;
unsigned VTSize = 4;
unsigned i = 0;
const unsigned MAX_LOADS_IN_LDM = 6;
@@ -1890,45 +2025,55 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Zero vectors are used to represent vector negation and in those cases
// will be implemented with the NEON VNEG instruction. However, VNEG does
// not support i64 elements, so sometimes the zero vectors will need to be
// explicitly constructed. For those cases, and potentially other uses in
- // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- if (VT.getSizeInBits() == 64)
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
+ SmallVector<SDValue, 8> Ops;
+ MVT TVT;
+
+ if (VT.getSizeInBits() == 64) {
+ Ops.assign(8, Cst); TVT = MVT::v8i8;
+ } else {
+ Ops.assign(16, Cst); TVT = MVT::v16i8;
+ }
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of specified type with all bits set.
///
-static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
+  // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
+ // dest type. This ensures they get CSE'd.
SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
- if (VT.getSizeInBits() == 64)
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
+ SmallVector<SDValue, 8> Ops;
+ MVT TVT;
+
+ if (VT.getSizeInBits() == 64) {
+ Ops.assign(8, Cst); TVT = MVT::v8i8;
+ } else {
+ Ops.assign(16, Cst); TVT = MVT::v16i8;
+ }
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
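
The canonicalization works because all-zeros and all-ones are the same bit pattern at every element width, so one v8i8/v16i8 node can stand in for any lane type after a bitcast. A standalone check of that bit-level claim:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Sixteen 0xFF bytes reinterpreted as four i32 lanes are still all-ones,
  // which is what lets one canonical BUILD_VECTOR be CSE'd across types.
  uint8_t bytes[16];
  std::memset(bytes, 0xFF, sizeof bytes);
  uint32_t lanes[4];
  std::memcpy(lanes, bytes, sizeof lanes);
  for (int i = 0; i < 4; ++i)
    assert(lanes[i] == 0xFFFFFFFFu);
}
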
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
// Lower vector shifts on NEON to use VSHL.
@@ -1947,7 +2092,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
// NEON uses the same intrinsics for both left and right shifts. For
// right shifts, the shift amounts are negative, so negate the vector of
// shift amounts.
- MVT ShiftVT = N->getOperand(1).getValueType();
+ EVT ShiftVT = N->getOperand(1).getValueType();
SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
getZeroVector(ShiftVT, DAG, dl),
N->getOperand(1));
@@ -1959,8 +2104,11 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
N->getOperand(0), NegatedCount);
}
- assert(VT == MVT::i64 &&
- (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
@@ -1969,7 +2117,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
return SDValue();
// If we are in thumb mode, we don't have RRX.
- if (ST->isThumb()) return SDValue();
+ if (ST->isThumb1Only()) return SDValue();
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
@@ -1998,13 +2146,13 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
DebugLoc dl = Op.getDebugLoc();
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
- default: assert(0 && "Illegal FP comparison"); break;
+ default: llvm_unreachable("Illegal FP comparison"); break;
case ISD::SETUNE:
case ISD::SETNE: Invert = true; // Fallthrough
case ISD::SETOEQ:
@@ -2043,7 +2191,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
} else {
// Integer comparisons.
switch (SetCCOpcode) {
- default: assert(0 && "Illegal integer comparison"); break;
+ default: llvm_unreachable("Illegal integer comparison"); break;
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true;
@@ -2056,7 +2204,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
}
- // Detect VTST (Vector Test Bits) = vicmp ne (and (op0, op1), zero).
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
if (Opc == ARMISD::VCEQ) {
SDValue AndOp;
@@ -2147,7 +2295,7 @@ static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
}
default:
- assert(0 && "unexpected size for isVMOVSplat");
+ llvm_unreachable("unexpected size for isVMOVSplat");
break;
}
@@ -2174,22 +2322,123 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
SplatBitSize, DAG);
}
-static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
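
As a worked example of the rule checked above: on two v8i8 sources, mask <3,4,5,6,7,8,9,10> is a window sliding from element 3 into the second source, i.e. VEXT with Imm = 3; indices that wrap past 2*NumElts force the sources to be swapped. A simplified standalone copy for experimenting with masks (names illustrative):

#include <cassert>
#include <vector>

// Simplified stand-alone copy of the check above.
static bool isVext(const std::vector<int> &m, unsigned numElts,
                   bool &reverse, unsigned &imm) {
  reverse = false;
  imm = m[0];
  unsigned expected = imm;
  for (unsigned i = 1; i < numElts; ++i) {
    if (++expected == numElts * 2) { expected = 0; reverse = true; }
    if (unsigned(m[i]) != expected)
      return false;
  }
  if (reverse)
    imm -= numElts;
  return true;
}

int main() {
  bool rev; unsigned imm;
  assert(isVext({3, 4, 5, 6, 7, 8, 9, 10}, 8, rev, imm) && !rev && imm == 3);
}
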
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
+static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned BlockElts = M[0] + 1;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned) M[i] !=
+ (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
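
Concretely, VREV64.16 on a v8i16 has BlockElts = 64/16 = 4, so lane i maps to (i - i%4) + (3 - i%4) and the accepted mask is <3,2,1,0,7,6,5,4>. A few lines that print it from the same formula:

#include <cstdio>

int main() {
  const unsigned blockElts = 4; // VREV64.16 on v8i16
  for (unsigned i = 0; i < 8; ++i)
    std::printf("%u ", (i - i % blockElts) + (blockElts - 1 - i % blockElts));
  std::printf("\n"); // prints: 3 2 1 0 7 6 5 4
}
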
+
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((unsigned) M[i] != i + WhichResult ||
+ (unsigned) M[i+1] != i + NumElts + WhichResult)
+ return false;
+ }
+ return true;
+}
+
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32)
+ return false;
+
+ return true;
+}
+
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((unsigned) M[i] != Idx ||
+ (unsigned) M[i+1] != Idx + NumElts)
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32)
+ return false;
+
+ return true;
+}
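
For orientation, the result-0 masks these three predicates accept on a v4i16 shuffle of A = <a0..a3> and B = <b0..b3> are VTRN <0,4,2,6>, VUZP <0,2,4,6>, and VZIP <0,4,1,5>. Generated straight from the loop conditions above:

#include <cstdio>

int main() {
  const unsigned n = 4; // v4i16, WhichResult = 0
  std::printf("VTRN:");
  for (unsigned i = 0; i < n; i += 2) std::printf(" %u %u", i, i + n);
  std::printf("\nVUZP:");
  for (unsigned i = 0; i < n; ++i) std::printf(" %u", 2 * i);
  std::printf("\nVZIP:");
  for (unsigned i = 0, idx = 0; i < n; i += 2, ++idx)
    std::printf(" %u %u", idx, idx + n);
  std::printf("\n"); // VTRN: 0 4 2 6 / VUZP: 0 2 4 6 / VZIP: 0 4 1 5
}
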
+
+static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// Canonicalize all-zeros and all-ones vectors.
- ConstantSDNode *ConstVal = dyn_cast<ConstantSDNode>(Val.getNode());
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
if (ConstVal->isNullValue())
return getZeroVector(VT, DAG, dl);
if (ConstVal->isAllOnesValue())
return getOnesVector(VT, DAG, dl);
- MVT CanonicalVT;
+ EVT CanonicalVT;
if (VT.is64BitVector()) {
switch (Val.getValueType().getSizeInBits()) {
case 8: CanonicalVT = MVT::v8i8; break;
case 16: CanonicalVT = MVT::v4i16; break;
case 32: CanonicalVT = MVT::v2i32; break;
case 64: CanonicalVT = MVT::v1i64; break;
- default: assert(0 && "unexpected splat element type"); break;
+ default: llvm_unreachable("unexpected splat element type"); break;
}
} else {
assert(VT.is128BitVector() && "unknown splat vector size");
@@ -2198,7 +2447,7 @@ static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) {
case 16: CanonicalVT = MVT::v8i16; break;
case 32: CanonicalVT = MVT::v4i32; break;
case 64: CanonicalVT = MVT::v2i64; break;
- default: assert(0 && "unexpected splat element type"); break;
+ default: llvm_unreachable("unexpected splat element type"); break;
}
}
@@ -2213,69 +2462,291 @@ static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) {
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize, DAG);
- if (Val.getNode())
- return BuildSplat(Val, Op.getValueType(), DAG, dl);
+ if (SplatBitSize <= 64) {
+ SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize, DAG);
+ if (Val.getNode())
+ return BuildSplat(Val, VT, DAG, dl);
+ }
+ }
+
+ // If there are only 2 elements in a 128-bit vector, insert them into an
+ // undef vector. This handles the common case for 128-bit vector argument
+ // passing, where the insertions should be translated to subreg accesses
+ // with no real instructions.
+ if (VT.is128BitVector() && Op.getNumOperands() == 2) {
+ SDValue Val = DAG.getUNDEF(VT);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
+ DAG.getIntPtrConstant(1));
+ return Val;
}
return SDValue();
}
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
- return Op;
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm, WhichResult;
+
+ return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
}
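
Each PerfectShuffleTable entry packs a whole expansion into 32 bits: cost in bits 31-30, opcode in bits 29-26, and two 13-bit operand ids. The ids encode four mask entries as base-9 digits (0-7 for a lane, 8 for undef), which is why OP_COPY compares against (1*9+2)*9+3 = 102 for <0,1,2,3> and ((4*9+5)*9+6)*9+7 for <4,5,6,7>. A decoding sketch, helper name illustrative:

#include <cstdio>

// Illustrative decoder for one perfect-shuffle table entry.
void decodePerfectShuffleEntry(unsigned pfEntry) {
  unsigned cost  = pfEntry >> 30;
  unsigned opNum = (pfEntry >> 26) & 0x0F;
  unsigned lhsId = (pfEntry >> 13) & ((1 << 13) - 1);
  unsigned rhsId = pfEntry & ((1 << 13) - 1);
  std::printf("cost=%u op=%u lhs=%u rhs=%u\n", cost, opNum, lhsId, rhsId);
}
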
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- return Op;
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SmallVector<int, 8> ShuffleMask;
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+  // of the same size so that they get CSEd properly.
+ SVN->getMask(ShuffleMask);
+
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- assert((VT == MVT::i8 || VT == MVT::i16) &&
- "unexpected type for custom-lowering vector extract");
SDValue Vec = Op.getOperand(0);
SDValue Lane = Op.getOperand(1);
+
+  // FIXME: This is invalid for 8- and 16-bit elements - the information
+  // about sign / zero extension is lost!
Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+
+ if (VT.bitsLT(MVT::i32))
+ Op = DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+ else if (VT.bitsGT(MVT::i32))
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op);
+
+ return Op;
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op) {
- if (Op.getValueType().is128BitVector() && Op.getNumOperands() == 2)
- return Op;
- return SDValue();
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- default: assert(0 && "Don't know how to custom lower this!"); abort();
+ default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress:
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::CALL: return LowerCALL(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
@@ -2287,9 +2758,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
}
return SDValue();
}
@@ -2301,7 +2771,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
SelectionDAG &DAG) {
switch (N->getOpcode()) {
default:
- assert(0 && "Don't know how to custom expand this!");
+ llvm_unreachable("Don't know how to custom expand this!");
return;
case ISD::BIT_CONVERT:
Results.push_back(ExpandBIT_CONVERT(N, DAG));
@@ -2322,12 +2792,14 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
- case ARM::tMOVCCr: {
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
@@ -2352,12 +2824,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
- while(!BB->succ_empty())
+ while (!BB->succ_empty())
BB->removeSuccessor(BB->succ_begin());
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
@@ -2381,6 +2856,78 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
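
For reference, the diamond the tMOVCCr_pseudo expansion creates, as a comment sketch (block names follow the code above):

  //        thisMBB          entry: conditional branch on the CC register
  //        /      \
  //   copy0MBB     |        false edge: materializes the "false" value
  //        \      /
  //        sinkMBB          PHI merges the two values; BB's old successors
  //                         were moved here (and recorded in EM for sdisel)
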
+
+ case ARM::tANDsp:
+ case ARM::tADDspr_:
+ case ARM::tSUBspi_:
+ case ARM::t2SUBrSPi_:
+ case ARM::t2SUBrSPi12_:
+ case ARM::t2SUBrSPs_: {
+ MachineFunction *MF = BB->getParent();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool DstIsDead = MI->getOperand(0).isDead();
+ bool SrcIsKill = MI->getOperand(1).isKill();
+
+ if (SrcReg != ARM::SP) {
+ // Copy the source to SP from virtual register.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
+ unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
+ ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
+ BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ }
+
+ unsigned OpOpc = 0;
+ bool NeedPred = false, NeedCC = false, NeedOp3 = false;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected pseudo instruction!");
+ case ARM::tANDsp:
+ OpOpc = ARM::tAND;
+ NeedPred = true;
+ break;
+ case ARM::tADDspr_:
+ OpOpc = ARM::tADDspr;
+ break;
+ case ARM::tSUBspi_:
+ OpOpc = ARM::tSUBspi;
+ break;
+ case ARM::t2SUBrSPi_:
+ OpOpc = ARM::t2SUBrSPi;
+ NeedPred = true; NeedCC = true;
+ break;
+ case ARM::t2SUBrSPi12_:
+ OpOpc = ARM::t2SUBrSPi12;
+ NeedPred = true;
+ break;
+ case ARM::t2SUBrSPs_:
+ OpOpc = ARM::t2SUBrSPs;
+ NeedPred = true; NeedCC = true; NeedOp3 = true;
+ break;
+ }
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+ if (OpOpc == ARM::tAND)
+ AddDefaultT1CC(MIB);
+ MIB.addReg(ARM::SP);
+ MIB.addOperand(MI->getOperand(2));
+ if (NeedOp3)
+ MIB.addOperand(MI->getOperand(3));
+ if (NeedPred)
+ AddDefaultPred(MIB);
+ if (NeedCC)
+ AddDefaultCC(MIB);
+
+ // Copy the result from SP to virtual register.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
+ unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
+ ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
+ BuildMI(BB, dl, TII->get(CopyOpc))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(ARM::SP);
+ MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
}
}
@@ -2393,7 +2940,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
unsigned Opc = N->getOpcode();
bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
@@ -2421,7 +2968,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
cast<ConstantSDNode>(RHS)->isNullValue()) {
std::swap(LHS, RHS);
SDValue Op0 = Slct.getOperand(0);
- MVT OpVT = isSlctCC ? Op0.getValueType() :
+ EVT OpVT = isSlctCC ? Op0.getValueType() :
Op0.getOperand(0).getValueType();
bool isInt = OpVT.isInteger();
CC = ISD::getSetCCInverse(CC, isInt);
@@ -2516,7 +3063,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) {
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
@@ -2530,7 +3077,7 @@ static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) {
/// absolute value must be in the range:
/// 1 <= |Value| <= ElementBits for a right shift; or
/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, MVT VT, bool isNarrow, bool isIntrinsic,
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
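
The two helpers above enforce the immediate ranges spelled out in their doc comments. A standalone sketch of the left-shift rule, assuming only what those comments state (vshlImmOK is a hypothetical name):

  #include <cassert>

  // 0 <= Cnt < ElementBits for a plain left shift;
  // 0 <= Cnt <= ElementBits for a long left shift (vshll).
  static bool vshlImmOK(long long Cnt, unsigned ElementBits, bool isLong) {
    return Cnt >= 0 && Cnt < (long long)ElementBits + (isLong ? 1 : 0);
  }

  int main() {
    assert(vshlImmOK(7, 8, false));   // v8i8: shift by 7 is fine
    assert(!vshlImmOK(8, 8, false));  // shift by the element width is not...
    assert(vshlImmOK(8, 8, true));    // ...unless it is a long shift
  }
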
@@ -2571,7 +3118,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
case Intrinsic::arm_neon_vqrshiftns:
case Intrinsic::arm_neon_vqrshiftnu:
case Intrinsic::arm_neon_vqrshiftnsu: {
- MVT VT = N->getOperand(1).getValueType();
+ EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
unsigned VShiftOpc = 0;
@@ -2593,8 +3140,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
case Intrinsic::arm_neon_vshiftlu:
if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
break;
- assert(0 && "invalid shift count for vshll intrinsic");
- abort();
+ llvm_unreachable("invalid shift count for vshll intrinsic");
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
@@ -2611,8 +3157,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
case Intrinsic::arm_neon_vqshiftsu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
break;
- assert(0 && "invalid shift count for vqshlu intrinsic");
- abort();
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
case Intrinsic::arm_neon_vshiftn:
case Intrinsic::arm_neon_vrshiftn:
@@ -2625,11 +3170,10 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
// Narrowing shifts require an immediate right shift.
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
break;
- assert(0 && "invalid shift count for narrowing vector shift intrinsic");
- abort();
+ llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
default:
- assert(0 && "unhandled vector shift");
+ llvm_unreachable("unhandled vector shift");
}
switch (IntNo) {
@@ -2678,7 +3222,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
}
case Intrinsic::arm_neon_vshiftins: {
- MVT VT = N->getOperand(1).getValueType();
+ EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
unsigned VShiftOpc = 0;
@@ -2687,8 +3231,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
VShiftOpc = ARMISD::VSRI;
else {
- assert(0 && "invalid shift count for vsli/vsri intrinsic");
- abort();
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
@@ -2712,7 +3255,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// Nothing to be done for scalar shifts.
if (! VT.isVector())
@@ -2722,7 +3265,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
int64_t Cnt;
switch (N->getOpcode()) {
- default: assert(0 && "unexpected shift opcode");
+ default: llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
@@ -2755,8 +3298,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue Vec = N0.getOperand(0);
SDValue Lane = N0.getOperand(1);
- MVT VT = N->getValueType(0);
- MVT EltVT = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (VT == MVT::i32 &&
@@ -2765,7 +3308,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opc = 0;
switch (N->getOpcode()) {
- default: assert(0 && "unexpected opcode");
+ default: llvm_unreachable("unexpected opcode");
case ISD::SIGN_EXTEND:
Opc = ARMISD::VGETLANEs;
break;
@@ -2802,10 +3345,88 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ if (!Subtarget->hasV6Ops())
+ // Pre-v6 does not support unaligned mem access.
+ return false;
+  else if (!Subtarget->hasV7Ops()) {
+    // Plain v6 may or may not support unaligned mem access.

+ if (!Subtarget->isTargetDarwin())
+ return false;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return true;
+ // FIXME: VLD1 etc with standard alignment is legal.
+ }
+}
+
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+    // Scale == 1.
+    break;
+  case MVT::i16:
+    Scale = 2;
+    break;
+  case MVT::i32:
+    Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
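
Worked example for the Thumb1 check above: the offset must be non-negative, a multiple of the access size, and fit in 5 bits after scaling. A standalone mirror of the i32 case (t1OffsetOK is a made-up name):

  #include <cassert>

  static bool t1OffsetOK(long long V, unsigned Scale) {
    if (V < 0 || (V & (Scale - 1)) != 0)
      return false;
    V /= Scale;
    return V == (V & ((1LL << 5) - 1));  // i.e. 0 <= V < 32
  }

  int main() {
    assert(t1OffsetOK(124, 4));   // i32: 124/4 == 31, the largest legal offset
    assert(!t1OffsetOK(128, 4));  // i32: 128/4 == 32 no longer fits in 5 bits
    assert(!t1OffsetOK(2, 4));    // i32: not 4-byte aligned
  }
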
+
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
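
For the Thumb2 integer cases above, positive offsets get 12 bits and negative offsets 8. A standalone mirror of that rule (t2IntOffsetOK is a made-up name):

  #include <cassert>

  static bool t2IntOffsetOK(long long V) {
    if (V < 0)
      return -V == (-V & ((1LL << 8) - 1));  // - imm8
    return V == (V & ((1LL << 12) - 1));     // + imm12
  }

  int main() {
    assert(t2IntOffsetOK(4095) && !t2IntOffsetOK(4096));
    assert(t2IntOffsetOK(-255) && !t2IntOffsetOK(-256));
  }
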
+
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
-static bool isLegalAddressImmediate(int64_t V, MVT VT,
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
const ARMSubtarget *Subtarget) {
if (V == 0)
return true;
@@ -2813,36 +3434,15 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT,
if (!VT.isSimple())
return false;
- if (Subtarget->isThumb()) {
- if (V < 0)
- return false;
-
- unsigned Scale = 1;
- switch (VT.getSimpleVT()) {
- default: return false;
- case MVT::i1:
- case MVT::i8:
- // Scale == 1;
- break;
- case MVT::i16:
- // Scale == 2;
- Scale = 2;
- break;
- case MVT::i32:
- // Scale == 4;
- Scale = 4;
- break;
- }
-
- if ((V & (Scale - 1)) != 0)
- return false;
- V /= Scale;
- return V == (V & ((1LL << 5) - 1));
- }
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+ // ARM mode.
if (V < 0)
V = - V;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
@@ -2854,7 +3454,7 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT,
return V == (V & ((1LL << 8) - 1));
case MVT::f32:
case MVT::f64:
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
return false;
if ((V & 3) != 0)
return false;
@@ -2863,11 +3463,44 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT,
}
}
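
The f32/f64 case a few lines up caps VFP load/store offsets at an 8-bit count of words: the offset must be 4-byte aligned and at most 1020 in magnitude. A standalone mirror (vfpOffsetOK is a made-up name):

  #include <cassert>

  static bool vfpOffsetOK(long long V) {
    if (V < 0) V = -V;
    if ((V & 3) != 0) return false;        // must be word-aligned
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));    // 8 bits of words: up to 255*4 == 1020
  }

  int main() {
    assert(vfpOffsetOK(1020) && vfpOffsetOK(-1020));
    assert(!vfpOffsetOK(1024) && !vfpOffsetOK(2));
  }
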
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
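
In the Thumb2 scaled-mode check above, "Scale & ~1" clears bit 0 so that reg + reg<<n with the base register folded into the index (an odd scale of 2^n + 1) passes the same test as a plain 2^n scale. A small sketch of which i32 scales get through (t2ScaleOK is a hypothetical name):

  #include <cassert>

  static bool t2ScaleOK(int Scale) {
    if (Scale < 0) return false;
    if (Scale == 1) return true;   // reg + reg
    Scale &= ~1;                   // fold the optional +1 from the base reg
    return Scale == 2 || Scale == 4 || Scale == 8;
  }

  int main() {
    assert(t2ScaleOK(1) && t2ScaleOK(2) && t2ScaleOK(8));
    assert(t2ScaleOK(5));    // base + index*5 == (base + index) + (index << 2)
    assert(!t2ScaleOK(6));   // 6 & ~1 == 6, not a legal shift amount
  }
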
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
- MVT VT = getValueType(Ty, true);
+ EVT VT = getValueType(Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
@@ -2879,7 +3512,7 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
case 0: // no scale reg, must be "r+i" or "r", or "i".
break;
case 1:
- if (Subtarget->isThumb())
+ if (Subtarget->isThumb1Only())
return false;
// FALL THROUGH.
default:
@@ -2890,22 +3523,22 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (!VT.isSimple())
return false;
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
int Scale = AM.Scale;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i32:
- case MVT::i64:
- // This assumes i64 is legalized to a pair of i32. If not (i.e.
- // ldrd / strd are used, then its address mode is same as i16.
- // r + r
if (Scale < 0) Scale = -Scale;
if (Scale == 1)
return true;
// r + r << imm
return isPowerOf2_32(Scale & ~1);
case MVT::i16:
+ case MVT::i64:
// r + r
if (((unsigned)AM.HasBaseReg + Scale) <= 2)
return true;
@@ -2917,15 +3550,15 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
// operations. This should be made more precise and revisited later.
// Allow r << imm, but the imm has to be a multiple of two.
- if (AM.Scale & 1) return false;
- return isPowerOf2_32(AM.Scale);
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
}
break;
}
return true;
}
-static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT,
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
@@ -2983,7 +3616,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT,
return false;
}
-static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT,
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
@@ -3019,7 +3652,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (Subtarget->isThumb1Only())
return false;
- MVT VT;
+ EVT VT;
SDValue Ptr;
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
@@ -3037,7 +3670,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
- else
+ else
isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
if (!isLegal)
@@ -3058,7 +3691,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
if (Subtarget->isThumb1Only())
return false;
- MVT VT;
+ EVT VT;
SDValue Ptr;
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
@@ -3074,7 +3707,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
- else
+ else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
if (!isLegal)
@@ -3128,12 +3761,12 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'l':
- if (Subtarget->isThumb())
+ if (Subtarget->isThumb1Only())
return std::make_pair(0U, ARM::tGPRRegisterClass);
else
return std::make_pair(0U, ARM::GPRRegisterClass);
@@ -3152,7 +3785,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
std::vector<unsigned> ARMTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() != 1)
return std::vector<unsigned>();
@@ -3214,10 +3847,16 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
switch (Constraint) {
case 'I':
- if (Subtarget->isThumb()) {
- // This must be a constant between 0 and 255, for ADD immediates.
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
if (CVal >= 0 && CVal <= 255)
break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
} else {
// A constant that can be used as an immediate value in a
// data-processing instruction.
@@ -3227,7 +3866,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'J':
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
@@ -3244,13 +3883,21 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'K':
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb1Only()) {
// A 32-bit value where only one byte has a nonzero value. Exclude
// zero to match GCC. This constraint is used by GCC internally for
// constants that can be loaded with a move/shift combination.
// It is not useful otherwise but is implemented for compatibility.
if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
} else {
// A constant whose bitwise inverse can be used as an immediate
// value in a data-processing instruction. This can be used in GCC
@@ -3263,11 +3910,19 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'L':
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb1Only()) {
// This must be a constant between -7 and 7,
// for 3-operand ADD/SUB immediate instructions.
if (CVal >= -7 && CVal < 7)
break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
} else {
// A constant whose negation can be used as an immediate value in a
// data-processing instruction. This can be used in GCC with an "n"
@@ -3280,7 +3935,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'M':
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
@@ -3295,7 +3950,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'N':
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a constant between 0 and 31, for shift amounts.
if (CVal >= 0 && CVal <= 31)
break;
@@ -3303,7 +3958,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'O':
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a multiple of 4 between -508 and 508, for
// ADD/SUB sp = sp + immediate.
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
@@ -3322,3 +3977,9 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
Ops, DAG);
}
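
As a usage illustration, not part of the patch: with the Thumb2 paths above, the GCC-style 'I' constraint accepts any Thumb2 modified immediate, e.g. 0xff00 (an 8-bit value rotated), on ARM or Thumb2 targets; a constant like 0xffff00ff, whose bitwise inverse is 0xff00, would satisfy 'K'. A minimal sketch, assuming a GCC-compatible compiler targeting ARM/Thumb2:

  int add_const(int x) {
    int r;
    // 'I': a constant usable as a data-processing immediate.
    asm("add %0, %1, %2" : "=r"(r) : "r"(x), "I"(0xff00));
    return r;
  }
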
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 553a86d077b7..7d85f458d8e9 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -40,6 +40,7 @@ namespace llvm {
tCALL, // Thumb function call.
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
@@ -64,11 +65,13 @@ namespace llvm {
FMRRD, // double to two gprs.
FMDRR, // Two gprs to double.
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
THREAD_POINTER,
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -112,8 +115,18 @@ namespace llvm {
VGETLANEu, // zero-extend vector extract element
VGETLANEs, // sign-extend vector extract element
- // Vector duplicate lane (128-bit result only; 64-bit is a shuffle)
- VDUPLANEQ // splat a lane from a 64-bit vector to a 128-bit vector
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN // transpose
};
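
The VREV nodes added above reverse element order inside fixed-size chunks. Modeled on an 8-byte vector in plain C++ (vrev is a made-up helper; Group is the chunk size in bytes):

  #include <algorithm>
  #include <cstdint>

  static void vrev(uint8_t V[8], int Group) {
    for (int I = 0; I < 8; I += Group)
      std::reverse(V + I, V + I + Group);
  }
  // With V = {0,1,2,3,4,5,6,7}:
  //   vrev(V, 8) -> {7,6,5,4,3,2,1,0}  ~ VREV64 on v8i8
  //   vrev(V, 4) -> {3,2,1,0,7,6,5,4}  ~ VREV32 on v8i8
  //   vrev(V, 2) -> {1,0,3,2,5,4,7,6}  ~ VREV16 on v8i8
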
}
@@ -147,11 +160,18 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+  /// unaligned memory accesses of the specified type.
+ /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
@@ -175,13 +195,15 @@ namespace llvm {
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const;
+
+
ConstraintType getConstraintType(const std::string &Constraint) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -200,21 +222,23 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
- /// ARMPCLabelIndex - Keep track the number of ARM PC labels created.
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
- void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
- void addDRTypeForNEON(MVT VT);
- void addQRTypeForNEON(MVT VT);
+ void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
+ void addDRTypeForNEON(EVT VT);
+ void addQRTypeForNEON(EVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
- void PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG,
+ void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
@@ -224,15 +248,13 @@ namespace llvm {
SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG, DebugLoc dl);
- CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const;
- SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
- const SDValue &StackPtr, const CCValAssign &VA,
- SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
- SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags);
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG);
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
@@ -241,9 +263,9 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG);
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
@@ -252,6 +274,33 @@ namespace llvm {
bool AlwaysInline,
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff);
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
};
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 301a6c1a5cca..3d19f2345d30 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -54,9 +54,16 @@ def NEONGetLnFrm : Format<25>;
def NEONSetLnFrm : Format<26>;
def NEONDupFrm : Format<27>;
-// Misc flag for data processing instructions that indicates whether
-// the instruction has a Rn register operand.
-class UnaryDP { bit isUnaryDataProc = 1; }
+// Misc flags.
+
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have a Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
//===----------------------------------------------------------------------===//
// ARM Instruction flags. These need to match ARMInstrInfo.h.
@@ -77,7 +84,7 @@ def AddrModeT1_1 : AddrMode<7>;
def AddrModeT1_2 : AddrMode<8>;
def AddrModeT1_4 : AddrMode<9>;
def AddrModeT1_s : AddrMode<10>;
-def AddrModeT2_i12: AddrMode<12>;
+def AddrModeT2_i12: AddrMode<11>;
def AddrModeT2_i8 : AddrMode<12>;
def AddrModeT2_so : AddrMode<13>;
def AddrModeT2_pc : AddrMode<14>;
@@ -103,11 +110,33 @@ def IndexModePost : IndexMode<2>;
//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let PrintMethod = "printSBitModifierOperand";
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let PrintMethod = "printSBitModifierOperand";
+}
+
+//===----------------------------------------------------------------------===//
+
// ARM Instruction templates.
//
class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
- Format f, string cstr>
+ Format f, string cstr, InstrItinClass itin>
: Instruction {
field bits<32> Inst;
@@ -130,12 +159,15 @@ class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
// Attributes specific to ARM instructions...
//
bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
let Constraints = cstr;
+ let Itinerary = itin;
}
-class PseudoInst<dag oops, dag iops, string asm, list<dag> pattern>
- : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, ""> {
+class PseudoInst<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, "", itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -144,9 +176,10 @@ class PseudoInst<dag oops, dag iops, string asm, list<dag> pattern>
// Almost all ARM instructions are predicable.
class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, string opc, string asm, string cstr,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
list<dag> pattern>
- : InstARM<am, sz, im, f, cstr> {
+ : InstARM<am, sz, im, f, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -158,9 +191,10 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// an input operand since by default it's a zero register. It will
// become an implicit def once it's "flipped".
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, string opc, string asm, string cstr,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
list<dag> pattern>
- : InstARM<am, sz, im, f, cstr> {
+ : InstARM<am, sz, im, f, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
@@ -170,8 +204,9 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// Special cases
class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, im, f, cstr> {
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -179,90 +214,93 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsARM];
}
-class AI<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern>;
-class AsI<dag oops, dag iops, Format f, string opc,
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
- : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc,
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern>;
-class AXI<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, asm,
- "", pattern>;
// Ctrl flow instructions
-class ABI<bits<4> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, opc,
- asm, "", pattern> {
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
let Inst{27-24} = opcod;
}
-class ABXI<bits<4> opcod, dag oops, dag iops, string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, asm,
- "", pattern> {
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
let Inst{27-24} = opcod;
}
-class ABXIx2<dag oops, dag iops, string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, asm,
- "", pattern>;
+class ABXIx2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
// BR_JT instructions
-class JTI<dag oops, dag iops, string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm,
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
// addrmode1 instructions
-class AI1<bits<4> opcod, dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{24-21} = opcod;
let Inst{27-26} = {0,0};
}
-class AsI1<bits<4> opcod, dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{24-21} = opcod;
let Inst{27-26} = {0,0};
}
-class AXI1<bits<4> opcod, dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, asm,
- "", pattern> {
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ asm, "", pattern> {
let Inst{24-21} = opcod;
let Inst{27-26} = {0,0};
}
-class AI1x2<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, opc,
- asm, "", pattern>;
+class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
// addrmode2 loads and stores
-class AI2<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{27-26} = {0,1};
}
// loads
-class AI2ldw<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2ldw<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
@@ -270,19 +308,19 @@ class AXI2ldw<dag oops, dag iops, Format f, string asm,
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AI2ldb<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2ldb<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
@@ -292,19 +330,19 @@ class AXI2ldb<dag oops, dag iops, Format f, string asm,
}
// stores
-class AI2stw<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2stw<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
@@ -312,19 +350,19 @@ class AXI2stw<dag oops, dag iops, Format f, string asm,
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AI2stb<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2stb<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
@@ -334,20 +372,20 @@ class AXI2stb<dag oops, dag iops, Format f, string asm,
}
// Pre-indexed loads
-class AI2ldwpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AI2ldbpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
@@ -356,20 +394,20 @@ class AI2ldbpr<dag oops, dag iops, Format f, string opc,
}
// Pre-indexed stores
-class AI2stwpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AI2stbpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
@@ -378,20 +416,20 @@ class AI2stbpr<dag oops, dag iops, Format f, string opc,
}
// Post-indexed loads
-class AI2ldwpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
let Inst{27-26} = {0,1};
}
-class AI2ldbpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
@@ -400,20 +438,20 @@ class AI2ldbpo<dag oops, dag iops, Format f, string opc,
}
// Post-indexed stores
-class AI2stwpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
let Inst{27-26} = {0,1};
}
-class AI2stbpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
@@ -422,20 +460,20 @@ class AI2stbpo<dag oops, dag iops, Format f, string opc,
}
// addrmode3 instructions
-class AI3<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern>;
-class AXI3<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, asm,
- "", pattern>;
+class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI3<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ asm, "", pattern>;
// loads
-class AI3ldh<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI3ldh<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
@@ -443,10 +481,11 @@ class AI3ldh<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
-class AXI3ldh<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+class AXI3ldh<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
@@ -456,10 +495,10 @@ class AXI3ldh<dag oops, dag iops, Format f, string asm,
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
}
-class AI3ldsh<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 1; // S bit
@@ -467,10 +506,11 @@ class AI3ldsh<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
-class AXI3ldsh<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+class AXI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
@@ -480,10 +520,10 @@ class AXI3ldsh<dag oops, dag iops, Format f, string asm,
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
}
-class AI3ldsb<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 0; // H bit
let Inst{6} = 1; // S bit
@@ -491,10 +531,11 @@ class AI3ldsb<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
-class AXI3ldsb<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+class AXI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 0; // H bit
@@ -504,10 +545,10 @@ class AXI3ldsb<dag oops, dag iops, Format f, string asm,
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
}
-class AI3ldd<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI3ldd<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 0; // H bit
let Inst{6} = 1; // S bit
@@ -515,13 +556,14 @@ class AI3ldd<dag oops, dag iops, Format f, string opc,
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
// stores
-class AI3sth<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI3sth<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
@@ -529,10 +571,11 @@ class AI3sth<dag oops, dag iops, Format f, string opc,
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
-class AXI3sth<dag oops, dag iops, Format f, string asm,
- list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+class AXI3sth<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
@@ -542,10 +585,10 @@ class AXI3sth<dag oops, dag iops, Format f, string asm,
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
}
-class AI3std<dag oops, dag iops, Format f, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
- asm, "", pattern> {
+class AI3std<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 1; // S bit
@@ -553,13 +596,14 @@ class AI3std<dag oops, dag iops, Format f, string opc,
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
// Pre-indexed loads
-class AI3ldhpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI3ldhpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
@@ -567,11 +611,12 @@ class AI3ldhpr<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
-class AI3ldshpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI3ldshpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 1; // S bit
@@ -579,11 +624,12 @@ class AI3ldshpr<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
-class AI3ldsbpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 0; // H bit
let Inst{6} = 1; // S bit
@@ -591,13 +637,14 @@ class AI3ldsbpr<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
// Pre-indexed stores
-class AI3sthpr<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
- asm, cstr, pattern> {
+class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
@@ -605,13 +652,14 @@ class AI3sthpr<dag oops, dag iops, Format f, string opc,
let Inst{20} = 0; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
}
// Post-indexed loads
-class AI3ldhpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
@@ -619,11 +667,12 @@ class AI3ldhpo<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
}
-class AI3ldshpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 1; // S bit
@@ -631,11 +680,12 @@ class AI3ldshpo<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
}
-class AI3ldsbpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 0; // H bit
let Inst{6} = 1; // S bit
@@ -643,13 +693,14 @@ class AI3ldsbpo<dag oops, dag iops, Format f, string opc,
let Inst{20} = 1; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
}
// Post-indexed stores
-class AI3sthpo<dag oops, dag iops, Format f, string opc,
- string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
- asm, cstr,pattern> {
+class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
@@ -657,57 +708,60 @@ class AI3sthpo<dag oops, dag iops, Format f, string opc,
let Inst{20} = 0; // L bit
let Inst{21} = 1; // W bit
let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
}
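
The pre- and post-indexed variants differ only in the P bit (Inst{24}) and always set W for writeback; the extra cstr parameter carries the tied-operand constraint for the updated base register. A sketch of how a pre-indexed halfword load might instantiate AI3ldhpr, with the LdMiscFrm tag, IIC_iLoadru itinerary, and operand spelling assumed:

def LDRH_PRE_sketch : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
                               (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
                               "ldrh", " $dst, $addr!",
                               "$addr.base = $base_wb", []>;
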
// addrmode4 instructions
-class AXI4ld<dag oops, dag iops, Format f, string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm,
- "", pattern> {
+class AXI4ld<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, itin,
+ asm, "", pattern> {
let Inst{20} = 1; // L bit
let Inst{22} = 0; // S bit
let Inst{27-25} = 0b100;
}
-class AXI4st<dag oops, dag iops, Format f, string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm,
- "", pattern> {
+class AXI4st<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, itin,
+ asm, "", pattern> {
let Inst{20} = 0; // L bit
let Inst{22} = 0; // S bit
let Inst{27-25} = 0b100;
}
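
AXI4ld/AXI4st cover the load/store-multiple encodings (Inst{27-25} = 0b100) and take a raw asm string rather than an opc/asm pair, since the mnemonic is spelled together with the addressing-mode suffix. A sketch, with the reglist operand and IIC_iLoadm itinerary name assumed:

def LDM_sketch : AXI4ld<(outs),
                        (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
                        LdStMulFrm, IIC_iLoadm,
                        "ldm${p}${addr:submode} $addr, $dsts", []>;
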
// Unsigned multiply, multiply-accumulate instructions.
-class AMul1I<bits<7> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
- asm, "", pattern> {
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
let Inst{7-4} = 0b1001;
let Inst{20} = 0; // S bit
let Inst{27-21} = opcod;
}
-class AsMul1I<bits<7> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
- asm, "", pattern> {
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
let Inst{7-4} = 0b1001;
let Inst{27-21} = opcod;
}
// Most significant word multiply
-class AMul2I<bits<7> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
- asm, "", pattern> {
+class AMul2I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
let Inst{7-4} = 0b1001;
let Inst{20} = 1;
let Inst{27-21} = opcod;
}
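
The multiply templates fix Inst{7-4} at 0b1001 and take the seven-bit major opcode as a class parameter. A sketch of a 64-bit unsigned multiply on AMul1I, using the standard ARM opcode bits for UMULL but with the itinerary name assumed:

def UMULL_sketch : AMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
                          (ins GPR:$a, GPR:$b), IIC_iMUL64,
                          "umull", " $ldst, $hdst, $a, $b", []>;
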
// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
-class AMulxyI<bits<7> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
- asm, "", pattern> {
+class AMulxyI<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
let Inst{4} = 0;
let Inst{7} = 1;
let Inst{20} = 0;
@@ -715,19 +769,19 @@ class AMulxyI<bits<7> opcod, dag oops, dag iops, string opc,
}
// Extend instructions.
-class AExtI<bits<8> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, opc,
- asm, "", pattern> {
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
let Inst{7-4} = 0b0111;
let Inst{27-20} = opcod;
}
// Misc Arithmetic instructions.
-class AMiscA1I<bits<8> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, opc,
- asm, "", pattern> {
+class AMiscA1I<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
let Inst{27-20} = opcod;
}
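
AExtI and AMiscA1I both route an eight-bit opcode into Inst{27-20}; they differ in the fixed Inst{7-4} field (0b0111 for the extend group) and the Format tag. A sketch of a CLZ-style definition on AMiscA1I, itinerary name assumed:

def CLZ_sketch : AMiscA1I<0b00010110, (outs GPR:$dst), (ins GPR:$src),
                          IIC_iUNAr, "clz", " $dst, $src", []>;
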
@@ -751,74 +805,120 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
// TI - Thumb instruction.
-class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
- string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
- let OutOperandList = outs;
- let InOperandList = ins;
+class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb];
}
-class TI<dag outs, dag ins, string asm, list<dag> pattern>
- : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
-// BL, BLX(1) are translated by assembler into two instructions
-class TIx2<dag outs, dag ins, string asm, list<dag> pattern>
- : ThumbI<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
-
-// BR_JT instructions
-class TJTI<dag outs, dag ins, string asm, list<dag> pattern>
- : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst", pattern>;
-// TPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
-class TPat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb];
-}
+// tBL, tBX instructions
+class TIx2<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
-class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb, HasV5T];
-}
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
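
After this change every Thumb template threads the itinerary through to InstARM as well. A sketch of a bare (unpredicated) Thumb instruction on TI, assuming the IIC_Br itinerary name and the ARMretflag node used by the existing return definitions:

def tBX_RET_sketch : TI<(outs), (ins), IIC_Br, "bx lr", [(ARMretflag)]>;
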
// Thumb1 only
-class Thumb1I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
- string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
- let OutOperandList = outs;
- let InOperandList = ins;
+class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
-class T1I<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
-class T1I1<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeT1_1, Size2Bytes, asm, "", pattern>;
-class T1I2<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeT1_2, Size2Bytes, asm, "", pattern>;
-class T1I4<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeT1_4, Size2Bytes, asm, "", pattern>;
-class T1Is<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeT1_s, Size2Bytes, asm, "", pattern>;
-class T1Ix2<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
-class T1JTI<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+class T1JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
// Two-address instructions
-class T1It<dag outs, dag ins, string asm, list<dag> pattern>
- : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+ asm, "$lhs = $dst", pattern>;
-class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ let OutOperandList = !con(oops, (ops s_cc_out:$s));
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb1Only];
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+ "$lhs = $dst", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+ "$lhs = $dst", pattern>;
+
+class T1pI1<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_1, Size2Bytes, itin, opc, asm, "", pattern>;
+class T1pI2<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_2, Size2Bytes, itin, opc, asm, "", pattern>;
+class T1pI4<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>;
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
+
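The T1pI1/T1pI2/T1pI4/T1pIs variants select the Thumb1 addressing-mode encodings for byte, halfword, word, and sp-relative accesses respectively. A sketch of a word load on T1pI4, with the t_addrmode_s4 operand and itinerary name assumed:

def tLDR_sketch : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr),
                        IIC_iLoadr, "ldr", " $dst, $addr",
                        [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
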
// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -832,8 +932,9 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
@@ -843,8 +944,9 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// Special cases
class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -852,31 +954,46 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
-class T2I<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
-class T2Ii12<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, opc, asm, "", pattern>;
-class T2Ii8<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, opc, asm, "", pattern>;
-class T2Iso<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, opc, asm, "", pattern>;
-class T2Ipc<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, opc, asm, "", pattern>;
-class T2Ii8s4<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, opc, asm, "", pattern>;
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ii8s4<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "", pattern>;
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
-class T2sI<dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
+class T2Ix2<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
-class T2XI<dag oops, dag iops, string asm, list<dag> pattern>
- : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, asm, "", pattern>;
-class T2JTI<dag oops, dag iops, string asm, list<dag> pattern>
- : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, asm, "", pattern>;
// T2Iidxldst - Thumb2 indexed load / store instructions.
class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im,
+ InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, ThumbFrm, cstr> {
+ : InstARM<am, Size4Bytes, im, ThumbFrm, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -884,6 +1001,15 @@ class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im,
list<Predicate> Predicates = [IsThumb2];
}
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb1Only];
+}
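
These Pat subclasses attach a predicate list so an isel pattern only fires in the matching compilation mode. A sketch of an anonymous T1Pat, assuming a tSXTB instruction defined elsewhere:

def : T1Pat<(sext_inreg tGPR:$src, i8), (tSXTB tGPR:$src)>;
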
// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
@@ -896,11 +1022,41 @@ class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
// ARM VFP Instruction templates.
//
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
// ARM VFP addrmode5 loads and stores
class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStFrm, opc, asm, "", pattern> {
+ : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
@@ -908,9 +1064,10 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
}
class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStFrm, opc, asm, "", pattern> {
+ : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
@@ -918,27 +1075,28 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
}
// Load / store multiple
-class AXSI5<dag oops, dag iops, string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStMulFrm, asm, "", pattern> {
+class AXDI5<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStMulFrm, itin, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1011;
}
-class AXDI5<dag oops, dag iops, string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStMulFrm, asm, "", pattern> {
+class AXSI5<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStMulFrm, itin, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1010;
}
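
Besides threading the itinerary, this hunk swaps the AXDI5/AXSI5 names so the coprocessor-11 encoding (Inst{11-8} = 0b1011, double precision) now belongs to the D variant and coprocessor 10 (0b1010) to the S variant. A sketch of a double-precision load-multiple on AXDI5, with the operand list, asm modifiers, and itinerary name assumed:

def FLDMD_sketch : AXDI5<(outs),
                         (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops),
                         IIC_fpLoadm,
                         "fldm${addr:submode}d${p} ${addr:base}, $dsts", []>;
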
-
// Double precision, unary
class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
- string opc, string asm, list<dag> pattern>
- : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> {
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
let Inst{27-20} = opcod1;
let Inst{19-16} = opcod2;
let Inst{11-8} = 0b1011;
@@ -946,17 +1104,17 @@ class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
}
// Double precision, binary
-class ADbI<bits<8> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> {
+class ADbI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-20} = opcod;
let Inst{11-8} = 0b1011;
}
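
A sketch of a double-precision binary op on the reworked ADbI, using the standard ARM opcode bits for FADDD but with the itinerary name assumed:

def FADDD_sketch : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
                        IIC_fpALU64, "faddd", " $dst, $a, $b",
                        [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
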
// Single precision, unary
class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
- string opc, string asm, list<dag> pattern>
- : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> {
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
// Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding.
let Inst{27-20} = opcod1;
let Inst{19-16} = opcod2;
@@ -964,48 +1122,74 @@ class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
let Inst{7-4} = opcod3;
}
+// Single precision unary, if no NEON
+// Same as ASuI except not available if NEON is enabled
+class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
// Single precision, binary
-class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> {
+class ASbI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
// Bit 22 (D bit) can be changed during instruction encoding.
let Inst{27-20} = opcod;
let Inst{11-8} = 0b1010;
}
+// Single precision binary, if no NEON
+// Same as ASbI except not available if NEON is enabled
+class ASbIn<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : ASbI<opcod, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
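
The ...In variants re-declare Predicates so their selection patterns are skipped when NEON is handling single-precision FP, letting a NEONFPPat (defined at the end of this file) win instead. A sketch on ASbIn, itinerary name assumed:

def FADDS_sketch : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
                         IIC_fpALU32, "fadds", " $dst, $a, $b",
                         [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
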
+
// VFP conversion instructions
class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3,
- dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : AI<oops, iops, VFPConv1Frm, opc, asm, pattern> {
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
let Inst{27-20} = opcod1;
let Inst{19-16} = opcod2;
let Inst{11-8} = opcod3;
let Inst{6} = 1;
}
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
- string opc, string asm, list<dag> pattern>
- : AI<oops, iops, f, opc, asm, pattern> {
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{4} = 1;
}
-class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, opc, asm, pattern>;
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
-class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, opc, asm, pattern>;
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
-class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, opc, asm, pattern>;
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
-class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
- string asm, list<dag> pattern>
- : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, opc, asm, pattern>;
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
//===----------------------------------------------------------------------===//
@@ -1013,9 +1197,9 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
// ARM NEON Instruction templates.
//
-class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, string asm,
- string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, NEONFrm, cstr> {
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, NEONFrm, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -1023,20 +1207,33 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, string asm,
list<Predicate> Predicates = [HasNEON];
}
-class NI<dag oops, dag iops, string asm, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, "", pattern> {
+class NI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, "", pattern> {
+}
+
+class NI4<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode4, IndexModeNone, itin, asm, "", pattern> {
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, asm, cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
}
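
NLdSt pins Inst{31-24} to 0b11110100, the NEON element/structure load/store space, and exposes the remaining opcode bits as class parameters. A sketch of a one-register VLD1-style definition, with the addrmode6 operand, field values, and itinerary name all assumed for illustration:

def VLD1d32_sketch : NLdSt<0, 0b10, 0b0111, 0b1000,
                           (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
                           "vld1.32 $dst, $addr", "", []>;
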
-class NDataI<dag oops, dag iops, string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, cstr, pattern> {
+class NDataI<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, cstr, pattern> {
let Inst{31-25} = 0b1111001;
}
// NEON "one register and a modified immediate" format.
class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
bit op5, bit op4,
- dag oops, dag iops, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, asm, cstr, pattern> {
+ dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, asm, cstr, pattern> {
let Inst{23} = op23;
let Inst{21-19} = op21_19;
let Inst{11-8} = op11_8;
@@ -1049,8 +1246,9 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
// NEON 2 vector register format.
class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
- dag oops, dag iops, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, asm, cstr, pattern> {
+ dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1063,8 +1261,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// NEON 2 vector register with immediate.
class N2VImm<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4,
- dag oops, dag iops, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, asm, cstr, pattern> {
+ dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-16} = op21_16;
@@ -1076,8 +1275,9 @@ class N2VImm<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
// NEON 3 vector register format.
class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
- dag oops, dag iops, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, asm, cstr, pattern> {
+ dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1088,9 +1288,9 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
// NEON VMOVs between scalar and core registers.
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
- dag oops, dag iops, Format f, string opc, string asm,
- list<dag> pattern>
- : AI<oops, iops, f, opc, asm, pattern> {
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, f, itin, opc, asm, pattern> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{6-5} = opcod3;
@@ -1098,13 +1298,23 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
list<Predicate> Predicates = [HasNEON];
}
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
- dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, opc, asm,
- pattern>;
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin,
+ opc, asm, pattern>;
class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
- dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, opc, asm,
- pattern>;
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin,
+ opc, asm, pattern>;
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
- dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, opc, asm, pattern>;
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
+ opc, asm, pattern>;
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
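
A sketch of how NEONFPPat is meant to be used, assuming a hypothetical VADDfd_sfp instruction that performs a scalar f32 add on the NEON unit:

def : NEONFPPat<(fadd SPR:$a, SPR:$b), (VADDfd_sfp SPR:$a, SPR:$b)>;
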
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 443fdc742eab..4c92891c82bd 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -21,52 +21,15 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
-static cl::opt<bool>
-EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
- cl::desc("Enable ARM 2-addr to 3-addr conv"));
-
-static inline
-const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
- return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
-}
-
-static inline
-const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
-}
-
-ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
- : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) {
-}
-
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : RI(*this, STI), Subtarget(STI) {
}
-void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- const MachineInstr *Orig) const {
- DebugLoc dl = Orig->getDebugLoc();
- if (Orig->getOpcode() == ARM::MOVi2pieces) {
- RI.emitLoadConstPool(MBB, I, this, dl,
- DestReg,
- Orig->getOperand(1).getImm(),
- (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
- Orig->getOperand(3).getReg());
- return;
- }
-
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
- MBB.insert(I, MI);
-}
-
-static unsigned getUnindexedOpcode(unsigned Opc) {
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
default: break;
case ARM::LDR_PRE:
@@ -94,820 +57,45 @@ static unsigned getUnindexedOpcode(unsigned Opc) {
case ARM::STRB_POST:
return ARM::STRB;
}
- return 0;
-}
-
-MachineInstr *
-ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const {
- if (!EnableARM3Addr)
- return NULL;
-
- MachineInstr *MI = MBBI;
- MachineFunction &MF = *MI->getParent()->getParent();
- unsigned TSFlags = MI->getDesc().TSFlags;
- bool isPre = false;
- switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
- default: return NULL;
- case ARMII::IndexModePre:
- isPre = true;
- break;
- case ARMII::IndexModePost:
- break;
- }
-
- // Try splitting an indexed load/store to an un-indexed one plus an add/sub
- // operation.
- unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
- if (MemOpc == 0)
- return NULL;
-
- MachineInstr *UpdateMI = NULL;
- MachineInstr *MemMI = NULL;
- unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned NumOps = TID.getNumOperands();
- bool isLoad = !TID.mayStore();
- const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
- const MachineOperand &Base = MI->getOperand(2);
- const MachineOperand &Offset = MI->getOperand(NumOps-3);
- unsigned WBReg = WB.getReg();
- unsigned BaseReg = Base.getReg();
- unsigned OffReg = Offset.getReg();
- unsigned OffImm = MI->getOperand(NumOps-2).getImm();
- ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
- switch (AddrMode) {
- default:
- assert(false && "Unknown indexed op!");
- return NULL;
- case ARMII::AddrMode2: {
- bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
- unsigned Amt = ARM_AM::getAM2Offset(OffImm);
- if (OffReg == 0) {
- int SOImmVal = ARM_AM::getSOImmVal(Amt);
- if (SOImmVal == -1)
- // Can't encode it in a so_imm operand. This transformation will
- // add more than 1 instruction. Abandon!
- return NULL;
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
- .addReg(BaseReg).addImm(SOImmVal)
- .addImm(Pred).addReg(0).addReg(0);
- } else if (Amt != 0) {
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
- unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
- .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
- .addImm(Pred).addReg(0).addReg(0);
- } else
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
- .addReg(BaseReg).addReg(OffReg)
- .addImm(Pred).addReg(0).addReg(0);
- break;
- }
- case ARMII::AddrMode3 : {
- bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
- unsigned Amt = ARM_AM::getAM3Offset(OffImm);
- if (OffReg == 0)
- // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
- .addReg(BaseReg).addImm(Amt)
- .addImm(Pred).addReg(0).addReg(0);
- else
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
- .addReg(BaseReg).addReg(OffReg)
- .addImm(Pred).addReg(0).addReg(0);
- break;
- }
- }
-
- std::vector<MachineInstr*> NewMIs;
- if (isPre) {
- if (isLoad)
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc), MI->getOperand(0).getReg())
- .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
- else
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc)).addReg(MI->getOperand(1).getReg())
- .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
- NewMIs.push_back(MemMI);
- NewMIs.push_back(UpdateMI);
- } else {
- if (isLoad)
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc), MI->getOperand(0).getReg())
- .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
- else
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc)).addReg(MI->getOperand(1).getReg())
- .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
- if (WB.isDead())
- UpdateMI->getOperand(0).setIsDead();
- NewMIs.push_back(UpdateMI);
- NewMIs.push_back(MemMI);
- }
-
- // Transfer LiveVariables states, kill / dead info.
- if (LV) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned Reg = MO.getReg();
-
- LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
- if (MO.isDef()) {
- MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
- if (MO.isDead())
- LV->addVirtualRegisterDead(Reg, NewMI);
- }
- if (MO.isUse() && MO.isKill()) {
- for (unsigned j = 0; j < 2; ++j) {
- // Look at the two new MI's in reverse order.
- MachineInstr *NewMI = NewMIs[j];
- if (!NewMI->readsRegister(Reg))
- continue;
- LV->addVirtualRegisterKilled(Reg, NewMI);
- if (VI.removeKill(MI))
- VI.Kills.push_back(NewMI);
- break;
- }
- }
- }
- }
- }
-
- MFI->insert(MBBI, NewMIs[1]);
- MFI->insert(MBBI, NewMIs[0]);
- return NewMIs[0];
-}
-
-// Branch analysis.
-bool
-ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- }
- if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc || LastOpc == ARM::t2Bcc) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(LastInst->getOperand(1));
- Cond.push_back(LastInst->getOperand(2));
- return false;
- }
- return true; // Can't handle indirect branch.
- }
-
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with ARM::B/ARM::tB/ARM::t2B and a
- // ARM::Bcc/ARM::tBcc/ARM::t2Bcc, handle it.
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
- if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
- (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB) ||
- (SecondLastOpc == ARM::t2Bcc && LastOpc == ARM::t2B)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- Cond.push_back(SecondLastInst->getOperand(1));
- Cond.push_back(SecondLastInst->getOperand(2));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB ||
- SecondLastOpc==ARM::t2B) &&
- (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // ...likewise if it ends with a branch table followed by an unconditional
- // branch. The branch folder can create these, and we must get rid of them for
- // correctness of Thumb constant islands.
- if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm ||
- SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr ||
- SecondLastOpc == ARM::t2BR_JTr || SecondLastOpc==ARM::t2BR_JTm ||
- SecondLastOpc == ARM::t2BR_JTadd) &&
- (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) {
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return true;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-
-unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- int BOpc = AFI->isThumbFunction() ?
- (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B;
- int BccOpc = AFI->isThumbFunction() ?
- (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
-
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin()) return 1;
- --I;
- if (I->getOpcode() != BccOpc)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
-}
-
-unsigned
-ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl = DebugLoc::getUnknownLoc();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- int BOpc = AFI->isThumbFunction() ?
- (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B;
- int BccOpc = AFI->isThumbFunction() ?
- (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
-
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "ARM branch conditions have two components!");
- if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
- else
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- return 1;
- }
-
- // Two-way conditional branch.
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
- return 2;
+ return 0;
}
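
With getUnindexedOpcode turned into a virtual method, the pre/post-index splitting that used to live in this file can stay in shared code and query whichever InstrInfo flavor is active. A hedged C++ sketch of the call shape, not taken from this patch:

// Returns true when an indexed load/store has an unindexed twin that the
// 2-addr -> 3-addr conversion can split it into; 0 means "no twin".
static bool hasUnindexedForm(const ARMInstrInfo &TII, unsigned Opc) {
  return TII.getUnindexedOpcode(Opc) != 0;
}
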
-bool
-ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
if (MBB.empty()) return false;
switch (MBB.back().getOpcode()) {
case ARM::BX_RET: // Return.
case ARM::LDM_RET:
- case ARM::tBX_RET:
- case ARM::tBX_RET_vararg:
- case ARM::tPOP_RET:
case ARM::B:
- case ARM::tB:
- case ARM::t2B: // Uncond branch.
- case ARM::tBR_JTr:
- case ARM::t2BR_JTr:
case ARM::BR_JTr: // Jumptable branch.
- case ARM::t2BR_JTm:
case ARM::BR_JTm: // Jumptable branch through mem.
- case ARM::t2BR_JTadd:
case ARM::BR_JTadd: // Jumptable branch add to pc.
return true;
- default: return false;
- }
-}
-
-bool ARMBaseInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
- Cond[0].setImm(ARMCC::getOppositeCondition(CC));
- return false;
-}
-
-bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
-}
-
-bool ARMBaseInstrInfo::
-PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B) {
- MI->setDesc(get((Opc == ARM::B) ? ARM::Bcc :
- ((Opc == ARM::tB) ? ARM::tBcc : ARM::t2Bcc)));
- MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
- MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
- return true;
- }
-
- int PIdx = MI->findFirstPredOperandIdx();
- if (PIdx != -1) {
- MachineOperand &PMO = MI->getOperand(PIdx);
- PMO.setImm(Pred[0].getImm());
- MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
- return true;
- }
- return false;
-}
-
-bool ARMBaseInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
- if (Pred1.size() > 2 || Pred2.size() > 2)
- return false;
-
- ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
- ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
- if (CC1 == CC2)
- return true;
-
- switch (CC1) {
- default:
- return false;
- case ARMCC::AL:
- return true;
- case ARMCC::HS:
- return CC2 == ARMCC::HI;
- case ARMCC::LS:
- return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
- case ARMCC::GE:
- return CC2 == ARMCC::GT;
- case ARMCC::LE:
- return CC2 == ARMCC::LT;
- }
-}
-
-bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
- return false;
-
- bool Found = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == ARM::CPSR) {
- Pred.push_back(MO);
- Found = true;
- }
- }
-
- return Found;
-}
-
-
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI) DISABLE_INLINE;
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI) {
- return JT[JTI].MBBs.size();
-}
-
-/// GetInstSize - Return the size of the specified MachineInstr.
-///
-unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MachineBasicBlock &MBB = *MI->getParent();
- const MachineFunction *MF = MBB.getParent();
- const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
-
- // Basic size info comes from the TSFlags field.
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned TSFlags = TID.TSFlags;
-
- switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
- default: {
- // If this machine instr is an inline asm, measure it.
- if (MI->getOpcode() == ARM::INLINEASM)
- return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
- if (MI->isLabel())
- return 0;
- switch (MI->getOpcode()) {
- default:
- assert(0 && "Unknown or unset size field for instr!");
- break;
- case TargetInstrInfo::IMPLICIT_DEF:
- case TargetInstrInfo::DECLARE:
- case TargetInstrInfo::DBG_LABEL:
- case TargetInstrInfo::EH_LABEL:
- return 0;
- }
- break;
- }
- case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
- case ARMII::Size4Bytes: return 4; // Arm instruction.
- case ARMII::Size2Bytes: return 2; // Thumb instruction.
- case ARMII::SizeSpecial: {
- switch (MI->getOpcode()) {
- case ARM::CONSTPOOL_ENTRY:
- // If this machine instr is a constant pool entry, its size is recorded as
- // operand #2.
- return MI->getOperand(2).getImm();
- case ARM::Int_eh_sjlj_setjmp: return 12;
- case ARM::BR_JTr:
- case ARM::BR_JTm:
- case ARM::BR_JTadd:
- case ARM::t2BR_JTr:
- case ARM::t2BR_JTm:
- case ARM::t2BR_JTadd:
- case ARM::tBR_JTr: {
- // These are jumptable branches, i.e. a branch followed by an inlined
- // jumptable. The size is 4 + 4 * number of entries.
- unsigned NumOps = TID.getNumOperands();
- MachineOperand JTOP =
- MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
- unsigned JTI = JTOP.getIndex();
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- assert(JTI < JT.size());
- // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
- // 4 aligned. The assembler / linker may add 2 byte padding just before
- // the JT entries. The size does not include this padding; the
- // constant islands pass does separate bookkeeping for it.
- // FIXME: If we know the size of the function is less than (1 << 16) *2
- // bytes, we can use 16-bit entries instead. Then there won't be an
- // alignment issue.
- return getNumJTEntries(JT, JTI) * 4 +
- ((MI->getOpcode()==ARM::tBR_JTr) ? 2 : 4);
- }
- default:
- // Otherwise, pseudo-instruction sizes are zero.
- return 0;
- }
- }
- }
- return 0; // Not reached
-}
-
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool
-ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- unsigned oc = MI.getOpcode();
- switch (oc) {
default:
- return false;
- case ARM::FCPYS:
- case ARM::FCPYD:
- case ARM::VMOVD:
- case ARM::VMOVQ:
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- case ARM::MOVr:
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid ARM MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
-}
-
-unsigned
-ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::LDR:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- case ARM::FLDD:
- case ARM::FLDS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned
-ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::STR:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- case ARM::FSTD:
- case ARM::FSTS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
break;
}
- return 0;
-}
-
-bool
-ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == ARM::GPRRegisterClass)
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg)));
- else if (DestRC == ARM::SPRRegisterClass)
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
- .addReg(SrcReg));
- else if (DestRC == ARM::DPRRegisterClass)
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
- .addReg(SrcReg));
- else if (DestRC == ARM::QPRRegisterClass)
- BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
- else
- return false;
-
- return true;
-}
-
-void ARMBaseInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- if (RC == ARM::GPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addReg(0).addImm(0));
- } else if (RC == ARM::DPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
- } else {
- assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
- }
-}
-
-void
-ARMBaseInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
- if (RC == ARM::GPRRegisterClass) {
- Opc = ARM::STR;
- } else if (RC == ARM::DPRRegisterClass) {
- Opc = ARM::FSTD;
- } else {
- assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
- Opc = ARM::FSTS;
- }
-
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- AddDefaultPred(MIB);
- NewMIs.push_back(MIB);
- return;
-}
-
-void ARMBaseInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- if (RC == ARM::GPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
- .addFrameIndex(FI).addReg(0).addImm(0));
- } else if (RC == ARM::DPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
- .addFrameIndex(FI).addImm(0));
- } else {
- assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
- .addFrameIndex(FI).addImm(0));
- }
-}
-
-void ARMBaseInstrInfo::
-loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
- if (RC == ARM::GPRRegisterClass) {
- Opc = ARM::LDR;
- } else if (RC == ARM::DPRRegisterClass) {
- Opc = ARM::FLDD;
- } else {
- assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
- Opc = ARM::FLDS;
- }
-
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- AddDefaultPred(MIB);
- NewMIs.push_back(MIB);
- return;
+ return false;
}
-MachineInstr *ARMBaseInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- switch (Opc) {
- default: break;
- case ARM::MOVr: {
- if (MI->getOperand(4).getReg() == ARM::CPSR)
- // If it is updating CPSR, then it cannot be folded.
- break;
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- }
- break;
- }
- case ARM::FCPYS: {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addFrameIndex(FI)
- .addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- break;
- }
- case ARM::FCPYD: {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- break;
- }
+void ARMInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig) const {
+ DebugLoc dl = Orig->getDebugLoc();
+ if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ RI.emitLoadConstPool(MBB, I, dl,
+ DestReg, SubIdx,
+ Orig->getOperand(1).getImm(),
+ (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
+ Orig->getOperand(3).getReg());
+ return;
}
- return NewMI;
-}
-
-MachineInstr*
-ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
}
-bool
-ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default: break;
- case ARM::MOVr:
- // If it is updating CPSR, then it cannot be folded.
- return MI->getOperand(4).getReg() != ARM::CPSR;
- case ARM::FCPYS:
- case ARM::FCPYD:
- return true;
-
- case ARM::VMOVD:
- case ARM::VMOVQ:
- return false; // FIXME
- }
-
- return false;
-}
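
This hunk removes the last of the shared hooks from the old monolithic implementation; they now live on the ARMBaseInstrInfo base class, leaving the ARM-mode subclass with only ARM-specific pieces such as reMaterialize above. A stand-alone C++ sketch of the resulting split (stub types, illustrative method set only):

  // Stub types standing in for the real register-info classes.
  struct ARMBaseRegisterInfo {};
  struct ARMRegisterInfo : ARMBaseRegisterInfo {};

  // Base class: owns the hooks ARM and Thumb share (branch analysis,
  // copies, spills, folding); subclasses expose their register info.
  struct ARMBaseInstrInfo {
    virtual ~ARMBaseInstrInfo() {}
    virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
  };

  // ARM-mode subclass: the covariant return narrows to its own info.
  struct ARMInstrInfo : ARMBaseInstrInfo {
    ARMRegisterInfo RI;
    const ARMRegisterInfo &getRegisterInfo() const { return RI; }
  };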
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 8c8f7883a06c..c616949e3790 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -15,247 +15,27 @@
#define ARMINSTRUCTIONINFO_H
#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
#include "ARM.h"
namespace llvm {
class ARMSubtarget;
-/// ARMII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace ARMII {
- enum {
- //===------------------------------------------------------------------===//
- // Instruction Flags.
-
- //===------------------------------------------------------------------===//
- // This four-bit field describes the addressing mode used.
-
- AddrModeMask = 0xf,
- AddrModeNone = 0,
- AddrMode1 = 1,
- AddrMode2 = 2,
- AddrMode3 = 3,
- AddrMode4 = 4,
- AddrMode5 = 5,
- AddrMode6 = 6,
- AddrModeT1_1 = 7,
- AddrModeT1_2 = 8,
- AddrModeT1_4 = 9,
- AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
- AddrModeT2_i12 = 11,
- AddrModeT2_i8 = 12,
- AddrModeT2_so = 13,
- AddrModeT2_pc = 14, // +/- i12 for pc relative data
- AddrModeT2_i8s4 = 15, // i8 * 4
-
- // Size* - Flags to keep track of the size of an instruction.
- SizeShift = 4,
- SizeMask = 7 << SizeShift,
- SizeSpecial = 1, // 0 byte pseudo or special case.
- Size8Bytes = 2,
- Size4Bytes = 3,
- Size2Bytes = 4,
-
- // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load
- // and store ops
- IndexModeShift = 7,
- IndexModeMask = 3 << IndexModeShift,
- IndexModePre = 1,
- IndexModePost = 2,
-
- //===------------------------------------------------------------------===//
- // Misc flags.
-
- // UnaryDP - Indicates this is a unary data processing instruction, i.e.
- // it doesn't have a Rn operand.
- UnaryDP = 1 << 9,
-
- //===------------------------------------------------------------------===//
- // Instruction encoding formats.
- //
- FormShift = 10,
- FormMask = 0x1f << FormShift,
-
- // Pseudo instructions
- Pseudo = 0 << FormShift,
-
- // Multiply instructions
- MulFrm = 1 << FormShift,
-
- // Branch instructions
- BrFrm = 2 << FormShift,
- BrMiscFrm = 3 << FormShift,
-
- // Data Processing instructions
- DPFrm = 4 << FormShift,
- DPSoRegFrm = 5 << FormShift,
-
- // Load and Store
- LdFrm = 6 << FormShift,
- StFrm = 7 << FormShift,
- LdMiscFrm = 8 << FormShift,
- StMiscFrm = 9 << FormShift,
- LdStMulFrm = 10 << FormShift,
-
- // Miscellaneous arithmetic instructions
- ArithMiscFrm = 11 << FormShift,
-
- // Extend instructions
- ExtFrm = 12 << FormShift,
-
- // VFP formats
- VFPUnaryFrm = 13 << FormShift,
- VFPBinaryFrm = 14 << FormShift,
- VFPConv1Frm = 15 << FormShift,
- VFPConv2Frm = 16 << FormShift,
- VFPConv3Frm = 17 << FormShift,
- VFPConv4Frm = 18 << FormShift,
- VFPConv5Frm = 19 << FormShift,
- VFPLdStFrm = 20 << FormShift,
- VFPLdStMulFrm = 21 << FormShift,
- VFPMiscFrm = 22 << FormShift,
-
- // Thumb format
- ThumbFrm = 23 << FormShift,
-
- // NEON format
- NEONFrm = 24 << FormShift,
- NEONGetLnFrm = 25 << FormShift,
- NEONSetLnFrm = 26 << FormShift,
- NEONDupFrm = 27 << FormShift,
-
- //===------------------------------------------------------------------===//
- // Field shifts - such shifts are used to set field while generating
- // machine instructions.
- M_BitShift = 5,
- ShiftImmShift = 5,
- ShiftShift = 7,
- N_BitShift = 7,
- ImmHiShift = 8,
- SoRotImmShift = 8,
- RegRsShift = 8,
- ExtRotImmShift = 10,
- RegRdLoShift = 12,
- RegRdShift = 12,
- RegRdHiShift = 16,
- RegRnShift = 16,
- S_BitShift = 20,
- W_BitShift = 21,
- AM3_I_BitShift = 22,
- D_BitShift = 22,
- U_BitShift = 23,
- P_BitShift = 24,
- I_BitShift = 25,
- CondShift = 28
- };
-}
-
-class ARMBaseInstrInfo : public TargetInstrInfoImpl {
-protected:
- // Can be only subclassed.
- explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
-public:
- virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const;
-
- virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
-
- // Branch analysis.
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- // Predication support.
- virtual bool isPredicated(const MachineInstr *MI) const;
-
- ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
- : ARMCC::AL;
- }
-
- virtual
- bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
-
- virtual
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
-
- virtual bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
-
- /// GetInstSize - Returns the size of the specified MachineInstr.
- ///
- virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
-
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
-
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const;
-};
-
class ARMInstrInfo : public ARMBaseInstrInfo {
ARMRegisterInfo RI;
+ const ARMSubtarget &Subtarget;
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
+ // Return the non-pre/post-incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ // Return true if the block does not fall through.
+ bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
@@ -263,7 +43,8 @@ public:
const ARMRegisterInfo &getRegisterInfo() const { return RI; }
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, const MachineInstr *Orig) const;
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig) const;
};
}
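
The new getUnindexedOpcode hook lets clients such as the load/store optimizer recover the plain form of a pre/post-indexed memory op. A toy model of the contract (placeholder opcode names, not the real ARM enum):

  #include <cassert>

  enum Opcode { LDR, LDR_PRE, LDR_POST, STR, STR_PRE, STR_POST, NONE };

  // Return the non-pre/post-incrementing version of Opc, or NONE (this
  // sketch's stand-in for 0) if there is no such opcode.
  Opcode getUnindexedOpcode(Opcode Opc) {
    switch (Opc) {
    case LDR_PRE: case LDR_POST: return LDR;
    case STR_PRE: case STR_POST: return STR;
    default:                     return NONE;
    }
  }

  int main() { assert(getUnindexedOpcode(LDR_POST) == LDR); }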
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 408f47a6e106..8adfac3fb4c5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -34,6 +34,10 @@ def SDT_ARMBrJT : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>]>;
+def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -71,6 +75,8 @@ def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
[SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
@@ -93,10 +99,14 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
@@ -117,25 +127,16 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// so_imm_XFORM - Return a so_imm value packed into the format described for
-// so_imm def below.
-def so_imm_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()),
- MVT::i32);
-}]>;
-
// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
// so_imm_neg def below.
def so_imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()),
- MVT::i32);
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
// so_imm_not_XFORM - Return a so_imm value packed into the format described for
// so_imm_not def below.
def so_imm_not_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()),
- MVT::i32);
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
@@ -169,6 +170,48 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
}]>;
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary-width bitfield,
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ uint32_t v = (uint32_t)N->getZExtValue();
+ if (v == 0xffffffff)
+ return 0;
+ // There can be 1s on either or both "outsides"; all the "inside"
+ // bits must be 0s.
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}]> {
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
+
+/// Split a 32-bit immediate into two 16-bit parts.
+def lo16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
+ MVT::i32);
+}]>;
+
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all of the low 16 bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+ }], hi16>;
+
+/// imm0_65535 predicate - True if the 32-bit immediate is in the range
+/// [0,65535].
+def imm0_65535 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 65536;
+}]>;
+
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
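
The bf_inv_mask_imm predicate above accepts exactly the masks whose clear bits form one contiguous run, with optional set bits on either outside. A stand-alone C++ transcription of the same test (hypothetical function name), with two sanity checks:

  #include <cassert>
  #include <cstdint>

  // True iff the zero bits of v form a single contiguous run, with ones
  // allowed on either or both outsides -- the bf_inv_mask_imm check.
  bool isBitfieldInvertedMask(uint32_t v) {
    if (v == 0xffffffffu)
      return false;                // no bitfield to clear
    unsigned lsb = 0, msb = 31;
    while (v & (1u << msb)) --msb; // skip the ones on the high side
    while (v & (1u << lsb)) ++lsb; // skip the ones on the low side
    for (unsigned i = lsb; i <= msb; ++i)
      if (v & (1u << i))
        return false;              // a one inside the zero run: reject
    return true;
  }

  int main() {
    assert(isBitfieldInvertedMask(0xf000ffffu));  // clears bits 16..27
    assert(!isBitfieldInvertedMask(0xff00ff00u)); // two separate zero runs
  }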
@@ -192,6 +235,9 @@ def cpinst_operand : Operand<i32> {
def jtblock_operand : Operand<i32> {
let PrintMethod = "printJTBlockOperand";
}
+def jt2block_operand : Operand<i32> {
+ let PrintMethod = "printJT2BlockOperand";
+}
// Local PC labels.
def pclabel : Operand<i32> {
@@ -212,9 +258,9 @@ def so_reg : Operand<i32>, // reg reg imm
// into so_imm instructions: the 8-bit immediate is the least significant bits
// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
def so_imm : Operand<i32>,
- PatLeaf<(imm),
- [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }],
- so_imm_XFORM> {
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }]> {
let PrintMethod = "printSOImmOperand";
}
@@ -230,14 +276,18 @@ def so_imm2part : Operand<i32>,
def so_imm2part_1 : SDNodeXForm<imm, [{
unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+ return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
def so_imm2part_2 : SDNodeXForm<imm, [{
unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+ return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < 32;
+}]>;
// Define ARM specific addressing modes.
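
The so_imm changes above stop packing the rotate into the operand via so_imm_XFORM; the predicate still only admits values that ARM_AM::getSOImmVal can encode, i.e. an 8-bit constant rotated right by an even amount. A sketch of that encodability test (the helper names are stand-ins, not the LLVM API):

  #include <cassert>
  #include <cstdint>

  static uint32_t rotr32(uint32_t v, unsigned amt) {
    return amt ? (v >> amt) | (v << (32 - amt)) : v;
  }

  // A data-processing immediate is an 8-bit value rotated right by an
  // even amount (0, 2, ..., 30).
  bool isSOImmEncodable(uint32_t v) {
    for (unsigned rot = 0; rot < 32; rot += 2)
      if ((rotr32(v, rot) & ~0xffu) == 0) // fits in 8 bits after un-rotating?
        return true;
    return false;
  }

  int main() {
    assert(isSOImmEncodable(0xff000000u)); // 0xff rotated right by 8
    assert(!isSOImmEncodable(0x00000101u)); // needs two instructions
  }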
@@ -274,7 +324,7 @@ def am3offset : Operand<i32>,
// addrmode4 := reg, <mode|W>
//
def addrmode4 : Operand<i32>,
- ComplexPattern<i32, 2, "", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode4", []> {
let PrintMethod = "printAddrMode4Operand";
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -303,17 +353,8 @@ def addrmodepc : Operand<i32>,
let MIOperandInfo = (ops GPR, i32imm);
}
-// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
-// register whose default is 0 (no register).
-def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
- (ops (i32 14), (i32 zero_reg))> {
- let PrintMethod = "printPredicateOperand";
-}
-
-// Conditional code result for instructions whose 's' bit is set, e.g. subs.
-//
-def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
- let PrintMethod = "printSBitModifierOperand";
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
}
//===----------------------------------------------------------------------===//
@@ -329,34 +370,44 @@ include "ARMInstrFormats.td"
multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- opc, " $dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ IIC_iALUi, opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+ let Inst{25} = 1;
+ }
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- opc, " $dst, $a, $b",
+ IIC_iALUr, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+ let Inst{25} = 0;
let isCommutable = Commutable;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- opc, " $dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+ IIC_iALUsr, opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ let Inst{25} = 0;
+ }
}
/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
-/// instruction modifies the CSPR register.
+/// instruction modifies the CPSR register.
let Defs = [CPSR] in {
multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- opc, "s $dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ IIC_iALUi, opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+ let Inst{25} = 1;
+ }
def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- opc, "s $dst, $a, $b",
+ IIC_iALUr, opc, "s $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let isCommutable = Commutable;
+ let Inst{25} = 0;
}
def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- opc, "s $dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+ IIC_iALUsr, opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ let Inst{25} = 0;
+ }
}
}
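
The "let Inst{25}" lines threaded through these multiclasses pin down the I bit of the ARM data-processing encoding: 1 for the rotated-immediate (ri) form, 0 for the register (rr) and shifted-register (rs) forms. A toy encoder showing where the bit lands (field layout per the ARM ARM, not LLVM's emitter):

  #include <cassert>
  #include <cstdint>

  // ARM data-processing word: cond(31-28) 00 I(25) opcode(24-21) S(20)
  // Rn(19-16) Rd(15-12) operand2(11-0).
  uint32_t encodeDP(unsigned cond, bool I, unsigned opcode, bool S,
                    unsigned Rn, unsigned Rd, unsigned op2) {
    return (cond << 28) | (uint32_t(I) << 25) | (opcode << 21) |
           (uint32_t(S) << 20) | (Rn << 16) | (Rd << 12) | (op2 & 0xfff);
  }

  int main() {
    // add r0, r1, #1 (cond AL = 0xe, ADD opcode = 0b0100, I = 1)
    assert(encodeDP(0xe, true, 0x4, false, 1, 0, 1) == 0xe2810001u);
  }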
@@ -366,17 +417,25 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
- def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm,
+ def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
opc, " $a, $b",
- [(opnode GPR:$a, so_imm:$b)]>;
- def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm,
+ [(opnode GPR:$a, so_imm:$b)]> {
+ let Inst{20} = 1;
+ let Inst{25} = 1;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
opc, " $a, $b",
[(opnode GPR:$a, GPR:$b)]> {
+ let Inst{20} = 1;
+ let Inst{25} = 0;
let isCommutable = Commutable;
}
- def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
opc, " $a, $b",
- [(opnode GPR:$a, so_reg:$b)]>;
+ [(opnode GPR:$a, so_reg:$b)]> {
+ let Inst{20} = 1;
+ let Inst{25} = 0;
+ }
}
}
@@ -384,15 +443,15 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
/// register and one whose operand is a register rotated by 8/16/24.
/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src),
- opc, " $dst, $Src",
- [(set GPR:$dst, (opnode GPR:$Src))]>,
+ def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
+ IIC_iUNAr, opc, " $dst, $src",
+ [(set GPR:$dst, (opnode GPR:$src))]>,
Requires<[IsARM, HasV6]> {
let Inst{19-16} = 0b1111;
}
- def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src, i32imm:$rot),
- opc, " $dst, $Src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>,
+ def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
+ IIC_iUNAsi, opc, " $dst, $src, ror $rot",
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]> {
let Inst{19-16} = 0b1111;
}
@@ -402,11 +461,11 @@ multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
/// register and one whose operand is a register rotated by 8/16/24.
multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
- opc, " $dst, $LHS, $RHS",
+ IIC_iALUr, opc, " $dst, $LHS, $RHS",
[(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
Requires<[IsARM, HasV6]>;
def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
- opc, " $dst, $LHS, $RHS, ror $rot",
+ IIC_iALUsi, opc, " $dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
(rotr GPR:$RHS, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]>;
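
The _rot variants in these extend multiclasses rotate the source by 8, 16, or 24 before extending, which is what the (rotr GPR:$RHS, rot_imm:$rot) patterns express. Taking UXTAH as a concrete case, a toy model:

  #include <cassert>
  #include <cstdint>

  static uint32_t rotr32(uint32_t v, unsigned amt) {
    return amt ? (v >> amt) | (v << (32 - amt)) : v;
  }

  // uxtah: rotate the second operand, zero-extend its low halfword, add.
  // (Toy semantics; rot must be 8, 16, or 24 per rot_imm.)
  uint32_t uxtah(uint32_t a, uint32_t b, unsigned rot) {
    return a + (rotr32(b, rot) & 0xffffu);
  }

  int main() {
    // Pull the halfword out of bits 31..16 of b and accumulate into a.
    assert(uxtah(1, 0xabcd0000u, 16) == 0xabcdu + 1);
  }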
@@ -417,37 +476,45 @@ let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, opc, " $dst, $a, $b",
+ DPFrm, IIC_iALUi, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]>;
+ Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{25} = 1;
+ }
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, opc, " $dst, $a, $b",
+ DPFrm, IIC_iALUr, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let isCommutable = Commutable;
+ let Inst{25} = 0;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, opc, " $dst, $a, $b",
+ DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]>;
+ Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{25} = 0;
+ }
// Carry setting variants
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, !strconcat(opc, "s $dst, $a, $b"),
+ DPFrm, IIC_iALUi, !strconcat(opc, "s $dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ let Defs = [CPSR];
+ let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, !strconcat(opc, "s $dst, $a, $b"),
+ DPFrm, IIC_iALUr, !strconcat(opc, "s $dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ let Defs = [CPSR];
+ let Inst{25} = 0;
}
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, !strconcat(opc, "s $dst, $a, $b"),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s $dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ let Defs = [CPSR];
+ let Inst{25} = 0;
}
}
}
@@ -467,23 +534,23 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let neverHasSideEffects = 1, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
- i32imm:$size),
+ i32imm:$size), NoItinerary,
"${instid:label} ${cpidx:cpentry}", []>;
let Defs = [SP], Uses = [SP] in {
def ADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p),
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
"@ ADJCALLSTACKUP $amt1",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt, pred:$p),
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
"@ ADJCALLSTACKDOWN $amt",
[(ARMcallseq_start timm:$amt)]>;
}
def DWARF_LOC :
-PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary,
".loc $file, $line, $col",
[(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
@@ -491,42 +558,42 @@ PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
- Pseudo, "$cp:\n\tadd$p $dst, pc, $a",
+ Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p $dst, pc, $a",
[(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
let canFoldAsLoad = 1 in
def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tldr$p $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p $dst, $addr",
[(set GPR:$dst, (load addrmodepc:$addr))]>;
def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h $dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b $dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh $dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb $dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tstr$p $src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p $src, $addr",
[(store GPR:$src, addrmodepc:$addr)]>;
def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h $src, $addr",
[(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b $src, $addr",
[(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
}
} // isNotDuplicable = 1
@@ -534,135 +601,152 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
- !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p $dst, pc, #PCRELV${:uid}")),
+def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
+ Pseudo, IIC_iALUi,
+ !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #${:private}PCRELV${:uid}")),
[]>;
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
- (ins i32imm:$label, i32imm:$id, pred:$p),
- Pseudo,
- !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p $dst, pc, #PCRELV${:uid}")),
- []>;
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Pseudo, IIC_iALUi,
+ !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, "
+ "(${label}_${id}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #${:private}PCRELV${:uid}")),
+ []> {
+ let Inst{25} = 1;
+}
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
-let isReturn = 1, isTerminator = 1 in
- def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> {
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", " lr", [(ARMretflag)]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
}
 // FIXME: remove when we have a way to mark an MI with these properties.
-// FIXME: $dst1 should be a def. But the extra ops must be in the end of the
-// operand list.
// FIXME: Should pc be an implicit operand like PICADD, etc?
-let isReturn = 1, isTerminator = 1 in
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs),
- (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
- LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ LdStMulFrm, IIC_Br, "ldm${p}${addr:submode} $addr, $wb",
[]>;
// On non-Darwin platforms R9 is callee-saved.
-let isCall = 1, Itinerary = IIC_Br,
- Defs = [R0, R1, R2, R3, R12, LR,
- D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- "bl ${func:call}",
- [(ARMcall tglobaladdr:$func)]>, Requires<[IsNotDarwin]>;
+ IIC_Br, "bl ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]>;
def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- "bl", " ${func:call}",
- [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsNotDarwin]>;
+ IIC_Br, "bl", " ${func:call}",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]>;
// ARMv5T and above
def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
- "blx $func",
- [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ IIC_Br, "blx $func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
let Inst{7-4} = 0b0011;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
}
- let Uses = [LR] in {
- // ARMv4T
- def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
- "mov lr, pc\n\tbx $func",
- [(ARMcall_nolink GPR:$func)]>, Requires<[IsNotDarwin]>;
+ // ARMv4T
+ def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ IIC_Br, "mov lr, pc\n\tbx $func",
+ [(ARMcall_nolink GPR:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
}
}
// On Darwin R9 is call-clobbered.
-let isCall = 1, Itinerary = IIC_Br,
- Defs = [R0, R1, R2, R3, R9, R12, LR,
- D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, R9, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- "bl ${func:call}",
- [(ARMcall tglobaladdr:$func)]>, Requires<[IsDarwin]>;
+ IIC_Br, "bl ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>;
def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- "bl", " ${func:call}",
- [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsDarwin]>;
+ IIC_Br, "bl", " ${func:call}",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsDarwin]>;
// ARMv5T and above
def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
- "blx $func",
+ IIC_Br, "blx $func",
[(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
let Inst{7-4} = 0b0011;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
}
- let Uses = [LR] in {
- // ARMv4T
- def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops),
- "mov lr, pc\n\tbx $func",
- [(ARMcall_nolink GPR:$func)]>, Requires<[IsDarwin]>;
+ // ARMv4T
+ def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ IIC_Br, "mov lr, pc\n\tbx $func",
+ [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
}
}
-let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
+let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
let isPredicable = 1 in
- def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target",
- [(br bb:$target)]>;
+ def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
+ "b $target", [(br bb:$target)]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- "mov pc, $target \n$jt",
+ IIC_Br, "mov pc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
- let Inst{27-26} = {0,0};
+ let Inst{27-25} = 0b000;
}
def BR_JTm : JTI<(outs),
(ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- "ldr pc, $target \n$jt",
- [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
- imm:$id)]> {
+ IIC_Br, "ldr pc, $target \n$jt",
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]> {
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-25} = 0b011;
}
def BR_JTadd : JTI<(outs),
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- "add pc, $target, $idx \n$jt",
+ IIC_Br, "add pc, $target, $idx \n$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
let Inst{20} = 0; // S bit
let Inst{24-21} = 0b0100;
- let Inst{27-26} = {0,0};
+ let Inst{27-25} = 0b000;
}
} // isNotDuplicable = 1, isIndirectBranch = 1
} // isBarrier = 1
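
BR_JTr, BR_JTm, and BR_JTadd are the three jump-table dispatch shapes: mov pc from a computed register, ldr pc straight from the table, and add pc, target, idx. A toy model of the table-load form, with function pointers standing in for basic-block addresses:

  #include <cassert>

  static int caseA() { return 1; }
  static int caseB() { return 2; }

  // BR_JTm in miniature: the table holds destination addresses, and
  // dispatch is a load followed by an indirect branch.
  int dispatch(unsigned idx) {
    static int (*const table[])() = { caseA, caseB }; // the jump table
    return table[idx]();                              // load + branch
  }

  int main() { assert(dispatch(1) == 2); }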
@@ -670,7 +754,7 @@ let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
- "b", " $target",
+ IIC_Br, "b", " $target",
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
}
@@ -679,133 +763,141 @@ let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
//
// Load
-let canFoldAsLoad = 1 in
-def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", " $dst, $addr",
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
-def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", " $dst, $addr", []>;
// Loads with zero extension
def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- "ldr", "h $dst, $addr",
- [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
+ IIC_iLoadr, "ldr", "h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
-def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- "ldr", "b $dst, $addr",
- [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ IIC_iLoadr, "ldr", "b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
// Loads with sign extension
def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- "ldr", "sh $dst, $addr",
- [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
+ IIC_iLoadr, "ldr", "sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- "ldr", "sb $dst, $addr",
- [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
+ IIC_iLoadr, "ldr", "sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
-let mayLoad = 1 in {
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>;
+ IIC_iLoadr, "ldr", "d $dst1, $addr",
+ []>, Requires<[IsARM, HasV5TE]>;
// Indexed loads
def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode2:$addr), LdFrm,
+ (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
"ldr", " $dst, $addr!", "$addr.base = $base_wb", []>;
def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), LdFrm,
+ (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
"ldr", " $dst, [$base], $offset", "$base = $base_wb", []>;
def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
"ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>;
def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
"ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>;
def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode2:$addr), LdFrm,
+ (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
"ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>;
def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdFrm,
+ (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
"ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
"ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
"ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
"ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
"ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
}
// Store
-def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
"str", " $src, $addr",
[(store GPR:$src, addrmode2:$addr)]>;
// Stores with truncate
-def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
"str", "h $src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
-def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
"str", "b $src, $addr",
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
-let mayStore = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm,
- "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>;
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStorer,
+ "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm,
+ (ins GPR:$src, GPR:$base, am2offset:$offset),
+ StFrm, IIC_iStoreru,
"str", " $src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STR_POST : AI2stwpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
+ StFrm, IIC_iStoreru,
"str", " $src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm,
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
"str", "h $src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm,
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
"str", "h $src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
+ StFrm, IIC_iStoreru,
"str", "b $src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
+ StFrm, IIC_iStoreru,
"str", "b $src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
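
The _PRE/_POST stores above pair the access with base writeback, and the pre_store/post_store patterns spell out the difference: pre-indexed stores at base+offset and writes that address back, post-indexed stores at the old base and then updates it. A toy model of the $base = $base_wb constraint:

  #include <cassert>
  #include <cstdint>

  // 'mem' stands in for memory (indexed by word, not byte, to keep the
  // toy small); 'base' is by reference to model the writeback tie.
  void str_pre(uint32_t *mem, uint32_t &base, int32_t off, uint32_t src) {
    base += off;       // pre-indexed: update first...
    mem[base] = src;   // ...then store at the new address
  }

  void str_post(uint32_t *mem, uint32_t &base, int32_t off, uint32_t src) {
    mem[base] = src;   // post-indexed: store at the old address...
    base += off;       // ...then update the base
  }

  int main() {
    uint32_t mem[8] = {0}, base = 0;
    str_pre(mem, base, 2, 7);   // writes mem[2], base becomes 2
    str_post(mem, base, 3, 9);  // writes mem[2], base becomes 5
    assert(mem[2] == 9 && base == 5);
  }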
@@ -814,17 +906,16 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
// Load / store multiple Instructions.
//
-// FIXME: $dst1 should be a def.
-let mayLoad = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def LDM : AXI4ld<(outs),
- (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
- LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode} $addr, $wb",
[]>;
-let mayStore = 1 in
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STM : AXI4st<(outs),
- (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
- LdStMulFrm, "stm${p}${addr:submode} $addr, $src1",
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode} $addr, $wb",
[]>;
//===----------------------------------------------------------------------===//
@@ -832,16 +923,42 @@ def STM : AXI4st<(outs),
//
let neverHasSideEffects = 1 in
-def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
- "mov", " $dst, $src", []>, UnaryDP;
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
- "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP;
+def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
+ "mov", " $dst, $src", []>, UnaryDP;
+def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+ DPSoRegFrm, IIC_iMOVsr,
+ "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm,
- "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP;
+def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
+ "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
+ let Inst{25} = 1;
+}
-def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+ DPFrm, IIC_iMOVi,
+ "movw", " $dst, $src",
+ [(set GPR:$dst, imm0_65535:$src)]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+let Constraints = "$src = $dst" in
+def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", " $dst, $imm",
+ [(set GPR:$dst,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+let Uses = [CPSR] in
+def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
"mov", " $dst, $src, rrx",
[(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
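
MOVi16/MOVTi16 give v6T2 targets a two-instruction materialization of any 32-bit constant: movw sets the low halfword and clears the top, while movt's pattern, (or (and src, 0xffff), lo16AllZero:$imm), replaces the high halfword, matching the lo16/hi16 transforms added earlier. As a sketch:

  #include <cassert>
  #include <cstdint>

  // movw: write the low 16 bits and clear the high 16.
  uint32_t movw(uint16_t imm16) { return imm16; }

  // movt: keep the low 16 bits of dst, replace the high 16 -- the same
  // dataflow as the (or (and src, 0xffff), hi << 16) pattern above.
  uint32_t movt(uint32_t dst, uint16_t imm16) {
    return (dst & 0xffffu) | (uint32_t(imm16) << 16);
  }

  int main() {
    uint32_t r = movw(0x5678);  // r = 0x00005678
    r = movt(r, 0x1234);        // r = 0x12345678
    assert(r == 0x12345678u);
  }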
@@ -849,11 +966,11 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
// due to flag operands.
let Defs = [CPSR] in {
-def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- "mov", "s $dst, $src, lsr #1",
+def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ IIC_iMOVsi, "mov", "s $dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- "mov", "s $dst, $src, asr #1",
+ IIC_iMOVsi, "mov", "s $dst, $src, asr #1",
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
}
@@ -901,6 +1018,24 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
// TODO: UXT(A){B|H}16
+def SBFX : I<(outs GPR:$dst),
+ (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
+ "sbfx", " $dst, $src, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+}
+
+def UBFX : I<(outs GPR:$dst),
+ (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
+ "ubfx", " $dst, $src, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+}
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
//
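
SBFX/UBFX extract a width-bit field starting at bit lsb, sign- or zero-extending the result. A toy semantic model (operands interpreted as in the assembly syntax; the encoding actually stores width-1, which is not modeled here):

  #include <cassert>
  #include <cstdint>

  // ubfx: zero-extend the field.  width is 1..32-lsb.
  uint32_t ubfx(uint32_t src, unsigned lsb, unsigned width) {
    return (src >> lsb) & ((width < 32 ? (1u << width) : 0) - 1u);
  }

  // sbfx: same field, sign-extended from its top bit.
  int32_t sbfx(uint32_t src, unsigned lsb, unsigned width) {
    uint32_t field = ubfx(src, lsb, width);
    uint32_t sign = 1u << (width - 1);
    return (int32_t)((field ^ sign) - sign); // sign-extend the field
  }

  int main() {
    assert(ubfx(0x00000f00u, 8, 4) == 0xfu);
    assert(sbfx(0x00000f00u, 8, 4) == -1);
  }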
@@ -923,30 +1058,36 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- "rsb", " $dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>;
+ IIC_iALUi, "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
+ let Inst{25} = 1;
+}
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- "rsb", " $dst, $a, $b",
+ IIC_iALUsr, "rsb", " $dst, $a, $b",
[(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>;
// RSB with 's' bit set.
let Defs = [CPSR] in {
def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- "rsb", "s $dst, $a, $b",
- [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>;
+ IIC_iALUi, "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
+ let Inst{25} = 1;
+}
def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- "rsb", "s $dst, $a, $b",
+ IIC_iALUsr, "rsb", "s $dst, $a, $b",
[(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>;
}
let Uses = [CPSR] in {
def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, "rsc", " $dst, $a, $b",
+ DPFrm, IIC_iALUi, "rsc", " $dst, $a, $b",
[(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]>;
+ Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{25} = 1;
+}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, "rsc", " $dst, $a, $b",
+ DPSoRegFrm, IIC_iALUsr, "rsc", " $dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]>;
}
@@ -954,11 +1095,13 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
// FIXME: Allow these to be predicated.
let Defs = [CPSR], Uses = [CPSR] in {
def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, "rscs $dst, $a, $b",
+ DPFrm, IIC_iALUi, "rscs $dst, $a, $b",
[(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]>;
+ Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{25} = 1;
+}
def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, "rscs $dst, $a, $b",
+ DPSoRegFrm, IIC_iALUsr, "rscs $dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]>;
}
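
RSB subtracts the register from the shifter operand (dst = b - a), the reverse of sub, which is why only the immediate and shifted-register forms are defined here; the plain reg/reg case is just sub with its operands swapped, as the comment above the defs notes. A one-line model:

  #include <cassert>
  #include <cstdint>

  // rsb dst, a, #b  computes  b - a  (operands reversed relative to sub).
  uint32_t rsb(uint32_t a, uint32_t b) { return b - a; }

  int main() { assert(rsb(1, 10) == 9); }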
@@ -992,16 +1135,27 @@ defm EOR : AsI1_bin_irs<0b0001, "eor",
defm BIC : AsI1_bin_irs<0b1110, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm,
+def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
+ "bfc", " $dst, $imm", "$src = $dst",
+ [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", " $dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP;
def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
- "mvn", " $dst, $src",
+ IIC_iMOVsr, "mvn", " $dst, $src",
[(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
- "mvn", " $dst, $imm",
- [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP;
+def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
+ IIC_iMOVi, "mvn", " $dst, $imm",
+ [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
+ let Inst{25} = 1;
+}
def : ARMPat<(and GPR:$src, so_imm_not:$imm),
(BICri GPR:$src, so_imm_not:$imm)>;
@@ -1012,43 +1166,48 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
let isCommutable = 1 in
def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- "mul", " $dst, $a, $b",
+ IIC_iMUL32, "mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- "mla", " $dst, $a, $b, $c",
+ IIC_iMAC32, "mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
+def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ IIC_iMAC32, "mls", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
+ Requires<[IsARM, HasV6T2]>;
+
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b),
+ (ins GPR:$a, GPR:$b), IIC_iMUL64,
"smull", " $ldst, $hdst, $a, $b", []>;
def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b),
+ (ins GPR:$a, GPR:$b), IIC_iMUL64,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b),
+ (ins GPR:$a, GPR:$b), IIC_iMAC64,
"smlal", " $ldst, $hdst, $a, $b", []>;
def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b),
+ (ins GPR:$a, GPR:$b), IIC_iMAC64,
"umlal", " $ldst, $hdst, $a, $b", []>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b),
+ (ins GPR:$a, GPR:$b), IIC_iMAC64,
"umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- "smmul", " $dst, $a, $b",
+ IIC_iMUL32, "smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
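
SMMUL keeps only the most significant word of the 64-bit signed product, which is exactly the mulhs node it is matched from above:

  #include <cassert>
  #include <cstdint>

  // smmul: high 32 bits of the signed 64-bit product (mulhs).
  int32_t smmul(int32_t a, int32_t b) {
    return (int32_t)(((int64_t)a * b) >> 32);
  }

  int main() { assert(smmul(INT32_MIN, 2) == -1); }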
@@ -1056,7 +1215,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
}
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- "smmla", " $dst, $a, $b, $c",
+ IIC_iMAC32, "smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@@ -1064,7 +1223,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- "smmls", " $dst, $a, $b, $c",
+ IIC_iMAC32, "smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b1101;
@@ -1072,7 +1231,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- !strconcat(opc, "bb"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1081,7 +1240,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- !strconcat(opc, "bt"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1090,7 +1249,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- !strconcat(opc, "tb"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1099,7 +1258,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- !strconcat(opc, "tt"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1108,7 +1267,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- !strconcat(opc, "wb"), " $dst, $a, $b",
+ IIC_iMUL16, !strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1117,7 +1276,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- !strconcat(opc, "wt"), " $dst, $a, $b",
+ IIC_iMUL16, !strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1129,7 +1288,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
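In the AI_smul multiclass above, the suffix letters select halfword operands: b takes the bottom halfword, t the top, and the w forms multiply a word by a halfword and keep the upper bits of the widened product. A hedged C++ sketch of the semantics these patterns model (the function names are illustrative):

#include <cstdint>

// bb: bottom x bottom; bt: bottom x top; wb: word x bottom halfword.
int32_t smulbb(int32_t a, int32_t b) { return (int16_t)a * (int16_t)b; }
int32_t smulbt(int32_t a, int32_t b) { return (int16_t)a * (int16_t)(b >> 16); }
int32_t smulwb(int32_t a, int32_t b) {
  // The pattern writes this as a 32-bit multiply followed by sra #16;
  // widening through 64 bits avoids signed-overflow UB in the sketch.
  return (int32_t)(((int64_t)a * (int16_t)b) >> 16);
}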
multiclass AI_smla<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>,
@@ -1139,7 +1298,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1148,7 +1307,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1157,7 +1316,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1166,7 +1325,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1175,7 +1334,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1194,7 +1353,7 @@ defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Misc. Arithmetic Instructions.
//
-def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src),
+def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
"clz", " $dst, $src",
[(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
let Inst{7-4} = 0b0001;
@@ -1202,7 +1361,7 @@ def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src),
let Inst{19-16} = 0b1111;
}
-def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
+def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
"rev", " $dst, $src",
[(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0011;
@@ -1210,7 +1369,7 @@ def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
let Inst{19-16} = 0b1111;
}
-def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
+def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
"rev16", " $dst, $src",
[(set GPR:$dst,
(or (and (srl GPR:$src, (i32 8)), 0xFF),
@@ -1223,7 +1382,7 @@ def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
let Inst{19-16} = 0b1111;
}
-def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src),
+def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
"revsh", " $dst, $src",
[(set GPR:$dst,
(sext_inreg
@@ -1237,7 +1396,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src),
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]>,
@@ -1254,7 +1413,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]>, Requires<[IsARM, HasV6]> {
@@ -1300,21 +1459,23 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
- "mov", " $dst, $true",
+ IIC_iCMOVr, "mov", " $dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP;
def MOVCCs : AI1<0b1101, (outs GPR:$dst),
- (ins GPR:$false, so_reg:$true), DPSoRegFrm,
+ (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
"mov", " $dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP;
def MOVCCi : AI1<0b1101, (outs GPR:$dst),
- (ins GPR:$false, so_imm:$true), DPFrm,
+ (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
"mov", " $dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP;
+ RegConstraint<"$false = $dst">, UnaryDP {
+ let Inst{25} = 1;
+}
//===----------------------------------------------------------------------===//
@@ -1324,14 +1485,14 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// __aeabi_read_tp preserves the registers r1-r3.
let isCall = 1,
Defs = [R0, R12, LR, CPSR] in {
- def TPsoft : ABXI<0b1011, (outs), (ins),
+ def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br,
"bl __aeabi_read_tp",
[(set R0, ARMthread_pointer)]>;
}
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
-// eh_sjlj_setjmp() is a three instruction sequence to store the return
+// eh_sjlj_setjmp() is an instruction sequence to store the return
// address and save #0 in R0 for the non-longjmp case.
// Since by its nature we may be coming from some other function to get
// here, and we're using the stack frame for the containing function to
@@ -1342,13 +1503,19 @@ let isCall = 1,
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
let Defs =
- [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR,
- D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
+ D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+ D31 ] in {
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src),
- AddrModeNone, SizeSpecial, IndexModeNone, Pseudo,
- "add r0, pc, #4\n\t"
- "str r0, [$src, #+4]\n\t"
- "mov r0, #0 @ eh_setjmp", "",
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary,
+ "str sp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "add r12, pc, #8\n\t"
+ "str r12, [$src, #+4]\n\t"
+ "mov r0, #0\n\t"
+ "add pc, pc, #0\n\t"
+ "mov r0, #1 @ eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
}
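Reading the asm string above, the jump-buffer layout this sequence assumes appears to be: the resume address at offset +4 and the stack pointer at +8, with the normal path returning 0 and a longjmp landing on the final mov r0, #1. A hypothetical C++ sketch of that layout (field names are invented for illustration):

#include <cstdint>

struct EhSjLjBuf {          // hypothetical view of the buffer at $src
  uint32_t Slot0;           // +0: not written by this sequence
  uint32_t ResumePC;        // +4: "str r12, [$src, #+4]"
  uint32_t SavedSP;         // +8: "str sp, [$src, #+8]"
};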
@@ -1366,25 +1533,36 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Two piece so_imms.
let isReMaterializable = 1 in
-def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo,
+def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
+ Pseudo, IIC_iMOVi,
"mov", " $dst, $src",
- [(set GPR:$dst, so_imm2part:$src)]>;
+ [(set GPR:$dst, so_imm2part:$src)]>,
+ Requires<[IsARM, NoV6T2]>;
def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
- (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
+ (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
- (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
+ (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
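An ARM so_imm is an 8-bit value rotated right by an even amount; constants that fail that test are what so_imm2part splits, with the paired ORRri/EORri (or MOVi2pieces) above materializing the two halves. A sketch of the encodability check, under that assumption:

#include <cstdint>

static bool isSoImm(uint32_t v) {
  for (unsigned r = 0; r < 32; r += 2) {
    // Rotating left by r undoes a rotate-right encoding of r.
    uint32_t rot = r ? ((v << r) | (v >> (32 - r))) : v;
    if ((rot & ~0xFFu) == 0)
      return true;
  }
  return false;
}
// isSoImm(0xFF000000) holds; isSoImm(0x00FF00FF) does not, so that
// constant would be built in two pieces.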
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable. Remove
+// when we can do generalized remat.
+let isReMaterializable = 1 in
+def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
+ "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+ [(set GPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsARM, HasV6T2]>;
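A minimal sketch of the lo16/hi16 split implied by the ${src:lo16} and ${src:hi16} operand modifiers: movw writes the low halfword and clears the high one, then movt overwrites the high halfword.

#include <cstdint>

uint32_t buildImm32(uint32_t imm) {
  uint32_t rd = imm & 0xFFFFu;               // movw rd, #lo16(imm)
  rd = (rd & 0xFFFFu) | (imm & 0xFFFF0000u); // movt rd, #hi16(imm)
  return rd;                                 // rd == imm
}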
// TODO: add,sub,and, 3-instr forms?
// Direct calls
def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
- Requires<[IsNotDarwin]>;
+ Requires<[IsARM, IsNotDarwin]>;
def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
- Requires<[IsDarwin]>;
+ Requires<[IsARM, IsDarwin]>;
// zextload i1 -> zextload i8
def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index a62597bad840..cd370aa97adb 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -65,8 +65,28 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
-def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
- SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
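A semantic sketch (not the LLVM API) of why the profile leaves the result type independent of the source: VDUPLANE replicates one source lane across every lane of a possibly wider result.

#include <array>
#include <cstdint>

template <unsigned NOut, unsigned NIn>
std::array<uint32_t, NOut> vduplane(const std::array<uint32_t, NIn> &src,
                                    unsigned lane) {
  std::array<uint32_t, NOut> out{};
  for (auto &e : out)
    e = src[lane];          // every result lane gets src[lane]
  return out;
}
// vduplane<4, 2>({10, 20}, 1) -> {20, 20, 20, 20}   (v2i32 -> v4i32)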
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
//===----------------------------------------------------------------------===//
// NEON operand definitions
@@ -87,28 +107,409 @@ def addrmode_neonldstm : Operand<i32>,
//===----------------------------------------------------------------------===//
/* TODO: Take advantage of vldm.
-let mayLoad = 1 in {
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
+ IIC_fpLoadm,
"vldm${addr:submode} ${addr:base}, $dst1",
- []>;
+ []> {
+ let Inst{27-25} = 0b110;
+ let Inst{20} = 1;
+ let Inst{11-9} = 0b101;
+}
def VLDMS : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
+ IIC_fpLoadm,
"vldm${addr:submode} ${addr:base}, $dst1",
- []>;
+ []> {
+ let Inst{27-25} = 0b110;
+ let Inst{20} = 1;
+ let Inst{11-9} = 0b101;
+}
}
*/
// Use vldmia to load a Q register as a D register pair.
-def VLDRQ : NI<(outs QPR:$dst), (ins GPR:$addr),
+def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
+ IIC_fpLoadm,
"vldmia $addr, ${dst:dregpair}",
- [(set QPR:$dst, (v2f64 (load GPR:$addr)))]>;
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
+ let Inst{27-25} = 0b110;
+ let Inst{24} = 0; // P bit
+ let Inst{23} = 1; // U bit
+ let Inst{20} = 1;
+ let Inst{11-9} = 0b101;
+}
// Use vstmia to store a Q register as a D register pair.
-def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
+def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
+ IIC_fpStorem,
"vstmia $addr, ${src:dregpair}",
- [(store (v2f64 QPR:$src), GPR:$addr)]>;
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
+ let Inst{27-25} = 0b110;
+ let Inst{24} = 0; // P bit
+ let Inst{23} = 1; // U bit
+ let Inst{20} = 0;
+ let Inst{11-9} = 0b101;
+}
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
+ !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
+ [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
+class VLD1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
+ !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
+ [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
+
+def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>;
+def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>;
+def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>;
+def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>;
+def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>;
+
+def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>;
+def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>;
+def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>;
+def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>;
+def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2D<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD2,
+ !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
+class VLD2Q<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b10,0b0011,op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD2,
+ !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ "", []>;
+
+def VLD2d8 : VLD2D<0b0000, "vld2.8">;
+def VLD2d16 : VLD2D<0b0100, "vld2.16">;
+def VLD2d32 : VLD2D<0b1000, "vld2.32">;
+def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>;
+
+def VLD2q8 : VLD2Q<0b0000, "vld2.8">;
+def VLD2q16 : VLD2Q<0b0100, "vld2.16">;
+def VLD2q32 : VLD2Q<0b1000, "vld2.32">;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr), IIC_VLD3,
+ !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;
+class VLD3WB<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$addr), IIC_VLD3,
+ !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"),
+ "$addr.addr = $wb", []>;
+
+def VLD3d8 : VLD3D<0b0000, "vld3.8">;
+def VLD3d16 : VLD3D<0b0100, "vld3.16">;
+def VLD3d32 : VLD3D<0b1000, "vld3.32">;
+def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
+
+// vld3 to double-spaced even registers.
+def VLD3q8a : VLD3WB<0b0000, "vld3.8">;
+def VLD3q16a : VLD3WB<0b0100, "vld3.16">;
+def VLD3q32a : VLD3WB<0b1000, "vld3.32">;
+
+// vld3 to double-spaced odd registers.
+def VLD3q8b : VLD3WB<0b0000, "vld3.8">;
+def VLD3q16b : VLD3WB<0b0100, "vld3.16">;
+def VLD3q32b : VLD3WB<0b1000, "vld3.32">;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b10,0b0000,op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD4,
+ !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ "", []>;
+class VLD4WB<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b10,0b0001,op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$addr), IIC_VLD4,
+ !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ "$addr.addr = $wb", []>;
+
+def VLD4d8 : VLD4D<0b0000, "vld4.8">;
+def VLD4d16 : VLD4D<0b0100, "vld4.16">;
+def VLD4d32 : VLD4D<0b1000, "vld4.32">;
+def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;
+
+// vld4 to double-spaced even registers.
+def VLD4q8a : VLD4WB<0b0000, "vld4.8">;
+def VLD4q16a : VLD4WB<0b0100, "vld4.16">;
+def VLD4q32a : VLD4WB<0b1000, "vld4.32">;
+
+// vld4 to double-spaced odd registers.
+def VLD4q8b : VLD4WB<0b0000, "vld4.8">;
+def VLD4q16b : VLD4WB<0b0100, "vld4.16">;
+def VLD4q32b : VLD4WB<0b1000, "vld4.32">;
+
+// VLD1LN : Vector Load (single element to one lane)
+// FIXME: Not yet implemented.
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, string OpcodeStr>
+ : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2,
+ !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
+ "$src1 = $dst1, $src2 = $dst2", []>;
+
+def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">;
+def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">;
+def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">;
+
+// vld2 to double-spaced even registers.
+def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">;
+def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">;
+
+// vld2 to double-spaced odd registers.
+def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">;
+def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, string OpcodeStr>
+ : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3,
+ !strconcat(OpcodeStr,
+ "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+def VLD3LNd8 : VLD3LN<0b0010, "vld3.8">;
+def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">;
+def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">;
+
+// vld3 to double-spaced even registers.
+def VLD3LNq16a: VLD3LN<0b0110, "vld3.16">;
+def VLD3LNq32a: VLD3LN<0b1010, "vld3.32">;
+
+// vld3 to double-spaced odd registers.
+def VLD3LNq16b: VLD3LN<0b0110, "vld3.16">;
+def VLD3LNq32b: VLD3LN<0b1010, "vld3.32">;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, string OpcodeStr>
+ : NLdSt<1,0b10,op11_8,0b0000,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4,
+ !strconcat(OpcodeStr,
+ "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+def VLD4LNd8 : VLD4LN<0b0011, "vld4.8">;
+def VLD4LNd16 : VLD4LN<0b0111, "vld4.16">;
+def VLD4LNd32 : VLD4LN<0b1011, "vld4.32">;
+
+// vld4 to double-spaced even registers.
+def VLD4LNq16a: VLD4LN<0b0111, "vld4.16">;
+def VLD4LNq32a: VLD4LN<0b1011, "vld4.32">;
+
+// vld4 to double-spaced odd registers.
+def VLD4LNq16b: VLD4LN<0b0111, "vld4.16">;
+def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
+
+// VLD1DUP : Vector Load (single element to all lanes)
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+// FIXME: Not yet implemented.
+} // mayLoad = 1, hasExtraDefRegAllocReq = 1
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
+ [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
+class VST1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
+ !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
+ [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
+
+let hasExtraSrcRegAllocReq = 1 in {
+def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>;
+def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>;
+def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>;
+def VST1df : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>;
+def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>;
+
+def VST1q8 : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>;
+def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>;
+def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>;
+def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>;
+def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>;
+} // hasExtraSrcRegAllocReq
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2D<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b00,0b1000,op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;
+class VST2Q<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b00,0b0011,op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ "", []>;
+
+def VST2d8 : VST2D<0b0000, "vst2.8">;
+def VST2d16 : VST2D<0b0100, "vst2.16">;
+def VST2d32 : VST2D<0b1000, "vst2.32">;
+def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>;
+
+def VST2q8 : VST2Q<0b0000, "vst2.8">;
+def VST2q16 : VST2Q<0b0100, "vst2.16">;
+def VST2q32 : VST2Q<0b1000, "vst2.32">;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b00,0b0100,op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;
+class VST3WB<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"),
+ "$addr.addr = $wb", []>;
+
+def VST3d8 : VST3D<0b0000, "vst3.8">;
+def VST3d16 : VST3D<0b0100, "vst3.16">;
+def VST3d32 : VST3D<0b1000, "vst3.32">;
+def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
+ IIC_VST,
+ "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>;
+
+// vst3 to double-spaced even registers.
+def VST3q8a : VST3WB<0b0000, "vst3.8">;
+def VST3q16a : VST3WB<0b0100, "vst3.16">;
+def VST3q32a : VST3WB<0b1000, "vst3.32">;
+
+// vst3 to double-spaced odd registers.
+def VST3q8b : VST3WB<0b0000, "vst3.8">;
+def VST3q16b : VST3WB<0b0100, "vst3.16">;
+def VST3q32b : VST3WB<0b1000, "vst3.32">;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b00,0b0000,op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ "", []>;
+class VST4WB<bits<4> op7_4, string OpcodeStr>
+ : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ "$addr.addr = $wb", []>;
+
+def VST4d8 : VST4D<0b0000, "vst4.8">;
+def VST4d16 : VST4D<0b0100, "vst4.16">;
+def VST4d32 : VST4D<0b1000, "vst4.32">;
+def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ DPR:$src4), IIC_VST,
+ "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;
+
+// vst4 to double-spaced even registers.
+def VST4q8a : VST4WB<0b0000, "vst4.8">;
+def VST4q16a : VST4WB<0b0100, "vst4.16">;
+def VST4q32a : VST4WB<0b1000, "vst4.32">;
+
+// vst4 to double-spaced odd registers.
+def VST4q8b : VST4WB<0b0000, "vst4.8">;
+def VST4q16b : VST4WB<0b0100, "vst4.16">;
+def VST4q32b : VST4WB<0b1000, "vst4.32">;
+
+// VST1LN : Vector Store (single element from one lane)
+// FIXME: Not yet implemented.
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, string OpcodeStr>
+ : NLdSt<1,0b00,op11_8,0b0000, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VST,
+ !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
+ "", []>;
+
+def VST2LNd8 : VST2LN<0b0000, "vst2.8">;
+def VST2LNd16 : VST2LN<0b0100, "vst2.16">;
+def VST2LNd32 : VST2LN<0b1000, "vst2.32">;
+
+// vst2 to double-spaced even registers.
+def VST2LNq16a: VST2LN<0b0100, "vst2.16">;
+def VST2LNq32a: VST2LN<0b1000, "vst2.32">;
+
+// vst2 to double-spaced odd registers.
+def VST2LNq16b: VST2LN<0b0100, "vst2.16">;
+def VST2LNq32b: VST2LN<0b1000, "vst2.32">;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, string OpcodeStr>
+ : NLdSt<1,0b00,op11_8,0b0000, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST,
+ !strconcat(OpcodeStr,
+ "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;
+
+def VST3LNd8 : VST3LN<0b0010, "vst3.8">;
+def VST3LNd16 : VST3LN<0b0110, "vst3.16">;
+def VST3LNd32 : VST3LN<0b1010, "vst3.32">;
+
+// vst3 to double-spaced even registers.
+def VST3LNq16a: VST3LN<0b0110, "vst3.16">;
+def VST3LNq32a: VST3LN<0b1010, "vst3.32">;
+
+// vst3 to double-spaced odd registers.
+def VST3LNq16b: VST3LN<0b0110, "vst3.16">;
+def VST3LNq32b: VST3LN<0b1010, "vst3.32">;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, string OpcodeStr>
+ : NLdSt<1,0b00,op11_8,0b0000, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST,
+ !strconcat(OpcodeStr,
+ "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
+ "", []>;
+
+def VST4LNd8 : VST4LN<0b0011, "vst4.8">;
+def VST4LNd16 : VST4LN<0b0111, "vst4.16">;
+def VST4LNd32 : VST4LN<0b1011, "vst4.32">;
+
+// vst4 to double-spaced even registers.
+def VST4LNq16a: VST4LN<0b0111, "vst4.16">;
+def VST4LNq32a: VST4LN<0b1011, "vst4.32">;
+
+// vst4 to double-spaced odd registers.
+def VST4LNq16b: VST4LN<0b0111, "vst4.16">;
+def VST4LNq32b: VST4LN<0b1011, "vst4.32">;
+
+} // mayStore = 1, hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
@@ -117,18 +518,27 @@ def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
-def SubReg_i8_reg : SDNodeXForm<imm, [{
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
-def SubReg_i16_reg : SDNodeXForm<imm, [{
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
-def SubReg_i32_reg : SDNodeXForm<imm, [{
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
-def SubReg_f64_reg : SDNodeXForm<imm, [{
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;
+def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
+}]>;
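A worked check of the subregister index math above (arm_dsubreg_0 being 5): a byte lane in a Q register lives in dsubreg_0 for lanes 0-7 and dsubreg_1 for lanes 8-15, and similarly for the wider element types.

#include <cassert>

int dsubRegForByteLane(unsigned lane) { return 5 + lane / 8; } // v16i8
int dsubRegForHalfLane(unsigned lane) { return 5 + lane / 4; } // v8i16
int dsubRegForWordLane(unsigned lane) { return 5 + lane / 2; } // v4i32

int main() {
  assert(dsubRegForByteLane(11) == 6); // lane 11 -> second D register
  assert(dsubRegForWordLane(1) == 5);  // lane 1  -> first D register
}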
// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane : SDNodeXForm<imm, [{
@@ -150,117 +560,337 @@ class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
+// Basic 2-register operations, scalar single-precision.
+class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
+ IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+
+class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
+ : NEONFPPat<(ResTy (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+// Basic 2-register intrinsics, scalar single-precision
+class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+
+class N2VDIntsPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ InstrItinClass itin, string OpcodeStr,
+ ValueType TyD, ValueType TyQ, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
- (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
// Long 2-register intrinsics. (This is currently only used for VMOVL and is
// derived from N2VImm instead of N2V because of the way the size is encoded.)
class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
- bit op6, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp>
+ bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst),
- (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+ "$src1 = $dst1, $src2 = $dst2", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
+ (ins QPR:$src1, QPR:$src2), itin,
+ !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+ "$src1 = $dst1, $src2 = $dst2", []>;
+
// Basic 3-register operations, both double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
}
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (Ty DPR:$dst),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, ValueType Ty, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ IIC_VMULi16D,
+ !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (Ty DPR:$dst),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (NEONvduplane (Ty DPR_8:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (ResTy QPR:$dst),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ IIC_VMULi16Q,
+ !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (ResTy QPR:$dst),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+// Basic 3-register operations, scalar single-precision
+class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
+ let isCommutable = Commutable;
+}
+class N3VDsPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
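Conceptually (assumed semantics, not the LLVM API), the N3VDsPat pattern above runs a scalar f32 operation on the NEON unit: each SPR operand is inserted into s-subreg 0 of an IMPLICIT_DEF D register, the 2 x f32 vector op executes, and s-subreg 0 of the result is extracted back.

#include <array>

using V2F32 = std::array<float, 2>;

template <typename VecOp>
float scalarViaNeon(float a, float b, VecOp op) {
  V2F32 va{a, 0.f};       // INSERT_SUBREG (IMPLICIT_DEF), a, arm_ssubreg_0
  V2F32 vb{b, 0.f};
  V2F32 r = op(va, vb);   // the D-register NEON instruction
  return r[0];            // EXTRACT_SUBREG ..., arm_ssubreg_0
}
// e.g. scalarViaNeon(1.f, 2.f, [](V2F32 x, V2F32 y) {
//        return V2F32{x[0] + y[0], x[1] + y[1]}; });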
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (Ty DPR:$dst),
+ (Ty (IntOp (Ty DPR:$src1),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (Ty DPR:$dst),
+ (Ty (IntOp (Ty DPR:$src1),
+ (Ty (NEONvduplane (Ty DPR_8:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (ResTy QPR:$dst),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (ResTy QPR:$dst),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
// Multiply-Add/Sub operations, both double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
+ InstrItinClass itin, string OpcodeStr,
+ ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
!strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
[(set DPR:$dst, (Ty (OpNode DPR:$src1,
(Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$dst),
+ (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ [(set (Ty DPR:$dst),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$src2,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$dst),
+ (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ [(set (Ty DPR:$dst),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$src2,
+ (Ty (NEONvduplane (Ty DPR_8:$src3),
+ imm:$lane)))))))]>;
+
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
+ InstrItinClass itin, string OpcodeStr, ValueType Ty,
+ SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
!strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
[(set QPR:$dst, (Ty (OpNode QPR:$src1,
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst),
+ (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ [(set (ResTy QPR:$dst),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$src2,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst),
+ (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ [(set (ResTy QPR:$dst),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$src2,
+ (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+ imm:$lane)))))))]>;
+
+// Multiply-Add/Sub operations, scalar single-precision
+class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType Ty, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;
+
+class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
!strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
(OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
!strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
@@ -268,19 +898,44 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ InstrItinClass itin, string OpcodeStr,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3),
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin,
!strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
[(set QPR:$dst,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ [(set (ResTy QPR:$dst),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$src2),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ [(set (ResTy QPR:$dst),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$src2),
+ (OpTy (NEONvduplane (OpTy DPR_8:$src3),
+ imm:$lane)))))]>;
+
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, ValueType TyD, ValueType TyQ,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins QPR:$src1, QPR:$src2),
+ (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -288,21 +943,40 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType TyQ, ValueType TyD,
+ InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2),
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (ResTy QPR:$dst),
+ (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ [(set (ResTy QPR:$dst),
+ (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy (NEONvduplane (OpTy DPR_8:$src2),
+ imm:$lane)))))]>;
// Wide 3-register intrinsics.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, ValueType TyQ, ValueType TyD,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2),
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD,
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
[(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -313,13 +987,13 @@ class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
// Pairwise long 2-register accumulate intrinsics,
@@ -329,29 +1003,31 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
!strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
!strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
- bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM),
+ (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin,
!strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
[(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
- bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM),
+ (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
!strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
[(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
@@ -360,17 +1036,17 @@ class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, ValueType ResTy,
ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
- (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM),
+ (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD,
!strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
[(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
(i32 imm:$SIMM))))]>;
// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
- bit op6, bit op4, string OpcodeStr, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
- (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM),
+ (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
!strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
[(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
(i32 imm:$SIMM))))]>;
@@ -381,6 +1057,7 @@ class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
+ IIC_VPALiD,
!strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
[(set DPR:$dst, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
@@ -388,6 +1065,7 @@ class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
+ IIC_VPALiD,
!strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
[(set QPR:$dst, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
@@ -398,12 +1076,14 @@ class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
+ IIC_VSHLiD,
!strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
[(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
+ IIC_VSHLiQ,
!strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
[(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
@@ -413,14 +1093,14 @@ class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM),
+ (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD,
!strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM),
+ (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ,
!strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
@@ -428,50 +1108,68 @@ class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
// Multiclasses
//===----------------------------------------------------------------------===//
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
+
// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
// 64-bit vector types.
- def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
- v8i8, v8i8, OpNode, Commutable>;
- def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
- v4i16, v4i16, OpNode, Commutable>;
- def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
- v2i32, v2i32, OpNode, Commutable>;
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ !strconcat(OpcodeStr, "8"), v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ !strconcat(OpcodeStr, "16"), v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ !strconcat(OpcodeStr, "32"), v2i32, v2i32, OpNode, Commutable>;
// 128-bit vector types.
- def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
- v16i8, v16i8, OpNode, Commutable>;
- def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
- v8i16, v8i16, OpNode, Commutable>;
- def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
- v4i32, v4i32, OpNode, Commutable>;
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ !strconcat(OpcodeStr, "8"), v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ !strconcat(OpcodeStr, "16"), v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ !strconcat(OpcodeStr, "32"), v4i32, v4i32, OpNode, Commutable>;
+}
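+
+// As an illustrative sketch (VFOO and its encoding bits are invented here;
+// VCEQ and VTST below are real users), N3V_QHS now takes four itineraries,
+// one per register width and element-size group:
+//   defm VFOO : N3V_QHS<1, 0, 0b1111, 1, IIC_VBINi4D, IIC_VBINi4D,
+//                       IIC_VBINi4Q, IIC_VBINi4Q, "vfoo.i", add, 1>;
+// itinD16 covers the 8- and 16-bit D defs, itinD32 the 32-bit D def, and
+// likewise for the Q forms.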
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+  def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
+  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
+                     !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
+  def v8i16 : N3VQSL16<0b01, op11_8,
+                       !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
+  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
+                     !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
}
// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
string OpcodeStr, SDNode OpNode, bit Commutable = 0>
- : N3V_QHS<op24, op23, op11_8, op4, OpcodeStr, OpNode, Commutable> {
- def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
- v1i64, v1i64, OpNode, Commutable>;
- def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
- v2i64, v2i64, OpNode, Commutable>;
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ !strconcat(OpcodeStr, "64"), v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ !strconcat(OpcodeStr, "64"), v2i64, v2i64, OpNode, Commutable>;
}
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
- bits<5> op11_7, bit op6, bit op4, string OpcodeStr,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr,
Intrinsic IntOp> {
def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
- !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
+ itin, !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
- !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
+ itin, !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
- !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
+ itin, !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
}
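
// A hypothetical instantiation showing the new itin operand (the opcode,
// encoding bits, itinerary and intrinsic here are invented for this sketch):
//   defm VFOON : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,1, IIC_VQUNAiD,
//                            "vfoon.i", int_arm_neon_vfoon>;
// expands to v8i8, v4i16 and v2i32 defs narrowing from 16-, 32- and 64-bit
// source elements respectively.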
@@ -480,11 +1178,11 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, Intrinsic IntOp> {
def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+ IIC_VQUNAiD, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ IIC_VQUNAiD, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ IIC_VQUNAiD, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}
@@ -492,38 +1190,56 @@ multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
// 64-bit vector types.
- def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16, !strconcat(OpcodeStr,"16"),
v4i16, v4i16, IntOp, Commutable>;
- def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32, !strconcat(OpcodeStr,"32"),
v2i32, v2i32, IntOp, Commutable>;
// 128-bit vector types.
- def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16, !strconcat(OpcodeStr,"16"),
v8i16, v8i16, IntOp, Commutable>;
- def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32, !strconcat(OpcodeStr,"32"),
v4i32, v4i32, IntOp, Commutable>;
}
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, Intrinsic IntOp> {
+  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+                          !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
+  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+                        !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
+  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+                          !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
+  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+                        !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
+}
+
// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
- : N3VInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
- def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
- v8i8, v8i8, IntOp, Commutable>;
- def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
- v16i8, v16i8, IntOp, Commutable>;
+ : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
+ !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
+ !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp, Commutable>;
}
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
- : N3VInt_QHS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
- def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"),
- v1i64, v1i64, IntOp, Commutable>;
- def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"),
- v2i64, v2i64, IntOp, Commutable>;
+ : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
+ !strconcat(OpcodeStr,"64"), v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
+ !strconcat(OpcodeStr,"64"), v2i64, v2i64, IntOp, Commutable>;
}
@@ -544,19 +1260,29 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
- def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
- v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
- v2i64, v2i32, IntOp, Commutable>;
+ InstrItinClass itin, string OpcodeStr,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
+ !strconcat(OpcodeStr,"16"), v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
+ !strconcat(OpcodeStr,"32"), v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
- : N3VLInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
- def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
- v8i16, v8i8, IntOp, Commutable>;
+ InstrItinClass itin, string OpcodeStr,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
+ !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp, Commutable>;
}
@@ -576,43 +1302,58 @@ multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, SDNode OpNode> {
// 64-bit vector types.
- def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4,
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
!strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
- def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4,
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
!strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
- def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4,
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
!strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;
// 128-bit vector types.
- def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4,
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
!strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
- def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4,
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
!strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
- def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4,
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
!strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
}
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>;
+}
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, Intrinsic IntOp> {
// 64-bit vector types.
- def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4,
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
!strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4,
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
!strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
- def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4,
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
!strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4,
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
!strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
- def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4,
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
!strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
- def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4,
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
!strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}
@@ -622,17 +1363,25 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, Intrinsic IntOp> {
- def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4,
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
!strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4,
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
!strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, Intrinsic IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+}
+
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, Intrinsic IntOp>
: N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
- def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4,
+ def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
!strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
}
@@ -640,23 +1389,24 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
- bits<5> op11_7, bit op4, string OpcodeStr,
- Intrinsic IntOp> {
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+ itinD, !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+ itinD, !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+ itinD, !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+ itinQ, !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+ itinQ, !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+ itinQ, !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}
@@ -709,25 +1459,25 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon 2-register vector shift by immediate,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, SDNode OpNode> {
+ InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
// 64-bit vector types.
- def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4,
+ def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
!strconcat(OpcodeStr, "8"), v8i8, OpNode>;
- def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4,
+ def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
!strconcat(OpcodeStr, "16"), v4i16, OpNode>;
- def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4,
+ def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
!strconcat(OpcodeStr, "32"), v2i32, OpNode>;
- def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4,
+ def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
!strconcat(OpcodeStr, "64"), v1i64, OpNode>;
// 128-bit vector types.
- def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4,
+ def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
!strconcat(OpcodeStr, "8"), v16i8, OpNode>;
- def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4,
+ def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
!strconcat(OpcodeStr, "16"), v8i16, OpNode>;
- def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4,
+ def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
!strconcat(OpcodeStr, "32"), v4i32, OpNode>;
- def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4,
+ def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
!strconcat(OpcodeStr, "64"), v2i64, OpNode>;
}
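
// Each N2VSh_QHSD instantiation shares a single itinerary across all eight
// shift defs; a sketch (VFOOsh is invented; VSHLi and VSHRs below are real
// users):
//   defm VFOOsh : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vfoo.s", NEONvshrs>;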
@@ -790,24 +1540,30 @@ multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
-defm VADD : N3V_QHSD<0, 0, 0b1000, 0, "vadd.i", add, 1>;
-def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>;
-def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, "vadd.f32", v4f32, v4f32, fadd, 1>;
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32",
+                  v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32",
+                  v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s",
+                          int_arm_neon_vaddls, 1>;
+defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u",
+                          int_arm_neon_vaddlu, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>;
defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>;
// VHADD : Vector Halving Add
-defm VHADDs : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>;
-defm VHADDu : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>;
+defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
-defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>;
-defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
+defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
-defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>;
-defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>;
+defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
@@ -816,64 +1572,208 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
-defm VMUL : N3V_QHS<0, 0, 0b1001, 1, "vmul.i", mul, 1>;
-def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8,
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q,
+ IIC_VMULi32Q, "vmul.i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8,
int_arm_neon_vmulp, 1>;
-def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8,
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8,
int_arm_neon_vmulp, 1>;
-def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>;
-def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, "vmul.f32", v4f32, v4f32, fmul, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32",
+                  v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32",
+                  v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>;
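+// The following patterns fold a multiply by a duplicated Q-register lane
+// into the lane-indexed VMUL: EXTRACT_SUBREG selects the D subregister
+// holding the lane (via DSubReg_i16_reg / DSubReg_i32_reg) and the
+// SubReg_*_lane operand re-indexes the lane within that subregister.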
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
-defm VQDMULH : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh.s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
-defm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh.s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8,
+defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s",
+                          int_arm_neon_vmulls, 1>;
+defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u",
+                          int_arm_neon_vmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8,
int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s",
+                             int_arm_neon_vmulls>;
+defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u",
+                             int_arm_neon_vmullu>;
+
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
-defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s",
+                          int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s",
+                             int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
-defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>;
-def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>;
-def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>;
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32",
+                       v2f32, fmul, fadd>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32",
+                       v4f32, fmul, fadd>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+                     IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32",
+                           v2f32, fmul, fadd>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32",
+                           v4f32, v2f32, fmul, fadd>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
+ (fmul (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
+
+defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
+defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;
+
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;
+
// VMLS : Vector Multiply Subtract (integer and floating-point)
-defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmls.i", sub>;
-def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>;
-def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>;
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32",
+                       v2f32, fmul, fsub>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32",
+                       v4f32, fmul, fsub>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+                     IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32",
+                           v2f32, fmul, fsub>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32",
+                           v4f32, v2f32, fmul, fsub>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
+ (fmul (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
+
+defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
+defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;
+
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
-defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, "vsub.i", sub, 0>;
-def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
-def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, "vsub.f32", v4f32, v4f32, fsub, 0>;
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32",
+                  v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32",
+                  v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s",
+                          int_arm_neon_vsubls, 1>;
+defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u",
+                          int_arm_neon_vsublu, 1>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
-defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>;
-defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>;
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
-defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>;
-defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>;
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
@@ -882,85 +1782,101 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
// Vector Comparisons.
// VCEQ : Vector Compare Equal
-defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, "vceq.i", NEONvceq, 1>;
-def VCEQfd : N3VD<0,0,0b00,0b1110,0, "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
-def VCEQfq : N3VQ<0,0,0b00,0b1110,0, "vceq.f32", v4i32, v4f32, NEONvceq, 1>;
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vceq.i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32",
+                  v2i32, v2f32, NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32",
+                  v4i32, v4f32, NEONvceq, 1>;
// VCGE : Vector Compare Greater Than or Equal
-defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, "vcge.s", NEONvcge, 0>;
-defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, "vcge.u", NEONvcgeu, 0>;
-def VCGEfd : N3VD<1,0,0b00,0b1110,0, "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
-def VCGEfq : N3VQ<1,0,0b00,0b1110,0, "vcge.f32", v4i32, v4f32, NEONvcge, 0>;
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vcge.s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32",
+                  v2i32, v2f32, NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32",
+                  v4i32, v4f32, NEONvcge, 0>;
// VCGT : Vector Compare Greater Than
-defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, "vcgt.s", NEONvcgt, 0>;
-defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, "vcgt.u", NEONvcgtu, 0>;
-def VCGTfd : N3VD<1,0,0b10,0b1110,0, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
-def VCGTfq : N3VQ<1,0,0b10,0b1110,0, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32",
+                  v2i32, v2f32, NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32",
+                  v4i32, v4f32, NEONvcgt, 0>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v2i32, v2f32,
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32,
int_arm_neon_vacged, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v4i32, v4f32,
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32,
int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v2i32, v2f32,
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32,
int_arm_neon_vacgtd, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v4i32, v4f32,
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32,
int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
-defm VTST : N3V_QHS<0, 0, 0b1000, 1, "vtst.i", NEONvtst, 1>;
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst.i", NEONvtst, 1>;
// Vector Bitwise Operations.
// VAND : Vector Bitwise AND
-def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, "vand", v2i32, v2i32, and, 1>;
-def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, "vand", v4i32, v4i32, and, 1>;
+def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>;
+def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", v4i32, v4i32, and, 1>;
// VEOR : Vector Bitwise Exclusive OR
-def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, "veor", v2i32, v2i32, xor, 1>;
-def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, "veor", v4i32, v4i32, xor, 1>;
+def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", v2i32, v2i32, xor, 1>;
+def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", v4i32, v4i32, xor, 1>;
// VORR : Vector Bitwise OR
-def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, "vorr", v2i32, v2i32, or, 1>;
-def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, "vorr", v4i32, v4i32, or, 1>;
+def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", v2i32, v2i32, or, 1>;
+def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>;
// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), "vbic\t$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (and DPR:$src1,(vnot DPR:$src2))))]>;
+ (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
+ "vbic\t$dst, $src1, $src2", "",
+ [(set DPR:$dst, (v2i32 (and DPR:$src1,
+ (vnot_conv DPR:$src2))))]>;
def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), "vbic\t$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (and QPR:$src1,(vnot QPR:$src2))))]>;
+ (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
+ "vbic\t$dst, $src1, $src2", "",
+ [(set QPR:$dst, (v4i32 (and QPR:$src1,
+ (vnot_conv QPR:$src2))))]>;
// VORN : Vector Bitwise OR NOT
def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), "vorn\t$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot DPR:$src2))))]>;
+ (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
+ "vorn\t$dst, $src1, $src2", "",
+ [(set DPR:$dst, (v2i32 (or DPR:$src1,
+ (vnot_conv DPR:$src2))))]>;
def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), "vorn\t$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot QPR:$src2))))]>;
+ (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
+ "vorn\t$dst, $src1, $src2", "",
+ [(set QPR:$dst, (v4i32 (or QPR:$src1,
+ (vnot_conv QPR:$src2))))]>;
// VMVN : Vector Bitwise NOT
def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), "vmvn\t$dst, $src", "",
+ (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
+ "vmvn\t$dst, $src", "",
[(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), "vmvn\t$dst, $src", "",
+ (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
+ "vmvn\t$dst, $src", "",
[(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
"vbsl\t$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst,
(v2i32 (or (and DPR:$src2, DPR:$src1),
- (and DPR:$src3, (vnot DPR:$src1)))))]>;
+ (and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ,
"vbsl\t$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(v4i32 (or (and QPR:$src2, QPR:$src1),
- (and QPR:$src3, (vnot QPR:$src1)))))]>;
+ (and QPR:$src3, (vnot_conv QPR:$src1)))))]>;
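+// In both VBSL patterns $src1 supplies the bit mask: each result bit comes
+// from $src2 where the mask bit is set and from $src3 where it is clear,
+// i.e. dst = ($src2 & $src1) | ($src3 & ~$src1).
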
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
@@ -973,16 +1889,18 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
-defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>;
-defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>;
-def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32,
- int_arm_neon_vabdf, 0>;
-def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32,
- int_arm_neon_vabdf, 0>;
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32,
+ int_arm_neon_vabds, 0>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32,
+ int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s",
+                          int_arm_neon_vabdls, 0>;
+defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u",
+                          int_arm_neon_vabdlu, 0>;
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>;
@@ -995,32 +1913,36 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
-defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>;
-defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32,
- int_arm_neon_vmaxf, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32,
- int_arm_neon_vmaxf, 1>;
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32,
+ int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32,
+ int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
-defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>;
-defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32,
- int_arm_neon_vminf, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
- int_arm_neon_vminf, 1>;
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32,
+ int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32,
+ int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
- int_arm_neon_vpaddi, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
- int_arm_neon_vpaddi, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
- int_arm_neon_vpaddi, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
- int_arm_neon_vpaddf, 0>;
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8,
+ int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16,
+ int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32,
+ int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32,
+ int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
@@ -1035,81 +1957,91 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8,
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8,
int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16,
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16,
int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32,
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32,
int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8,
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8,
int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16,
int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32,
int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
- int_arm_neon_vpmaxf, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32,
+ int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8,
int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16,
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16,
int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32,
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32,
int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8,
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8,
int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16,
int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32,
int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
- int_arm_neon_vpminf, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32,
+ int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
-def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe.u32",
v2i32, v2i32, int_arm_neon_vrecpe>;
-def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe.u32",
v4i32, v4i32, int_arm_neon_vrecpe>;
-def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
- v2f32, v2f32, int_arm_neon_vrecpef>;
-def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
- v4f32, v4f32, int_arm_neon_vrecpef>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe.f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe.f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
-def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32,
int_arm_neon_vrecps, 1>;
-def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32,
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32,
int_arm_neon_vrecps, 1>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
-def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
- v2i32, v2i32, int_arm_neon_vrsqrte>;
-def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
- v4i32, v4i32, int_arm_neon_vrsqrte>;
-def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
- v2f32, v2f32, int_arm_neon_vrsqrtef>;
-def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
- v4f32, v4f32, int_arm_neon_vrsqrtef>;
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte.u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte.u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte.f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte.f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
-def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32,
int_arm_neon_vrsqrts, 1>;
-def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32,
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32,
int_arm_neon_vrsqrts, 1>;
// Vector Shifts.
// VSHL : Vector Shift
-defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>;
-defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>;
+defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
+ IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>;
+defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
+ IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, "vshl.i", NEONvshl>;
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>;
-defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>;
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;
// VSHLL : Vector Shift Left Long
def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8",
@@ -1134,86 +2066,90 @@ def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
v2i64, v2i32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
-def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16",
- v8i8, v8i16, NEONvshrn>;
-def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32",
- v4i16, v4i32, NEONvshrn>;
-def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64",
- v2i32, v2i64, NEONvshrn>;
+def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1,
+ IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>;
+def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1,
+ IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>;
+def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1,
+ IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>;
// VRSHL : Vector Rounding Shift
-defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>;
-defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>;
+defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+ IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>;
+defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+ IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>;
+defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
-def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16",
- v8i8, v8i16, NEONvrshrn>;
-def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32",
- v4i16, v4i32, NEONvrshrn>;
-def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64",
- v2i32, v2i64, NEONvrshrn>;
+def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1,
+ IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>;
+def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1,
+ IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>;
+def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1,
+ IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>;
// VQSHL : Vector Saturating Shift
-defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>;
-defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>;
+defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+ IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>;
+defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+ IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>;
+defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
-def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16",
- v8i8, v8i16, NEONvqshrns>;
-def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32",
- v4i16, v4i32, NEONvqshrns>;
-def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64",
- v2i32, v2i64, NEONvqshrns>;
-def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16",
- v8i8, v8i16, NEONvqshrnu>;
-def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32",
- v4i16, v4i32, NEONvqshrnu>;
-def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64",
- v2i32, v2i64, NEONvqshrnu>;
+def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>;
+def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>;
+def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>;
+def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>;
+def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>;
+def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>;
// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
-def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16",
- v8i8, v8i16, NEONvqshrnsu>;
-def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32",
- v4i16, v4i32, NEONvqshrnsu>;
-def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64",
- v2i32, v2i64, NEONvqshrnsu>;
+def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>;
+def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>;
+def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1,
+ IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s",
- int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u",
- int_arm_neon_vqrshiftu, 0>;
+defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+ IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>;
+defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+ IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
-def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16",
- v8i8, v8i16, NEONvqrshrns>;
-def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32",
- v4i16, v4i32, NEONvqrshrns>;
-def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64",
- v2i32, v2i64, NEONvqrshrns>;
-def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16",
- v8i8, v8i16, NEONvqrshrnu>;
-def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32",
- v4i16, v4i32, NEONvqrshrnu>;
-def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64",
- v2i32, v2i64, NEONvqrshrnu>;
+def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>;
+def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>;
+def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>;
+def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>;
+def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>;
+def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>;
// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
-def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16",
- v8i8, v8i16, NEONvqrshrnsu>;
-def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32",
- v4i16, v4i32, NEONvqrshrnsu>;
-def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64",
- v2i32, v2i64, NEONvqrshrnsu>;
+def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>;
+def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>;
+def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1,
+ IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>;
// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
@@ -1230,15 +2166,19 @@ defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;
// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
-defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s",
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs.s",
int_arm_neon_vabs>;
-def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
- v2f32, v2f32, int_arm_neon_vabsf>;
-def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
- v4f32, v4f32, int_arm_neon_vabsf>;
+def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAD, "vabs.f32",
+ v2f32, v2f32, int_arm_neon_vabs>;
+def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAQ, "vabs.f32",
+ v4f32, v4f32, int_arm_neon_vabs>;
// VQABS : Vector Saturating Absolute Value
-defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s",
int_arm_neon_vqabs>;
// Vector Negate.
@@ -1248,11 +2188,11 @@ def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
[(set QPR:$dst, (Ty (vneg QPR:$src)))]>;
// VNEG : Vector Negate
@@ -1265,10 +2205,12 @@ def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;
// VNEG : Vector Negate (floating-point)
def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), "vneg.f32\t$dst, $src", "",
+ (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
+ "vneg.f32\t$dst, $src", "",
[(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "",
+ (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
+ "vneg.f32\t$dst, $src", "",
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
@@ -1279,21 +2221,26 @@ def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
-defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, "vqneg.s",
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s",
int_arm_neon_vqneg>;
// Vector Bit Counting Operations.
// VCLS : Vector Count Leading Sign Bits
-defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, "vcls.s",
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls.s",
int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
-defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, "vclz.i",
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz.i",
int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
-def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt.8",
v8i8, v8i8, int_arm_neon_vcnt>;
-def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt.8",
v16i8, v16i8, int_arm_neon_vcnt>;
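
As a quick semantic reminder of the three bit-counting ops, in terms of their ACLE intrinsics (an illustrative sketch only):

    #include <arm_neon.h>

    int8x8_t  cls(int8x8_t v)  { return vcls_s8(v); } /* vcls.s8: leading sign bits */
    int8x8_t  clz(int8x8_t v)  { return vclz_s8(v); } /* vclz.i8: leading zero bits */
    uint8x8_t cnt(uint8x8_t v) { return vcnt_u8(v); } /* vcnt.8: set bits per byte  */
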
// Vector Move Operations.
@@ -1301,9 +2248,9 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
// VMOV : Vector Move (Register)
def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- "vmov\t$dst, $src", "", []>;
+ IIC_VMOVD, "vmov\t$dst, $src", "", []>;
def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- "vmov\t$dst, $src", "", []>;
+ IIC_VMOVD, "vmov\t$dst, $src", "", []>;
// VMOV : Vector Move (Immediate)
@@ -1343,146 +2290,188 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// be encoded based on the immed values.
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "",
+ (ins i8imm:$SIMM), IIC_VMOVImm,
+ "vmov.i8\t$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "",
+ (ins i8imm:$SIMM), IIC_VMOVImm,
+ "vmov.i8\t$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "",
+ (ins i16imm:$SIMM), IIC_VMOVImm,
+ "vmov.i16\t$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "",
+ (ins i16imm:$SIMM), IIC_VMOVImm,
+ "vmov.i16\t$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "",
+ (ins i32imm:$SIMM), IIC_VMOVImm,
+ "vmov.i32\t$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "",
+ (ins i32imm:$SIMM), IIC_VMOVImm,
+ "vmov.i32\t$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "",
+ (ins i64imm:$SIMM), IIC_VMOVImm,
+ "vmov.i64\t$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "",
+ (ins i64imm:$SIMM), IIC_VMOVImm,
+ "vmov.i64\t$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
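
A hedged note on when these fire: a splat constant whose bytes fit the modified-immediate encoding is typically materialized with a single vmov.iN rather than a constant-pool load. Illustrative C, assuming arm_neon.h:

    #include <arm_neon.h>

    /* Likely becomes "vmov.i8 qN, #1" rather than a load (compiler-dependent). */
    uint8x16_t all_ones(void) { return vdupq_n_u8(1); }
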
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00,
- (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
- "vmov", ".s8\t$dst, $src[$lane]",
+ (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
imm:$lane))]>;
def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
- (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
- "vmov", ".s16\t$dst, $src[$lane]",
+ (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
imm:$lane))]>;
def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00,
- (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
- "vmov", ".u8\t$dst, $src[$lane]",
+ (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
imm:$lane))]>;
def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
- (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
- "vmov", ".u16\t$dst, $src[$lane]",
+ (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
imm:$lane))]>;
def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
- (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
- "vmov", ".32\t$dst, $src[$lane]",
+ (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
[(set GPR:$dst, (extractelt (v2i32 DPR:$src),
imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
(VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
- (SubReg_i8_reg imm:$lane))),
+ (DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
(VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
- (SubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
(VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
- (SubReg_i8_reg imm:$lane))),
+ (DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
(VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
- (SubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
- (SubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane))>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
+ (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
-// (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
- (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
// VMOV : Vector Set Lane (move ARM core register to scalar)
let Constraints = "$src1 = $dst" in {
def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
- (ins DPR:$src1, GPR:$src2, i32imm:$lane),
- "vmov", ".8\t$dst[$lane], $src2",
+ (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
[(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
- (ins DPR:$src1, GPR:$src2, i32imm:$lane),
- "vmov", ".16\t$dst[$lane], $src2",
+ (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
[(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
- (ins DPR:$src1, GPR:$src2, i32imm:$lane),
- "vmov", ".32\t$dst[$lane], $src2",
+ (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
[(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
GPR:$src2, imm:$lane))]>;
}
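
Lane moves in both directions, sketched with the corresponding intrinsics (names are illustrative):

    #include <arm_neon.h>

    int8_t get_lane(int8x8_t v) {
      return vget_lane_s8(v, 3);              /* vmov.s8 rN, dM[3] */
    }
    int32x2_t set_lane(int32x2_t v, int32_t x) {
      return vset_lane_s32(x, v, 1);          /* vmov.32 dM[1], rN */
    }
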
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
(v16i8 (INSERT_SUBREG QPR:$src1,
(VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
- (SubReg_i8_reg imm:$lane))),
+ (DSubReg_i8_reg imm:$lane))),
GPR:$src2, (SubReg_i8_lane imm:$lane)),
- (SubReg_i8_reg imm:$lane)))>;
+ (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
(v8i16 (INSERT_SUBREG QPR:$src1,
(VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
- (SubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
GPR:$src2, (SubReg_i16_lane imm:$lane)),
- (SubReg_i16_reg imm:$lane)))>;
+ (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
(v4i32 (INSERT_SUBREG QPR:$src1,
(VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
- (SubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
GPR:$src2, (SubReg_i32_lane imm:$lane)),
- (SubReg_i32_reg imm:$lane)))>;
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
-// (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
- (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+def : Pat<(v2f64 (scalar_to_vector DPR:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ arm_dsubreg_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ arm_dsubreg_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ arm_dsubreg_0)>;
// VDUP : Vector Duplicate (from ARM core register to all elements)
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
- "vdup", !strconcat(asmSize, "\t$dst, $src"),
- [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+ IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+ [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
- "vdup", !strconcat(asmSize, "\t$dst, $src"),
- [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+ IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+ [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
@@ -1492,45 +2481,28 @@ def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
- "vdup", ".32\t$dst, $src",
- [(set DPR:$dst, (v2f32 (splat_lo
- (scalar_to_vector
- (f32 (bitconvert GPR:$src))),
- undef)))]>;
+ IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+ [(set DPR:$dst, (v2f32 (NEONvdup
+ (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
- "vdup", ".32\t$dst, $src",
- [(set QPR:$dst, (v4f32 (splat_lo
- (scalar_to_vector
- (f32 (bitconvert GPR:$src))),
- undef)))]>;
+ IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+ [(set QPR:$dst, (v4f32 (NEONvdup
+ (f32 (bitconvert GPR:$src)))))]>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
-def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32);
-}]>;
-
-def splat_lane : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat();
-}], SHUFFLE_get_splat_lane>;
-
class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
: N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$lane),
+ (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
!strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
- [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>;
+ [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
-// vector_shuffle requires that the source and destination types match, so
-// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
ValueType ResTy, ValueType OpTy>
: N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
- (outs QPR:$dst), (ins DPR:$src, i32imm:$lane),
+ (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
!strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
- [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>;
+ [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
@@ -1541,15 +2513,51 @@ def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
def VDUPLNfq : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
+ (outs DPR:$dst), (ins SPR:$src),
+ IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+
+def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
+ (outs QPR:$dst), (ins SPR:$src),
+ IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
+ (INSERT_SUBREG QPR:$src,
+ (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+ (DSubReg_f64_other_reg imm:$lane))>;
+def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
+ (INSERT_SUBREG QPR:$src,
+ (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+ (DSubReg_f64_other_reg imm:$lane))>;
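
Both duplicate flavors, from a core register and from a lane, in intrinsic form (a sketch, assuming arm_neon.h):

    #include <arm_neon.h>

    int16x4_t dup_reg(int16_t x)    { return vdup_n_s16(x);        } /* vdup.16 dN, rM    */
    int16x8_t dup_lane(int16x4_t v) { return vdupq_lane_s16(v, 2); } /* vdup.16 qN, dM[2] */
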
+
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",
+defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i",
int_arm_neon_vmovn>;
// VQMOVN : Vector Saturating Narrowing Move
-defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, "vqmovn.s",
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s",
int_arm_neon_vqmovns>;
-defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, "vqmovn.u",
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u",
int_arm_neon_vqmovnu>;
-defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, "vqmovun.s",
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s",
int_arm_neon_vqmovnsu>;
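
The three narrowing-move variants differ only in how they treat out-of-range values; a C sketch under the usual arm_neon.h assumption:

    #include <arm_neon.h>

    int8x8_t  trunc8(int16x8_t v) { return vmovn_s16(v);   } /* vmovn.i16: truncate         */
    int8x8_t  sat8(int16x8_t v)   { return vqmovn_s16(v);  } /* vqmovn.s16: signed saturate */
    uint8x8_t sat8u(int16x8_t v)  { return vqmovun_s16(v); } /* vqmovun.s16: to unsigned    */
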
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>;
@@ -1597,6 +2605,247 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
+ (ins DPR:$src), IIC_VMOVD,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src), IIC_VMOVD,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64.8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
+def VREV64df : VREV64D<0b10, "vrev64.32", v2f32>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64.8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
+def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
+ (ins DPR:$src), IIC_VMOVD,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src), IIC_VMOVD,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32.8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32.8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
+ (ins DPR:$src), IIC_VMOVD,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src), IIC_VMOVD,
+ !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
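
The rev family reverses element order inside fixed-size groups; with byte elements, for instance (illustrative C, assuming arm_neon.h):

    #include <arm_neon.h>

    uint8x16_t r64(uint8x16_t v) { return vrev64q_u8(v); } /* reverse bytes in each doubleword */
    uint8x16_t r32(uint8x16_t v) { return vrev32q_u8(v); } /* reverse bytes in each word       */
    uint8x16_t r16(uint8x16_t v) { return vrev16q_u8(v); } /* reverse bytes in each halfword   */
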
+
+// Other Vector Shuffles.
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, ValueType Ty>
+ : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst),
+ (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
+ !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
+ [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
+ (Ty DPR:$rhs), imm:$index)))]>;
+
+class VEXTq<string OpcodeStr, ValueType Ty>
+ : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst),
+ (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
+ !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
+ [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
+ (Ty QPR:$rhs), imm:$index)))]>;
+
+def VEXTd8 : VEXTd<"vext.8", v8i8>;
+def VEXTd16 : VEXTd<"vext.16", v4i16>;
+def VEXTd32 : VEXTd<"vext.32", v2i32>;
+def VEXTdf : VEXTd<"vext.32", v2f32>;
+
+def VEXTq8 : VEXTq<"vext.8", v16i8>;
+def VEXTq16 : VEXTq<"vext.16", v8i16>;
+def VEXTq32 : VEXTq<"vext.32", v4i32>;
+def VEXTqf : VEXTq<"vext.32", v4f32>;
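
vext concatenates its two operands and extracts a vector starting at the given element index; a one-function sketch (assuming arm_neon.h):

    #include <arm_neon.h>

    /* Bytes 3..10 of the 16-byte concatenation (a, b). */
    uint8x8_t window(uint8x8_t a, uint8x8_t b) {
      return vext_u8(a, b, 3);                /* vext.8 dD, dA, dB, #3 */
    }
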
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip.8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
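
All three two-register shuffles write both operands, which is why they return vector pairs at the C level (sketch, assuming arm_neon.h):

    #include <arm_neon.h>

    uint8x8x2_t interleave(uint8x8_t a, uint8x8_t b)   { return vzip_u8(a, b); } /* vzip.8 */
    uint8x8x2_t deinterleave(uint8x8_t a, uint8x8_t b) { return vuzp_u8(a, b); } /* vuzp.8 */
    uint8x8x2_t transpose(uint8x8_t a, uint8x8_t b)    { return vtrn_u8(a, b); } /* vtrn.8 */
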
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
+ (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
+ "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
+ "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
+ DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
+ "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
+ DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
+ "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
+ DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+} // hasExtraSrcRegAllocReq = 1
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
+ (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
+ "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
+ (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
+ "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
+ DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
+ (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
+ "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
+ DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
+ "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
+ [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+} // hasExtraSrcRegAllocReq = 1
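
The tbl/tbx distinction is what the "$orig = $dst" constraint above encodes: vtbx keeps the destination lane for out-of-range indices, while vtbl zeroes it. Sketch (assuming arm_neon.h):

    #include <arm_neon.h>

    uint8x8_t lookup(uint8x8_t table, uint8x8_t idx) {
      return vtbl1_u8(table, idx);            /* out-of-range lanes -> 0    */
    }
    uint8x8_t lookup_keep(uint8x8_t keep, uint8x8_t table, uint8x8_t idx) {
      return vtbx1_u8(keep, table, idx);      /* out-of-range lanes -> keep */
    }
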
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
+
+// These need separate instructions because they must use the DPR_VFP2
+// register class, which has SPR sub-registers.
+
+// Vector Add Operations used for single-precision FP
+let neverHasSideEffects = 1 in
+def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
+def : N3VDsPat<fadd, VADDfd_sfp>;
+
+// Vector Sub Operations used for single-precision FP
+let neverHasSideEffects = 1 in
+def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
+def : N3VDsPat<fsub, VSUBfd_sfp>;
+
+// Vector Multiply Operations used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
+def : N3VDsPat<fmul, VMULfd_sfp>;
+
+// Vector Multiply-Accumulate/Subtract used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+
+let neverHasSideEffects = 1 in
+def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+
+// Vector Absolute used for single-precision FP
+let neverHasSideEffects = 1 in
+def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAD, "vabs.f32",
+ v2f32, v2f32, int_arm_neon_vabs>;
+def : N2VDIntsPat<fabs, VABSfd_sfp>;
+
+// Vector Negate used for single-precision FP
+let neverHasSideEffects = 1 in
+def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vneg.f32\t$dst, $src", "", []>;
+def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
+
+// Vector Convert between single-precision FP and integer
+let neverHasSideEffects = 1 in
+def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
+
+let neverHasSideEffects = 1 in
+def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
+
+let neverHasSideEffects = 1 in
+def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
+
+let neverHasSideEffects = 1 in
+def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
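
A hedged illustration of when the _sfp defs matter: on a subtarget that opts into doing scalar single-precision FP on NEON, plain float code like the following can be selected onto them, operating on the d-register that overlaps the s-register:

    /* May lower to vmul.f32/vadd.f32 on dN instead of VFP fmuls/fadds,
       depending on the subtarget setting (illustrative only). */
    float madd(float a, float b, float c) {
      return a * b + c;
    }
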
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 904d9b1d5273..9816addf7d6a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -117,86 +117,150 @@ def t_addrmode_sp : Operand<i32>,
let Defs = [SP], Uses = [SP] in {
def tADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
"@ tADJCALLSTACKUP $amt1",
- [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>;
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
def tADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt),
+PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
"@ tADJCALLSTACKDOWN $amt",
- [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>;
+ [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
}
+// For both thumb1 and thumb2.
let isNotDuplicable = 1 in
-def tPICADD : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
- "$cp:\n\tadd $dst, pc",
- [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
+ "\n$cp:\n\tadd $dst, pc",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
// PC relative add.
-def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs),
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi,
"add $dst, pc, $rhs * 4", []>;
// ADD rd, sp, #imm8
-// FIXME: hard code sp?
-def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
- "add $dst, $sp, $rhs * 4 @ addrspi", []>;
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi,
+ "add $dst, $sp, $rhs * 4", []>;
// ADD sp, sp, #imm7
-// FIXME: hard code sp?
-def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
"add $dst, $rhs * 4", []>;
-// FIXME: Make use of the following?
-// ADD rm, sp, rm
+// SUB sp, sp, #imm7
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
+ "sub $dst, $rhs * 4", []>;
+
+// ADD rm, sp
+def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add $dst, $rhs", []>;
+
// ADD sp, rm
+def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add $dst, $rhs", []>;
+
+// Pseudo instruction that will expand into a tSUBspi + a copy.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ NoItinerary, "@ sub $dst, $rhs * 4", []>;
+
+def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ NoItinerary, "@ add $dst, $rhs", []>;
+
+let Defs = [CPSR] in
+def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ NoItinerary, "@ and $dst, $rhs", []>;
+} // usesCustomDAGSchedInserter
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
-let isReturn = 1, isTerminator = 1 in {
- def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>;
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : TI<(outs), (ins), IIC_Br, "bx lr", [(ARMretflag)]>;
// Alternative return instruction used by vararg functions.
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>;
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx $target", []>;
}
// FIXME: remove when we have a way to marking a MI with these properties.
-let isReturn = 1, isTerminator = 1 in
-def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins),
- "pop $dst1", []>;
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
+ "pop${p} $wb", []>;
let isCall = 1,
- Defs = [R0, R1, R2, R3, LR,
- D0, D1, D2, D3, D4, D5, D6, D7] in {
- def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops),
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
+ // Also used for Thumb2
+ def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
"bl ${func:call}",
- [(ARMtcall tglobaladdr:$func)]>;
- // ARMv5T and above
- def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops),
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsNotDarwin]>;
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
"blx ${func:call}",
- [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
- def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops),
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ "blx $func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+
+ // ARMv4T
+ def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ "mov lr, pc\n\tbx $func",
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb1Only, IsNotDarwin]>;
+}
+
+// On Darwin R9 is call-clobbered.
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, R9, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
+ // Also used for Thumb2
+ def tBLr9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ "bl ${func:call}",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsDarwin]>;
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi_r9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ "blx ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+ // Also used for Thumb2
+ def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
"blx $func",
- [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>;
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
// ARMv4T
- def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops),
- "cpy lr, pc\n\tbx $func",
- [(ARMcall_nolink tGPR:$func)]>;
+ def tBXr9 : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ "mov lr, pc\n\tbx $func",
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb1Only, IsDarwin]>;
}
let isBranch = 1, isTerminator = 1 in {
let isBarrier = 1 in {
let isPredicable = 1 in
- def tB : T1I<(outs), (ins brtarget:$target), "b $target",
- [(br bb:$target)]>;
+ def tB : T1I<(outs), (ins brtarget:$target), IIC_Br,
+ "b $target", [(br bb:$target)]>;
// Far jump
- def tBfar : T1Ix2<(outs), (ins brtarget:$target),
+ let Defs = [LR] in
+ def tBfar : TIx2<(outs), (ins brtarget:$target), IIC_Br,
"bl $target\t@ far jump",[]>;
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- "cpy pc, $target \n\t.align\t2\n$jt",
+ IIC_Br, "mov pc, $target\n\t.align\t2\n$jt",
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>;
}
}
@@ -204,7 +268,8 @@ let isBranch = 1, isTerminator = 1 in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
- def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target",
+ def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br,
+ "b$cc $target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>;
//===----------------------------------------------------------------------===//
@@ -212,384 +277,363 @@ let isBranch = 1, isTerminator = 1 in
//
let canFoldAsLoad = 1 in
-def tLDR : T1I4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr),
- "ldr $dst, $addr",
+def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+ "ldr", " $dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
-def tLDRB : T1I1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr),
- "ldrb $dst, $addr",
+def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
+ "ldrb", " $dst, $addr",
[(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>;
-def tLDRH : T1I2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr),
- "ldrh $dst, $addr",
+def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
+ "ldrh", " $dst, $addr",
[(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
-def tLDRSB : T1I1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
- "ldrsb $dst, $addr",
+let AddedComplexity = 10 in
+def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+ "ldrsb", " $dst, $addr",
[(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
-def tLDRSH : T1I2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
- "ldrsh $dst, $addr",
+let AddedComplexity = 10 in
+def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+ "ldrsh", " $dst, $addr",
[(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
let canFoldAsLoad = 1 in
-def tLDRspi : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
- "ldr $dst, $addr",
+def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
+ "ldr", " $dst, $addr",
[(set tGPR:$dst, (load t_addrmode_sp:$addr))]>;
// Special instruction for restore. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
let canFoldAsLoad = 1, mayLoad = 1 in
-def tRestore : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
- "ldr $dst, $addr", []>;
+def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
+ "ldr", " $dst, $addr", []>;
// Load tconstpool
let canFoldAsLoad = 1 in
-def tLDRpci : T1Is<(outs tGPR:$dst), (ins i32imm:$addr),
- "ldr $dst, $addr",
+def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
+ "ldr", " $dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
-def tLDRcp : T1Is<(outs tGPR:$dst), (ins i32imm:$addr),
- "ldr $dst, $addr", []>;
+def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
+ "ldr", " $dst, $addr", []>;
-def tSTR : T1I4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr),
- "str $src, $addr",
+def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
+ "str", " $src, $addr",
[(store tGPR:$src, t_addrmode_s4:$addr)]>;
-def tSTRB : T1I1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr),
- "strb $src, $addr",
+def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
+ "strb", " $src, $addr",
[(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>;
-def tSTRH : T1I2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr),
- "strh $src, $addr",
+def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
+ "strh", " $src, $addr",
[(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>;
-def tSTRspi : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
- "str $src, $addr",
+def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
+ "str", " $src, $addr",
[(store tGPR:$src, t_addrmode_sp:$addr)]>;
let mayStore = 1 in {
// Special instruction for spill. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
-def tSpill : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
- "str $src, $addr", []>;
+def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
+ "str", " $src, $addr", []>;
}
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
-// TODO: A7-44: LDMIA - load multiple
+// These require the base address to be written back or to be one of the
+// loaded regs.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+def tLDM : T1I<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ IIC_iLoadm,
+ "ldm${addr:submode}${p} $addr, $wb", []>;
-let mayLoad = 1 in
-def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins),
- "pop $dst1", []>;
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+def tSTM : T1I<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ IIC_iStorem,
+ "stm${addr:submode}${p} $addr, $wb", []>;
-let mayStore = 1 in
-def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
- "push $src1", []>;
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
+ "pop${p} $wb", []>;
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
+ "push${p} $wb", []>;
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
//
// Add with carry register
-let isCommutable = 1, Defs = [CPSR], Uses = [CPSR] in
-def tADCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "adc $dst, $rhs",
- [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "adc", " $dst, $rhs",
+ [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
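
tADC models the adde node, i.e. the carry-consuming half of a wide add; a minimal C sketch of code that exercises the adds/adc pair on a 32-bit Thumb target (the exact lowering is an assumption, not taken from this patch):

    #include <stdint.h>

    /* Typically lowered to adds (low words, sets C) followed by adc (high words). */
    uint64_t add64(uint64_t a, uint64_t b) {
      return a + b;
    }
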
// Add immediate
-let Defs = [CPSR] in {
-def tADDi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "add $dst, $lhs, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
-def tADDSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "add $dst, $lhs, $rhs",
- [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7:$rhs))]>;
-}
+def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
+ "add", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
-let Defs = [CPSR] in {
-def tADDi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "add $dst, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
-def tADDSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "add $dst, $rhs",
- [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255:$rhs))]>;
-}
+def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
+ "add", " $dst, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
// Add register
-let isCommutable = 1, Defs = [CPSR] in {
-def tADDrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "add $dst, $lhs, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
-def tADDSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "add $dst, $lhs, $rhs",
- [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
-}
+let isCommutable = 1 in
+def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "add", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
let neverHasSideEffects = 1 in
-def tADDhirr : T1It<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- "add $dst, $rhs @ addhirr", []>;
+def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add", " $dst, $rhs", []>;
// And register
-let isCommutable = 1, Defs = [CPSR] in
-def tAND : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "and $dst, $rhs",
- [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
+let isCommutable = 1 in
+def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "and", " $dst, $rhs",
+ [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
// ASR immediate
-let Defs = [CPSR] in
-def tASRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "asr $dst, $lhs, $rhs",
- [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>;
+def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ "asr", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>;
// ASR register
-let Defs = [CPSR] in
-def tASRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "asr $dst, $rhs",
- [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
+def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
+ "asr", " $dst, $rhs",
+ [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
// BIC register
-let Defs = [CPSR] in
-def tBIC : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "bic $dst, $rhs",
- [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
+def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "bic", " $dst, $rhs",
+ [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
// CMN register
let Defs = [CPSR] in {
-def tCMN : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
- "cmn $lhs, $rhs",
- [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
-def tCMNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
- "cmn $lhs, $rhs",
- [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
+def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
+ "cmn", " $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+def tCMNZ : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
+ "cmn", " $lhs, $rhs",
+ [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
}
// CMP immediate
let Defs = [CPSR] in {
-def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
- "cmp $lhs, $rhs",
- [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
-def tCMPZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
- "cmp $lhs, $rhs",
- [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>;
+def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
+ "cmp", " $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
+def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
+ "cmp", " $lhs, $rhs",
+ [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>;
}
// CMP register
let Defs = [CPSR] in {
-def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
- "cmp $lhs, $rhs",
- [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
-def tCMPZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
- "cmp $lhs, $rhs",
- [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>;
+def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
+ "cmp", " $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
+def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
+ "cmp", " $lhs, $rhs",
+ [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ "cmp", " $lhs, $rhs", []>;
+def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ "cmp", " $lhs, $rhs", []>;
}
-// TODO: A7-37: CMP(3) - cmp hi regs
// XOR register
-let isCommutable = 1, Defs = [CPSR] in
-def tEOR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "eor $dst, $rhs",
- [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
+let isCommutable = 1 in
+def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "eor", " $dst, $rhs",
+ [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
// LSL immediate
-let Defs = [CPSR] in
-def tLSLri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "lsl $dst, $lhs, $rhs",
- [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>;
+def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ "lsl", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>;
// LSL register
-let Defs = [CPSR] in
-def tLSLrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "lsl $dst, $rhs",
- [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
+def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
+ "lsl", " $dst, $rhs",
+ [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
// LSR immediate
-let Defs = [CPSR] in
-def tLSRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "lsr $dst, $lhs, $rhs",
- [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>;
+def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ "lsr", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>;
// LSR register
-let Defs = [CPSR] in
-def tLSRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "lsr $dst, $rhs",
- [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
+def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
+ "lsr", " $dst, $rhs",
+ [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
// move register
-let Defs = [CPSR] in
-def tMOVi8 : T1I<(outs tGPR:$dst), (ins i32imm:$src),
- "mov $dst, $src",
- [(set tGPR:$dst, imm0_255:$src)]>;
+def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+ "mov", " $dst, $src",
+ [(set tGPR:$dst, imm0_255:$src)]>;
// TODO: A7-73: MOV(2) - mov setting flag.
-// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
-// which is MOV(3). This also supports high registers.
let neverHasSideEffects = 1 in {
-def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "cpy $dst, $src", []>;
-def tMOVhir2lor : T1I<(outs tGPR:$dst), (ins GPR:$src),
- "cpy $dst, $src\t@ hir2lor", []>;
-def tMOVlor2hir : T1I<(outs GPR:$dst), (ins tGPR:$src),
- "cpy $dst, $src\t@ lor2hir", []>;
-def tMOVhir2hir : T1I<(outs GPR:$dst), (ins GPR:$src),
- "cpy $dst, $src\t@ hir2hir", []>;
+// FIXME: Make this predicable.
+def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
+ "mov $dst, $src", []>;
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
+ "movs $dst, $src", []>;
+
+// FIXME: Make these predicable.
+def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ "mov $dst, $src", []>;
+def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr,
+ "mov $dst, $src", []>;
+def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ "mov $dst, $src", []>;
} // neverHasSideEffects
// multiply register
-let isCommutable = 1, Defs = [CPSR] in
-def tMUL : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "mul $dst, $rhs",
- [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
+let isCommutable = 1 in
+def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32,
+ "mul", " $dst, $rhs",
+ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
// move inverse register
-let Defs = [CPSR] in
-def tMVN : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "mvn $dst, $src",
- [(set tGPR:$dst, (not tGPR:$src))]>;
-
-// negate register
-let Defs = [CPSR] in
-def tNEG : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "neg $dst, $src",
- [(set tGPR:$dst, (ineg tGPR:$src))]>;
+def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
+ "mvn", " $dst, $src",
+ [(set tGPR:$dst, (not tGPR:$src))]>;
// bitwise or register
-let isCommutable = 1, Defs = [CPSR] in
-def tORR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "orr $dst, $rhs",
- [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
+let isCommutable = 1 in
+def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "orr", " $dst, $rhs",
+ [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
// swaps
-def tREV : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "rev $dst, $src",
- [(set tGPR:$dst, (bswap tGPR:$src))]>,
- Requires<[IsThumb, HasV6]>;
-
-def tREV16 : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "rev16 $dst, $src",
- [(set tGPR:$dst,
- (or (and (srl tGPR:$src, (i32 8)), 0xFF),
- (or (and (shl tGPR:$src, (i32 8)), 0xFF00),
- (or (and (srl tGPR:$src, (i32 8)), 0xFF0000),
- (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>,
- Requires<[IsThumb, HasV6]>;
-
-def tREVSH : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "revsh $dst, $src",
- [(set tGPR:$dst,
- (sext_inreg
- (or (srl (and tGPR:$src, 0xFFFF), (i32 8)),
- (shl tGPR:$src, (i32 8))), i16))]>,
- Requires<[IsThumb, HasV6]>;
+def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "rev", " $dst, $src",
+ [(set tGPR:$dst, (bswap tGPR:$src))]>,
+ Requires<[IsThumb1Only, HasV6]>;
+
+def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "rev16", " $dst, $src",
+ [(set tGPR:$dst,
+ (or (and (srl tGPR:$src, (i32 8)), 0xFF),
+ (or (and (shl tGPR:$src, (i32 8)), 0xFF00),
+ (or (and (srl tGPR:$src, (i32 8)), 0xFF0000),
+ (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>,
+ Requires<[IsThumb1Only, HasV6]>;
+
+def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "revsh", " $dst, $src",
+ [(set tGPR:$dst,
+ (sext_inreg
+ (or (srl (and tGPR:$src, 0xFF00), (i32 8)),
+ (shl tGPR:$src, (i32 8))), i16))]>,
+ Requires<[IsThumb1Only, HasV6]>;
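
The three dag patterns above match the usual C idioms for byte reversal; an illustrative sketch:

    #include <stdint.h>

    uint32_t rev(uint32_t x)   { return __builtin_bswap32(x); }           /* rev   */
    uint32_t rev16(uint32_t x) {                                          /* rev16 */
      return ((x & 0x00FF00FFu) << 8) | ((x & 0xFF00FF00u) >> 8);
    }
    int32_t revsh(uint32_t x)  {                                          /* revsh */
      return (int16_t)((x << 8) | ((x >> 8) & 0xFFu));
    }
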
// rotate right register
-let Defs = [CPSR] in
-def tROR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "ror $dst, $rhs",
- [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
+def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
+ "ror", " $dst, $rhs",
+ [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
+
+// negate register
+def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi,
+ "rsb", " $dst, $src, #0",
+ [(set tGPR:$dst, (ineg tGPR:$src))]>;
// Subtract with carry register
-let Defs = [CPSR], Uses = [CPSR] in
-def tSBCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "sbc $dst, $rhs",
- [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
+let Uses = [CPSR] in
+def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "sbc", " $dst, $rhs",
+ [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
// Subtract immediate
-let Defs = [CPSR] in {
-def tSUBi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "sub $dst, $lhs, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
-def tSUBSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "sub $dst, $lhs, $rhs",
- [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7_neg:$rhs))]>;
-}
+def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
+ "sub", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
-let Defs = [CPSR] in {
-def tSUBi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "sub $dst, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
-def tSUBSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "sub $dst, $rhs",
- [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255_neg:$rhs))]>;
-}
+def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
+ "sub", " $dst, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
// subtract register
-let Defs = [CPSR] in {
-def tSUBrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "sub $dst, $lhs, $rhs",
- [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
-def tSUBSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- "sub $dst, $lhs, $rhs",
- [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
-}
+def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+ "sub", " $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
// TODO: A7-96: STMIA - store multiple.
-def tSUBspi : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
- "sub $dst, $rhs * 4", []>;
-
// sign-extend byte
-def tSXTB : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "sxtb $dst, $src",
- [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
- Requires<[IsThumb, HasV6]>;
+def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "sxtb", " $dst, $src",
+ [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
+ Requires<[IsThumb1Only, HasV6]>;
// sign-extend short
-def tSXTH : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "sxth $dst, $src",
- [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
- Requires<[IsThumb, HasV6]>;
+def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "sxth", " $dst, $src",
+ [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
+ Requires<[IsThumb1Only, HasV6]>;
// test
let isCommutable = 1, Defs = [CPSR] in
-def tTST : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
- "tst $lhs, $rhs",
- [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
+def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
+ "tst", " $lhs, $rhs",
+ [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
// zero-extend byte
-def tUXTB : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "uxtb $dst, $src",
- [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
- Requires<[IsThumb, HasV6]>;
+def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "uxtb", " $dst, $src",
+ [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
+ Requires<[IsThumb1Only, HasV6]>;
// zero-extend short
-def tUXTH : T1I<(outs tGPR:$dst), (ins tGPR:$src),
- "uxth $dst, $src",
- [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
- Requires<[IsThumb, HasV6]>;
+def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
+ "uxth", " $dst, $src",
+ [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
+ Requires<[IsThumb1Only, HasV6]>;
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
// Expanded by the scheduler into a branch sequence.
let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
- def tMOVCCr :
+ def tMOVCCr_pseudo :
PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- "@ tMOVCCr $cc",
- [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+ NoItinerary, "@ tMOVCCr $cc",
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+
+// 16-bit movcc in IT blocks for Thumb2.
+def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
+ "mov", " $dst, $rhs", []>;
+
+def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
+ "mov", " $dst, $rhs", []>;
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label),
- !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
- "${:private}PCRELL${:uid}+4))\n"),
- !strconcat("\tmov $dst, #PCRELV${:uid}\n",
- "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
- []>;
-
-def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id),
- !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
- "${:private}PCRELL${:uid}+4))\n"),
- !strconcat("\tmov $dst, #PCRELV${:uid}\n",
- "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
- []>;
+def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
+ "adr$p $dst, #$label", []>;
+
+def tLEApcrelJT : T1I<(outs tGPR:$dst),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ IIC_iALUi, "adr$p $dst, #${label}_${id}", []>;
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -598,7 +642,7 @@ def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id),
// __aeabi_read_tp preserves the registers r1-r3.
let isCall = 1,
Defs = [R0, LR] in {
- def tTPsoft : TIx2<(outs), (ins),
+ def tTPsoft : TIx2<(outs), (ins), IIC_Br,
"bl __aeabi_read_tp",
[(set R0, ARMthread_pointer)]>;
}
@@ -607,20 +651,46 @@ let isCall = 1,
// Non-Instruction Patterns
//
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
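These selections lean on a two's-complement identity; a tiny C++ check of the one used for the immediate forms (the assert holds for all inputs under 32-bit wraparound):

#include <cstdint>
#include <cassert>

// (addc x, -imm) can be selected to a flag-setting subtract because
// x + (2^32 - imm) == x - imm mod 2^32, and on ARM the carry out of
// ADDS x, #-imm equals the carry (inverted borrow) set by SUBS x, #imm.
static void checkNegImmIdentity(uint32_t x, uint32_t imm) {
  assert(x + (0u - imm) == x - imm);
}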
+
// ConstantPool, GlobalAddress
-def : TPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
-def : TPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
// JumpTable
-def : TPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
// Direct calls
-def : TPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
-def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
// Indirect calls to ARM routines
-def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
// zextload i1 -> zextload i8
def : T1Pat<(zextloadi1 t_addrmode_s1:$addr),
@@ -631,6 +701,20 @@ def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>;
+// If it's impossible to use the [r,r] address mode for sextload, select
+// ldr{b|h} + sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_s1:$addr),
+ (tSXTB (tLDRB t_addrmode_s1:$addr))>,
+ Requires<[IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_s2:$addr),
+ (tSXTH (tLDRH t_addrmode_s2:$addr))>,
+ Requires<[IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_s1:$addr),
+ (tASRri (tLSLri (tLDRB t_addrmode_s1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_s1:$addr),
+ (tASRri (tLSLri (tLDRH t_addrmode_s1:$addr), 16), 16)>;
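The fallback patterns use the classic shift pair for sign extension when sxt{b|h} is unavailable; sketched in C++ (illustrative only; the right shifts are arithmetic on the targets in question):

#include <cstdint>

// ldrb zero-extends; lsls #24 then asrs #24 recovers the sign, exactly
// as (tASRri (tLSLri ...)) above.
static int32_t sext8ViaShifts(uint8_t loaded) {
  uint32_t v = loaded;
  return int32_t(v << 24) >> 24;
}

// Halfword version: lsls #16 ; asrs #16.
static int32_t sext16ViaShifts(uint16_t loaded) {
  uint32_t v = loaded;
  return int32_t(v << 16) >> 16;
}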
+
// Large immediate handling.
// Two piece imms.
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 50345a68fddd..0750dcc7fdc4 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -11,6 +11,21 @@
//
//===----------------------------------------------------------------------===//
+// IT block predicate field
+def it_pred : Operand<i32> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// IT block condition mask
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+}
+
+// Table branch address
+def tb_addrmode : Operand<i32> {
+ let PrintMethod = "printTBAddrMode";
+}
+
// Shifted operands. No register controlled shifts for Thumb2.
// Note: We do not support rrx shifted operands yet.
def t2_so_reg : Operand<i32>, // reg imm
@@ -20,23 +35,14 @@ def t2_so_reg : Operand<i32>, // reg imm
let MIOperandInfo = (ops GPR, i32imm);
}
-// t2_so_imm_XFORM - Return a t2_so_imm value packed into the format
-// described for t2_so_imm def below.
-def t2_so_imm_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(
- ARM_AM::getT2SOImmVal(N->getZExtValue()), MVT::i32);
-}]>;
-
// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(
- ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())), MVT::i32);
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
}]>;
// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(
- ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())), MVT::i32);
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
}]>;
// t2_so_imm - Match a 32-bit immediate operand, which is an
@@ -47,27 +53,21 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
def t2_so_imm : Operand<i32>,
PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
- }], t2_so_imm_XFORM> {
- let PrintMethod = "printT2SOImmOperand";
-}
+ return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
+}]>;
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
def t2_so_imm_not : Operand<i32>,
PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
- }], t2_so_imm_not_XFORM> {
- let PrintMethod = "printT2SOImmOperand";
-}
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM>;
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg : Operand<i32>,
PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1;
- }], t2_so_imm_neg_XFORM> {
- let PrintMethod = "printT2SOImmOperand";
-}
+ return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1;
+}], t2_so_imm_neg_XFORM>;
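For readers unfamiliar with the Thumb2 modified-immediate format these predicates gate on, here is a rough C++ acceptance test; it approximates what ARM_AM::getT2SOImmVal checks and is not the in-tree code:

#include <cstdint>

// A Thumb2 so_imm is an 8-bit value XY either splatted (000000XY,
// 00XY00XY, XY00XY00, XYXYXYXY) or, with its top bit set, rotated
// right by 8..31 bits.
static bool looksLikeT2SOImm(uint32_t v) {
  uint32_t b = v & 0xFFu;
  if (v == b) return true;                        // 000000XY
  if (v == ((b << 16) | b)) return true;          // 00XY00XY
  if (v == b * 0x01010101u) return true;          // XYXYXYXY
  uint32_t h = v & 0xFF00u;
  if (v == ((h << 16) | h)) return true;          // XY00XY00
  for (unsigned rot = 8; rot < 32; ++rot) {
    uint32_t r = (v << rot) | (v >> (32 - rot));  // undo the ROR
    if (r <= 0xFFu && (r & 0x80u)) return true;
  }
  return false;
}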
/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
def imm1_31 : PatLeaf<(i32 imm), [{
@@ -75,7 +75,8 @@ def imm1_31 : PatLeaf<(i32 imm), [{
}]>;
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
-def imm0_4095 : PatLeaf<(i32 imm), [{
+def imm0_4095 : Operand<i32>,
+ PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 4096;
}]>;
@@ -83,48 +84,9 @@ def imm0_4095_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 4096;
}], imm_neg_XFORM>;
-/// imm0_65535 predicate - True if the 32-bit immediate is in the range
-/// [0.65535].
-def imm0_65535 : PatLeaf<(i32 imm), [{
- return (uint32_t)N->getZExtValue() < 65536;
-}]>;
-
-/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
-/// e.g., 0xf000ffff
-def bf_inv_mask_imm : Operand<i32>,
- PatLeaf<(imm), [{
- uint32_t v = (uint32_t)N->getZExtValue();
- if (v == 0xffffffff)
- return 0;
- // naive checker. should do better, but simple is best for now since it's
- // more likely to be correct.
- while (v & 1) v >>= 1; // shift off the leading 1's
- if (v)
- {
- while (!(v & 1)) v >>=1; // shift off the mask
- while (v & 1) v >>= 1; // shift off the trailing 1's
- }
- // if this is a mask for clearing a bitfield, what's left should be zero.
- return (v == 0);
-}] > {
- let PrintMethod = "printBitfieldInvMaskImmOperand";
-}
-
-/// Split a 32-bit immediate into two 16 bit parts.
-def t2_lo16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
- MVT::i32);
-}]>;
-
-def t2_hi16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
-}]>;
-
-def t2_lo16AllZero : PatLeaf<(i32 imm), [{
- // Returns true if all low 16-bits are 0.
- return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
- }], t2_hi16>;
-
+def imm0_255_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 255;
+}], imm_neg_XFORM>;
// Define Thumb2 specific addressing modes.
@@ -147,14 +109,14 @@ def t2am_imm8_offset : Operand<i32>,
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
}
-// t2addrmode_imm8s4 := reg + (imm8 << 2)
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def t2addrmode_imm8s4 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> {
- let PrintMethod = "printT2AddrModeImm8Operand";
+ let PrintMethod = "printT2AddrModeImm8s4Operand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
-// t2addrmode_so_reg := reg + reg << imm2
+// t2addrmode_so_reg := reg + (reg << imm2)
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
@@ -171,52 +133,58 @@ def t2addrmode_so_reg : Operand<i32>,
/// changed to modify CPSR.
multiclass T2I_un_irs<string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0>{
// shifted imm
- def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src),
+ def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
opc, " $dst, $src",
[(set GPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
}
// register
- def r : T2I<(outs GPR:$dst), (ins GPR:$src),
- opc, " $dst, $src",
+ def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ opc, ".w $dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>;
// shifted register
- def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src),
- opc, " $dst, $src",
+ def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ opc, ".w $dst, $src",
[(set GPR:$dst, (opnode t2_so_reg:$src))]>;
}
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_bin_irs<string opc, PatFrag opnode, bit Commutable = 0> {
+multiclass T2I_bin_irs<string opc, PatFrag opnode,
+ bit Commutable = 0, string wide =""> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, " $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ opc, !strconcat(wide, " $dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
let isCommutable = Commutable;
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ opc, !strconcat(wide, " $dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+// the ".w" prefix to indicate that they are wide.
+multiclass T2I_bin_w_irs<string opc, PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opc, opnode, Commutable, ".w">;
+
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands is
/// reversed. It doesn't define the 'rr' form since it's handled by its
/// T2I_bin_irs counterpart.
multiclass T2I_rbin_is<string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
- opc, " $dst, $rhs, $lhs",
+ def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ opc, ".w $dst, $rhs, $lhs",
[(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+ def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
opc, " $dst, $rhs, $lhs",
[(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
}
@@ -226,18 +194,18 @@ multiclass T2I_rbin_is<string opc, PatFrag opnode> {
let Defs = [CPSR] in {
multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
- !strconcat(opc, "s"), " $dst, $lhs, $rhs",
+ def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ !strconcat(opc, "s"), ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// register
- def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- !strconcat(opc, "s"), " $dst, $lhs, $rhs",
+ def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ !strconcat(opc, "s"), ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
let isCommutable = Commutable;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
- !strconcat(opc, "s"), " $dst, $lhs, $rhs",
+ def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ !strconcat(opc, "s"), ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
}
@@ -246,22 +214,22 @@ multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> {
/// patterns for a binary operation that produces a value.
multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// 12-bit imm
- def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
!strconcat(opc, "w"), " $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
let isCommutable = Commutable;
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
@@ -271,41 +239,41 @@ multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, " $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUnused]>;
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUnused]> {
let isCommutable = Commutable;
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUnused]>;
// Carry setting variants
// shifted imm
- def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+ def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
!strconcat(opc, "s $dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUsed]> {
let Defs = [CPSR];
}
// register
- def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- !strconcat(opc, "s $dst, $lhs, $rhs"),
+ def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ !strconcat(opc, "s.w $dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUsed]> {
let Defs = [CPSR];
let isCommutable = Commutable;
}
// shifted register
- def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
- !strconcat(opc, "s $dst, $lhs, $rhs"),
+ def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ !strconcat(opc, "s.w $dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUsed]> {
let Defs = [CPSR];
@@ -313,49 +281,17 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
}
}
-/// T2I_rsc_is - Same as T2I_adde_sube_irs except the order of operands are
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_adde_sube_irs counterpart.
-let Defs = [CPSR], Uses = [CPSR] in {
-multiclass T2I_rsc_is<string opc, PatFrag opnode> {
- // shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
- opc, " $dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]>;
- // shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
- opc, " $dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]>;
- // shifted imm
- def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
- !strconcat(opc, "s $dst, $rhs, $lhs"),
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
- }
- // shifted register
- def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
- !strconcat(opc, "s $dst, $rhs, $lhs"),
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
- }
-}
-}
-
-/// T2I_rbin_s_is - Same as T2I_bin_s_irs except the order of operands are
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_s_irs counterpart.
+/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
// shifted imm
def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
- !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+ IIC_iALUi,
+ !strconcat(opc, "${s}.w $dst, $rhs, $lhs"),
[(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
// shifted register
def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
+ IIC_iALUsi,
!strconcat(opc, "${s} $dst, $rhs, $lhs"),
[(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
}
@@ -365,96 +301,96 @@ multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
/// rotate operation that produces a value.
multiclass T2I_sh_ir<string opc, PatFrag opnode> {
// 5-bit imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>;
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- opc, " $dst, $lhs, $rhs",
+ def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
+ opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>;
}
-/// T21_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// T2I_cmp_is - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Uses = [CPSR] in {
+let Defs = [CPSR] in {
multiclass T2I_cmp_is<string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs),
- opc, " $lhs, $rhs",
+ def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
+ opc, ".w $lhs, $rhs",
[(opnode GPR:$lhs, t2_so_imm:$rhs)]>;
// register
- def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs),
- opc, " $lhs, $rhs",
+ def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ opc, ".w $lhs, $rhs",
[(opnode GPR:$lhs, GPR:$rhs)]>;
// shifted register
- def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs),
- opc, " $lhs, $rhs",
+ def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi,
+ opc, ".w $lhs, $rhs",
[(opnode GPR:$lhs, t2_so_reg:$rhs)]>;
}
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
multiclass T2I_ld<string opc, PatFrag opnode> {
- def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr),
- opc, " $dst, $addr",
+ def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi,
+ opc, ".w $dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>;
- def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr),
+ def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi,
opc, " $dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>;
- def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr),
- opc, " $dst, $addr",
+ def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr,
+ opc, ".w $dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>;
- def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr),
- opc, " $dst, $addr",
+ def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
+ opc, ".w $dst, $addr",
[(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>;
}
/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
multiclass T2I_st<string opc, PatFrag opnode> {
- def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr),
- opc, " $src, $addr",
+ def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei,
+ opc, ".w $src, $addr",
[(opnode GPR:$src, t2addrmode_imm12:$addr)]>;
- def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr),
+ def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei,
opc, " $src, $addr",
[(opnode GPR:$src, t2addrmode_imm8:$addr)]>;
- def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr),
- opc, " $src, $addr",
+ def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer,
+ opc, ".w $src, $addr",
[(opnode GPR:$src, t2addrmode_so_reg:$addr)]>;
}
/// T2I_picld - Defines the PIC load pattern.
class T2I_picld<string opc, PatFrag opnode> :
- T2I<(outs GPR:$dst), (ins addrmodepc:$addr),
- !strconcat("${addr:label}:\n\t", opc), " $dst, $addr",
+ T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi,
+ !strconcat("\n${addr:label}:\n\t", opc), " $dst, $addr",
[(set GPR:$dst, (opnode addrmodepc:$addr))]>;
/// T2I_picst - Defines the PIC store pattern.
class T2I_picst<string opc, PatFrag opnode> :
- T2I<(outs), (ins GPR:$src, addrmodepc:$addr),
- !strconcat("${addr:label}:\n\t", opc), " $src, $addr",
+ T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer,
+ !strconcat("\n${addr:label}:\n\t", opc), " $src, $addr",
[(opnode GPR:$src, addrmodepc:$addr)]>;
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$Src),
- opc, " $dst, $Src",
- [(set GPR:$dst, (opnode GPR:$Src))]>;
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$Src, i32imm:$rot),
- opc, " $dst, $Src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>;
+ def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ opc, ".w $dst, $src",
+ [(set GPR:$dst, (opnode GPR:$src))]>;
+ def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ opc, ".w $dst, $src, ror $rot",
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>;
}
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<string opc, PatFrag opnode> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
+ def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
opc, " $dst, $LHS, $RHS",
[(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>;
def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
- opc, " $dst, $LHS, $RHS, ror $rot",
+ IIC_iALUsr, opc, " $dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
(rotr GPR:$RHS, rot_imm:$rot)))]>;
}
@@ -467,42 +403,46 @@ multiclass T2I_bin_rrot<string opc, PatFrag opnode> {
// Miscellaneous Instructions.
//
-let isNotDuplicable = 1 in
-def t2PICADD : T2XI<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
- "$cp:\n\tadd $dst, pc",
- [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
-
-
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p),
- !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p $dst, pc, #PCRELV${:uid}")),
- []>;
+def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
+ "adr$p.w $dst, #$label", []>;
def t2LEApcrelJT : T2XI<(outs GPR:$dst),
- (ins i32imm:$label, i32imm:$id, pred:$p),
- !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p $dst, pc, #PCRELV${:uid}")),
- []>;
-
-// ADD rd, sp, #so_imm
-def t2ADDrSPi : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- "add $dst, $sp, $imm",
- []>;
-
-// ADD rd, sp, #imm12
-def t2ADDrSPi12 : T2XI<(outs GPR:$dst), (ins GPR:$sp, i32imm:$imm),
- "addw $dst, $sp, $imm",
- []>;
-
-def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- "addw $dst, $sp, $rhs",
- []>;
+ (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
+ "adr$p.w $dst, #${label}_${id}", []>;
+
+// ADD r, sp, {so_imm|i12}
+def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+ IIC_iALUi, "add", ".w $dst, $sp, $imm", []>;
+def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+ IIC_iALUi, "addw", " $dst, $sp, $imm", []>;
+
+// ADD r, sp, so_reg
+def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+ IIC_iALUsi, "add", ".w $dst, $sp, $rhs", []>;
+
+// SUB r, sp, {so_imm|i12}
+def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+ IIC_iALUi, "sub", ".w $dst, $sp, $imm", []>;
+def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+ IIC_iALUi, "subw", " $dst, $sp, $imm", []>;
+
+// SUB r, sp, so_reg
+def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+ IIC_iALUsi,
+ "sub", " $dst, $sp, $rhs", []>;
+
+
+// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+ NoItinerary, "@ sub.w $dst, $sp, $imm", []>;
+def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+ NoItinerary, "@ subw $dst, $sp, $imm", []>;
+def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+ NoItinerary, "@ sub $dst, $sp, $rhs", []>;
+} // usesCustomDAGSchedInserter
//===----------------------------------------------------------------------===//
@@ -521,12 +461,14 @@ defm t2LDRB : T2I_ld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
defm t2LDRSH : T2I_ld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
-let mayLoad = 1 in {
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
-def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst), (ins t2addrmode_imm8s4:$addr),
- "ldrd", " $dst, $addr", []>;
-def t2LDRDpci : T2Ii8s4<(outs GPR:$dst), (ins i32imm:$addr),
- "ldrd", " $dst, $addr", []>;
+def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoadi, "ldrd", " $dst1, $addr", []>;
+def t2LDRDpci : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2),
+ (ins i32imm:$addr), IIC_iLoadi,
+ "ldrd", " $dst1, $addr", []>;
}
// zextload i1 -> zextload i8
@@ -573,57 +515,57 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
let mayLoad = 1 in {
def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
"ldr", " $dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldr", " $dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
"ldrb", " $dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldrb", " $dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
"ldrh", " $dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldrh", " $dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
"ldrsb", " $dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldrsb", " $dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
"ldrsh", " $dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldrsh", " $dst, [$base], $offset", "$base = $base_wb",
[]>;
}
@@ -634,108 +576,95 @@ defm t2STRB : T2I_st<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayLoad = 1 in
-def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src, t2addrmode_imm8s4:$addr),
- "strd", " $src, $addr", []>;
+let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<(outs),
+ (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
+ IIC_iStorer, "strd", " $src1, $addr", []>;
// Indexed stores
def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
"str", " $src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
"str", " $src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
"strh", " $src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
"strh", " $src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre,
+ AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
"strb", " $src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost,
+ AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
"strb", " $src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
-// Address computation and loads and stores in PIC mode.
-let isNotDuplicable = 1, AddedComplexity = 10 in {
-let canFoldAsLoad = 1 in
-def t2PICLDR : T2I_picld<"ldr", UnOpFrag<(load node:$Src)>>;
-
-def t2PICLDRH : T2I_picld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>;
-def t2PICLDRB : T2I_picld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
-def t2PICLDRSH : T2I_picld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
-def t2PICLDRSB : T2I_picld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
-
-def t2PICSTR : T2I_picst<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
-def t2PICSTRH : T2I_picst<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
-def t2PICSTRB : T2I_picst<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-} // isNotDuplicable = 1, AddedComplexity = 10
-
+// FIXME: ldrd / strd pre / post variants
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
-let mayLoad = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def t2LDM : T2XI<(outs),
- (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
- "ldm${p}${addr:submode} $addr, $dst1", []>;
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide} $addr, $wb", []>;
-let mayStore = 1 in
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def t2STM : T2XI<(outs),
- (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
- "stm${p}${addr:submode} $addr, $src1", []>;
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ IIC_iStorem, "stm${addr:submode}${p}${addr:wide} $addr, $wb", []>;
//===----------------------------------------------------------------------===//
// Move Instructions.
//
let neverHasSideEffects = 1 in
-def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src),
- "mov", " $dst, $src", []>;
+def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ "mov", ".w $dst, $src", []>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src),
- "mov", " $dst, $src",
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
+def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+ "mov", ".w $dst, $src",
[(set GPR:$dst, t2_so_imm:$src)]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src),
+def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", " $dst, $src",
[(set GPR:$dst, imm0_65535:$src)]>;
-// FIXME: Also available in ARM mode.
let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
- "movt", " $dst, $imm",
- [(set GPR:$dst,
- (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;
+def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
+ "movt", " $dst, $imm",
+ [(set GPR:$dst,
+ (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>;
//===----------------------------------------------------------------------===//
// Extend Instructions.
@@ -785,12 +714,14 @@ defm t2SUBS : T2I_bin_s_irs <"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm t2ADC : T2I_adde_sube_irs<"adc",BinOpFrag<(adde node:$LHS, node:$RHS)>,1>;
defm t2SBC : T2I_adde_sube_irs<"sbc",BinOpFrag<(sube node:$LHS, node:$RHS)>>;
-// RSB, RSC
+// RSB
defm t2RSB : T2I_rbin_is <"rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
defm t2RSBS : T2I_rbin_s_is <"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
-defm t2RSC : T2I_rsc_is <"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
+ (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
@@ -806,105 +737,250 @@ defm t2LSR : T2I_sh_ir<"lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
defm t2ASR : T2I_sh_ir<"asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
-def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src),
- "mov", " $dst, $src, rrx",
+let Uses = [CPSR] in {
+def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "rrx", " $dst, $src",
[(set GPR:$dst, (ARMrrx GPR:$src))]>;
+}
+
+let Defs = [CPSR] in {
+def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "lsrs.w $dst, $src, #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>;
+def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "asrs.w $dst, $src, #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>;
+}
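These shift-by-one forms exist to expose the carry bit that a following rrx consumes (t2MOVrx above); the data flow, sketched in C++ with the flag made explicit:

#include <cstdint>

// lsrs dst, src, #1: shift right by one; the bit shifted out lands in
// CPSR.C, modeled here as an explicit output.
static uint32_t lsrs1(uint32_t src, bool &carry) {
  carry = (src & 1u) != 0;
  return src >> 1;
}

// rrx dst, src: rotate right by one through the carry flag.
static uint32_t rrx(uint32_t src, bool carry) {
  return (src >> 1) | (uint32_t(carry) << 31);
}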
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
-defm t2AND : T2I_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
-defm t2ORR : T2I_bin_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
-defm t2EOR : T2I_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+defm t2AND : T2I_bin_w_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm t2ORR : T2I_bin_w_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm t2EOR : T2I_bin_w_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
-defm t2BIC : T2I_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2BIC : T2I_bin_w_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
- (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+let Constraints = "$src = $dst" in
+def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfc", " $dst, $imm",
+ [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>;
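bf_inv_mask_imm admits AND masks that clear a single contiguous bitfield (e.g. 0xF000FFFF); one compact way to test that property, sketched here (not the in-tree predicate, which walks the bits):

#include <cstdint>

// v qualifies when ~v is one contiguous run of ones: adding the run's
// lowest set bit carries straight through and clears the whole run.
static bool isBitfieldClearMask(uint32_t v) {
  uint32_t m = ~v;              // the bits bfc would clear
  if (m == 0) return false;     // v == 0xFFFFFFFF is rejected
  uint32_t low = m & (0u - m);  // lowest set bit
  return ((m + low) & m) == 0;
}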
-defm t2ORN : T2I_bin_irs<"orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
+def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+ IIC_iALUi, "sbfx", " $dst, $src, $lsb, $width", []>;
-def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
- (t2ORNri GPR:$src, t2_so_imm_not:$imm)>;
+def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+ IIC_iALUi, "ubfx", " $dst, $src, $lsb, $width", []>;
+
+// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1)
+
+defm t2ORN : T2I_bin_irs<"orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
// Prefer t2MVN over t2EORri ra, rb, -1 because mvn has a 16-bit version.
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <"mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
-def : T2Pat<(t2_so_imm_not:$src),
- (t2MVNi t2_so_imm_not:$src)>;
-// A8.6.17 BFC - Bitfield clear
-// FIXME: Also available in ARM mode.
-let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
- "bfc", " $dst, $imm",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>;
+def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
+ (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
-// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1)
+// FIXME: Disable this pattern on Darwin to work around an assembler bug.
+def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri GPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
//===----------------------------------------------------------------------===//
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
"mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
"mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
-// FIXME: SMULL, etc.
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+ "smull", " $ldst, $hdst, $a, $b", []>;
+
+def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+ "umull", " $ldst, $hdst, $a, $b", []>;
+}
+
+// Multiply + accumulate
+def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+ "smlal", " $ldst, $hdst, $a, $b", []>;
+
+def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+ "umlal", " $ldst, $hdst, $a, $b", []>;
+
+def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+ "umaal", " $ldst, $hdst, $a, $b", []>;
+} // neverHasSideEffects
+
+// Most significant word multiply
+def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ "smmul", " $dst, $a, $b",
+ [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
+
+def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+ "smmla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
+
+
+def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+ "smmls", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ !strconcat(opc, "bb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16)))]>;
+
+ def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ !strconcat(opc, "bt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, (i32 16))))]>;
+
+ def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ !strconcat(opc, "tb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
+ (sext_inreg GPR:$b, i16)))]>;
+
+ def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ !strconcat(opc, "tt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
+ (sra GPR:$b, (i32 16))))]>;
+
+ def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ !strconcat(opc, "wb"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), (i32 16)))]>;
+
+ def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ !strconcat(opc, "wt"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sra GPR:$b, (i32 16))), (i32 16)))]>;
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16))))]>;
+
+ def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, (i32 16)))))]>;
+
+ def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
+ (sext_inreg GPR:$b, i16))))]>;
+
+ def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
+ (sra GPR:$b, (i32 16)))))]>;
+
+ def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), (i32 16))))]>;
+
+ def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sra GPR:$b, (i32 16))), (i32 16))))]>;
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// TODO: Halfword multiply accumulate long: SMLAL<x><y>
+// TODO: Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
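The lane selection the smul/smla multiclasses encode in dag form is easier to read in plain C++; a semantics-only sketch of three representative variants:

#include <cstdint>

// smulbb: sign-extended bottom halfwords multiplied.
static int32_t smulbb(int32_t a, int32_t b) {
  return int32_t(int16_t(a)) * int32_t(int16_t(b));
}

// smultt: top halfwords, selected by arithmetic shift as in the patterns.
static int32_t smultt(int32_t a, int32_t b) {
  return (a >> 16) * (b >> 16);
}

// smulwb: 32 x 16 multiply keeping bits [47:16] of the product; 64-bit
// arithmetic is used here only to avoid C++ overflow pitfalls.
static int32_t smulwb(int32_t a, int32_t b) {
  return int32_t((int64_t(a) * int16_t(b)) >> 16);
}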
+
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
-def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src),
+def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
"clz", " $dst, $src",
[(set GPR:$dst, (ctlz GPR:$src))]>;
-def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src),
- "rev", " $dst, $src",
+def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ "rev", ".w $dst, $src",
[(set GPR:$dst, (bswap GPR:$src))]>;
-def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src),
- "rev16", " $dst, $src",
+def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ "rev16", ".w $dst, $src",
[(set GPR:$dst,
(or (and (srl GPR:$src, (i32 8)), 0xFF),
(or (and (shl GPR:$src, (i32 8)), 0xFF00),
(or (and (srl GPR:$src, (i32 8)), 0xFF0000),
(and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
-/////
-/// A8.6.137 REVSH
-/////
-def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src),
- "revsh", " $dst, $src",
+def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ "revsh", ".w $dst, $src",
[(set GPR:$dst,
(sext_inreg
- (or (srl (and GPR:$src, 0xFFFF), (i32 8)),
+ (or (srl (and GPR:$src, 0xFF00), (i32 8)),
(shl GPR:$src, (i32 8))), i16))]>;
-// FIXME: PKHxx etc.
+def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
+ IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
+ (and (shl GPR:$src2, (i32 imm:$shamt)),
+ 0xFFFF0000)))]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
+ (t2PKHBT GPR:$src1, GPR:$src2, 0)>;
+def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
+ (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+
+def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
+ IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
+ (and (sra GPR:$src2, imm16_31:$shamt),
+ 0xFFFF)))]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here; it encodes as PKHBT instead.
+def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
+ (t2PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
+ (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
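pkhbt/pkhtb pack one halfword from each source register; the patterns above correspond to this C++ (semantics sketch only):

#include <cstdint>

// pkhbt: bottom halfword of a, top halfword of (b << lsl).
static uint32_t pkhbt(uint32_t a, uint32_t b, unsigned lsl) {
  return (a & 0x0000FFFFu) | ((b << lsl) & 0xFFFF0000u);
}

// pkhtb: top halfword of a, bottom halfword of (b >> asr, arithmetic).
// A shift amount of 0 is not valid here; that form encodes as pkhbt.
static uint32_t pkhtb(uint32_t a, uint32_t b, unsigned asr) {
  return (a & 0xFFFF0000u) | (uint32_t(int32_t(b) >> asr) & 0x0000FFFFu);
}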
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-defm t2CMP : T2I_cmp_is<"cmp",
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm t2CMP : T2I_cmp_is<"cmp",
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
defm t2CMPz : T2I_cmp_is<"cmp",
BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
-defm t2CMN : T2I_cmp_is<"cmn",
- BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+defm t2CMN : T2I_cmp_is<"cmn",
+ BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
defm t2CMNz : T2I_cmp_is<"cmn",
BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
@@ -923,45 +999,132 @@ defm t2TEQ : T2I_cmp_is<"teq",
// Short range conditional branch. Looks awesome for loops. Need to figure
// out how to use this one.
-// FIXME: Conditional moves
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
+ "mov", ".w $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true),
+ IIC_iCMOVi, "mov", ".w $dst, $true",
+[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def t2MOVCClsl : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
+ IIC_iCMOVsi, "lsl", ".w $dst, $true, $rhs", []>,
+ RegConstraint<"$false = $dst">;
+def t2MOVCClsr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
+ IIC_iCMOVsi, "lsr", ".w $dst, $true, $rhs", []>,
+ RegConstraint<"$false = $dst">;
+def t2MOVCCasr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
+ IIC_iCMOVsi, "asr", ".w $dst, $true, $rhs", []>,
+ RegConstraint<"$false = $dst">;
+def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
+ IIC_iCMOVsi, "ror", ".w $dst, $true, $rhs", []>,
+ RegConstraint<"$false = $dst">;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR] in {
+ def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
+ D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+ D31 ] in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src),
+ AddrModeNone, SizeSpecial, NoItinerary,
+ "str.w sp, [$src, #+8] @ eh_setjmp begin\n"
+ "\tadr r12, 0f\n"
+ "\torr r12, #1\n"
+ "\tstr.w r12, [$src, #+4]\n"
+ "\tmovs r0, #0\n"
+ "\tb 1f\n"
+ "0:\tmovs r0, #1 @ eh_setjmp end\n"
+ "1:", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
+}
+
+
//===----------------------------------------------------------------------===//
// Control-Flow Instructions
//
+// FIXME: remove when we have a way to mark an MI with these properties.
+// FIXME: $dst1 should be a def. But the extra ops must be in the end of the
+// operand list.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+ def t2LDM_RET : T2XI<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+ IIC_Br, "ldm${addr:submode}${p}${addr:wide} $addr, $wb",
+ []>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
-def t2B : T2XI<(outs), (ins brtarget:$target),
- "b $target",
+def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br,
+ "b.w $target",
[(br bb:$target)]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
-def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- "mov pc, $target \n$jt",
- [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+def t2BR_JT :
+ T2JTI<(outs),
+ (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
+ IIC_Br, "mov pc, $target\n$jt",
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
-def t2BR_JTm :
+// FIXME: Add a non-pc based case that can be predicated.
+def t2TBB :
T2JTI<(outs),
- (ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id),
- "ldr pc, $target \n$jt",
- [(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt,
- imm:$id)]>;
+ (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
+ IIC_Br, "tbb $index\n$jt", []>;
-def t2BR_JTadd :
+def t2TBH :
T2JTI<(outs),
- (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- "add pc, $target, $idx \n$jt",
- [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>;
-} // isNotDuplicate, isIndirectBranch
+ (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
+ IIC_Br, "tbh $index\n$jt", []>;
+} // isNotDuplicable, isIndirectBranch
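(Aside: tbb/tbh implement compact jump tables: the byte (or halfword) loaded at the index is a forward branch distance in halfwords from the table base. An illustrative model of the target computation, assuming the base register is the pc pointing at the table:

    #include <cstdint>

    // Illustrative tbb target: branch to base + 2 * table[index]; tbh is
    // identical but indexes halfwords, doubling the reach.
    uint32_t tbbTarget(uint32_t tableBase, const uint8_t *table, unsigned idx) {
      return tableBase + 2u * table[idx];
    }
)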
+
} // isBranch, isTerminator, isBarrier
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
-def t2Bcc : T2I<(outs), (ins brtarget:$target),
- "b", " $target",
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w $target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>;
+
+// IT block
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, Size2Bytes, IIC_iALUx,
+ "it$mask $cc", "", []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -972,7 +1135,10 @@ def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
(t2LEApcrelJT tjumptable:$dst, imm:$id)>;
-// Large immediate handling.
-
-def : T2Pat<(i32 imm:$src),
- (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)), (t2_hi16 imm:$src))>;
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable. Remove
+// when we can do generalized remat.
+let isReMaterializable = 1 in
+def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+ "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+ [(set GPR:$dst, (i32 imm:$src))]>;
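(Aside: the ${src:lo16}/${src:hi16} modifiers split the immediate into the two 16-bit halves that movw and movt consume; movw zero-extends into the low half, movt fills in the high half. A quick arithmetic sketch:

    #include <cstdint>
    #include <cstdio>

    // Sketch of the lo16/hi16 split behind t2MOVi32imm.
    int main() {
      uint32_t imm = 0xDEADBEEF;
      uint16_t lo = imm & 0xFFFF;   // movw dst, #0xBEEF  (dst = 0x0000BEEF)
      uint16_t hi = imm >> 16;      // movt dst, #0xDEAD  (dst = 0xDEADBEEF)
      printf("movw #%#x / movt #%#x\n", lo, hi);
    }
)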
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 9104c77115f0..56336d131abc 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -36,57 +36,57 @@ def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
let canFoldAsLoad = 1 in {
def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- "fldd", " $dst, $addr",
+ IIC_fpLoad64, "fldd", " $dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
- "flds", " $dst, $addr",
+ IIC_fpLoad32, "flds", " $dst, $addr",
[(set SPR:$dst, (load addrmode5:$addr))]>;
} // canFoldAsLoad
def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
- "fstd", " $src, $addr",
+ IIC_fpStore64, "fstd", " $src, $addr",
[(store DPR:$src, addrmode5:$addr)]>;
def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
- "fsts", " $src, $addr",
+ IIC_fpStore32, "fsts", " $src, $addr",
[(store SPR:$src, addrmode5:$addr)]>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
-let mayLoad = 1 in {
-def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1,
- variable_ops),
- "fldm${addr:submode}d${p} ${addr:base}, $dst1",
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+ variable_ops), IIC_fpLoadm,
+ "fldm${addr:submode}d${p} ${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
-def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1,
- variable_ops),
- "fldm${addr:submode}s${p} ${addr:base}, $dst1",
+def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+ variable_ops), IIC_fpLoadm,
+ "fldm${addr:submode}s${p} ${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
-}
+} // mayLoad, hasExtraDefRegAllocReq
-let mayStore = 1 in {
-def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
- variable_ops),
- "fstm${addr:submode}d${p} ${addr:base}, $src1",
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+ variable_ops), IIC_fpStorem,
+ "fstm${addr:submode}d${p} ${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
-def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
- variable_ops),
- "fstm${addr:submode}s${p} ${addr:base}, $src1",
+def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+ variable_ops), IIC_fpStorem,
+ "fstm${addr:submode}s${p} ${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
-} // mayStore
+} // mayStore, hasExtraSrcRegAllocReq
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -95,46 +95,48 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
//
def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- "faddd", " $dst, $a, $b",
+ IIC_fpALU64, "faddd", " $dst, $a, $b",
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
-def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- "fadds", " $dst, $a, $b",
- [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "fadds", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
- "fcmped", " $a, $b",
+ IIC_fpCMP64, "fcmped", " $a, $b",
[(arm_cmpfp DPR:$a, DPR:$b)]>;
def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
- "fcmpes", " $a, $b",
+ IIC_fpCMP32, "fcmpes", " $a, $b",
[(arm_cmpfp SPR:$a, SPR:$b)]>;
+}
def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- "fdivd", " $dst, $a, $b",
+ IIC_fpDIV64, "fdivd", " $dst, $a, $b",
[(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- "fdivs", " $dst, $a, $b",
+ IIC_fpDIV32, "fdivs", " $dst, $a, $b",
[(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- "fmuld", " $dst, $a, $b",
+ IIC_fpMUL64, "fmuld", " $dst, $a, $b",
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
-def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- "fmuls", " $dst, $a, $b",
- [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "fmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- "fnmuld", " $dst, $a, $b",
+ IIC_fpMUL64, "fnmuld", " $dst, $a, $b",
[(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
let Inst{6} = 1;
}
def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- "fnmuls", " $dst, $a, $b",
+ IIC_fpMUL32, "fnmuls", " $dst, $a, $b",
[(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
let Inst{6} = 1;
}
@@ -147,14 +149,14 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- "fsubd", " $dst, $a, $b",
+ IIC_fpALU64, "fsubd", " $dst, $a, $b",
[(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
let Inst{6} = 1;
}
-def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- "fsubs", " $dst, $a, $b",
- [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
+def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "fsubs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
let Inst{6} = 1;
}
@@ -163,29 +165,31 @@ def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
//
def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- "fabsd", " $dst, $a",
+ IIC_fpUNA64, "fabsd", " $dst, $a",
[(set DPR:$dst, (fabs DPR:$a))]>;
-def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- "fabss", " $dst, $a",
- [(set SPR:$dst, (fabs SPR:$a))]>;
+def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "fabss", " $dst, $a",
+ [(set SPR:$dst, (fabs SPR:$a))]>;
+let Defs = [FPSCR] in {
def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
- "fcmpezd", " $a",
+ IIC_fpCMP64, "fcmpezd", " $a",
[(arm_cmpfp0 DPR:$a)]>;
def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
- "fcmpezs", " $a",
+ IIC_fpCMP32, "fcmpezs", " $a",
[(arm_cmpfp0 SPR:$a)]>;
+}
def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
- "fcvtds", " $dst, $a",
+ IIC_fpCVTDS, "fcvtds", " $dst, $a",
[(set DPR:$dst, (fextend SPR:$a))]>;
// Special case encoding: bits 11-8 is 0b1011.
-def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- "fcvtsd", " $dst, $a",
- [(set SPR:$dst, (fround DPR:$a))]> {
+def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+ IIC_fpCVTSD, "fcvtsd", " $dst, $a",
+ [(set SPR:$dst, (fround DPR:$a))]> {
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
let Inst{11-8} = 0b1011;
@@ -194,26 +198,26 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
let neverHasSideEffects = 1 in {
def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- "fcpyd", " $dst, $a", []>;
+ IIC_fpUNA64, "fcpyd", " $dst, $a", []>;
def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- "fcpys", " $dst, $a", []>;
+ IIC_fpUNA32, "fcpys", " $dst, $a", []>;
} // neverHasSideEffects
def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- "fnegd", " $dst, $a",
+ IIC_fpUNA64, "fnegd", " $dst, $a",
[(set DPR:$dst, (fneg DPR:$a))]>;
-def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- "fnegs", " $dst, $a",
- [(set SPR:$dst, (fneg SPR:$a))]>;
+def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "fnegs", " $dst, $a",
+ [(set SPR:$dst, (fneg SPR:$a))]>;
def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- "fsqrtd", " $dst, $a",
+ IIC_fpSQRT64, "fsqrtd", " $dst, $a",
[(set DPR:$dst, (fsqrt DPR:$a))]>;
def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- "fsqrts", " $dst, $a",
+ IIC_fpSQRT32, "fsqrts", " $dst, $a",
[(set SPR:$dst, (fsqrt SPR:$a))]>;
//===----------------------------------------------------------------------===//
@@ -221,16 +225,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
//
def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- "fmrs", " $dst, $src",
+ IIC_VMOVSI, "fmrs", " $dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- "fmsr", " $dst, $src",
+ IIC_VMOVIS, "fmsr", " $dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
def FMRRD : AVConv3I<0b11000101, 0b1011,
- (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src),
- "fmrrd", " $dst1, $dst2, $src",
+ (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
+ IIC_VMOVDI, "fmrrd", " $wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
@@ -238,7 +242,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011,
def FMDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- "fmdrr", " $dst, $src1, $src2",
+ IIC_VMOVID, "fmdrr", " $dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
@@ -254,23 +258,23 @@ def FMDRR : AVConv5I<0b11000100, 0b1011,
// Int to FP:
def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- "fsitod", " $dst, $a",
+ IIC_fpCVTID, "fsitod", " $dst, $a",
[(set DPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
- "fsitos", " $dst, $a",
+def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "fsitos", " $dst, $a",
[(set SPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- "fuitod", " $dst, $a",
+ IIC_fpCVTID, "fuitod", " $dst, $a",
[(set DPR:$dst, (arm_uitof SPR:$a))]>;
-def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
- "fuitos", " $dst, $a",
+def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "fuitos", " $dst, $a",
[(set SPR:$dst, (arm_uitof SPR:$a))]>;
// FP to Int:
@@ -278,28 +282,28 @@ def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- "ftosizd", " $dst, $a",
+ IIC_fpCVTDI, "ftosizd", " $dst, $a",
[(set SPR:$dst, (arm_ftosi DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- "ftosizs", " $dst, $a",
+def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
+ (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpCVTSI, "ftosizs", " $dst, $a",
[(set SPR:$dst, (arm_ftosi SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- "ftouizd", " $dst, $a",
+ IIC_fpCVTDI, "ftouizd", " $dst, $a",
[(set SPR:$dst, (arm_ftoui DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- "ftouizs", " $dst, $a",
+def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
+ (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpCVTSI, "ftouizs", " $dst, $a",
[(set SPR:$dst, (arm_ftoui SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
@@ -309,48 +313,53 @@ def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010,
//
def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- "fmacd", " $dst, $a, $b",
+ IIC_fpMAC64, "fmacd", " $dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- "fmacs", " $dst, $a, $b",
- [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
+def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "fmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- "fmscd", " $dst, $a, $b",
+ IIC_fpMAC64, "fmscd", " $dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- "fmscs", " $dst, $a, $b",
+ IIC_fpMAC32, "fmscs", " $dst, $a, $b",
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- "fnmacd", " $dst, $a, $b",
+ IIC_fpMAC64, "fnmacd", " $dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- "fnmacs", " $dst, $a, $b",
+def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "fnmacs", " $dst, $a, $b",
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
+def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)),
+ (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
+ (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+
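(Aside: these patterns work because fnmacs/fnmacd compute acc + (-(a*b)), which is algebraically acc - a*b, so a plain fsub of a product can reuse the negated multiply-accumulate. A numeric sanity check of the identity:

    #include <cstdio>

    // The Pat above maps  x - a*b  onto FNMACS, i.e.  x + (-(a * b)).
    int main() {
      float x = 10.0f, a = 2.0f, b = 3.0f;
      printf("%f %f\n", x - a * b, x + (-(a * b)));  // both print 4.000000
    }
)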
def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- "fnmscd", " $dst, $a, $b",
+ IIC_fpMAC64, "fnmscd", " $dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- "fnmscs", " $dst, $a, $b",
+ IIC_fpMAC32, "fnmscs", " $dst, $a, $b",
[(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
@@ -362,25 +371,25 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- "fcpyd", " $dst, $true",
+ IIC_fpUNA64, "fcpyd", " $dst, $true",
[/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- "fcpys", " $dst, $true",
+ IIC_fpUNA32, "fcpys", " $dst, $true",
[/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- "fnegd", " $dst, $true",
+ IIC_fpUNA64, "fnegd", " $dst, $true",
[/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- "fnegs", " $dst, $true",
+ IIC_fpUNA32, "fnegs", " $dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
@@ -389,8 +398,8 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
// Misc.
//
-let Defs = [CPSR] in
-def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> {
+let Defs = [CPSR], Uses = [FPSCR] in
+def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fmstat)]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{15-12} = 0b1111;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index e551c41936f8..24990e67a381 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -19,15 +19,15 @@
#include "ARMSubtarget.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Config/alloca.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Memory.h"
#include <cstdlib>
using namespace llvm;
void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- abort();
+ llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction");
}
/// JITCompilerFunction - This contains the address of the JIT function used to
@@ -45,11 +45,11 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// CompilationCallback stub - We can't use a C function with inline assembly in
// it, because the prolog/epilog inserted by GCC won't work for us (we need
// to preserve more context and manipulate the stack directly). Instead,
-// write our own wrapper, which does things our way, so we have complete
+// write our own wrapper, which does things our way, so we have complete
// control over register saving and restoring.
extern "C" {
#if defined(__arm__)
- void ARMCompilationCallback(void);
+ void ARMCompilationCallback();
asm(
".text\n"
".align 2\n"
@@ -77,11 +77,11 @@ extern "C" {
// order for the registers.
// +--------+
// 0 | LR | Original return address
- // +--------+
+ // +--------+
// 1 | LR | Stub address (start of stub)
// 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
// 6-20 | D0..D7 | Saved VFP registers
- // +--------+
+ // +--------+
//
#ifndef __SOFTFP__
// Restore VFP caller-saved registers.
@@ -103,15 +103,14 @@ extern "C" {
);
#else // Not an ARM host
void ARMCompilationCallback() {
- assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n");
- abort();
+ llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
}
#endif
}
-/// ARMCompilationCallbackC - This is the target-specific function invoked
-/// by the function stub when we did not know the real target of a call.
-/// This function must locate the start of the stub or call site and pass
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
/// it into the JIT compiler function.
extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
// Get the address of the compiled code for this function.
@@ -123,14 +122,12 @@ extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
// ldr pc, [pc,#-4]
// <addr>
if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
- cerr << "ERROR: Unable to mark stub writable\n";
- abort();
+ llvm_unreachable("ERROR: Unable to mark stub writable");
}
*(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
*(intptr_t *)(StubAddr+4) = NewVal;
if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
- cerr << "ERROR: Unable to mark stub executable\n";
- abort();
+ llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
@@ -143,7 +140,14 @@ ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
JITCodeEmitter &JCE) {
JCE.startGVStub(GV, 4, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable((void*)Addr, 4)) {
+ llvm_unreachable("ERROR: Unable to mark indirect symbol writable");
+ }
JCE.emitWordLE((intptr_t)Ptr);
+ if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) {
+ llvm_unreachable("ERROR: Unable to mark indirect symbol executable");
+ }
void *PtrAddr = JCE.finishGVStub(GV);
addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
return PtrAddr;
@@ -161,31 +165,43 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
if (!LazyPtr) {
// In PIC mode, the function stub is loading a lazy-ptr.
LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
- if (F)
- DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '"
- << F->getName() << "'\n";
- else
- DOUT << "JIT: Stub emitted at [" << LazyPtr
- << "] for external function at '" << Fn << "'\n";
+ DEBUG(if (F)
+ errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
+ << "] for GV '" << F->getName() << "'\n";
+ else
+ errs() << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n");
}
JCE.startGVStub(F, 16, 4);
intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable((void*)Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4]
JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
} else {
// The stub is 8-byte size and 4-aligned.
JCE.startGVStub(F, 8, 4);
intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable((void*)Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
JCE.emitWordLE((intptr_t)Fn); // addr of function
sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
+ if (!sys::Memory::setRangeExecutable((void*)Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
}
} else {
// The compilation callback will overwrite the first two words of this
- // stub with indirect branch instructions targeting the compiled code.
+ // stub with indirect branch instructions targeting the compiled code.
// This stub sets the return address to restart the stub, so that
// the new branch will be invoked when we come back.
//
@@ -193,6 +209,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
// The stub is 16-byte size and 4-byte aligned.
JCE.startGVStub(F, 16, 4);
intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable((void*)Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
// Save LR so the callback can determine which stub called it.
// The compilation callback is responsible for popping this prior
// to returning.
@@ -204,6 +223,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
// The address of the compilation callback.
JCE.emitWordLE((intptr_t)ARMCompilationCallback);
sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
}
return JCE.finishGVStub(F);
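(Aside: the setRangeWritable/setRangeExecutable pairs added around every stub follow the usual W^X emission discipline: unprotect, write, flush the instruction cache, re-protect. A hedged POSIX sketch of the same sequence; mprotect needs page-aligned addresses, which LLVM's sys::Memory handles internally:

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Sketch only: mprotect stands in for sys::Memory::setRange{Writable,
    // Executable}; Addr is assumed page-aligned here.
    void emitStubWords(void *Addr, const uint32_t *Words, size_t N) {
      mprotect(Addr, N * 4, PROT_READ | PROT_WRITE);       // make writable
      std::memcpy(Addr, Words, N * 4);                     // emit the words
      __builtin___clear_cache((char *)Addr,
                              (char *)Addr + N * 4);       // icache flush
      mprotect(Addr, N * 4, PROT_READ | PROT_EXEC);        // make executable
    }
)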
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 59cf125a9b99..d2ec9ee6cdf9 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -15,9 +15,11 @@
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -29,6 +31,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -61,6 +64,7 @@ namespace {
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
RegScavenger *RS;
+ bool isThumb2;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -93,6 +97,15 @@ namespace {
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -107,6 +120,14 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
case ARM::STR:
NumSTMGened++;
return ARM::STM;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ NumLDMGened++;
+ return ARM::t2LDM;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ NumSTMGened++;
+ return ARM::t2STM;
case ARM::FLDS:
NumFLDMGened++;
return ARM::FLDMS;
@@ -119,14 +140,30 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
case ARM::FSTD:
NumFSTMGened++;
return ARM::FSTMD;
- default: abort();
+ default: llvm_unreachable("Unhandled opcode!");
}
return 0;
}
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDR || isT2i32Load(Opc);
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STR || isT2i32Store(Opc);
+}
+
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
-/// It returns true if the transformation is done.
+/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -140,14 +177,20 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
- if (isAM4 && Offset == 4)
+ bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
+ if (isAM4 && Offset == 4) {
+ if (isThumb2)
+ // Thumb2 does not support ldmib / stmib.
+ return false;
Mode = ARM_AM::ib;
- else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+ } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
+ if (isThumb2)
+ // Thumb2 does not support ldmda / stmda.
+ return false;
Mode = ARM_AM::da;
- else if (isAM4 && Offset == -4 * (int)NumRegs)
+ } else if (isAM4 && Offset == -4 * (int)NumRegs) {
Mode = ARM_AM::db;
- else if (Offset != 0) {
+ } else if (Offset != 0) {
    // If starting offset isn't zero, insert an MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
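(Aside: the submode selection above maps the starting offset of the run onto the four load/store-multiple addressing modes, with the new Thumb2 guards rejecting the two modes Thumb2 lacks. A condensed restatement of the decision, assuming 4-byte registers:

    // Condensed sketch of the AM submode choice in MergeOps.
    enum SubMode { IA, IB, DA, DB, NeedNewBase, NoMerge };
    SubMode pickSubMode(int Offset, int NumRegs, bool isThumb2) {
      if (Offset == 0) return IA;
      if (Offset == 4)                   // no ldmib / stmib in Thumb2
        return isThumb2 ? NoMerge : IB;
      if (Offset == -4 * NumRegs + 4)    // no ldmda / stmda in Thumb2
        return isThumb2 ? NoMerge : DA;
      if (Offset == -4 * NumRegs) return DB;
      return NeedNewBase;  // nonzero start: materialize a base if profitable
    }
)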
@@ -155,7 +198,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
unsigned NewBase;
- if (Opcode == ARM::LDR)
+ if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
NewBase = Regs[NumRegs-1].first;
@@ -165,24 +208,30 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NewBase == 0)
return false;
}
- int BaseOpc = ARM::ADDri;
+ int BaseOpc = !isThumb2
+ ? ARM::ADDri
+ : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
if (Offset < 0) {
- BaseOpc = ARM::SUBri;
+ BaseOpc = !isThumb2
+ ? ARM::SUBri
+ : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
Offset = - Offset;
}
- int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ int ImmedOffset = isThumb2
+ ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
if (ImmedOffset == -1)
+ // FIXME: Try t2ADDri12 or t2SUBri12?
return false; // Probably not worth it then.
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
}
bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
- bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
@@ -192,6 +241,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
.addReg(Base, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
.addImm(Pred).addReg(PredReg);
+ MIB.addReg(0); // Add optional writeback (0 for now).
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@@ -207,7 +257,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
- bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned Pos = MemOps[SIndex].Position;
@@ -265,41 +315,53 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
return;
}
-/// getInstrPredicate - If instruction is predicated, returns its predicate
-/// condition, otherwise returns AL. It also returns the condition code
-/// register by reference.
-static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
- int PIdx = MI->findFirstPredOperandIdx();
- if (PIdx == -1) {
- PredReg = 0;
- return ARMCC::AL;
- }
-
- PredReg = MI->getOperand(PIdx+1).getReg();
- return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
-}
-
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, ARMCC::CondCodes Pred,
- unsigned PredReg) {
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
unsigned MyPredReg = 0;
- return (MI && MI->getOpcode() == ARM::SUBri &&
- MI->getOperand(0).getReg() == Base &&
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::t2SUBrSPi &&
+ MI->getOpcode() != ARM::t2SUBrSPi12 &&
+ MI->getOpcode() != ARM::tSUBspi &&
+ MI->getOpcode() != ARM::SUBri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, ARMCC::CondCodes Pred,
- unsigned PredReg) {
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
unsigned MyPredReg = 0;
- return (MI && MI->getOpcode() == ARM::ADDri &&
- MI->getOperand(0).getReg() == Base &&
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::t2ADDrSPi &&
+ MI->getOpcode() != ARM::t2ADDrSPi12 &&
+ MI->getOpcode() != ARM::tADDspi &&
+ MI->getOpcode() != ARM::ADDri)
+ return false;
+
+ if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ // Make sure the offset fits in 8 bits.
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
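(Aside: both matchers now accept the whole family of ARM/Thumb/Thumb2 add/sub forms and gain a Limit parameter so the folded offset still fits the target encoding, passed later as 12 bits for AM2 and 8 bits for Thumb2. The range test reduces to:

    // Offset-range guard shared by isMatchingDecrement/Increment; Limit == 0
    // means "no limit at this call site" (the LDM/STM multiple forms).
    bool offsetFits(int Bytes, unsigned Limit) {
      return Bytes > 0 && (Limit == 0 || (unsigned)Bytes < Limit);
    }
)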
@@ -308,6 +370,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
default: return 0;
case ARM::LDR:
case ARM::STR:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
case ARM::FLDS:
case ARM::FSTS:
return 4;
@@ -316,7 +382,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
return 8;
case ARM::LDM:
case ARM::STM:
- return (MI->getNumOperands() - 4) * 4;
+ case ARM::t2LDM:
+ case ARM::t2STM:
+ return (MI->getNumOperands() - 5) * 4;
case ARM::FLDMS:
case ARM::FSTMS:
case ARM::FLDMD:
@@ -325,7 +393,7 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
}
}
-/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
+/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
@@ -337,17 +405,18 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
-static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
- bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+ bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
+ Opcode == ARM::STM || Opcode == ARM::t2STM;
if (isAM4) {
if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
@@ -364,13 +433,17 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+ MI->getOperand(4).setReg(Base);
+ MI->getOperand(4).setIsDef();
MBB.erase(PrevMBBI);
return true;
} else if (Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+ MI->getOperand(4).setReg(Base); // WB to base
+ MI->getOperand(4).setIsDef();
MBB.erase(PrevMBBI);
return true;
}
@@ -379,8 +452,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MBBI != MBB.end()) {
MachineBasicBlock::iterator NextMBBI = next(MBBI);
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MI->getOperand(4).setReg(Base); // WB to base
+ MI->getOperand(4).setIsDef();
if (NextMBBI == I) {
Advance = true;
++I;
@@ -388,8 +463,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MBB.erase(NextMBBI);
return true;
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MI->getOperand(4).setReg(Base); // WB to base
+ MI->getOperand(4).setIsDef();
if (NextMBBI == I) {
Advance = true;
++I;
@@ -408,8 +485,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+ MI->getOperand(4).setReg(Base); // WB to base
+ MI->getOperand(4).setIsDef();
MBB.erase(PrevMBBI);
return true;
}
@@ -418,8 +497,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MBBI != MBB.end()) {
MachineBasicBlock::iterator NextMBBI = next(MBBI);
if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+ MI->getOperand(4).setReg(Base); // WB to base
+ MI->getOperand(4).setIsDef();
if (NextMBBI == I) {
Advance = true;
++I;
@@ -441,7 +522,13 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
case ARM::FLDD: return ARM::FLDMD;
case ARM::FSTS: return ARM::FSTMS;
case ARM::FSTD: return ARM::FSTMD;
- default: abort();
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
}
return 0;
}
@@ -454,48 +541,62 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
case ARM::FLDD: return ARM::FLDMD;
case ARM::FSTS: return ARM::FSTMS;
case ARM::FSTD: return ARM::FSTMD;
- default: abort();
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
}
return 0;
}
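(Aside: pre- and post-indexed forms fold the base update into the memory op itself; the only difference is whether the writeback happens before or after the access. A small pointer analogy, illustrative only:

    #include <cstdio>

    int main() {
      int buf[2] = {10, 20};
      // Post-indexed (ldr r0, [r1], #4): load first, then advance the base.
      int *p = buf;
      int post = *p; p += 1;       // post == 10, p now at buf[1]
      // Pre-indexed  (ldr r0, [r1, #4]!): advance the base, then load.
      int *q = buf;
      q += 1; int pre = *q;        // pre == 20, q now at buf[1]
      printf("post=%d pre=%d\n", post, pre);
    }
)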
-/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
+/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
-static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
+ bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
+ Opcode == ARM::FSTD || Opcode == ARM::FSTS;
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
- if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
- (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+ if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
+ else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+ else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
- bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (isLd && MI->getOperand(0).getReg() == Base)
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
+ // AM2 - 12 bits, thumb2 - 8 bits.
+ unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
- } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
- Pred, PredReg)) {
+ } else if (!isAM5 &&
+ isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
}
@@ -505,11 +606,12 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge && MBBI != MBB.end()) {
MachineBasicBlock::iterator NextMBBI = next(MBBI);
- if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ if (!isAM5 &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
}
@@ -526,33 +628,51 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
- unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
- : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
- true, isDPR ? 2 : 1);
+ unsigned Offset = 0;
+ if (isAM5)
+ Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
+ ? ARM_AM::db
+ : ARM_AM::ia, true, (isDPR ? 2 : 1));
+ else if (isAM2)
+ Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ else
+ Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isLd) {
- if (isAM2)
- // LDR_PRE, LDR_POST;
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
- else
+ if (isAM5)
// FLDMS, FLDMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Offset).addImm(Pred).addReg(PredReg)
+ .addReg(Base, getDefRegState(true)) // WB base register
.addReg(MI->getOperand(0).getReg(), RegState::Define);
- } else {
- MachineOperand &MO = MI->getOperand(0);
- if (isAM2)
- // STR_PRE, STR_POST;
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ else if (isAM2)
+ // LDR_PRE, LDR_POST,
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
else
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM5)
// FSTMS, FSTMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
.addImm(Pred).addReg(PredReg)
+ .addReg(Base, getDefRegState(true)) // WB base register
.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+ else if (isAM2)
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
MBB.erase(MBBI);
@@ -561,7 +681,7 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
-static bool isMemoryOp(MachineInstr *MI) {
+static bool isMemoryOp(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
@@ -574,6 +694,11 @@ static bool isMemoryOp(MachineInstr *MI) {
case ARM::FLDD:
case ARM::FSTD:
return MI->getOperand(1).isReg();
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return MI->getOperand(1).isReg();
}
return false;
}
@@ -600,6 +725,12 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
+ return OffField;
+
int Offset = isAM2
? ARM_AM::getAM2Offset(OffField)
: (isAM3 ? ARM_AM::getAM3Offset(OffField)
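(Aside: the early return added here reflects that the new Thumb2 forms carry a plain signed immediate, while the classic ARM addressing modes pack sign and magnitude into one operand. A decoding sketch for AM2, based on the usual LLVM packing of a sign bit above a 12-bit magnitude; treat the exact layout as an assumption here:

    // Assumed AM2 packing: bit 12 = sub flag, bits 11:0 = magnitude.
    // Thumb2 i8/i12 forms skip this and store the signed offset directly.
    int decodeAM2Offset(unsigned OffField) {
      int Mag = OffField & 0xFFF;
      return ((OffField >> 12) & 1) ? -Mag : Mag;
    }
)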
@@ -621,37 +752,43 @@ static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int OffImm, bool isDef,
DebugLoc dl, unsigned NewOpc,
- unsigned Reg, bool RegDeadKill,
- unsigned BaseReg, bool BaseKill,
- unsigned OffReg, bool OffKill,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ unsigned OffReg, bool OffKill, bool OffUndef,
ARMCC::CondCodes Pred, unsigned PredReg,
- const TargetInstrInfo *TII) {
- unsigned Offset;
- if (OffImm < 0)
- Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
- else
- Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
- if (isDef)
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ const TargetInstrInfo *TII, bool isT2) {
+ int Offset = OffImm;
+ if (!isT2) {
+ if (OffImm < 0)
+ Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
+ else
+ Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
+ }
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
.addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
- .addReg(BaseReg, getKillRegState(BaseKill))
- .addReg(OffReg, getKillRegState(OffKill))
- .addImm(Offset)
- .addImm(Pred).addReg(PredReg);
- else
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
- .addReg(Reg, getKillRegState(RegDeadKill))
- .addReg(BaseReg, getKillRegState(BaseKill))
- .addReg(OffReg, getKillRegState(OffKill))
- .addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ if (!isT2)
+ MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ if (!isT2)
+ MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
unsigned EvenReg = MI->getOperand(0).getReg();
unsigned OddReg = MI->getOperand(1).getReg();
unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
@@ -659,45 +796,59 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
return false;
- bool isLd = Opcode == ARM::LDRD;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
bool OddDeadKill = isLd ?
MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
const MachineOperand &BaseOp = MI->getOperand(2);
unsigned BaseReg = BaseOp.getReg();
bool BaseKill = BaseOp.isKill();
- const MachineOperand &OffOp = MI->getOperand(3);
- unsigned OffReg = OffOp.getReg();
- bool OffKill = OffOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
- unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDM : ARM::LDM)
+ : (isT2 ? ARM::t2STM : ARM::STM);
if (isLd) {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg)
+ .addReg(0)
.addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
- .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
++NumLDRD2LDM;
} else {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg)
- .addReg(EvenReg, getKillRegState(EvenDeadKill))
- .addReg(OddReg, getKillRegState(OddDeadKill));
+ .addReg(0)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
} else {
// Split into two instructions.
- unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
+ assert((!isT2 || !OffReg) &&
+ "Thumb2 ldrd / strd does not encode offset register!");
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
@@ -707,17 +858,23 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
(OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
assert(!TRI->regsOverlap(OddReg, BaseReg) &&
(!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
- BaseReg, false, OffReg, false, Pred, PredReg, TII);
- InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
- BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, OffReg, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
} else {
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
- EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
- Pred, PredReg, TII);
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, OffReg, false, OffUndef,
+ Pred, PredReg, TII, isT2);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
- OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
- Pred, PredReg, TII);
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
}
if (isLd)
++NumLDRD2LDR;
@@ -761,7 +918,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned Size = getLSMultipleTransferSize(MBBI);
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
@@ -772,7 +929,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// looks like the later ldr(s) use the same base register. Try to
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
- Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+ Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
if (CurrBase == 0 && !Clobber) {
// Start of a new chain.
CurrBase = Base;
@@ -825,12 +982,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
- // Find a scratch register. Make sure it's a call clobbered register or
- // a spilled callee-saved register.
- unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
- if (!Scratch)
- Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
- AFI->getSpilledCSRegisters());
+ // Find a scratch register.
+ unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
// Process the load / store instructions.
RS->forward(prior(MBBI));
@@ -842,7 +995,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
        // Try folding preceding/trailing base inc/dec into the generated
// LDM/STM ops.
for (unsigned i = 0, e = Merges.size(); i < e; ++i)
- if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
+ if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
++NumMerges;
NumMerges += Merges.size();
@@ -850,15 +1003,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// that were not merged to form LDM/STM ops.
for (unsigned i = 0; i != NumMemOps; ++i)
if (!MemOps[i].Merged)
- if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
++NumMerges;
- // RS may be pointing to an instruction that's deleted.
+ // RS may be pointing to an instruction that's deleted.
RS->skipTo(prior(MBBI));
} else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
// load/store.
- if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
++NumMerges;
RS->forward(prior(MBBI));
}
@@ -907,16 +1060,18 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = prior(MBB.end());
- if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
MachineInstr *PrevMI = prior(MBBI);
- if (PrevMI->getOpcode() == ARM::LDM) {
+ if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
- if (MO.getReg() == ARM::LR) {
- PrevMI->setDesc(TII->get(ARM::LDM_RET));
- MO.setReg(ARM::PC);
- MBB.erase(MBBI);
- return true;
- }
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ MBB.erase(MBBI);
+ return true;
}
}
return false;
@@ -928,6 +1083,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
RS = new RegScavenger();
+ isThumb2 = AFI->isThumb2Function();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
@@ -956,6 +1112,7 @@ namespace {
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
MachineRegisterInfo *MRI;
+ MachineFunction *MF;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -967,8 +1124,9 @@ namespace {
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
unsigned &OddReg, unsigned &BaseReg,
- unsigned &OffReg, unsigned &Offset,
- unsigned &PredReg, ARMCC::CondCodes &Pred);
+ unsigned &OffReg, int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
bool RescheduleOps(MachineBasicBlock *MBB,
SmallVector<MachineInstr*, 4> &Ops,
unsigned Base, bool isLd,
@@ -984,6 +1142,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getTarget().getRegisterInfo();
STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
MRI = &Fn.getRegInfo();
+ MF = &Fn;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
@@ -1045,48 +1204,83 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
unsigned &OddReg, unsigned &BaseReg,
- unsigned &OffReg, unsigned &Offset,
+ unsigned &OffReg, int &Offset,
unsigned &PredReg,
- ARMCC::CondCodes &Pred) {
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
// FIXME: FLDS / FSTS -> FLDD / FSTD
+ unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
if (Opcode == ARM::LDR)
NewOpc = ARM::LDRD;
else if (Opcode == ARM::STR)
NewOpc = ARM::STRD;
- else
- return 0;
+ else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else
+ return false;
+
+ // Make sure the offset registers match.
+ if (!isT2 &&
+ (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
+ return false;
// Make sure the base address satisfies the i64 ld / st alignment requirement.
if (!Op0->hasOneMemOperand() ||
- !Op0->memoperands_begin()->getValue() ||
- Op0->memoperands_begin()->isVolatile())
+ !(*Op0->memoperands_begin())->getValue() ||
+ (*Op0->memoperands_begin())->isVolatile())
return false;
- unsigned Align = Op0->memoperands_begin()->getAlignment();
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ Function *Func = MF->getFunction();
unsigned ReqAlign = STI->hasV6Ops()
- ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align
+ ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 need 8-byte align
if (Align < ReqAlign)
return false;
// Then make sure the immediate offset fits.
int OffImm = getMemoryOpOffset(Op0);
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- if (OffImm < 0) {
- AddSub = ARM_AM::sub;
- OffImm = - OffImm;
+ if (isT2) {
+ if (OffImm < 0) {
+ if (OffImm < -255)
+ // Can't fall back to t2LDRi8 / t2STRi8.
+ return false;
+ } else {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ }
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
}
- if (OffImm >= 256) // 8 bits
- return false;
- Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
-
EvenReg = Op0->getOperand(0).getReg();
OddReg = Op1->getOperand(0).getReg();
if (EvenReg == OddReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- OffReg = Op0->getOperand(2).getReg();
- Pred = getInstrPredicate(Op0, PredReg);
+ if (!isT2)
+ OffReg = Op0->getOperand(2).getReg();
+ Pred = llvm::getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
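CanFormLdStDWord now also accepts the Thumb2 encodings and returns a plain signed offset for them, while ARM mode still packs the offset through getAM3Opc. Roughly, the ARM-mode case it guards looks like this (registers and offsets are illustrative; the pairing additionally requires ARMv5TE, a sufficiently aligned base, and distinct destination registers):

    ldr   r0, [r2]          =>    ldrd  r0, r1, [r2]
    ldr   r1, [r2, #4]

For the non-negative Thumb2 offsets, the Scale check above additionally enforces a multiple of 4.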
@@ -1138,7 +1332,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
LastOffset = Offset;
LastBytes = Bytes;
LastOpcode = Opcode;
- if (++NumMove == 8) // FIXME: Tune
+ if (++NumMove == 8) // FIXME: Tune this limit.
break;
}
@@ -1174,29 +1368,36 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
unsigned EvenReg = 0, OddReg = 0;
unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
unsigned NewOpc = 0;
- unsigned Offset = 0;
+ int Offset = 0;
DebugLoc dl;
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
EvenReg, OddReg, BaseReg, OffReg,
- Offset, PredReg, Pred)) {
+ Offset, PredReg, Pred, isT2)) {
Ops.pop_back();
Ops.pop_back();
// Form the pair instruction.
if (isLd) {
- BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
+ dl, TII->get(NewOpc))
.addReg(EvenReg, RegState::Define)
.addReg(OddReg, RegState::Define)
- .addReg(BaseReg).addReg(0).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ .addReg(BaseReg);
+ if (!isT2)
+ MIB.addReg(OffReg);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
++NumLDRDFormed;
} else {
- BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
+ dl, TII->get(NewOpc))
.addReg(EvenReg)
.addReg(OddReg)
- .addReg(BaseReg).addReg(0).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ .addReg(BaseReg);
+ if (!isT2)
+ MIB.addReg(OffReg);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
++NumSTRDFormed;
}
MBB->erase(Op0);
@@ -1249,12 +1450,11 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
- if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
- int Opcode = MI->getOpcode();
- bool isLd = Opcode == ARM::LDR ||
- Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ int Opc = MI->getOpcode();
+ bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
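isi32Load, used here and at the top of this excerpt, is defined earlier in ARMLoadStoreOptimizer.cpp and does not appear in the diff; a plausible shape, treating the ARM and Thumb2 32-bit integer loads uniformly, is:

    // Assumed sketch; the exact opcode list lives outside this excerpt.
    static bool isi32Load(unsigned Opc) {
      return Opc == ARM::LDR || Opc == ARM::t2LDRi8 || Opc == ARM::t2LDRi12;
    }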
diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp
new file mode 100644
index 000000000000..0ff65d2af88b
--- /dev/null
+++ b/lib/Target/ARM/ARMMCAsmInfo.cpp
@@ -0,0 +1,72 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+using namespace llvm;
+
+static const char *const arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+ AsmTransCBE = arm_asm_table;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ COMMDirectiveTakesAlignment = false;
+ SupportsDebugInformation = true;
+
+ // Exception handling
+ ExceptionsType = ExceptionHandling::SjLj;
+ AbsoluteEHSectionOffsets = false;
+}
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+ AlignmentIsInBytes = false;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ COMMDirectiveTakesAlignment = false;
+
+ NeedsSet = false;
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ LCOMMDirective = "\t.lcomm\t";
+
+ DwarfRequiresFrameSection = false;
+
+ SupportsDebugInformation = true;
+}
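arm_asm_table is a flat sequence of {constraint, register} string pairs terminated by a pair of nulls, and AsmTransCBE points the inline-asm printing machinery at it. A minimal, self-contained sketch of scanning such a table (translateRegName is a hypothetical helper, not LLVM API):

    #include <cstring>

    // Walk a table of {from, to} C-string pairs ending in two nulls.
    static const char *translateRegName(const char *const *Table,
                                        const char *Name) {
      for (unsigned i = 0; Table[i]; i += 2)
        if (std::strcmp(Table[i], Name) == 0)
          return Table[i + 1];   // e.g. "{r0}" -> "r0"
      return Name;               // unknown names pass through unchanged
    }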
diff --git a/lib/Target/ARM/ARMMCAsmInfo.h b/lib/Target/ARM/ARMMCAsmInfo.h
new file mode 100644
index 000000000000..90f7822ea580
--- /dev/null
+++ b/lib/Target/ARM/ARMMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit ARMMCAsmInfoDarwin();
+ };
+
+ struct ARMELFMCAsmInfo : public MCAsmInfo {
+ explicit ARMELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
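The header declares two flavours of asm properties, one for Darwin and one for ELF; a target instantiates one or the other per triple. A hypothetical selector, illustrating the intent rather than the actual registration code:

    // Hypothetical: choose an MCAsmInfo flavour for ARM by OS.
    static MCAsmInfo *createARMMCAsmInfo(bool IsDarwin) {
      if (IsDarwin)
        return new ARMMCAsmInfoDarwin();
      return new ARMELFMCAsmInfo();
    }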
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 66d3df60e078..2176b2735a2b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,10 +1,10 @@
//====- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file declares ARM-specific per-machine-function information.
@@ -52,10 +52,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// enable far jump.
bool LRSpilledForFarJump;
- /// R3IsLiveIn - True if R3 is live in to this function.
- /// FIXME: Remove when register scavenger for Thumb is done.
- bool R3IsLiveIn;
-
/// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
/// spill stack offset.
unsigned FramePtrSpillOffset;
@@ -100,7 +96,7 @@ public:
hasThumb2(false),
Align(2U),
VarArgsRegSaveSize(0), HasStackFrame(false),
- LRSpilledForFarJump(false), R3IsLiveIn(false),
+ LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
@@ -111,7 +107,7 @@ public:
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
Align(isThumb ? 1U : 2U),
VarArgsRegSaveSize(0), HasStackFrame(false),
- LRSpilledForFarJump(false), R3IsLiveIn(false),
+ LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
@@ -119,6 +115,7 @@ public:
JumpTableUId(0), ConstPoolEntryUId(0) {}
bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
unsigned getAlign() const { return Align; }
@@ -133,13 +130,9 @@ public:
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
- // FIXME: Remove when register scavenger for Thumb is done.
- bool isR3LiveIn() const { return R3IsLiveIn; }
- void setR3IsLiveIn(bool l) { R3IsLiveIn = l; }
-
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
-
+
unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
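Alongside dropping the R3IsLiveIn workaround, the patch splits Thumb functions into Thumb1-only and Thumb2 via the new predicates, which the load/store optimizer above caches in its isThumb2 flag. A small illustrative use, mirroring the opcode selection seen earlier:

    // Illustrative only: pick the ARM or Thumb2 return-LDM opcode.
    unsigned pickReturnLDM(const ARMFunctionInfo &AFI) {
      return AFI.isThumb2Function() ? ARM::t2LDM_RET : ARM::LDM_RET;
    }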
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 000000000000..5ff7c381bc51
--- /dev/null
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// describing the optimal way to build a perfect shuffle using NEON instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
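+//
+// Indexing (assumed from the generator's conventions; the consumer is
+// not part of this file): each of the four result elements is an index
+// in 0..8, with 8 standing for 'undef' (printed as 'u' below), so the
+// entry for shuffle <a,b,c,d> lives at
+//   PerfectShuffleTable[((a*9 + b)*9 + c)*9 + d]   // 9*9*9*9 == 6561
+// Each 32-bit entry packs the cost shown in the comments together with
+// the operation and operand IDs needed to rebuild that shuffle.
+//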
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
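The generated table above feeds the NEON perfect-shuffle lowering: the four
mask elements (with 'u' encoded as 8) select a row, and each 32-bit entry
packs a cost together with a recipe for rebuilding the shuffle from simpler
ones. A minimal decode sketch, in the spirit of the GeneratePerfectShuffle
consumers of such tables; the exact field widths are an inference from the
annotations above, not something this patch spells out:

#include <cstdint>

// Assumed PFEntry layout, checked against the annotations in the table
// (entries marked "vrev" decode to opcode id 1, "vdup0" to 2, and so on):
//   [31:30] cost field (the "Cost N" comments appear to be this plus one)
//   [29:26] opcode id (0 copy, 1 vrev, 2-5 vdup lane 0-3, 6-8 vext, ...)
//   [25:13] LHS sub-shuffle id    [12:0] RHS sub-shuffle id
struct PFEntryFields { unsigned Cost, OpNum, LHSID, RHSID; };

static PFEntryFields decodePFEntry(uint32_t E) {
  PFEntryFields F;
  F.Cost  =  E >> 30;
  F.OpNum = (E >> 26) & 0x0F;
  F.LHSID = (E >> 13) & 0x1FFF;
  F.RHSID =  E        & 0x1FFF;
  return F;
}

// Row selection for a mask <a,b,c,d>, each element 0-7 or 8 for 'u',
// giving 9^4 = 6561 rows plus the trailing sentinel:
static unsigned pfTableIndex(unsigned A, unsigned B, unsigned C, unsigned D) {
  return ((A * 9 + B) * 9 + C) * 9 + D;
}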
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index f809f3750921..d5bc3f60b01a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
@@ -26,6 +27,7 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -33,1370 +35,7 @@
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- using namespace ARM;
- switch (RegEnum) {
- case R0: case S0: case D0: return 0;
- case R1: case S1: case D1: return 1;
- case R2: case S2: case D2: return 2;
- case R3: case S3: case D3: return 3;
- case R4: case S4: case D4: return 4;
- case R5: case S5: case D5: return 5;
- case R6: case S6: case D6: return 6;
- case R7: case S7: case D7: return 7;
- case R8: case S8: case D8: return 8;
- case R9: case S9: case D9: return 9;
- case R10: case S10: case D10: return 10;
- case R11: case S11: case D11: return 11;
- case R12: case S12: case D12: return 12;
- case SP: case S13: case D13: return 13;
- case LR: case S14: case D14: return 14;
- case PC: case S15: case D15: return 15;
- case S16: return 16;
- case S17: return 17;
- case S18: return 18;
- case S19: return 19;
- case S20: return 20;
- case S21: return 21;
- case S22: return 22;
- case S23: return 23;
- case S24: return 24;
- case S25: return 25;
- case S26: return 26;
- case S27: return 27;
- case S28: return 28;
- case S29: return 29;
- case S30: return 30;
- case S31: return 31;
- default:
- assert(0 && "Unknown ARM register!");
- abort();
- }
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
- bool &isSPVFP) {
- isSPVFP = false;
-
- using namespace ARM;
- switch (RegEnum) {
- default:
- assert(0 && "Unknown ARM register!");
- abort();
- case R0: case D0: return 0;
- case R1: case D1: return 1;
- case R2: case D2: return 2;
- case R3: case D3: return 3;
- case R4: case D4: return 4;
- case R5: case D5: return 5;
- case R6: case D6: return 6;
- case R7: case D7: return 7;
- case R8: case D8: return 8;
- case R9: case D9: return 9;
- case R10: case D10: return 10;
- case R11: case D11: return 11;
- case R12: case D12: return 12;
- case SP: case D13: return 13;
- case LR: case D14: return 14;
- case PC: case D15: return 15;
-
- case S0: case S1: case S2: case S3:
- case S4: case S5: case S6: case S7:
- case S8: case S9: case S10: case S11:
- case S12: case S13: case S14: case S15:
- case S16: case S17: case S18: case S19:
- case S20: case S21: case S22: case S23:
- case S24: case S25: case S26: case S27:
- case S28: case S29: case S30: case S31: {
- isSPVFP = true;
- switch (RegEnum) {
- default: return 0; // Avoid compile time warning.
- case S0: return 0;
- case S1: return 1;
- case S2: return 2;
- case S3: return 3;
- case S4: return 4;
- case S5: return 5;
- case S6: return 6;
- case S7: return 7;
- case S8: return 8;
- case S9: return 9;
- case S10: return 10;
- case S11: return 11;
- case S12: return 12;
- case S13: return 13;
- case S14: return 14;
- case S15: return 15;
- case S16: return 16;
- case S17: return 17;
- case S18: return 18;
- case S19: return 19;
- case S20: return 20;
- case S21: return 21;
- case S22: return 22;
- case S23: return 23;
- case S24: return 24;
- case S25: return 25;
- case S26: return 26;
- case S27: return 27;
- case S28: return 28;
- case S29: return 29;
- case S30: return 30;
- case S31: return 31;
- }
- }
- }
-}
-
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const TargetInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
- TII(tii), STI(sti),
- FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
-}
-
-ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMBaseRegisterInfo(tii, sti) {
}
-
-static inline
-const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
- return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
-}
-
-static inline
-const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo *TII, DebugLoc dl,
- unsigned DestReg, int Val,
- ARMCC::CondCodes Pred,
- unsigned PredReg) const {
- MachineFunction &MF = *MBB.getParent();
- MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C = ConstantInt::get(Type::Int32Ty, Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
- BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg)
- .addConstantPoolIndex(Idx)
- .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
-}
-
-const unsigned*
-ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const unsigned CalleeSavedRegs[] = {
- ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
- ARM::R7, ARM::R6, ARM::R5, ARM::R4,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned DarwinCalleeSavedRegs[] = {
- // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
- // register.
- ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
- ARM::R11, ARM::R10, ARM::R8,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
- return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
-}
-
-const TargetRegisterClass* const *
-ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={
- &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- if (STI.isThumb()) {
- return STI.isTargetDarwin()
- ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
- }
- return STI.isTargetDarwin()
- ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
-}
-
-BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- // FIXME: avoid re-calculating this every time.
- BitVector Reserved(getNumRegs());
- Reserved.set(ARM::SP);
- Reserved.set(ARM::PC);
- if (STI.isTargetDarwin() || hasFP(MF))
- Reserved.set(FramePtr);
- // Some targets reserve R9.
- if (STI.isR9Reserved())
- Reserved.set(ARM::R9);
- return Reserved;
-}
-
-bool
-ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
- switch (Reg) {
- default: break;
- case ARM::SP:
- case ARM::PC:
- return true;
- case ARM::R7:
- case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
- return true;
- break;
- case ARM::R9:
- return STI.isR9Reserved();
- }
-
- return false;
-}
-
-const TargetRegisterClass *ARMBaseRegisterInfo::getPointerRegClass() const {
- return &ARM::GPRRegClass;
-}
-
-/// getAllocationOrder - Returns the register allocation order for a specified
-/// register class in the form of a pair of TargetRegisterClass iterators.
-std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
-ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
- // Alternative register allocation orders when favoring even / odd registers
- // of register pairs.
-
- // No FP, R9 is available.
- static const unsigned GPREven1[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9, ARM::R11
- };
- static const unsigned GPROdd1[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R7, R9 is available.
- static const unsigned GPREven2[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
- ARM::R9, ARM::R11
- };
- static const unsigned GPROdd2[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R11, R9 is available.
- static const unsigned GPREven3[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9
- };
- static const unsigned GPROdd3[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
- ARM::R8
- };
-
- // No FP, R9 is not available.
- static const unsigned GPREven4[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
- ARM::R11
- };
- static const unsigned GPROdd4[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R7, R9 is not available.
- static const unsigned GPREven5[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
- ARM::R11
- };
- static const unsigned GPROdd5[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R11, R9 is not available.
- static const unsigned GPREven6[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
- };
- static const unsigned GPROdd6[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
- };
-
-
- if (HintType == ARMRI::RegPairEven) {
- if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
-
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
- if (!STI.isR9Reserved())
- return std::make_pair(GPREven1,
- GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
- else
- return std::make_pair(GPREven4,
- GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return std::make_pair(GPREven2,
- GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
- else
- return std::make_pair(GPREven5,
- GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return std::make_pair(GPREven3,
- GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
- else
- return std::make_pair(GPREven6,
- GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
- }
- } else if (HintType == ARMRI::RegPairOdd) {
- if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
-
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
- if (!STI.isR9Reserved())
- return std::make_pair(GPROdd1,
- GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
- else
- return std::make_pair(GPROdd4,
- GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return std::make_pair(GPROdd2,
- GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
- else
- return std::make_pair(GPROdd5,
- GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return std::make_pair(GPROdd3,
- GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
- else
- return std::make_pair(GPROdd6,
- GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
- }
- }
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
-}
-
-/// ResolveRegAllocHint - Resolves the specified register allocation hint
-/// to a physical register. Returns the physical register if it is successful.
-unsigned
-ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const {
- if (Reg == 0 || !isPhysicalRegister(Reg))
- return 0;
- if (Type == 0)
- return Reg;
- else if (Type == (unsigned)ARMRI::RegPairOdd)
- // Odd register.
- return getRegisterPairOdd(Reg, MF);
- else if (Type == (unsigned)ARMRI::RegPairEven)
- // Even register.
- return getRegisterPairEven(Reg, MF);
- return 0;
-}
-
-void
-ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
- MachineFunction &MF) const {
- MachineRegisterInfo *MRI = &MF.getRegInfo();
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
- if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
- Hint.first == (unsigned)ARMRI::RegPairEven) &&
- Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
- // If 'Reg' is one of an even / odd register pair and it has now been
- // changed (e.g. coalesced) into a different register, the allocation
- // hint of the other register of the pair must be updated to reflect
- // the relationship change.
- unsigned OtherReg = Hint.second;
- Hint = MRI->getRegAllocationHint(OtherReg);
- if (Hint.second == Reg)
- // Make sure the pair has not already divorced.
- MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
- }
-}
-
-bool
-ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
-
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-///
-bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (NoFramePointerElim ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
-}
-
-// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-// not required, we reserve argument space for call sites immediately on
-// entry to the current function. This eliminates the need for add/sub sp
-// brackets around call sites. Returns true if the call frame is included
-// as part of the stack frame.
-bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
- // It's not always a good idea to include the call frame as part of the
- // stack frame. ARM (especially Thumb) has only small immediate offsets
- // for addressing the stack frame, so a large call frame can cause poor
- // codegen and may even make it impossible to scavenge a register.
- if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
- return false;
-
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-/// emitARMRegPlusImmediate - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in ARM code.
-static
-void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
- const TargetInstrInfo &TII,
- DebugLoc dl) {
- bool isSub = NumBytes < 0;
- if (isSub) NumBytes = -NumBytes;
-
- while (NumBytes) {
- unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
- unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
- assert(ThisVal && "Didn't extract field correctly");
-
- // We will handle these bits from the offset; clear them.
- NumBytes &= ~ThisVal;
-
- // Get the properly encoded SOImmVal field.
- int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
- assert(SOImmVal != -1 && "Bit extraction didn't work?");
-
- // Build the new ADD / SUB.
- BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(SOImmVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
- BaseReg = DestReg;
- }
-}
-
-static void
-emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- int NumBytes,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
- emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII, dl);
-}
-
-void ARMRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
- // If we have allocas, convert as follows:
- // ADJCALLSTACKDOWN -> sub sp, sp, amount
- // ADJCALLSTACKUP -> add sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old->getOperand(2).getReg();
- emitSPUpdate(MBB, I, TII, dl, -Amount, Pred, PredReg);
- } else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old->getOperand(3).getReg();
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(MBB, I, TII, dl, Amount, Pred, PredReg);
- }
- }
- }
- MBB.erase(I);
-}
-
-/// findScratchRegister - Find a 'free' ARM register. If the register
-/// scavenger is not being used, R12 is available. Otherwise, try for a
-/// call-clobbered register first, and then a spilled callee-saved register
-/// if that fails.
-static
-unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
- ARMFunctionInfo *AFI) {
- unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12;
- assert (!AFI->isThumbFunction());
- if (Reg == 0)
- // Try an already spilled CS register.
- Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters());
-
- return Reg;
-}
-
-void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
-
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (hasFP(MF)) {
- assert(SPAdj == 0 && "Unexpected");
- // There are alloca()'s in this function; we must reference off the
- // frame pointer instead.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- }
-
- unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = MI.getDesc();
- unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- bool isSub = false;
-
- // Memory operands in inline assembly always use AddrMode2.
- if (Opcode == ARM::INLINEASM)
- AddrMode = ARMII::AddrMode2;
-
- if (Opcode == ARM::ADDri) {
- Offset += MI.getOperand(i+1).getImm();
- if (Offset == 0) {
- // Turn it into a move.
- MI.setDesc(TII.get(ARM::MOVr));
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.RemoveOperand(i+1);
- return;
- } else if (Offset < 0) {
- Offset = -Offset;
- isSub = true;
- MI.setDesc(TII.get(ARM::SUBri));
- }
-
- // Common case: small offset, fits into instruction.
- int ImmedOffset = ARM_AM::getSOImmVal(Offset);
- if (ImmedOffset != -1) {
- // Replace the FrameIndex with sp / fp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
- return;
- }
-
- // Otherwise, we fall back to the common code below to form the imm offset
- // with a sequence of ADDri instructions. First, though, pull as much of
- // the imm into this ADDri as possible.
- unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
- unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
-
- // We will handle these bits from the offset; clear them.
-
-
- // Get the properly encoded SOImmVal field.
- int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
- assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");
- MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
- } else {
- unsigned ImmIdx = 0;
- int InstrOffs = 0;
- unsigned NumBits = 0;
- unsigned Scale = 1;
- switch (AddrMode) {
- case ARMII::AddrMode2: {
- ImmIdx = i+2;
- InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
- if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
- InstrOffs *= -1;
- NumBits = 12;
- break;
- }
- case ARMII::AddrMode3: {
- ImmIdx = i+2;
- InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
- if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
- InstrOffs *= -1;
- NumBits = 8;
- break;
- }
- case ARMII::AddrMode5: {
- ImmIdx = i+1;
- InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
- if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
- InstrOffs *= -1;
- NumBits = 8;
- Scale = 4;
- break;
- }
- default:
- assert(0 && "Unsupported addressing mode!");
- abort();
- break;
- }
-
- Offset += InstrOffs * Scale;
- assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
- if (Offset < 0) {
- Offset = -Offset;
- isSub = true;
- }
-
- // Common case: small offset, fits into instruction.
- MachineOperand &ImmOp = MI.getOperand(ImmIdx);
- int ImmedOffset = Offset / Scale;
- unsigned Mask = (1 << NumBits) - 1;
- if ((unsigned)Offset <= Mask * Scale) {
- // Replace the FrameIndex with sp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- if (isSub)
- ImmedOffset |= 1 << NumBits;
- ImmOp.ChangeToImmediate(ImmedOffset);
- return;
- }
-
- // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
- ImmedOffset = ImmedOffset & Mask;
- if (isSub)
- ImmedOffset |= 1 << NumBits;
- ImmOp.ChangeToImmediate(ImmedOffset);
- Offset &= ~(Mask*Scale);
- }
-
- // If we get here, the immediate doesn't fit into the instruction. We folded
- // as much as possible above, handle the rest, providing a register that is
- // SP+LargeImm.
- assert(Offset && "This code isn't needed if offset already handled!");
-
- // Set the scratch register to the full address: scratch = sp + offset.
- // If the offset we have is too large to fit into the instruction, we need
- // to form it with a series of ADDri's. Do this by taking 8-bit chunks
- // out of 'Offset'.
- unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
- if (ScratchReg == 0)
- // No register is "free". Scavenge a register.
- ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
- int PIdx = MI.findFirstPredOperandIdx();
- ARMCC::CondCodes Pred = (PIdx == -1)
- ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
- unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
- emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
- isSub ? -Offset : Offset, Pred, PredReg, TII, dl);
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
-}
-
-static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
- }
- return (unsigned)Offset;
-}
-
-void
-ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- // This tells PEI to spill the FP as if it were any other callee-save
- // register, to take advantage of the eliminateFrameIndex machinery. This
- // also ensures it is spilled in the order specified by getCalleeSavedRegs()
- // to make it easier to combine multiple loads / stores.
- bool CanEliminateFrame = true;
- bool CS1Spilled = false;
- bool LRSpilled = false;
- unsigned NumGPRSpills = 0;
- SmallVector<unsigned, 4> UnspilledCS1GPRs;
- SmallVector<unsigned, 4> UnspilledCS2GPRs;
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-
- // Don't spill FP if the frame can be eliminated. This is determined
- // by scanning the callee-save registers to see if any is used.
- const unsigned *CSRegs = getCalleeSavedRegs();
- const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- bool Spilled = false;
- if (MF.getRegInfo().isPhysRegUsed(Reg)) {
- AFI->setCSRegisterIsSpilled(Reg);
- Spilled = true;
- CanEliminateFrame = false;
- } else {
- // Check alias registers too.
- for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
- if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
- Spilled = true;
- CanEliminateFrame = false;
- }
- }
- }
-
- if (CSRegClasses[i] == &ARM::GPRRegClass) {
- if (Spilled) {
- NumGPRSpills++;
-
- if (!STI.isTargetDarwin()) {
- if (Reg == ARM::LR)
- LRSpilled = true;
- CS1Spilled = true;
- continue;
- }
-
- // Keep track of whether LR or any of R4, R5, R6, and R7 is spilled.
- switch (Reg) {
- case ARM::LR:
- LRSpilled = true;
- // Fallthrough
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- CS1Spilled = true;
- break;
- default:
- break;
- }
- } else {
- if (!STI.isTargetDarwin()) {
- UnspilledCS1GPRs.push_back(Reg);
- continue;
- }
-
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- UnspilledCS1GPRs.push_back(Reg);
- break;
- default:
- UnspilledCS2GPRs.push_back(Reg);
- break;
- }
- }
- }
- }
-
- bool ForceLRSpill = false;
- if (!LRSpilled && AFI->isThumbFunction()) {
- unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
- // Force LR to be spilled if the Thumb function size is > 2048. This
- // enables the use of BL to implement a far jump. If it turns out that
- // it's not needed, the branch fixup path will undo it.
- if (FnSize >= (1 << 11)) {
- CanEliminateFrame = false;
- ForceLRSpill = true;
- }
- }
-
- bool ExtraCSSpill = false;
- if (!CanEliminateFrame || hasFP(MF)) {
- AFI->setHasStackFrame(true);
-
- // If LR is not spilled but at least one of R4, R5, R6, and R7 is,
- // spill LR as well so we can fold BX_RET into the register restore (LDM).
- if (!LRSpilled && CS1Spilled) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
- AFI->setCSRegisterIsSpilled(ARM::LR);
- NumGPRSpills++;
- UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
- UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
- ForceLRSpill = false;
- ExtraCSSpill = true;
- }
-
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
- MF.getRegInfo().setPhysRegUsed(FramePtr);
- NumGPRSpills++;
- }
-
- // If stack and double are 8-byte aligned and we are spilling an odd
- // number of GPRs, spill one extra callee-save GPR so we won't have to
- // pad between the integer and double callee-save areas.
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- if (TargetAlign == 8 && (NumGPRSpills & 1)) {
- if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
- for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
- unsigned Reg = UnspilledCS1GPRs[i];
- // Don't spill a high register if the function is Thumb
- if (!AFI->isThumbFunction() ||
- isARMLowRegister(Reg) || Reg == ARM::LR) {
- MF.getRegInfo().setPhysRegUsed(Reg);
- AFI->setCSRegisterIsSpilled(Reg);
- if (!isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- break;
- }
- }
- } else if (!UnspilledCS2GPRs.empty() &&
- !AFI->isThumbFunction()) {
- unsigned Reg = UnspilledCS2GPRs.front();
- MF.getRegInfo().setPhysRegUsed(Reg);
- AFI->setCSRegisterIsSpilled(Reg);
- if (!isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- }
- }
-
- // Estimate if we might need to scavenge a register at some point in order
- // to materialize a stack offset. If so, either spill one additional
- // callee-saved register or reserve a special spill slot to facilitate
- // register scavenging.
- if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned Size = estimateStackSize(MF, MFI);
- unsigned Limit = (1 << 12) - 1;
- for (MachineFunction::iterator BB = MF.begin(),E = MF.end();BB != E; ++BB)
- for (MachineBasicBlock::iterator I= BB->begin(); I != BB->end(); ++I) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i).isFI()) {
- unsigned Opcode = I->getOpcode();
- const TargetInstrDesc &Desc = TII.get(Opcode);
- unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- if (AddrMode == ARMII::AddrMode3) {
- Limit = (1 << 8) - 1;
- goto DoneEstimating;
- } else if (AddrMode == ARMII::AddrMode5) {
- unsigned ThisLimit = ((1 << 8) - 1) * 4;
- if (ThisLimit < Limit)
- Limit = ThisLimit;
- }
- }
- }
- DoneEstimating:
- if (Size >= Limit) {
- // If any non-reserved CS register isn't spilled, just spill one or two
- // extra. That should take care of it!
- unsigned NumExtras = TargetAlign / 4;
- SmallVector<unsigned, 2> Extras;
- while (NumExtras && !UnspilledCS1GPRs.empty()) {
- unsigned Reg = UnspilledCS1GPRs.back();
- UnspilledCS1GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
- Extras.push_back(Reg);
- NumExtras--;
- }
- }
- while (NumExtras && !UnspilledCS2GPRs.empty()) {
- unsigned Reg = UnspilledCS2GPRs.back();
- UnspilledCS2GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
- Extras.push_back(Reg);
- NumExtras--;
- }
- }
- if (Extras.size() && NumExtras == 0) {
- for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MF.getRegInfo().setPhysRegUsed(Extras[i]);
- AFI->setCSRegisterIsSpilled(Extras[i]);
- }
- } else {
- // Reserve a slot closest to SP or frame pointer.
- const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
- }
- }
- }
- }
-
- if (ForceLRSpill) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
- AFI->setCSRegisterIsSpilled(ARM::LR);
- AFI->setLRIsSpilledForFarJump(true);
- }
-}
-
-/// Move the iterator past the next bunch of callee-save load / store ops
-/// for the particular spill area (1: integer area 1, 2: integer area 2,
-/// 3: fp area, 0: don't care).
-static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- int Opc, unsigned Area,
- const ARMSubtarget &STI) {
- while (MBBI != MBB.end() &&
- MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
- if (Area != 0) {
- bool Done = false;
- unsigned Category = 0;
- switch (MBBI->getOperand(0).getReg()) {
- case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
- case ARM::LR:
- Category = 1;
- break;
- case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
- Category = STI.isTargetDarwin() ? 2 : 1;
- break;
- case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
- case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
- Category = 3;
- break;
- default:
- Done = true;
- break;
- }
- if (Done || Category != Area)
- break;
- }
-
- ++MBBI;
- }
-}
-
-void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- unsigned NumBytes = MFI->getStackSize();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = (MBBI != MBB.end() ?
- MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
-
- // Determine the size of each callee-save spill area and record which
- // frame index belongs to which callee-save spill area.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
- int FramePtrSpillFI = 0;
-
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, -VARegSaveSize);
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes);
- return;
- }
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- break;
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
- GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- }
- break;
- default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
- }
- }
-
- // Build the new SUBri to adjust SP for integer callee-save spill area 1.
- emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size);
- movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
-
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::ADDri), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
- AddDefaultCC(AddDefaultPred(MIB));
- }
-
- // Build the new SUBri to adjust SP for integer callee-save spill area 2.
- emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size);
-
- // Build the new SUBri to adjust SP for FP callee-save spill area.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
- emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize);
-
- // Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
- unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
- unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
- AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
- AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
- AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
- NumBytes = DPRCSOffset;
- if (NumBytes) {
- // Insert it after all the callee-save spills.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
- emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes);
- }
-
- if (STI.isTargetELF() && hasFP(MF)) {
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
- }
-
- AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
- AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
- AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
- return ((MI->getOpcode() == ARM::FLDD ||
- MI->getOpcode() == ARM::LDR) &&
- MI->getOperand(1).isFI() &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
-}
-
-void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert(MBBI->getOpcode() == ARM::BX_RET &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- int NumBytes = (int)MFI->getStackSize();
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, NumBytes);
- } else {
- // Unwind MBBI to point to first LDR / FLDD.
- const unsigned *CSRegs = getCalleeSavedRegs();
- if (MBBI != MBB.begin()) {
- do
- --MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
- if (!isCSRestore(MBBI, CSRegs))
- ++MBBI;
- }
-
- // Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
- AFI->getGPRCalleeSavedArea2Size() +
- AFI->getDPRCalleeSavedAreaSize());
-
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
- NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on the frame pointer only if the stack frame extends
- // beyond the frame pointer stack slot, or the target is ELF and the
- // function has an FP.
- if (AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()||
- hasFP(MF)) {
- if (NumBytes)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
- .addImm(NumBytes)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- }
- } else if (NumBytes) {
- emitSPUpdate(MBB, MBBI, TII, dl, NumBytes);
- }
-
- // Move SP to start of integer callee save spill area 2.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
- emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize());
-
- // Move SP to start of integer callee save spill area 1.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
- emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size());
-
- // Move SP to SP upon entry to the function.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
- emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size());
- }
-
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, VARegSaveSize);
-
-}
-
-unsigned ARMBaseRegisterInfo::getRARegister() const {
- return ARM::LR;
-}
-
-unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
- return FramePtr;
- return ARM::SP;
-}
-
-unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
- return 0;
-}
-
-unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
- return 0;
-}
-
-int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
- const MachineFunction &MF) const {
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R1:
- return ARM::R0;
- case ARM::R3:
- // FIXME!
- return STI.isThumb() ? 0 : ARM::R2;
- case ARM::R5:
- return ARM::R4;
- case ARM::R7:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
- case ARM::R9:
- return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
- case ARM::R11:
- return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
-
- case ARM::S1:
- return ARM::S0;
- case ARM::S3:
- return ARM::S2;
- case ARM::S5:
- return ARM::S4;
- case ARM::S7:
- return ARM::S6;
- case ARM::S9:
- return ARM::S8;
- case ARM::S11:
- return ARM::S10;
- case ARM::S13:
- return ARM::S12;
- case ARM::S15:
- return ARM::S14;
- case ARM::S17:
- return ARM::S16;
- case ARM::S19:
- return ARM::S18;
- case ARM::S21:
- return ARM::S20;
- case ARM::S23:
- return ARM::S22;
- case ARM::S25:
- return ARM::S24;
- case ARM::S27:
- return ARM::S26;
- case ARM::S29:
- return ARM::S28;
- case ARM::S31:
- return ARM::S30;
-
- case ARM::D1:
- return ARM::D0;
- case ARM::D3:
- return ARM::D2;
- case ARM::D5:
- return ARM::D4;
- case ARM::D7:
- return ARM::D6;
- case ARM::D9:
- return ARM::D8;
- case ARM::D11:
- return ARM::D10;
- case ARM::D13:
- return ARM::D12;
- case ARM::D15:
- return ARM::D14;
- }
-
- return 0;
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
- const MachineFunction &MF) const {
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R0:
- return ARM::R1;
- case ARM::R2:
- // FIXME!
- return STI.isThumb() ? 0 : ARM::R3;
- case ARM::R4:
- return ARM::R5;
- case ARM::R6:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
- case ARM::R8:
- return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
- case ARM::R10:
- return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
-
- case ARM::S0:
- return ARM::S1;
- case ARM::S2:
- return ARM::S3;
- case ARM::S4:
- return ARM::S5;
- case ARM::S6:
- return ARM::S7;
- case ARM::S8:
- return ARM::S9;
- case ARM::S10:
- return ARM::S11;
- case ARM::S12:
- return ARM::S13;
- case ARM::S14:
- return ARM::S15;
- case ARM::S16:
- return ARM::S17;
- case ARM::S18:
- return ARM::S19;
- case ARM::S20:
- return ARM::S21;
- case ARM::S22:
- return ARM::S23;
- case ARM::S24:
- return ARM::S25;
- case ARM::S26:
- return ARM::S27;
- case ARM::S28:
- return ARM::S29;
- case ARM::S30:
- return ARM::S31;
-
- case ARM::D0:
- return ARM::D1;
- case ARM::D2:
- return ARM::D3;
- case ARM::D4:
- return ARM::D5;
- case ARM::D6:
- return ARM::D7;
- case ARM::D8:
- return ARM::D9;
- case ARM::D10:
- return ARM::D11;
- case ARM::D12:
- return ARM::D13;
- case ARM::D14:
- return ARM::D15;
- }
-
- return 0;
-}
-
-#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 7fe075a65ee8..041afd041402 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -16,127 +16,26 @@
#include "ARM.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "ARMGenRegisterInfo.h.inc"
+#include "ARMBaseRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
- class TargetInstrInfo;
+ class ARMBaseInstrInfo;
class Type;
-/// Register allocation hints.
-namespace ARMRI {
- enum {
- RegPairOdd = 1,
- RegPairEven = 2
+namespace ARM {
+ /// SubregIndex - The index of various subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// ARMRegisterInfo.td file.
+ enum SubregIndex {
+ SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
+ DSUBREG_0 = 5, DSUBREG_1 = 6
};
}
-/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
-///
-static inline bool isARMLowRegister(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- return true;
- default:
- return false;
- }
-}
-
-struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
-protected:
- const TargetInstrInfo &TII;
- const ARMSubtarget &STI;
-
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
-public:
- ARMBaseRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
-
- /// getRegisterNumbering - Given the enum value for some register, e.g.
- /// ARM::LR, return the number that it corresponds to (e.g. 14).
- static unsigned getRegisterNumbering(unsigned RegEnum);
-
- /// Same as the previous getRegisterNumbering, except it sets isSPVFP to
- /// true if the register is a single-precision VFP register.
- static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
-
- const TargetRegisterClass *getPointerRegClass() const;
-
- std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
- getAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const;
-
- unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const;
-
- void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
- MachineFunction &MF) const;
-
- bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- // Debug information queries.
- unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
-
- bool isLowRegister(unsigned Reg) const;
-
-private:
- unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
-
- unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
-};
-
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
public:
- ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
-
- /// emitLoadConstPool - Emits a load from constpool to materialize the
- /// specified immediate.
- void emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo *TII, DebugLoc dl,
- unsigned DestReg, int Val,
- ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0) const;
-
- /// Code Generation virtual methods...
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
-
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool hasReservedCallFrame(MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
};
} // end namespace llvm
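The ARM::SubregIndex enum added above gives named indices for the S and D
subregisters declared in ARMRegisterInfo.td. A hypothetical usage sketch,
assuming TargetRegisterInfo::getSubReg(Reg, Index) is the lookup these
indices feed (the helper is illustrative, not part of this patch):

#include "llvm/Target/TargetRegisterInfo.h"
#include "ARMRegisterInfo.h" // for ARM::SubregIndex

// Hypothetical helper: recover the two D halves of a NEON Q register via
// the DSUBREG_* indices. Per ARMRegisterInfo.td, Q1 = [D2, D3].
static void getQRegHalves(const llvm::TargetRegisterInfo &TRI, unsigned QReg,
                          unsigned &Lo, unsigned &Hi) {
  Lo = TRI.getSubReg(QReg, llvm::ARM::DSUBREG_0); // e.g. Q1 -> D2
  Hi = TRI.getSubReg(QReg, llvm::ARM::DSUBREG_1); // e.g. Q1 -> D3
}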
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index a057e5cabf60..20a7355b7653 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -18,8 +18,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
let SubRegs = subregs;
}
-class ARMFReg<bits<5> num, string n> : Register<n> {
- field bits<5> Num;
+class ARMFReg<bits<6> num, string n> : Register<n> {
+ field bits<6> Num;
let Namespace = "ARM";
}
@@ -58,10 +58,11 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+def SDummy : ARMFReg<63, "sINVALID">;
// Aliases of the F* registers used to hold 64-bit fp values (doubles)
def D0 : ARMReg< 0, "d0", [S0, S1]>;
-def D1 : ARMReg< 1, "d1", [S2, S3]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>;
def D2 : ARMReg< 2, "d2", [S4, S5]>;
def D3 : ARMReg< 3, "d3", [S6, S7]>;
def D4 : ARMReg< 4, "d4", [S8, S9]>;
@@ -78,18 +79,18 @@ def D14 : ARMReg<14, "d14", [S28, S29]>;
def D15 : ARMReg<15, "d15", [S30, S31]>;
// VFP3 defines 16 additional double registers
-def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d16">;
-def D18 : ARMFReg<18, "d16">; def D19 : ARMFReg<19, "d16">;
-def D20 : ARMFReg<20, "d16">; def D21 : ARMFReg<21, "d16">;
-def D22 : ARMFReg<22, "d16">; def D23 : ARMFReg<23, "d16">;
-def D24 : ARMFReg<24, "d16">; def D25 : ARMFReg<25, "d16">;
-def D26 : ARMFReg<26, "d16">; def D27 : ARMFReg<27, "d16">;
-def D28 : ARMFReg<28, "d16">; def D29 : ARMFReg<29, "d16">;
-def D30 : ARMFReg<30, "d16">; def D31 : ARMFReg<31, "d16">;
+def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
+def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">;
+def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">;
+def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">;
+def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">;
+def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">;
+def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
+def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
// Advanced SIMD (NEON) defines 16 quad-word aliases
def Q0 : ARMReg< 0, "q0", [D0, D1]>;
-def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
def Q2 : ARMReg< 2, "q2", [D4, D5]>;
def Q3 : ARMReg< 3, "q3", [D6, D7]>;
def Q4 : ARMReg< 4, "q4", [D8, D9]>;
@@ -106,7 +107,9 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>;
def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
+def CPSR : ARMReg<0, "cpsr">;
+
+def FPSCR : ARMReg<1, "fpscr">;
// Register classes.
//
@@ -158,6 +161,13 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
ARM::R4, ARM::R5, ARM::R6,
ARM::R8, ARM::R10,ARM::R11,
ARM::R7 };
+ // FP is R7, R9 is available as a callee-saved register.
+ // This is used by non-Darwin platforms in Thumb mode.
+ static const unsigned ARM_GPR_AO_5[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 };
GPRClass::iterator
GPRClass::allocation_order_begin(const MachineFunction &MF) const {
@@ -171,6 +181,8 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
} else {
if (Subtarget.isR9Reserved())
return ARM_GPR_AO_2;
+ else if (Subtarget.isThumb())
+ return ARM_GPR_AO_5;
else
return ARM_GPR_AO_1;
}
@@ -191,6 +203,8 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
} else {
if (Subtarget.isR9Reserved())
I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+ else if (Subtarget.isThumb())
+ I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned));
else
I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
}
@@ -240,32 +254,45 @@ def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32,
+ [S0, S1, S2, S3, S4, S5, S6, S7,
+ S8, S9, S10, S11, S12, S13, S14, S15]>;
+
+// Dummy f32 regclass to represent impossible subreg indices.
+def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> {
+ let CopyCost = -1;
+}
+
// Scalar double precision floating point / generic 64-bit vector register
// class.
// ARM requires only word alignment for doubles. It's more performant if
// they are double-word aligned, though.
def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15]> {
- let SubRegClassList = [SPR, SPR];
+ D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31]> {
+ let SubRegClassList = [SPR_INVALID, SPR_INVALID];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// VFP2
- static const unsigned ARM_DPR_VFP2[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ static const unsigned ARM_DPR_VFP2[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
// VFP3
static const unsigned ARM_DPR_VFP3[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
ARM::D12, ARM::D13, ARM::D14, ARM::D15,
- ARM::D16, ARM::D17, ARM::D18, ARM::D15,
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
ARM::D20, ARM::D21, ARM::D22, ARM::D23,
ARM::D24, ARM::D25, ARM::D26, ARM::D27,
ARM::D28, ARM::D29, ARM::D30, ARM::D31 };
@@ -290,11 +317,34 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
}];
}
+// Subset of DPR that are accessible with VFP2 (and that therefore also
+// have 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v2i32, v2f32], 64,
+ [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15]> {
+ let SubRegClassList = [SPR, SPR];
+}
+
+// Subset of DPR that can be used as a source of NEON scalars for 16-bit
+// operations.
+def DPR_8 : RegisterClass<"ARM", [f64, v4i16, v2f32], 64,
+ [D0, D1, D2, D3, D4, D5, D6, D7]> {
+ let SubRegClassList = [SPR_8, SPR_8];
+}
+
// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
- let SubRegClassList = [SPR, SPR, SPR, SPR, DPR, DPR];
+ let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID,
+ DPR, DPR];
+}
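+// (the six SubRegClassList entries follow the Q register's subreg indices:
+// four 32-bit S subregs, then the two D subregs listed in the SubRegSet
+// definitions at the end of this file)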
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128,
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> {
+ let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2];
}
// Condition code registers.
@@ -341,4 +391,3 @@ def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
[D1, D3, D5, D7, D9, D11, D13, D15,
D17, D19, D21, D23, D25, D27, D29, D31]>;
-
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 75fa707f9f93..fc4c5f5830b0 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -10,26 +10,151 @@
//===----------------------------------------------------------------------===//
// Functional units across ARM processors
//
-def FU_iALU : FuncUnit; // Integer alu unit
-def FU_iLdSt : FuncUnit; // Integer load / store unit
-def FU_FpALU : FuncUnit; // FP alu unit
-def FU_FpLdSt : FuncUnit; // FP load / store unit
-def FU_Br : FuncUnit; // Branch unit
+def FU_Issue : FuncUnit; // issue
+def FU_Pipe0 : FuncUnit; // pipeline 0
+def FU_Pipe1 : FuncUnit; // pipeline 1
+def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
+def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
+def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def FU_NLSPipe : FuncUnit; // NEON LS pipe
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
-def IIC_iALU : InstrItinClass;
-def IIC_iLoad : InstrItinClass;
-def IIC_iStore : InstrItinClass;
-def IIC_fpALU : InstrItinClass;
-def IIC_fpLoad : InstrItinClass;
-def IIC_fpStore : InstrItinClass;
-def IIC_Br : InstrItinClass;
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iUNAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iLoadi : InstrItinClass;
+def IIC_iLoadr : InstrItinClass;
+def IIC_iLoadsi : InstrItinClass;
+def IIC_iLoadiu : InstrItinClass;
+def IIC_iLoadru : InstrItinClass;
+def IIC_iLoadsiu : InstrItinClass;
+def IIC_iLoadm : InstrItinClass;
+def IIC_iStorei : InstrItinClass;
+def IIC_iStorer : InstrItinClass;
+def IIC_iStoresi : InstrItinClass;
+def IIC_iStoreiu : InstrItinClass;
+def IIC_iStoreru : InstrItinClass;
+def IIC_iStoresiu : InstrItinClass;
+def IIC_iStorem : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoadm : InstrItinClass;
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStorem : InstrItinClass;
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VST : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
def GenericItineraries : ProcessorItineraries<[]>;
+
include "ARMScheduleV6.td"
+include "ARMScheduleV7.td"
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 596a57f8aefd..1ace718c9e17 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -1,4 +1,4 @@
-//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===//
+//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,4 @@
//
//===----------------------------------------------------------------------===//
-def V6Itineraries : ProcessorItineraries<[
- InstrItinData<IIC_iALU , [InstrStage<1, [FU_iALU]>]>,
- InstrItinData<IIC_iLoad , [InstrStage<2, [FU_iLdSt]>]>,
- InstrItinData<IIC_iStore , [InstrStage<1, [FU_iLdSt]>]>,
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_FpALU]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<2, [FU_FpLdSt]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>,
- InstrItinData<IIC_Br , [InstrStage<3, [FU_Br]>]>
-]>;
+// TODO: Add model for an ARM11
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
new file mode 100644
index 000000000000..e56581395237
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -0,0 +1,587 @@
+//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v7 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+//
+// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
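+// (an instruction that can issue to either pipe lists both units in one
+// InstrStage, e.g. InstrStage<1, [FU_Pipe0, FU_Pipe1]>)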
+//
+def CortexA8Itineraries : ProcessorItineraries<[
+
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
+
+ // Integer multiply pipeline
+ // The result is written in E5, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 6 for those cases
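+ // (e.g. IIC_iMUL32 below occupies FU_Pipe0 for two cycles, so its result
+ // latency is listed as 6 rather than 5)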
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>,
+ InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>,
+ InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>,
+ InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>,
+ InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>,
+ InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // loads have an extra cycle of latency, but are fully pipelined
+ // use FU_Issue to enforce the 1 load/store per cycle limit
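+ // (a trailing 0 in InstrStage<1, [FU_Issue], 0> means the next stage may
+ // start in the same cycle, so FU_Issue overlaps the pipe stages below)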
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0], 0>,
+ InstrStage<1, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0], 0>,
+ InstrStage<1, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple
+ InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<2, [FU_Pipe0], 0>,
+ InstrStage<2, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>]>,
+
+ // Integer store pipeline
+ //
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0], 0>,
+ InstrStage<1, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0], 0>,
+ InstrStage<1, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<2, [FU_Pipe0], 0>,
+ InstrStage<2, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
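+ // (hence the single-precision classes below take one FU_NPipe cycle, while
+ // the double-precision ones block both NEON pipes for many cycles)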
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<4, [FU_NPipe], 0>,
+ InstrStage<4, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<4, [FU_NPipe], 0>,
+ InstrStage<4, [FU_NLSPipe]>]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<7, [FU_NPipe], 0>,
+ InstrStage<7, [FU_NLSPipe]>]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<5, [FU_NPipe], 0>,
+ InstrStage<5, [FU_NLSPipe]>]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<8, [FU_NPipe], 0>,
+ InstrStage<8, [FU_NLSPipe]>]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<8, [FU_NPipe], 0>,
+ InstrStage<8, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<9, [FU_NPipe], 0>,
+ InstrStage<9, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<11, [FU_NPipe], 0>,
+ InstrStage<11, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<19, [FU_NPipe], 0>,
+ InstrStage<19, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<20, [FU_NPipe], 0>,
+ InstrStage<20, [FU_NLSPipe]>]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<29, [FU_NPipe], 0>,
+ InstrStage<29, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<19, [FU_NPipe], 0>,
+ InstrStage<19, [FU_NLSPipe]>]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<29, [FU_NPipe], 0>,
+ InstrStage<29, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP Load
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // Double-precision FP Load
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0], 0>,
+ InstrStage<1, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // FP Load Multiple
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>,
+ InstrStage<2, [FU_Pipe0], 0>,
+ InstrStage<2, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // Single-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0], 0>,
+ InstrStage<1, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // FP Store Multiple
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>,
+ InstrStage<2, [FU_Pipe0], 0>,
+ InstrStage<2, [FU_Pipe1]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>,
+ //
+ // VST
+ InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0], 0>,
+ InstrStage<1, [FU_NLSPipe]>]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // The result is written in N5, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
+ //
+ // Quad-register FP Binary
+ // The result is written in N5, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // The result is written in N2, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // The result is written in N2, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // The result is written in N2, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>,
+ InstrStage<1, [FU_NPipe], 0>,
+ InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>,
+ //
+ // Quad-register FP Multiply-Accumulate
+ // The result is written in N9, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>,
+ //
+ // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // The result is written in N3, but that is relative to the last cycle of
+ // a multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>,
+ InstrStage<2, [FU_NLSPipe], 0>,
+ InstrStage<3, [FU_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NPipe]>,
+ InstrStage<2, [FU_NLSPipe], 0>,
+ InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>,
+ InstrStage<1, [FU_NPipe], 0>,
+ InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>,
+ InstrStage<1, [FU_NPipe], 0>,
+ InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>,
+ InstrStage<1, [FU_NPipe], 0>,
+ InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_NLSPipe]>,
+ InstrStage<1, [FU_NPipe], 0>,
+ InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e61108857413..cf1ee3f02953 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,8 +13,7 @@
#include "ARMSubtarget.h"
#include "ARMGenSubtarget.inc"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -22,13 +21,19 @@ using namespace llvm;
static cl::opt<bool>
ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
+static cl::opt<bool>
+UseNEONFP("arm-use-neon-fp",
+ cl::desc("Use NEON for single-precision FP"),
+ cl::init(false), cl::Hidden);
-ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isThumb)
: ARMArchVersion(V4T)
, ARMFPUType(None)
+ , UseNEONForSinglePrecisionFP(UseNEONFP)
, IsThumb(isThumb)
, ThumbMode(Thumb1)
+ , PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, stackAlignment(4)
, CPUString("generic")
@@ -45,7 +50,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
- const std::string& TT = M.getTargetTriple();
unsigned Len = TT.length();
unsigned Idx = 0;
@@ -75,14 +79,14 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
}
}
+ // Thumb2 implies at least V6T2.
+ if (ARMArchVersion < V6T2 && ThumbMode >= Thumb2)
+ ARMArchVersion = V6T2;
+
if (Len >= 10) {
if (TT.find("-darwin") != std::string::npos)
// arm-darwin
TargetType = isDarwin;
- } else if (TT.empty()) {
-#if defined(__APPLE__)
- TargetType = isDarwin;
-#endif
}
if (TT.find("eabi") != std::string::npos)
@@ -93,4 +97,61 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
if (isTargetDarwin())
IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+
+ // Set CPU-specific features.
+ if (CPUString == "cortex-a8") {
+ PostRAScheduler = true;
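+ // cl::opt::getPosition() returns 0 when the flag never appeared on the
+ // command line, so the NEON FP default is only applied when the user
+ // did not pass -arm-use-neon-fp explicitly.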
+ if (UseNEONFP.getPosition() == 0)
+ UseNEONForSinglePrecisionFP = true;
+ }
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ // GVs with ghost linkage (in JIT lazy compilation mode) do not require an
+ // extra load from a stub.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+
+ if (!isTargetDarwin()) {
+ // An extra load is needed for all externally visible symbols.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ if (RelocM == Reloc::PIC_) {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+
+ return false;
+ } else {
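+ // Non-PIC on Darwin: as above, except that hidden declarations and
+ // common symbols do not need a $non_lazy_ptr stub.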
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 5110b3157c48..7098fd4f36ba 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -15,11 +15,12 @@
#define ARMSUBTARGET_H
#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtarget.h"
#include <string>
namespace llvm {
-class Module;
+class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
@@ -43,12 +44,20 @@ protected:
/// ARMFPUType - Floating Point Unit type.
ARMFPEnum ARMFPUType;
+ /// UseNEONForSinglePrecisionFP - True if the NEONFP attribute has been
+ /// specified. Use the method useNEONForSinglePrecisionFP() to
+ /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
/// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
@@ -61,7 +70,7 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
-
+
public:
enum {
isELF, isDarwin
@@ -73,9 +82,9 @@ protected:
} TargetABI;
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- ARMSubtarget(const Module &M, const std::string &FS, bool isThumb);
+ ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -99,6 +108,8 @@ protected:
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
bool hasVFP3() const { return ARMFPUType >= VFPv3; }
bool hasNEON() const { return ARMFPUType >= NEON; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }
@@ -108,14 +119,18 @@ protected:
bool isThumb() const { return IsThumb; }
bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
- bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+ bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
bool hasThumb2() const { return ThumbMode >= Thumb2; }
bool isR9Reserved() const { return IsR9Reserved; }
const std::string & getCPUString() const { return CPUString; }
+
+ /// enablePostRAScheduler - From TargetSubtarget, return true to
+ /// enable post-RA scheduler.
+ bool enablePostRAScheduler() const { return PostRAScheduler; }
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
@@ -123,6 +138,10 @@ protected:
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 23447332198d..32ddc20a5604 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,188 +11,122 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
-#include "ARMTargetAsmInfo.h"
+#include "ARMMCAsmInfo.h"
#include "ARMFrameInfo.h"
#include "ARM.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
- cl::desc("Disable load store optimization pass"));
-static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
- cl::desc("Disable if-conversion pass"));
-
-/// ARMTargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int ARMTargetMachineModule;
-int ARMTargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<ARMTargetMachine> X("arm", "ARM");
-static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
-
-// Force static initialization.
-extern "C" void LLVMInitializeARMTarget() { }
-
-// No assembler printer by default
-ARMBaseTargetMachine::AsmPrinterCtorFn ARMBaseTargetMachine::AsmPrinterCtor = 0;
-
-/// ThumbTargetMachine - Create an Thumb architecture model.
-///
-unsigned ThumbTargetMachine::getJITMatchQuality() {
-#if defined(__thumb__)
- return 10;
-#endif
- return 0;
+static const MCAsmInfo *createMCAsmInfo(const Target &T,
+ const StringRef &TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ case Triple::Darwin:
+ return new ARMMCAsmInfoDarwin();
+ default:
+ return new ARMELFMCAsmInfo();
+ }
}
-unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
- std::string TT = M.getTargetTriple();
- // Match thumb-foo-bar, as well as things like thumbv5blah-*
- if (TT.size() >= 6 &&
- (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv"))
- return 20;
- // If the target triple is something non-thumb, we don't match.
- if (!TT.empty()) return 0;
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
+ RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer32)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
-
- return getJITMatchQuality()/2;
+ // Register the target asm info.
+ RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
+ RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
}
/// TargetMachine ctor - Create an ARM architecture model.
///
-ARMBaseTargetMachine::ARMBaseTargetMachine(const Module &M,
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
+ const std::string &TT,
const std::string &FS,
bool isThumb)
- : Subtarget(M, FS, isThumb),
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, isThumb),
FrameInfo(Subtarget),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
DefRelocModel = getRelocationModel();
}
-ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
- : ARMBaseTargetMachine(M, FS, false), InstrInfo(Subtarget),
+ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32") :
std::string("e-p:32:32-f64:64:64-i64:64:64")),
TLInfo(*this) {
}
-ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
- : ARMBaseTargetMachine(M, FS, true),
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, FS, true),
+ InstrInfo(Subtarget.hasThumb2()
+ ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
+ : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
"i16:16:32-i8:8:32-i1:8:32-a:0:32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-a:0:32")),
TLInfo(*this) {
- // Create the approriate type of Thumb InstrInfo
- if (Subtarget.hasThumb2())
- InstrInfo = new Thumb2InstrInfo(Subtarget);
- else
- InstrInfo = new Thumb1InstrInfo(Subtarget);
-}
-
-unsigned ARMTargetMachine::getJITMatchQuality() {
-#if defined(__arm__)
- return 10;
-#endif
- return 0;
-}
-
-unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
- std::string TT = M.getTargetTriple();
- // Match arm-foo-bar, as well as things like armv5blah-*
- if (TT.size() >= 4 &&
- (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv"))
- return 20;
- // If the target triple is something non-arm, we don't match.
- if (!TT.empty()) return 0;
-
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer32)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
-
- return getJITMatchQuality()/2;
}
-const TargetAsmInfo *ARMBaseTargetMachine::createTargetAsmInfo() const {
- switch (Subtarget.TargetType) {
- case ARMSubtarget::isDarwin:
- return new ARMDarwinTargetAsmInfo(*this);
- case ARMSubtarget::isELF:
- return new ARMELFTargetAsmInfo(*this);
- default:
- return new ARMGenericTargetAsmInfo(*this);
- }
-}
-
// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- PM.add(createARMISelDag(*this));
+ PM.add(createARMISelDag(*this, OptLevel));
return false;
}
bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
- if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+ if (Subtarget.hasNEON())
+ PM.add(createNEONPreAllocPass());
+
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
return true;
}
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
- if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
- if (OptLevel != CodeGenOpt::None &&
- !DisableIfConversion && !Subtarget.isThumb())
- PM.add(createIfConverterPass());
-
- PM.add(createARMConstantIslandPass());
return true;
}
-bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // Output assembly language.
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ PM.add(createIfConverterPass());
+
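+ // Thumb2: form IT blocks around predicated instructions, then shrink
+ // eligible 32-bit encodings down to their 16-bit forms.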
+ if (Subtarget.isThumb2()) {
+ PM.add(createThumb2ITBlockPass());
+ PM.add(createThumb2SizeReductionPass());
+ }
- return false;
+ PM.add(createARMConstantIslandPass());
+ return true;
}
-
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
// FIXME: Move this to TargetJITInfo!
if (DefRelocModel == Reloc::Default)
@@ -200,18 +134,11 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
// Machine code emitter pass for ARM.
PM.add(createARMCodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
if (DefRelocModel == Reloc::Default)
@@ -219,43 +146,42 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
+ return false;
+}
+
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+ // Machine code emitter pass for ARM.
+ PM.add(createARMObjectCodeEmitterPass(*this, OCE));
return false;
}
bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
// Machine code emitter pass for ARM.
PM.add(createARMCodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
+bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMObjectCodeEmitterPass(*this, OCE));
+ return false;
+}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a0df54d6d528..71a53488f164 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -16,7 +16,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "ARMInstrInfo.h"
#include "ARMFrameInfo.h"
#include "ARMJITInfo.h"
@@ -27,8 +26,6 @@
namespace llvm {
-class Module;
-
class ARMBaseTargetMachine : public LLVMTargetMachine {
protected:
ARMSubtarget Subtarget;
@@ -39,16 +36,9 @@ private:
InstrItineraryData InstrItins;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
-protected:
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- ARMBaseTargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- ARMBaseTargetMachine(const Module &M, const std::string &FS, bool isThumb);
+ ARMBaseTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool isThumb);
virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
@@ -57,34 +47,26 @@ public:
return InstrItins;
}
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
-
- static unsigned getModuleMatchQuality(const Module &M);
- static unsigned getJITMatchQuality();
-
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &MCE);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &MCE);
+ ObjectCodeEmitter &OCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE);
};
/// ARMTargetMachine - ARM target machine.
@@ -94,7 +76,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
public:
- ARMTargetMachine(const Module &M, const std::string &FS);
+ ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -106,9 +89,6 @@ public:
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout; }
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
};
/// ThumbTargetMachine - Thumb target machine.
@@ -120,7 +100,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
public:
- ThumbTargetMachine(const Module &M, const std::string &FS);
+ ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
@@ -134,9 +115,6 @@ public:
/// returns either Thumb1InstrInfo or Thumb2InstrInfo
virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout; }
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 000000000000..9703403db22a
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,39 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ ARMElfTargetObjectFile() : TargetLoweringObjectFileELF() {}
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
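+ // AAPCS ELF uses .init_array/.fini_array rather than the default
+ // .ctors/.dtors sections for static constructors and destructors.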
+ if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ StaticCtorSection =
+ getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ }
+ }
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 000000000000..7438ea9c79f3
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,618 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLexer.h"
+#include "llvm/MC/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+namespace {
+struct ARMOperand;
+
+// The shift types for register-controlled shifts in ARM memory addressing.
+enum ShiftType {
+ Lsl,
+ Lsr,
+ Asr,
+ Ror,
+ Rrx
+};
+
+class ARMAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ bool ParseRegister(ARMOperand &Op);
+
+ bool ParseRegisterList(ARMOperand &Op);
+
+ bool ParseMemory(ARMOperand &Op);
+
+ bool ParseShift(enum ShiftType *St, const MCExpr *&ShiftAmount);
+
+ bool ParseOperand(ARMOperand &Op);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+  // TODO - For now, hacked versions of the next two live in this file to
+  // allow some parser testing until the TableGen versions are implemented.
+
+ /// @name Auto-generated Match Functions
+ /// {
+ bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
+ MCInst &Inst);
+
+ /// MatchRegisterName - Match the given string to a register name and return
+ /// its register number, or -1 if there is no match. To allow return values
+  /// to be used directly in register lists, ARM registers have values between
+ /// 0 and 15.
+ int MatchRegisterName(const StringRef &Name);
+
+ /// }
+
+
+public:
+ ARMAsmParser(const Target &T, MCAsmParser &_Parser)
+ : TargetAsmParser(T), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// instruction.
+struct ARMOperand {
+ enum {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ bool Writeback;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ // This is for all forms of ARM address expressions
+ struct {
+ unsigned BaseRegNum;
+ bool OffsetIsReg;
+ const MCExpr *Offset; // used when OffsetIsReg is false
+ unsigned OffsetRegNum; // used when OffsetIsReg is true
+ bool OffsetRegShifted; // only used when OffsetIsReg is true
+ enum ShiftType ShiftType; // used when OffsetRegShifted is true
+ const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
+ bool Preindexed;
+ bool Postindexed;
+ bool Negative; // only used when OffsetIsReg is true
+ bool Writeback;
+ } Mem;
+
+ };
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+  bool isToken() const { return Kind == Token; }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ static ARMOperand CreateToken(StringRef Str) {
+ ARMOperand Res;
+ Res.Kind = Token;
+ Res.Tok.Data = Str.data();
+ Res.Tok.Length = Str.size();
+ return Res;
+ }
+
+ static ARMOperand CreateReg(unsigned RegNum, bool Writeback) {
+ ARMOperand Res;
+ Res.Kind = Register;
+ Res.Reg.RegNum = RegNum;
+ Res.Reg.Writeback = Writeback;
+ return Res;
+ }
+
+ static ARMOperand CreateImm(const MCExpr *Val) {
+ ARMOperand Res;
+ Res.Kind = Immediate;
+ Res.Imm.Val = Val;
+ return Res;
+ }
+
+ static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
+ const MCExpr *Offset, unsigned OffsetRegNum,
+ bool OffsetRegShifted, enum ShiftType ShiftType,
+ const MCExpr *ShiftAmount, bool Preindexed,
+ bool Postindexed, bool Negative, bool Writeback) {
+ ARMOperand Res;
+ Res.Kind = Memory;
+ Res.Mem.BaseRegNum = BaseRegNum;
+ Res.Mem.OffsetIsReg = OffsetIsReg;
+ Res.Mem.Offset = Offset;
+ Res.Mem.OffsetRegNum = OffsetRegNum;
+ Res.Mem.OffsetRegShifted = OffsetRegShifted;
+ Res.Mem.ShiftType = ShiftType;
+ Res.Mem.ShiftAmount = ShiftAmount;
+ Res.Mem.Preindexed = Preindexed;
+ Res.Mem.Postindexed = Postindexed;
+ Res.Mem.Negative = Negative;
+ Res.Mem.Writeback = Writeback;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
+
+// Try to parse a register name.  The token must be an Identifier when called;
+// if it is a register name, a Reg operand is created, the token is eaten and
+// false is returned.  Otherwise true is returned and no token is eaten.
+// TODO: this is likely to change to allow different register types and/or to
+// parse for a specific register type.
+bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
+ const AsmToken &Tok = getLexer().getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ int RegNum;
+
+ RegNum = MatchRegisterName(Tok.getString());
+ if (RegNum == -1)
+ return true;
+ getLexer().Lex(); // Eat identifier token.
+
+ bool Writeback = false;
+ const AsmToken &ExclaimTok = getLexer().getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Writeback = true;
+ getLexer().Lex(); // Eat exclaim token
+ }
+
+ Op = ARMOperand::CreateReg(RegNum, Writeback);
+
+ return false;
+}
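+
+// A worked example of the above: for the input "r3!", MatchRegisterName maps
+// "r3" to 3, the trailing '!' sets Writeback, and the net effect is
+//
+//   Op = ARMOperand::CreateReg(3, /*Writeback=*/true);
+//
+// with both tokens consumed and false (success) returned.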
+
+// Try to parse a register list.  For now the first token must be a '{' when
+// this is called.
+bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {
+ assert(getLexer().getTok().is(AsmToken::LCurly) &&
+ "Token is not an Left Curly Brace");
+ getLexer().Lex(); // Eat left curly brace token.
+
+ const AsmToken &RegTok = getLexer().getTok();
+ SMLoc RegLoc = RegTok.getLoc();
+ if (RegTok.isNot(AsmToken::Identifier))
+ return Error(RegLoc, "register expected");
+ int RegNum = MatchRegisterName(RegTok.getString());
+ if (RegNum == -1)
+ return Error(RegLoc, "register expected");
+ getLexer().Lex(); // Eat identifier token.
+ unsigned RegList = 1 << RegNum;
+
+ int HighRegNum = RegNum;
+ // TODO ranges like "{Rn-Rm}"
+ while (getLexer().getTok().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat comma token.
+
+ const AsmToken &RegTok = getLexer().getTok();
+ SMLoc RegLoc = RegTok.getLoc();
+ if (RegTok.isNot(AsmToken::Identifier))
+ return Error(RegLoc, "register expected");
+ int RegNum = MatchRegisterName(RegTok.getString());
+ if (RegNum == -1)
+ return Error(RegLoc, "register expected");
+
+ if (RegList & (1 << RegNum))
+ Warning(RegLoc, "register duplicated in register list");
+ else if (RegNum <= HighRegNum)
+ Warning(RegLoc, "register not in ascending order in register list");
+ RegList |= 1 << RegNum;
+ HighRegNum = RegNum;
+
+ getLexer().Lex(); // Eat identifier token.
+ }
+ const AsmToken &RCurlyTok = getLexer().getTok();
+ if (RCurlyTok.isNot(AsmToken::RCurly))
+ return Error(RCurlyTok.getLoc(), "'}' expected");
+  getLexer().Lex(); // Eat right curly brace token.
+
+ return false;
+}
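+
+// Worked example: "{r0, r2, r3}" is accepted and the loop above accumulates
+//
+//   RegList = (1 << 0) | (1 << 2) | (1 << 3);  // == 0xD
+//
+// "{r2, r0}" also parses but triggers the ascending-order warning, and a
+// repeated register triggers the duplicate warning.  Note that the list is
+// only validated here; Op is not populated from it yet.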
+
+// Try to parse an ARM memory expression.  It must start with a '[' token.
+// TODO: Only pre-indexed and post-indexed addressing is handled so far;
+// unindexed with option, etc. is still to do.
+bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
+ assert(getLexer().getTok().is(AsmToken::LBrac) &&
+ "Token is not an Left Bracket");
+ getLexer().Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = getLexer().getTok();
+ if (BaseRegTok.isNot(AsmToken::Identifier))
+ return Error(BaseRegTok.getLoc(), "register expected");
+ int BaseRegNum = MatchRegisterName(BaseRegTok.getString());
+ if (BaseRegNum == -1)
+ return Error(BaseRegTok.getLoc(), "register expected");
+ getLexer().Lex(); // Eat identifier token.
+
+ bool Preindexed = false;
+ bool Postindexed = false;
+ bool OffsetIsReg = false;
+ bool Negative = false;
+ bool Writeback = false;
+
+ // First look for preindexed address forms:
+ // [Rn, +/-Rm]
+ // [Rn, #offset]
+ // [Rn, +/-Rm, shift]
+ // that is after the "[Rn" we now have see if the next token is a comma.
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ Preindexed = true;
+ getLexer().Lex(); // Eat comma token.
+
+ const AsmToken &NextTok = getLexer().getTok();
+ if (NextTok.is(AsmToken::Plus))
+ getLexer().Lex(); // Eat plus token.
+ else if (NextTok.is(AsmToken::Minus)) {
+ Negative = true;
+ getLexer().Lex(); // Eat minus token
+ }
+
+ // See if there is a register following the "[Rn," we have so far.
+ const AsmToken &OffsetRegTok = getLexer().getTok();
+ int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
+ bool OffsetRegShifted = false;
+    enum ShiftType ShiftType = Lsl;
+    const MCExpr *ShiftAmount = 0;  // initialized so CreateMem never copies
+    const MCExpr *Offset = 0;       // indeterminate values
+ if (OffsetRegNum != -1) {
+ OffsetIsReg = true;
+ getLexer().Lex(); // Eat identifier token for the offset register.
+ // Look for a comma then a shift
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat comma token.
+
+ const AsmToken &Tok = getLexer().getTok();
+ if (ParseShift(&ShiftType, ShiftAmount))
+ return Error(Tok.getLoc(), "shift expected");
+ OffsetRegShifted = true;
+ }
+ }
+ else { // "[Rn," we have so far was not followed by "Rm"
+ // Look for #offset following the "[Rn,"
+ const AsmToken &HashTok = getLexer().getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ getLexer().Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(Offset))
+ return true;
+ }
+ const AsmToken &RBracTok = getLexer().getTok();
+ if (RBracTok.isNot(AsmToken::RBrac))
+ return Error(RBracTok.getLoc(), "']' expected");
+ getLexer().Lex(); // Eat right bracket token.
+
+ const AsmToken &ExclaimTok = getLexer().getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Writeback = true;
+ getLexer().Lex(); // Eat exclaim token
+ }
+ Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
+ OffsetRegShifted, ShiftType, ShiftAmount,
+ Preindexed, Postindexed, Negative, Writeback);
+ return false;
+ }
+ // The "[Rn" we have so far was not followed by a comma.
+ else if (Tok.is(AsmToken::RBrac)) {
+    // These are the post-indexed addressing forms:
+ // [Rn], #offset
+ // [Rn], +/-Rm
+ // [Rn], +/-Rm, shift
+    // that is, a ']' follows the "[Rn".
+ Postindexed = true;
+ Writeback = true;
+ getLexer().Lex(); // Eat right bracket token.
+
+ const AsmToken &CommaTok = getLexer().getTok();
+ if (CommaTok.isNot(AsmToken::Comma))
+ return Error(CommaTok.getLoc(), "',' expected");
+ getLexer().Lex(); // Eat comma token.
+
+ const AsmToken &NextTok = getLexer().getTok();
+ if (NextTok.is(AsmToken::Plus))
+ getLexer().Lex(); // Eat plus token.
+ else if (NextTok.is(AsmToken::Minus)) {
+ Negative = true;
+ getLexer().Lex(); // Eat minus token
+ }
+
+ // See if there is a register following the "[Rn]," we have so far.
+ const AsmToken &OffsetRegTok = getLexer().getTok();
+ int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
+ bool OffsetRegShifted = false;
+    enum ShiftType ShiftType = Lsl;
+    const MCExpr *ShiftAmount = 0;  // initialized so CreateMem never copies
+    const MCExpr *Offset = 0;       // indeterminate values
+ if (OffsetRegNum != -1) {
+ OffsetIsReg = true;
+ getLexer().Lex(); // Eat identifier token for the offset register.
+ // Look for a comma then a shift
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat comma token.
+
+ const AsmToken &Tok = getLexer().getTok();
+ if (ParseShift(&ShiftType, ShiftAmount))
+ return Error(Tok.getLoc(), "shift expected");
+ OffsetRegShifted = true;
+ }
+ }
+ else { // "[Rn]," we have so far was not followed by "Rm"
+ // Look for #offset following the "[Rn],"
+ const AsmToken &HashTok = getLexer().getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ getLexer().Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(Offset))
+ return true;
+ }
+ Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
+ OffsetRegShifted, ShiftType, ShiftAmount,
+ Preindexed, Postindexed, Negative, Writeback);
+ return false;
+ }
+
+ return true;
+}
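+
+// Examples of the shapes accepted above and the operands they build:
+//
+//   "[r1, #4]"          -> CreateMem(1, OffsetIsReg=false, Offset=4, ...,
+//                                    Preindexed=true)
+//   "[r1, -r2, lsl #2]" -> CreateMem(1, OffsetIsReg=true, OffsetRegNum=2,
+//                                    OffsetRegShifted=true, Lsl, 2,
+//                                    Preindexed=true, Negative=true)
+//   "[r1], r2"          -> CreateMem(1, OffsetIsReg=true, OffsetRegNum=2,
+//                                    Postindexed=true, Writeback=true)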
+
+/// ParseShift as one of these two:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// and returns false if it parses a shift, otherwise it returns true.
+bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) {
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ const StringRef &ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ *St = Lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ *St = Lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ *St = Asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ *St = Ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ *St = Rrx;
+ else
+ return true;
+ getLexer().Lex(); // Eat shift type token.
+
+  // For all but RRX there must be a '#' and a shift amount.
+ if (*St != Rrx) {
+ // Look for # following the shift type
+ const AsmToken &HashTok = getLexer().getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ getLexer().Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(ShiftAmount))
+ return true;
+ }
+
+ return false;
+}
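+
+// For example, "lsl #2" leaves *St == Lsl and ShiftAmount holding the
+// expression 2, while "rrx" sets *St and consumes no '#' amount, since RRX
+// always rotates right by exactly one bit (through the carry flag).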
+
+// A hack to allow some testing
+int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
+ if (Name == "r0" || Name == "R0")
+ return 0;
+ else if (Name == "r1" || Name == "R1")
+ return 1;
+ else if (Name == "r2" || Name == "R2")
+ return 2;
+ else if (Name == "r3" || Name == "R3")
+ return 3;
+ else if (Name == "r3" || Name == "R3")
+ return 3;
+ else if (Name == "r4" || Name == "R4")
+ return 4;
+ else if (Name == "r5" || Name == "R5")
+ return 5;
+ else if (Name == "r6" || Name == "R6")
+ return 6;
+ else if (Name == "r7" || Name == "R7")
+ return 7;
+ else if (Name == "r8" || Name == "R8")
+ return 8;
+ else if (Name == "r9" || Name == "R9")
+ return 9;
+ else if (Name == "r10" || Name == "R10")
+ return 10;
+ else if (Name == "r11" || Name == "R11" || Name == "fp")
+ return 11;
+ else if (Name == "r12" || Name == "R12" || Name == "ip")
+ return 12;
+ else if (Name == "r13" || Name == "R13" || Name == "sp")
+ return 13;
+ else if (Name == "r14" || Name == "R14" || Name == "lr")
+ return 14;
+ else if (Name == "r15" || Name == "R15" || Name == "pc")
+ return 15;
+ return -1;
+}
+
+// A hack to allow some testing
+bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
+ MCInst &Inst) {
+ struct ARMOperand Op0 = Operands[0];
+ assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
+ const StringRef &Mnemonic = Op0.getToken();
+ if (Mnemonic == "add" ||
+ Mnemonic == "stmfd" ||
+ Mnemonic == "str" ||
+ Mnemonic == "ldmfd" ||
+ Mnemonic == "ldr" ||
+ Mnemonic == "mov" ||
+ Mnemonic == "sub")
+ return false;
+
+ return true;
+}
+
+// TODO - this is a work in progress
+bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
+ switch (getLexer().getKind()) {
+ case AsmToken::Identifier:
+ if (!ParseRegister(Op))
+ return false;
+ // TODO parse other operands that start with an identifier like labels
+ return Error(getLexer().getTok().getLoc(), "labels not yet supported");
+  case AsmToken::LBrac:
+    // ParseMemory/ParseRegisterList return false on success; propagate their
+    // error result otherwise instead of falling through to the next case.
+    return ParseMemory(Op);
+  case AsmToken::LCurly:
+    return ParseRegisterList(Op);
+ case AsmToken::Hash:
+ // #42 -> immediate.
+ // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
+ getLexer().Lex();
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val))
+ return true;
+ Op = ARMOperand::CreateImm(Val);
+ return false;
+ default:
+ return Error(getLexer().getTok().getLoc(), "unexpected token in operand");
+ }
+}
+
+bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
+ SmallVector<ARMOperand, 7> Operands;
+
+ Operands.push_back(ARMOperand::CreateToken(Name));
+
+ SMLoc Loc = getLexer().getTok().getLoc();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ // Read the first operand.
+ Operands.push_back(ARMOperand());
+ if (ParseOperand(Operands.back()))
+ return true;
+
+ while (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ Operands.push_back(ARMOperand());
+ if (ParseOperand(Operands.back()))
+ return true;
+ }
+ }
+ if (!MatchInstruction(Operands, Inst))
+ return false;
+
+ Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
+ return true;
+}
+
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ getLexer().Lex();
+ }
+ }
+
+ getLexer().Lex();
+ return false;
+}
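+
+// For example, ".word 1, 2" emits two 4-byte values through EmitValue, while
+// a bare ".word" with no expressions is accepted and emits nothing.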
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+}
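+
+// With both targets registered, a driver never names ARMAsmParser directly;
+// it goes through the registry.  A minimal sketch, assuming the
+// TargetRegistry API in this tree (error handling elided):
+//
+//   std::string Err;
+//   const Target *T = TargetRegistry::lookupTarget("arm-unknown-eabi", Err);
+//   TargetAsmParser *TAP = T->createAsmParser(Parser);  // an ARMAsmParser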
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..308c6cff8da9
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmParser
+ ARMAsmParser.cpp
+ )
+
diff --git a/lib/Target/ARM/AsmParser/Makefile b/lib/Target/ARM/AsmParser/Makefile
new file mode 100644
index 000000000000..97e56126d8eb
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmParser
+
+# Hack: we need to include the 'main' ARM target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 434a19abef62..546731b00d3c 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -1,5 +1,3 @@
-//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===//
-//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
@@ -21,23 +19,30 @@
#include "ARMMachineFunctionInfo.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
-#include "llvm/MDNode.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
@@ -45,7 +50,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
- DwarfWriter *DW;
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when printing asm code for different targets.
@@ -68,22 +72,18 @@ namespace {
/// GVNonLazyPtrs - Keeps the set of GlobalValues that require
/// non-lazy-pointers for indirect access.
- StringSet<> GVNonLazyPtrs;
+ StringMap<std::string> GVNonLazyPtrs;
/// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden
/// visibility that require non-lazy-pointers for indirect access.
- StringSet<> HiddenGVNonLazyPtrs;
-
- /// FnStubs - Keeps the set of external function GlobalAddresses that the
- /// asm printer should generate stubs for.
- StringSet<> FnStubs;
+ StringMap<std::string> HiddenGVNonLazyPtrs;
/// True if asm printer is printing a series of CONSTPOOL_ENTRY.
bool InCPMode;
public:
- explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
- const TargetAsmInfo *T, bool V)
- : AsmPrinter(O, TM, T, V), DW(0), AFI(NULL), MCP(NULL),
+ explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
+ : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL),
InCPMode(false) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
@@ -110,6 +110,7 @@ namespace {
const char *Modifier = 0);
void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum);
+ void printThumbITMask(const MachineInstr *MI, int OpNum);
void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum);
void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum,
unsigned Scale);
@@ -118,10 +119,10 @@ namespace {
void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum);
void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum);
- void printT2SOImmOperand(const MachineInstr *MI, int OpNum);
void printT2SOOperand(const MachineInstr *MI, int OpNum);
void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum);
void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum);
+ void printT2AddrModeImm8s4Operand(const MachineInstr *MI, int OpNum);
void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum);
void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum);
@@ -132,6 +133,9 @@ namespace {
void printCPInstOperand(const MachineInstr *MI, int OpNum,
const char *Modifier);
void printJTBlockOperand(const MachineInstr *MI, int OpNum);
+ void printJT2BlockOperand(const MachineInstr *MI, int OpNum);
+ void printTBAddrMode(const MachineInstr *MI, int OpNum);
+ void printNoHashImmediate(const MachineInstr *MI, int OpNum);
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode);
@@ -139,12 +143,14 @@ namespace {
unsigned AsmVariant,
const char *ExtraCode);
- void printModuleLevelGV(const GlobalVariable* GVar);
- bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void PrintGlobalVariable(const GlobalVariable* GVar);
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
void printMachineInstruction(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &F);
- bool doInitialization(Module &M);
bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
/// EmitMachineConstantPoolValue - Print a machine constantpool value to
/// the .s file.
@@ -153,24 +159,35 @@ namespace {
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
GlobalValue *GV = ACPV->getGV();
- std::string Name = GV ? Mang->getValueName(GV) : TAI->getGlobalPrefix();
- if (!GV)
- Name += ACPV->getSymbol();
- if (ACPV->isNonLazyPointer()) {
- if (GV->hasHiddenVisibility())
- HiddenGVNonLazyPtrs.insert(Name);
- else
- GVNonLazyPtrs.insert(Name);
- printSuffixedName(Name, "$non_lazy_ptr");
- } else if (ACPV->isStub()) {
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
+ std::string Name;
+
+ if (ACPV->isLSDA()) {
+ SmallString<16> LSDAName;
+ raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
+ "_LSDA_" << getFunctionNumber();
+ Name = LSDAName.str();
+ } else if (GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ Name = Mang->getMangledName(GV);
+ else {
+      // FIXME: Remove this when Darwin transitions to @GOT-like syntax.
+ std::string SymName = Mang->getMangledName(GV);
+ Name = Mang->getMangledName(GV, "$non_lazy_ptr", true);
+ if (GV->hasHiddenVisibility())
+ HiddenGVNonLazyPtrs[SymName] = Name;
+ else
+ GVNonLazyPtrs[SymName] = Name;
+ }
} else
- O << Name;
+ Name = Mang->makeNameProper(ACPV->getSymbol());
+ O << Name;
+
if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
if (ACPV->getPCAdjustment() != 0) {
- O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
- << utostr(ACPV->getLabelId())
+ O << "-(" << MAI->getPrivateGlobalPrefix() << "PC"
+ << ACPV->getLabelId()
<< "+" << (unsigned)ACPV->getPCAdjustment();
if (ACPV->mustAddCurrentAddress())
O << "-.";
@@ -178,7 +195,7 @@ namespace {
}
O << "\n";
}
-
+
void getAnalysisUsage(AnalysisUsage &AU) const {
AsmPrinter::getAnalysisUsage(AU);
AU.setPreservesAll();
@@ -205,38 +222,39 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// NOTE: we don't print out constant pools here, they are handled as
// instructions.
- O << "\n";
+ O << '\n';
+
// Print out labels for the function.
const Function *F = MF.getFunction();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+
switch (F->getLinkage()) {
- default: assert(0 && "Unknown linkage type!");
+ default: llvm_unreachable("Unknown linkage type!");
case Function::PrivateLinkage:
case Function::InternalLinkage:
- SwitchToTextSection("\t.text", F);
break;
case Function::ExternalLinkage:
- SwitchToTextSection("\t.text", F);
O << "\t.globl\t" << CurrentFnName << "\n";
break;
+ case Function::LinkerPrivateLinkage:
case Function::WeakAnyLinkage:
case Function::WeakODRLinkage:
case Function::LinkOnceAnyLinkage:
case Function::LinkOnceODRLinkage:
if (Subtarget->isTargetDarwin()) {
- SwitchToTextSection(
- ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F);
O << "\t.globl\t" << CurrentFnName << "\n";
O << "\t.weak_definition\t" << CurrentFnName << "\n";
} else {
- O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ O << MAI->getWeakRefDirective() << CurrentFnName << "\n";
}
break;
}
printVisibility(CurrentFnName, F->getVisibility());
+ unsigned FnAlign = 1 << MF.getAlignment(); // MF alignment is log2.
if (AFI->isThumbFunction()) {
- EmitAlignment(MF.getAlignment(), F, AFI->getAlign());
+ EmitAlignment(FnAlign, F, AFI->getAlign());
O << "\t.code\t16\n";
O << "\t.thumb_func";
if (Subtarget->isTargetDarwin())
@@ -244,7 +262,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
O << "\n";
InCPMode = false;
} else {
- EmitAlignment(MF.getAlignment(), F);
+ EmitAlignment(FnAlign, F);
}
O << CurrentFnName << ":\n";
@@ -266,8 +284,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I) {
// Print a label for the basic block.
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true, VerboseAsm);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
@@ -276,14 +293,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (TAI->hasDotTypeDotSizeDirective())
+ if (MAI->hasDotTypeDotSizeDirective())
O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
// Emit post-function debug information.
DW->EndFunction(&MF);
- O.flush();
-
return false;
}
@@ -298,37 +313,39 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0
unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1
O << '{'
- << TRI->getAsmName(DRegLo) << "-" << TRI->getAsmName(DRegHi)
+ << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
<< '}';
+ } else if (Modifier && strcmp(Modifier, "lane") == 0) {
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1,
+ &ARM::DPR_VFP2RegClass);
+ O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
} else {
- O << TRI->getAsmName(Reg);
+ O << getRegisterName(Reg);
}
} else
- assert(0 && "not implemented");
+ llvm_unreachable("not implemented");
break;
}
case MachineOperand::MO_Immediate: {
- if (!Modifier || strcmp(Modifier, "no_hash") != 0)
- O << "#";
-
- O << MO.getImm();
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if (Modifier) {
+ if (strcmp(Modifier, "lo16") == 0)
+ O << ":lower16:";
+ else if (strcmp(Modifier, "hi16") == 0)
+ O << ":upper16:";
+ }
+ O << Imm;
break;
}
case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
GlobalValue *GV = MO.getGlobal();
- std::string Name = Mang->getValueName(GV);
- bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
- GV->hasLinkOnceLinkage());
- if (isExt && isCallOp && Subtarget->isTargetDarwin() &&
- TM.getRelocationModel() != Reloc::Static) {
- printSuffixedName(Name, "$stub");
- FnStubs.insert(Name);
- } else
- O << Name;
+ O << Mang->getMangledName(GV);
printOffset(MO.getOffset());
@@ -339,25 +356,20 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
}
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
- std::string Name(TAI->getGlobalPrefix());
- Name += MO.getSymbolName();
- if (isCallOp && Subtarget->isTargetDarwin() &&
- TM.getRelocationModel() != Reloc::Static) {
- printSuffixedName(Name, "$stub");
- FnStubs.insert(Name);
- } else
- O << Name;
+ std::string Name = Mang->makeNameProper(MO.getSymbolName());
+
+ O << Name;
if (isCallOp && Subtarget->isTargetELF() &&
TM.getRelocationModel() == Reloc::PIC_)
O << "(PLT)";
break;
}
case MachineOperand::MO_ConstantPoolIndex:
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
break;
case MachineOperand::MO_JumpTableIndex:
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << MO.getIndex();
break;
default:
@@ -365,9 +377,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
}
}
-static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
- const TargetAsmInfo *TAI) {
- assert(V < (1 << 12) && "Not a valid so_imm value!");
+static void printSOImm(formatted_raw_ostream &O, int64_t V, bool VerboseAsm,
+ const MCAsmInfo *MAI) {
+ // Break it up into two parts that make up a shifter immediate.
+ V = ARM_AM::getSOImmVal(V);
+ assert(V != -1 && "Not a valid so_imm value!");
+
unsigned Imm = ARM_AM::getSOImmValImm(V);
unsigned Rot = ARM_AM::getSOImmValRot(V);
@@ -377,7 +392,7 @@ static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
O << "#" << Imm << ", " << Rot;
// Pretty printed version.
if (VerboseAsm)
- O << ' ' << TAI->getCommentString()
+ O << ' ' << MAI->getCommentString()
<< ' ' << (int)ARM_AM::rotr32(Imm, Rot);
} else {
O << "#" << Imm;
@@ -389,7 +404,7 @@ static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) {
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isImm() && "Not a valid so_imm value!");
- printSOImm(O, MO.getImm(), VerboseAsm, TAI);
+ printSOImm(O, MO.getImm(), VerboseAsm, MAI);
}
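+
+// Worked example for the shifter-immediate form above: the ARM constant
+// 0xFF00 is the 8-bit value 0xFF rotated right by 24, so the operand prints
+// as "#255, 24", with the decoded value 65280 appended as an assembler
+// comment in verbose mode.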
/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
@@ -399,15 +414,15 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
assert(MO.isImm() && "Not a valid so_imm value!");
unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm());
unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm());
- printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI);
+ printSOImm(O, V1, VerboseAsm, MAI);
O << "\n\torr";
printPredicateOperand(MI, 2);
O << " ";
- printOperand(MI, 0);
+ printOperand(MI, 0);
O << ", ";
- printOperand(MI, 0);
+ printOperand(MI, 0);
O << ", ";
- printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
+ printSOImm(O, V2, VerboseAsm, MAI);
}
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
@@ -420,8 +435,7 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
const MachineOperand &MO2 = MI->getOperand(Op+1);
const MachineOperand &MO3 = MI->getOperand(Op+2);
- assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << getRegisterName(MO1.getReg());
// Print the shift opc.
O << ", "
@@ -429,8 +443,7 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
<< " ";
if (MO2.getReg()) {
- assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
- O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+ O << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
} else {
O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
@@ -447,7 +460,7 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
return;
}
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
@@ -460,8 +473,8 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
O << ", "
<< (char)ARM_AM::getAM2Op(MO3.getImm())
- << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
-
+ << getRegisterName(MO2.getReg());
+
if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
O << ", "
<< ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
@@ -483,8 +496,8 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
}
O << (char)ARM_AM::getAM2Op(MO2.getImm())
- << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
-
+ << getRegisterName(MO1.getReg());
+
if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
O << ", "
<< ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
@@ -495,18 +508,18 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
const MachineOperand &MO3 = MI->getOperand(Op+2);
-
+
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
if (MO2.getReg()) {
O << ", "
<< (char)ARM_AM::getAM3Op(MO3.getImm())
- << TM.getRegisterInfo()->get(MO2.getReg()).AsmName
+ << getRegisterName(MO2.getReg())
<< "]";
return;
}
-
+
if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
O << ", #"
<< (char)ARM_AM::getAM3Op(MO3.getImm())
@@ -520,7 +533,7 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
if (MO1.getReg()) {
O << (char)ARM_AM::getAM3Op(MO2.getImm())
- << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ << getRegisterName(MO1.getReg());
return;
}
@@ -530,7 +543,7 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
<< (char)ARM_AM::getAM3Op(MO2.getImm())
<< ImmOffs;
}
-
+
void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
const char *Modifier) {
const MachineOperand &MO1 = MI->getOperand(Op);
@@ -538,11 +551,18 @@ void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
if (Modifier && strcmp(Modifier, "submode") == 0) {
if (MO1.getReg() == ARM::SP) {
+ // FIXME
bool isLDM = (MI->getOpcode() == ARM::LDM ||
- MI->getOpcode() == ARM::LDM_RET);
+ MI->getOpcode() == ARM::LDM_RET ||
+ MI->getOpcode() == ARM::t2LDM ||
+ MI->getOpcode() == ARM::t2LDM_RET);
O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
} else
O << ARM_AM::getAMSubModeStr(Mode);
+ } else if (Modifier && strcmp(Modifier, "wide") == 0) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+ if (Mode == ARM_AM::ia)
+ O << ".w";
} else {
printOperand(MI, Op);
if (ARM_AM::getAM4WBFlag(MO2.getImm()))
@@ -559,7 +579,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
printOperand(MI, Op);
return;
}
-
+
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
if (Modifier && strcmp(Modifier, "submode") == 0) {
@@ -573,14 +593,14 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FSTM{D|S} and LSTM{D|S} operations.
- O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << getRegisterName(MO1.getReg());
if (ARM_AM::getAM5WBFlag(MO2.getImm()))
O << "!";
return;
}
-
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
-
+
+ O << "[" << getRegisterName(MO1.getReg());
+
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
O << ", #"
<< (char)ARM_AM::getAM5Op(MO2.getImm())
@@ -595,13 +615,13 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO3 = MI->getOperand(Op+2);
// FIXME: No support yet for specifying alignment.
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]";
+ O << "[" << getRegisterName(MO1.getReg()) << "]";
if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
if (MO2.getReg() == 0)
O << "!";
else
- O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+ O << ", " << getRegisterName(MO2.getReg());
}
}
@@ -614,7 +634,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
const MachineOperand &MO1 = MI->getOperand(Op);
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]";
+ O << "[pc, +" << getRegisterName(MO1.getReg()) << "]";
}
void
@@ -630,11 +650,26 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
//===--------------------------------------------------------------------===//
void
+ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(Op).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = (Mask & (1 << Pos)) == 0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
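+
+// Worked example: a mask of 0b0100 has two trailing zeros, so only bit 3 is
+// examined; it is clear, so a single 't' is printed and the mask suffix is
+// "t" -- an IT block of two 'then' instructions.  A mask of 0b1000 prints
+// nothing extra, i.e. a plain "it" covering one instruction.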
+
+void
ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
- O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]";
+ O << "[" << getRegisterName(MO1.getReg());
+ O << ", " << getRegisterName(MO2.getReg()) << "]";
}
void
@@ -649,9 +684,9 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
return;
}
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
if (MO3.getReg())
- O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName;
+ O << ", " << getRegisterName(MO3.getReg());
else if (unsigned ImmOffs = MO2.getImm()) {
O << ", #" << ImmOffs;
if (Scale > 1)
@@ -676,7 +711,7 @@ ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) {
void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = MO2.getImm())
O << ", #" << ImmOffs << " * 4";
O << "]";
@@ -684,20 +719,6 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
//===--------------------------------------------------------------------===//
-/// printT2SOImmOperand - T2SOImm is:
-/// 1. a 4-bit splat control value and 8 bit immediate value
-/// 2. a 5-bit rotate amount and a non-zero 8-bit immediate value
-/// represented by a normalizedin 7-bit value (msb is always 1)
-void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isImm() && "Not a valid so_imm value!");
-
- unsigned Imm = ARM_AM::getT2SOImmValDecode(MO.getImm());
- // Always print the immediate directly, as the "rotate" form
- // is deprecated in some contexts.
- O << "#" << Imm;
-}
-
// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
// register with shift forms.
// REG 0 0 - e.g. R5
@@ -708,7 +729,7 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) {
unsigned Reg = MO1.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
- O << TM.getRegisterInfo()->getAsmName(Reg);
+ O << getRegisterName(Reg);
// Print the shift opc.
O << ", "
@@ -724,7 +745,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
unsigned OffImm = MO2.getImm();
if (OffImm) // Don't print +0.
@@ -737,7 +758,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
// Don't print +0.
@@ -748,6 +769,22 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
O << "]";
}
+void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
+ int OpNum) {
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm << " * 4";
+ else if (OffImm > 0)
+ O << ", #+" << OffImm << " * 4";
+ O << "]";
+}
+
void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI,
int OpNum) {
const MachineOperand &MO1 = MI->getOperand(OpNum);
@@ -765,17 +802,15 @@ void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI,
const MachineOperand &MO2 = MI->getOperand(OpNum+1);
const MachineOperand &MO3 = MI->getOperand(OpNum+2);
- O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << "[" << getRegisterName(MO1.getReg());
- if (MO2.getReg()) {
- O << ", +"
- << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", " << getRegisterName(MO2.getReg());
- unsigned ShAmt = MO3.getImm();
- if (ShAmt) {
- assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl #" << ShAmt;
- }
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl #" << ShAmt;
}
O << "]";
}
@@ -799,14 +834,17 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
int Id = (int)MI->getOperand(OpNum).getImm();
- O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
+ O << MAI->getPrivateGlobalPrefix() << "PC" << Id;
}
void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
O << "{";
- for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+  // Always skip the first operand; it's the optional (and implicit) writeback.
+ for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isImplicit())
+ continue;
+ if ((int)i != OpNum+1) O << ", ";
printOperand(MI, i);
- if (i != e-1) O << ", ";
}
O << "}";
}
@@ -818,14 +856,14 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum,
// data itself.
if (!strcmp(Modifier, "label")) {
unsigned ID = MI->getOperand(OpNum).getImm();
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << ID << ":\n";
} else {
assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
unsigned CPI = MI->getOperand(OpNum).getIndex();
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
-
+
if (MCPE.isMachineConstantPoolEntry()) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
} else {
@@ -835,57 +873,119 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum,
}
void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
+ assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!");
+
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << JTI << '_' << MO2.getImm() << ":\n";
- const char *JTEntryDirective = TAI->getJumpTableDirective();
- if (!JTEntryDirective)
- JTEntryDirective = TAI->getData32bitsDirective();
+ const char *JTEntryDirective = MAI->getData32bitsDirective();
const MachineFunction *MF = MI->getParent()->getParent();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
- bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
- std::set<MachineBasicBlock*> JTSets;
+ bool UseSet= MAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
+ SmallPtrSet<MachineBasicBlock*, 8> JTSets;
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
- if (UseSet && JTSets.insert(MBB).second)
+ bool isNew = JTSets.insert(MBB);
+
+ if (UseSet && isNew)
printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB);
O << JTEntryDirective << ' ';
if (UseSet)
- O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << getFunctionNumber()
<< '_' << JTI << '_' << MO2.getImm()
<< "_set_" << MBB->getNumber();
else if (TM.getRelocationModel() == Reloc::PIC_) {
- printBasicBlockLabel(MBB, false, false, false);
- // If the arch uses custom Jump Table directives, don't calc relative to JT
- if (!TAI->getJumpTableDirective())
- O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
- << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
- } else
- printBasicBlockLabel(MBB, false, false, false);
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ O << '-' << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
+ } else {
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ }
+ if (i != e-1)
+ O << '\n';
+ }
+}
+
+void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImm() << ":\n";
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ bool ByteOffset = false, HalfWordOffset = false;
+ if (MI->getOpcode() == ARM::t2TBB)
+ ByteOffset = true;
+ else if (MI->getOpcode() == ARM::t2TBH)
+ HalfWordOffset = true;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (ByteOffset)
+ O << MAI->getData8bitsDirective();
+ else if (HalfWordOffset)
+ O << MAI->getData16bitsDirective();
+ if (ByteOffset || HalfWordOffset) {
+ O << '(';
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ O << "-" << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImm() << ")/2";
+ } else {
+ O << "\tb.w ";
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ }
if (i != e-1)
O << '\n';
}
+
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction instead.
+ if (ByteOffset && (JTBBs.size() & 1)) {
+ O << '\n';
+ EmitAlignment(1);
+ }
+}
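+
+// For a t2TBB jump table this emits entries of the form
+//
+//   .byte (LBB0_1-LJTI0_0_0)/2
+//
+// i.e. forward branch offsets from the table base, scaled to halfwords as
+// the TBB instruction expects; t2TBH does the same with halfword entries.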
+
+void ARMAsmPrinter::printTBAddrMode(const MachineInstr *MI, int OpNum) {
+ O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
+ if (MI->getOpcode() == ARM::t2TBH)
+ O << ", lsl #1";
+ O << ']';
}
+void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) {
+ O << MI->getOperand(OpNum).getImm();
+}
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode){
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
+
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
- case 'a': // Don't print "#" before a global var name or constant.
- case 'c': // Don't print "$" before a global var name or constant.
- printOperand(MI, OpNum, "no_hash");
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "[" << getRegisterName(MI->getOperand(OpNum).getReg()) << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ printNoHashImmediate(MI, OpNum);
return false;
case 'P': // Print a VFP double precision register.
printOperand(MI, OpNum);
@@ -898,7 +998,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (TM.getTargetData()->isBigEndian())
break;
// Fallthrough
- case 'H': // Write second word of DI / DF reference.
+ case 'H': // Write second word of DI / DF reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNum).isReg() ||
OpNum+1 == MI->getNumOperands() ||
@@ -907,7 +1007,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
++OpNum; // Return the high-part.
}
}
-
+
printOperand(MI, OpNum);
return false;
}
@@ -917,7 +1017,10 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
const char *ExtraCode) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
- printAddrMode2Operand(MI, OpNum);
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << getRegisterName(MO.getReg()) << "]";
return false;
}
@@ -938,16 +1041,47 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
}}
// Call the autogenerated instruction printer routines.
+ processDebugLoc(MI, true);
printInstruction(MI);
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+ processDebugLoc(MI, false);
}
-bool ARMAsmPrinter::doInitialization(Module &M) {
-
- bool Result = AsmPrinter::doInitialization(M);
- DW = getAnalysisIfAvailable<DwarfWriter>();
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
+ // Declare all the text sections up front (before the DWARF sections
+ // emitted by AsmPrinter::doInitialization) so the assembler will keep
+ // them together at the beginning of the object file. This helps
+      // avoid out-of-range branches that are due to a fundamental limitation of
+ // the way symbol offsets are encoded with the current Darwin ARM
+ // relocations.
+ TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextSection());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+ if (RelocM == Reloc::DynamicNoPIC) {
+ const MCSection *sect =
+ TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ } else {
+ const MCSection *sect =
+ TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ }
+ }
+ }
- // Thumb-2 instructions are supported only in unified assembler syntax mode.
- if (Subtarget->hasThumb2())
+ // Use unified assembler syntax mode for Thumb.
+ if (Subtarget->isThumb())
O << "\t.syntax unified\n";
// Emit ARM Build Attributes
@@ -975,22 +1109,16 @@ bool ARMAsmPrinter::doInitialization(Module &M) {
O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n"
<< "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n";
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard)
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_HardFP_use << ", 3\n"
+ << "\t.eabi_attribute " << ARMBuildAttrs::ABI_VFP_args << ", 1\n";
+
// FIXME: Should we signal R9 usage?
}
-
- return Result;
-}
-
-/// PrintUnmangledNameSafely - Print out the printable characters in the name.
-/// Don't print things like \\n or \\0.
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
- for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
- Name != E; ++Name)
- if (isprint(*Name))
- OS << *Name;
}
-void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer()) // External global require no code
@@ -1009,10 +1137,8 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
return;
}
- std::string name = Mang->getValueName(GVar);
+ std::string name = Mang->getMangledName(GVar);
Constant *C = GVar->getInitializer();
- if (isa<MDNode>(C) || isa<MDString>(C))
- return;
const Type *Type = C->getType();
unsigned Size = TD->getTypeAllocSize(Type);
unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -1023,14 +1149,16 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
if (Subtarget->isTargetELF())
O << "\t.type " << name << ",%object\n";
- if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() &&
- !(isDarwin &&
- TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) {
- // FIXME: This seems to be pretty darwin-specific
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GVar, Mang, TM);
+ OutStreamer.SwitchSection(TheSection);
+ // FIXME: get this stuff from section kind flags.
+ if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() &&
+ // Don't put things that should go in the cstring section into "comm".
+ !TheSection->getKind().isMergeableCString()) {
if (GVar->hasExternalLinkage()) {
- SwitchToSection(TAI->SectionForGlobal(GVar));
- if (const char *Directive = TAI->getZeroFillDirective()) {
+ if (const char *Directive = MAI->getZeroFillDirective()) {
O << "\t.globl\t" << name << "\n";
O << Directive << "__DATA, __common, " << name << ", "
<< Size << ", " << Align << "\n";
@@ -1043,57 +1171,56 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
if (isDarwin) {
if (GVar->hasLocalLinkage()) {
- O << TAI->getLCOMMDirective() << name << "," << Size
+ O << MAI->getLCOMMDirective() << name << "," << Size
<< ',' << Align;
} else if (GVar->hasCommonLinkage()) {
- O << TAI->getCOMMDirective() << name << "," << Size
+ O << MAI->getCOMMDirective() << name << "," << Size
<< ',' << Align;
} else {
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ OutStreamer.SwitchSection(TheSection);
O << "\t.globl " << name << '\n'
- << TAI->getWeakDefDirective() << name << '\n';
+ << MAI->getWeakDefDirective() << name << '\n';
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << TAI->getCommentString() << ' ';
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t\t\t" << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << '\n';
EmitGlobalConstant(C);
return;
}
- } else if (TAI->getLCOMMDirective() != NULL) {
+ } else if (MAI->getLCOMMDirective() != NULL) {
if (GVar->hasLocalLinkage()) {
- O << TAI->getLCOMMDirective() << name << "," << Size;
+ O << MAI->getLCOMMDirective() << name << "," << Size;
} else {
- O << TAI->getCOMMDirective() << name << "," << Size;
- if (TAI->getCOMMDirectiveTakesAlignment())
- O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ O << MAI->getCOMMDirective() << name << "," << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
}
} else {
- SwitchToSection(TAI->SectionForGlobal(GVar));
if (GVar->hasLocalLinkage())
O << "\t.local\t" << name << "\n";
- O << TAI->getCOMMDirective() << name << "," << Size;
- if (TAI->getCOMMDirectiveTakesAlignment())
- O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ O << MAI->getCOMMDirective() << name << "," << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << "," << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
}
if (VerboseAsm) {
- O << "\t\t" << TAI->getCommentString() << " ";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t" << MAI->getCommentString() << " ";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << "\n";
return;
}
}
- SwitchToSection(TAI->SectionForGlobal(GVar));
switch (GVar->getLinkage()) {
- case GlobalValue::CommonLinkage:
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::WeakAnyLinkage:
- case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
if (isDarwin) {
O << "\t.globl " << name << "\n"
<< "\t.weak_definition " << name << "\n";
@@ -1101,28 +1228,27 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
O << "\t.weak " << name << "\n";
}
break;
- case GlobalValue::AppendingLinkage:
- // FIXME: appending linkage variables should go into a section of
- // their name or something. For now, just emit them as external.
- case GlobalValue::ExternalLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
O << "\t.globl " << name << "\n";
- // FALL THROUGH
- case GlobalValue::PrivateLinkage:
- case GlobalValue::InternalLinkage:
break;
- default:
- assert(0 && "Unknown linkage type!");
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
}
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << TAI->getCommentString() << " ";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t\t\t" << MAI->getCommentString() << " ";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << "\n";
- if (TAI->hasDotTypeDotSizeDirective())
+ if (MAI->hasDotTypeDotSizeDirective())
O << "\t.size " << name << ", " << Size << "\n";
EmitGlobalConstant(C);
@@ -1131,83 +1257,36 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
bool ARMAsmPrinter::doFinalization(Module &M) {
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I);
-
if (Subtarget->isTargetDarwin()) {
- SwitchToDataSection("");
-
- // Output stubs for dynamically-linked functions
- for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
- i != e; ++i) {
- if (TM.getRelocationModel() == Reloc::PIC_)
- SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs,"
- "none,16", 0);
- else
- SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs,"
- "none,12", 0);
+    // All Darwin targets use Mach-O.
+ TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
- EmitAlignment(2);
- O << "\t.code\t32\n";
-
- const char *p = i->getKeyData();
- printSuffixedName(p, "$stub");
- O << ":\n";
- O << "\t.indirect_symbol " << p << "\n";
- O << "\tldr ip, ";
- printSuffixedName(p, "$slp");
- O << "\n";
- if (TM.getRelocationModel() == Reloc::PIC_) {
- printSuffixedName(p, "$scv");
- O << ":\n";
- O << "\tadd ip, pc, ip\n";
- }
- O << "\tldr pc, [ip, #0]\n";
- printSuffixedName(p, "$slp");
- O << ":\n";
- O << "\t.long\t";
- printSuffixedName(p, "$lazy_ptr");
- if (TM.getRelocationModel() == Reloc::PIC_) {
- O << "-(";
- printSuffixedName(p, "$scv");
- O << "+8)\n";
- } else
- O << "\n";
- SwitchToDataSection(".lazy_symbol_pointer", 0);
- printSuffixedName(p, "$lazy_ptr");
- O << ":\n";
- O << "\t.indirect_symbol " << p << "\n";
- O << "\t.long\tdyld_stub_binding_helper\n";
- }
- O << "\n";
+ O << '\n';
// Output non-lazy-pointers for external and common global variables.
if (!GVNonLazyPtrs.empty()) {
- SwitchToDataSection("\t.non_lazy_symbol_pointer", 0);
- for (StringSet<>::iterator i = GVNonLazyPtrs.begin(),
- e = GVNonLazyPtrs.end(); i != e; ++i) {
- const char *p = i->getKeyData();
- printSuffixedName(p, "$non_lazy_ptr");
- O << ":\n";
- O << "\t.indirect_symbol " << p << "\n";
+      // Switch to the ".non_lazy_symbol_pointer" section.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+ for (StringMap<std::string>::iterator I = GVNonLazyPtrs.begin(),
+ E = GVNonLazyPtrs.end(); I != E; ++I) {
+ O << I->second << ":\n";
+ O << "\t.indirect_symbol " << I->getKeyData() << "\n";
O << "\t.long\t0\n";
}
}
if (!HiddenGVNonLazyPtrs.empty()) {
- SwitchToSection(TAI->getDataSection());
- for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(),
- e = HiddenGVNonLazyPtrs.end(); i != e; ++i) {
- const char *p = i->getKeyData();
- EmitAlignment(2);
- printSuffixedName(p, "$non_lazy_ptr");
- O << ":\n";
- O << "\t.long " << p << "\n";
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+ for (StringMap<std::string>::iterator I = HiddenGVNonLazyPtrs.begin(),
+ E = HiddenGVNonLazyPtrs.end(); I != E; ++I) {
+ O << I->second << ":\n";
+ O << "\t.long " << I->getKeyData() << "\n";
}
}
-
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
@@ -1219,24 +1298,8 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
-/// createARMCodePrinterPass - Returns a pass that prints the ARM
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description. This should work
-/// regardless of whether the function is in SSA form.
-///
-FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
- ARMBaseTargetMachine &tm,
- bool verbose) {
- return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
-}
-
-namespace {
- static struct Register {
- Register() {
- ARMBaseTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
- }
- } Registrator;
-}
-
// Force static initialization.
-extern "C" void LLVMInitializeARMAsmPrinter() { }
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+}
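
The doFinalization() hunk above also switches the Darwin non-lazy-pointer
bookkeeping from a StringSet, which re-derived each label through
printSuffixedName, to a StringMap whose mapped value is the precomputed
$non_lazy_ptr label. A minimal standalone sketch of the resulting emission
loop, using std::map and printf in place of LLVM's StringMap and OutStreamer
(the directives match the hunk; everything else is illustrative):

#include <cstdio>
#include <map>
#include <string>

// Emit one Mach-O non-lazy symbol pointer per entry; the key is the
// referenced symbol and the mapped value is its $non_lazy_ptr label.
static void emitNonLazyPtrs(const std::map<std::string, std::string> &Ptrs) {
  std::printf("\t.non_lazy_symbol_pointer\n");
  std::printf("\t.align 2\n");
  for (std::map<std::string, std::string>::const_iterator I = Ptrs.begin(),
       E = Ptrs.end(); I != E; ++I) {
    std::printf("%s:\n", I->second.c_str());
    std::printf("\t.indirect_symbol %s\n", I->first.c_str());
    std::printf("\t.long\t0\n");   // dyld fills in the slot at bind time
  }
}

int main() {
  std::map<std::string, std::string> Ptrs;
  Ptrs["_puts"] = "L_puts$non_lazy_ptr";
  emitNonLazyPtrs(Ptrs);
  return 0;
}
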
diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile
index ce36cec47b6e..208beccce8a3 100644
--- a/lib/Target/ARM/AsmPrinter/Makefile
+++ b/lib/Target/ARM/AsmPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===##
+##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9c46fe0484b6..6e09eb2ff4d5 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -12,6 +12,8 @@ tablegen(ARMGenCallingConv.inc -gen-callingconv)
tablegen(ARMGenSubtarget.inc -gen-subtarget)
add_llvm_target(ARMCodeGen
+ ARMBaseInstrInfo.cpp
+ ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
@@ -20,14 +22,17 @@ add_llvm_target(ARMCodeGen
ARMISelLowering.cpp
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
+ ARMMCAsmInfo.cpp
ARMRegisterInfo.cpp
ARMSubtarget.cpp
- ARMTargetAsmInfo.cpp
ARMTargetMachine.cpp
+ NEONPreAllocPass.cpp
Thumb1InstrInfo.cpp
Thumb1RegisterInfo.cpp
+ Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
+ Thumb2SizeReduction.cpp
)
target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 9a3b9be5b345..a8dd38cb362e 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter AsmParser TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
new file mode 100644
index 000000000000..821b872ac7cd
--- /dev/null
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -0,0 +1,394 @@
+//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "neon-prealloc"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN NEONPreAllocPass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID;
+ NEONPreAllocPass() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "NEON register pre-allocation pass";
+ }
+
+ private:
+ bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
+ };
+
+ char NEONPreAllocPass::ID = 0;
+}
+
+static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
+ unsigned &Offset, unsigned &Stride) {
+ // Default to unit stride with no offset.
+ Stride = 1;
+ Offset = 0;
+
+ switch (Opcode) {
+ default:
+ break;
+
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2d64:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ FirstOpnd = 0;
+ NumRegs = 2;
+ return true;
+
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ FirstOpnd = 0;
+ NumRegs = 4;
+ return true;
+
+ case ARM::VLD2LNq16a:
+ case ARM::VLD2LNq32a:
+ FirstOpnd = 0;
+ NumRegs = 2;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD2LNq16b:
+ case ARM::VLD2LNq32b:
+ FirstOpnd = 0;
+ NumRegs = 2;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d64:
+ case ARM::VLD3LNd8:
+ case ARM::VLD3LNd16:
+ case ARM::VLD3LNd32:
+ FirstOpnd = 0;
+ NumRegs = 3;
+ return true;
+
+ case ARM::VLD3q8a:
+ case ARM::VLD3q16a:
+ case ARM::VLD3q32a:
+ FirstOpnd = 0;
+ NumRegs = 3;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD3q8b:
+ case ARM::VLD3q16b:
+ case ARM::VLD3q32b:
+ FirstOpnd = 0;
+ NumRegs = 3;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD3LNq16a:
+ case ARM::VLD3LNq32a:
+ FirstOpnd = 0;
+ NumRegs = 3;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD3LNq16b:
+ case ARM::VLD3LNq32b:
+ FirstOpnd = 0;
+ NumRegs = 3;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d64:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ FirstOpnd = 0;
+ NumRegs = 4;
+ return true;
+
+ case ARM::VLD4q8a:
+ case ARM::VLD4q16a:
+ case ARM::VLD4q32a:
+ FirstOpnd = 0;
+ NumRegs = 4;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD4q8b:
+ case ARM::VLD4q16b:
+ case ARM::VLD4q32b:
+ FirstOpnd = 0;
+ NumRegs = 4;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD4LNq16a:
+ case ARM::VLD4LNq32a:
+ FirstOpnd = 0;
+ NumRegs = 4;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VLD4LNq16b:
+ case ARM::VLD4LNq32b:
+ FirstOpnd = 0;
+ NumRegs = 4;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VST2d8:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d64:
+ case ARM::VST2LNd8:
+ case ARM::VST2LNd16:
+ case ARM::VST2LNd32:
+ FirstOpnd = 3;
+ NumRegs = 2;
+ return true;
+
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ FirstOpnd = 3;
+ NumRegs = 4;
+ return true;
+
+ case ARM::VST2LNq16a:
+ case ARM::VST2LNq32a:
+ FirstOpnd = 3;
+ NumRegs = 2;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VST2LNq16b:
+ case ARM::VST2LNq32b:
+ FirstOpnd = 3;
+ NumRegs = 2;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d64:
+ case ARM::VST3LNd8:
+ case ARM::VST3LNd16:
+ case ARM::VST3LNd32:
+ FirstOpnd = 3;
+ NumRegs = 3;
+ return true;
+
+ case ARM::VST3q8a:
+ case ARM::VST3q16a:
+ case ARM::VST3q32a:
+ FirstOpnd = 4;
+ NumRegs = 3;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VST3q8b:
+ case ARM::VST3q16b:
+ case ARM::VST3q32b:
+ FirstOpnd = 4;
+ NumRegs = 3;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VST3LNq16a:
+ case ARM::VST3LNq32a:
+ FirstOpnd = 3;
+ NumRegs = 3;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VST3LNq16b:
+ case ARM::VST3LNq32b:
+ FirstOpnd = 3;
+ NumRegs = 3;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d64:
+ case ARM::VST4LNd8:
+ case ARM::VST4LNd16:
+ case ARM::VST4LNd32:
+ FirstOpnd = 3;
+ NumRegs = 4;
+ return true;
+
+ case ARM::VST4q8a:
+ case ARM::VST4q16a:
+ case ARM::VST4q32a:
+ FirstOpnd = 4;
+ NumRegs = 4;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VST4q8b:
+ case ARM::VST4q16b:
+ case ARM::VST4q32b:
+ FirstOpnd = 4;
+ NumRegs = 4;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VST4LNq16a:
+ case ARM::VST4LNq32a:
+ FirstOpnd = 3;
+ NumRegs = 4;
+ Offset = 0;
+ Stride = 2;
+ return true;
+
+ case ARM::VST4LNq16b:
+ case ARM::VST4LNq32b:
+ FirstOpnd = 3;
+ NumRegs = 4;
+ Offset = 1;
+ Stride = 2;
+ return true;
+
+ case ARM::VTBL2:
+ FirstOpnd = 1;
+ NumRegs = 2;
+ return true;
+
+ case ARM::VTBL3:
+ FirstOpnd = 1;
+ NumRegs = 3;
+ return true;
+
+ case ARM::VTBL4:
+ FirstOpnd = 1;
+ NumRegs = 4;
+ return true;
+
+ case ARM::VTBX2:
+ FirstOpnd = 2;
+ NumRegs = 2;
+ return true;
+
+ case ARM::VTBX3:
+ FirstOpnd = 2;
+ NumRegs = 3;
+ return true;
+
+ case ARM::VTBX4:
+ FirstOpnd = 2;
+ NumRegs = 4;
+ return true;
+ }
+
+ return false;
+}
+
+bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned FirstOpnd, NumRegs, Offset, Stride;
+ if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
+ continue;
+
+ MachineBasicBlock::iterator NextI = next(MBBI);
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ // For now, just assign a fixed set of adjacent registers.
+ // This leaves plenty of room for future improvements.
+ static const unsigned NEONDRegs[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7
+ };
+ MO.setReg(NEONDRegs[Offset + R * Stride]);
+
+ if (MO.isUse()) {
+ // Insert a copy from VirtReg.
+ TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
+ ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+ if (MO.isKill()) {
+ MachineInstr *CopyMI = prior(MBBI);
+ CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
+ }
+ MO.setIsKill();
+ } else if (MO.isDef() && !MO.isDead()) {
+ // Add a copy to VirtReg.
+ TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
+ ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+ }
+ }
+ }
+
+ return Modified;
+}
+
+bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= PreAllocNEONRegisters(MBB);
+ }
+
+ return Modified;
+}
+
+/// createNEONPreAllocPass - Returns an instance of the NEON register
+/// pre-allocation pass.
+FunctionPass *llvm::createNEONPreAllocPass() {
+ return new NEONPreAllocPass();
+}
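
For a concrete picture of the fixed-assignment scheme above: operand R of a
matched instruction receives NEONDRegs[Offset + R * Stride], so an "a"-form
VLD3q (Offset 0, Stride 2) lands in the even registers D0/D2/D4 while the "b"
form (Offset 1) takes D1/D3/D5. A tiny sketch of just that indexing, outside
any LLVM context:

#include <cstdio>

int main() {
  static const char *NEONDRegs[8] = {
    "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
  };
  // Parameters as isNEONMultiRegOp() would produce for a VLD3q*b opcode.
  unsigned NumRegs = 3, Offset = 1, Stride = 2;
  for (unsigned R = 0; R < NumRegs; ++R)
    std::printf("operand %u -> %s\n", R, NEONDRegs[Offset + R * Stride]);
  return 0;
}
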
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 4d3200b445c1..a961a576f40d 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -226,3 +226,31 @@ etc. Almost all Thumb instructions clobber condition code.
//===---------------------------------------------------------------------===//
Add ldmia, stmia support.
+
+//===---------------------------------------------------------------------===//
+
+Thumb load / store address mode offsets are scaled. The values kept in the
+instruction operands are pre-scaled values. This probably ought to be changed
+to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
+
+//===---------------------------------------------------------------------===//
+
+We need to make (some of the) Thumb1 instructions predicable. That will allow
+shrinking of predicated Thumb2 instructions. To allow this, we need to be able
+to toggle the 's' bit since they do not set CPSR when they are inside IT blocks.
+
+//===---------------------------------------------------------------------===//
+
+Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
+
+//===---------------------------------------------------------------------===//
+
+Thumb1 immediate fields sometimes keep pre-scaled values. See
+Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
+Thumb2. (A short sketch of the scaling issue follows these notes.)
+
+//===---------------------------------------------------------------------===//
+
+Rather than having tBR_JTr print a ".align 2" and having the constant island
+pass pad it, add a target-specific ALIGN instruction instead. That way,
+GetInstSizeInBytes won't have to over-estimate. It could also be used by a
+loop alignment pass.
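
A sketch of the scaling issue mentioned above, under the assumption that the
Thumb1 word-sized LDR/STR encoding keeps a 5-bit field counting words, i.e.
byte offsets 0-124 in steps of 4 (the helper name is made up):

#include <cstdio>

// Hypothetical helper: convert a byte offset into the scaled 5-bit field.
static bool encodeThumbLdrOffset(unsigned ByteOff, unsigned &Imm5) {
  if (ByteOff % 4 != 0)
    return false;                 // must be word aligned
  unsigned Scaled = ByteOff / 4;
  if (Scaled > 31)
    return false;                 // only 5 bits available after scaling
  Imm5 = Scaled;
  return true;
}

int main() {
  unsigned Imm5 = 0;
  if (encodeThumbLdrOffset(124, Imm5))
    std::printf("byte offset 124 -> imm5 %u\n", Imm5);   // prints 31
  if (!encodeThumbLdrOffset(126, Imm5))
    std::printf("byte offset 126 does not encode\n");    // not word aligned
  return 0;
}
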
diff --git a/lib/Target/ARM/README-Thumb2.txt b/lib/Target/ARM/README-Thumb2.txt
new file mode 100644
index 000000000000..e7c2552d9e4c
--- /dev/null
+++ b/lib/Target/ARM/README-Thumb2.txt
@@ -0,0 +1,6 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend (Thumb2 specific).
+//===---------------------------------------------------------------------===//
+
+Make sure jumptable destinations are below the jumptable in order to make use
+of tbb / tbh.
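
A rough sketch of why that layout matters, assuming the usual tbb semantics:
the branch target is a base PC plus twice an unsigned byte read from the
table, so only forward destinations are encodable (all numbers illustrative):

#include <cstdio>

int main() {
  unsigned Base = 0x1000;                  // PC value used by the table branch
  unsigned char Table[3] = { 2, 8, 14 };   // unsigned entries: forward only
  for (unsigned i = 0; i < 3; ++i)
    std::printf("case %u -> 0x%x\n", i, Base + 2u * Table[i]);
  return 0;
}
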
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index f3377f91ab96..8fb1da30088f 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -537,3 +537,66 @@ Split out LDR (literal) from normal ARM LDR instruction. Also consider splitting
 LDR into imm12 and so_reg forms. This allows us to clean up some code, e.g.
 ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
 while ARMConstantIslandPass only needs to worry about LDR (literal).
+
+//===---------------------------------------------------------------------===//
+
+We need to fix constant isel for ARMv6t2 to use MOVT.
+
+//===---------------------------------------------------------------------===//
+
+Constant island pass should make use of full range SoImm values for LEApcrel.
+Be careful though as the last attempt caused infinite looping on lencod.
+
+//===---------------------------------------------------------------------===//
+
+Predication issue. This function:
+
+extern unsigned array[ 128 ];
+int foo( int x ) {
+ int y;
+ y = array[ x & 127 ];
+ if ( x & 128 )
+ y = 123456789 & ( y >> 2 );
+ else
+ y = 123456789 & y;
+ return y;
+}
+
+compiles to:
+
+_foo:
+ and r1, r0, #127
+ ldr r2, LCPI1_0
+ ldr r2, [r2]
+ ldr r1, [r2, +r1, lsl #2]
+ mov r2, r1, lsr #2
+ tst r0, #128
+ moveq r2, r1
+ ldr r0, LCPI1_1
+ and r0, r2, r0
+ bx lr
+
+It would be better to do something like this, to fold the shift into the
+conditional move:
+
+ and r1, r0, #127
+ ldr r2, LCPI1_0
+ ldr r2, [r2]
+ ldr r1, [r2, +r1, lsl #2]
+ tst r0, #128
+ movne r1, r1, lsr #2
+ ldr r0, LCPI1_1
+ and r0, r1, r0
+ bx lr
+
+This saves an instruction and a register.
+
+//===---------------------------------------------------------------------===//
+
+add/sub/and/or + i32 imm can be simplified by folding part of the immediate
+into the operation when the whole immediate does not encode as one SoImm
+(see the sketch after these notes).
+
+//===---------------------------------------------------------------------===//
+
+It might be profitable to CSE MOVi16 if there are lots of 32-bit immediates
+with the same bottom half.
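
To make the folding note concrete: an ARM SoImm is an 8-bit payload rotated
right by an even amount, so 0x10001 does not encode directly but each half
does, turning a constant-pool load plus an add into two adds. A self-contained
sketch; the helpers are illustrative, not LLVM's ARM_AM API:

#include <cstdio>

static unsigned rotl32(unsigned V, unsigned N) {
  N &= 31;
  return N ? ((V << N) | (V >> (32 - N))) : V;
}

// A value is a valid SoImm if some even rotate-left exposes an 8-bit payload
// (equivalently, it is an 8-bit value rotated right by an even amount).
static bool isSoImmSketch(unsigned V) {
  for (unsigned R = 0; R < 32; R += 2)
    if (rotl32(V, R) <= 0xFF)
      return true;
  return false;
}

int main() {
  unsigned Imm = 0x10001;
  unsigned Lo = Imm & 0xFFFF, Hi = Imm & ~0xFFFFu;
  std::printf("0x%x encodes: %d\n", Imm, isSoImmSketch(Imm));        // 0
  std::printf("0x%x / 0x%x encode: %d / %d\n", Hi, Lo,
              isSoImmSketch(Hi), isSoImmSketch(Lo));                 // 1 / 1
  return 0;
}
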
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 000000000000..163a0a987584
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMTarget, llvm::TheThumbTarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMTarget, "arm", "ARM");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ Y(TheThumbTarget, "thumb", "Thumb");
+}
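
The new TargetInfo file replaces the file-local Registrator struct deleted
from ARMAsmPrinter.cpp above: registration objects are now constructed inside
an initializer that the driver calls explicitly rather than from a static
constructor. A generic sketch of the idiom, with made-up names standing in
for the LLVM registry:

#include <cstdio>
#include <map>
#include <string>

struct TargetSketch { std::string Name, Desc; };

static std::map<std::string, TargetSketch *> &registry() {
  static std::map<std::string, TargetSketch *> R;   // built on first use
  return R;
}

// The constructor does the registration, so declaring an object is enough.
struct RegisterTargetSketch {
  RegisterTargetSketch(TargetSketch &T, const char *Name, const char *Desc) {
    T.Name = Name;
    T.Desc = Desc;
    registry()[Name] = &T;
  }
};

static TargetSketch TheARMSketch, TheThumbSketch;

extern "C" void InitializeARMTargetInfoSketch() {
  RegisterTargetSketch X(TheARMSketch, "arm", "ARM");
  RegisterTargetSketch Y(TheThumbSketch, "thumb", "Thumb");
}

int main() {
  InitializeARMTargetInfoSketch();
  for (std::map<std::string, TargetSketch *>::iterator I = registry().begin(),
       E = registry().end(); I != E; ++I)
    std::printf("%s - %s\n", I->first.c_str(), I->second->Desc.c_str());
  return 0;
}
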
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..3910bb02e219
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMInfo
+ ARMTargetInfo.cpp
+ )
+
+add_dependencies(LLVMARMInfo ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/TargetInfo/Makefile b/lib/Target/ARM/TargetInfo/Makefile
new file mode 100644
index 000000000000..6292ab14b346
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index e13a8117bf2f..7eed30edf25c 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,63 +22,29 @@
using namespace llvm;
-Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) {
}
-bool Thumb1InstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- unsigned oc = MI.getOpcode();
- switch (oc) {
- default:
- return false;
- case ARM::tMOVr:
- case ARM::tMOVhir2lor:
- case ARM::tMOVlor2hir:
- case ARM::tMOVhir2hir:
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid Thumb MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
-}
-
-unsigned Thumb1InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::tRestore:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-unsigned Thumb1InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::tSpill:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+bool
+Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case ARM::tBX_RET:
+ case ARM::tBX_RET_vararg:
+ case ARM::tPOP_RET:
+ case ARM::tB:
+ case ARM::tBR_JTr:
+ return true;
+ default:
break;
}
- return 0;
+
+ return false;
}
bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
@@ -91,15 +57,15 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (DestRC == ARM::GPRRegisterClass) {
if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
return true;
} else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
return true;
}
} else if (DestRC == ARM::tGPRRegisterClass) {
if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
return true;
} else if (SrcRC == ARM::tGPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
@@ -120,17 +86,19 @@ canFoldMemoryOperand(const MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::tMOVr:
- case ARM::tMOVlor2hir:
- case ARM::tMOVhir2lor:
- case ARM::tMOVhir2hir: {
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVgpr2gpr: {
if (OpNum == 0) { // move -> store
unsigned SrcReg = MI->getOperand(1).getReg();
- if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ !isARMLowRegister(SrcReg))
// tSpill cannot take a high register operand.
return false;
} else { // move -> load
unsigned DstReg = MI->getOperand(0).getReg();
- if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ !isARMLowRegister(DstReg))
// tRestore cannot target a high register operand.
return false;
}
@@ -148,36 +116,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
if (RC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
}
}
-void Thumb1InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
-
- assert(RC == ARM::GPRRegisterClass && "Unknown regclass!");
- if (RC == ARM::GPRRegisterClass) {
- Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
- }
-
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
-
void Thumb1InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
@@ -185,33 +134,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
if (RC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
- .addFrameIndex(FI).addImm(0);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+ .addFrameIndex(FI).addImm(0));
}
}
-void Thumb1InstrInfo::
-loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
-
- if (RC == ARM::GPRRegisterClass) {
- Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
- }
-
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
-
bool Thumb1InstrInfo::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -223,6 +155,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ MIB.addReg(0); // No write back.
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
// Add the callee-saved register as live-in. It's killed at the spill.
@@ -242,7 +176,12 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
- MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP));
+ AddDefaultPred(MIB);
+ MIB.addReg(0); // No write back.
+
+  unsigned NumRegs = 0;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
@@ -250,15 +189,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (isVarArg)
continue;
Reg = ARM::PC;
- PopMI->setDesc(get(ARM::tPOP_RET));
+ (*MIB).setDesc(get(ARM::tPOP_RET));
MI = MBB.erase(MI);
}
- PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+ MIB.addReg(Reg, getDefRegState(true));
+ ++NumRegs;
}
// It's illegal to emit pop instruction without operands.
- if (PopMI->getNumOperands() > 0)
- MBB.insert(MI, PopMI);
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
return true;
}
@@ -274,27 +214,30 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::tMOVr:
- case ARM::tMOVlor2hir:
- case ARM::tMOVhir2lor:
- case ARM::tMOVhir2hir: {
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVgpr2gpr: {
if (OpNum == 0) { // move -> store
unsigned SrcReg = MI->getOperand(1).getReg();
bool isKill = MI->getOperand(1).isKill();
- if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ !isARMLowRegister(SrcReg))
// tSpill cannot take a high register operand.
break;
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
+ NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
} else { // move -> load
unsigned DstReg = MI->getOperand(0).getReg();
- if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ !isARMLowRegister(DstReg))
// tRestore cannot target a high register operand.
break;
bool isDead = MI->getOperand(0).isDead();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
- .addFrameIndex(FI).addImm(0);
+ NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
+ .addReg(DstReg,
+ RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addImm(0));
}
break;
}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 1bfa1d0bdc34..13cc5787b5b9 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -27,6 +27,13 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+  // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ // Return true if the block does not fall through.
+ bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
@@ -40,14 +47,6 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
- bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
@@ -58,21 +57,11 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
- void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
@@ -80,7 +69,7 @@ public:
MachineInstr* MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const;
-
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
const SmallVectorImpl<unsigned> &Ops,
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 92f01d1006dd..3c896da4c0ca 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -13,12 +13,15 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,14 +33,11 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static cl::opt<bool>
-ThumbRegScavenging("enable-thumb-reg-scavenging",
- cl::Hidden,
- cl::desc("Enable register scavenging on Thumb"));
-
-Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii,
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMBaseRegisterInfo(tii, sti) {
}
@@ -46,20 +46,24 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii,
/// specified immediate.
void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Val,
- const TargetInstrInfo *TII,
- DebugLoc dl) const {
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+ Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg)
- .addConstantPoolIndex(Idx);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
const TargetRegisterClass*
-Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
+Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const {
if (isARMLowRegister(Reg))
return ARM::tGPRRegisterClass;
switch (Reg) {
@@ -75,9 +79,16 @@ Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
bool
Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- return ThumbRegScavenging;
+ return true;
+}
+
+bool
+Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF)
+ const {
+ return true;
}
+
bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
@@ -91,6 +102,7 @@ bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
/// in a register using mov / mvn sequences or load the immediate from a
@@ -103,6 +115,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
const TargetInstrInfo &TII,
const Thumb1RegisterInfo& MRI,
DebugLoc dl) {
+ MachineFunction &MF = *MBB.getParent();
bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg));
bool isSub = false;
@@ -117,31 +130,31 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
unsigned LdReg = DestReg;
if (DestReg == ARM::SP) {
assert(BaseReg == ARM::SP && "Unexpected!");
- LdReg = ARM::R3;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
- .addReg(ARM::R3, RegState::Kill);
+ LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
}
if (NumBytes <= 255 && NumBytes >= 0)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes);
else if (NumBytes < 0 && NumBytes >= -255) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
.addReg(LdReg, RegState::Kill);
} else
- MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl);
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes);
// Emit add / sub.
int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
- const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
- TII.get(Opc), DestReg);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
if (DestReg == ARM::SP || isSub)
MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
else
MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
- if (DestReg == ARM::SP)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
- .addReg(ARM::R12, RegState::Kill);
+ AddDefaultPred(MIB);
}
/// calcNumMI - Returns the number of instructions required to materialize
@@ -187,6 +200,8 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
unsigned Scale = 1;
int Opc = 0;
int ExtraOpc = 0;
+ bool NeedCC = false;
+ bool NeedPred = false;
if (DestReg == BaseReg && BaseReg == ARM::SP) {
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
@@ -213,7 +228,16 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
if (DestReg != BaseReg)
DstNotEqBase = true;
NumBits = 8;
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ if (DestReg == ARM::SP) {
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ } else {
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NumBits = 8;
+ NeedPred = NeedCC = true;
+ }
isTwoAddr = true;
}
@@ -233,8 +257,10 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
unsigned Chunk = (1 << 3) - 1;
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
Bytes -= ThisVal;
- BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+ const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MachineInstrBuilder MIB =
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg));
+ AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
} else {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
.addReg(BaseReg, RegState::Kill);
@@ -248,13 +274,22 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Bytes -= ThisVal;
ThisVal /= Scale;
// Build the new tADD / tSUB.
- if (isTwoAddr)
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(DestReg).addImm(ThisVal);
+ if (isTwoAddr) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+      MIB.addReg(DestReg).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ }
else {
bool isKill = BaseReg != ARM::SP;
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
BaseReg = DestReg;
if (Opc == ARM::tADDrSPi) {
@@ -265,15 +300,17 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Scale = 1;
Chunk = ((1 << NumBits) - 1) * Scale;
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- isTwoAddr = true;
+ NeedPred = NeedCC = isTwoAddr = true;
}
}
}
- if (ExtraOpc)
- BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
- .addReg(DestReg, RegState::Kill)
- .addImm(((unsigned)NumBytes) & 3);
+ if (ExtraOpc) {
+ const TargetInstrDesc &TID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3));
+ }
}
static void emitSPUpdate(MachineBasicBlock &MBB,
@@ -329,16 +366,64 @@ static void emitThumbConstant(MachineBasicBlock &MBB,
int Chunk = (1 << 8) - 1;
int ThisVal = (Imm > Chunk) ? Chunk : Imm;
Imm -= ThisVal;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
+ DestReg))
+ .addImm(ThisVal));
if (Imm > 0)
emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
- if (isSub)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
- .addReg(DestReg, RegState::Kill);
+ if (isSub) {
+ const TargetInstrDesc &TID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+ .addReg(DestReg, RegState::Kill));
+ }
}
-void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+int Thumb1RegisterInfo::
+rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int Offset,
+ unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const
+{
+  // If/when eliminateFrameIndex() conforms to the ARMBaseRegisterInfo
+  // version, the Thumb1-specific parts can be pulled out here.
+ return 0;
+}
+
+/// saveScavengerRegister - Save the register so it can be used by the
+/// register scavenger. Return true.
+bool Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+  // Thumb1 can't use the emergency spill slot on the stack because
+  // ldr/str immediate offsets must be positive, and if we're referencing
+  // off the frame pointer (if, for example, there are alloca() calls in
+  // the function), the offset will be negative. Use R12 instead, since
+  // that's a call-clobbered register that we know won't be used in Thumb1
+  // mode.
+
+ TII.copyRegToReg(MBB, I, ARM::R12, Reg, ARM::GPRRegisterClass, RC);
+ return true;
+}
+
+/// restoreScavengerRegister - Restore a register saved by
+/// saveScavengerRegister().
+void Thumb1RegisterInfo::restoreScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass);
+}
+
+unsigned
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const{
+ unsigned VReg = 0;
unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
@@ -380,7 +465,7 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Scale = 1;
if (FrameReg != ARM::SP) {
Opcode = ARM::tADDi3;
- MI.setDesc(TII.get(ARM::tADDi3));
+ MI.setDesc(TII.get(Opcode));
NumBits = 3;
} else {
NumBits = 8;
@@ -391,19 +476,26 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (Offset == 0) {
// Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVhir2lor));
+ MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
MI.getOperand(i).ChangeToRegister(FrameReg, false);
MI.RemoveOperand(i+1);
- return;
+ return 0;
}
// Common case: small offset, fits into instruction.
unsigned Mask = (1 << NumBits) - 1;
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
- return;
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, i);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
+ .addImm(Offset / Scale));
+ } else {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ }
+ return 0;
}
unsigned DestReg = MI.getOperand(0).getReg();
@@ -415,15 +507,21 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
*this, dl);
MBB.erase(II);
- return;
+ return 0;
}
if (Offset > 0) {
// Translate r0 = add sp, imm to
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Mask);
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, i);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
+ } else {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Mask);
+ }
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = next(II);
emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
@@ -433,11 +531,16 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// r0 = -imm (this is then translated into a series of instructons)
// r0 = add r0, sp
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+
MI.setDesc(TII.get(ARM::tADDhirr));
MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ if (Opcode == ARM::tADDi3) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
}
- return;
+ return 0;
} else {
unsigned ImmIdx = 0;
int InstrOffs = 0;
@@ -452,8 +555,7 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
break;
}
default:
- assert(0 && "Unsupported addressing mode!");
- abort();
+ llvm_unreachable("Unsupported addressing mode!");
break;
}
@@ -468,7 +570,7 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Replace the FrameIndex with sp
MI.getOperand(i).ChangeToRegister(FrameReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
- return;
+ return 0;
}
bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
@@ -495,6 +597,11 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// SP+LargeImm.
assert(Offset && "This code isn't needed if offset already handled!");
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
if (Desc.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
@@ -504,12 +611,14 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
Offset, false, TII, *this, dl);
else {
- emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
UseRR = true;
}
- } else
+ } else {
emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
*this, dl);
+ }
+
MI.setDesc(TII.get(ARM::tLDR));
MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
if (UseRR)
@@ -518,52 +627,37 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else // tLDR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false));
} else if (Desc.mayStore()) {
- // FIXME! This is horrific!!! We need register scavenging.
- // Our temporary workaround has marked r3 unavailable. Of course, r3 is
- // also a ABI register so it's possible that is is the register that is
- // being storing here. If that's the case, we do the following:
- // r12 = r2
- // Use r2 to materialize sp + offset
- // str r3, r2
- // r2 = r12
- unsigned ValReg = MI.getOperand(0).getReg();
- unsigned TmpReg = ARM::R3;
- bool UseRR = false;
- if (ValReg == ARM::R3) {
- BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
- .addReg(ARM::R2, RegState::Kill);
- TmpReg = ARM::R2;
- }
- if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
- BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
- .addReg(ARM::R3, RegState::Kill);
- if (Opcode == ARM::tSpill) {
- if (FrameReg == ARM::SP)
- emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
- Offset, false, TII, *this, dl);
- else {
- emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
- UseRR = true;
- }
- } else
- emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
- *this, dl);
- MI.setDesc(TII.get(ARM::tSTR));
- MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
- if (UseRR) // Use [reg, reg] addrmode.
- MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
- else // tSTR has an extra register operand.
- MI.addOperand(MachineOperand::CreateReg(0, false));
-
- MachineBasicBlock::iterator NII = next(II);
- if (ValReg == ARM::R3)
- BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
- .addReg(ARM::R12, RegState::Kill);
- if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
- BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
- .addReg(ARM::R12, RegState::Kill);
+ VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+    assert(Value && "Frame index virtual register allocated, but Value arg is NULL!");
+ *Value = Offset;
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSpill) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
+ *this, dl);
+ MI.setDesc(TII.get(ARM::tSTR));
+ MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ if (UseRR) // Use [reg, reg] addrmode.
+ MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+ else // tSTR has an extra register operand.
+ MI.addOperand(MachineOperand::CreateReg(0, false));
} else
assert(false && "Unexpected opcode!");
+
+ // Add predicate back if it's needed.
+ if (MI.getDesc().isPredicable()) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+ return VReg;
}
void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
@@ -577,15 +671,6 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
DebugLoc dl = (MBBI != MBB.end() ?
MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
- // Check if R3 is live in. It might have to be used as a scratch register.
- for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
- E = MF.getRegInfo().livein_end(); I != E; ++I) {
- if (I->first == ARM::R3) {
- AFI->setR3IsLiveIn(true);
- break;
- }
- }
-
// Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
NumBytes = (NumBytes + 3) & ~3;
MFI->setStackSize(NumBytes);
@@ -647,8 +732,7 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
// Darwin ABI requires FP to point to the stack slot that contains the
// previous FP.
if (STI.isTargetDarwin() || hasFP(MF)) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
}
@@ -729,7 +813,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
TII, *this, dl);
else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(FramePtr);
} else {
if (MBBI->getOpcode() == ARM::tBX_RET &&
@@ -745,11 +829,14 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
if (VARegSaveSize) {
// Epilogue for vararg functions: pop LR to R3 and branch off it.
// FIXME: Verify this is still ok when R3 is no longer being reserved.
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(0) // No write back.
+ .addReg(ARM::R3, RegState::Define);
emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
MBB.erase(MBBI);
}
}
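
The saveScavengerRegister / restoreScavengerRegister pair added above parks
the scavenged value in R12 rather than in the usual emergency stack slot,
since Thumb1 load/store offsets cannot be negative. A toy sketch of that
contract (pure illustration; a real backend moves machine registers, not
array slots):

#include <cstdio>

struct ToyRegFile { unsigned R[16]; };     // R[12] plays the role of r12/ip

static void saveScavenged(ToyRegFile &RF, unsigned Reg) {
  RF.R[12] = RF.R[Reg];                    // "mov r12, rN"
}

static void restoreScavenged(ToyRegFile &RF, unsigned Reg) {
  RF.R[Reg] = RF.R[12];                    // "mov rN, r12"
}

int main() {
  ToyRegFile RF = {};
  RF.R[3] = 42;                  // pretend r3 holds a live value
  saveScavenged(RF, 3);
  RF.R[3] = 7;                   // the scavenger temporarily reuses r3
  restoreScavenged(RF, 3);
  std::printf("r3 = %u\n", RF.R[3]);       // 42 again
  return 0;
}
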
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 6d4f1f0bf5e2..bb7a6199d10d 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -20,28 +20,28 @@
namespace llvm {
class ARMSubtarget;
- class TargetInstrInfo;
+ class ARMBaseInstrInfo;
class Type;
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+ Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
- void emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Val,
- const TargetInstrInfo *TII,
- DebugLoc dl) const;
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) const;
/// Code Generation virtual methods...
const TargetRegisterClass *
- getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
-
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
bool hasReservedCallFrame(MachineFunction &MF) const;
@@ -49,8 +49,23 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
+ // could not be handled directly in MI.
+ int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int Offset,
+ unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const;
+
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ void restoreScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 000000000000..98b5cbdfb98f
--- /dev/null
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,158 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "thumb2-it"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+
+namespace {
+ struct VISIBILITY_HIDDEN Thumb2ITBlockPass : public MachineFunctionPass {
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+
+ const Thumb2InstrInfo *TII;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ MachineBasicBlock::iterator
+ SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineInstr *MI, DebugLoc dl,
+ unsigned PredReg, ARMCC::CondCodes CC);
+ bool InsertITBlocks(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
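+// Exposition (not from the patch): conditional branches encode their
+// condition as a branch operand rather than as an ordinary predicate, so
+// they are reported as AL here and never folded under an IT block.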
+
+MachineBasicBlock::iterator
+Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr *MI,
+ DebugLoc dl, unsigned PredReg,
+ ARMCC::CondCodes CC) {
+ // Split t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here. The only
+ // reason it was a single instruction was so it could be re-materialized.
+ // We want to split it before IT blocks are formed and before the Thumb2
+ // size reduction pass runs, so that the IT mask is correct and width
+ // reduction opportunities are exposed. It doesn't make sense to do this
+ // in a separate pass, so here it is.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
+ unsigned Imm = MI->getOperand(1).getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
+ .addImm(Lo16).addImm(CC).addReg(PredReg);
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
+ .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
+ --MBBI;
+ --MBBI;
+ MI->eraseFromParent();
+ return MBBI;
+}
+
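+// Worked example (exposition only): for Imm = 0x12345678 the split above
+// produces
+//   t2MOVi16  dst, #0x5678       ; Lo16
+//   t2MOVTi16 dst, dst, #0x1234  ; Hi16, low half preserved
+// with the original predicate (CC, PredReg) appended to both, and MBBI
+// rewound two instructions so the caller re-scans the new pair.
+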
+bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getPredicate(MI, PredReg);
+
+ if (MI->getOpcode() == ARM::t2MOVi32imm) {
+ MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC);
+ continue;
+ }
+
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+ ++MBBI;
+
+ // Finalize IT mask.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+ while (MBBI != E && Pos) {
+ MachineInstr *NMI = &*MBBI;
+ DebugLoc ndl = NMI->getDebugLoc();
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
+ if (NMI->getOpcode() == ARM::t2MOVi32imm) {
+ MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC);
+ continue;
+ }
+
+ if (NCC == OCC) {
+ Mask |= (1 << Pos);
+ } else if (NCC != CC)
+ break;
+ --Pos;
+ ++MBBI;
+ }
+ Mask |= (1 << Pos);
+ MIB.addImm(Mask);
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= InsertITBlocks(MBB);
+ }
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT block
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
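A hedged sketch of the mask computation in InsertITBlocks above: position 3
is the slot immediately after the IT's first (same-condition) instruction,
each opposite-condition slot sets its bit, and a terminator bit is set at
the last position reached. The helper below is hypothetical, written only to
make the encoding concrete; it is not part of the patch.

// SameCC[i] is true when the i-th following instruction uses the IT's
// condition and false when it uses the opposite one. Returns the 4-bit
// mask operand added to t2IT.
static unsigned computeITMask(const bool *SameCC, unsigned N) {
  unsigned Mask = 0, Pos = 3;
  for (unsigned i = 0; i != N && Pos; ++i, --Pos)
    if (!SameCC[i])
      Mask |= (1 << Pos);   // 'else' slot
  Mask |= (1 << Pos);       // terminator bit
  return Mask;
}
// computeITMask(0, 0) == 0x8                        (plain IT)
// bool T[] = { true };  computeITMask(T, 1) == 0x4  (ITT)
// bool E[] = { false }; computeITMask(E, 1) == 0xC  (ITE)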
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 35d09fdac385..264601bf4143 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -13,6 +13,7 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
+#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -22,127 +23,62 @@
using namespace llvm;
-Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) {
}
-bool Thumb2InstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- unsigned oc = MI.getOpcode();
- switch (oc) {
- default:
- return false;
- // FIXME: Thumb2
- case ARM::tMOVr:
- case ARM::tMOVhir2lor:
- case ARM::tMOVlor2hir:
- case ARM::tMOVhir2hir:
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid Thumb MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
-}
-
-unsigned Thumb2InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- // FIXME: Thumb2
- case ARM::tRestore:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
return 0;
}
-unsigned Thumb2InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- // FIXME: Thumb2
- case ARM::tSpill:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+bool
+Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case ARM::t2LDM_RET:
+ case ARM::t2B: // Uncond branch.
+ case ARM::t2BR_JT: // Jumptable branch.
+ case ARM::t2TBB: // Table branch byte.
+ case ARM::t2TBH: // Table branch halfword.
+ case ARM::tBR_JTr: // Jumptable branch (16-bit version).
+ case ARM::tBX_RET:
+ case ARM::tBX_RET_vararg:
+ case ARM::tPOP_RET:
+ case ARM::tB:
+ return true;
+ default:
break;
}
- return 0;
+
+ return false;
}
-bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+bool
+Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- // FIXME: Thumb2
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
- }
- }
-
- return false;
-}
-
-bool Thumb2InstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVlor2hir:
- case ARM::tMOVhir2lor:
- case ARM::tMOVhir2hir: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- return false;
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- return false;
- }
+ if (DestRC == ARM::GPRRegisterClass &&
+ SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
+ return true;
+ } else if (DestRC == ARM::GPRRegisterClass &&
+ SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
+ return true;
+ } else if (DestRC == ARM::tGPRRegisterClass &&
+ SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
return true;
- }
}
- return false;
+ // Handle SPR, DPR, and QPR copies.
+ return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC);
}
void Thumb2InstrInfo::
@@ -152,36 +88,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
-
- // FIXME: Thumb2
- if (RC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- }
-}
-
-void Thumb2InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
-
- // FIXME: Thumb2. Is GPRRegClass here correct?
- assert(RC == ARM::GPRRegisterClass && "Unknown regclass!");
if (RC == ARM::GPRRegisterClass) {
- Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
+ return;
}
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC);
}
void Thumb2InstrInfo::
@@ -191,122 +105,381 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- // FIXME: Thumb2
- assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
-
- if (RC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
- .addFrameIndex(FI).addImm(0);
+ if (RC == ARM::GPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0));
+ return;
}
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
}
-void Thumb2InstrInfo::
-loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
- // FIXME: Thumb2. Is GPRRegClass ok here?
- if (RC == ARM::GPRRegisterClass) {
- Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ }
+ return;
+ }
}
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), DestReg).addReg(BaseReg);
+ BaseReg = ARM::SP;
+ continue;
+ }
-bool Thumb2InstrInfo::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
- if (CSI.empty())
- return false;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ // FIXME: Fix Thumb1 immediate encoding.
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4);
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- MIB.addReg(Reg, RegState::Kill);
+ // Build the new ADD / SUB.
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)));
+
+ BaseReg = DestReg;
}
- return true;
}
-bool Thumb2InstrInfo::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (CSI.empty())
- return false;
-
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
- MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- if (Reg == ARM::LR) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- Reg = ARM::PC;
- PopMI->setDesc(get(ARM::tPOP_RET));
- MI = MBB.erase(MI);
- }
- PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
}
- // It's illegal to emit a pop instruction without operands.
- if (PopMI->getNumOperands() > 0)
- MBB.insert(MI, PopMI);
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ return opcode;
- return true;
+ default:
+ break;
+ }
+
+ return 0;
}
-MachineInstr *Thumb2InstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVlor2hir:
- case ARM::tMOVhir2lor:
- case ARM::tMOVhir2hir: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- break;
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- break;
- bool isDead = MI->getOperand(0).isDead();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
- .addFrameIndex(FI).addImm(0);
- }
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
break;
}
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ bool isSP = FrameReg == ARM::SP;
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVgpr2gpr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ }
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096) {
+ unsigned NewOpc = isSP
+ ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
+ : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12);
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // These bits of the offset are handled here, so clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+
+ // AddrMode4 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // i8 supports only negative offsets and i12 only positive ones, so
+ // convert Opcode to the appropriate instruction based on the sign of
+ // Offset.
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else {
+ // VFP and NEON address modes.
+ int InstrOffs = 0;
+ if (AddrMode == ARMII::AddrMode5) {
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, the offset doesn't fit. Pull in what we can to simplify it.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
}
- return NewMI;
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
}
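The bit-extraction trick used above (in both emitT2RegPlusImmediate and
rewriteT2FrameIndex) is worth a worked example. Thumb-2 modified immediates
encode an 8-bit value at an arbitrary rotation, so the code peels off the 8
bits starting at the most significant set bit and leaves the rest for a
follow-up instruction. A self-contained sketch, with ARM_AM::rotr32 and
CountLeadingZeros_32 stubbed in locally for illustration:

#include <cstdint>

// Same contract as ARM_AM::rotr32: rotate right by Amt bits.
static uint32_t rotr32(uint32_t Val, unsigned Amt) {
  Amt &= 31;
  return Amt ? (Val >> Amt) | (Val << (32 - Amt)) : Val;
}

// Peel one Thumb-2 modified-immediate chunk off Offset. Offset must be
// nonzero, as it is at the call sites above; the remainder left in Offset
// is what a follow-up add/sub still has to handle.
static uint32_t takeSOImmChunk(uint32_t &Offset) {
  unsigned RotAmt = __builtin_clz(Offset);  // stands in for CountLeadingZeros_32
  uint32_t Chunk = Offset & rotr32(0xff000000U, RotAmt);
  Offset &= ~Chunk;
  return Chunk;
}
// e.g. Offset = 0x12340: Chunk = 0x12200 (0x91 rotated right by 23),
// leaving 0x140 for the next instruction.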
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 84dcb49a6eed..f3688c0084ae 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -27,66 +27,34 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
-
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
- bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ // Return true if the block does not fall through.
+ bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
- void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
- void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
};
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 0f0c0e41fc5a..6c4c15dfe354 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -13,12 +13,15 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,14 +33,10 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-static cl::opt<bool>
-Thumb2RegScavenging("enable-thumb2-reg-scavenging",
- cl::Hidden,
- cl::desc("Enable register scavenging on Thumb-2"));
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii,
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMBaseRegisterInfo(tii, sti) {
}
@@ -46,710 +45,23 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii,
/// specified immediate.
void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Val,
- const TargetInstrInfo *TII,
- DebugLoc dl) const {
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+ Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg)
- .addConstantPoolIndex(Idx);
-}
-
-const TargetRegisterClass*
-Thumb2RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
- if (isARMLowRegister(Reg))
- return ARM::tGPRRegisterClass;
- switch (Reg) {
- default:
- break;
- case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
- case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC:
- return ARM::GPRRegisterClass;
- }
-
- return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
-}
-
-bool
-Thumb2RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- return Thumb2RegScavenging;
-}
-
-bool Thumb2RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
- // It's not always a good idea to include the call frame as part of the
- // stack frame. ARM (especially Thumb) has small immediate offset to
- // address the stack frame. So a large call frame can cause poor codegen
- // and may even makes it impossible to scavenge a register.
- if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
- return false;
-
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
-/// in a register using mov / mvn sequences or load the immediate from a
-/// constpool entry.
-static
-void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, unsigned BaseReg,
- int NumBytes, bool CanChangeCC,
- const TargetInstrInfo &TII,
- const Thumb2RegisterInfo& MRI,
- DebugLoc dl) {
- bool isHigh = !isARMLowRegister(DestReg) ||
- (BaseReg != 0 && !isARMLowRegister(BaseReg));
- bool isSub = false;
- // Subtract doesn't have a high register version. Load the negative value
- // if either the base or dest register is a high register. Also, do not
- // issue sub as part of the sequence if the condition register is to be
- // preserved.
- if (NumBytes < 0 && !isHigh && CanChangeCC) {
- isSub = true;
- NumBytes = -NumBytes;
- }
- unsigned LdReg = DestReg;
- if (DestReg == ARM::SP) {
- assert(BaseReg == ARM::SP && "Unexpected!");
- LdReg = ARM::R3;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
- .addReg(ARM::R3, RegState::Kill);
- }
-
- if (NumBytes <= 255 && NumBytes >= 0)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
- else if (NumBytes < 0 && NumBytes >= -255) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
- .addReg(LdReg, RegState::Kill);
- } else
- MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl);
-
- // Emit add / sub.
- int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
- const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
- TII.get(Opc), DestReg);
- if (DestReg == ARM::SP || isSub)
- MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
- else
- MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
- if (DestReg == ARM::SP)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
- .addReg(ARM::R12, RegState::Kill);
-}
-
-/// calcNumMI - Returns the number of instructions required to materialize
-/// the specific add / sub r, c instruction.
-static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
- unsigned NumBits, unsigned Scale) {
- unsigned NumMIs = 0;
- unsigned Chunk = ((1 << NumBits) - 1) * Scale;
-
- if (Opc == ARM::tADDrSPi) {
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- NumMIs++;
- NumBits = 8;
- Scale = 1; // Followed by a number of tADDi8.
- Chunk = ((1 << NumBits) - 1) * Scale;
- }
-
- NumMIs += Bytes / Chunk;
- if ((Bytes % Chunk) != 0)
- NumMIs++;
- if (ExtraOpc)
- NumMIs++;
- return NumMIs;
-}
-
-/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code.
-static
-void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, unsigned BaseReg,
- int NumBytes, const TargetInstrInfo &TII,
- const Thumb2RegisterInfo& MRI,
- DebugLoc dl) {
- bool isSub = NumBytes < 0;
- unsigned Bytes = (unsigned)NumBytes;
- if (isSub) Bytes = -NumBytes;
- bool isMul4 = (Bytes & 3) == 0;
- bool isTwoAddr = false;
- bool DstNotEqBase = false;
- unsigned NumBits = 1;
- unsigned Scale = 1;
- int Opc = 0;
- int ExtraOpc = 0;
-
- if (DestReg == BaseReg && BaseReg == ARM::SP) {
- assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
- NumBits = 7;
- Scale = 4;
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- isTwoAddr = true;
- } else if (!isSub && BaseReg == ARM::SP) {
- // r1 = add sp, 403
- // =>
- // r1 = add sp, 100 * 4
- // r1 = add r1, 3
- if (!isMul4) {
- Bytes &= ~3;
- ExtraOpc = ARM::tADDi3;
- }
- NumBits = 8;
- Scale = 4;
- Opc = ARM::tADDrSPi;
- } else {
- // sp = sub sp, c
- // r1 = sub sp, c
- // r8 = sub sp, c
- if (DestReg != BaseReg)
- DstNotEqBase = true;
- NumBits = 8;
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- isTwoAddr = true;
- }
-
- unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
- unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
- if (NumMIs > Threshold) {
- // This will expand into too many instructions. Load the immediate from a
- // constpool entry.
- emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
- MRI, dl);
- return;
- }
-
- if (DstNotEqBase) {
- if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
- // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
- unsigned Chunk = (1 << 3) - 1;
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
- .addReg(BaseReg, RegState::Kill);
- }
- BaseReg = DestReg;
- }
-
- unsigned Chunk = ((1 << NumBits) - 1) * Scale;
- while (Bytes) {
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- ThisVal /= Scale;
- // Build the new tADD / tSUB.
- if (isTwoAddr)
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(DestReg).addImm(ThisVal);
- else {
- bool isKill = BaseReg != ARM::SP;
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
- BaseReg = DestReg;
-
- if (Opc == ARM::tADDrSPi) {
- // r4 = add sp, imm
- // r4 = add r4, imm
- // ...
- NumBits = 8;
- Scale = 1;
- Chunk = ((1 << NumBits) - 1) * Scale;
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- isTwoAddr = true;
- }
- }
- }
-
- if (ExtraOpc)
- BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
- .addReg(DestReg, RegState::Kill)
- .addImm(((unsigned)NumBytes) & 3);
-}
-
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb2RegisterInfo &MRI,
- int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
- MRI, dl);
-}
-
-void Thumb2RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
- } else {
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(MBB, I, TII, dl, *this, Amount);
- }
- }
- }
- MBB.erase(I);
-}
-
-/// emitThumbConstant - Emit a series of instructions to materialize a
-/// constant.
-static void emitThumbConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Imm,
- const TargetInstrInfo &TII,
- const Thumb2RegisterInfo& MRI,
- DebugLoc dl) {
- bool isSub = Imm < 0;
- if (isSub) Imm = -Imm;
-
- int Chunk = (1 << 8) - 1;
- int ThisVal = (Imm > Chunk) ? Chunk : Imm;
- Imm -= ThisVal;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
- if (Imm > 0)
- emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
- if (isSub)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
- .addReg(DestReg, RegState::Kill);
-}
-
-void Thumb2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
-
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (hasFP(MF)) {
- assert(SPAdj == 0 && "Unexpected");
- // There are alloca()'s in this function, so we must reference off the
- // frame pointer instead.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- }
-
- unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = MI.getDesc();
- unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
-
- if (Opcode == ARM::tADDrSPi) {
- Offset += MI.getOperand(i+1).getImm();
-
- // Can't use tADDrSPi if it's based off the frame pointer.
- unsigned NumBits = 0;
- unsigned Scale = 1;
- if (FrameReg != ARM::SP) {
- Opcode = ARM::tADDi3;
- MI.setDesc(TII.get(ARM::tADDi3));
- NumBits = 3;
- } else {
- NumBits = 8;
- Scale = 4;
- assert((Offset & 3) == 0 &&
- "Thumb add/sub sp, #imm immediate must be multiple of 4!");
- }
-
- if (Offset == 0) {
- // Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVhir2lor));
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.RemoveOperand(i+1);
- return;
- }
-
- // Common case: small offset, fits into instruction.
- unsigned Mask = (1 << NumBits) - 1;
- if (((Offset / Scale) & ~Mask) == 0) {
- // Replace the FrameIndex with sp / fp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
- return;
- }
-
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned Bytes = (Offset > 0) ? Offset : -Offset;
- unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
- // MI would expand into a large number of instructions. Don't try to
- // simplify the immediate.
- if (NumMIs > 2) {
- emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
- *this, dl);
- MBB.erase(II);
- return;
- }
-
- if (Offset > 0) {
- // Translate r0 = add sp, imm to
- // r0 = add sp, 255*4
- // r0 = add r0, (imm - 255*4)
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Mask);
- Offset = (Offset - Mask * Scale);
- MachineBasicBlock::iterator NII = next(II);
- emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
- *this, dl);
- } else {
- // Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructions)
- // r0 = add r0, sp
- emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
- MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
- }
- return;
- } else {
- unsigned ImmIdx = 0;
- int InstrOffs = 0;
- unsigned NumBits = 0;
- unsigned Scale = 1;
- switch (AddrMode) {
- case ARMII::AddrModeT1_s: {
- ImmIdx = i+1;
- InstrOffs = MI.getOperand(ImmIdx).getImm();
- NumBits = (FrameReg == ARM::SP) ? 8 : 5;
- Scale = 4;
- break;
- }
- default:
- assert(0 && "Unsupported addressing mode!");
- abort();
- break;
- }
-
- Offset += InstrOffs * Scale;
- assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
-
- // Common case: small offset, fits into instruction.
- MachineOperand &ImmOp = MI.getOperand(ImmIdx);
- int ImmedOffset = Offset / Scale;
- unsigned Mask = (1 << NumBits) - 1;
- if ((unsigned)Offset <= Mask * Scale) {
- // Replace the FrameIndex with sp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- ImmOp.ChangeToImmediate(ImmedOffset);
- return;
- }
-
- bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
- if (AddrMode == ARMII::AddrModeT1_s) {
- // Thumb tLDRspi, tSTRspi. These will change to instructions that use
- // a different base register.
- NumBits = 5;
- Mask = (1 << NumBits) - 1;
- }
- // If this is a thumb spill / restore, we will be using a constpool load to
- // materialize the offset.
- if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore)
- ImmOp.ChangeToImmediate(0);
- else {
- // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
- ImmedOffset = ImmedOffset & Mask;
- ImmOp.ChangeToImmediate(ImmedOffset);
- Offset &= ~(Mask*Scale);
- }
- }
-
- // If we get here, the immediate doesn't fit into the instruction. We folded
- // as much as possible above, handle the rest, providing a register that is
- // SP+LargeImm.
- assert(Offset && "This code isn't needed if offset already handled!");
-
- if (Desc.mayLoad()) {
- // Use the destination register to materialize sp + offset.
- unsigned TmpReg = MI.getOperand(0).getReg();
- bool UseRR = false;
- if (Opcode == ARM::tRestore) {
- if (FrameReg == ARM::SP)
- emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
- Offset, false, TII, *this, dl);
- else {
- emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
- UseRR = true;
- }
- } else
- emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
- *this, dl);
- MI.setDesc(TII.get(ARM::tLDR));
- MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
- if (UseRR)
- // Use [reg, reg] addrmode.
- MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
- else // tLDR has an extra register operand.
- MI.addOperand(MachineOperand::CreateReg(0, false));
- } else if (Desc.mayStore()) {
- // FIXME! This is horrific!!! We need register scavenging.
- // Our temporary workaround has marked r3 unavailable. Of course, r3 is
- // also an ABI register, so it's possible that it is the register being
- // stored here. If that's the case, we do the following:
- // r12 = r2
- // Use r2 to materialize sp + offset
- // str r3, r2
- // r2 = r12
- unsigned ValReg = MI.getOperand(0).getReg();
- unsigned TmpReg = ARM::R3;
- bool UseRR = false;
- if (ValReg == ARM::R3) {
- BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
- .addReg(ARM::R2, RegState::Kill);
- TmpReg = ARM::R2;
- }
- if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
- BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
- .addReg(ARM::R3, RegState::Kill);
- if (Opcode == ARM::tSpill) {
- if (FrameReg == ARM::SP)
- emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
- Offset, false, TII, *this, dl);
- else {
- emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
- UseRR = true;
- }
- } else
- emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
- *this, dl);
- MI.setDesc(TII.get(ARM::tSTR));
- MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
- if (UseRR) // Use [reg, reg] addrmode.
- MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
- else // tSTR has an extra register operand.
- MI.addOperand(MachineOperand::CreateReg(0, false));
-
- MachineBasicBlock::iterator NII = next(II);
- if (ValReg == ARM::R3)
- BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
- .addReg(ARM::R12, RegState::Kill);
- if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
- BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
- .addReg(ARM::R12, RegState::Kill);
- } else
- assert(false && "Unexpected opcode!");
-}
-
-void Thumb2RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- unsigned NumBytes = MFI->getStackSize();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = (MBBI != MBB.end() ?
- MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
-
- // Check if R3 is live in. It might have to be used as a scratch register.
- for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
- E = MF.getRegInfo().livein_end(); I != E; ++I) {
- if (I->first == ARM::R3) {
- AFI->setR3IsLiveIn(true);
- break;
- }
- }
-
- // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
- NumBytes = (NumBytes + 3) & ~3;
- MFI->setStackSize(NumBytes);
-
- // Determine the sizes of each callee-save spill areas and record which frame
- // belongs to which callee-save spill areas.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
- int FramePtrSpillFI = 0;
-
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize);
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
- return;
- }
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- break;
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
- GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- }
- break;
- default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
- }
- }
-
- if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
- ++MBBI;
- if (MBBI != MBB.end())
- dl = MBBI->getDebugLoc();
- }
-
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
- }
-
- // Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
- unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
- unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
- AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
- AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
- AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
- NumBytes = DPRCSOffset;
- if (NumBytes) {
- // Insert it after all the callee-save spills.
- emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
- }
-
- if (STI.isTargetELF() && hasFP(MF)) {
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
- }
-
- AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
- AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
- AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
- return (MI->getOpcode() == ARM::tRestore &&
- MI->getOperand(1).isFI() &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
-}
-
-void Thumb2RegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert((MBBI->getOpcode() == ARM::tBX_RET ||
- MBBI->getOpcode() == ARM::tPOP_RET) &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- int NumBytes = (int)MFI->getStackSize();
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
- } else {
- // Unwind MBBI to point to first LDR / FLDD.
- const unsigned *CSRegs = getCalleeSavedRegs();
- if (MBBI != MBB.begin()) {
- do
- --MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
- if (!isCSRestore(MBBI, CSRegs))
- ++MBBI;
- }
-
- // Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
- AFI->getGPRCalleeSavedArea2Size() +
- AFI->getDPRCalleeSavedAreaSize());
-
- if (hasFP(MF)) {
- NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (NumBytes)
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
- TII, *this, dl);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
- .addReg(FramePtr);
- } else {
- if (MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI &&
- prior(MBBI)->getOpcode() == ARM::tPOP) {
- MachineBasicBlock::iterator PMBBI = prior(MBBI);
- emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes);
- } else
- emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
- }
- }
-
- if (VARegSaveSize) {
- // Epilogue for vararg functions: pop LR to R3 and branch off it.
- // FIXME: Verify this is still ok when R3 is no longer being reserved.
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
-
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
- MBB.erase(MBBI);
- }
+bool Thumb2RegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
}
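A hypothetical call site (names and values assumed, not from the patch)
showing the reworked emitLoadConstPool signature, whose declaration follows
in the header below:

// Materialize the 32-bit constant 0x12345678 into r4, unconditionally.
// SubIdx = 0 selects the whole register; Pred/PredReg take their defaults
// (ARMCC::AL, 0), matching the trailing operands on the emitted t2LDRpci.
TRI.emitLoadConstPool(MBB, MBBI, dl, ARM::R4, 0, 0x12345678);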
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index d379c3150833..a63c60b73b80 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -20,40 +20,23 @@
namespace llvm {
class ARMSubtarget;
- class TargetInstrInfo;
+ class ARMBaseInstrInfo;
class Type;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb2RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+ Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void emitLoadConstPool(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Val,
- const TargetInstrInfo *TII,
- DebugLoc dl) const;
-
- /// Code Generation virtual methods...
- const TargetRegisterClass *
- getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
-
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool hasReservedCallFrame(MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
};
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 000000000000..b8879d2ed1fd
--- /dev/null
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,685 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "t2-reduce-size"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
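+
+// These hidden options are debugging knobs rather than user-facing flags:
+// e.g. passing -t2-reduce-limit=N to llc caps how many 32-bit instructions
+// the pass may narrow, which makes it possible to bisect a miscompile down
+// to a single reduction. (Usage sketch; the exact driver spelling is an
+// assumption.)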
+
+namespace {
+  /// ReduceTable - A static table with information on mapping from wide
+  /// opcodes to narrow ones.
+ struct ReduceEntry {
+ unsigned WideOpc; // Wide opcode
+ unsigned NarrowOpc1; // Narrow opcode to transform to
+ unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+    unsigned PredCC1   : 2;  // 0 - The 16-bit form sets CPSR iff the
+                             //     instruction is not predicated.
+                             // 1 - No cc field.
+                             // 2 - Always sets CPSR.
+ unsigned PredCC2 : 2;
+ unsigned Special : 1; // Needs to be dealt with specially
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 },
+ // Note: immediate scale is 4.
+ { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 },
+ { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 },
+ { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
+ { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
+ { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 },
+ { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 },
+ { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 },
+
+ { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
+ };
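+
+  // An illustrative reading of one row (not normative): the t2SUBri entry
+  // above says a 32-bit SUB-immediate may narrow to tSUBi3 (3-bit
+  // immediate) or, in two-address form where source == dest, to tSUBi8
+  // (8-bit immediate); both narrow forms require low registers, and
+  // PredCC == 0 means the 16-bit encoding sets CPSR exactly when the
+  // instruction is not predicated.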
+
+ class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce();
+
+ const Thumb2InstrInfo *TII;
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ }
+}
+
+static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
+ for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+    // The old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+    // If the old opcode does not have an implicit CPSR def either, it's not
+    // ok to reduce: the new opcode's CPSR def (e.g. for CMP) is not meant
+    // to be thrown away.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM);
+ bool isLROk = (Opc == ARM::t2STM);
+ bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (isSPOk && Reg == ARM::SP)
+ continue;
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ Scale = 4;
+ HasImmOffset = true;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDM_RET:
+ case ARM::t2LDM:
+ case ARM::t2STM: {
+ OpNum = 0;
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ unsigned Mode = MI->getOperand(1).getImm();
+ if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) {
+ Opc = Entry.NarrowOpc2;
+ OpNum = 2;
+ } else if (Entry.WideOpc == ARM::t2LDM_RET ||
+ !isARMLowRegister(BaseReg) ||
+ !ARM_AM::getAM4WBFlag(Mode) ||
+ ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) {
+ return false;
+ }
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
+ if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
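+
+  // For example (illustrative): t2LDRi12 has Scale == 4 and a 5-bit narrow
+  // immediate, so only word-aligned offsets up to 124 survive this check;
+  // OffsetImm / Scale below re-scales the offset when the 16-bit
+  // instruction is built.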
+
+ // Add the 16-bit load / store instruction.
+  // FIXME: Thumb1 addressing modes encode both immediate and register offset.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
+ if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
+      // tLDRSB and tLDRSH do not have an immediate offset field; on the
+      // other hand, they must have an offset register.
+ // FIXME: Remove this special case.
+ MIB.addImm(OffsetImm/Scale);
+ }
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ }
+
+  // Transfer the rest of the operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR) {
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayLoad() || TID.mayStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ break;
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ if (Reg0 != Reg1)
+ return false;
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
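+
+  // Illustrative example: "t2ADDri r0, r0, #100" passes the checks above
+  // (r0 is a low register, source == dest, and 100 fits tADDi8's 8-bit
+  // immediate field), making it a candidate for two-address narrowing.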
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewTID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewTID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (TID.hasOptionalDef()) {
+ unsigned NumOps = TID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewTID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+  // Transfer the rest of the operands.
+ unsigned NumOps = TID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && TID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ unsigned Limit = ~0U;
+ unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
+ if (Entry.Imm1Limit)
+ Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !TID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewTID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewTID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (TID.hasOptionalDef()) {
+ unsigned NumOps = TID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewTID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+  // Transfer the rest of the operands.
+ unsigned NumOps = TID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((TID.getOpcode() == ARM::t2RSBSri ||
+ TID.getOpcode() == ARM::t2RSBri) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (Scale > 1 && !isPred && MO.isImm())
+ MIB.addImm(MO.getImm() / Scale);
+ else {
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ }
+ if (!TID.isPredicable() && NewTID.isPredicable())
+ AddDefaultPred(MIB);
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+}
+
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
+ bool HasDef = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
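+
+// Together these two helpers implement a simple forward liveness scan for
+// CPSR within a block: UpdateCPSRUse clears liveness at a kill, and
+// UpdateCPSRDef sets it whenever a non-dead def of CPSR is seen.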
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ bool LiveCPSR = false;
+ // Yes, CPSR could be livein.
+ for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
+ E = MBB.livein_end(); I != E; ++I) {
+ if (*I == ARM::CPSR) {
+ LiveCPSR = true;
+ break;
+ }
+ }
+
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = next(MII);
+
+ MachineInstr *MI = &*MII;
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI != ReduceOpcodeMap.end()) {
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+      // Handle the "special" cases separately before the generic paths.
+ if (Entry.Special) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ goto ProcessNext;
+ }
+
+      // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ }
+
+ ProcessNext:
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
+ }
+
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+
+ bool Modified = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Modified |= ReduceMBB(*I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() {
+ return new Thumb2SizeReduce();
+}
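+
+// The low-register and CPSR-liveness checks above only make sense once
+// physical registers have been assigned, so this pass is meant to run late,
+// after register allocation (an inference from the checks themselves, not a
+// statement of where the target actually schedules the pass).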
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
index 0818e25b33a2..b8a06459e1cd 100644
--- a/lib/Target/Alpha/Alpha.h
+++ b/lib/Target/Alpha/Alpha.h
@@ -22,20 +22,22 @@ namespace llvm {
class AlphaTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
- class raw_ostream;
+ class ObjectCodeEmitter;
+ class formatted_raw_ostream;
FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
- FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS,
- TargetMachine &TM,
- bool Verbose);
FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
- JITCodeEmitter &JCE);
+ JITCodeEmitter &JCE);
+ FunctionPass *createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM,
+ ObjectCodeEmitter &OCE);
FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
FunctionPass *createAlphaBranchSelectionPass();
+ extern Target TheAlphaTarget;
+
} // end namespace llvm;
// Defines symbolic names for Alpha registers. This defines a mapping from
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
index e3748c6a09f3..6efdf554e176 100644
--- a/lib/Target/Alpha/Alpha.td
+++ b/lib/Target/Alpha/Alpha.td
@@ -30,6 +30,12 @@ def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
include "AlphaRegisterInfo.td"
//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaCallingConv.td"
+
+//===----------------------------------------------------------------------===//
// Schedule Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
index aca8ca734897..719ffaec3eaf 100644
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td
new file mode 100644
index 000000000000..38ada69c2425
--- /dev/null
+++ b/lib/Target/Alpha/AlphaCallingConv.td
@@ -0,0 +1,37 @@
+//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the Alpha architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Alpha Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Alpha : CallingConv<[
+ // i64 is returned in register R0
+ CCIfType<[i64], CCAssignToReg<[R0]>>,
+
+ // f32 / f64 are returned in F0/F1
+ CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Alpha Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Alpha : CallingConv<[
+ // The first 6 arguments are passed in registers, whether integer or
+ // floating-point
+ CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
+ [F16, F17, F18, F19, F20, F21]>>,
+
+ CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
+ [R16, R17, R18, R19, R20, R21]>>,
+
+ // Stack slots are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
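+
+// An illustrative consequence of the shadow lists above: for f(i64 a,
+// double b), "a" is assigned R16, which also marks F16 as used, so "b"
+// lands in F17 -- each argument position consumes both an integer and a
+// floating-point register, one register pair per slot.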
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
index f50f007c2076..8023add97914 100644
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -19,16 +19,19 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
-
+
class AlphaCodeEmitter {
MachineCodeEmitter &MCE;
public:
@@ -57,7 +60,7 @@ namespace {
public:
static char ID;
explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+ : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
II(0), TM(tm), MCE(mce) {}
Emitter(TargetMachine &tm, CodeEmitter &mce, const AlphaInstrInfo& ii)
: MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
@@ -69,8 +72,6 @@ namespace {
return "Alpha Machine Code Emitter";
}
- void emitInstruction(const MachineInstr &MI);
-
private:
void emitBasicBlock(MachineBasicBlock &MBB);
};
@@ -91,6 +92,10 @@ FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
}
+FunctionPass *llvm::createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM,
+ ObjectCodeEmitter &OCE) {
+ return new Emitter<ObjectCodeEmitter>(TM, OCE);
+}
template <class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
@@ -111,6 +116,7 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
switch(MI.getOpcode()) {
default:
MCE.emitWordLE(getBinaryCodeForInstr(*I));
@@ -119,8 +125,10 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
case Alpha::PCLABEL:
case Alpha::MEMLABEL:
case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::KILL:
break; //skip these
}
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
}
}
@@ -159,13 +167,12 @@ static unsigned getAlphaRegNumber(unsigned Reg) {
case Alpha::R30 : case Alpha::F30 : return 30;
case Alpha::R31 : case Alpha::F31 : return 31;
default:
- assert(0 && "Unhandled reg");
- abort();
+ llvm_unreachable("Unhandled reg");
}
}
unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) {
+ const MachineOperand &MO) {
unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
// or things that get fixed up later by the JIT.
@@ -175,7 +182,7 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
} else if (MO.isImm()) {
rv = MO.getImm();
} else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
- DOUT << MO << " is a relocated op for " << MI << "\n";
+ DEBUG(errs() << MO << " is a relocated op for " << MI << "\n");
unsigned Reloc = 0;
int Offset = 0;
bool useGOT = false;
@@ -211,8 +218,7 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
Offset = MI.getOperand(3).getImm();
break;
default:
- assert(0 && "unknown relocatable instruction");
- abort();
+ llvm_unreachable("unknown relocatable instruction");
}
if (MO.isGlobal())
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
@@ -229,14 +235,14 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
} else if (MO.isMBB()) {
MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
Alpha::reloc_bsr, MO.getMBB()));
- }else {
- cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
- abort();
+ } else {
+#ifndef NDEBUG
+ errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+#endif
+ llvm_unreachable(0);
}
return rv;
}
#include "AlphaGenCodeEmitter.inc"
-
-
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index e3f631a1f5be..e3587fb2c90f 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -26,9 +26,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -114,7 +117,7 @@ namespace {
uint64_t complow = 1 << (63 - at);
uint64_t comphigh = 1 << (64 - at);
//cerr << x << ":" << complow << ":" << comphigh << "\n";
- if (abs(complow - x) <= abs(comphigh - x))
+ if (abs64(complow - x) <= abs64(comphigh - x))
return complow;
else
return comphigh;
@@ -208,7 +211,6 @@ private:
/// GOT address into a register.
///
SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = BB->getParent();
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
@@ -216,7 +218,6 @@ SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
/// getGlobalRetAddr - Grab the return address.
///
SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
- MachineFunction *MF = BB->getParent();
unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF);
return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
}
@@ -269,8 +270,8 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
Chain.getValue(1));
SDNode *CNode =
- CurDAG->getTargetNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag,
- Chain, Chain.getValue(1));
+ CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag,
+ Chain, Chain.getValue(1));
Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
SDValue(CNode, 1));
return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
@@ -278,8 +279,8 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
case ISD::READCYCLECOUNTER: {
SDValue Chain = N->getOperand(0);
- return CurDAG->getTargetNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
- Chain);
+ return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
+ Chain);
}
case ISD::Constant: {
@@ -302,10 +303,11 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
// val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
break; //(zext (LDAH (LDA)))
//Else use the constant pool
- ConstantInt *C = ConstantInt::get(Type::Int64Ty, uval);
+ ConstantInt *C = ConstantInt::get(
+ Type::getInt64Ty(*CurDAG->getContext()), uval);
SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
- SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, dl, MVT::i64, CPI,
- SDValue(getGlobalBaseReg(), 0));
+ SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI,
+ SDValue(getGlobalBaseReg(), 0));
return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
}
@@ -313,7 +315,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
case ISD::ConstantFP: {
ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
bool isDouble = N->getValueType(0) == MVT::f64;
- MVT T = isDouble ? MVT::f64 : MVT::f32;
+ EVT T = isDouble ? MVT::f64 : MVT::f32;
if (CN->getValueAPF().isPosZero()) {
return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
T, CurDAG->getRegister(Alpha::F31, T),
@@ -323,7 +325,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
T, CurDAG->getRegister(Alpha::F31, T),
CurDAG->getRegister(Alpha::F31, T));
} else {
- abort();
+ llvm_report_error("Unhandled FP constant type");
}
break;
}
@@ -336,7 +338,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
bool rev = false;
bool inv = false;
switch(CC) {
- default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!");
+ default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!");
case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
Opc = Alpha::CMPTEQ; break;
case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
@@ -356,48 +358,29 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
};
SDValue tmp1 = N->getOperand(rev?1:0);
SDValue tmp2 = N->getOperand(rev?0:1);
- SDNode *cmp = CurDAG->getTargetNode(Opc, dl, MVT::f64, tmp1, tmp2);
+ SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2);
if (inv)
- cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, dl,
- MVT::f64, SDValue(cmp, 0),
- CurDAG->getRegister(Alpha::F31, MVT::f64));
+ cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl,
+ MVT::f64, SDValue(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
switch(CC) {
case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
{
- SDNode* cmp2 = CurDAG->getTargetNode(Alpha::CMPTUN, dl, MVT::f64,
- tmp1, tmp2);
- cmp = CurDAG->getTargetNode(Alpha::ADDT, dl, MVT::f64,
- SDValue(cmp2, 0), SDValue(cmp, 0));
+ SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64,
+ SDValue(cmp2, 0), SDValue(cmp, 0));
break;
}
default: break;
}
- SDNode* LD = CurDAG->getTargetNode(Alpha::FTOIT, dl,
- MVT::i64, SDValue(cmp, 0));
- return CurDAG->getTargetNode(Alpha::CMPULT, dl, MVT::i64,
- CurDAG->getRegister(Alpha::R31, MVT::i64),
- SDValue(LD,0));
- }
- break;
-
- case ISD::SELECT:
- if (N->getValueType(0).isFloatingPoint() &&
- (N->getOperand(0).getOpcode() != ISD::SETCC ||
- !N->getOperand(0).getOperand(1).getValueType().isFloatingPoint())) {
- //This should be the condition not covered by the Patterns
- //FIXME: Don't have SelectCode die, but rather return something testable
- // so that things like this can be caught in fall though code
- //move int to fp
- bool isDouble = N->getValueType(0) == MVT::f64;
- SDValue cond = N->getOperand(0);
- SDValue TV = N->getOperand(1);
- SDValue FV = N->getOperand(2);
-
- SDNode* LD = CurDAG->getTargetNode(Alpha::ITOFT, dl, MVT::f64, cond);
- return CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES,
- dl, MVT::f64, FV, TV, SDValue(LD,0));
+ SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl,
+ MVT::i64, SDValue(cmp, 0));
+ return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDValue(LD,0));
}
break;
@@ -422,11 +405,11 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
if (get_zapImm(mask)) {
SDValue Z =
- SDValue(CurDAG->getTargetNode(Alpha::ZAPNOTi, dl, MVT::i64,
- N->getOperand(0).getOperand(0),
- getI64Imm(get_zapImm(mask))), 0);
- return CurDAG->getTargetNode(Alpha::SRLr, dl, MVT::i64, Z,
- getI64Imm(sval));
+ SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z,
+ getI64Imm(sval));
}
}
break;
@@ -443,95 +426,26 @@ void AlphaDAGToDAGISel::SelectCALL(SDValue Op) {
SDNode *N = Op.getNode();
SDValue Chain = N->getOperand(0);
SDValue Addr = N->getOperand(1);
- SDValue InFlag(0,0); // Null incoming flag value.
+ SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
DebugLoc dl = N->getDebugLoc();
- std::vector<SDValue> CallOperands;
- std::vector<MVT> TypeOperands;
-
- //grab the arguments
- for(int i = 2, e = N->getNumOperands(); i < e; ++i) {
- TypeOperands.push_back(N->getOperand(i).getValueType());
- CallOperands.push_back(N->getOperand(i));
- }
- int count = N->getNumOperands() - 2;
-
- static const unsigned args_int[] = {Alpha::R16, Alpha::R17, Alpha::R18,
- Alpha::R19, Alpha::R20, Alpha::R21};
- static const unsigned args_float[] = {Alpha::F16, Alpha::F17, Alpha::F18,
- Alpha::F19, Alpha::F20, Alpha::F21};
-
- for (int i = 6; i < count; ++i) {
- unsigned Opc = Alpha::WTF;
- if (TypeOperands[i].isInteger()) {
- Opc = Alpha::STQ;
- } else if (TypeOperands[i] == MVT::f32) {
- Opc = Alpha::STS;
- } else if (TypeOperands[i] == MVT::f64) {
- Opc = Alpha::STT;
- } else
- assert(0 && "Unknown operand");
-
- SDValue Ops[] = { CallOperands[i], getI64Imm((i - 6) * 8),
- CurDAG->getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64),
- Chain };
- Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 4), 0);
- }
- for (int i = 0; i < std::min(6, count); ++i) {
- if (TypeOperands[i].isInteger()) {
- Chain = CurDAG->getCopyToReg(Chain, dl, args_int[i],
- CallOperands[i], InFlag);
- InFlag = Chain.getValue(1);
- } else if (TypeOperands[i] == MVT::f32 || TypeOperands[i] == MVT::f64) {
- Chain = CurDAG->getCopyToReg(Chain, dl, args_float[i],
- CallOperands[i], InFlag);
- InFlag = Chain.getValue(1);
- } else
- assert(0 && "Unknown operand");
- }
-
- // Finally, once everything is in registers to pass to the call, emit the
- // call itself.
if (Addr.getOpcode() == AlphaISD::GPRelLo) {
SDValue GOT = SDValue(getGlobalBaseReg(), 0);
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
InFlag = Chain.getValue(1);
- Chain = SDValue(CurDAG->getTargetNode(Alpha::BSR, dl, MVT::Other,
- MVT::Flag, Addr.getOperand(0),
- Chain, InFlag), 0);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
+ MVT::Flag, Addr.getOperand(0),
+ Chain, InFlag), 0);
} else {
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
InFlag = Chain.getValue(1);
- Chain = SDValue(CurDAG->getTargetNode(Alpha::JSR, dl, MVT::Other,
- MVT::Flag, Chain, InFlag), 0);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
+ MVT::Flag, Chain, InFlag), 0);
}
InFlag = Chain.getValue(1);
- std::vector<SDValue> CallResults;
-
- switch (N->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i64:
- Chain = CurDAG->getCopyFromReg(Chain, dl,
- Alpha::R0, MVT::i64, InFlag).getValue(1);
- CallResults.push_back(Chain.getValue(0));
- break;
- case MVT::f32:
- Chain = CurDAG->getCopyFromReg(Chain, dl,
- Alpha::F0, MVT::f32, InFlag).getValue(1);
- CallResults.push_back(Chain.getValue(0));
- break;
- case MVT::f64:
- Chain = CurDAG->getCopyFromReg(Chain, dl,
- Alpha::F0, MVT::f64, InFlag).getValue(1);
- CallResults.push_back(Chain.getValue(0));
- break;
- }
-
- CallResults.push_back(Chain);
- for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
- ReplaceUses(Op.getValue(i), CallResults[i]);
+ ReplaceUses(Op.getValue(0), Chain);
+ ReplaceUses(Op.getValue(1), InFlag);
}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index fa0b65609fba..b3f865cf4a83 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -13,17 +13,22 @@
#include "AlphaISelLowering.h"
#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// AddLiveIn - This helper function adds the specified physical register to the
@@ -37,14 +42,15 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
return VReg;
}
-AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the TargetLowering object.
//I am having problems with shr n i8 1
setShiftAmountType(MVT::i64);
setBooleanContents(ZeroOrOneBooleanContent);
-
+
setUsesGlobalOffsetTable(true);
-
+
addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
@@ -54,24 +60,26 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
+
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
-
+
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
-
+
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -85,7 +93,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
setOperationAction(ISD::BSWAP , MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
-
+
setOperationAction(ISD::SREM , MVT::i64, Custom);
setOperationAction(ISD::UREM , MVT::i64, Custom);
setOperationAction(ISD::SDIV , MVT::i64, Custom);
@@ -99,6 +107,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
// We don't support sin/cos/sqrt/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -123,7 +134,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Not implemented yet.
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
@@ -141,8 +152,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::i32, Custom);
- setOperationAction(ISD::RET, MVT::Other, Custom);
-
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
@@ -159,7 +168,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
computeRegisterProperties();
}
-MVT AlphaTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i64;
}
@@ -187,13 +196,13 @@ unsigned AlphaTargetLowering::getFunctionAlignment(const Function *F) const {
}
static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
-
+
SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
@@ -219,43 +228,205 @@ static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
// //#define GP $29
// //#define SP $30
-static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
- int &VarArgsBase,
- int &VarArgsOffset) {
+#include "AlphaGenCallingConv.inc"
+
+SDValue
+AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = Outs[i].Val;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that are passed in registers must be recorded in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(), 0));
+ }
+ }
+
+  // Merge all of the store nodes into a single TokenFactor node, since the
+  // stores are independent of one another.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
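+
+// The sequence above follows the generic LLVM call-lowering shape:
+// CALLSEQ_START, register copies and stack stores driven by CC_Alpha, the
+// AlphaISD::CALL node carrying the argument registers, CALLSEQ_END, and
+// finally LowerCallResult to collect the returned values (a descriptive
+// summary of the code above).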
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- std::vector<SDValue> ArgValues;
- SDValue Root = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
unsigned args_int[] = {
Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
unsigned args_float[] = {
Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
-
- for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
+
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue argt;
- MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ EVT ObjectVT = Ins[ArgNo].VT;
SDValue ArgVal;
if (ArgNo < 6) {
- switch (ObjectVT.getSimpleVT()) {
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
default:
assert(false && "Invalid value type!");
case MVT::f64:
- args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
&Alpha::F8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
break;
case MVT::f32:
- args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
&Alpha::F4RCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
break;
case MVT::i64:
- args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
&Alpha::GPRCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, args_int[ArgNo], MVT::i64);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
break;
}
} else { //more args
@@ -265,60 +436,58 @@ static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
// Create the SelectionDAG nodes corresponding to a load
       // from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
- ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
}
- ArgValues.push_back(ArgVal);
+ InVals.push_back(ArgVal);
}
   // If the function takes a variable number of arguments, copy all regs to the stack
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
if (isVarArg) {
- VarArgsOffset = (Op.getNode()->getNumValues()-1) * 8;
+ VarArgsOffset = Ins.size() * 8;
std::vector<SDValue> LS;
for (int i = 0; i < 6; ++i) {
if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
- SDValue argt = DAG.getCopyFromReg(Root, dl, args_int[i], MVT::i64);
+ SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
if (i == 0) VarArgsBase = FI;
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
- argt = DAG.getCopyFromReg(Root, dl, args_float[i], MVT::f64);
+ argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
}
//Set up a token factor with all the stack traffic
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size());
+ return Chain;
}
-static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- SDValue Copy = DAG.getCopyToReg(Op.getOperand(0), dl, Alpha::R26,
- DAG.getNode(AlphaISD::GlobalRetAddr,
- DebugLoc::getUnknownLoc(),
- MVT::i64),
- SDValue());
- switch (Op.getNumOperands()) {
+SDValue
+AlphaTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ DebugLoc::getUnknownLoc(),
+ MVT::i64),
+ SDValue());
+ switch (Outs.size()) {
default:
- assert(0 && "Do not know how to return this many arguments!");
- abort();
- case 1:
+ llvm_unreachable("Do not know how to return this many arguments!");
+ case 0:
break;
//return SDValue(); // ret void is legal
- case 3: {
- MVT ArgVT = Op.getOperand(1).getValueType();
+ case 1: {
+ EVT ArgVT = Outs[0].Val.getValueType();
unsigned ArgReg;
if (ArgVT.isInteger())
ArgReg = Alpha::R0;
@@ -326,14 +495,14 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
assert(ArgVT.isFloatingPoint());
ArgReg = Alpha::F0;
}
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
- Op.getOperand(1), Copy.getValue(1));
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+ Outs[0].Val, Copy.getValue(1));
if (DAG.getMachineFunction().getRegInfo().liveout_empty())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
break;
}
- case 5: {
- MVT ArgVT = Op.getOperand(1).getValueType();
+ case 2: {
+ EVT ArgVT = Outs[0].Val.getValueType();
unsigned ArgReg1, ArgReg2;
if (ArgVT.isInteger()) {
ArgReg1 = Alpha::R0;
@@ -343,104 +512,25 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
ArgReg1 = Alpha::F0;
ArgReg2 = Alpha::F1;
}
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
- Op.getOperand(1), Copy.getValue(1));
- if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+ Outs[0].Val, Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
== DAG.getMachineFunction().getRegInfo().liveout_end())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
- Op.getOperand(3), Copy.getValue(1));
- if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+ Outs[1].Val, Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
== DAG.getMachineFunction().getRegInfo().liveout_end())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
break;
}
}
- return DAG.getNode(AlphaISD::RET_FLAG, dl,
+ return DAG.getNode(AlphaISD::RET_FLAG, dl,
MVT::Other, Copy, Copy.getValue(1));
}
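
The switch above encodes the Alpha return convention: zero values return nothing, a single value lands in R0 or F0 depending on whether it is integer or floating point, and a two-value result occupies the R0/R1 or F0/F1 pair keyed off the first value's type. A small sketch of that mapping (the strings are illustrative stand-ins for the Alpha:: register enums):

#include <cassert>

// Mirrors the register choice made by LowerReturn above.
static const char *alphaRetReg(bool isInteger, unsigned idx) {
  assert(idx < 2 && "at most two values come back in registers");
  static const char *const IntRegs[] = { "R0", "R1" };
  static const char *const FpRegs[]  = { "F0", "F1" };
  return isInteger ? IntRegs[idx] : FpRegs[idx];
}

int main() {
  assert(alphaRetReg(true, 0)[1] == '0');    // single integer result -> R0
  assert(alphaRetReg(false, 1)[1] == '1');   // second half of an FP pair -> F1
  return 0;
}
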
-std::pair<SDValue, SDValue>
-AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
- bool RetSExt, bool RetZExt, bool isVarArg,
- bool isInreg, unsigned NumFixedArgs,
- unsigned CallingConv,
- bool isTailCall, SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG,
- DebugLoc dl) {
- int NumBytes = 0;
- if (Args.size() > 6)
- NumBytes = (Args.size() - 6) * 8;
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
- std::vector<SDValue> args_to_use;
- for (unsigned i = 0, e = Args.size(); i != e; ++i)
- {
- switch (getValueType(Args[i].Ty).getSimpleVT()) {
- default: assert(0 && "Unexpected ValueType for argument!");
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- // Promote the integer to 64 bits. If the input type is signed use a
- // sign extend, otherwise use a zero extend.
- if (Args[i].isSExt)
- Args[i].Node = DAG.getNode(ISD::SIGN_EXTEND, dl,
- MVT::i64, Args[i].Node);
- else if (Args[i].isZExt)
- Args[i].Node = DAG.getNode(ISD::ZERO_EXTEND, dl,
- MVT::i64, Args[i].Node);
- else
- Args[i].Node = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Args[i].Node);
- break;
- case MVT::i64:
- case MVT::f64:
- case MVT::f32:
- break;
- }
- args_to_use.push_back(Args[i].Node);
- }
-
- std::vector<MVT> RetVals;
- MVT RetTyVT = getValueType(RetTy);
- MVT ActualRetTyVT = RetTyVT;
- if (RetTyVT.getSimpleVT() >= MVT::i1 && RetTyVT.getSimpleVT() <= MVT::i32)
- ActualRetTyVT = MVT::i64;
-
- if (RetTyVT != MVT::isVoid)
- RetVals.push_back(ActualRetTyVT);
- RetVals.push_back(MVT::Other);
-
- std::vector<SDValue> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
- SDValue TheCall = DAG.getNode(AlphaISD::CALL, dl,
- RetVals, &Ops[0], Ops.size());
- Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), SDValue());
- SDValue RetVal = TheCall;
-
- if (RetTyVT != ActualRetTyVT) {
- ISD::NodeType AssertKind = ISD::DELETED_NODE;
- if (RetSExt)
- AssertKind = ISD::AssertSext;
- else if (RetZExt)
- AssertKind = ISD::AssertZext;
-
- if (AssertKind != ISD::DELETED_NODE)
- RetVal = DAG.getNode(AssertKind, dl, MVT::i64, RetVal,
- DAG.getValueType(RetTyVT));
-
- RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
- }
-
- return std::make_pair(RetVal, Chain);
-}
-
void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
SDValue &DataPtr, SelectionDAG &DAG) {
Chain = N->getOperand(0);
@@ -475,12 +565,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
switch (Op.getOpcode()) {
- default: assert(0 && "Wasn't expecting to be able to lower this!");
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,
- VarArgsBase,
- VarArgsOffset);
-
- case ISD::RET: return LowerRET(Op,DAG);
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: {
@@ -488,11 +573,40 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (IntNo) {
default: break; // Don't custom lower most intrinsics.
case Intrinsic::alpha_umulh:
- return DAG.getNode(ISD::MULHU, dl, MVT::i64,
+ return DAG.getNode(ISD::MULHU, dl, MVT::i64,
Op.getOperand(1), Op.getOperand(2));
}
}
+ case ISD::SRL_PARTS: {
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(64, MVT::i64), ShAmt);
+ SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
+ DAG.getConstant(0, MVT::i64), ISD::SETLE);
+ // if 64 - shAmt <= 0
+ SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
+ SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(0, MVT::i64), bm);
+ SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
+ // else
+ SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
+ SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
+ SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
+ Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
+ // Merge
+ SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
+ SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // case ISD::SRA_PARTS:
+
+ // case ISD::SHL_PARTS:
+
+
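
The SRL_PARTS case computes both arms unconditionally and selects on the sign of 64 - shamt rather than branching. The same logic restated on plain 64-bit halves, as a minimal sketch (the [1, 127] shift range is an assumption of this sketch; the generated Alpha code additionally leans on the machine's 6-bit shift counts):

#include <cassert>
#include <cstdint>

// C restatement of the select-based SRL_PARTS lowering above.
static void srl128(uint64_t lo, uint64_t hi, unsigned amt,
                   uint64_t &outLo, uint64_t &outHi) {
  assert(amt > 0 && amt < 128 && "sketch assumes a non-degenerate shift");
  long bm = 64 - (long)amt;            // the DAG value 'bm'
  if (bm <= 0) {                       // 64 - amt <= 0: high word exhausted
    outHi = 0;                         // Hi_Neg
    outLo = hi >> (unsigned)-bm;       // Lo_Neg: hi >> (amt - 64)
  } else {
    uint64_t carries = hi << bm;       // high bits crossing into the low word
    outHi = hi >> amt;                 // Hi_Pos
    outLo = (lo >> amt) | carries;     // Lo_Pos
  }
}

int main() {
  uint64_t lo, hi;
  srl128(1, 1, 1, lo, hi);             // (2^64 + 1) >> 1
  assert(hi == 0 && lo == 0x8000000000000000ULL);
  srl128(0, 0xFF00, 72, lo, hi);       // amt >= 64 path: lo = hi >> 8
  assert(hi == 0 && lo == 0xFF);
  return 0;
}
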
case ISD::SINT_TO_FP: {
assert(Op.getOperand(0).getValueType() == MVT::i64 &&
"Unhandled SINT_TO_FP type in custom expander!");
@@ -509,7 +623,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
if (!isDouble) //Promote
src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
-
+
src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src);
@@ -519,14 +633,14 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
// FIXME there isn't really any debug info here
-
+
SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
return Lo;
}
case ISD::GlobalTLSAddress:
- assert(0 && "TLS not implemented for Alpha.");
+ llvm_unreachable("TLS not implemented for Alpha.");
case ISD::GlobalAddress: {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
@@ -540,11 +654,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
return Lo;
} else
- return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
}
case ISD::ExternalSymbol: {
- return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
->getSymbol(), MVT::i64),
DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
@@ -554,7 +668,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SREM:
//Expand only in the constant case
if (Op.getOperand(1).getOpcode() == ISD::Constant) {
- MVT VT = Op.getNode()->getValueType(0);
+ EVT VT = Op.getNode()->getValueType(0);
SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
BuildUDIV(Op.getNode(), DAG, NULL) :
BuildSDIV(Op.getNode(), DAG, NULL);
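
Alpha has no integer divide instruction, so the backend either calls a runtime routine (the opstr path below) or, for constant divisors, lets BuildSDIV/BuildUDIV rewrite the division as a multiply-high by a precomputed magic reciprocal plus shifts. A worked instance for unsigned division by 10 (the magic constant is the standard one for a 64-bit divisor of 10; the __int128 multiply is this sketch's stand-in for the MULHU node):

#include <cassert>
#include <cstdint>

// x / 10 with no divide: multiply-high by ceil(2^67 / 10), then shift
// by 3. BuildUDIV derives such a (magic, shift) pair for any constant.
static uint64_t udiv10(uint64_t x) {
  const uint64_t Magic = 0xCCCCCCCCCCCCCCCDULL; // ceil(2^67 / 10)
  uint64_t hi = (uint64_t)(((unsigned __int128)x * Magic) >> 64); // MULHU
  return hi >> 3;
}

int main() {
  assert(udiv10(0) == 0);
  assert(udiv10(99) == 9);
  assert(udiv10(0xFFFFFFFFFFFFFFFFULL) == 0xFFFFFFFFFFFFFFFFULL / 10);
  return 0;
}
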
@@ -567,7 +681,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::UDIV:
if (Op.getValueType().isInteger()) {
if (Op.getOperand(1).getOpcode() == ISD::Constant)
- return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
: BuildUDIV(Op.getNode(), DAG, NULL);
const char* opstr = 0;
switch (Op.getOpcode()) {
@@ -601,12 +715,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
SDValue SrcP = Op.getOperand(2);
const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-
+
SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0);
SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0);
- SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
+ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
DAG.getConstant(8, MVT::i64));
- Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
NP, NULL,0, MVT::i32);
SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
DAG.getConstant(8, MVT::i64));
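
Both the VACOPY lowering above and the VASTART case that follows treat an Alpha va_list as a two-field, 16-byte record: an 8-byte pointer to the register save area at offset 0 and a 32-bit running offset at byte 8 (hence the sign-extended i32 load and the truncating store). A sketch of that layout, inferred from the offsets in the code rather than quoted from the OSF/1 ABI:

#include <cstddef>
#include <cstdint>

// Layout implied by the 8-byte pointer copy plus the i32 accesses at
// offset 8 in the lowering above (field names are illustrative).
struct AlphaVaList {
  char    *Base;    // VarArgsBase: start of the register save area
  int32_t  Offset;  // VarArgsOffset: bytes of arguments consumed so far
  // 4 bytes of tail padding complete the second 8-byte slot
};

static_assert(offsetof(AlphaVaList, Offset) == 8, "offset field at +8");
static_assert(sizeof(AlphaVaList) == 16, "two 8-byte slots");
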
@@ -616,7 +730,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue VAListP = Op.getOperand(1);
const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-
+
// vastart stores the address of the VarArgsBase and VarArgsOffset
SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0);
@@ -625,13 +739,13 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64),
SA2, NULL, 0, MVT::i32);
}
- case ISD::RETURNADDR:
+ case ISD::RETURNADDR:
return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(),
MVT::i64);
//FIXME: implement
case ISD::FRAMEADDR: break;
}
-
+
return SDValue();
}
@@ -655,7 +769,7 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
-AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::ConstraintType
AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -670,37 +784,37 @@ AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
std::vector<unsigned> AlphaTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break; // Unknown constraint letter
- case 'f':
+ case 'f':
return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
Alpha::F3 , Alpha::F4 , Alpha::F5 ,
- Alpha::F6 , Alpha::F7 , Alpha::F8 ,
- Alpha::F9 , Alpha::F10, Alpha::F11,
- Alpha::F12, Alpha::F13, Alpha::F14,
- Alpha::F15, Alpha::F16, Alpha::F17,
- Alpha::F18, Alpha::F19, Alpha::F20,
- Alpha::F21, Alpha::F22, Alpha::F23,
- Alpha::F24, Alpha::F25, Alpha::F26,
- Alpha::F27, Alpha::F28, Alpha::F29,
+ Alpha::F6 , Alpha::F7 , Alpha::F8 ,
+ Alpha::F9 , Alpha::F10, Alpha::F11,
+ Alpha::F12, Alpha::F13, Alpha::F14,
+ Alpha::F15, Alpha::F16, Alpha::F17,
+ Alpha::F18, Alpha::F19, Alpha::F20,
+ Alpha::F21, Alpha::F22, Alpha::F23,
+ Alpha::F24, Alpha::F25, Alpha::F26,
+ Alpha::F27, Alpha::F28, Alpha::F29,
Alpha::F30, Alpha::F31, 0);
- case 'r':
- return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
- Alpha::R3 , Alpha::R4 , Alpha::R5 ,
- Alpha::R6 , Alpha::R7 , Alpha::R8 ,
- Alpha::R9 , Alpha::R10, Alpha::R11,
- Alpha::R12, Alpha::R13, Alpha::R14,
- Alpha::R15, Alpha::R16, Alpha::R17,
- Alpha::R18, Alpha::R19, Alpha::R20,
- Alpha::R21, Alpha::R22, Alpha::R23,
- Alpha::R24, Alpha::R25, Alpha::R26,
- Alpha::R27, Alpha::R28, Alpha::R29,
+ case 'r':
+ return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
+ Alpha::R3 , Alpha::R4 , Alpha::R5 ,
+ Alpha::R6 , Alpha::R7 , Alpha::R8 ,
+ Alpha::R9 , Alpha::R10, Alpha::R11,
+ Alpha::R12, Alpha::R13, Alpha::R14,
+ Alpha::R15, Alpha::R16, Alpha::R17,
+ Alpha::R18, Alpha::R19, Alpha::R20,
+ Alpha::R21, Alpha::R22, Alpha::R23,
+ Alpha::R24, Alpha::R25, Alpha::R26,
+ Alpha::R27, Alpha::R28, Alpha::R29,
Alpha::R30, Alpha::R31, 0);
}
}
-
+
return std::vector<unsigned>();
}
//===----------------------------------------------------------------------===//
@@ -709,7 +823,8 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
MachineBasicBlock *
AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
assert((MI->getOpcode() == Alpha::CAS32 ||
MI->getOpcode() == Alpha::CAS64 ||
@@ -719,10 +834,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == Alpha::SWAP64) &&
"Unexpected instr type to insert");
- bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+ bool is32 = MI->getOpcode() == Alpha::CAS32 ||
MI->getOpcode() == Alpha::LAS32 ||
MI->getOpcode() == Alpha::SWAP32;
-
+
//Load-locked/store-conditional sequences for atomic ops all take the same form
//start:
//ll
@@ -734,30 +849,35 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
DebugLoc dl = MI->getDebugLoc();
MachineFunction::iterator It = BB;
++It;
-
+
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ // Inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I)
+ EM->insert(std::make_pair(*I, sinkMBB));
+
sinkMBB->transferSuccessors(thisMBB);
F->insert(It, llscMBB);
F->insert(It, sinkMBB);
BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
-
+
unsigned reg_res = MI->getOperand(0).getReg(),
reg_ptr = MI->getOperand(1).getReg(),
reg_v2 = MI->getOperand(2).getReg(),
reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
reg_res).addImm(0).addReg(reg_ptr);
switch (MI->getOpcode()) {
case Alpha::CAS32:
case Alpha::CAS64: {
- unsigned reg_cmp
+ unsigned reg_cmp
= F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
.addReg(reg_v2).addReg(reg_res);
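
The block structure built above is the classic load-locked/store-conditional retry loop: LDx_L reads the location, CMPEQ tests it against the expected value, STx_C attempts the store, and a failed conditional store branches back to llscMBB. The observable semantics of Alpha::CAS64, restated with std::atomic as a sketch (the inserter of course emits the raw LL/SC blocks, not a library call):

#include <atomic>
#include <cstdint>

// What Alpha::CAS64 computes: store 'desired' iff the location still
// holds 'expected', and return the value that was found in memory.
// compare_exchange_strong compiles to the same LL/SC retry loop on
// load-locked/store-conditional architectures.
static uint64_t cas64(std::atomic<uint64_t> &mem,
                      uint64_t expected, uint64_t desired) {
  uint64_t found = expected;
  mem.compare_exchange_strong(found, desired);
  return found;
}

int main() {
  std::atomic<uint64_t> loc(5);
  cas64(loc, 5, 9);            // expected value matches: loc becomes 9
  return loc.load() == 9 ? 0 : 1;
}
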
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 492536735454..b580c9d71264 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -62,12 +62,11 @@ namespace llvm {
class AlphaTargetLowering : public TargetLowering {
int VarArgsOffset; // What is the offset to the first vaarg
int VarArgsBase; // What is the base FrameIndex
- bool useITOF;
public:
explicit AlphaTargetLowering(TargetMachine &TM);
/// getSetCCResultType - Get the SETCC result ValueType
- virtual MVT getSetCCResultType(MVT VT) const;
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
@@ -82,24 +81,21 @@ namespace llvm {
// Friendly names for dumps
const char *getTargetNodeName(unsigned Opcode) const;
- /// LowerCallTo - This hook lowers an abstract call to a function into an
- /// actual call.
- virtual std::pair<SDValue, SDValue>
- LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
- bool isVarArg, bool isInreg, unsigned NumFixedArgs, unsigned CC,
- bool isTailCall, SDValue Callee, ArgListTy &Args,
- SelectionDAG &DAG, DebugLoc dl);
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
ConstraintType getConstraintType(const std::string &Constraint) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
-
- bool hasITOF() { return useITOF; }
+ EVT VT) const;
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -111,6 +107,26 @@ namespace llvm {
void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
SelectionDAG &DAG);
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
};
}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 76a594fba456..86173ff2721b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
AlphaInstrInfo::AlphaInstrInfo()
@@ -200,29 +201,7 @@ AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FrameIdx).addReg(Alpha::F31);
else
- abort();
-}
-
-void AlphaInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Opc = 0;
- if (RC == Alpha::F4RCRegisterClass)
- Opc = Alpha::STS;
- else if (RC == Alpha::F8RCRegisterClass)
- Opc = Alpha::STT;
- else if (RC == Alpha::GPRCRegisterClass)
- Opc = Alpha::STQ;
- else
- abort();
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
+ llvm_unreachable("Unhandled register class");
}
void
@@ -245,28 +224,7 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
.addFrameIndex(FrameIdx).addReg(Alpha::F31);
else
- abort();
-}
-
-void AlphaInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Opc = 0;
- if (RC == Alpha::F4RCRegisterClass)
- Opc = Alpha::LDS;
- else if (RC == Alpha::F8RCRegisterClass)
- Opc = Alpha::LDT;
- else if (RC == Alpha::GPRCRegisterClass)
- Opc = Alpha::LDQ;
- else
- abort();
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
+ llvm_unreachable("Unhandled register class");
}
MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -331,7 +289,7 @@ static unsigned AlphaRevCondCode(unsigned Opcode) {
case Alpha::FBLE: return Alpha::FBGT;
case Alpha::FBLT: return Alpha::FBGE;
default:
- assert(0 && "Unknown opcode");
+ llvm_unreachable("Unknown opcode");
}
return 0; // Not reached
}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index ea0988553acc..274f452ab74d 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -54,20 +54,10 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index e73bdf9f6e91..3b98206e5b1f 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -702,7 +702,7 @@ def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
//misc FP selects
//Select double
-
+
def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
(FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
@@ -791,12 +791,14 @@ def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in
-def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",[], s_ftoi>; //Floating to integer move, S_floating
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in
def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
[(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
-def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",[], s_itof>; //Integer to floating move, S_floating
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
+ [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
[(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
@@ -818,6 +820,10 @@ let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
[(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
+ (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
+def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
+ (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
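
The two new select patterns avoid a branch: itoft moves the i64 condition into an FP register as raw bits, and the fcmov then substitutes the false arm when that register is zero. The semantics for the 0-or-1 conditions setcc produces, as a sketch (the memcpy stands in for itoft's raw bit move and is not what the pattern emits):

#include <cstdint>
#include <cstring>

// select cond ? st : sf, the way the fcmov patterns do it. Only the
// all-zero bit pattern maps to an FP zero, so any 0/1 condition works.
static double selectViaFcmov(uint64_t cond, double st, double sf) {
  double fcond;
  std::memcpy(&fcond, &cond, sizeof fcond); // itoft: move bits, no convert
  return (fcond == 0.0) ? sf : st;          // take sf when the reg is zero
}

int main() {
  return selectViaFcmov(1, 2.0, 3.0) == 2.0 &&
         selectViaFcmov(0, 2.0, 3.0) == 3.0 ? 0 : 1;
}
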
/////////////////////////////////////////////////////////
//Branching
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index ba7478e90ecc..d32813552f01 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -16,8 +16,9 @@
#include "AlphaRelocations.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Config/alloca.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
@@ -57,12 +58,12 @@ static void EmitBranchToAt(void *At, void *To) {
AtI[0] = BUILD_OR(0, 27, 27);
- DOUT << "Stub targeting " << To << "\n";
+ DEBUG(errs() << "Stub targeting " << To << "\n");
for (int x = 1; x <= 8; ++x) {
AtI[2*x - 1] = BUILD_SLLi(27,27,8);
unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
- //DOUT << "outputing " << hex << d << dec << "\n";
+ //DEBUG(errs() << "outputting " << hex << d << dec << "\n");
AtI[2*x] = BUILD_ORi(27, 27, d);
}
AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
@@ -71,7 +72,7 @@ static void EmitBranchToAt(void *At, void *To) {
void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
//FIXME
- assert(0);
+ llvm_unreachable(0);
}
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
@@ -86,12 +87,12 @@ extern "C" {
//rewrite the stub to an unconditional branch
if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
- DOUT << "Came from a stub, rewriting\n";
+ DEBUG(errs() << "Came from a stub, rewriting\n");
EmitBranchToAt(CameFromStub, Target);
} else {
- DOUT << "confused, didn't come from stub at " << CameFromStub
- << " old jump vector " << oldpv
- << " new jump vector " << Target << "\n";
+ DEBUG(errs() << "confused, didn't come from stub at " << CameFromStub
+ << " old jump vector " << oldpv
+ << " new jump vector " << Target << "\n");
}
//Change pv to new Target
@@ -184,8 +185,7 @@ extern "C" {
);
#else
void AlphaCompilationCallback() {
- cerr << "Cannot call AlphaCompilationCallback() on a non-Alpha arch!\n";
- abort();
+ llvm_unreachable("Cannot call AlphaCompilationCallback() on a non-Alpha arch!");
}
#endif
}
@@ -199,7 +199,7 @@ void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
for (int x = 0; x < 19; ++ x)
JCE.emitWordLE(0);
EmitBranchToAt(Addr, Fn);
- DOUT << "Emitting Stub to " << Fn << " at [" << Addr << "]\n";
+ DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n");
return JCE.finishGVStub(F);
}
@@ -241,34 +241,34 @@ void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
long idx = 0;
bool doCommon = true;
switch ((Alpha::RelocationType)MR->getRelocationType()) {
- default: assert(0 && "Unknown relocation type!");
+ default: llvm_unreachable("Unknown relocation type!");
case Alpha::reloc_literal:
//This is a LDQl
idx = MR->getGOTIndex();
- DOUT << "Literal relocation to slot " << idx;
+ DEBUG(errs() << "Literal relocation to slot " << idx);
idx = (idx - GOToffset) * 8;
- DOUT << " offset " << idx << "\n";
+ DEBUG(errs() << " offset " << idx << "\n");
break;
case Alpha::reloc_gprellow:
idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
idx = getLower16(idx);
- DOUT << "gprellow relocation offset " << idx << "\n";
- DOUT << " Pointer is " << (void*)MR->getResultPointer()
- << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ DEBUG(errs() << "gprellow relocation offset " << idx << "\n");
+ DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
break;
case Alpha::reloc_gprelhigh:
idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
idx = getUpper16(idx);
- DOUT << "gprelhigh relocation offset " << idx << "\n";
- DOUT << " Pointer is " << (void*)MR->getResultPointer()
- << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ DEBUG(errs() << "gprelhigh relocation offset " << idx << "\n");
+ DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
break;
case Alpha::reloc_gpdist:
switch (*RelocPos >> 26) {
case 0x09: //LDAH
idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
idx = getUpper16(idx);
- DOUT << "LDAH: " << idx << "\n";
+ DEBUG(errs() << "LDAH: " << idx << "\n");
//add the relocation to the map
gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
break;
@@ -278,10 +278,10 @@ void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
idx = &GOTBase[GOToffset * 8] -
(unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
idx = getLower16(idx);
- DOUT << "LDA: " << idx << "\n";
+ DEBUG(errs() << "LDA: " << idx << "\n");
break;
default:
- assert(0 && "Cannot handle gpdist yet");
+ llvm_unreachable("Cannot handle gpdist yet");
}
break;
case Alpha::reloc_bsr: {
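
The gpdist fixups above split a 32-bit GP displacement across an LDAH/LDA pair: LDAH adds its immediate shifted left 16 and LDA adds a sign-extended 16-bit immediate, so the high half must be rounded up whenever the low half goes negative. A sketch of the split (these helpers mirror what the backend's getUpper16/getLower16 must compute; their actual source is not shown in this hunk):

#include <cassert>
#include <cstdint>

// Split disp so that upper16(d) * 2^16 + lower16(d) == d,
// with both halves sign-extended as the hardware does.
static int16_t lower16(int32_t disp) { return (int16_t)disp; }
static int16_t upper16(int32_t disp) {
  // Subtracting the sign-extended low half rounds the high half up
  // when lower16() is negative.
  return (int16_t)((disp - lower16(disp)) >> 16);
}

int main() {
  const int32_t tests[] = { 0x18000, -42, 0x7FFF, -0x12345 };
  for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); ++i) {
    int32_t d = tests[i];
    assert(upper16(d) * 65536 + lower16(d) == d);
  }
  return 0;
}
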
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
new file mode 100644
index 000000000000..b652a5305a01
--- /dev/null
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCAsmInfo.h"
+using namespace llvm;
+
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ PICJumpTableDirective = ".gprel32";
+ WeakRefDirective = "\t.weak\t";
+}
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/AlphaMCAsmInfo.h
new file mode 100644
index 000000000000..c27065d28427
--- /dev/null
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct AlphaMCAsmInfo : public MCAsmInfo {
+ explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 0ff53c7cc309..98e97304c64e 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -28,6 +28,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
@@ -149,8 +151,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
//variable locals
//<- SP
-void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+unsigned
+AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -172,16 +176,16 @@ void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Now add the frame object offset to the offset from the virtual frame index.
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
- DOUT << "FI: " << FrameIndex << " Offset: " << Offset << "\n";
+ DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
Offset += MF.getFrameInfo()->getStackSize();
- DOUT << "Corrected Offset " << Offset
- << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n";
+ DEBUG(errs() << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
if (Offset > IMM_HIGH || Offset < IMM_LOW) {
- DOUT << "Unconditionally using R28 for evil purposes Offset: "
- << Offset << "\n";
+ DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n");
//so in this case, we need to use a temporary register, and move the
//original inst off the SP/FP
//fix up the old:
@@ -195,6 +199,7 @@ void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
MI.getOperand(i).ChangeToImmediate(Offset);
}
+ return 0;
}
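
The R28 fallback above exists because Alpha load/store instructions carry only a signed 16-bit displacement; frame offsets outside that window need an extra LDAH through the scratch register. The range test, as a sketch (IMM_LOW/IMM_HIGH match the names used in the code; their actual definition lives elsewhere in the backend):

// Signed 16-bit displacement field of Alpha load/store instructions.
static const int IMM_LOW  = -32768;
static const int IMM_HIGH =  32767;

// Offsets outside this range take the LDAH + R28 fallback path above.
static bool fitsInDisplacement(int Offset) {
  return Offset >= IMM_LOW && Offset <= IMM_HIGH;
}

int main() {
  return (fitsInDisplacement(32767) && !fitsInDisplacement(40000)) ? 0 : 1;
}
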
@@ -244,8 +249,10 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
.addImm(getLower16(NumBytes)).addReg(Alpha::R30);
} else {
- cerr << "Too big a stack frame at " << NumBytes << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Too big a stack frame at " + NumBytes;
+ llvm_report_error(Msg.str());
}
//now if we need to, save the old FP and set the new
@@ -294,14 +301,16 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
.addImm(getLower16(NumBytes)).addReg(Alpha::R30);
} else {
- cerr << "Too big a stack frame at " << NumBytes << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Too big a stack frame at " + NumBytes;
+ llvm_report_error(Msg.str());
}
}
}
unsigned AlphaRegisterInfo::getRARegister() const {
- assert(0 && "What is the return address register");
+ llvm_unreachable("What is the return address register");
return 0;
}
@@ -310,17 +319,17 @@ unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
}
unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
+ llvm_unreachable("What is the exception register");
return 0;
}
unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
+ llvm_unreachable("What is the exception handler register");
return 0;
}
int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "What is the dwarf register number");
+ llvm_unreachable("What is the dwarf register number");
return -1;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 5012fe8ccd1e..66f089873d78 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -41,8 +41,9 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
//void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
index d5a9365d75c1..bda7104ab926 100644
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -16,7 +16,7 @@
#include "AlphaGenSubtarget.inc"
using namespace llvm;
-AlphaSubtarget::AlphaSubtarget(const Module &M, const std::string &FS)
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &FS)
: HasCT(false) {
std::string CPU = "generic";
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
index 0a944cb0a634..f0eb93c6cba2 100644
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -20,7 +20,6 @@
#include <string>
namespace llvm {
-class Module;
class AlphaSubtarget : public TargetSubtarget {
protected:
@@ -31,9 +30,9 @@ protected:
public:
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- AlphaSubtarget(const Module &M, const std::string &FS);
+ AlphaSubtarget(const std::string &TT, const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 060089cbb6d6..b8bc13b63097 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -12,60 +12,26 @@
#include "Alpha.h"
#include "AlphaJITInfo.h"
-#include "AlphaTargetAsmInfo.h"
+#include "AlphaMCAsmInfo.h"
#include "AlphaTargetMachine.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-// Register the targets
-static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
-
-// No assembler printer by default
-AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
-
-// Force static initialization.
-extern "C" void LLVMInitializeAlphaTarget() { }
-
-const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
- return new AlphaTargetAsmInfo(*this);
-}
-
-unsigned AlphaTargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "alpha*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 5 && TT[0] == 'a' && TT[1] == 'l' && TT[2] == 'p' &&
- TT[3] == 'h' && TT[4] == 'a')
- return 20;
- // If the target triple is something non-alpha, we don't match.
- if (!TT.empty()) return 0;
-
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer64)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
-
- return getJITMatchQuality()/2;
-}
-
-unsigned AlphaTargetMachine::getJITMatchQuality() {
-#ifdef __alpha
- return 10;
-#else
- return 0;
-#endif
+extern "C" void LLVMInitializeAlphaTarget() {
+ // Register the target.
+ RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
+ RegisterAsmInfo<AlphaMCAsmInfo> Y(TheAlphaTarget);
}
-AlphaTargetMachine::AlphaTargetMachine(const Module &M, const std::string &FS)
- : DataLayout("e-f128:128:128"),
+AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ DataLayout("e-f128:128:128"),
FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
JITInfo(*this),
- Subtarget(M, FS),
+ Subtarget(TT, FS),
TLInfo(*this) {
setRelocationModel(Reloc::PIC_);
}
@@ -84,51 +50,40 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// Must run branch selection immediately preceding the asm printer
PM.add(createAlphaBranchSelectionPass());
- return false;
-}
-bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
PM.add(createAlphaLLRPPass(*this));
- // Output assembly language.
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
return false;
}
bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE) {
+ MachineCodeEmitter &MCE) {
PM.add(createAlphaCodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
return false;
}
bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE) {
+ JITCodeEmitter &JCE) {
PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
+ return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ PM.add(createAlphaObjectCodeEmitterPass(*this, OCE));
return false;
}
bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
- return addCodeEmitter(PM, OptLevel, DumpAsm, MCE);
+ return addCodeEmitter(PM, OptLevel, MCE);
}
bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
- return addCodeEmitter(PM, OptLevel, DumpAsm, JCE);
+ return addCodeEmitter(PM, OptLevel, JCE);
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ return addCodeEmitter(PM, OptLevel, OCE);
}
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 26684c7778a4..f03e9388f7de 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -34,18 +34,9 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaSubtarget Subtarget;
AlphaTargetLowering TLInfo;
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- TargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- AlphaTargetMachine(const Module &M, const std::string &FS);
+ AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -61,31 +52,24 @@ public:
return &JITInfo;
}
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
-
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ ObjectCodeEmitter &JCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE);
-
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE);
};
} // end namespace llvm
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 982ef5e85194..d8e8b79f5398 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -17,16 +17,20 @@
#include "AlphaInstrInfo.h"
#include "AlphaTargetMachine.h"
#include "llvm/Module.h"
-#include "llvm/MDNode.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -37,21 +41,22 @@ namespace {
/// Unique incrementer for label values for referencing Global values.
///
- explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm,
- const TargetAsmInfo *T, bool V)
+ explicit AlphaAsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
+ const MCAsmInfo *T, bool V)
: AsmPrinter(o, tm, T, V) {}
virtual const char *getPassName() const {
return "Alpha Assembly Printer";
}
- bool printInstruction(const MachineInstr *MI);
+ void printInstruction(const MachineInstr *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
void printOp(const MachineOperand &MO, bool IsCallOp = false);
void printOperand(const MachineInstr *MI, int opNum);
- void printBaseOffsetPair (const MachineInstr *MI, int i, bool brackets=true);
- void printModuleLevelGV(const GlobalVariable* GVar);
+ void printBaseOffsetPair(const MachineInstr *MI, int i, bool brackets=true);
+ void PrintGlobalVariable(const GlobalVariable *GVar);
bool runOnMachineFunction(MachineFunction &F);
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
@@ -62,17 +67,6 @@ namespace {
};
} // end of anonymous namespace
-/// createAlphaCodePrinterPass - Returns a pass that prints the Alpha
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description. This should work
-/// regardless of whether the function is in SSA form.
-///
-FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o,
- TargetMachine &tm,
- bool verbose) {
- return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
-}
-
#include "AlphaGenAsmWriter.inc"
void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
@@ -81,7 +75,7 @@ void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
if (MO.getType() == MachineOperand::MO_Register) {
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
"Not physreg??");
- O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ O << getRegisterName(MO.getReg());
} else if (MO.isImm()) {
O << MO.getImm();
assert(MO.getImm() < (1 << 30));
@@ -92,24 +86,21 @@ void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << RI.get(MO.getReg()).AsmName;
+ O << getRegisterName(MO.getReg());
return;
case MachineOperand::MO_Immediate:
- cerr << "printOp() does not handle immediate values\n";
- abort();
+ llvm_unreachable("printOp() does not handle immediate values");
return;
case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
return;
case MachineOperand::MO_ConstantPoolIndex:
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getIndex();
return;
@@ -117,14 +108,12 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
O << MO.getSymbolName();
return;
- case MachineOperand::MO_GlobalAddress: {
- GlobalValue *GV = MO.getGlobal();
- O << Mang->getValueName(GV);
+ case MachineOperand::MO_GlobalAddress:
+ O << Mang->getMangledName(MO.getGlobal());
return;
- }
case MachineOperand::MO_JumpTableIndex:
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
@@ -151,13 +140,14 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out labels for the function.
const Function *F = MF.getFunction();
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
EmitAlignment(MF.getAlignment(), F);
switch (F->getLinkage()) {
- default: assert(0 && "Unknown linkage type!");
+ default: llvm_unreachable("Unknown linkage type!");
case Function::InternalLinkage: // Symbols default to internal.
case Function::PrivateLinkage:
+ case Function::LinkerPrivateLinkage:
break;
case Function::ExternalLinkage:
O << "\t.globl " << CurrentFnName << "\n";
@@ -166,7 +156,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
case Function::WeakODRLinkage:
case Function::LinkOnceAnyLinkage:
case Function::LinkOnceODRLinkage:
- O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ O << MAI->getWeakRefDirective() << CurrentFnName << "\n";
break;
}
@@ -180,17 +170,19 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I) {
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
// Print the assembly for the instruction.
++EmittedInsts;
- if (!printInstruction(II)) {
- assert(0 && "Unhandled instruction in asm writer!");
- abort();
- }
+ processDebugLoc(II, true);
+ printInstruction(II);
+
+ if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ EmitComments(*II);
+ O << '\n';
+ processDebugLoc(II, false);
}
}
@@ -200,17 +192,15 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-bool AlphaAsmPrinter::doInitialization(Module &M)
-{
- if(TM.getSubtarget<AlphaSubtarget>().hasCT())
+void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (TM.getSubtarget<AlphaSubtarget>().hasCT())
O << "\t.arch ev6\n"; //This might need to be ev67, so leave this test here
else
O << "\t.arch ev6\n";
O << "\t.set noat\n";
- return AsmPrinter::doInitialization(M);
}
-void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+void AlphaAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer()) return; // External globals require no code
@@ -219,15 +209,14 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
if (EmitSpecialLLVMGlobal(GVar))
return;
- std::string name = Mang->getValueName(GVar);
+ std::string name = Mang->getMangledName(GVar);
Constant *C = GVar->getInitializer();
- if (isa<MDNode>(C) || isa<MDString>(C))
- return;
unsigned Size = TD->getTypeAllocSize(C->getType());
unsigned Align = TD->getPreferredAlignmentLog(GVar);
// 0: Switch to section
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
// 1: Check visibility
printVisibility(name, GVar->getVisibility());
@@ -239,23 +228,22 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::CommonLinkage:
- O << TAI->getWeakRefDirective() << name << '\n';
+ O << MAI->getWeakRefDirective() << name << '\n';
break;
case GlobalValue::AppendingLinkage:
case GlobalValue::ExternalLinkage:
- O << TAI->getGlobalDirective() << name << "\n";
+ O << MAI->getGlobalDirective() << name << "\n";
break;
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
break;
default:
- assert(0 && "Unknown linkage type!");
- cerr << "Unknown linkage type!\n";
- abort();
+ llvm_unreachable("Unknown linkage type!");
}
// 3: Type, Size, Align
- if (TAI->hasDotTypeDotSizeDirective()) {
+ if (MAI->hasDotTypeDotSizeDirective()) {
O << "\t.type\t" << name << ", @object\n";
O << "\t.size\t" << name << ", " << Size << "\n";
}
@@ -268,14 +256,6 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
O << '\n';
}
-bool AlphaAsmPrinter::doFinalization(Module &M) {
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I);
-
- return AsmPrinter::doFinalization(M);
-}
-
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -298,12 +278,6 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
// Force static initialization.
-extern "C" void LLVMInitializeAlphaAsmPrinter() { }
-
-namespace {
- static struct Register {
- Register() {
- AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass);
- }
- } Registrator;
+extern "C" void LLVMInitializeAlphaAsmPrinter() {
+ RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
}
diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile
index c5b3e946695b..3c64a3c606f3 100644
--- a/lib/Target/Alpha/AsmPrinter/Makefile
+++ b/lib/Target/Alpha/AsmPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Alpha/Makefile ---------------------------*- Makefile -*-===##
+##===- lib/Target/Alpha/AsmPrinter/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index 2a382d5cadf3..b4f41aebd8db 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -8,6 +8,7 @@ tablegen(AlphaGenInstrInfo.inc -gen-instr-desc)
tablegen(AlphaGenCodeEmitter.inc -gen-emitter)
tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
+tablegen(AlphaGenCallingConv.inc -gen-callingconv)
tablegen(AlphaGenSubtarget.inc -gen-subtarget)
add_llvm_target(AlphaCodeGen
@@ -18,9 +19,9 @@ add_llvm_target(AlphaCodeGen
AlphaISelLowering.cpp
AlphaJITInfo.cpp
AlphaLLRP.cpp
+ AlphaMCAsmInfo.cpp
AlphaRegisterInfo.cpp
AlphaSubtarget.cpp
- AlphaTargetAsmInfo.cpp
AlphaTargetMachine.cpp
)
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
index d6c82c7d7435..d2d71097410b 100644
--- a/lib/Target/Alpha/Makefile
+++ b/lib/Target/Alpha/Makefile
@@ -15,8 +15,8 @@ BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \
AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
- AlphaGenSubtarget.inc
+ AlphaGenCallingConv.inc AlphaGenSubtarget.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
new file mode 100644
index 000000000000..f7099b9ae975
--- /dev/null
+++ b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+llvm::Target llvm::TheAlphaTarget;
+
+extern "C" void LLVMInitializeAlphaTargetInfo() {
+ RegisterTarget<Triple::alpha, /*HasJIT=*/true>
+ X(TheAlphaTarget, "alpha", "Alpha [experimental]");
+}
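
With TheAlphaTarget registered this way, clients locate the backend through the TargetRegistry by triple string instead of the Module-matching scheme deleted from AlphaTargetMachine.cpp above. A sketch of the lookup as it worked in this era of the API (the triple string is illustrative):

#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <string>

using namespace llvm;

// Assumes the Alpha initialization entry points have been linked in.
extern "C" void LLVMInitializeAlphaTargetInfo();
extern "C" void LLVMInitializeAlphaTarget();

TargetMachine *createAlphaTM() {
  LLVMInitializeAlphaTargetInfo();
  LLVMInitializeAlphaTarget();
  std::string Err;
  const Target *T = TargetRegistry::lookupTarget("alpha-unknown-linux-gnu",
                                                 Err);
  if (!T) return 0; // no Alpha backend linked in
  // Second argument is the subtarget feature string; empty = generic CPU.
  return T->createTargetMachine("alpha-unknown-linux-gnu", "");
}
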
diff --git a/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..2a7291b90aeb
--- /dev/null
+++ b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAlphaInfo
+ AlphaTargetInfo.cpp
+ )
+
+add_dependencies(LLVMAlphaInfo AlphaCodeGenTable_gen)
diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Alpha/TargetInfo/Makefile
new file mode 100644
index 000000000000..de01d7f8e8ef
--- /dev/null
+++ b/lib/Target/Alpha/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Alpha/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
new file mode 100644
index 000000000000..91fd5dde5a23
--- /dev/null
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -0,0 +1,242 @@
+//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Blackfin assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Blackfin.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN BlackfinAsmPrinter : public AsmPrinter {
+ public:
+ BlackfinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *MAI, bool V)
+ : AsmPrinter(O, TM, MAI, V) {}
+
+ virtual const char *getPassName() const {
+ return "Blackfin Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printMemoryOperand(const MachineInstr *MI, int opNum);
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void emitLinkage(const std::string &n, GlobalValue::LinkageTypes l);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
+ };
+} // end of anonymous namespace
+
+#include "BlackfinGenAsmWriter.inc"
+
+extern "C" void LLVMInitializeBlackfinAsmPrinter() {
+ RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
+}
+
+void BlackfinAsmPrinter::emitLinkage(const std::string &name,
+ GlobalValue::LinkageTypes l) {
+ switch (l) {
+ default: llvm_unreachable("Unknown linkage type!");
+ case GlobalValue::InternalLinkage: // Symbols default to internal.
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ case GlobalValue::ExternalLinkage:
+ O << MAI->getGlobalDirective() << name << "\n";
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ O << MAI->getGlobalDirective() << name << "\n";
+ O << MAI->getWeakDefDirective() << name << "\n";
+ break;
+ }
+}
+
+void BlackfinAsmPrinter::PrintGlobalVariable(const GlobalVariable* GV) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GV->hasInitializer() || EmitSpecialLLVMGlobal(GV))
+ return;
+
+ std::string name = Mang->getMangledName(GV);
+ Constant *C = GV->getInitializer();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,
+ TM));
+ emitLinkage(name, GV->getLinkage());
+ EmitAlignment(TD->getPreferredAlignmentLog(GV), GV);
+ printVisibility(name, GV->getVisibility());
+
+ O << "\t.type " << name << ", STT_OBJECT\n";
+ O << "\t.size " << name << ',' << TD->getTypeAllocSize(C->getType()) << '\n';
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ EmitConstantPool(MF.getConstantPool());
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ const Function *F = MF.getFunction();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitAlignment(2, F);
+ emitLinkage(CurrentFnName, F->getLinkage());
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ", STT_FUNC\n"
+ << CurrentFnName << ":\n";
+
+ if (DW)
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ processDebugLoc(II, true);
+
+ printInstruction(II);
+ if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ EmitComments(*II);
+ O << '\n';
+
+ processDebugLoc(II, false);
+ ++EmittedInsts;
+ }
+ }
+
+ O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
+
+ if (DW)
+ DW->EndFunction(&MF);
+
+ return false;
+}
+
+void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ O << getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << Mang->getMangledName(MO.getGlobal());
+ printOffset(MO.getOffset());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << Mang->makeNameProper(MO.getSymbolName());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ }
+}
+
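+// printMemoryOperand - Print a base operand followed by an optional offset
+// operand; a zero immediate offset is omitted entirely.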
+void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum) {
+ printOperand(MI, opNum);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << " + ";
+ printOperand(MI, opNum+1);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo);
+
+ return false;
+}
+
+bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printOperand(MI, OpNo);
+ O << ']';
+
+ return false;
+}
diff --git a/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt b/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..795aebfe2b8e
--- /dev/null
+++ b/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMBlackfinAsmPrinter
+ BlackfinAsmPrinter.cpp
+ )
+add_dependencies(LLVMBlackfinAsmPrinter BlackfinCodeGenTable_gen)
diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile
new file mode 100644
index 000000000000..091d4df0bcb8
--- /dev/null
+++ b/lib/Target/Blackfin/AsmPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Blackfin/AsmPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinAsmPrinter
+
+# Hack: we need to include 'main' Blackfin target directory to grab private
+# headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h
new file mode 100644
index 000000000000..ec1fa8689ded
--- /dev/null
+++ b/lib/Target/Blackfin/Blackfin.h
@@ -0,0 +1,38 @@
+//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Blackfin back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_BLACKFIN_H
+#define TARGET_BLACKFIN_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ class FunctionPass;
+ class BlackfinTargetMachine;
+
+ FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+ extern Target TheBlackfinTarget;
+
+} // end namespace llvm
+
+// Defines symbolic names for Blackfin registers. This defines a mapping from
+// register name to register number.
+#include "BlackfinGenRegisterNames.inc"
+
+// Defines symbolic names for the Blackfin instructions.
+#include "BlackfinGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Blackfin/Blackfin.td b/lib/Target/Blackfin/Blackfin.td
new file mode 100644
index 000000000000..b9046383fa6a
--- /dev/null
+++ b/lib/Target/Blackfin/Blackfin.td
@@ -0,0 +1,201 @@
+//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Blackfin target machine: subtarget features,
+// silicon anomaly workarounds, and the supported processor models.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Blackfin Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true",
+ "Build for SDRAM">;
+
+def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true",
+ "Assume instruction cache lookaside buffers are enabled at runtime">;
+
+//===----------------------------------------------------------------------===//
+// Bugs in the silicon become workarounds in the compiler.
+// See http://www.analog.com/ for the full list of IC anomalies.
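+// For example, selecting the bf532-0.4 processor below enables WA_CSYNC and
+// WA_HWLOOP among other workarounds, while bf532-0.6 needs only WA_MI_SHIFT,
+// WA_SPECLD, and WA_LCREGS.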
+//===----------------------------------------------------------------------===//
+
+def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true",
+ "Work around 05000074 - "
+ "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">;
+
+def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true",
+ "Work around 05000244 - "
+ "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">;
+
+def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true",
+ "Work around 05000245 - "
+ "Access in the Shadow of a Conditional Branch">;
+
+def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true",
+ "Work around 05000257 - "
+ "Interrupt/Exception During Short Hardware Loop">;
+
+def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true",
+ "Work around 05000283 - "
+ "System MMR Write Is Stalled Indefinitely when Killed">;
+
+def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true",
+ "Work around 05000312 - "
+ "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">;
+
+def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly",
+ "wa_killed_mmr", "true",
+ "Work around 05000315 - "
+ "Killed System MMR Write Completes Erroneously on Next System MMR Access">;
+
+def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true",
+ "Work around 05000371 - "
+ "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">;
+
+def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true",
+ "Work around 05000426 - "
+ "Speculative Fetches of Indirect-Pointer Instructions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "BlackfinRegisterInfo.td"
+include "BlackfinCallingConv.td"
+include "BlackfinInstrInfo.td"
+
+def BlackfinInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Blackfin processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, string Suffix, list<SubtargetFeature> Features>
+ : Processor<!strconcat(Name, Suffix), NoItineraries, Features>;
+
+def : Proc<"generic", "", []>;
+
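+// Each Core instantiation defines a family of processor names. For example,
+// defm BF512 : CoreBrodie<"bf512"> below should expand to bf512-0.1,
+// bf512-0.0, and bf512-any (from CoreBrodie), plus bf512 and bf512-none
+// (from Core).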
+multiclass Core<string Name, string Suffix,
+ list<SubtargetFeature> Features> {
+ def : Proc<Name, Suffix, Features>;
+ def : Proc<Name, "", Features>;
+ def : Proc<Name, "-none", []>;
+}
+
+multiclass CoreEdinburgh<string Name>
+ : Core<Name, "-0.6", [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS]> {
+ def : Proc<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS]>;
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+}
+multiclass CoreBraemar<string Name>
+ : Core<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreStirling<string Name>
+ : Core<Name, "-0.5", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMoab<string Name>
+ : Core<Name, "-0.3", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreTeton<string Name>
+ : Core<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreKookaburra<string Name>
+ : Core<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMockingbird<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+multiclass CoreBrodie<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+
+defm BF512 : CoreBrodie<"bf512">;
+defm BF514 : CoreBrodie<"bf514">;
+defm BF516 : CoreBrodie<"bf516">;
+defm BF518 : CoreBrodie<"bf518">;
+defm BF522 : CoreMockingbird<"bf522">;
+defm BF523 : CoreKookaburra<"bf523">;
+defm BF524 : CoreMockingbird<"bf524">;
+defm BF525 : CoreKookaburra<"bf525">;
+defm BF526 : CoreMockingbird<"bf526">;
+defm BF527 : CoreKookaburra<"bf527">;
+defm BF531 : CoreEdinburgh<"bf531">;
+defm BF532 : CoreEdinburgh<"bf532">;
+defm BF533 : CoreEdinburgh<"bf533">;
+defm BF534 : CoreBraemar<"bf534">;
+defm BF536 : CoreBraemar<"bf536">;
+defm BF537 : CoreBraemar<"bf537">;
+defm BF538 : CoreStirling<"bf538">;
+defm BF539 : CoreStirling<"bf539">;
+defm BF542 : CoreMoab<"bf542">;
+defm BF544 : CoreMoab<"bf544">;
+defm BF548 : CoreMoab<"bf548">;
+defm BF549 : CoreMoab<"bf549">;
+defm BF561 : CoreTeton<"bf561">;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Blackfin : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = BlackfinInstrInfo;
+}
diff --git a/lib/Target/Blackfin/BlackfinCallingConv.td b/lib/Target/Blackfin/BlackfinCallingConv.td
new file mode 100644
index 000000000000..0abc84c3c405
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinCallingConv.td
@@ -0,0 +1,30 @@
+//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Blackfin architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Blackfin C Calling convention.
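+// For example, a call f(i16 a, i32 b, i32 c, i32 d) would, per the rules
+// below, promote a to i32, pass a, b, and c in R0, R1, and R2, and place d
+// in a 4-byte aligned stack slot; an sret pointer argument goes in P0.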
+def CC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCIfSRet<CCAssignToReg<[P0]>>,
+ CCAssignToReg<[R0, R1, R2]>,
+ CCAssignToStack<4, 4>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Blackfin C return-value convention.
+def RetCC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCAssignToReg<[R0, R1]>
+]>;
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fc62a1884b1f
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -0,0 +1,191 @@
+//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Blackfin target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinISelLowering.h"
+#include "BlackfinTargetMachine.h"
+#include "BlackfinRegisterInfo.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// BlackfinDAGToDAGISel - Blackfin-specific code to select Blackfin machine
+/// instructions for SelectionDAG operations.
+namespace {
+ class BlackfinDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we
+ /// can make the right decision when generating code for different targets.
+ //const BlackfinSubtarget &Subtarget;
+ public:
+ BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "Blackfin DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "BlackfinGenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDValue Op);
+ bool SelectADDRspii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset);
+
+ // Walk the DAG after instruction selection, fixing register class issues.
+ void FixRegisterClasses(SelectionDAG &DAG);
+
+ const BlackfinInstrInfo &getInstrInfo() {
+ return *static_cast<const BlackfinTargetMachine&>(TM).getInstrInfo();
+ }
+ const BlackfinRegisterInfo *getRegisterInfo() {
+ return static_cast<const BlackfinTargetMachine&>(TM).getRegisterInfo();
+ }
+ };
+} // end anonymous namespace
+
+FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new BlackfinDAGToDAGISel(TM, OptLevel);
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void BlackfinDAGToDAGISel::InstructionSelect() {
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ DEBUG(errs() << "Selected selection DAG before regclass fixup:\n");
+ DEBUG(CurDAG->dump());
+ FixRegisterClasses(*CurDAG);
+}
+
+SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or ADDpp
+ // SP, Px
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ }
+ }
+
+ return SelectCode(Op);
+}
+
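+// SelectADDRspii - Match a stack-slot address: either a bare FrameIndex
+// (with an implied offset of 0), or a FrameIndex plus a non-negative,
+// word-aligned constant offset.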
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Op,
+ SDValue Addr,
+ SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) &&
+ (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline bool isCC(const TargetRegisterClass *RC) {
+ return RC == &BF::AnyCCRegClass || BF::AnyCCRegClass.hasSubClass(RC);
+}
+
+static inline bool isDCC(const TargetRegisterClass *RC) {
+ return RC == &BF::DRegClass || BF::DRegClass.hasSubClass(RC) || isCC(RC);
+}
+
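+// UpdateNodeOperand - Replace operand Num of N with Val. UpdateNodeOperands
+// may CSE the updated node with an existing one, so redirect all uses of N
+// to the node actually returned.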
+static void UpdateNodeOperand(SelectionDAG &DAG,
+ SDNode *N,
+ unsigned Num,
+ SDValue Val) {
+ SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
+ ops[Num] = Val;
+ SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size());
+ DAG.ReplaceAllUsesWith(N, New.getNode());
+}
+
+// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
+// choosing the proper register classes.
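+// CC-class values can only be copied to and from D registers, so when a
+// CC-class def feeds a use that expects an incompatible class (or vice
+// versa), the value is rerouted through a D register.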
+void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
+ const BlackfinInstrInfo &TII = getInstrInfo();
+ const BlackfinRegisterInfo *TRI = getRegisterInfo();
+ DAG.AssignTopologicalOrder();
+ HandleSDNode Dummy(DAG.getRoot());
+
+ for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin();
+ NI != DAG.allnodes_end(); ++NI) {
+ if (NI->use_empty() || !NI->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode());
+ for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
+ if (!UI->isMachineOpcode())
+ continue;
+
+ if (UI.getUse().getResNo() >= DefTID.getNumDefs())
+ continue;
+ const TargetRegisterClass *DefRC =
+ DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI);
+
+ const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode());
+ if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands())
+ continue;
+ const TargetRegisterClass *UseRC =
+ UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI);
+ if (!DefRC || !UseRC)
+ continue;
+ // We cannot copy CC <-> !(CC/D)
+ if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) {
+ SDNode *Copy =
+ DAG.getMachineNode(TargetInstrInfo::COPY_TO_REGCLASS,
+ NI->getDebugLoc(),
+ MVT::i32,
+ UI.getUse().get(),
+ DAG.getTargetConstant(BF::DRegClassID, MVT::i32));
+ UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0));
+ }
+ }
+ }
+ DAG.setRoot(Dummy.getValue());
+}
+
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
new file mode 100644
index 000000000000..4b321ec0fda1
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -0,0 +1,614 @@
+//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Blackfin uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinISelLowering.h"
+#include "BlackfinTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ setShiftAmountType(MVT::i16);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setStackPointerRegisterToSaveRestore(BF::SP);
+ setIntDivIsCheap(false);
+
+ // Set up the legal register classes.
+ addRegisterClass(MVT::i32, BF::DRegisterClass);
+ addRegisterClass(MVT::i16, BF::D16RegisterClass);
+
+ computeRegisterProperties();
+
+ // Blackfin doesn't have i1 loads or stores
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // i16 registers don't do much
+ setOperationAction(ISD::AND, MVT::i16, Promote);
+ setOperationAction(ISD::OR, MVT::i16, Promote);
+ setOperationAction(ISD::XOR, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote
+ // immediately.
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::SETCC, MVT::i16, Promote);
+
+ // Blackfin has no division
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ // No carry-in operations.
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+
+ // Blackfin has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // i32 has native CTPOP, but not CTLZ/CTTZ
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+
+ // READCYCLECOUNTER needs special type legalization.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
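+ // (See ReplaceNodeResults below for the CYCLES/CYCLES2 read ordering.)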
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+}
+
+const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case BFISD::CALL: return "BFISD::CALL";
+ case BFISD::RET_FLAG: return "BFISD::RET_FLAG";
+ case BFISD::Wrapper: return "BFISD::Wrapper";
+ }
+}
+
+MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
+ // SETCC always sets the CC register. Technically that is an i1 register, but
+ // that type is not legal, so we treat it as an i32 register.
+ return MVT::i32;
+}
+
+SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ Op = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ int JTI = cast<JumpTableSDNode>(Op)->getIndex();
+
+ Op = DAG.getTargetJumpTable(JTI, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue
+BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ?
+ BF::PRegisterClass : BF::DRegisterClass;
+ assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState");
+ assert(RC->hasType(RegVT) && "Unexpected regclass in CCState");
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ unsigned ObjSize = VA.getLocVT().getStoreSize();
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+BlackfinTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
+ // CCValAssign - represents the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getTarget(),
+ RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue Opi = Outs[i].Val;
+
+ // Expand to i32 if necessary
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::ZExt:
+ Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::AExt:
+ Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ }
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue());
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode()) {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ } else {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain);
+ }
+}
+
+SDValue
+BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = Outs[i].Val;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that are passed in registers must be collected in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ int Offset = VA.getLocMemOffset();
+ assert(Offset % 4 == 0 && "Unaligned LocMemOffset");
+ assert(VA.getLocVT() == MVT::i32 && "Illegal CCValAssign type");
+ SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
+ SDValue OffsetN = DAG.getIntPtrConstant(Offset);
+ OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
+ PseudoSourceValue::getStack(),
+ Offset));
+ }
+ }
+
+ // Transform all store nodes into one single node, since the stores are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<EVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ SDValue Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
+ InFlag.getNode() ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), RVLocs,
+ *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &RV = RVLocs[i];
+ unsigned Reg = RV.getLocReg();
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getLocVT(), InFlag);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ Chain = Chain.getValue(1);
+
+ // Callee is responsible for extending any i16 return values.
+ switch (RV.getLocInfo()) {
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate to valtype
+ if (RV.getLocInfo() != CCValAssign::Full)
+ Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+// Expansion of ADDE / SUBE. This is a bit involved since Blackfin doesn't have
+// add-with-carry instructions.
+SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) {
+ // Operands: lhs, rhs, carry-in (AC0 flag)
+ // Results: sum, carry-out (AC0 flag)
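+ //
+ // Roughly, the sequence emitted below for (adde x, y, carry) is:
+ //   t = zext(CC = AC0)  // capture the incoming carry
+ //   s = x + y           // sets AC0 to the intermediate carry
+ //   c1 = CC = AC0       // save the intermediate carry
+ //   s = s + t           // sets AC0 again
+ //   AC0 |= c1           // fold the saved carry into the carry-out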
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned Opcode = Op.getOpcode() == ISD::ADDE ? BF::ADD : BF::SUB;
+
+ // zext incoming carry flag in AC0 to 32 bits
+ SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ Op.getOperand(2));
+ CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32,
+ SDValue(CarryIn, 0));
+
+ // Add operands, produce sum and carry flag
+ SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+ Op.getOperand(0), Op.getOperand(1));
+
+ // Store intermediate carry from Sum
+ SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ SDValue(Sum, 1));
+
+ // Add incoming carry, again producing an output flag
+ Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+ SDValue(Sum, 0), SDValue(CarryIn, 0));
+
+ // Update AC0 with the intermediate carry, producing a flag.
+ SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Flag,
+ SDValue(Carry1, 0));
+
+ // Compose (i32, flag) pair
+ SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) };
+ return DAG.getMergeValues(ops, 2, dl);
+}
+
+SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Blackfin.");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ // Frame & Return address. Currently unimplemented
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::ADDE:
+ case ISD::SUBE: return LowerADDE(Op, DAG);
+ }
+}
+
+void
+BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::READCYCLECOUNTER: {
+ // The low part of the cycle counter is in CYCLES, the high part in
+ // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read
+ // CYCLES2 last.
+ SDValue TheChain = N->getOperand(0);
+ SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32);
+ SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32);
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi));
+ // Outgoing chain. If we were to use the chain from lo instead, it would be
+ // possible to entirely eliminate the CYCLES2 read in (i32 (trunc
+ // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2
+ // read beyond the next CYCLES read, leading to invalid results.
+ Results.push_back(hi.getValue(1));
+ return;
+ }
+ }
+}
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned BlackfinTargetLowering::getFunctionAlignment(const Function *F) const {
+ return 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Blackfin Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+BlackfinTargetLowering::ConstraintType
+BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() != 1)
+ return TargetLowering::getConstraintType(Constraint);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return C_RegisterClass;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return C_RegisterClass;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return C_Register;
+ }
+
+ // Not implemented: q0-q7, qA. Use {R2} etc. instead.
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Return register no and class for a C_Register
+/// constraint.
+std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ typedef std::pair<unsigned, const TargetRegisterClass*> Pair;
+ using namespace BF;
+
+ if (Constraint.size() != 1)
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass);
+
+ // Blackfin-specific constraints
+ case 'a': return Pair(0U, PRegisterClass);
+ case 'd': return Pair(0U, DRegisterClass);
+ case 'e': return Pair(0U, AccuRegisterClass);
+ case 'A': return Pair(A0, AccuRegisterClass);
+ case 'B': return Pair(A1, AccuRegisterClass);
+ case 'b': return Pair(0U, IRegisterClass);
+ case 'v': return Pair(0U, BRegisterClass);
+ case 'f': return Pair(0U, MRegisterClass);
+ case 'C': return Pair(CC, JustCCRegisterClass);
+ case 'x': return Pair(0U, GRRegisterClass);
+ case 'w': return Pair(0U, ALLRegisterClass);
+ case 'Z': return Pair(P3, PRegisterClass);
+ case 'Y': return Pair(P1, PRegisterClass);
+ }
+
+ // Not implemented: q0-q7, qA. Use {R2} etc. instead.
+ // Constraints z, D, W, c, t, u, k, and y use register sets that have no
+ // dedicated register class; they are handled by
+ // getRegClassForInlineAsmConstraint() instead.
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> BlackfinTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ using namespace BF;
+
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ case 'z': return make_vector<unsigned>(P0, P1, P2, 0);
+ case 'D': return make_vector<unsigned>(R0, R2, R4, R6, 0);
+ case 'W': return make_vector<unsigned>(R1, R3, R5, R7, 0);
+ case 'c': return make_vector<unsigned>(I0, I1, I2, I3,
+ B0, B1, B2, B3,
+ L0, L1, L2, L3, 0);
+ case 't': return make_vector<unsigned>(LT0, LT1, 0);
+ case 'u': return make_vector<unsigned>(LB0, LB1, 0);
+ case 'k': return make_vector<unsigned>(LC0, LC1, 0);
+ case 'y': return make_vector<unsigned>(RETS, RETN, RETI, RETX, RETE,
+ ASTAT, SEQSTAT, USP, 0);
+ }
+
+ return std::vector<unsigned>();
+}
+
+bool BlackfinTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Blackfin target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
new file mode 100644
index 000000000000..cdbc7d258c31
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -0,0 +1,81 @@
+//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Blackfin uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_ISELLOWERING_H
+#define BLACKFIN_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Blackfin.h"
+
+namespace llvm {
+
+ namespace BFISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ Wrapper // Address wrapper
+ };
+ }
+
+ class BlackfinTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ public:
+ BlackfinTargetLowering(TargetMachine &TM);
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG);
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ const char *getTargetNodeName(unsigned Opcode) const;
+ unsigned getFunctionAlignment(const Function *F) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerADDE(SDValue Op, SelectionDAG &DAG);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+ };
+} // end namespace llvm
+
+#endif // BLACKFIN_ISELLOWERING_H
diff --git a/lib/Target/Blackfin/BlackfinInstrFormats.td b/lib/Target/Blackfin/BlackfinInstrFormats.td
new file mode 100644
index 000000000000..d8e6e252e787
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinInstrFormats.td
@@ -0,0 +1,34 @@
+//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstBfin<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "BF";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Single-word (16-bit) instructions
+class F1<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
+
+// Double-word (32-bit) instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
new file mode 100644
index 000000000000..3fd5d4dc0bf1
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -0,0 +1,280 @@
+//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "BlackfinGenInstrInfo.inc"
+
+using namespace llvm;
+
+BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
+ : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts)),
+ RI(ST, *this),
+ Subtarget(ST) {}
+
+/// Return true if the instruction is a register-to-register move and, if so,
+/// store the source and dest registers in the passed parameters.
+bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg,
+ unsigned &DstReg,
+ unsigned &SrcSR,
+ unsigned &DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+ switch (MI.getOpcode()) {
+ case BF::MOVE:
+ case BF::MOVE_ncccc:
+ case BF::MOVE_ccncc:
+ case BF::MOVECC_zext:
+ case BF::MOVECC_nz:
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ case BF::SLL16i:
+ if (MI.getOperand(2).getImm() != 0)
+ return false;
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::LOAD32fi:
+ case BF::LOAD16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::STORE32fi:
+ case BF::STORE16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned BlackfinInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc operand
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "Branch conditions have at most one component!");
+
+ if (Cond.empty()) {
+    // Unconditional branch.
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, dl, get(BF::JUMPa)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ llvm_unreachable("Implement conditional branches!");
+}
+
+static bool inClass(const TargetRegisterClass &Test,
+ unsigned Reg,
+ const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Test.contains(Reg);
+ else
+ return &Test==RC || Test.hasSubClass(RC);
+}
+
+bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ if (inClass(BF::ALLRegClass, DestReg, DestRC) &&
+ inClass(BF::ALLRegClass, SrcReg, SrcRC)) {
+ BuildMI(MBB, I, dl, get(BF::MOVE), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ if (inClass(BF::D16RegClass, DestReg, DestRC) &&
+ inClass(BF::D16RegClass, SrcReg, SrcRC)) {
+ BuildMI(MBB, I, dl, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0);
+ return true;
+ }
+
+ if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) &&
+ inClass(BF::DRegClass, DestReg, DestRC)) {
+ if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) {
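+      // Copying !CC: read CC as 0/1, then toggle bit 0 to invert it.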
+ BuildMI(MBB, I, dl, get(BF::MOVENCC_z), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, dl, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
+ } else {
+ BuildMI(MBB, I, dl, get(BF::MOVECC_zext), DestReg).addReg(SrcReg);
+ }
+ return true;
+ }
+
+ if (inClass(BF::AnyCCRegClass, DestReg, DestRC) &&
+ inClass(BF::DRegClass, SrcReg, SrcRC)) {
+ if (inClass(BF::NotCCRegClass, DestReg, DestRC))
+      BuildMI(MBB, I, dl, get(BF::SETEQri_not), DestReg)
+        .addReg(SrcReg).addImm(0);
+ else
+ BuildMI(MBB, I, dl, get(BF::MOVECC_nz), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ if (inClass(BF::NotCCRegClass, DestReg, DestRC) &&
+ inClass(BF::JustCCRegClass, SrcReg, SrcRC)) {
+ BuildMI(MBB, I, dl, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ if (inClass(BF::JustCCRegClass, DestReg, DestRC) &&
+ inClass(BF::NotCCRegClass, SrcReg, SrcRC)) {
+ BuildMI(MBB, I, dl, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+
+ SrcRC->getName() + " -> " + DestRC->getName()).c_str());
+ return false;
+}
+
+void
+BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg,
+ bool isKill,
+ int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = I != MBB.end() ?
+ I->getDebugLoc() : DebugLoc::getUnknownLoc();
+
+ if (inClass(BF::DPRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE32fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE16fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE8fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
+ RC->getName()).c_str());
+}
+
+void BlackfinInstrInfo::
+storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("storeRegToAddr not implemented");
+}
+
+void
+BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = I != MBB.end() ?
+ I->getDebugLoc() : DebugLoc::getUnknownLoc();
+ if (inClass(BF::DPRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable("Cannot load regclass from stack slot");
+}
+
+void BlackfinInstrInfo::
+loadRegFromAddr(MachineFunction &MF,
+ unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("loadRegFromAddr not implemented");
+}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
new file mode 100644
index 000000000000..ea3429c1014a
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -0,0 +1,80 @@
+//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFININSTRUCTIONINFO_H
+#define BLACKFININSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "BlackfinRegisterInfo.h"
+
+namespace llvm {
+
+ class BlackfinInstrInfo : public TargetInstrInfoImpl {
+ const BlackfinRegisterInfo RI;
+ const BlackfinSubtarget& Subtarget;
+ public:
+ explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned
+ InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
new file mode 100644
index 000000000000..934b18864cb5
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -0,0 +1,873 @@
+//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Blackfin instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "BlackfinInstrFormats.td"
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Transformations
+//===----------------------------------------------------------------------===//
+
+def trailingZeros_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ MVT::i32);
+}]>;
+
+def trailingOnes_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+ MVT::i32);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+  // Transformation function: shift the high 16 bits of the immediate down
+  // into the low 16 bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
+}]>;
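+// For example, the constant 0x12345678 splits into HI16 = 0x1234 and
+// LO16 = 0x5678 when materialized in two halves.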
+
+//===----------------------------------------------------------------------===//
+// Immediates
+//===----------------------------------------------------------------------===//
+
+def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUint<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUint<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUint<5>(N->getZExtValue());}]>;
+
+def uimm5m2 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 2 == 0 && isUint<5>(value);
+}]>;
+
+def uimm6m4 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 4 == 0 && isUint<6>(value);
+}]>;
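+// uimm5m2 matches even offsets 0-30 and uimm6m4 matches 4-byte-aligned
+// offsets 0-60, as used by the short scaled load/store forms below.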
+
+def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
+def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUint<16>(N->getZExtValue());}]>;
+
+def ximm16 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value < (1<<16) && value >= -(1<<15);
+}]>;
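+// i.e. ximm16 accepts any value representable in 16 bits, signed or
+// unsigned (-32768 to 65535).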
+
+def imm17m2 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 2 == 0 && isInt<17>(value);
+}]>;
+
+def imm18m4 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 4 == 0 && isInt<18>(value);
+}]>;
+
+// 32-bit bitmask transformed to a bit number
+def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(N->getZExtValue());
+}], trailingZeros_xform>;
+
+// 32-bit inverse bitmask transformed to a bit number
+def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(~N->getZExtValue());
+}], trailingOnes_xform>;
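+// For example, the inverse mask 0xfffffffb (~(1 << 2)) is transformed to bit
+// number 2.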
+
+//===----------------------------------------------------------------------===//
+// Operands
+//===----------------------------------------------------------------------===//
+
+def calltarget : Operand<iPTR>;
+
+def brtarget : Operand<OtherVT>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemoryOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "${:comment}ADJCALLSTACKDOWN $amt",
+ [(BfinCallseqStart timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment}ADJCALLSTACKUP $amt1 $amt2",
+ [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
+}
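+// Both pseudos are expanded into explicit SP adjustments by
+// eliminateCallFramePseudoInstr when no reserved call frame is available.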
+
+//===----------------------------------------------------------------------===//
+// Table C-9. Program Flow Control Instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+
+let isIndirectBranch = 1 in
+def JUMPp : F1<(outs), (ins P:$target),
+ "JUMP ($target);",
+ [(brind P:$target)]>;
+
+// TODO JUMP (PC-P)
+
+// NOTE: assembler chooses between JUMP.S and JUMP.L
+def JUMPa : F1<(outs), (ins brtarget:$target),
+ "jump $target;",
+ [(br bb:$target)]>;
+
+def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
+ "if $cc jump $target;",
+ [(brcond AnyCC:$cc, bb:$target)]>;
+}
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
+def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
+ "call $func;", []>;
+def CALLp: F1<(outs), (ins P:$func, variable_ops),
+ "call ($func);", [(BfinCall P:$func)]>;
+}
+
+let isReturn = 1,
+ isTerminator = 1,
+ Uses = [RETS] in
+def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-10. Load / Store Instructions
+//===----------------------------------------------------------------------===//
+
+// Immediate constant loads
+
+// sext immediate, i32 D/P regs
+def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set DP:$dst, imm7:$src)]>;
+
+// zext immediate, i32 reg groups 0-3
+def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (z);",
+ [(set GR:$dst, uimm16:$src)]>;
+
+// sext immediate, i32 reg groups 0-3
+def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set GR:$dst, imm16:$src)]>;
+
+// Pseudo-instruction for loading a general 32-bit constant.
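+// e.g. loading 0x12345678 expands to "$dst.h = 0x1234; $dst.l = 0x5678;".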
+def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
+ [(set GR:$dst, imm:$src)]>;
+
+def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = $src; $dst.l = $src;", []>;
+
+// 16-bit immediate, i16 reg groups 0-3
+def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
+ "$dst = $src;", []>;
+
+def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
+ (LOAD32sym tglobaladdr:$addr)>;
+
+def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
+ (LOAD32sym tjumptable:$addr)>;
+
+// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
+// emit GR16 instructions. As a hack, we use this fake instruction instead.
+def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
+ "$dst = $src;",
+ [(set D16:$dst, ximm16:$src)]>;
+
+// Memory loads with patterns
+
+def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
+ "$dst = [$ptr];",
+ [(set DP:$dst, (load P:$ptr))]>;
+
+// Pseudo-instruction for loading a stack slot
+def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set DP:$dst, (load ADDRspii:$mem))]>;
+
+// Note: Expands to multiple insns
+def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set D16:$dst, (load ADDRspii:$mem))]>;
+
+// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
+// Replaced with Load D + CC=D
+def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = B[$mem];",
+ [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
+
+def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
+
+def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
+
+def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = W[$ptr] (z);",
+ [(set D:$dst, (zextloadi16 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi16 P:$ptr)), (LOAD32p_16z P:$ptr)>;
+
+def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi16 (add P:$ptr,
+ uimm5m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
+ (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst,
+ (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
+ (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = w[$ptr] (x);",
+ [(set D:$dst, (sextloadi16 P:$ptr))]>;
+
+def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
+
+def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
+ "$dst = w[$ptr];",
+ [(set D16:$dst, (load PI:$ptr))]>;
+
+def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = B[$ptr] (z);",
+ [(set D:$dst, (zextloadi8 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
+def : Pat<(i16 (extloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+def : Pat<(i16 (zextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+
+def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
+ (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
+def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ bfin_subreg_lo16)>;
+def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ bfin_subreg_lo16)>;
+
+def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = b[$ptr] (x);",
+ [(set D:$dst, (sextloadi8 P:$ptr))]>;
+
+def : Pat<(i16 (sextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>;
+
+def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (x);",
+ [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
+ bfin_subreg_lo16)>;
+
+// Memory loads without patterns
+
+let mayLoad = 1 in {
+
+multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";"> {
+ def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
+ def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
+}
+multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";">
+ : LOAD_incdec<drc, prc, mem, suf> {
+ def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>;
+}
+
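+// For example, LOAD8z32p_inc below prints as "$dst = b[$ptr++] (z);".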
+defm LOAD32p: LOAD_incdec<DP, P>;
+defm LOAD32i: LOAD_incdec<D, I>;
+defm LOAD8z32p: LOAD_incdec<D, P, "b", " (z);">;
+defm LOAD8s32p: LOAD_incdec<D, P, "b", " (x);">;
+defm LOADhi: LOAD_incdec<D16, I, "w">;
+defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
+defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
+
+def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+// Note: $fp MUST be FP
+def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
+ "$dst = [$fp - $off];", []>;
+
+def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
+ "$dst = [$ptr];", []>;
+def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+
+def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+                    "$dst = w[$ptr ++ $off];", []>;
+}
+
+// Memory stores with patterns
+def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
+ "[$ptr] = $val;",
+ [(store DP:$val, P:$ptr)]>;
+
+// Pseudo-instructions for storing to a stack slot
+def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store DP:$val, ADDRspii:$mem)]>;
+
+// Note: This stack-storing pseudo-instruction is expanded to multiple insns
+def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store D16:$val, ADDRspii:$mem)]>;
+
+// Pseudo-instructions for storing AnyCC register to a stack slot.
+// Replaced with D=CC + STORE byte
+def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
+ "${:comment}FI b[$mem] = $val;",
+ [(store AnyCC:$val, ADDRspii:$mem)]>;
+
+def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
+
+def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
+
+def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
+ "w[$ptr] = $val;",
+ [(store D16:$val, PI:$ptr)]>;
+
+def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
+ "b[$ptr] = $val;",
+ [(truncstorei8 D:$val, P:$ptr)]>;
+
+def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
+ "b[$ptr + $off] = $val;",
+ [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
+
+let Constraints = "$ptr = $ptr_wb" in {
+
+multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
+ int off=4, string pre=""> {
+ def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr++] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
+ def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr--] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
+ (ineg off)))]>;
+}
+
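+// For example, STORE8p_inc below prints as "b[$ptr++] = $val;" and matches a
+// post-increment store of one byte.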
+defm STORE32p: STORE_incdec<DP, P>;
+defm STORE16i: STORE_incdec<D16, I, 2, "w">;
+defm STORE8p: STORE_incdec<D, P, 1, "b">;
+
+def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
+ "[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
+
+def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
+ "w[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
+}
+
+// Memory stores without patterns
+
+let mayStore = 1 in {
+
+// Note: only works for $fp == FP
+def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
+ "[$fp - $off] = $val;", []>;
+
+def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
+ "[$ptr] = $val;", []>;
+
+def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr++] = $val;", []>;
+
+def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr--] = $val;", []>;
+
+def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
+ "[$ptr ++ $off] = $val;", []>;
+}
+
+def : Pat<(truncstorei16 D:$val, PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$val, D),
+ bfin_subreg_lo16), PI:$ptr)>;
+
+def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$val, D),
+ bfin_subreg_hi16), PI:$ptr)>;
+
+def : Pat<(truncstorei8 D16L:$val, P:$ptr),
+ (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS D16L:$val, D16L),
+ bfin_subreg_lo16),
+ P:$ptr)>;
+
+//===----------------------------------------------------------------------===//
+// Table C-11. Move Instructions.
+//===----------------------------------------------------------------------===//
+
+def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
+ "$dst = $src;",
+ []>;
+
+let isTwoAddress = 1 in
+def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
+ "if $cc $dst = $src2;",
+ [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
+
+let Defs = [AZ, AN, AC0, V] in {
+def MOVEzext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (z);",
+ [(set D:$dst, (zext D16L:$src))]>;
+
+def MOVEsext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (x);",
+ [(set D:$dst, (sext D16L:$src))]>;
+
+def MOVEzext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (z);",
+ [(set D:$dst, (and D:$src, 0xff))]>;
+
+def MOVEsext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (x);",
+ [(set D:$dst, (sext_inreg D:$src, i8))]>;
+
+}
+
+def : Pat<(sext_inreg D16L:$src, i8),
+ (EXTRACT_SUBREG (MOVEsext8
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ D16L:$src,
+ bfin_subreg_lo16)),
+ bfin_subreg_lo16)>;
+
+def : Pat<(sext_inreg D:$src, i16),
+ (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+
+def : Pat<(and D:$src, 0xffff),
+ (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+
+def : Pat<(i32 (anyext D16L:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS D16L:$src, D16L),
+ bfin_subreg_lo16)>;
+
+// TODO Dreg = Dreg_byte (X/Z)
+
+// TODO Accumulator moves
+
+//===----------------------------------------------------------------------===//
+// Table C-12. Stack Control Instructions
+//===----------------------------------------------------------------------===//
+
+let Uses = [SP], Defs = [SP] in {
+def PUSH: F1<(outs), (ins ALL:$src),
+ "[--sp] = $src;", []> { let mayStore = 1; }
+
+// NOTE: POP does not work for DP regs, use LOAD instead
+def POP: F1<(outs ALL:$dst), (ins),
+ "$dst = [sp++];", []> { let mayLoad = 1; }
+}
+
+// TODO: push/pop multiple
+
+def LINK: F2<(outs), (ins i32imm:$amount),
+ "link $amount;", []>;
+
+def UNLINK: F2<(outs), (ins),
+ "unlink;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-13. Control Code Bit Management Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
+ def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
+
+ def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
+
+ def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ []>;
+
+ def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
+}
+
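+// For example, SETULTdd below prints as "cc = $a < $b (iu);".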
+defm SETEQ : SETCC<seteq, setne, "==">;
+defm SETLT : SETCC<setlt, setge, "<">;
+defm SETLE : SETCC<setle, setgt, "<=">;
+defm SETULT : SETCC<setult, setuge, "<", " (iu);">;
+defm SETULE : SETCC<setule, setugt, "<=", " (iu);">;
+
+def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
+ "cc = $a == $b;",
+ [(set NotCC:$cc, (setne D:$a, D:$b))]>;
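+// Note: the printed "cc = $a == $b" writes NotCC, so reading it through NCC
+// yields the != result.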
+
+def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
+def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
+def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
+def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
+
+// TODO: compare pointer for P-P comparisons
+// TODO: compare accumulator
+
+let Defs = [AC0] in
+def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
+ "ac0 \\|= cc;", []>;
+
+let Uses = [AC0] in
+def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
+ "cc = ac0;", []>;
+
+def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
+ "$dst = $cc;",
+ [(set D:$dst, (zext JustCC:$cc))]>;
+
+def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
+ "$dst = cc;", []>;
+
+def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
+ "cc = $src;",
+ [(set AnyCC:$cc, (setne D:$src, 0))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-14. Logical Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 & $src2;",
+ [(set D:$dst, (and D:$src1, D:$src2))]>;
+
+def NOT: F1<(outs D:$dst), (ins D:$src),
+ "$dst = ~$src;",
+ [(set D:$dst, (not D:$src))]>;
+
+def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 \\| $src2;",
+ [(set D:$dst, (or D:$src1, D:$src2))]>;
+
+def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 ^ $src2;",
+ [(set D:$dst, (xor D:$src1, D:$src2))]>;
+
+// missing: BXOR, BXORSHIFT
+
+//===----------------------------------------------------------------------===//
+// Table C-15. Bit Operations Instructions
+//===----------------------------------------------------------------------===//
+
+let isTwoAddress = 1 in {
+def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
+ "bitclr($dst, $src2);",
+ [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
+
+def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bitset($dst, $src2);",
+ [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
+
+def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bittgl($dst, $src2);",
+ [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
+}
+
+def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = bittst($src1, $src2);",
+ [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = !bittst($src1, $src2);",
+ [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+// TODO: DEPOSIT, EXTRACT, BITMUX
+
+def ONES: F2<(outs D16L:$dst), (ins D:$src),
+ "$dst = ones $src;",
+ [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
+
+def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Table C-16. Shift / Rotate Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHIFT32<SDNode opnode, string ops> {
+ def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
+ def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, D:$amount))]>;
+}
+
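+// For example, SRAi below prints as "$dst >>>= $amount;".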
+let Defs = [AZ, AN, V, VS],
+ isTwoAddress = 1 in {
+defm SRA : SHIFT32<sra, ">>>">;
+defm SRL : SHIFT32<srl, ">>">;
+defm SLL : SHIFT32<shl, "<<">;
+}
+
+// TODO: automatic switching between 2-addr and 3-addr (?)
+
+let Defs = [AZ, AN, V, VS] in {
+def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D:$dst, (shl D:$src, D16L:$amount))]>;
+
+// Arithmetic left shift: saturates on overflow.
+def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = ashift $src by $amount;",
+ [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
+
+def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >>> $amount;",
+ [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
+
+def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >> $amount;",
+ [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
+
+// Arithmetic left shift: saturates on overflow.
+def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = ashift $src BY $amount;",
+ [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>;
+
+def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src << $amount;",
+ [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
+
+def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-17. Arithmetic Operations Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: ABS
+
+let Defs = [AZ, AN, AC0, V, VS] in {
+
+def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D:$dst, (add D:$src1, D:$src2))]>;
+
+def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
+
+let isTwoAddress = 1 in
+def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
+ "$dst += $src2;",
+ [(set D:$dst, (add D:$src1, imm7:$src2))]>;
+
+def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D:$dst, (sub D:$src1, D:$src2))]>;
+
+def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
+
+}
+
+def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
+def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
+
+let Defs = [AZ, AN, V, VS] in
+def NEG: F1<(outs D:$dst), (ins D:$src),
+ "$dst = -$src;",
+ [(set D:$dst, (ineg D:$src))]>;
+
+// No pattern, it would confuse isel to have two i32 = i32+i32 patterns
+def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
+ "$dst = $src1 + $src2;", []>;
+
+let isTwoAddress = 1 in
+def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
+ "$dst += $src2;", []>;
+
+let Defs = [AZ, AN, V] in
+def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2 (rnd20);", []>;
+
+let Defs = [V, VS] in {
+def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
+
+def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (ih);",
+ [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
+
+def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
+
+def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
+}
+
+let isTwoAddress = 1 in
+def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst *= $src2;",
+ [(set D:$dst, (mul D:$src1, D:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-18. External Event Management Instructions
+//===----------------------------------------------------------------------===//
+
+def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>;
+def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>;
+def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>;
+def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
+def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>;
+def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>;
+def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
+def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
+def NOP : F1<(outs), (ins), "nop;", []>;
+def MNOP : F2<(outs), (ins), "mnop;", []>;
+def ABORT : F1<(outs), (ins), "abort;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-19. Cache Control Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Table C-20. Video Pixel Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align8($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 24)),
+ (srl D:$src2, (i32 8))))]>;
+
+def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 16)),
+ (srl D:$src2, (i32 16))))]>;
+
+def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+                "$dst = align24($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 8)),
+ (srl D:$src2, (i32 24))))]>;
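+// e.g. align8(0x11223344, 0xaabbccdd) = 0x44aabbcc: the low byte of $src1 is
+// concatenated with the high three bytes of $src2.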
+
+def DISALGNEXCPT : F2<(outs), (ins), "disalgnexcpt;", []>;
+
+// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
+// BYTEPACK, BYTEUNPACK
+
+//===----------------------------------------------------------------------===//
+// Table C-21. Vector Operations Instructions
+//===----------------------------------------------------------------------===//
+
+// Patterns
+def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
+ (CALLa tglobaladdr:$dst)>;
+def : Pat<(BfinCall (i32 texternalsym:$dst)),
+ (CALLa texternalsym:$dst)>;
+
+def : Pat<(sext JustCC:$cc),
+ (NEG (MOVECC_zext JustCC:$cc))>;
+def : Pat<(anyext JustCC:$cc),
+ (MOVECC_zext JustCC:$cc)>;
+def : Pat<(i16 (zext JustCC:$cc)),
+ (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
+def : Pat<(i16 (sext JustCC:$cc)),
+ (EXTRACT_SUBREG (NEG (MOVECC_zext JustCC:$cc)), bfin_subreg_lo16)>;
+def : Pat<(i16 (anyext JustCC:$cc)),
+ (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
+
+def : Pat<(i16 (trunc D:$src)),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$src, D), bfin_subreg_lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
new file mode 100644
index 000000000000..6d0f66cd7a5d
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
@@ -0,0 +1,21 @@
+//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the BlackfinMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCAsmInfo.h"
+
+using namespace llvm;
+
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) {
+ GlobalPrefix = "_";
+ CommentString = "//";
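+  // e.g. the symbol "foo" is emitted as "_foo", and assembly comments start
+  // with "//".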
+}
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
new file mode 100644
index 000000000000..0efc29523067
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- BlackfinMCAsmInfo.h - Blackfin asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BlackfinMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETASMINFO_H
+#define BLACKFINTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct BlackfinMCAsmInfo : public MCAsmInfo {
+ explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
new file mode 100644
index 000000000000..8c0a58aca41e
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -0,0 +1,472 @@
+//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinRegisterInfo.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
+ const TargetInstrInfo &tii)
+ : BlackfinGenRegisterInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
+ Subtarget(st),
+ TII(tii) {}
+
+const unsigned*
+BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ using namespace BF;
+ static const unsigned CalleeSavedRegs[] = {
+ FP,
+ R4, R5, R6, R7,
+ P3, P4, P5,
+ 0 };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const *BlackfinRegisterInfo::
+getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ using namespace BF;
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &PRegClass,
+ &DRegClass, &DRegClass, &DRegClass, &DRegClass,
+ &PRegClass, &PRegClass, &PRegClass,
+ 0 };
+ return CalleeSavedRegClasses;
+}
+
+BitVector
+BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ using namespace BF;
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AZ);
+ Reserved.set(AN);
+ Reserved.set(AQ);
+ Reserved.set(AC0);
+ Reserved.set(AC1);
+ Reserved.set(AV0);
+ Reserved.set(AV0S);
+ Reserved.set(AV1);
+ Reserved.set(AV1S);
+ Reserved.set(V);
+ Reserved.set(VS);
+ Reserved.set(CYCLES).set(CYCLES2);
+ Reserved.set(L0);
+ Reserved.set(L1);
+ Reserved.set(L2);
+ Reserved.set(L3);
+ Reserved.set(SP);
+ Reserved.set(RETS);
+ if (hasFP(MF))
+ Reserved.set(FP);
+ return Reserved;
+}
+
+const TargetRegisterClass*
+BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the smallest register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end();
+ I != E; ++I) {
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || RC->getNumRegs() < BestRC->getNumRegs()))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function makes calls, has variable
+// sized allocas, or if frame pointer elimination is disabled.
+bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasCalls() || MFI->hasVarSizedObjects();
+}
+
+bool BlackfinRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit instructions to add delta to D/P register. ScratchReg must be of the
+// same class as Reg (P).
+void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const {
+ if (!delta)
+ return;
+ if (isInt<7>(delta)) {
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg)
+ .addReg(Reg) // No kill on two-addr operand
+ .addImm(delta);
+ return;
+ }
+
+ // We must load delta into ScratchReg and add that.
+ loadConstant(MBB, I, DL, ScratchReg, delta);
+ if (BF::PRegClass.contains(Reg)) {
+ assert(BF::PRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a P register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register");
+ assert(BF::DRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a D register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ }
+}
+
+// Emit instructions to load a constant into D/P register
+void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const {
+ if (isInt<7>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value);
+ return;
+ }
+
+ if (isUint<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
+ return;
+ }
+
+ if (isInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value);
+ return;
+ }
+
+ // We must split into halves
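+  // e.g. value 0x12345678 is materialized as Reg.h = 0x1234 then
+  // Reg.l = 0x5678.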
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16))
+ .addImm((value >> 16) & 0xffff)
+ .addReg(Reg, RegState::ImplicitDefine);
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16))
+ .addImm(value & 0xffff)
+ .addReg(Reg, RegState::ImplicitKill)
+ .addReg(Reg, RegState::ImplicitDefine);
+}
+
+void BlackfinRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (Amount != 0) {
+ assert(Amount%4 == 0 && "Unaligned call frame size");
+ if (I->getOpcode() == BF::ADJCALLSTACKDOWN) {
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount);
+ } else {
+ assert(I->getOpcode() == BF::ADJCALLSTACKUP &&
+ "Unknown call frame pseudo instruction");
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+unsigned
+BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned FIPos;
+ for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) {
+ assert(FIPos < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ int FrameIndex = MI.getOperand(FIPos).getIndex();
+ assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm());
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
+ + MI.getOperand(FIPos+1).getImm();
+ unsigned BaseReg = BF::FP;
+ if (hasFP(MF)) {
+ assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
+ } else {
+ BaseReg = BF::SP;
+ Offset += MF.getFrameInfo()->getStackSize() + SPAdj;
+ }
+
+ bool isStore = false;
+
+ switch (MI.getOpcode()) {
+ case BF::STORE32fi:
+ isStore = true;
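+    // Fall through.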
+ case BF::LOAD32fi: {
+ assert(Offset%4 == 0 && "Unaligned i32 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ if (isUint<6>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_uimm6m4
+ : BF::LOAD32p_uimm6m4));
+ return 0;
+ }
+ if (BaseReg == BF::FP && isUint<7>(-Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32fp_nimm7m4
+ : BF::LOAD32fp_nimm7m4));
+ MI.getOperand(FIPos+1).setImm(-Offset);
+ return 0;
+ }
+ if (isInt<18>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_imm18m4
+ : BF::LOAD32p_imm18m4));
+ return 0;
+ }
+ // Use RegScavenger to calculate proper offset...
+ MI.dump();
+ llvm_unreachable("Stack frame offset too big");
+ break;
+ }
+ case BF::ADDpp: {
+ assert(MI.getOperand(0).isReg() && "ADD instruction needs a register");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ // We need to produce a stack offset in a P register. We emit:
+ // P0 = offset;
+ // P0 = BR + P0;
+ assert(FIPos==1 && "Bad frame index operand");
+ loadConstant(MBB, II, DL, DestReg, Offset);
+ MI.getOperand(1).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(2).ChangeToRegister(BaseReg, false);
+ break;
+ }
+ case BF::STORE16fi:
+ isStore = true;
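+    // Fall through.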
+ case BF::LOAD16fi: {
+ assert(Offset%2 == 0 && "Unaligned i16 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ // We need a P register to use as an address
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ loadConstant(MBB, II, DL, ScratchReg, Offset);
+ BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(BaseReg);
+ MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi));
+ MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true);
+ MI.RemoveOperand(2);
+ break;
+ }
+ case BF::STORE8fi: {
+ // This is an AnyCC spill, we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ if (SpillReg.getReg()==BF::NCC) {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
+ .addOperand(SpillReg);
+ BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
+ .addReg(ScratchReg).addImm(0);
+ } else {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
+ .addOperand(SpillReg);
+ }
+ // STORE D
+ MI.setDesc(TII.get(BF::STORE8p_imm16));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ break;
+ }
+ case BF::LOAD8fi: {
+    // This is an AnyCC restore, we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ ++II;
+ if (SpillReg.getReg()==BF::CC) {
+ // CC = D
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ // Restore NCC (CC = D==0)
+ BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Cannot eliminate frame index");
+ break;
+ }
+ return 0;
+}
+
+void BlackfinRegisterInfo::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = BF::DPRegisterClass;
+ if (requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+}
+
+void BlackfinRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers
+void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = (MBBI != MBB.end()
+ ? MBBI->getDebugLoc()
+ : DebugLoc::getUnknownLoc());
+
+ int FrameSize = MFI->getStackSize();
+ if (FrameSize%4) {
+ FrameSize = (FrameSize+3) & ~3;
+ MFI->setStackSize(FrameSize);
+ }
+
+ if (!hasFP(MF)) {
+ assert(!MFI->hasCalls() &&
+ "FP elimination on a non-leaf function is not supported");
+ adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
+ return;
+ }
+
+ // emit a LINK instruction
+ if (FrameSize <= 0x3ffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
+ return;
+ }
+
+ // Frame is too big, do a manual LINK:
+ // [--SP] = RETS;
+ // [--SP] = FP;
+ // FP = SP;
+ // P1 = -FrameSize;
+ // SP = SP + P1;
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::RETS, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::FP, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
+ .addReg(BF::SP);
+ loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
+ .addReg(BF::SP, RegState::Kill)
+ .addReg(BF::P1, RegState::Kill);
+}
+
+void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ if (!hasFP(MF)) {
+ assert(!MFI->hasCalls() &&
+ "FP elimination on a non-leaf function is not supported");
+ adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
+ return;
+ }
+
+ // emit an UNLINK instruction
+ BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
+}
+
+unsigned BlackfinRegisterInfo::getRARegister() const {
+ return BF::RETS;
+}
+
+unsigned BlackfinRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? BF::FP : BF::SP;
+}
+
+unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+#include "BlackfinGenRegisterInfo.inc"
+
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
new file mode 100644
index 000000000000..501f504d06bf
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -0,0 +1,104 @@
+//===- BlackfinRegisterInfo.h - Blackfin Register Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINREGISTERINFO_H
+#define BLACKFINREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "BlackfinGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+ class BlackfinSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ // Subregister indices, keep in sync with BlackfinRegisterInfo.td
+ enum BfinSubregIdx {
+ bfin_subreg_lo16 = 1,
+ bfin_subreg_hi16 = 2,
+ bfin_subreg_lo32 = 3
+ };
+
+ struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
+ BlackfinSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ // getSubReg implemented by tablegen
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const {
+ return &BF::PRegClass;
+ }
+
+ const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg,
+ EVT VT) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ // bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getRARegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ // Utility functions
+ void adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const;
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
new file mode 100644
index 000000000000..642d10f5aa67
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -0,0 +1,385 @@
+//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Blackfin register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified by a 3-bit group number and a 3-bit ID number.
+
+class BlackfinReg<string n> : Register<n> {
+ field bits<3> Group;
+ field bits<3> Num;
+ let Namespace = "BF";
+}
+
+// Rc - 1-bit registers
+class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
+ field bits<5> BitNum = bitno;
+}
+
+// Rs - 16-bit integer registers
+class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+ field bits<1> High = hi;
+}
+
+// Ri - 32-bit integer registers with subregs
+class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+}
+
+// Ra - 40-bit accumulator registers
+class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
+ let SubRegs = subs;
+ let Group = 4;
+ let Num = num;
+}
+
+// Two halves of a 32-bit register
+multiclass Rss<bits<3> group, bits<3> num, string n> {
+ def H : Rs<group, num, 1, !strconcat(n, ".h")>;
+ def L : Rs<group, num, 0, !strconcat(n, ".l")>;
+}
+
+// Rii - 32-bit integer registers with subregs
+class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
+ : BlackfinReg<n> {
+ let SubRegs = subs;
+ let Group = group;
+ let Num = num;
+}
+
+// Status bits are all part of ASTAT
+def AZ : Rc<0, "az">;
+def AN : Rc<1, "an">;
+def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
+def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
+def AQ : Rc<6, "aq">;
+def AC0 : Rc<12, "ac0">;
+def AC1 : Rc<13, "ac1">;
+def AV0 : Rc<16, "av0">;
+def AV0S : Rc<17, "av0s">;
+def AV1 : Rc<18, "av1">;
+def AV1S : Rc<19, "av1s">;
+def V : Rc<24, "v">;
+def VS : Rc<25, "vs">;
+// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
+
+// Group 0: Integer registers
+defm R0 : Rss<0, 0, "r0">;
+def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
+defm R1 : Rss<0, 1, "r1">;
+def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
+defm R2 : Rss<0, 2, "r2">;
+def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
+defm R3 : Rss<0, 3, "r3">;
+def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
+defm R4 : Rss<0, 4, "r4">;
+def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
+defm R5 : Rss<0, 5, "r5">;
+def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
+defm R6 : Rss<0, 6, "r6">;
+def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
+defm R7 : Rss<0, 7, "r7">;
+def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
+
+// Group 1: Pointer registers
+defm P0 : Rss<1, 0, "p0">;
+def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
+defm P1 : Rss<1, 1, "p1">;
+def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
+defm P2 : Rss<1, 2, "p2">;
+def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
+defm P3 : Rss<1, 3, "p3">;
+def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
+defm P4 : Rss<1, 4, "p4">;
+def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
+defm P5 : Rss<1, 5, "p5">;
+def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
+defm SP : Rss<1, 6, "sp">;
+def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
+defm FP : Rss<1, 7, "fp">;
+def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
+
+// Group 2: Index registers
+defm I0 : Rss<2, 0, "i0">;
+def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
+defm I1 : Rss<2, 1, "i1">;
+def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
+defm I2 : Rss<2, 2, "i2">;
+def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
+defm I3 : Rss<2, 3, "i3">;
+def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
+defm M0 : Rss<2, 4, "m0">;
+def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
+defm M1 : Rss<2, 5, "m1">;
+def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
+defm M2 : Rss<2, 6, "m2">;
+def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
+defm M3 : Rss<2, 7, "m3">;
+def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>;
+
+// Group 3: Cyclic indexing registers
+defm B0 : Rss<3, 0, "b0">;
+def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>;
+defm B1 : Rss<3, 1, "b1">;
+def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>;
+defm B2 : Rss<3, 2, "b2">;
+def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>;
+defm B3 : Rss<3, 3, "b3">;
+def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>;
+defm L0 : Rss<3, 4, "l0">;
+def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>;
+defm L1 : Rss<3, 5, "l1">;
+def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>;
+defm L2 : Rss<3, 6, "l2">;
+def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>;
+defm L3 : Rss<3, 7, "l3">;
+def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>;
+
+// Accumulators
+def A0X : Ri <4, 0, "a0.x">;
+defm A0 : Rss<4, 1, "a0">;
+def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>;
+def A0 : Ra <0, "a0", [A0X, A0W]>;
+
+def A1X : Ri <4, 2, "a1.x">;
+defm A1 : Rss<4, 3, "a1">;
+def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>;
+def A1 : Ra <2, "a1", [A1X, A1W]>;
+
+def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>;
+def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>;
+def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>;
+def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
+def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
+
+def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
+ let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+}
+
+def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
+def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>;
+def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>;
+def SYSCFG : Ri<7, 2, "syscfg">;
+def CYCLES : Ri<6, 6, "cycles">;
+def CYCLES2 : Ri<6, 7, "cycles2">;
+
+// Hardware loops
+def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>;
+def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>;
+def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>;
+def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
+def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
+def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
+
+// Subregs are:
+// 1: .L
+// 2: .H
+// 3: .W (32 low bits of 40-bit accu)
+// Keep in sync with enum in BlackfinRegisterInfo.h
+def bfin_subreg_lo16 : PatLeaf<(i32 1)>;
+def bfin_subreg_hi16 : PatLeaf<(i32 2)>;
+def bfin_subreg_32bit : PatLeaf<(i32 3)>;
+
+def : SubRegSet<1,
+ [R0, R1, R2, R3, R4, R5, R6, R7,
+ P0, P1, P2, P3, P4, P5, SP, FP,
+ I0, I1, I2, I3, M0, M1, M2, M3,
+ B0, B1, B2, B3, L0, L1, L2, L3],
+ [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
+ P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL,
+ I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L,
+ B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>;
+
+def : SubRegSet<2,
+ [R0, R1, R2, R3, R4, R5, R6, R7,
+ P0, P1, P2, P3, P4, P5, SP, FP,
+ I0, I1, I2, I3, M0, M1, M2, M3,
+ B0, B1, B2, B3, L0, L1, L2, L3],
+ [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
+ P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH,
+ I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H,
+ B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>;
+
+def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>;
+def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>;
+
+// Register classes.
+def D16 : RegisterClass<"BF", [i16], 16,
+ [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
+ R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L]>;
+
+def D16L : RegisterClass<"BF", [i16], 16,
+ [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L]>;
+
+def D16H : RegisterClass<"BF", [i16], 16,
+ [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H]>;
+
+def P16 : RegisterClass<"BF", [i16], 16,
+ [P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
+ P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>;
+
+def P16L : RegisterClass<"BF", [i16], 16,
+ [P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>;
+
+def P16H : RegisterClass<"BF", [i16], 16,
+ [P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>;
+
+def DP16 : RegisterClass<"BF", [i16], 16,
+ [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
+ R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L,
+ P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
+ P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>;
+
+def DP16L : RegisterClass<"BF", [i16], 16,
+ [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
+ P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>;
+
+def DP16H : RegisterClass<"BF", [i16], 16,
+ [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
+ P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>;
+
+def GR16 : RegisterClass<"BF", [i16], 16,
+ [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
+ R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L,
+ P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
+ P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL,
+ I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
+ M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
+ B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
+ L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>;
+
+def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+ let SubRegClassList = [D16L, D16H];
+}
+
+def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
+ let SubRegClassList = [P16L, P16H];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ PClass::iterator
+ PClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ return allocation_order_begin(MF)
+ + (RI->hasFP(MF) ? 7 : 6);
+ }
+ }];
+}
+
+def I : RegisterClass<"BF", [i32], 32, [I0, I1, I2, I3]>;
+def M : RegisterClass<"BF", [i32], 32, [M0, M1, M2, M3]>;
+def B : RegisterClass<"BF", [i32], 32, [B0, B1, B2, B3]>;
+def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>;
+
+def DP : RegisterClass<"BF", [i32], 32,
+ [R0, R1, R2, R3, R4, R5, R6, R7,
+ P0, P1, P2, P3, P4, P5, FP, SP]> {
+ let SubRegClassList = [DP16L, DP16H];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ DPClass::iterator
+ DPClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ return allocation_order_begin(MF)
+ + (RI->hasFP(MF) ? 15 : 14);
+ }
+ }];
+}
+
+def GR : RegisterClass<"BF", [i32], 32,
+ [R0, R1, R2, R3, R4, R5, R6, R7,
+ P0, P1, P2, P3, P4, P5,
+ I0, I1, I2, I3, M0, M1, M2, M3,
+ B0, B1, B2, B3, L0, L1, L2, L3,
+ FP, SP]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GRClass::iterator
+ GRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ return allocation_order_begin(MF)
+ + (RI->hasFP(MF) ? 31 : 30);
+ }
+ }];
+}
+
+def ALL : RegisterClass<"BF", [i32], 32,
+ [R0, R1, R2, R3, R4, R5, R6, R7,
+ P0, P1, P2, P3, P4, P5,
+ I0, I1, I2, I3, M0, M1, M2, M3,
+ B0, B1, B2, B3, L0, L1, L2, L3,
+ FP, SP,
+ A0X, A0W, A1X, A1W, ASTAT, RETS,
+ LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
+ USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ ALLClass::iterator
+ ALLClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ return allocation_order_begin(MF)
+ + (RI->hasFP(MF) ? 31 : 30);
+ }
+ }];
+}
+
+def PI : RegisterClass<"BF", [i32], 32,
+ [P0, P1, P2, P3, P4, P5, I0, I1, I2, I3, FP, SP]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ PIClass::iterator
+ PIClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ return allocation_order_begin(MF)
+ + (RI->hasFP(MF) ? 11 : 10);
+ }
+ }];
+}
+
+// We are going to pretend that CC and !CC are 32-bit registers, even though
+// they can only hold one bit.
+let CopyCost = -1, Size = 8 in {
+def JustCC : RegisterClass<"BF", [i32], 8, [CC]>;
+def NotCC : RegisterClass<"BF", [i32], 8, [NCC]>;
+def AnyCC : RegisterClass<"BF", [i32], 8, [CC, NCC]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ AnyCCClass::iterator
+ AnyCCClass::allocation_order_end(const MachineFunction &MF) const {
+ return allocation_order_begin(MF)+1;
+ }
+ }];
+}
+def StatBit : RegisterClass<"BF", [i1], 8,
+ [AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]>;
+}
+
+// Should be i40, but that isn't defined. It is not a legal type yet anyway.
+def Accu : RegisterClass<"BF", [i64], 64, [A0, A1]>;
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp
new file mode 100644
index 000000000000..e104c5245a9e
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -0,0 +1,36 @@
+//===- BlackfinSubtarget.cpp - Blackfin Subtarget Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Blackfin-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinSubtarget.h"
+#include "BlackfinGenSubtarget.inc"
+
+using namespace llvm;
+
+BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
+ const std::string &FS)
+ : sdram(false),
+ icplb(false),
+ wa_mi_shift(false),
+ wa_csync(false),
+ wa_specld(false),
+ wa_mmr_stall(false),
+ wa_lcregs(false),
+ wa_hwloop(false),
+ wa_ind_call(false),
+ wa_killed_mmr(false),
+ wa_rets(false)
+{
+ std::string CPU = "generic";
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h
new file mode 100644
index 000000000000..d667fe26519b
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -0,0 +1,45 @@
+//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_SUBTARGET_H
+#define BLACKFIN_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+
+ class BlackfinSubtarget : public TargetSubtarget {
+ bool sdram;
+ bool icplb;
+ bool wa_mi_shift;
+ bool wa_csync;
+ bool wa_specld;
+ bool wa_mmr_stall;
+ bool wa_lcregs;
+ bool wa_hwloop;
+ bool wa_ind_call;
+ bool wa_killed_mmr;
+ bool wa_rets;
+ public:
+ BlackfinSubtarget(const std::string &TT, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
new file mode 100644
index 000000000000..47ba2fe28f58
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -0,0 +1,42 @@
+//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Blackfin-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinTargetMachine.h"
+#include "Blackfin.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeBlackfinTarget() {
+ RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
+ RegisterAsmInfo<BlackfinMCAsmInfo> Y(TheBlackfinTarget);
+}
+
+BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ DataLayout("e-p:32:32-i64:32-f64:32"),
+ Subtarget(TT, FS),
+ TLInfo(*this),
+ InstrInfo(Subtarget),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
+}
+
+bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createBlackfinISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
new file mode 100644
index 000000000000..73ed3143f530
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -0,0 +1,54 @@
+//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETMACHINE_H
+#define BLACKFINTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "BlackfinISelLowering.h"
+
+namespace llvm {
+
+ class BlackfinTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout;
+ BlackfinSubtarget Subtarget;
+ BlackfinTargetLowering TLInfo;
+ BlackfinInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ public:
+ BlackfinTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const BlackfinSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const BlackfinRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual BlackfinTargetLowering* getTargetLowering() const {
+ return const_cast<BlackfinTargetLowering*>(&TLInfo);
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
new file mode 100644
index 000000000000..6c3b2447a694
--- /dev/null
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -0,0 +1,21 @@
+set(LLVM_TARGET_DEFINITIONS Blackfin.td)
+
+tablegen(BlackfinGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(BlackfinGenRegisterNames.inc -gen-register-enums)
+tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc)
+tablegen(BlackfinGenInstrNames.inc -gen-instr-enums)
+tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc)
+tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
+tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
+tablegen(BlackfinGenSubtarget.inc -gen-subtarget)
+tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
+
+add_llvm_target(BlackfinCodeGen
+ BlackfinInstrInfo.cpp
+ BlackfinISelDAGToDAG.cpp
+ BlackfinISelLowering.cpp
+ BlackfinMCAsmInfo.cpp
+ BlackfinRegisterInfo.cpp
+ BlackfinSubtarget.cpp
+ BlackfinTargetMachine.cpp
+ )
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
new file mode 100644
index 000000000000..c0c1bce793d0
--- /dev/null
+++ b/lib/Target/Blackfin/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Blackfin/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMBlackfinCodeGen
+TARGET = Blackfin
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \
+ BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \
+ BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \
+ BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \
+ BlackfinGenCallingConv.inc
+
+DIRS = AsmPrinter TargetInfo
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Blackfin/README.txt b/lib/Target/Blackfin/README.txt
new file mode 100644
index 000000000000..b4c8227cd645
--- /dev/null
+++ b/lib/Target/Blackfin/README.txt
@@ -0,0 +1,244 @@
+//===-- README.txt - Notes for Blackfin Target ------------------*- org -*-===//
+
+* Condition codes
+** DONE Problem with asymmetric SETCC operations
+The instruction
+
+ CC = R0 < 2
+
+is not symmetric - there is no R0 > 2 instruction. On the other hand, IF CC
+JUMP can take both CC and !CC as a condition. We cannot pattern-match (brcond
+(not cc), target) because the DAG optimizer removes that kind of thing.
+
+This is handled by creating a pseudo-register NCC that aliases CC. Register
+classes JustCC and NotCC are used to control the inversion of CC.
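+
+For example, R0 > 2 could hypothetically be selected as the reversed compare
+with the branch taken on the inverted bit (the register and constant are
+chosen for illustration only):
+
+ CC = R0 < 3
+ IF !CC JUMP target
+
+The jump is taken exactly when R0 > 2.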
+
+** DONE CC as an i32 register
+The AnyCC register class pretends to hold i32 values. It can only represent the
+values 0 and 1, but we can copy to and from the D class. This hack makes it
+possible to represent the setcc instruction without having i1 as a legal type.
+
+In most cases, the CC register is set by a "CC = .." or BITTST instruction, and
+then used in a conditional branch or move. The code generator thinks it is
+moving 32 bits, but the value stays in CC. In other cases, the result of a
+comparison is actually used as an i32 number, and CC will be copied to a D
+register.
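+
+A minimal sketch of that last case (the register choices are hypothetical):
+
+ CC = R0 == R1
+ R2 = CC
+
+The copy materializes the single CC bit as the i32 value 0 or 1 in R2.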
+
+* Stack frames
+** TODO Use Push/Pop instructions
+We should use the push/pop instructions when saving callee-saved
+registers. They are smaller, and we may even be able to use push-multiple
+instructions.
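+
+A push-multiple save/restore pair might look like this (the exact register
+ranges are illustrative, not what the backend emits today):
+
+ [--SP] = (R7:4, P5:3)
+ ...
+ (R7:4, P5:3) = [SP++]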
+
+** TODO requiresRegisterScavenging
+We need more intelligence in determining when the scavenger is needed. We
+should keep track of:
+- Spilling D16 registers
+- Spilling AnyCC registers
+
+* Assembler
+** TODO Implement PrintGlobalVariable
+** TODO Remove LOAD32sym
+It's a hack that combines two instructions by concatenation.
+
+* Inline Assembly
+
+These are the GCC constraints from bfin/constraints.md:
+
+| Code | Register class | LLVM |
+|-------+-------------------------------------------+------|
+| a | P | C |
+| d | D | C |
+| z | Call clobbered P (P0, P1, P2) | X |
+| D | EvenD | X |
+| W | OddD | X |
+| e | Accu | C |
+| A | A0 | S |
+| B | A1 | S |
+| b | I | C |
+| v | B | C |
+| f | M | C |
+| c | Circular I, B, L | X |
+| C | JustCC | S |
+| t | LoopTop | X |
+| u | LoopBottom | X |
+| k | LoopCount | X |
+| x | GR | C |
+| y | RET*, ASTAT, SEQSTAT, USP | X |
+| w | ALL | C |
+| Z | The FD-PIC GOT pointer (P3) | S |
+| Y | The FD-PIC function pointer register (P1) | S |
+| q0-q7 | R0-R7 individually | |
+| qA | P0 | |
+|-------+-------------------------------------------+------|
+| Code | Constant | |
+|-------+-------------------------------------------+------|
+| J | 1<<N, N<32 | |
+| Ks3 | imm3 | |
+| Ku3 | uimm3 | |
+| Ks4 | imm4 | |
+| Ku4 | uimm4 | |
+| Ks5 | imm5 | |
+| Ku5 | uimm5 | |
+| Ks7 | imm7 | |
+| KN7 | -imm7 | |
+| Ksh | imm16 | |
+| Kuh | uimm16 | |
+| L | ~(1<<N) | |
+| M1 | 0xff | |
+| M2 | 0xffff | |
+| P0-P4 | 0-4 | |
+| PA | Macflag, not M | |
+| PB | Macflag, only M | |
+| Q | Symbol | |
+
+** TODO Support all register classes
+
+* DAG combiner
+** Create a test case for each illegal SETCC case
+The DAG combiner may sometimes produce illegal i16 SETCC instructions.
+
+*** TODO SETCC (ctlz x), 5) == const
+*** TODO SETCC (and load, const) == const
+*** DONE SETCC (zext x) == const
+*** TODO SETCC (sext x) == const
+
+* Instruction selection
+** TODO Better immediate constants
+Like ARM, build constants as a small immediate plus a shift.
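+
+For instance, 5120 = 5 << 10 could be built in two short instructions instead
+of a full 32-bit constant load (a sketch; the constant is arbitrary):
+
+ R0 = 5
+ R0 <<= 10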
+
+** TODO Implement cycle counter
+We have CYCLES and CYCLES2 registers, but the readcyclecounter intrinsic wants
+to return i64, and the code generator doesn't know how to legalize that.
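+
+Once legalization is solved, a hypothetical expansion of readcyclecounter
+would pair the two 32-bit halves (reading CYCLES first is believed to latch
+the high half so the pair is coherent):
+
+ R0 = CYCLES
+ R1 = CYCLES2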
+
+** TODO Instruction alternatives
+Some instructions come in different variants for example:
+
+ D = D + D
+ P = P + P
+
+Cross combinations are not allowed:
+
+ P = D + D (bad)
+
+Similarly for the subreg pseudo-instructions:
+
+ D16L = EXTRACT_SUBREG D16, bfin_subreg_lo16
+ P16L = EXTRACT_SUBREG P16, bfin_subreg_lo16
+
+We want to take advantage of the alternative instructions. This could be done by
+changing the DAG after instruction selection.
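+
+Until then, a cross combination can presumably be handled by computing in the
+matching class and copying (a sketch, not what the selector does today):
+
+ R2 = R0 + R1
+ P0 = R2
+
+Selecting the P-form directly would save the extra move.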
+
+** Multipatterns for load/store
+We should try to identify multipatterns for load and store instructions. The
+available instruction matrix is a bit irregular.
+
+Loads:
+
+| Addr | D | P | D 16z | D 16s | D16 | D 8z | D 8s |
+|------------+---+---+-------+-------+-----+------+------|
+| P | * | * | * | * | * | * | * |
+| P++ | * | * | * | * | | * | * |
+| P-- | * | * | * | * | | * | * |
+| P+uimm5m2 | | | * | * | | | |
+| P+uimm6m4 | * | * | | | | | |
+| P+imm16 | | | | | | * | * |
+| P+imm17m2 | | | * | * | | | |
+| P+imm18m4 | * | * | | | | | |
+| P++P | * | | * | * | * | | |
+| FP-uimm7m4 | * | * | | | | | |
+| I | * | | | | * | | |
+| I++ | * | | | | * | | |
+| I-- | * | | | | * | | |
+| I++M | * | | | | | | |
+
+Stores:
+
+| Addr | D | P | D16H | D16L | D 8 |
+|------------+---+---+------+------+-----|
+| P | * | * | * | * | * |
+| P++ | * | * | | * | * |
+| P-- | * | * | | * | * |
+| P+uimm5m2 | | | | * | |
+| P+uimm6m4 | * | * | | | |
+| P+imm16 | | | | | * |
+| P+imm17m2 | | | | * | |
+| P+imm18m4 | * | * | | | |
+| P++P | * | | * | * | |
+| FP-uimm7m4 | * | * | | | |
+| I | * | | * | * | |
+| I++ | * | | * | * | |
+| I-- | * | | * | * | |
+| I++M | * | | | | |
+
+* Workarounds and features
+Blackfin CPUs have bugs. Each model comes in a number of silicon revisions with
+different bugs. We learn about the CPU model from the -mcpu switch.
+
+** Interpretation of -mcpu value
+- -mcpu=bf527 refers to the latest known BF527 revision
+- -mcpu=bf527-0.2 refers to silicon rev. 0.2
+- -mcpu=bf527-any refers to all known revisions
+- -mcpu=bf527-none disables all workarounds
+
+The -mcpu setting determines the __SILICON_REVISION__ macro and the set of
+enabled workarounds:
+
+| -mcpu | __SILICON_REVISION__ | Workarounds |
+|------------+----------------------+--------------------|
+| bf527 | Def Latest | Specific to latest |
+| bf527-1.3 | Def 0x0103 | Specific to 1.3 |
+| bf527-any | Def 0xffff | All bf527-x.y |
+| bf527-none | Undefined | None |
+
+These are the known cores and revisions:
+
+| Core | Silicon | Processors |
+|-------------+--------------------+-------------------------|
+| Edinburgh | 0.3, 0.4, 0.5, 0.6 | BF531 BF532 BF533 |
+| Braemar | 0.2, 0.3 | BF534 BF536 BF537 |
+| Stirling | 0.3, 0.4, 0.5 | BF538 BF539 |
+| Moab | 0.0, 0.1, 0.2 | BF542 BF544 BF548 BF549 |
+| Teton | 0.3, 0.5 | BF561 |
+| Kookaburra | 0.0, 0.1, 0.2 | BF523 BF525 BF527 |
+| Mockingbird | 0.0, 0.1 | BF522 BF524 BF526 |
+| Brodie | 0.0, 0.1 | BF512 BF514 BF516 BF518 |
+
+
+** Compiler-implemented workarounds
+Most workarounds are implemented in header files and source code using the
+__ADSPBF527__ macros. A few workarounds require compiler support.
+
+| Anomaly | Macro | GCC Switch |
+|----------+--------------------------------+------------------|
+| Any | __WORKAROUNDS_ENABLED | |
+| 05000074 | WA_05000074 | |
+| 05000244 | __WORKAROUND_SPECULATIVE_SYNCS | -mcsync-anomaly |
+| 05000245 | __WORKAROUND_SPECULATIVE_LOADS | -mspecld-anomaly |
+| 05000257 | WA_05000257 | |
+| 05000283 | WA_05000283 | |
+| 05000312 | WA_LOAD_LCREGS | |
+| 05000315 | WA_05000315 | |
+| 05000371 | __WORKAROUND_RETS | |
+| 05000426 | __WORKAROUND_INDIRECT_CALLS | Not -micplb |
+
+** GCC feature switches
+| Switch | Description |
+|---------------------------+----------------------------------------|
+| -msim | Use simulator runtime |
+| -momit-leaf-frame-pointer | Omit frame pointer for leaf functions |
+| -mlow64k | |
+| -mcsync-anomaly | |
+| -mspecld-anomaly | |
+| -mid-shared-library | |
+| -mleaf-id-shared-library | |
+| -mshared-library-id= | |
+| -msep-data | Enable separate data segment |
+| -mlong-calls | Use indirect calls |
+| -mfast-fp | |
+| -mfdpic | |
+| -minline-plt | |
+| -mstack-check-l1 | Do stack checking in L1 scratch memory |
+| -mmulticore | Enable multicore support |
+| -mcorea | Build for Core A |
+| -mcoreb | Build for Core B |
+| -msdram | Build for SDRAM |
+| -micplb | Assume ICPLBs are enabled at runtime |
diff --git a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
new file mode 100644
index 000000000000..402e0afde81d
--- /dev/null
+++ b/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- BlackfinTargetInfo.cpp - Blackfin Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheBlackfinTarget;
+
+extern "C" void LLVMInitializeBlackfinTargetInfo() {
+ RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
+ "Analog Devices Blackfin [experimental]");
+}
diff --git a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..5ca80604f63c
--- /dev/null
+++ b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMBlackfinInfo
+ BlackfinTargetInfo.cpp
+ )
+
+add_dependencies(LLVMBlackfinInfo BlackfinCodeGenTable_gen)
diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Blackfin/TargetInfo/Makefile
new file mode 100644
index 000000000000..c49cfbe69077
--- /dev/null
+++ b/lib/Target/Blackfin/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Blackfin/TargetInfo/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 294c6d35beab..fe63edf3ff68 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -24,43 +24,36 @@
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ConstantsScanner.h"
#include "llvm/Analysis/FindUsedTypes.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/System/Host.h"
#include "llvm/Config/config.h"
#include <algorithm>
#include <sstream>
using namespace llvm;
-/// CBackendTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int CBackendTargetMachineModule;
-int CBackendTargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<CTargetMachine> X("c", "C backend");
-
-// Force static initialization.
-extern "C" void LLVMInitializeCBackendTarget() { }
+extern "C" void LLVMInitializeCBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
+}
namespace {
/// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
@@ -88,12 +81,12 @@ namespace {
/// CWriter - This class is the main chunk of code that converts an LLVM
/// module to a C translation unit.
class CWriter : public FunctionPass, public InstVisitor<CWriter> {
- raw_ostream &Out;
+ formatted_raw_ostream &Out;
IntrinsicLowering *IL;
Mangler *Mang;
LoopInfo *LI;
const Module *TheModule;
- const TargetAsmInfo* TAsm;
+ const MCAsmInfo* TAsm;
const TargetData* TD;
std::map<const Type *, std::string> TypeNames;
std::map<const ConstantFP *, unsigned> FPConstantMap;
@@ -101,12 +94,14 @@ namespace {
std::set<const Argument*> ByValParams;
unsigned FPCounter;
unsigned OpaqueCounter;
+ DenseMap<const Value*, unsigned> AnonValueNumbers;
+ unsigned NextAnonValueNumber;
public:
static char ID;
- explicit CWriter(raw_ostream &o)
+ explicit CWriter(formatted_raw_ostream &o)
: FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
- TheModule(0), TAsm(0), TD(0), OpaqueCounter(0) {
+ TheModule(0), TAsm(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) {
FPCounter = 0;
}
@@ -149,24 +144,26 @@ namespace {
return false;
}
- raw_ostream &printType(raw_ostream &Out, const Type *Ty,
- bool isSigned = false,
- const std::string &VariableName = "",
- bool IgnoreName = false,
- const AttrListPtr &PAL = AttrListPtr());
+ raw_ostream &printType(formatted_raw_ostream &Out,
+ const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
std::ostream &printType(std::ostream &Out, const Type *Ty,
bool isSigned = false,
const std::string &VariableName = "",
bool IgnoreName = false,
const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
- bool isSigned,
- const std::string &NameSoFar = "");
+ raw_ostream &printSimpleType(formatted_raw_ostream &Out,
+ const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
bool isSigned,
const std::string &NameSoFar = "");
- void printStructReturnPointerFunctionType(raw_ostream &Out,
+ void printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
const AttrListPtr &PAL,
const PointerType *Ty);
@@ -239,7 +236,7 @@ namespace {
// Must be an expression, must be used exactly once. If it is dead, we
// emit it inline where it would go.
- if (I.getType() == Type::VoidTy || !I.hasOneUse() ||
+ if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
isa<InsertValueInst>(I))
@@ -286,11 +283,11 @@ namespace {
void visitBranchInst(BranchInst &I);
void visitSwitchInst(SwitchInst &I);
void visitInvokeInst(InvokeInst &I) {
- assert(0 && "Lowerinvoke pass didn't work!");
+ llvm_unreachable("Lowerinvoke pass didn't work!");
}
void visitUnwindInst(UnwindInst &I) {
- assert(0 && "Lowerinvoke pass didn't work!");
+ llvm_unreachable("Lowerinvoke pass didn't work!");
}
void visitUnreachableInst(UnreachableInst &I);
@@ -321,8 +318,10 @@ namespace {
void visitExtractValueInst(ExtractValueInst &I);
void visitInstruction(Instruction &I) {
- cerr << "C Writer does not know about " << I;
- abort();
+#ifndef NDEBUG
+ errs() << "C Writer does not know about " << I;
+#endif
+ llvm_unreachable(0);
}
void outputLValue(Instruction *I) {
@@ -430,7 +429,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
/// printStructReturnPointerFunctionType - This is like printType for a struct
/// return type, except, instead of printing the type as void (*)(Struct*, ...)
/// print it as "Struct (*)(...)", for struct return functions.
-void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
const AttrListPtr &PAL,
const PointerType *TheTy) {
const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
@@ -466,7 +465,8 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
}
raw_ostream &
-CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty,
+ bool isSigned,
const std::string &NameSoFar) {
assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
"Invalid type for printSimpleType");
@@ -505,8 +505,10 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
}
default:
- cerr << "Unknown primitive type: " << *Ty << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+ llvm_unreachable(0);
}
}
@@ -550,17 +552,20 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
}
default:
- cerr << "Unknown primitive type: " << *Ty << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+ llvm_unreachable(0);
}
}
// Pass the Type* and the variable name and this prints out the variable
// declaration.
//
-raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
- bool isSigned, const std::string &NameSoFar,
- bool IgnoreName, const AttrListPtr &PAL) {
+raw_ostream &CWriter::printType(formatted_raw_ostream &Out,
+ const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
printSimpleType(Out, Ty, isSigned, NameSoFar);
return Out;
@@ -652,8 +657,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
return Out << TyName << ' ' << NameSoFar;
}
default:
- assert(0 && "Unhandled case in getTypeProps!");
- abort();
+ llvm_unreachable("Unhandled case in getTypeProps!");
}
return Out;
@@ -756,8 +760,7 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
return Out << TyName << ' ' << NameSoFar;
}
default:
- assert(0 && "Unhandled case in getTypeProps!");
- abort();
+ llvm_unreachable("Unhandled case in getTypeProps!");
}
return Out;
@@ -769,7 +772,8 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
// ubytes or an array of sbytes with positive values.
//
const Type *ETy = CPA->getType()->getElementType();
- bool isString = (ETy == Type::Int8Ty || ETy == Type::Int8Ty);
+ bool isString = ETy == Type::getInt8Ty(CPA->getContext());
// Make sure the last character is a null char, as automatically added by C
if (isString && (CPA->getNumOperands() == 0 ||
@@ -855,10 +859,11 @@ void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
static bool isFPCSafeToPrint(const ConstantFP *CFP) {
bool ignored;
// Do long doubles in hex for now.
- if (CFP->getType() != Type::FloatTy && CFP->getType() != Type::DoubleTy)
+ if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
+ CFP->getType() != Type::getDoubleTy(CFP->getContext()))
return false;
APFloat APF = APFloat(CFP->getValueAPF()); // copy
- if (CFP->getType() == Type::FloatTy)
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
char Buffer[100];
@@ -916,7 +921,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
Out << ')';
break;
default:
- assert(0 && "Invalid cast opcode");
+ llvm_unreachable("Invalid cast opcode");
}
// Print the source type cast
@@ -946,7 +951,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
case Instruction::FPToUI:
break; // These don't need a source cast.
default:
- assert(0 && "Invalid cast opcode");
+ llvm_unreachable("Invalid cast opcode");
break;
}
}
@@ -970,12 +975,12 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << "(";
printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
if (CE->getOpcode() == Instruction::SExt &&
- CE->getOperand(0)->getType() == Type::Int1Ty) {
+ CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
// Make sure we really sext from bool here by subtracting from 0
Out << "0-";
}
printConstant(CE->getOperand(0), Static);
- if (CE->getType() == Type::Int1Ty &&
+ if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
(CE->getOpcode() == Instruction::Trunc ||
CE->getOpcode() == Instruction::FPToUI ||
CE->getOpcode() == Instruction::FPToSI ||
@@ -1055,10 +1060,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
case ICmpInst::ICMP_UGT: Out << " > "; break;
case ICmpInst::ICMP_SGE:
case ICmpInst::ICMP_UGE: Out << " >= "; break;
- default: assert(0 && "Illegal ICmp predicate");
+ default: llvm_unreachable("Illegal ICmp predicate");
}
break;
- default: assert(0 && "Illegal opcode here!");
+ default: llvm_unreachable("Illegal opcode here!");
}
printConstantWithCast(CE->getOperand(1), CE->getOpcode());
if (NeedsClosingParens)
@@ -1076,7 +1081,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
else {
const char* op = 0;
switch (CE->getPredicate()) {
- default: assert(0 && "Illegal FCmp predicate");
+ default: llvm_unreachable("Illegal FCmp predicate");
case FCmpInst::FCMP_ORD: op = "ord"; break;
case FCmpInst::FCMP_UNO: op = "uno"; break;
case FCmpInst::FCMP_UEQ: op = "ueq"; break;
@@ -1104,9 +1109,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
return;
}
default:
- cerr << "CWriter Error: Unhandled constant expression: "
+#ifndef NDEBUG
+ errs() << "CWriter Error: Unhandled constant expression: "
<< *CE << "\n";
- abort();
+#endif
+ llvm_unreachable(0);
}
} else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
Out << "((";
@@ -1122,9 +1129,9 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
const Type* Ty = CI->getType();
- if (Ty == Type::Int1Ty)
+ if (Ty == Type::getInt1Ty(CPV->getContext()))
Out << (CI->getZExtValue() ? '1' : '0');
- else if (Ty == Type::Int32Ty)
+ else if (Ty == Type::getInt32Ty(CPV->getContext()))
Out << CI->getZExtValue() << 'u';
else if (Ty->getPrimitiveSizeInBits() > 32)
Out << CI->getZExtValue() << "ull";
@@ -1151,15 +1158,17 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
if (I != FPConstantMap.end()) {
// Because of FP precision problems we must load from a stack allocated
// value that holds the value in hex.
- Out << "(*(" << (FPC->getType() == Type::FloatTy ? "float" :
- FPC->getType() == Type::DoubleTy ? "double" :
+ Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
+ "float" :
+ FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+ "double" :
"long double")
<< "*)&FPConstant" << I->second << ')';
} else {
double V;
- if (FPC->getType() == Type::FloatTy)
+ if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
V = FPC->getValueAPF().convertToFloat();
- else if (FPC->getType() == Type::DoubleTy)
+ else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
V = FPC->getValueAPF().convertToDouble();
else {
// Long double. Convert the number to double, discarding precision.
@@ -1189,7 +1198,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
std::string Num(&Buffer[0], &Buffer[6]);
unsigned long Val = strtoul(Num.c_str(), 0, 16);
- if (FPC->getType() == Type::FloatTy)
+ if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
<< Buffer << "\") /*nan*/ ";
else
@@ -1198,7 +1207,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
} else if (IsInf(V)) {
// The value is Inf
if (V < 0) Out << '-';
- Out << "LLVM_INF" << (FPC->getType() == Type::FloatTy ? "F" : "")
+ Out << "LLVM_INF" <<
+ (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
<< " /*inf*/ ";
} else {
std::string Num;
@@ -1312,8 +1322,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
}
// FALL THROUGH
default:
- cerr << "Unknown constant type: " << *CPV << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Unknown constant type: " << *CPV << "\n";
+#endif
+ llvm_unreachable(0);
}
}
@@ -1359,7 +1371,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
}
if (NeedsExplicitCast) {
Out << "((";
- if (Ty->isInteger() && Ty != Type::Int1Ty)
+ if (Ty->isInteger() && Ty != Type::getInt1Ty(Ty->getContext()))
printSimpleType(Out, Ty, TypeIsSigned);
else
printType(Out, Ty); // not integer, sign doesn't matter
@@ -1419,33 +1431,36 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
}
std::string CWriter::GetValueName(const Value *Operand) {
- std::string Name;
-
- if (!isa<GlobalValue>(Operand) && Operand->getName() != "") {
- std::string VarName;
-
- Name = Operand->getName();
- VarName.reserve(Name.capacity());
-
- for (std::string::iterator I = Name.begin(), E = Name.end();
- I != E; ++I) {
- char ch = *I;
+ // Mangle globals with the standard mangler interface for LLC compatibility.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand))
+ return Mang->getMangledName(GV);
+
+ std::string Name = Operand->getName();
+
+ if (Name.empty()) { // Assign unique names to local temporaries.
+ unsigned &No = AnonValueNumbers[Operand];
+ if (No == 0)
+ No = ++NextAnonValueNumber;
+ Name = "tmp__" + utostr(No);
+ }
+
+ std::string VarName;
+ VarName.reserve(Name.capacity());
- if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
- (ch >= '0' && ch <= '9') || ch == '_')) {
- char buffer[5];
- sprintf(buffer, "_%x_", ch);
- VarName += buffer;
- } else
- VarName += ch;
- }
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
- Name = "llvm_cbe_" + VarName;
- } else {
- Name = Mang->getValueName(Operand);
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_')) {
+ char buffer[5];
+ sprintf(buffer, "_%x_", ch);
+ VarName += buffer;
+ } else
+ VarName += ch;
}
- return Name;
+ return "llvm_cbe_" + VarName;
}
/// writeInstComputationInline - Emit the computation for the specified
@@ -1454,19 +1469,22 @@ void CWriter::writeInstComputationInline(Instruction &I) {
// We can't currently support integer types other than 1, 8, 16, 32, 64.
// Validate this.
const Type *Ty = I.getType();
- if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty &&
- Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) {
- cerr << "The C backend does not currently support integer "
- << "types of widths other than 1, 8, 16, 32, 64.\n";
- cerr << "This is being tracked as PR 4158.\n";
- abort();
+ if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) &&
+ Ty!=Type::getInt8Ty(I.getContext()) &&
+ Ty!=Type::getInt16Ty(I.getContext()) &&
+ Ty!=Type::getInt32Ty(I.getContext()) &&
+ Ty!=Type::getInt64Ty(I.getContext()))) {
+ llvm_report_error("The C backend does not currently support integer "
+ "types of widths other than 1, 8, 16, 32, 64.\n"
+ "This is being tracked as PR 4158.");
}
// If this is a non-trivial bool computation, make sure to truncate down to
// a 1 bit value. This is important because we want "add i1 x, y" to return
// "0" when x and y are true, not "2" for example.
bool NeedBoolTrunc = false;
- if (I.getType() == Type::Int1Ty && !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+ if (I.getType() == Type::getInt1Ty(I.getContext()) &&
+ !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
NeedBoolTrunc = true;
if (NeedBoolTrunc)
@@ -1615,7 +1633,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
// If the operand was a pointer, convert to a large integer type.
const Type* OpTy = Operand->getType();
if (isa<PointerType>(OpTy))
- OpTy = TD->getIntPtrType();
+ OpTy = TD->getIntPtrType(Operand->getContext());
Out << "((";
printSimpleType(Out, OpTy, castIsSigned);
@@ -1627,13 +1645,13 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
// generateCompilerSpecificCode - This is where we add conditional compilation
// directives to cater to specific compilers as need be.
//
-static void generateCompilerSpecificCode(raw_ostream& Out,
+static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
const TargetData *TD) {
// Alloca is hard to get, and we don't want to include stdlib.h here.
Out << "/* get a declaration for alloca */\n"
<< "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
<< "#define alloca(x) __builtin_alloca((x))\n"
- << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
<< "#elif defined(__APPLE__)\n"
<< "extern void *__builtin_alloca(unsigned long);\n"
<< "#define alloca(x) __builtin_alloca(x)\n"
@@ -1646,7 +1664,7 @@ static void generateCompilerSpecificCode(raw_ostream& Out,
<< "extern void *__builtin_alloca(unsigned int);\n"
<< "#endif\n"
<< "#define alloca(x) __builtin_alloca(x)\n"
- << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)\n"
+ << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
<< "#define alloca(x) __builtin_alloca(x)\n"
<< "#elif defined(_MSC_VER)\n"
<< "#define inline _inline\n"
@@ -1803,8 +1821,34 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
return NotSpecial;
}
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const char *Str, unsigned Length,
+ raw_ostream &Out) {
+ for (unsigned i = 0; i != Length; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else if (C == '\\')
+ Out << "\\\\";
+ else if (C == '\"')
+ Out << "\\\"";
+ else if (C == '\t')
+ Out << "\\t";
+ else
+ Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ PrintEscapedString(Str.c_str(), Str.size(), Out);
+}
bool CWriter::doInitialization(Module &M) {
+ FunctionPass::doInitialization(M);
+
// Initialize
TheModule = &M;
@@ -1855,6 +1899,29 @@ bool CWriter::doInitialization(Module &M) {
// First output all the declarations for the program, because C requires
// Functions & globals to be declared before they are used.
//
+ if (!M.getModuleInlineAsm().empty()) {
+ Out << "/* Module asm statements */\n"
+ << "asm(";
+
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M.getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\\n\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
+ Out << "\");\n"
+ << "/* End Module asm statements */\n";
+ }
// Loop over the symbol table, emitting all named constants...
printModuleTypes(M.getTypeSymbolTable());
@@ -1910,7 +1977,7 @@ bool CWriter::doInitialization(Module &M) {
Out << " __HIDDEN__";
if (I->hasName() && I->getName()[0] == 1)
- Out << " LLVM_ASM(\"" << I->getName().c_str()+1 << "\")";
+ Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
Out << ";\n";
}
@@ -2085,20 +2152,20 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
FPConstantMap[FPC] = FPCounter; // Number the FP constants
- if (FPC->getType() == Type::DoubleTy) {
+ if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
double Val = FPC->getValueAPF().convertToDouble();
uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
<< " = 0x" << utohexstr(i)
<< "ULL; /* " << Val << " */\n";
- } else if (FPC->getType() == Type::FloatTy) {
+ } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
float Val = FPC->getValueAPF().convertToFloat();
uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
getZExtValue();
Out << "static const ConstantFloatTy FPConstant" << FPCounter++
<< " = 0x" << utohexstr(i)
<< "U; /* " << Val << " */\n";
- } else if (FPC->getType() == Type::X86_FP80Ty) {
+ } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
// api needed to prevent premature destruction
APInt api = FPC->getValueAPF().bitcastToAPInt();
const uint64_t *p = api.getRawData();
@@ -2106,7 +2173,8 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
<< " = { 0x" << utohexstr(p[0])
<< "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
<< "}; /* Long double constant */\n";
- } else if (FPC->getType() == Type::PPC_FP128Ty) {
+ } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
+ FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
APInt api = FPC->getValueAPF().bitcastToAPInt();
const uint64_t *p = api.getRawData();
Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
@@ -2115,7 +2183,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
<< "}; /* Long double constant */\n";
} else {
- assert(0 && "Unknown float type!");
+ llvm_unreachable("Unknown float type!");
}
}
@@ -2215,6 +2283,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
case CallingConv::X86_FastCall:
Out << "__attribute__((fastcall)) ";
break;
+ default:
+ break;
}
// Loop over the arguments, printing them...
@@ -2351,7 +2421,8 @@ void CWriter::printFunction(Function &F) {
printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
Out << "; /* Address-exposed local */\n";
PrintedVar = true;
- } else if (I->getType() != Type::VoidTy && !isInlinableInst(*I)) {
+ } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
+ !isInlinableInst(*I)) {
Out << " ";
printType(Out, I->getType(), false, GetValueName(&*I));
Out << ";\n";
@@ -2428,7 +2499,8 @@ void CWriter::printBasicBlock(BasicBlock *BB) {
for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
++II) {
if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
- if (II->getType() != Type::VoidTy && !isInlineAsm(*II))
+ if (II->getType() != Type::getVoidTy(BB->getContext()) &&
+ !isInlineAsm(*II))
outputLValue(II);
else
Out << " ";
@@ -2603,8 +2675,9 @@ void CWriter::visitBinaryOperator(Instruction &I) {
// We must cast the results of binary operations which might be promoted.
bool needsCast = false;
- if ((I.getType() == Type::Int8Ty) || (I.getType() == Type::Int16Ty)
- || (I.getType() == Type::FloatTy)) {
+ if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
+ (I.getType() == Type::getInt16Ty(I.getContext()))
+ || (I.getType() == Type::getFloatTy(I.getContext()))) {
needsCast = true;
Out << "((";
printType(Out, I.getType(), false);
@@ -2623,9 +2696,9 @@ void CWriter::visitBinaryOperator(Instruction &I) {
Out << ")";
} else if (I.getOpcode() == Instruction::FRem) {
// Output a call to fmod/fmodf instead of emitting a%b
- if (I.getType() == Type::FloatTy)
+ if (I.getType() == Type::getFloatTy(I.getContext()))
Out << "fmodf(";
- else if (I.getType() == Type::DoubleTy)
+ else if (I.getType() == Type::getDoubleTy(I.getContext()))
Out << "fmod(";
else // all 3 flavors of long double
Out << "fmodl(";
@@ -2663,7 +2736,11 @@ void CWriter::visitBinaryOperator(Instruction &I) {
case Instruction::Shl : Out << " << "; break;
case Instruction::LShr:
case Instruction::AShr: Out << " >> "; break;
- default: cerr << "Invalid operator type!" << I; abort();
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid operator type!" << I;
+#endif
+ llvm_unreachable(0);
}
writeOperandWithCast(I.getOperand(1), I.getOpcode());
@@ -2700,7 +2777,11 @@ void CWriter::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_SLT: Out << " < "; break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT: Out << " > "; break;
- default: cerr << "Invalid icmp predicate!" << I; abort();
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid icmp predicate!" << I;
+#endif
+ llvm_unreachable(0);
}
writeOperandWithCast(I.getOperand(1), I);
@@ -2724,7 +2805,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) {
const char* op = 0;
switch (I.getPredicate()) {
- default: assert(0 && "Illegal FCmp predicate");
+ default: llvm_unreachable("Illegal FCmp predicate");
case FCmpInst::FCMP_ORD: op = "ord"; break;
case FCmpInst::FCMP_UNO: op = "uno"; break;
case FCmpInst::FCMP_UEQ: op = "ueq"; break;
@@ -2752,7 +2833,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) {
static const char * getFloatBitCastField(const Type *Ty) {
switch (Ty->getTypeID()) {
- default: assert(0 && "Invalid Type");
+ default: llvm_unreachable("Invalid Type");
case Type::FloatTyID: return "Float";
case Type::DoubleTyID: return "Double";
case Type::IntegerTyID: {
@@ -2784,12 +2865,13 @@ void CWriter::visitCastInst(CastInst &I) {
printCast(I.getOpcode(), SrcTy, DstTy);
// Make a sext from i1 work by subtracting the i1 from 0 (an int).
- if (SrcTy == Type::Int1Ty && I.getOpcode() == Instruction::SExt)
+ if (SrcTy == Type::getInt1Ty(I.getContext()) &&
+ I.getOpcode() == Instruction::SExt)
Out << "0-";
writeOperand(I.getOperand(0));
- if (DstTy == Type::Int1Ty &&
+ if (DstTy == Type::getInt1Ty(I.getContext()) &&
(I.getOpcode() == Instruction::Trunc ||
I.getOpcode() == Instruction::FPToUI ||
I.getOpcode() == Instruction::FPToSI ||
@@ -3020,10 +3102,12 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << ", ";
// Output the last argument to the enclosing function.
if (I.getParent()->getParent()->arg_empty()) {
- cerr << "The C backend does not currently support zero "
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "The C backend does not currently support zero "
<< "argument varargs functions, such as '"
- << I.getParent()->getParent()->getName() << "'!\n";
- abort();
+ << I.getParent()->getParent()->getName() << "'!";
+ llvm_report_error(Msg.str());
}
writeOperand(--I.getParent()->getParent()->arg_end());
Out << ')';
@@ -3092,16 +3176,15 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
case Intrinsic::dbg_stoppoint: {
// If we use writeOperand directly we get a "u" suffix which is rejected
// by gcc.
- std::stringstream SPIStr;
DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
- SPI.getDirectory()->print(SPIStr);
+ std::string dir;
+ GetConstantStringInfo(SPI.getDirectory(), dir);
+ std::string file;
+ GetConstantStringInfo(SPI.getFileName(), file);
Out << "\n#line "
<< SPI.getLine()
- << " \"";
- Out << SPIStr.str();
- SPIStr.clear();
- SPI.getFileName()->print(SPIStr);
- Out << SPIStr.str() << "\"\n";
+ << " \""
+ << dir << '/' << file << "\"\n";
return true;
}
case Intrinsic::x86_sse_cmp_ss:
@@ -3113,7 +3196,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << ')';
// Multiple GCC builtins multiplex onto this intrinsic.
switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
- default: assert(0 && "Invalid llvm.x86.sse.cmp!");
+ default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
case 0: Out << "__builtin_ia32_cmpeq"; break;
case 1: Out << "__builtin_ia32_cmplt"; break;
case 2: Out << "__builtin_ia32_cmple"; break;
@@ -3159,27 +3242,25 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
const char *const *table = 0;
- //Grab the translation table from TargetAsmInfo if it exists
+ // Grab the translation table from MCAsmInfo if it exists.
if (!TAsm) {
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
std::string E;
- const TargetMachineRegistry::entry* Match =
- TargetMachineRegistry::getClosestStaticTargetForModule(*TheModule, E);
- if (Match) {
- //Per platform Target Machines don't exist, so create it
- // this must be done only once
- const TargetMachine* TM = Match->CtorFn(*TheModule, "");
- TAsm = TM->getTargetAsmInfo();
- }
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TAsm = Match->createAsmInfo(Triple);
}
if (TAsm)
table = TAsm->getAsmCBE();
- //Search the translation table if it exists
+ // Search the translation table if it exists.
for (int i = 0; table && table[i]; i += 2)
if (c.Codes[0] == table[i])
return table[i+1];
- //default is identity
+ // Default is identity.
return c.Codes[0];
}
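
This hunk replaces the old TargetMachineRegistry, which needed a Module to pick a target, with the new TargetRegistry keyed on a triple string, falling back to the host triple when the module carries none. A sketch of that lookup flow under the same assumptions; lookupAsmInfo is illustrative, not code from this commit:

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/System/Host.h"
    #include <string>

    static const llvm::MCAsmInfo *lookupAsmInfo(std::string Triple) {
      if (Triple.empty())
        Triple = llvm::sys::getHostTriple();  // fall back to the build host
      std::string Err;
      const llvm::Target *T = llvm::TargetRegistry::lookupTarget(Triple, Err);
      return T ? T->createAsmInfo(Triple) : 0; // 0 when no target matches
    }
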
@@ -3215,7 +3296,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
std::vector<std::pair<Value*, int> > ResultVals;
- if (CI.getType() == Type::VoidTy)
+ if (CI.getType() == Type::getVoidTy(CI.getContext()))
;
else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
@@ -3325,7 +3406,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
}
void CWriter::visitMallocInst(MallocInst &I) {
- assert(0 && "lowerallocations pass didn't work!");
+ llvm_unreachable("lowerallocations pass didn't work!");
}
void CWriter::visitAllocaInst(AllocaInst &I) {
@@ -3342,7 +3423,7 @@ void CWriter::visitAllocaInst(AllocaInst &I) {
}
void CWriter::visitFreeInst(FreeInst &I) {
- assert(0 && "lowerallocations pass didn't work!");
+ llvm_unreachable("lowerallocations pass didn't work!");
}
void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
@@ -3603,7 +3684,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
//===----------------------------------------------------------------------===//
bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
- raw_ostream &o,
+ formatted_raw_ostream &o,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel) {
if (FileType != TargetMachine::AssemblyFile) return true;
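
Throughout this file the error-handling idiom changes the same way: assert(0 && ...) on impossible paths becomes llvm_unreachable, and cerr-plus-abort() on reportable user errors becomes llvm_report_error with a message built in a raw_string_ostream. The shape of the latter, sketched in isolation; reportBadOpcode is a made-up example, not code from this commit:

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static void reportBadOpcode(unsigned Opc) {
      // Build the diagnostic in memory, then hand it to the error
      // handler instead of printing to cerr and calling abort().
      std::string msg;
      llvm::raw_string_ostream Msg(msg);
      Msg << "unsupported opcode " << Opc;
      llvm::llvm_report_error(Msg.str());  // does not return
    }
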
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 8b262455ad34..715bbdaf0c87 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -20,23 +20,20 @@
namespace llvm {
struct CTargetMachine : public TargetMachine {
- const TargetData DataLayout; // Calculates type size & alignment
-
- CTargetMachine(const Module &M, const std::string &FS)
- : DataLayout(&M) {}
+ CTargetMachine(const Target &T, const std::string &TT, const std::string &FS)
+ : TargetMachine(T) {}
virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ virtual bool addPassesToEmitWholeFile(PassManager &PM,
+ formatted_raw_ostream &Out,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel);
-
- // This class always works, but must be requested explicitly on
- // llc command line.
- static unsigned getModuleMatchQuality(const Module &M) { return 0; }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const TargetData *getTargetData() const { return 0; }
};
+extern Target TheCBackendTarget;
+
} // End llvm namespace
diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile
index 336de0c6f440..3b5ef0f34692 100644
--- a/lib/Target/CBackend/Makefile
+++ b/lib/Target/CBackend/Makefile
@@ -9,6 +9,9 @@
LEVEL = ../../..
LIBRARYNAME = LLVMCBackend
+
+DIRS = TargetInfo
+
include $(LEVEL)/Makefile.common
CompileCommonOpts += -Wno-format
diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
new file mode 100644
index 000000000000..f7e8ff254848
--- /dev/null
+++ b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCBackendTarget;
+
+extern "C" void LLVMInitializeCBackendTargetInfo() {
+ RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
+}
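
The new TargetInfo sub-library holds nothing but the Target object and its registration, so clients can enumerate targets without linking the full code generator. The same shape for a hypothetical backend; TheFooBackendTarget and the strings are illustrative only:

    #include "llvm/Target/TargetRegistry.h"
    using namespace llvm;

    Target TheFooBackendTarget;

    extern "C" void LLVMInitializeFooBackendTargetInfo() {
      // The empty template argument means the target claims no
      // architecture automatically; tools must request it by name.
      RegisterTarget<> X(TheFooBackendTarget, "foo", "Foo backend");
    }
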
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..5b35fa7c065b
--- /dev/null
+++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMCBackendInfo
+ CBackendTargetInfo.cpp
+ )
+
diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/CBackend/TargetInfo/Makefile
new file mode 100644
index 000000000000..d4d5e15b40bb
--- /dev/null
+++ b/lib/Target/CBackend/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCBackendInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 7cffd0e53c17..8769ee297b65 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,18 +1,14 @@
add_llvm_library(LLVMTarget
- DarwinTargetAsmInfo.cpp
- ELFTargetAsmInfo.cpp
SubtargetFeature.cpp
Target.cpp
- TargetAsmInfo.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
TargetFrameInfo.cpp
TargetInstrInfo.cpp
+ TargetIntrinsicInfo.cpp
+ TargetLoweringObjectFile.cpp
TargetMachOWriterInfo.cpp
TargetMachine.cpp
- TargetMachineRegistry.cpp
TargetRegisterInfo.cpp
TargetSubtarget.cpp
)
-
-# TODO: Support other targets besides X86. See Makefile.
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
index 9684e63a60de..1e508fe18908 100644
--- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
@@ -4,6 +4,6 @@ include_directories(
)
add_llvm_library(LLVMCellSPUAsmPrinter
- SPUAsmPrinter.cpp
+ SPUAsmPrinter.cpp
)
 add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile
index dd56df71a5de..69639efca748 100644
--- a/lib/Target/CellSPU/AsmPrinter/Makefile
+++ b/lib/Target/CellSPU/AsmPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
+##===- lib/Target/CellSPU/AsmPrinter/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 2847d0b8393b..0f8d5393ab84 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -19,25 +19,29 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/MDNode.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Support/Mangler.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
#include <set>
using namespace llvm;
@@ -49,8 +53,8 @@ namespace {
class VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter {
std::set<std::string> FnStubs, GVStubs;
public:
- explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM,
- const TargetAsmInfo *T, bool V) :
+ explicit SPUAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V) :
AsmPrinter(O, TM, T, V) {}
virtual const char *getPassName() const {
@@ -62,10 +66,10 @@ namespace {
}
/// printInstruction - This method is automatically generated by tablegen
- /// from the instruction set description. This method returns true if the
- /// machine instruction was sufficiently described to print it, otherwise it
- /// returns false.
- bool printInstruction(const MachineInstr *MI);
+ /// from the instruction set description.
+ void printInstruction(const MachineInstr *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
void printMachineInstruction(const MachineInstr *MI);
void printOp(const MachineOperand &MO);
@@ -76,14 +80,13 @@ namespace {
unsigned RegNo = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
"Not physreg??");
- O << TM.getRegisterInfo()->get(RegNo).AsmName;
+ O << getRegisterName(RegNo);
}
void printOperand(const MachineInstr *MI, unsigned OpNo) {
const MachineOperand &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physreg??");
- O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ O << getRegisterName(MO.getReg());
} else if (MO.isImm()) {
O << MO.getImm();
} else {
@@ -150,8 +153,7 @@ namespace {
// the value contained in the register. For this reason, the darwin
// assembler requires that we print r0 as 0 (no r) when used as the base.
const MachineOperand &MO = MI->getOperand(OpNo);
- O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
- O << ", ";
+ O << getRegisterName(MO.getReg()) << ", ";
printOperand(MI, OpNo+1);
}
@@ -264,7 +266,7 @@ namespace {
&& "Invalid negated immediate rotate 7-bit argument");
O << -value;
} else {
- assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
}
}
@@ -275,31 +277,25 @@ namespace {
&& "Invalid negated immediate rotate 7-bit argument");
O << -value;
} else {
- assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
}
}
virtual bool runOnMachineFunction(MachineFunction &F) = 0;
- //! Assembly printer cleanup after function has been emitted
- virtual bool doFinalization(Module &M) = 0;
};
/// LinuxAsmPrinter - SPU assembly printer, customized for Linux
class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
- DwarfWriter *DW;
public:
- explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
- const TargetAsmInfo *T, bool V)
- : SPUAsmPrinter(O, TM, T, V), DW(0) {}
+ explicit LinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
+ : SPUAsmPrinter(O, TM, T, V) {}
virtual const char *getPassName() const {
return "STI CBEA SPU Assembly Printer";
}
bool runOnMachineFunction(MachineFunction &F);
- bool doInitialization(Module &M);
- //! Dump globals, perform cleanup after function emission
- bool doFinalization(Module &M);
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -309,7 +305,7 @@ namespace {
}
//! Emit a global variable according to its section and type
- void printModuleLevelGV(const GlobalVariable* GVar);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
};
} // end of anonymous namespace
@@ -319,35 +315,34 @@ namespace {
void SPUAsmPrinter::printOp(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_Immediate:
- cerr << "printOp() does not handle immediate values\n";
- abort();
+ llvm_report_error("printOp() does not handle immediate values");
return;
case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
return;
case MachineOperand::MO_JumpTableIndex:
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
case MachineOperand::MO_ConstantPoolIndex:
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
case MachineOperand::MO_ExternalSymbol:
// Computing the address of an external symbol, not calling it.
if (TM.getRelocationModel() != Reloc::Static) {
- std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ std::string Name(MAI->getGlobalPrefix()); Name += MO.getSymbolName();
GVStubs.insert(Name);
O << "L" << Name << "$non_lazy_ptr";
return;
}
- O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ O << MAI->getGlobalPrefix() << MO.getSymbolName();
return;
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
GlobalValue *GV = MO.getGlobal();
- std::string Name = Mang->getValueName(GV);
+ std::string Name = Mang->getMangledName(GV);
// External or weakly linked global variables need non-lazily-resolved
// stubs
@@ -410,15 +405,18 @@ bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
///
void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
+ processDebugLoc(MI, true);
printInstruction(MI);
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ processDebugLoc(MI, false);
+ O << '\n';
}
/// runOnMachineFunction - This uses the printMachineInstruction()
/// method to print assembly for each instruction.
///
-bool
-LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
-{
+bool LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
SetupMachineFunction(MF);
@@ -430,12 +428,13 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
// Print out labels for the function.
const Function *F = MF.getFunction();
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
EmitAlignment(MF.getAlignment(), F);
switch (F->getLinkage()) {
- default: assert(0 && "Unknown linkage type!");
+ default: llvm_unreachable("Unknown linkage type!");
case Function::PrivateLinkage:
+ case Function::LinkerPrivateLinkage:
case Function::InternalLinkage: // Symbols default to internal.
break;
case Function::ExternalLinkage:
@@ -460,8 +459,7 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
I != E; ++I) {
// Print a label for the basic block.
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
@@ -483,29 +481,13 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
}
-bool LinuxAsmPrinter::doInitialization(Module &M) {
- bool Result = AsmPrinter::doInitialization(M);
- DW = getAnalysisIfAvailable<DwarfWriter>();
- SwitchToTextSection("\t.text");
- return Result;
-}
-
-/// PrintUnmangledNameSafely - Print out the printable characters in the name.
-/// Don't print things like \\n or \\0.
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
- for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
- Name != E; ++Name)
- if (isprint(*Name))
- OS << *Name;
-}
-
/*!
Emit a global variable according to its section, alignment, etc.
\note This code was shamelessly copied from the PowerPC's assembly printer,
which sort of screams for some kind of refactorization of common code.
*/
-void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+void LinuxAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer())
@@ -515,18 +497,17 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
if (EmitSpecialLLVMGlobal(GVar))
return;
- std::string name = Mang->getValueName(GVar);
+ std::string name = Mang->getMangledName(GVar);
printVisibility(name, GVar->getVisibility());
Constant *C = GVar->getInitializer();
- if (isa<MDNode>(C) || isa<MDString>(C))
- return;
const Type *Type = C->getType();
unsigned Size = TD->getTypeAllocSize(Type);
unsigned Align = TD->getPreferredAlignmentLog(GVar);
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
if (C->isNullValue() && /* FIXME: Verify correct */
!GVar->hasSection() &&
@@ -540,12 +521,12 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
O << name << ":\n";
O << "\t.zero " << Size << '\n';
} else if (GVar->hasLocalLinkage()) {
- O << TAI->getLCOMMDirective() << name << ',' << Size;
+ O << MAI->getLCOMMDirective() << name << ',' << Size;
} else {
O << ".comm " << name << ',' << Size;
}
- O << "\t\t" << TAI->getCommentString() << " '";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t" << MAI->getCommentString() << " '";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
O << "'\n";
return;
}
@@ -570,48 +551,23 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
<< "\t.type " << name << ", @object\n";
// FALL THROUGH
case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::InternalLinkage:
break;
default:
- cerr << "Unknown linkage type!";
- abort();
+ llvm_report_error("Unknown linkage type!");
}
EmitAlignment(Align, GVar);
- O << name << ":\t\t\t\t" << TAI->getCommentString() << " '";
- PrintUnmangledNameSafely(GVar, O);
+ O << name << ":\t\t\t\t" << MAI->getCommentString() << " '";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
O << "'\n";
EmitGlobalConstant(C);
O << '\n';
}
-bool LinuxAsmPrinter::doFinalization(Module &M) {
- // Print out module-level global variables here.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I);
-
- return AsmPrinter::doFinalization(M);
-}
-
-/// createSPUCodePrinterPass - Returns a pass that prints the Cell SPU
-/// assembly code for a MachineFunction to the given output stream, in a format
-/// that the Linux SPU assembler can deal with.
-///
-FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
- SPUTargetMachine &tm,
- bool verbose) {
- return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
-}
-
// Force static initialization.
-extern "C" void LLVMInitializeCellSPUAsmPrinter() { }
-
-namespace {
- static struct Register {
- Register() {
- SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass);
- }
- } Registrator;
+extern "C" void LLVMInitializeCellSPUAsmPrinter() {
+ RegisterAsmPrinter<LinuxAsmPrinter> X(TheCellSPUTarget);
}
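
The hand-rolled static Registrator struct is gone; RegisterAsmPrinter hooks the printer's constructor into the Target entry, and the extern "C" initializer gives drivers a symbol to force at startup. A sketch of how a driver would sequence the two initializers; initSPU itself is illustrative, not code from llc:

    extern "C" void LLVMInitializeCellSPUTargetInfo();
    extern "C" void LLVMInitializeCellSPUAsmPrinter();

    static void initSPU() {
      LLVMInitializeCellSPUTargetInfo(); // registers TheCellSPUTarget
      LLVMInitializeCellSPUAsmPrinter(); // attaches LinuxAsmPrinter to it
    }
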
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index 8a558459802e..0cb6676d7df7 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -17,9 +17,9 @@ add_llvm_target(CellSPUCodeGen
SPUInstrInfo.cpp
SPUISelDAGToDAG.cpp
SPUISelLowering.cpp
+ SPUMCAsmInfo.cpp
SPURegisterInfo.cpp
SPUSubtarget.cpp
- SPUTargetAsmInfo.cpp
SPUTargetMachine.cpp
)
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
index a460db3cfeda..8415168aea20 100644
--- a/lib/Target/CellSPU/Makefile
+++ b/lib/Target/CellSPU/Makefile
@@ -17,6 +17,6 @@ BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
SPUGenInstrInfo.inc SPUGenDAGISel.inc \
SPUGenSubtarget.inc SPUGenCallingConv.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 10d1110f1ad1..02713b5402da 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -21,12 +21,9 @@
namespace llvm {
class SPUTargetMachine;
class FunctionPass;
- class raw_ostream;
+ class formatted_raw_ostream;
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
- FunctionPass *createSPUAsmPrinterPass(raw_ostream &o,
- SPUTargetMachine &tm,
- bool verbose);
/*--== Utility functions/predicates/etc used all over the place: --==*/
//! Predicate test for a signed 10-bit value
@@ -92,6 +89,9 @@ namespace llvm {
inline bool isU10Constant(uint64_t Value) {
return (Value == (Value & 0x3ff));
}
+
+ extern Target TheCellSPUTarget;
+
}
// Defines symbolic names for the SPU instructions.
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
index caaa71a422fb..9dbab1da9902 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Debug.h"
-
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -115,7 +115,8 @@ SPUHazardRecognizer::getHazardType(SUnit *SU)
if (mustBeOdd && !EvenOdd)
retval = Hazard;
- DOUT << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " << retval << "\n";
+ DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
+ << retval << "\n");
EvenOdd ^= 1;
return retval;
#else
@@ -129,7 +130,7 @@ void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
void SPUHazardRecognizer::AdvanceCycle()
{
- DOUT << "SPUHazardRecognizer::AdvanceCycle\n";
+ DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
}
void SPUHazardRecognizer::EmitNoop()
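
The DOUT stream is retired in favor of DEBUG(errs() << ...), which prints only in asserts builds when -debug (or -debug-only=<type>) is passed and compiles away otherwise. A minimal sketch; the "spu-sched" debug-type name is illustrative:

    #define DEBUG_TYPE "spu-sched"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void traceCycle(unsigned Cycle) {
      // No-op in release builds; gated on -debug in asserts builds.
      DEBUG(errs() << "advance to cycle " << Cycle << "\n");
    }
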
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 779d75d0218a..1f9e5fcc4a7f 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -30,9 +30,12 @@
#include "llvm/Constants.h"
#include "llvm/GlobalValue.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -100,7 +103,7 @@ namespace {
bool
isIntS16Immediate(ConstantSDNode *CN, short &Imm)
{
- MVT vt = CN->getValueType(0);
+ EVT vt = CN->getValueType(0);
Imm = (short) CN->getZExtValue();
if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
return true;
@@ -129,7 +132,7 @@ namespace {
static bool
isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
{
- MVT vt = FPN->getValueType(0);
+ EVT vt = FPN->getValueType(0);
if (vt == MVT::f32) {
int val = FloatToBits(FPN->getValueAPF().convertToFloat());
int sval = (int) ((val << 16) >> 16);
@@ -151,10 +154,10 @@ namespace {
}
//===------------------------------------------------------------------===//
- //! MVT to "useful stuff" mapping structure:
+ //! EVT to "useful stuff" mapping structure:
struct valtype_map_s {
- MVT VT;
+ EVT VT;
unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
bool ldresult_imm; /// LDRESULT instruction requires immediate?
unsigned lrinst; /// LR instruction
@@ -178,7 +181,7 @@ namespace {
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
- const valtype_map_s *getValueTypeMapEntry(MVT VT)
+ const valtype_map_s *getValueTypeMapEntry(EVT VT)
{
const valtype_map_s *retval = 0;
for (size_t i = 0; i < n_valtype_map; ++i) {
@@ -191,10 +194,11 @@ namespace {
#ifndef NDEBUG
if (retval == 0) {
- cerr << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for "
- << VT.getMVTString()
- << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for "
+ << VT.getEVTString();
+ llvm_report_error(Msg.str());
}
#endif
@@ -249,10 +253,10 @@ namespace {
SPUtli(*tm.getTargetLowering())
{ }
- virtual bool runOnFunction(Function &Fn) {
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- SelectionDAGISel::runOnFunction(Fn);
+ SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -274,8 +278,8 @@ namespace {
}
SDNode *emitBuildVector(SDValue build_vec) {
- MVT vecVT = build_vec.getValueType();
- MVT eltVT = vecVT.getVectorElementType();
+ EVT vecVT = build_vec.getValueType();
+ EVT eltVT = vecVT.getVectorElementType();
SDNode *bvNode = build_vec.getNode();
DebugLoc dl = bvNode->getDebugLoc();
@@ -319,19 +323,19 @@ namespace {
SDNode *Select(SDValue Op);
//! Emit the instruction sequence for i64 shl
- SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+ SDNode *SelectSHLi64(SDValue &Op, EVT OpVT);
//! Emit the instruction sequence for i64 srl
- SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+ SDNode *SelectSRLi64(SDValue &Op, EVT OpVT);
//! Emit the instruction sequence for i64 sra
- SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+ SDNode *SelectSRAi64(SDValue &Op, EVT OpVT);
//! Emit the necessary sequence for loading i64 constants:
- SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
+ SDNode *SelectI64Constant(SDValue &Op, EVT OpVT, DebugLoc dl);
//! Alternate instruction emit sequence for loading i64 constants
- SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
+ SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
@@ -375,7 +379,7 @@ namespace {
break;
case 'v': // not offsetable
#if 1
- assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
+ llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
#else
SelectAddrIdxOnly(Op, Op, Op0, Op1);
#endif
@@ -430,23 +434,21 @@ bool
SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index) {
// These match the addr256k operand type:
- MVT OffsVT = MVT::i16;
+ EVT OffsVT = MVT::i16;
SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
switch (N.getOpcode()) {
case ISD::Constant:
case ISD::ConstantPool:
case ISD::GlobalAddress:
- cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n";
- abort();
+ llvm_report_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered.");
/*NOTREACHED*/
case ISD::TargetConstant:
case ISD::TargetGlobalAddress:
case ISD::TargetJumpTable:
- cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as "
- << "A-form address.\n";
- abort();
+ llvm_report_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
+ "not wrapped as A-form address.");
/*NOTREACHED*/
case SPUISD::AFormAddr:
@@ -512,13 +514,13 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index, int minOffset,
int maxOffset) {
unsigned Opc = N.getOpcode();
- MVT PtrTy = SPUtli.getPointerTy();
+ EVT PtrTy = SPUtli.getPointerTy();
if (Opc == ISD::FrameIndex) {
// Stack frame index must be less than 512 (divided by 16):
FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N);
int FI = int(FIN->getIndex());
- DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
+ DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
<< FI << "\n");
if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(0, PtrTy);
@@ -543,7 +545,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
if (Op0.getOpcode() == ISD::FrameIndex) {
FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0);
int FI = int(FIN->getIndex());
- DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
@@ -564,7 +566,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
if (Op1.getOpcode() == ISD::FrameIndex) {
FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1);
int FI = int(FIN->getIndex());
- DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
@@ -690,7 +692,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
unsigned Opc = N->getOpcode();
int n_ops = -1;
unsigned NewOpc;
- MVT OpVT = Op.getValueType();
+ EVT OpVT = Op.getValueType();
SDValue Ops[8];
DebugLoc dl = N->getDebugLoc();
@@ -711,8 +713,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
} else {
NewOpc = SPU::Ar32;
Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
- Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, dl, Op.getValueType(),
- TFI, Imm0), 0);
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
+ Op.getValueType(), TFI, Imm0),
+ 0);
n_ops = 2;
}
} else if (Opc == ISD::Constant && OpVT == MVT::i64) {
@@ -723,17 +726,17 @@ SPUDAGToDAGISel::Select(SDValue Op) {
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
&& OpVT == MVT::i64) {
SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType();
- MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
- MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ EVT Op0VT = Op0.getValueType();
+ EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ Op0VT, (128 / Op0VT.getSizeInBits()));
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
SDValue shufMask;
- switch (Op0VT.getSimpleVT()) {
+ switch (Op0VT.getSimpleVT().SimpleTy) {
default:
- cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
- abort();
+ llvm_report_error("CellSPU Select: Unhandled zero/any extend EVT");
/*NOTREACHED*/
- break;
case MVT::i32:
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
CurDAG->getConstant(0x80808080, MVT::i32),
@@ -811,8 +814,8 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (shift_amt >= 32) {
SDNode *hi32 =
- CurDAG->getTargetNode(SPU::ORr32_r64, dl, OpVT,
- Op0.getOperand(0));
+ CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT,
+ Op0.getOperand(0));
shift_amt -= 32;
if (shift_amt > 0) {
@@ -823,8 +826,8 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (Op0.getOpcode() == ISD::SRL)
Opc = SPU::ROTMr32;
- hi32 = CurDAG->getTargetNode(Opc, dl, OpVT, SDValue(hi32, 0),
- shift);
+ hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
}
return hi32;
@@ -856,10 +859,10 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (OpVT == MVT::v2f64)
Opc = SPU::DFNMSv2f64;
- return CurDAG->getTargetNode(Opc, dl, OpVT,
- Op00.getOperand(0),
- Op00.getOperand(1),
- Op0.getOperand(1));
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
}
}
@@ -876,43 +879,44 @@ SPUDAGToDAGISel::Select(SDValue Op) {
negConst, negConst));
}
- return CurDAG->getTargetNode(Opc, dl, OpVT,
- Op.getOperand(0), SDValue(signMask, 0));
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
} else if (Opc == ISD::FABS) {
if (OpVT == MVT::f64) {
SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
- return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
- Op.getOperand(0), SDValue(signMask, 0));
+ return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
} else if (OpVT == MVT::v2f64) {
SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
absConst, absConst);
SDNode *signMask = emitBuildVector(absVec);
- return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
- Op.getOperand(0), SDValue(signMask, 0));
+ return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
}
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
SDValue Arg = N->getOperand(0);
SDValue Chain = N->getOperand(1);
SDNode *Result;
const valtype_map_s *vtm = getValueTypeMapEntry(VT);
if (vtm->ldresult_ins == 0) {
- cerr << "LDRESULT for unsupported type: "
- << VT.getMVTString()
- << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "LDRESULT for unsupported type: "
+ << VT.getEVTString();
+ llvm_report_error(Msg.str());
}
Opc = vtm->ldresult_ins;
if (vtm->ldresult_imm) {
SDValue Zero = CurDAG->getTargetConstant(0, VT);
- Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
+ Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
} else {
- Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
+ Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
}
return Result;
@@ -923,7 +927,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
// SPUInstrInfo catches the following patterns:
// (SPUindirect (SPUhi ...), (SPUlo ...))
// (SPUindirect $sp, imm)
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
RegisterSDNode *RN;
@@ -948,7 +952,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (N->hasOneUse())
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
else
- return CurDAG->getTargetNode(NewOpc, dl, OpVT, Ops, n_ops);
+ return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
} else
return SelectCode(Op);
}
@@ -966,24 +970,25 @@ SPUDAGToDAGISel::Select(SDValue Op) {
* @return The SDNode with the entire instruction sequence
*/
SDNode *
-SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
+SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) {
SDValue Op0 = Op.getOperand(0);
- MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
SDValue SelMaskVal;
DebugLoc dl = Op.getDebugLoc();
- VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
- SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
- ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0, OpVT));
- VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(ZeroFill, 0),
- SDValue(VecOp0, 0),
- SDValue(SelMask, 0));
+ SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
@@ -991,35 +996,35 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
if (bytes > 0) {
Shift =
- CurDAG->getTargetNode(SPU::SHLQBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
}
if (bits > 0) {
Shift =
- CurDAG->getTargetNode(SPU::SHLQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
}
} else {
SDNode *Bytes =
- CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
SDNode *Bits =
- CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
Shift =
- CurDAG->getTargetNode(SPU::SHLQBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
Shift =
- CurDAG->getTargetNode(SPU::SHLQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
+ CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
}
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
}
/*!
@@ -1031,15 +1036,16 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
* @return The SDNode with the entire instruction sequence
*/
SDNode *
-SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
+SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) {
SDValue Op0 = Op.getOperand(0);
- MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0, *Shift = 0;
DebugLoc dl = Op.getDebugLoc();
- VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
@@ -1047,45 +1053,45 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
if (bytes > 0) {
Shift =
- CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
}
if (bits > 0) {
Shift =
- CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
}
} else {
SDNode *Bytes =
- CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
SDNode *Bits =
- CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
// Ensure that the shift amounts are negated!
- Bytes = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bytes, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
+ Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
- Bits = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bits, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
+ Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
Shift =
- CurDAG->getTargetNode(SPU::ROTQMBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
Shift =
- CurDAG->getTargetNode(SPU::ROTQMBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
+ CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
}
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
}
/*!
@@ -1097,33 +1103,34 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
* @return The SDNode with the entire instruction sequence
*/
SDNode *
-SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
+SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) {
// Promote Op0 to vector
- MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDNode *VecOp0 =
- CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0));
+ CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0));
SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
SDNode *SignRot =
- CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
- SDValue(VecOp0, 0), SignRotAmt);
+ CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
SDNode *UpperHalfSign =
- CurDAG->getTargetNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+ CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
SDNode *UpperHalfSignMask =
- CurDAG->getTargetNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
SDNode *UpperLowerMask =
- CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
SDNode *UpperLowerSelect =
- CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(UpperHalfSignMask, 0),
- SDValue(VecOp0, 0),
- SDValue(UpperLowerMask, 0));
+ CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
SDNode *Shift = 0;
@@ -1134,46 +1141,46 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
if (bytes > 0) {
bytes = 31 - bytes;
Shift =
- CurDAG->getTargetNode(SPU::ROTQBYIv2i64, dl, VecVT,
- SDValue(UpperLowerSelect, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
}
if (bits > 0) {
bits = 8 - bits;
Shift =
- CurDAG->getTargetNode(SPU::ROTQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
}
} else {
SDNode *NegShift =
- CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
- ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+ CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
Shift =
- CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
- SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
Shift =
- CurDAG->getTargetNode(SPU::ROTQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(NegShift, 0));
+ CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
}
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
}
/*!
  Do the magic necessary to load an i64 constant
*/
-SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, EVT OpVT,
DebugLoc dl) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
}
-SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
DebugLoc dl) {
- MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
SDValue i64vec =
SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
@@ -1186,8 +1193,8 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
SDValue Op0 = i64vec.getOperand(0);
ReplaceUses(i64vec, Op0);
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(Op0), 0));
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(Op0), 0));
} else if (i64vec.getOpcode() == SPUISD::SHUFB) {
SDValue lhs = i64vec.getOperand(0);
SDValue rhs = i64vec.getOperand(1);
@@ -1225,14 +1232,14 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
SDValue(shufMaskNode, 0)));
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(shufNode, 0));
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(shufNode, 0));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(i64vec), 0));
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(i64vec), 0));
} else {
- cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
- abort();
+      llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec "
+                        "condition");
}
}
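
Two migrations run through this whole file: MVT becomes the extended EVT, whose derived types (vectors in particular) are built against an LLVMContext, and getTargetNode becomes getMachineNode, making explicit that the result is already a concrete machine instruction. A sketch combining both, reusing the SPU opcode from the hunks above; wrapToVec is a hypothetical helper:

    #include "SPU.h"
    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    static SDNode *wrapToVec(SelectionDAG &DAG, SDValue Scalar, DebugLoc dl) {
      EVT OpVT = Scalar.getValueType();               // e.g. MVT::i64
      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), OpVT,
                                   128 / OpVT.getSizeInBits());
      // Formerly getTargetNode; the node is a concrete machine instr.
      return DAG.getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Scalar);
    }
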
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index d8a77766bd59..aaf07838fb68 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,8 +15,9 @@
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,13 +25,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
-
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
@@ -39,10 +40,10 @@ using namespace llvm;
namespace {
std::map<unsigned, const char *> node_names;
- //! MVT mapping to useful data for Cell SPU
+ //! EVT mapping to useful data for Cell SPU
struct valtype_map_s {
- const MVT valtype;
- const int prefslot_byte;
+ EVT valtype;
+ int prefslot_byte;
};
const valtype_map_s valtype_map[] = {
@@ -58,7 +59,7 @@ namespace {
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
- const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+ const valtype_map_s *getValueTypeMapEntry(EVT VT) {
const valtype_map_s *retval = 0;
for (size_t i = 0; i < n_valtype_map; ++i) {
@@ -70,10 +71,11 @@ namespace {
#ifndef NDEBUG
if (retval == 0) {
- cerr << "getValueTypeMapEntry returns NULL for "
- << VT.getMVTString()
- << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "getValueTypeMapEntry returns NULL for "
+ << VT.getEVTString();
+ llvm_report_error(Msg.str());
}
#endif
@@ -98,8 +100,8 @@ namespace {
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- MVT ArgVT = Op.getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForMVT();
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op.getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
@@ -110,10 +112,13 @@ namespace {
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+ const Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, CallingConv::C, false, Callee, Args, DAG,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG,
Op.getDebugLoc());
return CallInfo.first;
@@ -121,9 +126,8 @@ namespace {
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM),
- SPUTM(TM)
-{
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+ SPUTM(TM) {
// Fold away setcc operations if possible.
setPow2DivIsCheap();
@@ -151,6 +155,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// SPU constant load actions are custom lowered:
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -158,7 +169,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// SPU's loads and stores have to be custom lowered:
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
++sctype) {
- MVT VT = (MVT::SimpleValueType)sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -167,20 +178,20 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
- MVT StoreVT = (MVT::SimpleValueType) stype;
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
}
}
for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
++sctype) {
- MVT VT = (MVT::SimpleValueType) sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
- MVT StoreVT = (MVT::SimpleValueType) stype;
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
}
}
@@ -199,11 +210,37 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- // SPU has no SREM/UREM instructions
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+ // SPU has no division/remainder instructions
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
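
The block above marks every integer divide and remainder as Expand across all
five integer widths. A behaviorally identical loop form, shown only as a
compact reading of the table (it would sit in the same constructor):

    static const unsigned DivRemOps[] = {
      ISD::SREM, ISD::UREM, ISD::SDIV, ISD::UDIV, ISD::SDIVREM, ISD::UDIVREM
    };
    static const MVT::SimpleValueType IntVTs[] = {
      MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::i128
    };
    for (unsigned op = 0; op != 6; ++op)
      for (unsigned vt = 0; vt != 5; ++vt)
        setOperationAction(DivRemOps[op], IntVTs[vt], Expand);
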
// We don't support sin/cos/sqrt/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -283,11 +320,19 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i16, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
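
The selb identity referenced in the comment above, modeled on the host; this
is just the bitwise formula, not SPU code:

    #include <stdint.h>

    // Bitwise select: where a bit of c is 1 take b, where it is 0 take a.
    // Example: selb(0x00FF, 0xAA00, 0xFF00) == 0xAAFF.
    static uint32_t selb(uint32_t a, uint32_t b, uint32_t c) {
      return (a & ~c) | (b & c);
    }
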
@@ -305,10 +350,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
// SPU has a legal FP -> signed INT instruction for f32, but for f64, need
// to expand to a libcall, hence the custom lowering:
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
@@ -339,16 +395,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// appropriate instructions to materialize the address.
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
++sctype) {
- MVT VT = (MVT::SimpleValueType)sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
setOperationAction(ISD::JumpTable, VT, Custom);
}
- // RET must be custom lowered, to meet ABI requirements
- setOperationAction(ISD::RET, MVT::Other, Custom);
-
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -385,7 +438,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
@@ -461,9 +514,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
- node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
- node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
- node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -490,9 +540,11 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//
-MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
// i16 and i32 are valid SETCC result types
- return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
+ VT.getSimpleVT().SimpleTy :
+ MVT::i32);
}
//===----------------------------------------------------------------------===//
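
getSetCCResultType now returns the raw MVT::SimpleValueType enum rather than
an MVT wrapper. Its behavior, restated as a free-function sketch:

    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    static MVT::SimpleValueType spuSetCCResultType(EVT VT) {
      // i8/i16/i32 compare in their own width; everything else uses i32.
      if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32)
        return VT.getSimpleVT().SimpleTy;
      return MVT::i32;
    }
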
@@ -525,9 +577,9 @@ static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- MVT InVT = LN->getMemoryVT();
- MVT OutVT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT InVT = LN->getMemoryVT();
+ EVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
@@ -632,7 +684,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
- MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
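
The (128 / getSizeInBits()) computation above recurs throughout this file: a
scalar is reinterpreted as one lane of the 16-byte quadword the SPU actually
loads and stores. Factored out as a sketch; the patch keeps it inline at each
use:

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // E.g. i32 -> v4i32, f64 -> v2f64: the 128-bit vector view of a scalar.
    static EVT quadVectorVT(LLVMContext &Ctx, EVT ScalarVT) {
      return EVT::getVectorVT(Ctx, ScalarVT, 128 / ScalarVT.getSizeInBits());
    }
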
@@ -665,11 +718,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
case ISD::POST_INC:
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
- cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
"UNINDEXED\n";
- cerr << (unsigned) LN->getAddressingMode() << "\n";
- abort();
- /*NOTREACHED*/
+ Msg << (unsigned) LN->getAddressingMode();
+ llvm_report_error(Msg.str());
+ /*NOTREACHED*/
+ }
}
return SDValue();
@@ -685,17 +742,19 @@ static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
StoreSDNode *SN = cast<StoreSDNode>(Op);
SDValue Value = SN->getValue();
- MVT VT = Value.getValueType();
- MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT VT = Value.getValueType();
+ EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned alignment = SN->getAlignment();
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
// The vector type we really want to load from the 16-byte chunk.
- MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
- stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits())),
+ stVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StVT, (128 / StVT.getSizeInBits()));
SDValue alignLoadVec;
SDValue basePtr = SN->getBasePtr();
@@ -790,9 +849,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "CellSPU LowerSTORE: basePtr = ";
+ errs() << "CellSPU LowerSTORE: basePtr = ";
basePtr.getNode()->dump(&DAG);
- cerr << "\n";
+ errs() << "\n";
}
#endif
@@ -815,9 +874,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
const SDValue &currentRoot = DAG.getRoot();
DAG.setRoot(result);
- cerr << "------- CellSPU:LowerStore result:\n";
+ errs() << "------- CellSPU:LowerStore result:\n";
DAG.dump();
- cerr << "-------\n";
+ errs() << "-------\n";
DAG.setRoot(currentRoot);
}
#endif
@@ -830,20 +889,24 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
case ISD::POST_INC:
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
- cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
"UNINDEXED\n";
- cerr << (unsigned) SN->getAddressingMode() << "\n";
- abort();
- /*NOTREACHED*/
+ Msg << (unsigned) SN->getAddressingMode();
+ llvm_report_error(Msg.str());
+ /*NOTREACHED*/
+ }
}
return SDValue();
}
//! Generate the address of a constant pool entry.
-SDValue
+static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
@@ -863,9 +926,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
}
}
- assert(0 &&
- "LowerConstantPool: Relocation model other than static"
- " not supported.");
+ llvm_unreachable("LowerConstantPool: Relocation model other than static"
+ " not supported.");
return SDValue();
}
@@ -877,7 +939,7 @@ SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
SDValue Zero = DAG.getConstant(0, PtrVT);
@@ -895,14 +957,14 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
}
}
- assert(0 &&
- "LowerJumpTable: Relocation model other than static not supported.");
+ llvm_unreachable("LowerJumpTable: Relocation model other than static"
+ " not supported.");
return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
@@ -920,9 +982,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
} else {
- cerr << "LowerGlobalAddress: Relocation model other than static not "
- << "supported.\n";
- abort();
+ llvm_report_error("LowerGlobalAddress: Relocation model other than static"
+                      " not supported.");
/*NOTREACHED*/
}
@@ -932,7 +993,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
// FIXME there is no actual debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -952,16 +1013,17 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue
-LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
-{
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SmallVector<SDValue, 48> ArgValues;
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- DebugLoc dl = Op.getDebugLoc();
const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
@@ -970,24 +1032,24 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
unsigned ArgRegIdx = 0;
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Add DAG nodes to load the arguments or copy them out of registers.
- for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
- ArgNo != e; ++ArgNo) {
- MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
SDValue ArgVal;
if (ArgRegIdx < NumArgRegs) {
const TargetRegisterClass *ArgRegClass;
- switch (ObjectVT.getSimpleVT()) {
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
default: {
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << ObjectVT.getMVTString()
- << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "LowerFormalArguments Unhandled argument type: "
+ << ObjectVT.getEVTString();
+ llvm_report_error(Msg.str());
}
case MVT::i8:
ArgRegClass = &SPU::R8CRegClass;
@@ -1022,7 +1084,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
- ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++ArgRegIdx;
} else {
// We need to load the argument to a virtual register if we determined
@@ -1030,13 +1092,13 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
// or we're forced to do vararg
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
ArgOffset += StackSlotSize;
}
- ArgValues.push_back(ArgVal);
+ InVals.push_back(ArgVal);
// Update the chain
- Root = ArgVal.getOperand(0);
+ Chain = ArgVal.getOperand(0);
}
// vararg handling:
@@ -1051,23 +1113,19 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
- SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
- Root = Store.getOperand(0);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
+ Chain = Store.getOperand(0);
MemOps.push_back(Store);
// Increment address by stack slot size for the next stored argument
ArgOffset += StackSlotSize;
}
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size());
+ return Chain;
}
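
This hunk replaces the old FORMAL_ARGUMENTS node lowering with the 2.6 target
hook: incoming arguments arrive pre-split in Ins, lowered values go out
through InVals, and the hook returns the updated chain instead of a
MERGE_VALUES node. A minimal skeleton of the new shape; MyTargetLowering is
hypothetical:

    SDValue
    MyTargetLowering::LowerFormalArguments(SDValue Chain,
                                CallingConv::ID CallConv, bool isVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
                                DebugLoc dl, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) {
      for (unsigned i = 0, e = Ins.size(); i != e; ++i)
        InVals.push_back(DAG.getUNDEF(Ins[i].VT));  // one value per argument
      return Chain;  // the chain itself, not a MERGE_VALUES of results
    }
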
/// isLSAAddress - Return the immediate to use if the specified
@@ -1084,19 +1142,23 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
-static SDValue
-LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- unsigned NumOps = TheCall->getNumArgs();
+SDValue
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ unsigned NumOps = Outs.size();
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
- DebugLoc dl = TheCall->getDebugLoc();
// Handy pointer type
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Accumulate how many bytes are to be pushed on the stack, including the
// linkage area, and parameter passing area. According to the SPU ABI,
@@ -1119,15 +1181,15 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = TheCall->getArg(i);
+ SDValue Arg = Outs[i].Val;
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- switch (Arg.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected ValueType for argument!");
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -1193,7 +1255,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
- MVT CalleeVT = Callee.getValueType();
+ EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
@@ -1217,7 +1279,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- MVT CalleeVT = Callee.getValueType();
+ EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
Callee.getValueType());
@@ -1251,50 +1313,46 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
- if (TheCall->getValueType(0) != MVT::Other)
+ if (!Ins.empty())
InFlag = Chain.getValue(1);
- SDValue ResultVals[3];
- unsigned NumResults = 0;
+ // If the function returns void, just return the chain.
+ if (Ins.empty())
+ return Chain;
// If the call has results, copy the values out of the ret val registers.
- switch (TheCall->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unexpected ret value!");
+ switch (Ins[0].VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ret value!");
case MVT::Other: break;
case MVT::i32:
- if (TheCall->getValueType(1) == MVT::i32) {
+ if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
MVT::i32, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
+ InVals.push_back(Chain.getValue(0));
Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
Chain.getValue(2)).getValue(1);
- ResultVals[1] = Chain.getValue(0);
- NumResults = 2;
+ InVals.push_back(Chain.getValue(0));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
}
break;
case MVT::i64:
Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
break;
case MVT::i128:
Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
break;
case MVT::f32:
case MVT::f64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
break;
case MVT::v2f64:
case MVT::v2i64:
@@ -1302,31 +1360,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
break;
}
- // If the function returns void, just return the chain.
- if (NumResults == 0)
- return Chain;
-
- // Otherwise, merge everything together with a MERGE_VALUES node.
- ResultVals[NumResults++] = Chain;
- SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
- return Res.getValue(Op.getResNo());
+ return Chain;
}
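
Each case of the switch above repeats one copy-out idiom. Captured as a
hypothetical helper to make the chain and glue threading explicit:

    // Copy one call result out of a physical register (e.g. SPU::R3),
    // recording the value and returning the rethreaded chain.
    static SDValue copyCallResult(SelectionDAG &DAG, DebugLoc dl,
                                  unsigned Reg, EVT VT, SDValue Chain,
                                  SDValue InFlag,
                                  SmallVectorImpl<SDValue> &InVals) {
      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, VT, InFlag);
      InVals.push_back(Val);      // the result (value #0 of the node)
      return Val.getValue(1);     // the updated chain (value #1)
    }
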
-static SDValue
-LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
- CCState CCInfo(CC, isVarArg, TM, RVLocs);
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
@@ -1335,7 +1387,6 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- SDValue Chain = Op.getOperand(0);
SDValue Flag;
// Copy the result values into the output registers.
@@ -1343,7 +1394,7 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Op.getOperand(i*2+1), Flag);
+ Outs[i].Val, Flag);
Flag = Chain.getValue(1);
}
@@ -1384,7 +1435,7 @@ getVecImm(SDNode *N) {
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
uint64_t Value = CN->getZExtValue();
if (ValueType == MVT::i64) {
@@ -1406,7 +1457,7 @@ SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int64_t Value = CN->getSExtValue();
if (ValueType == MVT::i64) {
@@ -1429,7 +1480,7 @@ SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int64_t Value = CN->getSExtValue();
if (ValueType == MVT::i64) {
@@ -1455,7 +1506,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int Value = (int) CN->getZExtValue();
if (ValueType == MVT::i16
@@ -1474,7 +1525,7 @@ SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
uint64_t Value = CN->getZExtValue();
if ((ValueType == MVT::i32
@@ -1505,10 +1556,10 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
}
//! Lower a BUILD_VECTOR instruction creatively:
-SDValue
+static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT EltVT = VT.getVectorElementType();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
@@ -1528,13 +1579,15 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
uint64_t SplatBits = APSplatBits.getZExtValue();
- switch (VT.getSimpleVT()) {
- default:
- cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
- << VT.getMVTString()
- << "\n";
- abort();
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+ << VT.getEVTString();
+ llvm_report_error(Msg.str());
/*NOTREACHED*/
+ }
case MVT::v4f32: {
uint32_t Value32 = uint32_t(SplatBits);
assert(SplatBitSize == 32
@@ -1591,7 +1644,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
/*!
*/
SDValue
-SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
DebugLoc dl) {
uint32_t upper = uint32_t(SplatVal >> 32);
uint32_t lower = uint32_t(SplatVal);
@@ -1704,8 +1757,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
// to be monotonically increasing with one exception element.
- MVT VecVT = V1.getValueType();
- MVT EltVT = VecVT.getVectorElementType();
+ EVT VecVT = V1.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
unsigned V2Elt = 0;
unsigned V2EltIdx0 = 0;
@@ -1725,7 +1778,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
} else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
} else
- assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+ llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
for (unsigned i = 0; i != MaxElts; ++i) {
if (SVN->getMaskElt(i) < 0)
@@ -1770,7 +1823,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Initialize temporary register to 0
SDValue InitTempReg =
DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
@@ -1816,13 +1869,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
SmallVector<SDValue, 16> ConstVecValues;
- MVT VT;
+ EVT VT;
size_t n_copies;
// Create a constant vector:
- switch (Op.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected constant value type in "
- "LowerSCALAR_TO_VECTOR");
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
@@ -1839,8 +1892,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
&ConstVecValues[0], ConstVecValues.size());
} else {
// Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -1855,7 +1908,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue N = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
@@ -1867,13 +1920,13 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// sanity checks:
if (VT == MVT::i8 && EltNo >= 16)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
else if (VT == MVT::i16 && EltNo >= 8)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
else if (VT == MVT::i32 && EltNo >= 4)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+    llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
else if (VT == MVT::i64 && EltNo >= 2)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+    llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
// i32 and i64: Element 0 is the preferred slot
@@ -1884,7 +1937,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
int prefslot_begin = -1, prefslot_end = -1;
int elt_byte = EltNo * VT.getSizeInBits() / 8;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default:
assert(false && "Invalid value type!");
case MVT::i8: {
@@ -1910,7 +1963,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
assert(prefslot_begin != -1 && prefslot_end != -1 &&
"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
- unsigned int ShufBytes[16];
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
for (int i = 0; i < 16; ++i) {
// zero fill uppper part of preferred slot, don't care about the
// other slots:
@@ -1946,10 +2001,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
} else {
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
- MVT VecVT = N.getValueType();
+ EVT VecVT = N.getValueType();
if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
- cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
- abort();
+ llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+                      " vector type!");
}
// Make life easier by making sure the index is zero-extended to i32
@@ -1974,10 +2029,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// consistency with the notion of a unified register set)
SDValue replicate;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default:
- cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
- abort();
+      llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
+                        " type");
/*NOTREACHED*/
case MVT::i8: {
SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
@@ -2021,12 +2076,12 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue ValOp = Op.getOperand(1);
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
@@ -2047,12 +2102,12 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
{
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
DebugLoc dl = Op.getDebugLoc();
- MVT ShiftVT = TLI.getShiftAmountTy();
+ EVT ShiftVT = TLI.getShiftAmountTy();
assert(Op.getValueType() == MVT::i8);
switch (Opc) {
default:
- assert(0 && "Unhandled i8 math operator");
+ llvm_unreachable("Unhandled i8 math operator");
/*NOTREACHED*/
break;
case ISD::ADD: {
@@ -2078,7 +2133,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
case ISD::ROTR:
case ISD::ROTL: {
SDValue N1 = Op.getOperand(1);
- MVT N1VT = N1.getValueType();
+ EVT N1VT = N1.getValueType();
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
@@ -2101,7 +2156,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
case ISD::SRL:
case ISD::SHL: {
SDValue N1 = Op.getOperand(1);
- MVT N1VT = N1.getValueType();
+ EVT N1VT = N1.getValueType();
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
@@ -2118,7 +2173,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
}
case ISD::SRA: {
SDValue N1 = Op.getOperand(1);
- MVT N1VT = N1.getValueType();
+ EVT N1VT = N1.getValueType();
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
@@ -2151,7 +2206,7 @@ static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
SDValue ConstVec;
SDValue Arg;
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
ConstVec = Op.getOperand(0);
@@ -2202,11 +2257,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ EVT VT = Op.getValueType();
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default:
assert(false && "Invalid value type!");
case MVT::i8: {
@@ -2312,9 +2368,9 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
*/
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SPUTargetLowering &TLI) {
- MVT OpVT = Op.getValueType();
+ EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType();
+ EVT Op0VT = Op0.getValueType();
if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
|| OpVT == MVT::i64) {
@@ -2338,9 +2394,9 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
*/
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
SPUTargetLowering &TLI) {
- MVT OpVT = Op.getValueType();
+ EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType();
+ EVT Op0VT = Op0.getValueType();
if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
|| Op0VT == MVT::i64) {
@@ -2369,12 +2425,12 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
- MVT lhsVT = lhs.getValueType();
+ EVT lhsVT = lhs.getValueType();
assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
- MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- MVT IntVT(MVT::i64);
+ EVT IntVT(MVT::i64);
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
// selected to a NOP:
@@ -2458,9 +2514,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
case ISD::SETONE:
compareOp = ISD::SETNE; break;
default:
- cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
- abort();
- break;
+ llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
}
SDValue result =
@@ -2497,7 +2551,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
SDValue trueval = Op.getOperand(2);
@@ -2526,14 +2580,15 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
// Type to truncate to
- MVT VT = Op.getValueType();
- MVT::SimpleValueType simpleVT = VT.getSimpleVT();
- MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ EVT VT = Op.getValueType();
+ MVT simpleVT = VT.getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
// Type to truncate from
SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType();
+ EVT Op0VT = Op0.getValueType();
if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
// Create shuffle mask, least significant doubleword of quadword
@@ -2555,6 +2610,61 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
return SDValue(); // Leave the truncate unmolested
}
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+  // The type to extend to needs to be an i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
+
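
A host-side model of what the rotmai/shufb sequence in LowerSIGN_EXTEND
computes for the i64 case, assuming the SPU's big-endian byte numbering
(bytes 0-7 form the most significant quadword half):

    #include <stdint.h>

    // modelSExt128(-2, Out) yields Out[0] = 0xFFFFFFFFFFFFFFFF (the sign
    // bytes the SRA produces) and Out[1] = 0xFFFFFFFFFFFFFFFE (the original
    // doubleword).
    static void modelSExt128(int64_t In, uint64_t Out[2]) {
      Out[0] = (In < 0) ? ~0ULL : 0ULL;  // what mask1's 0x10101010 selects
      Out[1] = (uint64_t)In;             // what mask2/mask3 select
    }
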
//! Custom (target-specific) lowering entry point
/*!
This is where LLVM's DAG selection process calls to do target-specific
@@ -2564,15 +2674,17 @@ SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
unsigned Opc = (unsigned) Op.getOpcode();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
switch (Opc) {
default: {
- cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+#ifndef NDEBUG
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
Op.getNode()->dump();
- abort();
+#endif
+ llvm_unreachable(0);
}
case ISD::LOAD:
case ISD::EXTLOAD:
@@ -2589,12 +2701,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::ConstantFP:
return LowerConstantFP(Op, DAG);
- case ISD::FORMAL_ARGUMENTS:
- return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
- case ISD::CALL:
- return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::RET:
- return LowerRET(Op, DAG, getTargetMachine());
// i8, i64 math ops:
case ISD::ADD:
@@ -2651,6 +2757,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
}
return SDValue();
@@ -2662,13 +2771,13 @@ void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
{
#if 0
unsigned Opc = (unsigned) N->getOpcode();
- MVT OpVT = N->getValueType(0);
+ EVT OpVT = N->getValueType(0);
switch (Opc) {
default: {
- cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
N->dump();
abort();
/*NOTREACHED*/
@@ -2692,8 +2801,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
SelectionDAG &DAG = DCI.DAG;
SDValue Op0 = N->getOperand(0); // everything has at least one operand
- MVT NodeVT = N->getValueType(0); // The node's value type
- MVT Op0VT = Op0.getValueType(); // The first operand's result
+ EVT NodeVT = N->getValueType(0); // The node's value type
+ EVT Op0VT = Op0.getValueType(); // The first operand's result
SDValue Result; // Initially, empty result
DebugLoc dl = N->getDebugLoc();
@@ -2722,7 +2831,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
<< "With: (SPUindirect <arg>, <arg>)\n";
}
@@ -2738,7 +2847,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
<< "), " << CN0->getSExtValue() << ")\n"
<< "With: (SPUindirect <arg>, "
@@ -2762,11 +2871,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
// Types must match, however...
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\nReplace: ";
+ errs() << "\nReplace: ";
N->dump(&DAG);
- cerr << "\nWith: ";
+ errs() << "\nWith: ";
Op0.getNode()->dump(&DAG);
- cerr << "\n";
+ errs() << "\n";
}
#endif
@@ -2781,11 +2890,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
- DEBUG(cerr << "Replace: ");
+ DEBUG(errs() << "Replace: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
return Op0;
}
@@ -2798,7 +2907,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
<< "With: (SPUindirect <arg>, <arg>)\n";
}
@@ -2813,9 +2922,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
}
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
@@ -2860,11 +2966,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
// Otherwise, return unchanged.
#ifndef NDEBUG
if (Result.getNode()) {
- DEBUG(cerr << "\nReplace.SPU: ");
+ DEBUG(errs() << "\nReplace.SPU: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
}
#endif
@@ -2895,7 +3001,7 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const
+ EVT VT) const
{
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
@@ -2943,9 +3049,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case SPUISD::VEC2PREFSLOT:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
case SPUISD::VEC_ROTL:
case SPUISD::VEC_ROTR:
case SPUISD::ROTBYTES_LEFT:
@@ -2963,7 +3066,7 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
case ISD::SETCC: {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
VT = MVT::i32;
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index b1583f4ee2d6..ab349bb7851f 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -43,9 +43,6 @@ namespace llvm {
VEC2PREFSLOT, ///< Extract element 0
SHLQUAD_L_BITS, ///< Rotate quad left, by bits
SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
- VEC_SHL, ///< Vector shift left
- VEC_SRL, ///< Vector shift right (logical)
- VEC_SRA, ///< Vector shift right (arithmetic)
VEC_ROTL, ///< Vector rotate left
VEC_ROTR, ///< Vector rotate right
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
@@ -64,22 +61,22 @@ namespace llvm {
//! Utility functions specific to CellSPU:
namespace SPU {
SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType);
+ EVT ValueType);
SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType);
+ EVT ValueType);
SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType);
+ EVT ValueType);
SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType);
+ EVT ValueType);
SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType);
+ EVT ValueType);
SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
const SPUTargetMachine &TM);
- //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
- SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
+    //! Simplify a v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
DebugLoc dl);
}
@@ -109,7 +106,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ValueType for ISD::SETCC
- virtual MVT getSetCCResultType(MVT VT) const;
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
//! Custom lowering hooks
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
@@ -134,7 +131,7 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
bool hasMemory,
@@ -150,6 +147,28 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
};
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index e629c8d31aaf..ecce8e3e9316 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -17,8 +17,9 @@
#include "SPUTargetMachine.h"
#include "SPUGenInstrInfo.inc"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -313,8 +314,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (RC == SPU::VECREGRegisterClass) {
opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
} else {
- assert(0 && "Unknown regclass!");
- abort();
+ llvm_unreachable("Unknown regclass!");
}
DebugLoc DL = DebugLoc::getUnknownLoc();
@@ -323,43 +323,6 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
}
-void SPUInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- cerr << "storeRegToAddr() invoked!\n";
- abort();
-
- if (Addr[0].isFI()) {
- /* do what storeRegToStackSlot does here */
- } else {
- unsigned Opc = 0;
- if (RC == SPU::GPRCRegisterClass) {
- /* Opc = PPC::STW; */
- } else if (RC == SPU::R16CRegisterClass) {
- /* Opc = PPC::STD; */
- } else if (RC == SPU::R32CRegisterClass) {
- /* Opc = PPC::STFD; */
- } else if (RC == SPU::R32FPRegisterClass) {
- /* Opc = PPC::STFD; */
- } else if (RC == SPU::R64FPRegisterClass) {
- /* Opc = PPC::STFS; */
- } else if (RC == SPU::VECREGRegisterClass) {
- /* Opc = PPC::STVX; */
- } else {
- assert(0 && "Unknown regclass!");
- abort();
- }
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
- .addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- }
-}
-
void
SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -385,8 +348,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
} else if (RC == SPU::VECREGRegisterClass) {
opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
} else {
- assert(0 && "Unknown regclass in loadRegFromStackSlot!");
- abort();
+ llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
}
DebugLoc DL = DebugLoc::getUnknownLoc();
@@ -394,47 +356,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
}
-/*!
- \note We are really pessimistic here about what kind of a load we're doing.
- */
-void SPUInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)
- const {
- cerr << "loadRegToAddr() invoked!\n";
- abort();
-
- if (Addr[0].isFI()) {
- /* do what loadRegFromStackSlot does here... */
- } else {
- unsigned Opc = 0;
- if (RC == SPU::R8CRegisterClass) {
- /* do brilliance here */
- } else if (RC == SPU::R16CRegisterClass) {
- /* Opc = PPC::LWZ; */
- } else if (RC == SPU::R32CRegisterClass) {
- /* Opc = PPC::LD; */
- } else if (RC == SPU::R32FPRegisterClass) {
- /* Opc = PPC::LFD; */
- } else if (RC == SPU::R64FPRegisterClass) {
- /* Opc = PPC::LFS; */
- } else if (RC == SPU::VECREGRegisterClass) {
- /* Opc = PPC::LVX; */
- } else if (RC == SPU::GPRCRegisterClass) {
- /* Opc = something else! */
- } else {
- assert(0 && "Unknown regclass!");
- abort();
- }
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- }
-}
-
//! Return true if the specified load or store can be folded
bool
SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
@@ -543,7 +464,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
} else if (isCondBranch(LastInst)) {
// Block ends with fall-through condbranch.
TBB = LastInst->getOperand(1).getMBB();
- DEBUG(cerr << "Pushing LastInst: ");
+ DEBUG(errs() << "Pushing LastInst: ");
DEBUG(LastInst->dump());
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
@@ -564,7 +485,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// If the block ends with a conditional and unconditional branch, handle it.
if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
TBB = SecondLastInst->getOperand(1).getMBB();
- DEBUG(cerr << "Pushing SecondLastInst: ");
+ DEBUG(errs() << "Pushing SecondLastInst: ");
DEBUG(SecondLastInst->dump());
Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
@@ -596,7 +517,7 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 0;
// Remove the first branch.
- DEBUG(cerr << "Removing branch: ");
+ DEBUG(errs() << "Removing branch: ");
DEBUG(I->dump());
I->eraseFromParent();
I = MBB.end();
@@ -608,7 +529,7 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 1;
// Remove the second branch.
- DEBUG(cerr << "Removing second branch: ");
+ DEBUG(errs() << "Removing second branch: ");
DEBUG(I->dump());
I->eraseFromParent();
return 2;
@@ -632,14 +553,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR));
MIB.addMBB(TBB);
- DEBUG(cerr << "Inserted one-way uncond branch: ");
+ DEBUG(errs() << "Inserted one-way uncond branch: ");
DEBUG((*MIB).dump());
} else {
// Conditional branch
MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
- DEBUG(cerr << "Inserted one-way cond branch: ");
+ DEBUG(errs() << "Inserted one-way cond branch: ");
DEBUG((*MIB).dump());
}
return 1;
@@ -651,9 +572,9 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
MIB2.addMBB(FBB);
- DEBUG(cerr << "Inserted conditional branch: ");
+ DEBUG(errs() << "Inserted conditional branch: ");
DEBUG((*MIB).dump());
- DEBUG(cerr << "part 2: ");
+ DEBUG(errs() << "part 2: ");
DEBUG((*MIB2).dump());
return 2;
}
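
The cerr-to-errs() changes in this file go with dropping
llvm/Support/Streams.h in the include hunk above; debug printing now uses
raw_ostream through the DEBUG macro. A minimal sketch; the debug-type string
is hypothetical:

    #define DEBUG_TYPE "spu-sketch"         // hypothetical -debug-only name
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void noteBranch(const MachineInstr *MI) {
      DEBUG(errs() << "Pushing LastInst: ");  // emitted only under -debug
      DEBUG(MI->dump());
    }
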
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index ffb40875ff10..c644a117965c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -68,24 +68,12 @@ namespace llvm {
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- //! Store a register to an address, based on its register class
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
//! Load a register from a stack slot, based on its register class.
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
- //! Load a register from an address, based on its register class
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
//! Return true if the specified load or store can be folded
virtual
bool canFoldMemoryOperand(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 63eb85a2921e..09849da45ae2 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -4431,13 +4431,6 @@ def : Pat<(i8 imm:$imm),
(ILHr8 imm:$imm)>;
//===----------------------------------------------------------------------===//
-// Call instruction patterns:
-//===----------------------------------------------------------------------===//
-// Return void
-def : Pat<(ret),
- (RET)>;
-
-//===----------------------------------------------------------------------===//
// Zero/Any/Sign extensions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..1c921ab87ff2
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -0,0 +1,40 @@
+//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCAsmInfo.h"
+using namespace llvm;
+
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
+ ZeroDirective = "\t.space\t";
+ SetDirective = "\t.set";
+ Data64bitsDirective = "\t.quad\t";
+ AlignmentIsInBytes = false;
+ LCOMMDirective = "\t.lcomm\t";
+
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+
+ // Has leb128, .loc and .file
+ HasLEB128 = true;
+ HasDotLocAndDotFile = true;
+
+ SupportsDebugInformation = true;
+ NeedsSet = true;
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ ExceptionsType = ExceptionHandling::None;
+}
+
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/SPUMCAsmInfo.h
new file mode 100644
index 000000000000..8d75ea84116a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.h
@@ -0,0 +1,28 @@
+//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct SPULinuxMCAsmInfo : public MCAsmInfo {
+ explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 87c4115d1b18..c722e4b006ea 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -87,9 +87,9 @@ def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
-def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>;
-def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>;
-def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
+def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index e031048e7ccb..8412006124cc 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -35,7 +35,9 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
@@ -176,8 +178,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
case SPU::R126: return 126;
case SPU::R127: return 127;
default:
- cerr << "Unhandled reg in SPURegisterInfo::getRegisterNumbering!\n";
- abort();
+ llvm_report_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
}
}
@@ -218,8 +219,8 @@ SPURegisterInfo::getNumArgRegs()
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
-const TargetRegisterClass * SPURegisterInfo::getPointerRegClass() const
-{
+const TargetRegisterClass *
+SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
return &SPU::R32CRegClass;
}
@@ -325,9 +326,9 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
MBB.erase(I);
}
-void
+unsigned
SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
+ int *Value, RegScavenger *RS) const
{
unsigned i = 0;
MachineInstr &MI = *II;
@@ -364,12 +365,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
SPOp.ChangeToRegister(SPU::R1, false);
if (Offset > SPUFrameInfo::maxFrameOffset()
|| Offset < SPUFrameInfo::minFrameOffset()) {
- cerr << "Large stack adjustment ("
+ errs() << "Large stack adjustment ("
<< Offset
<< ") in SPURegisterInfo::eliminateFrameIndex.";
} else {
MO.ChangeToImmediate(Offset);
}
+ return 0;
}
/// determineFrameLayout - Determine the size of the frame and maximum call
@@ -485,8 +487,10 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
.addReg(SPU::R2)
.addReg(SPU::R1);
} else {
- cerr << "Unhandled frame size: " << FrameSize << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unhandled frame size: " << FrameSize;
+ llvm_report_error(Msg.str());
}
if (hasDebugInfo) {
@@ -577,8 +581,10 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
.addReg(SPU::R2)
.addReg(SPU::R1);
} else {
- cerr << "Unhandled frame size: " << FrameSize << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unhandled frame size: " << FrameSize;
+ llvm_report_error(Msg.str());
}
}
}
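A minimal sketch of the fatal-error idiom the register-info hunks adopt: format the message with raw_string_ostream, then call llvm_report_error(), replacing the old unrecoverable cerr-then-abort() sequence. rejectFrameSize is a hypothetical wrapper around the same steps.

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static void rejectFrameSize(int FrameSize) {
  std::string msg;
  llvm::raw_string_ostream Msg(msg);
  Msg << "Unhandled frame size: " << FrameSize;
  // Routes through the installed error handler and does not return.
  llvm::llvm_report_error(Msg.str());
}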
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 5b6e9ec68cdb..1d9d07e9b3e1 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -43,7 +43,8 @@ namespace llvm {
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
- virtual const TargetRegisterClass *getPointerRegClass() const;
+ virtual const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const;
//! Return the array of callee-saved registers
virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
@@ -62,8 +63,9 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
//! Convert frame indices into machine operands
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int,
- RegScavenger *RS) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ int *Value = NULL,
+ RegScavenger *RS = NULL) const;
//! Determine the frame's layout
void determineFrameLayout(MachineFunction &MF) const;
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 0a1c2f75cfe5..0f18b7fa8b26 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -13,15 +13,11 @@
#include "SPUSubtarget.h"
#include "SPU.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetMachine.h"
#include "SPUGenSubtarget.inc"
using namespace llvm;
-SPUSubtarget::SPUSubtarget(const TargetMachine &tm, const Module &M,
- const std::string &FS) :
- TM(tm),
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
StackAlignment(16),
ProcDirective(SPU::DEFAULT_PROC),
UseLargeMem(false)
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index b6a34099b2f7..94ac73ce39f8 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -20,9 +20,7 @@
#include <string>
namespace llvm {
- class Module;
class GlobalValue;
- class TargetMachine;
namespace SPU {
enum {
@@ -33,8 +31,6 @@ namespace llvm {
class SPUSubtarget : public TargetSubtarget {
protected:
- const TargetMachine &TM;
-
/// stackAlignment - The minimum alignment known to hold for the stack frame
/// on entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -52,10 +48,9 @@ namespace llvm {
public:
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- SPUSubtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS);
+ SPUSubtarget(const std::string &TT, const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 2470972ca496..6500067849db 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -13,62 +13,36 @@
#include "SPU.h"
#include "SPURegisterNames.h"
-#include "SPUTargetAsmInfo.h"
+#include "SPUMCAsmInfo.h"
#include "SPUTargetMachine.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-namespace {
- // Register the targets
- RegisterTarget<SPUTargetMachine>
- CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]");
+extern "C" void LLVMInitializeCellSPUTarget() {
+ // Register the target.
+ RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
+ RegisterAsmInfo<SPULinuxMCAsmInfo> Y(TheCellSPUTarget);
}
-// No assembler printer by default
-SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0;
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUTarget() { }
-
const std::pair<unsigned, int> *
SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
NumEntries = 1;
return &LR[0];
}
-const TargetAsmInfo *
-SPUTargetMachine::createTargetAsmInfo() const
-{
- return new SPULinuxTargetAsmInfo(*this);
-}
-
-unsigned
-SPUTargetMachine::getModuleMatchQuality(const Module &M)
-{
- // We strongly match "spu-*" or "cellspu-*".
- std::string TT = M.getTargetTriple();
- if ((TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "spu")
- || (TT.size() == 7 && std::string(TT.begin(), TT.begin()+7) == "cellspu")
- || (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "spu-")
- || (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "cellspu-"))
- return 20;
-
- return 0; // No match at all...
-}
-
-SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS)
- : Subtarget(*this, M, FS),
+SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
FrameInfo(*this),
TLInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData())
-{
+ InstrItins(Subtarget.getInstrItineraryData()) {
// For the time being, use static relocations, since there's really no
// support for PIC yet.
setRelocationModel(Reloc::Static);
@@ -78,22 +52,9 @@ SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS)
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool
-SPUTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel)
-{
+bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
// Install an instruction selector.
PM.add(createSPUISelDag(*this));
return false;
}
-
-bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // Output assembly language.
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
- return false;
-}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 4c28521317b9..9fdcfe9ab619 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -35,19 +35,9 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUFrameInfo FrameInfo;
SPUTargetLowering TLInfo;
InstrItineraryData InstrItins;
-
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- SPUTargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- SPUTargetMachine(const Module &M, const std::string &FS);
+ SPUTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
@@ -66,12 +56,6 @@ public:
virtual TargetJITInfo *getJITInfo() {
return NULL;
}
-
- //! Module match function
- /*!
- Module matching function called by TargetMachineRegistry().
- */
- static unsigned getModuleMatchQuality(const Module &M);
virtual SPUTargetLowering *getTargetLowering() const {
return const_cast<SPUTargetLowering*>(&TLInfo);
@@ -92,13 +76,6 @@ public:
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
-
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
};
} // end namespace llvm
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..928d0fe97e0d
--- /dev/null
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMCellSPUInfo
+ CellSPUTargetInfo.cpp
+ )
+
+add_dependencies(LLVMCellSPUInfo CellSPUCodeGenTable_gen)
diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
new file mode 100644
index 000000000000..049ea236e992
--- /dev/null
+++ b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCellSPUTarget;
+
+extern "C" void LLVMInitializeCellSPUTargetInfo() {
+ RegisterTarget<Triple::cellspu>
+ X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
+}
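A sketch, under an assumed triple, of how a client consumes the TargetInfo registration added above: run the initializer, then resolve the Target through TargetRegistry::lookupTarget(). findCellSPU and the triple string are illustrative.

#include "llvm/Target/TargetRegistry.h"
#include <string>

extern "C" void LLVMInitializeCellSPUTargetInfo();

const llvm::Target *findCellSPU(std::string &Error) {
  LLVMInitializeCellSPUTargetInfo();  // registers TheCellSPUTarget
  // lookupTarget matches the string against the Triple::cellspu
  // registration made in CellSPUTargetInfo.cpp.
  return llvm::TargetRegistry::lookupTarget("cellspu-unknown-unknown",
                                            Error);
}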
diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/CellSPU/TargetInfo/Makefile
new file mode 100644
index 000000000000..9cb6827b4323
--- /dev/null
+++ b/lib/Target/CellSPU/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 28f58e86f623..14ad451074a5 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -23,13 +23,12 @@
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/TypeSymbolTable.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Streams.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <algorithm>
#include <set>
@@ -71,19 +70,10 @@ static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
cl::desc("Specify the name of the thing to generate"),
cl::init("!bad!"));
-/// CppBackendTargetMachineModule - Note that this is used on hosts
-/// that cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int CppBackendTargetMachineModule;
-int CppBackendTargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
-
-// Force static initialization.
-extern "C" void LLVMInitializeCppBackendTarget() { }
+extern "C" void LLVMInitializeCppBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
+}
namespace {
typedef std::vector<const Type*> TypeList;
@@ -97,7 +87,7 @@ namespace {
/// CppWriter - This class is the main chunk of code that converts an LLVM
/// module to a C++ translation unit.
class CppWriter : public ModulePass {
- raw_ostream &Out;
+ formatted_raw_ostream &Out;
const Module *TheModule;
uint64_t uniqueNum;
TypeMap TypeNames;
@@ -112,7 +102,7 @@ namespace {
public:
static char ID;
- explicit CppWriter(raw_ostream &o) :
+ explicit CppWriter(formatted_raw_ostream &o) :
ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {}
virtual const char *getPassName() const { return "C++ backend"; }
@@ -133,7 +123,7 @@ namespace {
private:
void printLinkageType(GlobalValue::LinkageTypes LT);
void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
- void printCallingConv(unsigned cc);
+ void printCallingConv(CallingConv::ID cc);
void printEscapedString(const std::string& str);
void printCFP(const ConstantFP* CFP);
@@ -165,7 +155,7 @@ namespace {
};
static unsigned indent_level = 0;
- inline raw_ostream& nl(raw_ostream& Out, int delta = 0) {
+ inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) {
Out << "\n";
if (delta >= 0 || indent_level >= unsigned(-delta))
indent_level += delta;
@@ -220,8 +210,7 @@ namespace {
}
void CppWriter::error(const std::string& msg) {
- cerr << msg << "\n";
- exit(2);
+ llvm_report_error(msg);
}
// printCFP - Print a floating point constant .. very carefully :)
@@ -230,9 +219,9 @@ namespace {
void CppWriter::printCFP(const ConstantFP *CFP) {
bool ignored;
APFloat APF = APFloat(CFP->getValueAPF()); // copy
- if (CFP->getType() == Type::FloatTy)
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
- Out << "ConstantFP::get(";
+ Out << "ConstantFP::get(getGlobalContext(), ";
Out << "APFloat(";
#if HAVE_PRINTF_A
char Buffer[100];
@@ -241,7 +230,7 @@ namespace {
!strncmp(Buffer, "-0x", 3) ||
!strncmp(Buffer, "+0x", 3)) &&
APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
- if (CFP->getType() == Type::DoubleTy)
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
Out << "BitsToDouble(" << Buffer << ")";
else
Out << "BitsToFloat((float)" << Buffer << ")";
@@ -259,11 +248,11 @@ namespace {
((StrVal[0] == '-' || StrVal[0] == '+') &&
(StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
(CFP->isExactlyValue(atof(StrVal.c_str())))) {
- if (CFP->getType() == Type::DoubleTy)
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
Out << StrVal;
else
Out << StrVal << "f";
- } else if (CFP->getType() == Type::DoubleTy)
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
Out << "BitsToDouble(0x"
<< utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
<< "ULL) /* " << StrVal << " */";
@@ -279,7 +268,7 @@ namespace {
Out << ")";
}
- void CppWriter::printCallingConv(unsigned cc){
+ void CppWriter::printCallingConv(CallingConv::ID cc){
// Print the calling convention.
switch (cc) {
case CallingConv::C: Out << "CallingConv::C"; break;
@@ -296,6 +285,8 @@ namespace {
Out << "GlobalValue::InternalLinkage"; break;
case GlobalValue::PrivateLinkage:
Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "GlobalValue::AvailableExternallyLinkage "; break;
case GlobalValue::LinkOnceAnyLinkage:
@@ -325,7 +316,7 @@ namespace {
void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
switch (VisType) {
- default: assert(0 && "Unknown GVar visibility");
+ default: llvm_unreachable("Unknown GVar visibility");
case GlobalValue::DefaultVisibility:
Out << "GlobalValue::DefaultVisibility";
break;
@@ -357,20 +348,21 @@ namespace {
// First, handle the primitive types .. easy
if (Ty->isPrimitiveType() || Ty->isInteger()) {
switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "Type::VoidTy";
+ case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())";
case Type::IntegerTyID: {
unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- return "IntegerType::get(" + utostr(BitWidth) + ")";
+ return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")";
}
- case Type::X86_FP80TyID: return "Type::X86_FP80Ty";
- case Type::FloatTyID: return "Type::FloatTy";
- case Type::DoubleTyID: return "Type::DoubleTy";
- case Type::LabelTyID: return "Type::LabelTy";
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())";
default:
error("Invalid primitive type");
break;
}
- return "Type::VoidTy"; // shouldn't be returned, but make it sensible
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(getGlobalContext())";
}
// Now, see if we've seen the type before and return that
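A minimal sketch of the LLVMContext threading that the regenerated C++ now relies on: the global Type::FloatTy-style singletons are gone, so every type accessor names its context explicitly. int32InContext is a hypothetical helper.

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"

const llvm::IntegerType *int32InContext(llvm::LLVMContext &C) {
  const llvm::Type *F = llvm::Type::getFloatTy(C);  // was Type::FloatTy
  (void)F;                                          // illustration only
  return llvm::IntegerType::get(C, 32);             // was IntegerType::get(32)
}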
@@ -436,7 +428,10 @@ namespace {
} else {
name = getTypePrefix(val->getType());
}
- name += (val->hasName() ? val->getName() : utostr(uniqueNum++));
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
sanitize(name);
NameSet::iterator NI = UsedNames.find(name);
if (NI != UsedNames.end())
@@ -477,6 +472,7 @@ namespace {
HANDLE_ATTR(Nest);
HANDLE_ATTR(ReadNone);
HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(InlineHint);
HANDLE_ATTR(NoInline);
HANDLE_ATTR(AlwaysInline);
HANDLE_ATTR(OptimizeForSize);
@@ -519,7 +515,8 @@ namespace {
if (TI != TypeStack.end()) {
TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
if (I == UnresolvedTypes.end()) {
- Out << "PATypeHolder " << typeName << "_fwd = OpaqueType::get();";
+ Out << "PATypeHolder " << typeName;
+ Out << "_fwd = OpaqueType::get(getGlobalContext());";
nl(Out);
UnresolvedTypes[Ty] = typeName;
}
@@ -579,6 +576,7 @@ namespace {
nl(Out);
}
Out << "StructType* " << typeName << " = StructType::get("
+ << "mod->getContext(), "
<< typeName << "_fields, /*isPacked=*/"
<< (ST->isPacked() ? "true" : "false") << ");";
nl(Out);
@@ -618,7 +616,8 @@ namespace {
break;
}
case Type::OpaqueTyID: {
- Out << "OpaqueType* " << typeName << " = OpaqueType::get();";
+ Out << "OpaqueType* " << typeName;
+ Out << " = OpaqueType::get(getGlobalContext());";
nl(Out);
break;
}
@@ -753,9 +752,10 @@ namespace {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
std::string constValue = CI->getValue().toString(10, true);
- Out << "ConstantInt* " << constName << " = ConstantInt::get(APInt("
- << cast<IntegerType>(CI->getType())->getBitWidth() << ", \""
- << constValue << "\", " << constValue.length() << ", 10));";
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(getGlobalContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
} else if (isa<ConstantAggregateZero>(CV)) {
Out << "ConstantAggregateZero* " << constName
<< " = ConstantAggregateZero::get(" << typeName << ");";
@@ -767,8 +767,11 @@ namespace {
printCFP(CFP);
Out << ";";
} else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
- if (CA->isString() && CA->getType()->getElementType() == Type::Int8Ty) {
- Out << "Constant* " << constName << " = ConstantArray::get(\"";
+ if (CA->isString() &&
+ CA->getType()->getElementType() ==
+ Type::getInt8Ty(CA->getContext())) {
+ Out << "Constant* " << constName <<
+ " = ConstantArray::get(getGlobalContext(), \"";
std::string tmp = CA->getAsString();
bool nullTerminate = false;
if (tmp[tmp.length()-1] == 0) {
@@ -839,12 +842,12 @@ namespace {
<< getCppName(CE->getOperand(0)) << ", "
<< "&" << constName << "_indices[0], "
<< constName << "_indices.size()"
- << " );";
+ << ");";
} else if (CE->isCast()) {
printConstant(CE->getOperand(0));
Out << "Constant* " << constName << " = ConstantExpr::getCast(";
switch (CE->getOpcode()) {
- default: assert(0 && "Invalid cast opcode");
+ default: llvm_unreachable("Invalid cast opcode");
case Instruction::Trunc: Out << "Instruction::Trunc"; break;
case Instruction::ZExt: Out << "Instruction::ZExt"; break;
case Instruction::SExt: Out << "Instruction::SExt"; break;
@@ -995,13 +998,13 @@ namespace {
void CppWriter::printVariableHead(const GlobalVariable *GV) {
nl(Out) << "GlobalVariable* " << getCppName(GV);
if (is_inline) {
- Out << " = mod->getGlobalVariable(";
+ Out << " = mod->getGlobalVariable(getGlobalContext(), ";
printEscapedString(GV->getName());
Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
nl(Out) << "if (!" << getCppName(GV) << ") {";
in(); nl(Out) << getCppName(GV);
}
- Out << " = new GlobalVariable(";
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
nl(Out) << "/*Type=*/";
printCppName(GV->getType()->getElementType());
Out << ",";
@@ -1016,8 +1019,7 @@ namespace {
}
nl(Out) << "/*Name=*/\"";
printEscapedString(GV->getName());
- Out << "\",";
- nl(Out) << "mod);";
+ Out << "\");";
nl(Out);
if (GV->hasSection()) {
@@ -1095,7 +1097,7 @@ namespace {
case Instruction::Ret: {
const ReturnInst* ret = cast<ReturnInst>(I);
- Out << "ReturnInst::Create("
+ Out << "ReturnInst::Create(getGlobalContext(), "
<< (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
break;
}
@@ -1159,8 +1161,9 @@ namespace {
<< bbname << ");";
break;
}
- case Instruction::Unreachable:{
+ case Instruction::Unreachable: {
Out << "new UnreachableInst("
+ << "getGlobalContext(), "
<< bbname << ");";
break;
}
@@ -1210,7 +1213,7 @@ namespace {
break;
}
case Instruction::FCmp: {
- Out << "FCmpInst* " << iName << " = new FCmpInst(";
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
switch (cast<FCmpInst>(I)->getPredicate()) {
case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
@@ -1232,11 +1235,11 @@ namespace {
}
Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
printEscapedString(I->getName());
- Out << "\", " << bbname << ");";
+ Out << "\");";
break;
}
case Instruction::ICmp: {
- Out << "ICmpInst* " << iName << " = new ICmpInst(";
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
switch (cast<ICmpInst>(I)->getPredicate()) {
case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
@@ -1252,7 +1255,7 @@ namespace {
}
Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
printEscapedString(I->getName());
- Out << "\", " << bbname << ");";
+ Out << "\");";
break;
}
case Instruction::Malloc: {
@@ -1680,7 +1683,8 @@ namespace {
for (Function::const_iterator BI = F->begin(), BE = F->end();
BI != BE; ++BI) {
std::string bbname(getCppName(BI));
- Out << "BasicBlock* " << bbname << " = BasicBlock::Create(\"";
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(getGlobalContext(), \"";
if (BI->hasName())
printEscapedString(BI->getName());
Out << "\"," << getCppName(BI->getParent()) << ",0);";
@@ -1799,6 +1803,7 @@ namespace {
void CppWriter::printProgram(const std::string& fname,
const std::string& mName) {
+ Out << "#include <llvm/LLVMContext.h>\n";
Out << "#include <llvm/Module.h>\n";
Out << "#include <llvm/DerivedTypes.h>\n";
Out << "#include <llvm/Constants.h>\n";
@@ -1808,8 +1813,8 @@ namespace {
Out << "#include <llvm/BasicBlock.h>\n";
Out << "#include <llvm/Instructions.h>\n";
Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
Out << "#include <llvm/Support/MathExtras.h>\n";
- Out << "#include <llvm/Support/raw_ostream.h>\n";
Out << "#include <llvm/Pass.h>\n";
Out << "#include <llvm/PassManager.h>\n";
Out << "#include <llvm/ADT/SmallVector.h>\n";
@@ -1821,7 +1826,6 @@ namespace {
Out << "int main(int argc, char**argv) {\n";
Out << " Module* Mod = " << fname << "();\n";
Out << " verifyModule(*Mod, PrintMessageAction);\n";
- Out << " outs().flush();\n";
Out << " PassManager PM;\n";
Out << " PM.add(createPrintModulePass(&outs()));\n";
Out << " PM.run(*Mod);\n";
@@ -1836,7 +1840,7 @@ namespace {
nl(Out,1) << "// Module Construction";
nl(Out) << "Module* mod = new Module(\"";
printEscapedString(mName);
- Out << "\");";
+ Out << "\", getGlobalContext());";
if (!TheModule->getTargetTriple().empty()) {
nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
}
@@ -2014,7 +2018,7 @@ char CppWriter::ID = 0;
//===----------------------------------------------------------------------===//
bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
- raw_ostream &o,
+ formatted_raw_ostream &o,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel) {
if (FileType != TargetMachine::AssemblyFile) return true;
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index db4bc0e722c8..1f74f76b5ac1 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -19,25 +19,24 @@
namespace llvm {
-class raw_ostream;
+class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
- const TargetData DataLayout; // Calculates type size & alignment
-
- CPPTargetMachine(const Module &M, const std::string &FS)
- : DataLayout(&M) {}
+ CPPTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : TargetMachine(T) {}
virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ virtual bool addPassesToEmitWholeFile(PassManager &PM,
+ formatted_raw_ostream &Out,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel);
- // This class always works, but shouldn't be the default in most cases.
- static unsigned getModuleMatchQuality(const Module &M) { return 1; }
-
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const TargetData *getTargetData() const { return 0; }
};
+extern Target TheCppBackendTarget;
+
} // End llvm namespace
diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile
index ca7e1a82c808..dc9cf48c8b1e 100644
--- a/lib/Target/CppBackend/Makefile
+++ b/lib/Target/CppBackend/Makefile
@@ -9,6 +9,9 @@
LEVEL = ../../..
LIBRARYNAME = LLVMCppBackend
+
+DIRS = TargetInfo
+
include $(LEVEL)/Makefile.common
CompileCommonOpts += -Wno-format
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..edaf5d3cb188
--- /dev/null
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMCppBackendInfo
+ CppBackendTargetInfo.cpp
+ )
+
diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
new file mode 100644
index 000000000000..d0aeb12499c5
--- /dev/null
+++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- CppBackendTargetInfo.cpp - CppBackend Target Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCppBackendTarget;
+
+static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
+ // This class always works, but shouldn't be the default in most cases.
+ return 1;
+}
+
+extern "C" void LLVMInitializeCppBackendTargetInfo() {
+ TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
+ "C++ backend",
+ &CppBackend_TripleMatchQuality);
+}
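A sketch of why the constant quality of 1 above keeps the C++ backend from becoming the default: TargetRegistry::lookupTarget() chooses the registered target whose match-quality callback scores the triple highest. bestTargetFor is a hypothetical helper.

#include "llvm/Target/TargetRegistry.h"
#include <string>

const llvm::Target *bestTargetFor(const std::string &TT, std::string &Err) {
  // A native backend normally reports a much higher score for its own
  // triple, so CppBackend_TripleMatchQuality's 1 only wins when nothing
  // more specific has been registered.
  return llvm::TargetRegistry::lookupTarget(TT, Err);
}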
diff --git a/lib/Target/CppBackend/TargetInfo/Makefile b/lib/Target/CppBackend/TargetInfo/Makefile
new file mode 100644
index 000000000000..6e682838daec
--- /dev/null
+++ b/lib/Target/CppBackend/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/CppBackend/TargetInfo/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCppBackendInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index ee73c381cd4a..26d637b4347b 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -19,44 +19,35 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/Analysis/ConstantsScanner.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
-namespace {
+namespace llvm {
// TargetMachine for the MSIL
struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine {
- const TargetData DataLayout; // Calculates type size & alignment
-
- MSILTarget(const Module &M, const std::string &FS)
- : DataLayout(&M) {}
+ MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
+ : TargetMachine(T) {}
virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ virtual bool addPassesToEmitWholeFile(PassManager &PM,
+ formatted_raw_ostream &Out,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel);
- // This class always works, but shouldn't be the default in most cases.
- static unsigned getModuleMatchQuality(const Module &M) { return 1; }
-
- virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const TargetData *getTargetData() const { return 0; }
};
}
-/// MSILTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int MSILTargetMachineModule;
-int MSILTargetMachineModule = 0;
-
-static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
-
-// Force static initialization.
-extern "C" void LLVMInitializeMSILTarget() { }
+extern "C" void LLVMInitializeMSILTarget() {
+ // Register the target.
+ RegisterTargetMachine<MSILTarget> X(TheMSILTarget);
+}
bool MSILModule::runOnModule(Module &M) {
ModulePtr = &M;
@@ -239,8 +230,17 @@ bool MSILWriter::isZeroValue(const Value* V) {
std::string MSILWriter::getValueName(const Value* V) {
+ std::string Name;
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ Name = Mang->getMangledName(GV);
+ else {
+ unsigned &No = AnonValueNumbers[V];
+ if (No == 0) No = ++NextAnonValueNumber;
+ Name = "tmp" + utostr(No);
+ }
+
// Quoting the name allows control and space characters.
- return "'"+Mang->getValueName(V)+"'";
+ return "'"+Name+"'";
}
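A sketch of the anonymous-value naming scheme getValueName() switches to: a DenseMap hands each unnamed Value a stable "tmpN" label on first use, standing in for the removed Mangler::getValueName() on non-globals. numberAnonValue is a hypothetical free-function version of the member logic.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Value.h"
#include <string>

static llvm::DenseMap<const llvm::Value*, unsigned> AnonValueNumbers;
static unsigned NextAnonValueNumber;

static std::string numberAnonValue(const llvm::Value *V) {
  unsigned &No = AnonValueNumbers[V];  // value-initialized to 0 on insert
  if (No == 0) No = ++NextAnonValueNumber;
  return "tmp" + llvm::utostr(No);     // e.g. "tmp1", "tmp2", ...
}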
@@ -257,11 +257,20 @@ std::string MSILWriter::getLabelName(const std::string& Name) {
std::string MSILWriter::getLabelName(const Value* V) {
- return getLabelName(Mang->getValueName(V));
+ std::string Name;
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ Name = Mang->getMangledName(GV);
+ else {
+ unsigned &No = AnonValueNumbers[V];
+ if (No == 0) No = ++NextAnonValueNumber;
+ Name = "tmp" + utostr(No);
+ }
+
+ return getLabelName(Name);
}
-std::string MSILWriter::getConvModopt(unsigned CallingConvID) {
+std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
switch (CallingConvID) {
case CallingConv::C:
case CallingConv::Cold:
@@ -272,8 +281,8 @@ std::string MSILWriter::getConvModopt(unsigned CallingConvID) {
case CallingConv::X86_StdCall:
return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
default:
- cerr << "CallingConvID = " << CallingConvID << '\n';
- assert(0 && "Unsupported calling convention");
+ errs() << "CallingConvID = " << CallingConvID << '\n';
+ llvm_unreachable("Unsupported calling convention");
}
return ""; // Not reached
}
@@ -318,8 +327,8 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
case Type::DoubleTyID:
return "float64 ";
default:
- cerr << "Type = " << *Ty << '\n';
- assert(0 && "Invalid primitive type");
+ errs() << "Type = " << *Ty << '\n';
+ llvm_unreachable("Invalid primitive type");
}
return ""; // Not reached
}
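A hedged sketch of the diagnostic pattern applied throughout MSILWriter: dump the offending entity to errs(), then hit llvm_unreachable(), which still traps in release builds where assert(0 && ...) would have compiled away. primitiveName and its cases are illustrative.

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

static const char *primitiveName(unsigned TypeID) {
  switch (TypeID) {
  case 0: return "void ";              // placeholder mapping
  default:
    llvm::errs() << "TypeID = " << TypeID << '\n';
    llvm_unreachable("Invalid primitive type");
  }
}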
@@ -346,8 +355,8 @@ std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
return getArrayTypeName(Ty->getTypeID(),Ty);
return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
default:
- cerr << "Type = " << *Ty << '\n';
- assert(0 && "Invalid type in getTypeName()");
+ errs() << "Type = " << *Ty << '\n';
+ llvm_unreachable("Invalid type in getTypeName()");
}
return ""; // Not reached
}
@@ -390,8 +399,8 @@ std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
case Type::PointerTyID:
return "i"+utostr(TD->getTypeAllocSize(Ty));
default:
- cerr << "TypeID = " << Ty->getTypeID() << '\n';
- assert(0 && "Invalid type in TypeToPostfix()");
+ errs() << "TypeID = " << Ty->getTypeID() << '\n';
+ llvm_unreachable("Invalid type in TypeToPostfix()");
}
return ""; // Not reached
}
@@ -406,7 +415,7 @@ void MSILWriter::printConvToPtr() {
printSimpleInstruction("conv.u8");
break;
default:
- assert(0 && "Module use not supporting pointer size");
+ llvm_unreachable("Module use not supporting pointer size");
}
}
@@ -417,15 +426,15 @@ void MSILWriter::printPtrLoad(uint64_t N) {
printSimpleInstruction("ldc.i4",utostr(N).c_str());
// FIXME: Need overflow test?
if (!isUInt32(N)) {
- cerr << "Value = " << utostr(N) << '\n';
- assert(0 && "32-bit pointer overflowed");
+ errs() << "Value = " << utostr(N) << '\n';
+ llvm_unreachable("32-bit pointer overflowed");
}
break;
case Module::Pointer64:
printSimpleInstruction("ldc.i8",utostr(N).c_str());
break;
default:
- assert(0 && "Module use not supporting pointer size");
+ llvm_unreachable("Module use not supporting pointer size");
}
}
@@ -460,8 +469,8 @@ void MSILWriter::printConstLoad(const Constant* C) {
// Undefined constant value = NULL.
printPtrLoad(0);
} else {
- cerr << "Constant = " << *C << '\n';
- assert(0 && "Invalid constant value");
+ errs() << "Constant = " << *C << '\n';
+ llvm_unreachable("Invalid constant value");
}
Out << '\n';
}
@@ -509,8 +518,8 @@ void MSILWriter::printValueLoad(const Value* V) {
printConstantExpr(cast<ConstantExpr>(V));
break;
default:
- cerr << "Value = " << *V << '\n';
- assert(0 && "Invalid value location");
+ errs() << "Value = " << *V << '\n';
+ llvm_unreachable("Invalid value location");
}
}
@@ -524,8 +533,8 @@ void MSILWriter::printValueSave(const Value* V) {
printSimpleInstruction("stloc",getValueName(V).c_str());
break;
default:
- cerr << "Value = " << *V << '\n';
- assert(0 && "Invalid value location");
+ errs() << "Value = " << *V << '\n';
+ llvm_unreachable("Invalid value location");
}
}
@@ -651,12 +660,19 @@ void MSILWriter::printIndirectSave(const Type* Ty) {
void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty) {
+ const Type* Ty, const Type* SrcTy) {
std::string Tmp("");
printValueLoad(V);
switch (Op) {
// Signed
case Instruction::SExt:
+      // If sign-extending an int, first convert from unsigned to signed
+      // with the same bit size - otherwise we would lose the sign.
+ if (SrcTy) {
+ Tmp = "conv."+getTypePostfix(SrcTy,false,true);
+ printSimpleInstruction(Tmp.c_str());
+ }
+ // FALLTHROUGH
case Instruction::SIToFP:
case Instruction::FPToSI:
Tmp = "conv."+getTypePostfix(Ty,false,true);
@@ -679,8 +695,8 @@ void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
// FIXME: meaning that ld*/st* instructions do not change the data format.
break;
default:
- cerr << "Opcode = " << Op << '\n';
- assert(0 && "Invalid conversion instruction");
+ errs() << "Opcode = " << Op << '\n';
+ llvm_unreachable("Invalid conversion instruction");
}
}
@@ -770,8 +786,8 @@ void MSILWriter::printFunctionCall(const Value* FnVal,
else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
Name = getConvModopt(Invoke->getCallingConv());
else {
- cerr << "Instruction = " << Inst->getName() << '\n';
- assert(0 && "Need \"Invoke\" or \"Call\" instruction only");
+ errs() << "Instruction = " << Inst->getName() << '\n';
+ llvm_unreachable("Need \"Invoke\" or \"Call\" instruction only");
}
if (const Function* F = dyn_cast<Function>(FnVal)) {
// Direct call.
@@ -804,7 +820,8 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
// Save as pointer type "void*"
printValueLoad(Inst->getOperand(1));
printSimpleInstruction("ldloca",Name.c_str());
- printIndirectSave(PointerType::getUnqual(IntegerType::get(8)));
+ printIndirectSave(PointerType::getUnqual(
+ IntegerType::get(Inst->getContext(), 8)));
break;
case Intrinsic::vaend:
// Close argument list handle.
@@ -818,8 +835,8 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
break;
default:
- cerr << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
- assert(0 && "Invalid intrinsic function");
+ errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
+ llvm_unreachable("Invalid intrinsic function");
}
}
@@ -877,12 +894,13 @@ void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
break;
case ICmpInst::ICMP_UGT:
printBinaryInstruction("cgt.un",Left,Right);
+ break;
case ICmpInst::ICMP_SGT:
printBinaryInstruction("cgt",Left,Right);
break;
default:
- cerr << "Predicate = " << Predicate << '\n';
- assert(0 && "Invalid icmp predicate");
+ errs() << "Predicate = " << Predicate << '\n';
+ llvm_unreachable("Invalid icmp predicate");
}
}
@@ -976,7 +994,7 @@ void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
printSimpleInstruction("or");
break;
default:
- assert(0 && "Illegal FCmp predicate");
+ llvm_unreachable("Illegal FCmp predicate");
}
}
@@ -1024,7 +1042,8 @@ void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
"instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
printSimpleInstruction("refanyval","void*");
std::string Name =
- "ldind."+getTypePostfix(PointerType::getUnqual(IntegerType::get(8)),false);
+ "ldind."+getTypePostfix(PointerType::getUnqual(
+ IntegerType::get(Inst->getContext(), 8)),false);
printSimpleInstruction(Name.c_str());
}
@@ -1132,9 +1151,13 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
case Instruction::Store:
printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
break;
+ case Instruction::SExt:
+ printCastInstruction(Inst->getOpcode(),Left,
+ cast<CastInst>(Inst)->getDestTy(),
+ cast<CastInst>(Inst)->getSrcTy());
+ break;
case Instruction::Trunc:
case Instruction::ZExt:
- case Instruction::SExt:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::UIToFP:
@@ -1169,10 +1192,10 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
printAllocaInstruction(cast<AllocaInst>(Inst));
break;
case Instruction::Malloc:
- assert(0 && "LowerAllocationsPass used");
+ llvm_unreachable("LowerAllocationsPass used");
break;
case Instruction::Free:
- assert(0 && "LowerAllocationsPass used");
+ llvm_unreachable("LowerAllocationsPass used");
break;
case Instruction::Unreachable:
printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
@@ -1184,8 +1207,8 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
printVAArgInstruction(cast<VAArgInst>(Inst));
break;
default:
- cerr << "Instruction = " << Inst->getName() << '\n';
- assert(0 && "Unsupported instruction");
+ errs() << "Instruction = " << Inst->getName() << '\n';
+ llvm_unreachable("Unsupported instruction");
}
}
@@ -1216,7 +1239,7 @@ void MSILWriter::printBasicBlock(const BasicBlock* BB) {
// Print instruction
printInstruction(Inst);
// Save result
- if (Inst->getType()!=Type::VoidTy) {
+ if (Inst->getType()!=Type::getVoidTy(BB->getContext())) {
// Do not save the value after an invoke; that is done in the "try" block
if (Inst->getOpcode()==Instruction::Invoke) continue;
printValueSave(Inst);
@@ -1245,7 +1268,7 @@ void MSILWriter::printLocalVariables(const Function& F) {
Ty = PointerType::getUnqual(AI->getAllocatedType());
Name = getValueName(AI);
Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- } else if (I->getType()!=Type::VoidTy) {
+ } else if (I->getType()!=Type::getVoidTy(F.getContext())) {
// Operation result.
Ty = I->getType();
Name = getValueName(&*I);
@@ -1372,8 +1395,8 @@ void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
printBinaryInstruction("shr",left,right);
break;
default:
- cerr << "Expression = " << *CE << "\n";
- assert(0 && "Invalid constant expression");
+ errs() << "Expression = " << *CE << "\n";
+ llvm_unreachable("Invalid constant expression");
}
}
@@ -1406,8 +1429,8 @@ void MSILWriter::printStaticInitializerList() {
postfix = "stind."+postfix;
printSimpleInstruction(postfix.c_str());
} else {
- cerr << "Constant = " << *I->constant << '\n';
- assert(0 && "Invalid static initializer");
+ errs() << "Constant = " << *I->constant << '\n';
+ llvm_unreachable("Invalid static initializer");
}
}
}
@@ -1470,8 +1493,8 @@ unsigned int MSILWriter::getBitWidth(const Type* Ty) {
case 64:
return N;
default:
- cerr << "Bits = " << N << '\n';
- assert(0 && "Unsupported integer width");
+ errs() << "Bits = " << N << '\n';
+ llvm_unreachable("Unsupported integer width");
}
return 0; // Not reached
}
@@ -1528,12 +1551,12 @@ void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
// Null pointer initialization
if (TySize==4) Out << "int32 (0)";
else if (TySize==8) Out << "int64 (0)";
- else assert(0 && "Invalid pointer size");
+ else llvm_unreachable("Invalid pointer size");
}
break;
default:
- cerr << "TypeID = " << Ty->getTypeID() << '\n';
- assert(0 && "Invalid type in printStaticConstant()");
+ errs() << "TypeID = " << Ty->getTypeID() << '\n';
+ llvm_unreachable("Invalid type in printStaticConstant()");
}
// Increase offset.
Offset += TySize;
@@ -1555,8 +1578,8 @@ void MSILWriter::printStaticInitializer(const Constant* C,
Out << getTypeName(C->getType());
break;
default:
- cerr << "Type = " << *C << "\n";
- assert(0 && "Invalid constant type");
+ errs() << "Type = " << *C << "\n";
+ llvm_unreachable("Invalid constant type");
}
// Print initializer
std::string label = Name;
@@ -1595,17 +1618,18 @@ void MSILWriter::printGlobalVariables() {
const char* MSILWriter::getLibraryName(const Function* F) {
- return getLibraryForSymbol(F->getName().c_str(), true, F->getCallingConv());
+ return getLibraryForSymbol(F->getName(), true, F->getCallingConv());
}
const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
- return getLibraryForSymbol(Mang->getValueName(GV).c_str(), false, 0);
+ return getLibraryForSymbol(Mang->getMangledName(GV), false, CallingConv::C);
}
-const char* MSILWriter::getLibraryForSymbol(const char* Name, bool isFunction,
- unsigned CallingConv) {
+const char* MSILWriter::getLibraryForSymbol(const StringRef &Name,
+ bool isFunction,
+ CallingConv::ID CallingConv) {
// TODO: Read *.def file with function and libraries definitions.
return "MSVCRT.DLL";
}
@@ -1654,11 +1678,10 @@ void MSILWriter::printExternals() {
E = ModulePtr->global_end(); I!=E; ++I) {
if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
// Use "LoadLibrary"/"GetProcAddress" to recive variable address.
- std::string Label = "not_null$_"+utostr(getUniqID());
std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
printSimpleInstruction("ldsflda",Tmp.c_str());
Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
- Out << "\tldstr\t\"" << Mang->getValueName(&*I) << "\"\n";
+ Out << "\tldstr\t\"" << Mang->getMangledName(&*I) << "\"\n";
printSimpleInstruction("call","void* $MSIL_Import(string,string)");
printIndirectSave(I->getType());
}
@@ -1671,7 +1694,8 @@ void MSILWriter::printExternals() {
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, raw_ostream &o,
+bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
+ formatted_raw_ostream &o,
CodeGenFileType FileType,
CodeGenOpt::Level OptLevel)
{
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
index 45f5579bfb34..2280a3bed915 100644
--- a/lib/Target/MSIL/MSILWriter.h
+++ b/lib/Target/MSIL/MSILWriter.h
@@ -13,24 +13,24 @@
#ifndef MSILWRITER_H
#define MSILWRITER_H
+#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/FindUsedTypes.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachineRegistry.h"
#include "llvm/Support/Mangler.h"
-#include <ios>
-using namespace llvm;
-namespace {
+namespace llvm {
+ extern Target TheMSILTarget;
class MSILModule : public ModulePass {
Module *ModulePtr;
@@ -56,7 +56,7 @@ namespace {
};
- class MSILWriter : public FunctionPass {
+ class MSILWriter : public FunctionPass {
struct StaticInitializer {
const Constant* constant;
uint64_t offset;
@@ -75,7 +75,7 @@ namespace {
}
public:
- raw_ostream &Out;
+ formatted_raw_ostream &Out;
Module* ModulePtr;
const TargetData* TD;
Mangler* Mang;
@@ -85,7 +85,11 @@ namespace {
StaticInitList;
const std::set<const Type *>* UsedTypes;
static char ID;
- MSILWriter(raw_ostream &o) : FunctionPass(&ID), Out(o) {
+ DenseMap<const Value*, unsigned> AnonValueNumbers;
+ unsigned NextAnonValueNumber;
+
+ MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o),
+ NextAnonValueNumber(0) {
UniqID = 0;
}
@@ -130,7 +134,7 @@ namespace {
std::string getLabelName(const std::string& Name);
- std::string getConvModopt(unsigned CallingConvID);
+ std::string getConvModopt(CallingConv::ID CallingConvID);
std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
@@ -183,7 +187,7 @@ namespace {
void printIndirectSave(const Type* Ty);
void printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty);
+ const Type* Ty, const Type* SrcTy=0);
void printGepInstruction(const Value* V, gep_type_iterator I,
gep_type_iterator E);
@@ -244,11 +248,12 @@ namespace {
const char* getLibraryName(const GlobalVariable* GV);
- const char* getLibraryForSymbol(const char* Name, bool isFunction,
- unsigned CallingConv);
+ const char* getLibraryForSymbol(const StringRef &Name, bool isFunction,
+ CallingConv::ID CallingConv);
void printExternals();
};
+
}
#endif
diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile
index 94265edf98c5..8057cc748039 100644
--- a/lib/Target/MSIL/Makefile
+++ b/lib/Target/MSIL/Makefile
@@ -9,6 +9,9 @@
LEVEL = ../../..
LIBRARYNAME = LLVMMSIL
+
+DIRS = TargetInfo
+
include $(LEVEL)/Makefile.common
CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..9f0c3a09341a
--- /dev/null
+++ b/lib/Target/MSIL/TargetInfo/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSILInfo
+ MSILTargetInfo.cpp
+ )
+
diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
new file mode 100644
index 000000000000..dfd42814e51c
--- /dev/null
+++ b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSILWriter.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMSILTarget;
+
+static unsigned MSIL_TripleMatchQuality(const std::string &TT) {
+ // This class always works, but shouldn't be the default in most cases.
+ return 1;
+}
+
+extern "C" void LLVMInitializeMSILTargetInfo() {
+ TargetRegistry::RegisterTarget(TheMSILTarget, "msil",
+ "MSIL backend",
+ &MSIL_TripleMatchQuality);
+}
diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile
new file mode 100644
index 000000000000..30b0950db0f7
--- /dev/null
+++ b/lib/Target/MSIL/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSILInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..6e6688746463
--- /dev/null
+++ b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430AsmPrinter
+ MSP430AsmPrinter.cpp
+ )
+add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..852019febf5e
--- /dev/null
+++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
@@ -0,0 +1,281 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCAsmInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *MAI, bool V)
+ : AsmPrinter(O, TM, MAI, V) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int OpNum);
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printMachineInstruction(const MachineInstr * MI);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ virtual void PrintGlobalVariable(const GlobalVariable *GV) {
+ // FIXME: No support for global variables?
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ }
+ };
+} // end of anonymous namespace
+
+#include "MSP430GenAsmWriter.inc"
+
+
+void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+
+ unsigned FnAlign = MF.getAlignment();
+ EmitAlignment(FnAlign, F);
+
+ switch (F->getLinkage()) {
+ default: llvm_unreachable("Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ case Function::LinkerPrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ",@function\n"
+ << CurrentFnName << ":\n";
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print the 'header' of the function
+ emitFunctionHeader(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II)
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ // We didn't modify anything
+ return false;
+}
+
+void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ processDebugLoc(MI, true);
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+
+ processDebugLoc(MI, false);
+}
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ std::string Name = Mang->getMangledName(MO.getGlobal());
+ uint64_t Offset = MO.getOffset();
+
+ O << (isMemOp ? '&' : '#');
+ if (Offset)
+ O << '(' << Offset << '+';
+
+ O << Name;
+ if (Offset)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+
+ O << (isMemOp ? '&' : '#') << Name;
+
+ return;
+ }
+ default:
+ llvm_unreachable("Not implemented yet!");
+ }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ if (Base.isGlobal())
+ printOperand(MI, OpNum, "mem");
+ else if (Disp.isImm() && !Base.getReg())
+ printOperand(MI, OpNum);
+ else if (Base.getReg()) {
+ if (Disp.getImm()) {
+ printOperand(MI, OpNum + 1, "nohash");
+ O << '(';
+ printOperand(MI, OpNum);
+ O << ')';
+ } else {
+ O << '@';
+ printOperand(MI, OpNum);
+ }
+ } else
+ llvm_unreachable("Unsupported memory operand");
+}
+
+void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) {
+ unsigned CC = MI->getOperand(OpNum).getImm();
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CC code");
+ break;
+ case MSP430::COND_E:
+ O << "eq";
+ break;
+ case MSP430::COND_NE:
+ O << "ne";
+ break;
+ case MSP430::COND_HS:
+ O << "hs";
+ break;
+ case MSP430::COND_LO:
+ O << "lo";
+ break;
+ case MSP430::COND_GE:
+ O << "ge";
+ break;
+ case MSP430::COND_L:
+ O << 'l';
+ break;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ return true; // Unknown modifier.
+ }
+ printSrcMemOperand(MI, OpNo);
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+ RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+}
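
Together, printOperand and printSrcMemOperand encode MSP430 operand syntax: immediates get a '#' prefix (suppressed by the "nohash" modifier), globals in memory context get '&', register-indirect is '@Rn', and base-plus-displacement is 'disp(Rn)'. A simplified standalone model of that decision tree — the Operand struct and helper names below are illustrative, not the real MachineOperand API:

// toy_operands.cpp - simplified model of the operand-printing rules above.
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>

struct Operand {
  enum Kind { Reg, Imm, Global } K;
  std::string RegName;  // e.g. "r12"
  long ImmVal;          // immediate or displacement value
  std::string SymName;  // global symbol name
};

static std::string printOperand(const Operand &MO, const char *Modifier = 0) {
  std::ostringstream OS;
  switch (MO.K) {
  case Operand::Reg:
    OS << MO.RegName;
    break;
  case Operand::Imm:
    if (!Modifier || std::strcmp(Modifier, "nohash")) // '#' unless "nohash"
      OS << '#';
    OS << MO.ImmVal;
    break;
  case Operand::Global:
    OS << (Modifier && !std::strcmp(Modifier, "mem") ? '&' : '#') << MO.SymName;
    break;
  }
  return OS.str();
}

static std::string printSrcMem(const Operand &Base, const Operand &Disp) {
  if (Base.K == Operand::Global)
    return printOperand(Base, "mem");                 // &sym
  if (Base.K != Operand::Reg)
    return printOperand(Base);                        // #imm (absolute)
  if (Disp.ImmVal)                                    // disp(reg)
    return printOperand(Disp, "nohash") + "(" + printOperand(Base) + ")";
  return "@" + printOperand(Base);                    // register indirect
}

int main() {
  Operand R  = {Operand::Reg, "r12", 0, ""};
  Operand D4 = {Operand::Imm, "", 4, ""};
  Operand D0 = {Operand::Imm, "", 0, ""};
  Operand G  = {Operand::Global, "", 0, "counter"};
  std::cout << printSrcMem(G, D0) << "\n";  // &counter
  std::cout << printSrcMem(R, D4) << "\n";  // 4(r12)
  std::cout << printSrcMem(R, D0) << "\n";  // @r12
}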
diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile
new file mode 100644
index 000000000000..4f340c673358
--- /dev/null
+++ b/lib/Target/MSP430/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430AsmPrinter
+
+# Hack: we need to include 'main' MSP430 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 67017733cd9c..60e0bb1856c3 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -10,14 +10,14 @@ tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
tablegen(MSP430GenCallingConv.inc -gen-callingconv)
tablegen(MSP430GenSubtarget.inc -gen-subtarget)
-add_llvm_target(MSP430
- MSP430AsmPrinter.cpp
- MSP430FrameInfo.cpp
+add_llvm_target(MSP430CodeGen
MSP430InstrInfo.cpp
MSP430ISelDAGToDAG.cpp
MSP430ISelLowering.cpp
+ MSP430MCAsmInfo.cpp
MSP430RegisterInfo.cpp
MSP430Subtarget.cpp
- MSP430TargetAsmInfo.cpp
MSP430TargetMachine.cpp
)
+
+target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index fc13c9e875f1..d9f5f8629541 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -20,13 +20,13 @@
namespace llvm {
class MSP430TargetMachine;
class FunctionPass;
- class raw_ostream;
+ class formatted_raw_ostream;
FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
CodeGenOpt::Level OptLevel);
- FunctionPass *createMSP430CodePrinterPass(raw_ostream &o,
- MSP430TargetMachine &tm,
- bool verbose);
+
+ extern Target TheMSP430Target;
+
} // end namespace llvm;
// Defines symbolic names for MSP430 registers.
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index bf49ec0bff46..4195a88f8de0 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -28,8 +28,14 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+
using namespace llvm;
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
/// instructions for SelectionDAG operations.
///
@@ -50,10 +56,15 @@ namespace {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
+ virtual bool
+ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
// Include the pieces autogenerated from the target description.
#include "MSP430GenDAGISel.inc"
private:
+ void PreprocessForRMW();
SDNode *Select(SDValue Op);
bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp);
@@ -120,21 +131,155 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr,
}
+bool MSP430DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// MoveBelowTokenFactor - Replace the TokenFactor's load operand with the
+/// load's chain operand and move the load below the TokenFactor. Replace the
+/// store's chain operand with the load's chain result.
+/// Shamelessly stolen from X86.
+static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Store, SDValue TF) {
+ SmallVector<SDValue, 4> Ops;
+ bool isRMW = false;
+ SDValue TF0, TF1, NewTF;
+ for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
+ if (Load.getNode() == TF.getOperand(i).getNode()) {
+ TF0 = Load.getOperand(0);
+ Ops.push_back(TF0);
+ } else {
+ TF1 = TF.getOperand(i);
+ Ops.push_back(TF1);
+ if (LoadSDNode* LD = dyn_cast<LoadSDNode>(TF1))
+ isRMW = !LD->isVolatile();
+ }
+
+ if (isRMW && TF1.getOperand(0).getNode() == TF0.getNode())
+ NewTF = TF0;
+ else
+ NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
+
+ SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
+ Load.getOperand(1),
+ Load.getOperand(2));
+ CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
+ Store.getOperand(2), Store.getOperand(3));
+}
+
+/// isRMWLoad - Return true if N is a load that's part of an RMW sub-DAG. The chain
+/// produced by the load must only be used by the store's chain operand,
+/// otherwise this may produce a cycle in the DAG.
+/// Shamelessly stolen from X86. FIXME: Should we make this function common?
+static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
+ SDValue &Load) {
+ if (N.getOpcode() == ISD::BIT_CONVERT)
+ N = N.getOperand(0);
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD || LD->isVolatile())
+ return false;
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
+ return false;
+
+ if (N.hasOneUse() &&
+ LD->hasNUsesOfValue(1, 1) &&
+ N.getOperand(1) == Address &&
+ LD->isOperandOf(Chain.getNode())) {
+ Load = N;
+ return true;
+ }
+ return false;
+}
+
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// Shamelessly stolen from X86.
+void MSP430DAGToDAGISel::PreprocessForRMW() {
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ if (!ISD::isNON_TRUNCStore(I))
+ continue;
+
+ SDValue Chain = I->getOperand(0);
+ if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
+ continue;
+
+ SDValue N1 = I->getOperand(1); // Value to store
+ SDValue N2 = I->getOperand(2); // Address of store
+
+ if (!N1.hasOneUse())
+ continue;
+
+ bool RModW = false;
+ SDValue Load;
+ unsigned Opcode = N1.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADDC:
+ case ISD::ADDE: {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ RModW = isRMWLoad(N10, Chain, N2, Load);
+
+ if (!RModW && isRMWLoad(N11, Chain, N2, Load)) {
+ // Swap the operands; making the RMW load the first operand seems
+ // to help selection and prevent token chain loops.
+ N1 = CurDAG->UpdateNodeOperands(N1, N11, N10);
+ RModW = true;
+ }
+ break;
+ }
+ case ISD::SUB:
+ case ISD::SUBC:
+ case ISD::SUBE: {
+ SDValue N10 = N1.getOperand(0);
+ RModW = isRMWLoad(N10, Chain, N2, Load);
+ break;
+ }
+ }
+
+ if (RModW) {
+ MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
+ ++NumLoadMoved;
+ }
+ }
+}
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void MSP430DAGToDAGISel::InstructionSelect() {
+ PreprocessForRMW();
+
+ DEBUG(errs() << "Selection DAG after RMW preprocessing:\n");
+ DEBUG(CurDAG->dump());
+
DEBUG(BB->dump());
// Codegen the basic block.
-#ifndef NDEBUG
- DOUT << "===== Instruction selection begins:\n";
- Indent = 0;
-#endif
+ DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(Indent = 0);
SelectRoot(*CurDAG);
-#ifndef NDEBUG
- DOUT << "===== Instruction selection ends:\n";
-#endif
+ DEBUG(errs() << "===== Instruction selection ends:\n");
CurDAG->RemoveDeadNodes();
}
@@ -144,21 +289,17 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
// Dump information about the Node being selected
- #ifndef NDEBUG
- DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(errs().indent(Indent) << "Selecting: ");
DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
- Indent += 2;
- #endif
+ DEBUG(errs() << "\n");
+ DEBUG(Indent += 2);
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- #ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "== ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
- Indent -= 2;
- #endif
+ DEBUG(errs().indent(Indent-2) << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ DEBUG(Indent -= 2);
return NULL;
}
@@ -172,23 +313,21 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
if (Node->hasOneUse())
return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
TFI, CurDAG->getTargetConstant(0, MVT::i16));
- return CurDAG->getTargetNode(MSP430::ADD16ri, dl, MVT::i16,
- TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
}
}
// Select the default instruction
SDNode *ResNode = SelectCode(Op);
- #ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
if (ResNode == NULL || ResNode == Op.getNode())
DEBUG(Op.getNode()->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
- DOUT << "\n";
- Indent -= 2;
- #endif
+ DEBUG(errs() << "\n");
+ DEBUG(Indent -= 2);
return ResNode;
}
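
The RMW preprocessing above exists so that a store whose value operand is an op on a load of the same address can be matched as one memory-destination instruction (see the MOV16mm/OR16mm patterns added later in this commit): the load must hang directly off the store's chain rather than behind a TokenFactor. A toy C++11 sketch of the chain rewrite performed by MoveBelowTokenFactor — the Node graph below is illustrative, not SelectionDAG:

// toy_rmw.cpp - before the rewrite, the load's chain goes through a
// TokenFactor, so the load/op/store triple cannot be selected as one RMW
// instruction; afterwards the store chains through the load directly.
#include <iostream>
#include <string>
#include <vector>

struct Node {
  std::string Name;
  std::vector<Node*> Ops;   // Ops[0] is the chain operand by convention
};

static void dumpChain(const Node *N, int Depth = 0) {
  std::cout << std::string(Depth * 2, ' ') << N->Name << "\n";
  for (size_t i = 0; i != N->Ops.size(); ++i)
    dumpChain(N->Ops[i], Depth + 1);
}

int main() {
  Node Entry = {"entry", {}};
  Node Load  = {"load X", {&Entry}};
  Node Other = {"other chain", {&Entry}};
  Node TF    = {"TokenFactor", {&Load, &Other}};
  Node Store = {"store (add (load X), C) -> X", {&TF}};

  std::cout << "before:\n"; dumpChain(&Store);

  // MoveBelowTokenFactor, in miniature: splice the load out of the
  // TokenFactor (replacing it with the load's own chain input), move the
  // load below the TokenFactor, and chain the store through the load.
  TF.Ops[0]    = Load.Ops[0];   // TokenFactor uses the load's old chain
  Load.Ops[0]  = &TF;           // load moves below the TokenFactor
  Store.Ops[0] = &Load;         // store chains through the load

  std::cout << "after:\n"; dumpChain(&Store);
}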
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 91a8663a632c..b56f069b54de 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -31,12 +31,16 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
- TargetLowering(tm), Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
// Set up the register classes.
addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
@@ -77,7 +81,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::ROTR, MVT::i8, Expand);
setOperationAction(ISD::ROTL, MVT::i16, Expand);
setOperationAction(ISD::ROTR, MVT::i16, Expand);
- setOperationAction(ISD::RET, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -92,6 +95,24 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+
+ setOperationAction(ISD::CTTZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// FIXME: Implement efficiently multiplication by a constant
setOperationAction(ISD::MUL, MVT::i16, Expand);
@@ -110,19 +131,16 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::SHL: // FALLTHROUGH
case ISD::SRL:
case ISD::SRA: return LowerShifts(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
- case ISD::CALL: return LowerCALL(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
default:
- assert(0 && "unimplemented operand");
+ llvm_unreachable("unimplemented operand");
return SDValue();
}
}
@@ -133,32 +151,84 @@ unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const {
}
//===----------------------------------------------------------------------===//
+// MSP430 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+MSP430TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, MSP430::GR8RegisterClass);
+
+ return std::make_pair(0U, MSP430::GR16RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "MSP430GenCallingConv.inc"
-SDValue MSP430TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
- SelectionDAG &DAG) {
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch (CC) {
+SDValue
+MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ switch (CallConv) {
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
- return LowerCCCArguments(Op, DAG);
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
}
}
-SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- unsigned CallingConv = TheCall->getCallingConv();
- switch (CallingConv) {
+SDValue
+MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ switch (CallConv) {
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
case CallingConv::C:
- return LowerCCCCallTo(Op, DAG, CallingConv);
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, Ins, dl, DAG, InVals);
}
}
@@ -166,40 +236,46 @@ SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
/// generate load operations for arguments placed on the stack.
// FIXME: struct return stuff
// FIXME: varargs
-SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
- SelectionDAG &DAG) {
+SDValue
+MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- unsigned CC = MF.getFunction()->getCallingConv();
- DebugLoc dl = Op.getDebugLoc();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MSP430);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
assert(!isVarArg && "Varargs not supported yet");
- SmallVector<SDValue, 16> ArgValues;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) {
// Arguments passed in registers
- MVT RegVT = VA.getLocVT();
- switch (RegVT.getSimpleVT()) {
- default:
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << RegVT.getSimpleVT()
- << "\n";
- abort();
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
case MVT::i16:
unsigned VReg =
RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
- SDValue ArgValue = DAG.getCopyFromReg(Root, dl, VReg, RegVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
// If this is an 8-bit value, it is really passed promoted to 16
// bits. Insert an assert[sz]ext to capture this, then truncate to the
@@ -214,7 +290,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
if (VA.getLocInfo() != CCValAssign::Full)
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
- ArgValues.push_back(ArgValue);
+ InVals.push_back(ArgValue);
}
} else {
// Sanity check
@@ -222,8 +298,8 @@ SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
if (ObjSize > 2) {
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << VA.getLocVT().getSimpleVT()
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << VA.getLocVT().getSimpleVT().SimpleTy
<< "\n";
}
// Create the frame index object for this incoming parameter...
@@ -232,30 +308,29 @@ SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
- ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN,
- PseudoSourceValue::getFixedStack(FI), 0));
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0));
}
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
-SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+SDValue
+MSP430TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
- // Analize return values of ISD::RET
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_MSP430);
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
@@ -265,8 +340,6 @@ SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- // The chain is always operand #0
- SDValue Chain = Op.getOperand(0);
SDValue Flag;
// Copy the result values into the output registers.
@@ -274,10 +347,8 @@ SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- // ISD::RET => ret chain, (regnum1,val1), ...
- // So i*2+1 index only the regnums
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Op.getOperand(i*2+1), Flag);
+ Outs[i].Val, Flag);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad.
@@ -294,19 +365,21 @@ SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
/// LowerCCCCallTo - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: sret.
-SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
- unsigned CC) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
-
+SDValue
+MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(TheCall, CC_MSP430);
+ CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -322,12 +395,11 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- // Arguments start after the 5 first operands of ISD::CALL
- SDValue Arg = TheCall->getArg(i);
+ SDValue Arg = Outs[i].Val;
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
@@ -412,50 +484,43 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
- Op.getResNo());
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
}
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. Returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode*
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallSDNode *TheCall,
- unsigned CallingConv,
- SelectionDAG &DAG) {
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = TheCall->getDebugLoc();
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(TheCall, RetCC_MSP430);
- SmallVector<SDValue, 8> ResultVals;
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
- ResultVals.push_back(Chain.getValue(0));
+ InVals.push_back(Chain.getValue(0));
}
- ResultVals.push_back(Chain);
-
- // Merge everything together with a MERGE_VALUES node.
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).getNode();
+ return Chain;
}
SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
SelectionDAG &DAG) {
unsigned Opc = Op.getOpcode();
SDNode* N = Op.getNode();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
// We currently only lower shifts of constant argument.
@@ -511,7 +576,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, unsigned &TargetCC,
// FIXME: Handle jump negative someday
TargetCC = MSP430::COND_INVALID;
switch (CC) {
- default: assert(0 && "Invalid integer condition!");
+ default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ:
TargetCC = MSP430::COND_E; // aka COND_Z
break;
@@ -585,7 +650,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
SelectionDAG &DAG) {
SDValue Val = Op.getOperand(0);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
assert(VT == MVT::i16 && "Only support i16 for now!");
@@ -616,7 +681,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
MachineBasicBlock*
MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == MSP430::Select16 ||
@@ -646,6 +712,10 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addImm(MI->getOperand(3).getImm());
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
+ // Inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ EM->insert(std::make_pair(*SI, copy1MBB));
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
copy1MBB->transferSuccessors(BB);
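
Under the new hooks, arguments arrive pre-split as Ins/Outs vectors and each value gets a CCValAssign; before copying an argument to its assigned location, its LocInfo may require widening, exactly as in the switch inside LowerCCCCallTo above. A standalone toy of that promotion dispatch — plain C++ stand-ins, where LLVM would instead build DAG.getNode(ISD::SIGN_EXTEND, ...) and friends:

// toy_promote.cpp - LocInfo-driven promotion of an i8 argument to its
// 16-bit assigned location. Types and names are illustrative only.
#include <cassert>
#include <cstdint>
#include <iostream>

enum LocInfo { Full, SExt, ZExt, AExt };

static uint16_t promote(int8_t Arg, LocInfo LI) {
  switch (LI) {
  case Full: return (uint8_t)Arg;            // no promotion needed; widened
                                             // here only for the toy's return type
  case SExt: return (uint16_t)(int16_t)Arg;  // sign-extend
  case ZExt: return (uint16_t)(uint8_t)Arg;  // zero-extend
  case AExt: return (uint16_t)(uint8_t)Arg;  // any-extend: upper bits are
                                             // unspecified; the toy zeroes them
  }
  assert(0 && "Unknown loc info!");
  return 0;
}

int main() {
  std::cout << std::hex << promote(-1, SExt) << "\n"; // ffff
  std::cout << std::hex << promote(-1, ZExt) << "\n"; // ff
}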
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 4a90a0eb2639..fdbc384f1df0 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -33,7 +33,7 @@ namespace llvm {
/// Y = RRC X, rotate right via carry
RRC,
- /// CALL/TAILCALL - These operations represent an abstract call
+ /// CALL - These operations represent an abstract call
/// instruction, which includes a bunch of information.
CALL,
@@ -77,10 +77,6 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
SDValue LowerShifts(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
@@ -88,16 +84,58 @@ namespace llvm {
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
- unsigned CC);
- SDNode* LowerCallResult(SDValue Chain, SDValue InFlag,
- CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG);
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
const MSP430Subtarget &Subtarget;
const MSP430TargetMachine &TM;
};
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 91112c3d732f..37fbb6d9999b 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -44,7 +45,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIdx).addImm(0)
.addReg(SrcReg, getKillRegState(isKill));
else
- assert(0 && "Cannot store this register to stack slot!");
+ llvm_unreachable("Cannot store this register to stack slot!");
}
void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -61,7 +62,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
.addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
else
- assert(0 && "Cannot store this register to stack slot!");
+ llvm_unreachable("Cannot store this register to stack slot!");
}
bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
@@ -171,7 +172,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Conditional branch.
unsigned Count = 0;
- assert(0 && "Implement conditional branches!");
+ llvm_unreachable("Implement conditional branches!");
return Count;
}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 39c08e40be46..f7e0d2bad638 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -155,7 +155,7 @@ let isCall = 1 in
let Defs = [R12W, R13W, R14W, R15W, SRW],
Uses = [SPW] in {
def CALLi : Pseudo<(outs), (ins i16imm:$dst, variable_ops),
- "call\t${dst:call}", [(MSP430call imm:$dst)]>;
+ "call\t$dst", [(MSP430call imm:$dst)]>;
def CALLr : Pseudo<(outs), (ins GR16:$dst, variable_ops),
"call\t$dst", [(MSP430call GR16:$dst)]>;
def CALLm : Pseudo<(outs), (ins memsrc:$dst, variable_ops),
@@ -243,6 +243,13 @@ def MOV16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
"mov.w\t{$src, $dst}",
[(store GR16:$src, addr:$dst)]>;
+def MOV8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 (load addr:$src)), addr:$dst)]>;
+def MOV16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 (load addr:$src)), addr:$dst)]>;
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions
@@ -671,30 +678,26 @@ def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
let isTwoAddress = 0 in {
def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
"bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
"bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
"bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
"bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
"bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
}
} // isTwoAddress = 1
@@ -722,59 +725,6 @@ def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2),
"cmp.w\t{$src1, $src2}",
[(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
-// FIXME: imm is allowed only on src operand, not on dst.
-
-//def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
-// "cmp.b\t{$src1, $src2}",
-// [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
-//def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
-// "cmp.w\t{$src1, $src2}",
-// [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
-
-//def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
-// "cmp.b\t{$src1, $src2}",
-// [(MSP430cmp (load addr:$src1), (i8 imm:$src2)), (implicit SRW)]>;
-//def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
-// "cmp.w\t{$src1, $src2}",
-// [(MSP430cmp (load addr:$src1), (i16 imm:$src2)), (implicit SRW)]>;
-
-
-// Imm 0, +1, +2, +4, +8 are encoded via constant generator registers.
-// That's why we can use them as dest operands.
-// We don't define new class for them, since they would need special encoding
-// in the future.
-
-def CMP8ri0 : Pseudo<(outs), (ins GR8:$src1),
- "cmp.b\t{$src1, #0}",
- [(MSP430cmp GR8:$src1, 0), (implicit SRW)]>;
-def CMP16ri0: Pseudo<(outs), (ins GR16:$src1),
- "cmp.w\t{$src1, #0}",
- [(MSP430cmp GR16:$src1, 0), (implicit SRW)]>;
-def CMP8ri1 : Pseudo<(outs), (ins GR8:$src1),
- "cmp.b\t{$src1, #1}",
- [(MSP430cmp GR8:$src1, 1), (implicit SRW)]>;
-def CMP16ri1: Pseudo<(outs), (ins GR16:$src1),
- "cmp.w\t{$src1, #1}",
- [(MSP430cmp GR16:$src1, 1), (implicit SRW)]>;
-def CMP8ri2 : Pseudo<(outs), (ins GR8:$src1),
- "cmp.b\t{$src1, #2}",
- [(MSP430cmp GR8:$src1, 2), (implicit SRW)]>;
-def CMP16ri2: Pseudo<(outs), (ins GR16:$src1),
- "cmp.w\t{$src1, #2}",
- [(MSP430cmp GR16:$src1, 2), (implicit SRW)]>;
-def CMP8ri4 : Pseudo<(outs), (ins GR8:$src1),
- "cmp.b\t{$src1, #4}",
- [(MSP430cmp GR8:$src1, 4), (implicit SRW)]>;
-def CMP16ri4: Pseudo<(outs), (ins GR16:$src1),
- "cmp.w\t{$src1, #4}",
- [(MSP430cmp GR16:$src1, 4), (implicit SRW)]>;
-def CMP8ri8 : Pseudo<(outs), (ins GR8:$src1),
- "cmp.b\t{$src1, #8}",
- [(MSP430cmp GR8:$src1, 8), (implicit SRW)]>;
-def CMP16ri8: Pseudo<(outs), (ins GR16:$src1),
- "cmp.w\t{$src1, #8}",
- [(MSP430cmp GR16:$src1, 8), (implicit SRW)]>;
-
def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2),
"cmp.b\t{$src1, $src2}",
[(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>;
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
new file mode 100644
index 000000000000..069313e2ef0b
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -0,0 +1,20 @@
+//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCAsmInfo.h"
+using namespace llvm;
+
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
+ AlignmentIsInBytes = false;
+ AllowNameToStartWithDigit = true;
+}
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MSP430MCAsmInfo.h
new file mode 100644
index 000000000000..8318029ae78d
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+ struct MSP430MCAsmInfo : public MCAsmInfo {
+ explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index d40bac73eab8..1a5893e4bfbc 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -45,7 +46,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
+const TargetRegisterClass *const *
MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
@@ -58,8 +59,7 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
return CalleeSavedRegClasses;
}
-BitVector
-MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
// Mark 4 special registers as reserved.
@@ -75,7 +75,8 @@ MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass* MSP430RegisterInfo::getPointerRegClass() const {
+const TargetRegisterClass *
+MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
return &MSP430::GR16RegClass;
}
@@ -146,9 +147,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void
+unsigned
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -186,7 +188,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(BasePtr, false);
if (Offset == 0)
- return;
+ return 0;
// We need to materialize the offset via add instruction.
unsigned DstReg = MI.getOperand(0).getReg();
@@ -197,11 +199,12 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
.addReg(DstReg).addImm(Offset);
- return;
+ return 0;
}
MI.getOperand(i).ChangeToRegister(BasePtr, false);
MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return 0;
}
void
@@ -291,7 +294,7 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
switch (RetOpcode) {
case MSP430::RET: break; // These are ok
default:
- assert(0 && "Can only insert epilog into returning blocks");
+ llvm_unreachable("Can only insert epilog into returning blocks");
}
// Get the number of bytes to allocate from the FrameInfo
@@ -310,7 +313,6 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
NumBytes = StackSize - CSSize;
// Skip the callee-saved pop instructions.
- MachineBasicBlock::iterator LastCSPop = MBBI;
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
@@ -327,7 +329,16 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
// mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
if (MFI->hasVarSizedObjects()) {
- assert(0 && "Not implemented yet!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ if (CSSize) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(CSSize);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
} else {
// adjust stack pointer back: SPW += numbytes
if (NumBytes) {
@@ -349,7 +360,7 @@ unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const {
}
int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "Not implemented yet!");
+ llvm_unreachable("Not implemented yet!");
return 0;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index a210e36e001d..5f3a216866b7 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -40,7 +40,7 @@ public:
getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- const TargetRegisterClass* getPointerRegClass() const;
+ const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(MachineFunction &MF) const;
@@ -49,8 +49,9 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index ef9e10339bc3..1346cb9a04dc 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -14,12 +14,10 @@
#include "MSP430Subtarget.h"
#include "MSP430.h"
#include "MSP430GenSubtarget.inc"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-MSP430Subtarget::MSP430Subtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS) {
+MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &FS) {
std::string CPU = "generic";
// Parse features string.
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 96c8108b71bc..1070544f0773 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -19,17 +19,14 @@
#include <string>
namespace llvm {
-class Module;
-class TargetMachine;
class MSP430Subtarget : public TargetSubtarget {
bool ExtendedInsts;
public:
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- MSP430Subtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS);
+ MSP430Subtarget(const std::string &TT, const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index dd09d43da984..5e21f8ea29ef 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -12,43 +12,30 @@
//===----------------------------------------------------------------------===//
#include "MSP430.h"
-#include "MSP430TargetAsmInfo.h"
+#include "MSP430MCAsmInfo.h"
#include "MSP430TargetMachine.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-/// MSP430TargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int MSP430TargetMachineModule;
-int MSP430TargetMachineModule = 0;
-
-
-// Register the targets
-static RegisterTarget<MSP430TargetMachine>
-X("msp430", "MSP430 [experimental]");
-
-// Force static initialization.
-extern "C" void LLVMInitializeMSP430Target() { }
+extern "C" void LLVMInitializeMSP430Target() {
+ // Register the target.
+ RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
+ RegisterAsmInfo<MSP430MCAsmInfo> Z(TheMSP430Target);
+}
-MSP430TargetMachine::MSP430TargetMachine(const Module &M,
+MSP430TargetMachine::MSP430TargetMachine(const Target &T,
+ const std::string &TT,
const std::string &FS) :
- Subtarget(*this, M, FS),
+ LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
-const TargetAsmInfo *MSP430TargetMachine::createTargetAsmInfo() const {
- return new MSP430TargetAsmInfo(*this);
-}
bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
@@ -57,23 +44,3 @@ bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
return false;
}
-bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // Output assembly language.
- PM.add(createMSP430CodePrinterPass(Out, *this, Verbose));
- return false;
-}
-
-unsigned MSP430TargetMachine::getModuleMatchQuality(const Module &M) {
- std::string TT = M.getTargetTriple();
-
- // We strongly match msp430
- if (TT.size() >= 6 && TT[0] == 'm' && TT[1] == 's' && TT[2] == 'p' &&
- TT[3] == '4' && TT[4] == '3' && TT[5] == '0')
- return 20;
-
- return 0;
-}
-
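
With the move to TargetRegistry, the static RegisterTarget object and getModuleMatchQuality are gone; a client now calls the per-target initializers explicitly and looks the target up by triple. A minimal sketch of that driver-side flow against this tree's headers (error handling elided):

// toy_driver.cpp - client-side target lookup via the new TargetRegistry,
// using the initializers defined in this patch.
#include "llvm/Target/TargetRegistry.h"
#include <iostream>
#include <string>

extern "C" void LLVMInitializeMSP430TargetInfo();
extern "C" void LLVMInitializeMSP430Target();

int main() {
  LLVMInitializeMSP430TargetInfo(); // registers TheMSP430Target by name/triple
  LLVMInitializeMSP430Target();     // registers TargetMachine + MCAsmInfo factories

  std::string Err;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("msp430-generic-generic", Err);
  if (!T) {
    std::cerr << "lookup failed: " << Err << "\n";
    return 1;
  }
  std::cout << "found target: " << T->getName() << "\n";
  return 0;
}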
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index d9ffa2b5ac8f..d38614018c84 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -37,11 +37,9 @@ class MSP430TargetMachine : public LLVMTargetMachine {
// any MSP430 specific FrameInfo class.
TargetFrameInfo FrameInfo;
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
public:
- MSP430TargetMachine(const Module &M, const std::string &FS);
+ MSP430TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
@@ -57,10 +55,6 @@ public:
}
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel, bool Verbose,
- raw_ostream &Out);
- static unsigned getModuleMatchQuality(const Module &M);
}; // MSP430TargetMachine.
} // end namespace llvm
diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile
index 45cb3aa45b85..4b18bc9ab428 100644
--- a/lib/Target/MSP430/Makefile
+++ b/lib/Target/MSP430/Makefile
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMMSP430
+LIBRARYNAME = LLVMMSP430CodeGen
TARGET = MSP430
# Make sure that tblgen is run, first thing.
@@ -17,5 +17,7 @@ BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
MSP430GenSubtarget.inc
+DIRS = AsmPrinter TargetInfo
+
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..1d408d0cb5be
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430Info
+ MSP430TargetInfo.cpp
+ )
+
+add_dependencies(LLVMMSP430Info MSP430Table_gen)
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
new file mode 100644
index 000000000000..f9ca5c49c979
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMSP430Target;
+
+extern "C" void LLVMInitializeMSP430TargetInfo() {
+ RegisterTarget<Triple::msp430>
+ X(TheMSP430Target, "msp430", "MSP430 [experimental]");
+}
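Splitting the Target object into its own TargetInfo library means a tool can enumerate backends without linking any codegen. A sketch of such a consumer, hedged on the 2.6-era registry iterator API (listRegisteredTargets is illustrative):

    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: walk every Target registered via an LLVMInitialize*TargetInfo()
    // entry point such as the one added above.
    static void listRegisteredTargets() {
      for (llvm::TargetRegistry::iterator I = llvm::TargetRegistry::begin(),
                                          E = llvm::TargetRegistry::end();
           I != E; ++I)
        llvm::errs() << I->getName() << " - "
                     << I->getShortDescription() << "\n";
    }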
diff --git a/lib/Target/MSP430/TargetInfo/Makefile b/lib/Target/MSP430/TargetInfo/Makefile
new file mode 100644
index 000000000000..abb08f2548ee
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/TargetInfo/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
index 197cc2921edb..56c68a6b4160 100644
--- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -4,6 +4,6 @@ include_directories(
)
add_llvm_library(LLVMMipsAsmPrinter
- MipsAsmPrinter.cpp
+ MipsAsmPrinter.cpp
)
add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) \ No newline at end of file
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index cb4047988eb9..ccf9ee518d33 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -22,24 +22,28 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/MDNode.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
@@ -50,8 +54,8 @@ namespace {
class VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter {
const MipsSubtarget *Subtarget;
public:
- explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM,
- const TargetAsmInfo *T, bool V)
+ explicit MipsAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
: AsmPrinter(O, TM, T, V) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
}
@@ -68,34 +72,25 @@ namespace {
const char *Modifier = 0);
void printFCCOperand(const MachineInstr *MI, int opNum,
const char *Modifier = 0);
- void printModuleLevelGV(const GlobalVariable* GVar);
+ void PrintGlobalVariable(const GlobalVariable *GVar);
void printSavedRegsBitmask(MachineFunction &MF);
void printHex32(unsigned int Value);
- const char *emitCurrentABIString(void);
+ const char *emitCurrentABIString();
void emitFunctionStart(MachineFunction &MF);
void emitFunctionEnd(MachineFunction &MF);
void emitFrameDirective(MachineFunction &MF);
- bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
bool runOnMachineFunction(MachineFunction &F);
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
};
} // end of anonymous namespace
#include "MipsGenAsmWriter.inc"
-/// createMipsCodePrinterPass - Returns a pass that prints the MIPS
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description. This should work
-/// regardless of whether the function is in SSA form.
-FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o,
- MipsTargetMachine &tm,
- bool verbose) {
- return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
-}
-
//===----------------------------------------------------------------------===//
//
// Mips Asm Directives
@@ -186,9 +181,7 @@ printHex32(unsigned int Value)
//===----------------------------------------------------------------------===//
/// Frame Directive
-void MipsAsmPrinter::
-emitFrameDirective(MachineFunction &MF)
-{
+void MipsAsmPrinter::emitFrameDirective(MachineFunction &MF) {
const TargetRegisterInfo &RI = *TM.getRegisterInfo();
unsigned stackReg = RI.getFrameRegister(MF);
@@ -196,16 +189,14 @@ emitFrameDirective(MachineFunction &MF)
unsigned stackSize = MF.getFrameInfo()->getStackSize();
- O << "\t.frame\t" << '$' << LowercaseString(RI.get(stackReg).AsmName)
+ O << "\t.frame\t" << '$' << LowercaseString(getRegisterName(stackReg))
<< ',' << stackSize << ','
- << '$' << LowercaseString(RI.get(returnReg).AsmName)
+ << '$' << LowercaseString(getRegisterName(returnReg))
<< '\n';
}
/// Emit Set directives.
-const char * MipsAsmPrinter::
-emitCurrentABIString(void)
-{
+const char *MipsAsmPrinter::emitCurrentABIString() {
switch(Subtarget->getTargetABI()) {
case MipsSubtarget::O32: return "abi32";
case MipsSubtarget::O64: return "abiO64";
@@ -215,17 +206,15 @@ emitCurrentABIString(void)
default: break;
}
- assert(0 && "Unknown Mips ABI");
+ llvm_unreachable("Unknown Mips ABI");
return NULL;
}
/// Emit the directives used by GAS on the start of functions
-void MipsAsmPrinter::
-emitFunctionStart(MachineFunction &MF)
-{
+void MipsAsmPrinter::emitFunctionStart(MachineFunction &MF) {
// Print out the label for the function.
const Function *F = MF.getFunction();
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
// 2 bits aligned
EmitAlignment(MF.getAlignment(), F);
@@ -235,7 +224,7 @@ emitFunctionStart(MachineFunction &MF)
printVisibility(CurrentFnName, F->getVisibility());
- if ((TAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux())
+ if ((MAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux())
O << "\t.type\t" << CurrentFnName << ", @function\n";
O << CurrentFnName << ":\n";
@@ -247,9 +236,7 @@ emitFunctionStart(MachineFunction &MF)
}
/// Emit the directives used by GAS on the end of functions
-void MipsAsmPrinter::
-emitFunctionEnd(MachineFunction &MF)
-{
+void MipsAsmPrinter::emitFunctionEnd(MachineFunction &MF) {
// There are instructions for these macros, but they must
// always be at the function end, and we can't emit them
// earlier without breaking the BB logic.
@@ -257,15 +244,13 @@ emitFunctionEnd(MachineFunction &MF)
O << "\t.set\treorder\n";
O << "\t.end\t" << CurrentFnName << '\n';
- if (TAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux())
+ if (MAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux())
O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
}
/// runOnMachineFunction - This uses the printMachineInstruction()
/// method to print assembly for each instruction.
-bool MipsAsmPrinter::
-runOnMachineFunction(MachineFunction &MF)
-{
+bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
SetupMachineFunction(MF);
@@ -287,14 +272,21 @@ runOnMachineFunction(MachineFunction &MF)
// Print a label for the basic block.
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
+ processDebugLoc(II, true);
+
// Print the assembly for the instruction.
printInstruction(II);
+
+ if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ EmitComments(*II);
+ O << '\n';
+
+ processDebugLoc(II, false);
++EmittedInsts;
}
@@ -310,10 +302,8 @@ runOnMachineFunction(MachineFunction &MF)
}
// Print out an operand for an inline asm expression.
-bool MipsAsmPrinter::
-PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode)
-{
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                     unsigned AsmVariant, const char *ExtraCode) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
@@ -322,57 +312,33 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-void MipsAsmPrinter::
-printOperand(const MachineInstr *MI, int opNum)
-{
+void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand(opNum);
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
bool closeP = false;
- bool isPIC = (TM.getRelocationModel() == Reloc::PIC_);
- bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
-
- // %hi and %lo used on mips gas to load global addresses on
- // static code. %got is used to load global addresses when
- // using PIC_. %call16 is used to load direct call targets
- // on PIC_ and small code size. %call_lo and %call_hi load
- // direct call targets on PIC_ and large code size.
- if (MI->getOpcode() == Mips::LUi && !MO.isReg() && !MO.isImm()) {
- if ((isPIC) && (isCodeLarge))
- O << "%call_hi(";
- else
- O << "%hi(";
+
+ if (MO.getTargetFlags())
closeP = true;
- } else if ((MI->getOpcode() == Mips::ADDiu) && !MO.isReg() && !MO.isImm()) {
- const MachineOperand &firstMO = MI->getOperand(opNum-1);
- if (firstMO.getReg() == Mips::GP)
- O << "%gp_rel(";
+
+ switch(MO.getTargetFlags()) {
+ case MipsII::MO_GPREL: O << "%gp_rel("; break;
+ case MipsII::MO_GOT_CALL: O << "%call16("; break;
+ case MipsII::MO_GOT:
+ if (MI->getOpcode() == Mips::LW)
+ O << "%got(";
else
O << "%lo(";
- closeP = true;
- } else if ((isPIC) && (MI->getOpcode() == Mips::LW) &&
- (!MO.isReg()) && (!MO.isImm())) {
- const MachineOperand &firstMO = MI->getOperand(opNum-1);
- const MachineOperand &lastMO = MI->getOperand(opNum+1);
- if ((firstMO.isReg()) && (lastMO.isReg())) {
- if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() == Mips::GP)
- && (!isCodeLarge))
- O << "%call16(";
- else if ((firstMO.getReg() != Mips::T9) && (lastMO.getReg() == Mips::GP))
- O << "%got(";
- else if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() != Mips::GP)
- && (isCodeLarge))
- O << "%call_lo(";
- closeP = true;
- }
+ break;
+ case MipsII::MO_ABS_HILO:
+ if (MI->getOpcode() == Mips::LUi)
+ O << "%hi(";
+ else
+ O << "%lo(";
+ break;
}
-
- switch (MO.getType())
- {
+
+ switch (MO.getType()) {
case MachineOperand::MO_Register:
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- O << '$' << LowercaseString (RI.get(MO.getReg()).AsmName);
- else
- O << '$' << MO.getReg();
+ O << '$' << LowercaseString(getRegisterName(MO.getReg()));
break;
case MachineOperand::MO_Immediate:
@@ -380,14 +346,11 @@ printOperand(const MachineInstr *MI, int opNum)
break;
case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress:
- {
- const GlobalValue *GV = MO.getGlobal();
- O << Mang->getValueName(GV);
- }
+ O << Mang->getMangledName(MO.getGlobal());
break;
case MachineOperand::MO_ExternalSymbol:
@@ -395,25 +358,23 @@ printOperand(const MachineInstr *MI, int opNum)
break;
case MachineOperand::MO_JumpTableIndex:
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << MO.getIndex();
break;
case MachineOperand::MO_ConstantPoolIndex:
- O << TAI->getPrivateGlobalPrefix() << "CPI"
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
<< getFunctionNumber() << "_" << MO.getIndex();
break;
default:
- O << "<unknown operand type>"; abort (); break;
+ llvm_unreachable("<unknown operand type>");
}
if (closeP) O << ")";
}
-void MipsAsmPrinter::
-printUnsignedImm(const MachineInstr *MI, int opNum)
-{
+void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand(opNum);
if (MO.getType() == MachineOperand::MO_Immediate)
O << (unsigned short int)MO.getImm();
@@ -422,8 +383,7 @@ printUnsignedImm(const MachineInstr *MI, int opNum)
}
void MipsAsmPrinter::
-printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier)
-{
+printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) {
// when using stack locations for non-load/store instructions,
// print them the same way as all normal 3-operand instructions.
if (Modifier && !strcmp(Modifier, "stackloc")) {
@@ -443,17 +403,14 @@ printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier)
}
void MipsAsmPrinter::
-printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier)
-{
+printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) {
const MachineOperand& MO = MI->getOperand(opNum);
O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
}
-bool MipsAsmPrinter::
-doInitialization(Module &M)
-{
- Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
-
+void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ // FIXME: Use SwitchSection.
+
// Tell the assembler which ABI we are using
O << "\t.section .mdebug." << emitCurrentABIString() << '\n';
@@ -464,12 +421,9 @@ doInitialization(Module &M)
// return to previous section
O << "\t.previous" << '\n';
-
- return false; // success
}
-void MipsAsmPrinter::
-printModuleLevelGV(const GlobalVariable* GVar) {
+void MipsAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer())
@@ -480,10 +434,8 @@ printModuleLevelGV(const GlobalVariable* GVar) {
return;
O << "\n\n";
- std::string name = Mang->getValueName(GVar);
+ std::string name = Mang->getMangledName(GVar);
Constant *C = GVar->getInitializer();
- if (isa<MDNode>(C) || isa<MDString>(C))
- return;
const Type *CTy = C->getType();
unsigned Size = TD->getTypeAllocSize(CTy);
const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
@@ -503,7 +455,8 @@ printModuleLevelGV(const GlobalVariable* GVar) {
printVisibility(name, GVar->getVisibility());
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
if (C->isNullValue() && !GVar->hasSection()) {
if (!GVar->isThreadLocal() &&
@@ -513,8 +466,8 @@ printModuleLevelGV(const GlobalVariable* GVar) {
if (GVar->hasLocalLinkage())
O << "\t.local\t" << name << '\n';
- O << TAI->getCOMMDirective() << name << ',' << Size;
- if (TAI->getCOMMDirectiveTakesAlignment())
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
O << ',' << (1 << Align);
O << '\n';
@@ -536,29 +489,27 @@ printModuleLevelGV(const GlobalVariable* GVar) {
// or something. For now, just emit them as external.
case GlobalValue::ExternalLinkage:
// If external or appending, declare as a global symbol
- O << TAI->getGlobalDirective() << name << '\n';
+ O << MAI->getGlobalDirective() << name << '\n';
// Fall Through
case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::InternalLinkage:
if (CVA && CVA->isCString())
printSizeAndType = false;
break;
case GlobalValue::GhostLinkage:
- cerr << "Should not have any unmaterialized functions!\n";
- abort();
+ llvm_unreachable("Should not have any unmaterialized functions!");
case GlobalValue::DLLImportLinkage:
- cerr << "DLLImport linkage is not supported by this target!\n";
- abort();
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
case GlobalValue::DLLExportLinkage:
- cerr << "DLLExport linkage is not supported by this target!\n";
- abort();
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
default:
- assert(0 && "Unknown linkage type!");
+ llvm_unreachable("Unknown linkage type!");
}
EmitAlignment(Align, GVar);
- if (TAI->hasDotTypeDotSizeDirective() && printSizeAndType) {
+ if (MAI->hasDotTypeDotSizeDirective() && printSizeAndType) {
O << "\t.type " << name << ",@object\n";
O << "\t.size " << name << ',' << Size << '\n';
}
@@ -567,26 +518,9 @@ printModuleLevelGV(const GlobalVariable* GVar) {
EmitGlobalConstant(C);
}
-bool MipsAsmPrinter::
-doFinalization(Module &M)
-{
- // Print out module-level global variables here.
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
- printModuleLevelGV(I);
-
- O << '\n';
-
- return AsmPrinter::doFinalization(M);
-}
-
-namespace {
- static struct Register {
- Register() {
- MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass);
- }
- } Registrator;
-}
// Force static initialization.
-extern "C" void LLVMInitializeMipsAsmPrinter() { }
+extern "C" void LLVMInitializeMipsAsmPrinter() {
+ RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+}
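With the Registrator struct and createMipsCodePrinterPass() gone, a driver pulls printers in through the generated initializers. A sketch, under the assumption that this tree's llvm/Target/TargetSelect.h provides the usual InitializeAll* helpers:

    #include "llvm/Target/TargetSelect.h"

    // Sketch: the three calls mirror the new library split.
    static void initAllBackends() {
      llvm::InitializeAllTargetInfos();  // TargetInfo libs: names and triples
      llvm::InitializeAllTargets();      // TargetMachine and MCAsmInfo factories
      llvm::InitializeAllAsmPrinters();  // e.g. the Mips and Mipsel printers above
    }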
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index d27e6f174d08..0e3bf5a96d40 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -15,10 +15,11 @@ add_llvm_target(MipsCodeGen
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
+ MipsMCAsmInfo.cpp
MipsRegisterInfo.cpp
MipsSubtarget.cpp
- MipsTargetAsmInfo.cpp
MipsTargetMachine.cpp
+ MipsTargetObjectFile.cpp
)
target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 48ab5f994704..078034532d50 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -17,7 +17,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtarget.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 9b22a91b1609..a9ab050d6f0d 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -21,13 +21,14 @@ namespace llvm {
class MipsTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
- class raw_ostream;
+ class formatted_raw_ostream;
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
- FunctionPass *createMipsCodePrinterPass(raw_ostream &OS,
- MipsTargetMachine &TM,
- bool Verbose);
+
+ extern Target TheMipsTarget;
+ extern Target TheMipselTarget;
+
} // end namespace llvm;
// Defines symbolic names for Mips registers. This defines a mapping from
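The two extern Targets declared above still need definitions and name registrations; by analogy with MSP430TargetInfo.cpp earlier in this patch, a plausible Mips/TargetInfo translation unit looks like this (a sketch, not a hunk from this diff):

    #include "llvm/ADT/Triple.h"
    #include "llvm/Target/TargetRegistry.h"

    namespace llvm {
      Target TheMipsTarget;    // big-endian
      Target TheMipselTarget;  // little-endian
    }

    extern "C" void LLVMInitializeMipsTargetInfo() {
      llvm::RegisterTarget<llvm::Triple::mips>
        X(llvm::TheMipsTarget, "mips", "Mips");
      llvm::RegisterTarget<llvm::Triple::mipsel>
        Y(llvm::TheMipselTarget, "mipsel", "Mipsel");
    }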
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 53de1bbea66e..cc20dd7b4ff6 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -32,6 +32,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -106,22 +108,16 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void MipsDAGToDAGISel::
-InstructionSelect()
-{
+void MipsDAGToDAGISel::InstructionSelect() {
DEBUG(BB->dump());
// Codegen the basic block.
- #ifndef NDEBUG
- DOUT << "===== Instruction selection begins:\n";
- Indent = 0;
- #endif
+ DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(Indent = 0);
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
- #ifndef NDEBUG
- DOUT << "===== Instruction selection ends:\n";
- #endif
+ DEBUG(errs() << "===== Instruction selection ends:\n");
CurDAG->RemoveDeadNodes();
}
@@ -129,7 +125,6 @@ InstructionSelect()
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = BB->getParent();
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
@@ -186,29 +181,23 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
-SDNode* MipsDAGToDAGISel::
-Select(SDValue N)
-{
+SDNode* MipsDAGToDAGISel::Select(SDValue N) {
SDNode *Node = N.getNode();
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
- #ifndef NDEBUG
- DOUT << std::string(Indent, ' ') << "Selecting: ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
- Indent += 2;
- #endif
+ DEBUG(errs().indent(Indent) << "Selecting: ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ DEBUG(Indent += 2);
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- #ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "== ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
- Indent -= 2;
- #endif
+ DEBUG(errs().indent(Indent-2) << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ DEBUG(Indent -= 2);
return NULL;
}
@@ -242,10 +231,10 @@ Select(SDValue N)
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- MVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getTargetNode(Mips::SLTu, dl, VT, Ops, 2);
- SDNode *AddCarry = CurDAG->getTargetNode(Mips::ADDu, dl, VT,
- SDValue(Carry,0), RHS);
+ EVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag,
LHS, SDValue(AddCarry,0));
@@ -265,13 +254,13 @@ Select(SDValue N)
else
Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
- SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+ SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2);
SDValue InFlag = SDValue(Node, 0);
- SDNode *Lo = CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32,
- MVT::Flag, InFlag);
+ SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
+ MVT::Flag, InFlag);
InFlag = SDValue(Lo,1);
- SDNode *Hi = CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
if (!N.getValue(0).use_empty())
ReplaceUses(N.getValue(0), SDValue(Lo,0));
@@ -290,15 +279,15 @@ Select(SDValue N)
SDValue MulOp2 = Node->getOperand(1);
unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- SDNode *MulNode = CurDAG->getTargetNode(MulOp, dl,
- MVT::Flag, MulOp1, MulOp2);
+ SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
+ MVT::Flag, MulOp1, MulOp2);
SDValue InFlag = SDValue(MulNode, 0);
if (MulOp == ISD::MUL)
- return CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag);
else
- return CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
}
/// Div/Rem operations
@@ -317,10 +306,10 @@ Select(SDValue N)
Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
MOp = Mips::MFHI;
}
- SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+ SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2);
SDValue InFlag = SDValue(Node, 0);
- return CurDAG->getTargetNode(MOp, dl, MVT::i32, InFlag);
+ return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag);
}
// Get target GOT address.
@@ -333,7 +322,6 @@ Select(SDValue N)
/// be loaded with 3 instructions.
case MipsISD::JmpLink: {
if (TM.getRelocationModel() == Reloc::PIC_) {
- //bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
SDValue Chain = Node->getOperand(0);
SDValue Callee = Node->getOperand(1);
SDValue T9Reg = CurDAG->getRegister(Mips::T9, MVT::i32);
@@ -347,7 +335,7 @@ Select(SDValue N)
// Use load to get GOT target
SDValue Ops[] = { Callee, GPReg, Chain };
- SDValue Load = SDValue(CurDAG->getTargetNode(Mips::LW, dl, MVT::i32,
+ SDValue Load = SDValue(CurDAG->getMachineNode(Mips::LW, dl, MVT::i32,
MVT::Other, Ops, 3), 0);
Chain = Load.getValue(1);
@@ -358,7 +346,7 @@ Select(SDValue N)
Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Callee, InFlag);
// Emit Jump and Link Register
- SDNode *ResNode = CurDAG->getTargetNode(Mips::JALR, dl, MVT::Other,
+ SDNode *ResNode = CurDAG->getMachineNode(Mips::JALR, dl, MVT::Other,
MVT::Flag, T9Reg, Chain);
Chain = SDValue(ResNode, 0);
InFlag = SDValue(ResNode, 1);
@@ -372,15 +360,13 @@ Select(SDValue N)
// Select the default instruction
SDNode *ResNode = SelectCode(N);
- #ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(errs().indent(Indent-2) << "=> ");
if (ResNode == NULL || ResNode == N.getNode())
DEBUG(N.getNode()->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
- DOUT << "\n";
- Indent -= 2;
- #endif
+ DEBUG(errs() << "\n");
+ DEBUG(Indent -= 2);
return ResNode;
}
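The DOUT-to-DEBUG conversion in the hunks above follows a single idiom; a compact sketch (assuming the 2.6-era llvm/Support/Debug.h macros, with traceSelect as an illustrative name):

    #define DEBUG_TYPE "mips-isel"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: DEBUG(...) compiles to nothing in NDEBUG builds and is gated
    // at run time by -debug or -debug-only=mips-isel, so the manual
    // #ifndef NDEBUG fences around the old DOUT stream become unnecessary.
    static void traceSelect(unsigned Indent, const char *Phase) {
      DEBUG(llvm::errs().indent(Indent) << Phase << "\n");
    }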
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 3d2e2b7a773d..ab8790ad7dbd 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-lower"
-
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
+#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -31,13 +31,11 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-const char *MipsTargetLowering::
-getTargetNodeName(unsigned Opcode) const
-{
- switch (Opcode)
- {
+const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
case MipsISD::JmpLink : return "MipsISD::JmpLink";
case MipsISD::Hi : return "MipsISD::Hi";
case MipsISD::Lo : return "MipsISD::Lo";
@@ -54,8 +52,8 @@ getTargetNodeName(unsigned Opcode) const
}
MipsTargetLowering::
-MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
-{
+MipsTargetLowering(MipsTargetMachine &TM)
+ : TargetLowering(TM, new MipsTargetObjectFile()) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
// Mips does not have i1 type, so use i32 for
@@ -82,6 +80,10 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // MIPS doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// Used by legalize types to correctly generate the setcc result.
// Without this, every float setcc comes with an AND/OR with the result;
// we don't want this, since the fpcmp result goes to a flag register,
@@ -91,7 +93,6 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::RET, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
@@ -119,11 +120,20 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
// We don't have line number support yet.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
@@ -154,7 +164,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
computeRegisterProperties();
}
-MVT MipsTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -170,16 +180,13 @@ LowerOperation(SDValue Op, SelectionDAG &DAG)
{
case ISD::AND: return LowerANDOR(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::CALL: return LowerCALL(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::OR: return LowerANDOR(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
}
@@ -202,37 +209,6 @@ AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
return VReg;
}
-// A address must be loaded from a small section if its size is less than the
-// small section size threshold. Data in this section must be addressed using
-// gp_rel operator.
-bool MipsTargetLowering::IsInSmallSection(unsigned Size) {
- return (Size > 0 && (Size <= Subtarget->getSSectionThreshold()));
-}
-
-// Discover if this global address can be placed into small data/bss section.
-bool MipsTargetLowering::IsGlobalInSmallSection(GlobalValue *GV)
-{
- const TargetData *TD = getTargetData();
- const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
-
- if (!GVA)
- return false;
-
- const Type *Ty = GV->getType()->getElementType();
- unsigned Size = TD->getTypeAllocSize(Ty);
-
- // if this is a internal constant string, there is a special
- // section for it, but not in small data/bss.
- if (GVA->hasInitializer() && GV->hasLocalLinkage()) {
- Constant *C = GVA->getInitializer();
- const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
- if (CVA && CVA->isCString())
- return false;
- }
-
- return IsInSmallSection(Size);
-}
-
// Get fp branch code (not opcode) from condition code.
static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
@@ -247,7 +223,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
switch(BC) {
default:
- assert(0 && "Unknown branch code");
+ llvm_unreachable("Unknown branch code");
case Mips::BRANCH_T : return Mips::BC1T;
case Mips::BRANCH_F : return Mips::BC1F;
case Mips::BRANCH_TL : return Mips::BC1TL;
@@ -257,7 +233,7 @@ static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
switch (CC) {
- default: assert(0 && "Unknown fp condition code!");
+ default: llvm_unreachable("Unknown fp condition code!");
case ISD::SETEQ:
case ISD::SETOEQ: return Mips::FCOND_EQ;
case ISD::SETUNE: return Mips::FCOND_OGL;
@@ -283,7 +259,8 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
bool isFPCmp = false;
DebugLoc dl = MI->getDebugLoc();
@@ -331,9 +308,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
+ // Also inform sdisel of the edge changes.
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
+ e = BB->succ_end(); i != e; ++i) {
+ EM->insert(std::make_pair(*i, sinkMBB));
sinkMBB->addSuccessor(*i);
+ }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while(!BB->succ_empty())
@@ -508,29 +488,34 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG)
Cond, True, False, CCNode);
}
-SDValue MipsTargetLowering::
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
-{
+SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
- if (!Subtarget->hasABICall()) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue Ops[] = { GA };
+
+ MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
+
// %gp_rel relocation
- if (!isa<Function>(GV) && IsGlobalInSmallSection(GV)) {
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, Ops, 1);
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ MipsII::MO_GPREL);
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
}
// %hi/%lo relocation
- SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ MipsII::MO_ABS_HILO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
- } else { // Abicall relocations, TODO: make this cleaner.
+ } else {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ MipsII::MO_GOT);
SDValue ResNode = DAG.getLoad(MVT::i32, dl,
DAG.getEntryNode(), GA, NULL, 0);
// On functions and global targets not internal linked only
@@ -541,14 +526,14 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
}
- assert(0 && "Dont know how to handle GlobalAddress");
+ llvm_unreachable("Dont know how to handle GlobalAddress");
return SDValue(0,0);
}
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
{
- assert(0 && "TLS not implemented for MIPS.");
+ llvm_unreachable("TLS not implemented for MIPS.");
return SDValue(); // Not reached
}
@@ -559,15 +544,17 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
SDValue HiPart;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HILO;
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
- SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+
+ if (IsPIC) {
SDValue Ops[] = { JTI };
- HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+ HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
} else // Emit Load from Global Pointer
HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0);
@@ -583,7 +570,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
SDValue ResNode;
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
Constant *C = N->getConstVal();
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ MipsII::MO_ABS_HILO);
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -592,8 +580,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
// but the asm printer currently doesn't support this feature without
// hacking it. This feature should come soon so we can uncomment the
// stuff below.
- //if (!Subtarget->hasABICall() &&
- // IsInSmallSection(getTargetData()->getTypeAllocSize(C->getType()))) {
+ //if (IsInSmallSection(C->getType())) {
// SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
@@ -608,13 +595,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
-//
-// The lower operations present on calling convention works on this order:
-// LowerCALL (virt regs --> phys regs, virt regs --> stack)
-// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
-// LowerRET (virt regs --> phys regs)
-// LowerCALL (phys regs --> virt regs)
-//
//===----------------------------------------------------------------------===//
#include "MipsGenCallingConv.inc"
@@ -632,8 +612,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
// go to stack.
//===----------------------------------------------------------------------===//
-static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
+static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
static const unsigned IntRegsSize=4, FloatRegsSize=2;
@@ -699,38 +679,38 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
}
//===----------------------------------------------------------------------===//
-// CALL Calling Convention Implementation
+// Call Calling Convention Implementation
//===----------------------------------------------------------------------===//
-/// LowerCALL - functions arguments are copied from virtual regs to
+/// LowerCall - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall.
-SDValue MipsTargetLowering::
-LowerCALL(SDValue Op, SelectionDAG &DAG)
-{
- MachineFunction &MF = DAG.getMachineFunction();
-
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- bool isVarArg = TheCall->isVarArg();
- unsigned CC = TheCall->getCallingConv();
- DebugLoc dl = TheCall->getDebugLoc();
+SDValue
+MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
// To meet O32 ABI, Mips must always allocate 16 bytes on
// the stack (even if less than 4 are used as arguments)
if (Subtarget->isABI_O32()) {
- int VTsize = MVT(MVT::i32).getSizeInBits()/8;
+ int VTsize = EVT(MVT::i32).getSizeInBits()/8;
MFI->CreateFixedObject(VTsize, (VTsize*3));
- CCInfo.AnalyzeCallOperands(TheCall, CC_MipsO32);
+ CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
} else
- CCInfo.AnalyzeCallOperands(TheCall, CC_Mips);
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -747,12 +727,12 @@ LowerCALL(SDValue Op, SelectionDAG &DAG)
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- SDValue Arg = TheCall->getArg(i);
+ SDValue Arg = Outs[i].Val;
CCValAssign &VA = ArgLocs[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (Subtarget->isABI_O32() && VA.isRegLoc()) {
if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
@@ -825,10 +805,13 @@ LowerCALL(SDValue Op, SelectionDAG &DAG)
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ getPointerTy(), 0, OpFlag);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), OpFlag);
// MipsJmpLink = #chain, #target_address, #opt_in_flags...
// = Chain, Callee, Reg#1, Reg#2, ...
@@ -859,7 +842,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG)
// Create a stack location to hold GP when PIC is used. This stack
// location is used in the function prologue to save GP and also after all
// emitted CALLs to restore GP.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ if (IsPIC) {
// Function can have an arbitrary number of calls, so
// hold the LastArgStackLoc with the biggest offset.
int FI;
@@ -887,75 +870,69 @@ LowerCALL(SDValue Op, SelectionDAG &DAG)
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), Op.getResNo());
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
}
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. Returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *MipsTargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG) {
-
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = TheCall->getDebugLoc();
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(TheCall, RetCC_Mips);
- SmallVector<SDValue, 8> ResultVals;
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag).getValue(1);
+ RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
- ResultVals.push_back(Chain.getValue(0));
+ InVals.push_back(Chain.getValue(0));
}
-
- ResultVals.push_back(Chain);
- // Merge everything together with a MERGE_VALUES node.
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).getNode();
+ return Chain;
}
//===----------------------------------------------------------------------===//
-// FORMAL_ARGUMENTS Calling Convention Implementation
+// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
-/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg
-SDValue MipsTargetLowering::
-LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
-{
- SDValue Root = Op.getOperand(0);
+SDValue
+MipsTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
-
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
if (Subtarget->isABI_O32())
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MipsO32);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
else
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_Mips);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
- SmallVector<SDValue, 16> ArgValues;
SDValue StackPtr;
unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
@@ -965,7 +942,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
// Arguments stored on registers
if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
+ EVT RegVT = VA.getLocVT();
TargetRegisterClass *RC = 0;
if (RegVT == MVT::i32)
@@ -976,12 +953,12 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
if (!Subtarget->isSingleFloat())
RC = Mips::AFGR64RegisterClass;
} else
- assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+ llvm_unreachable("RegVT not supported by LowerFormalArguments Lowering");
// Transform the arguments stored on
// physical registers into virtual ones
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
@@ -1005,14 +982,14 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
VA.getLocReg()+1, RC);
- SDValue ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg2, RegVT);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2);
ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi);
}
}
- ArgValues.push_back(ArgValue);
+ InVals.push_back(ArgValue);
// To meet ABI, when VARARGS are passed on registers, the registers
// must have their values written to the caller stack frame.
@@ -1034,7 +1011,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
// emit an ISD::STORE which stores the
// parameter value to a stack location
- ArgValues.push_back(DAG.getStore(Root, dl, ArgValue, PtrOff, NULL, 0));
+ InVals.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0));
}
} else { // VA.isRegLoc()
@@ -1057,7 +1034,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
}
}
@@ -1070,36 +1047,33 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
-SDValue MipsTargetLowering::
-LowerRET(SDValue Op, SelectionDAG &DAG)
-{
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
// CCValAssign - represents the assignment of
// the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
- // Analize return values of ISD::RET
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Mips);
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
// If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
@@ -1109,8 +1083,6 @@ LowerRET(SDValue Op, SelectionDAG &DAG)
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- // The chain is always operand #0
- SDValue Chain = Op.getOperand(0);
SDValue Flag;
// Copy the result values into the output registers.
@@ -1118,10 +1090,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG)
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- // ISD::RET => ret chain, (regnum1,val1), ...
- // So i*2+1 index only the regnums
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Op.getOperand(i*2+1), Flag);
+ Outs[i].Val, Flag);
// guarantee that all emitted copies are
// stuck together, so later passes can't schedule them apart
@@ -1138,7 +1108,7 @@ LowerRET(SDValue Op, SelectionDAG &DAG)
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg)
- assert(0 && "sret virtual register not created in the entry block");
+ llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
@@ -1188,7 +1158,7 @@ getConstraintType(const std::string &Constraint) const
/// return a list of registers that can be used to satisfy the constraint.
/// This should only be used for C_RegisterClass constraints.
std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
{
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -1210,7 +1180,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
/// pointer.
std::vector<unsigned> MipsTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const
+ EVT VT) const
{
if (Constraint.size() != 1)
return std::vector<unsigned>();
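All three converted hooks above share one shape: build a CCState, which now carries the LLVMContext, analyze the Ins/Outs lists, then walk the assigned locations. A boiled-down sketch (RetCC_Mips is the tblgen'd calling-convention function from MipsGenCallingConv.inc; countRegReturns is an illustrative name, and the includes assume the 2.6-era header layout):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/CallingConvLower.h"
    using namespace llvm;

    // Sketch: count how many return values land in registers.
    static unsigned countRegReturns(CallingConv::ID CC, bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const TargetMachine &TM, LLVMContext &Ctx) {
      SmallVector<CCValAssign, 16> RVLocs;
      CCState Info(CC, isVarArg, TM, RVLocs, Ctx); // context arg is new here
      Info.AnalyzeReturn(Outs, RetCC_Mips);
      unsigned N = 0;
      for (unsigned i = 0, e = RVLocs.size(); i != e; ++i)
        if (RVLocs[i].isRegLoc())
          ++N;
      return N;
    }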
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 9ad4895ce6e5..dddba4291d28 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -66,8 +66,8 @@ namespace llvm {
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
- class MipsTargetLowering : public TargetLowering
- {
+
+ class MipsTargetLowering : public TargetLowering {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
@@ -80,7 +80,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- MVT getSetCCResultType(MVT VT) const;
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
@@ -88,40 +88,62 @@ namespace llvm {
// Subtarget Info
const MipsSubtarget *Subtarget;
+
// Lower Operand helpers
- SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG);
- bool IsGlobalInSmallSection(GlobalValue *GV);
- bool IsInSmallSection(unsigned Size);
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
// Lower Operand specifics
SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
};
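LowerFormalArguments, LowerCall and LowerReturn above replace the removed LowerFORMAL_ARGUMENTS, LowerCALL and LowerRET node lowerings. A hedged sketch of how a LowerReturn body typically opens under this hook API; RetCC_Mips is the tablegen-generated calling-convention function, and the exact CCState signature is assumed from contemporaneous targets:

    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                   RVLocs, *DAG.getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
    // Each assigned location is then copied out of Outs[i].Val, as the
    // LowerRET hunk near the top of this diff now does.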
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index e16fd8e400c5..91599043cb2c 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "MipsGenInstrInfo.inc"
using namespace llvm;
@@ -208,29 +209,6 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0).addFrameIndex(FI);
}
-void MipsInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill, SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
- unsigned Opc;
- if (RC == Mips::CPURegsRegisterClass)
- Opc = Mips::SW;
- else if (RC == Mips::FGR32RegisterClass)
- Opc = Mips::SWC1;
- else {
- assert(RC == Mips::AFGR64RegisterClass);
- Opc = Mips::SDC1;
- }
-
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
- .addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
-
void MipsInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
@@ -251,28 +229,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI);
}
-void MipsInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Opc;
- if (RC == Mips::CPURegsRegisterClass)
- Opc = Mips::LW;
- else if (RC == Mips::FGR32RegisterClass)
- Opc = Mips::LWC1;
- else {
- assert(RC == Mips::AFGR64RegisterClass);
- Opc = Mips::LDC1;
- }
-
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
-
MachineInstr *MipsInstrInfo::
foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
@@ -372,7 +328,7 @@ static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc)
unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC)
{
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case Mips::COND_E : return Mips::BEQ;
case Mips::COND_NE : return Mips::BNE;
case Mips::COND_GZ : return Mips::BGTZ;
@@ -421,7 +377,7 @@ unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC)
Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC)
{
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case Mips::COND_E : return Mips::COND_NE;
case Mips::COND_NE : return Mips::COND_E;
case Mips::COND_GZ : return Mips::COND_LEZ;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 6655c6749fdf..249d3de3b700 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,6 +15,7 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "MipsRegisterInfo.h"
@@ -92,7 +93,7 @@ namespace Mips {
inline static const char *MipsFCCToString(Mips::CondCode CC)
{
switch (CC) {
- default: assert(0 && "Unknown condition code");
+ default: llvm_unreachable("Unknown condition code");
case FCOND_F:
case FCOND_T: return "f";
case FCOND_UN:
@@ -129,6 +130,38 @@ namespace Mips {
}
}
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HILO
+
+ };
+}
+
class MipsInstrInfo : public TargetInstrInfoImpl {
MipsTargetMachine &TM;
const MipsRegisterInfo RI;
@@ -182,21 +215,11 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
const SmallVectorImpl<unsigned> &Ops,
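The new MipsII::TOF values ride on machine operands so later passes and the printer can select the matching relocation operator (%got, %call16, %gp_rel, %hi/%lo). A sketch, with an assumed opcode and registers, of attaching one through the standard MachineInstrBuilder interface:

    // Load the GOT entry for GV: roughly lw $dst, %got(GV)($gp).
    BuildMI(MBB, I, DL, TII.get(Mips::LW), DstReg)
      .addGlobalAddress(GV, 0, MipsII::MO_GOT)
      .addReg(Mips::GP);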
diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp
new file mode 100644
index 000000000000..60ef1c9e4fef
--- /dev/null
+++ b/lib/Target/Mips/MipsMCAsmInfo.cpp
@@ -0,0 +1,27 @@
+//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MipsMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCAsmInfo.h"
+using namespace llvm;
+
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) {
+ AlignmentIsInBytes = false;
+ COMMDirectiveTakesAlignment = true;
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0;
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ PICJumpTableDirective = "\t.gpword\t";
+}
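Nothing here emits assembly directly; these fields parameterize the shared AsmPrinter. A small sketch of the effect, assuming the usual MCAsmInfo accessors of this release:

    MipsMCAsmInfo MAI(TheMipsTarget, "mips-unknown-linux-gnu");
    // 16-bit data prints via "\t.half\t", 32-bit via "\t.word\t", and
    // zero fill via "\t.space\t"; Data64bitsDirective is null, so 64-bit
    // values fall back to being emitted as two 32-bit chunks.
    const char *Word = MAI.getData32bitsDirective();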
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h
new file mode 100644
index 000000000000..33a4b5edb258
--- /dev/null
+++ b/lib/Target/Mips/MipsMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class MipsMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MipsMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index ac3cdfd38e16..949c78aebc93 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -57,7 +57,7 @@ private:
/// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
MipsFIHolder GPHolder;
- /// On LowerFORMAL_ARGUMENTS the stack size is unknown, so the Stack
+ /// On LowerFormalArguments the stack size is unknown, so the Stack
/// Pointer Offset calculation of "not in register arguments" must be
/// postponed to emitPrologue.
SmallVector<MipsFIHolder, 16> FnLoadArgs;
@@ -65,7 +65,7 @@ private:
 // When VarArgs, we must write registers back to the caller stack, preserving
 // the register arguments. Since the stack size is unknown on
- // LowerFORMAL_ARGUMENTS, the Stack Pointer Offset calculation must be
+ // LowerFormalArguments, the Stack Pointer Offset calculation must be
// postponed to emitPrologue.
SmallVector<MipsFIHolder, 4> FnStoreVarArgs;
bool HasStoreVarArgs;
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 579d4db6422f..d2289e9cdbaa 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -31,6 +31,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -79,12 +81,12 @@ getRegisterNumbering(unsigned RegEnum)
case Mips::SP : case Mips::F29: return 29;
case Mips::FP : case Mips::F30: case Mips::D15: return 30;
case Mips::RA : case Mips::F31: return 31;
- default: assert(0 && "Unknown register number!");
+ default: llvm_unreachable("Unknown register number!");
}
return 0; // Not reached
}
-unsigned MipsRegisterInfo::getPICCallReg(void) { return Mips::T9; }
+unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
//===----------------------------------------------------------------------===//
// Callee Saved Registers methods
@@ -210,7 +212,7 @@ getReservedRegs(const MachineFunction &MF) const
// The emitted instruction will be something like:
// lw REGX, 16+StackSize(SP)
//
-// Since the total stack size is unknown on LowerFORMAL_ARGUMENTS, all
+// Since the total stack size is unknown on LowerFormalArguments, all
// stack references (ObjectOffset) created to reference the function
 // arguments are negative numbers. This way, on eliminateFrameIndex it's
// possible to detect those references and the offsets are adjusted to
@@ -232,7 +234,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
// Replace the dummy '0' SPOffset by the negative offsets, as explained on
- // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
+ // LowerFormalArguments. Leaving '0' for a while is necessary to avoid
// the approach done by calculateFrameObjectOffsets to the stack frame.
MipsFI->adjustLoadArgsFI(MFI);
MipsFI->adjustStoreVarArgsFI(MFI);
@@ -346,9 +348,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 // FrameIndex represents objects inside an abstract stack.
 // We must replace FrameIndex with a stack/frame pointer
// direct reference.
-void MipsRegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
+unsigned MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ int *Value, RegScavenger *RS) const
{
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -360,34 +362,27 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
"Instr doesn't have FrameIndex operand!");
}
- #ifndef NDEBUG
- DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
- DOUT << "<--------->\n";
- MI.print(DOUT);
- #endif
+ DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ errs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(i).getIndex();
int stackSize = MF.getFrameInfo()->getStackSize();
int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
- #ifndef NDEBUG
- DOUT << "FrameIndex : " << FrameIndex << "\n";
- DOUT << "spOffset : " << spOffset << "\n";
- DOUT << "stackSize : " << stackSize << "\n";
- #endif
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
- // as explained on LowerFORMAL_ARGUMENTS, detect negative offsets
+ // As explained in LowerFormalArguments, detect negative offsets
// and adjust SPOffsets considering the final stack size.
int Offset = ((spOffset < 0) ? (stackSize + (-(spOffset+4))) : (spOffset));
Offset += MI.getOperand(i-1).getImm();
- #ifndef NDEBUG
- DOUT << "Offset : " << Offset << "\n";
- DOUT << "<--------->\n";
- #endif
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
MI.getOperand(i-1).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ return 0;
}
void MipsRegisterInfo::
@@ -515,19 +510,19 @@ getFrameRegister(MachineFunction &MF) const {
unsigned MipsRegisterInfo::
getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
+ llvm_unreachable("What is the exception register");
return 0;
}
unsigned MipsRegisterInfo::
getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
+ llvm_unreachable("What is the exception handler register");
return 0;
}
int MipsRegisterInfo::
getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "What is the dwarf register number");
+ llvm_unreachable("What is the dwarf register number");
return -1;
}
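The Offset computation above decodes the negative markers that LowerFormalArguments plants before the frame size is known. A worked instance with assumed numbers:

    int stackSize = 40;  // final frame size, known only at this point
    int spOffset  = -8;  // negative marker from LowerFormalArguments
    int Offset = (spOffset < 0) ? (stackSize + (-(spOffset + 4))) : spOffset;
    // -(spOffset + 4) == 4, so Offset == 44: the slot lands just above
    // the 40-byte frame; spOffset == -4 maps to exactly 40.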
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 808e995b4ed3..122f786656b4 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -34,7 +34,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
static unsigned getRegisterNumbering(unsigned RegEnum);
/// Get PIC indirect call register
- static unsigned getPICCallReg(void);
+ static unsigned getPICCallReg();
/// Adjust the Mips stack frame.
void adjustMipsStackFrame(MachineFunction &MF) const;
@@ -54,8 +54,9 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 4245f274f8f0..db114da00d73 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -14,37 +14,20 @@
#include "MipsSubtarget.h"
#include "Mips.h"
#include "MipsGenSubtarget.inc"
-#include "llvm/Module.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
-static cl::opt<bool>
-NotABICall("disable-mips-abicall", cl::Hidden,
- cl::desc("Disable code for SVR4-style dynamic objects"));
-static cl::opt<bool>
-AbsoluteCall("enable-mips-absolute-call", cl::Hidden,
- cl::desc("Enable absolute call within abicall"));
-static cl::opt<unsigned>
-SSThreshold("mips-ssection-threshold", cl::Hidden,
- cl::desc("Small data and bss section threshold size (default=8)"),
- cl::init(8));
-
-MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS, bool little) :
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
+ bool little) :
MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
- IsFP64bit(false), IsGP64bit(false), HasVFPU(false), HasABICall(true),
- HasAbsoluteCall(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
- HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false)
+ IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
+ HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
+ HasSwap(false), HasBitCount(false)
{
std::string CPU = "mips1";
MipsArchVersion = Mips1;
// Parse features string.
ParseSubtargetFeatures(FS, CPU);
- const std::string& TT = M.getTargetTriple();
-
- // Small section size threshold
- SSectionThreshold = SSThreshold;
 // Is the target system Linux?
if (TT.find("linux") == std::string::npos)
@@ -65,13 +48,4 @@ MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M,
HasSwap = true;
HasCondMov = true;
}
-
- // Abicall is the default for O32 ABI, but is disabled within EABI and in
- // static code.
- if (NotABICall || isABI_EABI() || (TM.getRelocationModel() == Reloc::Static))
- HasABICall = false;
-
- // TODO: disable when handling 64 bit symbols in the future.
- if (HasABICall && AbsoluteCall)
- HasAbsoluteCall = true;
}
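With the Module and the command-line options gone, the subtarget is built straight from a triple string. A hedged usage sketch; the triple is arbitrary:

    MipsSubtarget ST("mips-unknown-linux-gnu", /*FS=*/"", /*little=*/false);
    // The constructor searches the triple for "linux", so ST.isLinux()
    // holds here; any other triple clears IsLinux instead.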
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 61c37c1d377e..1d6f87d8c063 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -20,7 +20,6 @@
#include <string>
namespace llvm {
-class Module;
class MipsSubtarget : public TargetSubtarget {
@@ -58,20 +57,9 @@ protected:
// HasVFPU - Processor has a vector floating point unit.
bool HasVFPU;
- // IsABICall - Enable SRV4 code for SVR4-style dynamic objects
- bool HasABICall;
-
- // HasAbsoluteCall - Enable code that is not fully position-independent.
- // Only works with HasABICall enabled.
- bool HasAbsoluteCall;
-
 // isLinux - Target system is Linux. If false, we consider ELF OS for now.
bool IsLinux;
- // Put global and static items less than or equal to SSectionThreshold
- // bytes into the small data or bss section. The default is 8.
- unsigned SSectionThreshold;
-
/// Features related to the presence of specific instructions.
// HasSEInReg - SEB and SEH (signext in register) instructions.
@@ -103,9 +91,8 @@ public:
unsigned getTargetABI() const { return MipsABI; }
/// This constructor initializes the data members to match that
- /// of the specified module.
- MipsSubtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS, bool little);
+ /// of the specified triple.
+ MipsSubtarget(const std::string &TT, const std::string &FS, bool little);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -121,10 +108,7 @@ public:
bool isSingleFloat() const { return IsSingleFloat; };
bool isNotSingleFloat() const { return !IsSingleFloat; };
bool hasVFPU() const { return HasVFPU; };
- bool hasABICall() const { return HasABICall; };
- bool hasAbsoluteCall() const { return HasAbsoluteCall; };
bool isLinux() const { return IsLinux; };
- unsigned getSSectionThreshold() const { return SSectionThreshold; }
/// Features related to the presence of specific instructions.
bool hasSEInReg() const { return HasSEInReg; };
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 4675536ce2a5..4fa5450df138 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -12,35 +12,18 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MipsTargetAsmInfo.h"
+#include "MipsMCAsmInfo.h"
#include "MipsTargetMachine.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-/// MipsTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int MipsTargetMachineModule;
-int MipsTargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<MipsTargetMachine> X("mips", "Mips");
-static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel");
-
-MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0;
-
-
-// Force static initialization.
-extern "C" void LLVMInitializeMipsTarget() { }
-
-const TargetAsmInfo *MipsTargetMachine::
-createTargetAsmInfo() const
-{
- return new MipsTargetAsmInfo(*this);
+extern "C" void LLVMInitializeMipsTarget() {
+ // Register the target.
+ RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+ RegisterAsmInfo<MipsMCAsmInfo> A(TheMipsTarget);
+ RegisterAsmInfo<MipsMCAsmInfo> B(TheMipselTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -51,17 +34,22 @@ createTargetAsmInfo() const
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
-MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false):
- Subtarget(*this, M, FS, isLittle),
+MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
+ bool isLittle=false):
+ LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, isLittle),
DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") :
std::string("E-p:32:32:32-i8:8:32-i16:16:32")),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
- TLInfo(*this)
-{
+ TLInfo(*this) {
 // Abicall (the O32 ABI default) enables PIC by default
- if (Subtarget.hasABICall())
- setRelocationModel(Reloc::PIC_);
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isABI_O32())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::Static);
+ }
// TODO: create an option to enable long calls, like -mlong-calls,
// that would be our CodeModel::Large. It must not work with Abicall.
@@ -70,43 +58,9 @@ MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false):
}
MipselTargetMachine::
-MipselTargetMachine(const Module &M, const std::string &FS) :
- MipsTargetMachine(M, FS, true) {}
-
-// return 0 and must specify -march to gen MIPS code.
-unsigned MipsTargetMachine::
-getModuleMatchQuality(const Module &M)
-{
- // We strongly match "mips*-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 5 && std::string(TT.begin(), TT.begin()+5) == "mips-")
- return 20;
-
- if (TT.size() >= 13 && std::string(TT.begin(),
- TT.begin()+13) == "mipsallegrex-")
- return 20;
-
- return 0;
-}
-
-// return 0 and must specify -march to gen MIPSEL code.
-unsigned MipselTargetMachine::
-getModuleMatchQuality(const Module &M)
-{
- // We strongly match "mips*el-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 7 && std::string(TT.begin(), TT.begin()+7) == "mipsel-")
- return 20;
-
- if (TT.size() >= 15 && std::string(TT.begin(),
- TT.begin()+15) == "mipsallegrexel-")
- return 20;
-
- if (TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "psp")
- return 20;
-
- return 0;
-}
+MipselTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS) :
+ MipsTargetMachine(T, TT, FS, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -126,14 +80,3 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
PM.add(createMipsDelaySlotFillerPass(*this));
return true;
}
-
-// Implements the AssemblyEmitter for the target. Must return
-// true if AssemblyEmitter is supported
-bool MipsTargetMachine::
-addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out) {
- // Output assembly language.
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
- return false;
-}
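Static RegisterTarget objects and getModuleMatchQuality give way to TargetRegistry registration keyed on the triple. A sketch of the matching client-side lookup, assuming the registry API of this release:

    std::string Err;
    const Target *T =
      TargetRegistry::lookupTarget("mipsel-unknown-linux-gnu", Err);
    TargetMachine *TM =
      T ? T->createTargetMachine("mipsel-unknown-linux-gnu", /*FS=*/"") : 0;
    // Per the constructor above, TM defaults to PIC for the O32 ABI.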
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 95e5be40f751..c3428be48f59 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -22,7 +22,7 @@
#include "llvm/Target/TargetFrameInfo.h"
namespace llvm {
- class raw_ostream;
+ class formatted_raw_ostream;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
@@ -30,24 +30,9 @@ namespace llvm {
MipsInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
MipsTargetLowering TLInfo;
-
- protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
- protected:
- // To avoid having target depend on the asmprinter stuff libraries,
- // asmprinter set this functions to ctor pointer at startup time if they are
- // linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- MipsTargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle);
-
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
+ MipsTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
@@ -66,25 +51,19 @@ namespace llvm {
return const_cast<MipsTargetLowering*>(&TLInfo);
}
- static unsigned getModuleMatchQuality(const Module &M);
-
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
};
/// MipselTargetMachine - Mipsel target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
public:
- MipselTargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getModuleMatchQuality(const Module &M);
+ MipselTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
new file mode 100644
index 000000000000..85e9d65a32b1
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -0,0 +1,93 @@
+//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetObjectFile.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
+
+void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= SSThreshold;
+}
+
+bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is an internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+
+
+const MCSection *MipsTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
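Classification is purely size-driven through the file-local IsInSmallSection helper, and the threshold is the cl::opt above, so llc -mips-ssection-threshold=16 widens the net from the command line. Inside this translation unit the behavior can be pinned down as:

    assert(IsInSmallSection(8));   // 8 bytes  <= threshold: .sdata/.sbss
    assert(!IsInSmallSection(64)); // 64 bytes >  threshold: .data/.bss
    assert(!IsInSmallSection(0));  // zero-sized objects are excluded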
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
new file mode 100644
index 000000000000..32e0436f0c97
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM, SectionKind Kind)const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+
+ // TODO: Classify globals as mips wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..6e5d56ba4ae7
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsInfo
+ MipsTargetInfo.cpp
+ )
+
+add_dependencies(LLVMMipsInfo MipsCodeGenTable_gen)
diff --git a/lib/Target/Mips/TargetInfo/Makefile b/lib/Target/Mips/TargetInfo/Makefile
new file mode 100644
index 000000000000..32f4e1695b1d
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Mips/TargetInfo/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
new file mode 100644
index 000000000000..cc3d61e4e71d
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+
+extern "C" void LLVMInitializeMipsTargetInfo() {
+ RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
+
+ RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
+}
diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..2e1b809b92d7
--- /dev/null
+++ b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMPIC16AsmPrinter
+ PIC16AsmPrinter.cpp
+ )
+add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen)
diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile
new file mode 100644
index 000000000000..f4db57e60716
--- /dev/null
+++ b/lib/Target/PIC16/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PIC16/AsmPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPIC16AsmPrinter
+
+# Hack: we need to include 'main' pic16 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
new file mode 100644
index 000000000000..3f415afc1090
--- /dev/null
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -0,0 +1,484 @@
+//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PIC16 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16AsmPrinter.h"
+#include "MCSectionPIC16.h"
+#include "PIC16MCAsmInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include <cstring>
+using namespace llvm;
+
+#include "PIC16GenAsmWriter.inc"
+
+PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
+: AsmPrinter(O, TM, T, V), DbgInfo(O, T) {
+ PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering());
+ PMAI = static_cast<const PIC16MCAsmInfo*>(T);
+ PTOF = (PIC16TargetObjectFile*)&PTLI->getObjFileLowering();
+}
+
+bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ processDebugLoc(MI, true);
+ printInstruction(MI);
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+ processDebugLoc(MI, false);
+ return true;
+}
+
+/// runOnMachineFunction - This emits the frame section, autos section and
+/// assembly for each instruction. Also takes care of function begin debug
+/// directive and file begin debug directive (if required) for the function.
+///
+bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ // This calls the base class function required to be called at the
+ // beginning of runOnMachineFunction.
+ SetupMachineFunction(MF);
+
+ // Get the mangled name.
+ const Function *F = MF.getFunction();
+ CurrentFnName = Mang->getMangledName(F);
+
+ // Emit the function frame (args and temps).
+ EmitFunctionFrame(MF);
+
+ DbgInfo.BeginFunction(MF);
+
+ // Emit the autos section of function.
+ EmitAutos(CurrentFnName);
+
+ // Now emit the instructions of function in its code section.
+ const MCSection *fCodeSection =
+ getObjFileLowering().getSectionForFunction(CurrentFnName);
+ // Start the Code Section.
+ O << "\n";
+ OutStreamer.SwitchSection(fCodeSection);
+
+ // Emit the frame address of the function at the beginning of code.
+ O << "\tretlw low(" << PAN::getFrameLabel(CurrentFnName) << ")\n";
+ O << "\tretlw high(" << PAN::getFrameLabel(CurrentFnName) << ")\n";
+
+ // Emit function start label.
+ O << CurrentFnName << ":\n";
+
+ DebugLoc CurDL;
+ O << "\n";
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ EmitBasicBlockStart(I);
+ }
+
+ // Print a basic block.
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+
+ // Emit the line directive if source line changed.
+ const DebugLoc DL = II->getDebugLoc();
+ if (!DL.isUnknown() && DL != CurDL) {
+ DbgInfo.ChangeDebugLoc(MF, DL);
+ CurDL = DL;
+ }
+
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Emit function end debug directives.
+ DbgInfo.EndFunction(MF);
+
+ return false; // we didn't modify anything.
+}
+
+
+// printOperand - print operand of insn.
+void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ return;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ std::string Sname = Mang->getMangledName(MO.getGlobal());
+ // FIXME: currently a memcpy def never comes in the module, as we do not
+ // link those in as a .bc lib. So these calls are always external and it
+ // is safe to emit an extern.
+ if (PAN::isMemIntrinsic(Sname)) {
+ LibcallDecls.push_back(createESName(Sname));
+ }
+
+ O << Sname;
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *Sname = MO.getSymbolName();
+
+ // If it's a libcall name, record it in the decls section.
+ if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) {
+ LibcallDecls.push_back(Sname);
+ }
+
+ // Record a call to an intrinsic to print the extern declaration for it.
+ std::string Sym = Sname;
+ if (PAN::isMemIntrinsic(Sym)) {
+ Sym = PAN::addPrefix(Sym);
+ LibcallDecls.push_back(createESName(Sym));
+ }
+
+ O << Sym;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+
+ default:
+ llvm_unreachable(" Operand type not supported.");
+ }
+}
+
+/// printCCOperand - Print the cond code operand.
+///
+void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
+}
+
+// This function is used to sort the decls list.
+// It must define a strict weak ordering: return true only when s1 comes
+// strictly before s2, hence '<' rather than '<=' (std::list::sort
+// requires a strict comparator).
+static bool is_before(const char *s1, const char *s2) {
+  return strcmp(s1, s2) < 0;
+}
+
+// This is used by list::unique below.
+// unique will filter out adjacent duplicates identified by this predicate.
+static bool is_duplicate(const char *s1, const char *s2) {
+ return !strcmp(s1, s2);
+}
+
+/// printLibcallDecls - print the extern declarations for compiler
+/// intrinsics.
+///
+void PIC16AsmPrinter::printLibcallDecls() {
+ // If no libcalls used, return.
+ if (LibcallDecls.empty()) return;
+
+ O << MAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n";
+ // Remove duplicate entries.
+ LibcallDecls.sort(is_before);
+ LibcallDecls.unique(is_duplicate);
+
+ for (std::list<const char*>::const_iterator I = LibcallDecls.begin();
+ I != LibcallDecls.end(); I++) {
+ O << MAI->getExternDirective() << *I << "\n";
+ O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n";
+ O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n";
+ }
+ O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n";
+}
+
+/// doInitialization - Perform Module level initializations here.
+/// One task that we do here is to sectionize all global variables.
+/// The MemSelOptimizer pass depends on the sectionizing.
+///
+bool PIC16AsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // FIXME: This is a temporary solution to generate the include file.
+ // The processor should be passed to llc as an input and the header file
+ // should be generated accordingly.
+ O << "\n\t#include P16F1937.INC\n";
+
+ // Set the section names for all globals.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
+ const MCSection *S = getObjFileLowering().SectionForGlobal(I, Mang, TM);
+
+ I->setSection(((const MCSectionPIC16*)S)->getName());
+ }
+
+ DbgInfo.BeginModule(M);
+ EmitFunctionDecls(M);
+ EmitUndefinedVars(M);
+ EmitDefinedVars(M);
+ EmitIData(M);
+ EmitUData(M);
+ EmitRomData(M);
+ return Result;
+}
+
+/// Emit extern decls for functions imported from other modules, and emit
+/// global declarations for functions defined in this module that are
+/// available to other modules.
+///
+void PIC16AsmPrinter::EmitFunctionDecls(Module &M) {
+ // Emit declarations for external functions.
+ O <<"\n"<<MAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
+ if (I->isIntrinsic())
+ continue;
+
+ std::string Name = Mang->getMangledName(I);
+ if (Name.compare("@abort") == 0)
+ continue;
+
+ if (!I->isDeclaration() && !I->hasExternalLinkage())
+ continue;
+
+ // Do not emit memcpy, memset, and memmove here.
+ // Calls to these routines can be generated in two ways:
+ // 1. User calling the standard lib function
+ // 2. Codegen generating these calls for llvm intrinsics.
+ // In the first case a prototype is already available, while in the
+ // second case the call is via an externalsym and the prototype is missing.
+ // So declarations for these are currently always printed by tracking
+ // both kinds of references in printInstruction.
+ if (I->isDeclaration() && PAN::isMemIntrinsic(Name)) continue;
+
+ const char *directive = I->isDeclaration() ? MAI->getExternDirective() :
+ MAI->getGlobalDirective();
+
+ O << directive << Name << "\n";
+ O << directive << PAN::getRetvalLabel(Name) << "\n";
+ O << directive << PAN::getArgsLabel(Name) << "\n";
+ }
+
+ O << MAI->getCommentString() << "Function Declarations - END." <<"\n";
+}
+
+// Emit variables imported from other Modules.
+void PIC16AsmPrinter::EmitUndefinedVars(Module &M) {
+ std::vector<const GlobalVariable*> Items = PTOF->ExternalVarDecls->Items;
+ if (!Items.size()) return;
+
+ O << "\n" << MAI->getCommentString() << "Imported Variables - BEGIN" << "\n";
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << MAI->getExternDirective() << Mang->getMangledName(Items[j]) << "\n";
+ }
+ O << MAI->getCommentString() << "Imported Variables - END" << "\n";
+}
+
+// Emit variables defined in this module that are available to other modules.
+void PIC16AsmPrinter::EmitDefinedVars(Module &M) {
+ std::vector<const GlobalVariable*> Items = PTOF->ExternalVarDefs->Items;
+ if (!Items.size()) return;
+
+ O << "\n" << MAI->getCommentString() << "Exported Variables - BEGIN" << "\n";
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << MAI->getGlobalDirective() << Mang->getMangledName(Items[j]) << "\n";
+ }
+ O << MAI->getCommentString() << "Exported Variables - END" << "\n";
+}
+
+// Emit initialized data placed in ROM.
+void PIC16AsmPrinter::EmitRomData(Module &M) {
+ // Print ROM Data section.
+ const std::vector<PIC16Section*> &ROSections = PTOF->ROSections;
+ for (unsigned i = 0; i < ROSections.size(); i++) {
+ const std::vector<const GlobalVariable*> &Items = ROSections[i]->Items;
+ if (!Items.size()) continue;
+ O << "\n";
+ OutStreamer.SwitchSection(PTOF->ROSections[i]->S_);
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << Mang->getMangledName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ int AddrSpace = Items[j]->getType()->getAddressSpace();
+ EmitGlobalConstant(C, AddrSpace);
+ }
+ }
+}
+
+bool PIC16AsmPrinter::doFinalization(Module &M) {
+ printLibcallDecls();
+ EmitRemainingAutos();
+ DbgInfo.EndModule(M);
+ O << "\n\t" << "END\n";
+ return AsmPrinter::doFinalization(M);
+}
+
+void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ std::string FuncName = Mang->getMangledName(F);
+ const TargetData *TD = TM.getTargetData();
+ // Emit the data section name.
+ O << "\n";
+
+ const MCSection *fPDataSection =
+ getObjFileLowering().getSectionForFunctionFrame(CurrentFnName);
+ OutStreamer.SwitchSection(fPDataSection);
+
+ // Emit function frame label
+ O << PAN::getFrameLabel(CurrentFnName) << ":\n";
+
+ const Type *RetType = F->getReturnType();
+ unsigned RetSize = 0;
+ if (RetType->getTypeID() != Type::VoidTyID)
+ RetSize = TD->getTypeAllocSize(RetType);
+
+ // Emit function return value space.
+ // FIXME: Do not emit RetvalLabel when RetSize is zero. To do this
+ // we will need to avoid printing a global directive for the Retval
+ // label in emitExternandGloblas.
+ if(RetSize > 0)
+ O << PAN::getRetvalLabel(CurrentFnName) << " RES " << RetSize << "\n";
+ else
+ O << PAN::getRetvalLabel(CurrentFnName) << ": \n";
+
+ // Emit variable to hold the space for function arguments
+ unsigned ArgSize = 0;
+ for (Function::const_arg_iterator argi = F->arg_begin(),
+ arge = F->arg_end(); argi != arge ; ++argi) {
+ const Type *Ty = argi->getType();
+ ArgSize += TD->getTypeAllocSize(Ty);
+ }
+
+ O << PAN::getArgsLabel(CurrentFnName) << " RES " << ArgSize << "\n";
+
+ // Emit temporary space
+ int TempSize = PTLI->GetTmpSize();
+ if (TempSize > 0)
+ O << PAN::getTempdataLabel(CurrentFnName) << " RES " << TempSize << '\n';
+}
+
+void PIC16AsmPrinter::EmitIData(Module &M) {
+
+ // Print all IDATA sections.
+ const std::vector<PIC16Section*> &IDATASections = PTOF->IDATASections;
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ O << "\n";
+ if (IDATASections[i]->S_->getName().find("llvm.") != std::string::npos)
+ continue;
+ OutStreamer.SwitchSection(IDATASections[i]->S_);
+ std::vector<const GlobalVariable*> Items = IDATASections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string Name = Mang->getMangledName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ int AddrSpace = Items[j]->getType()->getAddressSpace();
+ O << Name;
+ EmitGlobalConstant(C, AddrSpace);
+ }
+ }
+}
+
+void PIC16AsmPrinter::EmitUData(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print all BSS sections.
+ const std::vector<PIC16Section*> &BSSSections = PTOF->BSSSections;
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ O << "\n";
+ OutStreamer.SwitchSection(BSSSections[i]->S_);
+ std::vector<const GlobalVariable*> Items = BSSSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string Name = Mang->getMangledName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+
+ O << Name << " RES " << Size << "\n";
+ }
+ }
+}
+
+void PIC16AsmPrinter::EmitAutos(std::string FunctName) {
+ // Section names for all globals are already set.
+ const TargetData *TD = TM.getTargetData();
+
+ // Now print Autos section for this function.
+ std::string SectionName = PAN::getAutosSectionName(FunctName);
+ const std::vector<PIC16Section*> &AutosSections = PTOF->AutosSections;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ O << "\n";
+ if (AutosSections[i]->S_->getName() == SectionName) {
+ // Set the printing status to true
+ AutosSections[i]->setPrintedStatus(true);
+ OutStreamer.SwitchSection(AutosSections[i]->S_);
+ const std::vector<const GlobalVariable*> &Items = AutosSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string VarName = Mang->getMangledName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ // Emit memory reserve directive.
+ O << VarName << " RES " << Size << "\n";
+ }
+ break;
+ }
+ }
+}
+
+// Print autos that were not printed during the code printing of functions,
+// as the functions themselves might have been deleted by the optimizer.
+void PIC16AsmPrinter::EmitRemainingAutos() {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print any Autos sections not emitted alongside their functions.
+ std::vector <PIC16Section *>AutosSections = PTOF->AutosSections;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+
+ // if the section is already printed then don't print again
+ if (AutosSections[i]->isPrinted())
+ continue;
+
+ // Set status as printed
+ AutosSections[i]->setPrintedStatus(true);
+
+ O << "\n";
+ OutStreamer.SwitchSection(AutosSections[i]->S_);
+ const std::vector<const GlobalVariable*> &Items = AutosSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string VarName = Mang->getMangledName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ // Emit memory reserve directive.
+ O << VarName << " RES " << Size << "\n";
+ }
+ }
+}
+
+
+extern "C" void LLVMInitializePIC16AsmPrinter() {
+ RegisterAsmPrinter<PIC16AsmPrinter> X(ThePIC16Target);
+}
+
+
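printLibcallDecls leans on sort-then-unique because std::list::unique only collapses adjacent equal elements; the sort is what makes duplicates adjacent. In miniature:

    std::list<const char*> L;
    L.push_back("b"); L.push_back("a"); L.push_back("a");
    L.sort(is_before);        // a, a, b
    L.unique(is_duplicate);   // a, b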
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
new file mode 100644
index 000000000000..2dd4600b76c2
--- /dev/null
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -0,0 +1,80 @@
+//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PIC16 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ASMPRINTER_H
+#define PIC16ASMPRINTER_H
+
+#include "PIC16.h"
+#include "PIC16TargetMachine.h"
+#include "PIC16DebugInfo.h"
+#include "PIC16MCAsmInfo.h"
+#include "PIC16TargetObjectFile.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include <list>
+#include <string>
+
+namespace llvm {
+ class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
+ public:
+ explicit PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V);
+ private:
+ virtual const char *getPassName() const {
+ return "PIC16 Assembly Printer";
+ }
+
+ PIC16TargetObjectFile &getObjFileLowering() const {
+ return (PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering();
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printCCOperand(const MachineInstr *MI, int opNum);
+ void printInstruction(const MachineInstr *MI); // definition autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
+ bool printMachineInstruction(const MachineInstr *MI);
+ void EmitFunctionDecls (Module &M);
+ void EmitUndefinedVars (Module &M);
+ void EmitDefinedVars (Module &M);
+ void EmitIData (Module &M);
+ void EmitUData (Module &M);
+ void EmitAutos (std::string FunctName);
+ void EmitRemainingAutos ();
+ void EmitRomData (Module &M);
+ void EmitFunctionFrame(MachineFunction &MF);
+ void printLibcallDecls();
+ protected:
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ /// PrintGlobalVariable - Emit the specified global variable and its
+ /// initializer to the output stream.
+ virtual void PrintGlobalVariable(const GlobalVariable *GV) {
+ // PIC16 doesn't use normal hooks for this.
+ }
+
+ private:
+ PIC16TargetObjectFile *PTOF;
+ PIC16TargetLowering *PTLI;
+ PIC16DbgInfo DbgInfo;
+ const PIC16MCAsmInfo *PMAI;
+ std::list<const char *> LibcallDecls; // List of extern decls.
+ };
+} // end of namespace
+
+#endif
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
index 00d737af4c2e..0ee88f9cda6f 100644
--- a/lib/Target/PIC16/CMakeLists.txt
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -11,14 +11,14 @@ tablegen(PIC16GenCallingConv.inc -gen-callingconv)
tablegen(PIC16GenSubtarget.inc -gen-subtarget)
add_llvm_target(PIC16
- PIC16AsmPrinter.cpp
PIC16DebugInfo.cpp
PIC16InstrInfo.cpp
PIC16ISelDAGToDAG.cpp
PIC16ISelLowering.cpp
PIC16MemSelOpt.cpp
+ PIC16MCAsmInfo.cpp
PIC16RegisterInfo.cpp
PIC16Subtarget.cpp
- PIC16TargetAsmInfo.cpp
PIC16TargetMachine.cpp
+ PIC16TargetObjectFile.cpp
)
diff --git a/lib/Target/PIC16/MCSectionPIC16.h b/lib/Target/PIC16/MCSectionPIC16.h
new file mode 100644
index 000000000000..352be99d71c2
--- /dev/null
+++ b/lib/Target/PIC16/MCSectionPIC16.h
@@ -0,0 +1,88 @@
+//===- MCSectionPIC16.h - PIC16-specific section representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionPIC16 class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PIC16SECTION_H
+#define LLVM_PIC16SECTION_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+ /// MCSectionPIC16 - Represents a physical section in PIC16 COFF.
+ /// Contains data objects.
+ ///
+ class MCSectionPIC16 : public MCSection {
+ /// Name of the section to uniquely identify it.
+ std::string Name;
+
+ /// User can specify an address at which a section should be placed.
+ /// Negative value here means user hasn't specified any.
+ int Address;
+
+ /// Overlay information - Sections with same color can be overlaid on
+ /// one another.
+ int Color;
+
+ /// Contained data objects.
+ std::vector<const GlobalVariable *>Items;
+
+ /// Total size of all data objects contained here.
+ unsigned Size;
+
+ MCSectionPIC16(const StringRef &name, SectionKind K, int addr, int color)
+ : MCSection(K), Name(name), Address(addr), Color(color) {
+ }
+
+ public:
+ /// Return the name of the section.
+ const std::string &getName() const { return Name; }
+
+ /// Return the Address of the section.
+ int getAddress() const { return Address; }
+
+ /// Return the Color of the section.
+ int getColor() const { return Color; }
+
+ /// PIC16 terminology for section kinds is as below.
+ /// UDATA - BSS
+ /// IDATA - initialized data (equiv to Metadata)
+ /// ROMDATA - ReadOnly.
+ /// UDATA_OVR - Sections that can be overlaid. A section of this type is
+ /// used to contain function autos and frame. We can think of
+ /// it as equiv to llvm ThreadBSS.
+ /// So, let's have some convenience functions to map PIC16 section types
+ /// to SectionKind just for the sake of better readability.
+ static SectionKind UDATA_Kind() { return SectionKind::getBSS(); }
+ static SectionKind IDATA_Kind() { return SectionKind::getMetadata(); }
+ static SectionKind ROMDATA_Kind() { return SectionKind::getReadOnly(); }
+ static SectionKind UDATA_OVR_Kind() { return SectionKind::getThreadBSS(); }
+
+ // FIXME: Ideally we could just compare getKind() == UDATA_Kind() here.
+ bool isUDATA_Kind() { return getKind().isBSS(); }
+ bool isIDATA_Kind() { return getKind().isMetadata(); }
+ bool isROMDATA_Kind() { return getKind().isReadOnly(); }
+ bool isUDATA_OVR_Kind() { return getKind().isThreadBSS(); }
+
+ /// This would be the only way to create a section.
+ static MCSectionPIC16 *Create(const StringRef &Name, SectionKind K,
+ int Address, int Color, MCContext &Ctx);
+
+ /// Override this as PIC16 has its own way of printing the switch
+ /// to a section.
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const;
+ };
+
+} // end namespace llvm
+
+#endif
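A minimal usage sketch for the interface above (hypothetical; assumes an
MCContext named Ctx is available, and leaves Address/Color unspecified via -1):

  MCSectionPIC16 *S =
      MCSectionPIC16::Create("@udata.0.# UDATA", MCSectionPIC16::UDATA_Kind(),
                             /*Address=*/-1, /*Color=*/-1, Ctx);
  assert(S->isUDATA_Kind() && "UDATA sections map onto SectionKind BSS");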
diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile
index c429324cc2d1..f913675da892 100644
--- a/lib/Target/PIC16/Makefile
+++ b/lib/Target/PIC16/Makefile
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMPIC16
+LIBRARYNAME = LLVMPIC16CodeGen
TARGET = PIC16
# Make sure that tblgen is run, first thing.
@@ -17,5 +17,7 @@ BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \
PIC16GenDAGISel.inc PIC16GenCallingConv.inc \
PIC16GenSubtarget.inc
+DIRS = AsmPrinter TargetInfo
+
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
index 7940648928a7..8a3704d7071e 100644
--- a/lib/Target/PIC16/PIC16.h
+++ b/lib/Target/PIC16/PIC16.h
@@ -15,8 +15,8 @@
#ifndef LLVM_TARGET_PIC16_H
#define LLVM_TARGET_PIC16_H
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <iosfwd>
#include <cassert>
#include <sstream>
#include <cstring>
@@ -26,7 +26,7 @@ namespace llvm {
class PIC16TargetMachine;
class FunctionPass;
class MachineCodeEmitter;
- class raw_ostream;
+ class formatted_raw_ostream;
namespace PIC16CC {
enum CondCodes {
@@ -83,7 +83,7 @@ namespace PIC16CC {
// initialized globals - @idata.<num>.#
// Function frame - @<func>.frame_section.
// Function autos - @<func>.autos_section.
- // Declarations - @section.0
+ // Declarations - Enclosed in comments. No section for them.
//----------------------------------------------------------
// Tags used to mangle different names.
@@ -221,17 +221,29 @@ namespace PIC16CC {
return Func1 + tag + "# CODE";
}
- // udata and idata section names are generated by a given number.
+ // udata, romdata and idata section names are generated from a given number.
// @udata.<num>.#
- static std::string getUdataSectionName(unsigned num) {
+ static std::string getUdataSectionName(unsigned num,
+ std::string prefix = "") {
std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << "udata." << num << ".# UDATA";
+ o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num
+ << ".# UDATA";
return o.str();
}
- static std::string getIdataSectionName(unsigned num) {
+ static std::string getRomdataSectionName(unsigned num,
+ std::string prefix = "") {
std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << "idata." << num << ".# IDATA";
+ o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num
+ << ".# ROMDATA";
+ return o.str();
+ }
+
+ static std::string getIdataSectionName(unsigned num,
+ std::string prefix = "") {
+ std::ostringstream o;
+ o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num
+ << ".# IDATA";
return o.str();
}
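For illustration, assuming the PREFIX_SYMBOL tag expands to "@" (as the
section-naming comments earlier in this file suggest), the helpers above
produce strings such as:

  getUdataSectionName(2);            // "@udata.2.# UDATA"
  getRomdataSectionName(1, "fn.");   // "@fn.romdata.1.# ROMDATA"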
@@ -242,6 +254,15 @@ namespace PIC16CC {
return false;
}
+ inline static bool isMemIntrinsic (const std::string &Name) {
+ if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 ||
+ Name.compare("@memmove") == 0) {
+ return true;
+ }
+
+ return false;
+ }
+
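A quick worked example of the predicate above (argument names follow the
mangling scheme described earlier; "@" is assumed as the symbol prefix):

  isMemIntrinsic("@memcpy");   // true
  isMemIntrinsic("@strcpy");   // false - only memcpy/memset/memmove match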
inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
if (! isLocalName(Var)) return false;
@@ -295,7 +316,7 @@ namespace PIC16CC {
inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) {
switch (CC) {
- default: assert(0 && "Unknown condition code");
+ default: llvm_unreachable("Unknown condition code");
case PIC16CC::NE: return "ne";
case PIC16CC::EQ: return "eq";
case PIC16CC::LT: return "lt";
@@ -311,7 +332,7 @@ namespace PIC16CC {
inline static bool isSignedComparison(PIC16CC::CondCodes CC) {
switch (CC) {
- default: assert(0 && "Unknown condition code");
+ default: llvm_unreachable("Unknown condition code");
case PIC16CC::NE:
case PIC16CC::EQ:
case PIC16CC::LT:
@@ -330,11 +351,12 @@ namespace PIC16CC {
FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM);
- FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS,
- PIC16TargetMachine &TM,
- bool Verbose);
- // Banksel optimzer pass.
+ // Banksel optimizer pass.
FunctionPass *createPIC16MemSelOptimizerPass();
+
+ extern Target ThePIC16Target;
+ extern Target TheCooperTarget;
+
} // end namespace llvm;
// Defines symbolic names for PIC16 registers. This defines a mapping from
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 430058805ddc..961caedbf9ab 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -1,3 +1,4 @@
+
//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======//
//
// The LLVM Compiler Infrastructure
@@ -15,8 +16,10 @@
#include "PIC16DebugInfo.h"
#include "llvm/GlobalVariable.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -25,11 +28,11 @@ using namespace llvm;
void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- if (Ty.isBasicType(Ty.getTag()))
+ if (Ty.isBasicType())
PopulateBasicTypeInfo (Ty, TypeNo);
- else if (Ty.isDerivedType(Ty.getTag()))
+ else if (Ty.isDerivedType())
PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
- else if (Ty.isCompositeType(Ty.getTag()))
+ else if (Ty.isCompositeType())
PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
else {
TypeNo = PIC16Dbg::T_NULL;
@@ -41,8 +44,7 @@ void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty.
///
void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) {
- std::string Name = "";
- Ty.getName(Name);
+ std::string Name = Ty.getName();
unsigned short BaseTy = GetTypeDebugNumber(Name);
TypeNo = TypeNo << PIC16Dbg::S_BASIC;
TypeNo = TypeNo | (0xffff & BaseTy);
@@ -67,7 +69,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
// We also need to encode the information about the base type of
// pointer in TypeNo.
- DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
+ DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom();
PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
}
@@ -76,7 +78,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty.getGV());
+ DICompositeType CTy = DICompositeType(Ty.getNode());
DIArray Elements = CTy.getTypeArray();
unsigned short size = 1;
unsigned short Dimension[4]={0,0,0,0};
@@ -85,7 +87,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
TypeNo = TypeNo | PIC16Dbg::DT_ARY;
- DISubrange SubRange = DISubrange(Element.getGV());
+ DISubrange SubRange = DISubrange(Element.getNode());
Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
// Each dimension is represented by 2 bytes starting at byte 9.
Aux[8+i*2+0] = Dimension[i];
@@ -108,16 +110,20 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty.getGV());
+ DICompositeType CTy = DICompositeType(Ty.getNode());
TypeNo = TypeNo << PIC16Dbg::S_BASIC;
if (Ty.getTag() == dwarf::DW_TAG_structure_type)
TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
else
TypeNo = TypeNo | PIC16Dbg::T_UNION;
- CTy.getName(TagName);
+ TagName = CTy.getName();
// UniqueSuffix is .number where number is obtained from
// llvm.dbg.composite<number>.
- std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
+ // FIXME: This will break when a composite type is not represented by a
+ // llvm.dbg.composite* global variable. Since we need to revisit the
+ // PIC16DebugInfo implementation anyway after the MDNode-based
+ // framework is done, let us continue with the way it is.
+ std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18);
TagName += UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
// 7th and 8th byte represent size.
@@ -200,12 +206,14 @@ short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
/// required initializations.
void PIC16DbgInfo::BeginModule(Module &M) {
// Emit file directive for module.
- GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
- if (CU) {
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+ if (DbgFinder.compile_unit_count() != 0) {
+ // FIXME: What if more than one CU is present in a module?
+ MDNode *CU = *DbgFinder.compile_unit_begin();
EmitDebugDirectives = true;
SwitchToCU(CU);
}
-
// Emit debug info for decls of composite types.
EmitCompositeTypeDecls(M);
}
@@ -233,10 +241,11 @@ void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
// Retrieve the first valid debug Loc and process it.
const DebugLoc &DL = GetDebugLocForFunction(MF);
- ChangeDebugLoc(MF, DL, true);
-
- EmitFunctBeginDI(MF.getFunction());
-
+ // Emit debug info only if valid debug info is available.
+ if (!DL.isUnknown()) {
+ ChangeDebugLoc(MF, DL, true);
+ EmitFunctBeginDI(MF.getFunction());
+ }
// Set current line to 0 so that the .line directive is generated after .bf.
CurLine = 0;
}
@@ -249,7 +258,7 @@ void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,
if (! EmitDebugDirectives) return;
assert (! DL.isUnknown() && "can't change to invalid debug loc");
- GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit;
+ MDNode *CU = MF.getDebugLocTuple(DL).Scope;
unsigned line = MF.getDebugLocTuple(DL).Line;
SwitchToCU(CU);
@@ -268,7 +277,10 @@ void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
///
void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
if (! EmitDebugDirectives) return;
- EmitFunctEndDI(MF.getFunction(), CurLine);
+ const DebugLoc &DL = GetDebugLocForFunction(MF);
+ // Emit debug info only if valid debug info is available.
+ if (!DL.isUnknown())
+ EmitFunctEndDI(MF.getFunction(), CurLine);
}
/// EndModule - Emit .eof for end of module.
@@ -283,7 +295,7 @@ void PIC16DbgInfo::EndModule(Module &M) {
/// composite type.
///
void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
- std::string UniqueSuffix) {
+ std::string SuffixNo) {
unsigned long Value = 0;
DIArray Elements = CTy.getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
@@ -292,24 +304,22 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
bool HasAux = false;
int ElementAux[PIC16Dbg::AuxSize] = { 0 };
std::string TagName = "";
- std::string ElementName;
- GlobalVariable *GV = Element.getGV();
- DIDerivedType DITy(GV);
- DITy.getName(ElementName);
+ DIDerivedType DITy(Element.getNode());
+ const char *ElementName = DITy.getName();
unsigned short ElementSize = DITy.getSizeInBits()/8;
// Get mangled name for this structure/union element.
- std::string MangMemName = ElementName + UniqueSuffix;
+ std::string MangMemName = ElementName + SuffixNo;
PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
short Class = 0;
if( CTy.getTag() == dwarf::DW_TAG_union_type)
Class = PIC16Dbg::C_MOU;
else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
Class = PIC16Dbg::C_MOS;
- EmitSymbol(MangMemName, Class, TypeNo, Value);
+ EmitSymbol(MangMemName.c_str(), Class, TypeNo, Value);
if (CTy.getTag() == dwarf::DW_TAG_structure_type)
Value += ElementSize;
if (HasAux)
- EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName);
+ EmitAuxEntry(MangMemName.c_str(), ElementAux, PIC16Dbg::AuxSize, TagName);
}
}
@@ -317,48 +327,48 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
/// and union declarations.
///
void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
- for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
- E = M.getGlobalList().end(); I != E; I++) {
- // Structures and union declaration's debug info has llvm.dbg.composite
- // in its name.
- if(I->getName().find("llvm.dbg.composite") != std::string::npos) {
- GlobalVariable *GV = cast<GlobalVariable >(I);
- DICompositeType CTy(GV);
- if (CTy.getTag() == dwarf::DW_TAG_union_type ||
- CTy.getTag() == dwarf::DW_TAG_structure_type ) {
- std::string name;
- CTy.getName(name);
- std::string DIVar = I->getName();
- // Get the number after llvm.dbg.composite and make UniqueSuffix from
- // it.
- std::string UniqueSuffix = "." + DIVar.substr(18);
- std::string MangledCTyName = name + UniqueSuffix;
- unsigned short size = CTy.getSizeInBits()/8;
- int Aux[PIC16Dbg::AuxSize] = {0};
- // 7th and 8th byte represent size of structure/union.
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- // Emit .def for structure/union tag.
- if( CTy.getTag() == dwarf::DW_TAG_union_type)
- EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG);
- else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG);
-
- // Emit auxiliary debug information for structure/union tag.
- EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
-
- // Emit members.
- EmitCompositeTypeElements (CTy, UniqueSuffix);
-
- // Emit mangled Symbol for end of structure/union.
- std::string EOSSymbol = ".eos" + UniqueSuffix;
- EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
- EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName);
- }
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+ for (DebugInfoFinder::iterator I = DbgFinder.type_begin(),
+ E = DbgFinder.type_end(); I != E; ++I) {
+ DICompositeType CTy(*I);
+ if (CTy.isNull())
+ continue;
+ if (CTy.getTag() == dwarf::DW_TAG_union_type ||
+ CTy.getTag() == dwarf::DW_TAG_structure_type ) {
+ const char *Name = CTy.getName();
+ // Get the number after llvm.dbg.composite and make UniqueSuffix from
+ // it.
+ std::string DIVar = CTy.getNode()->getNameStr();
+ std::string UniqueSuffix = "." + DIVar.substr(18);
+ std::string MangledCTyName = Name + UniqueSuffix;
+ unsigned short size = CTy.getSizeInBits()/8;
+ int Aux[PIC16Dbg::AuxSize] = {0};
+ // 7th and 8th byte represent size of structure/union.
+ Aux[6] = size & 0xff;
+ Aux[7] = size >> 8;
+ // Emit .def for structure/union tag.
+ if( CTy.getTag() == dwarf::DW_TAG_union_type)
+ EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_UNTAG);
+ else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_STRTAG);
+
+ // Emit auxiliary debug information for structure/union tag.
+ EmitAuxEntry(MangledCTyName.c_str(), Aux, PIC16Dbg::AuxSize);
+
+ // Emit members.
+ EmitCompositeTypeElements (CTy, UniqueSuffix);
+
+ // Emit mangled Symbol for end of structure/union.
+ std::string EOSSymbol = ".eos" + UniqueSuffix;
+ EmitSymbol(EOSSymbol.c_str(), PIC16Dbg::C_EOS);
+ EmitAuxEntry(EOSSymbol.c_str(), Aux, PIC16Dbg::AuxSize,
+ MangledCTyName.c_str());
}
}
}
+
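To make the suffix logic concrete (a sketch; it assumes the MDNode still
carries the old llvm.dbg.composite<num> name, as the FIXME above notes):

  std::string DIVar = "llvm.dbg.composite42";          // hypothetical node name
  std::string UniqueSuffix = "." + DIVar.substr(18);   // ".42"
  // A struct named "point" is then emitted as "point.42",
  // and closed with the symbol ".eos.42".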
/// EmitFunctBeginDI - Emit .bf for function.
///
void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
@@ -425,31 +435,26 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
/// EmitVarDebugInfo - Emit debug information for all variables.
///
void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
- GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
- if (!Root)
- return;
-
- Constant *RootC = cast<Constant>(*Root->use_begin());
- for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
- UI != UE; ++UI) {
- for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
- UUI != UUE; ++UUI) {
- DIGlobalVariable DIGV(cast<GlobalVariable>(*UUI));
- DIType Ty = DIGV.getType();
- unsigned short TypeNo = 0;
- bool HasAux = false;
- int Aux[PIC16Dbg::AuxSize] = { 0 };
- std::string TagName = "";
- std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
- PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
- // Emit debug info only if type information is availaible.
- if (TypeNo != PIC16Dbg::T_NULL) {
- O << "\n\t.type " << VarName << ", " << TypeNo;
- short ClassNo = getStorageClass(DIGV);
- O << "\n\t.class " << VarName << ", " << ClassNo;
- if (HasAux)
- EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
- }
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I) {
+ DIGlobalVariable DIGV(*I);
+ DIType Ty = DIGV.getType();
+ unsigned short TypeNo = 0;
+ bool HasAux = false;
+ int Aux[PIC16Dbg::AuxSize] = { 0 };
+ std::string TagName = "";
+ std::string VarName = MAI->getGlobalPrefix()+DIGV.getGlobal()->getNameStr();
+ PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
+ // Emit debug info only if type information is available.
+ if (TypeNo != PIC16Dbg::T_NULL) {
+ O << "\n\t.type " << VarName << ", " << TypeNo;
+ short ClassNo = getStorageClass(DIGV);
+ O << "\n\t.class " << VarName << ", " << ClassNo;
+ if (HasAux)
+ EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
}
}
O << "\n";
@@ -457,12 +462,12 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
/// SwitchToCU - Switch to a new compilation unit.
///
-void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) {
+void PIC16DbgInfo::SwitchToCU(MDNode *CU) {
// Get the file path from CU.
DICompileUnit cu(CU);
- std::string DirName, FileName;
- std::string FilePath = cu.getDirectory(DirName) + "/" +
- cu.getFilename(FileName);
+ std::string DirName = cu.getDirectory();
+ std::string FileName = cu.getFilename();
+ std::string FilePath = DirName + "/" + FileName;
// Nothing to do if source file is still same.
if ( FilePath == CurFile ) return;
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
index d126d851b50e..54e27c7c3377 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -16,8 +16,6 @@
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include <map>
namespace llvm {
class MachineFunction;
@@ -90,11 +88,11 @@ namespace llvm {
};
}
- class raw_ostream;
+ class formatted_raw_ostream;
class PIC16DbgInfo {
- raw_ostream &O;
- const TargetAsmInfo *TAI;
+ formatted_raw_ostream &O;
+ const MCAsmInfo *MAI;
std::string CurFile;
unsigned CurLine;
@@ -103,7 +101,8 @@ namespace llvm {
bool EmitDebugDirectives;
public:
- PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
+ PIC16DbgInfo(formatted_raw_ostream &o, const MCAsmInfo *T)
+ : O(o), MAI(T) {
CurFile = "";
CurLine = 0;
EmitDebugDirectives = false;
@@ -118,7 +117,7 @@ namespace llvm {
private:
- void SwitchToCU (GlobalVariable *CU);
+ void SwitchToCU (MDNode *CU);
void SwitchToLine (unsigned Line, bool IsInBeginFunction = false);
void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux,
@@ -144,8 +143,7 @@ namespace llvm {
short getStorageClass(DIGlobalVariable DIGV);
void EmitFunctBeginDI(const Function *F);
void EmitCompositeTypeDecls(Module &M);
- void EmitCompositeTypeElements (DICompositeType CTy,
- std::string UniqueSuffix);
+ void EmitCompositeTypeElements (DICompositeType CTy, std::string Suffix);
void EmitFunctEndDI(const Function *F, unsigned Line);
void EmitAuxEntry(const std::string VarName, int Aux[],
int num = PIC16Dbg::AuxSize, std::string TagName = "");
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
index 6c2b8ec9747a..cc57d12c9042 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -13,6 +13,8 @@
#define DEBUG_TYPE "pic16-isel"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "PIC16ISelDAGToDAG.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index 83abed3958a4..3a2f6b47b37e 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -31,7 +31,7 @@ class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
/// PIC16Lowering - This object fully describes how to lower LLVM code to an
/// PIC16-specific SelectionDAG.
- PIC16TargetLowering PIC16Lowering;
+ PIC16TargetLowering &PIC16Lowering;
public:
explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) :
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 0d24f61c49a8..bf986b1354c5 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pic16-lower"
-
#include "PIC16ISelLowering.h"
+#include "PIC16TargetObjectFile.h"
#include "PIC16TargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -30,7 +31,7 @@ using namespace llvm;
static const char *getIntrinsicName(unsigned opcode) {
std::string Basename;
switch(opcode) {
- default: assert (0 && "do not know intrinsic name");
+ default: llvm_unreachable("do not know intrinsic name");
// Arithmetic Right shift for integer types.
case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
case RTLIB::SRA_I16: Basename = "sra.i16"; break;
@@ -114,22 +115,48 @@ static const char *getIntrinsicName(unsigned opcode) {
std::string Fullname = prefix + tagname + Basename;
// The name has to live through program life.
- char *tmp = new char[Fullname.size() + 1];
- strcpy (tmp, Fullname.c_str());
-
- return tmp;
+ return createESName(Fullname);
+}
+
+// getStdLibCallName - Get the name for the standard library function.
+static const char *getStdLibCallName(unsigned opcode) {
+ std::string BaseName;
+ switch(opcode) {
+ case RTLIB::COS_F32: BaseName = "cos";
+ break;
+ case RTLIB::SIN_F32: BaseName = "sin";
+ break;
+ case RTLIB::MEMCPY: BaseName = "memcpy";
+ break;
+ case RTLIB::MEMSET: BaseName = "memset";
+ break;
+ case RTLIB::MEMMOVE: BaseName = "memmove";
+ break;
+ default: llvm_unreachable("do not know std lib call name");
+ }
+ std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+ std::string LibCallName = prefix + BaseName;
+
+ // The name has to live through program life.
+ return createESName(LibCallName);
}
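Assuming again that PREFIX_SYMBOL expands to "@", this helper yields names
such as:

  getStdLibCallName(RTLIB::MEMCPY);   // "@memcpy"
  // which is exactly the form PAN::isMemIntrinsic() in PIC16.h tests for.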
// PIC16TargetLowering Constructor.
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
- : TargetLowering(TM), TmpSize(0) {
+ : TargetLowering(TM, new PIC16TargetObjectFile()), TmpSize(0) {
Subtarget = &TM.getSubtarget<PIC16Subtarget>();
addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
setShiftAmountType(MVT::i8);
- setShiftAmountFlavor(Extend);
+
+ // Std lib call names
+ setLibcallName(RTLIB::COS_F32, getStdLibCallName(RTLIB::COS_F32));
+ setLibcallName(RTLIB::SIN_F32, getStdLibCallName(RTLIB::SIN_F32));
+ setLibcallName(RTLIB::MEMCPY, getStdLibCallName(RTLIB::MEMCPY));
+ setLibcallName(RTLIB::MEMSET, getStdLibCallName(RTLIB::MEMSET));
+ setLibcallName(RTLIB::MEMMOVE, getStdLibCallName(RTLIB::MEMMOVE));
// SRA library call names
setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8));
@@ -226,6 +253,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
setOperationAction(ISD::STORE, MVT::i8, Legal);
setOperationAction(ISD::STORE, MVT::i16, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i8, Custom);
setOperationAction(ISD::ADDC, MVT::i8, Custom);
@@ -240,46 +268,27 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
setOperationAction(ISD::XOR, MVT::i8, Custom);
setOperationAction(ISD::FrameIndex, MVT::i16, Custom);
- setOperationAction(ISD::CALL, MVT::i16, Custom);
- setOperationAction(ISD::RET, MVT::Other, Custom);
- setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i16, Expand);
- setOperationAction(ISD::MUL, MVT::i32, Expand);
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SRA, MVT::i8, Custom);
- setOperationAction(ISD::SRA, MVT::i16, Expand);
- setOperationAction(ISD::SRA, MVT::i32, Expand);
setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SHL, MVT::i16, Expand);
- setOperationAction(ISD::SHL, MVT::i32, Expand);
setOperationAction(ISD::SRL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i16, Expand);
- setOperationAction(ISD::SRL, MVT::i32, Expand);
+
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// PIC16 does not support shift parts
- setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
// PIC16 does not have a SETCC, expand it to SELECT_CC.
@@ -356,7 +365,8 @@ static void PopulateResults(SDValue N, SmallVectorImpl<SDValue>&Results) {
Results.push_back(N);
}
-MVT PIC16TargetLowering::getSetCCResultType(MVT ValType) const {
+MVT::SimpleValueType
+PIC16TargetLowering::getSetCCResultType(EVT ValType) const {
return MVT::i8;
}
@@ -379,7 +389,7 @@ PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) {
SDValue
PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
- MVT RetVT, const SDValue *Ops,
+ EVT RetVT, const SDValue *Ops,
unsigned NumOps, bool isSigned,
SelectionDAG &DAG, DebugLoc dl) {
@@ -389,17 +399,20 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForMVT();
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
- SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i8);
- const Type *RetTy = RetVT.getTypeForMVT();
+ SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i16);
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, CallingConv::C, false, Callee, Args, DAG, dl);
+ false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -429,6 +442,7 @@ const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
case PIC16ISD::SUBCC: return "PIC16ISD::SUBCC";
case PIC16ISD::SELECT_ICC: return "PIC16ISD::SELECT_ICC";
case PIC16ISD::BRCOND: return "PIC16ISD::BRCOND";
+ case PIC16ISD::RET: return "PIC16ISD::RET";
case PIC16ISD::Dummy: return "PIC16ISD::Dummy";
}
}
@@ -502,7 +516,7 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
SDValue Chain = St->getChain();
SDValue Src = St->getValue();
SDValue Ptr = St->getBasePtr();
- MVT ValueType = Src.getValueType();
+ EVT ValueType = Src.getValueType();
unsigned StoreOffset = 0;
DebugLoc dl = N->getDebugLoc();
@@ -519,6 +533,10 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
SDValue SrcLo, SrcHi;
GetExpandedParts(Src, DAG, SrcLo, SrcHi);
SDValue ChainLo = Chain, ChainHi = Chain;
+ // FIXME: This makes unsafe assumptions. The Chain may be a TokenFactor
+ // created for an unrelated purpose, in which case it may not have
+ // exactly two operands. Also, even if it does have two operands, they
+ // may not be the low and high parts of an aligned load that was split.
if (Chain.getOpcode() == ISD::TokenFactor) {
ChainLo = Chain.getOperand(0);
ChainHi = Chain.getOperand(1);
@@ -546,16 +564,19 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2);
SDValue ChainLo = Chain, ChainHi = Chain;
+ // FIXME: This makes unsafe assumptions; see the FIXME above.
if (Chain.getOpcode() == ISD::TokenFactor) {
ChainLo = Chain.getOperand(0);
ChainHi = Chain.getOperand(1);
}
SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi,
ChainHi2 = ChainHi;
+ // FIXME: This makes unsafe assumptions; see the FIXME above.
if (ChainLo.getOpcode() == ISD::TokenFactor) {
ChainLo1 = ChainLo.getOperand(0);
ChainLo2 = ChainLo.getOperand(1);
}
+ // FIXME: This makes unsafe assumptions; see the FIXME above.
if (ChainHi.getOpcode() == ISD::TokenFactor) {
ChainHi1 = ChainHi.getOperand(0);
ChainHi2 = ChainHi.getOperand(1);
@@ -583,8 +604,26 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
getChain(Store3), getChain(Store4));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi);
- }
- else {
+ } else if (ValueType == MVT::i64) {
+ SDValue SrcLo, SrcHi;
+ GetExpandedParts(Src, DAG, SrcLo, SrcHi);
+ SDValue ChainLo = Chain, ChainHi = Chain;
+ // FIXME: This makes unsafe assumptions; see the FIXME above.
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ ChainLo = Chain.getOperand(0);
+ ChainHi = Chain.getOperand(1);
+ }
+ SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL,
+ 0 + StoreOffset);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL,
+ 1 + StoreOffset);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1,
+ Store2);
+ } else {
assert (0 && "value type not supported");
return SDValue();
}
@@ -660,7 +699,7 @@ void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
SDValue &Lo, SDValue &Hi) {
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- MVT NewVT = getTypeToTransformTo(N->getValueType(0));
+ EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
// Extract the lo component.
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
@@ -808,7 +847,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
SDValue Load, Offset;
SDVTList Tys;
- MVT VT, NewVT;
+ EVT VT, NewVT;
SDValue PtrLo, PtrHi;
unsigned LoadOffset;
@@ -821,7 +860,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
unsigned NumLoads = VT.getSizeInBits() / 8;
std::vector<SDValue> PICLoads;
unsigned iter;
- MVT MemVT = LD->getMemoryVT();
+ EVT MemVT = LD->getMemoryVT();
if(ISD::isNON_EXTLoad(N)) {
for (iter=0; iter<NumLoads ; ++iter) {
// Add the pointer offset if any
@@ -839,7 +878,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
// For extended loads this is the memory value type
// i.e. without any extension
- MVT MemVT = LD->getMemoryVT();
+ EVT MemVT = LD->getMemoryVT();
unsigned MemBytes = MemVT.getSizeInBits() / 8;
// if MVT::i1 is extended to MVT::i8 then MemBytes will be zero
// So set it to one
@@ -945,6 +984,19 @@ SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
return Call;
}
+SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower");
+
+ SDNode *N = Op.getNode();
+ SmallVector<SDValue, 2> Ops(2);
+ Ops[0] = N->getOperand(0);
+ Ops[1] = N->getOperand(1);
+ SDValue Call = MakePIC16Libcall(PIC16ISD::MUL_I8, N->getValueType(0),
+ &Ops[0], 2, true, DAG, N->getDebugLoc());
+ return Call;
+}
+
void
PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue>&Results,
@@ -953,12 +1005,8 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue Res;
unsigned i;
switch (Op.getOpcode()) {
- case ISD::FORMAL_ARGUMENTS:
- Res = LowerFORMAL_ARGUMENTS(Op, DAG); break;
case ISD::LOAD:
Res = ExpandLoad(Op.getNode(), DAG); break;
- case ISD::CALL:
- Res = LowerCALL(Op, DAG); break;
default: {
// All other operations are handled in LowerOperation.
Res = LowerOperation(Op, DAG);
@@ -978,8 +1026,6 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- case ISD::FORMAL_ARGUMENTS:
- return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
@@ -992,6 +1038,8 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return ExpandLoad(Op.getNode(), DAG);
case ISD::STORE:
return ExpandStore(Op.getNode(), DAG);
+ case ISD::MUL:
+ return LowerMUL(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -1000,10 +1048,6 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::AND:
case ISD::XOR:
return LowerBinOp(Op, DAG);
- case ISD::CALL:
- return LowerCALL(Op, DAG);
- case ISD::RET:
- return LowerRET(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT_CC:
@@ -1048,12 +1092,12 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
}
SDValue PIC16TargetLowering::
-LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- SelectionDAG &DAG) {
- CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
- unsigned NumOps = TheCall->getNumArgs();
- DebugLoc dl = TheCall->getDebugLoc();
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG) {
+ unsigned NumOps = Outs.size();
// If call has no arguments then do nothing and return.
if (NumOps == 0)
@@ -1064,10 +1108,10 @@ LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
SDValue Arg, StoreRet;
// For PIC16 ABI the arguments come after the return value.
- unsigned RetVals = TheCall->getNumRetVals();
+ unsigned RetVals = Ins.size();
for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
// Get the arguments
- Arg = TheCall->getArg(i);
+ Arg = Outs[i].Val;
Ops.clear();
Ops.push_back(Chain);
@@ -1087,16 +1131,14 @@ LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
}
SDValue PIC16TargetLowering::
-LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel,
- SDValue InFlag, SelectionDAG &DAG) {
- CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
- unsigned NumOps = TheCall->getNumArgs();
- DebugLoc dl = TheCall->getDebugLoc();
+LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+ unsigned NumOps = Outs.size();
std::string Name;
SDValue Arg, StoreAt;
- MVT ArgVT;
+ EVT ArgVT;
unsigned Size=0;
- unsigned ArgCount=0;
// If call has no arguments then do nothing and return.
if (NumOps == 0)
@@ -1114,9 +1156,9 @@ LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel,
std::vector<SDValue> Ops;
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- for (unsigned i=ArgCount, Offset = 0; i<NumOps; i++) {
+ for (unsigned i=0, Offset = 0; i<NumOps; i++) {
// Get the argument
- Arg = TheCall->getArg(i);
+ Arg = Outs[i].Val;
StoreOffset = (Offset + AddressOffset);
// Store the argument on frame
@@ -1144,12 +1186,12 @@ LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel,
}
SDValue PIC16TargetLowering::
-LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag,
- SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- SelectionDAG &DAG) {
- CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
- DebugLoc dl = TheCall->getDebugLoc();
- unsigned RetVals = TheCall->getNumRetVals();
+LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ unsigned RetVals = Ins.size();
// If call does not have anything to return
// then do nothing and go back.
@@ -1157,7 +1199,6 @@ LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag,
return Chain;
// Call has something to return
- std::vector<SDValue> ResultVals;
SDValue LoadRet;
SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
@@ -1167,23 +1208,20 @@ LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag,
InFlag);
InFlag = getOutFlag(LoadRet);
Chain = getChain(LoadRet);
- ResultVals.push_back(LoadRet);
+ InVals.push_back(LoadRet);
}
- ResultVals.push_back(Chain);
- SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl);
- return Res;
+ return Chain;
}
SDValue PIC16TargetLowering::
-LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue RetLabel,
- SDValue InFlag, SelectionDAG &DAG) {
- CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
- DebugLoc dl = TheCall->getDebugLoc();
+LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
// Currently handling primitive types only. They will come in
// i8 parts
- unsigned RetVals = TheCall->getNumRetVals();
-
- std::vector<SDValue> ResultVals;
+ unsigned RetVals = Ins.size();
// Return immediately if the return type is void
if (RetVals == 0)
@@ -1209,30 +1247,20 @@ LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue RetLabel,
Chain = getChain(LoadRet);
Offset++;
- ResultVals.push_back(LoadRet);
+ InVals.push_back(LoadRet);
}
- // To return use MERGE_VALUES
- ResultVals.push_back(Chain);
- SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl);
- return Res;
+ return Chain;
}
-SDValue PIC16TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
- SDValue Chain = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
-
- if (Op.getNumOperands() == 1) // return void
- return Op;
+SDValue
+PIC16TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
- // return should have odd number of operands
- if ((Op.getNumOperands() % 2) == 0 ) {
- assert(0 && "Do not know how to return this many arguments!");
- abort();
- }
-
// Number of values to return
- unsigned NumRet = (Op.getNumOperands() / 2);
+ unsigned NumRet = Outs.size();
// Function returns value always on stack with the offset starting
// from 0
@@ -1246,68 +1274,13 @@ SDValue PIC16TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
SDValue BS = DAG.getConstant(1, MVT::i8);
SDValue RetVal;
for(unsigned i=0;i<NumRet; ++i) {
- RetVal = Op.getNode()->getOperand(2*i + 1);
+ RetVal = Outs[i].Val;
Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
ES, BS,
DAG.getConstant (i, MVT::i8));
}
- return DAG.getNode(ISD::RET, dl, MVT::Other, Chain);
-}
-
-// CALL node may have some operands non-legal to PIC16. Generate new CALL
-// node with all the operands legal.
-// Currently only Callee operand of the CALL node is non-legal. This function
-// legalizes the Callee operand and uses all other operands as are to generate
-// new CALL node.
-
-SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) {
- CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- DebugLoc dl = TheCall->getDebugLoc();
- unsigned i =0;
-
- assert(Callee.getValueType() == MVT::i16 &&
- "Don't know how to legalize this call node!!!");
- assert(Callee.getOpcode() == ISD::BUILD_PAIR &&
- "Don't know how to legalize this call node!!!");
-
- if (isDirectAddress(Callee)) {
- // Come here for direct calls
- Callee = Callee.getOperand(0).getOperand(0);
- } else {
- // Come here for indirect calls
- SDValue Lo, Hi;
- // Indirect addresses. Get the hi and lo parts of ptr.
- GetExpandedParts(Callee, DAG, Lo, Hi);
- // Connect Lo and Hi parts of the callee with the PIC16Connect
- Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
- }
- std::vector<SDValue> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add the call arguments and their flags
- unsigned NumArgs = TheCall->getNumArgs();
- for(i=0;i<NumArgs;i++) {
- Ops.push_back(TheCall->getArg(i));
- Ops.push_back(TheCall->getArgFlagsVal(i));
- }
- std::vector<MVT> NodeTys;
- unsigned NumRets = TheCall->getNumRetVals();
- for(i=0;i<NumRets;i++)
- NodeTys.push_back(TheCall->getRetValType(i));
-
- // Return a Chain as well
- NodeTys.push_back(MVT::Other);
-
- SDVTList VTs = DAG.getVTList(&NodeTys[0], NodeTys.size());
- // Generate new call with all the operands legal
- return DAG.getCall(TheCall->getCallingConv(), dl,
- TheCall->isVarArg(), TheCall->isTailCall(),
- TheCall->isInreg(), VTs, &Ops[0], Ops.size(),
- TheCall->getNumFixedArgs());
+ return DAG.getNode(PIC16ISD::RET, dl, MVT::Other, Chain);
}
void PIC16TargetLowering::
@@ -1372,36 +1345,40 @@ GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
}
+SDValue
+PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
-SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
- CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- DebugLoc dl = TheCall->getDebugLoc();
- if (Callee.getValueType() == MVT::i16 &&
- Callee.getOpcode() == ISD::BUILD_PAIR) {
- // Control should come here only from TypeLegalizer for lowering
-
- // Legalize the non-legal arguments of call and return the
- // new call with legal arguments.
- return LegalizeCALL(Op, DAG);
- }
- // Control should come here from Legalize DAG.
- // Here all the operands of CALL node should be legal.
-
- // If this is an indirect call then to pass the arguments
- // and read the return value back, we need the data address
- // of the function being called.
- // To get the data address two more calls need to be made.
+ assert(Callee.getValueType() == MVT::i16 &&
+ "Don't know how to legalize this call node!!!");
// The flag to track if this is a direct or indirect call.
bool IsDirectCall = true;
- unsigned RetVals = TheCall->getNumRetVals();
- unsigned NumArgs = TheCall->getNumArgs();
+ unsigned RetVals = Ins.size();
+ unsigned NumArgs = Outs.size();
SDValue DataAddr_Lo, DataAddr_Hi;
- if (Callee.getOpcode() == PIC16ISD::PIC16Connect) {
+ if (!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
IsDirectCall = false; // This is indirect call
+
+ // If this is an indirect call then to pass the arguments
+ // and read the return value back, we need the data address
+ // of the function being called.
+ // To get the data address two more calls need to be made.
+
+ // Come here for indirect calls
+ SDValue Lo, Hi;
+ // Indirect addresses. Get the hi and lo parts of ptr.
+ GetExpandedParts(Callee, DAG, Lo, Hi);
+ // Connect Lo and Hi parts of the callee with the PIC16Connect
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
+
// Read DataAddress only if we have to pass arguments or
// read return value.
if ((RetVals > 0) || (NumArgs > 0))
@@ -1457,12 +1434,13 @@ SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Pass the argument to function before making the call.
SDValue CallArgs;
if (IsDirectCall) {
- CallArgs = LowerDirectCallArguments(Op, Chain, ArgLabel, OperFlag, DAG);
+ CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag,
+ Outs, dl, DAG);
Chain = getChain(CallArgs);
OperFlag = getOutFlag(CallArgs);
} else {
- CallArgs = LowerIndirectCallArguments(Op, Chain, OperFlag, DataAddr_Lo,
- DataAddr_Hi, DAG);
+ CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo,
+ DataAddr_Hi, Outs, Ins, dl, DAG);
Chain = getChain(CallArgs);
OperFlag = getOutFlag(CallArgs);
}
@@ -1483,10 +1461,11 @@ SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Lower the return value reading after the call.
if (IsDirectCall)
- return LowerDirectCallReturn(Op, Chain, RetLabel, OperFlag, DAG);
+ return LowerDirectCallReturn(RetLabel, Chain, OperFlag,
+ Ins, dl, DAG, InVals);
else
- return LowerIndirectCallReturn(Op, Chain, OperFlag, DataAddr_Lo,
- DataAddr_Hi, DAG);
+ return LowerIndirectCallReturn(Chain, OperFlag, DataAddr_Lo,
+ DataAddr_Hi, Ins, dl, DAG, InVals);
}
bool PIC16TargetLowering::isDirectLoad(const SDValue Op) {
@@ -1591,11 +1570,20 @@ SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) {
SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl);
SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
- if (Op.getOpcode() == ISD::SUBE)
- return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1),
- Op.getOperand(2));
- else
- return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1));
+ switch (Op.getOpcode()) {
+ default:
+ assert (0 && "Opcode unknown.");
+ case ISD::SUBE:
+ return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1),
+ Op.getOperand(2));
+ break;
+ case ISD::SUBC:
+ return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1));
+ break;
+ case ISD::SUB:
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i8, NewVal, Op.getOperand(1));
+ break;
+ }
}
void PIC16TargetLowering::InitReservedFrameCount(const Function *F) {
@@ -1609,17 +1597,19 @@ void PIC16TargetLowering::InitReservedFrameCount(const Function *F) {
ReservedFrameCount = NumArgs + 1;
}
-// LowerFORMAL_ARGUMENTS - Argument values are loaded from the
+// LowerFormalArguments - Argument values are loaded from the
// <fname>.args + offset. All arguments are already broken into legalized
// types, so the offset just runs from 0 to NumArgVals - 1.
-SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
- SelectionDAG &DAG) {
- SmallVector<SDValue, 8> ArgValues;
- unsigned NumArgVals = Op.getNode()->getNumValues() - 1;
- DebugLoc dl = Op.getDebugLoc();
- SDValue Chain = Op.getOperand(0); // Formal arguments' chain
-
+SDValue
+PIC16TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ unsigned NumArgVals = Ins.size();
// Get the callee's name to create the <fname>.args label to pass args.
MachineFunction &MF = DAG.getMachineFunction();
@@ -1643,13 +1633,10 @@ SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS,
Offset);
Chain = getChain(PICLoad);
- ArgValues.push_back(PICLoad);
+ InVals.push_back(PICLoad);
}
- // Return a MERGE_VALUE node.
- ArgValues.push_back(Op.getOperand(0));
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
// Perform DAGCombine of PIC16Load.
@@ -1697,7 +1684,7 @@ SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N,
static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
switch (CC) {
- default: assert(0 && "Unknown condition code!");
+ default: llvm_unreachable("Unknown condition code!");
case ISD::SETNE: return PIC16CC::NE;
case ISD::SETEQ: return PIC16CC::EQ;
case ISD::SETGT: return PIC16CC::GT;
@@ -1826,7 +1813,8 @@ SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
MachineBasicBlock *
PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
@@ -1852,9 +1840,18 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
+ // Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index b40ea12c15f6..286ed2411ef8 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -52,6 +52,7 @@ namespace llvm {
SUBCC, // Compare for equality or inequality.
SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond.
BRCOND, // Conditional branch.
+ RET, // Return.
Dummy
};
@@ -81,39 +82,45 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT getSetCCResultType(MVT ValType) const;
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG);
SDValue LowerADD(SDValue Op, SelectionDAG &DAG);
SDValue LowerSUB(SDValue Op, SelectionDAG &DAG);
SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
// Call returns
SDValue
- LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue FrameAddress,
- SDValue InFlag, SelectionDAG &DAG);
+ LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
SDValue
- LowerIndirectCallReturn(SDValue Op, SDValue Chain, SDValue InFlag,
- SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- SelectionDAG &DAG);
+ LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
// Call arguments
SDValue
- LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue FrameAddress,
- SDValue InFlag, SelectionDAG &DAG);
+ LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
SDValue
- LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- SelectionDAG &DAG);
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG);
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
SelectionDAG &DAG, DebugLoc dl);
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
@@ -124,6 +131,28 @@ namespace llvm {
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG);
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
SDValue ExpandStore(SDNode *N, SelectionDAG &DAG);
SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG);
SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG);
@@ -174,12 +203,6 @@ namespace llvm {
void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES,
int &Offset);
-
- // CALL node should have all legal operands only. Legalize all non-legal
- // operands of CALL node and then return the new call will all operands
- // legal.
- SDValue LegalizeCALL(SDValue Op, SelectionDAG &DAG);
-
// For indirect calls data address of the callee frame need to be
// extracted. This function fills the arguments DataAddr_Lo and
// DataAddr_Hi with the address of the callee frame.
@@ -209,7 +232,7 @@ namespace llvm {
const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call);
// Make PIC16 Libcall.
- SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, MVT RetVT,
+ SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
SelectionDAG &DAG, DebugLoc dl);
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 8418423fa06a..cb0c41bc0b5c 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
@@ -104,7 +105,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addImm(1); // Emit banksel for it.
}
else
- assert(0 && "Can't store this register to stack slot");
+ llvm_unreachable("Can't store this register to stack slot");
}
void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -144,7 +145,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(1); // Emit banksel for it.
}
else
- assert(0 && "Can't load this register from stack slot");
+ llvm_unreachable("Can't load this register from stack slot");
}
bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
index a054bdcbe3c3..250ca0a373f2 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.td
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -115,6 +115,8 @@ def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond,
def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc,
[SDNPInFlag]>;
+def PIC16ret : SDNode<"PIC16ISD::RET", SDTNone, [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// PIC16 Operand Definitions.
//===----------------------------------------------------------------------===//
@@ -375,8 +377,9 @@ def subfw_2: SUBFW<0, "subwf", subc>;
let Uses = [STATUS] in
def subfwb: SUBFW<0, "subwfb", sube>; // With Borrow.
-def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
}
+let Defs = [STATUS], isTerminator = 1 in
+def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
// [F] -= W ;
let mayStore = 1 in
@@ -425,8 +428,9 @@ class SUBLW<bits<6> opcode, SDNode OpNode> :
let Defs = [STATUS] in {
def sublw_1 : SUBLW<0, sub>;
def sublw_2 : SUBLW<0, subc>;
-def sublw_cc : SUBLW<0, PIC16Subcc>;
}
+let Defs = [STATUS], isTerminator = 1 in
+def sublw_cc : SUBLW<0, PIC16Subcc>;
// Call instruction.
let isCall = 1,
@@ -489,8 +493,9 @@ def pagesel :
// Return insn.
+let isTerminator = 1, isBarrier = 1, isReturn = 1 in
def Return :
- ControlFormat<0, (outs), (ins), "return", [(ret)]>;
+ ControlFormat<0, (outs), (ins), "return", [(PIC16ret)]>;
//===----------------------------------------------------------------------===//
// PIC16 Replacement Patterns.
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
new file mode 100644
index 000000000000..a17d1a8b1c25
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
@@ -0,0 +1,58 @@
+//===-- PIC16MCAsmInfo.cpp - PIC16 asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PIC16MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16MCAsmInfo.h"
+
+// FIXME: Layering violation to get enums and static function, should be moved
+// to separate headers.
+#include "PIC16.h"
+#include "PIC16ISelLowering.h"
+using namespace llvm;
+
+PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) {
+ CommentString = ";";
+ GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+ GlobalDirective = "\tglobal\t";
+ ExternDirective = "\textern\t";
+
+ Data8bitsDirective = " db ";
+ Data16bitsDirective = " dw ";
+ Data32bitsDirective = " dl ";
+ Data64bitsDirective = NULL;
+ ZeroDirective = NULL;
+ AsciiDirective = " dt ";
+ AscizDirective = NULL;
+
+ RomData8bitsDirective = " dw ";
+ RomData16bitsDirective = " rom_di ";
+ RomData32bitsDirective = " rom_dl ";
+
+
+  // Set it to false because we need to generate the .c file name and not
+  // the .bc file name.
+ HasSingleParameterDotFile = false;
+}
+
+const char *PIC16MCAsmInfo::getDataASDirective(unsigned Size,
+ unsigned AS) const {
+ if (AS != PIC16ISD::ROM_SPACE)
+ return 0;
+
+ switch (Size) {
+ case 8: return RomData8bitsDirective;
+ case 16: return RomData16bitsDirective;
+ case 32: return RomData32bitsDirective;
+ default: return NULL;
+ }
+}
+
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h
new file mode 100644
index 000000000000..e84db8532a15
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.h
@@ -0,0 +1,35 @@
+//=====-- PIC16MCAsmInfo.h - PIC16 asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PIC16MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16TARGETASMINFO_H
+#define PIC16TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class PIC16MCAsmInfo : public MCAsmInfo {
+ const char *RomData8bitsDirective;
+ const char *RomData16bitsDirective;
+ const char *RomData32bitsDirective;
+ public:
+ PIC16MCAsmInfo(const Target &T, const StringRef &TT);
+
+ virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
+ };
+
+} // namespace llvm
+
+#endif
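A rough sketch of how an emitter is expected to consult the new hook: ask
getDataASDirective() first and fall back to the ordinary directive when it
returns null (the helper below is ours, not part of the patch):

    // Hypothetical helper: pick the directive for a 16-bit datum, honoring
    // the ROM address-space override installed by PIC16MCAsmInfo above.
    const char *dataDirectiveFor16(const PIC16MCAsmInfo &MAI, unsigned AS) {
      if (const char *Dir = MAI.getDataASDirective(16, AS))
        return Dir;   // " rom_di " when AS == PIC16ISD::ROM_SPACE
      return " dw ";  // default 16-bit RAM data directive set above
    }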
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index 43d47ae5292f..c9ebb5756cda 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -22,7 +22,7 @@
#define DEBUG_TYPE "pic16-codegen"
#include "PIC16.h"
#include "PIC16InstrInfo.h"
-#include "PIC16TargetAsmInfo.h"
+#include "PIC16MCAsmInfo.h"
#include "PIC16TargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index eb758d8543d0..47087ab3cb94 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -16,7 +16,7 @@
#include "PIC16.h"
#include "PIC16RegisterInfo.h"
#include "llvm/ADT/BitVector.h"
-
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -51,10 +51,13 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
return false;
}
-void PIC16RegisterInfo::
+unsigned PIC16RegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
-{ /* NOT YET IMPLEMENTED */ }
+ int *Value, RegScavenger *RS) const
+{
+ /* NOT YET IMPLEMENTED */
+ return 0;
+}
void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
{ /* NOT YET IMPLEMENTED */ }
@@ -65,17 +68,17 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
int PIC16RegisterInfo::
getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "Not keeping track of debug information yet!!");
+ llvm_unreachable("Not keeping track of debug information yet!!");
return -1;
}
unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const {
- assert(0 && "PIC16 Does not have any frame register");
+ llvm_unreachable("PIC16 Does not have any frame register");
return 0;
}
unsigned PIC16RegisterInfo::getRARegister() const {
- assert(0 && "PIC16 Does not have any return address register");
+ llvm_unreachable("PIC16 Does not have any return address register");
return 0;
}
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 83689d0486b1..8aa5a10732e1 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -48,8 +48,9 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool hasFP(const MachineFunction &MF) const;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS=NULL) const;
+ virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS=NULL) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp
index db8a5d84a4bf..33fc3fb16994 100644
--- a/lib/Target/PIC16/PIC16Subtarget.cpp
+++ b/lib/Target/PIC16/PIC16Subtarget.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
-PIC16Subtarget::PIC16Subtarget(const Module &M, const std::string &FS,
+PIC16Subtarget::PIC16Subtarget(const std::string &TT, const std::string &FS,
bool Cooper)
:IsCooper(Cooper)
{
diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h
index e5147a0cf892..81e3783d7299 100644
--- a/lib/Target/PIC16/PIC16Subtarget.h
+++ b/lib/Target/PIC16/PIC16Subtarget.h
@@ -19,7 +19,6 @@
#include <string>
namespace llvm {
-class Module;
class PIC16Subtarget : public TargetSubtarget {
@@ -28,9 +27,9 @@ class PIC16Subtarget : public TargetSubtarget {
public:
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- PIC16Subtarget(const Module &M, const std::string &FS, bool Cooper);
+ PIC16Subtarget(const std::string &TT, const std::string &FS, bool Cooper);
/// isCooper - Returns true if the target ISA is Cooper.
bool isCooper() const { return IsCooper; }
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index 77ad1882ca9e..08307e7cef3f 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -12,51 +12,32 @@
//===----------------------------------------------------------------------===//
#include "PIC16.h"
-#include "PIC16TargetAsmInfo.h"
+#include "PIC16MCAsmInfo.h"
#include "PIC16TargetMachine.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-/// PIC16TargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int PIC16TargetMachineModule;
-int PIC16TargetMachineModule = 0;
-
-
-// Register the targets
-static RegisterTarget<PIC16TargetMachine>
-X("pic16", "PIC16 14-bit [experimental].");
-static RegisterTarget<CooperTargetMachine>
-Y("cooper", "PIC16 Cooper [experimental].");
+extern "C" void LLVMInitializePIC16Target() {
+  // Register the target. Currently the codegen works for
+  // the enhanced PIC16 mid-range.
+ RegisterTargetMachine<PIC16TargetMachine> X(ThePIC16Target);
+ RegisterAsmInfo<PIC16MCAsmInfo> A(ThePIC16Target);
+}
-// Force static initialization.
-extern "C" void LLVMInitializePIC16Target() { }
-// PIC16TargetMachine - Traditional PIC16 Machine.
-PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
- bool Cooper)
-: Subtarget(M, FS, Cooper),
+// PIC16TargetMachine - Enhanced PIC16 mid-range Machine. May also represent
+// a Traditional Machine if 'Trad' is true.
+PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool Trad)
+: LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, Trad),
DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
-// CooperTargetMachine - Uses the same PIC16TargetMachine, but makes IsCooper
-// as true.
-CooperTargetMachine::CooperTargetMachine(const Module &M, const std::string &FS)
- : PIC16TargetMachine(M, FS, true) {}
-
-
-const TargetAsmInfo *PIC16TargetMachine::createTargetAsmInfo() const {
- return new PIC16TargetAsmInfo(*this);
-}
bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
@@ -65,15 +46,7 @@ bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
return false;
}
-bool PIC16TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out) {
- // Output assembly language.
- PM.add(createPIC16CodePrinterPass(Out, *this, Verbose));
- return false;
-}
-
-bool PIC16TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+bool PIC16TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createPIC16MemSelOptimizerPass());
return true; // -print-machineinstr should print after this.
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
index 7f62d5c13d64..b11fdd5dba50 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.h
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -37,12 +37,9 @@ class PIC16TargetMachine : public LLVMTargetMachine {
// any PIC16 specific FrameInfo class.
TargetFrameInfo FrameInfo;
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
public:
- PIC16TargetMachine(const Module &M, const std::string &FS,
- bool Cooper = false);
+ PIC16TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool Cooper = false);
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; }
@@ -59,18 +56,9 @@ public:
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
- virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
}; // PIC16TargetMachine.
-/// CooperTargetMachine
-class CooperTargetMachine : public PIC16TargetMachine {
-public:
- CooperTargetMachine(const Module &M, const std::string &FS);
-}; // CooperTargetMachine.
-
} // end namespace llvm
#endif
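With the RegisterTarget<> statics gone, clients now reach this machine
through the registry. A sketch, assuming the 2.6-era TargetRegistry
signatures (createPIC16TM is our name, not an API in the patch):

    #include "llvm/Target/TargetRegistry.h"
    using namespace llvm;

    TargetMachine *createPIC16TM(const std::string &Triple) {
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget(Triple, Err);
      if (!T)
        return 0; // e.g. the PIC16 libraries were not linked in
      return T->createTargetMachine(Triple, /*FS=*/"");
    }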
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
new file mode 100644
index 000000000000..a2a4c09d2978
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
@@ -0,0 +1,440 @@
+//===-- PIC16TargetObjectFile.cpp - PIC16 object files --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16TargetObjectFile.h"
+#include "MCSectionPIC16.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+MCSectionPIC16 *MCSectionPIC16::Create(const StringRef &Name, SectionKind K,
+ int Address, int Color, MCContext &Ctx) {
+ return new (Ctx) MCSectionPIC16(Name, K, Address, Color);
+}
+
+
+void MCSectionPIC16::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ OS << getName() << '\n';
+}
+
+
+
+
+PIC16TargetObjectFile::PIC16TargetObjectFile()
+ : ExternalVarDecls(0), ExternalVarDefs(0) {
+}
+
+const MCSectionPIC16 *PIC16TargetObjectFile::
+getPIC16Section(const char *Name, SectionKind Kind,
+ int Address, int Color) const {
+ MCSectionPIC16 *&Entry = SectionsByName[Name];
+ if (Entry)
+ return Entry;
+
+ return Entry = MCSectionPIC16::Create(Name, Kind, Address, Color,
+ getContext());
+}
+
+
+void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
+ TargetLoweringObjectFile::Initialize(Ctx, tm);
+ TM = &tm;
+
+ BSSSection = getPIC16Section("udata.# UDATA", MCSectionPIC16::UDATA_Kind());
+ ReadOnlySection = getPIC16Section("romdata.# ROMDATA",
+ MCSectionPIC16::ROMDATA_Kind());
+ DataSection = getPIC16Section("idata.# IDATA", MCSectionPIC16::IDATA_Kind());
+
+  // Needed because otherwise a .text symbol is emitted by DwarfWriter
+  // in BeginModule, and gpasm cribs about that .text symbol.
+ TextSection = getPIC16Section("", SectionKind::getText());
+
+ ROSections.push_back(new PIC16Section((MCSectionPIC16*)ReadOnlySection));
+
+ // FIXME: I don't know what the classification of these sections really is.
+ // These aren't really objects belonging to any section. Just emit them
+ // in AsmPrinter and remove this code from here.
+ ExternalVarDecls = new PIC16Section(getPIC16Section("ExternalVarDecls",
+ SectionKind::getMetadata()));
+ ExternalVarDefs = new PIC16Section(getPIC16Section("ExternalVarDefs",
+ SectionKind::getMetadata()));
+}
+
+const MCSection *PIC16TargetObjectFile::
+getSectionForFunction(const std::string &FnName) const {
+ std::string T = PAN::getCodeSectionName(FnName);
+ return getPIC16Section(T.c_str(), SectionKind::getText());
+}
+
+
+const MCSection *PIC16TargetObjectFile::
+getSectionForFunctionFrame(const std::string &FnName) const {
+ std::string T = PAN::getFrameSectionName(FnName);
+ return getPIC16Section(T.c_str(), SectionKind::getDataRel());
+}
+
+const MCSection *
+PIC16TargetObjectFile::getBSSSectionForGlobal(const GlobalVariable *GV) const {
+ assert(GV->hasInitializer() && "This global doesn't need space");
+ Constant *C = GV->getInitializer();
+  assert(C->isNullValue() && "Uninitialized global has non-zero initializer");
+
+ // Find how much space this global needs.
+ const TargetData *TD = TM->getTargetData();
+ const Type *Ty = C->getType();
+ unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+ // Go through all BSS Sections and assign this variable
+ // to the first available section having enough space.
+ PIC16Section *FoundBSS = NULL;
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ if (DataBankSize - BSSSections[i]->Size >= ValSize) {
+ FoundBSS = BSSSections[i];
+ break;
+ }
+ }
+
+  // No BSS section spacious enough was found. Create a new one.
+ if (!FoundBSS) {
+ std::string name = PAN::getUdataSectionName(BSSSections.size());
+ const MCSectionPIC16 *NewSection
+ = getPIC16Section(name.c_str(), MCSectionPIC16::UDATA_Kind());
+
+ FoundBSS = new PIC16Section(NewSection);
+
+ // Add this newly created BSS section to the list of BSSSections.
+ BSSSections.push_back(FoundBSS);
+ }
+
+ // Insert the GV into this BSS.
+ FoundBSS->Items.push_back(GV);
+ FoundBSS->Size += ValSize;
+ return FoundBSS->S_;
+}
+
+const MCSection *
+PIC16TargetObjectFile::getIDATASectionForGlobal(const GlobalVariable *GV) const{
+ assert(GV->hasInitializer() && "This global doesn't need space");
+ Constant *C = GV->getInitializer();
+  assert(!C->isNullValue() && "Initialized global has zero initializer");
+ assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
+ "can split initialized RAM data only");
+
+ // Find how much space this global needs.
+ const TargetData *TD = TM->getTargetData();
+ const Type *Ty = C->getType();
+ unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+ // Go through all IDATA Sections and assign this variable
+ // to the first available section having enough space.
+ PIC16Section *FoundIDATA = NULL;
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ if (DataBankSize - IDATASections[i]->Size >= ValSize) {
+ FoundIDATA = IDATASections[i];
+ break;
+ }
+ }
+
+  // No IDATA section spacious enough was found. Create a new one.
+ if (!FoundIDATA) {
+ std::string name = PAN::getIdataSectionName(IDATASections.size());
+ const MCSectionPIC16 *NewSection =
+ getPIC16Section(name.c_str(), MCSectionPIC16::IDATA_Kind());
+
+ FoundIDATA = new PIC16Section(NewSection);
+
+ // Add this newly created IDATA section to the list of IDATASections.
+ IDATASections.push_back(FoundIDATA);
+ }
+
+ // Insert the GV into this IDATA.
+ FoundIDATA->Items.push_back(GV);
+ FoundIDATA->Size += ValSize;
+ return FoundIDATA->S_;
+}
+
+// Get the section for an automatic variable of a function.
+// For PIC16 these are just globals with mangled names.
+const MCSection *
+PIC16TargetObjectFile::getSectionForAuto(const GlobalVariable *GV) const {
+
+ const std::string name = PAN::getSectionNameForSym(GV->getName());
+
+ // Go through all Auto Sections and assign this variable
+ // to the appropriate section.
+ PIC16Section *FoundAutoSec = NULL;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ if (AutosSections[i]->S_->getName() == name) {
+ FoundAutoSec = AutosSections[i];
+ break;
+ }
+ }
+
+  // No Auto section was found. Create a new one.
+ if (!FoundAutoSec) {
+ const MCSectionPIC16 *NewSection =
+ getPIC16Section(name.c_str(), MCSectionPIC16::UDATA_OVR_Kind());
+
+ FoundAutoSec = new PIC16Section(NewSection);
+
+ // Add this newly created autos section to the list of AutosSections.
+ AutosSections.push_back(FoundAutoSec);
+ }
+
+ // Insert the auto into this section.
+ FoundAutoSec->Items.push_back(GV);
+
+ return FoundAutoSec->S_;
+}
+
+
+// Override default implementation to put the true globals into
+// multiple data sections if required.
+const MCSection *
+PIC16TargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV1,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const {
+ // We select the section based on the initializer here, so it really
+ // has to be a GlobalVariable.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1);
+ if (!GV)
+ return TargetLoweringObjectFile::SelectSectionForGlobal(GV1, Kind, Mang,TM);
+
+ // Record External Var Decls.
+ if (GV->isDeclaration()) {
+ ExternalVarDecls->Items.push_back(GV);
+ return ExternalVarDecls->S_;
+ }
+
+ assert(GV->hasInitializer() && "A def without initializer?");
+
+ // First, if this is an automatic variable for a function, get the section
+ // name for it and return.
+ std::string name = GV->getName();
+ if (PAN::isLocalName(name))
+ return getSectionForAuto(GV);
+
+  // Record External Var Defs.
+ if (GV->hasExternalLinkage() || GV->hasCommonLinkage())
+ ExternalVarDefs->Items.push_back(GV);
+
+ // See if this is an uninitialized global.
+ const Constant *C = GV->getInitializer();
+ if (C->isNullValue())
+ return getBSSSectionForGlobal(GV);
+
+  // If this is initialized data in RAM, put it in the correct IDATA section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
+ return getIDATASectionForGlobal(GV);
+
+ // This is initialized data in rom, put it in the readonly section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
+ return getROSectionForGlobal(GV);
+
+ // Else let the default implementation take care of it.
+ return TargetLoweringObjectFile::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
+
+PIC16TargetObjectFile::~PIC16TargetObjectFile() {
+ for (unsigned i = 0; i < BSSSections.size(); i++)
+ delete BSSSections[i];
+ for (unsigned i = 0; i < IDATASections.size(); i++)
+ delete IDATASections[i];
+ for (unsigned i = 0; i < AutosSections.size(); i++)
+ delete AutosSections[i];
+ for (unsigned i = 0; i < ROSections.size(); i++)
+ delete ROSections[i];
+ delete ExternalVarDecls;
+ delete ExternalVarDefs;
+}
+
+
+/// getExplicitSectionGlobal - Allow the target to completely override
+/// section assignment of a global.
+const MCSection *PIC16TargetObjectFile::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(GV->hasSection());
+
+ if (const GlobalVariable *GVar = cast<GlobalVariable>(GV)) {
+ std::string SectName = GVar->getSection();
+ // If address for a variable is specified, get the address and create
+ // section.
+ std::string AddrStr = "Address=";
+ if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) {
+ std::string SectAddr = SectName.substr(AddrStr.length());
+ return CreateSectionForGlobal(GVar, Mang, SectAddr);
+ }
+
+ // Create the section specified with section attribute.
+ return CreateSectionForGlobal(GVar, Mang);
+ }
+
+ return getPIC16Section(GV->getSection().c_str(), Kind);
+}
+
+// Create a new section for a global variable. If Addr is given, create a
+// section at that address; otherwise create one by name.
+const MCSection *
+PIC16TargetObjectFile::CreateSectionForGlobal(const GlobalVariable *GV,
+ Mangler *Mang,
+ const std::string &Addr) const {
+ // See if this is an uninitialized global.
+ const Constant *C = GV->getInitializer();
+ if (C->isNullValue())
+ return CreateBSSSectionForGlobal(GV, Addr);
+
+  // If this is initialized data in RAM, put it in the correct IDATA section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
+ return CreateIDATASectionForGlobal(GV, Addr);
+
+ // This is initialized data in rom, put it in the readonly section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
+ return CreateROSectionForGlobal(GV, Addr);
+
+ // Else let the default implementation take care of it.
+ return TargetLoweringObjectFile::SectionForGlobal(GV, Mang, *TM);
+}
+
+// Create uninitialized section for a variable.
+const MCSection *
+PIC16TargetObjectFile::CreateBSSSectionForGlobal(const GlobalVariable *GV,
+ std::string Addr) const {
+ assert(GV->hasInitializer() && "This global doesn't need space");
+ assert(GV->getInitializer()->isNullValue() &&
+         "Uninitialized global has non-zero initializer");
+ std::string Name;
+  // If an address is given, create a section at that address; otherwise
+  // create a section with the name specified in GV.
+ PIC16Section *FoundBSS = NULL;
+ if (Addr.empty()) {
+ Name = GV->getSection() + " UDATA";
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ if (BSSSections[i]->S_->getName() == Name) {
+ FoundBSS = BSSSections[i];
+ break;
+ }
+ }
+ } else {
+ std::string Prefix = GV->getNameStr() + "." + Addr + ".";
+ Name = PAN::getUdataSectionName(BSSSections.size(), Prefix) + " " + Addr;
+ }
+
+ PIC16Section *NewBSS = FoundBSS;
+ if (NewBSS == NULL) {
+ const MCSectionPIC16 *NewSection =
+ getPIC16Section(Name.c_str(), MCSectionPIC16::UDATA_Kind());
+ NewBSS = new PIC16Section(NewSection);
+ BSSSections.push_back(NewBSS);
+ }
+
+ // Insert the GV into this BSS.
+ NewBSS->Items.push_back(GV);
+
+  // We do not want to put any GV without an explicit section into this
+  // section, so set its size to DataBankSize.
+ NewBSS->Size = DataBankSize;
+ return NewBSS->S_;
+}
+
+// Get rom section for a variable. Currently there can be only one rom section
+// unless a variable explicitly requests a section.
+const MCSection *
+PIC16TargetObjectFile::getROSectionForGlobal(const GlobalVariable *GV) const {
+ ROSections[0]->Items.push_back(GV);
+ return ROSections[0]->S_;
+}
+
+// Create initialized data section for a variable.
+const MCSection *
+PIC16TargetObjectFile::CreateIDATASectionForGlobal(const GlobalVariable *GV,
+ std::string Addr) const {
+ assert(GV->hasInitializer() && "This global doesn't need space");
+ assert(!GV->getInitializer()->isNullValue() &&
+ "initialized global has zero initializer");
+ assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
+ "can be used for initialized RAM data only");
+
+ std::string Name;
+  // If an address is given, create a section at that address; otherwise
+  // create a section with the name specified in GV.
+ PIC16Section *FoundIDATASec = NULL;
+ if (Addr.empty()) {
+ Name = GV->getSection() + " IDATA";
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ if (IDATASections[i]->S_->getName() == Name) {
+ FoundIDATASec = IDATASections[i];
+ break;
+ }
+ }
+ } else {
+ std::string Prefix = GV->getNameStr() + "." + Addr + ".";
+ Name = PAN::getIdataSectionName(IDATASections.size(), Prefix) + " " + Addr;
+ }
+
+ PIC16Section *NewIDATASec = FoundIDATASec;
+ if (NewIDATASec == NULL) {
+ const MCSectionPIC16 *NewSection =
+ getPIC16Section(Name.c_str(), MCSectionPIC16::IDATA_Kind());
+ NewIDATASec = new PIC16Section(NewSection);
+ IDATASections.push_back(NewIDATASec);
+ }
+ // Insert the GV into this IDATA Section.
+ NewIDATASec->Items.push_back(GV);
+  // We do not want to put any GV without an explicit section into this
+  // section, so set its size to DataBankSize.
+ NewIDATASec->Size = DataBankSize;
+ return NewIDATASec->S_;
+}
+
+// Create a section in rom for a variable.
+const MCSection *
+PIC16TargetObjectFile::CreateROSectionForGlobal(const GlobalVariable *GV,
+ std::string Addr) const {
+ assert(GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE &&
+ "can be used for ROM data only");
+
+ std::string Name;
+  // If an address is given, create a section at that address; otherwise
+  // create a section with the name specified in GV.
+ PIC16Section *FoundROSec = NULL;
+ if (Addr.empty()) {
+ Name = GV->getSection() + " ROMDATA";
+ for (unsigned i = 1; i < ROSections.size(); i++) {
+ if (ROSections[i]->S_->getName() == Name) {
+ FoundROSec = ROSections[i];
+ break;
+ }
+ }
+ } else {
+ std::string Prefix = GV->getNameStr() + "." + Addr + ".";
+ Name = PAN::getRomdataSectionName(ROSections.size(), Prefix) + " " + Addr;
+ }
+
+ PIC16Section *NewRomSec = FoundROSec;
+ if (NewRomSec == NULL) {
+ const MCSectionPIC16 *NewSection =
+ getPIC16Section(Name.c_str(), MCSectionPIC16::ROMDATA_Kind());
+ NewRomSec = new PIC16Section(NewSection);
+ ROSections.push_back(NewRomSec);
+ }
+
+ // Insert the GV into this ROM Section.
+ NewRomSec->Items.push_back(GV);
+ return NewRomSec->S_;
+}
+
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h
new file mode 100644
index 000000000000..75f6cced0ab8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.h
@@ -0,0 +1,120 @@
+//===-- PIC16TargetObjectFile.h - PIC16 Object Info -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
+#define LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
+#include <string>
+
+namespace llvm {
+ class GlobalVariable;
+ class Module;
+ class PIC16TargetMachine;
+ class MCSectionPIC16;
+
+ enum { DataBankSize = 80 };
+
+  /// PIC16 splits the global data into multiple udata and idata sections.
+  /// Each udata and idata section needs to keep a list of the globals it
+  /// contains, in order to avoid scanning over all the global values
+  /// again and printing only those that match the current section.
+  /// Keeping values inside the sections makes printing a section much easier.
+ ///
+ /// FIXME: MOVE ALL THIS STUFF TO MCSectionPIC16.
+ ///
+ struct PIC16Section {
+ const MCSectionPIC16 *S_; // Connection to actual Section.
+ unsigned Size; // Total size of the objects contained.
+ bool SectionPrinted;
+ std::vector<const GlobalVariable*> Items;
+
+ PIC16Section(const MCSectionPIC16 *s) {
+ S_ = s;
+ Size = 0;
+ SectionPrinted = false;
+ }
+ bool isPrinted() const { return SectionPrinted; }
+ void setPrintedStatus(bool status) { SectionPrinted = status; }
+ };
+
+ class PIC16TargetObjectFile : public TargetLoweringObjectFile {
+ /// SectionsByName - Bindings of names to allocated sections.
+ mutable StringMap<MCSectionPIC16*> SectionsByName;
+
+ const TargetMachine *TM;
+
+ const MCSectionPIC16 *getPIC16Section(const char *Name,
+ SectionKind K,
+ int Address = -1,
+ int Color = -1) const;
+ public:
+ mutable std::vector<PIC16Section*> BSSSections;
+ mutable std::vector<PIC16Section*> IDATASections;
+ mutable std::vector<PIC16Section*> AutosSections;
+ mutable std::vector<PIC16Section*> ROSections;
+ mutable PIC16Section *ExternalVarDecls;
+ mutable PIC16Section *ExternalVarDefs;
+
+ PIC16TargetObjectFile();
+ ~PIC16TargetObjectFile();
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ virtual const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine&) const;
+
+ const MCSection *getSectionForFunction(const std::string &FnName) const;
+ const MCSection *getSectionForFunctionFrame(const std::string &FnName)const;
+
+
+ private:
+ std::string getSectionNameForSym(const std::string &Sym) const;
+
+ const MCSection *getBSSSectionForGlobal(const GlobalVariable *GV) const;
+ const MCSection *getIDATASectionForGlobal(const GlobalVariable *GV) const;
+ const MCSection *getSectionForAuto(const GlobalVariable *GV) const;
+ const MCSection *CreateBSSSectionForGlobal(const GlobalVariable *GV,
+ std::string Addr = "") const;
+ const MCSection *CreateIDATASectionForGlobal(const GlobalVariable *GV,
+ std::string Addr = "") const;
+ const MCSection *getROSectionForGlobal(const GlobalVariable *GV) const;
+ const MCSection *CreateROSectionForGlobal(const GlobalVariable *GV,
+ std::string Addr = "") const;
+ const MCSection *CreateSectionForGlobal(const GlobalVariable *GV,
+ Mangler *Mang,
+ const std::string &Addr = "") const;
+ public:
+ void SetSectionForGVs(Module &M);
+ const std::vector<PIC16Section*> &getBSSSections() const {
+ return BSSSections;
+ }
+ const std::vector<PIC16Section*> &getIDATASections() const {
+ return IDATASections;
+ }
+ const std::vector<PIC16Section*> &getAutosSections() const {
+ return AutosSections;
+ }
+ const std::vector<PIC16Section*> &getROSections() const {
+ return ROSections;
+ }
+
+ };
+} // end namespace llvm
+
+#endif
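The udata/idata splitting declared above is a first-fit packer over 80-byte
banks (DataBankSize). A self-contained sketch of the policy with the LLVM
types stripped out (the names here are ours):

    #include <string>
    #include <vector>

    enum { DataBankSize = 80 };

    struct Bank {
      unsigned Size;                     // bytes already assigned
      std::vector<std::string> Items;    // globals placed in this bank
      Bank() : Size(0) {}
    };

    // Place a value of ValSize bytes into the first bank with room,
    // opening a new bank (a new udata.<n> section) when none fits.
    Bank &assignToBank(std::vector<Bank> &Banks, const std::string &Name,
                       unsigned ValSize) {
      for (unsigned i = 0; i < Banks.size(); ++i)
        if (DataBankSize - Banks[i].Size >= ValSize) {
          Banks[i].Items.push_back(Name);
          Banks[i].Size += ValSize;
          return Banks[i];
        }
      Banks.push_back(Bank());
      Banks.back().Items.push_back(Name);
      Banks.back().Size = ValSize;
      return Banks.back();
    }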
diff --git a/lib/Target/PIC16/TargetInfo/CMakeLists.txt b/lib/Target/PIC16/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..bfc6ff4e8e2e
--- /dev/null
+++ b/lib/Target/PIC16/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPIC16Info
+ PIC16TargetInfo.cpp
+ )
+
+add_dependencies(LLVMPIC16Info PIC16Table_gen)
diff --git a/lib/Target/PIC16/TargetInfo/Makefile b/lib/Target/PIC16/TargetInfo/Makefile
new file mode 100644
index 000000000000..76609f66d652
--- /dev/null
+++ b/lib/Target/PIC16/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PIC16/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPIC16Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
new file mode 100644
index 000000000000..46cc81967ebd
--- /dev/null
+++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- PIC16TargetInfo.cpp - PIC16 Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePIC16Target, llvm::TheCooperTarget;
+
+extern "C" void LLVMInitializePIC16TargetInfo() {
+ RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]");
+
+ RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]");
+}
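Together with the PIC16TargetMachine.cpp hunk above, initialization is now
split in two; a hypothetical driver snippet (the two function names are the
ones this patch defines):

    extern "C" void LLVMInitializePIC16TargetInfo(); // "pic16" and "cooper"
    extern "C" void LLVMInitializePIC16Target();     // TargetMachine + MCAsmInfo

    static void initPIC16() {
      LLVMInitializePIC16TargetInfo(); // must run first: creates Target entries
      LLVMInitializePIC16Target();     // hooks the machine onto ThePIC16Target
    }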
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 7f1673cf462e..a0fba86fa6b2 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -24,7 +24,6 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/MDNode.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
@@ -32,16 +31,22 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
@@ -52,13 +57,40 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
class VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter {
protected:
- StringSet<> FnStubs, GVStubs, HiddenGVStubs;
+ struct FnStubInfo {
+ std::string Stub, LazyPtr, AnonSymbol;
+
+ FnStubInfo() {}
+
+ void Init(const GlobalValue *GV, Mangler *Mang) {
+ // Already initialized.
+ if (!Stub.empty()) return;
+ Stub = Mang->getMangledName(GV, "$stub", true);
+ LazyPtr = Mang->getMangledName(GV, "$lazy_ptr", true);
+ AnonSymbol = Mang->getMangledName(GV, "$stub$tmp", true);
+ }
+
+ void Init(const std::string &GV, Mangler *Mang) {
+ // Already initialized.
+ if (!Stub.empty()) return;
+ Stub = Mang->makeNameProper(GV + "$stub",
+ Mangler::Private);
+ LazyPtr = Mang->makeNameProper(GV + "$lazy_ptr",
+ Mangler::Private);
+ AnonSymbol = Mang->makeNameProper(GV + "$stub$tmp",
+ Mangler::Private);
+ }
+ };
+
+ StringMap<FnStubInfo> FnStubs;
+ StringMap<std::string> GVStubs, HiddenGVStubs, TOC;
const PPCSubtarget &Subtarget;
+ uint64_t LabelID;
public:
- explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM,
- const TargetAsmInfo *T, bool V)
+ explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
: AsmPrinter(O, TM, T, V),
- Subtarget(TM.getSubtarget<PPCSubtarget>()) {}
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {}
virtual const char *getPassName() const {
return "PowerPC Assembly Printer";
@@ -70,7 +102,7 @@ namespace {
unsigned enumRegToMachineReg(unsigned enumReg) {
switch (enumReg) {
- default: assert(0 && "Unhandled register!"); break;
+ default: llvm_unreachable("Unhandled register!");
case PPC::CR0: return 0;
case PPC::CR1: return 1;
case PPC::CR2: return 2;
@@ -80,14 +112,16 @@ namespace {
case PPC::CR6: return 6;
case PPC::CR7: return 7;
}
- abort();
+ llvm_unreachable(0);
}
/// printInstruction - This method is automatically generated by tablegen
/// from the instruction set description. This method returns true if the
/// machine instruction was sufficiently described to print it, otherwise it
/// returns false.
- bool printInstruction(const MachineInstr *MI);
+ void printInstruction(const MachineInstr *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
void printMachineInstruction(const MachineInstr *MI);
void printOp(const MachineOperand &MO);
@@ -117,7 +151,7 @@ namespace {
return;
}
- const char *RegName = TM.getRegisterInfo()->get(RegNo).AsmName;
+ const char *RegName = getRegisterName(RegNo);
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
@@ -190,16 +224,16 @@ namespace {
GlobalValue *GV = MO.getGlobal();
if (GV->isDeclaration() || GV->isWeakForLinker()) {
// Dynamically-resolved functions need a stub for the function.
- std::string Name = Mang->getValueName(GV);
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
+ FnStubInfo &FnInfo = FnStubs[Mang->getMangledName(GV)];
+ FnInfo.Init(GV, Mang);
+ O << FnInfo.Stub;
return;
}
}
if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
- std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
+      FnStubInfo &FnInfo = FnStubs[Mang->makeNameProper(MO.getSymbolName())];
+ FnInfo.Init(MO.getSymbolName(), Mang);
+ O << FnInfo.Stub;
return;
}
}
@@ -281,20 +315,39 @@ namespace {
printOperand(MI, OpNo+1);
}
+ void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.getType() == MachineOperand::MO_GlobalAddress);
+
+ GlobalValue *GV = MO.getGlobal();
+
+ std::string Name = Mang->getMangledName(GV);
+
+ // Map symbol -> label of TOC entry.
+ if (TOC.count(Name) == 0) {
+ std::string Label;
+ Label += MAI->getPrivateGlobalPrefix();
+ Label += "C";
+ Label += utostr(LabelID++);
+
+ TOC[Name] = Label;
+ }
+
+ O << TOC[Name] << "@toc";
+ }
+
void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
const char *Modifier);
virtual bool runOnMachineFunction(MachineFunction &F) = 0;
- virtual bool doFinalization(Module &M) = 0;
-
- virtual void EmitExternalGlobal(const GlobalVariable *GV);
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
public:
- explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
- const TargetAsmInfo *T, bool V)
+ explicit PPCLinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
: PPCAsmPrinter(O, TM, T, V){}
virtual const char *getPassName() const {
@@ -311,16 +364,16 @@ namespace {
PPCAsmPrinter::getAnalysisUsage(AU);
}
- void printModuleLevelGV(const GlobalVariable* GVar);
+ void PrintGlobalVariable(const GlobalVariable *GVar);
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
/// OS X
class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter {
- raw_ostream &OS;
+ formatted_raw_ostream &OS;
public:
- explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
- const TargetAsmInfo *T, bool V)
+ explicit PPCDarwinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
: PPCAsmPrinter(O, TM, T, V), OS(O) {}
virtual const char *getPassName() const {
@@ -328,8 +381,8 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &F);
- bool doInitialization(Module &M);
bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -338,7 +391,7 @@ namespace {
PPCAsmPrinter::getAnalysisUsage(AU);
}
- void printModuleLevelGV(const GlobalVariable* GVar);
+ void PrintGlobalVariable(const GlobalVariable *GVar);
};
} // end of anonymous namespace
@@ -348,54 +401,52 @@ namespace {
void PPCAsmPrinter::printOp(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_Immediate:
- cerr << "printOp() does not handle immediate values\n";
- abort();
- return;
+ llvm_unreachable("printOp() does not handle immediate values");
case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
return;
case MachineOperand::MO_JumpTableIndex:
- O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << MO.getIndex();
// FIXME: PIC relocation model
return;
case MachineOperand::MO_ConstantPoolIndex:
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
- case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_ExternalSymbol: {
// Computing the address of an external symbol, not calling it.
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+
if (TM.getRelocationModel() != Reloc::Static) {
- std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
- GVStubs.insert(Name);
- printSuffixedName(Name, "$non_lazy_ptr");
- return;
+ GVStubs[Name] = Name+"$non_lazy_ptr";
+ Name += "$non_lazy_ptr";
}
- O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ O << Name;
return;
+ }
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
GlobalValue *GV = MO.getGlobal();
- std::string Name = Mang->getValueName(GV);
+ std::string Name;
// External or weakly linked global variables need non-lazily-resolved stubs
- if (TM.getRelocationModel() != Reloc::Static) {
- if (GV->isDeclaration() || GV->isWeakForLinker()) {
- if (GV->hasHiddenVisibility()) {
- if (GV->isDeclaration() || GV->hasCommonLinkage() ||
- GV->hasAvailableExternallyLinkage()) {
- HiddenGVStubs.insert(Name);
- printSuffixedName(Name, "$non_lazy_ptr");
- } else {
- O << Name;
- }
- } else {
- GVStubs.insert(Name);
- printSuffixedName(Name, "$non_lazy_ptr");
- }
- return;
+ if (TM.getRelocationModel() != Reloc::Static &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) {
+ if (!GV->hasHiddenVisibility()) {
+ Name = Mang->getMangledName(GV, "$non_lazy_ptr", true);
+ GVStubs[Mang->getMangledName(GV)] = Name;
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ Name = Mang->getMangledName(GV, "$non_lazy_ptr", true);
+ HiddenGVStubs[Mang->getMangledName(GV)] = Name;
+ } else {
+ Name = Mang->getMangledName(GV);
}
+ } else {
+ Name = Mang->getMangledName(GV);
}
O << Name;
@@ -409,22 +460,6 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
}
}
-/// EmitExternalGlobal - In this case we need to use the indirect symbol.
-///
-void PPCAsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
- std::string Name;
- getGlobalLinkName(GV, Name);
- if (TM.getRelocationModel() != Reloc::Static) {
- if (GV->hasHiddenVisibility())
- HiddenGVStubs.insert(Name);
- else
- GVStubs.insert(Name);
- printSuffixedName(Name, "$non_lazy_ptr");
- return;
- }
- O << Name;
-}
-
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -461,15 +496,19 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
const char *ExtraCode) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
- if (MI->getOperand(OpNo).isReg())
- printMemRegReg(MI, OpNo);
- else
- printMemRegImm(MI, OpNo);
+ assert (MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo);
+ O << ")";
return false;
}
@@ -505,6 +544,8 @@ void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
///
void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
+
+ processDebugLoc(MI, true);
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
@@ -549,12 +590,13 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
}
}
- if (printInstruction(MI))
- return; // Printer was automatically generated
+ printInstruction(MI);
+
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
- assert(0 && "Unhandled instruction in asm writer!");
- abort();
- return;
+ processDebugLoc(MI, false);
}
/// runOnMachineFunction - This uses the printMachineInstruction()
@@ -571,10 +613,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out labels for the function.
const Function *F = MF.getFunction();
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
switch (F->getLinkage()) {
- default: assert(0 && "Unknown linkage type!");
+ default: llvm_unreachable("Unknown linkage type!");
case Function::PrivateLinkage:
case Function::InternalLinkage: // Symbols default to internal.
break;
@@ -582,6 +624,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
O << "\t.global\t" << CurrentFnName << '\n'
<< "\t.type\t" << CurrentFnName << ", @function\n";
break;
+ case Function::LinkerPrivateLinkage:
case Function::WeakAnyLinkage:
case Function::WeakODRLinkage:
case Function::LinkOnceAnyLinkage:
@@ -594,7 +637,19 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
printVisibility(CurrentFnName, F->getVisibility());
EmitAlignment(MF.getAlignment(), F);
- O << CurrentFnName << ":\n";
+
+ if (Subtarget.isPPC64()) {
+ // Emit an official procedure descriptor.
+ // FIXME 64-bit SVR4: Use MCSection here?
+ O << "\t.section\t\".opd\",\"aw\"\n";
+ O << "\t.align 3\n";
+ O << CurrentFnName << ":\n";
+ O << "\t.quad .L." << CurrentFnName << ",.TOC.@tocbase\n";
+ O << "\t.previous\n";
+ O << ".L." << CurrentFnName << ":\n";
+ } else {
+ O << CurrentFnName << ":\n";
+ }
// Emit pre-function debug information.
DW->BeginFunction(&MF);
@@ -604,8 +659,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I) {
// Print a label for the basic block.
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
@@ -619,27 +673,16 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out jump tables referenced by the function.
EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
// Emit post-function debug information.
DW->EndFunction(&MF);
- O.flush();
-
// We didn't modify anything.
return false;
}
-/// PrintUnmangledNameSafely - Print out the printable characters in the name.
-/// Don't print things like \\n or \\0.
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
- for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
- Name != E; ++Name)
- if (isprint(*Name))
- OS << *Name;
-}
-
-void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+void PPCLinuxAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer())
@@ -649,18 +692,17 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
if (EmitSpecialLLVMGlobal(GVar))
return;
- std::string name = Mang->getValueName(GVar);
+ std::string name = Mang->getMangledName(GVar);
printVisibility(name, GVar->getVisibility());
Constant *C = GVar->getInitializer();
- if (isa<MDNode>(C) || isa<MDString>(C))
- return;
const Type *Type = C->getType();
unsigned Size = TD->getTypeAllocSize(Type);
unsigned Align = TD->getPreferredAlignmentLog(GVar);
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
if (C->isNullValue() && /* FIXME: Verify correct */
!GVar->hasSection() &&
@@ -674,13 +716,13 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
O << name << ":\n";
O << "\t.zero " << Size << '\n';
} else if (GVar->hasLocalLinkage()) {
- O << TAI->getLCOMMDirective() << name << ',' << Size;
+ O << MAI->getLCOMMDirective() << name << ',' << Size;
} else {
O << ".comm " << name << ',' << Size;
}
if (VerboseAsm) {
- O << "\t\t" << TAI->getCommentString() << " '";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t" << MAI->getCommentString() << " '";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
O << "'";
}
O << '\n';
@@ -693,6 +735,7 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
O << "\t.global " << name << '\n'
<< "\t.type " << name << ", @object\n"
<< "\t.weak " << name << '\n';
@@ -709,15 +752,14 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
case GlobalValue::PrivateLinkage:
break;
default:
- cerr << "Unknown linkage type!";
- abort();
+ llvm_unreachable("Unknown linkage type!");
}
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << TAI->getCommentString() << " '";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t\t\t" << MAI->getCommentString() << " '";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
O << "'";
}
O << '\n';
@@ -727,10 +769,20 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
}
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- // Print out module-level global variables here.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I);
+ const TargetData *TD = TM.getTargetData();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ if (isPPC64 && !TOC.empty()) {
+ // FIXME 64-bit SVR4: Use MCSection here?
+ O << "\t.section\t\".toc\",\"aw\"\n";
+
+ for (StringMap<std::string>::iterator I = TOC.begin(), E = TOC.end();
+ I != E; ++I) {
+ O << I->second << ":\n";
+ O << "\t.tc " << I->getKeyData() << "[TC]," << I->getKeyData() << '\n';
+ }
+ }
return AsmPrinter::doFinalization(M);
}
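The loop above pairs with printTOCEntryLabel, which mints one private label
per referenced symbol and prints it with an @toc suffix. A standalone sketch
of that mapping (ours; the real code uses StringMap, and ".L" as the private
global prefix is an assumption for Linux/PPC):

    #include <map>
    #include <sstream>
    #include <string>

    std::map<std::string, std::string> TOC;
    unsigned LabelID = 0;

    // Return the TOC-entry label for Sym, creating it on first use.
    std::string getTOCLabel(const std::string &Sym) {
      std::map<std::string, std::string>::iterator I = TOC.find(Sym);
      if (I != TOC.end())
        return I->second;
      std::ostringstream OS;
      OS << ".LC" << LabelID++;   // printed at the use site as ".LC0@toc"
      return TOC[Sym] = OS.str();
    }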
@@ -749,10 +801,10 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out labels for the function.
const Function *F = MF.getFunction();
- SwitchToSection(TAI->SectionForGlobal(F));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
switch (F->getLinkage()) {
- default: assert(0 && "Unknown linkage type!");
+ default: llvm_unreachable("Unknown linkage type!");
case Function::PrivateLinkage:
case Function::InternalLinkage: // Symbols default to internal.
break;
@@ -763,6 +815,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
case Function::WeakODRLinkage:
case Function::LinkOnceAnyLinkage:
case Function::LinkOnceODRLinkage:
+ case Function::LinkerPrivateLinkage:
O << "\t.globl\t" << CurrentFnName << '\n';
O << "\t.weak_definition\t" << CurrentFnName << '\n';
break;
@@ -789,8 +842,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I) {
// Print a label for the basic block.
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true, VerboseAsm);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
@@ -810,7 +862,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
-bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
"ppc",
@@ -833,26 +885,28 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
O << "\t.machine " << CPUDirectives[Directive] << '\n';
- bool Result = AsmPrinter::doInitialization(M);
- assert(MMI);
-
  // Prime text sections so they are adjacent. This reduces the likelihood that
  // a large data or debug section causes a branch to exceed the 16M limit.
- SwitchToTextSection("\t.section __TEXT,__textcoal_nt,coalesced,"
- "pure_instructions");
+ TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
- SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs,"
- "pure_instructions,32");
+ OutStreamer.SwitchSection(
+ TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
- SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs,"
- "pure_instructions,16");
+ OutStreamer.SwitchSection(
+ TLOFMacho.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
}
- SwitchToSection(TAI->getTextSection());
-
- return Result;
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}
-void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+void PPCDarwinAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer())
@@ -869,8 +923,7 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
return;
}
- std::string name = Mang->getValueName(GVar);
-
+ std::string name = Mang->getMangledName(GVar);
printVisibility(name, GVar->getVisibility());
Constant *C = GVar->getInitializer();
@@ -878,13 +931,17 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
unsigned Size = TD->getTypeAllocSize(Type);
unsigned Align = TD->getPreferredAlignmentLog(GVar);
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GVar, Mang, TM);
+ OutStreamer.SwitchSection(TheSection);
+  // FIXME: Drive this off the section!
if (C->isNullValue() && /* FIXME: Verify correct */
!GVar->hasSection() &&
(GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
GVar->isWeakForLinker()) &&
- TAI->SectionKindForGlobal(GVar) != SectionKind::RODataMergeStr) {
+ // Don't put things that should go in the cstring section into "comm".
+ !TheSection->getKind().isMergeableCString()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
if (GVar->hasExternalLinkage()) {
@@ -892,15 +949,15 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
O << "\t.zerofill __DATA, __common, " << name << ", "
<< Size << ", " << Align;
} else if (GVar->hasLocalLinkage()) {
- O << TAI->getLCOMMDirective() << name << ',' << Size << ',' << Align;
+ O << MAI->getLCOMMDirective() << name << ',' << Size << ',' << Align;
} else if (!GVar->hasCommonLinkage()) {
O << "\t.globl " << name << '\n'
- << TAI->getWeakDefDirective() << name << '\n';
+ << MAI->getWeakDefDirective() << name << '\n';
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << TAI->getCommentString() << " ";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t\t\t" << MAI->getCommentString() << " ";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << '\n';
EmitGlobalConstant(C);
@@ -912,8 +969,8 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
O << ',' << Align;
}
if (VerboseAsm) {
- O << "\t\t" << TAI->getCommentString() << " '";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t" << MAI->getCommentString() << " '";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
O << "'";
}
O << '\n';
@@ -926,6 +983,7 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
O << "\t.globl " << name << '\n'
<< "\t.weak_definition " << name << '\n';
break;
@@ -940,15 +998,14 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
case GlobalValue::PrivateLinkage:
break;
default:
- cerr << "Unknown linkage type!";
- abort();
+ llvm_unreachable("Unknown linkage type!");
}
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << TAI->getCommentString() << " '";
- PrintUnmangledNameSafely(GVar, O);
+ O << "\t\t\t\t" << MAI->getCommentString() << " '";
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
O << "'";
}
O << '\n';
@@ -960,141 +1017,110 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
const TargetData *TD = TM.getTargetData();
- // Print out module-level global variables here.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I);
-
bool isPPC64 = TD->getPointerSizeInBits() == 64;
+  // Darwin/PPC always uses Mach-O.
+ TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+
+ const MCSection *LSPSection = 0;
+ if (!FnStubs.empty()) // .lazy_symbol_pointer
+ LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+
// Output stubs for dynamically-linked functions
- if (TM.getRelocationModel() == Reloc::PIC_) {
- for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
- i != e; ++i) {
- SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs,"
- "pure_instructions,32");
+ if (TM.getRelocationModel() == Reloc::PIC_ && !FnStubs.empty()) {
+ const MCSection *StubSection =
+ TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
+ for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end();
+ I != E; ++I) {
+ OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
- const char *p = i->getKeyData();
- bool hasQuote = p[0]=='\"';
- printSuffixedName(p, "$stub");
- O << ":\n";
- O << "\t.indirect_symbol " << p << '\n';
+ const FnStubInfo &Info = I->second;
+ O << Info.Stub << ":\n";
+ O << "\t.indirect_symbol " << I->getKeyData() << '\n';
O << "\tmflr r0\n";
- O << "\tbcl 20,31,";
- if (hasQuote)
- O << "\"L0$" << &p[1];
- else
- O << "L0$" << p;
- O << '\n';
- if (hasQuote)
- O << "\"L0$" << &p[1];
- else
- O << "L0$" << p;
- O << ":\n";
+ O << "\tbcl 20,31," << Info.AnonSymbol << '\n';
+ O << Info.AnonSymbol << ":\n";
O << "\tmflr r11\n";
- O << "\taddis r11,r11,ha16(";
- printSuffixedName(p, "$lazy_ptr");
- O << "-";
- if (hasQuote)
- O << "\"L0$" << &p[1];
- else
- O << "L0$" << p;
+ O << "\taddis r11,r11,ha16(" << Info.LazyPtr << "-" << Info.AnonSymbol;
O << ")\n";
O << "\tmtlr r0\n";
- if (isPPC64)
- O << "\tldu r12,lo16(";
- else
- O << "\tlwzu r12,lo16(";
- printSuffixedName(p, "$lazy_ptr");
- O << "-";
- if (hasQuote)
- O << "\"L0$" << &p[1];
- else
- O << "L0$" << p;
- O << ")(r11)\n";
+ O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16(";
+ O << Info.LazyPtr << "-" << Info.AnonSymbol << ")(r11)\n";
O << "\tmtctr r12\n";
O << "\tbctr\n";
- SwitchToDataSection(".lazy_symbol_pointer");
- printSuffixedName(p, "$lazy_ptr");
- O << ":\n";
- O << "\t.indirect_symbol " << p << '\n';
- if (isPPC64)
- O << "\t.quad dyld_stub_binding_helper\n";
- else
- O << "\t.long dyld_stub_binding_helper\n";
+
+ OutStreamer.SwitchSection(LSPSection);
+ O << Info.LazyPtr << ":\n";
+ O << "\t.indirect_symbol " << I->getKeyData() << '\n';
+ O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n";
}
- } else {
- for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
- i != e; ++i) {
- SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs,"
- "pure_instructions,16");
+ } else if (!FnStubs.empty()) {
+ const MCSection *StubSection =
+ TLOFMacho.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
+
+ for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end();
+ I != E; ++I) {
+ OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
- const char *p = i->getKeyData();
- printSuffixedName(p, "$stub");
- O << ":\n";
- O << "\t.indirect_symbol " << p << '\n';
- O << "\tlis r11,ha16(";
- printSuffixedName(p, "$lazy_ptr");
- O << ")\n";
- if (isPPC64)
- O << "\tldu r12,lo16(";
- else
- O << "\tlwzu r12,lo16(";
- printSuffixedName(p, "$lazy_ptr");
- O << ")(r11)\n";
+ const FnStubInfo &Info = I->second;
+ O << Info.Stub << ":\n";
+ O << "\t.indirect_symbol " << I->getKeyData() << '\n';
+ O << "\tlis r11,ha16(" << Info.LazyPtr << ")\n";
+ O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16(";
+ O << Info.LazyPtr << ")(r11)\n";
O << "\tmtctr r12\n";
O << "\tbctr\n";
- SwitchToDataSection(".lazy_symbol_pointer");
- printSuffixedName(p, "$lazy_ptr");
- O << ":\n";
- O << "\t.indirect_symbol " << p << '\n';
- if (isPPC64)
- O << "\t.quad dyld_stub_binding_helper\n";
- else
- O << "\t.long dyld_stub_binding_helper\n";
+ OutStreamer.SwitchSection(LSPSection);
+ O << Info.LazyPtr << ":\n";
+ O << "\t.indirect_symbol " << I->getKeyData() << '\n';
+ O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n";
}
}
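
Pieced together from the O << statements above, the PIC-mode stub for a 32-bit symbol _foo comes out roughly as follows (a sketch: the exact Stub/AnonSymbol/LazyPtr spellings live in FnStubInfo, and the traditional $stub/$lazy_ptr suffixes are assumed here):

  _foo$stub:
          .indirect_symbol _foo
          mflr r0
          bcl 20,31,L0$_foo
  L0$_foo:
          mflr r11
          addis r11,r11,ha16(_foo$lazy_ptr-L0$_foo)
          mtlr r0
          lwzu r12,lo16(_foo$lazy_ptr-L0$_foo)(r11)
          mtctr r12
          bctr

          .lazy_symbol_pointer
  _foo$lazy_ptr:
          .indirect_symbol _foo
          .long dyld_stub_binding_helper
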
O << '\n';
- if (TAI->doesSupportExceptionHandling() && MMI) {
+ if (MAI->doesSupportExceptionHandling() && MMI) {
// Add the (possibly multiple) personalities to the set of global values.
// Only referenced functions get into the Personalities list.
const std::vector<Function *> &Personalities = MMI->getPersonalities();
for (std::vector<Function *>::const_iterator I = Personalities.begin(),
- E = Personalities.end(); I != E; ++I)
- if (*I) GVStubs.insert("_" + (*I)->getName());
+ E = Personalities.end(); I != E; ++I) {
+ if (*I)
+ GVStubs[Mang->getMangledName(*I)] =
+ Mang->getMangledName(*I, "$non_lazy_ptr", true);
+ }
}
- // Output stubs for external and common global variables.
+  // Output Mach-O stubs for external and common global variables.
if (!GVStubs.empty()) {
- SwitchToDataSection(".non_lazy_symbol_pointer");
- for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
- i != e; ++i) {
- std::string p = i->getKeyData();
- printSuffixedName(p, "$non_lazy_ptr");
- O << ":\n";
- O << "\t.indirect_symbol " << p << '\n';
- if (isPPC64)
- O << "\t.quad\t0\n";
- else
- O << "\t.long\t0\n";
+    // Switch to the non-lazy symbol pointer section (the old
+    // ".non_lazy_symbol_pointer" directive).
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (StringMap<std::string>::iterator I = GVStubs.begin(),
+ E = GVStubs.end(); I != E; ++I) {
+ O << I->second << ":\n";
+ O << "\t.indirect_symbol " << I->getKeyData() << '\n';
+ O << (isPPC64 ? "\t.quad\t0\n" : "\t.long\t0\n");
}
}
if (!HiddenGVStubs.empty()) {
- SwitchToSection(TAI->getDataSection());
- for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
- i != e; ++i) {
- std::string p = i->getKeyData();
- EmitAlignment(isPPC64 ? 3 : 2);
- printSuffixedName(p, "$non_lazy_ptr");
- O << ":\n";
- if (isPPC64)
- O << "\t.quad\t";
- else
- O << "\t.long\t";
- O << p << '\n';
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+ for (StringMap<std::string>::iterator I = HiddenGVStubs.begin(),
+ E = HiddenGVStubs.end(); I != E; ++I) {
+ O << I->second << ":\n";
+ O << (isPPC64 ? "\t.quad\t" : "\t.long\t") << I->getKeyData() << '\n';
}
}
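
Likewise, each entry of the two pointer tables prints as below (sketch, 32-bit case, with a hypothetical external _bar and hidden _baz; the $non_lazy_ptr suffix comes from the getMangledName call above):

  _bar$non_lazy_ptr:
          .indirect_symbol _bar
          .long   0

  _baz$non_lazy_ptr:
          .long   _baz
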
@@ -1114,28 +1140,19 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
/// for a MachineFunction to the given output stream, in a format that the
/// Darwin assembler can deal with.
///
-FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o,
- PPCTargetMachine &tm,
- bool verbose) {
+static AsmPrinter *createPPCAsmPrinterPass(formatted_raw_ostream &o,
+ TargetMachine &tm,
+ const MCAsmInfo *tai,
+ bool verbose) {
const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
- if (Subtarget->isDarwin()) {
- return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
- } else {
- return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
- }
-}
-
-namespace {
- static struct Register {
- Register() {
- PPCTargetMachine::registerAsmPrinter(createPPCAsmPrinterPass);
- }
- } Registrator;
+ if (Subtarget->isDarwin())
+ return new PPCDarwinAsmPrinter(o, tm, tai, verbose);
+ return new PPCLinuxAsmPrinter(o, tm, tai, verbose);
}
-extern "C" int PowerPCAsmPrinterForceLink;
-int PowerPCAsmPrinterForceLink = 0;
-
// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() { }
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+}
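
With registration moved into the initializer, clients reach the printer through the registry instead of a force-link symbol. A minimal sketch of the consuming side, assuming the TargetRegistry::lookupTarget(triple, error) interface of this vintage; the triple string is illustrative:

  std::string Error;
  LLVMInitializePowerPCAsmPrinter();  // plus, presumably, the new TargetInfo initializer
  const Target *T = TargetRegistry::lookupTarget("powerpc-apple-darwin9", Error);
  // T->createAsmPrinter(...) then lands in createPPCAsmPrinterPass above,
  // which picks the Darwin or Linux printer off the subtarget.
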
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index a6479d81a6f1..bdd6d3623980 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -20,10 +20,10 @@ add_llvm_target(PowerPCCodeGen
PPCISelLowering.cpp
PPCJITInfo.cpp
PPCMachOWriterInfo.cpp
+ PPCMCAsmInfo.cpp
PPCPredicates.cpp
PPCRegisterInfo.cpp
PPCSubtarget.cpp
- PPCTargetAsmInfo.cpp
PPCTargetMachine.cpp
)
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index db688970e9e7..4015d4aa190d 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -17,6 +17,6 @@ BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
PPCGenSubtarget.inc PPCGenCallingConv.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f6c3469908b9..7b98268bd83d 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -24,16 +24,21 @@ namespace llvm {
class PPCTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
- class raw_ostream;
+ class ObjectCodeEmitter;
+ class formatted_raw_ostream;
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
-FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, PPCTargetMachine &TM,
- bool Verbose);
FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &MCE);
+ JITCodeEmitter &MCE);
+FunctionPass *createPPCObjectCodeEmitterPass(PPCTargetMachine &TM,
+ ObjectCodeEmitter &OCE);
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+
} // end namespace llvm;
// Defines symbolic names for PowerPC registers. This defines a mapping from
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index cd6018de490b..0675293e1144 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -19,12 +19,15 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -55,8 +58,7 @@ namespace {
template <class CodeEmitter>
class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
- public PPCCodeEmitter
- {
+ public PPCCodeEmitter {
TargetMachine &TM;
CodeEmitter &MCE;
@@ -88,9 +90,10 @@ namespace {
template <class CodeEmitter>
char Emitter<CodeEmitter>::ID = 0;
}
-
+
/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
/// to the specified MCE object.
+
FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM,
MachineCodeEmitter &MCE) {
return new Emitter<MachineCodeEmitter>(TM, MCE);
@@ -101,6 +104,11 @@ FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
return new Emitter<JITCodeEmitter>(TM, JCE);
}
+FunctionPass *llvm::createPPCObjectCodeEmitterPass(PPCTargetMachine &TM,
+ ObjectCodeEmitter &OCE) {
+ return new Emitter<ObjectCodeEmitter>(TM, OCE);
+}
+
template <class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
@@ -121,9 +129,10 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
template <class CodeEmitter>
void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
MCE.StartMachineBasicBlock(&MBB);
-
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
switch (MI.getOpcode()) {
default:
MCE.emitWordBE(getBinaryCodeForInstr(MI));
@@ -133,6 +142,7 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
MCE.emitLabel(MI.getOperand(0).getImm());
break;
case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::KILL:
break; // pseudo opcode, no side effects
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8:
@@ -141,6 +151,7 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
MCE.emitWordBE(0x48000005); // bl 1
break;
}
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
}
}
@@ -172,7 +183,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
}
switch (MI.getOpcode()) {
- default: MI.dump(); assert(0 && "Unknown instruction for relocation!");
+ default: MI.dump(); llvm_unreachable("Unknown instruction for relocation!");
case PPC::LIS:
case PPC::LIS8:
case PPC::ADDIS:
@@ -193,7 +204,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
case PPC::LWZ8:
case PPC::LFS:
case PPC::LFD:
-
+
// Stores.
case PPC::STB:
case PPC::STB8:
@@ -214,7 +225,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
break;
}
}
-
+
MachineRelocation R;
if (MO.isGlobal()) {
R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
@@ -231,7 +242,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
Reloc, MO.getIndex(), 0);
}
-
+
// If in PIC mode, we need to encode the negated address of the
// 'movepctolr' into the unrelocated field. After relocation, we'll have
// &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
@@ -242,7 +253,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4);
}
MCE.addRelocation(R);
-
+
} else if (MO.isMBB()) {
unsigned Reloc = 0;
unsigned Opcode = MI.getOpcode();
@@ -252,15 +263,17 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
Reloc = PPC::reloc_pcrel_bx;
else // BCC instruction
Reloc = PPC::reloc_pcrel_bcx;
+
MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
Reloc, MO.getMBB()));
} else {
- cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+#endif
+ llvm_unreachable(0);
}
return rv;
}
#include "PPCGenCodeEmitter.inc"
-
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
index 770a560ccf4e..65f113e6fb9a 100644
--- a/lib/Target/PowerPC/PPCFrameInfo.h
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -31,33 +31,32 @@ public:
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
- static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) {
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
if (isDarwinABI)
- return LP64 ? 16 : 8;
+ return isPPC64 ? 16 : 8;
// SVR4 ABI:
- return 4;
+ return isPPC64 ? 16 : 4;
}
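
A quick check of the new return values; note that the 64-bit SVR4 slot now matches Darwin instead of falling through to 4:

  unsigned Darwin64 = PPCFrameInfo::getReturnSaveOffset(true,  true);   // 16
  unsigned SVR464   = PPCFrameInfo::getReturnSaveOffset(true,  false);  // 16 (was 4)
  unsigned SVR432   = PPCFrameInfo::getReturnSaveOffset(false, false);  // 4
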
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) {
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
// For the Darwin ABI:
// Use the TOC save slot in the PowerPC linkage area for saving the frame
  // pointer (if needed). LLVM does not generate code that uses the TOC (R2
  // is treated as a caller-saved register).
if (isDarwinABI)
- return LP64 ? 40 : 20;
+ return isPPC64 ? 40 : 20;
- // SVR4 ABI:
- // Save it right before the link register
+ // SVR4 ABI: First slot in the general register save area.
return -4U;
}
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool LP64, bool isDarwinABI) {
- if (isDarwinABI)
- return 6 * (LP64 ? 8 : 4);
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
// SVR4 ABI:
return 8;
@@ -65,118 +64,222 @@ public:
  /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
/// argument area.
- static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) {
- // For the Darwin ABI:
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
// The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- if (isDarwinABI)
- return 8 * (LP64 ? 8 : 4);
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
- // SVR4 ABI:
+ // 32-bit SVR4 ABI:
// There is no default stack allocated for the 8 first GPR arguments.
return 0;
}
/// getMinCallFrameSize - Return the minimum size a call frame can be using
/// the PowerPC ABI.
- static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) {
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
// The call frame needs to be at least big enough for linkage and 8 args.
- return getLinkageSize(LP64, isDarwinABI) +
- getMinCallArgumentsSize(LP64, isDarwinABI);
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
}
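
Combining the helpers, the minimum call frame works out as follows; the numbers fall straight out of the code above:

  // 64-bit (Darwin or SVR4): 6*8 linkage + 8*8 GPR home area = 112 bytes.
  // 32-bit Darwin:           6*4        + 8*4                =  56 bytes.
  // 32-bit SVR4:             8          + 0                  =   8 bytes.
  unsigned Darwin64Min = PPCFrameInfo::getMinCallFrameSize(true,  true);   // 112
  unsigned SVR432Min   = PPCFrameInfo::getMinCallFrameSize(false, false);  //   8
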
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
- const std::pair<unsigned, int> *
+ const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const {
// Early exit if not using the SVR4 ABI.
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
NumEntries = 0;
return 0;
}
-
- static const std::pair<unsigned, int> Offsets[] = {
+
+ static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
- std::pair<unsigned, int>(PPC::F31, -8),
- std::pair<unsigned, int>(PPC::F30, -16),
- std::pair<unsigned, int>(PPC::F29, -24),
- std::pair<unsigned, int>(PPC::F28, -32),
- std::pair<unsigned, int>(PPC::F27, -40),
- std::pair<unsigned, int>(PPC::F26, -48),
- std::pair<unsigned, int>(PPC::F25, -56),
- std::pair<unsigned, int>(PPC::F24, -64),
- std::pair<unsigned, int>(PPC::F23, -72),
- std::pair<unsigned, int>(PPC::F22, -80),
- std::pair<unsigned, int>(PPC::F21, -88),
- std::pair<unsigned, int>(PPC::F20, -96),
- std::pair<unsigned, int>(PPC::F19, -104),
- std::pair<unsigned, int>(PPC::F18, -112),
- std::pair<unsigned, int>(PPC::F17, -120),
- std::pair<unsigned, int>(PPC::F16, -128),
- std::pair<unsigned, int>(PPC::F15, -136),
- std::pair<unsigned, int>(PPC::F14, -144),
-
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
// General register save area offsets.
- std::pair<unsigned, int>(PPC::R31, -4),
- std::pair<unsigned, int>(PPC::R30, -8),
- std::pair<unsigned, int>(PPC::R29, -12),
- std::pair<unsigned, int>(PPC::R28, -16),
- std::pair<unsigned, int>(PPC::R27, -20),
- std::pair<unsigned, int>(PPC::R26, -24),
- std::pair<unsigned, int>(PPC::R25, -28),
- std::pair<unsigned, int>(PPC::R24, -32),
- std::pair<unsigned, int>(PPC::R23, -36),
- std::pair<unsigned, int>(PPC::R22, -40),
- std::pair<unsigned, int>(PPC::R21, -44),
- std::pair<unsigned, int>(PPC::R20, -48),
- std::pair<unsigned, int>(PPC::R19, -52),
- std::pair<unsigned, int>(PPC::R18, -56),
- std::pair<unsigned, int>(PPC::R17, -60),
- std::pair<unsigned, int>(PPC::R16, -64),
- std::pair<unsigned, int>(PPC::R15, -68),
- std::pair<unsigned, int>(PPC::R14, -72),
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
// CR save area offset.
// FIXME SVR4: Disable CR save area for now.
-// std::pair<unsigned, int>(PPC::CR2, -4),
-// std::pair<unsigned, int>(PPC::CR3, -4),
-// std::pair<unsigned, int>(PPC::CR4, -4),
-// std::pair<unsigned, int>(PPC::CR2LT, -4),
-// std::pair<unsigned, int>(PPC::CR2GT, -4),
-// std::pair<unsigned, int>(PPC::CR2EQ, -4),
-// std::pair<unsigned, int>(PPC::CR2UN, -4),
-// std::pair<unsigned, int>(PPC::CR3LT, -4),
-// std::pair<unsigned, int>(PPC::CR3GT, -4),
-// std::pair<unsigned, int>(PPC::CR3EQ, -4),
-// std::pair<unsigned, int>(PPC::CR3UN, -4),
-// std::pair<unsigned, int>(PPC::CR4LT, -4),
-// std::pair<unsigned, int>(PPC::CR4GT, -4),
-// std::pair<unsigned, int>(PPC::CR4EQ, -4),
-// std::pair<unsigned, int>(PPC::CR4UN, -4),
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
// VRSAVE save area offset.
- std::pair<unsigned, int>(PPC::VRSAVE, -4),
-
+ {PPC::VRSAVE, -4},
+
// Vector register save area
- std::pair<unsigned, int>(PPC::V31, -16),
- std::pair<unsigned, int>(PPC::V30, -32),
- std::pair<unsigned, int>(PPC::V29, -48),
- std::pair<unsigned, int>(PPC::V28, -64),
- std::pair<unsigned, int>(PPC::V27, -80),
- std::pair<unsigned, int>(PPC::V26, -96),
- std::pair<unsigned, int>(PPC::V25, -112),
- std::pair<unsigned, int>(PPC::V24, -128),
- std::pair<unsigned, int>(PPC::V23, -144),
- std::pair<unsigned, int>(PPC::V22, -160),
- std::pair<unsigned, int>(PPC::V21, -176),
- std::pair<unsigned, int>(PPC::V20, -192)
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
};
-
- NumEntries = array_lengthof(Offsets);
-
- return Offsets;
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
+ // mode?
+ {PPC::R31, -4},
+ {PPC::R30, -12},
+ {PPC::R29, -20},
+ {PPC::R28, -28},
+ {PPC::R27, -36},
+ {PPC::R26, -44},
+ {PPC::R25, -52},
+ {PPC::R24, -60},
+ {PPC::R23, -68},
+ {PPC::R22, -76},
+ {PPC::R21, -84},
+ {PPC::R20, -92},
+ {PPC::R19, -100},
+ {PPC::R18, -108},
+ {PPC::R17, -116},
+ {PPC::R16, -124},
+ {PPC::R15, -132},
+ {PPC::R14, -140},
+
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
}
};
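
A consumer-side sketch of the table above; it assumes SpillSlot exposes Reg and Offset members (which the brace initializers imply) and that the method overrides the virtual on TargetFrameInfo:

  unsigned NumEntries = 0;
  const TargetFrameInfo::SpillSlot *Slots =
    TM.getFrameInfo()->getCalleeSavedSpillSlots(NumEntries);
  for (unsigned i = 0; i != NumEntries; ++i)           // e.g. {PPC::F31, -8} ...
    errs() << "reg " << Slots[i].Reg << " at SP" << Slots[i].Offset << '\n';
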
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index ec3e757651f4..6af7e0ffbc1a 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -17,6 +17,8 @@
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -51,7 +53,7 @@ PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
}
void PPCHazardRecognizer970::EndDispatchGroup() {
- DOUT << "=== Start of dispatch group\n";
+ DEBUG(errs() << "=== Start of dispatch group\n");
NumIssued = 0;
// Structural hazard info.
@@ -141,7 +143,7 @@ getHazardType(SUnit *SU) {
return Hazard;
switch (InstrType) {
- default: assert(0 && "Unknown instruction type!");
+ default: llvm_unreachable("Unknown instruction type!");
case PPCII::PPC970_FXU:
case PPCII::PPC970_LSU:
case PPCII::PPC970_FPU:
@@ -167,7 +169,7 @@ getHazardType(SUnit *SU) {
if (isLoad && NumStores) {
unsigned LoadSize;
switch (Opcode) {
- default: assert(0 && "Unknown load!");
+ default: llvm_unreachable("Unknown load!");
case PPC::LBZ: case PPC::LBZU:
case PPC::LBZX:
case PPC::LBZ8: case PPC::LBZU8:
@@ -235,7 +237,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
if (isStore) {
unsigned ThisStoreSize;
switch (Opcode) {
- default: assert(0 && "Unknown store instruction!");
+ default: llvm_unreachable("Unknown store instruction!");
case PPC::STB: case PPC::STB8:
case PPC::STBU: case PPC::STBU8:
case PPC::STBX: case PPC::STBX8:
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 823e3162191e..8fa6a6614b97 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -20,6 +20,7 @@
#include "PPCHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -31,6 +32,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -49,17 +52,12 @@ namespace {
PPCLowering(*TM.getTargetLowering()),
PPCSubTarget(*TM.getSubtargetImpl()) {}
- virtual bool runOnFunction(Function &Fn) {
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (Fn.hasAvailableExternallyLinkage())
- return false;
-
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- SelectionDAGISel::runOnFunction(Fn);
+ SelectionDAGISel::runOnMachineFunction(MF);
- InsertVRSaveCode(Fn);
+ InsertVRSaveCode(MF);
return true;
}
@@ -145,30 +143,14 @@ namespace {
}
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- if (!SelectAddrIdx(Op, Op, Op0, Op1))
- SelectAddrImm(Op, Op, Op0, Op1);
- break;
- case 'o': // offsetable
- if (!SelectAddrImm(Op, Op, Op0, Op1)) {
- Op0 = Op;
- Op1 = getSmallIPtrImm(0);
- }
- break;
- case 'v': // not offsetable
- SelectAddrIdxOnly(Op, Op, Op0, Op1);
- break;
- }
-
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
+ OutOps.push_back(Op);
return false;
}
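
The restriction in the new comment is easiest to see from the opcodes involved: a memory operand folded as Reg or Reg+Imm keeps the D-form mnemonic the user wrote, while Reg+Reg would require the X-form one. An illustrative (hypothetical) user snippet:

  int load(int *P) {
    int V;
    // The mnemonic is fixed by the asm string, so the operand must stay a
    // D-form address like 0(r9); substituting Reg+Reg would need "lwzx".
    asm("lwz %0,%1" : "=r"(V) : "m"(*P));
    return V;
  }
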
@@ -179,7 +161,7 @@ namespace {
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
virtual void InstructionSelect();
- void InsertVRSaveCode(Function &Fn);
+ void InsertVRSaveCode(MachineFunction &MF);
virtual const char *getPassName() const {
return "PowerPC DAG->DAG Pattern Instruction Selection";
@@ -216,13 +198,12 @@ void PPCDAGToDAGISel::InstructionSelect() {
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
-void PPCDAGToDAGISel::InsertVRSaveCode(Function &F) {
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Check to see if this function uses vector registers, which means we have to
// save and restore the VRSAVE register and update it with the regs we use.
//
  // In this case, there will be virtual registers of vector type created
// by the scheduler. Detect them now.
- MachineFunction &Fn = MachineFunction::get(&F);
bool HasVectorVReg = false;
for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
e = RegInfo->getLastVirtReg()+1; i != e; ++i)
@@ -285,7 +266,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
const TargetInstrInfo &TII = *TM.getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = BB->getParent()->front();
+ MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc dl = DebugLoc::getUnknownLoc();
@@ -488,7 +469,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SH &= 31;
SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB),
getI32Imm(ME) };
- return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
}
}
return 0;
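
The getTargetNode-to-getMachineNode churn that fills the rest of this file is a mechanical API rename (the new call, as far as I recall, returns a MachineSDNode rather than a plain SDNode); the re-indentation merely tracks the longer name:

  // before
  SDNode *N = CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
  // after
  SDNode *N2 = CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
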
@@ -507,12 +488,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (isInt32Immediate(RHS, Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt16(Imm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt16((int)Imm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
@@ -523,22 +504,22 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// xoris r0,r3,0x1234
// cmplwi cr0,r0,0x5678
// beq cr0,L6
- SDValue Xor(CurDAG->getTargetNode(PPC::XORIS, dl, MVT::i32, LHS,
- getI32Imm(Imm >> 16)), 0);
- return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, Xor,
- getI32Imm(Imm & 0xFFFF)), 0);
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
}
Opc = PPC::CMPLW;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt32Immediate(RHS, Imm) && isUInt16(Imm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLW;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS,
- getI32Imm((int)SImm & 0xFFFF)),
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
0);
Opc = PPC::CMPW;
}
@@ -548,12 +529,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (isInt64Immediate(RHS.getNode(), Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt16(Imm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt16(Imm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
@@ -565,23 +546,23 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// cmpldi cr0,r0,0x5678
// beq cr0,L6
if (isUInt32(Imm)) {
- SDValue Xor(CurDAG->getTargetNode(PPC::XORIS8, dl, MVT::i64, LHS,
- getI64Imm(Imm >> 16)), 0);
- return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, Xor,
- getI64Imm(Imm & 0xFFFF)), 0);
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
}
}
Opc = PPC::CMPLD;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS,
- getI64Imm(Imm & 0xFFFF)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLD;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
- return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS,
- getI64Imm(SImm & 0xFFFF)),
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
0);
Opc = PPC::CMPD;
}
@@ -591,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
Opc = PPC::FCMPUD;
}
- return SDValue(CurDAG->getTargetNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
@@ -600,8 +581,8 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
case ISD::SETONE:
case ISD::SETOLE:
case ISD::SETOGE:
- assert(0 && "Should be lowered by legalize!");
- default: assert(0 && "Unknown condition!"); abort();
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
case ISD::SETOEQ:
case ISD::SETEQ: return PPC::PRED_EQ;
case ISD::SETUNE:
@@ -632,7 +613,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
Invert = false;
Other = -1;
switch (CC) {
- default: assert(0 && "Unknown condition!"); abort();
+ default: llvm_unreachable("Unknown condition!");
case ISD::SETOLT:
case ISD::SETLT: return 0; // Bit #0 = SETOLT
case ISD::SETOGT:
@@ -651,7 +632,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
case ISD::SETOGE:
case ISD::SETOLE:
case ISD::SETONE:
- assert(0 && "Invalid branch code: should be expanded by legalize");
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
// These are invalid for floating point. Assume integer.
case ISD::SETULT: return 0;
case ISD::SETUGT: return 1;
@@ -673,14 +654,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
switch (CC) {
default: break;
case ISD::SETEQ: {
- Op = SDValue(CurDAG->getTargetNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETNE: {
SDValue AD =
- SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
- Op, getI32Imm(~0U)), 0);
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U)), 0);
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
AD.getValue(1));
}
@@ -690,8 +671,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
}
case ISD::SETGT: {
SDValue T =
- SDValue(CurDAG->getTargetNode(PPC::NEG, dl, MVT::i32, Op), 0);
- T = SDValue(CurDAG->getTargetNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
@@ -701,31 +682,31 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
switch (CC) {
default: break;
case ISD::SETEQ:
- Op = SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
- Op, getI32Imm(1)), 0);
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(1)), 0);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
- SDValue(CurDAG->getTargetNode(PPC::LI, dl,
- MVT::i32,
- getI32Imm(0)), 0),
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
Op.getValue(1));
case ISD::SETNE: {
- Op = SDValue(CurDAG->getTargetNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
- SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
- Op, getI32Imm(~0U));
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
Op, SDValue(AD, 1));
}
case ISD::SETLT: {
- SDValue AD = SDValue(CurDAG->getTargetNode(PPC::ADDI, dl, MVT::i32, Op,
- getI32Imm(1)), 0);
- SDValue AN = SDValue(CurDAG->getTargetNode(PPC::AND, dl, MVT::i32, AD,
- Op), 0);
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- Op = SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
0);
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
getI32Imm(1));
@@ -748,10 +729,10 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
InFlag).getValue(1);
if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
- IntCR = SDValue(CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
- CCReg), 0);
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
else
- IntCR = SDValue(CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
@@ -760,7 +741,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
// Get the specified bit.
SDValue Tmp =
- SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
if (Inv) {
assert(OtherCondIdx == -1 && "Can't have split plus negation");
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
@@ -772,7 +753,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
// Get the other bit of the comparison.
Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
SDValue OtherCond =
- SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
}
@@ -825,17 +806,17 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
// Simple value.
if (isInt16(Imm)) {
// Just the Lo bits.
- Result = CurDAG->getTargetNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
} else if (Lo) {
// Handle the Hi bits.
unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
- Result = CurDAG->getTargetNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
// And Lo bits.
- Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
} else {
// Just the Hi bits.
- Result = CurDAG->getTargetNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
}
// If no shift, we're done.
@@ -843,19 +824,20 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
// Shift for next step if the upper 32-bits were not zero.
if (Imm) {
- Result = CurDAG->getTargetNode(PPC::RLDICR, dl, MVT::i64,
- SDValue(Result, 0),
- getI32Imm(Shift), getI32Imm(63 - Shift));
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
}
// Add in the last bits as required.
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
- Result = CurDAG->getTargetNode(PPC::ORIS8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Hi));
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
}
if ((Lo = Remainder & 0xFFFF)) {
- Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
}
return Result;
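
Tracing the branches above for a full-width constant such as 0x123456789ABCDEF0 yields the classic five-instruction sequence (a sketch; the register choice is illustrative):

  lis    r3, 0x1234         ; LIS8:   high half of the top 32 bits
  ori    r3, r3, 0x5678     ; ORI8:   low half of the top 32 bits
  rldicr r3, r3, 32, 31     ; shift the top word into bits 63-32
  oris   r3, r3, 0x9ABC     ; ORIS8:  remainder bits 31-16
  ori    r3, r3, 0xDEF0     ; ORI8:   remainder bits 15-0
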
@@ -875,18 +857,18 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
if (N->hasOneUse())
return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI,
getSmallIPtrImm(0));
- return CurDAG->getTargetNode(Opc, dl, Op.getValueType(), TFI,
- getSmallIPtrImm(0));
+ return CurDAG->getMachineNode(Opc, dl, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
}
case PPCISD::MFCR: {
SDValue InFlag = N->getOperand(1);
// Use MFOCRF if supported.
if (PPCSubTarget.isGigaProcessor())
- return CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32,
- N->getOperand(0), InFlag);
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
else
- return CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, InFlag);
+ return CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, InFlag);
}
case ISD::SDIV: {
@@ -900,17 +882,17 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
SDValue N0 = N->getOperand(0);
if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
SDNode *Op =
- CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
- N0, getI32Imm(Log2_32(Imm)));
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(Imm)));
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(Op, 0), SDValue(Op, 1));
} else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
SDNode *Op =
- CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
- N0, getI32Imm(Log2_32(-Imm)));
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(-Imm)));
SDValue PT =
- SDValue(CurDAG->getTargetNode(PPC::ADDZE, dl, MVT::i32,
- SDValue(Op, 0), SDValue(Op, 1)),
+ SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
0);
return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
}
@@ -923,7 +905,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
case ISD::LOAD: {
// Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(Op);
- MVT LoadedVT = LD->getMemoryVT();
+ EVT LoadedVT = LD->getMemoryVT();
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC)
@@ -938,8 +920,8 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
if (LD->getValueType(0) != MVT::i64) {
// Handle PPC32 integer and normal FP loads.
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
- switch (LoadedVT.getSimpleVT()) {
- default: assert(0 && "Invalid PPC load type!");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
case MVT::f64: Opcode = PPC::LFDU; break;
case MVT::f32: Opcode = PPC::LFSU; break;
case MVT::i32: Opcode = PPC::LWZU; break;
@@ -950,8 +932,8 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
} else {
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
- switch (LoadedVT.getSimpleVT()) {
- default: assert(0 && "Invalid PPC load type!");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
case MVT::i64: Opcode = PPC::LDU; break;
case MVT::i32: Opcode = PPC::LWZU8; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
@@ -964,11 +946,11 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
// FIXME: PPC64
- return CurDAG->getTargetNode(Opcode, dl, LD->getValueType(0),
- PPCLowering.getPointerTy(),
- MVT::Other, Ops, 3);
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
} else {
- assert(0 && "R+R preindex loads not supported yet!");
+ llvm_unreachable("R+R preindex loads not supported yet!");
}
}
@@ -1008,7 +990,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
N->getOperand(0).getOperand(1),
getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
- return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
}
}
@@ -1058,8 +1040,8 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
// FIXME: Implement this optzn for PPC64.
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
- CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
- N->getOperand(0), getI32Imm(~0U));
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ N->getOperand(0), getI32Imm(~0U));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
SDValue(Tmp, 0), N->getOperand(0),
SDValue(Tmp, 1));
@@ -1109,51 +1091,10 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
SDValue Chain = N->getOperand(0);
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
- Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Target,
- Chain), 0);
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain), 0);
return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
}
- case ISD::DECLARE: {
- SDValue Chain = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
-
- // FIXME: We need to handle this for VLAs.
- if (!FINode) {
- ReplaceUses(Op.getValue(0), Chain);
- return NULL;
- }
-
- if (N2.getOpcode() == ISD::ADD) {
- if (N2.getOperand(0).getOpcode() == ISD::ADD &&
- N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
- N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Hi &&
- N2.getOperand(1).getOpcode() == PPCISD::Lo)
- N2 = N2.getOperand(0).getOperand(1).getOperand(0);
- else if (N2.getOperand(0).getOpcode() == ISD::ADD &&
- N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
- N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Lo &&
- N2.getOperand(1).getOpcode() == PPCISD::Hi)
- N2 = N2.getOperand(0).getOperand(1).getOperand(0);
- else if (N2.getOperand(0).getOpcode() == PPCISD::Hi &&
- N2.getOperand(1).getOpcode() == PPCISD::Lo)
- N2 = N2.getOperand(0).getOperand(0);
- }
-
- // If we don't have a global address here, the debug info is mangled, just
- // drop it.
- if (!isa<GlobalAddressSDNode>(N2)) {
- ReplaceUses(Op.getValue(0), Chain);
- return NULL;
- }
- int FI = cast<FrameIndexSDNode>(N1)->getIndex();
- GlobalValue *GV = cast<GlobalAddressSDNode>(N2)->getGlobal();
- SDValue Tmp1 = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
- return CurDAG->SelectNodeTo(N, TargetInstrInfo::DECLARE,
- MVT::Other, Tmp1, Tmp2, Chain);
- }
}
return SelectCode(Op);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1c6b2877889f..3920b3815098 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -31,21 +31,24 @@
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
+ EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
+ EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
@@ -54,8 +57,15 @@ static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
cl::Hidden);
+static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->isDarwin())
+ return new TargetLoweringObjectFileMachO();
+ return new TargetLoweringObjectFileELF();
+}
+
+
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
+ : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
@@ -193,9 +203,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
- // RET must be custom lowered, to meet ABI requirements.
- setOperationAction(ISD::RET , MVT::Other, Custom);
-
// TRAP is legal.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -205,8 +212,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- // VAARG is custom lowered with the SVR4 ABI
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI())
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ && !TM.getSubtarget<PPCSubtarget>().isPPC64())
setOperationAction(ISD::VAARG, MVT::Other, Custom);
else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -276,7 +284,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
@@ -412,6 +420,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
@@ -421,6 +430,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
+ case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
@@ -438,12 +448,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
- case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
}
}
-MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -900,7 +909,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
- Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
return true;
}
}
@@ -1012,7 +1021,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
- Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0);
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
return true;
}
}
@@ -1038,7 +1047,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!EnablePPCPreinc) return false;
SDValue Ptr;
- MVT VT;
+ EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
@@ -1086,7 +1095,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
@@ -1120,7 +1129,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
}
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
SDValue Zero = DAG.getConstant(0, PtrVT);
@@ -1154,13 +1163,13 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) {
- assert(0 && "TLS not implemented for PPC.");
+ llvm_unreachable("TLS not implemented for PPC.");
return SDValue(); // Not reached
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
@@ -1170,6 +1179,13 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
const TargetMachine &TM = DAG.getTarget();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
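// As an illustration (a sketch, not part of this patch): after instruction
// selection the TOC_ENTRY node produced above becomes a TOC-relative load,
// roughly
//   ld rD, G@toc(r2)   ; r2 (i.e. X2) holds the current TOC pointer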
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
@@ -1191,7 +1207,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
- if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
+ if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM))
return Lo;
// If the global is weak or external, we have to go through the lazy
@@ -1208,7 +1224,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
// fold the new nodes.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isNullValue() && CC == ISD::SETEQ) {
- MVT VT = Op.getOperand(0).getValueType();
+ EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
VT = MVT::i32;
@@ -1232,9 +1248,9 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
// condition register, reading it back out, and masking the correct bit. The
// normal approach here uses sub to do this instead of xor. Using xor exposes
// the result to other bit-twiddling opportunities.
- MVT LHSVT = Op.getOperand(0).getValueType();
+ EVT LHSVT = Op.getOperand(0).getValueType();
if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
Op.getOperand(1));
return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
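// For example (a sketch, not from the source): this rewrites
//   (setcc eq, a, b)
// into
//   (setcc eq, (xor a, b), 0)
// so later combines can reuse the xor result for other bit-twiddling.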
@@ -1249,7 +1265,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
unsigned VarArgsNumFPR,
const PPCSubtarget &Subtarget) {
- assert(0 && "VAARG not yet implemented for the SVR4 ABI!");
+ llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
return SDValue(); // Not reached
}
@@ -1260,10 +1276,11 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
DebugLoc dl = Op.getDebugLoc();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
const Type *IntPtrTy =
- DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();
+ DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+ *DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -1281,8 +1298,9 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
- false, false, 0, CallingConv::C, false,
+ LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
@@ -1300,16 +1318,16 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
DebugLoc dl = Op.getDebugLoc();
- if (Subtarget.isDarwinABI()) {
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
}
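// In C terms (an illustration; the name is hypothetical), this fast path is a
// single pointer store,
//   *(char **)ap = (char *)&vararg_save_area;
// with none of the gpr/fpr counter bookkeeping the 32-bit SVR4 path needs.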
- // For the SVR4 ABI we follow the layout of the va_list struct.
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
// We assume the given va_list is already allocated.
//
// typedef struct {
@@ -1338,7 +1356,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32);
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
@@ -1380,15 +1398,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
+ EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -1414,8 +1432,8 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
+ EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -1442,29 +1460,20 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
}
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
-/// depending on which subtarget is selected.
-static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
- if (Subtarget.isDarwinABI()) {
- static const unsigned FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
- return FPR;
- }
-
-
+/// on Darwin.
+static const unsigned *GetFPR() {
static const unsigned FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
+
return FPR;
}
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
-static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
- MVT ArgVT = Arg.getValueType();
unsigned ArgSize = ArgVT.getSizeInBits()/8;
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
@@ -1474,14 +1483,31 @@ static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
}
SDValue
-PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
- SelectionDAG &DAG,
- int &VarArgsFrameIndex,
- int &VarArgsStackOffset,
- unsigned &VarArgsNumGPR,
- unsigned &VarArgsNumFPR,
- const PPCSubtarget &Subtarget) {
- // SVR4 ABI Stack Frame Layout:
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+ return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
// +-----------------------------------+
// +--> | Back chain |
// | +-----------------------------------+
@@ -1512,25 +1538,21 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- SmallVector<SDValue, 8> ArgValues;
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- DebugLoc dl = Op.getDebugLoc();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- unsigned CC = MF.getFunction()->getCallingConv();
- bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
+ bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1538,11 +1560,11 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
// Arguments stored in registers.
if (VA.isRegLoc()) {
TargetRegisterClass *RC;
- MVT ValVT = VA.getValVT();
+ EVT ValVT = VA.getValVT();
- switch (ValVT.getSimpleVT()) {
+ switch (ValVT.getSimpleVT().SimpleTy) {
default:
- assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering");
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
case MVT::i32:
RC = PPC::GPRCRegisterClass;
break;
@@ -1562,9 +1584,9 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
- ArgValues.push_back(ArgValue);
+ InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
@@ -1575,7 +1597,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
}
}
@@ -1583,12 +1605,13 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
// Aggregates passed by value are stored in the local variable space of the
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
- CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs);
+ CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(),
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
@@ -1632,7 +1655,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
- NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
+ NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
CCInfo.getNextStackOffset());
@@ -1645,7 +1668,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
unsigned GPRIndex = 0;
for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) {
SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
- SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -1658,7 +1681,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
@@ -1666,18 +1689,18 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
- // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is
- // set.
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
// The double arguments are stored to the VarArgsFrameIndex
// on the stack.
unsigned FPRIndex = 0;
for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) {
SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
- SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -1685,47 +1708,40 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
-
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
SDValue
-PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
- SelectionDAG &DAG,
- int &VarArgsFrameIndex,
- const PPCSubtarget &Subtarget) {
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- SmallVector<SDValue, 8> ArgValues;
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- DebugLoc dl = Op.getDebugLoc();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- unsigned CC = MF.getFunction()->getCallingConv();
- bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
+ bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
@@ -1741,7 +1757,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR(Subtarget);
+ static const unsigned *FPR = GetFPR();
static const unsigned VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
@@ -1765,12 +1781,11 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
// entire point of the following loop.
unsigned VecArgOffset = ArgOffset;
if (!isVarArg && !isPPC64) {
- for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
++ArgNo) {
- MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- ISD::ArgFlagsTy Flags =
- cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
if (Flags.isByVal()) {
// ObjSize is the true size, ArgSize rounded up to multiple of regs.
@@ -1781,8 +1796,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
continue;
}
- switch(ObjectVT.getSimpleVT()) {
- default: assert(0 && "Unhandled argument type!");
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
case MVT::f32:
VecArgOffset += isPPC64 ? 8 : 4;
@@ -1811,15 +1826,13 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
- for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
- ArgNo != e; ++ArgNo) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
- MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
- ISD::ArgFlagsTy Flags =
- cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
unsigned CurArgOffset = ArgOffset;
@@ -1828,13 +1841,13 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
if (isVarArg || isPPC64) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
Flags,
PtrByteSize);
} else nAltivecParamsAtEnd++;
} else
// Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
Flags,
PtrByteSize);
@@ -1852,11 +1865,11 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValues.push_back(FIN);
+ InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
MemOps.push_back(Store);
@@ -1875,7 +1888,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -1888,13 +1901,13 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
continue;
}
- switch (ObjectVT.getSimpleVT()) {
- default: assert(0 && "Unhandled argument type!");
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
++GPR_idx;
} else {
needsLoad = true;
@@ -1908,7 +1921,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32) {
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
@@ -1949,7 +1962,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
else
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
} else {
needsLoad = true;
@@ -1966,7 +1979,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
- ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
ArgOffset += PtrByteSize;
@@ -1974,7 +1987,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
GPR_idx++;
}
ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
}
++VR_idx;
} else {
@@ -2000,10 +2013,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
CurArgOffset + (ArgSize - ObjSize),
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
}
- ArgValues.push_back(ArgVal);
+ InVals.push_back(ArgVal);
}
// Set the size that is at least reserved in caller of this function. Tail
@@ -2045,7 +2058,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
@@ -2055,14 +2068,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
}
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
-
- ArgValues.push_back(Root);
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size());
+ return Chain;
}
/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
@@ -2072,13 +2081,14 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
bool isPPC64,
bool isVarArg,
unsigned CC,
- CallSDNode *TheCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
unsigned &nAltivecParamsAtEnd) {
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true);
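// For scale (a sketch, not from the source): the linkage area is six slots,
// [SP][CR][LR][3 x unused], giving 6 * 4 = 24 bytes on 32-bit targets and
// 6 * 8 = 48 bytes on 64-bit targets -- the 24/48 mentioned above.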
- unsigned NumOps = TheCall->getNumArgs();
+ unsigned NumOps = Outs.size();
unsigned PtrByteSize = isPPC64 ? 8 : 4;
// Add up all the space actually used.
@@ -2089,9 +2099,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// 16-byte aligned.
nAltivecParamsAtEnd = 0;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
- MVT ArgVT = Arg.getValueType();
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Arg.getValueType();
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
@@ -2104,7 +2114,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
NumBytes = ((NumBytes+15)/16)*16;
}
- NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize);
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
}
// Allow for Altivec parameters at the end, if needed.
@@ -2149,40 +2159,37 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
return SPDiff;
}
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-/// following the call is a return. A function is eligible if caller/callee
-/// calling conventions match, currently only fastcc supports tail calls, and
-/// the function CALL is immediatly followed by a RET.
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
bool
-PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
- SDValue Ret,
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
// Variable argument functions are not supported.
- if (!PerformTailCallOpt || TheCall->isVarArg())
+ if (isVarArg)
return false;
- if (CheckTailCallReturnConstraints(TheCall, Ret)) {
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned CallerCC = MF.getFunction()->getCallingConv();
- unsigned CalleeCC = TheCall->getCallingConv();
- if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
- // Functions containing by val parameters are not supported.
- for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
- if (Flags.isByVal()) return false;
- }
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
- SDValue Callee = TheCall->getCallee();
- // Non PIC/GOT tail calls are supported.
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
- return true;
+ // Non-PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
- // At the moment we can only do local tail calls (in same module, hidden
- // or protected) if we are generating PIC.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return G->getGlobal()->hasHiddenVisibility()
- || G->getGlobal()->hasProtectedVisibility();
- }
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
}
return false;
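// Illustrative summary (not from the source): with -tailcallopt enabled, a
// fastcc caller invoking a fastcc callee with no byval arguments qualifies;
// a varargs callee, a byval argument, or a convention mismatch never does,
// and under PIC only hidden/protected globals in the same module qualify.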
@@ -2251,13 +2258,13 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
NewRetAddrLoc);
- MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(NewRetAddr), 0);
- // When using the SVR4 ABI there is no need to move the FP stack slot
- // as the FP is never overwritten.
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
if (isDarwinABI) {
int NewFPLoc =
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
@@ -2279,7 +2286,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
- MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
Info.Arg = Arg;
@@ -2300,13 +2307,13 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
DebugLoc dl) {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
- MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
Chain = SDValue(LROpOut.getNode(), 1);
- // When using the SVR4 ABI there is no need to load the FP stack slot
- // as the FP is never overwritten.
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
@@ -2340,7 +2347,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
bool isVector, SmallVector<SDValue, 8> &MemOpChains,
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
DebugLoc dl) {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
if (isVector) {
SDValue StackPtr;
@@ -2389,9 +2396,9 @@ static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SmallVector<SDValue, 8> &Ops, std::vector<MVT> &NodeTys,
+ SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
bool isSVR4ABI) {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
@@ -2444,102 +2451,145 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
return CallOpc;
}
-static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM,
- CallSDNode *TheCall, SDValue Chain,
- SDValue InFlag) {
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = TheCall->getDebugLoc();
- SmallVector<SDValue, 16> ResultVals;
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
- CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs);
- CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
- MVT VT = VA.getValVT();
+ EVT VT = VA.getValVT();
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyFromReg(Chain, dl,
VA.getLocReg(), VT, InFlag).getValue(1);
- ResultVals.push_back(Chain.getValue(0));
+ InVals.push_back(Chain.getValue(0));
InFlag = Chain.getValue(2);
}
- // If the function returns void, just return the chain.
- if (RVLocs.empty())
- return Chain;
-
- // Otherwise, merge everything together with a MERGE_VALUES node.
- ResultVals.push_back(Chain);
- SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size());
- return Res.getValue(Op.getResNo());
+ return Chain;
}
-static
-SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee,
- int SPDiff, unsigned NumBytes) {
- unsigned CC = TheCall->getCallingConv();
- DebugLoc dl = TheCall->getDebugLoc();
- bool isTailCall = TheCall->isTailCall()
- && CC == CallingConv::Fast && PerformTailCallOpt;
-
- std::vector<MVT> NodeTys;
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+ bool isTailCall, bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) {
+ std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- TM.getSubtarget<PPCSubtarget>().isSVR4ABI());
+ PPCSubTarget.isSVR4ABI());
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
+ (CallConv==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
if (InFlag.getNode())
Ops.push_back(InFlag);
// Emit tail call.
if (isTailCall) {
- assert(InFlag.getNode() &&
- "Flag must be set. Depend on flag being set in LowerRET");
- Chain = DAG.getNode(PPCISD::TAILCALL, dl,
- TheCall->getVTList(), &Ops[0], Ops.size());
- return SDValue(Chain.getNode(), Op.getResNo());
+ // If this is the first return lowered for this function, add the regs
+ // to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in different modules and
+ // thus have different TOCs, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
+ if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ // Insert NOP.
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag);
+ }
+
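// At the assembly level (a sketch, not part of this patch) a call then looks
// roughly like
//   bl callee      ; linker may redirect this to a TOC-switching stub
//   nop            ; linker may rewrite this to "ld r2, 40(r1)"
// where 40(r1) is the TOC save slot of the current stack frame.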
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag);
- if (TheCall->getValueType(0) != MVT::Other)
+ if (!Ins.empty())
InFlag = Chain.getValue(1);
- return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+ return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, Ins,
+ dl, DAG, InVals);
+ }
}
-SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget,
- TargetMachine &TM) {
- // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description
- // of the SVR4 ABI stack frame layout.
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- bool isVarArg = TheCall->isVarArg();
- unsigned CC = TheCall->getCallingConv();
- assert((CC == CallingConv::C ||
- CC == CallingConv::Fast) && "Unknown calling convention!");
- bool isTailCall = TheCall->isTailCall()
- && CC == CallingConv::Fast && PerformTailCallOpt;
- SDValue Callee = TheCall->getCallee();
- DebugLoc dl = TheCall->getDebugLoc();
-
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+SDValue
+PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((!isTailCall ||
+ (CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
+ "IsEligibleForTailCallOptimization missed a case!");
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned PtrByteSize = 4;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2549,7 +2599,7 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
// and restoring the caller's stack pointer in this function's epilog. This is
// done because, by tail calling, the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (PerformTailCallOpt && CC==CallingConv::Fast)
+ if (PerformTailCallOpt && CallConv==CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -2558,7 +2608,8 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
@@ -2567,15 +2618,14 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
// Handle fixed and variable vector arguments differently.
// Fixed vector arguments go into registers as long as registers are
// available. Variable vector arguments always go into memory.
- unsigned NumArgs = TheCall->getNumArgs();
- unsigned NumFixedArgs = TheCall->getNumFixedArgs();
+ unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ArgVT = TheCall->getArg(i).getValueType();
- ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i);
+ EVT ArgVT = Outs[i].Val.getValueType();
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool Result;
- if (i < NumFixedArgs) {
+ if (Outs[i].IsFixed) {
Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
CCInfo);
} else {
@@ -2584,24 +2634,27 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
}
if (Result) {
- cerr << "Call operand #" << i << " has unhandled type "
- << ArgVT.getMVTString() << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
}
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
- CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs);
+ CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs,
+ *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
@@ -2637,8 +2690,8 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
i != e;
++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isByVal()) {
// Argument is an aggregate which is passed by value, thus we need to
@@ -2712,7 +2765,7 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
// Set CR6 to true if this is a vararg call.
if (isVarArg) {
- SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
+ SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
InFlag = Chain.getValue(1);
}
@@ -2722,24 +2775,23 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
false, TailCallArguments);
}
- return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee,
- SPDiff, NumBytes);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
}
-SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget,
- TargetMachine &TM) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- bool isVarArg = TheCall->isVarArg();
- unsigned CC = TheCall->getCallingConv();
- bool isTailCall = TheCall->isTailCall()
- && CC == CallingConv::Fast && PerformTailCallOpt;
- SDValue Callee = TheCall->getCallee();
- unsigned NumOps = TheCall->getNumArgs();
- DebugLoc dl = TheCall->getDebugLoc();
-
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
unsigned PtrByteSize = isPPC64 ? 8 : 4;
@@ -2750,7 +2802,7 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
// and restoring the caller's stack pointer in this function's epilog. This is
// done because, by tail calling, the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (PerformTailCallOpt && CC==CallingConv::Fast)
+ if (PerformTailCallOpt && CallConv==CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
@@ -2759,13 +2811,19 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall,
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
+ Outs,
nAltivecParamsAtEnd);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
@@ -2801,7 +2859,7 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR(Subtarget);
+ static const unsigned *FPR = GetFPR();
static const unsigned VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
@@ -2818,9 +2876,8 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
- bool inMem = false;
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
@@ -2843,7 +2900,7 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
if (Size==1 || Size==2) {
// Very small objects are passed right-justified.
// Everything else is passed left-justified.
- MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
NULL, 0, VT);
@@ -2895,8 +2952,8 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
continue;
}
- switch (Arg.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected ValueType for argument!");
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
@@ -2905,7 +2962,6 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
- inMem = true;
}
ArgOffset += PtrByteSize;
break;
@@ -2945,7 +3001,6 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
- inMem = true;
}
if (isPPC64)
ArgOffset += 8;
@@ -3017,8 +3072,8 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
ArgOffset = ((ArgOffset+15)/16)*16;
ArgOffset += 12*16;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = TheCall->getArg(i);
- MVT ArgType = Arg.getValueType();
+ SDValue Arg = Outs[i].Val;
+ EVT ArgType = Arg.getValueType();
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
if (++j > NumVRs) {
@@ -3051,18 +3106,21 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
FPOp, true, TailCallArguments);
}
- return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee,
- SPDiff, NumBytes);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
}
-SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
- TargetMachine &TM) {
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
- CCState CCInfo(CC, isVarArg, TM, RVLocs);
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
@@ -3071,37 +3129,6 @@ SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- SDValue Chain = Op.getOperand(0);
-
- Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
- if (Chain.getOpcode() == PPCISD::TAILCALL) {
- SDValue TailCall = Chain;
- SDValue TargetAddress = TailCall.getOperand(1);
- SDValue StackAdjustment = TailCall.getOperand(2);
-
- assert(((TargetAddress.getOpcode() == ISD::Register &&
- cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
- TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
- TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
- isa<ConstantSDNode>(TargetAddress)) &&
- "Expecting an global address, external symbol, absolute value or register");
-
- assert(StackAdjustment.getOpcode() == ISD::Constant &&
- "Expecting a const value");
-
- SmallVector<SDValue,8> Operands;
- Operands.push_back(Chain.getOperand(0));
- Operands.push_back(TargetAddress);
- Operands.push_back(StackAdjustment);
- // Copy registers used by the call. Last operand is a flag so it is not
- // copied.
- for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
- Operands.push_back(Chain.getOperand(i));
- }
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
- Operands.size());
- }
-
SDValue Flag;
// Copy the result values into the output registers.
@@ -3109,7 +3136,7 @@ SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Op.getOperand(i*2+1), Flag);
+ Outs[i].Val, Flag);
Flag = Chain.getValue(1);
}
@@ -3125,7 +3152,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
DebugLoc dl = Op.getDebugLoc();
// Get the correct type for pointers.
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Construct the stack pointer operand.
bool IsPPC64 = Subtarget.isPPC64();
@@ -3153,7 +3180,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool IsPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
@@ -3177,7 +3204,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool IsPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
@@ -3207,7 +3234,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
// Get the correct type for pointers.
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
DAG.getConstant(0, PtrVT), Size);
@@ -3232,8 +3259,8 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
// Cannot handle SETEQ/SETNE.
if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
- MVT ResVT = Op.getValueType();
- MVT CmpVT = Op.getOperand(0).getValueType();
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
DebugLoc dl = Op.getDebugLoc();
@@ -3302,8 +3329,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT()) {
- default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
PPCISD::FCTIDZ,
@@ -3350,20 +3377,23 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(8, 8);
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
Op.getOperand(0));
// STD the extended value into the stack slot.
- MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 0, 8, 8);
- SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
- DAG.getEntryNode(), Ext64, FIdx,
- DAG.getMemOperand(MO));
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 0, 8, 8);
+ SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
+ SDValue Store =
+ DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
+ Ops, 4, MVT::i64, MMO);
// Load the value as a double.
SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
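// The code emitted for an i32 -> f64 conversion (register names illustrative)
// is roughly
//   extsw r4, r3       ; sign-extend the 32-bit input
//   std   r4, off(r1)  ; store the whole doubleword to the stack slot
//   lfd   f1, off(r1)  ; reload it into an FP register
//   fcfid f1, f1       ; convert the 64-bit integer to double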
@@ -3396,9 +3426,9 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
*/
MachineFunction &MF = DAG.getMachineFunction();
- MVT VT = Op.getValueType();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<MVT> NodeTys;
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<EVT> NodeTys;
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
@@ -3437,7 +3467,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
assert(Op.getNumOperands() == 3 &&
@@ -3449,7 +3479,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
- MVT AmtVT = Amt.getValueType();
+ EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, AmtVT), Amt);
@@ -3466,7 +3496,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
@@ -3478,7 +3508,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
- MVT AmtVT = Amt.getValueType();
+ EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, AmtVT), Amt);
@@ -3496,7 +3526,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
@@ -3506,7 +3536,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
- MVT AmtVT = Amt.getValueType();
+ EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, AmtVT), Amt);
@@ -3529,21 +3559,21 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize. Cast the result to VT.
-static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, DebugLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const MVT VTys[] = { // canonical VT to use for each size.
+ static const EVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
- MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
if (Val == -1)
SplatSize = 1;
- MVT CanonicalVT = VTys[SplatSize-1];
+ EVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
SDValue Elt = DAG.getConstant(Val, MVT::i32);
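
For reference, a typical call site elsewhere in this lowering code asks for an all-ones v4i32 splat; the Val == -1 canonicalization above means it is emitted as a single vspltisb -1:

    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
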
@@ -3558,7 +3588,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, DebugLoc dl,
- MVT DestVT = MVT::Other) {
+ EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), LHS, RHS);
@@ -3568,7 +3598,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDValue Op2, SelectionDAG &DAG,
- DebugLoc dl, MVT DestVT = MVT::Other) {
+ DebugLoc dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
@@ -3578,7 +3608,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
- MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// Force LHS/RHS to be the right type.
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
@@ -3789,7 +3819,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
int ShufIdxs[16];
switch (OpNum) {
- default: assert(0 && "Unknown i32 permute!");
+ default: llvm_unreachable("Unknown i32 permute!");
case OP_VMRGHW:
ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
@@ -3825,7 +3855,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
case OP_VSLDOI12:
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
}
- MVT VT = OpLHS.getValueType();
+ EVT VT = OpLHS.getValueType();
OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
@@ -3842,7 +3872,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
@@ -3939,7 +3969,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
- MVT EltVT = V1.getValueType().getVectorElementType();
+ EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask;
@@ -4026,7 +4056,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<MVT> VTs;
+ std::vector<EVT> VTs;
VTs.push_back(Op.getOperand(2).getValueType());
VTs.push_back(MVT::Flag);
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
@@ -4076,7 +4106,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16);
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
@@ -4141,8 +4171,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
}
return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
- assert(0 && "Unknown mul to lower!");
- abort();
+ llvm_unreachable("Unknown mul to lower!");
}
}
@@ -4150,7 +4179,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- default: assert(0 && "Wasn't expecting to be able to lower this!");
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -4165,24 +4194,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
- case ISD::FORMAL_ARGUMENTS:
- if (PPCSubTarget.isSVR4ABI()) {
- return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex,
- VarArgsStackOffset, VarArgsNumGPR,
- VarArgsNumFPR, PPCSubTarget);
- } else {
- return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex,
- PPCSubTarget);
- }
-
- case ISD::CALL:
- if (PPCSubTarget.isSVR4ABI()) {
- return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine());
- } else {
- return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine());
- }
-
- case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
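
The FORMAL_ARGUMENTS, CALL, and RET cases drop out of this switch because argument, call, and return lowering now arrive through dedicated virtual hooks (declared in the header changes further down) rather than as custom-lowered ISD nodes. A schematic sketch of the return hook, body elided:

    SDValue
    PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool isVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   DebugLoc dl, SelectionDAG &DAG) {
      // CCState analysis of Outs and copies into the ABI result
      // registers would go here; the final node is the same either way.
      return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
    }
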
@@ -4234,7 +4245,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
// This sequence changes FPSCR to do round-to-zero, adds the two halves
// of the long double, and puts FPSCR back the way it was. We do not
// actually model FPSCR.
- std::vector<MVT> NodeTys;
+ std::vector<EVT> NodeTys;
SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
NodeTys.push_back(MVT::f64); // Return register
@@ -4480,7 +4491,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -4516,9 +4528,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
+ // Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
+  // Next, remove all successors of the current block.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
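
The EM map is the point of the new EmitInstrWithCustomInserter signature: instruction selection keys pending PHI operands to the original machine-CFG edges, so each successor that now hangs off sinkMBB is recorded as a (successor, new predecessor) pair and PHIs in that successor get retargeted from BB to sinkMBB. The protocol in isolation:

    for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
         E = BB->succ_end(); I != E; ++I) {
      EM->insert(std::make_pair(*I, sinkMBB));  // PHIs in *I now see sinkMBB
      sinkMBB->addSuccessor(*I);
    }
    while (!BB->succ_empty())                   // BB keeps only the new edges
      BB->removeSuccessor(BB->succ_begin());
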
@@ -4812,7 +4833,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = exitMBB;
BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
} else {
- assert(0 && "Unexpected instr type to insert");
+ llvm_unreachable("Unexpected instr type to insert");
}
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
@@ -4903,7 +4924,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
- if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16)) {
@@ -4912,9 +4934,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
- return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0),
- BSwapOp, N->getOperand(2), N->getOperand(3),
- DAG.getValueType(N->getOperand(1).getValueType()));
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
}
break;
case ISD::BSWAP:
@@ -4925,17 +4953,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
- std::vector<MVT> VTs;
- VTs.push_back(MVT::i32);
- VTs.push_back(MVT::Other);
- SDValue MO = DAG.getMemOperand(LD->getMemOperand());
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
- MO, // MemOperand
DAG.getValueType(N->getValueType(0)) // VT
};
- SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4);
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
+ LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
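
The load side gets the same treatment: LBRX loses its SRCVALUE operand because the byte-swapping load is now a MemIntrinsicSDNode carrying the original LoadSDNode's memory operand. The store combine above also gains an isUnindexed() guard, since a pre/post-indexed store produces an updated-pointer result that the chain-only STBRX replacement cannot supply; as a sketch:

    // Only plain (unindexed) stores may be rewritten to STBRX.
    if (!cast<StoreSDNode>(N)->isUnindexed())
      break;  // leave indexed stores to the generic path
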
@@ -5035,7 +5061,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<MVT> VTs;
+ std::vector<EVT> VTs;
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
@@ -5090,7 +5116,7 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case PPCISD::LBRX: {
// lhbrx is known to have the top bits cleared out.
- if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
KnownZero = 0xFFFF0000;
break;
}
@@ -5138,7 +5164,7 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
@@ -5187,7 +5213,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
if (!CST) return; // Must be an immediate to match.
unsigned Value = CST->getZExtValue();
switch (Letter) {
- default: assert(0 && "Unknown constraint letter!");
+ default: llvm_unreachable("Unknown constraint letter!");
case 'I': // "I" is a signed 16-bit constant.
if ((short)Value == (int)Value)
Result = DAG.getTargetConstant(Value, Op.getValueType());
@@ -5304,7 +5330,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
return SDValue();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
MachineFunction &MF = DAG.getMachineFunction();
@@ -5326,7 +5352,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const {
if (this->PPCSubTarget.isPPC64()) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 962bbb144dff..ac72d8765b10 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -41,8 +41,7 @@ namespace llvm {
FCTIDZ, FCTIWZ,
/// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to,
- /// then a SRCVALUE for the address.
+ /// chain, then an f64 value to store, then an address to store it to.
STFIWX,
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
@@ -60,6 +59,8 @@ namespace llvm {
/// though these are usually folded into other nodes.
Hi, Lo,
+ TOC_ENTRY,
+
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an allocation on the stack.
@@ -78,12 +79,12 @@ namespace llvm {
/// registers.
EXTSW_32,
- /// STD_32 - This is the STD instruction for use with "32-bit" registers.
- STD_32,
-
/// CALL - A direct function call.
CALL_Darwin, CALL_SVR4,
+ /// NOP - Special NOP which follows 64-bit SVR4 calls.
+ NOP,
+
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@@ -119,18 +120,6 @@ namespace llvm {
/// an optional input flag argument.
COND_BRANCH,
- /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a
- /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
- /// the GPRC input, then stores it through Ptr. Type can be either i16 or
- /// i32.
- STBRX,
-
- /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a
- /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
- /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
- /// or i32.
- LBRX,
-
// The following 5 instructions are used only as part of the
// long double-to-int conversion sequence.
@@ -160,14 +149,27 @@ namespace llvm {
/// indexed. This is used to implement atomic operations.
STCX,
- /// TAILCALL - Indicates a tail call should be taken.
- TAILCALL,
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
/// operand #2 stack adjustment
/// operand #3 optional in flag
- TC_RETURN
+ TC_RETURN,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+  /// byte-swapping load instruction. It loads "Type" bits, byte-swaps them,
+  /// then puts them in the bottom bits of the GPRC. Type can be either i16
+  /// or i32.
+ LBRX
};
}
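
Relocating STD_32, STBRX, and LBRX to the end of the enum is what makes them memory opcodes in the eyes of the common SelectionDAG code: any target opcode at or beyond ISD::FIRST_TARGET_MEMORY_OPCODE is assumed to have been created with getMemIntrinsicNode and to carry a MachineMemOperand. A sketch of the check:

    static bool isTargetMemoryOpcode(unsigned Opcode) {
      return Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE;
    }
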
@@ -232,7 +234,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT getSetCCResultType(MVT VT) const;
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
@@ -286,7 +288,8 @@ namespace llvm {
unsigned Depth = 0) const;
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB, bool is64Bit,
unsigned BinOpcode) const;
@@ -297,7 +300,7 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &Constraint) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
@@ -327,16 +330,16 @@ namespace llvm {
/// the offset of the target addressing mode.
virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Target which want to do tail call
- /// optimization should implement this function.
- virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
- SDValue Ret,
- SelectionDAG &DAG) const;
+ virtual bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const;
@@ -370,20 +373,6 @@ namespace llvm {
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex,
int VarArgsStackOffset, unsigned VarArgsNumGPR,
unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget);
- SDValue LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, SelectionDAG &DAG,
- int &VarArgsFrameIndex,
- int &VarArgsStackOffset,
- unsigned &VarArgsNumGPR,
- unsigned &VarArgsNumFPR,
- const PPCSubtarget &Subtarget);
- SDValue LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, SelectionDAG &DAG,
- int &VarArgsFrameIndex,
- const PPCSubtarget &Subtarget);
- SDValue LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget, TargetMachine &TM);
- SDValue LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget, TargetMachine &TM);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM);
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
@@ -400,6 +389,71 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG);
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+ bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue
+ LowerFormalArguments_SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue
+ LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
};
}
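
With CallSDNode gone from the interface, tail-call eligibility is queried up front from the argument lists themselves. A hedged sketch of how LowerCall would consult the new predicate declared above:

    // Inside LowerCall, before any call nodes exist (sketch):
    if (isTailCall)
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                                                     isVarArg, Ins, DAG);
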
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 3823e537f11d..0f68fb939dc0 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -68,7 +68,7 @@ let isCall = 1, PPC970_Unit = 7,
F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7] in {
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
@@ -94,7 +94,7 @@ let isCall = 1, PPC970_Unit = 7,
F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7] in {
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
@@ -123,6 +123,8 @@ def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
(BL8_ELF tglobaladdr:$dst)>;
def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCnop),
+ (NOP)>;
// Atomic operations
let usesCustomDAGSchedInserter = 1 in {
@@ -327,14 +329,15 @@ def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntGeneral,
[(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
[(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
PPC970_DGroup_Cracked;
-def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "adde $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
-
+def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+}
def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addi $rD, $rA, $imm", IntGeneral,
[(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
@@ -342,36 +345,41 @@ def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntGeneral,
[(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
[(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
-def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "subf $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
[(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
PPC970_DGroup_Cracked;
-
-def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "subfe $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+}
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
[(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
[(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
-def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "neg $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
[(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
[(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
-
+}
def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
@@ -396,9 +404,11 @@ def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srd $rA, $rS, $rB", IntRotateD,
[(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+let Defs = [CARRY] in {
def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srad $rA, $rS, $rB", IntRotateD,
[(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
"extsb $rA, $rS", IntGeneral,
@@ -418,9 +428,11 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
"extsw $rA, $rS", IntGeneral,
[(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
"sradi $rA, $rS, $SH", IntRotateD,
[(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
"cntlzd $rA, $rS", IntGeneral,
[(set G8RC:$rA, (ctlz G8RC:$rS))]>;
@@ -543,6 +555,10 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
[(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "ld $rD, $disp($reg)", LdStLD,
+ [(set G8RC:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
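
LDtoc pairs with the PPCtoc_entry node defined in PPCInstrInfo.td below: under the 64-bit SVR4 ABI a global's address is materialized by loading its TOC slot relative to the TOC pointer in X2 (which is why X2 is reserved later in this patch). A sketch of the lowering side, its shape inferred from the pattern above rather than quoted from the patch:

    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT);
    SDValue TOCReg = DAG.getRegister(PPC::X2, MVT::i64);  // TOC pointer
    SDValue Addr = DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA, TOCReg);
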
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index 1de69116cd58..b424d1101416 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -29,7 +29,7 @@ namespace llvm {
/// reference has base register as the FrameIndex offset until it is resolved.
/// This allows a constant offset to be specified as well...
///
-inline const MachineInstrBuilder&
+static inline const MachineInstrBuilder&
addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
bool mem = true) {
if (mem)
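
addFrameReference is the helper the PPC spill and reload code leans on: it appends a frame-index addressing pair to a partially built instruction. A usage sketch for a word spill, with the opcode choice illustrative:

    MachineInstrBuilder MIB =
      addFrameReference(BuildMI(MF, DL, TII.get(PPC::STW))
                          .addReg(SrcReg, getKillRegState(isKill)),
                        FrameIdx);
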
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 87c612ab74e6..0083598cf18b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -20,7 +20,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
@@ -485,8 +487,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
.addReg(PPC::R0)
.addReg(PPC::R0));
} else {
- assert(0 && "Unknown regclass!");
- abort();
+ llvm_unreachable("Unknown regclass!");
}
return false;
@@ -509,45 +510,6 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
}
-void PPCInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
- if (Addr[0].isFI()) {
- if (StoreRegToStackSlot(MF, SrcReg, isKill,
- Addr[0].getIndex(), RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- FuncInfo->setSpillsCR();
- }
-
- return;
- }
-
- DebugLoc DL = DebugLoc::getUnknownLoc();
- unsigned Opc = 0;
- if (RC == PPC::GPRCRegisterClass) {
- Opc = PPC::STW;
- } else if (RC == PPC::G8RCRegisterClass) {
- Opc = PPC::STD;
- } else if (RC == PPC::F8RCRegisterClass) {
- Opc = PPC::STFD;
- } else if (RC == PPC::F4RCRegisterClass) {
- Opc = PPC::STFS;
- } else if (RC == PPC::VRRCRegisterClass) {
- Opc = PPC::STVX;
- } else {
- assert(0 && "Unknown regclass!");
- abort();
- }
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
- .addReg(SrcReg, getKillRegState(isKill));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
-
void
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
@@ -634,8 +596,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
.addReg(PPC::R0));
} else {
- assert(0 && "Unknown regclass!");
- abort();
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -653,41 +614,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
}
-void PPCInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
- if (Addr[0].isFI()) {
- LoadRegFromStackSlot(MF, DebugLoc::getUnknownLoc(),
- DestReg, Addr[0].getIndex(), RC, NewMIs);
- return;
- }
-
- unsigned Opc = 0;
- if (RC == PPC::GPRCRegisterClass) {
- assert(DestReg != PPC::LR && "Can't handle this yet!");
- Opc = PPC::LWZ;
- } else if (RC == PPC::G8RCRegisterClass) {
- assert(DestReg != PPC::LR8 && "Can't handle this yet!");
- Opc = PPC::LD;
- } else if (RC == PPC::F8RCRegisterClass) {
- Opc = PPC::LFD;
- } else if (RC == PPC::F4RCRegisterClass) {
- Opc = PPC::LFS;
- } else if (RC == PPC::VRRCRegisterClass) {
- Opc = PPC::LVX;
- } else {
- assert(0 && "Unknown regclass!");
- abort();
- }
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
-}
-
/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
/// copy instructions, turning them into load/store instructions.
MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -842,7 +768,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::INLINEASM: { // Inline Asm: Variable size.
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
- return MF->getTarget().getTargetAsmInfo()->getInlineAsmLength(AsmStr);
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
case PPC::DBG_LABEL:
case PPC::EH_LABEL:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 492634c979eb..bb0dc15a7922 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -121,20 +121,10 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
/// copy instructions, turning them into load/store instructions.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 7af59a2ecaf7..dc5db6ff59e3 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -35,11 +35,11 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPClbrx : SDTypeProfile<1, 3, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPCstbrx : SDTypeProfile<0, 4, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -53,6 +53,8 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
+def SDT_PPCnop : SDTypeProfile<0, 0, []>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -85,6 +87,7 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
@@ -111,6 +114,7 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
@@ -125,9 +129,6 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInFlag]>;
-def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
-
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
@@ -309,6 +310,10 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
}
+def tocentry : Operand<iPTR> {
+ let PrintMethod = "printTOCEntryLabel";
+ let MIOperandInfo = (ops i32imm:$imm);
+}
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
@@ -421,7 +426,7 @@ let isCall = 1, PPC970_Unit = 7,
LR,CTR,
CR0,CR1,CR5,CR6,CR7,
CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
- CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
+ CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
@@ -448,7 +453,7 @@ let isCall = 1, PPC970_Unit = 7,
LR,CTR,
CR0,CR1,CR5,CR6,CR7,
CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
- CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
+ CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
@@ -736,10 +741,10 @@ def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStGeneral,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i16))]>;
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStGeneral,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i32))]>;
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFDU,
@@ -832,11 +837,11 @@ def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
}
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStGeneral,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i16)]>,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStGeneral,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i32)]>,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
@@ -864,6 +869,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addi $rD, $rA, $imm", IntGeneral,
[(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
[(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
@@ -871,6 +877,7 @@ def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
+}
def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntGeneral,
[(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
@@ -881,9 +888,11 @@ def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
[(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
[(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+}
let isReMaterializable = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
@@ -956,15 +965,19 @@ def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"srw $rA, $rS, $rB", IntGeneral,
[(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+let Defs = [CARRY] in {
def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"sraw $rA, $rS, $rB", IntShift,
[(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
}
+}
let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CARRY] in {
def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
"srawi $rA, $rS, $SH", IntShift,
[(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+}
def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
[(set GPRC:$rA, (ctlz GPRC:$rS))]>;
@@ -1159,13 +1172,12 @@ let PPC970_Unit = 1 in { // FXU Operations.
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"add $rT, $rA, $rB", IntGeneral,
[(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
[(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
PPC970_DGroup_Cracked;
-def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "adde $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+}
def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divw $rT, $rA, $rB", IntDivW,
[(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
@@ -1186,22 +1198,28 @@ def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
[(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+let Defs = [CARRY] in {
def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
[(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
PPC970_DGroup_Cracked;
-def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "subfe $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+}
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
[(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
[(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
-def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "neg $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
[(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>;
@@ -1209,6 +1227,7 @@ def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
[(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
}
+}
// A-Form instructions. Most of the instructions executed in the FPU are of
// this type.
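
The CARRY Defs/Uses threaded through these patterns model the CA bit explicitly: addc/subfc and the shift-right-algebraic forms produce it, adde/subfe/addme/addze/subfme/subfze consume and regenerate it, and the call definitions above now list CARRY among their clobbers. That is what keeps a carry chain intact, for example when a 64-bit add is expanded on 32-bit PPC; a sketch of the generic expansion that depends on it:

    // The adde consumes the carry produced by the addc, so once selected
    // no CARRY-clobbering instruction (including a call) may be
    // scheduled between the pair.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Flag);
    SDValue Lo = DAG.getNode(ISD::ADDC, dl, VTs, LHSLo, RHSLo);
    SDValue Hi = DAG.getNode(ISD::ADDE, dl, VTs, LHSHi, RHSHi,
                             Lo.getValue(1));
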
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 7486d7495888..ef25d92f719a 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -18,6 +18,8 @@
#include "llvm/Function.h"
#include "llvm/System/Memory.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
@@ -197,8 +199,7 @@ asm(
);
#else
void PPC32CompilationCallback() {
- assert(0 && "This is not a power pc, you can't execute this!");
- abort();
+  llvm_unreachable("This is not a PowerPC, you can't execute this!");
}
#endif
@@ -264,8 +265,7 @@ asm(
);
#else
void PPC64CompilationCallback() {
- assert(0 && "This is not a power pc, you can't execute this!");
- abort();
+  llvm_unreachable("This is not a PowerPC, you can't execute this!");
}
#endif
@@ -383,7 +383,7 @@ void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
switch ((PPC::RelocationType)MR->getRelocationType()) {
- default: assert(0 && "Unknown relocation type!");
+ default: llvm_unreachable("Unknown relocation type!");
case PPC::reloc_pcrel_bx:
// PC-relative relocation for b and bl instructions.
ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
new file mode 100644
index 000000000000..c87879b2a332
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
@@ -0,0 +1,58 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the PPC MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+using namespace llvm;
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+ PCSymbol = ".";
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::Dwarf;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+ AssemblerDialect = 1; // New-Style mnemonics.
+}
+
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ UsedDirective = "\t# .no_dead_strip\t";
+ WeakRefDirective = "\t.weak\t";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ if (!is64Bit)
+ ExceptionsType = ExceptionHandling::Dwarf;
+ AbsoluteEHSectionOffsets = false;
+
+ ZeroDirective = "\t.space\t";
+ SetDirective = "\t.set";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ AlignmentIsInBytes = false;
+ LCOMMDirective = "\t.lcomm\t";
+ AssemblerDialect = 0; // Old-Style mnemonics.
+}
+
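
The two new MCAsmInfo flavors split PPC assembly syntax by OS: Darwin uses new-style mnemonics and ';' comments, Linux/SVR4 uses old-style mnemonics and '#' comments. A hedged sketch of the factory choice a target machine would make; the helper name is hypothetical and the actual registration plumbing is outside this diff:

    static const MCAsmInfo *createPPCMCAsmInfo(bool isDarwin, bool is64Bit) {
      if (isDarwin)
        return new PPCMCAsmInfoDarwin(is64Bit);  // hypothetical factory
      return new PPCLinuxMCAsmInfo(is64Bit);
    }
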
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.h b/lib/Target/PowerPC/PPCMCAsmInfo.h
new file mode 100644
index 000000000000..96ae6fbba0e4
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PPC MCAsmInfo classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ };
+
+ struct PPCLinuxMCAsmInfo : public MCAsmInfo {
+ explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
index 3bfa6d719105..4c14454096ca 100644
--- a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
@@ -16,6 +16,7 @@
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachORelocation.h"
#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
using namespace llvm;
@@ -46,9 +47,9 @@ unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR,
Addr = (uintptr_t)MR.getResultPointer() + ToAddr;
switch ((PPC::RelocationType)MR.getRelocationType()) {
- default: assert(0 && "Unknown PPC relocation type!");
+ default: llvm_unreachable("Unknown PPC relocation type!");
case PPC::reloc_absolute_low_ix:
- assert(0 && "Unhandled PPC relocation type!");
+ llvm_unreachable("Unhandled PPC relocation type!");
break;
case PPC::reloc_vanilla:
{
diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/PPCPredicates.cpp
index 08a281259e1f..12bb0a143406 100644
--- a/lib/Target/PowerPC/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/PPCPredicates.cpp
@@ -12,12 +12,13 @@
//===----------------------------------------------------------------------===//
#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
using namespace llvm;
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
switch (Opcode) {
- default: assert(0 && "Unknown PPC branch opcode!");
+ default: llvm_unreachable("Unknown PPC branch opcode!");
case PPC::PRED_EQ: return PPC::PRED_NE;
case PPC::PRED_NE: return PPC::PRED_EQ;
case PPC::PRED_LT: return PPC::PRED_GE;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 97b1c57d7978..cf5c7c0f598a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -37,7 +37,9 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
@@ -111,8 +113,7 @@ unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
case R30: case X30: case F30: case V30: case CR7EQ: return 30;
case R31: case X31: case F31: case V31: case CR7UN: return 31;
default:
- cerr << "Unhandled reg in PPCRegisterInfo::getRegisterNumbering!\n";
- abort();
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
}
}
@@ -139,11 +140,11 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
-const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const {
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
- else
- return &PPC::GPRCRegClass;
+ return &PPC::GPRCRegClass;
}
const unsigned*
@@ -173,7 +174,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::LR, 0
};
-
+
+ // 32-bit SVR4 calling convention.
static const unsigned SVR4_CalleeSavedRegs[] = {
PPC::R14, PPC::R15,
PPC::R16, PPC::R17, PPC::R18, PPC::R19,
@@ -199,7 +201,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::LR, 0
+ 0
};
// 64-bit Darwin calling convention.
static const unsigned Darwin64_CalleeSavedRegs[] = {
@@ -226,12 +228,41 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::LR8, 0
};
+
+ // 64-bit SVR4 calling convention.
+ static const unsigned SVR4_64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
Darwin32_CalleeSavedRegs;
-
- return SVR4_CalleeSavedRegs;
+
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
}
const TargetRegisterClass* const*
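
The new SVR4_64 array follows the existing convention: callee-saved register lists are zero-terminated and walked up to the sentinel. A sketch of a consumer under that convention:

    const unsigned *CSRegs = getCalleeSavedRegs(&MF);
    unsigned NumCSRegs = 0;
    for (unsigned i = 0; CSRegs[i] != 0; ++i)  // 0 terminates the list
      ++NumCSRegs;
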
@@ -266,6 +297,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&PPC::GPRCRegClass, 0
};
+ // 32-bit SVR4 calling convention.
static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = {
&PPC::GPRCRegClass,&PPC::GPRCRegClass,
&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
@@ -294,7 +326,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
&PPC::CRBITRCRegClass,
- &PPC::GPRCRegClass, 0
+ 0
};
// 64-bit Darwin calling convention.
@@ -326,12 +358,45 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&PPC::G8RCRegClass, 0
};
+
+ // 64-bit SVR4 calling convention.
+ static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = {
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRSAVERCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ 0
+ };
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses :
Darwin32_CalleeSavedRegClasses;
- return SVR4_CalleeSavedRegClasses;
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses
+ : SVR4_CalleeSavedRegClasses;
}
// needsFP - Return true if the specified function should have a dedicated frame
@@ -363,9 +428,9 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // On PPC64, r13 is the thread pointer. Never allocate this register. Note
- // that this is over conservative, as it also prevents allocation of R31 when
- // the FP is not needed.
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+  // Note that this is overly conservative, as it also prevents allocation of R31
+ // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
@@ -377,6 +442,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
Reserved.set(PPC::X31);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
}
if (needsFP(MF))
@@ -457,7 +527,7 @@ static
unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
const TargetRegisterClass *RC, int SPAdj) {
assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC, true);
+ unsigned Reg = RS->FindUnusedReg(RC);
// FIXME: move ARM callee-saved reg scan to target independent code, then
// search for already spilled CS register here.
if (Reg == 0)
@@ -629,8 +699,10 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+unsigned
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -669,14 +741,14 @@ void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
lowerDynamicAlloc(II, SPAdj, RS);
- return;
+ return 0;
}
// Special case for pseudo-op SPILL_CR.
if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
+ return 0;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -718,7 +790,7 @@ void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
- return;
+ return 0;
}
// The offset doesn't fit into a single register, scavenge one to build the
@@ -758,6 +830,7 @@ void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+ return 0;
}
/// VRRegNo - Map from a numbered VR register to its enum value.
@@ -910,7 +983,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
// don't have a frame pointer, calls, or dynamic alloca then we do not need
// to adjust the stack pointer (we fit in the Red Zone).
bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
- // FIXME SVR4 The SVR4 ABI has no red zone.
+ // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
if (!DisableRedZone &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
@@ -1005,7 +1078,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (!Subtarget.isSVR4ABI()) {
return;
}
-
+
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
@@ -1016,16 +1089,19 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
unsigned MinFPR = PPC::F31;
unsigned MinVR = PPC::V31;
bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
bool HasCRSaveArea = false;
bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
SmallVector<CalleeSavedInfo, 18> FPRegs;
SmallVector<CalleeSavedInfo, 18> VRegs;
@@ -1041,6 +1117,14 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinGPR) {
MinGPR = Reg;
}
+ } else if (RC == PPC::G8RCRegisterClass) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
} else if (RC == PPC::F8RCRegisterClass) {
HasFPSaveArea = true;
@@ -1064,7 +1148,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
MinVR = Reg;
}
} else {
- assert(0 && "Unknown RegisterClass!");
+ llvm_unreachable("Unknown RegisterClass!");
}
}
@@ -1103,7 +1187,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// General register save area starts right below the Floating-point
// register save area.
- if (HasGPSaveArea) {
+ if (HasGPSaveArea || HasG8SaveArea) {
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
@@ -1112,7 +1196,22 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4;
+ // Likewise move the 64-bit register save area spill slots down, taking into
+ // account the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg = std::min<unsigned>(getRegisterNumbering(MinGPR),
+ getRegisterNumbering(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
}
// The CR save area is below the general register save area.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index ddaefdd2a37c..1689bc224fb6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -37,7 +37,7 @@ public:
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
- virtual const TargetRegisterClass *getPointerRegClass() const;
+ virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
@@ -66,8 +66,9 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index bac8e3aed8eb..049e893e82ed 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -255,6 +255,11 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66]>;
// VRsave register
def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[107]>;
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">, DwarfRegNum<[0]>;
+
// FP rounding mode: bits 30 and 31 of the FP status and control register
// This is not allocated as a normal register; it appears only in
// Uses and Defs. The ABI says it needs to be preserved by a function,
@@ -280,7 +285,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
let MethodBodies = [{
GPRCClass::iterator
GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
- // In Linux, r2 is reserved for the OS.
+ // 32-bit SVR4 ABI: r2 is reserved for the OS.
+ // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
return begin()+1;
@@ -291,7 +297,7 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
// On PPC64, r13 is the thread pointer. Never allocate this register.
// Note that this is overconservative, as it also prevents allocation of
// R31 when the FP is not needed.
- // When using the SVR4 ABI, r13 is reserved for the Small Data Area
+ // When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area
// pointer.
const PPCSubtarget &Subtarget
= MF.getTarget().getSubtarget<PPCSubtarget>();
@@ -318,6 +324,10 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
let MethodBodies = [{
G8RCClass::iterator
G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
+ // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
+ if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
+ return begin()+1;
+
return begin();
}
G8RCClass::iterator
@@ -372,4 +382,6 @@ def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>;
-
+def CARRYRC : RegisterClass<"PPC", [i32], 32, [CARRY]> {
+ let CopyCost = -1;
+}
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 425d8e6195c6..f75e7814526f 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -13,7 +13,7 @@
#include "PPCSubtarget.h"
#include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
#include "PPCGenSubtarget.inc"
#include <cstdlib>
@@ -57,10 +57,9 @@ static const char *GetCurrentPowerPCCPU() {
#endif
-PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
- const std::string &FS, bool is64Bit)
- : TM(tm)
- , StackAlignment(16)
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
+ bool is64Bit)
+ : StackAlignment(16)
, DarwinDirective(PPC::DIR_NONE)
, IsGigaProcessor(false)
, Has64BitSupport(false)
@@ -95,7 +94,6 @@ PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
- const std::string &TT = M.getTargetTriple();
if (TT.length() > 7) {
// Determine which version of darwin this is.
size_t DarwinPos = TT.find("-darwin");
@@ -105,24 +103,11 @@ PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
else
DarwinVers = 8; // Minimum supported darwin is Tiger.
}
- } else if (TT.empty()) {
- // Try to autosense the subtarget from the host compiler.
-#if defined(__APPLE__)
-#if __APPLE_CC__ > 5400
- DarwinVers = 9; // GCC 5400+ is Leopard.
-#else
- DarwinVers = 8; // Minimum supported darwin is Tiger.
-#endif
-#endif
}
// Set up darwin-specific properties.
- if (isDarwin()) {
+ if (isDarwin())
HasLazyResolverStubs = true;
- AsmFlavor = NewMnemonic;
- } else {
- AsmFlavor = OldMnemonic;
- }
}
/// SetJITMode - This is called to inform the subtarget info that we are
@@ -138,7 +123,8 @@ void PPCSubtarget::SetJITMode() {
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
-bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const {
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index f633cc6d2da4..02c8ad79bd38 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -39,18 +39,11 @@ namespace PPC {
};
}
-class Module;
class GlobalValue;
class TargetMachine;
class PPCSubtarget : public TargetSubtarget {
-public:
- enum AsmWriterFlavorTy {
- OldMnemonic, NewMnemonic, Unset
- };
protected:
- const TargetMachine &TM;
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -61,9 +54,6 @@ protected:
/// Which cpu directive was used.
unsigned DarwinDirective;
- /// AsmFlavor - Which PPC asm dialect to use.
- AsmWriterFlavorTy AsmFlavor;
-
/// Used by the ISel to turn in optimizations for POWER4-derived architectures
bool IsGigaProcessor;
bool Has64BitSupport;
@@ -79,10 +69,9 @@ protected:
unsigned char DarwinVers; // Is any darwin-ppc platform.
public:
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- PPCSubtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS, bool is64Bit);
+ PPCSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -132,7 +121,8 @@ public:
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
- bool hasLazyResolverStub(const GlobalValue *GV) const;
+ bool hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const;
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
@@ -148,12 +138,9 @@ public:
/// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
unsigned getDarwinVers() const { return DarwinVers; }
- bool isDarwinABI() const { return isDarwin() || IsPPC64; }
- bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; }
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
- unsigned getAsmFlavor() const {
- return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
- }
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index e9073d63a152..3371954c30f1 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,96 +12,38 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "PPCTargetAsmInfo.h"
+#include "PPCMCAsmInfo.h"
#include "PPCTargetMachine.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/FormattedStream.h"
using namespace llvm;
-/// PowerPCTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int PowerPCTargetMachineModule;
-int PowerPCTargetMachineModule = 0;
-
-// Register the targets
-static RegisterTarget<PPC32TargetMachine>
-X("ppc32", "PowerPC 32");
-static RegisterTarget<PPC64TargetMachine>
-Y("ppc64", "PowerPC 64");
-
-// Force static initialization.
-extern "C" void LLVMInitializePowerPCTarget() { }
-
-// No assembler printer by default
-PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
-
-const TargetAsmInfo *PPCTargetMachine::createTargetAsmInfo() const {
- if (Subtarget.isDarwin())
- return new PPCDarwinTargetAsmInfo(*this);
- else
- return new PPCLinuxTargetAsmInfo(*this);
-}
-
-unsigned PPC32TargetMachine::getJITMatchQuality() {
-#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
- if (sizeof(void*) == 4)
- return 10;
-#endif
- return 0;
-}
-unsigned PPC64TargetMachine::getJITMatchQuality() {
-#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
- if (sizeof(void*) == 8)
- return 10;
-#endif
- return 0;
-}
-
-unsigned PPC32TargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "powerpc-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "powerpc-")
- return 20;
-
- // If the target triple is something non-powerpc, we don't match.
- if (!TT.empty()) return 0;
+static const MCAsmInfo *createMCAsmInfo(const Target &T,
+ const StringRef &TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+ if (TheTriple.getOS() == Triple::Darwin)
+ return new PPCMCAsmInfoDarwin(isPPC64);
+ return new PPCLinuxMCAsmInfo(isPPC64);
- if (M.getEndianness() == Module::BigEndian &&
- M.getPointerSize() == Module::Pointer32)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
-
- return getJITMatchQuality()/2;
}
-unsigned PPC64TargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "powerpc64-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 10 && std::string(TT.begin(), TT.begin()+10) == "powerpc64-")
- return 20;
-
- if (M.getEndianness() == Module::BigEndian &&
- M.getPointerSize() == Module::Pointer64)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
- return getJITMatchQuality()/2;
+ RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
+ RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
}
-PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS,
- bool is64Bit)
- : Subtarget(*this, M, FS, is64Bit),
+PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) {
@@ -118,13 +60,15 @@ PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS,
/// groups, which typically degrades performance.
bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
-PPC32TargetMachine::PPC32TargetMachine(const Module &M, const std::string &FS)
- : PPCTargetMachine(M, FS, false) {
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : PPCTargetMachine(T, TT, FS, false) {
}
-PPC64TargetMachine::PPC64TargetMachine(const Module &M, const std::string &FS)
- : PPCTargetMachine(M, FS, true) {
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : PPCTargetMachine(T, TT, FS, true) {
}
@@ -146,20 +90,36 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
return false;
}
-bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ MachineCodeEmitter &MCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE) {
+ JITCodeEmitter &JCE) {
// The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
// FIXME: This should be moved to TargetJITInfo!!
if (Subtarget.isPPC64()) {
@@ -179,19 +139,14 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
Subtarget.SetJITMode();
// Machine code emitter pass for PowerPC.
- PM.add(createPPCCodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE) {
+ ObjectCodeEmitter &OCE) {
// The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
// FIXME: This should be moved to TargetJITInfo!!
if (Subtarget.isPPC64()) {
@@ -211,43 +166,33 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
Subtarget.SetJITMode();
// Machine code emitter pass for PowerPC.
- PM.add(createPPCJITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
+ PM.add(createPPCObjectCodeEmitterPass(*this, OCE));
return false;
}
bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
// Machine code emitter pass for PowerPC.
PM.add(createPPCCodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
// Machine code emitter pass for PowerPC.
PM.add(createPPCJITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
+ return false;
+}
+bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCObjectCodeEmitterPass(*this, OCE));
return false;
}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index c693bf42a3e0..3399ac89188f 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -39,18 +39,9 @@ class PPCTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
PPCMachOWriterInfo MachOWriterInfo;
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- PPCTargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- PPCTargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+ PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -71,26 +62,24 @@ public:
return &MachOWriterInfo;
}
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
-
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ ObjectCodeEmitter &OCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ MachineCodeEmitter &MCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE);
virtual bool getEnableTailMergeDefault() const;
};
@@ -98,20 +87,16 @@ public:
///
class PPC32TargetMachine : public PPCTargetMachine {
public:
- PPC32TargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
+ PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
///
class PPC64TargetMachine : public PPCTargetMachine {
public:
- PPC64TargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
+ PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 6e9e6c74e8f3..f5e50fc808a8 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -149,7 +149,7 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
Implement Newton-Raphson method for improving estimate instructions to the
correct accuracy, and implementing divide as multiply by reciprocal when it has
-more than one use. Itanium will want this too.
+more than one use. Itanium would want this too.
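+
+A minimal sketch of one refinement step (hand-written, not from the tree;
+assumes %x0 holds the hardware reciprocal estimate of %a, e.g. from fres):
+
+define float @recip_refine(float %a, float %x0) {
+entry:
+  %t0 = fmul float %a, %x0
+  %t1 = fsub float 2.000000e+00, %t0
+  %x1 = fmul float %x0, %t1        ; x1 = x0 * (2 - a*x0)
+  ret float %x1
+}
+
+Each such step roughly doubles the number of correct bits in the estimate.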
===-------------------------------------------------------------------------===
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..058d599a4af0
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCInfo
+ PowerPCTargetInfo.cpp
+ )
+
+add_dependencies(LLVMPowerPCInfo PowerPCCodeGenTable_gen)
diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile
new file mode 100644
index 000000000000..a101aa4a4495
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PowerPC/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 000000000000..ad607d0ade6a
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+}
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index f68cf0e40df0..89ea9d0afc42 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -197,13 +197,6 @@ _bar: addic r3,r3,-1
//===---------------------------------------------------------------------===//
-Legalize should lower ctlz like this:
- ctlz(x) = popcnt((x-1) & ~x)
-
-on targets that have popcnt but not ctlz. itanium, what else?
-
-//===---------------------------------------------------------------------===//
-
quantum_sigma_x in 462.libquantum contains the following loop:
for(i=0; i<reg->size; i++)
@@ -227,7 +220,20 @@ so cool to turn it into something like:
... which would only do one 32-bit XOR per loop iteration instead of two.
It would also be nice to recognize the reg->size doesn't alias reg->node[i], but
-alas...
+alas.
+
+//===---------------------------------------------------------------------===//
+
+This should be optimized to one 'and' and one 'or', from PR4216:
+
+define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp {
+entry:
+ %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; <i32> [#uses=1]
+ %0 = and i32 %bf.prev.low, -65536 ; <i32> [#uses=1]
+ %1 = and i32 %bf.prev.lo.cleared10, 40186 ; <i32> [#uses=1]
+ %2 = or i32 %1, %0 ; <i32> [#uses=1]
+ ret i32 %2
+}
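+
+A sketch of the hoped-for output, derived by hand from the masks above
+(32962's set bits are a subset of 40186's, and
+0xFFFF0000 | 40186 == 0xFFFF9CFA == -25350 as an i32):
+
+define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp {
+entry:
+  %0 = and i32 %bf.prev.low, -25350  ; keep the high half plus the 0x9CFA bits
+  %1 = or i32 %0, 32962              ; force in the 0x80C2 bits
+  ret i32 %1
+}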
//===---------------------------------------------------------------------===//
@@ -335,11 +341,6 @@ when it is declared U32.
//===---------------------------------------------------------------------===//
-Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit
-regs and bswap, like itanium.
-
-//===---------------------------------------------------------------------===//
-
LSR should know what GPR types a target has. This code:
volatile short X, Y; // globals
@@ -349,24 +350,22 @@ void foo(int N) {
for (i = 0; i < N; i++) { X = i; Y = i*4; }
}
-produces two identical IV's (after promotion) on PPC/ARM:
+produces two near-identical IVs (after promotion) on PPC/ARM:
-LBB1_1: @bb.preheader
- mov r3, #0
- mov r2, r3
- mov r1, r3
-LBB1_2: @bb
- ldr r12, LCPI1_0
- ldr r12, [r12]
- strh r2, [r12]
- ldr r12, LCPI1_1
- ldr r12, [r12]
- strh r3, [r12]
- add r1, r1, #1 <- [0,+,1]
- add r3, r3, #4
- add r2, r2, #1 <- [0,+,1]
- cmp r1, r0
- bne LBB1_2 @bb
+LBB1_2:
+ ldr r3, LCPI1_0
+ ldr r3, [r3]
+ strh r2, [r3]
+ ldr r3, LCPI1_1
+ ldr r3, [r3]
+ strh r1, [r3]
+ add r1, r1, #4
+ add r2, r2, #1 <- [0,+,1]
+ sub r0, r0, #1 <- [0,-,1]
+ cmp r0, #0
+ bne LBB1_2
+
+LSR should reuse the "+" IV for the exit test.
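+For example (hand-written, not compiler output), dropping the [0,-,1]
+counter and testing the [0,+,1] IV directly:
+
+LBB1_2:
+ ...
+ add r1, r1, #4
+ add r2, r2, #1 <- [0,+,1]
+ cmp r2, r0
+ bne LBB1_2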
//===---------------------------------------------------------------------===//
@@ -600,25 +599,6 @@ implementations of ceil/floor/rint.
//===---------------------------------------------------------------------===//
-This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043
-contains a testcase that compiles down to:
-
- %struct.XMM128 = type { <4 x float> }
-..
- %src = alloca %struct.XMM128
-..
- %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>*
- %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0
- store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16
- %tmp66 = load <4 x float>* %tmp65, align 16
- %tmp71 = add <4 x float> %tmp66, %tmp66
-
-If the mid-level optimizer turned the bitcast of pointer + store of tmp5899
-into a bitcast of the vector value and a store to the pointer, then the
-store->load could be easily removed.
-
-//===---------------------------------------------------------------------===//
-
Consider:
int test() {
@@ -1123,16 +1103,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
//===---------------------------------------------------------------------===//
-We would like to do the following transform in the instcombiner:
-
- -X/C -> X/-C
-
-However, this isn't valid if (-X) overflows. We can implement this when we
-have the concept of a "C signed subtraction" operator that which is undefined
-on overflow.
-
-//===---------------------------------------------------------------------===//
-
This was noticed in the entryblock for grokdeclarator in 403.gcc:
%tmp = icmp eq i32 %decl_context, 4
@@ -1311,6 +1281,8 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)
llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
+
//===---------------------------------------------------------------------===//
Type based alias analysis:
@@ -1318,31 +1290,25 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705
//===---------------------------------------------------------------------===//
-When GVN/PRE finds a store of float* to a must aliases pointer when expecting
-an int*, it should turn it into a bitcast. This is a nice generalization of
-the SROA hack that would apply to other cases, e.g.:
-
-int foo(int C, int *P, float X) {
- if (C) {
- bar();
- *P = 42;
- } else
- *(float*)P = X;
-
- return *P;
-}
-
-
-One example (that requires crazy phi translation) is:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
-
-//===---------------------------------------------------------------------===//
-
A/B get pinned to the stack because we turn an if/then into a select instead
of PRE'ing the load/store. This may be fixable in instcombine:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892
+struct X { int i; };
+int foo (int x) {
+ struct X a;
+ struct X b;
+ struct X *p;
+ a.i = 1;
+ b.i = 2;
+ if (x)
+ p = &a;
+ else
+ p = &b;
+ return p->i;
+}
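+(Ideally foo would fold down to a select of the two stored constants,
+i.e. "return x ? 1 : 2", with no stack traffic at all.)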
+//===---------------------------------------------------------------------===//
Interesting missed case because of control flow flattening (should be 2 loads):
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
@@ -1675,5 +1641,6 @@ entry:
Instcombine should be able to optimize away the loads (and thus the globals).
+See also PR4973
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile
index f12a6ac39891..a856828ce401 100644
--- a/lib/Target/Sparc/AsmPrinter/Makefile
+++ b/lib/Target/Sparc/AsmPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===##
+##===- lib/Target/Sparc/AsmPrinter/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 71bd0dee2068..a3e5fba928f0 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -19,18 +19,22 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/MDNode.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Support/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
#include "llvm/Support/MathExtras.h"
#include <cctype>
#include <cstring>
@@ -49,45 +53,36 @@ namespace {
ValueMapTy NumberForBB;
unsigned BBNumber;
public:
- explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
- const TargetAsmInfo *T, bool V)
+ explicit SparcAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
: AsmPrinter(O, TM, T, V), BBNumber(0) {}
virtual const char *getPassName() const {
return "Sparc Assembly Printer";
}
- void printModuleLevelGV(const GlobalVariable* GVar);
+ void PrintGlobalVariable(const GlobalVariable *GVar);
void printOperand(const MachineInstr *MI, int opNum);
void printMemOperand(const MachineInstr *MI, int opNum,
const char *Modifier = 0);
void printCCOperand(const MachineInstr *MI, int opNum);
- bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
bool runOnMachineFunction(MachineFunction &F);
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+ bool printGetPCX(const MachineInstr *MI, unsigned OpNo);
};
} // end of anonymous namespace
#include "SparcGenAsmWriter.inc"
-/// createSparcCodePrinterPass - Returns a pass that prints the SPARC
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description. This should work
-/// regardless of whether the function is in SSA form.
-///
-FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
- TargetMachine &tm,
- bool verbose) {
- return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
-}
-
-
/// runOnMachineFunction - This uses the printInstruction()
/// method to print assembly for each instruction.
///
@@ -103,17 +98,11 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// BBs the same name. (If you have a better way, please let me know!)
O << "\n\n";
-
- // Print out the label for the function.
- const Function *F = MF.getFunction();
- SwitchToSection(TAI->SectionForGlobal(F));
- EmitAlignment(MF.getAlignment(), F);
- O << "\t.globl\t" << CurrentFnName << '\n';
-
- printVisibility(CurrentFnName, F->getVisibility());
-
- O << "\t.type\t" << CurrentFnName << ", #function\n";
- O << CurrentFnName << ":\n";
+ emitFunctionHeader(MF);
+
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
// Number each basic block so that we can consistently refer to them
// in PC-relative references.
@@ -129,24 +118,65 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I) {
// Print a label for the basic block.
if (I != MF.begin()) {
- printBasicBlockLabel(I, true, true);
- O << '\n';
+ EmitBasicBlockStart(I);
}
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
// Print the assembly for the instruction.
+ processDebugLoc(II, true);
printInstruction(II);
+
+ if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ EmitComments(*II);
+ O << '\n';
+ processDebugLoc(II, false);
++EmittedInsts;
}
}
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
// We didn't modify anything.
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
return false;
}
+void SparcAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitAlignment(MF.getAlignment(), F);
+
+ switch (F->getLinkage()) {
+ default: llvm_unreachable("Unknown linkage type");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ // Function is internal.
+ break;
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ // Function is externally visible
+ O << "\t.global\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkerPrivateLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ // Function is weak
+ O << "\t.weak\t" << CurrentFnName << '\n' ;
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ", #function\n";
+ O << CurrentFnName << ":\n";
+}
+
+
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand (opNum);
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
bool CloseParen = false;
if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
O << "%hi(";
@@ -158,33 +188,27 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
}
switch (MO.getType()) {
case MachineOperand::MO_Register:
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- O << "%" << LowercaseString (RI.get(MO.getReg()).AsmName);
- else
- O << "%reg" << MO.getReg();
+ O << "%" << LowercaseString(getRegisterName(MO.getReg()));
break;
case MachineOperand::MO_Immediate:
O << (int)MO.getImm();
break;
case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress:
- {
- const GlobalValue *GV = MO.getGlobal();
- O << Mang->getValueName(GV);
- }
+ O << Mang->getMangledName(MO.getGlobal());
break;
case MachineOperand::MO_ExternalSymbol:
O << MO.getSymbolName();
break;
case MachineOperand::MO_ConstantPoolIndex:
- O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getIndex();
break;
default:
- O << "<unknown operand type>"; abort (); break;
+ llvm_unreachable("<unknown operand type>");
}
if (CloseParen) O << ")";
}
@@ -218,28 +242,42 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
}
-void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
- int CC = (int)MI->getOperand(opNum).getImm();
- O << SPARCCondCodeToString((SPCC::CondCodes)CC);
-}
+bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) {
+ std::string operand = "";
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Operand is not a register");
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Operand is not a physical register");
+ operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+ break;
+ }
-bool SparcAsmPrinter::doInitialization(Module &M) {
- Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
- return false; // success
-}
+ unsigned bbNum = NumberForBB[MI->getParent()->getBasicBlock()];
-bool SparcAsmPrinter::doFinalization(Module &M) {
- // Print out module-level global variables here.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I);
+ O << '\n' << ".LLGETPCH" << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << bbNum << '\n' ;
- O << '\n';
+ O << "\t sethi\t"
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), "
+ << operand << '\n' ;
+
+ O << ".LLGETPC" << bbNum << ":\n" ;
+ O << "\tor\t" << operand
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), "
+ << operand << '\n';
+ O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
+
+ return true;
+}
- return AsmPrinter::doFinalization(M);
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
}
-void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+void SparcAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
const TargetData *TD = TM.getTargetData();
if (!GVar->hasInitializer())
@@ -250,16 +288,15 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
return;
O << "\n\n";
- std::string name = Mang->getValueName(GVar);
+ std::string name = Mang->getMangledName(GVar);
Constant *C = GVar->getInitializer();
- if (isa<MDNode>(C) || isa<MDString>(C))
- return;
unsigned Size = TD->getTypeAllocSize(C->getType());
unsigned Align = TD->getPreferredAlignment(GVar);
printVisibility(name, GVar->getVisibility());
- SwitchToSection(TAI->SectionForGlobal(GVar));
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
if (C->isNullValue() && !GVar->hasSection()) {
if (!GVar->isThreadLocal() &&
@@ -269,8 +306,8 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
if (GVar->hasLocalLinkage())
O << "\t.local " << name << '\n';
- O << TAI->getCOMMDirective() << name << ',' << Size;
- if (TAI->getCOMMDirectiveTakesAlignment())
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
O << ',' << (1 << Align);
O << '\n';
@@ -292,27 +329,25 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
// their name or something. For now, just emit them as external.
case GlobalValue::ExternalLinkage:
// If external or appending, declare as a global symbol
- O << TAI->getGlobalDirective() << name << '\n';
+ O << MAI->getGlobalDirective() << name << '\n';
// FALL THROUGH
case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::InternalLinkage:
break;
case GlobalValue::GhostLinkage:
- cerr << "Should not have any unmaterialized functions!\n";
- abort();
+ llvm_unreachable("Should not have any unmaterialized functions!");
case GlobalValue::DLLImportLinkage:
- cerr << "DLLImport linkage is not supported by this target!\n";
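+// A negative CopyCost marks a class as extremely expensive to copy; this is
+// the usual convention for status registers such as the carry bit.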
- abort();
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
case GlobalValue::DLLExportLinkage:
- cerr << "DLLExport linkage is not supported by this target!\n";
- abort();
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
default:
- assert(0 && "Unknown linkage type!");
+ llvm_unreachable("Unknown linkage type!");
}
EmitAlignment(Align, GVar);
- if (TAI->hasDotTypeDotSizeDirective()) {
+ if (MAI->hasDotTypeDotSizeDirective()) {
O << "\t.type " << name << ",#object\n";
O << "\t.size " << name << ',' << Size << '\n';
}
@@ -355,13 +390,7 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-namespace {
- static struct Register {
- Register() {
- SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
- }
- } Registrator;
-}
-
// Force static initialization.
-extern "C" void LLVMInitializeSparcAsmPrinter() { }
+extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index eb045e242b79..74f320a00035 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -16,9 +16,9 @@ add_llvm_target(SparcCodeGen
SparcInstrInfo.cpp
SparcISelDAGToDAG.cpp
SparcISelLowering.cpp
+ SparcMCAsmInfo.cpp
SparcRegisterInfo.cpp
SparcSubtarget.cpp
- SparcTargetAsmInfo.cpp
SparcTargetMachine.cpp
)
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index f72a4c4645c1..88b0927b3550 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -20,6 +20,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumFpDs , "Number of instructions translated");
@@ -75,7 +77,7 @@ static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
OddReg = OddHalvesOfPairs[i];
return;
}
- assert(0 && "Can't find reg");
+ llvm_unreachable("Can't find reg");
}
/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
@@ -108,16 +110,16 @@ bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
else if (MI->getOpcode() == SP::FpABSD)
MI->setDesc(TII->get(SP::FABSS));
else
- assert(0 && "Unknown opcode!");
+ llvm_unreachable("Unknown opcode!");
MI->getOperand(0).setReg(EvenDestReg);
MI->getOperand(1).setReg(EvenSrcReg);
- DOUT << "FPMover: the modified instr is: " << *MI;
+ DEBUG(errs() << "FPMover: the modified instr is: " << *MI);
// Insert copy for the other half of the double.
if (DestDReg != SrcDReg) {
MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
.addReg(OddSrcReg);
- DOUT << "FPMover: the inserted instr is: " << *MI;
+ DEBUG(errs() << "FPMover: the inserted instr is: " << *MI);
}
++NumFpDs;
}
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index fdf6afaee076..6714b4dadb29 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index c7d0ca8a0875..bb5155e1c263 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -15,19 +15,21 @@
#ifndef TARGET_SPARC_H
#define TARGET_SPARC_H
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
namespace llvm {
class FunctionPass;
class SparcTargetMachine;
- class raw_ostream;
+ class formatted_raw_ostream;
FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
- FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM,
- bool Verbose);
FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+
+ extern Target TheSparcTarget;
+
} // end namespace llvm;
// Defines symbolic names for Sparc registers. This defines a mapping from
@@ -83,7 +85,7 @@ namespace llvm {
inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
switch (CC) {
- default: assert(0 && "Unknown condition code");
+ default: llvm_unreachable("Unknown condition code");
case SPCC::ICC_NE: return "ne";
case SPCC::ICC_E: return "e";
case SPCC::ICC_G: return "g";
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index c9bd62d0e20d..a1a4a8ef52c2 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -17,6 +17,8 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -32,10 +34,13 @@ class SparcDAGToDAGISel : public SelectionDAGISel {
/// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
/// make the right decision when generating code for different targets.
const SparcSubtarget &Subtarget;
+ SparcTargetMachine& TM;
+ MachineBasicBlock *CurBB;
public:
- explicit SparcDAGToDAGISel(SparcTargetMachine &TM)
- : SelectionDAGISel(TM),
- Subtarget(TM.getSubtarget<SparcSubtarget>()) {
+ explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
+ : SelectionDAGISel(tm),
+ Subtarget(tm.getSubtarget<SparcSubtarget>()),
+ TM(tm) {
}
SDNode *Select(SDValue Op);
@@ -61,6 +66,9 @@ public:
// Include the pieces autogenerated from the target description.
#include "SparcGenDAGISel.inc"
+
+private:
+ SDNode* getGlobalBaseReg();
};
} // end anonymous namespace
@@ -68,12 +76,18 @@ public:
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void SparcDAGToDAGISel::InstructionSelect() {
DEBUG(BB->dump());
-
+ CurBB = BB;
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
}
+SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
+ MachineFunction *MF = CurBB->getParent();
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
@@ -147,6 +161,9 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
switch (N->getOpcode()) {
default: break;
+ case SPISD::GLOBAL_BASE_REG:
+ return getGlobalBaseReg();
+
case ISD::SDIV:
case ISD::UDIV: {
// FIXME: should use a custom expander to expose the SRA to the dag.
@@ -156,12 +173,12 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
// Set the Y register to the high-part.
SDValue TopPart;
if (N->getOpcode() == ISD::SDIV) {
- TopPart = SDValue(CurDAG->getTargetNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
CurDAG->getTargetConstant(31, MVT::i32)), 0);
} else {
TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
}
- TopPart = SDValue(CurDAG->getTargetNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart,
CurDAG->getRegister(SP::G0, MVT::i32)), 0);
// FIXME: Handle div by immediate.
@@ -175,8 +192,8 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
SDValue MulLHS = N->getOperand(0);
SDValue MulRHS = N->getOperand(1);
unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
- SDNode *Mul = CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::Flag,
- MulLHS, MulRHS);
+ SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+ MulLHS, MulRHS);
// The high part is in the Y register.
return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
return NULL;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 4c3efde36fe1..164770d72df7 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -21,7 +21,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -31,18 +33,21 @@ using namespace llvm;
#include "SparcGenCallingConv.inc"
-static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
+SDValue
+SparcTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CC, isVarArg, DAG.getTarget(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, DAG.getTarget(),
+ RVLocs, *DAG.getContext());
- // Analize return values of ISD::RET
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Sparc32);
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
@@ -52,7 +57,6 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- SDValue Chain = Op.getOperand(0);
SDValue Flag;
// Copy the result values into the output registers.
@@ -60,10 +64,8 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- // ISD::RET => ret chain, (regnum1,val1), ...
- // So i*2+1 index only the regnums.
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Op.getOperand(i*2+1), Flag);
+ Outs[i].Val, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -74,55 +76,64 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain);
}
-/// LowerArguments - V8 uses a very simple ABI, where all values are passed in
-/// either one or two GPRs, including FP values. TODO: we should pass FP values
-/// in FP registers for fastcc functions.
-void
-SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &ArgValues,
- DebugLoc dl) {
+/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values. TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue
+SparcTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
+
static const unsigned ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
-
const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = 68;
- SDValue Root = DAG.getRoot();
- std::vector<SDValue> OutChains;
-
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
- MVT ObjectVT = getValueType(I->getType());
-
- switch (ObjectVT.getSimpleVT()) {
- default: assert(0 && "Unhandled argument type!");
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[i];
+ // FIXME: We ignore the register assignments of AnalyzeFormalArguments
+ // because it doesn't know how to split a double into two i32 registers.
+ EVT ObjectVT = VA.getValVT();
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
- if (I->use_empty()) { // Argument is dead.
+ if (!Ins[i].Used) { // Argument is dead.
if (CurArgReg < ArgRegEnd) ++CurArgReg;
- ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ InVals.push_back(DAG.getUNDEF(ObjectVT));
} else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
- SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
if (ObjectVT != MVT::i32) {
unsigned AssertOp = ISD::AssertSext;
Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg,
DAG.getValueType(ObjectVT));
Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg);
}
- ArgValues.push_back(Arg);
+ InVals.push_back(Arg);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load;
if (ObjectVT == MVT::i32) {
- Load = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
} else {
ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
@@ -130,63 +141,63 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
DAG.getConstant(Offset, MVT::i32));
- Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Root, FIPtr,
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
NULL, 0, ObjectVT);
Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
}
- ArgValues.push_back(Load);
+ InVals.push_back(Load);
}
ArgOffset += 4;
break;
case MVT::f32:
- if (I->use_empty()) { // Argument is dead.
+ if (!Ins[i].Used) { // Argument is dead.
if (CurArgReg < ArgRegEnd) ++CurArgReg;
- ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ InVals.push_back(DAG.getUNDEF(ObjectVT));
} else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
// FP value is passed in an integer register.
unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
- SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
- ArgValues.push_back(Arg);
+ InVals.push_back(Arg);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- SDValue Load = DAG.getLoad(MVT::f32, dl, Root, FIPtr, NULL, 0);
- ArgValues.push_back(Load);
+ SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0);
+ InVals.push_back(Load);
}
ArgOffset += 4;
break;
case MVT::i64:
case MVT::f64:
- if (I->use_empty()) { // Argument is dead.
+ if (!Ins[i].Used) { // Argument is dead.
if (CurArgReg < ArgRegEnd) ++CurArgReg;
if (CurArgReg < ArgRegEnd) ++CurArgReg;
- ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ InVals.push_back(DAG.getUNDEF(ObjectVT));
} else {
SDValue HiVal;
if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
- HiVal = DAG.getCopyFromReg(Root, dl, VRegHi, MVT::i32);
+ HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- HiVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
}
SDValue LoVal;
if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
- LoVal = DAG.getCopyFromReg(Root, dl, VRegLo, MVT::i32);
+ LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- LoVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
}
// Compose the two halves together into an i64 unit.
@@ -197,7 +208,7 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
if (ObjectVT == MVT::f64)
WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue);
- ArgValues.push_back(WholeValue);
+ InVals.push_back(WholeValue);
}
ArgOffset += 8;
break;
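
SPARC V8 is big-endian (see the "E-p:32:32-f128:128:128" data layout later in this patch), so the first incoming register or lower-addressed stack word carries the high half of an i64/f64. A minimal sketch of the "compose the two halves" step above, assuming the usual hi/lo pairing:

    #include <cstdint>
    #include <cstring>

    uint64_t composeI64(uint32_t hi, uint32_t lo) {
      return (uint64_t(hi) << 32) | lo;   // the i64 composition
    }
    double composeF64(uint32_t hi, uint32_t lo) {
      uint64_t bits = composeI64(hi, lo);
      double d;
      std::memcpy(&d, &bits, sizeof d);   // the BIT_CONVERT step
      return d;
    }
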
@@ -205,10 +216,12 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
}
// Store remaining ArgRegs to the stack if this is a varargs function.
- if (F.isVarArg()) {
+ if (isVarArg) {
// Remember the vararg offset for the va_start implementation.
VarArgsFrameOffset = ArgOffset;
+ std::vector<SDValue> OutChains;
+
for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
@@ -220,26 +233,31 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0));
ArgOffset += 4;
}
+
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
}
- if (!OutChains.empty())
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size()));
+ return Chain;
}
-static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- unsigned CallingConv = TheCall->getCallingConv();
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = TheCall->getDebugLoc();
+SDValue
+SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
#if 0
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallingConv, isVarArg, DAG.getTarget(), ArgLocs);
- CCInfo.AnalyzeCallOperands(Op.getNode(), CC_Sparc32);
+ CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
@@ -249,9 +267,9 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Count the size of the outgoing arguments.
unsigned ArgsSize = 0;
- for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
- switch (TheCall->getArg(i).getValueType().getSimpleVT()) {
- default: assert(0 && "Unknown value type!");
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown value type!");
case MVT::i1:
case MVT::i8:
case MVT::i16:
@@ -283,13 +301,11 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
-
- // Arguments start after the 5 first operands of ISD::CALL
- SDValue Arg = TheCall->getArg(i);
+ SDValue Arg = Outs[i].Val;
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
@@ -325,13 +341,13 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
};
unsigned ArgOffset = 68;
- for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
- SDValue Val = TheCall->getArg(i);
- MVT ObjectVT = Val.getValueType();
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ SDValue Val = Outs[i].Val;
+ EVT ObjectVT = Val.getValueType();
SDValue ValToStore(0, 0);
unsigned ObjSize;
- switch (ObjectVT.getSimpleVT()) {
- default: assert(0 && "Unhandled argument type!");
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
ObjSize = 4;
@@ -446,7 +462,7 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
- std::vector<MVT> NodeTys;
+ std::vector<EVT> NodeTys;
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
SDValue Ops[] = { Chain, Callee, InFlag };
@@ -459,10 +475,10 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CallingConv, isVarArg, DAG.getTarget(), RVLocs);
+ CCState RVInfo(CallConv, isVarArg, DAG.getTarget(),
+ RVLocs, *DAG.getContext());
- RVInfo.AnalyzeCallResult(TheCall, RetCC_Sparc32);
- SmallVector<SDValue, 8> ResultVals;
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -475,15 +491,10 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
Chain = DAG.getCopyFromReg(Chain, dl, Reg,
RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
- ResultVals.push_back(Chain.getValue(0));
+ InVals.push_back(Chain.getValue(0));
}
- ResultVals.push_back(Chain);
-
- // Merge everything together with a MERGE_VALUES node.
- return DAG.getNode(ISD::MERGE_VALUES, dl,
- TheCall->getVTList(), &ResultVals[0],
- ResultVals.size());
+ return Chain;
}
@@ -496,7 +507,7 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
/// condition.
static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
switch (CC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ISD::SETEQ: return SPCC::ICC_E;
case ISD::SETNE: return SPCC::ICC_NE;
case ISD::SETLT: return SPCC::ICC_L;
@@ -514,7 +525,7 @@ static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
/// FCC condition.
static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
switch (CC) {
- default: assert(0 && "Unknown fp condition code!");
+ default: llvm_unreachable("Unknown fp condition code!");
case ISD::SETEQ:
case ISD::SETOEQ: return SPCC::FCC_E;
case ISD::SETNE:
@@ -538,9 +549,8 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
}
}
-
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
- : TargetLowering(TM) {
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
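
This patch converts assert(0 && "...") default cases to llvm_unreachable("...") throughout. The difference matters in release builds: the assert compiles away, leaving a path that can fall off the end of a non-void function, while llvm_unreachable (from llvm/Support/ErrorHandling.h, included above) does not return in any build mode. A minimal illustration:

    #include "llvm/Support/ErrorHandling.h"

    static int widthOf(int kind) {
      switch (kind) {
      case 0: return 8;
      case 1: return 16;
      case 2: return 32;
      }
      // Unlike assert(0 && ...), this still terminates the path when
      // NDEBUG is defined, and silences "control reaches end" warnings.
      llvm_unreachable("Unhandled kind!");
    }
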
@@ -635,9 +645,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
- // RET must be custom lowered, to meet ABI requirements
- setOperationAction(ISD::RET , MVT::Other, Custom);
-
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
// VAARG needs to be lowered to not do unaligned accesses for doubles.
@@ -654,7 +661,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
- setOperationAction(ISD::DECLARE, MVT::Other, Expand);
setStackPointerRegisterToSaveRestore(SP::O6);
@@ -734,17 +740,29 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
}
}
-static SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) {
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, NULL, 0);
}
-static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) {
+SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) {
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -752,7 +770,16 @@ static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) {
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, NULL, 0);
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
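
The non-PIC path materializes an absolute address from SPISD::Hi (SETHI, upper 22 bits) plus SPISD::Lo (low 10 bits); under PIC the same Hi+Lo sum is reinterpreted as an offset from the new GLOBAL_BASE_REG, and the real address is loaded from the resulting GOT slot. The split arithmetic, as a self-contained sketch assuming the conventional %hi/%lo encoding:

    #include <cstdint>

    uint32_t hi22(uint32_t addr) { return addr >> 10; }    // %hi: upper 22 bits
    uint32_t lo10(uint32_t addr) { return addr & 0x3ff; }  // %lo: low 10 bits
    uint32_t materialize(uint32_t addr) {
      return (hi22(addr) << 10) | lo10(addr);              // == addr
    }
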
@@ -787,7 +814,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
// Get the condition flag.
SDValue CompareFlag;
if (LHS.getValueType() == MVT::i32) {
- std::vector<MVT> VTs;
+ std::vector<EVT> VTs;
VTs.push_back(MVT::i32);
VTs.push_back(MVT::Flag);
SDValue Ops[2] = { LHS, RHS };
@@ -818,7 +845,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SDValue CompareFlag;
if (LHS.getValueType() == MVT::i32) {
- std::vector<MVT> VTs;
+ std::vector<EVT> VTs;
VTs.push_back(LHS.getValueType()); // subcc returns a value
VTs.push_back(MVT::Flag);
SDValue Ops[2] = { LHS, RHS };
@@ -849,7 +876,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
@@ -900,14 +927,14 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- default: assert(0 && "Should not custom lower this!");
+ default: llvm_unreachable("Should not custom lower this!");
// Frame & Return address. Currently unimplemented
case ISD::RETURNADDR: return SDValue();
case ISD::FRAMEADDR: return SDValue();
case ISD::GlobalTLSAddress:
- assert(0 && "TLS not implemented for Sparc.");
- case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
- case ISD::ConstantPool: return LowerCONSTANTPOOL(Op, DAG);
+ llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
@@ -915,21 +942,20 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::CALL: return LowerCALL(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
}
}
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
unsigned BROpcode;
unsigned CC;
DebugLoc dl = MI->getDebugLoc();
// Figure out the conditional branch opcode to use for this select_cc.
switch (MI->getOpcode()) {
- default: assert(0 && "Unknown SELECT_CC!");
+ default: llvm_unreachable("Unknown SELECT_CC!");
case SP::SELECT_CC_Int_ICC:
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
@@ -964,9 +990,18 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
+ // Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
+ // Next, remove all successors of the current block; the true and
+ // fallthrough blocks are added back below.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
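
The extra DenseMap parameter lets SelectionDAG ISel learn how the inserter rewired the machine CFG (old successor mapped to sinkMBB) instead of the old wholesale transferSuccessors call. The expansion itself is the standard branch diamond; what the SELECT_CC pseudo computes is simply:

    // thisMBB:  conditional branch to sinkMBB (true value already in place)
    // copy0MBB: fallthrough block producing the false value
    // sinkMBB:  PHI merging the two
    int selectCC(bool cond, int trueVal, int falseVal) {
      return cond ? trueVal : falseVal;
    }
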
@@ -1011,7 +1046,7 @@ SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
std::pair<unsigned, const TargetRegisterClass*>
SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -1024,7 +1059,7 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
std::vector<unsigned> SparcTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() != 1)
return std::vector<unsigned>();
@@ -1050,5 +1085,5 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SparcTargetLowering::getFunctionAlignment(const Function *) const {
- return 4;
+ return 2;
}
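
getFunctionAlignment returns a log2 value, so the old return of 4 requested 16-byte function alignment; SPARC instructions only need 4-byte alignment, hence the change to 2:

    unsigned alignmentInBytes(unsigned log2Align) {
      return 1u << log2Align;   // 2 -> 4 bytes (the old 4 meant 16 bytes)
    }
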
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 27ce1b76cc79..55781be8b5b1 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -35,7 +35,8 @@ namespace llvm {
ITOF, // Int to FP within a FP register.
CALL, // A call instruction.
- RET_FLAG // Return with a flag operand.
+ RET_FLAG, // Return with a flag operand.
+ GLOBAL_BASE_REG // Global base reg for PIC
};
}
@@ -57,25 +58,49 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const;
- virtual void LowerArguments(Function &F, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &ArgValues,
- DebugLoc dl);
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
virtual const char *getTargetNodeName(unsigned Opcode) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 12c286af9428..8667bca7fe96 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -17,7 +17,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "SparcGenInstrInfo.inc"
+#include "SparcMachineFunctionInfo.h"
using namespace llvm;
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
@@ -160,30 +163,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill));
else
- assert(0 && "Can't store this register to stack slot");
-}
-
-void SparcInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Opc = 0;
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (RC == SP::IntRegsRegisterClass)
- Opc = SP::STri;
- else if (RC == SP::FPRegsRegisterClass)
- Opc = SP::STFri;
- else if (RC == SP::DFPRegsRegisterClass)
- Opc = SP::STDFri;
- else
- assert(0 && "Can't load this register");
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- MIB.addReg(SrcReg, getKillRegState(isKill));
- NewMIs.push_back(MIB);
- return;
+ llvm_unreachable("Can't store this register to stack slot");
}
void SparcInstrInfo::
@@ -200,28 +180,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == SP::DFPRegsRegisterClass)
BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
else
- assert(0 && "Can't load this register from stack slot");
-}
-
-void SparcInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Opc = 0;
- if (RC == SP::IntRegsRegisterClass)
- Opc = SP::LDri;
- else if (RC == SP::FPRegsRegisterClass)
- Opc = SP::LDFri;
- else if (RC == SP::DFPRegsRegisterClass)
- Opc = SP::LDDFri;
- else
- assert(0 && "Can't load this register");
- DebugLoc DL = DebugLoc::getUnknownLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
- NewMIs.push_back(MIB);
- return;
+ llvm_unreachable("Can't load this register from stack slot");
}
MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -278,3 +237,24 @@ MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NewMI;
}
+
+unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
+{
+ SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
+ unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg);
+ SparcFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
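
getGlobalBaseReg creates the PIC base lazily: the first request plants a GETPCX pseudo (which expands to a PC-capturing call sequence at print time) at the top of the function and caches the virtual register in the new SparcMachineFunctionInfo; later requests reuse it. The caching pattern in miniature, with createBaseReg standing in for the BuildMI call:

    struct BaseRegCache {
      unsigned Reg = 0;                       // 0 = not created yet
      template <class F> unsigned get(F createBaseReg) {
        if (Reg == 0)
          Reg = createBaseReg();              // pay for the PC read once
        return Reg;
      }
    };
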
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index ab661b991d74..345674bacf37 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -81,20 +81,10 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
@@ -107,6 +97,8 @@ public:
MachineInstr* LoadMI) const {
return 0;
}
+
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
}
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 2d6c9209e6ae..44821b810b14 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -117,7 +117,7 @@ def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
-// These are target-independent nodes, but have target-specific formats.
+// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -134,6 +134,10 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
def retflag : SDNode<"SPISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInFlag]>;
+def getPCX : Operand<i32> {
+ let PrintMethod = "printGetPCX";
+}
+
//===----------------------------------------------------------------------===//
// SPARC Flag Conditions
//===----------------------------------------------------------------------===//
@@ -207,6 +211,11 @@ multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSP<outs, ins, asmstr, pattern>;
+// GETPCX for PIC
+let Defs = [O7], Uses = [O7] in {
+ def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
+}
+
let Defs = [O6], Uses = [O6] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
"!ADJCALLSTACKDOWN $amt",
@@ -431,18 +440,23 @@ def LEA_ADDri : F3_2<2, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"add ${addr:arith}, $dst",
[(set IntRegs:$dst, ADDRri:$addr)]>;
-
-defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+
+let Defs = [ICC] in
+ defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+
defm ADDX : F3_12<"addx", 0b001000, adde>;
// Section B.15 - Subtract Instructions, p. 110
defm SUB : F3_12 <"sub" , 0b000100, sub>;
defm SUBX : F3_12 <"subx" , 0b001100, sube>;
-defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
-def SUBXCCrr: F3_1<2, 0b011100,
- (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- "subxcc $b, $c, $dst", []>;
+let Defs = [ICC] in {
+ defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+ def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+}
// Section B.18 - Multiply Instructions, p. 113
defm UMUL : F3_12np<"umul", 0b001010>;
@@ -471,11 +485,12 @@ let isBarrier = 1 in
def BA : BranchSP<0b1000, (ins brtarget:$dst),
"ba $dst",
[(br bb:$dst)]>;
-
+
// FIXME: the encoding for the JIT should look at the condition field.
-def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "b$cc $dst",
- [(SPbricc bb:$dst, imm:$cc)]>;
+let Uses = [ICC] in
+ def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
@@ -489,9 +504,10 @@ class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
}
// FIXME: the encoding for the JIT should look at the condition field.
-def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "fb$cc $dst",
- [(SPbrfcc bb:$dst, imm:$cc)]>;
+let Uses = [FCC] in
+ def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
// Section B.24 - Call and Link Instruction, p. 125
@@ -633,15 +649,16 @@ def FDIVD : F3_3<2, 0b110100, 0b001001110,
// Note 2: the result of a FCMP is not available until the 2nd cycle
// after the instr is retired, but there is no interlock. This behavior
// is modelled with a forced noop after the instruction.
-def FCMPS : F3_3<2, 0b110101, 0b001010001,
- (outs), (ins FPRegs:$src1, FPRegs:$src2),
- "fcmps $src1, $src2\n\tnop",
- [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
-def FCMPD : F3_3<2, 0b110101, 0b001010010,
- (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
- "fcmpd $src1, $src2\n\tnop",
- [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
-
+let Defs = [FCC] in {
+ def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+}
//===----------------------------------------------------------------------===//
// V9 Instructions
@@ -754,8 +771,6 @@ def : Pat<(call tglobaladdr:$dst),
def : Pat<(call texternalsym:$dst),
(CALL texternalsym:$dst)>;
-def : Pat<(ret), (RETL)>;
-
// Map integer extload's to zextloads.
def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp
new file mode 100644
index 000000000000..b67537c17881
--- /dev/null
+++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCAsmInfo.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ COMMDirectiveTakesAlignment = true;
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ PrivateGlobalPrefix = ".L";
+}
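
The strings above configure Sun-style SPARC assembler output. A hypothetical emitter using the configured directives would print, for a 16-bit datum, a 32-bit datum, and 8 zero bytes:

    #include <cstdio>

    int main() {
      std::printf("\t.half\t%u\n", 1u);   // Data16bitsDirective
      std::printf("\t.word\t%u\n", 2u);   // Data32bitsDirective
      std::printf("\t.skip\t%u\n", 8u);   // ZeroDirective; .xword needs V9
      return 0;
    }
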
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/SparcMCAsmInfo.h
new file mode 100644
index 000000000000..12d6ef4a6f18
--- /dev/null
+++ b/lib/Target/Sparc/SparcMCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+ struct SparcELFMCAsmInfo : public MCAsmInfo {
+ explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
new file mode 100644
index 000000000000..e457235ff6a6
--- /dev/null
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -0,0 +1,32 @@
+//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sparc specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SPARCMACHINEFUNCTIONINFO_H
+#define SPARCMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ private:
+ unsigned GlobalBaseReg;
+ public:
+ SparcMachineFunctionInfo() : GlobalBaseReg(0) {}
+ SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+ };
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 59efb19ab9c5..7883260e14c0 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
@@ -75,8 +76,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+unsigned
+SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -112,6 +115,7 @@ void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(SP::G1, false);
MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
+ return 0;
}
void SparcRegisterInfo::
@@ -168,28 +172,25 @@ void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
}
unsigned SparcRegisterInfo::getRARegister() const {
- assert(0 && "What is the return address register");
- return 0;
+ return SP::I7;
}
unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const {
- assert(0 && "What is the frame register");
- return SP::G1;
+ return SP::I6;
}
unsigned SparcRegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
+ llvm_unreachable("What is the exception register");
return 0;
}
unsigned SparcRegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
+ llvm_unreachable("What is the exception handler register");
return 0;
}
int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "What is the dwarf register number");
- return -1;
+ return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
#include "SparcGenRegisterInfo.inc"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index fc863f3b28f0..753b1c049293 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -43,8 +43,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index e3a50ca42bbb..2b05c19bf148 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -16,6 +16,10 @@ class SparcReg<string n> : Register<n> {
let Namespace = "SP";
}
+class SparcCtrlReg<string n>: Register<n> {
+ let Namespace = "SP";
+}
+
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers
class Ri<bits<5> num, string n> : SparcReg<n> {
@@ -31,6 +35,10 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
let SubRegs = subregs;
}
+// Control Registers
+def ICC : SparcCtrlReg<"ICC">;
+def FCC : SparcCtrlReg<"FCC">;
+
// Integer registers
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
@@ -46,7 +54,7 @@ def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
-def O6 : Ri<14, "O6">, DwarfRegNum<[14]>;
+def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
@@ -62,7 +70,7 @@ def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
-def I6 : Ri<30, "I6">, DwarfRegNum<[30]>;
+def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
// Floating-point registers
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index aaddbff073ad..8a88cc076429 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -22,7 +22,7 @@ namespace {
cl::desc("Enable V9 instructions in the V8 target"));
}
-SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS) {
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS) {
// Set the default features.
IsV9 = false;
V8DeprecatedInsts = false;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index e5a5ba47f106..43770343d334 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -18,14 +18,13 @@
#include <string>
namespace llvm {
- class Module;
-
+
class SparcSubtarget : public TargetSubtarget {
bool IsV9;
bool V8DeprecatedInsts;
bool IsVIS;
public:
- SparcSubtarget(const Module &M, const std::string &FS);
+ SparcSubtarget(const std::string &TT, const std::string &FS);
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 1343bccadf54..3a381151f946 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,63 +10,30 @@
//
//===----------------------------------------------------------------------===//
-#include "SparcTargetAsmInfo.h"
+#include "SparcMCAsmInfo.h"
#include "SparcTargetMachine.h"
#include "Sparc.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-// Register the target.
-static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
+extern "C" void LLVMInitializeSparcTarget() {
+ // Register the target.
+ RegisterTargetMachine<SparcTargetMachine> X(TheSparcTarget);
+ RegisterAsmInfo<SparcELFMCAsmInfo> Y(TheSparcTarget);
-// No assembler printer by default
-SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
-
-
-// Force static initialization.
-extern "C" void LLVMInitializeSparcTarget() { }
-
-const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
- // FIXME: Handle Solaris subtarget someday :)
- return new SparcELFTargetAsmInfo(*this);
}
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
-SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
- : DataLayout("E-p:32:32-f128:128:128"),
- Subtarget(M, FS), TLInfo(*this), InstrInfo(Subtarget),
+SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ DataLayout("E-p:32:32-f128:128:128"),
+ Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
}
-unsigned SparcTargetMachine::getModuleMatchQuality(const Module &M) {
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "sparc-")
- return 20;
-
- // If the target triple is something non-sparc, we don't match.
- if (!TT.empty()) return 0;
-
- if (M.getEndianness() == Module::BigEndian &&
- M.getPointerSize() == Module::Pointer32)
-#ifdef __sparc__
- return 20; // BE/32 ==> Prefer sparc on sparc
-#else
- return 5; // BE/32 ==> Prefer ppc elsewhere
-#endif
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
-
-#if defined(__sparc__)
- return 10;
-#else
- return 0;
-#endif
-}
-
bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createSparcISelDag(*this));
@@ -82,14 +49,3 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
PM.add(createSparcDelaySlotFillerPass(*this));
return true;
}
-
-bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // Output assembly language.
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
- return false;
-}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index ee55d3ce774d..cce55105e76e 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -23,27 +23,15 @@
namespace llvm {
-class Module;
-
class SparcTargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
SparcSubtarget Subtarget;
SparcTargetLowering TLInfo;
SparcInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
-
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- TargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- SparcTargetMachine(const Module &M, const std::string &FS);
+ SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -55,18 +43,10 @@ public:
return const_cast<SparcTargetLowering*>(&TLInfo);
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
- static unsigned getModuleMatchQuality(const Module &M);
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
-
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..870b56a6ea1b
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMSparcInfo
+ SparcTargetInfo.cpp
+ )
+
+add_dependencies(LLVMSparcInfo SparcCodeGenTable_gen)
diff --git a/lib/Target/Sparc/TargetInfo/Makefile b/lib/Target/Sparc/TargetInfo/Makefile
new file mode 100644
index 000000000000..641ed87160c7
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Sparc/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcInfo
+
+# Hack: we need to include the 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
new file mode 100644
index 000000000000..5d697bd23a61
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSparcTarget;
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
+}
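
Target registration now runs through the TargetRegistry: the TargetInfo library claims a slot keyed by triple, and LLVMInitializeSparcTarget (earlier in this patch) attaches the TargetMachine and MCAsmInfo factories. Sketched for a hypothetical out-of-tree target (names are illustrative):

    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/ADT/Triple.h"
    using namespace llvm;

    Target TheDemoTarget;   // hypothetical registry slot

    extern "C" void LLVMInitializeDemoTargetInfo() {
      RegisterTarget<Triple::sparc> X(TheDemoTarget, "demo",
                                      "Demo target description");
    }
    // A RegisterTargetMachine<DemoTargetMachine> / RegisterAsmInfo<...>
    // pair would follow in LLVMInitializeDemoTarget, as for Sparc above.
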
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index f9370256c602..664a43cbcca7 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -12,10 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Streams.h"
#include <algorithm>
-#include <ostream>
#include <cassert>
#include <cctype>
using namespace llvm;
@@ -145,22 +144,22 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
// Print the CPU table.
- cerr << "Available CPUs for this target:\n\n";
+ errs() << "Available CPUs for this target:\n\n";
for (size_t i = 0; i != CPUTableSize; i++)
- cerr << " " << CPUTable[i].Key
+ errs() << " " << CPUTable[i].Key
<< std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
<< " - " << CPUTable[i].Desc << ".\n";
- cerr << "\n";
+ errs() << "\n";
// Print the Feature table.
- cerr << "Available features for this target:\n\n";
+ errs() << "Available features for this target:\n\n";
for (size_t i = 0; i != FeatTableSize; i++)
- cerr << " " << FeatTable[i].Key
+ errs() << " " << FeatTable[i].Key
<< std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
<< " - " << FeatTable[i].Desc << ".\n";
- cerr << "\n";
+ errs() << "\n";
- cerr << "Use +feature to enable a feature, or -feature to disable it.\n"
+ errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
<< "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
exit(1);
}
@@ -283,10 +282,9 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
}
} else {
- cerr << "'" << Features[0]
- << "' is not a recognized processor for this target"
- << " (ignoring processor)"
- << "\n";
+ errs() << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
}
// Iterate through each feature
for (size_t i = 1; i < Features.size(); i++) {
@@ -314,10 +312,9 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
}
} else {
- cerr << "'" << Feature
- << "' is not a recognized feature for this target"
- << " (ignoring feature)"
- << "\n";
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
}
}
@@ -340,25 +337,23 @@ void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
if (Entry) {
return Entry->Value;
} else {
- cerr << "'" << Features[0]
- << "' is not a recognized processor for this target"
- << " (ignoring processor)"
- << "\n";
+ errs() << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
return NULL;
}
}
/// print - Print feature string.
///
-void SubtargetFeatures::print(std::ostream &OS) const {
- for (size_t i = 0; i < Features.size(); i++) {
+void SubtargetFeatures::print(raw_ostream &OS) const {
+ for (size_t i = 0, e = Features.size(); i != e; ++i)
OS << Features[i] << " ";
- }
OS << "\n";
}
/// dump - Dump feature info.
///
void SubtargetFeatures::dump() const {
- print(*cerr.stream());
+ print(errs());
}
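
The cerr-to-errs() conversion swaps the retired llvm::cerr/Streams.h wrappers for raw_ostream, which formats directly without going through std::ostream. Minimal usage, mirroring the diagnostics above:

    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::errs() << "'" << "mycpu"
                   << "' is not a recognized processor for this target"
                   << " (ignoring processor)\n";
      return 0;
    }
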
diff --git a/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt b/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..c6be83a61080
--- /dev/null
+++ b/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMSystemZAsmPrinter
+ SystemZAsmPrinter.cpp
+ )
+add_dependencies(LLVMSystemZAsmPrinter SystemZCodeGenTable_gen)
diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile
new file mode 100644
index 000000000000..9a350dfe62e7
--- /dev/null
+++ b/lib/Target/SystemZ/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZAsmPrinter
+
+# Hack: we need to include the 'main' SystemZ target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
new file mode 100644
index 000000000000..a128992934be
--- /dev/null
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -0,0 +1,391 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SystemZ assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN SystemZAsmPrinter : public AsmPrinter {
+ public:
+ SystemZAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *MAI, bool V)
+ : AsmPrinter(O, TM, MAI, V) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printPCRelImmOperand(const MachineInstr *MI, int OpNum);
+ void printRIAddrOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printRRIAddrOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printS16ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << (int16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printS32ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << (int32_t)MI->getOperand(OpNum).getImm();
+ }
+
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printMachineInstruction(const MachineInstr * MI);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &F);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ }
+ };
+} // end of anonymous namespace
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ unsigned FnAlign = MF.getAlignment();
+ const Function *F = MF.getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+
+ EmitAlignment(FnAlign, F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ case Function::LinkerPrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ",@function\n"
+ << CurrentFnName << ":\n";
+}
+
+bool SystemZAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+  // Print the 'header' of the function
+ emitFunctionHeader(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II)
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // We didn't modify anything
+ return false;
+}
+
+void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ processDebugLoc(MI, true);
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+
+ processDebugLoc(MI, false);
+}
+
+void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getMangledName(GV);
+
+ O << Name;
+
+ // Assemble calls via PLT for externally visible symbols if PIC.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+
+ printOffset(MO.getOffset());
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ O << Name;
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "@PLT";
+
+ return;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+}
+
+
+void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
+ if (strncmp(Modifier + 7, "even", 4) == 0)
+ Reg = TRI->getSubReg(Reg, SystemZ::SUBREG_EVEN);
+ else if (strncmp(Modifier + 7, "odd", 3) == 0)
+ Reg = TRI->getSubReg(Reg, SystemZ::SUBREG_ODD);
+ else
+ assert(0 && "Invalid subreg modifier");
+ }
+
+ O << '%' << getRegisterName(Reg);
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ printOffset(MO.getOffset());
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getMangledName(GV);
+
+ O << Name;
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ O << Name;
+ break;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case SystemZII::MO_NO_FLAG:
+ break;
+ case SystemZII::MO_GOTENT: O << "@GOTENT"; break;
+ case SystemZII::MO_PLT: O << "@PLT"; break;
+ }
+
+ printOffset(MO.getOffset());
+}
+
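+// A note on the syntax emitted below (derived from the code): RI addresses
+// print as "disp(base)" and RRI addresses as "disp(base,index)"; the
+// parenthesized part is omitted when no base register is allocated.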
+void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+
+ // Print displacement operand.
+ printOperand(MI, OpNum+1);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum);
+ O << ')';
+ }
+}
+
+void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Index = MI->getOperand(OpNum+2);
+
+ // Print displacement operand.
+ printOperand(MI, OpNum+1);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum);
+ if (Index.getReg()) {
+ O << ',';
+ printOperand(MI, OpNum+2);
+ }
+ O << ')';
+ } else
+ assert(!Index.getReg() && "Should allocate base register first!");
+}
+
+void SystemZAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+    return;   // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getMangledName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = std::max(1U, TD->getPreferredAlignmentLog(GVar));
+
+ printVisibility(name, GVar->getVisibility());
+
+ O << "\t.type\t" << name << ",@object\n";
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
+
+ if (C->isNullValue() && !GVar->hasSection() &&
+ !GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+
+ if (VerboseAsm) {
+ O << "\t\t" << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ O << "\t.weak\t" << name << '\n';
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+  // FIXME: appending linkage variables should go into a section named after
+  // them or something.  For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+  // Use 16-bit alignment by default to simplify a bunch of things.
+ EmitAlignment(Align, GVar, 1);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+
+ EmitGlobalConstant(C);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+ RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
new file mode 100644
index 000000000000..81e51d89ad9f
--- /dev/null
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS SystemZ.td)
+
+tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(SystemZGenRegisterNames.inc -gen-register-enums)
+tablegen(SystemZGenRegisterInfo.inc -gen-register-desc)
+tablegen(SystemZGenInstrNames.inc -gen-instr-enums)
+tablegen(SystemZGenInstrInfo.inc -gen-instr-desc)
+tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
+tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
+tablegen(SystemZGenCallingConv.inc -gen-callingconv)
+tablegen(SystemZGenSubtarget.inc -gen-subtarget)
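+# Each tablegen() invocation above produces a generated .inc file which the
+# C++ sources pull in with #include (e.g. SystemZGenAsmWriter.inc in the
+# assembly printer).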
+
+add_llvm_target(SystemZCodeGen
+ SystemZISelDAGToDAG.cpp
+ SystemZISelLowering.cpp
+ SystemZInstrInfo.cpp
+ SystemZMCAsmInfo.cpp
+ SystemZRegisterInfo.cpp
+ SystemZSubtarget.cpp
+ SystemZTargetMachine.cpp
+ )
+
+target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
new file mode 100644
index 000000000000..f1097ebcf3b7
--- /dev/null
+++ b/lib/Target/SystemZ/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/SystemZ/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSystemZCodeGen
+TARGET = SystemZ
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \
+ SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \
+ SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \
+ SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc
+
+DIRS = AsmPrinter TargetInfo
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 000000000000..ea5240a10c9a
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,61 @@
+//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_H
+#define LLVM_TARGET_SystemZ_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SystemZTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ namespace SystemZCC {
+  // SystemZ-specific condition codes. These correspond to SYSTEMZ_*_COND in
+  // SystemZInstrInfo.td and must be kept in sync with it.
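+  // Informal gloss of the mnemonics: H(igh), L(ow), E(qual), O(verflow) and
+  // their N-prefixed negations; compound names such as LH mean "low or high".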
+ enum CondCodes {
+ O = 0,
+ H = 1,
+ NLE = 2,
+ L = 3,
+ NHE = 4,
+ LH = 5,
+ NE = 6,
+ E = 7,
+ NLH = 8,
+ HE = 9,
+ NL = 10,
+ LE = 11,
+ NH = 12,
+ NO = 13,
+ INVALID = -1
+ };
+ }
+
+ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ extern Target TheSystemZTarget;
+
+} // end namespace llvm
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#include "SystemZGenRegisterNames.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#include "SystemZGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 000000000000..4c08c087225e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,61 @@
+//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the SystemZ target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true",
+ "Support Z10 instructions">;
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"z9", []>;
+def : Proc<"z10", [FeatureZ10]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZInstrInfo.td"
+include "SystemZInstrFP.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+ let InstructionSet = SystemZInstrInfo;
+}
+
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 000000000000..c799a9e501aa
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,46 @@
+//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the SystemZ architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+  // i64 values are returned in R2; R3, R4 and R5 take additional results.
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+  // f32 / f64 values are returned in F0; F2, F4 and F6 take additional results.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // The first 5 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+ // The first 4 floating point arguments of non-varargs functions are passed
+ // in FP registers.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>,
+
+ // Integer values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
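+// Illustrative example of the rules above: for a call f(i64 a, i64 b, double d)
+// a is assigned to R2D, b to R3D and d to F0L; a sixth integer argument would
+// be placed in an 8-byte aligned stack slot.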
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 000000000000..028ee8986a37
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,829 @@
+//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZISelLowering.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static const unsigned subreg_even32 = 1;
+static const unsigned subreg_odd32 = 2;
+static const unsigned subreg_even = 3;
+static const unsigned subreg_odd = 4;
+
+namespace {
+  /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValues
+ /// instead of register numbers for the leaves of the matched tree.
+ struct SystemZRRIAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ SDValue IndexReg;
+ int64_t Disp;
+ bool isRI;
+
+ SystemZRRIAddressMode(bool RI = false)
+ : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
+ }
+
+ void dump() {
+ errs() << "SystemZRRIAddressMode " << this << '\n';
+ if (BaseType == RegBase) {
+ errs() << "Base.Reg ";
+ if (Base.Reg.getNode() != 0)
+ Base.Reg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << '\n';
+ } else {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ if (!isRI) {
+ errs() << "IndexReg ";
+ if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+ else errs() << "nul";
+ }
+ errs() << " Disp " << Disp << '\n';
+ }
+ };
+}
+
+/// SystemZDAGToDAGISel - SystemZ-specific code to select SystemZ machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class SystemZDAGToDAGISel : public SelectionDAGISel {
+ SystemZTargetLowering &Lowering;
+ const SystemZSubtarget &Subtarget;
+
+ void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp);
+ void getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index);
+
+ public:
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "SystemZ DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDValue getI16Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "SystemZGenDAGISel.inc"
+
+ private:
+ bool SelectAddrRI12Only(SDValue Op, SDValue& Addr,
+ SDValue &Base, SDValue &Disp);
+ bool SelectAddrRI12(SDValue Op, SDValue& Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly = false);
+ bool SelectAddrRI(SDValue Op, SDValue& Addr,
+ SDValue &Base, SDValue &Disp);
+ bool SelectAddrRRI12(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectAddrRRI20(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectLAAddr(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ SDNode *Select(SDValue Op);
+
+ bool TryFoldLoad(SDValue P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth = 0);
+ bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
+ bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit);
+
+ #ifndef NDEBUG
+ unsigned Indent;
+ #endif
+ };
+} // end anonymous namespace
+
+/// createSystemZISelDag - This pass converts a legalized DAG into a
+/// SystemZ-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new SystemZDAGToDAGISel(TM, OptLevel);
+}
+
+/// isImmSExt20 - This method tests whether the given value is a 32-bit or
+/// 64-bit immediate that can be accurately represented as a sign extension
+/// of a 20-bit value. If so, it returns true and stores the value in Imm.
+static bool isImmSExt20(int64_t Val, int64_t &Imm) {
+ if (Val >= -524288 && Val <= 524287) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
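+// For example, 524287 (0x7FFFF) and -524288 are accepted above, while 524288
+// and -524289 are not.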
+
+/// isImmZExt12 - This method tests whether the given value is a 32-bit or
+/// 64-bit immediate that can be accurately represented as a zero extension
+/// of a 12-bit value. If so, it returns true and stores the value in Imm.
+static bool isImmZExt12(int64_t Val, int64_t &Imm) {
+ if (Val >= 0 && Val <= 0xFFF) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
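+/// For instance (illustrative): matching (add %reg, 16) with is12Bit = true
+/// leaves Base = %reg and Disp = 16 via the ISD::ADD and ISD::Constant cases.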
+bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+  // FIXME: We could do better here. If we have something like
+  // (shift (add A, imm), N), we can try to reassociate and fold the shifted
+  // imm into the addressing mode.
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ int64_t Imm = 0;
+ bool Match = (is12Bit ?
+ isImmZExt12(AM.Disp + Val, Imm) :
+ isImmSExt20(AM.Disp + Val, Imm));
+ if (Match) {
+ AM.Disp = Imm;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SUB: {
+    // Given A-B, if A can be completely folded into the address (leaving the
+    // index field unused), use -B as the index. This is a win if A has
+    // multiple parts that can be folded into the address. It also saves a mov
+    // if the base register has other uses, since it avoids a two-address sub
+    // instruction; however, it costs an additional mov if the index register
+    // has other uses.
+
+ // Test if the LHS of the sub can be folded.
+ SystemZRRIAddressMode Backup = AM;
+ if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRI) {
+ AM = Backup;
+ break;
+ }
+
+ // If the base is a register with multiple uses, this transformation may
+ // save a mov. Otherwise it's probably better not to do it.
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue RHS = N.getNode()->getOperand(1);
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ SystemZRRIAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (!AM.isRI &&
+ AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
+ AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.IndexReg = N.getNode()->getOperand(1);
+ return false;
+ }
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ SystemZRRIAddressMode Backup = AM;
+ int64_t Offset = CN->getSExtValue();
+ int64_t Imm = 0;
+ bool MatchOffset = (is12Bit ?
+ isImmZExt12(AM.Disp + Offset, Imm) :
+ isImmSExt20(AM.Disp + Offset, Imm));
+      // The resulting displacement must fit in 12 or 20 bits.
+ if (MatchOffset &&
+ // LHS should be an addr mode.
+ !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp = Imm;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
+ SystemZRRIAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, check to see if the index register is set.
+ if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
+ AM.IndexReg = N;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = SystemZRRIAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp) {
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ Base = AM.Base.Reg;
+ else
+ Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ getAddressOperandsRI(AM, Base, Disp);
+ Index = AM.IndexReg;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// an unsigned 12-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue Op, SDValue& Addr,
+ SDValue &Base, SDValue &Disp) {
+ return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true);
+}
+
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly) {
+ SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we can fold N into the addressing mode, allow it even if N has
+      // multiple uses; in general, an address computation is used as an
+      // address by all of its uses. But watch out for CopyToReg uses: they
+      // mean the address computation is live-out and will be materialized by
+      // an LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+  // Check whether a 20-bit displacement would do better: reject the 12-bit
+  // form if it folded no displacement while the 20-bit form folded one.
+ if (!Done && !is12BitOnly &&
+ !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM12, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// a signed 20-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr,
+ SDValue &Base, SDValue &Disp) {
+ SystemZRRIAddressMode AM(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we can fold N into the addressing mode, allow it even if N has
+      // multiple uses; in general, an address computation is used as an
+      // address by all of its uses. But watch out for CopyToReg uses: they
+      // mean the address computation is live-out and will be materialized by
+      // an LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus an unsigned 12-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM20, AM12;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we can fold N into the addressing mode, allow it even if N has
+      // multiple uses; in general, an address computation is used as an
+      // address by all of its uses. But watch out for CopyToReg uses: they
+      // mean the address computation is live-out and will be materialized by
+      // an LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+  // Check whether a 20-bit displacement would do better: reject the 12-bit
+  // form if it folded no displacement while the 20-bit form folded one.
+ if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM12.IndexReg.getNode())
+ AM12.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM12, Base, Disp, Index);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus a signed 20-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we can fold N into the addressing mode, allow it even if N has
+      // multiple uses; in general, an address computation is used as an
+      // address by all of its uses. But watch out for CopyToReg uses: they
+      // mean the address computation is live-out and will be materialized by
+      // an LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Disp, Index);
+
+ return true;
+}
+
+/// SelectLAAddr - This calls MatchAddress and determines whether the maximal
+/// addressing mode it matches can be cost-effectively emitted as an LA/LAY
+/// instruction.
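+/// The Complexity score below counts address components (live base register,
+/// index register, nonzero displacement; a frame-index base weighs more), and
+/// only a score above 2 is considered worth an LA/LAY.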
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+
+ if (MatchAddress(Addr, AM, false))
+ return false;
+
+ EVT VT = Addr.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ if (AM.Base.Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity += 1;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity += 1;
+
+ if (Complexity > 2) {
+ getAddressOperands(AM, Base, Disp, Index);
+ return true;
+ }
+
+ return false;
+}
+
+bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ if (ISD::isNON_EXTLoad(N.getNode()) &&
+ N.hasOneUse() &&
+ IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+ return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
+ return false;
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void SystemZDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Codegen the basic block.
+ DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(Indent = 0);
+ SelectRoot(*CurDAG);
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
+ SDNode *Node = Op.getNode();
+ EVT NVT = Node->getValueType(0);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ DEBUG(errs().indent(Indent) << "Selecting: ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ DEBUG(Indent += 2);
+
+  // If the node already has a machine opcode, it has already been selected.
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs().indent(Indent-2) << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ DEBUG(Indent -= 2);
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SDIVREM: {
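+    // SystemZ signed division works on an even/odd register pair: the code
+    // below moves the dividend (sign-extended first for i32) into the odd
+    // subreg, then reads the quotient back from the odd subreg and the
+    // remainder from the even subreg.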
+ unsigned Opc, MOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ EVT ResVT;
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
+ ResVT = MVT::v2i64;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend;
+ if (is32Bit)
+ Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
+ else
+ Dividend = N0.getNode();
+
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, ResVT);
+ Dividend =
+ CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(subreg_odd, MVT::i32));
+
+ SDNode *Result;
+ SDValue DivVal = SDValue(Dividend, 0);
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 0));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!Op.getValue(0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(Op.getValue(0), SDValue(Div, 0));
+ DEBUG(errs().indent(Indent-2) << "=> ";
+ Result->dump(CurDAG);
+ errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!Op.getValue(1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
+ DEBUG(errs().indent(Indent-2) << "=> ";
+ Result->dump(CurDAG);
+ errs() << "\n");
+ }
+
+#ifndef NDEBUG
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
+ case ISD::UDIVREM: {
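+    // The unsigned case also uses an even/odd register pair, but the even
+    // half must be explicitly zeroed (via ClrOpc below) rather than
+    // sign-extended.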
+ unsigned Opc, MOpc, ClrOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ EVT ResVT;
+
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
+ ClrOpc = SystemZ::MOV64Pr0_even;
+ ResVT = MVT::v2i32;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
+ ClrOpc = SystemZ::MOV128r0_even;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend = N0.getNode();
+
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, ResVT);
+ {
+ unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ Dividend =
+ CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
+ }
+
+ // Zero out even subreg
+ Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
+
+ SDValue DivVal = SDValue(Dividend, 0);
+ SDNode *Result;
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+      Result = CurDAG->getMachineNode(MOpc, dl, ResVT,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 0));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!Op.getValue(0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(Op.getValue(0), SDValue(Div, 0));
+ DEBUG(errs().indent(Indent-2) << "=> ";
+ Result->dump(CurDAG);
+ errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!Op.getValue(1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
+ DEBUG(errs().indent(Indent-2) << "=> ";
+ Result->dump(CurDAG);
+ errs() << "\n");
+ }
+
+#ifndef NDEBUG
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Op);
+
+ DEBUG(errs().indent(Indent-2) << "=> ";
+ if (ResNode == NULL || ResNode == Op.getNode())
+ Op.getNode()->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << "\n";
+ );
+ DEBUG(Indent -= 2);
+
+ return ResNode;
+}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 000000000000..07e0d8305806
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,843 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation -----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-lower"
+
+#include "SystemZISelLowering.h"
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ RegInfo = TM.getRegisterInfo();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
+ addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
+ addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
+ addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
+
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
+ addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
+
+    addLegalFPImmediate(APFloat(+0.0));  // lzdr
+    addLegalFPImmediate(APFloat(+0.0f)); // lzer
+    addLegalFPImmediate(APFloat(-0.0));  // lzdr + lndr
+    addLegalFPImmediate(APFloat(-0.0f)); // lzer + lner
+ }
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+  // Set shift amount properties.
+ setShiftAmountType(MVT::i64);
+
+ // Provide all sorts of operation actions
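+  // (Background gloss, standard LLVM semantics: Expand legalizes an operation
+  // in terms of other operations or library calls, Promote widens it to a
+  // larger type, and Custom routes it to LowerOperation below.)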
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+ setSchedulingPreference(SchedulingForLatency);
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+ // FIXME: Can we lower these 2 efficiently?
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ // FIXME: Can we support these natively?
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+  // Expand FP operations that have no native support.
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ // We have only 64-bit bitconverts
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ default:
+ llvm_unreachable("Should not custom lower this!");
+ return SDValue();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i32)
+ return std::make_pair(0U, SystemZ::GR32RegisterClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, SystemZ::GR128RegisterClass);
+
+ return std::make_pair(0U, SystemZ::GR64RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
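+// Illustrative use from C (hypothetical snippet): asm("lgr %0, %1" : "=r"(dst)
+// : "r"(src)) with 64-bit operands would pick GR64 via the 'r' case above.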
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
+
+SDValue
+SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ }
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+ if (isVarArg)
+ llvm_report_error("Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[i];
+ EVT LocVT = VA.getLocVT();
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ TargetRegisterClass *RC;
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << LocVT.getSimpleVT().SimpleTy
+ << "\n";
+#endif
+ llvm_unreachable(0);
+ case MVT::i64:
+ RC = SystemZ::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ RC = SystemZ::FP32RegisterClass;
+ break;
+ case MVT::f64:
+ RC = SystemZ::FP64RegisterClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
+ VA.getLocMemOffset());
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0);
+ }
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+
+ return Chain;
+}
+
+/// LowerCCCCallTo - Function arguments are copied from virtual registers to
+/// physical registers or the stack frame; CALLSEQ_START and CALLSEQ_END are
+/// emitted.
+/// TODO: sret.
+SDValue
+SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Offset to first argument stack slot.
+ const unsigned FirstArgOffset = 160;
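+  // (160 bytes is the size of the register save area the zSeries ELF ABI
+  // reserves at the bottom of a stack frame; outgoing arguments follow it.)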
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = Outs[i].Val;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that are passed in registers are collected in the RegsToPass
+    // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr =
+ DAG.getCopyFromReg(Chain, dl,
+ (RegInfo->hasFP(MF) ?
+ SystemZ::R11D : SystemZ::R15D),
+ getPointerTy());
+
+ unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(Offset));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(), Offset));
+ }
+ }
+
+  // Merge all the store nodes into a single TokenFactor node, since they are
+  // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
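+  // (For instance, two independent argument stores st1 and st2 are merged
+  //  into a single chain: ch = TokenFactor st1, st2.)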
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into registers. The InFlag
+  // is necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
+  // CCValAssign - represents the assignment of the return value to a location.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ResValue = Outs[i].Val;
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // If this is an 8/16/32-bit value, it should really be passed promoted
+    // to 64 bits.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::AExt)
+ ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
+
+    // Guarantee that all emitted copies are stuck together by threading the
+    // flag value through them, so nothing can be scheduled in between.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+  // Return void.
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) {
+ // FIXME: Emit a test if RHS is zero
+
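+  // Example mappings (informal): integer SETULT selects UCMP with condition
+  // L, while floating-point SETULT selects CMP with condition NHE, which is
+  // also satisfied when the operands compare unordered.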
+ bool isUnsigned = false;
+ SystemZCC::CondCodes TCC;
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ TCC = SystemZCC::E;
+ break;
+ case ISD::SETUEQ:
+ TCC = SystemZCC::NLH;
+ break;
+ case ISD::SETNE:
+ case ISD::SETONE:
+ TCC = SystemZCC::NE;
+ break;
+ case ISD::SETUNE:
+ TCC = SystemZCC::LH;
+ break;
+ case ISD::SETO:
+ TCC = SystemZCC::O;
+ break;
+ case ISD::SETUO:
+ TCC = SystemZCC::NO;
+ break;
+ case ISD::SETULE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NH;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ TCC = SystemZCC::LE;
+ break;
+ case ISD::SETUGE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NL;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ TCC = SystemZCC::HE;
+ break;
+ case ISD::SETUGT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NLE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ TCC = SystemZCC::H;
+ break;
+ case ISD::SETULT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NHE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ TCC = SystemZCC::L;
+ break;
+ }
+
+ SystemZCC = DAG.getConstant(TCC, MVT::i32);
+
+ DebugLoc dl = LHS.getDebugLoc();
+ return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
+ dl, MVT::Flag, LHS, RHS);
+}
+
+
+SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+ return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, SystemZCC, Flag);
+}
+
+SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(SystemZCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ SDValue Result;
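+  // A non-PIC reference that needs no extra load folds the offset into the
+  // address directly; otherwise the address is looked up first (via the GOT
+  // when an extra load is required) and any offset is added afterwards.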
+ if (!IsPic && !ExtraLoadRequired) {
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ unsigned char OpFlags = 0;
+ if (ExtraLoadRequired)
+ OpFlags = SystemZII::MO_GOTENT;
+
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+ }
+
+ Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
+ getPointerTy(), Result);
+
+ if (ExtraLoadRequired)
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ PseudoSourceValue::getGOT(), 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+// FIXME: PIC here
+SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+
+// FIXME: PIC here
+// FIXME: This is just a dirty hack. We need to lower constant pools properly.
+SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG";
+ case SystemZISD::CALL: return "SystemZISD::CALL";
+ case SystemZISD::BRCOND: return "SystemZISD::BRCOND";
+ case SystemZISD::CMP: return "SystemZISD::CMP";
+ case SystemZISD::UCMP: return "SystemZISD::UCMP";
+ case SystemZISD::SELECT: return "SystemZISD::SELECT";
+ case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper";
+ default: return NULL;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ const SystemZInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == SystemZ::Select32 ||
+ MI->getOpcode() == SystemZ::SelectF32 ||
+ MI->getOpcode() == SystemZ::Select64 ||
+ MI->getOpcode() == SystemZ::SelectF64) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
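+  //
+  //          thisMBB
+  //          /     \
+  //     copy0MBB    |
+  //          \     /
+  //         copy1MBB   (PHI merges TrueVal and FalseVal)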
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
+ BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ EM->insert(std::make_pair(*SI, copy1MBB));
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(BB, dl, TII.get(SystemZ::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 000000000000..c2c24bc1f3ab
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,141 @@
+//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
+#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace SystemZISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+      /// CALL - These operations represent an abstract call instruction,
+      /// which carries the callee and the registers holding the outgoing
+      /// arguments.
+ CALL,
+
+ /// PCRelativeWrapper - PC relative address
+ PCRelativeWrapper,
+
+ /// CMP, UCMP - Compare instruction
+ CMP,
+ UCMP,
+
+ /// BRCOND - Conditional branch. Operand 0 is chain operand, operand 1 is
+ /// the block to branch if condition is true, operand 2 is condition code
+ /// and operand 3 is the flag operand produced by a CMP instruction.
+ BRCOND,
+
+ /// SELECT - Operands 0 and 1 are selection variables, operand 2 is
+ /// condition code and operand 3 is the flag operand.
+ SELECT
+ };
+ }
+
+ class SystemZSubtarget;
+ class SystemZTargetMachine;
+
+ class SystemZTargetLowering : public TargetLowering {
+ public:
+ explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const {
+ return 1;
+ }
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+
+ SDValue EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG);
+
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
+ const SystemZSubtarget &Subtarget;
+ const SystemZTargetMachine &TM;
+ const SystemZRegisterInfo *RegInfo;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 000000000000..b69d2f6ce9ff
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,128 @@
+//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed function call.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Displacement, Index.
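+//
+// For example (illustrative), the assembler operand 8(%r2,%r3), written in
+// D(X,B) syntax, is added as Base=%r3, Displacement=8, Index=%r2.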
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// SystemZAddressMode - This struct holds a generalized SystemZ address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with R15 or R11, with Disp adjusted accordingly.
+struct SystemZAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned IndexReg;
+ int32_t Disp;
+ GlobalValue *GV;
+
+ SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
+ Base.Reg = 0;
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no index or displacement.
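+/// For example (usage sketch with hypothetical registers):
+///   addDirectMem(BuildMI(MBB, MI, DL, TII.get(SystemZ::MOV64rm), DstReg),
+///                BaseReg);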
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+  // Because memory references are always represented with three values,
+  // this adds Reg, a zero displacement, and no index register (NoReg) to
+  // the instruction.
+ return MIB.addReg(Reg).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no index register but with a
+/// displacement. An example is: 10(%r15).
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &
+addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
+ .addReg(Reg2, getKillRegState(isKill2));
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
+ if (AM.BaseType == SystemZAddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+    assert(0 && "Unknown base type!");
+
+ return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
+}
+
+/// addFrameReference - This function is used to add a reference to the base
+/// of an abstract object on the stack frame of the current function. The
+/// reference uses the FrameIndex as its base register until it is resolved.
+/// A constant offset can be specified as well.
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Flags = 0;
+ if (TID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (TID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ Flags, Offset,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 000000000000..8a202d4523a5
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,340 @@
+//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ (binary) floating point instructions in
+// TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: multiclassify!
+
+//===----------------------------------------------------------------------===//
+// FP Pattern fragments
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+let usesCustomDAGSchedInserter = 1 in {
+ def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
+ "# SelectF32 PSEUDO",
+ [(set FP32:$dst,
+ (SystemZselect FP32:$src1, FP32:$src2, imm:$cc))]>;
+ def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
+ "# SelectF64 PSEUDO",
+ [(set FP64:$dst,
+ (SystemZselect FP64:$src1, FP64:$src2, imm:$cc))]>;
+}
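+
+// These select pseudos are expanded into a branch diamond by
+// SystemZTargetLowering::EmitInstrWithCustomInserter.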
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// Floating point constant loads.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins),
+ "lzer\t{$dst}",
+ [(set FP32:$dst, fpimm0)]>;
+def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins),
+ "lzdr\t{$dst}",
+ [(set FP64:$dst, fpimm0)]>;
+}
+
+let neverHasSideEffects = 1 in {
+def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "ler\t{$dst, $src}",
+ []>;
+def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "ldr\t{$dst, $src}",
+ []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "le\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr12:$src))]>;
+def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src),
+ "ley\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr:$src))]>;
+def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ld\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr12:$src))]>;
+def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src),
+ "ldy\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr:$src))]>;
+}
+
+def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src),
+ "ste\t{$src, $dst}",
+ [(store FP32:$src, rriaddr12:$dst)]>;
+def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src),
+ "stey\t{$src, $dst}",
+ [(store FP32:$src, rriaddr:$dst)]>;
+def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src),
+ "std\t{$src, $dst}",
+ [(store FP64:$src, rriaddr12:$dst)]>;
+def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src),
+ "stdy\t{$src, $dst}",
+ [(store FP64:$src, rriaddr:$dst)]>;
+
+def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>;
+def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+
+let Defs = [PSW] in {
+def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lcebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg FP32:$src)),
+ (implicit PSW)]>;
+def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lcdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg FP64:$src)),
+ (implicit PSW)]>;
+
+def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lpebr\t{$dst, $src}",
+ [(set FP32:$dst, (fabs FP32:$src)),
+ (implicit PSW)]>;
+def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lpdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fabs FP64:$src)),
+ (implicit PSW)]>;
+
+def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lnebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg(fabs FP32:$src))),
+ (implicit PSW)]>;
+def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lndbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg(fabs FP64:$src))),
+ (implicit PSW)]>;
+}
+
+let isTwoAddress = 1 in {
+let Defs = [PSW] in {
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "aebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "adbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+}
+
+def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "aeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "adb\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+
+def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "sebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "sdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+
+def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "seb\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "sdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "meebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>;
+def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "mdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>;
+}
+
+def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "meeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>;
+def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "mdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>;
+
+def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "maebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "maeb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "madbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "madb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "msebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "mseb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "msdbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "msdb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "debr\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>;
+def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "ddbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>;
+
+def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "deb\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>;
+def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "ddb\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
+
+} // isTwoAddress = 1
+
+def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "sqebr\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt FP32:$src))]>;
+def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "sqdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt FP64:$src))]>;
+
+def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "sqeb\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>;
+def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "sqdb\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>;
+
+def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src),
+ "ledbr\t{$dst, $src}",
+ [(set FP32:$dst, (fround FP64:$src))]>;
+
+def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src),
+ "ldebr\t{$dst, $src}",
+ [(set FP64:$dst, (fextend FP32:$src))]>;
+def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ldeb\t{$dst, $src}",
+ [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>;
+
+let Defs = [PSW] in {
+def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src),
+ "cefbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src),
+ "cegbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src),
+ "cdfbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "cdgbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src),
+ "cfebr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src),
+ "cfdbr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src),
+ "cgebr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "cgdbr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "lgdr\t{$dst, $src}",
+ [(set GR64:$dst, (bitconvert FP64:$src))]>;
+def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "ldgr\t{$dst, $src}",
+ [(set FP64:$dst, (bitconvert GR64:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Test instructions (like AND but do not produce any result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
+ "cebr\t$src1, $src2",
+ [(SystemZcmp FP32:$src1, FP32:$src2), (implicit PSW)]>;
+def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
+ "cdbr\t$src1, $src2",
+ [(SystemZcmp FP64:$src1, FP64:$src2), (implicit PSW)]>;
+
+def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
+ "ceb\t$src1, $src2",
+ [(SystemZcmp FP32:$src1, (load rriaddr12:$src2)),
+ (implicit PSW)]>;
+def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
+ "cdb\t$src1, $src2",
+ [(SystemZcmp FP64:$src1, (load rriaddr12:$src2)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Floating point constant -0.0
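+// (-0.0 is not loaded directly: it is materialized by loading +0.0 with
+//  lzer/lzdr and negating the result with lcebr/lcdbr.)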
+def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>;
+def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 000000000000..b4a8993c1971
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,133 @@
+//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo : Format<0>;
+def EForm : Format<1>;
+def IForm : Format<2>;
+def RIForm : Format<3>;
+def RIEForm : Format<4>;
+def RILForm : Format<5>;
+def RISForm : Format<6>;
+def RRForm : Format<7>;
+def RREForm : Format<8>;
+def RRFForm : Format<9>;
+def RRRForm : Format<10>;
+def RRSForm : Format<11>;
+def RSForm : Format<12>;
+def RSIForm : Format<13>;
+def RSILForm : Format<14>;
+def RSYForm : Format<15>;
+def RXForm : Format<16>;
+def RXEForm : Format<17>;
+def RXFForm : Format<18>;
+def RXYForm : Format<19>;
+def SForm : Format<20>;
+def SIForm : Format<21>;
+def SILForm : Format<22>;
+def SIYForm : Format<23>;
+def SSForm : Format<24>;
+def SSEForm : Format<25>;
+def SSFForm : Format<26>;
+
+class InstSystemZ<bits<16> op, Format f, dag outs, dag ins> : Instruction {
+ let Namespace = "SystemZ";
+
+ bits<16> Opcode = op;
+
+ Format Form = f;
+ bits<5> FormBits = Form.Value;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+}
+
+class I8<bits<8> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-7} = op;
+ let Opcode{8-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I12<bits<12> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-11} = op;
+ let Opcode{12-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I16<bits<16> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<op, f, outs, ins> {
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class RRI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RRForm, outs, ins, asmstr, pattern>;
+
+class RII<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RIForm, outs, ins, asmstr, pattern>;
+
+class RILI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RILForm, outs, ins, asmstr, pattern>;
+
+class RREI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RREForm, outs, ins, asmstr, pattern>;
+
+class RXI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RXForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RXYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RXYForm, outs, ins, asmstr, pattern>;
+
+class RSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RSForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RSYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RSYForm, outs, ins, asmstr, pattern>;
+
+class SII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, SIForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class SIYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SIYForm, outs, ins, asmstr, pattern>;
+
+class SILI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SILForm, outs, ins, asmstr, pattern>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<0, Pseudo, outs, ins> {
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 000000000000..236711cc0bcc
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,648 @@
+//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZGenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+ : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)),
+ RI(tm, *this), TM(tm) {
+ // Fill the spill offsets map
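+  // These are the ABI-defined slots in the 160-byte register save area:
+  // r2 is saved at offset 0x10 and each following GPR 8 bytes higher, up to
+  // r15 at offset 0x78.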
+ static const unsigned SpillOffsTab[][2] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 }
+ };
+
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+ for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+ RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
+ return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32mr;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64mr;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32mr;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64mr;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Pmr;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128mr;
+ } else
+ llvm_unreachable("Unsupported regclass to store");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32rm;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64rm;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32rm;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64rm;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Prm;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128rm;
+ } else
+ llvm_unreachable("Unsupported regclass to load");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+  // Determine if DestRC and SrcRC have a common superclass.
+ const TargetRegisterClass *CommonRC = DestRC;
+ if (DestRC == SrcRC)
+ /* Same regclass for source and dest */;
+ else if (CommonRC->hasSuperClass(SrcRC))
+ CommonRC = SrcRC;
+ else if (!CommonRC->hasSubClass(SrcRC))
+ CommonRC = 0;
+
+ if (CommonRC) {
+ if (CommonRC == &SystemZ::GR64RegClass ||
+ CommonRC == &SystemZ::ADDR64RegClass) {
+ BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg);
+ } else if (CommonRC == &SystemZ::GR32RegClass ||
+ CommonRC == &SystemZ::ADDR32RegClass) {
+ BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg);
+ } else if (CommonRC == &SystemZ::GR64PRegClass) {
+ BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg);
+ } else if (CommonRC == &SystemZ::GR128RegClass) {
+ BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg);
+ } else if (CommonRC == &SystemZ::FP32RegClass) {
+ BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg);
+ } else if (CommonRC == &SystemZ::FP64RegClass) {
+ BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg);
+ } else {
+ return false;
+ }
+
+ return true;
+ }
+
+ if ((SrcRC == &SystemZ::GR64RegClass &&
+ DestRC == &SystemZ::ADDR64RegClass) ||
+ (DestRC == &SystemZ::GR64RegClass &&
+ SrcRC == &SystemZ::ADDR64RegClass)) {
+ BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg);
+ return true;
+ } else if ((SrcRC == &SystemZ::GR32RegClass &&
+ DestRC == &SystemZ::ADDR32RegClass) ||
+ (DestRC == &SystemZ::GR32RegClass &&
+ SrcRC == &SystemZ::ADDR32RegClass)) {
+ BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ return false;
+}
+
+bool
+SystemZInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case SystemZ::MOV32rr:
+ case SystemZ::MOV64rr:
+ case SystemZ::MOV64rrP:
+ case SystemZ::MOV128rr:
+ case SystemZ::FMOV32rr:
+ case SystemZ::FMOV64rr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid register-register move instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
+ return true;
+ }
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
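+  // Only plain frame accesses are recognized here: the address operands must
+  // be a frame index with a zero displacement and no index register.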
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32rm:
+ case SystemZ::MOV32rmy:
+ case SystemZ::MOV64rm:
+ case SystemZ::MOVSX32rm8:
+ case SystemZ::MOVSX32rm16y:
+ case SystemZ::MOVSX64rm8:
+ case SystemZ::MOVSX64rm16:
+ case SystemZ::MOVSX64rm32:
+ case SystemZ::MOVZX32rm8:
+ case SystemZ::MOVZX32rm16:
+ case SystemZ::MOVZX64rm8:
+ case SystemZ::MOVZX64rm16:
+ case SystemZ::MOVZX64rm32:
+ case SystemZ::FMOV32rm:
+ case SystemZ::FMOV32rmy:
+ case SystemZ::FMOV64rm:
+ case SystemZ::FMOV64rmy:
+ case SystemZ::MOV64Prm:
+ case SystemZ::MOV64Prmy:
+ case SystemZ::MOV128rm:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
+ MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32mr:
+ case SystemZ::MOV32mry:
+ case SystemZ::MOV64mr:
+ case SystemZ::MOV32m8r:
+ case SystemZ::MOV32m8ry:
+ case SystemZ::MOV32m16r:
+ case SystemZ::MOV32m16ry:
+ case SystemZ::MOV64m8r:
+ case SystemZ::MOV64m8ry:
+ case SystemZ::MOV64m16r:
+ case SystemZ::MOV64m16ry:
+ case SystemZ::MOV64m32r:
+ case SystemZ::MOV64m32ry:
+ case SystemZ::FMOV32mr:
+ case SystemZ::FMOV32mry:
+ case SystemZ::FMOV64mr:
+ case SystemZ::FMOV64mry:
+ case SystemZ::MOV64Pmr:
+ case SystemZ::MOV64Pmry:
+ case SystemZ::MOV128mr:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
+ MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(3).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+bool
+SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned CalleeFrameSize = 0;
+
+  // Scan the callee-saved registers and find the bounds of the register
+  // spill area.
+ unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+ if (RegClass != &SystemZ::FP64RegClass) {
+ unsigned Offset = RegSpillOffsets[Reg];
+ CalleeFrameSize += 8;
+ if (StartOffset > Offset) {
+ LowReg = Reg; StartOffset = Offset;
+ }
+ if (EndOffset < Offset) {
+ HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+ }
+ }
+ }
+
+ // Save information for epilogue inserter.
+ MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+ MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+ // Save GPRs
+ if (StartOffset) {
+    // Build a store instruction. Use the STORE MULTIPLE instruction if there
+    // is more than one register to store; otherwise use a plain STORE.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
+ SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+ // Add store operands.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+ MIB.addReg(LowReg, RegState::Kill);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Kill);
+
+    // Do a second scan, marking the remaining registers as killed by the
+    // instruction.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitKill);
+ }
+ }
+
+ // Save FPRs
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+ if (RegClass == &SystemZ::FP64RegClass) {
+ MBB.addLiveIn(Reg);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass);
+ }
+ }
+
+ return true;
+}
+
+bool
+SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Restore FP registers
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+ if (RegClass == &SystemZ::FP64RegClass)
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ }
+
+ // Restore GP registers
+ unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+ unsigned StartOffset = RegSpillOffsets[LowReg];
+
+ if (StartOffset) {
+    // Build a load instruction. Use the LOAD MULTIPLE instruction if there
+    // is more than one register to load; otherwise use a plain LOAD.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
+ SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add load operands.
+ MIB.addReg(LowReg, RegState::Define);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Define);
+
+ MIB.addReg((RegInfo->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D));
+ MIB.addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+
+    // Do a second scan, marking the remaining registers as defined by the
+    // instruction.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(Cond.size() == 1 && "Invalid branch condition!");
+
+ SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
+ Cond[0].setImm(getOppositeCondition(CC));
+ return false;
+}
+
+bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case SystemZ::RET: // Return.
+ case SystemZ::JMP: // Uncond branch.
+ case SystemZ::JMPr: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+
+bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (TID.isBranch() && !TID.isBarrier())
+ return true;
+ if (!TID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
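+  // For example, a block ending in "JE L1; JMP L2" is analyzed as TBB = L1,
+  // FBB = L2, Cond = { SystemZCC::E }.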
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == SystemZ::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (next(I) != MBB.end())
+ next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+      // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == SystemZCC::INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination.
+    assert(Cond.size() == 1);
+    assert(TBB);
+
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->getOpcode() != SystemZ::JMP &&
+ getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME: this should probably have a DebugLoc operand
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "SystemZ branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, dl, get(SystemZ::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
+ BuildMI(&MBB, dl, getBrCond(CC)).addMBB(TBB);
+ ++Count;
+
+ if (FBB) {
+    // Two-way conditional branch. Insert the second branch.
+ BuildMI(&MBB, dl, get(SystemZ::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+const TargetInstrDesc&
+SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case SystemZCC::O: return get(SystemZ::JO);
+ case SystemZCC::H: return get(SystemZ::JH);
+ case SystemZCC::NLE: return get(SystemZ::JNLE);
+ case SystemZCC::L: return get(SystemZ::JL);
+ case SystemZCC::NHE: return get(SystemZ::JNHE);
+ case SystemZCC::LH: return get(SystemZ::JLH);
+ case SystemZCC::NE: return get(SystemZ::JNE);
+ case SystemZCC::E: return get(SystemZ::JE);
+ case SystemZCC::NLH: return get(SystemZ::JNLH);
+ case SystemZCC::HE: return get(SystemZ::JHE);
+ case SystemZCC::NL: return get(SystemZ::JNL);
+ case SystemZCC::LE: return get(SystemZ::JLE);
+ case SystemZCC::NH: return get(SystemZ::JNH);
+ case SystemZCC::NO: return get(SystemZ::JNO);
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: return SystemZCC::INVALID;
+ case SystemZ::JO: return SystemZCC::O;
+ case SystemZ::JH: return SystemZCC::H;
+ case SystemZ::JNLE: return SystemZCC::NLE;
+ case SystemZ::JL: return SystemZCC::L;
+ case SystemZ::JNHE: return SystemZCC::NHE;
+ case SystemZ::JLH: return SystemZCC::LH;
+ case SystemZ::JNE: return SystemZCC::NE;
+ case SystemZ::JE: return SystemZCC::E;
+ case SystemZ::JNLH: return SystemZCC::NLH;
+ case SystemZ::JHE: return SystemZCC::HE;
+ case SystemZ::JNL: return SystemZCC::NL;
+ case SystemZ::JLE: return SystemZCC::LE;
+ case SystemZ::JNH: return SystemZCC::NH;
+ case SystemZ::JNO: return SystemZCC::NO;
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid condition!");
+ case SystemZCC::O: return SystemZCC::NO;
+ case SystemZCC::H: return SystemZCC::NH;
+ case SystemZCC::NLE: return SystemZCC::LE;
+ case SystemZCC::L: return SystemZCC::NL;
+ case SystemZCC::NHE: return SystemZCC::HE;
+ case SystemZCC::LH: return SystemZCC::NLH;
+ case SystemZCC::NE: return SystemZCC::E;
+ case SystemZCC::E: return SystemZCC::NE;
+ case SystemZCC::NLH: return SystemZCC::LH;
+ case SystemZCC::HE: return SystemZCC::NHE;
+ case SystemZCC::NL: return SystemZCC::L;
+ case SystemZCC::LE: return SystemZCC::NLE;
+ case SystemZCC::NH: return SystemZCC::H;
+ case SystemZCC::NO: return SystemZCC::O;
+ }
+}
+
+const TargetInstrDesc&
+SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Don't have long disp version of this instruction");
+ case SystemZ::MOV32mr: return get(SystemZ::MOV32mry);
+ case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy);
+ case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y);
+ case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry);
+ case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry);
+ case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry);
+ case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry);
+ case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry);
+ case SystemZ::MOV8mi: return get(SystemZ::MOV8miy);
+ case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy);
+ case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy);
+ case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy);
+ case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry);
+ case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry);
+ case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy);
+ case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy);
+ case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry);
+ case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy);
+ }
+}
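Together, AnalyzeBranch, RemoveBranch and InsertBranch let target-independent passes rewrite block terminators without knowing SystemZ opcodes. A sketch of the usual remove/reverse/reinsert sequence, assuming TII points at this SystemZInstrInfo; the driving logic is illustrative (in-tree code would normally go through ReverseBranchCondition):

    MachineBasicBlock *TBB = 0, *FBB = 0;
    SmallVector<MachineOperand, 1> Cond;
    if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, false) &&
        Cond.size() == 1 && FBB) {
      TII->RemoveBranch(MBB);                          // erase both terminators
      SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
      Cond[0].setImm(TII->getOppositeCondition(CC));   // e.g. E becomes NE
      TII->InsertBranch(MBB, FBB, TBB, Cond);          // branch targets swapped
    }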
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 000000000000..e16d704164e0
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,119 @@
+//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+/// SystemZII - This namespace holds all of the target-specific flags that
+/// instruction info tracks.
+///
+namespace SystemZII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // SystemZ Specific MachineOperand flags.
+
+ MO_NO_FLAG = 0,
+
+ /// MO_GOTENT - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// SYMBOL_LABEL @GOTENT
+ MO_GOTENT = 1,
+
+    /// MO_PLT - On a symbol operand this indicates that the immediate is the
+    /// offset to the PLT entry of the symbol name from the current code location.
+ ///
+ /// SYMBOL_LABEL @PLT
+ MO_PLT = 2
+ };
+}
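These flags travel on machine operands as target flags. A minimal sketch of how lowering code might attach one when emitting a call through the PLT; the call site and variables are illustrative, not taken from this patch:

    // GV is a GlobalValue*, dl a DebugLoc, MBB/MI the insertion point.
    BuildMI(MBB, MI, dl, TII.get(SystemZ::CALLi))
      .addGlobalAddress(GV, /*Offset=*/0, SystemZII::MO_PLT);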
+
+class SystemZInstrInfo : public TargetInstrInfoImpl {
+ const SystemZRegisterInfo RI;
+ SystemZTargetMachine &TM;
+ IndexedMap<unsigned> RegSpillOffsets;
+public:
+ explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+ bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
+ SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
+ const TargetInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+ const TargetInstrDesc& getLongDispOpc(unsigned Opc) const;
+
+ const TargetInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+ if (Offset < 0 || Offset >= 4096)
+ return getLongDispOpc(Opc);
+ else
+ return get(Opc);
+ }
+};
+
+}
+
+#endif
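getMemoryInstr encodes the RX/RXY split: the classic forms carry a 12-bit unsigned displacement (0..4095), while the long-displacement forms take a 20-bit signed one. A small sketch of the selection, assuming TII is a SystemZInstrInfo reference:

    // In range: keeps the short form ("st").
    const TargetInstrDesc &Short = TII.getMemoryInstr(SystemZ::MOV32mr, 4000);
    // Out of range or negative: falls back to the RXY form ("sty").
    const TargetInstrDesc &Long1 = TII.getMemoryInstr(SystemZ::MOV32mr, 5000);
    const TargetInstrDesc &Long2 = TII.getMemoryInstr(SystemZ::MOV32mr, -8);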
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 000000000000..56d75ddfc0c7
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,1155 @@
+//===- SystemZInstrInfo.td - SystemZ Instruction defs --------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Instruction Predicate Definitions.
+def IsZ10 : Predicate<"Subtarget.isZ10()">;
+
+include "SystemZInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
+class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
+def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
+def SDT_CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_BrCond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisI8<1>]>;
+def SDT_SelectCC : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisI8<3>]>;
+def SDT_Address : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+def SystemZcallseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def SystemZcallseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest, [SDNPOutFlag]>;
+def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest, [SDNPOutFlag]>;
+def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
+ [SDNPHasChain, SDNPInFlag]>;
+def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC, [SDNPInFlag]>;
+def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
+
+
+include "SystemZOperands.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction list.
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(SystemZcallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
+
+let usesCustomDAGSchedInserter = 1 in {
+ def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
+ "# Select32 PSEUDO",
+ [(set GR32:$dst,
+ (SystemZselect GR32:$src1, GR32:$src2, imm:$cc))]>;
+ def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
+ "# Select64 PSEUDO",
+ [(set GR64:$dst,
+ (SystemZselect GR64:$src1, GR64:$src2, imm:$cc))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1 in {
+ def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>;
+
+ let isIndirectBranch = 1 in
+ def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>;
+ }
+
+ let Uses = [PSW] in {
+ def JO : Pseudo<(outs), (ins brtarget:$dst),
+ "jo\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O)]>;
+ def JH : Pseudo<(outs), (ins brtarget:$dst),
+ "jh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H)]>;
+ def JNLE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE)]>;
+ def JL : Pseudo<(outs), (ins brtarget:$dst),
+ "jl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L)]>;
+ def JNHE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE)]>;
+ def JLH : Pseudo<(outs), (ins brtarget:$dst),
+ "jlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH)]>;
+ def JNE : Pseudo<(outs), (ins brtarget:$dst),
+ "jne\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE)]>;
+ def JE : Pseudo<(outs), (ins brtarget:$dst),
+ "je\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E)]>;
+ def JNLH: Pseudo<(outs), (ins brtarget:$dst),
+ "jnlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH)]>;
+ def JHE : Pseudo<(outs), (ins brtarget:$dst),
+ "jhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE)]>;
+ def JNL : Pseudo<(outs), (ins brtarget:$dst),
+ "jnl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL)]>;
+ def JLE : Pseudo<(outs), (ins brtarget:$dst),
+ "jle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE)]>;
+ def JNH : Pseudo<(outs), (ins brtarget:$dst),
+ "jnh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH)]>;
+ def JNO : Pseudo<(outs), (ins brtarget:$dst),
+ "jno\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO)]>;
+ } // Uses = [PSW]
+} // isBranch = 1
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. Uses for argument
+ // registers are added manually.
+ let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
+ F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in {
+ def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops),
+ "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>;
+ def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops),
+ "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>;
+ }
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let isReMaterializable = 1 in
+// FIXME: Provide imm12 variant
+// FIXME: Address should be halfword aligned...
+def LA64r : RXI<0x47,
+ (outs GR64:$dst), (ins laaddr:$src),
+ "lay\t{$dst, $src}",
+ [(set GR64:$dst, laaddr:$src)]>;
+def LA64rm : RXYI<0x71E3,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "larl\t{$dst, $src}",
+ [(set GR64:$dst,
+ (SystemZpcrelwrapper tglobaladdr:$src))]>;
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "# no-op", []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+let neverHasSideEffects = 1 in {
+def MOV32rr : RRI<0x18,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lr\t{$dst, $src}",
+ []>;
+def MOV64rr : RREI<0xB904,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgr\t{$dst, $src}",
+ []>;
+def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlgr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+}
+
+def MOVSX64rr32 : RREI<0xB914,
+ (outs GR64:$dst), (ins GR32:$src),
+ "lgfr\t{$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVZX64rr32 : RREI<0xB916,
+ (outs GR64:$dst), (ins GR32:$src),
+ "llgfr\t{$dst, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV32ri16 : RII<0x8A7,
+ (outs GR32:$dst), (ins s16imm:$src),
+ "lhi\t{$dst, $src}",
+ [(set GR32:$dst, immSExt16:$src)]>;
+def MOV64ri16 : RII<0x9A7,
+ (outs GR64:$dst), (ins s16imm64:$src),
+ "lghi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt16:$src)]>;
+
+def MOV64rill16 : RII<0xFA5,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "llill\t{$dst, $src}",
+ [(set GR64:$dst, i64ll16:$src)]>;
+def MOV64rilh16 : RII<0xEA5,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "llilh\t{$dst, $src}",
+ [(set GR64:$dst, i64lh16:$src)]>;
+def MOV64rihl16 : RII<0xDA5,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "llihl\t{$dst, $src}",
+ [(set GR64:$dst, i64hl16:$src)]>;
+def MOV64rihh16 : RII<0xCA5,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "llihh\t{$dst, $src}",
+ [(set GR64:$dst, i64hh16:$src)]>;
+
+def MOV64ri32 : RILI<0x1C0,
+ (outs GR64:$dst), (ins s32imm64:$src),
+ "lgfi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt32:$src)]>;
+def MOV64rilo32 : RILI<0xFC0,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "llilf\t{$dst, $src}",
+ [(set GR64:$dst, i64lo32:$src)]>;
+def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src),
+ "llihf\t{$dst, $src}",
+ [(set GR64:$dst, i64hi32:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+def MOV32rm : RXI<0x58,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "l\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr12:$src))]>;
+def MOV32rmy : RXYI<0x58E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "ly\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr:$src))]>;
+def MOV64rm : RXYI<0x04E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lg\t{$dst, $src}",
+ [(set GR64:$dst, (load rriaddr:$src))]>;
+def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tl\t${dst:subreg_odd}, $src\n"
+ "\tl\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr12:$src))]>;
+def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tly\t${dst:subreg_odd}, $src\n"
+ "\tly\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr:$src))]>;
+def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlg\t${dst:subreg_odd}, $src\n"
+ "\tlg\t${dst:subreg_even}, 8+$src",
+ [(set GR128:$dst, (load rriaddr:$src))]>;
+}
+
+def MOV32mr : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "st\t{$src, $dst}",
+ [(store GR32:$src, rriaddr12:$dst)]>;
+def MOV32mry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sty\t{$src, $dst}",
+ [(store GR32:$src, rriaddr:$dst)]>;
+def MOV64mr : RXYI<0x24E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stg\t{$src, $dst}",
+ [(store GR64:$src, rriaddr:$dst)]>;
+def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tst\t${src:subreg_odd}, $dst\n"
+ "\tst\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr12:$dst)]>;
+def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tsty\t${src:subreg_odd}, $dst\n"
+ "\tsty\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr:$dst)]>;
+def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tstg\t${src:subreg_odd}, $dst\n"
+ "\tstg\t${src:subreg_even}, 8+$dst",
+ [(store GR128:$src, rriaddr:$dst)]>;
+
+def MOV8mi : SII<0x92,
+ (outs), (ins riaddr12:$dst, i32i8imm:$src),
+ "mvi\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>;
+def MOV8miy : SIYI<0x52EB,
+ (outs), (ins riaddr:$dst, i32i8imm:$src),
+ "mviy\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>;
+
+let AddedComplexity = 2 in {
+def MOV16mi : SILI<0xE544,
+ (outs), (ins riaddr12:$dst, s16imm:$src),
+ "mvhhi\t{$dst, $src}",
+ [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV32mi16 : SILI<0xE54C,
+ (outs), (ins riaddr12:$dst, s32imm:$src),
+ "mvhi\t{$dst, $src}",
+ [(store (i32 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV64mi16 : SILI<0xE548,
+ (outs), (ins riaddr12:$dst, s32imm64:$src),
+ "mvghi\t{$dst, $src}",
+ [(store (i64 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+}
+
+// sexts
+def MOVSX32rr8 : RREI<0xB926,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lbr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>;
+def MOVSX64rr8 : RREI<0xB906,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgbr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>;
+def MOVSX32rr16 : RREI<0xB927,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lhr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>;
+def MOVSX64rr16 : RREI<0xB907,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lghr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>;
+
+// extloads
+def MOVSX32rm8 : RXYI<0x76E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lb\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>;
+def MOVSX32rm16 : RXI<0x48,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "lh\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>;
+def MOVSX32rm16y : RXYI<0x78E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lhy\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>;
+def MOVSX64rm8 : RXYI<0x77E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgb\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>;
+def MOVSX64rm16 : RXYI<0x15E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgh\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>;
+def MOVSX64rm32 : RXYI<0x14E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgf\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>;
+
+def MOVZX32rm8 : RXYI<0x94E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llc\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>;
+def MOVZX32rm16 : RXYI<0x95E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llh\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>;
+def MOVZX64rm8 : RXYI<0x90E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgc\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>;
+def MOVZX64rm16 : RXYI<0x91E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgh\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>;
+def MOVZX64rm32 : RXYI<0x16E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgf\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>;
+
+// truncstores
+def MOV32m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr:$dst)]>;
+
+def MOV32m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr:$dst)]>;
+
+def MOV64m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m32r : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "st\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m32ry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sty\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr:$dst)]>;
+
+// multiple register moves
+// FIXME: should we use multiple arg nodes?
+def MOV32mrm : RSYI<0x90EB,
+ (outs), (ins riaddr:$dst, GR32:$from, GR32:$to),
+ "stmy\t{$from, $to, $dst}",
+ []>;
+def MOV64mrm : RSYI<0x24EB,
+ (outs), (ins riaddr:$dst, GR64:$from, GR64:$to),
+ "stmg\t{$from, $to, $dst}",
+ []>;
+def MOV32rmm : RSYI<0x90EB,
+ (outs GR32:$from, GR32:$to), (ins riaddr:$dst),
+ "lmy\t{$from, $to, $dst}",
+ []>;
+def MOV64rmm : RSYI<0x04EB,
+ (outs GR64:$from, GR64:$to), (ins riaddr:$dst),
+ "lmg\t{$from, $to, $dst}",
+ []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in {
+def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "lhi\t${dst:subreg_even}, 0",
+ []>;
+def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "lghi\t${dst:subreg_even}, 0",
+ []>;
+}
+
+// Byte swaps
+def BSWAP32rr : RREI<0xB91F,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lrvr\t{$dst, $src}",
+ [(set GR32:$dst, (bswap GR32:$src))]>;
+def BSWAP64rr : RREI<0xB90F,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lrvgr\t{$dst, $src}",
+ [(set GR64:$dst, (bswap GR64:$src))]>;
+
+// FIXME: this pattern is invalid for big-endian targets
+//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src),
+// "lrvh\t{$dst, $src}",
+// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>;
+def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src),
+ "lrv\t{$dst, $src}",
+ [(set GR32:$dst, (bswap (load rriaddr:$src)))]>;
+def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src),
+ "lrvg\t{$dst, $src}",
+ [(set GR64:$dst, (bswap (load rriaddr:$src)))]>;
+
+//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src),
+// "strvh\t{$src, $dst}",
+// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src),
+ "strv\t{$src, $dst}",
+ [(store (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src),
+ "strvg\t{$src, $dst}",
+ [(store (bswap GR64:$src), rriaddr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Defs = [PSW] in {
+def NEG32rr : RRI<0x13,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lcr\t{$dst, $src}",
+ [(set GR32:$dst, (ineg GR32:$src)),
+ (implicit PSW)]>;
+def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src),
+ "lcgr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg GR64:$src)),
+ (implicit PSW)]>;
+def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
+ "lcgfr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg (sext GR32:$src))),
+ (implicit PSW)]>;
+}
+
+let isTwoAddress = 1 in {
+
+let Defs = [PSW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "ar\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "agr\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "a\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ay\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ag\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+
+def ADD32ri16 : RII<0xA7A,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "ahi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD32ri : RILI<0xC29,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "afi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit PSW)]>;
+def ADD64ri16 : RILI<0xA7B,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "aghi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD64ri32 : RILI<0xC28,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "agfi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)),
+ (implicit PSW)]>;
+
+let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y
+def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alr\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>;
+def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "algr\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>;
+}
+
+def ADC32ri : RILI<0xC2B,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "alfi\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>;
+def ADC64ri32 : RILI<0xC2A,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "algfi\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alcr\t{$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "alcgr\t{$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND32rr : RRI<0x14,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "nr\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
+def AND64rr : RREI<0xB980,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ngr\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
+}
+
+def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "n\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ny\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ng\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def AND32rill16 : RII<0xA57,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
+def AND64rill16 : RII<0xA57,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
+
+def AND32rilh16 : RII<0xA56,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
+def AND64rilh16 : RII<0xA56,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
+
+def AND64rihl16 : RII<0xA55,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "nihl\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
+def AND64rihh16 : RII<0xA54,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "nihh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
+
+def AND32ri : RILI<0xC0B,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
+def AND64rilo32 : RILI<0xC0B,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
+def AND64rihi32 : RILI<0xC0A,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "nihf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR32rr : RRI<0x16,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "or\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
+def OR64rr : RREI<0xB981,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ogr\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
+}
+
+def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "o\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "oy\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "og\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+ // FIXME: Provide proper encoding!
+def OR32ri16 : RII<0xA5B,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
+def OR32ri16h : RII<0xA5A,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
+def OR32ri : RILI<0xC0D,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
+
+def OR64rill16 : RII<0xA5B,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
+def OR64rilh16 : RII<0xA5A,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
+def OR64rihl16 : RII<0xA59,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "oihl\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
+def OR64rihh16 : RII<0xA58,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "oihh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
+
+def OR64rilo32 : RILI<0xC0D,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
+def OR64rihi32 : RILI<0xC0C,
+ (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ "oihf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
+
+def SUB32rr : RRI<0x1B,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "sr\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+def SUB64rr : RREI<0xB909,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+
+def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "s\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "sy\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "sg\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def SBC32rr : RRI<0x1F,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slr\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>;
+def SBC64rr : RREI<0xB90B,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slgr\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>;
+
+def SBC32ri : RILI<0xC25,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "sllfi\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>;
+def SBC64ri32 : RILI<0xC24,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "slgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slbr\t{$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slbgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR32rr : RRI<0x17,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xr\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+def XOR64rr : RREI<0xB982,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "xgr\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
+}
+
+def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "x\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "xy\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "xg\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def XOR32ri : RILI<0xC07,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "xilf\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
+
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def MUL32rr : RREI<0xB252,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "msr\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>;
+def MUL64rr : RREI<0xB90C,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "msgr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>;
+}
+
+def MUL64rrP : RRI<0x1C,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mr\t{$dst, $src2}",
+ []>;
+def UMUL64rrP : RREI<0xB996,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mlr\t{$dst, $src2}",
+ []>;
+def UMUL128rrP : RREI<0xB986,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "mlgr\t{$dst, $src2}",
+ []>;
+
+def MUL32ri16 : RII<0xA7C,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "mhi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>;
+def MUL64ri16 : RII<0xA7D,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "mghi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>;
+
+let AddedComplexity = 2 in {
+def MUL32ri : RILI<0xC21,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "msfi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>,
+ Requires<[IsZ10]>;
+def MUL64ri32 : RILI<0xC20,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "msgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>,
+ Requires<[IsZ10]>;
+}
+
+def MUL32rm : RXI<0x71,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "ms\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>;
+def MUL32rmy : RXYI<0xE351,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "msy\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>;
+def MUL64rm : RXYI<0xE30C,
+ (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "msg\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>;
+
+def MULSX64rr32 : RREI<0xB91C,
+ (outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
+ "msgfr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
+
+def SDIVREM32r : RREI<0xB91D,
+ (outs GR128:$dst), (ins GR128:$src1, GR32:$src2),
+ "dsgfr\t{$dst, $src2}",
+ []>;
+def SDIVREM64r : RREI<0xB90D,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dsgr\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32r : RREI<0xB997,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "dlr\t{$dst, $src2}",
+ []>;
+def UDIVREM64r : RREI<0xB987,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dlgr\t{$dst, $src2}",
+ []>;
+let mayLoad = 1 in {
+def SDIVREM32m : RXYI<0xE31D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsgf\t{$dst, $src2}",
+ []>;
+def SDIVREM64m : RXYI<0xE30D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsg\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
+ "dl\t{$dst, $src2}",
+ []>;
+def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dlg\t{$dst, $src2}",
+ []>;
+} // mayLoad
+} // isTwoAddress = 1
+
+//===----------------------------------------------------------------------===//
+// Shifts
+
+let isTwoAddress = 1 in
+def SRL32rri : RSI<0x88,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "srl\t{$src, $amt}",
+ [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>;
+def SRL64rri : RSYI<0xEB0C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srlg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
+
+let isTwoAddress = 1 in
+def SHL32rri : RSI<0x89,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sll\t{$src, $amt}",
+ [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>;
+def SHL64rri : RSYI<0xEB0D,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "sllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
+
+let Defs = [PSW] in {
+let isTwoAddress = 1 in
+def SRA32rri : RSI<0x8A,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sra\t{$src, $amt}",
+ [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)),
+ (implicit PSW)]>;
+
+def SRA64rri : RSYI<0xEB0A,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srag\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def ROTL32rri : RSYI<0xEB1D,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "rll\t{$dst, $src, $amt}",
+ [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>;
+def ROTL64rri : RSYI<0xEB1C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "rllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>;
+
+//===----------------------------------------------------------------------===//
+// Test instructions (like AND but do not produce any result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def CMP32rr : RRI<0x19,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "cr\t$src1, $src2",
+ [(SystemZcmp GR32:$src1, GR32:$src2),
+ (implicit PSW)]>;
+def CMP64rr : RREI<0xB920,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "cgr\t$src1, $src2",
+ [(SystemZcmp GR64:$src1, GR64:$src2),
+ (implicit PSW)]>;
+
+def CMP32ri : RILI<0xC2D,
+ (outs), (ins GR32:$src1, s32imm:$src2),
+ "cfi\t$src1, $src2",
+ [(SystemZcmp GR32:$src1, imm:$src2),
+ (implicit PSW)]>;
+def CMP64ri32 : RILI<0xC2C,
+ (outs), (ins GR64:$src1, s32imm64:$src2),
+ "cgfi\t$src1, $src2",
+ [(SystemZcmp GR64:$src1, i64immSExt32:$src2),
+ (implicit PSW)]>;
+
+def CMP32rm : RXI<0x59,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "c\t$src1, $src2",
+ [(SystemZcmp GR32:$src1, (load rriaddr12:$src2)),
+ (implicit PSW)]>;
+def CMP32rmy : RXYI<0xE359,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cy\t$src1, $src2",
+ [(SystemZcmp GR32:$src1, (load rriaddr:$src2)),
+ (implicit PSW)]>;
+def CMP64rm : RXYI<0xE320,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cg\t$src1, $src2",
+ [(SystemZcmp GR64:$src1, (load rriaddr:$src2)),
+ (implicit PSW)]>;
+
+def UCMP32rr : RRI<0x15,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "clr\t$src1, $src2",
+ [(SystemZucmp GR32:$src1, GR32:$src2),
+ (implicit PSW)]>;
+def UCMP64rr : RREI<0xB921,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "clgr\t$src1, $src2",
+ [(SystemZucmp GR64:$src1, GR64:$src2),
+ (implicit PSW)]>;
+
+def UCMP32ri : RILI<0xC2F,
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "clfi\t$src1, $src2",
+ [(SystemZucmp GR32:$src1, imm:$src2),
+ (implicit PSW)]>;
+def UCMP64ri32 : RILI<0xC2E,
+ (outs), (ins GR64:$src1, i64i32imm:$src2),
+ "clgfi\t$src1, $src2",
+ [(SystemZucmp GR64:$src1, i64immZExt32:$src2),
+ (implicit PSW)]>;
+
+def UCMP32rm : RXI<0x55,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "cl\t$src1, $src2",
+ [(SystemZucmp GR32:$src1, (load rriaddr12:$src2)),
+ (implicit PSW)]>;
+def UCMP32rmy : RXYI<0xE355,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cly\t$src1, $src2",
+ [(SystemZucmp GR32:$src1, (load rriaddr:$src2)),
+ (implicit PSW)]>;
+def UCMP64rm : RXYI<0xE321,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clg\t$src1, $src2",
+ [(SystemZucmp GR64:$src1, (load rriaddr:$src2)),
+ (implicit PSW)]>;
+
+def CMPSX64rr32 : RREI<0xB930,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "cgfr\t$src1, $src2",
+                     [(SystemZcmp GR64:$src1, (sext GR32:$src2)),
+ (implicit PSW)]>;
+def UCMPZX64rr32 : RREI<0xB931,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "clgfr\t$src1, $src2",
+ [(SystemZucmp GR64:$src1, (zext GR32:$src2)),
+ (implicit PSW)]>;
+
+def CMPSX64rm32 : RXYI<0xE330,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cgf\t$src1, $src2",
+                     [(SystemZcmp GR64:$src1, (sextloadi64i32 rriaddr:$src2)),
+ (implicit PSW)]>;
+def UCMPZX64rm32 : RXYI<0xE331,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clgf\t$src1, $src2",
+ [(SystemZucmp GR64:$src1, (zextloadi64i32 rriaddr:$src2)),
+ (implicit PSW)]>;
+
+// FIXME: Add other crazy ucmp forms
+
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Other crazy stuff
+let Defs = [PSW] in {
+def FLOGR64 : RREI<0xB983,
+ (outs GR128:$dst), (ins GR64:$src),
+ "flogr\t{$dst, $src}",
+ []>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns.
+//===----------------------------------------------------------------------===//
+
+// ConstPools, JumpTables
+def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
+def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
+
+// anyext
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// calls
+def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
+def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// FIXME: use add/sub tricks with 32768/-32768
+
+// Arbitrary immediate support.
+def : Pat<(i32 imm:$src),
+ (EXTRACT_SUBREG (MOV64ri32 (i64 imm:$src)), subreg_32bit)>;
+
+// Implement in terms of LLIHF/OILF.
+def : Pat<(i64 imm:$imm),
+ (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
+
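A worked example of the LLIHF/OILF split performed by the pattern above, on an illustrative constant:

    #include <assert.h>
    #include <stdint.h>
    int main() {
      uint64_t Imm = 0x123456789ABCDEF0ULL;  // materialized as:
      uint32_t Hi  = uint32_t(Imm >> 32);    //   llihf %r1, 0x12345678
      uint32_t Lo  = uint32_t(Imm);          //   oilf  %r1, 0x9ABCDEF0
      assert(((uint64_t(Hi) << 32) | Lo) == Imm);
      return 0;
    }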
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// extload patterns
+def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>;
+def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>;
+def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
+
+// muls
+def : Pat<(mulhs GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_even32)>;
+
+def : Pat<(mulhu GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_even32)>;
+def : Pat<(mulhu GR64:$src1, GR64:$src2),
+ (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ GR64:$src1, subreg_odd),
+ GR64:$src2),
+ subreg_even)>;
+
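The mulh patterns above lean on the even/odd register-pair convention: the multiplicand sits in the odd half, the widening multiply writes the full product across the pair, and the high half lands in the even register, which is what EXTRACT_SUBREG pulls out. The same computation in scalar C++, for illustration:

    #include <stdint.h>
    // mulhu for 32-bit operands: the even-register half of the mlr product.
    uint32_t mulhu32(uint32_t a, uint32_t b) {
      uint64_t Product = uint64_t(a) * uint64_t(b);  // full 64-bit product
      return uint32_t(Product >> 32);                // high half
    }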
+def : Pat<(ctlz GR64:$src),
+ (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
new file mode 100644
index 000000000000..8ea11c95b27d
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -0,0 +1,26 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SystemZMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) {
+ AlignmentIsInBytes = true;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
+ NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h
new file mode 100644
index 000000000000..3bebcb74e37c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h
@@ -0,0 +1,29 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SystemZMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct SystemZMCAsmInfo : public MCAsmInfo {
+ explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 000000000000..e47d41962ea8
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,50 @@
+//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares SystemZ-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SystemZMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private SystemZ target-specific information for each
+/// MachineFunction.
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// LowReg - Low register of range of callee-saved registers to store.
+ unsigned LowReg;
+
+ /// HighReg - High register of range of callee-saved registers to store.
+ unsigned HighReg;
+public:
+  SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0), LowReg(0), HighReg(0) {}
+
+  SystemZMachineFunctionInfo(MachineFunction &MF)
+    : CalleeSavedFrameSize(0), LowReg(0), HighReg(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getLowReg() const { return LowReg; }
+ void setLowReg(unsigned Reg) { LowReg = Reg; }
+
+ unsigned getHighReg() const { return HighReg; }
+ void setHighReg(unsigned Reg) { HighReg = Reg; }
+};
+
+} // End llvm namespace
+
+#endif
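A minimal sketch of how SystemZ frame-lowering code would typically reach this object; the surrounding function, register choices and sizes are illustrative:

    // E.g. inside emitPrologue(MachineFunction &MF):
    SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
    FuncInfo->setCalleeSavedFrameSize(NumSavedRegs * 8);  // 8 bytes per GR64
    FuncInfo->setLowReg(SystemZ::R6D);    // assumed first saved register
    FuncInfo->setHighReg(SystemZ::R15D);  // assumed last saved register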
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 000000000000..156cace9c374
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,306 @@
+//===- SystemZOperands.td - SystemZ Operands defs ------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various SystemZ instruction operands.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff.
+//===----------------------------------------------------------------------===//
+
+// SystemZ-specific condition codes. These correspond to CondCodes in
+// SystemZ.h and must be kept in sync.
+def SYSTEMZ_COND_O : PatLeaf<(i8 0)>;
+def SYSTEMZ_COND_H : PatLeaf<(i8 1)>;
+def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
+def SYSTEMZ_COND_L : PatLeaf<(i8 3)>;
+def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
+def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>;
+def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>;
+def SYSTEMZ_COND_E : PatLeaf<(i8 7)>;
+def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
+def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>;
+def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>;
+def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>;
+def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>;
+def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>;
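For the numbers above to mean anything, SystemZ.h presumably declares the enum in the same order; a hedged reconstruction, inferred from this list and from getBrCond in SystemZInstrInfo.cpp, not copied from the patch:

    namespace SystemZCC {
      // Order mirrors the PatLeaf values above: O = 0 ... NO = 13.
      enum CondCodes {
        O, H, NLE, L, NHE, LH, NE, E, NLH, HE, NL, LE, NH, NO, INVALID
      };
    }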
+
+def LO8 : SDNodeXForm<imm, [{
+ // Transformation function: return low 8 bits.
+ return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
+}]>;
+
+def LL16 : SDNodeXForm<imm, [{
+ // Transformation function: return low 16 bits.
+ return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
+}]>;
+
+def LH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 16-31.
+ return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
+}]>;
+
+def HL16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-47.
+ return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
+}]>;
+
+def HH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 48-63.
+ return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
+}]>;
+
+def LO32 : SDNodeXForm<imm, [{
+ // Transformation function: return low 32 bits.
+ return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
+}]>;
+
+def HI32 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-63.
+ return getI32Imm(N->getZExtValue() >> 32);
+}]>;
+
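Each transform slices one 16- or 32-bit field out of the immediate. On an illustrative constant:

    #include <stdint.h>
    const uint64_t V = 0xAAAABBBBCCCCDDDDULL;
    const uint16_t ll16 = uint16_t(V);        // 0xDDDD, bits 0-15
    const uint16_t lh16 = uint16_t(V >> 16);  // 0xCCCC, bits 16-31
    const uint16_t hl16 = uint16_t(V >> 32);  // 0xBBBB, bits 32-47
    const uint16_t hh16 = uint16_t(V >> 48);  // 0xAAAA, bits 48-63
    const uint32_t lo32 = uint32_t(V);        // 0xCCCCDDDD, bits 0-31
    const uint32_t hi32 = uint32_t(V >> 32);  // 0xAAAABBBB, bits 32-63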
+def i32ll16 : PatLeaf<(i32 imm), [{
+ // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16 : PatLeaf<(i32 imm), [{
+ // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i32ll16c : PatLeaf<(i32 imm), [{
+ // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
+ return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16c : PatLeaf<(i32 imm), [{
+ // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
+ // bits set.
+ return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64ll16 : PatLeaf<(i64 imm), [{
+ // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16 : PatLeaf<(i64 imm), [{
+ // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16 : PatLeaf<(i64 imm), [{
+ // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
+ return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16 : PatLeaf<(i64 imm), [{
+ // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
+ return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
+}], HH16>;
+
+def i64ll16c : PatLeaf<(i64 imm), [{
+  // i64ll16c predicate - true if all bits of the 64-bit immediate except the
+  // rightmost 16 are set.
+ return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16c : PatLeaf<(i64 imm), [{
+  // i64lh16c predicate - true if all bits of the 64-bit immediate except
+  // bits 16-31 are set.
+ return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16c : PatLeaf<(i64 imm), [{
+ // i64hl16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 32-47.
+ return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16c : PatLeaf<(i64 imm), [{
+ // i64hh16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 48-63.
+ return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
+}], HH16>;
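+// For example, 0xFFFFFFFFFFFF1234 satisfies i64ll16c: every bit outside the
+// rightmost 16 is set, and LL16 extracts the significant half, 0x1234.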
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
+ // field.
+ if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int16_t)val);
+ } else if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((int32_t)val == (int16_t)val);
+ }
+
+ return false;
+}], LL16>;
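+// For example, the i32 value 0xFFFF8000 (-32768) sign-extends from 16 bits,
+// so immSExt16 matches it and LL16 produces the 16-bit encoding 0x8000.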
+
+def immSExt32 : PatLeaf<(i64 imm), [{
+ // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
+ // field.
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int32_t)val);
+}], LO32>;
+
+def i64lo32 : PatLeaf<(i64 imm), [{
+ // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
+ // bits set.
+ return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32 : PatLeaf<(i64 imm), [{
+ // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
+ return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], HI32>;
+
+def i64lo32c : PatLeaf<(i64 imm), [{
+ // i64lo32c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) the rightmost 32 bits, i.e. bits 32-63 are all ones.
+ return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32c : PatLeaf<(i64 imm), [{
+ // i64hi32c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 32-63, i.e. the rightmost 32 bits are all ones.
+ return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], HI32>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+ // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+ // sign extended field.
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}], LO8>;
+
+def i32immSExt16 : PatLeaf<(i32 imm), [{
+ // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
+ // sign extended field.
+ return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
+}], LL16>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}], LO32>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero extended field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}], LO32>;
+
+// extloads
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
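+// These fragments pin the result type of each extending load so that the
+// instruction patterns can match, e.g., a zero-extending 32-bit load with
+// an i64 result as a single operation.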
+
+// Some more descriptive operand definitions.
+// 32 bits, but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+// 32 bits, but only 16 bits are significant.
+def i32i16imm : Operand<i32>;
+// 64 bits, but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Unsigned i12
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+}
+def u12imm64 : Operand<i64> {
+ let PrintMethod = "printU12ImmOperand";
+}
+
+// Signed i16
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+// Signed i20
+def s20imm : Operand<i32> {
+ let PrintMethod = "printS20ImmOperand";
+}
+def s20imm64 : Operand<i64> {
+ let PrintMethod = "printS20ImmOperand";
+}
+// Signed i32
+def s32imm : Operand<i32> {
+ let PrintMethod = "printS32ImmOperand";
+}
+def s32imm64 : Operand<i64> {
+ let PrintMethod = "printS32ImmOperand";
+}
+
+def imm_pcrel : Operand<i64> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+
+// riaddr := reg + imm
+def riaddr32 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
+}
+
+def riaddr12 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
+}
+
+def riaddr : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
+}
+
+//===----------------------------------------------------------------------===//
+
+// rriaddr := reg + reg + imm
+def rriaddr12 : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
+}
+def rriaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
+def laaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectLAAddr", [add, sub, or, frameindex], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
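+
+// In assembly these print in the usual s390 notation, a displacement over a
+// base register, e.g. 8(%r15), with an index register added for the
+// rriaddr/laaddr forms, e.g. 8(%r2,%r15).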
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 000000000000..38460a63712d
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,343 @@
+//===- SystemZRegisterInfo.cpp - SystemZ Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+ const SystemZInstrInfo &tii)
+ : SystemZGenRegisterInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
+ TM(tm), TII(tii) {
+}
+
+const unsigned*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
+ SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
+ SystemZ::R14D, SystemZ::R15D,
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+ &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+ &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+ &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+ &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+ &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
+ &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
+ &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
+ &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
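+ // R11 is reserved while it serves as the frame pointer; R14 (which holds
+ // the return address) and R15 (the stack pointer) are always reserved.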
+ if (hasFP(MF))
+ Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R14D);
+ Reserved.set(SystemZ::R15D);
+ return Reserved;
+}
+
+/// hasFP - Return true if the specified function should have a dedicated
+/// frame pointer register. This is true if the function has variable-sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+void SystemZRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MBB.erase(I);
+}
+
+int SystemZRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Fixed objects are really located in the "previous" frame.
+ if (FI < 0)
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ Offset += StackSize - TFI.getOffsetOfLocalArea();
+
+ // Skip the register save area if we generated the stack frame.
+ if (StackSize || MFI->hasCalls())
+ Offset -= TFI.getOffsetOfLocalArea();
+
+ return Offset;
+}
+
+unsigned
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+
+ // This must be part of an rri or ri operand memory reference. Replace the
+ // FrameIndex with the base register BasePtr, and add the offset to the
+ // displacement field.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // The offset is either a 12-bit unsigned or a 20-bit signed integer.
+ // FIXME: handle "too long" displacements.
+ int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+
+ // Pick the instruction variant whose displacement field can hold Offset:
+ // the 12-bit zero-extended form if it fits, the 20-bit signed form otherwise.
+ MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
+
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return 0;
+}
+
+void
+SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Determine whether R14/R15 will ever be clobbered inside the function,
+ // and if so mark them as used so the callee-saved scan will save them.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Check whether the high FPRs are ever used; if so, we need to save R15
+ // as well.
+ static const unsigned HighFPRs[] = {
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ };
+
+ bool HighFPRsUsed = false;
+ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+ HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+ if (FFI->hasCalls())
+ /* FIXME: function is varargs */
+ /* FIXME: function grabs RA */
+ /* FIXME: function calls eh_return */
+ MRI.setPhysRegUsed(SystemZ::R14D);
+
+ if (HighFPRsUsed ||
+ FFI->hasCalls() ||
+ FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+ FFI->hasVarSizedObjects() // Function calls dynamic allocas
+ /* FIXME: function is varargs */)
+ MRI.setPhysRegUsed(SystemZ::R15D);
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
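+/// The add-immediate form is chosen from the size of the adjustment, and an
+/// adjustment beyond the reach of a single add is split into several.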
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, const TargetInstrInfo &TII) {
+ unsigned Opc; uint64_t Chunk;
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+ if (Offset >= (1LL << 15) - 1) {
+ Opc = SystemZ::ADD64ri32;
+ Chunk = (1LL << 31) - 1;
+ } else {
+ Opc = SystemZ::ADD64ri16;
+ Chunk = (1LL << 15) - 1;
+ }
+
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal));
+ // The PSW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
+
+void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ // Get the number of bytes to allocate from the FrameInfo. Note that the
+ // area for callee-saved registers is already allocated, so we need to
+ // 'undo' that part of the stack adjustment.
+ uint64_t StackSize = MFI->getStackSize();
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::MOV64mr ||
+ MBBI->getOpcode() == SystemZ::MOV64mrm))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // adjust stack pointer: R15 -= numbytes
+ if (StackSize || MFI->hasCalls()) {
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+ }
+
+ if (hasFP(MF)) {
+ // Update R11 with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+ }
+}
+
+void SystemZRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ switch (RetOpcode) {
+ case SystemZ::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo. Note that the
+ // area for callee-saved registers is already allocated, so we need to
+ // 'undo' that part of the stack adjustment.
+ uint64_t StackSize =
+ MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
+
+ // Walk MBBI back past the terminators to the last non-terminator
+ // instruction.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ --MBBI;
+ if (!PI->getDesc().isTerminator())
+ break;
+ }
+
+ // When the callee-saved restores were emitted, the stack frame was not yet
+ // finalized (and thus the stack size was unknown). Fix up the offset now
+ // that the full stack size is known.
+ if (StackSize || MFI->hasCalls()) {
+ assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+ MBBI->getOpcode() == SystemZ::MOV64rm) &&
+ "Expected to see callee-save register restore code");
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+
+ unsigned i = 0;
+ MachineInstr &MI = *MBBI;
+ while (!MI.getOperand(i).isImm()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Unexpected restore code!");
+ }
+
+ uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+ // If Offset does not fit into the 20-bit signed displacement field, we
+ // need to emit some additional code...
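+ // (524287 is the largest positive 20-bit signed value, 2^19 - 1.)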
+ if (Offset > 524287) {
+ // Fold the displacement into load instruction as much as possible.
+ NumBytes = Offset - 524287;
+ Offset = 524287;
+ emitSPUpdate(MBB, MBBI, NumBytes, TII);
+ }
+
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+unsigned SystemZRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "SystemZGenRegisterInfo.inc"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 000000000000..b22b05da401e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,82 @@
+//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZREGISTERINFO_H
+#define SystemZREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "SystemZGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+namespace SystemZ {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// SystemZRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2
+ };
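+ // For example, extracting index SUBREG_32BIT from R0D yields R0W; see the
+ // SubRegSet definitions in SystemZRegisterInfo.td.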
+}
+
+class SystemZSubtarget;
+class SystemZInstrInfo;
+class Type;
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+ SystemZTargetMachine &TM;
+ const SystemZInstrInfo &TII;
+
+ SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(MachineFunction &MF) const { return true; }
+ bool hasFP(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(MachineFunction &MF, int FI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
+
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 000000000000..8795847a6c3c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,490 @@
+//===- SystemZRegisterInfo.td - The SystemZ Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+ let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "SystemZ";
+}
+
+// We identify all our registers with a 4-bit ID, for consistency's sake.
+
+// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// GPR64 - One of the 16 64-bit general-purpose registers
+class GPR64<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// GPR128 - 8 even-odd register pairs
+class GPR128<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPRS<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// FPRL - One of the 16 64-bit floating-point registers
+class FPRL<bits<4> num, string n, list<Register> subregs>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+}
+
+// General-purpose registers
+def R0W : GPR32< 0, "r0">, DwarfRegNum<[0]>;
+def R1W : GPR32< 1, "r1">, DwarfRegNum<[1]>;
+def R2W : GPR32< 2, "r2">, DwarfRegNum<[2]>;
+def R3W : GPR32< 3, "r3">, DwarfRegNum<[3]>;
+def R4W : GPR32< 4, "r4">, DwarfRegNum<[4]>;
+def R5W : GPR32< 5, "r5">, DwarfRegNum<[5]>;
+def R6W : GPR32< 6, "r6">, DwarfRegNum<[6]>;
+def R7W : GPR32< 7, "r7">, DwarfRegNum<[7]>;
+def R8W : GPR32< 8, "r8">, DwarfRegNum<[8]>;
+def R9W : GPR32< 9, "r9">, DwarfRegNum<[9]>;
+def R10W : GPR32<10, "r10">, DwarfRegNum<[10]>;
+def R11W : GPR32<11, "r11">, DwarfRegNum<[11]>;
+def R12W : GPR32<12, "r12">, DwarfRegNum<[12]>;
+def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>;
+def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>;
+def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>;
+
+def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
+def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
+def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
+def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>;
+def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>;
+def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>;
+def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>;
+def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>;
+def R8D : GPR64< 8, "r8", [R8W]>, DwarfRegNum<[8]>;
+def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>;
+def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
+def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
+def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
+def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
+def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
+def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+
+// Register pairs
+def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>;
+def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>;
+def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>;
+def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>, DwarfRegNum<[6]>;
+def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>, DwarfRegNum<[8]>;
+def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>;
+def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>;
+def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>;
+
+def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>;
+def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>;
+def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>;
+def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>, DwarfRegNum<[6]>;
+def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>, DwarfRegNum<[8]>;
+def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>;
+def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>;
+def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>;
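+
+// The pair (R*P) and quad (R*Q) definitions model even-odd register pairs,
+// which instructions such as multiply and divide use for their double-width
+// operands and results.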
+
+// Floating-point registers
+def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
+def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>;
+def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>;
+def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>;
+def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>;
+def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>;
+def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>;
+def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>;
+def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>;
+def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>;
+def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>;
+def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>;
+def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>;
+def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
+def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
+def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
+
+def F0L : FPRL< 0, "f0", [F0S]>, DwarfRegNum<[16]>;
+def F1L : FPRL< 1, "f1", [F1S]>, DwarfRegNum<[17]>;
+def F2L : FPRL< 2, "f2", [F2S]>, DwarfRegNum<[18]>;
+def F3L : FPRL< 3, "f3", [F3S]>, DwarfRegNum<[19]>;
+def F4L : FPRL< 4, "f4", [F4S]>, DwarfRegNum<[20]>;
+def F5L : FPRL< 5, "f5", [F5S]>, DwarfRegNum<[21]>;
+def F6L : FPRL< 6, "f6", [F6S]>, DwarfRegNum<[22]>;
+def F7L : FPRL< 7, "f7", [F7S]>, DwarfRegNum<[23]>;
+def F8L : FPRL< 8, "f8", [F8S]>, DwarfRegNum<[24]>;
+def F9L : FPRL< 9, "f9", [F9S]>, DwarfRegNum<[25]>;
+def F10L : FPRL<10, "f10", [F10S]>, DwarfRegNum<[26]>;
+def F11L : FPRL<11, "f11", [F11S]>, DwarfRegNum<[27]>;
+def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>;
+def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>;
+def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>;
+def F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>;
+
+// Status register
+def PSW : SystemZReg<"psw">;
+
+def subreg_32bit : PatLeaf<(i32 1)>;
+def subreg_even32 : PatLeaf<(i32 1)>;
+def subreg_odd32 : PatLeaf<(i32 2)>;
+def subreg_even : PatLeaf<(i32 3)>;
+def subreg_odd : PatLeaf<(i32 4)>;
+
+def : SubRegSet<1, [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+
+def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
+ [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>;
+
+def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
+ [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>;
+
+def : SubRegSet<1, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
+ [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
+
+def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
+ [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
+
+def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
+ [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
+
+def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
+ [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
+
+/// Register classes
+def GR32 : RegisterClass<"SystemZ", [i32], 32,
+ // Volatile registers
+ [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W,
+ // Frame pointer, sometimes allocable
+ R11W,
+ // Volatile, but not allocable
+ R14W, R15W]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_REG32[] = {
+ SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
+ SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, SystemZ::R11W,
+ SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
+ SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
+ };
+ static const unsigned SystemZ_REG32_nofp[] = {
+ SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
+ SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, /* No R11W */
+ SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
+ SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
+ };
+ GR32Class::iterator
+ GR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG32_nofp;
+ else
+ return SystemZ_REG32;
+ }
+ GR32Class::iterator
+ GR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned));
+ else
+ return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned));
+ }
+ }];
+}
+
+/// Registers used to generate addresses. Everything except R0.
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
+ // Volatile registers
+ [R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W,
+ // Frame pointer, sometimes allocable
+ R11W,
+ // Volatile, but not allocable
+ R14W, R15W]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_ADDR32[] = {
+ SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
+ SystemZ::R5W, /* No R0W */ SystemZ::R12W, SystemZ::R11W,
+ SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
+ SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
+ };
+ static const unsigned SystemZ_ADDR32_nofp[] = {
+ SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
+ SystemZ::R5W, /* No R0W */ SystemZ::R12W, /* No R11W */
+ SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
+ SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
+ };
+ ADDR32Class::iterator
+ ADDR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_ADDR32_nofp;
+ else
+ return SystemZ_ADDR32;
+ }
+ ADDR32Class::iterator
+ ADDR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned));
+ else
+ return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned));
+ }
+ }];
+}
+
+def GR64 : RegisterClass<"SystemZ", [i64], 64,
+ // Volatile registers
+ [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D,
+ // Frame pointer, sometimes allocable
+ R11D,
+ // Volatile, but not allocable
+ R14D, R15D]>
+{
+ let SubRegClassList = [GR32];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_REG64[] = {
+ SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
+ SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, SystemZ::R11D,
+ SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
+ SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
+ };
+ static const unsigned SystemZ_REG64_nofp[] = {
+ SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
+ SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, /* No R11D */
+ SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
+ SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
+ };
+ GR64Class::iterator
+ GR64Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG64_nofp;
+ else
+ return SystemZ_REG64;
+ }
+ GR64Class::iterator
+ GR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned));
+ else
+ return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned));
+ }
+ }];
+}
+
+def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
+ // Volatile registers
+ [R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D,
+ // Frame pointer, sometimes allocable
+ R11D,
+ // Volatile, but not allocable
+ R14D, R15D]>
+{
+ let SubRegClassList = [ADDR32];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_ADDR64[] = {
+ SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
+ SystemZ::R5D, /* No R0D */ SystemZ::R12D, SystemZ::R11D,
+ SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
+ SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
+ };
+ static const unsigned SystemZ_ADDR64_nofp[] = {
+ SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
+ SystemZ::R5D, /* No R0D */ SystemZ::R12D, /* No R11D */
+ SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
+ SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
+ };
+ ADDR64Class::iterator
+ ADDR64Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_ADDR64_nofp;
+ else
+ return SystemZ_ADDR64;
+ }
+ ADDR64Class::iterator
+ ADDR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned));
+ else
+ return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned));
+ }
+ }];
+}
+
+// Even-odd register pairs
+def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
+ [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
+{
+ let SubRegClassList = [GR32, GR32];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_REG64P[] = {
+ SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, SystemZ::R10P,
+ SystemZ::R8P, SystemZ::R6P };
+ static const unsigned SystemZ_REG64P_nofp[] = {
+ SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, /* NO R10P */
+ SystemZ::R8P, SystemZ::R6P };
+ GR64PClass::iterator
+ GR64PClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG64P_nofp;
+ else
+ return SystemZ_REG64P;
+ }
+ GR64PClass::iterator
+ GR64PClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned));
+ else
+ return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned));
+ }
+ }];
+}
+
+def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
+ [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
+{
+ let SubRegClassList = [GR32, GR32, GR64, GR64];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_REG128[] = {
+ SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, SystemZ::R10Q,
+ SystemZ::R8Q, SystemZ::R6Q };
+ static const unsigned SystemZ_REG128_nofp[] = {
+ SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, /* NO R10Q */
+ SystemZ::R8Q, SystemZ::R6Q };
+ GR128Class::iterator
+ GR128Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG128_nofp;
+ else
+ return SystemZ_REG128;
+ }
+ GR128Class::iterator
+ GR128Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned));
+ else
+ return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned));
+ }
+ }];
+}
+
+def FP32 : RegisterClass<"SystemZ", [f32], 32,
+ [F0S, F1S, F2S, F3S, F4S, F5S, F6S, F7S,
+ F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_REGFP32[] = {
+ SystemZ::F0S, SystemZ::F2S, SystemZ::F4S, SystemZ::F6S,
+ SystemZ::F1S, SystemZ::F3S, SystemZ::F5S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S };
+ FP32Class::iterator
+ FP32Class::allocation_order_begin(const MachineFunction &MF) const {
+ return SystemZ_REGFP32;
+ }
+ FP32Class::iterator
+ FP32Class::allocation_order_end(const MachineFunction &MF) const {
+ return SystemZ_REGFP32 + (sizeof(SystemZ_REGFP32) / sizeof(unsigned));
+ }
+ }];
+}
+
+def FP64 : RegisterClass<"SystemZ", [f64], 64,
+ [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L,
+ F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
+ let SubRegClassList = [FP32];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned SystemZ_REGFP64[] = {
+ SystemZ::F0L, SystemZ::F2L, SystemZ::F4L, SystemZ::F6L,
+ SystemZ::F1L, SystemZ::F3L, SystemZ::F5L, SystemZ::F7L,
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L };
+ FP64Class::iterator
+ FP64Class::allocation_order_begin(const MachineFunction &MF) const {
+ return SystemZ_REGFP64;
+ }
+ FP64Class::iterator
+ FP64Class::allocation_order_end(const MachineFunction &MF) const {
+ return SystemZ_REGFP64 + (sizeof(SystemZ_REGFP64) / sizeof(unsigned));
+ }
+ }];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"SystemZ", [i64], 64, [PSW]> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 000000000000..a8b5e1f18679
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,47 @@
+//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZ specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "SystemZ.h"
+#include "SystemZGenSubtarget.inc"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &FS):
+ HasZ10Insts(false) {
+ std::string CPU = "z9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
+
+/// True if accessing the GV requires an extra load.
+bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // An extra load is needed for all externally visible globals.
+ if (isDirectCall)
+ return false;
+
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 000000000000..405d6e91b7ee
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,45 @@
+//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
+#define LLVM_TARGET_SystemZ_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class GlobalValue;
+class TargetMachine;
+
+class SystemZSubtarget : public TargetSubtarget {
+ bool HasZ10Insts;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ ///
+ SystemZSubtarget(const std::string &TT, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool isZ10() const { return HasZ10Insts; }
+
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 000000000000..990e0031c5ec
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,44 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZ.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+ // Register the target.
+ RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+ RegisterAsmInfo<SystemZMCAsmInfo> Y(TheSystemZTarget);
+}
+
+/// SystemZTargetMachine ctor - Create an LP64 architecture model
+///
+SystemZTargetMachine::SystemZTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
+ DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-f64:64:64-f128:128:128-a0:16:16"),
+ InstrInfo(*this), TLInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
+
+ if (getRelocationModel() == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+}
+
+bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSystemZISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 000000000000..551aeb5a3e47
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,61 @@
+//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+
+#include "SystemZInstrInfo.h"
+#include "SystemZISelLowering.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// SystemZTargetMachine
+///
+class SystemZTargetMachine : public LLVMTargetMachine {
+ SystemZSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+
+ // SystemZ has no target-specific frame lowering needs, so the generic
+ // TargetFrameInfo is used rather than a SystemZ-specific FrameInfo class.
+ TargetFrameInfo FrameInfo;
+public:
+ SystemZTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const SystemZRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual SystemZTargetLowering *getTargetLowering() const {
+ return const_cast<SystemZTargetLowering*>(&TLInfo);
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // SystemZTargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H
diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..743d8d322d05
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMSystemZInfo
+ SystemZTargetInfo.cpp
+ )
+
+add_dependencies(LLVMSystemZInfo SystemZCodeGenTable_gen)
diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile
new file mode 100644
index 000000000000..0be80eb4e6ad
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 000000000000..8272b1188201
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSystemZTarget;
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+ RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ");
+}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index ed544b73eaed..cc6be9fa7abb 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -41,7 +41,7 @@ unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
}
LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
- return wrap(unwrap(TD)->getIntPtrType());
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
}
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 7b843df7422d..5bcd6583635b 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/System/Mutex.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
@@ -155,13 +156,13 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =
<br><br>
<i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type
alignment. Type is
- one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector (aka
- packed) or aggregate. Size indicates the size, e.g., 32 or 64 bits.
+ one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector, or
+ aggregate. Size indicates the size, e.g., 32 or 64 bits.
\p
- The default string, fully specified is:
+ The default string, fully specified, is:
<br><br>
- "E-p:64:64:64-a0:0:0-f32:32:32-f64:0:64"
- "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:0:64"
+ "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64"
"-v64:64:64-v128:128:128"
<br><br>
Note that in the case of aggregates, 0 is the default ABI and preferred
@@ -171,6 +172,7 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =
void TargetData::init(const std::string &TargetDescription) {
std::string temp = TargetDescription;
+ LayoutMap = 0;
LittleEndian = false;
PointerMemSize = 8;
PointerABIAlign = 8;
@@ -184,9 +186,9 @@ void TargetData::init(const std::string &TargetDescription) {
setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
- setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
- setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct, union, class, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
while (!temp.empty()) {
std::string token = getToken(temp, "-");
@@ -316,61 +318,30 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
: Alignments[BestMatchIdx].PrefAlign;
}
-namespace {
-
-/// LayoutInfo - The lazy cache of structure layout information maintained by
-/// TargetData. Note that the struct types must have been free'd before
-/// llvm_shutdown is called (and thus this is deallocated) because all the
-/// targets with cached elements should have been destroyed.
-///
-typedef std::pair<const TargetData*,const StructType*> LayoutKey;
-
-struct DenseMapLayoutKeyInfo {
- static inline LayoutKey getEmptyKey() { return LayoutKey(0, 0); }
- static inline LayoutKey getTombstoneKey() {
- return LayoutKey((TargetData*)(intptr_t)-1, 0);
- }
- static unsigned getHashValue(const LayoutKey &Val) {
- return DenseMapInfo<void*>::getHashValue(Val.first) ^
- DenseMapInfo<void*>::getHashValue(Val.second);
- }
- static bool isEqual(const LayoutKey &LHS, const LayoutKey &RHS) {
- return LHS == RHS;
- }
-
- static bool isPod() { return true; }
-};
-
-typedef DenseMap<LayoutKey, StructLayout*, DenseMapLayoutKeyInfo> LayoutInfoTy;
-
-}
-
-static ManagedStatic<LayoutInfoTy> LayoutInfo;
-static ManagedStatic<sys::SmartMutex<true> > LayoutLock;
+typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
TargetData::~TargetData() {
- if (!LayoutInfo.isConstructed())
+ if (!LayoutMap)
return;
- sys::SmartScopedLock<true> Lock(&*LayoutLock);
// Remove any layouts for this TD.
- LayoutInfoTy &TheMap = *LayoutInfo;
+ LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap);
for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) {
- if (I->first.first == this) {
- I->second->~StructLayout();
- free(I->second);
- TheMap.erase(I++);
- } else {
- ++I;
- }
+ I->second->~StructLayout();
+ free(I->second);
+ TheMap.erase(I++);
}
+
+ delete static_cast<LayoutInfoTy*>(LayoutMap);
}
const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
- LayoutInfoTy &TheMap = *LayoutInfo;
+ if (!LayoutMap)
+ LayoutMap = static_cast<void*>(new LayoutInfoTy());
+
+ LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap);
- sys::SmartScopedLock<true> Lock(&*LayoutLock);
- StructLayout *&SL = TheMap[LayoutKey(this, Ty)];
+ StructLayout *&SL = TheMap[Ty];
if (SL) return SL;
// Otherwise, create the struct layout. Because it is variable length, we
@@ -392,10 +363,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
/// removed, this method must be called whenever a StructType is removed to
/// avoid a dangling pointer in this cache.
void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
- if (!LayoutInfo.isConstructed()) return; // No cache.
+ if (!LayoutMap) return; // No cache.
- sys::SmartScopedLock<true> Lock(&*LayoutLock);
- LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty));
+ LayoutInfoTy* LayoutInfo = static_cast<LayoutInfoTy*>(LayoutMap);
+ LayoutInfoTy::iterator I = LayoutInfo->find(Ty);
if (I == LayoutInfo->end()) return;
I->second->~StructLayout();
@@ -453,7 +424,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
case Type::VectorTyID:
return cast<VectorType>(Ty)->getBitWidth();
default:
- assert(0 && "TargetData::getTypeSizeInBits(): Unsupported type");
+ llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
break;
}
return 0;
@@ -508,7 +479,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
AlignType = VECTOR_ALIGN;
break;
default:
- assert(0 && "Bad type for getAlignment!!!");
+ llvm_unreachable("Bad type for getAlignment!!!");
break;
}
@@ -540,8 +511,8 @@ unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
/// getIntPtrType - Return an unsigned integer type that is the same size or
/// greater to the host pointer size.
-const IntegerType *TargetData::getIntPtrType() const {
- return IntegerType::get(getPointerSizeInBits());
+const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
+ return IntegerType::get(C, getPointerSizeInBits());
}
@@ -555,7 +526,8 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
if (const StructType *STy = dyn_cast<StructType>(*TI)) {
- assert(Indices[CurIDX]->getType() == Type::Int32Ty &&
+ assert(Indices[CurIDX]->getType() ==
+ Type::getInt32Ty(ptrTy->getContext()) &&
"Illegal struct idx");
unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index ceaea0c2027c..094a57edb419 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -12,11 +12,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Constant.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+//===----------------------------------------------------------------------===//
+// TargetOperandInfo
+//===----------------------------------------------------------------------===//
+
+/// getRegClass - Get the register class for the operand, handling resolution
+/// of "symbolic" pointer register classes etc. If this is not a register
+/// operand, this returns null.
+const TargetRegisterClass *
+TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const {
+ if (isLookupPtrRegClass())
+ return TRI->getPointerRegClass(RegClass);
+ return TRI->getRegClass(RegClass);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo
+//===----------------------------------------------------------------------===//
+
TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
unsigned numOpcodes)
: Descriptors(Desc), NumOpcodes(numOpcodes) {
@@ -25,6 +43,14 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
TargetInstrInfo::~TargetInstrInfo() {
}
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
@@ -37,14 +63,33 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-/// getInstrOperandRegClass - Return register class of the operand of an
-/// instruction of the specified TargetInstrDesc.
-const TargetRegisterClass*
-llvm::getInstrOperandRegClass(const TargetRegisterInfo *TRI,
- const TargetInstrDesc &II, unsigned Op) {
- if (Op >= II.getNumOperands())
- return NULL;
- if (II.OpInfo[Op].isLookupPtrRegClass())
- return TRI->getPointerRegClass();
- return TRI->getRegClass(II.OpInfo[Op].RegClass);
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorChar or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorChar or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || *Str == MAI.getSeparatorChar())
+ atInsnStart = true;
+ if (atInsnStart && !isspace(*Str)) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
}
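As a sanity check on the loop above, here is a self-contained sketch of the same counting scheme with the MCAsmInfo queries replaced by assumed constants (separator ';', comment string "#", max instruction length 4); these values are illustrative, not any particular target's.

// Standalone sketch of getInlineAsmLength's counting scheme.
#include <cassert>
#include <cctype>
#include <cstring>

static unsigned approxAsmLength(const char *Str) {
  const char SeparatorChar = ';';        // assumed MAI.getSeparatorChar()
  const char *CommentString = "#";       // assumed MAI.getCommentString()
  const unsigned MaxInstLength = 4;      // assumed MAI.getMaxInstLength()

  bool atInsnStart = true;
  unsigned Length = 0;
  for (; *Str; ++Str) {
    if (*Str == '\n' || *Str == SeparatorChar)
      atInsnStart = true;
    if (atInsnStart && !isspace((unsigned char)*Str)) {
      Length += MaxInstLength;           // charge one max-size instruction
      atInsnStart = false;
    }
    if (atInsnStart && strncmp(Str, CommentString, strlen(CommentString)) == 0)
      atInsnStart = false;               // intended to make comments free;
                                         // note it runs after the charge,
                                         // exactly as in the code above
  }
  return Length;
}

int main() {
  // Two instructions separated by ';' -> 2 * MaxInstLength.
  assert(approxAsmLength("movl %eax, %ebx; addl $1, %eax") == 8);
  return 0;
}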
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
new file mode 100644
index 000000000000..c1aab9921fb2
--- /dev/null
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -0,0 +1,1089 @@
+//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Code
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
+ TextSection = 0;
+ DataSection = 0;
+ BSSSection = 0;
+ ReadOnlySection = 0;
+ StaticCtorSection = 0;
+ StaticDtorSection = 0;
+ LSDASection = 0;
+ EHFrameSection = 0;
+
+ DwarfAbbrevSection = 0;
+ DwarfInfoSection = 0;
+ DwarfLineSection = 0;
+ DwarfFrameSection = 0;
+ DwarfPubNamesSection = 0;
+ DwarfPubTypesSection = 0;
+ DwarfDebugInlineSection = 0;
+ DwarfStrSection = 0;
+ DwarfLocSection = 0;
+ DwarfARangesSection = 0;
+ DwarfRangesSection = 0;
+ DwarfMacroInfoSection = 0;
+}
+
+TargetLoweringObjectFile::~TargetLoweringObjectFile() {
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV) {
+ Constant *C = GV->getInitializer();
+
+ // Must have zero initializer.
+ if (!C->isNullValue())
+ return false;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared.
+ if (GV->isConstant())
+ return false;
+
+ // If the global has an explicit section specified, don't put it in BSS.
+ if (!GV->getSection().empty())
+ return false;
+
+ // If -nozero-initialized-in-bss is specified, don't ever use BSS.
+ if (NoZerosInBSS)
+ return false;
+
+ // Otherwise, put it in BSS!
+ return true;
+}
+
+/// IsNullTerminatedString - Return true if the specified constant (which is
+/// known to have a type that is an array of 1/2/4 byte elements) ends with a
+/// nul value and contains no other nuls in it.
+static bool IsNullTerminatedString(const Constant *C) {
+ const ArrayType *ATy = cast<ArrayType>(C->getType());
+
+ // First check: do we have a constant array of i8 terminated with zero?
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
+ if (ATy->getNumElements() == 0) return false;
+
+ ConstantInt *Null =
+ dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
+ if (Null == 0 || Null->getZExtValue() != 0)
+ return false; // Not null terminated.
+
+ // Verify that the null doesn't occur anywhere else in the string.
+ for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i)
+ // Reject constantexpr elements etc.
+ if (!isa<ConstantInt>(CVA->getOperand(i)) ||
+ CVA->getOperand(i) == Null)
+ return false;
+ return true;
+ }
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C))
+ return ATy->getNumElements() == 1;
+
+ return false;
+}
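The predicate is easy to exercise standalone. A sketch over a plain element vector (the real code walks ConstantArray operands and special-cases [1 x iN] zeroinitializer; widths and values here are assumed examples):

// Sketch of the IsNullTerminatedString check over raw element values.
#include <cassert>
#include <cstdint>
#include <vector>

static bool isNullTerminated(const std::vector<uint32_t> &Elts) {
  if (Elts.empty()) return false;
  if (Elts.back() != 0) return false;          // must end in a nul
  for (size_t i = 0, e = Elts.size() - 1; i != e; ++i)
    if (Elts[i] == 0) return false;            // no interior nuls
  return true;
}

int main() {
  assert(isNullTerminated({'h', 'i', 0}));     // "hi\0" -> mergeable cstring
  assert(!isNullTerminated({'h', 0, 'i', 0})); // interior nul -> plain data
  assert(isNullTerminated({0}));               // [1 x i8] zeroinitializer case
  return 0;
}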
+
+/// getKindForGlobal - This is a top-level target-independent classifier for
+/// a global variable. Given a global variable and information from TM, it
+/// classifies the global in a variety of ways that make various target
+/// implementations simpler. The target implementation is free to ignore this
+/// extra info of course.
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
+ const TargetMachine &TM){
+ assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() &&
+ "Can only be used for global definitions");
+
+ Reloc::Model ReloModel = TM.getRelocationModel();
+
+ // Early exit - functions should always be in text sections.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar == 0)
+ return SectionKind::getText();
+
+ // Handle thread-local data first.
+ if (GVar->isThreadLocal()) {
+ if (isSuitableForBSS(GVar))
+ return SectionKind::getThreadBSS();
+ return SectionKind::getThreadData();
+ }
+
+ // The variable can easily be put in a BSS section.
+ if (isSuitableForBSS(GVar))
+ return SectionKind::getBSS();
+
+ Constant *C = GVar->getInitializer();
+
+ // If the global is marked constant, we can put it into a mergeable section,
+ // a mergeable string section, or general .data if it contains relocations.
+ if (GVar->isConstant()) {
+ // If the initializer for the global contains something that requires a
+ // relocation, then we may have to drop this into a writable data section
+ // even though it is marked const.
+ switch (C->getRelocationInfo()) {
+ default: llvm_unreachable("unknown relocation info kind");
+ case Constant::NoRelocation:
+ // If the initializer is a null-terminated string, put it in a "cstring"
+ // section of the right width.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (const IntegerType *ITy =
+ dyn_cast<IntegerType>(ATy->getElementType())) {
+ if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
+ ITy->getBitWidth() == 32) &&
+ IsNullTerminatedString(C)) {
+ if (ITy->getBitWidth() == 8)
+ return SectionKind::getMergeable1ByteCString();
+ if (ITy->getBitWidth() == 16)
+ return SectionKind::getMergeable2ByteCString();
+
+ assert(ITy->getBitWidth() == 32 && "Unknown width");
+ return SectionKind::getMergeable4ByteCString();
+ }
+ }
+ }
+
+ // Otherwise, just drop it into a mergeable constant section. If we have
+ // a section for this size, use it; otherwise, use the arbitrary-sized
+ // mergeable section.
+ switch (TM.getTargetData()->getTypeAllocSize(C->getType())) {
+ case 4: return SectionKind::getMergeableConst4();
+ case 8: return SectionKind::getMergeableConst8();
+ case 16: return SectionKind::getMergeableConst16();
+ default: return SectionKind::getMergeableConst();
+ }
+
+ case Constant::LocalRelocation:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergeable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel.local section.
+ return SectionKind::getReadOnlyWithRelLocal();
+
+ case Constant::GlobalRelocations:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergeable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel section.
+ return SectionKind::getReadOnlyWithRel();
+ }
+ }
+
+ // Okay, this isn't a constant. If the initializer for the global is going
+ // to require a runtime relocation by the dynamic linker, put it into a more
+ // specific section to improve startup time of the app. This coalesces these
+ // globals together onto fewer pages, improving the locality of the dynamic
+ // linker.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getDataNoRel();
+
+ switch (C->getRelocationInfo()) {
+ default: llvm_unreachable("unknown relocation info kind");
+ case Constant::NoRelocation:
+ return SectionKind::getDataNoRel();
+ case Constant::LocalRelocation:
+ return SectionKind::getDataRelLocal();
+ case Constant::GlobalRelocations:
+ return SectionKind::getDataRel();
+ }
+}
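Read as a decision tree, the classifier tests in a fixed order: function, thread-local, BSS-suitable, constant (string / mergeable / relocated), then writable data. A condensed sketch of that ordering, with the SectionKind factories reduced to strings and the inputs to assumed booleans (the per-size mergeable-const switch is elided):

// Condensed sketch of getKindForGlobal's decision order; illustrative only.
#include <string>

enum RelocInfo { NoReloc, LocalReloc, GlobalReloc };  // getRelocationInfo()

static std::string classify(bool IsFunction, bool ThreadLocal, bool BSSOk,
                            bool IsConst, bool IsCString, RelocInfo RI,
                            bool StaticReloc) {
  if (IsFunction)  return "text";
  if (ThreadLocal) return BSSOk ? "tbss" : "tdata";
  if (BSSOk)       return "bss";
  if (IsConst) {
    if (RI == NoReloc) return IsCString ? "mergeable cstring"
                                        : "mergeable const";
    if (StaticReloc)   return "readonly";         // linker resolves it
    return RI == LocalReloc ? "data.rel.ro.local" : "data.rel.ro";
  }
  if (StaticReloc || RI == NoReloc) return "data (no reloc)";
  return RI == LocalReloc ? "data.rel.local" : "data.rel";
}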
+
+/// SectionForGlobal - This method computes the appropriate section to emit
+/// the specified global variable or function definition. This should not
+/// be passed external (or available externally) globals.
+const MCSection *TargetLoweringObjectFile::
+SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang,
+ const TargetMachine &TM) const {
+ // Select section name.
+ if (GV->hasSection())
+ return getExplicitSectionGlobal(GV, Kind, Mang, TM);
+
+ // Use the default section depending on the 'type' of the global.
+ return SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+
+// Lame default implementation. Calculate the section name for the global.
+const MCSection *
+TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const{
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isBSS() && BSSSection != 0)
+ return BSSSection;
+
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return getDataSection();
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *
+TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return DataSection;
+}
+
+/// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a
+/// pc-relative reference to the specified global variable from exception
+/// handling information. In addition to the symbol, this returns
+/// by-reference:
+///
+/// IsIndirect - True if the returned symbol is actually a stub that contains
+/// the address of the symbol, false if the symbol is the global itself.
+///
+/// IsPCRel - True if the symbol reference is already pc-relative, false if
+/// the caller needs to subtract off the address of the reference from the
+/// symbol.
+///
+const MCExpr *TargetLoweringObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const {
+ // The generic implementation of this just returns a direct reference to the
+ // symbol.
+ IsIndirect = false;
+ IsPCRel = false;
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, false);
+ return MCSymbolRefExpr::Create(Name.str(), getContext());
+}
+
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+
+TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() {
+ // If we have the section uniquing map, free it.
+ delete (ELFUniqueMapTy*)UniquingMap;
+}
+
+const MCSection *TargetLoweringObjectFileELF::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind, bool IsExplicit) const {
+ if (UniquingMap == 0)
+ UniquingMap = new ELFUniqueMapTy();
+ ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionELF *&Entry = Map[Section];
+ if (Entry) return Entry;
+
+ return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit,
+ getContext());
+}
+
+void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ if (UniquingMap != 0)
+ ((ELFUniqueMapTy*)UniquingMap)->clear();
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ BSSSection =
+ getELFSection(".bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ TextSection =
+ getELFSection(".text", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC,
+ SectionKind::getText());
+
+ DataSection =
+ getELFSection(".data", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ ReadOnlySection =
+ getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ TLSDataSection =
+ getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+ MCSectionELF::SHF_WRITE, SectionKind::getThreadData());
+
+ TLSBSSSection =
+ getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+ MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS());
+
+ DataRelSection =
+ getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ DataRelLocalSection =
+ getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
+
+ DataRelROSection =
+ getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+
+ DataRelROLocalSection =
+ getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+
+ MergeableConst4Section =
+ getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
+
+ MergeableConst8Section =
+ getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
+
+ MergeableConst16Section =
+ getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
+
+ StaticCtorSection =
+ getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ StaticDtorSection =
+ getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Exception Handling Sections.
+
+ // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
+ // it contains relocatable pointers. In PIC mode, this is probably a big
+ // runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly());
+ EHFrameSection =
+ getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Debug Info Sections.
+ DwarfAbbrevSection =
+ getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+}
+
+
+static SectionKind
+getELFKindForNamedSection(const char *Name, SectionKind K) {
+ if (Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (strcmp(Name, ".bss") == 0 ||
+ strncmp(Name, ".bss.", 5) == 0 ||
+ strncmp(Name, ".gnu.linkonce.b.", 16) == 0 ||
+ strncmp(Name, ".llvm.linkonce.b.", 17) == 0 ||
+ strcmp(Name, ".sbss") == 0 ||
+ strncmp(Name, ".sbss.", 6) == 0 ||
+ strncmp(Name, ".gnu.linkonce.sb.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.sb.", 18) == 0)
+ return SectionKind::getBSS();
+
+ if (strcmp(Name, ".tdata") == 0 ||
+ strncmp(Name, ".tdata.", 7) == 0 ||
+ strncmp(Name, ".gnu.linkonce.td.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.td.", 18) == 0)
+ return SectionKind::getThreadData();
+
+ if (strcmp(Name, ".tbss") == 0 ||
+ strncmp(Name, ".tbss.", 6) == 0 ||
+ strncmp(Name, ".gnu.linkonce.tb.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.tb.", 18) == 0)
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
+
+static unsigned
+getELFSectionType(const char *Name, SectionKind K) {
+
+ if (strcmp(Name, ".init_array") == 0)
+ return MCSectionELF::SHT_INIT_ARRAY;
+
+ if (strcmp(Name, ".fini_array") == 0)
+ return MCSectionELF::SHT_FINI_ARRAY;
+
+ if (strcmp(Name, ".preinit_array") == 0)
+ return MCSectionELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return MCSectionELF::SHT_NOBITS;
+
+ return MCSectionELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= MCSectionELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= MCSectionELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= MCSectionELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= MCSectionELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= MCSectionELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= MCSectionELF::SHF_STRINGS;
+
+ return Flags;
+}
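For reference, a sketch of the sh_flags words this yields for the common kinds, using the standard ELF flag values that the MCSectionELF constants mirror (the numeric values are from the ELF spec, not read out of this patch):

// Sketch: expected getELFSectionFlags results for representative kinds.
#include <cassert>
enum : unsigned {
  SHF_WRITE = 0x1, SHF_ALLOC = 0x2, SHF_EXECINSTR = 0x4,
  SHF_MERGE = 0x10, SHF_STRINGS = 0x20, SHF_TLS = 0x400
};
int main() {
  assert((SHF_ALLOC | SHF_EXECINSTR) == 0x6);            // .text
  assert((SHF_ALLOC | SHF_WRITE) == 0x3);                // .data, .data.rel
  assert((SHF_ALLOC | SHF_MERGE | SHF_STRINGS) == 0x32); // .rodata.strN.M
  assert((SHF_ALLOC | SHF_WRITE | SHF_TLS) == 0x403);    // .tdata/.tbss
  return 0;                                              // metadata -> 0
}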
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ const char *SectionName = GV->getSection().c_str();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind, true);
+}
+
+static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".gnu.linkonce.t.";
+ if (Kind.isReadOnly()) return ".gnu.linkonce.r.";
+
+ if (Kind.isThreadData()) return ".gnu.linkonce.td.";
+ if (Kind.isThreadBSS()) return ".gnu.linkonce.tb.";
+
+ if (Kind.isBSS()) return ".gnu.linkonce.b.";
+ if (Kind.isDataNoRel()) return ".gnu.linkonce.d.";
+ if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local.";
+ if (Kind.isDataRel()) return ".gnu.linkonce.d.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".gnu.linkonce.d.rel.ro.";
+}
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
+ std::string Name = Mang->makeNameProper(GV->getNameStr());
+
+ return getELFSection((Prefix+Name).c_str(),
+ getELFSectionType((Prefix+Name).c_str(), Kind),
+ getELFSectionFlags(Kind),
+ Kind);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getELFSection(Name.c_str(), MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_MERGE |
+ MCSectionELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ if (Kind.isBSS()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
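Both name-construction paths above are plain string concatenation. A sketch with an assumed mangled name "_foo" and preferred alignment 4 (example inputs, not taken from the patch):

// Sketch of the section names SelectSectionForGlobal builds.
#include <cassert>
#include <string>

int main() {
  // Weak/linkonce global of kind data.rel -> uniqued linkonce section.
  std::string Uniqued = std::string(".gnu.linkonce.d.rel.") + "_foo";
  assert(Uniqued == ".gnu.linkonce.d.rel._foo");

  // 2-byte mergeable cstring with preferred alignment 4 -> sized str section.
  unsigned Align = 4;
  std::string StrSec = ".rodata.str2." + std::to_string(Align);
  assert(StrSec == ".rodata.str2.4");
  return 0;
}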
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+
+TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)UniquingMap;
+}
+
+
+const MCSectionMachO *TargetLoweringObjectFileMachO::
+getMachOSection(const StringRef &Segment, const StringRef &Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind) const {
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section; if so, this should
+ // be diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (UniquingMap == 0)
+ UniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind, getContext());
+}
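The uniquing key is the concatenated "Segment,Section" pair, so a second request with different flags returns the first section unchanged, which is why the comment tells clients to diagnose the mismatch. A map-based sketch of the key scheme (std::map stands in for the StringMap, Section for MCSectionMachO; the flag value is an arbitrary example):

// Sketch of the "Segment,Section" uniquing key; leaks are ignored here.
#include <cassert>
#include <map>
#include <string>

struct Section { unsigned TypeAndAttributes; };

static std::map<std::string, Section*> UniquingMap;

static Section *getSection(const std::string &Segment,
                           const std::string &Sect, unsigned TAA) {
  Section *&Entry = UniquingMap[Segment + "," + Sect];
  if (Entry) return Entry;          // hit: the first request's flags win
  return Entry = new Section{TAA};
}

int main() {
  Section *A = getSection("__TEXT", "__text", 0x80000000);
  Section *B = getSection("__TEXT", "__text", 0);  // same key, other flags
  assert(A == B && A->TypeAndAttributes == 0x80000000);
  return 0;
}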
+
+
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ if (UniquingMap != 0)
+ ((MachOUniqueMapTy*)UniquingMap)->clear();
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ TextSection // .text
+ = getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ DataSection // .data
+ = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel());
+
+ CStringSection // .cstring
+ = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
+ UStringSection
+ = getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
+ FourByteConstantSection // .literal4
+ = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
+ EightByteConstantSection // .literal8
+ = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
+
+ // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
+ // to using it in -static mode.
+ SixteenByteConstantSection = 0;
+ if (TM.getRelocationModel() != Reloc::Static &&
+ TM.getTargetData()->getPointerSize() == 32)
+ SixteenByteConstantSection = // .literal16
+ getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
+
+ ReadOnlySection // .const
+ = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly());
+
+ TextCoalSection
+ = getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED,
+ SectionKind::getText());
+ ConstDataCoalSection
+ = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED,
+ SectionKind::getText());
+ ConstDataSection // .const_data
+ = getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
+ DataCoalSection
+ = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
+
+
+ LazySymbolPointerSection
+ = getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ NonLazySymbolPointerSection
+ = getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection
+ = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel());
+ StaticDtorSection
+ = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel());
+ } else {
+ StaticCtorSection
+ = getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ }
+
+ // Exception Handling.
+ LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
+ SectionKind::getDataRel());
+ EHFrameSection =
+ getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+
+ // Debug Information.
+ DwarfAbbrevSection =
+ getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA, StubSize;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with llvm_report_error.
+ llvm_report_error("Global variable '" + GV->getNameStr() +
+ "' has an invalid section specifier '" + GV->getSection()+
+ "': " + ErrorCode + ".");
+ // Fall back to dropping it into the data section.
+ return DataSection;
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with llvm_report_error.
+ llvm_report_error("Global variable '" + GV->getNameStr() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
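ParseSectionSpecifier accepts the Darwin "segment,section[,attributes[,stubsize]]" form used by __attribute__((section(...))). A minimal splitter for the two mandatory fields, purely illustrative; the real parser also validates name lengths, attribute keywords, and stub sizes:

// Sketch: splitting a Darwin section specifier into segment and section.
#include <cassert>
#include <string>

static bool splitSpecifier(const std::string &Spec,
                           std::string &Segment, std::string &Section) {
  std::string::size_type Comma = Spec.find(',');
  if (Comma == std::string::npos)
    return false;                            // "segment,section" is mandatory
  Segment = Spec.substr(0, Comma);
  std::string Rest = Spec.substr(Comma + 1);
  Section = Rest.substr(0, Rest.find(','));  // drop attributes/stubsize if any
  return !Segment.empty() && !Section.empty();
}

int main() {
  std::string Seg, Sec;
  assert(splitSpecifier("__DATA,__mystuff", Seg, Sec) &&
         Seg == "__DATA" && Sec == "__mystuff");
  assert(!splitSpecifier("nocomma", Seg, Sec));  // would hit the error path
  return 0;
}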
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS");
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString()) {
+ if (TM.getTargetData()->getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32) {
+ if (Kind.isMergeable1ByteCString())
+ return CStringSection;
+ assert(Kind.isMergeable2ByteCString());
+ return UStringSection;
+ }
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ const std::string &Name = Mang->getMangledName(GV);
+ if (Name[0] == 'L' || Name[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ IsIndirect = true;
+ IsPCRel = false;
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+ return MCSymbolRefExpr::Create(Name.str(), getContext());
+}
+
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+
+TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() {
+ delete (COFFUniqueMapTy*)UniquingMap;
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getCOFFSection(const char *Name, bool isDirective, SectionKind Kind) const {
+ // Create the map if it doesn't already exist.
+ if (UniquingMap == 0)
+ UniquingMap = new COFFUniqueMapTy();
+ COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionCOFF *&Entry = Map[Name];
+ if (Entry) return Entry;
+
+ return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext());
+}
+
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ if (UniquingMap != 0)
+ ((COFFUniqueMapTy*)UniquingMap)->clear();
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ TextSection = getCOFFSection("\t.text", true, SectionKind::getText());
+ DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel());
+ StaticCtorSection =
+ getCOFFSection(".ctors", false, SectionKind::getDataRel());
+ StaticDtorSection =
+ getCOFFSection(".dtors", false, SectionKind::getDataRel());
+
+ // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+ // though it contains relocatable pointers. In PIC mode, this is probably a
+ // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly());
+ EHFrameSection =
+ getCOFFSection(".eh_frame", false, SectionKind::getDataRel());
+
+ // Debug info.
+ // FIXME: Don't use 'directive' mode here.
+ DwarfAbbrevSection =
+ getCOFFSection("\t.section\t.debug_abbrev,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfInfoSection =
+ getCOFFSection("\t.section\t.debug_info,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfLineSection =
+ getCOFFSection("\t.section\t.debug_line,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfFrameSection =
+ getCOFFSection("\t.section\t.debug_frame,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getCOFFSection("\t.section\t.debug_pubnames,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfStrSection =
+ getCOFFSection("\t.section\t.debug_str,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfLocSection =
+ getCOFFSection("\t.section\t.debug_loc,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfARangesSection =
+ getCOFFSection("\t.section\t.debug_aranges,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfRangesSection =
+ getCOFFSection("\t.section\t.debug_ranges,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getCOFFSection("\t.section\t.debug_macinfo,\"dr\"",
+ true, SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ return getCOFFSection(GV->getSection().c_str(), false, Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$linkonce";
+ if (Kind.isWriteable())
+ return ".data$linkonce";
+ return ".rdata$linkonce";
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ std::string Name = Mang->makeNameProper(GV->getNameStr());
+ return getCOFFSection((Prefix+Name).c_str(), false, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ return getDataSection();
+}
+
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index c487cb805306..fec59b5e2b50 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -33,7 +33,10 @@ namespace llvm {
FloatABI::ABIType FloatABIType;
bool NoImplicitFloat;
bool NoZerosInBSS;
- bool ExceptionHandling;
+ bool DwarfExceptionHandling;
+ bool SjLjExceptionHandling;
+ bool JITEmitDebugInfo;
+ bool JITEmitDebugInfoToDisk;
bool UnwindTablesMandatory;
Reloc::Model RelocationModel;
CodeModel::Model CMModel;
@@ -104,9 +107,32 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::location(NoZerosInBSS),
cl::init(false));
static cl::opt<bool, true>
-EnableExceptionHandling("enable-eh",
+EnableDwarfExceptionHandling("enable-eh",
cl::desc("Emit DWARF exception handling (default if target supports)"),
- cl::location(ExceptionHandling),
+ cl::location(DwarfExceptionHandling),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableSjLjExceptionHandling("enable-sjlj-eh",
+ cl::desc("Emit SJLJ exception handling (default if target supports)"),
+ cl::location(SjLjExceptionHandling),
+ cl::init(false));
+// In debug builds, make this default to true.
+#ifdef NDEBUG
+#define EMIT_DEBUG false
+#else
+#define EMIT_DEBUG true
+#endif
+static cl::opt<bool, true>
+EmitJitDebugInfo("jit-emit-debug",
+ cl::desc("Emit debug information to debugger"),
+ cl::location(JITEmitDebugInfo),
+ cl::init(EMIT_DEBUG));
+#undef EMIT_DEBUG
+static cl::opt<bool, true>
+EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
+ cl::Hidden,
+ cl::desc("Emit debug info objfiles to disk"),
+ cl::location(JITEmitDebugInfoToDisk),
cl::init(false));
static cl::opt<bool, true>
EnableUnwindTables("unwind-tables",
@@ -176,8 +202,8 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
// TargetMachine Class
//
-TargetMachine::TargetMachine()
- : AsmInfo(0) {
+TargetMachine::TargetMachine(const Target &T)
+ : TheTarget(T), AsmInfo(0) {
// Typically it will be subtargets that will adjust FloatABIType from Default
// to Soft or Hard.
if (UseSoftFloat)
@@ -237,4 +263,3 @@ namespace llvm {
return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
}
}
-
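All of the options above use the externally backed cl::opt<bool, true> form, where cl::location routes the parsed value into a global that the rest of the backend reads directly, and the -jit-emit-debug default is flipped by NDEBUG. A sketch of that pattern, mirroring the patch's own usage (the flag name and storage variable are hypothetical):

// Sketch of the cl::opt<bool, true> + cl::location pattern used above.
#include "llvm/Support/CommandLine.h"

namespace llvm {
  bool ExampleFlag;   // external storage, like DwarfExceptionHandling above
}

#ifdef NDEBUG
#define EXAMPLE_DEFAULT false   // release builds default off
#else
#define EXAMPLE_DEFAULT true    // debug builds default on, as -jit-emit-debug
#endif

static llvm::cl::opt<bool, true>
ExampleFlagOpt("example-flag",   // hypothetical option name
  llvm::cl::desc("Hypothetical externally stored flag"),
  llvm::cl::location(llvm::ExampleFlag),
  llvm::cl::init(EXAMPLE_DEFAULT));
#undef EXAMPLE_DEFAULT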
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index a84fdaa4a802..fac67e2e1aaf 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -40,10 +40,10 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
TargetRegisterInfo::~TargetRegisterInfo() {}
/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
-/// register of the given type. If type is MVT::Other, then just return any
+/// register of the given type. If type is EVT::Other, then just return any
/// register class the register belongs to.
const TargetRegisterClass *
-TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const {
+TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
assert(isPhysicalRegister(reg) && "reg must be a physical register");
// Pick the most super register class of the right type that contains
@@ -62,14 +62,14 @@ TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const {
/// getAllocatableSetForRC - Toggle the bits that represent allocatable
/// registers for the specific register class.
-static void getAllocatableSetForRC(MachineFunction &MF,
+static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
E = RC->allocation_order_end(MF); I != E; ++I)
R.set(*I);
}
-BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF,
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
const TargetRegisterClass *RC) const {
BitVector Allocatable(NumRegs);
if (RC) {
@@ -85,7 +85,7 @@ BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF,
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index. This is the default implementation
-/// which is likely incorrect for the target.
+/// which is overridden for some targets.
int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..034d5aba8327
--- /dev/null
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86AsmParser
+ X86AsmParser.cpp
+ )
+add_dependencies(LLVMX86AsmParser X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile
new file mode 100644
index 000000000000..25fb0a2836db
--- /dev/null
+++ b/lib/Target/X86/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmParser/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmParser
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
new file mode 100644
index 000000000000..c357b4d0dee1
--- /dev/null
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -0,0 +1,479 @@
+//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLexer.h"
+#include "llvm/MC/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+namespace {
+struct X86Operand;
+
+class X86ATTAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ bool ParseRegister(X86Operand &Op);
+
+ bool ParseOperand(X86Operand &Op);
+
+ bool ParseMemOperand(X86Operand &Op);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+ bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands,
+ MCInst &Inst);
+
+ /// MatchRegisterName - Match the given string to a register name, or 0 if
+ /// there is no match.
+ unsigned MatchRegisterName(const StringRef &Name);
+
+ /// }
+
+public:
+ X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
+ : TargetAsmParser(T), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+} // end anonymous namespace
+
+
+namespace {
+
+/// X86Operand - Instances of this class represent a parsed X86 machine
+/// instruction operand.
+struct X86Operand {
+ enum {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNo;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ } Mem;
+ };
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNo;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getMemDisp() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Disp;
+ }
+ unsigned getMemSegReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.SegReg;
+ }
+ unsigned getMemBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.BaseReg;
+ }
+ unsigned getMemIndexReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.IndexReg;
+ }
+ unsigned getMemScale() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Scale;
+ }
+
+ bool isToken() const {return Kind == Token; }
+
+ bool isImm() const { return Kind == Immediate; }
+
+ bool isImmSExt8() const {
+ // Accept immediates which fit in 8 bits when sign extended, and
+ // non-absolute immediates.
+ if (!isImm())
+ return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ int64_t Value = CE->getValue();
+ return Value == (int64_t) (int8_t) Value;
+ }
+
+ return true;
+ }
+
+ bool isMem() const { return Kind == Memory; }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+
+ void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
+ // FIXME: Support user customization of the render method.
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 4 || N == 5) && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+ Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+
+ // FIXME: What a hack.
+ if (N == 5)
+ Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ }
+
+ static X86Operand CreateToken(StringRef Str) {
+ X86Operand Res;
+ Res.Kind = Token;
+ Res.Tok.Data = Str.data();
+ Res.Tok.Length = Str.size();
+ return Res;
+ }
+
+ static X86Operand CreateReg(unsigned RegNo) {
+ X86Operand Res;
+ Res.Kind = Register;
+ Res.Reg.RegNo = RegNo;
+ return Res;
+ }
+
+ static X86Operand CreateImm(const MCExpr *Val) {
+ X86Operand Res;
+ Res.Kind = Immediate;
+ Res.Imm.Val = Val;
+ return Res;
+ }
+
+ static X86Operand CreateMem(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale) {
+ // We should never just have a displacement; that would be an immediate.
+ assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+
+ // The scale should always be one of {1,2,4,8}.
+ assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+ "Invalid scale!");
+ X86Operand Res;
+ Res.Kind = Memory;
+ Res.Mem.SegReg = SegReg;
+ Res.Mem.Disp = Disp;
+ Res.Mem.BaseReg = BaseReg;
+ Res.Mem.IndexReg = IndexReg;
+ Res.Mem.Scale = Scale;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
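An operand is a small tagged union, cheap to copy by value into the SmallVector. A standalone sketch of the fields a memory operand carries for the AT&T form 4(%eax,%ebx,2); the register numbers are made-up placeholders, not real X86 enum values:

// Sketch: an X86Operand-style memory operand for "4(%eax,%ebx,2)".
#include <cassert>

struct MemOp { unsigned SegReg; long Disp; unsigned BaseReg, IndexReg, Scale; };

int main() {
  const unsigned EAX = 1, EBX = 2;   // hypothetical register numbers
  MemOp Op = {/*SegReg=*/0, /*Disp=*/4, EAX, EBX, /*Scale=*/2};
  assert(Op.Scale == 1 || Op.Scale == 2 || Op.Scale == 4 || Op.Scale == 8);
  assert(Op.BaseReg && "a bare displacement would have been an immediate");
  return 0;
}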
+
+
+bool X86ATTAsmParser::ParseRegister(X86Operand &Op) {
+ const AsmToken &TokPercent = getLexer().getTok();
+ (void)TokPercent; // Avoid warning when assertions are disabled.
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ getLexer().Lex(); // Eat percent token.
+
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(Tok.getLoc(), "invalid register name");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ unsigned RegNo;
+
+ RegNo = MatchRegisterName(Tok.getString());
+ if (RegNo == 0)
+ return Error(Tok.getLoc(), "invalid register name");
+
+ Op = X86Operand::CreateReg(RegNo);
+ getLexer().Lex(); // Eat identifier token.
+
+ return false;
+}
+
+bool X86ATTAsmParser::ParseOperand(X86Operand &Op) {
+ switch (getLexer().getKind()) {
+ default:
+ return ParseMemOperand(Op);
+ case AsmToken::Percent:
+ // FIXME: if a segment register, this could either be just the seg reg, or
+ // the start of a memory operand.
+ return ParseRegister(Op);
+ case AsmToken::Dollar: {
+ // $42 -> immediate.
+ getLexer().Lex();
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val))
+ return true;
+ Op = X86Operand::CreateImm(Val);
+ return false;
+ }
+ }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
+bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {
+ // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
+ unsigned SegReg = 0;
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the ( and see what is after
+ // it.
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getLexer().isNot(AsmToken::LParen)) {
+ if (getParser().ParseExpression(Disp)) return true;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg)
+ Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+ else
+ Op = X86Operand::CreateImm(Disp);
+ return false;
+ }
+
+ // Eat the '('.
+ getLexer().Lex();
+ } else {
+ // Okay, we have a '('. We don't know if this is an expression or not,
+ // so we have to eat the '(' to see beyond it.
+ getLexer().Lex(); // Eat the '('.
+
+ if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+ // It must be a parenthesized expression; parse it now.
+ if (getParser().ParseParenExpression(Disp))
+ return true;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg)
+ Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+ else
+ Op = X86Operand::CreateImm(Disp);
+ return false;
+ }
+
+ // Eat the '('.
+ getLexer().Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ if (ParseRegister(Op))
+ return true;
+ BaseReg = Op.getReg();
+ }
+
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat the comma.
+
+ // Following the comma we should have either an index register, or a scale
+ // value. We don't support the latter form, but we want to parse it
+ // correctly.
+ //
+ // Note that even though it would be completely consistent to support syntax
+ // like "1(%eax,,1)", the assembler doesn't accept it.
+ if (getLexer().is(AsmToken::Percent)) {
+ if (ParseRegister(Op))
+ return true;
+ IndexReg = Op.getReg();
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+ if (getLexer().isNot(AsmToken::Comma))
+ return true;
+ getLexer().Lex(); // Eat the comma.
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ SMLoc Loc = getLexer().getTok().getLoc();
+
+ int64_t ScaleVal;
+ if (getParser().ParseAbsoluteExpression(ScaleVal))
+ return true;
+
+ // Validate the scale amount.
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
+ return Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ } else if (getLexer().isNot(AsmToken::RParen)) {
+ // Otherwise we have the unsupported form of a scale amount without an
+ // index.
+ SMLoc Loc = getLexer().getTok().getLoc();
+
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return true;
+
+ return Error(Loc, "cannot have scale factor without index register");
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(getLexer().getTok().getLoc(),
+ "unexpected token in memory operand");
+ getLexer().Lex(); // Eat the ')'.
+
+ Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale);
+ return false;
+}
+
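+/// ParseInstruction - Parse one full instruction statement: the mnemonic, an
+/// optional '*' modifier, and a comma-separated operand list, e.g.
+/// "movl $42, 8(%eax,%ebx,4)".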
+bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
+ SmallVector<X86Operand, 8> Operands;
+
+ Operands.push_back(X86Operand::CreateToken(Name));
+
+ SMLoc Loc = getLexer().getTok().getLoc();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ // Parse '*' modifier.
+ if (getLexer().is(AsmToken::Star)) {
+ getLexer().Lex(); // Eat the star.
+ Operands.push_back(X86Operand::CreateToken("*"));
+ }
+
+ // Read the first operand.
+ Operands.push_back(X86Operand());
+ if (ParseOperand(Operands.back()))
+ return true;
+
+ while (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ Operands.push_back(X86Operand());
+ if (ParseOperand(Operands.back()))
+ return true;
+ }
+ }
+
+ if (!MatchInstruction(Operands, Inst))
+ return false;
+
+ // FIXME: We should give nicer diagnostics about the exact failure.
+
+ Error(Loc, "unrecognized instruction");
+ return true;
+}
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
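+/// e.g. ".word 0x1234, foo" emits each expression as a 2-byte value.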
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ getLexer().Lex();
+ }
+ }
+
+ getLexer().Lex();
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmParser() {
+ RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+}
+
+#include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index a28c8266b82e..b70a587ec4e2 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,9 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMX86AsmPrinter
- X86ATTAsmPrinter.cpp
X86ATTInstPrinter.cpp
X86AsmPrinter.cpp
- X86IntelAsmPrinter.cpp
+ X86IntelInstPrinter.cpp
+ X86MCInstLower.cpp
)
-add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) \ No newline at end of file
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile
index ba89ac69bf68..2368761ac9f4 100644
--- a/lib/Target/X86/AsmPrinter/Makefile
+++ b/lib/Target/X86/AsmPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===##
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index fa0ee753f02c..bc70ffe8d633 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -13,10 +13,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
#include "llvm/MC/MCInst.h"
-#include "X86ATTAsmPrinter.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "X86GenInstrNames.inc"
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
@@ -25,9 +28,11 @@ using namespace llvm;
#include "X86GenAsmWriter.inc"
#undef MachineInstr
-void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
switch (MI->getOperand(Op).getImm()) {
- default: assert(0 && "Invalid ssecc argument!");
+ default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
@@ -39,61 +44,36 @@ void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
}
}
-
-void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) {
- assert(0 &&
- "This is only used for MOVPC32r, should lower before asm printing!");
-}
-
-
/// print_pcrel_imm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value. These print slightly differently, for
/// example, a $ is not emitted.
-void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
const MCOperand &Op = MI->getOperand(OpNo);
-
if (Op.isImm())
O << Op.getImm();
- else if (Op.isMBBLabel())
- // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel
- // should eventually call into this code, not the other way around.
- O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction()
- << '_' << Op.getMBBLabelBlock();
- else
- assert(0 && "Unknown pcrel immediate operand");
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ Op.getExpr()->print(O, &MAI);
+ }
}
-
-void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- const char *Modifier) {
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier) {
assert(Modifier == 0 && "Modifiers should not be used");
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- O << '%';
- unsigned Reg = Op.getReg();
-#if 0
- if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT VT = (strcmp(Modifier+6,"64") == 0) ?
- MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
- ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
- Reg = getX86SubSuperRegister(Reg, VT);
- }
-#endif
- O << TRI->getAsmName(Reg);
- return;
+ O << '%' << getRegisterName(Op.getReg());
} else if (Op.isImm()) {
- //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem")))
+ O << '$' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
O << '$';
- O << Op.getImm();
- return;
+ Op.getExpr()->print(O, &MAI);
}
-
- O << "<<UNKNOWN OPERAND KIND>>";
}
-void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
-
+void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
const MCOperand &BaseReg = MI->getOperand(Op);
const MCOperand &IndexReg = MI->getOperand(Op+2);
const MCOperand &DispSpec = MI->getOperand(Op+3);
@@ -103,19 +83,11 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
O << DispVal;
} else {
- abort();
- //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
- // DispSpec.isJTI() || DispSpec.isSymbol());
- //printOperand(MI, Op+3, "mem");
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ DispSpec.getExpr()->print(O, &MAI);
}
if (IndexReg.getReg() || BaseReg.getReg()) {
- // There are cases where we can end up with ESP/RSP in the indexreg slot.
- // If this happens, swap the base/index register to support assemblers that
- // don't work when the index is *SP.
- // FIXME: REMOVE THIS.
- assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP);
-
O << '(';
if (BaseReg.getReg())
printOperand(MI, Op);
@@ -131,9 +103,9 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
}
}
-void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) {
- const MCOperand &Segment = MI->getOperand(Op+4);
- if (Segment.getReg()) {
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+ // If this has a segment register, print it.
+ if (MI->getOperand(Op+4).getReg()) {
printOperand(MI, Op+4);
O << ':';
}
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
new file mode 100644
index 000000000000..5f28fa46f5f8
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,86 @@
+//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+ X86ATTInstPrinter(raw_ostream &O, const MCAsmInfo &MAI)
+ : MCInstPrinter(O, MAI) {}
+
+
+ virtual void printInst(const MCInst *MI);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier = 0);
+ void printMemReference(const MCInst *MI, unsigned Op);
+ void printLeaMemReference(const MCInst *MI, unsigned Op);
+ void printSSECC(const MCInst *MI, unsigned Op);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+};
+
+}
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index e5d80a4cbdec..2a0290db97e9 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===//
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,42 +7,937 @@
//
//===----------------------------------------------------------------------===//
//
-// This file the shared super class printer that converts from our internal
-// representation of machine-dependent LLVM code to Intel and AT&T format
-// assembly language.
-// This printer is the output mechanism used by `llc'.
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to AT&T format assembly
+// language. This printer is the output mechanism used by `llc'.
//
//===----------------------------------------------------------------------===//
-#include "X86ATTAsmPrinter.h"
-#include "X86IntelAsmPrinter.h"
-#include "X86Subtarget.h"
+#define DEBUG_TYPE "asm-printer"
+#include "X86AsmPrinter.h"
+#include "X86ATTInstPrinter.h"
+#include "X86IntelInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86.h"
+#include "X86COFF.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
-/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code
-/// for a MachineFunction to the given output stream, using the given target
-/// machine description.
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+void X86AsmPrinter::printMCInst(const MCInst *MI) {
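+ // Assembler dialect 0 is AT&T syntax; any other value selects Intel syntax.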
+ if (MAI->getAssemblerDialect() == 0)
+ X86ATTInstPrinter(O, *MAI).printInstruction(MI);
+ else
+ X86IntelInstPrinter(O, *MAI).printInstruction(MI);
+}
+
+void X86AsmPrinter::PrintPICBaseSymbol() const {
+ // FIXME: Gross const cast hack.
+ X86AsmPrinter *AP = const_cast<X86AsmPrinter*>(this);
+ X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol()->print(O, MAI);
+}
+
+void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ unsigned FnAlign = MF.getAlignment();
+ const Function *F = MF.getFunction();
+
+ if (Subtarget->isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(CurrentFnName, F, *TM.getTargetData());
+ }
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitAlignment(FnAlign, F);
+
+ switch (F->getLinkage()) {
+ default: llvm_unreachable("Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkerPrivateLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << MAI->getWeakDefDirective() << CurrentFnName << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << CurrentFnName << "\n"
+ "\t.linkonce discard\n";
+ } else {
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ }
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << CurrentFnName << ",@function\n";
+ else if (Subtarget->isTargetCygMing()) {
+ O << "\t.def\t " << CurrentFnName
+ << ";\t.scl\t" <<
+ (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ O << CurrentFnName << ':';
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, F, /*PrintType=*/false, F->getParent());
+ }
+ O << '\n';
+
+ // Add a workaround for linkonce linkage on Cygwin/MinGW.
+ if (Subtarget->isTargetCygMing() &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+ O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ this->MF = &MF;
+ CallingConv::ID CC = F->getCallingConv();
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ if (Subtarget->isTargetCOFF()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+ // Populate the function information map. We only want to populate
+ // information for stdcall and fastcall functions right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>());
+ }
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print the 'header' of function
+ emitFunctionHeader(MF);
+
+ // Emit pre-function debug and/or EH information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ bool hasAnyRealCode = false;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ if (!II->isLabel())
+ hasAnyRealCode = true;
+ printMachineInstruction(II);
+ }
+ }
+
+ if (Subtarget->isTargetDarwin() && !hasAnyRealCode) {
+ // If the function is empty, then we need to emit *something*. Otherwise,
+ // the function's label might be associated with something that it wasn't
+ // meant to be associated with. We emit a noop in this situation.
+ // We are assuming inline asms are code.
+ O << "\tnop\n";
+ }
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ // Emit post-function debug information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->EndFunction(&MF);
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// jump tables, constant pools, global address and external symbols, all of
+/// which print to a label with various suffixes for relocation types etc.
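+/// e.g. "foo@PLT" for an ELF PLT reference, or "_foo$stub" on Darwin (here the
+/// leading underscore is assumed to come from the Darwin mangler).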
+void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown symbol type!");
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+ printOffset(MO.getOffset());
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+
+ const char *Suffix = "";
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
+ Suffix = "$stub";
+ else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ Suffix = "$non_lazy_ptr";
+
+ std::string Name = Mang->getMangledName(GV, Suffix, Suffix[0] != '\0');
+ if (Subtarget->isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(Name, GV, *TM.getTargetData());
+ }
+
+ // Handle dllimport linkage.
+ if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
+ Name = "__imp_" + Name;
+
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, true);
+ NameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ NameStr.clear();
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ }
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, true);
+ NameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
+ if (StubSym == 0) {
+ NameStr.clear();
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ }
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, true);
+ NameStr += "$stub";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym == 0) {
+ NameStr.clear();
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ }
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (Name[0] == '$')
+ O << '(' << Name << ')';
+ else
+ O << Name;
+
+ printOffset(MO.getOffset());
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name = Mang->makeNameProper(MO.getSymbolName());
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym = OutContext.GetOrCreateSymbol(Name);
+ }
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (Name[0] == '$')
+ O << '(' << Name << ')';
+ else
+ O << Name;
+ break;
+ }
+ }
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ break;
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ // These affect the name of the symbol, not any suffix.
+ break;
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+ O << " + [.-";
+ PrintPICBaseSymbol();
+ O << ']';
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ O << '-';
+ PrintPICBaseSymbol();
+ break;
+ case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOT: O << "@GOT"; break;
+ case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
+ case X86II::MO_PLT: O << "@PLT"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ printSymbolOperand(MO);
+ return;
+ }
+}
+
+
+void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ O << '$' << MO.getImm();
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol: {
+ O << '$';
+ printSymbolOperand(MO);
+ break;
+ }
+ }
+}
+
+void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
+ // HasParenPart - True if we will print out the () part of the mem ref.
+ bool HasParenPart = IndexReg.getReg() || HasBaseReg;
+
+ if (DispSpec.isImm()) {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || !HasParenPart)
+ O << DispVal;
+ } else {
+ assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI() || DispSpec.isSymbol());
+ printSymbolOperand(MI->getOperand(Op+3));
+ }
+
+ if (HasParenPart) {
+ assert(IndexReg.getReg() != X86::ESP &&
+ "X86 doesn't allow scaling by ESP");
+
+ O << '(';
+ if (HasBaseReg)
+ printOperand(MI, Op, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, Modifier);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, Modifier);
+}
+
+void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!MAI->getSetDirective())
+ return;
+
+ // We don't need .set machinery if we have GOT-style relocations
+ if (Subtarget->isPICStyleGOT())
+ return;
+
+ O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+
+ if (Subtarget->isPICStyleRIPRel())
+ O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+ else {
+ O << '-';
+ PrintPICBaseSymbol();
+ O << '\n';
+ }
+}
+
+
+void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ PrintPICBaseSymbol();
+ O << '\n';
+ PrintPICBaseSymbol();
+ O << ':';
+}
+
+void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const {
+ const char *JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ MAI->getData32bitsDirective() : MAI->getData64bitsDirective();
+
+ O << JTEntryDirective << ' ';
+
+ if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) {
+ O << MAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
+ } else if (Subtarget->isPICStyleGOT()) {
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ O << "@GOTOFF";
+ } else
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+}
+
+bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) {
+ unsigned Reg = MO.getReg();
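+ // e.g. for %rax: 'b' -> %al, 'h' -> %ah, 'w' -> %ax, 'k' -> %eax,
+ // 'q' -> %rax.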
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
-FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
- X86TargetMachine &tm,
- bool verbose) {
- const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
+bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // This is an address. Currently only 'i' and 'r' are expected.
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
+ printSymbolOperand(MO);
+ return false;
+ }
+ if (MO.isReg()) {
+ O << '(';
+ printOperand(MI, OpNo);
+ O << ')';
+ return false;
+ }
+ return true;
+
+ case 'c': // Don't print "$" before a global var name or constant.
+ if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
+ printSymbolOperand(MO);
+ else
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'A': // Print '*' before a register (it must be a register)
+ if (MO.isReg()) {
+ O << '*';
+ printOperand(MI, OpNo);
+ return false;
+ }
+ return true;
+
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0]);
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'P': // This is the operand of a call, treat specially.
+ print_pcrel_imm(MI, OpNo);
+ return false;
+
+ case 'n': // Negate the immediate or print a '-' before the operand.
+ // Note: this is a temporary solution. It should be handled target
+ // independently as part of the 'MC' work.
+ if (MO.isImm()) {
+ O << -MO.getImm();
+ return false;
+ }
+ O << '-';
+ }
+ }
- if (Subtarget->isFlavorIntel())
- return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
- return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
+ printOperand(MI, OpNo);
+ return false;
}
-namespace {
- static struct Register {
- Register() {
- X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass);
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, "no-rip");
+ return false;
}
- } Registrator;
+ }
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
+/// AT&T syntax to the current output stream.
+///
+void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ processDebugLoc(MI, true);
+
+ printInstructionThroughMCStreamer(MI);
+
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+
+ processDebugLoc(MI, false);
}
-extern "C" int X86AsmPrinterForceLink;
-int X86AsmPrinterForceLink = 0;
+void X86AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
+ if (!GVar->hasInitializer())
+ return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ const TargetData *TD = TM.getTargetData();
+
+ std::string name = Mang->getMangledName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << name << ",@object\n";
+
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GVar, TM);
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GVar, GVKind, Mang, TM);
+ OutStreamer.SwitchSection(TheSection);
+
+ // FIXME: get this stuff from section kind flags.
+ if (C->isNullValue() && !GVar->hasSection() &&
+ // Don't put things that should go in the cstring section into "comm".
+ !TheSection->getKind().isMergeableCString()) {
+ if (GVar->hasExternalLinkage()) {
+ if (const char *Directive = MAI->getZeroFillDirective()) {
+ O << "\t.globl " << name << '\n';
+ O << Directive << "__DATA, __common, " << name << ", "
+ << Size << ", " << Align << '\n';
+ return;
+ }
+ }
+
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (MAI->getLCOMMDirective() != NULL) {
+ if (GVar->hasLocalLinkage()) {
+ O << MAI->getLCOMMDirective() << name << ',' << Size;
+ if (Subtarget->isTargetDarwin())
+ O << ',' << Align;
+ } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) {
+ O << "\t.globl " << name << '\n'
+ << MAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ } else {
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ } else {
+ if (!Subtarget->isTargetCygMing()) {
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+ }
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl " << name << '\n'
+ << MAI->getWeakDefDirective() << name << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << name << "\n"
+ "\t.linkonce same_size\n";
+ } else {
+ O << "\t.weak\t" << name << '\n';
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm){
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+
+ EmitGlobalConstant(C);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+}
+
+void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs;
+
+ Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ TLOFMacho.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ Stubs[i].first->print(O, MAI);
+ O << ":\n" << "\t.indirect_symbol ";
+ // Get the MCSymbol without the $stub suffix.
+ Stubs[i].second->print(O, MAI);
+ O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n";
+ }
+ O << '\n';
+
+ Stubs.clear();
+ }
+
+ // Output stubs for external and common global variables.
+ Stubs = MMIMacho.GetGVStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ TLOFMacho.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ Stubs[i].first->print(O, MAI);
+ O << ":\n\t.indirect_symbol ";
+ Stubs[i].second->print(O, MAI);
+ O << "\n\t.long\t0\n";
+ }
+ Stubs.clear();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ Stubs[i].first->print(O, MAI);
+ O << ":\n" << MAI->getData32bitsDirective();
+ Stubs[i].second->print(O, MAI);
+ O << '\n';
+ }
+ Stubs.clear();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ }
+
+ if (Subtarget->isTargetCOFF()) {
+ // Necessary for dllexport support
+ std::vector<std::string> DLLExportedFns, DLLExportedGlobals;
+
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getMangledName(I));
+
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getMangledName(I));
+
+ if (Subtarget->isTargetCygMing()) {
+ // Emit type information for external functions
+ for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(),
+ E = COFFMMI.stub_end(); I != E; ++I) {
+ O << "\t.def\t " << I->getKeyData()
+ << ";\t.scl\t" << COFF::C_EXT
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+ }
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
+ true,
+ SectionKind::getMetadata()));
+
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
+ O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n";
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
+ O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n";
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
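+// SyntaxVariant 0 selects AT&T syntax and 1 selects Intel syntax; any other
+// variant yields no printer.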
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ raw_ostream &O) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(O, MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(O, MAI);
+ return 0;
+}
// Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() { }
+extern "C" void LLVMInitializeX86AsmPrinter() {
+ RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+ RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
new file mode 100644
index 000000000000..0351829b0856
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -0,0 +1,150 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+
+class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
+ : AsmPrinter(O, TM, T, V) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
+
+ virtual void EmitEndOfAsmFile(Module &M);
+
+ void printInstructionThroughMCStreamer(const MachineInstr *MI);
+
+
+ void printMCInst(const MCInst *MI);
+
+ void printSymbolOperand(const MachineOperand &MO);
+
+
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
+ void printopaquemem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+ }
+ void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const;
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
+
+ void PrintPICBaseSymbol() const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..fde5902357b2
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,131 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "X86GenInstrNames.inc"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define MachineInstr MCInst
+#define NO_ASM_WRITER_BOILERPLATE
+#include "X86GenAsmWriter1.inc"
+#undef MachineInstr
+
+void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ Op.getExpr()->print(O, &MAI);
+ }
+}
+
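+// PrintRegName - Intel syntax prints register names in upper case with no
+// '%' prefix, e.g. "eax" becomes "EAX".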
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+ for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+ O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier) {
+ assert(Modifier == 0 && "Modifiers should not be used");
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ PrintRegName(O, getRegisterName(Op.getReg()));
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ Op.getExpr()->print(O, &MAI);
+ }
+}
+
+void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
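+ // Intel-syntax memory reference: [base + scale*index + disp],
+ // e.g. "[EAX + 4*EBX + 16]".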
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2);
+ NeedPlus = true;
+ }
+
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ DispSpec.getExpr()->print(O, &MAI);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+
+ O << ']';
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+ // If this has a segment register, print it.
+ if (MI->getOperand(Op+4).getReg()) {
+ printOperand(MI, Op+4);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
new file mode 100644
index 000000000000..1976177eb13c
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,99 @@
+//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to Intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class MCOperand;
+
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+ X86IntelInstPrinter(raw_ostream &O, const MCAsmInfo &MAI)
+ : MCInstPrinter(O, MAI) {}
+
+ virtual void printInst(const MCInst *MI);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier = 0);
+ void printMemReference(const MCInst *MI, unsigned Op);
+ void printLeaMemReference(const MCInst *MI, unsigned Op);
+ void printSSECC(const MCInst *MI, unsigned Op);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo) {
+ O << "OPAQUE PTR ";
+ printMemReference(MI, OpNo);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MCInst *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+};
+
+}
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
new file mode 100644
index 000000000000..5ccddf57e7ab
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -0,0 +1,485 @@
+//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCInstLower.h"
+#include "X86AsmPrinter.h"
+#include "X86MCAsmInfo.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+
+const X86Subtarget &X86MCInstLower::getSubtarget() const {
+ return AsmPrinter.getSubtarget();
+}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+ assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
+ return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix()
+ << AsmPrinter.getFunctionNumber() << "$pb";
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+
+/// GetGlobalAddressSymbol - Lower an MO_GlobalAddress operand to an MCSymbol,
+/// handling any target flags that affect the symbol name.
+MCSymbol *X86MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ const GlobalValue *GV = MO.getGlobal();
+
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ isImplicitlyPrivate = true;
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+
+ if (getSubtarget().isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData());
+ }
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name.
+ break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *X86MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ SmallString<128> Name;
+ Name += AsmPrinter.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name.
+ case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name.
+ break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym);
+
+ if (StubSym == 0) {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI"
+ << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ break;
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+
+MCSymbol *X86MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI"
+ << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ break;
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+
+ // These affect the name of the symbol, not any suffix.
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ case X86II::MO_TLSGD:
+ case X86II::MO_GOTTPOFF:
+ case X86II::MO_INDNTPOFF:
+ case X86II::MO_TPOFF:
+ case X86II::MO_NTPOFF:
+ case X86II::MO_GOTPCREL:
+ case X86II::MO_GOT:
+ case X86II::MO_GOTOFF:
+ case X86II::MO_PLT:
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
+ Ctx);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+
+
+static void lower_subreg32(MCInst *MI, unsigned OpNo) {
+  // Convert the register operand to its 32-bit form (subreg32).
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (Reg != 0)
+ MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32));
+}
+
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
+
+
+void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+
+ // Handle a few special cases to eliminate operand modifiers.
+ switch (OutMI.getOpcode()) {
+ case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&OutMI, 1);
+ break;
+ case X86::MOV16r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX16rr8:
+ OutMI.setOpcode(X86::MOVZX32rr8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX16rm8:
+ OutMI.setOpcode(X86::MOVZX32rm8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVSX16rr8:
+ OutMI.setOpcode(X86::MOVSX32rr8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVSX16rm8:
+ OutMI.setOpcode(X86::MOVSX32rm8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rr32:
+ OutMI.setOpcode(X86::MOV32rr);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rm32:
+ OutMI.setOpcode(X86::MOV32rm);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOV64ri64i32:
+ OutMI.setOpcode(X86::MOV32ri);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rr8:
+ OutMI.setOpcode(X86::MOVZX32rr8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rm8:
+ OutMI.setOpcode(X86::MOVZX32rm8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rr16:
+ OutMI.setOpcode(X86::MOVZX32rr16);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rm16:
+ OutMI.setOpcode(X86::MOVZX32rm16);
+ lower_subreg32(&OutMI, 0);
+ break;
+ }
+}
+
+
+
+void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
+ X86MCInstLower MCInstLowering(OutContext, Mang, *this);
+ switch (MI->getOpcode()) {
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ case TargetInstrInfo::GC_LABEL:
+ printLabel(MI);
+ return;
+ case TargetInstrInfo::INLINEASM:
+ O << '\t';
+ printInlineAsm(MI);
+ return;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ printImplicitDef(MI);
+ return;
+ case TargetInstrInfo::KILL:
+ return;
+ case X86::MOVPC32r: {
+ MCInst TmpInst;
+    // This is a pseudo op for a two-instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol();
+ TmpInst.setOpcode(X86::CALLpcrel32);
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
+ OutContext)));
+ printMCInst(&TmpInst);
+ O << '\n';
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+
+ // popl $reg
+ TmpInst.setOpcode(X86::POP32r);
+ TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
+ printMCInst(&TmpInst);
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it. We know that this operand flag occurs at most once per function.
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << "picbaseref" << getFunctionNumber();
+ MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Name.str());
+ OutStreamer.EmitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr, OutContext);
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
+ printMCInst(&TmpInst);
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+ printMCInst(&TmpInst);
+}
+
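
As a worked example (symbol names illustrative), consider lowering a Darwin
PIC reference to a global "foo" carrying the MO_DARWIN_NONLAZY_PIC_BASE flag
in function number 0: GetGlobalAddressSymbol returns the stub symbol
L_foo$non_lazy_ptr (recording the real symbol in the MachO stub table), and
LowerSymbolOperand then subtracts the pic base, so the final operand is the
expression

  L_foo$non_lazy_ptr - "L0$pb"

with any constant offset from the operand added on top.
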
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
new file mode 100644
index 000000000000..fa25b906d543
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -0,0 +1,54 @@
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_MCINSTLOWER_H
+#define X86_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+ class X86AsmPrinter;
+ class X86Subtarget;
+
+/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class VISIBILITY_HIDDEN X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ X86AsmPrinter &AsmPrinter;
+
+ const X86Subtarget &getSubtarget() const;
+public:
+ X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter)
+ : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {}
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetPICBaseSymbol() const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 7ea0e5170d27..3ad65fbedc54 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -7,13 +7,15 @@ tablegen(X86GenInstrNames.inc -gen-instr-enums)
tablegen(X86GenInstrInfo.inc -gen-instr-desc)
tablegen(X86GenAsmWriter.inc -gen-asm-writer)
tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
tablegen(X86GenDAGISel.inc -gen-dag-isel)
tablegen(X86GenFastISel.inc -gen-fast-isel)
tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
-add_llvm_target(X86CodeGen
+set(sources
X86CodeEmitter.cpp
+ X86COFFMachineModuleInfo.cpp
X86ELFWriterInfo.cpp
X86FloatingPoint.cpp
X86FloatingPointRegKill.cpp
@@ -21,11 +23,19 @@ add_llvm_target(X86CodeGen
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
+ X86MCAsmInfo.cpp
X86RegisterInfo.cpp
X86Subtarget.cpp
- X86TargetAsmInfo.cpp
X86TargetMachine.cpp
+ X86TargetObjectFile.cpp
X86FastISel.cpp
)
+if( CMAKE_CL_64 )
+ enable_language(ASM_MASM)
+ set(sources ${sources} X86CompilationCallback_Win64.asm)
+endif()
+
+add_llvm_target(X86CodeGen ${sources})
+
target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 44f1c5d5a509..220831d88db3 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -13,11 +13,11 @@ TARGET = X86
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenRegisterInfo.inc X86GenInstrNames.inc \
- X86GenInstrInfo.inc X86GenAsmWriter.inc \
+ X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
X86GenFastISel.inc \
X86GenCallingConv.inc X86GenSubtarget.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter AsmParser TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index ad12137c8913..e8f7c5d6dd22 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -249,3 +249,52 @@ lowered return value, and it would free non-C frontends from a
complication only required by a C-based ABI.
//===---------------------------------------------------------------------===//
+
+We get a redundant zero extension for code like this:
+
+int mask[1000];
+int foo(unsigned x) {
+ if (x < 10)
+ x = x * 45;
+ else
+ x = x * 78;
+ return mask[x];
+}
+
+_foo:
+LBB1_0: ## entry
+ cmpl $9, %edi
+ jbe LBB1_3 ## bb
+LBB1_1: ## bb1
+ imull $78, %edi, %eax
+LBB1_2: ## bb2
+ movl %eax, %eax <----
+ movq _mask@GOTPCREL(%rip), %rcx
+ movl (%rcx,%rax,4), %eax
+ ret
+LBB1_3: ## bb
+ imull $45, %edi, %eax
+ jmp LBB1_2 ## bb2
+
+Before regalloc, we have:
+
+ %reg1025<def> = IMUL32rri8 %reg1024, 45, %EFLAGS<imp-def>
+ JMP mbb<bb2,0x203afb0>
+ Successors according to CFG: 0x203afb0 (#3)
+
+bb1: 0x203af60, LLVM BB @0x1e02310, ID#2:
+ Predecessors according to CFG: 0x203aec0 (#0)
+ %reg1026<def> = IMUL32rri8 %reg1024, 78, %EFLAGS<imp-def>
+ Successors according to CFG: 0x203afb0 (#3)
+
+bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3:
+ Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2)
+ %reg1027<def> = PHI %reg1025, mbb<bb,0x203af10>,
+ %reg1026, mbb<bb1,0x203af60>
+ %reg1029<def> = MOVZX64rr32 %reg1027
+
+so we'd have to know that IMUL32rri8 leaves the high word zero extended and be
+able to recognize the zero extend. This could also presumably be implemented
+if we had whole-function SelectionDAGs.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 4464878ce217..046d35ce5b69 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1932,3 +1932,23 @@ Replacing an icmp+select with a shift should always be considered profitable in
instcombine.
//===---------------------------------------------------------------------===//
+
+Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch
+properly.
+
+When the return value is not used (i.e. we only care about the value in
+memory), x86 does not have to use a result-producing form such as xadd to
+implement these. Instead, it can use plain add, sub, inc, and dec
+instructions with the "lock" prefix.
+
+This is currently implemented using a bit of an instruction selection trick.
+The issue is that the target-independent pattern produces one output and a
+chain, and we want to map it into one that just outputs a chain. The current
+trick is to select it into a MERGE_VALUES with the first definition being an
+implicit_def. The proper solution is to add new ISD opcodes for the no-output
+variant. The DAG combiner can then transform the node before it gets to
+target node selection.
+
+Problem #2 is that we are adding a whole bunch of x86 atomic instructions
+that are in fact identical to the non-lock versions. We need a way to attach
+target-specific information to target nodes and have it carried over to
+machine instructions. The asm printer (or JIT) can then use this information
+to add the "lock" prefix.
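
As a small illustration (GCC-style __sync builtins; the codegen in the
comments is the ideal described above, not necessarily what is produced
today):

  static int Counter;

  void bump() {
    // Result unused: a plain "lock addl $1, Counter" suffices.
    __sync_add_and_fetch(&Counter, 1);
  }

  int bumpAndRead() {
    // Result used: needs a fetching form, e.g. "lock xaddl" plus an add.
    return __sync_add_and_fetch(&Counter, 1);
  }
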
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..90be9f58cc73
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Info
+ X86TargetInfo.cpp
+ )
+
+add_dependencies(LLVMX86Info X86CodeGenTable_gen)
diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile
new file mode 100644
index 000000000000..6677d4bdfde1
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 000000000000..08d4d84f8a8a
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+ RegisterTarget<Triple::x86, /*HasJIT=*/true>
+ X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+ Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
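
A minimal sketch (assuming the TargetRegistry lookup API of this vintage) of
how a client finds the targets registered above:

  #include "llvm/Target/TargetRegistry.h"
  #include <string>

  // Returns the x86-64 target, or null (with Error filled in) if
  // LLVMInitializeX86TargetInfo() has not been run.
  const llvm::Target *findX86_64(std::string &Error) {
    return llvm::TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu",
                                              Error);
  }
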
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 22de3f642563..a1671185afb4 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -22,8 +22,10 @@ namespace llvm {
class X86TargetMachine;
class FunctionPass;
class MachineCodeEmitter;
+class MCCodeEmitter;
class JITCodeEmitter;
-class raw_ostream;
+class Target;
+class formatted_raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -42,13 +44,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
///
FunctionPass *createX87FPRegKillInserterPass();
-/// createX86CodePrinterPass - Returns a pass that prints the X86
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description.
-///
-FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm,
- bool Verbose);
-
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
@@ -56,6 +51,10 @@ FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM,
+ ObjectCodeEmitter &OCE);
+
+MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
@@ -68,6 +67,8 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentCalculatorPass();
+extern Target TheX86_32Target, TheX86_64Target;
+
} // End llvm namespace
// Defines symbolic names for X86 registers. This defines a mapping from
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 47861d5a67dc..da467fe6aa72 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -19,12 +19,17 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
-
+
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+ "Enable conditional move instructions">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
- [FeatureMMX]>;
+ // SSE codegen depends on cmovs, and all
+ // SSE1+ processors support them.
+ [FeatureMMX, FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -76,8 +81,8 @@ def : Proc<"i586", []>;
def : Proc<"pentium", []>;
def : Proc<"pentium-mmx", [FeatureMMX]>;
def : Proc<"i686", []>;
-def : Proc<"pentiumpro", []>;
-def : Proc<"pentium2", [FeatureMMX]>;
+def : Proc<"pentiumpro", [FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSSE1]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
@@ -178,21 +183,34 @@ include "X86CallingConv.td"
// Assembly Printers
//===----------------------------------------------------------------------===//
+// Currently the X86 assembly parser only supports ATT syntax.
+def ATTAsmParser : AsmParser {
+ string AsmParserClassName = "ATTAsmParser";
+ int Variant = 0;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = "#";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "%";
+}
+
// The X86 target supports two different syntaxes for printing assembly. This
// is controlled by the -x86-asm-syntax={att|intel} flag.
def ATTAsmWriter : AsmWriter {
- string AsmWriterClassName = "ATTAsmPrinter";
+ string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
- string AsmWriterClassName = "IntelAsmPrinter";
+ string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
-
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
+ let AssemblyParsers = [ATTAsmParser];
+
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
}
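
For illustration (output shown approximately), the two writers above are what
the -x86-asm-syntax flag selects between: "llc -x86-asm-syntax=att" prints a
32-bit load as "movl 4(%esp), %eax", while "llc -x86-asm-syntax=intel" prints
it through the IntelInstPrinter added in this patch, roughly as
"mov eax, DWORD PTR [esp + 4]".
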
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 000000000000..01c4fcfa1bfe
--- /dev/null
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,123 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) {
+}
+
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+}
+
+void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F,
+ const X86MachineFunctionInfo &Val) {
+ FunctionInfoMap[F] = Val;
+}
+
+
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData &TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' type in case of byval parameter attribute
+ if (F->paramHasAttr(argNum, Attribute::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to DWORD boundary
+ Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+  // We don't support absurdly large argument areas, so truncating is safe.
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
+
+
+/// DecorateCygMingName - Query FunctionInfoMap and use this information for
+/// various name decorations for Cygwin and MinGW.
+void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name,
+ const GlobalValue *GV,
+ const TargetData &TD) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // Save function name for later type emission.
+ if (F->isDeclaration())
+ CygMingStubs.insert(StringRef(Name.data(), Name.size()));
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ const X86MachineFunctionInfo *Info;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+ if (info_item == FunctionInfoMap.end()) {
+    // Calculate the appropriate function info and populate the map.
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TD);
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ if (Info->getDecorationStyle() == None) return;
+ const FunctionType *FT = F->getFunctionType();
+
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || FT->getNumParams() == 0 ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn();
+
+ if (Info->getDecorationStyle() == FastCall) {
+ if (Name[0] == '_')
+ Name[0] = '@';
+ else
+ Name.insert(Name.begin(), '@');
+ }
+}
+
+/// DecorateCygMingName - Query FunctionInfoMap and use this information for
+/// various name decorations for Cygwin and MinGW.
+void X86COFFMachineModuleInfo::DecorateCygMingName(std::string &Name,
+ const GlobalValue *GV,
+ const TargetData &TD) {
+ SmallString<128> NameStr(Name.begin(), Name.end());
+ DecorateCygMingName(NameStr, GV, TD);
+ Name.assign(NameStr.begin(), NameStr.end());
+}
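
A worked example of the decoration (declarations illustrative, 32-bit target):

  // Two int parameters occupy 8 DWORD-aligned bytes, so BytesToPopOnReturn
  // is 8; stdcall appends "@8", and fastcall also rewrites the leading '_'.
  int __attribute__((stdcall))  f(int a, int b);   // _f -> _f@8
  int __attribute__((fastcall)) g(int a, int b);   // _g -> @g@8
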
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 000000000000..afd552563d91
--- /dev/null
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,67 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/StringSet.h"
+
+namespace llvm {
+ class X86MachineFunctionInfo;
+ class TargetData;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+ StringSet<> CygMingStubs;
+
+  // We have to propagate some information about a MachineFunction to the
+  // AsmPrinter. That works while we are printing the function itself, since
+  // we have access to the MachineFunction and can get the appropriate
+  // MachineFunctionInfo. Unfortunately, it is not possible when we are only
+  // printing a reference to a Function (e.g. when calling it): there is no
+  // way to get the corresponding MachineFunction, which may not even have
+  // been created yet. That is why we collect all the necessary information
+  // in this additional structure.
+  //
+  // This structure is used, for example, for name decoration of stdcall and
+  // fastcall functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+public:
+ X86COFFMachineModuleInfo(const MachineModuleInfo &);
+ ~X86COFFMachineModuleInfo();
+
+
+ void DecorateCygMingName(std::string &Name, const GlobalValue *GV,
+ const TargetData &TD);
+ void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV,
+ const TargetData &TD);
+
+ void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val);
+
+
+ typedef StringSet<>::const_iterator stub_iterator;
+ stub_iterator stub_begin() const { return CygMingStubs.begin(); }
+ stub_iterator stub_end() const { return CygMingStubs.end(); }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index e9fcbd5a4895..d77f0390b10c 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[
// X86-Win64 C return-value convention.
def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>,
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
// And FP in XMM0 only.
CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -137,26 +137,26 @@ def CC_X86_64_C : CallingConv<[
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
+ // The first 6 v1i64 vector arguments are passed in GPRs on Darwin.
+ CCIfType<[v1i64],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCBitConvertToType<i64>>>,
+
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
-
- // The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
CCIfType<[v8i8, v4i16, v2i32, v2f32],
CCIfSubtarget<"isTargetDarwin()",
CCIfSubtarget<"hasSSE2()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>,
+ CCPromoteToType<v2i64>>>>,
- // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
- CCIfType<[v1i64],
- CCIfSubtarget<"isTargetDarwin()",
- CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
@@ -184,6 +184,13 @@ def CC_X86_Win64_C : CallingConv<[
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
+  // 128-bit vectors are passed by pointer.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+ CCBitConvertToType<i64>>,
+
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
@@ -195,24 +202,16 @@ def CC_X86_Win64_C : CallingConv<[
CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
[RCX , RDX , R8 , R9 ]>>,
- // The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
- CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
- [XMM0, XMM1, XMM2, XMM3]>>,
-
// Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 16-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80], CCAssignToStack<0, 0>>,
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
- // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//
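
A sketch of what the revised Win64 rules above mean for a call (illustrative
prototype; register assignments follow the table-driven rules above, not
measured output):

  #include <xmmintrin.h>

  // a -> ECX; v is passed indirectly, its pointer in RDX (CCPassIndirect);
  // m is bitconverted to i64 and passed in R8 (CCBitConvertToType).
  void callee(int a, __m128 v, __m64 m);
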
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index d5846a049afb..f942f3f85107 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -22,21 +22,27 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
-template<class CodeEmitter>
+ template<class CodeEmitter>
class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
const X86InstrInfo *II;
const TargetData *TD;
@@ -67,6 +73,7 @@ template<class CodeEmitter>
const TargetInstrDesc *Desc);
void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
AU.addRequired<MachineModuleInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -83,7 +90,7 @@ template<class CodeEmitter>
intptr_t PCAdj = 0);
void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
- intptr_t PCAdj = 0);
+ intptr_t Adj = 0, bool IsPCRel = true);
void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
void emitRegModRMByte(unsigned RegOpcodeField);
@@ -95,29 +102,27 @@ template<class CodeEmitter>
intptr_t PCAdj = 0);
unsigned getX86RegNum(unsigned RegNo) const;
-
- bool gvNeedsNonLazyPtr(const GlobalValue *GV);
};
template<class CodeEmitter>
char Emitter<CodeEmitter>::ID = 0;
-}
+} // end anonymous namespace.
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified templated MachineCodeEmitter object.
-namespace llvm {
-
-FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
- MachineCodeEmitter &MCE) {
+FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE) {
return new Emitter<MachineCodeEmitter>(TM, MCE);
}
-FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
- JITCodeEmitter &JCE) {
+FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
}
-
-} // end namespace llvm
+FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM,
+ ObjectCodeEmitter &OCE) {
+ return new Emitter<ObjectCodeEmitter>(TM, OCE);
+}
template<class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
@@ -130,7 +135,8 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
do {
- DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
@@ -172,7 +178,7 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t PCAdj /* = 0 */,
bool NeedStub /* = false */,
bool Indirect /* = false */) {
- intptr_t RelocCST = 0;
+ intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
RelocCST = PICBaseOffset;
else if (Reloc == X86::reloc_pcrel_word)
@@ -291,53 +297,61 @@ static bool isDisp8(int Value) {
return Value == (signed char)Value;
}
-template<class CodeEmitter>
-bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) {
- // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer
+static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
+ const TargetMachine &TM) {
+ // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
// mechanism as 32-bit mode.
- return (!Is64BitMode || TM.getSubtarget<X86Subtarget>().isTargetDarwin()) &&
- TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
+ return false;
+
+ // Return true if this is a reference to a stub containing the address of the
+ // global, not the global itself.
+ return isGlobalStubReference(GVOp.getTargetFlags());
}
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
- int DispVal, intptr_t PCAdj) {
+ int DispVal,
+ intptr_t Adj /* = 0 */,
+ bool IsPCRel /* = true */) {
// If this is a simple integer displacement that doesn't require a relocation,
// emit it now.
if (!RelocOp) {
emitConstant(DispVal, 4);
return;
}
-
+
// Otherwise, this is something that requires a relocation. Emit it as such
// now.
+ unsigned RelocType = Is64BitMode ?
+ (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
if (RelocOp->isGlobal()) {
// In 64-bit static small code model, we could potentially emit absolute.
- // But it's probably not beneficial.
+    // But it's probably not beneficial. If the MCE supports using RIP
+    // directly, do it; otherwise fall back to absolute (determined by
+    // IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
bool NeedStub = isa<Function>(RelocOp->getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal());
- emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(),
- PCAdj, NeedStub, Indirect);
+ bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+ emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+ Adj, NeedStub, Indirect);
+ } else if (RelocOp->isSymbol()) {
+ emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
- emitConstPoolAddress(RelocOp->getIndex(), rt,
- RelocOp->getOffset(), PCAdj);
- } else if (RelocOp->isJTI()) {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
- emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj);
+ emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+ RelocOp->getOffset(), Adj);
} else {
- assert(0 && "Unknown value to relocate!");
+ assert(RelocOp->isJTI() && "Unexpected machine operand!");
+ emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
}
}
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
- unsigned Op, unsigned RegOpcodeField,
- intptr_t PCAdj) {
+ unsigned Op,unsigned RegOpcodeField,
+ intptr_t PCAdj) {
const MachineOperand &Op3 = MI.getOperand(Op+3);
int DispVal = 0;
const MachineOperand *DispForReloc = 0;
@@ -345,15 +359,17 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// Figure out what sort of displacement we have to handle here.
if (Op3.isGlobal()) {
DispForReloc = &Op3;
+ } else if (Op3.isSymbol()) {
+ DispForReloc = &Op3;
} else if (Op3.isCPI()) {
- if (Is64BitMode || IsPIC) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
DispForReloc = &Op3;
} else {
DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
DispVal += Op3.getOffset();
}
} else if (Op3.isJTI()) {
- if (Is64BitMode || IsPIC) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
DispForReloc = &Op3;
} else {
DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
@@ -368,17 +384,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
unsigned BaseReg = Base.getReg();
+  // Indicate whether the displacement will use a pcrel or absolute reference
+  // by default. MCEs able to resolve addresses on the fly use pcrel by
+  // default, while others use absolute references unless explicitly asked to
+  // use RIP.
+  bool IsPCRel = MCE.earlyResolveAddresses();
+
// Is a SIB byte needed?
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
- IndexReg.getReg() == 0 &&
- (BaseReg == 0 || BaseReg == X86::RIP ||
- getX86RegNum(BaseReg) != N86::ESP)) {
- if (BaseReg == 0 ||
- BaseReg == X86::RIP) { // Just a displacement?
+ IndexReg.getReg() == 0 &&
+ ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP ||
+ (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) {
+ if (BaseReg == 0 || BaseReg == X86::RIP) { // Just a displacement?
// Emit special case [disp32] encoding
MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
-
- emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
} else {
unsigned BaseRegNo = getX86RegNum(BaseReg);
if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
@@ -391,7 +413,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
} else {
// Emit the most general non-SIB encoding: [REG+disp32]
MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
- emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
}
}
@@ -427,13 +449,13 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base. The
- // displacement has already been output.
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
IndexRegNo = getX86RegNum(IndexReg.getReg());
- else
- IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
emitSIBByte(SS, IndexRegNo, 5);
} else {
unsigned BaseRegNo = getX86RegNum(BaseReg);
@@ -449,21 +471,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
if (ForceDisp8) {
emitConstant(DispVal, 1);
} else if (DispVal != 0 || ForceDisp32) {
- emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
}
}
}
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(
- const MachineInstr &MI,
- const TargetInstrDesc *Desc) {
- DOUT << MI;
+void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
+ const TargetInstrDesc *Desc) {
+ DEBUG(errs() << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
unsigned Opcode = Desc->Opcode;
// Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0);
+ if (Desc->TSFlags & X86II::LOCK)
+ MCE.emitByte(0xF0);
// Emit segment override opcode prefix as needed.
switch (Desc->TSFlags & X86II::SegOvrMask) {
@@ -473,18 +497,21 @@ void Emitter<CodeEmitter>::emitInstruction(
case X86II::GS:
MCE.emitByte(0x65);
break;
- default: assert(0 && "Invalid segment!");
+ default: llvm_unreachable("Invalid segment!");
case 0: break; // No segment override!
}
// Emit the repeat opcode prefix as needed.
- if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
+ MCE.emitByte(0xF3);
// Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66);
+ if (Desc->TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
// Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+ if (Desc->TSFlags & X86II::AdSize)
+ MCE.emitByte(0x67);
bool Need0FPrefix = false;
switch (Desc->TSFlags & X86II::Op0Mask) {
@@ -493,6 +520,10 @@ void Emitter<CodeEmitter>::emitInstruction(
case X86II::TA: // 0F 3A
Need0FPrefix = true;
break;
+ case X86II::TF: // F2 0F 38
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
case X86II::REP: break; // already handled.
case X86II::XS: // F3 0F
MCE.emitByte(0xF3);
@@ -508,14 +539,13 @@ void Emitter<CodeEmitter>::emitInstruction(
(((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
>> X86II::Op0Shift));
break; // Two-byte opcode prefix
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case 0: break; // No prefix!
}
+ // Handle REX prefix.
if (Is64BitMode) {
- // REX prefix
- unsigned REX = X86InstrInfo::determineREX(MI);
- if (REX)
+ if (unsigned REX = X86InstrInfo::determineREX(MI))
MCE.emitByte(0x40 | REX);
}
@@ -524,7 +554,8 @@ void Emitter<CodeEmitter>::emitInstruction(
MCE.emitByte(0x0F);
switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8: // 0F 38
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
MCE.emitByte(0x38);
break;
case X86II::TA: // 0F 3A
@@ -543,29 +574,29 @@ void Emitter<CodeEmitter>::emitInstruction(
unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc);
switch (Desc->TSFlags & X86II::FormMask) {
- default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ default:
+ llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
case X86II::Pseudo:
// Remember the current PC offset, this is the PIC relocation
// base address.
switch (Opcode) {
default:
- assert(0 && "psuedo instructions should be removed before code emission");
+ llvm_unreachable("psuedo instructions should be removed before code"
+ " emission");
break;
- case TargetInstrInfo::INLINEASM: {
+ case TargetInstrInfo::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0]) {
- assert(0 && "JIT does not support inline asm!\n");
- abort();
- }
+ if (MI.getOperand(0).getSymbolName()[0])
+ llvm_report_error("JIT does not support inline asm!");
break;
- }
case TargetInstrInfo::DBG_LABEL:
case TargetInstrInfo::EH_LABEL:
+ case TargetInstrInfo::GC_LABEL:
MCE.emitLabel(MI.getOperand(0).getImm());
break;
case TargetInstrInfo::IMPLICIT_DEF:
- case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::KILL:
case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
@@ -582,73 +613,86 @@ void Emitter<CodeEmitter>::emitInstruction(
}
CurOp = NumOps;
break;
- case X86II::RawFrm:
+ case X86II::RawFrm: {
MCE.emitByte(BaseOpcode);
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
-
- DOUT << "RawFrm CurOp " << CurOp << "\n";
- DOUT << "isMBB " << MO.isMBB() << "\n";
- DOUT << "isGlobal " << MO.isGlobal() << "\n";
- DOUT << "isSymbol " << MO.isSymbol() << "\n";
- DOUT << "isImm " << MO.isImm() << "\n";
-
- if (MO.isMBB()) {
- emitPCRelativeBlockAddress(MO.getMBB());
- } else if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool NeedStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
- emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, NeedStub);
- } else if (MO.isSymbol()) {
- emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
- } else if (MO.isImm()) {
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
- // Fix up immediate operand for pc relative calls.
- intptr_t Imm = (intptr_t)MO.getImm();
- Imm = Imm - MCE.getCurrentPCValue() - 4;
- emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc));
- } else
- emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc));
- } else {
- assert(0 && "Unknown RawFrm operand!");
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(errs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(errs() << "isImm " << MO.isImm() << "\n");
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ break;
}
+
+ if (MO.isGlobal()) {
+ // Assume undefined functions may be outside the Small codespace.
+ bool NeedStub =
+ (Is64BitMode &&
+ (TM.getCodeModel() == CodeModel::Large ||
+ TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
+ Opcode == X86::TAILJMPd;
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0, NeedStub);
+ break;
+ }
+
+ if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ assert(MO.isImm() && "Unknown RawFrm operand!");
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
+ emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc));
+ } else
+ emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc));
break;
-
- case X86II::AddRegFrm:
+ }
+
+ case X86II::AddRegFrm: {
MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO1.isImm())
- emitConstant(MO1.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- // This should not occur on Darwin for relocatable objects.
- if (Opcode == X86::MOV64ri)
- rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
}
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri64i32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
break;
+ }
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
@@ -656,7 +700,8 @@ void Emitter<CodeEmitter>::emitInstruction(
getX86RegNum(MI.getOperand(CurOp+1).getReg()));
CurOp += 2;
if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
break;
}
case X86II::MRMDestMem: {
@@ -666,7 +711,8 @@ void Emitter<CodeEmitter>::emitInstruction(
.getReg()));
CurOp += X86AddrNumOperands + 1;
if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
break;
}
@@ -729,29 +775,31 @@ void Emitter<CodeEmitter>::emitInstruction(
(Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
}
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO1.isImm())
- emitConstant(MO1.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64ri32)
- rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
}
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
break;
}
@@ -768,29 +816,31 @@ void Emitter<CodeEmitter>::emitInstruction(
PCAdj);
CurOp += X86AddrNumOperands;
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO.isImm())
- emitConstant(MO.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64mi32)
- rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
- if (MO.isGlobal()) {
- bool NeedStub = isa<Function>(MO.getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal());
- emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- NeedStub, Indirect);
- } else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), rt);
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), rt);
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), rt);
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO.isImm()) {
+ emitConstant(MO.getImm(), Size);
+ break;
}
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool NeedStub = isa<Function>(MO.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO, TM);
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
break;
}
@@ -804,10 +854,264 @@ void Emitter<CodeEmitter>::emitInstruction(
}
if (!Desc->isVariadic() && CurOp != NumOps) {
- cerr << "Cannot encode: ";
- MI.dump();
- cerr << '\n';
- abort();
+#ifndef NDEBUG
+ errs() << "Cannot encode all operands of: " << MI << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter.
+//
+// FIXME: This is a total hack designed to allow work on llvm-mc to proceed
+// without being blocked on various cleanups needed to support a clean interface
+// to instruction encoding.
+//
+// Look away!
+
+#include "llvm/DerivedTypes.h"
+
+namespace {
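+// Scratch emitter that encodes a single instruction into a small fixed
+// buffer; str() returns the bytes emitted so far so the caller can copy
+// them out.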
+class MCSingleInstructionCodeEmitter : public MachineCodeEmitter {
+ uint8_t Data[256];
+
+public:
+ MCSingleInstructionCodeEmitter() { reset(); }
+
+ void reset() {
+ BufferBegin = Data;
+ BufferEnd = array_endof(Data);
+ CurBufferPtr = Data;
+ }
+
+ StringRef str() {
+ return StringRef(reinterpret_cast<char*>(BufferBegin),
+ CurBufferPtr - BufferBegin);
+ }
+
+ virtual void startFunction(MachineFunction &F) {}
+ virtual bool finishFunction(MachineFunction &F) { return false; }
+ virtual void emitLabel(uint64_t LabelID) {}
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {}
+ virtual bool earlyResolveAddresses() const { return false; }
+ virtual void addRelocation(const MachineRelocation &MR) { }
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ return 0;
+ }
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+ return 0;
+ }
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ return 0;
+ }
+ virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
+ return 0;
+ }
+ virtual void setModuleInfo(MachineModuleInfo* Info) {}
+};
+
+class X86MCCodeEmitter : public MCCodeEmitter {
+ X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+
+private:
+ X86TargetMachine &TM;
+ llvm::Function *DummyF;
+ TargetData *DummyTD;
+ mutable llvm::MachineFunction *DummyMF;
+ llvm::MachineBasicBlock *DummyMBB;
+
+ MCSingleInstructionCodeEmitter *InstrEmitter;
+ Emitter<MachineCodeEmitter> *Emit;
+
+public:
+ X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) {
+ // Verily, thou shouldst avert thine eyes.
+ const llvm::FunctionType *FTy =
+ FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false);
+ DummyF = Function::Create(FTy, GlobalValue::InternalLinkage);
+ DummyTD = new TargetData("");
+ DummyMF = new MachineFunction(DummyF, TM);
+ DummyMBB = DummyMF->CreateMachineBasicBlock();
+
+ InstrEmitter = new MCSingleInstructionCodeEmitter();
+ Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter,
+ *TM.getInstrInfo(),
+ *DummyTD, false);
+ }
+ ~X86MCCodeEmitter() {
+ delete Emit;
+ delete InstrEmitter;
+ delete DummyMF;
+ delete DummyF;
+ delete DummyTD;
+ }
+
+ bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ if (Start + 1 > MI.getNumOperands())
+ return false;
+
+ const MCOperand &Op = MI.getOperand(Start);
+ if (!Op.isReg()) return false;
+
+ Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false));
+ return true;
+ }
+
+ bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ if (Start + 1 > MI.getNumOperands())
+ return false;
+
+ const MCOperand &Op = MI.getOperand(Start);
+ if (Op.isImm()) {
+ Instr->addOperand(MachineOperand::CreateImm(Op.getImm()));
+ return true;
+ }
+ if (!Op.isExpr())
+ return false;
+
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) {
+ Instr->addOperand(MachineOperand::CreateImm(CE->getValue()));
+ return true;
+ }
+
+ // FIXME: Relocation / fixup.
+ Instr->addOperand(MachineOperand::CreateImm(0));
+ return true;
+ }
+
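+ // LEA-style memory references carry four operands (base reg, scale imm,
+ // index reg, disp); plain memory references (below) carry a fifth
+ // segment register operand as well.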
+ bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ return (AddRegToInstr(MI, Instr, Start + 0) &&
+ AddImmToInstr(MI, Instr, Start + 1) &&
+ AddRegToInstr(MI, Instr, Start + 2) &&
+ AddImmToInstr(MI, Instr, Start + 3));
+ }
+
+ bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ return (AddRegToInstr(MI, Instr, Start + 0) &&
+ AddImmToInstr(MI, Instr, Start + 1) &&
+ AddRegToInstr(MI, Instr, Start + 2) &&
+ AddImmToInstr(MI, Instr, Start + 3) &&
+ AddRegToInstr(MI, Instr, Start + 4));
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const {
+ // Don't look yet!
+
+ // Convert the MCInst to a MachineInstr so we can (ab)use the regular
+ // emitter.
+ const X86InstrInfo &II = *TM.getInstrInfo();
+ const TargetInstrDesc &Desc = II.get(MI.getOpcode());
+ MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc());
+ DummyMBB->push_back(Instr);
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) {
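+ // Operand 1 is tied to the def; stand in a dummy register operand so
+ // the MachineInstr operand indices stay in step with the emitter.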
+ Instr->addOperand(MachineOperand::CreateReg(0, false));
+ ++CurOp;
+ } else if (NumOps > 2 &&
+ Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO) == 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ bool OK = true;
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ // Matching doesn't fill this in completely; we have to choose operand 0
+ // for a tied register.
+ OK &= AddRegToInstr(MI, Instr, 0); CurOp++;
+ OK &= AddRegToInstr(MI, Instr, CurOp++);
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::RawFrm:
+ if (CurOp < NumOps) {
+ // Hack to make branches work.
+ if (!(Desc.TSFlags & X86II::ImmMask) &&
+ MI.getOperand(0).isExpr() &&
+ isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr()))
+ Instr->addOperand(MachineOperand::CreateMBB(DummyMBB));
+ else
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ OK &= AddRegToInstr(MI, Instr, CurOp++);
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // Matching doesn't fill this in completely; we have to choose operand 0
+ // for a tied register.
+ OK &= AddRegToInstr(MI, Instr, 0); CurOp++;
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRMSrcMem:
+ OK &= AddRegToInstr(MI, Instr, CurOp++);
+ if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ OK &= AddLMemToInstr(MI, Instr, CurOp);
+ else
+ OK &= AddMemToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRMDestMem:
+ OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
+ OK &= AddRegToInstr(MI, Instr, CurOp);
+ break;
+
+ default:
+ case X86II::MRMInitReg:
+ case X86II::Pseudo:
+ OK = false;
+ break;
+ }
+
+ if (!OK) {
+ errs() << "couldn't convert inst '";
+ MI.dump();
+ errs() << "' to machine instr:\n";
+ Instr->dump();
+ }
+
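+ // Encode into the scratch buffer, then hand the bytes to the caller's
+ // stream.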
+ InstrEmitter->reset();
+ if (OK)
+ Emit->emitInstruction(*Instr, &Desc);
+ OS << InstrEmitter->str();
+
+ Instr->eraseFromParent();
}
+};
}
+// Ok, now you can look.
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &,
+ TargetMachine &TM) {
+ return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM));
+}
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index 8002f98765f0..f321778db24b 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -17,10 +17,11 @@ extrn X86CompilationCallback2: PROC
X86CompilationCallback proc
push rbp
- ; Save RSP
+ ; Save RSP.
mov rbp, rsp
; Save all int arg registers
+ ; WARNING: We cannot use the register spill area - we're generating stubs by hand!
push rcx
push rdx
push r8
@@ -29,27 +30,27 @@ X86CompilationCallback proc
; Align stack on 16-byte boundary.
and rsp, -16
- ; Save all XMM arg registers
- sub rsp, 64
- movaps [rsp], xmm0
- movaps [rsp+16], xmm1
- movaps [rsp+32], xmm2
- movaps [rsp+48], xmm3
+ ; Save all XMM arg registers. Also allocate reg spill area.
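+ ; 96 bytes = 64 for the four XMM saves plus the 32-byte register home
+ ; (shadow) area the Win64 ABI guarantees the callee.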
+ sub rsp, 96
+ movaps [rsp +32], xmm0
+ movaps [rsp+16+32], xmm1
+ movaps [rsp+32+32], xmm2
+ movaps [rsp+48+32], xmm3
; JIT callee
- ; Pass prev frame and return address
+ ; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
call X86CompilationCallback2
- ; Restore all XMM arg registers
- movaps xmm3, [rsp+48]
- movaps xmm2, [rsp+32]
- movaps xmm1, [rsp+16]
- movaps xmm0, [rsp]
+ ; Restore all XMM arg registers.
+ movaps xmm3, [rsp+48+32]
+ movaps xmm2, [rsp+32+32]
+ movaps xmm1, [rsp+16+32]
+ movaps xmm0, [rsp +32]
- ; Restore RSP
+ ; Restore RSP.
mov rsp, rbp
; Restore all int arg registers
@@ -59,7 +60,7 @@ X86CompilationCallback proc
pop rdx
pop rcx
- ; Restore RBP
+ ; Restore RBP.
pop rbp
ret
X86CompilationCallback endp
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 912ab0e886f4..1597d2b31d22 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,6 +14,7 @@
#include "X86ELFWriterInfo.h"
#include "X86Relocations.h"
#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -38,11 +39,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
return R_X86_64_PC32;
case X86::reloc_absolute_word:
return R_X86_64_32;
+ case X86::reloc_absolute_word_sext:
+ return R_X86_64_32S;
case X86::reloc_absolute_dword:
return R_X86_64_64;
case X86::reloc_picrel_word:
default:
- assert(0 && "unknown relocation type");
+ llvm_unreachable("unknown x86_64 machine relocation type");
}
} else {
switch(MachineRelTy) {
@@ -50,23 +53,101 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
return R_386_PC32;
case X86::reloc_absolute_word:
return R_386_32;
+ case X86::reloc_absolute_word_sext:
case X86::reloc_absolute_dword:
case X86::reloc_picrel_word:
default:
- assert(0 && "unknown relocation type");
+ llvm_unreachable("unknown x86 machine relocation type");
}
}
return 0;
}
-long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const {
+long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32: return -4;
- break;
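+ // pc-relative fixups are applied at the start of the 4-byte field, so
+ // back up to the end of the field: addend = Modifier - 4.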
+ case R_X86_64_PC32: return Modifier - 4;
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return Modifier;
default:
- assert(0 && "unknown x86 relocation type");
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case R_386_PC32: return Modifier - 4;
+ case R_386_32: return Modifier;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case R_X86_64_PC32:
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ return 32;
+ case R_X86_64_64:
+ return 64;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case R_386_PC32:
+ case R_386_32:
+ return 32;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
}
}
return 0;
}
+
+bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case R_X86_64_PC32:
+ return true;
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return false;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case R_386_PC32:
+ return true;
+ case R_386_32:
+ return false;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return is64Bit ?
+ X86::reloc_absolute_dword : X86::reloc_absolute_word;
+}
+
+long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
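+ // e.g. a pc-relative field at offset 0x10 referring to a symbol at
+ // offset 0x40 resolves to 0x40 - (0x10 + 4) = 0x2c.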
+ if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32)
+ return SymOffset - (RelOffset + 4);
+ else
+ assert("computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 2ba1a0bd70a2..342e6e627d26 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -49,9 +49,26 @@ namespace llvm {
/// ELF relocation entry.
virtual bool hasRelocationAddend() const { return is64Bit ? true : false; }
- /// getAddendForRelTy - Gets the addend value for an ELF relocation entry
- /// based on the target relocation type
- virtual long int getAddendForRelTy(unsigned RelTy) const;
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelocationTySize - Returns the size of the relocatable field in bits.
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+ /// to reference an absolute label, e.g. a jumptable entry.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields can be relocated directly,
+ /// avoiding the need to emit a relocation symbol; compute the final
+ /// relocation value for such a field.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
};
} // end llvm namespace
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b336d780c505..3401df0c9092 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -78,19 +79,20 @@ public:
#include "X86GenFastISel.inc"
private:
- bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);
+ bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT);
- bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(MVT VT, Value *Val,
+ bool X86FastEmitStore(EVT VT, Value *Val,
const X86AddressMode &AM);
- bool X86FastEmitStore(MVT VT, unsigned Val,
+ bool X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM);
- bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
- bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);
+ bool X86SelectAddress(Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(Value *V, X86AddressMode &AM);
bool X86SelectLoad(Instruction *I);
@@ -116,7 +118,7 @@ private:
bool X86VisitIntrinsicCall(IntrinsicInst &I);
bool X86SelectCall(Instruction *I);
- CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
@@ -131,17 +133,17 @@ private:
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(MVT VT) const {
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
};
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -167,7 +169,8 @@ bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
+CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
+ bool isTailCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
@@ -186,13 +189,14 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return false;
+ case MVT::i1:
case MVT::i8:
Opc = X86::MOV8rm;
RC = X86::GR8RegisterClass;
@@ -243,13 +247,21 @@ bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
case MVT::f80: // No f80 support yet.
default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(MBB, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
+ Val = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
case MVT::i32: Opc = X86::MOV32mr; break;
@@ -266,17 +278,19 @@ X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
return true;
}
-bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
+bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
const X86AddressMode &AM) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
- Val = Constant::getNullValue(TD.getIntPtrType());
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
// If this is a store of a simple constant, fold the constant into the store.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
- switch (VT.getSimpleVT()) {
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
case MVT::i8: Opc = X86::MOV8mi; break;
case MVT::i16: Opc = X86::MOV16mi; break;
case MVT::i32: Opc = X86::MOV32mi; break;
@@ -289,7 +303,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
if (Opc) {
addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
- .addImm(CI->getSExtValue());
+ .addImm(Signed ? CI->getSExtValue() :
+ CI->getZExtValue());
return true;
}
}
@@ -304,8 +319,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
-bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
- unsigned Src, MVT SrcVT,
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
@@ -318,7 +333,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
-bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
+bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (Instruction *I = dyn_cast<Instruction>(V)) {
@@ -333,22 +348,21 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
default: break;
case Instruction::BitCast:
// Look past bitcasts.
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::Alloca: {
- if (isCall) break;
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
@@ -361,21 +375,19 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
}
case Instruction::Add: {
- if (isCall) break;
// Adds of constants are common and easy enough.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
// They have to fit in the 32-bit signed displacement field though.
if (isInt32(Disp)) {
AM.Disp = (uint32_t)Disp;
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
}
}
break;
}
case Instruction::GetElementPtr: {
- if (isCall) break;
// Pattern-match simple GEPs.
uint64_t Disp = (int32_t)AM.Disp;
unsigned IndexReg = AM.IndexReg;
@@ -416,7 +428,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
unsupported_gep:
// Ok, the GEP indices weren't all covered.
break;
@@ -426,8 +438,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
// Handle constant address.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Default &&
- TM.getCodeModel() != CodeModel::Small)
+ if (TM.getCodeModel() != CodeModel::Small)
return false;
// RIP-relative addresses can't have additional register operands.
@@ -440,63 +451,149 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
if (GVar->isThreadLocal())
return false;
- // Set up the basic address.
+ // Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
- if (!isCall &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- !Subtarget->is64Bit())
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
- // Emit an extra load if the ABI requires it.
- if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
- // Check to see if we've already materialized this
- // value in a register in this block.
- DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
- if (I != LocalValueMap.end() && I->second != 0) {
- AM.Base.Reg = I->second;
- AM.GV = 0;
- return true;
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
}
-
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer; otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
// Issue load from stub.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
X86AddressMode StubAM;
StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = AM.GV;
-
- if (TLI.getPointerTy() == MVT::i32) {
- Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
-
- if (Subtarget->isPICStyleGOT() &&
- TM.getRelocationModel() == Reloc::PIC_)
- StubAM.GVOpFlags = X86II::MO_GOT;
-
- } else {
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
- if (TM.getRelocationModel() != Reloc::Static) {
- StubAM.GVOpFlags = X86II::MO_GOTPCREL;
+ if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
- }
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
}
+
+ LoadReg = createResultReg(RC);
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
- unsigned ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = ResultReg;
- AM.GV = 0;
+ return false;
+}
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = AM.Base.Reg;
- } else if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can.
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
+ User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectCallAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle TLS or DLLImport.
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
}
return true;
@@ -518,14 +615,15 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
return false;
}
+
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction* I) {
- MVT VT;
- if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ EVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(1), AM, false))
+ if (!X86SelectAddress(I->getOperand(1), AM))
return false;
return X86FastEmitStore(VT, I->getOperand(0), AM);
@@ -534,12 +632,12 @@ bool X86FastISel::X86SelectStore(Instruction* I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(Instruction *I) {
- MVT VT;
- if (!isTypeLegal(I->getType(), VT))
+ EVT VT;
+ if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(0), AM, false))
+ if (!X86SelectAddress(I->getOperand(0), AM))
return false;
unsigned ResultReg = 0;
@@ -550,8 +648,8 @@ bool X86FastISel::X86SelectLoad(Instruction *I) {
return false;
}
-static unsigned X86ChooseCmpOpcode(MVT VT) {
- switch (VT.getSimpleVT()) {
+static unsigned X86ChooseCmpOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
@@ -565,8 +663,8 @@ static unsigned X86ChooseCmpOpcode(MVT VT) {
/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
/// of the comparison, return an opcode that works for the compare (e.g.
/// CMP32ri) otherwise return 0.
-static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
- switch (VT.getSimpleVT()) {
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
default: return 0;
case MVT::i8: return X86::CMP8ri;
@@ -581,13 +679,13 @@ static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
}
}
-bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
+bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Op1))
- Op1 = Constant::getNullValue(TD.getIntPtrType());
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
@@ -613,7 +711,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
bool X86FastISel::X86SelectCmp(Instruction *I) {
CmpInst *CI = cast<CmpInst>(I);
- MVT VT;
+ EVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
@@ -688,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
bool X86FastISel::X86SelectZExt(Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType() == Type::Int8Ty &&
- I->getOperand(0)->getType() == Type::Int1Ty) {
+ if (I->getType() == Type::getInt8Ty(I->getContext()) &&
+ I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
@@ -713,7 +811,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
// Fold the common case of a conditional branch with a comparison.
if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse()) {
- MVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
@@ -850,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
bool X86FastISel::X86SelectShift(Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
- if (I->getType() == Type::Int8Ty) {
+ if (I->getType() == Type::getInt8Ty(I->getContext())) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
@@ -859,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
default: return false;
}
- } else if (I->getType() == Type::Int16Ty) {
+ } else if (I->getType() == Type::getInt16Ty(I->getContext())) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
@@ -868,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
default: return false;
}
- } else if (I->getType() == Type::Int32Ty) {
+ } else if (I->getType() == Type::getInt32Ty(I->getContext())) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
@@ -877,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
default: return false;
}
- } else if (I->getType() == Type::Int64Ty) {
+ } else if (I->getType() == Type::getInt64Ty(I->getContext())) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
@@ -890,7 +988,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
return false;
}
- MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
return false;
@@ -924,7 +1022,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
}
bool X86FastISel::X86SelectSelect(Instruction *I) {
- MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
return false;
@@ -959,9 +1057,10 @@ bool X86FastISel::X86SelectSelect(Instruction *I) {
bool X86FastISel::X86SelectFPExt(Instruction *I) {
// fpext from float to double.
- if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
+ if (Subtarget->hasSSE2() &&
+ I->getType()->isDoubleTy()) {
Value *V = I->getOperand(0);
- if (V->getType() == Type::FloatTy) {
+ if (V->getType()->isFloatTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
@@ -976,9 +1075,9 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) {
bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
if (Subtarget->hasSSE2()) {
- if (I->getType() == Type::FloatTy) {
+ if (I->getType()->isFloatTy()) {
Value *V = I->getOperand(0);
- if (V->getType() == Type::DoubleTy) {
+ if (V->getType()->isDoubleTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
@@ -996,8 +1095,8 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) {
if (Subtarget->is64Bit())
// All other cases should be handled by the tblgen generated code.
return false;
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
// This code only handles truncation to byte right now.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
@@ -1065,7 +1164,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
- MVT VT;
+ EVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
@@ -1125,7 +1224,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Handle only C and fastcc calling conventions for now.
CallSite CS(CI);
- unsigned CC = CS.getCallingConv();
+ CallingConv::ID CC = CS.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
CC != CallingConv::X86_FastCall)
@@ -1144,8 +1243,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
- MVT RetVT;
- if (RetTy == Type::VoidTy)
+ EVT RetVT;
+ if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT, true))
return false;
@@ -1153,7 +1252,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Materialize callee address in a register. FIXME: GV address can be
// handled with a CALLpcrel32 instead.
X86AddressMode CalleeAM;
- if (!X86SelectAddress(Callee, CalleeAM, true))
+ if (!X86SelectCallAddress(Callee, CalleeAM))
return false;
unsigned CalleeOp = 0;
GlobalValue *GV = 0;
@@ -1174,7 +1273,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Deal with call operands first.
SmallVector<Value*, 8> ArgVals;
SmallVector<unsigned, 8> Args;
- SmallVector<MVT, 8> ArgVTs;
+ SmallVector<EVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(CS.arg_size());
ArgVals.reserve(CS.arg_size());
@@ -1200,7 +1299,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
return false;
const Type *ArgTy = (*i)->getType();
- MVT ArgVT;
+ EVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
@@ -1214,7 +1313,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, TM, ArgLocs);
+ CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
@@ -1230,11 +1329,11 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
unsigned Arg = Args[VA.getValNo()];
- MVT ArgVT = ArgVTs[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt: {
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
@@ -1266,6 +1365,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
ArgVT = VA.getLocVT();
break;
}
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
+ ISD::BIT_CONVERT, Arg);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
}
if (VA.isRegLoc()) {
@@ -1294,28 +1401,53 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
- if (!Subtarget->is64Bit() &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
+ if (Subtarget->isPICStyleGOT()) {
TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
}
-
+
// Issue the call.
- unsigned CallOpc = CalleeOp
- ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r)
- : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
- MachineInstrBuilder MIB = CalleeOp
- ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp)
- : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV);
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
+ MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);
+
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc =
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
+ }
// Add an implicit use GOT pointer in EBX.
- if (!Subtarget->is64Bit() &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
+ if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX);
// Add implicit physical register uses to the call.
@@ -1327,14 +1459,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
// Now handle call return value (if any).
- if (RetVT.getSimpleVT() != MVT::isVoid) {
+ if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, TM, RVLocs);
+ CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
- MVT CopyVT = RVLocs[0].getValVT();
+ EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
TargetRegisterClass *SrcRC = DstRC;
@@ -1358,7 +1490,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
// then loading it back. Ewww...
- MVT ResVT = RVLocs[0].getValVT();
+ EVT ResVT = RVLocs[0].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize);
@@ -1418,8 +1550,8 @@ X86FastISel::TargetSelectInstruction(Instruction *I) {
return X86SelectExtractValue(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
if (DstVT.bitsGT(SrcVT))
return X86SelectZExt(I);
if (DstVT.bitsLT(SrcVT))
@@ -1435,14 +1567,14 @@ X86FastISel::TargetSelectInstruction(Instruction *I) {
}
unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
- MVT VT;
+ EVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i8:
Opc = X86::MOV8rm;
@@ -1487,7 +1619,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
// Materialize addresses with LEA instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
- if (X86SelectAddress(C, AM, false)) {
+ if (X86SelectAddress(C, AM)) {
if (TLI.getPointerTy() == MVT::i32)
Opc = X86::LEA32r;
else
@@ -1509,16 +1641,15 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
unsigned char OpFlag = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub()) {
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
- } else if (Subtarget->isPICStyleGOT()) {
- OpFlag = X86II::MO_GOTOFF;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
- } else if (Subtarget->isPICStyleRIPRel() &&
- TM.getCodeModel() == CodeModel::Small)
- PICBase = X86::RIP;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
}
// Create the load from the constant pool.
@@ -1542,7 +1673,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
return 0;
X86AddressMode AM;
- if (!X86SelectAddress(C, AM, false))
+ if (!X86SelectAddress(C, AM))
return 0;
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 37027ee8beba..d9a05a83b9e5 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -31,19 +31,21 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -56,6 +58,7 @@ namespace {
FPS() : MachineFunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -73,12 +76,12 @@ namespace {
unsigned StackTop; // The current top of the FP stack.
void dumpStack() const {
- cerr << "Stack contents:";
+ errs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
- cerr << " FP" << Stack[i];
+ errs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
- cerr << "\n";
+ errs() << "\n";
}
private:
/// isStackEmpty - Return true if the FP stack is empty.
@@ -210,6 +213,14 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I)
Changed |= processBasicBlock(MF, **I);
+ // Process any unreachable blocks in arbitrary order now.
+ if (MF.size() == Processed.size())
+ return Changed;
+
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
return Changed;
}
@@ -236,7 +247,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
PrevMI = prior(I);
++NumFP; // Keep track of # of pseudo instrs
- DOUT << "\nFPInst:\t" << *MI;
+ DEBUG(errs() << "\nFPInst:\t" << *MI);
// Get dead variables list now because the MI pointer may be deleted as part
// of processing!
@@ -255,7 +266,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
case X86II::CompareFP: handleCompareFP(I); break;
case X86II::CondMovFP: handleCondMovFP(I); break;
case X86II::SpecialFP: handleSpecialFP(I); break;
- default: assert(0 && "Unknown FP Type!");
+ default: llvm_unreachable("Unknown FP Type!");
}
// Check to see if any of the values defined by this instruction are dead
@@ -263,7 +274,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
unsigned Reg = DeadRegs[i];
if (Reg >= X86::FP0 && Reg <= X86::FP6) {
- DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
+ DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
}
}
@@ -272,13 +283,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
DEBUG(
MachineBasicBlock::iterator PrevI(PrevMI);
if (I == PrevI) {
- cerr << "Just deleted pseudo instruction\n";
+ errs() << "Just deleted pseudo instruction\n";
} else {
MachineBasicBlock::iterator Start = I;
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
- cerr << "Inserted instructions:\n\t";
- Start->print(*cerr.stream(), &MF.getTarget());
+ errs() << "Inserted instructions:\n\t";
+ Start->print(errs(), &MF.getTarget());
while (++Start != next(I)) {}
}
dumpStack();
@@ -945,7 +956,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
- default: assert(0 && "Unknown SpecialFP instruction!");
+ default: llvm_unreachable("Unknown SpecialFP instruction!");
case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 009846e2e0b5..3e0385c79c19 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -35,6 +35,7 @@ namespace {
FPRegKiller() : MachineFunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -117,9 +118,10 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
!ContainsFPCode && SI != E; ++SI) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()==Type::X86_FP80Ty ||
+ if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
(!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
- (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) {
+ (!Subtarget.hasSSE2() &&
+ PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
ContainsFPCode = true;
break;
}
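
The Type::X86_FP80Ty-style globals used here were retired in favor of per-context getters: primitive types are now uniqued inside an LLVMContext, so code must ask the context that owns the surrounding IR. A sketch under that API (needsX87 is a hypothetical helper):

#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
using namespace llvm;

// Pointer equality is safe: each context uniques its primitive types.
static bool needsX87(const Type *Ty, LLVMContext &Ctx, bool HasSSE2) {
  if (Ty == Type::getX86_FP80Ty(Ctx))
    return true;
  return !HasSSE2 && Ty == Type::getDoubleTy(Ctx);
}
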
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1336177de249..5b678fb602dc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -35,8 +35,9 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -78,7 +79,8 @@ namespace {
X86ISelAddressMode()
: BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
+ Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0),
+ SymbolFlags(X86II::MO_NO_FLAG) {
}
bool hasSymbolicDisplacement() const {
@@ -105,23 +107,37 @@ namespace {
}
void dump() {
- cerr << "X86ISelAddressMode " << this << "\n";
- cerr << "Base.Reg ";
- if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
- else cerr << "nul";
- cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
- cerr << " Scale" << Scale << "\n";
- cerr << "IndexReg ";
- if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
- else cerr << "nul";
- cerr << " Disp " << Disp << "\n";
- cerr << "GV "; if (GV) GV->dump();
- else cerr << "nul";
- cerr << " CP "; if (CP) CP->dump();
- else cerr << "nul";
- cerr << "\n";
- cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
- cerr << " JT" << JT << " Align" << Align << "\n";
+ errs() << "X86ISelAddressMode " << this << '\n';
+ errs() << "Base.Reg ";
+ if (Base.Reg.getNode() != 0)
+ Base.Reg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
+ << " Scale" << Scale << '\n'
+ << "IndexReg ";
+ if (IndexReg.getNode() != 0)
+ IndexReg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << " Disp " << Disp << '\n'
+ << "GV ";
+ if (GV)
+ GV->dump();
+ else
+ errs() << "nul";
+ errs() << " CP ";
+ if (CP)
+ CP->dump();
+ else
+ errs() << "nul";
+ errs() << '\n'
+ << "ES ";
+ if (ES)
+ errs() << ES;
+ else
+ errs() << "nul";
+ errs() << " JT" << JT << " Align" << Align << '\n';
}
};
}
@@ -140,10 +156,6 @@ namespace {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// CurBB - Current BB being isel'd.
- ///
- MachineBasicBlock *CurBB;
-
/// OptForSize - If true, selector should try to optimize for code size
/// instead of performance.
bool OptForSize;
@@ -174,12 +186,14 @@ namespace {
private:
SDNode *Select(SDValue N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
- bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
- unsigned Depth = 0);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -342,13 +356,17 @@ static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
Ops.push_back(Load.getOperand(0));
else
Ops.push_back(TF.getOperand(i));
- CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+ SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
+ SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
+ Load.getOperand(1),
+ Load.getOperand(2));
+ CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
Store.getOperand(2), Store.getOperand(3));
}
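
MoveBelowTokenFactor now threads the SDValue returned by each UpdateNodeOperands call into the next: updating operands can CSE a node into an existing equivalent, so the returned value, not the original, must feed later uses. Sketched as a hypothetical helper under the same assumption:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Rewire a load below a new token factor. The value returned by
// UpdateNodeOperands may be a different (CSE'd) node, so it -- not the
// original Load -- must feed the store's chain operand.
static void rewire(SelectionDAG *DAG, SDValue TF, SDValue Load, SDValue Store) {
  SDValue NewLoad = DAG->UpdateNodeOperands(Load, TF, Load.getOperand(1),
                                            Load.getOperand(2));
  DAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
                          Store.getOperand(2), Store.getOperand(3));
}
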
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
+/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The
+/// chain produced by the load must only be used by the store's chain operand,
+/// otherwise this may produce a cycle in the DAG.
///
static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
SDValue &Load) {
@@ -366,8 +384,9 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
return false;
if (N.hasOneUse() &&
+ LD->hasNUsesOfValue(1, 1) &&
N.getOperand(1) == Address &&
- N.getNode()->isOperandOf(Chain.getNode())) {
+ LD->isOperandOf(Chain.getNode())) {
Load = N;
return true;
}
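
The extra LD->hasNUsesOfValue(1, 1) guard enforces the condition the updated doc comment describes: the load's chain result (value #1) may be consumed only by the store, because folding load, operation, and store into one RMW node would otherwise leave a second chain user wired both above and below the new node, producing a cycle. The check in isolation:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// A load is RMW-foldable only if nothing besides the store depends on
// its chain output; result #1 of a LoadSDNode is the chain.
static bool chainFeedsStoreOnly(LoadSDNode *LD) {
  return LD->hasNUsesOfValue(1, 1); // exactly one use of value #1
}
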
@@ -431,7 +450,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
- Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+ Callee.getValue(1).hasOneUse())
return true;
return false;
}
@@ -583,8 +603,8 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
// If the source and destination are SSE registers, then this is a legal
// conversion that should not be lowered.
- MVT SrcVT = N->getOperand(0).getValueType();
- MVT DstVT = N->getValueType(0);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
@@ -602,7 +622,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
- MVT MemVT;
+ EVT MemVT;
if (N->getOpcode() == ISD::FP_ROUND)
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
@@ -635,8 +655,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
- CurBB = BB; // BB can change as result of isel.
- const Function *F = CurDAG->getMachineFunction().getFunction();
+ const Function *F = MF->getFunction();
OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
DEBUG(BB->dump());
@@ -648,12 +667,12 @@ void X86DAGToDAGISel::InstructionSelect() {
// Codegen the basic block.
#ifndef NDEBUG
- DOUT << "===== Instruction selection begins:\n";
+ DEBUG(errs() << "===== Instruction selection begins:\n");
Indent = 0;
#endif
SelectRoot(*CurDAG);
#ifndef NDEBUG
- DOUT << "===== Instruction selection ends:\n";
+ DEBUG(errs() << "===== Instruction selection ends:\n");
#endif
CurDAG->RemoveDeadNodes();
@@ -706,7 +725,7 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode. These wrap things that will resolve down into a
/// symbol reference. If no match is possible, this returns true, otherwise it
-/// returns false.
+/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// If the addressing mode already has a symbol as the displacement, we can
// never match another symbol.
@@ -714,28 +733,27 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
return true;
SDValue N0 = N.getOperand(0);
-
+ CodeModel::Model M = TM.getCodeModel();
+
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
if (Subtarget->is64Bit() &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- TM.getCodeModel() == CodeModel::Small &&
-
+ (M == CodeModel::Small || M == CodeModel::Kernel) &&
// Base and index reg must be 0 in order to use %rip as base and lowering
// must allow RIP.
!AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
-
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
int64_t Offset = AM.Disp + G->getOffset();
- if (!isInt32(Offset)) return true;
+ if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
AM.GV = G->getGlobal();
AM.Disp = Offset;
AM.SymbolFlags = G->getTargetFlags();
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
int64_t Offset = AM.Disp + CP->getOffset();
- if (!isInt32(Offset)) return true;
+ if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.Disp = Offset;
@@ -748,7 +766,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
}
-
+
if (N.getOpcode() == X86ISD::WrapperRIP)
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
return false;
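
Both isInt32 call sites above now defer to X86::isOffsetSuitableForCodeModel, which also admits the kernel code model: kernel code is linked into the negative 2GB of the address space, so its symbols remain reachable through sign-extended 32-bit displacements. A rough standalone approximation of the baseline requirement (not LLVM's exact logic, which adds per-model slack for symbolic displacements):

#include <cstdint>

// True iff Offset survives a round trip through the signed 32-bit
// displacement field of the instruction encoding.
static bool fitsImm32(int64_t Offset) {
  return Offset == static_cast<int32_t>(Offset);
}
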
@@ -758,7 +776,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
// mode, this results in a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- (TM.getCodeModel() == CodeModel::Small &&
+ ((M == CodeModel::Small || M == CodeModel::Kernel) &&
TM.getRelocationModel() == Reloc::Static)) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
@@ -786,15 +804,49 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
-bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
- unsigned Depth) {
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (MatchAddressRecursively(N, AM, 0))
+ return true;
+
+ // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
+ // a smaller encoding and avoids a scaled-index.
+ if (AM.Scale == 2 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0) {
+ AM.Base.Reg = AM.IndexReg;
+ AM.Scale = 1;
+ }
+
+ // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
+ // because it has a smaller encoding.
+ // TODO: Which other code models can use this?
+ if (TM.getCodeModel() == CodeModel::Small &&
+ Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+
+ return false;
+}
+
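
The first post-processing rule above rewrites (,%reg,2), an index scaled by 2 with no base, into (%reg,%reg): without a base register the SIB encoding forces a 32-bit displacement, so the base-plus-index form is several bytes shorter. Sketched on a hypothetical stripped-down address struct:

// 0 stands for "no register" in this toy representation.
struct ToyAddr {
  unsigned BaseReg;
  unsigned IndexReg;
  unsigned Scale;
};

// (,x,2) -> (x,x): same address, cheaper encoding.
static void foldScale2(ToyAddr &AM) {
  if (AM.Scale == 2 && AM.BaseReg == 0 && AM.IndexReg != 0) {
    AM.BaseReg = AM.IndexReg;
    AM.Scale = 1;
  }
}
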
+bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
- DOUT << "MatchAddress: "; DEBUG(AM.dump());
+ DEBUG({
+ errs() << "MatchAddress: ";
+ AM.dump();
+ });
// Limit recursion.
if (Depth > 5)
return MatchAddressBase(N, AM);
-
+
+ CodeModel::Model M = TM.getCodeModel();
+
// If this is already a %rip relative address, we can only merge immediates
// into it. Instead of handling this in every case, we handle it here.
// RIP relative addressing: %rip + 32-bit displacement!
@@ -803,10 +855,11 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
// displacements. It isn't very important, but this should be fixed for
// consistency.
if (!AM.ES && AM.JT != -1) return true;
-
+
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
int64_t Val = AM.Disp + Cst->getSExtValue();
- if (isInt32(Val)) {
+ if (X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement())) {
AM.Disp = Val;
return false;
}
@@ -818,7 +871,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
default: break;
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (!is64Bit || isInt32(AM.Disp + Val)) {
+ if (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
+ AM.hasSymbolicDisplacement())) {
AM.Disp += Val;
return false;
}
@@ -857,6 +912,10 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
+ // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
+ // that the base operand remains free for further matching. If
+ // the base doesn't end up getting used, a post-processing step
+ // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
if (Val == 1 || Val == 2 || Val == 3) {
AM.Scale = 1 << Val;
SDValue ShVal = N.getNode()->getOperand(0);
@@ -870,7 +929,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
- if (!is64Bit || isInt32(Disp))
+ if (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(Disp, M,
+ AM.hasSymbolicDisplacement()))
AM.Disp = Disp;
else
AM.IndexReg = ShVal;
@@ -912,7 +973,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
CN->getZExtValue();
- if (!is64Bit || isInt32(Disp))
+ if (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(Disp, M,
+ AM.hasSymbolicDisplacement()))
AM.Disp = Disp;
else
Reg = N.getNode()->getOperand(0);
@@ -936,7 +999,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
- if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
@@ -998,12 +1061,12 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
case ISD::ADD: {
X86ISelAddressMode Backup = AM;
- if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
+ if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
return false;
AM = Backup;
- if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
+ if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
+ !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
return false;
AM = Backup;
@@ -1027,11 +1090,13 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
X86ISelAddressMode Backup = AM;
uint64_t Offset = CN->getSExtValue();
// Start with the LHS as an addr mode.
- if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
- (!is64Bit || isInt32(AM.Disp + Offset)) &&
+ (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
+ AM.hasSymbolicDisplacement())) &&
// Check to see if the LHS & C is zero.
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
AM.Disp += Offset;
@@ -1219,7 +1284,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
if (!Done && MatchAddress(N, AM))
return false;
- MVT VT = N.getValueType();
+ EVT VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.getNode())
AM.Base.Reg = CurDAG->getRegister(0, VT);
@@ -1292,7 +1357,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
assert (T == AM.Segment);
AM.Segment = Copy;
- MVT VT = N.getValueType();
+ EVT VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.getNode())
@@ -1329,12 +1394,13 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
Complexity++;
- if (Complexity > 2) {
- SDValue Segment;
- getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
- return true;
- }
- return false;
+ // If it isn't worth using an LEA, reject it.
+ if (Complexity <= 2)
+ return false;
+
+ SDValue Segment;
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
@@ -1380,7 +1446,6 @@ bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
/// initialize the global base register, if necessary.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = CurBB->getParent();
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
@@ -1400,367 +1465,686 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
- SDValue LSI = Node->getOperand(4); // MemOperand
- const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
- return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
- MVT::i32, MVT::i32, MVT::Other, Ops,
- array_lengthof(Ops));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_add_and_fetch and
+ // __sync_sub_and_fetch where the result is not used. This allows us
+ // to use "lock" version of add, sub, inc, dec instructions.
+ // FIXME: Do not use special instructions; instead, add the "lock" prefix
+ // to the target node somehow, so that the extra information is transferred
+ // to the machine instruction, where it denotes the prefix.

+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ bool isInc = false, isDec = false, isSub = false, isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN) {
+ isCN = true;
+ int64_t CNVal = CN->getSExtValue();
+ if (CNVal == 1)
+ isInc = true;
+ else if (CNVal == -1)
+ isDec = true;
+ else if (CNVal >= 0)
+ Val = CurDAG->getTargetConstant(CNVal, NVT);
+ else {
+ isSub = true;
+ Val = CurDAG->getTargetConstant(-CNVal, NVT);
+ }
+ } else if (Val.hasOneUse() &&
+ Val.getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0))) {
+ isSub = true;
+ Val = Val.getOperand(1);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isInc)
+ Opc = X86::LOCK_INC8m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC8m;
+ else if (isSub) {
+ if (isCN)
+ Opc = X86::LOCK_SUB8mi;
+ else
+ Opc = X86::LOCK_SUB8mr;
+ } else {
+ if (isCN)
+ Opc = X86::LOCK_ADD8mi;
+ else
+ Opc = X86::LOCK_ADD8mr;
+ }
+ break;
+ case MVT::i16:
+ if (isInc)
+ Opc = X86::LOCK_INC16m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC16m;
+ else if (isSub) {
+ if (isCN) {
+ if (Predicate_i16immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB16mi8;
+ else
+ Opc = X86::LOCK_SUB16mi;
+ } else
+ Opc = X86::LOCK_SUB16mr;
+ } else {
+ if (isCN) {
+ if (Predicate_i16immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD16mi8;
+ else
+ Opc = X86::LOCK_ADD16mi;
+ } else
+ Opc = X86::LOCK_ADD16mr;
+ }
+ break;
+ case MVT::i32:
+ if (isInc)
+ Opc = X86::LOCK_INC32m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC32m;
+ else if (isSub) {
+ if (isCN) {
+ if (Predicate_i32immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB32mi8;
+ else
+ Opc = X86::LOCK_SUB32mi;
+ } else
+ Opc = X86::LOCK_SUB32mr;
+ } else {
+ if (isCN) {
+ if (Predicate_i32immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD32mi8;
+ else
+ Opc = X86::LOCK_ADD32mi;
+ } else
+ Opc = X86::LOCK_ADD32mr;
+ }
+ break;
+ case MVT::i64:
+ if (isInc)
+ Opc = X86::LOCK_INC64m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC64m;
+ else if (isSub) {
+ Opc = X86::LOCK_SUB64mr;
+ if (isCN) {
+ if (Predicate_i64immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi8;
+ else if (Predicate_i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi32;
+ }
+ } else {
+ Opc = X86::LOCK_ADD64mr;
+ if (isCN) {
+ if (Predicate_i64immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi8;
+ else if (Predicate_i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi32;
+ }
+ }
+ break;
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ if (isInc || isDec) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ }
+}
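
SelectAtomicLoadAdd only fires when the atomic's loaded result is unused (the hasAnyUseOfValue(0) check at the top); in that case the full exchange sequence is unnecessary and a single lock-prefixed RMW instruction suffices. Roughly the source patterns it targets, expressed with the GCC-style builtins named in the comment above (the instructions in the comments are the intended selections, shown for illustration):

void bump(volatile int *P, int N) {
  __sync_add_and_fetch(P, 1);  // result unused -> lock incl
  __sync_sub_and_fetch(P, 1);  // result unused -> lock decl
  __sync_add_and_fetch(P, N);  // result unused -> lock addl
  __sync_sub_and_fetch(P, N);  // result unused -> lock subl
}
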
+
+/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
+/// any uses which require the SF or OF bits to be accurate.
+static bool HasNoSignedComparisonUses(SDNode *N) {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ // Only examine CopyToReg uses.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
+ X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1) continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the opcode of the user.
+ switch (FlagUI->getMachineOpcode()) {
+ // These comparisons don't treat the most significant bit specially.
+ case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
+ case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
+ case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
+ case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
+ case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
+ case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
+ case X86::CMOVA16rr: case X86::CMOVA16rm:
+ case X86::CMOVA32rr: case X86::CMOVA32rm:
+ case X86::CMOVA64rr: case X86::CMOVA64rm:
+ case X86::CMOVAE16rr: case X86::CMOVAE16rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE64rr: case X86::CMOVAE64rm:
+ case X86::CMOVB16rr: case X86::CMOVB16rm:
+ case X86::CMOVB32rr: case X86::CMOVB32rm:
+ case X86::CMOVB64rr: case X86::CMOVB64rm:
+ case X86::CMOVBE16rr: case X86::CMOVBE16rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE64rr: case X86::CMOVBE64rm:
+ case X86::CMOVE16rr: case X86::CMOVE16rm:
+ case X86::CMOVE32rr: case X86::CMOVE32rm:
+ case X86::CMOVE64rr: case X86::CMOVE64rm:
+ case X86::CMOVNE16rr: case X86::CMOVNE16rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE64rr: case X86::CMOVNE64rm:
+ case X86::CMOVNP16rr: case X86::CMOVNP16rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP64rr: case X86::CMOVNP64rm:
+ case X86::CMOVP16rr: case X86::CMOVP16rm:
+ case X86::CMOVP32rr: case X86::CMOVP32rm:
+ case X86::CMOVP64rr: case X86::CMOVP64rm:
+ continue;
+ // Anything else: assume conservatively.
+ default: return false;
+ }
+ }
+ }
+ return true;
}
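
HasNoSignedComparisonUses is the safety valve for the test-narrowing transformations that follow: shrinking testl to testb moves the bit that feeds SF from bit 31 to bit 7, so the rewrite is only safe when the masked sign bit is clear or when no consumer reads SF/OF. A self-contained demonstration of the discrepancy:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t eax = 0x80, imm = 0x80;  // masked value has bit 7 set
  uint32_t r = eax & imm;           // what TEST computes (result discarded)
  int sf_testl = (r >> 31) & 1;     // SF after testl: 0
  int sf_testb = (r >> 7) & 1;      // SF after testb: 1 -- differs!
  std::printf("testl SF=%d, testb SF=%d\n", sf_testl, sf_testb);
  return 0;
}
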
SDNode *X86DAGToDAGISel::Select(SDValue N) {
SDNode *Node = N.getNode();
- MVT NVT = Node->getValueType(0);
+ EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
#ifndef NDEBUG
- DOUT << std::string(Indent, ' ') << "Selecting: ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent, ' ') << "Selecting: ";
+ Node->dump(CurDAG);
+ errs() << '\n';
+ });
Indent += 2;
#endif
if (Node->isMachineOpcode()) {
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "== ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "== ";
+ Node->dump(CurDAG);
+ errs() << '\n';
+ });
Indent -= 2;
#endif
return NULL; // Already selected.
}
switch (Opcode) {
- default: break;
- case X86ISD::GlobalBaseReg:
- return getGlobalBaseReg();
-
- case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
- case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
- case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
- case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
- case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
- case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
-
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- bool isSigned = Opcode == ISD::SMUL_LOHI;
- if (!isSigned)
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
- case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
- }
- else
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
- case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
- case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
- case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
- }
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
-
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- // multiplty is commmutative
- if (!foldedLoad) {
- foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- if (foldedLoad)
- std::swap(N0, N1);
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
+ }
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
- N0, SDValue()).getValue(1);
-
- if (foldedLoad) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
- SDNode *CNode =
- CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
- } else {
- InFlag =
- SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
- }
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
- // Copy the low half of the result, if it is needed.
- if (!N.getValue(0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the low half of the result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ MVT::i8, Result);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
}
- // Copy the high half of the result, if it is needed.
- if (!N.getValue(1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
- // Then truncate it down to i8.
- SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
- Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
- MVT::i8, Result, SRIdx), 0);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
- }
+ }
#ifndef NDEBUG
- Indent -= 2;
+ Indent -= 2;
#endif
- return NULL;
- }
-
- case ISD::SDIVREM:
- case ISD::UDIVREM: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- bool isSigned = Opcode == ISD::SDIVREM;
- if (!isSigned)
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
- case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
- case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
- case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
- }
- else
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
- case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
- case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
- case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
- }
+ return NULL;
+ }
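
The "prevent use of AH in a REX instruction" sequence above recurs in the divide case below: x86-64 instructions carrying a REX prefix cannot encode AH/BH/CH/DH, so the selector copies the full AX, shifts right by 8, and truncates to i8 instead of copying AH directly. The arithmetic being performed, in isolation:

#include <cstdint>

// Equivalent of "copy AX; shr $8; truncate to i8" -- yields AH's value
// without ever naming AH in a REX-prefixed instruction.
static uint8_t highByte(uint16_t AX) {
  return static_cast<uint8_t>(AX >> 8);
}
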
- unsigned LoReg, HiReg;
- unsigned ClrOpcode, SExtOpcode;
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8:
- LoReg = X86::AL; HiReg = X86::AH;
- ClrOpcode = 0;
- SExtOpcode = X86::CBW;
- break;
- case MVT::i16:
- LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV16r0;
- SExtOpcode = X86::CWD;
- break;
- case MVT::i32:
- LoReg = X86::EAX; HiReg = X86::EDX;
- ClrOpcode = X86::MOV32r0;
- SExtOpcode = X86::CDQ;
- break;
- case MVT::i64:
- LoReg = X86::RAX; HiReg = X86::RDX;
- ClrOpcode = X86::MOV64r0;
- SExtOpcode = X86::CQO;
- break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+ }
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- bool signBitIsZero = CurDAG->SignBitIsZero(N0);
-
- SDValue InFlag;
- if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
- // Special case for div8, just use a move with zero extension to AX to
- // clear the upper 8 bits (AH).
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
- if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
- Move =
- SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
- MVT::Other, Ops,
- array_lengthof(Ops)), 0);
- Chain = Move.getValue(1);
- ReplaceUses(N0.getValue(1), Chain);
- } else {
- Move =
- SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
- Chain = CurDAG->getEntryNode();
- }
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
- InFlag = Chain.getValue(1);
+ unsigned LoReg, HiReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = ~0U; // NOT USED.
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
} else {
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
InFlag =
- CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
- LoReg, N0, SDValue()).getValue(1);
- if (isSigned && !signBitIsZero) {
- // Sign extend the low part into the high part.
- InFlag =
- SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode;
+
+ if (NVT.getSimpleVT() == MVT::i64) {
+ ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
+ 0);
+ // We just did a 32-bit clear, insert it into a 64-bit register to
+ // clear the whole 64-bit reg.
+ SDValue Undef =
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, MVT::i64), 0);
+ SDValue SubRegNo =
+ CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
+ ClrNode =
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+ MVT::i64, Undef, ClrNode, SubRegNo),
+ 0);
} else {
- // Zero out the high part, effectively zero extending the input.
- SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
- 0);
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
- ClrNode, InFlag).getValue(1);
+ ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
}
- }
- if (foldedLoad) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
- SDNode *CNode =
- CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
- } else {
- InFlag =
- SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
+ ClrNode, InFlag).getValue(1);
}
+ }
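
The i64 branch above clears the high register with a 32-bit MOV32r0 inserted into a 64-bit register rather than a 64-bit zeroing instruction: on x86-64, any write to a 32-bit register implicitly zeroes the upper half of the containing 64-bit register, so the 32-bit form is sufficient and saves the REX.W byte. The hardware rule, modeled:

#include <cstdint>

// Writing a 32-bit register on x86-64 zero-extends into the full
// 64-bit register, so a 32-bit clear clears all 64 bits.
static uint64_t after32BitWrite(uint32_t NewLow) {
  return static_cast<uint64_t>(NewLow); // upper 32 bits are zero
}
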
- // Copy the division (low) result, if it is needed.
- if (!N.getValue(0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the division (low) result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ // Then truncate it down to i8.
+ Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ MVT::i8, Result);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
}
- // Copy the remainder (high) result, if it is needed.
- if (!N.getValue(1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- // Then truncate it down to i8.
- SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
- Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
- MVT::i8, Result, SRIdx), 0);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
- }
+ }
#ifndef NDEBUG
- Indent -= 2;
+ Indent -= 2;
#endif
- return NULL;
- }
+ return NULL;
+ }
- case ISD::DECLARE: {
- // Handle DECLARE nodes here because the second operand may have been
- // wrapped in X86ISD::Wrapper.
- SDValue Chain = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- SDValue N2 = Node->getOperand(2);
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
-
- // FIXME: We need to handle this for VLAs.
- if (!FINode) {
- ReplaceUses(N.getValue(0), Chain);
- return NULL;
+ case X86ISD::CMP: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ N0.getValueType() != MVT::i8 &&
+ X86::isZeroNode(N1)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
+ if (!C) break;
+
+ // For example, convert "testl %eax, $8" to "testb %al, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
+ (!(C->getZExtValue() & 0x80) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // On x86-32, only the ABCD registers have 8-bit subregisters.
+ if (!Subtarget->is64Bit()) {
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+ }
+
+ // Extract the l-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb.
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
}
-
- if (N2.getOpcode() == ISD::ADD &&
- N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
- N2 = N2.getOperand(1);
-
- // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
- // somehow, just ignore it.
- if (N2.getOpcode() != X86ISD::Wrapper &&
- N2.getOpcode() != X86ISD::WrapperRIP) {
- ReplaceUses(N.getValue(0), Chain);
- return NULL;
+
+ // For example, "testl %eax, $2048" to "testb %ah, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ // Shift the immediate right by 8 bits.
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
+ MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Put the value in an ABCD register.
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+
+ // Extract the h-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb. No special NOREX tricks are needed since there's
+ // only one GPR operand!
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, ShiftedImm);
}
- GlobalAddressSDNode *GVNode =
- dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
- if (GVNode == 0) {
- ReplaceUses(N.getValue(0), Chain);
- return NULL;
+
+ // For example, "testl %eax, $32776" to "testw %ax, $32776".
+ if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
+ N0.getValueType() != MVT::i16 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 16-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+ MVT::i16, Reg);
+
+ // Emit a testw.
+ return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
+ if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
+ N0.getValueType() == MVT::i64 &&
+ (!(C->getZExtValue() & 0x80000000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 32-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+ MVT::i32, Reg);
+
+ // Emit a testl.
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
}
- SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
- TLI.getPointerTy());
- SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
- TLI.getPointerTy());
- SDValue Ops[] = { Tmp1, Tmp2, Chain };
- return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
- MVT::Other, Ops,
- array_lengthof(Ops));
}
+ break;
+ }
}
SDNode *ResNode = SelectCode(N);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == N.getNode())
- DEBUG(N.getNode()->dump(CurDAG));
- else
- DEBUG(ResNode->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.getNode())
+ N.getNode()->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << '\n';
+ });
Indent -= 2;
#endif
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5a6294a211c8..fadc81839491 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,13 +16,16 @@
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -33,21 +36,48 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+// Disable16Bit - 16-bit operations typically have a larger encoding than
+// corresponding 32-bit instructions, and 16-bit code is slow on some
+// processors. This is an experimental flag to disable 16-bit operations
+// (which forces them to be Legalized to 32-bit operations).
+static cl::opt<bool>
+Disable16Bit("disable-16bit", cl::Hidden,
+ cl::desc("Disable use of 16-bit instructions"));
+
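
Disable16Bit threads through the remainder of this file, downgrading the i16 register class, truncstores, and Custom operations so that 16-bit arithmetic is legalized to 32-bit instructions; the motivation is the operand-size prefix and partial-register behavior that make 16-bit code slow on some processors. A hypothetical function one might compile with llc, with and without the flag registered above, to compare the output:

#include <cstdint>

// With -disable-16bit, the i16 multiply and add here are expected to be
// legalized to 32-bit operations and truncated at the end.
uint16_t blend(uint16_t A, uint16_t B) {
  return static_cast<uint16_t>(A * B + 3);
}
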
// Forward declarations.
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ switch (TM.getSubtarget<X86Subtarget>().TargetType) {
+ default: llvm_unreachable("unknown subtarget type");
+ case X86Subtarget::isDarwin:
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return new X8664_MachoTargetObjectFile();
+ return new X8632_MachoTargetObjectFile();
+ case X86Subtarget::isELF:
+ return new TargetLoweringObjectFileELF();
+ case X86Subtarget::isMingw:
+ case X86Subtarget::isCygwin:
+ case X86Subtarget::isWindows:
+ return new TargetLoweringObjectFileCOFF();
+ }
+}
+
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
- : TargetLowering(TM) {
+ : TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -62,7 +92,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(SchedulingForRegPressure);
- setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetDarwin()) {
@@ -80,7 +109,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Set up the register classes.
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
- addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ if (!Disable16Bit)
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
@@ -89,9 +119,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ if (!Disable16Bit)
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
- setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ if (!Disable16Bit)
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
@@ -242,8 +274,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ if (Disable16Bit) {
+ setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ }
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
@@ -257,16 +294,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
- setOperationAction(ISD::SELECT , MVT::i1 , Promote);
- setOperationAction(ISD::SELECT , MVT::i8 , Promote);
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
- setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i8 , Custom);
+ if (Disable16Bit)
+ setOperationAction(ISD::SELECT , MVT::i16 , Expand);
+ else
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
- setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ if (Disable16Bit)
+ setOperationAction(ISD::SETCC , MVT::i16 , Expand);
+ else
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
@@ -275,8 +318,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
- // X86 ret instruction may pop stack.
- setOperationAction(ISD::RET , MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// Darwin ABI issue.
@@ -330,7 +371,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
- // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
+ // Use the default ISD::DBG_STOPPOINT.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
@@ -637,6 +678,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
}
if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -696,16 +740,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ EVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,
+ VT.getSimpleVT().SimpleTy, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
@@ -722,22 +769,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
- MVT VT = (MVT::SimpleValueType)i;
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector()) {
continue;
}
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v2i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v2i64);
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
- setOperationAction(ISD::SELECT, VT, Promote);
- AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -847,7 +895,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
// This includes 256-bit vectors
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ EVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
@@ -861,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
- }
+ }
#endif
#if 0
@@ -871,7 +919,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
// Including 256-bit vectors
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
- MVT VT = (MVT::SimpleValueType)i;
+ EVT VT = (MVT::SimpleValueType)i;
if (!VT.is256BitVector()) {
continue;
@@ -933,13 +981,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
- allowUnalignedMemoryAccesses = true; // x86 supports it!
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
-MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
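+  // x86 SETcc instructions produce an 8-bit result.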
return MVT::i8;
}
@@ -993,7 +1040,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
-MVT
+EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const {
@@ -1019,7 +1066,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (usesGlobalOffsetTable())
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
- if (!Subtarget->isPICStyleRIPRel())
+ if (!Subtarget->is64Bit())
// This doesn't have DebugLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
@@ -1029,7 +1076,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+ return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
//===----------------------------------------------------------------------===//
@@ -1038,16 +1085,16 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
#include "X86GenCallingConv.inc"
-/// LowerRET - Lower an ISD::RET node.
-SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
@@ -1056,49 +1103,19 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
if (RVLocs[i].isRegLoc())
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- SDValue Chain = Op.getOperand(0);
-
- // Handle tail call return.
- Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
- if (Chain.getOpcode() == X86ISD::TAILCALL) {
- SDValue TailCall = Chain;
- SDValue TargetAddress = TailCall.getOperand(1);
- SDValue StackAdjustment = TailCall.getOperand(2);
- assert(((TargetAddress.getOpcode() == ISD::Register &&
- (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
- TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
- TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
- "Expecting an global address, external symbol, or register");
- assert(StackAdjustment.getOpcode() == ISD::Constant &&
- "Expecting a const value");
-
- SmallVector<SDValue,8> Operands;
- Operands.push_back(Chain.getOperand(0));
- Operands.push_back(TargetAddress);
- Operands.push_back(StackAdjustment);
- // Copy registers used by the call. Last operand is a flag so it is not
- // copied.
- for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
- Operands.push_back(Chain.getOperand(i));
- }
- return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
- Operands.size());
- }
- // Regular return.
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
- RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+ RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16));
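+  // Note: this is a TargetConstant because it becomes the immediate operand
+  // of the RET instruction and should not be separately legalized.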
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue ValToCopy = Op.getOperand(i*2+1);
+ SDValue ValToCopy = Outs[i].Val;
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1116,7 +1133,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- MVT ValVT = ValToCopy.getValueType();
+ EVT ValVT = ValToCopy.getValueType();
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
@@ -1145,6 +1162,9 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
Flag = Chain.getValue(1);
+
+ // RAX now acts like a return value.
+ MF.getRegInfo().addLiveOut(X86::RAX);
}
RetOps[0] = Chain; // Update chain.
@@ -1157,36 +1177,32 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
MVT::Other, &RetOps[0], RetOps.size());
}
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. The returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *X86TargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG) {
-
- DebugLoc dl = TheCall->getDebugLoc();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- bool isVarArg = TheCall->isVarArg();
bool Is64Bit = Subtarget->is64Bit();
- CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
- SmallVector<SDValue, 8> ResultVals;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
- MVT CopyVT = VA.getValVT();
+ EVT CopyVT = VA.getValVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
- cerr << "SSE register return with SSE disabled\n";
- exit(1);
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ llvm_report_error("SSE register return with SSE disabled");
}
// If this is a call to a function that returns an fp value on the floating
@@ -1206,7 +1222,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
MVT::v2i64, InFlag).getValue(1);
Val = Chain.getValue(0);
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- Val, DAG.getConstant(0, MVT::i64));
+ Val, DAG.getConstant(0, MVT::i64));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
MVT::i64, InFlag).getValue(1);
@@ -1228,13 +1244,10 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
DAG.getIntPtrConstant(1));
}
- ResultVals.push_back(Val);
+ InVals.push_back(Val);
}
- // Merge everything together with a MERGE_VALUES node.
- ResultVals.push_back(Chain);
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).getNode();
+ return Chain;
}
@@ -1248,30 +1261,28 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
-/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
-static bool CallIsStructReturn(CallSDNode *TheCall) {
- unsigned NumOps = TheCall->getNumArgs();
- if (!NumOps)
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ if (Outs.empty())
return false;
- return TheCall->getArgFlags(0).isSRet();
+ return Outs[0].Flags.isSRet();
}
-/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
-static bool ArgsAreStructReturn(SDValue Op) {
- unsigned NumArgs = Op.getNode()->getNumValues() - 1;
- if (!NumArgs)
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ if (Ins.empty())
return false;
- return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+ return Ins[0].Flags.isSRet();
}
-/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
-/// the callee to pop its own arguments. Callee pop is necessary to support tail
-/// calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
if (IsVarArg)
return false;
@@ -1289,7 +1300,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
@@ -1305,36 +1316,18 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
return CC_X86_32_C;
}
-/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
-/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
+/// NameDecorationForCallConv - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given calling convention.
NameDecorationStyle
-X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (CC == CallingConv::X86_FastCall)
+X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) {
+ if (CallConv == CallingConv::X86_FastCall)
return FastCall;
- else if (CC == CallingConv::X86_StdCall)
+ else if (CallConv == CallingConv::X86_StdCall)
return StdCall;
return None;
}
-/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
-/// in a register before calling.
-bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
- return !IsTailCall && !Is64Bit &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT();
-}
-
-/// CallRequiresFnAddressInReg - Check whether the call requires the function
-/// address to be loaded in a register.
-bool
-X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
- return !Is64Bit && IsTailCall &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT();
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
@@ -1348,35 +1341,52 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
-SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
- const CCValAssign &VA,
- MachineFrameInfo *MFI,
- unsigned CC,
- SDValue Root, unsigned i) {
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) {
+
// Create the nodes corresponding to a load from this parameter slot.
- ISD::ArgFlagsTy Flags =
- cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
- bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt;
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+
+  // If the value is passed by pointer, its address is passed instead of the
+  // value itself.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten when the arguments of a tail call are lowered.
- int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
return FIN;
- return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
+ return DAG.getLoad(ValVT, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0);
}
SDValue
-X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
@@ -1385,25 +1395,23 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
FuncInfo->setForceFramePointer(true);
// Decorate the function name.
- FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+ FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv));
MachineFrameInfo *MFI = MF.getFrameInfo();
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- unsigned CC = MF.getFunction()->getCallingConv();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
- assert(!(isVarArg && CC == CallingConv::Fast) &&
+ assert(!(isVarArg && CallConv == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
- SmallVector<SDValue, 8> ArgValues;
unsigned LastVal = ~0U;
+ SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
@@ -1413,7 +1421,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
+ EVT RegVT = VA.getLocVT();
TargetRegisterClass *RC = NULL;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
@@ -1425,27 +1433,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
RC = X86::FR64RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
- else if (RegVT.isVector()) {
- assert(RegVT.getSizeInBits() == 64);
- if (!Is64Bit)
- RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
- else {
- // Darwin calling convention passes MMX values in either GPRs or
- // XMMs in x86-64. Other targets pass them in memory.
- if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
- RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
- RegVT = MVT::v2i64;
- } else {
- RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
- RegVT = MVT::i64;
- }
- }
- } else {
- assert(0 && "Unknown argument type!");
- }
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ RC = X86::VR64RegisterClass;
+ else
+ llvm_unreachable("Unknown argument type!");
- unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
@@ -1456,52 +1450,53 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
- // Handle MMX values passed in GPRs.
- if (Is64Bit && RegVT != VA.getLocVT()) {
- if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
- else if (RC == X86::VR128RegisterClass) {
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector()) {
ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
ArgValue, DAG.getConstant(0, MVT::i64));
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
- }
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ } else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
-
- ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+ ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
}
+
+    // If the value is passed via a pointer, load it.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0);
+
+ InVals.push_back(ArgValue);
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. Save the argument into
// a virtual register so that we can access it from the return points.
- if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
- MachineFunction &MF = DAG.getMachineFunction();
+ if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
FuncInfo->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
unsigned StackSize = CCInfo.getNextStackOffset();
// align stack specially for tail calls
- if (PerformTailCallOpt && CC == CallingConv::Fast)
+ if (PerformTailCallOpt && CallConv == CallingConv::Fast)
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
  // If the function takes a variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
if (Is64Bit) {
@@ -1558,75 +1553,81 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(VarArgsGPOffset));
+ unsigned Offset = VarArgsGPOffset;
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ Offset);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getIntPtrConstant(8));
+ Offset += 8;
}
- // Now store the XMM (fp + vector) parameter registers.
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(VarArgsFPOffset));
- for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getIntPtrConstant(16));
+ if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
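+      // The x86-64 SysV varargs convention passes the number of vector
+      // registers used in %al; the VASTART_SAVE_XMM_REGS pseudo tests it so
+      // the XMM stores can be skipped when no vector arguments are passed.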
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
+
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+ SaveXMMOps.push_back(Val);
+ }
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size()));
}
+
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
}
}
- ArgValues.push_back(Root);
-
// Some CCs need callee pop.
- if (IsCalleePop(isVarArg, CC)) {
+ if (IsCalleePop(isVarArg, CallConv)) {
BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
+ if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
BytesToPopOnReturn = 4;
BytesCallerReserves = StackSize;
}
if (!Is64Bit) {
RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
- if (CC == CallingConv::X86_FastCall)
+ if (CallConv == CallingConv::X86_FastCall)
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
}
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
SDValue
-X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
- const SDValue &StackPtr,
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
- SDValue Chain,
- SDValue Arg, ISD::ArgFlagsTy Flags) {
- DebugLoc dl = TheCall->getDebugLoc();
- unsigned LocMemOffset = VA.getLocMemOffset();
+ ISD::ArgFlagsTy Flags) {
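+  // Win64 reserves a 32-byte shadow (register home) area ahead of the stack
+  // arguments, so the first real stack argument starts past it.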
+ const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
+ unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal()) {
@@ -1649,7 +1650,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
if (!IsTailCall || FPDiff==0) return Chain;
// Adjust the Return address stack slot.
- MVT VT = getPointerTy();
+ EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
@@ -1669,41 +1670,45 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
return Chain;
}
-SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+SDValue
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- unsigned CC = TheCall->getCallingConv();
- bool isVarArg = TheCall->isVarArg();
- bool IsTailCall = TheCall->isTailCall() &&
- CC == CallingConv::Fast && PerformTailCallOpt;
- SDValue Callee = TheCall->getCallee();
bool Is64Bit = Subtarget->is64Bit();
- bool IsStructRet = CallIsStructReturn(TheCall);
- DebugLoc dl = TheCall->getDebugLoc();
+ bool IsStructRet = CallIsStructReturn(Outs);
- assert(!(isVarArg && CC == CallingConv::Fast) &&
+ assert((!isTailCall ||
+ (CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
+ "IsEligibleForTailCallOptimization missed a case!");
+ assert(!(isVarArg && CallConv == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- if (PerformTailCallOpt && CC == CallingConv::Fast)
+ if (PerformTailCallOpt && CallConv == CallingConv::Fast)
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
- if (IsTailCall) {
+ if (isTailCall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
@@ -1719,7 +1724,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
SDValue RetAddrFrIdx;
  // Load the return address for tail calls.
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -1730,57 +1735,54 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  // of tail call optimization, arguments are handled later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+ PseudoSourceValue::getFixedStack(FI), 0);
+ Arg = SpillSlot;
+ break;
+ }
}
if (VA.isRegLoc()) {
- if (Is64Bit) {
- MVT RegVT = VA.getLocVT();
- if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
- switch (VA.getLocReg()) {
- default:
- break;
- case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
- case X86::R8: {
- // Special case: passing MMX values in GPR registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
- break;
- }
- case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
- case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
- // Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
- Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
- Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
- break;
- }
- }
- }
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- if (!IsTailCall || (IsTailCall && isByVal)) {
+ if (!isTailCall || (isTailCall && isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
- Chain, Arg, Flags));
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
}
}
}
@@ -1794,37 +1796,41 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers, so in case of
// tail call optimization the copies to registers are lowered later.
- if (!IsTailCall)
+ if (!isTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
- Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(),
- getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
- // If we are tail calling and generating PIC/GOT style code load the address
- // of the callee into ecx. The value in ecx is used as target of the tail
- // jump. This is done to circumvent the ebx/callee-saved problem for tail
- // calls on PIC/GOT architectures. Normally we would just put the address of
- // GOT into ebx and then call target@PLT. But for tail callss ebx would be
- // restored (since ebx is callee saved) before jumping to the target@PLT.
- if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
- // Note: The actual moving to ecx is done further down.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (G && !G->getGlobal()->hasHiddenVisibility() &&
- !G->getGlobal()->hasProtectedVisibility())
- Callee = LowerGlobalAddress(Callee, DAG);
- else if (isa<ExternalSymbolSDNode>(Callee))
- Callee = LowerExternalSymbol(Callee,DAG);
+
+ if (Subtarget->isPICStyleGOT()) {
+    // ELF / PIC requires the GOT pointer in the EBX register before making
+    // function calls via the PLT.
+ if (!isTailCall) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+      // If we are tail calling and generating PIC/GOT style code, load the
+      // address of the callee into ECX. The value in ECX is used as the target
+      // of the tail jump. This is done to circumvent the EBX/callee-saved
+      // problem for tail calls on PIC/GOT architectures: normally we would put
+      // the address of the GOT into EBX and then call target@PLT, but for tail
+      // calls EBX would be restored (since EBX is callee saved) before jumping
+      // to the target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
}
if (Is64Bit && isVarArg) {
@@ -1853,7 +1859,15 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// For tail calls lower the arguments to the 'real' stack slot.
- if (IsTailCall) {
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
@@ -1863,8 +1877,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
assert(VA.isMemLoc());
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
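          // Round the argument's size in bits up to a whole number of bytes.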
@@ -1879,12 +1893,13 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
getPointerTy());
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
- MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
MemOpChains2.push_back(
- DAG.getStore(Chain, dl, Arg, FIN,
+ DAG.getStore(ArgChain, dl, Arg, FIN,
PseudoSourceValue::getFixedStack(FI), 0));
}
}
@@ -1912,13 +1927,49 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
// non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
- G->getOffset());
+ GlobalValue *GV = G->getGlobal();
+ if (!GV->hasDLLImportLinkage()) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+      // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+ G->getOffset(), OpFlags);
+ }
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
- } else if (IsTailCall) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
+ // symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlags);
+ } else if (isTailCall) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
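      // Use a call-clobbered register for the target address: R11 on x86-64,
      // EAX on x86-32.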
Chain = DAG.getCopyToReg(Chain, dl,
@@ -1926,27 +1977,23 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
Callee,InFlag);
Callee = DAG.getRegister(Opc, getPointerTy());
// Add register as live out.
- DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ MF.getRegInfo().addLiveOut(Opc);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
- if (IsTailCall) {
+ if (isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
-
- // Returns a chain & a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
}
Ops.push_back(Chain);
Ops.push_back(Callee);
- if (IsTailCall)
+ if (isTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
// Add argument registers to the end of the list so that they are known live
@@ -1956,9 +2003,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
RegsToPass[i].second.getValueType()));
  // Add an implicit use of the GOT pointer in EBX.
- if (!IsTailCall && !Is64Bit &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
+ if (!isTailCall && Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for x86 vararg functions.
@@ -1968,13 +2013,28 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
if (InFlag.getNode())
Ops.push_back(InFlag);
- if (IsTailCall) {
- assert(InFlag.getNode() &&
- "Flag must be set. Depend on flag being set in LowerRET");
- Chain = DAG.getNode(X86ISD::TAILCALL, dl,
- TheCall->getVTList(), &Ops[0], Ops.size());
+ if (isTailCall) {
+ // If this is the first return lowered for this function, add the regs
+ // to the liveout set for the function.
+ if (MF.getRegInfo().liveout_empty()) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ assert(((Callee.getOpcode() == ISD::Register &&
+ (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
+             cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress) &&
+         "Expecting a global address, external symbol, or register");
- return SDValue(Chain.getNode(), Op.getResNo());
+ return DAG.getNode(X86ISD::TC_RETURN, dl,
+ NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -1982,9 +2042,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (IsCalleePop(isVarArg, CC))
+ if (IsCalleePop(isVarArg, CallConv))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+ else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
    // If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2002,8 +2062,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
- Op.getResNo());
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
}
@@ -2060,36 +2120,18 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
return Offset;
}
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-/// following the call is a return. A function is eligible if caller/callee
-/// calling conventions match, currently only fastcc supports tail calls, and
-/// the function CALL is immediatly followed by a RET.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
- SDValue Ret,
- SelectionDAG& DAG) const {
- if (!PerformTailCallOpt)
- return false;
-
- if (CheckTailCallReturnConstraints(TheCall, Ret)) {
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned CallerCC = MF.getFunction()->getCallingConv();
- unsigned CalleeCC= TheCall->getCallingConv();
- if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
- SDValue Callee = TheCall->getCallee();
- // On x86/32Bit PIC/GOT tail calls are supported.
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
- !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
- return true;
-
- // Can only do local tail calls (in same module, hidden or protected) on
- // x86_64 PIC/GOT at the moment.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return G->getGlobal()->hasHiddenVisibility()
- || G->getGlobal()->hasProtectedVisibility();
- }
- }
-
- return false;
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC;
}
FastISel *
@@ -2133,6 +2175,36 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
}
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement) {
+  // The offset should fit into a 32-bit immediate field.
+ if (!isInt32(Offset))
+ return false;
+
+  // If we don't have a symbolic displacement, we don't have any extra
+  // restrictions.
+ if (!hasSymbolicDisplacement)
+ return true;
+
+ // FIXME: Some tweaks might be needed for medium code model.
+ if (M != CodeModel::Small && M != CodeModel::Kernel)
+ return false;
+
+  // For the small code model we assume that the last object ends at least
+  // 16MB below the 2^31 boundary. We may also accept pretty large negative
+  // constants, knowing that all objects are in the positive half of the
+  // address space.
+ if (M == CodeModel::Small && Offset < 16*1024*1024)
+ return true;
+
+  // For the kernel code model we know that all objects reside in the negative
+  // half of the 32-bit address space. We cannot accept negative offsets, since
+  // they may be just off, but we can accept pretty large positive ones.
+ if (M == CodeModel::Kernel && Offset > 0)
+ return true;
+
+ return false;
+}
+
/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
@@ -2155,7 +2227,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
}
switch (SetCCOpcode) {
- default: assert(0 && "Invalid integer condition!");
+ default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
@@ -2195,7 +2267,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
- default: assert(0 && "Condcode should be pre-legalized away");
+ default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
@@ -2253,7 +2325,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
-static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -2262,68 +2334,68 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
}
bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
+ SmallVector<int, 8> M;
N->getMask(M);
return ::isPSHUFDMask(M, N->getValueType(0));
}
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
-
+
// Lower quadword copied in order or undef.
for (int i = 0; i != 4; ++i)
if (Mask[i] >= 0 && Mask[i] != i)
return false;
-
+
// Upper quadword shuffled.
for (int i = 4; i != 8; ++i)
if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
return false;
-
+
return true;
}
bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
+ SmallVector<int, 8> M;
N->getMask(M);
return ::isPSHUFHWMask(M, N->getValueType(0));
}
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
-
+
// Upper quadword copied in order.
for (int i = 4; i != 8; ++i)
if (Mask[i] >= 0 && Mask[i] != i)
return false;
-
+
// Lower quadword shuffled.
for (int i = 0; i != 4; ++i)
if (Mask[i] >= 4)
return false;
-
+
return true;
}
bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
+ SmallVector<int, 8> M;
N->getMask(M);
return ::isPSHUFLWMask(M, N->getValueType(0));
}
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
-
+
int Half = NumElems / 2;
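  // The low half of the result must select from V1 and the high half from V2.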
for (int i = 0; i < Half; ++i)
if (!isUndefOrInRange(Mask[i], 0, NumElems))
@@ -2331,7 +2403,7 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
for (int i = Half; i < NumElems; ++i)
if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
return false;
-
+
return true;
}
@@ -2345,12 +2417,12 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
+
+ if (NumElems != 2 && NumElems != 4)
return false;
-
+
int Half = NumElems / 2;
for (int i = 0; i < Half; ++i)
if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
@@ -2424,24 +2496,24 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 4)
return false;
-
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
isUndefOrEqual(N->getMaskElt(3), 3);
}
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
-
+
for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2466,12 +2538,12 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
-
+
for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2497,11 +2569,11 @@ bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
-
+
for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2522,11 +2594,11 @@ bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
-
+
for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2547,19 +2619,19 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
int NumElts = VT.getVectorNumElements();
-
+
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
-
+
for (int i = 1; i < NumElts; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
-
+
return true;
}
@@ -2572,21 +2644,21 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
/// x86 movss wants: the lowest element must be the lowest element of vector 2,
/// and the other elements must come from vector 1 in order.
-static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
int NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
-
+
if (!isUndefOrEqual(Mask[0], 0))
return false;
-
+
for (int i = 1; i < NumOps; ++i)
if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
(V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
(V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
return false;
-
+
return true;
}
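A concrete illustration of the two mask shapes (a sketch for illustration, not part of this patch; isMOVLShape is a hypothetical name): for a 4-wide shuffle, isMOVLMask accepts <4, 1, 2, 3>, while the commuted form checked above accepts <0, 5, 6, 7>.

    #include <cassert>
    #include <vector>

    // Standalone model of the MOVL mask shape for NumElts == 4.
    // Index < 4 selects from V1, index >= 4 from V2, -1 is undef.
    static bool isMOVLShape(const std::vector<int> &Mask) {
      if (Mask.size() != 4 || (Mask[0] != 4 && Mask[0] != -1))
        return false;                   // lowest element must come from V2
      for (int i = 1; i < 4; ++i)
        if (Mask[i] != i && Mask[i] != -1)
          return false;                 // the rest from V1, in order
      return true;
    }

    int main() {
      std::vector<int> movl = {4, 1, 2, 3};     // matched by movss/movsd/movd
      std::vector<int> commuted = {0, 5, 6, 7}; // the *commuted* form instead
      assert(isMOVLShape(movl));
      assert(!isMOVLShape(commuted));
      return 0;
    }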
@@ -2650,7 +2722,7 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
int e = N->getValueType(0).getVectorNumElements() / 2;
-
+
for (int i = 0; i < e; ++i)
if (!isUndefOrEqual(N->getMaskElt(i), i))
return false;
@@ -2714,14 +2786,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
return Mask;
}
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
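A note on the isPosZero() check above (a sketch, not from the patch): -0.0 has its sign bit set and is not bit-identical to an all-zero lane, so only +0.0 qualifies as a zero node. A minimal scalar demonstration:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // +0.0 is all-zero bits; -0.0 is not, so it cannot seed an
    // all-zeros vector.
    static bool isPositiveZeroBits(double d) {
      uint64_t b;
      std::memcpy(&b, &d, sizeof b);
      return b == 0;
    }

    int main() {
      assert(isPositiveZeroBits(0.0));
      assert(!isPositiveZeroBits(-0.0));
      return 0;
    }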
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0);
+ EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
-
+
for (unsigned i = 0; i != NumElems; ++i) {
int idx = SVOp->getMaskElt(i);
if (idx < 0)
@@ -2737,7 +2818,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
@@ -2795,7 +2876,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
return false;
unsigned NumElems = Op->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
@@ -2820,17 +2901,8 @@ static bool isSplatVector(SDNode *N) {
return true;
}
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
-/// to an zero vector.
+/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
static bool isZeroShuffle(ShuffleVectorSDNode *N) {
SDValue V1 = N->getOperand(0);
@@ -2842,13 +2914,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
unsigned Opc = V2.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
return false;
} else if (Idx >= 0) {
unsigned Opc = V1.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
return false;
}
}
@@ -2857,7 +2931,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG,
+static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -2879,7 +2953,7 @@ static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG,
/// getOnesVector - Returns a vector of specified type with all bits set.
///
-static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
@@ -2897,13 +2971,13 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0);
+ EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
-
+
bool Changed = false;
SmallVector<int, 8> MaskVec;
SVOp->getMask(MaskVec);
-
+
for (unsigned i = 0; i != NumElems; ++i) {
if (MaskVec[i] > (int)NumElems) {
MaskVec[i] = NumElems;
@@ -2918,7 +2992,7 @@ static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -2929,7 +3003,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
}
/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -2941,7 +3015,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
}
/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
unsigned Half = NumElems/2;
@@ -2954,13 +3028,13 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
}
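For reference (a sketch, not part of the patch): with NumElems == 4 the two builders above produce the interleaving masks <0, 4, 1, 5> for unpackl and <2, 6, 3, 7> for unpackh. A tiny standalone generator of those masks:

    #include <cstdio>

    // Emit the masks getUnpackl/getUnpackh construct for NumElems == 4.
    int main() {
      const unsigned NumElems = 4, Half = NumElems / 2;
      std::printf("unpackl:");
      for (unsigned i = 0; i != Half; ++i)   // <0, N, 1, N+1, ...>
        std::printf(" %u %u", i, i + NumElems);
      std::printf("\nunpackh:");
      for (unsigned i = 0; i != Half; ++i)   // <N/2, N+N/2, ...>
        std::printf(" %u %u", i + Half, i + NumElems + Half);
      std::printf("\n");
      return 0;
    }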
/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
bool HasSSE2) {
if (SV->getValueType(0).getVectorNumElements() <= 4)
return SDValue(SV, 0);
-
- MVT PVT = MVT::v4f32;
- MVT VT = SV->getValueType(0);
+
+ EVT PVT = MVT::v4f32;
+ EVT VT = SV->getValueType(0);
DebugLoc dl = SV->getDebugLoc();
SDValue V1 = SV->getOperand(0);
int NumElems = VT.getVectorNumElements();
@@ -2976,7 +3050,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
}
NumElems >>= 1;
}
-
+
// Perform the splat.
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
@@ -2991,7 +3065,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool isZero, bool HasSSE2,
SelectionDAG &DAG) {
- MVT VT = V2.getValueType();
+ EVT VT = V2.getValueType();
SDValue V1 = isZero
? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
@@ -3016,7 +3090,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
continue;
}
SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
- if (Elt.getNode() && isZeroNode(Elt))
+ if (Elt.getNode() && X86::isZeroNode(Elt))
++NumZeros;
else
break;
@@ -3142,11 +3216,11 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// getVShift - Return a vector logical shift node.
///
-static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
+static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
bool isMMX = VT.getSizeInBits() == 64;
- MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -3171,9 +3245,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
}
- MVT VT = Op.getValueType();
- MVT EVT = VT.getVectorElementType();
- unsigned EVTBits = EVT.getSizeInBits();
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
@@ -3189,7 +3263,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
- if (isZeroNode(Elt))
+ if (X86::isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
@@ -3212,11 +3286,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
- if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+ if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle MMX and SSE both.
- MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+ EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
// Truncate the value (which may itself be a constant) to i32, and
@@ -3234,7 +3308,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
for (unsigned i = 1; i != VecElts; ++i)
Mask.push_back(i);
Item = DAG.getVectorShuffle(VecVT, dl, Item,
- DAG.getUNDEF(Item.getValueType()),
+ DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
@@ -3248,15 +3322,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Idx == 0) {
if (NumZero == 0) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
- (EVT == MVT::i64 && Subtarget->is64Bit())) {
+ } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+ (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
DAG);
- } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+ } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
@@ -3266,7 +3340,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
- isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
@@ -3374,9 +3449,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// If we have SSE 4.1, Expand into a number of inserts unless the number of
// values to be inserted is equal to the number of elements, in which case
// use the unpack code below in the hopes of matching the consecutive elts
- // load merge pattern for shuffles.
+ // load merge pattern for shuffles.
// FIXME: We could probably just check that here directly.
- if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
+ if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
getSubtarget()->hasSSE41()) {
V[0] = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NumElems; ++i)
@@ -3457,7 +3532,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
}
  // For SSSE3, if all 8 words of the result come from only 1 quadword of each
- // of the two input vectors, shuffle them into one input vector so only a
+ // of the two input vectors, shuffle them into one input vector so only a
  // single pshufb instruction is necessary. If there are more than 2 input
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
@@ -3481,7 +3556,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
SmallVector<int, 8> MaskV;
MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
- NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
@@ -3506,7 +3581,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
int idx = MaskVals[i];
if (idx < 0)
continue;
- idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
if ((idx != i) && idx < 4)
pshufhw = false;
if ((idx != i) && idx > 3)
@@ -3521,19 +3596,19 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
}
}
-
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
if (TLI.getSubtarget()->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
-
+
// If we have elements from both input vectors, set the high bit of the
- // shuffle mask element to zero out elements that come from V2 in the V1
+ // shuffle mask element to zero out elements that come from V2 in the V1
// mask, and elements that come from V1 in the V2 mask, so that the two
// results can be OR'd together.
bool TwoInputs = V1Used && V2Used;
@@ -3548,12 +3623,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
}
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
- V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
-
+
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
pshufbMask.clear();
@@ -3568,7 +3643,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
}
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
- V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
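The two-input PSHUFB trick is easier to see in a byte-level model (a sketch, not the patch's code): a mask byte with the high bit set makes PSHUFB write zero, so each input is shuffled with the other input's lanes zeroed out, and the partial results are OR'd:

    #include <cassert>
    #include <cstdint>

    // Byte-level model of PSHUFB: high bit of the mask byte zeroes the
    // lane, otherwise the low 4 bits index into the source.
    static void pshufb(const uint8_t src[16], const uint8_t mask[16],
                       uint8_t out[16]) {
      for (int i = 0; i != 16; ++i)
        out[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 15];
    }

    int main() {
      uint8_t v1[16], v2[16], m1[16], m2[16], r1[16], r2[16];
      for (int i = 0; i != 16; ++i) { v1[i] = i; v2[i] = 100 + i; }
      // Result lane i: byte i of V1 if i is even, byte i of V2 if odd.
      for (int i = 0; i != 16; ++i) {
        m1[i] = (i % 2 == 0) ? i : 0x80;  // zero the V2-sourced lanes
        m2[i] = (i % 2 == 0) ? 0x80 : i;  // zero the V1-sourced lanes
      }
      pshufb(v1, m1, r1);
      pshufb(v2, m2, r2);
      for (int i = 0; i != 16; ++i)
        assert((r1[i] | r2[i]) == (i % 2 == 0 ? v1[i] : v2[i]));
      return 0;
    }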
@@ -3597,7 +3672,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
}
-
+
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
// and update MaskVals with the new element order.
if (BestHiQuad >= 0) {
@@ -3619,7 +3694,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
}
-
+
// In case BestHi & BestLo were both -1, which means each quadword has a word
// from each of the four input quadwords, calculate the InOrder bitvector now
// before falling through to the insert/extract cleanup.
@@ -3629,7 +3704,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
if (MaskVals[i] < 0 || MaskVals[i] == i)
InOrder.set(i);
}
-
+
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
@@ -3660,9 +3735,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
SmallVector<int, 16> MaskVals;
SVOp->getMask(MaskVals);
-
+
// If we have SSSE3, case 1 is generated when all result bytes come from
- // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
  // FIXME: kill V2Only once shuffles are canonicalized by getNode.
bool V1Only = true;
@@ -3676,13 +3751,13 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
else
V1Only = false;
}
-
+
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
if (TLI.getSubtarget()->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
-
+
// If all result elements are from one input vector, then only translate
- // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
//
// Otherwise, we have elements from both input vectors, and must zero out
// elements that come from V2 in the first mask, and V1 in the second mask
@@ -3705,7 +3780,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
return V1;
-
+
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
pshufbMask.clear();
@@ -3722,7 +3797,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
MVT::v16i8, &pshufbMask[0], 16));
return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
}
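On the intrinsics side this lowering corresponds to _mm_shuffle_epi8; a small usage sketch, assuming an SSSE3-capable build (e.g. -mssse3):

    #include <tmmintrin.h>  // SSSE3: _mm_shuffle_epi8
    #include <cstdint>
    #include <cstdio>

    int main() {
      __m128i v = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                8, 9, 10, 11, 12, 13, 14, 15);
      // Reverse the bytes; a mask byte of 0x80 would zero that lane.
      __m128i m = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0);
      __m128i r = _mm_shuffle_epi8(v, m);
      uint8_t out[16];
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out), r);
      std::printf("%d %d\n", out[0], out[15]);  // prints "15 0"
      return 0;
    }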
-
+
  // No SSSE3 - Calculate in-place words and then fix all out-of-place words
  // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
  // the 16 different words that comprise the two double quadword input vectors.
@@ -3732,17 +3807,17 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
int Elt1 = MaskVals[i*2+1];
-
+
// This word of the result is all undef, skip it.
if (Elt0 < 0 && Elt1 < 0)
continue;
-
+
// This word of the result is already in the correct place, skip it.
if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
continue;
if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
continue;
-
+
SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
SDValue InsElt;
@@ -3801,15 +3876,15 @@ static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG,
TargetLowering &TLI, DebugLoc dl) {
- MVT VT = SVOp->getValueType(0);
+ EVT VT = SVOp->getValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- MVT MaskEltVT = MaskVT.getVectorElementType();
- MVT NewVT = MaskVT;
- switch (VT.getSimpleVT()) {
+ EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ EVT MaskEltVT = MaskVT.getVectorElementType();
+ EVT NewVT = MaskVT;
+ switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
@@ -3849,7 +3924,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(MVT VT, MVT OpVT,
+static SDValue getVZextMovL(EVT VT, EVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -3859,11 +3934,11 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT,
if (!LD) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
- MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+ MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
- SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+ SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -3889,8 +3964,8 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- MVT VT = SVOp->getValueType(0);
-
+ EVT VT = SVOp->getValueType(0);
+
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
SmallVector<int, 8> Mask1(4U, -1);
@@ -3926,7 +4001,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
SmallVector<int, 8> Mask2(4U, -1);
-
+
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1)
continue;
@@ -4036,7 +4111,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
bool isMMX = VT.getSizeInBits() == 64;
@@ -4050,7 +4125,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// Promote splats to v4f32.
if (SVOp->isSplat()) {
- if (isMMX || NumElems < 4)
+ if (isMMX || NumElems < 4)
return Op;
return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
}
@@ -4079,10 +4154,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DAG, Subtarget, dl);
}
}
-
+
if (X86::isPSHUFDMask(SVOp))
return Op;
-
+
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
@@ -4092,11 +4167,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
- MVT EVT = VT.getVectorElementType();
- ShAmt *= EVT.getSizeInBits();
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
-
+
if (X86::isMOVLMask(SVOp)) {
if (V1IsUndef)
return V2;
@@ -4105,7 +4180,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (!isMMX)
return Op;
}
-
+
// FIXME: fold these into legal mask.
if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
X86::isMOVSLDUPMask(SVOp) ||
@@ -4120,11 +4195,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isShift) {
// No better options. Use a vshl / vsrl.
- MVT EVT = VT.getVectorElementType();
- ShAmt *= EVT.getSizeInBits();
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
-
+
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
@@ -4144,7 +4219,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
- if (V2IsUndef)
+ if (V2IsUndef)
return V1;
// If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
// the instruction selector will not match, so get a canonical MOVL with
@@ -4196,7 +4271,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SVOp->getMask(PermMask);
if (isShuffleMaskLegal(PermMask, VT))
return Op;
-
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
@@ -4209,7 +4284,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
}
-
+
// Handle all 4 wide cases with a number of shuffles except for MMX.
if (NumElems == 4 && !isMMX)
return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
@@ -4220,7 +4295,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
@@ -4283,7 +4358,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
return Res;
}
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
@@ -4296,21 +4371,21 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
- MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
- SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT,
+ EVT EltVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy+1);
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
- SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract,
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
} else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
-
+
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { Idx, -1, -1, -1 };
- MVT VVT = Op.getOperand(0).getValueType();
- SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
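The shuffle-then-extract-lane-0 pattern above, sketched with SSE intrinsics (illustration only, assuming an SSE build; element 2 is brought down to lane 0 and read out, as SHUFPS + MOVSS do):

    #include <xmmintrin.h>
    #include <cstdio>

    int main() {
      __m128 v = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
      // Bring element 2 down to lane 0 (the other lanes are don't-care
      // for this purpose), then read lane 0.
      __m128 s = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
      std::printf("%f\n", _mm_cvtss_f32(s));  // prints 3.000000
      return 0;
    }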
@@ -4326,8 +4401,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- MVT VVT = Op.getOperand(0).getValueType();
- SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
@@ -4338,18 +4413,18 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
- MVT VT = Op.getValueType();
- MVT EVT = VT.getVectorElementType();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
+ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
- unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
- : X86ISD::PINSRW;
+ unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB
+ : X86ISD::PINSRW;
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
@@ -4357,7 +4432,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
- } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
@@ -4367,24 +4442,25 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+    // Create this as a scalar-to-vector.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- } else if (EVT == MVT::i32) {
- // InsertPS works with constant index.
- if (isa<ConstantSDNode>(N2))
- return Op;
+ } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ // PINSR* works with constant index.
+ return Op;
}
return SDValue();
}
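For orientation: the INSERTPS immediate packs bits [7:6] = source lane, bits [5:4] = destination lane, and bits [3:0] = zero mask, which is why the code above shifts the destination index left by 4. A usage sketch with the SSE4.1 intrinsic (assumes -msse4.1):

    #include <smmintrin.h>  // SSE4.1: _mm_insert_ps
    #include <cstdio>

    int main() {
      __m128 dst = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
      __m128 src = _mm_setr_ps(9.0f, 8.0f, 7.0f, 6.0f);
      // imm8 = (srcLane << 6) | (dstLane << 4) | zeroMask:
      // take src lane 0 (9.0) into dst lane 2, zero no lanes.
      __m128 r = _mm_insert_ps(dst, src, (0 << 6) | (2 << 4) | 0x0);
      float out[4];
      _mm_storeu_ps(out, r);
      std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 1 2 9 4
      return 0;
    }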
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT EVT = VT.getVectorElementType();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
- if (EVT == MVT::i8)
+ if (EltVT == MVT::i8)
return SDValue();
DebugLoc dl = Op.getDebugLoc();
@@ -4392,7 +4468,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
@@ -4413,9 +4489,12 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
Op.getOperand(0))));
+  if (Op.getValueType() == MVT::v1i64 &&
+      Op.getOperand(0).getValueType() == MVT::i64)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64,
+                       Op.getOperand(0));
+
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- MVT VT = MVT::v2i32;
- switch (Op.getValueType().getSimpleVT()) {
+ EVT VT = MVT::v2i32;
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
@@ -4435,21 +4514,21 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleRIPRel() &&
- getTargetMachine().getCodeModel() == CodeModel::Small)
- WrapperKind = X86ISD::WrapperRIP;
- }
-
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
CP->getAlignment(),
CP->getOffset(), OpFlag);
@@ -4468,25 +4547,26 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleRIPRel())
- WrapperKind = X86ISD::WrapperRIP;
- }
-
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
OpFlag);
DebugLoc DL = JT->getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
+
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
@@ -4494,43 +4574,44 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
DebugLoc::getUnknownLoc(), getPointerTy()),
Result);
}
-
+
return Result;
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleRIPRel())
- WrapperKind = X86ISD::WrapperRIP;
- }
-
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
-
+
DebugLoc DL = Op.getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
-
+
+
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- !Subtarget->isPICStyleRIPRel()) {
+ !Subtarget->is64Bit()) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
getPointerTy()),
Result);
}
-
+
return Result;
}
@@ -4538,53 +4619,37 @@ SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
int64_t Offset,
SelectionDAG &DAG) const {
- bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- bool ExtraLoadRequired =
- Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
-
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ CodeModel::Model M = getTargetMachine().getCodeModel();
SDValue Result;
- if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ // A direct static reference to a global.
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
Offset = 0;
} else {
- unsigned char OpFlags = 0;
-
- if (Subtarget->isPICStyleRIPRel() &&
- getTargetMachine().getRelocationModel() != Reloc::Static) {
- if (ExtraLoadRequired)
- OpFlags = X86II::MO_GOTPCREL;
- } else if (Subtarget->isPICStyleGOT() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (ExtraLoadRequired)
- OpFlags = X86II::MO_GOT;
- else
- OpFlags = X86II::MO_GOTOFF;
- }
-
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
}
-
+
if (Subtarget->isPICStyleRIPRel() &&
- getTargetMachine().getCodeModel() == CodeModel::Small)
+ (M == CodeModel::Small || M == CodeModel::Kernel))
Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
else
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
- if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+ if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
Result);
}
- // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
- // load the value at address GV, not the value of GV itself. This means that
- // the GlobalAddress must be in the base or index register of the address, not
- // the GV offset field. Platform check is inside GVRequiresExtraLoad() call
- // The same applies for external symbols during PIC codegen
- if (ExtraLoadRequired)
+ // For globals that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
@@ -4606,7 +4671,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg,
+ SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
@@ -4628,7 +4693,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT PtrVT) {
+ const EVT PtrVT) {
SDValue InFlag;
DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -4643,7 +4708,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT PtrVT) {
+ const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
@@ -4651,7 +4716,7 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT PtrVT, TLSModel::Model model,
+ const EVT PtrVT, TLSModel::Model model,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
// Get the Thread Pointer
@@ -4677,7 +4742,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
assert(model == TLSModel::InitialExec);
OperandFlags = X86II::MO_INDNTPOFF;
}
-
+
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
@@ -4701,29 +4766,29 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
-
+
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
-
+
TLSModel::Model model = getTLSModel(GV,
getTargetMachine().getRelocationModel());
-
+
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit());
}
-
- assert(0 && "Unreachable");
+
+ llvm_unreachable("Unreachable");
return SDValue();
}
@@ -4732,17 +4797,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
/// take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i8)) :
- DAG.getConstant(0, VT);
+ SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8))
+ : DAG.getConstant(0, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
@@ -4754,9 +4818,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
}
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
- DAG.getConstant(VTBits, MVT::i8));
+ DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
- AndNode, DAG.getConstant(0, MVT::i8));
+ AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
@@ -4776,7 +4840,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
}
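A scalar model of the double-shift expansion (a sketch for the 32-bit-parts case, not the patch's code): SHLD feeds bits from the low half into the high half, and the compare on the shift count emitted above selects the swap-and-zero path for counts of 32 or more:

    #include <cassert>
    #include <cstdint>

    // 64-bit left shift on (hi, lo) 32-bit parts, mirroring SHL_PARTS:
    // shld for amt < 32, swap + zero when bit 5 of the count is set.
    static void shl64_parts(uint32_t &hi, uint32_t &lo, unsigned amt) {
      amt &= 63;
      if (amt & 32) {          // the "count >= 32" branch
        hi = lo << (amt & 31);
        lo = 0;
      } else if (amt) {
        hi = (hi << amt) | (lo >> (32 - amt));  // shld semantics
        lo <<= amt;
      }
    }

    int main() {
      uint32_t hi = 0, lo = 0x80000001u;
      shl64_parts(hi, lo, 1);
      assert(hi == 1 && lo == 2);
      shl64_parts(hi, lo, 32);
      assert(hi == 2 && lo == 0);
      return 0;
    }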
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
- MVT SrcVT = Op.getOperand(0).getValueType();
+ EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
@@ -4808,7 +4872,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
-SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain,
+SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) {
// Build the FILD
@@ -4888,19 +4952,22 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
*/
DebugLoc dl = Op.getDebugLoc();
+ LLVMContext *Context = DAG.getContext();
// Build some magic constants.
std::vector<Constant*> CV0;
- CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
- CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
- CV0.push_back(ConstantInt::get(APInt(32, 0)));
- CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
std::vector<Constant*> CV1;
- CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
- CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
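The magic constants implement the classic split-and-bias conversion: 0x43300000... is 2^52 and 0x45300000... is 2^84, so OR-ing the low and high 32-bit halves of the u64 into those exponent patterns yields 2^52 + lo and 2^84 + hi*2^32, and subtracting the biases recovers the value. A scalar reconstruction (a sketch, not the patch's code):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Scalar model of the u64 -> f64 trick built above.
    static double u64_to_f64(uint64_t x) {
      uint64_t lo_bits = 0x4330000000000000ULL | (x & 0xffffffffULL);
      uint64_t hi_bits = 0x4530000000000000ULL | (x >> 32);
      double lo, hi;                       // 2^52 + lo, 2^84 + hi * 2^32
      std::memcpy(&lo, &lo_bits, 8);
      std::memcpy(&hi, &hi_bits, 8);
      // (hi - 2^84) + (lo - 2^52) == hi * 2^32 + lo, one final rounding.
      return (hi - std::ldexp(1.0, 84)) + (lo - std::ldexp(1.0, 52));
    }

    int main() {
      assert(u64_to_f64(0) == 0.0);
      assert(u64_to_f64(1ULL << 33) == 8589934592.0);
      assert(u64_to_f64(~0ULL) == 18446744073709551616.0);  // rounds to 2^64
      return 0;
    }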
@@ -4965,7 +5032,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
- MVT DestVT = Op.getValueType();
+ EVT DestVT = Op.getValueType();
if (DestVT.bitsLT(MVT::f64)) {
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
@@ -4988,7 +5055,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
- MVT SrcVT = N0.getValueType();
+ EVT SrcVT = N0.getValueType();
if (SrcVT == MVT::i64) {
// We only handle SSE2 f64 target here; caller can expand the rest.
if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
@@ -5017,7 +5084,7 @@ std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
DebugLoc dl = Op.getDebugLoc();
- MVT DstTy = Op.getValueType();
+ EVT DstTy = Op.getValueType();
if (!IsSigned) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -5043,10 +5110,10 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
+
unsigned Opc;
- switch (DstTy.getSimpleVT()) {
- default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
@@ -5105,18 +5172,19 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT EltVT = VT;
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
CV.push_back(C);
CV.push_back(C);
} else {
- Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
@@ -5131,21 +5199,19 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
}
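This function and the two that follow are the standard sign-bit tricks: fabs clears the sign bit with an AND, fneg flips it with an XOR, and copysign splices the magnitude of one value onto the sign of another. A bit-level scalar sketch (illustration only):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t bits(double d) { uint64_t b; std::memcpy(&b, &d, 8); return b; }
    static double dbl(uint64_t b) { double d; std::memcpy(&d, &b, 8); return d; }

    int main() {
      const uint64_t Sign = 1ULL << 63;
      double x = -3.5, y = 2.0;
      assert(dbl(bits(x) & ~Sign) == 3.5);                    // fabs: AND
      assert(dbl(bits(y) ^ Sign) == -2.0);                    // fneg: XOR
      double cs = dbl((bits(y) & ~Sign) | (bits(x) & Sign));  // copysign(y, x)
      assert(cs == -2.0);
      return 0;
    }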
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT EltVT = VT;
- unsigned EltNum = 1;
- if (VT.isVector()) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
EltVT = VT.getVectorElementType();
- EltNum = VT.getVectorNumElements();
- }
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
CV.push_back(C);
CV.push_back(C);
} else {
- Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
@@ -5168,11 +5234,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT SrcVT = Op1.getValueType();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Op1.getValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -5191,13 +5258,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// First get the sign bit of second operand.
std::vector<Constant*> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -5220,13 +5287,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -5299,21 +5366,48 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
+ case ISD::AND: {
+    // If the result of the 'and' isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::BRCOND &&
+ UI->getOpcode() != ISD::SELECT &&
+ UI->getOpcode() != ISD::SETCC) {
+ NonFlagUse = true;
+ break;
+ }
+ if (!NonFlagUse)
+ break;
+ }
+ // FALL THROUGH
case ISD::SUB:
- // Due to the ISEL shortcoming noted above, be conservative if this sub is
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
// likely to be selected as part of a load-modify-store instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() == ISD::STORE)
goto default_case;
- // Otherwise use a regular EFLAGS-setting sub.
- Opcode = X86ISD::SUB;
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
+ default: llvm_unreachable("unexpected operator!");
+ }
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
return SDValue(Op.getNode(), 1);
default:
default_case:
@@ -5419,14 +5513,14 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
unsigned SSECC = 8;
- MVT VT0 = Op0.getValueType();
+ EVT VT0 = Op0.getValueType();
assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
@@ -5469,7 +5563,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
}
- assert(0 && "Illegal FP comparison");
+ llvm_unreachable("Illegal FP comparison");
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
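For reference, the SSECC values assigned above are the CMPPS/CMPPD predicate immediates: 0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NEQ, 5 NLT, 6 NLE, 7 ORD; GT and GE have no direct encoding and are obtained by swapping the operands. A quick intrinsic sketch (assumes an SSE build):

    #include <xmmintrin.h>
    #include <cstdio>

    int main() {
      __m128 a = _mm_setr_ps(1.0f, 5.0f, 3.0f, 4.0f);
      __m128 b = _mm_setr_ps(2.0f, 2.0f, 3.0f, 0.0f);
      // cmpltps is CMPPS with predicate 1; each lane becomes all-ones
      // or all-zeros.
      __m128 lt = _mm_cmplt_ps(a, b);
      // "a > b" has no predicate of its own; emit "b < a" (the Swap above).
      __m128 gt = _mm_cmplt_ps(b, a);
      std::printf("lt mask %x, gt mask %x\n",
                  _mm_movemask_ps(lt), _mm_movemask_ps(gt));  // 1 and a
      return 0;
    }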
@@ -5481,10 +5575,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: break;
+ case MVT::v8i8:
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v4i16:
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v2i32:
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
@@ -5508,7 +5605,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
EltVT);
std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
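A scalar model of the FlipSigns transformation (a sketch, not the patch's code): XOR-ing both operands with the sign bit turns the unsigned ordering into the signed ordering that PCMPGT implements per lane:

    #include <cassert>
    #include <cstdint>

    // Unsigned > via a signed compare after flipping the sign bits,
    // which is what the FlipSigns path does lane-wise.
    static bool ugt_via_signed(uint32_t a, uint32_t b) {
      int32_t sa = static_cast<int32_t>(a ^ 0x80000000u);
      int32_t sb = static_cast<int32_t>(b ^ 0x80000000u);
      return sa > sb;  // what PCMPGTD computes per lane
    }

    int main() {
      assert(ugt_via_signed(0xffffffffu, 1u));
      assert(!ugt_via_signed(1u, 0xffffffffu));
      assert(ugt_via_signed(0x80000000u, 0x7fffffffu));
      return 0;
    }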
@@ -5538,7 +5635,10 @@ static bool isX86LogicalCmp(SDValue Op) {
Opc == X86ISD::SMUL ||
Opc == X86ISD::UMUL ||
Opc == X86ISD::INC ||
- Opc == X86ISD::DEC))
+ Opc == X86ISD::DEC ||
+ Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR ||
+ Opc == X86ISD::AND))
return true;
return false;
@@ -5560,7 +5660,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -5751,8 +5851,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
- MVT IntPtr = getPointerTy();
- MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ EVT IntPtr = getPointerTy();
+ EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
@@ -5802,8 +5902,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
- MVT IntPtr = getPointerTy();
- const Type *IntPtrTy = TD->getIntPtrType();
+ EVT IntPtr = getPointerTy();
+ const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
@@ -5812,8 +5912,9 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Entry.Node = Size;
Args.push_back(Entry);
std::pair<SDValue,SDValue> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
- 0, CallingConv::C, false,
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
return CallResult.second;
}
@@ -5824,7 +5925,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
uint64_t SizeVal = ConstantSize->getZExtValue();
SDValue InFlag(0, 0);
- MVT AVT;
+ EVT AVT;
SDValue Count;
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
@@ -5893,7 +5994,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (TwoRepStos) {
InFlag = Chain.getValue(1);
Count = Size;
- MVT CVT = Count.getValueType();
+ EVT CVT = Count.getValueType();
SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
@@ -5909,8 +6010,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
} else if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
- MVT AddrVT = Dst.getValueType();
- MVT SizeVT = Size.getValueType();
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
Chain = DAG.getMemset(Chain, dl,
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
@@ -5945,7 +6046,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
return SDValue();
// DWORD aligned
- MVT AVT = MVT::i32;
+ EVT AVT = MVT::i32;
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
AVT = MVT::i64;
@@ -5980,9 +6081,9 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
- MVT DstVT = Dst.getValueType();
- MVT SrcVT = Src.getValueType();
- MVT SizeVT = Size.getValueType();
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
Results.push_back(DAG.getMemcpy(Chain, dl,
DAG.getNode(ISD::ADD, dl, DstVT, Dst,
DAG.getConstant(Offset, DstVT)),
@@ -6054,8 +6155,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDValue SrcPtr = Op.getOperand(1);
SDValue SrcSV = Op.getOperand(2);
- assert(0 && "VAArgInst is not yet implemented for x86-64!");
- abort();
+ llvm_report_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
}
@@ -6179,6 +6279,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ // ptest intrinsics. The intrinsics these come from are designed to return
+ // an integer value, not just an instruction, so lower them to the ptest
+ // pattern and a setcc for the result.
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestnzc:{
+ unsigned X86CC = 0;
+ switch (IntNo) {
+ default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_sse41_ptestz:
+ // ZF = 1
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_sse41_ptestc:
+ // CF = 1
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse41_ptestnzc:
+ // ZF and CF = 0
+ X86CC = X86::COND_A;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
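For reference, a hedged model of the PTEST flag semantics the three condition codes above select on (PTestFlags and ptestFlags are illustrative names, not LLVM or intrinsic APIs):

#include <cstdint>

struct PTestFlags { bool ZF, CF; };

// PTEST sets ZF when (LHS & RHS) is all zeros and CF when (~LHS & RHS) is
// all zeros, over the full 128-bit operands (modeled here as two u64 lanes).
static PTestFlags ptestFlags(const uint64_t LHS[2], const uint64_t RHS[2]) {
  PTestFlags F;
  F.ZF = ((LHS[0] & RHS[0]) | (LHS[1] & RHS[1])) == 0;   // ptestz  -> COND_E
  F.CF = ((~LHS[0] & RHS[0]) | (~LHS[1] & RHS[1])) == 0; // ptestc  -> COND_B
  return F;                       // ptestnzc -> COND_A: ZF == 0 && CF == 0
}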
// Fix vector shift instructions where the last operand is a non-immediate
// i32 value.
@@ -6203,7 +6333,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return SDValue();
unsigned NewIntNo = 0;
- MVT ShAmtVT = MVT::v4i32;
+ EVT ShAmtVT = MVT::v4i32;
switch (IntNo) {
case Intrinsic::x86_sse2_pslli_w:
NewIntNo = Intrinsic::x86_sse2_psll_w;
@@ -6256,14 +6386,28 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_mmx_psrai_d:
NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- default: abort(); // Can't reach here.
+ default: llvm_unreachable("Impossible intrinsic");
}
break;
}
}
- MVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt));
+
+ // The vector shift intrinsics with scalars use 32-bit shift amounts, but
+ // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
+ // to be zero.
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ if (ShAmtVT == MVT::v4i32) {
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
+ } else {
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ }
+
+ EVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
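A small sketch of the invariant the BUILD_VECTOR above establishes (widenShiftAmount is an illustrative name): the hardware shift consumes a 64-bit amount from the low lanes, so the 32-bit scalar must be zero-extended rather than left undefined.

#include <cstdint>

// Lane layout produced above for v4i32: { ShAmt, 0, undef, undef }.
// Only the low 64 bits matter to psll/psrl/psra, and they must equal
// zext(ShAmt) or the shift count would include garbage bits.
static uint64_t widenShiftAmount(uint32_t ShAmt) {
  return (uint64_t)ShAmt; // upper 32 bits forced to zero
}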
@@ -6295,7 +6439,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
@@ -6401,12 +6545,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
- unsigned CC = Func->getCallingConv();
+ CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
@@ -6428,8 +6572,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- cerr << "Nest register in use - reduce number of inreg parameters!\n";
- abort();
+ llvm_report_error("Nest register in use - reduce number of inreg parameters!");
}
}
break;
@@ -6499,7 +6642,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
const TargetMachine &TM = MF.getTarget();
const TargetFrameInfo &TFI = *TM.getFrameInfo();
unsigned StackAlignment = TFI.getStackAlignment();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
// Save FP Control Word to stack slot
@@ -6537,8 +6680,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT OpVT = VT;
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
@@ -6570,8 +6713,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT OpVT = VT;
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
@@ -6599,7 +6742,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
DebugLoc dl = Op.getDebugLoc();
@@ -6656,7 +6799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
switch (Op.getOpcode()) {
- default: assert(0 && "Unknown ovf instruction!");
+ default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
// A subtract of one will be selected as a INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
@@ -6712,11 +6855,11 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
- MVT T = Op.getValueType();
+ EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned Reg = 0;
unsigned size = 0;
- switch(T.getSimpleVT()) {
+ switch(T.getSimpleVT().SimpleTy) {
default:
assert(false && "Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
@@ -6763,7 +6906,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
- MVT T = Node->getValueType(0);
+ EVT T = Node->getValueType(0);
SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
DAG.getConstant(0, T), Node->getOperand(2));
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
@@ -6778,7 +6921,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- default: assert(0 && "Should not custom lower this!");
+ default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -6805,9 +6948,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::CALL: return LowerCALL(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
@@ -6836,7 +6976,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG, unsigned NewOp) {
- MVT T = Node->getValueType(0);
+ EVT T = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
assert (T == MVT::i64 && "Only know how to expand i64 atomics");
@@ -6846,12 +6986,11 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
Node->getOperand(2), DAG.getIntPtrConstant(0));
SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Node->getOperand(2), DAG.getIntPtrConstant(1));
- // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
- // have a MemOperand. Pass the info through as a normal operand.
- SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
- SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
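The expansion above splits the 64-bit operand into halves and rejoins the result; a minimal model of that round trip (names are illustrative only):

#include <cstdint>

// EXTRACT_ELEMENT 0/1 on an i64 yields the low and high i32 halves...
static void splitI64(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
  Lo = (uint32_t)V;
  Hi = (uint32_t)(V >> 32);
}

// ...and ISD::BUILD_PAIR puts them back together afterwards.
static uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
  return ((uint64_t)Hi << 32) | Lo;
}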
@@ -6872,7 +7011,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
FP_TO_INTHelper(SDValue(N, 0), DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() != 0) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// Return a load from the stack slot.
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0));
}
@@ -6893,7 +7032,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ATOMIC_CMP_SWAP: {
- MVT T = N->getValueType(0);
+ EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
@@ -6969,7 +7108,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
- case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
@@ -7027,7 +7165,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::OR: return "X86ISD::OR";
+ case X86ISD::XOR: return "X86ISD::XOR";
+ case X86ISD::AND: return "X86ISD::AND";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
}
}
@@ -7036,28 +7179,28 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// X86 supports extremely general addressing modes.
+ CodeModel::Model M = getTargetMachine().getCodeModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
- if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
return false;
if (AM.BaseGV) {
- // We can only fold this if we don't need an extra load.
- if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
+ unsigned GVFlags =
+ Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+
+ // If a reference to this global requires an extra load, we can't fold it.
+ if (isGlobalStubReference(GVFlags))
return false;
- // If BaseGV requires a register, we cannot also have a BaseReg.
- if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
- AM.HasBaseReg)
+
+ // If BaseGV requires a register for the PIC base, we cannot also have a
+ // BaseReg specified.
+ if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
- // X86-64 only supports addr of globals in small code model.
- if (Subtarget->is64Bit()) {
- if (getTargetMachine().getCodeModel() != CodeModel::Small)
- return false;
- // If lower 4G is not available, then we must use rip-relative addressing.
- if (AM.BaseOffs || AM.Scale > 1)
- return false;
- }
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ return false;
}
switch (AM.Scale) {
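As a reference point for the checks above, a rough model of the address shape being validated, under the usual x86 encoding rules (the struct and helper are illustrative, not the patch's API):

#include <cstdint>

// x86 addresses have the form Base + Index*Scale + Disp, where Disp must
// fit a sign-extended 32-bit field (subject to code model) and Scale is
// restricted by the SIB encoding.
struct AddrSketch {
  bool HasBase, HasIndex;
  unsigned Scale;   // hardware encodes only 1, 2, 4 or 8
  int64_t Disp;     // a sign-extended imm32 at most
};

static bool scaleIsEncodable(unsigned Scale) {
  return Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8;
}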
@@ -7094,7 +7237,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
return Subtarget->is64Bit() || NumBits1 < 64;
}
-bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
+bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
@@ -7106,15 +7249,16 @@ bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit();
+ return Ty1 == Type::getInt32Ty(Ty1->getContext()) &&
+ Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit();
}
-bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
+bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
-bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
+bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
@@ -7124,8 +7268,8 @@ bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
-X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
- MVT VT) const {
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
// Only do shuffles on 128-bit vector types for now.
if (VT.getSizeInBits() == 64)
return false;
@@ -7146,7 +7290,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const {
+ EVT VT) const {
unsigned NumElts = VT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
@@ -7254,7 +7398,8 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t2);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
MIB.addReg(EAXreg);
@@ -7406,7 +7551,8 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
(*MIB).addOperand(*argOpers[i]);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
MIB.addReg(X86::EAX);
@@ -7450,7 +7596,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
+ // Move all successors of thisMBB to nextMBB
nextMBB->transferSuccessors(thisMBB);
// Update thisMBB to fall through to newMBB
@@ -7510,7 +7656,8 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t3);
assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());
+ (*MIB).setMemRefs(mInstr->memoperands_begin(),
+ mInstr->memoperands_end());
MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
MIB.addReg(X86::EAX);
@@ -7522,70 +7669,190 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
return nextMBB;
}
-
+// FIXME: When we get size-specific XMM0 registers, e.g. XMM0_V16I8,
+// all of this code can be replaced with that in the .td file.
MachineBasicBlock *
-X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned numArgs, bool memArg) const {
+
+ MachineFunction *F = BB->getParent();
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
+
+ for (unsigned i = 0; i < numArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i+1);
+
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+
+ BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ F->DeleteMachineInstr(MI);
+
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them;
+ // however, this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Set up the CFG.
+ // Move any original successors of MBB to the end block.
+ EndMBB->transferSuccessors(MBB);
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ MachineMemOperand *MMO =
+ F->getMachineMemOperand(
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ MachineMemOperand::MOStore, Offset,
+ /*Size=*/16, /*Align=*/16);
+ BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MMO);
+ }
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
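For clarity, the slot addressing used by the store loop above, restated as a standalone helper (the name is illustrative):

#include <cstdint>

// Operand 3 onward are the live XMM argument registers; each one gets a
// 16-byte, 16-aligned slot in the register save area starting at the
// frame's VarArgsFPOffset.
static int64_t xmmSaveSlotOffset(unsigned OperandIdx, int64_t VarArgsFPOffset) {
  return (int64_t)(OperandIdx - 3) * 16 + VarArgsFPOffset;
}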
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
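The control flow built above is the classic two-way diamond; its C-level analogue, for orientation (a hedged sketch with illustrative names):

// thisMBB tests the condition and branches to sinkMBB (true edge);
// copy0MBB computes the false value and falls through; the PHI in
// sinkMBB merges the two, exactly like this function's return value.
static int selectDiamond(bool CC, int TrueVal, int FalseVal) {
  if (CC)
    return TrueVal;   // reaches the "phi" from thisMBB
  return FalseVal;    // reaches the "phi" from copy0MBB
}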
+
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+ case X86::CMOV_GR8:
case X86::CMOV_V1I64:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
- case X86::CMOV_V2I64: {
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // cmpTY ccX, r1, r2
- // bCC copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
- BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
- // block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
-
- // Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- BB = sinkMBB;
- BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
-
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
- return BB;
- }
+ case X86::CMOV_V2I64:
+ return EmitLoweredSelect(MI, BB, EM);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
@@ -7596,33 +7863,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
- addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
- addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW),
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the high part to be round to zero...
- addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx)
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
- addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
- addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx)
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI->getOpcode()) {
- default: assert(0 && "illegal opcode!");
+ default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
@@ -7655,15 +7925,26 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
} else {
AM.Disp = Op.getImm();
}
- addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM)
+ addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
.addReg(MI->getOperand(X86AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
+ // String/text processing lowering.
+ case X86::PCMPISTRM128REG:
+ return EmitPCMP(MI, BB, 3, /*memArg=*/false);
+ case X86::PCMPISTRM128MEM:
+ return EmitPCMP(MI, BB, 3, /*memArg=*/true);
+ case X86::PCMPESTRM128REG:
+ return EmitPCMP(MI, BB, 5, /*memArg=*/false);
+ case X86::PCMPESTRM128MEM:
+ return EmitPCMP(MI, BB, 5, /*memArg=*/true);
+
+ // Atomic Lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
@@ -7825,6 +8106,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
X86::MOV32rr, X86::MOV32rr,
X86::MOV32ri, X86::MOV32ri,
false);
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
}
}
@@ -7855,6 +8138,9 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case X86ISD::UMUL:
case X86ISD::INC:
case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
@@ -7891,7 +8177,7 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
}
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
- MVT EVT, LoadSDNode *&LDBase,
+ EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
@@ -7919,7 +8205,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
+ if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
return false;
LastLoadedElt = i;
}
@@ -7935,8 +8221,8 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
- MVT VT = N->getValueType(0);
- MVT EVT = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
unsigned NumElems = VT.getVectorNumElements();
@@ -7947,7 +8233,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
LoadSDNode *LD = NULL;
unsigned LastLoadedElt;
- if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG,
+ if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG,
MFI, TLI))
return SDValue();
@@ -7976,57 +8262,159 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
-
- // If we have SSE[12] support, try to form min/max nodes.
+
+ // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+ // instructions have the peculiarity that if either operand is a NaN,
+ // they choose what we call the RHS operand (and as such are not symmetric).
+ // It happens that this matches the semantics of the common C idiom
+ // x<y?x:y and related forms, so we can recognize these cases.
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
switch (CC) {
default: break;
- case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+ case ISD::SETULT:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
case ISD::SETULE:
- case ISD::SETLE:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
+ // This can be a min, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
case ISD::SETLT:
+ case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
- case ISD::SETOGT: // (X > Y) ? X : Y -> max
+ case ISD::SETOGE:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
case ISD::SETUGT:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+ // This can be a max, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
case ISD::SETGT:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
switch (CC) {
default: break;
- case ISD::SETOGT: // (X > Y) ? Y : X -> min
+ case ISD::SETOGE:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
case ISD::SETUGT:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+ // This can be a min, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
case ISD::SETGT:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
- case ISD::SETOLE: // (X <= Y) ? Y : X -> max
+ case ISD::SETULT:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETOLE:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
case ISD::SETULE:
- case ISD::SETLE:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
+ // This can be a max, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
case ISD::SETLT:
+ case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
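A compact model of the SSE scalar min semantics that all of the operand swapping above is protecting (sseMinModel is an illustrative name; max is symmetric):

// minss/minsd compute (a < b) ? a : b. A NaN in either operand makes the
// compare false, so the instruction returns b -- the operand the comments
// above call the RHS. Swapping LHS/RHS is therefore how the combine keeps
// the value the IR semantics require when a NaN may be present.
static double sseMinModel(double a, double b) {
  return (a < b) ? a : b;
}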
@@ -8035,7 +8423,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
-
+
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
@@ -8045,7 +8433,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// If this is efficiently invertible, canonicalize the LHSC/RHSC values
// so that TrueC (the true value) is larger than FalseC.
bool NeedsCondInvert = false;
-
+
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
// Efficiently invertible.
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
@@ -8054,41 +8442,41 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
NeedsCondInvert = true;
std::swap(TrueC, FalseC);
}
-
+
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
if (FalseC->getAPIntValue() == 0 &&
TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
-
+
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
}
-
+
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
}
-
+
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
-
+
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
@@ -8104,13 +8492,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
}
}
-
+
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
@@ -8118,17 +8506,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
-
+
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
- }
+ }
}
}
-
+
return SDValue();
}
@@ -8136,11 +8524,11 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc DL = N->getDebugLoc();
-
+
// If the flag operand isn't dead, don't touch this CMOV.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
-
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@@ -8149,12 +8537,12 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
-
+
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
}
-
+
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
@@ -8162,10 +8550,10 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
-
+
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
@@ -8173,31 +8561,31 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
-
+
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
-
+
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
-
+
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
-
+
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
@@ -8213,7 +8601,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
break;
}
}
-
+
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
SDValue Cond = N->getOperand(3);
@@ -8235,7 +8623,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
- }
+ }
}
}
return SDValue();
@@ -8254,7 +8642,7 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
@@ -8289,17 +8677,17 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
std::swap(MulAmt1, MulAmt2);
SDValue NewMul;
- if (isPowerOf2_64(MulAmt1))
+ if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(MulAmt1, VT));
- if (isPowerOf2_64(MulAmt2))
+ if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
- else
+ else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, VT));
@@ -8321,14 +8709,14 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (!Subtarget->hasSSE2())
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
SDValue ShAmtOp = N->getOperand(1);
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt;
+ SDValue BaseShAmt = SDValue();
if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElts = VT.getVectorNumElements();
unsigned i = 0;
@@ -8347,21 +8735,40 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
} else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
+ SDValue InVec = ShAmtOp.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
} else
return SDValue();
+ // The shift amount is an i32.
if (EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt);
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
// The shift amount is identical so we can do a vector shift.
SDValue ValOp = N->getOperand(0);
switch (N->getOpcode()) {
default:
- assert(0 && "Unknown shift opcode!");
+ llvm_unreachable("Unknown shift opcode!");
break;
case ISD::SHL:
if (VT == MVT::v2i64)
@@ -8415,13 +8822,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
StoreSDNode *St = cast<StoreSDNode>(N);
- MVT VT = St->getValue().getValueType();
+ EVT VT = St->getValue().getValueType();
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
@@ -8464,7 +8871,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
Ld->getBasePtr(), Ld->getSrcValue(),
Ld->getSrcValueOffset(), Ld->isVolatile(),
@@ -8568,9 +8975,9 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BIT_CONVERT)
Op = Op.getOperand(0);
- MVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
- VT.getVectorElementType().getSizeInBits() ==
+ VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
}
@@ -8580,7 +8987,7 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
-// are not buffered), so we can fold away the common pattern of
+// are not buffered), so we can fold away the common pattern of
// fence-atomic-fence.
static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
SDValue atomic = N->getOperand(0);
@@ -8601,11 +9008,11 @@ static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
default:
return SDValue();
}
-
+
SDValue fence = atomic.getOperand(0);
if (fence.getOpcode() != ISD::MEMBARRIER)
return SDValue();
-
+
switch (atomic.getOpcode()) {
case ISD::ATOMIC_CMP_SWAP:
return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
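A C++-level picture of the pattern this combine removes, given the x86 property stated above that locked RMW instructions already behave as full fences (illustrative code, not the patch's API):

#include <atomic>

// On x86 the lock-prefixed read-modify-write already orders all memory
// operations, so both explicit fences below are redundant and the DAG
// combine can bypass the surrounding MEMBARRIER nodes.
static int fenceAtomicFence(std::atomic<int> &A) {
  std::atomic_thread_fence(std::memory_order_seq_cst); // folded away
  int Old = A.fetch_add(1, std::memory_order_seq_cst); // lock xadd
  std::atomic_thread_fence(std::memory_order_seq_cst); // folded away
  return Old;
}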
@@ -8657,6 +9064,101 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
+static bool LowerToBSwap(CallInst *CI) {
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower,
+ // so don't worry about this.
+
+ // Verify this is a simple bswap.
+ if (CI->getNumOperands() != 2 ||
+ CI->getType() != CI->getOperand(1)->getType() ||
+ !CI->getType()->isInteger())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+ Value *Op = CI->getOperand(1);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
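Schematically, the rewrite above turns `call i32 asm "bswap $0", "=r,0"(i32 %x)` into `call i32 @llvm.bswap.i32(i32 %x)`. For reference, the byte reversal that intrinsic denotes (bswap32 here is a plain reimplementation, not an LLVM API):

#include <cstdint>

// Standard 32-bit byte reversal: 0x11223344 -> 0x44332211.
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) |
         ((V << 8) & 0xFF0000u) | (V << 24);
}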
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ std::vector<std::string> AsmPieces;
+ SplitString(AsmStr, AsmPieces, "\n"); // TODO: also split on ';'?
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints; nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ return LowerToBSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType() == Type::getInt16Ty(CI->getContext()) &&
+ AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rorw" &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+ return LowerToBSwap(CI);
+ }
+ break;
+ case 3:
+ if (CI->getType() == Type::getInt64Ty(CI->getContext()) &&
+ Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ std::vector<std::string> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ return LowerToBSwap(CI);
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
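The three-instruction sequence matched in the `case 3` branch is the classic 32-bit idiom for a 64-bit byte swap on the EDX:EAX pair. A hedged standalone model (bs32/bs64 are illustrative names):

#include <cstdint>

static uint32_t bs32(uint32_t V) { // what llvm.bswap.i32 computes
  return (V >> 24) | ((V >> 8) & 0xFF00u) |
         ((V << 8) & 0xFF0000u) | (V << 24);
}

// The "A" constraint ties the i64 to EDX:EAX; byte-swap each half, then
// exchange the halves to finish the 64-bit swap.
static uint64_t bs64(uint64_t V) {
  uint32_t EAX = (uint32_t)V;          // low half
  uint32_t EDX = (uint32_t)(V >> 32);  // high half
  EAX = bs32(EAX);                     // bswap %eax
  EDX = bs32(EDX);                     // bswap %edx
  return ((uint64_t)EAX << 32) | EDX;  // xchgl %eax, %edx
}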
+
+
+
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
@@ -8689,7 +9191,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
-LowerXConstraint(MVT ConstraintVT) const {
+LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
@@ -8749,7 +9251,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) {
+ if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
break;
@@ -8763,7 +9266,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) {
+ if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
@@ -8803,16 +9307,22 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
continue;
}
}
-
+
// Otherwise, this isn't something we can handle, reject it.
return;
}
+ GlobalValue *GV = GA->getGlobal();
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+ getTargetMachine())))
+ return;
+
if (hasMemory)
- Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG);
+ Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
else
- Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
- Offset);
+ Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
Result = Op;
break;
}
@@ -8828,12 +9338,42 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
// FIXME: not handling fp-stack yet!
switch (Constraint[0]) { // GCC X86 Constraint Letters
default: break; // Unknown constraint letter
- case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32)
+ return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
+ X86::ESI, X86::EDI, X86::R8D, X86::R9D,
+ X86::R10D,X86::R11D,X86::R12D,
+ X86::R13D,X86::R14D,X86::R15D,
+ X86::EBP, X86::ESP, 0);
+ else if (VT == MVT::i16)
+ return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
+ X86::SI, X86::DI, X86::R8W,X86::R9W,
+ X86::R10W,X86::R11W,X86::R12W,
+ X86::R13W,X86::R14W,X86::R15W,
+ X86::BP, X86::SP, 0);
+ else if (VT == MVT::i8)
+ return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL,
+ X86::SIL, X86::DIL, X86::R8B,X86::R9B,
+ X86::R10B,X86::R11B,X86::R12B,
+ X86::R13B,X86::R14B,X86::R15B,
+ X86::BPL, X86::SPL, 0);
+
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
+ X86::RSI, X86::RDI, X86::R8, X86::R9,
+ X86::R10, X86::R11, X86::R12,
+ X86::R13, X86::R14, X86::R15,
+ X86::RBP, X86::RSP, 0);
+
+ break;
+ }
+ // 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32)
return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
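A usage-level illustration of why the two modes need different lists (GCC-style inline asm, compilable on x86 targets only; a hedged example, not part of the patch):

// In 32-bit mode "q" means the byte-addressable a/b/c/d registers (Q_REGS);
// in 64-bit mode every GPR has a byte subregister, so "q" widens to the
// full general-register lists returned above.
static unsigned bswapTwice(unsigned X) {
  asm("bswapl %0" : "+q"(X)); // 32-bit: limited to EAX/EBX/ECX/EDX
  asm("bswapl %0" : "+q"(X)); // swap back to the original value
  return X;
}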
@@ -8852,7 +9392,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
@@ -8860,7 +9400,6 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
switch (Constraint[0]) {
default: break;
case 'r': // GENERAL_REGS
- case 'R': // LEGACY_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8)
return std::make_pair(0U, X86::GR8RegisterClass);
@@ -8869,6 +9408,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (VT == MVT::i32 || !Subtarget->is64Bit())
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'R': // LEGACY_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
+ return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
@@ -8886,7 +9433,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'x': // SSE_REGS if SSE1 allowed
if (!Subtarget->hasSSE1()) break;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f32:
@@ -8915,15 +9462,39 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// Not found as a standard register?
if (Res.second == 0) {
- // GCC calls "st(0)" just plain "st".
+      // Map the st(0) .. st(7) constraint strings onto the ST0 .. ST7 registers.
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' &&
+ tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' &&
+ Constraint[6] == '}') {
+
+ Res.first = X86::ST0+Constraint[4]-'0';
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
if (StringsEqualNoCase("{st}", Constraint)) {
Res.first = X86::ST0;
Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // flags -> EFLAGS
+ if (StringsEqualNoCase("{flags}", Constraint)) {
+ Res.first = X86::EFLAGS;
+ Res.second = X86::CCRRegisterClass;
+ return Res;
}
+
// 'A' means EAX + EDX.
if (Constraint == "A") {
Res.first = X86::EAX;
- Res.second = X86::GRADRegisterClass;
+ Res.second = X86::GR32_ADRegisterClass;
+ return Res;
}
return Res;
}
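For illustration (not part of the change; the user code below is hypothetical and assumes clang's GCC-compatible handling of x87 clobbers): the strings parsed above arrive from inline-asm clobber lists such as:

    double load_pi() {
      double pi;
      // fldpi pushes onto the x87 stack; the clobber below reaches the
      // backend as "{st(1)}", one of the strings mapped onto ST0..ST7 and
      // RFP80RegisterClass above. "=t" pins the result to st(0).
      __asm__ volatile("fldpi" : "=t"(pi) : : "st(1)");
      return pi;
    }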
@@ -9015,7 +9586,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
/// When and where to widen is target dependent based on the cost of
/// scalarizing vs using the wider vector type.
-MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
+EVT X86TargetLowering::getWidenVectorType(EVT VT) const {
assert(VT.isVector());
if (isTypeLegal(VT))
return VT;
@@ -9024,7 +9595,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
// type based on element type. This would speed up our search (though
// it may not be worth it since the size of the list is relatively
// small).
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
// On X86, it makes sense to widen any vector wider than 1
@@ -9033,7 +9604,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
+ EVT SVT = (MVT::SimpleValueType)nVT;
if (isTypeLegal(SVT) &&
SVT.getVectorElementType() == EltVT &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ffed46c733aa..2f7b8ba6e694 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -85,7 +85,7 @@ namespace llvm {
/// as.
FST,
- /// CALL/TAILCALL - These operations represent an abstract X86 call
+ /// CALL - These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
///
@@ -102,12 +102,8 @@ namespace llvm {
/// #1 - The first register result value (optional)
/// #2 - The second register result value (optional)
///
- /// The CALL vs TAILCALL distinction boils down to whether the callee is
- /// known not to modify the caller's stack frame, as is standard with
- /// LLVM.
CALL,
- TAILCALL,
-
+
/// RDTSC_DAG - This operation implements the lowering for
/// readcyclecounter
RDTSC_DAG,
@@ -208,17 +204,6 @@ namespace llvm {
LCMPXCHG_DAG,
LCMPXCHG8_DAG,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
- // Atomic 64-bit binary operations.
- ATOMADD64_DAG,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMSWAP64_DAG,
-
// FNSTCW16m - Store FP control word into i16 memory.
FNSTCW16m,
@@ -241,10 +226,29 @@ namespace llvm {
// ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, SMUL, UMUL,
- INC, DEC,
+ INC, DEC, OR, XOR, AND,
// MUL_IMM - X86 specific multiply by immediate.
- MUL_IMM
+ MUL_IMM,
+
+ // PTEST - Vector bitwise comparisons
+ PTEST,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG
};
}
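Background for VASTART_SAVE_XMM_REGS (a sketch, not from the change): in the x86-64 SysV ABI, %al carries an upper bound on the number of vector registers a varargs callee received, so va_start must conditionally spill XMM0..XMM7; that spill needs control flow, hence the dedicated operator:

    #include <cstdarg>

    // Any varargs function needs the xmm-register save area set up; the
    // conditional spill keyed on %al is what the new node models.
    double sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      double s = 0.0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, double);
      va_end(ap);
      return s;
    }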
@@ -333,6 +337,15 @@ namespace llvm {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
+
+  /// isOffsetSuitableForCodeModel - Returns true if the given offset can fit
+  /// into the displacement field of the instruction.
+ bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement = true);
}
//===--------------------------------------------------------------------===//
@@ -374,12 +387,17 @@ namespace llvm {
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
  /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
- virtual
- MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
- SelectionDAG &DAG) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+  /// unaligned memory accesses of the specified type.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+ return true;
+ }
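One practical consequence, sketched with hypothetical user code (not from the change): since the hook returns true for every type, small fixed-size copies can be lowered to wide, possibly unaligned memory operations:

    #include <cstring>

    // With unaligned accesses allowed, this can become one unaligned 64-bit
    // load/store pair instead of a byte loop, regardless of pointer alignment.
    void copy8(void *dst, const void *src) {
      std::memcpy(dst, src, 8);
    }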
/// LowerOperation - Provide custom lowering hooks for some operations.
///
@@ -395,7 +413,8 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
/// getTargetNodeName - This method returns the name of a target specific
@@ -403,7 +422,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT getSetCCResultType(MVT VT) const;
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
@@ -420,13 +439,15 @@ namespace llvm {
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
- virtual const char *LowerXConstraint(MVT ConstraintVT) const;
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -444,7 +465,7 @@ namespace llvm {
/// error, this returns a register number of 0.
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
@@ -454,7 +475,7 @@ namespace llvm {
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
- virtual bool isTruncateFree(MVT VT1, MVT VT2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
/// isZExtFree - Return true if any actual instruction that defines a
/// value of type Ty1 implicit zero-extends the value to Ty2 in the result
@@ -465,31 +486,31 @@ namespace llvm {
/// all instructions that define 32-bit values implicit zero-extend the
/// result out to 64 bits.
virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
- virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
- virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const;
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
/// values are assumed to be legal.
virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const;
+ EVT VT) const;
    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can
    /// use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const;
+ EVT VT) const;
/// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
- virtual bool ShouldShrinkFPConstant(MVT VT) const {
+ virtual bool ShouldShrinkFPConstant(EVT VT) const {
// Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
// expensive than a straight movsd. On the other hand, it's important to
// shrink long double fp constant since fldt is very slow.
@@ -497,11 +518,14 @@ namespace llvm {
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Target which want to do tail call
+ /// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
- virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
- SDValue Ret,
- SelectionDAG &DAG) const;
+ virtual bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
virtual const X86Subtarget* getSubtarget() {
return Subtarget;
@@ -509,17 +533,17 @@ namespace llvm {
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(MVT VT) const {
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
/// getWidenVectorType: given a vector type, returns the type to widen
/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
    /// If there is no vector type that we want to widen to, returns MVT::Other.
    /// When and where to widen is target dependent, based on the cost of
/// scalarizing vs using the wider vector type.
- virtual MVT getWidenVectorType(MVT VT) const;
+ virtual EVT getWidenVectorType(EVT VT) const;
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
@@ -554,28 +578,30 @@ namespace llvm {
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
- SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG);
-
- SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG,
- const CCValAssign &VA, MachineFrameInfo *MFI,
- unsigned CC, SDValue Root, unsigned i);
-
- SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
- const SDValue &StackPtr,
- const CCValAssign &VA, SDValue Chain,
- SDValue Arg, ISD::ArgFlagsTy Flags);
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i);
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags);
// Call lowering helpers.
- bool IsCalleePop(bool isVarArg, unsigned CallingConv);
- bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall);
- bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall);
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv);
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl);
- CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const;
- NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op);
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
+ NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv);
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
@@ -595,7 +621,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
- SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG);
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
@@ -612,10 +638,7 @@ namespace llvm {
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG);
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG);
@@ -635,6 +658,26 @@ namespace llvm {
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp);
@@ -651,9 +694,17 @@ namespace llvm {
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff);
+    /// Utility function to emit string-processing SSE4.2 instructions
+    /// that return their result in xmm0.
+    /// This takes the instruction to expand, the associated machine basic
+    /// block, the number of arguments, and whether the second argument is
+    /// in memory.
+ MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
+ unsigned argNum, bool inMem) const;
+
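For context, a hedged user-level sketch (not part of the change): the instructions EmitPCMP expands are the SSE4.2 string operations whose mask-producing forms return in xmm0:

    #include <nmmintrin.h>  // SSE4.2; compile with -msse4.2

    // pcmpistrm leaves its match mask in xmm0; that fixed-register result is
    // why lowering goes through a custom inserter instead of a plain pattern.
    __m128i match_any(__m128i a, __m128i b) {
      return _mm_cmpistrm(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
    }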
/// Utility function to emit atomic bitwise operations (and, or, xor).
- // It takes the bitwise instruction to expand, the associated machine basic
- // block, and the associated X86 opcodes for reg/reg and reg/imm.
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
MachineInstr *BInstr,
MachineBasicBlock *BB,
@@ -683,6 +734,15 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned cmovOpc) const;
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 472ba4c46285..ef19823a2831 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -28,26 +28,29 @@ def i64i32imm_pcrel : Operand<i64> {
// 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64>;
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExt8AsmOperand;
+}
def lea64mem : Operand<i64> {
let PrintMethod = "printlea64mem";
- let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def lea64_32mem : Operand<i32> {
let PrintMethod = "printlea64_32mem";
let AsmOperandLowerMethod = "lower_lea64_32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
//===----------------------------------------------------------------------===//
// Complex Pattern Definitions.
//
def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
- [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper,
- X86WrapperRIP],
- []>;
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -129,13 +132,40 @@ let isCall = 1 in
def CALL64pcrel32 : Ii32<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NotWin64]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call\t{*}$dst", [(X86call GR64:$dst)]>;
+ "call\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[NotWin64]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+ "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
}
+ // FIXME: We need to teach codegen about a single list of call-clobbered registers.
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : I<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>;
+ }
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -162,6 +192,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
[(brind GR64:$dst)]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
}
//===----------------------------------------------------------------------===//
@@ -182,12 +214,18 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
(outs), (ins), "leave", []>;
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in
+let mayLoad = 1 in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-let mayStore = 1 in
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
@@ -246,6 +284,14 @@ let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)]>, REP;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+
+// Fast system-call instructions
+def SYSEXIT64 : RI<0x35, RawFrm,
+ (outs), (ins), "sysexit", []>, TB;
+
//===----------------------------------------------------------------------===//
// Move Instructions...
//
@@ -275,6 +321,25 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)]>;
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins i8imm:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o32a : RIi32<0xA1, RawFrm, (outs), (ins i32imm:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs i8imm:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao32 : RIi32<0xA3, RawFrm, (outs i32imm:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+
+// Moves to and from segment registers
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+
// Sign/Zero extenders
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -332,13 +397,15 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
[(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
// Any instruction that defines a 32-bit result zeroes the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
-// be copying from a truncate, but any other 32-bit operation will zero-extend
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
// up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg;
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
}]>;
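A minimal sketch of what def32 captures (illustrative, not from the change): a value produced by a real 32-bit ALU operation is already zero-extended by the hardware, so widening it to i64 costs nothing:

    #include <cstdint>

    uint64_t widen(uint32_t x) {
      uint32_t y = x + 1;  // 32-bit ALU op: the CPU zeroes bits 63:32
      return y;            // so this zext can be a bare SUBREG_TO_REG, with
                           // no movzx, unless the value came from a truncate,
                           // CopyFromReg, or cmov, which def32 excludes.
    }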
// In the case of a 32-bit def that is known to implicitly zero-extend,
@@ -361,6 +428,10 @@ let neverHasSideEffects = 1 in {
//
let Defs = [EFLAGS] in {
+
+def ADD64i32 : RI<0x05, RawFrm, (outs), (ins i32imm:$src),
+ "add{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isConvertibleToThreeAddress = 1 in {
let isCommutable = 1 in
@@ -386,6 +457,12 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:
"add{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
(implicit EFLAGS)]>;
+
+// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but
+// differently encoded.
+def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>;
+
} // isTwoAddress
// Memory-Register Addition
@@ -403,6 +480,10 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
(implicit EFLAGS)]>;
let Uses = [EFLAGS] in {
+
+def ADC64i32 : RI<0x15, RawFrm, (outs), (ins i32imm:$src),
+ "adc{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isCommutable = 1 in
def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -458,6 +539,9 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(implicit EFLAGS)]>;
} // isTwoAddress
+def SUB64i32 : RI<0x2D, RawFrm, (outs), (ins i32imm:$src),
+ "sub{q}\t{$src, %rax|%rax, $src}", []>;
+
// Memory-Register Subtraction
def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
@@ -494,6 +578,9 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
+def SBB64i32 : RI<0x1D, RawFrm, (outs), (ins i32imm:$src),
+ "sbb{q}\t{$src, %rax|%rax, $src}", []>;
+
def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
@@ -665,8 +752,10 @@ let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
-// cheaper.
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper.
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst", []>;
} // isTwoAddress
let Uses = [CL] in
@@ -729,6 +818,39 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
// Rotate instructions
+
+let isTwoAddress = 1 in {
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+ "rcl{q}\t{1, $dst|$dst, 1}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcl{q}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
+ "rcr{q}\t{1, $dst|$dst, 1}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcr{q}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+}
+
let isTwoAddress = 1 in {
let Uses = [CL] in
def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
@@ -839,6 +961,9 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)]>;
let Defs = [EFLAGS] in {
+def AND64i32 : RI<0x25, RawFrm, (outs), (ins i32imm:$src),
+ "and{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
@@ -912,6 +1037,9 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+ "or{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -945,6 +1073,10 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"xor{q}\t{$src, $dst|$dst, $src}",
[(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+ "xor{q}\t{$src, %rax|%rax, $src}", []>;
+
} // Defs = [EFLAGS]
//===----------------------------------------------------------------------===//
@@ -953,6 +1085,8 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
// Integer comparison
let Defs = [EFLAGS] in {
+def TEST64i32 : RI<0xa9, RawFrm, (outs), (ins i32imm:$src),
+ "test{q}\t{$src, %rax|%rax, $src}", []>;
let isCommutable = 1 in
def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"test{q}\t{$src2, $src1|$src1, $src2}",
@@ -973,10 +1107,15 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
[(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0),
(implicit EFLAGS)]>;
+
+def CMP64i32 : RI<0x3D, RawFrm, (outs), (ins i32imm:$src),
+ "cmp{q}\t{$src, %rax|%rax, $src}", []>;
def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
[(X86cmp GR64:$src1, GR64:$src2),
(implicit EFLAGS)]>;
+def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
[(X86cmp (loadi64 addr:$src1), GR64:$src2),
@@ -1306,14 +1445,12 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src)
// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
// equivalent due to implicit zero-extending, and it sometimes has a smaller
// encoding.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let Defs = [EFLAGS], AddedComplexity = 1,
- isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
- "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
- [(set GR64:$dst, 0)]>;
+let AddedComplexity = 1 in
+def : Pat<(i64 0),
+ (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
+
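The encoding claim can be checked from C++ (a sketch; the %k operand modifier is a GCC/clang inline-asm detail, not something this change adds):

    #include <cstdint>

    uint64_t zero64() {
      uint64_t r;
      // "xorl %eax,%eax" is two bytes (31 C0) yet clears all 64 bits; the
      // REX.W form "xorq" costs an extra prefix byte. %k0 forces the 32-bit
      // register name, mirroring what the MOV32r0-based pattern emits.
      __asm__("xorl %k0, %k0" : "=r"(r));
      return r;
    }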
// Materialize i64 constant where top 32-bits are zero.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
@@ -1343,12 +1480,12 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movq\t%gs:$src, $dst",
[(set GR64:$dst, (gsload addr:$src))]>, SegGS;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movq\t%fs:$src, $dst",
[(set GR64:$dst, (fsload addr:$src))]>, SegFS;
@@ -1371,11 +1508,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xadd\t$val, $ptr",
[(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
TB, LOCK;
+
def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xchg\t$val, $ptr",
[(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
}
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+ (ins i64mem:$dst, i64i32imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+ (ins i64mem:$dst, i64i32imm:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+
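The situation these LOCK_* definitions target, sketched with a modern atomic for illustration (a tree of this era would have used __sync_fetch_and_add; either way the fetched value is discarded):

    #include <atomic>

    void bump(std::atomic<long> &counter) {
      // Result unused: this can be emitted as 'lock inc'/'lock add' rather
      // than 'lock xadd', which would tie up a register for the old value.
      counter.fetch_add(1, std::memory_order_relaxed);
    }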
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
@@ -1405,78 +1574,88 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
[(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
}
+// Segmentation support instructions
+
+// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// String manipulation instructions
+
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable, when not in the
+// small code model, should use 'movabs'. FIXME: This is really a hack; the
+// 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+
+// In static codegen with the small code model, we can get the address of a
+// label into a register with 'movl'. FIXME: This is a hack; the 'imm'
+// predicate of MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+
+// In the kernel code model, we can get the address of a label into a
+// register with 'movq'. FIXME: This is a hack; the 'imm' predicate of
+// MOV64ri32 should accept these.
def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
// If we have the small code model and -static mode, it is safe to store
// global addresses directly as immediates. FIXME: This is really a hack; the 'imm' predicate
-// should handle this sort of thing.
+// for MOV64mi32 should handle this sort of thing.
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[SmallCode, IsStatic]>;
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[SmallCode, IsStatic]>;
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[SmallCode, IsStatic]>;
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[SmallCode, IsStatic]>;
-
-// If we have small model and -static mode, it is safe to store global addresses
-// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
-// should handle this sort of thing.
-def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[SmallCode, IsStatic]>;
-
+ Requires<[NearData, IsStatic]>;
// Calls
// Direct PC-relative function call for the small code model: a 32-bit
// displacement sign-extended to 64 bits.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>;
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>;
-
-def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86tailcall (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>;
-
-def : Pat<(X86tailcall GR64:$dst),
- (CALL64r GR64:$dst)>;
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
// tailcall stuff
-def : Pat<(X86tailcall GR32:$dst),
- (TAILCALL)>;
-def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
- (TAILCALL)>;
-def : Pat<(X86tailcall (i64 texternalsym:$dst)),
- (TAILCALL)>;
-
def : Pat<(X86tcret GR64:$dst, imm:$off),
(TCRETURNri64 GR64:$dst, imm:$off)>;
@@ -1540,30 +1719,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
def : Pat<(extloadi64i32 addr:$src),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
x86_subreg_32bit)>;
-def : Pat<(extloadi16i1 addr:$src),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
- x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(extloadi16i8 addr:$src),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
- x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
-
-// anyext
-def : Pat<(i64 (anyext GR8:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
-def : Pat<(i64 (anyext GR16:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
-def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
-def : Pat<(i16 (anyext GR8:$src)),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext GR8:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
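anyext nodes mostly arise internally, when only the low bits of a widened value are consumed; the visible effect of these patterns is that i8/i16 widenings always go through movzbl/movzwl. A sketch (not from the change):

    #include <cstdint>

    uint64_t low_byte_plus_one(uint8_t b) {
      // The i8 -> i64 extension lowers to MOVZX64rr8 rather than a
      // subregister insert, trading an explicit zero-extend for the
      // partial-register update the insert would leave behind.
      return static_cast<uint64_t>(b) + 1;
    }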
//===----------------------------------------------------------------------===//
// Some peepholes
@@ -1661,6 +1825,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
@@ -1668,6 +1837,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
x86_subreg_8bit_hi)),
x86_subreg_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
@@ -1906,6 +2082,102 @@ def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
(implicit EFLAGS)),
(DEC64m addr:$dst)>;
+// Register-Register Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (OR64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (AND64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND64mi32 addr:$dst, i64immSExt32:$src2)>;
+
//===----------------------------------------------------------------------===//
// X86-64 SSE Instructions
//===----------------------------------------------------------------------===//
@@ -1977,3 +2249,15 @@ let isTwoAddress = 1 in {
}
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
+
+// -disable-16bit support.
+def : Pat<(truncstorei16 (i64 imm:$src), addr:$dst),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(truncstorei16 GR64:$src, addr:$dst),
+ (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+def : Pat<(i64 (sextloadi16 addr:$dst)),
+ (MOVSX64rm16 addr:$dst)>;
+def : Pat<(i64 (zextloadi16 addr:$dst)),
+ (MOVZX64rm16 addr:$dst)>;
+def : Pat<(i64 (extloadi16 addr:$dst)),
+ (MOVZX64rm16 addr:$dst)>;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 6359542819f4..c475b56d12f4 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
@@ -47,7 +48,7 @@ struct X86AddressMode {
unsigned Scale;
unsigned IndexReg;
- unsigned Disp;
+ int Disp;
GlobalValue *GV;
unsigned GVOpFlags;
@@ -61,20 +62,20 @@ struct X86AddressMode {
/// current instruction -- that is, a dereference of an address in a register,
/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
///
-inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB,
- unsigned Reg) {
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
// Because memory references are always represented with four
// values, this adds: Reg, [1, NoReg, 0] to the instruction.
return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
}
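A hedged fragment showing typical use of these helpers (hypothetical surroundings: MBB, MI, DL, and TII stand for the usual machine-function-pass locals and are not defined by this change):

    // Emits 'movq (%rax), %rbx': addDirectMem appends the four memory
    // operands Reg=RAX, Scale=1, Index=NoReg, Disp=0 to the MOV64rm under
    // construction.
    addDirectMem(BuildMI(*MBB, MI, DL, TII->get(X86::MOV64rm), X86::RBX),
                 X86::RAX);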
-inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB,
- int Offset) {
+static inline const MachineInstrBuilder &
+addLeaOffset(const MachineInstrBuilder &MIB, int Offset) {
return MIB.addImm(1).addReg(0).addImm(Offset);
}
-inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB,
- int Offset) {
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
return addLeaOffset(MIB, Offset).addReg(0);
}
@@ -82,29 +83,29 @@ inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB,
/// [Reg + Offset], i.e., one with no scale or index, but with a
/// displacement. An example is: DWORD PTR [EAX + 4].
///
-inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill,
- int Offset) {
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
}
-inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill,
- int Offset) {
+static inline const MachineInstrBuilder &
+addLeaRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
}
/// addRegReg - This function is used to add a memory reference of the form:
/// [Reg + Reg].
-inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
unsigned Reg1, bool isKill1,
unsigned Reg2, bool isKill2) {
return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
.addReg(Reg2, getKillRegState(isKill2)).addImm(0);
}
-inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
- const X86AddressMode &AM) {
+static inline const MachineInstrBuilder &
+addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) {
assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
if (AM.BaseType == X86AddressMode::RegBase)
@@ -120,8 +121,9 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
return MIB.addImm(AM.Disp);
}
-inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
- const X86AddressMode &AM) {
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
return addLeaAddress(MIB, AM).addReg(0);
}
@@ -130,7 +132,7 @@ inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
/// reference has base register as the FrameIndex offset until it is resolved.
/// This allows a constant offset to be specified as well...
///
-inline const MachineInstrBuilder &
+static inline const MachineInstrBuilder &
addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
MachineInstr *MI = MIB;
MachineFunction &MF = *MI->getParent()->getParent();
@@ -141,11 +143,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
Flags |= MachineMemOperand::MOLoad;
if (TID.mayStore())
Flags |= MachineMemOperand::MOStore;
- MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI),
- Flags,
- MFI.getObjectOffset(FI) + Offset,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ Flags, Offset,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
}
@@ -157,7 +159,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
/// the GlobalBaseReg parameter can be used to make this a
/// GlobalBaseReg-relative reference.
///
-inline const MachineInstrBuilder &
+static inline const MachineInstrBuilder &
addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
unsigned GlobalBaseReg, unsigned char OpFlags) {
//FIXME: factor this
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index bc7def457c0f..7e373730b30a 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -303,6 +303,31 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
}
def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+// Versions of FP instructions that take a single memory operand. Added for the
+// disassembler; remove these as they become covered by patterns elsewhere.
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp\t$src">;
+
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fstenv\t$dst">;
+
+def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
+def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
+
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp\t$src">;
+
+def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">;
+def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fsave\t$dst">;
+def FSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fstsw\t$dst">;
+
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">;
+
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+
// Floating point cmovs.
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index eeed5bd27ff3..abdb3135c3ac 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -79,6 +79,7 @@ class XD { bits<4> Prefix = 11; }
class XS { bits<4> Prefix = 12; }
class T8 { bits<4> Prefix = 13; }
class TA { bits<4> Prefix = 14; }
+class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr>
@@ -142,6 +143,24 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
let Pattern = pattern;
}
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
// SSE1 Instruction Templates:
//
// SSI - SSE1 instructions with XS prefix.
@@ -229,6 +248,16 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+// SS42FI - SSE 4.2 instructions with TF prefix.
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
+
+// SS42AI - SSE 4.2 instructions with TA prefix.
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+
// X86-64 Instruction templates...
//
@@ -282,4 +311,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
-
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e5d84c507783..e8a39d11040a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -18,8 +18,8 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,24 +27,24 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
-namespace {
- cl::opt<bool>
- NoFusing("disable-spill-fusing",
- cl::desc("Disable fusing of spill code into instructions"));
- cl::opt<bool>
- PrintFailedFusing("print-failed-fuse-candidates",
- cl::desc("Print instructions that the allocator wants to"
- " fuse, but the X86 backend currently can't"),
- cl::Hidden);
- cl::opt<bool>
- ReMatPICStubLoad("remat-pic-stub-load",
- cl::desc("Re-materialize load from stub in PIC mode"),
- cl::init(false), cl::Hidden);
-}
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
@@ -212,9 +212,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
unsigned RegOp = OpTbl2Addr[i][0];
unsigned MemOp = OpTbl2Addr[i][1];
if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,0))).second)
assert(false && "Duplicated entries?");
- unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store
+ // Index 0, folded load and store, no alignment requirement.
+ unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
std::make_pair(RegOp,
AuxInfo))).second)
@@ -222,93 +223,94 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
}
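// An illustrative decoding of the AuxInfo word built above (a sketch,
// not part of this patch): the low four bits hold the index of the
// operand being folded, bit 4 marks a folded load, and bit 5 a folded
// store, matching the encodings used for all three tables below.
struct FoldAuxInfo {
  unsigned OpNum;    // operand index of the folded memory operand
  bool FoldedLoad;   // bit 4: the fused form performs a load
  bool FoldedStore;  // bit 5: the fused form performs a store
};

static FoldAuxInfo decodeFoldAuxInfo(unsigned AuxInfo) {
  FoldAuxInfo Info;
  Info.OpNum       = AuxInfo & 0xf;
  Info.FoldedLoad  = (AuxInfo >> 4) & 1;
  Info.FoldedStore = (AuxInfo >> 5) & 1;
  return Info;
}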
// If the third value is 1, then it's folding either a load or a store.
- static const unsigned OpTbl0[][3] = {
- { X86::BT16ri8, X86::BT16mi8, 1 },
- { X86::BT32ri8, X86::BT32mi8, 1 },
- { X86::BT64ri8, X86::BT64mi8, 1 },
- { X86::CALL32r, X86::CALL32m, 1 },
- { X86::CALL64r, X86::CALL64m, 1 },
- { X86::CMP16ri, X86::CMP16mi, 1 },
- { X86::CMP16ri8, X86::CMP16mi8, 1 },
- { X86::CMP16rr, X86::CMP16mr, 1 },
- { X86::CMP32ri, X86::CMP32mi, 1 },
- { X86::CMP32ri8, X86::CMP32mi8, 1 },
- { X86::CMP32rr, X86::CMP32mr, 1 },
- { X86::CMP64ri32, X86::CMP64mi32, 1 },
- { X86::CMP64ri8, X86::CMP64mi8, 1 },
- { X86::CMP64rr, X86::CMP64mr, 1 },
- { X86::CMP8ri, X86::CMP8mi, 1 },
- { X86::CMP8rr, X86::CMP8mr, 1 },
- { X86::DIV16r, X86::DIV16m, 1 },
- { X86::DIV32r, X86::DIV32m, 1 },
- { X86::DIV64r, X86::DIV64m, 1 },
- { X86::DIV8r, X86::DIV8m, 1 },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
- { X86::IDIV16r, X86::IDIV16m, 1 },
- { X86::IDIV32r, X86::IDIV32m, 1 },
- { X86::IDIV64r, X86::IDIV64m, 1 },
- { X86::IDIV8r, X86::IDIV8m, 1 },
- { X86::IMUL16r, X86::IMUL16m, 1 },
- { X86::IMUL32r, X86::IMUL32m, 1 },
- { X86::IMUL64r, X86::IMUL64m, 1 },
- { X86::IMUL8r, X86::IMUL8m, 1 },
- { X86::JMP32r, X86::JMP32m, 1 },
- { X86::JMP64r, X86::JMP64m, 1 },
- { X86::MOV16ri, X86::MOV16mi, 0 },
- { X86::MOV16rr, X86::MOV16mr, 0 },
- { X86::MOV32ri, X86::MOV32mi, 0 },
- { X86::MOV32rr, X86::MOV32mr, 0 },
- { X86::MOV64ri32, X86::MOV64mi32, 0 },
- { X86::MOV64rr, X86::MOV64mr, 0 },
- { X86::MOV8ri, X86::MOV8mi, 0 },
- { X86::MOV8rr, X86::MOV8mr, 0 },
- { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
- { X86::MOVAPDrr, X86::MOVAPDmr, 0 },
- { X86::MOVAPSrr, X86::MOVAPSmr, 0 },
- { X86::MOVDQArr, X86::MOVDQAmr, 0 },
- { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
- { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
- { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 },
- { X86::MOVSDrr, X86::MOVSDmr, 0 },
- { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
- { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 },
- { X86::MOVSSrr, X86::MOVSSmr, 0 },
- { X86::MOVUPDrr, X86::MOVUPDmr, 0 },
- { X86::MOVUPSrr, X86::MOVUPSmr, 0 },
- { X86::MUL16r, X86::MUL16m, 1 },
- { X86::MUL32r, X86::MUL32m, 1 },
- { X86::MUL64r, X86::MUL64m, 1 },
- { X86::MUL8r, X86::MUL8m, 1 },
- { X86::SETAEr, X86::SETAEm, 0 },
- { X86::SETAr, X86::SETAm, 0 },
- { X86::SETBEr, X86::SETBEm, 0 },
- { X86::SETBr, X86::SETBm, 0 },
- { X86::SETEr, X86::SETEm, 0 },
- { X86::SETGEr, X86::SETGEm, 0 },
- { X86::SETGr, X86::SETGm, 0 },
- { X86::SETLEr, X86::SETLEm, 0 },
- { X86::SETLr, X86::SETLm, 0 },
- { X86::SETNEr, X86::SETNEm, 0 },
- { X86::SETNOr, X86::SETNOm, 0 },
- { X86::SETNPr, X86::SETNPm, 0 },
- { X86::SETNSr, X86::SETNSm, 0 },
- { X86::SETOr, X86::SETOm, 0 },
- { X86::SETPr, X86::SETPm, 0 },
- { X86::SETSr, X86::SETSm, 0 },
- { X86::TAILJMPr, X86::TAILJMPm, 1 },
- { X86::TEST16ri, X86::TEST16mi, 1 },
- { X86::TEST32ri, X86::TEST32mi, 1 },
- { X86::TEST64ri32, X86::TEST64mi32, 1 },
- { X86::TEST8ri, X86::TEST8mi, 1 }
+ static const unsigned OpTbl0[][4] = {
+ { X86::BT16ri8, X86::BT16mi8, 1, 0 },
+ { X86::BT32ri8, X86::BT32mi8, 1, 0 },
+ { X86::BT64ri8, X86::BT64mi8, 1, 0 },
+ { X86::CALL32r, X86::CALL32m, 1, 0 },
+ { X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::CMP16ri, X86::CMP16mi, 1, 0 },
+ { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
+ { X86::CMP16rr, X86::CMP16mr, 1, 0 },
+ { X86::CMP32ri, X86::CMP32mi, 1, 0 },
+ { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
+ { X86::CMP32rr, X86::CMP32mr, 1, 0 },
+ { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
+ { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
+ { X86::CMP64rr, X86::CMP64mr, 1, 0 },
+ { X86::CMP8ri, X86::CMP8mi, 1, 0 },
+ { X86::CMP8rr, X86::CMP8mr, 1, 0 },
+ { X86::DIV16r, X86::DIV16m, 1, 0 },
+ { X86::DIV32r, X86::DIV32m, 1, 0 },
+ { X86::DIV64r, X86::DIV64m, 1, 0 },
+ { X86::DIV8r, X86::DIV8m, 1, 0 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 },
+ { X86::IDIV16r, X86::IDIV16m, 1, 0 },
+ { X86::IDIV32r, X86::IDIV32m, 1, 0 },
+ { X86::IDIV64r, X86::IDIV64m, 1, 0 },
+ { X86::IDIV8r, X86::IDIV8m, 1, 0 },
+ { X86::IMUL16r, X86::IMUL16m, 1, 0 },
+ { X86::IMUL32r, X86::IMUL32m, 1, 0 },
+ { X86::IMUL64r, X86::IMUL64m, 1, 0 },
+ { X86::IMUL8r, X86::IMUL8m, 1, 0 },
+ { X86::JMP32r, X86::JMP32m, 1, 0 },
+ { X86::JMP64r, X86::JMP64m, 1, 0 },
+ { X86::MOV16ri, X86::MOV16mi, 0, 0 },
+ { X86::MOV16rr, X86::MOV16mr, 0, 0 },
+ { X86::MOV32ri, X86::MOV32mi, 0, 0 },
+ { X86::MOV32rr, X86::MOV32mr, 0, 0 },
+ { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
+ { X86::MOV64rr, X86::MOV64mr, 0, 0 },
+ { X86::MOV8ri, X86::MOV8mi, 0, 0 },
+ { X86::MOV8rr, X86::MOV8mr, 0, 0 },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
+ { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
+ { X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
+ { X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+ { X86::MUL16r, X86::MUL16m, 1, 0 },
+ { X86::MUL32r, X86::MUL32m, 1, 0 },
+ { X86::MUL64r, X86::MUL64m, 1, 0 },
+ { X86::MUL8r, X86::MUL8m, 1, 0 },
+ { X86::SETAEr, X86::SETAEm, 0, 0 },
+ { X86::SETAr, X86::SETAm, 0, 0 },
+ { X86::SETBEr, X86::SETBEm, 0, 0 },
+ { X86::SETBr, X86::SETBm, 0, 0 },
+ { X86::SETEr, X86::SETEm, 0, 0 },
+ { X86::SETGEr, X86::SETGEm, 0, 0 },
+ { X86::SETGr, X86::SETGm, 0, 0 },
+ { X86::SETLEr, X86::SETLEm, 0, 0 },
+ { X86::SETLr, X86::SETLm, 0, 0 },
+ { X86::SETNEr, X86::SETNEm, 0, 0 },
+ { X86::SETNOr, X86::SETNOm, 0, 0 },
+ { X86::SETNPr, X86::SETNPm, 0, 0 },
+ { X86::SETNSr, X86::SETNSm, 0, 0 },
+ { X86::SETOr, X86::SETOm, 0, 0 },
+ { X86::SETPr, X86::SETPm, 0, 0 },
+ { X86::SETSr, X86::SETSm, 0, 0 },
+ { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
+ { X86::TEST16ri, X86::TEST16mi, 1, 0 },
+ { X86::TEST32ri, X86::TEST32mi, 1, 0 },
+ { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
+ { X86::TEST8ri, X86::TEST8mi, 1, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
unsigned RegOp = OpTbl0[i][0];
unsigned MemOp = OpTbl0[i][1];
+ unsigned Align = OpTbl0[i][3];
if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,Align))).second)
assert(false && "Duplicated entries?");
unsigned FoldedLoad = OpTbl0[i][2];
// Index 0, folded load or store.
@@ -319,338 +321,342 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
AmbEntries.push_back(MemOp);
}
- static const unsigned OpTbl1[][2] = {
- { X86::CMP16rr, X86::CMP16rm },
- { X86::CMP32rr, X86::CMP32rm },
- { X86::CMP64rr, X86::CMP64rm },
- { X86::CMP8rr, X86::CMP8rm },
- { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
- { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
- { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
- { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
- { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
- { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
- { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
- { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
- { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
- { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
- { X86::FsMOVAPDrr, X86::MOVSDrm },
- { X86::FsMOVAPSrr, X86::MOVSSrm },
- { X86::IMUL16rri, X86::IMUL16rmi },
- { X86::IMUL16rri8, X86::IMUL16rmi8 },
- { X86::IMUL32rri, X86::IMUL32rmi },
- { X86::IMUL32rri8, X86::IMUL32rmi8 },
- { X86::IMUL64rri32, X86::IMUL64rmi32 },
- { X86::IMUL64rri8, X86::IMUL64rmi8 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
- { X86::Int_COMISDrr, X86::Int_COMISDrm },
- { X86::Int_COMISSrr, X86::Int_COMISSrm },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
- { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
- { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
- { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
- { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
- { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
- { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
- { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
- { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
- { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
- { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
- { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
- { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
- { X86::MOV16rr, X86::MOV16rm },
- { X86::MOV32rr, X86::MOV32rm },
- { X86::MOV64rr, X86::MOV64rm },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
- { X86::MOV64toSDrr, X86::MOV64toSDrm },
- { X86::MOV8rr, X86::MOV8rm },
- { X86::MOVAPDrr, X86::MOVAPDrm },
- { X86::MOVAPSrr, X86::MOVAPSrm },
- { X86::MOVDDUPrr, X86::MOVDDUPrm },
- { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
- { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
- { X86::MOVDQArr, X86::MOVDQArm },
- { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
- { X86::MOVSDrr, X86::MOVSDrm },
- { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
- { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
- { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
- { X86::MOVSSrr, X86::MOVSSrm },
- { X86::MOVSX16rr8, X86::MOVSX16rm8 },
- { X86::MOVSX32rr16, X86::MOVSX32rm16 },
- { X86::MOVSX32rr8, X86::MOVSX32rm8 },
- { X86::MOVSX64rr16, X86::MOVSX64rm16 },
- { X86::MOVSX64rr32, X86::MOVSX64rm32 },
- { X86::MOVSX64rr8, X86::MOVSX64rm8 },
- { X86::MOVUPDrr, X86::MOVUPDrm },
- { X86::MOVUPSrr, X86::MOVUPSrm },
- { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm },
- { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
- { X86::MOVZX16rr8, X86::MOVZX16rm8 },
- { X86::MOVZX32rr16, X86::MOVZX32rm16 },
- { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
- { X86::MOVZX32rr8, X86::MOVZX32rm8 },
- { X86::MOVZX64rr16, X86::MOVZX64rm16 },
- { X86::MOVZX64rr32, X86::MOVZX64rm32 },
- { X86::MOVZX64rr8, X86::MOVZX64rm8 },
- { X86::PSHUFDri, X86::PSHUFDmi },
- { X86::PSHUFHWri, X86::PSHUFHWmi },
- { X86::PSHUFLWri, X86::PSHUFLWmi },
- { X86::RCPPSr, X86::RCPPSm },
- { X86::RCPPSr_Int, X86::RCPPSm_Int },
- { X86::RSQRTPSr, X86::RSQRTPSm },
- { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
- { X86::RSQRTSSr, X86::RSQRTSSm },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
- { X86::SQRTPDr, X86::SQRTPDm },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
- { X86::SQRTPSr, X86::SQRTPSm },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
- { X86::SQRTSDr, X86::SQRTSDm },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
- { X86::SQRTSSr, X86::SQRTSSm },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
- { X86::TEST16rr, X86::TEST16rm },
- { X86::TEST32rr, X86::TEST32rm },
- { X86::TEST64rr, X86::TEST64rm },
- { X86::TEST8rr, X86::TEST8rm },
+ static const unsigned OpTbl1[][3] = {
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm, 0 },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
+ { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
+ { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
+ { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, 16 },
+ { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
+ { X86::MOVSDrr, X86::MOVSDrm, 0 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
+ { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
+ { X86::MOVSSrr, X86::MOVSSrm, 0 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 16 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, 16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
+ { X86::RCPPSr, X86::RCPPSm, 16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, 16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
+ { X86::SQRTPSr, X86::SQRTPSm, 16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
- { X86::UCOMISDrr, X86::UCOMISDrm },
- { X86::UCOMISSrr, X86::UCOMISSrm }
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
unsigned MemOp = OpTbl1[i][1];
+ unsigned Align = OpTbl1[i][2];
if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,Align))).second)
assert(false && "Duplicated entries?");
- unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
+ // Index 1, folded load
+ unsigned AuxInfo = 1 | (1 << 4);
if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
std::make_pair(RegOp, AuxInfo))).second)
AmbEntries.push_back(MemOp);
}
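// A sketch of how the (MemOp, MinAlign) pairs registered above are
// meant to be consumed; lookupFoldedOpcode is a hypothetical helper,
// and the real check lives in foldMemoryOperandImpl below. Folding is
// refused when the memory operand's alignment is below the table's
// minimum (16 bytes for the aligned SSE forms, 0 for no requirement).
static unsigned lookupFoldedOpcode(
    const DenseMap<unsigned*, std::pair<unsigned,unsigned> > &Table,
    unsigned RegOpc, unsigned OperandAlign) {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    Table.find((unsigned*)RegOpc);
  if (I == Table.end())
    return 0;                          // no memory form known
  if (OperandAlign < I->second.second)
    return 0;                          // alignment requirement not met
  return I->second.first;             // opcode of the memory form
}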
- static const unsigned OpTbl2[][2] = {
- { X86::ADC32rr, X86::ADC32rm },
- { X86::ADC64rr, X86::ADC64rm },
- { X86::ADD16rr, X86::ADD16rm },
- { X86::ADD32rr, X86::ADD32rm },
- { X86::ADD64rr, X86::ADD64rm },
- { X86::ADD8rr, X86::ADD8rm },
- { X86::ADDPDrr, X86::ADDPDrm },
- { X86::ADDPSrr, X86::ADDPSrm },
- { X86::ADDSDrr, X86::ADDSDrm },
- { X86::ADDSSrr, X86::ADDSSrm },
- { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
- { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
- { X86::AND16rr, X86::AND16rm },
- { X86::AND32rr, X86::AND32rm },
- { X86::AND64rr, X86::AND64rm },
- { X86::AND8rr, X86::AND8rm },
- { X86::ANDNPDrr, X86::ANDNPDrm },
- { X86::ANDNPSrr, X86::ANDNPSrm },
- { X86::ANDPDrr, X86::ANDPDrm },
- { X86::ANDPSrr, X86::ANDPSrm },
- { X86::CMOVA16rr, X86::CMOVA16rm },
- { X86::CMOVA32rr, X86::CMOVA32rm },
- { X86::CMOVA64rr, X86::CMOVA64rm },
- { X86::CMOVAE16rr, X86::CMOVAE16rm },
- { X86::CMOVAE32rr, X86::CMOVAE32rm },
- { X86::CMOVAE64rr, X86::CMOVAE64rm },
- { X86::CMOVB16rr, X86::CMOVB16rm },
- { X86::CMOVB32rr, X86::CMOVB32rm },
- { X86::CMOVB64rr, X86::CMOVB64rm },
- { X86::CMOVBE16rr, X86::CMOVBE16rm },
- { X86::CMOVBE32rr, X86::CMOVBE32rm },
- { X86::CMOVBE64rr, X86::CMOVBE64rm },
- { X86::CMOVE16rr, X86::CMOVE16rm },
- { X86::CMOVE32rr, X86::CMOVE32rm },
- { X86::CMOVE64rr, X86::CMOVE64rm },
- { X86::CMOVG16rr, X86::CMOVG16rm },
- { X86::CMOVG32rr, X86::CMOVG32rm },
- { X86::CMOVG64rr, X86::CMOVG64rm },
- { X86::CMOVGE16rr, X86::CMOVGE16rm },
- { X86::CMOVGE32rr, X86::CMOVGE32rm },
- { X86::CMOVGE64rr, X86::CMOVGE64rm },
- { X86::CMOVL16rr, X86::CMOVL16rm },
- { X86::CMOVL32rr, X86::CMOVL32rm },
- { X86::CMOVL64rr, X86::CMOVL64rm },
- { X86::CMOVLE16rr, X86::CMOVLE16rm },
- { X86::CMOVLE32rr, X86::CMOVLE32rm },
- { X86::CMOVLE64rr, X86::CMOVLE64rm },
- { X86::CMOVNE16rr, X86::CMOVNE16rm },
- { X86::CMOVNE32rr, X86::CMOVNE32rm },
- { X86::CMOVNE64rr, X86::CMOVNE64rm },
- { X86::CMOVNO16rr, X86::CMOVNO16rm },
- { X86::CMOVNO32rr, X86::CMOVNO32rm },
- { X86::CMOVNO64rr, X86::CMOVNO64rm },
- { X86::CMOVNP16rr, X86::CMOVNP16rm },
- { X86::CMOVNP32rr, X86::CMOVNP32rm },
- { X86::CMOVNP64rr, X86::CMOVNP64rm },
- { X86::CMOVNS16rr, X86::CMOVNS16rm },
- { X86::CMOVNS32rr, X86::CMOVNS32rm },
- { X86::CMOVNS64rr, X86::CMOVNS64rm },
- { X86::CMOVO16rr, X86::CMOVO16rm },
- { X86::CMOVO32rr, X86::CMOVO32rm },
- { X86::CMOVO64rr, X86::CMOVO64rm },
- { X86::CMOVP16rr, X86::CMOVP16rm },
- { X86::CMOVP32rr, X86::CMOVP32rm },
- { X86::CMOVP64rr, X86::CMOVP64rm },
- { X86::CMOVS16rr, X86::CMOVS16rm },
- { X86::CMOVS32rr, X86::CMOVS32rm },
- { X86::CMOVS64rr, X86::CMOVS64rm },
- { X86::CMPPDrri, X86::CMPPDrmi },
- { X86::CMPPSrri, X86::CMPPSrmi },
- { X86::CMPSDrr, X86::CMPSDrm },
- { X86::CMPSSrr, X86::CMPSSrm },
- { X86::DIVPDrr, X86::DIVPDrm },
- { X86::DIVPSrr, X86::DIVPSrm },
- { X86::DIVSDrr, X86::DIVSDrm },
- { X86::DIVSSrr, X86::DIVSSrm },
- { X86::FsANDNPDrr, X86::FsANDNPDrm },
- { X86::FsANDNPSrr, X86::FsANDNPSrm },
- { X86::FsANDPDrr, X86::FsANDPDrm },
- { X86::FsANDPSrr, X86::FsANDPSrm },
- { X86::FsORPDrr, X86::FsORPDrm },
- { X86::FsORPSrr, X86::FsORPSrm },
- { X86::FsXORPDrr, X86::FsXORPDrm },
- { X86::FsXORPSrr, X86::FsXORPSrm },
- { X86::HADDPDrr, X86::HADDPDrm },
- { X86::HADDPSrr, X86::HADDPSrm },
- { X86::HSUBPDrr, X86::HSUBPDrm },
- { X86::HSUBPSrr, X86::HSUBPSrm },
- { X86::IMUL16rr, X86::IMUL16rm },
- { X86::IMUL32rr, X86::IMUL32rm },
- { X86::IMUL64rr, X86::IMUL64rm },
- { X86::MAXPDrr, X86::MAXPDrm },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
- { X86::MAXPSrr, X86::MAXPSrm },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
- { X86::MAXSDrr, X86::MAXSDrm },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
- { X86::MAXSSrr, X86::MAXSSrm },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
- { X86::MINPDrr, X86::MINPDrm },
- { X86::MINPDrr_Int, X86::MINPDrm_Int },
- { X86::MINPSrr, X86::MINPSrm },
- { X86::MINPSrr_Int, X86::MINPSrm_Int },
- { X86::MINSDrr, X86::MINSDrm },
- { X86::MINSDrr_Int, X86::MINSDrm_Int },
- { X86::MINSSrr, X86::MINSSrm },
- { X86::MINSSrr_Int, X86::MINSSrm_Int },
- { X86::MULPDrr, X86::MULPDrm },
- { X86::MULPSrr, X86::MULPSrm },
- { X86::MULSDrr, X86::MULSDrm },
- { X86::MULSSrr, X86::MULSSrm },
- { X86::OR16rr, X86::OR16rm },
- { X86::OR32rr, X86::OR32rm },
- { X86::OR64rr, X86::OR64rm },
- { X86::OR8rr, X86::OR8rm },
- { X86::ORPDrr, X86::ORPDrm },
- { X86::ORPSrr, X86::ORPSrm },
- { X86::PACKSSDWrr, X86::PACKSSDWrm },
- { X86::PACKSSWBrr, X86::PACKSSWBrm },
- { X86::PACKUSWBrr, X86::PACKUSWBrm },
- { X86::PADDBrr, X86::PADDBrm },
- { X86::PADDDrr, X86::PADDDrm },
- { X86::PADDQrr, X86::PADDQrm },
- { X86::PADDSBrr, X86::PADDSBrm },
- { X86::PADDSWrr, X86::PADDSWrm },
- { X86::PADDWrr, X86::PADDWrm },
- { X86::PANDNrr, X86::PANDNrm },
- { X86::PANDrr, X86::PANDrm },
- { X86::PAVGBrr, X86::PAVGBrm },
- { X86::PAVGWrr, X86::PAVGWrm },
- { X86::PCMPEQBrr, X86::PCMPEQBrm },
- { X86::PCMPEQDrr, X86::PCMPEQDrm },
- { X86::PCMPEQWrr, X86::PCMPEQWrm },
- { X86::PCMPGTBrr, X86::PCMPGTBrm },
- { X86::PCMPGTDrr, X86::PCMPGTDrm },
- { X86::PCMPGTWrr, X86::PCMPGTWrm },
- { X86::PINSRWrri, X86::PINSRWrmi },
- { X86::PMADDWDrr, X86::PMADDWDrm },
- { X86::PMAXSWrr, X86::PMAXSWrm },
- { X86::PMAXUBrr, X86::PMAXUBrm },
- { X86::PMINSWrr, X86::PMINSWrm },
- { X86::PMINUBrr, X86::PMINUBrm },
- { X86::PMULDQrr, X86::PMULDQrm },
- { X86::PMULHUWrr, X86::PMULHUWrm },
- { X86::PMULHWrr, X86::PMULHWrm },
- { X86::PMULLDrr, X86::PMULLDrm },
- { X86::PMULLDrr_int, X86::PMULLDrm_int },
- { X86::PMULLWrr, X86::PMULLWrm },
- { X86::PMULUDQrr, X86::PMULUDQrm },
- { X86::PORrr, X86::PORrm },
- { X86::PSADBWrr, X86::PSADBWrm },
- { X86::PSLLDrr, X86::PSLLDrm },
- { X86::PSLLQrr, X86::PSLLQrm },
- { X86::PSLLWrr, X86::PSLLWrm },
- { X86::PSRADrr, X86::PSRADrm },
- { X86::PSRAWrr, X86::PSRAWrm },
- { X86::PSRLDrr, X86::PSRLDrm },
- { X86::PSRLQrr, X86::PSRLQrm },
- { X86::PSRLWrr, X86::PSRLWrm },
- { X86::PSUBBrr, X86::PSUBBrm },
- { X86::PSUBDrr, X86::PSUBDrm },
- { X86::PSUBSBrr, X86::PSUBSBrm },
- { X86::PSUBSWrr, X86::PSUBSWrm },
- { X86::PSUBWrr, X86::PSUBWrm },
- { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
- { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
- { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
- { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
- { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
- { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
- { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
- { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
- { X86::PXORrr, X86::PXORrm },
- { X86::SBB32rr, X86::SBB32rm },
- { X86::SBB64rr, X86::SBB64rm },
- { X86::SHUFPDrri, X86::SHUFPDrmi },
- { X86::SHUFPSrri, X86::SHUFPSrmi },
- { X86::SUB16rr, X86::SUB16rm },
- { X86::SUB32rr, X86::SUB32rm },
- { X86::SUB64rr, X86::SUB64rm },
- { X86::SUB8rr, X86::SUB8rm },
- { X86::SUBPDrr, X86::SUBPDrm },
- { X86::SUBPSrr, X86::SUBPSrm },
- { X86::SUBSDrr, X86::SUBSDrm },
- { X86::SUBSSrr, X86::SUBSSrm },
+ static const unsigned OpTbl2[][3] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, 16 },
+ { X86::ADDPSrr, X86::ADDPSrm, 16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
+ { X86::ANDPDrr, X86::ANDPDrm, 16 },
+ { X86::ANDPSrr, X86::ANDPSrm, 16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, 16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, 16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, 16 },
+ { X86::DIVPSrr, X86::DIVPSrm, 16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
+ { X86::FsORPDrr, X86::FsORPDrm, 16 },
+ { X86::FsORPSrr, X86::FsORPSrm, 16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
+ { X86::HADDPDrr, X86::HADDPDrm, 16 },
+ { X86::HADDPSrr, X86::HADDPSrm, 16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, 16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
+ { X86::MAXPSrr, X86::MAXPSrm, 16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, 16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
+ { X86::MINPSrr, X86::MINPSrm, 16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, 16 },
+ { X86::MULPSrr, X86::MULPSrm, 16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, 16 },
+ { X86::ORPSrr, X86::ORPSrm, 16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
+ { X86::PADDBrr, X86::PADDBrm, 16 },
+ { X86::PADDDrr, X86::PADDDrm, 16 },
+ { X86::PADDQrr, X86::PADDQrm, 16 },
+ { X86::PADDSBrr, X86::PADDSBrm, 16 },
+ { X86::PADDSWrr, X86::PADDSWrm, 16 },
+ { X86::PADDWrr, X86::PADDWrm, 16 },
+ { X86::PANDNrr, X86::PANDNrm, 16 },
+ { X86::PANDrr, X86::PANDrm, 16 },
+ { X86::PAVGBrr, X86::PAVGBrm, 16 },
+ { X86::PAVGWrr, X86::PAVGWrm, 16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, 16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
+ { X86::PMINSWrr, X86::PMINSWrm, 16 },
+ { X86::PMINUBrr, X86::PMINUBrm, 16 },
+ { X86::PMULDQrr, X86::PMULDQrm, 16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
+ { X86::PMULHWrr, X86::PMULHWrm, 16 },
+ { X86::PMULLDrr, X86::PMULLDrm, 16 },
+ { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
+ { X86::PMULLWrr, X86::PMULLWrm, 16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
+ { X86::PORrr, X86::PORrm, 16 },
+ { X86::PSADBWrr, X86::PSADBWrm, 16 },
+ { X86::PSLLDrr, X86::PSLLDrm, 16 },
+ { X86::PSLLQrr, X86::PSLLQrm, 16 },
+ { X86::PSLLWrr, X86::PSLLWrm, 16 },
+ { X86::PSRADrr, X86::PSRADrm, 16 },
+ { X86::PSRAWrr, X86::PSRAWrm, 16 },
+ { X86::PSRLDrr, X86::PSRLDrm, 16 },
+ { X86::PSRLQrr, X86::PSRLQrm, 16 },
+ { X86::PSRLWrr, X86::PSRLWrm, 16 },
+ { X86::PSUBBrr, X86::PSUBBrm, 16 },
+ { X86::PSUBDrr, X86::PSUBDrm, 16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
+ { X86::PSUBWrr, X86::PSUBWrm, 16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
+ { X86::PXORrr, X86::PXORrm, 16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, 16 },
+ { X86::SUBPSrr, X86::SUBPSrm, 16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
// FIXME: TEST*rr -> swapped operand of TEST*mr.
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
- { X86::XOR16rr, X86::XOR16rm },
- { X86::XOR32rr, X86::XOR32rm },
- { X86::XOR64rr, X86::XOR64rm },
- { X86::XOR8rr, X86::XOR8rm },
- { X86::XORPDrr, X86::XORPDrm },
- { X86::XORPSrr, X86::XORPSrm }
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, 16 },
+ { X86::XORPSrr, X86::XORPSrm, 16 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
unsigned MemOp = OpTbl2[i][1];
+ unsigned Align = OpTbl2[i][2];
if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,Align))).second)
assert(false && "Duplicated entries?");
- unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+ // Index 2, folded load
+ unsigned AuxInfo = 2 | (1 << 4);
if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
std::make_pair(RegOp, AuxInfo))).second)
AmbEntries.push_back(MemOp);
@@ -760,7 +766,6 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-
/// regIsPICBase - Return true if the register is a PIC base (i.e., defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -776,37 +781,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
return isPICBase;
}
-/// isGVStub - Return true if the GV requires an extra load to get the
-/// real address.
-static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
- return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
-}
-
-/// CanRematLoadWithDispOperand - Return true if a load with the specified
-/// operand is a candidate for remat: for this to be true we need to know that
-/// the load will always return the same value, even if moved.
-static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
- X86TargetMachine &TM) {
- // Loads from constant pool entries can be remat'd.
- if (MO.isCPI()) return true;
-
- // We can remat globals in some cases.
- if (MO.isGlobal()) {
- // If this is a load of a stub, not of the global, we can remat it. This
- // access will always return the address of the global.
- if (isGVStub(MO.getGlobal(), TM))
- return true;
-
- // If the global itself is constant, we can remat the load.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
- if (GV->isConstant())
- return true;
- }
- return false;
-}
-
bool
-X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
switch (MI->getOpcode()) {
default: break;
case X86::MOV8rm:
@@ -825,7 +802,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
if (MI->getOperand(1).isReg() &&
MI->getOperand(2).isImm() &&
MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
+ MI->isInvariantLoad(AA)) {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
@@ -876,7 +853,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that
/// would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
-/// two instructions it assumes it's not safe.
+/// a few instructions in each direction, it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
// It's always safe to clobber EFLAGS at the end of a block.
@@ -884,11 +861,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
return true;
// For compile time consideration, if we are not able to determine the
- // safety after visiting 2 instructions, we will assume it's not safe.
- for (unsigned i = 0; i < 2; ++i) {
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
bool SeenDef = false;
- for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
- MachineOperand &MO = I->getOperand(j);
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
@@ -901,10 +880,33 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
if (SeenDef)
// This instruction defines EFLAGS, no need to look any further.
return true;
- ++I;
+ ++Iter;
// If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (I == MBB.end())
+ if (Iter == MBB.end())
+ return true;
+ }
+
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == MBB.begin())
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
return true;
}
@@ -914,14 +916,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg,
+ unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- unsigned SubIdx = Orig->getOperand(0).isReg()
- ? Orig->getOperand(0).getSubReg() : 0;
- bool ChangeSubIdx = SubIdx != 0;
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
DestReg = RI.getSubReg(DestReg, SubIdx);
SubIdx = 0;
@@ -929,76 +928,36 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// Re-materialize them as movri instructions to avoid side effects.
- bool Emitted = false;
- switch (Orig->getOpcode()) {
+ bool Clone = true;
+ unsigned Opc = Orig->getOpcode();
+ switch (Opc) {
default: break;
case X86::MOV8r0:
case X86::MOV16r0:
- case X86::MOV32r0:
- case X86::MOV64r0: {
+ case X86::MOV32r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
- unsigned Opc = 0;
- switch (Orig->getOpcode()) {
+ switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
- case X86::MOV64r0: Opc = X86::MOV64ri32; break;
}
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
- Emitted = true;
+ Clone = false;
}
break;
}
}
- if (!Emitted) {
+ if (Clone) {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MI->getOperand(0).setReg(DestReg);
MBB.insert(I, MI);
+ } else {
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
}
- if (ChangeSubIdx) {
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
- }
-}
-
-/// isInvariantLoad - Return true if the specified instruction (which is marked
-/// mayLoad) is loading from a location whose value is invariant across the
-/// function. For example, loading a value from the constant pool or from
-/// from the argument area of a function if it does not change. This should
-/// only return true of *all* loads the instruction does are invariant (if it
-/// does multiple loads).
-bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
- // This code cares about loads from three cases: constant pool entries,
- // invariant argument slots, and global stubs. In order to handle these cases
- // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
- // operand and base our analysis on it. This is safe because the address of
- // none of these three cases is ever used as anything other than a load base
- // and X86 doesn't have any instructions that load from multiple places.
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- // Loads from constant pools are trivially invariant.
- if (MO.isCPI())
- return true;
-
- if (MO.isGlobal())
- return isGVStub(MO.getGlobal(), TM);
-
- // If this is a load from an invariant stack slot, the load is a constant.
- if (MO.isFI()) {
- const MachineFrameInfo &MFI =
- *MI->getParent()->getParent()->getFrameInfo();
- int Idx = MO.getIndex();
- return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
- }
- }
-
- // All other instances of these instructions are presumed to have other
- // issues.
- return false;
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
}
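// A condensed restatement of the policy above, as an illustrative
// helper (not part of this patch): the xor-based zero idioms clobber
// EFLAGS, so when EFLAGS must be preserved at the insertion point they
// are rematerialized as the equivalent mov-immediate rather than
// cloned.
static unsigned zeroIdiomToMovImm(unsigned Opc) {
  switch (Opc) {
  case X86::MOV8r0:  return X86::MOV8ri;
  case X86::MOV16r0: return X86::MOV16ri;
  case X86::MOV32r0: return X86::MOV32ri;
  default:           return 0; // not a zero idiom; clone the original
  }
}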
/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1304,7 +1263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
unsigned Opc;
unsigned Size;
switch (MI->getOpcode()) {
- default: assert(0 && "Unreachable!");
+ default: llvm_unreachable("Unreachable!");
case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
@@ -1459,7 +1418,7 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case X86::COND_E: return X86::JE;
case X86::COND_NE: return X86::JNE;
case X86::COND_L: return X86::JL;
@@ -1483,7 +1442,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case X86::COND_E: return X86::COND_NE;
case X86::COND_NE: return X86::COND_E;
case X86::COND_L: return X86::COND_GE;
@@ -1699,14 +1658,26 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
/* Source and destination have the same register class. */;
else if (CommonRC->hasSuperClass(SrcRC))
CommonRC = SrcRC;
- else if (!DestRC->hasSubClass(SrcRC))
- CommonRC = 0;
+ else if (!DestRC->hasSubClass(SrcRC)) {
+    // Neither GR64_NOREX nor GR64_NOSP is a superclass of the other,
+    // but we want to copy them as GR64. Similarly, for GR32_NOREX and
+ // GR32_NOSP, copy as GR32.
+ if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
+ DestRC->hasSuperClass(&X86::GR64RegClass))
+ CommonRC = &X86::GR64RegClass;
+ else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
+ DestRC->hasSuperClass(&X86::GR32RegClass))
+ CommonRC = &X86::GR32RegClass;
+ else
+ CommonRC = 0;
+ }
if (CommonRC) {
unsigned Opc;
- if (CommonRC == &X86::GR64RegClass) {
+ if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32RegClass) {
+ } else if (CommonRC == &X86::GR32RegClass ||
+ CommonRC == &X86::GR32_NOSPRegClass) {
Opc = X86::MOV32rr;
} else if (CommonRC == &X86::GR16RegClass) {
Opc = X86::MOV16rr;
@@ -1731,7 +1702,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = X86::MOV8rr_NOREX;
else
Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_NOREXRegClass) {
+ } else if (CommonRC == &X86::GR64_NOREXRegClass ||
+ CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
Opc = X86::MOV64rr;
} else if (CommonRC == &X86::GR32_NOREXRegClass) {
Opc = X86::MOV32rr;
@@ -1759,16 +1731,17 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
return true;
}
-
+
// Moving EFLAGS to / from another register requires a push and a pop.
if (SrcRC == &X86::CCRRegClass) {
if (SrcReg != X86::EFLAGS)
return false;
- if (DestRC == &X86::GR64RegClass) {
+ if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return true;
- } else if (DestRC == &X86::GR32RegClass) {
+ } else if (DestRC == &X86::GR32RegClass ||
+ DestRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSHFD));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return true;
@@ -1776,11 +1749,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
} else if (DestRC == &X86::CCRRegClass) {
if (DestReg != X86::EFLAGS)
return false;
- if (SrcRC == &X86::GR64RegClass) {
+    if (SrcRC == &X86::GR64RegClass || SrcRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
BuildMI(MBB, MI, DL, get(X86::POPFQ));
return true;
- } else if (SrcRC == &X86::GR32RegClass) {
+    } else if (SrcRC == &X86::GR32RegClass ||
+               SrcRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
BuildMI(MBB, MI, DL, get(X86::POPFD));
return true;
@@ -1838,9 +1812,9 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
bool isStackAligned,
TargetMachine &TM) {
unsigned Opc = 0;
- if (RC == &X86::GR64RegClass) {
+ if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32RegClass) {
+ } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
Opc = X86::MOV32mr;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16mr;
@@ -1865,7 +1839,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
Opc = X86::MOV8mr_NOREX;
else
Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_NOREXRegClass) {
+ } else if (RC == &X86::GR64_NOREXRegClass ||
+ RC == &X86::GR64_NOREX_NOSPRegClass) {
Opc = X86::MOV64mr;
} else if (RC == &X86::GR32_NOREXRegClass) {
Opc = X86::MOV32mr;
@@ -1889,8 +1864,7 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
} else if (RC == &X86::VR64RegClass) {
Opc = X86::MMX_MOVQ64mr;
} else {
- assert(0 && "Unknown regclass");
- abort();
+ llvm_unreachable("Unknown regclass");
}
return Opc;
@@ -1914,6 +1888,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
@@ -1923,6 +1899,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.addOperand(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}
@@ -1931,9 +1908,9 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
bool isStackAligned,
const TargetMachine &TM) {
unsigned Opc = 0;
- if (RC == &X86::GR64RegClass) {
+ if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32RegClass) {
+ } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
Opc = X86::MOV32rm;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16rm;
@@ -1958,7 +1935,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
Opc = X86::MOV8rm_NOREX;
else
Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_NOREXRegClass) {
+ } else if (RC == &X86::GR64_NOREXRegClass ||
+ RC == &X86::GR64_NOREX_NOSPRegClass) {
Opc = X86::MOV64rm;
} else if (RC == &X86::GR32_NOREXRegClass) {
Opc = X86::MOV32rm;
@@ -1982,8 +1960,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
} else if (RC == &X86::VR64RegClass) {
Opc = X86::MMX_MOVQ64rm;
} else {
- assert(0 && "Unknown regclass");
- abort();
+ llvm_unreachable("Unknown regclass");
}
return Opc;
@@ -2005,6 +1982,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
@@ -2013,6 +1992,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.addOperand(Addr[i]);
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}
@@ -2026,9 +2006,11 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
unsigned SlotSize = is64Bit ? 8 : 4;
MachineFunction &MF = *MBB.getParent();
+ unsigned FPReg = RI.getFrameRegister(MF);
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeFrameSize = 0;
@@ -2038,10 +2020,12 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- if (RegClass != &X86::VR128RegClass) {
+ if (Reg == FPReg)
+      // X86RegisterInfo::emitPrologue will handle spilling of the frame register.
+ continue;
+ if (RegClass != &X86::VR128RegClass && !isWin64) {
CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, get(Opc))
- .addReg(Reg, RegState::Kill);
+ BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
}
@@ -2060,13 +2044,18 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ unsigned FPReg = RI.getFrameRegister(MF);
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
+ bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ if (Reg == FPReg)
+      // X86RegisterInfo::emitEpilogue will handle restoring of the frame register.
+ continue;
const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &X86::VR128RegClass) {
+ if (RegClass != &X86::VR128RegClass && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
@@ -2143,8 +2132,9 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
- const SmallVectorImpl<MachineOperand> &MOs) const{
- const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Align) const {
+ const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
@@ -2165,8 +2155,6 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
- else if (MI->getOpcode() == X86::MOV64r0)
- NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
@@ -2182,60 +2170,82 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, unsigned>::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
OpcodeTablePtr->find((unsigned*)MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
+ unsigned Opcode = I->second.first;
+ unsigned MinAlign = I->second.second;
+ if (Align < MinAlign)
+ return NULL;
+ bool NarrowToMOV32rm = false;
+ if (Size) {
+ unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+ if (Size < RCSize) {
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return NULL;
+          // If this is a 64-bit load, but the spill slot is only 32 bits
+          // wide, we can do a 32-bit load, which is implicitly
+          // zero-extended. This is likely due to LiveIntervalAnalysis
+          // remat'ing a load from a stack slot.
+ if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+ return NULL;
+ Opcode = X86::MOV32rm;
+ NarrowToMOV32rm = true;
+ }
+ }
+
if (isTwoAddrFold)
- NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
else
- NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
+ NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+ if (NarrowToMOV32rm) {
+        // This is the special case where we used a MOV32rm to load a 32-bit
+        // value and zero-extend the top bits; change the destination
+        // register to a 32-bit one.
+ unsigned DstReg = NewMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+ 4/*x86_subreg_32bit*/));
+ else
+ NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+ }
return NewMI;
}
}
// No fusion
if (PrintFailedFusing)
- cerr << "We failed to fuse operand " << i << " in " << *MI;
+ errs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
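// The MOV64rm to MOV32rm narrowing above relies on the x86-64 rule
// that writing the 32-bit subregister zeroes bits 63:32 of the full
// register. A minimal model of that rule (a sketch assuming <stdint.h>
// types, not part of this patch):
static uint64_t writeLow32(uint64_t OldReg, uint32_t Loaded) {
  (void)OldReg;            // the previous upper half does not survive
  return (uint64_t)Loaded; // bits 63:32 are implicitly cleared
}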
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// Check switch flag
if (NoFusing) return NULL;
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
- // FIXME: Move alignment requirement into tables?
- if (Alignment < 16) {
- switch (MI->getOpcode()) {
- default: break;
- // Not always safe to fold movsd into these instructions since their load
- // folding variants expects the address to be 16 byte aligned.
- case X86::FsANDNPDrr:
- case X86::FsANDNPSrr:
- case X86::FsANDPDrr:
- case X86::FsANDPSrr:
- case X86::FsORPDrr:
- case X86::FsORPSrr:
- case X86::FsXORPDrr:
- case X86::FsXORPSrr:
- return NULL;
- }
- }
-
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
+ unsigned RCSize = 0;
switch (MI->getOpcode()) {
default: return NULL;
- case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
}
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Size < RCSize)
+ return NULL;
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
@@ -2244,12 +2254,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}
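For illustration, the TEST-of-a-spill rewrite above reduces to a small opcode table plus the new size guard. A minimal sketch, assuming opcode names mirroring the switch:

    // Sketch (not the LLVM implementation) of the TESTrr -> CMPri rewrite:
    // a test of a spilled register becomes a compare of the stack slot
    // against 0, rejected when the slot is narrower than the compare.
    enum TestOpc { TEST8rr, TEST16rr, TEST32rr, TEST64rr,
                   CMP8ri, CMP16ri, CMP32ri, CMP64ri32, NoFold };

    TestOpc foldTestOfSpill(TestOpc Op, unsigned SlotBytes) {
      unsigned Width = 0;
      TestOpc New = NoFold;
      switch (Op) {
      case TEST8rr:  New = CMP8ri;    Width = 1; break;
      case TEST16rr: New = CMP16ri;   Width = 2; break;
      case TEST32rr: New = CMP32ri;   Width = 4; break;
      case TEST64rr: New = CMP64ri32; Width = 8; break;
      default:       return NoFold;
      }
      return SlotBytes < Width ? NoFold : New;  // never read past the object
    }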
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
// Check switch flag
if (NoFusing) return NULL;
@@ -2257,26 +2267,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Determine the alignment of the load.
unsigned Alignment = 0;
if (LoadMI->hasOneMemOperand())
- Alignment = LoadMI->memoperands_begin()->getAlignment();
-
- // FIXME: Move alignment requirement into tables?
- if (Alignment < 16) {
- switch (MI->getOpcode()) {
- default: break;
- // Not always safe to fold movsd into these instructions since their load
- // folding variants expects the address to be 16 byte aligned.
- case X86::FsANDNPDrr:
- case X86::FsANDNPSrr:
- case X86::FsANDPDrr:
- case X86::FsANDPSrr:
- case X86::FsORPDrr:
- case X86::FsORPSrr:
- case X86::FsXORPDrr:
- case X86::FsXORPSrr:
- return NULL;
+ Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+ else
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ Alignment = 16;
+ break;
+ case X86::FsFLD0SD:
+ Alignment = 8;
+ break;
+ case X86::FsFLD0SS:
+ Alignment = 4;
+ break;
+ default:
+ llvm_unreachable("Don't know how to fold this instruction!");
}
- }
-
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI->getOpcode()) {
@@ -2293,28 +2299,40 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
- if (LoadMI->getOpcode() == X86::V_SET0 ||
- LoadMI->getOpcode() == X86::V_SETALLONES) {
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS: {
    // Folding a V_SET0, V_SETALLONES, FsFLD0SD, or FsFLD0SS as a load, to
    // ease register pressure.
// Create a constant-pool entry and operands to load from it.
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
- if (TM.getRelocationModel() == Reloc::PIC_ &&
- !TM.getSubtarget<X86Subtarget>().is64Bit())
- // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
- // This doesn't work for several reasons.
- // 1. GlobalBaseReg may have been spilled.
- // 2. It may not be live at MI.
- return false;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = X86::RIP;
+ else
+ // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+ }
- // Create a v4i32 constant-pool entry.
+ // Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
- const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
- Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
- ConstantVector::getNullValue(Ty) :
- ConstantVector::getAllOnesValue(Ty);
- unsigned CPI = MCP.getConstantPoolIndex(C, 16);
+ const Type *Ty;
+ if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ Ty = Type::getFloatTy(MF.getFunction()->getContext());
+ else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+ Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+ Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
// Create operands to load from the constant pool entry.
MOs.push_back(MachineOperand::CreateReg(PICBase, false));
@@ -2322,13 +2340,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MOs.push_back(MachineOperand::CreateReg(0, false));
MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
MOs.push_back(MachineOperand::CreateReg(0, false));
- } else {
+ break;
+ }
+ default: {
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
+ break;
+ }
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
}
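The alignment each pseudo receives matches the width of the constant it materializes from the pool. A standalone sketch of that mapping (toy enum, not LLVM code):

    // Illustrative mapping of pseudo "load" to constant-pool alignment.
    enum PseudoLoad { V_SET0, V_SETALLONES, FsFLD0SD, FsFLD0SS };

    unsigned poolAlignment(PseudoLoad Op) {
      switch (Op) {
      case FsFLD0SS: return 4;   // a single float
      case FsFLD0SD: return 8;   // a single double
      default:       return 16;  // a full 128-bit <4 x i32> vector
      }
    }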
@@ -2360,15 +2382,14 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
+ case X86::MOV8r0:
case X86::MOV16r0:
case X86::MOV32r0:
- case X86::MOV64r0:
- case X86::MOV8r0:
return true;
default: break;
}
@@ -2381,7 +2402,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, unsigned>::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return true;
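The table change driving this new iterator type is easy to model: the mapped value is now an (opcode, minimum alignment) pair rather than a bare opcode. A hedged sketch with std::unordered_map standing in for LLVM's DenseMap:

    #include <unordered_map>
    #include <utility>

    // Sketch of the widened fold tables: a fold can now be rejected when
    // the memory operand is under-aligned (e.g. SSE ops wanting 16 bytes).
    using FoldTable =
        std::unordered_map<unsigned, std::pair<unsigned, unsigned>>;

    bool lookupFold(const FoldTable &T, unsigned RegOpc, unsigned Align,
                    unsigned &MemOpc) {
      auto I = T.find(RegOpc);
      if (I == T.end() || Align < I->second.second)
        return false;             // unknown opcode or under-aligned operand
      MemOpc = I->second.first;
      return true;
    }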
@@ -2410,8 +2431,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetInstrDesc &TID = get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
- const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ const TargetRegisterClass *RC = TOI.getRegClass(&RI);
SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
SmallVector<MachineOperand,2> BeforeOps;
SmallVector<MachineOperand,2> AfterOps;
@@ -2430,7 +2450,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the load instruction.
if (UnfoldLoad) {
- loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
@@ -2489,10 +2513,12 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the store instruction.
if (UnfoldStore) {
- const TargetOperandInfo &DstTOI = TID.OpInfo[0];
- const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
+ const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
}
return true;
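extractLoadMemRefs and extractStoreMemRefs are assumed here to partition an instruction's memory references between the new load and store when it is unfolded. A toy model of that partition, preserving alias information for later passes:

    #include <vector>

    // Toy partition mirroring the assumed extract*MemRefs semantics.
    struct MemRef { bool IsLoad; bool IsStore; };

    std::vector<MemRef> extractLoadRefs(const std::vector<MemRef> &MMOs) {
      std::vector<MemRef> Out;
      for (const MemRef &M : MMOs)
        if (M.IsLoad)
          Out.push_back(M);      // the store side keeps the IsStore entries
      return Out;
    }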
@@ -2513,9 +2539,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
bool FoldedLoad = I->second.second & (1 << 4);
bool FoldedStore = I->second.second & (1 << 5);
const TargetInstrDesc &TID = get(Opc);
- const TargetOperandInfo &TOI = TID.OpInfo[Index];
- const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
unsigned NumDefs = TID.NumDefs;
std::vector<SDValue> AddrOps;
std::vector<SDValue> BeforeOps;
@@ -2536,35 +2560,40 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the load instruction.
SDNode *Load = 0;
- const MachineFunction &MF = DAG.getMachineFunction();
+ MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
- MVT VT = *RC->vt_begin();
+ EVT VT = *RC->vt_begin();
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
- Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
- VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
+
+ // Preserve memory reference information.
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
// Emit the data processing instruction.
- std::vector<MVT> VTs;
+ std::vector<EVT> VTs;
const TargetRegisterClass *DstRC = 0;
if (TID.getNumDefs() > 0) {
- const TargetOperandInfo &DstTOI = TID.OpInfo[0];
- DstRC = DstTOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+ DstRC = TID.OpInfo[0].getRegClass(&RI);
VTs.push_back(*DstRC->vt_begin());
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- MVT VT = N->getValueType(i);
+ EVT VT = N->getValueType(i);
if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
VTs.push_back(VT);
}
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
- SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
- BeforeOps.size());
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
NewNodes.push_back(NewNode);
// Emit the store instruction.
@@ -2574,11 +2603,18 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.push_back(Chain);
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
- SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
- isAligned, TM),
- dl, MVT::Other,
- &AddrOps[0], AddrOps.size());
+ SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
NewNodes.push_back(Store);
+
+ // Preserve memory reference information.
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+    cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
}
return true;
@@ -2644,7 +2680,7 @@ unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
case X86II::Imm16: return 2;
case X86II::Imm32: return 4;
case X86II::Imm64: return 8;
- default: assert(0 && "Immediate size not set!");
+ default: llvm_unreachable("Immediate size not set!");
return 0;
}
}
@@ -2829,7 +2865,7 @@ static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
} else if (RelocOp->isJTI()) {
FinalSize += sizeJumpTableAddress(false);
} else {
- assert(0 && "Unknown value to relocate!");
+ llvm_unreachable("Unknown value to relocate!");
}
return FinalSize;
}
@@ -2926,7 +2962,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::GS:
++FinalSize;
break;
- default: assert(0 && "Invalid segment!");
+ default: llvm_unreachable("Invalid segment!");
case 0: break; // No segment override!
}
@@ -2946,6 +2982,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::TA: // 0F 3A
Need0FPrefix = true;
break;
+ case X86II::TF: // F2 0F 38
+ ++FinalSize;
+ Need0FPrefix = true;
+ break;
case X86II::REP: break; // already handled.
case X86II::XS: // F3 0F
++FinalSize;
@@ -2959,7 +2999,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
++FinalSize;
break; // Two-byte opcode prefix
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case 0: break; // No prefix!
}
@@ -2981,6 +3021,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::TA: // 0F 3A
++FinalSize;
break;
+ case X86II::TF: // F2 0F 38
+ ++FinalSize;
+ break;
}
// If this is a two-address instruction, skip one of the register operands.
@@ -2993,7 +3036,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
--NumOps;
switch (Desc->TSFlags & X86II::FormMask) {
- default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
case X86II::Pseudo:
// Remember the current PC offset, this is the PIC relocation
// base address.
@@ -3002,16 +3045,16 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
break;
case TargetInstrInfo::INLINEASM: {
const MachineFunction *MF = MI.getParent()->getParent();
- const char *AsmStr = MI.getOperand(0).getSymbolName();
- const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo();
- FinalSize += AI->getInlineAsmLength(AsmStr);
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
break;
}
case TargetInstrInfo::DBG_LABEL:
case TargetInstrInfo::EH_LABEL:
break;
case TargetInstrInfo::IMPLICIT_DEF:
- case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::KILL:
case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
@@ -3038,7 +3081,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
} else if (MO.isImm()) {
FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
} else {
- assert(0 && "Unknown RawFrm operand!");
+ llvm_unreachable("Unknown RawFrm operand!");
}
}
break;
@@ -3196,10 +3239,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
}
if (!Desc->isVariadic() && CurOp != NumOps) {
- cerr << "Cannot determine size: ";
- MI.dump();
- cerr << '\n';
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot determine size: " << MI;
+ llvm_report_error(Msg.str());
}
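At a high level, the size this function accumulates is a sum of encoding components. A back-of-the-envelope sketch (illustrative parameters, not the real interface):

    // An x86 instruction is prefixes + opcode escape bytes + ModRM/SIB +
    // displacement + immediate; GetInstSizeWithDesc tallies these pieces.
    unsigned estimateInstSize(unsigned PrefixBytes, unsigned OpcodeBytes,
                              bool HasModRM, unsigned DispBytes,
                              unsigned ImmBytes) {
      return PrefixBytes + OpcodeBytes + (HasModRM ? 1u : 0u) + DispBytes +
             ImmBytes;
    }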
@@ -3209,7 +3252,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const TargetInstrDesc &Desc = MI->getDesc();
- bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
if (Desc.getOpcode() == X86::MOVPC32r)
@@ -3245,12 +3288,11 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (TM.getRelocationModel() == Reloc::PIC_ &&
- TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
// Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
- .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0,
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
X86II::MO_GOT_ABSOLUTE_ADDRESS);
} else {
GlobalBaseReg = PC;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 83f01945ea21..2237c8be517a 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -69,35 +69,36 @@ namespace X86 {
/// instruction info tracks.
///
namespace X86II {
- enum {
+ /// Target Operand Flag enum.
+ enum TOF {
//===------------------------------------------------------------------===//
// X86 Specific MachineOperand flags.
- MO_NO_FLAG = 0,
+ MO_NO_FLAG,
/// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
/// relocation of:
/// SYMBOL_LABEL + [. - PICBASELABEL]
- MO_GOT_ABSOLUTE_ADDRESS = 1,
+ MO_GOT_ABSOLUTE_ADDRESS,
/// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
/// immediate should get the value of the symbol minus the PIC base label:
/// SYMBOL_LABEL - PICBASELABEL
- MO_PIC_BASE_OFFSET = 2,
+ MO_PIC_BASE_OFFSET,
/// MO_GOT - On a symbol operand this indicates that the immediate is the
/// offset to the GOT entry for the symbol name from the base of the GOT.
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOT
- MO_GOT = 3,
+ MO_GOT,
/// MO_GOTOFF - On a symbol operand this indicates that the immediate is
/// the offset to the location of the symbol name from the base of the GOT.
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTOFF
- MO_GOTOFF = 4,
+ MO_GOTOFF,
/// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
/// offset to the GOT entry for the symbol name from the current code
@@ -105,50 +106,115 @@ namespace X86II {
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTPCREL
- MO_GOTPCREL = 5,
+ MO_GOTPCREL,
/// MO_PLT - On a symbol operand this indicates that the immediate is
/// offset to the PLT entry of symbol name from the current code location.
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @PLT
- MO_PLT = 6,
+ MO_PLT,
/// MO_TLSGD - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSGD
- MO_TLSGD = 7,
+ MO_TLSGD,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @GOTTPOFF
- MO_GOTTPOFF = 8,
+ MO_GOTTPOFF,
/// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @INDNTPOFF
- MO_INDNTPOFF = 9,
+ MO_INDNTPOFF,
/// MO_TPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TPOFF
- MO_TPOFF = 10,
+ MO_TPOFF,
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @NTPOFF
- MO_NTPOFF = 11,
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+    /// dllimport linkage on Windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and before.
+ MO_DARWIN_STUB,
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+    /// indicates that the reference is actually to "FOO$non_lazy_ptr - PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE
+ };
+}
+
+/// isGlobalStubReference - Return true if the specified TargetFlag operand is
+/// a reference to a stub for a global, not the global itself.
+inline static bool isGlobalStubReference(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_DLLIMPORT: // dllimport stub.
+ case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOT: // normal GOT reference.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref.
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isGlobalRelativeToPICBase - Return true if the specified global value
+/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this
+/// is true, the addressing mode has the PIC base register added in (e.g. EBX).
+inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_GOTOFF: // isPICStyleGOT: local global.
+ case X86II::MO_GOT: // isPICStyleGOT: other global.
+ case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ return true;
+ default:
+ return false;
+ }
+}
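A standalone sketch of the classification these two predicates implement; the flag set below is trimmed and illustrative, not the real X86II numbering:

    // Some target flags mean "the reference goes through a stub or GOT
    // slot"; others mean "the address is formed off the 32-bit PIC base".
    enum Flag { NO_FLAG, GOT, GOTOFF, PIC_BASE_OFFSET, DLLIMPORT };

    bool isStubRef(Flag F) {           // stub/GOT slot, not the global
      return F == GOT || F == DLLIMPORT;
    }
    bool isPICBaseRelative(Flag F) {   // PIC base register is added in
      return F == GOT || F == GOTOFF || F == PIC_BASE_OFFSET;
    }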
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
//===------------------------------------------------------------------===//
// Instruction encodings. These are the standard/most common forms for X86
// instructions.
@@ -249,6 +315,9 @@ namespace X86II {
// T8, TA - Prefix after the 0x0F prefix.
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 15 << Op0Shift,
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -355,10 +424,10 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
@@ -382,11 +451,11 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const;
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, const MachineInstr *Orig) const;
-
- bool isInvariantLoad(const MachineInstr *MI) const;
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -430,6 +499,8 @@ public:
virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -440,6 +511,8 @@ public:
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -530,9 +603,10 @@ public:
private:
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs) const;
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 03df10db61c5..30b57d85d012 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -56,6 +56,10 @@ def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
@@ -114,6 +118,11 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInFlag]>;
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain]>;
+
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
[SDNPHasChain, SDNPOutFlag]>;
@@ -124,9 +133,6 @@ def X86callseq_end :
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
-def X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
-
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
@@ -156,6 +162,9 @@ def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -167,57 +176,80 @@ def i32imm_pcrel : Operand<i32> {
let PrintMethod = "print_pcrel_imm";
}
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClass = ?;
+}
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
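For context, the five MIOperandInfo slots above encode the full x86 address form. A sketch of that shape as a plain struct; ptr_rc_nosp exists because (E/R)SP can never be the index register in ModRM/SIB encodings:

    #include <cstdint>

    // base + scale*index + displacement, plus an optional segment override.
    struct X86AddressMode {
      unsigned Base;    // base register, 0 if absent
      uint8_t  Scale;   // 1, 2, 4, or 8
      unsigned Index;   // index register; must not be ESP/RSP
      int32_t  Disp;    // signed displacement
      unsigned Segment; // optional segment override register
    };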
+def opaque32mem : X86MemOperand<"printopaquemem">;
+def opaque48mem : X86MemOperand<"printopaquemem">;
+def opaque80mem : X86MemOperand<"printopaquemem">;
+
def i8mem : X86MemOperand<"printi8mem">;
def i16mem : X86MemOperand<"printi16mem">;
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
-def i256mem : X86MemOperand<"printi256mem">;
+//def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
-def f256mem : X86MemOperand<"printf256mem">;
+//def f256mem : X86MemOperand<"printf256mem">;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
def i8mem_NOREX : Operand<i64> {
let PrintMethod = "printi8mem";
- let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def lea32mem : Operand<i32> {
let PrintMethod = "printlea32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
-def piclabel: Operand<i32> {
- let PrintMethod = "printPICLabel";
+def ImmSExt8AsmOperand : AsmOperandClass {
+ let Name = "ImmSExt8";
+ let SuperClass = ImmAsmOperand;
}
// A couple of more descriptive operand definitions.
// 16-bits but only 8 bits are significant.
-def i16i8imm : Operand<i16>;
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExt8AsmOperand;
+}
// 32-bits but only 8 bits are significant.
-def i32i8imm : Operand<i32>;
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExt8AsmOperand;
+}
// Branch targets have OtherVT type and print as pc-relative values.
def brtarget : Operand<OtherVT> {
let PrintMethod = "print_pcrel_imm";
}
+def brtarget8 : Operand<OtherVT> {
+ let PrintMethod = "print_pcrel_imm";
+}
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
//
@@ -225,7 +257,8 @@ def brtarget : Operand<OtherVT> {
// Define X86 specific addressing mode.
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
- [add, sub, mul, shl, or, frameindex], []>;
+ [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ []>;
def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -246,8 +279,14 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
-def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
+ "TM.getCodeModel() != CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
@@ -484,15 +523,35 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Requires<[In32BitMode]>;
}
+// x86-64 va_start lowering magic.
+let usesCustomDAGSchedInserter = 1 in
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
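For context, any x86-64 variadic function exercises this pseudo: va_start must spill the incoming XMM argument registers to the register-save area, with AL carrying the count of vector registers used (per the pattern above). A plain C-level example that would trigger it:

    #include <cstdarg>

    // Summing a variadic argument list; va_start is where the register
    // save area gets populated on x86-64.
    int sumInts(int N, ...) {
      va_list AP;
      va_start(AP, N);            // lowered via VASTART_SAVE_XMM_REGS
      int S = 0;
      for (int I = 0; I < N; ++I)
        S += va_arg(AP, int);
      va_end(AP);
      return S;
    }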
// Nop
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
+ "nopl\t$zero", []>, TB;
+}
-// PIC base
+// Trap
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>;
+def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+// popl %destreg
let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
- def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label),
- "call\t$label\n\t"
- "pop{l}\t$reg", []>;
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
@@ -506,7 +565,11 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
[(X86retflag 0)]>;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
- [(X86retflag imm:$amt)]>;
+ [(X86retflag timm:$amt)]>;
+ def LRET : I <0xCB, RawFrm, (outs), (ins),
+ "lret", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
}
// All branches are RawFrm, Void, Branch, and Terminators
@@ -514,8 +577,10 @@ let isBranch = 1, isTerminator = 1 in
class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
I<opcode, RawFrm, (outs), ins, asm, pattern>;
-let isBranch = 1, isBarrier = 1 in
+let isBranch = 1, isBarrier = 1 in {
def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>;
+}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -523,10 +588,42 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
[(brind GR32:$dst)]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
[(brind (loadi32 addr:$dst))]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
+ (ins i16imm:$seg, i16imm:$off),
+ "ljmp{w}\t$seg, $off", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
+ (ins i16imm:$seg, i32imm:$off),
+ "ljmp{l}\t$seg, $off", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
}
// Conditional branches
let Uses = [EFLAGS] in {
+// Short conditional jumps
+def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>;
+def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>;
+def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>;
+def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>;
+def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>;
+def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>;
+def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>;
+def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>;
+def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>;
+def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>;
+def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>;
+def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>;
+def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>;
+def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>;
+def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>;
+def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>;
+
+def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>;
+
def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
[(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB;
def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
@@ -563,6 +660,12 @@ def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
[(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
} // Uses = [EFLAGS]
+// Loop instructions
+
+def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
//===----------------------------------------------------------------------===//
// Call Instructions...
//
@@ -583,13 +686,26 @@ let isCall = 1 in
"call\t{*}$dst", [(X86call GR32:$dst)]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
"call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrm, (outs),
+ (ins i16imm:$seg, i16imm:$off),
+ "lcall{w}\t$seg, $off", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrm, (outs),
+ (ins i16imm:$seg, i32imm:$off),
+ "lcall{l}\t$seg, $off", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
}
-// Tail call stuff.
+// Constructing a stack frame.
+
+def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
-def TAILCALL : I<0, Pseudo, (outs), (ins),
- "#TAILCALL",
- []>;
+// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops),
@@ -620,11 +736,29 @@ def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", []>;
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in
-def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+let mayLoad = 1 in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
+ OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+}
-let mayStore = 1 in
+let mayStore = 1 in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
+ OpSize;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
+}
}
let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
@@ -710,6 +844,14 @@ let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in
def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)]>, REP;
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
+
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
TB;
@@ -718,6 +860,18 @@ let isBarrier = 1, hasCtrlDep = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
}
+def SYSCALL : I<0x05, RawFrm,
+ (outs), (ins), "syscall", []>, TB;
+def SYSRET : I<0x07, RawFrm,
+ (outs), (ins), "sysret", []>, TB;
+def SYSENTER : I<0x34, RawFrm,
+ (outs), (ins), "sysenter", []>, TB;
+def SYSEXIT : I<0x35, RawFrm,
+ (outs), (ins), "sysexit", []>, TB;
+
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+
+
//===----------------------------------------------------------------------===//
// Input/Output Instructions...
//
@@ -793,6 +947,30 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
+def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins i8imm:$src),
+ "mov{b}\t{$src, %al|%al, $src}", []>;
+def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins i16imm:$src),
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins i32imm:$src),
+ "mov{l}\t{$src, %eax|%eax, $src}", []>;
+
+def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs i8imm:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, %al}", []>;
+def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs i16imm:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
+def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs i32imm:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>;
+
+// Moves to and from segment registers
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+
let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -950,6 +1128,20 @@ let isTwoAddress = 1 in {
// Conditional moves
let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customDAGSchedInserter to
+// emit control flow. An alternative would be to mark i8 SELECT as Promote;
+// however, that requires promoting the operands and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomDAGSchedInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
+
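Conceptually, the custom inserter expands CMOV_GR8 into a branch diamond. A C-level sketch of the select it implements (the real expansion does MachineBasicBlock surgery and joins the two arms with a PHI):

    // With no 8-bit cmov available, the select becomes control flow.
    unsigned char select8(bool CondFromEFLAGS, unsigned char A,
                          unsigned char B) {
      if (CondFromEFLAGS)   // lowered to a conditional branch on EFLAGS
        return A;
      return B;
    }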
let isCommutable = 1 in {
def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
@@ -1549,6 +1741,14 @@ let isTwoAddress = 0 in {
"and{l}\t{$src, $dst|$dst, $src}",
[(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+ def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
+ "and{b}\t{$src, %al|%al, $src}", []>;
+ def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
+ "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
+ "and{l}\t{$src, %eax|%eax, $src}", []>;
+
}
@@ -1635,6 +1835,13 @@ let isTwoAddress = 0 in {
"or{l}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+ def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
+ "or{b}\t{$src, %al|%al, $src}", []>;
+ def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
+ "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
+ "or{l}\t{$src, %eax|%eax, $src}", []>;
} // isTwoAddress = 0
@@ -1744,6 +1951,13 @@ let isTwoAddress = 0 in {
"xor{l}\t{$src, $dst|$dst, $src}",
[(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+ def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
+ "xor{b}\t{$src, %al|%al, $src}", []>;
+ def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src),
+ "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def XOR32i32 : Ii32 <0x35, RawFrm, (outs), (ins i32imm:$src),
+ "xor{l}\t{$src, %eax|%eax, $src}", []>;
} // isTwoAddress = 0
} // Defs = [EFLAGS]
@@ -1771,8 +1985,17 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
-// cheaper.
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper.
+
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+
} // isConvertibleToThreeAddress = 1
let isTwoAddress = 0 in {
@@ -1951,6 +2174,97 @@ let isTwoAddress = 0 in {
}
// Rotate instructions
+
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+ "rcl{b}\t{1, $dst|$dst, 1}", []>;
+def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcl{b}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+ "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+let Uses = [CL] in {
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+}
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+ "rcl{l}\t{1, $dst|$dst, 1}", []>;
+def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcl{l}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+ "rcr{b}\t{1, $dst|$dst, 1}", []>;
+def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcr{b}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+ "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+let Uses = [CL] in {
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+}
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+ "rcr{l}\t{1, $dst|$dst, 1}", []>;
+def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcr{l}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
@@ -2228,6 +2542,15 @@ def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
"add{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
(implicit EFLAGS)]>;
+
+// Register-Register Addition - Equivalent to the normal rr forms (ADD8rr,
+// ADD16rr, and ADD32rr), but differently encoded.
+def ADD8mrmrr: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>;
+def ADD16mrmrr: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
+def ADD32mrmrr: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>;
// Register-Integer Addition
def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2295,6 +2618,14 @@ let isTwoAddress = 0 in {
[(store (add (load addr:$dst), i32immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)]>;
+
+ // addition to rAX
+ def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
+ "add{b}\t{$src, %al|%al, $src}", []>;
+ def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
+ "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
+ "add{l}\t{$src, %eax|%eax, $src}", []>;
}
let Uses = [EFLAGS] in {
@@ -2373,6 +2704,13 @@ let isTwoAddress = 0 in {
def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"adc{l}\t{$src2, $dst|$dst, $src2}",
[(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+
+ def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
+ "adc{b}\t{$src, %al|%al, $src}", []>;
+ def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
+ "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
+ "adc{l}\t{$src, %eax|%eax, $src}", []>;
}
} // Uses = [EFLAGS]
@@ -2472,6 +2810,13 @@ let isTwoAddress = 0 in {
[(store (sub (load addr:$dst), i32immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)]>;
+
+ def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
+ "sub{b}\t{$src, %al|%al, $src}", []>;
+ def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
+ "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+ "sub{l}\t{$src, %eax|%eax, $src}", []>;
}
let Uses = [EFLAGS] in {
@@ -2516,6 +2861,13 @@ let isTwoAddress = 0 in {
def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"sbb{l}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+
+ def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
+ "sbb{b}\t{$src, %al|%al, $src}", []>;
+ def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
+ "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+ "sbb{l}\t{$src, %eax|%eax, $src}", []>;
}
def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
@@ -2647,6 +2999,13 @@ def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
(implicit EFLAGS)]>;
}
+def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
+ "test{b}\t{$src, %al|%al, $src}", []>;
+def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
+ "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
+ "test{l}\t{$src, %eax|%eax, $src}", []>;
+
def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
"test{b}\t{$src2, $src1|$src1, $src2}",
[(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0),
@@ -2878,6 +3237,13 @@ def SETNOm : I<0x91, MRM0m,
// Integer comparisons
let Defs = [EFLAGS] in {
+def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
+ "cmp{b}\t{$src, %al|%al, $src}", []>;
+def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
+ "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+ "cmp{l}\t{$src, %eax|%eax, $src}", []>;
+
def CMP8rr : I<0x38, MRMDestReg,
(outs), (ins GR8 :$src1, GR8 :$src2),
"cmp{b}\t{$src2, $src1|$src1, $src2}",
@@ -2920,6 +3286,12 @@ def CMP32rm : I<0x3B, MRMSrcMem,
"cmp{l}\t{$src2, $src1|$src1, $src2}",
[(X86cmp GR32:$src1, (loadi32 addr:$src2)),
(implicit EFLAGS)]>;
+def CMP8mrmrr : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
+def CMP16mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
+def CMP32mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
def CMP8ri : Ii8<0x80, MRM7r,
(outs), (ins GR8:$src1, i8imm:$src2),
"cmp{b}\t{$src2, $src1|$src1, $src2}",
@@ -3095,7 +3467,8 @@ let neverHasSideEffects = 1 in {
// Alias instructions that map movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
@@ -3127,12 +3500,12 @@ def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movl\t%gs:$src, $dst",
[(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movl\t%fs:$src, $dst",
[(set GR32:$dst, (fsload addr:$src))]>, SegFS;
@@ -3143,7 +3516,7 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def DWARF_LOC : I<0, Pseudo, (outs),
(ins i32imm:$line, i32imm:$col, i32imm:$file),
- ".loc\t${file:debug} ${line:debug} ${col:debug}",
+ ".loc\t$file $line $col",
[(dwarf_loc (i32 imm:$line), (i32 imm:$col),
(i32 imm:$file))]>;
@@ -3151,7 +3524,7 @@ def DWARF_LOC : I<0, Pseudo, (outs),
// EH Pseudo Instructions
//
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1 in {
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
[(X86ehret GR32:$addr)]>;
@@ -3223,6 +3596,78 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
TB, LOCK;
}
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+
+def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+
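A note on the lock-prefixed block above (an editorial aside, not part of the patch): these forms serve atomic read-modify-writes whose fetched value is discarded, so a plain `lock add`/`lock inc` suffices instead of the exchanging `lock xadd` that the LXADD* definitions provide. A minimal sketch in C++ terms, using the C++11 std::atomic API (which postdates this patch) purely for illustration:

    #include <atomic>

    std::atomic<int> counter{0};

    // Fetched value discarded: only the memory side effect is needed,
    // so a single "lock addl"/"lock incl" form like the ones above fits.
    void bump() { counter.fetch_add(1); }

    // Fetched value used: an exchanging form (lock xadd, i.e. LXADD*)
    // is required instead.
    int bump_and_get() { return counter.fetch_add(1) + 1; }
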
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
@@ -3318,6 +3763,25 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
"#ATOMSWAP6432 PSEUDO!", []>;
}
+// Segmentation support instructions.
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// String manipulation instructions
+
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lodsd", []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
@@ -3345,14 +3809,6 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
// Calls
// tailcall stuff
-def : Pat<(X86tailcall GR32:$dst),
- (TAILCALL)>;
-
-def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
- (TAILCALL)>;
-def : Pat<(X86tailcall (i32 texternalsym:$dst)),
- (TAILCALL)>;
-
def : Pat<(X86tcret GR32:$dst, imm:$off),
(TCRETURNri GR32:$dst, imm:$off)>;
@@ -3362,6 +3818,7 @@ def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>;
+// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i32 texternalsym:$dst)),
@@ -3472,21 +3929,17 @@ def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
- Requires<[In32BitMode]>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
- Requires<[In32BitMode]>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-// anyext
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
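For context on the anyext change above (my illustration, not from the patch): movzx writes the whole destination register, so later wide uses carry no false dependency on stale upper bits, which a narrow mov would leave live. A tiny C++ example of the kind of code this affects:

    // Widening an 8-bit value: the anyext patterns above select movzbl,
    // a full 32-bit register write, rather than an 8-bit mov that would
    // merge into (and depend on) the old register contents.
    unsigned widen(unsigned char c) {
      return c;
    }
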
// (and (i32 load), 255) -> (zextload i8)
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
@@ -3567,6 +4020,10 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
x86_subreg_8bit_hi))>,
@@ -3961,6 +4418,243 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
(implicit EFLAGS)),
(DEC32m addr:$dst)>, Requires<[In32BitMode]>;
+// Register-Register Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (OR8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (OR32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (OR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (XOR8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (XOR32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (AND8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (AND32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND32mi8 addr:$dst, i32immSExt8:$src2)>;
+
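What the X86or_flag/X86xor_flag/X86and_flag patterns above buy, sketched in C++ (an illustration under my reading of the patterns, not code from the patch): the logical operation already sets EFLAGS, so branching on its result needs no separate test/cmp.

    int select_nonzero(int a, int b) {
      int c = a | b;   // orl sets EFLAGS as a side effect
      if (c == 0)      // matched as (X86or_flag ...), (implicit EFLAGS):
        return -1;     // no separate testl before the branch
      return c;
    }
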
+// -disable-16bit support.
+def : Pat<(truncstorei16 (i32 imm:$src), addr:$dst),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(truncstorei16 GR32:$src, addr:$dst),
+ (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+def : Pat<(i32 (sextloadi16 addr:$dst)),
+ (MOVSX32rm16 addr:$dst)>;
+def : Pat<(i32 (zextloadi16 addr:$dst)),
+ (MOVZX32rm16 addr:$dst)>;
+def : Pat<(i32 (extloadi16 addr:$dst)),
+ (MOVZX32rm16 addr:$dst)>;
+
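The -disable-16bit patterns above keep i16 memory traffic expressible when 16-bit integer operations are otherwise promoted to 32 bits: stores go through the 16-bit subregister, loads widen on the way in. A rough C++ picture of what still has to lower cleanly (my example, not the patch's):

    // Even with i16 arithmetic promoted to i32, these accesses must
    // still lower to 16-bit movw / movzwl / movswl per the patterns above.
    void store16(short *p, int v)             { *p = static_cast<short>(v); }
    int  load16s(const short *p)              { return *p; }  // sign-extends
    unsigned load16z(const unsigned short *p) { return *p; }  // zero-extends
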
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index b79a00643324..ce76b4e8b11e 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,10 +1,10 @@
//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
@@ -67,16 +67,18 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
// MMX Multiclasses
//===----------------------------------------------------------------------===//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm - Simple MMX binary operator.
multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (OpVT (OpNode VR64:$src1,
(bitconvert
@@ -85,12 +87,14 @@ let isTwoAddress = 1 in {
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>;
@@ -139,8 +143,10 @@ let isTwoAddress = 1 in {
// MMX EMMS & FEMMS Instructions
//===----------------------------------------------------------------------===//
-def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
+ [(int_x86_mmx_emms)]>;
+def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
+ [(int_x86_mmx_femms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -149,12 +155,14 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+ [(set VR64:$dst,
+ (v2i32 (scalar_to_vector GR32:$src)))]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
-let mayStore = 1 in
+ [(set VR64:$dst,
+ (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
@@ -164,9 +172,16 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
[]>;
let neverHasSideEffects = 1 in
-def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg,
+// These are 64-bit moves, but since the OS X assembler doesn't
+// recognize a register-register movq, we write them as
+// movd.
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (scalar_to_vector GR64:$src)))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
@@ -179,21 +194,21 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (v1i64 VR64:$src), addr:$dst)]>;
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(v1i64 (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(movl immAllZerosV,
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
@@ -207,7 +222,8 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
[(set VR64:$dst,
(v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
let AddedComplexity = 20 in
-def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(v2i32 (X86vzmovl (v2i32
@@ -265,7 +281,7 @@ defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"pandn\t{$src2, $dst|$dst, $src2}",
@@ -316,33 +332,33 @@ defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
// Conversion Instructions
// -- Unpack Instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Unpack High Packed Data Instructions
- def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
+ def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
+ def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (mmx_unpckh VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)))))]>;
- def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
+ def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
+ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (mmx_unpckh VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)))))]>;
- def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
+ def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
@@ -379,12 +395,12 @@ let isTwoAddress = 1 in {
(v4i16 (mmx_unpckl VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)))))]>;
- def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
+ def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
+ def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
@@ -415,19 +431,22 @@ let neverHasSideEffects = 1 in {
def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
+ (ins f128mem:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
+ (ins i64mem:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
+ (ins i64mem:$src),
"cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
@@ -439,7 +458,8 @@ def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
+ (ins f128mem:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
@@ -459,14 +479,16 @@ def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
(iPTR imm:$src2)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3),
+ (outs VR64:$dst), (ins VR64:$src1, GR32:$src2,
+ i16i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- GR32:$src2, (iPTR imm:$src3))))]>;
+ GR32:$src2,(iPTR imm:$src3))))]>;
def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2,
+ i16i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst,
(v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
@@ -494,7 +516,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
//===----------------------------------------------------------------------===//
// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
"pxor\t$dst, $dst",
[(set VR64:$dst, (v2i32 immAllZerosV))]>;
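MMX_V_SET0 above is the pxor zero idiom; the new isCodeGenOnly flag keeps it away from the assembler matcher, since it is not a distinct real instruction. As a hedged sketch of a typical producer (not from the patch):

    #include <mmintrin.h>

    // Materializes the MMX zero vector; the backend can select
    // MMX_V_SET0 (pxor mm,mm), and isReMaterializable lets the register
    // allocator recreate it rather than spill it.
    __m64 zero64() {
      return _mm_setzero_si64();
    }
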
@@ -579,7 +601,7 @@ def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
let AddedComplexity = 20 in {
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
+ (MMX_MOVZDI2PDIrm addr:$src)>;
}
// Clear top half.
@@ -657,6 +679,33 @@ def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))),
(v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+// Patterns for vector comparisons
+def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
+ (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
+def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
+ (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
+def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
+ (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
+
+def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
+ (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
+def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
+ (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
+def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
+ (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
+
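The comparison patterns above route the generic X86pcmp* nodes to the existing MMX_PCMP* definitions, for both register and memory operands. A small C++ sketch of code in this neighborhood (hypothetical path; whether a given front end reaches these exact patterns depends on how it lowers the comparison):

    #include <mmintrin.h>

    // Byte-wise equality of two MMX vectors (pcmpeqb). Depending on the
    // front end this arrives as the MMX intrinsic or as a generic
    // comparison node that the new patterns above would match.
    __m64 eq_bytes(__m64 a, __m64 b) {
      return _mm_cmpeq_pi8(a, b);
    }
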
// CMOV* - Used to implement the SELECT DAG operation. Expanded by the
// scheduler into a branch sequence.
// These are expanded by the scheduler.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5d6ef36414a5..96fc932fc88c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1,10 +1,10 @@
//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
@@ -36,22 +36,22 @@ def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86pshufb : SDNode<"X86ISD::PSHUFB",
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pinsrb : SDNode<"X86ISD::PINSRB",
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86pinsrw : SDNode<"X86ISD::PINSRW",
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
@@ -69,6 +69,10 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>,
+ SDTCisVT<1, v4f32>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -83,11 +87,13 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def sdmem : Operand<v2f64> {
let PrintMethod = "printf64mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
//===----------------------------------------------------------------------===//
@@ -179,13 +185,13 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
@@ -360,25 +366,25 @@ def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
"cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi
+ [(set VR64:$dst, (int_x86_sse_cvtps2pi
(load addr:$src)))]>;
def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttps2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
"cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi
+ [(set VR64:$dst, (int_x86_sse_cvttps2pi
(load addr:$src)))]>;
let Constraints = "$src1 = $dst" in {
- def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
+ def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
VR64:$src2))]>;
- def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
+ def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
+ [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
(load addr:$src2)))]>;
}
@@ -407,11 +413,11 @@ let Constraints = "$src1 = $dst" in {
// Comparison instructions
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
- def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
}
@@ -428,13 +434,15 @@ def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
- def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src,
+ SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src,
+ SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
(load addr:$src), imm:$cc))]>;
@@ -460,18 +468,19 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
(implicit EFLAGS)]>;
} // Defs = [EFLAGS]
-// Aliases of packed SSE1 instructions for scalar use. These all have names that
-// start with 'Fs'.
+// Aliases of packed SSE1 instructions for scalar use. These all have names
+// that start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in
def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
"pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
Requires<[HasSSE1]>, TB, OpSize;
// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
// disregarded.
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1 in
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", []>;
@@ -552,7 +561,7 @@ multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -616,7 +625,7 @@ multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -671,7 +680,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
// SSE packed FP Instructions
// Move Instructions
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1 in
def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>;
let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
@@ -708,13 +717,13 @@ let Constraints = "$src1 = $dst" in {
def MOVLPSrm : PSI<0x12, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(movlp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
def MOVHPSrm : PSI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(movhp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
} // AddedComplexity
@@ -789,7 +798,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))]>;
-
+
// Vector operation, reg.
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
@@ -890,12 +899,12 @@ let Constraints = "$src1 = $dst" in {
}
let Constraints = "$src1 = $dst" in {
- def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
+ def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
VR128:$src, imm:$cc))]>;
- def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
+ def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
@@ -909,13 +918,13 @@ def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
- def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
+ def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
VR128:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
+ def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -924,24 +933,24 @@ let Constraints = "$src1 = $dst" in {
VR128:$src1, (memopv4f32 addr:$src2))))]>;
let AddedComplexity = 10 in {
- def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
+ def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
+ def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckh VR128:$src1,
(memopv4f32 addr:$src2))))]>;
- def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
+ def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
+ def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -984,7 +993,8 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
"xorps\t$dst, $dst",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
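V_SET0 above is the 128-bit analogue of the MMX zero idiom, with the extra canFoldAsLoad wrinkle the comment describes: the zero can alternatively be treated as a constant-pool load when folding it into another instruction's memory operand is profitable. A hedged usage sketch (mine):

    #include <xmmintrin.h>

    // The zero vector is normally xorps $dst, $dst (V_SET0);
    // canFoldAsLoad also permits a constant-pool all-zeros load when
    // folding into another instruction's memory operand is a win.
    __m128 relu(__m128 x) {
      return _mm_max_ps(x, _mm_setzero_ps());
    }
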
@@ -1046,14 +1056,14 @@ let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
- (loadf32 addr:$src))))))]>;
+ (loadf32 addr:$src))))))]>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(MOVZSS2PSrm addr:$src)>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE2 Instructions
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Instructions
let neverHasSideEffects = 1 in
@@ -1077,7 +1087,7 @@ def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
-def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
@@ -1087,6 +1097,27 @@ def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
"cvtsi2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
+def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}", []>;
+def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}", []>;
+
// SSE2 instructions with XS prefix
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1112,21 +1143,21 @@ def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi
+ [(set VR64:$dst, (int_x86_sse_cvtpd2pi
(memop addr:$src)))]>;
def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi
+ [(set VR64:$dst, (int_x86_sse_cvttpd2pi
(memop addr:$src)))]>;
def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd
+ [(set VR128:$dst, (int_x86_sse_cvtpi2pd
(load addr:$src)))]>;
// Aliases for intrinsics
@@ -1141,11 +1172,11 @@ def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
// Comparison instructions
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
- def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
}
@@ -1162,13 +1193,15 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
- def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src,
+ SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
VR128:$src, imm:$cc))]>;
- def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
+ def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src,
+ SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
(load addr:$src), imm:$cc))]>;
@@ -1194,11 +1227,12 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
(implicit EFLAGS)]>;
} // Defs = [EFLAGS]
-// Aliases of packed SSE2 instructions for scalar use. These all have names that
-// start with 'Fs'.
+// Aliases of packed SSE2 instructions for scalar use. These all have names
+// that start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in
def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
"pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
@@ -1286,7 +1320,7 @@ multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1350,7 +1384,7 @@ multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1402,7 +1436,7 @@ defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE packed FP Instructions
// Move Instructions
@@ -1442,13 +1476,13 @@ let Constraints = "$src1 = $dst" in {
def MOVLPDrm : PDI<0x12, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v2f64 (movlp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
def MOVHPDrm : PDI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v2f64 (movhp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
} // AddedComplexity
@@ -1564,7 +1598,7 @@ def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
VR128:$src2))]>;
def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
(load addr:$src2)))]>;
@@ -1612,7 +1646,7 @@ multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))]>;
-
+
// Vector operation, reg.
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
@@ -1712,12 +1746,12 @@ let Constraints = "$src1 = $dst" in {
}
let Constraints = "$src1 = $dst" in {
- def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
+ def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
VR128:$src, imm:$cc))]>;
- def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
+ def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
@@ -1730,12 +1764,12 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
- def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
+ def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
+ def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -1744,24 +1778,24 @@ let Constraints = "$src1 = $dst" in {
VR128:$src1, (memopv2f64 addr:$src2))))]>;
let AddedComplexity = 10 in {
- def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
+ def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
+ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckh VR128:$src1,
(memopv2f64 addr:$src2))))]>;
- def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
+ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
+ def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -1770,7 +1804,7 @@ let Constraints = "$src1 = $dst" in {
} // Constraints = "$src1 = $dst"
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE integer instructions
// Move Instructions
@@ -1825,14 +1859,17 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1,
+ i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
@@ -1840,15 +1877,17 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2)))))]>;
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
}
/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
@@ -1858,14 +1897,17 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (memopv2i64 addr:$src2)))]>;
}
} // Constraints = "$src1 = $dst"
@@ -2029,8 +2071,8 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32(memopv2i64 addr:$src1)),
- (undef))))]>;
+ (bc_v4i32(memopv2i64 addr:$src1)),
+ (undef))))]>;
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
@@ -2043,8 +2085,8 @@ def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (pshufhw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef))))]>,
XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix.
@@ -2064,90 +2106,90 @@ def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
let Constraints = "$src1 = $dst" in {
- def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+ def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+ def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+ def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+ def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+ def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2))))]>;
-
- def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+
+ def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+ def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+ def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+ def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckh VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+ def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+ def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckh VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -2172,7 +2214,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>;
}
@@ -2202,7 +2244,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
TB, Requires<[HasSSE2]>;
// Flush cache
@@ -2217,17 +2259,18 @@ def MFENCE : I<0xAE, MRM6r, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 0)), (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 1)), (MFENCE)>;
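For reference, a minimal C++ sketch of the fence intrinsics these membarrier patterns correspond to (standard <xmmintrin.h>/<emmintrin.h> intrinsics; the wrapper function is illustrative only):

    #include <xmmintrin.h>   // _mm_sfence (SSE)
    #include <emmintrin.h>   // _mm_lfence, _mm_mfence (SSE2)

    void fences() {
      _mm_sfence();   // store barrier -> the SFENCE pattern above
      _mm_lfence();   // load barrier  -> the LFENCE pattern above
      _mm_mfence();   // full barrier  -> the MFENCE pattern above
    }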
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
"pcmpeqd\t$dst, $dst",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
@@ -2240,7 +2283,7 @@ def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
(v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
@@ -2399,9 +2442,9 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
(MOVZPQILo2PQIrm addr:$src)>;
}
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE3 Instructions
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Instructions
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2525,9 +2568,9 @@ let AddedComplexity = 20 in
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSSE3 Instructions
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
@@ -2801,12 +2844,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
-// extload f32 -> f64. This matches load+fextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag combine.
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag
+// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
let Predicates = [HasSSE2] in
@@ -2884,12 +2928,12 @@ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
@@ -2899,16 +2943,16 @@ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
// Special binary v4i32 shuffle cases with SHUFPS.
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFPSrri VR128:$src1, VR128:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFPSrmi VR128:$src1, addr:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special binary v2i64 shuffle cases using SHUFPDrri.
def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFPDrri VR128:$src1, VR128:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
@@ -3030,7 +3074,7 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
// fall back to this for SSE1)
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFPSrri VR128:$src2, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
// Set lowest element and zero upper elements.
@@ -3097,7 +3141,7 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
-
+
//===----------------------------------------------------------------------===//
// SSE4.1 Instructions
//===----------------------------------------------------------------------===//
@@ -3108,7 +3152,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3149,41 +3193,41 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F64Int> {
// Intrinsic operation, reg.
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst),
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, reg.
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, mem.
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
}
@@ -3302,9 +3346,9 @@ let Constraints = "$src1 = $dst" in {
Intrinsic IntId128, bit Commutable = 0> {
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize {
let isCommutable = Commutable;
@@ -3339,7 +3383,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
OpSize;
@@ -3471,13 +3515,13 @@ def : Pat<(int_x86_sse41_pmovzxbq
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
@@ -3492,7 +3536,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
@@ -3507,13 +3551,13 @@ defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
addr:$dst)]>, OpSize;
@@ -3527,14 +3571,14 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
OpSize;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
addr:$dst)]>, OpSize;
@@ -3553,15 +3597,15 @@ let Constraints = "$src1 = $dst" in {
multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
imm:$src3))]>, OpSize;
}
@@ -3573,16 +3617,16 @@ let Constraints = "$src1 = $dst" in {
multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
imm:$src3)))]>, OpSize;
}
@@ -3590,37 +3634,57 @@ let Constraints = "$src1 = $dst" in {
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+// insertps has a few different modes; the first two below are optimized
+// inserts that won't zero arbitrary elements in the destination vector. The
+// pattern following them matches the intrinsic and may zero arbitrary
+// elements in the target vector.
let Constraints = "$src1 = $dst" in {
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize;
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, (loadf32 addr:$src2),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, OpSize;
}
}
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
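For orientation, a minimal sketch of the SSE4.1 intrinsic this pattern maps to, from <smmintrin.h> (the wrapper name and immediate are illustrative):

    #include <smmintrin.h>

    __m128 insert_into_lane1(__m128 dst, __m128 src) {
      // imm 0x10: bits [7:6] select src element 0, bits [5:4] write it into
      // dst element 1, and the zero mask in bits [3:0] is empty.
      return _mm_insert_ps(dst, src, 0x10);
    }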
+
+// ptest instruction. We lower to this in X86ISelLowering, primarily from the
+// Intel intrinsic that corresponds to it.
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(X86ptest VR128:$src1, VR128:$src2),
+ (implicit EFLAGS)]>, OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(X86ptest VR128:$src1, (load addr:$src2)),
+ (implicit EFLAGS)]>, OpSize;
}
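A sketch of the SSE4.1 intrinsic that reaches these PTEST definitions (from <smmintrin.h>; the wrapper function is illustrative):

    #include <smmintrin.h>

    int bits_under_mask_are_zero(__m128i v, __m128i mask) {
      // PTEST sets ZF when (v & mask) == 0; _mm_testz_si128 returns that flag.
      return _mm_testz_si128(v, mask);
    }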
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 Instructions
+//===----------------------------------------------------------------------===//
+
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
@@ -3647,3 +3711,171 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
+
+// CRC32 intrinsic instructions.
+// These instructions come only in rm form; the only difference between the
+// variants is the size of r and m.
+let Constraints = "$src1 = $dst" in {
+ def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i8mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_8 GR32:$src1,
+ (load addr:$src2)))]>, OpSize;
+ def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR8:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>,
+ OpSize;
+ def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i16mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_16 GR32:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+ def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR16:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>,
+ OpSize;
+ def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32 GR32:$src1,
+ (load addr:$src2)))]>, OpSize;
+ def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>,
+ OpSize;
+ def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64 GR64:$src1,
+ (load addr:$src2)))]>,
+ OpSize, REX_W;
+ def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
+ OpSize, REX_W;
+}
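A minimal C++ sketch of the byte-wise CRC intrinsic these definitions back, using the common CRC-32C seed/final-xor convention (<nmmintrin.h>; the helper name is illustrative):

    #include <nmmintrin.h>
    #include <cstddef>

    unsigned crc32c(const unsigned char *p, std::size_t n) {
      unsigned crc = ~0u;                 // conventional CRC-32C seed
      for (std::size_t i = 0; i < n; ++i)
        crc = _mm_crc32_u8(crc, p[i]);    // selects CRC32r8/CRC32m8 above
      return ~crc;                        // conventional final inversion
    }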
+
+// String/text processing instructions.
+let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "#PCMPISTRM128rr PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>, OpSize;
+def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "#PCMPISTRM128rm PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpistrm128 VR128:$src1,
+ (load addr:$src2),
+ imm:$src3))]>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ []>, OpSize;
+def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ []>, OpSize;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX],
+ usesCustomDAGSchedInserter = 1 in {
+def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "#PCMPESTRM128rr PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+ VR128:$src3,
+ EDX, imm:$src5))]>, OpSize;
+def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "#PCMPESTRM128rm PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+ (load addr:$src3),
+ EDX, imm:$src5))]>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ []>, OpSize;
+def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ []>, OpSize;
+}
+
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ [(set ECX,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ [(set ECX,
+ (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ }
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+let Defs = [ECX, EFLAGS] in {
+let Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3),
+ EDX, imm:$src5)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ }
+}
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
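One way the implicit-length string-compare intrinsic is used from C++ (<nmmintrin.h>, SSE4.2; the wrapper function is illustrative). Both operands are treated as NUL-terminated within their 16 bytes:

    #include <nmmintrin.h>

    int first_byte_in_set(__m128i set, __m128i text) {
      // Index of the first byte of text that equals any byte of set,
      // or 16 if there is no match; the result comes back in ECX.
      return _mm_cmpistri(set, text,
                          _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                          _SIDD_LEAST_SIGNIFICANT);
    }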
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index f92310607a8e..62ca47ff787a 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -15,15 +15,16 @@
#include "X86JITInfo.h"
#include "X86Relocations.h"
#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
#include "llvm/Function.h"
-#include "llvm/Config/alloca.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstdlib>
#include <cstring>
using namespace llvm;
// Determine the platform we're running on
-#if defined (__x86_64__) || defined (_M_AMD64)
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
# define X86_64_JIT
#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
# define X86_32_JIT
@@ -51,13 +52,6 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
#define GETASMPREFIX(X) GETASMPREFIX2(X)
#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-// Check if building with -fPIC
-#if defined(__PIC__) && __PIC__ && defined(__linux__)
-#define ASMCALLSUFFIX "@PLT"
-#else
-#define ASMCALLSUFFIX
-#endif
-
// For ELF targets, use a .size and .type directive, to let tools
// know the extent of functions defined in assembler.
#if defined(__ELF__)
@@ -130,7 +124,7 @@ extern "C" {
// JIT callee
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
@@ -206,7 +200,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
"movl %ebp, %esp\n" // Restore ESP
CFI(".cfi_def_cfa_register %esp\n")
"subl $12, %esp\n"
@@ -262,7 +256,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
"addl $16, %esp\n"
"movaps 48(%esp), %xmm3\n"
CFI(".cfi_restore %xmm3\n")
@@ -321,8 +315,7 @@ extern "C" {
#else // Not an i386 host
void X86CompilationCallback() {
- assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n");
- abort();
+ llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
}
#endif
}
@@ -331,14 +324,21 @@ extern "C" {
/// function stub when we did not know the real target of a call. This function
/// must locate the start of the stub or call site and pass it into the JIT
/// compiler function.
-extern "C" void ATTRIBUTE_USED
+extern "C" {
+#if !(defined (X86_64_JIT) && defined(_MSC_VER))
+  // The following function is called only from this translation unit,
+  // unless we are on 64-bit Windows with MSVC, where there is no support
+  // for inline assembly.
+static
+#endif
+void ATTRIBUTE_USED
X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
assert(*RetAddrLoc == RetAddr &&
"Could not find return address on the stack!");
// It's a stub if there is an interrupt marker after the call.
- bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
// The call instruction should have pushed the return value onto the stack...
#if defined (X86_64_JIT)
@@ -348,10 +348,10 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif
#if 0
- DOUT << "In callback! Addr=" << (void*)RetAddr
- << " ESP=" << (void*)StackPtr
- << ": Resolving call to function: "
- << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n";
+ DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
#endif
// Sanity check to make sure this really is a call instruction.
@@ -377,7 +377,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// If this is a stub, rewrite the call into an unconditional branch
// instruction so that two return addresses are not pushed onto the stack
// when the requested function finally gets called. This also makes the
- // 0xCD byte (interrupt) dead, so the marker doesn't effect anything.
+  // 0xCE byte (interrupt) dead, so the marker doesn't affect anything.
#if defined (X86_64_JIT)
// If the target address is within 32-bit range of the stub, use a
// PC-relative branch instead of loading the actual address. (This is
@@ -403,31 +403,26 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
*RetAddrLoc -= 5;
#endif
}
+}
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
JITCompilerFunction = F;
#if defined (X86_32_JIT) && !defined (_MSC_VER)
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) {
- // FIXME: support for AMD family of processors.
- if (memcmp(text.c, "GenuineIntel", 12) == 0) {
- X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 25) & 0x1)
- return X86CompilationCallback_SSE;
- }
- }
+ if (Subtarget->hasSSE1())
+ return X86CompilationCallback_SSE;
#endif
return X86CompilationCallback;
}
+X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ useGOT = 0;
+ TLSOffset = 0;
+}
+
void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE) {
#if defined (X86_64_JIT)
@@ -485,7 +480,10 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
#endif
- JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub!
+ // This used to use 0xCD, but that value is used by JITMemoryManager to
+ // initialize the buffer with garbage, which means it may follow a
+ // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
return JCE.finishGVStub(F);
}
@@ -495,9 +493,11 @@ void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub,
// complains about casting a function pointer to a normal pointer.
JCE.startGVStub(F, Stub, 5);
JCE.emitByte(0xE9);
-#if defined (X86_64_JIT)
- assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) ==
- ((intptr_t)Fn-JCE.getCurrentPCValue()-5)
+#if defined (X86_64_JIT) && !defined (NDEBUG)
+ // Yes, we need both of these casts, or some broken versions of GCC (4.2.4)
+ // get the signed-ness of the expression wrong. Go figure.
+ intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5;
+ assert(((Displacement << 32) >> 32) == Displacement
&& "PIC displacement does not fit in displacement field!");
#endif
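(The shift pair ((Displacement << 32) >> 32) sign-extends the low 32 bits back to 64, so the assert verifies the displacement fits in the signed 32-bit field emitted just below.)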
JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
@@ -538,6 +538,7 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
break;
}
case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
// Absolute relocation, just add the relocated value to the value already
// in memory.
*((unsigned*)RelocPos) += (unsigned)ResultPtr;
@@ -554,7 +555,7 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
TLSOffset -= size;
return TLSOffset;
#else
- assert(0 && "Cannot allocate thread local storage on this arch!\n");
+ llvm_unreachable("Cannot allocate thread local storage on this arch!");
return 0;
#endif
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 6a4e2148a5aa..c381433bf357 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -20,16 +20,15 @@
namespace llvm {
class X86TargetMachine;
+ class X86Subtarget;
class X86JITInfo : public TargetJITInfo {
X86TargetMachine &TM;
+ const X86Subtarget *Subtarget;
uintptr_t PICBase;
char* TLSOffset;
public:
- explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) {
- useGOT = 0;
- TLSOffset = 0;
- }
+ explicit X86JITInfo(X86TargetMachine &tm);
/// replaceMachineCodeForFunction - Make it so that calling the function
/// whose machine code is at OLD turns into a call to NEW, perhaps by
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
new file mode 100644
index 000000000000..9d7e66debb90
--- /dev/null
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -0,0 +1,123 @@
+//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1
+};
+
+static cl::opt<AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+static const char *const x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ bool is64Bit = Triple.getArch() == Triple::x86_64;
+
+ TextAlignFillValue = 0x90;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+
+ // Leopard and above support aligned common symbols.
+ COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9;
+
+ CommentString = "##";
+ PCSymbol = ".";
+
+ SupportsDebugInformation = true;
+ DwarfUsesInlineInfoSection = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::Dwarf;
+ AbsoluteEHSectionOffsets = false;
+}
+
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::Dwarf;
+ AbsoluteEHSectionOffsets = false;
+
+ // On Linux we must declare when we can use a non-executable stack.
+ if (Triple.getOS() == Triple::Linux)
+ NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
+
+X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+}
+
+
+X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ GlobalPrefix = "_";
+ CommentString = ";";
+
+ PrivateGlobalPrefix = "$";
+ AlignDirective = "\tALIGN\t";
+ ZeroDirective = "\tdb\t";
+ ZeroDirectiveSuffix = " dup(0)";
+ AsciiDirective = "\tdb\t";
+ AscizDirective = 0;
+ Data8bitsDirective = "\tdb\t";
+ Data16bitsDirective = "\tdw\t";
+ Data32bitsDirective = "\tdd\t";
+ Data64bitsDirective = "\tdq\t";
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+
+ AlignmentIsInBytes = true;
+}
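The -x86-asm-syntax option declared at the top of this file is exercised through llc, e.g. "llc -x86-asm-syntax=intel foo.ll -o foo.s" (AT&T output is the default). As the enum comment notes, the ATT = 0 / Intel = 1 numbering must stay in sync with GCC's inline-asm dialect numbering.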
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
new file mode 100644
index 000000000000..18e2bdbcba91
--- /dev/null
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -0,0 +1,42 @@
+//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+ class Triple;
+
+ struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+ };
+
+ struct X86ELFMCAsmInfo : public MCAsmInfo {
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+ };
+
+ struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ };
+
+ struct X86WinMCAsmInfo : public MCAsmInfo {
+ explicit X86WinMCAsmInfo(const Triple &Triple);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a2f319f9a7ab..f03723ae3098 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -30,14 +30,16 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
@@ -54,6 +56,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
StackAlign = TM.getFrameInfo()->getStackAlignment();
+
if (Is64Bit) {
SlotSize = 8;
StackPtr = X86::RSP;
@@ -65,12 +68,12 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
}
}
-// getDwarfRegNum - This function maps LLVM register identifiers to the
-// Dwarf specific numbering, used in debug info and exception tables.
-
+/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
+/// specific numbering, used in debug info and exception tables.
int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
unsigned Flavour = DWARFFlavour::X86_64;
+
if (!Subtarget->is64Bit()) {
if (Subtarget->isTargetDarwin()) {
if (isEH)
@@ -88,9 +91,8 @@ int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}
-// getX86RegNum - This function maps LLVM register identifiers to their X86
-// specific numbering, which is used in various places encoding instructions.
-//
+/// getX86RegNum - This function maps LLVM register identifiers to their X86
+/// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
switch(RegNo) {
case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
@@ -146,17 +148,131 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
- assert(0 && "Register allocator hasn't allocated reg correctly yet!");
+ llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
return 0;
}
}
-const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- if (Subtarget->is64Bit())
- return &X86::GR64RegClass;
- else
+const TargetRegisterClass *
+X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case 1:
+ // 8-bit
+ if (B == &X86::GR8RegClass) {
+ if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::GR8_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_NOREXRegClass;
+ else if (A == &X86::GR16_ABCDRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case 2:
+ // 8-bit hi
+ if (B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case 3:
+ // 16-bit
+ if (B == &X86::GR16RegClass) {
+ if (A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR16_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ } else if (B == &X86::GR16_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+      return &X86::GR32_ABCDRegClass;
+ }
+ break;
+ case 4:
+ // 32-bit
+ if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+ if (A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR32_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ }
+ break;
+ }
+ return 0;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
+ case 0: // Normal GPRs.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64RegClass;
return &X86::GR32RegClass;
+ case 1: // Normal GRPs except the stack pointer (for encoding reasons).
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_NOSPRegClass;
+ return &X86::GR32_NOSPRegClass;
+ }
}
const TargetRegisterClass *
@@ -276,6 +392,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::ESP);
Reserved.set(X86::SP);
Reserved.set(X86::SPL);
+
// Set the frame-pointer register and its aliases as reserved if needed.
if (hasFP(MF)) {
Reserved.set(X86::RBP);
@@ -283,10 +400,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::BP);
Reserved.set(X86::BPL);
}
- // Mark the x87 stack registers as reserved, since they don't
- // behave normally with respect to liveness. We don't fully
- // model the effects of x87 stack pushes and pops after
- // stackification.
+
+ // Mark the x87 stack registers as reserved, since they don't behave normally
+ // with respect to liveness. We don't fully model the effects of x87 stack
+ // pushes and pops after stackification.
Reserved.set(X86::ST0);
Reserved.set(X86::ST1);
Reserved.set(X86::ST2);
@@ -304,10 +421,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
unsigned MaxAlign = 0;
+
for (int i = FFI->getObjectIndexBegin(),
e = FFI->getObjectIndexEnd(); i != e; ++i) {
if (FFI->isDeadObjectIndex(i))
continue;
+
unsigned Align = FFI->getObjectAlignment(i);
MaxAlign = std::max(MaxAlign, Align);
}
@@ -315,10 +434,9 @@ static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
return MaxAlign;
}
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
@@ -335,7 +453,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// FIXME: Currently we don't support stack realignment for functions with
- // variable-sized allocas
+ // variable-sized allocas
return (RealignStack &&
(MFI->getMaxAlignment() > StackAlign &&
!MFI->hasVarSizedObjects()));
@@ -345,34 +463,45 @@ bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const {
+ if (Reg == FramePtr && hasFP(MF)) {
+ FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
+ return true;
+ }
+ return false;
+}
+
int
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
- int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
- uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
if (needsStackRealignment(MF)) {
- if (FI < 0)
- // Skip the saved EBP
+ if (FI < 0) {
+ // Skip the saved EBP.
Offset += SlotSize;
- else {
- unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
assert( (-(Offset + StackSize)) % Align == 0);
Align = 0;
return Offset + StackSize;
}
-
// FIXME: Support tail calls
} else {
if (!hasFP(MF))
return Offset + StackSize;
- // Skip the saved EBP
+ // Skip the saved EBP.
Offset += SlotSize;
// Skip the RETADDR move area
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
}
return Offset;
@@ -392,24 +521,29 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
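(For example, with StackAlign = 16 an Amount of 20 rounds up to ((20 + 15) / 16) * 16 = 32.)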
MachineInstr *New = 0;
if (Old->getOpcode() == getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
- StackPtr).addReg(StackPtr).addImm(Amount);
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
} else {
assert(Old->getOpcode() == getCallFrameDestroyOpcode());
- // factor out the amount the callee already popped.
+
+ // Factor out the amount the callee already popped.
uint64_t CalleeAmt = Old->getOperand(1).getImm();
Amount -= CalleeAmt;
- if (Amount) {
+
+ if (Amount) {
unsigned Opc = (Amount < 128) ?
(Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
(Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
+ .addReg(StackPtr)
+ .addImm(Amount);
}
}
@@ -417,7 +551,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
- // Replace the pseudo instruction with a new instruction...
+ // Replace the pseudo instruction with a new instruction.
MBB.insert(I, New);
}
}
@@ -432,10 +566,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
- StackPtr).addReg(StackPtr).addImm(CalleeAmt);
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(CalleeAmt);
+
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
-
MBB.insert(I, New);
}
}
@@ -443,21 +579,24 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
+unsigned
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const{
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
int FrameIndex = MI.getOperand(i).getIndex();
-
unsigned BasePtr;
+
if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else
@@ -471,34 +610,33 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
int Offset = getFrameIndexOffset(MF, FrameIndex) +
- (int)(MI.getOperand(i+3).getImm());
+ (int)(MI.getOperand(i + 3).getImm());
- MI.getOperand(i+3).ChangeToImmediate(Offset);
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
(uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
+ return 0;
}
void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
- MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
- calculateMaxStackAlignment(FFI));
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
+ calculateMaxStackAlignment(MFI));
- FFI->setMaxAlignment(MaxAlign);
-}
+ MFI->setMaxAlignment(MaxAlign);
-void
-X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
if (TailCallReturnAddrDelta < 0) {
// create RETURNADDR area
// arg
@@ -509,18 +647,21 @@ X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
// ...
// }
// [EBP]
- MF.getFrameInfo()->
- CreateFixedObject(-TailCallReturnAddrDelta,
- (-1*SlotSize)+TailCallReturnAddrDelta);
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta);
}
+
if (hasFP(MF)) {
assert((TailCallReturnAddrDelta <= 0) &&
"The Delta should always be zero or negative");
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+
// Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- (int)SlotSize * -2+
- TailCallReturnAddrDelta);
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
}
@@ -549,14 +690,14 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
MachineInstr *MI =
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(ThisVal);
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
Offset -= ThisVal;
}
}
-// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions, scanning
+/// with the upper iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
@@ -579,11 +720,12 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions, scanning
+/// with the lower iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -610,23 +752,22 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted
-/// argument and the stack adjustment is returned as a positive value for ADD
-/// and a negative for SUB.
+/// instruction. If it is an ADD/SUB instruction, it is deleted and the stack
+/// adjustment is returned as a positive value for ADD and a negative one for
+/// SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr,
bool doMergeWithPrevious) {
-
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
- int Offset = 0;
-
MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == StackPtr){
@@ -644,122 +785,116 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
-void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
- unsigned FrameLabelId,
- unsigned ReadyLabelId) const {
+void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ unsigned LabelId,
+ unsigned FramePtr) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
- if (!MMI)
- return;
+ if (!MMI) return;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
- uint64_t StackSize = MFI->getStackSize();
std::vector<MachineMove> &Moves = MMI->getFrameMoves();
const TargetData *TD = MF.getTarget().getTargetData();
+ bool HasFP = hasFP(MF);
- // Calculate amount of bytes used for return address storing
+ // Calculate amount of bytes used for return address storing.
int stackGrowth =
(MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
TargetFrameInfo::StackGrowsUp ?
TD->getPointerSize() : -TD->getPointerSize());
- MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
-
- if (StackSize) {
- // Show update of SP.
- if (hasFP(MF)) {
- // Adjust SP
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- } else {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP,
- -StackSize+stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- }
- } else {
- // FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- }
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
// FIXME: This is a dirty hack. The code itself is a mess right now.
// It should be rewritten from scratch and generalized at some point.
- // Determine maximum offset (minumum due to stack growth)
+ // Determine maximum offset (minimum due to stack growth).
int64_t MaxOffset = 0;
- for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(CSI[I].getFrameIdx()));
-
- // Calculate offsets
- int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
- for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
- int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- Offset = (MaxOffset-Offset+saveAreaOffset);
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
- }
-
- if (hasFP(MF)) {
- // Save FP
- MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
- MachineLocation FPSrc(FramePtr);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
}
}
-
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function* Fn = MF.getFunction();
- const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
+ const Function *Fn = MF.getFunction();
+ const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
- !Fn->doesNotThrow() ||
- UnwindTablesMandatory;
+ !Fn->doesNotThrow() || UnwindTablesMandatory;
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
DebugLoc DL;
- // Prepare for frame info.
- unsigned FrameLabelId = 0;
-
- // Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
-
- // Get desired stack alignment
- uint64_t MaxAlign = MFI->getMaxAlignment();
-
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
// If this is x86-64, the Red Zone is not disabled, we are a leaf function,
// we use at most 128 bytes of stack space, and we have no frame pointer,
// calls, or dynamic allocas, then we do not need to adjust the stack
// pointer (we fit in the Red Zone).
- bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
- if (Is64Bit && !DisableRedZone &&
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->hasCalls() && // No calls.
!Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
- if (hasFP(MF)) MinSize += SlotSize;
- StackSize = std::max(MinSize,
- StackSize > 128 ? StackSize - 128 : 0);
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ } else if (Subtarget->isTargetWin64()) {
+ // We always need to allocate 32 bytes as the register spill area.
+ // FIXME: We might reuse these 32 bytes for leaf functions.
+ StackSize += 32;
MFI->setStackSize(StackSize);
}
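A worked example of the red-zone clamp above, assuming a frame pointer is present (SlotSize = 8) and the local frame is small: with CalleeSavedFrameSize = 8 and StackSize = 40, the locals fit entirely in the 128-byte red zone and only the callee-saved area survives:

    #include <algorithm>
    #include <cstdint>

    // StackSize = max(MinSize, StackSize > 128 ? StackSize - 128 : 0)
    uint64_t redZoneStackSize(uint64_t MinSize, uint64_t StackSize) {
      return std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    }
    // redZoneStackSize(/*MinSize=*/8 + 8, /*StackSize=*/40) == 16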
@@ -769,33 +904,73 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
- StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
+
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
uint64_t NumBytes = 0;
- if (hasFP(MF)) {
- // Calculate required stack adjustment
+ int stackGrowth =
+ (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize());
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
if (needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
- // Get the offset of the stack slot for the EBP register... which is
+ // Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
MFI->setOffsetAdjustment(-NumBytes);
- // Save EBP into the appropriate stack slot...
+ // Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(FramePtr, RegState::Kill);
if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabelId = MMI->NextLabelID();
+ // Mark the place where EBP/RBP was saved.
+ unsigned FrameLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ // FIXME: Verify & implement for FP
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP,
+ 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
}
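The machine-move mapping documented at the top of this hunk can be read as a small classifier; the following is only a sketch of the documented rules, not the DWARF emitter itself:

    // Sketch of the MachineMove -> DWARF CFA mapping in the comment above.
    enum CFAKind { DefCFAOffset, DefCFA, DefCFARegister, OffsetRule };
    struct Loc { bool IsVirtualFP; };

    CFAKind classifyMove(const Loc &Dst, const Loc &Src) {
      if (Dst.IsVirtualFP)
        return Src.IsVirtualFP ? DefCFAOffset   // DW_CFA_def_cfa_offset
                               : DefCFA;        // DW_CFA_def_cfa
      if (Src.IsVirtualFP)
        return DefCFARegister;                  // DW_CFA_def_cfa_register
      return OffsetRule;  // DW_CFA_offset / _extended / _extended_sf
    }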
// Update EBP with the new base value...
@@ -803,6 +978,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr);
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ unsigned FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+ }
+
// Mark the FramePtr as live-in in every block except the entry.
for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
I != E; ++I)
@@ -814,6 +1000,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -822,11 +1009,30 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
// Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
- MBBI->getOpcode() == X86::PUSH64r))
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
++MBBI;
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ unsigned LabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
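On x86-64 (stackGrowth = -8) the loop above moves the CFA rule down one slot per callee-saved push; a runnable trace of that progression (the push count is chosen for illustration):

    #include <cstdio>

    int main() {
      int stackGrowth = -8;               // stack grows down, 8-byte slots
      int StackOffset = 2 * stackGrowth;  // return address + first push
      for (int Push = 1; Push <= 3; ++Push) {
        std::printf("push %d: CFA rule offset %d\n", Push, StackOffset);
        StackOffset += stackGrowth;       // the next push moves the CFA again
      }
      return 0;
    }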
@@ -883,12 +1089,29 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
- if (needsFrameMoves) {
- unsigned ReadyLabelId = 0;
- // Mark effective beginning of when frame pointer is ready.
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
- emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+ if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ unsigned LabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+ } else {
+ // FIXME: Verify & implement for FP
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr);
}
}
@@ -901,6 +1124,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
DebugLoc DL = MBBI->getDebugLoc();
switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
case X86::RET:
case X86::RETI:
case X86::TCRETURNdi:
@@ -911,26 +1136,25 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
case X86::EH_RETURN64:
case X86::TAILJMPd:
case X86::TAILJMPr:
- case X86::TAILJMPm: break; // These are ok
- default:
- assert(0 && "Can only insert epilog into returning blocks");
+ case X86::TAILJMPm:
+ break; // These are ok
}
- // Get the number of bytes to allocate from the FrameInfo
+ // Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
uint64_t MaxAlign = MFI->getMaxAlignment();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
if (hasFP(MF)) {
- // Calculate required stack adjustment
+ // Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
if (needsStackRealignment(MF))
FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
NumBytes = FrameSize - CSSize;
- // pop EBP.
+ // Pop EBP.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
} else {
@@ -942,9 +1166,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
+
if (Opc != X86::POP32r && Opc != X86::POP64r &&
!PI->getDesc().isTerminator())
break;
+
--MBBI;
}
@@ -957,10 +1183,10 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! The same applies for the case when the stack was
- // realigned
+ // realigned.
if (needsStackRealignment(MF)) {
// We cannot use LEA here, because stack pointer was realigned. We need to
- // deallocate local frame back
+ // deallocate local frame back.
if (CSSize) {
emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
MBBI = prior(LastCSPop);
@@ -972,17 +1198,18 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
} else if (MFI->hasVarSizedObjects()) {
if (CSSize) {
unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
- MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
+ MachineInstr *MI =
+ addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
MBB.insert(MBBI, MI);
- } else
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(FramePtr);
-
- } else {
- // adjust stack pointer back: ESP += numbytes
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
}
// We're returning from function via eh_return.
@@ -993,9 +1220,9 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
StackPtr).addReg(DestAddr.getReg());
- // Tail call return: adjust the stack pointer and jump to callee
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+ // Tail call return: adjust the stack pointer and jump to callee.
MBBI = prior(MBB.end());
MachineOperand &JumpTarget = MBBI->getOperand(0);
MachineOperand &StackAdjust = MBBI->getOperand(1);
@@ -1006,6 +1233,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
int Offset = 0;
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
// Incorporate the retaddr area.
Offset = StackAdj-MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
@@ -1032,6 +1260,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// Add the return addr area delta back since we are not tail calling.
int delta = -1*X86FI->getTCReturnAddrDelta();
MBBI = prior(MBB.end());
+
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
@@ -1039,18 +1268,16 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
}
unsigned X86RegisterInfo::getRARegister() const {
- if (Is64Bit)
- return X86::RIP; // Should have dwarf #16
- else
- return X86::EIP; // Should have dwarf #8
+ return Is64Bit ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
}
unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
return hasFP(MF) ? FramePtr : StackPtr;
}
-void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
+void
+X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
// Calculate the number of bytes used for return address storing
int stackGrowth = (Is64Bit ? -8 : -4);
@@ -1066,18 +1293,18 @@ void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
}
unsigned X86RegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
+ llvm_unreachable("What is the exception register");
return 0;
}
unsigned X86RegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
+ llvm_unreachable("What is the exception handler register");
return 0;
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) {
- switch (VT.getSimpleVT()) {
+unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return Reg;
case MVT::i8:
if (High) {
@@ -1264,14 +1491,21 @@ namespace {
RegNum < RI.getLastVirtReg(); ++RegNum)
MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
- FFI->setMaxAlignment(MaxAlign);
+ if (FFI->getMaxAlignment() == MaxAlign)
+ return false;
- return false;
+ FFI->setMaxAlignment(MaxAlign);
+ return true;
}
virtual const char *getPassName() const {
return "X86 Maximal Stack Alignment Calculator";
}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
char MSAC::ID = 0;
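The change to MSAC follows the MachineFunctionPass contract: runOnMachineFunction must return true only when it modified the function, and getAnalysisUsage declares that the CFG is preserved. The new return logic, reduced to its essence (the names below are illustrative):

    // Return whether the stored value actually changed, per the pass contract.
    bool updateMaxAlign(unsigned &Stored, unsigned Computed) {
      if (Stored == Computed)
        return false;   // no modification; nothing for the pass manager to invalidate
      Stored = Computed;
      return true;      // function was modified
    }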
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 33b9f5edc73a..f63570706d23 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -93,9 +93,16 @@ public:
/// Code Generation virtual methods...
///
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
- const TargetRegisterClass *getPointerRegClass() const;
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
/// getCrossCopyRegClass - Returns a legal register class to copy a register
/// in the specified class to or from. Returns NULL if it is possible to copy
@@ -125,23 +132,25 @@ public:
bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, unsigned LabelId,
+ unsigned FramePtr) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- void emitFrameMoves(MachineFunction &MF,
- unsigned FrameLabelId, unsigned ReadyLabelId) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(MachineFunction &MF) const;
@@ -155,8 +164,8 @@ public:
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
-unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false);
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX
+unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
} // End llvm namespace
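A hedged usage sketch for the updated getX86SubSuperRegister signature; it assumes EVT's implicit construction from a simple value type, which holds in this era of the API:

    #include "X86RegisterInfo.h"
    using namespace llvm;

    unsigned subSuperDemo() {
      unsigned AX = getX86SubSuperRegister(X86::EAX, MVT::i16);       // X86::AX
      unsigned AH = getX86SubSuperRegister(X86::EAX, MVT::i8, true);  // X86::AH
      return AX + AH; // keep the results used in this sketch
    }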
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 2e6f017e2704..7bf074d4991e 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -270,42 +270,27 @@ def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
+ [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SPL or BPL.
- static const unsigned X86_GR8_AO_64_fp[] = {
- X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
- X86::R8B, X86::R9B, X86::R10B, X86::R11B,
- X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B
- };
- // If not, just don't allocate SPL.
static const unsigned X86_GR8_AO_64[] = {
X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
};
- // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- static const unsigned X86_GR8_AO_32[] = {
- X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
- };
GR8Class::iterator
GR8Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return X86_GR8_AO_32;
- else if (RI->hasFP(MF))
- return X86_GR8_AO_64_fp;
- else
+ if (Subtarget.is64Bit())
return X86_GR8_AO_64;
+ else
+ return begin();
}
GR8Class::iterator
@@ -313,17 +298,20 @@ def GR8 : RegisterClass<"X86", [i8], 8,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
- return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
+ // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ return begin() + 8;
else if (RI->hasFP(MF))
- return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
+ // If so, don't allocate SPL or BPL.
+ return array_endof(X86_GR8_AO_64) - 1;
else
- return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate SPL.
+ return array_endof(X86_GR8_AO_64);
}
}];
}
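The rewritten bodies above replace the sizeof(Array) / sizeof(unsigned) arithmetic with array_endof, an LLVM helper found in llvm/ADT/STLExtras.h in this era; its whole definition is essentially the following one-liner:

    #include <cstddef>

    // One-past-the-end of a C array, the precursor of std::end.
    template <class T, std::size_t N>
    inline T *array_endof_sketch(T (&Array)[N]) {
      return Array + N;
    }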
-
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
@@ -333,42 +321,20 @@ def GR16 : RegisterClass<"X86", [i16], 16,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SP or BP.
- static const unsigned X86_GR16_AO_64_fp[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
- X86::R8W, X86::R9W, X86::R10W, X86::R11W,
- X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W
- };
- static const unsigned X86_GR16_AO_32_fp[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
- };
- // If not, just don't allocate SP.
static const unsigned X86_GR16_AO_64[] = {
X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP
};
- static const unsigned X86_GR16_AO_32[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
- };
GR16Class::iterator
GR16Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit()) {
- if (RI->hasFP(MF))
- return X86_GR16_AO_64_fp;
- else
- return X86_GR16_AO_64;
- } else {
- if (RI->hasFP(MF))
- return X86_GR16_AO_32_fp;
- else
- return X86_GR16_AO_32;
- }
+ if (Subtarget.is64Bit())
+ return X86_GR16_AO_64;
+ else
+ return begin();
}
GR16Class::iterator
@@ -377,21 +343,26 @@ def GR16 : RegisterClass<"X86", [i16], 16,
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
+ // If so, don't allocate SP or BP.
+ return array_endof(X86_GR16_AO_64) - 1;
else
- return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate SP.
+ return array_endof(X86_GR16_AO_64);
} else {
+ // Does the function dedicate EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
+ // If so, don't allocate SP or BP.
+ return begin() + 6;
else
- return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
+ // If not, just don't allocate SP.
+ return begin() + 7;
}
}
}];
}
-
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
@@ -401,42 +372,20 @@ def GR32 : RegisterClass<"X86", [i32], 32,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate ESP or EBP.
- static const unsigned X86_GR32_AO_64_fp[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
- X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D
- };
- static const unsigned X86_GR32_AO_32_fp[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
- };
- // If not, just don't allocate ESP.
static const unsigned X86_GR32_AO_64[] = {
X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
};
- static const unsigned X86_GR32_AO_32[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
- };
GR32Class::iterator
GR32Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit()) {
- if (RI->hasFP(MF))
- return X86_GR32_AO_64_fp;
- else
- return X86_GR32_AO_64;
- } else {
- if (RI->hasFP(MF))
- return X86_GR32_AO_32_fp;
- else
- return X86_GR32_AO_32;
- }
+ if (Subtarget.is64Bit())
+ return X86_GR32_AO_64;
+ else
+ return begin();
}
GR32Class::iterator
@@ -445,21 +394,29 @@ def GR32 : RegisterClass<"X86", [i32], 32,
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
+ // If so, don't allocate ESP or EBP.
+ return array_endof(X86_GR32_AO_64) - 1;
else
- return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate ESP.
+ return array_endof(X86_GR32_AO_64);
} else {
+ // Does the function dedicate EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
+ // If so, don't allocate ESP or EBP.
+ return begin() + 6;
else
- return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
+ // If not, just don't allocate ESP.
+ return begin() + 7;
}
}
}];
}
-
+// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
+// RIP isn't really a register and it can't be used anywhere except in an
+// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
@@ -483,6 +440,11 @@ def GR64 : RegisterClass<"X86", [i64], 64,
}];
}
+// Segment registers for use by MOV instructions (and others) that have a
+// segment register as one operand. These registers always contain a 16-bit
+// segment descriptor.
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
+}
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -509,38 +471,25 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
// of registers which do not by themselves require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, BL, AH, CH, DH, BH,
+ [AL, CL, DL, AH, CH, DH, BL, BH,
SIL, DIL, BPL, SPL]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SPL or BPL.
- static const unsigned X86_GR8_NOREX_AO_64_fp[] = {
- X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL
- };
- // If not, just don't allocate SPL.
static const unsigned X86_GR8_NOREX_AO_64[] = {
X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL
};
- // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- static const unsigned X86_GR8_NOREX_AO_32[] = {
- X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
- };
GR8_NOREXClass::iterator
GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return X86_GR8_NOREX_AO_32;
- else if (RI->hasFP(MF))
- return X86_GR8_NOREX_AO_64_fp;
- else
+ if (Subtarget.is64Bit())
return X86_GR8_NOREX_AO_64;
+ else
+ return begin();
}
GR8_NOREXClass::iterator
@@ -548,15 +497,16 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
- return X86_GR8_NOREX_AO_32 +
- (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned));
+ // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ return begin() + 8;
else if (RI->hasFP(MF))
- return X86_GR8_NOREX_AO_64_fp +
- (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned));
+ // If so, don't allocate SPL or BPL.
+ return array_endof(X86_GR8_NOREX_AO_64) - 1;
else
- return X86_GR8_NOREX_AO_64 +
- (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate SPL.
+ return array_endof(X86_GR8_NOREX_AO_64);
}
}];
}
@@ -564,38 +514,20 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP]> {
let SubRegClassList = [GR8_NOREX, GR8_NOREX];
let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SP or BP.
- static const unsigned X86_GR16_AO_fp[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
- };
- // If not, just don't allocate SP.
- static const unsigned X86_GR16_AO[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
- };
-
- GR16_NOREXClass::iterator
- GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
- return X86_GR16_AO_fp;
- else
- return X86_GR16_AO;
- }
-
GR16_NOREXClass::iterator
GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned));
+ // If so, don't allocate SP or BP.
+ return end() - 2;
else
- return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned));
+ // If not, just don't allocate SP.
+ return end() - 1;
}
}];
}
@@ -604,89 +536,149 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate ESP or EBP.
- static const unsigned X86_GR32_NOREX_AO_fp[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
- };
- // If not, just don't allocate ESP.
- static const unsigned X86_GR32_NOREX_AO[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
- };
-
GR32_NOREXClass::iterator
- GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_NOREX_AO_fp;
+ // If so, don't allocate ESP or EBP.
+ return end() - 2;
else
- return X86_GR32_NOREX_AO;
+ // If not, just don't allocate ESP.
+ return end() - 1;
}
-
- GR32_NOREXClass::iterator
- GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ }];
+}
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_NOREX_AO_fp +
- (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
+ // If so, don't allocate RIP, RSP or RBP.
+ return end() - 3;
else
- return X86_GR32_NOREX_AO +
- (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
+ // If not, just don't allocate RIP or RSP.
+ return end() - 2;
}
}];
}
-// GR64_NOREX - GR64 registers which do not require a REX prefix.
-def GR64_NOREX : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+// GR32_NOSP - GR32 registers except ESP.
+def GR32_NOSP : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+ let SubRegClassList = [GR8, GR8, GR16];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate RSP or RBP.
- static const unsigned X86_GR64_NOREX_AO_fp[] = {
- X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
- };
- // If not, just don't allocate RSP.
- static const unsigned X86_GR64_NOREX_AO[] = {
- X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
+ static const unsigned X86_GR32_NOSP_AO_64[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
};
- GR64_NOREXClass::iterator
- GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ GR32_NOSPClass::iterator
+ GR32_NOSPClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit())
+ return X86_GR32_NOSP_AO_64;
+ else
+ return begin();
+ }
+
+ GR32_NOSPClass::iterator
+ GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
- return X86_GR64_NOREX_AO_fp;
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ // Does the function dedicate RBP to being a frame ptr?
+ if (RI->hasFP(MF))
+ // If so, don't allocate EBP.
+ return array_endof(X86_GR32_NOSP_AO_64) - 1;
+ else
+ // If not, any reg in this class is ok.
+ return array_endof(X86_GR32_NOSP_AO_64);
+ } else {
+ // Does the function dedicate EBP to being a frame ptr?
+ if (RI->hasFP(MF))
+ // If so, don't allocate EBP.
+ return begin() + 6;
+ else
+ // If not, any reg in this class is ok.
+ return begin() + 7;
+ }
+ }
+ }];
+}
+
+// GR64_NOSP - GR64 registers except RSP (and RIP).
+def GR64_NOSP : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP]> {
+ let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64_NOSPClass::iterator
+ GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return begin(); // None of these are allocatable in 32-bit.
+ if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ return end()-1; // If so, don't allocate RBP
else
- return X86_GR64_NOREX_AO;
+ return end(); // If not, any reg in this class is ok.
}
+ }];
+}
- GR64_NOREXClass::iterator
- GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64_NOREX_NOSPClass::iterator
+ GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR64_NOREX_AO_fp +
- (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
+ // If so, don't allocate RBP.
+ return end() - 1;
else
- return X86_GR64_NOREX_AO +
- (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
+ // If not, any reg in this class is ok.
+ return end();
}
}];
}
// A class to support the 'A' assembler constraint: EAX then EDX.
-def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>;
+def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+}
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32,
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index b225f480e4ed..990962dc4173 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -20,21 +20,31 @@ namespace llvm {
namespace X86 {
/// RelocationType - An enum for the x86 relocation codes. Note that
/// the terminology here doesn't follow x86 convention - word means
- /// 32-bit and dword means 64-bit.
+ /// 32-bit and dword means 64-bit. The relocations are handled by the JIT
+ /// or object-code emitters; this is transparent to the x86 code emitter,
+ /// but the JIT and object-code emitters treat them differently.
enum RelocationType {
- // reloc_pcrel_word - PC relative relocation, add the relocated value to
- // the value already in memory, after we adjust it for where the PC is.
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
reloc_pcrel_word = 0,
- // reloc_picrel_word - PIC base relative relocation, add the relocated
- // value to the value already in memory, after we adjust it for where the
- // PIC base is.
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
reloc_picrel_word = 1,
-
- // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
- // add the relocated value to the value already in memory.
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
reloc_absolute_word = 2,
- reloc_absolute_dword = 3
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
};
}
}
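As a hedged sketch of what "just add the relocated value to the value already in memory" means for the absolute-word relocations above (the resolver function here is illustrative; the real handling lives in the JIT and object-code emitters):

    #include <cstdint>
    #include <cstring>

    // Apply reloc_absolute_word at Slot: read-modify-write the 32-bit word.
    void applyAbsoluteWord(uint8_t *Slot, uint32_t Value) {
      uint32_t Cur;
      std::memcpy(&Cur, Slot, sizeof(Cur)); // the value already in memory
      Cur += Value;                         // add the relocated value
      std::memcpy(Slot, &Cur, sizeof(Cur));
    }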
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 8506fa66a645..fb76aeb05556 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -13,80 +13,111 @@
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
+#include "X86InstrInfo.h"
#include "X86GenSubtarget.inc"
-#include "llvm/Module.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
#if defined(_MSC_VER)
- #include <intrin.h>
+#include <intrin.h>
#endif
-static cl::opt<X86Subtarget::AsmWriterFlavorTy>
-AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
- cl::desc("Choose style of code to emit from X86 backend:"),
- cl::values(
- clEnumValN(X86Subtarget::ATT, "att", "Emit AT&T-style assembly"),
- clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"),
- clEnumValEnd));
-
-
-/// True if accessing the GV requires an extra load. For Windows, dllimported
-/// symbols are indirect, loading the value at address GV rather then the
-/// value of GV itself. This means that the GlobalAddress must be in the base
-/// or index register of the address, not the GV offset field.
-bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
- const TargetMachine& TM,
- bool isDirectCall) const
-{
- // FIXME: PIC
- if (TM.getRelocationModel() != Reloc::Static &&
- TM.getCodeModel() != CodeModel::Large) {
+/// ClassifyGlobalReference - Classify a global variable reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+ // DLLImport only exists on Windows; it is implemented as a load from a
+ // DLLIMPORT stub.
+ if (GV->hasDLLImportLinkage())
+ return X86II::MO_DLLIMPORT;
+
+ // GVs with ghost linkage (in JIT lazy compilation mode) do not require an
+ // extra load from a stub.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
if (isTargetDarwin()) {
- if (isDirectCall)
- return false;
- bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
- if (GV->hasHiddenVisibility() &&
- (Is64Bit || (!isDecl && !GV->hasCommonLinkage())))
- // If symbol visibility is hidden, the extra load is not needed if
- // target is x86-64 or the symbol is definitely defined in the current
- // translation unit.
- return false;
- return !isDirectCall && (isDecl || GV->isWeakForLinker());
- } else if (isTargetELF()) {
+ // If symbol visibility is hidden, the extra load is not needed if
+ // target is x86-64 or the symbol is definitely defined in the current
+ // translation unit.
+ if (GV->hasDefaultVisibility() &&
+ (isDecl || GV->isWeakForLinker()))
+ return X86II::MO_GOTPCREL;
+ } else {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
// Extra load is needed for all externally visible symbols.
- if (isDirectCall)
- return false;
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return false;
- return true;
- } else if (isTargetCygMing() || isTargetWindows()) {
- return (GV->hasDLLImportLinkage());
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
}
+
+ return X86II::MO_NO_FLAG;
}
- return false;
-}
+
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+ // Extra load is needed for all externally visible symbols.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
+ }
+
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_PIC_BASE_OFFSET;
-/// True if accessing the GV requires a register. This is a superset of the
-/// cases where GVRequiresExtraLoad is true. Some variations of PIC require
-/// a register, but not an extra load.
-bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV,
- const TargetMachine& TM,
- bool isDirectCall) const
-{
- if (GVRequiresExtraLoad(GV, TM, isDirectCall))
- return true;
- // Code below here need only consider cases where GVRequiresExtraLoad
- // returns false.
- if (TM.getRelocationModel() == Reloc::PIC_)
- return !isDirectCall &&
- (GV->hasLocalLinkage() || GV->hasExternalLinkage());
- return false;
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
+ }
+
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
+
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_NO_FLAG;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY;
+
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
+
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
}
+
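A hedged caller's-eye sketch of the new interface: lowering code asks the subtarget how a global should be referenced and attaches the returned X86II::MO_* flag to the operand (the wrapper function below is illustrative, not part of the patch):

    #include "X86Subtarget.h"

    // Classify GV once and reuse the operand flag during lowering.
    unsigned char classifyForLowering(const llvm::GlobalValue *GV,
                                      const llvm::TargetMachine &TM,
                                      const llvm::X86Subtarget &ST) {
      unsigned char OpFlags = ST.ClassifyGlobalReference(GV, TM);
      // e.g. X86II::MO_GOTPCREL selects a RIP-relative GOT load on x86-64.
      return OpFlags;
    }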
/// getBZeroEntry - This function returns the name of a function which has an
/// interface like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered preferable over memset with zero
@@ -120,9 +151,9 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
/// specified arguments. If we can't run cpuid on the host, return true.
-bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
- unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64)
+static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
#if defined(__GNUC__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
asm ("movq\t%%rbx, %%rsi\n\t"
@@ -192,18 +223,19 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
char c[12];
} text;
- if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
return;
- X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+ GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 23) & 0x1) X86SSELevel = MMX;
- if ((EDX >> 25) & 0x1) X86SSELevel = SSE1;
- if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
- if (ECX & 0x1) X86SSELevel = SSE3;
- if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
- if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
- if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
+ if ((EDX >> 15) & 1) HasCMov = true;
+ if ((EDX >> 23) & 1) X86SSELevel = MMX;
+ if ((EDX >> 25) & 1) X86SSELevel = SSE1;
+ if ((EDX >> 26) & 1) X86SSELevel = SSE2;
+ if (ECX & 0x1) X86SSELevel = SSE3;
+ if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
+ if ((ECX >> 19) & 1) X86SSELevel = SSE41;
+ if ((ECX >> 20) & 1) X86SSELevel = SSE42;
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
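For comparison, GCC ships a cpuid wrapper that covers the same leaf-0 and leaf-1 queries without hand-written asm; a hedged sketch using it (the backend keeps its own asm so that it also builds with MSVC):

    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    #include <cpuid.h>

    static bool hasCMovSketch() {
      unsigned EAX, EBX, ECX, EDX;
      if (!__get_cpuid(1, &EAX, &EBX, &ECX, &EDX))
        return false;          // cpuid leaf 1 not supported
      return (EDX >> 15) & 1;  // CMOV feature bit, as tested above
    }
    #endif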
@@ -218,7 +250,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
- X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
@@ -227,13 +259,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
static const char *GetCurrentX86CPU() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
return "generic";
unsigned Family = 0;
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
- X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
bool HasSSE3 = (ECX & 0x1);
@@ -242,7 +274,7 @@ static const char *GetCurrentX86CPU() {
char c[12];
} text;
- X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
switch (Family) {
case 3:
@@ -319,9 +351,7 @@ static const char *GetCurrentX86CPU() {
}
case 15:
if (HasSSE3) {
- switch (Model) {
- default: return "k8-sse3";
- }
+ return "k8-sse3";
} else {
switch (Model) {
case 1: return "opteron";
@@ -330,9 +360,7 @@ static const char *GetCurrentX86CPU() {
}
}
case 16:
- switch (Model) {
- default: return "amdfam10";
- }
+ return "amdfam10";
default:
return "generic";
}
@@ -341,11 +369,12 @@ static const char *GetCurrentX86CPU() {
}
}
-X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
- : AsmFlavor(AsmWriterFlavor)
- , PICStyle(PICStyles::None)
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
+ bool is64Bit)
+ : PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
+ , HasCMov(false)
, HasX86_64(false)
, HasSSE4A(false)
, HasAVX(false)
@@ -384,15 +413,14 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
if (Is64Bit)
HasX86_64 = true;
- DOUT << "Subtarget features: SSELevel " << X86SSELevel
- << ", 3DNowLevel " << X863DNowLevel
- << ", 64bit " << HasX86_64 << "\n";
+ DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n");
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
- const std::string& TT = M.getTargetTriple();
if (TT.length() > 5) {
size_t Pos;
if ((Pos = TT.find("-darwin")) != std::string::npos) {
@@ -415,38 +443,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
TargetType = isWindows;
} else if (TT.find("windows") != std::string::npos) {
TargetType = isWindows;
- }
- else if (TT.find("-cl") != std::string::npos) {
+ } else if (TT.find("-cl") != std::string::npos) {
TargetType = isDarwin;
DarwinVers = 9;
}
- } else if (TT.empty()) {
-#if defined(__CYGWIN__)
- TargetType = isCygwin;
-#elif defined(__MINGW32__) || defined(__MINGW64__)
- TargetType = isMingw;
-#elif defined(__APPLE__)
- TargetType = isDarwin;
-#if __APPLE_CC__ > 5400
- DarwinVers = 9; // GCC 5400+ is Leopard.
-#else
- DarwinVers = 8; // Minimum supported darwin is Tiger.
-#endif
-
-#elif defined(_WIN32) || defined(_WIN64)
- TargetType = isWindows;
-#elif defined(__linux__)
- // Linux doesn't imply ELF, but we don't currently support anything else.
- TargetType = isELF;
- IsLinux = true;
-#endif
- }
-
- // If the asm syntax hasn't been overridden on the command line, use whatever
- // the target wants.
- if (AsmFlavor == X86Subtarget::Unset) {
- AsmFlavor = (TargetType == isWindows)
- ? X86Subtarget::Intel : X86Subtarget::ATT;
}
// Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0d1434f8e999..a2e368de6f0e 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -18,23 +18,22 @@
#include <string>
namespace llvm {
-class Module;
class GlobalValue;
class TargetMachine;
+/// PICStyles - The X86 backend supports a number of different styles of PIC.
+///
namespace PICStyles {
enum Style {
- Stub, GOT, RIPRel, WinPIC, None
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
};
}
class X86Subtarget : public TargetSubtarget {
-public:
- enum AsmWriterFlavorTy {
- // Note: This numbering has to match the GCC assembler dialects for inline
- // asm alternatives to work right.
- ATT = 0, Intel = 1, Unset
- };
protected:
enum X86SSEEnum {
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
@@ -44,10 +43,6 @@ protected:
NoThreeDNow, ThreeDNow, ThreeDNowA
};
- /// AsmFlavor - Which x86 asm dialect to use.
- ///
- AsmWriterFlavorTy AsmFlavor;
-
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -60,6 +55,10 @@ protected:
///
X863DNowEnum X863DNowLevel;
+ /// HasCMov - True if this processor has conditional move instructions
+ /// (generally Pentium Pro and later).
+ bool HasCMov;
+
/// HasX86_64 - True if the processor supports X86-64 instructions.
///
bool HasX86_64;
@@ -95,7 +94,7 @@ protected:
unsigned MaxInlineSizeThreshold;
private:
- /// Is64Bit - True if the processor supports 64-bit instructions and module
+ /// Is64Bit - True if the processor supports 64-bit instructions and
/// pointer size is 64 bit.
bool Is64Bit;
@@ -105,9 +104,9 @@ public:
} TargetType;
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
+ X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -145,66 +144,67 @@ public:
bool hasAVX() const { return HasAVX; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
-
bool isBTMemSlow() const { return IsBTMemSlow; }
- unsigned getAsmFlavor() const {
- return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
- }
-
- bool isFlavorAtt() const { return AsmFlavor == ATT; }
- bool isFlavorIntel() const { return AsmFlavor == Intel; }
-
bool isTargetDarwin() const { return TargetType == isDarwin; }
- bool isTargetELF() const {
- return TargetType == isELF;
- }
+ bool isTargetELF() const { return TargetType == isELF; }
+
bool isTargetWindows() const { return TargetType == isWindows; }
bool isTargetMingw() const { return TargetType == isMingw; }
- bool isTargetCygMing() const { return (TargetType == isMingw ||
- TargetType == isCygwin); }
bool isTargetCygwin() const { return TargetType == isCygwin; }
+ bool isTargetCygMing() const {
+ return TargetType == isMingw || TargetType == isCygwin;
+ }
+
+ /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
+ bool isTargetCOFF() const {
+ return TargetType == isMingw || TargetType == isCygwin ||
+ TargetType == isWindows;
+ }
+
bool isTargetWin64() const {
- return (Is64Bit && (TargetType == isMingw || TargetType == isWindows));
+ return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
}
std::string getDataLayout() const {
const char *p;
if (is64Bit())
p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
- else {
- if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
- else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
- }
+ else if (isTargetDarwin())
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ else
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
return std::string(p);
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
- bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
- bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
+
+ bool isPICStyleStubPIC() const {
+ return PICStyle == PICStyles::StubPIC;
+ }
+
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+           PICStyle == PICStyles::StubPIC;
+  }
- /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+ /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
+ /// 10 = Snow Leopard, etc.
unsigned getDarwinVers() const { return DarwinVers; }
/// isLinux - Return true if the target is "Linux".
bool isLinux() const { return IsLinux; }
- /// True if accessing the GV requires an extra load. For Windows, dllimported
-  /// symbols are indirect, loading the value at address GV rather than the
- /// value of GV itself. This means that the GlobalAddress must be in the base
- /// or index register of the address, not the GV offset field.
- bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
- bool isDirectCall) const;
-
- /// True if accessing the GV requires a register. This is a superset of the
- /// cases where GVRequiresExtraLoad is true. Some variations of PIC require
- /// a register, but not an extra load.
- bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM,
- bool isDirectCall) const;
+
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM)const;
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
@@ -224,13 +224,6 @@ public:
unsigned getSpecialAddressLatency() const;
};
-namespace X86 {
- /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
- /// the specified arguments. If we can't run cpuid on the host, return true.
- bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
- unsigned *rECX, unsigned *rEDX);
-}
-
} // End llvm namespace
#endif
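The header rework above drops the WinPIC and generic Stub styles: Darwin's i386 stubs now remember whether they came from -fPIC (StubPIC) or -mdynamic-no-pic (StubDynamicNoPIC), and clients test them through predicates rather than comparing the enum directly. A minimal standalone sketch of the new predicate surface (SubtargetSketch is illustrative, not LLVM code):

    #include <cassert>

    // Illustrative copy of the reworked enum and two of the new predicates.
    namespace PICStyles {
      enum Style { StubPIC, StubDynamicNoPIC, GOT, RIPRel, None };
    }

    struct SubtargetSketch {
      PICStyles::Style PICStyle;
      bool isPICStyleStubPIC() const { return PICStyle == PICStyles::StubPIC; }
      bool isPICStyleStubAny() const {
        return PICStyle == PICStyles::StubPIC ||
               PICStyle == PICStyles::StubDynamicNoPIC;
      }
    };

    int main() {
      SubtargetSketch S = { PICStyles::StubDynamicNoPIC }; // i386-darwin, -mdynamic-no-pic
      assert(S.isPICStyleStubAny() && !S.isPICStyleStubPIC());
      return 0;
    }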
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index b000914c9203..a61de1cd182a 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,172 +11,134 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetAsmInfo.h"
+#include "X86MCAsmInfo.h"
#include "X86TargetMachine.h"
#include "X86.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-/// X86TargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int X86TargetMachineModule;
-int X86TargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<X86_32TargetMachine>
-X("x86", "32-bit X86: Pentium-Pro and above");
-static RegisterTarget<X86_64TargetMachine>
-Y("x86-64", "64-bit X86: EM64T and AMD64");
-
-// Force static initialization.
-extern "C" void LLVMInitializeX86Target() { }
-
-// No assembler printer by default
-X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
-
-const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
- if (Subtarget.isFlavorIntel())
- return new X86WinTargetAsmInfo(*this);
- else
- switch (Subtarget.TargetType) {
- case X86Subtarget::isDarwin:
- return new X86DarwinTargetAsmInfo(*this);
- case X86Subtarget::isELF:
- return new X86ELFTargetAsmInfo(*this);
- case X86Subtarget::isMingw:
- case X86Subtarget::isCygwin:
- return new X86COFFTargetAsmInfo(*this);
- case X86Subtarget::isWindows:
- return new X86WinTargetAsmInfo(*this);
- default:
- return new X86GenericTargetAsmInfo(*this);
- }
-}
-
-unsigned X86_32TargetMachine::getJITMatchQuality() {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- return 10;
-#endif
- return 0;
-}
-
-unsigned X86_64TargetMachine::getJITMatchQuality() {
-#if defined(__x86_64__) || defined(_M_AMD64)
- return 10;
-#endif
- return 0;
+static const MCAsmInfo *createMCAsmInfo(const Target &T,
+ const StringRef &TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ case Triple::Darwin:
+ return new X86MCAsmInfoDarwin(TheTriple);
+ case Triple::MinGW32:
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ return new X86MCAsmInfoCOFF(TheTriple);
+ case Triple::Win32:
+ return new X86WinMCAsmInfo(TheTriple);
+ default:
+ return new X86ELFMCAsmInfo(TheTriple);
+ }
}
-unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "i[3-9]86-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
- TT[4] == '-' && TT[1] - '3' < 6)
- return 20;
- // If the target triple is something non-X86, we don't match.
- if (!TT.empty()) return 0;
+extern "C" void LLVMInitializeX86Target() {
+ // Register the target.
+ RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
+ RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer32)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
+ // Register the target asm info.
+ RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
+ RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
- return getJITMatchQuality()/2;
+ // Register the code emitter.
+ TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter);
}
-unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "x86_64-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
- TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
- return 20;
-
- // We strongly match "amd64-*".
- if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' &&
- TT[3] == '6' && TT[4] == '4' && TT[5] == '-')
- return 20;
-
- // If the target triple is something non-X86-64, we don't match.
- if (!TT.empty()) return 0;
-
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer64)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
- return getJITMatchQuality()/2;
-}
-
-X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
- : X86TargetMachine(M, FS, false) {
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, FS, false) {
}
-X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
- : X86TargetMachine(M, FS, true) {
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, FS, true) {
}
-/// X86TargetMachine ctor - Create an ILP32 architecture model
+/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
- bool is64Bit)
- : Subtarget(M, FS, is64Bit),
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
- Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+ Subtarget.getStackAlignment(),
+ (Subtarget.isTargetWin64() ? -40 :
+ (Subtarget.is64Bit() ? -8 : -4))),
InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
- // FIXME: Correctly select PIC model for Win64 stuff
+
+ // If no relocation model was picked, default as appropriate for the target.
if (getRelocationModel() == Reloc::Default) {
- if (Subtarget.isTargetDarwin() ||
- (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()))
- setRelocationModel(Reloc::DynamicNoPIC);
- else
+ if (!Subtarget.isTargetDarwin())
setRelocationModel(Reloc::Static);
+ else if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
}
- // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC
- // is defined as a model for code which may be used in static or
- // dynamic executables but not necessarily a shared library. On ELF
- // implement this by using the Static model.
- if (Subtarget.isTargetELF() &&
- getRelocationModel() == Reloc::DynamicNoPIC)
- setRelocationModel(Reloc::Static);
-
- if (Subtarget.is64Bit()) {
- // No DynamicNoPIC support under X86-64.
- if (getRelocationModel() == Reloc::DynamicNoPIC)
+ assert(getRelocationModel() != Reloc::Default &&
+ "Relocation mode not picked");
+
+ // If no code model is picked, default to small.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+  // compile in -static mode; on x86-64 we use PIC.
+ if (getRelocationModel() == Reloc::DynamicNoPIC) {
+ if (is64Bit)
setRelocationModel(Reloc::PIC_);
- // Default X86-64 code model is small.
- if (getCodeModel() == CodeModel::Default)
- setCodeModel(CodeModel::Small);
+ else if (!Subtarget.isTargetDarwin())
+ setRelocationModel(Reloc::Static);
}
- if (Subtarget.isTargetCygMing())
- Subtarget.setPICStyle(PICStyles::WinPIC);
- else if (Subtarget.isTargetDarwin()) {
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (getRelocationModel() == Reloc::Static &&
+ Subtarget.isTargetDarwin() &&
+ is64Bit)
+ setRelocationModel(Reloc::PIC_);
+
+ // Determine the PICStyle based on the target selected.
+ if (getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetCygMing()) {
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetDarwin()) {
if (Subtarget.is64Bit())
Subtarget.setPICStyle(PICStyles::RIPRel);
- else
- Subtarget.setPICStyle(PICStyles::Stub);
+ else if (getRelocationModel() == Reloc::PIC_)
+ Subtarget.setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(getRelocationModel() == Reloc::DynamicNoPIC);
+ Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
} else if (Subtarget.isTargetELF()) {
if (Subtarget.is64Bit())
Subtarget.setPICStyle(PICStyles::RIPRel);
else
Subtarget.setPICStyle(PICStyles::GOT);
}
+
+ // Finally, if we have "none" as our PIC style, force to static mode.
+ if (Subtarget.getPICStyle() == PICStyles::None)
+ setRelocationModel(Reloc::Static);
}
//===----------------------------------------------------------------------===//
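The constructor above now centralizes relocation-model defaulting: non-Darwin targets default to Static, 64-bit Darwin to PIC, and 32-bit Darwin to DynamicNoPIC; later clauses then fold DynamicNoPIC to PIC on x86-64 and to Static off Darwin, and force 64-bit Darwin off Static since Mach-O cannot express it. A hypothetical distillation of just the defaulting step (the enum and helper are illustrative, not LLVM API):

    #include <cassert>

    // Illustrative restatement of the "no model picked" branch above.
    enum RelocModel { StaticRM, PICRM, DynamicNoPICRM };

    static RelocModel pickDefaultReloc(bool IsDarwin, bool Is64Bit) {
      if (!IsDarwin) return StaticRM;           // non-Darwin defaults to static
      return Is64Bit ? PICRM : DynamicNoPICRM;  // Darwin: PIC on x86-64
    }

    int main() {
      assert(pickDefaultReloc(false, true)  == StaticRM);
      assert(pickDefaultReloc(true,  true)  == PICRM);
      assert(pickDefaultReloc(true,  false) == DynamicNoPICRM);
      return 0;
    }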
@@ -212,33 +174,16 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
return true; // -print-machineinstr should print after this.
}
-bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // FIXME: Move this somewhere else!
- // On Darwin, override 64-bit static relocation to pic_ since the
- // assembler doesn't support it.
- if (DefRelocModel == Reloc::Static &&
- Subtarget.isTargetDarwin() && Subtarget.is64Bit() &&
- getCodeModel() == CodeModel::Small)
- setRelocationModel(Reloc::PIC_);
-
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
- return false;
-}
-
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
if (DefRelocModel == Reloc::Default &&
- (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyles::None);
+ }
// 64-bit JIT places everything in the same buffer except external functions.
// On Darwin, use small code model but hack the call instruction for
@@ -251,24 +196,20 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
PM.add(createX86CodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
return false;
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
if (DefRelocModel == Reloc::Default &&
- (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyles::None);
+ }
// 64-bit JIT places everything in the same buffer except external functions.
// On Darwin, use small code model but hack the call instruction for
@@ -281,40 +222,34 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
PM.add(createX86JITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
return false;
}
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ PM.add(createX86ObjectCodeEmitterPass(*this, OCE));
+ return false;
+}
+
bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
PM.add(createX86CodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
PM.add(createX86JITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ PM.add(createX86ObjectCodeEmitterPass(*this, OCE));
+ return false;
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 90a5cc243d26..b538408e8a45 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -26,7 +26,7 @@
namespace llvm {
-class raw_ostream;
+class formatted_raw_ostream;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
@@ -38,18 +38,9 @@ class X86TargetMachine : public LLVMTargetMachine {
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- X86TargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+ X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -66,50 +57,41 @@ public:
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
- static unsigned getModuleMatchQuality(const Module &M);
- static unsigned getJITMatchQuality();
-
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
-
// Set up the pass pipeline.
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ MachineCodeEmitter &MCE);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ JITCodeEmitter &JCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ MachineCodeEmitter &MCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ JITCodeEmitter &JCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ ObjectCodeEmitter &OCE);
};
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
public:
- X86_32TargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
+ X86_32TargetMachine(const Target &T, const std::string &M,
+ const std::string &FS);
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
public:
- X86_64TargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
+ X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
};
} // End llvm namespace
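Match-quality probing is gone from the header; construction now flows through the target registry populated in LLVMInitializeX86Target above. A sketch of the client side (treat the exact signatures as approximations for this vintage of the API):

    // Targets are looked up by triple instead of via getModuleMatchQuality.
    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include <string>

    llvm::TargetMachine *makeTM(const std::string &TripleStr) {
      std::string Err;
      const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleStr, Err);
      if (!T)
        return 0;                                 // no target registered for triple
      return T->createTargetMachine(TripleStr, /*FS=*/"");
    }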
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 000000000000..d39b3c432420
--- /dev/null
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+using namespace llvm;
+
+const MCExpr *X8632_MachoTargetObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ IsIndirect = true;
+ IsPCRel = false;
+
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub gets
+ // emitted by the asmprinter.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = getContext().GetOrCreateSymbol(Name.str());
+ }
+
+ return MCSymbolRefExpr::Create(Sym, getContext());
+}
+
+const MCExpr *X8664_MachoTargetObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ IsIndirect = true;
+ IsPCRel = true;
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, false);
+ Name += "@GOTPCREL";
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Name.str(), getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+}
+
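The x86-64 path above emits a dwarf global reference as sym@GOTPCREL plus a constant 4. A compact restatement using only the factory calls visible in the new file (the helper name is hypothetical, and the includes and using-declaration of the file above are assumed):

    // Builds "sym@GOTPCREL + 4", the indirect pc-relative form used on
    // Darwin/x86-64.
    static const MCExpr *gotpcrelPlus4(const std::string &MangledName,
                                       MCContext &Ctx) {
      std::string Name = MangledName + "@GOTPCREL";
      const MCExpr *Sym  = MCSymbolRefExpr::Create(Name, Ctx);
      const MCExpr *Four = MCConstantExpr::Create(4, Ctx);
      return MCBinaryExpr::CreateAdd(Sym, Four, Ctx);
    }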
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 000000000000..377a93bb7152
--- /dev/null
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
+ /// Darwin/x86-32.
+ class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+
+ virtual const MCExpr *
+ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const;
+ };
+
+ /// X8664_MachoTargetObjectFile - This TLOF implementation is used for
+ /// Darwin/x86-64.
+ class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+
+ virtual const MCExpr *
+ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/AsmPrinter/CMakeLists.txt b/lib/Target/XCore/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..7c7c2f4ded04
--- /dev/null
+++ b/lib/Target/XCore/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMXCoreAsmPrinter
+ XCoreAsmPrinter.cpp
+ )
+add_dependencies(LLVMXCoreAsmPrinter XCoreCodeGenTable_gen)
diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile
new file mode 100644
index 000000000000..82dc1df95d3b
--- /dev/null
+++ b/lib/Target/XCore/AsmPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreAsmPrinter
+
+# Hack: we need to include 'main' XCore target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
new file mode 100644
index 000000000000..e58edda0c5dc
--- /dev/null
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -0,0 +1,374 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreMCAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+  cl::desc("Maximum number of threads (for emulated thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class VISIBILITY_HIDDEN XCoreAsmPrinter : public AsmPrinter {
+ const XCoreSubtarget &Subtarget;
+ public:
+ explicit XCoreAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
+ : AsmPrinter(O, TM, T, V),
+ Subtarget(TM.getSubtarget<XCoreSubtarget>()) {}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printMemOperand(const MachineInstr *MI, int opNum);
+ void printOperand(const MachineInstr *MI, int opNum);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void emitGlobalDirective(const std::string &name);
+ void emitExternDirective(const std::string &name);
+
+ void emitArrayBound(const std::string &name, const GlobalVariable *GV);
+ virtual void PrintGlobalVariable(const GlobalVariable *GV);
+
+ void emitFunctionStart(MachineFunction &MF);
+ void emitFunctionEnd(MachineFunction &MF);
+
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ }
+ };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreAsmPrinter::
+emitGlobalDirective(const std::string &name)
+{
+ O << MAI->getGlobalDirective() << name;
+ O << "\n";
+}
+
+void XCoreAsmPrinter::
+emitExternDirective(const std::string &name)
+{
+ O << "\t.extern\t" << name;
+ O << '\n';
+}
+
+void XCoreAsmPrinter::
+emitArrayBound(const std::string &name, const GlobalVariable *GV)
+{
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType()))
+ {
+ O << MAI->getGlobalDirective() << name << ".globound" << "\n";
+ O << MAI->getSetDirective() << name << ".globound" << ","
+ << ATy->getNumElements() << "\n";
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ O << MAI->getWeakDefDirective() << name << ".globound" << "\n";
+ }
+ }
+}
+
+void XCoreAsmPrinter::PrintGlobalVariable(const GlobalVariable *GV) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (!GV->hasInitializer() ||
+ EmitSpecialLLVMGlobal(GV))
+ return;
+
+ const TargetData *TD = TM.getTargetData();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
+
+ std::string name = Mang->getMangledName(GV);
+ Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ O << "\t.cc_top " << name << ".data," << name << "\n";
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ llvm_report_error("AppendingLinkage is not supported by this target!");
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(name, GV);
+ emitGlobalDirective(name);
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ O << MAI->getWeakDefDirective() << name << "\n";
+ }
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ llvm_unreachable("Should not have any unmaterialized functions!");
+ case GlobalValue::DLLImportLinkage:
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
+ case GlobalValue::DLLExportLinkage:
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GV, 2);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << "," << Size << "\n";
+ }
+ O << name << ":\n";
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i) {
+ EmitGlobalConstant(C);
+ }
+ }
+ if (Size < 4) {
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+    // are padded to 32 bits.
+ EmitZeros(4 - Size);
+ }
+
+ // Mark the end of the global
+ O << "\t.cc_bottom " << name << ".data\n";
+}
+
+/// Emit the directives on the start of functions
+void XCoreAsmPrinter::emitFunctionStart(MachineFunction &MF) {
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+
+ // Mark the start of the function
+ O << "\t.cc_top " << CurrentFnName << ".function," << CurrentFnName << "\n";
+
+ switch (F->getLinkage()) {
+ default: llvm_unreachable("Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ case Function::LinkerPrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ emitGlobalDirective(CurrentFnName);
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ O << MAI->getGlobalDirective() << CurrentFnName << "\n";
+ O << MAI->getWeakDefDirective() << CurrentFnName << "\n";
+ break;
+ }
+ // (1 << 1) byte aligned
+ EmitAlignment(MF.getAlignment(), F, 1);
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << CurrentFnName << ",@function\n";
+ }
+ O << CurrentFnName << ":\n";
+}
+
+/// Emit the directives on the end of functions
+void XCoreAsmPrinter::
+emitFunctionEnd(MachineFunction &MF)
+{
+ // Mark the end of the function
+ O << "\t.cc_bottom " << CurrentFnName << ".function\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit the function start directives
+ emitFunctionStart(MF);
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ EmitBasicBlockStart(I);
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+
+ // Each Basic Block is separated by a newline
+ O << '\n';
+ }
+
+ // Emit function end directives
+ emitFunctionEnd(MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum)
+{
+ printOperand(MI, opNum);
+
+ if (MI->getOperand(opNum+1).isImm()
+ && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1);
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << Mang->getMangledName(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ processDebugLoc(MI, true);
+
+ // Check for mov mnemonic
+ unsigned src, dst, srcSR, dstSR;
+ if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
+ O << "\tmov " << getRegisterName(dst) << ", ";
+ O << getRegisterName(src) << '\n';
+ return;
+ }
+ printInstruction(MI);
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+
+ processDebugLoc(MI, false);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
+ RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
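PrintGlobalVariable above implements thread-local storage by emulation: the initializer is emitted once per thread up to -xcore-max-threads, and any remaining sub-word tail is padded to a 32-bit word per the ABI. A standalone mirror of that size computation (emittedSize is illustrative only):

    #include <cstdio>

    // Mirrors the size logic in PrintGlobalVariable: replicate per thread,
    // then pad any sub-word remainder to a full 32-bit word.
    static unsigned emittedSize(unsigned TypeSize, bool ThreadLocal,
                                unsigned MaxThreads) {
      unsigned Size = TypeSize;
      if (ThreadLocal)
        Size *= MaxThreads;   // one copy of the initializer per thread
      if (Size < 4)
        Size = 4;             // EmitZeros(4 - Size) pads to 32 bits
      return Size;
    }

    int main() {
      std::printf("%u %u\n", emittedSize(2, true, 8),  // 16: 8 threads x 2 bytes
                  emittedSize(2, false, 8));           // 4: padded scalar
    }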
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index a7aba14a7a14..0965323b998a 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -11,13 +11,14 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv)
tablegen(XCoreGenSubtarget.inc -gen-subtarget)
add_llvm_target(XCore
- XCoreAsmPrinter.cpp
+ MCSectionXCore.cpp
XCoreFrameInfo.cpp
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
+ XCoreMCAsmInfo.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
- XCoreTargetAsmInfo.cpp
XCoreTargetMachine.cpp
+ XCoreTargetObjectFile.cpp
)
diff --git a/lib/Target/XCore/MCSectionXCore.cpp b/lib/Target/XCore/MCSectionXCore.cpp
new file mode 100644
index 000000000000..5acceafe9ea3
--- /dev/null
+++ b/lib/Target/XCore/MCSectionXCore.cpp
@@ -0,0 +1,35 @@
+//===- MCSectionXCore.cpp - XCore-specific section representation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MCSectionXCore class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCSectionXCore.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionXCore *
+MCSectionXCore::Create(const StringRef &Section, unsigned Type,
+ unsigned Flags, SectionKind K,
+ bool isExplicit, MCContext &Ctx) {
+ return new (Ctx) MCSectionXCore(Section, Type, Flags, K, isExplicit);
+}
+
+
+/// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp
+/// section flags.
+void MCSectionXCore::PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ if (getFlags() & MCSectionXCore::SHF_CP_SECTION)
+ OS << 'c';
+ if (getFlags() & MCSectionXCore::SHF_DP_SECTION)
+ OS << 'd';
+}
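PrintTargetSpecificSectionFlags above maps the two XCore section flags to the 'c' and 'd' letters the linker groups on. A standalone rendering of the same mapping (the concrete bit positions are assumed here; upstream they derive from FIRST_TARGET_DEP_FLAG in the header below):

    #include <cstdio>
    #include <string>

    // Assumed flag values for illustration; see MCSectionXCore.h for the
    // real definitions in terms of FIRST_TARGET_DEP_FLAG.
    enum { SHF_CP_SECTION = 1u << 16, SHF_DP_SECTION = 1u << 17 };

    static std::string flagLetters(unsigned Flags) {
      std::string S;
      if (Flags & SHF_CP_SECTION) S += 'c';  // constant-pool section
      if (Flags & SHF_DP_SECTION) S += 'd';  // data-pool section
      return S;
    }

    int main() { std::printf("%s\n", flagLetters(SHF_CP_SECTION).c_str()); }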
diff --git a/lib/Target/XCore/MCSectionXCore.h b/lib/Target/XCore/MCSectionXCore.h
new file mode 100644
index 000000000000..02f8f95572c8
--- /dev/null
+++ b/lib/Target/XCore/MCSectionXCore.h
@@ -0,0 +1,54 @@
+//===- MCSectionXCore.h - XCore-specific section representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionXCore class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCSECTION_XCORE_H
+#define LLVM_MCSECTION_XCORE_H
+
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+class MCSectionXCore : public MCSectionELF {
+ MCSectionXCore(const StringRef &Section, unsigned Type, unsigned Flags,
+ SectionKind K, bool isExplicit)
+ : MCSectionELF(Section, Type, Flags, K, isExplicit) {}
+
+public:
+
+ enum {
+ /// SHF_CP_SECTION - All sections with the "c" flag are grouped together
+ /// by the linker to form the constant pool and the cp register is set to
+ /// the start of the constant pool by the boot code.
+ SHF_CP_SECTION = FIRST_TARGET_DEP_FLAG,
+
+ /// SHF_DP_SECTION - All sections with the "d" flag are grouped together
+ /// by the linker to form the data section and the dp register is set to
+ /// the start of the section by the boot code.
+ SHF_DP_SECTION = FIRST_TARGET_DEP_FLAG << 1
+ };
+
+ static MCSectionXCore *Create(const StringRef &Section, unsigned Type,
+ unsigned Flags, SectionKind K,
+ bool isExplicit, MCContext &Ctx);
+
+
+ /// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp
+ /// section flags.
+ virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
+ raw_ostream &OS) const;
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index 568df70ab63a..bd3b52a7ac10 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMXCore
+LIBRARYNAME = LLVMXCoreCodeGen
TARGET = XCore
# Make sure that tblgen is run, first thing.
@@ -17,5 +17,7 @@ BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
XCoreGenSubtarget.inc
+DIRS = AsmPrinter TargetInfo
+
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..0a568de1624b
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMXCoreInfo
+ XCoreTargetInfo.cpp
+ )
+
+add_dependencies(LLVMXCoreInfo XCoreTable_gen)
diff --git a/lib/Target/XCore/TargetInfo/Makefile b/lib/Target/XCore/TargetInfo/Makefile
new file mode 100644
index 000000000000..07473d223f6b
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/XCore/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
new file mode 100644
index 000000000000..7aa8965c4ac6
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheXCoreTarget;
+
+extern "C" void LLVMInitializeXCoreTargetInfo() {
+ RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+}
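With the target split into Info, CodeGen, and AsmPrinter libraries, a host that wants the whole backend now runs each library's initializer. A sketch of that client side, mirroring the X86 pattern earlier in this patch (LLVMInitializeXCoreTarget is inferred from that pattern rather than shown here, and linking against LLVMXCoreInfo / LLVMXCoreCodeGen / LLVMXCoreAsmPrinter is assumed):

    // Client-side initialization for the split XCore libraries.
    extern "C" void LLVMInitializeXCoreTargetInfo();
    extern "C" void LLVMInitializeXCoreTarget();
    extern "C" void LLVMInitializeXCoreAsmPrinter();

    static void initXCore() {
      LLVMInitializeXCoreTargetInfo();  // registers TheXCoreTarget by triple
      LLVMInitializeXCoreTarget();      // registers the target machines
      LLVMInitializeXCoreAsmPrinter();  // hooks the asm printer onto the target
    }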
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index d95aab3979a5..8937fbe123c6 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -21,12 +21,12 @@ namespace llvm {
class FunctionPass;
class TargetMachine;
class XCoreTargetMachine;
- class raw_ostream;
+ class formatted_raw_ostream;
FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
- FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS,
- XCoreTargetMachine &TM,
- bool Verbose);
+
+ extern Target TheXCoreTarget;
+
} // end namespace llvm;
// Defines symbolic names for XCore registers. This defines a mapping from
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 7a2dcdbf9fe5..b07445dd386f 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -30,27 +30,14 @@ def XCoreInstrInfo : InstrInfo {
}
//===----------------------------------------------------------------------===//
-// XCore Subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureXS1A
- : SubtargetFeature<"xs1a", "IsXS1A", "true",
- "Enable XS1A instructions">;
-
-def FeatureXS1B
- : SubtargetFeature<"xs1b", "IsXS1B", "true",
- "Enable XS1B instructions">;
-
-//===----------------------------------------------------------------------===//
// XCore processors supported.
//===----------------------------------------------------------------------===//
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"generic", [FeatureXS1A]>;
-def : Proc<"xs1a-generic", [FeatureXS1A]>;
-def : Proc<"xs1b-generic", [FeatureXS1B]>;
+def : Proc<"generic", []>;
+def : Proc<"xs1b-generic", []>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index eed34a4b635b..860b72f9402e 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -19,6 +19,7 @@
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <queue>
#include <set>
using namespace llvm;
@@ -159,69 +162,62 @@ InstructionSelect() {
SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- MVT NVT = N->getValueType(0);
+ EVT NVT = N->getValueType(0);
if (NVT == MVT::i32) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
if (Predicate_immMskBitp(N)) {
SDValue MskSize = Transform_msksize_xform(N);
- return CurDAG->getTargetNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize);
+ return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+ MVT::i32, MskSize);
}
else if (! Predicate_immU16(N)) {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
SDValue CPIdx =
- CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI.getPointerTy());
- return CurDAG->getTargetNode(XCore::LDWCP_lru6, dl, MVT::i32,
- MVT::Other, CPIdx,
- CurDAG->getEntryNode());
+ return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
}
break;
}
case ISD::SMUL_LOHI: {
// FIXME fold addition into the macc instruction
- if (!Subtarget.isXS1A()) {
- SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32)), 0);
- SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
- SDNode *ResNode = CurDAG->getTargetNode(XCore::MACCS_l4r, dl,
- MVT::i32, MVT::i32, Ops, 4);
- ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
- ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
- return NULL;
- }
- break;
+ SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
+ SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl,
+ MVT::i32, MVT::i32, Ops, 4);
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+ return NULL;
}
case ISD::UMUL_LOHI: {
// FIXME fold addition into the macc / lmul instruction
- SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+ SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32,
CurDAG->getTargetConstant(0, MVT::i32)), 0);
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
Zero, Zero };
- SDNode *ResNode = CurDAG->getTargetNode(XCore::LMUL_l6r, dl, MVT::i32,
- MVT::i32, Ops, 4);
+ SDNode *ResNode = CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32,
+ MVT::i32, Ops, 4);
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
return NULL;
}
case XCoreISD::LADD: {
- if (!Subtarget.isXS1A()) {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(2) };
- return CurDAG->getTargetNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- break;
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
}
case XCoreISD::LSUB: {
- if (!Subtarget.isXS1A()) {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(2) };
- return CurDAG->getTargetNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- break;
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
}
// Other cases are autogenerated.
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index cc11d3248139..5ef56c9ff299 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -16,6 +16,7 @@
#include "XCoreISelLowering.h"
#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
+#include "XCoreTargetObjectFile.h"
#include "XCoreTargetMachine.h"
#include "XCoreSubtarget.h"
#include "llvm/DerivedTypes.h"
@@ -32,6 +33,8 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
#include <queue>
#include <set>
@@ -48,12 +51,14 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
case XCoreISD::STWSP : return "XCoreISD::STWSP";
case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ case XCoreISD::LADD : return "XCoreISD::LADD";
+ case XCoreISD::LSUB : return "XCoreISD::LSUB";
default : return NULL;
}
}
XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
- : TargetLowering(XTM),
+ : TargetLowering(XTM, new XCoreTargetObjectFile()),
TM(XTM),
Subtarget(*XTM.getSubtargetImpl()) {
@@ -67,8 +72,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setIntDivIsCheap(false);
setShiftAmountType(MVT::i32);
- // shl X, 32 == 0
- setShiftAmountFlavor(Extend);
setStackPointerRegisterToSaveRestore(XCore::SP);
setSchedulingPreference(SchedulingForRegPressure);
@@ -88,13 +91,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
// 64bit
- if (!Subtarget.isXS1A()) {
- setOperationAction(ISD::ADD, MVT::i64, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Custom);
- }
- if (Subtarget.isXS1A()) {
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- }
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -112,9 +110,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- // RET must be custom lowered, to meet ABI requirements
- setOperationAction(ISD::RET, MVT::Other, Custom);
-
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
// Thread Local Storage
@@ -130,7 +125,11 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
-
+
+ // Custom expand misaligned loads / stores.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
// Varargs
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
@@ -145,19 +144,24 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// Debug
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ maxStoresPerMemset = 4;
+ maxStoresPerMemmove = maxStoresPerMemcpy = 2;
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::STORE);
}
SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode())
{
- case ISD::CALL: return LowerCALL(Op, DAG);
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
@@ -166,7 +170,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
default:
- assert(0 && "unimplemented operand");
+ llvm_unreachable("unimplemented operand");
return SDValue();
}
}
@@ -178,7 +182,7 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
SelectionDAG &DAG) {
switch (N->getOpcode()) {
default:
- assert(0 && "Don't know how to custom expand this!");
+ llvm_unreachable("Don't know how to custom expand this!");
return;
case ISD::ADD:
case ISD::SUB:
@@ -214,17 +218,16 @@ getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG)
DebugLoc dl = GA.getDebugLoc();
if (isa<Function>(GV)) {
return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
- } else if (!Subtarget.isXS1A()) {
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee to determine constness
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
- }
- bool isConst = GVar && GVar->isConstant();
- if (isConst) {
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
- }
+ }
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine constness
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ bool isConst = GVar && GVar->isConstant();
+ if (isConst) {
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
}
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
@@ -265,14 +268,16 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
}
if (! GVar) {
- assert(0 && "Thread local object not a GlobalVariable?");
+ llvm_unreachable("Thread local object not a GlobalVariable?");
return SDValue();
}
const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
if (!Ty->isSized() || isZeroLengthArray(Ty)) {
- cerr << "Size of thread local object " << GVar->getName()
- << " is unknown\n";
- abort();
+#ifndef NDEBUG
+ errs() << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+#endif
+ llvm_unreachable(0);
}
SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
const TargetData *TD = TM.getTargetData();
@@ -288,21 +293,16 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// FIXME there isn't really debug info here
DebugLoc dl = CP->getDebugLoc();
- if (Subtarget.isXS1A()) {
- assert(0 && "Lowering of constant pool unimplemented");
- return SDValue();
+ EVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
} else {
- MVT PtrVT = Op.getValueType();
- SDValue Res;
- if (CP->isMachineConstantPoolEntry()) {
- Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
- } else {
- Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment());
- }
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
}
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
}
SDValue XCoreTargetLowering::
@@ -310,19 +310,211 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
{
// FIXME there isn't really debug info here
DebugLoc dl = Op.getDebugLoc();
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
}
+static bool
+IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
+ int64_t &Offset)
+{
+ if (Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+ ConstantSDNode *CN = 0;
+ if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ return false;
+ }
+ int64_t off = CN->getSExtValue();
+ const SDValue &Base = Addr.getOperand(0);
+ const SDValue *Root = &Base;
+ if (Base.getOpcode() == ISD::ADD &&
+ Base.getOperand(1).getOpcode() == ISD::SHL) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
+ .getOperand(1));
+ if (CN && (CN->getSExtValue() >= 2)) {
+ Root = &Base.getOperand(0);
+ }
+ }
+ if (isa<FrameIndexSDNode>(*Root)) {
+ // All frame indices are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
+ Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
+ // All dp / cp relative addresses are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG)
+{
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Unexpected extension type");
+ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ return SDValue();
+ }
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned load alone.
+ if (LD->getAlignment() >= ABIAlignment) {
+ return SDValue();
+ }
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Base;
+ int64_t Offset;
+ if (!LD->isVolatile() &&
+ IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
+ if (Offset % 4 == 0) {
+ // We've managed to infer better alignment information than the load
+ // already has. Use an aligned load.
+ return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
+ }
+ // Lower to
+ // ldw low, base[offset >> 2]
+ // ldw high, base[(offset >> 2) + 1]
+ // shr low_shifted, low, (offset & 0x3) * 8
+ // shl high_shifted, high, 32 - (offset & 0x3) * 8
+ // or result, low_shifted, high_shifted
+ SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
+ SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
+ SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
+ SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
+
+ SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
+
+ SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
+ LowAddr, NULL, 4);
+ SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
+ HighAddr, NULL, 4);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ if (LD->getAlignment() == 2) {
+ int SVOffset = LD->getSrcValueOffset();
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
+ LD->isVolatile(), 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
+ HighAddr, LD->getSrcValue(), SVOffset + 2,
+ MVT::i16, LD->isVolatile(), 2);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
+ DAG.getConstant(16, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // Lower to a call to __misaligned_load(BasePtr).
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, IntPtrTy, false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
+ Args, DAG, dl);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, dl);
+}
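// A minimal sketch of what the word-aligned path above computes, written
// as plain C++ (assumptions: little-endian word order per the XCore data
// layout, 'base' standing in for AlignedBase, and offset % 4 != 0, since
// the % 4 == 0 case is already handled by the single aligned load).
#include <cstdint>
static uint32_t misaligned_load_sketch(const uint32_t *base, int64_t offset) {
  uint32_t low   = base[offset >> 2];             // ldw low,  base[offset >> 2]
  uint32_t high  = base[(offset >> 2) + 1];       // ldw high, base[(offset >> 2) + 1]
  unsigned shift = (unsigned)(offset & 0x3) * 8;  // 8, 16 or 24
  return (low >> shift) | (high << (32 - shift)); // shr / shl / or combine
}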
+
+SDValue XCoreTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG)
+{
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+ assert(!ST->isTruncatingStore() && "Unexpected store type");
+ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
+ if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ return SDValue();
+ }
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned store alone.
+ if (ST->getAlignment() >= ABIAlignment) {
+ return SDValue();
+ }
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (ST->getAlignment() == 2) {
+ int SVOffset = ST->getSrcValueOffset();
+ SDValue Low = Value;
+ SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
+ DAG.getConstant(16, MVT::i32));
+ SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
+ ST->getSrcValue(), SVOffset, MVT::i16,
+ ST->isVolatile(), 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
+ ST->getSrcValue(), SVOffset + 2,
+ MVT::i16, ST->isVolatile(), 2);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
+ }
+
+ // Lower to a call to __misaligned_store(BasePtr, Value).
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
+ Args, DAG, dl);
+
+ return CallResult.second;
+}
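// The alignment-2 path above, as a plain C++ sketch (assumed: a 2-byte
// aligned pointer and little-endian halfword order): the i32 value is
// split into two aligned 16-bit stores at BasePtr and BasePtr + 2.
#include <cstdint>
static void misaligned_store_sketch(uint16_t *p, uint32_t value) {
  p[0] = (uint16_t)(value & 0xffff);  // truncating store of the low half
  p[1] = (uint16_t)(value >> 16);     // high half at byte offset 2
}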
+
SDValue XCoreTargetLowering::
ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
{
assert(N->getValueType(0) == MVT::i64 &&
(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Unknown operand to lower!");
- assert(!Subtarget.isXS1A() && "Cannot custom lower ADD/SUB on xs1a");
DebugLoc dl = N->getDebugLoc();
// Extract components
@@ -353,12 +545,12 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
SDValue XCoreTargetLowering::
LowerVAARG(SDValue Op, SelectionDAG &DAG)
{
- assert(0 && "unimplemented");
+ llvm_unreachable("unimplemented");
// FIX Arguments passed by reference need an extra dereference.
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- MVT VT = Node->getValueType(0);
+ EVT VT = Node->getValueType(0);
SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(1), V, 0);
// Increment the pointer, VAList, to the next vararg
@@ -398,35 +590,33 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
-//
-// The lower operations present on calling convention works on this order:
-// LowerCALL (virt regs --> phys regs, virt regs --> stack)
-// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
-// LowerRET (virt regs --> phys regs)
-// LowerCALL (phys regs --> virt regs)
-//
//===----------------------------------------------------------------------===//
#include "XCoreGenCallingConv.inc"
//===----------------------------------------------------------------------===//
-// CALL Calling Convention Implementation
+// Call Calling Convention Implementation
//===----------------------------------------------------------------------===//
-/// XCore custom CALL implementation
-SDValue XCoreTargetLowering::
-LowerCALL(SDValue Op, SelectionDAG &DAG)
-{
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- unsigned CallingConv = TheCall->getCallingConv();
+/// XCore call implementation
+SDValue
+XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
// For now, only CallingConv::C implemented
- switch (CallingConv)
+ switch (CallConv)
{
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
case CallingConv::C:
- return LowerCCCCallTo(Op, DAG, CallingConv);
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, Ins, dl, DAG, InVals);
}
}
@@ -434,24 +624,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG)
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isTailCall, sret.
-SDValue XCoreTargetLowering::
-LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC)
-{
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
+SDValue
+XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
// The ABI dictates there should be one stack slot available to the callee
// on function entry (for saving lr).
CCInfo.AllocateStack(4, 4);
- CCInfo.AnalyzeCallOperands(TheCall, CC_XCore);
+ CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -465,13 +656,11 @@ LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC)
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
-
- // Arguments start after the 5 first operands of ISD::CALL
- SDValue Arg = TheCall->getArg(i);
+ SDValue Arg = Outs[i].Val;
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
@@ -554,59 +743,58 @@ LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC)
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
- Op.getResNo());
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
}
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. Returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *XCoreTargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG) {
- bool isVarArg = TheCall->isVarArg();
- DebugLoc dl = TheCall->getDebugLoc();
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(TheCall, RetCC_XCore);
- SmallVector<SDValue, 8> ResultVals;
+ CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
- ResultVals.push_back(Chain.getValue(0));
+ InVals.push_back(Chain.getValue(0));
}
- ResultVals.push_back(Chain);
-
- // Merge everything together with a MERGE_VALUES node.
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).getNode();
+ return Chain;
}
//===----------------------------------------------------------------------===//
-// FORMAL_ARGUMENTS Calling Convention Implementation
+// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
-/// XCore custom FORMAL_ARGUMENTS implementation
-SDValue XCoreTargetLowering::
-LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
-{
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch(CC)
+/// XCore formal arguments implementation
+SDValue
+XCoreTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ switch (CallConv)
{
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
- return LowerCCCArguments(Op, DAG);
+ return LowerCCCArguments(Chain, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
}
}
@@ -614,27 +802,28 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: sret
-SDValue XCoreTargetLowering::
-LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
-{
+SDValue
+XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- unsigned CC = MF.getFunction()->getCallingConv();
- DebugLoc dl = Op.getDebugLoc();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_XCore);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize();
- SmallVector<SDValue, 16> ArgValues;
-
unsigned LRSaveSize = StackSlotSize;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -643,18 +832,21 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
if (VA.isRegLoc()) {
// Arguments passed in registers
- MVT RegVT = VA.getLocVT();
- switch (RegVT.getSimpleVT()) {
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
default:
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << RegVT.getSimpleVT()
- << "\n";
- abort();
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
case MVT::i32:
unsigned VReg = RegInfo.createVirtualRegister(
XCore::GRRegsRegisterClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgValues.push_back(DAG.getCopyFromReg(Root, dl, VReg, RegVT));
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
}
} else {
// sanity check
@@ -662,9 +854,9 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
if (ObjSize > StackSlotSize) {
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << VA.getLocVT().getSimpleVT()
- << "\n";
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << (unsigned)VA.getLocVT().getSimpleVT().SimpleTy
+ << "\n";
}
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(ObjSize,
@@ -673,7 +865,7 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0));
}
}
@@ -702,14 +894,14 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
unsigned VReg = RegInfo.createVirtualRegister(
XCore::GRRegsRegisterClass);
RegInfo.addLiveIn(ArgRegs[i], VReg);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
// Move argument from virt reg -> stack
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
MemOps.push_back(Store);
}
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
} else {
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
@@ -717,34 +909,29 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
}
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- std::vector<MVT> RetVT(Op.getNode()->value_begin(),
- Op.getNode()->value_end());
- return DAG.getNode(ISD::MERGE_VALUES, dl, RetVT,
- &ArgValues[0], ArgValues.size());
+ return Chain;
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
-SDValue XCoreTargetLowering::
-LowerRET(SDValue Op, SelectionDAG &DAG)
-{
+SDValue
+XCoreTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
// CCValAssign - represents the assignment of
// the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- DebugLoc dl = Op.getDebugLoc();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
- // Analize return values of ISD::RET
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_XCore);
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
// If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
@@ -754,8 +941,6 @@ LowerRET(SDValue Op, SelectionDAG &DAG)
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- // The chain is always operand #0
- SDValue Chain = Op.getOperand(0);
SDValue Flag;
// Copy the result values into the output registers.
@@ -763,10 +948,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG)
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- // ISD::RET => ret chain, (regnum1,val1), ...
- // So i*2+1 index only the regnums
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Op.getOperand(i*2+1), Flag);
+ Outs[i].Val, Flag);
// Guarantee that all emitted copies are stuck together,
// so nothing can be scheduled in between them.
@@ -788,7 +971,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG)
MachineBasicBlock *
XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == XCore::SELECT_CC) &&
@@ -816,9 +1000,18 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
+ // Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
+ // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
+ // Next, remove all successors of the current block.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
@@ -844,6 +1037,56 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::STORE: {
+ // Replace unaligned store of unaligned load with memmove.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!DCI.isBeforeLegalize() ||
+ allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ ST->isVolatile() || ST->isIndexed()) {
+ break;
+ }
+ SDValue Chain = ST->getChain();
+
+ unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
+ if (StoreBits % 8) {
+ break;
+ }
+ unsigned ABIAlignment = getTargetData()->getABITypeAlignment(
+ ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
+ unsigned Alignment = ST->getAlignment();
+ if (Alignment >= ABIAlignment) {
+ break;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
+ if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
+ LD->getAlignment() == Alignment &&
+ !LD->isVolatile() && !LD->isIndexed() &&
+ Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
+ LD->getBasePtr(),
+ DAG.getConstant(StoreBits/8, MVT::i32),
+ Alignment, ST->getSrcValue(),
+ ST->getSrcValueOffset(), LD->getSrcValue(),
+ LD->getSrcValueOffset());
+ }
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
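// Source that triggers the combine above looks like this sketch: a
// misaligned i32 load feeding a misaligned i32 store of the same width
// and alignment becomes a single memmove instead of two __misaligned_*
// helper calls (the packed struct is just an assumed way of producing
// the underaligned accesses).
struct __attribute__((packed)) P { char tag; int word; };
void copy_word(P *dst, const P *src) {
  dst->word = src->word;  // store-of-load pair rewritten to memmove
}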
+
+//===----------------------------------------------------------------------===//
// Addressing mode description hooks
//===----------------------------------------------------------------------===//
@@ -867,44 +1110,35 @@ static inline bool isImmUs4(int64_t val)
bool
XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
- MVT VT = getValueType(Ty, true);
- // Get expected value type after legalization
- switch (VT.getSimpleVT()) {
- // Legal load / stores
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- break;
- // Expand i1 -> i8
- case MVT::i1:
- VT = MVT::i8;
- break;
- // Everything else is lowered to words
- default:
- VT = MVT::i32;
- break;
- }
+ // Be conservative with void
+ // FIXME: Can we be more aggressive?
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return false;
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
if (AM.BaseGV) {
- return VT == MVT::i32 && !AM.HasBaseReg && AM.Scale == 0 &&
+ return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
AM.BaseOffs%4 == 0;
}
- switch (VT.getSimpleVT()) {
- default:
- return false;
- case MVT::i8:
+ switch (Size) {
+ case 1:
// reg + imm
if (AM.Scale == 0) {
return isImmUs(AM.BaseOffs);
}
+ // reg + reg
return AM.Scale == 1 && AM.BaseOffs == 0;
- case MVT::i16:
+ case 2:
+ case 3:
// reg + imm
if (AM.Scale == 0) {
return isImmUs2(AM.BaseOffs);
}
+ // reg + reg<<1
return AM.Scale == 2 && AM.BaseOffs == 0;
- case MVT::i32:
+ default:
// reg + imm
if (AM.Scale == 0) {
return isImmUs4(AM.BaseOffs);
@@ -922,7 +1156,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
std::vector<unsigned> XCoreTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const
+ EVT VT) const
{
if (Constraint.size() != 1)
return std::vector<unsigned>();
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 753ea819c2bd..ef8555e3da17 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -79,7 +79,8 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
virtual bool isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const;
@@ -92,18 +93,31 @@ namespace llvm {
const XCoreSubtarget &Subtarget;
// Lower Operand helpers
- SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC);
- SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode*TheCall,
- unsigned CallingConv, SelectionDAG &DAG);
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV,
SelectionDAG &DAG);
// Lower Operand specifics
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
@@ -116,10 +130,35 @@ namespace llvm {
// Inline asm support
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
// Expand specifics
SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
};
}
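The three hooks declared above (LowerFormalArguments, LowerCall,
LowerReturn) replace the custom lowerings of the ISD::FORMAL_ARGUMENTS,
ISD::CALL and ISD::RET nodes that this patch deletes. All of them share
one CCState skeleton; a condensed sketch, using only names that appear
in the patch (bodies elided):

    SmallVector<CCValAssign, 16> Locs;
    CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                   Locs, *DAG.getContext());
    CCInfo.AnalyzeCallOperands(Outs, CC_XCore);  // or AnalyzeFormalArguments,
                                                 // AnalyzeCallResult, AnalyzeReturn
    for (unsigned i = 0, e = Locs.size(); i != e; ++i) {
      CCValAssign &VA = Locs[i];
      if (VA.isRegLoc()) {
        // copy to / from the physical register VA.getLocReg()
      } else {
        // use a stack slot at VA.getLocMemOffset()
      }
    }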
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 504d2025edcf..e616fe68e232 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "XCoreGenInstrInfo.inc"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
namespace XCore {
@@ -36,7 +37,7 @@ namespace XCore {
using namespace llvm;
-XCoreInstrInfo::XCoreInstrInfo(void)
+XCoreInstrInfo::XCoreInstrInfo()
: TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)),
RI(*this) {
}
@@ -115,30 +116,6 @@ XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-/// isInvariantLoad - Return true if the specified instruction (which is marked
-/// mayLoad) is loading from a location whose value is invariant across the
-/// function. For example, loading a value from the constant pool or from
-/// from the argument area of a function if it does not change. This should
-/// only return true of *all* loads the instruction does are invariant (if it
-/// does multiple loads).
-bool
-XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const {
- // Loads from constants pools and loads from invariant argument slots are
- // invariant
- int Opcode = MI->getOpcode();
- if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) {
- return MI->getOperand(1).isCPI();
- }
- int FrameIndex;
- if (isLoadFromStackSlot(MI, FrameIndex)) {
- const MachineFrameInfo &MFI =
- *MI->getParent()->getParent()->getFrameInfo();
- return MFI.isFixedObjectIndex(FrameIndex) &&
- MFI.isImmutableObjectIndex(FrameIndex);
- }
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
@@ -186,7 +163,7 @@ static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
{
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case XCore::COND_TRUE : return XCore::BRFT_lru6;
case XCore::COND_FALSE : return XCore::BRFF_lru6;
}
@@ -197,7 +174,7 @@ static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
{
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case XCore::COND_TRUE : return XCore::COND_FALSE;
case XCore::COND_FALSE : return XCore::COND_TRUE;
}
@@ -402,14 +379,6 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addImm(0);
}
-void XCoreInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill, SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
- assert(0 && "unimplemented\n");
-}
-
void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, int FrameIndex,
@@ -422,14 +391,6 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0);
}
-void XCoreInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
- assert(0 && "unimplemented\n");
-}
-
bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 08708863ad57..24230ac46a13 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -22,7 +22,7 @@ namespace llvm {
class XCoreInstrInfo : public TargetInstrInfoImpl {
const XCoreRegisterInfo RI;
public:
- XCoreInstrInfo(void);
+ XCoreInstrInfo();
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -52,8 +52,6 @@ public:
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
- virtual bool isInvariantLoad(const MachineInstr *MI) const;
-
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -76,21 +74,11 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 65cd4fe95559..4b9ea7a49178 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -23,18 +23,6 @@
include "XCoreInstrFormats.td"
//===----------------------------------------------------------------------===//
-// Feature predicates.
-//===----------------------------------------------------------------------===//
-
-// HasXS1A - This predicate is true when the target processor supports XS1A
-// instructions.
-def HasXS1A : Predicate<"Subtarget.isXS1A()">;
-
-// HasXS1B - This predicate is true when the target processor supports XS1B
-// instructions.
-def HasXS1B : Predicate<"Subtarget.isXS1B()">;
-
-//===----------------------------------------------------------------------===//
// XCore specific DAG Nodes.
//
@@ -95,6 +83,12 @@ def neg_xform : SDNodeXForm<imm, [{
return getI32Imm(-value);
}]>;
+def bpwsub_xform : SDNodeXForm<imm, [{
+ // Transformation function: 32-imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(32-value);
+}]>;
+
def div4neg_xform : SDNodeXForm<imm, [{
// Transformation function: -imm/4
uint32_t value = N->getZExtValue();
@@ -136,9 +130,6 @@ def immU20 : PatLeaf<(imm), [{
return (uint32_t)N->getZExtValue() < (1 << 20);
}]>;
-// FIXME check subtarget. Currently we check if the immediate
-// is in the common subset of legal immediate values for both
-// XS1A and XS1B.
def immMskBitp : PatLeaf<(imm), [{
uint32_t value = (uint32_t)N->getZExtValue();
if (!isMask_32(value)) {
@@ -151,9 +142,6 @@ def immMskBitp : PatLeaf<(imm), [{
|| msksize == 32;
}]>;
-// FIXME check subtarget. Currently we check if the immediate
-// is in the common subset of legal immediate values for both
-// XS1A and XS1B.
def immBitp : PatLeaf<(imm), [{
uint32_t value = (uint32_t)N->getZExtValue();
return (value >= 1 && value <= 8)
@@ -162,6 +150,14 @@ def immBitp : PatLeaf<(imm), [{
|| value == 32;
}]>;
+def immBpwSubBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 24 && value <= 31)
+ || value == 16
+ || value == 8
+ || value == 0;
+}]>;
+
def lda16f : PatFrag<(ops node:$addr, node:$offset),
(add node:$addr, (shl node:$offset, 1))>;
def lda16b : PatFrag<(ops node:$addr, node:$offset),
@@ -469,7 +465,7 @@ def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
}
// Four operand long
-let Predicates = [HasXS1B], Constraints = "$src1 = $dst1,$src2 = $dst2" in {
+let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
(ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
GRRegs:$src4),
@@ -485,7 +481,6 @@ def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
// Five operand long
-let Predicates = [HasXS1B] in {
def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
(ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
"ladd $dst1, $dst2, $src1, $src2, $src3",
@@ -500,7 +495,6 @@ def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
(ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
"ldiv $dst1, $dst2, $src1, $src2, $src3",
[]>;
-}
// Six operand long
@@ -510,13 +504,6 @@ def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
"lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
[]>;
-let Predicates = [HasXS1A] in
-def MACC_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "macc $dst1, $dst2, $src1, $src2, $src3, $src4",
- []>;
-
// Register - U6
//let Uses = [DP] in ...
@@ -664,13 +651,12 @@ def BRFU_lu6 : _FLU6<
}
//let Uses = [CP] in ...
-let Predicates = [HasXS1B], Defs = [R11], neverHasSideEffects = 1,
- isReMaterializable = 1 in
+let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
"ldaw r11, cp[$a]",
[]>;
-let Predicates = [HasXS1B], Defs = [R11], isReMaterializable = 1 in
+let Defs = [R11], isReMaterializable = 1 in
def LDAWCP_lu6: _FLRU6<
(outs), (ins MEMii:$a),
"ldaw r11, cp[$a]",
@@ -821,7 +807,7 @@ def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
(LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
-def : Pat<(zextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
(LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
@@ -989,3 +975,21 @@ def : Pat<(mul GRRegs:$src, -3),
def : Pat<(sra GRRegs:$src, 31),
(ASHR_l2rus GRRegs:$src, 32)>;
+def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+// setge X, 0 is canonicalized to setgt X, -1
+def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst),
+ (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(setgt GRRegs:$lhs, -1),
+ (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>;
+
+def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm),
+ (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>;
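The last pattern above selects the sext instruction for the shift pair
(sra (shl x, k), k): shifting left then arithmetically right by k
sign-extends the low 32 - k bits, and bpwsub_xform rewrites the shift
amount k into the width 32 - k that sext takes. A C++ sketch of the
identity being matched (assuming the usual arithmetic right shift on
int32_t):

    #include <cstdint>
    static int32_t sext_low_bits(uint32_t x, unsigned k) {  // 0 <= k < 32
      return (int32_t)(x << k) >> k;
    }
    // e.g. sext_low_bits(0xFF, 24) == -1: an 8-bit -1 widened to 32 bits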
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp
new file mode 100644
index 000000000000..dffdda9a1fd0
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp
@@ -0,0 +1,31 @@
+//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCAsmInfo.h"
+using namespace llvm;
+
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) {
+ SupportsDebugInformation = true;
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Debug
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+}
+
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/XCoreMCAsmInfo.h
new file mode 100644
index 000000000000..01f8e481a949
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+ class XCoreMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 82cd92d5685c..136a035cb1f2 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -30,6 +30,8 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -142,9 +144,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
if (!isU6 && !isImmU16(Amount)) {
// FIX could emit multiple instructions in this case.
- cerr << "eliminateCallFramePseudoInstr size too big: "
- << Amount << "\n";
- abort();
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
}
MachineInstr *New;
@@ -167,8 +171,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+unsigned
+XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
@@ -187,12 +193,13 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int StackSize = MF.getFrameInfo()->getStackSize();
#ifndef NDEBUG
- DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
- DOUT << "<--------->\n";
- MI.print(DOUT);
- DOUT << "FrameIndex : " << FrameIndex << "\n";
- DOUT << "FrameOffset : " << Offset << "\n";
- DOUT << "StackSize : " << StackSize << "\n";
+ DEBUG(errs() << "\nFunction : "
+ << MF.getFunction()->getName() << "\n");
+ DEBUG(errs() << "<--------->\n");
+ DEBUG(MI.print(errs()));
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");
+ DEBUG(errs() << "FrameOffset : " << Offset << "\n");
+ DEBUG(errs() << "StackSize : " << StackSize << "\n");
#endif
Offset += StackSize;
@@ -203,10 +210,7 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset%4 == 0 && "Misaligned stack offset");
- #ifndef NDEBUG
- DOUT << "Offset : " << Offset << "\n";
- DOUT << "<--------->\n";
- #endif
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
Offset/=4;
@@ -224,63 +228,65 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool isUs = isImmUs(Offset);
unsigned FramePtr = XCore::R10;
- MachineInstr *New = 0;
if (!isUs) {
if (!RS) {
- cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "eliminateFrameIndex Frame size too big: " << Offset;
+ llvm_report_error(Msg.str());
}
unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
SPAdj);
loadConstant(MBB, II, ScratchReg, Offset, dl);
switch (MI.getOpcode()) {
case XCore::LDWFI:
- New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
.addReg(FramePtr)
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::STWFI:
- New = BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
.addReg(Reg, getKillRegState(isKill))
.addReg(FramePtr)
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::LDAWFI:
- New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
.addReg(FramePtr)
.addReg(ScratchReg, RegState::Kill);
break;
default:
- assert(0 && "Unexpected Opcode\n");
+ llvm_unreachable("Unexpected Opcode");
}
} else {
switch (MI.getOpcode()) {
case XCore::LDWFI:
- New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
.addReg(FramePtr)
.addImm(Offset);
break;
case XCore::STWFI:
- New = BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
.addReg(Reg, getKillRegState(isKill))
.addReg(FramePtr)
.addImm(Offset);
break;
case XCore::LDAWFI:
- New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
.addReg(FramePtr)
.addImm(Offset);
break;
default:
- assert(0 && "Unexpected Opcode\n");
+ llvm_unreachable("Unexpected Opcode");
}
}
} else {
bool isU6 = isImmU6(Offset);
if (!isU6 && !isImmU16(Offset)) {
- // FIXME could make this work for LDWSP, LDAWSP.
- cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "eliminateFrameIndex Frame size too big: " << Offset;
+ llvm_report_error(Msg.str());
}
switch (MI.getOpcode()) {
@@ -302,11 +308,12 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addImm(Offset);
break;
default:
- assert(0 && "Unexpected Opcode\n");
+ llvm_unreachable("Unexpected Opcode");
}
}
// Erase old instruction.
MBB.erase(II);
+ return 0;
}
void
@@ -354,8 +361,10 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// TODO use mkmsk if possible.
if (!isImmU16(Value)) {
// TODO use constant pool.
- cerr << "loadConstant value too big " << Value << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "loadConstant value too big " << Value;
+ llvm_report_error(Msg.str());
}
int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
@@ -368,8 +377,10 @@ storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Offset/=4;
bool isU6 = isImmU6(Offset);
if (!isU6 && !isImmU16(Offset)) {
- cerr << "storeToStack offset too big " << Offset << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "storeToStack offset too big " << Offset;
+ llvm_report_error(Msg.str());
}
int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
BuildMI(MBB, I, dl, TII.get(Opcode))
@@ -384,8 +395,10 @@ loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Offset/=4;
bool isU6 = isImmU6(Offset);
if (!isU6 && !isImmU16(Offset)) {
- cerr << "loadFromStack offset too big " << Offset << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "loadFromStack offset too big " << Offset;
+ llvm_report_error(Msg.str());
}
int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
@@ -414,8 +427,10 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!isU6 && !isImmU16(FrameSize)) {
// FIXME could emit multiple instructions.
- cerr << "emitPrologue Frame size too big: " << FrameSize << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "emitPrologue Frame size too big: " << FrameSize;
+ llvm_report_error(Msg.str());
}
bool emitFrameMoves = needsFrameMoves(MF);
@@ -538,8 +553,10 @@ void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
if (!isU6 && !isImmU16(FrameSize)) {
// FIXME could emit multiple instructions.
- cerr << "emitEpilogue Frame size too big: " << FrameSize << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "emitEpilogue Frame size too big: " << FrameSize;
+ llvm_report_error(Msg.str());
}
if (FrameSize) {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 00b7caa96bc6..a7df5102f201 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -57,8 +57,9 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index dc53da4ddf0b..78a6fa5b2edb 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -13,16 +13,8 @@
#include "XCoreSubtarget.h"
#include "XCore.h"
-#include "XCoreGenSubtarget.inc"
using namespace llvm;
-XCoreSubtarget::XCoreSubtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS)
- : IsXS1A(false),
- IsXS1B(false)
+XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS)
{
- std::string CPU = "xs1a-generic";
-
- // Parse features string.
- ParseSubtargetFeatures(FS, CPU);
}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index ff6475baa810..f8be3ec86189 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -20,21 +20,14 @@
#include <string>
namespace llvm {
-class Module;
class XCoreSubtarget : public TargetSubtarget {
- bool IsXS1A;
- bool IsXS1B;
public:
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- XCoreSubtarget(const TargetMachine &TM, const Module &M,
- const std::string &FS);
-
- bool isXS1A() const { return IsXS1A; }
- bool isXS1B() const { return IsXS1B; }
+ XCoreSubtarget(const std::string &TT, const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index b72225f23b75..75f2055ebf9f 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,38 +10,20 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreTargetAsmInfo.h"
+#include "XCoreMCAsmInfo.h"
#include "XCoreTargetMachine.h"
#include "XCore.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-/// XCoreTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int XCoreTargetMachineModule;
-int XCoreTargetMachineModule = 0;
-
-namespace {
- // Register the target.
- RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeXCoreTarget() { }
-
-const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
- return new XCoreTargetAsmInfo(*this);
-}
-
/// XCoreTargetMachine ctor - Create an ILP32 architecture model
///
-XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS)
- : Subtarget(*this, M, FS),
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32"),
InstrInfo(),
@@ -49,26 +31,14 @@ XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS)
TLInfo(*this) {
}
-unsigned XCoreTargetMachine::getModuleMatchQuality(const Module &M) {
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "xcore-")
- return 20;
-
- // Otherwise we don't match.
- return 0;
-}
-
bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createXCoreISelDag(*this));
return false;
}
-bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // Output assembly language.
- PM.add(createXCoreCodePrinterPass(Out, *this, Verbose));
- return false;
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() {
+ RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
+ RegisterAsmInfo<XCoreMCAsmInfo> Y(TheXCoreTarget);
}
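With registration moved into LLVMInitializeXCoreTarget, target selection
is driven by the triple through TargetRegistry instead of the deleted
getModuleMatchQuality probing. A sketch of the lookup a client now
performs (names as in this revision's registry API; error handling
reduced to a null return):

    #include "llvm/Target/TargetRegistry.h"
    using namespace llvm;
    static TargetMachine *createXCoreTM(std::string &Error) {
      const Target *TheTarget =
          TargetRegistry::lookupTarget("xcore-unknown-unknown", Error);
      if (!TheTarget)
        return 0; // Error holds the reason
      return TheTarget->createTargetMachine("xcore-unknown-unknown", /*FS=*/"");
    }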
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 2385aedc9079..b0b1464dbe0c 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -23,20 +23,15 @@
namespace llvm {
-class Module;
-
class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
XCoreInstrInfo InstrInfo;
XCoreFrameInfo FrameInfo;
XCoreTargetLowering TLInfo;
-
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
public:
- XCoreTargetMachine(const Module &M, const std::string &FS);
+ XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -49,13 +44,9 @@ public:
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
- static unsigned getModuleMatchQuality(const Module &M);
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
new file mode 100644
index 000000000000..7de3b55d38f6
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -0,0 +1,67 @@
+//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetObjectFile.h"
+#include "XCoreSubtarget.h"
+#include "MCSectionXCore.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+
+void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ DataSection =
+ MCSectionXCore::Create(".dp.data", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
+ MCSectionXCore::SHF_DP_SECTION,
+ SectionKind::getDataRel(), false, getContext());
+ BSSSection =
+ MCSectionXCore::Create(".dp.bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
+ MCSectionXCore::SHF_DP_SECTION,
+ SectionKind::getBSS(), false, getContext());
+
+ MergeableConst4Section =
+ MCSectionXCore::Create(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
+ MCSectionXCore::SHF_CP_SECTION,
+ SectionKind::getMergeableConst4(), false,
+ getContext());
+ MergeableConst8Section =
+ MCSectionXCore::Create(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
+ MCSectionXCore::SHF_CP_SECTION,
+ SectionKind::getMergeableConst8(), false,
+ getContext());
+ MergeableConst16Section =
+ MCSectionXCore::Create(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
+ MCSectionXCore::SHF_CP_SECTION,
+ SectionKind::getMergeableConst16(), false,
+ getContext());
+
+ // TLS globals are lowered in the backend to arrays indexed by the current
+ // thread id. After lowering they require no special handling by the linker
+ // and can be placed in the standard data / bss sections.
+ TLSDataSection = DataSection;
+ TLSBSSSection = BSSSection;
+
+ ReadOnlySection =
+ MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionXCore::SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel(), false,
+ getContext());
+
+ // Dynamic linking is not supported. Data with relocations is placed in the
+ // same section as data without relocations.
+ DataRelSection = DataRelLocalSection = DataSection;
+ DataRelROSection = DataRelROLocalSection = ReadOnlySection;
+}
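
Per the comment above, XCore lowers TLS globals to arrays indexed by the current thread id, which is why TLSDataSection and TLSBSSSection can simply alias the ordinary data and bss sections. A hedged C++ sketch of that lowering, where xcore_thread_id is a hypothetical stand-in for the hardware thread-id read:

    // Conceptual analogue only, not actual backend output: each TLS
    // variable becomes an ordinary array with one slot per hardware thread.
    extern "C" unsigned xcore_thread_id(void); // hypothetical helper
    static int tls_var_storage[8];             // one slot per thread
    #define tls_var tls_var_storage[xcore_thread_id()]
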
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
new file mode 100644
index 000000000000..7efb990b79cf
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -0,0 +1,26 @@
+//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ // TODO: Classify globals as xcore wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index d07f6135257f..8000d0d2ff4a 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -16,7 +16,7 @@
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -32,7 +32,7 @@ namespace {
HelloCounter++;
std::string fname = F.getName();
EscapeString(fname);
- cerr << "Hello: " << fname << "\n";
+ errs() << "Hello: " << fname << "\n";
return false;
}
};
@@ -51,7 +51,7 @@ namespace {
HelloCounter++;
std::string fname = F.getName();
EscapeString(fname);
- cerr << "Hello: " << fname << "\n";
+ errs() << "Hello: " << fname << "\n";
return false;
}
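
Both hunks migrate from the removed llvm/Support/Streams.h cerr wrapper to raw_ostream's errs(). A minimal self-contained sketch of the new idiom:

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // errs() returns a raw_ostream bound to stderr, replacing the old
    // cerr wrapper that this commit deletes.
    static void report(const std::string &Name) {
      llvm::errs() << "Hello: " << Name << "\n";
    }
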
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index a61263401618..5b91f3d20992 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -36,16 +36,18 @@
#include "llvm/Module.h"
#include "llvm/CallGraphSCCPass.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Compiler.h"
#include <set>
using namespace llvm;
@@ -60,11 +62,10 @@ namespace {
struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetData>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
- virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
: CallGraphSCCPass(&ID), maxElements(maxElements) {}
@@ -73,11 +74,11 @@ namespace {
typedef std::vector<uint64_t> IndicesVector;
private:
- bool PromoteArguments(CallGraphNode *CGN);
+ CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
- Function *DoPromotion(Function *F,
- SmallPtrSet<Argument*, 8> &ArgsToPromote,
- SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ CallGraphNode *DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
};
@@ -91,14 +92,17 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
}
-bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
bool Changed = false, LocalChange;
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- LocalChange |= PromoteArguments(SCC[i]);
+ if (CallGraphNode *CGN = PromoteArguments(SCC[i])) {
+ LocalChange = true;
+ SCC[i] = CGN;
+ }
Changed |= LocalChange; // Remember that we changed something.
} while (LocalChange);
@@ -110,11 +114,11 @@ bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
-bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Function *F = CGN->getFunction();
// Make sure that it is local to this module.
- if (!F || !F->hasLocalLinkage()) return false;
+ if (!F || !F->hasLocalLinkage()) return 0;
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
@@ -123,12 +127,12 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
I != E; ++I, ++ArgNo)
if (isa<PointerType>(I->getType()))
PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
- if (PointerArgs.empty()) return false;
+ if (PointerArgs.empty()) return 0;
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers.
if (F->hasAddressTaken())
- return false;
+ return 0;
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
@@ -144,9 +148,9 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
- DOUT << "argpromotion disable promoting argument '"
- << PtrArg->getName() << "' because it would require adding more "
- << "than " << maxElements << " arguments to the function.\n";
+        DEBUG(errs() << "argpromotion disabled promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more"
+ << " than " << maxElements << " arguments to the function.\n");
} else {
// If all the elements are single-value types, we can promote it.
bool AllSimple = true;
@@ -173,13 +177,10 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false;
-
- Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return 0;
- // Update the call graph to know that the function has been transformed.
- getAnalysis<CallGraph>().changeFunction(F, NewF);
- return true;
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
@@ -409,9 +410,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
// to do.
if (ToPromote.find(Operands) == ToPromote.end()) {
if (maxElements > 0 && ToPromote.size() == maxElements) {
- DOUT << "argpromotion not promoting argument '"
- << Arg->getName() << "' because it would require adding more "
- << "than " << maxElements << " arguments to the function.\n";
+ DEBUG(errs() << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n");
// We limit aggregate promotion to only promoting up to a fixed number
// of elements of the aggregate.
return false;
@@ -432,7 +433,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- TargetData &TD = getAnalysis<TargetData>();
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false; // Without TargetData, assume the worst.
for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
// Check to see if the load is invalidated from the start of the block to
@@ -442,7 +444,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
const PointerType *LoadTy =
cast<PointerType>(Load->getPointerOperand()->getType());
- unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType());
+ unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
return false; // Pointer is invalidated!
@@ -467,8 +469,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the call graph node of the new function. At this
/// point, we know that it's safe to do so.
-Function *ArgPromotion::DoPromotion(Function *F,
- SmallPtrSet<Argument*, 8> &ArgsToPromote,
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
// Start by computing a new prototype for the function, which is the same as
@@ -581,19 +583,24 @@ Function *ArgPromotion::DoPromotion(Function *F,
bool ExtraArgHack = false;
if (Params.empty() && FTy->isVarArg()) {
ExtraArgHack = true;
- Params.push_back(Type::Int32Ty);
+ Params.push_back(Type::getInt32Ty(F->getContext()));
}
// Construct the new function type using the new arguments.
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
- // Create the new function body and insert it into the module...
+ // Create the new function body and insert it into the module.
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
NF->copyAttributesFrom(F);
+
+ DEBUG(errs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
+
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
AttributesVec.clear();
F->getParent()->getFunctionList().insert(F, NF);
@@ -606,6 +613,10 @@ Function *ArgPromotion::DoPromotion(Function *F,
// Get the callgraph information that we need to update to reflect our
// changes.
CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
// Loop over all of the callers of the function, transforming the call sites
// to pass in the loaded pointers.
@@ -636,9 +647,10 @@ Function *ArgPromotion::DoPromotion(Function *F,
// Emit a GEP and load for each element of the struct.
const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
const StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 };
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::Int32Ty, i);
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
(*AI)->getName()+"."+utostr(i),
Call);
@@ -662,7 +674,9 @@ Function *ArgPromotion::DoPromotion(Function *F,
IE = SI->end(); II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
- const Type *IdxTy = (isa<StructType>(ElTy) ? Type::Int32Ty : Type::Int64Ty);
+ const Type *IdxTy = (isa<StructType>(ElTy) ?
+ Type::getInt32Ty(F->getContext()) :
+ Type::getInt64Ty(F->getContext()));
Ops.push_back(ConstantInt::get(IdxTy, *II));
// Keep track of the type we're currently indexing
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
@@ -679,7 +693,7 @@ Function *ArgPromotion::DoPromotion(Function *F,
}
if (ExtraArgHack)
- Args.push_back(Constant::getNullValue(Type::Int32Ty));
+ Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
// Push any varargs arguments on the list
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
@@ -715,7 +729,8 @@ Function *ArgPromotion::DoPromotion(Function *F,
AA.replaceWithNewValue(Call, New);
// Update the callgraph to know that the callsite has been transformed.
- CG[Call->getParent()->getParent()]->replaceCallSite(Call, New);
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->replaceCallEdge(Call, New, NF_CGN);
if (!Call->use_empty()) {
Call->replaceAllUsesWith(New);
@@ -756,14 +771,16 @@ Function *ArgPromotion::DoPromotion(Function *F,
const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
const StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 };
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::Int32Ty, i);
- std::string Name = TheAlloca->getName()+"."+utostr(i);
- Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
- Name, InsertPt);
- I2->setName(I->getName()+"."+utostr(i));
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx =
+ GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ TheAlloca->getName()+"."+Twine(i),
+ InsertPt);
+ I2->setName(I->getName()+"."+Twine(i));
new StoreInst(I2++, Idx, InsertPt);
}
@@ -792,8 +809,8 @@ Function *ArgPromotion::DoPromotion(Function *F,
LI->replaceAllUsesWith(I2);
AA.replaceWithNewValue(LI, I2);
LI->eraseFromParent();
- DOUT << "*** Promoted load of argument '" << I->getName()
- << "' in function '" << F->getName() << "'\n";
+ DEBUG(errs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
} else {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
IndicesVector Operands;
@@ -819,8 +836,8 @@ Function *ArgPromotion::DoPromotion(Function *F,
NewName += ".val";
TheArg->setName(NewName);
- DOUT << "*** Promoted agg argument '" << TheArg->getName()
- << "' of function '" << NF->getName() << "'\n";
+ DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
// All of the uses must be load instructions. Replace them all with
// the argument specified by ArgNo.
@@ -842,13 +859,18 @@ Function *ArgPromotion::DoPromotion(Function *F,
// Notify the alias analysis implementation that we inserted a new argument.
if (ExtraArgHack)
- AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin());
+ AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
+ NF->arg_begin());
// Tell the alias analysis that the old function is about to disappear.
AA.replaceWithNewValue(F, NF);
+
+ NF_CGN->stealCalledFunctionsFrom(CG[F]);
+
// Now that the old function is dead, delete it.
- F->eraseFromParent();
- return NF;
+ delete CG.removeFunctionFromModule(F);
+
+ return NF_CGN;
}
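
For readers following the ArgumentPromotion hunks, a conceptual source-level analogue of what DoPromotion performs on the IR (plain C++, not the pass's API): a pointer argument that is only loaded from is rewritten so the loaded value is passed directly.

    // Before promotion: the callee takes a pointer and only reads it.
    static int callee(const int *P) { return *P + 1; }

    // After promotion: the load is hoisted to each call site and the
    // argument travels by value, exposing further scalar optimizations.
    static int calleePromoted(int PVal) { return PVal + 1; }

    int caller(int X) {
      return calleePromoted(X); // was: callee(&X)
    }
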
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 1438b4879d2b..ec0f1e193ad6 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -1,18 +1,19 @@
add_llvm_library(LLVMipo
- FunctionAttrs.cpp
ArgumentPromotion.cpp
ConstantMerge.cpp
DeadArgumentElimination.cpp
DeadTypeElimination.cpp
ExtractGV.cpp
+ FunctionAttrs.cpp
GlobalDCE.cpp
GlobalOpt.cpp
+ IPConstantPropagation.cpp
+ IPO.cpp
IndMemRemoval.cpp
InlineAlways.cpp
- Inliner.cpp
InlineSimple.cpp
+ Inliner.cpp
Internalize.cpp
- IPConstantPropagation.cpp
LoopExtractor.cpp
LowerSetJmp.cpp
MergeFunctions.cpp
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 237e6db1d335..c1a1045005b7 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -78,7 +78,7 @@ bool ConstantMerge::runOnModule(Module &M) {
}
// Only process constants with initializers.
- if (GV->isConstant() && GV->hasInitializer()) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
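
The switch from hasInitializer() to hasDefinitiveInitializer() makes the merge conservative about initializers that may be overridden at link time (for example, weak linkage): folding is only sound when the initializer seen here is known to be the one used at runtime. A conceptual analogue of the merge itself:

    // Plain C++ analogue of ConstantMerge: duplicate internal constants
    // collapse into one definition; uses of the duplicate are redirected.
    static const int KTableA[3] = {1, 2, 3};
    static const int KTableB[3] = {1, 2, 3}; // after the pass: folded into KTableA
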
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index e480dadca891..79a32f02aace 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -24,10 +24,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -72,7 +74,7 @@ namespace {
std::string getDescription() const {
return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getName();
+ + utostr(Idx) + " of function " + F->getNameStr();
}
};
@@ -195,8 +197,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but doesn't have isVarArg set.
const FunctionType *FTy = Fn.getFunctionType();
+
std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
- FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false);
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+ Params, false);
unsigned NumArgs = Params.size();
// Create the new function body and insert it into the module...
@@ -277,7 +281,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
static unsigned NumRetVals(const Function *F) {
- if (F->getReturnType() == Type::VoidTy)
+ if (F->getReturnType() == Type::getVoidTy(F->getContext()))
return 0;
else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
return STy->getNumElements();
@@ -422,7 +426,7 @@ void DAE::SurveyFunction(Function &F) {
return;
}
- DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n";
+ DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
@@ -485,7 +489,7 @@ void DAE::SurveyFunction(Function &F) {
for (unsigned i = 0; i != RetCount; ++i)
MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
- DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n";
+ DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
// Now, check all of our arguments.
unsigned i = 0;
@@ -527,7 +531,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,
/// mark any values that are used as this function's parameters or by its return
/// values (according to Uses) live as well.
void DAE::MarkLive(const Function &F) {
- DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n";
+ DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
// Mark the function as live.
LiveFunctions.insert(&F);
// Mark all arguments as live.
@@ -548,7 +552,7 @@ void DAE::MarkLive(const RetOrArg &RA) {
if (!LiveValues.insert(RA).second)
return; // We were already marked Live.
- DOUT << "DAE - Marking " << RA.getDescription() << " live\n";
+ DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n");
PropagateLiveness(RA);
}
@@ -596,11 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
const Type *RetTy = FTy->getReturnType();
const Type *NRetTy = NULL;
unsigned RetCount = NumRetVals(F);
+
// -1 means unused, other numbers are the new index
SmallVector<int, 5> NewRetIdxs(RetCount, -1);
std::vector<const Type*> RetTypes;
- if (RetTy == Type::VoidTy) {
- NRetTy = Type::VoidTy;
+ if (RetTy == Type::getVoidTy(F->getContext())) {
+ NRetTy = Type::getVoidTy(F->getContext());
} else {
const StructType *STy = dyn_cast<StructType>(RetTy);
if (STy)
@@ -612,8 +617,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
NewRetIdxs[i] = RetTypes.size() - 1;
} else {
++NumRetValsEliminated;
- DOUT << "DAE - Removing return value " << i << " from "
- << F->getNameStart() << "\n";
+ DEBUG(errs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
}
}
else
@@ -622,8 +627,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
RetTypes.push_back(RetTy);
NewRetIdxs[0] = 0;
} else {
- DOUT << "DAE - Removing return value from " << F->getNameStart()
- << "\n";
+ DEBUG(errs() << "DAE - Removing return value from " << F->getName()
+ << "\n");
++NumRetValsEliminated;
}
if (RetTypes.size() > 1)
@@ -633,14 +638,14 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// something and {} into void.
// Make the new struct packed if we used to return a packed struct
// already.
- NRetTy = StructType::get(RetTypes, STy->isPacked());
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
else if (RetTypes.size() == 1)
// One return type? Just a simple value then, but only if we didn't use to
// return a struct with that simple value before.
NRetTy = RetTypes.front();
else if (RetTypes.size() == 0)
// No return types? Make it void, but only if we didn't use to return {}.
- NRetTy = Type::VoidTy;
+ NRetTy = Type::getVoidTy(F->getContext());
}
assert(NRetTy && "No new return type found?");
@@ -649,7 +654,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
- if (NRetTy == Type::VoidTy)
+ if (NRetTy == Type::getVoidTy(F->getContext()))
RAttrs &= ~Attribute::typeIncompatible(NRetTy);
else
assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
@@ -677,8 +682,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
} else {
++NumArgumentsEliminated;
- DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart()
- << ") from " << F->getNameStart() << "\n";
+ DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
}
}
@@ -697,11 +702,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
bool ExtraArgHack = false;
if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) {
ExtraArgHack = true;
- Params.push_back(Type::Int32Ty);
+ Params.push_back(Type::getInt32Ty(F->getContext()));
}
// Create the new function type based on the recomputed parameters.
- FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params,
+ FTy->isVarArg());
// No change?
if (NFTy == FTy)
@@ -750,7 +756,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
if (ExtraArgHack)
- Args.push_back(UndefValue::get(Type::Int32Ty));
+ Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext())));
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
@@ -786,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Return type not changed? Just replace users then.
Call->replaceAllUsesWith(New);
New->takeName(Call);
- } else if (New->getType() == Type::VoidTy) {
+ } else if (New->getType() == Type::getVoidTy(F->getContext())) {
// Our return value has uses, but they will get removed later on.
// Replace by null for now.
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
@@ -806,7 +812,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// extract/insertvalue chaining and let instcombine clean that up.
//
// Start out building up our return value from undef
- Value *RetVal = llvm::UndefValue::get(RetTy);
+ Value *RetVal = UndefValue::get(RetTy);
for (unsigned i = 0; i != RetCount; ++i)
if (NewRetIdxs[i] != -1) {
Value *V;
@@ -862,7 +868,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
Value *RetVal;
- if (NFTy->getReturnType() == Type::VoidTy) {
+ if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {
RetVal = 0;
} else {
assert (isa<StructType>(RetTy));
@@ -873,7 +879,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// clean that up.
Value *OldRet = RI->getOperand(0);
// Start out building up our return value from undef
- RetVal = llvm::UndefValue::get(NRetTy);
+ RetVal = UndefValue::get(NRetTy);
for (unsigned i = 0; i != RetCount; ++i)
if (NewRetIdxs[i] != -1) {
ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
@@ -893,7 +899,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
// Replace the return instruction with one returning the new return
// value (possibly 0 if we became void).
- ReturnInst::Create(RetVal, RI);
+ ReturnInst::Create(F->getContext(), RetVal, RI);
BB->getInstList().erase(RI);
}
@@ -910,7 +916,7 @@ bool DAE::runOnModule(Module &M) {
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
// information computed while surveying other functions.
- DOUT << "DAE - Deleting dead varargs\n";
+ DEBUG(errs() << "DAE - Deleting dead varargs\n");
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
Function &F = *I++;
if (F.getFunctionType()->isVarArg())
@@ -921,7 +927,7 @@ bool DAE::runOnModule(Module &M) {
// We assume all arguments are dead unless proven otherwise (allowing us to
// determine that dead arguments passed into recursive functions are dead).
//
- DOUT << "DAE - Determining liveness\n";
+ DEBUG(errs() << "DAE - Determining liveness\n");
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
SurveyFunction(*I);
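
A conceptual analogue of what DeleteDeadVarargs and RemoveDeadStuffFromFunction achieve (plain C++, not the pass's API): arguments and return values proven dead are dropped, and every call site is rewritten to the slimmer prototype.

    // Before: the second argument never influences the result.
    static int f(int Live, int Dead) { (void)Dead; return Live * 2; }

    // After dead-argument elimination: the prototype and all call sites
    // lose the dead operand.
    static int fSlim(int Live) { return Live * 2; }

    int use() { return fSlim(21); } // was: f(21, 99)
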
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 0c529d239d98..191100c2e241 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Constants.h"
@@ -43,6 +44,7 @@ namespace {
return false; // Nothing to extract
}
+
if (deleteStuff)
return deleteGV();
M.setModuleInlineAsm("");
@@ -99,7 +101,8 @@ namespace {
// by putting them in the used array
{
std::vector<Constant *> AUGs;
- const Type *SBP= PointerType::getUnqual(Type::Int8Ty);
+      const Type *SBP =
+ Type::getInt8PtrTy(M.getContext());
for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
GE = Named.end(); GI != GE; ++GI) {
(*GI)->setLinkage(GlobalValue::ExternalLinkage);
@@ -107,9 +110,9 @@ namespace {
}
ArrayType *AT = ArrayType::get(SBP, AUGs.size());
Constant *Init = ConstantArray::get(AT, AUGs);
- GlobalValue *gv = new GlobalVariable(AT, false,
+ GlobalValue *gv = new GlobalVariable(M, AT, false,
GlobalValue::AppendingLinkage,
- Init, "llvm.used", &M);
+ Init, "llvm.used");
gv->setSection("llvm.metadata");
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index e8315247b23c..7edaa7fbef5e 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
@@ -44,7 +45,7 @@ namespace {
FunctionAttrs() : CallGraphSCCPass(&ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ bool runOnSCC(std::vector<CallGraphNode *> &SCC);
// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC);
@@ -54,7 +55,7 @@ namespace {
// IsFunctionMallocLike - Does this function allocate new memory?
bool IsFunctionMallocLike(Function *F,
- SmallPtrSet<CallGraphNode*, 8> &) const;
+ SmallPtrSet<Function*, 8> &) const;
// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC);
@@ -93,13 +94,12 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) {
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
- SmallPtrSet<CallGraphNode*, 8> SCCNodes;
- CallGraph &CG = getAnalysis<CallGraph>();
+ SmallPtrSet<Function*, 8> SCCNodes;
// Fill SCCNodes with the elements of the SCC. Used for quickly
  // looking up whether a given function is in this SCC.
for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- SCCNodes.insert(SCC[i]);
+ SCCNodes.insert(SCC[i]->getFunction());
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
@@ -133,9 +133,9 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
CallSite CS = CallSite::get(I);
- if (CS.getInstruction()) {
+ if (CS.getInstruction() && CS.getCalledFunction()) {
// Ignore calls to functions in the same SCC.
- if (SCCNodes.count(CG[CS.getCalledFunction()]))
+ if (SCCNodes.count(CS.getCalledFunction()))
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Ignore loads from local memory.
@@ -154,7 +154,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
return false;
if (isa<MallocInst>(I))
- // MallocInst claims not to write memory! PR3754.
+ // malloc claims not to write memory! PR3754.
return false;
// If this instruction may read memory, remember that.
@@ -226,9 +226,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
/// IsFunctionMallocLike - A function is malloc-like if it returns either null
/// or a pointer that doesn't alias any other pointer visible to the caller.
bool FunctionAttrs::IsFunctionMallocLike(Function *F,
- SmallPtrSet<CallGraphNode*, 8> &SCCNodes) const {
- CallGraph &CG = getAnalysis<CallGraph>();
-
+ SmallPtrSet<Function*, 8> &SCCNodes) const {
UniqueVector<Value *> FlowsToReturn;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -250,32 +248,36 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
switch (RVI->getOpcode()) {
// Extend the analysis by looking upwards.
- case Instruction::GetElementPtr:
case Instruction::BitCast:
+ case Instruction::GetElementPtr:
FlowsToReturn.insert(RVI->getOperand(0));
continue;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(RVI);
FlowsToReturn.insert(SI->getTrueValue());
FlowsToReturn.insert(SI->getFalseValue());
- } continue;
+ continue;
+ }
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(RVI);
for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
FlowsToReturn.insert(PN->getIncomingValue(i));
- } continue;
+ continue;
+ }
// Check whether the pointer came from an allocation.
case Instruction::Alloca:
case Instruction::Malloc:
break;
case Instruction::Call:
+ if (isMalloc(RVI))
+ break;
case Instruction::Invoke: {
CallSite CS(RVI);
if (CS.paramHasAttr(0, Attribute::NoAlias))
break;
if (CS.getCalledFunction() &&
- SCCNodes.count(CG[CS.getCalledFunction()]))
+ SCCNodes.count(CS.getCalledFunction()))
break;
} // fall-through
default:
@@ -291,12 +293,12 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
- SmallPtrSet<CallGraphNode*, 8> SCCNodes;
+ SmallPtrSet<Function*, 8> SCCNodes;
// Fill SCCNodes with the elements of the SCC. Used for quickly
  // looking up whether a given function is in this SCC.
for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- SCCNodes.insert(SCC[i]);
+ SCCNodes.insert(SCC[i]->getFunction());
// Check each function in turn, determining which functions return noalias
// pointers.
@@ -339,7 +341,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
return MadeChange;
}
-bool FunctionAttrs::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) {
bool Changed = AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
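
To illustrate the IsFunctionMallocLike predicate used above: a function is malloc-like when every value flowing to its returns is either null or a fresh allocation invisible to the caller, so its result can be marked noalias. A minimal example:

    #include <cstdlib>

    // Every path returns null or freshly malloc'd memory, so the result
    // aliases nothing the caller can see: malloc-like, hence noalias.
    int *makeBuffer(unsigned N) {
      if (N == 0)
        return 0;
      return static_cast<int *>(std::malloc(N * sizeof(int)));
    }
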
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 9c652b996aeb..09f9e7c4f68a 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -58,6 +58,7 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
bool GlobalDCE::runOnModule(Module &M) {
bool Changed = false;
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -147,6 +148,9 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
+
+ // Remove dead metadata.
+ Changed |= M.getContext().RemoveDeadMetadata();
return Changed;
}
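
For context on the GlobalDCE hunk, a conceptual analogue of the pass in plain C++: internal definitions with no remaining references are deleted, and with this change any metadata left dangling afterwards is cleaned up as well.

    // Internal and unreferenced: GlobalDCE would delete this outright.
    static int unusedHelper(int X) { return X; }

    // Externally visible: treated as a root and always kept.
    int kept(int X) { return X + 1; }
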
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 7fe097c7c576..a44386e6c15f 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -20,20 +20,23 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -56,7 +59,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
namespace {
struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(&ID) {}
@@ -244,7 +246,8 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
return false;
}
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
+ LLVMContext &Context) {
ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
if (!CI) return 0;
unsigned IdxV = CI->getZExtValue();
@@ -280,7 +283,8 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
+ LLVMContext &Context) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -301,11 +305,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);
} else if (CE->getOpcode() == Instruction::BitCast &&
isa<PointerType>(CE->getType())) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0);
+ Changed |= CleanupConstantGlobalUsers(CE, 0, Context);
}
if (CE->use_empty()) {
@@ -319,11 +323,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -341,7 +345,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
- CleanupConstantGlobalUsers(V, Init);
+ CleanupConstantGlobalUsers(V, Init, Context);
return true;
}
}
@@ -423,13 +427,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// Scalar replacing *just* the outer index of the array is probably not
// going to be a win anyway, so just give up.
for (++GEPI; // Skip array index.
- GEPI != E && (isa<ArrayType>(*GEPI) || isa<VectorType>(*GEPI));
+ GEPI != E;
++GEPI) {
uint64_t NumElements;
if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
NumElements = SubArrayTy->getNumElements();
- else
- NumElements = cast<VectorType>(*GEPI)->getNumElements();
+ else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ NumElements = SubVectorTy->getNumElements();
+ else {
+ assert(isa<StructType>(*GEPI) &&
+ "Indexed GEP type is not array, vector, or struct!");
+ continue;
+ }
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
@@ -461,7 +470,8 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
+ LLVMContext &Context) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -483,14 +493,15 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::Int32Ty, i));
+ ConstantInt::get(Type::getInt32Ty(Context), i),
+ Context);
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable *NGV = new GlobalVariable(Context,
+ STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
- In, GV->getName()+"."+utostr(i),
- (Module *)NULL,
+ In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
- GV->getType()->getAddressSpace());
+ GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
@@ -517,15 +528,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::Int32Ty, i));
+ ConstantInt::get(Type::getInt32Ty(Context), i),
+ Context);
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable *NGV = new GlobalVariable(Context,
+ STy->getElementType(), false,
GlobalVariable::InternalLinkage,
- In, GV->getName()+"."+utostr(i),
- (Module *)NULL,
+ In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
- GV->getType()->getAddressSpace());
+ GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
@@ -541,9 +553,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewGlobals.empty())
return 0;
- DOUT << "PERFORMING GLOBAL SRA ON: " << *GV;
+ DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);
- Constant *NullInt = Constant::getNullValue(Type::Int32Ty);
+ Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));
// Loop over all of the uses of the global, replacing the constantexpr geps,
// with smaller constantexpr geps or direct references.
@@ -577,7 +589,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
- GEPI->getName()+"."+utostr(Val), GEPI);
+ GEPI->getName()+"."+Twine(Val),GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);
@@ -667,7 +679,8 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
return true;
}
-static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
+ LLVMContext &Context) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
@@ -700,7 +713,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
Changed |= OptimizeAwayTrappingUsesOfValue(CI,
ConstantExpr::getCast(CI->getOpcode(),
- NewV, CI->getType()));
+ NewV, CI->getType()), Context);
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
@@ -717,8 +730,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
- Idxs.size()));
+ ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
+ Idxs.size()), Context);
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -734,7 +747,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// value stored into it. If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value, so we can optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+ LLVMContext &Context) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -745,7 +759,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
- Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);
// If we were able to delete all uses of the loads
if (LI->use_empty()) {
LI->eraseFromParent();
@@ -768,15 +782,15 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
}
if (Changed) {
- DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV;
+ DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
++NumGlobUses;
}
// If we nuked all of the loads, then none of the stores are needed either,
// nor is the global.
if (AllNonStoreUsesGone) {
- DOUT << " *** GLOBAL NOW DEAD!\n";
- CleanupConstantGlobalUsers(GV, 0);
+ DEBUG(errs() << " *** GLOBAL NOW DEAD!\n");
+ CleanupConstantGlobalUsers(GV, 0, Context);
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
@@ -788,10 +802,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V) {
+static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I)) {
+ if (Constant *NewC = ConstantFoldInstruction(I, Context)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -808,8 +822,9 @@ static void ConstantPropUsersOf(Value *V) {
/// malloc, there is no reason to actually DO the malloc. Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
- MallocInst *MI) {
- DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI;
+ MallocInst *MI,
+ LLVMContext &Context) {
+ DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI);
ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());
if (NElements->getZExtValue() != 1) {
@@ -818,10 +833,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
Type *NewTy = ArrayType::get(MI->getAllocatedType(),
NElements->getZExtValue());
MallocInst *NewMI =
- new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty),
+ new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)),
MI->getAlignment(), MI->getName(), MI);
Value* Indices[2];
- Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty);
+ Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context));
Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
NewMI->getName()+".el0", MI);
MI->replaceAllUsesWith(NewGEP);
@@ -831,17 +846,17 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
  // Create the new global variable. The contents of the malloc'd memory are
// undefined, so initialize with an undef value.
+ // FIXME: This new global should have the alignment returned by malloc. Code
+ // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+ // this would only guarantee some lower alignment.
Constant *Init = UndefValue::get(MI->getAllocatedType());
- GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false,
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ MI->getAllocatedType(), false,
GlobalValue::InternalLinkage, Init,
GV->getName()+".body",
- (Module *)NULL,
+ GV,
GV->isThreadLocal());
- // FIXME: This new global should have the alignment returned by malloc. Code
- // could depend on malloc returning large alignment (on the mac, 16 bytes) but
- // this would only guarantee some lower alignment.
- GV->getParent()->getGlobalList().insert(GV, NewGV);
-
+
// Anything that used the malloc now uses the global directly.
MI->replaceAllUsesWith(NewGV);
@@ -853,9 +868,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
GlobalVariable *InitBool =
- new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage,
- ConstantInt::getFalse(), GV->getName()+".init",
- (Module *)NULL, GV->isThreadLocal());
+ new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(Context), GV->getName()+".init",
+ GV->isThreadLocal());
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
@@ -872,10 +888,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI);
InitBoolUsed = true;
switch (CI->getPredicate()) {
- default: assert(0 && "Unknown ICmp Predicate!");
+ default: llvm_unreachable("Unknown ICmp Predicate!");
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
- LV = ConstantInt::getFalse(); // X < null -> always false
+ LV = ConstantInt::getFalse(Context); // X < null -> always false
break;
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE:
@@ -897,7 +913,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
} else {
StoreInst *SI = cast<StoreInst>(GV->use_back());
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(), InitBool, SI);
+ new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
SI->eraseFromParent();
}
@@ -917,9 +933,141 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV);
+ ConstantPropUsersOf(NewGV, Context);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue);
+ ConstantPropUsersOf(RepValue, Context);
+
+ return NewGV;
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+ CallInst *CI,
+ BitCastInst *BCI,
+ LLVMContext &Context,
+ TargetData* TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(Context);
+
+ DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI);
+
+ ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI,
+ Context, TD));
+ if (NElements->getZExtValue() != 1) {
+ // If we have an array allocation, transform it to a single element
+ // allocation to make the code below simpler.
+ Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
+ NElements->getZExtValue());
+ Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
+ Instruction* NewMI = cast<Instruction>(NewM);
+ Value* Indices[2];
+ Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
+ Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+ NewMI->getName()+".el0", CI);
+ BCI->replaceAllUsesWith(NewGEP);
+ BCI->eraseFromParent();
+ CI->eraseFromParent();
+ BCI = cast<BitCastInst>(NewMI);
+ CI = extractMallocCallFromBitCast(NewMI);
+ }
+
+  // Create the new global variable. The contents of the malloc'd memory are
+ // undefined, so initialize with an undef value.
+ // FIXME: This new global should have the alignment returned by malloc. Code
+ // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+ // this would only guarantee some lower alignment.
+ const Type *MAT = getMallocAllocatedType(CI);
+ Constant *Init = UndefValue::get(MAT);
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ MAT, false,
+ GlobalValue::InternalLinkage, Init,
+ GV->getName()+".body",
+ GV,
+ GV->isThreadLocal());
+
+ // Anything that used the malloc now uses the global directly.
+ BCI->replaceAllUsesWith(NewGV);
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(Context), GV->getName()+".init",
+ GV->isThreadLocal());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ std::vector<StoreInst*> Stores;
+ while (!GV->use_empty())
+ if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser()))
+ LoadUse = RepValue;
+ else {
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ LV = ConstantInt::getFalse(Context); // X < null -> always false
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ }
+ LI->eraseFromParent();
+ } else {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
+ SI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<Instruction>(InitBool->use_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+
+ // Now the GV is dead, nuke it and the malloc.
+ GV->eraseFromParent();
+ BCI->eraseFromParent();
+ CI->eraseFromParent();
+
+ // To further other optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV, Context);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue, Context);
return NewGV;
}
@@ -1071,7 +1219,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
- MallocInst *MI) {
+ Instruction *StoredVal) {
SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad;
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
@@ -1095,7 +1243,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
Value *InVal = PN->getIncomingValue(op);
// PHI of the stored value itself is ok.
- if (InVal == MI) continue;
+ if (InVal == StoredVal) continue;
if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
// One of the PHIs in our set is (optimistically) ok.
@@ -1121,7 +1269,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
+ LLVMContext &Context) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
if (FieldNo >= FieldVals.size())
@@ -1139,19 +1288,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
// a new Load of the scalarized global.
Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
InsertedScalarizedValues,
- PHIsToRewrite),
- LI->getName()+".f" + utostr(FieldNo), LI);
+ PHIsToRewrite, Context),
+ LI->getName()+".f"+Twine(FieldNo), LI);
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
const StructType *ST =
cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
- Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
- PN->getName()+".f"+utostr(FieldNo), PN);
+ Result =
+ PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getName()+".f"+Twine(FieldNo), PN);
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
} else {
- assert(0 && "Unknown usable value");
+ llvm_unreachable("Unknown usable value");
Result = 0;
}
@@ -1162,18 +1312,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
+ LLVMContext &Context) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
// If we have a setcc of the loaded pointer, we can use a setcc of any
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
- InsertedScalarizedValues, PHIsToRewrite);
+ InsertedScalarizedValues, PHIsToRewrite,
+ Context);
- Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
- Constant::getNullValue(NPtr->getType()),
- SCI->getName(), SCI);
+ Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName());
SCI->replaceAllUsesWith(New);
SCI->eraseFromParent();
return;
@@ -1187,7 +1339,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
- InsertedScalarizedValues, PHIsToRewrite);
+ InsertedScalarizedValues, PHIsToRewrite,
+ Context);
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
@@ -1219,7 +1372,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
+ Context);
}
}
@@ -1229,11 +1383,13 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
+ LLVMContext &Context) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
+ Context);
}
if (Load->use_empty()) {
@@ -1244,8 +1400,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
-static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
- DOUT << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI;
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI,
+ LLVMContext &Context){
+ DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI);
const StructType *STy = cast<StructType>(MI->getAllocatedType());
// There is guaranteed to be at least one use of the malloc (storing
@@ -1264,14 +1421,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
const Type *PFieldTy = PointerType::getUnqual(FieldTy);
GlobalVariable *NGV =
- new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
+ new GlobalVariable(*GV->getParent(),
+ PFieldTy, false, GlobalValue::InternalLinkage,
Constant::getNullValue(PFieldTy),
- GV->getName() + ".f" + utostr(FieldNo), GV,
+ GV->getName() + ".f" + Twine(FieldNo), GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
- MI->getName() + ".f" + utostr(FieldNo),MI);
+ MI->getName() + ".f" + Twine(FieldNo), MI);
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, MI);
}
@@ -1290,9 +1448,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
// }
Value *RunningOr = 0;
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
- Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i],
- Constant::getNullValue(FieldMallocs[i]->getType()),
- "isnull", MI);
+ Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
if (!RunningOr)
RunningOr = Cond; // First seteq
else
@@ -1305,7 +1463,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
- BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null",
+ BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
OrigBB->getParent());
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
@@ -1317,11 +1475,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
- Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal,
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
- "tmp", NullPtrBlock);
- BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent());
- BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent());
+ "tmp");
+ BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+ OrigBB->getParent());
BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
// Fill in FreeBlock.
@@ -1353,7 +1513,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
Instruction *User = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
+ Context);
continue;
}
@@ -1384,7 +1545,192 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
- PHIsToRewrite);
+ PHIsToRewrite, Context);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
+/// it up into multiple allocations of arrays of the fields.
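+/// As a hedged sketch (hypothetical types): a malloc of N x {i32, double}
+/// stored into one global becomes two mallocs, an i32 array and a double
+/// array, each stored into its own ".f0"/".f1" field global.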
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
+ CallInst *CI, BitCastInst* BCI,
+ LLVMContext &Context,
+ TargetData *TD){
+ DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI
+ << " BITCAST = " << *BCI << '\n');
+ const Type* MAT = getMallocAllocatedType(CI);
+ const StructType *STy = cast<StructType>(MAT);
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(BCI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as CI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<Value*> FieldMallocs;
+
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ const Type *FieldTy = STy->getElementType(FieldNo);
+ const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+
+ GlobalVariable *NGV =
+ new GlobalVariable(*GV->getParent(),
+ PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy),
+ GV->getName() + ".f" + Twine(FieldNo), GV,
+ GV->isThreadLocal());
+ FieldGlobals.push_back(NGV);
+
+ Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy,
+ getMallocArraySize(CI, Context, TD),
+ BCI->getName() + ".f" + Twine(FieldNo));
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, BCI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ Value *RunningOr = 0;
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
+ if (!RunningOr)
+ RunningOr = Cond; // First seteq
+ else
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = BCI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()),
+ "tmp");
+ BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+ OrigBB->getParent());
+ BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ new FreeInst(GVVal, FreeBlock);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // CI and BCI are no longer needed, remove them.
+ BCI->eraseFromParent();
+ CI->eraseFromParent();
+
+ /// InsertedScalarizedValues - As we process loads, if we can't immediately
+ /// update all uses of the load, keep track of the scalarized values that
+ /// have been inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
+ Context);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite, Context);
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
@@ -1422,7 +1768,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
MallocInst *MI,
Module::global_iterator &GVI,
- TargetData &TD) {
+ TargetData *TD,
+ LLVMContext &Context) {
// If this is a malloc of an abstract type, don't touch it.
if (!MI->getAllocatedType()->isSized())
return false;
@@ -1456,9 +1803,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
- if (NElements->getZExtValue()*
- TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
+ if (TD &&
+ NElements->getZExtValue()*
+ TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context);
return true;
}
}
@@ -1485,7 +1833,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
MallocInst *NewMI =
new MallocInst(AllocSTy,
- ConstantInt::get(Type::Int32Ty, AT->getNumElements()),
+ ConstantInt::get(Type::getInt32Ty(Context),
+ AT->getNumElements()),
"", MI);
NewMI->takeName(MI);
Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
@@ -1494,7 +1843,100 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
MI = NewMI;
}
- GVI = PerformHeapAllocSRoA(GV, MI);
+ GVI = PerformHeapAllocSRoA(GV, MI, Context);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored to it that is a malloc or
+/// cast of malloc.
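+/// The matched pattern, as a hedged IR sketch with hypothetical names:
+///   @G = internal global %T* null
+///   %m = call i8* @malloc(i64 %size)
+///   %b = bitcast i8* %m to %T*
+///   store %T* %b, %T** @G        ; the single store into the global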
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+ CallInst *CI,
+ BitCastInst *BCI,
+ Module::global_iterator &GVI,
+ TargetData *TD,
+ LLVMContext &Context) {
+ // If we can't figure out the type being malloc'd, then we can't optimize.
+ const Type *AllocTy = getMallocAllocatedType(CI);
+ assert(AllocTy);
+
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!AllocTy->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+ // We can't optimize this if the malloc itself is used in a complex way,
+ // for example, being stored into multiple globals. With that ruled out,
+ // the malloc may only be stored into the specified global, loaded,
+ // setcc'd, and GEP'd; these are all uses we can rewrite to go through
+ // the global instead.
+ {
+ SmallPtrSet<PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ }
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
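+ // Hedged example (hypothetical code, for illustration): a one-time
+ //   G = (int*)malloc(16 * sizeof(int));
+ // in an initializer can become a 16-element internal global array when the
+ // size check below (and the earlier safety checks) pass.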
+ if (ConstantInt *NElements =
+ dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) {
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (TD &&
+ NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD);
+ return true;
+ }
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (!isArrayMalloc(CI, Context, TD))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+ // If the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
+
+ // If this is a fixed size array, transform the malloc to be a malloc of
+ // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
+ AT->getNumElements());
+ Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+ AllocSTy, NumElements,
+ BCI->getName());
+ Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
+ BCI->replaceAllUsesWith(Cast);
+ BCI->eraseFromParent();
+ CI->eraseFromParent();
+ BCI = cast<BitCastInst>(NewMI);
+ CI = extractMallocCallFromBitCast(NewMI);
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);
return true;
}
}
@@ -1506,7 +1948,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
Module::global_iterator &GVI,
- TargetData &TD) {
+ TargetData *TD, LLVMContext &Context) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1518,14 +1960,25 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
GV->getInitializer()->isNullValue()) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
- SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+ SOVC =
+ ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
return true;
} else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
- if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD))
+ if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context))
return true;
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
+ if (getMallocAllocatedType(CI)) {
+ BitCastInst* BCI = NULL;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
+ if (BCI &&
+ TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
+ return true;
+ }
}
}
@@ -1536,7 +1989,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// two values ever stored into GV are its initializer and OtherVal. See if we
/// can shrink the global into a boolean and select between the two values
/// whenever it is used. This exposes the values to other scalar optimizations.
-static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
+ LLVMContext &Context) {
const Type *GVElType = GV->getType()->getElementType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
@@ -1544,7 +1998,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// between them is very expensive and unlikely to lead to later
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
- if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() ||
+ if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||
isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
return false;
@@ -1554,18 +2008,19 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
return false;
- DOUT << " *** SHRINKING TO BOOL: " << *GV;
+ DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV);
// Create the new global, initializing it to false.
- GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false,
- GlobalValue::InternalLinkage, ConstantInt::getFalse(),
+ GlobalVariable *NewGV = new GlobalVariable(Context,
+ Type::getInt1Ty(Context), false,
+ GlobalValue::InternalLinkage, ConstantInt::getFalse(Context),
GV->getName()+".b",
- (Module *)NULL,
GV->isThreadLocal());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
- assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!");
+ assert(InitVal->getType() != Type::getInt1Ty(Context) &&
+ "No reason to shrink to bool!");
// If initialized to zero and storing one into the global, we can use a cast
// instead of a select to synthesize the desired value.
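// Sketch with assumed values (not from this patch): for a global holding
// only 0 or 1, a load can become "zext i1 %b to i32" rather than
// "select i1 %b, i32 1, i32 0".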
@@ -1581,7 +2036,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
if (StoringOther || SI->getOperand(0) == InitVal)
- StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther);
+ StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);
else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
@@ -1632,7 +2087,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->removeDeadConstantUsers();
if (GV->use_empty()) {
- DOUT << "GLOBAL DEAD: " << *GV;
+ DEBUG(errs() << "GLOBAL DEAD: " << *GV);
GV->eraseFromParent();
++NumDeleted;
return true;
@@ -1675,7 +2130,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GS.AccessingFunction->getName() == "main" &&
GS.AccessingFunction->hasExternalLinkage() &&
GV->getType()->getAddressSpace() == 0) {
- DOUT << "LOCALIZING GLOBAL: " << *GV;
+ DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV);
Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
const Type* ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
@@ -1692,11 +2147,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.isLoaded) {
- DOUT << "GLOBAL NEVER LOADED: " << *GV;
+ DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV);
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
+ GV->getContext());
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1707,16 +2163,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return Changed;
} else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
- DOUT << "MARKING CONSTANT: " << *GV;
+ DEBUG(errs() << "MARKING CONSTANT: " << *GV);
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
- DOUT << " *** Marking constant allowed us to simplify "
- << "all users and delete global!\n";
+ DEBUG(errs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
}
@@ -1724,11 +2180,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV,
- getAnalysis<TargetData>())) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
+ if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD,
+ GV->getContext())) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
} else if (GS.StoredType == GlobalStatus::isStoredOnce) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
@@ -1740,11 +2197,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(),
+ GV->getContext());
if (GV->use_empty()) {
- DOUT << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n";
+ DEBUG(errs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
} else {
@@ -1757,13 +2215,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysis<TargetData>()))
+ getAnalysisIfAvailable<TargetData>(),
+ GV->getContext()))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {
++NumShrunkToBool;
return true;
}
@@ -1866,16 +2325,16 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
if (!ATy) return 0;
const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
if (!STy || STy->getNumElements() != 2 ||
- STy->getElementType(0) != Type::Int32Ty) return 0;
+ STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0;
const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
if (!PFTy) return 0;
const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
- if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() ||
- FTy->getNumParams() != 0)
+ if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) ||
+ FTy->isVarArg() || FTy->getNumParams() != 0)
return 0;
// Verify that the initializer is simple enough for us to handle.
- if (!I->hasInitializer()) return 0;
+ if (!I->hasDefinitiveInitializer()) return 0;
ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
if (!CA) return 0;
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -1916,10 +2375,11 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function*> &Ctors) {
+ const std::vector<Function*> &Ctors,
+ LLVMContext &Context) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
- CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535));
+ CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));
CSVals.push_back(0);
// Create the new init list.
@@ -1928,19 +2388,19 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
if (Ctors[i]) {
CSVals[1] = Ctors[i];
} else {
- const Type *FTy = FunctionType::get(Type::VoidTy, false);
+ const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);
const PointerType *PFTy = PointerType::getUnqual(FTy);
CSVals[1] = Constant::getNullValue(PFTy);
- CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);
}
- CAList.push_back(ConstantStruct::get(CSVals));
+ CAList.push_back(ConstantStruct::get(Context, CSVals, false));
}
// Create the array initializer.
const Type *StructTy =
- cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
- Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()),
- CAList);
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ CAList.size()), CAList);
// If we didn't change the number of elements, don't create a new GV.
if (CA->getType() == GCL->getInitializer()->getType()) {
@@ -1949,9 +2409,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
// Create the new global and insert it next to the existing list.
- GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+ GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(),
+ GCL->isConstant(),
GCL->getLinkage(), CA, "",
- (Module *)NULL,
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
NGV->takeName(GCL);
@@ -1984,21 +2444,38 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
/// enough for us to understand. In particular, if it is a cast of something,
/// we punt. We basically just support direct accesses to globals and GEP's of
/// globals. This should be kept up to date with CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
- return false; // do not allow weak/linkonce/dllimport/dllexport linkage.
- return !GV->isDeclaration(); // reject external globals.
- }
+static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return GV->hasDefinitiveInitializer();
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
// Handle a constantexpr gep.
if (CE->getOpcode() == Instruction::GetElementPtr &&
- isa<GlobalVariable>(CE->getOperand(0))) {
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
- return false; // do not allow weak/linkonce/dllimport/dllexport linkage.
- return GV->hasInitializer() &&
- ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasDefinitiveInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
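+ // Hedged example (hypothetical global): a constantexpr such as
+ //   getelementptr inbounds ([4 x i32]* @g, i32 0, i32 2)
+ // passes these checks; a nonzero first index or an out-of-range later
+ // index would not.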
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
}
return false;
}
@@ -2007,7 +2484,8 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
- ConstantExpr *Addr, unsigned OpNo) {
+ ConstantExpr *Addr, unsigned OpNo,
+ LLVMContext &Context) {
// Base case of the recursion.
if (OpNo == Addr->getNumOperands()) {
assert(Val->getType() == Init->getType() && "Type mismatch!");
@@ -2028,7 +2506,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
Elts.push_back(UndefValue::get(STy->getElementType(i)));
} else {
- assert(0 && "This code is out of sync with "
+ llvm_unreachable("This code is out of sync with "
" ConstantFoldLoadThroughGEPConstantExpr");
}
@@ -2036,10 +2514,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
- Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);
// Return the modified struct.
- return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked());
+ return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());
} else {
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
const ArrayType *ATy = cast<ArrayType>(Init->getType());
@@ -2056,20 +2534,21 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
Constant *Elt = UndefValue::get(ATy->getElementType());
Elts.assign(ATy->getNumElements(), Elt);
} else {
- assert(0 && "This code is out of sync with "
+ llvm_unreachable("This code is out of sync with "
" ConstantFoldLoadThroughGEPConstantExpr");
}
assert(CI->getZExtValue() < ATy->getNumElements());
Elts[CI->getZExtValue()] =
- EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);
return ConstantArray::get(ATy, Elts);
}
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
-static void CommitValueTo(Constant *Val, Constant *Addr) {
+static void CommitValueTo(Constant *Val, Constant *Addr,
+ LLVMContext &Context) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
assert(GV->hasInitializer());
GV->setInitializer(Val);
@@ -2080,7 +2559,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
Constant *Init = GV->getInitializer();
- Init = EvaluateStoreInto(Init, Val, CE, 2);
+ Init = EvaluateStoreInto(Init, Val, CE, 2, Context);
GV->setInitializer(Init);
}
@@ -2088,7 +2567,8 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
/// P after the stores reflected by 'memory' have been performed. If we can't
/// decide, return null.
static Constant *ComputeLoadResult(Constant *P,
- const DenseMap<Constant*, Constant*> &Memory) {
+ const DenseMap<Constant*, Constant*> &Memory,
+ LLVMContext &Context) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
@@ -2096,7 +2576,7 @@ static Constant *ComputeLoadResult(Constant *P,
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
- if (GV->hasInitializer())
+ if (GV->hasDefinitiveInitializer())
return GV->getInitializer();
return 0;
}
@@ -2106,7 +2586,7 @@ static Constant *ComputeLoadResult(Constant *P,
if (CE->getOpcode() == Instruction::GetElementPtr &&
isa<GlobalVariable>(CE->getOperand(0))) {
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- if (GV->hasInitializer())
+ if (GV->hasDefinitiveInitializer())
return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
}
@@ -2117,7 +2597,7 @@ static Constant *ComputeLoadResult(Constant *P,
/// successful, false if we can't evaluate it. ActualArgs contains the formal
/// arguments for the function.
static bool EvaluateFunction(Function *F, Constant *&RetVal,
- const std::vector<Constant*> &ActualArgs,
+ const SmallVectorImpl<Constant*> &ActualArgs,
std::vector<Function*> &CallStack,
DenseMap<Constant*, Constant*> &MutatedMemory,
std::vector<GlobalVariable*> &AllocaTmps) {
@@ -2126,6 +2606,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
+ LLVMContext &Context = F->getContext();
+
CallStack.push_back(F);
/// Values - As we compute SSA register values, we store their contents here.
@@ -2152,7 +2634,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
- if (!isSimpleEnoughPointerToCommit(Ptr))
+ if (!isSimpleEnoughPointerToCommit(Ptr, Context))
// If this is too complex for us to commit, reject it.
return false;
Constant *Val = getVal(Values, SI->getOperand(0));
@@ -2170,7 +2652,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
getVal(Values, CI->getOperand(0)),
CI->getType());
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ InstResult =
+ ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
getVal(Values, SI->getOperand(1)),
getVal(Values, SI->getOperand(2)));
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
@@ -2179,16 +2662,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
i != e; ++i)
GEPOps.push_back(getVal(Values, *i));
- InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
+ ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (LI->isVolatile()) return false; // no volatile accesses.
InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
- MutatedMemory);
+ MutatedMemory, Context);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
const Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(new GlobalVariable(Ty, false,
+ AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
@@ -2208,14 +2693,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));
if (!Callee) return false; // Cannot resolve.
- std::vector<Constant*> Formals;
+ SmallVector<Constant*, 8> Formals;
for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
i != e; ++i)
Formals.push_back(getVal(Values, *i));
-
+
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, &Formals[0],
+ if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
Formals.size())) {
InstResult = C;
} else {
@@ -2310,16 +2795,17 @@ static bool EvaluateStaticConstructor(Function *F) {
// Call the function.
Constant *RetValDummy;
- bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(),
- CallStack, MutatedMemory, AllocaTmps);
+ bool EvalSuccess = EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>(), CallStack,
+ MutatedMemory, AllocaTmps);
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
- DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
- << F->getName() << "' to " << MutatedMemory.size()
- << " stores.\n";
+ DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << MutatedMemory.size()
+ << " stores.\n");
for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
E = MutatedMemory.end(); I != E; ++I)
- CommitValueTo(I->second, I->first);
+ CommitValueTo(I->second, I->first, F->getContext());
}
// At this point, we are done interpreting. If we created any 'alloca'
@@ -2376,7 +2862,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (!MadeChange) return false;
- GCL = InstallGlobalCtors(GCL, Ctors);
+ GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());
return true;
}
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index e4a9deadd971..7b0e9c727cd4 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -129,7 +130,8 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
Function::arg_iterator AI = F.arg_begin();
for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
// Do we have a constant argument?
- if (ArgumentConstants[i].second || AI->use_empty())
+ if (ArgumentConstants[i].second || AI->use_empty() ||
+ (AI->hasByValAttr() && !F.onlyReadsMemory()))
continue;
Value *V = ArgumentConstants[i].first;
@@ -151,13 +153,15 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
// callers will be updated to use the value they pass in directly instead of
// using the return value.
bool IPCP::PropagateConstantReturn(Function &F) {
- if (F.getReturnType() == Type::VoidTy)
+ if (F.getReturnType() == Type::getVoidTy(F.getContext()))
return false; // No return value.
// If this function could be overridden later in the link stage, we can't
// propagate information about its results into callers.
if (F.mayBeOverridden())
return false;
+
+ LLVMContext &Context = F.getContext();
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
@@ -182,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
if (!STy)
V = RI->getOperand(i);
else
- V = FindInsertedValue(RI->getOperand(0), i);
+ V = FindInsertedValue(RI->getOperand(0), i, Context);
if (V) {
// Ignore undefs, we can change them into anything
diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp
index b55dea2c759c..e7884ec634b6 100644
--- a/lib/Transforms/IPO/IndMemRemoval.cpp
+++ b/lib/Transforms/IPO/IndMemRemoval.cpp
@@ -1,4 +1,4 @@
-//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===//
+//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,8 @@
// This pass finds places where memory allocation functions may escape into
// indirect land. Some transforms are much easier (aka possible) only if free
// or malloc are not called indirectly.
-// Thus find places where the address of memory functions are taken and construct
-// bounce functions with direct calls of those functions.
+// Thus, find places where the addresses of memory functions are taken and
+// construct bounce functions with direct calls of those functions.
//
//===----------------------------------------------------------------------===//
@@ -55,8 +55,8 @@ bool IndMemRemPass::runOnModule(Module &M) {
Function* FN = Function::Create(F->getFunctionType(),
GlobalValue::LinkOnceAnyLinkage,
"free_llvm_bounce", &M);
- BasicBlock* bb = BasicBlock::Create("entry",FN);
- Instruction* R = ReturnInst::Create(bb);
+ BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN);
+ Instruction* R = ReturnInst::Create(M.getContext(), bb);
new FreeInst(FN->arg_begin(), R);
++NumBounce;
NumBounceSites += F->getNumUses();
@@ -70,11 +70,12 @@ bool IndMemRemPass::runOnModule(Module &M) {
GlobalValue::LinkOnceAnyLinkage,
"malloc_llvm_bounce", &M);
FN->setDoesNotAlias(0);
- BasicBlock* bb = BasicBlock::Create("entry",FN);
+ BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN);
Instruction* c = CastInst::CreateIntegerCast(
- FN->arg_begin(), Type::Int32Ty, false, "c", bb);
- Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb);
- ReturnInst::Create(a, bb);
+ FN->arg_begin(), Type::getInt32Ty(M.getContext()), false, "c", bb);
+ Instruction* a = new MallocInst(Type::getInt8Ty(M.getContext()),
+ c, "m", bb);
+ ReturnInst::Create(M.getContext(), a, bb);
++NumBounce;
NumBounceSites += F->getNumUses();
F->replaceAllUsesWith(FN);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 5f9ea5453c1f..2344403391cf 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -19,11 +19,11 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/InlineCost.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index e107a0023ce6..b1c643b558c5 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -18,11 +18,11 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/InlineCost.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -78,7 +78,7 @@ bool SimpleInliner::doInitialization(CallGraph &CG) {
return false;
// Don't crash on invalid code
- if (!GV->hasInitializer())
+ if (!GV->hasDefinitiveInitializer())
return false;
const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index b382837289bd..ea47366f47ed 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -18,21 +18,25 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
static cl::opt<int>
-InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
+InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 200)"));
Inliner::Inliner(void *ID)
@@ -45,19 +49,32 @@ Inliner::Inliner(void *ID, int Threshold)
/// the call graph. If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
- Info.addRequired<TargetData>();
CallGraphSCCPass::getAnalysisUsage(Info);
}
-// InlineCallIfPossible - If it is possible to inline the specified call site,
-// do so and update the CallGraph for this operation.
-bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
- const SmallPtrSet<Function*, 8> &SCCFunctions,
- const TargetData &TD) {
+
+typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
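+///
+/// Hedged example (hypothetical functions): if f() and g() each contain a
+/// static [64 x i32] alloca and both are inlined into the same caller, the
+/// second alloca can be replaced with the first, since the two inlined
+/// bodies give them disjoint lifetimes.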
+static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
+ const TargetData *TD,
+ InlinedArrayAllocasTy &InlinedArrayAllocas) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
- if (!InlineFunction(CS, &CG, &TD)) return false;
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ SmallVector<AllocaInst*, 16> StaticAllocas;
+ if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
+ return false;
// If the inlined function had a higher stack protection level than the
// calling function, then bump up the caller's stack protection level.
@@ -67,23 +84,89 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
!Caller->hasFnAttr(Attribute::StackProtectReq))
Caller->addFnAttr(Attribute::StackProtect);
- // If we inlined the last possible call site to the function, delete the
- // function body now.
- if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
- Callee->hasAvailableExternallyLinkage()) &&
- !SCCFunctions.count(Callee)) {
- DOUT << " -> Deleting dead function: " << Callee->getName() << "\n";
- CallGraphNode *CalleeNode = CG[Callee];
-
- // Remove any call graph edges from the callee to its callees.
- CalleeNode->removeAllCalledFunctions();
-
- resetCachedCostInfo(CalleeNode->getFunction());
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+ // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ break;
+ }
- // Removing the node for callee from the call graph and delete it.
- delete CG.removeFunctionFromModule(CalleeNode);
- ++NumDeleted;
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
}
+
return true;
}
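
The merge loop above is worth seeing in isolation. A minimal sketch of the same reuse test for a single freshly inlined alloca, assuming (as the indexing above implies) that InlinedArrayAllocas maps each array type to the allocas already inlined for it; the free-function wrapper and its name are hypothetical:

static bool tryMergeInlinedAlloca(AllocaInst *AI,
                                  std::vector<AllocaInst*> &AllocasForType,
                                  SmallPtrSet<AllocaInst*, 16> &UsedAllocas) {
  for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
    AllocaInst *Available = AllocasForType[i];
    // Must be a static alloca in the same entry block, and not already
    // claimed by another alloca from this same inline step (their
    // lifetimes would overlap).
    if (Available->getParent() != AI->getParent() ||
        !UsedAllocas.insert(Available))
      continue;
    AI->replaceAllUsesWith(Available);  // reuse the existing stack slot
    AI->eraseFromParent();
    return true;
  }
  return false;  // caller records AI as a fresh candidate instead
}

The early-return shape makes the invariant explicit: an available alloca is consumed at most once per inline operation, which is exactly what the UsedAllocas set enforces above.
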
@@ -91,69 +174,145 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
- float FudgeFactor = getInlineFudgeFactor(CS);
if (IC.isAlways()) {
- DOUT << " Inlining: cost=always"
- << ", Call: " << *CS.getInstruction();
+ DEBUG(errs() << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction() << "\n");
return true;
}
if (IC.isNever()) {
- DOUT << " NOT Inlining: cost=never"
- << ", Call: " << *CS.getInstruction();
+ DEBUG(errs() << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction() << "\n");
return false;
}
int Cost = IC.getValue();
int CurrentThreshold = InlineThreshold;
- Function *Fn = CS.getCaller();
- if (Fn && !Fn->isDeclaration()
- && Fn->hasFnAttr(Attribute::OptimizeForSize)
- && InlineThreshold != 50) {
+ Function *Caller = CS.getCaller();
+ if (Caller && !Caller->isDeclaration() &&
+ Caller->hasFnAttr(Attribute::OptimizeForSize) &&
+ InlineLimit.getNumOccurrences() == 0 &&
+ InlineThreshold != 50)
CurrentThreshold = 50;
- }
+ float FudgeFactor = getInlineFudgeFactor(CS);
if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
- DOUT << " NOT Inlining: cost=" << Cost
- << ", Call: " << *CS.getInstruction();
+ DEBUG(errs() << " NOT Inlining: cost=" << Cost
+ << ", Call: " << *CS.getInstruction() << "\n");
return false;
- } else {
- DOUT << " Inlining: cost=" << Cost
- << ", Call: " << *CS.getInstruction();
- return true;
}
+
+ // Try to detect the case where the current inlining candidate caller
+ // (call it B) is a static function and is an inlining candidate elsewhere,
+ // and the current candidate callee (call it C) is large enough that
+ // inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B
+ // into its callers.
+ if (Caller->hasLocalLinkage()) {
+ int TotalSecondaryCost = 0;
+ bool outerCallsFound = false;
+ bool allOuterCallsWillBeInlined = true;
+ bool someOuterCallWouldNotBeInlined = false;
+ for (Value::use_iterator I = Caller->use_begin(), E = Caller->use_end();
+ I != E; ++I) {
+ CallSite CS2 = CallSite::get(*I);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it.
+ if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+ continue;
+
+ InlineCost IC2 = getInlineCost(CS2);
+ if (IC2.isNever())
+ allOuterCallsWillBeInlined = false;
+ if (IC2.isAlways() || IC2.isNever())
+ continue;
+
+ outerCallsFound = true;
+ int Cost2 = IC2.getValue();
+ int CurrentThreshold2 = InlineThreshold;
+ Function *Caller2 = CS2.getCaller();
+ if (Caller2 && !Caller2->isDeclaration() &&
+ Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
+ InlineThreshold != 50)
+ CurrentThreshold2 = 50;
+
+ float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+ if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+ allOuterCallsWillBeInlined = false;
+
+ // See if we have this case. We subtract off the penalty
+ // for the call instruction, which we would be deleting.
+ if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+ Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
+ (int)(CurrentThreshold2 * FudgeFactor2)) {
+ someOuterCallWouldNotBeInlined = true;
+ TotalSecondaryCost += Cost2;
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (outerCallsFound && someOuterCallWouldNotBeInlined &&
+ TotalSecondaryCost < Cost) {
+ DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << Cost <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ return false;
+ }
+ }
+
+ DEBUG(errs() << " Inlining: cost=" << Cost
+ << ", Call: " << *CS.getInstruction() << '\n');
+ return true;
}
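
To make the secondary-cost test above concrete, here is one worked instance as a sketch; the numbers and the CallPenalty value of 25 are illustrative assumptions, and FudgeFactor is taken as 1.0 throughout:

// Candidate: inline C into B.   Cost  = 300, threshold = 400 -> passes.
// Outer call of B:              Cost2 = 150, threshold = 400 -> passes today,
//   but with C inlined into B:  150 + 300 - (25 + 1) = 424 >= 400,
//   so that outer inline would be blocked (someOuterCallWouldNotBeInlined).
// TotalSecondaryCost = 150 < Cost = 300, hence shouldInline() returns false:
// better to keep B small now and inline B into its callers instead.
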
-bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
+bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
- TargetData &TD = getAnalysis<TargetData>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
SmallPtrSet<Function*, 8> SCCFunctions;
- DOUT << "Inliner visiting SCC:";
+ DEBUG(errs() << "Inliner visiting SCC:");
for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
Function *F = SCC[i]->getFunction();
if (F) SCCFunctions.insert(F);
- DOUT << " " << (F ? F->getName() : "INDIRECTNODE");
+ DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE"));
}
// Scan through and identify all call sites ahead of time so that we only
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
- std::vector<CallSite> CallSites;
+ SmallVector<CallSite, 16> CallSites;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- if (Function *F = SCC[i]->getFunction())
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(I) &&
- (!CS.getCalledFunction() ||
- !CS.getCalledFunction()->isDeclaration()))
- CallSites.push_back(CS);
- }
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS = CallSite::get(I);
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+ continue;
+
+ CallSites.push_back(CS);
+ }
+ }
- DOUT << ": " << CallSites.size() << " call sites.\n";
+ DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
// Now that we have all of the call sites, move the ones to functions in the
// current SCC to the end of the list.
@@ -163,6 +322,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
if (SCCFunctions.count(F))
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
bool Changed = false;
@@ -171,51 +333,68 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
LocalChange = false;
// Iterate over the outer loop because inlining functions can cause indirect
// calls to become direct calls.
- for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi)
- if (Function *Callee = CallSites[CSi].getCalledFunction()) {
- // Calls to external functions are never inlinable.
- if (Callee->isDeclaration()) {
- if (SCC.size() == 1) {
- std::swap(CallSites[CSi], CallSites.back());
- CallSites.pop_back();
- } else {
- // Keep the 'in SCC / not in SCC' boundary correct.
- CallSites.erase(CallSites.begin()+CSi);
- }
- --CSi;
- continue;
- }
-
- // If the policy determines that we should inline this function,
- // try to do so.
- CallSite CS = CallSites[CSi];
- if (shouldInline(CS)) {
- Function *Caller = CS.getCaller();
- // Attempt to inline the function...
- if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) {
- // Remove any cached cost info for this caller, as inlining the
- // callee has increased the size of the caller (which may be the
- // same as the callee).
- resetCachedCostInfo(Caller);
-
- // Remove this call site from the list. If possible, use
- // swap/pop_back for efficiency, but do not use it if doing so would
- // move a call site to a function in this SCC before the
- // 'FirstCallInSCC' barrier.
- if (SCC.size() == 1) {
- std::swap(CallSites[CSi], CallSites.back());
- CallSites.pop_back();
- } else {
- CallSites.erase(CallSites.begin()+CSi);
- }
- --CSi;
-
- ++NumInlined;
- Changed = true;
- LocalChange = true;
- }
- }
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi];
+
+ Function *Callee = CS.getCalledFunction();
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
+
+ Function *Caller = CS.getCaller();
+ // Attempt to inline the function...
+ if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+ continue;
+
+ // If we inlined the last possible call site to the function, delete the
+ // function body now.
+ if (Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // TODO: Can remove if in SCC now.
+ !SCCFunctions.count(Callee) &&
+
+ // The function may be apparently dead, but if there are indirect
+ // callgraph references to the node, we cannot delete it yet; this
+ // could invalidate the CGSCC iterator.
+ CG[Callee]->getNumReferences() == 0) {
+ DEBUG(errs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ resetCachedCostInfo(Callee);
+
+ // Remove the callee's node from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
}
+
+ // Remove any cached cost info for this caller, as inlining the
+ // callee has increased the size of the caller (which may be the
+ // same as the callee).
+ resetCachedCostInfo(Caller);
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.size() == 1) {
+ std::swap(CallSites[CSi], CallSites.back());
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ ++NumInlined;
+ Changed = true;
+ LocalChange = true;
+ }
} while (LocalChange);
return Changed;
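
The call-site removal at the bottom of the loop above switches between two strategies to keep the FirstCallInSCC barrier intact. The pattern in isolation, as a hypothetical helper:

// O(1) unordered removal when the barrier cannot be disturbed
// (single-node SCC); O(n) order-preserving erase otherwise.
static void removeCallSiteAt(SmallVector<CallSite, 16> &CallSites,
                             unsigned i, bool SingleNodeSCC) {
  if (SingleNodeSCC) {
    std::swap(CallSites[i], CallSites.back());
    CallSites.pop_back();
  } else {
    CallSites.erase(CallSites.begin() + i);
  }
}
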
@@ -227,47 +406,55 @@ bool Inliner::doFinalization(CallGraph &CG) {
return removeDeadFunctions(CG);
}
- /// removeDeadFunctions - Remove dead functions that are not included in
- /// DNR (Do Not Remove) list.
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// the DNR (Do Not Remove) list.
bool Inliner::removeDeadFunctions(CallGraph &CG,
- SmallPtrSet<const Function *, 16> *DNR) {
- std::set<CallGraphNode*> FunctionsToRemove;
+ SmallPtrSet<const Function *, 16> *DNR) {
+ SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
CallGraphNode *CGN = I->second;
- if (Function *F = CGN ? CGN->getFunction() : 0) {
- // If the only remaining users of the function are dead constants, remove
- // them.
- F->removeDeadConstantUsers();
-
- if (DNR && DNR->count(F))
- continue;
+ if (CGN->getFunction() == 0)
+ continue;
+
+ Function *F = CGN->getFunction();
+
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (DNR && DNR->count(F))
+ continue;
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !F->hasAvailableExternallyLinkage())
+ continue;
+ if (!F->use_empty())
+ continue;
+
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+ // node. These edges might have been made irrelevant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
- if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) &&
- F->use_empty()) {
-
- // Remove any call graph edges from the function to its callees.
- CGN->removeAllCalledFunctions();
-
- // Remove any edges from the external node to the function's call graph
- // node. These edges might have been made irrelevant due to
- // optimization of the program.
- CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
-
- // Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.insert(CGN);
- }
- }
+ // Remove the callee's node from the call graph and delete it.
+ FunctionsToRemove.insert(CGN);
}
// Now that we know which functions to delete, do so. We didn't want to do
// this inline, because that would invalidate our CallGraph::iterator
// objects. :(
+ //
+ // Note that it doesn't matter that we are iterating over a non-stable set
+ // here to do this, it doesn't matter which order the functions are deleted
+ // in.
bool Changed = false;
- for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(),
- E = FunctionsToRemove.end(); I != E; ++I) {
+ for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end(); I != E; ++I) {
resetCachedCostInfo((*I)->getFunction());
delete CG.removeFunctionFromModule(*I);
++NumDeleted;
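
The comment above states why the deletion is two-phase; the skeleton of that idiom, with isDeadNode standing in (hypothetically) for the linkage and use checks performed in the loop:

// Phase 1: decide while iterating (no mutation of CG).
SmallPtrSet<CallGraphNode*, 16> ToRemove;
for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I)
  if (I->second->getFunction() && isDeadNode(I->second))
    ToRemove.insert(I->second);

// Phase 2: mutate; no CallGraph::iterator is live here.
for (SmallPtrSet<CallGraphNode*, 16>::iterator I = ToRemove.begin(),
       E = ToRemove.end(); I != E; ++I)
  delete CG.removeFunctionFromModule(*I);
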
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 5093ae90b5ba..e3c3c672c590 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include <fstream>
#include <set>
@@ -86,7 +87,7 @@ void InternalizePass::LoadFile(const char *Filename) {
// Load the APIFile...
std::ifstream In(Filename);
if (!In.good()) {
- cerr << "WARNING: Internalize couldn't load file '" << Filename
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
<< "'! Continuing as if it's empty.\n";
return; // Just continue as if the file were empty
}
@@ -101,7 +102,7 @@ void InternalizePass::LoadFile(const char *Filename) {
bool InternalizePass::runOnModule(Module &M) {
CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
-
+
if (ExternalNames.empty()) {
// Return if we're not in 'all but main' mode and have no external api
if (!AllButMain)
@@ -131,12 +132,14 @@ bool InternalizePass::runOnModule(Module &M) {
if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
Changed = true;
++NumFunctions;
- DOUT << "Internalizing func " << I->getName() << "\n";
+ DEBUG(errs() << "Internalizing func " << I->getName() << "\n");
}
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
+ // FIXME: Shouldn't this just filter on llvm.metadata section??
ExternalNames.insert("llvm.used");
+ ExternalNames.insert("llvm.compiler.used");
// Never internalize anchors used by the machine module info, else the info
// won't find them. (see MachineModuleInfo.)
@@ -158,7 +161,7 @@ bool InternalizePass::runOnModule(Module &M) {
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumGlobals;
- DOUT << "Internalized gvar " << I->getName() << "\n";
+ DEBUG(errs() << "Internalized gvar " << I->getName() << "\n");
}
// Mark all aliases that are not in the api as internal as well.
@@ -169,7 +172,7 @@ bool InternalizePass::runOnModule(Module &M) {
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumAliases;
- DOUT << "Internalized alias " << I->getName() << "\n";
+ DEBUG(errs() << "Internalized alias " << I->getName() << "\n");
}
return Changed;
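
The DOUT-to-DEBUG(errs()) rewrites in this hunk follow a fixed idiom; a self-contained sketch of it under the era's Debug.h (output appears only in asserts builds run with -debug; the DEBUG_TYPE tag here is our own):

#define DEBUG_TYPE "internalize-example"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void reportInternalized(const GlobalValue *GV) {
  // Compiles away entirely in release (NDEBUG) builds.
  DEBUG(errs() << "Internalizing " << GV->getName() << "\n");
}
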
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 0c654438d508..02ac3bb903c7 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -20,7 +20,7 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/Scalar.h"
@@ -33,23 +33,19 @@ using namespace llvm;
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
- // FIXME: This is not a function pass, but the PassManager doesn't allow
- // Module passes to require FunctionPasses, so we can't get loop info if we're
- // not a function pass.
- struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass {
+ struct VISIBILITY_HIDDEN LoopExtractor : public LoopPass {
static char ID; // Pass identification, replacement for typeid
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : FunctionPass(&ID), NumLoops(numLoops) {}
+ : LoopPass(&ID), NumLoops(numLoops) {}
- virtual bool runOnFunction(Function &F);
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTree>();
- AU.addRequired<LoopInfo>();
}
};
}
@@ -73,68 +69,50 @@ Y("loop-extract-single", "Extract at most one loop into a new function");
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
//
-FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
-bool LoopExtractor::runOnFunction(Function &F) {
- LoopInfo &LI = getAnalysis<LoopInfo>();
-
- // If this function has no loops, there is nothing to do.
- if (LI.empty())
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only visit top-level loops.
+ if (L->getParentLoop())
return false;
DominatorTree &DT = getAnalysis<DominatorTree>();
-
- // If there is more than one top-level loop in this function, extract all of
- // the loops.
bool Changed = false;
- if (LI.end()-LI.begin() > 1) {
- for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) {
- if (NumLoops == 0) return Changed;
- --NumLoops;
- Changed |= ExtractLoop(DT, *i) != 0;
- ++NumExtracted;
- }
- } else {
- // Otherwise there is exactly one top-level loop. If this function is more
- // than a minimal wrapper around the loop, extract the loop.
- Loop *TLL = *LI.begin();
- bool ShouldExtractLoop = false;
-
- // Extract the loop if the entry block doesn't branch to the loop header.
- TerminatorInst *EntryTI = F.getEntryBlock().getTerminator();
- if (!isa<BranchInst>(EntryTI) ||
- !cast<BranchInst>(EntryTI)->isUnconditional() ||
- EntryTI->getSuccessor(0) != TLL->getHeader())
- ShouldExtractLoop = true;
- else {
- // Check to see if any exits from the loop are more than just return
- // blocks.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- TLL->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
- ShouldExtractLoop = true;
- break;
- }
- }
- if (ShouldExtractLoop) {
- if (NumLoops == 0) return Changed;
- --NumLoops;
- Changed |= ExtractLoop(DT, TLL) != 0;
- ++NumExtracted;
- } else {
- // Okay, this function is a minimal container around the specified loop.
- // If we extract the loop, we will continue to just keep extracting it
- // infinitely... so don't extract it. However, if the loop contains any
- // subloops, extract them.
- for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) {
- if (NumLoops == 0) return Changed;
- --NumLoops;
- Changed |= ExtractLoop(DT, *i) != 0;
- ++NumExtracted;
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops. Otherwise there is exactly one top-level loop; in this case if
+ // this function is more than a minimal wrapper around the loop, extract
+ // the loop.
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != L->getHeader())
+ ShouldExtractLoop = true;
+ else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
}
+ }
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ if (ExtractLoop(DT, L) != 0) {
+ Changed = true;
+ // After extraction, the loop is replaced by a function call, so
+ // we shouldn't try to run any more loop passes on it.
+ LPM.deleteLoopFromQueue(L);
}
+ ++NumExtracted;
}
return Changed;
@@ -143,7 +121,7 @@ bool LoopExtractor::runOnFunction(Function &F) {
// createSingleLoopExtractorPass - This pass extracts one natural loop from the
// program into a function if it can. This is used by bugpoint.
//
-FunctionPass *llvm::createSingleLoopExtractorPass() {
+Pass *llvm::createSingleLoopExtractorPass() {
return new SingleLoopExtractor();
}
@@ -193,8 +171,8 @@ void BlockExtractorPass::LoadFile(const char *Filename) {
// Load the BlockFile...
std::ifstream In(Filename);
if (!In.good()) {
- cerr << "WARNING: BlockExtractor couldn't load file '" << Filename
- << "'!\n";
+ errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
return;
}
while (In) {
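
The LoopExtractor rewrite above is a FunctionPass-to-LoopPass conversion; for reference, the bare skeleton of a LoopPass under this pass-manager API (the pass itself is a placeholder):

#include "llvm/Analysis/LoopPass.h"
using namespace llvm;

namespace {
  struct SketchLoopPass : public LoopPass {
    static char ID;
    SketchLoopPass() : LoopPass(&ID) {}

    virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
      if (L->getParentLoop())    // same guard as above: top-level loops only
        return false;
      // ... transform L; if L itself is destroyed, tell the manager:
      // LPM.deleteLoopFromQueue(L);
      return false;
    }
  };
}
char SketchLoopPass::ID = 0;
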
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index dfc040b83342..55194b34cf20 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -39,6 +39,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
@@ -200,7 +201,7 @@ bool LowerSetJmp::runOnModule(Module& M) {
// This function is always successful, unless it isn't.
bool LowerSetJmp::doInitialization(Module& M)
{
- const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type *SBPTy = Type::getInt8PtrTy(M.getContext());
const Type *SBPPTy = PointerType::getUnqual(SBPTy);
// N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
@@ -208,33 +209,40 @@ bool LowerSetJmp::doInitialization(Module& M)
// void __llvm_sjljeh_init_setjmpmap(void**)
InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
- Type::VoidTy, SBPPTy, (Type *)0);
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
// void __llvm_sjljeh_destroy_setjmpmap(void**)
DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
- Type::VoidTy, SBPPTy, (Type *)0);
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
// void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
- Type::VoidTy, SBPPTy, SBPTy,
- Type::Int32Ty, (Type *)0);
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
// void __llvm_sjljeh_throw_longjmp(int*, int)
ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
- Type::VoidTy, SBPTy, Type::Int32Ty,
+ Type::getVoidTy(M.getContext()), SBPTy,
+ Type::getInt32Ty(M.getContext()),
(Type *)0);
// unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
TryCatchLJ =
M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
- Type::Int32Ty, SBPPTy, (Type *)0);
+ Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
// bool __llvm_sjljeh_is_longjmp_exception()
IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
- Type::Int1Ty, (Type *)0);
+ Type::getInt1Ty(M.getContext()),
+ (Type *)0);
// int __llvm_sjljeh_get_longjmp_value()
GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
- Type::Int32Ty, (Type *)0);
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
return true;
}
@@ -257,7 +265,8 @@ bool LowerSetJmp::IsTransformableFunction(const std::string& Name) {
// throwing the exception for us.
void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
{
- const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type* SBPTy =
+ Type::getInt8PtrTy(Inst->getContext());
// Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
// same parameters as "longjmp", except that the buffer is cast to a
@@ -278,7 +287,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
if (SVP.first)
BranchInst::Create(SVP.first->getParent(), Inst);
else
- new UnwindInst(Inst);
+ new UnwindInst(Inst->getContext(), Inst);
// Remove all insts after the branch/unwind inst. Go from back to front to
// avoid replaceAllUsesWith if possible.
@@ -309,7 +318,8 @@ AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
assert(Inst && "Couldn't find even ONE instruction in entry block!");
// Fill in the alloca and call to initialize the SJ map.
- const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type *SBPTy =
+ Type::getInt8PtrTy(Func->getContext());
AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
CallInst::Create(InitSJMap, Map, "", Inst);
return SJMap[Func] = Map;
@@ -324,12 +334,13 @@ BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
// The basic block we're going to jump to if we need to rethrow the
// exception.
- BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func);
+ BasicBlock* Rethrow =
+ BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
// Fill in the "Rethrow" BB with a call to rethrow the exception. This
// is the last instruction in the BB since at this point the runtime
// should exit this function and go to the next function.
- new UnwindInst(Rethrow);
+ new UnwindInst(Func->getContext(), Rethrow);
return RethrowBBMap[Func] = Rethrow;
}
@@ -340,7 +351,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
{
if (SwitchValMap[Func].first) return SwitchValMap[Func];
- BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func);
+ BasicBlock* LongJmpPre =
+ BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
// Keep track of the preliminary basic block for some of the other
// transformations.
@@ -352,7 +364,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
// The "decision basic block" gets the number associated with the
// setjmp call returning to switch on and the value returned by
// longjmp.
- BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func);
+ BasicBlock* DecisionBB =
+ BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
@@ -375,12 +388,13 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
Function* Func = ABlock->getParent();
// Add this setjmp to the setjmp map.
- const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type* SBPTy =
+ Type::getInt8PtrTy(Inst->getContext());
CastInst* BufPtr =
new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
std::vector<Value*> Args =
make_vector<Value*>(GetSetJmpMap(Func), BufPtr,
- ConstantInt::get(Type::Int32Ty,
+ ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
SetJmpIDMap[Func]++), 0);
CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst);
@@ -424,14 +438,17 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
// This PHI node will be in the new block created from the
// splitBasicBlock call.
- PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst);
+ PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()),
+ "SetJmpReturn", Inst);
// Coming from a call to setjmp, the return is 0.
- PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock);
+ PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
+ ABlock);
// Add the case for this setjmp's number...
SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
- SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1),
+ SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
+ SetJmpIDMap[Func] - 1),
SetJmpContBlock);
// Value coming from the handling of the exception.
@@ -503,7 +520,8 @@ void LowerSetJmp::visitInvokeInst(InvokeInst& II)
BasicBlock* ExceptBB = II.getUnwindDest();
Function* Func = BB->getParent();
- BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func);
+ BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),
+ "InvokeExcept", Func);
// If this is a longjmp exception, then branch to the preliminary BB of
// the longjmp exception handling. Otherwise, go to the old exception.
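
Nearly all of the LowerSetJmp churn above is one mechanical substitution: the global type singletons (Type::Int8Ty, Type::Int32Ty, Type::VoidTy) give way to per-LLVMContext getters. The before/after shape as a sketch (the function name is ours):

// Before:  PointerType::getUnqual(Type::Int8Ty)    // global singleton
// After:   Type::getInt8PtrTy(M.getContext())      // context-qualified
static const Type *setjmpBufMapTy(Module &M) {
  LLVMContext &Ctx = M.getContext();
  const Type *SBPTy = Type::getInt8PtrTy(Ctx);   // i8*
  return PointerType::getUnqual(SBPTy);          // i8**, the map parameter
}

Every Type::getXTy(Ctx) call site in the hunk follows this same translation.
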
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 5693cc0fc3b4..13bbf9c682e4 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -47,11 +47,14 @@
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <map>
#include <vector>
using namespace llvm;
@@ -61,7 +64,7 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged");
namespace {
struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- MergeFunctions() : ModulePass((intptr_t)&ID) {}
+ MergeFunctions() : ModulePass(&ID) {}
bool runOnModule(Module &M);
};
@@ -127,7 +130,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return false;
default:
- assert(0 && "Unknown type!");
+ llvm_unreachable("Unknown type!");
return false;
case Type::PointerTyID: {
@@ -185,7 +188,8 @@ static bool
isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
- !isEquivalentType(I1->getType(), I2->getType()))
+ !isEquivalentType(I1->getType(), I2->getType()) ||
+ !I1->hasSameSubclassOptionalData(I2))
return false;
// We have two instructions of identical opcode and #operands. Check to see
@@ -449,6 +453,7 @@ static LinkageCategory categorize(const Function *F) {
switch (F->getLinkage()) {
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
return Internal;
case GlobalValue::WeakAnyLinkage:
@@ -468,14 +473,14 @@ static LinkageCategory categorize(const Function *F) {
return ExternalStrong;
}
- assert(0 && "Unknown LinkageType.");
+ llvm_unreachable("Unknown LinkageType.");
return ExternalWeak;
}
static void ThunkGToF(Function *F, Function *G) {
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
- BasicBlock *BB = BasicBlock::Create("", NewG);
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
std::vector<Value *> Args;
unsigned i = 0;
@@ -494,13 +499,13 @@ static void ThunkGToF(Function *F, Function *G) {
CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
- if (NewG->getReturnType() == Type::VoidTy) {
- ReturnInst::Create(BB);
+ if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) {
+ ReturnInst::Create(F->getContext(), BB);
} else if (CI->getType() != NewG->getReturnType()) {
Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
- ReturnInst::Create(BCI, BB);
+ ReturnInst::Create(F->getContext(), BCI, BB);
} else {
- ReturnInst::Create(CI, BB);
+ ReturnInst::Create(F->getContext(), CI, BB);
}
NewG->copyAttributesFrom(G);
@@ -574,22 +579,22 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
case Internal:
switch (catG) {
case ExternalStrong:
- assert(0);
+ llvm_unreachable(0);
// fall-through
case ExternalWeak:
- if (F->hasAddressTaken())
+ if (F->hasAddressTaken())
ThunkGToF(F, G);
else
AliasGToF(F, G);
- break;
+ break;
case Internal: {
bool addrTakenF = F->hasAddressTaken();
bool addrTakenG = G->hasAddressTaken();
if (!addrTakenF && addrTakenG) {
std::swap(FnVec[i], FnVec[j]);
std::swap(F, G);
- std::swap(addrTakenF, addrTakenG);
- }
+ std::swap(addrTakenF, addrTakenG);
+ }
if (addrTakenF && addrTakenG) {
ThunkGToF(F, G);
@@ -597,7 +602,7 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
assert(!addrTakenG);
AliasGToF(F, G);
}
- } break;
+ } break;
}
break;
}
@@ -629,19 +634,19 @@ bool MergeFunctions::runOnModule(Module &M) {
bool LocalChanged;
do {
LocalChanged = false;
- DOUT << "size: " << FnMap.size() << "\n";
+ DEBUG(errs() << "size: " << FnMap.size() << "\n");
for (std::map<unsigned long, std::vector<Function *> >::iterator
I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
std::vector<Function *> &FnVec = I->second;
- DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";
+ DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
for (int i = 0, e = FnVec.size(); i != e; ++i) {
for (int j = i + 1; j != e; ++j) {
bool isEqual = equals(FnVec[i], FnVec[j]);
- DOUT << " " << FnVec[i]->getName()
- << (isEqual ? " == " : " != ")
- << FnVec[j]->getName() << "\n";
+ DEBUG(errs() << " " << FnVec[i]->getName()
+ << (isEqual ? " == " : " != ")
+ << FnVec[j]->getName() << "\n");
if (isEqual) {
if (fold(FnVec, i, j)) {
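
The loop structure visible above — hash buckets, then pairwise equals() within each bucket — is the whole strategy of the pass. Its shape in miniature, with hashFunction standing in (hypothetically) for whatever cheap signature the pass computes:

std::map<unsigned long, std::vector<Function*> > FnMap;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
  if (!F->isDeclaration())
    FnMap[hashFunction(F)].push_back(F);   // cheap bucket key

for (std::map<unsigned long, std::vector<Function*> >::iterator
       I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
  std::vector<Function*> &FnVec = I->second;
  for (int i = 0, e = FnVec.size(); i != e; ++i)
    for (int j = i + 1; j != e; ++j)
      if (equals(FnVec[i], FnVec[j]))
        fold(FnVec, i, j);   // merge; the real pass then rescans the bucket
}

The expensive structural comparison runs only inside a bucket, so unrelated functions never meet.
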
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 73ec9c107637..8f858d35ea3f 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -48,7 +48,8 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
Function* PartialInliner::unswitchFunction(Function* F) {
// First, verify that this function is an unswitching candidate...
BasicBlock* entryBlock = F->begin();
- if (!isa<BranchInst>(entryBlock->getTerminator()))
+ BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ if (!BR || BR->isUnconditional())
return 0;
BasicBlock* returnBlock = 0;
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 2b52f464b674..daf81e9259da 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -19,6 +19,7 @@
#include "llvm/CallGraphSCCPass.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
@@ -40,7 +41,7 @@ namespace {
PruneEH() : CallGraphSCCPass(&ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ bool runOnSCC(std::vector<CallGraphNode *> &SCC);
bool SimplifyFunction(Function *F);
void DeleteBasicBlock(BasicBlock *BB);
@@ -54,7 +55,7 @@ X("prune-eh", "Remove unused exception handling info");
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
-bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
SmallPtrSet<CallGraphNode *, 8> SCCNodes;
CallGraph &CG = getAnalysis<CallGraph>();
bool MadeChange = false;
@@ -164,9 +165,6 @@ bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
// function if we have invokes to non-unwinding functions or code after calls to
// no-return functions.
bool PruneEH::SimplifyFunction(Function *F) {
- CallGraph &CG = getAnalysis<CallGraph>();
- CallGraphNode *CGN = CG[F];
-
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
@@ -180,14 +178,13 @@ bool PruneEH::SimplifyFunction(Function *F) {
Call->setAttributes(II->getAttributes());
// Anything that used the value produced by the invoke instruction
- // now uses the value produced by the call instruction.
+ // now uses the value produced by the call instruction. Note that we
+ // do this even for void functions and calls with no uses so that the
+ // callgraph edge is updated.
II->replaceAllUsesWith(Call);
BasicBlock *UnwindBlock = II->getUnwindDest();
UnwindBlock->removePredecessor(II->getParent());
- // Fix up the call graph.
- CGN->replaceCallSite(II, Call);
-
// Insert a branch to the normal destination right before the
// invoke.
BranchInst::Create(II->getNormalDest(), II);
@@ -214,7 +211,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
// Remove the uncond branch and add an unreachable.
BB->getInstList().pop_back();
- new UnreachableInst(BB);
+ new UnreachableInst(BB->getContext(), BB);
DeleteBasicBlock(New); // Delete the new BB.
MadeChange = true;
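
The invoke-to-call conversion referenced above follows a fixed recipe; a sketch of its core, assuming the invoke operand layout of this era (callee, normal dest, unwind dest, then the arguments):

// II is an InvokeInst whose callee is known not to unwind.
SmallVector<Value*, 8> Args(II->op_begin() + 3, II->op_end());
CallInst *Call = CallInst::Create(II->getCalledValue(),
                                  Args.begin(), Args.end(), "", II);
Call->takeName(II);
Call->setCallingConv(II->getCallingConv());
Call->setAttributes(II->getAttributes());
II->replaceAllUsesWith(Call);   // done unconditionally; see the comment above
II->getUnwindDest()->removePredecessor(II->getParent());
BranchInst::Create(II->getNormalDest(), II);  // fall through to normal dest
II->eraseFromParent();
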
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index 99003689fb1f..4c1f26d50d30 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
@@ -69,7 +70,6 @@ ModulePass *llvm::createRaiseAllocationsPass() {
// function into the appropriate instruction.
//
void RaiseAllocations::doInitialization(Module &M) {
-
// Get Malloc and free prototypes if they exist!
MallocFunc = M.getFunction("malloc");
if (MallocFunc) {
@@ -77,22 +77,27 @@ void RaiseAllocations::doInitialization(Module &M) {
// Get the expected prototype for malloc
const FunctionType *Malloc1Type =
- FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
- std::vector<const Type*>(1, Type::Int64Ty), false);
+ FunctionType::get(Type::getInt8PtrTy(M.getContext()),
+ std::vector<const Type*>(1,
+ Type::getInt64Ty(M.getContext())), false);
// Check to see if we got the expected malloc
if (TyWeHave != Malloc1Type) {
// Check to see if the prototype is wrong, giving us i8*(i32) * malloc
// This handles the common declaration of: 'void *malloc(unsigned);'
const FunctionType *Malloc2Type =
- FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
- std::vector<const Type*>(1, Type::Int32Ty), false);
+ FunctionType::get(PointerType::getUnqual(
+ Type::getInt8Ty(M.getContext())),
+ std::vector<const Type*>(1,
+ Type::getInt32Ty(M.getContext())), false);
if (TyWeHave != Malloc2Type) {
// Check to see if the prototype is missing, giving us
// i8*(...) * malloc
// This handles the common declaration of: 'void *malloc();'
const FunctionType *Malloc3Type =
- FunctionType::get(PointerType::getUnqual(Type::Int8Ty), true);
+ FunctionType::get(PointerType::getUnqual(
+ Type::getInt8Ty(M.getContext())),
+ true);
if (TyWeHave != Malloc3Type)
// Give up
MallocFunc = 0;
@@ -105,19 +110,24 @@ void RaiseAllocations::doInitialization(Module &M) {
const FunctionType* TyWeHave = FreeFunc->getFunctionType();
// Get the expected prototype for void free(i8*)
- const FunctionType *Free1Type = FunctionType::get(Type::VoidTy,
- std::vector<const Type*>(1, PointerType::getUnqual(Type::Int8Ty)), false);
+ const FunctionType *Free1Type =
+ FunctionType::get(Type::getVoidTy(M.getContext()),
+ std::vector<const Type*>(1, PointerType::getUnqual(
+ Type::getInt8Ty(M.getContext()))),
+ false);
if (TyWeHave != Free1Type) {
// Check to see if the prototype was forgotten, giving us
// void (...) * free
// This handles the common forward declaration of: 'void free();'
- const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, true);
+ const FunctionType* Free2Type =
+ FunctionType::get(Type::getVoidTy(M.getContext()), true);
if (TyWeHave != Free2Type) {
// One last try, check to see if we can find free as
// int (...)* free. This handles the case where NOTHING was declared.
- const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, true);
+ const FunctionType* Free3Type =
+ FunctionType::get(Type::getInt32Ty(M.getContext()), true);
if (TyWeHave != Free3Type) {
// Give up.
@@ -137,7 +147,7 @@ void RaiseAllocations::doInitialization(Module &M) {
bool RaiseAllocations::runOnModule(Module &M) {
// Find the malloc/free prototypes...
doInitialization(M);
-
+
bool Changed = false;
// First, process all of the malloc calls...
@@ -159,12 +169,15 @@ bool RaiseAllocations::runOnModule(Module &M) {
// If no prototype was provided for malloc, we may need to cast the
// source size.
- if (Source->getType() != Type::Int32Ty)
+ if (Source->getType() != Type::getInt32Ty(M.getContext()))
Source =
- CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/,
+ CastInst::CreateIntegerCast(Source,
+ Type::getInt32Ty(M.getContext()),
+ false/*ZExt*/,
"MallocAmtCast", I);
- MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I);
+ MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()),
+ Source, "", I);
MI->takeName(I);
I->replaceAllUsesWith(MI);
@@ -216,7 +229,7 @@ bool RaiseAllocations::runOnModule(Module &M) {
Value *Source = *CS.arg_begin();
if (!isa<PointerType>(Source->getType()))
Source = new IntToPtrInst(Source,
- PointerType::getUnqual(Type::Int8Ty),
+ Type::getInt8PtrTy(M.getContext()),
"FreePtrCast", I);
new FreeInst(Source, I);
@@ -226,7 +239,7 @@ bool RaiseAllocations::runOnModule(Module &M) {
BranchInst::Create(II->getNormalDest(), I);
// Delete the old call site
- if (I->getType() != Type::VoidTy)
+ if (I->getType() != Type::getVoidTy(M.getContext()))
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
Changed = true;
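
The prototype matching above boils down to building each expected FunctionType and comparing pointers (function types are uniqued per context). The malloc case as a sketch (the helper name is ours):

static bool hasExpectedMallocTy(Module &M, Function *MallocFunc) {
  LLVMContext &Ctx = M.getContext();
  // i8* malloc(i64) -- the first of the three accepted shapes above.
  const FunctionType *Malloc1Type =
    FunctionType::get(Type::getInt8PtrTy(Ctx),
                      std::vector<const Type*>(1, Type::getInt64Ty(Ctx)),
                      false /*not vararg*/);
  return MallocFunc->getFunctionType() == Malloc1Type;
}

Because types are uniqued, pointer equality is the correct comparison, which is why the code above never needs a structural-equality helper.
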
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 046e0441b1dc..77d44b27e208 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -24,18 +24,18 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN StripSymbols : public ModulePass {
+ class StripSymbols : public ModulePass {
bool OnlyDebugInfo;
public:
static char ID; // Pass identification, replacement for typeid
@@ -49,7 +49,7 @@ namespace {
}
};
- class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass {
+ class StripNonDebugSymbols : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripNonDebugSymbols()
@@ -62,7 +62,7 @@ namespace {
}
};
- class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass {
+ class StripDebugDeclare : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDebugDeclare()
@@ -138,7 +138,7 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
Value *V = VI->getValue();
++VI;
if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
- if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8))
+ if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
// Set name to "", removing from symbol table!
V->setName("");
}
@@ -156,43 +156,37 @@ static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
}
/// Find values that are marked as llvm.used.
-void findUsedValues(Module &M,
- SmallPtrSet<const GlobalValue*, 8>& llvmUsedValues) {
- if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) {
- llvmUsedValues.insert(LLVMUsed);
- // Collect values that are preserved as per explicit request.
- // llvm.used is used to list these values.
- if (ConstantArray *Inits =
- dyn_cast<ConstantArray>(LLVMUsed->getInitializer())) {
- for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
- if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i)))
- llvmUsedValues.insert(GV);
- else if (ConstantExpr *CE =
- dyn_cast<ConstantExpr>(Inits->getOperand(i)))
- if (CE->getOpcode() == Instruction::BitCast)
- if (GlobalValue *GV = dyn_cast<GlobalValue>(CE->getOperand(0)))
- llvmUsedValues.insert(GV);
- }
- }
- }
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ UsedValues.insert(LLVMUsed);
+
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
}
/// StripSymbolNames - Strip symbol names.
-bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
- findUsedValues(M, llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
- if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8))
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
- if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8))
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
}
@@ -206,169 +200,58 @@ bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
// StripDebugInfo - Strip debug info in the module if it exists.
// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
// llvm.dbg.region.end calls, and any globals they point to if now dead.
-bool StripDebugInfo(Module &M) {
-
- SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
- findUsedValues(M, llvmUsedValues);
-
- SmallVector<GlobalVariable *, 2> CUs;
- SmallVector<GlobalVariable *, 4> GVs;
- SmallVector<GlobalVariable *, 4> SPs;
- CollectDebugInfoAnchors(M, CUs, GVs, SPs);
- // These anchors use LinkOnce linkage so that the optimizer does not
- // remove them accidently. Set InternalLinkage for all these debug
- // info anchors.
- for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
- E = CUs.end(); I != E; ++I)
- (*I)->setLinkage(GlobalValue::InternalLinkage);
- for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
- E = GVs.end(); I != E; ++I)
- (*I)->setLinkage(GlobalValue::InternalLinkage);
- for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
- E = SPs.end(); I != E; ++I)
- (*I)->setLinkage(GlobalValue::InternalLinkage);
-
-
- // Delete all dbg variables.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
- if (!GV) continue;
- if (!GV->use_empty() && llvmUsedValues.count(I) == 0) {
- if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) {
- GV->replaceAllUsesWith(UndefValue::get(GV->getType()));
- }
- }
- }
+static bool StripDebugInfo(Module &M) {
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
Function *FuncStart = M.getFunction("llvm.dbg.func.start");
Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
Function *RegionStart = M.getFunction("llvm.dbg.region.start");
Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
Function *Declare = M.getFunction("llvm.dbg.declare");
- std::vector<Constant*> DeadConstants;
-
- // Remove all of the calls to the debugger intrinsics, and remove them from
- // the module.
if (FuncStart) {
while (!FuncStart->use_empty()) {
CallInst *CI = cast<CallInst>(FuncStart->use_back());
- Value *Arg = CI->getOperand(1);
- assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
- if (Arg->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg))
- DeadConstants.push_back(C);
}
FuncStart->eraseFromParent();
}
if (StopPoint) {
while (!StopPoint->use_empty()) {
CallInst *CI = cast<CallInst>(StopPoint->use_back());
- Value *Arg = CI->getOperand(3);
- assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
- if (Arg->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg))
- DeadConstants.push_back(C);
}
StopPoint->eraseFromParent();
}
if (RegionStart) {
while (!RegionStart->use_empty()) {
CallInst *CI = cast<CallInst>(RegionStart->use_back());
- Value *Arg = CI->getOperand(1);
- assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
- if (Arg->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg))
- DeadConstants.push_back(C);
}
RegionStart->eraseFromParent();
}
if (RegionEnd) {
while (!RegionEnd->use_empty()) {
CallInst *CI = cast<CallInst>(RegionEnd->use_back());
- Value *Arg = CI->getOperand(1);
- assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
- if (Arg->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg))
- DeadConstants.push_back(C);
}
RegionEnd->eraseFromParent();
}
if (Declare) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
- Value *Arg1 = CI->getOperand(1);
- Value *Arg2 = CI->getOperand(2);
- assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
- if (Arg1->use_empty()) {
- if (Constant *C = dyn_cast<Constant>(Arg1))
- DeadConstants.push_back(C);
- else
- RecursivelyDeleteTriviallyDeadInstructions(Arg1);
- }
- if (Arg2->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg2))
- DeadConstants.push_back(C);
}
Declare->eraseFromParent();
}
- // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce
- // but since we are removing all debug information, make them internal now.
- // FIXME: Use private linkage maybe?
- if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units"))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- GV->setLinkage(GlobalValue::InternalLinkage);
-
- if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms"))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- GV->setLinkage(GlobalValue::InternalLinkage);
-
- if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables"))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- GV->setLinkage(GlobalValue::InternalLinkage);
-
- // Delete all dbg variables.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
- if (!GV) continue;
- if (GV->use_empty() && llvmUsedValues.count(I) == 0
- && (!GV->hasSection()
- || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0))
- DeadConstants.push_back(GV);
- }
-
- if (DeadConstants.empty())
- return false;
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
+ if (NMD)
+ NMD->eraseFromParent();
- // Delete any internal globals that were only used by the debugger intrinsics.
- while (!DeadConstants.empty()) {
- Constant *C = DeadConstants.back();
- DeadConstants.pop_back();
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->hasLocalLinkage())
- RemoveDeadConstant(GV);
- }
- else
- RemoveDeadConstant(C);
- }
-
- // Remove all llvm.dbg types.
- TypeSymbolTable &ST = M.getTypeSymbolTable();
- for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) {
- if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9))
- ST.remove(TI++);
- else
- ++TI;
- }
-
+ // Remove dead metadata.
+ M.getContext().RemoveDeadMetadata();
return true;
}
@@ -414,8 +297,7 @@ bool StripDebugDeclare::runOnModule(Module &M) {
I != E; ++I) {
GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
if (!GV) continue;
- if (GV->use_empty() && GV->hasName()
- && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0)
+ if (GV->use_empty() && GV->getName().startswith("llvm.dbg.global_variable"))
DeadConstants.push_back(GV);
}
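
Two idioms recur in the StripSymbols cleanup: StringRef-based name tests replacing strncmp on getNameStart(), and stripPointerCasts() folding the bare-global and bitcast-of-global cases in llvm.used into one. A sketch of the first (the helper is ours):

#include "llvm/ADT/StringRef.h"
#include "llvm/Value.h"
using namespace llvm;

// Old: strncmp(V->getNameStart(), "llvm.dbg", 8) == 0
// New: Value names are StringRefs, with length-safe prefix tests.
static bool isDebugName(const Value *V) {
  return V->getName().startswith("llvm.dbg");
}

The second idiom needs no helper: stripPointerCasts() on each llvm.used entry sees through the bitcast that entries of mismatched pointer type carry, which is why findUsedValues above no longer special-cases ConstantExpr bitcasts.
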
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index 9f54388aa45e..4442820a284b 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -23,6 +23,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CallGraphSCCPass.h"
#include "llvm/Instructions.h"
@@ -34,6 +35,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses");
@@ -47,15 +49,15 @@ namespace {
CallGraphSCCPass::getAnalysisUsage(AU);
}
- virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
static char ID; // Pass identification, replacement for typeid
SRETPromotion() : CallGraphSCCPass(&ID) {}
private:
- bool PromoteReturn(CallGraphNode *CGN);
+ CallGraphNode *PromoteReturn(CallGraphNode *CGN);
bool isSafeToUpdateAllCallers(Function *F);
Function *cloneFunctionBody(Function *F, const StructType *STy);
- void updateCallSites(Function *F, Function *NF);
+ CallGraphNode *updateCallSites(Function *F, Function *NF);
bool nestedStructType(const StructType *STy);
};
}
@@ -68,49 +70,54 @@ Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
}
-bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
bool Changed = false;
for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- Changed |= PromoteReturn(SCC[i]);
+ if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) {
+ SCC[i] = NewNode;
+ Changed = true;
+ }
return Changed;
}
/// PromoteReturn - This method promotes a function that uses a StructRet parameter
-/// into a function that uses mulitple return value.
-bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
+/// into a function that uses multiple return values.
+CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
Function *F = CGN->getFunction();
if (!F || F->isDeclaration() || !F->hasLocalLinkage())
- return false;
+ return 0;
// Make sure that function returns struct.
if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
- return false;
+ return 0;
- DOUT << "SretPromotion: Looking at sret function " << F->getNameStart() << "\n";
+ DEBUG(errs() << "SretPromotion: Looking at sret function "
+ << F->getName() << "\n");
- assert (F->getReturnType() == Type::VoidTy && "Invalid function return type");
+ assert(F->getReturnType() == Type::getVoidTy(F->getContext()) &&
+ "Invalid function return type");
Function::arg_iterator AI = F->arg_begin();
const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
- assert (FArgType && "Invalid sret parameter type");
+ assert(FArgType && "Invalid sret parameter type");
const llvm::StructType *STy =
dyn_cast<StructType>(FArgType->getElementType());
- assert (STy && "Invalid sret parameter element type");
+ assert(STy && "Invalid sret parameter element type");
// Check if it is ok to perform this promotion.
if (isSafeToUpdateAllCallers(F) == false) {
- DOUT << "SretPromotion: Not all callers can be updated\n";
+ DEBUG(errs() << "SretPromotion: Not all callers can be updated\n");
NumRejectedSRETUses++;
- return false;
+ return 0;
}
- DOUT << "SretPromotion: sret argument will be promoted\n";
+ DEBUG(errs() << "SretPromotion: sret argument will be promoted\n");
NumSRET++;
// [1] Replace use of sret parameter
- AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv",
- F->getEntryBlock().begin());
+ AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",
+ F->getEntryBlock().begin());
Value *NFirstArg = F->arg_begin();
NFirstArg->replaceAllUsesWith(TheAlloca);
@@ -121,7 +128,7 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
++BI;
if (isa<ReturnInst>(I)) {
Value *NV = new LoadInst(TheAlloca, "mrv.ld", I);
- ReturnInst *NR = ReturnInst::Create(NV, I);
+ ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I);
I->replaceAllUsesWith(NR);
I->eraseFromParent();
}
@@ -131,11 +138,13 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
Function *NF = cloneFunctionBody(F, STy);
// [4] Update all call sites to use new function
- updateCallSites(F, NF);
+ CallGraphNode *NF_CFN = updateCallSites(F, NF);
- F->eraseFromParent();
- getAnalysis<CallGraph>().changeFunction(F, NF);
- return true;
+ CallGraph &CG = getAnalysis<CallGraph>();
+ NF_CFN->stealCalledFunctionsFrom(CG[F]);
+
+ delete CG.removeFunctionFromModule(F);
+ return NF_CFN;
}
// Check if it is ok to perform this promotion.
@@ -243,23 +252,26 @@ Function *SRETPromotion::cloneFunctionBody(Function *F,
Function::arg_iterator NI = NF->arg_begin();
++I;
while (I != E) {
- I->replaceAllUsesWith(NI);
- NI->takeName(I);
- ++I;
- ++NI;
+ I->replaceAllUsesWith(NI);
+ NI->takeName(I);
+ ++I;
+ ++NI;
}
return NF;
}
/// updateCallSites - Update all sites that call F to use NF.
-void SRETPromotion::updateCallSites(Function *F, Function *NF) {
+CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
CallGraph &CG = getAnalysis<CallGraph>();
SmallVector<Value*, 16> Args;
// Attributes - Keep track of the parameter attributes for the arguments.
SmallVector<AttributeWithIndex, 8> ArgAttrsVec;
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
while (!F->use_empty()) {
CallSite CS = CallSite::get(*F->use_begin());
Instruction *Call = CS.getInstruction();
@@ -309,8 +321,10 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) {
New->takeName(Call);
// Update the callgraph to know that the callsite has been transformed.
- CG[Call->getParent()->getParent()]->replaceCallSite(Call, New);
-
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->removeCallEdgeFor(Call);
+ CalleeNode->addCalledFunction(New, NF_CGN);
+
// Update all users of sret parameter to extract value using extractvalue.
for (Value::use_iterator UI = FirstCArg->use_begin(),
UE = FirstCArg->use_end(); UI != UE; ) {
@@ -318,24 +332,25 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) {
CallInst *C2 = dyn_cast<CallInst>(U2);
if (C2 && (C2 == Call))
continue;
- else if (GetElementPtrInst *UGEP = dyn_cast<GetElementPtrInst>(U2)) {
- ConstantInt *Idx = dyn_cast<ConstantInt>(UGEP->getOperand(2));
- assert (Idx && "Unexpected getelementptr index!");
- Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
- "evi", UGEP);
- while(!UGEP->use_empty()) {
- // isSafeToUpdateAllCallers has checked that all GEP uses are
- // LoadInsts
- LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
- L->replaceAllUsesWith(GR);
- L->eraseFromParent();
- }
- UGEP->eraseFromParent();
+
+ GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2);
+ ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2));
+ Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
+ "evi", UGEP);
+ while(!UGEP->use_empty()) {
+ // isSafeToUpdateAllCallers has checked that all GEP uses are
+ // LoadInsts
+ LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
+ L->replaceAllUsesWith(GR);
+ L->eraseFromParent();
}
- else assert( 0 && "Unexpected sret parameter use");
+ UGEP->eraseFromParent();
+ continue;
}
Call->eraseFromParent();
}
+
+ return NF_CGN;
}
/// nestedStructType - Return true if STy includes any
@@ -344,7 +359,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) {
unsigned Num = STy->getNumElements();
for (unsigned i = 0; i < Num; i++) {
const Type *Ty = STy->getElementType(i);
- if (!Ty->isSingleValueType() && Ty != Type::VoidTy)
+ if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext()))
return true;
}
return false;
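For readers following the sret-promotion hunks above: a minimal stand-alone sketch, in plain C++ rather than the pass's IR-level API, of the rewrite being performed. All names here (Pair, makePairSret, makePair) are illustrative, not from the patch.

    #include <cstdio>

    struct Pair { int a; int b; };

    // Before promotion: the callee fills in a caller-provided buffer, the
    // source-level analogue of an sret pointer argument.
    static void makePairSret(Pair *out) { out->a = 1; out->b = 2; }

    // After promotion: the callee returns the aggregate by value; each former
    // load-through-GEP in a caller becomes a direct read of the returned
    // value (the IR-level analogue is an extractvalue on the call result).
    static Pair makePair() { return Pair{1, 2}; }

    int main() {
      Pair p1;
      makePairSret(&p1);    // old call site: pass a pointer, then load from it
      std::printf("%d %d\n", p1.a, p1.b);

      Pair p2 = makePair(); // new call site: read fields of the returned value
      std::printf("%d %d\n", p2.a, p2.b);
      return 0;
    }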
diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp
index 2bd9809a3961..eb8f22585b62 100644
--- a/lib/Transforms/Instrumentation/BlockProfiling.cpp
+++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp
@@ -19,12 +19,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "RSProfiling.h"
#include "ProfilingUtils.h"
@@ -52,8 +51,8 @@ ModulePass *llvm::createFunctionProfilerPass() {
bool FunctionProfiler::runOnModule(Module &M) {
Function *Main = M.getFunction("main");
if (Main == 0) {
- cerr << "WARNING: cannot insert function profiling into a module"
- << " with no main function!\n";
+ errs() << "WARNING: cannot insert function profiling into a module"
+ << " with no main function!\n";
return false; // No main, no instrumentation!
}
@@ -62,10 +61,11 @@ bool FunctionProfiler::runOnModule(Module &M) {
if (!I->isDeclaration())
++NumFunctions;
- const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions);
+ const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()),
+ NumFunctions);
GlobalVariable *Counters =
- new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "FuncProfCounters", &M);
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "FuncProfCounters");
// Instrument all of the functions...
unsigned i = 0;
@@ -98,26 +98,29 @@ ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }
bool BlockProfiler::runOnModule(Module &M) {
Function *Main = M.getFunction("main");
if (Main == 0) {
- cerr << "WARNING: cannot insert block profiling into a module"
- << " with no main function!\n";
+ errs() << "WARNING: cannot insert block profiling into a module"
+ << " with no main function!\n";
return false; // No main, no instrumentation!
}
unsigned NumBlocks = 0;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- NumBlocks += I->size();
+ if (!I->isDeclaration())
+ NumBlocks += I->size();
- const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks);
+ const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks);
GlobalVariable *Counters =
- new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "BlockProfCounters", &M);
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "BlockProfCounters");
// Instrument all of the blocks...
unsigned i = 0;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->isDeclaration()) continue;
for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)
// Insert counter at the start of the block
IncrementCounterInBlock(BB, i++, Counters);
+ }
// Add the initialization call to main.
InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters);
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index d7c518d282f8..494928e43814 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMInstrumentation
BlockProfiling.cpp
EdgeProfiling.cpp
+ OptimalEdgeProfiling.cpp
ProfilingUtils.cpp
RSProfiling.cpp
)
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index 0831f3b7a480..b9cb275578e0 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -16,25 +16,30 @@
// number of counters inserted.
//
//===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "insert-edge-profiling"
#include "ProfilingUtils.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
namespace {
class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
EdgeProfiler() : ModulePass(&ID) {}
+
+ virtual const char *getPassName() const {
+ return "Edge Profiler";
+ }
};
}
@@ -47,14 +52,17 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
bool EdgeProfiler::runOnModule(Module &M) {
Function *Main = M.getFunction("main");
if (Main == 0) {
- cerr << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
return false; // No main, no instrumentation!
}
std::set<BasicBlock*> BlocksToInstrument;
unsigned NumEdges = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
// Keep track of which blocks need to be instrumented. We don't want to
// instrument blocks that are added as the result of breaking critical
@@ -62,15 +70,20 @@ bool EdgeProfiler::runOnModule(Module &M) {
BlocksToInstrument.insert(BB);
NumEdges += BB->getTerminator()->getNumSuccessors();
}
+ }
- const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges);
+ const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
GlobalVariable *Counters =
- new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "EdgeProfCounters", &M);
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "EdgeProfCounters");
+ NumEdgesInserted = NumEdges;
// Instrument all of the edges...
unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Create counter for (0,entry) edge.
+ IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
// Okay, we have to add a counter of each outgoing edge. If the
@@ -93,6 +106,7 @@ bool EdgeProfiler::runOnModule(Module &M) {
}
}
}
+ }
// Add the initialization call to main.
InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 000000000000..2951dbcea9a1
--- /dev/null
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,95 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module provides the means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+
+ /// MaximumSpanningTree - An MST implementation.
+ /// The type parameter T determines the type of the nodes of the graph.
+ template <typename T>
+ class MaximumSpanningTree {
+
+ // A comparator for weighted edges.
+ template <typename CT>
+ struct EdgeWeightCompare {
+ bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
+ typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
+ return X.second > Y.second;
+ }
+ };
+
+ public:
+ typedef std::pair<const T*, const T*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::vector<EdgeWeight> EdgeWeights;
+ protected:
+ typedef std::vector<Edge> MaxSpanTree;
+
+ MaxSpanTree MST;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+ /// spanning tree.
+ MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+
+ // Create the spanning tree. Forest is a union-find structure that makes
+ // checking whether two nodes are already in a common (sub-)tree fast and
+ // cheap.
+ EquivalenceClasses<const T*> Forest;
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ Forest.insert(e.first);
+ Forest.insert(e.second);
+ }
+
+ // Iterate over the sorted edges, biggest first.
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+ Forest.unionSets(e.first, e.second);
+ // Now we know that the edge joins two distinct subtrees, so we push
+ // the edge onto the MST.
+ MST.push_back(e);
+ }
+ }
+ }
+
+ typename MaxSpanTree::iterator begin() {
+ return MST.begin();
+ }
+
+ typename MaxSpanTree::iterator end() {
+ return MST.end();
+ }
+ };
+
+} // End llvm namespace
+
+#endif
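MaximumSpanningTree.h above is a Kruskal-style construction: sort the edges by descending weight, then greedily keep each edge whose endpoints are not yet connected. A self-contained sketch of the same idea, with a minimal union-find standing in for llvm::EquivalenceClasses (all names illustrative):

    #include <algorithm>
    #include <cstdio>
    #include <numeric>
    #include <utility>
    #include <vector>

    struct UnionFind {
      std::vector<int> parent;
      explicit UnionFind(int n) : parent(n) {
        std::iota(parent.begin(), parent.end(), 0);
      }
      int find(int x) { return parent[x] == x ? x : parent[x] = find(parent[x]); }
      void unite(int a, int b) { parent[find(a)] = find(b); }
    };

    using Edge = std::pair<int, int>;
    using EdgeWeight = std::pair<Edge, double>;

    // Sort edges heaviest-first, then greedily take every edge whose
    // endpoints are not yet in a common subtree -- the loop in the header.
    static std::vector<Edge> maxSpanningTree(std::vector<EdgeWeight> edges,
                                             int numNodes) {
      std::stable_sort(edges.begin(), edges.end(),
                       [](const EdgeWeight &x, const EdgeWeight &y) {
                         return x.second > y.second; // heaviest first
                       });
      UnionFind forest(numNodes);
      std::vector<Edge> mst;
      for (const EdgeWeight &ew : edges) {
        Edge e = ew.first;
        if (forest.find(e.first) != forest.find(e.second)) {
          forest.unite(e.first, e.second);
          mst.push_back(e); // the edge joins two subtrees, keep it
        }
      }
      return mst;
    }

    int main() {
      std::vector<EdgeWeight> edges = {{{0, 1}, 3.0}, {{1, 2}, 5.0},
                                       {{0, 2}, 1.0}};
      for (Edge e : maxSpanningTree(edges, 3))
        std::printf("(%d,%d)\n", e.first, e.second); // keeps (1,2) and (0,1)
      return 0;
    }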
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 000000000000..b2e6747ca0e9
--- /dev/null
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,219 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "MaximumSpanningTree.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class VISIBILITY_HIDDEN OptimalEdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ OptimalEdgeProfiler() : ModulePass(&ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(ProfileEstimatorPassID);
+ AU.addRequired<ProfileInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Optimal Edge Profiler";
+ }
+ };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+static RegisterPass<OptimalEdgeProfiler>
+X("insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling");
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+ return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+ BasicBlock* b,
+ unsigned i) {
+ DEBUG(errs() << "--Edge Counter for " << (e) << " in " \
+ << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ // NumEdges counts all the edges that may be instrumented. Later on it is
+ // decided which edges to actually instrument, to achieve optimal profiling.
+ // For the entry block a virtual edge (0,entry) is reserved; for each block
+ // with no successors an edge (BB,0) is reserved. These edges are necessary
+ // to calculate a truly optimal maximum spanning tree and thus an optimal
+ // instrumentation.
+ unsigned NumEdges = 0;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ if (BB->getTerminator()->getNumSuccessors() == 0) {
+ // Reserve space for (BB,0) edge.
+ ++NumEdges;
+ } else {
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+ }
+
+ // In the profiling output a counter for each edge is reserved, but only a
+ // few are used. This is done to be able to read the profile back in without
+ // calculating the maximum spanning tree again; instead, each edge counter
+ // that is not used is initialised with -1 to signal that this edge counter
+ // has to be calculated from other edge counters when the profile info is
+ // read back in.
+
+ const Type *Int32 = Type::getInt32Ty(M.getContext());
+ const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "OptEdgeProfCounters");
+ NumEdgesInserted = 0;
+
+ std::vector<Constant*> Initializer(NumEdges);
+ Constant* Zero = ConstantInt::get(Int32, 0);
+ Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+ // Instrument all of the edges not in MST...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+
+ // Calculate a Maximum Spanning Tree with the edge weights determined by
+ // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
+ // edges (0,entry) and (BB,0) (for blocks with no successors), and these
+ // edges also participate in the maximum spanning tree calculation.
+ // The MST is then sorted so that std::binary_search can be used below to
+ // test whether an edge is _not_ in the MST and thus needs a counter.
+
+ ProfileInfo::EdgeWeights ECs =
+ getAnalysisID<ProfileInfo>(ProfileEstimatorPassID, *F).getEdgeWeights(F);
+ std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+ MaximumSpanningTree<BasicBlock> MST (EdgeVector);
+ std::stable_sort(MST.begin(),MST.end());
+
+ // Check whether the virtual edge (0,entry) is in the MST. If it is not,
+ // instrument it (IncrementCounterInBlock()) and initialise its counter to
+ // zero; if it is in the MST, the counter is initialised to -1 instead.
+
+ BasicBlock *entry = &(F->getEntryBlock());
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge,entry,i);
+ IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+ Initializer[i++] = (Zero);
+ } else{
+ Initializer[i++] = (Uncounted);
+ }
+
+ // InsertedBlocks contains all blocks that were inserted for splitting an
+ // edge; these blocks do not have to be instrumented.
+ DenseSet<BasicBlock*> InsertedBlocks;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Check if block was not inserted and thus does not have to be
+ // instrumented.
+ if (InsertedBlocks.count(BB)) continue;
+
+ // Okay, we have to add a counter for each outgoing edge not in the MST.
+ // If the outgoing edge is not critical, don't split it; just insert the
+ // counter in the source or destination of the edge. Also, if the block
+ // has no successors, the virtual edge (BB,0) is processed.
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge,BB,i);
+ IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+ Initializer[i++] = (Zero);
+ } else{
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ BasicBlock *Succ = TI->getSuccessor(s);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+ // If the edge is critical, split it.
+ bool wasInserted = SplitCriticalEdge(TI, s, this);
+ Succ = TI->getSuccessor(s);
+ if (wasInserted)
+ InsertedBlocks.insert(Succ);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If
+ // we only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge,BB,i);
+ IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+ } else {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge,Succ,i);
+ IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+ }
+ Initializer[i++] = (Zero);
+ } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ }
+ }
+
+ // Check that the number of edges counted at first equals the number of
+ // edges we considered for instrumentation.
+ assert(i==NumEdges && "the number of edges in counting array is wrong");
+
+ // Assign the now completely defined initialiser to the array.
+ Constant *init = ConstantArray::get(ATy, Initializer);
+ Counters->setInitializer(init);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+ return true;
+}
+
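The reason instrumenting only the edges outside the maximum spanning tree suffices is flow conservation: for every block, the counts entering it equal the counts leaving it, so every tree edge's count is determined by the measured ones. A hand-worked sketch on a diamond CFG (the solving order below is picked for this particular graph, not a general algorithm):

    #include <cstdio>

    int main() {
      // Diamond: 0 -> A; A -> B, A -> C; B -> D; C -> D; D -> 0 (virtual
      // exit). Instrumented (non-tree) edges, as read from a profile run:
      double cAC = 3, cD0 = 10;
      // Tree edges reconstructed by conservation, one unknown at a time:
      double c0A = cD0;       // function entries equal function exits
      double cAB = c0A - cAC; // out-flow of A: cAB + cAC = c0A
      double cBD = cAB;       // B has a single in-edge and a single out-edge
      double cCD = cAC;       // likewise for C
      std::printf("0->A=%g A->B=%g B->D=%g C->D=%g\n", c0A, cAB, cBD, cCD);
      return 0;
    }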
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 48071f115692..1679bea08c19 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -18,22 +18,27 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
GlobalValue *Array) {
+ LLVMContext &Context = MainFn->getContext();
const Type *ArgVTy =
- PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty));
- const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty);
+ PointerType::getUnqual(Type::getInt8PtrTy(Context));
+ const PointerType *UIntPtr =
+ Type::getInt32PtrTy(Context);
Module &M = *MainFn->getParent();
- Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty,
- ArgVTy, UIntPtr, Type::Int32Ty,
+ Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ ArgVTy, UIntPtr,
+ Type::getInt32Ty(Context),
(Type *)0);
// This could force argc and argv into programs that wouldn't otherwise have
// them, but instead we just pass null values in.
std::vector<Value*> Args(4);
- Args[0] = Constant::getNullValue(Type::Int32Ty);
+ Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Args[1] = Constant::getNullValue(ArgVTy);
// Skip over any allocas in the entry block.
@@ -41,7 +46,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
BasicBlock::iterator InsertPos = Entry->begin();
while (isa<AllocaInst>(InsertPos)) ++InsertPos;
- std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty));
+ std::vector<Constant*> GEPIndices(2,
+ Constant::getNullValue(Type::getInt32Ty(Context)));
unsigned NumElements = 0;
if (Array) {
Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
@@ -53,7 +59,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
// pass null.
Args[2] = ConstantPointerNull::get(UIntPtr);
}
- Args[3] = ConstantInt::get(Type::Int32Ty, NumElements);
+ Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
"newargc", InsertPos);
@@ -78,16 +84,18 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
AI = MainFn->arg_begin();
// If the program looked at argc, have it look at the return value of the
// init call instead.
- if (AI->getType() != Type::Int32Ty) {
+ if (AI->getType() != Type::getInt32Ty(Context)) {
Instruction::CastOps opcode;
if (!AI->use_empty()) {
opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
AI->replaceAllUsesWith(
CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
}
- opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true);
+ opcode = CastInst::getCastOpcode(AI, true,
+ Type::getInt32Ty(Context), true);
InitCall->setOperand(1,
- CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall));
+ CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
+ "argc.cast", InitCall));
} else {
AI->replaceAllUsesWith(InitCall);
InitCall->setOperand(1, AI);
@@ -104,17 +112,20 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
while (isa<AllocaInst>(InsertPos))
++InsertPos;
+ LLVMContext &Context = BB->getContext();
+
// Create the getelementptr constant expression
std::vector<Constant*> Indices(2);
- Indices[0] = Constant::getNullValue(Type::Int32Ty);
- Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
+ Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+ ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
+ Indices.size());
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
- ConstantInt::get(Type::Int32Ty, 1),
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
"NewFuncCounter", InsertPos);
new StoreInst(NewVal, ElementPtr, InsertPos);
}
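IncrementCounterInBlock, updated above, emits a getelementptr into the counter array followed by a load / add 1 / store. A source-level sketch of that sequence, assuming a global counter array (the names are illustrative, and the real pass emits IR instructions rather than C++):

    #include <cstdio>

    static unsigned counters[4]; // stands in for e.g. "EdgeProfCounters"

    static void incrementCounter(unsigned counterNum) {
      unsigned *elementPtr = &counters[counterNum]; // the GEP constant expr
      unsigned oldVal = *elementPtr;                // LoadInst
      unsigned newVal = oldVal + 1;                 // BinaryOperator Add
      *elementPtr = newVal;                         // StoreInst
    }

    int main() {
      incrementCounter(2);
      incrementCounter(2);
      std::printf("counters[2] = %u\n", counters[2]); // 2
      return 0;
    }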
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index b110f4eb368b..3b72260db845 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -33,6 +33,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Pass.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/Constants.h"
@@ -43,6 +44,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "RSProfiling.h"
#include <set>
@@ -197,8 +200,8 @@ GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
uint64_t resetval) : T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
ResetValue = Init;
- Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
- Init, "RandomSteeringCounter", &M);
+ Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
+ Init, "RandomSteeringCounter");
}
GlobalRandomCounter::~GlobalRandomCounter() {}
@@ -211,8 +214,9 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
//decrement counter
LoadInst* l = new LoadInst(Counter, "counter", t);
- ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
- "countercc", t);
+ ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
+ ConstantInt::get(T, 0),
+ "countercc");
Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
"counternew", t);
@@ -221,7 +225,8 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
//reset counter
BasicBlock* oldnext = t->getSuccessor(0);
- BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),
+ BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
+ "reset", oldnext->getParent(),
oldnext);
TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
t->setSuccessor(0, resetblock);
@@ -234,8 +239,8 @@ GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
: AI(0), T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
ResetValue = Init;
- Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
- Init, "RandomSteeringCounter", &M);
+ Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
+ Init, "RandomSteeringCounter");
}
GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
@@ -283,8 +288,9 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
//decrement counter
LoadInst* l = new LoadInst(AI, "counter", t);
- ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
- "countercc", t);
+ ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
+ ConstantInt::get(T, 0),
+ "countercc");
Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
"counternew", t);
@@ -293,7 +299,8 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
//reset counter
BasicBlock* oldnext = t->getSuccessor(0);
- BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),
+ BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
+ "reset", oldnext->getParent(),
oldnext);
TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
t->setSuccessor(0, resetblock);
@@ -315,12 +322,13 @@ void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
CallInst* c = CallInst::Create(F, "rdcc", t);
BinaryOperator* b =
- BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm),
+ BinaryOperator::CreateAnd(c,
+ ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm),
"mrdcc", t);
- ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
- ConstantInt::get(Type::Int64Ty, 0),
- "mrdccc", t);
+ ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b,
+ ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0),
+ "mrdccc");
t->setCondition(s);
}
@@ -345,16 +353,16 @@ void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNu
// Create the getelementptr constant expression
std::vector<Constant*> Indices(2);
- Indices[0] = Constant::getNullValue(Type::Int32Ty);
- Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
- Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
+ Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext()));
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum);
+ Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
&Indices[0], 2);
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
profcode.insert(OldVal);
Value *NewVal = BinaryOperator::CreateAdd(OldVal,
- ConstantInt::get(Type::Int32Ty, 1),
+ ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1),
"NewCounter", InsertPos);
profcode.insert(NewVal);
profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
@@ -377,7 +385,8 @@ Value* ProfilerRS::Translate(Value* v) {
if (bb == &bb->getParent()->getEntryBlock())
TransCache[bb] = bb; //don't translate entry block
else
- TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(),
+ TransCache[bb] = BasicBlock::Create(v->getContext(),
+ "dup_" + bb->getName(),
bb->getParent(), NULL);
return TransCache[bb];
} else if (Instruction* i = dyn_cast<Instruction>(v)) {
@@ -401,7 +410,7 @@ Value* ProfilerRS::Translate(Value* v) {
TransCache[v] = v;
return v;
}
- assert(0 && "Value not handled");
+ llvm_unreachable("Value not handled");
return 0;
}
@@ -466,16 +475,16 @@ void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F)
//a:
Function::iterator BBN = src; ++BBN;
- BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN);
+ BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN);
//ChoicePoints.insert(bbC);
BBN = cast<BasicBlock>(Translate(src));
- BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN);
+ BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN);
ChoicePoints.insert(bbCp);
//b:
BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);
BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)),
- ConstantInt::get(Type::Int1Ty, true), bbCp);
+ ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp);
//c:
{
TerminatorInst* iB = src->getTerminator();
@@ -531,9 +540,8 @@ bool ProfilerRS::runOnFunction(Function& F) {
TerminatorInst* T = F.getEntryBlock().getTerminator();
ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),
cast<BasicBlock>(
- Translate(T->getSuccessor(0))),
- ConstantInt::get(Type::Int1Ty,
- true)));
+ Translate(T->getSuccessor(0))),
+ ConstantInt::get(Type::getInt1Ty(F.getContext()), true)));
//do whatever is needed now that the function is duplicated
c->PrepFunction(&F);
@@ -556,10 +564,12 @@ bool ProfilerRS::runOnFunction(Function& F) {
bool ProfilerRS::doInitialization(Module &M) {
switch (RandomMethod) {
case GBV:
- c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1);
+ c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()),
+ (1 << 14) - 1);
break;
case GBVO:
- c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1);
+ c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()),
+ (1 << 14) - 1);
break;
case HOSTCC:
c = new CycleCounter(M, (1 << 14) - 1);
@@ -639,7 +649,7 @@ static void getBackEdges(Function& F, T& BackEdges) {
std::map<BasicBlock*, int> finish;
int time = 0;
recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
- DOUT << F.getName() << " " << BackEdges.size() << "\n";
+ DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n");
}
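The GlobalRandomCounter changes above build a choice point that compares the counter against zero, decrements it, and resets it on the taken path. A sketch of the resulting control flow, assuming a reset value of 3 (so roughly every fourth execution takes the profiled duplicate):

    #include <cstdio>

    int main() {
      unsigned counter = 3;
      const unsigned resetValue = 3;
      for (int i = 0; i < 8; ++i) {
        bool takeProfiledPath = (counter == 0);   // the ICmpInst on the old value
        counter = takeProfiledPath ? resetValue   // the "reset" block
                                   : counter - 1; // the CreateSub decrement
        std::printf("iteration %d: %s path\n", i,
                    takeProfiledPath ? "profiled" : "fast");
      }
      return 0;
    }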
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index 5fe1eeb5c752..025d02ad3073 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello
include $(LEVEL)/Makefile.config
# No support for plugins on windows targets
-ifeq ($(OS), $(filter $(OS), Cygwin MingW))
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))
endif
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 9c55f664ebbd..37f383fb512a 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -21,19 +21,17 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-
using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
- struct VISIBILITY_HIDDEN ADCE : public FunctionPass {
+ struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
ADCE() : FunctionPass(&ID) {}
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index fb9b88005b6a..54533f50405f 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -31,7 +31,6 @@
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/Scalar.h"
#include <set>
@@ -40,7 +39,7 @@ using namespace llvm;
STATISTIC(NumMoved, "Number of basic blocks moved");
namespace {
- struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass {
+ struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
BlockPlacement() : FunctionPass(&ID) {}
@@ -127,13 +126,13 @@ void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
/*empty*/;
if (SI == E) return; // No more successors to place.
- unsigned MaxExecutionCount = PI->getExecutionCount(*SI);
+ double MaxExecutionCount = PI->getExecutionCount(*SI);
BasicBlock *MaxSuccessor = *SI;
// Scan for more frequently executed successors
for (; SI != E; ++SI)
if (!PlacedBlocks.count(*SI)) {
- unsigned Count = PI->getExecutionCount(*SI);
+ double Count = PI->getExecutionCount(*SI);
if (Count > MaxExecutionCount ||
// Prefer to not disturb the code.
(Count == MaxExecutionCount && *SI == &*InsertPos)) {
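The BlockPlacement change above widens execution counts from unsigned to double, since ProfileInfo estimates can be fractional, while keeping the tie-break that prefers the block already at the insertion point. A miniature of that selection loop (the Succ type is a hypothetical stand-in):

    #include <cstdio>
    #include <vector>

    struct Succ { const char *name; double count; bool atInsertPos; };

    static const Succ *pickSuccessor(const std::vector<Succ> &succs) {
      const Succ *best = &succs[0];
      for (const Succ &s : succs)
        if (s.count > best->count ||
            (s.count == best->count && s.atInsertPos)) // don't disturb code
          best = &s;
      return best;
    }

    int main() {
      std::vector<Succ> succs = {{"a", 2.5, false}, {"b", 2.5, true}};
      std::printf("place %s next\n", pickSuccessor(succs)->name); // "b" wins
      return 0;
    }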
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 8a8f83fa311d..cbeed4c6b55f 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,13 +1,13 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
BasicBlockPlacement.cpp
+ CodeGenLICM.cpp
CodeGenPrepare.cpp
CondPropagate.cpp
ConstantProp.cpp
DCE.cpp
DeadStoreElimination.cpp
GVN.cpp
- GVNPRE.cpp
IndVarSimplify.cpp
InstructionCombining.cpp
JumpThreading.cpp
@@ -19,7 +19,6 @@ add_llvm_library(LLVMScalarOpts
LoopUnroll.cpp
LoopUnswitch.cpp
MemCpyOptimizer.cpp
- PredicateSimplifier.cpp
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
diff --git a/lib/Transforms/Scalar/CodeGenLICM.cpp b/lib/Transforms/Scalar/CodeGenLICM.cpp
new file mode 100644
index 000000000000..10f950e135da
--- /dev/null
+++ b/lib/Transforms/Scalar/CodeGenLICM.cpp
@@ -0,0 +1,112 @@
+//===- CodeGenLICM.cpp - LICM a function for code generation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs late LICM, hoisting constants that are not valid
+// immediates out of loops. It should not be followed by instcombine,
+// because instcombine would quickly stuff the constants back into the loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+using namespace llvm;
+
+namespace {
+ class CodeGenLICM : public LoopPass {
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenLICM() : LoopPass(&ID) {}
+ };
+}
+
+char CodeGenLICM::ID = 0;
+static RegisterPass<CodeGenLICM> X("codegen-licm",
+ "hoist constants out of loops");
+
+Pass *llvm::createCodeGenLICMPass() {
+ return new CodeGenLICM();
+}
+
+bool CodeGenLICM::runOnLoop(Loop *L, LPPassManager &) {
+ bool Changed = false;
+
+ // Only visit outermost loops.
+ if (L->getParentLoop()) return Changed;
+
+ Instruction *PreheaderTerm = L->getLoopPreheader()->getTerminator();
+ DenseMap<Constant *, BitCastInst *> HoistedConstants;
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // TODO: For now, skip all intrinsic instructions, because some of them
+ // can require their operands to be constants, and we don't want to
+ // break that.
+ if (isa<IntrinsicInst>(I))
+ continue;
+ // LLVM represents fneg as -0.0-x; don't hoist the -0.0 out.
+ if (BinaryOperator::isFNeg(I) ||
+ BinaryOperator::isNeg(I) ||
+ BinaryOperator::isNot(I))
+ continue;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ // Don't hoist out switch case constants.
+ if (isa<SwitchInst>(I) && i == 1)
+ break;
+ // Don't hoist out shuffle masks.
+ if (isa<ShuffleVectorInst>(I) && i == 2)
+ break;
+ Value *Op = I->getOperand(i);
+ Constant *C = dyn_cast<Constant>(Op);
+ if (!C) continue;
+ // TODO: Ask the target which constants are legal. This would allow
+ // us to add support for hoisting ConstantInts and GlobalValues too.
+ if (isa<ConstantFP>(C) ||
+ isa<ConstantVector>(C) ||
+ isa<ConstantAggregateZero>(C)) {
+ BitCastInst *&BC = HoistedConstants[C];
+ if (!BC)
+ BC = new BitCastInst(C, C->getType(), "hoist", PreheaderTerm);
+ I->setOperand(i, BC);
+ Changed = true;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+void CodeGenLICM::getAnalysisUsage(AnalysisUsage &AU) const {
+ // This pass preserves just about everything. List some popular things here.
+ AU.setPreservesCFG();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("lda");
+ AU.addPreserved("live-values");
+
+ // Hoisting requires a loop preheader.
+ AU.addRequiredID(LoopSimplifyID);
+}
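The interesting bookkeeping in CodeGenLICM above is the DenseMap<Constant*, BitCastInst*>: each distinct constant gets exactly one identity bitcast planted before the preheader terminator, and every use inside the loop is rewired to that single copy. A sketch of that memoization pattern with a hypothetical mini-IR Value type (not LLVM's):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct Value { std::string repr; };

    int main() {
      std::vector<Value *> preheader;         // defs emitted before the loop
      std::map<std::string, Value *> hoisted; // constant -> its single copy

      const char *loopOperands[] = {"3.14", "3.14", "2.71", "3.14"};
      for (const char *c : loopOperands) {
        Value *&copy = hoisted[c]; // like DenseMap<Constant*, BitCastInst*>
        if (!copy) {               // first use of this constant: emit one def
          copy = new Value{std::string("hoist(") + c + ")"};
          preheader.push_back(copy);
        }
        // ...the loop instruction's operand would now be rewired to 'copy'...
      }
      for (Value *v : preheader)
        std::printf("%s\n", v->repr.c_str()); // only two hoisted definitions
      for (auto &kv : hoisted)
        delete kv.second;
      return 0;
    }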
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 85e9243e3ce8..a3e3fea4da07 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -23,10 +23,9 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -35,10 +34,10 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -46,10 +45,11 @@ static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
cl::init(false), cl::Hidden);
namespace {
- class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
+ class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ ProfileInfo *PI;
/// BackEdges - Keep a set of all the loop back edges.
///
@@ -60,6 +60,10 @@ namespace {
: FunctionPass(&ID), TLI(tli) {}
bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<ProfileInfo>();
+ }
+
private:
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -95,6 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ PI = getAnalysisIfAvailable<ProfileInfo>();
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -232,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
- DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;
+ DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
// If the destination block has a single pred, then this is a trivial edge,
// just collapse it.
@@ -241,12 +246,12 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB);
+ MergeBasicBlockIntoOnlyPred(DestBB, this);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
- DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+ DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
return;
}
}
@@ -283,9 +288,13 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
+ if (PI) {
+ PI->replaceAllUses(BB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+ }
BB->eraseFromParent();
- DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+ DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
@@ -358,6 +367,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
// If we found a workable predecessor, change TI to branch to Succ.
if (FoundMatch) {
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI)
+ PI->splitEdge(TIBB, Dest, Pred);
Dest->removePredecessor(TIBB);
TI->setSuccessor(SuccNum, Pred);
return;
@@ -410,8 +422,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If this is a noop copy,
- MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(CI->getType());
+ EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(CI->getType());
// This is an fp<->int conversion?
if (SrcVT.isInteger() != DstVT.isInteger())
@@ -424,10 +436,10 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
- SrcVT = TLI.getTypeToTransformTo(SrcVT);
- if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
- DstVT = TLI.getTypeToTransformTo(DstVT);
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
if (SrcVT != DstVT)
@@ -520,7 +532,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
InsertedCmp =
- CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
+ CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
MadeChange = true;
}
@@ -577,7 +590,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If all the instructions matched are already in this BB, don't do anything.
if (!AnyNonLocal) {
- DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
+ DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
@@ -592,14 +605,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// computation.
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
- DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
+ DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
} else {
- DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
- const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();
+ DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ const Type *IntPtrTy =
+ TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
Value *Result = 0;
// Start with the scale value.
@@ -616,7 +630,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
}
if (AddrMode.Scale != 1)
- V = BinaryOperator::CreateMul(V, Context->getConstantInt(IntPtrTy,
+ V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
"sunkaddr", InsertPt);
Result = V;
@@ -648,7 +662,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Add in the Base Offset if present.
if (AddrMode.BaseOffs) {
- Value *V = Context->getConstantInt(IntPtrTy, AddrMode.BaseOffs);
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
@@ -656,7 +670,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
if (Result == 0)
- SunkAddr = Context->getNullValue(Addr->getType());
+ SunkAddr = Constant::getNullValue(Addr->getType());
else
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
@@ -858,18 +872,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
} else if (CallInst *CI = dyn_cast<CallInst>(I)) {
// If we found an inline asm expression, and if the target knows how to
// lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue()))
- if (const TargetAsmInfo *TAI =
- TLI->getTargetMachine().getTargetAsmInfo()) {
- if (TAI->ExpandInlineAsm(CI)) {
- BBI = BB.begin();
- // Avoid processing instructions out of order, which could cause
- // reuse before a value is defined.
- SunkAddrs.clear();
- } else
- // Sink address computing for memory operands into the block.
- MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
- }
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ BBI = BB.begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ } else
+ // Sink address computing for memory operands into the block.
+ MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
+ }
}
}
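The MVT-to-EVT change above feeds the no-op-copy test: a cast can be removed when source and destination are the same kind and legalize to the same width after promotion. A toy model of that test, assuming a single legal integer width of 32 bits (the VT type here is a stand-in, not LLVM's EVT machinery):

    #include <cstdio>

    struct VT { bool isInt; unsigned bits; };

    static VT promote(VT v) {                   // stands in for
      if (v.isInt && v.bits < 32) v.bits = 32;  // getTypeToTransformTo,
      return v;                                 // assuming i32 is legal
    }

    static bool isNoopCopy(VT src, VT dst) {
      if (src.isInt != dst.isInt) return false; // fp<->int is never a noop
      src = promote(src);
      dst = promote(dst);
      return src.bits == dst.bits;   // same type after promotion: noop copy
    }

    int main() {
      std::printf("%d\n", isNoopCopy({true, 16}, {true, 32}));  // 1: promoted equal
      std::printf("%d\n", isNoopCopy({true, 32}, {false, 32})); // 0: fp vs int
      return 0;
    }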
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
index c85d0317d65f..5b573f492cdc 100644
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ b/lib/Transforms/Scalar/CondPropagate.cpp
@@ -14,26 +14,21 @@
#define DEBUG_TYPE "condprop"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
using namespace llvm;
STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
namespace {
- struct VISIBILITY_HIDDEN CondProp : public FunctionPass {
+ struct CondProp : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
CondProp() : FunctionPass(&ID) {}
@@ -124,7 +119,7 @@ void CondProp::SimplifyBlock(BasicBlock *BB) {
// Succ is now dead, but we cannot delete it without potentially
// invalidating iterators elsewhere. Just insert an unreachable
// instruction in it and delete this block later on.
- new UnreachableInst(Succ);
+ new UnreachableInst(BB->getContext(), Succ);
DeadBlocks.push_back(Succ);
MadeChange = true;
}
@@ -196,8 +191,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {
if (&*BBI != SI)
return;
- bool RemovedPreds = false;
-
// Ok, we have this really simple case, walk the PHI operands, looking for
// constants. Walk from the end to remove operands from the end when
// possible, and to avoid invalidating "i".
@@ -209,7 +202,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {
RevectorBlockTo(PN->getIncomingBlock(i-1),
SI->getSuccessor(DestCase));
++NumSwThread;
- RemovedPreds = true;
// If there were two predecessors before this simplification, or if the
// PHI node contained all the same value except for the one we just
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index b933488cf636..4fee327ebec1 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,7 +24,6 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -33,7 +32,7 @@ using namespace llvm;
STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
- struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass {
+ struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
ConstantPropagation() : FunctionPass(&ID) {}
@@ -67,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8bb504c09c6e..39940c35da5d 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -21,7 +21,6 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -34,7 +33,7 @@ namespace {
//===--------------------------------------------------------------------===//
// DeadInstElimination pass implementation
//
- struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass {
+ struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
DeadInstElimination() : BasicBlockPass(&ID) {}
virtual bool runOnBasicBlock(BasicBlock &BB) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index b923c92bd300..a7b3e7524fa2 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -29,14 +29,15 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
namespace {
- struct VISIBILITY_HIDDEN DSE : public FunctionPass {
+ struct DSE : public FunctionPass {
+ TargetData *TD;
+
static char ID; // Pass identification, replacement for typeid
DSE() : FunctionPass(&ID) {}
@@ -62,7 +63,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
- AU.addRequired<TargetData>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addPreserved<DominatorTree>();
@@ -79,15 +79,15 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
bool DSE::runOnBasicBlock(BasicBlock &BB) {
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
- TargetData &TD = getAnalysis<TargetData>();
+ TD = getAnalysisIfAvailable<TargetData>();
bool MadeChange = false;
- // Do a top-down walk on the BB
+ // Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
- // If we find a store or a free, get it's memory dependence.
+ // If we find a store or a free, get its memory dependence.
if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst))
continue;
@@ -117,13 +117,17 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// If this is a store-store dependence, then the previous store is dead so
// long as this store is at least as big as it.
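// For instance (an illustrative must-alias case, not from a test), in
//   store i8 1, i8* %p
//   store i32 0, i32* %q   ; %q must-aliases %p and is at least as wide
// the earlier i8 store is dead and can be deleted.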
if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
- if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
- TD.getTypeStoreSize(SI->getOperand(0)->getType())) {
+ if (TD &&
+ TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
+ TD->getTypeStoreSize(SI->getOperand(0)->getType())) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
NumFastStores++;
MadeChange = true;
-
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+      // cases; reset BBI.
+ BBI = Inst;
if (BBI != BB.begin())
--BBI;
continue;
@@ -134,8 +138,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
SI->getOperand(0) == DepLoad) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
DeleteDeadInstruction(SI);
- if (BBI != BB.begin())
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
NumFastStores++;
MadeChange = true;
@@ -181,7 +192,6 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) {
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
- TargetData &TD = getAnalysis<TargetData>();
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
bool MadeChange = false;
@@ -302,14 +312,16 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Get size information for the alloca
unsigned pointerSize = ~0U;
- if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
- if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
- pointerSize = C->getZExtValue() *
- TD.getTypeAllocSize(A->getAllocatedType());
- } else {
- const PointerType* PT = cast<PointerType>(
- cast<Argument>(*I)->getType());
- pointerSize = TD.getTypeAllocSize(PT->getElementType());
+ if (TD) {
+ if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+ if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+ pointerSize = C->getZExtValue() *
+ TD->getTypeAllocSize(A->getAllocatedType());
+ } else {
+ const PointerType* PT = cast<PointerType>(
+ cast<Argument>(*I)->getType());
+ pointerSize = TD->getTypeAllocSize(PT->getElementType());
+ }
}
// See if the call site touches it
@@ -357,7 +369,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
BasicBlock::iterator &BBI,
SmallPtrSet<Value*, 64>& deadPointers) {
- TargetData &TD = getAnalysis<TargetData>();
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// If the kill pointer can be easily reduced to an alloca,
@@ -379,13 +390,15 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
E = deadPointers.end(); I != E; ++I) {
// Get size information for the alloca.
unsigned pointerSize = ~0U;
- if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
- if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
- pointerSize = C->getZExtValue() *
- TD.getTypeAllocSize(A->getAllocatedType());
- } else {
- const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType());
- pointerSize = TD.getTypeAllocSize(PT->getElementType());
+ if (TD) {
+ if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+ if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+ pointerSize = C->getZExtValue() *
+ TD->getTypeAllocSize(A->getAllocatedType());
+ } else {
+ const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType());
+ pointerSize = TD->getTypeAllocSize(PT->getElementType());
+ }
}
// See if this pointer could alias it
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index f4fe15e0e525..2ed4a638adf4 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -23,6 +23,7 @@
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -32,13 +33,18 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <cstdio>
using namespace llvm;
@@ -60,17 +66,17 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
- struct VISIBILITY_HIDDEN Expression {
+ struct Expression {
enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
UDIV, SDIV, FDIV, UREM, SREM,
- FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
- ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
- ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
- FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
- FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
+ FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
+ ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+ ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+ FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+ FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
- FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
+ FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
EMPTY, TOMBSTONE };
@@ -80,11 +86,11 @@ namespace {
uint32_t secondVN;
uint32_t thirdVN;
SmallVector<uint32_t, 4> varargs;
- Value* function;
-
+ Value *function;
+
Expression() { }
Expression(ExpressionOpcode o) : opcode(o) { }
-
+
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
@@ -103,30 +109,30 @@ namespace {
else {
if (varargs.size() != other.varargs.size())
return false;
-
+
for (size_t i = 0; i < varargs.size(); ++i)
if (varargs[i] != other.varargs[i])
return false;
-
+
return true;
}
}
-
+
bool operator!=(const Expression &other) const {
return !(*this == other);
}
};
-
- class VISIBILITY_HIDDEN ValueTable {
+
+ class ValueTable {
private:
DenseMap<Value*, uint32_t> valueNumbering;
DenseMap<Expression, uint32_t> expressionNumbering;
AliasAnalysis* AA;
MemoryDependenceAnalysis* MD;
DominatorTree* DT;
-
+
uint32_t nextValueNumber;
-
+
Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
Expression::ExpressionOpcode getOpcode(CmpInst* C);
Expression::ExpressionOpcode getOpcode(CastInst* C);
@@ -142,11 +148,11 @@ namespace {
Expression create_expression(Constant* C);
public:
ValueTable() : nextValueNumber(1) { }
- uint32_t lookup_or_add(Value* V);
- uint32_t lookup(Value* V) const;
- void add(Value* V, uint32_t num);
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ void add(Value *V, uint32_t num);
void clear();
- void erase(Value* v);
+ void erase(Value *v);
unsigned size();
void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
AliasAnalysis *getAliasAnalysis() const { return AA; }
@@ -162,30 +168,30 @@ template <> struct DenseMapInfo<Expression> {
static inline Expression getEmptyKey() {
return Expression(Expression::EMPTY);
}
-
+
static inline Expression getTombstoneKey() {
return Expression(Expression::TOMBSTONE);
}
-
+
static unsigned getHashValue(const Expression e) {
unsigned hash = e.opcode;
-
+
hash = e.firstVN + hash * 37;
hash = e.secondVN + hash * 37;
hash = e.thirdVN + hash * 37;
-
+
hash = ((unsigned)((uintptr_t)e.type >> 4) ^
(unsigned)((uintptr_t)e.type >> 9)) +
hash * 37;
-
+
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
hash = *I + hash * 37;
-
+
hash = ((unsigned)((uintptr_t)e.function >> 4) ^
(unsigned)((uintptr_t)e.function >> 9)) +
hash * 37;
-
+
return hash;
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
@@ -201,7 +207,7 @@ template <> struct DenseMapInfo<Expression> {
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
switch(BO->getOpcode()) {
default: // THIS SHOULD NEVER HAPPEN
- assert(0 && "Binary operator with unknown opcode?");
+ llvm_unreachable("Binary operator with unknown opcode?");
case Instruction::Add: return Expression::ADD;
case Instruction::FAdd: return Expression::FADD;
case Instruction::Sub: return Expression::SUB;
@@ -224,10 +230,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
}
Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
- if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) {
+ if (isa<ICmpInst>(C)) {
switch (C->getPredicate()) {
default: // THIS SHOULD NEVER HAPPEN
- assert(0 && "Comparison with unknown predicate?");
+ llvm_unreachable("Comparison with unknown predicate?");
case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
case ICmpInst::ICMP_NE: return Expression::ICMPNE;
case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
@@ -239,32 +245,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
}
- }
- assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare");
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- assert(0 && "Comparison with unknown predicate?");
- case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
- case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
- case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
- case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
- case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
- case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
- case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
- case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
- case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
- case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
- case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
- case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
- case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
- case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+ } else {
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Comparison with unknown predicate?");
+ case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
+ case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
+ case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
+ case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
+ case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
+ case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
+ case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
+ case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
+ case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
+ case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
+ case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
+ case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
+ case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
+ case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+ }
}
}
Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
switch(C->getOpcode()) {
default: // THIS SHOULD NEVER HAPPEN
- assert(0 && "Cast operator with unknown opcode?");
+ llvm_unreachable("Cast operator with unknown opcode?");
case Instruction::Trunc: return Expression::TRUNC;
case Instruction::ZExt: return Expression::ZEXT;
case Instruction::SExt: return Expression::SEXT;
@@ -282,126 +288,126 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
Expression ValueTable::create_expression(CallInst* C) {
Expression e;
-
+
e.type = C->getType();
e.firstVN = 0;
e.secondVN = 0;
e.thirdVN = 0;
e.function = C->getCalledFunction();
e.opcode = Expression::CALL;
-
+
for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
I != E; ++I)
e.varargs.push_back(lookup_or_add(*I));
-
+
return e;
}
Expression ValueTable::create_expression(BinaryOperator* BO) {
Expression e;
-
+
e.firstVN = lookup_or_add(BO->getOperand(0));
e.secondVN = lookup_or_add(BO->getOperand(1));
e.thirdVN = 0;
e.function = 0;
e.type = BO->getType();
e.opcode = getOpcode(BO);
-
+
return e;
}
Expression ValueTable::create_expression(CmpInst* C) {
Expression e;
-
+
e.firstVN = lookup_or_add(C->getOperand(0));
e.secondVN = lookup_or_add(C->getOperand(1));
e.thirdVN = 0;
e.function = 0;
e.type = C->getType();
e.opcode = getOpcode(C);
-
+
return e;
}
Expression ValueTable::create_expression(CastInst* C) {
Expression e;
-
+
e.firstVN = lookup_or_add(C->getOperand(0));
e.secondVN = 0;
e.thirdVN = 0;
e.function = 0;
e.type = C->getType();
e.opcode = getOpcode(C);
-
+
return e;
}
Expression ValueTable::create_expression(ShuffleVectorInst* S) {
Expression e;
-
+
e.firstVN = lookup_or_add(S->getOperand(0));
e.secondVN = lookup_or_add(S->getOperand(1));
e.thirdVN = lookup_or_add(S->getOperand(2));
e.function = 0;
e.type = S->getType();
e.opcode = Expression::SHUFFLE;
-
+
return e;
}
Expression ValueTable::create_expression(ExtractElementInst* E) {
Expression e;
-
+
e.firstVN = lookup_or_add(E->getOperand(0));
e.secondVN = lookup_or_add(E->getOperand(1));
e.thirdVN = 0;
e.function = 0;
e.type = E->getType();
e.opcode = Expression::EXTRACT;
-
+
return e;
}
Expression ValueTable::create_expression(InsertElementInst* I) {
Expression e;
-
+
e.firstVN = lookup_or_add(I->getOperand(0));
e.secondVN = lookup_or_add(I->getOperand(1));
e.thirdVN = lookup_or_add(I->getOperand(2));
e.function = 0;
e.type = I->getType();
e.opcode = Expression::INSERT;
-
+
return e;
}
Expression ValueTable::create_expression(SelectInst* I) {
Expression e;
-
+
e.firstVN = lookup_or_add(I->getCondition());
e.secondVN = lookup_or_add(I->getTrueValue());
e.thirdVN = lookup_or_add(I->getFalseValue());
e.function = 0;
e.type = I->getType();
e.opcode = Expression::SELECT;
-
+
return e;
}
Expression ValueTable::create_expression(GetElementPtrInst* G) {
Expression e;
-
+
e.firstVN = lookup_or_add(G->getPointerOperand());
e.secondVN = 0;
e.thirdVN = 0;
e.function = 0;
e.type = G->getType();
e.opcode = Expression::GEP;
-
+
for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
I != E; ++I)
e.varargs.push_back(lookup_or_add(*I));
-
+
return e;
}
@@ -410,21 +416,21 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) {
//===----------------------------------------------------------------------===//
/// add - Insert a value into the table with a specified value number.
-void ValueTable::add(Value* V, uint32_t num) {
+void ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
-uint32_t ValueTable::lookup_or_add(Value* V) {
+uint32_t ValueTable::lookup_or_add(Value *V) {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
if (VI != valueNumbering.end())
return VI->second;
-
+
if (CallInst* C = dyn_cast<CallInst>(V)) {
if (AA->doesNotAccessMemory(C)) {
Expression e = create_expression(C);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -432,20 +438,20 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (AA->onlyReadsMemory(C)) {
Expression e = create_expression(C);
-
+
if (expressionNumbering.find(e) == expressionNumbering.end()) {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
}
-
+
MemDepResult local_dep = MD->getDependency(C);
-
+
if (!local_dep.isDef() && !local_dep.isNonLocal()) {
valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
@@ -453,12 +459,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
if (local_dep.isDef()) {
CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
-
+
if (local_cdep->getNumOperands() != C->getNumOperands()) {
valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
}
-
+
for (unsigned i = 1; i < C->getNumOperands(); ++i) {
uint32_t c_vn = lookup_or_add(C->getOperand(i));
uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
@@ -467,19 +473,19 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
return nextValueNumber++;
}
}
-
+
uint32_t v = lookup_or_add(local_cdep);
valueNumbering.insert(std::make_pair(V, v));
return v;
}
// Non-local case.
- const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: call/call dependencies for readonly calls should return def, not
// clobber! Move the checking logic to MemDep!
CallInst* cdep = 0;
-
+
// Check to see if we have a single dominating call instruction that is
// identical to C.
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
@@ -494,23 +500,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
cdep = 0;
break;
}
-
+
CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
// FIXME: All duplicated with non-local case.
if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
cdep = NonLocalDepCall;
continue;
}
-
+
cdep = 0;
break;
}
-
+
if (!cdep) {
valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
}
-
+
if (cdep->getNumOperands() != C->getNumOperands()) {
valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
@@ -523,18 +529,18 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
return nextValueNumber++;
}
}
-
+
uint32_t v = lookup_or_add(cdep);
valueNumbering.insert(std::make_pair(V, v));
return v;
-
+
} else {
valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
}
} else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
Expression e = create_expression(BO);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -542,12 +548,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
Expression e = create_expression(C);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -555,12 +561,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
Expression e = create_expression(U);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -568,12 +574,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
Expression e = create_expression(U);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -581,12 +587,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
Expression e = create_expression(U);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -594,12 +600,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
Expression e = create_expression(U);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -607,12 +613,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (CastInst* U = dyn_cast<CastInst>(V)) {
Expression e = create_expression(U);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -620,12 +626,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
Expression e = create_expression(U);
-
+
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
if (EI != expressionNumbering.end()) {
valueNumbering.insert(std::make_pair(V, EI->second));
@@ -633,7 +639,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
} else {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
+
return nextValueNumber++;
}
} else {
@@ -644,7 +650,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t ValueTable::lookup(Value* V) const {
+uint32_t ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
@@ -658,7 +664,7 @@ void ValueTable::clear() {
}
/// erase - Remove a value from the value numbering
-void ValueTable::erase(Value* V) {
+void ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
@@ -676,17 +682,17 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
- struct VISIBILITY_HIDDEN ValueNumberScope {
+ struct ValueNumberScope {
ValueNumberScope* parent;
DenseMap<uint32_t, Value*> table;
-
+
ValueNumberScope(ValueNumberScope* p) : parent(p) { }
};
}
namespace {
- class VISIBILITY_HIDDEN GVN : public FunctionPass {
+ class GVN : public FunctionPass {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
@@ -698,45 +704,35 @@ namespace {
ValueTable VN;
DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
-
- typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
- PhiMapType phiMap;
-
-
+
// This transformation requires dominator and postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
-
+
AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
}
-
+
// Helper functions
// FIXME: eliminate or document these better
bool processLoad(LoadInst* L,
SmallVectorImpl<Instruction*> &toErase);
- bool processInstruction(Instruction* I,
+ bool processInstruction(Instruction *I,
SmallVectorImpl<Instruction*> &toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVectorImpl<Instruction*> &toErase);
- bool processBlock(BasicBlock* BB);
- Value *GetValueForBlock(BasicBlock *BB, Instruction* orig,
- DenseMap<BasicBlock*, Value*> &Phis,
- bool top_level = false);
+ bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*>& d);
bool iterateOnFunction(Function &F);
- Value* CollapsePhi(PHINode* p);
- bool isSafeReplacement(PHINode* p, Instruction* inst);
+ Value *CollapsePhi(PHINode* p);
bool performPRE(Function& F);
- Value* lookupNumber(BasicBlock* BB, uint32_t num);
- bool mergeBlockIntoPredecessor(BasicBlock* BB);
- Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno);
+ Value *lookupNumber(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
};
-
+
char GVN::ID = 0;
}
@@ -756,107 +752,31 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
printf("}\n");
}
-Value* GVN::CollapsePhi(PHINode* p) {
- Value* constVal = p->hasConstantValue();
- if (!constVal) return 0;
-
- Instruction* inst = dyn_cast<Instruction>(constVal);
- if (!inst)
- return constVal;
-
- if (DT->dominates(inst, p))
- if (isSafeReplacement(p, inst))
- return inst;
- return 0;
-}
-
-bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
+static bool isSafeReplacement(PHINode* p, Instruction *inst) {
if (!isa<PHINode>(inst))
return true;
-
+
for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
UI != E; ++UI)
if (PHINode* use_phi = dyn_cast<PHINode>(UI))
if (use_phi->getParent() == inst->getParent())
return false;
-
+
return true;
}
-/// GetValueForBlock - Get the value to use within the specified basic block.
-/// available values are in Phis.
-Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,
- DenseMap<BasicBlock*, Value*> &Phis,
- bool top_level) {
-
- // If we have already computed this value, return the previously computed val.
- DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
- if (V != Phis.end() && !top_level) return V->second;
-
- // If the block is unreachable, just return undef, since this path
- // can't actually occur at runtime.
- if (!DT->isReachableFromEntry(BB))
- return Phis[BB] = Context->getUndef(orig->getType());
-
- if (BasicBlock *Pred = BB->getSinglePredecessor()) {
- Value *ret = GetValueForBlock(Pred, orig, Phis);
- Phis[BB] = ret;
- return ret;
- }
+Value *GVN::CollapsePhi(PHINode *PN) {
+ Value *ConstVal = PN->hasConstantValue(DT);
+ if (!ConstVal) return 0;
- // Get the number of predecessors of this block so we can reserve space later.
- // If there is already a PHI in it, use the #preds from it, otherwise count.
- // Getting it from the PHI is constant time.
- unsigned NumPreds;
- if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin()))
- NumPreds = ExistingPN->getNumIncomingValues();
- else
- NumPreds = std::distance(pred_begin(BB), pred_end(BB));
-
- // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so
- // now, then get values to fill in the incoming values for the PHI.
- PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle",
- BB->begin());
- PN->reserveOperandSpace(NumPreds);
-
- Phis.insert(std::make_pair(BB, PN));
-
- // Fill in the incoming values for the block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- Value* val = GetValueForBlock(*PI, orig, Phis);
- PN->addIncoming(val, *PI);
- }
-
- VN.getAliasAnalysis()->copyValue(orig, PN);
-
- // Attempt to collapse PHI nodes that are trivially redundant
- Value* v = CollapsePhi(PN);
- if (!v) {
- // Cache our phi construction results
- if (LoadInst* L = dyn_cast<LoadInst>(orig))
- phiMap[L->getPointerOperand()].insert(PN);
- else
- phiMap[orig].insert(PN);
-
- return PN;
- }
-
- PN->replaceAllUsesWith(v);
- if (isa<PointerType>(v->getType()))
- MD->invalidateCachedPointerInfo(v);
-
- for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
- E = Phis.end(); I != E; ++I)
- if (I->second == PN)
- I->second = v;
-
- DEBUG(cerr << "GVN removed: " << *PN);
- MD->removeInstruction(PN);
- PN->eraseFromParent();
- DEBUG(verifyRemoved(PN));
-
- Phis[BB] = v;
- return v;
+ Instruction *Inst = dyn_cast<Instruction>(ConstVal);
+ if (!Inst)
+ return ConstVal;
+
+ if (DT->dominates(Inst, PN))
+ if (isSafeReplacement(PN, Inst))
+ return Inst;
+ return 0;
}
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
@@ -869,11 +789,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,
/// currently speculating that it will be.
/// 3) we are speculating for this block and have used that to speculate for
/// other blocks.
-static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
// Optimistically assume that the block is fully available and check to see
// if we already know about this block in one lookup.
- std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
FullyAvailableBlocks.insert(std::make_pair(BB, 2));
// If the entry already existed for this block, return the precomputed value.
@@ -884,29 +804,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
IV.first->second = 3;
return IV.first->second != 0;
}
-
+
// Otherwise, see if it is fully available in all predecessors.
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
+
// If this block has no predecessors, it isn't live-in here.
if (PI == PE)
goto SpeculationFailure;
-
+
for (; PI != PE; ++PI)
// If the value isn't fully available in one of our predecessors, then it
// isn't fully available in this block either. Undo our previous
// optimistic assumption and bail out.
if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
goto SpeculationFailure;
-
+
return true;
-
+
// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
char &BBVal = FullyAvailableBlocks[BB];
-
+
// If we didn't speculate on this, just return with it set to false.
if (BBVal == 2) {
BBVal = 0;
@@ -918,7 +838,7 @@ SpeculationFailure:
// 0 if set to one.
SmallVector<BasicBlock*, 32> BBWorklist;
BBWorklist.push_back(BB);
-
+
while (!BBWorklist.empty()) {
BasicBlock *Entry = BBWorklist.pop_back_val();
// Note that this sets blocks to 0 (unavailable) if they happen to not
@@ -928,24 +848,372 @@ SpeculationFailure:
// Mark as unavailable.
EntryVal = 0;
-
+
for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
BBWorklist.push_back(*I);
}
-
+
return false;
}
+
+/// CanCoerceMustAliasedValueToLoad - Return true if
+/// CoerceAvailableValueToLoadType will succeed.
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
+ const Type *LoadTy,
+ const TargetData &TD) {
+  // If the loaded or stored value is a first-class array or struct, don't try
+  // to transform it. We need to be able to bitcast to integer.
+ if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
+ isa<StructType>(StoredVal->getType()) ||
+ isa<ArrayType>(StoredVal->getType()))
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (TD.getTypeSizeInBits(StoredVal->getType()) <
+ TD.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+
+/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace and
+/// InsertPt is the place to insert new instructions.
+///
+/// If we can't do it, return null.
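+///
+/// Illustrative sketches (hypothetical IR; names invented):
+///   store i32 %x, i32* %p
+///   %q = bitcast i32* %p to float*
+///   %f = load float* %q     ; reusable as "bitcast i32 %x to float"
+/// and, for a narrowing reuse:
+///   store i64 %v, i64* %p2
+///   %r = bitcast i64* %p2 to i32*
+///   %lo = load i32* %r      ; reusable via trunc (plus lshr on big-endian)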
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+ const Type *LoadedTy,
+ Instruction *InsertPt,
+ const TargetData &TD) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+ return 0;
+
+ const Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoreSize == LoadSize) {
+ if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
+ // Pointer to Pointer -> use bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+ }
+
+ // Convert source pointers to integers, which can be bitcast.
+ if (isa<PointerType>(StoredValTy)) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ const Type *TypeToCastTo = LoadedTy;
+ if (isa<PointerType>(TypeToCastTo))
+ TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (isa<PointerType>(LoadedTy))
+ StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+
+ return StoredVal;
+ }
+
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (isa<PointerType>(StoredValTy)) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!isa<IntegerType>(StoredValTy)) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+ StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
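+  // Illustration: reusing an i64 store for an i32 load of the same address on
+  // a big-endian target wants the high 32 bits of the store, so we lshr by
+  // StoreSize-LoadSize = 32 before the truncate.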
+ if (TD.isBigEndian()) {
+ Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
+ StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+ }
+
+ // Truncate the integer to the right size now.
+ const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+
+ if (LoadedTy == NewIntTy)
+ return StoredVal;
+
+ // If the result is a pointer, inttoptr.
+ if (isa<PointerType>(LoadedTy))
+ return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+
+ // Otherwise, bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+}
+
+/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
+/// be expressed as a base pointer plus a constant offset. Return the base and
+/// offset to the caller.
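+///
+/// For example (illustrative), with %G of type [10 x i32]*:
+///   %p = getelementptr [10 x i32]* %G, i32 0, i32 4
+/// decomposes into base %G and Offset = 4 * 4 = 16 bytes.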
+static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData &TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+    // Handle struct and array indices, which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
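+  // (Illustrative: with 32-bit pointers an accumulated Offset of 0x80000000
+  // must read as negative; the shift pair below sign-extends it from 32 to
+  // 64 bits.)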
+ unsigned PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store. This means
+/// that the store *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up. This returns -1 if we have to
+/// give up, or a byte number in the stored value of the piece that feeds the
+/// load.
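+///
+/// Illustrative case (the bitfield-style pattern this targets):
+///   store i32 %x, i32* %P
+///   %A = bitcast i32* %P to i8*
+///   %B = getelementptr i8* %A, i32 1
+///   %C = load i8* %B        ; fed by byte 1 of %x, so return 1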
+static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+ const TargetData &TD) {
+  // If the loaded or stored value is a first-class array or struct, don't try
+  // to transform it. We need to be able to bitcast to integer.
+ if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) ||
+ isa<StructType>(DepSI->getOperand(0)->getType()) ||
+ isa<ArrayType>(DepSI->getOperand(0)->getType()))
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase =
+ GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
+ Value *LoadBase =
+ GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens.
+ if (LoadOffset == StoreOffset) {
+#if 0
+ errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
+ << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+ << "Load Ptr = " << *L->getPointerOperand() << "\n"
+ << "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
+ errs() << "'" << L->getParent()->getParent()->getName() << "'"
+ << *L->getParent();
+#endif
+ return -1;
+ }
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
+ // remove this check, as it is duplicated with what we have below.
+ uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+ uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
+
+ if ((StoreSize & 7) | (LoadSize & 7))
+ return -1;
+ StoreSize >>= 3; // Convert to bytes.
+ LoadSize >>= 3;
+
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset) {
+ isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+ } else {
+ isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+ }
+ if (isAAFailure) {
+#if 0
+ errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
+ << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+ << "Load Ptr = " << *L->getPointerOperand() << "\n"
+ << "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
+ errs() << "'" << L->getParent()->getParent()->getName() << "'"
+ << *L->getParent();
+#endif
+ return -1;
+ }
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+  // (issue a smaller load, then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset+StoreSize < LoadOffset+LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset-StoreOffset;
+}
+
+
+/// GetStoreValueForLoad - Given SrcVal, an available stored value, and a byte
+/// Offset into it, produce the piece of SrcVal that a load of type LoadTy at
+/// that offset would read, inserting any needed conversions at InsertPt.
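+///
+/// Roughly (illustrative): for SrcVal = i32 %x, Offset = 1, LoadTy = i8 on a
+/// little-endian target this emits
+///   %s = lshr i32 %x, 8
+///   %t = trunc i32 %s to i8
+/// and returns %t.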
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+ const Type *LoadTy,
+ Instruction *InsertPt, const TargetData &TD){
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (isa<PointerType>(SrcVal->getType()))
+ SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
+ if (!isa<IntegerType>(SrcVal->getType()))
+ SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+ "tmp", InsertPt);
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (TD.isLittleEndian()) {
+ ShiftAmt = Offset*8;
+ } else {
+ ShiftAmt = (StoreSize-LoadSize-Offset)*8;
+ }
+
+ if (ShiftAmt)
+ SrcVal = BinaryOperator::CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
+
+ if (LoadSize != StoreSize)
+ SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+ "tmp", InsertPt);
+
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+}
+
+struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ /// V - The value that is live out of the block.
+ Value *V;
+ /// Offset - The byte offset in V that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.V = V;
+ Res.Offset = Offset;
+ return Res;
+ }
+};
+
+/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// construct SSA form, allowing us to eliminate LI. This returns the value
+/// that should be used at LI's definition site.
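+///
+/// Sketch (hypothetical blocks): if %x is available out of pred BB1 and %y
+/// out of pred BB2, SSAUpdater places "phi [%x, %BB1], [%y, %BB2]" at the top
+/// of LI's block, and that PHI is returned as LI's replacement.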
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
+ SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
+ const TargetData *TD,
+ AliasAnalysis *AA) {
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSAUpdate(&NewPHIs);
+ SSAUpdate.Initialize(LI);
+
+ const Type *LoadTy = LI->getType();
+
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ BasicBlock *BB = ValuesPerBlock[i].BB;
+ Value *AvailableVal = ValuesPerBlock[i].V;
+ unsigned Offset = ValuesPerBlock[i].Offset;
+
+ if (SSAUpdate.HasValueForBlock(BB))
+ continue;
+
+ if (AvailableVal->getType() != LoadTy) {
+ assert(TD && "Need target data to handle type mismatch case");
+ AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+ BB->getTerminator(), *TD);
+
+ DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
+ << *ValuesPerBlock[i].V << '\n'
+ << *AvailableVal << '\n' << "\n\n\n");
+ }
+
+ SSAUpdate.AddAvailableValue(BB, AvailableVal);
+ }
+
+ // Perform PHI construction.
+ Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
+
+ // If new PHI nodes were created, notify alias analysis.
+ if (isa<PointerType>(V->getType()))
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ AA->copyValue(LI, NewPHIs[i]);
+
+ return V;
+}
+
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
- SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
+ SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
Deps);
- //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI);
-
+ //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
+ // << Deps.size() << *LI << '\n');
+
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
@@ -956,106 +1224,124 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// clobber in the current block. Reject this early.
if (Deps.size() == 1 && Deps[0].second.isClobber()) {
DEBUG(
- DOUT << "GVN: non-local load ";
- WriteAsOperand(*DOUT.stream(), LI);
- DOUT << " is clobbered by " << *Deps[0].second.getInst();
+ errs() << "GVN: non-local load ";
+ WriteAsOperand(errs(), LI);
+ errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
);
return false;
}
-
+
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
+ SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks;
+
+ const TargetData *TD = 0;
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].first;
MemDepResult DepInfo = Deps[i].second;
-
+
if (DepInfo.isClobber()) {
+ // If the dependence is to a store that writes to a superset of the bits
+ // read by the load, we can extract the bits we need for the load from the
+ // stored value.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD) {
+ int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ DepSI->getOperand(0),
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // FIXME: Handle memset/memcpy.
UnavailableBlocks.push_back(DepBB);
continue;
}
-
+
Instruction *DepInst = DepInfo.getInst();
-
+
// Loading the allocation -> undef.
- if (isa<AllocationInst>(DepInst)) {
- ValuesPerBlock.push_back(std::make_pair(DepBB,
- Context->getUndef(LI->getType())));
+ if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ UndefValue::get(LI->getType())));
continue;
}
-
- if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
- // Reject loads and stores that are to the same address but are of
- // different types.
- // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
- // of bitfield access, it would be interesting to optimize for it at some
- // point.
+
+ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types if we have to.
if (S->getOperand(0)->getType() != LI->getType()) {
- UnavailableBlocks.push_back(DepBB);
- continue;
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+
+      // If the stored value is at least as large as the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+ LI->getType(), *TD)) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
}
-
- ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
-
- } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) {
+
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ S->getOperand(0)));
+ continue;
+ }
+
+ if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+ // If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
- UnavailableBlocks.push_back(DepBB);
- continue;
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+
+      // If the earlier loaded value is at least as large as this load, we can
+      // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
}
- ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
- } else {
- UnavailableBlocks.push_back(DepBB);
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
continue;
}
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
}
-
+
// If we have no predecessors that produce a known value for this load, exit
// early.
if (ValuesPerBlock.empty()) return false;
-
+
// If all of the instructions we depend on produce a known value for this
// load, then it is fully redundant and we can use PHI insertion to compute
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
- // Use cached PHI construction information from previous runs
- SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
- // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
- for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
- I != E; ++I) {
- if ((*I)->getParent() == LI->getParent()) {
- DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI);
- LI->replaceAllUsesWith(*I);
- if (isa<PointerType>((*I)->getType()))
- MD->invalidateCachedPointerInfo(*I);
- toErase.push_back(LI);
- NumGVNLoad++;
- return true;
- }
-
- ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
- }
-
- DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI);
+ DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
- DenseMap<BasicBlock*, Value*> BlockReplValues;
- BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
// Perform PHI construction.
- Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
- LI->replaceAllUsesWith(v);
-
- if (isa<PHINode>(v))
- v->takeName(LI);
- if (isa<PointerType>(v->getType()))
- MD->invalidateCachedPointerInfo(v);
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+ VN.getAliasAnalysis());
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (isa<PointerType>(V->getType()))
+ MD->invalidateCachedPointerInfo(V);
toErase.push_back(LI);
NumGVNLoad++;
return true;
}
-
+
if (!EnablePRE || !EnableLoadPRE)
return false;
@@ -1066,7 +1352,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// prefer to not increase code size. As such, we only do this when we know
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
-
+
SmallPtrSet<BasicBlock *, 4> Blockers;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
@@ -1090,28 +1376,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
allSingleSucc = false;
}
-
+
assert(TmpBB);
LoadBB = TmpBB;
-
+
// If we have a repl set with LI itself in it, this means we have a loop where
// at least one of the values is LI. Since this means that we won't be able
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- if (ValuesPerBlock[i].second == LI)
+ if (ValuesPerBlock[i].V == LI)
return false;
-
+
if (isSinglePred) {
bool isHot = false;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second))
- // "Hot" Instruction is in some loop (because it dominates its dep.
- // instruction).
- if (DT->dominates(LI, I)) {
- isHot = true;
- break;
- }
+ if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+ // "Hot" Instruction is in some loop (because it dominates its dep.
+ // instruction).
+ if (DT->dominates(LI, I)) {
+ isHot = true;
+ break;
+ }
// We are interested only in "hot" instructions. We don't want to do any
// mis-optimizations here.
@@ -1128,7 +1414,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
+ FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
@@ -1136,33 +1422,33 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
PI != E; ++PI) {
if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
continue;
-
+
// If this load is not available in multiple predecessors, reject it.
if (UnavailablePred && UnavailablePred != *PI)
return false;
UnavailablePred = *PI;
}
-
+
assert(UnavailablePred != 0 &&
"Fully available value should be eliminated above!");
-
+
// If the loaded pointer is a PHI node defined in this block, do PHI translation
// to get its value in the predecessor.
Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
-
+
// Make sure the value is live in the predecessor. If it was defined by a
// non-PHI instruction in this block, we don't know how to recompute it above.
if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
- DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
- << *LPInst << *LI << "\n");
+ DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
+ << *LPInst << '\n' << *LI << "\n");
return false;
}
-
+
// We don't currently handle critical edges :(
if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
- DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
- << UnavailablePred->getName() << "': " << *LI);
+ DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+ << UnavailablePred->getName() << "': " << *LI << '\n');
return false;
}
@@ -1182,28 +1468,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
- DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI);
-
+ DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+
Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
-
- SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
- for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
- I != E; ++I)
- ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
-
- DenseMap<BasicBlock*, Value*> BlockReplValues;
- BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
- BlockReplValues[UnavailablePred] = NewLoad;
-
+
+ // Add the newly created load.
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad));
+
// Perform PHI construction.
- Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
- LI->replaceAllUsesWith(v);
- if (isa<PHINode>(v))
- v->takeName(LI);
- if (isa<PointerType>(v->getType()))
- MD->invalidateCachedPointerInfo(v);
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+ VN.getAliasAnalysis());
+ LI->replaceAllUsesWith(V);
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (isa<PointerType>(V->getType()))
+ MD->invalidateCachedPointerInfo(V);
toErase.push_back(LI);
NumPRELoad++;
return true;
@@ -1214,64 +1495,119 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (L->isVolatile())
return false;
-
- Value* pointer = L->getPointerOperand();
// ... to a pointer that has been loaded from before...
- MemDepResult dep = MD->getDependency(L);
-
+ MemDepResult Dep = MD->getDependency(L);
+
// If the value isn't available, don't do anything!
- if (dep.isClobber()) {
+ if (Dep.isClobber()) {
+ // FIXME: We should handle memset/memcpy/memmove as dependent instructions
+ // to forward the value if available.
+ //if (isa<MemIntrinsic>(Dep.getInst()))
+ //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
+
+ // Check to see if we have something like this:
+ // store i32 123, i32* %P
+ // %A = bitcast i32* %P to i8*
+ // %B = gep i8* %A, i32 1
+ // %C = load i8* %B
+ //
+ // We could do that by recognizing if the clobber instructions are obviously
+ // a common base + constant offset, and if the previous store (or memset)
+ // completely covers this load. This sort of thing can happen in bitfield
+ // access code.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
+ if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
+ if (Offset != -1) {
+ Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+ L->getType(), L, *TD);
+ DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (isa<PointerType>(AvailVal->getType()))
+ MD->invalidateCachedPointerInfo(AvailVal);
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+ }
+
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
- DOUT << "GVN: load ";
- WriteAsOperand(*DOUT.stream(), L);
- Instruction *I = dep.getInst();
- DOUT << " is clobbered by " << *I;
+ errs() << "GVN: load ";
+ WriteAsOperand(errs(), L);
+ Instruction *I = Dep.getInst();
+ errs() << " is clobbered by " << *I << '\n';
);
return false;
}
// If it is defined in another block, try harder.
- if (dep.isNonLocal())
+ if (Dep.isNonLocal())
return processNonLocalLoad(L, toErase);
- Instruction *DepInst = dep.getInst();
+ Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
- // Only forward substitute stores to loads of the same type.
- // FIXME: Could do better!
- if (DepSI->getPointerOperand()->getType() != pointer->getType())
- return false;
+ Value *StoredVal = DepSI->getOperand(0);
+ // The store and load are to a must-aliased pointer, but they may not
+ // actually have the same type. See if we know how to reuse the stored
+ // value (depending on its type).
+ const TargetData *TD = 0;
+ if (StoredVal->getType() != L->getType() &&
+ (TD = getAnalysisIfAvailable<TargetData>())) {
+ StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+ L, *TD);
+ if (StoredVal == 0)
+ return false;
+
+ DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
+ }
+
// Remove it!
- L->replaceAllUsesWith(DepSI->getOperand(0));
- if (isa<PointerType>(DepSI->getOperand(0)->getType()))
- MD->invalidateCachedPointerInfo(DepSI->getOperand(0));
+ L->replaceAllUsesWith(StoredVal);
+ if (isa<PointerType>(StoredVal->getType()))
+ MD->invalidateCachedPointerInfo(StoredVal);
toErase.push_back(L);
NumGVNLoad++;
return true;
}
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
- // Only forward substitute stores to loads of the same type.
- // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
- if (DepLI->getType() != L->getType())
- return false;
+ Value *AvailableVal = DepLI;
+
+ // The loads are of a must-aliased pointer, but they may not actually have
+ // the same type. See if we know how to reuse the previously loaded value
+ // (depending on its type).
+ const TargetData *TD = 0;
+ if (DepLI->getType() != L->getType() &&
+ (TD = getAnalysisIfAvailable<TargetData>())) {
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+ if (AvailableVal == 0)
+ return false;
+
+ DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
+ }
// Remove it!
- L->replaceAllUsesWith(DepLI);
+ L->replaceAllUsesWith(AvailableVal);
if (isa<PointerType>(DepLI->getType()))
MD->invalidateCachedPointerInfo(DepLI);
toErase.push_back(L);
NumGVNLoad++;
return true;
}
-
+
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocationInst>(DepInst)) {
- L->replaceAllUsesWith(Context->getUndef(L->getType()));
+ if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
toErase.push_back(L);
NumGVNLoad++;
return true;
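
The clobber-handling comment above reduces to one question: does the earlier store completely cover the bytes the load reads? A conceptual sketch of that containment test in plain C++; this is not the patch's AnalyzeLoadFromClobberingStore, which must also prove the two pointers share a common base before the offsets are comparable:

    #include <stdint.h>

    // Sketch: with both accesses expressed as (common base + byte offset),
    // forwarding is possible only when the store's byte range contains the
    // load's; the returned value is where the load's bytes begin inside the
    // stored value, or -1 to give up, matching the sentinel used above.
    static int CoveredOffset(int64_t StoreOfs, uint64_t StoreSize,
                             int64_t LoadOfs, uint64_t LoadSize) {
      if (StoreOfs <= LoadOfs &&
          LoadOfs + (int64_t)LoadSize <= StoreOfs + (int64_t)StoreSize)
        return (int)(LoadOfs - StoreOfs);
      return -1;
    }

GetStoreValueForLoad, called above with that offset, then extracts the loaded bits from the stored value.
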
@@ -1280,150 +1616,93 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
return false;
}
-Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
+Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
if (I == localAvail.end())
return 0;
-
- ValueNumberScope* locals = I->second;
-
- while (locals) {
- DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num);
- if (I != locals->table.end())
+
+ ValueNumberScope *Locals = I->second;
+ while (Locals) {
+ DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
+ if (I != Locals->table.end())
return I->second;
- else
- locals = locals->parent;
+ Locals = Locals->parent;
}
-
+
return 0;
}
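
lookupNumber is a scope-chain walk: each block owns a table mapping value numbers to their leader in that block, chained to the scope of its immediate dominator, so a miss climbs the dominator tree. The same structure in isolation; the names are hypothetical and the leader payload is reduced to a bare pointer:

    #include <map>
    #include <stdint.h>

    // Sketch: a dominator-scoped leader table in the ValueNumberScope mold.
    struct Scope {
      std::map<uint32_t, void*> Table;  // value number -> leader
      Scope *Parent;                    // scope of the immediate dominator
    };

    static void *Lookup(Scope *S, uint32_t Num) {
      for (; S; S = S->Parent) {        // climb toward the domtree root
        std::map<uint32_t, void*>::iterator I = S->Table.find(Num);
        if (I != S->Table.end())
          return I->second;             // nearest dominating leader wins
      }
      return 0;                         // no dominating occurrence
    }
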
-/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
-/// by inheritance from the dominator fails, see if we can perform phi
-/// construction to eliminate the redundancy.
-Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) {
- BasicBlock* BaseBlock = orig->getParent();
-
- SmallPtrSet<BasicBlock*, 4> Visited;
- SmallVector<BasicBlock*, 8> Stack;
- Stack.push_back(BaseBlock);
-
- DenseMap<BasicBlock*, Value*> Results;
-
- // Walk backwards through our predecessors, looking for instances of the
- // value number we're looking for. Instances are recorded in the Results
- // map, which is then used to perform phi construction.
- while (!Stack.empty()) {
- BasicBlock* Current = Stack.back();
- Stack.pop_back();
-
- // If we've walked all the way to a proper dominator, then give up. Cases
- // where the instance is in the dominator will have been caught by the fast
- // path, and any cases that require phi construction further than this are
- // probably not worth it anyways. Note that this is a SIGNIFICANT compile
- // time improvement.
- if (DT->properlyDominates(Current, orig->getParent())) return 0;
-
- DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA =
- localAvail.find(Current);
- if (LA == localAvail.end()) return 0;
- DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno);
-
- if (V != LA->second->table.end()) {
- // Found an instance, record it.
- Results.insert(std::make_pair(Current, V->second));
- continue;
- }
-
- // If we reach the beginning of the function, then give up.
- if (pred_begin(Current) == pred_end(Current))
- return 0;
-
- for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current);
- PI != PE; ++PI)
- if (Visited.insert(*PI))
- Stack.push_back(*PI);
- }
-
- // If we didn't find instances, give up. Otherwise, perform phi construction.
- if (Results.size() == 0)
- return 0;
- else
- return GetValueForBlock(BaseBlock, orig, Results, true);
-}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I,
SmallVectorImpl<Instruction*> &toErase) {
- if (LoadInst* L = dyn_cast<LoadInst>(I)) {
- bool changed = processLoad(L, toErase);
-
- if (!changed) {
- unsigned num = VN.lookup_or_add(L);
- localAvail[I->getParent()]->table.insert(std::make_pair(num, L));
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ bool Changed = processLoad(LI, toErase);
+
+ if (!Changed) {
+ unsigned Num = VN.lookup_or_add(LI);
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
}
-
- return changed;
+
+ return Changed;
}
-
- uint32_t nextNum = VN.getNextUnusedValueNumber();
- unsigned num = VN.lookup_or_add(I);
-
- if (BranchInst* BI = dyn_cast<BranchInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
-
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
- Value* branchCond = BI->getCondition();
- uint32_t condVN = VN.lookup_or_add(branchCond);
-
- BasicBlock* trueSucc = BI->getSuccessor(0);
- BasicBlock* falseSucc = BI->getSuccessor(1);
-
- if (trueSucc->getSinglePredecessor())
- localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue();
- if (falseSucc->getSinglePredecessor())
- localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse();
+
+ Value *BranchCond = BI->getCondition();
+ uint32_t CondVN = VN.lookup_or_add(BranchCond);
+
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+
+ if (TrueSucc->getSinglePredecessor())
+ localAvail[TrueSucc]->table[CondVN] =
+ ConstantInt::getTrue(TrueSucc->getContext());
+ if (FalseSucc->getSinglePredecessor())
+ localAvail[FalseSucc]->table[CondVN] =
+ ConstantInt::getFalse(TrueSucc->getContext());
return false;
-
+
// Allocations are always uniquely numbered, so we can save time and memory
- // by fast failing them.
+ // by fast failing them.
} else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
return false;
}
-
+
// Collapse PHI nodes
if (PHINode* p = dyn_cast<PHINode>(I)) {
- Value* constVal = CollapsePhi(p);
-
+ Value *constVal = CollapsePhi(p);
+
if (constVal) {
- for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
- PI != PE; ++PI)
- PI->second.erase(p);
-
p->replaceAllUsesWith(constVal);
if (isa<PointerType>(constVal->getType()))
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
-
+
toErase.push_back(p);
} else {
- localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
}
-
+
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
- } else if (num == nextNum) {
- localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
-
+ } else if (Num == NextNum) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- } else if (Value* repl = lookupNumber(I->getParent(), num)) {
+ } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
@@ -1432,21 +1711,10 @@ bool GVN::processInstruction(Instruction *I,
toErase.push_back(I);
return true;
-#if 0
-  // Perform slow-path value-number based elimination with phi construction.
- } else if (Value* repl = AttemptRedundancyElimination(I, num)) {
- // Remove it!
- VN.erase(I);
- I->replaceAllUsesWith(repl);
- if (isa<PointerType>(repl->getType()))
- MD->invalidateCachedPointerInfo(repl);
- toErase.push_back(I);
- return true;
-#endif
} else {
- localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
}
-
+
return false;
}
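
Two details in processInstruction reward a second look. First, the branch case records the condition's value number as a true/false constant in any single-predecessor successor, which lets a later iteration fold recomputations of that condition. Second, the Num == NextNum test exploits sequential numbering: a brand-new number proves no dominating occurrence can exist, so the scope walk is skipped. The first point at the source level, purely illustrative:

    #include <cassert>

    // Sketch: the redundancy the successor table entries expose. Both
    // comparisons below get the same value number; inside the taken branch
    // that number is bound to 'true', so the second compare folds away.
    static int Example(int x) {
      bool Cond = (x == 0);      // value number N
      if (Cond) {
        bool Cond2 = (x == 0);   // also number N; known true in this block
        assert(Cond2 && "GVN folds this to the constant true");
        return 1;
      }
      return 0;
    }
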
@@ -1457,35 +1725,35 @@ bool GVN::runOnFunction(Function& F) {
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
-
- bool changed = false;
- bool shouldContinue = true;
-
+
+ bool Changed = false;
+ bool ShouldContinue = true;
+
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock* BB = FI;
+ BasicBlock *BB = FI;
++FI;
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) NumGVNBlocks++;
-
- changed |= removedBlock;
+
+ Changed |= removedBlock;
}
-
+
unsigned Iteration = 0;
-
- while (shouldContinue) {
- DEBUG(cerr << "GVN iteration: " << Iteration << "\n");
- shouldContinue = iterateOnFunction(F);
- changed |= shouldContinue;
+
+ while (ShouldContinue) {
+ DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
+ ShouldContinue = iterateOnFunction(F);
+ Changed |= ShouldContinue;
++Iteration;
}
-
+
if (EnablePRE) {
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
- changed |= PREChanged;
+ Changed |= PREChanged;
}
}
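
runOnFunction is the classic fixpoint driver: merge trivial blocks once, iterate full GVN until a pass changes nothing, then iterate PRE the same way; the FIXME just below records that changes made by PRE are never fed back into GVN. The shape, abstracted into a sketch with the phase body left as any callable that reports whether it changed the function:

    // Sketch: the iterate-until-stable pattern used for both phases above.
    template <typename Phase>
    static bool RunToFixpoint(Phase P) {
      bool Changed = false, Again = true;
      while (Again) {
        Again = P();       // one full pass over the function
        Changed |= Again;  // remember whether anything ever changed
      }
      return Changed;
    }
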
// FIXME: Should perform GVN again after PRE does something. PRE can move
@@ -1495,27 +1763,27 @@ bool GVN::runOnFunction(Function& F) {
cleanupGlobalSets();
- return changed;
+ return Changed;
}
-bool GVN::processBlock(BasicBlock* BB) {
+bool GVN::processBlock(BasicBlock *BB) {
// FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
// incrementing BI before processing an instruction).
SmallVector<Instruction*, 8> toErase;
- bool changed_function = false;
-
+ bool ChangedFunction = false;
+
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- changed_function |= processInstruction(BI, toErase);
+ ChangedFunction |= processInstruction(BI, toErase);
if (toErase.empty()) {
++BI;
continue;
}
-
+
// If we need some instructions deleted, do it now.
NumGVNInstr += toErase.size();
-
+
// Avoid iterator invalidation.
bool AtStart = BI == BB->begin();
if (!AtStart)
@@ -1523,7 +1791,7 @@ bool GVN::processBlock(BasicBlock* BB) {
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
- DEBUG(cerr << "GVN removed: " << **I);
+ DEBUG(errs() << "GVN removed: " << **I << '\n');
MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
@@ -1535,8 +1803,8 @@ bool GVN::processBlock(BasicBlock* BB) {
else
++BI;
}
-
- return changed_function;
+
+ return ChangedFunction;
}
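
The AtStart bookkeeping above is the standard idiom for erasing list elements out from under a live iterator: park the cursor on a neighbour that is guaranteed to survive, erase, then step back onto the next unvisited element. The same dance on a std::list, as a self-contained sketch:

    #include <list>

    // Sketch: erase the current element without invalidating the cursor,
    // mirroring processBlock's AtStart/--BI handling.
    static void EraseOdds(std::list<int> &L) {
      for (std::list<int>::iterator I = L.begin(); I != L.end();) {
        if (*I % 2 == 0) { ++I; continue; }  // survivor: just advance
        std::list<int>::iterator Doomed = I;
        bool AtStart = (I == L.begin());
        if (!AtStart) --I;                   // park on the previous element
        L.erase(Doomed);
        if (AtStart) I = L.begin();          // resume from the new front
        else ++I;                            // step past the parked element
      }
    }
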
/// performPRE - Perform a purely local form of PRE that looks for diamond
@@ -1547,32 +1815,33 @@ bool GVN::performPRE(Function& F) {
DenseMap<BasicBlock*, Value*> predMap;
for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock* CurrentBlock = *DI;
-
+ BasicBlock *CurrentBlock = *DI;
+
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
-
+
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end(); BI != BE; ) {
Instruction *CurInst = BI++;
- if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
- isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) ||
+ if (isa<AllocationInst>(CurInst) ||
+ isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+ CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
- uint32_t valno = VN.lookup(CurInst);
-
+ uint32_t ValNo = VN.lookup(CurInst);
+
// Look for the predecessors for PRE opportunities. We're
// only trying to solve the basic diamond case, where
// a value is computed in the successor and one predecessor,
// but not the other. We also explicitly disallow cases
// where the successor is its own predecessor, because they're
// more complicated to get right.
- unsigned numWith = 0;
- unsigned numWithout = 0;
- BasicBlock* PREPred = 0;
+ unsigned NumWith = 0;
+ unsigned NumWithout = 0;
+ BasicBlock *PREPred = 0;
predMap.clear();
for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -1581,59 +1850,59 @@ bool GVN::performPRE(Function& F) {
// own predecessor, or in blocks with predecessors
// that are not reachable.
if (*PI == CurrentBlock) {
- numWithout = 2;
+ NumWithout = 2;
break;
} else if (!localAvail.count(*PI)) {
- numWithout = 2;
+ NumWithout = 2;
break;
}
-
- DenseMap<uint32_t, Value*>::iterator predV =
- localAvail[*PI]->table.find(valno);
+
+ DenseMap<uint32_t, Value*>::iterator predV =
+ localAvail[*PI]->table.find(ValNo);
if (predV == localAvail[*PI]->table.end()) {
PREPred = *PI;
- numWithout++;
+ NumWithout++;
} else if (predV->second == CurInst) {
- numWithout = 2;
+ NumWithout = 2;
} else {
predMap[*PI] = predV->second;
- numWith++;
+ NumWith++;
}
}
-
+
// Don't do PRE when it might increase code size, i.e. when
// we would need to insert instructions in more than one pred.
- if (numWithout != 1 || numWith == 0)
+ if (NumWithout != 1 || NumWith == 0)
continue;
-
+
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
- unsigned succNum = 0;
+ unsigned SuccNum = 0;
for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
i != e; ++i)
if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
- succNum = i;
+ SuccNum = i;
break;
}
-
- if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
- toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
+
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
continue;
}
-
+
// Instantiate the expression in the predecessor that lacked it.
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
// in this loop.
- Instruction* PREInstr = CurInst->clone();
+ Instruction *PREInstr = CurInst->clone();
bool success = true;
for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
Value *Op = PREInstr->getOperand(i);
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
-
+
if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
PREInstr->setOperand(i, V);
} else {
@@ -1641,25 +1910,25 @@ bool GVN::performPRE(Function& F) {
break;
}
}
-
+
// Fail out if we encounter an operand that is not available in
- // the PRE predecessor. This is typically because of loads which
+ // the PRE predecessor. This is typically because of loads which
// are not value numbered precisely.
if (!success) {
delete PREInstr;
DEBUG(verifyRemoved(PREInstr));
continue;
}
-
+
PREInstr->insertBefore(PREPred->getTerminator());
PREInstr->setName(CurInst->getName() + ".pre");
predMap[PREPred] = PREInstr;
- VN.add(PREInstr, valno);
+ VN.add(PREInstr, ValNo);
NumGVNPRE++;
-
+
// Update the availability map to include the new instruction.
- localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
-
+ localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+
// Create a PHI to make the value available in this block.
PHINode* Phi = PHINode::Create(CurInst->getType(),
CurInst->getName() + ".pre-phi",
@@ -1667,27 +1936,27 @@ bool GVN::performPRE(Function& F) {
for (pred_iterator PI = pred_begin(CurrentBlock),
PE = pred_end(CurrentBlock); PI != PE; ++PI)
Phi->addIncoming(predMap[*PI], *PI);
-
- VN.add(Phi, valno);
- localAvail[CurrentBlock]->table[valno] = Phi;
-
+
+ VN.add(Phi, ValNo);
+ localAvail[CurrentBlock]->table[ValNo] = Phi;
+
CurInst->replaceAllUsesWith(Phi);
if (isa<PointerType>(Phi->getType()))
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
-
- DEBUG(cerr << "GVN PRE removed: " << *CurInst);
+
+ DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
MD->removeInstruction(CurInst);
CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
Changed = true;
}
}
-
+
for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
SplitCriticalEdge(I->first, I->second, this);
-
+
return Changed || toSplit.size();
}
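
The NumWith/NumWithout census earlier in performPRE carries the pass's whole profitability argument: materialize the expression only when exactly one predecessor lacks it and at least one already has it, so no path grows and one path shrinks. As a standalone predicate; this helper is hypothetical, not part of the patch:

    #include <vector>

    // Sketch: the diamond-shape gate. Avail[i] says whether predecessor i
    // already has a leader for the expression's value number.
    static bool WorthDoingPRE(const std::vector<bool> &Avail) {
      unsigned NumWith = 0, NumWithout = 0;
      for (unsigned i = 0, e = Avail.size(); i != e; ++i) {
        if (Avail[i]) ++NumWith;
        else          ++NumWithout;
      }
      // More than one missing pred means duplicated code; zero present
      // preds means speculation rather than elimination.
      return NumWithout == 1 && NumWith != 0;
    }
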
@@ -1705,25 +1974,24 @@ bool GVN::iterateOnFunction(Function &F) {
}
// Top-down walk of the dominator tree
- bool changed = false;
+ bool Changed = false;
#if 0
// Needed for value numbering with phi construction to work.
ReversePostOrderTraversal<Function*> RPOT(&F);
for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
RE = RPOT.end(); RI != RE; ++RI)
- changed |= processBlock(*RI);
+ Changed |= processBlock(*RI);
#else
for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- changed |= processBlock(DI->getBlock());
+ Changed |= processBlock(DI->getBlock());
#endif
- return changed;
+ return Changed;
}
void GVN::cleanupGlobalSets() {
VN.clear();
- phiMap.clear();
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
@@ -1736,18 +2004,6 @@ void GVN::cleanupGlobalSets() {
void GVN::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
- // Walk through the PHI map to make sure the instruction isn't hiding in there
- // somewhere.
- for (PhiMapType::iterator
- I = phiMap.begin(), E = phiMap.end(); I != E; ++I) {
- assert(I->first != Inst && "Inst is still a key in PHI map!");
-
- for (SmallPtrSet<Instruction*, 4>::iterator
- II = I->second.begin(), IE = I->second.end(); II != IE; ++II) {
- assert(*II != Inst && "Inst is still a value in PHI map!");
- }
- }
-
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 88cf60ecbaa8..e2d9e0b9ec4a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -51,11 +51,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -67,7 +67,7 @@ STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
namespace {
- class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass {
+ class IndVarSimplify : public LoopPass {
IVUsers *IU;
LoopInfo *LI;
ScalarEvolution *SE;
@@ -75,30 +75,30 @@ namespace {
bool Changed;
public:
- static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(&ID) {}
-
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addRequired<ScalarEvolution>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addRequired<IVUsers>();
- AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<IVUsers>();
- AU.addPreservedID(LCSSAID);
- AU.setPreservesCFG();
- }
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify() : LoopPass(&ID) {}
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<IVUsers>();
+ AU.setPreservesCFG();
+ }
private:
void RewriteNonIntegerIVs(Loop *L);
- ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV* BackedgeTakenCount,
+ ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
Value *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
@@ -129,7 +129,7 @@ Pass *llvm::createIndVarSimplifyPass() {
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
- const SCEV* BackedgeTakenCount,
+ const SCEV *BackedgeTakenCount,
Value *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
@@ -138,13 +138,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
Value *CmpIndVar;
- const SCEV* RHS = BackedgeTakenCount;
+ const SCEV *RHS = BackedgeTakenCount;
if (ExitingBlock == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
- const SCEV* Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
- const SCEV* N =
+ const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
+ const SCEV *N =
SE->getAddExpr(BackedgeTakenCount,
SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));
if ((isa<SCEVConstant>(N) && !N->isZero()) ||
@@ -182,13 +182,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
else
Opcode = ICmpInst::ICMP_EQ;
- DOUT << "INDVARS: Rewriting loop exit condition to:\n"
- << " LHS:" << *CmpIndVar // includes a newline
- << " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n";
+ DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n");
- ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
+ ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
Instruction *OrigCond = cast<Instruction>(BI->getCondition());
// It's tempting to use replaceAllUsesWith here to fully replace the old
@@ -264,7 +264,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// Okay, this instruction has a user outside of the current loop
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
- const SCEV* ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
if (!ExitValue->isLoopInvariant(L))
continue;
@@ -273,25 +273,23 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
- DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
- << " LoopVal = " << *Inst << "\n";
+ DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+ << " LoopVal = " << *Inst << "\n");
PN->setIncomingValue(i, ExitVal);
// If this instruction is dead now, delete it.
RecursivelyDeleteTriviallyDeadInstructions(Inst);
- // If we're inserting code into the exit block rather than the
- // preheader, we can (and have to) remove the PHI entirely.
- // This is safe, because the NewVal won't be variant
- // in the loop, so we don't need an LCSSA phi node anymore.
- if (ExitBlocks.size() == 1) {
+ if (NumPreds == 1) {
+ // Completely replace a single-pred PHI. This is safe, because the
+ // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+ // node anymore.
PN->replaceAllUsesWith(ExitVal);
RecursivelyDeleteTriviallyDeadInstructions(PN);
- break;
}
}
- if (ExitBlocks.size() != 1) {
+ if (NumPreds != 1) {
// Clone the PHI and delete the original one. This lets IVUsers and
// any other maps purge the original user from their records.
PHINode *NewPN = PN->clone();
@@ -339,7 +337,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteNonIntegerIVs(L);
BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
- const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE);
@@ -367,14 +365,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
NeedCannIV = true;
}
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- const SCEV* Stride = IU->StrideOrder[i];
+ const SCEV *Stride = IU->StrideOrder[i];
const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
if (!LargestType ||
SE->getTypeSizeInBits(Ty) >
SE->getTypeSizeInBits(LargestType))
LargestType = Ty;
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
@@ -403,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumInserted;
Changed = true;
- DOUT << "INDVARS: New CanIV: " << *IndVar;
+ DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n');
// Now that the official induction variable is established, reinsert
// the old canonical-looking variable after it so that the IR remains
@@ -458,9 +456,9 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
// the need for the code evaluation methods to insert induction variables
// of different sizes.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- const SCEV* Stride = IU->StrideOrder[i];
+ const SCEV *Stride = IU->StrideOrder[i];
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
ilist<IVStrideUse> &List = SI->second->Users;
@@ -471,7 +469,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
Instruction *User = UI->getUser();
// Compute the final addrec to expand into code.
- const SCEV* AR = IU->getReplacementExpr(*UI);
+ const SCEV *AR = IU->getReplacementExpr(*UI);
// FIXME: It is an extremely bad idea to indvar substitute anything more
// complex than affine induction variables. Doing so will put expensive
@@ -508,8 +506,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
NewVal->takeName(Op);
User->replaceUsesOfWith(Op, NewVal);
UI->setOperandValToReplace(NewVal);
- DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op
- << " into = " << *NewVal << "\n";
+ DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
++NumRemoved;
Changed = true;
@@ -546,8 +544,19 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
// New instructions were inserted at the end of the preheader.
if (isa<PHINode>(I))
break;
- if (I->isTrapping())
+ // Don't move instructions which might have side effects, since the side
+    // effects need to complete before the instructions inside the loop.  Also
+ // don't move instructions which might read memory, since the loop may
+ // modify memory. Note that it's okay if the instruction might have
+ // undefined behavior: LoopSimplify guarantees that the preheader
+ // dominates the exit block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
continue;
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
// Determine if there is a use in or before the loop (direct or
// otherwise).
bool UsedInLoop = false;
@@ -630,7 +639,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
// Check incoming value.
ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
if (!InitValue) return;
- uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits();
+ uint64_t newInitValue =
+ Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
return;
@@ -646,7 +656,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
IncrVIndex = 0;
IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));
if (!IncrValue) return;
- uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits();
+ uint64_t newIncrValue =
+ Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))
return;
@@ -677,7 +688,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
EVIndex = 0;
EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
if (!EV) return;
- uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits();
+ uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
if (!convertToInt(EV->getValueAPF(), &intEV))
return;
@@ -710,24 +721,26 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;
// Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Type::Int32Ty,
+ PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()),
PH->getName()+".int", PH);
- NewPHI->addIncoming(Context->getConstantInt(Type::Int32Ty, newInitValue),
+ NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()),
+ newInitValue),
PH->getIncomingBlock(IncomingEdge));
Value *NewAdd = BinaryOperator::CreateAdd(NewPHI,
- Context->getConstantInt(Type::Int32Ty,
+ ConstantInt::get(Type::getInt32Ty(PH->getContext()),
newIncrValue),
Incr->getName()+".int", Incr);
NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));
// The back edge is edge 1 of newPHI, whatever it may have been in the
// original PHI.
- ConstantInt *NewEV = Context->getConstantInt(Type::Int32Ty, intEV);
+ ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()),
+ intEV);
Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
- ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
- EC->getParent()->getTerminator());
+ ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
+ NewPred, LHS, RHS, EC->getName());
// In the following deletions, PH may become dead and may be deleted.
// Use a WeakVH to observe whether this happens.
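
A WeakVH, for readers who have not met it, is a value handle that nulls itself when the value it watches is deleted; it is needed here because the recursive dead-code deletions below may transitively erase PH itself. A usage sketch, not the patch's exact sequence, assuming the era's llvm/Support/ValueHandle.h interface:

    // Sketch: delete dead code that may or may not reach PH, then only
    // touch PH if the handle says it survived. If the handle went null,
    // PH's memory is gone and the rewrite must be skipped.
    void ReplaceIfAlive(PHINode *PH, Value *NewIV, Instruction *DeadRoot) {
      WeakVH PHHandle(PH);                             // nulls itself if PH dies
      RecursivelyDeleteTriviallyDeadInstructions(DeadRoot);
      if (PHHandle)                                    // PH still alive?
        PH->replaceAllUsesWith(NewIV);
    }
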
@@ -739,7 +752,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
RecursivelyDeleteTriviallyDeadInstructions(EC);
// Delete old, floating point, increment instruction.
- Incr->replaceAllUsesWith(Context->getUndef(Incr->getType()));
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
RecursivelyDeleteTriviallyDeadInstructions(Incr);
// Replace floating induction variable, if it isn't already deleted.
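
For context, HandleFloatingPointIV exists because a float-typed counter defeats ScalarEvolution; when the initial value, step, and exit bound all convert exactly to integers (the convertToInt checks above), the pass rebuilds the induction variable as an i32 and rewrites the exit compare. The C-level shape it rescues, illustratively:

    // Sketch: a loop whose float IV is exactly representable at every step.
    // After the transform, the loop control behaves like
    //   for (int i = 0; i != 16; ++i)
    // with the float uses derived from the integer IV.
    void scale(float *A) {
      for (float f = 0.0f; f != 16.0f; f += 1.0f)  // float IV blocks SCEV
        A[(int)f] *= 2.0f;
    }
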
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 59fbd396a3a1..7c96c49a34b9 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -40,7 +40,9 @@
#include "llvm/Pass.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -48,11 +50,13 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -60,7 +64,6 @@
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
-#include <sstream>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -71,29 +74,49 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
namespace {
- class VISIBILITY_HIDDEN InstCombiner
- : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction*> {
- // Worklist of all of the instructions that need to be simplified.
+ /// InstCombineWorklist - This is the worklist management logic for
+ /// InstCombine.
+ class InstCombineWorklist {
SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
- TargetData *TD;
- bool MustPreserveLCSSA;
+
+ void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT
+ InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
public:
- static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(&ID) {}
-
- LLVMContext* getContext() { return Context; }
-
- /// AddToWorkList - Add the specified instruction to the worklist if it
- /// isn't already in it.
- void AddToWorkList(Instruction *I) {
- if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
+ InstCombineWorklist() {}
+
+ bool isEmpty() const { return Worklist.empty(); }
+
+ /// Add - Add the specified instruction to the worklist if it isn't already
+ /// in it.
+ void Add(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+ DEBUG(errs() << "IC: ADD: " << *I << '\n');
Worklist.push_back(I);
+ }
+ }
+
+ void AddValue(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Add(I);
}
- // RemoveFromWorkList - remove I from the worklist if it exists.
- void RemoveFromWorkList(Instruction *I) {
+  /// AddInitialGroup - Add the specified batch of stuff in reverse order,
+ /// which should only be done when the worklist is empty and when the group
+ /// has no duplicates.
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ assert(Worklist.empty() && "Worklist must be empty to add initial group");
+ Worklist.reserve(NumEntries+16);
+ DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ for (; NumEntries; --NumEntries) {
+ Instruction *I = List[NumEntries-1];
+ WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ Worklist.push_back(I);
+ }
+ }
+
+ // Remove - remove I from the worklist if it exists.
+ void Remove(Instruction *I) {
DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
if (It == WorklistMap.end()) return; // Not in worklist.
@@ -103,51 +126,74 @@ namespace {
WorklistMap.erase(It);
}
- Instruction *RemoveOneFromWorkList() {
+ Instruction *RemoveOne() {
Instruction *I = Worklist.back();
Worklist.pop_back();
WorklistMap.erase(I);
return I;
}
-
/// AddUsersToWorkList - When an instruction is simplified, add all users of
/// the instruction to the work lists because they might get more simplified
/// now.
///
- void AddUsersToWorkList(Value &I) {
+ void AddUsersToWorkList(Instruction &I) {
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
UI != UE; ++UI)
- AddToWorkList(cast<Instruction>(*UI));
- }
-
- /// AddUsesToWorkList - When an instruction is simplified, add operands to
- /// the work lists because they might get more simplified now.
- ///
- void AddUsesToWorkList(Instruction &I) {
- for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(*i))
- AddToWorkList(Op);
+ Add(cast<Instruction>(*UI));
}
- /// AddSoonDeadInstToWorklist - The specified instruction is about to become
- /// dead. Add all of its operands to the worklist, turning them into
- /// undef's to reduce the number of uses of those instructions.
- ///
- /// Return the specified operand before it is turned into an undef.
- ///
- Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
- Value *R = I.getOperand(op);
-
- for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(*i)) {
- AddToWorkList(Op);
- // Set the operand to undef to drop the use.
- *i = Context->getUndef(Op->getType());
- }
+
+ /// Zap - check that the worklist is empty and nuke the backing store for
+ /// the map if it is large.
+ void Zap() {
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
- return R;
+ // Do an explicit clear, this shrinks the map if needed.
+ WorklistMap.clear();
}
+ };
+} // end anonymous namespace.
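
The worklist pairs a vector, which gives LIFO processing order, with a map from instruction to vector index, so Add can reject duplicates in constant time instead of scanning. The same idea shrunk to library types, with hypothetical names:

    #include <map>
    #include <vector>

    // Sketch: a de-duplicating LIFO worklist in the InstCombineWorklist
    // mold; the map remembers each element's slot for O(1) membership.
    class MiniWorklist {
      std::vector<int> Items;
      std::map<int, unsigned> Index;   // element -> position in Items
    public:
      void Add(int X) {
        if (Index.insert(std::make_pair(X, (unsigned)Items.size())).second)
          Items.push_back(X);          // only the first insertion lands
      }
      bool IsEmpty() const { return Items.empty(); }
      int RemoveOne() {
        int X = Items.back();
        Items.pop_back();
        Index.erase(X);
        return X;
      }
    };
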
+
+
+namespace {
+ /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+ /// just like the normal insertion helper, but also adds any new instructions
+ /// to the instcombine worklist.
+ class InstCombineIRInserter : public IRBuilderDefaultInserter<true> {
+ InstCombineWorklist &Worklist;
+ public:
+ InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+ }
+ };
+} // end anonymous namespace
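
The inserter hook above is what makes the new Builder, declared in the next hunk, self-maintaining: every instruction the IRBuilder materializes passes through InsertHelper, which both places it and queues it for re-simplification. A usage sketch assuming the IRBuilder<preserveNames, Folder, Inserter> shape this patch uses; the constructor details are an assumption about the era's API and may differ:

    // Sketch: a builder wired to the worklist. Anything Builder.Create*
    // emits is inserted into BB *and* queued for another visit.
    Value *EmitQueuedAdd(BasicBlock *BB, Value *X, Value *Y,
                         InstCombineWorklist &WL) {
      IRBuilder<true, ConstantFolder, InstCombineIRInserter>
          Builder(BB->getContext(), ConstantFolder(BB->getContext()),
                  InstCombineIRInserter(WL));
      Builder.SetInsertPoint(BB);                 // append at the block's end
      return Builder.CreateAdd(X, Y, "sum");      // lands in BB and in WL
    }
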
+
+
+namespace {
+ class InstCombiner : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ TargetData *TD;
+ bool MustPreserveLCSSA;
+ bool MadeIRChange;
+ public:
+ /// Worklist - All of the instructions that need to be simplified.
+ InstCombineWorklist Worklist;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+
+ LLVMContext *Context;
+ LLVMContext *getContext() const { return Context; }
public:
virtual bool runOnFunction(Function &F);
@@ -155,12 +201,11 @@ namespace {
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
AU.addPreservedID(LCSSAID);
AU.setPreservesCFG();
}
- TargetData &getTargetData() const { return *TD; }
+ TargetData *getTargetData() const { return TD; }
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
@@ -187,8 +232,10 @@ namespace {
Instruction *visitSDiv(BinaryOperator &I);
Instruction *visitFDiv(BinaryOperator &I);
Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+ Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
Instruction *visitAnd(BinaryOperator &I);
Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+ Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C);
Instruction *visitOr (BinaryOperator &I);
@@ -208,7 +255,7 @@ namespace {
Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
- Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I);
@@ -269,30 +316,10 @@ namespace {
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
BB->getInstList().insert(&Old, New); // Insert inst
- AddToWorkList(New);
+ Worklist.Add(New);
return New;
}
-
- /// InsertCastBefore - Insert a cast of V to TY before the instruction POS.
- /// This also adds the cast to the worklist. Finally, this returns the
- /// cast.
- Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty,
- Instruction &Pos) {
- if (V->getType() == Ty) return V;
-
- if (Constant *CV = dyn_cast<Constant>(V))
- return Context->getConstantExprCast(opc, CV, Ty);
-
- Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
- AddToWorkList(C);
- return C;
- }
- Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
- return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
- }
-
-
// ReplaceInstUsesWith - This method is to be used when an instruction is
// found to be dead, replaceable with another preexisting expression.  Here
// we add all uses of I to the worklist, replace all uses of I with the new
@@ -300,16 +327,15 @@ namespace {
// modified.
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
- AddUsersToWorkList(I); // Add all modified instrs to worklist
- if (&I != V) {
- I.replaceAllUsesWith(V);
- return &I;
- } else {
- // If we are replacing the instruction with itself, this must be in a
- // segment of unreachable code, so just clobber the instruction.
- I.replaceAllUsesWith(Context->getUndef(I.getType()));
- return &I;
- }
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ I.replaceAllUsesWith(V);
+ return &I;
}
// EraseInstFromFunction - When dealing with an instruction that has side
@@ -317,10 +343,19 @@ namespace {
// instruction. Instead, visit methods should return the value returned by
// this function.
Instruction *EraseInstFromFunction(Instruction &I) {
+ DEBUG(errs() << "IC: ERASE " << I << '\n');
+
assert(I.use_empty() && "Cannot erase instruction that is used!");
- AddUsesToWorkList(I);
- RemoveFromWorkList(&I);
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ Worklist.Add(Op);
+ }
+ Worklist.Remove(&I);
I.eraseFromParent();
+ MadeIRChange = true;
return 0; // Don't do anything with FI
}
@@ -364,10 +399,15 @@ namespace {
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt& UndefElts, unsigned Depth = 0);
- // FoldOpIntoPhi - Given a binary operator or cast instruction which has a
- // PHI node as operand #0, see if we can fold the instruction into the PHI
- // (which is only possible if all operands to the PHI are constants).
- Instruction *FoldOpIntoPhi(Instruction &I);
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+ // that would normally be unprofitable because they strongly encourage jump
+ // threading.
+ Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
// operator and they all are only used by the PHI, PHI together their
@@ -399,7 +439,7 @@ namespace {
unsigned PrefAlign = 0);
};
-}
+} // end anonymous namespace
char InstCombiner::ID = 0;
static RegisterPass<InstCombiner>
@@ -409,7 +449,8 @@ X("instcombine", "Combine redundant instructions");
// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
+ if (BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) ||
BinaryOperator::isNot(V))
return 3;
return 4;
@@ -429,7 +470,7 @@ static bool isOnlyUse(Value *V) {
static const Type *getPromotedType(const Type *Ty) {
if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
if (ITy->getBitWidth() < 32)
- return Type::Int32Ty;
+ return Type::getInt32Ty(Ty->getContext());
}
return Ty;
}
@@ -438,29 +479,12 @@ static const Type *getPromotedType(const Type *Ty) {
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
static Value *getBitCastOperand(Value *V) {
- if (BitCastInst *I = dyn_cast<BitCastInst>(V))
- // BitCastInst?
- return I->getOperand(0);
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
- // GetElementPtrInst?
- if (GEP->hasAllZeroIndices())
- return GEP->getOperand(0);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() == Instruction::BitCast)
- // BitCast ConstantExp?
- return CE->getOperand(0);
- else if (CE->getOpcode() == Instruction::GetElementPtr) {
- // GetElementPtr ConstantExp?
- for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end();
- I != E; ++I) {
- ConstantInt *CI = dyn_cast<ConstantInt>(I);
- if (!CI || !CI->isZero())
- // Any non-zero indices? Not cast-like.
- return 0;
- }
- // All-zero indices? This is just like casting.
- return CE->getOperand(0);
- }
+ if (Operator *O = dyn_cast<Operator>(V)) {
+ if (O->getOpcode() == Instruction::BitCast)
+ return O->getOperand(0);
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (GEP->hasAllZeroIndices())
+ return GEP->getPointerOperand();
}
return 0;
}
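
The rewrite of getBitCastOperand above leans on the Operator and GEPOperator classes, which present an Instruction and the equivalent ConstantExpr through one interface, so a single dyn_cast replaces the old three-way case analysis. The pattern generalizes to a cast-stripping loop; this helper is hypothetical but built from the same calls used above:

    // Sketch: strip bitcasts and all-zero-index GEPs from V, whether they
    // are instructions or constant expressions; Operator hides the split.
    static Value *StripCastLikeOps(Value *V) {
      while (Operator *O = dyn_cast<Operator>(V)) {
        if (O->getOpcode() == Instruction::BitCast) {
          V = O->getOperand(0);                    // plain pointer bitcast
        } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(O)) {
          if (!GEP->hasAllZeroIndices())
            break;                                 // real addressing; stop
          V = GEP->getPointerOperand();            // cast-like gep
        } else {
          break;
        }
      }
      return V;
    }
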
@@ -474,7 +498,7 @@ isEliminableCastPair(
const Type *DstTy, ///< The target type for the second cast instruction
TargetData *TD ///< The target data for pointer size
) {
-
+
const Type *SrcTy = CI->getOperand(0)->getType(); // A from above
const Type *MidTy = CI->getType(); // B from above
@@ -483,12 +507,15 @@ isEliminableCastPair(
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
- DstTy, TD->getIntPtrType());
+ DstTy,
+ TD ? TD->getIntPtrType(CI->getContext()) : 0);
// We don't want to form an inttoptr or ptrtoint that converts to an integer
// type that differs from the pointer size.
- if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) ||
- (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType()))
+ if ((Res == Instruction::IntToPtr &&
+ (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (Res == Instruction::PtrToInt &&
+ (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
Res = 0;
return Instruction::CastOps(Res);
@@ -503,7 +530,7 @@ static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V,
// If this is another cast that can be eliminated, it isn't codegen either.
if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opcode, Ty, TD))
+ if (isEliminableCastPair(CI, opcode, Ty, TD))
return false;
return true;
}
@@ -528,7 +555,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
if (isa<Constant>(I.getOperand(1))) {
- Constant *Folded = Context->getConstantExpr(I.getOpcode(),
+ Constant *Folded = ConstantExpr::get(I.getOpcode(),
cast<Constant>(I.getOperand(1)),
cast<Constant>(Op->getOperand(1)));
I.setOperand(0, Op->getOperand(0));
@@ -541,11 +568,11 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
Constant *C2 = cast<Constant>(Op1->getOperand(1));
// Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
- Constant *Folded = Context->getConstantExpr(I.getOpcode(), C1, C2);
+ Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
Op1->getOperand(0),
Op1->getName(), &I);
- AddToWorkList(New);
+ Worklist.Add(New);
I.setOperand(0, New);
I.setOperand(1, Folded);
return true;
@@ -568,17 +595,17 @@ bool InstCombiner::SimplifyCompare(CmpInst &I) {
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
-static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) {
+static inline Value *dyn_castNegVal(Value *V) {
if (BinaryOperator::isNeg(V))
return BinaryOperator::getNegArgument(V);
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
- return Context->getConstantExprNeg(C);
+ return ConstantExpr::getNeg(C);
if (ConstantVector *C = dyn_cast<ConstantVector>(V))
if (C->getType()->getElementType()->isInteger())
- return Context->getConstantExprNeg(C);
+ return ConstantExpr::getNeg(C);
return 0;
}
@@ -587,28 +614,28 @@ static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) {
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
-static inline Value *dyn_castFNegVal(Value *V, LLVMContext* Context) {
+static inline Value *dyn_castFNegVal(Value *V) {
if (BinaryOperator::isFNeg(V))
return BinaryOperator::getFNegArgument(V);
// Constants can be considered to be negated values if they can be folded.
if (ConstantFP *C = dyn_cast<ConstantFP>(V))
- return Context->getConstantExprFNeg(C);
+ return ConstantExpr::getFNeg(C);
if (ConstantVector *C = dyn_cast<ConstantVector>(V))
if (C->getType()->getElementType()->isFloatingPoint())
- return Context->getConstantExprFNeg(C);
+ return ConstantExpr::getFNeg(C);
return 0;
}
-static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) {
+static inline Value *dyn_castNotVal(Value *V) {
if (BinaryOperator::isNot(V))
return BinaryOperator::getNotArgument(V);
// Constants can be considered to be not'ed values...
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
- return Context->getConstantInt(~C->getValue());
+ return ConstantInt::get(C->getType(), ~C->getValue());
return 0;
}
@@ -617,8 +644,7 @@ static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) {
// non-constant operand of the multiply, and set CST to point to the multiplier.
// Otherwise, return null.
//
-static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST,
- LLVMContext* Context) {
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
if (V->hasOneUse() && V->getType()->isInteger())
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (I->getOpcode() == Instruction::Mul)
@@ -629,48 +655,27 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST,
// The multiplier is really 1 << CST.
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
uint32_t CSTVal = CST->getLimitedValue(BitWidth);
- CST = Context->getConstantInt(APInt(BitWidth, 1).shl(CSTVal));
+ CST = ConstantInt::get(V->getType()->getContext(),
+ APInt(BitWidth, 1).shl(CSTVal));
return I->getOperand(0);
}
}
return 0;
}
-/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant
-/// expression, return it.
-static User *dyn_castGetElementPtr(Value *V) {
- if (isa<GetElementPtrInst>(V)) return cast<User>(V);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return cast<User>(V);
- return false;
-}
-
-/// getOpcode - If this is an Instruction or a ConstantExpr, return the
-/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V))
- return I->getOpcode();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode();
- // Use UserOp1 to mean there's no opcode.
- return Instruction::UserOp1;
-}
-
/// AddOne - Add one to a ConstantInt
-static Constant *AddOne(Constant *C, LLVMContext* Context) {
- return Context->getConstantExprAdd(C,
- Context->getConstantInt(C->getType(), 1));
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C,
+ ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(ConstantInt *C, LLVMContext* Context) {
- return Context->getConstantExprSub(C,
- Context->getConstantInt(C->getType(), 1));
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C,
+ ConstantInt::get(C->getType(), 1));
}
/// MultiplyOverflows - True if the multiply cannot be expressed in an int of
/// this size.
-static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign,
- LLVMContext* Context) {
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
uint32_t W = C1->getBitWidth();
APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
if (sign) {
@@ -697,7 +702,7 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign,
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
- APInt Demanded, LLVMContext* Context) {
+ APInt Demanded) {
assert(I && "No instruction?");
assert(OpNo < I->getNumOperands() && "Operand index too large");
@@ -712,7 +717,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
// This instruction is producing bits that are not demanded. Shrink the RHS.
Demanded &= OpC->getValue();
- I->setOperand(OpNo, Context->getConstantInt(Demanded));
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
return true;
}
@@ -784,7 +789,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
KnownZero, KnownOne, Depth);
if (NewVal == 0) return false;
- U.set(NewVal);
+ U = NewVal;
return true;
}
@@ -844,7 +849,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask == 0) { // Not demanding any bits from V.
if (isa<UndefValue>(V))
return 0;
- return Context->getUndef(VTy);
+ return UndefValue::get(VTy);
}
if (Depth == 6) // Limit search depth.
@@ -886,7 +891,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits in the inputs are known zeros, return zero.
if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
- return Context->getNullValue(VTy);
+ return Constant::getNullValue(VTy);
} else if (I->getOpcode() == Instruction::Or) {
// We can simplify (X|Y) -> X or Y in the user's context if we know that
@@ -955,10 +960,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits in the inputs are known zeros, return zero.
if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
- return Context->getNullValue(VTy);
+ return Constant::getNullValue(VTy);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero, Context))
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
return I;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -995,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask, Context))
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1030,7 +1035,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// other, turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
- Instruction *Or =
+ Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
return InsertNewInstBefore(Or, *I);
@@ -1043,7 +1048,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
// all known
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
- Constant *AndC = Context->getConstantInt(~RHSKnownOne & DemandedMask);
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
Instruction *And =
BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
return InsertNewInstBefore(And, *I);
@@ -1052,9 +1058,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the RHS is a constant, see if we can simplify it.
// FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
- if (ShrinkDemandedConstant(I, 1, DemandedMask, Context))
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ InsertNewInstBefore(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor =
+ BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ return InsertNewInstBefore(NewXor, *I);
+ }
+
+
RHSKnownZero = KnownZeroOut;
RHSKnownOne = KnownOneOut;
break;
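A worked instance of the new xor-of-and rule (numbers invented): take ((%x & 0xF0) ^ 0x80) where bit 7 of %x is known set. The xor only clears a bit the and is known to produce, so NewMask = ~0x80 knocks bit 7 out of both constants, leaving ((%x & 0x70) ^ 0x00), i.e. just %x & 0x70.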
@@ -1069,8 +1102,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(I, 1, DemandedMask, Context) ||
- ShrinkDemandedConstant(I, 2, DemandedMask, Context))
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
return I;
// Only known if known in both the LHS and RHS.
@@ -1194,7 +1227,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the RHS of the add has bits set that can't affect the input, reduce
// the constant.
- if (ShrinkDemandedConstant(I, 1, InDemandedBits, Context))
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
return I;
// Avoid excess work.
@@ -1415,10 +1448,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *NewVal;
if (InputBit > ResultBit)
NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
- Context->getConstantInt(I->getType(), InputBit-ResultBit));
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
else
NewVal = BinaryOperator::CreateShl(I->getOperand(1),
- Context->getConstantInt(I->getType(), ResultBit-InputBit));
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
NewVal->takeName(I);
return InsertNewInstBefore(NewVal, *I);
}
@@ -1434,12 +1467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
- Constant *C = Context->getConstantInt(RHSKnownOne);
- if (isa<PointerType>(V->getType()))
- C = Context->getConstantExprIntToPtr(C, V->getType());
- return C;
- }
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, RHSKnownOne);
return false;
}
@@ -1465,13 +1494,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
return 0;
} else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
UndefElts = EltMask;
- return Context->getUndef(V->getType());
+ return UndefValue::get(V->getType());
}
UndefElts = 0;
if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Undef = Context->getUndef(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
std::vector<Constant*> Elts;
for (unsigned i = 0; i != VWidth; ++i)
@@ -1486,7 +1515,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
// If we changed the constant, return it.
- Constant *NewCP = Context->getConstantVector(Elts);
+ Constant *NewCP = ConstantVector::get(Elts);
return NewCP != CP ? NewCP : 0;
} else if (isa<ConstantAggregateZero>(V)) {
// Simplify the CAZ to a ConstantVector where the non-demanded elements are
@@ -1498,15 +1527,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
return 0;
const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Zero = Context->getNullValue(EltTy);
- Constant *Undef = Context->getUndef(EltTy);
+ Constant *Zero = Constant::getNullValue(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
std::vector<Constant*> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
Constant *Elt = DemandedElts[i] ? Zero : Undef;
Elts.push_back(Elt);
}
UndefElts = DemandedElts ^ EltMask;
- return Context->getConstantVector(Elts);
+ return ConstantVector::get(Elts);
}
// Limit search depth.
@@ -1553,8 +1582,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If this is inserting an element that isn't demanded, remove this
// insertelement.
unsigned IdxNo = Idx->getZExtValue();
- if (IdxNo >= VWidth || !DemandedElts[IdxNo])
- return AddSoonDeadInstToWorklist(*I, 0);
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
// Otherwise, the element inserted overwrites whatever was there, so the
// input demanded set is simpler than the output set.
@@ -1620,12 +1651,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
std::vector<Constant*> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
if (UndefElts[i])
- Elts.push_back(Context->getUndef(Type::Int32Ty));
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
else
- Elts.push_back(Context->getConstantInt(Type::Int32Ty,
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
Shuffle->getMaskValue(i)));
}
- I->setOperand(2, Context->getConstantVector(Elts));
+ I->setOperand(2, ConstantVector::get(Elts));
MadeChange = true;
}
break;
@@ -1678,7 +1709,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = UndefElts2;
if (VWidth > InVWidth) {
- assert(0 && "Unimp");
+ llvm_unreachable("Unimp");
// If there are more elements in the result than there are in the source,
// then an output element is undef if the corresponding input element is
// undef.
@@ -1686,7 +1717,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (UndefElts2[OutIdx/Ratio])
UndefElts.set(OutIdx);
} else if (VWidth < InVWidth) {
- assert(0 && "Unimp");
+ llvm_unreachable("Unimp");
// If there are more elements in the source than there are in the result,
// then a result element is undef if all of the corresponding input
// elements are undef.
@@ -1752,11 +1783,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Value *LHS = II->getOperand(1);
Value *RHS = II->getOperand(2);
// Extract the element as scalars.
- LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U,"tmp"), *II);
- RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U,"tmp"), *II);
+ LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
+ RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
switch (II->getIntrinsicID()) {
- default: assert(0 && "Case stmts out of sync!");
+ default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
@@ -1771,9 +1804,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Instruction *New =
InsertElementInst::Create(
- Context->getUndef(II->getType()), TmpV, 0U, II->getName());
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName());
InsertNewInstBefore(New, *II);
- AddSoonDeadInstToWorklist(*II, 0);
return New;
}
}
@@ -1799,8 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
/// 'shouldApply' and 'apply' methods.
///
template<typename Functor>
-static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F,
- LLVMContext* Context) {
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
unsigned Opcode = Root.getOpcode();
Value *LHS = Root.getOperand(0);
@@ -1833,7 +1865,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F,
// Make what used to be the LHS of the root be the user of the root...
Value *ExtraOperand = TmpLHSI->getOperand(1);
if (&Root == TmpLHSI) {
- Root.replaceAllUsesWith(Context->getNullValue(TmpLHSI->getType()));
+ Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
return 0;
}
Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI
@@ -1872,12 +1904,11 @@ namespace {
// AddRHS - Implements: X + X --> X << 1
struct AddRHS {
Value *RHS;
- LLVMContext* Context;
- AddRHS(Value *rhs, LLVMContext* C) : RHS(rhs), Context(C) {}
+ explicit AddRHS(Value *rhs) : RHS(rhs) {}
bool shouldApply(Value *LHS) const { return LHS == RHS; }
Instruction *apply(BinaryOperator &Add) const {
return BinaryOperator::CreateShl(Add.getOperand(0),
- Context->getConstantInt(Add.getType(), 1));
+ ConstantInt::get(Add.getType(), 1));
}
};
@@ -1885,12 +1916,11 @@ struct AddRHS {
// iff C1&C2 == 0
struct AddMaskingAnd {
Constant *C2;
- LLVMContext* Context;
- AddMaskingAnd(Constant *c, LLVMContext* C) : C2(c), Context(C) {}
+ explicit AddMaskingAnd(Constant *c) : C2(c) {}
bool shouldApply(Value *LHS) const {
ConstantInt *C1;
return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
- Context->getConstantExprAnd(C1, C2)->isNullValue();
+ ConstantExpr::getAnd(C1, C2)->isNullValue();
}
Instruction *apply(BinaryOperator &Add) const {
return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
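AddMaskingAnd is sound because disjoint masks cannot carry: when C1 & C2 == 0, the set bits of (A & C1) and (B & C2) occupy different positions, so their sum equals their bitwise or. E.g. (A & 0xFF00) + (B & 0x00FF) simply concatenates a high byte and a low byte.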
@@ -1901,11 +1931,8 @@ struct AddMaskingAnd {
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
- LLVMContext* Context = IC->getContext();
-
- if (CastInst *CI = dyn_cast<CastInst>(&I)) {
- return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I);
- }
+ if (CastInst *CI = dyn_cast<CastInst>(&I))
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
// Figure out if the constant is the left or the right argument.
bool ConstIsRHS = isa<Constant>(I.getOperand(1));
@@ -1913,24 +1940,24 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
if (Constant *SOC = dyn_cast<Constant>(SO)) {
if (ConstIsRHS)
- return Context->getConstantExpr(I.getOpcode(), SOC, ConstOperand);
- return Context->getConstantExpr(I.getOpcode(), ConstOperand, SOC);
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
}
Value *Op0 = SO, *Op1 = ConstOperand;
if (!ConstIsRHS)
std::swap(Op0, Op1);
- Instruction *New;
+
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
- else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,
- SO->getName()+".cmp");
- else {
- assert(0 && "Unknown binary instruction type!");
- abort();
- }
- return IC->InsertNewInstBefore(New, I);
+ return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
}
// FoldOpIntoSelect - Given an instruction with a select as one operand and a
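The effect of FoldOperationIntoSelectOperand, on an invented example: for mul (select i1 %c, i32 4, i32 8), 3 both select arms are constant, so each side folds through ConstantExpr::get and the whole expression becomes select i1 %c, i32 12, i32 24 with no new instructions; a non-constant arm instead has the operation materialized through the Builder calls above.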
@@ -1946,7 +1973,7 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (isa<Constant>(TV) || isa<Constant>(FV)) {
// Bool selects with constant operands can be folded to logical ops.
- if (SI->getType() == Type::Int1Ty) return 0;
+ if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0;
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
@@ -1958,20 +1985,34 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
}
-/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI
-/// node as operand #0, see if we can fold the instruction into the PHI (which
-/// is only possible if all operands to the PHI are constants).
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+ bool AllowAggressive) {
+ AllowAggressive = false; // Note: aggressive phi folding is unconditionally disabled here.
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
- if (!PN->hasOneUse() || NumPHIValues == 0) return 0;
-
- // Check to see if all of the operands of the PHI are constants. If there is
- // one non-constant value, remember the BB it is. If there is more than one
- // or if *it* is a PHI, bail out.
+ if (NumPHIValues == 0 ||
+ // We normally only transform phis with a single use, unless we're trying
+ // hard to make jump threading happen.
+ (!PN->hasOneUse() && !AllowAggressive))
+ return 0;
+
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
BasicBlock *NonConstBB = 0;
for (unsigned i = 0; i != NumPHIValues; ++i)
- if (!isa<Constant>(PN->getIncomingValue(i))) {
+ if (!isa<Constant>(PN->getIncomingValue(i)) ||
+ isa<ConstantExpr>(PN->getIncomingValue(i))) {
if (NonConstBB) return 0; // More than one non-const value.
if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
NonConstBB = PN->getIncomingBlock(i);
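FoldOpIntoPhi on an invented example: given %p = phi i32 [ 1, %a ], [ 2, %b ] and %r = add i32 %p, 10, the add is hoisted through the phi to give %r = phi i32 [ 11, %a ], [ 12, %b ]. A single non-constant incoming value is tolerated by cloning the operation into its predecessor block, which is what the unconditional-branch check in the next hunk guards.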
@@ -1986,7 +2027,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// operation in that block. However, if this is a critical edge, we would be
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
- if (NonConstBB) {
+ if (NonConstBB != 0 && !AllowAggressive) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
if (!BI || !BI->isUnconditional()) return 0;
}
@@ -1998,15 +2039,37 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
NewPN->takeName(PN);
// Next, add all of the operands to the PHI.
- if (I.getNumOperands() == 2) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We currently only try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
+ FalseVInPred,
+ "phitmp", NonConstBB->getTerminator());
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (I.getNumOperands() == 2) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV = 0;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = Context->getConstantExprCompare(CI->getPredicate(), InC, C);
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
else
- InV = Context->getConstantExpr(I.getOpcode(), InC, C);
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
@@ -2014,14 +2077,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = CmpInst::Create(CI->getOpcode(),
+ InV = CmpInst::Create(CI->getOpcode(),
CI->getPredicate(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
else
- assert(0 && "Unknown binop!");
+ llvm_unreachable("Unknown binop!");
- AddToWorkList(cast<Instruction>(InV));
+ Worklist.Add(cast<Instruction>(InV));
}
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
@@ -2031,13 +2094,13 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
- InV = Context->getConstantExprCast(CI->getOpcode(), InC, RetTy);
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
I.getType(), "phitmp",
NonConstBB->getTerminator());
- AddToWorkList(cast<Instruction>(InV));
+ Worklist.Add(cast<Instruction>(InV));
}
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
@@ -2098,13 +2161,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (SimplifyDemandedInstructionBits(I))
return &I;
- // zext(i1) - 1 -> select i1, 0, -1
+ // zext(bool) + C -> bool ? C + 1 : C
if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
- if (CI->isAllOnesValue() &&
- ZI->getOperand(0)->getType() == Type::Int1Ty)
- return SelectInst::Create(ZI->getOperand(0),
- Context->getNullValue(I.getType()),
- Context->getConstantIntAllOnesValue(I.getType()));
+ if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
}
if (isa<PHINode>(LHS))
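The rewritten rule is strictly more general than the one it replaces: for any constant C, add (zext i1 %b to i32), C becomes select i1 %b, C+1, C (e.g. C = 41 gives select i1 %b, i32 42, i32 41), whereas the old code only caught C = -1, folding to select i1 %b, i32 0, i32 -1.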
@@ -2146,24 +2206,23 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
const Type *MiddleType = 0;
switch (Size) {
default: break;
- case 32: MiddleType = Type::Int32Ty; break;
- case 16: MiddleType = Type::Int16Ty; break;
- case 8: MiddleType = Type::Int8Ty; break;
+ case 32: MiddleType = Type::getInt32Ty(*Context); break;
+ case 16: MiddleType = Type::getInt16Ty(*Context); break;
+ case 8: MiddleType = Type::getInt8Ty(*Context); break;
}
if (MiddleType) {
- Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext");
- InsertNewInstBefore(NewTrunc, I);
+ Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
return new SExtInst(NewTrunc, I.getType(), I.getName());
}
}
}
- if (I.getType() == Type::Int1Ty)
+ if (I.getType() == Type::getInt1Ty(*Context))
return BinaryOperator::CreateXor(LHS, RHS);
// X + X --> X << 1
if (I.getType()->isInteger()) {
- if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS, Context), Context))
+ if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))
return Result;
if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
@@ -2180,11 +2239,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// -A + B --> B - A
// -A + -B --> -(A + B)
- if (Value *LHSV = dyn_castNegVal(LHS, Context)) {
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
if (LHS->getType()->isIntOrIntVector()) {
- if (Value *RHSV = dyn_castNegVal(RHS, Context)) {
- Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
- InsertNewInstBefore(NewAdd, I);
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
return BinaryOperator::CreateNeg(NewAdd);
}
}
@@ -2194,34 +2252,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// A + -B --> A - B
if (!isa<Constant>(RHS))
- if (Value *V = dyn_castNegVal(RHS, Context))
+ if (Value *V = dyn_castNegVal(RHS))
return BinaryOperator::CreateSub(LHS, V);
ConstantInt *C2;
- if (Value *X = dyn_castFoldableMul(LHS, C2, Context)) {
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
if (X == RHS) // X*C + X --> X * (C+1)
- return BinaryOperator::CreateMul(RHS, AddOne(C2, Context));
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
- if (X == dyn_castFoldableMul(RHS, C1, Context))
- return BinaryOperator::CreateMul(X, Context->getConstantExprAdd(C1, C2));
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
}
// X + X*C --> X * (C+1)
- if (dyn_castFoldableMul(RHS, C2, Context) == LHS)
- return BinaryOperator::CreateMul(LHS, AddOne(C2, Context));
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
// X + ~X --> -1 since ~X = -X-1
- if (dyn_castNotVal(LHS, Context) == RHS ||
- dyn_castNotVal(RHS, Context) == LHS)
- return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+ if (dyn_castNotVal(LHS) == RHS ||
+ dyn_castNotVal(RHS) == LHS)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
// (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
- if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2, Context), Context))
+ if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
return R;
// A+B --> A|B iff A and B have no bits set in common.
@@ -2258,8 +2316,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
if (W == Y) {
- Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z,
- LHS->getName()), I);
+ Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
return BinaryOperator::CreateMul(W, NewAdd);
}
}
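The other recurring rewrite: hand-rolled InsertNewInstBefore(BinaryOperator::Create...(...), I) pairs collapse into IRBuilder calls. A hedged sketch of the new idiom (emitSum is invented; header path per the 2.6-era tree):

    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // The builder inserts at its current insertion point and may
    // constant-fold, so results are held as Value* rather than Instruction*.
    static Value *emitSum(IRBuilder<> &Builder, Value *X, Value *Z) {
      return Builder.CreateAdd(X, Z, "sum");
    }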
@@ -2268,11 +2325,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
Value *X = 0;
if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
- return BinaryOperator::CreateSub(SubOne(CRHS, Context), X);
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
// (X & FF00) + xx00 -> (X+xx00) & FF00
- if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = Context->getConstantExprAnd(CRHS, C2);
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
if (Anded == CRHS) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
@@ -2286,8 +2344,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (AddRHSHighBits == AddRHSHighBitsAnd) {
// Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS,
- LHS->getName()), I);
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
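A concrete instance of the masked-add rewrite, in 8 bits (values invented): (%x & 0xF0) + 0x10 becomes (%x + 0x10) & 0xF0. The high-bits check above ensures any carry out of the added constant either stays inside the mask or wraps off the top; e.g. %x = 0xFF gives 0xF0 + 0x10 = 0x00 on the left and (0xFF + 0x10) & 0xF0 = 0x0F & 0xF0 = 0x00 on the right.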
@@ -2299,28 +2356,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return R;
}
- // add (cast *A to intptrtype) B ->
- // cast (GEP (cast *A to i8*) B) --> intptrtype
- {
- CastInst *CI = dyn_cast<CastInst>(LHS);
- Value *Other = RHS;
- if (!CI) {
- CI = dyn_cast<CastInst>(RHS);
- Other = LHS;
- }
- if (CI && CI->getType()->isSized() &&
- (CI->getType()->getScalarSizeInBits() ==
- TD->getIntPtrType()->getPrimitiveSizeInBits())
- && isa<PointerType>(CI->getOperand(0)->getType())) {
- unsigned AS =
- cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
- Value *I2 = InsertBitCastBefore(CI->getOperand(0),
- Context->getPointerType(Type::Int8Ty, AS), I);
- I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I);
- return new PtrToIntInst(I2, CI->getType());
- }
- }
-
// add (select X 0 (sub n A)) A --> select X A n
{
SelectInst *SI = dyn_cast<SelectInst>(LHS);
@@ -2336,10 +2371,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Can we fold the add into the argument of the select?
// We check both true and false select arguments for a matching subtract.
- if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ if (match(FV, m_Zero()) &&
+ match(TV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the true select value.
return SelectInst::Create(SI->getCondition(), N, A);
- if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ if (match(TV, m_Zero()) &&
+ match(FV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the false select value.
return SelectInst::Create(SI->getCondition(), A, N);
}
@@ -2351,14 +2388,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (add (sext x), cst) --> (sext (add x, cst'))
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
Constant *CI =
- Context->getConstantExprTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
- Context->getConstantExprSExt(CI, I.getType()) == RHSC &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new, smaller add.
- Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
- CI, "addconv");
- InsertNewInstBefore(NewAdd, I);
+ Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
@@ -2373,10 +2409,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0),
- "addconv");
- InsertNewInstBefore(NewAdd, I);
+ Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
@@ -2392,7 +2426,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// X + 0 --> X
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->isExactlyValue(Context->getConstantFPNegativeZero
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
(I.getType())->getValueAPF()))
return ReplaceInstUsesWith(I, LHS);
}
@@ -2404,12 +2438,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// -A + B --> B - A
// -A + -B --> -(A + B)
- if (Value *LHSV = dyn_castFNegVal(LHS, Context))
+ if (Value *LHSV = dyn_castFNegVal(LHS))
return BinaryOperator::CreateFSub(RHS, LHSV);
// A + -B --> A - B
if (!isa<Constant>(RHS))
- if (Value *V = dyn_castFNegVal(RHS, Context))
+ if (Value *V = dyn_castFNegVal(RHS))
return BinaryOperator::CreateFSub(LHS, V);
// Check for X+0.0. Simplify it to X if we know X is not -0.0.
@@ -2427,14 +2461,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
Constant *CI =
- Context->getConstantExprFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
- Context->getConstantExprSIToFP(CI, I.getType()) == CFP &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new integer add.
- Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
- CI, "addconv");
- InsertNewInstBefore(NewAdd, I);
+ Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -2449,10 +2482,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0),
- "addconv");
- InsertNewInstBefore(NewAdd, I);
+ Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -2465,10 +2496,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Op0 == Op1) // sub X, X -> 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// If this is a 'B = x-(-A)', change to B = x+A...
- if (Value *V = dyn_castNegVal(Op1, Context))
+ if (Value *V = dyn_castNegVal(Op1))
return BinaryOperator::CreateAdd(Op0, V);
if (isa<UndefValue>(Op0))
@@ -2484,7 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
- return BinaryOperator::CreateAdd(X, AddOne(C, Context));
+ return BinaryOperator::CreateAdd(X, AddOne(C));
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
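The C - ~X fold is plain two's complement: ~X = -X - 1, so C - ~X = C + X + 1 = X + (C + 1), which is exactly the AddOne(C) form used here.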
@@ -2519,22 +2550,29 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (Instruction *R = FoldOpIntoSelect(I, SI, this))
return R;
+
+ // C - zext(bool) -> bool ? C - 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
+ if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
+ return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
}
- if (I.getType() == Type::Int1Ty)
+ if (I.getType() == Type::getInt1Ty(*Context))
return BinaryOperator::CreateXor(Op0, Op1);
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::Add) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
+ return BinaryOperator::CreateNeg(Op1I->getOperand(1),
+ I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName());
+ return BinaryOperator::CreateNeg(Op1I->getOperand(0),
+ I.getName());
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
return BinaryOperator::CreateSub(
- Context->getConstantExprSub(CI1, CI2), Op1I->getOperand(0));
+ ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
}
}
@@ -2558,8 +2596,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
(Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
- Value *NewNot =
- InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I);
+ Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
return BinaryOperator::CreateAnd(Op0, NewNot);
}
@@ -2569,13 +2606,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (CSI->isZero())
if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
- Context->getConstantExprNeg(DivRHS));
+ ConstantExpr::getNeg(DivRHS));
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
- if (dyn_castFoldableMul(Op1I, C2, Context) == Op0) {
+ if (dyn_castFoldableMul(Op1I, C2) == Op0) {
Constant *CP1 =
- Context->getConstantExprSub(Context->getConstantInt(I.getType(), 1),
+ ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
C2);
return BinaryOperator::CreateMul(Op0, CP1);
}
@@ -2590,18 +2627,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op0I->getOperand(0));
} else if (Op0I->getOpcode() == Instruction::Sub) {
if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1),
+ I.getName());
}
}
ConstantInt *C1;
- if (Value *X = dyn_castFoldableMul(Op0, C1, Context)) {
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
if (X == Op1) // X*C - X --> X * (C-1)
- return BinaryOperator::CreateMul(Op1, SubOne(C1, Context));
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
- if (X == dyn_castFoldableMul(Op1, C2, Context))
- return BinaryOperator::CreateMul(X, Context->getConstantExprSub(C1, C2));
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
return 0;
}
@@ -2610,15 +2648,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// If this is a 'B = x-(-A)', change to B = x+A...
- if (Value *V = dyn_castFNegVal(Op1, Context))
+ if (Value *V = dyn_castFNegVal(Op1))
return BinaryOperator::CreateFAdd(Op0, V);
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::FAdd) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName());
+ return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
+ I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
+ return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
+ I.getName());
}
}
@@ -2657,26 +2697,24 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // TODO: If Op1 is undef and Op0 is finite, return zero.
- if (!I.getType()->isFPOrFPVector() &&
- isa<UndefValue>(I.getOperand(1))) // undef * X -> 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ if (isa<UndefValue>(Op1)) // undef * X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- // Simplify mul instructions with a constant RHS...
- if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Simplify mul instructions with a constant RHS.
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
// ((X << C1)*C2) == (X * (C2 << C1))
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
return BinaryOperator::CreateMul(SI->getOperand(0),
- Context->getConstantExprShl(CI, ShOp));
+ ConstantExpr::getShl(CI, ShOp));
if (CI->isZero())
- return ReplaceInstUsesWith(I, Op1); // X * 0 == 0
+ return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0
if (CI->equalsInt(1)) // X * 1 == X
return ReplaceInstUsesWith(I, Op0);
if (CI->isAllOnesValue()) // X * -1 == 0 - X
@@ -2685,12 +2723,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
const APInt& Val = cast<ConstantInt>(CI)->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
return BinaryOperator::CreateShl(Op0,
- Context->getConstantInt(Op0->getType(), Val.logBase2()));
+ ConstantInt::get(Op0->getType(), Val.logBase2()));
}
- } else if (isa<VectorType>(Op1->getType())) {
- // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros.
+ } else if (isa<VectorType>(Op1C->getType())) {
+ if (Op1C->isNullValue())
+ return ReplaceInstUsesWith(I, Op1C);
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
@@ -2705,13 +2744,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
- isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
+ isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
// Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
- Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
- Op1, "tmp");
- InsertNewInstBefore(Add, I);
- Value *C1C2 = Context->getConstantExprMul(Op1,
- cast<Constant>(Op0I->getOperand(1)));
+ Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
+ Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
return BinaryOperator::CreateAdd(Add, C1C2);
}
@@ -2726,93 +2762,80 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return NV;
}
- if (Value *Op0v = dyn_castNegVal(Op0, Context)) // -X * -Y = X*Y
- if (Value *Op1v = dyn_castNegVal(I.getOperand(1), Context))
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1))
return BinaryOperator::CreateMul(Op0v, Op1v);
// (X / Y) * Y = X - (X % Y)
// (X / Y) * -Y = (X % Y) - X
{
- Value *Op1 = I.getOperand(1);
+ Value *Op1C = Op1;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
if (!BO ||
(BO->getOpcode() != Instruction::UDiv &&
BO->getOpcode() != Instruction::SDiv)) {
- Op1 = Op0;
- BO = dyn_cast<BinaryOperator>(I.getOperand(1));
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
}
- Value *Neg = dyn_castNegVal(Op1, Context);
+ Value *Neg = dyn_castNegVal(Op1C);
if (BO && BO->hasOneUse() &&
- (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
(BO->getOpcode() == Instruction::UDiv ||
BO->getOpcode() == Instruction::SDiv)) {
Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
- Instruction *Rem;
+ // If the division is exact, X % Y is zero.
+ if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return ReplaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
if (BO->getOpcode() == Instruction::UDiv)
- Rem = BinaryOperator::CreateURem(Op0BO, Op1BO);
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
else
- Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO);
-
- InsertNewInstBefore(Rem, I);
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
Rem->takeName(BO);
- if (Op1BO == Op1)
+ if (Op1BO == Op1C)
return BinaryOperator::CreateSub(Op0BO, Rem);
- else
- return BinaryOperator::CreateSub(Rem, Op0BO);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
}
}
- if (I.getType() == Type::Int1Ty)
- return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
+ /// i1 mul -> i1 and.
+ if (I.getType() == Type::getInt1Ty(*Context))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op1, Y);
+ if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op0, Y);
+ }
+
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
- // See if we can simplify things based on how the boolean was originally
- // formed.
- CastInst *BoolCast = 0;
- if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
- if (CI->getOperand(0)->getType() == Type::Int1Ty)
- BoolCast = CI;
- if (!BoolCast)
- if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(1)))
- if (CI->getOperand(0)->getType() == Type::Int1Ty)
- BoolCast = CI;
- if (BoolCast) {
- if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) {
- Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
- const Type *SCOpTy = SCIOp0->getType();
- bool TIS = false;
-
- // If the icmp is true iff the sign bit of X is set, then convert this
- // multiply into a shift/and combination.
- if (isa<ConstantInt>(SCIOp1) &&
- isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
- TIS) {
- // Shift the X value right to turn it into "all signbits".
- Constant *Amt = Context->getConstantInt(SCIOp0->getType(),
- SCOpTy->getPrimitiveSizeInBits()-1);
- Value *V =
- InsertNewInstBefore(
- BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt,
- BoolCast->getOperand(0)->getName()+
- ".mask"), I);
-
- // If the multiply type is not the same as the source type, sign extend
- // or truncate to the multiply type.
- if (I.getType() != V->getType()) {
- uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits();
- uint32_t DstBits = I.getType()->getPrimitiveSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc));
- V = InsertCastBefore(opcode, V, I.getType(), I);
- }
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
+ if (!isa<VectorType>(I.getType())) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = 0, *OtherOp = 0;
+ if (MaskedValueIsZero(Op0, Negative2))
+ BoolCast = Op0, OtherOp = Op1;
+ else if (MaskedValueIsZero(Op1, Negative2))
+ BoolCast = Op1, OtherOp = Op0;
- Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
- return BinaryOperator::CreateAnd(V, OtherOp);
- }
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast, "tmp");
+ return BinaryOperator::CreateAnd(V, OtherOp);
}
}
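The replacement masking-multiply logic keys off value tracking rather than pattern matching on zext: any %y whose bits above bit 0 are known zero (that is the Negative2 mask) is 0 or 1, and then %x * %y == %x & (0 - %y), since 0 - 1 is all ones and 0 - 0 is zero. This still catches zext from i1, plus anything else MaskedValueIsZero can prove.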
@@ -2821,17 +2844,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Simplify mul instructions with a constant RHS...
- if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
- if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
if (Op1F->isExactlyValue(1.0))
return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
- } else if (isa<VectorType>(Op1->getType())) {
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ } else if (isa<VectorType>(Op1C->getType())) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
// As above, vector X*splat(1.0) -> X in all defined cases.
if (Constant *Splat = Op1V->getSplatValue()) {
if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
@@ -2851,8 +2874,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return NV;
}
- if (Value *Op0v = dyn_castFNegVal(Op0, Context)) // -X * -Y = X*Y
- if (Value *Op1v = dyn_castFNegVal(I.getOperand(1), Context))
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(Op1))
return BinaryOperator::CreateFMul(Op0v, Op1v);
return Changed ? &I : 0;
@@ -2907,11 +2930,11 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
I != E; ++I) {
if (*I == SI) {
*I = SI->getOperand(NonNullOperand);
- AddToWorkList(BBI);
+ Worklist.Add(BBI);
} else if (*I == SelectCond) {
- *I = NonNullOperand == 1 ? Context->getConstantIntTrue() :
- Context->getConstantIntFalse();
- AddToWorkList(BBI);
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) :
+ ConstantInt::getFalse(*Context);
+ Worklist.Add(BBI);
}
}
@@ -2942,7 +2965,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
if (isa<UndefValue>(Op0)) {
if (Op0->getType()->isFPOrFPVector())
return ReplaceInstUsesWith(I, Op0);
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
// X / undef -> undef
@@ -2962,12 +2985,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (sdiv X, X) --> 1 (udiv X, X) --> 1
if (Op0 == Op1) {
if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- Constant *CI = Context->getConstantInt(Ty->getElementType(), 1);
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
std::vector<Constant*> Elts(Ty->getNumElements(), CI);
- return ReplaceInstUsesWith(I, Context->getConstantVector(Elts));
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
}
- Constant *CI = Context->getConstantInt(I.getType(), 1);
+ Constant *CI = ConstantInt::get(I.getType(), 1);
return ReplaceInstUsesWith(I, CI);
}
@@ -2989,11 +3012,11 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
if (MultiplyOverflows(RHS, LHSRHS,
- I.getOpcode()==Instruction::SDiv, Context))
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
else
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- Context->getConstantExprMul(RHS, LHSRHS));
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -3009,10 +3032,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// 0 / X == 0, we don't need to preserve faults!
if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
if (LHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// It can't be division by zero, hence it must be division by one.
- if (I.getType() == Type::Int1Ty)
+ if (I.getType() == Type::getInt1Ty(*Context))
return ReplaceInstUsesWith(I, Op0);
if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
@@ -3038,14 +3061,13 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// if so, convert to a right shift.
if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
return BinaryOperator::CreateLShr(Op0,
- Context->getConstantInt(Op0->getType(), C->getValue().logBase2()));
+ ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
- Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C),
- I);
- return SelectInst::Create(IC, Context->getNullValue(I.getType()),
- Context->getConstantInt(I.getType(), 1));
+ Value *IC = Builder->CreateICmpULT(Op0, C);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
}
}
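Two quick udiv instances (widths invented): udiv i32 %x, 8 becomes lshr i32 %x, 3; and for C with the sign bit set, say C = 0x80000000, the quotient can only be 0 or 1, so udiv i32 %x, C becomes select (icmp ult %x, C), i32 0, i32 1.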
@@ -3057,10 +3079,8 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
if (C1.isPowerOf2()) {
Value *N = RHSI->getOperand(1);
const Type *NTy = N->getType();
- if (uint32_t C2 = C1.logBase2()) {
- Constant *C2V = Context->getConstantInt(NTy, C2);
- N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
- }
+ if (uint32_t C2 = C1.logBase2())
+ N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
return BinaryOperator::CreateLShr(Op0, N);
}
}
@@ -3076,16 +3096,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Compute the shift amounts
uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
// Construct the "on true" case of the select
- Constant *TC = Context->getConstantInt(Op0->getType(), TSA);
- Instruction *TSI = BinaryOperator::CreateLShr(
- Op0, TC, SI->getName()+".t");
- TSI = InsertNewInstBefore(TSI, I);
+ Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+ Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
// Construct the "on false" case of the select
- Constant *FC = Context->getConstantInt(Op0->getType(), FSA);
- Instruction *FSI = BinaryOperator::CreateLShr(
- Op0, FC, SI->getName()+".f");
- FSI = InsertNewInstBefore(FSI, I);
+ Constant *FC = ConstantInt::get(Op0->getType(), FSA);
+ Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
// construct the select instruction and return it.
return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
@@ -3105,17 +3121,45 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// sdiv X, -1 == -X
if (RHS->isAllOnesValue())
return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv X, C --> ashr X, log2(C)
+ if (cast<SDivOperator>(&I)->isExact() &&
+ RHS->getValue().isNonNegative() &&
+ RHS->getValue().isPowerOf2()) {
+ Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+ RHS->getValue().exactLogBase2());
+ return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+ }
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+ if (isa<Constant>(Sub->getOperand(0)) &&
+ cast<Constant>(Sub->getOperand(0))->isNullValue() &&
+ Sub->hasNoSignedWrap())
+ return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+ ConstantExpr::getNeg(RHS));
}
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
if (I.getType()->isInteger()) {
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
- // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
- return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ if (MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ ConstantInt *ShiftedInt;
+ if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
+ ShiftedInt->getValue().isPowerOf2()) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
}
- }
+ }
return 0;
}
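Note the role of the flags in the two new sdiv folds: exact promises no remainder, so sdiv exact i32 %x, 4 is precisely ashr i32 %x, 2 (plain ashr would round toward negative infinity on inexact negative dividends); and the -X/C --> X/-C rewrite requires nsw on the subtract so the negation cannot have wrapped at INT_MIN.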
@@ -3134,7 +3178,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
if (isa<UndefValue>(Op0)) { // undef % X -> 0
if (I.getType()->isFPOrFPVector())
return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN)
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
@@ -3159,15 +3203,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
// 0 % X == 0 for integer, we don't need to preserve faults!
if (Constant *LHS = dyn_cast<Constant>(Op0))
if (LHS->isNullValue())
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// X % 0 == undef, we don't need to preserve faults!
if (RHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Context->getUndef(I.getType()));
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
if (RHS->equalsInt(1)) // X % 1 == 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
@@ -3199,7 +3243,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
// if so, convert to a bitwise and.
if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue().isPowerOf2())
- return BinaryOperator::CreateAnd(Op0, SubOne(C, Context));
+ return BinaryOperator::CreateAnd(Op0, SubOne(C));
}
if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
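Power-of-two urem is the matching strength reduction: urem i32 %x, 8 becomes and i32 %x, 7, since a remainder modulo 2^k is just the low k bits; SubOne(C) builds the 2^k - 1 mask.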
@@ -3207,9 +3251,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (RHSI->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(RHSI->getOperand(0))) {
if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
- Constant *N1 = Context->getConstantIntAllOnesValue(I.getType());
- Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
- "tmp"), I);
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
return BinaryOperator::CreateAnd(Op0, Add);
}
}
@@ -3223,12 +3266,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
// STO == 0 and SFO == 0 handled above.
if ((STO->getValue().isPowerOf2()) &&
(SFO->getValue().isPowerOf2())) {
- Value *TrueAnd = InsertNewInstBefore(
- BinaryOperator::CreateAnd(Op0, SubOne(STO, Context),
- SI->getName()+".t"), I);
- Value *FalseAnd = InsertNewInstBefore(
- BinaryOperator::CreateAnd(Op0, SubOne(SFO, Context),
- SI->getName()+".f"), I);
+ Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
+ SI->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
+ SI->getName()+".f");
return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
}
}
@@ -3241,15 +3282,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Handle the integer rem common cases
- if (Instruction *common = commonIRemTransforms(I))
- return common;
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
- if (Value *RHSNeg = dyn_castNegVal(Op1, Context))
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
if (!isa<Constant>(RHSNeg) ||
(isa<ConstantInt>(RHSNeg) &&
cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
// X % -Y -> X % Y
- AddUsesToWorkList(I);
+ Worklist.AddValue(I.getOperand(1));
I.setOperand(1, RHSNeg);
return &I;
}
@@ -3279,15 +3320,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
for (unsigned i = 0; i != VWidth; ++i) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
if (RHS->getValue().isNegative())
- Elts[i] = cast<ConstantInt>(Context->getConstantExprNeg(RHS));
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
else
Elts[i] = RHS;
}
}
- Constant *NewRHSV = Context->getConstantVector(Elts);
+ Constant *NewRHSV = ConstantVector::get(Elts);
if (NewRHSV != RHSV) {
- AddUsesToWorkList(I);
+ Worklist.AddValue(I.getOperand(1));
I.setOperand(1, NewRHSV);
return &I;
}
@@ -3351,7 +3392,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) {
case ICmpInst::ICMP_SLE: return 6; // 110
// True -> 7
default:
- assert(0 && "Invalid ICmp predicate!");
+ llvm_unreachable("Invalid ICmp predicate!");
return 0;
}
}
@@ -3379,7 +3420,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
// True -> 7
default:
// Not expecting FCMP_FALSE and FCMP_TRUE;
- assert(0 && "Unexpected FCmp predicate!");
+ llvm_unreachable("Unexpected FCmp predicate!");
return 0;
}
}
@@ -3389,10 +3430,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
- LLVMContext* Context) {
+ LLVMContext *Context) {
switch (code) {
- default: assert(0 && "Illegal ICmp code!");
- case 0: return Context->getConstantIntFalse();
+ default: llvm_unreachable("Illegal ICmp code!");
+ case 0: return ConstantInt::getFalse(*Context);
case 1:
if (sign)
return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
@@ -3415,7 +3456,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
else
return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
- case 7: return Context->getConstantIntTrue();
+ case 7: return ConstantInt::getTrue(*Context);
}
}
@@ -3423,9 +3464,9 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
/// opcode and two operands into an FCmp instruction. isordered is passed
/// in to determine which kind of predicate to use in the new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
- Value *LHS, Value *RHS, LLVMContext* Context) {
+ Value *LHS, Value *RHS, LLVMContext *Context) {
switch (code) {
- default: assert(0 && "Illegal FCmp code!");
+ default: llvm_unreachable("Illegal FCmp code!");
case 0:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
@@ -3461,7 +3502,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
- case 7: return Context->getConstantIntTrue();
+ case 7: return ConstantInt::getTrue(*Context);
}
}
@@ -3504,7 +3545,7 @@ struct FoldICmpLogical {
case Instruction::And: Code = LHSCode & RHSCode; break;
case Instruction::Or: Code = LHSCode | RHSCode; break;
case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
- default: assert(0 && "Illegal logical opcode!"); return 0;
+ default: llvm_unreachable("Illegal logical opcode!"); return 0;
}
bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) ||
@@ -3529,14 +3570,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Value *X = Op->getOperand(0);
Constant *Together = 0;
if (!Op->isShift())
- Together = Context->getConstantExprAnd(AndRHS, OpRHS);
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
case Instruction::Xor:
if (Op->hasOneUse()) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Instruction *And = BinaryOperator::CreateAnd(X, AndRHS);
- InsertNewInstBefore(And, TheAnd);
+ Value *And = Builder->CreateAnd(X, AndRHS);
And->takeName(Op);
return BinaryOperator::CreateXor(And, Together);
}
@@ -3547,8 +3587,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
if (Op->hasOneUse() && Together != OpRHS) {
// (X | C1) & C2 --> (X | (C1&C2)) & C2
- Instruction *Or = BinaryOperator::CreateOr(X, Together);
- InsertNewInstBefore(Or, TheAnd);
+ Value *Or = Builder->CreateOr(X, Together);
Or->takeName(Op);
return BinaryOperator::CreateAnd(Or, AndRHS);
}
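
The two OptAndOp rewrites above are plain Boolean-algebra identities: AND distributes over XOR, and ORed-in bits outside the mask are dead. Both can be brute-forced in standalone C++ over 8 bits:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned c1 = 0; c1 < 256; ++c1)
          for (unsigned c2 = 0; c2 < 256; ++c2) {
            assert(uint8_t((x ^ c1) & c2) == uint8_t((x & c2) ^ (c1 & c2)));
            assert(uint8_t((x | c1) & c2) == uint8_t((x | (c1 & c2)) & c2));
          }
      return 0;
    }
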
@@ -3578,8 +3617,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
return &TheAnd;
} else {
// Pull the XOR out of the AND.
- Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS);
- InsertNewInstBefore(NewAnd, TheAnd);
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
NewAnd->takeName(Op);
return BinaryOperator::CreateXor(NewAnd, AndRHS);
}
@@ -3595,7 +3633,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
- ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShlMask);
+ ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask);
if (CI->getValue() == ShlMask) {
// Masking out bits that the shift already masks
@@ -3615,7 +3653,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShrMask);
+ ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
if (CI->getValue() == ShrMask) {
// Masking out bits that the shift already masks.
@@ -3634,14 +3672,12 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- Constant *C = Context->getConstantInt(AndRHS->getValue() & ShrMask);
+ Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
if (C == AndRHS) { // Masking out bits shifted in.
// (Val ashr C1) & C2 -> (Val lshr C1) & C2
// Make the argument unsigned.
Value *ShVal = Op->getOperand(0);
- ShVal = InsertNewInstBefore(
- BinaryOperator::CreateLShr(ShVal, OpRHS,
- Op->getName()), TheAnd);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
}
}
@@ -3659,7 +3695,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
bool isSigned, bool Inside,
Instruction &IB) {
- assert(cast<ConstantInt>(Context->getConstantExprICmp((isSigned ?
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
"Lo is not <= Hi in range emission code!");
@@ -3675,10 +3711,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
}
// Emit V-Lo <u Hi-Lo
- Constant *NegLo = Context->getConstantExprNeg(Lo);
- Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
- InsertNewInstBefore(Add, IB);
- Constant *UpperBound = Context->getConstantExprAdd(NegLo, Hi);
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
}
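
The emitted V-Lo <u Hi-Lo form is the classic unsigned-wraparound range check: subtracting Lo shifts the range [Lo, Hi) down to [0, Hi-Lo), and anything outside wraps above it. A standalone check over 8 bits (the Lo <= Hi precondition mirrors the assert at the top of InsertRangeTest):

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned v = 0; v < 256; ++v)
        for (unsigned lo = 0; lo < 256; ++lo)
          for (unsigned hi = lo; hi < 256; ++hi)  // precondition: Lo <= Hi
            assert((lo <= v && v < hi) == (uint8_t(v - lo) < uint8_t(hi - lo)));
      return 0;
    }
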
@@ -3686,7 +3721,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
// V < Min || V >= Hi -> V > Hi-1
- Hi = SubOne(cast<ConstantInt>(Hi), Context);
+ Hi = SubOne(cast<ConstantInt>(Hi));
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
@@ -3695,10 +3730,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
// Emit V-Lo >u Hi-1-Lo
// Note that Hi has already had one subtracted from it, above.
- ConstantInt *NegLo = cast<ConstantInt>(Context->getConstantExprNeg(Lo));
- Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
- InsertNewInstBefore(Add, IB);
- Constant *LowerBound = Context->getConstantExprAdd(NegLo, Hi);
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
}
@@ -3740,7 +3774,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
switch (LHSI->getOpcode()) {
default: return 0;
case Instruction::And:
- if (Context->getConstantExprAnd(N, Mask) == Mask) {
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) ==
@@ -3764,17 +3798,14 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && Context->getConstantExprAnd(N, Mask)->isNullValue())
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
- Instruction *New;
if (isSub)
- New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold");
- else
- New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold");
- return InsertNewInstBefore(New, I);
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
@@ -3785,16 +3816,17 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
ICmpInst::Predicate LHSCC, RHSCC;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
- if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
- !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
+ if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
+ m_ConstantInt(LHSCst))) ||
+ !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
+ m_ConstantInt(RHSCst))))
return 0;
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
// where C is a power of 2
if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&
LHSCst->getValue().isPowerOf2()) {
- Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2);
- InsertNewInstBefore(NewOr, I);
+ Value *NewOr = Builder->CreateOr(Val, Val2);
return new ICmpInst(LHSCC, NewOr, LHSCst);
}
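
The (icmp ult A, C) & (icmp ult B, C) --> icmp ult (A|B), C fold is only sound because C is a power of two: both values are below 2^k exactly when their OR has no bit at or above position k. A standalone check:

    #include <cassert>
    int main() {
      for (unsigned a = 0; a < 64; ++a)
        for (unsigned b = 0; b < 64; ++b)
          for (unsigned k = 0; k < 6; ++k) {
            unsigned c = 1u << k;  // the power-of-two bound
            assert(((a < c) && (b < c)) == ((a | b) < c));
          }
      return 0;
    }
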
@@ -3837,14 +3869,14 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
assert(LHSCst != RHSCst && "Compares not folded above?");
switch (LHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
    case ICmpInst::ICMP_UGT:        // (X == 13 & X u> 15) -> false
    case ICmpInst::ICMP_SGT:        // (X == 13 & X s> 15) -> false
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
    case ICmpInst::ICMP_ULT:        // (X == 13 & X u< 15) -> X == 13
    case ICmpInst::ICMP_SLT:        // (X == 13 & X s< 15) -> X == 13
@@ -3852,13 +3884,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
}
case ICmpInst::ICMP_NE:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_ULT:
- if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X u< 14) -> X < 13
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
- if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X s< 14) -> X < 13
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
break; // (X != 13 & X s< 15) -> no change
case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
@@ -3866,23 +3898,21 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_NE:
- if (LHSCst == SubOne(RHSCst, Context)){// (X != 13 & X != 14) -> X-13 >u 1
- Constant *AddCST = Context->getConstantExprNeg(LHSCst);
- Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
- Val->getName()+".off");
- InsertNewInstBefore(Add, I);
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
return new ICmpInst(ICmpInst::ICMP_UGT, Add,
- Context->getConstantInt(Add->getType(), 1));
+ ConstantInt::get(Add->getType(), 1));
}
break; // (X != 13 & X != 15) -> no change
}
break;
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
@@ -3894,10 +3924,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_SLT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
@@ -3909,18 +3939,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_UGT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
break;
case ICmpInst::ICMP_NE:
- if (RHSCst == AddOne(LHSCst, Context)) // (X u> 13 & X != 14) -> X u> 14
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
return new ICmpInst(LHSCC, Val, RHSCst);
break; // (X u> 13 & X != 15) -> no change
case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
- return InsertRangeTest(Val, AddOne(LHSCst, Context),
+ return InsertRangeTest(Val, AddOne(LHSCst),
RHSCst, false, true, I);
case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
break;
@@ -3928,18 +3958,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_SGT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE:
- if (RHSCst == AddOne(LHSCst, Context)) // (X s> 13 & X != 14) -> X s> 14
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
return new ICmpInst(LHSCC, Val, RHSCst);
break; // (X s> 13 & X != 15) -> no change
case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
- return InsertRangeTest(Val, AddOne(LHSCst, Context),
+ return InsertRangeTest(Val, AddOne(LHSCst),
RHSCst, true, true, I);
case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
break;
@@ -3950,13 +3980,89 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
return 0;
}
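
The least obvious case above is (X != 13 & X != 14) -> X-13 >u 1, which collapses two inequalities into one subtract-and-compare. A standalone 8-bit check (13 and 14 stand in for any pair of adjacent constants):

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned x = 0; x < 256; ++x)
        assert(((x != 13) && (x != 14)) == (uint8_t(x - 13) > 1));
      return 0;
    }
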
+Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
+ FCmpInst *RHS) {
+
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+ return new FCmpInst(FCmpInst::FCMP_ORD,
+ LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return new FCmpInst(FCmpInst::FCMP_ORD,
+ LHS->getOperand(0), RHS->getOperand(0));
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+
+ if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+ if (Op0CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, RHS);
+ if (Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, LHS);
+
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op1Pred == 0) {
+ std::swap(LHS, RHS);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> ueq
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (Op0Ordered == Op1Ordered)
+ return ReplaceInstUsesWith(I, RHS);
+
+ // uno && oeq -> uno && (ord && eq) -> false
+ // uno && ord -> false
+ if (!Op0Ordered)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return cast<Instruction>(getFCmpValue(true, Op1Pred,
+ Op0LHS, Op0RHS, Context));
+ }
+ }
+
+ return 0;
+}
+
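
The new FoldAndOfFCmps keys off the semantics of fcmp ord a, b: true iff neither operand is NaN. When both right-hand constants are known non-NaN, ANDing two ord tests is just an ord test of the two variables. A standalone sketch using <cmath>, where 1.0 and 2.0 stand in for arbitrary non-NaN constants:

    #include <cassert>
    #include <cmath>
    int main() {
      const double nan = std::nan(""), vals[] = {0.0, 1.0, nan};
      for (double x : vals)
        for (double y : vals) {
          bool lhs = (!std::isnan(x) && !std::isnan(1.0)) &&  // fcmp ord x, 1.0
                     (!std::isnan(y) && !std::isnan(2.0));    // fcmp ord y, 2.0
          bool rhs = !std::isnan(x) && !std::isnan(y);        // fcmp ord x, y
          assert(lhs == rhs);
        }
      return 0;
    }
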
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (isa<UndefValue>(Op1)) // X & undef -> 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// and X, X = X
if (Op0 == Op1)
@@ -3976,36 +4082,32 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
- const APInt& AndRHSMask = AndRHS->getValue();
+ const APInt &AndRHSMask = AndRHS->getValue();
APInt NotAndRHS(~AndRHSMask);
// Optimize a variety of ((val OP C1) & C2) combinations...
- if (isa<BinaryOperator>(Op0)) {
- Instruction *Op0I = cast<Instruction>(Op0);
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
Value *Op0LHS = Op0I->getOperand(0);
Value *Op0RHS = Op0I->getOperand(1);
switch (Op0I->getOpcode()) {
+ default: break;
case Instruction::Xor:
case Instruction::Or:
// If the mask is only needed on one incoming arm, push it up.
- if (Op0I->hasOneUse()) {
- if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
- // Not masking anything out for the LHS, move to RHS.
- Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
- Op0RHS->getName()+".masked");
- InsertNewInstBefore(NewRHS, I);
- return BinaryOperator::Create(
- cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
- }
- if (!isa<Constant>(Op0RHS) &&
- MaskedValueIsZero(Op0RHS, NotAndRHS)) {
- // Not masking anything out for the RHS, move to LHS.
- Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS,
- Op0LHS->getName()+".masked");
- InsertNewInstBefore(NewLHS, I);
- return BinaryOperator::Create(
- cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
- }
+ if (!Op0I->hasOneUse()) break;
+
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
}
break;
@@ -4036,8 +4138,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
if (!(A && A->isZero()) && // avoid infinite recursion.
MaskedValueIsZero(Op0LHS, Mask)) {
- Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS);
- InsertNewInstBefore(NewNeg, I);
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
return BinaryOperator::CreateAnd(NewNeg, AndRHS);
}
}
@@ -4048,9 +4149,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (1 << x) & 1 --> zext(x == 0)
// (1 >> x) & 1 --> zext(x == 0)
if (AndRHSMask == 1 && Op0LHS == AndRHS) {
- Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS,
- Context->getNullValue(I.getType()));
- InsertNewInstBefore(NewICmp, I);
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
return new ZExtInst(NewICmp, I.getType());
}
break;
@@ -4072,21 +4172,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// into : and (cast X to T), trunc_or_bitcast(C1)&C2
// This will fold the two constants together, which may allow
// other simplifications.
- Instruction *NewCast = CastInst::CreateTruncOrBitCast(
+ Value *NewCast = Builder->CreateTruncOrBitCast(
CastOp->getOperand(0), I.getType(),
CastOp->getName()+".shrunk");
- NewCast = InsertNewInstBefore(NewCast, I);
// trunc_or_bitcast(C1)&C2
- Constant *C3 =
- Context->getConstantExprTruncOrBitCast(AndCI,I.getType());
- C3 = Context->getConstantExprAnd(C3, AndRHS);
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
return BinaryOperator::CreateAnd(NewCast, C3);
} else if (CastOp->getOpcode() == Instruction::Or) {
// Change: and (cast (or X, C1) to T), C2
// into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
- Constant *C3 =
- Context->getConstantExprTruncOrBitCast(AndCI,I.getType());
- if (Context->getConstantExprAnd(C3, AndRHS) == AndRHS)
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
// trunc(C1)&C2
return ReplaceInstUsesWith(I, AndRHS);
}
@@ -4103,17 +4200,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
- Value *Op0NotVal = dyn_castNotVal(Op0, Context);
- Value *Op1NotVal = dyn_castNotVal(Op1, Context);
+ Value *Op0NotVal = dyn_castNotVal(Op0);
+ Value *Op1NotVal = dyn_castNotVal(Op1);
if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// (~A & ~B) == (~(A | B)) - De Morgan's Law
if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- InsertNewInstBefore(Or, I);
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
return BinaryOperator::CreateNot(Or);
}
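
De Morgan's law as used here, checked standalone over 8 bits:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          assert(uint8_t(~a & ~b) == uint8_t(~(a | b)));  // (~A & ~B) == ~(A | B)
      return 0;
    }
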
@@ -4159,11 +4255,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
cast<BinaryOperator>(Op1)->swapOperands();
std::swap(A, B);
}
- if (A == Op0) { // A&(A^B) -> A & ~B
- Instruction *NotB = BinaryOperator::CreateNot(B, "tmp");
- InsertNewInstBefore(NotB, I);
- return BinaryOperator::CreateAnd(A, NotB);
- }
+ if (A == Op0) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
}
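
A&(A^B) -> A & ~B holds bitwise: a result bit is set only where A is 1 and B is 0. A standalone check:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          assert(uint8_t(a & (a ^ b)) == uint8_t(a & ~b));
      return 0;
    }
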
// (A&((~A)|B)) -> A&B
@@ -4177,7 +4270,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
- if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
+ if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
return R;
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
@@ -4190,16 +4283,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector() &&
// Only do this if the casts both really cause code to be generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0),
- Op1C->getOperand(0),
- I.getName());
- InsertNewInstBefore(NewOp, I);
+ Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
@@ -4210,10 +4302,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
- Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0),
- SI1->getOperand(0),
- SI0->getName()), I);
+ Value *NewOp =
+ Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
@@ -4221,66 +4312,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
- if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
- RHS->getPredicate() == FCmpInst::FCMP_ORD) {
- // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
- if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
- if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
- // If either of the constants are nans, then the whole thing returns
- // false.
- if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
- return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0),
- RHS->getOperand(0));
- }
- } else {
- Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
- FCmpInst::Predicate Op0CC, Op1CC;
- if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
- match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
- if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
- // Swap RHS operands to match LHS.
- Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
- std::swap(Op1LHS, Op1RHS);
- }
- if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
- // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
- if (Op0CC == Op1CC)
- return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
- else if (Op0CC == FCmpInst::FCMP_FALSE ||
- Op1CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
- else if (Op0CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, Op1);
- else if (Op1CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, Op0);
- bool Op0Ordered;
- bool Op1Ordered;
- unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
- unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
- if (Op1Pred == 0) {
- std::swap(Op0, Op1);
- std::swap(Op0Pred, Op1Pred);
- std::swap(Op0Ordered, Op1Ordered);
- }
- if (Op0Pred == 0) {
- // uno && ueq -> uno && (uno || eq) -> ueq
- // ord && olt -> ord && (ord && lt) -> olt
- if (Op0Ordered == Op1Ordered)
- return ReplaceInstUsesWith(I, Op1);
- // uno && oeq -> uno && (ord && eq) -> false
- // uno && ord -> false
- if (!Op0Ordered)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
- // ord && ueq -> ord && (uno || eq) -> oeq
- return cast<Instruction>(getFCmpValue(true, Op1Pred,
- Op0LHS, Op0RHS, Context));
- }
- }
- }
- }
- }
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+ return Res;
}
return Changed ? &I : 0;
@@ -4450,7 +4484,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
/// If A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
/// we can simplify this expression to "cond ? C : D or B".
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
- Value *C, Value *D) {
+ Value *C, Value *D,
+ LLVMContext *Context) {
// If A is not a select of -1/0, this cannot match.
Value *Cond = 0;
if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
@@ -4477,8 +4512,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
ICmpInst::Predicate LHSCC, RHSCC;
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
- if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
- !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
+ if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
+ m_ConstantInt(LHSCst))) ||
+ !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
+ m_ConstantInt(RHSCst))))
return 0;
// From here on, we only handle:
@@ -4520,18 +4557,16 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
assert(LHSCst != RHSCst && "Compares not folded above?");
switch (LHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- if (LHSCst == SubOne(RHSCst, Context)) {
+ if (LHSCst == SubOne(RHSCst)) {
// (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = Context->getConstantExprNeg(LHSCst);
- Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
- Val->getName()+".off");
- InsertNewInstBefore(Add, I);
- AddCST = Context->getConstantExprSub(AddOne(RHSCst, Context), LHSCst);
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
@@ -4546,7 +4581,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_NE:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
@@ -4554,12 +4589,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
}
break;
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
break;
case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
@@ -4567,7 +4602,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
// this can cause overflow.
if (RHSCst->isMaxValue(false))
return ReplaceInstUsesWith(I, LHS);
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context),
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
false, false, I);
case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
break;
@@ -4580,7 +4615,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_SLT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
break;
case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
@@ -4588,7 +4623,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
// this can cause overflow.
if (RHSCst->isMaxValue(true))
return ReplaceInstUsesWith(I, LHS);
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context),
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
true, false, I);
case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
break;
@@ -4601,7 +4636,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_UGT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
return ReplaceInstUsesWith(I, LHS);
@@ -4609,14 +4644,14 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_SGT:
switch (RHSCC) {
- default: assert(0 && "Unknown integer condition code!");
+ default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
return ReplaceInstUsesWith(I, LHS);
@@ -4624,7 +4659,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
break;
case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
break;
}
@@ -4633,6 +4668,72 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
return 0;
}
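
The dual of the earlier NE&NE fold shows up here as (X == 13 | X == 14) -> X-13 <u 2. A standalone 8-bit check:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned x = 0; x < 256; ++x)
        assert(((x == 13) || (x == 14)) == (uint8_t(x - 13) < 2));
      return 0;
    }
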
+Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
+ FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return new FCmpInst(FCmpInst::FCMP_UNO,
+ LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return new FCmpInst(FCmpInst::FCMP_UNO,
+ LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC,
+ Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, RHS);
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, LHS);
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
+ Op0LHS, Op0RHS, Context);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value...
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+ return 0;
+}
+
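
FoldOrOfFCmps is the unordered dual of FoldAndOfFCmps: fcmp uno a, b is true iff either operand is NaN, so with non-NaN constants on the right, ORing two uno tests reduces to one. A standalone sketch (again, 1.0 and 2.0 are stand-in constants):

    #include <cassert>
    #include <cmath>
    int main() {
      const double nan = std::nan(""), vals[] = {0.0, 1.0, nan};
      for (double x : vals)
        for (double y : vals) {
          bool lhs = (std::isnan(x) || std::isnan(1.0)) ||  // fcmp uno x, 1.0
                     (std::isnan(y) || std::isnan(2.0));    // fcmp uno y, 2.0
          assert(lhs == (std::isnan(x) || std::isnan(y)));  // fcmp uno x, y
        }
      return 0;
    }
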
/// FoldOrWithConstants - This helper function folds:
///
/// ((A | B) & C1) | (B & C2)
@@ -4655,8 +4756,7 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
if (!Xor.isAllOnesValue()) return 0;
if (V1 == A || V1 == B) {
- Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I);
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
return BinaryOperator::CreateOr(NewOp, V1);
}
@@ -4668,7 +4768,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (isa<UndefValue>(Op1)) // X | undef -> -1
- return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
// or X, X = X
if (Op0 == Op1)
@@ -4691,21 +4791,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
- if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
- Instruction *Or = BinaryOperator::CreateOr(X, RHS);
- InsertNewInstBefore(Or, I);
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ isOnlyUse(Op0)) {
+ Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateAnd(Or,
- Context->getConstantInt(RHS->getValue() | C1->getValue()));
+ ConstantInt::get(*Context, RHS->getValue() | C1->getValue()));
}
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
- if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
- Instruction *Or = BinaryOperator::CreateOr(X, RHS);
- InsertNewInstBefore(Or, I);
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ isOnlyUse(Op0)) {
+ Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateXor(Or,
- Context->getConstantInt(C1->getValue() & ~RHS->getValue()));
+ ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue()));
}
// Try to fold constant and into select arguments.
@@ -4738,19 +4838,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
- if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op1, C1->getValue())) {
- Instruction *NOr = BinaryOperator::CreateOr(A, Op1);
- InsertNewInstBefore(NOr, I);
+ Value *NOr = Builder->CreateOr(A, Op1);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr, C1);
}
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
- if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op0, C1->getValue())) {
- Instruction *NOr = BinaryOperator::CreateOr(A, Op0);
- InsertNewInstBefore(NOr, I);
+ Value *NOr = Builder->CreateOr(A, Op0);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr, C1);
}
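
(X^C)|Y -> (X|Y)^C needs the Y & C == 0 guard that MaskedValueIsZero establishes: the OR may not clobber any bit that the XOR flips. A standalone check with the guard made explicit:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned c = 0; c < 256; ++c)
          for (unsigned y = 0; y < 256; ++y)
            if ((y & c) == 0)  // the MaskedValueIsZero guard
              assert(uint8_t((x ^ c) | y) == uint8_t((x | y) ^ c));
      return 0;
    }
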
@@ -4801,20 +4901,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
V1 = C, V2 = A, V3 = B;
if (V1) {
- Value *Or =
- InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+ Value *Or = Builder->CreateOr(V2, V3, "tmp");
return BinaryOperator::CreateAnd(V1, Or);
}
}
// (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
- if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context))
return Match;
- if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context))
return Match;
- if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context))
return Match;
- if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context))
return Match;
// ((A&~B)|(~A&B)) -> A^B
@@ -4841,10 +4940,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
- Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
- SI1->getOperand(0),
- SI0->getName()), I);
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
@@ -4865,26 +4962,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1
if (A == Op1) // ~A | A == -1
- return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
} else {
A = 0;
}
// Note, A is still live here!
if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B
if (Op0 == B)
- return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
// (~A | ~B) == (~(A & B)) - De Morgan's Law
if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
- I.getName()+".demorgan"), I);
+ Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");
return BinaryOperator::CreateNot(And);
}
}
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
- if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
+ if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
return R;
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
@@ -4899,17 +4995,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
!isa<ICmpInst>(Op1C->getOperand(0))) {
const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector() &&
// Only do this if the casts both really cause code to be
// generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
- Op1C->getOperand(0),
- I.getName());
- InsertNewInstBefore(NewOp, I);
+ Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
@@ -4919,61 +5014,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
- if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
- RHS->getPredicate() == FCmpInst::FCMP_UNO &&
- LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
- if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
- if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
- // If either of the constants are nans, then the whole thing returns
- // true.
- if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-
- // Otherwise, no need to compare the two constants, compare the
- // rest.
- return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0),
- RHS->getOperand(0));
- }
- } else {
- Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
- FCmpInst::Predicate Op0CC, Op1CC;
- if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
- match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
- if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
- // Swap RHS operands to match LHS.
- Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
- std::swap(Op1LHS, Op1RHS);
- }
- if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
- // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
- if (Op0CC == Op1CC)
- return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
- else if (Op0CC == FCmpInst::FCMP_TRUE ||
- Op1CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- else if (Op0CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, Op1);
- else if (Op1CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, Op0);
- bool Op0Ordered;
- bool Op1Ordered;
- unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
- unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
- if (Op0Ordered == Op1Ordered) {
- // If both are ordered or unordered, return a new fcmp with
- // or'ed predicates.
- Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
- Op0LHS, Op0RHS, Context);
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value...
- return ReplaceInstUsesWith(I, RV);
- }
- }
- }
- }
- }
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+ return Res;
}
return Changed ? &I : 0;
@@ -5001,14 +5044,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (isa<UndefValue>(Op0))
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
}
// xor X, X = 0, even if X is nested in a sequence of Xor's.
- if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1), Context)) {
+ if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
// See if we can simplify any instructions used by the instruction whose sole
@@ -5020,22 +5063,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
- if (Value *NotOp = dyn_castNotVal(&I, Context)) {
+ if (Value *NotOp = dyn_castNotVal(&I)) {
// ~(~X & Y) --> (X | ~Y) - De Morgan's Law
// ~(~X | Y) === (X & ~Y) - De Morgan's Law
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
if (Op0I->getOpcode() == Instruction::And ||
Op0I->getOpcode() == Instruction::Or) {
- if (dyn_castNotVal(Op0I->getOperand(1), Context)) Op0I->swapOperands();
- if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0), Context)) {
- Instruction *NotY =
- BinaryOperator::CreateNot(Op0I->getOperand(1),
- Op0I->getOperand(1)->getName()+".not");
- InsertNewInstBefore(NotY, I);
+ if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
if (Op0I->getOpcode() == Instruction::And)
return BinaryOperator::CreateOr(Op0NotVal, NotY);
- else
- return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
}
}
}
@@ -5043,7 +5084,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- if (RHS == Context->getConstantIntTrue() && Op0->hasOneUse()) {
+ if (RHS->isOne() && Op0->hasOneUse()) {
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
return new ICmpInst(ICI->getInversePredicate(),
@@ -5059,16 +5100,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
- if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
- if (RHS == Context->getConstantExprCast(Opcode,
- Context->getConstantIntTrue(),
- Op0C->getDestTy())) {
- Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
- CI->getOpcode(), CI->getInversePredicate(),
- CI->getOperand(0), CI->getOperand(1)), I);
- NewCI->takeName(CI);
- return CastInst::Create(Opcode, NewCI, Op0C->getType());
- }
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(*Context),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
}
}
}
@@ -5078,9 +5115,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// ~(c-X) == X-c-1 == X+(-c-1)
if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
- Constant *NegOp0I0C = Context->getConstantExprNeg(Op0I0C);
- Constant *ConstantRHS = Context->getConstantExprSub(NegOp0I0C,
- Context->getConstantInt(I.getType(), 1));
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
}
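
~(c-X) == X+(-c-1) follows from two's complement, where ~v == -v-1. A standalone check modulo 256:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned c = 0; c < 256; ++c)
        for (unsigned x = 0; x < 256; ++x)
          assert(uint8_t(~(c - x)) == uint8_t(x + (0u - c - 1u)));
      return 0;
    }
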
@@ -5088,28 +5125,28 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Op0I->getOpcode() == Instruction::Add) {
// ~(X-c) --> (-c-1)-X
if (RHS->isAllOnesValue()) {
- Constant *NegOp0CI = Context->getConstantExprNeg(Op0CI);
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
return BinaryOperator::CreateSub(
- Context->getConstantExprSub(NegOp0CI,
- Context->getConstantInt(I.getType(), 1)),
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
Op0I->getOperand(0));
} else if (RHS->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
- Constant *C =
- Context->getConstantInt(RHS->getValue() + Op0CI->getValue());
+ Constant *C = ConstantInt::get(*Context,
+ RHS->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
} else if (Op0I->getOpcode() == Instruction::Or) {
      // (X|C1)^C2 -> X^(C1|C2) iff X&C1 == 0
if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
- Constant *NewRHS = Context->getConstantExprOr(Op0CI, RHS);
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
- Constant *CommonBits = Context->getConstantExprAnd(Op0CI, RHS);
- NewRHS = Context->getConstantExprAnd(NewRHS,
- Context->getConstantExprNot(CommonBits));
- AddToWorkList(Op0I);
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
I.setOperand(0, Op0I->getOperand(0));
I.setOperand(1, NewRHS);
return &I;
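
With X & C1 == 0, the OR acts as a disjoint XOR, so (X|C1)^C2 == X^(C1^C2); the NewRHS computation above, C1|C2 with the common bits stripped, is exactly C1^C2. A standalone check including the guard:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned c1 = 0; c1 < 256; ++c1)
          for (unsigned c2 = 0; c2 < 256; ++c2)
            if ((x & c1) == 0) {  // MaskedValueIsZero guard
              uint8_t newRHS = uint8_t((c1 | c2) & ~(c1 & c2));  // == c1 ^ c2
              assert(uint8_t((x | c1) ^ c2) == uint8_t(x ^ newRHS));
            }
      return 0;
    }
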
@@ -5127,13 +5164,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return NV;
}
- if (Value *X = dyn_castNotVal(Op0, Context)) // ~A ^ A == -1
+ if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
if (X == Op1)
- return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
- if (Value *X = dyn_castNotVal(Op1, Context)) // A ^ ~A == -1
+ if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
if (X == Op0)
- return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
@@ -5152,7 +5189,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return ReplaceInstUsesWith(I, B); // A^(A^B) == B
} else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
return ReplaceInstUsesWith(I, A); // A^(B^A) == B
- } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
if (A == Op0) { // A^(A&B) -> A^(B&A)
Op1I->swapOperands();
std::swap(A, B);
@@ -5167,26 +5205,23 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
if (Op0I) {
Value *A, *B;
- if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) {
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
if (A == Op1) // (B|A)^B == (A|B)^B
std::swap(A, B);
- if (B == Op1) { // (A|B)^B == A & ~B
- Instruction *NotB =
- InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
- return BinaryOperator::CreateAnd(A, NotB);
- }
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
} else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
return ReplaceInstUsesWith(I, B); // (A^B)^A == B
} else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
return ReplaceInstUsesWith(I, A); // (B^A)^A == B
- } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
if (B == Op1 && // (B&A)^A == ~B & A
!isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
- Instruction *N =
- InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
- return BinaryOperator::CreateAnd(N, Op1);
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
}
}
}
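
Both the xor-of-or and xor-of-and shapes handled above reduce to masked complements. A standalone 8-bit check of the two identities:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          assert(uint8_t((a | b) ^ b) == uint8_t(a & ~b));  // (A|B)^B == A & ~B
          assert(uint8_t((a & b) ^ b) == uint8_t(~a & b));  // (B&A)^A == ~B & A
        }
      return 0;
    }
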
@@ -5196,10 +5231,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Op0I->getOpcode() == Op1I->getOpcode() &&
Op0I->getOperand(1) == Op1I->getOperand(1) &&
        (Op0I->hasOneUse() || Op1I->hasOneUse())) {
- Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
- Op1I->getOperand(0),
- Op0I->getName()), I);
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
Op1I->getOperand(1));
}
@@ -5235,8 +5269,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
X = B, Y = A, Z = C;
if (X) {
- Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+ Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
return BinaryOperator::CreateAnd(NewOp, X);
}
}
@@ -5244,7 +5277,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
- if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
+ if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
return R;
// fold (xor (cast A), (cast B)) -> (cast (xor A, B))
@@ -5258,10 +5291,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
- Op1C->getOperand(0),
- I.getName());
- InsertNewInstBefore(NewOp, I);
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
@@ -5271,8 +5302,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
static ConstantInt *ExtractElement(Constant *V, Constant *Idx,
- LLVMContext* Context) {
- return cast<ConstantInt>(Context->getConstantExprExtractElement(V, Idx));
+ LLVMContext *Context) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
}
static bool HasAddOverflow(ConstantInt *Result,
@@ -5290,13 +5321,13 @@ static bool HasAddOverflow(ConstantInt *Result,
/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
/// overflowed for this type.
static bool AddWithOverflow(Constant *&Result, Constant *In1,
- Constant *In2, LLVMContext* Context,
+ Constant *In2, LLVMContext *Context,
bool IsSigned = false) {
- Result = Context->getConstantExprAdd(In1, In2);
+ Result = ConstantExpr::getAdd(In1, In2);
if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *Idx = Context->getConstantInt(Type::Int32Ty, i);
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);
if (HasAddOverflow(ExtractElement(Result, Idx, Context),
ExtractElement(In1, Idx, Context),
ExtractElement(In2, Idx, Context),
@@ -5326,13 +5357,13 @@ static bool HasSubOverflow(ConstantInt *Result,
/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
/// overflowed for this type.
static bool SubWithOverflow(Constant *&Result, Constant *In1,
- Constant *In2, LLVMContext* Context,
+ Constant *In2, LLVMContext *Context,
bool IsSigned = false) {
- Result = Context->getConstantExprSub(In1, In2);
+ Result = ConstantExpr::getSub(In1, In2);
if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *Idx = Context->getConstantInt(Type::Int32Ty, i);
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);
if (HasSubOverflow(ExtractElement(Result, Idx, Context),
ExtractElement(In1, Idx, Context),
ExtractElement(In2, Idx, Context),
@@ -5351,11 +5382,10 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1,
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
- TargetData &TD = IC.getTargetData();
+ TargetData &TD = *IC.getTargetData();
gep_type_iterator GTI = gep_type_begin(GEP);
- const Type *IntPtrTy = TD.getIntPtrType();
- LLVMContext* Context = IC.getContext();
- Value *Result = Context->getNullValue(IntPtrTy);
+ const Type *IntPtrTy = TD.getIntPtrType(I.getContext());
+ Value *Result = Constant::getNullValue(IntPtrTy);
// Build a mask for high order bits.
unsigned IntPtrWidth = TD.getPointerSizeInBits();
@@ -5372,74 +5402,49 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
- Result =
- Context->getConstantInt(RC->getValue() + APInt(IntPtrWidth, Size));
- else
- Result = IC.InsertNewInstBefore(
- BinaryOperator::CreateAdd(Result,
- Context->getConstantInt(IntPtrTy, Size),
- GEP->getName()+".offs"), I);
+ Result = IC.Builder->CreateAdd(Result,
+ ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
continue;
}
- Constant *Scale = Context->getConstantInt(IntPtrTy, Size);
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC =
- Context->getConstantExprIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = Context->getConstantExprMul(OC, Scale);
- if (Constant *RC = dyn_cast<Constant>(Result))
- Result = Context->getConstantExprAdd(RC, Scale);
- else {
- // Emit an add instruction.
- Result = IC.InsertNewInstBefore(
- BinaryOperator::CreateAdd(Result, Scale,
- GEP->getName()+".offs"), I);
- }
+ ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale);
+ // Emit an add instruction.
+ Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
}
// Convert to correct type.
- if (Op->getType() != IntPtrTy) {
- if (Constant *OpC = dyn_cast<Constant>(Op))
- Op = Context->getConstantExprIntegerCast(OpC, IntPtrTy, true);
- else
- Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy,
- true,
- Op->getName()+".c"), I);
- }
+ if (Op->getType() != IntPtrTy)
+ Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
- Constant *Scale = Context->getConstantInt(IntPtrTy, Size);
- if (Constant *OpC = dyn_cast<Constant>(Op))
- Op = Context->getConstantExprMul(OpC, Scale);
- else // We'll let instcombine(mul) convert this to a shl if possible.
- Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
- GEP->getName()+".idx"), I);
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
}
// Emit an add instruction.
- if (isa<Constant>(Op) && isa<Constant>(Result))
- Result = Context->getConstantExprAdd(cast<Constant>(Op),
- cast<Constant>(Result));
- else
- Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
- GEP->getName()+".offs"), I);
+ Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
}
return Result;
}
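A sketch (not from the patch) of the arithmetic EmitGEPOffset materializes, using a hypothetical layout: struct S { int32_t a; int64_t b; } with field offsets 0 and 8 and an allocation size (stride) of 16 bytes:

#include <cassert>
#include <cstdint>

int64_t gepOffset(int64_t ArrayIdx, unsigned Field) {
  const int64_t Stride = 16;           // what TD.getTypeAllocSize would give
  const int64_t FieldOffs[] = {0, 8};  // what TD.getStructLayout would give
  return ArrayIdx * Stride + FieldOffs[Field];  // scaled index + field offset
}

int main() {
  assert(gepOffset(3, 1) == 3 * 16 + 8);  // &A[3].b lies 56 bytes past A
  return 0;
}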
-/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of
-/// the *offset* implied by GEP to zero. For example, if we have &A[i], we want
-/// to return 'i' for "icmp ne i, 0". Note that, in general, indices can be
-/// complex, and scales are involved. The above expression would also be legal
-/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). This
-/// later form is less amenable to optimization though, and we are allowed to
-/// generate the first by knowing that pointer arithmetic doesn't overflow.
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
///
static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
InstCombiner &IC) {
- TargetData &TD = IC.getTargetData();
+ TargetData &TD = *IC.getTargetData();
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -5502,8 +5507,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
- VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
- VariableIdx->getNameStart(), &I);
+ VariableIdx = new TruncInst(VariableIdx,
+ TD.getIntPtrType(VariableIdx->getContext()),
+ VariableIdx->getName(), &I);
return VariableIdx;
}
@@ -5523,40 +5529,39 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- const Type *IntPtrTy = TD.getIntPtrType();
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
true /*SExt*/,
- VariableIdx->getNameStart(), &I);
- Constant *OffsetVal = IC.getContext()->getConstantInt(IntPtrTy, NewOffs);
+ VariableIdx->getName(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
}
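As the doc comment above describes, the point of returning the raw index is that "icmp ne i, 0" and "icmp ne (i*4), 0" test the same thing when the scaled form cannot overflow. A sketch of that equivalence (assuming a 4-byte int; not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  int A[8] = {0};
  for (int64_t i = 0; i < 8; ++i) {
    // Byte offset of &A[i] from the base pointer, i.e. what the GEP encodes.
    int64_t Offs = reinterpret_cast<char *>(&A[i]) - reinterpret_cast<char *>(A);
    assert(Offs == i * 4);            // the scaled form: i*4
    assert((Offs != 0) == (i != 0));  // comparing i alone is enough
  }
  return 0;
}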
/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
/// else. At this point we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
Instruction &I) {
- assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
-
// Look through bitcasts.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
RHS = BCI->getOperand(0);
Value *PtrBase = GEPLHS->getOperand(0);
- if (PtrBase == RHS) {
+ if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
- // know pointers can't overflow. See if we can output an optimized form.
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
// If not, synthesize the offset the hard way.
if (Offset == 0)
Offset = EmitGEPOffset(GEPLHS, I, *this);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
- Context->getNullValue(Offset->getType()));
- } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
// If the base pointers are different, but the indices are the same, just
// compare the base pointer.
if (PtrBase != GEPRHS->getOperand(0)) {
@@ -5572,7 +5577,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
// If all indices are the same, just compare the base pointers.
if (IndicesTheSame)
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// Otherwise, the base pointers are different and the indices are
@@ -5622,7 +5627,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return ReplaceInstUsesWith(I, // No comparison is needed here.
- Context->getConstantInt(Type::Int1Ty,
+ ConstantInt::get(Type::getInt1Ty(*Context),
ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1) {
@@ -5635,7 +5640,8 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if ((isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if (TD &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS, I, *this);
@@ -5680,7 +5686,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
ICmpInst::Predicate Pred;
switch (I.getPredicate()) {
- default: assert(0 && "Unexpected predicate!");
+ default: llvm_unreachable("Unexpected predicate!");
case FCmpInst::FCMP_UEQ:
case FCmpInst::FCMP_OEQ:
Pred = ICmpInst::ICMP_EQ;
@@ -5706,9 +5712,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Pred = ICmpInst::ICMP_NE;
break;
case FCmpInst::FCMP_ORD:
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
case FCmpInst::FCMP_UNO:
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
}
const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
@@ -5728,8 +5734,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
Pred == ICmpInst::ICMP_SLE)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
}
} else {
// If the RHS value is > UnsignedMax, fold the comparison. This handles
@@ -5740,8 +5746,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
Pred == ICmpInst::ICMP_ULE)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
}
}
@@ -5753,8 +5759,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
Pred == ICmpInst::ICMP_SGE)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
}
}
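Concretely, the SMax/SMin bound checks fold away compares whose FP constant lies outside the integer type's range. A sketch with i8 standing in for the source type (not part of the patch):

#include <cassert>

int main() {
  // No i8 value converted to float can reach 1000.0, so a compare such as
  // (float)x < 1000.0f is constant-true and never needs to execute.
  for (int x = -128; x <= 127; ++x)
    assert(static_cast<float>(x) < 1000.0f);
  return 0;
}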
@@ -5763,27 +5769,27 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// casting the FP value to the integer value and back, checking for equality.
// Don't do this for zero, because -0.0 is not fractional.
Constant *RHSInt = LHSUnsigned
- ? Context->getConstantExprFPToUI(RHSC, IntTy)
- : Context->getConstantExprFPToSI(RHSC, IntTy);
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
if (!RHS.isZero()) {
bool Equal = LHSUnsigned
- ? Context->getConstantExprUIToFP(RHSInt, RHSC->getType()) == RHSC
- : Context->getConstantExprSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
if (!Equal) {
// If we had a comparison against a fractional value, we have to adjust
// the compare predicate and sometimes the value. RHSC is rounded towards
// zero at this point.
switch (Pred) {
- default: assert(0 && "Unexpected integer comparison!");
+ default: llvm_unreachable("Unexpected integer comparison!");
case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
case ICmpInst::ICMP_ULE:
// (float)int <= 4.4 --> int <= 4
// (float)int <= -4.4 --> false
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
break;
case ICmpInst::ICMP_SLE:
// (float)int <= 4.4 --> int <= 4
@@ -5795,7 +5801,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int < -4.4 --> false
// (float)int < 4.4 --> int <= 4
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
Pred = ICmpInst::ICMP_ULE;
break;
case ICmpInst::ICMP_SLT:
@@ -5808,7 +5814,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int > 4.4 --> int > 4
// (float)int > -4.4 --> true
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
break;
case ICmpInst::ICMP_SGT:
// (float)int > 4.4 --> int > 4
@@ -5820,7 +5826,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
if (!RHS.isNegative())
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
Pred = ICmpInst::ICMP_UGT;
break;
case ICmpInst::ICMP_SGE:
@@ -5844,22 +5850,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// Fold trivial predicates.
if (I.getPredicate() == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
if (I.getPredicate() == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
// Simplify 'fcmp pred X, X'
if (Op0 == Op1) {
switch (I.getPredicate()) {
- default: assert(0 && "Unknown predicate!");
+ default: llvm_unreachable("Unknown predicate!");
case FCmpInst::FCMP_UEQ: // True if unordered or equal
case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal
case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
case FCmpInst::FCMP_OGT: // True if ordered and greater than
case FCmpInst::FCMP_OLT: // True if ordered and less than
case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
case FCmpInst::FCMP_ULT: // True if unordered or less than
@@ -5867,7 +5873,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UNE: // True if unordered or not equal
// Canonicalize these to be 'fcmp uno %X, 0.0'.
I.setPredicate(FCmpInst::FCMP_UNO);
- I.setOperand(1, Context->getNullValue(Op0->getType()));
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
return &I;
case FCmpInst::FCMP_ORD: // True if ordered (no nans)
@@ -5876,13 +5882,13 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
// Canonicalize these to be 'fcmp ord %X, 0.0'.
I.setPredicate(FCmpInst::FCMP_ORD);
- I.setOperand(1, Context->getNullValue(Op0->getType()));
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
return &I;
}
}
if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef
- return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty));
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
@@ -5890,11 +5896,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
if (CFP->getValueAPF().isNaN()) {
if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
assert(FCmpInst::isUnordered(I.getPredicate()) &&
"Comparison must be either ordered or unordered!");
// True if unordered.
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
}
}
@@ -5905,7 +5911,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I))
+ if (Instruction *NV = FoldOpIntoPhi(I, true))
return NV;
break;
case Instruction::SIToFP:
@@ -5921,18 +5927,16 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (LHSI->hasOneUse()) {
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
// Fold the known value into the constant operand.
- Op1 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC);
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
// Insert a new FCmp of the other select operand.
- Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
- LHSI->getOperand(2), RHSC,
- I.getName()), I);
+ Op2 = Builder->CreateFCmp(I.getPredicate(),
+ LHSI->getOperand(2), RHSC, I.getName());
} else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
// Fold the known value into the constant operand.
- Op2 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC);
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
// Insert a new FCmp of the other select operand.
- Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
- LHSI->getOperand(1), RHSC,
- I.getName()), I);
+ Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
}
}
@@ -5952,28 +5956,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// icmp X, X
if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
I.isTrueWhenEqual()));
if (isa<UndefValue>(Op1)) // X icmp undef -> undef
- return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty));
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
// icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
// addresses never equal each other! We already know that Op0 != Op1.
- if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
+ if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
isa<ConstantPointerNull>(Op0)) &&
- (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
+ (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
isa<ConstantPointerNull>(Op1)))
- return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
!I.isTrueWhenEqual()));
// icmp's with boolean values can always be turned into bitwise operations
- if (Ty == Type::Int1Ty) {
+ if (Ty == Type::getInt1Ty(*Context)) {
switch (I.getPredicate()) {
- default: assert(0 && "Invalid icmp instruction!");
+ default: llvm_unreachable("Invalid icmp instruction!");
case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
- Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
- InsertNewInstBefore(Xor, I);
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
return BinaryOperator::CreateNot(Xor);
}
case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
@@ -5983,32 +5986,28 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
// FALL THROUGH
case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
- Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
- InsertNewInstBefore(Not, I);
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
return BinaryOperator::CreateAnd(Not, Op1);
}
case ICmpInst::ICMP_SGT:
std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
// FALL THROUGH
case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
- Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
- InsertNewInstBefore(Not, I);
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
return BinaryOperator::CreateAnd(Not, Op0);
}
case ICmpInst::ICMP_UGE:
std::swap(Op0, Op1); // Change icmp uge -> icmp ule
// FALL THROUGH
case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
- Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
- InsertNewInstBefore(Not, I);
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
return BinaryOperator::CreateOr(Not, Op1);
}
case ICmpInst::ICMP_SGE:
std::swap(Op0, Op1); // Change icmp sge -> icmp sle
// FALL THROUGH
case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
- Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
- InsertNewInstBefore(Not, I);
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
return BinaryOperator::CreateOr(Not, Op0);
}
}
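The i1 rewrites in this switch are ordinary Boolean identities; a sketch that checks them with bool standing in for i1 (recall that for signed i1 the bit pattern 1 reads as -1):

#include <cassert>

int main() {
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b) {
      assert((a == b) == !(a ^ b));    // eq  -> ~(A ^ B)
      assert((a != b) == (a ^ b));     // ne  -> A ^ B
      assert((a < b) == (!a & b));     // ult -> ~A & B
      assert((a <= b) == (!a | b));    // ule -> ~A | B
      assert((-a < -b) == (a & !b));   // slt -> A & ~B (1 means -1)
      assert((-a <= -b) == (a | !b));  // sle -> A | ~B
    }
  return 0;
}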
@@ -6040,20 +6039,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
default: break;
case ICmpInst::ICMP_ULE:
if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI, Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+ AddOne(CI));
case ICmpInst::ICMP_SLE:
if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI, Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ AddOne(CI));
case ICmpInst::ICMP_UGE:
if (CI->isMinValue(false)) // A >=u MIN -> TRUE
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI, Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+ SubOne(CI));
case ICmpInst::ICMP_SGE:
if (CI->isMinValue(true)) // A >=s MIN -> TRUE
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
- return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI, Context));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ SubOne(CI));
}
// If this comparison is a normal comparison, it demands all
@@ -6100,110 +6103,114 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// that code below can assume that Min != Max.
if (!isa<Constant>(Op0) && Op0Min == Op0Max)
return new ICmpInst(I.getPredicate(),
- Context->getConstantInt(Op0Min), Op1);
+ ConstantInt::get(*Context, Op0Min), Op1);
if (!isa<Constant>(Op1) && Op1Min == Op1Max)
- return new ICmpInst(I.getPredicate(), Op0,
- Context->getConstantInt(Op1Min));
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(*Context, Op1Min));
// Based on the range information we know about the LHS, see if we can
// simplify this comparison. For example, (x&4) < 8 is always true.
switch (I.getPredicate()) {
- default: assert(0 && "Unknown icmp opcode!");
+ default: llvm_unreachable("Unknown icmp opcode!");
case ICmpInst::ICMP_EQ:
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
break;
case ICmpInst::ICMP_NE:
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
break;
case ICmpInst::ICMP_ULT:
if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context));
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ SubOne(CI));
// (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
if (CI->isMinValue(true))
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- Context->getConstantIntAllOnesValue(Op0->getType()));
+ Constant::getAllOnesValue(Op0->getType()));
}
break;
case ICmpInst::ICMP_UGT:
if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context));
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ AddOne(CI));
// (x >u 2147483647) -> (x <s 0) -> true if sign bit set
if (CI->isMaxValue(true))
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- Context->getNullValue(Op0->getType()));
+ Constant::getNullValue(Op0->getType()));
}
break;
case ICmpInst::ICMP_SLT:
if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context));
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ SubOne(CI));
}
break;
case ICmpInst::ICMP_SGT:
if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context));
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ AddOne(CI));
}
break;
case ICmpInst::ICMP_SGE:
assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
break;
case ICmpInst::ICMP_SLE:
assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
break;
case ICmpInst::ICMP_UGE:
assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
break;
case ICmpInst::ICMP_ULE:
assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
- return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
break;
}
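As the earlier comment notes, this range analysis proves facts such as "(x & 4) <u 8 is always true": the computed Op0Max for x & 4 is 4, below the RHS minimum of 8. A brute-force sketch of that example (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < (1u << 16); ++x)
    assert((x & 4u) < 8u);  // max(x & 4) == 4 < 8, so always true
  return 0;
}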
@@ -6255,16 +6262,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
if (isAllZeros)
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
- Context->getNullValue(LHSI->getOperand(0)->getType()));
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
}
break;
case Instruction::PHI:
- // Only fold icmp into the PHI if the phi and fcmp are in the same
+ // Only fold icmp into the PHI if the phi and icmp are in the same
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I))
+ if (Instruction *NV = FoldOpIntoPhi(I, true))
return NV;
break;
case Instruction::Select: {
@@ -6275,18 +6282,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (LHSI->hasOneUse()) {
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
// Fold the known value into the constant operand.
- Op1 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC);
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
// Insert a new ICmp of the other select operand.
- Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
- LHSI->getOperand(2), RHSC,
- I.getName()), I);
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
} else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
// Fold the known value into the constant operand.
- Op2 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC);
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
// Insert a new ICmp of the other select operand.
- Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
- LHSI->getOperand(1), RHSC,
- I.getName()), I);
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
}
}
@@ -6298,19 +6303,31 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If we have (malloc != null), and if the malloc has a single use, we
// can assume it is successful and remove the malloc.
if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
- AddToWorkList(LHSI);
- return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
- !I.isTrueWhenEqual()));
+ Worklist.Add(LHSI);
+ return ReplaceInstUsesWith(I,
+ ConstantInt::get(Type::getInt1Ty(*Context),
+ !I.isTrueWhenEqual()));
+ }
+ break;
+ case Instruction::Call:
+ // If we have (malloc != null), and if the malloc has a single use, we
+ // can assume it is successful and remove the malloc.
+ if (isMalloc(LHSI) && LHSI->hasOneUse() &&
+ isa<ConstantPointerNull>(RHSC)) {
+ Worklist.Add(LHSI);
+ return ReplaceInstUsesWith(I,
+ ConstantInt::get(Type::getInt1Ty(*Context),
+ !I.isTrueWhenEqual()));
}
break;
}
}
// If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
- if (User *GEP = dyn_castGetElementPtr(Op0))
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
return NI;
- if (User *GEP = dyn_castGetElementPtr(Op1))
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
if (Instruction *NI = FoldGEPICmp(GEP, Op0,
ICmpInst::getSwappedPredicate(I.getPredicate()), I))
return NI;
@@ -6333,10 +6350,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If Op1 is a constant, we can fold the cast into the constant.
if (Op0->getType() != Op1->getType()) {
if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- Op1 = Context->getConstantExprBitCast(Op1C, Op0->getType());
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
} else {
// Otherwise, cast the RHS right before the icmp
- Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
}
}
return new ICmpInst(I.getPredicate(), Op0, Op1);
@@ -6397,16 +6414,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Mask = -1 >> count-trailing-zeros(Cst).
if (!CI->isZero() && !CI->isOne()) {
const APInt &AP = CI->getValue();
- ConstantInt *Mask = Context->getConstantInt(
+ ConstantInt *Mask = ConstantInt::get(*Context,
APInt::getLowBitsSet(AP.getBitWidth(),
AP.getBitWidth() -
AP.countTrailingZeros()));
- Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0),
- Mask);
- Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0),
- Mask);
- InsertNewInstBefore(And1, I);
- InsertNewInstBefore(And2, I);
+ Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
return new ICmpInst(I.getPredicate(), And1, And2);
}
}
@@ -6435,7 +6448,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
return new ICmpInst(I.getPredicate(), OtherVal,
- Context->getNullValue(A->getType()));
+ Constant::getNullValue(A->getType()));
}
if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
@@ -6444,10 +6457,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (match(B, m_ConstantInt(C1)) &&
match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
Constant *NC =
- Context->getConstantInt(C1->getValue() ^ C2->getValue());
- Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
- return new ICmpInst(I.getPredicate(), A,
- InsertNewInstBefore(Xor, I));
+ ConstantInt::get(*Context, C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ return new ICmpInst(I.getPredicate(), A, Xor);
}
// A^B == A^D -> B == D
@@ -6463,18 +6475,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// A == (A^B) -> B == 0
Value *OtherVal = A == Op0 ? B : A;
return new ICmpInst(I.getPredicate(), OtherVal,
- Context->getNullValue(A->getType()));
+ Constant::getNullValue(A->getType()));
}
// (A-B) == A -> B == 0
if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
return new ICmpInst(I.getPredicate(), B,
- Context->getNullValue(B->getType()));
+ Constant::getNullValue(B->getType()));
// A == (A-B) -> B == 0
if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
return new ICmpInst(I.getPredicate(), B,
- Context->getNullValue(B->getType()));
+ Constant::getNullValue(B->getType()));
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
if (Op0->hasOneUse() && Op1->hasOneUse() &&
@@ -6493,10 +6505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
if (X) { // Build (X^Y) & Z
- Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
- Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
+ Op1 = Builder->CreateXor(X, Y, "tmp");
+ Op1 = Builder->CreateAnd(Op1, Z, "tmp");
I.setOperand(0, Op1);
- I.setOperand(1, Context->getNullValue(Op1->getType()));
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
return &I;
}
}
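The (X&Z) == (Y&Z) --> (X^Y) & Z == 0 rewrite is another pure bit identity; an exhaustive spot-check over small values (sketch, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 32; ++x)
    for (uint32_t y = 0; y < 32; ++y)
      for (uint32_t z = 0; z < 32; ++z)
        assert(((x & z) == (y & z)) == (((x ^ y) & z) == 0));
  return 0;
}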
@@ -6535,13 +6547,13 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
// C2 (CI). By solving for X we can turn this into a range check
// instead of computing a divide.
- Constant *Prod = Context->getConstantExprMul(CmpRHS, DivRHS);
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
// as in the LHS instruction that we're folding.
- bool ProdOV = (DivIsSigned ? Context->getConstantExprSDiv(Prod, DivRHS) :
- Context->getConstantExprUDiv(Prod, DivRHS)) != CmpRHS;
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
// Get the ICmp opcode
ICmpInst::Predicate Pred = ICI.getPredicate();
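For the equality case this turns a divide into a range-membership test, e.g. X /u 5 == 3 exactly when X is in [15, 20). A sketch of that instance (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 100; ++x)
    assert((x / 5 == 3) == (x >= 15 && x < 20));  // Prod = 15, Prod + 5 = 20
  return 0;
}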
@@ -6565,8 +6577,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
- LoBound = cast<ConstantInt>(Context->getConstantExprNeg(SubOne(DivRHS,
- Context)));
+ LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
HiBound = DivRHS;
} else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
@@ -6575,11 +6586,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);
} else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
- HiBound = AddOne(Prod, Context);
+ HiBound = AddOne(Prod);
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
if (!LoOverflow) {
ConstantInt* DivNeg =
- cast<ConstantInt>(Context->getConstantExprNeg(DivRHS));
+ cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,
true) ? -1 : 0;
}
@@ -6587,15 +6598,15 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
} else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
if (CmpRHSV == 0) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
- LoBound = AddOne(DivRHS, Context);
- HiBound = cast<ConstantInt>(Context->getConstantExprNeg(DivRHS));
+ LoBound = AddOne(DivRHS);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
}
} else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
- HiBound = AddOne(Prod, Context);
+ HiBound = AddOne(Prod);
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
LoOverflow = AddWithOverflow(LoBound, HiBound,
@@ -6613,42 +6624,42 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
Value *X = DivI->getOperand(0);
switch (Pred) {
- default: assert(0 && "Unhandled icmp opcode!");
+ default: llvm_unreachable("Unhandled icmp opcode!");
case ICmpInst::ICMP_EQ:
if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
else if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, LoBound);
else if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, HiBound);
else
return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
else if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, LoBound);
else if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, HiBound);
else
return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
if (LoOverflow == +1) // Low bound is greater than input range.
- return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
if (LoOverflow == -1) // Low bound is less than input range.
- return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
return new ICmpInst(Pred, X, LoBound);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT:
if (HiOverflow == +1) // High bound greater than input range.
- return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
else if (HiOverflow == -1) // High bound less than input range.
- return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
if (Pred == ICmpInst::ICMP_UGT)
return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
else
@@ -6682,7 +6693,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
NewRHS.zext(SrcBits);
NewRHS |= KnownOne;
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- Context->getConstantInt(NewRHS));
+ ConstantInt::get(*Context, NewRHS));
}
}
break;
@@ -6699,7 +6710,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// the operation, just stop using the Xor.
if (!XorCST->getValue().isNegative()) {
ICI.setOperand(0, CompareVal);
- AddToWorkList(LHSI);
+ Worklist.Add(LHSI);
return &ICI;
}
@@ -6711,10 +6722,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (isTrueIfPositive)
return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
- SubOne(RHS, Context));
+ SubOne(RHS));
else
return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
- AddOne(RHS, Context));
+ AddOne(RHS));
}
if (LHSI->hasOneUse()) {
@@ -6725,7 +6736,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
return new ICmpInst(Pred, LHSI->getOperand(0),
- Context->getConstantInt(RHSV ^ SignBit));
+ ConstantInt::get(*Context, RHSV ^ SignBit));
}
// (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
@@ -6736,7 +6747,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
: ICI.getSignedPredicate();
Pred = ICI.getSwappedPredicate(Pred);
return new ICmpInst(Pred, LHSI->getOperand(0),
- Context->getConstantInt(RHSV ^ NotSignBit));
+ ConstantInt::get(*Context, RHSV ^ NotSignBit));
}
}
}
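The sign-bit xor trick works because flipping the top bit is a monotone map between unsigned and signed order. A sketch over all 8-bit pairs (not part of the patch; assumes the usual two's-complement narrowing):

#include <cassert>
#include <cstdint>

int main() {
  for (int a = 0; a < 256; ++a)
    for (int c = 0; c < 256; ++c) {
      uint8_t UA = static_cast<uint8_t>(a), UC = static_cast<uint8_t>(c);
      int8_t SA = static_cast<int8_t>(UA ^ 0x80);  // xor with the sign bit
      int8_t SC = static_cast<int8_t>(UC ^ 0x80);
      assert((UA < UC) == (SA < SC));  // unsigned order becomes signed order
    }
  return 0;
}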
@@ -6763,12 +6774,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
NewCST.zext(BitWidth);
APInt NewCI = RHSV;
NewCI.zext(BitWidth);
- Instruction *NewAnd =
- BinaryOperator::CreateAnd(Cast->getOperand(0),
- Context->getConstantInt(NewCST),LHSI->getName());
- InsertNewInstBefore(NewAnd, ICI);
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantInt::get(*Context, NewCST), LHSI->getName());
return new ICmpInst(ICI.getPredicate(), NewAnd,
- Context->getConstantInt(NewCI));
+ ConstantInt::get(*Context, NewCI));
}
}
@@ -6805,32 +6815,31 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (CanFold) {
Constant *NewCst;
if (Shift->getOpcode() == Instruction::Shl)
- NewCst = Context->getConstantExprLShr(RHS, ShAmt);
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
else
- NewCst = Context->getConstantExprShl(RHS, ShAmt);
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
// Check to see if we are shifting out any of the bits being
// compared.
- if (Context->getConstantExpr(Shift->getOpcode(),
+ if (ConstantExpr::get(Shift->getOpcode(),
NewCst, ShAmt) != RHS) {
// If we shifted bits out, the fold is not going to work out.
// As a special case, check to see if this means that the
// result is always true or false now.
if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
} else {
ICI.setOperand(1, NewCst);
Constant *NewAndCST;
if (Shift->getOpcode() == Instruction::Shl)
- NewAndCST = Context->getConstantExprLShr(AndCST, ShAmt);
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
else
- NewAndCST = Context->getConstantExprShl(AndCST, ShAmt);
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
LHSI->setOperand(1, NewAndCST);
LHSI->setOperand(0, Shift->getOperand(0));
- AddToWorkList(Shift); // Shift is dead.
- AddUsesToWorkList(ICI);
+ Worklist.Add(Shift); // Shift is dead.
return &ICI;
}
}
@@ -6845,19 +6854,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Compute C << Y.
Value *NS;
if (Shift->getOpcode() == Instruction::LShr) {
- NS = BinaryOperator::CreateShl(AndCST,
- Shift->getOperand(1), "tmp");
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
} else {
// Insert a logical shift.
- NS = BinaryOperator::CreateLShr(AndCST,
- Shift->getOperand(1), "tmp");
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
}
- InsertNewInstBefore(cast<Instruction>(NS), ICI);
// Compute X & (C << Y).
- Instruction *NewAnd =
- BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
- InsertNewInstBefore(NewAnd, ICI);
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
ICI.setOperand(0, NewAnd);
return &ICI;
@@ -6881,11 +6886,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
Constant *Comp =
- Context->getConstantExprShl(Context->getConstantExprLShr(RHS, ShAmt),
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
ShAmt);
if (Comp != RHS) {// Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE);
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
@@ -6893,15 +6898,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
Constant *Mask =
- Context->getConstantInt(APInt::getLowBitsSet(TypeBits,
+ ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits,
TypeBits-ShAmtVal));
- Instruction *AndI =
- BinaryOperator::CreateAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- Value *And = InsertNewInstBefore(AndI, ICI);
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And,
- Context->getConstantInt(RHSV.lshr(ShAmtVal)));
+ ConstantInt::get(*Context, RHSV.lshr(ShAmtVal)));
}
}
@@ -6910,15 +6913,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse() &&
isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
// (X << 31) <s 0 --> (X&1) != 0
- Constant *Mask = Context->getConstantInt(APInt(TypeBits, 1) <<
+ Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) <<
(TypeBits-ShAmt->getZExtValue()-1));
- Instruction *AndI =
- BinaryOperator::CreateAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- Value *And = InsertNewInstBefore(AndI, ICI);
-
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
- And, Context->getNullValue(And->getType()));
+ And, Constant::getNullValue(And->getType()));
}
break;
}
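The shl case above is the "(X << 31) <s 0 --> (X & 1) != 0" strength reduction: only the bit that lands in the sign position matters. Sketch (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000; ++x) {
    bool SignSet = static_cast<int32_t>(x << 31) < 0;  // (X << 31) <s 0
    assert(SignSet == ((x & 1u) != 0));                // (X & 1) != 0
  }
  return 0;
}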
@@ -6948,7 +6948,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (Comp != RHSV) { // Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE);
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
@@ -6959,20 +6959,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
MaskedValueIsZero(LHSI->getOperand(0),
APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- Context->getConstantExprShl(RHS, ShAmt));
+ ConstantExpr::getShl(RHS, ShAmt));
}
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = Context->getConstantInt(Val);
+ Constant *Mask = ConstantInt::get(*Context, Val);
- Instruction *AndI =
- BinaryOperator::CreateAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- Value *And = InsertNewInstBefore(AndI, ICI);
+ Value *And = Builder->CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And,
- Context->getConstantExprShl(RHS, ShAmt));
+ ConstantExpr::getShl(RHS, ShAmt));
}
break;
}
@@ -7005,18 +7003,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ICI.isSignedPredicate()) {
if (CR.getLower().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
- Context->getConstantInt(CR.getUpper()));
+ ConstantInt::get(*Context, CR.getUpper()));
} else if (CR.getUpper().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
- Context->getConstantInt(CR.getLower()));
+ ConstantInt::get(*Context, CR.getLower()));
}
} else {
if (CR.getLower().isMinValue()) {
return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
- Context->getConstantInt(CR.getUpper()));
+ ConstantInt::get(*Context, CR.getUpper()));
} else if (CR.getUpper().isMinValue()) {
return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
- Context->getConstantInt(CR.getLower()));
+ ConstantInt::get(*Context, CR.getLower()));
}
}
}
@@ -7036,12 +7034,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) && BO->hasOneUse()) {
const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
- Instruction *NewRem =
- BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
- BO->getName());
- InsertNewInstBefore(NewRem, ICI);
- return new ICmpInst(ICI.getPredicate(), NewRem,
- Context->getNullValue(BO->getType()));
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
}
}
break;
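Rewriting srem by a power of two as urem is safe under an equality-with-zero compare because the sign of the remainder never affects whether it is zero. Sketch (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = -100; x <= 100; ++x) {
    bool SRemZero = (x % 4 == 0);                          // srem
    bool URemZero = (static_cast<uint32_t>(x) % 4u == 0);  // urem
    assert(SRemZero == URemZero);  // 2^32 is divisible by 4
  }
  return 0;
}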
@@ -7050,19 +7047,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
if (BO->hasOneUse())
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Context->getConstantExprSub(RHS, BOp1C));
+ ConstantExpr::getSub(RHS, BOp1C));
} else if (RHSV == 0) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
- if (Value *NegVal = dyn_castNegVal(BOp1, Context))
+ if (Value *NegVal = dyn_castNegVal(BOp1))
return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
- else if (Value *NegVal = dyn_castNegVal(BOp0, Context))
+ else if (Value *NegVal = dyn_castNegVal(BOp0))
return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
else if (BO->hasOneUse()) {
- Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
- InsertNewInstBefore(Neg, ICI);
+ Value *Neg = Builder->CreateNeg(BOp1);
Neg->takeName(BO);
return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
}
@@ -7073,7 +7069,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// the explicit xor.
if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Context->getConstantExprXor(RHS, BOC));
+ ConstantExpr::getXor(RHS, BOC));
// FALLTHROUGH
case Instruction::Sub:
@@ -7087,10 +7083,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If bits are being or'd in that are not present in the constant we
// are comparing against, then the comparison could never succeed!
if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
- Constant *NotCI = Context->getConstantExprNot(RHS);
- if (!Context->getConstantExprAnd(BOC, NotCI)->isNullValue())
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
return ReplaceInstUsesWith(ICI,
- Context->getConstantInt(Type::Int1Ty,
+ ConstantInt::get(Type::getInt1Ty(*Context),
isICMP_NE));
}
break;
@@ -7101,19 +7097,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// comparison can never succeed!
if ((RHSV & ~BOC->getValue()) != 0)
return ReplaceInstUsesWith(ICI,
- Context->getConstantInt(Type::Int1Ty,
+ ConstantInt::get(Type::getInt1Ty(*Context),
isICMP_NE));
// If we have ((X & C) == C), turn it into ((X & C) != 0).
if (RHS == BOC && RHSV.isPowerOf2())
return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
ICmpInst::ICMP_NE, LHSI,
- Context->getNullValue(RHS->getType()));
+ Constant::getNullValue(RHS->getType()));
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
- Constant *Zero = Context->getNullValue(X->getType());
+ Constant *Zero = Constant::getNullValue(X->getType());
ICmpInst::Predicate pred = isICMP_NE ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
return new ICmpInst(pred, X, Zero);
@@ -7122,7 +7118,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// ((X & ~7) == 0) --> X < 8
if (RHSV == 0 && isHighOnes(BOC)) {
Value *X = BO->getOperand(0);
- Constant *NegX = Context->getConstantExprNeg(BOC);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
ICmpInst::Predicate pred = isICMP_NE ?
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
return new ICmpInst(pred, X, NegX);
@@ -7133,9 +7129,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
// Handle icmp {eq|ne} <intrinsic>, intcst.
if (II->getIntrinsicID() == Intrinsic::bswap) {
- AddToWorkList(II);
+ Worklist.Add(II);
ICI.setOperand(0, II->getOperand(1));
- ICI.setOperand(1, Context->getConstantInt(RHSV.byteSwap()));
+ ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap()));
return &ICI;
}
}
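The bswap fold relies on byte-swap being an involution, so equality can be tested on the un-swapped value against a byte-swapped constant. Sketch (not part of the patch; bswap32 here is a hand-rolled stand-in for the intrinsic):

#include <cassert>
#include <cstdint>

uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

int main() {
  for (uint32_t x : {0x12345678u, 0xDEADBEEFu, 0u}) {
    assert(bswap32(bswap32(x)) == x);  // involution
    uint32_t c = 0x78563412u;
    assert((bswap32(x) == c) == (x == bswap32(c)));
  }
  return 0;
}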
@@ -7155,17 +7151,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (LHSCI->getOpcode() == Instruction::PtrToInt &&
- getTargetData().getPointerSizeInBits() ==
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits() ==
cast<IntegerType>(DestTy)->getBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
- RHSOp = Context->getConstantExprIntToPtr(RHSC, SrcTy);
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
} else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
RHSOp = RHSC->getOperand(0);
// If the pointer types don't match, insert a bitcast.
if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
}
if (RHSOp)
@@ -7212,8 +7208,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Compute the constant that would happen if we truncated to SrcTy then
// reextended to DestTy.
- Constant *Res1 = Context->getConstantExprTrunc(CI, SrcTy);
- Constant *Res2 = Context->getConstantExprCast(LHSCI->getOpcode(),
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
Res1, DestTy);
// If the re-extended constant didn't change...
@@ -7239,9 +7235,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// First, handle some easy cases. We know the result cannot be equal at this
// point so handle the ICI.isEquality() cases
if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
// Evaluate the comparison for LT (we invert for GT below). LE and GE cases
// should have been folded away previously and not enter in here.
@@ -7249,20 +7245,19 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (isSignedCmp) {
// We're performing a signed comparison.
if (cast<ConstantInt>(CI)->getValue().isNegative())
- Result = Context->getConstantIntFalse(); // X < (small) --> false
+ Result = ConstantInt::getFalse(*Context); // X < (small) --> false
else
- Result = Context->getConstantIntTrue(); // X < (large) --> true
+ Result = ConstantInt::getTrue(*Context); // X < (large) --> true
} else {
// We're performing an unsigned comparison.
if (isSignedExt) {
// We're performing an unsigned comp with a sign extended value.
// This is true if the input is >= 0. [aka >s -1]
- Constant *NegOne = Context->getConstantIntAllOnesValue(SrcTy);
- Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp,
- NegOne, ICI.getName()), ICI);
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
} else {
// Unsigned extend & unsigned compare -> always true.
- Result = Context->getConstantIntTrue();
+ Result = ConstantInt::getTrue(*Context);
}
}
@@ -7275,7 +7270,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
"ICmp should be folded!");
if (Constant *CI = dyn_cast<Constant>(Result))
- return ReplaceInstUsesWith(ICI, Context->getConstantExprNot(CI));
+ return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
return BinaryOperator::CreateNot(Result);
}
@@ -7317,21 +7312,21 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
// shl X, 0 == X and shr X, 0 == X
// shl 0, X == 0 and shr 0, X == 0
- if (Op1 == Context->getNullValue(Op1->getType()) ||
- Op0 == Context->getNullValue(Op0->getType()))
+ if (Op1 == Constant::getNullValue(Op1->getType()) ||
+ Op0 == Constant::getNullValue(Op0->getType()))
return ReplaceInstUsesWith(I, Op0);
if (isa<UndefValue>(Op0)) {
if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
return ReplaceInstUsesWith(I, Op0);
else // undef << X -> 0, undef >>u X -> 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
if (isa<UndefValue>(Op1)) {
if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
return ReplaceInstUsesWith(I, Op0);
else // X << undef, X >>u undef -> 0
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
// See if we can fold away this shift.
@@ -7363,9 +7358,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
//
if (Op1->uge(TypeBits)) {
if (I.getOpcode() != Instruction::AShr)
- return ReplaceInstUsesWith(I, Context->getNullValue(Op0->getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
else {
- I.setOperand(1, Context->getConstantInt(I.getType(), TypeBits-1));
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
return &I;
}
}
@@ -7375,7 +7370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (BO->getOpcode() == Instruction::Mul && isLeftShift)
if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
return BinaryOperator::CreateMul(BO->getOperand(0),
- Context->getConstantExprShl(BOOp, Op1));
+ ConstantExpr::getShl(BOOp, Op1));
// Try to fold constant and into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
@@ -7396,10 +7391,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = Context->getConstantExprZExt(Op1, TrOp->getType());
- Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
- I.getName());
- InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
// For logical shifts, the truncation has the effect of making the high
// part of the register be zeros. Emulate this by inserting an AND to
@@ -7420,10 +7414,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
MaskV = MaskV.lshr(Op1->getZExtValue());
}
- Instruction *And =
- BinaryOperator::CreateAnd(NSh, Context->getConstantInt(MaskV),
- TI->getName());
- InsertNewInstBefore(And, I); // shift1 & 0x00FF
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV),
+ TI->getName());
// Return the value truncated to the interesting size.
return new TruncInst(And, I.getType());
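
The trunc-of-shift transform above rests on the fact that, for logical shifts, truncation commutes with a shift done in the wide type as long as an AND re-zeroes the bits the truncation would have discarded. A standalone check of the masking identity for i32 -> i8 with a shift of 3:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 100000; x += 97) {
        uint8_t narrow = (uint8_t)((uint8_t)x >> 3);          // shift after trunc
        uint8_t wide   = (uint8_t)((x >> 3) & (0xFFu >> 3));  // shift, mask, trunc
        assert(narrow == wide);   // MaskV here is 0xFF lshr 3 = 0x1F
      }
      return 0;
    }
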
@@ -7444,17 +7437,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// These operators commute.
// Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
- match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))){
- Instruction *YS = BinaryOperator::CreateShl(
- Op0BO->getOperand(0), Op1,
- Op0BO->getName());
- InsertNewInstBefore(YS, I); // (Y << C)
- Instruction *X =
- BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
- Op0BO->getOperand(1)->getName());
- InsertNewInstBefore(X, I); // (X + (Y << C))
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, Context->getConstantInt(
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
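
The (Y + (X >> C)) << C rewrite is valid because the low C bits of (Y << C) plus the high part of X are zero, so adding back X's low bits and then masking them off changes nothing. A quick check of the identity with arbitrary sample values:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 5;
      for (uint32_t i = 0; i < 1000; ++i) {
        uint32_t X = i * 2654435761u, Y = i * 40503u;  // arbitrary samples
        uint32_t lhs = (Y + (X >> C)) << C;
        uint32_t rhs = ((Y << C) + X) & (~0u << C);    // the emitted form
        assert(lhs == rhs);
      }
      return 0;
    }
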
@@ -7465,16 +7456,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
m_ConstantInt(CC))) &&
cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
- Instruction *YS = BinaryOperator::CreateShl(
- Op0BO->getOperand(0), Op1,
- Op0BO->getName());
- InsertNewInstBefore(YS, I); // (Y << C)
- Instruction *XM =
- BinaryOperator::CreateAnd(V1,
- Context->getConstantExprShl(CC, Op1),
- V1->getName()+".mask");
- InsertNewInstBefore(XM, I); // X & (CC << C)
-
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
}
@@ -7483,17 +7470,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
case Instruction::Sub: {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))){
- Instruction *YS = BinaryOperator::CreateShl(
- Op0BO->getOperand(1), Op1,
- Op0BO->getName());
- InsertNewInstBefore(YS, I); // (Y << C)
- Instruction *X =
- BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
- Op0BO->getOperand(0)->getName());
- InsertNewInstBefore(X, I); // (X + (Y << C))
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, Context->getConstantInt(
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
@@ -7504,15 +7489,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
m_ConstantInt(CC))) && V2 == Op1 &&
cast<BinaryOperator>(Op0BO->getOperand(0))
->getOperand(0)->hasOneUse()) {
- Instruction *YS = BinaryOperator::CreateShl(
- Op0BO->getOperand(1), Op1,
- Op0BO->getName());
- InsertNewInstBefore(YS, I); // (Y << C)
- Instruction *XM =
- BinaryOperator::CreateAnd(V1,
- Context->getConstantExprShl(CC, Op1),
- V1->getName()+".mask");
- InsertNewInstBefore(XM, I); // X & (CC << C)
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
@@ -7552,11 +7533,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
if (isValid) {
- Constant *NewRHS = Context->getConstantExpr(I.getOpcode(), Op0C, Op1);
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
- Instruction *NewShift =
- BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
- InsertNewInstBefore(NewShift, I);
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
NewShift->takeName(Op0BO);
return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
@@ -7589,31 +7569,33 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// saturates.
if (AmtSum >= TypeBits) {
if (I.getOpcode() != Instruction::AShr)
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
}
return BinaryOperator::Create(I.getOpcode(), X,
- Context->getConstantInt(Ty, AmtSum));
- } else if (ShiftOp->getOpcode() == Instruction::LShr &&
- I.getOpcode() == Instruction::AShr) {
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftOp->getOpcode() == Instruction::LShr &&
+ I.getOpcode() == Instruction::AShr) {
if (AmtSum >= TypeBits)
- return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, AmtSum));
- } else if (ShiftOp->getOpcode() == Instruction::AShr &&
- I.getOpcode() == Instruction::LShr) {
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftOp->getOpcode() == Instruction::AShr &&
+ I.getOpcode() == Instruction::LShr) {
// ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
if (AmtSum >= TypeBits)
AmtSum = TypeBits-1;
- Instruction *Shift =
- BinaryOperator::CreateAShr(X, Context->getConstantInt(Ty, AmtSum));
- InsertNewInstBefore(Shift, I);
+ Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask));
}
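
The mixed-direction cases above hinge on C1 != 0: after a non-trivial lshr the sign bit is clear, so a following ashr behaves exactly like an lshr and the two shifts merge. Standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 1, C2 = 3;   // C1 != 0 guarantees a clear sign bit
      const uint32_t samples[] = {0u, 1u, 123456789u, 0x80000000u, 0xFFFFFFFFu};
      for (uint32_t x : samples) {
        int32_t two = (int32_t)(x >> C1) >> C2;    // lshr then ashr
        assert((uint32_t)two == x >> (C1 + C2));   // same as a single lshr
      }
      return 0;
    }
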
// Okay, if we get here, one shift must be left, and the other shift must be
@@ -7622,12 +7604,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
if (I.getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
if (I.getOpcode() == Instruction::LShr) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
}
// We can simplify ((X << C) >>s C) into a trunc + sext.
// NOTE: we could do this for any C, but that would make 'unusual' integer
@@ -7641,15 +7623,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
case 32 :
case 64 :
case 128:
- SExtType = Context->getIntegerType(Ty->getBitWidth() - ShiftAmt1);
+ SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1);
break;
default: break;
}
- if (SExtType) {
- Instruction *NewTrunc = new TruncInst(X, SExtType, "sext");
- InsertNewInstBefore(NewTrunc, I);
- return new SExtInst(NewTrunc, Ty);
- }
+ if (SExtType)
+ return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
// Otherwise, we can't handle it yet.
} else if (ShiftAmt1 < ShiftAmt2) {
uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
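
The trunc+sext rewrite a few lines up is the usual sign-extension-in-register trick: shifting left and then arithmetic-shifting right by the same amount keeps the low bits and smears the new sign bit. A check of the i32/i8 case (assumes arithmetic >> on signed values and wrap-around narrowing, as every mainstream C++ target provides):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t samples[] = {0, 1, -1, 127, 128, -128, 255, 1000, -1000};
      for (int32_t x : samples) {
        int32_t shifted = (int32_t)((uint32_t)x << 24) >> 24;
        assert(shifted == (int32_t)(int8_t)x);   // trunc to i8, then sext
      }
      return 0;
    }
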
@@ -7658,23 +7637,21 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (I.getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
- Instruction *Shift =
- BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff));
- InsertNewInstBefore(Shift, I);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(*Context, Mask));
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
- Instruction *Shift =
- BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, ShiftDiff));
- InsertNewInstBefore(Shift, I);
+ Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(*Context, Mask));
}
// We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
@@ -7686,24 +7663,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (I.getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
- Instruction *Shift =
- BinaryOperator::Create(ShiftOp->getOpcode(), X,
- Context->getConstantInt(Ty, ShiftDiff));
- InsertNewInstBefore(Shift, I);
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
+ ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(*Context, Mask));
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
- Instruction *Shift =
- BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff));
- InsertNewInstBefore(Shift, I);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(*Context, Mask));
}
// We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
@@ -7718,12 +7693,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
/// X*Scale+Offset.
///
static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
- int &Offset, LLVMContext* Context) {
- assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
+ int &Offset, LLVMContext *Context) {
+ assert(Val->getType() == Type::getInt32Ty(*Context) &&
+ "Unexpected allocation size type!");
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
Scale = 0;
- return Context->getConstantInt(Type::Int32Ty, 0);
+ return ConstantInt::get(Type::getInt32Ty(*Context), 0);
} else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
if (I->getOpcode() == Instruction::Shl) {
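
DecomposeSimpleLinearExpr pulls a value apart as X*Scale + Offset; the Shl case that begins here treats a left shift by C as a multiply by 1 << C. A tiny standalone analogue over plain integers, with made-up values:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Val = (X << 2) + 5 decomposes as X*4 + 5.
      uint32_t X = 37;
      uint32_t Val = (X << 2) + 5;
      uint32_t Scale = 1u << 2, Offset = 5;
      assert(Val == X * Scale + Offset);
      return 0;
    }
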
@@ -7763,6 +7739,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocationInst &AI) {
const PointerType *PTy = cast<PointerType>(CI.getType());
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+
// Remove any uses of AI that are dead.
assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
@@ -7773,11 +7752,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
++UI; // If this instruction uses AI more than once, don't break UI.
++NumDeadInst;
- DOUT << "IC: DCE: " << *User;
+ DEBUG(errs() << "IC: DCE: " << *User << '\n');
EraseInstFromFunction(*User);
}
}
-
+
+ // This requires TargetData to get the alloca alignment and size information.
+ if (!TD) return 0;
+
// Get the type really allocated and the type casted to.
const Type *AllocElTy = AI.getAllocatedType();
const Type *CastElTy = PTy->getElementType();
@@ -7816,30 +7798,22 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if (Scale == 1) {
Amt = NumElements;
} else {
- // If the allocation size is constant, form a constant mul expression
- Amt = Context->getConstantInt(Type::Int32Ty, Scale);
- if (isa<ConstantInt>(NumElements))
- Amt = Context->getConstantExprMul(cast<ConstantInt>(NumElements),
- cast<ConstantInt>(Amt));
- // otherwise multiply the amount and the number of elements
- else {
- Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
- Amt = InsertNewInstBefore(Tmp, AI);
- }
+ Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
}
if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
- Value *Off = Context->getConstantInt(Type::Int32Ty, Offset, true);
- Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
- Amt = InsertNewInstBefore(Tmp, AI);
+ Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
}
AllocationInst *New;
if (isa<MallocInst>(AI))
- New = new MallocInst(CastElTy, Amt, AI.getAlignment());
+ New = AllocaBuilder.CreateMalloc(CastElTy, Amt);
else
- New = new AllocaInst(CastElTy, Amt, AI.getAlignment());
- InsertNewInstBefore(New, AI);
+ New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
New->takeName(&AI);
// If the allocation has one real use plus a dbg.declare, just remove the
@@ -7851,11 +7825,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// things that used it to use the new cast. This will also hack on CI, but it
// will die soon.
else if (!AI.hasOneUse()) {
- AddUsesToWorkList(AI);
// New is the allocation instruction, pointer typed. AI is the original
// allocation instruction, also pointer typed. Thus, cast to use is BitCast.
- CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast");
- InsertNewInstBefore(NewCast, AI);
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
AI.replaceAllUsesWith(NewCast);
}
return ReplaceInstUsesWith(CI, New);
@@ -7923,6 +7895,23 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
NumCastsRemoved);
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+ MaskedValueIsZero(I->getOperand(1), Mask)) {
+ return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+ NumCastsRemoved) &&
+ CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+ NumCastsRemoved);
+ }
+ }
+ break;
+ }
case Instruction::Shl:
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
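
The UDiv/URem case added in the hunk above says narrowing is safe when the truncated-away high bits of both operands are already known zero; unsigned division of small values never looks at those bits. Standalone check for i32 -> i8:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t a = 0; a < 256; a += 7)
        for (uint32_t b = 1; b < 256; b += 11) {
          // High 24 bits of a and b are zero (MaskedValueIsZero holds), so
          // the 8-bit udiv/urem agree with the 32-bit ones.
          assert((uint8_t)(a / b) == (uint8_t)((uint8_t)a / (uint8_t)b));
          assert((uint8_t)(a % b) == (uint8_t)((uint8_t)a % (uint8_t)b));
        }
      return 0;
    }
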
@@ -7993,7 +7982,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V))
- return Context->getConstantExprIntegerCast(C, Ty,
+ return ConstantExpr::getIntegerCast(C, Ty,
isSigned /*Sext or ZExt*/);
// Otherwise, it must be an instruction.
@@ -8009,7 +7998,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
case Instruction::Xor:
case Instruction::AShr:
case Instruction::LShr:
- case Instruction::Shl: {
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
@@ -8046,7 +8037,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
}
default:
// TODO: Can handle more cases here.
- assert(0 && "Unreachable!");
+ llvm_unreachable("Unreachable!");
break;
}
@@ -8089,13 +8080,14 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices,
const TargetData *TD,
- LLVMContext* Context) {
+ LLVMContext *Context) {
+ if (!TD) return 0;
if (!Ty->isSized()) return 0;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = TD->getIntPtrType(*Context);
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -8110,7 +8102,7 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
}
- NewIndices.push_back(Context->getConstantInt(IntPtrTy, FirstIdx));
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
@@ -8124,14 +8116,14 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
"Offset must stay within the indexed type");
unsigned Elt = SL->getElementContainingOffset(Offset);
- NewIndices.push_back(Context->getConstantInt(Type::Int32Ty, Elt));
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt));
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
- NewIndices.push_back(Context->getConstantInt(IntPtrTy,Offset/EltSize));
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
Offset %= EltSize;
Ty = AT->getElementType();
} else {
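
FindElementAtOffset converts a flat byte offset into GEP indices by repeated division: first by the whole type's size, then into struct fields or array elements. A worked standalone analogue for an array of two-int structs (offsetof stands in for TargetData's struct layout):

    #include <cassert>
    #include <cstddef>

    struct Pair { int a; int b; };

    int main() {
      // Byte offset 20 into Pair[] (size 8): outer index 20/8 = 2, then the
      // remaining 4 bytes land in field 'b' -> indices {2, 1}.
      size_t Offset = 20;
      size_t FirstIdx = Offset / sizeof(Pair);   // 2
      Offset %= sizeof(Pair);                    // 4
      size_t Field = (Offset == offsetof(Pair, b)) ? 1 : 0;
      assert(FirstIdx == 2 && Field == 1);
      return 0;
    }
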
@@ -8154,7 +8146,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// Changing the cast operand is usually not a good idea but it is safe
// here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
- AddToWorkList(GEP);
+ Worklist.Add(GEP);
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
@@ -8163,7 +8155,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
- if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+ if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
if (GEP->hasAllConstantIndices()) {
// We are guaranteed to get a constant from EmitGEPOffset.
ConstantInt *OffsetV =
@@ -8179,10 +8171,10 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
- Instruction *NGEP = GetElementPtrInst::Create(OrigBase,
- NewIndices.begin(),
- NewIndices.end(), "");
- InsertNewInstBefore(NGEP, CI);
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(OrigBase,
+ NewIndices.begin(), NewIndices.end()) :
+ Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -8214,10 +8206,8 @@ static bool isSafeIntegerType(const Type *Ty) {
}
}
-/// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as
-/// integer types. This function implements the common transforms for all those
-/// cases.
-/// @brief Implement the transforms common to CastInst with integer operands
+/// commonIntCastTransforms - This function implements the common transforms
+/// for trunc, zext, and sext.
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
@@ -8241,11 +8231,10 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Attempt to propagate the cast into the instruction for int->int casts.
int NumCastsRemoved = 0;
- if (!isa<BitCastInst>(CI) &&
- // Only do this if the dest type is a simple type, don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
- (isSafeIntegerType(DestTy->getScalarType()) ||
+ // Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((isSafeIntegerType(DestTy->getScalarType()) ||
!isSafeIntegerType(SrcI->getType()->getScalarType())) &&
CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
@@ -8261,7 +8250,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
default:
// All the others use floating point so we shouldn't actually
// get here because of the check above.
- assert(0 && "Unknown cast type");
+ llvm_unreachable("Unknown cast type");
case Instruction::Trunc:
DoXForm = true;
break;
@@ -8307,8 +8296,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
}
if (DoXForm) {
- DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid"
- << " cast: " << CI;
+ DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI);
Value *Res = EvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode() == Instruction::SExt);
if (JustReplace)
@@ -8317,9 +8306,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
assert(Res->getType() == DestTy);
switch (CI.getOpcode()) {
- default: assert(0 && "Unknown cast type!");
+ default: llvm_unreachable("Unknown cast type!");
case Instruction::Trunc:
- case Instruction::BitCast:
// Just replace this cast with the result.
return ReplaceInstUsesWith(CI, Res);
case Instruction::ZExt: {
@@ -8332,8 +8320,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
return ReplaceInstUsesWith(CI, Res);
// We need to emit an AND to clear the high bits.
- Constant *C = Context->getConstantInt(APInt::getLowBitsSet(DestBitSize,
- SrcBitSize));
+ Constant *C = ConstantInt::get(*Context,
+ APInt::getLowBitsSet(DestBitSize, SrcBitSize));
return BinaryOperator::CreateAnd(Res, C);
}
case Instruction::SExt: {
@@ -8344,9 +8332,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
return ReplaceInstUsesWith(CI, Res);
// We need to emit a cast to truncate, then a cast to sext.
- return CastInst::Create(Instruction::SExt,
- InsertCastBefore(Instruction::Trunc, Res, Src->getType(),
- CI), DestTy);
+ return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);
}
}
}
@@ -8362,16 +8348,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
case Instruction::Or:
case Instruction::Xor:
// If we are discarding information, rewrite.
- if (DestBitSize <= SrcBitSize && DestBitSize != 1) {
- // Don't insert two casts if they cannot be eliminated. We allow
- // two casts to be inserted if the sizes are the same. This could
- // only be converting signedness, which is a noop.
- if (DestBitSize == SrcBitSize ||
- !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) ||
+ if (DestBitSize < SrcBitSize && DestBitSize != 1) {
+ // Don't insert two casts unless at least one can be eliminated.
+ if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
!ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
- Instruction::CastOps opcode = CI.getOpcode();
- Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
- Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+ Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
+ Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
return BinaryOperator::Create(
cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
}
@@ -8380,62 +8362,25 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// cast (xor bool X, true) to int --> xor (cast bool X to int), 1
if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&
SrcI->getOpcode() == Instruction::Xor &&
- Op1 == Context->getConstantIntTrue() &&
+ Op1 == ConstantInt::getTrue(*Context) &&
(!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
- Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI);
+ Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());
return BinaryOperator::CreateXor(New,
- Context->getConstantInt(CI.getType(), 1));
- }
- break;
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::SRem:
- case Instruction::URem:
- // If we are just changing the sign, rewrite.
- if (DestBitSize == SrcBitSize) {
- // Don't insert two casts if they cannot be eliminated. We allow
- // two casts to be inserted if the sizes are the same. This could
- // only be converting signedness, which is a noop.
- if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
- !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
- Value *Op0c = InsertCastBefore(Instruction::BitCast,
- Op0, DestTy, *SrcI);
- Value *Op1c = InsertCastBefore(Instruction::BitCast,
- Op1, DestTy, *SrcI);
- return BinaryOperator::Create(
- cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
- }
+ ConstantInt::get(CI.getType(), 1));
}
break;
- case Instruction::Shl:
- // Allow changing the sign of the source operand. Do not allow
- // changing the size of the shift, UNLESS the shift amount is a
- // constant. We must not change variable sized shifts to a smaller
- // size, because it is undefined to shift more bits out than exist
- // in the value.
- if (DestBitSize == SrcBitSize ||
- (DestBitSize < SrcBitSize && isa<Constant>(Op1))) {
- Instruction::CastOps opcode = (DestBitSize == SrcBitSize ?
- Instruction::BitCast : Instruction::Trunc);
- Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
- Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+ case Instruction::Shl: {
+ // Canonicalize trunc inside shl, if we can.
+ ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
+ if (CI && DestBitSize < SrcBitSize &&
+ CI->getLimitedValue(DestBitSize) < DestBitSize) {
+ Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
+ Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
return BinaryOperator::CreateShl(Op0c, Op1c);
}
break;
- case Instruction::AShr:
- // If this is a signed shr, and if all bits shifted in are about to be
- // truncated off, turn it into an unsigned shr to allow greater
- // simplifications.
- if (DestBitSize < SrcBitSize &&
- isa<ConstantInt>(Op1)) {
- uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
- if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
- // Insert the new logical shift right.
- return BinaryOperator::CreateLShr(Op0, Op1);
- }
- }
- break;
+ }
}
return 0;
}
@@ -8450,11 +8395,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
- if (DestBitWidth == 1 &&
- isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
- Constant *One = Context->getConstantInt(Src->getType(), 1);
- Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
- Value *Zero = Context->getNullValue(Src->getType());
+ if (DestBitWidth == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = Builder->CreateAnd(Src, One, "tmp");
+ Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
@@ -8469,12 +8413,12 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
if (MaskedValueIsZero(ShiftOp, Mask)) {
if (ShAmt >= DestBitWidth) // All zeros.
- return ReplaceInstUsesWith(CI, Context->getNullValue(Ty));
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));
// Okay, we can shrink this. Truncate the input, then return a new
// shift.
- Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI);
- Value *V2 = Context->getConstantExprTrunc(ShAmtV, Ty);
+ Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName());
+ Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
return BinaryOperator::CreateLShr(V1, V2);
}
}
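
The shrink-a-shift case above is legal precisely when the bits the wide lshr would pull down into the kept range are known zero. Check for trunc i32 -> i8 of an lshr by 4, with bits 8..11 forced to zero:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t i = 0; i < 1000; ++i) {
        uint32_t x = (i * 0x9E3779B9u) & ~0xF00u;    // bits 8..11 cleared
        uint8_t wide   = (uint8_t)(x >> 4);          // trunc(lshr x, 4)
        uint8_t narrow = (uint8_t)((uint8_t)x >> 4); // lshr(trunc x, 4)
        assert(wide == narrow);
      }
      return 0;
    }
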
@@ -8499,20 +8443,15 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (!DoXform) return ICI;
Value *In = ICI->getOperand(0);
- Value *Sh = Context->getConstantInt(In->getType(),
+ Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits()-1);
- In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
- In->getName()+".lobit"),
- CI);
+ In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
if (In->getType() != CI.getType())
- In = CastInst::CreateIntegerCast(In, CI.getType(),
- false/*ZExt*/, "tmp", &CI);
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
- Constant *One = Context->getConstantInt(In->getType(), 1);
- In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One,
- In->getName()+".not"),
- CI);
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName()+".not");
}
return ReplaceInstUsesWith(CI, In);
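
The lobit transform above replaces a sign test plus zext with a logical shift of the sign bit into bit zero, toggling it for the SGT -1 form. Standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t samples[] = {0, 1, -1, 42, -42, INT32_MIN, INT32_MAX};
      for (int32_t x : samples) {
        uint32_t lobit = (uint32_t)x >> 31;          // the CreateLShr above
        assert(lobit == (uint32_t)(x < 0));          // zext(x <s 0)
        assert((lobit ^ 1u) == (uint32_t)(x > -1));  // the ".not" (xor 1) form
      }
      return 0;
    }
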
@@ -8545,8 +8484,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false
// (X&4) != 2 --> true
- Constant *Res = Context->getConstantInt(Type::Int1Ty, isNE);
- Res = Context->getConstantExprZExt(Res, CI.getType());
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
return ReplaceInstUsesWith(CI, Res);
}
@@ -8555,15 +8494,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (ShiftAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
- In = InsertNewInstBefore(BinaryOperator::CreateLShr(In,
- Context->getConstantInt(In->getType(), ShiftAmt),
- In->getName()+".lobit"), CI);
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShiftAmt),
+ In->getName()+".lobit");
}
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
- Constant *One = Context->getConstantInt(In->getType(), 1);
- In = BinaryOperator::CreateXor(In, One, "tmp");
- InsertNewInstBefore(cast<Instruction>(In), CI);
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, "tmp");
}
if (CI.getType() == In->getType())
@@ -8600,21 +8537,21 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// SrcSize > DstSize: trunc(a) & mask
if (SrcSize < DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = Context->getConstantInt(A->getType(), AndValue);
- Instruction *And =
- BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
- InsertNewInstBefore(And, CI);
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
return new ZExtInst(And, CI.getType());
- } else if (SrcSize == DstSize) {
+ }
+
+ if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, Context->getConstantInt(A->getType(),
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
AndValue));
- } else if (SrcSize > DstSize) {
- Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
- InsertNewInstBefore(Trunc, CI);
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
return BinaryOperator::CreateAnd(Trunc,
- Context->getConstantInt(Trunc->getType(),
+ ConstantInt::get(Trunc->getType(),
AndValue));
}
}
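
The three zext(trunc) cases above all reduce to a single mask, possibly with one extra cast, because zero-extension only re-creates bits the truncation zeroed. A quick check of two of the cases:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t i = 0; i < 1000; ++i) {
        uint32_t a = i * 0x01000193u;
        // SrcSize == DstSize: zext(trunc a to i16) back to i32 is one AND.
        assert((uint32_t)(uint16_t)a == (a & 0xFFFFu));
        // SrcSize > DstSize: trunc i32->i8 then zext i8->i16 is a narrower
        // trunc plus the same low-bit mask.
        assert((uint16_t)(uint8_t)a == (uint16_t)((uint16_t)a & 0xFFu));
      }
      return 0;
    }
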
@@ -8631,8 +8568,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
(transformZExtICmp(LHS, CI, false) ||
transformZExtICmp(RHS, CI, false))) {
- Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
- Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
return BinaryOperator::Create(Instruction::Or, LCast, RCast);
}
}
@@ -8645,7 +8582,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (TI0->getType() == CI.getType())
return
BinaryOperator::CreateAnd(TI0,
- Context->getConstantExprZExt(C, CI.getType()));
+ ConstantExpr::getZExt(C, CI.getType()));
}
// zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
@@ -8657,9 +8594,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
Value *TI0 = TI->getOperand(0);
if (TI0->getType() == CI.getType()) {
- Constant *ZC = Context->getConstantExprZExt(C, CI.getType());
- Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
- InsertNewInstBefore(NewAnd, *And);
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
return BinaryOperator::CreateXor(NewAnd, ZC);
}
}
@@ -8674,14 +8610,14 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
Value *Src = CI.getOperand(0);
// Canonicalize sign-extend from i1 to a select.
- if (Src->getType() == Type::Int1Ty)
+ if (Src->getType() == Type::getInt1Ty(*Context))
return SelectInst::Create(Src,
- Context->getConstantIntAllOnesValue(CI.getType()),
- Context->getNullValue(CI.getType()));
+ Constant::getAllOnesValue(CI.getType()),
+ Constant::getNullValue(CI.getType()));
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
- if (getOpcode(Src) == Instruction::Trunc) {
+ if (Operator::getOpcode(Src) == Instruction::Trunc) {
Value *Op = cast<User>(Src)->getOperand(0);
unsigned OpBits = Op->getType()->getScalarSizeInBits();
unsigned MidBits = Src->getType()->getScalarSizeInBits();
@@ -8729,9 +8665,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
unsigned MidSize = Src->getType()->getScalarSizeInBits();
unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
- Constant *ShAmtV = Context->getConstantInt(CI.getType(), ShAmt);
- I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
- CI.getName()), CI);
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ I = Builder->CreateShl(I, ShAmtV, CI.getName());
return BinaryOperator::CreateAShr(I, ShAmtV);
}
}
@@ -8742,18 +8677,18 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
/// in the specified FP type without changing its value.
static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem,
- LLVMContext* Context) {
+ LLVMContext *Context) {
bool losesInfo;
APFloat F = CFP->getValueAPF();
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
if (!losesInfo)
- return Context->getConstantFP(F);
+ return ConstantFP::get(*Context, F);
return 0;
}
/// LookThroughFPExtensions - If this is an fp extension instruction, look
/// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) {
+static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
return LookThroughFPExtensions(I->getOperand(0), Context);
@@ -8762,12 +8697,12 @@ static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) {
// that can accurately represent it. This allows us to turn
// (float)((double)X+2.0) into x+2.0f.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType() == Type::PPC_FP128Ty)
+ if (CFP->getType() == Type::getPPC_FP128Ty(*Context))
return V; // No constant folding of this.
// See if the value can be truncated to float and then reextended.
if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context))
return V;
- if (CFP->getType() == Type::DoubleTy)
+ if (CFP->getType() == Type::getDoubleTy(*Context))
return V; // Won't shrink.
if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context))
return V;
@@ -8804,10 +8739,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// the cast, do this xform.
if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
- LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
- CI.getType(), CI);
- RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
- CI.getType(), CI);
+ LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+ RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
}
}
@@ -8875,10 +8808,11 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// trunc to be exposed to other transforms. Don't do this for extending
// ptrtoint's, because we don't know if the target sign or zero extends its
// pointers.
- if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
- Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
- TD->getIntPtrType(),
- "tmp"), CI);
+ if (TD &&
+ CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
return new TruncInst(P, CI.getType());
}
@@ -8891,65 +8825,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// allows the trunc to be exposed to other transforms. Don't do this for
// extending inttoptr's, because we don't know if the target sign or zero
// extends to pointers.
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+ if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
- Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
- TD->getIntPtrType(),
- "tmp"), CI);
+ Value *P = Builder->CreateTrunc(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
return new IntToPtrInst(P, CI.getType());
}
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType();
- if (!DestPointee->isSized()) return 0;
-
- // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP.
- ConstantInt *Cst;
- Value *X;
- if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)),
- m_ConstantInt(Cst)))) {
- // If the source and destination operands have the same type, see if this
- // is a single-index GEP.
- if (X->getType() == CI.getType()) {
- // Get the size of the pointee type.
- uint64_t Size = TD->getTypeAllocSize(DestPointee);
-
- // Convert the constant to intptr type.
- APInt Offset = Cst->getValue();
- Offset.sextOrTrunc(TD->getPointerSizeInBits());
-
- // If Offset is evenly divisible by Size, we can do this xform.
- if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
- Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
- return GetElementPtrInst::Create(X, Context->getConstantInt(Offset));
- }
- }
- // TODO: Could handle other cases, e.g. where add is indexing into field of
- // struct etc.
- } else if (CI.getOperand(0)->hasOneUse() &&
- match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) {
- // Otherwise, if this is inttoptr(add x, cst), try to turn this into an
- // "inttoptr+GEP" instead of "add+intptr".
-
- // Get the size of the pointee type.
- uint64_t Size = TD->getTypeAllocSize(DestPointee);
-
- // Convert the constant to intptr type.
- APInt Offset = Cst->getValue();
- Offset.sextOrTrunc(TD->getPointerSizeInBits());
-
- // If Offset is evenly divisible by Size, we can do this xform.
- if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
- Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
-
- Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(),
- "tmp"), CI);
- return GetElementPtrInst::Create(P,
- Context->getConstantInt(Offset), "tmp");
- }
- }
+
return 0;
}
@@ -8960,10 +8845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
const Type *SrcTy = Src->getType();
const Type *DestTy = CI.getType();
- if (SrcTy->isInteger() && DestTy->isInteger()) {
- if (Instruction *Result = commonIntCastTransforms(CI))
- return Result;
- } else if (isa<PointerType>(SrcTy)) {
+ if (isa<PointerType>(SrcTy)) {
if (Instruction *I = commonPointerCastTransforms(CI))
return I;
} else {
@@ -8987,8 +8869,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
return 0;
- // If we are casting a malloc or alloca to a pointer to a type of the same
+ // If we are casting an alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
return V;
@@ -8996,7 +8880,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
- Constant *ZeroUInt = Context->getNullValue(Type::Int32Ty);
+ Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
@@ -9008,8 +8892,30 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
- return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "",
- ((Instruction*) NULL));
+ return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "",
+ ((Instruction*) NULL));
+ }
+ }
+
+ if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1) {
+ if (!isa<VectorType>(SrcTy)) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(*Context)));
+ }
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+ }
+
+ if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1) {
+ if (!isa<VectorType>(DestTy)) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(*Context)));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
}
}
@@ -9030,10 +8936,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Tmp->getOperand(0)->getType() == DestTy) ||
((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&
Tmp->getOperand(0)->getType() == DestTy)) {
- Value *LHS = InsertCastBefore(Instruction::BitCast,
- SVI->getOperand(0), DestTy, CI);
- Value *RHS = InsertCastBefore(Instruction::BitCast,
- SVI->getOperand(1), DestTy, CI);
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
// Return a new shuffle vector. Use the same element ID's, as we
// know the vector types match #elts.
return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
@@ -9076,9 +8980,9 @@ static unsigned GetSelectFoldableOperands(Instruction *I) {
/// GetSelectFoldableConstant - For the same transformation as the previous
/// function, return the identity constant that goes into the select.
static Constant *GetSelectFoldableConstant(Instruction *I,
- LLVMContext* Context) {
+ LLVMContext *Context) {
switch (I->getOpcode()) {
- default: assert(0 && "This cannot happen!"); abort();
+ default: llvm_unreachable("This cannot happen!");
case Instruction::Add:
case Instruction::Sub:
case Instruction::Or:
@@ -9086,11 +8990,11 @@ static Constant *GetSelectFoldableConstant(Instruction *I,
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- return Context->getNullValue(I->getType());
+ return Constant::getNullValue(I->getType());
case Instruction::And:
- return Context->getAllOnesValue(I->getType());
+ return Constant::getAllOnesValue(I->getType());
case Instruction::Mul:
- return Context->getConstantInt(I->getType(), 1);
+ return ConstantInt::get(I->getType(), 1);
}
}
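
GetSelectFoldableConstant returns, per opcode, the identity element that makes the folded-away select arm a no-op. The choices are easy to sanity-check in plain arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 1000; ++x) {
        assert(x + 0u == x);      // Add/Sub/Or/Xor and the shifts: 0
        assert((x & ~0u) == x);   // And: all-ones
        assert(x * 1u == x);      // Mul: 1
      }
      return 0;
    }
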
@@ -9110,7 +9014,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// Fold this by inserting a select from the input values.
SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName()+".v");
+ FI->getOperand(0), SI.getName()+".v");
InsertNewInstBefore(NewSI, SI);
return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
@@ -9160,7 +9064,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
else
return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
}
- assert(0 && "Shouldn't get here");
+ llvm_unreachable("Shouldn't get here");
return 0;
}
@@ -9202,7 +9106,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
NewSel->takeName(TVI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
- assert(0 && "Unknown instruction!!");
+ llvm_unreachable("Unknown instruction!!");
}
}
}
@@ -9231,7 +9135,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
NewSel->takeName(FVI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
- assert(0 && "Unknown instruction!!");
+ llvm_unreachable("Unknown instruction!!");
}
}
}
@@ -9266,7 +9170,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
return ReplaceInstUsesWith(SI, FalseVal);
// X < C ? X : C-1 --> X > C-1 ? C-1 : X
- Constant *AdjustedRHS = SubOne(CI, Context);
+ Constant *AdjustedRHS = SubOne(CI);
if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
(CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
Pred = ICmpInst::getSwappedPredicate(Pred);
@@ -9286,7 +9190,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
return ReplaceInstUsesWith(SI, FalseVal);
// X > C ? X : C+1 --> X < C+1 ? C+1 : X
- Constant *AdjustedRHS = AddOne(CI, Context);
+ Constant *AdjustedRHS = AddOne(CI);
if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
(CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
Pred = ICmpInst::getSwappedPredicate(Pred);
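
Both adjusted-RHS rewrites above preserve the selected value for any constant short of the extreme one, which the isMinValue/isMaxValue guards already peeled off. A check of the min-like form with a sample constant:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t c = 100;   // the C == INT32_MIN case is handled earlier
      for (int32_t x = -200; x <= 200; ++x) {
        int32_t a = x < c ? x : c - 1;       // X < C ? X : C-1
        int32_t b = x > c - 1 ? c - 1 : x;   // X > C-1 ? C-1 : X
        assert(a == b);
      }
      return 0;
    }
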
@@ -9323,10 +9227,10 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
(Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
Value *In = ICI->getOperand(0);
- Value *Sh = Context->getConstantInt(In->getType(),
+ Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
- In->getName()+".lobit"),
+ In->getName()+".lobit"),
*ICI);
if (In->getType() != SI.getType())
In = CastInst::CreateIntegerCast(In, SI.getType(),
@@ -9365,6 +9269,14 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
return Changed ? &SI : 0;
}
+/// isDefinedInBB - Return true if the value is an instruction defined in the
+/// specified basic block.
+static bool isDefinedInBB(const Value *V, const BasicBlock *BB) {
+ const Instruction *I = dyn_cast<Instruction>(V);
+ return I != 0 && I->getParent() == BB;
+}
+
+
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -9390,7 +9302,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return ReplaceInstUsesWith(SI, FalseVal);
}
- if (SI.getType() == Type::Int1Ty) {
+ if (SI.getType() == Type::getInt1Ty(*Context)) {
if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
if (C->getZExtValue()) {
// Change: A = select B, true, C --> A = or B, C
@@ -9438,26 +9350,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
-
- // (x <s 0) ? -1 : 0 -> ashr x, 31
- if (TrueValC->isAllOnesValue() && FalseValC->isZero())
- if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) {
- if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) {
- // The comparison constant and the result are not neccessarily the
- // same width. Make an all-ones value by inserting a AShr.
- Value *X = IC->getOperand(0);
- uint32_t Bits = X->getType()->getScalarSizeInBits();
- Constant *ShAmt = Context->getConstantInt(X->getType(), Bits-1);
- Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
- ShAmt, "ones");
- InsertNewInstBefore(SRA, SI);
-
- // Then cast to the appropriate width.
- return CastInst::CreateIntegerCast(SRA, SI.getType(), true);
- }
- }
-
-
// If one of the constants is zero (we know they can't both be) and we
// have an icmp instruction with zero, and we have an 'and' with the
// non-constant value, eliminate this whole mess. This corresponds to
@@ -9568,10 +9460,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select C, (add X, Y), (sub X, Z)
Value *NegVal; // Compute -Z
if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
- NegVal = Context->getConstantExprNeg(C);
+ NegVal = ConstantExpr::getNeg(C);
} else {
NegVal = InsertNewInstBefore(
- BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI);
+ BinaryOperator::CreateNeg(SubOp->getOperand(1),
+ "tmp"), SI);
}
Value *NewTrueOp = OtherAddOp;
@@ -9595,6 +9488,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return FoldI;
}
+ // See if we can fold the select into a phi node. The true/false values have
+ // to be live in the predecessor blocks. If they are instructions in SI's
+ // block, we can't map to the predecessor.
+ if (isa<PHINode>(SI.getCondition()) &&
+ (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) ||
+ isa<PHINode>(SI.getTrueValue())) &&
+ (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) ||
+ isa<PHINode>(SI.getFalseValue())))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
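A source-level analogy of the select-into-phi fold guarded above, for the case where the condition is per-predecessor constant; the function names and harness here are invented for illustration:

#include <cassert>

// When the select's condition is a phi, FoldOpIntoPhi can evaluate the
// select separately in each predecessor, modeled here at source level.
static int before(bool fromA, int x, int y) {
  bool cond = fromA;   // stands in for the phi feeding the condition
  return cond ? x : y; // the select in the phi's block
}
static int after(bool fromA, int x, int y) {
  return fromA ? x : y; // select folded back into the "phi"
}

int main() {
  assert(before(true, 1, 2) == after(true, 1, 2));
  assert(before(false, 1, 2) == after(false, 1, 2));
  return 0;
}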
if (BinaryOperator::isNot(CondVal)) {
SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
SI.setOperand(1, FalseVal);
@@ -9617,7 +9521,7 @@ static unsigned EnforceKnownAlignment(Value *V,
User *U = dyn_cast<User>(V);
if (!U) return Align;
- switch (getOpcode(U)) {
+ switch (Operator::getOpcode(U)) {
default: break;
case Instruction::BitCast:
return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
@@ -9650,16 +9554,13 @@ static unsigned EnforceKnownAlignment(Value *V,
Align = PrefAlign;
}
}
- } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
- // If there is a requested alignment and if this is an alloca, round up. We
- // don't do this for malloc, because some systems can't respect the request.
- if (isa<AllocaInst>(AI)) {
- if (AI->getAlignment() >= PrefAlign)
- Align = AI->getAlignment();
- else {
- AI->setAlignment(PrefAlign);
- Align = PrefAlign;
- }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ Align = AI->getAlignment();
+ else {
+ AI->setAlignment(PrefAlign);
+ Align = PrefAlign;
}
}
@@ -9694,7 +9595,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
- MI->setAlignment(MinAlign);
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MinAlign, false));
return MI;
}
@@ -9715,7 +9617,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// Use an integer load+store unless we can find something better.
Type *NewPtrTy =
- Context->getPointerTypeUnqual(Context->getIntegerType(Size<<3));
+ PointerType::getUnqual(IntegerType::get(*Context, Size<<3));
// Memcpy forces the use of i8* for the source and destination. That means
// that if you're using memcpy to move one double around, you'll get a cast
@@ -9725,7 +9627,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// integer datatype.
if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
- if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
while (!SrcETy->isSingleValueType()) {
@@ -9744,7 +9646,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
if (SrcETy->isSingleValueType())
- NewPtrTy = Context->getPointerTypeUnqual(SrcETy);
+ NewPtrTy = PointerType::getUnqual(SrcETy);
}
}
@@ -9754,28 +9656,29 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
- Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
- Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
+ Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
InsertNewInstBefore(L, *MI);
InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
  // Set the size of the copy to 0; it will be deleted on the next iteration.
- MI->setOperand(3, Context->getNullValue(MemOpLength->getType()));
+ MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
return MI;
}
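SimplifyMemTransfer above rewrites a small, known-size memcpy as a single integer load and store of Size << 3 bits. A standalone C++ sketch of the same idea for an 8-byte copy (the helper name and harness are invented):

#include <cassert>
#include <cstdint>
#include <cstring>

// An 8-byte memcpy behaves like one 64-bit "load" followed by one
// 64-bit "store" (Size << 3 == 64 bits for Size == 8).
static void copy8(void *dst, const void *src) {
  uint64_t tmp;
  std::memcpy(&tmp, src, sizeof tmp); // the integer load
  std::memcpy(dst, &tmp, sizeof tmp); // the integer store
}

int main() {
  double a = 3.14, b = 0.0;
  copy8(&b, &a);
  assert(b == 3.14);
  return 0;
}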
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
if (MI->getAlignment() < Alignment) {
- MI->setAlignment(Alignment);
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
return MI;
}
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
- if (!LenC || !FillC || FillC->getType() != Type::Int8Ty)
+ if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context))
return 0;
uint64_t Len = LenC->getZExtValue();
Alignment = MI->getAlignment();
@@ -9785,21 +9688,21 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
- const Type *ITy = Context->getIntegerType(Len*8); // n=1 -> i8.
+ const Type *ITy = IntegerType::get(*Context, Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
- Dest = InsertBitCastBefore(Dest, Context->getPointerTypeUnqual(ITy), *MI);
+ Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
  // Alignment 0 means the same as alignment 1 for memset, but not for store.
if (Alignment == 0) Alignment = 1;
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
- InsertNewInstBefore(new StoreInst(Context->getConstantInt(ITy, Fill),
+ InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
Dest, false, Alignment), *MI);
  // Set the size of the copy to 0; it will be deleted on the next iteration.
- MI->setLength(Context->getNullValue(LenC->getType()));
+ MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;
}
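The fill computation above replicates one byte across the store width by multiplying with 0x0101010101010101. A small standalone C++ check of that identity (illustrative only; the helper is not part of the patch):

#include <cassert>
#include <cstdint>

// Multiplying a byte by 0x0101...01 copies it into every byte lane;
// masking keeps only the low len bytes for len in {1,2,4,8}.
static uint64_t replicateByte(uint8_t c, unsigned len) {
  uint64_t fill = uint64_t(c) * 0x0101010101010101ULL;
  return len == 8 ? fill : fill & ((1ULL << (len * 8)) - 1);
}

int main() {
  assert(replicateByte(0xAB, 1) == 0xABULL);
  assert(replicateByte(0xAB, 4) == 0xABABABABULL); // memset(s, 0xAB, 4)
  return 0;
}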
@@ -9820,8 +9723,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return &CI;
}
-
-
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallSite(&CI);
@@ -9891,9 +9792,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC lvx -> load if the pointer is known aligned.
// Turn X86 loadups -> load if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- Value *Ptr = InsertBitCastBefore(II->getOperand(1),
- Context->getPointerTypeUnqual(II->getType()),
- CI);
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+ PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
break;
@@ -9902,8 +9802,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn stvx -> store if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
const Type *OpPtrTy =
- Context->getPointerTypeUnqual(II->getOperand(1)->getType());
- Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
return new StoreInst(II->getOperand(1), Ptr);
}
break;
@@ -9913,8 +9813,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn X86 storeu -> store if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
const Type *OpPtrTy =
- Context->getPointerTypeUnqual(II->getOperand(2)->getType());
- Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
return new StoreInst(II->getOperand(2), Ptr);
}
break;
@@ -9951,9 +9851,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
- Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
- Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
- Value *Result = Context->getUndef(Op0->getType());
+ Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
Value *ExtractedElts[32];
@@ -9966,16 +9866,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Idx &= 31; // Match the hardware behavior.
if (ExtractedElts[Idx] == 0) {
- Instruction *Elt =
- new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
- InsertNewInstBefore(Elt, CI);
- ExtractedElts[Idx] = Elt;
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false),
+ "tmp");
}
// Insert this value into the result vector.
- Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
- i, "tmp");
- InsertNewInstBefore(cast<Instruction>(Result), CI);
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ ConstantInt::get(Type::getInt32Ty(*Context), i, false),
+ "tmp");
}
return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
@@ -9999,7 +9899,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
TerminatorInst *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI)) {
+ if (isa<AllocaInst>(BI) || isMalloc(BI)) {
CannotRemove = true;
break;
}
@@ -10055,7 +9955,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -10076,11 +9976,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
Instruction *OldCall = CS.getInstruction();
// If the call and callee calling conventions don't match, this call must
// be unreachable, as the call is undefined.
- new StoreInst(Context->getConstantIntTrue(),
- Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)),
+ new StoreInst(ConstantInt::getTrue(*Context),
+ UndefValue::get(Type::getInt1PtrTy(*Context)),
OldCall);
- if (!OldCall->use_empty())
- OldCall->replaceAllUsesWith(Context->getUndef(OldCall->getType()));
+ // If OldCall does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!OldCall->getType()->isVoidTy())
+ OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall)) // Not worth removing an invoke here.
return EraseInstFromFunction(*OldCall);
return 0;
@@ -10090,18 +9992,20 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// This instruction is not reachable, just remove it. We insert a store to
// undef so that we know that this code is not reachable, despite the fact
// that we can't modify the CFG here.
- new StoreInst(Context->getConstantIntTrue(),
- Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)),
+ new StoreInst(ConstantInt::getTrue(*Context),
+ UndefValue::get(Type::getInt1PtrTy(*Context)),
CS.getInstruction());
- if (!CS.getInstruction()->use_empty())
+ // If CS does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!CS.getInstruction()->getType()->isVoidTy())
CS.getInstruction()->
- replaceAllUsesWith(Context->getUndef(CS.getInstruction()->getType()));
+ replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
// Don't break the CFG, insert a dummy cond branch.
BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
- Context->getConstantIntTrue(), II);
+ ConstantInt::getTrue(*Context), II);
}
return EraseInstFromFunction(*CS.getInstruction());
}
@@ -10165,13 +10069,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (Callee->isDeclaration() &&
// Conversion is ok if changing from one pointer type to another or from
// a pointer to an integer of the same size.
- !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
- (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
+ !((isa<PointerType>(OldRetTy) || !TD ||
+ OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ (isa<PointerType>(NewRetTy) || !TD ||
+ NewRetTy == TD->getIntPtrType(Caller->getContext()))))
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
// void -> non-void is handled specially
- NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
+ !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
@@ -10212,8 +10118,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
bool isConvertible = ActTy == ParamTy ||
- ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
- (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
+ (TD && ((isa<PointerType>(ParamTy) ||
+ ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ (isa<PointerType>(ActTy) ||
+ ActTy == TD->getIntPtrType(Caller->getContext()))));
if (Callee->isDeclaration() && !isConvertible) return false;
}
@@ -10260,8 +10168,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
} else {
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
false, ParamTy, false);
- CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
- Args.push_back(InsertNewInstBefore(NewCast, *Caller));
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
}
// Add any parameter attributes.
@@ -10270,26 +10177,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// If the function takes more arguments than the call was taking, add them
- // now...
+ // now.
for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
- Args.push_back(Context->getNullValue(FT->getParamType(i)));
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
- // If we are removing arguments to the function, emit an obnoxious warning...
+ // If we are removing arguments to the function, emit an obnoxious warning.
if (FT->getNumParams() < NumActualArgs) {
if (!FT->isVarArg()) {
- cerr << "WARNING: While resolving call to function '"
- << Callee->getName() << "' arguments were dropped!\n";
+ errs() << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
} else {
- // Add all of the arguments in their promoted form to the arg list...
+ // Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
const Type *PTy = getPromotedType((*AI)->getType());
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
- Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false,
- PTy, false);
- Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
- InsertNewInstBefore(Cast, *Caller);
- Args.push_back(Cast);
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
} else {
Args.push_back(*AI);
}
@@ -10304,10 +10209,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (Attributes FnAttrs = CallerPAL.getFnAttributes())
attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
- if (NewRetTy == Type::VoidTy)
+ if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),attrVec.end());
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+ attrVec.end());
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -10329,7 +10235,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Insert a cast of the return type as necessary.
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
- if (NV->getType() != Type::VoidTy) {
+ if (!NV->getType()->isVoidTy()) {
Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
OldRetTy, false);
NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
@@ -10343,16 +10249,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Otherwise, it's a call, just insert cast right after the call instr
InsertNewInstBefore(NC, *Caller);
}
- AddUsersToWorkList(*Caller);
+ Worklist.AddUsersToWorkList(*Caller);
} else {
- NV = Context->getUndef(Caller->getType());
+ NV = UndefValue::get(Caller->getType());
}
}
- if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+
+ if (!Caller->use_empty())
Caller->replaceAllUsesWith(NV);
- Caller->eraseFromParent();
- RemoveFromWorkList(Caller);
+
+ EraseInstFromFunction(*Caller);
return true;
}
@@ -10469,14 +10376,14 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
- FunctionType *NewFTy =
- Context->getFunctionType(FTy->getReturnType(), NewTypes,
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
FTy->isVarArg());
Constant *NewCallee =
- NestF->getType() == Context->getPointerTypeUnqual(NewFTy) ?
- NestF : Context->getConstantExprBitCast(NestF,
- Context->getPointerTypeUnqual(NewFTy));
- const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end());
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+ NewAttrs.end());
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -10495,10 +10402,10 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
setCallingConv(cast<CallInst>(Caller)->getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
- if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+ if (!Caller->getType()->isVoidTy())
Caller->replaceAllUsesWith(NewCaller);
Caller->eraseFromParent();
- RemoveFromWorkList(Caller);
+ Worklist.Remove(Caller);
return 0;
}
}
@@ -10508,13 +10415,13 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
// code sort out any function type mismatches.
Constant *NewCallee =
NestF->getType() == PTy ? NestF :
- Context->getConstantExprBitCast(NestF, PTy);
+ ConstantExpr::getBitCast(NestF, PTy);
CS.setCalledFunction(NewCallee);
return CS.getInstruction();
}
-/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
-/// and if a/b/c/d and the add's all have a single use, turn this into two phi's
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the adds all have a single use, turn this into a phi
/// and a single binop.
Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10526,8 +10433,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
const Type *LHSType = LHSVal->getType();
const Type *RHSType = RHSVal->getType();
- // Scan to see if all operands are the same opcode, all have one use, and all
- // kill their operands (i.e. the operands have one use).
+ // Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
@@ -10547,6 +10453,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
}
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return 0;
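A source-level analogy of the fold this function performs, with the new guard above ruling out the two-phi case; the names and values are invented for illustration:

#include <cassert>

// phi [add(a,b), add(a,d)] with a shared operand 'a' becomes one phi
// of the varying operand plus a single add. If both operands varied,
// the fold would need two phis and is now rejected.
static int before(bool c, int a, int b, int d) { return c ? (a + b) : (a + d); }
static int after(bool c, int a, int b, int d)  { return a + (c ? b : d); }

int main() {
  for (int c = 0; c <= 1; ++c)
    assert(before(c, 1, 2, 3) == after(c, 1, 2, 3));
  return 0;
}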
// Otherwise, this is safe to transform!
@@ -10589,8 +10502,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
CmpInst *CIOp = cast<CmpInst>(FirstInst);
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
- RHSVal);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
}
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
@@ -10601,9 +10514,13 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// This is true if all GEP bases are allocas and if all indices into them are
// constants.
bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
- // Scan to see if all operands are the same opcode, all have one use, and all
- // kill their operands (i.e. the operands have one use).
+ // Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
@@ -10632,7 +10549,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
return 0;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return 0;
+
FixedOperands[op] = 0; // Needs a PHI.
+ NeededPhi = true;
}
}
@@ -10678,8 +10604,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
}
Value *Base = FixedOperands[0];
- return GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
- FixedOperands.end());
+ return cast<GEPOperator>(FirstInst)->isInBounds() ?
+ GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
+ FixedOperands.end()) :
+ GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+ FixedOperands.end());
}
@@ -10836,7 +10765,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
assert(isa<LoadInst>(FirstInst) && "Unknown operation");
@@ -10929,7 +10858,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
PotentiallyDeadPHIs.insert(&PN);
if (DeadPHICycle(PU, PotentiallyDeadPHIs))
- return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType()));
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
// If this phi has a single use, and if that use just computes a value for
@@ -10941,7 +10870,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (PHIUser->hasOneUse() &&
(isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
PHIUser->use_back() == &PN) {
- return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType()));
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
}
@@ -10982,30 +10911,14 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
return 0;
}
-static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
- Instruction *InsertPoint,
- InstCombiner *IC) {
- unsigned PtrSize = DTy->getScalarSizeInBits();
- unsigned VTySize = V->getType()->getScalarSizeInBits();
- // We must cast correctly to the pointer type. Ensure that we
- // sign extend the integer value if it is smaller as this is
- // used for address computation.
- Instruction::CastOps opcode =
- (VTySize < PtrSize ? Instruction::SExt :
- (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc));
- return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint);
-}
-
-
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
- // Is it 'getelementptr %P, i32 0' or 'getelementptr %P'
- // If so, eliminate the noop.
+  // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P'; they are no-ops.
if (GEP.getNumOperands() == 1)
return ReplaceInstUsesWith(GEP, PtrOp);
if (isa<UndefValue>(GEP.getOperand(0)))
- return ReplaceInstUsesWith(GEP, Context->getUndef(GEP.getType()));
+ return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
bool HasZeroPointerIndex = false;
if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
@@ -11015,78 +10928,48 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return ReplaceInstUsesWith(GEP, PtrOp);
// Eliminate unneeded casts for indices.
- bool MadeChange = false;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
- i != e; ++i, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
- if (CastInst *CI = dyn_cast<CastInst>(*i)) {
- if (CI->getOpcode() == Instruction::ZExt ||
- CI->getOpcode() == Instruction::SExt) {
- const Type *SrcTy = CI->getOperand(0)->getType();
- // We can eliminate a cast from i32 to i64 iff the target
- // is a 32-bit pointer target.
- if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
- MadeChange = true;
- *i = CI->getOperand(0);
- }
- }
- }
+ if (TD) {
+ bool MadeChange = false;
+ unsigned PtrSize = TD->getPointerSizeInBits();
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ if (!isa<SequentialType>(*GTI)) continue;
+
// If we are using a wider index than needed for this platform, shrink it
- // to what we need. If narrower, sign-extend it to what we need.
- // If the incoming value needs a cast instruction,
- // insert it. This explicit cast can make subsequent optimizations more
- // obvious.
- Value *Op = *i;
- if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
- if (Constant *C = dyn_cast<Constant>(Op)) {
- *i = Context->getConstantExprTrunc(C, TD->getIntPtrType());
- MadeChange = true;
- } else {
- Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
- GEP);
- *i = Op;
- MadeChange = true;
- }
- } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) {
- if (Constant *C = dyn_cast<Constant>(Op)) {
- *i = Context->getConstantExprSExt(C, TD->getIntPtrType());
- MadeChange = true;
- } else {
- Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(),
- GEP);
- *i = Op;
- MadeChange = true;
- }
- }
+ // to what we need. If narrower, sign-extend it to what we need. This
+ // explicit cast can make subsequent optimizations more obvious.
+ unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
+ if (OpBits == PtrSize)
+ continue;
+
+ *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
+ MadeChange = true;
}
+ if (MadeChange) return &GEP;
}
- if (MadeChange) return &GEP;
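A standalone C++ illustration of the index canonicalization above: narrow indices are sign-extended to pointer width so negative offsets survive the widening (the buffer and values are invented):

#include <cassert>
#include <cstdint>

int main() {
  int16_t narrow = -4;                           // index narrower than a pointer
  intptr_t wide = static_cast<intptr_t>(narrow); // the sign extension ("sext")
  assert(wide == -4);                            // negative offset preserved

  int buf[8] = {0};
  int *p = &buf[4];
  assert(&p[wide] == &buf[0]); // same element with the widened index
  return 0;
}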
// Combine Indices - If the source pointer to this getelementptr instruction
// is a getelementptr instruction, combine the indices of the two
// getelementptr instructions into a single instruction.
//
- SmallVector<Value*, 8> SrcGEPOperands;
- if (User *Src = dyn_castGetElementPtr(PtrOp))
- SrcGEPOperands.append(Src->op_begin(), Src->op_end());
-
- if (!SrcGEPOperands.empty()) {
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
// Note that if our source is a gep chain itself that we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
//
- if (isa<GetElementPtrInst>(SrcGEPOperands[0]) &&
- cast<Instruction>(SrcGEPOperands[0])->getNumOperands() == 2)
- return 0; // Wait until our source is folded to completion.
+ if (GetElementPtrInst *SrcGEP =
+ dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;
// Find out whether the last index in the source GEP is a sequential idx.
bool EndsWithSequential = false;
- for (gep_type_iterator I = gep_type_begin(*cast<User>(PtrOp)),
- E = gep_type_end(*cast<User>(PtrOp)); I != E; ++I)
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
EndsWithSequential = !isa<StructType>(*I);
// Can we combine the two pointer arithmetics offsets?
@@ -11094,98 +10977,68 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Replace: gep (gep %P, long B), long A, ...
// With: T = long A+B; gep %P, T, ...
//
- Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1);
- if (SO1 == Context->getNullValue(SO1->getType())) {
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
Sum = GO1;
- } else if (GO1 == Context->getNullValue(GO1->getType())) {
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
Sum = SO1;
} else {
- // If they aren't the same type, convert both to an integer of the
- // target's pointer size.
- if (SO1->getType() != GO1->getType()) {
- if (Constant *SO1C = dyn_cast<Constant>(SO1)) {
- SO1 =
- Context->getConstantExprIntegerCast(SO1C, GO1->getType(), true);
- } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) {
- GO1 =
- Context->getConstantExprIntegerCast(GO1C, SO1->getType(), true);
- } else {
- unsigned PS = TD->getPointerSizeInBits();
- if (TD->getTypeSizeInBits(SO1->getType()) == PS) {
- // Convert GO1 to SO1's type.
- GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this);
-
- } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) {
- // Convert SO1 to GO1's type.
- SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this);
- } else {
- const Type *PT = TD->getIntPtrType();
- SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this);
- GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this);
- }
- }
- }
- if (isa<Constant>(SO1) && isa<Constant>(GO1))
- Sum = Context->getConstantExprAdd(cast<Constant>(SO1),
- cast<Constant>(GO1));
- else {
- Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
- InsertNewInstBefore(cast<Instruction>(Sum), GEP);
- }
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
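A pointer-arithmetic analogy of the gep-of-gep combination above: the two trailing offsets are added once their index types match (the array and constants are invented for illustration):

#include <cassert>

int main() {
  int buf[16] = {0};
  int *p = (buf + 3) + 5; // gep (gep %P, 3), 5
  int *q = buf + (3 + 5); // gep %P, (3 + 5) -- the ".sum" value
  assert(p == q);
  return 0;
}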
- // Recycle the GEP we already have if possible.
- if (SrcGEPOperands.size() == 2) {
- GEP.setOperand(0, SrcGEPOperands[0]);
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
GEP.setOperand(1, Sum);
return &GEP;
- } else {
- Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
- SrcGEPOperands.end()-1);
- Indices.push_back(Sum);
- Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end());
}
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
} else if (isa<Constant>(*GEP.idx_begin()) &&
cast<Constant>(*GEP.idx_begin())->isNullValue() &&
- SrcGEPOperands.size() != 1) {
+ Src->getNumOperands() != 1) {
// Otherwise we can do the fold if the first index of the GEP is a zero
- Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
- SrcGEPOperands.end());
- Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end());
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
}
if (!Indices.empty())
- return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(),
- Indices.end(), GEP.getName());
-
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
- // GEP of global variable. If all of the indices for this GEP are
- // constants, we can promote this to a constexpr instead of an instruction.
-
- // Scan for nonconstants...
- SmallVector<Constant*, 8> Indices;
- User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end();
- for (; I != E && isa<Constant>(*I); ++I)
- Indices.push_back(cast<Constant>(*I));
-
- if (I == E) { // If they are all constants...
- Constant *CE = Context->getConstantExprGetElementPtr(GV,
- &Indices[0],Indices.size());
-
- // Replace all uses of the GEP with the new constexpr...
- return ReplaceInstUsesWith(GEP, CE);
- }
- } else if (Value *X = getBitCastOperand(PtrOp)) { // Is the operand a cast?
- if (!isa<PointerType>(X->getType())) {
- // Not interesting. Source pointer must be a cast from pointer.
- } else if (HasZeroPointerIndex) {
- // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
- // into : GEP [10 x i8]* X, i32 0, ...
- //
- // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
- // into : GEP i8* X, ...
- //
- // This occurs when the program declares an array extern like "int X[];"
+ return (cast<GEPOperator>(&GEP)->isInBounds() &&
+ Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ if (Value *X = getBitCastOperand(PtrOp)) {
+ assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
+
+ // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
+ // want to change the gep until the bitcasts are eliminated.
+ if (getBitCastOperand(X)) {
+ Worklist.AddValue(PtrOp);
+ return 0;
+ }
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
const PointerType *XTy = cast<PointerType>(X->getType());
if (const ArrayType *CATy =
@@ -11194,10 +11047,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (CATy->getElementType() == XTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
- return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
- GEP.getName());
- } else if (const ArrayType *XATy =
- dyn_cast<ArrayType>(XTy->getElementType())) {
+ return cast<GEPOperator>(&GEP)->isInBounds() ?
+ GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
+ GEP.getName()) :
+ GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
+ GEP.getName());
+ }
+
+ if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
// GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XATy->getElementType()) {
// -> GEP [10 x i8]* X, i32 0, ...
@@ -11216,16 +11073,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
- if (isa<ArrayType>(SrcElTy) &&
+ if (TD && isa<ArrayType>(SrcElTy) &&
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
Value *Idx[2];
- Idx[0] = Context->getNullValue(Type::Int32Ty);
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
Idx[1] = GEP.getOperand(1);
- Value *V = InsertNewInstBefore(
- GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP);
+ Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
// V and GEP are both pointer types --> BitCast
- return new BitCastInst(V, GEP.getType());
+ return new BitCastInst(NewGEP, GEP.getType());
}
// Transform things like:
@@ -11233,7 +11091,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
+ if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) {
uint64_t ArrayEltSize =
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
@@ -11243,17 +11101,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
ConstantInt *Scale = 0;
if (ArrayEltSize == 1) {
NewIdx = GEP.getOperand(1);
- Scale =
- Context->getConstantInt(cast<IntegerType>(NewIdx->getType()), 1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
- NewIdx = Context->getConstantInt(CI->getType(), 1);
+ NewIdx = ConstantInt::get(CI->getType(), 1);
Scale = CI;
} else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
if (Inst->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(Inst->getOperand(1))) {
ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = Context->getConstantInt(cast<IntegerType>(Inst->getType()),
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
1ULL << ShAmtVal);
NewIdx = Inst->getOperand(0);
} else if (Inst->getOpcode() == Instruction::Mul &&
@@ -11269,23 +11126,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// operation after making sure Scale doesn't have the sign bit set.
if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
Scale->getZExtValue() % ArrayEltSize == 0) {
- Scale = Context->getConstantInt(Scale->getType(),
+ Scale = ConstantInt::get(Scale->getType(),
Scale->getZExtValue() / ArrayEltSize);
if (Scale->getZExtValue() != 1) {
- Constant *C =
- Context->getConstantExprIntegerCast(Scale, NewIdx->getType(),
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
false /*ZExt*/);
- Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
- NewIdx = InsertNewInstBefore(Sc, GEP);
+ NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
}
// Insert the new GEP instruction.
Value *Idx[2];
- Idx[0] = Context->getNullValue(Type::Int32Ty);
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
Idx[1] = NewIdx;
- Instruction *NewGEP =
- GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName());
- NewGEP = InsertNewInstBefore(NewGEP, GEP);
+ Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
@@ -11294,12 +11149,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
/// See if we can simplify:
- /// X = bitcast A to B*
+ /// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
// Determine how much the GEP moves the pointer. We are guaranteed to get
// a constant back from EmitGEPOffset.
ConstantInt *OffsetV =
@@ -11311,7 +11167,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Offset == 0) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocationInst>(BCI->getOperand(0))) {
+ if (isa<AllocationInst>(BCI->getOperand(0)) ||
+ isMalloc(BCI->getOperand(0))) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -11332,11 +11189,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
const Type *InTy =
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) {
- Instruction *NGEP =
- GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end());
- if (NGEP->getType() == GEP.getType()) return NGEP;
- InsertNewInstBefore(NGEP, GEP);
+ Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end()) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
NGEP->takeName(&GEP);
return new BitCastInst(NGEP, GEP.getType());
}
@@ -11351,18 +11211,17 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
const Type *NewTy =
- Context->getArrayType(AI.getAllocatedType(), C->getZExtValue());
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocationInst *New = 0;
// Create and insert the replacement instruction...
if (isa<MallocInst>(AI))
- New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName());
+ New = Builder->CreateMalloc(NewTy, 0, AI.getName());
else {
assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
- New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName());
+ New = Builder->CreateAlloca(NewTy, 0, AI.getName());
}
-
- InsertNewInstBefore(New, AI);
+ New->setAlignment(AI.getAlignment());
// Scan to the end of the allocation instructions, to skip over a block of
// allocas if possible...also skip interleaved debug info
@@ -11373,27 +11232,27 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Value *NullIdx = Context->getNullValue(Type::Int32Ty);
+ Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context));
Value *Idx[2];
Idx[0] = NullIdx;
Idx[1] = NullIdx;
- Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
- New->getName()+".sub", It);
+ Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+ New->getName()+".sub", It);
// Now make everything use the getelementptr instead of the original
// allocation.
return ReplaceInstUsesWith(AI, V);
} else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType()));
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
}
}
- if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+ if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
// If alloca'ing a zero byte object, replace the alloca with a null pointer.
// Note that we only do this for alloca's, because malloc should allocate
// and return a unique pointer, even for a zero byte allocation.
if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
- return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType()));
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
@@ -11409,8 +11268,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
// Insert a new store to null because we cannot modify the CFG here.
- new StoreInst(Context->getConstantIntTrue(),
- Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), &FI);
+ new StoreInst(ConstantInt::getTrue(*Context),
+ UndefValue::get(Type::getInt1PtrTy(*Context)), &FI);
return EraseInstFromFunction(FI);
}
@@ -11428,7 +11287,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
// Change free (gep X, 0,0,0,0) into free(X)
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
if (GEPI->hasAllZeroIndices()) {
- AddToWorkList(GEPI);
+ Worklist.Add(GEPI);
FI.setOperand(0, GEPI->getOperand(0));
return &FI;
}
@@ -11440,6 +11299,21 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
EraseInstFromFunction(FI);
return EraseInstFromFunction(*MI);
}
+ if (isMalloc(Op)) {
+ if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+ if (Op->hasOneUse() && CI->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ EraseInstFromFunction(*CI);
+ return EraseInstFromFunction(*cast<Instruction>(Op));
+ }
+ } else {
+ // Op is a call to malloc
+ if (Op->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ return EraseInstFromFunction(*cast<Instruction>(Op));
+ }
+ }
+ }
return 0;
}
@@ -11450,7 +11324,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
const TargetData *TD) {
User *CI = cast<User>(LI.getOperand(0));
Value *CastOp = CI->getOperand(0);
- LLVMContext* Context = IC.getContext();
+ LLVMContext *Context = IC.getContext();
if (TD) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
@@ -11479,7 +11353,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
SingleChar = 0;
StrVal = (StrVal << 8) | SingleChar;
}
- Value *NL = Context->getConstantInt(StrVal);
+ Value *NL = ConstantInt::get(*Context, StrVal);
return IC.ReplaceInstUsesWith(LI, NL);
}
}
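A standalone C++ sketch of the shift-and-or loop above, which packs a constant string into a single integer; the input bytes and expected value are invented:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned char str[4] = {'a', 'b', 'c', 'd'};
  uint32_t val = 0;
  for (unsigned char c : str)
    val = (val << 8) | c; // same shape as the StrVal loop
  assert(val == 0x61626364u);
  return 0;
}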
@@ -11505,26 +11379,26 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
Value *Idxs[2];
- Idxs[0] = Idxs[1] = Context->getNullValue(Type::Int32Ty);
- CastOp = Context->getConstantExprGetElementPtr(CSrc, Idxs, 2);
+ Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
}
- if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
+ if (IC.getTargetData() &&
+ (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
isa<VectorType>(SrcPTy)) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
(isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
- IC.getTargetData().getTypeSizeInBits(SrcPTy) ==
- IC.getTargetData().getTypeSizeInBits(DestPTy)) {
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+ IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp,
- CI->getName(),
- LI.isVolatile()),LI);
+ Value *NewLoad =
+ IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
// Now cast the result of the load.
return new BitCastInst(NewLoad, LI.getType());
}
@@ -11537,14 +11411,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
// Attempt to improve the alignment.
- unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
- if (KnownAlign >
- (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
- LI.getAlignment()))
- LI.setAlignment(KnownAlign);
-
- // load (cast X) --> cast (load X) iff safe
+ if (TD) {
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+ if (KnownAlign >
+ (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+ LI.getAlignment()))
+ LI.setAlignment(KnownAlign);
+ }
+
+ // load (cast X) --> cast (load X) iff safe.
if (isa<CastInst>(Op))
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
@@ -11562,29 +11438,28 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
const Value *GEPI0 = GEPI->getOperand(0);
// TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<ConstantPointerNull>(GEPI0) &&
- cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
// Insert a new store to null instruction before the load to indicate
// that this code is not reachable. We do this instead of inserting
// an unreachable instruction directly because we cannot modify the
// CFG.
- new StoreInst(Context->getUndef(LI.getType()),
- Context->getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
}
if (Constant *C = dyn_cast<Constant>(Op)) {
// load null/undef -> undef
// TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<UndefValue>(C) || (C->isNullValue() &&
- cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
+ if (isa<UndefValue>(C) ||
+ (C->isNullValue() && LI.getPointerAddressSpace() == 0)) {
// Insert a new store to null instruction before the load to indicate that
// this code is not reachable. We do this instead of inserting an
// unreachable instruction directly because we cannot modify the CFG.
- new StoreInst(Context->getUndef(LI.getType()),
- Context->getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
// Instcombine load (constant global) into the value loaded.
@@ -11605,9 +11480,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// that this code is not reachable. We do this instead of inserting
// an unreachable instruction directly because we cannot modify the
// CFG.
- new StoreInst(Context->getUndef(LI.getType()),
- Context->getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
} else if (CE->isCast()) {
@@ -11622,9 +11497,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (GV->getInitializer()->isNullValue())
- return ReplaceInstUsesWith(LI, Context->getNullValue(LI.getType()));
+ return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
else if (isa<UndefValue>(GV->getInitializer()))
- return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
}
@@ -11643,10 +11518,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
- Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1),
- SI->getOperand(1)->getName()+".val"), LI);
- Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2),
- SI->getOperand(2)->getName()+".val"), LI);
+ Value *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ Value *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
return SelectInst::Create(SI->getCondition(), V1, V2);
}
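A source-level analogy of the load-of-select fold above, valid here because both pointers are safe to dereference unconditionally (names and values invented):

#include <cassert>

static int before(bool c, int *p, int *q) { return *(c ? p : q); } // load (select)
static int after(bool c, int *p, int *q)  { return c ? *p : *q; }  // select (loads)

int main() {
  int a = 1, b = 2;
  assert(before(true, &a, &b) == after(true, &a, &b));
  assert(before(false, &a, &b) == after(false, &a, &b));
  return 0;
}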
@@ -11674,7 +11549,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
User *CI = cast<User>(SI.getOperand(1));
Value *CastOp = CI->getOperand(0);
- LLVMContext* Context = IC.getContext();
const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
@@ -11696,7 +11570,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// constants.
if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
// Index through pointer.
- Constant *Zero = Context->getNullValue(Type::Int32Ty);
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext()));
NewGEPIndices.push_back(Zero);
while (1) {
@@ -11713,7 +11587,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
}
}
- SrcTy = Context->getPointerType(SrcPTy, SrcTy->getAddressSpace());
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
@@ -11721,10 +11595,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
- if (SrcTy->getAddressSpace() !=
+ if (!IC.getTargetData() ||
+ SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace() ||
- IC.getTargetData().getTypeSizeInBits(SrcPTy) !=
- IC.getTargetData().getTypeSizeInBits(DestPTy))
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+ IC.getTargetData()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
@@ -11745,22 +11620,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
- if (!NewGEPIndices.empty()) {
- if (Constant *C = dyn_cast<Constant>(CastOp))
- CastOp = Context->getConstantExprGetElementPtr(C, &NewGEPIndices[0],
- NewGEPIndices.size());
- else
- CastOp = IC.InsertNewInstBefore(
- GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(),
- NewGEPIndices.end()), SI);
- }
+ if (!NewGEPIndices.empty())
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+ NewGEPIndices.end());
- if (Constant *C = dyn_cast<Constant>(SIOp0))
- NewCast = Context->getConstantExprCast(opcode, C, CastDstTy);
- else
- NewCast = IC.InsertNewInstBefore(
- CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),
- SI);
+ NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+ SIOp0->getName()+".c");
return new StoreInst(NewCast, CastOp);
}
@@ -11777,12 +11642,16 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (A == B) return true;
  // Test if the values come from identical arithmetic instructions.
+  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+  // it's only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
if (isa<BinaryOperator>(A) ||
isa<CastInst>(A) ||
isa<PHINode>(A) ||
isa<GetElementPtrInst>(A))
if (Instruction *BI = dyn_cast<Instruction>(B))
- if (cast<Instruction>(A)->isIdenticalTo(BI))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
// Otherwise they may not be equivalent.
@@ -11854,12 +11723,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
// Attempt to improve the alignment.
- unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
- if (KnownAlign >
- (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
- SI.getAlignment()))
- SI.setAlignment(KnownAlign);
+ if (TD) {
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+ if (KnownAlign >
+ (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+ SI.getAlignment()))
+ SI.setAlignment(KnownAlign);
+ }
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
@@ -11914,12 +11785,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (SI.isVolatile()) return 0; // Don't hack volatile stores.
// store X, null -> turns into 'unreachable' in SimplifyCFG
- if (isa<ConstantPointerNull>(Ptr) &&
- cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
if (!isa<UndefValue>(Val)) {
- SI.setOperand(0, Context->getUndef(Val->getType()));
+ SI.setOperand(0, UndefValue::get(Val->getType()));
if (Instruction *U = dyn_cast<Instruction>(Val))
- AddToWorkList(U); // Dropped a use.
+ Worklist.Add(U); // Dropped a use.
++NumCombined;
}
return 0; // Do not modify these!
@@ -12096,41 +11966,34 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
- TrueDest, FalseDest)))
- if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
- FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) {
- FCmpInst *I = cast<FCmpInst>(BI.getCondition());
- FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred);
- Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I);
- NewSCC->takeName(I);
- // Swap Destinations and condition...
- BI.setCondition(NewSCC);
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
- RemoveFromWorkList(I);
- I->eraseFromParent();
- AddToWorkList(NewSCC);
+ Worklist.Add(Cond);
return &BI;
}
// Canonicalize icmp_ne -> icmp_eq
ICmpInst::Predicate IPred;
if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
- TrueDest, FalseDest)))
- if ((IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
- IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
- IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) {
- ICmpInst *I = cast<ICmpInst>(BI.getCondition());
- ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred);
- Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I);
- NewSCC->takeName(I);
- // Swap Destinations and condition...
- BI.setCondition(NewSCC);
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
- RemoveFromWorkList(I);
- I->eraseFromParent();;
- AddToWorkList(NewSCC);
+ Worklist.Add(Cond);
return &BI;
}
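
Both canonicalizations above follow the same recipe: rather than allocating a replacement compare as the old code did, they invert the predicate in place and swap the branch successors, which preserves the branch's behavior. A minimal standalone sketch of that invariant (illustrative names, not LLVM's):

// Sketch of the branch canonicalization: flip the predicate in place and
// swap the successors instead of building a new compare instruction.
#include <cassert>
#include <utility>

enum Pred { ONE, OEQ, OLE, OGT, OGE, OLT };

Pred inverse(Pred P) {
  switch (P) {
  case ONE: return OEQ;
  case OEQ: return ONE;
  case OLE: return OGT;
  case OGT: return OLE;
  case OGE: return OLT;
  case OLT: return OGE;
  }
  return P;
}

struct Branch { Pred P; int TrueDest, FalseDest; };

void canonicalize(Branch &BI) {
  // fcmp_one/ole/oge become oeq/ogt/olt with destinations swapped;
  // the observable behavior of the branch is unchanged.
  BI.P = inverse(BI.P);
  std::swap(BI.TrueDest, BI.FalseDest);
}

int main() {
  Branch B{ONE, 1, 2};
  canonicalize(B);
  assert(B.P == OEQ && B.TrueDest == 2 && B.FalseDest == 1);
}
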
@@ -12145,10 +12008,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
SI.setOperand(i,
- Context->getConstantExprSub(cast<Constant>(SI.getOperand(i)),
+ ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
AddRHS));
SI.setOperand(0, I->getOperand(0));
- AddToWorkList(I);
+ Worklist.Add(I);
return &SI;
}
}
@@ -12163,10 +12026,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
if (Constant *C = dyn_cast<Constant>(Agg)) {
if (isa<UndefValue>(C))
- return ReplaceInstUsesWith(EV, Context->getUndef(EV.getType()));
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
if (isa<ConstantAggregateZero>(C))
- return ReplaceInstUsesWith(EV, Context->getNullValue(EV.getType()));
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
// Extract the element indexed by the first index out of the constant
@@ -12214,10 +12077,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// %E = insertvalue { i32 } %X, i32 42, 0
// by switching the order of the insert and extract (though the
// insertvalue should be left in, since it may have other uses).
- Value *NewEV = InsertNewInstBefore(
- ExtractValueInst::Create(IV->getAggregateOperand(),
- EV.idx_begin(), EV.idx_end()),
- EV);
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
insi, inse);
}
@@ -12303,17 +12164,17 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo,
- LLVMContext* Context) {
+ LLVMContext *Context) {
assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
const VectorType *PTy = cast<VectorType>(V->getType());
unsigned Width = PTy->getNumElements();
if (EltNo >= Width) // Out of range access.
- return Context->getUndef(PTy->getElementType());
+ return UndefValue::get(PTy->getElementType());
if (isa<UndefValue>(V))
- return Context->getUndef(PTy->getElementType());
+ return UndefValue::get(PTy->getElementType());
else if (isa<ConstantAggregateZero>(V))
- return Context->getNullValue(PTy->getElementType());
+ return Constant::getNullValue(PTy->getElementType());
else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
return CP->getOperand(EltNo);
else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
@@ -12339,7 +12200,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo,
else if (InEl < LHSWidth*2)
return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context);
else
- return Context->getUndef(PTy->getElementType());
+ return UndefValue::get(PTy->getElementType());
}
// Otherwise, we don't know.
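
The shufflevector case above recurses by translating the extracted element number through the shuffle mask: entries below LHSWidth name elements of the first operand, entries in [LHSWidth, 2*LHSWidth) name elements of the second, and anything else is undef. A standalone sketch of that index mapping (illustrative names):

// Maps an extract of element EltNo from a shuffle back to one of the two
// input vectors. Returns {which operand (0=LHS, 1=RHS, -1=undef), index}.
#include <cassert>
#include <utility>
#include <vector>

std::pair<int, unsigned> mapThroughShuffle(const std::vector<unsigned> &Mask,
                                           unsigned EltNo, unsigned LHSWidth) {
  unsigned InEl = Mask[EltNo];
  if (InEl < LHSWidth) return {0, InEl};                 // comes from LHS
  if (InEl < LHSWidth * 2) return {1, InEl - LHSWidth};  // comes from RHS
  return {-1, 0};                                        // undef mask entry
}

int main() {
  std::vector<unsigned> Mask = {0, 5, 2, 7};  // shuffle of two 4-wide vectors
  assert((mapThroughShuffle(Mask, 1, 4) == std::pair<int, unsigned>{1, 1u}));
  assert((mapThroughShuffle(Mask, 2, 4) == std::pair<int, unsigned>{0, 2u}));
}
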
@@ -12349,18 +12210,18 @@ static Value *FindScalarElement(Value *V, unsigned EltNo,
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is undef, replace extract with scalar undef.
if (isa<UndefValue>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType()));
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
// If vector val is constant 0, replace extract with scalar 0.
if (isa<ConstantAggregateZero>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, Context->getNullValue(EI.getType()));
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
// If vector val is constant with all elements the same, replace EI with
// that element. When the elements are not identical, we cannot replace yet
// (we do that below, but only when the index is constant).
Constant *op0 = C->getOperand(0);
- for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ for (unsigned i = 1; i != C->getNumOperands(); ++i)
if (C->getOperand(i) != op0) {
op0 = 0;
break;
@@ -12373,13 +12234,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// find a previously computed scalar that was inserted into the vector.
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
unsigned IndexVal = IdxC->getZExtValue();
- unsigned VectorWidth =
- cast<VectorType>(EI.getOperand(0)->getType())->getNumElements();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
// If this is extracting an invalid index, turn this into undef, to avoid
// crashing the code below.
if (IndexVal >= VectorWidth)
- return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType()));
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
@@ -12411,42 +12271,27 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
- if (I->hasOneUse()) {
- // Push extractelement into predecessor operation if legal and
- // profitable to do so
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- bool isConstantElt = isa<ConstantInt>(EI.getOperand(1));
- if (CheapToScalarize(BO, isConstantElt)) {
- ExtractElementInst *newEI0 =
- new ExtractElementInst(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
- ExtractElementInst *newEI1 =
- new ExtractElementInst(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
- InsertNewInstBefore(newEI0, EI);
- InsertNewInstBefore(newEI1, EI);
- return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
- }
- } else if (isa<LoadInst>(I)) {
- unsigned AS =
- cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
- Value *Ptr = InsertBitCastBefore(I->getOperand(0),
- Context->getPointerType(EI.getType(), AS),EI);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep");
- InsertNewInstBefore(GEP, EI);
- return new LoadInst(GEP);
- }
- }
- if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
// Extracting the inserted element?
if (IE->getOperand(2) == EI.getOperand(1))
return ReplaceInstUsesWith(EI, IE->getOperand(1));
// If the inserted and extracted elements are constants, they must not
// be the same value; extract from the pre-inserted value instead.
- if (isa<Constant>(IE->getOperand(2)) &&
- isa<Constant>(EI.getOperand(1))) {
- AddUsesToWorkList(EI);
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
EI.setOperand(0, IE->getOperand(0));
return &EI;
}
@@ -12465,11 +12310,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
SrcIdx -= LHSWidth;
Src = SVI->getOperand(1);
} else {
- return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType()));
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
}
- return new ExtractElementInst(Src, SrcIdx);
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx,
+ false));
}
}
+ // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
}
return 0;
}
@@ -12479,21 +12327,21 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
std::vector<Constant*> &Mask,
- LLVMContext* Context) {
+ LLVMContext *Context) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
if (isa<UndefValue>(V)) {
- Mask.assign(NumElts, Context->getUndef(Type::Int32Ty));
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
return true;
} else if (V == LHS) {
for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(Context->getConstantInt(Type::Int32Ty, i));
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
return true;
} else if (V == RHS) {
for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(Context->getConstantInt(Type::Int32Ty, i+NumElts));
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts));
return true;
} else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
@@ -12510,7 +12358,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
// transitively ok.
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
// If so, update the mask to reflect the inserted undef.
- Mask[InsertedIdx] = Context->getUndef(Type::Int32Ty);
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));
return true;
}
} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
@@ -12527,11 +12375,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
Mask[InsertedIdx % NumElts] =
- Context->getConstantInt(Type::Int32Ty, ExtractedIdx);
+ ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
} else {
assert(EI->getOperand(0) == RHS);
Mask[InsertedIdx % NumElts] =
- Context->getConstantInt(Type::Int32Ty, ExtractedIdx+NumElts);
+ ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);
}
return true;
@@ -12549,17 +12397,17 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
- Value *&RHS, LLVMContext* Context) {
+ Value *&RHS, LLVMContext *Context) {
assert(isa<VectorType>(V->getType()) &&
(RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
if (isa<UndefValue>(V)) {
- Mask.assign(NumElts, Context->getUndef(Type::Int32Ty));
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
return V;
} else if (isa<ConstantAggregateZero>(V)) {
- Mask.assign(NumElts, Context->getConstantInt(Type::Int32Ty, 0));
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));
return V;
} else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
@@ -12580,7 +12428,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
RHS = EI->getOperand(0);
Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);
Mask[InsertedIdx % NumElts] =
- Context->getConstantInt(Type::Int32Ty, NumElts+ExtractedIdx);
+ ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);
return V;
}
@@ -12590,7 +12438,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
- Mask[i] = Context->getConstantInt(Type::Int32Ty, NumElts+i);
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i);
}
return V;
}
@@ -12608,7 +12456,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
// Otherwise, can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(Context->getConstantInt(Type::Int32Ty, i));
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
return V;
}
@@ -12635,45 +12483,23 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
return ReplaceInstUsesWith(IE, VecOp);
if (InsertedIdx >= NumVectorElts) // Out of range insert.
- return ReplaceInstUsesWith(IE, Context->getUndef(IE.getType()));
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
// If we are extracting a value from a vector, then inserting it right
// back into the same place, just use the input vector.
if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
return ReplaceInstUsesWith(IE, VecOp);
- // We could theoretically do this for ANY input. However, doing so could
- // turn chains of insertelement instructions into a chain of shufflevector
- // instructions, and right now we do not merge shufflevectors. As such,
- // only do this in a situation where it is clear that there is benefit.
- if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) {
- // Turn this into shuffle(EIOp0, VecOp, Mask). The result has all of
- // the values of VecOp, except then one read from EIOp0.
- // Build a new shuffle mask.
- std::vector<Constant*> Mask;
- if (isa<UndefValue>(VecOp))
- Mask.assign(NumVectorElts, Context->getUndef(Type::Int32Ty));
- else {
- assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing");
- Mask.assign(NumVectorElts, Context->getConstantInt(Type::Int32Ty,
- NumVectorElts));
- }
- Mask[InsertedIdx] =
- Context->getConstantInt(Type::Int32Ty, ExtractedIdx);
- return new ShuffleVectorInst(EI->getOperand(0), VecOp,
- Context->getConstantVector(Mask));
- }
-
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
std::vector<Constant*> Mask;
Value *RHS = 0;
Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context);
- if (RHS == 0) RHS = Context->getUndef(LHS->getType());
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
// We now have a shuffle of LHS, RHS, Mask.
return new ShuffleVectorInst(LHS, RHS,
- Context->getConstantVector(Mask));
+ ConstantVector::get(Mask));
}
}
}
@@ -12697,7 +12523,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// Undefined shuffle mask -> undefined value.
if (isa<UndefValue>(SVI.getOperand(2)))
- return ReplaceInstUsesWith(SVI, Context->getUndef(SVI.getType()));
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
@@ -12724,21 +12550,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
std::vector<Constant*> Elts;
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] >= 2*e)
- Elts.push_back(Context->getUndef(Type::Int32Ty));
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
else {
if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
(Mask[i] < e && isa<UndefValue>(LHS))) {
Mask[i] = 2*e; // Turn into undef.
- Elts.push_back(Context->getUndef(Type::Int32Ty));
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(Context->getConstantInt(Type::Int32Ty, Mask[i]));
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));
}
}
}
SVI.setOperand(0, SVI.getOperand(1));
- SVI.setOperand(1, Context->getUndef(RHS->getType()));
- SVI.setOperand(2, Context->getConstantVector(Elts));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
LHS = SVI.getOperand(0);
RHS = SVI.getOperand(1);
MadeChange = true;
@@ -12788,14 +12614,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
std::vector<Constant*> Elts;
for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(Context->getUndef(Type::Int32Ty));
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
} else {
- Elts.push_back(Context->getConstantInt(Type::Int32Ty, NewMask[i]));
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));
}
}
return new ShuffleVectorInst(LHSSVI->getOperand(0),
LHSSVI->getOperand(1),
- Context->getConstantVector(Elts));
+ ConstantVector::get(Elts));
}
}
}
@@ -12855,6 +12681,9 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
const TargetData *TD) {
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
+
+ std::vector<Instruction*> InstrsForInstCombineWorklist;
+ InstrsForInstCombineWorklist.reserve(128);
while (!Worklist.empty()) {
BB = Worklist.back();
@@ -12863,44 +12692,28 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
// We have now visited this block! If we've already been here, ignore it.
if (!Visited.insert(BB)) continue;
- DbgInfoIntrinsic *DBI_Prev = NULL;
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *Inst = BBI++;
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst)) {
++NumDeadInst;
- DOUT << "IC: DCE: " << *Inst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
continue;
}
// ConstantProp instruction if trivially constant.
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
- DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst;
+ if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
Inst->eraseFromParent();
continue;
}
-
- // If there are two consecutive llvm.dbg.stoppoint calls then
- // it is likely that the optimizer deleted code in between these
- // two intrinsics.
- DbgInfoIntrinsic *DBI_Next = dyn_cast<DbgInfoIntrinsic>(Inst);
- if (DBI_Next) {
- if (DBI_Prev
- && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint
- && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) {
- IC.RemoveFromWorkList(DBI_Prev);
- DBI_Prev->eraseFromParent();
- }
- DBI_Prev = DBI_Next;
- } else {
- DBI_Prev = 0;
- }
- IC.AddToWorkList(Inst);
+ InstrsForInstCombineWorklist.push_back(Inst);
}
// Recursively visit successors. If this is a branch or switch on a
@@ -12932,14 +12745,22 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
Worklist.push_back(TI->getSuccessor(i));
}
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
}
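
Since the combiner's worklist is consumed from the back, adding the collected instructions in reverse makes them come off in original program order. A tiny standalone sketch of that LIFO trick (illustrative names):

// Pushing a collected list in reverse onto a stack-style worklist makes
// items pop off in original (top-down) order.
#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Collected = {"i1", "i2", "i3"};  // program order
  std::vector<std::string> Worklist;                        // used as a stack
  for (auto It = Collected.rbegin(); It != Collected.rend(); ++It)
    Worklist.push_back(*It);  // the "add initial group in reverse" step
  // Popping now visits i1, i2, i3: the top of the function first.
  assert(Worklist.back() == "i1");
  Worklist.pop_back();
  assert(Worklist.back() == "i2");
}
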
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
- bool Changed = false;
- TD = &getAnalysis<TargetData>();
+ MadeIRChange = false;
+ TD = getAnalysisIfAvailable<TargetData>();
- DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getNameStr() << "\n");
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
{
// Do a depth-first traversal of the function, populate the worklist with
@@ -12957,71 +12778,73 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
while (Term != BB->begin()) { // Remove instrs bottom-up
BasicBlock::iterator I = Term; --I;
- DOUT << "IC: DCE: " << *I;
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
// A debug intrinsic shouldn't force another iteration if we weren't
// going to do one without it.
if (!isa<DbgInfoIntrinsic>(I)) {
++NumDeadInst;
- Changed = true;
+ MadeIRChange = true;
}
- if (!I->use_empty())
- I->replaceAllUsesWith(Context->getUndef(I->getType()));
+
+
+ // If I has a non-void type, replace all of its uses with undef.
+ // This allows value handles and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
}
}
}
- while (!Worklist.empty()) {
- Instruction *I = RemoveOneFromWorkList();
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
if (I == 0) continue; // skip null values.
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I)) {
- // Add operands to the worklist.
- if (I->getNumOperands() < 4)
- AddUsesToWorkList(*I);
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
++NumDeadInst;
-
- DOUT << "IC: DCE: " << *I;
-
- I->eraseFromParent();
- RemoveFromWorkList(I);
- Changed = true;
+ MadeIRChange = true;
continue;
}
// Instruction isn't dead, see if we can constant propagate it.
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
- DOUT << "IC: ConstFold to: " << *C << " from: " << *I;
+ if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
- AddUsesToWorkList(*I);
ReplaceInstUsesWith(*I, C);
-
++NumConstProp;
- I->eraseFromParent();
- RemoveFromWorkList(I);
- Changed = true;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
continue;
}
- if (TD &&
- (I->getType()->getTypeID() == Type::VoidTyID ||
- I->isTrapping())) {
+ if (TD) {
// See if we can constant fold its operands.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i))
- if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *NewC = ConstantFoldConstantExpression(CE,
+ F.getContext(), TD))
if (NewC != CE) {
- i->set(NewC);
- Changed = true;
+ *i = NewC;
+ MadeIRChange = true;
}
}
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
- BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
if (UserParent != BB) {
bool UserIsSuccessor = false;
// See if the user is one of our successors.
@@ -13034,31 +12857,34 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// If the user is one of our immediate successors, and if that successor
// only has us as a predecessor (we'd have to split the critical edge
// otherwise), we can keep going.
- if (UserIsSuccessor && !isa<PHINode>(I->use_back()) &&
- next(pred_begin(UserParent)) == pred_end(UserParent))
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
// Okay, the CFG is simple enough, try to sink this instruction.
- Changed |= TryToSinkInstruction(I, UserParent);
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
}
}
- // Now that we have an instruction, try combining it to simplify it...
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+
#ifndef NDEBUG
std::string OrigI;
#endif
- DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str(););
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
if (Instruction *Result = visit(*I)) {
++NumCombined;
// Should we replace the old instruction with a new one?
if (Result != I) {
- DOUT << "IC: Old = " << *I
- << " New = " << *Result;
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
// Push the new instruction and any users onto the worklist.
- AddToWorkList(Result);
- AddUsersToWorkList(*Result);
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
// Move the name to the new instruction first.
Result->takeName(I);
@@ -13073,52 +12899,42 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
InstParent->getInstList().insert(InsertPos, Result);
- // Make sure that we reprocess all operands now that we reduced their
- // use counts.
- AddUsesToWorkList(*I);
-
- // Instructions can end up on the worklist more than once. Make sure
- // we do not process an instruction that has been deleted.
- RemoveFromWorkList(I);
-
- // Erase the old instruction.
- InstParent->getInstList().erase(I);
+ EraseInstFromFunction(*I);
} else {
#ifndef NDEBUG
- DOUT << "IC: Mod = " << OrigI
- << " New = " << *I;
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
#endif
// If the instruction was modified, it's possible that it is now dead.
// if so, remove it.
if (isInstructionTriviallyDead(I)) {
- // Make sure we process all operands now that we are reducing their
- // use counts.
- AddUsesToWorkList(*I);
-
- // Instructions may end up in the worklist more than once. Erase all
- // occurrences of this instruction.
- RemoveFromWorkList(I);
- I->eraseFromParent();
+ EraseInstFromFunction(*I);
} else {
- AddToWorkList(I);
- AddUsersToWorkList(*I);
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
}
}
- Changed = true;
+ MadeIRChange = true;
}
}
- assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
- // Do an explicit clear, this shrinks the map if needed.
- WorklistMap.clear();
- return Changed;
+ Worklist.Zap();
+ return MadeIRChange;
}
bool InstCombiner::runOnFunction(Function &F) {
MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ Context = &F.getContext();
+
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, ConstantFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), ConstantFolder(F.getContext()),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
bool EverMadeChange = false;
@@ -13126,6 +12942,8 @@ bool InstCombiner::runOnFunction(Function &F) {
unsigned Iteration = 0;
while (DoOneIteration(F, Iteration++))
EverMadeChange = true;
+
+ Builder = 0;
return EverMadeChange;
}
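
The IRBuilder instantiated above threads an inserter functor through every Create* call, which is how newly built instructions land on the worklist without explicit AddToWorkList calls at each creation site. A rough standalone sketch of the mechanism (toy names, not the LLVM API):

// Toy model of a builder parameterized over an "inserter" callback that
// also queues every created instruction for revisiting.
#include <iostream>
#include <string>
#include <vector>

struct WorklistInserter {
  std::vector<std::string> &Worklist;
  void operator()(const std::string &NewInst) const {
    Worklist.push_back(NewInst);  // revisit everything the builder creates
  }
};

template <typename InserterT> struct ToyBuilder {
  InserterT Insert;
  std::string CreateAdd(const std::string &L, const std::string &R) {
    std::string I = "add " + L + ", " + R;
    Insert(I);  // insertion and worklist bookkeeping happen together
    return I;
  }
};

int main() {
  std::vector<std::string> WL;
  ToyBuilder<WorklistInserter> B{WorklistInserter{WL}};
  B.CreateAdd("x", "y");
  std::cout << WL.size() << " instruction(s) queued\n";  // prints 1
}
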
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dee7bfba21dd..8b11edd891fd 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
@@ -26,13 +27,13 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
+STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
static cl::opt<unsigned>
Threshold("jump-threading-threshold",
@@ -56,7 +57,7 @@ namespace {
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
///
- class VISIBILITY_HIDDEN JumpThreading : public FunctionPass {
+ class JumpThreading : public FunctionPass {
TargetData *TD;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -68,15 +69,16 @@ namespace {
JumpThreading() : FunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
}
bool runOnFunction(Function &F);
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
- unsigned JumpThreadCost);
+ bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+ bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ BasicBlock *PredBB);
+
BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -99,8 +101,8 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
- DOUT << "Jump threading on function '" << F.getNameStart() << "'\n";
- TD = &getAnalysis<TargetData>();
+ DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+ TD = getAnalysisIfAvailable<TargetData>();
FindLoopHeaders(F);
@@ -119,8 +121,8 @@ bool JumpThreading::runOnFunction(Function &F) {
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
- DOUT << " JT: Deleting dead block '" << BB->getNameStart()
- << "' with terminator: " << *BB->getTerminator();
+ DEBUG(errs() << " JT: Deleting dead block '" << BB->getName()
+ << "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
Changed = true;
@@ -134,6 +136,48 @@ bool JumpThreading::runOnFunction(Function &F) {
return EverChanged;
}
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+ /// Ignore PHI nodes; these will be flattened when duplication happens.
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+ // Sum up the cost of each instruction until we get to the terminator. Don't
+ // include the terminator because the copy won't include it.
+ unsigned Size = 0;
+ for (; !isa<TerminatorInst>(I); ++I) {
+ // Debugger intrinsics don't incur code size.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this is a pointer->pointer bitcast, it is free.
+ if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+ continue;
+
+ // All other instructions count for at least one unit.
+ ++Size;
+
+ // Calls are more expensive. If they are non-intrinsic calls, we model them
+ // as having cost of 4. If they are a non-vector intrinsic, we model them
+ // as having cost of 2 total, and if they are a vector intrinsic, we model
+ // them as having cost 1.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(CI))
+ Size += 3;
+ else if (!isa<VectorType>(CI->getType()))
+ Size += 1;
+ }
+ }
+
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(I))
+ Size = Size > 6 ? Size-6 : 0;
+
+ return Size;
+}
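
For reference, the weights in getJumpThreadDuplicationCost can be restated in isolation: plain instructions cost 1, non-intrinsic calls 4, scalar intrinsics 2, vector intrinsics 1, pointer bitcasts and debug intrinsics are free, and a switch terminator earns a 6-unit discount. A standalone sketch, with instruction kinds modeled as an enum (illustrative names):

#include <cassert>
#include <vector>

enum Kind { Simple, Call, ScalarIntrinsic, VectorIntrinsic,
            PtrBitCast, DbgIntrinsic };

unsigned duplicationCost(const std::vector<Kind> &Body, bool EndsInSwitch) {
  unsigned Size = 0;
  for (Kind K : Body) {
    if (K == DbgIntrinsic || K == PtrBitCast) continue;  // free
    ++Size;                                              // base cost 1
    if (K == Call)                 Size += 3;            // total 4
    else if (K == ScalarIntrinsic) Size += 1;            // total 2
    // VectorIntrinsic: total 1, no extra charge
  }
  if (EndsInSwitch)
    Size = Size > 6 ? Size - 6 : 0;  // encourage threading through switches
  return Size;
}

int main() {
  assert(duplicationCost({Simple, Call, DbgIntrinsic}, false) == 5);
  assert(duplicationCost({Simple, Simple}, true) == 0);
}
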
+
+
+
/// FindLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
@@ -173,52 +217,34 @@ BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
if (CommonPreds.size() == 1)
return CommonPreds[0];
- DOUT << " Factoring out " << CommonPreds.size()
- << " common predecessors.\n";
+ DEBUG(errs() << " Factoring out " << CommonPreds.size()
+ << " common predecessors.\n");
return SplitBlockPredecessors(PN->getParent(),
&CommonPreds[0], CommonPreds.size(),
".thr_comm", this);
}
-/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
- /// Ignore PHI nodes, these will be flattened when duplication happens.
- BasicBlock::const_iterator I = BB->getFirstNonPHI();
-
- // Sum up the cost of each instruction until we get to the terminator. Don't
- // include the terminator because the copy won't include it.
- unsigned Size = 0;
- for (; !isa<TerminatorInst>(I); ++I) {
- // Debugger intrinsics don't incur code size.
- if (isa<DbgInfoIntrinsic>(I)) continue;
-
- // If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
- continue;
-
- // All other instructions count for at least one unit.
- ++Size;
-
- // Calls are more expensive. If they are non-intrinsic calls, we model them
- // as having cost of 4. If they are a non-vector intrinsic, we model them
- // as having cost of 2 total, and if they are a vector intrinsic, we model
- // them as having cost 1.
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!isa<IntrinsicInst>(CI))
- Size += 3;
- else if (!isa<VectorType>(CI->getType()))
- Size += 1;
- }
+/// GetBestDestForJumpOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors. This should reduce the in-degree of the others.
+///
+static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+ TerminatorInst *BBTerm = BB->getTerminator();
+ unsigned MinSucc = 0;
+ BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+ // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ TestBB = BBTerm->getSuccessor(i);
+ unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ if (NumPreds < MinNumPreds) {
+ MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
}
- // Threading through a switch statement is particularly profitable. If this
- // block ends in a switch, decrease its cost to make it more likely to happen.
- if (isa<SwitchInst>(I))
- Size = Size > 6 ? Size-6 : 0;
-
- return Size;
+ return MinSucc;
}
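
A standalone restatement of the selection loop above; note that the running minimum has to be updated whenever a smaller predecessor count is found, as in the corrected loop (illustrative names):

#include <cassert>
#include <vector>

unsigned bestDestForJumpOnUndef(const std::vector<unsigned> &SuccPredCounts) {
  unsigned MinSucc = 0;
  unsigned MinNumPreds = SuccPredCounts[0];
  for (unsigned i = 1; i < SuccPredCounts.size(); ++i)
    if (SuccPredCounts[i] < MinNumPreds) {
      MinSucc = i;
      MinNumPreds = SuccPredCounts[i];  // keep the running minimum current
    }
  return MinSucc;
}

int main() {
  // Successor 2 has the fewest predecessors, so the undef branch goes there.
  assert(bestDestForJumpOnUndef({3, 2, 1, 2}) == 2);
}
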
/// ProcessBlock - If there are any predecessors whose control can be threaded
@@ -262,39 +288,28 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// terminator to an unconditional branch. This can occur due to threading in
// other blocks.
if (isa<ConstantInt>(Condition)) {
- DOUT << " In block '" << BB->getNameStart()
- << "' folding terminator: " << *BB->getTerminator();
+ DEBUG(errs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(BB);
return true;
}
// If the terminator is branching on an undef, we can pick any of the
- // successors to branch to. Since this is arbitrary, we pick the successor
- // with the fewest predecessors. This should reduce the in-degree of the
- // others.
+ // successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
- TerminatorInst *BBTerm = BB->getTerminator();
- unsigned MinSucc = 0;
- BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
- // Compute the successor with the minimum number of predecessors.
- unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
- for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
- TestBB = BBTerm->getSuccessor(i);
- unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
- if (NumPreds < MinNumPreds)
- MinSucc = i;
- }
+ unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
// Fold the branch/switch.
+ TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
- if (i == MinSucc) continue;
+ if (i == BestSucc) continue;
BBTerm->getSuccessor(i)->removePredecessor(BB);
}
- DOUT << " In block '" << BB->getNameStart()
- << "' folding undef terminator: " << *BBTerm;
- BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm);
+ DEBUG(errs() << " In block '" << BB->getName()
+ << "' folding undef terminator: " << *BBTerm << '\n');
+ BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
return true;
}
@@ -419,8 +434,8 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
else if (PredBI->getSuccessor(0) != BB)
BranchDir = false;
else {
- DOUT << " In block '" << PredBB->getNameStart()
- << "' folding terminator: " << *PredBB->getTerminator();
+ DEBUG(errs() << " In block '" << PredBB->getName()
+ << "' folding terminator: " << *PredBB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(PredBB);
return true;
@@ -431,29 +446,24 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
// If the dest block has one predecessor, just fix the branch condition to a
// constant and fold it.
if (BB->getSinglePredecessor()) {
- DOUT << " In block '" << BB->getNameStart()
- << "' folding condition to '" << BranchDir << "': "
- << *BB->getTerminator();
+ DEBUG(errs() << " In block '" << BB->getName()
+ << "' folding condition to '" << BranchDir << "': "
+ << *BB->getTerminator() << '\n');
++NumFolds;
- DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, BranchDir));
+ Value *OldCond = DestBI->getCondition();
+ DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ BranchDir));
ConstantFoldTerminator(BB);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
return true;
}
-
- // Otherwise we need to thread from PredBB to DestBB's successor which
- // involves code duplication. Check to see if it is worth it.
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
- if (JumpThreadCost > Threshold) {
- DOUT << " Not threading BB '" << BB->getNameStart()
- << "' - Cost is too high: " << JumpThreadCost << "\n";
- return false;
- }
+
// Next, figure out which successor we are threading to.
BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+ return ThreadEdge(BB, PredBB, SuccBB);
}
/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
@@ -472,7 +482,6 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
if (PredBB == DestBB)
return false;
-
SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
@@ -508,8 +517,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
// Otherwise, we're safe to make the change. Make sure that the edge from
// DestSI to DestSucc is not critical and has no PHI nodes.
- DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI;
- DOUT << "THROUGH: " << *DestSI;
+ DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
+ DEBUG(errs() << "THROUGH: " << *DestSI);
// If the destination has PHI nodes, just split the edge for updating
// simplicity.
@@ -564,7 +573,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
- if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType());
+ if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
LI->replaceAllUsesWith(AvailableVal);
LI->eraseFromParent();
return true;
@@ -685,49 +694,74 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
}
-/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
/// the current block. See if there are any simplifications we can do based on
/// inputs to the phi node.
///
bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
- // See if the phi node has any constant values. If so, we can determine where
- // the corresponding predecessor will branch.
- ConstantInt *PredCst = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i))))
- break;
-
- // If no incoming value has a constant, we don't know the destination of any
- // predecessors.
- if (PredCst == 0)
- return false;
-
- // See if the cost of duplicating this block is low enough.
BasicBlock *BB = PN->getParent();
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
- if (JumpThreadCost > Threshold) {
- DOUT << " Not threading BB '" << BB->getNameStart()
- << "' - Cost is too high: " << JumpThreadCost << "\n";
- return false;
+
+ // See if the phi node has any constant integer or undef values. If so, we
+ // can determine where the corresponding predecessor will branch.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PredVal = PN->getIncomingValue(i);
+
+ // Check to see if this input is a constant integer. If so, the direction
+ // of the branch is predictable.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) {
+ // Merge any common predecessors that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI);
+
+ BasicBlock *SuccBB;
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ SuccBB = BI->getSuccessor(CI->isZero());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ SuccBB = SI->getSuccessor(SI->findCaseValue(CI));
+ }
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredBB, SuccBB);
+ }
+
+ // If the input is an undef, then it doesn't matter which way it will go.
+ // Pick an arbitrary dest and thread the edge.
+ if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) {
+ // Merge any common predecessors that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV);
+ BasicBlock *SuccBB =
+ BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredBB, SuccBB);
+ }
}
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+ // If the incoming values are all variables, we don't know the destination of
+ // any predecessors. However, if any of the predecessor blocks end in an
+ // unconditional branch, we can *duplicate* the jump into that block in order
+ // to further encourage jump threading and to eliminate cases where we have
+ // branch on a phi of an icmp (branch on icmp is much better).
+
+ // We don't want to do this transformation for switches, because we don't
+ // really want to duplicate a switch.
+ if (isa<SwitchInst>(BB->getTerminator()))
+ return false;
- // Next, figure out which successor we are threading to.
- BasicBlock *SuccBB;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse());
- else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst));
+ // Look for unconditional branch predecessors.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
+ if (PredBr->isUnconditional() &&
+ // Try to duplicate BB into PredBB.
+ DuplicateCondBranchOnPHIIntoPred(BB, PredBB))
+ return true;
}
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+
+ return false;
}
+
/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
/// whose condition is an AND/OR where one side is PN. If PN has constant
/// operands that permit us to evaluate the condition for some operand, thread
@@ -756,7 +790,8 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
// We can only do the simplification for phi nodes of 'false' with AND or
// 'true' with OR. See if we have any entries in the phi for this.
unsigned PredNo = ~0U;
- ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd);
+ ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ !isAnd);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) == PredCst) {
PredNo = i;
@@ -768,14 +803,6 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
if (PredNo == ~0U)
return false;
- // See if the cost of duplicating this block is low enough.
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
- if (JumpThreadCost > Threshold) {
- DOUT << " Not threading BB '" << BB->getNameStart()
- << "' - Cost is too high: " << JumpThreadCost << "\n";
- return false;
- }
-
// If so, we can actually do this threading. Merge any common predecessors
// that will act the same.
BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
@@ -787,7 +814,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+ return ThreadEdge(BB, PredBB, SuccBB);
}
/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
@@ -795,15 +822,15 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
/// result cannot be determined, a null pointer is returned.
static Constant *GetResultOfComparison(CmpInst::Predicate pred,
Value *LHS, Value *RHS,
- LLVMContext* Context) {
+ LLVMContext &Context) {
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return Context->getConstantExprCompare(pred, CLHS, CRHS);
+ return ConstantExpr::getCompare(pred, CLHS, CRHS);
if (LHS == RHS)
if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
return ICmpInst::isTrueWhenEqual(pred) ?
- Context->getConstantIntTrue() : Context->getConstantIntFalse();
+ ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);
return 0;
}
@@ -829,7 +856,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
PredVal = PN->getIncomingValue(i);
Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal,
- RHS, Context);
+ RHS, Cmp->getContext());
if (!Res) {
PredVal = 0;
continue;
@@ -854,14 +881,6 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
if (PredVal == 0)
return false;
- // See if the cost of duplicating this block is low enough.
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
- if (JumpThreadCost > Threshold) {
- DOUT << " Not threading BB '" << BB->getNameStart()
- << "' - Cost is too high: " << JumpThreadCost << "\n";
- return false;
- }
-
// If so, we can actually do this threading. Merge any common predecessors
// that will act the same.
BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
@@ -870,58 +889,77 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+ return ThreadEdge(BB, PredBB, SuccBB);
}
+/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
+/// NewPred using the entries from OldPred (suitably mapped).
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred,
+ DenseMap<Instruction*, Value*> &ValueMap) {
+ for (BasicBlock::iterator PNI = PHIBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(OldPred);
+
+ // Remap the value if necessary.
+ if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+ if (I != ValueMap.end())
+ IV = I->second;
+ }
+
+ PN->addIncoming(IV, NewPred);
+ }
+}
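
AddPHINodeEntriesForMappedBlock boils down to a lookup with fallback: the new predecessor reuses the old predecessor's incoming value, remapped through ValueMap when that value was an instruction that got cloned. A standalone sketch (illustrative names):

#include <cassert>
#include <string>
#include <unordered_map>

// Returns the PHI incoming value to record for the new predecessor.
std::string incomingForNewPred(
    const std::string &OldIncoming,
    const std::unordered_map<std::string, std::string> &ValueMap) {
  auto I = ValueMap.find(OldIncoming);
  return I != ValueMap.end() ? I->second : OldIncoming;
}

int main() {
  std::unordered_map<std::string, std::string> VM = {{"%x", "%x.thread"}};
  assert(incomingForNewPred("%x", VM) == "%x.thread");  // cloned value
  assert(incomingForNewPred("%g", VM) == "%g");         // unchanged value
}
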
+
/// ThreadEdge - We have decided that it is safe and profitable to thread an
/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
/// change.
bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
- BasicBlock *SuccBB, unsigned JumpThreadCost) {
-
+ BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would loop infinitely.
if (SuccBB == BB) {
- DOUT << " Not threading across BB '" << BB->getNameStart()
- << "' - would thread to self!\n";
+ DEBUG(errs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
return false;
}
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DOUT << " Not threading from '" << PredBB->getNameStart()
- << "' across loop header BB '" << BB->getNameStart()
- << "' to dest BB '" << SuccBB->getNameStart()
- << "' - it might create an irreducible loop!\n";
+ DEBUG(errs() << " Not threading from '" << PredBB->getName()
+ << "' across loop header BB '" << BB->getName()
+ << "' to dest BB '" << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n");
return false;
}
- // And finally, do it!
- DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '"
- << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost
- << ", across block:\n "
- << *BB << "\n";
-
- // Jump Threading can not update SSA properties correctly if the values
- // defined in the duplicated block are used outside of the block itself. For
- // this reason, we spill all values that are used outside of BB to the stack.
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- if (!I->isUsedOutsideOfBlock(BB))
- continue;
-
- // We found a use of I outside of BB. Create a new stack slot to
- // break this inter-block usage pattern.
- DemoteRegToStack(*I);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DEBUG(errs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << JumpThreadCost << "\n");
+ return false;
}
-
+
+ // And finally, do it!
+ DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ << SuccBB->getName() << "' with cost: " << JumpThreadCost
+ << ", across block:\n "
+ << *BB << "\n");
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
DenseMap<Instruction*, Value*> ValueMapping;
- BasicBlock *NewBB =
- BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB);
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
+ BB->getParent(), BB);
NewBB->moveAfter(PredBB);
BasicBlock::iterator BI = BB->begin();
@@ -932,7 +970,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
// mapping and using it to remap operands in the cloned instructions.
for (; !isa<TerminatorInst>(BI); ++BI) {
Instruction *New = BI->clone();
- New->setName(BI->getNameStart());
+ New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[BI] = New;
@@ -951,21 +989,48 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
- for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) {
- PHINode *PN = cast<PHINode>(PNI);
- // Ok, we have a PHI node. Figure out what the incoming value was for the
- // DestBlock.
- Value *IV = PN->getIncomingValueForBlock(BB);
-
- // Remap the value if necessary.
- if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
- DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
- if (I != ValueMapping.end())
- IV = I->second;
+ AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
}
- PN->addIncoming(IV, NewBB);
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. Seed the
+ // SSAUpdater with the two values we know.
+ SSAUpdate.Initialize(I);
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(errs() << "\n");
}
+
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
// NewBB instead of BB. This eliminates predecessors from BB, which requires
// us to simplify any PHI nodes in BB.
@@ -982,7 +1047,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
BI = NewBB->begin();
for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
Inst->replaceAllUsesWith(C);
Inst->eraseFromParent();
continue;
@@ -995,3 +1060,120 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
++NumThreads;
return true;
}
+
+/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
+/// If we can duplicate the contents of BB up into PredBB, do so now; this
+/// improves the odds that the branch will be on an analyzable instruction like
+/// a compare.
+bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ BasicBlock *PredBB) {
+ // If BB is a loop header, then duplicating this block outside the loop would
+ // cause us to transform this into an irreducible loop, don't do this.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(errs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBB->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ if (DuplicationCost > Threshold) {
+ DEBUG(errs() << " Not duplicating BB '" << BB->getName()
+ << "' - Cost is too high: " << DuplicationCost << "\n");
+ return false;
+ }
+
+ // Okay, we decided to do this! Clone all the instructions in BB onto the end
+ // of PredBB.
+ DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ << PredBB->getName() << "' to eliminate branch on phi. Cost: "
+ << DuplicationCost << " block is:" << *BB << "\n");
+
+ // We are going to have to map operands from the original BB into the
+ // PredBB block. Evaluate PHI nodes in BB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+
+ // Clone the non-phi instructions of BB into PredBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; BI != BB->end(); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ // Check to see if the targets of the branch had PHI nodes. If so, we need to
+ // add entries to the PHI nodes for the branch from PredBB now.
+ BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. Seed ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I);
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(errs() << "\n");
+ }
+
+ // PredBB no longer jumps to BB; remove entries in the PHI nodes for the edge
+ // that we nuked.
+ BB->removePredecessor(PredBB);
+
+ // Remove the unconditional branch at the end of the PredBB block.
+ OldPredBranch->eraseFromParent();
+
+ ++NumDupes;
+ return true;
+}
+
+
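A condensed sketch of the clone-and-remap idiom the new DuplicateCondBranchOnPHIIntoPred uses, under the clone() and getInstList() interfaces shown in the hunk; the helper and its parameters are illustrative only:

    #include "llvm/Instructions.h"
    #include "llvm/ADT/DenseMap.h"

    using namespace llvm;

    // Copy the body of BB (everything past the PHIs, terminator included) in
    // front of InsertPt, recording each clone in ValueMapping and redirecting
    // operands that referred to earlier instructions of BB to the clones.
    static void CloneBlockBody(BasicBlock *BB, Instruction *InsertPt,
                               DenseMap<Instruction*, Value*> &ValueMapping) {
      BasicBlock *Pred = InsertPt->getParent();
      for (BasicBlock::iterator BI = BB->begin(); BI != BB->end(); ++BI) {
        if (isa<PHINode>(BI))
          continue;                       // PHIs were evaluated beforehand.
        Instruction *New = BI->clone();   // Operands still point into BB.
        New->setName(BI->getName());
        Pred->getInstList().insert(InsertPt, New);
        ValueMapping[BI] = New;
        // Patch up any operand that was defined earlier in BB.
        for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
          if (Instruction *Op = dyn_cast<Instruction>(New->getOperand(i))) {
            DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Op);
            if (I != ValueMapping.end())
              New->setOperand(i, I->second);
          }
      }
    }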
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d6daeca1128c..756fbf3e7bd5 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -35,8 +35,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -46,8 +46,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include <algorithm>
@@ -73,7 +73,7 @@ EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden,
"global variables"));
namespace {
- struct VISIBILITY_HIDDEN LICM : public LoopPass {
+ struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LICM() : LoopPass(&ID) {}
@@ -91,6 +91,7 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominanceFrontier>();
+ AU.addPreservedID(LoopSimplifyID);
}
bool doFinalization() {
@@ -338,7 +339,6 @@ void LICM::SinkRegion(DomTreeNode *N) {
}
}
-
/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
@@ -389,9 +389,13 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// Don't hoist loads which have may-aliased stores in loop.
unsigned Size = 0;
if (LI->getType()->isSized())
- Size = AA->getTargetData().getTypeStoreSize(LI->getType());
+ Size = AA->getTypeStoreSize(LI->getType());
return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (isa<DbgStopPointInst>(CI)) {
+ // Don't hoist/sink dbg stop points; we handle them separately.
+ return false;
+ }
// Handle obvious cases efficiently.
AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
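The hunk above changes two things in canSinkOrHoistInst: load sizes are now obtained through the AliasAnalysis wrapper rather than through TargetData directly, and debug stop points are pinned in place. A minimal sketch of just those two checks, with a hypothetical helper name and the loop alias-set test elided:

    #include "llvm/Instructions.h"
    #include "llvm/IntrinsicInst.h"
    #include "llvm/Analysis/AliasAnalysis.h"

    using namespace llvm;

    // Compute the store size LICM should test against the loop's alias sets
    // for a load (0 for unsized types); refuse debug stop points outright.
    static bool PrefilterForLICM(Instruction &I, AliasAnalysis *AA,
                                 unsigned &Size) {
      Size = 0;
      if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
        if (LI->getType()->isSized())
          Size = AA->getTypeStoreSize(LI->getType());
        return true;  // Caller still runs pointerInvalidatedByLoop.
      }
      if (CallInst *CI = dyn_cast<CallInst>(&I))
        if (isa<DbgStopPointInst>(CI))
          return false;  // Handled separately; never hoist or sink.
      return true;       // Remaining cases fall through to further checks.
    }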
@@ -465,7 +469,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
/// position, and may either delete it or move it to outside of the loop.
///
void LICM::sink(Instruction &I) {
- DOUT << "LICM sinking instruction: " << I;
+ DEBUG(errs() << "LICM sinking instruction: " << I);
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
@@ -482,22 +486,27 @@ void LICM::sink(Instruction &I) {
if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
- if (!I.use_empty()) // If I has users in unreachable blocks, eliminate.
- I.replaceAllUsesWith(Context->getUndef(I.getType()));
+ // If I has users in unreachable blocks, eliminate them.
+ // If I is not of void type, replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!I.getType()->isVoidTy())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
I.removeFromParent();
-
BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
ExitBlocks[0]->getInstList().insert(InsertPt, &I);
}
} else if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
- if (!I.use_empty()) // If I has users in unreachable blocks, eliminate.
- I.replaceAllUsesWith(Context->getUndef(I.getType()));
+ // If I has users in unreachable blocks, eliminate them.
+ // If I is not of void type, replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!I.getType()->isVoidTy())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
} else {
// Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
@@ -507,7 +516,7 @@ void LICM::sink(Instruction &I) {
// Firstly, we create a stack object to hold the value...
AllocaInst *AI = 0;
- if (I.getType() != Type::VoidTy) {
+ if (!I.getType()->isVoidTy()) {
AI = new AllocaInst(I.getType(), 0, I.getName(),
I.getParent()->getParent()->getEntryBlock().begin());
CurAST->add(AI);
@@ -593,7 +602,7 @@ void LICM::sink(Instruction &I) {
if (AI) {
std::vector<AllocaInst*> Allocas;
Allocas.push_back(AI);
- PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+ PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST);
}
}
}
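PromoteMemToReg now takes the LLVMContext explicitly; both call sites in this file obtain it from a value they already hold. A minimal sketch of the updated call, assuming the caller already has the dominator tree, dominance frontier, and alias set tracker, as the pass does:

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Analysis/AliasSetTracker.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    #include <vector>

    using namespace llvm;

    // Promote one stack slot back into SSA form, threading the context
    // through the new parameter instead of reaching for a global.
    static void PromoteOne(AllocaInst *AI, DominatorTree &DT,
                           DominanceFrontier &DF, AliasSetTracker *AST) {
      std::vector<AllocaInst*> Allocas;
      Allocas.push_back(AI);
      PromoteMemToReg(Allocas, DT, DF, AI->getContext(), AST);
    }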
@@ -602,7 +611,8 @@ void LICM::sink(Instruction &I) {
/// that is safe to hoist, this instruction is called to do the dirty work.
///
void LICM::hoist(Instruction &I) {
- DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I;
+ DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": "
+ << I << "\n");
// Remove the instruction from its current basic block... but don't delete the
// instruction.
@@ -623,7 +633,8 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (!Inst.isTrapping()) return true;
+ if (Inst.isSafeToSpeculativelyExecute())
+ return true;
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
@@ -635,12 +646,6 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
if (Inst.getParent() == CurLoop->getHeader())
return true;
- // It's always safe to load from a global or alloca.
- if (isa<LoadInst>(Inst))
- if (isa<AllocationInst>(Inst.getOperand(0)) ||
- isa<GlobalVariable>(Inst.getOperand(0)))
- return true;
-
// Get the exit blocks for the current loop.
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
@@ -773,7 +778,7 @@ void LICM::PromoteValuesInLoop() {
PromotedAllocas.reserve(PromotedValues.size());
for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
PromotedAllocas.push_back(PromotedValues[i].first);
- PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
+ PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST);
}
/// FindPromotableValuesInLoop - Check the current loop for stores to definite
@@ -862,7 +867,7 @@ void LICM::FindPromotableValuesInLoop(
for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
- DOUT << "LICM: Promoting value: " << *V << "\n";
+ DEBUG(errs() << "LICM: Promoting value: " << *V << "\n");
}
}
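Two recurring changes in the LICM hunks deserve a note: the hoisting guard now asks Instruction::isSafeToSpeculativelyExecute() instead of inverting isTrapping(), and dead instructions are erased with the idiom below, which only rewrites uses for non-void values so that value handles and custom metadata can react. This sketch merely restates that idiom outside the pass:

    #include "llvm/Instructions.h"
    #include "llvm/Constants.h"

    using namespace llvm;

    // Erase an instruction the way the LICM hunks above do: non-void values
    // first forward their (possibly unreachable) uses to undef, so anything
    // watching the value through a handle sees the replacement happen.
    static void EraseAsLICMDoes(Instruction &I) {
      if (!I.getType()->isVoidTy())
        I.replaceAllUsesWith(UndefValue::get(I.getType()));
      I.eraseFromParent();
    }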
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 302cdec2ba4a..5f93756a05c0 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -15,19 +15,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-delete"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
-
using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
namespace {
- class VISIBILITY_HIDDEN LoopDeletion : public LoopPass {
+ class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
LoopDeletion() : LoopPass(&ID) {}
@@ -38,9 +36,9 @@ namespace {
bool SingleDominatingExit(Loop* L,
SmallVector<BasicBlock*, 4>& exitingBlocks);
bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks);
- bool IsLoopInvariantInst(Instruction *I, Loop* L);
-
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
virtual void getAnalysisUsage(AnalysisUsage& AU) const {
AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTree>();
@@ -84,32 +82,13 @@ bool LoopDeletion::SingleDominatingExit(Loop* L,
return DT.dominates(exitingBlocks[0], latch);
}
-/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to
-/// a loop, which is defined as being true if all of its operands are defined
-/// outside of the loop. These instructions can be hoisted out of the loop
-/// if their results are needed. This could be made more aggressive by
-/// recursively checking the operands for invariance, but it's not clear that
-/// it's worth it.
-bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L) {
- // PHI nodes are not loop invariant if defined in the loop.
- if (isa<PHINode>(I) && L->contains(I->getParent()))
- return false;
-
- // The instruction is loop invariant if all of its operands are loop-invariant
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (!L->isLoopInvariant(I->getOperand(i)))
- return false;
-
- // If we got this far, the instruction is loop invariant!
- return true;
-}
-
/// IsLoopDead - Determine if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
bool LoopDeletion::IsLoopDead(Loop* L,
SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks) {
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader) {
BasicBlock* exitingBlock = exitingBlocks[0];
BasicBlock* exitBlock = exitBlocks[0];
@@ -122,7 +101,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
while (PHINode* P = dyn_cast<PHINode>(BI)) {
Value* incoming = P->getIncomingValueForBlock(exitingBlock);
if (Instruction* I = dyn_cast<Instruction>(incoming))
- if (!IsLoopInvariantInst(I, L))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
BI++;
@@ -181,15 +160,16 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
return false;
// Finally, we have to check that the loop really is dead.
- if (!IsLoopDead(L, exitingBlocks, exitBlocks))
- return false;
+ bool Changed = false;
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
- const SCEV* S = SE.getBackedgeTakenCount(L);
+ const SCEV *S = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
- return false;
+ return Changed;
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
@@ -199,18 +179,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
// mess with this unless you have good reason and know what you're doing.
-
- // Move simple loop-invariant expressions out of the loop, since they
- // might be needed by the exit phis.
- for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
- LI != LE; ++LI)
- for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
- BI != BE; ) {
- Instruction* I = BI++;
- if (!I->use_empty() && IsLoopInvariantInst(I, L))
- I->moveBefore(preheader->getTerminator());
- }
-
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ SE.forgetLoopBackedgeTakenCount(L);
+
// Connect the preheader directly to the exit block.
TerminatorInst* TI = preheader->getTerminator();
TI->replaceUsesOfWith(L->getHeader(), exitBlock);
@@ -248,11 +222,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
(*LI)->dropAllReferences();
}
- // Tell ScalarEvolution that the loop is deleted. Do this before
- // deleting the loop so that ScalarEvolution can look at the loop
- // to determine what it needs to clean up.
- SE.forgetLoopBackedgeTakenCount(L);
-
// Erase the instructions and the blocks without having to worry
// about ordering because we already dropped the references.
// NOTE: This iteration is safe because erasing the block does not remove its
@@ -273,8 +242,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// The last step is to inform the loop pass manager that we've
// eliminated this loop.
LPM.deleteLoopFromQueue(L);
+ Changed = true;
NumDeleted++;
- return true;
+ return Changed;
}
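The removed IsLoopInvariantInst helper is subsumed by Loop::makeLoopInvariant, which answers the invariance question and performs the hoist in one step, reporting any IR change through its Changed flag; this is why runOnLoop now returns Changed rather than false on its bail-out paths. A minimal sketch of the new call, assuming the loop has a preheader, as the pass already requires:

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/LoopInfo.h"

    using namespace llvm;

    // Try to make I invariant in L; on success it is moved before the
    // preheader's terminator.  Changed is set whenever IR was modified,
    // even if the overall query fails, so callers must propagate it.
    static bool HoistIfInvariant(Instruction *I, Loop *L, bool &Changed) {
      BasicBlock *Preheader = L->getLoopPreheader();
      return L->makeLoopInvariant(I, Changed, Preheader->getTerminator());
    }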
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 38e3a8b7af70..5f9d3703da99 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -51,7 +51,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-index-split"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
@@ -61,7 +60,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -73,8 +71,7 @@ STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
namespace {
- class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass {
-
+ class LoopIndexSplit : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
LoopIndexSplit() : LoopPass(&ID) {}
@@ -294,31 +291,33 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {
// Return V+1
static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext* Context) {
- Constant *One = Context->getConstantInt(V->getType(), 1, Sign);
+ LLVMContext &Context) {
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
}
// Return V-1
static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext* Context) {
- Constant *One = Context->getConstantInt(V->getType(), 1, Sign);
+ LLVMContext &Context) {
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
}
// Return min(V1, V2)
static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
- Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp", InsertPt);
+ Value *C = new ICmpInst(InsertPt,
+ Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ V1, V2, "lsp");
return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
}
// Return max(V1, V2)
static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
- Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp", InsertPt);
+ Value *C = new ICmpInst(InsertPt,
+ Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ V1, V2, "lsp");
return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
}
@@ -427,15 +426,15 @@ bool LoopIndexSplit::processOneIterationLoop() {
// c1 = icmp uge i32 SplitValue, StartValue
// c2 = icmp ult i32 SplitValue, ExitValue
// and i32 c1, c2
- Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ?
+ Instruction *C1 = new ICmpInst(BR, ExitCondition->isSignedPredicate() ?
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
- SplitValue, StartValue, "lisplit", BR);
+ SplitValue, StartValue, "lisplit");
CmpInst::Predicate C2P = ExitCondition->getPredicate();
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- if (LatchBR->getOperand(0) != Header)
+ if (LatchBR->getOperand(1) != Header)
C2P = CmpInst::getInversePredicate(C2P);
- Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR);
+ Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
SplitCondition->replaceAllUsesWith(NSplitCond);
@@ -491,6 +490,8 @@ bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
EBR->setSuccessor(1, T);
}
+ LLVMContext &Context = Op.getContext();
+
// New upper and lower bounds.
Value *NLB = NULL;
Value *NUB = NULL;
@@ -698,7 +699,8 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
E = df_end(DN); DI != E; ++DI) {
BasicBlock *BB = DI->getBlock();
WorkList.push_back(BB);
- BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy));
+ BB->replaceAllUsesWith(UndefValue::get(
+ Type::getLabelTy(DeadBB->getContext())));
}
while (!WorkList.empty()) {
@@ -877,6 +879,8 @@ bool LoopIndexSplit::splitLoop() {
BasicBlock *ExitingBlock = ExitCondition->getParent();
if (!cleanBlock(ExitingBlock)) return false;
+ LLVMContext &Context = Header->getContext();
+
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
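The LoopIndexSplit hunks track two API migrations: the ICmpInst constructor that inserts before an instruction now takes the insertion point first, and ConstantInt::get replaces LLVMContext::getConstantInt. A sketch combining both, modeled on the rewritten getPlusOne and getMin helpers; the names here are illustrative:

    #include "llvm/Instructions.h"
    #include "llvm/Constants.h"

    using namespace llvm;

    // Build V+1 and compare it against Bound, signed or unsigned, inserting
    // both instructions before InsertPt with the new argument orders.
    static Value *PlusOneLessThan(Value *V, Value *Bound, bool Sign,
                                  Instruction *InsertPt) {
      Constant *One = ConstantInt::get(V->getType(), 1, Sign);
      Value *Inc = BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
      return new ICmpInst(InsertPt,
                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
                          Inc, Bound, "lsp");
    }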
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 1f7892ad1015..70c69bb1dae0 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -32,7 +32,7 @@ using namespace llvm;
STATISTIC(NumRotated, "Number of loops rotated");
namespace {
- class VISIBILITY_HIDDEN RenameData {
+ class RenameData {
public:
RenameData(Instruction *O, Value *P, Instruction *H)
: Original(O), PreHeader(P), Header(H) { }
@@ -42,8 +42,7 @@ namespace {
Instruction *Header; // New header replacement
};
- class VISIBILITY_HIDDEN LoopRotate : public LoopPass {
-
+ class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
LoopRotate() : LoopPass(&ID) {}
@@ -178,6 +177,11 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Now, this loop is suitable for rotation.
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated.
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoopBackedgeTakenCount(L);
+
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
@@ -435,7 +439,8 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
// Right now original pre-header has two successors, new header and
// exit block. Insert new block between original pre-header and
// new header such that loop's new pre-header has only one successor.
- BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph",
+ BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
+ "bb.nph",
OrigHeader->getParent(),
NewHeader);
LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
@@ -511,26 +516,30 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
DF->addBasicBlock(L->getHeader(), LatchSet);
}
- // If a loop block dominates new loop latch then its frontier is
- // new header and Exit.
+ // If a loop block dominates the new loop latch, then add the new header
+ // and Exit to its dominance frontier and remove the new latch (which is
+ // equal to the original header).
BasicBlock *NewLatch = L->getLoopLatch();
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (DT->dominates(B, NewLatch)) {
- DominanceFrontier::iterator BDFI = DF->find(B);
- if (BDFI != DF->end()) {
- DominanceFrontier::DomSetType &BSet = BDFI->second;
- BSet = BDFI->second;
- BSet.clear();
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType BSet;
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- DF->addBasicBlock(B, BSet);
+
+ assert(NewLatch == OrigHeader && "NewLatch is not equal to OrigHeader");
+
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ BasicBlock *B = *BI;
+ if (DT->dominates(B, NewLatch)) {
+ DominanceFrontier::iterator BDFI = DF->find(B);
+ if (BDFI != DF->end()) {
+ DominanceFrontier::DomSetType &BSet = BDFI->second;
+ BSet.erase(NewLatch);
+ BSet.insert(L->getHeader());
+ BSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType BSet;
+ BSet.insert(L->getHeader());
+ BSet.insert(Exit);
+ DF->addBasicBlock(B, BSet);
+ }
}
}
}
@@ -538,22 +547,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
// Preserve canonical loop form, which means Exit block should
// have only one predecessor.
- BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this);
-
- // Preserve LCSSA.
- for (BasicBlock::iterator I = Exit->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
- unsigned N = PN->getNumIncomingValues();
- for (unsigned index = 0; index != N; ++index)
- if (PN->getIncomingBlock(index) == NExit) {
- PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(),
- NExit->begin());
- NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch());
- PN->setIncomingValue(index, NewPN);
- PN->setIncomingBlock(index, NExit);
- break;
- }
- }
+ SplitEdge(L->getLoopLatch(), Exit, this);
assert(NewHeader && L->getHeader() == NewHeader &&
"Invalid loop header after loop rotation");
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 046fed3d7157..d8f6cc18a1e9 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -24,7 +24,6 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
@@ -38,9 +37,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
@@ -64,26 +63,26 @@ namespace {
/// IVExpr - This structure keeps track of one IV expression inserted during
/// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
/// well as the PHI node and increment value created for rewrite.
- struct VISIBILITY_HIDDEN IVExpr {
- const SCEV* Stride;
- const SCEV* Base;
+ struct IVExpr {
+ const SCEV *Stride;
+ const SCEV *Base;
PHINode *PHI;
- IVExpr(const SCEV* const stride, const SCEV* const base, PHINode *phi)
+ IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
: Stride(stride), Base(base), PHI(phi) {}
};
/// IVsOfOneStride - This structure keeps track of all IV expressions inserted
/// during StrengthReduceStridedIVUsers for a particular stride of the IV.
- struct VISIBILITY_HIDDEN IVsOfOneStride {
+ struct IVsOfOneStride {
std::vector<IVExpr> IVs;
- void addIV(const SCEV* const Stride, const SCEV* const Base, PHINode *PHI) {
+ void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
IVs.push_back(IVExpr(Stride, Base, PHI));
}
};
- class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
+ class LoopStrengthReduce : public LoopPass {
IVUsers *IU;
LoopInfo *LI;
DominatorTree *DT;
@@ -92,11 +91,11 @@ namespace {
/// IVsByStride - Keep track of all IVs that have been inserted for a
/// particular stride.
- std::map<const SCEV*, IVsOfOneStride> IVsByStride;
+ std::map<const SCEV *, IVsOfOneStride> IVsByStride;
/// StrideNoReuse - Keep track of all the strides whose ivs cannot be
/// reused (nor should they be rewritten to reuse other strides).
- SmallSet<const SCEV*, 4> StrideNoReuse;
+ SmallSet<const SCEV *, 4> StrideNoReuse;
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
@@ -134,7 +133,7 @@ namespace {
private:
ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
- const SCEV* const * &CondStride);
+ const SCEV *const * &CondStride);
void OptimizeIndvars(Loop *L);
void OptimizeLoopCountIV(Loop *L);
@@ -150,16 +149,16 @@ namespace {
IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV* const * &CondStride);
+ const SCEV *const * &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
- const SCEV* CheckForIVReuse(bool, bool, bool, const SCEV* const&,
+ const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
IVExpr&, const Type*,
const std::vector<BasedUser>& UsersToProcess);
bool ValidScale(bool, int64_t,
const std::vector<BasedUser>& UsersToProcess);
bool ValidOffset(bool, int64_t, int64_t,
const std::vector<BasedUser>& UsersToProcess);
- const SCEV* CollectIVUsers(const SCEV* const &Stride,
+ const SCEV *CollectIVUsers(const SCEV *const &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
@@ -169,11 +168,11 @@ namespace {
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
bool AllUsesAreAddresses,
- const SCEV* Stride);
+ const SCEV *Stride);
void PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
- const SCEV* Stride,
- const SCEV* CommonExprs,
+ const SCEV *Stride,
+ const SCEV *CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter);
void PrepareToStrengthReduceFromSmallerStride(
@@ -183,13 +182,13 @@ namespace {
Instruction *PreInsertPt);
void PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
- const SCEV* Stride,
- const SCEV* CommonExprs,
+ const SCEV *Stride,
+ const SCEV *CommonExprs,
Value *CommonBaseV,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
- void StrengthReduceStridedIVUsers(const SCEV* const &Stride,
+ void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
IVUsersOfOneStride &Uses,
Loop *L);
void DeleteTriviallyDeadInstructions();
@@ -233,7 +232,7 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
+static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
if (isa<SCEVConstant>(S))
return false;
@@ -248,7 +247,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
if (newLoop == L)
return false;
// if newLoop is an outer loop of L, this is OK.
- if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+ if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
return false;
}
return true;
@@ -328,7 +327,7 @@ namespace {
/// this use. As the use is processed, information gets moved from this
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
- const SCEV* Base;
+ const SCEV *Base;
/// Inst - The instruction using the induction variable.
Instruction *Inst;
@@ -341,7 +340,7 @@ namespace {
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
/// must be added inside the loop.
- const SCEV* Imm;
+ const SCEV *Imm;
/// Phi - The induction variable that performs the striding that
/// should be used for this user.
@@ -363,13 +362,13 @@ namespace {
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
- void RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
+ void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
Instruction *InsertPt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
SmallVectorImpl<WeakVH> &DeadInsts);
- Value *InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
+ Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -379,12 +378,12 @@ namespace {
}
void BasedUser::dump() const {
- cerr << " Base=" << *Base;
- cerr << " Imm=" << *Imm;
- cerr << " Inst: " << *Inst;
+ errs() << " Base=" << *Base;
+ errs() << " Imm=" << *Imm;
+ errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -408,7 +407,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
- const SCEV* NewValSCEV = SE->getUnknown(Base);
+ const SCEV *NewValSCEV = SE->getUnknown(Base);
// Always emit the immediate into the same block as the user.
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
@@ -423,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
// value of NewBase in the case that it's a different instruction from
// the PHI that NewBase is computed from, or null otherwise.
//
-void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
+void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
Instruction *NewBasePt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
@@ -460,9 +459,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
- DOUT << " Replacing with ";
- DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false));
- DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
+ DEBUG(errs() << " Replacing with ");
+ DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false));
+ DEBUG(errs() << ", which has value " << *NewBase << " plus IMM "
+ << *Imm << "\n");
return;
}
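From here on the LoopStrengthReduce hunks repeatedly swap the old DOUT stream for DEBUG(errs() << ...), which compiles away entirely in NDEBUG builds. The WriteAsOperand form from the hunk above, restated as a standalone sketch; the helper name is illustrative, and WriteAsOperand is assumed to come from llvm/Assembly/Writer.h as elsewhere in this tree:

    #include "llvm/Value.h"
    #include "llvm/Assembly/Writer.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Emit the same trace as the hunk above: each statement sits inside
    // DEBUG(...) so release builds pay nothing for it.
    static void ReportReplacement(Value *NewVal) {
      DEBUG(errs() << "      Replacing with ");
      DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false));
      DEBUG(errs() << "\n");
    }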
@@ -483,43 +483,45 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
// loop because multiple copies sometimes do useful sinking of code in
// that case(?).
Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
+ BasicBlock *PHIPred = PN->getIncomingBlock(i);
if (L->contains(OldLoc->getParent())) {
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
- BasicBlock *PHIPred = PN->getIncomingBlock(i);
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
- SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
+ BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(),
+ P, false);
// Next step: move the basic block. In particular, if the PHI node
// is outside of the loop, and PredTI is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after PredTI.
- if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
- BasicBlock *NewBB = PN->getIncomingBlock(i);
+ if (L->contains(PHIPred) && !L->contains(PN->getParent()))
NewBB->moveBefore(PN->getParent());
- }
// Splitting the edge can reduce the number of PHI entries we have.
e = PN->getNumIncomingValues();
+ PHIPred = NewBB;
+ i = PN->getBasicBlockIndex(PHIPred);
}
}
- Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
+ Value *&Code = InsertedCode[PHIPred];
if (!Code) {
// Insert the code into the end of the predecessor block.
Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
- PN->getIncomingBlock(i)->getTerminator() :
+ PHIPred->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
Rewriter, InsertPt, L, LI);
- DOUT << " Changing PHI use to ";
- DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false));
- DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
+ DEBUG(errs() << " Changing PHI use to ");
+ DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false));
+ DEBUG(errs() << ", which has value " << *NewBase << " plus IMM "
+ << *Imm << "\n");
}
// Replace the use of the operand Value with the new Phi we just created.
@@ -535,7 +537,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
/// fitsInAddressMode - Return true if V can be subsumed within an addressing
/// mode, and does not need to be put in a register first.
-static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy,
+static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
const TargetLowering *TLI, bool HasBaseReg) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
int64_t VC = SC->getValue()->getSExtValue();
@@ -567,12 +569,12 @@ static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy,
/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
/// loop varying to the Imm operand.
-static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,
+static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- SmallVector<const SCEV*, 4> NewOps;
+ SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
@@ -590,10 +592,10 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,
Val = SE->getAddExpr(NewOps);
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
- const SCEV* Start = SARE->getStart();
+ const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
- SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
@@ -609,15 +611,15 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,
/// Accumulate these immediate values into the Imm value.
static void MoveImmediateValues(const TargetLowering *TLI,
const Type *AccessTy,
- const SCEV* &Val, const SCEV* &Imm,
+ const SCEV *&Val, const SCEV *&Imm,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- SmallVector<const SCEV*, 4> NewOps;
+ SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
- const SCEV* NewOp = SAE->getOperand(i);
+ const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
if (!NewOp->isLoopInvariant(L)) {
@@ -636,11 +638,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,
return;
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
- const SCEV* Start = SARE->getStart();
+ const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
if (Start != SARE->getStart()) {
- SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
}
@@ -651,8 +653,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,
fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
- const SCEV* SubImm = SE->getIntegerSCEV(0, Val->getType());
- const SCEV* NewOp = SME->getOperand(1);
+ const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
+ const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
// If we extracted something out of the subexpressions, see if we can
@@ -687,7 +689,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
static void MoveImmediateValues(const TargetLowering *TLI,
Instruction *User,
- const SCEV* &Val, const SCEV* &Imm,
+ const SCEV *&Val, const SCEV *&Imm,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
const Type *AccessTy = getAccessType(User);
@@ -697,19 +699,19 @@ static void MoveImmediateValues(const TargetLowering *TLI,
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(SmallVector<const SCEV*, 16> &SubExprs,
- const SCEV* Expr,
+static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
+ const SCEV *Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
- const SCEV* Zero = SE->getIntegerSCEV(0, Expr->getType());
+ const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());
if (SARE->getOperand(0) == Zero) {
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
- SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
@@ -733,7 +735,7 @@ struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
/// not remove anything. This looks for things like (a+b+c) and
/// (a+c+d) and computes the common (a+c) subexpression. The common expression
/// is *removed* from the Bases and returned.
-static const SCEV*
+static const SCEV *
RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
ScalarEvolution *SE, Loop *L,
const TargetLowering *TLI) {
@@ -741,9 +743,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Only one use? This is a very common case, so we handle it specially and
// cheaply.
- const SCEV* Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
- const SCEV* Result = Zero;
- const SCEV* FreeResult = Zero;
+ const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
+ const SCEV *Result = Zero;
+ const SCEV *FreeResult = Zero;
if (NumUses == 1) {
// If the use is inside the loop, use its base, regardless of what it is:
// it is clearly shared across all the IV's. If the use is outside the loop
@@ -759,13 +761,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Also track whether all uses of each expression can be moved into an
// addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
- std::map<const SCEV*, SubExprUseData> SubExpressionUseData;
+ std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
- SmallVector<const SCEV*, 16> UniqueSubExprs;
+ SmallVector<const SCEV *, 16> UniqueSubExprs;
- SmallVector<const SCEV*, 16> SubExprs;
+ SmallVector<const SCEV *, 16> SubExprs;
unsigned NumUsesInsideLoop = 0;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
@@ -809,7 +811,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubExprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV*, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
@@ -853,7 +855,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV*, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
@@ -902,7 +904,8 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
- const Type *AccessTy = Type::VoidTy;
+ const Type *AccessTy =
+ Type::getVoidTy(UsersToProcess[i].Inst->getContext());
if (isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace))
AccessTy = getAccessType(UsersToProcess[i].Inst);
@@ -934,7 +937,8 @@ bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
- const Type *AccessTy = Type::VoidTy;
+ const Type *AccessTy =
+ Type::getVoidTy(UsersToProcess[i].Inst->getContext());
if (isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace))
AccessTy = getAccessType(UsersToProcess[i].Inst);
@@ -982,10 +986,10 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
/// be folded into the addressing mode, nor even that the factor be constant;
/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
-const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV* const &Stride,
+ const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (StrideNoReuse.count(Stride))
@@ -995,7 +999,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV*, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
@@ -1048,7 +1052,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV*, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
@@ -1068,7 +1072,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV*, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end())
continue;
@@ -1097,7 +1101,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
-static bool isNonConstantNegative(const SCEV* const &Expr) {
+static bool isNonConstantNegative(const SCEV *const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
@@ -1114,7 +1118,7 @@ static bool isNonConstantNegative(const SCEV* const &Expr) {
/// of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
-const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride,
+const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
@@ -1145,7 +1149,7 @@ const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride,
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
- const SCEV* CommonExprs =
+ const SCEV *CommonExprs =
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
@@ -1211,7 +1215,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
bool AllUsesAreAddresses,
- const SCEV* Stride) {
+ const SCEV *Stride) {
if (!EnableFullLSRMode)
return false;
@@ -1248,7 +1252,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
const Instruction *Inst = UsersToProcess[i].Inst;
const Type *AccessTy = getAccessType(Inst);
- const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
+ const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
if (!Diff->isZero() &&
(!AllUsesAreAddresses ||
!fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
@@ -1282,7 +1286,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
///
/// Return the created phi node.
///
-static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step,
+static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &Rewriter) {
@@ -1302,7 +1306,7 @@ static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step,
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Step);
- const SCEV* IncAmount = Step;
+ const SCEV *IncAmount = Step;
if (isNegative)
IncAmount = Rewriter.SE.getNegativeSCEV(Step);
@@ -1341,13 +1345,13 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
- // const SCEV*'s don't have a deterministic ordering w.r.t to each other, so
+ // const SCEV *'s don't have a deterministic ordering w.r.t. each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
- const SCEV* Base = UsersToProcess[i].Base;
+ const SCEV *Base = UsersToProcess[i].Base;
// Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
@@ -1366,11 +1370,11 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
void
LoopStrengthReduce::PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
- const SCEV* Stride,
- const SCEV* CommonExprs,
+ const SCEV *Stride,
+ const SCEV *CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
- DOUT << " Fully reducing all users\n";
+ DEBUG(errs() << " Fully reducing all users\n");
// Rewrite the UsersToProcess records, creating a separate PHI for each
// unique Base value.
@@ -1379,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
// TODO: The uses are grouped by base, but not sorted. We arbitrarily
// pick the first Imm value here to start with, and adjust it for the
// other uses.
- const SCEV* Imm = UsersToProcess[i].Imm;
- const SCEV* Base = UsersToProcess[i].Base;
- const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm);
+ const SCEV *Imm = UsersToProcess[i].Imm;
+ const SCEV *Base = UsersToProcess[i].Base;
+ const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Loop over all the users with the same base.
@@ -1413,13 +1417,13 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
void
LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
- const SCEV* Stride,
- const SCEV* CommonExprs,
+ const SCEV *Stride,
+ const SCEV *CommonExprs,
Value *CommonBaseV,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
- DOUT << " Inserting new PHI:\n";
+ DEBUG(errs() << " Inserting new PHI:\n");
PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
Stride, IVIncInsertPt, L,
@@ -1432,9 +1436,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = Phi;
- DOUT << " IV=";
- DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false));
- DOUT << "\n";
+ DEBUG(errs() << " IV=");
+ DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false));
+ DEBUG(errs() << "\n");
}
/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
@@ -1447,8 +1451,8 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt) {
- DOUT << " Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride
- << " and BASE " << *ReuseIV.Base << "\n";
+ DEBUG(errs() << " Rewriting in terms of existing IV of STRIDE "
+ << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
// All the users will share the reused IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
@@ -1490,7 +1494,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
+void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
IVUsersOfOneStride &Uses,
Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
@@ -1513,7 +1517,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
// move information from the Base field to the Imm field, until we eventually
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
- const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
+ const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
@@ -1531,9 +1535,11 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
// If all uses are addresses, consider sinking the immediate part of the
// common expression back into uses if they can fit in the immediate fields.
if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
- const SCEV* NewCommon = CommonExprs;
- const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy);
- MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE);
+ const SCEV *NewCommon = CommonExprs;
+ const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy);
+ MoveImmediateValues(TLI, Type::getVoidTy(
+ L->getLoopPreheader()->getContext()),
+ NewCommon, Imm, true, L, SE);
if (!Imm->isZero()) {
bool DoSink = true;
@@ -1548,11 +1554,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
if (GV || Offset)
// Pass VoidTy as the AccessTy to be conservative, because
// there could be multiple access types among all the uses.
- DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy,
+ DoSink = IsImmFoldedIntoAddrMode(GV, Offset,
+ Type::getVoidTy(L->getLoopPreheader()->getContext()),
UsersToProcess, TLI);
if (DoSink) {
- DOUT << " Sinking " << *Imm << " back down into uses\n";
+ DEBUG(errs() << " Sinking " << *Imm << " back down into uses\n");
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
CommonExprs = NewCommon;
@@ -1564,9 +1571,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
// Now that we know what we need to do, insert the PHI node itself.
//
- DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
- << *Stride << ":\n"
- << " Common base: " << *CommonExprs << "\n";
+ DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
+ << *Stride << ":\n"
+ << " Common base: " << *CommonExprs << "\n");
SCEVExpander Rewriter(*SE);
SCEVExpander PreheaderRewriter(*SE);
@@ -1576,11 +1583,13 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
BasicBlock *LatchBlock = L->getLoopLatch();
Instruction *IVIncInsertPt = LatchBlock->getTerminator();
- Value *CommonBaseV = Context->getNullValue(ReplacedTy);
+ Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
- const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
- IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
- SE->getIntegerSCEV(0, Type::Int32Ty),
+ const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
+ IVExpr ReuseIV(SE->getIntegerSCEV(0,
+ Type::getInt32Ty(Preheader->getContext())),
+ SE->getIntegerSCEV(0,
+ Type::getInt32Ty(Preheader->getContext())),
0);
/// Choose a strength-reduction strategy and prepare for it by creating
@@ -1618,7 +1627,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
// strength-reduced forms. This outer loop handles all bases; the inner
// loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
- const SCEV* Base = UsersToProcess.back().Base;
+ const SCEV *Base = UsersToProcess.back().Base;
Instruction *Inst = UsersToProcess.back().Inst;
// Emit the code for Base into the preheader.
@@ -1626,17 +1635,17 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
if (!Base->isZero()) {
BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
- DOUT << " INSERTING code for BASE = " << *Base << ":";
+ DEBUG(errs() << " INSERTING code for BASE = " << *Base << ":");
if (BaseV->hasName())
- DOUT << " Result value name = %" << BaseV->getNameStr();
- DOUT << "\n";
+ DEBUG(errs() << " Result value name = %" << BaseV->getName());
+ DEBUG(errs() << "\n");
// If BaseV is a non-zero constant, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We
// do this by forcing a BitCast (noop cast) to be inserted into the
// preheader in this case.
if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
- !isa<Instruction>(BaseV)) {
+ isa<Constant>(BaseV)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
@@ -1650,15 +1659,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
// FIXME: Use emitted users to emit other users.
BasedUser &User = UsersToProcess.back();
- DOUT << " Examining ";
+ DEBUG(errs() << " Examining ");
if (User.isUseOfPostIncrementedValue)
- DOUT << "postinc";
+ DEBUG(errs() << "postinc");
else
- DOUT << "preinc";
- DOUT << " use ";
- DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
+ DEBUG(errs() << "preinc");
+ DEBUG(errs() << " use ");
+ DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace,
/*PrintType=*/false));
- DOUT << " in Inst: " << *(User.Inst);
+ DEBUG(errs() << " in Inst: " << *User.Inst);
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
@@ -1673,7 +1682,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
User.Inst->moveBefore(IVIncInsertPt);
}
- const SCEV* RewriteExpr = SE->getUnknown(RewriteOp);
+ const SCEV *RewriteExpr = SE->getUnknown(RewriteOp);
if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
SE->getEffectiveSCEVType(ReplacedTy)) {
@@ -1705,7 +1714,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
// The base has been used to initialize the PHI node but we don't want
// it here.
if (!ReuseIV.Base->isZero()) {
- const SCEV* typedBase = ReuseIV.Base;
+ const SCEV *typedBase = ReuseIV.Base;
if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
// It's possible the original IV is a larger type than the new IV,
@@ -1770,10 +1779,10 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV* const * &CondStride) {
+ const SCEV *const * &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
@@ -1800,7 +1809,7 @@ namespace {
const ScalarEvolution *SE;
explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
- bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) {
+ bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
@@ -1843,14 +1852,14 @@ namespace {
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
- const SCEV* const* &CondStride) {
+ const SCEV *const* &CondStride) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
// If there are other users of the condition's stride, don't bother
// trying to change the condition because the stride will still
// remain.
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator I =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
IU->IVUsesByStride.find(*CondStride);
if (I == IU->IVUsesByStride.end() ||
I->second->Users.size() != 1)
@@ -1867,11 +1876,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
- const SCEV* *NewStride = NULL;
+ const SCEV **NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
- const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy);
+ const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy);
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
@@ -1883,7 +1892,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
@@ -1942,7 +1951,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
NewCmpTy = NewCmpLHS->getType();
NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
- const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits);
+ const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
@@ -1963,7 +1972,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
- const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
@@ -1987,10 +1996,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
NewStride = &IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
- NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal);
+ NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
- Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal);
- NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy);
+ Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+ NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
@@ -2019,9 +2028,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
// Insert new compare instruction.
- Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS,
- L->getHeader()->getName() + ".termcond",
- OldCond);
+ Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
+ L->getHeader()->getName() + ".termcond");
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
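
The ICmpInst constructor also changed shape in this revision: the insert-before
instruction moved from the trailing position to the front, as the rewritten
call above shows. Old versus new, assuming the signatures this patch compiles
against:

    // Old: new ICmpInst(Pred, LHS, RHS, "name", InsertBefore);
    // New: the insertion point leads.
    llvm::ICmpInst *makeCmp(llvm::Instruction *InsertBefore,
                            llvm::Value *LHS, llvm::Value *RHS) {
      return new llvm::ICmpInst(InsertBefore, llvm::ICmpInst::ICMP_EQ,
                                LHS, RHS, "cmp");
    }
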
@@ -2098,13 +2106,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
- const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
- const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+ const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
// Add one to the backedge-taken count to get the trip count.
- const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
+ const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
@@ -2117,13 +2125,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
if (Max->getNumOperands() != 2)
return Cond;
- const SCEV* MaxLHS = Max->getOperand(0);
- const SCEV* MaxRHS = Max->getOperand(1);
+ const SCEV *MaxLHS = Max->getOperand(0);
+ const SCEV *MaxRHS = Max->getOperand(1);
if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
- const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
@@ -2152,7 +2160,7 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
- new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
+ new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
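
OptimizeMax, updated above, targets the guard-plus-max pattern that loop
rotation tends to produce; schematically (an interpretive sketch):

    // After rotation the trip count is computed as smax(1, n) so the
    // loop body runs at least once:
    //   if (n > 0) do { ... } while (++i < smax(1, n));
    // The guard already proves n >= 1, so the smax is redundant and the
    // exit test can become a plain signed  i < n , deleting the max.
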
@@ -2169,13 +2177,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
/// inside the loop then try to eliminate the cast operation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
- const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
if (!isa<SCEVConstant>(SI->first))
@@ -2209,7 +2217,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
if (TLI) {
// If target does not support DestTy natively then do not apply
// this transformation.
- MVT DVT = TLI->getValueType(DestTy);
+ EVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
@@ -2234,7 +2242,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
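
OptimizeShadowIV, whose body is edited here, builds a floating-point "shadow"
of an integer IV (the "IV.S." PHI created below) so a per-iteration cast
disappears; roughly:

    // before: an int-to-fp cast executes on every iteration
    //   for (int i = 0; i < n; ++i) { double d = (double)i; use(d); }
    // after: a parallel FP IV advances by FAdd instead
    //   double d = 0.0;
    //   for (int i = 0; i < n; ++i, d += 1.0) { use(d); }
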
@@ -2258,7 +2266,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
- Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue());
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
@@ -2294,6 +2302,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
BasicBlock *ExitingBlock = L->getExitingBlock();
+
if (!ExitingBlock)
// Multiple exits, just look at the exit in the latch block if there is one.
ExitingBlock = LatchBlock;
@@ -2305,7 +2314,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
- const SCEV* const *CondStride = 0;
+ const SCEV *const *CondStride = 0;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
@@ -2335,7 +2344,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
++NewStride) {
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
continue;
@@ -2349,7 +2358,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
- const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
@@ -2410,7 +2419,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// If the number of times the loop is executed isn't computable, give up.
- const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
@@ -2439,9 +2448,9 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// Handle only tests for equality for the moment, and only stride 1.
if (Cond->getPredicate() != CmpInst::ICMP_EQ)
return;
- const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+ const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
return;
// If the RHS of the comparison is defined inside the loop, the rewrite
@@ -2497,7 +2506,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
Value *startVal = phi->getIncomingValue(inBlock);
Value *endVal = Cond->getOperand(1);
// FIXME check for case where both are constant
- Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0);
+ Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal =
BinaryOperator::Create(Instruction::Sub, endVal, startVal,
"tmp", PreInsertPt);
@@ -2516,11 +2525,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
Changed = false;
if (!IU->IVUsesByStride.empty()) {
-#ifndef NDEBUG
- DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
- << "\" ";
- DEBUG(L->dump());
-#endif
+ DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
+ << "\" ";
+ L->dump());
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
@@ -2557,7 +2564,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// strides deterministic - not dependent on map order.
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e; ++Stride) {
- std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
// FIXME: Generalize to non-affine IV's.
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
index 23757cdb2d29..837ec59dbbce 100644
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ b/lib/Transforms/Scalar/LoopUnroll.cpp
@@ -17,9 +17,9 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <climits>
@@ -39,7 +39,7 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
"-unroll-threshold loop size is reached."));
namespace {
- class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
+ class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
LoopUnroll() : LoopPass(&ID) {}
@@ -96,10 +96,7 @@ static unsigned ApproximateLoopSize(const Loop *L) {
// is higher than other instructions. Here 3 and 10 are magic
// numbers that help one isolated test case from PR2067 without
// negatively impacting measured benchmarks.
- if (isa<IntrinsicInst>(I))
- Size = Size + 3;
- else
- Size = Size + 10;
+ Size += isa<IntrinsicInst>(I) ? 3 : 10;
} else {
++Size;
}
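
Condensed, the size heuristic weights instructions like this (the enclosing
call check is assumed, since only the weights are visible in this hunk):

    static unsigned approxCost(const llvm::Instruction &I) {
      // A call is assumed to expand to ~10 instructions, an intrinsic
      // to ~3; everything else counts as a single instruction.
      if (llvm::isa<llvm::CallInst>(I))
        return llvm::isa<llvm::IntrinsicInst>(I) ? 3 : 10;
      return 1;
    }
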
@@ -118,51 +115,48 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
BasicBlock *Header = L->getHeader();
- DOUT << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n";
+ DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ (void)Header;
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
unsigned Count = UnrollCount;
-
+
// Automatically select an unroll count.
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
+ // try to find greatest modulo of the trip count which is still under
// threshold value.
- if (TripCount != 0) {
- Count = TripCount;
- } else {
+ if (TripCount == 0)
return false;
- }
+ Count = TripCount;
}
// Enforce the threshold.
if (UnrollThreshold != NoThreshold) {
unsigned LoopSize = ApproximateLoopSize(L);
- DOUT << " Loop Size = " << LoopSize << "\n";
+ DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
uint64_t Size = (uint64_t)LoopSize*Count;
if (TripCount != 1 && Size > UnrollThreshold) {
- DOUT << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n";
- if (UnrollAllowPartial) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
- }
- if (Count < 2) {
- DOUT << " could not unroll partially\n";
- return false;
- } else {
- DOUT << " partially unrolling with count: " << Count << "\n";
- }
- } else {
- DOUT << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n";
+ DEBUG(errs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << UnrollThreshold << "\n");
+ if (!UnrollAllowPartial) {
+ DEBUG(errs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return false;
+ }
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = UnrollThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0) {
+ Count--;
+ }
+ if (Count < 2) {
+ DEBUG(errs() << " could not unroll partially\n");
return false;
}
+ DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
}
}
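
Worked through with hypothetical numbers: UnrollThreshold = 200, LoopSize = 30
and TripCount = 10 give an initial Count of 6, decremented until it divides
the trip count:

    unsigned Count = UnrollThreshold / LoopSize;  // 200 / 30 == 6
    while (Count != 0 && TripCount % Count != 0)
      --Count;                                    // 6 -> 5; 10 % 5 == 0
    // Count == 5: partially unroll; a Count below 2 means "give up".
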
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index de5eedf1e84c..f6de36292603 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -34,6 +34,7 @@
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
@@ -44,8 +45,8 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>
using namespace llvm;
@@ -56,12 +57,14 @@ STATISTIC(NumSelects , "Number of selects unswitched");
STATISTIC(NumTrivial , "Number of unswitches that are trivial");
STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+// The specific value of 50 here was chosen based only on intuition and a
+// few specific examples.
static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
- cl::init(10), cl::Hidden);
+ cl::init(50), cl::Hidden);
namespace {
- class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass {
+ class LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information
LPPassManager *LPM;
@@ -112,6 +115,10 @@ namespace {
private:
+ virtual void releaseMemory() {
+ UnswitchedVals.clear();
+ }
+
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
/// remove it.
void RemoveLoopFromWorklist(Loop *L) {
@@ -168,8 +175,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
if (isa<Constant>(Cond)) return 0;
// TODO: Handle: br (VARIANT|INVARIANT).
- // TODO: Hoist simple expressions out of loops.
- if (L->isLoopInvariant(Cond)) return Cond;
+
+ // Hoist simple values out.
+ if (L->makeLoopInvariant(Cond, Changed))
+ return Cond;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
if (BO->getOpcode() == Instruction::And ||
@@ -214,6 +223,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
/// and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
+ LLVMContext &Context = currentLoop->getHeader()->getContext();
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
@@ -231,7 +241,7 @@ bool LoopUnswitch::processCurrentLoop() {
Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
currentLoop, Changed);
if (LoopCond && UnswitchIfProfitable(LoopCond,
- Context->getConstantIntTrue())) {
+ ConstantInt::getTrue(Context))) {
++NumBranches;
return true;
}
@@ -261,7 +271,7 @@ bool LoopUnswitch::processCurrentLoop() {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
if (LoopCond && UnswitchIfProfitable(LoopCond,
- Context->getConstantIntTrue())) {
+ ConstantInt::getTrue(Context))) {
++NumSelects;
return true;
}
@@ -335,6 +345,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
BasicBlock **LoopExit) {
BasicBlock *Header = currentLoop->getHeader();
TerminatorInst *HeaderTerm = Header->getTerminator();
+ LLVMContext &Context = Header->getContext();
BasicBlock *LoopExitBB = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
@@ -349,10 +360,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// this.
if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(0)))) {
- if (Val) *Val = Context->getConstantIntTrue();
+ if (Val) *Val = ConstantInt::getTrue(Context);
} else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(1)))) {
- if (Val) *Val = Context->getConstantIntFalse();
+ if (Val) *Val = ConstantInt::getFalse(Context);
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
// If this isn't a switch on Cond, we can't handle it.
@@ -398,29 +409,14 @@ unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
if (IsTrivialUnswitchCondition(LIC))
return 0;
- // FIXME: This is really overly conservative. However, more liberal
- // estimations have thus far resulted in excessive unswitching, which is bad
- // both in compile time and in code size. This should be replaced once
- // someone figures out how a good estimation.
- return currentLoop->getBlocks().size();
-
- unsigned Cost = 0;
- // FIXME: this is brain dead. It should take into consideration code
- // shrinkage.
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities.
+ CodeMetrics Metrics;
for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- // Do not include empty blocks in the cost calculation. This happen due to
- // loop canonicalization and will be removed.
- if (BB->begin() == BasicBlock::iterator(BB->getTerminator()))
- continue;
-
- // Count basic blocks.
- ++Cost;
- }
-
- return Cost;
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+ return Metrics.NumInsts;
}
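
The cost model replacement above leans on CodeMetrics from
Analysis/InlineCost.h (already used by the inliner), so the unswitch threshold
now counts instructions rather than basic blocks, which is why the default
threshold rose from 10 to 50. The idiom, as used in this revision:

    llvm::CodeMetrics Metrics;
    for (llvm::Loop::block_iterator I = L->block_begin(),
         E = L->block_end(); I != E; ++I)
      Metrics.analyzeBasicBlock(*I);   // accumulate per-block statistics
    unsigned Cost = Metrics.NumInsts;  // the new unswitch cost
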
/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
@@ -445,9 +441,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
// FIXME: this should estimate growth by the amount of code shared by the
// resultant unswitched loops.
//
- DOUT << "NOT unswitching loop %"
- << currentLoop->getHeader()->getName() << ", cost too high: "
- << currentLoop->getBlocks().size() << "\n";
+ DEBUG(errs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n");
return false;
}
@@ -506,14 +502,20 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
// Insert a conditional branch on LIC to the two preheaders. The original
// code is the true version and the new code is the false version.
Value *BranchVal = LIC;
- if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty)
- BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt);
- else if (Val != Context->getConstantIntTrue())
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ else if (Val != ConstantInt::getTrue(Val->getContext()))
// We want to enter the new loop when the condition is true.
std::swap(TrueDest, FalseDest);
// Insert the new branch.
- BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+ BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+ // If either edge is critical, split it. This helps preserve LoopSimplify
+ // form for enclosing loops.
+ SplitCriticalEdge(BI, 0, this);
+ SplitCriticalEdge(BI, 1, this);
}
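
For context: an edge is critical when its source has several successors and
its destination several predecessors; splitting inserts a fresh block on the
edge. A hypothetical sketch of why that rescues LoopSimplify form here:

    // FalseDest may have predecessors besides the new preheader, making
    // the edge NewPH -> FalseDest critical:
    //   before:  NewPH ----------> FalseDest <--- Other
    //   after:   NewPH -> Split -> FalseDest <--- Other
    // The Split block gives the loop behind FalseDest a dedicated
    // predecessor again, as LoopSimplify requires.
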
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
@@ -524,10 +526,10 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
Constant *Val,
BasicBlock *ExitBlock) {
- DOUT << "loop-unswitch: Trivial-Unswitch loop %"
- << loopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function " << L->getHeader()->getParent()->getName()
- << " on cond: " << *Val << " == " << *Cond << "\n";
+ DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << L->getHeader()->getParent()->getName()
+ << " on cond: " << *Val << " == " << *Cond << "\n");
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
@@ -570,47 +572,11 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
- std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock));
-
- for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
- BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
- BasicBlock* StartBlock = Preds[j];
- BasicBlock* EndBlock;
- if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
- EndBlock = NewExitBlock;
- NewExitBlock = EndBlock->getSinglePredecessor();
- } else {
- EndBlock = ExitBlock;
- }
-
- std::set<PHINode*> InsertedPHIs;
- PHINode* OldLCSSA = 0;
- for (BasicBlock::iterator I = EndBlock->begin();
- (OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
- Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
- PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
- OldLCSSA->getName() + ".us-lcssa",
- NewExitBlock->getTerminator());
- NewLCSSA->addIncoming(OldValue, StartBlock);
- OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
- NewLCSSA);
- InsertedPHIs.insert(NewLCSSA);
- }
-
- BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
- for (BasicBlock::iterator I = NewExitBlock->begin();
- (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0;
- ++I) {
- PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
- OldLCSSA->getName() + ".us-lcssa",
- InsertPt);
- OldLCSSA->replaceAllUsesWith(NewLCSSA);
- NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
- }
-
- }
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
}
-
}
/// UnswitchNontrivialCondition - We determined that the loop is profitable
@@ -619,10 +585,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L) {
Function *F = loopHeader->getParent();
- DOUT << "loop-unswitch: Unswitching loop %"
- << loopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function " << F->getName()
- << " when '" << *Val << "' == " << *LIC << "\n";
+ DEBUG(errs() << "loop-unswitch: Unswitching loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << F->getName()
+ << " when '" << *Val << "' == " << *LIC << "\n");
LoopBlocks.clear();
NewBlocks.clear();
@@ -745,7 +711,7 @@ static void RemoveFromWorklist(Instruction *I,
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
- DOUT << "Replace with '" << *V << "': " << *I;
+ DEBUG(errs() << "Replace with '" << *V << "': " << *I);
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -788,7 +754,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
// dominates the latch).
LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
Pred->getTerminator()->eraseFromParent();
- new UnreachableInst(Pred);
+ new UnreachableInst(BB->getContext(), Pred);
// The loop is now broken, remove it from LI.
RemoveLoopFromHierarchy(L);
@@ -807,7 +773,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
return;
}
- DOUT << "Nuking dead block: " << *BB;
+ DEBUG(errs() << "Nuking dead block: " << *BB);
// Remove the instructions in the basic block from the worklist.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
@@ -815,8 +781,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
// Anything that uses the instructions in this basic block should have their
// uses replaced with undefs.
- if (!I->use_empty())
- I->replaceAllUsesWith(Context->getUndef(I->getType()));
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
// If this is the edge to the header block for a loop, remove the loop and
@@ -897,15 +865,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// selects, switches.
std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
std::vector<Instruction*> Worklist;
+ LLVMContext &Context = Val->getContext();
+
// If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
// in the loop with the appropriate one directly.
- if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) {
+ if (IsEqual || (isa<ConstantInt>(Val) &&
+ Val->getType() == Type::getInt1Ty(Val->getContext()))) {
Value *Replacement;
if (IsEqual)
Replacement = Val;
else
- Replacement = Context->getConstantInt(Type::Int1Ty,
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
for (unsigned i = 0, e = Users.size(); i != e; ++i)
@@ -937,27 +908,35 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// FIXME: This is a hack. We need to keep the successor around
// and hooked up so as to preserve the loop structure, because
// trying to update it is complicated. So instead we preserve the
- // loop structure and put the block on an dead code path.
-
- BasicBlock *SISucc = SI->getSuccessor(i);
- BasicBlock* Old = SI->getParent();
- BasicBlock* Split = SplitBlock(Old, SI, this);
-
- Instruction* OldTerm = Old->getTerminator();
- BranchInst::Create(Split, SISucc,
- Context->getConstantIntTrue(), OldTerm);
-
- LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
- Old->getTerminator()->eraseFromParent();
-
- PHINode *PN;
- for (BasicBlock::iterator II = SISucc->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- Value *InVal = PN->removeIncomingValue(Split, false);
- PN->addIncoming(InVal, Old);
- }
-
- SI->removeCase(i);
+ // loop structure and put the block on a dead code path.
+ BasicBlock *Switch = SI->getParent();
+ SplitEdge(Switch, SI->getSuccessor(i), this);
+ // Compute the successors instead of relying on the return value
+ // of SplitEdge, since it may have split the switch successor
+ // after PHI nodes.
+ BasicBlock *NewSISucc = SI->getSuccessor(i);
+ BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+ // Create an "unreachable" destination.
+ BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+ Switch->getParent(),
+ OldSISucc);
+ new UnreachableInst(Context, Abort);
+ // Force the new case destination to branch to the "unreachable"
+ // block while maintaining a (dead) CFG edge to the old block.
+ NewSISucc->getTerminator()->eraseFromParent();
+ BranchInst::Create(Abort, OldSISucc,
+ ConstantInt::getTrue(Context), NewSISucc);
+ // Release the PHI operands for this edge.
+ for (BasicBlock::iterator II = NewSISucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(II); ++II)
+ PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+ UndefValue::get(PN->getType()));
+ // Tell the domtree about the new block. We don't fully update the
+ // domtree here -- instead we force it to do a full recomputation
+ // after the pass is complete -- but we do need to inform it of
+ // new blocks.
+ if (DT)
+ DT->addNewBlock(Abort, NewSISucc);
break;
}
}
@@ -971,7 +950,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
SimplifyCode(Worklist, L);
}
-/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
/// loop, walk over it and constant prop, dce, and fold control flow where
/// possible. Note that this is effectively a very simple loop-structure-aware
/// optimizer. During processing of this loop, L could very well be deleted, so
@@ -986,14 +965,14 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.pop_back();
// Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
ReplaceUsesOfWith(I, C, Worklist, L, LPM);
continue;
}
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
- DOUT << "Remove dead instruction '" << *I;
+ DEBUG(errs() << "Remove dead instruction '" << *I);
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -1017,10 +996,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
break;
case Instruction::And:
if (isa<ConstantInt>(I->getOperand(0)) &&
- I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS
+ // constant -> RHS
+ I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
cast<BinaryOperator>(I)->swapOperands();
if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType() == Type::Int1Ty) {
+ if (CB->getType() == Type::getInt1Ty(I->getContext())) {
if (CB->isOne()) // X & 1 -> X
ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
else // X & 0 -> 0
@@ -1030,10 +1010,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
break;
case Instruction::Or:
if (isa<ConstantInt>(I->getOperand(0)) &&
- I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS
+ // constant -> RHS
+ I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
cast<BinaryOperator>(I)->swapOperands();
if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType() == Type::Int1Ty) {
+ if (CB->getType() == Type::getInt1Ty(I->getContext())) {
if (CB->isOne()) // X | 1 -> 1
ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
else // X | 0 -> X
@@ -1052,8 +1033,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
if (!SinglePred) continue; // Nothing to do.
assert(SinglePred == Pred && "CFG broken");
- DOUT << "Merging blocks: " << Pred->getName() << " <- "
- << Succ->getName() << "\n";
+ DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- "
+ << Succ->getName() << "\n");
// Resolve any single entry PHI nodes in Succ.
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
@@ -1080,7 +1061,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// remove dead blocks.
break; // FIXME: Enable.
- DOUT << "Folded branch: " << *BI;
+ DEBUG(errs() << "Folded branch: " << *BI);
BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
DeadSucc->removePredecessor(BI->getParent(), true);
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3c7a5ab8f4d3..c922814833c5 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -24,29 +24,33 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include <list>
using namespace llvm;
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
/// isBytewiseValue - If the specified value can be set by repeating the same
/// byte in memory, return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V, LLVMContext* Context) {
+static Value *isBytewiseValue(Value *V) {
+ LLVMContext &Context = V->getContext();
+
// All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType() == Type::Int8Ty) return V;
+ if (V->getType() == Type::getInt8Ty(Context)) return V;
// Constant float and double values can be handled as integer values if the
// corresponding integer value is "byteable". An important case is 0.0.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType() == Type::FloatTy)
- V = Context->getConstantExprBitCast(CFP, Type::Int32Ty);
- if (CFP->getType() == Type::DoubleTy)
- V = Context->getConstantExprBitCast(CFP, Type::Int64Ty);
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
// Don't handle long double formats, which have strange constraints.
}
@@ -69,7 +73,7 @@ static Value *isBytewiseValue(Value *V, LLVMContext* Context) {
if (Val != Val2)
return 0;
}
- return Context->getConstantInt(Val);
+ return ConstantInt::get(Context, Val);
}
}
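
Concretely, the splat test accepts or rejects values like these (the first few
examples echo the function's own comment):

    // isBytewiseValue(i8 x)       -> x      any byte splats trivially
    // isBytewiseValue(i32 0)      -> i8 0x00
    // isBytewiseValue(i32 -1)     -> i8 0xFF
    // isBytewiseValue(i16 0xF0F0) -> i8 0xF0
    // isBytewiseValue(i16 0x1234) -> null   bytes differ: 0x12 vs 0x34
    // isBytewiseValue(double 0.0) -> i8 0   via the i64 bitcast above
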
@@ -271,6 +275,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
if (Start < I->Start) {
I->Start = Start;
I->StartPtr = SI->getPointerOperand();
+ I->Alignment = SI->getAlignment();
}
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -295,8 +300,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
//===----------------------------------------------------------------------===//
namespace {
-
- class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass {
+ class MemCpyOpt : public FunctionPass {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
@@ -309,16 +313,15 @@ namespace {
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetData>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<TargetData>();
}
// Helper functions
- bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
- bool processMemCpy(MemCpyInst* M);
- bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
+ bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemCpy(MemCpyInst *M);
+ bool processMemMove(MemMoveInst *M);
+ bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
bool iterateOnFunction(Function &F);
};
@@ -337,27 +340,31 @@ static RegisterPass<MemCpyOpt> X("memcpyopt",
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (SI->isVolatile()) return false;
+ LLVMContext &Context = SI->getContext();
+
// There are two cases that are interesting for this code to handle: memcpy
// and memset. Right now we only handle memset.
// Ensure that the value being stored is something that can be memset'able a
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
- Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context);
+ Value *ByteVal = isBytewiseValue(SI->getOperand(0));
if (!ByteVal)
return false;
- TargetData &TD = getAnalysis<TargetData>();
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ Module *M = SI->getParent()->getParent()->getParent();
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
- MemsetRanges Ranges(TD);
+ MemsetRanges Ranges(*TD);
Value *StartPtr = SI->getPointerOperand();
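
The scan below grows MemsetRanges from stores at constant offsets off
StartPtr; the target shape is roughly this (an illustrative sketch):

    // Four adjacent byte-splattable stores ...
    //   p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
    // ... collapse into one intrinsic call:
    //   call void @llvm.memset.i64(i8* %p, i8 0, i64 4, i32 align)
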
@@ -385,12 +392,12 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
if (NextStore->isVolatile()) break;
// Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context))
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
break;
Ranges.addStore(Offset, NextStore);
@@ -405,7 +412,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
// store as well. We try to avoid this unless there is at least something
// interesting as a small compile-time optimization.
Ranges.addStore(0, SI);
-
Function *MemSetF = 0;
@@ -419,7 +425,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(TD))
+ if (!Range.isProfitableToUseMemset(*TD))
continue;
// Otherwise, we do want to transform this! Create a new memset. We put
@@ -429,37 +435,38 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
BasicBlock::iterator InsertPt = BI;
if (MemSetF == 0) {
- const Type *Tys[] = {Type::Int64Ty};
- MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
- ->getParent(), Intrinsic::memset,
- Tys, 1);
- }
+ const Type *Ty = Type::getInt64Ty(Context);
+ MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
+ }
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
// Cast the start ptr to be i8* as memset requires.
- const Type *i8Ptr = Context->getPointerTypeUnqual(Type::Int8Ty);
+ const Type *i8Ptr = Type::getInt8PtrTy(Context);
if (StartPtr->getType() != i8Ptr)
- StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+ StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
InsertPt);
Value *Ops[] = {
StartPtr, ByteVal, // Start, value
- Context->getConstantInt(Type::Int64Ty, Range.End-Range.Start), // size
- Context->getConstantInt(Type::Int32Ty, Range.Alignment) // align
+ // size
+ ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
+ // align
+ ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
};
Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
- DEBUG(cerr << "Replace stores:\n";
+ DEBUG(errs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- cerr << *Range.TheStores[i];
- cerr << "With: " << *C); C=C;
+ errs() << *Range.TheStores[i];
+ errs() << "With: " << *C); C=C;
// Don't invalidate the iterator
BBI = BI;
// Zap all the stores.
- for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+ for (SmallVector<StoreInst*, 16>::const_iterator
+ SI = Range.TheStores.begin(),
SE = Range.TheStores.end(); SI != SE; ++SI)
(*SI)->eraseFromParent();
++NumMemSetInfer;
@@ -490,29 +497,30 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Deliberately get the source and destination with bitcasts stripped away,
// because we'll need to do type comparisons based on the underlying type.
- Value* cpyDest = cpy->getDest();
- Value* cpySrc = cpy->getSource();
+ Value *cpyDest = cpy->getDest();
+ Value *cpySrc = cpy->getSource();
CallSite CS = CallSite::get(C);
// We need to be able to reason about the size of the memcpy, so we require
// that it be a constant.
- ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+ ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
if (!cpyLength)
return false;
// Require that src be an alloca. This simplifies the reasoning considerably.
- AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
return false;
// Check that all of src is copied to dest.
- TargetData& TD = getAnalysis<TargetData>();
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
- ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+ ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
- uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
if (cpyLength->getZExtValue() < srcSize)
@@ -521,25 +529,25 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
+ if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
// The destination is an alloca. Check it is larger than srcSize.
- ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+ ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
if (!destArraySize)
return false;
- uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
+ uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
- } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
+ } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
// If the destination is an sret parameter then only accesses that are
// outside of the returned struct type can trap.
if (!A->hasStructRetAttr())
return false;
- const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
- uint64_t destSize = TD.getTypeAllocSize(StructTy);
+ const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
@@ -554,14 +562,14 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
srcAlloca->use_end());
while (!srcUseList.empty()) {
- User* UI = srcUseList.back();
+ User *UI = srcUseList.back();
srcUseList.pop_back();
if (isa<BitCastInst>(UI)) {
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
srcUseList.push_back(*I);
- } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
if (G->hasAllZeroIndices())
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
@@ -575,8 +583,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree& DT = getAnalysis<DominatorTree>();
- if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT.dominates(cpyDestInst, C))
return false;
@@ -584,7 +592,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
AliasAnalysis::NoModRef)
return false;
@@ -597,11 +605,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
cpyDest->getName(), C);
changedArgument = true;
- if (CS.getArgument(i)->getType() != cpyDest->getType())
- CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
- CS.getArgument(i)->getType(), cpyDest->getName(), C));
- else
+ if (CS.getArgument(i)->getType() == cpyDest->getType())
CS.setArgument(i, cpyDest);
+ else
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
}
if (!changedArgument)
@@ -609,7 +617,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
MD.removeInstruction(C);
// Remove the memcpy
@@ -624,22 +632,22 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
/// This allows later passes to remove the first memcpy altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
// There are two possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE
- // b) call-memcpy xform for return slot optimization
+ // a) memcpy-memcpy xform which exposes redundancy for DSE.
+ // b) call-memcpy xform for return slot optimization.
MemDepResult dep = MD.getDependency(M);
if (!dep.isClobber())
return false;
if (!isa<MemCpyInst>(dep.getInst())) {
- if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+ if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
return performCallSlotOptzn(M, C);
return false;
}
- MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
+ MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
// We can only transform memcpys where the dest of one is the source of the
// other
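
The memcpy-memcpy case being set up here is plain source forwarding;
schematically:

    //   memcpy(Y, X, 64);   // A: X -> Y (B's clobbering dependency)
    //   memcpy(Z, Y, 64);   // B: Y -> Z
    // becomes
    //   memcpy(Y, X, 64);
    //   memcpy(Z, X, 64);   // B reads X directly; A may later fall to DSE
    // Valid only if Z does not alias the source range of A, which the
    // alias-analysis check below enforces.
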
@@ -648,8 +656,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
// Second, the length of the memcpys must be the same, or the preceding one
// must be larger than the following one.
- ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
- ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
if (!C1 || !C2)
return false;
@@ -661,7 +669,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
// Finally, we have to make sure that the dest of the second does not
// alias the source of the first
- AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
AliasAnalysis::NoAlias)
return false;
@@ -673,17 +681,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
return false;
// If all checks passed, then we can transform these memcpy's
- const Type *Tys[1];
- Tys[0] = M->getLength()->getType();
- Function* MemCpyFun = Intrinsic::getDeclaration(
+ const Type *Ty = M->getLength()->getType();
+ Function *MemCpyFun = Intrinsic::getDeclaration(
M->getParent()->getParent()->getParent(),
- M->getIntrinsicID(), Tys, 1);
+ M->getIntrinsicID(), &Ty, 1);
Value *Args[4] = {
M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
};
- CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+ CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
// If C and M don't interfere, then this is a valid transformation. If they
@@ -702,41 +709,78 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
return false;
}
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
-bool MemCpyOpt::runOnFunction(Function& F) {
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // If the memmove is a constant size, use it for the alias query, this allows
+ // us to optimize things like: memmove(P, P+64, 64);
+ uint64_t MemMoveSize = ~0ULL;
+ if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
+ MemMoveSize = Len->getZExtValue();
- bool changed = false;
- bool shouldContinue = true;
+ // See if the pointers alias.
+ if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
+ AliasAnalysis::NoAlias)
+ return false;
- while (shouldContinue) {
- shouldContinue = iterateOnFunction(F);
- changed |= shouldContinue;
- }
+ DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
- return changed;
-}
+ // If not, then we know we can transform this.
+ Module *Mod = M->getParent()->getParent()->getParent();
+ const Type *Ty = M->getLength()->getType();
+ M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
+ // MemDep may have overly conservative information about this instruction;
+ // conservatively flush it from the cache.
+ getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN
+ ++NumMoveToCpy;
+ return true;
+}
+
+
+// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
bool MemCpyOpt::iterateOnFunction(Function &F) {
- bool changed_function = false;
+ bool MadeChange = false;
- // Walk all instruction in the function
+ // Walk all instructions in the function.
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- // Avoid invalidating the iterator
- Instruction* I = BI++;
+ // Avoid invalidating the iterator.
+ Instruction *I = BI++;
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- changed_function |= processStore(SI, BI);
- else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
- changed_function |= processMemCpy(M);
+ MadeChange |= processStore(SI, BI);
+ else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+ MadeChange |= processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
+ if (processMemMove(M)) {
+ --BI; // Reprocess the new memcpy.
+ MadeChange = true;
+ }
}
}
}
- return changed_function;
+ return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+ bool MadeChange = false;
+ while (1) {
+ if (!iterateOnFunction(F))
+ break;
+ MadeChange = true;
+ }
+
+ return MadeChange;
}
+
+
+
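The memmove -> memcpy rewrite above hinges on one fact: the two intrinsics take the same operands, and once the alias query returns NoAlias the ranges are disjoint, so memcpy's stricter no-overlap contract is satisfied. A minimal standalone sketch of the same decision, assuming a hypothetical mayOverlap predicate in place of the real AliasAnalysis query (the raw pointer comparison is only illustrative):

#include <cstdint>
#include <cstring>

// Hypothetical stand-in for the alias query used by processMemMove:
// returns true unless the two ranges provably do not overlap.
static bool mayOverlap(const char *Dst, const char *Src, uint64_t Len) {
  return !(Src + Len <= Dst || Dst + Len <= Src);
}

// memmove tolerates overlapping ranges; memcpy does not. When the
// ranges are disjoint the two are interchangeable, which is exactly
// what lets the pass retarget the call to the memcpy intrinsic.
void moveOrCopy(char *Dst, const char *Src, uint64_t Len) {
  if (mayOverlap(Dst, Src, Len))
    std::memmove(Dst, Src, Len); // conservative: overlap is possible
  else
    std::memcpy(Dst, Src, Len);  // stricter contract, easier to optimize
}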
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index fa60a9dba3b5..e6ffac251b7b 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -31,9 +31,9 @@
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include <algorithm>
@@ -46,7 +46,7 @@ STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
namespace {
- struct VISIBILITY_HIDDEN ValueEntry {
+ struct ValueEntry {
unsigned Rank;
Value *Op;
ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
@@ -61,17 +61,17 @@ namespace {
///
static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {
Module *M = I->getParent()->getParent()->getParent();
- cerr << Instruction::getOpcodeName(I->getOpcode()) << " "
+ errs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType();
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M);
- cerr << "," << Ops[i].Rank;
+ WriteAsOperand(errs() << " ", Ops[i].Op, false, M);
+ errs() << "," << Ops[i].Rank;
}
}
#endif
namespace {
- class VISIBILITY_HIDDEN Reassociate : public FunctionPass {
+ class Reassociate : public FunctionPass {
std::map<BasicBlock*, unsigned> RankMap;
std::map<AssertingVH<>, unsigned> ValueRankMap;
bool MadeChange;
@@ -181,8 +181,8 @@ unsigned Reassociate::getRank(Value *V) {
(!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
++Rank;
- //DOUT << "Calculated Rank[" << V->getName() << "] = "
- // << Rank << "\n";
+ //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = "
+ // << Rank << "\n");
return CachedRank = Rank;
}
@@ -200,8 +200,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext* Context) {
- Constant *Cst = Context->getConstantIntAllOnesValue(Neg->getType());
+ LLVMContext &Context) {
+ Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
ValueRankMap.erase(Neg);
@@ -222,7 +222,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
isReassociableOp(RHS, I->getOpcode()) &&
"Not an expression that needs linearization?");
- DOUT << "Linear" << *LHS << *RHS << *I;
+ DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
// Move the RHS instruction to live immediately before I, avoiding breaking
// dominator properties.
@@ -235,7 +235,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
++NumLinear;
MadeChange = true;
- DOUT << "Linearized: " << *I;
+ DEBUG(errs() << "Linearized: " << *I << '\n');
// If D is part of this expression tree, tail recurse.
if (isReassociableOp(I->getOperand(1), I->getOpcode()))
@@ -256,6 +256,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::vector<ValueEntry> &Ops) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
unsigned Opcode = I->getOpcode();
+ LLVMContext &Context = I->getContext();
// First step, linearize the expression if it is in ((A+B)+(C+D)) form.
BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -284,8 +285,8 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
Ops.push_back(ValueEntry(getRank(RHS), RHS));
// Clear the leaves out.
- I->setOperand(0, Context->getUndef(I->getType()));
- I->setOperand(1, Context->getUndef(I->getType()));
+ I->setOperand(0, UndefValue::get(I->getType()));
+ I->setOperand(1, UndefValue::get(I->getType()));
return;
} else {
// Turn X+(Y+Z) -> (Y+Z)+X
@@ -320,7 +321,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
Ops.push_back(ValueEntry(getRank(RHS), RHS));
// Clear the RHS leaf out.
- I->setOperand(1, Context->getUndef(I->getType()));
+ I->setOperand(1, UndefValue::get(I->getType()));
}
// RewriteExprTree - Now that the operands for this expression tree are
@@ -333,10 +334,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (I->getOperand(0) != Ops[i].Op ||
I->getOperand(1) != Ops[i+1].Op) {
Value *OldLHS = I->getOperand(0);
- DOUT << "RA: " << *I;
+ DEBUG(errs() << "RA: " << *I << '\n');
I->setOperand(0, Ops[i].Op);
I->setOperand(1, Ops[i+1].Op);
- DOUT << "TO: " << *I;
+ DEBUG(errs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -349,9 +350,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
assert(i+2 < Ops.size() && "Ops index out of range!");
if (I->getOperand(1) != Ops[i].Op) {
- DOUT << "RA: " << *I;
+ DEBUG(errs() << "RA: " << *I << '\n');
I->setOperand(1, Ops[i].Op);
- DOUT << "TO: " << *I;
+ DEBUG(errs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
}
@@ -373,7 +374,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
-static Value *NegateValue(Value *V, Instruction *BI) {
+static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -386,8 +387,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(I->getOperand(0), BI));
- I->setOperand(1, NegateValue(I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
@@ -407,7 +408,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(Instruction *Sub) {
+static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub))
return false;
@@ -431,7 +432,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
-static Instruction *BreakUpSubtract(Instruction *Sub,
+static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction... so that sub
// instructions can be commuted with other add instructions...
@@ -439,7 +440,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
// Calculate the negative value of Operand 1 of the sub instruction...
// and set it as the RHS of the add instruction we just made...
//
- Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
Instruction *New =
BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
New->takeName(Sub);
@@ -449,7 +450,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
Sub->replaceAllUsesWith(New);
Sub->eraseFromParent();
- DOUT << "Negated: " << *New;
+ DEBUG(errs() << "Negated: " << *New << '\n');
return New;
}
@@ -458,16 +459,16 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext* Context) {
+ LLVMContext &Context) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
(Shl->hasOneUse() &&
(isReassociableOp(Shl->use_back(), Instruction::Mul) ||
isReassociableOp(Shl->use_back(), Instruction::Add)))) {
- Constant *MulCst = Context->getConstantInt(Shl->getType(), 1);
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
MulCst =
- Context->getConstantExprShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+ ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
"", Shl);
@@ -567,7 +568,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
Ops.pop_back();
- Ops.back().Op = Context->getConstantExpr(Opcode, V1, V2);
+ Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
return OptimizeExpression(I, Ops);
}
@@ -623,10 +624,10 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
if (FoundX != i) {
if (Opcode == Instruction::And) { // ...&X&~X = 0
++NumAnnihil;
- return Context->getNullValue(X->getType());
+ return Constant::getNullValue(X->getType());
} else if (Opcode == Instruction::Or) { // ...|X|~X = -1
++NumAnnihil;
- return Context->getConstantIntAllOnesValue(X->getType());
+ return Constant::getAllOnesValue(X->getType());
}
}
}
@@ -645,7 +646,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
assert(Opcode == Instruction::Xor);
if (e == 2) {
++NumAnnihil;
- return Context->getNullValue(Ops[0].Op->getType());
+ return Constant::getNullValue(Ops[0].Op->getType());
}
// ... X^X -> ...
Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
@@ -670,7 +671,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
// Remove X and -X from the operand list.
if (Ops.size() == 2) {
++NumAnnihil;
- return Context->getNullValue(X->getType());
+ return Constant::getNullValue(X->getType());
} else {
Ops.erase(Ops.begin()+i);
if (i < FoundX)
@@ -727,7 +728,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
// If any factor occurred more than one time, we can pull it out.
if (MaxOcc > 1) {
- DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n";
+ DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n");
// Create a new instruction that uses the MaxOccVal twice. If we don't do
// this, we could otherwise run into situations where removing a factor
@@ -781,6 +782,8 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
/// ReassociateBB - Inspect all of the instructions in this basic block,
/// reassociating them as we go.
void Reassociate::ReassociateBB(BasicBlock *BB) {
+ LLVMContext &Context = BB->getContext();
+
for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
Instruction *BI = BBI++;
if (BI->getOpcode() == Instruction::Shl &&
@@ -798,8 +801,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(BI)) {
- BI = BreakUpSubtract(BI, ValueRankMap);
+ if (ShouldBreakUpSubtract(Context, BI)) {
+ BI = BreakUpSubtract(Context, BI, ValueRankMap);
MadeChange = true;
} else if (BinaryOperator::isNeg(BI)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
@@ -838,7 +841,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
std::vector<ValueEntry> Ops;
LinearizeExprTree(I, Ops);
- DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+ DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n");
// Now that we have linearized the tree to a list and have gathered all of
// the operands and their ranks, sort the operands by their rank. Use a
@@ -853,7 +856,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
if (Value *V = OptimizeExpression(I, Ops)) {
// This expression tree simplified to something that isn't a tree,
// eliminate it.
- DOUT << "Reassoc to scalar: " << *V << "\n";
+ DEBUG(errs() << "Reassoc to scalar: " << *V << "\n");
I->replaceAllUsesWith(V);
RemoveDeadBinaryOp(I);
return;
@@ -871,7 +874,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
Ops.pop_back();
}
- DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+ DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n");
if (Ops.size() == 1) {
// This expression tree simplified to something that isn't a tree,
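A note on ConvertShiftToMul above: it relies on the identity x << c == x * (1 << c) for a constant c, so a shift feeding (or fed by) a reassociable multiply can join the multiply tree the pass already linearizes and ranks. A self-contained sketch of the identity, not the pass itself:

#include <cassert>
#include <cstdint>

// For a constant shift amount C, X << C equals X * (1 << C). Rewriting
// the shl as a mul is what exposes it to multiply reassociation.
uint32_t shlAsMul(uint32_t X, unsigned C) {
  uint32_t MulCst = uint32_t(1) << C; // mirrors the ConstantExpr::getShl fold
  return X * MulCst;
}

int main() {
  for (unsigned C = 0; C < 32; ++C)
    assert(shlAsMul(5u, C) == (5u << C)); // unsigned wraparound keeps both equal
  return 0;
}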
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index ac95d25b7f7f..99e12522ce0c 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -26,7 +26,6 @@
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include <list>
using namespace llvm;
@@ -35,7 +34,7 @@ STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
namespace {
- struct VISIBILITY_HIDDEN RegToMem : public FunctionPass {
+ struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
RegToMem() : FunctionPass(&ID) {}
@@ -44,73 +43,17 @@ namespace {
AU.addPreservedID(BreakCriticalEdgesID);
}
- bool valueEscapes(Instruction* i) {
- BasicBlock* bb = i->getParent();
- for (Value::use_iterator ii = i->use_begin(), ie = i->use_end();
- ii != ie; ++ii)
- if (cast<Instruction>(*ii)->getParent() != bb ||
- isa<PHINode>(*ii))
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end();
+ UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != BB ||
+ isa<PHINode>(*UI))
return true;
return false;
}
- virtual bool runOnFunction(Function &F) {
- if (!F.isDeclaration()) {
- // Insert all new allocas into entry block.
- BasicBlock* BBEntry = &F.getEntryBlock();
- assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
- "Entry block to function must not have predecessors!");
-
- // Find first non-alloca instruction and create insertion point. This is
- // safe if block is well-formed: it always have terminator, otherwise
- // we'll get and assertion.
- BasicBlock::iterator I = BBEntry->begin();
- while (isa<AllocaInst>(I)) ++I;
-
- CastInst *AllocaInsertionPoint =
- CastInst::Create(Instruction::BitCast,
- Context->getNullValue(Type::Int32Ty), Type::Int32Ty,
- "reg2mem alloca point", I);
-
- // Find the escaped instructions. But don't create stack slots for
- // allocas in entry block.
- std::list<Instruction*> worklist;
- for (Function::iterator ibb = F.begin(), ibe = F.end();
- ibb != ibe; ++ibb)
- for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
- iib != iie; ++iib) {
- if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
- valueEscapes(iib)) {
- worklist.push_front(&*iib);
- }
- }
-
- // Demote escaped instructions
- NumRegsDemoted += worklist.size();
- for (std::list<Instruction*>::iterator ilb = worklist.begin(),
- ile = worklist.end(); ilb != ile; ++ilb)
- DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
-
- worklist.clear();
-
- // Find all phi's
- for (Function::iterator ibb = F.begin(), ibe = F.end();
- ibb != ibe; ++ibb)
- for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
- iib != iie; ++iib)
- if (isa<PHINode>(iib))
- worklist.push_front(&*iib);
-
- // Demote phi nodes
- NumPhisDemoted += worklist.size();
- for (std::list<Instruction*>::iterator ilb = worklist.begin(),
- ile = worklist.end(); ilb != ile; ++ilb)
- DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
-
- return true;
- }
- return false;
- }
+ virtual bool runOnFunction(Function &F);
};
}
@@ -118,6 +61,66 @@ char RegToMem::ID = 0;
static RegisterPass<RegToMem>
X("reg2mem", "Demote all values to stack slots");
+
+bool RegToMem::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ // Insert all new allocas into entry block.
+ BasicBlock *BBEntry = &F.getEntryBlock();
+ assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+ "Entry block to function must not have predecessors!");
+
+ // Find the first non-alloca instruction and create the insertion point. This
+ // is safe if the block is well-formed: it always has a terminator; otherwise
+ // we'll get an assertion.
+ BasicBlock::iterator I = BBEntry->begin();
+ while (isa<AllocaInst>(I)) ++I;
+
+ CastInst *AllocaInsertionPoint =
+ new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()),
+ "reg2mem alloca point", I);
+
+ // Find the escaped instructions. But don't create stack slots for
+ // allocas in entry block.
+ std::list<Instruction*> WorkList;
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib) {
+ if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+ valueEscapes(iib)) {
+ WorkList.push_front(&*iib);
+ }
+ }
+
+ // Demote escaped instructions
+ NumRegsDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+ WorkList.clear();
+
+ // Find all phi's
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib)
+ if (isa<PHINode>(iib))
+ WorkList.push_front(&*iib);
+
+ // Demote phi nodes
+ NumPhisDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+ return true;
+}
+
+
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
//
const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
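The refactored valueEscapes above asks a single question: does any user of the instruction live in a different basic block, or is any user a phi? Values for which the answer is yes cannot stay purely block-local, so reg2mem demotes them to a stack slot. A toy model of the predicate over hypothetical Inst/Block structs (not the LLVM classes):

#include <vector>

// Minimal stand-ins for llvm::Instruction / llvm::BasicBlock: just
// enough to model the def-use walk valueEscapes performs.
struct Block;
struct Inst {
  Block *Parent = nullptr;
  bool IsPhi = false;
  std::vector<Inst *> Users; // the use list iterated above
};

// Mirrors RegToMem::valueEscapes: a value escapes if any user lives in
// another block or is a phi node; escaped values get a stack slot.
bool valueEscapes(const Inst *I) {
  for (const Inst *U : I->Users)
    if (U->Parent != I->Parent || U->IsPhi)
      return true;
  return false;
}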
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f0bc12734734..b5edf4e05821 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -33,9 +33,10 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -58,7 +59,7 @@ namespace {
/// LatticeVal class - This class represents the different lattice values that
/// an LLVM value may occupy. It is a simple class with value semantics.
///
-class VISIBILITY_HIDDEN LatticeVal {
+class LatticeVal {
enum {
/// undefined - This LLVM Value has no known value yet.
undefined,
@@ -139,7 +140,7 @@ public:
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- LLVMContext* Context;
+ LLVMContext *Context;
DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable
std::map<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -179,12 +180,12 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- void setContext(LLVMContext* C) { Context = C; }
+ void setContext(LLVMContext *C) { Context = C; }
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
void MarkBlockExecutable(BasicBlock *BB) {
- DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+ DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
BBExecutable.insert(BB); // Basic block is executable!
BBWorkList.push_back(BB); // Add the block to the work list!
}
@@ -260,14 +261,14 @@ private:
//
inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {
if (IV.markConstant(C)) {
- DOUT << "markConstant: " << *C << ": " << *V;
+ DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
}
}
inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {
IV.markForcedConstant(C);
- DOUT << "markForcedConstant: " << *C << ": " << *V;
+ DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
}
@@ -280,11 +281,11 @@ private:
// work list so that the users of the instruction are updated later.
inline void markOverdefined(LatticeVal &IV, Value *V) {
if (IV.markOverdefined()) {
- DEBUG(DOUT << "markOverdefined: ";
+ DEBUG(errs() << "markOverdefined: ";
if (Function *F = dyn_cast<Function>(V))
- DOUT << "Function '" << F->getName() << "'\n";
+ errs() << "Function '" << F->getName() << "'\n";
else
- DOUT << *V);
+ errs() << *V << '\n');
// Only instructions go on the work list
OverdefinedInstWorkList.push_back(V);
}
@@ -337,8 +338,8 @@ private:
return; // This edge is already known to be executable!
if (BBExecutable.count(Dest)) {
- DOUT << "Marking Edge Executable: " << Source->getNameStart()
- << " -> " << Dest->getNameStart() << "\n";
+ DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
// The destination is already executable, but we just made an edge
// feasible that wasn't before. Revisit the PHI nodes in the block
@@ -399,7 +400,9 @@ private:
void visitStoreInst (Instruction &I);
void visitLoadInst (LoadInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitCallInst (CallInst &I) { visitCallSite(CallSite::get(&I)); }
+ void visitCallInst (CallInst &I) {
+ visitCallSite(CallSite::get(&I));
+ }
void visitInvokeInst (InvokeInst &II) {
visitCallSite(CallSite::get(&II));
visitTerminatorInst(II);
@@ -414,7 +417,7 @@ private:
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle...
- cerr << "SCCP: Don't know how to handle: " << I;
+ errs() << "SCCP: Don't know how to handle: " << I;
markOverdefined(&I); // Just in case
}
};
@@ -440,7 +443,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs[0] = Succs[1] = true;
} else if (BCValue.isConstant()) {
// Constant condition variables mean the branch can only go a single way
- Succs[BCValue.getConstant() == Context->getConstantIntFalse()] = true;
+ Succs[BCValue.getConstant() == ConstantInt::getFalse(*Context)] = true;
}
}
} else if (isa<InvokeInst>(&TI)) {
@@ -455,7 +458,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
} else if (SCValue.isConstant())
Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
} else {
- assert(0 && "SCCP: Don't know how to handle this terminator!");
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
}
@@ -485,7 +488,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Constant condition variables mean the branch can only go a single way
return BI->getSuccessor(BCValue.getConstant() ==
- Context->getConstantIntFalse()) == To;
+ ConstantInt::getFalse(*Context)) == To;
}
return false;
}
@@ -513,8 +516,10 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
}
return false;
} else {
- cerr << "Unknown terminator instruction: " << *TI;
- abort();
+#ifndef NDEBUG
+ errs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+ llvm_unreachable(0);
}
}
@@ -642,7 +647,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
It = TrackedMultipleRetVals.find(std::make_pair(F, i));
if (It == TrackedMultipleRetVals.end()) break;
- if (Value *Val = FindInsertedValue(I.getOperand(0), i))
+ if (Value *Val = FindInsertedValue(I.getOperand(0), i, I.getContext()))
mergeInValue(It->second, F, getValueState(Val));
}
}
@@ -666,7 +671,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
if (VState.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
else if (VState.isConstant()) // Propagate constant value
- markConstant(&I, Context->getConstantExprCast(I.getOpcode(),
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
VState.getConstant(), I.getType()));
}
@@ -809,12 +814,12 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
if (NonOverdefVal->isUndefined()) {
// Could annihilate value.
if (I.getOpcode() == Instruction::And)
- markConstant(IV, &I, Context->getNullValue(I.getType()));
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
- markConstant(IV, &I, Context->getConstantVectorAllOnesValue(PT));
+ markConstant(IV, &I, Constant::getAllOnesValue(PT));
else
markConstant(IV, &I,
- Context->getConstantIntAllOnesValue(I.getType()));
+ Constant::getAllOnesValue(I.getType()));
return;
} else {
if (I.getOpcode() == Instruction::And) {
@@ -859,7 +864,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
break; // Cannot fold this operation over the PHI nodes!
} else if (In1.isConstant() && In2.isConstant()) {
Constant *V =
- Context->getConstantExpr(I.getOpcode(), In1.getConstant(),
+ ConstantExpr::get(I.getOpcode(), In1.getConstant(),
In2.getConstant());
if (Result.isUndefined())
Result.markConstant(V);
@@ -908,7 +913,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
markOverdefined(IV, &I);
} else if (V1State.isConstant() && V2State.isConstant()) {
markConstant(IV, &I,
- Context->getConstantExpr(I.getOpcode(), V1State.getConstant(),
+ ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
V2State.getConstant()));
}
}
@@ -945,7 +950,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
Result.markOverdefined();
break; // Cannot fold this operation over the PHI nodes!
} else if (In1.isConstant() && In2.isConstant()) {
- Constant *V = Context->getConstantExprCompare(I.getPredicate(),
+ Constant *V = ConstantExpr::getCompare(I.getPredicate(),
In1.getConstant(),
In2.getConstant());
if (Result.isUndefined())
@@ -994,7 +999,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markOverdefined(IV, &I);
} else if (V1State.isConstant() && V2State.isConstant()) {
- markConstant(IV, &I, Context->getConstantExprCompare(I.getPredicate(),
+ markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
V1State.getConstant(),
V2State.getConstant()));
}
@@ -1096,7 +1101,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
Operands.erase(Operands.begin()); // Erase the pointer from idx list...
- markConstant(IV, &I, Context->getConstantExprGetElementPtr(Ptr, &Operands[0],
+ markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0],
Operands.size()));
}
@@ -1127,10 +1132,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
if (PtrVal.isConstant() && !I.isVolatile()) {
Value *Ptr = PtrVal.getConstant();
// TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<ConstantPointerNull>(Ptr) &&
- cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
+ if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) {
// load null -> null
- markConstant(IV, &I, Context->getNullValue(I.getType()));
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
return;
}
@@ -1179,7 +1183,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {
if (F == 0 || !F->hasLocalLinkage()) {
CallOverdefined:
// Void return and not tracking callee, just bail.
- if (I->getType() == Type::VoidTy) return;
+ if (I->getType()->isVoidTy()) return;
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
@@ -1258,6 +1262,10 @@ CallOverdefined:
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI, ++CAI) {
LatticeVal &IV = ValueState[AI];
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ IV.markOverdefined();
+ continue;
+ }
if (!IV.isOverdefined())
mergeInValue(IV, AI, getValueState(*CAI));
}
@@ -1273,7 +1281,7 @@ void SCCPSolver::Solve() {
Value *I = OverdefinedInstWorkList.back();
OverdefinedInstWorkList.pop_back();
- DOUT << "\nPopped off OI-WL: " << *I;
+ DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n');
// "I" got into the work list because it either made the transition from
// bottom to constant
@@ -1291,7 +1299,7 @@ void SCCPSolver::Solve() {
Value *I = InstWorkList.back();
InstWorkList.pop_back();
- DOUT << "\nPopped off I-WL: " << *I;
+ DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n');
// "I" got into the work list because it either made the transition from
// bottom to constant
@@ -1311,7 +1319,7 @@ void SCCPSolver::Solve() {
BasicBlock *BB = BBWorkList.back();
BBWorkList.pop_back();
- DOUT << "\nPopped off BBWL: " << *BB;
+ DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n');
// Notify all instructions in this basic block that they are newly
// executable.
@@ -1345,7 +1353,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// Look for instructions which produce undef values.
- if (I->getType() == Type::VoidTy) continue;
+ if (I->getType()->isVoidTy()) continue;
LatticeVal &LV = getValueState(I);
if (!LV.isUndefined()) continue;
@@ -1371,22 +1379,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// to be handled here, because we don't know whether the top part is 1's
// or 0's.
assert(Op0LV.isUndefined());
- markForcedConstant(LV, I, Context->getNullValue(ITy));
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
- markForcedConstant(LV, I, Context->getNullValue(ITy));
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
// undef | X -> -1. X could be -1.
if (const VectorType *PTy = dyn_cast<VectorType>(ITy))
markForcedConstant(LV, I,
- Context->getConstantVectorAllOnesValue(PTy));
+ Constant::getAllOnesValue(PTy));
else
- markForcedConstant(LV, I, Context->getConstantIntAllOnesValue(ITy));
+ markForcedConstant(LV, I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::SDiv:
@@ -1399,7 +1407,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
- markForcedConstant(LV, I, Context->getNullValue(ITy));
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
return true;
case Instruction::AShr:
@@ -1420,7 +1428,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X >> undef -> 0. X could be 0.
// X << undef -> 0. X could be 0.
- markForcedConstant(LV, I, Context->getNullValue(ITy));
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
@@ -1483,7 +1491,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// as undef, then further analysis could think the undef went another way
// leading to an inconsistent set of conclusions.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- BI->setCondition(Context->getConstantIntFalse());
+ BI->setCondition(ConstantInt::getFalse(*Context));
} else {
SwitchInst *SI = cast<SwitchInst>(TI);
SI->setCondition(SI->getCaseValue(1));
@@ -1502,7 +1510,7 @@ namespace {
/// SCCP Class - This class uses the SCCPSolver to implement a per-function
/// Sparse Conditional Constant Propagator.
///
- struct VISIBILITY_HIDDEN SCCP : public FunctionPass {
+ struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(&ID) {}
@@ -1531,9 +1539,9 @@ FunctionPass *llvm::createSCCPPass() {
// and return true if the function was modified.
//
bool SCCP::runOnFunction(Function &F) {
- DOUT << "SCCP on function '" << F.getNameStart() << "'\n";
+ DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n");
SCCPSolver Solver;
- Solver.setContext(Context);
+ Solver.setContext(&F.getContext());
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1546,7 +1554,7 @@ bool SCCP::runOnFunction(Function &F) {
bool ResolvedUndefs = true;
while (ResolvedUndefs) {
Solver.Solve();
- DOUT << "RESOLVING UNDEFs\n";
+ DEBUG(errs() << "RESOLVING UNDEFs\n");
ResolvedUndefs = Solver.ResolvedUndefsIn(F);
}
@@ -1561,7 +1569,7 @@ bool SCCP::runOnFunction(Function &F) {
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (!Solver.isBlockExecutable(BB)) {
- DOUT << " BasicBlock Dead:" << *BB;
+ DEBUG(errs() << " BasicBlock Dead:" << *BB);
++NumDeadBlocks;
// Delete the instructions backwards, as doing so has a reduced likelihood of
@@ -1573,7 +1581,7 @@ bool SCCP::runOnFunction(Function &F) {
Instruction *I = Insts.back();
Insts.pop_back();
if (!I->use_empty())
- I->replaceAllUsesWith(Context->getUndef(I->getType()));
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
BB->getInstList().erase(I);
MadeChanges = true;
++NumInstRemoved;
@@ -1584,8 +1592,7 @@ bool SCCP::runOnFunction(Function &F) {
//
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Inst->getType() == Type::VoidTy ||
- isa<TerminatorInst>(Inst))
+ if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
LatticeVal &IV = Values[Inst];
@@ -1593,8 +1600,8 @@ bool SCCP::runOnFunction(Function &F) {
continue;
Constant *Const = IV.isConstant()
- ? IV.getConstant() : Context->getUndef(Inst->getType());
- DOUT << " Constant: " << *Const << " = " << *Inst;
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
// Replaces all of the uses of a variable with uses of the constant.
Inst->replaceAllUsesWith(Const);
@@ -1617,7 +1624,7 @@ namespace {
/// IPSCCP Class - This class implements interprocedural Sparse Conditional
/// Constant Propagation.
///
- struct VISIBILITY_HIDDEN IPSCCP : public ModulePass {
+ struct IPSCCP : public ModulePass {
static char ID;
IPSCCP() : ModulePass(&ID) {}
bool runOnModule(Module &M);
@@ -1658,7 +1665,10 @@ static bool AddressIsTaken(GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
+ LLVMContext *Context = &M.getContext();
+
SCCPSolver Solver;
+ Solver.setContext(Context);
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
@@ -1687,7 +1697,7 @@ bool IPSCCP::runOnModule(Module &M) {
while (ResolvedUndefs) {
Solver.Solve();
- DOUT << "RESOLVING UNDEFS\n";
+ DEBUG(errs() << "RESOLVING UNDEFS\n");
ResolvedUndefs = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
@@ -1709,8 +1719,8 @@ bool IPSCCP::runOnModule(Module &M) {
LatticeVal &IV = Values[AI];
if (IV.isConstant() || IV.isUndefined()) {
Constant *CST = IV.isConstant() ?
- IV.getConstant() : Context->getUndef(AI->getType());
- DOUT << "*** Arg " << *AI << " = " << *CST <<"\n";
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n");
// Replaces all of the uses of a variable with uses of the
// constant.
@@ -1721,7 +1731,7 @@ bool IPSCCP::runOnModule(Module &M) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (!Solver.isBlockExecutable(BB)) {
- DOUT << " BasicBlock Dead:" << *BB;
+ DEBUG(errs() << " BasicBlock Dead:" << *BB);
++IPNumDeadBlocks;
// Delete the instructions backwards, as doing so has a reduced likelihood of
@@ -1734,7 +1744,7 @@ bool IPSCCP::runOnModule(Module &M) {
Instruction *I = Insts.back();
Insts.pop_back();
if (!I->use_empty())
- I->replaceAllUsesWith(Context->getUndef(I->getType()));
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
BB->getInstList().erase(I);
MadeChanges = true;
++IPNumInstRemoved;
@@ -1746,18 +1756,18 @@ bool IPSCCP::runOnModule(Module &M) {
TI->getSuccessor(i)->removePredecessor(BB);
}
if (!TI->use_empty())
- TI->replaceAllUsesWith(Context->getUndef(TI->getType()));
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
BB->getInstList().erase(TI);
if (&*BB != &F->front())
BlocksToErase.push_back(BB);
else
- new UnreachableInst(BB);
+ new UnreachableInst(M.getContext(), BB);
} else {
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Inst->getType() == Type::VoidTy)
+ if (Inst->getType()->isVoidTy())
continue;
LatticeVal &IV = Values[Inst];
@@ -1765,8 +1775,8 @@ bool IPSCCP::runOnModule(Module &M) {
continue;
Constant *Const = IV.isConstant()
- ? IV.getConstant() : Context->getUndef(Inst->getType());
- DOUT << " Constant: " << *Const << " = " << *Inst;
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
// Replaces all of the uses of a variable with uses of the
// constant.
@@ -1802,7 +1812,7 @@ bool IPSCCP::runOnModule(Module &M) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
} else {
- assert(0 && "Didn't fold away reference to block!");
+ llvm_unreachable("Didn't fold away reference to block!");
}
#endif
@@ -1834,12 +1844,12 @@ bool IPSCCP::runOnModule(Module &M) {
for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
E = RV.end(); I != E; ++I)
if (!I->second.isOverdefined() &&
- I->first->getReturnType() != Type::VoidTy) {
+ !I->first->getReturnType()->isVoidTy()) {
Function *F = I->first;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
- RI->setOperand(0, Context->getUndef(F->getReturnType()));
+ RI->setOperand(0, UndefValue::get(F->getReturnType()));
}
// If we inferred constant or undef values for global variables, we can delete
@@ -1850,7 +1860,7 @@ bool IPSCCP::runOnModule(Module &M) {
GlobalVariable *GV = I->first;
assert(!I->second.isOverdefined() &&
"Overdefined values should have been taken out of the map!");
- DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n";
+ DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n");
while (!GV->use_empty()) {
StoreInst *SI = cast<StoreInst>(GV->use_back());
SI->eraseFromParent();
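Most of the SCCP churn above is mechanical, but the pass's core is the three-point lattice that LatticeVal implements: undefined below constant below overdefined, with merges only ever moving upward, which is what bounds the solver's work. A minimal sketch of that lattice, with an int standing in for Constant*:

// Three-point SCCP lattice: undefined -> constant -> overdefined.
struct ToyLatticeVal {
  enum State { Undefined, Constant, Overdefined } St = Undefined;
  int Val = 0; // stands in for llvm::Constant*

  // Merge another lattice value in; returns true if *this changed.
  // Values only climb, mirroring mergeInValue/markOverdefined above:
  // monotonicity is what guarantees the worklists eventually drain.
  bool merge(const ToyLatticeVal &O) {
    if (St == Overdefined || O.St == Undefined)
      return false;             // already at top / nothing to add
    if (St == Undefined) {
      *this = O;                // climb to O's state
      return true;
    }
    if (O.St == Overdefined || Val != O.Val) {
      St = Overdefined;         // conflicting constants go to top
      return true;
    }
    return false;               // same constant: no change
  }
};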
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 109fb90d52f3..610d874b3684 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -34,13 +34,13 @@
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
@@ -49,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
- struct VISIBILITY_HIDDEN SROA : public FunctionPass {
+ struct SROA : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
explicit SROA(signed T = -1) : FunctionPass(&ID) {
if (T == -1)
@@ -68,7 +68,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<DominanceFrontier>();
- AU.addRequired<TargetData>();
AU.setPreservesCFG();
}
@@ -150,9 +149,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
bool SROA::runOnFunction(Function &F) {
- TD = &getAnalysis<TargetData>();
-
+ TD = getAnalysisIfAvailable<TargetData>();
+
bool Changed = performPromotion(F);
+
+ // FIXME: ScalarRepl currently depends on TargetData more than it
+ // theoretically needs to. It should be refactored in order to support
+ // target-independent IR. Until this is done, just skip the actual
+ // scalar-replacement portion of this pass.
+ if (!TD) return Changed;
+
while (1) {
bool LocalChange = performScalarRepl(F);
if (!LocalChange) break; // No need to repromote if no scalarrepl
@@ -186,7 +192,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ PromoteMemToReg(Allocas, DT, DF, F.getContext());
NumPromoted += Allocas.size();
Changed = true;
}
@@ -238,11 +244,10 @@ bool SROA::performScalarRepl(Function &F) {
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
- DOUT << "Found alloca equal to global: " << *AI;
- DOUT << " memcpy = " << *TheCopy;
+ DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(errs() << " memcpy = " << *TheCopy << '\n');
Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
- AI->replaceAllUsesWith(
- Context->getConstantExprBitCast(TheSrc, AI->getType()));
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
TheCopy->eraseFromParent(); // Don't mutate the global.
AI->eraseFromParent();
++NumGlobals;
@@ -256,9 +261,12 @@ bool SROA::performScalarRepl(Function &F) {
// value cannot be decomposed at all.
uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ // Do not promote [0 x %struct].
+ if (AllocaSize == 0) continue;
+
// Do not promote any struct whose size is too big.
if (AllocaSize > SRThreshold) continue;
-
+
if ((isa<StructType>(AI->getAllocatedType()) ||
isa<ArrayType>(AI->getAllocatedType())) &&
// Do not promote any struct into more than "32" separate vars.
@@ -266,7 +274,7 @@ bool SROA::performScalarRepl(Function &F) {
// Check that all of the users of the allocation are capable of being
// transformed.
switch (isSafeAllocaToScalarRepl(AI)) {
- default: assert(0 && "Unexpected value!");
+ default: llvm_unreachable("Unexpected value!");
case 0: // Not safe to scalar replace.
break;
case 1: // Safe, but requires cleanup/canonicalizations first
@@ -298,16 +306,17 @@ bool SROA::performScalarRepl(Function &F) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
- DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n";
+ DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
// Create and insert the vector alloca.
- NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
+ NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
} else {
- DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";
+ DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
- const Type *NewTy = Context->getIntegerType(AllocaSize*8);
+ const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
}
@@ -328,14 +337,14 @@ bool SROA::performScalarRepl(Function &F) {
/// predicate, do SROA now.
void SROA::DoScalarReplacement(AllocationInst *AI,
std::vector<AllocationInst*> &WorkList) {
- DOUT << "Found inst to SROA: " << *AI;
+ DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
AI->getAlignment(),
- AI->getName() + "." + utostr(i), AI);
+ AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
WorkList.push_back(NA); // Add to worklist for recursive processing
}
@@ -345,7 +354,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
const Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
- AI->getName() + "." + utostr(i), AI);
+ AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
WorkList.push_back(NA); // Add to worklist for recursive processing
}
@@ -371,7 +380,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
// %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
// (Also works for arrays instead of structs)
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- Value *Insert = Context->getUndef(LI->getType());
+ Value *Insert = UndefValue::get(LI->getType());
for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
@@ -418,7 +427,8 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
// expanded itself once the worklist is rerun.
//
SmallVector<Value*, 8> NewArgs;
- NewArgs.push_back(Context->getNullValue(Type::Int32Ty));
+ NewArgs.push_back(Constant::getNullValue(
+ Type::getInt32Ty(AI->getContext())));
NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
NewArgs.end(), "", GEPI);
@@ -478,7 +488,7 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
if (Info.isUnsafe) return;
break;
}
- DOUT << " Transformation preventing inst: " << *User;
+ DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
return MarkUnsafe(Info);
case Instruction::Call:
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
@@ -488,10 +498,10 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
break;
}
}
- DOUT << " Transformation preventing inst: " << *User;
+ DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
return MarkUnsafe(Info);
default:
- DOUT << " Transformation preventing inst: " << *User;
+ DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
return MarkUnsafe(Info);
}
}
@@ -531,7 +541,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
// The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
if (I == E ||
- I.getOperand() != Context->getNullValue(I.getOperand()->getType())) {
+ I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
return MarkUnsafe(Info);
}
@@ -727,6 +737,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
// that doesn't have anything to do with the alloca that we are promoting. For
// memset, this Value* stays null.
Value *OtherPtr = 0;
+ LLVMContext &Context = MI->getContext();
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
if (BCInst == MTI->getRawDest())
@@ -764,7 +775,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
const Type *BytePtrTy = MI->getRawDest()->getType();
bool SROADest = MI->getRawDest() == BCInst;
- Constant *Zero = Context->getNullValue(Type::Int32Ty);
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
@@ -772,9 +783,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
unsigned OtherEltAlign = MemAlignment;
if (OtherPtr) {
- Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) };
+ Value *Idx[2] = { Zero,
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
- OtherPtr->getNameStr()+"."+utostr(i),
+ OtherPtr->getNameStr()+"."+Twine(i),
MI);
uint64_t EltOffset;
const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
@@ -819,7 +831,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
Constant *StoreVal;
if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
if (CI->isZero()) {
- StoreVal = Context->getNullValue(EltTy); // 0.0, null, 0, <0,0>
+ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
const Type *ValTy = EltTy->getScalarType();
@@ -835,18 +847,18 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
}
// Convert the integer value to the appropriate type.
- StoreVal = Context->getConstantInt(TotalVal);
+ StoreVal = ConstantInt::get(Context, TotalVal);
if (isa<PointerType>(ValTy))
- StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy);
+ StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
else if (ValTy->isFloatingPoint())
- StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy);
+ StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
// If the requested value was a vector constant, create it.
if (EltTy != ValTy) {
unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = Context->getConstantVector(&Elts[0], NumElts);
+ StoreVal = ConstantVector::get(&Elts[0], NumElts);
}
}
new StoreInst(StoreVal, EltPtr, MI);
@@ -872,15 +884,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
Value *Ops[] = {
SROADest ? EltPtr : OtherElt, // Dest ptr
SROADest ? OtherElt : EltPtr, // Src ptr
- Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size
- Context->getConstantInt(Type::Int32Ty, OtherEltAlign) // Align
+ ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ // Align
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
};
CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
EltPtr, MI->getOperand(2), // Dest, Value,
- Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
Zero // Align
};
CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
@@ -910,9 +923,11 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = new ZExtInst(SrcVal,
- Context->getIntegerType(AllocaSizeBits), "", SI);
+ IntegerType::get(SI->getContext(), AllocaSizeBits),
+ "", SI);
- DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
+ DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ << '\n');
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
@@ -929,7 +944,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
Value *EltVal = SrcVal;
if (Shift) {
- Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift);
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
"sroa.store.elt", SI);
}
@@ -942,7 +957,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
if (FieldSizeBits != AllocaSizeBits)
EltVal = new TruncInst(EltVal,
- Context->getIntegerType(FieldSizeBits), "", SI);
+ IntegerType::get(SI->getContext(), FieldSizeBits),
+ "", SI);
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
@@ -952,7 +968,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
DestField = new BitCastInst(DestField,
- Context->getPointerTypeUnqual(EltVal->getType()),
+ PointerType::getUnqual(EltVal->getType()),
"", SI);
}
new StoreInst(EltVal, DestField, SI);
@@ -977,7 +993,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
Value *EltVal = SrcVal;
if (Shift) {
- Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift);
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
"sroa.store.elt", SI);
}
@@ -985,7 +1001,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
// Truncate down to an integer of the right size.
if (ElementSizeBits != AllocaSizeBits)
EltVal = new TruncInst(EltVal,
- Context->getIntegerType(ElementSizeBits),"",SI);
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits),"",SI);
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
@@ -995,7 +1012,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
DestField = new BitCastInst(DestField,
- Context->getPointerTypeUnqual(EltVal->getType()),
+ PointerType::getUnqual(EltVal->getType()),
"", SI);
}
new StoreInst(EltVal, DestField, SI);
@@ -1026,7 +1043,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
return;
- DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;
+ DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ << '\n');
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
@@ -1038,9 +1056,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
}
-
- Value *ResultVal =
- Context->getNullValue(Context->getIntegerType(AllocaSizeBits));
+
+ Value *ResultVal =
+ Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Load the value from the alloca. If the NewElt is an aggregate, cast
@@ -1053,11 +1071,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
- const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits);
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ FieldSizeBits);
if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
!isa<VectorType>(FieldTy))
SrcField = new BitCastInst(SrcField,
- Context->getPointerTypeUnqual(FieldIntTy),
+ PointerType::getUnqual(FieldIntTy),
"", LI);
SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
@@ -1082,7 +1101,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
if (Shift) {
- Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift);
+ Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
}
@@ -1152,7 +1171,8 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
I != E; ++I) {
isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
if (Info.isUnsafe) {
- DOUT << "Cannot transform: " << *AI << " due to user: " << **I;
+ DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: "
+ << **I << '\n');
return 0;
}
}
@@ -1186,24 +1206,25 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
return;
if (NumElements == 1) {
- GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty));
+ GEPI->setOperand(2,
+ Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));
return;
}
assert(NumElements == 2 && "Unhandled case!");
// All users of the GEP must be loads. At each use of the GEP, insert
// two loads of the appropriate indexed GEP and select between them.
- Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(),
- Context->getNullValue(I.getOperand()->getType()),
- "isone", GEPI);
+ Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
+ Constant::getNullValue(I.getOperand()->getType()),
+ "isone");
// Insert the new GEP instructions, which are properly indexed.
SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
- Indices[1] = Context->getNullValue(Type::Int32Ty);
+ Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
Indices.begin(),
Indices.end(),
GEPI->getName()+".0", GEPI);
- Indices[1] = Context->getConstantInt(Type::Int32Ty, 1);
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
Indices.begin(),
Indices.end(),
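
Alongside the constant-factory updates, this hunk tracks an ICmpInst constructor change: the insert-before instruction now leads the argument list and the name trails it. A sketch of the two shapes, with Val as a placeholder for the operand being tested:

    // Old: new ICmpInst(ICmpInst::ICMP_NE, Val, Zero, "isone", GEPI);
    // New: insertion point first, name last.
    Value *IsOne =
        new ICmpInst(GEPI, ICmpInst::ICMP_NE, Val,
                     Constant::getNullValue(Val->getType()), "isone");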
@@ -1261,9 +1282,9 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) {
/// and stores would mutate the memory.
static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
unsigned AllocaSize, const TargetData &TD,
- LLVMContext* Context) {
+ LLVMContext &Context) {
// If this could be contributing to a vector, analyze it.
- if (VecTy != Type::VoidTy) { // either null or a vector type.
+ if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
@@ -1276,7 +1297,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
VecTy = VInTy;
return;
}
- } else if (In == Type::FloatTy || In == Type::DoubleTy ||
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
(isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
// If we're accessing something that could be an element of a vector, see
@@ -1289,7 +1310,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
cast<VectorType>(VecTy)->getElementType()
->getPrimitiveSizeInBits()/8 == EltSize)) {
if (VecTy == 0)
- VecTy = Context->getVectorType(In, AllocaSize/EltSize);
+ VecTy = VectorType::get(In, AllocaSize/EltSize);
return;
}
}
@@ -1297,7 +1318,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
- VecTy = Type::VoidTy;
+ VecTy = Type::getVoidTy(Context);
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -1320,7 +1341,8 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
// Don't break volatile loads.
if (LI->isVolatile())
return false;
- MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context);
+ MergeInType(LI->getType(), Offset, VecTy,
+ AllocaSize, *TD, V->getContext());
SawVec |= isa<VectorType>(LI->getType());
continue;
}
@@ -1329,7 +1351,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
MergeInType(SI->getOperand(0)->getType(), Offset,
- VecTy, AllocaSize, *TD, Context);
+ VecTy, AllocaSize, *TD, V->getContext());
SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
continue;
}
@@ -1433,7 +1455,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
+ // FIXME: Remove once builder has Twine API.
+ Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
Builder);
Builder.CreateStore(New, NewAI);
@@ -1457,8 +1480,10 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
for (unsigned i = 1; i != NumBytes; ++i)
APVal |= APVal << 8;
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
- Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal),
+ // FIXME: Remove once builder has Twine API.
+ Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+ Value *New = ConvertScalar_InsertValue(
+ ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Builder.CreateStore(New, NewAI);
}
@@ -1510,8 +1535,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
continue;
}
- assert(0 && "Unsupported operation!");
- abort();
+ llvm_unreachable("Unsupported operation!");
}
}
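
The assert(0)-plus-abort() pair collapses into llvm_unreachable, which keeps release builds well-defined and routes the message through LLVM's error-handling hooks. Sketch of the replacement pattern, assuming ErrorHandling.h is reachable through the existing includes:

    // Old:
    //   assert(0 && "Unsupported operation!");
    //   abort();
    // New:
    llvm_unreachable("Unsupported operation!");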
@@ -1545,9 +1569,8 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal,
- Context->getConstantInt(Type::Int32Ty,Elt),
- "tmp");
+ Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
+ Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
if (V->getType() != ToType)
V = Builder.CreateBitCast(V, ToType, "tmp");
return V;
@@ -1557,7 +1580,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
  // use insertvalues to form the FCA.
if (const StructType *ST = dyn_cast<StructType>(ToType)) {
const StructLayout &Layout = *TD->getStructLayout(ST);
- Value *Res = Context->getUndef(ST);
+ Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
@@ -1569,7 +1592,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
- Value *Res = Context->getUndef(AT);
+ Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
Offset+i*EltSize, Builder);
@@ -1599,21 +1622,23 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// only some bits are used.
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateLShr(FromVal,
- Context->getConstantInt(FromVal->getType(),
+ ConstantInt::get(FromVal->getType(),
ShAmt), "tmp");
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateShl(FromVal,
- Context->getConstantInt(FromVal->getType(),
+ ConstantInt::get(FromVal->getType(),
-ShAmt), "tmp");
// Finally, unconditionally truncate the integer to the right width.
unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
- Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp");
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
- Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp");
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
if (isa<IntegerType>(ToType)) {
@@ -1645,6 +1670,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
const Type *AllocaType = Old->getType();
+ LLVMContext &Context = Old->getContext();
if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
@@ -1664,7 +1690,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
SV = Builder.CreateInsertElement(Old, SV,
- Context->getConstantInt(Type::Int32Ty, Elt),
+ ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
return SV;
}
@@ -1697,9 +1723,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
- SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp");
+ SV = Builder.CreateBitCast(SV,
+ IntegerType::get(SV->getContext(),SrcWidth), "tmp");
else if (isa<PointerType>(SV->getType()))
- SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp");
+ SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
@@ -1732,11 +1759,11 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
// only some bits in the structure are set.
APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(),
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
ShAmt), "tmp");
Mask <<= ShAmt;
} else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(),
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
-ShAmt), "tmp");
Mask = Mask.lshr(-ShAmt);
}
@@ -1745,7 +1772,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
// in the new bits.
if (SrcWidth != DestWidth) {
assert(DestWidth > SrcWidth);
- Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask");
+ Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
SV = Builder.CreateOr(Old, SV, "ins");
}
return SV;
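
The shift-and-mask merge at the end of ConvertScalar_InsertValue is easiest to see with concrete widths. A scalar model for DestWidth=32, SrcWidth=8, ShAmt=16; the widths and the helper name insertByteAt16 are chosen purely for illustration:

    #include <cstdint>
    // Mirrors the CreateShl/CreateAnd/CreateOr sequence above for one byte
    // inserted at bit offset 16 of a 32-bit alloca integer.
    uint32_t insertByteAt16(uint32_t Old, uint32_t SV) {
      uint32_t Mask = 0xFFu << 16; // getLowBitsSet(32, 8), shifted into place
      Old &= ~Mask;                // clear the bits being replaced
      SV <<= 16;                   // move the source into position
      return Old | SV;             // the final CreateOr(Old, SV, "ins")
    }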
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b8bce801a1fb..29712b3c13de 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -30,7 +30,6 @@
#include "llvm/Module.h"
#include "llvm/Attributes.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -40,7 +39,7 @@ using namespace llvm;
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
- struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass {
+ struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
CFGSimplifyPass() : FunctionPass(&ID) {}
@@ -58,20 +57,20 @@ FunctionPass *llvm::createCFGSimplificationPass() {
/// ChangeToUnreachable - Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, LLVMContext* Context) {
+static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
(*SI)->removePredecessor(BB);
- new UnreachableInst(I);
+ new UnreachableInst(I->getContext(), I);
// All instructions after this are dead.
BasicBlock::iterator BBI = I, BBE = BB->end();
while (BBI != BBE) {
if (!BBI->use_empty())
- BBI->replaceAllUsesWith(Context->getUndef(BBI->getType()));
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
BB->getInstList().erase(BBI++);
}
}
@@ -97,7 +96,7 @@ static void ChangeToCall(InvokeInst *II) {
static bool MarkAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable,
- LLVMContext* Context) {
+ LLVMContext &Context) {
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
@@ -132,7 +131,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
- cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) {
+ SI->getPointerAddressSpace() == 0)) {
ChangeToUnreachable(SI, Context);
Changed = true;
break;
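
The surrounding logic still turns a store through undef or a null pointer into unreachable code; the address-space check stays because other address spaces may legitimately make address zero dereferenceable, and the patch simply reads it via the new StoreInst accessor rather than re-deriving it from the pointer type. The guard as it reads after the patch, condensed:

    if (isa<UndefValue>(Ptr) ||
        (isa<ConstantPointerNull>(Ptr) &&
         SI->getPointerAddressSpace() == 0)) {
      ChangeToUnreachable(SI, Context); // rest of the block is dead
      Changed = true;
    }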
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 4aad17d7236d..13077fe642a7 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -22,15 +22,13 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Config/config.h"
using namespace llvm;
namespace {
  /// This pass optimizes calls to the half_powr family of library functions.
///
- class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass {
+ class SimplifyHalfPowrLibCalls : public FunctionPass {
const TargetData *TD;
public:
static char ID; // Pass identification
@@ -39,7 +37,6 @@ namespace {
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
}
Instruction *
@@ -60,8 +57,9 @@ FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
/// their control flow to better facilitate subsequent optimization.
Instruction *
-SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt) {
+SimplifyHalfPowrLibCalls::
+InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+ Instruction *InsertPt) {
std::vector<BasicBlock *> Bodies;
BasicBlock *NewBlock = 0;
@@ -123,7 +121,7 @@ SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &Half
/// runOnFunction - Top level algorithm.
///
bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
- TD = &getAnalysis<TargetData>();
+ TD = getAnalysisIfAvailable<TargetData>();
bool Changed = false;
std::vector<Instruction *> HalfPowrs;
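
getAnalysis<TargetData>() asserted when no target data was registered; getAnalysisIfAvailable returns null instead, so the pass can run without a target description and each rewrite bails out on its own, which is what the "These optimizations require TargetData" guards later in this patch do. The pattern, as a sketch:

    // In runOnFunction:
    TD = getAnalysisIfAvailable<TargetData>(); // may be null from here on

    // At any rewrite that needs type sizes or intptr_t:
    if (!TD) return 0; // these optimizations require TargetData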
@@ -136,8 +134,7 @@ bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
Function *Callee = CI->getCalledFunction();
if (Callee && Callee->hasExternalLinkage()) {
// Look for calls with well-known names.
- const char *CalleeName = Callee->getNameStart();
- if (strcmp(CalleeName, "__half_powrf4") == 0)
+ if (Callee->getName() == "__half_powrf4")
IsHalfPowr = true;
}
}
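
The name test also moves off the raw C-string interface. A sketch of the two shapes; the new form compares against the StringRef returned by getName(), so there is no strcmp and no reliance on nul termination:

    // Old: strcmp(Callee->getNameStart(), "__half_powrf4") == 0
    // New:
    if (Callee->getName() == "__half_powrf4")
      IsHalfPowr = true;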
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index ec48469f536e..e186601505c2 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -9,11 +9,9 @@
//
// This file implements a simple pass that applies a variety of small
// optimizations for calls to specific well-known function calls (e.g. runtime
-// library functions). For example, a call to the function "exit(3)" that
-// occurs within the main() function can be transformed into a simple "return 3"
-// instruction. Any optimization that takes this form (replace call to library
-// function with simpler code that provides the same result) belongs in this
-// file.
+// library functions). Any optimization that takes the very simple form
+// "replace call to library function with simpler code that provides the same
+// result" belongs in this file.
//
//===----------------------------------------------------------------------===//
@@ -29,8 +27,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Config/config.h"
using namespace llvm;
@@ -44,7 +43,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions");
/// This class is the abstract base class for the set of optimizations that
/// corresponds to one library call.
namespace {
-class VISIBILITY_HIDDEN LibCallOptimization {
+class LibCallOptimization {
protected:
Function *Caller;
const TargetData *TD;
@@ -58,14 +57,14 @@ public:
/// performed. If it returns CI, then it transformed the call and CI is to be
/// deleted. If it returns something else, replace CI with the new value and
/// delete CI.
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
-
- Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) {
+
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
- this->TD = &TD;
+ this->TD = TD;
if (CI->getCalledFunction())
- Context = CI->getCalledFunction()->getContext();
+ Context = &CI->getCalledFunction()->getContext();
return CallOptimizer(CI->getCalledFunction(), CI, B);
}
@@ -76,12 +75,12 @@ public:
/// specified pointer. Ptr is required to be some pointer type, and the
/// return value has 'intptr_t' type.
Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
-
+
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
- Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+ Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
unsigned Align, IRBuilder<> &B);
-
+
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
@@ -96,35 +95,36 @@ public:
  /// 'floor'). This function is known to take a single argument of type
  /// matching 'Op' and returns one value with the same type. If 'Op' is a
  /// long double, 'l' is added as the suffix of the name; if 'Op' is a float,
  /// we add an 'f' suffix.
- Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B);
-
+ Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
+ const AttrListPtr &Attrs);
+
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
void EmitPutChar(Value *Char, IRBuilder<> &B);
-
+
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
void EmitPutS(Value *Str, IRBuilder<> &B);
-
+
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an i32, and File is a pointer to FILE.
void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B);
-
+
  /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
/// pointer and File is a pointer to FILE.
void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B);
-
+
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B);
-
+
};
} // End anonymous namespace.
/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
return
- B.CreateBitCast(V, Context->getPointerTypeUnqual(Type::Int8Ty), "cstr");
+ B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
}
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
@@ -137,8 +137,8 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
Attribute::NoUnwind);
Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
- TD->getIntPtrType(),
- Context->getPointerTypeUnqual(Type::Int8Ty),
+ TD->getIntPtrType(*Context),
+ Type::getInt8PtrTy(*Context),
NULL);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
@@ -157,7 +157,7 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
Tys[0] = Len->getType();
Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1);
return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
- Context->getConstantInt(Type::Int32Ty, Align));
+ ConstantInt::get(Type::getInt32Ty(*Context), Align));
}
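
EmitMemCpy is the template for calling an overloaded intrinsic after the API change: the single entry of Tys selects the llvm.memcpy.i32/.i64 variant by the length's type, and the alignment constant is now minted against the per-context i32. A condensed sketch, with Dst/Src/Len/Align as in the function above:

    const Type *Tys[1] = { Len->getType() }; // picks the .i32 or .i64 variant
    Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 1);
    B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
                  ConstantInt::get(Type::getInt32Ty(*Context), Align));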
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
@@ -169,9 +169,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
- Context->getPointerTypeUnqual(Type::Int8Ty),
- Context->getPointerTypeUnqual(Type::Int8Ty),
- Type::Int32Ty, TD->getIntPtrType(),
+ Type::getInt8PtrTy(*Context),
+ Type::getInt8PtrTy(*Context),
+ Type::getInt32Ty(*Context),
+ TD->getIntPtrType(*Context),
NULL);
CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
@@ -192,10 +193,10 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Attribute::NoUnwind);
Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
- Type::Int32Ty,
- Context->getPointerTypeUnqual(Type::Int8Ty),
- Context->getPointerTypeUnqual(Type::Int8Ty),
- TD->getIntPtrType(), NULL);
+ Type::getInt32Ty(*Context),
+ Type::getInt8PtrTy(*Context),
+ Type::getInt8PtrTy(*Context),
+ TD->getIntPtrType(*Context), NULL);
CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
Len, "memcmp");
@@ -213,7 +214,7 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
const Type *Tys[1];
Tys[0] = Len->getType();
Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
- Value *Align = Context->getConstantInt(Type::Int32Ty, 1);
+ Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
}
@@ -222,14 +223,15 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
  /// returns one value with the same type. If 'Op' is a long double, 'l' is
  /// added as the suffix of the name; if 'Op' is a float, we add an 'f' suffix.
Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
- IRBuilder<> &B) {
+ IRBuilder<> &B,
+ const AttrListPtr &Attrs) {
char NameBuffer[20];
- if (Op->getType() != Type::DoubleTy) {
+ if (!Op->getType()->isDoubleTy()) {
// If we need to add a suffix, copy into NameBuffer.
unsigned NameLen = strlen(Name);
assert(NameLen < sizeof(NameBuffer)-2);
memcpy(NameBuffer, Name, NameLen);
- if (Op->getType() == Type::FloatTy)
+ if (Op->getType()->isFloatTy())
NameBuffer[NameLen] = 'f'; // floorf
else
NameBuffer[NameLen] = 'l'; // floorl
@@ -241,7 +243,7 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), NULL);
CallInst *CI = B.CreateCall(Callee, Op, Name);
-
+ CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -252,10 +254,12 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
/// is an integer.
void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty,
- Type::Int32Ty, NULL);
+ Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
+ Type::getInt32Ty(*Context), NULL);
CallInst *CI = B.CreateCall(PutChar,
- B.CreateIntCast(Char, Type::Int32Ty, "chari"),
+ B.CreateIntCast(Char,
+ Type::getInt32Ty(*Context),
+ "chari"),
"putchar");
if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
@@ -271,8 +275,8 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
- Type::Int32Ty,
- Context->getPointerTypeUnqual(Type::Int8Ty),
+ Type::getInt32Ty(*Context),
+ Type::getInt8PtrTy(*Context),
NULL);
CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
@@ -289,12 +293,16 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
Constant *F;
if (isa<PointerType>(File->getType()))
- F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
- Type::Int32Ty, File->getType(), NULL);
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+ Type::getInt32Ty(*Context),
+ Type::getInt32Ty(*Context), File->getType(),
+ NULL);
else
- F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
+ F = M->getOrInsertFunction("fputc",
+ Type::getInt32Ty(*Context),
+ Type::getInt32Ty(*Context),
File->getType(), NULL);
- Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
+ Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");
CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -311,12 +319,13 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
Constant *F;
if (isa<PointerType>(File->getType()))
- F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty,
- Context->getPointerTypeUnqual(Type::Int8Ty),
+ F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ Type::getInt32Ty(*Context),
+ Type::getInt8PtrTy(*Context),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fputs", Type::Int32Ty,
- Context->getPointerTypeUnqual(Type::Int8Ty),
+ F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context),
+ Type::getInt8PtrTy(*Context),
File->getType(), NULL);
CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
@@ -336,17 +345,19 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
Constant *F;
if (isa<PointerType>(File->getType()))
F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
- TD->getIntPtrType(),
- Context->getPointerTypeUnqual(Type::Int8Ty),
- TD->getIntPtrType(), TD->getIntPtrType(),
+ TD->getIntPtrType(*Context),
+ Type::getInt8PtrTy(*Context),
+ TD->getIntPtrType(*Context),
+ TD->getIntPtrType(*Context),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(),
- Context->getPointerTypeUnqual(Type::Int8Ty),
- TD->getIntPtrType(), TD->getIntPtrType(),
+ F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context),
+ Type::getInt8PtrTy(*Context),
+ TD->getIntPtrType(*Context),
+ TD->getIntPtrType(*Context),
File->getType(), NULL);
CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- Context->getConstantInt(TD->getIntPtrType(), 1), File);
+ ConstantInt::get(TD->getIntPtrType(*Context), 1), File);
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -362,30 +373,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
// Look through noop bitcast instructions.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
return GetStringLengthH(BCI->getOperand(0), PHIs);
-
+
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (!PHIs.insert(PN))
return ~0ULL; // already in the set.
-
+
// If it was new, see if all the input strings are the same length.
uint64_t LenSoFar = ~0ULL;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
if (Len == 0) return 0; // Unknown length -> unknown.
-
+
if (Len == ~0ULL) continue;
-
+
if (Len != LenSoFar && LenSoFar != ~0ULL)
return 0; // Disagree -> unknown.
LenSoFar = Len;
}
-
+
// Success, all agree.
return LenSoFar;
}
-
+
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
@@ -397,7 +408,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
if (Len1 != Len2) return 0;
return Len1;
}
-
+
// If the value is not a GEP instruction nor a constant expression with a
// GEP instruction, then return unknown.
User *GEP = 0;
@@ -410,11 +421,11 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
} else {
return 0;
}
-
+
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return 0;
-
+
// Check to make sure that the first operand of the GEP is an integer and
// has value 0 so that we are sure we're indexing into the initializer.
if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
@@ -422,7 +433,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
return 0;
} else
return 0;
-
+
// If the second index isn't a ConstantInt, then this is a variable index
// into the array. If this occurs, we can't say anything meaningful about
// the string.
@@ -431,28 +442,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
StartIdx = CI->getZExtValue();
else
return 0;
-
+
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ GV->mayBeOverridden())
return 0;
Constant *GlobalInit = GV->getInitializer();
-
+
// Handle the ConstantAggregateZero case, which is a degenerate case. The
// initializer is constant zero so the length of the string must be zero.
if (isa<ConstantAggregateZero>(GlobalInit))
return 1; // Len = 0 offset by 1.
-
+
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (!Array || Array->getType()->getElementType() != Type::Int8Ty)
+ if (!Array ||
+ Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
      return 0;  // Not an i8 array -> length unknown.
-
+
// Get the number of elements in the array
uint64_t NumElts = Array->getType()->getNumElements();
-
+
// Traverse the constant array from StartIdx (derived above) which is
// the place the GEP refers to in the array.
for (unsigned i = StartIdx; i != NumElts; ++i) {
@@ -463,7 +476,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
if (CI->isZero())
return i-StartIdx+1; // We found end of string, success!
}
-
+
return 0; // The array isn't null terminated, conservatively return 'unknown'.
}
@@ -471,7 +484,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLength(Value *V) {
if (!isa<PointerType>(V->getType())) return 0;
-
+
SmallPtrSet<PHINode*, 32> PHIs;
uint64_t Len = GetStringLengthH(V, PHIs);
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
@@ -480,7 +493,7 @@ static uint64_t GetStringLength(Value *V) {
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI) {
@@ -496,73 +509,38 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
//===----------------------------------------------------------------------===//
-// Miscellaneous LibCall Optimizations
-//===----------------------------------------------------------------------===//
-
-namespace {
-//===---------------------------------------===//
-// 'exit' Optimizations
-
-/// ExitOpt - int main() { exit(4); } --> int main() { return 4; }
-struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify we have a reasonable prototype for exit.
- if (Callee->arg_size() == 0 || !CI->use_empty())
- return 0;
-
- // Verify the caller is main, and that the result type of main matches the
- // argument type of exit.
- if (!Caller->isName("main") || !Caller->hasExternalLinkage() ||
- Caller->getReturnType() != CI->getOperand(1)->getType())
- return 0;
-
- TerminatorInst *OldTI = CI->getParent()->getTerminator();
-
- // Create the return after the call.
- ReturnInst *RI = B.CreateRet(CI->getOperand(1));
-
- // Drop all successor phi node entries.
- for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i)
- OldTI->getSuccessor(i)->removePredecessor(CI->getParent());
-
- // Erase all instructions from after our return instruction until the end of
- // the block.
- BasicBlock::iterator FirstDead = RI; ++FirstDead;
- CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end());
- return CI;
- }
-};
-
-//===----------------------------------------------------------------------===//
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
// 'strcat' Optimizations
-
-struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
+namespace {
+struct StrCatOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType())
return 0;
-
+
// Extract some information from the instruction
Value *Dst = CI->getOperand(1);
Value *Src = CI->getOperand(2);
-
+
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
if (Len == 0) return 0;
--Len; // Unbias length.
-
+
// Handle the simple, do-nothing case: strcat(x, "") -> x
if (Len == 0)
return Dst;
-
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
EmitStrLenMemCpy(Src, Dst, Len, B);
return Dst;
}
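
With a source string of known length, the code above leaves only a strlen on the destination and a fixed-size copy. A scalar model of what StrCatOpt emits for strcat(dst, "abc"), i.e. Len == 3; the helper name strcat_abc is hypothetical, for illustration only:

    #include <cstring>
    char *strcat_abc(char *dst) {
      size_t dstlen = std::strlen(dst);        // EmitStrLen
      std::memcpy(dst + dstlen, "abc", 3 + 1); // Len+1 also copies the nul
      return dst;                              // the call's result is Dst
    }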
@@ -571,28 +549,28 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
Value *DstLen = EmitStrLen(Dst, B);
-
+
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
// the string .. we're concatenating).
Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-
+
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
EmitMemCpy(CpyDst, Src,
- Context->getConstantInt(TD->getIntPtrType(), Len+1), 1, B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B);
}
};
//===---------------------------------------===//
// 'strncat' Optimizations
-struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
+struct StrNCatOpt : public StrCatOpt {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strncat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
!isa<IntegerType>(FT->getParamType(2)))
@@ -619,6 +597,9 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
// strncat(x, c, 0) -> x
if (SrcLen == 0 || Len == 0) return Dst;
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
// We don't optimize this case
if (Len < SrcLen) return 0;
@@ -632,27 +613,31 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
//===---------------------------------------===//
// 'strchr' Optimizations
-struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
+struct StrChrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strchr" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
FT->getParamType(0) != FT->getReturnType())
return 0;
-
+
Value *SrcStr = CI->getOperand(1);
-
+
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
if (CharC == 0) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32.
+ if (Len == 0 ||
+ FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32.
return 0;
-
+
return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
- Context->getConstantInt(TD->getIntPtrType(), Len), B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), B);
}
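
When the character is variable but the source string's length is known, strchr becomes a memchr over the whole buffer; GetStringLength returns len+1, so the emitted search covers the terminating nul as well, matching strchr's ability to find it. A scalar model, with a hypothetical helper name:

    #include <cstring>
    char *strchr_knownlen(char *s, int c, size_t lenWithNul) {
      return static_cast<char *>(std::memchr(s, c, lenWithNul));
    }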
// Otherwise, the character is a constant, see if the first argument is
@@ -660,24 +645,24 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
std::string Str;
if (!GetConstantStringInfo(SrcStr, Str))
return 0;
-
+
// strchr can find the nul character.
Str += '\0';
char CharValue = CharC->getSExtValue();
-
+
// Compute the offset.
uint64_t i = 0;
while (1) {
if (i == Str.size()) // Didn't find the char. strchr returns null.
- return Context->getNullValue(CI->getType());
+ return Constant::getNullValue(CI->getType());
// Did we find our match?
if (Str[i] == CharValue)
break;
++i;
}
-
+
// strchr(s+n,c) -> gep(s+n+i,c)
- Value *Idx = Context->getConstantInt(Type::Int64Ty, i);
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
return B.CreateGEP(SrcStr, Idx, "strchr");
}
};
@@ -685,40 +670,44 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'strcmp' Optimizations
-struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
+struct StrCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty ||
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != Type::getInt32Ty(*Context) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty))
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
-
+
Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
if (Str1P == Str2P) // strcmp(x,x) -> 0
- return Context->getConstantInt(CI->getType(), 0);
-
+ return ConstantInt::get(CI->getType(), 0);
+
std::string Str1, Str2;
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
-
+
if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
+
if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
+
// strcmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
- return Context->getConstantInt(CI->getType(),
+ return ConstantInt::get(CI->getType(),
strcmp(Str1.c_str(),Str2.c_str()));
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
return EmitMemCmp(Str1P, Str2P,
- Context->getConstantInt(TD->getIntPtrType(),
+ ConstantInt::get(TD->getIntPtrType(*Context),
std::min(Len1, Len2)), B);
}
@@ -729,43 +718,44 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'strncmp' Optimizations
-struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
+struct StrNCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strncmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty ||
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != Type::getInt32Ty(*Context) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
!isa<IntegerType>(FT->getParamType(2)))
return 0;
-
+
Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
- return Context->getConstantInt(CI->getType(), 0);
-
+ return ConstantInt::get(CI->getType(), 0);
+
// Get the length argument if it is constant.
uint64_t Length;
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
Length = LengthArg->getZExtValue();
else
return 0;
-
+
if (Length == 0) // strncmp(x,y,0) -> 0
- return Context->getConstantInt(CI->getType(), 0);
-
+ return ConstantInt::get(CI->getType(), 0);
+
std::string Str1, Str2;
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
-
+
if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
+
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
+
    // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
- return Context->getConstantInt(CI->getType(),
+ return ConstantInt::get(CI->getType(),
strncmp(Str1.c_str(), Str2.c_str(), Length));
return 0;
}
@@ -775,27 +765,30 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'strcpy' Optimizations
-struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
+struct StrCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcpy" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty))
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
-
+
Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
-
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
if (Len == 0) return 0;
-
+
    // We have enough information to now generate the memcpy call to do the
    // copy for us. Make a memcpy to copy the nul byte with align = 1.
EmitMemCpy(Dst, Src,
- Context->getConstantInt(TD->getIntPtrType(), Len), 1, B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
return Dst;
}
};
@@ -803,12 +796,12 @@ struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'strncpy' Optimizations
-struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
+struct StrNCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
!isa<IntegerType>(FT->getParamType(2)))
return 0;
@@ -823,7 +816,8 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y, 1)
- EmitMemSet(Dst, Context->getConstantInt(Type::Int8Ty, '\0'), LenOp, B);
+ EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
+ B);
return Dst;
}
@@ -835,12 +829,15 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
// Let strncpy handle the zero padding
if (Len > SrcLen+1) return 0;
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
EmitMemCpy(Dst, Src,
- Context->getConstantInt(TD->getIntPtrType(), Len), 1, B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
return Dst;
}
@@ -849,19 +846,19 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'strlen' Optimizations
-struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
+struct StrLenOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
- FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
!isa<IntegerType>(FT->getReturnType()))
return 0;
-
+
Value *Src = CI->getOperand(1);
// Constant folding: strlen("xyz") -> 3
if (uint64_t Len = GetStringLength(Src))
- return Context->getConstantInt(CI->getType(), Len-1);
+ return ConstantInt::get(CI->getType(), Len-1);
// Handle strlen(p) != 0.
if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
@@ -875,7 +872,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'strto*' Optimizations
-struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
+struct StrToOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
@@ -897,18 +894,18 @@ struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'memcmp' Optimizations
-struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
+struct MemCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
- FT->getReturnType() != Type::Int32Ty)
+ FT->getReturnType() != Type::getInt32Ty(*Context))
return 0;
Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Context->getNullValue(CI->getType());
+ return Constant::getNullValue(CI->getType());
// Make sure we have a constant length.
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
@@ -916,7 +913,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
uint64_t Len = LenC->getZExtValue();
if (Len == 0) // memcmp(s1,s2,0) -> 0
- return Context->getNullValue(CI->getType());
+ return Constant::getNullValue(CI->getType());
if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
@@ -927,8 +924,8 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0
if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
- const Type *PTy = Context->getPointerTypeUnqual(Len == 2 ?
- Type::Int16Ty : Type::Int32Ty);
+ const Type *PTy = PointerType::getUnqual(Len == 2 ?
+ Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));
LHS = B.CreateBitCast(LHS, PTy, "tmp");
RHS = B.CreateBitCast(RHS, PTy, "tmp");
LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
@@ -944,13 +941,16 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'memcpy' Optimizations
-struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
+struct MemCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
- FT->getParamType(2) != TD->getIntPtrType())
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -962,25 +962,28 @@ struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'memmove' Optimizations
-struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
+struct MemMoveOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
- FT->getParamType(2) != TD->getIntPtrType())
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
Module *M = Caller->getParent();
Intrinsic::ID IID = Intrinsic::memmove;
const Type *Tys[1];
- Tys[0] = TD->getIntPtrType();
+ Tys[0] = TD->getIntPtrType(*Context);
Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
Value *Dst = CastToCStr(CI->getOperand(1), B);
Value *Src = CastToCStr(CI->getOperand(2), B);
Value *Size = CI->getOperand(3);
- Value *Align = Context->getConstantInt(Type::Int32Ty, 1);
+ Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
B.CreateCall4(MemMove, Dst, Src, Size, Align);
return CI->getOperand(1);
}
@@ -989,17 +992,21 @@ struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'memset' Optimizations
-struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
+struct MemSetOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!isa<PointerType>(FT->getParamType(0)) ||
- FT->getParamType(1) != TD->getIntPtrType() ||
- FT->getParamType(2) != TD->getIntPtrType())
+ !isa<IntegerType>(FT->getParamType(1)) ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty);
+ Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
+ false);
EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
return CI->getOperand(1);
}
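
Loosening the prototype check so any integer width is accepted for the value argument forces the cast to become direction-agnostic: CreateIntCast zero-extends or truncates as needed, where the old CreateTrunc was only valid when the argument was wider than i8. Sketch:

    // llvm.memset wants an i8 value; the caller may now pass any width.
    Value *Val = B.CreateIntCast(CI->getOperand(2),
                                 Type::getInt8Ty(*Context),
                                 /*isSigned=*/false);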
@@ -1012,7 +1019,7 @@ struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'pow*' Optimizations
-struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
+struct PowOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
@@ -1021,40 +1028,44 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
!FT->getParamType(0)->isFloatingPoint())
return 0;
-
+
Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
return Op1C;
if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
- return EmitUnaryFloatFnCall(Op2, "exp2", B);
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
}
-
+
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
if (Op2C == 0) return 0;
-
+
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
- return Context->getConstantFP(CI->getType(), 1.0);
-
+ return ConstantFP::get(CI->getType(), 1.0);
+
if (Op2C->isExactlyValue(0.5)) {
- // FIXME: This is not safe for -0.0 and -inf. This can only be done when
- // 'unsafe' math optimizations are allowed.
- // x pow(x, 0.5) sqrt(x)
- // ---------------------------------------------
- // -0.0 +0.0 -0.0
- // -inf +inf NaN
-#if 0
- // pow(x, 0.5) -> sqrt(x)
- return B.CreateCall(get_sqrt(), Op1, "sqrt");
-#endif
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+      // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ return Sel;
}
-
+
if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
return Op1;
if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
return B.CreateFMul(Op1, Op1, "pow2");
if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
- return B.CreateFDiv(Context->getConstantFP(CI->getType(), 1.0),
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
Op1, "powrecip");
return 0;
}
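
The new pow(x, 0.5) expansion enables the rewrite the old FIXME kept behind #if 0, and it is exact for the two inputs the removed table worried about. A scalar model of the emitted compare-and-select; powHalf is a hypothetical name for illustration:

    #include <cmath>
    #include <limits>
    // pow(x, 0.5) -> x == -inf ? +inf : fabs(sqrt(x))
    // fabs repairs sqrt(-0.0) == -0.0 (pow must give +0.0); the select
    // repairs sqrt(-inf) == NaN (pow must give +inf).
    double powHalf(double x) {
      const double inf = std::numeric_limits<double>::infinity();
      return x == -inf ? inf : std::fabs(std::sqrt(x));
    }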
@@ -1063,7 +1074,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'exp2' Optimizations
-struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
+struct Exp2Opt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
@@ -1071,35 +1082,38 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPoint())
return 0;
-
+
Value *Op = CI->getOperand(1);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0),
+ Type::getInt32Ty(*Context), "tmp");
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0),
+ Type::getInt32Ty(*Context), "tmp");
}
if (LdExpArg) {
const char *Name;
- if (Op->getType() == Type::FloatTy)
+ if (Op->getType()->isFloatTy())
Name = "ldexpf";
- else if (Op->getType() == Type::DoubleTy)
+ else if (Op->getType()->isDoubleTy())
Name = "ldexp";
else
Name = "ldexpl";
- Constant *One = Context->getConstantFP(APFloat(1.0f));
- if (Op->getType() != Type::FloatTy)
- One = Context->getConstantExprFPExtend(One, Op->getType());
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(), Type::Int32Ty,NULL);
+ Op->getType(),
+ Type::getInt32Ty(*Context),NULL);
CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
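
The exp2 rewrite leans on the identity exp2(n) == ldexp(1.0, n) for integral n; the sext/zext width checks above ensure n fits the i32 that ldexp expects. A scalar model with the width checks elided; exp2OfInt is a hypothetical name:

    #include <cmath>
    double exp2OfInt(int n) {
      return std::ldexp(1.0, n); // == std::exp2((double)n) for integral n
    }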
@@ -1113,22 +1127,23 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
//===---------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
+struct UnaryDoubleFPOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
- FT->getParamType(0) != Type::DoubleTy)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
return 0;
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
- if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
return 0;
// floor((double)floatval) -> (double)floorf(floatval)
Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B);
- return B.CreateFPExt(V, Type::DoubleTy);
+ V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
+ Callee->getAttributes());
+ return B.CreateFPExt(V, Type::getDoubleTy(*Context));
}
};
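
A small standalone sketch of why the shrinking is sound (not part of the patch): every float converts to double exactly, so rounding down before or after the extension gives the same value.

#include <cassert>
#include <cmath>

int main() {
  // Sketch, not part of the patch: floor((double)f) == (double)floorf(f).
  const float xs[] = {1.5f, -2.75f, 0.0f, -0.0f, 1e20f};
  for (float f : xs)
    assert(std::floor(static_cast<double>(f)) ==
           static_cast<double>(std::floor(f)));  // float overload = floorf
  return 0;
}
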
@@ -1139,54 +1154,56 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'ffs*' Optimizations
-struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization {
+struct FFSOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
 // Just make sure this takes one integer argument and returns an
 // i32 result.
- if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty ||
+ if (FT->getNumParams() != 1 ||
+ FT->getReturnType() != Type::getInt32Ty(*Context) ||
!isa<IntegerType>(FT->getParamType(0)))
return 0;
-
+
Value *Op = CI->getOperand(1);
-
+
// Constant fold.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->getValue() == 0) // ffs(0) -> 0.
- return Context->getNullValue(CI->getType());
- return Context->getConstantInt(Type::Int32Ty, // ffs(c) -> cttz(c)+1
+ return Constant::getNullValue(CI->getType());
+ return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
CI->getValue().countTrailingZeros()+1);
}
-
+
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
const Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, &ArgType, 1);
Value *V = B.CreateCall(F, Op, "cttz");
- V = B.CreateAdd(V, Context->getConstantInt(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp");
-
- Value *Cond = B.CreateICmpNE(Op, Context->getNullValue(ArgType), "tmp");
- return B.CreateSelect(Cond, V, Context->getConstantInt(Type::Int32Ty, 0));
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+ V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ return B.CreateSelect(Cond, V,
+ ConstantInt::get(Type::getInt32Ty(*Context), 0));
}
};
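
The same rewrite in source terms, as a hedged sketch (assumes POSIX ffs() and GCC/Clang's __builtin_ctz as a stand-in for llvm.cttz; the helper name is made up):

#include <cassert>
#include <strings.h>   // POSIX ffs()

// Sketch, not part of the patch: ffs(x) -> x != 0 ? cttz(x)+1 : 0.
static int ffs_expanded(int x) {
  return x != 0 ? __builtin_ctz(static_cast<unsigned>(x)) + 1 : 0;
}

int main() {
  for (int x : {0, 1, 2, 8, 0x80, -1})
    assert(ffs(x) == ffs_expanded(x));
  return 0;
}
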
//===---------------------------------------===//
// 'isdigit' Optimizations
-struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
+struct IsDigitOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
- FT->getParamType(0) != Type::Int32Ty)
+ FT->getParamType(0) != Type::getInt32Ty(*Context))
return 0;
-
+
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getOperand(1);
- Op = B.CreateSub(Op, Context->getConstantInt(Type::Int32Ty, '0'),
+ Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
"isdigittmp");
- Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 10),
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
"isdigit");
return B.CreateZExt(Op, CI->getType());
}
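
A hedged standalone sketch of the trick (default "C" locale assumed; not part of the patch): subtracting '0' and doing one unsigned compare folds both range checks.

#include <cassert>
#include <cctype>

// Sketch, not part of the patch: isdigit(c) -> (unsigned)(c - '0') < 10.
static bool isdigit_expanded(int c) {
  return static_cast<unsigned>(c - '0') < 10u;
}

int main() {
  for (int c = 0; c < 128; ++c)
    assert((std::isdigit(c) != 0) == isdigit_expanded(c));
  return 0;
}
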
@@ -1195,58 +1212,58 @@ struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'isascii' Optimizations
-struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization {
+struct IsAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
- FT->getParamType(0) != Type::Int32Ty)
+ FT->getParamType(0) != Type::getInt32Ty(*Context))
return 0;
-
+
// isascii(c) -> c <u 128
Value *Op = CI->getOperand(1);
- Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 128),
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
"isascii");
return B.CreateZExt(Op, CI->getType());
}
};
-
+
//===---------------------------------------===//
// 'abs', 'labs', 'llabs' Optimizations
-struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization {
+struct AbsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
FT->getParamType(0) != FT->getReturnType())
return 0;
-
+
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getOperand(1);
- Value *Pos = B.CreateICmpSGT(Op,
- Context->getConstantIntAllOnesValue(Op->getType()),
+ Value *Pos = B.CreateICmpSGT(Op,
+ Constant::getAllOnesValue(Op->getType()),
"ispos");
Value *Neg = B.CreateNeg(Op, "neg");
return B.CreateSelect(Pos, Op, Neg);
}
};
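
The select form, sketched standalone (helper name hypothetical, not part of the patch; like abs() itself, INT_MIN remains undefined):

#include <cassert>
#include <cstdlib>

// Sketch, not part of the patch: abs(x) -> x >s -1 ? x : -x.
static int abs_expanded(int x) {
  return x > -1 ? x : -x;
}

int main() {
  for (int x : {0, 5, -5, 123, -123})
    assert(std::abs(x) == abs_expanded(x));
  return 0;
}
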
-
+
//===---------------------------------------===//
// 'toascii' Optimizations
-struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
+struct ToAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != Type::Int32Ty)
+ FT->getParamType(0) != Type::getInt32Ty(*Context))
return 0;
-
+
 // toascii(c) -> c & 0x7f
return B.CreateAnd(CI->getOperand(1),
- Context->getConstantInt(CI->getType(),0x7F));
+ ConstantInt::get(CI->getType(),0x7F));
}
};
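
Both character-class rewrites reduce a libcall to one ALU op; a standalone sketch (helper names made up, checked over the non-negative range where the identities hold; not part of the patch):

#include <cassert>

// Sketch, not part of the patch: isascii(c) -> (unsigned)c < 128 and
// toascii(c) -> c & 0x7f.
static bool isascii_expanded(int c) { return static_cast<unsigned>(c) < 128u; }
static int  toascii_expanded(int c) { return c & 0x7F; }

int main() {
  for (int c = 0; c < 256; ++c) {
    assert(isascii_expanded(c) == (c < 128));
    assert(toascii_expanded(c) == c % 128);
  }
  return 0;
}
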
@@ -1257,15 +1274,15 @@ struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'printf' Optimizations
-struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
+struct PrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
!(isa<IntegerType>(FT->getReturnType()) ||
- FT->getReturnType() == Type::VoidTy))
+ FT->getReturnType()->isVoidTy()))
return 0;
-
+
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
@@ -1273,39 +1290,39 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
// Empty format string -> noop.
if (FormatStr.empty()) // Tolerate printf's declared void.
- return CI->use_empty() ? (Value*)CI :
- Context->getConstantInt(CI->getType(), 0);
-
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- EmitPutChar(Context->getConstantInt(Type::Int32Ty, FormatStr[0]), B);
- return CI->use_empty() ? (Value*)CI :
- Context->getConstantInt(CI->getType(), 1);
+ EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 1);
}
-
+
// printf("foo\n") --> puts("foo")
if (FormatStr[FormatStr.size()-1] == '\n' &&
FormatStr.find('%') == std::string::npos) { // no format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
FormatStr.erase(FormatStr.end()-1);
- Constant *C = Context->getConstantArray(FormatStr, true);
- C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage,
- C, "str", Callee->getParent());
+ Constant *C = ConstantArray::get(*Context, FormatStr, true);
+ C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
+ GlobalVariable::InternalLinkage, C, "str");
EmitPutS(C, B);
- return CI->use_empty() ? (Value*)CI :
- Context->getConstantInt(CI->getType(), FormatStr.size()+1);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
-
+
// Optimize specific format strings.
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
isa<IntegerType>(CI->getOperand(2)->getType())) {
EmitPutChar(CI->getOperand(2), B);
- return CI->use_empty() ? (Value*)CI :
- Context->getConstantInt(CI->getType(), 1);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 1);
}
-
+
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
isa<PointerType>(CI->getOperand(2)->getType()) &&
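
Observable behaviour of the printf rewrites, as a hedged standalone sketch (not part of the patch; puts appends the newline that the format string carried):

#include <cstdio>

int main() {
  // Sketch, not part of the patch: the printf forms folded above.
  std::putchar('x');   // printf("x")
  std::puts("foo");    // printf("foo\n")
  std::putchar('c');   // printf("%c", 'c')
  std::puts("bar");    // printf("%s\n", "bar")
  return 0;
}
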
@@ -1320,7 +1337,7 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'sprintf' Optimizations
-struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
+struct SPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed pointer arguments and an integer result.
const FunctionType *FT = Callee->getFunctionType();
@@ -1333,7 +1350,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
std::string FormatStr;
if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
return 0;
-
+
// If we just have a format string (nothing else crazy) transform it.
if (CI->getNumOperands() == 3) {
// Make sure there's no % in the constant array. We could try to handle
@@ -1341,41 +1358,49 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%')
return 0; // we found a format specifier, bail out.
-
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
- Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()+1),1,B);
- return Context->getConstantInt(CI->getType(), FormatStr.size());
+ ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
}
-
+
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
return 0;
-
+
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
- Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char");
+ Value *V = B.CreateTrunc(CI->getOperand(3),
+ Type::getInt8Ty(*Context), "char");
Value *Ptr = CastToCStr(CI->getOperand(1), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, Context->getConstantInt(Type::Int32Ty, 1), "nul");
- B.CreateStore(Context->getNullValue(Type::Int8Ty), Ptr);
-
- return Context->getConstantInt(CI->getType(), 1);
+ Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
+ "nul");
+ B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
}
-
+
if (FormatStr[1] == 's') {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
Value *Len = EmitStrLen(CI->getOperand(3), B);
Value *IncLen = B.CreateAdd(Len,
- Context->getConstantInt(Len->getType(), 1),
+ ConstantInt::get(Len->getType(), 1),
"leninc");
EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
-
+
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
}
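
What the "%s" case computes, sketched standalone (helper name hypothetical, not part of the patch; the buffer is assumed large enough, as sprintf itself requires):

#include <cassert>
#include <cstring>

// Sketch, not part of the patch: sprintf(dst, "%s", src) ->
// memcpy(dst, src, strlen(src)+1), returning the unincremented length.
static int sprintf_s_expanded(char *dst, const char *src) {
  std::size_t len = std::strlen(src);
  std::memcpy(dst, src, len + 1);   // +1 copies the terminating NUL
  return static_cast<int>(len);
}

int main() {
  char buf[32];
  assert(sprintf_s_expanded(buf, "hello") == 5);
  assert(std::strcmp(buf, "hello") == 0);
  return 0;
}
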
@@ -1386,7 +1411,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'fwrite' Optimizations
-struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
+struct FWriteOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require a pointer, an integer, an integer, a pointer, returning integer.
const FunctionType *FT = Callee->getFunctionType();
@@ -1396,22 +1421,22 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
!isa<PointerType>(FT->getParamType(3)) ||
!isa<IntegerType>(FT->getReturnType()))
return 0;
-
+
// Get the element size and count.
ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
if (!SizeC || !CountC) return 0;
uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
-
+
// If this is writing zero records, remove the call (it's a noop).
if (Bytes == 0)
- return Context->getConstantInt(CI->getType(), 0);
-
+ return ConstantInt::get(CI->getType(), 0);
+
// If this is writing one byte, turn it into fputc.
if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
EmitFPutC(Char, CI->getOperand(4), B);
- return Context->getConstantInt(CI->getType(), 1);
+ return ConstantInt::get(CI->getType(), 1);
}
return 0;
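
A hedged sketch of the Bytes == 1 case (standalone, not part of the patch): writing one byte via fwrite or fputc is interchangeable.

#include <cassert>
#include <cstdio>

int main() {
  // Sketch, not part of the patch: fwrite(S,1,1,F) -> fputc(S[0],F).
  std::FILE *f = std::tmpfile();
  if (!f) return 1;
  const char s[] = "x";
  assert(std::fwrite(s, 1, 1, f) == 1);   // the original call
  assert(std::fputc(s[0], f) != EOF);     // the rewritten form
  std::fclose(f);
  return 0;
}
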
@@ -1421,20 +1446,23 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'fputs' Optimizations
-struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
+struct FPutsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
// Require two pointers. Also, we can't optimize if return value is used.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
!CI->use_empty())
return 0;
-
+
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getOperand(1));
if (!Len) return 0;
EmitFWrite(CI->getOperand(1),
- Context->getConstantInt(TD->getIntPtrType(), Len-1),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
CI->getOperand(2), B);
return CI; // Known to have no uses (see above).
}
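
The fputs rewrite in source terms, sketched standalone (not part of the patch): GetStringLength above returns strlen+1, hence the Len-1 passed to fwrite.

#include <cstdio>
#include <cstring>

int main() {
  // Sketch, not part of the patch: fputs(s, F) -> fwrite(s, 1, strlen(s), F).
  std::FILE *f = std::tmpfile();
  if (!f) return 1;
  const char *s = "hello\n";
  std::fputs(s, f);                        // the original call
  std::fwrite(s, 1, std::strlen(s), f);    // the rewritten form
  std::fclose(f);
  return 0;
}
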
@@ -1443,7 +1471,7 @@ struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
//===---------------------------------------===//
// 'fprintf' Optimizations
-struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
+struct FPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
 // Require two fixed parameters as pointers and an integer result.
const FunctionType *FT = Callee->getFunctionType();
@@ -1451,7 +1479,7 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
!isa<PointerType>(FT->getParamType(1)) ||
!isa<IntegerType>(FT->getReturnType()))
return 0;
-
+
// All the optimizations depend on the format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
@@ -1462,26 +1490,29 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return 0; // We found a format specifier.
-
- EmitFWrite(CI->getOperand(2), Context->getConstantInt(TD->getIntPtrType(),
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
CI->getOperand(1), B);
- return Context->getConstantInt(CI->getType(), FormatStr.size());
+ return ConstantInt::get(CI->getType(), FormatStr.size());
}
-
+
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
return 0;
-
+
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> *(i8*)dst = chr
if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
- return Context->getConstantInt(CI->getType(), 1);
+ return ConstantInt::get(CI->getType(), 1);
}
-
+
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) -> fputs(str, F)
if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
@@ -1502,10 +1533,8 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
namespace {
/// This pass optimizes well known library functions from libc and libm.
///
- class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass {
+ class SimplifyLibCalls : public FunctionPass {
StringMap<LibCallOptimization*> Optimizations;
- // Miscellaneous LibCall Optimizations
- ExitOpt Exit;
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
@@ -1536,7 +1565,6 @@ namespace {
bool doInitialization(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
}
};
char SimplifyLibCalls::ID = 0;
@@ -1547,15 +1575,12 @@ X("simplify-libcalls", "Simplify well-known library calls");
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
- return new SimplifyLibCalls();
+ return new SimplifyLibCalls();
}
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
- // Miscellaneous LibCall Optimizations
- Optimizations["exit"] = &Exit;
-
// String and Memory LibCall Optimizations
Optimizations["strcat"] = &StrCat;
Optimizations["strncat"] = &StrNCat;
@@ -1576,7 +1601,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["memcpy"] = &MemCpy;
Optimizations["memmove"] = &MemMove;
Optimizations["memset"] = &MemSet;
-
+
// Math Library Optimizations
Optimizations["powf"] = &Pow;
Optimizations["pow"] = &Pow;
@@ -1594,7 +1619,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f80"] = &Exp2;
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
-
+
#ifdef HAVE_FLOORF
Optimizations["floor"] = &UnaryDoubleFP;
#endif
@@ -1610,7 +1635,7 @@ void SimplifyLibCalls::InitOptimizations() {
#ifdef HAVE_NEARBYINTF
Optimizations["nearbyint"] = &UnaryDoubleFP;
#endif
-
+
// Integer Optimizations
Optimizations["ffs"] = &FFS;
Optimizations["ffsl"] = &FFS;
@@ -1621,7 +1646,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["isdigit"] = &IsDigit;
Optimizations["isascii"] = &IsAscii;
Optimizations["toascii"] = &ToAscii;
-
+
// Formatting and IO Optimizations
Optimizations["sprintf"] = &SPrintF;
Optimizations["printf"] = &PrintF;
@@ -1636,10 +1661,10 @@ void SimplifyLibCalls::InitOptimizations() {
bool SimplifyLibCalls::runOnFunction(Function &F) {
if (Optimizations.empty())
InitOptimizations();
-
- const TargetData &TD = getAnalysis<TargetData>();
-
- IRBuilder<> Builder;
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ IRBuilder<> Builder(F.getContext());
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -1647,37 +1672,35 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
// Ignore non-calls.
CallInst *CI = dyn_cast<CallInst>(I++);
if (!CI) continue;
-
+
// Ignore indirect calls and calls to non-external functions.
Function *Callee = CI->getCalledFunction();
if (Callee == 0 || !Callee->isDeclaration() ||
!(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
continue;
-
+
// Ignore unknown calls.
- const char *CalleeName = Callee->getNameStart();
- StringMap<LibCallOptimization*>::iterator OMI =
- Optimizations.find(CalleeName, CalleeName+Callee->getNameLen());
- if (OMI == Optimizations.end()) continue;
-
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (!LCO) continue;
+
// Set the builder to the instruction after the call.
Builder.SetInsertPoint(BB, I);
-
+
// Try to optimize this call.
- Value *Result = OMI->second->OptimizeCall(CI, TD, Builder);
+ Value *Result = LCO->OptimizeCall(CI, TD, Builder);
if (Result == 0) continue;
- DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI;
- DOUT << " into: " << *Result << "\n");
-
+ DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI;
+ errs() << " into: " << *Result << "\n");
+
// Something changed!
Changed = true;
++NumSimplified;
-
+
// Inspect the instruction after the call (which was potentially just
// added) next.
I = CI; ++I;
-
+
if (CI != Result && !CI->use_empty()) {
CI->replaceAllUsesWith(Result);
if (!Result->hasName())
@@ -1736,40 +1759,39 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
if (!F.isDeclaration())
continue;
- unsigned NameLen = F.getNameLen();
- if (!NameLen)
+ if (!F.hasName())
continue;
const FunctionType *FTy = F.getFunctionType();
- const char *NameStr = F.getNameStart();
- switch (NameStr[0]) {
+ StringRef Name = F.getName();
+ switch (Name[0]) {
case 's':
- if (NameLen == 6 && !strcmp(NameStr, "strlen")) {
+ if (Name == "strlen") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) ||
- (NameLen == 6 && !strcmp(NameStr, "stpcpy")) ||
- (NameLen == 6 && !strcmp(NameStr, "strcat")) ||
- (NameLen == 6 && !strcmp(NameStr, "strtol")) ||
- (NameLen == 6 && !strcmp(NameStr, "strtod")) ||
- (NameLen == 6 && !strcmp(NameStr, "strtof")) ||
- (NameLen == 7 && !strcmp(NameStr, "strtoul")) ||
- (NameLen == 7 && !strcmp(NameStr, "strtoll")) ||
- (NameLen == 7 && !strcmp(NameStr, "strtold")) ||
- (NameLen == 7 && !strcmp(NameStr, "strncat")) ||
- (NameLen == 7 && !strcmp(NameStr, "strncpy")) ||
- (NameLen == 8 && !strcmp(NameStr, "strtoull"))) {
+ } else if (Name == "strcpy" ||
+ Name == "stpcpy" ||
+ Name == "strcat" ||
+ Name == "strtol" ||
+ Name == "strtod" ||
+ Name == "strtof" ||
+ Name == "strtoul" ||
+ Name == "strtoll" ||
+ Name == "strtold" ||
+ Name == "strncat" ||
+ Name == "strncpy" ||
+ Name == "strtoull") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) {
+ } else if (Name == "strxfrm") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1777,13 +1799,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) ||
- (NameLen == 6 && !strcmp(NameStr, "strspn")) ||
- (NameLen == 7 && !strcmp(NameStr, "strncmp")) ||
- (NameLen == 7 && !strcmp(NameStr, "strcspn")) ||
- (NameLen == 7 && !strcmp(NameStr, "strcoll")) ||
- (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) ||
- (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) {
+ } else if (Name == "strcmp" ||
+ Name == "strspn" ||
+ Name == "strncmp" ||
+ Name ==" strcspn" ||
+ Name == "strcoll" ||
+ Name == "strcasecmp" ||
+ Name == "strncasecmp") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1792,31 +1814,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 6 && !strcmp(NameStr, "strstr")) ||
- (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) {
+ } else if (Name == "strstr" ||
+ Name == "strpbrk") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) ||
- (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) {
+ } else if (Name == "strtok" ||
+ Name == "strtok_r") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) ||
- (NameLen == 6 && !strcmp(NameStr, "setbuf")) ||
- (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) {
+ } else if (Name == "scanf" ||
+ Name == "setbuf" ||
+ Name == "setvbuf") {
if (FTy->getNumParams() < 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) ||
- (NameLen == 7 && !strcmp(NameStr, "strndup"))) {
+ } else if (Name == "strdup" ||
+ Name == "strndup") {
if (FTy->getNumParams() < 1 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(0)))
@@ -1824,10 +1846,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) ||
- (NameLen == 6 && !strcmp(NameStr, "sscanf")) ||
- (NameLen == 7 && !strcmp(NameStr, "sprintf")) ||
- (NameLen == 7 && !strcmp(NameStr, "statvfs"))) {
+ } else if (Name == "stat" ||
+ Name == "sscanf" ||
+ Name == "sprintf" ||
+ Name == "statvfs") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1835,7 +1857,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) {
+ } else if (Name == "snprintf") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(2)))
@@ -1843,7 +1865,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 3);
- } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) {
+ } else if (Name == "setitimer") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(1)) ||
!isa<PointerType>(FTy->getParamType(2)))
@@ -1851,7 +1873,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
setDoesNotCapture(F, 3);
- } else if (NameLen == 6 && !strcmp(NameStr, "system")) {
+ } else if (Name == "system") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -1860,7 +1882,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'm':
- if (NameLen == 6 && !strcmp(NameStr, "memcmp")) {
+ if (Name == "malloc") {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "memcmp") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1869,29 +1897,29 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) ||
- (NameLen == 7 && !strcmp(NameStr, "memrchr"))) {
+ } else if (Name == "memchr" ||
+ Name == "memrchr") {
if (FTy->getNumParams() != 3)
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
- } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) ||
- (NameLen == 5 && !strcmp(NameStr, "modff")) ||
- (NameLen == 5 && !strcmp(NameStr, "modfl")) ||
- (NameLen == 6 && !strcmp(NameStr, "memcpy")) ||
- (NameLen == 7 && !strcmp(NameStr, "memccpy")) ||
- (NameLen == 7 && !strcmp(NameStr, "memmove"))) {
+ } else if (Name == "modf" ||
+ Name == "modff" ||
+ Name == "modfl" ||
+ Name == "memcpy" ||
+ Name == "memccpy" ||
+ Name == "memmove") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) {
+ } else if (Name == "memalign") {
if (!isa<PointerType>(FTy->getReturnType()))
continue;
setDoesNotAlias(F, 0);
- } else if ((NameLen == 5 && !strcmp(NameStr, "mkdir")) ||
- (NameLen == 6 && !strcmp(NameStr, "mktime"))) {
+ } else if (Name == "mkdir" ||
+ Name == "mktime") {
if (FTy->getNumParams() == 0 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -1900,7 +1928,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'r':
- if (NameLen == 7 && !strcmp(NameStr, "realloc")) {
+ if (Name == "realloc") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getReturnType()))
@@ -1908,23 +1936,23 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
- } else if (NameLen == 4 && !strcmp(NameStr, "read")) {
+ } else if (Name == "read") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
// May throw; "read" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
- } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) ||
- (NameLen == 6 && !strcmp(NameStr, "rewind")) ||
- (NameLen == 6 && !strcmp(NameStr, "remove")) ||
- (NameLen == 8 && !strcmp(NameStr, "realpath"))) {
+ } else if (Name == "rmdir" ||
+ Name == "rewind" ||
+ Name == "remove" ||
+ Name == "realpath") {
if (FTy->getNumParams() < 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) ||
- (NameLen == 8 && !strcmp(NameStr, "readlink"))) {
+ } else if (Name == "rename" ||
+ Name == "readlink") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1935,7 +1963,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'w':
- if (NameLen == 5 && !strcmp(NameStr, "write")) {
+ if (Name == "write") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
@@ -1944,7 +1972,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'b':
- if (NameLen == 5 && !strcmp(NameStr, "bcopy")) {
+ if (Name == "bcopy") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1952,7 +1980,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) {
+ } else if (Name == "bcmp") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -1961,7 +1989,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setOnlyReadsMemory(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) {
+ } else if (Name == "bzero") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -1970,17 +1998,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'c':
- if (NameLen == 6 && !strcmp(NameStr, "calloc")) {
+ if (Name == "calloc") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getReturnType()))
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
- } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) ||
- (NameLen == 5 && !strcmp(NameStr, "chown")) ||
- (NameLen == 7 && !strcmp(NameStr, "ctermid")) ||
- (NameLen == 8 && !strcmp(NameStr, "clearerr")) ||
- (NameLen == 8 && !strcmp(NameStr, "closedir"))) {
+ } else if (Name == "chmod" ||
+ Name == "chown" ||
+ Name == "ctermid" ||
+ Name == "clearerr" ||
+ Name == "closedir") {
if (FTy->getNumParams() == 0 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -1989,17 +2017,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'a':
- if ((NameLen == 4 && !strcmp(NameStr, "atoi")) ||
- (NameLen == 4 && !strcmp(NameStr, "atol")) ||
- (NameLen == 4 && !strcmp(NameStr, "atof")) ||
- (NameLen == 5 && !strcmp(NameStr, "atoll"))) {
+ if (Name == "atoi" ||
+ Name == "atol" ||
+ Name == "atof" ||
+ Name == "atoll") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
setDoesNotCapture(F, 1);
- } else if (NameLen == 6 && !strcmp(NameStr, "access")) {
+ } else if (Name == "access") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -2008,7 +2036,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'f':
- if (NameLen == 5 && !strcmp(NameStr, "fopen")) {
+ if (Name == "fopen") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(0)) ||
@@ -2018,7 +2046,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) {
+ } else if (Name == "fdopen") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -2026,52 +2054,52 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) ||
- (NameLen == 4 && !strcmp(NameStr, "free")) ||
- (NameLen == 5 && !strcmp(NameStr, "fseek")) ||
- (NameLen == 5 && !strcmp(NameStr, "ftell")) ||
- (NameLen == 5 && !strcmp(NameStr, "fgetc")) ||
- (NameLen == 6 && !strcmp(NameStr, "fseeko")) ||
- (NameLen == 6 && !strcmp(NameStr, "ftello")) ||
- (NameLen == 6 && !strcmp(NameStr, "fileno")) ||
- (NameLen == 6 && !strcmp(NameStr, "fflush")) ||
- (NameLen == 6 && !strcmp(NameStr, "fclose")) ||
- (NameLen == 7 && !strcmp(NameStr, "fsetpos")) ||
- (NameLen == 9 && !strcmp(NameStr, "flockfile")) ||
- (NameLen == 11 && !strcmp(NameStr, "funlockfile")) ||
- (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) {
+ } else if (Name == "feof" ||
+ Name == "free" ||
+ Name == "fseek" ||
+ Name == "ftell" ||
+ Name == "fgetc" ||
+ Name == "fseeko" ||
+ Name == "ftello" ||
+ Name == "fileno" ||
+ Name == "fflush" ||
+ Name == "fclose" ||
+ Name == "fsetpos" ||
+ Name == "flockfile" ||
+ Name == "funlockfile" ||
+ Name == "ftrylockfile") {
if (FTy->getNumParams() == 0 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) {
+ } else if (Name == "ferror") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setOnlyReadsMemory(F);
- } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) ||
- (NameLen == 5 && !strcmp(NameStr, "fstat")) ||
- (NameLen == 5 && !strcmp(NameStr, "frexp")) ||
- (NameLen == 6 && !strcmp(NameStr, "frexpf")) ||
- (NameLen == 6 && !strcmp(NameStr, "frexpl")) ||
- (NameLen == 8 && !strcmp(NameStr, "fstatvfs"))) {
+ } else if (Name == "fputc" ||
+ Name == "fstat" ||
+ Name == "frexp" ||
+ Name == "frexpf" ||
+ Name == "frexpl" ||
+ Name == "fstatvfs") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) {
+ } else if (Name == "fgets") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(2)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 3);
- } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) ||
- (NameLen == 6 && !strcmp(NameStr, "fwrite"))) {
+ } else if (Name == "fread" ||
+ Name == "fwrite") {
if (FTy->getNumParams() != 4 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(3)))
@@ -2079,10 +2107,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 4);
- } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) ||
- (NameLen == 6 && !strcmp(NameStr, "fscanf")) ||
- (NameLen == 7 && !strcmp(NameStr, "fprintf")) ||
- (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) {
+ } else if (Name == "fputs" ||
+ Name == "fscanf" ||
+ Name == "fprintf" ||
+ Name == "fgetpos") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -2093,31 +2121,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'g':
- if ((NameLen == 4 && !strcmp(NameStr, "getc")) ||
- (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) ||
- (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) {
+ if (Name == "getc" ||
+ Name == "getlogin_r" ||
+ Name == "getc_unlocked") {
if (FTy->getNumParams() == 0 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) {
+ } else if (Name == "getenv") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) ||
- (NameLen == 7 && !strcmp(NameStr, "getchar"))) {
+ } else if (Name == "gets" ||
+ Name == "getchar") {
setDoesNotThrow(F);
- } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) {
+ } else if (Name == "getitimer") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) {
+ } else if (Name == "getpwnam") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -2126,22 +2154,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'u':
- if (NameLen == 6 && !strcmp(NameStr, "ungetc")) {
+ if (Name == "ungetc") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) ||
- (NameLen == 6 && !strcmp(NameStr, "unlink")) ||
- (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) {
+ } else if (Name == "uname" ||
+ Name == "unlink" ||
+ Name == "unsetenv") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) ||
- (NameLen == 6 && !strcmp(NameStr, "utimes"))) {
+ } else if (Name == "utime" ||
+ Name == "utimes") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -2152,30 +2180,30 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'p':
- if (NameLen == 4 && !strcmp(NameStr, "putc")) {
+ if (Name == "putc") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) ||
- (NameLen == 6 && !strcmp(NameStr, "printf")) ||
- (NameLen == 6 && !strcmp(NameStr, "perror"))) {
+ } else if (Name == "puts" ||
+ Name == "printf" ||
+ Name == "perror") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 5 && !strcmp(NameStr, "pread")) ||
- (NameLen == 6 && !strcmp(NameStr, "pwrite"))) {
+ } else if (Name == "pread" ||
+ Name == "pwrite") {
if (FTy->getNumParams() != 4 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
// May throw; these are valid pthread cancellation points.
setDoesNotCapture(F, 2);
- } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) {
+ } else if (Name == "putchar") {
setDoesNotThrow(F);
- } else if (NameLen == 5 && !strcmp(NameStr, "popen")) {
+ } else if (Name == "popen") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(0)) ||
@@ -2185,7 +2213,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) {
+ } else if (Name == "pclose") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -2194,14 +2222,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'v':
- if (NameLen == 6 && !strcmp(NameStr, "vscanf")) {
+ if (Name == "vscanf") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) ||
- (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) {
+ } else if (Name == "vsscanf" ||
+ Name == "vfscanf") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(1)) ||
!isa<PointerType>(FTy->getParamType(2)))
@@ -2209,19 +2237,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) {
+ } else if (Name == "valloc") {
if (!isa<PointerType>(FTy->getReturnType()))
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
- } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) {
+ } else if (Name == "vprintf") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) ||
- (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) {
+ } else if (Name == "vfprintf" ||
+ Name == "vsprintf") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -2229,7 +2257,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) {
+ } else if (Name == "vsnprintf") {
if (FTy->getNumParams() != 4 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(2)))
@@ -2240,13 +2268,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'o':
- if (NameLen == 4 && !strcmp(NameStr, "open")) {
+ if (Name == "open") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
- } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) {
+ } else if (Name == "opendir") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(0)))
@@ -2257,12 +2285,12 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 't':
- if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) {
+ if (Name == "tmpfile") {
if (!isa<PointerType>(FTy->getReturnType()))
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
- } else if (NameLen == 5 && !strcmp(NameStr, "times")) {
+ } else if (Name == "times") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -2271,21 +2299,21 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'h':
- if ((NameLen == 5 && !strcmp(NameStr, "htonl")) ||
- (NameLen == 5 && !strcmp(NameStr, "htons"))) {
+ if (Name == "htonl" ||
+ Name == "htons") {
setDoesNotThrow(F);
setDoesNotAccessMemory(F);
}
break;
case 'n':
- if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) ||
- (NameLen == 5 && !strcmp(NameStr, "ntohs"))) {
+ if (Name == "ntohl" ||
+ Name == "ntohs") {
setDoesNotThrow(F);
setDoesNotAccessMemory(F);
}
break;
case 'l':
- if (NameLen == 5 && !strcmp(NameStr, "lstat")) {
+ if (Name == "lstat") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -2293,7 +2321,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) {
+ } else if (Name == "lchown") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
@@ -2302,7 +2330,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 'q':
- if (NameLen == 5 && !strcmp(NameStr, "qsort")) {
+ if (Name == "qsort") {
if (FTy->getNumParams() != 4 ||
!isa<PointerType>(FTy->getParamType(3)))
continue;
@@ -2311,8 +2339,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case '_':
- if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) ||
- (NameLen == 9 && !strcmp(NameStr, "__strndup"))) {
+ if (Name == "__strdup" ||
+ Name == "__strndup") {
if (FTy->getNumParams() < 1 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(0)))
@@ -2320,19 +2348,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
- } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) {
+ } else if (Name == "__strtok_r") {
if (FTy->getNumParams() != 3 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) {
+ } else if (Name == "_IO_getc") {
if (FTy->getNumParams() != 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) {
+ } else if (Name == "_IO_putc") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
@@ -2341,16 +2369,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
}
break;
case 1:
- if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) {
+ if (Name == "\1__isoc99_scanf") {
if (FTy->getNumParams() < 1 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) ||
- (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) ||
- (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) ||
- (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) {
+ } else if (Name == "\1stat64" ||
+ Name == "\1lstat64" ||
+ Name == "\1statvfs64" ||
+ Name == "\1__isoc99_sscanf") {
if (FTy->getNumParams() < 1 ||
!isa<PointerType>(FTy->getParamType(0)) ||
!isa<PointerType>(FTy->getParamType(1)))
@@ -2358,7 +2386,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) {
+ } else if (Name == "\1fopen64") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getReturnType()) ||
!isa<PointerType>(FTy->getParamType(0)) ||
@@ -2368,26 +2396,26 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) ||
- (NameLen == 9 && !strcmp(NameStr, "\1ftello64"))) {
+ } else if (Name == "\1fseeko64" ||
+ Name == "\1ftello64") {
if (FTy->getNumParams() == 0 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
- } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) {
+ } else if (Name == "\1tmpfile64") {
if (!isa<PointerType>(FTy->getReturnType()))
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
- } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) ||
- (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) {
+ } else if (Name == "\1fstat64" ||
+ Name == "\1fstatvfs64") {
if (FTy->getNumParams() != 2 ||
!isa<PointerType>(FTy->getParamType(1)))
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
- } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) {
+ } else if (Name == "\1open64") {
if (FTy->getNumParams() < 2 ||
!isa<PointerType>(FTy->getParamType(0)))
continue;
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index c037ee960317..68689d6f13b7 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -30,8 +30,8 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <map>
@@ -45,7 +45,7 @@ TailDupThreshold("taildup-threshold",
cl::init(1), cl::Hidden);
namespace {
- class VISIBILITY_HIDDEN TailDup : public FunctionPass {
+ class TailDup : public FunctionPass {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
@@ -128,7 +128,7 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
// other instructions.
if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
- // Allso alloca and malloc.
+ // Also alloca and malloc.
if (isa<AllocationInst>(I)) return false;
// Some vector instructions can expand into a number of instructions.
@@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
BasicBlock *DestBlock = Branch->getSuccessor(0);
assert(SourceBlock != DestBlock && "Our predicate is broken!");
- DOUT << "TailDuplication[" << SourceBlock->getParent()->getName()
- << "]: Eliminating branch: " << *Branch;
+ DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+ << "]: Eliminating branch: " << *Branch);
// See if we can avoid duplicating code by moving it up to a dominator of both
// blocks.
if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
- DOUT << "Found shared dominator: " << DomBlock->getName() << "\n";
+ DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n");
// If there are non-phi instructions in DestBlock that have no operands
// defined in DestBlock, and if the instruction has no side effects, we can
@@ -258,7 +258,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
while (!isa<TerminatorInst>(BBI)) {
Instruction *I = BBI++;
- bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects();
+ bool CanHoist = I->isSafeToSpeculativelyExecute() &&
+ !I->mayReadFromMemory();
if (CanHoist) {
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
@@ -271,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
// Remove from DestBlock, move right before the term in DomBlock.
DestBlock->getInstList().remove(I);
DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
- DOUT << "Hoisted: " << *I;
+ DEBUG(errs() << "Hoisted: " << *I);
}
}
}
@@ -358,7 +359,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
+ else if (Constant *C = ConstantFoldInstruction(Inst,
+ Inst->getContext())) {
Inst->replaceAllUsesWith(C);
Inst->eraseFromParent();
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 34ee57c9b9dc..b56e17040db2 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -60,14 +60,13 @@
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
- struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass {
+ struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(&ID) {}
@@ -394,7 +393,7 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// create the new entry block, allowing us to branch back to the old entry.
if (OldEntry == 0) {
OldEntry = &F->getEntryBlock();
- BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry);
+ BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
OldEntry->setName("tailrecurse");
BranchInst::Create(OldEntry, NewEntry);
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 71049fa212d3..135a621f5d96 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -19,17 +19,18 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::PatternMatch;
-void ExtAddrMode::print(OStream &OS) const {
+void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
OS << "[";
if (BaseGV) {
OS << (NeedPlus ? " + " : "")
<< "GV:";
- WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false);
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
NeedPlus = true;
}
@@ -39,13 +40,13 @@ void ExtAddrMode::print(OStream &OS) const {
if (BaseReg) {
OS << (NeedPlus ? " + " : "")
<< "Base:";
- WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false);
+ WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
NeedPlus = true;
}
if (Scale) {
OS << (NeedPlus ? " + " : "")
<< Scale << "*";
- WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false);
+ WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
NeedPlus = true;
}
@@ -53,8 +54,8 @@ void ExtAddrMode::print(OStream &OS) const {
}
void ExtAddrMode::dump() const {
- print(cerr);
- cerr << '\n';
+ print(errs());
+ errs() << '\n';
}
@@ -205,7 +206,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (!RHS) return false;
int64_t Scale = RHS->getSExtValue();
if (Opcode == Instruction::Shl)
- Scale = 1 << Scale;
+ Scale = 1LL << Scale;
return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
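
Why the 1LL matters, as a standalone sketch (not part of the patch): with an int literal the shift is performed in 32 bits, so shift amounts of 31 or more overflow or are outright undefined.

#include <cstdint>
#include <cstdio>

int main() {
  // Sketch, not part of the patch: 1 << 33 is UB (shift wider than int);
  // 1LL << 33 is the intended 64-bit scale.
  int64_t scale = 33;
  int64_t good = INT64_C(1) << scale;   // same as 1LL << scale
  std::printf("%lld\n", static_cast<long long>(good));  // 8589934592
  return 0;
}
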
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6d1180d0dd9a..4931ab3f7fad 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,6 +16,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Constant.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -23,6 +24,8 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ValueHandle.h"
#include <algorithm>
using namespace llvm;
@@ -249,11 +252,11 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
Value *RetVal = 0;
// Create a value to return... if the function doesn't return null...
- if (BB->getParent()->getReturnType() != Type::VoidTy)
+ if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext()))
RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
// Create the return...
- NewTI = ReturnInst::Create(RetVal);
+ NewTI = ReturnInst::Create(TI->getContext(), RetVal);
}
break;
@@ -261,8 +264,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
case Instruction::Switch: // Should remove entry
default:
case Instruction::Ret: // Cannot happen, has no successors!
- assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!");
- abort();
+ llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
}
if (NewTI) // If it's a different instruction, replace.
@@ -318,7 +320,8 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
++SplitIt;
BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
- // The new block lives in whichever loop the old one did.
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, LI->getBase());
@@ -352,32 +355,61 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
/// Preds array, which has NumPreds elements in it. The new block is given a
/// suffix of 'Suffix'.
///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and
-/// DominanceFrontier, but no other analyses.
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses.
+/// In particular, it does not preserve LoopSimplify (because it's
+/// complicated to handle the case where one of the edges being split
+/// is an exit of a loop with other exits).
+///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BasicBlock *const *Preds,
unsigned NumPreds, const char *Suffix,
Pass *P) {
// Create new basic block, insert right before the original block.
- BasicBlock *NewBB =
- BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB);
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
+ LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
+ Loop *L = LI ? LI->getLoopFor(BB) : 0;
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i)
+ // While here, if we need to preserve loop analyses, collect
+ // some information about how this split will affect loops.
+ bool HasLoopExit = false;
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ for (unsigned i = 0; i != NumPreds; ++i) {
Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
-
+
+ if (LI) {
+ // If we need to preserve LCSSA, determine if any of
+ // the preds is a loop exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Preds[i]))
+ if (!PL->contains(BB))
+ HasLoopExit = true;
+ // If we need to preserve LoopInfo, note whether any of the
+ // preds crosses an interesting loop boundary.
+ if (L) {
+ if (L->contains(Preds[i]))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+ }
+ }
+
// Update dominator tree and dominator frontier if available.
DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
if (DT)
DT->splitBlock(NewBB);
if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
DF->splitBlock(NewBB);
- AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
-
-
+
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
@@ -388,20 +420,42 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
return NewBB;
}
+
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+ if (L) {
+ if (IsLoopEntry) {
+ if (Loop *PredLoop = LI->getLoopFor(Preds[0])) {
+ // Add the new block to the nearest enclosing loop (and not an
+ // adjacent loop).
+ while (PredLoop && !PredLoop->contains(BB))
+ PredLoop = PredLoop->getParentLoop();
+ if (PredLoop)
+ PredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+ }
// Otherwise, create a new PHI node in NewBB for each PHI node in BB.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
// Check to see if all of the values coming in are the same. If so, we
- // don't need to create a new PHI node.
- Value *InVal = PN->getIncomingValueForBlock(Preds[0]);
- for (unsigned i = 1; i != NumPreds; ++i)
- if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
- InVal = 0;
- break;
- }
-
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = 0;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1; i != NumPreds; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0;
+ break;
+ }
+ }
+
if (InVal) {
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
@@ -426,16 +480,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Add an incoming value to the PHI node in the loop for the preheader
// edge.
PN->addIncoming(InVal, NewBB);
-
- // Check to see if we can eliminate this phi node.
- if (Value *V = PN->hasConstantValue(DT != 0)) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I || DT == 0 || DT->dominates(I, PN)) {
- PN->replaceAllUsesWith(V);
- if (AA) AA->deleteValue(PN);
- PN->eraseFromParent();
- }
- }
}
return NewBB;
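For reference, a hypothetical call into the updated utility (Exit, PredA, PredB, and P are assumed names); with a non-null pass, it now keeps DominatorTree, DominanceFrontier, LoopInfo, and LCSSA consistent as documented above:

BasicBlock *Preds[] = { PredA, PredB };          // preds to peel off of Exit
BasicBlock *NewBB =
    SplitBlockPredecessors(Exit, Preds, 2, ".split", P);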
@@ -503,11 +547,15 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
- // Test if the values come form identical arithmetic instructions.
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
isa<PHINode>(A) || isa<GetElementPtrInst>(A))
if (const Instruction *BI = dyn_cast<Instruction>(B))
- if (cast<Instruction>(A)->isIdenticalTo(BI))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
// Otherwise they may not be equivalent.
@@ -537,7 +585,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
unsigned AccessSize = 0;
if (AA) {
const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy);
+ AccessSize = AA->getTypeStoreSize(AccessTy);
}
while (ScanFrom != ScanBB->begin()) {
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index 1650cfa30653..4b720b1e323c 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "basicinliner"
-
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/Transforms/Utils/BasicInliner.h"
@@ -21,6 +20,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <vector>
@@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() {
}
}
- DOUT << ": " << CallSites.size() << " call sites.\n";
+ DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
// Inline call sites.
bool Changed = false;
@@ -109,22 +109,22 @@ void BasicInlinerImpl::inlineFunctions() {
}
InlineCost IC = CA.getInlineCost(CS, NeverInline);
if (IC.isAlways()) {
- DOUT << " Inlining: cost=always"
- <<", call: " << *CS.getInstruction();
+ DEBUG(errs() << " Inlining: cost=always"
+ <<", call: " << *CS.getInstruction());
} else if (IC.isNever()) {
- DOUT << " NOT Inlining: cost=never"
- <<", call: " << *CS.getInstruction();
+ DEBUG(errs() << " NOT Inlining: cost=never"
+ <<", call: " << *CS.getInstruction());
continue;
} else {
int Cost = IC.getValue();
if (Cost >= (int) BasicInlineThreshold) {
- DOUT << " NOT Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction();
+ DEBUG(errs() << " NOT Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
continue;
} else {
- DOUT << " Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction();
+ DEBUG(errs() << " Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
}
}
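These hunks track the removal of the DOUT stream: debug printing now goes through errs() inside the DEBUG macro, which compiles away in NDEBUG builds and is gated on -debug otherwise. The new idiom in miniature (reportCallSites is a hypothetical helper):

#define DEBUG_TYPE "basicinliner"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

static void reportCallSites(unsigned N) {
  DEBUG(llvm::errs() << N << " call sites.\n"); // no-op unless -debug is set
}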
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c4fd1eae43cd..849b2b5d5cd6 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -21,11 +21,13 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -43,6 +45,7 @@ namespace {
AU.addPreserved<DominatorTree>();
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ProfileInfo>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -114,6 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
return false;
}
+/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+ BasicBlock *SplitBB,
+ BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
+ "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block...
+ for (BasicBlock::iterator I = DestBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+ Value *V = PN->getIncomingValue(Idx);
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(PN->getType(), "split",
+ SplitBB->getTerminator());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ NewPN->addIncoming(V, Preds[i]);
+ // Update the original PHI.
+ PN->setIncomingValue(Idx, NewPN);
+ }
+}
+
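LCSSA requires that any value live out of a loop be funneled through a PHI in each exit block, so splitting an exit edge needs fresh PHIs in the inserted block. The call pattern, mirroring the use sites later in this patch (TIBB, NewBB, DestBB as in SplitCriticalEdge below):

SmallVector<BasicBlock *, 1> OrigPred;
OrigPred.push_back(TIBB);                            // the lone in-loop pred
CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); // NewBB is the new exit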
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge. This will update DominatorTree and
/// DominatorFrontier information if it is available, thus calling this pass
@@ -121,15 +156,15 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
/// false otherwise. This ensures that all edges to that dest go to one block
/// instead of each going to a different block.
//
-bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
- bool MergeIdenticalEdges) {
- if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false;
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ Pass *P, bool MergeIdenticalEdges) {
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
// Create a new basic block, linking it into the CFG.
- BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." +
- DestBB->getName() + "_crit_edge");
+ BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+ TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
// Create our unconditional branch...
BranchInst::Create(DestBB, NewBB);
@@ -171,7 +206,7 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
// If we don't have a pass object, we can't update anything...
- if (P == 0) return true;
+ if (P == 0) return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
// the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
@@ -222,8 +257,8 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
// If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
if (!OtherPreds.empty()) {
// FIXME: IMPLEMENT THIS!
- assert(0 && "Requiring domfrontiers but not idom/domtree/domset."
- " not implemented yet!");
+ llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
+ " not implemented yet!");
}
// Since the new block is dominated by its only predecessor TIBB,
@@ -253,9 +288,9 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
// Update LoopInfo if it is around.
if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) {
- // If one or the other blocks were not in a loop, the new block is not
- // either, and thus LI doesn't need to be updated.
- if (Loop *TIL = LI->getLoopFor(TIBB))
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
if (TIL == DestLoop) {
// Both in the same loop, the NewBB joins loop.
@@ -277,6 +312,65 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
P->addBasicBlockToLoop(NewBB, LI->getBase());
}
}
+ // If TIBB is in a loop and DestBB is outside of that loop, split the
+ // other exit blocks of the loop that also have predecessors outside
+ // the loop, to maintain a LoopSimplify guarantee.
+ if (!TIL->contains(DestBB) &&
+ P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (P->mustPreserveAnalysisID(LCSSAID)) {
+ SmallVector<BasicBlock *, 1> OrigPred;
+ OrigPred.push_back(TIBB);
+ CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+ }
+
+ // For each unique exit block...
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ TIL->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ // Collect all the preds that are inside the loop, and note
+ // whether there are any preds outside the loop.
+ SmallVector<BasicBlock *, 4> Preds;
+ bool HasPredOutsideOfLoop = false;
+ BasicBlock *Exit = ExitBlocks[i];
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+ I != E; ++I)
+ if (TIL->contains(*I))
+ Preds.push_back(*I);
+ else
+ HasPredOutsideOfLoop = true;
+ // If there are any preds not in the loop, we'll need to split
+ // the edges. The Preds.empty() check is needed because a block
+ // may appear multiple times in the list. We can't use
+ // getUniqueExitBlocks above because that depends on LoopSimplify
+ // form, which we're in the process of restoring!
+ if (!Preds.empty() && HasPredOutsideOfLoop) {
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+ "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+ }
+ }
+ }
+ // LCSSA form was updated above for the case where LoopSimplify is
+ // available, which means that all predecessors of loop exit blocks
+ // are within the loop. Without LoopSimplify form, it would be
+ // necessary to insert a new phi.
+ assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+ P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+ "SplitCriticalEdge doesn't know how to update LCSSA form "
+ "without LoopSimplify!");
+ }
}
- return true;
+
+ // Update ProfileInfo if it is around.
+ if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) {
+ PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges);
+ }
+
+ return NewBB;
}
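SplitCriticalEdge now hands back the inserted block (or null when the edge was not critical) instead of a bare bool, so callers can keep working on it. A hypothetical caller under the new signature (ProcessNewBlock is an assumed follow-up helper):

if (BasicBlock *NewBB = SplitCriticalEdge(TI, SuccNum, P, false)) {
  // The edge was critical and has been split; NewBB now sits on it.
  ProcessNewBlock(NewBB);
}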
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 10cae5ca7087..f4394ea64d6e 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -6,11 +6,10 @@ add_llvm_library(LLVMTransformUtils
CloneFunction.cpp
CloneLoop.cpp
CloneModule.cpp
- CloneTrace.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
- InlineCost.cpp
InlineFunction.cpp
+ InstructionNamer.cpp
LCSSA.cpp
Local.cpp
LoopSimplify.cpp
@@ -19,12 +18,12 @@ add_llvm_library(LLVMTransformUtils
LowerSwitch.cpp
Mem2Reg.cpp
PromoteMemoryToRegister.cpp
- SimplifyCFG.cpp
+ SSAUpdater.cpp
SSI.cpp
+ SimplifyCFG.cpp
UnifyFunctionExitNodes.cpp
UnrollLoop.cpp
ValueMapper.cpp
- InstructionNamer.cpp
)
target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d0fdefa3f689..30130fa0a126 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -34,7 +35,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
DenseMap<const Value*, Value*> &ValueMap,
const char *NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo) {
- BasicBlock *NewBB = BasicBlock::Create("", F);
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
@@ -72,7 +73,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
DenseMap<const Value*, Value*> &ValueMap,
- std::vector<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -165,7 +166,7 @@ Function *llvm::CloneFunction(const Function *F,
ValueMap[I] = DestI++; // Add mapping to ValueMap
}
- std::vector<ReturnInst*> Returns; // Ignore returns cloned...
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
return NewF;
}
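Taking the returns list as SmallVectorImpl<ReturnInst*>& lets each caller pick its own inline capacity with no conversion. A sketch of the updated calling convention (NewF, F, and CodeInfo assumed, as in the body above):

DenseMap<const Value*, Value*> ValueMap;
SmallVector<ReturnInst*, 8> Returns; // any capacity works via SmallVectorImpl
CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);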
@@ -179,7 +180,7 @@ namespace {
Function *NewFunc;
const Function *OldFunc;
DenseMap<const Value*, Value*> &ValueMap;
- std::vector<ReturnInst*> &Returns;
+ SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const TargetData *TD;
@@ -187,7 +188,7 @@ namespace {
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
DenseMap<const Value*, Value*> &valueMap,
- std::vector<ReturnInst*> &returns,
+ SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
@@ -218,7 +219,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Nope, clone it now.
BasicBlock *NewBB;
- BBEntry = NewBB = BasicBlock::Create();
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
@@ -237,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.
if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {
if (DbgFnStart == NULL) {
- DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram()));
+ DISubprogram SP(DFSI->getSubprogram());
if (SP.describes(BB->getParent()))
DbgFnStart = DFSI->getSubprogram();
}
@@ -323,17 +324,21 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
/// mapping its operands through ValueMap if they are available.
Constant *PruningFunctionCloner::
ConstantFoldMappedInstruction(const Instruction *I) {
+ LLVMContext &Context = I->getContext();
+
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- ValueMap)))
+ ValueMap,
+ Context)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Ops[0], Ops.size(), TD);
+ &Ops[0], Ops.size(),
+ Context, TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
@@ -344,7 +349,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
CE);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
- Ops.size(), TD);
+ Ops.size(), Context, TD);
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -356,11 +361,12 @@ ConstantFoldMappedInstruction(const Instruction *I) {
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
DenseMap<const Value*, Value*> &ValueMap,
- std::vector<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
const TargetData *TD) {
assert(NameSuffix && "NameSuffix cannot be null!");
+ LLVMContext &Context = OldFunc->getContext();
#ifndef NDEBUG
for (Function::const_arg_iterator II = OldFunc->arg_begin(),
@@ -385,7 +391,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// insert it into the new function in the right order. If not, ignore it.
//
// Defer PHI resolution until rest of function is resolved.
- std::vector<const PHINode*> PHIToResolve;
+ SmallVector<const PHINode*, 16> PHIToResolve;
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
@@ -430,7 +436,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
if (BasicBlock *MappedBlock =
cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
- Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap);
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ ValueMap, Context);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 82f5b93a9544..0285f8c8d107 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -56,10 +56,11 @@ Module *llvm::CloneModule(const Module *M,
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
- GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(),
+ GlobalVariable *GV = new GlobalVariable(*New,
+ I->getType()->getElementType(),
false,
GlobalValue::ExternalLinkage, 0,
- I->getName(), New);
+ I->getName());
GV->setAlignment(I->getAlignment());
ValueMap[I] = GV;
}
@@ -88,7 +89,8 @@ Module *llvm::CloneModule(const Module *M,
GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- ValueMap)));
+ ValueMap,
+ M->getContext())));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M,
ValueMap[J] = DestI++;
}
- std::vector<ReturnInst*> Returns; // Ignore returns cloned...
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(F, I, ValueMap, Returns);
}
@@ -119,7 +121,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+ GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext())));
}
return New;
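MapValue, used throughout these clone routines, now threads the LLVMContext through explicitly. A hypothetical remap under the new signature (OrigVal assumed):

Value *Mapped = MapValue(OrigVal, ValueMap, M->getContext());
if (Mapped == 0) {
  // OrigVal had no mapping in ValueMap and could not be remapped.
}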
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 6d5904e30886..c39ccf7d3f45 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -18,6 +18,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
@@ -27,6 +28,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <set>
@@ -180,8 +183,24 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
void CodeExtractor::splitReturnBlocks() {
for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
E = BlocksToExtract.end(); I != E; ++I)
- if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator()))
- (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+ BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ if (DT) {
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
+ DomTreeNode *OldNode = DT->getNode(*I);
+ SmallVector<DomTreeNode*, 8> Children;
+ for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+ DI != DE; ++DI)
+ Children.push_back(*DI);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+ for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+ }
}
// findInputsOutputs - Find inputs to, outputs from the code region.
@@ -234,15 +253,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
BasicBlock *newHeader,
Function *oldFunction,
Module *M) {
- DOUT << "inputs: " << inputs.size() << "\n";
- DOUT << "outputs: " << outputs.size() << "\n";
+ DEBUG(errs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(errs() << "outputs: " << outputs.size() << "\n");
// This function returns unsigned, outputs will go back by reference.
switch (NumExitBlocks) {
case 0:
- case 1: RetTy = Type::VoidTy; break;
- case 2: RetTy = Type::Int1Ty; break;
- default: RetTy = Type::Int16Ty; break;
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
}
std::vector<const Type*> paramTy;
@@ -251,32 +270,34 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
for (Values::const_iterator i = inputs.begin(),
e = inputs.end(); i != e; ++i) {
const Value *value = *i;
- DOUT << "value used in func: " << *value << "\n";
+ DEBUG(errs() << "value used in func: " << *value << "\n");
paramTy.push_back(value->getType());
}
// Add the types of the output values to the function's argument list.
for (Values::const_iterator I = outputs.begin(), E = outputs.end();
I != E; ++I) {
- DOUT << "instr used in func: " << **I << "\n";
+ DEBUG(errs() << "instr used in func: " << **I << "\n");
if (AggregateArgs)
paramTy.push_back((*I)->getType());
else
paramTy.push_back(PointerType::getUnqual((*I)->getType()));
}
- DOUT << "Function type: " << *RetTy << " f(";
+ DEBUG(errs() << "Function type: " << *RetTy << " f(");
for (std::vector<const Type*>::iterator i = paramTy.begin(),
e = paramTy.end(); i != e; ++i)
- DOUT << **i << ", ";
- DOUT << ")\n";
+ DEBUG(errs() << **i << ", ");
+ DEBUG(errs() << ")\n");
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy));
+ PointerType *StructPtr =
+ PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
paramTy.clear();
paramTy.push_back(StructPtr);
}
- const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false);
+ const FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
// Create the new function
Function *newFunction = Function::Create(funcType,
@@ -298,13 +319,13 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
Value *RewriteVal;
if (AggregateArgs) {
Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::Int32Ty);
- Idx[1] = ConstantInt::get(Type::Int32Ty, i);
- std::string GEPname = "gep_" + inputs[i]->getName();
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
- GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2,
- GEPname, TI);
- RewriteVal = new LoadInst(GEP, "load" + GEPname, TI);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(AI, Idx, Idx+2,
+ "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = AI++;
@@ -340,6 +361,20 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
return newFunction;
}
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+ for (Value::use_iterator UI = Used->use_begin(),
+ UE = Used->use_end(); UI != UE; ++UI) {
+ PHINode *P = dyn_cast<PHINode>(*UI);
+ if (P && P->getParent() == BB)
+ return P->getIncomingBlock(UI);
+ }
+
+ return 0;
+}
+
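The subtlety this helper addresses: a value consumed by a PHI is really "used" at the end of the corresponding predecessor block, so dominance must be tested against that predecessor rather than against the PHI's own block. Its use later in this hunk, in miniature:

BasicBlock *pred = FindPhiPredForUseInBlock(Reloads[out], OldTarget);
if (pred && DT && DT->dominates(DefBlock, pred))
  DominatesDef = true; // the def reaches the PHI along this incoming edge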
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
@@ -348,7 +383,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Values &inputs, Values &outputs) {
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plain inputs and allocated memory for outputs
- std::vector<Value*> params, StructValues, ReloadOutputs;
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ LLVMContext &Context = newFunction->getContext();
// Add inputs as params, or to be filled into the struct
for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
@@ -378,7 +415,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
ArgTypes.push_back((*v)->getType());
// Allocate a struct at the beginning of this function
- Type *StructArgTy = StructType::get(ArgTypes);
+ Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
Struct =
new AllocaInst(StructArgTy, 0, "structArg",
codeReplacer->getParent()->begin()->begin());
@@ -386,8 +423,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::Int32Ty);
- Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
GetElementPtrInst *GEP =
GetElementPtrInst::Create(Struct, Idx, Idx + 2,
"gep_" + StructValues[i]->getName());
@@ -412,8 +449,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Output = 0;
if (AggregateArgs) {
Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::Int32Ty);
- Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i);
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
GetElementPtrInst *GEP
= GetElementPtrInst::Create(Struct, Idx, Idx + 2,
"gep_reload_" + outputs[i]->getName());
@@ -423,6 +460,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Output = ReloadOutputs[i];
}
LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
codeReplacer->getInstList().push_back(load);
std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
for (unsigned u = 0, e = Users.size(); u != e; ++u) {
@@ -434,7 +472,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Now we can emit a switch statement using the call as a value.
SwitchInst *TheSwitch =
- SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty),
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
codeReplacer, 0, codeReplacer);
// Since there may be multiple exits from the original region, make the new
@@ -456,7 +494,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
if (!NewTarget) {
// If we don't already have an exit stub for this non-extracted
// destination, create one now!
- NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub",
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
newFunction);
unsigned SuccNum = switchVal++;
@@ -465,17 +504,18 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
case 0:
case 1: break; // No value needed.
case 2: // Conditional branch, return a bool
- brVal = ConstantInt::get(Type::Int1Ty, !SuccNum);
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
break;
default:
- brVal = ConstantInt::get(Type::Int16Ty, SuccNum);
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
break;
}
- ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget);
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
// Update the switch instruction.
- TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum),
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
OldTarget);
// Restore values just before we exit
@@ -507,14 +547,25 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
DominatesDef = false;
}
- if (DT)
+ if (DT) {
DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this is a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
if (DominatesDef) {
if (AggregateArgs) {
Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::Int32Ty);
- Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out);
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
GetElementPtrInst *GEP =
GetElementPtrInst::Create(OAI, Idx, Idx + 2,
"gep_" + outputs[out]->getName(),
@@ -543,15 +594,16 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// this should be rewritten as a `ret'
// Check if the function should return a value
- if (OldFnRetTy == Type::VoidTy) {
- ReturnInst::Create(0, TheSwitch); // Return void
+ if (OldFnRetTy == Type::getVoidTy(Context)) {
+ ReturnInst::Create(Context, 0, TheSwitch); // Return void
} else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
// return what we have
- ReturnInst::Create(TheSwitch->getCondition(), TheSwitch);
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
} else {
// Otherwise we must have code extracted an unwind or something, just
// return whatever we want.
- ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch);
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
}
TheSwitch->eraseFromParent();
@@ -644,12 +696,14 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
Function *oldFunction = header->getParent();
// This takes place of the original loop
- BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction,
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
header);
// The new function needs a root node because other nodes can branch to the
// head of the region, but the entry node of a function cannot have preds.
- BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot");
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
newFuncRoot->getInstList().push_back(BranchInst::Create(header));
// Find inputs to, outputs from the code region.
@@ -702,7 +756,8 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
// cerr << "OLD FUNCTION: " << *oldFunction;
// verifyFunction(*oldFunction);
- DEBUG(if (verifyFunction(*newFunction)) abort());
+ DEBUG(if (verifyFunction(*newFunction))
+ llvm_report_error("verifyFunction failed!"));
return newFunction;
}
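The dominator-tree fix in splitReturnBlocks above follows the standard recipe for a fall-through split: the old block immediately dominates the new one, and the old block's former dominator-tree children are re-parented under the new node. The same recipe in isolation (Old, New, DT assumed):

DomTreeNode *OldNode = DT->getNode(Old);
SmallVector<DomTreeNode*, 8> Children(OldNode->begin(), OldNode->end());
DomTreeNode *NewNode = DT->addNewBlock(New, Old); // Old idoms New
for (unsigned i = 0, e = Children.size(); i != e; ++i)
  DT->changeImmediateDominator(Children[i], NewNode);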
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index b8dd75413342..c908b4a55914 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,7 +39,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint);
+ Slot = new AllocaInst(I.getType(), 0,
+ I.getName()+".reg2mem", AllocaPoint);
} else {
Function *F = I.getParent()->getParent();
Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
@@ -116,7 +117,8 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint);
+ Slot = new AllocaInst(P->getType(), 0,
+ P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 4989c00ceb81..0d00d69c8cb9 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -28,13 +29,73 @@
#include "llvm/Support/CallSite.h"
using namespace llvm;
-bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
- return InlineFunction(CallSite(CI), CG, TD);
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD,
+ SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+ return InlineFunction(CallSite(CI), CG, TD, StaticAllocas);
}
-bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
- return InlineFunction(CallSite(II), CG, TD);
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD,
+ SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+ return InlineFunction(CallSite(II), CG, TD, StaticAllocas);
}
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyzes BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+///
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+ BasicBlock *InvokeDest,
+ const SmallVectorImpl<Value*> &InvokeDestPHIValues) {
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI == 0) continue;
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ if (CI->doesNotThrow())
+ continue;
+
+ // Convert this function call into an invoke instruction.
+ // First, split the basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Next, create the new invoke instruction, inserting it at the end
+ // of the old basic block.
+ SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
+ InvokeArgs.begin(), InvokeArgs.end(),
+ CI->getName(), BB->getTerminator());
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+ Split->getInstList().pop_front(); // Delete the original call
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ unsigned i = 0;
+ for (BasicBlock::iterator I = InvokeDest->begin();
+ isa<PHINode>(I); ++I, ++i)
+ cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB);
+
+ // This basic block is now complete, the caller will continue scanning the
+ // next one.
+ return;
+ }
+}
+
+
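The hoisted helper carries the classic call-to-invoke conversion over unchanged; the core recipe, in miniature (CI is the throwing call, BB its block, InvokeDest the exceptional successor):

BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
SmallVector<Value*, 8> Args(CI->op_begin()+1, CI->op_end());
InvokeInst *II =
    InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
                       Args.begin(), Args.end(),
                       CI->getName(), BB->getTerminator());
II->setCallingConv(CI->getCallingConv());
CI->replaceAllUsesWith(II);
BB->getInstList().pop_back();     // drop the branch splitBasicBlock made
Split->getInstList().pop_front(); // drop the original call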
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes and turn unwind
/// instructions into branches to the invoke unwind dest.
@@ -43,10 +104,9 @@ bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
- ClonedCodeInfo &InlinedCodeInfo,
- CallGraph *CG) {
+ ClonedCodeInfo &InlinedCodeInfo) {
BasicBlock *InvokeDest = II->getUnwindDest();
- std::vector<Value*> InvokeDestPHIValues;
+ SmallVector<Value*, 8> InvokeDestPHIValues;
// If there are PHI nodes in the unwind destination block, we need to
// keep track of which values came into them from this invoke, then remove
@@ -62,92 +122,39 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
- // rewrite.
- if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB) {
- if (InlinedCodeInfo.ContainsCalls) {
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
- Instruction *I = BBI++;
-
- // We only need to check for function calls: inlined invoke
- // instructions require no special handling.
- if (!isa<CallInst>(I)) continue;
- CallInst *CI = cast<CallInst>(I);
-
- // If this call cannot unwind, don't convert it to an invoke.
- if (CI->doesNotThrow())
- continue;
-
- // Convert this function call into an invoke instruction.
- // First, split the basic block.
- BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
-
- // Next, create the new invoke instruction, inserting it at the end
- // of the old basic block.
- SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
- InvokeInst *II =
- InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
- InvokeArgs.begin(), InvokeArgs.end(),
- CI->getName(), BB->getTerminator());
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
-
- // Make sure that anything using the call now uses the invoke!
- CI->replaceAllUsesWith(II);
-
- // Update the callgraph.
- if (CG) {
- // We should be able to do this:
- // (*CG)[Caller]->replaceCallSite(CI, II);
- // but that fails if the old call site isn't in the call graph,
- // which, because of LLVM bug 3601, it sometimes isn't.
- CallGraphNode *CGN = (*CG)[Caller];
- for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end();
- NI != NE; ++NI) {
- if (NI->first == CI) {
- NI->first = II;
- break;
- }
- }
- }
-
- // Delete the unconditional branch inserted by splitBasicBlock
- BB->getInstList().pop_back();
- Split->getInstList().pop_front(); // Delete the original call
-
- // Update any PHI nodes in the exceptional block to indicate that
- // there is now a new entry in them.
- unsigned i = 0;
- for (BasicBlock::iterator I = InvokeDest->begin();
- isa<PHINode>(I); ++I, ++i) {
- PHINode *PN = cast<PHINode>(I);
- PN->addIncoming(InvokeDestPHIValues[i], BB);
- }
-
- // This basic block is now complete, start scanning the next one.
- break;
- }
- }
-
- if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- // An UnwindInst requires special handling when it gets inlined into an
- // invoke site. Once this happens, we know that the unwind would cause
- // a control transfer to the invoke exception destination, so we can
- // transform it into a direct branch to the exception destination.
- BranchInst::Create(InvokeDest, UI);
-
- // Delete the unwind instruction!
- UI->eraseFromParent();
-
- // Update any PHI nodes in the exceptional block to indicate that
- // there is now a new entry in them.
- unsigned i = 0;
- for (BasicBlock::iterator I = InvokeDest->begin();
- isa<PHINode>(I); ++I, ++i) {
- PHINode *PN = cast<PHINode>(I);
- PN->addIncoming(InvokeDestPHIValues[i], BB);
- }
+ // rewrite. If the code doesn't have calls or unwinds, we know there is
+ // nothing to rewrite.
+ if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+ return;
+ }
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ if (InlinedCodeInfo.ContainsCalls)
+ HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest,
+ InvokeDestPHIValues);
+
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // An UnwindInst requires special handling when it gets inlined into an
+ // invoke site. Once this happens, we know that the unwind would cause
+ // a control transfer to the invoke exception destination, so we can
+ // transform it into a direct branch to the exception destination.
+ BranchInst::Create(InvokeDest, UI);
+
+ // Delete the unwind instruction!
+ UI->eraseFromParent();
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ unsigned i = 0;
+ for (BasicBlock::iterator I = InvokeDest->begin();
+ isa<PHINode>(I); ++I, ++i) {
+ PHINode *PN = cast<PHINode>(I);
+ PN->addIncoming(InvokeDestPHIValues[i], BB);
}
}
}
@@ -185,17 +192,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
}
for (; I != E; ++I) {
- const Instruction *OrigCall = I->first.getInstruction();
+ const Value *OrigCall = I->first;
DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
// Only copy the edge if the call was inlined!
- if (VMI != ValueMap.end() && VMI->second) {
- // If the call was inlined, but then constant folded, there is no edge to
- // add. Check for this case.
- if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
- CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
- }
+ if (VMI == ValueMap.end() || VMI->second == 0)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
+ CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
}
+
// Update the call graph by deleting the edge from Callee to Caller. We must
// do this after the loop above in case Caller and Callee are the same.
CallerNode->removeCallEdgeFor(CS);
@@ -204,25 +213,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
/// findFnRegionEndMarker - This is a utility routine that is used by
/// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds
/// to the llvm.dbg.func.start of the function F. Otherwise return NULL.
+///
static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
- GlobalVariable *FnStart = NULL;
+ MDNode *FnStart = NULL;
const DbgRegionEndInst *FnEnd = NULL;
for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI)
for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;
++BI) {
if (FnStart == NULL) {
if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) {
- DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram()));
+ DISubprogram SP(FSI->getSubprogram());
assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");
if (SP.describes(F))
- FnStart = SP.getGV();
+ FnStart = SP.getNode();
}
- } else {
- if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
- if (REI->getContext() == FnStart)
- FnEnd = REI;
+ continue;
}
+
+ if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
+ if (REI->getContext() == FnStart)
+ FnEnd = REI;
}
return FnEnd;
}
@@ -236,8 +247,10 @@ static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
// exists in the instruction stream. Similarly this will inline a recursive
// function by one level.
//
-bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
+bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
+ SmallVectorImpl<AllocaInst*> *StaticAllocas) {
Instruction *TheCall = CS.getInstruction();
+ LLVMContext &Context = TheCall->getContext();
assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
"Instruction not in function!");
@@ -277,7 +290,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
// Make sure to capture all of the return instructions from the cloned
// function.
- std::vector<ReturnInst*> Returns;
+ SmallVector<ReturnInst*, 8> Returns;
ClonedCodeInfo InlinedFunctionInfo;
Function::iterator FirstNewBlock;
@@ -302,15 +315,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
!CalledFunc->onlyReadsMemory()) {
const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
- const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type *VoidPtrTy =
+ Type::getInt8PtrTy(Context);
// Create the alloca. If we have TargetData, use nice alignment.
unsigned Align = 1;
if (TD) Align = TD->getPrefTypeAlignment(AggTy);
- Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
- Caller->begin()->begin());
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
+ I->getName(),
+ &*Caller->begin()->begin());
// Emit a memcpy.
- const Type *Tys[] = { Type::Int64Ty };
+ const Type *Tys[] = { Type::getInt64Ty(Context) };
Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
Intrinsic::memcpy,
Tys, 1);
@@ -321,13 +336,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
if (TD == 0)
Size = ConstantExpr::getSizeOf(AggTy);
else
- Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ TD->getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
Value *CallArgs[] = {
- DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1)
};
CallInst *TheMemCpy =
CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
@@ -352,13 +369,12 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
// call site. The function body cloner does not clone original
// region end marker from the CalledFunc. This will ensure that
// inlined function's scope ends at the right place.
- const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc);
- if (DREI) {
- for (BasicBlock::iterator BI = TheCall,
- BE = TheCall->getParent()->end(); BI != BE; ++BI) {
+ if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) {
+ for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end();
+ BI != BE; ++BI) {
if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {
if (DbgRegionEndInst *NewDREI =
- dyn_cast<DbgRegionEndInst>(DREI->clone()))
+ dyn_cast<DbgRegionEndInst>(DREI->clone()))
NewDREI->insertAfter(DSPI);
break;
}
@@ -388,31 +404,39 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
{
BasicBlock::iterator InsertPoint = Caller->begin()->begin();
for (BasicBlock::iterator I = FirstNewBlock->begin(),
- E = FirstNewBlock->end(); I != E; )
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
- // If the alloca is now dead, remove it. This often occurs due to code
- // specialization.
- if (AI->use_empty()) {
- AI->eraseFromParent();
- continue;
- }
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
- if (isa<Constant>(AI->getArraySize())) {
- // Scan for the block of allocas that we can move over, and move them
- // all at once.
- while (isa<AllocaInst>(I) &&
- isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
- ++I;
-
- // Transfer all of the allocas over in a block. Using splice means
- // that the instructions aren't removed from the symbol table, then
- // reinserted.
- Caller->getEntryBlock().getInstList().splice(
- InsertPoint,
- FirstNewBlock->getInstList(),
- AI, I);
- }
+ if (!isa<Constant>(AI->getArraySize()))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller if the
+ // StaticAllocas pointer is non-null.
+ if (StaticAllocas) StaticAllocas->push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I));
+ ++I;
}
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
}
// If the inlined code contained dynamic alloca instructions, wrap the inlined
@@ -486,7 +510,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
BB != E; ++BB) {
TerminatorInst *Term = BB->getTerminator();
if (isa<UnwindInst>(Term)) {
- new UnreachableInst(Term);
+ new UnreachableInst(Context, Term);
BB->getInstList().erase(Term);
}
}
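UnreachableInst, like the other constructors touched in this patch, now takes the context explicitly; replacing a terminator follows the usual insert-then-erase pattern (eraseFromParent() is equivalent to the getInstList().erase() above):

TerminatorInst *Term = BB->getTerminator();
new UnreachableInst(Term->getContext(), Term); // inserted just before Term
Term->eraseFromParent();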
@@ -495,7 +519,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
// any inlined 'unwind' instructions into branches to the invoke exception
// destination, and call instructions into invoke instructions.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG);
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
// If we cloned in _exactly one_ basic block, and if that block ends in a
// return instruction, we splice the body of the inlined callee directly into
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 4f8a1603948a..1fa51a3b6a71 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -32,7 +32,7 @@ namespace {
bool runOnFunction(Function &F) {
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
AI != AE; ++AI)
- if (!AI->hasName() && AI->getType() != Type::VoidTy)
+ if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
AI->setName("tmp");
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -40,7 +40,7 @@ namespace {
BB->setName("BB");
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->hasName() && I->getType() != Type::VoidTy)
+ if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
I->setName("tmp");
}
return true;
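The global type singletons (Type::VoidTy and friends) are gone; primitive types are now fetched per LLVMContext. The equivalent check as a small sketch (producesValue is a hypothetical helper):

static bool producesValue(const Instruction &I) {
  // Void is per-context now, not a process-wide singleton.
  return I.getType() != Type::getVoidTy(I.getContext());
}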
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d5e7303a5070..56e662e9dac1 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -33,22 +33,19 @@
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
-#include <algorithm>
-#include <map>
using namespace llvm;
STATISTIC(NumLCSSA, "Number of live out of a loop variables");
namespace {
- struct VISIBILITY_HIDDEN LCSSA : public LoopPass {
+ struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LCSSA() : LoopPass(&ID) {}
@@ -57,12 +54,10 @@ namespace {
DominatorTree *DT;
std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
+ Loop *L;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
- void ProcessInstruction(Instruction* Instr,
- const SmallVector<BasicBlock*, 8>& exitBlocks);
-
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG. It maintains both of these,
/// as well as the CFG. It also requires dominator information.
@@ -71,9 +66,9 @@ namespace {
AU.setPreservesCFG();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
+ AU.addRequiredTransitive<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
+ AU.addRequiredTransitive<DominatorTree>();
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTree>();
@@ -85,15 +80,17 @@ namespace {
AU.addPreserved<DominanceFrontier>();
}
private:
- void getLoopValuesUsedOutsideLoop(Loop *L,
- SetVector<Instruction*> &AffectedValues,
- const SmallVector<BasicBlock*, 8>& exitBlocks);
-
- Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
- DenseMap<DomTreeNode*, Value*> &Phis);
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm() && "LCSSA form not preserved!");
+ }
/// inLoop - returns true if the given block is within the current loop
- bool inLoop(BasicBlock* B) {
+ bool inLoop(BasicBlock *B) const {
return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
}
};
@@ -105,181 +102,163 @@ static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
const PassInfo *const llvm::LCSSAID = &X;
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ DominatorTree *DT) {
+ DomTreeNode *DomNode = DT->getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+
/// runOnFunction - Process all loops in the function, inner-most out.
-bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) {
- PredCache.clear();
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+ L = TheLoop;
LI = &LPM.getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
- // Speed up queries by creating a sorted list of blocks
+ // Get the set of exiting blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Speed up queries by creating a sorted vector of blocks.
LoopBlocks.clear();
LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
- std::sort(LoopBlocks.begin(), LoopBlocks.end());
+ array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
- SmallVector<BasicBlock*, 8> exitBlocks;
- L->getExitBlocks(exitBlocks);
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ bool MadeChange = false;
- SetVector<Instruction*> AffectedValues;
- getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks);
+ for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+ BBI != E; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->use_back()->getParent() == BB &&
+ !isa<PHINode>(I->use_back())))
+ continue;
+
+ MadeChange |= ProcessInstruction(I, ExitBlocks);
+ }
+ }
- // If no values are affected, we can save a lot of work, since we know that
- // nothing will be changed.
- if (AffectedValues.empty())
- return false;
+ assert(L->isLCSSAForm());
+ PredCache.clear();
+
+ return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] == BB)
+ return true;
+ return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop. If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ SmallVector<Use*, 16> UsesToRewrite;
- // Iterate over all affected values for this loop and insert Phi nodes
- // for them in the appropriate exit blocks
+ BasicBlock *InstBB = Inst->getParent();
- for (SetVector<Instruction*>::iterator I = AffectedValues.begin(),
- E = AffectedValues.end(); I != E; ++I)
- ProcessInstruction(*I, exitBlocks);
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ UserBB = PN->getIncomingBlock(UI);
+
+ if (InstBB != UserBB && !inLoop(UserBB))
+ UsesToRewrite.push_back(&UI.getUse());
+ }
- assert(L->isLCSSAForm());
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty()) return false;
- return true;
-}
-
-/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes,
-/// eliminate all out-of-loop uses.
-void LCSSA::ProcessInstruction(Instruction *Instr,
- const SmallVector<BasicBlock*, 8>& exitBlocks) {
++NumLCSSA; // We are applying the transformation
- // Keep track of the blocks that have the value available already.
- DenseMap<DomTreeNode*, Value*> Phis;
-
- BasicBlock *DomBB = Instr->getParent();
-
// Invoke instructions are special in that their result value is not available
// along their unwind edge. The code below tests to see whether DomBB dominates
// the value, so adjust DomBB to the normal destination block, which is
// effectively where the value is first usable.
- if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr))
+ BasicBlock *DomBB = Inst->getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
DomBB = Inv->getNormalDest();
DomTreeNode *DomNode = DT->getNode(DomBB);
- // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
- // add them to the Phi's map.
- for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(),
- BBE = exitBlocks.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = *BBI;
- DomTreeNode *ExitBBNode = DT->getNode(BB);
- Value *&Phi = Phis[ExitBBNode];
- if (!Phi && DT->dominates(DomNode, ExitBBNode)) {
- PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
- BB->begin());
- PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
-
- // Remember that this phi makes the value alive in this block.
- Phi = PN;
-
- // Add inputs from inside the loop for this PHI.
- for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI)
- PN->addIncoming(Instr, *PI);
- }
- }
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(Inst);
-
- // Record all uses of Instr outside the loop. We need to rewrite these. The
- // LCSSA phis won't be included because they use the value in the loop.
- for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end();
- UI != E;) {
- BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(*UI)) {
- UserBB = P->getIncomingBlock(UI);
- }
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+  // value, and register them with the SSAUpdater.
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *ExitBB = *BBI;
+ if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
- // If the user is in the loop, don't rewrite it!
- if (UserBB == Instr->getParent() || inLoop(UserBB)) {
- ++UI;
- continue;
- }
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
- // Otherwise, patch up uses of the value with the appropriate LCSSA Phi,
- // inserting PHI nodes into join points where needed.
- Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis);
-
- // Preincrement the iterator to avoid invalidating it when we change the
- // value.
- Use &U = UI.getUse();
- ++UI;
- U.set(Val);
- }
-}
+ PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+ ExitBB->begin());
+ PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
-/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that
-/// are used by instructions outside of it.
-void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L,
- SetVector<Instruction*> &AffectedValues,
- const SmallVector<BasicBlock*, 8>& exitBlocks) {
- // FIXME: For large loops, we may be able to avoid a lot of use-scanning
- // by using dominance information. In particular, if a block does not
- // dominate any of the loop exits, then none of the values defined in the
- // block could be used outside the loop.
- for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
- BB != BE; ++BB) {
- for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I)
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
- ++UI) {
- BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode* p = dyn_cast<PHINode>(*UI)) {
- UserBB = p->getIncomingBlock(UI);
- }
-
- if (*BB != UserBB && !inLoop(UserBB)) {
- AffectedValues.insert(I);
- break;
- }
- }
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+ PN->addIncoming(Inst, *PI);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
}
-}
-
-/// GetValueForBlock - Get the value to use within the specified basic block.
-/// available values are in Phis.
-Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
- DenseMap<DomTreeNode*, Value*> &Phis) {
- // If there is no dominator info for this BB, it is unreachable.
- if (BB == 0)
- return UndefValue::get(OrigInst->getType());
-
- // If we have already computed this value, return the previously computed val.
- if (Phis.count(BB)) return Phis[BB];
-
- DomTreeNode *IDom = BB->getIDom();
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses in
+ // the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
- // Otherwise, there are two cases: we either have to insert a PHI node or we
- // don't. We need to insert a PHI node if this block is not dominated by one
- // of the exit nodes from the loop (the loop could have multiple exits, and
- // though the value defined *inside* the loop dominated all its uses, each
- // exit by itself may not dominate all the uses).
- //
- // The simplest way to check for this condition is by checking to see if the
- // idom is in the loop. If so, we *know* that none of the exit blocks
- // dominate this block. Note that we *know* that the block defining the
- // original instruction is in the idom chain, because if it weren't, then the
- // original value didn't dominate this use.
- if (!inLoop(IDom->getBlock())) {
- // Idom is not in the loop, we must still be "below" the exit block and must
- // be fully dominated by the value live in the idom.
- Value* val = GetValueForBlock(IDom, OrigInst, Phis);
- Phis.insert(std::make_pair(BB, val));
- return val;
+ if (isa<PHINode>(UserBB->begin()) &&
+ isExitBlock(UserBB, ExitBlocks)) {
+ UsesToRewrite[i]->set(UserBB->begin());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}
- BasicBlock *BBN = BB->getBlock();
-
- // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so
- // now, then get values to fill in the incoming values for the PHI.
- PHINode *PN = PHINode::Create(OrigInst->getType(),
- OrigInst->getName() + ".lcssa", BBN->begin());
- PN->reserveOperandSpace(PredCache.GetNumPreds(BBN));
- Phis.insert(std::make_pair(BB, PN));
-
- // Fill in the incoming values for the block.
- for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI)
- PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
- return PN;
+ return true;
}
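The rewrite replaces the hand-rolled GetValueForBlock recursion with the new SSAUpdater utility: each exit block dominated by the value is seeded with its .lcssa PHI via AddAvailableValue, and RewriteUse then materializes any extra PHIs needed at join points. A condensed sketch of that pattern (hypothetical helper; it omits the same-exit-block special case handled above, since SSAUpdater assumes its available values are live out at the end of their blocks):

    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    static void rewriteOutsideUses(Instruction *Inst,
                                   SmallVectorImpl<PHINode*> &ExitPHIs,
                                   SmallVectorImpl<Use*> &UsesToRewrite) {
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(Inst);
      // Seed the exit blocks with the .lcssa PHIs already created for them.
      for (unsigned i = 0, e = ExitPHIs.size(); i != e; ++i)
        SSAUpdate.AddAvailableValue(ExitPHIs[i]->getParent(), ExitPHIs[i]);
      // Let the updater insert whatever additional PHIs the CFG requires.
      for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i)
        SSAUpdate.RewriteUse(*UsesToRewrite[i]);
    }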
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c08638c4c3d..b62261119c75 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -20,9 +20,11 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
@@ -183,8 +185,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
} else if (SI->getNumSuccessors() == 2) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
- Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
- SI->getSuccessorValue(1), "cond", SI);
+ Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(),
+ SI->getSuccessorValue(1), "cond");
// Insert the new branch...
BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
@@ -262,7 +264,6 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
/// too, recursively.
void
llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
-
// We can remove a PHI if it is on a cycle in the def-use graph
// where each node in the cycle has degree one, i.e. only one use,
// and is an instruction with no side effects.
@@ -294,7 +295,7 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
Value *NewVal = PN->getIncomingValue(0);
@@ -314,6 +315,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
// Anything that branched to PredBB now branches to DestBB.
PredBB->replaceAllUsesWith(DestBB);
+ if (P) {
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI) {
+ PI->replaceAllUses(PredBB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+ }
+ }
// Nuke BB.
PredBB->eraseFromParent();
}
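MergeBasicBlockIntoOnlyPred grows an optional Pass* so the utility can fix up ProfileInfo through getAnalysisIfAvailable. A hypothetical call site, assuming the declaration defaults the new parameter to null so existing callers keep the old analysis-oblivious behavior:

    #include "llvm/Pass.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Fold DestBB into its lone predecessor; with a non-null Pass the
    // utility also rewrites ProfileInfo edges, as the hunk above shows.
    static void foldIntoPred(BasicBlock *DestBB, Pass *ThisPass) {
      MergeBasicBlockIntoOnlyPred(DestBB, ThisPass);
    }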
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index d6b167f8b848..c22708a92b7a 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,10 +37,12 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -55,44 +57,42 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
namespace {
- struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass {
+ struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : FunctionPass(&ID) {}
+ LoopSimplify() : LoopPass(&ID) {}
// AA - If we have an alias analysis object to update, this is it, otherwise
// this is null.
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
- virtual bool runOnFunction(Function &F);
+ Loop *L;
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// We need loop information to identify the loops...
- AU.addRequired<LoopInfo>();
- AU.addRequired<DominatorTree>();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<DominatorTree>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
/// verifyAnalysis() - Verify loop nest.
void verifyAnalysis() const {
-#ifndef NDEBUG
- LoopInfo *NLI = &getAnalysis<LoopInfo>();
- for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I)
- (*I)->verifyLoop();
-#endif
+ assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
}
private:
- bool ProcessLoop(Loop *L);
+ bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- void InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L);
- void InsertUniqueBackedgeBlock(Loop *L);
+ BasicBlock *InsertPreheaderForLoop(Loop *L);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
Loop *L);
@@ -105,73 +105,19 @@ X("loopsimplify", "Canonicalize natural loops", true);
// Publically exposed interface to pass...
const PassInfo *const llvm::LoopSimplifyID = &X;
-FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
/// it in any convenient order) inserting preheaders...
///
-bool LoopSimplify::runOnFunction(Function &F) {
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+ L = l;
bool Changed = false;
LI = &getAnalysis<LoopInfo>();
AA = getAnalysisIfAvailable<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
- // Check to see that no blocks (other than the header) in loops have
- // predecessors that are not in loops. This is not valid for natural loops,
- // but can occur if the blocks are unreachable. Since they are unreachable we
- // can just shamelessly destroy their terminators to make them not branch into
- // the loop!
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- // This case can only occur for unreachable blocks. Blocks that are
- // unreachable can't be in loops, so filter those blocks out.
- if (LI->getLoopFor(BB)) continue;
-
- bool BlockUnreachable = false;
- TerminatorInst *TI = BB->getTerminator();
-
- // Check to see if any successors of this block are non-loop-header loops
- // that are not the header.
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- // If this successor is not in a loop, BB is clearly ok.
- Loop *L = LI->getLoopFor(TI->getSuccessor(i));
- if (!L) continue;
-
- // If the succ is the loop header, and if L is a top-level loop, then this
- // is an entrance into a loop through the header, which is also ok.
- if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0)
- continue;
-
- // Otherwise, this is an entrance into a loop from some place invalid.
- // Either the loop structure is invalid and this is not a natural loop (in
- // which case the compiler is buggy somewhere else) or BB is unreachable.
- BlockUnreachable = true;
- break;
- }
-
- // If this block is ok, check the next one.
- if (!BlockUnreachable) continue;
-
- // Otherwise, this block is dead. To clean up the CFG and to allow later
- // loop transformations to ignore this case, we delete the edges into the
- // loop by replacing the terminator.
-
- // Remove PHI entries from the successors.
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- TI->getSuccessor(i)->removePredecessor(BB);
-
- // Add a new unreachable instruction before the old terminator.
- new UnreachableInst(TI);
-
- // Delete the dead terminator.
- if (AA) AA->deleteValue(TI);
- if (!TI->use_empty())
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- TI->eraseFromParent();
- Changed |= true;
- }
-
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= ProcessLoop(*I);
+ Changed |= ProcessLoop(L, LPM);
return Changed;
}
@@ -179,21 +125,42 @@ bool LoopSimplify::runOnFunction(Function &F) {
/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
/// all loops have preheaders.
///
-bool LoopSimplify::ProcessLoop(Loop *L) {
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
bool Changed = false;
ReprocessLoop:
-
- // Canonicalize inner loops before outer loops. Inner loop canonicalization
- // can provide work for the outer loop to canonicalize.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= ProcessLoop(*I);
-
- assert(L->getBlocks()[0] == L->getHeader() &&
- "Header isn't first block in loop?");
+
+  // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock *, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI)
+ if (!L->contains(*PI))
+ BadPreds.insert(*PI);
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
// Does the loop already have a preheader? If so, don't insert one.
- if (L->getLoopPreheader() == 0) {
- InsertPreheaderForLoop(L);
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L);
NumInserted++;
Changed = true;
}
@@ -229,10 +196,9 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (NumBackedges < 8) {
- if (Loop *NL = SeparateNestedLoop(L)) {
+ if (SeparateNestedLoop(L, LPM)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
- ProcessLoop(NL);
Changed = true;
// GCC doesn't tail recursion eliminate this.
goto ReprocessLoop;
@@ -242,7 +208,7 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- InsertUniqueBackedgeBlock(L);
+ InsertUniqueBackedgeBlock(L, Preheader);
NumInserted++;
Changed = true;
}
@@ -253,7 +219,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = PN->hasConstantValue()) {
+ if (Value *V = PN->hasConstantValue(DT)) {
if (AA) AA->deleteValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -286,19 +252,10 @@ ReprocessLoop:
Instruction *Inst = I++;
if (Inst == CI)
continue;
- if (Inst->isTrapping()) {
+ if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
AllInvariant = false;
break;
}
- for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
- if (!L->isLoopInvariant(Inst->getOperand(j))) {
- AllInvariant = false;
- break;
- }
- if (!AllInvariant)
- break;
- // Hoist.
- Inst->moveBefore(L->getLoopPreheader()->getTerminator());
}
if (!AllInvariant) continue;
@@ -317,9 +274,10 @@ ReprocessLoop:
DomTreeNode *Node = DT->getNode(ExitingBlock);
const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
Node->getChildren();
- for (unsigned k = 0, g = Children.size(); k != g; ++k) {
- DT->changeImmediateDominator(Children[k], Node->getIDom());
- if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ if (DF) DF->changeImmediateDominator(Child->getBlock(),
Node->getIDom()->getBlock(),
DT);
}
@@ -339,7 +297,7 @@ ReprocessLoop:
/// preheader, this method is called to insert one. This method has two phases:
/// preheader insertion and analysis updating.
///
-void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *Header = L->getHeader();
// Compute the set of predecessors of the loop that are not in the loop.
@@ -353,19 +311,12 @@ void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *NewBB =
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
-
-
- //===--------------------------------------------------------------------===//
- // Update analysis results now that we have performed the transformation
- //
-
- // We know that we have loop information to update... update it now.
- if (Loop *Parent = L->getParentLoop())
- Parent->addBasicBlockToLoop(NewBB, LI->getBase());
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+ return NewBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -382,17 +333,6 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
LoopBlocks.size(), ".loopexit",
this);
- // Update Loop Information - we know that the new block will be in whichever
- // loop the Exit block is in. Note that it may not be in that immediate loop,
- // if the successor is some other loop header. In that case, we continue
- // walking up the loop tree to find a loop that contains both the successor
- // block and the predecessor block.
- Loop *SuccLoop = LI->getLoopFor(Exit);
- while (SuccLoop && !SuccLoop->contains(L->getHeader()))
- SuccLoop = SuccLoop->getParentLoop();
- if (SuccLoop)
- SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase());
-
return NewBB;
}
@@ -422,14 +362,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = PN->hasConstantValue())
- if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) {
- // This is a degenerate PHI already, don't modify it!
- PN->replaceAllUsesWith(V);
- if (AA) AA->deleteValue(PN);
- PN->eraseFromParent();
- continue;
- }
+ if (Value *V = PN->hasConstantValue(DT)) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ continue;
+ }
// Scan this PHI node looking for a use of the PHI node by itself.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -496,7 +435,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
if (PN == 0) return 0; // No known way to partition.
@@ -527,17 +466,20 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
else
LI->changeTopLevelLoop(L, NewOuter);
- // This block is going to be our new header block: add it to this loop and all
- // parent loops.
- NewOuter->addBasicBlockToLoop(NewBB, LI->getBase());
-
// L is now a subloop of our outer loop.
NewOuter->addChildLoop(L);
+ // Add the new loop to the pass manager queue.
+ LPM.insertLoopIntoQueue(NewOuter);
+
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
NewOuter->addBlockEntry(*I);
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
// Determine which blocks should stay in L and which should be moved out to
// the Outer loop now.
std::set<BasicBlock*> BlocksInL;
@@ -578,11 +520,10 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
/// backedges to target a new basic block and have that block branch to the loop
/// header. This ensures that loops have exactly one backedge.
///
-void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
+void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
- BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
Function *F = Header->getParent();
@@ -592,7 +533,8 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
if (*I != Preheader) BackedgeBlocks.push_back(*I);
// Create and insert the new backedge block...
- BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F);
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName()+".backedge", F);
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
// Move the new backedge block to right after the last backedge block.
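Two themes run through this file: LoopSimplify becomes a LoopPass, and the manual hoisting test is folded into Loop::makeLoopInvariant, which both answers "is, or can this be made, invariant?" and performs the move when it is safe. A hypothetical sketch of the new idiom over one block, matching the signature used in the hunk above:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Try to hoist every non-terminator in BB to the preheader; bail out
    // on the first instruction that cannot be made loop invariant.
    static bool hoistWholeBlock(Loop *L, BasicBlock *BB, BasicBlock *Preheader) {
      bool Changed = false;
      Instruction *Term = BB->getTerminator();
      for (BasicBlock::iterator I = BB->begin(); &*I != Term; ) {
        Instruction *Inst = I++;   // advance first: Inst may be moved away
        if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator()))
          return false;
      }
      return true;
    }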
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 74e7028d127c..f26d7c146ee3 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -19,6 +19,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Target/TargetData.h"
@@ -28,17 +29,17 @@ using namespace llvm;
STATISTIC(NumLowered, "Number of allocations lowered");
namespace {
- /// LowerAllocations - Turn malloc and free instructions into %malloc and
- /// %free calls.
+ /// LowerAllocations - Turn malloc and free instructions into @malloc and
+ /// @free calls.
///
class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
- Constant *MallocFunc; // Functions in the module we are processing
- Constant *FreeFunc; // Initialized by doInitialization
+ Constant *FreeFunc; // Functions in the module we are processing
+ // Initialized by doInitialization
bool LowerMallocArgToInteger;
public:
static char ID; // Pass ID, replacement for typeid
explicit LowerAllocations(bool LowerToInt = false)
- : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0),
+ : BasicBlockPass(&ID), FreeFunc(0),
LowerMallocArgToInteger(LowerToInt) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -86,12 +87,9 @@ Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
// This function is always successful.
//
bool LowerAllocations::doInitialization(Module &M) {
- const Type *BPTy = PointerType::getUnqual(Type::Int8Ty);
- // Prototype malloc as "char* malloc(...)", because we don't know in
- // doInitialization whether size_t is int or long.
- FunctionType *FT = FunctionType::get(BPTy, true);
- MallocFunc = M.getOrInsertFunction("malloc", FT);
- FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, BPTy, (Type *)0);
+ const Type *BPTy = Type::getInt8PtrTy(M.getContext());
+ FreeFunc = M.getOrInsertFunction("free" , Type::getVoidTy(M.getContext()),
+ BPTy, (Type *)0);
return true;
}
@@ -100,57 +98,22 @@ bool LowerAllocations::doInitialization(Module &M) {
//
bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
- assert(MallocFunc && FreeFunc && "Pass not initialized!");
+ assert(FreeFunc && "Pass not initialized!");
BasicBlock::InstListType &BBIL = BB.getInstList();
const TargetData &TD = getAnalysis<TargetData>();
- const Type *IntPtrTy = TD.getIntPtrType();
+ const Type *IntPtrTy = TD.getIntPtrType(BB.getContext());
// Loop over all of the instructions, looking for malloc or free instructions
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
- const Type *AllocTy = MI->getType()->getElementType();
-
- // malloc(type) becomes i8 *malloc(size)
- Value *MallocArg;
- if (LowerMallocArgToInteger)
- MallocArg = ConstantInt::get(Type::Int64Ty,
- TD.getTypeAllocSize(AllocTy));
- else
- MallocArg = ConstantExpr::getSizeOf(AllocTy);
- MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg),
- IntPtrTy);
-
- if (MI->isArrayAllocation()) {
- if (isa<ConstantInt>(MallocArg) &&
- cast<ConstantInt>(MallocArg)->isOne()) {
- MallocArg = MI->getOperand(0); // Operand * 1 = Operand
- } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) {
- CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/);
- MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg));
- } else {
- Value *Scale = MI->getOperand(0);
- if (Scale->getType() != IntPtrTy)
- Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/,
- "", I);
-
- // Multiply it by the array size if necessary...
- MallocArg = BinaryOperator::Create(Instruction::Mul, Scale,
- MallocArg, "", I);
- }
- }
-
- // Create the call to Malloc.
- CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I);
- MCall->setTailCall();
-
- // Create a cast instruction to convert to the right type...
- Value *MCast;
- if (MCall->getType() != Type::VoidTy)
- MCast = new BitCastInst(MCall, MI->getType(), "", I);
- else
- MCast = Constant::getNullValue(MI->getType());
+ Value *ArraySize = MI->getOperand(0);
+ if (ArraySize->getType() != IntPtrTy)
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy,
+ false /*ZExt*/, "", I);
+ Value *MCast = CallInst::CreateMalloc(I, IntPtrTy,
+ MI->getAllocatedType(), ArraySize);
// Replace all uses of the old malloc inst with the cast inst
MI->replaceAllUsesWith(MCast);
@@ -160,7 +123,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
} else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
Value *PtrCast =
new BitCastInst(FI->getOperand(0),
- PointerType::getUnqual(Type::Int8Ty), "", I);
+ Type::getInt8PtrTy(BB.getContext()), "", I);
// Insert a call to the free function...
CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall();
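The deleted block computed the allocation size, scaled it by the array count, emitted the call, and bitcast the result by hand; CallInst::CreateMalloc now bundles all of that. A sketch of the replacement, assuming the signature used in the hunk above:

    #include "llvm/Instructions.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // Lower one MallocInst to a malloc call through the new helper.
    static Value *lowerMalloc(MallocInst *MI, const TargetData &TD) {
      const Type *IntPtrTy = TD.getIntPtrType(MI->getContext());
      Value *ArraySize = MI->getOperand(0);
      if (ArraySize->getType() != IntPtrTy)   // normalize the element count
        ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy,
                                                false /*ZExt*/, "", MI);
      return CallInst::CreateMalloc(MI, IntPtrTy, MI->getAllocatedType(),
                                    ArraySize);
    }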
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 1f6b1a2a6846..9a3de2649244 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -40,6 +40,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -114,7 +115,8 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type *VoidPtrTy =
+ Type::getInt8PtrTy(M.getContext());
AbortMessage = 0;
if (ExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
@@ -125,9 +127,9 @@ bool LowerInvoke::doInitialization(Module &M) {
{ // The type is recursive, so use a type holder.
std::vector<const Type*> Elements;
Elements.push_back(JmpBufTy);
- OpaqueType *OT = OpaqueType::get();
+ OpaqueType *OT = OpaqueType::get(M.getContext());
Elements.push_back(PointerType::getUnqual(OT));
- PATypeHolder JBLType(StructType::get(Elements));
+ PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
JBLinkTy = JBLType.get();
M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
@@ -138,10 +140,10 @@ bool LowerInvoke::doInitialization(Module &M) {
// Now that we've done that, insert the jmpbuf list head global, unless it
// already exists.
if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
- JBListHead = new GlobalVariable(PtrJBList, false,
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
GlobalValue::LinkOnceAnyLinkage,
Constant::getNullValue(PtrJBList),
- "llvm.sjljeh.jblist", &M);
+ "llvm.sjljeh.jblist");
}
// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
@@ -163,7 +165,8 @@ bool LowerInvoke::doInitialization(Module &M) {
}
// We need the 'write' and 'abort' functions for both models.
- AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0);
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+ (Type *)0);
#if 0 // "write" is Unix-specific.. code is going away soon anyway.
WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,
VoidPtrTy, Type::Int32Ty, (Type *)0);
@@ -178,26 +181,30 @@ void LowerInvoke::createAbortMessage(Module *M) {
// The abort message for expensive EH support tells the user that the
// program 'unwound' without an 'invoke' instruction.
Constant *Msg =
- ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+ ConstantArray::get(M->getContext(),
+ "ERROR: Exception thrown, but not caught!\n");
AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
- GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+ GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
GlobalValue::InternalLinkage,
- Msg, "abortmsg", M);
- std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+ Msg, "abortmsg");
+ std::vector<Constant*> GEPIdx(2,
+ Constant::getNullValue(Type::getInt32Ty(M->getContext())));
AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
} else {
// The abort message for cheap EH support tells the user that EH is not
// enabled.
Constant *Msg =
- ConstantArray::get("Exception handler needed, but not enabled. Recompile"
- " program with -enable-correct-eh-support.\n");
+ ConstantArray::get(M->getContext(),
+ "Exception handler needed, but not enabled."
+                         " Recompile program with -enable-correct-eh-support.\n");
AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
- GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+ GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
GlobalValue::InternalLinkage,
- Msg, "abortmsg", M);
- std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+ Msg, "abortmsg");
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(
+ Type::getInt32Ty(M->getContext())));
AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
}
}
@@ -249,8 +256,9 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
// Insert a return instruction. This really should be a "barrier", as it
// is unreachable.
- ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 :
- Constant::getNullValue(F.getReturnType()), UI);
+ ReturnInst::Create(F.getContext(),
+ F.getReturnType() == Type::getVoidTy(F.getContext()) ?
+ 0 : Constant::getNullValue(F.getReturnType()), UI);
// Remove the unwind instruction now.
BB->getInstList().erase(UI);
@@ -265,7 +273,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
AllocaInst *InvokeNum,
SwitchInst *CatchSwitch) {
- ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo);
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
// If the unwind edge has phi nodes, split the edge.
if (isa<PHINode>(II->getUnwindDest()->begin())) {
@@ -284,7 +293,8 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
// nonvolatile.
- new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI);
+ new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+ InvokeNum, false, NI);
// Add a switch case to our unwind block.
CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
@@ -469,13 +479,15 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// alloca because the value needs to be live across invokes.
unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
- new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin());
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
std::vector<Value*> Idx;
- Idx.push_back(Constant::getNullValue(Type::Int32Ty));
- Idx.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
+ Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1));
OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
- "OldBuf", EntryBB->getTerminator());
+ "OldBuf",
+ EntryBB->getTerminator());
// Copy the JBListHead to the alloca.
Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
@@ -487,20 +499,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create the catch block. The catch block is basically a big switch
// statement that goes to all of the invoke catch blocks.
- BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F);
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
// Create an alloca which keeps track of which invoke is currently
// executing. For normal calls it contains zero.
- AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum",
- EntryBB->begin());
- new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true,
- EntryBB->getTerminator());
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
// Insert a load in the Catch block, and a switch on its value. By default,
// we go to a block that just does an unwind (which is the correct action
// for a standard call).
- BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F);
- Unwinds.push_back(new UnwindInst(UnwindBB));
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
SwitchInst *CatchSwitch =
@@ -512,19 +525,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
"setjmp.cont");
- Idx[1] = ConstantInt::get(Type::Int32Ty, 0);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
"TheJmpBuf",
EntryBB->getTerminator());
- JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty),
+ JmpBufPtr = new BitCastInst(JmpBufPtr,
+ Type::getInt8PtrTy(F.getContext()),
"tmp", EntryBB->getTerminator());
Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
EntryBB->getTerminator());
// Compare the return value to zero.
- Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet,
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, SJRet,
Constant::getNullValue(SJRet->getType()),
- "notunwind", EntryBB->getTerminator());
+ "notunwind");
// Nuke the uncond branch.
EntryBB->getTerminator()->eraseFromParent();
@@ -541,9 +556,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create three new blocks, the block to load the jmpbuf ptr and compare
// against null, the block to do the longjmp, and the error block for if it
// is null. Add them at the end of the function because they are not hot.
- BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F);
- BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F);
- BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F);
+ BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+ "dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
// If this function contains an invoke, restore the old jumpbuf ptr.
Value *BufPtr;
@@ -556,26 +572,27 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}
// Load the JBList, if it's null, then there was no catch!
- Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr,
+ Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
Constant::getNullValue(BufPtr->getType()),
- "notnull", UnwindHandler);
+ "notnull");
BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
// Create the block to do the longjmp.
// Get a pointer to the jmpbuf and longjmp.
std::vector<Value*> Idx;
- Idx.push_back(Constant::getNullValue(Type::Int32Ty));
- Idx.push_back(ConstantInt::get(Type::Int32Ty, 0));
+ Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
+ Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0));
Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",
UnwindBlock);
- Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty),
+ Idx[0] = new BitCastInst(Idx[0],
+ Type::getInt8PtrTy(F.getContext()),
"tmp", UnwindBlock);
- Idx[1] = ConstantInt::get(Type::Int32Ty, 1);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock);
- new UnreachableInst(UnwindBlock);
+ new UnreachableInst(F.getContext(), UnwindBlock);
// Set up the term block ("throw without a catch").
- new UnreachableInst(TermBlock);
+ new UnreachableInst(F.getContext(), TermBlock);
// Insert a new call to write(2, AbortMessage, AbortMessageLength);
writeAbortMessage(TermBlock->getTerminator());
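Besides the context threading, this file picks up the reordered ICmpInst constructors: the insertion point now comes first, either an Instruction* to insert before or a BasicBlock& to append to. Both forms, as used in the hunks above:

    #include "llvm/Instructions.h"
    using namespace llvm;

    static void compareBothWays(Instruction *Before, BasicBlock &AtEnd,
                                Value *A, Value *B) {
      new ICmpInst(Before, ICmpInst::ICMP_EQ, A, B, "cmp.before"); // insert before
      new ICmpInst(AtEnd, ICmpInst::ICMP_NE, A, B, "cmp.atend");   // append to block
    }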
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 1da59360fc2b..764f0980cd2d 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -18,6 +18,7 @@
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
@@ -108,8 +109,10 @@ bool LowerSwitch::runOnFunction(Function &F) {
// operator<< - Used for debugging purposes.
//
-static std::ostream& operator<<(std::ostream &O,
- const LowerSwitch::CaseVector &C) {
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) {
O << "[";
for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
@@ -121,11 +124,6 @@ static std::ostream& operator<<(std::ostream &O,
return O << "]";
}
-static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) {
- if (O.stream()) *O.stream() << C;
- return O;
-}
-
// switchConvert - Convert the switch statement into a binary lookup of
// the case values. The function recursively builds this tree.
//
@@ -140,9 +138,9 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
unsigned Mid = Size / 2;
std::vector<CaseRange> LHS(Begin, Begin + Mid);
- DOUT << "LHS: " << LHS << "\n";
+ DEBUG(errs() << "LHS: " << LHS << "\n");
std::vector<CaseRange> RHS(Begin + Mid, End);
- DOUT << "RHS: " << RHS << "\n";
+ DEBUG(errs() << "RHS: " << RHS << "\n");
CaseRange& Pivot = *(Begin + Mid);
DEBUG(errs() << "Pivot ==> "
@@ -157,11 +155,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
Function* F = OrigBlock->getParent();
- BasicBlock* NewNode = BasicBlock::Create("NodeBlock");
+ BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
- ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot");
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+ Val, Pivot.Low, "Pivot");
NewNode->getInstList().push_back(Comp);
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
return NewNode;
@@ -178,7 +177,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
BasicBlock* Default)
{
Function* F = OrigBlock->getParent();
- BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock");
+ BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewLeaf);
@@ -186,18 +185,18 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
ICmpInst* Comp = NULL;
if (Leaf.Low == Leaf.High) {
// Make the seteq instruction...
- Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low,
- "SwitchLeaf", NewLeaf);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+ Leaf.Low, "SwitchLeaf");
} else {
// Make range comparison
if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
// Val >= Min && Val <= Hi --> Val <= Hi
- Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High,
- "SwitchLeaf", NewLeaf);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
} else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
- Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High,
- "SwitchLeaf", NewLeaf);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
} else {
// Emit V-Lo <=u Hi-Lo
Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
@@ -205,8 +204,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
Val->getName()+".off",
NewLeaf);
Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
- Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound,
- "SwitchLeaf", NewLeaf);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
}
}
@@ -290,7 +289,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
- BasicBlock* NewDefault = BasicBlock::Create("NewDefault");
+ BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
F->getBasicBlockList().insert(Default, NewDefault);
BranchInst::Create(Default, NewDefault);
@@ -308,9 +307,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
CaseVector Cases;
unsigned numCmps = Clusterify(Cases, SI);
- DOUT << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n";
- DOUT << "Cases: " << Cases << "\n";
+ DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(errs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
OrigBlock, NewDefault);
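The DOUT stream and its std::ostream plumbing give way to the DEBUG macro over raw_ostream, which compiles to nothing in release builds; the (void)numCmps cast above keeps such builds warning-free once the variable's only use sits inside DEBUG. A minimal sketch:

    #define DEBUG_TYPE "lowerswitch"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void report(unsigned numCmps) {
      DEBUG(errs() << "Total compares: " << numCmps << "\n");
      (void)numCmps; // no other use in -Asserts builds
    }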
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 2b06d778e145..5df08326d8bb 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -75,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ PromoteMemToReg(Allocas, DT, DF, F.getContext());
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b717699b7e05..9ca06bd180a1 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -23,13 +23,13 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
@@ -41,7 +41,6 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-// Provide DenseMapInfo for all pointers.
namespace llvm {
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
@@ -181,6 +180,8 @@ namespace {
/// AST - An AliasSetTracker object to update. If null, don't update it.
///
AliasSetTracker *AST;
+
+ LLVMContext &Context;
/// AllocaLookup - Reverse mapping of Allocas.
///
@@ -212,8 +213,9 @@ namespace {
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- DominanceFrontier &df, AliasSetTracker *ast)
- : Allocas(A), DT(dt), DF(df), AST(ast) {}
+ DominanceFrontier &df, AliasSetTracker *ast,
+ LLVMContext &C)
+ : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {}
void run();
@@ -291,10 +293,9 @@ namespace {
// As we scan the uses of the alloca instruction, keep track of stores,
// and decide whether all of the loads and stores to the alloca are within
// the same basic block.
- for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
- U != E;) {
- Instruction *User = cast<Instruction>(*U);
- ++U;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
// Remove any uses of this alloca in DbgInfoInstrinsics.
assert(BC->hasOneUse() && "Unexpected alloca uses!");
@@ -303,7 +304,8 @@ namespace {
BC->eraseFromParent();
continue;
}
- else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Remember the basic blocks which define new values for the alloca
DefiningBlocks.push_back(SI->getParent());
AllocaPointerVal = SI->getOperand(0);
@@ -491,17 +493,14 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = PN->hasConstantValue(true)) {
- if (!isa<Instruction>(V) ||
- properlyDominates(cast<Instruction>(V), PN)) {
- if (AST && isa<PointerType>(PN->getType()))
- AST->deleteValue(PN);
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- NewPhiNodes.erase(I++);
- EliminatedAPHI = true;
- continue;
- }
+ if (Value *V = PN->hasConstantValue(&DT)) {
+ if (AST && isa<PointerType>(PN->getType()))
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
}
++I;
}
@@ -603,7 +602,9 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
LiveInBlockWorklist.pop_back();
--i, --e;
break;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (LI->getOperand(0) != AI) continue;
// Okay, we found a load before a store to the alloca. It is actually
@@ -757,6 +758,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
}
}
+namespace {
/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
/// first element of a pair.
@@ -767,6 +769,8 @@ struct StoreIndexSearchPredicate {
}
};
+}
+
/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block. If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
@@ -864,8 +868,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
- Allocas[AllocaNo]->getName() + "." +
- utostr(Version++), BB->begin());
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
PN->reserveOperandSpace(getNumPreds(BB));
@@ -995,9 +999,9 @@ NextIteration:
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
DominatorTree &DT, DominanceFrontier &DF,
- AliasSetTracker *AST) {
+ LLVMContext &Context, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty()) return;
- PromoteMem2Reg(Allocas, DT, DF, AST).run();
+ PromoteMem2Reg(Allocas, DT, DF, AST, Context).run();
}
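
For reference, a minimal caller-side sketch of the widened PromoteMemToReg interface above; the surrounding pass, the collectPromotableAllocas helper, and the getAnalysis calls are hypothetical placeholders, not part of this patch:

    // Sketch only: adapting a caller to the extra LLVMContext parameter.
    std::vector<AllocaInst*> Allocas = collectPromotableAllocas(F); // hypothetical
    DominatorTree &DT = getAnalysis<DominatorTree>();
    DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
    PromoteMemToReg(Allocas, DT, DF, F.getContext(), /*AST=*/0);
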
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 000000000000..780ee2638942
--- /dev/null
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,335 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
+typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > >
+ IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+ return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+ : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete &getAvailableVals(AV);
+ delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. ProtoValue is the value used to name PHI nodes.
+void SSAUpdater::Initialize(Value *ProtoValue) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ if (IPI == 0)
+ IPI = new IncomingPredInfoTy();
+ else
+ getIncomingPredInfo(IPI).clear();
+ PrototypeValue = ProtoValue;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
+ assert(PrototypeValue->getType() == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+ return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!getAvailableVals(AV).count(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = 0;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(PrototypeValue->getType());
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: insert one now.
+ PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
+ PrototypeValue->getName(),
+ &BB->front());
+ InsertedPHI->reserveOperandSpace(PredValues.size());
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UseBB = User->getParent();
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ UseBB = UserPN->getIncomingBlock(U);
+
+ U.set(GetValueInMiddleOfBlock(UseBB));
+}
+
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// walking predecessors inserting PHI nodes as needed until we get to a block
+/// where the value is available.
+///
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+
+ // Query AvailableVals by doing an insertion of null.
+ std::pair<AvailableValsTy::iterator, bool> InsertRes =
+ AvailableVals.insert(std::make_pair(BB, WeakVH()));
+
+ // Handle the case when the insertion fails because we have already seen BB.
+ if (!InsertRes.second) {
+ // If the insertion failed, there are two cases. The first case is that the
+ // value is already available for the specified block. If we get this, just
+ // return the value.
+ if (InsertRes.first->second != 0)
+ return InsertRes.first->second;
+
+  // Otherwise, if the value we find is null, the value is not yet known but
+  // is being computed elsewhere in our recursion.  This means
+ // that we have a cycle. Handle this by inserting a PHI node and returning
+ // it. When we get back to the first instance of the recursion we will fill
+ // in the PHI node.
+ return InsertRes.first->second =
+ PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
+ &BB->front());
+ }
+
+ // Okay, the value isn't in the map and we just inserted a null in the entry
+ // to indicate that we're processing the block. Since we have no idea what
+ // value is in this block, we have to recurse through our predecessors.
+ //
+ // While we're walking our predecessors, we keep track of them in a vector,
+ // then insert a PHI node in the end if we actually need one. We could use a
+ // smallvector here, but that would take a lot of stack space for every level
+  // of the recursion, so we just use IncomingPredInfo as an explicit stack.
+ IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+ unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+
+ // As we're walking the predecessors, keep track of whether they are all
+  // producing the same value. If so, this value will capture it; if not, it
+ // will get reset to null. We distinguish the no-predecessor case explicitly
+ // below.
+ TrackingVH<Value> SingularValue;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+  // If there are no predecessors, then we must have found an unreachable
+  // block; just return 'undef'.  Since there are no predecessors, InsertRes
+  // must not be invalidated.
+ if (IncomingPredInfo.size() == FirstPredInfoEntry)
+ return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
+
+  // Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
+  // this block is involved in a loop, a no-entry PHI node will have been
+  // inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
+  // above.
+ TrackingVH<Value> &InsertedVal = AvailableVals[BB];
+
+ // If all the predecessor values are the same then we don't need to insert a
+ // PHI. This is the simple and common case.
+ if (SingularValue) {
+    // If a PHI node got inserted, replace it with the singular value and delete
+ // it.
+ if (InsertedVal) {
+ PHINode *OldVal = cast<PHINode>(InsertedVal);
+ // Be careful about dead loops. These RAUW's also update InsertedVal.
+ if (InsertedVal != SingularValue)
+ OldVal->replaceAllUsesWith(SingularValue);
+ else
+ OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType()));
+ OldVal->eraseFromParent();
+ } else {
+ InsertedVal = SingularValue;
+ }
+
+ // Drop the entries we added in IncomingPredInfo to restore the stack.
+ IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+ IncomingPredInfo.end());
+ return InsertedVal;
+ }
+
+ // Otherwise, we do need a PHI: insert one now if we don't already have one.
+ if (InsertedVal == 0)
+ InsertedVal = PHINode::Create(PrototypeValue->getType(),
+ PrototypeValue->getName(), &BB->front());
+
+ PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
+ InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
+
+ // Fill in all the predecessors of the PHI.
+ for (IncomingPredInfoTy::iterator I =
+ IncomingPredInfo.begin()+FirstPredInfoEntry,
+ E = IncomingPredInfo.end(); I != E; ++I)
+ InsertedPHI->addIncoming(I->second, I->first);
+
+ // Drop the entries we added in IncomingPredInfo to restore the stack.
+ IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+ IncomingPredInfo.end());
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+ InsertedPHI->replaceAllUsesWith(ConstVal);
+ InsertedPHI->eraseFromParent();
+ InsertedVal = ConstVal;
+ } else {
+ DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+ }
+
+ return InsertedVal;
+}
+
+
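A hedged usage sketch of the new SSAUpdater API added above; the blocks Left, Right, and Merge and the definitions V1 and V2 are hypothetical stand-ins:

    SmallVector<PHINode*, 8> NewPHIs;
    SSAUpdater SSA(&NewPHIs);          // optionally collects inserted PHIs
    SSA.Initialize(V1);                // seeds the type and name of new PHIs
    SSA.AddAvailableValue(Left, V1);   // value live out of block Left
    SSA.AddAvailableValue(Right, V2);  // value live out of block Right
    // Inserts a PHI in Merge only if the incoming values disagree.
    Value *LiveVal = SSA.GetValueInMiddleOfBlock(Merge);
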
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
index 4c4dd37ddf75..3bb2e8ee6911 100644
--- a/lib/Transforms/Utils/SSI.cpp
+++ b/lib/Transforms/Utils/SSI.cpp
@@ -23,6 +23,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SSI.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
using namespace llvm;
@@ -30,11 +31,12 @@ using namespace llvm;
static const std::string SSI_PHI = "SSI_phi";
static const std::string SSI_SIG = "SSI_sigma";
-static const unsigned UNSIGNED_INFINITE = ~0U;
+STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
+STATISTIC(NumPhiInserted, "Number of phi functions inserted");
void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominanceFrontier>();
- AU.addRequired<DominatorTree>();
+ AU.addRequiredTransitive<DominanceFrontier>();
+ AU.addRequiredTransitive<DominatorTree>();
AU.setPreservesAll();
}
@@ -45,22 +47,23 @@ bool SSI::runOnFunction(Function &F) {
/// This method creates the SSI representation for the list of values
/// received. It will only create SSI representation if a value is used
-/// in a to decide a branch. Repeated values are created only once.
+/// to decide a branch. Repeated values are created only once.
///
void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
init(value);
- for (unsigned i = 0; i < num_values; ++i) {
- if (created.insert(value[i])) {
- needConstruction[i] = true;
- }
- }
- insertSigmaFunctions(value);
+ SmallPtrSet<Instruction*, 4> needConstruction;
+ for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
+ E = value.end(); I != E; ++I)
+ if (created.insert(*I))
+ needConstruction.insert(*I);
+
+ insertSigmaFunctions(needConstruction);
// Test if there is a need to transform to SSI
- if (needConstruction.any()) {
- insertPhiFunctions(value);
- renameInit(value);
+ if (!needConstruction.empty()) {
+ insertPhiFunctions(needConstruction);
+ renameInit(needConstruction);
rename(DT_->getRoot());
fixPhis();
}
@@ -71,100 +74,107 @@ void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
/// Insert sigma functions (a sigma function is a phi function with one
/// operand)
///
-void SSI::insertSigmaFunctions(SmallVectorImpl<Instruction *> &value) {
- for (unsigned i = 0; i < num_values; ++i) {
- if (!needConstruction[i])
- continue;
-
- bool need = false;
- for (Value::use_iterator begin = value[i]->use_begin(), end =
- value[i]->use_end(); begin != end; ++begin) {
+void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
+ for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+ E = value.end(); I != E; ++I) {
+ for (Value::use_iterator begin = (*I)->use_begin(),
+ end = (*I)->use_end(); begin != end; ++begin) {
// Test if the Use of the Value is in a comparator
- CmpInst *CI = dyn_cast<CmpInst>(begin);
- if (CI && isUsedInTerminator(CI)) {
- // Basic Block of the Instruction
- BasicBlock *BB = CI->getParent();
- // Last Instruction of the Basic Block
- const TerminatorInst *TI = BB->getTerminator();
-
- for (unsigned j = 0, e = TI->getNumSuccessors(); j < e; ++j) {
- // Next Basic Block
- BasicBlock *BB_next = TI->getSuccessor(j);
- if (BB_next != BB &&
- BB_next->getUniquePredecessor() != NULL &&
- dominateAny(BB_next, value[i])) {
- PHINode *PN = PHINode::Create(
- value[i]->getType(), SSI_SIG, BB_next->begin());
- PN->addIncoming(value[i], BB);
- sigmas.insert(std::make_pair(PN, i));
- created.insert(PN);
- need = true;
- defsites[i].push_back(BB_next);
+ if (CmpInst *CI = dyn_cast<CmpInst>(begin)) {
+        // Iterate through all uses of CmpInst
+ for (Value::use_iterator begin_ci = CI->use_begin(),
+ end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
+ // Test if any use of CmpInst is in a Terminator
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) {
+ insertSigma(TI, *I);
}
}
}
}
- needConstruction[i] = need;
+ }
+}
+
+/// Inserts Sigma Functions in every BasicBlock successor to Terminator
+/// Instruction TI. All inserted Sigma Functions are related to Instruction I.
+///
+void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
+ // Basic Block of the Terminator Instruction
+ BasicBlock *BB = TI->getParent();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
+ // Next Basic Block
+ BasicBlock *BB_next = TI->getSuccessor(i);
+ if (BB_next != BB &&
+ BB_next->getSinglePredecessor() != NULL &&
+ dominateAny(BB_next, I)) {
+ PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
+ PN->addIncoming(I, BB);
+ sigmas[PN] = I;
+ created.insert(PN);
+ defsites[I].push_back(BB_next);
+ ++NumSigmaInserted;
+ }
}
}
/// Insert phi functions when necessary
///
-void SSI::insertPhiFunctions(SmallVectorImpl<Instruction *> &value) {
+void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
- for (unsigned i = 0; i < num_values; ++i) {
+ for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+ E = value.end(); I != E; ++I) {
// Test if there were any sigmas for this variable
- if (needConstruction[i]) {
-
- SmallPtrSet<BasicBlock *, 1> BB_visited;
-
- // Insert phi functions if there is any sigma function
- while (!defsites[i].empty()) {
-
- BasicBlock *BB = defsites[i].back();
-
- defsites[i].pop_back();
- DominanceFrontier::iterator DF_BB = DF->find(BB);
-
- // Iterates through all the dominance frontier of BB
- for (std::set<BasicBlock *>::iterator DF_BB_begin =
- DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
- DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
- BasicBlock *BB_dominated = *DF_BB_begin;
-
- // Test if has not yet visited this node and if the
- // original definition dominates this node
- if (BB_visited.insert(BB_dominated) &&
- DT_->properlyDominates(value_original[i], BB_dominated) &&
- dominateAny(BB_dominated, value[i])) {
- PHINode *PN = PHINode::Create(
- value[i]->getType(), SSI_PHI, BB_dominated->begin());
- phis.insert(std::make_pair(PN, i));
- created.insert(PN);
-
- defsites[i].push_back(BB_dominated);
- }
+ SmallPtrSet<BasicBlock *, 16> BB_visited;
+
+ // Insert phi functions if there is any sigma function
+ while (!defsites[*I].empty()) {
+
+ BasicBlock *BB = defsites[*I].back();
+
+ defsites[*I].pop_back();
+ DominanceFrontier::iterator DF_BB = DF->find(BB);
+
+ // The BB is unreachable. Skip it.
+ if (DF_BB == DF->end())
+ continue;
+
+      // Iterate through all blocks in the dominance frontier of BB
+ for (std::set<BasicBlock *>::iterator DF_BB_begin =
+ DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
+ DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
+ BasicBlock *BB_dominated = *DF_BB_begin;
+
+        // Test if we have not yet visited this node and if the
+ // original definition dominates this node
+ if (BB_visited.insert(BB_dominated) &&
+ DT_->properlyDominates(value_original[*I], BB_dominated) &&
+ dominateAny(BB_dominated, *I)) {
+ PHINode *PN = PHINode::Create(
+ (*I)->getType(), SSI_PHI, BB_dominated->begin());
+ phis.insert(std::make_pair(PN, *I));
+ created.insert(PN);
+
+ defsites[*I].push_back(BB_dominated);
+ ++NumPhiInserted;
}
}
- BB_visited.clear();
}
+ BB_visited.clear();
}
}
/// Some initialization for the rename part
///
-void SSI::renameInit(SmallVectorImpl<Instruction *> &value) {
- value_stack.resize(num_values);
- for (unsigned i = 0; i < num_values; ++i) {
- value_stack[i].push_back(value[i]);
- }
+void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
+ for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+ E = value.end(); I != E; ++I)
+ value_stack[*I].push_back(*I);
}
/// Renames all variables in the specified BasicBlock.
/// Only variables that need to be renamed will be.
///
void SSI::rename(BasicBlock *BB) {
- BitVector *defined = new BitVector(num_values, false);
+ SmallPtrSet<Instruction*, 8> defined;
// Iterate through instructions and make appropriate renaming.
// For SSI_PHI (b = PHI()), store b at value_stack as a new
@@ -178,19 +188,17 @@ void SSI::rename(BasicBlock *BB) {
begin != end; ++begin) {
Instruction *I = begin;
if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
- int position;
+ Instruction* position;
// Treat SSI_PHI
- if ((position = getPositionPhi(PN)) != -1) {
+ if ((position = getPositionPhi(PN))) {
value_stack[position].push_back(PN);
- (*defined)[position] = true;
- }
-
+ defined.insert(position);
// Treat SSI_SIG
- else if ((position = getPositionSigma(PN)) != -1) {
+ } else if ((position = getPositionSigma(PN))) {
substituteUse(I);
value_stack[position].push_back(PN);
- (*defined)[position] = true;
+ defined.insert(position);
}
// Treat all other PHI functions
@@ -216,10 +224,9 @@ void SSI::rename(BasicBlock *BB) {
for (BasicBlock::iterator begin = BB_succ->begin(),
notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
Instruction *I = begin;
- PHINode *PN;
- int position;
- if ((PN = dyn_cast<PHINode>(I)) && ((position
- = getPositionPhi(PN)) != -1)) {
+ PHINode *PN = dyn_cast<PHINode>(I);
+ Instruction* position;
+ if (PN && ((position = getPositionPhi(PN)))) {
PN->addIncoming(value_stack[position].back(), BB);
}
}
@@ -237,13 +244,9 @@ void SSI::rename(BasicBlock *BB) {
// Now we remove all inserted definitions of a variable from the top of
// the stack leaving the previous one as the top.
- if (defined->any()) {
- for (unsigned i = 0; i < num_values; ++i) {
- if ((*defined)[i]) {
- value_stack[i].pop_back();
- }
- }
- }
+ for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
+ DE = defined.end(); DI != DE; ++DI)
+ value_stack[*DI].pop_back();
}
/// Substitute any use in this instruction for the last definition of
@@ -252,23 +255,24 @@ void SSI::rename(BasicBlock *BB) {
void SSI::substituteUse(Instruction *I) {
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
Value *operand = I->getOperand(i);
- for (unsigned j = 0; j < num_values; ++j) {
- if (operand == value_stack[j].front() &&
- I != value_stack[j].back()) {
+ for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
+ VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
+ if (operand == VI->second.front() &&
+ I != VI->second.back()) {
PHINode *PN_I = dyn_cast<PHINode>(I);
- PHINode *PN_vs = dyn_cast<PHINode>(value_stack[j].back());
+ PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
// If a phi created in a BasicBlock is used as an operand of another
// created in the same BasicBlock, this step marks this second phi,
// to fix this issue later. It cannot be fixed now, because the
// operands of the first phi are not final yet.
if (PN_I && PN_vs &&
- value_stack[j].back()->getParent() == I->getParent()) {
+ VI->second.back()->getParent() == I->getParent()) {
phisToFix.insert(PN_I);
}
- I->setOperand(i, value_stack[j].back());
+ I->setOperand(i, VI->second.back());
break;
}
}
@@ -276,12 +280,16 @@ void SSI::substituteUse(Instruction *I) {
}
/// Test if the BasicBlock BB dominates any use or definition of value.
+/// If it dominates a phi instruction that is in the same BasicBlock,
+/// that use does not count.
///
bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
for (Value::use_iterator begin = value->use_begin(),
end = value->use_end(); begin != end; ++begin) {
Instruction *I = cast<Instruction>(*begin);
BasicBlock *BB_father = I->getParent();
+ if (BB == BB_father && isa<PHINode>(I))
+ continue;
if (DT_->dominates(BB, BB_father)) {
return true;
}
@@ -293,31 +301,54 @@ bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
/// as an operand of another phi function used in the same BasicBlock,
/// LLVM treats this as an error. So on the second phi, the first phi is called
/// P and the BasicBlock it comes from is B. This P will be replaced by the value
-/// it has for BasicBlock B.
+/// it has for BasicBlock B. It also includes undef values for predecessors
+/// that were not included in the phi.
///
void SSI::fixPhis() {
for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
end = phisToFix.end(); begin != end; ++begin) {
PHINode *PN = *begin;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
- PHINode *PN_father;
- if ((PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i))) &&
- PN->getParent() == PN_father->getParent()) {
+ PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
+ if (PN_father && PN->getParent() == PN_father->getParent() &&
+ !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
BasicBlock *BB = PN->getIncomingBlock(i);
int pos = PN_father->getBasicBlockIndex(BB);
PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
}
}
}
+
+ for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
+ end = phis.end(); begin != end; ++begin) {
+ PHINode *PN = begin->first;
+ BasicBlock *BB = PN->getParent();
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ SmallVector<BasicBlock*, 8> Preds(PI, PE);
+ for (unsigned size = Preds.size();
+ PI != PE && PN->getNumIncomingValues() != size; ++PI) {
+ bool found = false;
+ for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
+ i < pn_end; ++i) {
+ if (PN->getIncomingBlock(i) == *PI) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ PN->addIncoming(UndefValue::get(PN->getType()), *PI);
+ }
+ }
+ }
}
/// Return which variable this phi node represents in the phis list,
/// or null if it is not present.
///
-unsigned SSI::getPositionPhi(PHINode *PN) {
- DenseMap<PHINode *, unsigned>::iterator val = phis.find(PN);
+Instruction* SSI::getPositionPhi(PHINode *PN) {
+ DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
if (val == phis.end())
- return UNSIGNED_INFINITE;
+ return 0;
else
return val->second;
}
@@ -325,52 +356,27 @@ unsigned SSI::getPositionPhi(PHINode *PN) {
/// Return which variable this phi node represents in the sigmas list,
/// or null if it is not present.
///
-unsigned SSI::getPositionSigma(PHINode *PN) {
- DenseMap<PHINode *, unsigned>::iterator val = sigmas.find(PN);
+Instruction* SSI::getPositionSigma(PHINode *PN) {
+ DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
if (val == sigmas.end())
- return UNSIGNED_INFINITE;
+ return 0;
else
return val->second;
}
-/// Return true if the the Comparison Instruction is an operator
-/// of the Terminator instruction of its Basic Block.
-///
-unsigned SSI::isUsedInTerminator(CmpInst *CI) {
- TerminatorInst *TI = CI->getParent()->getTerminator();
- if (TI->getNumOperands() == 0) {
- return false;
- } else if (CI == TI->getOperand(0)) {
- return true;
- } else {
- return false;
- }
-}
-
/// Initializes the per-value bookkeeping (value_original and defsites)
///
void SSI::init(SmallVectorImpl<Instruction *> &value) {
- num_values = value.size();
- needConstruction.resize(num_values, false);
-
- value_original.resize(num_values);
- defsites.resize(num_values);
-
- for (unsigned i = 0; i < num_values; ++i) {
- value_original[i] = value[i]->getParent();
- defsites[i].push_back(value_original[i]);
+ for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
+ E = value.end(); I != E; ++I) {
+ value_original[*I] = (*I)->getParent();
+ defsites[*I].push_back((*I)->getParent());
}
}
/// Clean up all resources used in this creation of SSI
///
void SSI::clean() {
- for (unsigned i = 0; i < num_values; ++i) {
- defsites[i].clear();
- if (i < value_stack.size())
- value_stack[i].clear();
- }
-
phis.clear();
sigmas.clear();
phisToFix.clear();
@@ -378,7 +384,6 @@ void SSI::clean() {
defsites.clear();
value_stack.clear();
value_original.clear();
- needConstruction.clear();
}
/// createSSIPass - The public interface to this file...
@@ -388,3 +393,40 @@ FunctionPass *llvm::createSSIPass() { return new SSI(); }
char SSI::ID = 0;
static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
+/// SSIEverything - A pass that runs createSSI on every non-void variable,
+/// intended for debugging.
+namespace {
+ struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ SSIEverything() : FunctionPass(&ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<SSI>();
+ }
+ };
+}
+
+bool SSIEverything::runOnFunction(Function &F) {
+ SmallVector<Instruction *, 16> Insts;
+ SSI &ssi = getAnalysis<SSI>();
+
+ if (F.isDeclaration() || F.isIntrinsic()) return false;
+
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
+ for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
+ if (I->getType() != Type::getVoidTy(F.getContext()))
+ Insts.push_back(I);
+
+ ssi.createSSI(Insts);
+ return true;
+}
+
+/// createSSIEverythingPass - The public interface to this file...
+///
+FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
+
+char SSIEverything::ID = 0;
+static RegisterPass<SSIEverything>
+Y("ssi-everything", "Static Single Information Construction");
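
To make the SSI refactor above concrete: the phis and sigmas maps now key each created PHINode directly to its originating Instruction, so lookups return a pointer that is tested against null rather than an index compared to ~0U. A minimal sketch of the resulting caller pattern (PN is a placeholder):

    // New convention: a null result means PN is not an SSI_phi.
    if (Instruction *Orig = getPositionPhi(PN))
      value_stack[Orig].push_back(PN);   // found: PN renames Orig
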
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 58d4d5a344c1..6fd7d7bf9aea 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -21,6 +21,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/SmallVector.h"
@@ -84,19 +85,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
- DOUT << "Looking to fold " << BB->getNameStart() << " into "
- << Succ->getNameStart() << "\n";
+ DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
if (Succ->getSinglePredecessor()) return true;
- typedef SmallPtrSet<Instruction*, 16> InstrSet;
- InstrSet BBPHIs;
-
- // Make a list of all phi nodes in BB
- BasicBlock::iterator BBI = BB->begin();
- while (isa<PHINode>(*BBI)) BBPHIs.insert(BBI++);
-
// Make a list of the predecessors of BB
typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
BlockSet BBPreds(pred_begin(BB), pred_end(BB));
@@ -126,16 +120,13 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
PI != PE; PI++) {
if (BBPN->getIncomingValueForBlock(*PI)
!= PN->getIncomingValueForBlock(*PI)) {
- DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
- << Succ->getNameStart() << " is conflicting with "
- << BBPN->getNameStart() << " with regard to common predecessor "
- << (*PI)->getNameStart() << "\n";
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
return false;
}
}
- // Remove this phinode from the list of phis in BB, since it has been
- // handled.
- BBPHIs.erase(BBPN);
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
@@ -144,33 +135,15 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// one for BB, in which case this phi node will not prevent the merging
// of the block.
if (Val != PN->getIncomingValueForBlock(*PI)) {
- DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
- << Succ->getNameStart() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getNameStart() << "\n";
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
return false;
}
}
}
}
- // If there are any other phi nodes in BB that don't have a phi node in Succ
- // to merge with, they must be moved to Succ completely. However, for any
- // predecessors of Succ, branches will be added to the phi node that just
- // point to itself. So, for any common predecessors, this must not cause
- // conflicts.
- for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end();
- I != E; I++) {
- PHINode *PN = cast<PHINode>(*I);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++)
- if (PN->getIncomingValueForBlock(*PI) != PN) {
- DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
- << BB->getNameStart() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getNameStart() << "\n";
- return false;
- }
- }
-
return true;
}
@@ -182,8 +155,36 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
// Check to see if merging these blocks would cause conflicts for any of the
// phi nodes in BB or Succ. If not, we can safely merge.
if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-
- DOUT << "Killing Trivial BB: \n" << *BB;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+  // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
if (isa<PHINode>(Succ->begin())) {
// If there is more than one pred of succ, and there are PHI nodes in
@@ -217,38 +218,16 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
}
}
- if (isa<PHINode>(&BB->front())) {
- SmallVector<BasicBlock*, 16>
- OldSuccPreds(pred_begin(Succ), pred_end(Succ));
-
- // Move all PHI nodes in BB to Succ if they are alive, otherwise
- // delete them.
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- if (PN->use_empty()) {
- // Just remove the dead phi. This happens if Succ's PHIs were the only
- // users of the PHI nodes.
- PN->eraseFromParent();
- continue;
- }
-
- // The instruction is alive, so this means that BB must dominate all
- // predecessors of Succ (Since all uses of the PN are after its
- // definition, so in Succ or a block dominated by Succ. If a predecessor
- // of Succ would not be dominated by BB, PN would violate the def before
- // use SSA demand). Therefore, we can simply move the phi node to the
- // next block.
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
Succ->getInstList().splice(Succ->begin(),
BB->getInstList(), BB->begin());
-
- // We need to add new entries for the PHI node to account for
- // predecessors of Succ that the PHI node does not take into
- // account. At this point, since we know that BB dominated succ and all
- // of its predecessors, this means that we should any newly added
- // incoming edges should use the PHI node itself as the value for these
- // edges, because they are loop back edges.
- for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i)
- if (OldSuccPreds[i] != BB)
- PN->addIncoming(PN, OldSuccPreds[i]);
+ } else {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
}
}
@@ -383,26 +362,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+
switch (I->getOpcode()) {
default: return false; // Cannot hoist this out safely.
case Instruction::Load: {
- // We can hoist loads that are non-volatile and obviously cannot trap.
- if (cast<LoadInst>(I)->isVolatile())
- return false;
- // FIXME: A computation of a constant can trap!
- if (!isa<AllocaInst>(I->getOperand(0)) &&
- !isa<Constant>(I->getOperand(0)))
- return false;
- // External weak globals may have address 0, so we can't load them.
- Value *V2 = I->getOperand(0)->getUnderlyingObject();
- if (V2) {
- GlobalVariable* GV = dyn_cast<GlobalVariable>(V2);
- if (GV && GV->hasExternalWeakLinkage())
- return false;
- }
- // Finally, we have to check to make sure there are no instructions
- // before the load in its basic block, as we are going to hoist the loop
- // out to its predecessor.
+ // We have to check to make sure there are no instructions before the
+    // load in its basic block, as we are going to hoist the load out to
+ // its predecessor.
BasicBlock::iterator IP = PBB->begin();
while (isa<DbgInfoIntrinsic>(IP))
IP++;
@@ -645,12 +613,13 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
assert(ThisCases.size() == 1 && "Branch can only have one case!");
// Insert the new branch.
Instruction *NI = BranchInst::Create(ThisDef, TI);
+ (void) NI;
// Remove PHI node entries for the dead edge.
ThisCases[0].second->removePredecessor(TI->getParent());
- DOUT << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+ DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
EraseTerminatorInstAndDCECond(TI);
return true;
@@ -662,8 +631,8 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
DeadCases.insert(PredCases[i].first);
- DOUT << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI;
+ DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
for (unsigned i = SI->getNumCases()-1; i != 0; --i)
if (DeadCases.count(SI->getCaseValue(i))) {
@@ -671,7 +640,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
SI->removeCase(i);
}
- DOUT << "Leaving: " << *TI << "\n";
+ DEBUG(errs() << "Leaving: " << *TI << "\n");
return true;
}
}
@@ -712,9 +681,10 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Insert the new branch.
Instruction *NI = BranchInst::Create(TheRealDest, TI);
+ (void) NI;
- DOUT << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+ DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
EraseTerminatorInstAndDCECond(TI);
return true;
@@ -847,7 +817,8 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
if (InfLoopBlock == 0) {
// Insert it at the end of the function, because it's either code,
// or it won't matter if it's hot. :)
- InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
}
NewSI->setSuccessor(i, InfLoopBlock);
@@ -900,7 +871,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
while (isa<DbgInfoIntrinsic>(I2))
I2 = BB2_Itr++;
if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- !I1->isIdenticalTo(I2) ||
+ !I1->isIdenticalToWhenDefined(I2) ||
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
@@ -919,6 +890,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
BB2->getInstList().erase(I2);
I1 = BB1_Itr++;
@@ -927,7 +899,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I2 = BB2_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
I2 = BB2_Itr++;
- } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2));
+ } while (I1->getOpcode() == I2->getOpcode() &&
+ I1->isIdenticalToWhenDefined(I2));
return true;
@@ -939,7 +912,7 @@ HoistTerminator:
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
BIParent->getInstList().insert(BI, NT);
- if (NT->getType() != Type::VoidTy) {
+ if (NT->getType() != Type::getVoidTy(BB1->getContext())) {
I1->replaceAllUsesWith(NT);
I2->replaceAllUsesWith(NT);
NT->takeName(I1);
@@ -1197,7 +1170,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB;
if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
- CB->getType() == Type::Int1Ty) {
+ CB->getType() == Type::getInt1Ty(BB->getContext())) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
@@ -1209,7 +1182,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
// the edge we are about to create.
- BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge",
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
RealDest->getParent(), RealDest);
BranchInst::Create(RealDest, EdgeBB);
PHINode *PN;
@@ -1242,7 +1216,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
}
// Check for trivial simplification.
- if (Constant *C = ConstantFoldInstruction(N)) {
+ if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {
TranslateMap[BBI] = C;
delete N; // Constant folded away, don't need actual inst
} else {
@@ -1296,8 +1270,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
if (NumPhis > 2)
return false;
- DOUT << "FOUND IF CONDITION! " << *IfCond << " T: "
- << IfTrue->getName() << " F: " << IfFalse->getName() << "\n";
+ DEBUG(errs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
@@ -1427,7 +1401,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
if (FalseRet->getNumOperands() == 0) {
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
- ReturnInst::Create(0, BI);
+ ReturnInst::Create(BI->getContext(), 0, BI);
EraseTerminatorInstAndDCECond(BI);
return true;
}
@@ -1476,12 +1450,13 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
}
Value *RI = !TrueValue ?
- ReturnInst::Create(BI) :
- ReturnInst::Create(TrueValue, BI);
+ ReturnInst::Create(BI->getContext(), BI) :
+ ReturnInst::Create(BI->getContext(), TrueValue, BI);
+ (void) RI;
- DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
- << "\n " << *BI << "NewRet = " << *RI
- << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc;
+ DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
EraseTerminatorInstAndDCECond(BI);
@@ -1561,7 +1536,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
else
continue;
- DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB;
+ DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
@@ -1605,7 +1580,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
assert(PBI->isConditional() && BI->isConditional());
BasicBlock *BB = BI->getParent();
-
+
// If this block ends with a branch instruction, and if there is a
// predecessor that ends on a branch of the same condition, make
// this conditional branch redundant.
@@ -1616,7 +1591,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue));
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
return true; // Nuke the branch on constant.
}
@@ -1624,7 +1600,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// in the constant and simplify the block result. Subsequent passes of
// simplifycfg will thread the block.
if (BlockIsSimpleEnoughToThreadThrough(BB)) {
- PHINode *NewPN = PHINode::Create(Type::Int1Ty,
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
BI->getCondition()->getName() + ".pr",
BB->begin());
// Okay, we're going to insert the PHI node. Since PBI is not the only
@@ -1636,7 +1612,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(ConstantInt::get(Type::Int1Ty,
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue), *PI);
} else {
NewPN->addIncoming(BI->getCondition(), *PI);
@@ -1694,8 +1670,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Finally, if everything is ok, fold the branches to logical ops.
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
- DOUT << "FOLDING BRs:" << *PBI->getParent()
- << "AND: " << *BI->getParent();
+ DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
// If OtherDest *is* BB, then BB is a basic block with a single conditional
@@ -1708,12 +1684,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (OtherDest == BB) {
// Insert it at the end of the function, because it's either code,
// or it won't matter if it's hot. :)
- BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
OtherDest = InfLoopBlock;
}
- DOUT << *PBI->getParent()->getParent();
+ DEBUG(errs() << *PBI->getParent()->getParent());
// BI may have other predecessors. Because of this, we leave
// it alone, but modify PBI.
@@ -1763,9 +1740,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
}
}
- DOUT << "INTO: " << *PBI->getParent();
-
- DOUT << *PBI->getParent()->getParent();
+ DEBUG(errs() << "INTO: " << *PBI->getParent());
+ DEBUG(errs() << *PBI->getParent()->getParent());
// This basic block is probably dead. We know it has at least
// one fewer predecessor.
@@ -1792,7 +1768,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
// Remove basic blocks that have no predecessors... or that just have themselves
// as a predecessor. These are unreachable.
if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
- DOUT << "Removing BB: \n" << *BB;
+ DEBUG(errs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
}
@@ -1832,8 +1808,8 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
if (!UncondBranchPreds.empty()) {
while (!UncondBranchPreds.empty()) {
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
- DOUT << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred;
+ DEBUG(errs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
Instruction *UncondBranch = Pred->getTerminator();
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
@@ -1884,33 +1860,26 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
} else if (isa<UnwindInst>(BB->begin())) {
// Check to see if the first instruction in this block is just an unwind.
// If so, replace any invoke instructions which use this as an exception
- // destination with call instructions, and any unconditional branch
- // predecessor with an unwind.
+ // destination with call instructions.
//
SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
while (!Preds.empty()) {
BasicBlock *Pred = Preds.back();
- if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) {
- if (BI->isUnconditional()) {
- Pred->getInstList().pop_back(); // nuke uncond branch
- new UnwindInst(Pred); // Use unwind.
- Changed = true;
- }
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
if (II->getUnwindDest() == BB) {
// Insert a new branch instruction before the invoke, because this
- // is now a fall through...
+ // is now a fall through.
BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
Pred->getInstList().remove(II); // Take out of symbol table
- // Insert the call now...
+ // Insert the call now.
SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());
CallInst *CI = CallInst::Create(II->getCalledValue(),
Args.begin(), Args.end(),
II->getName(), BI);
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the Call now does instead
+ // If the invoke produced a value, the Call now does instead.
II->replaceAllUsesWith(CI);
delete II;
Changed = true;
@@ -2042,7 +2011,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
if (BI->getSuccessor(0) == BB) {
- new UnreachableInst(TI);
+ new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
}
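
The recurring mechanical change in the SimplifyCFG hunks above swaps the retired DOUT stream for the DEBUG(errs() << ...) idiom, which is why raw_ostream.h joins the includes. A minimal sketch of the pattern, with traceBlock as a hypothetical helper:

    #include "llvm/Support/Debug.h"        // DEBUG() compiles away in NDEBUG
    #include "llvm/Support/raw_ostream.h"  // errs()

    static void traceBlock(BasicBlock *BB) {
      DEBUG(errs() << "Removing BB: \n" << *BB);  // only printed under -debug
    }
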
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 848f2b87c4ee..30cb94d90385 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -66,8 +66,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
} else if (UnwindingBlocks.size() == 1) {
UnwindBlock = UnwindingBlocks.front();
} else {
- UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F);
- new UnwindInst(UnwindBlock);
+ UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+ new UnwindInst(F.getContext(), UnwindBlock);
for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
E = UnwindingBlocks.end(); I != E; ++I) {
@@ -83,8 +83,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
} else if (UnreachableBlocks.size() == 1) {
UnreachableBlock = UnreachableBlocks.front();
} else {
- UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F);
- new UnreachableInst(UnreachableBlock);
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
E = UnreachableBlocks.end(); I != E; ++I) {
@@ -107,16 +108,17 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// nodes (if the function returns values), and convert all of the return
// instructions into unconditional branches.
//
- BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F);
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
PHINode *PN = 0;
- if (F.getReturnType() == Type::VoidTy) {
- ReturnInst::Create(NULL, NewRetBlock);
+ if (F.getReturnType() == Type::getVoidTy(F.getContext())) {
+ ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
} else {
// If the function doesn't return void... add a PHI node to the block...
PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
NewRetBlock->getInstList().push_back(PN);
- ReturnInst::Create(PN, NewRetBlock);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
}
// Loop over all of the blocks, replacing the return instruction with an
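
The UnifyFunctionExitNodes changes follow the same LLVMContext-threading pattern used throughout this commit: sentinel types such as Type::VoidTy and the context-free factory calls become explicit about which context they belong to. A minimal sketch, with F as a placeholder Function:

    LLVMContext &Ctx = F.getContext();
    BasicBlock *Ret = BasicBlock::Create(Ctx, "UnifiedReturnBlock", &F);
    if (F.getReturnType() == Type::getVoidTy(Ctx))
      ReturnInst::Create(Ctx, 0, Ret);   // void return in the unified block
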
diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp
index caef7ec5c45f..4d838b50e345 100644
--- a/lib/Transforms/Utils/UnrollLoop.cpp
+++ b/lib/Transforms/Utils/UnrollLoop.cpp
@@ -25,6 +25,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -62,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
return 0;
- DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
+ DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred);
// Resolve any PHI nodes at the start of the block. They are all
// guaranteed to have exactly one entry if they exist, unless there are
@@ -113,7 +114,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (!BI || BI->isUnconditional()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
- DOUT << " Can't unroll; loop not terminated by a conditional branch.\n";
+ DEBUG(errs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
return false;
}
@@ -125,9 +127,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
TripMultiple = L->getSmallConstantTripMultiple();
if (TripCount != 0)
- DOUT << " Trip Count = " << TripCount << "\n";
+ DEBUG(errs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
- DOUT << " Trip Multiple = " << TripMultiple << "\n";
+ DEBUG(errs() << " Trip Multiple = " << TripMultiple << "\n");
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
@@ -153,17 +155,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
}
if (CompletelyUnroll) {
- DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << TripCount << "!\n";
+ DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
} else {
- DOUT << "UNROLLING loop %" << Header->getName()
- << " by " << Count;
+ DEBUG(errs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
- DOUT << " with a breakout at trip " << BreakoutTrip;
+ DEBUG(errs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
- DOUT << " with " << TripMultiple << " trips per branch";
+ DEBUG(errs() << " with " << TripMultiple << " trips per branch");
}
- DOUT << "!\n";
+ DEBUG(errs() << "!\n");
}
std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
@@ -349,7 +351,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
+ else if (Constant *C = ConstantFoldInstruction(Inst,
+ Header->getContext())) {
Inst->replaceAllUsesWith(C);
(*BB)->getInstList().erase(Inst);
}
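The UnrollLoop.cpp hunks above are another migration applied throughout this commit: the old DOUT debug stream is replaced by the DEBUG() macro writing to an explicit errs() stream, which is why the raw_ostream.h include is added. A small sketch, assuming the usual DEBUG_TYPE convention:

    #define DEBUG_TYPE "loop-unroll"      // DEBUG() output is gated on -debug-only=loop-unroll
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h" // the include this hunk adds
    using namespace llvm;

    static void reportTripCount(unsigned TripCount) {
      // Before: DOUT << " Trip Count = " << TripCount << "\n";
      DEBUG(errs() << " Trip Count = " << TripCount << "\n");
    }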
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 20b676d0fb8d..2d8332f5252a 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,23 +13,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DerivedTypes.h" // For getNullValue(Type::Int32Ty)
#include "llvm/Constants.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
-#include "llvm/MDNode.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {
Value *&VMSlot = VM[V];
if (VMSlot) return VMSlot; // Does it exist in the map yet?
// NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
// DenseMap. This includes any recursive calls to MapValue.
- // Global values do not need to be seeded into the ValueMap if they are using
- // the identity mapping.
- if (isa<GlobalValue>(V) || isa<InlineAsm>(V))
+ // Global values and metadata do not need to be seeded into the ValueMap if
+ // they are using the identity mapping.
+ if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V))
return VMSlot = const_cast<Value*>(V);
if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
@@ -40,7 +44,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, Context);
if (MV != *i) {
// This array must contain a reference to a global, make a new array
// and return it.
@@ -51,7 +55,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
return VM[V] = ConstantArray::get(CA->getType(), Values);
}
}
@@ -60,7 +64,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
} else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, Context);
if (MV != *i) {
// This struct must contain a reference to a global, make a new struct
// and return it.
@@ -71,7 +75,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
return VM[V] = ConstantStruct::get(CS->getType(), Values);
}
}
@@ -80,12 +84,12 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
std::vector<Constant*> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+ Ops.push_back(cast<Constant>(MapValue(*i, VM, Context)));
return VM[V] = CE->getWithOperands(Ops);
} else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, Context);
if (MV != *i) {
// This vector value must contain a reference to a global, make a new
// vector constant and return it.
@@ -96,38 +100,16 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
return VM[V] = ConstantVector::get(Values);
}
}
return VM[V] = C;
- } else if (MDNode *N = dyn_cast<MDNode>(C)) {
- for (MDNode::const_elem_iterator b = N->elem_begin(), i = b,
- e = N->elem_end(); i != e; ++i) {
- if (!*i) continue;
-
- Value *MV = MapValue(*i, VM);
- if (MV != *i) {
- // This MDNode must contain a reference to a global, make a new MDNode
- // and return it.
- SmallVector<Value*, 8> Values;
- Values.reserve(N->getNumElements());
- for (MDNode::const_elem_iterator j = b; j != i; ++j)
- Values.push_back(*j);
- Values.push_back(MV);
- for (++i; i != e; ++i)
- Values.push_back(MapValue(*i, VM));
- return VM[V] = MDNode::get(Values.data(), Values.size());
- }
- }
- return VM[V] = C;
-
} else {
- assert(0 && "Unknown type of constant!");
+ llvm_unreachable("Unknown type of constant!");
}
}
-
return 0;
}
@@ -136,7 +118,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
///
void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, ValueMap);
+ Value *V = MapValue(*op, ValueMap, I->getParent()->getContext());
assert(V && "Referenced value not in value map!");
*op = V;
}
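With this change MapValue threads an LLVMContext through every recursive call, and metadata (MetadataBase) joins globals and inline asm in the identity mapping. A rough usage sketch mirroring the updated RemapInstruction above; ValueMapTy is the DenseMap typedef from ValueMapper.h:

    #include "llvm/BasicBlock.h"
    #include "llvm/Instruction.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    // Rewrite each operand through the map, threading the context taken
    // from the instruction's parent block.
    static void remapOperands(Instruction *I, ValueMapTy &VM) {
      LLVMContext &Ctx = I->getParent()->getContext();
      for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op)
        *op = MapValue(*op, VM, Ctx);   // globals/inline asm/metadata map to themselves
    }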
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index cbf7070d17ed..b5ae81b50f97 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -23,7 +23,8 @@
#include "llvm/InlineAsm.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
-#include "llvm/MDNode.h"
+#include "llvm/Operator.h"
+#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/TypeSymbolTable.h"
@@ -31,8 +32,10 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
#include <algorithm>
#include <cctype>
#include <map>
@@ -48,15 +51,15 @@ AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
static const Module *getModuleFromVal(const Value *V) {
if (const Argument *MA = dyn_cast<Argument>(V))
return MA->getParent() ? MA->getParent()->getParent() : 0;
-
+
if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
return BB->getParent() ? BB->getParent()->getParent() : 0;
-
+
if (const Instruction *I = dyn_cast<Instruction>(V)) {
const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
return M ? M->getParent() : 0;
}
-
+
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
return 0;
@@ -64,10 +67,10 @@ static const Module *getModuleFromVal(const Value *V) {
// PrintEscapedString - Print each character of the specified string, escaping
// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const char *Str, unsigned Length,
+static void PrintEscapedString(const StringRef &Name,
raw_ostream &Out) {
- for (unsigned i = 0; i != Length; ++i) {
- unsigned char C = Str[i];
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
if (isprint(C) && C != '\\' && C != '"')
Out << C;
else
@@ -75,12 +78,6 @@ static void PrintEscapedString(const char *Str, unsigned Length,
}
}
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
- PrintEscapedString(Str.c_str(), Str.size(), Out);
-}
-
enum PrefixType {
GlobalPrefix,
LabelPrefix,
@@ -91,39 +88,39 @@ enum PrefixType {
/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
/// prefixed with % (if the string only contains simple characters) or is
/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, const char *NameStr,
- unsigned NameLen, PrefixType Prefix) {
- assert(NameStr && "Cannot get empty name!");
+static void PrintLLVMName(raw_ostream &OS, const StringRef &Name,
+ PrefixType Prefix) {
+ assert(Name.data() && "Cannot get empty name!");
switch (Prefix) {
- default: assert(0 && "Bad prefix!");
+ default: llvm_unreachable("Bad prefix!");
case NoPrefix: break;
case GlobalPrefix: OS << '@'; break;
case LabelPrefix: break;
case LocalPrefix: OS << '%'; break;
}
-
+
// Scan the name to see if it needs quotes first.
- bool NeedsQuotes = isdigit(NameStr[0]);
+ bool NeedsQuotes = isdigit(Name[0]);
if (!NeedsQuotes) {
- for (unsigned i = 0; i != NameLen; ++i) {
- char C = NameStr[i];
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ char C = Name[i];
if (!isalnum(C) && C != '-' && C != '.' && C != '_') {
NeedsQuotes = true;
break;
}
}
}
-
+
// If we didn't need any quotes, just write out the name in one blast.
if (!NeedsQuotes) {
- OS.write(NameStr, NameLen);
+ OS << Name;
return;
}
-
+
// Okay, we need quotes. Output the quotes and escape any scary characters as
// needed.
OS << '"';
- PrintEscapedString(NameStr, NameLen, OS);
+ PrintEscapedString(Name, OS);
OS << '"';
}
@@ -131,7 +128,7 @@ static void PrintLLVMName(raw_ostream &OS, const char *NameStr,
/// prefixed with % (if the string only contains simple characters) or is
/// surrounded with ""'s (if it has special chars in it). Print it out.
static void PrintLLVMName(raw_ostream &OS, const Value *V) {
- PrintLLVMName(OS, V->getNameStart(), V->getNameLen(),
+ PrintLLVMName(OS, V->getName(),
isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
}
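These AsmWriter hunks swap (const char*, length) pairs such as getNameStart()/getNameLen() for a single StringRef. A sketch of the quoting test in that style; needsQuotes is a hypothetical helper, not from the patch:

    #include "llvm/ADT/StringRef.h"
    #include <cctype>
    using namespace llvm;

    static bool needsQuotes(const StringRef &Name) {
      if (Name.empty() || isdigit(Name[0]))
        return true;                    // a leading digit must be quoted
      for (unsigned i = 0, e = Name.size(); i != e; ++i) {
        char C = Name[i];
        if (!isalnum(C) && C != '-' && C != '.' && C != '_')
          return true;                  // any special char forces quoting
      }
      return false;
    }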
@@ -178,11 +175,11 @@ void TypePrinting::CalcTypeName(const Type *Ty,
return;
}
}
-
+
// Check to see if the Type is already on the stack...
unsigned Slot = 0, CurSize = TypeStack.size();
while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type
-
+
// This is another base case for the recursion. In this case, we know
// that we have looped back to a type that we have previously visited.
// Generate the appropriate upreference to handle this.
@@ -190,9 +187,9 @@ void TypePrinting::CalcTypeName(const Type *Ty,
OS << '\\' << unsigned(CurSize-Slot); // Here's the upreference
return;
}
-
+
TypeStack.push_back(Ty); // Recursive case: Add us to the stack..
-
+
switch (Ty->getTypeID()) {
case Type::VoidTyID: OS << "void"; break;
case Type::FloatTyID: OS << "float"; break;
@@ -205,7 +202,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
case Type::IntegerTyID:
OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
break;
-
+
case Type::FunctionTyID: {
const FunctionType *FTy = cast<FunctionType>(Ty);
CalcTypeName(FTy->getReturnType(), TypeStack, OS);
@@ -269,7 +266,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
OS << "<unrecognized-type>";
break;
}
-
+
TypeStack.pop_back(); // Remove self from stack.
}
@@ -287,13 +284,13 @@ void TypePrinting::print(const Type *Ty, raw_ostream &OS,
return;
}
}
-
+
// Otherwise we have a type that has not been named but is a derived type.
// Carefully recurse the type hierarchy to print out any contained symbolic
// names.
SmallVector<const Type *, 16> TypeStack;
std::string TypeName;
-
+
raw_string_ostream TypeOS(TypeName);
CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName);
OS << TypeOS.str();
@@ -309,13 +306,13 @@ namespace {
// objects, we keep several helper maps.
DenseSet<const Value*> VisitedConstants;
DenseSet<const Type*> VisitedTypes;
-
+
TypePrinting &TP;
std::vector<const Type*> &NumberedTypes;
public:
TypeFinder(TypePrinting &tp, std::vector<const Type*> &numberedTypes)
: TP(tp), NumberedTypes(numberedTypes) {}
-
+
void Run(const Module &M) {
      // Get types from the type symbol table. This gets opaque types referenced
// only through derived named types.
@@ -323,7 +320,7 @@ namespace {
for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
TI != E; ++TI)
IncorporateType(TI->second);
-
+
// Get types from global variables.
for (Module::const_global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
@@ -331,18 +328,18 @@ namespace {
if (I->hasInitializer())
IncorporateValue(I->getInitializer());
}
-
+
// Get types from aliases.
for (Module::const_alias_iterator I = M.alias_begin(),
E = M.alias_end(); I != E; ++I) {
IncorporateType(I->getType());
IncorporateValue(I->getAliasee());
}
-
+
// Get types from functions.
for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
IncorporateType(FI->getType());
-
+
for (Function::const_iterator BB = FI->begin(), E = FI->end();
BB != E;++BB)
for (BasicBlock::const_iterator II = BB->begin(),
@@ -356,40 +353,40 @@ namespace {
}
}
}
-
+
private:
void IncorporateType(const Type *Ty) {
      // Check to see if we've already visited this type.
if (!VisitedTypes.insert(Ty).second)
return;
-
+
// If this is a structure or opaque type, add a name for the type.
if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements())
|| isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) {
TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
NumberedTypes.push_back(Ty);
}
-
+
// Recursively walk all contained types.
for (Type::subtype_iterator I = Ty->subtype_begin(),
E = Ty->subtype_end(); I != E; ++I)
- IncorporateType(*I);
+ IncorporateType(*I);
}
-
+
/// IncorporateValue - This method is used to walk operand lists finding
/// types hiding in constant expressions and other operands that won't be
/// walked in other ways. GlobalValues, basic blocks, instructions, and
/// inst operands are all explicitly enumerated.
void IncorporateValue(const Value *V) {
if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return;
-
+
// Already visited?
if (!VisitedConstants.insert(V).second)
return;
-
+
// Check this type.
IncorporateType(V->getType());
-
+
// Look in operands for types.
const Constant *C = cast<Constant>(V);
for (Constant::const_op_iterator I = C->op_begin(),
@@ -403,18 +400,18 @@ namespace {
/// AddModuleTypesToPrinter - Add all of the symbolic type names for types in
/// the specified module to the TypePrinter and all numbered types to it and the
/// NumberedTypes table.
-static void AddModuleTypesToPrinter(TypePrinting &TP,
+static void AddModuleTypesToPrinter(TypePrinting &TP,
std::vector<const Type*> &NumberedTypes,
const Module *M) {
if (M == 0) return;
-
+
// If the module has a symbol table, take all global types and stuff their
// names into the TypeNames map.
const TypeSymbolTable &ST = M->getTypeSymbolTable();
for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
TI != E; ++TI) {
const Type *Ty = cast<Type>(TI->second);
-
+
// As a heuristic, don't insert pointer to primitive types, because
// they are used too often to have a single useful name.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
@@ -423,18 +420,20 @@ static void AddModuleTypesToPrinter(TypePrinting &TP,
!isa<OpaqueType>(PETy))
continue;
}
-
+
// Likewise don't insert primitives either.
if (Ty->isInteger() || Ty->isPrimitiveType())
continue;
-
+
// Get the name as a string and insert it into TypeNames.
std::string NameStr;
- raw_string_ostream NameOS(NameStr);
- PrintLLVMName(NameOS, TI->first.c_str(), TI->first.length(), LocalPrefix);
- TP.addTypeName(Ty, NameOS.str());
+ raw_string_ostream NameROS(NameStr);
+ formatted_raw_ostream NameOS(NameROS);
+ PrintLLVMName(NameOS, TI->first, LocalPrefix);
+ NameOS.flush();
+ TP.addTypeName(Ty, NameStr);
}
-
+
// Walk the entire module to find references to unnamed structure and opaque
// types. This is required for correctness by opaque types (because multiple
  // uses of an unnamed opaque type need to be referred to by the same ID) and
@@ -464,35 +463,49 @@ namespace {
///
class SlotTracker {
public:
- /// ValueMap - A mapping of Values to slot numbers
+ /// ValueMap - A mapping of Values to slot numbers.
typedef DenseMap<const Value*, unsigned> ValueMap;
-
-private:
- /// TheModule - The module for which we are holding slot numbers
+
+private:
+ /// TheModule - The module for which we are holding slot numbers.
const Module* TheModule;
-
- /// TheFunction - The function for which we are holding slot numbers
+
+ /// TheFunction - The function for which we are holding slot numbers.
const Function* TheFunction;
bool FunctionProcessed;
-
- /// mMap - The TypePlanes map for the module level data
+
+ /// TheMDNode - The MDNode for which we are holding slot numbers.
+ const MDNode *TheMDNode;
+
+  /// TheNamedMDNode - The NamedMDNode for which we are holding slot numbers.
+ const NamedMDNode *TheNamedMDNode;
+
+ /// mMap - The TypePlanes map for the module level data.
ValueMap mMap;
unsigned mNext;
-
- /// fMap - The TypePlanes map for the function level data
+
+ /// fMap - The TypePlanes map for the function level data.
ValueMap fMap;
unsigned fNext;
-
+
+ /// mdnMap - Map for MDNodes.
+ ValueMap mdnMap;
+ unsigned mdnNext;
public:
/// Construct from a module
explicit SlotTracker(const Module *M);
/// Construct from a function, starting out in incorp state.
explicit SlotTracker(const Function *F);
+  /// Construct from an MDNode.
+ explicit SlotTracker(const MDNode *N);
+  /// Construct from a NamedMDNode.
+ explicit SlotTracker(const NamedMDNode *N);
  /// Return the slot number of the specified value in its type
/// plane. If something is not in the SlotTracker, return -1.
int getLocalSlot(const Value *V);
int getGlobalSlot(const GlobalValue *V);
+ int getMetadataSlot(const MDNode *N);
/// If you'd like to deal with a function instead of just a module, use
/// this method to get its data into the SlotTracker.
@@ -506,14 +519,23 @@ public:
/// will reset the state of the machine back to just the module contents.
void purgeFunction();
- // Implementation Details
-private:
+ /// MDNode map iterators.
+ ValueMap::iterator mdnBegin() { return mdnMap.begin(); }
+ ValueMap::iterator mdnEnd() { return mdnMap.end(); }
+ unsigned mdnSize() const { return mdnMap.size(); }
+ bool mdnEmpty() const { return mdnMap.empty(); }
+
/// This function does the actual initialization.
inline void initialize();
+ // Implementation Details
+private:
/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
void CreateModuleSlot(const GlobalValue *V);
-
+
+ /// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+ void CreateMetadataSlot(const MDNode *N);
+
/// CreateFunctionSlot - Insert the specified Value* into the slot table.
void CreateFunctionSlot(const Value *V);
@@ -521,9 +543,15 @@ private:
/// and function declarations, but not the contents of those functions.
void processModule();
- /// Add all of the functions arguments, basic blocks, and instructions
+  /// Add all of the function's arguments, basic blocks, and instructions.
void processFunction();
+ /// Add all MDNode operands.
+ void processMDNode();
+
+  /// Add all NamedMDNode operands.
+ void processNamedMDNode();
+
SlotTracker(const SlotTracker &); // DO NOT IMPLEMENT
void operator=(const SlotTracker &); // DO NOT IMPLEMENT
};
@@ -534,27 +562,27 @@ private:
static SlotTracker *createSlotTracker(const Value *V) {
if (const Argument *FA = dyn_cast<Argument>(V))
return new SlotTracker(FA->getParent());
-
+
if (const Instruction *I = dyn_cast<Instruction>(V))
return new SlotTracker(I->getParent()->getParent());
-
+
if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
return new SlotTracker(BB->getParent());
-
+
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return new SlotTracker(GV->getParent());
-
+
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
- return new SlotTracker(GA->getParent());
-
+ return new SlotTracker(GA->getParent());
+
if (const Function *Func = dyn_cast<Function>(V))
return new SlotTracker(Func);
-
+
return 0;
}
#if 0
-#define ST_DEBUG(X) cerr << X
+#define ST_DEBUG(X) errs() << X
#else
#define ST_DEBUG(X)
#endif
@@ -562,14 +590,27 @@ static SlotTracker *createSlotTracker(const Value *V) {
// Module level constructor. Causes the contents of the Module (sans functions)
// to be added to the slot table.
SlotTracker::SlotTracker(const Module *M)
- : TheModule(M), TheFunction(0), FunctionProcessed(false), mNext(0), fNext(0) {
+ : TheModule(M), TheFunction(0), FunctionProcessed(false), TheMDNode(0),
+ TheNamedMDNode(0), mNext(0), fNext(0), mdnNext(0) {
}
// Function level constructor. Causes the contents of the Module and the one
// function provided to be added to the slot table.
SlotTracker::SlotTracker(const Function *F)
: TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
- mNext(0), fNext(0) {
+ TheMDNode(0), TheNamedMDNode(0), mNext(0), fNext(0), mdnNext(0) {
+}
+
+// Constructor to handle single MDNode.
+SlotTracker::SlotTracker(const MDNode *C)
+ : TheModule(0), TheFunction(0), FunctionProcessed(false), TheMDNode(C),
+ TheNamedMDNode(0), mNext(0), fNext(0), mdnNext(0) {
+}
+
+// Constructor to handle single NamedMDNode.
+SlotTracker::SlotTracker(const NamedMDNode *N)
+ : TheModule(0), TheFunction(0), FunctionProcessed(false), TheMDNode(0),
+ TheNamedMDNode(N), mNext(0), fNext(0), mdnNext(0) {
}
inline void SlotTracker::initialize() {
@@ -577,60 +618,120 @@ inline void SlotTracker::initialize() {
processModule();
TheModule = 0; ///< Prevent re-processing next time we're called.
}
-
+
if (TheFunction && !FunctionProcessed)
processFunction();
+
+ if (TheMDNode)
+ processMDNode();
+
+ if (TheNamedMDNode)
+ processNamedMDNode();
}
// Iterate through all the global variables, functions, and global
// variable initializers and create slots for them.
void SlotTracker::processModule() {
ST_DEBUG("begin processModule!\n");
-
+
// Add all of the unnamed global variables to the value table.
for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I)
- if (!I->hasName())
+ E = TheModule->global_end(); I != E; ++I) {
+ if (!I->hasName())
CreateModuleSlot(I);
-
+ if (I->hasInitializer()) {
+ if (MDNode *N = dyn_cast<MDNode>(I->getInitializer()))
+ CreateMetadataSlot(N);
+ }
+ }
+
+ // Add metadata used by named metadata.
+ for (Module::const_named_metadata_iterator
+ I = TheModule->named_metadata_begin(),
+ E = TheModule->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ MDNode *MD = dyn_cast_or_null<MDNode>(NMD->getElement(i));
+ if (MD)
+ CreateMetadataSlot(MD);
+ }
+ }
+
// Add all the unnamed functions to the table.
for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
I != E; ++I)
if (!I->hasName())
CreateModuleSlot(I);
-
+
ST_DEBUG("end processModule!\n");
}
-
// Process the arguments, basic blocks, and instructions of a function.
void SlotTracker::processFunction() {
ST_DEBUG("begin processFunction!\n");
fNext = 0;
-
+
// Add all the function arguments with no names.
for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
AE = TheFunction->arg_end(); AI != AE; ++AI)
if (!AI->hasName())
CreateFunctionSlot(AI);
-
+
ST_DEBUG("Inserting Instructions:\n");
-
+
+ MetadataContext &TheMetadata = TheFunction->getContext().getMetadata();
+
// Add all of the basic blocks and instructions with no names.
for (Function::const_iterator BB = TheFunction->begin(),
E = TheFunction->end(); BB != E; ++BB) {
if (!BB->hasName())
CreateFunctionSlot(BB);
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (I->getType() != Type::VoidTy && !I->hasName())
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ if (I->getType() != Type::getVoidTy(TheFunction->getContext()) &&
+ !I->hasName())
CreateFunctionSlot(I);
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+ CreateMetadataSlot(N);
+
+ // Process metadata attached with this instruction.
+ const MetadataContext::MDMapTy *MDs = TheMetadata.getMDs(I);
+ if (MDs)
+ for (MetadataContext::MDMapTy::const_iterator MI = MDs->begin(),
+ ME = MDs->end(); MI != ME; ++MI)
+ if (MDNode *MDN = dyn_cast_or_null<MDNode>(MI->second))
+ CreateMetadataSlot(MDN);
+ }
}
-
+
FunctionProcessed = true;
-
+
ST_DEBUG("end processFunction!\n");
}
+/// processMDNode - Process TheMDNode.
+void SlotTracker::processMDNode() {
+ ST_DEBUG("begin processMDNode!\n");
+ mdnNext = 0;
+ CreateMetadataSlot(TheMDNode);
+ TheMDNode = 0;
+ ST_DEBUG("end processMDNode!\n");
+}
+
+/// processNamedMDNode - Process TheNamedMDNode.
+void SlotTracker::processNamedMDNode() {
+ ST_DEBUG("begin processNamedMDNode!\n");
+ mdnNext = 0;
+ for (unsigned i = 0, e = TheNamedMDNode->getNumElements(); i != e; ++i) {
+ MDNode *MD = dyn_cast_or_null<MDNode>(TheNamedMDNode->getElement(i));
+ if (MD)
+ CreateMetadataSlot(MD);
+ }
+ TheNamedMDNode = 0;
+ ST_DEBUG("end processNamedMDNode!\n");
+}
+
/// Clean up after incorporating a function. This is the only way to get out of
/// the function incorporation state that affects get*Slot/Create*Slot. Function
/// incorporation state is indicated by TheFunction != 0.
@@ -646,20 +747,30 @@ void SlotTracker::purgeFunction() {
int SlotTracker::getGlobalSlot(const GlobalValue *V) {
// Check for uninitialized state and do lazy initialization.
initialize();
-
+
// Find the type plane in the module map
ValueMap::iterator MI = mMap.find(V);
return MI == mMap.end() ? -1 : (int)MI->second;
}
+/// getMetadataSlot - Get the slot number of an MDNode.
+int SlotTracker::getMetadataSlot(const MDNode *N) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+  // Find the MDNode in the metadata map.
+ ValueMap::iterator MI = mdnMap.find(N);
+ return MI == mdnMap.end() ? -1 : (int)MI->second;
+}
+
/// getLocalSlot - Get the slot number for a value that is local to a function.
int SlotTracker::getLocalSlot(const Value *V) {
assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
-
+
// Check for uninitialized state and do lazy initialization.
initialize();
-
+
ValueMap::iterator FI = fMap.find(V);
return FI == fMap.end() ? -1 : (int)FI->second;
}
@@ -668,12 +779,13 @@ int SlotTracker::getLocalSlot(const Value *V) {
/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
assert(V && "Can't insert a null Value into SlotTracker!");
- assert(V->getType() != Type::VoidTy && "Doesn't need a slot!");
+ assert(V->getType() != Type::getVoidTy(V->getContext()) &&
+ "Doesn't need a slot!");
assert(!V->hasName() && "Doesn't need a slot!");
-
+
unsigned DestSlot = mNext++;
mMap[V] = DestSlot;
-
+
ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
DestSlot << " [");
// G = Global, F = Function, A = Alias, o = other
@@ -682,28 +794,45 @@ void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
(isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
}
-
/// CreateFunctionSlot - Create a new slot for the specified value if it has no name.
void SlotTracker::CreateFunctionSlot(const Value *V) {
- assert(V->getType() != Type::VoidTy && !V->hasName() &&
- "Doesn't need a slot!");
-
+ assert(V->getType() != Type::getVoidTy(TheFunction->getContext()) &&
+ !V->hasName() && "Doesn't need a slot!");
+
unsigned DestSlot = fNext++;
fMap[V] = DestSlot;
-
+
// G = Global, F = Function, o = other
ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
DestSlot << " [o]\n");
-}
+}
+/// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+void SlotTracker::CreateMetadataSlot(const MDNode *N) {
+ assert(N && "Can't insert a null Value into SlotTracker!");
+
+ ValueMap::iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+
+ for (MDNode::const_elem_iterator MDI = N->elem_begin(),
+ MDE = N->elem_end(); MDI != MDE; ++MDI) {
+ const Value *TV = *MDI;
+ if (TV)
+ if (const MDNode *N2 = dyn_cast<MDNode>(TV))
+ CreateMetadataSlot(N2);
+ }
+}
//===----------------------------------------------------------------------===//
// AsmWriter Implementation
//===----------------------------------------------------------------------===//
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting &TypePrinter,
+ TypePrinting *TypePrinter,
SlotTracker *Machine);
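CreateMetadataSlot above numbers a node on first visit and then recurses into its MDNode operands, so every reachable node has a slot before any !N reference is printed. A self-contained model of that scheme, using plain C++ stand-ins rather than LLVM types:

    #include <map>
    #include <vector>

    struct Node {                          // stand-in for llvm::MDNode
      std::vector<const Node*> Elems;      // null entries are allowed
    };

    static void assignSlot(const Node *N, std::map<const Node*, unsigned> &Slots,
                           unsigned &Next) {
      if (!N || Slots.count(N))
        return;                            // null or already numbered
      Slots[N] = Next++;                   // number before recursing
      for (unsigned i = 0, e = N->Elems.size(); i != e; ++i)
        assignSlot(N->Elems[i], Slots, Next);
    }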
@@ -741,17 +870,93 @@ static const char *getPredicateText(unsigned predicate) {
return pred;
}
+static void WriteMDNodeComment(const MDNode *Node,
+ formatted_raw_ostream &Out) {
+ if (Node->getNumElements() < 1)
+ return;
+ ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getElement(0));
+ if (!CI) return;
+ unsigned Val = CI->getZExtValue();
+ unsigned Tag = Val & ~LLVMDebugVersionMask;
+ if (Val >= LLVMDebugVersion) {
+ if (Tag == dwarf::DW_TAG_auto_variable)
+ Out << "; [ DW_TAG_auto_variable ]";
+ else if (Tag == dwarf::DW_TAG_arg_variable)
+ Out << "; [ DW_TAG_arg_variable ]";
+ else if (Tag == dwarf::DW_TAG_return_variable)
+ Out << "; [ DW_TAG_return_variable ]";
+ else if (Tag == dwarf::DW_TAG_vector_type)
+ Out << "; [ DW_TAG_vector_type ]";
+ else if (Tag == dwarf::DW_TAG_user_base)
+ Out << "; [ DW_TAG_user_base ]";
+ else
+ Out << "; [" << dwarf::TagString(Tag) << " ]";
+ }
+}
+
+static void WriteMDNodes(formatted_raw_ostream &Out, TypePrinting &TypePrinter,
+ SlotTracker &Machine) {
+ SmallVector<const MDNode *, 16> Nodes;
+ Nodes.resize(Machine.mdnSize());
+ for (SlotTracker::ValueMap::iterator I =
+ Machine.mdnBegin(), E = Machine.mdnEnd(); I != E; ++I)
+ Nodes[I->second] = cast<MDNode>(I->first);
+
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ Out << '!' << i << " = metadata ";
+ const MDNode *Node = Nodes[i];
+ Out << "!{";
+ for (MDNode::const_elem_iterator NI = Node->elem_begin(),
+ NE = Node->elem_end(); NI != NE;) {
+ const Value *V = *NI;
+ if (!V)
+ Out << "null";
+ else if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ Out << "metadata ";
+ Out << '!' << Machine.getMetadataSlot(N);
+ }
+ else {
+ TypePrinter.print((*NI)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *NI, &TypePrinter, &Machine);
+ }
+ if (++NI != NE)
+ Out << ", ";
+ }
+
+ Out << "}";
+ WriteMDNodeComment(Node, Out);
+ Out << "\n";
+ }
+}
+
+static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(U)) {
+ if (OBO->hasNoUnsignedWrap())
+ Out << " nuw";
+ if (OBO->hasNoSignedWrap())
+ Out << " nsw";
+ } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(U)) {
+ if (Div->isExact())
+ Out << " exact";
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+ if (GEP->isInBounds())
+ Out << " inbounds";
+ }
+}
+
static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
TypePrinting &TypePrinter, SlotTracker *Machine) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (CI->getType() == Type::Int1Ty) {
+ if (CI->getType() == Type::getInt1Ty(CV->getContext())) {
Out << (CI->getZExtValue() ? "true" : "false");
return;
}
Out << CI->getValue();
return;
}
-
+
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble ||
&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
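The new WriteOptimizationInfo hook introduced in the hunk above prints the nuw/nsw/exact/inbounds flags via the Operator.h cast hierarchy, for instructions and constant expressions alike. An illustrative driver (ours, not from the commit; it assumes the CreateNSWAdd helper present in this LLVM version):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext &Ctx = getGlobalContext();
      Value *A = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
      Value *B = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
      BinaryOperator *Add = BinaryOperator::CreateNSWAdd(A, B, "t");
      Add->print(errs());                  // prints roughly: %t = add nsw i32 1, 2
      delete Add;                          // detached instruction, safe to delete
      return 0;
    }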
@@ -789,14 +994,14 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
APFloat apf = CFP->getValueAPF();
// Floats are represented in ASCII IR as double, convert.
if (!isDouble)
- apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&ignored);
- Out << "0x" <<
- utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
+ Out << "0x" <<
+ utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
Buffer+40);
return;
}
-
+
// Some form of long double. These appear as a magic letter identifying
// the type, then a fixed number of hex digits.
Out << "0x";
@@ -827,7 +1032,7 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble)
Out << 'M';
else
- assert(0 && "Unsupported floating point type");
+ llvm_unreachable("Unsupported floating point type");
// api needed to prevent premature destruction
APInt api = CFP->getValueAPF().bitcastToAPInt();
const uint64_t* p = api.getRawData();
@@ -849,12 +1054,12 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
}
return;
}
-
+
if (isa<ConstantAggregateZero>(CV)) {
Out << "zeroinitializer";
return;
}
-
+
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
// As a special case, print the array as a string if it is an array of
// i8 with ConstantInt values.
@@ -870,19 +1075,19 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
TypePrinter.print(ETy, Out);
Out << ' ';
WriteAsOperandInternal(Out, CA->getOperand(0),
- TypePrinter, Machine);
+ &TypePrinter, Machine);
for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine);
}
}
Out << ']';
}
return;
}
-
+
if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
if (CS->getType()->isPacked())
Out << '<';
@@ -893,24 +1098,24 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
TypePrinter.print(CS->getOperand(0)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(0), TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine);
for (unsigned i = 1; i < N; i++) {
Out << ", ";
TypePrinter.print(CS->getOperand(i)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(i), TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine);
}
Out << ' ';
}
-
+
Out << '}';
if (CS->getType()->isPacked())
Out << '>';
return;
}
-
+
if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
const Type *ETy = CP->getType()->getElementType();
assert(CP->getNumOperands() > 0 &&
@@ -918,36 +1123,35 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
Out << '<';
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(0), TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine);
for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(i), TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine);
}
Out << '>';
return;
}
-
+
if (isa<ConstantPointerNull>(CV)) {
Out << "null";
return;
}
-
+
if (isa<UndefValue>(CV)) {
Out << "undef";
return;
}
-
- if (const MDString *S = dyn_cast<MDString>(CV)) {
- Out << "!\"";
- PrintEscapedString(S->begin(), S->size(), Out);
- Out << '"';
+
+ if (const MDNode *Node = dyn_cast<MDNode>(CV)) {
+ Out << "!" << Machine->getMetadataSlot(Node);
return;
}
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
Out << CE->getOpcodeName();
+ WriteOptimizationInfo(Out, CE);
if (CE->isCompare())
Out << ' ' << getPredicateText(CE->getPredicate());
Out << " (";
@@ -955,7 +1159,7 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
TypePrinter.print((*OI)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, *OI, TypePrinter, Machine);
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine);
if (OI+1 != CE->op_end())
Out << ", ";
}
@@ -974,7 +1178,7 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
Out << ')';
return;
}
-
+
Out << "<placeholder or erroneous Constant>";
}
@@ -984,23 +1188,26 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
/// the whole instruction that generated it.
///
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting &TypePrinter,
+ TypePrinting *TypePrinter,
SlotTracker *Machine) {
if (V->hasName()) {
PrintLLVMName(Out, V);
return;
}
-
+
const Constant *CV = dyn_cast<Constant>(V);
if (CV && !isa<GlobalValue>(CV)) {
- WriteConstantInt(Out, CV, TypePrinter, Machine);
+ assert(TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInt(Out, CV, *TypePrinter, Machine);
return;
}
-
+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
Out << "asm ";
if (IA->hasSideEffects())
Out << "sideeffect ";
+ if (IA->isMsAsm())
+ Out << "msasm ";
Out << '"';
PrintEscapedString(IA->getAsmString(), Out);
Out << "\", \"";
@@ -1008,7 +1215,24 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
Out << '"';
return;
}
-
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ Out << '!' << Machine->getMetadataSlot(N);
+ return;
+ }
+
+ if (const MDString *MDS = dyn_cast<MDString>(V)) {
+ Out << "!\"";
+ PrintEscapedString(MDS->getString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (V->getValueID() == Value::PseudoSourceValueVal) {
+ V->print(Out);
+ return;
+ }
+
char Prefix = '%';
int Slot;
if (Machine) {
@@ -1027,30 +1251,29 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
} else {
Slot = Machine->getLocalSlot(V);
}
+ delete Machine;
} else {
Slot = -1;
}
- delete Machine;
}
-
+
if (Slot != -1)
Out << Prefix << Slot;
else
Out << "<badref>";
}
-/// WriteAsOperand - Write the name of the specified value out to the specified
-/// ostream. This can be useful when you just want to print int %reg126, not
-/// the whole instruction that generated it.
-///
-void llvm::WriteAsOperand(std::ostream &Out, const Value *V, bool PrintType,
- const Module *Context) {
- raw_os_ostream OS(Out);
- WriteAsOperand(OS, V, PrintType, Context);
-}
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
+ bool PrintType, const Module *Context) {
+
+ // Fast path: Don't construct and populate a TypePrinting object if we
+ // won't be needing any types printed.
+ if (!PrintType &&
+ (!isa<Constant>(V) || V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0);
+ return;
+ }
-void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, bool PrintType,
- const Module *Context) {
if (Context == 0) Context = getModuleFromVal(V);
TypePrinting TypePrinter;
@@ -1061,32 +1284,40 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, bool PrintType,
Out << ' ';
}
- WriteAsOperandInternal(Out, V, TypePrinter, 0);
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0);
}
-
namespace {
class AssemblyWriter {
- raw_ostream &Out;
+ formatted_raw_ostream &Out;
SlotTracker &Machine;
const Module *TheModule;
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
std::vector<const Type*> NumberedTypes;
+ DenseMap<unsigned, const char *> MDNames;
- // Each MDNode is assigned unique MetadataIDNo.
- std::map<const MDNode *, unsigned> MDNodes;
- unsigned MetadataIDNo;
public:
- inline AssemblyWriter(raw_ostream &o, SlotTracker &Mac, const Module *M,
+ inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M,
AssemblyAnnotationWriter *AAW)
- : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW), MetadataIDNo(0) {
+ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M);
+ // FIXME: Provide MDPrinter
+ if (M) {
+ MetadataContext &TheMetadata = M->getContext().getMetadata();
+ const StringMap<unsigned> *Names = TheMetadata.getHandlerNames();
+ for (StringMapConstIterator<unsigned> I = Names->begin(),
+ E = Names->end(); I != E; ++I) {
+ const StringMapEntry<unsigned> &Entry = *I;
+ MDNames[I->second] = Entry.getKeyData();
+ }
+ }
}
void write(const Module *M) { printModule(M); }
-
+
void write(const GlobalValue *G) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(G))
printGlobal(GV);
@@ -1095,17 +1326,14 @@ public:
else if (const Function *F = dyn_cast<Function>(G))
printFunction(F);
else
- assert(0 && "Unknown global");
+ llvm_unreachable("Unknown global");
}
-
+
void write(const BasicBlock *BB) { printBasicBlock(BB); }
void write(const Instruction *I) { printInstruction(*I); }
void writeOperand(const Value *Op, bool PrintType);
void writeParamOperand(const Value *Operand, Attributes Attrs);
- void printMDNode(const MDNode *Node, bool StandAlone);
-
- const Module* getModule() { return TheModule; }
private:
void printModule(const Module *M);
@@ -1132,11 +1360,11 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
TypePrinter.print(Operand->getType(), Out);
Out << ' ';
}
- WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
}
}
-void AssemblyWriter::writeParamOperand(const Value *Operand,
+void AssemblyWriter::writeParamOperand(const Value *Operand,
Attributes Attrs) {
if (Operand == 0) {
Out << "<null operand!>";
@@ -1148,7 +1376,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
Out << ' ' << Attribute::getAsString(Attrs);
Out << ' ';
// Print the operand
- WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
}
}
@@ -1169,6 +1397,7 @@ void AssemblyWriter::printModule(const Module *M) {
std::string Asm = M->getModuleInlineAsm();
size_t CurPos = 0;
size_t NewLine = Asm.find_first_of('\n', CurPos);
+ Out << '\n';
while (NewLine != std::string::npos) {
// We found a newline, print the portion of the asm string from the
// last newline up to this newline.
@@ -1183,11 +1412,12 @@ void AssemblyWriter::printModule(const Module *M) {
PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
Out << "\"\n";
}
-
+
// Loop over the dependent libraries and emit them.
Module::lib_iterator LI = M->lib_begin();
Module::lib_iterator LE = M->lib_end();
if (LI != LE) {
+ Out << '\n';
Out << "deplibs = [ ";
while (LI != LE) {
Out << '"' << *LI << '"';
@@ -1195,16 +1425,19 @@ void AssemblyWriter::printModule(const Module *M) {
if (LI != LE)
Out << ", ";
}
- Out << " ]\n";
+ Out << " ]";
}
// Loop over the symbol table, emitting all id'd types.
+ if (!M->getTypeSymbolTable().empty() || !NumberedTypes.empty()) Out << '\n';
printTypeSymbolTable(M->getTypeSymbolTable());
+ // Output all globals.
+ if (!M->global_empty()) Out << '\n';
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I)
printGlobal(I);
-
+
// Output all aliases.
if (!M->alias_empty()) Out << "\n";
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
@@ -1214,36 +1447,55 @@ void AssemblyWriter::printModule(const Module *M) {
// Output all of the functions.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
printFunction(I);
+
+ // Output named metadata.
+ if (!M->named_metadata_empty()) Out << '\n';
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ Out << "!" << NMD->getName() << " = !{";
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ if (i) Out << ", ";
+ MDNode *MD = dyn_cast_or_null<MDNode>(NMD->getElement(i));
+ Out << '!' << Machine.getMetadataSlot(MD);
+ }
+ Out << "}\n";
+ }
+
+ // Output metadata.
+ if (!Machine.mdnEmpty()) Out << '\n';
+ WriteMDNodes(Out, TypePrinter, Machine);
}
-static void PrintLinkage(GlobalValue::LinkageTypes LT, raw_ostream &Out) {
+static void PrintLinkage(GlobalValue::LinkageTypes LT,
+ formatted_raw_ostream &Out) {
switch (LT) {
- case GlobalValue::PrivateLinkage: Out << "private "; break;
- case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::ExternalLinkage: break;
+ case GlobalValue::PrivateLinkage: Out << "private "; break;
+ case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
+ case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
+ case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
+ case GlobalValue::CommonLinkage: Out << "common "; break;
+ case GlobalValue::AppendingLinkage: Out << "appending "; break;
+ case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
+ case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
+ case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "available_externally ";
break;
- case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
- case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
- case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
- case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
- case GlobalValue::CommonLinkage: Out << "common "; break;
- case GlobalValue::AppendingLinkage: Out << "appending "; break;
- case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
- case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
- case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
- case GlobalValue::ExternalLinkage: break;
case GlobalValue::GhostLinkage:
- Out << "GhostLinkage not allowed in AsmWriter!\n";
- abort();
+ llvm_unreachable("GhostLinkage not allowed in AsmWriter!");
}
}
static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
- raw_ostream &Out) {
+ formatted_raw_ostream &Out) {
switch (Vis) {
- default: assert(0 && "Invalid visibility style!");
+ default: llvm_unreachable("Invalid visibility style!");
case GlobalValue::DefaultVisibility: break;
case GlobalValue::HiddenVisibility: Out << "hidden "; break;
case GlobalValue::ProtectedVisibility: Out << "protected "; break;
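Earlier in this hunk printModule learned to emit named metadata as !name = !{!0, !1, ...}. A hedged example of producing that form; the MDNode::get and NamedMDNode::Create signatures shown are assumed from this era's Metadata.h:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext &Ctx = getGlobalContext();
      Module M("md-demo", Ctx);
      Value *Elt = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
      MDNode *N = MDNode::get(Ctx, &Elt, 1);        // printed as: !0 = metadata !{i32 42}
      MetadataBase *Ops[] = { N };
      NamedMDNode::Create(Ctx, "foo", Ops, 1, &M);  // printed as: !foo = !{!0}
      M.print(outs(), 0);
      return 0;
    }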
@@ -1251,36 +1503,12 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
}
void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
- if (GV->hasInitializer())
- // If GV is initialized using Metadata then separate out metadata
- // operands used by the initializer. Note, MDNodes are not cyclic.
- if (MDNode *N = dyn_cast<MDNode>(GV->getInitializer())) {
- SmallVector<const MDNode *, 4> WorkList;
- // Collect MDNodes used by the initializer.
- for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end();
- I != E; ++I) {
- const Value *TV = *I;
- if (TV)
- if (const MDNode *NN = dyn_cast<MDNode>(TV))
- WorkList.push_back(NN);
- }
-
- // Print MDNodes used by the initializer.
- while (!WorkList.empty()) {
- const MDNode *N = WorkList.back(); WorkList.pop_back();
- printMDNode(N, true);
- Out << '\n';
- }
- }
-
- if (GV->hasName()) {
- PrintLLVMName(Out, GV);
- Out << " = ";
- }
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine);
+ Out << " = ";
if (!GV->hasInitializer() && GV->hasExternalLinkage())
Out << "external ";
-
+
PrintLinkage(GV->getLinkage(), Out);
PrintVisibility(GV->getVisibility(), Out);
@@ -1292,12 +1520,9 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
if (GV->hasInitializer()) {
Out << ' ';
- if (MDNode *N = dyn_cast<MDNode>(GV->getInitializer()))
- printMDNode(N, false);
- else
- writeOperand(GV->getInitializer(), false);
+ writeOperand(GV->getInitializer(), false);
}
-
+
if (GV->hasSection())
Out << ", section \"" << GV->getSection() << '"';
if (GV->getAlignment())
@@ -1307,47 +1532,6 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
Out << '\n';
}
-void AssemblyWriter::printMDNode(const MDNode *Node,
- bool StandAlone) {
- std::map<const MDNode *, unsigned>::iterator MI = MDNodes.find(Node);
- // If this node is already printed then just refer it using its Metadata
- // id number.
- if (MI != MDNodes.end()) {
- if (!StandAlone)
- Out << "!" << MI->second;
- return;
- }
-
- if (StandAlone) {
- // Print standalone MDNode.
- // !42 = !{ ... }
- Out << "!" << MetadataIDNo << " = ";
- Out << "constant metadata ";
- }
-
- Out << "!{";
- for (MDNode::const_elem_iterator I = Node->elem_begin(), E = Node->elem_end();
- I != E;) {
- const Value *TV = *I;
- if (!TV)
- Out << "null";
- else if (const MDNode *N = dyn_cast<MDNode>(TV)) {
- TypePrinter.print(N->getType(), Out);
- Out << ' ';
- printMDNode(N, StandAlone);
- }
- else if (!*I)
- Out << "null";
- else
- writeOperand(*I, true);
- if (++I != E)
- Out << ", ";
- }
- Out << "}";
-
- MDNodes[Node] = MetadataIDNo++;
-}
-
void AssemblyWriter::printAlias(const GlobalAlias *GA) {
// Don't crash when dumping partially built GA
if (!GA->hasName())
@@ -1361,9 +1545,9 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
Out << "alias ";
PrintLinkage(GA->getLinkage(), Out);
-
+
const Constant *Aliasee = GA->getAliasee();
-
+
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
TypePrinter.print(GV->getType(), Out);
Out << ' ';
@@ -1372,7 +1556,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
TypePrinter.print(F->getFunctionType(), Out);
Out << "* ";
- WriteAsOperandInternal(Out, F, TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
} else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
TypePrinter.print(GA->getType(), Out);
Out << ' ';
@@ -1385,7 +1569,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
"Unsupported aliasee");
writeOperand(CE, false);
}
-
+
printInfoComment(*GA);
Out << '\n';
}
@@ -1393,19 +1577,18 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) {
// Emit all numbered types.
for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
- Out << "\ttype ";
-
+ Out << '%' << i << " = type ";
+
// Make sure we print out at least one level of the type structure, so
// that we do not get %2 = type %2
TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out);
- Out << "\t\t; type %" << i << '\n';
+ Out << '\n';
}
-
+
// Print the named types.
for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end();
TI != TE; ++TI) {
- Out << '\t';
- PrintLLVMName(Out, &TI->first[0], TI->first.size(), LocalPrefix);
+ PrintLLVMName(Out, TI->first, LocalPrefix);
Out << " = type ";
// Make sure we print out at least one level of the type structure, so
@@ -1427,7 +1610,7 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << "declare ";
else
Out << "define ";
-
+
PrintLinkage(F->getLinkage(), Out);
PrintVisibility(F->getVisibility(), Out);
@@ -1451,7 +1634,7 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
TypePrinter.print(F->getReturnType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, F, TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
Out << '(';
Machine.incorporateFunction(F);
@@ -1472,10 +1655,10 @@ void AssemblyWriter::printFunction(const Function *F) {
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
// Insert commas as we go... the first arg doesn't get a comma
if (i) Out << ", ";
-
+
// Output type...
TypePrinter.print(FT->getParamType(i), Out);
-
+
Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
if (ArgAttrs != Attribute::None)
Out << ' ' << Attribute::getAsString(ArgAttrs);
@@ -1515,7 +1698,7 @@ void AssemblyWriter::printFunction(const Function *F) {
/// printArgument - This member is called for every argument that is passed into
/// the function. Simply print it out
///
-void AssemblyWriter::printArgument(const Argument *Arg,
+void AssemblyWriter::printArgument(const Argument *Arg,
Attributes Attrs) {
// Output type...
TypePrinter.print(Arg->getType(), Out);
@@ -1536,7 +1719,7 @@ void AssemblyWriter::printArgument(const Argument *Arg,
void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
if (BB->hasName()) { // Print out the label if it exists...
Out << "\n";
- PrintLLVMName(Out, BB->getNameStart(), BB->getNameLen(), LabelPrefix);
+ PrintLLVMName(Out, BB->getName(), LabelPrefix);
Out << ':';
} else if (!BB->use_empty()) { // Don't print block # of no uses...
Out << "\n; <label>:";
@@ -1547,13 +1730,15 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
Out << "<badref>";
}
- if (BB->getParent() == 0)
- Out << "\t\t; Error: Block without parent!";
- else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ if (BB->getParent() == 0) {
+ Out.PadToColumn(50);
+ Out << "; Error: Block without parent!";
+ } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
// Output predecessors for the block...
- Out << "\t\t;";
+ Out.PadToColumn(50);
+ Out << ";";
pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
+
if (PI == PE) {
Out << " No predecessors!";
} else {
@@ -1571,8 +1756,10 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
// Output all of the instructions in the basic block...
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
printInstruction(*I);
+ Out << '\n';
+ }
if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
}
@@ -1582,23 +1769,11 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
- if (V.getType() != Type::VoidTy) {
- Out << "\t\t; <";
+ if (V.getType() != Type::getVoidTy(V.getContext())) {
+ Out.PadToColumn(50);
+ Out << "; <";
TypePrinter.print(V.getType(), Out);
- Out << '>';
-
- if (!V.hasName() && !isa<Instruction>(V)) {
- int SlotNum;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(&V))
- SlotNum = Machine.getGlobalSlot(GV);
- else
- SlotNum = Machine.getLocalSlot(&V);
- if (SlotNum == -1)
- Out << ":<badref>";
- else
- Out << ':' << SlotNum; // Print out the def slot taken.
- }
- Out << " [#uses=" << V.getNumUses() << ']'; // Output # uses
+ Out << "> [#uses=" << V.getNumUses() << ']'; // Output # uses
}
}
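This hunk replaces tab-based comment alignment with formatted_raw_ostream, which tracks the output column. A minimal sketch of the PadToColumn pattern the printer now relies on:

    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      formatted_raw_ostream Out(errs());   // wraps an existing raw_ostream
      Out << "  %sum = add i32 %a, %b";
      Out.PadToColumn(50);                 // pad with spaces up to column 50
      Out << "; <i32> [#uses=1]\n";
      return 0;
    }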
@@ -1606,13 +1781,14 @@ void AssemblyWriter::printInfoComment(const Value &V) {
void AssemblyWriter::printInstruction(const Instruction &I) {
if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
- Out << '\t';
+ // Print out indentation for an instruction.
+ Out << " ";
// Print out name if it exists...
if (I.hasName()) {
PrintLLVMName(Out, &I);
Out << " = ";
- } else if (I.getType() != Type::VoidTy) {
+ } else if (I.getType() != Type::getVoidTy(I.getContext())) {
// Print out the def slot taken.
int SlotNum = Machine.getLocalSlot(&I);
if (SlotNum == -1)
@@ -1633,6 +1809,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print out the opcode...
Out << I.getOpcodeName();
+ // Print out optimization information.
+ WriteOptimizationInfo(Out, &I);
+
// Print out the compare instruction predicates
if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
Out << ' ' << getPredicateText(CI->getPredicate());
@@ -1659,12 +1838,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << " [";
for (unsigned op = 2, Eop = I.getNumOperands(); op < Eop; op += 2) {
- Out << "\n\t\t";
+ Out << "\n ";
writeOperand(I.getOperand(op ), true);
Out << ", ";
writeOperand(I.getOperand(op+1), true);
}
- Out << "\n\t]";
+ Out << "\n ]";
} else if (isa<PHINode>(I)) {
Out << ' ';
TypePrinter.print(I.getType(), Out);
@@ -1781,7 +1960,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.getFnAttributes() != Attribute::None)
Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
- Out << "\n\t\t\tto ";
+ Out << "\n to ";
writeOperand(II->getNormalDest(), true);
Out << " unwind ";
writeOperand(II->getUnwindDest(), true);
@@ -1789,7 +1968,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
} else if (const AllocationInst *AI = dyn_cast<AllocationInst>(&I)) {
Out << ' ';
TypePrinter.print(AI->getType()->getElementType(), Out);
- if (AI->isArrayAllocation()) {
+ if (!AI->getArraySize() || AI->isArrayAllocation()) {
Out << ", ";
writeOperand(AI->getArraySize(), true);
}
@@ -1845,7 +2024,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(I.getOperand(i), PrintAllTypes);
}
}
-
+
// Print post operand alignment for load/store
if (isa<LoadInst>(I) && cast<LoadInst>(I).getAlignment()) {
Out << ", align " << cast<LoadInst>(I).getAlignment();
@@ -1853,8 +2032,18 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", align " << cast<StoreInst>(I).getAlignment();
}
+ // Print Metadata info
+ if (!MDNames.empty()) {
+ MetadataContext &TheMetadata = I.getContext().getMetadata();
+ const MetadataContext::MDMapTy *MDMap = TheMetadata.getMDs(&I);
+ if (MDMap)
+ for (MetadataContext::MDMapTy::const_iterator MI = MDMap->begin(),
+ ME = MDMap->end(); MI != ME; ++MI)
+ if (const MDNode *MD = dyn_cast_or_null<MDNode>(MI->second))
+ Out << ", !" << MDNames[MI->first]
+ << " !" << Machine.getMetadataSlot(MD);
+ }
printInfoComment(I);
- Out << '\n';
}
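In place of the newline removed above, printInstruction's new tail prints any attached custom metadata as a comma-separated suffix of kind-name and slot pairs. Assuming a metadata kind named dbg and a node that received slot 42, the printed form would look roughly like:

    %sum = add i32 %x, %y, !dbg !42

where MDNames supplies the "dbg" string and Machine.getMetadataSlot(MD) the 42; the value names here are hypothetical.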
@@ -1862,21 +2051,13 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// External Interface declarations
//===----------------------------------------------------------------------===//
-void Module::print(std::ostream &o, AssemblyAnnotationWriter *AAW) const {
- raw_os_ostream OS(o);
- print(OS, AAW);
-}
-void Module::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const {
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
SlotTracker SlotTable(this);
+ formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this, AAW);
W.write(this);
}
-void Type::print(std::ostream &o) const {
- raw_os_ostream OS(o);
- print(OS);
-}
-
void Type::print(raw_ostream &OS) const {
if (this == 0) {
OS << "<null Type>";
@@ -1885,12 +2066,12 @@ void Type::print(raw_ostream &OS) const {
TypePrinting().print(this, OS);
}
-void Value::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const {
+void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
if (this == 0) {
- OS << "printing a <null> value\n";
+ ROS << "printing a <null> value\n";
return;
}
-
+ formatted_raw_ostream OS(ROS);
if (const Instruction *I = dyn_cast<Instruction>(this)) {
const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
SlotTracker SlotTable(F);
@@ -1905,14 +2086,33 @@ void Value::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const {
SlotTracker SlotTable(GV->getParent());
AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
W.write(GV);
- } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
+ } else if (const MDString *MDS = dyn_cast<MDString>(this)) {
TypePrinting TypePrinter;
- TypePrinter.print(N->getType(), OS);
+ TypePrinter.print(MDS->getType(), OS);
OS << ' ';
- // FIXME: Do we need a slot tracker for metadata ?
- SlotTracker SlotTable((const Function *)NULL);
- AssemblyWriter W(OS, SlotTable, NULL, AAW);
- W.printMDNode(N, false);
+ OS << "!\"";
+ PrintEscapedString(MDS->getString(), OS);
+ OS << '"';
+ } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
+ SlotTracker SlotTable(N);
+ TypePrinting TypePrinter;
+ SlotTable.initialize();
+ WriteMDNodes(OS, TypePrinter, SlotTable);
+ } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
+ SlotTracker SlotTable(N);
+ TypePrinting TypePrinter;
+ SlotTable.initialize();
+ OS << "!" << N->getName() << " = !{";
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
+ if (i) OS << ", ";
+ MDNode *MD = dyn_cast_or_null<MDNode>(N->getElement(i));
+ if (MD)
+ OS << '!' << SlotTable.getMetadataSlot(MD);
+ else
+ OS << "null";
+ }
+ OS << "}\n";
+ WriteMDNodes(OS, TypePrinter, SlotTable);
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
@@ -1924,13 +2124,15 @@ void Value::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const {
} else if (isa<InlineAsm>(this)) {
WriteAsOperand(OS, this, true, 0);
} else {
- assert(0 && "Unknown value to print out!");
+ // Otherwise we don't know what it is. Call the virtual function to
+ // allow a subclass to print itself.
+ printCustom(OS);
}
}
-void Value::print(std::ostream &O, AssemblyAnnotationWriter *AAW) const {
- raw_os_ostream OS(O);
- print(OS, AAW);
+// Value::printCustom - subclasses should override this to implement printing.
+void Value::printCustom(raw_ostream &OS) const {
+ llvm_unreachable("Unknown value to print out!");
}
// Value::dump - allow easy printing of Values from the debugger.
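All of the print entry points above now funnel through formatted_raw_ostream (needed for PadToColumn in printInfoComment) and end in the printCustom hook rather than an assert, so out-of-tree Value subclasses can render themselves. A minimal usage sketch against this API; only the function and variable names are invented:

    #include "llvm/Constants.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printFortyTwo(LLVMContext &C) {
      // Takes Value::print's Constant branch: the type, a space, then
      // the operand form, i.e. "i32 42" on stderr.
      Constant *V = ConstantInt::get(Type::getInt32Ty(C), 42);
      V->print(errs());
      errs() << '\n';
    }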
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 8dfbd1d50216..d68bba30729d 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -15,8 +15,10 @@
#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -40,7 +42,7 @@ std::string Attribute::getAsString(Attributes Attrs) {
if (Attrs & Attribute::NoCapture)
Result += "nocapture ";
if (Attrs & Attribute::StructRet)
- Result += "sret ";
+ Result += "sret ";
if (Attrs & Attribute::ByVal)
Result += "byval ";
if (Attrs & Attribute::Nest)
@@ -53,6 +55,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "optsize ";
if (Attrs & Attribute::NoInline)
Result += "noinline ";
+ if (Attrs & Attribute::InlineHint)
+ Result += "inlinehint ";
if (Attrs & Attribute::AlwaysInline)
Result += "alwaysinline ";
if (Attrs & Attribute::StackProtect)
@@ -63,6 +67,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "noredzone ";
if (Attrs & Attribute::NoImplicitFloat)
Result += "noimplicitfloat ";
+ if (Attrs & Attribute::Naked)
+ Result += "naked ";
if (Attrs & Attribute::Alignment) {
Result += "align ";
Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
@@ -94,7 +100,7 @@ Attributes Attribute::typeIncompatible(const Type *Ty) {
namespace llvm {
class AttributeListImpl : public FoldingSetNode {
- unsigned RefCount;
+ sys::cas_flag RefCount;
// AttributesList is uniqued, these should not be publicly available.
void operator=(const AttributeListImpl &); // Do not implement
@@ -108,8 +114,11 @@ public:
RefCount = 0;
}
- void AddRef() { ++RefCount; }
- void DropRef() { if (--RefCount == 0) delete this; }
+ void AddRef() { sys::AtomicIncrement(&RefCount); }
+ void DropRef() {
+ sys::cas_flag old = sys::AtomicDecrement(&RefCount);
+ if (old == 0) delete this;
+ }
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, Attrs.data(), Attrs.size());
@@ -122,9 +131,11 @@ public:
};
}
+static ManagedStatic<sys::SmartMutex<true> > ALMutex;
static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
AttributeListImpl::~AttributeListImpl() {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
AttributesLists->RemoveNode(this);
}
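Reference counting on the uniqued attribute lists becomes lock-free via llvm/System/Atomic.h, while the FoldingSet itself stays mutex-guarded: the destructor takes ALMutex here, and the hunk below takes it around FindNodeOrInsertPos, so a node cannot be handed out while another thread is unlinking it. The counting pattern, reduced to an illustrative sketch (this class is not part of the patch; instances must be heap-allocated):

    #include "llvm/System/Atomic.h"

    struct RefCounted {
      llvm::sys::cas_flag RefCount;
      RefCounted() : RefCount(0) {}
      void AddRef()  { llvm::sys::AtomicIncrement(&RefCount); }
      void DropRef() {
        // AtomicDecrement returns the updated count, so exactly one
        // caller observes 0 and frees the object.
        if (llvm::sys::AtomicDecrement(&RefCount) == 0)
          delete this;
      }
    };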
@@ -147,6 +158,9 @@ AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs)
FoldingSetNodeID ID;
AttributeListImpl::Profile(ID, Attrs, NumAttrs);
void *InsertPos;
+
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+
AttributeListImpl *PAL =
AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
@@ -304,11 +318,11 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
}
void AttrListPtr::dump() const {
- cerr << "PAL[ ";
+ errs() << "PAL[ ";
for (unsigned i = 0; i < getNumSlots(); ++i) {
const AttributeWithIndex &PAWI = getSlot(i);
- cerr << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ errs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
}
- cerr << "]\n";
+ errs() << "]\n";
}
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index dd366071b76b..77ab19f417ce 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -14,10 +14,11 @@
#include "llvm/AutoUpgrade.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
@@ -119,6 +120,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
+ case 'e':
+ // The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector.
+ if (Name.compare("llvm.eh.selector.i32") == 0) {
+ F->setName("llvm.eh.selector");
+ NewFn = F;
+ return true;
+ }
+ // The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for.
+ if (Name.compare("llvm.eh.typeid.for.i32") == 0) {
+ F->setName("llvm.eh.typeid.for");
+ NewFn = F;
+ return true;
+ }
+ // Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector.
+ if (Name.compare("llvm.eh.selector.i64") == 0) {
+ NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector);
+ return true;
+ }
+ // Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for.
+ if (Name.compare("llvm.eh.typeid.for.i64") == 0) {
+ NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for);
+ return true;
+ }
+ break;
+
case 'p':
// This upgrades the llvm.part.select overloaded intrinsic names to only
// use one type specifier in the name. We only care about the old format
@@ -162,7 +188,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.compare(13,4,"psra", 4) == 0 ||
Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') {
- const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1);
+ const llvm::Type *VT =
+ VectorType::get(IntegerType::get(FTy->getContext(), 64), 1);
// We don't have to do anything if the parameter already has
// the correct type.
@@ -227,6 +254,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
// order to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
+ LLVMContext &C = CI->getContext();
+
assert(F && "CallInst has no function associated with it.");
if (!NewFn) {
@@ -234,23 +263,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool isMovSD = false, isShufPD = false;
bool isUnpckhPD = false, isUnpcklPD = false;
bool isPunpckhQPD = false, isPunpcklQPD = false;
- if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadh.pd") == 0)
+ if (F->getName() == "llvm.x86.sse2.loadh.pd")
isLoadH = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadl.pd") == 0)
+ else if (F->getName() == "llvm.x86.sse2.loadl.pd")
isLoadL = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movl.dq") == 0)
+ else if (F->getName() == "llvm.x86.sse2.movl.dq")
isMovL = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movs.d") == 0)
+ else if (F->getName() == "llvm.x86.sse2.movs.d")
isMovSD = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.shuf.pd") == 0)
+ else if (F->getName() == "llvm.x86.sse2.shuf.pd")
isShufPD = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckh.pd") == 0)
+ else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
isUnpckhPD = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckl.pd") == 0)
+ else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
isUnpcklPD = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.punpckh.qdq") == 0)
+ else if (F->getName() == "llvm.x86.sse2.punpckh.qdq")
isPunpckhQPD = true;
- else if (strcmp(F->getNameStart(), "llvm.x86.sse2.punpckl.qdq") == 0)
+ else if (F->getName() == "llvm.x86.sse2.punpckl.qdq")
isPunpcklQPD = true;
if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
@@ -261,23 +290,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (isLoadH || isLoadL) {
Value *Op1 = UndefValue::get(Op0->getType());
Value *Addr = new BitCastInst(CI->getOperand(2),
- PointerType::getUnqual(Type::DoubleTy),
+ Type::getDoublePtrTy(C),
"upgraded.", CI);
Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
- Value *Idx = ConstantInt::get(Type::Int32Ty, 0);
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
if (isLoadH) {
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
} else {
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
}
Value *Mask = ConstantVector::get(Idxs);
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
} else if (isMovL) {
- Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
Idxs.push_back(Zero);
Idxs.push_back(Zero);
Idxs.push_back(Zero);
@@ -285,32 +314,33 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *ZeroV = ConstantVector::get(Idxs);
Idxs.clear();
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 4));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 5));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
Value *Mask = ConstantVector::get(Idxs);
SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
} else if (isMovSD ||
isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
Value *Op1 = CI->getOperand(2);
if (isMovSD) {
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
} else if (isUnpckhPD || isPunpckhQPD) {
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
} else {
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
}
Value *Mask = ConstantVector::get(Idxs);
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
} else if (isShufPD) {
Value *Op1 = CI->getOperand(2);
unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, MaskVal & 1));
- Idxs.push_back(ConstantInt::get(Type::Int32Ty, ((MaskVal >> 1) & 1)+2));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
+ Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
+ ((MaskVal >> 1) & 1)+2));
Value *Mask = ConstantVector::get(Idxs);
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
}
@@ -326,13 +356,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Clean up the old call now that it has been completely upgraded.
CI->eraseFromParent();
} else {
- assert(0 && "Unknown function for CallInst upgrade.");
+ llvm_unreachable("Unknown function for CallInst upgrade.");
}
return;
}
switch (NewFn->getIntrinsicID()) {
- default: assert(0 && "Unknown function for CallInst upgrade.");
+ default: llvm_unreachable("Unknown function for CallInst upgrade.");
case Intrinsic::x86_mmx_psll_d:
case Intrinsic::x86_mmx_psll_q:
case Intrinsic::x86_mmx_psll_w:
@@ -404,6 +434,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
}
break;
+ case Intrinsic::eh_selector:
+ case Intrinsic::eh_typeid_for: {
+ // Only the return type changed.
+ SmallVector<Value*, 8> Operands(CI->op_begin() + 1, CI->op_end());
+ CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
+ "upgraded." + CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty()) {
+ // Construct an appropriate cast from the new return type to the old.
+ CastInst *RetCast =
+ CastInst::Create(CastInst::getCastOpcode(NewCI, true,
+ F->getReturnType(), true),
+ NewCI, F->getReturnType(), NewCI->getName(), CI);
+ CI->replaceAllUsesWith(RetCast);
+ }
+ CI->eraseFromParent();
+ }
+ break;
}
}
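Since only the return type of these two intrinsics changed, the rewrite above rebuilds the call against the new declaration and reconciles old consumers through whatever opcode getCastOpcode selects (both sides treated as signed). Assuming the .i64 suffix denoted an i64 return, an upgraded call would look roughly like:

    %t = call i64 @llvm.eh.typeid.for.i64(...)

becoming

    %upgraded.t = call i32 @llvm.eh.typeid.for(...)
    %t2 = sext i32 %upgraded.t to i64    ; replaces all uses of %t

with the "upgraded." prefix coming from the CallInst::Create call above; the cast register name here is hypothetical.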
@@ -428,3 +479,74 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
}
+
+/// This function checks debug info intrinsics. If an intrinsic is invalid,
+/// it is simply removed.
+void llvm::CheckDebugInfoIntrinsics(Module *M) {
+
+ if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
+ if (!FuncStart->use_empty()) {
+ DbgFuncStartInst *DFSI = cast<DbgFuncStartInst>(FuncStart->use_back());
+ if (!isa<MDNode>(DFSI->getOperand(1))) {
+ while (!FuncStart->use_empty()) {
+ CallInst *CI = cast<CallInst>(FuncStart->use_back());
+ CI->eraseFromParent();
+ }
+ FuncStart->eraseFromParent();
+ }
+ }
+ }
+
+ if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
+ if (!StopPoint->use_empty()) {
+ DbgStopPointInst *DSPI = cast<DbgStopPointInst>(StopPoint->use_back());
+ if (!isa<MDNode>(DSPI->getOperand(3))) {
+ while (!StopPoint->use_empty()) {
+ CallInst *CI = cast<CallInst>(StopPoint->use_back());
+ CI->eraseFromParent();
+ }
+ StopPoint->eraseFromParent();
+ }
+ }
+ }
+
+ if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
+ if (!RegionStart->use_empty()) {
+ DbgRegionStartInst *DRSI = cast<DbgRegionStartInst>(RegionStart->use_back());
+ if (!isa<MDNode>(DRSI->getOperand(1))) {
+ while (!RegionStart->use_empty()) {
+ CallInst *CI = cast<CallInst>(RegionStart->use_back());
+ CI->eraseFromParent();
+ }
+ RegionStart->eraseFromParent();
+ }
+ }
+ }
+
+ if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
+ if (!RegionEnd->use_empty()) {
+ DbgRegionEndInst *DREI = cast<DbgRegionEndInst>(RegionEnd->use_back());
+ if (!isa<MDNode>(DREI->getOperand(1))) {
+ while (!RegionEnd->use_empty()) {
+ CallInst *CI = cast<CallInst>(RegionEnd->use_back());
+ CI->eraseFromParent();
+ }
+ RegionEnd->eraseFromParent();
+ }
+ }
+ }
+
+ if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
+ if (!Declare->use_empty()) {
+ DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
+ if (!isa<MDNode>(DDI->getOperand(2))) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ }
+ }
+ }
+}
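The five blocks above instantiate one pattern per intrinsic, varying only in the operand that must already be an MDNode (operand 1 for func.start, region.start, and region.end; 3 for stoppoint; 2 for declare). A sketch of the helper they could share, using only calls already present in this file; the helper itself is illustrative, not part of the patch:

    static void RemoveInvalidDbgIntrinsic(Module *M, const char *Name,
                                          unsigned OpIdx) {
      Function *Fn = M->getFunction(Name);
      if (!Fn || Fn->use_empty())
        return;
      // Inspect one call site: new-style debug info passes an MDNode.
      CallInst *First = cast<CallInst>(Fn->use_back());
      if (isa<MDNode>(First->getOperand(OpIdx)))
        return;
      // Old-style info is unusable: drop every call, then the declaration.
      while (!Fn->use_empty())
        cast<CallInst>(Fn->use_back())->eraseFromParent();
      Fn->eraseFromParent();
    }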
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 3065766362e4..50cf84c3fe62 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -14,11 +14,11 @@
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/LeakDetector.h"
-#include "llvm/Support/Compiler.h"
#include "SymbolTableListTraitsImpl.h"
#include <algorithm>
using namespace llvm;
@@ -29,14 +29,18 @@ ValueSymbolTable *BasicBlock::getValueSymbolTable() {
return 0;
}
+LLVMContext &BasicBlock::getContext() const {
+ return getType()->getContext();
+}
+
// Explicit instantiation of SymbolTableListTraits since some of the methods
// are not in the public header file...
template class SymbolTableListTraits<Instruction, BasicBlock>;
-BasicBlock::BasicBlock(const std::string &Name, Function *NewParent,
+BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
BasicBlock *InsertBefore)
- : Value(Type::LabelTy, Value::BasicBlockVal), Parent(0) {
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
// Make sure that we get added to a function
LeakDetector::addGarbageObject(this);
@@ -235,14 +239,15 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
/// cause a degenerate basic block to be formed, having a terminator inside of
/// the basic block).
///
-BasicBlock *BasicBlock::splitBasicBlock(iterator I, const std::string &BBName) {
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
assert(I != InstList.end() &&
"Trying to get me to create degenerate basic block!");
BasicBlock *InsertBefore = next(Function::iterator(this))
.getNodePtrUnchecked();
- BasicBlock *New = BasicBlock::Create(BBName, getParent(), InsertBefore);
+ BasicBlock *New = BasicBlock::Create(getContext(), BBName,
+ getParent(), InsertBefore);
// Move all of the specified instructions from the original basic block into
// the new basic block.
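With std::string replaced by Twine in these signatures, call sites can build derived block names without an intermediate allocation, and the context argument disappears because getContext() now recovers it from the block's label type. A usage sketch; the function and its arguments are invented for illustration:

    #include "llvm/BasicBlock.h"
    #include "llvm/ADT/Twine.h"
    using namespace llvm;

    BasicBlock *splitAt(BasicBlock *BB, BasicBlock::iterator SplitPt) {
      // Twine defers the concatenation until the callee materializes it.
      return BB->splitBasicBlock(SplitPt, Twine(BB->getName()) + ".split");
    }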
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index c9cdce4789d3..9b17d4bcd210 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -13,9 +13,10 @@ add_llvm_library(LLVMCore
Instruction.cpp
Instructions.cpp
IntrinsicInst.cpp
- LeakDetector.cpp
LLVMContext.cpp
+ LeakDetector.cpp
Mangler.cpp
+ Metadata.cpp
Module.cpp
ModuleProvider.cpp
Pass.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 3aab0cce37e4..c1fcc5f4ed27 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -12,9 +12,8 @@
// ConstantExpr::get* methods to automatically fold constants when possible.
//
// The current constant folding implementation is implemented in two pieces: the
-// template-based folder for simple primitive constants like ConstantInt, and
-// the special case hackery that we use to symbolically evaluate expressions
-// that use ConstantExprs.
+// pieces that don't need TargetData, and the pieces that do. This is to avoid
+// a dependence in VMCore on Target.
//
//===----------------------------------------------------------------------===//
@@ -24,8 +23,11 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
@@ -39,7 +41,7 @@ using namespace llvm;
/// BitCastConstantVector - Convert the specified ConstantVector node to the
/// specified vector type. At this point, we know that the elements of the
/// input vector constant are all simple integer or FP values.
-static Constant *BitCastConstantVector(ConstantVector *CV,
+static Constant *BitCastConstantVector(LLVMContext &Context, ConstantVector *CV,
const VectorType *DstTy) {
// If this cast changes element count then we can't handle it here:
// doing so requires endianness information. This should be handled by
@@ -47,7 +49,7 @@ static Constant *BitCastConstantVector(ConstantVector *CV,
unsigned NumElts = DstTy->getNumElements();
if (NumElts != CV->getNumOperands())
return 0;
-
+
// Check to verify that all elements of the input are simple.
for (unsigned i = 0; i != NumElts; ++i) {
if (!isa<ConstantInt>(CV->getOperand(i)) &&
@@ -59,7 +61,8 @@ static Constant *BitCastConstantVector(ConstantVector *CV,
std::vector<Constant*> Result;
const Type *DstEltTy = DstTy->getElementType();
for (unsigned i = 0; i != NumElts; ++i)
- Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i), DstEltTy));
+ Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i),
+ DstEltTy));
return ConstantVector::get(Result);
}
@@ -70,13 +73,13 @@ static Constant *BitCastConstantVector(ConstantVector *CV,
static unsigned
foldConstantCastPair(
unsigned opc, ///< opcode of the second cast constant expression
- const ConstantExpr*Op, ///< the first cast constant expression
+ ConstantExpr *Op, ///< the first cast constant expression
const Type *DstTy ///< destination type of the first cast
) {
assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
assert(CastInst::isCast(opc) && "Invalid cast opcode");
-
+
// The types and opcodes for the two Cast constant expressions
const Type *SrcTy = Op->getOperand(0)->getType();
const Type *MidTy = Op->getType();
@@ -85,41 +88,45 @@ foldConstantCastPair(
// Let CastInst::isEliminableCastPair do the heavy lifting.
return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
- Type::Int64Ty);
+ Type::getInt64Ty(DstTy->getContext()));
}
-static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
+static Constant *FoldBitCast(LLVMContext &Context,
+ Constant *V, const Type *DestTy) {
const Type *SrcTy = V->getType();
if (SrcTy == DestTy)
return V; // no-op cast
-
+
// Check to see if we are casting a pointer to an aggregate to a pointer to
// the first element. If so, return the appropriate GEP instruction.
if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy))
if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
SmallVector<Value*, 8> IdxList;
- IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
+ Value *Zero = Constant::getNullValue(Type::getInt32Ty(Context));
+ IdxList.push_back(Zero);
const Type *ElTy = PTy->getElementType();
while (ElTy != DPTy->getElementType()) {
if (const StructType *STy = dyn_cast<StructType>(ElTy)) {
if (STy->getNumElements() == 0) break;
ElTy = STy->getElementType(0);
- IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
+ IdxList.push_back(Zero);
} else if (const SequentialType *STy =
dyn_cast<SequentialType>(ElTy)) {
if (isa<PointerType>(ElTy)) break; // Can't index into pointers!
ElTy = STy->getElementType();
- IdxList.push_back(IdxList[0]);
+ IdxList.push_back(Zero);
} else {
break;
}
}
-
+
if (ElTy == DPTy->getElementType())
- return ConstantExpr::getGetElementPtr(V, &IdxList[0], IdxList.size());
+ // This GEP is inbounds because all indices are zero.
+ return ConstantExpr::getInBoundsGetElementPtr(V, &IdxList[0],
+ IdxList.size());
}
-
+
// Handle casts from one vector constant to another. We know that the src
// and dest type have the same size (otherwise its an illegal cast).
if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
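The pointer branch above turns a bitcast that merely peels off leading aggregate layers into an equivalent GEP; because every index it pushes is zero, the fold can mark the expression inbounds. A worked sketch (G is a hypothetical global of type [4 x i32]*):

    // Folds to: getelementptr inbounds ([4 x i32]* @g, i32 0, i32 0)
    Constant *C =
        ConstantExpr::getBitCast(G, Type::getInt32PtrTy(Context));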
@@ -130,48 +137,50 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
// First, check for null. Undef is already handled.
if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(DestTy);
-
+
if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- return BitCastConstantVector(CV, DestPTy);
+ return BitCastConstantVector(Context, CV, DestPTy);
}
// Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
// This allows for other simplifications (although some of them
// can only be handled by Analysis/ConstantFolding.cpp).
if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
- return ConstantExpr::getBitCast(ConstantVector::get(&V, 1), DestPTy);
+ return ConstantExpr::getBitCast(
+ ConstantVector::get(&V, 1), DestPTy);
}
-
+
// Finally, implement bitcast folding now. The code below doesn't handle
// every bitcast correctly.
if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
return ConstantPointerNull::get(cast<PointerType>(DestTy));
-
+
// Handle integral constant input.
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (DestTy->isInteger())
// Integral -> Integral. This is a no-op because the bit widths must
// be the same. Consequently, we just fold to V.
return V;
if (DestTy->isFloatingPoint())
- return ConstantFP::get(APFloat(CI->getValue(),
- DestTy != Type::PPC_FP128Ty));
+ return ConstantFP::get(Context, APFloat(CI->getValue(),
+ DestTy != Type::getPPC_FP128Ty(Context)));
// Otherwise, can't fold this (vector?)
return 0;
}
// Handle ConstantFP input.
- if (const ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
// FP -> Integral.
- return ConstantInt::get(FP->getValueAPF().bitcastToAPInt());
+ return ConstantInt::get(Context, FP->getValueAPF().bitcastToAPInt());
return 0;
}
-Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
+Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context,
+ unsigned opc, Constant *V,
const Type *DestTy) {
if (isa<UndefValue>(V)) {
// zext(undef) = 0, because the top bits will be zero.
@@ -183,12 +192,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
return UndefValue::get(DestTy);
}
// No compile-time operations on this type yet.
- if (V->getType() == Type::PPC_FP128Ty || DestTy == Type::PPC_FP128Ty)
+ if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty())
return 0;
// If the cast operand is a constant expression, there's a few things we can
// do to try to simplify it.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->isCast()) {
// Try hard to fold cast of cast because they are often eliminable.
if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
@@ -211,7 +220,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
// If the cast operand is a constant vector, perform the cast by
// operating on each element. In the cast of bitcasts, the element
// count may be mismatched; don't attempt to handle that here.
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
if (isa<VectorType>(DestTy) &&
cast<VectorType>(DestTy)->getNumElements() ==
CV->getType()->getNumElements()) {
@@ -229,21 +238,21 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
switch (opc) {
case Instruction::FPTrunc:
case Instruction::FPExt:
- if (const ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
bool ignored;
APFloat Val = FPC->getValueAPF();
- Val.convert(DestTy == Type::FloatTy ? APFloat::IEEEsingle :
- DestTy == Type::DoubleTy ? APFloat::IEEEdouble :
- DestTy == Type::X86_FP80Ty ? APFloat::x87DoubleExtended :
- DestTy == Type::FP128Ty ? APFloat::IEEEquad :
+ Val.convert(DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ DestTy->isDoubleTy() ? APFloat::IEEEdouble :
+ DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
+ DestTy->isFP128Ty() ? APFloat::IEEEquad :
APFloat::Bogus,
APFloat::rmNearestTiesToEven, &ignored);
- return ConstantFP::get(Val);
+ return ConstantFP::get(Context, Val);
}
return 0; // Can't fold.
case Instruction::FPToUI:
case Instruction::FPToSI:
- if (const ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
const APFloat &V = FPC->getValueAPF();
bool ignored;
uint64_t x[2];
@@ -251,7 +260,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
(void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
APFloat::rmTowardZero, &ignored);
APInt Val(DestBitWidth, 2, x);
- return ConstantInt::get(Val);
+ return ConstantInt::get(Context, Val);
}
return 0; // Can't fold.
case Instruction::IntToPtr: //always treated as unsigned
@@ -264,7 +273,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
return 0; // Other pointer types cannot be casted
case Instruction::UIToFP:
case Instruction::SIToFP:
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
APInt api = CI->getValue();
const uint64_t zero[] = {0, 0};
APFloat apf = APFloat(APInt(DestTy->getPrimitiveSizeInBits(),
@@ -272,67 +281,68 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
(void)apf.convertFromAPInt(api,
opc==Instruction::SIToFP,
APFloat::rmNearestTiesToEven);
- return ConstantFP::get(apf);
+ return ConstantFP::get(Context, apf);
}
return 0;
case Instruction::ZExt:
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
APInt Result(CI->getValue());
Result.zext(BitWidth);
- return ConstantInt::get(Result);
+ return ConstantInt::get(Context, Result);
}
return 0;
case Instruction::SExt:
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
APInt Result(CI->getValue());
Result.sext(BitWidth);
- return ConstantInt::get(Result);
+ return ConstantInt::get(Context, Result);
}
return 0;
case Instruction::Trunc:
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
APInt Result(CI->getValue());
Result.trunc(BitWidth);
- return ConstantInt::get(Result);
+ return ConstantInt::get(Context, Result);
}
return 0;
case Instruction::BitCast:
- return FoldBitCast(const_cast<Constant*>(V), DestTy);
+ return FoldBitCast(Context, V, DestTy);
default:
assert(!"Invalid CE CastInst opcode");
break;
}
- assert(0 && "Failed to cast constant expression");
+ llvm_unreachable("Failed to cast constant expression");
return 0;
}
-Constant *llvm::ConstantFoldSelectInstruction(const Constant *Cond,
- const Constant *V1,
- const Constant *V2) {
- if (const ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
- return const_cast<Constant*>(CB->getZExtValue() ? V1 : V2);
+Constant *llvm::ConstantFoldSelectInstruction(LLVMContext&,
+ Constant *Cond,
+ Constant *V1, Constant *V2) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
+ return CB->getZExtValue() ? V1 : V2;
- if (isa<UndefValue>(V1)) return const_cast<Constant*>(V2);
- if (isa<UndefValue>(V2)) return const_cast<Constant*>(V1);
- if (isa<UndefValue>(Cond)) return const_cast<Constant*>(V1);
- if (V1 == V2) return const_cast<Constant*>(V1);
+ if (isa<UndefValue>(V1)) return V2;
+ if (isa<UndefValue>(V2)) return V1;
+ if (isa<UndefValue>(Cond)) return V1;
+ if (V1 == V2) return V1;
return 0;
}
-Constant *llvm::ConstantFoldExtractElementInstruction(const Constant *Val,
- const Constant *Idx) {
+Constant *llvm::ConstantFoldExtractElementInstruction(LLVMContext &Context,
+ Constant *Val,
+ Constant *Idx) {
if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
if (Val->isNullValue()) // ee(zero, x) -> zero
return Constant::getNullValue(
cast<VectorType>(Val->getType())->getElementType());
-
- if (const ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
- if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+
+ if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
return CVal->getOperand(CIdx->getZExtValue());
} else if (isa<UndefValue>(Idx)) {
// ee({w,x,y,z}, undef) -> w (an arbitrary value).
@@ -342,17 +352,18 @@ Constant *llvm::ConstantFoldExtractElementInstruction(const Constant *Val,
return 0;
}
-Constant *llvm::ConstantFoldInsertElementInstruction(const Constant *Val,
- const Constant *Elt,
- const Constant *Idx) {
- const ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
+Constant *llvm::ConstantFoldInsertElementInstruction(LLVMContext &Context,
+ Constant *Val,
+ Constant *Elt,
+ Constant *Idx) {
+ ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return 0;
APInt idxVal = CIdx->getValue();
if (isa<UndefValue>(Val)) {
// Insertion of scalar constant into vector undef
// Optimize away insertion of undef
if (isa<UndefValue>(Elt))
- return const_cast<Constant*>(Val);
+ return Val;
// Otherwise break the aggregate undef into multiple undefs and do
// the insertion
unsigned numOps =
@@ -360,9 +371,9 @@ Constant *llvm::ConstantFoldInsertElementInstruction(const Constant *Val,
std::vector<Constant*> Ops;
Ops.reserve(numOps);
for (unsigned i = 0; i < numOps; ++i) {
- const Constant *Op =
+ Constant *Op =
(idxVal == i) ? Elt : UndefValue::get(Elt->getType());
- Ops.push_back(const_cast<Constant*>(Op));
+ Ops.push_back(Op);
}
return ConstantVector::get(Ops);
}
@@ -370,7 +381,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(const Constant *Val,
// Insertion of scalar constant into vector aggregate zero
// Optimize away insertion of zero
if (Elt->isNullValue())
- return const_cast<Constant*>(Val);
+ return Val;
// Otherwise break the aggregate zero into multiple zeros and do
// the insertion
unsigned numOps =
@@ -378,20 +389,20 @@ Constant *llvm::ConstantFoldInsertElementInstruction(const Constant *Val,
std::vector<Constant*> Ops;
Ops.reserve(numOps);
for (unsigned i = 0; i < numOps; ++i) {
- const Constant *Op =
+ Constant *Op =
(idxVal == i) ? Elt : Constant::getNullValue(Elt->getType());
- Ops.push_back(const_cast<Constant*>(Op));
+ Ops.push_back(Op);
}
return ConstantVector::get(Ops);
}
- if (const ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
// Insertion of scalar constant into vector constant
std::vector<Constant*> Ops;
Ops.reserve(CVal->getNumOperands());
for (unsigned i = 0; i < CVal->getNumOperands(); ++i) {
- const Constant *Op =
+ Constant *Op =
(idxVal == i) ? Elt : cast<Constant>(CVal->getOperand(i));
- Ops.push_back(const_cast<Constant*>(Op));
+ Ops.push_back(Op);
}
return ConstantVector::get(Ops);
}
@@ -401,10 +412,11 @@ Constant *llvm::ConstantFoldInsertElementInstruction(const Constant *Val,
/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef
/// return the specified element value. Otherwise return null.
-static Constant *GetVectorElement(const Constant *C, unsigned EltNo) {
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(C))
+static Constant *GetVectorElement(LLVMContext &Context, Constant *C,
+ unsigned EltNo) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C))
return CV->getOperand(EltNo);
-
+
const Type *EltTy = cast<VectorType>(C->getType())->getElementType();
if (isa<ConstantAggregateZero>(C))
return Constant::getNullValue(EltTy);
@@ -413,9 +425,10 @@ static Constant *GetVectorElement(const Constant *C, unsigned EltNo) {
return 0;
}
-Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
- const Constant *V2,
- const Constant *Mask) {
+Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context,
+ Constant *V1,
+ Constant *V2,
+ Constant *Mask) {
// Undefined shuffle mask -> undefined value.
if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
@@ -426,7 +439,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
// Loop over the shuffle mask, evaluating each element.
SmallVector<Constant*, 32> Result;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- Constant *InElt = GetVectorElement(Mask, i);
+ Constant *InElt = GetVectorElement(Context, Mask, i);
if (InElt == 0) return 0;
if (isa<UndefValue>(InElt))
@@ -436,9 +449,9 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
if (Elt >= SrcNumElts*2)
InElt = UndefValue::get(EltTy);
else if (Elt >= SrcNumElts)
- InElt = GetVectorElement(V2, Elt - SrcNumElts);
+ InElt = GetVectorElement(Context, V2, Elt - SrcNumElts);
else
- InElt = GetVectorElement(V1, Elt);
+ InElt = GetVectorElement(Context, V1, Elt);
if (InElt == 0) return 0;
} else {
// Unknown value.
@@ -450,12 +463,13 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
return ConstantVector::get(&Result[0], Result.size());
}
-Constant *llvm::ConstantFoldExtractValueInstruction(const Constant *Agg,
+Constant *llvm::ConstantFoldExtractValueInstruction(LLVMContext &Context,
+ Constant *Agg,
const unsigned *Idxs,
unsigned NumIdx) {
// Base case: no indices, so return the entire value.
if (NumIdx == 0)
- return const_cast<Constant *>(Agg);
+ return Agg;
if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
@@ -469,123 +483,111 @@ Constant *llvm::ConstantFoldExtractValueInstruction(const Constant *Agg,
Idxs + NumIdx));
// Otherwise recurse.
- return ConstantFoldExtractValueInstruction(Agg->getOperand(*Idxs),
+ return ConstantFoldExtractValueInstruction(Context, Agg->getOperand(*Idxs),
Idxs+1, NumIdx-1);
}
-Constant *llvm::ConstantFoldInsertValueInstruction(const Constant *Agg,
- const Constant *Val,
+Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context,
+ Constant *Agg,
+ Constant *Val,
const unsigned *Idxs,
unsigned NumIdx) {
// Base case: no indices, so replace the entire value.
if (NumIdx == 0)
- return const_cast<Constant *>(Val);
+ return Val;
if (isa<UndefValue>(Agg)) {
// Insertion of constant into aggregate undef
- // Optimize away insertion of undef
+ // Optimize away insertion of undef.
if (isa<UndefValue>(Val))
- return const_cast<Constant*>(Agg);
+ return Agg;
+
// Otherwise break the aggregate undef into multiple undefs and do
- // the insertion
+ // the insertion.
const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
unsigned numOps;
if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
else
numOps = cast<StructType>(AggTy)->getNumElements();
+
std::vector<Constant*> Ops(numOps);
for (unsigned i = 0; i < numOps; ++i) {
const Type *MemberTy = AggTy->getTypeAtIndex(i);
- const Constant *Op =
+ Constant *Op =
(*Idxs == i) ?
- ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
+ ConstantFoldInsertValueInstruction(Context, UndefValue::get(MemberTy),
Val, Idxs+1, NumIdx-1) :
UndefValue::get(MemberTy);
- Ops[i] = const_cast<Constant*>(Op);
+ Ops[i] = Op;
}
- if (isa<StructType>(AggTy))
- return ConstantStruct::get(Ops);
- else
- return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+
+ if (const StructType* ST = dyn_cast<StructType>(AggTy))
+ return ConstantStruct::get(Context, Ops, ST->isPacked());
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
+
if (isa<ConstantAggregateZero>(Agg)) {
// Insertion of constant into aggregate zero
- // Optimize away insertion of zero
+ // Optimize away insertion of zero.
if (Val->isNullValue())
- return const_cast<Constant*>(Agg);
+ return Agg;
+
// Otherwise break the aggregate zero into multiple zeros and do
- // the insertion
+ // the insertion.
const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
unsigned numOps;
if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
else
numOps = cast<StructType>(AggTy)->getNumElements();
+
std::vector<Constant*> Ops(numOps);
for (unsigned i = 0; i < numOps; ++i) {
const Type *MemberTy = AggTy->getTypeAtIndex(i);
- const Constant *Op =
+ Constant *Op =
(*Idxs == i) ?
- ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
+ ConstantFoldInsertValueInstruction(Context,
+ Constant::getNullValue(MemberTy),
Val, Idxs+1, NumIdx-1) :
Constant::getNullValue(MemberTy);
- Ops[i] = const_cast<Constant*>(Op);
+ Ops[i] = Op;
}
- if (isa<StructType>(AggTy))
- return ConstantStruct::get(Ops);
- else
- return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+
+ if (const StructType* ST = dyn_cast<StructType>(AggTy))
+ return ConstantStruct::get(Context, Ops, ST->isPacked());
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
+
if (isa<ConstantStruct>(Agg) || isa<ConstantArray>(Agg)) {
- // Insertion of constant into aggregate constant
+ // Insertion of constant into aggregate constant.
std::vector<Constant*> Ops(Agg->getNumOperands());
for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
- const Constant *Op =
+ Constant *Op =
(*Idxs == i) ?
- ConstantFoldInsertValueInstruction(Agg->getOperand(i),
+ ConstantFoldInsertValueInstruction(Context, Agg->getOperand(i),
Val, Idxs+1, NumIdx-1) :
Agg->getOperand(i);
- Ops[i] = const_cast<Constant*>(Op);
+ Ops[i] = Op;
}
- Constant *C;
- if (isa<StructType>(Agg->getType()))
- C = ConstantStruct::get(Ops);
- else
- C = ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
- return C;
+
+ if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(Context, Ops, ST->isPacked());
+ return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
}
return 0;
}
-/// EvalVectorOp - Given two vector constants and a function pointer, apply the
-/// function pointer to each element pair, producing a new ConstantVector
-/// constant. Either or both of V1 and V2 may be NULL, meaning a
-/// ConstantAggregateZero operand.
-static Constant *EvalVectorOp(const ConstantVector *V1,
- const ConstantVector *V2,
- const VectorType *VTy,
- Constant *(*FP)(Constant*, Constant*)) {
- std::vector<Constant*> Res;
- const Type *EltTy = VTy->getElementType();
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- const Constant *C1 = V1 ? V1->getOperand(i) : Constant::getNullValue(EltTy);
- const Constant *C2 = V2 ? V2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(FP(const_cast<Constant*>(C1),
- const_cast<Constant*>(C2)));
- }
- return ConstantVector::get(Res);
-}
-Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
- const Constant *C1,
- const Constant *C2) {
+Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context,
+ unsigned Opcode,
+ Constant *C1, Constant *C2) {
// No compile-time operations on this type yet.
- if (C1->getType() == Type::PPC_FP128Ty)
+ if (C1->getType()->isPPC_FP128Ty())
return 0;
- // Handle UndefValue up front
+ // Handle UndefValue up front.
if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
switch (Opcode) {
case Instruction::Xor:
@@ -606,23 +608,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::SRem:
if (!isa<UndefValue>(C2)) // undef / X -> 0
return Constant::getNullValue(C1->getType());
- return const_cast<Constant*>(C2); // X / undef -> undef
+ return C2; // X / undef -> undef
case Instruction::Or: // X | undef -> -1
if (const VectorType *PTy = dyn_cast<VectorType>(C1->getType()))
- return ConstantVector::getAllOnesValue(PTy);
- return ConstantInt::getAllOnesValue(C1->getType());
+ return Constant::getAllOnesValue(PTy);
+ return Constant::getAllOnesValue(C1->getType());
case Instruction::LShr:
if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
- return const_cast<Constant*>(C1); // undef lshr undef -> undef
+ return C1; // undef lshr undef -> undef
return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
// undef lshr X -> 0
case Instruction::AShr:
if (!isa<UndefValue>(C2))
- return const_cast<Constant*>(C1); // undef ashr X --> undef
+ return C1; // undef ashr X --> undef
else if (isa<UndefValue>(C1))
- return const_cast<Constant*>(C1); // undef ashr undef -> undef
+ return C1; // undef ashr undef -> undef
else
- return const_cast<Constant*>(C1); // X ashr undef --> X
+ return C1; // X ashr undef --> X
case Instruction::Shl:
// undef << X -> 0 or X << undef -> 0
return Constant::getNullValue(C1->getType());
@@ -630,23 +632,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
// Handle simplifications when the RHS is a constant int.
- if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
switch (Opcode) {
case Instruction::Add:
- if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X + 0 == X
+ if (CI2->equalsInt(0)) return C1; // X + 0 == X
break;
case Instruction::Sub:
- if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X - 0 == X
+ if (CI2->equalsInt(0)) return C1; // X - 0 == X
break;
case Instruction::Mul:
- if (CI2->equalsInt(0)) return const_cast<Constant*>(C2); // X * 0 == 0
+ if (CI2->equalsInt(0)) return C2; // X * 0 == 0
if (CI2->equalsInt(1))
- return const_cast<Constant*>(C1); // X * 1 == X
+ return C1; // X * 1 == X
break;
case Instruction::UDiv:
case Instruction::SDiv:
if (CI2->equalsInt(1))
- return const_cast<Constant*>(C1); // X / 1 == X
+ return C1; // X / 1 == X
if (CI2->equalsInt(0))
return UndefValue::get(CI2->getType()); // X / 0 == undef
break;
@@ -658,11 +660,11 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return UndefValue::get(CI2->getType()); // X % 0 == undef
break;
case Instruction::And:
- if (CI2->isZero()) return const_cast<Constant*>(C2); // X & 0 == 0
+ if (CI2->isZero()) return C2; // X & 0 == 0
if (CI2->isAllOnesValue())
- return const_cast<Constant*>(C1); // X & -1 == X
-
- if (const ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ return C1; // X & -1 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
// (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
if (CE1->getOpcode() == Instruction::ZExt) {
unsigned DstWidth = CI2->getType()->getBitWidth();
@@ -670,19 +672,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
- return const_cast<Constant*>(C1);
+ return C1;
}
-
+
// If and'ing the address of a global with a constant, fold it.
if (CE1->getOpcode() == Instruction::PtrToInt &&
isa<GlobalValue>(CE1->getOperand(0))) {
GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
-
+
// Functions are at least 4-byte aligned.
unsigned GVAlign = GV->getAlignment();
if (isa<Function>(GV))
GVAlign = std::max(GVAlign, 4U);
-
+
if (GVAlign > 1) {
unsigned DstWidth = CI2->getType()->getBitWidth();
unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
@@ -696,26 +698,39 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
break;
case Instruction::Or:
- if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X | 0 == X
+ if (CI2->equalsInt(0)) return C1; // X | 0 == X
if (CI2->isAllOnesValue())
- return const_cast<Constant*>(C2); // X | -1 == -1
+ return C2; // X | -1 == -1
break;
case Instruction::Xor:
- if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X ^ 0 == X
+ if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ switch (CE1->getOpcode()) {
+ default: break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // cmp pred ^ true -> cmp !pred
+ assert(CI2->equalsInt(1));
+ CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
+ pred = CmpInst::getInversePredicate(pred);
+ return ConstantExpr::getCompare(pred, CE1->getOperand(0),
+ CE1->getOperand(1));
+ }
+ }
break;
case Instruction::AShr:
// ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
- if (const ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
- return ConstantExpr::getLShr(const_cast<Constant*>(C1),
- const_cast<Constant*>(C2));
+ return ConstantExpr::getLShr(C1, C2);
break;
}
}
-
+
// At this point we know neither constant is an UndefValue.
- if (const ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
- if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
using namespace APIntOps;
const APInt &C1V = CI1->getValue();
const APInt &C2V = CI2->getValue();
@@ -723,51 +738,51 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
default:
break;
case Instruction::Add:
- return ConstantInt::get(C1V + C2V);
+ return ConstantInt::get(Context, C1V + C2V);
case Instruction::Sub:
- return ConstantInt::get(C1V - C2V);
+ return ConstantInt::get(Context, C1V - C2V);
case Instruction::Mul:
- return ConstantInt::get(C1V * C2V);
+ return ConstantInt::get(Context, C1V * C2V);
case Instruction::UDiv:
assert(!CI2->isNullValue() && "Div by zero handled above");
- return ConstantInt::get(C1V.udiv(C2V));
+ return ConstantInt::get(Context, C1V.udiv(C2V));
case Instruction::SDiv:
assert(!CI2->isNullValue() && "Div by zero handled above");
if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
- return ConstantInt::get(C1V.sdiv(C2V));
+ return ConstantInt::get(Context, C1V.sdiv(C2V));
case Instruction::URem:
assert(!CI2->isNullValue() && "Div by zero handled above");
- return ConstantInt::get(C1V.urem(C2V));
+ return ConstantInt::get(Context, C1V.urem(C2V));
case Instruction::SRem:
assert(!CI2->isNullValue() && "Div by zero handled above");
if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
- return ConstantInt::get(C1V.srem(C2V));
+ return ConstantInt::get(Context, C1V.srem(C2V));
case Instruction::And:
- return ConstantInt::get(C1V & C2V);
+ return ConstantInt::get(Context, C1V & C2V);
case Instruction::Or:
- return ConstantInt::get(C1V | C2V);
+ return ConstantInt::get(Context, C1V | C2V);
case Instruction::Xor:
- return ConstantInt::get(C1V ^ C2V);
+ return ConstantInt::get(Context, C1V ^ C2V);
case Instruction::Shl: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(C1V.shl(shiftAmt));
+ return ConstantInt::get(Context, C1V.shl(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::LShr: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(C1V.lshr(shiftAmt));
+ return ConstantInt::get(Context, C1V.lshr(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::AShr: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(C1V.ashr(shiftAmt));
+ return ConstantInt::get(Context, C1V.ashr(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
@@ -782,13 +797,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::LShr:
case Instruction::AShr:
case Instruction::Shl:
- if (CI1->equalsInt(0)) return const_cast<Constant*>(C1);
+ if (CI1->equalsInt(0)) return C1;
break;
default:
break;
}
- } else if (const ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
- if (const ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
+ } else if (ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
+ if (ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
APFloat C1V = CFP1->getValueAPF();
APFloat C2V = CFP2->getValueAPF();
APFloat C3V = C1V; // copy for modification
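The callback-driven EvalVectorOp loop deleted earlier in this file is inlined at each opcode in the hunk below; every case instantiates the same shape, shown once here as a sketch with getAdd standing in for the per-opcode getter:

    std::vector<Constant*> Res;
    const Type *EltTy = VTy->getElementType();
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
      // A null ConstantVector operand stands for ConstantAggregateZero.
      Constant *LHS = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
      Constant *RHS = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
      Res.push_back(ConstantExpr::getAdd(LHS, RHS));
    }
    return ConstantVector::get(Res);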
@@ -797,65 +812,159 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
break;
case Instruction::FAdd:
(void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C3V);
+ return ConstantFP::get(Context, C3V);
case Instruction::FSub:
(void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C3V);
+ return ConstantFP::get(Context, C3V);
case Instruction::FMul:
(void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C3V);
+ return ConstantFP::get(Context, C3V);
case Instruction::FDiv:
(void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C3V);
+ return ConstantFP::get(Context, C3V);
case Instruction::FRem:
(void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C3V);
+ return ConstantFP::get(Context, C3V);
}
}
} else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
- const ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
- const ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
+ ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
+ ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
(CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
+ std::vector<Constant*> Res;
+ const Type* EltTy = VTy->getElementType();
+ Constant *C1 = 0;
+ Constant *C2 = 0;
switch (Opcode) {
default:
break;
case Instruction::Add:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAdd);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAdd(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::FAdd:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFAdd);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFAdd(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::Sub:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSub);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSub(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::FSub:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFSub);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFSub(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::Mul:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getMul);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getMul(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::FMul:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFMul);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFMul(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::UDiv:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getUDiv);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getUDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::SDiv:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSDiv);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::FDiv:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFDiv);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::URem:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getURem);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getURem(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::SRem:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSRem);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSRem(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::FRem:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFRem);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFRem(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::And:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAnd);
- case Instruction::Or:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getOr);
- case Instruction::Xor:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getXor);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAnd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Or:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getOr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Xor:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getXor(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::LShr:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getLShr);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getLShr(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::AShr:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAShr);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAShr(C1, C2));
+ }
+ return ConstantVector::get(Res);
case Instruction::Shl:
- return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getShl);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getShl(C1, C2));
+ }
+ return ConstantVector::get(Res);
}
}
}
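
The loops above all share one shape: each lane of the result is folded independently, and a ConstantAggregateZero operand stands in for a vector whose every element is the null value of the element type. A stand-alone C++ sketch of that shape, with plain ints in place of LLVM Constants (names here are illustrative, not LLVM API):

#include <functional>
#include <vector>

// Element-wise fold where a null operand vector (LLVM's
// ConstantAggregateZero) is expanded to per-element zeros.
std::vector<int> foldElementwise(const std::vector<int> *A,
                                 const std::vector<int> *B, size_t N,
                                 const std::function<int(int, int)> &Op) {
  std::vector<int> Res;
  Res.reserve(N);
  for (size_t i = 0; i != N; ++i) {
    int X = A ? (*A)[i] : 0; // missing operand acts as all-zero vector
    int Y = B ? (*B)[i] : 0;
    Res.push_back(Op(X, Y));
  }
  return Res;
}
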
@@ -876,8 +985,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::Or:
case Instruction::Xor:
// No change of opcode required.
- return ConstantFoldBinaryInstruction(Opcode, C2, C1);
-
+ return ConstantFoldBinaryInstruction(Context, Opcode, C2, C1);
+
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
@@ -893,7 +1002,36 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
break;
}
}
-
+
+ // i1 can be simplified in many cases.
+ if (C1->getType() == Type::getInt1Ty(Context)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ return ConstantExpr::getXor(C1, C2);
+ case Instruction::Mul:
+ return ConstantExpr::getAnd(C1, C2);
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // We can assume that C2 == 0. If it were one the result would be
+ // undefined because the shift value is as large as the bitwidth.
+ return C1;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return C1;
+ case Instruction::URem:
+ case Instruction::SRem:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return ConstantInt::getFalse(Context);
+ default:
+ break;
+ }
+ }
+
// We don't know how to fold this.
return 0;
}
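
A quick check of the i1 identities the new block relies on: addition and subtraction modulo 2 are both xor, and multiplication is and. The shift and division cases lean on undefined behavior instead: a shift amount of 1 on an i1 and a divisor of 0 are both undefined, so the folder may assume the only defined operand value. A minimal stand-alone verification of the arithmetic identities over all four input pairs:

#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert(((A + B) & 1) == (A ^ B));     // i1 add is xor
      assert(((A - B + 2) & 1) == (A ^ B)); // i1 sub is xor too
                                            // (+2 keeps it non-negative)
      assert((A * B) == (A & B));           // i1 mul is and
    }
  return 0;
}
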
@@ -922,7 +1060,8 @@ static bool isMaybeZeroSizedType(const Type *Ty) {
/// first is less than the second, return -1, if the second is less than the
/// first, return 1. If the constants are not integral, return -2.
///
-static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
+static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2,
+ const Type *ElTy) {
if (C1 == C2) return 0;
// Ok, we found a different index. If they are not ConstantInt, we can't do
@@ -932,11 +1071,11 @@ static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
// Ok, we have two differing integer indices. Sign extend them to be the same
// type. Long is always big enough, so we use it.
- if (C1->getType() != Type::Int64Ty)
- C1 = ConstantExpr::getSExt(C1, Type::Int64Ty);
+ if (C1->getType() != Type::getInt64Ty(Context))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
- if (C2->getType() != Type::Int64Ty)
- C2 = ConstantExpr::getSExt(C2, Type::Int64Ty);
+ if (C2->getType() != Type::getInt64Ty(Context))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
if (C1 == C2) return 0; // They are equal
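
The widening step matters because the two indices may have different bit widths; sign-extending both to i64 gives a common domain in which an ordinary signed comparison is meaningful. Restated over plain integers (illustration only, not the LLVM types):

#include <cstdint>

// Compare two GEP indices of different widths by sign-extending both
// to 64 bits first, mirroring the getSExt calls above.
int idxCompare(int32_t C1, int16_t C2) {
  int64_t A = static_cast<int64_t>(C1); // sext i32 -> i64
  int64_t B = static_cast<int64_t>(C2); // sext i16 -> i64
  if (A == B) return 0;
  return A < B ? -1 : 1;
}
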
@@ -965,13 +1104,13 @@ static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
/// To simplify this code we canonicalize the relation so that the first
/// operand is always the most "complex" of the two. We consider ConstantFP
/// to be the simplest, and ConstantExprs to be the most complex.
-static FCmpInst::Predicate evaluateFCmpRelation(const Constant *V1,
- const Constant *V2) {
+static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context,
+ Constant *V1, Constant *V2) {
assert(V1->getType() == V2->getType() &&
"Cannot compare values of different types!");
// No compile-time operations on this type yet.
- if (V1->getType() == Type::PPC_FP128Ty)
+ if (V1->getType()->isPPC_FP128Ty())
return FCmpInst::BAD_FCMP_PREDICATE;
// Handle degenerate case quickly
@@ -981,33 +1120,31 @@ static FCmpInst::Predicate evaluateFCmpRelation(const Constant *V1,
if (!isa<ConstantExpr>(V2)) {
      // We distilled this down to a simple case. Use the standard constant
      // folder for a few cases.
ConstantInt *R = 0;
- Constant *C1 = const_cast<Constant*>(V1);
- Constant *C2 = const_cast<Constant*>(V2);
R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, C1, C2));
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
if (R && !R->isZero())
return FCmpInst::FCMP_OEQ;
R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, C1, C2));
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
if (R && !R->isZero())
return FCmpInst::FCMP_OLT;
R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, C1, C2));
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
if (R && !R->isZero())
return FCmpInst::FCMP_OGT;
// Nothing more we can do
return FCmpInst::BAD_FCMP_PREDICATE;
}
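
The probing above tries each ordered relation in turn: fold the compare as a constant expression, and if the result is a known non-zero ConstantInt, that relation holds. A stand-alone analogue over doubles, with NaN mapping to the same "no answer" outcome as BAD_FCMP_PREDICATE (sketch only):

#include <cmath>

enum Relation { RelEQ, RelLT, RelGT, RelUnknown };

// Classify the ordered relation of two doubles; unordered inputs
// (NaN) yield no usable answer, like BAD_FCMP_PREDICATE above.
Relation evaluateRelation(double A, double B) {
  if (std::isnan(A) || std::isnan(B)) return RelUnknown;
  if (A == B) return RelEQ;
  return A < B ? RelLT : RelGT;
}
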
-
+
// If the first operand is simple and second is ConstantExpr, swap operands.
- FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
+ FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(Context, V2, V1);
if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
return FCmpInst::getSwappedPredicate(SwappedRelation);
} else {
// Ok, the LHS is known to be a constantexpr. The RHS can be any of a
// constantexpr or a simple constant.
- const ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
switch (CE1->getOpcode()) {
case Instruction::FPTrunc:
case Instruction::FPExt:
@@ -1036,8 +1173,9 @@ static FCmpInst::Predicate evaluateFCmpRelation(const Constant *V1,
/// constants (like ConstantInt) to be the simplest, followed by
/// GlobalValues, followed by ConstantExpr's (the most complex).
///
-static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
- const Constant *V2,
+static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context,
+ Constant *V1,
+ Constant *V2,
bool isSigned) {
assert(V1->getType() == V2->getType() &&
"Cannot compare different types of values!");
@@ -1048,35 +1186,33 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
// We distilled this down to a simple case, use the standard constant
// folder.
ConstantInt *R = 0;
- Constant *C1 = const_cast<Constant*>(V1);
- Constant *C2 = const_cast<Constant*>(V2);
ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, C1, C2));
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
if (R && !R->isZero())
return pred;
pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, C1, C2));
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
if (R && !R->isZero())
return pred;
- pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, C1, C2));
+ pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
if (R && !R->isZero())
return pred;
-
+
// If we couldn't figure it out, bail.
return ICmpInst::BAD_ICMP_PREDICATE;
}
-
+
// If the first operand is simple, swap operands.
ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
+ evaluateICmpRelation(Context, V2, V1, isSigned);
if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
return ICmpInst::getSwappedPredicate(SwappedRelation);
} else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(V1)) {
if (isa<ConstantExpr>(V2)) { // Swap as necessary.
ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
+ evaluateICmpRelation(Context, V2, V1, isSigned);
if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
return ICmpInst::getSwappedPredicate(SwappedRelation);
else
@@ -1099,8 +1235,8 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
} else {
// Ok, the LHS is known to be a constantexpr. The RHS can be any of a
// constantexpr, a CPR, or a simple constant.
- const ConstantExpr *CE1 = cast<ConstantExpr>(V1);
- const Constant *CE1Op0 = CE1->getOperand(0);
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ Constant *CE1Op0 = CE1->getOperand(0);
switch (CE1->getOpcode()) {
case Instruction::Trunc:
@@ -1119,28 +1255,12 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
// null pointer, do the comparison with the pre-casted value.
if (V2->isNullValue() &&
(isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) {
- bool sgnd = isSigned;
if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
- return evaluateICmpRelation(CE1Op0,
+ return evaluateICmpRelation(Context, CE1Op0,
Constant::getNullValue(CE1Op0->getType()),
- sgnd);
+ isSigned);
}
-
- // If the dest type is a pointer type, and the RHS is a constantexpr cast
- // from the same type as the src of the LHS, evaluate the inputs. This is
- // important for things like "icmp eq (cast 4 to int*), (cast 5 to int*)",
- // which happens a lot in compilers with tagged integers.
- if (const ConstantExpr *CE2 = dyn_cast<ConstantExpr>(V2))
- if (CE2->isCast() && isa<PointerType>(CE1->getType()) &&
- CE1->getOperand(0)->getType() == CE2->getOperand(0)->getType() &&
- CE1->getOperand(0)->getType()->isInteger()) {
- bool sgnd = isSigned;
- if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
- if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
- return evaluateICmpRelation(CE1->getOperand(0), CE2->getOperand(0),
- sgnd);
- }
break;
case Instruction::GetElementPtr:
@@ -1157,7 +1277,7 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
else
// If its not weak linkage, the GVal must have a non-zero address
// so the result is greater-than
- return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
} else if (isa<ConstantPointerNull>(CE1Op0)) {
// If we are indexing from a null pointer, check to see if we have any
// non-zero indices.
@@ -1196,8 +1316,8 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
}
}
} else {
- const ConstantExpr *CE2 = cast<ConstantExpr>(V2);
- const Constant *CE2Op0 = CE2->getOperand(0);
+ ConstantExpr *CE2 = cast<ConstantExpr>(V2);
+ Constant *CE2Op0 = CE2->getOperand(0);
// There are MANY other foldings that we could perform here. They will
// probably be added on demand, as they seem needed.
@@ -1214,12 +1334,20 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
// ordering of the resultant pointers.
unsigned i = 1;
+ // The logic below assumes that the result of the comparison
+ // can be determined by finding the first index that differs.
+ // This doesn't work if there is over-indexing in any
+ // subsequent indices, so check for that case first.
+ if (!CE1->isGEPWithNoNotionalOverIndexing() ||
+ !CE2->isGEPWithNoNotionalOverIndexing())
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+
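
Why the new guard is needed: index-by-index comparison is only sound when every index stays within its notional array bound, because an over-indexed GEP can alias a lexicographically larger one. A two-line demonstration with a notional [2 x i32] row:

#include <cassert>
#include <cstdint>

// With row length 2, indices (0,2) and (1,0) flatten to the same
// element, so (0,2) < (1,0) lexicographically proves nothing.
int64_t flatten(int64_t Row, int64_t Col, int64_t RowLen) {
  return Row * RowLen + Col;
}

int main() {
  assert(flatten(0, 2, 2) == flatten(1, 0, 2));
  return 0;
}
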
// Compare all of the operands the GEP's have in common.
gep_type_iterator GTI = gep_type_begin(CE1);
for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
++i, ++GTI)
- switch (IdxCompare(CE1->getOperand(i), CE2->getOperand(i),
- GTI.getIndexedType())) {
+ switch (IdxCompare(Context, CE1->getOperand(i),
+ CE2->getOperand(i), GTI.getIndexedType())) {
case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
case -2: return ICmpInst::BAD_ICMP_PREDICATE;
@@ -1254,36 +1382,28 @@ static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
return ICmpInst::BAD_ICMP_PREDICATE;
}
-Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
- const Constant *C1,
- const Constant *C2) {
+Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context,
+ unsigned short pred,
+ Constant *C1, Constant *C2) {
+ const Type *ResultTy;
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ ResultTy = VectorType::get(Type::getInt1Ty(Context), VT->getNumElements());
+ else
+ ResultTy = Type::getInt1Ty(Context);
+
// Fold FCMP_FALSE/FCMP_TRUE unconditionally.
- if (pred == FCmpInst::FCMP_FALSE) {
- if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
- return Constant::getNullValue(VectorType::getInteger(VT));
- else
- return ConstantInt::getFalse();
- }
-
- if (pred == FCmpInst::FCMP_TRUE) {
- if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
- return Constant::getAllOnesValue(VectorType::getInteger(VT));
- else
- return ConstantInt::getTrue();
- }
-
+ if (pred == FCmpInst::FCMP_FALSE)
+ return Constant::getNullValue(ResultTy);
+
+ if (pred == FCmpInst::FCMP_TRUE)
+ return Constant::getAllOnesValue(ResultTy);
+
// Handle some degenerate cases first
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
- // vicmp/vfcmp -> [vector] undef
- if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType()))
- return UndefValue::get(VectorType::getInteger(VTy));
-
- // icmp/fcmp -> i1 undef
- return UndefValue::get(Type::Int1Ty);
- }
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
+ return UndefValue::get(ResultTy);
// No compile-time operations on this type yet.
- if (C1->getType() == Type::PPC_FP128Ty)
+ if (C1->getType()->isPPC_FP128Ty())
return 0;
// icmp eq/ne(null,GV) -> false/true
@@ -1292,9 +1412,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// Don't try to evaluate aliases. External weak GV can be null.
if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
if (pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse();
+ return ConstantInt::getFalse(Context);
else if (pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue();
+ return ConstantInt::getTrue(Context);
}
// icmp eq/ne(GV,null) -> false/true
} else if (C2->isNullValue()) {
@@ -1302,114 +1422,115 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// Don't try to evaluate aliases. External weak GV can be null.
if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
if (pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse();
+ return ConstantInt::getFalse(Context);
else if (pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue();
+ return ConstantInt::getTrue(Context);
}
}
+ // If the comparison is a comparison between two i1's, simplify it.
+ if (C1->getType() == Type::getInt1Ty(Context)) {
+ switch(pred) {
+ case ICmpInst::ICMP_EQ:
+ if (isa<ConstantInt>(C2))
+ return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2));
+ return ConstantExpr::getXor(ConstantExpr::getNot(C1), C2);
+ case ICmpInst::ICMP_NE:
+ return ConstantExpr::getXor(C1, C2);
+ default:
+ break;
+ }
+ }
+
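
The i1 comparison rules above follow from two-valued logic: inequality of bits is exactly xor, and equality is its complement (xor with the negated operand). A stand-alone truth-table check:

#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert((A != B) == (A ^ B));       // i1 ne is xor
      assert((A == B) == (A ^ (B ^ 1))); // i1 eq is xor with not
    }
  return 0;
}
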
if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
APInt V1 = cast<ConstantInt>(C1)->getValue();
APInt V2 = cast<ConstantInt>(C2)->getValue();
switch (pred) {
- default: assert(0 && "Invalid ICmp Predicate"); return 0;
- case ICmpInst::ICMP_EQ: return ConstantInt::get(Type::Int1Ty, V1 == V2);
- case ICmpInst::ICMP_NE: return ConstantInt::get(Type::Int1Ty, V1 != V2);
- case ICmpInst::ICMP_SLT:return ConstantInt::get(Type::Int1Ty, V1.slt(V2));
- case ICmpInst::ICMP_SGT:return ConstantInt::get(Type::Int1Ty, V1.sgt(V2));
- case ICmpInst::ICMP_SLE:return ConstantInt::get(Type::Int1Ty, V1.sle(V2));
- case ICmpInst::ICMP_SGE:return ConstantInt::get(Type::Int1Ty, V1.sge(V2));
- case ICmpInst::ICMP_ULT:return ConstantInt::get(Type::Int1Ty, V1.ult(V2));
- case ICmpInst::ICMP_UGT:return ConstantInt::get(Type::Int1Ty, V1.ugt(V2));
- case ICmpInst::ICMP_ULE:return ConstantInt::get(Type::Int1Ty, V1.ule(V2));
- case ICmpInst::ICMP_UGE:return ConstantInt::get(Type::Int1Ty, V1.uge(V2));
+ default: llvm_unreachable("Invalid ICmp Predicate"); return 0;
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1 == V2);
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1 != V2);
+ case ICmpInst::ICMP_SLT:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.slt(V2));
+ case ICmpInst::ICMP_SGT:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.sgt(V2));
+ case ICmpInst::ICMP_SLE:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.sle(V2));
+ case ICmpInst::ICMP_SGE:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.sge(V2));
+ case ICmpInst::ICMP_ULT:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.ult(V2));
+ case ICmpInst::ICMP_UGT:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.ugt(V2));
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.ule(V2));
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::get(Type::getInt1Ty(Context), V1.uge(V2));
}
} else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
APFloat::cmpResult R = C1V.compare(C2V);
switch (pred) {
- default: assert(0 && "Invalid FCmp Predicate"); return 0;
- case FCmpInst::FCMP_FALSE: return ConstantInt::getFalse();
- case FCmpInst::FCMP_TRUE: return ConstantInt::getTrue();
+ default: llvm_unreachable("Invalid FCmp Predicate"); return 0;
+ case FCmpInst::FCMP_FALSE: return ConstantInt::getFalse(Context);
+ case FCmpInst::FCMP_TRUE: return ConstantInt::getTrue(Context);
case FCmpInst::FCMP_UNO:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered);
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered);
case FCmpInst::FCMP_ORD:
- return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpUnordered);
+ return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpUnordered);
case FCmpInst::FCMP_UEQ:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered ||
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered ||
R==APFloat::cmpEqual);
case FCmpInst::FCMP_OEQ:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpEqual);
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpEqual);
case FCmpInst::FCMP_UNE:
- return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpEqual);
+ return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpEqual);
case FCmpInst::FCMP_ONE:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpLessThan ||
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan ||
R==APFloat::cmpGreaterThan);
case FCmpInst::FCMP_ULT:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered ||
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered ||
R==APFloat::cmpLessThan);
case FCmpInst::FCMP_OLT:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpLessThan);
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan);
case FCmpInst::FCMP_UGT:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered ||
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered ||
R==APFloat::cmpGreaterThan);
case FCmpInst::FCMP_OGT:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpGreaterThan);
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpGreaterThan);
case FCmpInst::FCMP_ULE:
- return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpGreaterThan);
+ return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpGreaterThan);
case FCmpInst::FCMP_OLE:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpLessThan ||
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual);
case FCmpInst::FCMP_UGE:
- return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpLessThan);
+ return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpLessThan);
case FCmpInst::FCMP_OGE:
- return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpGreaterThan ||
+ return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual);
}
} else if (isa<VectorType>(C1->getType())) {
SmallVector<Constant*, 16> C1Elts, C2Elts;
- C1->getVectorElements(C1Elts);
- C2->getVectorElements(C2Elts);
-
+ C1->getVectorElements(Context, C1Elts);
+ C2->getVectorElements(Context, C2Elts);
+
// If we can constant fold the comparison of each element, constant fold
// the whole vector comparison.
SmallVector<Constant*, 4> ResElts;
- const Type *InEltTy = C1Elts[0]->getType();
- bool isFP = InEltTy->isFloatingPoint();
- const Type *ResEltTy = InEltTy;
- if (isFP)
- ResEltTy = IntegerType::get(InEltTy->getPrimitiveSizeInBits());
-
for (unsigned i = 0, e = C1Elts.size(); i != e; ++i) {
// Compare the elements, producing an i1 result or constant expr.
- Constant *C;
- if (isFP)
- C = ConstantExpr::getFCmp(pred, C1Elts[i], C2Elts[i]);
- else
- C = ConstantExpr::getICmp(pred, C1Elts[i], C2Elts[i]);
-
- // If it is a bool or undef result, convert to the dest type.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (CI->isZero())
- ResElts.push_back(Constant::getNullValue(ResEltTy));
- else
- ResElts.push_back(Constant::getAllOnesValue(ResEltTy));
- } else if (isa<UndefValue>(C)) {
- ResElts.push_back(UndefValue::get(ResEltTy));
- } else {
- break;
- }
+ ResElts.push_back(
+ ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
}
-
- if (ResElts.size() == C1Elts.size())
- return ConstantVector::get(&ResElts[0], ResElts.size());
+ return ConstantVector::get(&ResElts[0], ResElts.size());
}
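
The rewritten vector case now folds every lane to an i1 (or a per-lane constant expression) via getCompare, instead of re-widening bool results into integer lanes. The per-lane shape, sketched with plain ints (not the LLVM API):

#include <vector>

// One 0/1 result per lane, like the ResElts loop above.
std::vector<int> compareLanes(const std::vector<int> &A,
                              const std::vector<int> &B) {
  std::vector<int> Res;
  Res.reserve(A.size());
  for (size_t i = 0; i != A.size(); ++i)
    Res.push_back(A[i] < B[i] ? 1 : 0);
  return Res;
}
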
if (C1->getType()->isFloatingPoint()) {
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateFCmpRelation(C1, C2)) {
- default: assert(0 && "Unknown relation!");
+ switch (evaluateFCmpRelation(Context, C1, C2)) {
+ default: llvm_unreachable("Unknown relation!");
case FCmpInst::FCMP_UNO:
case FCmpInst::FCMP_ORD:
case FCmpInst::FCMP_UEQ:
@@ -1459,110 +1580,115 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
Result = 1;
break;
}
-
+
// If we evaluated the result, return it now.
- if (Result != -1) {
- if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) {
- if (Result == 0)
- return Constant::getNullValue(VectorType::getInteger(VT));
- else
- return Constant::getAllOnesValue(VectorType::getInteger(VT));
- }
- return ConstantInt::get(Type::Int1Ty, Result);
- }
-
+ if (Result != -1)
+ return ConstantInt::get(Type::getInt1Ty(Context), Result);
+
} else {
// Evaluate the relation between the two constants, per the predicate.
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
- default: assert(0 && "Unknown relational!");
+ switch (evaluateICmpRelation(Context, C1, C2, CmpInst::isSigned(pred))) {
+ default: llvm_unreachable("Unknown relational!");
case ICmpInst::BAD_ICMP_PREDICATE:
break; // Couldn't determine anything about these constants.
case ICmpInst::ICMP_EQ: // We know the constants are equal!
// If we know the constants are equal, we can decide the result of this
// computation precisely.
- Result = (pred == ICmpInst::ICMP_EQ ||
- pred == ICmpInst::ICMP_ULE ||
- pred == ICmpInst::ICMP_SLE ||
- pred == ICmpInst::ICMP_UGE ||
- pred == ICmpInst::ICMP_SGE);
+ Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred);
break;
case ICmpInst::ICMP_ULT:
- // If we know that C1 < C2, we can decide the result of this computation
- // precisely.
- Result = (pred == ICmpInst::ICMP_ULT ||
- pred == ICmpInst::ICMP_NE ||
- pred == ICmpInst::ICMP_ULE);
+ switch (pred) {
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE:
+ Result = 1; break;
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE:
+ Result = 0; break;
+ }
break;
case ICmpInst::ICMP_SLT:
- // If we know that C1 < C2, we can decide the result of this computation
- // precisely.
- Result = (pred == ICmpInst::ICMP_SLT ||
- pred == ICmpInst::ICMP_NE ||
- pred == ICmpInst::ICMP_SLE);
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE:
+ Result = 0; break;
+ }
break;
case ICmpInst::ICMP_UGT:
- // If we know that C1 > C2, we can decide the result of this computation
- // precisely.
- Result = (pred == ICmpInst::ICMP_UGT ||
- pred == ICmpInst::ICMP_NE ||
- pred == ICmpInst::ICMP_UGE);
+ switch (pred) {
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE:
+ Result = 0; break;
+ }
break;
case ICmpInst::ICMP_SGT:
- // If we know that C1 > C2, we can decide the result of this computation
- // precisely.
- Result = (pred == ICmpInst::ICMP_SGT ||
- pred == ICmpInst::ICMP_NE ||
- pred == ICmpInst::ICMP_SGE);
+ switch (pred) {
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE:
+ Result = 0; break;
+ }
break;
case ICmpInst::ICMP_ULE:
- // If we know that C1 <= C2, we can only partially decide this relation.
if (pred == ICmpInst::ICMP_UGT) Result = 0;
- if (pred == ICmpInst::ICMP_ULT) Result = 1;
+ if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1;
break;
case ICmpInst::ICMP_SLE:
- // If we know that C1 <= C2, we can only partially decide this relation.
if (pred == ICmpInst::ICMP_SGT) Result = 0;
- if (pred == ICmpInst::ICMP_SLT) Result = 1;
+ if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1;
break;
-
case ICmpInst::ICMP_UGE:
- // If we know that C1 >= C2, we can only partially decide this relation.
if (pred == ICmpInst::ICMP_ULT) Result = 0;
- if (pred == ICmpInst::ICMP_UGT) Result = 1;
+ if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1;
break;
case ICmpInst::ICMP_SGE:
- // If we know that C1 >= C2, we can only partially decide this relation.
if (pred == ICmpInst::ICMP_SLT) Result = 0;
- if (pred == ICmpInst::ICMP_SGT) Result = 1;
+ if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1;
break;
-
case ICmpInst::ICMP_NE:
- // If we know that C1 != C2, we can only partially decide this relation.
if (pred == ICmpInst::ICMP_EQ) Result = 0;
if (pred == ICmpInst::ICMP_NE) Result = 1;
break;
}
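
The switch above encodes a small implication table: a known relation between C1 and C2 settles any queried predicate it implies (true) or contradicts (false), and leaves everything else at -1. For example, once C1 <u C2 is established (hedged stand-alone sketch, hypothetical names):

#include <cstring>

// Tri-state decision given the known relation a <u b: unsigned
// less-than implies ult/ule/ne, contradicts ugt/uge/eq, and says
// nothing about the signed predicates.
int decideGivenULT(const char *Pred) {
  if (!std::strcmp(Pred, "ult") || !std::strcmp(Pred, "ule") ||
      !std::strcmp(Pred, "ne"))
    return 1;
  if (!std::strcmp(Pred, "ugt") || !std::strcmp(Pred, "uge") ||
      !std::strcmp(Pred, "eq"))
    return 0;
  return -1; // unknown
}
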
-
+
// If we evaluated the result, return it now.
- if (Result != -1) {
- if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) {
- if (Result == 0)
- return Constant::getNullValue(VT);
- else
- return Constant::getAllOnesValue(VT);
+ if (Result != -1)
+ return ConstantInt::get(Type::getInt1Ty(Context), Result);
+
+ // If the right hand side is a bitcast, try using its inverse to simplify
+ // it by moving it to the left hand side.
+ if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
+ if (CE2->getOpcode() == Instruction::BitCast) {
+ Constant *CE2Op0 = CE2->getOperand(0);
+ Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
+ return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
}
- return ConstantInt::get(Type::Int1Ty, Result);
}
-
+
+ // If the left hand side is an extension, try eliminating it.
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ if (CE1->getOpcode() == Instruction::SExt ||
+ CE1->getOpcode() == Instruction::ZExt) {
+ Constant *CE1Op0 = CE1->getOperand(0);
+ Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
+ if (CE1Inverse == CE1Op0) {
+ // Check whether we can safely truncate the right hand side.
+ Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
+ if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) {
+ return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse);
+ }
+ }
+ }
+ }
+
if (!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) {
- // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // If C2 is a constant expr and C1 isn't, flip them around and fold the
// other way if possible.
switch (pred) {
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
// No change of predicate required.
- return ConstantFoldCompareInstruction(pred, C2, C1);
+ return ConstantFoldCompareInstruction(Context, pred, C2, C1);
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
@@ -1574,7 +1700,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
case ICmpInst::ICMP_SGE:
// Change the predicate as necessary to swap the operands.
pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
- return ConstantFoldCompareInstruction(pred, C2, C1);
+ return ConstantFoldCompareInstruction(Context, pred, C2, C1);
default: // These predicates cannot be flopped around.
break;
@@ -1584,12 +1710,33 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return 0;
}
-Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
+/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
+/// is "inbounds".
+static bool isInBoundsIndices(Constant *const *Idxs, size_t NumIdx) {
+ // No indices means nothing that could be out of bounds.
+ if (NumIdx == 0) return true;
+
+ // If the first index is zero, it's in bounds.
+ if (Idxs[0]->isNullValue()) return true;
+
+ // If the first index is one and all the rest are zero, it's in bounds,
+ // by the one-past-the-end rule.
+ if (!cast<ConstantInt>(Idxs[0])->isOne())
+ return false;
+ for (unsigned i = 1, e = NumIdx; i != e; ++i)
+ if (!Idxs[i]->isNullValue())
+ return false;
+ return true;
+}
+
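
The isInBoundsIndices rule, restated over plain integers for already-normalized indices: a leading 0 is in bounds, and a leading 1 followed by all zeros is the one-past-the-end case. A runnable sketch:

#include <cassert>
#include <vector>

bool inBoundsIndices(const std::vector<long> &Idxs) {
  if (Idxs.empty()) return true;
  if (Idxs[0] == 0) return true;
  if (Idxs[0] != 1) return false;
  for (size_t i = 1; i != Idxs.size(); ++i)
    if (Idxs[i] != 0) return false;
  return true;
}

int main() {
  assert(inBoundsIndices({0, 7}));
  assert(inBoundsIndices({1, 0, 0})); // one past the end
  assert(!inBoundsIndices({2, 0}));
  return 0;
}
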
+Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context,
+ Constant *C,
+ bool inBounds,
Constant* const *Idxs,
unsigned NumIdx) {
if (NumIdx == 0 ||
(NumIdx == 1 && Idxs[0]->isNullValue()))
- return const_cast<Constant*>(C);
+ return C;
if (isa<UndefValue>(C)) {
const PointerType *Ptr = cast<PointerType>(C->getType());
@@ -1614,12 +1761,12 @@ Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
(Value**)Idxs,
(Value**)Idxs+NumIdx);
assert(Ty != 0 && "Invalid indices for GEP!");
- return
- ConstantPointerNull::get(PointerType::get(Ty,Ptr->getAddressSpace()));
+ return ConstantPointerNull::get(
+ PointerType::get(Ty,Ptr->getAddressSpace()));
}
}
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(const_cast<Constant*>(C))) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
// Combine Indices - If the source pointer to this getelementptr instruction
// is a getelementptr instruction, combine the indices of the two
// getelementptr instructions into a single instruction.
@@ -1643,9 +1790,10 @@ Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
if (!Idx0->isNullValue()) {
const Type *IdxTy = Combined->getType();
if (IdxTy != Idx0->getType()) {
- Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Type::Int64Ty);
+ Constant *C1 =
+ ConstantExpr::getSExtOrBitCast(Idx0, Type::getInt64Ty(Context));
Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined,
- Type::Int64Ty);
+ Type::getInt64Ty(Context));
Combined = ConstantExpr::get(Instruction::Add, C1, C2);
} else {
Combined =
@@ -1655,8 +1803,13 @@ Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
NewIndices.push_back(Combined);
NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
- return ConstantExpr::getGetElementPtr(CE->getOperand(0), &NewIndices[0],
- NewIndices.size());
+ return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
+ ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
+ &NewIndices[0],
+ NewIndices.size()) :
+ ConstantExpr::getGetElementPtr(CE->getOperand(0),
+ &NewIndices[0],
+ NewIndices.size());
}
}
@@ -1672,19 +1825,23 @@ Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
if (const ArrayType *CAT =
dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
if (CAT->getElementType() == SAT->getElementType())
- return ConstantExpr::getGetElementPtr(
+ return inBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(
+ (Constant*)CE->getOperand(0), Idxs, NumIdx) :
+ ConstantExpr::getGetElementPtr(
(Constant*)CE->getOperand(0), Idxs, NumIdx);
}
-
+
// Fold: getelementptr (i8* inttoptr (i64 1 to i8*), i32 -1)
// Into: inttoptr (i64 0 to i8*)
// This happens with pointers to member functions in C++.
if (CE->getOpcode() == Instruction::IntToPtr && NumIdx == 1 &&
isa<ConstantInt>(CE->getOperand(0)) && isa<ConstantInt>(Idxs[0]) &&
- cast<PointerType>(CE->getType())->getElementType() == Type::Int8Ty) {
+ cast<PointerType>(CE->getType())->getElementType() ==
+ Type::getInt8Ty(Context)) {
Constant *Base = CE->getOperand(0);
Constant *Offset = Idxs[0];
-
+
// Convert the smaller integer to the larger type.
if (Offset->getType()->getPrimitiveSizeInBits() <
Base->getType()->getPrimitiveSizeInBits())
@@ -1692,11 +1849,74 @@ Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
else if (Base->getType()->getPrimitiveSizeInBits() <
Offset->getType()->getPrimitiveSizeInBits())
Base = ConstantExpr::getZExt(Base, Offset->getType());
-
+
Base = ConstantExpr::getAdd(Base, Offset);
return ConstantExpr::getIntToPtr(Base, CE->getType());
}
}
+
+ // Check to see if any array indices are not within the corresponding
+ // notional array bounds. If so, try to determine if they can be factored
+ // out into preceding dimensions.
+ bool Unknown = false;
+ SmallVector<Constant *, 8> NewIdxs;
+ const Type *Ty = C->getType();
+ const Type *Prev = 0;
+ for (unsigned i = 0; i != NumIdx;
+ Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (ATy->getNumElements() <= INT64_MAX &&
+ ATy->getNumElements() != 0 &&
+ CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
+ if (isa<SequentialType>(Prev)) {
+ // It's out of range, but we can factor it into the prior
+ // dimension.
+ NewIdxs.resize(NumIdx);
+ ConstantInt *Factor = ConstantInt::get(CI->getType(),
+ ATy->getNumElements());
+ NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
+
+ Constant *PrevIdx = Idxs[i-1];
+ Constant *Div = ConstantExpr::getSDiv(CI, Factor);
+
+ // Before adding, extend both operands to i64 to avoid
+ // overflow trouble.
+ if (PrevIdx->getType() != Type::getInt64Ty(Context))
+ PrevIdx = ConstantExpr::getSExt(PrevIdx,
+ Type::getInt64Ty(Context));
+ if (Div->getType() != Type::getInt64Ty(Context))
+ Div = ConstantExpr::getSExt(Div,
+ Type::getInt64Ty(Context));
+
+ NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
+ } else {
+ // It's out of range, but the prior dimension is a struct
+ // so we can't do anything about it.
+ Unknown = true;
+ }
+ }
+ } else {
+ // We don't know if it's in range or not.
+ Unknown = true;
+ }
+ }
+
+ // If we did any factoring, start over with the adjusted indices.
+ if (!NewIdxs.empty()) {
+ for (unsigned i = 0; i != NumIdx; ++i)
+ if (!NewIdxs[i]) NewIdxs[i] = Idxs[i];
+ return inBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(),
+ NewIdxs.size()) :
+ ConstantExpr::getGetElementPtr(C, NewIdxs.data(), NewIdxs.size());
+ }
+
+ // If all indices are known integers and normalized, we can do a simple
+ // check for the "inbounds" property.
+ if (!Unknown && !inBounds &&
+ isa<GlobalVariable>(C) && isInBoundsIndices(Idxs, NumIdx))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs, NumIdx);
+
return 0;
}
-
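
The new factoring pass above turns an out-of-range index into a remainder for its own dimension plus a quotient carried into the preceding sequential dimension. Over plain integers (sketch only; the real code uses SRem/SDiv on Constants and widens to i64 before the add):

#include <cassert>
#include <cstdint>

// Split index Idx over a notional array of length N into Idx % N for
// this dimension and Idx / N carried into the previous one. Assumes
// non-negative indices for simplicity.
void factorIndex(int64_t &PrevIdx, int64_t &Idx, int64_t N) {
  PrevIdx += Idx / N;
  Idx %= N;
}

int main() {
  // Indexing element 5 of notional [2 x i32] rows: (0, 5) -> (2, 1).
  int64_t Prev = 0, Idx = 5;
  factorIndex(Prev, Idx, 2);
  assert(Prev == 2 && Idx == 1);
  return 0;
}
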
diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h
index 49aea11870af..cc97001e3cf3 100644
--- a/lib/VMCore/ConstantFold.h
+++ b/lib/VMCore/ConstantFold.h
@@ -23,37 +23,46 @@ namespace llvm {
class Value;
class Constant;
class Type;
+ class LLVMContext;
// Constant fold various types of instruction...
Constant *ConstantFoldCastInstruction(
+ LLVMContext &Context,
unsigned opcode, ///< The opcode of the cast
- const Constant *V, ///< The source constant
+ Constant *V, ///< The source constant
const Type *DestTy ///< The destination type
);
- Constant *ConstantFoldSelectInstruction(const Constant *Cond,
- const Constant *V1,
- const Constant *V2);
- Constant *ConstantFoldExtractElementInstruction(const Constant *Val,
- const Constant *Idx);
- Constant *ConstantFoldInsertElementInstruction(const Constant *Val,
- const Constant *Elt,
- const Constant *Idx);
- Constant *ConstantFoldShuffleVectorInstruction(const Constant *V1,
- const Constant *V2,
- const Constant *Mask);
- Constant *ConstantFoldExtractValueInstruction(const Constant *Agg,
+ Constant *ConstantFoldSelectInstruction(LLVMContext &Context,
+ Constant *Cond,
+ Constant *V1, Constant *V2);
+ Constant *ConstantFoldExtractElementInstruction(LLVMContext &Context,
+ Constant *Val,
+ Constant *Idx);
+ Constant *ConstantFoldInsertElementInstruction(LLVMContext &Context,
+ Constant *Val,
+ Constant *Elt,
+ Constant *Idx);
+ Constant *ConstantFoldShuffleVectorInstruction(LLVMContext &Context,
+ Constant *V1,
+ Constant *V2,
+ Constant *Mask);
+ Constant *ConstantFoldExtractValueInstruction(LLVMContext &Context,
+ Constant *Agg,
const unsigned *Idxs,
unsigned NumIdx);
- Constant *ConstantFoldInsertValueInstruction(const Constant *Agg,
- const Constant *Val,
- const unsigned* Idxs,
+ Constant *ConstantFoldInsertValueInstruction(LLVMContext &Context,
+ Constant *Agg,
+ Constant *Val,
+ const unsigned *Idxs,
unsigned NumIdx);
- Constant *ConstantFoldBinaryInstruction(unsigned Opcode, const Constant *V1,
- const Constant *V2);
- Constant *ConstantFoldCompareInstruction(unsigned short predicate,
- const Constant *C1,
- const Constant *C2);
- Constant *ConstantFoldGetElementPtr(const Constant *C,
+ Constant *ConstantFoldBinaryInstruction(LLVMContext &Context,
+ unsigned Opcode, Constant *V1,
+ Constant *V2);
+ Constant *ConstantFoldCompareInstruction(LLVMContext &Context,
+ unsigned short predicate,
+ Constant *C1, Constant *C2);
+ Constant *ConstantFoldGetElementPtr(LLVMContext &Context, Constant *C,
+ bool inBounds,
Constant* const *Idxs, unsigned NumIdx);
} // End llvm namespace
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index a9e4e78ee1f4..529c45557bc1 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -12,19 +12,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/Constants.h"
+#include "LLVMContextImpl.h"
#include "ConstantFold.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
-#include "llvm/MDNode.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/System/Mutex.h"
#include "llvm/System/RWMutex.h"
#include "llvm/System/Threading.h"
@@ -38,8 +42,64 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
-// Becomes a no-op when multithreading is disabled.
-ManagedStatic<sys::SmartRWMutex<true> > ConstantsLock;
+// Constructor to create a '0' constant of arbitrary type...
+static const uint64_t zero[2] = {0, 0};
+Constant* Constant::getNullValue(const Type* Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ return ConstantInt::get(Ty, 0);
+ case Type::FloatTyID:
+ return ConstantFP::get(Ty->getContext(), APFloat(APInt(32, 0)));
+ case Type::DoubleTyID:
+ return ConstantFP::get(Ty->getContext(), APFloat(APInt(64, 0)));
+ case Type::X86_FP80TyID:
+ return ConstantFP::get(Ty->getContext(), APFloat(APInt(80, 2, zero)));
+ case Type::FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat(APInt(128, 2, zero), true));
+ case Type::PPC_FP128TyID:
+ return ConstantFP::get(Ty->getContext(), APFloat(APInt(128, 2, zero)));
+ case Type::PointerTyID:
+ return ConstantPointerNull::get(cast<PointerType>(Ty));
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return ConstantAggregateZero::get(Ty);
+ default:
+ // Function, Label, or Opaque type?
+ assert(!"Cannot create a null constant of that type!");
+ return 0;
+ }
+}
+
+Constant* Constant::getIntegerValue(const Type* Ty, const APInt &V) {
+ const Type *ScalarTy = Ty->getScalarType();
+
+ // Create the base integer constant.
+ Constant *C = ConstantInt::get(Ty->getContext(), V);
+
+ // Convert an integer to a pointer, if necessary.
+ if (const PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
+ C = ConstantExpr::getIntToPtr(C, PTy);
+
+ // Broadcast a scalar to a vector, if necessary.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+Constant* Constant::getAllOnesValue(const Type* Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(Ty->getContext(),
+ APInt::getAllOnesValue(ITy->getBitWidth()));
+
+ std::vector<Constant*> Elts;
+ const VectorType* VTy = cast<VectorType>(Ty);
+ Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
+ assert(Elts[0] && "Not a vector integer type!");
+ return cast<ConstantVector>(ConstantVector::get(Elts));
+}
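
For integers, getAllOnesValue builds APInt::getAllOnesValue(BitWidth) and, for vectors, broadcasts it per element. The scalar case over a plain 64-bit word, for any width up to 64 (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  // All-ones value of an N-bit integer: the N low bits set.
  unsigned N = 5;
  uint64_t AllOnes = (N == 64) ? ~0ULL : ((1ULL << N) - 1);
  assert(AllOnes == 31); // 0b11111
  return 0;
}
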
void Constant::destroyConstantImpl() {
// When a Constant is destroyed, there may be lingering
@@ -52,10 +112,11 @@ void Constant::destroyConstantImpl() {
while (!use_empty()) {
Value *V = use_back();
#ifndef NDEBUG // Only in -g mode...
- if (!isa<Constant>(V))
- DOUT << "While deleting: " << *this
- << "\n\nUse still stuck around after Def is destroyed: "
- << *V << "\n\n";
+ if (!isa<Constant>(V)) {
+ errs() << "While deleting: " << *this
+ << "\n\nUse still stuck around after Def is destroyed: "
+ << *V << "\n\n";
+ }
#endif
assert(isa<Constant>(V) && "References remain to Constant being destroyed");
Constant *CV = cast<Constant>(V);
@@ -99,85 +160,33 @@ bool Constant::canTrap() const {
}
}
-/// ContainsRelocations - Return true if the constant value contains relocations
-/// which cannot be resolved at compile time. Kind argument is used to filter
-/// only 'interesting' sorts of relocations.
-bool Constant::ContainsRelocations(unsigned Kind) const {
- if (const GlobalValue* GV = dyn_cast<GlobalValue>(this)) {
- bool isLocal = GV->hasLocalLinkage();
- if ((Kind & Reloc::Local) && isLocal) {
- // Global has local linkage and 'local' kind of relocations are
- // requested
- return true;
- }
-
- if ((Kind & Reloc::Global) && !isLocal) {
- // Global has non-local linkage and 'global' kind of relocations are
- // requested
- return true;
- }
- return false;
+/// getRelocationInfo - This method classifies the entry according to
+/// whether or not it may generate a relocation entry. This must be
+/// conservative, so if it might codegen to a relocatable entry, it should say
+/// so. The return values are:
+///
+/// NoRelocation: This constant pool entry is guaranteed to never have a
+/// relocation applied to it (because it holds a simple constant like
+/// '4').
+/// LocalRelocation: This entry has relocations, but the entries are
+/// guaranteed to be resolvable by the static linker, so the dynamic
+/// linker will never see them.
+/// GlobalRelocations: This entry may have arbitrary relocations.
+///
+/// FIXME: This really should not be in VMCore.
+Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return LocalRelocation; // Local to this file/library.
+ return GlobalRelocations; // Global reference.
}
-
+
+ PossibleRelocationsTy Result = NoRelocation;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i)->ContainsRelocations(Kind))
- return true;
-
- return false;
-}
-
-// Static constructor to create a '0' constant of arbitrary type...
-static const uint64_t zero[2] = {0, 0};
-Constant *Constant::getNullValue(const Type *Ty) {
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID:
- return ConstantInt::get(Ty, 0);
- case Type::FloatTyID:
- return ConstantFP::get(APFloat(APInt(32, 0)));
- case Type::DoubleTyID:
- return ConstantFP::get(APFloat(APInt(64, 0)));
- case Type::X86_FP80TyID:
- return ConstantFP::get(APFloat(APInt(80, 2, zero)));
- case Type::FP128TyID:
- return ConstantFP::get(APFloat(APInt(128, 2, zero), true));
- case Type::PPC_FP128TyID:
- return ConstantFP::get(APFloat(APInt(128, 2, zero)));
- case Type::PointerTyID:
- return ConstantPointerNull::get(cast<PointerType>(Ty));
- case Type::StructTyID:
- case Type::ArrayTyID:
- case Type::VectorTyID:
- return ConstantAggregateZero::get(Ty);
- default:
- // Function, Label, or Opaque type?
- assert(!"Cannot create a null constant of that type!");
- return 0;
- }
-}
-
-Constant *Constant::getAllOnesValue(const Type *Ty) {
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty))
- return ConstantInt::get(APInt::getAllOnesValue(ITy->getBitWidth()));
- return ConstantVector::getAllOnesValue(cast<VectorType>(Ty));
-}
-
-// Static constructor to create an integral constant with all bits set
-ConstantInt *ConstantInt::getAllOnesValue(const Type *Ty) {
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty))
- return ConstantInt::get(APInt::getAllOnesValue(ITy->getBitWidth()));
- return 0;
-}
-
-/// @returns the value for a vector integer constant of the given type that
-/// has all its bits set to true.
-/// @brief Get the all ones value
-ConstantVector *ConstantVector::getAllOnesValue(const VectorType *Ty) {
- std::vector<Constant*> Elts;
- Elts.resize(Ty->getNumElements(),
- ConstantInt::getAllOnesValue(Ty->getElementType()));
- assert(Elts[0] && "Not a vector integer type!");
- return cast<ConstantVector>(ConstantVector::get(Elts));
+ Result = std::max(Result, getOperand(i)->getRelocationInfo());
+
+ return Result;
}
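
The replacement API classifies conservatively and merges with std::max, so a single global-relocation operand taints the whole constant. A minimal sketch of the merge (hypothetical names; severity is encoded in the enum order):

#include <algorithm>

enum RelocTy { NoReloc = 0, LocalReloc = 1, GlobalRelocs = 2 };

// The classification of an aggregate is the most conservative
// (largest) of its operands', as in the operand walk above.
RelocTy combine(const RelocTy *Ops, unsigned N) {
  RelocTy R = NoReloc;
  for (unsigned i = 0; i != N; ++i)
    R = std::max(R, Ops[i]);
  return R;
}
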
@@ -185,7 +194,8 @@ ConstantVector *ConstantVector::getAllOnesValue(const VectorType *Ty) {
/// type, returns the elements of the vector in the specified smallvector.
/// This handles breaking down a vector undef into undef elements, etc. For
/// constant exprs and other cases we can't handle, we return an empty vector.
-void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
+void Constant::getVectorElements(LLVMContext &Context,
+ SmallVectorImpl<Constant*> &Elts) const {
assert(isa<VectorType>(getType()) && "Not a vector constant!");
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
@@ -220,95 +230,45 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
}
-ConstantInt *ConstantInt::TheTrueVal = 0;
-ConstantInt *ConstantInt::TheFalseVal = 0;
-
-namespace llvm {
- void CleanupTrueFalse(void *) {
- ConstantInt::ResetTrueFalse();
- }
-}
-
-static ManagedCleanup<llvm::CleanupTrueFalse> TrueFalseCleanup;
-
-ConstantInt *ConstantInt::CreateTrueFalseVals(bool WhichOne) {
- assert(TheTrueVal == 0 && TheFalseVal == 0);
- TheTrueVal = get(Type::Int1Ty, 1);
- TheFalseVal = get(Type::Int1Ty, 0);
-
- // Ensure that llvm_shutdown nulls out TheTrueVal/TheFalseVal.
- TrueFalseCleanup.Register();
-
- return WhichOne ? TheTrueVal : TheFalseVal;
+ConstantInt* ConstantInt::getTrue(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedWriter<true>(pImpl->ConstantsLock);
+ if (pImpl->TheTrueVal)
+ return pImpl->TheTrueVal;
+ else
+ return (pImpl->TheTrueVal =
+ ConstantInt::get(IntegerType::get(Context, 1), 1));
}
-
-namespace {
- struct DenseMapAPIntKeyInfo {
- struct KeyTy {
- APInt val;
- const Type* type;
- KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {}
- KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
- bool operator==(const KeyTy& that) const {
- return type == that.type && this->val == that.val;
- }
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
- };
- static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
- static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
- static unsigned getHashValue(const KeyTy &Key) {
- return DenseMapInfo<void*>::getHashValue(Key.type) ^
- Key.val.getHashValue();
- }
- static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
- return LHS == RHS;
- }
- static bool isPod() { return false; }
- };
+ConstantInt* ConstantInt::getFalse(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedWriter<true>(pImpl->ConstantsLock);
+ if (pImpl->TheFalseVal)
+ return pImpl->TheFalseVal;
+ else
+ return (pImpl->TheFalseVal =
+ ConstantInt::get(IntegerType::get(Context, 1), 0));
}
-typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
- DenseMapAPIntKeyInfo> IntMapTy;
-static ManagedStatic<IntMapTy> IntConstants;
-
-ConstantInt *ConstantInt::get(const IntegerType *Ty,
- uint64_t V, bool isSigned) {
- return get(APInt(Ty->getBitWidth(), V, isSigned));
-}
-
-Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
- Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
-
- // For vectors, broadcast the value.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return
- ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
-
- return C;
-}
-
// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
// as the key, is a DenseMapAPIntKeyInfo::KeyTy which has provided the
// operator== and operator!= to ensure that the DenseMap doesn't attempt to
// compare APInt's of different widths, which would violate an APInt class
// invariant which generates an assertion.
-ConstantInt *ConstantInt::get(const APInt& V) {
+ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) {
// Get the corresponding integer type for the bit width of the value.
- const IntegerType *ITy = IntegerType::get(V.getBitWidth());
+ const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
// get an existing value or the insertion position
DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
- ConstantsLock->reader_acquire();
- ConstantInt *&Slot = (*IntConstants)[Key];
- ConstantsLock->reader_release();
+ Context.pImpl->ConstantsLock.reader_acquire();
+ ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
+ Context.pImpl->ConstantsLock.reader_release();
if (!Slot) {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- ConstantInt *&NewSlot = (*IntConstants)[Key];
+ sys::SmartScopedWriter<true> Writer(Context.pImpl->ConstantsLock);
+ ConstantInt *&NewSlot = Context.pImpl->IntConstants[Key];
if (!Slot) {
NewSlot = new ConstantInt(ITy, V);
}
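
The lookup above is the classic read-mostly pattern: probe the map under a reader lock, and only on a miss take the writer lock and re-check the slot before filling it. A stand-alone sketch with std::shared_mutex (hypothetical names, not the LLVM locking API):

#include <cstdint>
#include <shared_mutex>
#include <unordered_map>

struct ConstantCache {
  std::shared_mutex Lock;
  std::unordered_map<uint64_t, int *> Map;

  int *getOrCreate(uint64_t Key) {
    {
      std::shared_lock<std::shared_mutex> Reader(Lock);
      auto It = Map.find(Key);
      if (It != Map.end())
        return It->second;
    }
    std::unique_lock<std::shared_mutex> Writer(Lock);
    int *&Slot = Map[Key]; // re-check under the writer lock
    if (!Slot)
      Slot = new int(0);
    return Slot;
  }
};
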
@@ -319,117 +279,153 @@ ConstantInt *ConstantInt::get(const APInt& V) {
}
}
-Constant *ConstantInt::get(const Type *Ty, const APInt &V) {
- ConstantInt *C = ConstantInt::get(V);
+Constant* ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()),
+ V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+ConstantInt* ConstantInt::get(const IntegerType* Ty, uint64_t V,
+ bool isSigned) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+ConstantInt* ConstantInt::getSigned(const IntegerType* Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
+ ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
"ConstantInt type doesn't match the type implied by its value!");
// For vectors, broadcast the value.
if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return
- ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+ return ConstantVector::get(
+ std::vector<Constant *>(VTy->getNumElements(), C));
return C;
}
+ConstantInt* ConstantInt::get(const IntegerType* Ty, const StringRef& Str,
+ uint8_t radix) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
+}
+
//===----------------------------------------------------------------------===//
// ConstantFP
//===----------------------------------------------------------------------===//
static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
- if (Ty == Type::FloatTy)
+ if (Ty->isFloatTy())
return &APFloat::IEEEsingle;
- if (Ty == Type::DoubleTy)
+ if (Ty->isDoubleTy())
return &APFloat::IEEEdouble;
- if (Ty == Type::X86_FP80Ty)
+ if (Ty->isX86_FP80Ty())
return &APFloat::x87DoubleExtended;
- else if (Ty == Type::FP128Ty)
+ else if (Ty->isFP128Ty())
return &APFloat::IEEEquad;
- assert(Ty == Type::PPC_FP128Ty && "Unknown FP format");
+ assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
return &APFloat::PPCDoubleDouble;
}
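
// Illustrative sketch (not part of the patch): this mapping is what lets a
// host double be rounded into the target's format, as ConstantFP::get(Ty,
// double) below does via APFloat::convert.
#include "llvm/ADT/APFloat.h"
#include <cassert>

static void convertDemo() {
  llvm::APFloat F(2.5);                // host double, IEEEdouble semantics
  bool LosesInfo;
  F.convert(llvm::APFloat::IEEEsingle, llvm::APFloat::rmNearestTiesToEven,
            &LosesInfo);
  assert(!LosesInfo && "2.5 is exactly representable as an IEEE single");
}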
-ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
- : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
- assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
- "FP type Mismatch");
+/// get() - This returns a constant fp for the specified value in the
+/// specified type. This should only be used for simple constant values
+/// like 2.0/1.0, etc., that are known to be valid both as a double and in
+/// the target format.
+Constant* ConstantFP::get(const Type* Ty, double V) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(V);
+ bool ignored;
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
-bool ConstantFP::isNullValue() const {
- return Val.isZero() && !Val.isNegative();
+
+Constant* ConstantFP::get(const Type* Ty, const StringRef& Str) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
-ConstantFP *ConstantFP::getNegativeZero(const Type *Ty) {
+
+ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
+ LLVMContext &Context = Ty->getContext();
APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
apf.changeSign();
- return ConstantFP::get(apf);
+ return get(Context, apf);
}
-bool ConstantFP::isExactlyValue(const APFloat& V) const {
- return Val.bitwiseIsEqual(V);
-}
-namespace {
- struct DenseMapAPFloatKeyInfo {
- struct KeyTy {
- APFloat val;
- KeyTy(const APFloat& V) : val(V){}
- KeyTy(const KeyTy& that) : val(that.val) {}
- bool operator==(const KeyTy& that) const {
- return this->val.bitwiseIsEqual(that.val);
- }
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
- };
- static inline KeyTy getEmptyKey() {
- return KeyTy(APFloat(APFloat::Bogus,1));
- }
- static inline KeyTy getTombstoneKey() {
- return KeyTy(APFloat(APFloat::Bogus,2));
- }
- static unsigned getHashValue(const KeyTy &Key) {
- return Key.val.getHashValue();
- }
- static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
- return LHS == RHS;
+Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
+ if (PTy->getElementType()->isFloatingPoint()) {
+ std::vector<Constant*> zeros(PTy->getNumElements(),
+ getNegativeZero(PTy->getElementType()));
+ return ConstantVector::get(PTy, zeros);
}
- static bool isPod() { return false; }
- };
-}
-//---- ConstantFP::get() implementation...
-//
-typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
- DenseMapAPFloatKeyInfo> FPMapTy;
+ if (Ty->isFloatingPoint())
+ return getNegativeZero(Ty);
+
+ return Constant::getNullValue(Ty);
+}
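
// Illustrative sketch (not part of the patch): negation of a floating-point
// value must subtract from -0.0 so the sign of zero survives; integer types
// use plain zero.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>

static void negZeroDemo(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Constant *F = ConstantFP::getZeroValueForNegation(Type::getFloatTy(Ctx));
  Constant *I = ConstantFP::getZeroValueForNegation(Type::getInt32Ty(Ctx));
  assert(cast<ConstantFP>(F)->getValueAPF().isNegative()); // -0.0
  assert(I->isNullValue());                                // integer zero
}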
-static ManagedStatic<FPMapTy> FPConstants;
-ConstantFP *ConstantFP::get(const APFloat &V) {
+// ConstantFP accessors.
+ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
DenseMapAPFloatKeyInfo::KeyTy Key(V);
- ConstantsLock->reader_acquire();
- ConstantFP *&Slot = (*FPConstants)[Key];
- ConstantsLock->reader_release();
+ LLVMContextImpl* pImpl = Context.pImpl;
+
+ pImpl->ConstantsLock.reader_acquire();
+ ConstantFP *&Slot = pImpl->FPConstants[Key];
+ pImpl->ConstantsLock.reader_release();
if (!Slot) {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- ConstantFP *&NewSlot = (*FPConstants)[Key];
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
+ ConstantFP *&NewSlot = pImpl->FPConstants[Key];
if (!NewSlot) {
const Type *Ty;
if (&V.getSemantics() == &APFloat::IEEEsingle)
- Ty = Type::FloatTy;
+ Ty = Type::getFloatTy(Context);
else if (&V.getSemantics() == &APFloat::IEEEdouble)
- Ty = Type::DoubleTy;
+ Ty = Type::getDoubleTy(Context);
else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
- Ty = Type::X86_FP80Ty;
+ Ty = Type::getX86_FP80Ty(Context);
else if (&V.getSemantics() == &APFloat::IEEEquad)
- Ty = Type::FP128Ty;
+ Ty = Type::getFP128Ty(Context);
else {
assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
"Unknown FP format");
- Ty = Type::PPC_FP128Ty;
+ Ty = Type::getPPC_FP128Ty(Context);
}
NewSlot = new ConstantFP(Ty, V);
}
@@ -440,22 +436,24 @@ ConstantFP *ConstantFP::get(const APFloat &V) {
return Slot;
}
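
// Illustrative sketch (not part of the patch): the semantics of the APFloat
// pick the result type, and uniquing is per context.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/APFloat.h"
#include <cassert>

static void fpUniquingDemo(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  ConstantFP *D = ConstantFP::get(Ctx, APFloat(1.0));  // IEEEdouble -> double
  ConstantFP *S = ConstantFP::get(Ctx, APFloat(1.0f)); // IEEEsingle -> float
  assert(D->getType() == Type::getDoubleTy(Ctx));
  assert(S->getType() == Type::getFloatTy(Ctx));
  assert(D == ConstantFP::get(Ctx, APFloat(1.0)) && "uniqued per context");
}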
-/// get() - This returns a constant fp for the specified value in the
-/// specified type. This should only be used for simple constant values like
-/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant *ConstantFP::get(const Type *Ty, double V) {
- APFloat FV(V);
- bool ignored;
- FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
- APFloat::rmNearestTiesToEven, &ignored);
- Constant *C = get(FV);
+ConstantFP *ConstantFP::getInfinity(const Type *Ty, bool Negative) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getInf(Semantics, Negative));
+}
- // For vectors, broadcast the value.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return
- ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
+ : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ "FP type Mismatch");
+}
- return C;
+bool ConstantFP::isNullValue() const {
+ return Val.isZero() && !Val.isNegative();
+}
+
+bool ConstantFP::isExactlyValue(const APFloat& V) const {
+ return Val.bitwiseIsEqual(V);
}
//===----------------------------------------------------------------------===//
@@ -474,14 +472,65 @@ ConstantArray::ConstantArray(const ArrayType *T,
for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
I != E; ++I, ++OL) {
Constant *C = *I;
- assert((C->getType() == T->getElementType() ||
- (T->isAbstract() &&
- C->getType()->getTypeID() == T->getElementType()->getTypeID())) &&
+ assert(C->getType() == T->getElementType() &&
"Initializer for array element doesn't match array element type!");
*OL = C;
}
}
+Constant *ConstantArray::get(const ArrayType *Ty,
+ const std::vector<Constant*> &V) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ assert(V[i]->getType() == Ty->getElementType() &&
+ "Wrong type in array element initializer");
+ }
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ // If this is an all-zero array, return a ConstantAggregateZero object
+ if (!V.empty()) {
+ Constant *C = V[0];
+ if (!C->isNullValue()) {
+ // Implicitly locked.
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+ }
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ // Implicitly locked.
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+ }
+ }
+
+ return ConstantAggregateZero::get(Ty);
+}
+
+
+Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
+ unsigned NumVals) {
+ // FIXME: make this the primary ctor method.
+ return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
+}
+
+/// ConstantArray::get(StringRef) - Return an array that is initialized to
+/// contain the specified string. If AddNull is true, a null terminator is
+/// appended so that the array may be used naturally as a C string;
+/// otherwise the array holds exactly the bytes of Str, unterminated.
+///
+Constant* ConstantArray::get(LLVMContext &Context, const StringRef &Str,
+ bool AddNull) {
+ std::vector<Constant*> ElementVals;
+ for (unsigned i = 0; i < Str.size(); ++i)
+ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
+
+ // Add a null terminator to the string...
+ if (AddNull) {
+ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
+ }
+
+ ArrayType *ATy = ArrayType::get(Type::getInt8Ty(Context), ElementVals.size());
+ return get(ATy, ElementVals);
+}
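
// Illustrative sketch (not part of the patch): AddNull=true appends the
// terminator, giving [3 x i8] c"hi\00" here.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>

static void cstringDemo(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Constant *Str = ConstantArray::get(Ctx, "hi", true);
  const ArrayType *ATy = cast<ArrayType>(Str->getType());
  assert(ATy->getNumElements() == 3 && "two chars plus the null terminator");
  assert(cast<ConstantArray>(Str)->isCString());
}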
+
+
ConstantStruct::ConstantStruct(const StructType *T,
const std::vector<Constant*> &V)
@@ -494,16 +543,41 @@ ConstantStruct::ConstantStruct(const StructType *T,
for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
I != E; ++I, ++OL) {
Constant *C = *I;
- assert((C->getType() == T->getElementType(I-V.begin()) ||
- ((T->getElementType(I-V.begin())->isAbstract() ||
- C->getType()->isAbstract()) &&
- T->getElementType(I-V.begin())->getTypeID() ==
- C->getType()->getTypeID())) &&
+ assert(C->getType() == T->getElementType(I-V.begin()) &&
"Initializer for struct element doesn't match struct element type!");
*OL = C;
}
}
+// ConstantStruct accessors.
+Constant* ConstantStruct::get(const StructType* T,
+ const std::vector<Constant*>& V) {
+ LLVMContextImpl* pImpl = T->getContext().pImpl;
+
+ // Create a ConstantAggregateZero value if all elements are zeros...
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (!V[i]->isNullValue())
+ // Implicitly locked.
+ return pImpl->StructConstants.getOrCreate(T, V);
+
+ return ConstantAggregateZero::get(T);
+}
+
+Constant* ConstantStruct::get(LLVMContext &Context,
+ const std::vector<Constant*>& V, bool packed) {
+ std::vector<const Type*> StructEls;
+ StructEls.reserve(V.size());
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ StructEls.push_back(V[i]->getType());
+ return get(StructType::get(Context, StructEls, packed), V);
+}
+
+Constant* ConstantStruct::get(LLVMContext &Context,
+ Constant* const *Vals, unsigned NumVals,
+ bool Packed) {
+ // FIXME: make this the primary ctor method.
+ return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
+}
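
// Illustrative sketch (not part of the patch): the context form infers the
// struct type from the element values; 'false' requests an unpacked layout.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <vector>

static void structDemo(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  std::vector<Constant*> Fields;
  Fields.push_back(ConstantInt::get(Type::getInt32Ty(Ctx), 1, false));
  Fields.push_back(ConstantFP::get(Type::getDoubleTy(Ctx), 2.0));
  Constant *S = ConstantStruct::get(Ctx, Fields, false);
  // Had every field been zero, this would have returned a
  // ConstantAggregateZero instead of a ConstantStruct node.
  (void)S;
}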
ConstantVector::ConstantVector(const VectorType *T,
const std::vector<Constant*> &V)
@@ -514,297 +588,66 @@ ConstantVector::ConstantVector(const VectorType *T,
for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
I != E; ++I, ++OL) {
Constant *C = *I;
- assert((C->getType() == T->getElementType() ||
- (T->isAbstract() &&
- C->getType()->getTypeID() == T->getElementType()->getTypeID())) &&
+ assert(C->getType() == T->getElementType() &&
"Initializer for vector element doesn't match vector element type!");
*OL = C;
}
}
+// ConstantVector accessors.
+Constant* ConstantVector::get(const VectorType* T,
+ const std::vector<Constant*>& V) {
+ assert(!V.empty() && "Vectors can't be empty");
+ LLVMContext &Context = T->getContext();
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ // If this is an all-undef or all-zero vector, return a
+ // ConstantAggregateZero or UndefValue.
+ Constant *C = V[0];
+ bool isZero = C->isNullValue();
+ bool isUndef = isa<UndefValue>(C);
-namespace llvm {
-// We declare several classes private to this file, so use an anonymous
-// namespace
-namespace {
-
-/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement unary constant exprs.
-class VISIBILITY_HIDDEN UnaryConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
- UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty)
- : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
- Op<0>() = C;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement binary constant exprs.
-class VISIBILITY_HIDDEN BinaryConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2)
- : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
- Op<0>() = C1;
- Op<1>() = C2;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// SelectConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement select constant exprs.
-class VISIBILITY_HIDDEN SelectConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// ExtractElementConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// extractelement constant exprs.
-class VISIBILITY_HIDDEN ExtractElementConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- ExtractElementConstantExpr(Constant *C1, Constant *C2)
- : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
- Instruction::ExtractElement, &Op<0>(), 2) {
- Op<0>() = C1;
- Op<1>() = C2;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// InsertElementConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// insertelement constant exprs.
-class VISIBILITY_HIDDEN InsertElementConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(C1->getType(), Instruction::InsertElement,
- &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// ShuffleVectorConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// shufflevector constant exprs.
-class VISIBILITY_HIDDEN ShuffleVectorConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(VectorType::get(
- cast<VectorType>(C1->getType())->getElementType(),
- cast<VectorType>(C3->getType())->getNumElements()),
- Instruction::ShuffleVector,
- &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// ExtractValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// extractvalue constant exprs.
-class VISIBILITY_HIDDEN ExtractValueConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
- ExtractValueConstantExpr(Constant *Agg,
- const SmallVector<unsigned, 4> &IdxList,
- const Type *DestTy)
- : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
- Indices(IdxList) {
- Op<0>() = Agg;
- }
-
- /// Indices - These identify which value to extract.
- const SmallVector<unsigned, 4> Indices;
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// InsertValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// insertvalue constant exprs.
-class VISIBILITY_HIDDEN InsertValueConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- InsertValueConstantExpr(Constant *Agg, Constant *Val,
- const SmallVector<unsigned, 4> &IdxList,
- const Type *DestTy)
- : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
- Indices(IdxList) {
- Op<0>() = Agg;
- Op<1>() = Val;
- }
-
- /// Indices - These identify the position for the insertion.
- const SmallVector<unsigned, 4> Indices;
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-
-/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
-/// used behind the scenes to implement getelementptr constant exprs.
-class VISIBILITY_HIDDEN GetElementPtrConstantExpr : public ConstantExpr {
- GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
- const Type *DestTy);
-public:
- static GetElementPtrConstantExpr *Create(Constant *C,
- const std::vector<Constant*>&IdxList,
- const Type *DestTy) {
- return new(IdxList.size() + 1)
- GetElementPtrConstantExpr(C, IdxList, DestTy);
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-// CompareConstantExpr - This class is private to Constants.cpp, and is used
-// behind the scenes to implement ICmp and FCmp constant expressions. This is
-// needed in order to store the predicate value for these instructions.
-struct VISIBILITY_HIDDEN CompareConstantExpr : public ConstantExpr {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- unsigned short predicate;
- CompareConstantExpr(const Type *ty, Instruction::OtherOps opc,
- unsigned short pred, Constant* LHS, Constant* RHS)
- : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
- Op<0>() = LHS;
- Op<1>() = RHS;
+ if (isZero || isUndef) {
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ isZero = isUndef = false;
+ break;
+ }
}
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-} // end anonymous namespace
-
-template <>
-struct OperandTraits<UnaryConstantExpr> : FixedNumOperandTraits<1> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
-
-template <>
-struct OperandTraits<BinaryConstantExpr> : FixedNumOperandTraits<2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
-
-template <>
-struct OperandTraits<SelectConstantExpr> : FixedNumOperandTraits<3> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
-
-template <>
-struct OperandTraits<ExtractElementConstantExpr> : FixedNumOperandTraits<2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
-
-template <>
-struct OperandTraits<InsertElementConstantExpr> : FixedNumOperandTraits<3> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
-
-template <>
-struct OperandTraits<ShuffleVectorConstantExpr> : FixedNumOperandTraits<3> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
-
-template <>
-struct OperandTraits<ExtractValueConstantExpr> : FixedNumOperandTraits<1> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
-
-template <>
-struct OperandTraits<InsertValueConstantExpr> : FixedNumOperandTraits<2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
-
-template <>
-struct OperandTraits<GetElementPtrConstantExpr> : VariadicOperandTraits<1> {
-};
-
-GetElementPtrConstantExpr::GetElementPtrConstantExpr
- (Constant *C,
- const std::vector<Constant*> &IdxList,
- const Type *DestTy)
- : ConstantExpr(DestTy, Instruction::GetElementPtr,
- OperandTraits<GetElementPtrConstantExpr>::op_end(this)
- - (IdxList.size()+1),
- IdxList.size()+1) {
- OperandList[0] = C;
- for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
- OperandList[i+1] = IdxList[i];
+
+ if (isZero)
+ return ConstantAggregateZero::get(T);
+ if (isUndef)
+ return UndefValue::get(T);
+
+ // Implicitly locked.
+ return pImpl->VectorConstants.getOrCreate(T, V);
}
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
-
+Constant* ConstantVector::get(const std::vector<Constant*>& V) {
+ assert(!V.empty() && "Cannot infer type if V is empty");
+ return get(VectorType::get(V.front()->getType(), V.size()), V);
+}
-template <>
-struct OperandTraits<CompareConstantExpr> : FixedNumOperandTraits<2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) {
+ // FIXME: make this the primary ctor method.
+ return get(std::vector<Constant*>(Vals, Vals+NumVals));
+}
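
// Illustrative sketch (not part of the patch): the all-zero and all-undef
// special cases above mean callers may not get a ConstantVector back.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
#include <vector>

static void vectorFoldDemo(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0, false);
  std::vector<Constant*> Zeros(4, Zero);
  Constant *V = ConstantVector::get(Zeros);
  assert(isa<ConstantAggregateZero>(V) && "all-zero vector folds away");
}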
+Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) {
+ return getTy(C1->getType(), Instruction::Add, C1, C2,
+ OverflowingBinaryOperator::NoSignedWrap);
+}
-} // End llvm namespace
+Constant* ConstantExpr::getNSWSub(Constant* C1, Constant* C2) {
+ return getTy(C1->getType(), Instruction::Sub, C1, C2,
+ OverflowingBinaryOperator::NoSignedWrap);
+}
+Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) {
+ return getTy(C1->getType(), Instruction::SDiv, C1, C2,
+ SDivOperator::IsExact);
+}
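
// Illustrative sketch (not part of the patch): the flagged forms matter when
// the operands don't fold.  GV is a hypothetical global; ptrtoint of a global
// is opaque, so the add survives as a ConstantExpr carrying the NSW flag.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"

static llvm::Constant *nswDemo(llvm::LLVMContext &Ctx,
                               llvm::GlobalVariable *GV) {
  using namespace llvm;
  Constant *P   = ConstantExpr::getPtrToInt(GV, Type::getInt64Ty(Ctx));
  Constant *One = ConstantInt::get(Type::getInt64Ty(Ctx), 1, false);
  return ConstantExpr::getNSWAdd(P, One);
}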
// Utility function for determining if a ConstantExpr is a CastOp or not. This
// can't be inline because we don't want to #include Instruction.h into
@@ -814,8 +657,32 @@ bool ConstantExpr::isCast() const {
}
bool ConstantExpr::isCompare() const {
- return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp ||
- getOpcode() == Instruction::VICmp || getOpcode() == Instruction::VFCmp;
+ return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
+}
+
+bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
+ if (getOpcode() != Instruction::GetElementPtr) return false;
+
+ gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
+ User::const_op_iterator OI = next(this->op_begin());
+
+ // Skip the first index, as it has no static limit.
+ ++GEPI;
+ ++OI;
+
+ // The remaining indices must be compile-time known integers within the
+ // bounds of the corresponding notional static array types.
+ for (; GEPI != E; ++GEPI, ++OI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
+ if (!CI) return false;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
+ if (CI->getValue().getActiveBits() > 64 ||
+ CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+
+ // All the indices checked out.
+ return true;
}
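
// Illustrative sketch (not part of the patch): ArrGV is a hypothetical global
// of type [10 x i32]; index 3 is within the notional bound, so the predicate
// holds.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
#include <cassert>

static void gepBoundsDemo(llvm::LLVMContext &Ctx,
                          llvm::GlobalVariable *ArrGV) {
  using namespace llvm;
  Constant *Idx[] = {
    ConstantInt::get(Type::getInt32Ty(Ctx), 0, false), // unchecked first index
    ConstantInt::get(Type::getInt32Ty(Ctx), 3, false)  // 3 < 10, in bounds
  };
  Constant *GEP = ConstantExpr::getGetElementPtr(ArrGV, Idx, 2);
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP))
    assert(CE->isGEPWithNoNotionalOverIndexing());
}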
bool ConstantExpr::hasIndices() const {
@@ -831,93 +698,11 @@ const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const {
return cast<InsertValueConstantExpr>(this)->Indices;
}
-/// ConstantExpr::get* - Return some common constants without having to
-/// specify the full Instruction::OPCODE identifier.
-///
-Constant *ConstantExpr::getNeg(Constant *C) {
- // API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (C->getType()->isFPOrFPVector())
- return getFNeg(C);
- assert(C->getType()->isIntOrIntVector() &&
- "Cannot NEG a nonintegral value!");
- return get(Instruction::Sub,
- ConstantExpr::getZeroValueForNegationExpr(C->getType()),
- C);
-}
-Constant *ConstantExpr::getFNeg(Constant *C) {
- assert(C->getType()->isFPOrFPVector() &&
- "Cannot FNEG a non-floating-point value!");
- return get(Instruction::FSub,
- ConstantExpr::getZeroValueForNegationExpr(C->getType()),
- C);
-}
-Constant *ConstantExpr::getNot(Constant *C) {
- assert(C->getType()->isIntOrIntVector() &&
- "Cannot NOT a nonintegral value!");
- return get(Instruction::Xor, C,
- Constant::getAllOnesValue(C->getType()));
-}
-Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2) {
- return get(Instruction::Add, C1, C2);
-}
-Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
- return get(Instruction::FAdd, C1, C2);
-}
-Constant *ConstantExpr::getSub(Constant *C1, Constant *C2) {
- return get(Instruction::Sub, C1, C2);
-}
-Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
- return get(Instruction::FSub, C1, C2);
-}
-Constant *ConstantExpr::getMul(Constant *C1, Constant *C2) {
- return get(Instruction::Mul, C1, C2);
-}
-Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
- return get(Instruction::FMul, C1, C2);
-}
-Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2) {
- return get(Instruction::UDiv, C1, C2);
-}
-Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2) {
- return get(Instruction::SDiv, C1, C2);
-}
-Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
- return get(Instruction::FDiv, C1, C2);
-}
-Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
- return get(Instruction::URem, C1, C2);
-}
-Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
- return get(Instruction::SRem, C1, C2);
-}
-Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
- return get(Instruction::FRem, C1, C2);
-}
-Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
- return get(Instruction::And, C1, C2);
-}
-Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
- return get(Instruction::Or, C1, C2);
-}
-Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
- return get(Instruction::Xor, C1, C2);
-}
unsigned ConstantExpr::getPredicate() const {
assert(getOpcode() == Instruction::FCmp ||
- getOpcode() == Instruction::ICmp ||
- getOpcode() == Instruction::VFCmp ||
- getOpcode() == Instruction::VICmp);
+ getOpcode() == Instruction::ICmp);
return ((const CompareConstantExpr*)this)->predicate;
}
-Constant *ConstantExpr::getShl(Constant *C1, Constant *C2) {
- return get(Instruction::Shl, C1, C2);
-}
-Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2) {
- return get(Instruction::LShr, C1, C2);
-}
-Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2) {
- return get(Instruction::AShr, C1, C2);
-}
/// getWithOperandReplaced - Return a constant expression identical to this
/// one, but with the specified operand set to the specified value.
@@ -969,15 +754,19 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
Ops[i-1] = getOperand(i);
if (OpNo == 0)
- return ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size());
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(Op, &Ops[0], Ops.size()) :
+ ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size());
Ops[OpNo-1] = Op;
- return ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size());
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(getOperand(0), &Ops[0], Ops.size()) :
+ ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size());
}
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
Op0 = (OpNo == 0) ? Op : getOperand(0);
Op1 = (OpNo == 1) ? Op : getOperand(1);
- return ConstantExpr::get(getOpcode(), Op0, Op1);
+ return ConstantExpr::get(getOpcode(), Op0, Op1, SubclassData);
}
}
@@ -1019,15 +808,15 @@ getWithOperands(Constant* const *Ops, unsigned NumOps) const {
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- return ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], NumOps-1);
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], NumOps-1) :
+ ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], NumOps-1);
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::VICmp:
- case Instruction::VFCmp:
return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
- return ConstantExpr::get(getOpcode(), Ops[0], Ops[1]);
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassData);
}
}
@@ -1037,7 +826,7 @@ getWithOperands(Constant* const *Ops, unsigned NumOps) const {
bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
- if (Ty == Type::Int1Ty)
+ if (Ty == Type::getInt1Ty(Ty->getContext()))
return Val == 0 || Val == 1;
if (NumBits >= 64)
return true; // always true, has to fit in largest type
@@ -1047,7 +836,7 @@ bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) {
unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
- if (Ty == Type::Int1Ty)
+ if (Ty == Type::getInt1Ty(Ty->getContext()))
return Val == 0 || Val == 1 || Val == -1;
if (NumBits >= 64)
return true; // always true, has to fit in largest type
@@ -1096,404 +885,36 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
//===----------------------------------------------------------------------===//
// Factory Function Implementation
-
-// The number of operands for each ConstantCreator::create method is
-// determined by the ConstantTraits template.
-// ConstantCreator - A class that is used to create constants by
-// ValueMap*. This class should be partially specialized if there is
-// something strange that needs to be done to interface to the ctor for the
-// constant.
-//
-namespace llvm {
- template<class ValType>
- struct ConstantTraits;
-
- template<typename T, typename Alloc>
- struct VISIBILITY_HIDDEN ConstantTraits< std::vector<T, Alloc> > {
- static unsigned uses(const std::vector<T, Alloc>& v) {
- return v.size();
- }
- };
-
- template<class ConstantClass, class TypeClass, class ValType>
- struct VISIBILITY_HIDDEN ConstantCreator {
- static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
- return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
- }
- };
-
- template<class ConstantClass, class TypeClass>
- struct VISIBILITY_HIDDEN ConvertConstantType {
- static void convert(ConstantClass *OldC, const TypeClass *NewTy) {
- assert(0 && "This type cannot be converted!\n");
- abort();
- }
- };
-
- template<class ValType, class TypeClass, class ConstantClass,
- bool HasLargeKey = false /*true for arrays and structs*/ >
- class VISIBILITY_HIDDEN ValueMap : public AbstractTypeUser {
- public:
- typedef std::pair<const Type*, ValType> MapKey;
- typedef std::map<MapKey, Constant *> MapTy;
- typedef std::map<Constant*, typename MapTy::iterator> InverseMapTy;
- typedef std::map<const Type*, typename MapTy::iterator> AbstractTypeMapTy;
- private:
- /// Map - This is the main map from the element descriptor to the Constants.
- /// This is the primary way we avoid creating two of the same shape
- /// constant.
- MapTy Map;
-
- /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
- /// from the constants to their element in Map. This is important for
- /// removal of constants from the array, which would otherwise have to scan
- /// through the map with very large keys.
- InverseMapTy InverseMap;
-
- /// AbstractTypeMap - Map for abstract type constants.
- ///
- AbstractTypeMapTy AbstractTypeMap;
-
- /// ValueMapLock - Mutex for this map.
- sys::SmartMutex<true> ValueMapLock;
-
- public:
- // NOTE: This function is not locked. It is the caller's responsibility
- // to enforce proper synchronization.
- typename MapTy::iterator map_end() { return Map.end(); }
-
- /// InsertOrGetItem - Return an iterator for the specified element.
- /// If the element exists in the map, the returned iterator points to the
- /// entry and Exists=true. If not, the iterator points to the newly
- /// inserted entry and returns Exists=false. Newly inserted entries have
- /// I->second == 0, and should be filled in.
- /// NOTE: This function is not locked. It is the caller's responsibility
- // to enforce proper synchronization.
- typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, Constant *>
- &InsertVal,
- bool &Exists) {
- std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
- Exists = !IP.second;
- return IP.first;
- }
-
-private:
- typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
- if (HasLargeKey) {
- typename InverseMapTy::iterator IMI = InverseMap.find(CP);
- assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
- IMI->second->second == CP &&
- "InverseMap corrupt!");
- return IMI->second;
- }
-
- typename MapTy::iterator I =
- Map.find(MapKey(static_cast<const TypeClass*>(CP->getRawType()),
- getValType(CP)));
- if (I == Map.end() || I->second != CP) {
- // FIXME: This should not use a linear scan. If this gets to be a
- // performance problem, someone should look at this.
- for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
- /* empty */;
- }
- return I;
- }
-
- ConstantClass* Create(const TypeClass *Ty, const ValType &V,
- typename MapTy::iterator I) {
- ConstantClass* Result =
- ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
-
- assert(Result->getType() == Ty && "Type specified is not correct!");
- I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
-
- if (HasLargeKey) // Remember the reverse mapping if needed.
- InverseMap.insert(std::make_pair(Result, I));
-
- // If the type of the constant is abstract, make sure that an entry
- // exists for it in the AbstractTypeMap.
- if (Ty->isAbstract()) {
- typename AbstractTypeMapTy::iterator TI =
- AbstractTypeMap.find(Ty);
-
- if (TI == AbstractTypeMap.end()) {
- // Add ourselves to the ATU list of the type.
- cast<DerivedType>(Ty)->addAbstractTypeUser(this);
-
- AbstractTypeMap.insert(TI, std::make_pair(Ty, I));
- }
- }
-
- return Result;
- }
-public:
-
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
- sys::SmartScopedLock<true> Lock(&ValueMapLock);
- MapKey Lookup(Ty, V);
- ConstantClass* Result = 0;
-
- typename MapTy::iterator I = Map.find(Lookup);
- // Is it in the map?
- if (I != Map.end())
- Result = static_cast<ConstantClass *>(I->second);
-
- if (!Result) {
- // If no preexisting value, create one now...
- Result = Create(Ty, V, I);
- }
-
- return Result;
- }
-
- void remove(ConstantClass *CP) {
- sys::SmartScopedLock<true> Lock(&ValueMapLock);
- typename MapTy::iterator I = FindExistingElement(CP);
- assert(I != Map.end() && "Constant not found in constant table!");
- assert(I->second == CP && "Didn't find correct element?");
-
- if (HasLargeKey) // Remember the reverse mapping if needed.
- InverseMap.erase(CP);
-
- // Now that we found the entry, make sure this isn't the entry that
- // the AbstractTypeMap points to.
- const TypeClass *Ty = static_cast<const TypeClass *>(I->first.first);
- if (Ty->isAbstract()) {
- assert(AbstractTypeMap.count(Ty) &&
- "Abstract type not in AbstractTypeMap?");
- typename MapTy::iterator &ATMEntryIt = AbstractTypeMap[Ty];
- if (ATMEntryIt == I) {
- // Yes, we are removing the representative entry for this type.
- // See if there are any other entries of the same type.
- typename MapTy::iterator TmpIt = ATMEntryIt;
-
- // First check the entry before this one...
- if (TmpIt != Map.begin()) {
- --TmpIt;
- if (TmpIt->first.first != Ty) // Not the same type, move back...
- ++TmpIt;
- }
-
- // If we didn't find the same type, try to move forward...
- if (TmpIt == ATMEntryIt) {
- ++TmpIt;
- if (TmpIt == Map.end() || TmpIt->first.first != Ty)
- --TmpIt; // No entry afterwards with the same type
- }
-
- // If there is another entry in the map of the same abstract type,
- // update the AbstractTypeMap entry now.
- if (TmpIt != ATMEntryIt) {
- ATMEntryIt = TmpIt;
- } else {
- // Otherwise, we are removing the last instance of this type
- // from the table. Remove from the ATM, and from user list.
- cast<DerivedType>(Ty)->removeAbstractTypeUser(this);
- AbstractTypeMap.erase(Ty);
- }
- }
- }
-
- Map.erase(I);
- }
-
-
- /// MoveConstantToNewSlot - If we are about to change C to be the element
- /// specified by I, update our internal data structures to reflect this
- /// fact.
- /// NOTE: This function is not locked. It is the responsibility of the
- /// caller to enforce proper synchronization if using this method.
- void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
- // First, remove the old location of the specified constant in the map.
- typename MapTy::iterator OldI = FindExistingElement(C);
- assert(OldI != Map.end() && "Constant not found in constant table!");
- assert(OldI->second == C && "Didn't find correct element?");
-
- // If this constant is the representative element for its abstract type,
- // update the AbstractTypeMap so that the representative element is I.
- if (C->getType()->isAbstract()) {
- typename AbstractTypeMapTy::iterator ATI =
- AbstractTypeMap.find(C->getType());
- assert(ATI != AbstractTypeMap.end() &&
- "Abstract type not in AbstractTypeMap?");
- if (ATI->second == OldI)
- ATI->second = I;
- }
-
- // Remove the old entry from the map.
- Map.erase(OldI);
-
- // Update the inverse map so that we know that this constant is now
- // located at descriptor I.
- if (HasLargeKey) {
- assert(I->second == C && "Bad inversemap entry!");
- InverseMap[C] = I;
- }
- }
-
- void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
- sys::SmartScopedLock<true> Lock(&ValueMapLock);
- typename AbstractTypeMapTy::iterator I =
- AbstractTypeMap.find(cast<Type>(OldTy));
-
- assert(I != AbstractTypeMap.end() &&
- "Abstract type not in AbstractTypeMap?");
-
- // Convert a constant at a time until the last one is gone. The last one
- // leaving will remove() itself, causing the AbstractTypeMapEntry to be
- // eliminated eventually.
- do {
- ConvertConstantType<ConstantClass,
- TypeClass>::convert(
- static_cast<ConstantClass *>(I->second->second),
- cast<TypeClass>(NewTy));
-
- I = AbstractTypeMap.find(cast<Type>(OldTy));
- } while (I != AbstractTypeMap.end());
- }
-
- // If the type became concrete without being refined to any other existing
- // type, we just remove ourselves from the ATU list.
- void typeBecameConcrete(const DerivedType *AbsTy) {
- AbsTy->removeAbstractTypeUser(this);
- }
-
- void dump() const {
- DOUT << "Constant.cpp: ValueMap\n";
- }
- };
-}
-
-
-
-//---- ConstantAggregateZero::get() implementation...
-//
-namespace llvm {
- // ConstantAggregateZero does not take extra "value" argument...
- template<class ValType>
- struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
- static ConstantAggregateZero *create(const Type *Ty, const ValType &V){
- return new ConstantAggregateZero(Ty);
- }
- };
-
- template<>
- struct ConvertConstantType<ConstantAggregateZero, Type> {
- static void convert(ConstantAggregateZero *OldC, const Type *NewTy) {
- // Make everyone now use a constant of the new type...
- Constant *New = ConstantAggregateZero::get(NewTy);
- assert(New != OldC && "Didn't replace constant??");
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-}
-
-static ManagedStatic<ValueMap<char, Type,
- ConstantAggregateZero> > AggZeroConstants;
-
-static char getValType(ConstantAggregateZero *CPZ) { return 0; }
-
-ConstantAggregateZero *ConstantAggregateZero::get(const Type *Ty) {
+ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) &&
"Cannot create an aggregate zero of non-aggregate type!");
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
// Implicitly locked.
- return AggZeroConstants->getOrCreate(Ty, 0);
+ return pImpl->AggZeroConstants.getOrCreate(Ty, 0);
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantAggregateZero::destroyConstant() {
// Implicitly locked.
- AggZeroConstants->remove(this);
+ getType()->getContext().pImpl->AggZeroConstants.remove(this);
destroyConstantImpl();
}
-//---- ConstantArray::get() implementation...
-//
-namespace llvm {
- template<>
- struct ConvertConstantType<ConstantArray, ArrayType> {
- static void convert(ConstantArray *OldC, const ArrayType *NewTy) {
- // Make everyone now use a constant of the new type...
- std::vector<Constant*> C;
- for (unsigned i = 0, e = OldC->getNumOperands(); i != e; ++i)
- C.push_back(cast<Constant>(OldC->getOperand(i)));
- Constant *New = ConstantArray::get(NewTy, C);
- assert(New != OldC && "Didn't replace constant??");
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-}
-
-static std::vector<Constant*> getValType(ConstantArray *CA) {
- std::vector<Constant*> Elements;
- Elements.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- Elements.push_back(cast<Constant>(CA->getOperand(i)));
- return Elements;
-}
-
-typedef ValueMap<std::vector<Constant*>, ArrayType,
- ConstantArray, true /*largekey*/> ArrayConstantsTy;
-static ManagedStatic<ArrayConstantsTy> ArrayConstants;
-
-Constant *ConstantArray::get(const ArrayType *Ty,
- const std::vector<Constant*> &V) {
- // If this is an all-zero array, return a ConstantAggregateZero object
- if (!V.empty()) {
- Constant *C = V[0];
- if (!C->isNullValue()) {
- // Implicitly locked.
- return ArrayConstants->getOrCreate(Ty, V);
- }
- for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C) {
- // Implicitly locked.
- return ArrayConstants->getOrCreate(Ty, V);
- }
- }
-
- return ConstantAggregateZero::get(Ty);
-}
-
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantArray::destroyConstant() {
// Implicitly locked.
- ArrayConstants->remove(this);
+ getType()->getContext().pImpl->ArrayConstants.remove(this);
destroyConstantImpl();
}
-/// ConstantArray::get(const string&) - Return an array that is initialized to
-/// contain the specified string. If length is zero then a null terminator is
-/// added to the specified string so that it may be used in a natural way.
-/// Otherwise, the length parameter specifies how much of the string to use
-/// and it won't be null terminated.
-///
-Constant *ConstantArray::get(const std::string &Str, bool AddNull) {
- std::vector<Constant*> ElementVals;
- for (unsigned i = 0; i < Str.length(); ++i)
- ElementVals.push_back(ConstantInt::get(Type::Int8Ty, Str[i]));
-
- // Add a null terminator to the string...
- if (AddNull) {
- ElementVals.push_back(ConstantInt::get(Type::Int8Ty, 0));
- }
-
- ArrayType *ATy = ArrayType::get(Type::Int8Ty, ElementVals.size());
- return ConstantArray::get(ATy, ElementVals);
-}
-
/// isString - This method returns true if the array is an array of i8, and
/// if the elements of the array are all ConstantInt's.
bool ConstantArray::isString() const {
// Check the element type for i8...
- if (getType()->getElementType() != Type::Int8Ty)
+ if (getType()->getElementType() != Type::getInt8Ty(getContext()))
return false;
// Check the elements to make sure they are all integers, not constant
// expressions.
@@ -1508,17 +929,17 @@ bool ConstantArray::isString() const {
/// null bytes except its terminator.
bool ConstantArray::isCString() const {
// Check the element type for i8...
- if (getType()->getElementType() != Type::Int8Ty)
+ if (getType()->getElementType() != Type::getInt8Ty(getContext()))
return false;
- Constant *Zero = Constant::getNullValue(getOperand(0)->getType());
+
// Last element must be a null.
- if (getOperand(getNumOperands()-1) != Zero)
+ if (!getOperand(getNumOperands()-1)->isNullValue())
return false;
// Other elements must be non-null integers.
for (unsigned i = 0, e = getNumOperands()-1; i != e; ++i) {
if (!isa<ConstantInt>(getOperand(i)))
return false;
- if (getOperand(i) == Zero)
+ if (getOperand(i)->isNullValue())
return false;
}
return true;
@@ -1543,126 +964,22 @@ std::string ConstantArray::getAsString() const {
//
namespace llvm {
- template<>
- struct ConvertConstantType<ConstantStruct, StructType> {
- static void convert(ConstantStruct *OldC, const StructType *NewTy) {
- // Make everyone now use a constant of the new type...
- std::vector<Constant*> C;
- for (unsigned i = 0, e = OldC->getNumOperands(); i != e; ++i)
- C.push_back(cast<Constant>(OldC->getOperand(i)));
- Constant *New = ConstantStruct::get(NewTy, C);
- assert(New != OldC && "Didn't replace constant??");
-
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-}
-
-typedef ValueMap<std::vector<Constant*>, StructType,
- ConstantStruct, true /*largekey*/> StructConstantsTy;
-static ManagedStatic<StructConstantsTy> StructConstants;
-
-static std::vector<Constant*> getValType(ConstantStruct *CS) {
- std::vector<Constant*> Elements;
- Elements.reserve(CS->getNumOperands());
- for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
- Elements.push_back(cast<Constant>(CS->getOperand(i)));
- return Elements;
-}
-
-Constant *ConstantStruct::get(const StructType *Ty,
- const std::vector<Constant*> &V) {
- // Create a ConstantAggregateZero value if all elements are zeros...
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (!V[i]->isNullValue())
- // Implicitly locked.
- return StructConstants->getOrCreate(Ty, V);
- return ConstantAggregateZero::get(Ty);
-}
-
-Constant *ConstantStruct::get(const std::vector<Constant*> &V, bool packed) {
- std::vector<const Type*> StructEls;
- StructEls.reserve(V.size());
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- StructEls.push_back(V[i]->getType());
- return get(StructType::get(StructEls, packed), V);
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantStruct::destroyConstant() {
// Implicitly locked.
- StructConstants->remove(this);
+ getType()->getContext().pImpl->StructConstants.remove(this);
destroyConstantImpl();
}
-//---- ConstantVector::get() implementation...
-//
-namespace llvm {
- template<>
- struct ConvertConstantType<ConstantVector, VectorType> {
- static void convert(ConstantVector *OldC, const VectorType *NewTy) {
- // Make everyone now use a constant of the new type...
- std::vector<Constant*> C;
- for (unsigned i = 0, e = OldC->getNumOperands(); i != e; ++i)
- C.push_back(cast<Constant>(OldC->getOperand(i)));
- Constant *New = ConstantVector::get(NewTy, C);
- assert(New != OldC && "Didn't replace constant??");
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-}
-
-static std::vector<Constant*> getValType(ConstantVector *CP) {
- std::vector<Constant*> Elements;
- Elements.reserve(CP->getNumOperands());
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Elements.push_back(CP->getOperand(i));
- return Elements;
-}
-
-static ManagedStatic<ValueMap<std::vector<Constant*>, VectorType,
- ConstantVector> > VectorConstants;
-
-Constant *ConstantVector::get(const VectorType *Ty,
- const std::vector<Constant*> &V) {
- assert(!V.empty() && "Vectors can't be empty");
- // If this is an all-undef or all-zero vector, return a
- // ConstantAggregateZero or UndefValue.
- Constant *C = V[0];
- bool isZero = C->isNullValue();
- bool isUndef = isa<UndefValue>(C);
-
- if (isZero || isUndef) {
- for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C) {
- isZero = isUndef = false;
- break;
- }
- }
-
- if (isZero)
- return ConstantAggregateZero::get(Ty);
- if (isUndef)
- return UndefValue::get(Ty);
-
- // Implicitly locked.
- return VectorConstants->getOrCreate(Ty, V);
-}
-
-Constant *ConstantVector::get(const std::vector<Constant*> &V) {
- assert(!V.empty() && "Cannot infer type if V is empty");
- return get(VectorType::get(V.front()->getType(),V.size()), V);
-}
-
// destroyConstant - Remove the constant from the constant table...
//
void ConstantVector::destroyConstant() {
// Implicitly locked.
- VectorConstants->remove(this);
+ getType()->getContext().pImpl->VectorConstants.remove(this);
destroyConstantImpl();
}
@@ -1696,45 +1013,16 @@ Constant *ConstantVector::getSplatValue() {
//---- ConstantPointerNull::get() implementation...
//
-namespace llvm {
- // ConstantPointerNull does not take extra "value" argument...
- template<class ValType>
- struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
- static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){
- return new ConstantPointerNull(Ty);
- }
- };
-
- template<>
- struct ConvertConstantType<ConstantPointerNull, PointerType> {
- static void convert(ConstantPointerNull *OldC, const PointerType *NewTy) {
- // Make everyone now use a constant of the new type...
- Constant *New = ConstantPointerNull::get(NewTy);
- assert(New != OldC && "Didn't replace constant??");
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-}
-
-static ManagedStatic<ValueMap<char, PointerType,
- ConstantPointerNull> > NullPtrConstants;
-
-static char getValType(ConstantPointerNull *) {
- return 0;
-}
-
-
ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
// Implicitly locked.
- return NullPtrConstants->getOrCreate(Ty, 0);
+ return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
// Implicitly locked.
- NullPtrConstants->remove(this);
+ getType()->getContext().pImpl->NullPtrConstants.remove(this);
destroyConstantImpl();
}
@@ -1742,295 +1030,39 @@ void ConstantPointerNull::destroyConstant() {
//---- UndefValue::get() implementation...
//
-namespace llvm {
- // UndefValue does not take extra "value" argument...
- template<class ValType>
- struct ConstantCreator<UndefValue, Type, ValType> {
- static UndefValue *create(const Type *Ty, const ValType &V) {
- return new UndefValue(Ty);
- }
- };
-
- template<>
- struct ConvertConstantType<UndefValue, Type> {
- static void convert(UndefValue *OldC, const Type *NewTy) {
- // Make everyone now use a constant of the new type.
- Constant *New = UndefValue::get(NewTy);
- assert(New != OldC && "Didn't replace constant??");
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-}
-
-static ManagedStatic<ValueMap<char, Type, UndefValue> > UndefValueConstants;
-
-static char getValType(UndefValue *) {
- return 0;
-}
-
-
UndefValue *UndefValue::get(const Type *Ty) {
// Implicitly locked.
- return UndefValueConstants->getOrCreate(Ty, 0);
+ return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
// Implicitly locked.
- UndefValueConstants->remove(this);
- destroyConstantImpl();
-}
-
-//---- MDString::get() implementation
-//
-
-MDString::MDString(const char *begin, const char *end)
- : Constant(Type::MetadataTy, MDStringVal, 0, 0),
- StrBegin(begin), StrEnd(end) {}
-
-static ManagedStatic<StringMap<MDString*> > MDStringCache;
-
-MDString *MDString::get(const char *StrBegin, const char *StrEnd) {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(
- StrBegin, StrEnd);
- MDString *&S = Entry.getValue();
- if (!S) S = new MDString(Entry.getKeyData(),
- Entry.getKeyData() + Entry.getKeyLength());
-
- return S;
-}
-
-MDString *MDString::get(const std::string &Str) {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(
- Str.data(), Str.data() + Str.size());
- MDString *&S = Entry.getValue();
- if (!S) S = new MDString(Entry.getKeyData(),
- Entry.getKeyData() + Entry.getKeyLength());
-
- return S;
-}
-
-void MDString::destroyConstant() {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd));
- destroyConstantImpl();
-}
-
-//---- MDNode::get() implementation
-//
-
-static ManagedStatic<FoldingSet<MDNode> > MDNodeSet;
-
-MDNode::MDNode(Value*const* Vals, unsigned NumVals)
- : Constant(Type::MetadataTy, MDNodeVal, 0, 0) {
- for (unsigned i = 0; i != NumVals; ++i)
- Node.push_back(ElementVH(Vals[i], this));
-}
-
-void MDNode::Profile(FoldingSetNodeID &ID) const {
- for (const_elem_iterator I = elem_begin(), E = elem_end(); I != E; ++I)
- ID.AddPointer(*I);
-}
-
-MDNode *MDNode::get(Value*const* Vals, unsigned NumVals) {
- FoldingSetNodeID ID;
- for (unsigned i = 0; i != NumVals; ++i)
- ID.AddPointer(Vals[i]);
-
- ConstantsLock->reader_acquire();
- void *InsertPoint;
- MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint);
- ConstantsLock->reader_release();
-
- if (!N) {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint);
- if (!N) {
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- N = new(0) MDNode(Vals, NumVals);
- MDNodeSet->InsertNode(N, InsertPoint);
- }
- }
- return N;
-}
-
-void MDNode::destroyConstant() {
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
- MDNodeSet->RemoveNode(this);
-
+ getType()->getContext().pImpl->UndefValueConstants.remove(this);
destroyConstantImpl();
}
//---- ConstantExpr::get() implementations...
//
-namespace {
-
-struct ExprMapKeyType {
- typedef SmallVector<unsigned, 4> IndexList;
-
- ExprMapKeyType(unsigned opc,
- const std::vector<Constant*> &ops,
- unsigned short pred = 0,
- const IndexList &inds = IndexList())
- : opcode(opc), predicate(pred), operands(ops), indices(inds) {}
- uint16_t opcode;
- uint16_t predicate;
- std::vector<Constant*> operands;
- IndexList indices;
- bool operator==(const ExprMapKeyType& that) const {
- return this->opcode == that.opcode &&
- this->predicate == that.predicate &&
- this->operands == that.operands &&
- this->indices == that.indices;
- }
- bool operator<(const ExprMapKeyType & that) const {
- return this->opcode < that.opcode ||
- (this->opcode == that.opcode && this->predicate < that.predicate) ||
- (this->opcode == that.opcode && this->predicate == that.predicate &&
- this->operands < that.operands) ||
- (this->opcode == that.opcode && this->predicate == that.predicate &&
- this->operands == that.operands && this->indices < that.indices);
- }
-
- bool operator!=(const ExprMapKeyType& that) const {
- return !(*this == that);
- }
-};
-
-}
-
-namespace llvm {
- template<>
- struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
- static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V,
- unsigned short pred = 0) {
- if (Instruction::isCast(V.opcode))
- return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
- if ((V.opcode >= Instruction::BinaryOpsBegin &&
- V.opcode < Instruction::BinaryOpsEnd))
- return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::Select)
- return new SelectConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::ExtractElement)
- return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::InsertElement)
- return new InsertElementConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::ShuffleVector)
- return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::InsertValue)
- return new InsertValueConstantExpr(V.operands[0], V.operands[1],
- V.indices, Ty);
- if (V.opcode == Instruction::ExtractValue)
- return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
- if (V.opcode == Instruction::GetElementPtr) {
- std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
- return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty);
- }
-
- // The compare instructions are weird. We have to encode the predicate
- // value and it is combined with the instruction opcode by multiplying
- // the opcode by one hundred. We must decode this to get the predicate.
- if (V.opcode == Instruction::ICmp)
- return new CompareConstantExpr(Ty, Instruction::ICmp, V.predicate,
- V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::FCmp)
- return new CompareConstantExpr(Ty, Instruction::FCmp, V.predicate,
- V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::VICmp)
- return new CompareConstantExpr(Ty, Instruction::VICmp, V.predicate,
- V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::VFCmp)
- return new CompareConstantExpr(Ty, Instruction::VFCmp, V.predicate,
- V.operands[0], V.operands[1]);
- assert(0 && "Invalid ConstantExpr!");
- return 0;
- }
- };
-
- template<>
- struct ConvertConstantType<ConstantExpr, Type> {
- static void convert(ConstantExpr *OldC, const Type *NewTy) {
- Constant *New;
- switch (OldC->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- New = ConstantExpr::getCast(OldC->getOpcode(), OldC->getOperand(0),
- NewTy);
- break;
- case Instruction::Select:
- New = ConstantExpr::getSelectTy(NewTy, OldC->getOperand(0),
- OldC->getOperand(1),
- OldC->getOperand(2));
- break;
- default:
- assert(OldC->getOpcode() >= Instruction::BinaryOpsBegin &&
- OldC->getOpcode() < Instruction::BinaryOpsEnd);
- New = ConstantExpr::getTy(NewTy, OldC->getOpcode(), OldC->getOperand(0),
- OldC->getOperand(1));
- break;
- case Instruction::GetElementPtr:
- // Make everyone now use a constant of the new type...
- std::vector<Value*> Idx(OldC->op_begin()+1, OldC->op_end());
- New = ConstantExpr::getGetElementPtrTy(NewTy, OldC->getOperand(0),
- &Idx[0], Idx.size());
- break;
- }
-
- assert(New != OldC && "Didn't replace constant??");
- OldC->uncheckedReplaceAllUsesWith(New);
- OldC->destroyConstant(); // This constant is now dead, destroy it.
- }
- };
-} // end namespace llvm
-
-
-static ExprMapKeyType getValType(ConstantExpr *CE) {
- std::vector<Constant*> Operands;
- Operands.reserve(CE->getNumOperands());
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Operands.push_back(cast<Constant>(CE->getOperand(i)));
- return ExprMapKeyType(CE->getOpcode(), Operands,
- CE->isCompare() ? CE->getPredicate() : 0,
- CE->hasIndices() ?
- CE->getIndices() : SmallVector<unsigned, 4>());
-}
-
-static ManagedStatic<ValueMap<ExprMapKeyType, Type,
- ConstantExpr> > ExprConstants;
-
/// This is a utility function to handle folding of casts and lookup of the
/// cast in the ExprConstants map. It is used by the various get* methods below.
static inline Constant *getFoldedCast(
Instruction::CastOps opc, Constant *C, const Type *Ty) {
assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
// Fold a few common cases
- if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
+ if (Constant *FC = ConstantFoldCastInstruction(Ty->getContext(), opc, C, Ty))
return FC;
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> argVec(1, C);
ExprMapKeyType Key(opc, argVec);
// Implicitly locked.
- return ExprConstants->getOrCreate(Ty, Key);
+ return pImpl->ExprConstants.getOrCreate(Ty, Key);
}
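
The getFoldedCast helper above shows the shape used throughout this file: ask
the constant folder first, and only fall back to the per-context uniquing
table on a miss. A minimal standalone sketch of that idiom, with our own toy
names (Expr, tryFold, foldOrUnique) rather than LLVM's:

#include <map>
#include <utility>

struct Expr { int Key; explicit Expr(int K) : Key(K) {} };

// Hypothetical folder: hands back a shared node for the trivial case,
// or 0 when a uniqued expression node is required.
static Expr *tryFold(int Key) {
  static Expr Zero(0);
  return Key == 0 ? &Zero : 0;
}

static Expr *foldOrUnique(std::map<int, Expr*> &UniqueMap, int Key) {
  if (Expr *Folded = tryFold(Key))        // fold a few common cases first
    return Folded;
  std::map<int, Expr*>::iterator I = UniqueMap.find(Key);
  if (I != UniqueMap.end())               // reuse the canonical node
    return I->second;
  Expr *E = new Expr(Key);                // otherwise create and remember it
  UniqueMap.insert(std::make_pair(Key, E));
  return E;
}
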
Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
@@ -2041,7 +1073,7 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
switch (opc) {
default:
- assert(0 && "Invalid cast opcode");
+ llvm_unreachable("Invalid cast opcode");
break;
case Instruction::Trunc: return getTrunc(C, Ty);
case Instruction::ZExt: return getZExt(C, Ty);
@@ -2256,27 +1288,9 @@ Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
return getFoldedCast(Instruction::BitCast, C, DstTy);
}
-Constant *ConstantExpr::getAlignOf(const Type *Ty) {
- // alignof is implemented as: (i64) gep ({i8,Ty}*)null, 0, 1
- const Type *AligningTy = StructType::get(Type::Int8Ty, Ty, NULL);
- Constant *NullPtr = getNullValue(AligningTy->getPointerTo());
- Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
- Constant *One = ConstantInt::get(Type::Int32Ty, 1);
- Constant *Indices[2] = { Zero, One };
- Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
- return getCast(Instruction::PtrToInt, GEP, Type::Int32Ty);
-}
-
-Constant *ConstantExpr::getSizeOf(const Type *Ty) {
- // sizeof is implemented as: (i64) gep (Ty*)null, 1
- Constant *GEPIdx = ConstantInt::get(Type::Int32Ty, 1);
- Constant *GEP =
- getGetElementPtr(getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
- return getCast(Instruction::PtrToInt, GEP, Type::Int64Ty);
-}
-
Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
- Constant *C1, Constant *C2) {
+ Constant *C1, Constant *C2,
+ unsigned Flags) {
// Check the operands for consistency first
assert(Opcode >= Instruction::BinaryOpsBegin &&
Opcode < Instruction::BinaryOpsEnd &&
@@ -2284,40 +1298,42 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
assert(C1->getType() == C2->getType() &&
"Operand types in binary constant expression should match");
- if (ReqTy == C1->getType() || ReqTy == Type::Int1Ty)
- if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ if (ReqTy == C1->getType() || ReqTy == Type::getInt1Ty(ReqTy->getContext()))
+ if (Constant *FC = ConstantFoldBinaryInstruction(ReqTy->getContext(),
+ Opcode, C1, C2))
return FC; // Fold a few common cases...
std::vector<Constant*> argVec(1, C1); argVec.push_back(C2);
- ExprMapKeyType Key(Opcode, argVec);
+ ExprMapKeyType Key(Opcode, argVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
// Implicitly locked.
- return ExprConstants->getOrCreate(ReqTy, Key);
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
Constant *ConstantExpr::getCompareTy(unsigned short predicate,
Constant *C1, Constant *C2) {
- bool isVectorType = C1->getType()->getTypeID() == Type::VectorTyID;
switch (predicate) {
- default: assert(0 && "Invalid CmpInst predicate");
+ default: llvm_unreachable("Invalid CmpInst predicate");
case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
case CmpInst::FCMP_TRUE:
- return isVectorType ? getVFCmp(predicate, C1, C2)
- : getFCmp(predicate, C1, C2);
+ return getFCmp(predicate, C1, C2);
+
case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE:
- return isVectorType ? getVICmp(predicate, C1, C2)
- : getICmp(predicate, C1, C2);
+ return getICmp(predicate, C1, C2);
}
}
-Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags) {
// API compatibility: Adjust integer opcodes to floating-point opcodes.
if (C1->getType()->isFPOrFPVector()) {
if (Opcode == Instruction::Add) Opcode = Instruction::FAdd;
@@ -2382,7 +1398,44 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
}
#endif
- return getTy(C1->getType(), Opcode, C1, C2);
+ return getTy(C1->getType(), Opcode, C1, C2, Flags);
+}
+
+Constant* ConstantExpr::getSizeOf(const Type* Ty) {
+ // sizeof is implemented as: (i64) gep (Ty*)null, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
+ return getCast(Instruction::PtrToInt, GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant* ConstantExpr::getAlignOf(const Type* Ty) {
+  // alignof is implemented as: (i32) gep ({i8,Ty}*)null, 0, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ const Type *AligningTy = StructType::get(Ty->getContext(),
+ Type::getInt8Ty(Ty->getContext()), Ty, NULL);
+ Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 0);
+ Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *Indices[2] = { Zero, One };
+ Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
+ return getCast(Instruction::PtrToInt, GEP,
+ Type::getInt32Ty(Ty->getContext()));
+}
+
+Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+ // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Constant *GEPIdx[] = {
+ ConstantInt::get(Type::getInt64Ty(STy->getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), FieldNo)
+ };
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(STy)), GEPIdx, 2);
+ return getCast(Instruction::PtrToInt, GEP,
+ Type::getInt64Ty(STy->getContext()));
}
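
A hedged usage sketch of the three null-GEP helpers above, written against
the 2.6-era C++ API this patch introduces; the struct layout and the function
name are ours. All three produce target-independent constant expressions
rather than numbers baked in from a data layout:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// Size of {i32, i64} plus the offset of its second field, as one i64 expr.
Constant *sizePlusSecondFieldOffset(LLVMContext &Ctx) {
  const StructType *STy = StructType::get(Ctx,
                                          Type::getInt32Ty(Ctx),
                                          Type::getInt64Ty(Ctx), NULL);
  Constant *Size = ConstantExpr::getSizeOf(STy);      // (i64) gep (Ty*)null, 1
  Constant *Off  = ConstantExpr::getOffsetOf(STy, 1); // (i64) gep null, 0, 1
  return ConstantExpr::getAdd(Size, Off);             // still a constant expr
}
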
Constant *ConstantExpr::getCompare(unsigned short pred,
@@ -2396,7 +1449,8 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
if (ReqTy == V1->getType())
- if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ if (Constant *SC = ConstantFoldSelectInstruction(
+ ReqTy->getContext(), C, V1, V2))
return SC; // Fold common cases
std::vector<Constant*> argVec(3, C);
@@ -2404,8 +1458,10 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
argVec[2] = V2;
ExprMapKeyType Key(Instruction::Select, argVec);
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
// Implicitly locked.
- return ExprConstants->getOrCreate(ReqTy, Key);
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
@@ -2416,7 +1472,9 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
cast<PointerType>(ReqTy)->getElementType() &&
"GEP indices invalid!");
- if (Constant *FC = ConstantFoldGetElementPtr(C, (Constant**)Idxs, NumIdx))
+ if (Constant *FC = ConstantFoldGetElementPtr(
+ ReqTy->getContext(), C, /*inBounds=*/false,
+ (Constant**)Idxs, NumIdx))
return FC; // Fold a few common cases...
assert(isa<PointerType>(C->getType()) &&
@@ -2429,8 +1487,41 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
ArgVec.push_back(cast<Constant>(Idxs[i]));
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec);
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
+ // Implicitly locked.
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
+ Constant *C,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
+ Idxs+NumIdx) ==
+ cast<PointerType>(ReqTy)->getElementType() &&
+ "GEP indices invalid!");
+
+ if (Constant *FC = ConstantFoldGetElementPtr(
+ ReqTy->getContext(), C, /*inBounds=*/true,
+ (Constant**)Idxs, NumIdx))
+ return FC; // Fold a few common cases...
+
+ assert(isa<PointerType>(C->getType()) &&
+ "Non-pointer type for constant GetElementPtr expression");
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.reserve(NumIdx+1);
+ ArgVec.push_back(C);
+ for (unsigned i = 0; i != NumIdx; ++i)
+ ArgVec.push_back(cast<Constant>(Idxs[i]));
+ const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
+ GEPOperator::IsInBounds);
+
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
// Implicitly locked.
- return ExprConstants->getOrCreate(ReqTy, Key);
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
@@ -2443,11 +1534,27 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
}
+Constant *ConstantExpr::getInBoundsGetElementPtr(Constant *C,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ // Get the result type of the getelementptr!
+ const Type *Ty =
+ GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+ assert(Ty && "GEP indices invalid!");
+ unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
+ return getInBoundsGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
+}
+
Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant* const *Idxs,
unsigned NumIdx) {
return getGetElementPtr(C, (Value* const *)Idxs, NumIdx);
}
+Constant *ConstantExpr::getInBoundsGetElementPtr(Constant *C,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return getInBoundsGetElementPtr(C, (Value* const *)Idxs, NumIdx);
+}
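
Because the optional flags travel in the uniquing key (GEPOperator::IsInBounds
above), a plain GEP and an inbounds GEP over identical operands are expected
to be distinct constant nodes. A small sketch under that assumption, relying
on `gep null, 1` not folding away, just as getSizeOf does; the function name
is ours:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

void inboundsIsPartOfTheKey(LLVMContext &Ctx) {
  Constant *Null = Constant::getNullValue(
      PointerType::getUnqual(Type::getInt8Ty(Ctx)));
  Constant *One = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  // Same base and index, different optional flags in the key.
  Constant *Plain    = ConstantExpr::getGetElementPtr(Null, &One, 1);
  Constant *InBounds = ConstantExpr::getInBoundsGetElementPtr(Null, &One, 1);
  (void)Plain; (void)InBounds; // expected: Plain != InBounds
}
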
Constant *
ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
@@ -2455,7 +1562,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
- if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ if (Constant *FC = ConstantFoldCompareInstruction(
+ LHS->getContext(), pred, LHS, RHS))
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
@@ -2465,8 +1573,11 @@ ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+
// Implicitly locked.
- return ExprConstants->getOrCreate(Type::Int1Ty, Key);
+ return
+ pImpl->ExprConstants.getOrCreate(Type::getInt1Ty(LHS->getContext()), Key);
}
Constant *
@@ -2474,7 +1585,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
assert(LHS->getType() == RHS->getType());
assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
- if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ if (Constant *FC = ConstantFoldCompareInstruction(
+ LHS->getContext(), pred, LHS, RHS))
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
@@ -2484,123 +1596,33 @@ ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
- // Implicitly locked.
- return ExprConstants->getOrCreate(Type::Int1Ty, Key);
-}
-
-Constant *
-ConstantExpr::getVICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
- assert(isa<VectorType>(LHS->getType()) && LHS->getType() == RHS->getType() &&
- "Tried to create vicmp operation on non-vector type!");
- assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
- pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid VICmp Predicate");
-
- const VectorType *VTy = cast<VectorType>(LHS->getType());
- const Type *EltTy = VTy->getElementType();
- unsigned NumElts = VTy->getNumElements();
-
- // See if we can fold the element-wise comparison of the LHS and RHS.
- SmallVector<Constant *, 16> LHSElts, RHSElts;
- LHS->getVectorElements(LHSElts);
- RHS->getVectorElements(RHSElts);
-
- if (!LHSElts.empty() && !RHSElts.empty()) {
- SmallVector<Constant *, 16> Elts;
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *FC = ConstantFoldCompareInstruction(pred, LHSElts[i],
- RHSElts[i]);
- if (ConstantInt *FCI = dyn_cast_or_null<ConstantInt>(FC)) {
- if (FCI->getZExtValue())
- Elts.push_back(ConstantInt::getAllOnesValue(EltTy));
- else
- Elts.push_back(ConstantInt::get(EltTy, 0ULL));
- } else if (FC && isa<UndefValue>(FC)) {
- Elts.push_back(UndefValue::get(EltTy));
- } else {
- break;
- }
- }
- if (Elts.size() == NumElts)
- return ConstantVector::get(&Elts[0], Elts.size());
- }
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.push_back(LHS);
- ArgVec.push_back(RHS);
- // Get the key type with both the opcode and predicate
- const ExprMapKeyType Key(Instruction::VICmp, ArgVec, pred);
-
- // Implicitly locked.
- return ExprConstants->getOrCreate(LHS->getType(), Key);
-}
-
-Constant *
-ConstantExpr::getVFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
- assert(isa<VectorType>(LHS->getType()) &&
- "Tried to create vfcmp operation on non-vector type!");
- assert(LHS->getType() == RHS->getType());
- assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid VFCmp Predicate");
-
- const VectorType *VTy = cast<VectorType>(LHS->getType());
- unsigned NumElts = VTy->getNumElements();
- const Type *EltTy = VTy->getElementType();
- const Type *REltTy = IntegerType::get(EltTy->getPrimitiveSizeInBits());
- const Type *ResultTy = VectorType::get(REltTy, NumElts);
-
- // See if we can fold the element-wise comparison of the LHS and RHS.
- SmallVector<Constant *, 16> LHSElts, RHSElts;
- LHS->getVectorElements(LHSElts);
- RHS->getVectorElements(RHSElts);
-
- if (!LHSElts.empty() && !RHSElts.empty()) {
- SmallVector<Constant *, 16> Elts;
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *FC = ConstantFoldCompareInstruction(pred, LHSElts[i],
- RHSElts[i]);
- if (ConstantInt *FCI = dyn_cast_or_null<ConstantInt>(FC)) {
- if (FCI->getZExtValue())
- Elts.push_back(ConstantInt::getAllOnesValue(REltTy));
- else
- Elts.push_back(ConstantInt::get(REltTy, 0ULL));
- } else if (FC && isa<UndefValue>(FC)) {
- Elts.push_back(UndefValue::get(REltTy));
- } else {
- break;
- }
- }
- if (Elts.size() == NumElts)
- return ConstantVector::get(&Elts[0], Elts.size());
- }
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.push_back(LHS);
- ArgVec.push_back(RHS);
- // Get the key type with both the opcode and predicate
- const ExprMapKeyType Key(Instruction::VFCmp, ArgVec, pred);
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
// Implicitly locked.
- return ExprConstants->getOrCreate(ResultTy, Key);
+ return
+ pImpl->ExprConstants.getOrCreate(Type::getInt1Ty(LHS->getContext()), Key);
}
Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
Constant *Idx) {
- if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
+ if (Constant *FC = ConstantFoldExtractElementInstruction(
+ ReqTy->getContext(), Val, Idx))
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec(1, Val);
ArgVec.push_back(Idx);
const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
// Implicitly locked.
- return ExprConstants->getOrCreate(ReqTy, Key);
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
assert(isa<VectorType>(Val->getType()) &&
"Tried to create extractelement operation on non-vector type!");
- assert(Idx->getType() == Type::Int32Ty &&
+ assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) &&
"Extractelement index must be i32 type!");
return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(),
Val, Idx);
@@ -2608,7 +1630,8 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
Constant *Elt, Constant *Idx) {
- if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ if (Constant *FC = ConstantFoldInsertElementInstruction(
+ ReqTy->getContext(), Val, Elt, Idx))
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec(1, Val);
@@ -2616,8 +1639,10 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
ArgVec.push_back(Idx);
const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
// Implicitly locked.
- return ExprConstants->getOrCreate(ReqTy, Key);
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
@@ -2626,14 +1651,15 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
"Tried to create insertelement operation on non-vector type!");
assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
&& "Insertelement types must match!");
- assert(Idx->getType() == Type::Int32Ty &&
+ assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) &&
"Insertelement index must be i32 type!");
return getInsertElementTy(Val->getType(), Val, Elt, Idx);
}
Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
Constant *V2, Constant *Mask) {
- if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(
+ ReqTy->getContext(), V1, V2, Mask))
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec(1, V1);
@@ -2641,8 +1667,10 @@ Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
ArgVec.push_back(Mask);
const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+ LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
// Implicitly locked.
- return ExprConstants->getOrCreate(ReqTy, Key);
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
@@ -2666,7 +1694,8 @@ Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg,
"insertvalue type invalid!");
assert(Agg->getType()->isFirstClassType() &&
"Non-first-class type for constant InsertValue expression");
- Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx);
+ Constant *FC = ConstantFoldInsertValueInstruction(
+ ReqTy->getContext(), Agg, Val, Idxs, NumIdx);
assert(FC && "InsertValue constant expr couldn't be folded!");
return FC;
}
@@ -2692,7 +1721,8 @@ Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg,
"extractvalue indices invalid!");
assert(Agg->getType()->isFirstClassType() &&
"Non-first-class type for constant extractvalue expression");
- Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx);
+ Constant *FC = ConstantFoldExtractValueInstruction(
+ ReqTy->getContext(), Agg, Idxs, NumIdx);
assert(FC && "ExtractValue constant expr couldn't be folded!");
return FC;
}
@@ -2708,25 +1738,109 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
}
-Constant *ConstantExpr::getZeroValueForNegationExpr(const Type *Ty) {
- if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
- if (PTy->getElementType()->isFloatingPoint()) {
- std::vector<Constant*> zeros(PTy->getNumElements(),
- ConstantFP::getNegativeZero(PTy->getElementType()));
- return ConstantVector::get(PTy, zeros);
- }
+Constant* ConstantExpr::getNeg(Constant* C) {
+ // API compatibility: Adjust integer opcodes to floating-point opcodes.
+ if (C->getType()->isFPOrFPVector())
+ return getFNeg(C);
+ assert(C->getType()->isIntOrIntVector() &&
+ "Cannot NEG a nonintegral value!");
+ return get(Instruction::Sub,
+ ConstantFP::getZeroValueForNegation(C->getType()),
+ C);
+}
- if (Ty->isFloatingPoint())
- return ConstantFP::getNegativeZero(Ty);
+Constant* ConstantExpr::getFNeg(Constant* C) {
+ assert(C->getType()->isFPOrFPVector() &&
+ "Cannot FNEG a non-floating-point value!");
+ return get(Instruction::FSub,
+ ConstantFP::getZeroValueForNegation(C->getType()),
+ C);
+}
- return Constant::getNullValue(Ty);
+Constant* ConstantExpr::getNot(Constant* C) {
+ assert(C->getType()->isIntOrIntVector() &&
+ "Cannot NOT a nonintegral value!");
+ return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
+}
+
+Constant* ConstantExpr::getAdd(Constant* C1, Constant* C2) {
+ return get(Instruction::Add, C1, C2);
+}
+
+Constant* ConstantExpr::getFAdd(Constant* C1, Constant* C2) {
+ return get(Instruction::FAdd, C1, C2);
+}
+
+Constant* ConstantExpr::getSub(Constant* C1, Constant* C2) {
+ return get(Instruction::Sub, C1, C2);
+}
+
+Constant* ConstantExpr::getFSub(Constant* C1, Constant* C2) {
+ return get(Instruction::FSub, C1, C2);
+}
+
+Constant* ConstantExpr::getMul(Constant* C1, Constant* C2) {
+ return get(Instruction::Mul, C1, C2);
+}
+
+Constant* ConstantExpr::getFMul(Constant* C1, Constant* C2) {
+ return get(Instruction::FMul, C1, C2);
+}
+
+Constant* ConstantExpr::getUDiv(Constant* C1, Constant* C2) {
+ return get(Instruction::UDiv, C1, C2);
+}
+
+Constant* ConstantExpr::getSDiv(Constant* C1, Constant* C2) {
+ return get(Instruction::SDiv, C1, C2);
+}
+
+Constant* ConstantExpr::getFDiv(Constant* C1, Constant* C2) {
+ return get(Instruction::FDiv, C1, C2);
+}
+
+Constant* ConstantExpr::getURem(Constant* C1, Constant* C2) {
+ return get(Instruction::URem, C1, C2);
+}
+
+Constant* ConstantExpr::getSRem(Constant* C1, Constant* C2) {
+ return get(Instruction::SRem, C1, C2);
+}
+
+Constant* ConstantExpr::getFRem(Constant* C1, Constant* C2) {
+ return get(Instruction::FRem, C1, C2);
+}
+
+Constant* ConstantExpr::getAnd(Constant* C1, Constant* C2) {
+ return get(Instruction::And, C1, C2);
+}
+
+Constant* ConstantExpr::getOr(Constant* C1, Constant* C2) {
+ return get(Instruction::Or, C1, C2);
+}
+
+Constant* ConstantExpr::getXor(Constant* C1, Constant* C2) {
+ return get(Instruction::Xor, C1, C2);
+}
+
+Constant* ConstantExpr::getShl(Constant* C1, Constant* C2) {
+ return get(Instruction::Shl, C1, C2);
+}
+
+Constant* ConstantExpr::getLShr(Constant* C1, Constant* C2) {
+ return get(Instruction::LShr, C1, C2);
+}
+
+Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) {
+ return get(Instruction::AShr, C1, C2);
}
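
These wrappers are thin sugar over ConstantExpr::get, and the folder
canonicalizes equivalent spellings to one uniqued node. A minimal sketch,
assuming an existing LLVMContext; the values are arbitrary:

#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
#include <cassert>
using namespace llvm;

void negationIsSubFromZero(LLVMContext &Ctx) {
  Constant *Five = ConstantInt::get(Type::getInt32Ty(Ctx), 5);
  Constant *NegA = ConstantExpr::getNeg(Five);    // spelled as neg
  Constant *NegB = ConstantExpr::getSub(          // spelled as sub(0, x)
      Constant::getNullValue(Five->getType()), Five);
  assert(NegA == NegB && "both spellings fold to the same uniqued -5");
  Constant *Not = ConstantExpr::getNot(Five);     // xor with all ones -> -6
  (void)Not;
}
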
// destroyConstant - Remove the constant from the constant table...
//
void ConstantExpr::destroyConstant() {
// Implicitly locked.
- ExprConstants->remove(this);
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ pImpl->ExprConstants.remove(this);
destroyConstantImpl();
}
@@ -2747,12 +1861,16 @@ const char *ConstantExpr::getOpcodeName() const {
/// single invocation handles all 1000 uses. Handling them one at a time would
/// work, but would be really slow because it would have to unique each updated
/// array instance.
void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
Constant *ToC = cast<Constant>(To);
- std::pair<ArrayConstantsTy::MapKey, Constant*> Lookup;
+ LLVMContext &Context = getType()->getContext();
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
Lookup.first.first = getType();
Lookup.second = this;
@@ -2774,7 +1892,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
} else {
isAllZeros = true;
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ for (Use *O = OperandList, *E = OperandList+getNumOperands();O != E; ++O) {
Constant *Val = cast<Constant>(O->get());
if (Val == From) {
Val = ToC;
@@ -2790,10 +1908,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Replacement = ConstantAggregateZero::get(getType());
} else {
// Check to see if we have this array type already.
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
bool Exists;
- ArrayConstantsTy::MapTy::iterator I =
- ArrayConstants->InsertOrGetItem(Lookup, Exists);
+ LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
+ pImpl->ArrayConstants.InsertOrGetItem(Lookup, Exists);
if (Exists) {
Replacement = I->second;
@@ -2802,12 +1920,12 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
// creating a new constant array, inserting it, replaceallusesof'ing the
// old with the new, then deleting the old... just update the current one
// in place!
- ArrayConstants->MoveConstantToNewSlot(this, I);
+ pImpl->ArrayConstants.MoveConstantToNewSlot(this, I);
// Update to the new value. Optimize for the case when we have a single
// operand that we're changing, but handle bulk updates efficiently.
if (NumUpdated == 1) {
- unsigned OperandToUpdate = U-OperandList;
+ unsigned OperandToUpdate = U - OperandList;
assert(getOperand(OperandToUpdate) == From &&
"ReplaceAllUsesWith broken!");
setOperand(OperandToUpdate, ToC);
@@ -2838,7 +1956,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
unsigned OperandToUpdate = U-OperandList;
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
- std::pair<StructConstantsTy::MapKey, Constant*> Lookup;
+ std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
Lookup.first.first = getType();
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
@@ -2849,7 +1967,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
// compute whether this turns into an all-zeros struct.
bool isAllZeros = false;
if (!ToC->isNullValue()) {
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O)
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
Values.push_back(cast<Constant>(O->get()));
} else {
isAllZeros = true;
@@ -2861,15 +1979,18 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
Values[OperandToUpdate] = ToC;
+ LLVMContext &Context = getType()->getContext();
+ LLVMContextImpl *pImpl = Context.pImpl;
+
Constant *Replacement = 0;
if (isAllZeros) {
Replacement = ConstantAggregateZero::get(getType());
} else {
    // Check to see if we have this struct type already.
- sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
bool Exists;
- StructConstantsTy::MapTy::iterator I =
- StructConstants->InsertOrGetItem(Lookup, Exists);
+ LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
+ pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
if (Exists) {
Replacement = I->second;
@@ -2878,7 +1999,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
// creating a new constant struct, inserting it, replaceallusesof'ing the
// old with the new, then deleting the old... just update the current one
// in place!
- StructConstants->MoveConstantToNewSlot(this, I);
+ pImpl->StructConstants.MoveConstantToNewSlot(this, I);
// Update to the new value.
setOperand(OperandToUpdate, ToC);
@@ -2907,7 +2028,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Values.push_back(Val);
}
- Constant *Replacement = ConstantVector::get(getType(), Values);
+ Constant *Replacement = get(getType(), Values);
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
@@ -2992,22 +2113,18 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
if (C2 == From) C2 = To;
if (getOpcode() == Instruction::ICmp)
Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2);
- else if (getOpcode() == Instruction::FCmp)
- Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
- else if (getOpcode() == Instruction::VICmp)
- Replacement = ConstantExpr::getVICmp(getPredicate(), C1, C2);
else {
- assert(getOpcode() == Instruction::VFCmp);
- Replacement = ConstantExpr::getVFCmp(getPredicate(), C1, C2);
+ assert(getOpcode() == Instruction::FCmp);
+ Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
}
} else if (getNumOperands() == 2) {
Constant *C1 = getOperand(0);
Constant *C2 = getOperand(1);
if (C1 == From) C1 = To;
if (C2 == From) C2 = To;
- Replacement = ConstantExpr::get(getOpcode(), C1, C2);
+ Replacement = ConstantExpr::get(getOpcode(), C1, C2, SubclassData);
} else {
- assert(0 && "Unknown ConstantExpr type!");
+ llvm_unreachable("Unknown ConstantExpr type!");
return;
}
@@ -3019,20 +2136,3 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
// Delete the old constant!
destroyConstant();
}
-
-void MDNode::replaceElement(Value *From, Value *To) {
- SmallVector<Value*, 4> Values;
- Values.reserve(getNumElements()); // Build replacement array...
- for (unsigned i = 0, e = getNumElements(); i != e; ++i) {
- Value *Val = getElement(i);
- if (Val == From) Val = To;
- Values.push_back(Val);
- }
-
- MDNode *Replacement = MDNode::get(&Values[0], Values.size());
- assert(Replacement != this && "I didn't contain From!");
-
- uncheckedReplaceAllUsesWith(Replacement);
-
- destroyConstant();
-}
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
new file mode 100644
index 000000000000..526b4b1b7ee3
--- /dev/null
+++ b/lib/VMCore/ConstantsContext.h
@@ -0,0 +1,787 @@
+//===-- ConstantsContext.h - Constants-related Context Internals ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for creating and managing constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANTSCONTEXT_H
+#define LLVM_CONSTANTSCONTEXT_H
+
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/RWMutex.h"
+#include <map>
+
+namespace llvm {
+template<class ValType>
+struct ConstantTraits;
+
+/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement unary constant exprs.
+class UnaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty)
+ : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
+ Op<0>() = C;
+ }
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement binary constant exprs.
+class BinaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags)
+ : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ SubclassOptionalData = Flags;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// SelectConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement select constant exprs.
+class SelectConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractelement constant exprs.
+class ExtractElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ ExtractElementConstantExpr(Constant *C1, Constant *C2)
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ Instruction::ExtractElement, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertelement constant exprs.
+class InsertElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ShuffleVectorConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// shufflevector constant exprs.
+class ShuffleVectorConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(VectorType::get(
+ cast<VectorType>(C1->getType())->getElementType(),
+ cast<VectorType>(C3->getType())->getNumElements()),
+ Instruction::ShuffleVector,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractvalue constant exprs.
+class ExtractValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ ExtractValueConstantExpr(Constant *Agg,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ }
+
+ /// Indices - These identify which value to extract.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertvalue constant exprs.
+class InsertValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+  // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ InsertValueConstantExpr(Constant *Agg, Constant *Val,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ Op<1>() = Val;
+ }
+
+ /// Indices - These identify the position for the insertion.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+
+/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
+/// used behind the scenes to implement getelementptr constant exprs.
+class GetElementPtrConstantExpr : public ConstantExpr {
+ GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy);
+public:
+ static GetElementPtrConstantExpr *Create(Constant *C,
+ const std::vector<Constant*>&IdxList,
+ const Type *DestTy,
+ unsigned Flags) {
+ GetElementPtrConstantExpr *Result =
+ new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
+ Result->SubclassOptionalData = Flags;
+ return Result;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+// CompareConstantExpr - This class is private to Constants.cpp, and is used
+// behind the scenes to implement ICmp and FCmp constant expressions. This is
+// needed in order to store the predicate value for these instructions.
+struct CompareConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ unsigned short predicate;
+ CompareConstantExpr(const Type *ty, Instruction::OtherOps opc,
+ unsigned short pred, Constant* LHS, Constant* RHS)
+ : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+template <>
+struct OperandTraits<UnaryConstantExpr> : public FixedNumOperandTraits<1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<BinaryConstantExpr> : public FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<SelectConstantExpr> : public FixedNumOperandTraits<3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractElementConstantExpr> : public FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertElementConstantExpr> : public FixedNumOperandTraits<3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<ShuffleVectorConstantExpr> : public FixedNumOperandTraits<3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractValueConstantExpr> : public FixedNumOperandTraits<1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertValueConstantExpr> : public FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<GetElementPtrConstantExpr> : public VariadicOperandTraits<1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
+
+
+template <>
+struct OperandTraits<CompareConstantExpr> : public FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+
+struct ExprMapKeyType {
+ typedef SmallVector<unsigned, 4> IndexList;
+
+ ExprMapKeyType(unsigned opc,
+ const std::vector<Constant*> &ops,
+ unsigned short flags = 0,
+ unsigned short optionalflags = 0,
+ const IndexList &inds = IndexList())
+ : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
+ operands(ops), indices(inds) {}
+ uint8_t opcode;
+ uint8_t subclassoptionaldata;
+ uint16_t subclassdata;
+ std::vector<Constant*> operands;
+ IndexList indices;
+ bool operator==(const ExprMapKeyType& that) const {
+ return this->opcode == that.opcode &&
+ this->subclassdata == that.subclassdata &&
+ this->subclassoptionaldata == that.subclassoptionaldata &&
+ this->operands == that.operands &&
+ this->indices == that.indices;
+ }
+ bool operator<(const ExprMapKeyType & that) const {
+ if (this->opcode != that.opcode) return this->opcode < that.opcode;
+ if (this->operands != that.operands) return this->operands < that.operands;
+ if (this->subclassdata != that.subclassdata)
+ return this->subclassdata < that.subclassdata;
+ if (this->subclassoptionaldata != that.subclassoptionaldata)
+ return this->subclassoptionaldata < that.subclassoptionaldata;
+ if (this->indices != that.indices) return this->indices < that.indices;
+ return false;
+ }
+
+ bool operator!=(const ExprMapKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+// The number of operands for each ConstantCreator::create method is
+// determined by the ConstantTraits template.
+template<typename T, typename Alloc>
+struct ConstantTraits< std::vector<T, Alloc> > {
+  static unsigned uses(const std::vector<T, Alloc>& v) {
+    return v.size();
+  }
+};
+
+// ConstantCreator - A class that is used to create constants by
+// ValueMap*. This class should be partially specialized if there is
+// something strange that needs to be done to interface to the ctor for the
+// constant.
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator {
+ static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
+ return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass>
+struct ConstantKeyData {
+ typedef void ValType;
+ static ValType getValType(ConstantClass *C) {
+ llvm_unreachable("Unknown Constant type!");
+ }
+};
+
+template<>
+struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
+ static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V,
+ unsigned short pred = 0) {
+ if (Instruction::isCast(V.opcode))
+ return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
+ if ((V.opcode >= Instruction::BinaryOpsBegin &&
+ V.opcode < Instruction::BinaryOpsEnd))
+ return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
+ V.subclassoptionaldata);
+ if (V.opcode == Instruction::Select)
+ return new SelectConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ExtractElement)
+ return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::InsertElement)
+ return new InsertElementConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ShuffleVector)
+ return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::InsertValue)
+ return new InsertValueConstantExpr(V.operands[0], V.operands[1],
+ V.indices, Ty);
+ if (V.opcode == Instruction::ExtractValue)
+ return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
+ if (V.opcode == Instruction::GetElementPtr) {
+ std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
+ return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
+ V.subclassoptionaldata);
+ }
+
+    // The compare instructions are special: the predicate is not part of
+    // the opcode, so it travels in the key's subclassdata field and must
+    // be handed through to the CompareConstantExpr here.
+ if (V.opcode == Instruction::ICmp)
+ return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::FCmp)
+ return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ llvm_unreachable("Invalid ConstantExpr!");
+ return 0;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantExpr> {
+ typedef ExprMapKeyType ValType;
+ static ValType getValType(ConstantExpr *CE) {
+ std::vector<Constant*> Operands;
+ Operands.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Operands.push_back(cast<Constant>(CE->getOperand(i)));
+ return ExprMapKeyType(CE->getOpcode(), Operands,
+ CE->isCompare() ? CE->getPredicate() : 0,
+ CE->getRawSubclassOptionalData(),
+ CE->hasIndices() ?
+ CE->getIndices() : SmallVector<unsigned, 4>());
+ }
+};
+
+// ConstantAggregateZero does not take extra "value" argument...
+template<class ValType>
+struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
+ static ConstantAggregateZero *create(const Type *Ty, const ValType &V){
+ return new ConstantAggregateZero(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantVector> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantVector *CP) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CP->getNumOperands());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ Elements.push_back(CP->getOperand(i));
+ return Elements;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantAggregateZero> {
+ typedef char ValType;
+ static ValType getValType(ConstantAggregateZero *C) {
+ return 0;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantArray> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantArray *CA) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CA->getOperand(i)));
+ return Elements;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantStruct> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantStruct *CS) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CS->getNumOperands());
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CS->getOperand(i)));
+ return Elements;
+ }
+};
+
+// ConstantPointerNull does not take extra "value" argument...
+template<class ValType>
+struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
+ static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){
+ return new ConstantPointerNull(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantPointerNull> {
+ typedef char ValType;
+ static ValType getValType(ConstantPointerNull *C) {
+ return 0;
+ }
+};
+
+// UndefValue does not take extra "value" argument...
+template<class ValType>
+struct ConstantCreator<UndefValue, Type, ValType> {
+ static UndefValue *create(const Type *Ty, const ValType &V) {
+ return new UndefValue(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<UndefValue> {
+ typedef char ValType;
+ static ValType getValType(UndefValue *C) {
+ return 0;
+ }
+};
+
+template<class ValType, class TypeClass, class ConstantClass,
+ bool HasLargeKey = false /*true for arrays and structs*/ >
+class ValueMap : public AbstractTypeUser {
+public:
+ typedef std::pair<const TypeClass*, ValType> MapKey;
+ typedef std::map<MapKey, ConstantClass *> MapTy;
+ typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
+ typedef std::map<const DerivedType*, typename MapTy::iterator>
+ AbstractTypeMapTy;
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+ /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
+ /// from the constants to their element in Map. This is important for
+ /// removal of constants from the array, which would otherwise have to scan
+ /// through the map with very large keys.
+ InverseMapTy InverseMap;
+
+ /// AbstractTypeMap - Map for abstract type constants.
+ ///
+ AbstractTypeMapTy AbstractTypeMap;
+
+ /// ValueMapLock - Mutex for this map.
+ sys::SmartMutex<true> ValueMapLock;
+
+public:
+ // NOTE: This function is not locked. It is the caller's responsibility
+ // to enforce proper synchronization.
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ if (I->second->use_empty())
+ delete I->second;
+ }
+ }
+
+ /// InsertOrGetItem - Return an iterator for the specified element.
+  /// If the element exists in the map, the returned iterator points to the
+  /// entry and Exists is set to true. If not, the iterator points to the
+  /// newly inserted entry, Exists is set to false, and I->second == 0; the
+  /// caller is expected to fill it in.
+  /// NOTE: This function is not locked. It is the caller's responsibility
+  /// to enforce proper synchronization.
+ typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
+ &InsertVal,
+ bool &Exists) {
+ std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
+ Exists = !IP.second;
+ return IP.first;
+ }
+
+private:
+ typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
+ if (HasLargeKey) {
+ typename InverseMapTy::iterator IMI = InverseMap.find(CP);
+ assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
+ IMI->second->second == CP &&
+ "InverseMap corrupt!");
+ return IMI->second;
+ }
+
+ typename MapTy::iterator I =
+ Map.find(MapKey(static_cast<const TypeClass*>(CP->getRawType()),
+ ConstantKeyData<ConstantClass>::getValType(CP)));
+ if (I == Map.end() || I->second != CP) {
+ // FIXME: This should not use a linear scan. If this gets to be a
+ // performance problem, someone should look at this.
+ for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
+ /* empty */;
+ }
+ return I;
+ }
+
+ void AddAbstractTypeUser(const Type *Ty, typename MapTy::iterator I) {
+ // If the type of the constant is abstract, make sure that an entry
+ // exists for it in the AbstractTypeMap.
+ if (Ty->isAbstract()) {
+ const DerivedType *DTy = static_cast<const DerivedType *>(Ty);
+ typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(DTy);
+
+ if (TI == AbstractTypeMap.end()) {
+ // Add ourselves to the ATU list of the type.
+ cast<DerivedType>(DTy)->addAbstractTypeUser(this);
+
+ AbstractTypeMap.insert(TI, std::make_pair(DTy, I));
+ }
+ }
+ }
+
+ ConstantClass* Create(const TypeClass *Ty, const ValType &V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.insert(std::make_pair(Result, I));
+
+ AddAbstractTypeUser(Ty, I);
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+ sys::SmartScopedLock<true> Lock(ValueMapLock);
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->second;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ void UpdateAbstractTypeMap(const DerivedType *Ty,
+ typename MapTy::iterator I) {
+ assert(AbstractTypeMap.count(Ty) &&
+ "Abstract type not in AbstractTypeMap?");
+ typename MapTy::iterator &ATMEntryIt = AbstractTypeMap[Ty];
+ if (ATMEntryIt == I) {
+ // Yes, we are removing the representative entry for this type.
+ // See if there are any other entries of the same type.
+ typename MapTy::iterator TmpIt = ATMEntryIt;
+
+ // First check the entry before this one...
+ if (TmpIt != Map.begin()) {
+ --TmpIt;
+ if (TmpIt->first.first != Ty) // Not the same type, move back...
+ ++TmpIt;
+ }
+
+ // If we didn't find the same type, try to move forward...
+ if (TmpIt == ATMEntryIt) {
+ ++TmpIt;
+ if (TmpIt == Map.end() || TmpIt->first.first != Ty)
+ --TmpIt; // No entry afterwards with the same type
+ }
+
+ // If there is another entry in the map of the same abstract type,
+ // update the AbstractTypeMap entry now.
+ if (TmpIt != ATMEntryIt) {
+ ATMEntryIt = TmpIt;
+ } else {
+ // Otherwise, we are removing the last instance of this type
+ // from the table. Remove from the ATM, and from user list.
+ cast<DerivedType>(Ty)->removeAbstractTypeUser(this);
+ AbstractTypeMap.erase(Ty);
+ }
+ }
+ }
+
+ void remove(ConstantClass *CP) {
+ sys::SmartScopedLock<true> Lock(ValueMapLock);
+ typename MapTy::iterator I = FindExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->second == CP && "Didn't find correct element?");
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.erase(CP);
+
+ // Now that we found the entry, make sure this isn't the entry that
+ // the AbstractTypeMap points to.
+ const TypeClass *Ty = I->first.first;
+ if (Ty->isAbstract())
+ UpdateAbstractTypeMap(static_cast<const DerivedType *>(Ty), I);
+
+ Map.erase(I);
+ }
+
+ /// MoveConstantToNewSlot - If we are about to change C to be the element
+ /// specified by I, update our internal data structures to reflect this
+ /// fact.
+ /// NOTE: This function is not locked. It is the responsibility of the
+ /// caller to enforce proper synchronization if using this method.
+ void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
+ // First, remove the old location of the specified constant in the map.
+ typename MapTy::iterator OldI = FindExistingElement(C);
+ assert(OldI != Map.end() && "Constant not found in constant table!");
+ assert(OldI->second == C && "Didn't find correct element?");
+
+ // If this constant is the representative element for its abstract type,
+ // update the AbstractTypeMap so that the representative element is I.
+ if (C->getType()->isAbstract()) {
+ typename AbstractTypeMapTy::iterator ATI =
+ AbstractTypeMap.find(C->getType());
+ assert(ATI != AbstractTypeMap.end() &&
+ "Abstract type not in AbstractTypeMap?");
+ if (ATI->second == OldI)
+ ATI->second = I;
+ }
+
+ // Remove the old entry from the map.
+ Map.erase(OldI);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey) {
+ assert(I->second == C && "Bad inversemap entry!");
+ InverseMap[C] = I;
+ }
+ }
+
+ void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
+ sys::SmartScopedLock<true> Lock(ValueMapLock);
+ typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(OldTy);
+
+ assert(I != AbstractTypeMap.end() &&
+ "Abstract type not in AbstractTypeMap?");
+
+ // Convert one constant at a time until the last one is gone. The last one
+ // leaving will remove() itself, causing the AbstractTypeMap entry to be
+ // eliminated eventually.
+ do {
+ ConstantClass *C = I->second->second;
+ MapKey Key(cast<TypeClass>(NewTy),
+ ConstantKeyData<ConstantClass>::getValType(C));
+
+ std::pair<typename MapTy::iterator, bool> IP =
+ Map.insert(std::make_pair(Key, C));
+ if (IP.second) {
+ // The map didn't previously have an appropriate constant in the
+ // new type.
+
+ // Remove the old entry.
+ typename MapTy::iterator OldI =
+ Map.find(MapKey(cast<TypeClass>(OldTy), IP.first->first.second));
+ assert(OldI != Map.end() && "Constant not in map!");
+ UpdateAbstractTypeMap(OldTy, OldI);
+ Map.erase(OldI);
+
+ // Set the constant's type. This is done in place!
+ setType(C, NewTy);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey)
+ InverseMap[C] = IP.first;
+
+ AddAbstractTypeUser(NewTy, IP.first);
+ } else {
+ // The map already had an appropriate constant in the new type, so
+ // there's no longer a need for the old constant.
+ C->uncheckedReplaceAllUsesWith(IP.first->second);
+ C->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ I = AbstractTypeMap.find(OldTy);
+ } while (I != AbstractTypeMap.end());
+ }
+
+ // If the type became concrete without being refined to any other existing
+ // type, we just remove ourselves from the ATU list.
+ void typeBecameConcrete(const DerivedType *AbsTy) {
+ AbsTy->removeAbstractTypeUser(this);
+ }
+
+ void dump() const {
+ DEBUG(errs() << "Constant.cpp: ValueMap\n");
+ }
+};
+
+}
+
+#endif
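
The header above boils down to a (type, value)-keyed uniquing table:
getOrCreate() hands back the one canonical object per key, and Create()
reuses the failed-lookup iterator as an insertion hint. A minimal
standalone analogue of that pattern, with illustrative names that are
not part of this patch:

#include <map>
#include <string>
#include <utility>

// Sketch of the uniquing idea behind ValueMap: one canonical object per
// (type, value) key, created on first request. Illustrative code only.
struct Interned {
  std::string Ty;
  int Val;
  Interned(const std::string &T, int V) : Ty(T), Val(V) {}
};

class UniqueMap {
  typedef std::map<std::pair<std::string, int>, Interned*> MapTy;
  MapTy Map;
public:
  Interned *getOrCreate(const std::string &Ty, int V) {
    MapTy::key_type Key(Ty, V);
    MapTy::iterator I = Map.lower_bound(Key);
    if (I != Map.end() && I->first == Key)
      return I->second;                       // Already uniqued.
    Interned *Obj = new Interned(Ty, V);      // First request: create it.
    Map.insert(I, std::make_pair(Key, Obj));  // Hinted insert, as in Create().
    return Obj;
  }
};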
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 6eb188907f56..bff308727fe1 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -25,6 +25,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
@@ -93,12 +94,15 @@ int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) {
}
void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name) {
- std::string N(Name);
-
TypeSymbolTable &TST = unwrap(M)->getTypeSymbolTable();
- for (TypeSymbolTable::iterator I = TST.begin(), E = TST.end(); I != E; ++I)
- if (I->first == N)
- TST.remove(I);
+
+ TypeSymbolTable::iterator I = TST.find(Name);
+ if (I != TST.end())
+ TST.remove(I);
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getTypeByName(Name));
}
void LLVMDumpModule(LLVMModuleRef M) {
@@ -111,19 +115,84 @@ void LLVMDumpModule(LLVMModuleRef M) {
/*--.. Operations on all types (mostly) ....................................--*/
LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
- return static_cast<LLVMTypeKind>(unwrap(Ty)->getTypeID());
+ switch (unwrap(Ty)->getTypeID()) {
+ default:
+ assert(false && "Unhandled TypeID.");
+ case Type::VoidTyID:
+ return LLVMVoidTypeKind;
+ case Type::FloatTyID:
+ return LLVMFloatTypeKind;
+ case Type::DoubleTyID:
+ return LLVMDoubleTypeKind;
+ case Type::X86_FP80TyID:
+ return LLVMX86_FP80TypeKind;
+ case Type::FP128TyID:
+ return LLVMFP128TypeKind;
+ case Type::PPC_FP128TyID:
+ return LLVMPPC_FP128TypeKind;
+ case Type::LabelTyID:
+ return LLVMLabelTypeKind;
+ case Type::MetadataTyID:
+ return LLVMMetadataTypeKind;
+ case Type::IntegerTyID:
+ return LLVMIntegerTypeKind;
+ case Type::FunctionTyID:
+ return LLVMFunctionTypeKind;
+ case Type::StructTyID:
+ return LLVMStructTypeKind;
+ case Type::ArrayTyID:
+ return LLVMArrayTypeKind;
+ case Type::PointerTyID:
+ return LLVMPointerTypeKind;
+ case Type::OpaqueTyID:
+ return LLVMOpaqueTypeKind;
+ case Type::VectorTyID:
+ return LLVMVectorTypeKind;
+ }
+}
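
The explicit switch decouples the C API's LLVMTypeKind numbering from the
C++ TypeID enum, so clients can dispatch on the kind alone. A small
caller-side sketch (assumed usage, not code from this patch):

#include "llvm-c/Core.h"
#include <stdio.h>

/* Sketch: classify a type through the stable C enum. */
void describe(LLVMTypeRef Ty) {
  switch (LLVMGetTypeKind(Ty)) {
  case LLVMIntegerTypeKind:
    printf("i%u\n", LLVMGetIntTypeWidth(Ty));  /* width only valid here */
    break;
  case LLVMPointerTypeKind:
    printf("pointer\n");
    break;
  default:
    printf("other\n");
    break;
  }
}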
+
+LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
+ return wrap(&unwrap(Ty)->getContext());
}
/*--.. Operations on integer types .........................................--*/
-LLVMTypeRef LLVMInt1Type(void) { return (LLVMTypeRef) Type::Int1Ty; }
-LLVMTypeRef LLVMInt8Type(void) { return (LLVMTypeRef) Type::Int8Ty; }
-LLVMTypeRef LLVMInt16Type(void) { return (LLVMTypeRef) Type::Int16Ty; }
-LLVMTypeRef LLVMInt32Type(void) { return (LLVMTypeRef) Type::Int32Ty; }
-LLVMTypeRef LLVMInt64Type(void) { return (LLVMTypeRef) Type::Int64Ty; }
+LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
+ return wrap(IntegerType::get(*unwrap(C), NumBits));
+}
+LLVMTypeRef LLVMInt1Type(void) {
+ return LLVMInt1TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt8Type(void) {
+ return LLVMInt8TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt16Type(void) {
+ return LLVMInt16TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt32Type(void) {
+ return LLVMInt32TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt64Type(void) {
+ return LLVMInt64TypeInContext(LLVMGetGlobalContext());
+}
LLVMTypeRef LLVMIntType(unsigned NumBits) {
- return wrap(IntegerType::get(NumBits));
+ return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
}
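
Every legacy entry point in this hunk now forwards to an *InContext twin
bound to LLVMGetGlobalContext(). A caller-side sketch of the two forms;
it assumes LLVMContextCreate and LLVMContextDispose are already present
in this revision of llvm-c:

#include "llvm-c/Core.h"

/* Sketch: the same integer type through an explicit context and through
   the legacy global-context wrapper. Types from different contexts must
   never be mixed in one module. */
void type_context_demo(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMTypeRef I32Local  = LLVMInt32TypeInContext(Ctx);  /* new API */
  LLVMTypeRef I32Global = LLVMInt32Type();              /* global context */
  (void)I32Local;
  (void)I32Global;
  LLVMContextDispose(Ctx);
}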
unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
@@ -132,11 +201,37 @@ unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
/*--.. Operations on real types ............................................--*/
-LLVMTypeRef LLVMFloatType(void) { return (LLVMTypeRef) Type::FloatTy; }
-LLVMTypeRef LLVMDoubleType(void) { return (LLVMTypeRef) Type::DoubleTy; }
-LLVMTypeRef LLVMX86FP80Type(void) { return (LLVMTypeRef) Type::X86_FP80Ty; }
-LLVMTypeRef LLVMFP128Type(void) { return (LLVMTypeRef) Type::FP128Ty; }
-LLVMTypeRef LLVMPPCFP128Type(void) { return (LLVMTypeRef) Type::PPC_FP128Ty; }
+LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
+}
+LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C));
+}
+LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
+}
+
+LLVMTypeRef LLVMFloatType(void) {
+ return LLVMFloatTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMDoubleType(void) {
+ return LLVMDoubleTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86FP80Type(void) {
+ return LLVMX86FP80TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFP128Type(void) {
+ return LLVMFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMPPCFP128Type(void) {
+ return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
+}
/*--.. Operations on function types ........................................--*/
@@ -171,16 +266,23 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
/*--.. Operations on struct types ..........................................--*/
-LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
unsigned ElementCount, int Packed) {
std::vector<const Type*> Tys;
for (LLVMTypeRef *I = ElementTypes,
*E = ElementTypes + ElementCount; I != E; ++I)
Tys.push_back(unwrap(*I));
- return wrap(StructType::get(Tys, Packed != 0));
+ return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
+}
+
+LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, int Packed) {
+ return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
+ ElementCount, Packed);
}
+
unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->getNumElements();
}
@@ -228,11 +330,24 @@ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
/*--.. Operations on other types ...........................................--*/
-LLVMTypeRef LLVMVoidType(void) { return (LLVMTypeRef) Type::VoidTy; }
-LLVMTypeRef LLVMLabelType(void) { return (LLVMTypeRef) Type::LabelTy; }
+LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getVoidTy(*unwrap(C)));
+}
+LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getLabelTy(*unwrap(C)));
+}
+LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C) {
+ return wrap(OpaqueType::get(*unwrap(C)));
+}
+LLVMTypeRef LLVMVoidType(void) {
+ return LLVMVoidTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMLabelType(void) {
+ return LLVMLabelTypeInContext(LLVMGetGlobalContext());
+}
LLVMTypeRef LLVMOpaqueType(void) {
- return wrap(llvm::OpaqueType::get());
+ return LLVMOpaqueTypeInContext(LLVMGetGlobalContext());
}
/*--.. Operations on type handles ..........................................--*/
@@ -263,7 +378,7 @@ LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
}
const char *LLVMGetValueName(LLVMValueRef Val) {
- return unwrap(Val)->getNameStart();
+ return unwrap(Val)->getName().data();
}
void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
@@ -274,6 +389,9 @@ void LLVMDumpValue(LLVMValueRef Val) {
unwrap(Val)->dump();
}
+void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
+ unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
+}
/*--.. Conversion functions ................................................--*/
@@ -284,6 +402,31 @@ void LLVMDumpValue(LLVMValueRef Val) {
LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
+/*--.. Operations on Uses ..................................................--*/
+LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ Value::use_iterator I = V->use_begin();
+ if (I == V->use_end())
+ return 0;
+ return wrap(&(I.getUse()));
+}
+
+LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef UR) {
+ return wrap(unwrap(UR)->getNext());
+}
+
+LLVMValueRef LLVMGetUser(LLVMUseIteratorRef UR) {
+ return wrap(unwrap(UR)->getUser());
+}
+
+LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef UR) {
+ return wrap(unwrap(UR)->get());
+}
+
+/*--.. Operations on Users .................................................--*/
+LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
+ return wrap(unwrap<User>(Val)->getOperand(Index));
+}
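
LLVMGetFirstUse returns 0 for a value with no uses, and LLVMGetNextUse
wraps Use::getNext(), which yields 0 past the last use, so the whole use
list can be walked with a null-terminated loop. A sketch (not from the
patch):

#include "llvm-c/Core.h"

/* Sketch: visit every user of V through the new use-list accessors. */
void visit_users(LLVMValueRef V) {
  LLVMUseIteratorRef U;
  for (U = LLVMGetFirstUse(V); U; U = LLVMGetNextUse(U)) {
    LLVMValueRef User = LLVMGetUser(U);       /* who uses V */
    LLVMValueRef Used = LLVMGetUsedValue(U);  /* V itself */
    (void)User;
    (void)Used;
  }
}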
/*--.. Operations on constants of any type .................................--*/
@@ -313,6 +456,11 @@ int LLVMIsUndef(LLVMValueRef Val) {
return isa<UndefValue>(unwrap(Val));
}
+LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
+ return
+ wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
+}
+
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -320,63 +468,84 @@ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
}
-static const fltSemantics &SemanticsForType(Type *Ty) {
- assert(Ty->isFloatingPoint() && "Type is not floating point!");
- if (Ty == Type::FloatTy)
- return APFloat::IEEEsingle;
- if (Ty == Type::DoubleTy)
- return APFloat::IEEEdouble;
- if (Ty == Type::X86_FP80Ty)
- return APFloat::x87DoubleExtended;
- if (Ty == Type::FP128Ty)
- return APFloat::IEEEquad;
- if (Ty == Type::PPC_FP128Ty)
- return APFloat::PPCDoubleDouble;
- return APFloat::Bogus;
+LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
+ uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
+ Radix));
+}
+
+LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[],
+ unsigned SLen, uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str, SLen),
+ Radix));
}
LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
- APFloat APN(N);
- bool ignored;
- APN.convert(SemanticsForType(unwrap(RealTy)), APFloat::rmNearestTiesToEven,
- &ignored);
- return wrap(ConstantFP::get(APN));
+ return wrap(ConstantFP::get(unwrap(RealTy), N));
}
LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
- return wrap(ConstantFP::get(APFloat(SemanticsForType(unwrap(RealTy)), Text)));
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text)));
+}
+
+LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[],
+ unsigned SLen) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen)));
+}
+
+unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getZExtValue();
+}
+
+long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
}
/*--.. Operations on composite constants ...................................--*/
-LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
- int DontNullTerminate) {
+LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
+ unsigned Length, int DontNullTerminate) {
/* Inverted the sense of AddNull because ', 0)' is a
better mnemonic for null termination than ', 1)'. */
- return wrap(ConstantArray::get(std::string(Str, Length),
+ return wrap(ConstantArray::get(*unwrap(C), std::string(Str, Length),
DontNullTerminate == 0));
}
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, int Packed) {
+ return wrap(ConstantStruct::get(*unwrap(C),
+ unwrap<Constant>(ConstantVals, Count),
+ Count, Packed != 0));
+}
+LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
+ int DontNullTerminate) {
+ return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
+ DontNullTerminate);
+}
LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length) {
return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length),
unwrap<Constant>(ConstantVals, Length),
Length));
}
-
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
int Packed) {
- return wrap(ConstantStruct::get(unwrap<Constant>(ConstantVals, Count),
- Count, Packed != 0));
+ return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
+ Packed);
}
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
- return wrap(ConstantVector::get(unwrap<Constant>(ScalarConstantVals, Size),
- Size));
+ return wrap(ConstantVector::get(
+ unwrap<Constant>(ScalarConstantVals, Size), Size));
}
/*--.. Constant expressions ................................................--*/
+LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
+ return (LLVMOpcode)unwrap<ConstantExpr>(ConstantVal)->getOpcode();
+}
+
LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
}
@@ -386,70 +555,120 @@ LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
}
LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getNeg(
+ unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getFNeg(
+ unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getNot(
+ unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getAdd(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWAdd(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFAdd(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getSub(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getMul(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFMul(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getUDiv(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getSDiv(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getExactSDiv(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getFDiv(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getURem(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getSRem(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getFRem(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getAnd(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getOr(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getXor(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
@@ -468,55 +687,73 @@ LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
}
LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getShl(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getLShr(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getAShr(
+ unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
- return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getGetElementPtr(
+ unwrap<Constant>(ConstantVal),
unwrap<Constant>(ConstantIndices,
NumIndices),
NumIndices));
}
+LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices) {
+ Constant* Val = unwrap<Constant>(ConstantVal);
+ Constant** Idxs = unwrap<Constant>(ConstantIndices, NumIndices);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, Idxs, NumIndices));
+}
+
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getTrunc(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getSExt(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getZExt(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPTrunc(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPExtend(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getUIToFP(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
@@ -531,43 +768,92 @@ LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
}
LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPToSI(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getPtrToInt(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getIntToPtr(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getBitCast(
+ unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
+LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExtOrBitCast(
+ unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExtOrBitCast(
+ unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTruncOrBitCast(
+ unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPointerCast(
+ unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
+ unsigned isSigned) {
+ return wrap(ConstantExpr::getIntegerCast(
+ unwrap<Constant>(ConstantVal),
+ unwrap(ToType),
+ isSigned));
+}
+
+LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPCast(
+ unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
LLVMValueRef ConstantIfTrue,
LLVMValueRef ConstantIfFalse) {
- return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
+ return wrap(ConstantExpr::getSelect(
+ unwrap<Constant>(ConstantCondition),
unwrap<Constant>(ConstantIfTrue),
unwrap<Constant>(ConstantIfFalse)));
}
LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
LLVMValueRef IndexConstant) {
- return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
+ return wrap(ConstantExpr::getExtractElement(
+ unwrap<Constant>(VectorConstant),
unwrap<Constant>(IndexConstant)));
}
LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
LLVMValueRef ElementValueConstant,
LLVMValueRef IndexConstant) {
- return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
+ return wrap(ConstantExpr::getInsertElement(
+ unwrap<Constant>(VectorConstant),
unwrap<Constant>(ElementValueConstant),
unwrap<Constant>(IndexConstant)));
}
@@ -575,29 +861,33 @@ LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
LLVMValueRef VectorBConstant,
LLVMValueRef MaskConstant) {
- return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
+ return wrap(ConstantExpr::getShuffleVector(
+ unwrap<Constant>(VectorAConstant),
unwrap<Constant>(VectorBConstant),
unwrap<Constant>(MaskConstant)));
}
LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
unsigned NumIdx) {
- return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
+ return wrap(ConstantExpr::getExtractValue(
+ unwrap<Constant>(AggConstant),
IdxList, NumIdx));
}
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
LLVMValueRef ElementValueConstant,
unsigned *IdxList, unsigned NumIdx) {
- return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
+ return wrap(ConstantExpr::getInsertValue(
+ unwrap<Constant>(AggConstant),
unwrap<Constant>(ElementValueConstant),
IdxList, NumIdx));
}
LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
- const char *Constraints, int HasSideEffects) {
+ const char *Constraints, int HasSideEffects,
+ int IsMsAsm) {
return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
- Constraints, HasSideEffects));
+ Constraints, HasSideEffects, IsMsAsm));
}
/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
@@ -611,12 +901,97 @@ int LLVMIsDeclaration(LLVMValueRef Global) {
}
LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
- return static_cast<LLVMLinkage>(unwrap<GlobalValue>(Global)->getLinkage());
+ switch (unwrap<GlobalValue>(Global)->getLinkage()) {
+ default:
+ assert(false && "Unhandled Linkage Type.");
+ case GlobalValue::ExternalLinkage:
+ return LLVMExternalLinkage;
+ case GlobalValue::AvailableExternallyLinkage:
+ return LLVMAvailableExternallyLinkage;
+ case GlobalValue::LinkOnceAnyLinkage:
+ return LLVMLinkOnceAnyLinkage;
+ case GlobalValue::LinkOnceODRLinkage:
+ return LLVMLinkOnceODRLinkage;
+ case GlobalValue::WeakAnyLinkage:
+ return LLVMWeakAnyLinkage;
+ case GlobalValue::WeakODRLinkage:
+ return LLVMWeakODRLinkage;
+ case GlobalValue::AppendingLinkage:
+ return LLVMAppendingLinkage;
+ case GlobalValue::InternalLinkage:
+ return LLVMInternalLinkage;
+ case GlobalValue::PrivateLinkage:
+ return LLVMPrivateLinkage;
+ case GlobalValue::LinkerPrivateLinkage:
+ return LLVMLinkerPrivateLinkage;
+ case GlobalValue::DLLImportLinkage:
+ return LLVMDLLImportLinkage;
+ case GlobalValue::DLLExportLinkage:
+ return LLVMDLLExportLinkage;
+ case GlobalValue::ExternalWeakLinkage:
+ return LLVMExternalWeakLinkage;
+ case GlobalValue::GhostLinkage:
+ return LLVMGhostLinkage;
+ case GlobalValue::CommonLinkage:
+ return LLVMCommonLinkage;
+ }
+
+ // Should never get here.
+ return static_cast<LLVMLinkage>(0);
}
void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
- unwrap<GlobalValue>(Global)
- ->setLinkage(static_cast<GlobalValue::LinkageTypes>(Linkage));
+ GlobalValue *GV = unwrap<GlobalValue>(Global);
+
+ switch (Linkage) {
+ default:
+ assert(false && "Unhandled Linkage Type.");
+ case LLVMExternalLinkage:
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ break;
+ case LLVMAvailableExternallyLinkage:
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ break;
+ case LLVMLinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ break;
+ case LLVMLinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ break;
+ case LLVMWeakAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case LLVMWeakODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ case LLVMAppendingLinkage:
+ GV->setLinkage(GlobalValue::AppendingLinkage);
+ break;
+ case LLVMInternalLinkage:
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ break;
+ case LLVMPrivateLinkage:
+ GV->setLinkage(GlobalValue::PrivateLinkage);
+ break;
+ case LLVMLinkerPrivateLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
+ break;
+ case LLVMDLLImportLinkage:
+ GV->setLinkage(GlobalValue::DLLImportLinkage);
+ break;
+ case LLVMDLLExportLinkage:
+ GV->setLinkage(GlobalValue::DLLExportLinkage);
+ break;
+ case LLVMExternalWeakLinkage:
+ GV->setLinkage(GlobalValue::ExternalWeakLinkage);
+ break;
+ case LLVMGhostLinkage:
+ GV->setLinkage(GlobalValue::GhostLinkage);
+ break;
+ case LLVMCommonLinkage:
+ GV->setLinkage(GlobalValue::CommonLinkage);
+ break;
+ }
}
const char *LLVMGetSection(LLVMValueRef Global) {
@@ -648,9 +1023,8 @@ void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
/*--.. Operations on global variables ......................................--*/
LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
- return wrap(new GlobalVariable(unwrap(Ty), false,
- GlobalValue::ExternalLinkage, 0, Name,
- unwrap(M)));
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name));
}
LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
@@ -694,7 +1068,10 @@ void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
}
LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
- return wrap(unwrap<GlobalVariable>(GlobalVar)->getInitializer());
+ GlobalVariable* GV = unwrap<GlobalVariable>(GlobalVar);
+ if (!GV->hasInitializer())
+ return 0;
+ return wrap(GV->getInitializer());
}
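
This quietly changes the contract: LLVMGetInitializer now returns 0 for
a declaration instead of tripping getInitializer()'s assertion, so C
clients should test the result. Sketch:

#include "llvm-c/Core.h"

/* Sketch: a defined global dumps its initializer; a declaration is
   silently skipped now that the accessor can return 0. */
void dump_initializer(LLVMValueRef GlobalVar) {
  LLVMValueRef Init = LLVMGetInitializer(GlobalVar);
  if (Init)
    LLVMDumpValue(Init);
}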
void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
@@ -785,7 +1162,8 @@ unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
}
void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
- return unwrap<Function>(Fn)->setCallingConv(CC);
+ return unwrap<Function>(Fn)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
}
const char *LLVMGetGC(LLVMValueRef Fn) {
@@ -815,6 +1193,13 @@ void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Func->setAttributes(PALnew);
}
+LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ Attributes attr = PAL.getFnAttributes();
+ return (LLVMAttribute)attr;
+}
+
/*--.. Operations on parameters ............................................--*/
unsigned LLVMCountParams(LLVMValueRef FnRef) {
@@ -881,6 +1266,14 @@ void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
unwrap<Argument>(Arg)->removeAttr(PA);
}
+LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Attributes attr = A->getParent()->getAttributes().getParamAttributes(
+ A->getArgNo()+1);
+ return (LLVMAttribute)attr;
+}
+
+
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
unwrap<Argument>(Arg)->addAttr(
Attribute::constructAlignmentFromInt(align));
@@ -950,15 +1343,26 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
return wrap(--I);
}
+LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
+ LLVMValueRef FnRef,
+ const char *Name) {
+ return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap<Function>(FnRef)));
+}
+
LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
- return wrap(BasicBlock::Create(Name, unwrap<Function>(FnRef)));
+ return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name);
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
+ LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ BasicBlock *BB = unwrap(BBRef);
+ return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB));
}
-LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBBRef,
+LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef,
const char *Name) {
- BasicBlock *InsertBeforeBB = unwrap(InsertBeforeBBRef);
- return wrap(BasicBlock::Create(Name, InsertBeforeBB->getParent(),
- InsertBeforeBB));
+ return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name);
}
void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
@@ -1011,17 +1415,17 @@ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
return CI->getCallingConv();
else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
return II->getCallingConv();
- assert(0 && "LLVMGetInstructionCallConv applies only to call and invoke!");
+ llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
return 0;
}
void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
Value *V = unwrap(Instr);
if (CallInst *CI = dyn_cast<CallInst>(V))
- return CI->setCallingConv(CC);
+ return CI->setCallingConv(static_cast<CallingConv::ID>(CC));
else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
- return II->setCallingConv(CC);
- assert(0 && "LLVMSetInstructionCallConv applies only to call and invoke!");
+ return II->setCallingConv(static_cast<CallingConv::ID>(CC));
+ llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!");
}
void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
@@ -1080,8 +1484,12 @@ LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
/*===-- Instruction builders ----------------------------------------------===*/
+LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) {
+ return wrap(new IRBuilder<>(*unwrap(C)));
+}
+
LLVMBuilderRef LLVMCreateBuilder(void) {
- return wrap(new IRBuilder<>());
+ return LLVMCreateBuilderInContext(LLVMGetGlobalContext());
}
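
Both entry points hand back a heap-allocated IRBuilder, so every builder
must eventually reach LLVMDisposeBuilder. A sketch pairing the new
context-aware calls; it assumes Fn is a bodyless i32-returning function
created in Ctx:

#include "llvm-c/Core.h"

/* Sketch: emit "ret i32 0" into a fresh entry block. */
void emit_ret_zero(LLVMContextRef Ctx, LLVMValueRef Fn) {
  LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
  LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Fn, "entry");
  LLVMPositionBuilderAtEnd(B, Entry);
  LLVMBuildRet(B, LLVMConstInt(LLVMInt32TypeInContext(Ctx), 0, 0));
  LLVMDisposeBuilder(B);  /* builders are owned by the caller */
}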
void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
@@ -1113,6 +1521,11 @@ void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
}
+void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
+ const char *Name) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr), Name);
+}
+
void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
delete unwrap(Builder);
}
@@ -1127,6 +1540,11 @@ LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
return wrap(unwrap(B)->CreateRet(unwrap(V)));
}
+LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals,
+ unsigned N) {
+ return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N));
+}
+
LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
}
@@ -1170,16 +1588,36 @@ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
@@ -1190,6 +1628,11 @@ LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
@@ -1244,6 +1687,10 @@ LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
}
+LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
+}
+
LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
}
@@ -1292,6 +1739,28 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unwrap(Indices) + NumIndices, Name));
}
+LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), unwrap(Indices),
+ unwrap(Indices) + NumIndices, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ unsigned Idx, const char *Name) {
+ return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalString(Str, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
+}
+
/*--.. Casts ...............................................................--*/
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
@@ -1354,6 +1823,39 @@ LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
}
+LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
/*--.. Comparisons .........................................................--*/
LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
@@ -1427,6 +1929,21 @@ LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
Index, Name));
}
+LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+}
+
/*===-- Module providers --------------------------------------------------===*/
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 735a70c50927..b49faf84dcea 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -23,22 +23,20 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Instructions.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
#include <algorithm>
using namespace llvm;
-namespace llvm {
-static std::ostream &operator<<(std::ostream &o,
- const std::set<BasicBlock*> &BBs) {
- for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
- I != E; ++I)
- if (*I)
- WriteAsOperand(o, *I, false);
- else
- o << " <<exit node>>";
- return o;
-}
-}
+// Always verify dominfo if expensive checking is enabled.
+#ifdef XDEBUG
+bool VerifyDomInfo = true;
+#else
+bool VerifyDomInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
+ cl::desc("Verify dominator info (time consuming)"));
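
Because the option is a plain cl::opt bound to the global via
cl::location, any tool linking VMCore picks it up; an illustrative
invocation would be `opt -verify-dom-info -licm in.bc -o out.bc`. Under
XDEBUG builds the check is on by default, per the #ifdef above.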
//===----------------------------------------------------------------------===//
// DominatorTree Implementation
@@ -61,6 +59,47 @@ bool DominatorTree::runOnFunction(Function &F) {
return false;
}
+void DominatorTree::verifyAnalysis() const {
+ if (!VerifyDomInfo) return;
+
+ Function &F = *getRoot()->getParent();
+
+ DominatorTree OtherDT;
+ OtherDT.getBase().recalculate(F);
+ assert(!compare(OtherDT) && "Invalid DominatorTree info!");
+}
+
+void DominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+// dominates - Return true if A dominates a use in B. This performs the
+// special checks necessary if A and B are in the same basic block.
+bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
+ const BasicBlock *BBA = A->getParent(), *BBB = B->getParent();
+
+ // If A is an invoke instruction, its value is only available in this normal
+ // successor block.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(A))
+ BBA = II->getNormalDest();
+
+ if (BBA != BBB) return dominates(BBA, BBB);
+
+ // It is not possible to determine dominance between two PHI nodes
+ // based on their ordering.
+ if (isa<PHINode>(A) && isa<PHINode>(B))
+ return false;
+
+ // Loop through the basic block until we find A or B.
+ BasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ /*empty*/;
+
+ return &*I == A;
+}
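
The closing scan settles same-block ordering: whichever of A or B is
reached first from the top of the block wins. The same idea in a
standalone form (simplified to a plain sequence; like the code above, it
assumes both elements occur in the range):

#include <list>

// Sketch of the intra-block ordering test: walk from the front and
// report whether A shows up before B. Undefined, as in the original,
// if neither element is present.
bool comesBefore(const std::list<int> &Block, int A, int B) {
  std::list<int>::const_iterator I = Block.begin();
  for (; *I != A && *I != B; ++I)
    /*empty*/;
  return *I == A;
}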
+
+
+
//===----------------------------------------------------------------------===//
// DominanceFrontier Implementation
//===----------------------------------------------------------------------===//
@@ -69,6 +108,17 @@ char DominanceFrontier::ID = 0;
static RegisterPass<DominanceFrontier>
G("domfrontier", "Dominance Frontier Construction", true, true);
+void DominanceFrontier::verifyAnalysis() const {
+ if (!VerifyDomInfo) return;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ DominanceFrontier OtherDF;
+ const std::vector<BasicBlock*> &DTRoots = DT.getRoots();
+ OtherDF.calculate(DT, DT.getNode(DTRoots[0]));
+ assert(!compare(OtherDF) && "Invalid DominanceFrontier info!");
+}
+
// NewBB is split and now it has one successor. Update dominance frontier to
// reflect this change.
void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
@@ -76,7 +126,7 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
&& "NewBB should have a single successor!");
BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0);
- std::vector<BasicBlock*> PredBlocks;
+ SmallVector<BasicBlock*, 8> PredBlocks;
for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB);
PI != PE; ++PI)
PredBlocks.push_back(*PI);
@@ -153,7 +203,7 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
// Verify whether this block dominates a block in predblocks. If not, do
// not update it.
bool BlockDominatesAny = false;
- for (std::vector<BasicBlock*>::const_iterator BI = PredBlocks.begin(),
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BI = PredBlocks.begin(),
BE = PredBlocks.end(); BI != BE; ++BI) {
if (DT.dominates(FI, *BI)) {
BlockDominatesAny = true;
@@ -270,18 +320,24 @@ DominanceFrontier::calculate(const DominatorTree &DT,
return *Result;
}
-void DominanceFrontierBase::print(std::ostream &o, const Module* ) const {
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
for (const_iterator I = begin(), E = end(); I != E; ++I) {
- o << " DomFrontier for BB";
+ OS << " DomFrontier for BB";
if (I->first)
- WriteAsOperand(o, I->first, false);
+ WriteAsOperand(OS, I->first, false);
else
- o << " <<exit node>>";
- o << " is:\t" << I->second << "\n";
+ OS << " <<exit node>>";
+ OS << " is:\t";
+
+ const std::set<BasicBlock*> &BBs = I->second;
+
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I)
+ if (*I)
+ WriteAsOperand(OS, *I, false);
+ else
+ OS << " <<exit node>>";
+ OS << "\n";
}
}
-void DominanceFrontierBase::dump() {
- print (llvm::cerr);
-}
-
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index eeade051ac53..8ad885c4c23d 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -14,6 +14,7 @@
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
@@ -35,7 +36,7 @@ template class SymbolTableListTraits<BasicBlock, Function>;
// Argument Implementation
//===----------------------------------------------------------------------===//
-Argument::Argument(const Type *Ty, const std::string &Name, Function *Par)
+Argument::Argument(const Type *Ty, const Twine &Name, Function *Par)
: Value(Ty, Value::ArgumentVal) {
Parent = 0;
@@ -114,10 +115,8 @@ void Argument::removeAttr(Attributes attr) {
// Helper Methods in Function
//===----------------------------------------------------------------------===//
-LLVMContext* Function::getContext() {
- Module* M = getParent();
- if (M) return &M->getContext();
- return 0;
+LLVMContext &Function::getContext() const {
+ return getType()->getContext();
}
const FunctionType *Function::getFunctionType() const {
@@ -145,7 +144,7 @@ void Function::eraseFromParent() {
//===----------------------------------------------------------------------===//
Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
- const std::string &name, Module *ParentModule)
+ const Twine &name, Module *ParentModule)
: GlobalValue(PointerType::getUnqual(Ty),
Value::FunctionVal, 0, 0, Linkage, name) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
@@ -183,7 +182,7 @@ void Function::BuildLazyArguments() const {
// Create the arguments vector, all arguments start out unnamed.
const FunctionType *FT = getFunctionType();
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
- assert(FT->getParamType(i) != Type::VoidTy &&
+ assert(FT->getParamType(i) != Type::getVoidTy(FT->getContext()) &&
"Cannot have void typed arguments!");
ArgumentList.push_back(new Argument(FT->getParamType(i)));
}
@@ -242,18 +241,18 @@ static StringPool *GCNamePool;
static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
bool Function::hasGC() const {
- sys::SmartScopedReader<true> Reader(&*GCLock);
+ sys::SmartScopedReader<true> Reader(*GCLock);
return GCNames && GCNames->count(this);
}
const char *Function::getGC() const {
assert(hasGC() && "Function has no collector");
- sys::SmartScopedReader<true> Reader(&*GCLock);
+ sys::SmartScopedReader<true> Reader(*GCLock);
return *(*GCNames)[this];
}
void Function::setGC(const char *Str) {
- sys::SmartScopedWriter<true> Writer(&*GCLock);
+ sys::SmartScopedWriter<true> Writer(*GCLock);
if (!GCNamePool)
GCNamePool = new StringPool();
if (!GCNames)
@@ -262,7 +261,7 @@ void Function::setGC(const char *Str) {
}
void Function::clearGC() {
- sys::SmartScopedWriter<true> Writer(&*GCLock);
+ sys::SmartScopedWriter<true> Writer(*GCLock);
if (GCNames) {
GCNames->erase(this);
if (GCNames->empty()) {
@@ -328,15 +327,16 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
for (unsigned i = 0; i < numTys; ++i) {
if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
- MVT::getMVT(PTyp->getElementType()).getMVTString();
+ EVT::getEVT(PTyp->getElementType()).getEVTString();
}
else if (Tys[i])
- Result += "." + MVT::getMVT(Tys[i]).getMVTString();
+ Result += "." + EVT::getEVT(Tys[i]).getEVTString();
}
return Result;
}
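
(The MVT to EVT rename does not change the spelled suffixes: a pointer
to i32 in address space 0 still contributes ".p0i32", as in the
overloaded atomic intrinsics of this era, e.g.
llvm.atomic.load.add.i32.p0i32.)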
-const FunctionType *Intrinsic::getType(ID id, const Type **Tys,
+const FunctionType *Intrinsic::getType(LLVMContext &Context,
+ ID id, const Type **Tys,
unsigned numTys) {
const Type *ResultTy = NULL;
std::vector<const Type*> ArgTys;
@@ -370,7 +370,8 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys,
// because intrinsics must be a specific type.
return
cast<Function>(M->getOrInsertFunction(getName(id, Tys, numTys),
- getType(id, Tys, numTys)));
+ getType(M->getContext(),
+ id, Tys, numTys)));
}
// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 5abe1f9ac40d..d18a20162dd9 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -16,8 +16,10 @@
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
using namespace llvm;
@@ -76,8 +78,7 @@ void GlobalValue::removeDeadConstantUsers() const {
/// Override destroyConstant to make sure it doesn't get called on
/// GlobalValue's because they shouldn't be treated like other constants.
void GlobalValue::destroyConstant() {
- assert(0 && "You can't GV->destroyConstant()!");
- abort();
+ llvm_unreachable("You can't GV->destroyConstant()!");
}
/// copyAttributesFrom - copy all additional attributes (those not needed to
@@ -93,11 +94,12 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
-GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
- Constant *InitVal, const std::string &Name,
- Module *ParentModule, bool ThreadLocal,
- unsigned AddressSpace)
- : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+GlobalVariable::GlobalVariable(LLVMContext &Context, const Type *Ty,
+ bool constant, LinkageTypes Link,
+ Constant *InitVal, const Twine &Name,
+ bool ThreadLocal, unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
InitVal != 0, Link, Name),
isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
@@ -108,16 +110,15 @@ GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
}
LeakDetector::addGarbageObject(this);
-
- if (ParentModule)
- ParentModule->getGlobalList().push_back(this);
}
-GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
- Constant *InitVal, const std::string &Name,
+GlobalVariable::GlobalVariable(Module &M, const Type *Ty, bool constant,
+ LinkageTypes Link, Constant *InitVal,
+ const Twine &Name,
GlobalVariable *Before, bool ThreadLocal,
unsigned AddressSpace)
- : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
InitVal != 0, Link, Name),
isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
@@ -131,6 +132,8 @@ GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
if (Before)
Before->getParent()->getGlobalList().insert(Before, this);
+ else
+ M.getGlobalList().push_back(this);
}
void GlobalVariable::setParent(Module *parent) {
@@ -184,7 +187,7 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
//===----------------------------------------------------------------------===//
GlobalAlias::GlobalAlias(const Type *Ty, LinkageTypes Link,
- const std::string &Name, Constant* aliasee,
+ const Twine &Name, Constant* aliasee,
Module *ParentModule)
: GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
LeakDetector::addGarbageObject(this);
@@ -242,7 +245,7 @@ const GlobalValue *GlobalAlias::getAliasedGlobal() const {
CE->getOpcode() == Instruction::GetElementPtr))
return dyn_cast<GlobalValue>(CE->getOperand(0));
else
- assert(0 && "Unsupported aliasee");
+ llvm_unreachable("Unsupported aliasee");
}
}
return 0;
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 524e294ab75f..0520dfa17ced 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -26,18 +26,20 @@ InlineAsm::~InlineAsm() {
// NOTE: when memoizing the function type, we have to be careful to handle the
// case when the type gets refined.
-InlineAsm *InlineAsm::get(const FunctionType *Ty, const std::string &AsmString,
- const std::string &Constraints, bool hasSideEffects) {
+InlineAsm *InlineAsm::get(const FunctionType *Ty, const StringRef &AsmString,
+ const StringRef &Constraints, bool hasSideEffects,
+ bool isMsAsm) {
// FIXME: memoize!
- return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects);
+ return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects, isMsAsm);
}
-InlineAsm::InlineAsm(const FunctionType *Ty, const std::string &asmString,
- const std::string &constraints, bool hasSideEffects)
+InlineAsm::InlineAsm(const FunctionType *Ty, const StringRef &asmString,
+ const StringRef &constraints, bool hasSideEffects,
+ bool isMsAsm)
: Value(PointerType::getUnqual(Ty),
Value::InlineAsmVal),
AsmString(asmString),
- Constraints(constraints), HasSideEffects(hasSideEffects) {
+ Constraints(constraints), HasSideEffects(hasSideEffects), IsMsAsm(isMsAsm) {
// Do various checks on the constraint string and type.
assert(Verify(Ty, constraints) && "Function type not legal for constraints!");
@@ -50,9 +52,9 @@ const FunctionType *InlineAsm::getFunctionType() const {
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
-bool InlineAsm::ConstraintInfo::Parse(const std::string &Str,
+bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str,
std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) {
- std::string::const_iterator I = Str.begin(), E = Str.end();
+ StringRef::iterator I = Str.begin(), E = Str.end();
// Initialize
Type = isInput;
@@ -111,13 +113,13 @@ bool InlineAsm::ConstraintInfo::Parse(const std::string &Str,
while (I != E) {
if (*I == '{') { // Physical register reference.
// Find the end of the register name.
- std::string::const_iterator ConstraintEnd = std::find(I+1, E, '}');
+ StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
if (ConstraintEnd == E) return true; // "{foo"
Codes.push_back(std::string(I, ConstraintEnd+1));
I = ConstraintEnd+1;
} else if (isdigit(*I)) { // Matching Constraint
// Maximal munch numbers.
- std::string::const_iterator NumStart = I;
+ StringRef::iterator NumStart = I;
while (I != E && isdigit(*I))
++I;
Codes.push_back(std::string(NumStart, I));
@@ -145,16 +147,16 @@ bool InlineAsm::ConstraintInfo::Parse(const std::string &Str,
}
std::vector<InlineAsm::ConstraintInfo>
-InlineAsm::ParseConstraints(const std::string &Constraints) {
+InlineAsm::ParseConstraints(const StringRef &Constraints) {
std::vector<ConstraintInfo> Result;
// Scan the constraints string.
- for (std::string::const_iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ) {
+ for (StringRef::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ) {
ConstraintInfo Info;
// Find the end of this constraint.
- std::string::const_iterator ConstraintEnd = std::find(I, E, ',');
+ StringRef::iterator ConstraintEnd = std::find(I, E, ',');
if (ConstraintEnd == I || // Empty constraint like ",,"
Info.Parse(std::string(I, ConstraintEnd), Result)) {
@@ -179,7 +181,7 @@ InlineAsm::ParseConstraints(const std::string &Constraints) {
/// Verify - Verify that the specified constraint string is reasonable for the
/// specified function type, and otherwise validate the constraint string.
-bool InlineAsm::Verify(const FunctionType *Ty, const std::string &ConstStr) {
+bool InlineAsm::Verify(const FunctionType *Ty, const StringRef &ConstStr) {
if (Ty->isVarArg()) return false;
std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr);
@@ -213,7 +215,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, const std::string &ConstStr) {
switch (NumOutputs) {
case 0:
- if (Ty->getReturnType() != Type::VoidTy) return false;
+ if (Ty->getReturnType() != Type::getVoidTy(Ty->getContext())) return false;
break;
case 1:
if (isa<StructType>(Ty->getReturnType())) return false;
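
With Parse and ParseConstraints now taking StringRef, constraint strings can be scanned without first materializing a std::string. A sketch of the expected decomposition for a typical "output, input, clobber" string (values shown are illustrative, not from this commit):

  // "=r,r,~{memory}" splits into one output, one input, one clobber.
  std::vector<InlineAsm::ConstraintInfo> CV =
      InlineAsm::ParseConstraints("=r,r,~{memory}");
  // CV[0].Type == InlineAsm::isOutput  (code "r")
  // CV[1].Type == InlineAsm::isInput   (code "r")
  // CV[2].Type == InlineAsm::isClobber (code "{memory}")
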
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index e0764e494b2e..4df536e68b4e 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -11,9 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "LLVMContextImpl.h"
#include "llvm/Type.h"
#include "llvm/Instructions.h"
#include "llvm/Function.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/LeakDetector.h"
using namespace llvm;
@@ -47,6 +51,10 @@ Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
// Out of line virtual method, so the vtable, etc has a home.
Instruction::~Instruction() {
assert(Parent == 0 && "Instruction still linked in the program!");
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsDeleted(this);
+ }
}
@@ -143,8 +151,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
// Other instructions...
case ICmp: return "icmp";
case FCmp: return "fcmp";
- case VICmp: return "vicmp";
- case VFCmp: return "vfcmp";
case PHI: return "phi";
case Select: return "select";
case Call: return "call";
@@ -168,6 +174,14 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
/// identical to the current one. This means that all operands match and any
/// extra information (e.g. load is volatile) agree.
bool Instruction::isIdenticalTo(const Instruction *I) const {
+ return isIdenticalToWhenDefined(I) &&
+ SubclassOptionalData == I->SubclassOptionalData;
+}
+
+/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+/// ignores the SubclassOptionalData flags, which specify conditions
+/// under which the instruction's result is undefined.
+bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
if (getOpcode() != I->getOpcode() ||
getNumOperands() != I->getNumOperands() ||
getType() != I->getType())
@@ -283,11 +297,11 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
return true;
continue;
}
-
+
if (PN->getIncomingBlock(UI) != BB)
return true;
}
- return false;
+ return false;
}
/// mayReadFromMemory - Return true if this instruction may read memory.
@@ -367,23 +381,77 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-/// isTrapping - Return true if the instruction may trap.
-///
-bool Instruction::isTrapping(unsigned op) {
- switch(op) {
+// Code here matches isMalloc from MallocHelper, which is not in VMCore.
+static bool isMalloc(const Value* I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ if (!BCI) return false;
+
+ CI = dyn_cast<CallInst>(BCI->getOperand(0));
+ }
+
+ if (!CI) return false;
+
+ const Module* M = CI->getParent()->getParent()->getParent();
+ Constant *MallocFunc = M->getFunction("malloc");
+
+ if (CI->getOperand(0) != MallocFunc)
+ return false;
+
+ return true;
+}
+
+bool Instruction::isSafeToSpeculativelyExecute() const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (getOpcode()) {
+ default:
+ return true;
case UDiv:
+ case URem: {
+ // x / y and x % y are undefined if y == 0, but calculations like x / 3 are safe.
+ ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
+ return Op && !Op->isNullValue();
+ }
case SDiv:
- case FDiv:
- case URem:
- case SRem:
- case FRem:
- case Load:
- case Store:
+ case SRem: {
+ // x / y and x % y are undefined if y == 0, and may overflow (and thus be
+ // undefined) if y == -1 and x is INT_MIN, but calculations like x / 3 are safe.
+ ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
+ return Op && !Op->isNullValue() && !Op->isAllOnesValue();
+ }
+ case Load: {
+ if (cast<LoadInst>(this)->isVolatile())
+ return false;
+ if (isa<AllocationInst>(getOperand(0)) || isMalloc(getOperand(0)))
+ return true;
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
+ return !GV->hasExternalWeakLinkage();
+ // FIXME: Handle cases involving GEPs. We have to be careful because
+ // a load of an out-of-bounds GEP has undefined behavior.
+ return false;
+ }
case Call:
- case Invoke:
+ return false; // The called function could have undefined behavior or
+ // side-effects.
+ // FIXME: We should special-case some intrinsics (bswap,
+ // overflow-checking arithmetic, etc.)
case VAArg:
- return true;
- default:
- return false;
+ case Alloca:
+ case Malloc:
+ case Invoke:
+ case PHI:
+ case Store:
+ case Free:
+ case Ret:
+ case Br:
+ case Switch:
+ case Unwind:
+ case Unreachable:
+ return false; // Misc instructions which have effects
}
}
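
The new isSafeToSpeculativelyExecute above replaces the old isTrapping predicate and reasons per opcode and per operand: division by a non-zero (and, for signed division, non-minus-one) constant is hoistable, while division by an arbitrary value, volatile loads, calls, and all side-effecting instructions are not. A sketch of the intended use in a hoisting transform (hoistTo and PreheaderTerminator are hypothetical names, not part of this commit):

  // Only move I above its guarding branch if executing it unconditionally
  // cannot trap or otherwise misbehave; e.g. "udiv i32 %x, 3" qualifies,
  // "udiv i32 %x, %y" does not (%y may be zero).
  if (I->isSafeToSpeculativelyExecute())
    hoistTo(PreheaderTerminator, I);
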
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 6a6424d39dd2..f3d15cb2b88b 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -12,13 +12,19 @@
//
//===----------------------------------------------------------------------===//
+#include "LLVMContextImpl.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/MathExtras.h"
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -43,10 +49,10 @@ CallSite::CallSite(Instruction *C) {
I.setPointer(C);
I.setInt(isa<CallInst>(C));
}
-unsigned CallSite::getCallingConv() const {
+CallingConv::ID CallSite::getCallingConv() const {
CALLSITE_DELEGATE_GETTER(getCallingConv());
}
-void CallSite::setCallingConv(unsigned CC) {
+void CallSite::setCallingConv(CallingConv::ID CC) {
CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
}
const AttrListPtr &CallSite::getAttributes() const {
@@ -124,7 +130,7 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
if (const VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
// Vector select.
- if (VT->getElementType() != Type::Int1Ty)
+ if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
return "vector select condition element type must be i1";
const VectorType *ET = dyn_cast<VectorType>(Op1->getType());
if (ET == 0)
@@ -132,7 +138,7 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
if (ET->getNumElements() != VT->getNumElements())
return "vector select requires selected vectors to have "
"the same vector length as select condition";
- } else if (Op0->getType() != Type::Int1Ty) {
+ } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
return "select condition must be i1 or <n x i1>";
}
return 0;
@@ -152,6 +158,7 @@ PHINode::PHINode(const PHINode &PN)
OL[i] = PN.getOperand(i);
OL[i+1] = PN.getOperand(i+1);
}
+ SubclassOptionalData = PN.SubclassOptionalData;
}
PHINode::~PHINode() {
@@ -223,13 +230,17 @@ void PHINode::resizeOperands(unsigned NumOps) {
/// hasConstantValue - If the specified PHI node always merges together the same
/// value, return the value, otherwise return null.
///
-Value *PHINode::hasConstantValue(bool AllowNonDominatingInstruction) const {
- // If the PHI node only has one incoming value, eliminate the PHI node...
+/// If the PHI has undef operands, but all the rest of the operands are
+/// some unique value, return that value if it can be proved that the
+/// value dominates the PHI. If DT is null, use a conservative check,
+/// otherwise use DT to test for dominance.
+///
+Value *PHINode::hasConstantValue(DominatorTree *DT) const {
+ // If the PHI node only has one incoming value, eliminate the PHI node.
if (getNumIncomingValues() == 1) {
if (getIncomingValue(0) != this) // not X = phi X
return getIncomingValue(0);
- else
- return UndefValue::get(getType()); // Self cycle is dead.
+ return UndefValue::get(getType()); // Self cycle is dead.
}
// Otherwise if all of the incoming values are the same for the PHI, replace
@@ -243,8 +254,7 @@ Value *PHINode::hasConstantValue(bool AllowNonDominatingInstruction) const {
} else if (getIncomingValue(i) != this) { // Not the PHI node itself...
if (InVal && getIncomingValue(i) != InVal)
return 0; // Not the same, bail out.
- else
- InVal = getIncomingValue(i);
+ InVal = getIncomingValue(i);
}
// The only case that could cause InVal to be null is if we have a PHI node
@@ -257,12 +267,20 @@ Value *PHINode::hasConstantValue(bool AllowNonDominatingInstruction) const {
// instruction, we cannot always return X as the result of the PHI node. Only
// do this if X is not an instruction (thus it must dominate the PHI block),
// or if the client is prepared to deal with this possibility.
- if (HasUndefInput && !AllowNonDominatingInstruction)
- if (Instruction *IV = dyn_cast<Instruction>(InVal))
- // If it's in the entry block, it dominates everything.
- if (IV->getParent() != &IV->getParent()->getParent()->getEntryBlock() ||
- isa<InvokeInst>(IV))
- return 0; // Cannot guarantee that InVal dominates this PHINode.
+ if (!HasUndefInput || !isa<Instruction>(InVal))
+ return InVal;
+
+ Instruction *IV = cast<Instruction>(InVal);
+ if (DT) {
+ // We have a DominatorTree. Do a precise test.
+ if (!DT->dominates(IV, this))
+ return 0;
+ } else {
+ // If it is in the entry block, it obviously dominates everything.
+ if (IV->getParent() != &IV->getParent()->getParent()->getEntryBlock() ||
+ isa<InvokeInst>(IV))
+ return 0; // Cannot guarantee that InVal dominates this PHINode.
+ }
// All of the incoming values are the same, return the value now.
return InVal;
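
PHINode::hasConstantValue now takes an optional DominatorTree instead of the old AllowNonDominatingInstruction flag: with a tree the dominance test is exact, without one only entry-block instructions are accepted. A sketch of a caller (PN and DT are assumed to exist; DT may be null):

  // If every non-undef incoming value is the same V, and V dominates the
  // PHI, the PHI can be replaced outright.
  if (Value *V = PN->hasConstantValue(DT)) {
    PN->replaceAllUsesWith(V);
    PN->eraseFromParent();
  }
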
@@ -348,7 +366,7 @@ void CallInst::init(Value *Func) {
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
}
-CallInst::CallInst(Value *Func, Value* Actual, const std::string &Name,
+CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name,
Instruction *InsertBefore)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
@@ -359,7 +377,7 @@ CallInst::CallInst(Value *Func, Value* Actual, const std::string &Name,
setName(Name);
}
-CallInst::CallInst(Value *Func, Value* Actual, const std::string &Name,
+CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
@@ -369,7 +387,7 @@ CallInst::CallInst(Value *Func, Value* Actual, const std::string &Name,
init(Func, Actual);
setName(Name);
}
-CallInst::CallInst(Value *Func, const std::string &Name,
+CallInst::CallInst(Value *Func, const Twine &Name,
Instruction *InsertBefore)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
@@ -380,7 +398,7 @@ CallInst::CallInst(Value *Func, const std::string &Name,
setName(Name);
}
-CallInst::CallInst(Value *Func, const std::string &Name,
+CallInst::CallInst(Value *Func, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
@@ -401,6 +419,7 @@ CallInst::CallInst(const CallInst &CI)
Use *InOL = CI.OperandList;
for (unsigned i = 0, e = CI.getNumOperands(); i != e; ++i)
OL[i] = InOL[i];
+ SubclassOptionalData = CI.SubclassOptionalData;
}
void CallInst::addAttribute(unsigned i, Attributes attr) {
@@ -423,6 +442,111 @@ bool CallInst::paramHasAttr(unsigned i, Attributes attr) const {
return false;
}
+/// IsConstantOne - Return true only if val is constant int 1
+static bool IsConstantOne(Value *val) {
+ assert(val && "IsConstantOne does not work with NULL val");
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+static Value *checkArraySize(Value *Amt, const Type *IntPtrTy) {
+ if (!Amt)
+ Amt = ConstantInt::get(IntPtrTy, 1);
+ else {
+ assert(!isa<BasicBlock>(Amt) &&
+ "Passed basic block into malloc size parameter! Use other ctor");
+ assert(Amt->getType() == IntPtrTy &&
+ "Malloc array size is not an intptr!");
+ }
+ return Amt;
+}
+
+static Value *createMalloc(Instruction *InsertBefore, BasicBlock *InsertAtEnd,
+ const Type *IntPtrTy, const Type *AllocTy,
+ Value *ArraySize, const Twine &NameStr) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createMalloc needs either InsertBefore or InsertAtEnd");
+
+ // malloc(type) becomes:
+ // bitcast (i8* malloc(typeSize)) to type*
+ // malloc(type, arraySize) becomes:
+ // bitcast (i8* malloc(typeSize*arraySize)) to type*
+ Value *AllocSize = ConstantExpr::getSizeOf(AllocTy);
+ AllocSize = ConstantExpr::getTruncOrBitCast(cast<Constant>(AllocSize),
+ IntPtrTy);
+ ArraySize = checkArraySize(ArraySize, IntPtrTy);
+
+ if (!IsConstantOne(ArraySize)) {
+ if (IsConstantOne(AllocSize)) {
+ AllocSize = ArraySize; // Operand * 1 = Operand
+ } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
+ Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
+ false /*ZExt*/);
+ // Malloc arg is constant product of type size and array size
+ AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
+ } else {
+ // Multiply type size by the array size...
+ if (InsertBefore)
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertBefore);
+ else
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertAtEnd);
+ }
+ }
+
+ assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+ // Create the call to Malloc.
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+ const Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+ // prototype malloc as "void *malloc(size_t)"
+ Constant *MallocF = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
+ if (!cast<Function>(MallocF)->doesNotAlias(0))
+ cast<Function>(MallocF)->setDoesNotAlias(0);
+ const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
+ CallInst *MCall = NULL;
+ Value *MCast = NULL;
+ if (InsertBefore) {
+ MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertBefore);
+ // Create a cast instruction to convert to the right type...
+ MCast = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore);
+ } else {
+ MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertAtEnd);
+ // Create a cast instruction to convert to the right type...
+ MCast = new BitCastInst(MCall, AllocPtrType, NameStr);
+ }
+ MCall->setTailCall();
+ assert(MCall->getType() != Type::getVoidTy(BB->getContext()) &&
+ "Malloc has void return type");
+
+ return MCast;
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+Value *CallInst::CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy,
+ const Type *AllocTy, Value *ArraySize,
+ const Twine &Name) {
+ return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, ArraySize, Name);
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+/// Note: This function does not add the bitcast to the basic block; that is
+/// the responsibility of the caller.
+Value *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, const Type *IntPtrTy,
+ const Type *AllocTy, Value *ArraySize,
+ const Twine &Name) {
+ return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, ArraySize, Name);
+}
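
A usage sketch for the new CreateMalloc helpers (Ctx, FooTy, N, and InsertPt are assumed names; IntPtrTy must be the target's pointer-sized integer, for which i64 here is only an example):

  // Emits: %mallocsize = mul i64 %N, sizeof(Foo)   (folded when constant)
  //        %malloccall = tail call i8* @malloc(i64 %mallocsize)
  //        %foo        = bitcast i8* %malloccall to %Foo*
  const Type *IntPtrTy = Type::getInt64Ty(Ctx);
  // N must already have type IntPtrTy (the helper asserts this).
  Value *Foo = CallInst::CreateMalloc(InsertPt, IntPtrTy, FooTy, N, "foo");
  // With the BasicBlock overload, the returned bitcast is NOT inserted;
  // the caller must add it to a block itself (see the note above).
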
//===----------------------------------------------------------------------===//
// InvokeInst Implementation
@@ -462,6 +586,7 @@ InvokeInst::InvokeInst(const InvokeInst &II)
Use *OL = OperandList, *InOL = II.OperandList;
for (unsigned i = 0, e = II.getNumOperands(); i != e; ++i)
OL[i] = InOL[i];
+ SubclassOptionalData = II.SubclassOptionalData;
}
BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
@@ -500,30 +625,31 @@ void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
//===----------------------------------------------------------------------===//
ReturnInst::ReturnInst(const ReturnInst &RI)
- : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
OperandTraits<ReturnInst>::op_end(this) -
RI.getNumOperands(),
RI.getNumOperands()) {
if (RI.getNumOperands())
Op<0>() = RI.Op<0>();
+ SubclassOptionalData = RI.SubclassOptionalData;
}
-ReturnInst::ReturnInst(Value *retVal, Instruction *InsertBefore)
- : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
InsertBefore) {
if (retVal)
Op<0>() = retVal;
}
-ReturnInst::ReturnInst(Value *retVal, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
InsertAtEnd) {
if (retVal)
Op<0>() = retVal;
}
-ReturnInst::ReturnInst(BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
}
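
ReturnInst, like the other void-typed terminators below, now needs the LLVMContext spelled out, since the static Type::VoidTy is gone in favor of Type::getVoidTy(Context). A one-line sketch of the updated creation idiom (Ctx, RetVal, and BB assumed):

  ReturnInst::Create(Ctx, RetVal, BB);  // was ReturnInst::Create(RetVal, BB)
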
@@ -534,12 +660,11 @@ unsigned ReturnInst::getNumSuccessorsV() const {
/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
/// emit the vtable for the class in this translation unit.
void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- assert(0 && "ReturnInst has no successors!");
+ llvm_unreachable("ReturnInst has no successors!");
}
BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
- assert(0 && "ReturnInst has no successors!");
- abort();
+ llvm_unreachable("ReturnInst has no successors!");
return 0;
}
@@ -550,11 +675,13 @@ ReturnInst::~ReturnInst() {
// UnwindInst Implementation
//===----------------------------------------------------------------------===//
-UnwindInst::UnwindInst(Instruction *InsertBefore)
- : TerminatorInst(Type::VoidTy, Instruction::Unwind, 0, 0, InsertBefore) {
+UnwindInst::UnwindInst(LLVMContext &Context, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
+ 0, 0, InsertBefore) {
}
-UnwindInst::UnwindInst(BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Unwind, 0, 0, InsertAtEnd) {
+UnwindInst::UnwindInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
+ 0, 0, InsertAtEnd) {
}
@@ -563,12 +690,11 @@ unsigned UnwindInst::getNumSuccessorsV() const {
}
void UnwindInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- assert(0 && "UnwindInst has no successors!");
+ llvm_unreachable("UnwindInst has no successors!");
}
BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
- assert(0 && "UnwindInst has no successors!");
- abort();
+ llvm_unreachable("UnwindInst has no successors!");
return 0;
}
@@ -576,11 +702,14 @@ BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
// UnreachableInst Implementation
//===----------------------------------------------------------------------===//
-UnreachableInst::UnreachableInst(Instruction *InsertBefore)
- : TerminatorInst(Type::VoidTy, Instruction::Unreachable, 0, 0, InsertBefore) {
+UnreachableInst::UnreachableInst(LLVMContext &Context,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertBefore) {
}
-UnreachableInst::UnreachableInst(BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Unreachable, 0, 0, InsertAtEnd) {
+UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertAtEnd) {
}
unsigned UnreachableInst::getNumSuccessorsV() const {
@@ -588,12 +717,11 @@ unsigned UnreachableInst::getNumSuccessorsV() const {
}
void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- assert(0 && "UnwindInst has no successors!");
+ llvm_unreachable("UnwindInst has no successors!");
}
BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
- assert(0 && "UnwindInst has no successors!");
- abort();
+ llvm_unreachable("UnwindInst has no successors!");
return 0;
}
@@ -603,12 +731,12 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
void BranchInst::AssertOK() {
if (isConditional())
- assert(getCondition()->getType() == Type::Int1Ty &&
+ assert(getCondition()->getType() == Type::getInt1Ty(getContext()) &&
"May only branch on boolean predicates!");
}
BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
- : TerminatorInst(Type::VoidTy, Instruction::Br,
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - 1,
1, InsertBefore) {
assert(IfTrue != 0 && "Branch destination may not be null!");
@@ -616,7 +744,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
Instruction *InsertBefore)
- : TerminatorInst(Type::VoidTy, Instruction::Br,
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - 3,
3, InsertBefore) {
Op<-1>() = IfTrue;
@@ -628,7 +756,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Br,
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - 1,
1, InsertAtEnd) {
assert(IfTrue != 0 && "Branch destination may not be null!");
@@ -637,7 +765,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Br,
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - 3,
3, InsertAtEnd) {
Op<-1>() = IfTrue;
@@ -650,7 +778,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
BranchInst::BranchInst(const BranchInst &BI) :
- TerminatorInst(Type::VoidTy, Instruction::Br,
+ TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
BI.getNumOperands()) {
Op<-1>() = BI.Op<-1>();
@@ -659,6 +787,7 @@ BranchInst::BranchInst(const BranchInst &BI) :
Op<-3>() = BI.Op<-3>();
Op<-2>() = BI.Op<-2>();
}
+ SubclassOptionalData = BI.SubclassOptionalData;
}
@@ -701,35 +830,35 @@ void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
// AllocationInst Implementation
//===----------------------------------------------------------------------===//
-static Value *getAISize(Value *Amt) {
+static Value *getAISize(LLVMContext &Context, Value *Amt) {
if (!Amt)
- Amt = ConstantInt::get(Type::Int32Ty, 1);
+ Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
else {
assert(!isa<BasicBlock>(Amt) &&
"Passed basic block into allocation size parameter! Use other ctor");
- assert(Amt->getType() == Type::Int32Ty &&
+ assert(Amt->getType() == Type::getInt32Ty(Context) &&
"Malloc/Allocation array size is not a 32-bit integer!");
}
return Amt;
}
AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
- unsigned Align, const std::string &Name,
+ unsigned Align, const Twine &Name,
Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), iTy, getAISize(ArraySize),
- InsertBefore) {
+ : UnaryInstruction(PointerType::getUnqual(Ty), iTy,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
setAlignment(Align);
- assert(Ty != Type::VoidTy && "Cannot allocate void!");
+ assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
setName(Name);
}
AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
- unsigned Align, const std::string &Name,
+ unsigned Align, const Twine &Name,
BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), iTy, getAISize(ArraySize),
- InsertAtEnd) {
+ : UnaryInstruction(PointerType::getUnqual(Ty), iTy,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
setAlignment(Align);
- assert(Ty != Type::VoidTy && "Cannot allocate void!");
+ assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
setName(Name);
}
@@ -753,11 +882,6 @@ const Type *AllocationInst::getAllocatedType() const {
return getType()->getElementType();
}
-AllocaInst::AllocaInst(const AllocaInst &AI)
- : AllocationInst(AI.getType()->getElementType(), (Value*)AI.getOperand(0),
- Instruction::Alloca, AI.getAlignment()) {
-}
-
/// isStaticAlloca - Return true if this alloca is in the entry block of the
/// function and is a constant size. If so, the code generator will fold it
/// into the prolog/epilog code, so it is basically free.
@@ -770,11 +894,6 @@ bool AllocaInst::isStaticAlloca() const {
return Parent == &Parent->getParent()->front();
}
-MallocInst::MallocInst(const MallocInst &MI)
- : AllocationInst(MI.getType()->getElementType(), (Value*)MI.getOperand(0),
- Instruction::Malloc, MI.getAlignment()) {
-}
-
//===----------------------------------------------------------------------===//
// FreeInst Implementation
//===----------------------------------------------------------------------===//
@@ -785,12 +904,14 @@ void FreeInst::AssertOK() {
}
FreeInst::FreeInst(Value *Ptr, Instruction *InsertBefore)
- : UnaryInstruction(Type::VoidTy, Free, Ptr, InsertBefore) {
+ : UnaryInstruction(Type::getVoidTy(Ptr->getContext()),
+ Free, Ptr, InsertBefore) {
AssertOK();
}
FreeInst::FreeInst(Value *Ptr, BasicBlock *InsertAtEnd)
- : UnaryInstruction(Type::VoidTy, Free, Ptr, InsertAtEnd) {
+ : UnaryInstruction(Type::getVoidTy(Ptr->getContext()),
+ Free, Ptr, InsertAtEnd) {
AssertOK();
}
@@ -804,7 +925,7 @@ void LoadInst::AssertOK() {
"Ptr must have pointer type.");
}
-LoadInst::LoadInst(Value *Ptr, const std::string &Name, Instruction *InsertBef)
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertBef) {
setVolatile(false);
@@ -813,7 +934,7 @@ LoadInst::LoadInst(Value *Ptr, const std::string &Name, Instruction *InsertBef)
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const std::string &Name, BasicBlock *InsertAE)
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertAE) {
setVolatile(false);
@@ -822,7 +943,7 @@ LoadInst::LoadInst(Value *Ptr, const std::string &Name, BasicBlock *InsertAE)
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
Instruction *InsertBef)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertBef) {
@@ -832,7 +953,7 @@ LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, Instruction *InsertBef)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertBef) {
@@ -842,7 +963,7 @@ LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, BasicBlock *InsertAE)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertAE) {
@@ -852,7 +973,7 @@ LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
BasicBlock *InsertAE)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertAE) {
@@ -922,7 +1043,7 @@ void StoreInst::AssertOK() {
StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
- : Instruction(Type::VoidTy, Store,
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
OperandTraits<StoreInst>::operands(this),
InsertBefore) {
@@ -934,7 +1055,7 @@ StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
}
StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
- : Instruction(Type::VoidTy, Store,
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
OperandTraits<StoreInst>::operands(this),
InsertAtEnd) {
@@ -947,7 +1068,7 @@ StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Instruction *InsertBefore)
- : Instruction(Type::VoidTy, Store,
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
OperandTraits<StoreInst>::operands(this),
InsertBefore) {
@@ -960,7 +1081,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, Instruction *InsertBefore)
- : Instruction(Type::VoidTy, Store,
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
OperandTraits<StoreInst>::operands(this),
InsertBefore) {
@@ -973,7 +1094,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, BasicBlock *InsertAtEnd)
- : Instruction(Type::VoidTy, Store,
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
OperandTraits<StoreInst>::operands(this),
InsertAtEnd) {
@@ -986,7 +1107,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
BasicBlock *InsertAtEnd)
- : Instruction(Type::VoidTy, Store,
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
OperandTraits<StoreInst>::operands(this),
InsertAtEnd) {
@@ -1011,7 +1132,7 @@ static unsigned retrieveAddrSpace(const Value *Val) {
}
void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
- const std::string &Name) {
+ const Twine &Name) {
assert(NumOperands == 1+NumIdx && "NumOperands not initialized?");
Use *OL = OperandList;
OL[0] = Ptr;
@@ -1022,7 +1143,7 @@ void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
setName(Name);
}
-void GetElementPtrInst::init(Value *Ptr, Value *Idx, const std::string &Name) {
+void GetElementPtrInst::init(Value *Ptr, Value *Idx, const Twine &Name) {
assert(NumOperands == 2 && "NumOperands not initialized?");
Use *OL = OperandList;
OL[0] = Ptr;
@@ -1040,12 +1161,13 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
Use *GEPIOL = GEPI.OperandList;
for (unsigned i = 0, E = NumOperands; i != E; ++i)
OL[i] = GEPIOL[i];
+ SubclassOptionalData = GEPI.SubclassOptionalData;
}
GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
- const std::string &Name, Instruction *InBe)
- : Instruction(PointerType::get(checkType(getIndexedType(Ptr->getType(),Idx)),
- retrieveAddrSpace(Ptr)),
+ const Twine &Name, Instruction *InBe)
+ : Instruction(PointerType::get(
+ checkType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - 2,
2, InBe) {
@@ -1053,9 +1175,10 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
}
GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
- const std::string &Name, BasicBlock *IAE)
- : Instruction(PointerType::get(checkType(getIndexedType(Ptr->getType(),Idx)),
- retrieveAddrSpace(Ptr)),
+ const Twine &Name, BasicBlock *IAE)
+ : Instruction(PointerType::get(
+ checkType(getIndexedType(Ptr->getType(),Idx)),
+ retrieveAddrSpace(Ptr)),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - 2,
2, IAE) {
@@ -1155,13 +1278,20 @@ bool GetElementPtrInst::hasAllConstantIndices() const {
return true;
}
+void GetElementPtrInst::setIsInBounds(bool B) {
+ cast<GEPOperator>(this)->setIsInBounds(B);
+}
+
+bool GetElementPtrInst::isInBounds() const {
+ return cast<GEPOperator>(this)->isInBounds();
+}
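
The two forwarders above route the new inbounds bit through GEPOperator, so the same flag can be read uniformly off both GEP instructions and GEP constant expressions. Setting it is a one-liner (Ptr, Idx, and InsertPt assumed):

  GetElementPtrInst *GEP =
      GetElementPtrInst::Create(Ptr, Idx, "elt", InsertPt);
  GEP->setIsInBounds(true);  // caller promises the address stays in bounds
  assert(GEP->isInBounds());
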
//===----------------------------------------------------------------------===//
// ExtractElementInst Implementation
//===----------------------------------------------------------------------===//
ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBef)
: Instruction(cast<VectorType>(Val->getType())->getElementType(),
ExtractElement,
@@ -1174,24 +1304,8 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
setName(Name);
}
-ExtractElementInst::ExtractElementInst(Value *Val, unsigned IndexV,
- const std::string &Name,
- Instruction *InsertBef)
- : Instruction(cast<VectorType>(Val->getType())->getElementType(),
- ExtractElement,
- OperandTraits<ExtractElementInst>::op_begin(this),
- 2, InsertBef) {
- Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
- assert(isValidOperands(Val, Index) &&
- "Invalid extractelement instruction operands!");
- Op<0>() = Val;
- Op<1>() = Index;
- setName(Name);
-}
-
-
ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAE)
: Instruction(cast<VectorType>(Val->getType())->getElementType(),
ExtractElement,
@@ -1205,25 +1319,10 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
setName(Name);
}
-ExtractElementInst::ExtractElementInst(Value *Val, unsigned IndexV,
- const std::string &Name,
- BasicBlock *InsertAE)
- : Instruction(cast<VectorType>(Val->getType())->getElementType(),
- ExtractElement,
- OperandTraits<ExtractElementInst>::op_begin(this),
- 2, InsertAE) {
- Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
- assert(isValidOperands(Val, Index) &&
- "Invalid extractelement instruction operands!");
-
- Op<0>() = Val;
- Op<1>() = Index;
- setName(Name);
-}
-
bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
- if (!isa<VectorType>(Val->getType()) || Index->getType() != Type::Int32Ty)
+ if (!isa<VectorType>(Val->getType()) ||
+ Index->getType() != Type::getInt32Ty(Val->getContext()))
return false;
return true;
}
@@ -1233,15 +1332,8 @@ bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
// InsertElementInst Implementation
//===----------------------------------------------------------------------===//
-InsertElementInst::InsertElementInst(const InsertElementInst &IE)
- : Instruction(IE.getType(), InsertElement,
- OperandTraits<InsertElementInst>::op_begin(this), 3) {
- Op<0>() = IE.Op<0>();
- Op<1>() = IE.Op<1>();
- Op<2>() = IE.Op<2>();
-}
InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBef)
: Instruction(Vec->getType(), InsertElement,
OperandTraits<InsertElementInst>::op_begin(this),
@@ -1254,24 +1346,8 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
setName(Name);
}
-InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, unsigned IndexV,
- const std::string &Name,
- Instruction *InsertBef)
- : Instruction(Vec->getType(), InsertElement,
- OperandTraits<InsertElementInst>::op_begin(this),
- 3, InsertBef) {
- Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
- assert(isValidOperands(Vec, Elt, Index) &&
- "Invalid insertelement instruction operands!");
- Op<0>() = Vec;
- Op<1>() = Elt;
- Op<2>() = Index;
- setName(Name);
-}
-
-
InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAE)
: Instruction(Vec->getType(), InsertElement,
OperandTraits<InsertElementInst>::op_begin(this),
@@ -1285,22 +1361,6 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
setName(Name);
}
-InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, unsigned IndexV,
- const std::string &Name,
- BasicBlock *InsertAE)
-: Instruction(Vec->getType(), InsertElement,
- OperandTraits<InsertElementInst>::op_begin(this),
- 3, InsertAE) {
- Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
- assert(isValidOperands(Vec, Elt, Index) &&
- "Invalid insertelement instruction operands!");
-
- Op<0>() = Vec;
- Op<1>() = Elt;
- Op<2>() = Index;
- setName(Name);
-}
-
bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
const Value *Index) {
if (!isa<VectorType>(Vec->getType()))
@@ -1309,7 +1369,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
return false;// Second operand of insertelement must be vector element type.
- if (Index->getType() != Type::Int32Ty)
+ if (Index->getType() != Type::getInt32Ty(Vec->getContext()))
return false; // Third operand of insertelement must be i32.
return true;
}
@@ -1319,17 +1379,8 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
// ShuffleVectorInst Implementation
//===----------------------------------------------------------------------===//
-ShuffleVectorInst::ShuffleVectorInst(const ShuffleVectorInst &SV)
- : Instruction(SV.getType(), ShuffleVector,
- OperandTraits<ShuffleVectorInst>::op_begin(this),
- OperandTraits<ShuffleVectorInst>::operands(this)) {
- Op<0>() = SV.Op<0>();
- Op<1>() = SV.Op<1>();
- Op<2>() = SV.Op<2>();
-}
-
ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore)
: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
cast<VectorType>(Mask->getType())->getNumElements()),
@@ -1346,12 +1397,14 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
}
ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd)
- : Instruction(V1->getType(), ShuffleVector,
- OperandTraits<ShuffleVectorInst>::op_begin(this),
- OperandTraits<ShuffleVectorInst>::operands(this),
- InsertAtEnd) {
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertAtEnd) {
assert(isValidOperands(V1, V2, Mask) &&
"Invalid shuffle vector instruction operands!");
@@ -1368,7 +1421,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (!isa<Constant>(Mask) || MaskTy == 0 ||
- MaskTy->getElementType() != Type::Int32Ty)
+ MaskTy->getElementType() != Type::getInt32Ty(V1->getContext()))
return false;
return true;
}
@@ -1393,7 +1446,7 @@ int ShuffleVectorInst::getMaskValue(unsigned i) const {
//===----------------------------------------------------------------------===//
void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
- unsigned NumIdx, const std::string &Name) {
+ unsigned NumIdx, const Twine &Name) {
assert(NumOperands == 2 && "NumOperands not initialized?");
Op<0>() = Agg;
Op<1>() = Val;
@@ -1403,7 +1456,7 @@ void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
}
void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx,
- const std::string &Name) {
+ const Twine &Name) {
assert(NumOperands == 2 && "NumOperands not initialized?");
Op<0>() = Agg;
Op<1>() = Val;
@@ -1418,12 +1471,13 @@ InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
Indices(IVI.Indices) {
Op<0>() = IVI.getOperand(0);
Op<1>() = IVI.getOperand(1);
+ SubclassOptionalData = IVI.SubclassOptionalData;
}
InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
unsigned Idx,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore)
: Instruction(Agg->getType(), InsertValue,
OperandTraits<InsertValueInst>::op_begin(this),
@@ -1434,7 +1488,7 @@ InsertValueInst::InsertValueInst(Value *Agg,
InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
unsigned Idx,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(Agg->getType(), InsertValue,
OperandTraits<InsertValueInst>::op_begin(this),
@@ -1447,14 +1501,14 @@ InsertValueInst::InsertValueInst(Value *Agg,
//===----------------------------------------------------------------------===//
void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx,
- const std::string &Name) {
+ const Twine &Name) {
assert(NumOperands == 1 && "NumOperands not initialized?");
Indices.insert(Indices.end(), Idx, Idx + NumIdx);
setName(Name);
}
-void ExtractValueInst::init(unsigned Idx, const std::string &Name) {
+void ExtractValueInst::init(unsigned Idx, const Twine &Name) {
assert(NumOperands == 1 && "NumOperands not initialized?");
Indices.push_back(Idx);
@@ -1464,6 +1518,7 @@ void ExtractValueInst::init(unsigned Idx, const std::string &Name) {
ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
: UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
Indices(EVI.Indices) {
+ SubclassOptionalData = EVI.SubclassOptionalData;
}
// getIndexedType - Returns the type of the element that would be extracted
@@ -1517,7 +1572,7 @@ static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType,
}
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
- const Type *Ty, const std::string &Name,
+ const Type *Ty, const Twine &Name,
Instruction *InsertBefore)
: Instruction(Ty, AdjustIType(iType, Ty),
OperandTraits<BinaryOperator>::op_begin(this),
@@ -1530,7 +1585,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
}
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
- const Type *Ty, const std::string &Name,
+ const Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(Ty, AdjustIType(iType, Ty),
OperandTraits<BinaryOperator>::op_begin(this),
@@ -1619,7 +1674,7 @@ void BinaryOperator::init(BinaryOps iType) {
}
BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore) {
assert(S1->getType() == S2->getType() &&
"Cannot create binary operator with two operands of differing type!");
@@ -1627,69 +1682,70 @@ BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
}
BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
BinaryOperator *Res = Create(Op, S1, S2, Name);
InsertAtEnd->getInstList().push_back(Res);
return Res;
}
-BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name,
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
return new BinaryOperator(Instruction::Sub,
zero, Op,
Op->getType(), Name, InsertBefore);
}
-BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name,
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
return new BinaryOperator(Instruction::Sub,
zero, Op,
Op->getType(), Name, InsertAtEnd);
}
-BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name,
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
return new BinaryOperator(Instruction::FSub,
zero, Op,
Op->getType(), Name, InsertBefore);
}
-BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name,
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
return new BinaryOperator(Instruction::FSub,
zero, Op,
Op->getType(), Name, InsertAtEnd);
}
-BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name,
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
Constant *C;
if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
- C = ConstantInt::getAllOnesValue(PTy->getElementType());
- C = ConstantVector::get(std::vector<Constant*>(PTy->getNumElements(), C));
+ C = Constant::getAllOnesValue(PTy->getElementType());
+ C = ConstantVector::get(
+ std::vector<Constant*>(PTy->getNumElements(), C));
} else {
- C = ConstantInt::getAllOnesValue(Op->getType());
+ C = Constant::getAllOnesValue(Op->getType());
}
return new BinaryOperator(Instruction::Xor, Op, C,
Op->getType(), Name, InsertBefore);
}
-BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name,
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
Constant *AllOnes;
if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
// Create a vector of all ones values.
- Constant *Elt = ConstantInt::getAllOnesValue(PTy->getElementType());
- AllOnes =
- ConstantVector::get(std::vector<Constant*>(PTy->getNumElements(), Elt));
+ Constant *Elt = Constant::getAllOnesValue(PTy->getElementType());
+ AllOnes = ConstantVector::get(
+ std::vector<Constant*>(PTy->getNumElements(), Elt));
} else {
- AllOnes = ConstantInt::getAllOnesValue(Op->getType());
+ AllOnes = Constant::getAllOnesValue(Op->getType());
}
return new BinaryOperator(Instruction::Xor, Op, AllOnes,
@@ -1709,16 +1765,16 @@ static inline bool isConstantAllOnes(const Value *V) {
bool BinaryOperator::isNeg(const Value *V) {
if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
if (Bop->getOpcode() == Instruction::Sub)
- return Bop->getOperand(0) ==
- ConstantExpr::getZeroValueForNegationExpr(Bop->getType());
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
return false;
}
bool BinaryOperator::isFNeg(const Value *V) {
if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
if (Bop->getOpcode() == Instruction::FSub)
- return Bop->getOperand(0) ==
- ConstantExpr::getZeroValueForNegationExpr(Bop->getType());
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
return false;
}
@@ -1731,7 +1787,6 @@ bool BinaryOperator::isNot(const Value *V) {
}
Value *BinaryOperator::getNegArgument(Value *BinOp) {
- assert(isNeg(BinOp) && "getNegArgument from non-'neg' instruction!");
return cast<BinaryOperator>(BinOp)->getOperand(1);
}
@@ -1740,7 +1795,6 @@ const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
}
Value *BinaryOperator::getFNegArgument(Value *BinOp) {
- assert(isFNeg(BinOp) && "getFNegArgument from non-'fneg' instruction!");
return cast<BinaryOperator>(BinOp)->getOperand(1);
}
@@ -1776,6 +1830,30 @@ bool BinaryOperator::swapOperands() {
return false;
}
+void BinaryOperator::setHasNoUnsignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(b);
+}
+
+void BinaryOperator::setHasNoSignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(b);
+}
+
+void BinaryOperator::setIsExact(bool b) {
+ cast<SDivOperator>(this)->setIsExact(b);
+}
+
+bool BinaryOperator::hasNoUnsignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
+}
+
+bool BinaryOperator::hasNoSignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
+}
+
+bool BinaryOperator::isExact() const {
+ return cast<SDivOperator>(this)->isExact();
+}
+
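+// Similarly, the BinaryOperator forwarders above expose the nuw/nsw and
+// exact-sdiv flags stored in SubclassOptionalData via the Operator
+// hierarchy (setIsExact casts to SDivOperator, so it applies to sdiv only).
+// A sketch, with L, R, and InsertPt assumed:
+//
+//   BinaryOperator *Add = BinaryOperator::CreateAdd(L, R, "sum", InsertPt);
+//   Add->setHasNoSignedWrap(true); // optimizers may assume no signed overflow
+//   assert(Add->hasNoSignedWrap());
+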
//===----------------------------------------------------------------------===//
// CastInst Class
//===----------------------------------------------------------------------===//
@@ -1944,6 +2022,8 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 7: {
// ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
+ if (!IntPtrTy)
+ return 0;
unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
unsigned MidSize = MidTy->getScalarSizeInBits();
if (MidSize >= PtrSize)
@@ -1983,6 +2063,8 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 13: {
// inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ if (!IntPtrTy)
+ return 0;
unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
unsigned SrcSize = SrcTy->getScalarSizeInBits();
unsigned DstSize = DstTy->getScalarSizeInBits();
@@ -2003,7 +2085,7 @@ unsigned CastInst::isEliminableCastPair(
}
CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
- const std::string &Name, Instruction *InsertBefore) {
+ const Twine &Name, Instruction *InsertBefore) {
// Construct and return the appropriate CastInst subclass
switch (op) {
case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
@@ -2025,7 +2107,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
}
CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
- const std::string &Name, BasicBlock *InsertAtEnd) {
+ const Twine &Name, BasicBlock *InsertAtEnd) {
// Construct and return the appropriate CastInst subclass
switch (op) {
case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
@@ -2047,7 +2129,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
}
CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
@@ -2055,7 +2137,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
@@ -2063,7 +2145,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
@@ -2071,7 +2153,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
@@ -2079,7 +2161,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
@@ -2087,7 +2169,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
@@ -2095,7 +2177,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(isa<PointerType>(S->getType()) && "Invalid cast");
assert((Ty->isInteger() || isa<PointerType>(Ty)) &&
@@ -2108,7 +2190,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
/// @brief Create a BitCast or a PtrToInt cast instruction
CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore) {
assert(isa<PointerType>(S->getType()) && "Invalid cast");
assert((Ty->isInteger() || isa<PointerType>(Ty)) &&
@@ -2120,7 +2202,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
}
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
- bool isSigned, const std::string &Name,
+ bool isSigned, const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
@@ -2133,7 +2215,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
}
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
- bool isSigned, const std::string &Name,
+ bool isSigned, const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() &&
"Invalid cast");
@@ -2147,7 +2229,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
}
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
@@ -2160,7 +2242,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
}
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
- const std::string &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
@@ -2295,7 +2377,7 @@ CastInst::getCastOpcode(
PTy = NULL;
return BitCast; // same size, no-op cast
} else {
- assert(0 && "Casting pointer or non-first class to float");
+ llvm_unreachable("Casting pointer or non-first class to float");
}
} else if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) {
@@ -2404,144 +2486,144 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
}
TruncInst::TruncInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
}
TruncInst::TruncInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
}
ZExtInst::ZExtInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
ZExtInst::ZExtInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
SExtInst::SExtInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, SExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
SExtInst::SExtInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
FPTruncInst::FPTruncInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPTruncInst::FPTruncInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPExtInst::FPExtInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
FPExtInst::FPExtInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
UIToFPInst::UIToFPInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
UIToFPInst::UIToFPInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
SIToFPInst::SIToFPInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
SIToFPInst::SIToFPInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
FPToUIInst::FPToUIInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToUIInst::FPToUIInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToSIInst::FPToSIInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
FPToSIInst::FPToSIInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
PtrToIntInst::PtrToIntInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
PtrToIntInst::PtrToIntInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
IntToPtrInst::IntToPtrInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
IntToPtrInst::IntToPtrInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
BitCastInst::BitCastInst(
- Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
BitCastInst::BitCastInst(
- Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
@@ -2551,7 +2633,7 @@ BitCastInst::BitCastInst(
//===----------------------------------------------------------------------===//
CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
- Value *LHS, Value *RHS, const std::string &Name,
+ Value *LHS, Value *RHS, const Twine &Name,
Instruction *InsertBefore)
: Instruction(ty, op,
OperandTraits<CmpInst>::op_begin(this),
@@ -2564,7 +2646,7 @@ CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
}
CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
- Value *LHS, Value *RHS, const std::string &Name,
+ Value *LHS, Value *RHS, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(ty, op,
OperandTraits<CmpInst>::op_begin(this),
@@ -2577,41 +2659,35 @@ CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
}
CmpInst *
-CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
- const std::string &Name, Instruction *InsertBefore) {
+CmpInst::Create(OtherOps Op, unsigned short predicate,
+ Value *S1, Value *S2,
+ const Twine &Name, Instruction *InsertBefore) {
if (Op == Instruction::ICmp) {
- return new ICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertBefore);
- }
- if (Op == Instruction::FCmp) {
- return new FCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertBefore);
- }
- if (Op == Instruction::VICmp) {
- return new VICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertBefore);
+ if (InsertBefore)
+ return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new ICmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
}
- return new VFCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertBefore);
+
+ if (InsertBefore)
+ return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new FCmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
}
CmpInst *
CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
- const std::string &Name, BasicBlock *InsertAtEnd) {
+ const Twine &Name, BasicBlock *InsertAtEnd) {
if (Op == Instruction::ICmp) {
- return new ICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertAtEnd);
- }
- if (Op == Instruction::FCmp) {
- return new FCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertAtEnd);
+ return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
}
- if (Op == Instruction::VICmp) {
- return new VICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertAtEnd);
- }
- return new VFCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
- InsertAtEnd);
+ return new FCmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
}
void CmpInst::swapOperands() {
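
The rewritten factories drop the VICmpInst/VFCmpInst cases: vector comparisons are now expressed through the ordinary ICmpInst/FCmpInst classes, whose new constructors take the insertion point first, which is why the factory branches on whether InsertBefore is null. A caller-side sketch, assuming the 2.6-era llvm/Instructions.h (emitVecEq and the operand names are illustrative):

#include "llvm/Instructions.h"
using namespace llvm;

// Creates an integer equality compare before InsertPt; with this patch the
// same call covers vector operands too, so no separate VICmpInst is needed.
static Value *emitVecEq(Value *LHS, Value *RHS, Instruction *InsertPt) {
  return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
                         LHS, RHS, "veceq", InsertPt);
}
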
@@ -2712,7 +2788,7 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
APInt Upper(C);
uint32_t BitWidth = C.getBitWidth();
switch (pred) {
- default: assert(0 && "Invalid ICmp opcode to ConstantRange ctor!");
+ default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
case ICmpInst::ICMP_EQ: Upper++; break;
case ICmpInst::ICMP_NE: Lower++; break;
case ICmpInst::ICMP_ULT: Lower = APInt::getMinValue(BitWidth); break;
@@ -2823,7 +2899,8 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumCases) {
/// constructor can also autoinsert before another instruction.
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
Instruction *InsertBefore)
- : TerminatorInst(Type::VoidTy, Instruction::Switch, 0, 0, InsertBefore) {
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertBefore) {
init(Value, Default, NumCases);
}
@@ -2833,18 +2910,20 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
/// constructor also autoinserts at the end of the specified BasicBlock.
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::VoidTy, Instruction::Switch, 0, 0, InsertAtEnd) {
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertAtEnd) {
init(Value, Default, NumCases);
}
SwitchInst::SwitchInst(const SwitchInst &SI)
- : TerminatorInst(Type::VoidTy, Instruction::Switch,
+ : TerminatorInst(Type::getVoidTy(SI.getContext()), Instruction::Switch,
allocHungoffUses(SI.getNumOperands()), SI.getNumOperands()) {
Use *OL = OperandList, *InOL = SI.OperandList;
for (unsigned i = 0, E = SI.getNumOperands(); i != E; i+=2) {
OL[i] = InOL[i];
OL[i+1] = InOL[i+1];
}
+ SubclassOptionalData = SI.SubclassOptionalData;
}
SwitchInst::~SwitchInst() {
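
These hunks are part of the 2.6 contextification work: the global Type::VoidTy singleton becomes a per-context instance fetched with Type::getVoidTy(), with the context recovered from an operand or from the instruction being copied. A minimal sketch of the new spelling, assuming 2.6-era headers:

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;                                // owns the type singletons
  const Type *VoidT = Type::getVoidTy(Ctx);       // was the global Type::VoidTy
  const IntegerType *I32 = Type::getInt32Ty(Ctx); // was Type::Int32Ty
  (void)VoidT; (void)I32;
  return 0;
}
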
@@ -2937,80 +3016,372 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
// unit that uses these classes.
GetElementPtrInst *GetElementPtrInst::clone() const {
- return new(getNumOperands()) GetElementPtrInst(*this);
+ GetElementPtrInst *New = new(getNumOperands()) GetElementPtrInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
BinaryOperator *BinaryOperator::clone() const {
- return Create(getOpcode(), Op<0>(), Op<1>());
+ BinaryOperator *New = Create(getOpcode(), Op<0>(), Op<1>());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
FCmpInst* FCmpInst::clone() const {
- return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+ FCmpInst *New = new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
ICmpInst* ICmpInst::clone() const {
- return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+ ICmpInst *New = new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
-VFCmpInst* VFCmpInst::clone() const {
- return new VFCmpInst(getPredicate(), Op<0>(), Op<1>());
+ExtractValueInst *ExtractValueInst::clone() const {
+ ExtractValueInst *New = new ExtractValueInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+InsertValueInst *InsertValueInst::clone() const {
+ InsertValueInst *New = new InsertValueInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
-VICmpInst* VICmpInst::clone() const {
- return new VICmpInst(getPredicate(), Op<0>(), Op<1>());
+
+MallocInst *MallocInst::clone() const {
+ MallocInst *New = new MallocInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
-ExtractValueInst *ExtractValueInst::clone() const {
- return new ExtractValueInst(*this);
+AllocaInst *AllocaInst::clone() const {
+ AllocaInst *New = new AllocaInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+FreeInst *FreeInst::clone() const {
+ FreeInst *New = new FreeInst(getOperand(0));
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+LoadInst *LoadInst::clone() const {
+ LoadInst *New = new LoadInst(getOperand(0),
+ Twine(), isVolatile(),
+ getAlignment());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+StoreInst *StoreInst::clone() const {
+ StoreInst *New = new StoreInst(getOperand(0), getOperand(1),
+ isVolatile(), getAlignment());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+TruncInst *TruncInst::clone() const {
+ TruncInst *New = new TruncInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+ZExtInst *ZExtInst::clone() const {
+ ZExtInst *New = new ZExtInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+SExtInst *SExtInst::clone() const {
+ SExtInst *New = new SExtInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+FPTruncInst *FPTruncInst::clone() const {
+ FPTruncInst *New = new FPTruncInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+FPExtInst *FPExtInst::clone() const {
+ FPExtInst *New = new FPExtInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+UIToFPInst *UIToFPInst::clone() const {
+ UIToFPInst *New = new UIToFPInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+SIToFPInst *SIToFPInst::clone() const {
+ SIToFPInst *New = new SIToFPInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+FPToUIInst *FPToUIInst::clone() const {
+ FPToUIInst *New = new FPToUIInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+FPToSIInst *FPToSIInst::clone() const {
+ FPToSIInst *New = new FPToSIInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+PtrToIntInst *PtrToIntInst::clone() const {
+ PtrToIntInst *New = new PtrToIntInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+IntToPtrInst *IntToPtrInst::clone() const {
+ IntToPtrInst *New = new IntToPtrInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+BitCastInst *BitCastInst::clone() const {
+ BitCastInst *New = new BitCastInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+CallInst *CallInst::clone() const {
+ CallInst *New = new(getNumOperands()) CallInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+SelectInst *SelectInst::clone() const {
+ SelectInst *New = SelectInst::Create(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+VAArgInst *VAArgInst::clone() const {
+ VAArgInst *New = new VAArgInst(getOperand(0), getType());
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
-InsertValueInst *InsertValueInst::clone() const {
- return new InsertValueInst(*this);
-}
-
-
-MallocInst *MallocInst::clone() const { return new MallocInst(*this); }
-AllocaInst *AllocaInst::clone() const { return new AllocaInst(*this); }
-FreeInst *FreeInst::clone() const { return new FreeInst(getOperand(0)); }
-LoadInst *LoadInst::clone() const { return new LoadInst(*this); }
-StoreInst *StoreInst::clone() const { return new StoreInst(*this); }
-CastInst *TruncInst::clone() const { return new TruncInst(*this); }
-CastInst *ZExtInst::clone() const { return new ZExtInst(*this); }
-CastInst *SExtInst::clone() const { return new SExtInst(*this); }
-CastInst *FPTruncInst::clone() const { return new FPTruncInst(*this); }
-CastInst *FPExtInst::clone() const { return new FPExtInst(*this); }
-CastInst *UIToFPInst::clone() const { return new UIToFPInst(*this); }
-CastInst *SIToFPInst::clone() const { return new SIToFPInst(*this); }
-CastInst *FPToUIInst::clone() const { return new FPToUIInst(*this); }
-CastInst *FPToSIInst::clone() const { return new FPToSIInst(*this); }
-CastInst *PtrToIntInst::clone() const { return new PtrToIntInst(*this); }
-CastInst *IntToPtrInst::clone() const { return new IntToPtrInst(*this); }
-CastInst *BitCastInst::clone() const { return new BitCastInst(*this); }
-CallInst *CallInst::clone() const {
- return new(getNumOperands()) CallInst(*this);
-}
-SelectInst *SelectInst::clone() const {
- return new(getNumOperands()) SelectInst(*this);
-}
-VAArgInst *VAArgInst::clone() const { return new VAArgInst(*this); }
ExtractElementInst *ExtractElementInst::clone() const {
- return new ExtractElementInst(*this);
+ ExtractElementInst *New = ExtractElementInst::Create(getOperand(0),
+ getOperand(1));
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
+
InsertElementInst *InsertElementInst::clone() const {
- return InsertElementInst::Create(*this);
+ InsertElementInst *New = InsertElementInst::Create(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
+
ShuffleVectorInst *ShuffleVectorInst::clone() const {
- return new ShuffleVectorInst(*this);
+ ShuffleVectorInst *New = new ShuffleVectorInst(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+PHINode *PHINode::clone() const {
+ PHINode *New = new PHINode(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
-PHINode *PHINode::clone() const { return new PHINode(*this); }
+
ReturnInst *ReturnInst::clone() const {
- return new(getNumOperands()) ReturnInst(*this);
+ ReturnInst *New = new(getNumOperands()) ReturnInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
+
BranchInst *BranchInst::clone() const {
unsigned Ops(getNumOperands());
- return new(Ops, Ops == 1) BranchInst(*this);
+ BranchInst *New = new(Ops, Ops == 1) BranchInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+SwitchInst *SwitchInst::clone() const {
+ SwitchInst *New = new SwitchInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
}
-SwitchInst *SwitchInst::clone() const { return new SwitchInst(*this); }
+
InvokeInst *InvokeInst::clone() const {
- return new(getNumOperands()) InvokeInst(*this);
+ InvokeInst *New = new(getNumOperands()) InvokeInst(*this);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata()) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ }
+ return New;
+}
+
+UnwindInst *UnwindInst::clone() const {
+ LLVMContext &Context = getContext();
+ UnwindInst *New = new UnwindInst(Context);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata())
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ return New;
+}
+
+UnreachableInst *UnreachableInst::clone() const {
+ LLVMContext &Context = getContext();
+ UnreachableInst *New = new UnreachableInst(Context);
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata())
+ Context.pImpl->TheMetadata.ValueIsCloned(this, New);
+ return New;
}
-UnwindInst *UnwindInst::clone() const { return new UnwindInst(); }
-UnreachableInst *UnreachableInst::clone() const { return new UnreachableInst();}
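
Every clone() above now ends with the same tail: copy SubclassOptionalData (the per-instruction optional flag bits such as nsw/nuw/exact) and, when the source instruction carries metadata, notify the context's MetadataContext that the value was cloned. A hypothetical factoring of that tail, not part of the patch; SubclassOptionalData is a protected member of Value, so a real helper would have to be a member or friend, which is presumably why the patch repeats the tail inline:

// Illustrative only; finishClone does not exist in the patch.
template <typename InstTy>
static InstTy *finishClone(const InstTy &Orig, InstTy *New) {
  New->SubclassOptionalData = Orig.SubclassOptionalData; // optional flag bits
  if (Orig.hasMetadata())                                // re-attach metadata
    Orig.getContext().pImpl->TheMetadata.ValueIsCloned(&Orig, New);
  return New;
}
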
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
index 8bdc96896c51..5f33d0eebb94 100644
--- a/lib/VMCore/IntrinsicInst.cpp
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -61,17 +61,11 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) {
Value *DbgStopPointInst::getFileName() const {
// Once the operand indices are verified, update this assert
assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices");
- GlobalVariable *GV = cast<GlobalVariable>(getContext());
- if (!GV->hasInitializer()) return NULL;
- ConstantStruct *CS = cast<ConstantStruct>(GV->getInitializer());
- return CS->getOperand(3);
+ return getContext()->getElement(3);
}
Value *DbgStopPointInst::getDirectory() const {
// Once the operand indices are verified, update this assert
assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices");
- GlobalVariable *GV = cast<GlobalVariable>(getContext());
- if (!GV->hasInitializer()) return NULL;
- ConstantStruct *CS = cast<ConstantStruct>(GV->getInitializer());
- return CS->getOperand(4);
+ return getContext()->getElement(4);
}
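
Debug-info descriptors are no longer constant structs hung off a GlobalVariable; they are MDNodes, so a field read collapses to a positional getElement() call. A sketch of the new access pattern, assuming the 2.6-era llvm/Metadata.h accessor used in the hunk (fileNameOf is illustrative):

#include "llvm/Metadata.h"
using namespace llvm;

static Value *fileNameOf(MDNode *CompileUnit) {
  // In LLVMDebugVersion 7 descriptors, operand 3 holds the file name.
  return CompileUnit->getElement(3);
}
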
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index fe2cb7bf196f..39ed7ed68828 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -8,16 +8,18 @@
//===----------------------------------------------------------------------===//
//
// This file implements LLVMContext, as a wrapper around the opaque
-// class LLVMContextImpl.
+// class LLVMContextImpl.
//
//===----------------------------------------------------------------------===//
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/MDNode.h"
+#include "llvm/Instruction.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ValueHandle.h"
#include "LLVMContextImpl.h"
+#include <set>
using namespace llvm;
@@ -27,463 +29,48 @@ LLVMContext& llvm::getGlobalContext() {
return *GlobalContext;
}
-LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl()) { }
+LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { }
LLVMContext::~LLVMContext() { delete pImpl; }
-// Constant accessors
-Constant* LLVMContext::getNullValue(const Type* Ty) {
- return Constant::getNullValue(Ty);
-}
-
-Constant* LLVMContext::getAllOnesValue(const Type* Ty) {
- return Constant::getAllOnesValue(Ty);
-}
-
-// UndefValue accessors.
-UndefValue* LLVMContext::getUndef(const Type* Ty) {
- return UndefValue::get(Ty);
-}
-
-// ConstantInt accessors.
-ConstantInt* LLVMContext::getConstantIntTrue() {
- return ConstantInt::getTrue();
-}
-
-ConstantInt* LLVMContext::getConstantIntFalse() {
- return ConstantInt::getFalse();
-}
-
-Constant* LLVMContext::getConstantInt(const Type* Ty, uint64_t V,
- bool isSigned) {
- return ConstantInt::get(Ty, V, isSigned);
-}
-
-
-ConstantInt* LLVMContext::getConstantInt(const IntegerType* Ty, uint64_t V,
- bool isSigned) {
- return ConstantInt::get(Ty, V, isSigned);
-}
-
-ConstantInt* LLVMContext::getConstantIntSigned(const IntegerType* Ty,
- int64_t V) {
- return ConstantInt::getSigned(Ty, V);
-}
-
-ConstantInt* LLVMContext::getConstantInt(const APInt& V) {
- return ConstantInt::get(V);
-}
-
-Constant* LLVMContext::getConstantInt(const Type* Ty, const APInt& V) {
- return ConstantInt::get(Ty, V);
-}
-
-ConstantInt* LLVMContext::getConstantIntAllOnesValue(const Type* Ty) {
- return ConstantInt::getAllOnesValue(Ty);
-}
-
-
-// ConstantPointerNull accessors.
-ConstantPointerNull* LLVMContext::getConstantPointerNull(const PointerType* T) {
- return ConstantPointerNull::get(T);
-}
-
-
-// ConstantStruct accessors.
-Constant* LLVMContext::getConstantStruct(const StructType* T,
- const std::vector<Constant*>& V) {
- return ConstantStruct::get(T, V);
-}
-
-Constant* LLVMContext::getConstantStruct(const std::vector<Constant*>& V,
- bool Packed) {
- return ConstantStruct::get(V, Packed);
-}
-
-Constant* LLVMContext::getConstantStruct(Constant* const *Vals,
- unsigned NumVals, bool Packed) {
- return ConstantStruct::get(Vals, NumVals, Packed);
-}
-
-
-// ConstantAggregateZero accessors.
-ConstantAggregateZero* LLVMContext::getConstantAggregateZero(const Type* Ty) {
- return ConstantAggregateZero::get(Ty);
-}
-
-
-// ConstantArray accessors.
-Constant* LLVMContext::getConstantArray(const ArrayType* T,
- const std::vector<Constant*>& V) {
- return ConstantArray::get(T, V);
-}
-
-Constant* LLVMContext::getConstantArray(const ArrayType* T,
- Constant* const* Vals,
- unsigned NumVals) {
- return ConstantArray::get(T, Vals, NumVals);
-}
-
-Constant* LLVMContext::getConstantArray(const std::string& Initializer,
- bool AddNull) {
- return ConstantArray::get(Initializer, AddNull);
-}
-
-
-// ConstantExpr accessors.
-Constant* LLVMContext::getConstantExpr(unsigned Opcode, Constant* C1,
- Constant* C2) {
- return ConstantExpr::get(Opcode, C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprTrunc(Constant* C, const Type* Ty) {
- return ConstantExpr::getTrunc(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprSExt(Constant* C, const Type* Ty) {
- return ConstantExpr::getSExt(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprZExt(Constant* C, const Type* Ty) {
- return ConstantExpr::getZExt(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprFPTrunc(Constant* C, const Type* Ty) {
- return ConstantExpr::getFPTrunc(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprFPExtend(Constant* C, const Type* Ty) {
- return ConstantExpr::getFPExtend(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprUIToFP(Constant* C, const Type* Ty) {
- return ConstantExpr::getUIToFP(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprSIToFP(Constant* C, const Type* Ty) {
- return ConstantExpr::getSIToFP(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprFPToUI(Constant* C, const Type* Ty) {
- return ConstantExpr::getFPToUI(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprFPToSI(Constant* C, const Type* Ty) {
- return ConstantExpr::getFPToSI(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprPtrToInt(Constant* C, const Type* Ty) {
- return ConstantExpr::getPtrToInt(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprIntToPtr(Constant* C, const Type* Ty) {
- return ConstantExpr::getIntToPtr(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprBitCast(Constant* C, const Type* Ty) {
- return ConstantExpr::getBitCast(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprCast(unsigned ops, Constant* C,
- const Type* Ty) {
- return ConstantExpr::getCast(ops, C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprZExtOrBitCast(Constant* C,
- const Type* Ty) {
- return ConstantExpr::getZExtOrBitCast(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprSExtOrBitCast(Constant* C,
- const Type* Ty) {
- return ConstantExpr::getSExtOrBitCast(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprTruncOrBitCast(Constant* C,
- const Type* Ty) {
- return ConstantExpr::getTruncOrBitCast(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprPointerCast(Constant* C, const Type* Ty) {
- return ConstantExpr::getPointerCast(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprIntegerCast(Constant* C, const Type* Ty,
- bool isSigned) {
- return ConstantExpr::getIntegerCast(C, Ty, isSigned);
-}
-
-Constant* LLVMContext::getConstantExprFPCast(Constant* C, const Type* Ty) {
- return ConstantExpr::getFPCast(C, Ty);
-}
-
-Constant* LLVMContext::getConstantExprSelect(Constant* C, Constant* V1,
- Constant* V2) {
- return ConstantExpr::getSelect(C, V1, V2);
-}
-
-Constant* LLVMContext::getConstantExprAlignOf(const Type* Ty) {
- return ConstantExpr::getAlignOf(Ty);
-}
-
-Constant* LLVMContext::getConstantExprCompare(unsigned short pred,
- Constant* C1, Constant* C2) {
- return ConstantExpr::getCompare(pred, C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprNeg(Constant* C) {
- return ConstantExpr::getNeg(C);
-}
-
-Constant* LLVMContext::getConstantExprFNeg(Constant* C) {
- return ConstantExpr::getFNeg(C);
-}
-
-Constant* LLVMContext::getConstantExprNot(Constant* C) {
- return ConstantExpr::getNot(C);
-}
-
-Constant* LLVMContext::getConstantExprAdd(Constant* C1, Constant* C2) {
- return ConstantExpr::getAdd(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprFAdd(Constant* C1, Constant* C2) {
- return ConstantExpr::getFAdd(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprSub(Constant* C1, Constant* C2) {
- return ConstantExpr::getSub(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprFSub(Constant* C1, Constant* C2) {
- return ConstantExpr::getFSub(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprMul(Constant* C1, Constant* C2) {
- return ConstantExpr::getMul(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprFMul(Constant* C1, Constant* C2) {
- return ConstantExpr::getFMul(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprUDiv(Constant* C1, Constant* C2) {
- return ConstantExpr::getUDiv(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprSDiv(Constant* C1, Constant* C2) {
- return ConstantExpr::getSDiv(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprFDiv(Constant* C1, Constant* C2) {
- return ConstantExpr::getFDiv(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprURem(Constant* C1, Constant* C2) {
- return ConstantExpr::getURem(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprSRem(Constant* C1, Constant* C2) {
- return ConstantExpr::getSRem(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprFRem(Constant* C1, Constant* C2) {
- return ConstantExpr::getFRem(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprAnd(Constant* C1, Constant* C2) {
- return ConstantExpr::getAnd(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprOr(Constant* C1, Constant* C2) {
- return ConstantExpr::getOr(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprXor(Constant* C1, Constant* C2) {
- return ConstantExpr::getXor(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprICmp(unsigned short pred, Constant* LHS,
- Constant* RHS) {
- return ConstantExpr::getICmp(pred, LHS, RHS);
-}
-
-Constant* LLVMContext::getConstantExprFCmp(unsigned short pred, Constant* LHS,
- Constant* RHS) {
- return ConstantExpr::getFCmp(pred, LHS, RHS);
-}
-
-Constant* LLVMContext::getConstantExprVICmp(unsigned short pred, Constant* LHS,
- Constant* RHS) {
- return ConstantExpr::getVICmp(pred, LHS, RHS);
-}
-
-Constant* LLVMContext::getConstantExprVFCmp(unsigned short pred, Constant* LHS,
- Constant* RHS) {
- return ConstantExpr::getVFCmp(pred, LHS, RHS);
-}
-
-Constant* LLVMContext::getConstantExprShl(Constant* C1, Constant* C2) {
- return ConstantExpr::getShl(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprLShr(Constant* C1, Constant* C2) {
- return ConstantExpr::getLShr(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprAShr(Constant* C1, Constant* C2) {
- return ConstantExpr::getAShr(C1, C2);
-}
-
-Constant* LLVMContext::getConstantExprGetElementPtr(Constant* C,
- Constant* const* IdxList,
- unsigned NumIdx) {
- return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx);
-}
-
-Constant* LLVMContext::getConstantExprGetElementPtr(Constant* C,
- Value* const* IdxList,
- unsigned NumIdx) {
- return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx);
-}
-
-Constant* LLVMContext::getConstantExprExtractElement(Constant* Vec,
- Constant* Idx) {
- return ConstantExpr::getExtractElement(Vec, Idx);
-}
-
-Constant* LLVMContext::getConstantExprInsertElement(Constant* Vec,
- Constant* Elt,
- Constant* Idx) {
- return ConstantExpr::getInsertElement(Vec, Elt, Idx);
-}
-
-Constant* LLVMContext::getConstantExprShuffleVector(Constant* V1, Constant* V2,
- Constant* Mask) {
- return ConstantExpr::getShuffleVector(V1, V2, Mask);
-}
-
-Constant* LLVMContext::getConstantExprExtractValue(Constant* Agg,
- const unsigned* IdxList,
- unsigned NumIdx) {
- return ConstantExpr::getExtractValue(Agg, IdxList, NumIdx);
-}
-
-Constant* LLVMContext::getConstantExprInsertValue(Constant* Agg, Constant* Val,
- const unsigned* IdxList,
- unsigned NumIdx) {
- return ConstantExpr::getInsertValue(Agg, Val, IdxList, NumIdx);
-}
-
-Constant* LLVMContext::getZeroValueForNegation(const Type* Ty) {
- return ConstantExpr::getZeroValueForNegationExpr(Ty);
-}
-
-
-// ConstantFP accessors.
-ConstantFP* LLVMContext::getConstantFP(const APFloat& V) {
- return ConstantFP::get(V);
-}
-
-Constant* LLVMContext::getConstantFP(const Type* Ty, double V) {
- return ConstantFP::get(Ty, V);
-}
-
-ConstantFP* LLVMContext::getConstantFPNegativeZero(const Type* Ty) {
- return ConstantFP::getNegativeZero(Ty);
-}
-
-
-// ConstantVector accessors.
-Constant* LLVMContext::getConstantVector(const VectorType* T,
- const std::vector<Constant*>& V) {
- return ConstantVector::get(T, V);
-}
-
-Constant* LLVMContext::getConstantVector(const std::vector<Constant*>& V) {
- return ConstantVector::get(V);
-}
-
-Constant* LLVMContext::getConstantVector(Constant* const* Vals,
- unsigned NumVals) {
- return ConstantVector::get(Vals, NumVals);
-}
-
-ConstantVector* LLVMContext::getConstantVectorAllOnesValue(
- const VectorType* Ty) {
- return ConstantVector::getAllOnesValue(Ty);
-}
-
-// MDNode accessors
-MDNode* LLVMContext::getMDNode(Value* const* Vals, unsigned NumVals) {
- return MDNode::get(Vals, NumVals);
-}
-
-// MDString accessors
-MDString* LLVMContext::getMDString(const char *StrBegin, const char *StrEnd) {
- return MDString::get(StrBegin, StrEnd);
-}
-
-MDString* LLVMContext::getMDString(const std::string &Str) {
- return MDString::get(Str);
-}
-
-// FunctionType accessors
-FunctionType* LLVMContext::getFunctionType(const Type* Result,
- const std::vector<const Type*>& Params,
- bool isVarArg) {
- return FunctionType::get(Result, Params, isVarArg);
-}
-
-// IntegerType accessors
-const IntegerType* LLVMContext::getIntegerType(unsigned NumBits) {
- return IntegerType::get(NumBits);
-}
-
-// OpaqueType accessors
-OpaqueType* LLVMContext::getOpaqueType() {
- return OpaqueType::get();
-}
-
-// StructType accessors
-StructType* LLVMContext::getStructType(bool isPacked) {
- return StructType::get(isPacked);
-}
-
-StructType* LLVMContext::getStructType(const std::vector<const Type*>& Params,
- bool isPacked) {
- return StructType::get(Params, isPacked);
-}
-
-// ArrayType accessors
-ArrayType* LLVMContext::getArrayType(const Type* ElementType,
- uint64_t NumElements) {
- return ArrayType::get(ElementType, NumElements);
-}
-
-// PointerType accessors
-PointerType* LLVMContext::getPointerType(const Type* ElementType,
- unsigned AddressSpace) {
- return PointerType::get(ElementType, AddressSpace);
-}
-
-PointerType* LLVMContext::getPointerTypeUnqual(const Type* ElementType) {
- return PointerType::getUnqual(ElementType);
-}
-
-// VectorType accessors
-VectorType* LLVMContext::getVectorType(const Type* ElementType,
- unsigned NumElements) {
- return VectorType::get(ElementType, NumElements);
-}
-
-VectorType* LLVMContext::getVectorTypeInteger(const VectorType* VTy) {
- return VectorType::getInteger(VTy);
-}
-
-VectorType* LLVMContext::getVectorTypeExtendedElement(const VectorType* VTy) {
- return VectorType::getExtendedElementVectorType(VTy);
-}
-
-VectorType* LLVMContext::getVectorTypeTruncatedElement(const VectorType* VTy) {
- return VectorType::getTruncatedElementVectorType(VTy);
+GetElementPtrConstantExpr::GetElementPtrConstantExpr
+ (Constant *C,
+ const std::vector<Constant*> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1),
+ IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+bool LLVMContext::RemoveDeadMetadata() {
+ std::vector<WeakVH> DeadMDNodes;
+ bool Changed = false;
+ while (1) {
+
+ for (FoldingSet<MDNode>::iterator
+ I = pImpl->MDNodeSet.begin(),
+ E = pImpl->MDNodeSet.end(); I != E; ++I) {
+ MDNode *N = &(*I);
+ if (N->use_empty())
+ DeadMDNodes.push_back(WeakVH(N));
+ }
+
+ if (DeadMDNodes.empty())
+ return Changed;
+
+ while (!DeadMDNodes.empty()) {
+ Value *V = DeadMDNodes.back(); DeadMDNodes.pop_back();
+      if (const MDNode *N = dyn_cast_or_null<MDNode>(V))
+        if (N->use_empty()) {
+          delete N;
+          Changed = true;
+        }
+    }
+ }
+ return Changed;
+}
+
+MetadataContext &LLVMContext::getMetadata() {
+ return pImpl->TheMetadata;
}
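
This deletion removes the several hundred LLVMContext accessors that merely forwarded to static factory methods; callers go back to calling ConstantInt::get, ConstantExpr::getAdd, and the type factories directly, while RemoveDeadMetadata loops to a fixed point deleting use-empty MDNodes. A sketch of the surviving call style, assuming 2.6-era headers:

#include "llvm/LLVMContext.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Was Ctx.getConstantInt(...); the static factory is used directly, with
  // the context implied by the type argument.
  ConstantInt *Seven = ConstantInt::get(Type::getInt32Ty(Ctx), 7);
  (void)Seven;
  return 0;
}
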
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 4e089fb661cf..83888c3907bf 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -1,4 +1,4 @@
-//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
+//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +15,209 @@
#ifndef LLVM_LLVMCONTEXT_IMPL_H
#define LLVM_LLVMCONTEXT_IMPL_H
+#include "ConstantsContext.h"
+#include "LeaksContext.h"
+#include "TypesContext.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
+
namespace llvm {
+
+class ConstantInt;
+class ConstantFP;
+class MDString;
+class MDNode;
+class LLVMContext;
+class Type;
+class Value;
+
+struct DenseMapAPIntKeyInfo {
+ struct KeyTy {
+ APInt val;
+ const Type* type;
+ KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {}
+ KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
+ bool operator==(const KeyTy& that) const {
+ return type == that.type && this->val == that.val;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return DenseMapInfo<void*>::getHashValue(Key.type) ^
+ Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return false; }
+};
+
+struct DenseMapAPFloatKeyInfo {
+ struct KeyTy {
+ APFloat val;
+ KeyTy(const APFloat& V) : val(V){}
+ KeyTy(const KeyTy& that) : val(that.val) {}
+ bool operator==(const KeyTy& that) const {
+ return this->val.bitwiseIsEqual(that.val);
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() {
+ return KeyTy(APFloat(APFloat::Bogus,1));
+ }
+ static inline KeyTy getTombstoneKey() {
+ return KeyTy(APFloat(APFloat::Bogus,2));
+ }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return false; }
+};
+
class LLVMContextImpl {
+public:
+ sys::SmartRWMutex<true> ConstantsLock;
+ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
+ DenseMapAPIntKeyInfo> IntMapTy;
+ IntMapTy IntConstants;
+
+ typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
+ DenseMapAPFloatKeyInfo> FPMapTy;
+ FPMapTy FPConstants;
+
+ StringMap<MDString*> MDStringCache;
+
+ FoldingSet<MDNode> MDNodeSet;
+
+ ValueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
+
+ typedef ValueMap<std::vector<Constant*>, ArrayType,
+ ConstantArray, true /*largekey*/> ArrayConstantsTy;
+ ArrayConstantsTy ArrayConstants;
+
+ typedef ValueMap<std::vector<Constant*>, StructType,
+ ConstantStruct, true /*largekey*/> StructConstantsTy;
+ StructConstantsTy StructConstants;
+
+ typedef ValueMap<std::vector<Constant*>, VectorType,
+ ConstantVector> VectorConstantsTy;
+ VectorConstantsTy VectorConstants;
+
+ ValueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
+
+ ValueMap<char, Type, UndefValue> UndefValueConstants;
+
+ ValueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
+
+ ConstantInt *TheTrueVal;
+ ConstantInt *TheFalseVal;
+
+ // Lock used for guarding access to the leak detector
+ sys::SmartMutex<true> LLVMObjectsLock;
+ LeakDetectorImpl<Value> LLVMObjects;
+
+ // Lock used for guarding access to the type maps.
+ sys::SmartMutex<true> TypeMapLock;
+
+ // Recursive lock used for guarding access to AbstractTypeUsers.
+ // NOTE: The true template parameter means this will no-op when we're not in
+ // multithreaded mode.
+ sys::SmartMutex<true> AbstractTypeUsersLock;
+
+ // Basic type instances.
+ const Type VoidTy;
+ const Type LabelTy;
+ const Type FloatTy;
+ const Type DoubleTy;
+ const Type MetadataTy;
+ const Type X86_FP80Ty;
+ const Type FP128Ty;
+ const Type PPC_FP128Ty;
+ const IntegerType Int1Ty;
+ const IntegerType Int8Ty;
+ const IntegerType Int16Ty;
+ const IntegerType Int32Ty;
+ const IntegerType Int64Ty;
+
+ // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
+ // for types as they are needed. Because resolution of types must invalidate
+  // all of the abstract type descriptions, we keep them in a separate map to
+ // make this easy.
+ TypePrinting ConcreteTypeDescriptions;
+ TypePrinting AbstractTypeDescriptions;
+
+ TypeMap<ArrayValType, ArrayType> ArrayTypes;
+ TypeMap<VectorValType, VectorType> VectorTypes;
+ TypeMap<PointerValType, PointerType> PointerTypes;
+ TypeMap<FunctionValType, FunctionType> FunctionTypes;
+ TypeMap<StructValType, StructType> StructTypes;
+ TypeMap<IntegerValType, IntegerType> IntegerTypes;
+
+ /// ValueHandles - This map keeps track of all of the value handles that are
+ /// watching a Value*. The Value::HasValueHandle bit is used to know
+  /// whether or not a value has an entry in this map.
+ typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+ ValueHandlesTy ValueHandles;
+
+ MetadataContext TheMetadata;
+ LLVMContextImpl(LLVMContext &C) : TheTrueVal(0), TheFalseVal(0),
+ VoidTy(C, Type::VoidTyID),
+ LabelTy(C, Type::LabelTyID),
+ FloatTy(C, Type::FloatTyID),
+ DoubleTy(C, Type::DoubleTyID),
+ MetadataTy(C, Type::MetadataTyID),
+ X86_FP80Ty(C, Type::X86_FP80TyID),
+ FP128Ty(C, Type::FP128TyID),
+ PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ Int1Ty(C, 1),
+ Int8Ty(C, 8),
+ Int16Ty(C, 16),
+ Int32Ty(C, 32),
+ Int64Ty(C, 64) { }
+ ~LLVMContextImpl()
+ {
+ ExprConstants.freeConstants();
+ ArrayConstants.freeConstants();
+ StructConstants.freeConstants();
+ VectorConstants.freeConstants();
+ AggZeroConstants.freeConstants();
+ NullPtrConstants.freeConstants();
+ UndefValueConstants.freeConstants();
+ for (FoldingSet<MDNode>::iterator I = MDNodeSet.begin(),
+ E = MDNodeSet.end(); I != E; ++I)
+ I->dropAllReferences();
+ for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end();
+ I != E; ++I) {
+ if (I->second->use_empty())
+ delete I->second;
+ }
+ for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end();
+ I != E; ++I) {
+ if (I->second->use_empty())
+ delete I->second;
+ }
+ }
};
}
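
LLVMContextImpl now owns every uniquing table that used to be global, and the two KeyInfo structs above show the pattern DenseMap requires for custom keys: a reserved empty key, a reserved tombstone key, a hash, and equality. A self-contained sketch of the same pattern over an ordinary pair key (PairKeyInfo and the sentinel values are illustrative):

#include "llvm/ADT/DenseMap.h"
#include <utility>

struct PairKeyInfo {
  typedef std::pair<int, int> KeyTy;
  // Reserved sentinel keys; real entries must never use these values.
  static inline KeyTy getEmptyKey()     { return KeyTy(-1, -1); }
  static inline KeyTy getTombstoneKey() { return KeyTy(-2, -2); }
  static unsigned getHashValue(const KeyTy &K) {
    return unsigned(K.first) * 37u ^ unsigned(K.second);
  }
  static bool isEqual(const KeyTy &L, const KeyTy &R) { return L == R; }
  static bool isPod() { return true; } // 2.6-era trait, as in the structs above
};

static llvm::DenseMap<PairKeyInfo::KeyTy, int, PairKeyInfo> Table;

static int &slot(int A, int B) { return Table[PairKeyInfo::KeyTy(A, B)]; }
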
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
index b5926bcf441a..5ebd4f5ac03b 100644
--- a/lib/VMCore/LeakDetector.cpp
+++ b/lib/VMCore/LeakDetector.cpp
@@ -11,129 +11,62 @@
//
//===----------------------------------------------------------------------===//
+#include "LLVMContextImpl.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Streams.h"
-#include "llvm/System/RWMutex.h"
+#include "llvm/System/Mutex.h"
#include "llvm/System/Threading.h"
#include "llvm/Value.h"
using namespace llvm;
-namespace {
- template <class T>
- struct VISIBILITY_HIDDEN PrinterTrait {
- static void print(const T* P) { cerr << P; }
- };
+static ManagedStatic<sys::SmartMutex<true> > ObjectsLock;
+static ManagedStatic<LeakDetectorImpl<void> > Objects;
- template<>
- struct VISIBILITY_HIDDEN PrinterTrait<Value> {
- static void print(const Value* P) { cerr << *P; }
- };
-
- ManagedStatic<sys::SmartRWMutex<true> > LeakDetectorLock;
-
- template <typename T>
- struct VISIBILITY_HIDDEN LeakDetectorImpl {
- explicit LeakDetectorImpl(const char* const name = "") :
- Cache(0), Name(name) { }
-
- void clear() {
- Cache = 0;
- Ts.clear();
- }
-
- void setName(const char* n) {
- Name = n;
- }
-
- // Because the most common usage pattern, by far, is to add a
- // garbage object, then remove it immediately, we optimize this
- // case. When an object is added, it is not added to the set
- // immediately, it is added to the CachedValue Value. If it is
- // immediately removed, no set search need be performed.
- void addGarbage(const T* o) {
- sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock);
- if (Cache) {
- assert(Ts.count(Cache) == 0 && "Object already in set!");
- Ts.insert(Cache);
- }
- Cache = o;
- }
-
- void removeGarbage(const T* o) {
- sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock);
- if (o == Cache)
- Cache = 0; // Cache hit
- else
- Ts.erase(o);
- }
-
- bool hasGarbage(const std::string& Message) {
- addGarbage(0); // Flush the Cache
-
- sys::SmartScopedReader<true> Reader(&*LeakDetectorLock);
- assert(Cache == 0 && "No value should be cached anymore!");
-
- if (!Ts.empty()) {
- cerr << "Leaked " << Name << " objects found: " << Message << ":\n";
- for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
- E = Ts.end(); I != E; ++I) {
- cerr << "\t";
- PrinterTrait<T>::print(*I);
- cerr << "\n";
- }
- cerr << '\n';
-
- return true;
- }
-
- return false;
- }
-
- private:
- SmallPtrSet<const T*, 8> Ts;
- const T* Cache;
- const char* Name;
- };
-
- static ManagedStatic<LeakDetectorImpl<void> > Objects;
- static ManagedStatic<LeakDetectorImpl<Value> > LLVMObjects;
-
- static void clearGarbage() {
- Objects->clear();
- LLVMObjects->clear();
- }
+static void clearGarbage(LLVMContext &Context) {
+ Objects->clear();
+ Context.pImpl->LLVMObjects.clear();
}
void LeakDetector::addGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
Objects->addGarbage(Object);
}
void LeakDetector::addGarbageObjectImpl(const Value *Object) {
- LLVMObjects->addGarbage(Object);
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ sys::SmartScopedLock<true> Lock(pImpl->LLVMObjectsLock);
+ pImpl->LLVMObjects.addGarbage(Object);
}
void LeakDetector::removeGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
Objects->removeGarbage(Object);
}
void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
- LLVMObjects->removeGarbage(Object);
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ sys::SmartScopedLock<true> Lock(pImpl->LLVMObjectsLock);
+ pImpl->LLVMObjects.removeGarbage(Object);
}
-void LeakDetector::checkForGarbageImpl(const std::string &Message) {
+void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
+ const std::string &Message) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ sys::SmartScopedLock<true> CLock(pImpl->LLVMObjectsLock);
+
Objects->setName("GENERIC");
- LLVMObjects->setName("LLVM");
+ pImpl->LLVMObjects.setName("LLVM");
// use non-short-circuit version so that both checks are performed
if (Objects->hasGarbage(Message) |
- LLVMObjects->hasGarbage(Message))
- cerr << "\nThis is probably because you removed an object, but didn't "
- << "delete it. Please check your code for memory leaks.\n";
+ pImpl->LLVMObjects.hasGarbage(Message))
+ errs() << "\nThis is probably because you removed an object, but didn't "
+ << "delete it. Please check your code for memory leaks.\n";
// Clear out results so we don't get duplicate warnings on
// next call...
- clearGarbage();
+ clearGarbage(Context);
}
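
The leak detector's state is split here: the generic-pointer set stays process-wide behind a ManagedStatic mutex, while Value garbage moves into the owning context's impl under that context's own lock, so independent LLVMContexts no longer contend; note the deliberately non-short-circuiting '|' so both reports are printed. A sketch of the locking shape, assuming the 2.6-era llvm/System/Mutex.h (GlobalLock and touchGlobal are illustrative):

#include "llvm/System/Mutex.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;

static ManagedStatic<sys::SmartMutex<true> > GlobalLock; // lazily constructed

static void touchGlobal() {
  // RAII guard; the <true> parameter makes this a no-op unless LLVM is in
  // multithreaded mode, matching the locks used in the hunk above.
  sys::SmartScopedLock<true> L(*GlobalLock);
  // ... mutate the process-wide table here ...
}
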
diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h
new file mode 100644
index 000000000000..b0c3a14fe84a
--- /dev/null
+++ b/lib/VMCore/LeaksContext.h
@@ -0,0 +1,89 @@
+//===- LeaksContext.h - LeakDetector Implementation ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for the leak detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Value.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+template <class T>
+struct PrinterTrait {
+ static void print(const T* P) { errs() << P; }
+};
+
+template<>
+struct PrinterTrait<Value> {
+ static void print(const Value* P) { errs() << *P; }
+};
+
+template <typename T>
+struct LeakDetectorImpl {
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
+ // Because the most common usage pattern, by far, is to add a
+ // garbage object, then remove it immediately, we optimize this
+  // case.  When an object is added, it is not inserted into the set
+  // immediately; it is stashed in the Cache member.  If it is
+  // immediately removed, no set search need be performed.
+ void addGarbage(const T* o) {
+ if (Cache) {
+ assert(Ts.count(Cache) == 0 && "Object already in set!");
+ Ts.insert(Cache);
+ }
+ Cache = o;
+ }
+
+ void removeGarbage(const T* o) {
+ if (o == Cache)
+ Cache = 0; // Cache hit
+ else
+ Ts.erase(o);
+ }
+
+ bool hasGarbage(const std::string& Message) {
+ addGarbage(0); // Flush the Cache
+
+ assert(Cache == 0 && "No value should be cached anymore!");
+
+ if (!Ts.empty()) {
+ errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
+ for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
+ E = Ts.end(); I != E; ++I) {
+ errs() << '\t';
+ PrinterTrait<T>::print(*I);
+ errs() << '\n';
+ }
+ errs() << '\n';
+
+ return true;
+ }
+
+ return false;
+ }
+
+private:
+ SmallPtrSet<const T*, 8> Ts;
+ const T* Cache;
+ const char* Name;
+};
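
The one-slot Cache implements the optimization described in the comment above: the dominant add-then-immediately-remove pattern never touches the set. A usage sketch against this header (the locals X, Y and the message string are illustrative):

#include "LeaksContext.h"

static void demo() {
  LeakDetectorImpl<int> D("demo");
  int X = 0, Y = 0;
  D.addGarbage(&X);      // parked in Cache; Ts is not searched or grown
  D.removeGarbage(&X);   // cache hit: Cache is cleared, still no set access
  D.addGarbage(&Y);      // parked in Cache
  D.hasGarbage("check"); // flushes Cache into Ts, then reports &Y as leaked
}
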
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
index 1a68b890542f..33eb0449e824 100644
--- a/lib/VMCore/Mangler.cpp
+++ b/lib/VMCore/Mangler.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Mangler.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/System/Atomic.h"
+#include "llvm/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static char HexDigit(int V) {
@@ -32,13 +32,9 @@ static std::string MangleLetter(unsigned char C) {
/// makeNameProper - We don't want identifier names with non-C-identifier
/// characters in them, so mangle them as appropriate.
///
-std::string Mangler::makeNameProper(const std::string &X, const char *Prefix,
- const char *PrivatePrefix) {
- if (X.empty()) return X; // Empty names are uniqued by the caller.
-
- // If PreserveAsmNames is set, names with asm identifiers are not modified.
- if (PreserveAsmNames && X[0] == 1)
- return X;
+std::string Mangler::makeNameProper(const std::string &X,
+ ManglerPrefixTy PrefixTy) {
+ assert(!X.empty() && "Cannot mangle empty strings");
if (!UseQuotes) {
std::string Result;
@@ -51,8 +47,9 @@ std::string Mangler::makeNameProper(const std::string &X, const char *Prefix,
++I; // Skip over the marker.
}
- // Mangle the first letter specially, don't allow numbers.
- if (*I >= '0' && *I <= '9')
+ // Mangle the first letter specially, don't allow numbers unless the target
+ // explicitly allows them.
+ if (!SymbolsCanStartWithDigit && *I >= '0' && *I <= '9')
Result += MangleLetter(*I++);
for (std::string::const_iterator E = X.end(); I != E; ++I) {
@@ -63,11 +60,14 @@ std::string Mangler::makeNameProper(const std::string &X, const char *Prefix,
}
if (NeedPrefix) {
- if (Prefix)
- Result = Prefix + Result;
- if (PrivatePrefix)
+ Result = Prefix + Result;
+
+ if (PrefixTy == Mangler::Private)
Result = PrivatePrefix + Result;
+ else if (PrefixTy == Mangler::LinkerPrivate)
+ Result = LinkerPrivatePrefix + Result;
}
+
return Result;
}
@@ -95,17 +95,21 @@ std::string Mangler::makeNameProper(const std::string &X, const char *Prefix,
// In the common case, we don't need quotes. Handle this quickly.
if (!NeedQuotes) {
- if (NeedPrefix) {
- if (Prefix)
- Result = Prefix + X;
- else
- Result = X;
- if (PrivatePrefix)
- Result = PrivatePrefix + Result;
- return Result;
- } else
- return X.substr(1);
+ if (!NeedPrefix)
+ return X.substr(1); // Strip off the \001.
+
+ Result = Prefix + X;
+
+ if (PrefixTy == Mangler::Private)
+ Result = PrivatePrefix + Result;
+ else if (PrefixTy == Mangler::LinkerPrivate)
+ Result = LinkerPrivatePrefix + Result;
+
+ return Result;
}
+
+ if (NeedPrefix)
+ Result = X.substr(0, I-X.begin());
// Otherwise, construct the string the expensive way.
for (std::string::const_iterator E = X.end(); I != E; ++I) {
@@ -118,72 +122,93 @@ std::string Mangler::makeNameProper(const std::string &X, const char *Prefix,
}
if (NeedPrefix) {
- if (Prefix)
- Result = Prefix + X;
- else
- Result = X;
- if (PrivatePrefix)
+ Result = Prefix + Result;
+
+ if (PrefixTy == Mangler::Private)
Result = PrivatePrefix + Result;
+ else if (PrefixTy == Mangler::LinkerPrivate)
+ Result = LinkerPrivatePrefix + Result;
}
+
Result = '"' + Result + '"';
return Result;
}
-/// getTypeID - Return a unique ID for the specified LLVM type.
+/// getMangledName - Returns the mangled name of GV, a GlobalValue, in the
+/// current module. If 'Suffix' is specified, the name ends with the given
+/// suffix. If 'ForcePrivate' is set, the name is given a private label
+/// prefix regardless of the value's linkage.
///
-unsigned Mangler::getTypeID(const Type *Ty) {
- unsigned &E = TypeMap[Ty];
- if (E == 0) E = ++TypeCounter;
- return E;
-}
+std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix,
+ bool ForcePrivate) {
+ assert((!isa<Function>(GV) || !cast<Function>(GV)->isIntrinsic()) &&
+ "Intrinsic functions cannot be mangled by Mangler");
-std::string Mangler::getValueName(const Value *V) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return getValueName(GV);
+ ManglerPrefixTy PrefixTy =
+ (GV->hasPrivateLinkage() || ForcePrivate) ? Mangler::Private :
+ GV->hasLinkerPrivateLinkage() ? Mangler::LinkerPrivate : Mangler::Default;
+
+ if (GV->hasName())
+ return makeNameProper(GV->getNameStr() + Suffix, PrefixTy);
- std::string &Name = Memo[V];
- if (!Name.empty())
- return Name; // Return the already-computed name for V.
+ // Get the ID for the global, assigning a new one if we haven't got one
+ // already.
+ unsigned &ID = AnonGlobalIDs[GV];
+ if (ID == 0) ID = NextAnonGlobalID++;
- // Always mangle local names.
- Name = "ltmp_" + utostr(Count++) + "_" + utostr(getTypeID(V->getType()));
- return Name;
+ // Must mangle the global into a unique ID.
+ return makeNameProper("__unnamed_" + utostr(ID) + Suffix, PrefixTy);
}
-std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) {
- // Check to see whether we've already named V.
- std::string &Name = Memo[GV];
- if (!Name.empty())
- return Name; // Return the already-computed name for V.
-
- // Name mangling occurs as follows:
- // - If V is an intrinsic function, do not change name at all
- // - Otherwise, mangling occurs if global collides with existing name.
- if (isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) {
- Name = GV->getNameStart(); // Is an intrinsic function
- } else if (!GV->hasName()) {
- // Must mangle the global into a unique ID.
- unsigned TypeUniqueID = getTypeID(GV->getType());
- static uint32_t GlobalID = 0;
-
- unsigned OldID = GlobalID;
- sys::AtomicIncrement(&GlobalID);
+/// getNameWithPrefix - Fill OutName with the appropriate prefix and the
+/// specified global variable's name. If the global variable doesn't have a
+/// name, this fills in a unique name for the global.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const GlobalValue *GV,
+ bool isImplicitlyPrivate) {
+
+ // If the global is anonymous or does not start with \1, then add the appropriate
+ // prefix.
+ if (!GV->hasName() || GV->getName()[0] != '\1') {
+ if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
+ else if (GV->hasLinkerPrivateLinkage())
+ OutName.append(LinkerPrivatePrefix,
+ LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ }
+
+ // If the global has a name, just append it now.
+ if (GV->hasName()) {
+ StringRef Name = GV->getName();
- Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID);
- } else {
- if (GV->hasPrivateLinkage())
- Name = makeNameProper(GV->getName() + Suffix, Prefix, PrivatePrefix);
+ // Strip off the prefix marker if present.
+ if (Name[0] != '\1')
+ OutName.append(Name.begin(), Name.end());
else
- Name = makeNameProper(GV->getName() + Suffix, Prefix);
+ OutName.append(Name.begin()+1, Name.end());
+ return;
}
-
- return Name;
+
+ // If the global variable doesn't have a name, return a unique name for the
+ // global based on a numbering.
+
+ // Get the ID for the global, assigning a new one if we haven't got one
+ // already.
+ unsigned &ID = AnonGlobalIDs[GV];
+ if (ID == 0) ID = NextAnonGlobalID++;
+
+ // Must mangle the global into a unique ID.
+ raw_svector_ostream(OutName) << "__unnamed_" << ID;
}
-Mangler::Mangler(Module &M, const char *prefix, const char *privatePrefix)
- : Prefix(prefix), PrivatePrefix (privatePrefix), UseQuotes(false),
- PreserveAsmNames(false), Count(0), TypeCounter(0) {
+
+Mangler::Mangler(Module &M, const char *prefix, const char *privatePrefix,
+ const char *linkerPrivatePrefix)
+ : Prefix(prefix), PrivatePrefix(privatePrefix),
+ LinkerPrivatePrefix(linkerPrivatePrefix), UseQuotes(false),
+ SymbolsCanStartWithDigit(false), NextAnonGlobalID(1) {
std::fill(AcceptableChars, array_endof(AcceptableChars), 0);
// Letters and numbers are acceptable.
@@ -198,4 +223,5 @@ Mangler::Mangler(Module &M, const char *prefix, const char *privatePrefix)
markCharAcceptable('_');
markCharAcceptable('$');
markCharAcceptable('.');
+ markCharAcceptable('@');
}
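// Aside: the rewritten makeNameProper()/getMangledName() above select among
// three prefixes instead of the old two. A standalone sketch of just the
// prefix selection; the prefix strings below are placeholders (real values
// come from the target via the Mangler constructor), not part of this
// commit:
#include <cstdio>
#include <string>

enum ManglerPrefixTy { Default, Private, LinkerPrivate };

static std::string addPrefix(const std::string &Name,
                             ManglerPrefixTy PrefixTy) {
  const char *Prefix = "_";              // ordinary global symbol prefix
  const char *PrivatePrefix = "L";       // dropped from the object file
  const char *LinkerPrivatePrefix = "l"; // kept by asm, dropped by the linker

  std::string Result = Prefix + Name;
  if (PrefixTy == Private)
    Result = PrivatePrefix + Result;
  else if (PrefixTy == LinkerPrivate)
    Result = LinkerPrivatePrefix + Result;
  return Result;
}

int main() {
  std::printf("%s\n", addPrefix("foo", Default).c_str());       // _foo
  std::printf("%s\n", addPrefix("foo", Private).c_str());       // L_foo
  std::printf("%s\n", addPrefix("foo", LinkerPrivate).c_str()); // l_foo
  return 0;
}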
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
new file mode 100644
index 000000000000..110c5e38fac3
--- /dev/null
+++ b/lib/VMCore/Metadata.cpp
@@ -0,0 +1,433 @@
+//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Metadata classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Metadata.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Instruction.h"
+#include "SymbolTableListTraitsImpl.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MetadataBase implementation
+//
+
+/// resizeOperands - Metadata keeps track of other metadata uses using
+/// OperandList. Resize this list to hold anticipated number of metadata
+/// operands.
+void MetadataBase::resizeOperands(unsigned NumOps) {
+ unsigned e = getNumOperands();
+ if (NumOps == 0) {
+ NumOps = e*2;
+ if (NumOps < 2) NumOps = 2;
+ } else if (NumOps > NumOperands) {
+ // No resize needed.
+ if (ReservedSpace >= NumOps) return;
+ } else if (NumOps == NumOperands) {
+ if (ReservedSpace == NumOps) return;
+ } else {
+ return;
+ }
+
+ ReservedSpace = NumOps;
+ Use *OldOps = OperandList;
+ Use *NewOps = allocHungoffUses(NumOps);
+ std::copy(OldOps, OldOps + e, NewOps);
+ OperandList = NewOps;
+ if (OldOps) Use::zap(OldOps, OldOps + e, true);
+}
+//===----------------------------------------------------------------------===//
+// MDString implementation
+//
+MDString *MDString::get(LLVMContext &Context, const StringRef &Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
+ StringMapEntry<MDString *> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str);
+ MDString *&S = Entry.getValue();
+ if (!S) S = new MDString(Context, Entry.getKeyData(),
+ Entry.getKeyLength());
+
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+//MDNode implementation
+//
+MDNode::MDNode(LLVMContext &C, Value*const* Vals, unsigned NumVals)
+ : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) {
+ NumOperands = 0;
+ resizeOperands(NumVals);
+ for (unsigned i = 0; i != NumVals; ++i) {
+ // Only record metadata uses.
+ if (MetadataBase *MB = dyn_cast_or_null<MetadataBase>(Vals[i]))
+ OperandList[NumOperands++] = MB;
+ else if (Vals[i] &&
+ Vals[i]->getType()->getTypeID() == Type::MetadataTyID)
+ OperandList[NumOperands++] = Vals[i];
+ Node.push_back(ElementVH(Vals[i], this));
+ }
+}
+
+void MDNode::Profile(FoldingSetNodeID &ID) const {
+ for (const_elem_iterator I = elem_begin(), E = elem_end(); I != E; ++I)
+ ID.AddPointer(*I);
+}
+
+MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != NumVals; ++i)
+ ID.AddPointer(Vals[i]);
+
+ pImpl->ConstantsLock.reader_acquire();
+ void *InsertPoint;
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+ pImpl->ConstantsLock.reader_release();
+
+ if (!N) {
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
+ N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+ if (!N) {
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ N = new MDNode(Context, Vals, NumVals);
+ pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+ }
+ }
+
+ return N;
+}
+
+/// dropAllReferences - Remove all uses and clear node vector.
+void MDNode::dropAllReferences() {
+ User::dropAllReferences();
+ Node.clear();
+}
+
+MDNode::~MDNode() {
+ {
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
+ pImpl->MDNodeSet.RemoveNode(this);
+ }
+ dropAllReferences();
+}
+
+// Replace value from this node's element list.
+void MDNode::replaceElement(Value *From, Value *To) {
+ if (From == To || !getType())
+ return;
+ LLVMContext &Context = getType()->getContext();
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Find the value. This is a linear search; revisit it if it ever
+ // consumes a significant amount of time. Note that there may be
+ // multiple instances of From in this MDNode's element list.
+ SmallVector<unsigned, 4> Indexes;
+ unsigned Index = 0;
+ for (SmallVector<ElementVH, 4>::iterator I = Node.begin(),
+ E = Node.end(); I != E; ++I, ++Index) {
+ Value *V = *I;
+ if (V && V == From)
+ Indexes.push_back(Index);
+ }
+
+ if (Indexes.empty())
+ return;
+
+ // Remove "this" from the context map.
+ {
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
+ pImpl->MDNodeSet.RemoveNode(this);
+ }
+
+ // MDNode only lists metadata elements in its operand list, because an
+ // MDNode used by another MDNode is considered a valid use. However, an
+ // MDNode that refers to a non-metadata value is not considered a "use"
+ // of that value.
+ SmallVector<unsigned, 4> OpIndexes;
+ unsigned OpIndex = 0;
+ for (User::op_iterator OI = op_begin(), OE = op_end();
+ OI != OE; ++OI, OpIndex++) {
+ if (*OI == From)
+ OpIndexes.push_back(OpIndex);
+ }
+ if (MetadataBase *MDTo = dyn_cast_or_null<MetadataBase>(To)) {
+ for (SmallVector<unsigned, 4>::iterator OI = OpIndexes.begin(),
+ OE = OpIndexes.end(); OI != OE; ++OI)
+ setOperand(*OI, MDTo);
+ } else {
+ for (SmallVector<unsigned, 4>::iterator OI = OpIndexes.begin(),
+ OE = OpIndexes.end(); OI != OE; ++OI)
+ setOperand(*OI, 0);
+ }
+
+ // Replace From element(s) in place.
+ for (SmallVector<unsigned, 4>::iterator I = Indexes.begin(), E = Indexes.end();
+ I != E; ++I) {
+ unsigned Index = *I;
+ Node[Index] = ElementVH(To, this);
+ }
+
+ // Insert the updated "this" into the context's folding node set.
+ // If a node with the same element list already exists, first replace
+ // all uses of the existing node with the updated "this" node before
+ // inserting it.
+ FoldingSetNodeID ID;
+ Profile(ID);
+ pImpl->ConstantsLock.reader_acquire();
+ void *InsertPoint;
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+ pImpl->ConstantsLock.reader_release();
+
+ if (N) {
+ N->replaceAllUsesWith(this);
+ delete N;
+ N = 0;
+ }
+
+ {
+ sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock);
+ N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+ if (!N) {
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ N = this;
+ pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NamedMDNode implementation
+//
+NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N,
+ MetadataBase*const* MDs,
+ unsigned NumMDs, Module *ParentModule)
+ : MetadataBase(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) {
+ setName(N);
+ NumOperands = 0;
+ resizeOperands(NumMDs);
+
+ for (unsigned i = 0; i != NumMDs; ++i) {
+ if (MDs[i])
+ OperandList[NumOperands++] = MDs[i];
+ Node.push_back(WeakMetadataVH(MDs[i]));
+ }
+ if (ParentModule)
+ ParentModule->getNamedMDList().push_back(this);
+}
+
+NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) {
+ assert (NMD && "Invalid source NamedMDNode!");
+ SmallVector<MetadataBase *, 4> Elems;
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i)
+ Elems.push_back(NMD->getElement(i));
+ return new NamedMDNode(NMD->getContext(), NMD->getName().data(),
+ Elems.data(), Elems.size(), M);
+}
+
+/// eraseFromParent - Drop all references and remove the node from parent
+/// module.
+void NamedMDNode::eraseFromParent() {
+ getParent()->getNamedMDList().erase(this);
+}
+
+/// dropAllReferences - Remove all uses and clear node vector.
+void NamedMDNode::dropAllReferences() {
+ User::dropAllReferences();
+ Node.clear();
+}
+
+NamedMDNode::~NamedMDNode() {
+ dropAllReferences();
+}
+
+//===----------------------------------------------------------------------===//
+// Metadata implementation
+//
+
+/// RegisterMDKind - Register a new metadata kind and return its ID.
+/// A metadata kind can be registered only once.
+unsigned MetadataContext::RegisterMDKind(const char *Name) {
+ assert(validName(Name) && "Invalid custom metadata name!");
+ unsigned Count = MDHandlerNames.size();
+ assert(MDHandlerNames.find(Name) == MDHandlerNames.end()
+ && "Already registered MDKind!");
+ MDHandlerNames[Name] = Count + 1;
+ return Count + 1;
+}
+
+/// validName - Return true if Name is a valid custom metadata handler name.
+bool MetadataContext::validName(const char *Name) {
+ if (!Name)
+ return false;
+
+ if (!isalpha(*Name))
+ return false;
+
+ unsigned Length = strlen(Name);
+ unsigned Count = 1;
+ ++Name;
+ while (Name &&
+ (isalnum(*Name) || *Name == '_' || *Name == '-' || *Name == '.')) {
+ ++Name;
+ ++Count;
+ }
+ if (Length != Count)
+ return false;
+ return true;
+}
+
+/// getMDKind - Return metadata kind. If the requested metadata kind
+/// is not registered then return 0.
+unsigned MetadataContext::getMDKind(const char *Name) {
+ assert(validName(Name) && "Invalid custom metadata name!");
+ StringMap<unsigned>::iterator I = MDHandlerNames.find(Name);
+ if (I == MDHandlerNames.end())
+ return 0;
+
+ return I->getValue();
+}
+
+/// addMD - Attach the metadata of given kind with an Instruction.
+void MetadataContext::addMD(unsigned MDKind, MDNode *Node, Instruction *Inst) {
+ assert(Node && "Unable to add custom metadata");
+ Inst->HasMetadata = true;
+ MDStoreTy::iterator I = MetadataStore.find(Inst);
+ if (I == MetadataStore.end()) {
+ MDMapTy Info;
+ Info.push_back(std::make_pair(MDKind, Node));
+ MetadataStore.insert(std::make_pair(Inst, Info));
+ return;
+ }
+
+ MDMapTy &Info = I->second;
+ // If there is an entry for this MDKind then replace it.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i) {
+ MDPairTy &P = Info[i];
+ if (P.first == MDKind) {
+ Info[i] = std::make_pair(MDKind, Node);
+ return;
+ }
+ }
+
+ // Otherwise add a new entry.
+ Info.push_back(std::make_pair(MDKind, Node));
+ return;
+}
+
+/// removeMD - Remove metadata of the given kind attached to an instruction.
+void MetadataContext::removeMD(unsigned Kind, Instruction *Inst) {
+ MDStoreTy::iterator I = MetadataStore.find(Inst);
+ if (I == MetadataStore.end())
+ return;
+
+ MDMapTy &Info = I->second;
+ for (MDMapTy::iterator MI = Info.begin(), ME = Info.end(); MI != ME; ++MI) {
+ MDPairTy &P = *MI;
+ if (P.first == Kind) {
+ Info.erase(MI);
+ return;
+ }
+ }
+
+ return;
+}
+
+/// removeMDs - Remove all metadata attached to an instruction.
+void MetadataContext::removeMDs(const Instruction *Inst) {
+ // Find Metadata handles for this instruction.
+ MDStoreTy::iterator I = MetadataStore.find(Inst);
+ assert (I != MetadataStore.end() && "Invalid custom metadata info!");
+ MDMapTy &Info = I->second;
+
+ // FIXME : Give all metadata handlers a chance to adjust.
+
+ // Remove the entries for this instruction.
+ Info.clear();
+ MetadataStore.erase(I);
+}
+
+/// copyMD - If metadata is attached to Instruction In1, then attach the
+/// same metadata to In2.
+void MetadataContext::copyMD(Instruction *In1, Instruction *In2) {
+ assert (In1 && In2 && "Invalid instruction!");
+ MDStoreTy::iterator I = MetadataStore.find(In1);
+ if (I == MetadataStore.end())
+ return;
+
+ MDMapTy &In1Info = I->second;
+ MDMapTy In2Info;
+ for (MDMapTy::iterator I = In1Info.begin(), E = In1Info.end(); I != E; ++I)
+ if (MDNode *MD = dyn_cast_or_null<MDNode>(I->second))
+ addMD(I->first, MD, In2);
+}
+
+/// getMD - Get the metadata of the given kind attached to an Instruction.
+/// Return 0 if no such metadata is found.
+MDNode *MetadataContext::getMD(unsigned MDKind, const Instruction *Inst) {
+ MDStoreTy::iterator I = MetadataStore.find(Inst);
+ if (I == MetadataStore.end())
+ return NULL;
+
+ MDMapTy &Info = I->second;
+ for (MDMapTy::iterator I = Info.begin(), E = Info.end(); I != E; ++I)
+ if (I->first == MDKind)
+ return dyn_cast_or_null<MDNode>(I->second);
+ return NULL;
+}
+
+/// getMDs - Get all metadata attached to an Instruction.
+const MetadataContext::MDMapTy *MetadataContext::getMDs(const Instruction *Inst) {
+ MDStoreTy::iterator I = MetadataStore.find(Inst);
+ if (I == MetadataStore.end())
+ return NULL;
+
+ return &(I->second);
+}
+
+/// getHandlerNames - Get the map of handler names. This is used by the
+/// bitcode writer.
+const StringMap<unsigned> *MetadataContext::getHandlerNames() {
+ return &MDHandlerNames;
+}
+
+/// ValueIsCloned - This handler is used to update the metadata store
+/// when In1 is cloned to create In2.
+void MetadataContext::ValueIsCloned(const Instruction *In1, Instruction *In2) {
+ // Find Metadata handles for In1.
+ MDStoreTy::iterator I = MetadataStore.find(In1);
+ assert (I != MetadataStore.end() && "Invalid custom metadata info!");
+
+ // FIXME : Give all metadata handlers a chance to adjust.
+
+ MDMapTy &In1Info = I->second;
+ MDMapTy In2Info;
+ for (MDMapTy::iterator I = In1Info.begin(), E = In1Info.end(); I != E; ++I)
+ if (MDNode *MD = dyn_cast_or_null<MDNode>(I->second))
+ addMD(I->first, MD, In2);
+}
+
+/// ValueIsRAUWd - This handler is used when all uses of V1 are replaced
+/// by V2.
+void MetadataContext::ValueIsRAUWd(Value *V1, Value *V2) {
+ Instruction *I1 = dyn_cast<Instruction>(V1);
+ Instruction *I2 = dyn_cast<Instruction>(V2);
+ if (!I1 || !I2)
+ return;
+
+ // FIXME : Give custom handlers a chance to override this.
+ ValueIsCloned(I1, I2);
+}
+
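// Aside: MDNode::get() above follows an optimistic read / re-check pattern:
// probe the folding set under the read lock, and only if the node is absent
// take the write lock and probe again before inserting, since another
// thread may have created it in between. A standalone sketch with
// std::shared_mutex standing in for sys::SmartRWMutex and std::map for the
// FoldingSet (illustrative, not part of this commit):
#include <map>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>

struct Node { std::string Key; };

class UniquingTable {
  std::shared_mutex Lock;
  std::map<std::string, std::unique_ptr<Node> > Table;

public:
  Node *get(const std::string &Key) {
    {
      std::shared_lock<std::shared_mutex> Reader(Lock); // cheap shared probe
      auto I = Table.find(Key);
      if (I != Table.end())
        return I->second.get();
    }
    std::unique_lock<std::shared_mutex> Writer(Lock);   // exclusive
    std::unique_ptr<Node> &Slot = Table[Key];           // re-probe + insert
    if (!Slot)
      Slot.reset(new Node{Key});
    return Slot.get();
  }
};

int main() {
  UniquingTable T;
  Node *A = T.get("md");
  Node *B = T.get("md");
  return A == B ? 0 : 1; // the same key always yields the same uniqued node
}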
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index f057e81a649b..add24491079e 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -31,14 +31,15 @@ using namespace llvm;
//
GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
- GlobalVariable *Ret = new GlobalVariable(Type::Int32Ty, false,
- GlobalValue::ExternalLinkage);
+ GlobalVariable *Ret = new GlobalVariable(getGlobalContext(),
+ Type::getInt32Ty(getGlobalContext()),
+ false, GlobalValue::ExternalLinkage);
// This should not be garbage monitored.
LeakDetector::removeGarbageObject(Ret);
return Ret;
}
GlobalAlias *ilist_traits<GlobalAlias>::createSentinel() {
- GlobalAlias *Ret = new GlobalAlias(Type::Int32Ty,
+ GlobalAlias *Ret = new GlobalAlias(Type::getInt32Ty(getGlobalContext()),
GlobalValue::ExternalLinkage);
// This should not be garbage monitored.
LeakDetector::removeGarbageObject(Ret);
@@ -55,7 +56,7 @@ template class SymbolTableListTraits<GlobalAlias, Module>;
// Primitive Module methods.
//
-Module::Module(const std::string &MID, LLVMContext& C)
+Module::Module(const StringRef &MID, LLVMContext& C)
: Context(C), ModuleID(MID), DataLayout("") {
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
@@ -67,6 +68,7 @@ Module::~Module() {
FunctionList.clear();
AliasList.clear();
LibraryList.clear();
+ NamedMDList.clear();
delete ValSymTab;
delete TypeSymTab;
}
@@ -113,15 +115,10 @@ Module::PointerSize Module::getPointerSize() const {
/// getNamedValue - Return the first global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
-GlobalValue *Module::getNamedValue(const std::string &Name) const {
+GlobalValue *Module::getNamedValue(const StringRef &Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}
-GlobalValue *Module::getNamedValue(const char *Name) const {
- llvm::Value *V = getValueSymbolTable().lookup(Name, Name+strlen(Name));
- return cast_or_null<GlobalValue>(V);
-}
-
//===----------------------------------------------------------------------===//
// Methods for easy access to the functions in the module.
//
@@ -131,7 +128,7 @@ GlobalValue *Module::getNamedValue(const char *Name) const {
// it. This is nice because it allows most passes to get away with not handling
// the symbol table directly for this common task.
//
-Constant *Module::getOrInsertFunction(const std::string &Name,
+Constant *Module::getOrInsertFunction(const StringRef &Name,
const FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
@@ -151,7 +148,7 @@ Constant *Module::getOrInsertFunction(const std::string &Name,
F->setName("");
// Retry, now there won't be a conflict.
Constant *NewF = getOrInsertFunction(Name, Ty);
- F->setName(&Name[0], Name.size());
+ F->setName(Name);
return NewF;
}
@@ -164,7 +161,7 @@ Constant *Module::getOrInsertFunction(const std::string &Name,
return F;
}
-Constant *Module::getOrInsertTargetIntrinsic(const std::string &Name,
+Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name,
const FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
@@ -181,7 +178,7 @@ Constant *Module::getOrInsertTargetIntrinsic(const std::string &Name,
return F;
}
-Constant *Module::getOrInsertFunction(const std::string &Name,
+Constant *Module::getOrInsertFunction(const StringRef &Name,
const FunctionType *Ty) {
AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
return getOrInsertFunction(Name, Ty, AttributeList);
@@ -192,7 +189,7 @@ Constant *Module::getOrInsertFunction(const std::string &Name,
// This version of the method takes a null terminated list of function
// arguments, which makes it easier for clients to use.
//
-Constant *Module::getOrInsertFunction(const std::string &Name,
+Constant *Module::getOrInsertFunction(const StringRef &Name,
AttrListPtr AttributeList,
const Type *RetTy, ...) {
va_list Args;
@@ -206,11 +203,12 @@ Constant *Module::getOrInsertFunction(const std::string &Name,
va_end(Args);
// Build the function type and chain to the other getOrInsertFunction...
- return getOrInsertFunction(Name, FunctionType::get(RetTy, ArgTys, false),
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
AttributeList);
}
-Constant *Module::getOrInsertFunction(const std::string &Name,
+Constant *Module::getOrInsertFunction(const StringRef &Name,
const Type *RetTy, ...) {
va_list Args;
va_start(Args, RetTy);
@@ -223,18 +221,15 @@ Constant *Module::getOrInsertFunction(const std::string &Name,
va_end(Args);
// Build the function type and chain to the other getOrInsertFunction...
- return getOrInsertFunction(Name, FunctionType::get(RetTy, ArgTys, false),
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
AttrListPtr::get((AttributeWithIndex *)0, 0));
}
// getFunction - Look up the specified function in the module symbol table.
// If it does not exist, return null.
//
-Function *Module::getFunction(const std::string &Name) const {
- return dyn_cast_or_null<Function>(getNamedValue(Name));
-}
-
-Function *Module::getFunction(const char *Name) const {
+Function *Module::getFunction(const StringRef &Name) const {
return dyn_cast_or_null<Function>(getNamedValue(Name));
}
@@ -249,7 +244,7 @@ Function *Module::getFunction(const char *Name) const {
/// If AllowLocal is set to true, this function will also return globals
/// that have local linkage. By default, such globals are not returned.
///
-GlobalVariable *Module::getGlobalVariable(const std::string &Name,
+GlobalVariable *Module::getGlobalVariable(const StringRef &Name,
bool AllowLocal) const {
if (GlobalVariable *Result =
dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
@@ -264,15 +259,15 @@ GlobalVariable *Module::getGlobalVariable(const std::string &Name,
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
-Constant *Module::getOrInsertGlobal(const std::string &Name, const Type *Ty) {
+Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
if (GV == 0) {
// Nope, add it
GlobalVariable *New =
- new GlobalVariable(Ty, false, GlobalVariable::ExternalLinkage, 0, Name);
- GlobalList.push_back(New);
- return New; // Return the new declaration.
+ new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ 0, Name);
+ return New; // Return the new declaration.
}
// If the variable exists but has the wrong type, return a bitcast to the
@@ -291,10 +286,28 @@ Constant *Module::getOrInsertGlobal(const std::string &Name, const Type *Ty) {
// getNamedAlias - Look up the specified global in the module symbol table.
// If it does not exist, return null.
//
-GlobalAlias *Module::getNamedAlias(const std::string &Name) const {
+GlobalAlias *Module::getNamedAlias(const StringRef &Name) const {
return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
}
+/// getNamedMetadata - Return the first NamedMDNode in the module with the
+/// specified name. This method returns null if a NamedMDNode with the
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const StringRef &Name) const {
+ return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
+}
+
+/// getOrInsertNamedMetadata - Return the first named MDNode in the module
+/// with the specified name. This method returns a new NamedMDNode if a
+/// NamedMDNode with the specified name is not found.
+NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) {
+ NamedMDNode *NMD =
+ dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
+ if (!NMD)
+ NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this);
+ return NMD;
+}
+
//===----------------------------------------------------------------------===//
// Methods for easy access to the types in the module.
//
@@ -304,7 +317,7 @@ GlobalAlias *Module::getNamedAlias(const std::string &Name) const {
// there is already an entry for this name, true is returned and the symbol
// table is not modified.
//
-bool Module::addTypeName(const std::string &Name, const Type *Ty) {
+bool Module::addTypeName(const StringRef &Name, const Type *Ty) {
TypeSymbolTable &ST = getTypeSymbolTable();
if (ST.lookup(Name)) return true; // Already in symtab...
@@ -318,7 +331,7 @@ bool Module::addTypeName(const std::string &Name, const Type *Ty) {
/// getTypeByName - Return the type with the specified name in this module, or
/// null if there is none by that name.
-const Type *Module::getTypeByName(const std::string &Name) const {
+const Type *Module::getTypeByName(const StringRef &Name) const {
const TypeSymbolTable &ST = getTypeSymbolTable();
return cast_or_null<Type>(ST.lookup(Name));
}
@@ -364,14 +377,14 @@ void Module::dropAllReferences() {
I->dropAllReferences();
}
-void Module::addLibrary(const std::string& Lib) {
+void Module::addLibrary(const StringRef& Lib) {
for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
if (*I == Lib)
return;
LibraryList.push_back(Lib);
}
-void Module::removeLibrary(const std::string& Lib) {
+void Module::removeLibrary(const StringRef& Lib) {
LibraryListType::iterator I = LibraryList.begin();
LibraryListType::iterator E = LibraryList.end();
for (;I != E; ++I)
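// Aside: a sketch of how client code reads once these Module entry points
// take StringRef -- one overload now serves string literals, std::string,
// and (pointer, length) pairs alike, with no std::string temporaries.
// Assumes the LLVM headers of this era; illustrative only, not part of
// this commit:
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include <vector>
using namespace llvm;

Function *declarePuts(Module &M) {
  LLVMContext &C = M.getContext();
  std::vector<const Type*> Args(1, Type::getInt8PtrTy(C));
  const FunctionType *FT =
      FunctionType::get(Type::getInt32Ty(C), Args, /*isVarArg=*/false);
  // A plain literal binds directly to the StringRef parameter.
  return cast<Function>(M.getOrInsertFunction("puts", FT));
}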
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index b037994d428b..a2831d34345e 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -19,6 +19,7 @@
#include "llvm/ModuleProvider.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Atomic.h"
#include "llvm/System/Mutex.h"
#include "llvm/System/Threading.h"
@@ -45,7 +46,7 @@ bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
// dumpPassStructure - Implement the -debug-passes=Structure option
void Pass::dumpPassStructure(unsigned Offset) {
- cerr << std::string(Offset*2, ' ') << getPassName() << "\n";
+ errs().indent(Offset*2) << getPassName() << "\n";
}
/// getPassName - Return a nice clean name for a pass. This usually
@@ -62,13 +63,13 @@ const char *Pass::getPassName() const {
// to print out the contents of an analysis. Otherwise it is not necessary to
// implement this method.
//
-void Pass::print(std::ostream &O,const Module*) const {
+void Pass::print(raw_ostream &O,const Module*) const {
O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
}
// dump - call print(cerr);
void Pass::dump() const {
- print(*cerr.stream(), 0);
+ print(errs(), 0);
}
//===----------------------------------------------------------------------===//
@@ -128,12 +129,13 @@ class PassRegistrar {
/// pass.
typedef std::map<intptr_t, const PassInfo*> MapType;
MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
/// AnalysisGroupInfo - Keep track of information for each analysis group.
struct AnalysisGroupInfo {
- const PassInfo *DefaultImpl;
std::set<const PassInfo *> Implementations;
- AnalysisGroupInfo() : DefaultImpl(0) {}
};
/// AnalysisGroupInfoMap - Information for each analysis group.
@@ -146,10 +148,16 @@ public:
return I != PassInfoMap.end() ? I->second : 0;
}
+ const PassInfo *GetPassInfo(const StringRef &Arg) const {
+ StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
+ return I != PassInfoStringMap.end() ? I->second : 0;
+ }
+
void RegisterPass(const PassInfo &PI) {
bool Inserted =
PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
+ PassInfoStringMap[PI.getPassArgument()] = &PI;
}
void UnregisterPass(const PassInfo &PI) {
@@ -158,6 +166,7 @@ public:
// Remove pass from the map.
PassInfoMap.erase(I);
+ PassInfoStringMap.erase(PI.getPassArgument());
}
void EnumerateWith(PassRegistrationListener *L) {
@@ -176,11 +185,10 @@ public:
"Cannot add a pass to the same analysis group more than once!");
AGI.Implementations.insert(ImplementationInfo);
if (isDefault) {
- assert(AGI.DefaultImpl == 0 && InterfaceInfo->getNormalCtor() == 0 &&
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
"Default implementation for analysis group already specified!");
assert(ImplementationInfo->getNormalCtor() &&
"Cannot specify pass as default if it does not have a default ctor");
- AGI.DefaultImpl = ImplementationInfo;
InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
}
}
@@ -229,11 +237,15 @@ const PassInfo *Pass::lookupPassInfo(intptr_t TI) {
return getPassRegistrar()->GetPassInfo(TI);
}
+const PassInfo *Pass::lookupPassInfo(const StringRef &Arg) {
+ return getPassRegistrar()->GetPassInfo(Arg);
+}
+
void PassInfo::registerPass() {
getPassRegistrar()->RegisterPass(*this);
// Notify any listeners.
- sys::SmartScopedLock<true> Lock(&ListenersLock);
+ sys::SmartScopedLock<true> Lock(ListenersLock);
if (Listeners)
for (std::vector<PassRegistrationListener*>::iterator
I = Listeners->begin(), E = Listeners->end(); I != E; ++I)
@@ -252,10 +264,10 @@ void PassInfo::unregisterPass() {
//
RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
intptr_t PassID, bool isDefault)
- : PassInfo(Name, InterfaceID),
- ImplementationInfo(0), isDefaultImplementation(isDefault) {
+ : PassInfo(Name, InterfaceID) {
- InterfaceInfo = const_cast<PassInfo*>(Pass::lookupPassInfo(InterfaceID));
+ PassInfo *InterfaceInfo =
+ const_cast<PassInfo*>(Pass::lookupPassInfo(InterfaceID));
if (InterfaceInfo == 0) {
// First reference to Interface, register it now.
registerPass();
@@ -265,7 +277,7 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
"Trying to join an analysis group that is a normal pass!");
if (PassID) {
- ImplementationInfo = Pass::lookupPassInfo(PassID);
+ const PassInfo *ImplementationInfo = Pass::lookupPassInfo(PassID);
assert(ImplementationInfo &&
"Must register pass before adding to AnalysisGroup!");
@@ -286,14 +298,14 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
// PassRegistrationListener ctor - Add the current object to the list of
// PassRegistrationListeners...
PassRegistrationListener::PassRegistrationListener() {
- sys::SmartScopedLock<true> Lock(&ListenersLock);
+ sys::SmartScopedLock<true> Lock(ListenersLock);
if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>();
Listeners->push_back(this);
}
// dtor - Remove object from list of listeners...
PassRegistrationListener::~PassRegistrationListener() {
- sys::SmartScopedLock<true> Lock(&ListenersLock);
+ sys::SmartScopedLock<true> Lock(ListenersLock);
std::vector<PassRegistrationListener*>::iterator I =
std::find(Listeners->begin(), Listeners->end(), this);
assert(Listeners && I != Listeners->end() &&
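// Aside: the new PassInfoStringMap above gives the registrar a second index
// so passes can be found by their command-line argument as well as by type
// id. A standalone sketch of the dual-index idea (illustrative names, not
// part of this commit):
#include <cassert>
#include <cstdio>
#include <map>
#include <string>

struct PassInfo { std::string Arg; const char *Name; };

class Registrar {
  std::map<long, const PassInfo *> ByTypeId;     // existing index
  std::map<std::string, const PassInfo *> ByArg; // the new string index

public:
  void registerPass(long TypeId, const PassInfo &PI) {
    bool Inserted = ByTypeId.insert(std::make_pair(TypeId, &PI)).second;
    assert(Inserted && "Pass registered multiple times!");
    (void)Inserted;
    ByArg[PI.Arg] = &PI;                         // keep both maps in sync
  }

  const PassInfo *lookup(const std::string &Arg) const {
    std::map<std::string, const PassInfo *>::const_iterator I =
        ByArg.find(Arg);
    return I == ByArg.end() ? 0 : I->second;
  }
};

int main() {
  static const PassInfo GVN = { "gvn", "Global Value Numbering" };
  Registrar R;
  R.registerPass(1, GVN);
  if (const PassInfo *PI = R.lookup("gvn"))
    std::printf("-%s => %s\n", PI->Arg.c_str(), PI->Name);
  return 0;
}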
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 46f1243e1211..f10bc6f5ef6f 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -13,16 +13,16 @@
#include "llvm/PassManagers.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
#include "llvm/System/Threading.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm-c/Core.h"
#include <algorithm>
#include <cstdio>
@@ -45,16 +45,6 @@ enum PassDebugLevel {
None, Arguments, Structure, Executions, Details
};
-// Always verify dominfo if expensive checking is enabled.
-#ifdef XDEBUG
-bool VerifyDomInfo = true;
-#else
-bool VerifyDomInfo = false;
-#endif
-static cl::opt<bool,true>
-VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
- cl::desc("Verify dominator info (time consuming)"));
-
static cl::opt<enum PassDebugLevel>
PassDebugging("debug-pass", cl::Hidden,
cl::desc("Print PassManager debugging information"),
@@ -67,6 +57,15 @@ PassDebugging("debug-pass", cl::Hidden,
clEnumValEnd));
} // End of llvm namespace
+/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+/// or higher is specified.
+bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
+ return PassDebugging >= Executions;
+}
+
+
+
+
void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
if (V == 0 && M == 0)
OS << "Releasing pass '";
@@ -134,7 +133,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
+ llvm::errs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
BP->dumpPassStructure(Offset + 1);
@@ -274,7 +273,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "ModulePass Manager\n";
+ llvm::errs() << std::string(Offset*2, ' ') << "ModulePass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
MP->dumpPassStructure(Offset + 1);
@@ -388,25 +387,19 @@ public:
// null. It may be called multiple times.
static void createTheTimeInfo();
- void passStarted(Pass *P) {
+ /// passStarted - This method creates a timer for the given pass if it doesn't
+ /// already have one, and starts the timer.
+ Timer *passStarted(Pass *P) {
if (dynamic_cast<PMDataManager *>(P))
- return;
+ return 0;
- sys::SmartScopedLock<true> Lock(&*TimingInfoMutex);
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
std::map<Pass*, Timer>::iterator I = TimingData.find(P);
if (I == TimingData.end())
I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first;
- I->second.startTimer();
- }
-
- void passEnded(Pass *P) {
- if (dynamic_cast<PMDataManager *>(P))
- return;
-
- sys::SmartScopedLock<true> Lock(&*TimingInfoMutex);
- std::map<Pass*, Timer>::iterator I = TimingData.find(P);
- assert(I != TimingData.end() && "passStarted/passEnded not nested right!");
- I->second.stopTimer();
+ Timer *T = &I->second;
+ T->startTimer();
+ return T;
}
};
@@ -603,11 +596,11 @@ void PMTopLevelManager::dumpArguments() const {
if (PassDebugging < Arguments)
return;
- cerr << "Pass Arguments: ";
+ errs() << "Pass Arguments: ";
for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
(*I)->dumpPassArguments();
- cerr << "\n";
+ errs() << "\n";
}
void PMTopLevelManager::initializeAllAnalysisInfo() {
@@ -700,47 +693,13 @@ void PMDataManager::verifyPreservedAnalysis(Pass *P) {
for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
E = PreservedSet.end(); I != E; ++I) {
AnalysisID AID = *I;
- if (Pass *AP = findAnalysisPass(AID, true))
- AP->verifyAnalysis();
- }
-}
-
-/// verifyDomInfo - Verify dominator information if it is available.
-void PMDataManager::verifyDomInfo(Pass &P, Function &F) {
- if (!VerifyDomInfo || !P.getResolver())
- return;
-
- DominatorTree *DT = P.getAnalysisIfAvailable<DominatorTree>();
- if (!DT)
- return;
+ if (Pass *AP = findAnalysisPass(AID, true)) {
- DominatorTree OtherDT;
- OtherDT.getBase().recalculate(F);
- if (DT->compare(OtherDT)) {
- cerr << "Dominator Information for " << F.getNameStart() << "\n";
- cerr << "Pass '" << P.getPassName() << "'\n";
- cerr << "----- Valid -----\n";
- OtherDT.dump();
- cerr << "----- Invalid -----\n";
- DT->dump();
- assert(0 && "Invalid dominator info");
- }
-
- DominanceFrontier *DF = P.getAnalysisIfAvailable<DominanceFrontier>();
- if (!DF)
- return;
-
- DominanceFrontier OtherDF;
- std::vector<BasicBlock*> DTRoots = DT->getRoots();
- OtherDF.calculate(*DT, DT->getNode(DTRoots[0]));
- if (DF->compare(OtherDF)) {
- cerr << "Dominator Information for " << F.getNameStart() << "\n";
- cerr << "Pass '" << P.getPassName() << "'\n";
- cerr << "----- Valid -----\n";
- OtherDF.dump();
- cerr << "----- Invalid -----\n";
- DF->dump();
- assert(0 && "Invalid dominator info");
+ Timer *T = 0;
+ if (TheTimeInfo) T = TheTimeInfo->passStarted(AP);
+ AP->verifyAnalysis();
+ if (T) T->stopTimer();
+ }
}
}
@@ -760,8 +719,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
// Remove this analysis
if (PassDebugging >= Details) {
Pass *S = Info->second;
- cerr << " -- '" << P->getPassName() << "' is not preserving '";
- cerr << S->getPassName() << "'\n";
+ errs() << " -- '" << P->getPassName() << "' is not preserving '";
+ errs() << S->getPassName() << "'\n";
}
AvailableAnalysis.erase(Info);
}
@@ -788,7 +747,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
}
/// Remove analysis passes that are not used any longer
-void PMDataManager::removeDeadPasses(Pass *P, const char *Msg,
+void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg,
enum PassDebuggingString DBG_STR) {
SmallVector<Pass *, 12> DeadPasses;
@@ -800,40 +759,41 @@ void PMDataManager::removeDeadPasses(Pass *P, const char *Msg,
TPM->collectLastUses(DeadPasses, P);
if (PassDebugging >= Details && !DeadPasses.empty()) {
- cerr << " -*- '" << P->getPassName();
- cerr << "' is the last user of following pass instances.";
- cerr << " Free these instances\n";
+ errs() << " -*- '" << P->getPassName();
+ errs() << "' is the last user of following pass instances.";
+ errs() << " Free these instances\n";
}
for (SmallVector<Pass *, 12>::iterator I = DeadPasses.begin(),
- E = DeadPasses.end(); I != E; ++I) {
+ E = DeadPasses.end(); I != E; ++I)
+ freePass(*I, Msg, DBG_STR);
+}
- dumpPassInfo(*I, FREEING_MSG, DBG_STR, Msg);
+void PMDataManager::freePass(Pass *P, const StringRef &Msg,
+ enum PassDebuggingString DBG_STR) {
+ dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
- {
- // If the pass crashes releasing memory, remember this.
- PassManagerPrettyStackEntry X(*I);
-
- if (TheTimeInfo) TheTimeInfo->passStarted(*I);
- (*I)->releaseMemory();
- if (TheTimeInfo) TheTimeInfo->passEnded(*I);
- }
- if (const PassInfo *PI = (*I)->getPassInfo()) {
- std::map<AnalysisID, Pass*>::iterator Pos =
- AvailableAnalysis.find(PI);
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(P);
+
+ Timer *T = StartPassTimer(P);
+ P->releaseMemory();
+ StopPassTimer(P, T);
+ }
- // It is possible that pass is already removed from the AvailableAnalysis
- if (Pos != AvailableAnalysis.end())
- AvailableAnalysis.erase(Pos);
+ if (const PassInfo *PI = P->getPassInfo()) {
+ // Remove the pass itself (if it is not already removed).
+ AvailableAnalysis.erase(PI);
- // Remove all interfaces this pass implements, for which it is also
- // listed as the available implementation.
- const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i) {
- Pos = AvailableAnalysis.find(II[i]);
- if (Pos != AvailableAnalysis.end() && Pos->second == *I)
- AvailableAnalysis.erase(Pos);
- }
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ std::map<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(II[i]);
+ if (Pos != AvailableAnalysis.end() && Pos->second == P)
+ AvailableAnalysis.erase(Pos);
}
}
}
@@ -882,7 +842,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
// Keep track of higher level analysis used by this manager.
HigherLevelAnalysis.push_back(PRequired);
} else
- assert(0 && "Unable to accomodate Required Pass");
+ llvm_unreachable("Unable to accomodate Required Pass");
}
// Set P as P's last user until someone starts using P.
@@ -994,7 +954,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
E = LUses.end(); I != E; ++I) {
- llvm::cerr << "--" << std::string(Offset*2, ' ');
+ llvm::errs() << "--" << std::string(Offset*2, ' ');
(*I)->dumpPassStructure(0);
}
}
@@ -1007,44 +967,44 @@ void PMDataManager::dumpPassArguments() const {
else
if (const PassInfo *PI = (*I)->getPassInfo())
if (!PI->isAnalysisGroup())
- cerr << " -" << PI->getPassArgument();
+ errs() << " -" << PI->getPassArgument();
}
}
void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
enum PassDebuggingString S2,
- const char *Msg) {
+ const StringRef &Msg) {
if (PassDebugging < Executions)
return;
- cerr << (void*)this << std::string(getDepth()*2+1, ' ');
+ errs() << (void*)this << std::string(getDepth()*2+1, ' ');
switch (S1) {
case EXECUTION_MSG:
- cerr << "Executing Pass '" << P->getPassName();
+ errs() << "Executing Pass '" << P->getPassName();
break;
case MODIFICATION_MSG:
- cerr << "Made Modification '" << P->getPassName();
+ errs() << "Made Modification '" << P->getPassName();
break;
case FREEING_MSG:
- cerr << " Freeing Pass '" << P->getPassName();
+ errs() << " Freeing Pass '" << P->getPassName();
break;
default:
break;
}
switch (S2) {
case ON_BASICBLOCK_MSG:
- cerr << "' on BasicBlock '" << Msg << "'...\n";
+ errs() << "' on BasicBlock '" << Msg << "'...\n";
break;
case ON_FUNCTION_MSG:
- cerr << "' on Function '" << Msg << "'...\n";
+ errs() << "' on Function '" << Msg << "'...\n";
break;
case ON_MODULE_MSG:
- cerr << "' on Module '" << Msg << "'...\n";
+ errs() << "' on Module '" << Msg << "'...\n";
break;
case ON_LOOP_MSG:
- cerr << "' on Loop " << Msg << "'...\n";
+ errs() << "' on Loop '" << Msg << "'...\n";
break;
case ON_CG_MSG:
- cerr << "' on Call Graph " << Msg << "'...\n";
+ errs() << "' on Call Graph Nodes '" << Msg << "'...\n";
break;
default:
break;
@@ -1069,17 +1029,17 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
}
-void PMDataManager::dumpAnalysisUsage(const char *Msg, const Pass *P,
+void PMDataManager::dumpAnalysisUsage(const StringRef &Msg, const Pass *P,
const AnalysisUsage::VectorType &Set) const {
assert(PassDebugging >= Details);
if (Set.empty())
return;
- cerr << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ errs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
for (unsigned i = 0; i != Set.size(); ++i) {
- if (i) cerr << ",";
- cerr << " " << Set[i]->getPassName();
+ if (i) errs() << ',';
+ errs() << ' ' << Set[i]->getPassName();
}
- cerr << "\n";
+ errs() << '\n';
}
/// Add RequiredPass into list of lower level passes required by pass P.
@@ -1102,10 +1062,10 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
// checks whether any lower level manager will be able to provide this
// analysis info on demand or not.
#ifndef NDEBUG
- cerr << "Unable to schedule '" << RequiredPass->getPassName();
- cerr << "' required by '" << P->getPassName() << "'\n";
+ errs() << "Unable to schedule '" << RequiredPass->getPassName();
+ errs() << "' required by '" << P->getPassName() << "'\n";
#endif
- assert(0 && "Unable to schedule pass");
+ llvm_unreachable("Unable to schedule pass");
}
// Destructor
@@ -1143,7 +1103,7 @@ bool BBPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
- dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getNameStart());
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
dumpRequiredSet(BP);
initializeAnalysisImpl(BP);
@@ -1152,20 +1112,20 @@ bool BBPassManager::runOnFunction(Function &F) {
// If the pass crashes, remember this.
PassManagerPrettyStackEntry X(BP, *I);
- if (TheTimeInfo) TheTimeInfo->passStarted(BP);
+ Timer *T = StartPassTimer(BP);
Changed |= BP->runOnBasicBlock(*I);
- if (TheTimeInfo) TheTimeInfo->passEnded(BP);
+ StopPassTimer(BP, T);
}
if (Changed)
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
- I->getNameStart());
+ I->getName());
dumpPreservedSet(BP);
verifyPreservedAnalysis(BP);
removeNotPreservedAnalysis(BP);
recordAvailableAnalysis(BP);
- removeDeadPasses(BP, I->getNameStart(), ON_BASICBLOCK_MSG);
+ removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
}
return Changed |= doFinalization(F);
@@ -1248,8 +1208,7 @@ void FunctionPassManager::add(Pass *P) {
bool FunctionPassManager::run(Function &F) {
std::string errstr;
if (MP->materializeFunction(&F, &errstr)) {
- cerr << "Error reading bitcode file: " << errstr << "\n";
- abort();
+ llvm_report_error("Error reading bitcode file: " + errstr);
}
return FPM->run(F);
}
@@ -1336,7 +1295,7 @@ bool FunctionPassManagerImpl::run(Function &F) {
char FPPassManager::ID = 0;
/// Print passes managed by this manager
void FPPassManager::dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ llvm::errs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
FP->dumpPassStructure(Offset + 1);
@@ -1360,7 +1319,7 @@ bool FPPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
- dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getNameStart());
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
dumpRequiredSet(FP);
initializeAnalysisImpl(FP);
@@ -1368,22 +1327,19 @@ bool FPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(FP, F);
- if (TheTimeInfo) TheTimeInfo->passStarted(FP);
+ Timer *T = StartPassTimer(FP);
Changed |= FP->runOnFunction(F);
- if (TheTimeInfo) TheTimeInfo->passEnded(FP);
+ StopPassTimer(FP, T);
}
if (Changed)
- dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getNameStart());
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
dumpPreservedSet(FP);
verifyPreservedAnalysis(FP);
removeNotPreservedAnalysis(FP);
recordAvailableAnalysis(FP);
- removeDeadPasses(FP, F.getNameStart(), ON_FUNCTION_MSG);
-
- // If dominator information is available then verify the info if requested.
- verifyDomInfo(*FP, F);
+ removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
}
return Changed;
}
@@ -1444,9 +1400,9 @@ MPPassManager::runOnModule(Module &M) {
{
PassManagerPrettyStackEntry X(MP, M);
- if (TheTimeInfo) TheTimeInfo->passStarted(MP);
+ Timer *T = StartPassTimer(MP);
Changed |= MP->runOnModule(M);
- if (TheTimeInfo) TheTimeInfo->passEnded(MP);
+ StopPassTimer(MP, T);
}
if (Changed)
@@ -1582,15 +1538,15 @@ void TimingInfo::createTheTimeInfo() {
}
/// If TimingInfo is enabled then start pass timer.
-void StartPassTimer(Pass *P) {
+Timer *llvm::StartPassTimer(Pass *P) {
if (TheTimeInfo)
- TheTimeInfo->passStarted(P);
+ return TheTimeInfo->passStarted(P);
+ return 0;
}
/// If TimingInfo is enabled then stop pass timer.
-void StopPassTimer(Pass *P) {
- if (TheTimeInfo)
- TheTimeInfo->passEnded(P);
+void llvm::StopPassTimer(Pass *P, Timer *T) {
+ if (T) T->stopTimer();
}
//===----------------------------------------------------------------------===//
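// Aside: the timing change above replaces the passStarted()/passEnded()
// pair (which looked the pass up in a map twice) with a handle protocol:
// StartPassTimer returns the Timer it started, or null when timing is off,
// and the caller hands that exact pointer back to StopPassTimer. A
// standalone sketch with std::chrono standing in for llvm::Timer
// (illustrative, not part of this commit):
#include <chrono>
#include <cstdio>

struct Timer {
  std::chrono::steady_clock::time_point Start;
  double Total = 0;
  void startTimer() { Start = std::chrono::steady_clock::now(); }
  void stopTimer() {
    Total += std::chrono::duration<double>(
                 std::chrono::steady_clock::now() - Start).count();
  }
};

static bool TimingEnabled = true;
static Timer PassTimer; // one Timer per pass in the real code

static Timer *StartPassTimer() {
  if (!TimingEnabled)
    return 0;             // caller passes the null handle on unchanged
  PassTimer.startTimer();
  return &PassTimer;
}

static void StopPassTimer(Timer *T) {
  if (T) T->stopTimer();  // stops exactly the timer that was started
}

int main() {
  Timer *T = StartPassTimer();
  for (volatile int i = 0; i < 1000000; ++i) {} // stand-in for runOnFunction
  StopPassTimer(T);
  std::printf("pass took %f seconds\n", PassTimer.Total);
  return 0;
}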
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 40d751704917..7afbc682d15d 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -11,15 +11,19 @@
//
//===----------------------------------------------------------------------===//
+#include "LLVMContextImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Constants.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,26 +42,14 @@ using namespace llvm;
AbstractTypeUser::~AbstractTypeUser() {}
+void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
+ V->VTy = NewTy;
+}
//===----------------------------------------------------------------------===//
// Type Class Implementation
//===----------------------------------------------------------------------===//
-// Lock used for guarding access to the type maps.
-static ManagedStatic<sys::SmartMutex<true> > TypeMapLock;
-
-// Recursive lock used for guarding access to AbstractTypeUsers.
-// NOTE: The true template parameter means this will no-op when we're not in
-// multithreaded mode.
-static ManagedStatic<sys::SmartMutex<true> > AbstractTypeUsersLock;
-
-// Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
-// for types as they are needed. Because resolution of types must invalidate
-// all of the abstract type descriptions, we keep them in a seperate map to make
-// this easy.
-static ManagedStatic<TypePrinting> ConcreteTypeDescriptions;
-static ManagedStatic<TypePrinting> AbstractTypeDescriptions;
-
/// Because of the way Type subclasses are allocated, this function is necessary
/// to use the correct kind of "delete" operator to deallocate the Type object.
/// Some type objects (FunctionTy, StructTy) allocate additional space after
@@ -99,26 +91,26 @@ void Type::destroy() const {
delete this;
}
-const Type *Type::getPrimitiveType(TypeID IDNumber) {
+const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
switch (IDNumber) {
- case VoidTyID : return VoidTy;
- case FloatTyID : return FloatTy;
- case DoubleTyID : return DoubleTy;
- case X86_FP80TyID : return X86_FP80Ty;
- case FP128TyID : return FP128Ty;
- case PPC_FP128TyID : return PPC_FP128Ty;
- case LabelTyID : return LabelTy;
- case MetadataTyID : return MetadataTy;
+ case VoidTyID : return getVoidTy(C);
+ case FloatTyID : return getFloatTy(C);
+ case DoubleTyID : return getDoubleTy(C);
+ case X86_FP80TyID : return getX86_FP80Ty(C);
+ case FP128TyID : return getFP128Ty(C);
+ case PPC_FP128TyID : return getPPC_FP128Ty(C);
+ case LabelTyID : return getLabelTy(C);
+ case MetadataTyID : return getMetadataTy(C);
default:
return 0;
}
}
-const Type *Type::getVAArgsPromotedType() const {
+const Type *Type::getVAArgsPromotedType(LLVMContext &C) const {
if (ID == IntegerTyID && getSubclassData() < 32)
- return Type::Int32Ty;
+ return Type::getInt32Ty(C);
else if (ID == FloatTyID)
- return Type::DoubleTy;
+ return Type::getDoubleTy(C);
else
return this;
}
@@ -264,16 +256,19 @@ const Type *Type::getForwardedTypeInternal() const {
}
void Type::refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
- abort();
+ llvm_unreachable("Attempting to refine a derived type!");
}
void Type::typeBecameConcrete(const DerivedType *AbsTy) {
- abort();
+ llvm_unreachable("DerivedType is already a concrete type!");
}
std::string Type::getDescription() const {
+ LLVMContextImpl *pImpl = getContext().pImpl;
TypePrinting &Map =
- isAbstract() ? *AbstractTypeDescriptions : *ConcreteTypeDescriptions;
+ isAbstract() ?
+ pImpl->AbstractTypeDescriptions :
+ pImpl->ConcreteTypeDescriptions;
std::string DescStr;
raw_string_ostream DescOS(DescStr);
@@ -284,7 +279,7 @@ std::string Type::getDescription() const {
bool StructType::indexValid(const Value *V) const {
// Structure indexes require 32-bit integer constants.
- if (V->getType() == Type::Int32Ty)
+ if (V->getType() == Type::getInt32Ty(V->getContext()))
if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
return indexValid(CU->getZExtValue());
return false;
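// Illustration (not from the patch): only 32-bit ConstantInts index structs,
// and the index must name an existing field. ST is a hypothetical StructType
// with two fields; Ctx an existing LLVMContext:
//
//   ST->indexValid(ConstantInt::get(Type::getInt32Ty(Ctx), 1));  // true
//   ST->indexValid(ConstantInt::get(Type::getInt64Ty(Ctx), 1));  // false: not i32
//   ST->indexValid(ConstantInt::get(Type::getInt32Ty(Ctx), 5));  // false: out of range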
@@ -311,25 +306,97 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const {
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
-const Type *Type::VoidTy = new Type(Type::VoidTyID);
-const Type *Type::FloatTy = new Type(Type::FloatTyID);
-const Type *Type::DoubleTy = new Type(Type::DoubleTyID);
-const Type *Type::X86_FP80Ty = new Type(Type::X86_FP80TyID);
-const Type *Type::FP128Ty = new Type(Type::FP128TyID);
-const Type *Type::PPC_FP128Ty = new Type(Type::PPC_FP128TyID);
-const Type *Type::LabelTy = new Type(Type::LabelTyID);
-const Type *Type::MetadataTy = new Type(Type::MetadataTyID);
+const Type *Type::getVoidTy(LLVMContext &C) {
+ return &C.pImpl->VoidTy;
+}
-namespace {
- struct BuiltinIntegerType : public IntegerType {
- explicit BuiltinIntegerType(unsigned W) : IntegerType(W) {}
- };
+const Type *Type::getLabelTy(LLVMContext &C) {
+ return &C.pImpl->LabelTy;
+}
+
+const Type *Type::getFloatTy(LLVMContext &C) {
+ return &C.pImpl->FloatTy;
+}
+
+const Type *Type::getDoubleTy(LLVMContext &C) {
+ return &C.pImpl->DoubleTy;
+}
+
+const Type *Type::getMetadataTy(LLVMContext &C) {
+ return &C.pImpl->MetadataTy;
+}
+
+const Type *Type::getX86_FP80Ty(LLVMContext &C) {
+ return &C.pImpl->X86_FP80Ty;
+}
+
+const Type *Type::getFP128Ty(LLVMContext &C) {
+ return &C.pImpl->FP128Ty;
+}
+
+const Type *Type::getPPC_FP128Ty(LLVMContext &C) {
+ return &C.pImpl->PPC_FP128Ty;
+}
+
+const IntegerType *Type::getInt1Ty(LLVMContext &C) {
+ return &C.pImpl->Int1Ty;
+}
+
+const IntegerType *Type::getInt8Ty(LLVMContext &C) {
+ return &C.pImpl->Int8Ty;
+}
+
+const IntegerType *Type::getInt16Ty(LLVMContext &C) {
+ return &C.pImpl->Int16Ty;
+}
+
+const IntegerType *Type::getInt32Ty(LLVMContext &C) {
+ return &C.pImpl->Int32Ty;
+}
+
+const IntegerType *Type::getInt64Ty(LLVMContext &C) {
+ return &C.pImpl->Int64Ty;
+}
+
+const PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+ return getFloatTy(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+ return getDoubleTy(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_FP80Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getFP128Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getPPC_FP128Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt1Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt8Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt16Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt32Ty(C)->getPointerTo(AS);
+}
+
+const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt64Ty(C)->getPointerTo(AS);
}
-const IntegerType *Type::Int1Ty = new BuiltinIntegerType(1);
-const IntegerType *Type::Int8Ty = new BuiltinIntegerType(8);
-const IntegerType *Type::Int16Ty = new BuiltinIntegerType(16);
-const IntegerType *Type::Int32Ty = new BuiltinIntegerType(32);
-const IntegerType *Type::Int64Ty = new BuiltinIntegerType(64);
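// Illustration (not from the patch): each getXPtrTy helper above is just the
// corresponding element accessor composed with getPointerTo. A sketch,
// assuming an LLVMContext &Ctx and address space 0:
//
//   assert(Type::getInt8PtrTy(Ctx, 0) ==
//          PointerType::get(Type::getInt8Ty(Ctx), 0));   // i8* in AS 0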
//===----------------------------------------------------------------------===//
// Derived Type Constructors
@@ -338,42 +405,20 @@ const IntegerType *Type::Int64Ty = new BuiltinIntegerType(64);
/// isValidReturnType - Return true if the specified type is valid as a return
/// type.
bool FunctionType::isValidReturnType(const Type *RetTy) {
- if (RetTy->isFirstClassType()) {
- if (const PointerType *PTy = dyn_cast<PointerType>(RetTy))
- return PTy->getElementType() != Type::MetadataTy;
- return true;
- }
- if (RetTy == Type::VoidTy || RetTy == Type::MetadataTy ||
- isa<OpaqueType>(RetTy))
- return true;
-
- // If this is a multiple return case, verify that each return is a first class
- // value and that there is at least one value.
- const StructType *SRetTy = dyn_cast<StructType>(RetTy);
- if (SRetTy == 0 || SRetTy->getNumElements() == 0)
- return false;
-
- for (unsigned i = 0, e = SRetTy->getNumElements(); i != e; ++i)
- if (!SRetTy->getElementType(i)->isFirstClassType())
- return false;
- return true;
+ return RetTy->getTypeID() != LabelTyID &&
+ RetTy->getTypeID() != MetadataTyID;
}
/// isValidArgumentType - Return true if the specified type is valid as an
/// argument type.
bool FunctionType::isValidArgumentType(const Type *ArgTy) {
- if ((!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy)) ||
- (isa<PointerType>(ArgTy) &&
- cast<PointerType>(ArgTy)->getElementType() == Type::MetadataTy))
- return false;
-
- return true;
+ return ArgTy->isFirstClassType() || isa<OpaqueType>(ArgTy);
}
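// Illustration (not from the patch): the rewritten predicates are now purely
// structural. Note that struct returns pass the check, which is what allows
// multiple-value returns without the special case deleted above. Assumes an
// LLVMContext &Ctx:
//
//   FunctionType::isValidReturnType(Type::getVoidTy(Ctx));    // true
//   FunctionType::isValidReturnType(Type::getLabelTy(Ctx));   // false
//   FunctionType::isValidArgumentType(Type::getVoidTy(Ctx));  // false: not first-class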
FunctionType::FunctionType(const Type *Result,
const std::vector<const Type*> &Params,
bool IsVarArgs)
- : DerivedType(FunctionTyID), isVarArgs(IsVarArgs) {
+ : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) {
ContainedTys = reinterpret_cast<PATypeHandle*>(this+1);
NumContainedTys = Params.size() + 1; // + 1 for result type
assert(isValidReturnType(Result) && "invalid return type for function");
@@ -393,8 +438,9 @@ FunctionType::FunctionType(const Type *Result,
setAbstract(isAbstract);
}
-StructType::StructType(const std::vector<const Type*> &Types, bool isPacked)
- : CompositeType(StructTyID) {
+StructType::StructType(LLVMContext &C,
+ const std::vector<const Type*> &Types, bool isPacked)
+ : CompositeType(C, StructTyID) {
ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
NumContainedTys = Types.size();
setSubclassData(isPacked);
@@ -437,10 +483,10 @@ PointerType::PointerType(const Type *E, unsigned AddrSpace)
setAbstract(E->isAbstract());
}
-OpaqueType::OpaqueType() : DerivedType(OpaqueTyID) {
+OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) {
setAbstract(true);
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << *this << "\n";
+ DEBUG(errs() << "Derived new type: " << *this << "\n");
#endif
}
@@ -464,8 +510,8 @@ void DerivedType::dropAllTypeUses() {
llvm_acquire_global_lock();
tmp = AlwaysOpaqueTy;
if (!tmp) {
- tmp = OpaqueType::get();
- PATypeHolder* tmp2 = new PATypeHolder(AlwaysOpaqueTy);
+ tmp = OpaqueType::get(getContext());
+ PATypeHolder* tmp2 = new PATypeHolder(tmp);
sys::MemoryFence();
AlwaysOpaqueTy = tmp;
Holder = tmp2;
@@ -473,8 +519,8 @@ void DerivedType::dropAllTypeUses() {
llvm_release_global_lock();
}
- } else {
- AlwaysOpaqueTy = OpaqueType::get();
+ } else if (!AlwaysOpaqueTy) {
+ AlwaysOpaqueTy = OpaqueType::get(getContext());
Holder = new PATypeHolder(AlwaysOpaqueTy);
}
@@ -482,9 +528,11 @@ void DerivedType::dropAllTypeUses() {
// Change the rest of the types to be Int32Ty's. It doesn't matter what we
// pick so long as it doesn't point back to this type. We choose something
- // concrete to avoid overhead for adding to AbstracTypeUser lists and stuff.
+ // concrete to avoid overhead for adding to AbstractTypeUser lists and
+ // stuff.
+ const Type *ConcreteTy = Type::getInt32Ty(getContext());
for (unsigned i = 1, e = NumContainedTys; i != e; ++i)
- ContainedTys[i] = Type::Int32Ty;
+ ContainedTys[i] = ConcreteTy;
}
}
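// Aside (not from the patch): the AlwaysOpaqueTy initialization above is the
// classic double-checked locking shape. A generic sketch of the pattern, with
// Ptr/create()/lock()/unlock() as stand-ins:
//
//   T *tmp = Ptr;                  // unsynchronized fast-path read
//   if (!tmp) {
//     lock();
//     tmp = Ptr;                   // re-check under the lock
//     if (!tmp) {
//       tmp = create();
//       sys::MemoryFence();        // publish the object before the pointer
//       Ptr = tmp;
//     }
//     unlock();
//   }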
@@ -633,7 +681,7 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
}
return true;
} else {
- assert(0 && "Unknown derived type!");
+ llvm_unreachable("Unknown derived type!");
return false;
}
}
@@ -695,327 +743,41 @@ static bool TypeHasCycleThroughItself(const Type *Ty) {
return false;
}
-/// getSubElementHash - Generate a hash value for all of the SubType's of this
-/// type. The hash value is guaranteed to be zero if any of the subtypes are
-/// an opaque type. Otherwise we try to mix them in as well as possible, but do
-/// not look at the subtype's subtype's.
-static unsigned getSubElementHash(const Type *Ty) {
- unsigned HashVal = 0;
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I) {
- HashVal *= 32;
- const Type *SubTy = I->get();
- HashVal += SubTy->getTypeID();
- switch (SubTy->getTypeID()) {
- default: break;
- case Type::OpaqueTyID: return 0; // Opaque -> hash = 0 no matter what.
- case Type::IntegerTyID:
- HashVal ^= (cast<IntegerType>(SubTy)->getBitWidth() << 3);
- break;
- case Type::FunctionTyID:
- HashVal ^= cast<FunctionType>(SubTy)->getNumParams()*2 +
- cast<FunctionType>(SubTy)->isVarArg();
- break;
- case Type::ArrayTyID:
- HashVal ^= cast<ArrayType>(SubTy)->getNumElements();
- break;
- case Type::VectorTyID:
- HashVal ^= cast<VectorType>(SubTy)->getNumElements();
- break;
- case Type::StructTyID:
- HashVal ^= cast<StructType>(SubTy)->getNumElements();
- break;
- case Type::PointerTyID:
- HashVal ^= cast<PointerType>(SubTy)->getAddressSpace();
- break;
- }
- }
- return HashVal ? HashVal : 1; // Do not return zero unless opaque subty.
-}
-
-//===----------------------------------------------------------------------===//
-// Derived Type Factory Functions
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-class TypeMapBase {
-protected:
- /// TypesByHash - Keep track of types by their structure hash value. Note
- /// that we only keep track of types that have cycles through themselves in
- /// this map.
- ///
- std::multimap<unsigned, PATypeHolder> TypesByHash;
-
-public:
- ~TypeMapBase() {
- // PATypeHolder won't destroy non-abstract types.
- // We can't destroy them by simply iterating, because
- // they may contain references to each-other.
-#if 0
- for (std::multimap<unsigned, PATypeHolder>::iterator I
- = TypesByHash.begin(), E = TypesByHash.end(); I != E; ++I) {
- Type *Ty = const_cast<Type*>(I->second.Ty);
- I->second.destroy();
- // We can't invoke destroy or delete, because the type may
- // contain references to already freed types.
- // So we have to destruct the object the ugly way.
- if (Ty) {
- Ty->AbstractTypeUsers.clear();
- static_cast<const Type*>(Ty)->Type::~Type();
- operator delete(Ty);
- }
- }
-#endif
- }
-
- void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) {
- std::multimap<unsigned, PATypeHolder>::iterator I =
- TypesByHash.lower_bound(Hash);
- for (; I != TypesByHash.end() && I->first == Hash; ++I) {
- if (I->second == Ty) {
- TypesByHash.erase(I);
- return;
- }
- }
-
- // This must be do to an opaque type that was resolved. Switch down to hash
- // code of zero.
- assert(Hash && "Didn't find type entry!");
- RemoveFromTypesByHash(0, Ty);
- }
-
- /// TypeBecameConcrete - When Ty gets a notification that TheType just became
- /// concrete, drop uses and make Ty non-abstract if we should.
- void TypeBecameConcrete(DerivedType *Ty, const DerivedType *TheType) {
- // If the element just became concrete, remove 'ty' from the abstract
- // type user list for the type. Do this for as many times as Ty uses
- // OldType.
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- if (I->get() == TheType)
- TheType->removeAbstractTypeUser(Ty);
-
- // If the type is currently thought to be abstract, rescan all of our
- // subtypes to see if the type has just become concrete! Note that this
- // may send out notifications to AbstractTypeUsers that types become
- // concrete.
- if (Ty->isAbstract())
- Ty->PromoteAbstractToConcrete();
- }
-};
-}
-
-
-// TypeMap - Make sure that only one instance of a particular type may be
-// created on any given run of the compiler... note that this involves updating
-// our map if an abstract type gets refined somehow.
-//
-namespace llvm {
-template<class ValType, class TypeClass>
-class TypeMap : public TypeMapBase {
- std::map<ValType, PATypeHolder> Map;
-public:
- typedef typename std::map<ValType, PATypeHolder>::iterator iterator;
- ~TypeMap() { print("ON EXIT"); }
-
- inline TypeClass *get(const ValType &V) {
- iterator I = Map.find(V);
- return I != Map.end() ? cast<TypeClass>((Type*)I->second.get()) : 0;
- }
-
- inline void add(const ValType &V, TypeClass *Ty) {
- Map.insert(std::make_pair(V, Ty));
-
- // If this type has a cycle, remember it.
- TypesByHash.insert(std::make_pair(ValType::hashTypeStructure(Ty), Ty));
- print("add");
- }
-
- /// RefineAbstractType - This method is called after we have merged a type
- /// with another one. We must now either merge the type away with
- /// some other type or reinstall it in the map with it's new configuration.
- void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
- const Type *NewType) {
-#ifdef DEBUG_MERGE_TYPES
- DOUT << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
- << "], " << (void*)NewType << " [" << *NewType << "])\n";
-#endif
-
- // Otherwise, we are changing one subelement type into another. Clearly the
- // OldType must have been abstract, making us abstract.
- assert(Ty->isAbstract() && "Refining a non-abstract type!");
- assert(OldType != NewType);
-
- // Make a temporary type holder for the type so that it doesn't disappear on
- // us when we erase the entry from the map.
- PATypeHolder TyHolder = Ty;
-
- // The old record is now out-of-date, because one of the children has been
- // updated. Remove the obsolete entry from the map.
- unsigned NumErased = Map.erase(ValType::get(Ty));
- assert(NumErased && "Element not found!"); NumErased = NumErased;
-
- // Remember the structural hash for the type before we start hacking on it,
- // in case we need it later.
- unsigned OldTypeHash = ValType::hashTypeStructure(Ty);
-
- // Find the type element we are refining... and change it now!
- for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i)
- if (Ty->ContainedTys[i] == OldType)
- Ty->ContainedTys[i] = NewType;
- unsigned NewTypeHash = ValType::hashTypeStructure(Ty);
-
- // If there are no cycles going through this node, we can do a simple,
- // efficient lookup in the map, instead of an inefficient nasty linear
- // lookup.
- if (!TypeHasCycleThroughItself(Ty)) {
- typename std::map<ValType, PATypeHolder>::iterator I;
- bool Inserted;
-
- tie(I, Inserted) = Map.insert(std::make_pair(ValType::get(Ty), Ty));
- if (!Inserted) {
- // Refined to a different type altogether?
- RemoveFromTypesByHash(OldTypeHash, Ty);
-
- // We already have this type in the table. Get rid of the newly refined
- // type.
- TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
- Ty->unlockedRefineAbstractTypeTo(NewTy);
- return;
- }
- } else {
- // Now we check to see if there is an existing entry in the table which is
- // structurally identical to the newly refined type. If so, this type
- // gets refined to the pre-existing type.
- //
- std::multimap<unsigned, PATypeHolder>::iterator I, E, Entry;
- tie(I, E) = TypesByHash.equal_range(NewTypeHash);
- Entry = E;
- for (; I != E; ++I) {
- if (I->second == Ty) {
- // Remember the position of the old type if we see it in our scan.
- Entry = I;
- } else {
- if (TypesEqual(Ty, I->second)) {
- TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
- // Remove the old entry form TypesByHash. If the hash values differ
- // now, remove it from the old place. Otherwise, continue scanning
- // withing this hashcode to reduce work.
- if (NewTypeHash != OldTypeHash) {
- RemoveFromTypesByHash(OldTypeHash, Ty);
- } else {
- if (Entry == E) {
- // Find the location of Ty in the TypesByHash structure if we
- // haven't seen it already.
- while (I->second != Ty) {
- ++I;
- assert(I != E && "Structure doesn't contain type??");
- }
- Entry = I;
- }
- TypesByHash.erase(Entry);
- }
- Ty->unlockedRefineAbstractTypeTo(NewTy);
- return;
- }
- }
- }
-
- // If there is no existing type of the same structure, we reinsert an
- // updated record into the map.
- Map.insert(std::make_pair(ValType::get(Ty), Ty));
- }
-
- // If the hash codes differ, update TypesByHash
- if (NewTypeHash != OldTypeHash) {
- RemoveFromTypesByHash(OldTypeHash, Ty);
- TypesByHash.insert(std::make_pair(NewTypeHash, Ty));
- }
-
- // If the type is currently thought to be abstract, rescan all of our
- // subtypes to see if the type has just become concrete! Note that this
- // may send out notifications to AbstractTypeUsers that types become
- // concrete.
- if (Ty->isAbstract())
- Ty->PromoteAbstractToConcrete();
- }
-
- void print(const char *Arg) const {
-#ifdef DEBUG_MERGE_TYPES
- DOUT << "TypeMap<>::" << Arg << " table contents:\n";
- unsigned i = 0;
- for (typename std::map<ValType, PATypeHolder>::const_iterator I
- = Map.begin(), E = Map.end(); I != E; ++I)
- DOUT << " " << (++i) << ". " << (void*)I->second.get() << " "
- << *I->second.get() << "\n";
-#endif
- }
-
- void dump() const { print("dump output"); }
-};
-}
-
-
//===----------------------------------------------------------------------===//
// Function Type Factory and Value Class...
//
-
-//===----------------------------------------------------------------------===//
-// Integer Type Factory...
-//
-namespace llvm {
-class IntegerValType {
- uint32_t bits;
-public:
- IntegerValType(uint16_t numbits) : bits(numbits) {}
-
- static IntegerValType get(const IntegerType *Ty) {
- return IntegerValType(Ty->getBitWidth());
- }
-
- static unsigned hashTypeStructure(const IntegerType *Ty) {
- return (unsigned)Ty->getBitWidth();
- }
-
- inline bool operator<(const IntegerValType &IVT) const {
- return bits < IVT.bits;
- }
-};
-}
-
-static ManagedStatic<TypeMap<IntegerValType, IntegerType> > IntegerTypes;
-
-const IntegerType *IntegerType::get(unsigned NumBits) {
+const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
// Check for the built-in integer types
switch (NumBits) {
- case 1: return cast<IntegerType>(Type::Int1Ty);
- case 8: return cast<IntegerType>(Type::Int8Ty);
- case 16: return cast<IntegerType>(Type::Int16Ty);
- case 32: return cast<IntegerType>(Type::Int32Ty);
- case 64: return cast<IntegerType>(Type::Int64Ty);
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
default:
break;
}
+
+ LLVMContextImpl *pImpl = C.pImpl;
IntegerValType IVT(NumBits);
IntegerType *ITy = 0;
// First, see if the type is already in the table, for which
// a reader lock suffices.
- sys::SmartScopedLock<true> L(&*TypeMapLock);
- ITy = IntegerTypes->get(IVT);
+ sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
+ ITy = pImpl->IntegerTypes.get(IVT);
if (!ITy) {
// Value not found. Derive a new type!
- ITy = new IntegerType(NumBits);
- IntegerTypes->add(IVT, ITy);
+ ITy = new IntegerType(C, NumBits);
+ pImpl->IntegerTypes.add(IVT, ITy);
}
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << *ITy << "\n";
+ DEBUG(errs() << "Derived new type: " << *ITy << "\n");
#endif
return ITy;
}
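// Illustration (not from the patch): integer types of any width in
// [MIN_INT_BITS, MAX_INT_BITS] are uniqued per context; the common widths
// short-circuit to the context's built-ins. Assumes an LLVMContext &Ctx:
//
//   const IntegerType *I7 = IntegerType::get(Ctx, 7);   // i7, created on demand
//   assert(IntegerType::get(Ctx, 7) == I7);             // same context, same object
//   assert(IntegerType::get(Ctx, 32) == Type::getInt32Ty(Ctx));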
@@ -1029,39 +791,6 @@ APInt IntegerType::getMask() const {
return APInt::getAllOnesValue(getBitWidth());
}
-// FunctionValType - Define a class to hold the key that goes into the TypeMap
-//
-namespace llvm {
-class FunctionValType {
- const Type *RetTy;
- std::vector<const Type*> ArgTypes;
- bool isVarArg;
-public:
- FunctionValType(const Type *ret, const std::vector<const Type*> &args,
- bool isVA) : RetTy(ret), ArgTypes(args), isVarArg(isVA) {}
-
- static FunctionValType get(const FunctionType *FT);
-
- static unsigned hashTypeStructure(const FunctionType *FT) {
- unsigned Result = FT->getNumParams()*2 + FT->isVarArg();
- return Result;
- }
-
- inline bool operator<(const FunctionValType &MTV) const {
- if (RetTy < MTV.RetTy) return true;
- if (RetTy > MTV.RetTy) return false;
- if (isVarArg < MTV.isVarArg) return true;
- if (isVarArg > MTV.isVarArg) return false;
- if (ArgTypes < MTV.ArgTypes) return true;
- if (ArgTypes > MTV.ArgTypes) return false;
- return false;
- }
-};
-}
-
-// Define the actual map itself now...
-static ManagedStatic<TypeMap<FunctionValType, FunctionType> > FunctionTypes;
-
FunctionValType FunctionValType::get(const FunctionType *FT) {
// Build up a FunctionValType
std::vector<const Type *> ParamTypes;
@@ -1079,194 +808,105 @@ FunctionType *FunctionType::get(const Type *ReturnType,
FunctionValType VT(ReturnType, Params, isVarArg);
FunctionType *FT = 0;
- sys::SmartScopedLock<true> L(&*TypeMapLock);
- FT = FunctionTypes->get(VT);
+ LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+
+ sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
+ FT = pImpl->FunctionTypes.get(VT);
if (!FT) {
FT = (FunctionType*) operator new(sizeof(FunctionType) +
sizeof(PATypeHandle)*(Params.size()+1));
new (FT) FunctionType(ReturnType, Params, isVarArg);
- FunctionTypes->add(VT, FT);
+ pImpl->FunctionTypes.add(VT, FT);
}
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << FT << "\n";
+  DEBUG(errs() << "Derived new type: " << *FT << "\n");
#endif
return FT;
}
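// Illustration (not from the patch): building a function type with the
// uniquing factory. A sketch, assuming an LLVMContext &Ctx; the result is
// `i32 (i8*, ...)`, e.g. a printf-style signature:
//
//   std::vector<const Type*> Params;
//   Params.push_back(Type::getInt8PtrTy(Ctx, 0));
//   FunctionType *FT = FunctionType::get(Type::getInt32Ty(Ctx), Params,
//                                        /*isVarArg=*/true);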
-//===----------------------------------------------------------------------===//
-// Array Type Factory...
-//
-namespace llvm {
-class ArrayValType {
- const Type *ValTy;
- uint64_t Size;
-public:
- ArrayValType(const Type *val, uint64_t sz) : ValTy(val), Size(sz) {}
-
- static ArrayValType get(const ArrayType *AT) {
- return ArrayValType(AT->getElementType(), AT->getNumElements());
- }
-
- static unsigned hashTypeStructure(const ArrayType *AT) {
- return (unsigned)AT->getNumElements();
- }
-
- inline bool operator<(const ArrayValType &MTV) const {
- if (Size < MTV.Size) return true;
- return Size == MTV.Size && ValTy < MTV.ValTy;
- }
-};
-}
-
-static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
-
ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
assert(ElementType && "Can't get array of <null> types!");
assert(isValidElementType(ElementType) && "Invalid type for array element!");
ArrayValType AVT(ElementType, NumElements);
ArrayType *AT = 0;
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
- sys::SmartScopedLock<true> L(&*TypeMapLock);
- AT = ArrayTypes->get(AVT);
+ sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
+ AT = pImpl->ArrayTypes.get(AVT);
if (!AT) {
// Value not found. Derive a new type!
- ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
+ pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements));
}
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << *AT << "\n";
+ DEBUG(errs() << "Derived new type: " << *AT << "\n");
#endif
return AT;
}
bool ArrayType::isValidElementType(const Type *ElemTy) {
- if (ElemTy == Type::VoidTy || ElemTy == Type::LabelTy ||
- ElemTy == Type::MetadataTy)
- return false;
-
- if (const PointerType *PTy = dyn_cast<PointerType>(ElemTy))
- if (PTy->getElementType() == Type::MetadataTy)
- return false;
-
- return true;
+ return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
+ ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy);
}
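// Illustration (not from the patch): arrays are uniqued on (element, count),
// and the rewritten validity check rejects void/label/metadata and function
// element types by TypeID. Assumes an LLVMContext &Ctx:
//
//   ArrayType *A = ArrayType::get(Type::getInt8Ty(Ctx), 16);    // [16 x i8]
//   assert(A == ArrayType::get(Type::getInt8Ty(Ctx), 16));      // uniqued
//   assert(!ArrayType::isValidElementType(Type::getVoidTy(Ctx)));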
-
-//===----------------------------------------------------------------------===//
-// Vector Type Factory...
-//
-namespace llvm {
-class VectorValType {
- const Type *ValTy;
- unsigned Size;
-public:
- VectorValType(const Type *val, int sz) : ValTy(val), Size(sz) {}
-
- static VectorValType get(const VectorType *PT) {
- return VectorValType(PT->getElementType(), PT->getNumElements());
- }
-
- static unsigned hashTypeStructure(const VectorType *PT) {
- return PT->getNumElements();
- }
-
- inline bool operator<(const VectorValType &MTV) const {
- if (Size < MTV.Size) return true;
- return Size == MTV.Size && ValTy < MTV.ValTy;
- }
-};
-}
-
-static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
-
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
assert(ElementType && "Can't get vector of <null> types!");
VectorValType PVT(ElementType, NumElements);
VectorType *PT = 0;
- sys::SmartScopedLock<true> L(&*TypeMapLock);
- PT = VectorTypes->get(PVT);
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+
+ sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
+ PT = pImpl->VectorTypes.get(PVT);
if (!PT) {
- VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
+ pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements));
}
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << *PT << "\n";
+ DEBUG(errs() << "Derived new type: " << *PT << "\n");
#endif
return PT;
}
bool VectorType::isValidElementType(const Type *ElemTy) {
- if (ElemTy->isInteger() || ElemTy->isFloatingPoint() ||
- isa<OpaqueType>(ElemTy))
- return true;
-
- return false;
+ return ElemTy->isInteger() || ElemTy->isFloatingPoint() ||
+ isa<OpaqueType>(ElemTy);
}
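// Illustration (not from the patch): vector elements must be integer or
// floating point (or opaque, pending resolution); pointers and aggregates
// are rejected. Assumes an LLVMContext &Ctx:
//
//   VectorType *V = VectorType::get(Type::getFloatTy(Ctx), 4);  // <4 x float>
//   assert(VectorType::isValidElementType(Type::getFloatTy(Ctx)));
//   assert(!VectorType::isValidElementType(Type::getInt8PtrTy(Ctx, 0)));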
//===----------------------------------------------------------------------===//
// Struct Type Factory...
//
-namespace llvm {
-// StructValType - Define a class to hold the key that goes into the TypeMap
-//
-class StructValType {
- std::vector<const Type*> ElTypes;
- bool packed;
-public:
- StructValType(const std::vector<const Type*> &args, bool isPacked)
- : ElTypes(args), packed(isPacked) {}
-
- static StructValType get(const StructType *ST) {
- std::vector<const Type *> ElTypes;
- ElTypes.reserve(ST->getNumElements());
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
- ElTypes.push_back(ST->getElementType(i));
-
- return StructValType(ElTypes, ST->isPacked());
- }
-
- static unsigned hashTypeStructure(const StructType *ST) {
- return ST->getNumElements();
- }
-
- inline bool operator<(const StructValType &STV) const {
- if (ElTypes < STV.ElTypes) return true;
- else if (ElTypes > STV.ElTypes) return false;
- else return (int)packed < (int)STV.packed;
- }
-};
-}
-
-static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
-
-StructType *StructType::get(const std::vector<const Type*> &ETypes,
+StructType *StructType::get(LLVMContext &Context,
+ const std::vector<const Type*> &ETypes,
bool isPacked) {
StructValType STV(ETypes, isPacked);
StructType *ST = 0;
- sys::SmartScopedLock<true> L(&*TypeMapLock);
- ST = StructTypes->get(STV);
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
+ ST = pImpl->StructTypes.get(STV);
if (!ST) {
// Value not found. Derive a new type!
ST = (StructType*) operator new(sizeof(StructType) +
sizeof(PATypeHandle) * ETypes.size());
- new (ST) StructType(ETypes, isPacked);
- StructTypes->add(STV, ST);
+ new (ST) StructType(Context, ETypes, isPacked);
+ pImpl->StructTypes.add(STV, ST);
}
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << *ST << "\n";
+ DEBUG(errs() << "Derived new type: " << *ST << "\n");
#endif
return ST;
}
-StructType *StructType::get(const Type *type, ...) {
+StructType *StructType::get(LLVMContext &Context, const Type *type, ...) {
va_list ap;
std::vector<const llvm::Type*> StructFields;
va_start(ap, type);
@@ -1274,19 +914,12 @@ StructType *StructType::get(const Type *type, ...) {
StructFields.push_back(type);
type = va_arg(ap, llvm::Type*);
}
- return llvm::StructType::get(StructFields);
+ return llvm::StructType::get(Context, StructFields);
}
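// Illustration (not from the patch): the variadic overload consumes Type*
// arguments until it reads a null sentinel (per the va_arg loop above), so a
// call site looks like this sketch, assuming an LLVMContext &Ctx:
//
//   StructType *Pair = StructType::get(Ctx, Type::getInt32Ty(Ctx),
//                                      Type::getDoubleTy(Ctx), NULL);
//   // equivalent to the std::vector form: { i32, double }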
bool StructType::isValidElementType(const Type *ElemTy) {
- if (ElemTy == Type::VoidTy || ElemTy == Type::LabelTy ||
- ElemTy == Type::MetadataTy)
- return false;
-
- if (const PointerType *PTy = dyn_cast<PointerType>(ElemTy))
- if (PTy->getElementType() == Type::MetadataTy)
- return false;
-
- return true;
+ return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
+ ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy);
}
@@ -1294,67 +927,38 @@ bool StructType::isValidElementType(const Type *ElemTy) {
// Pointer Type Factory...
//
-// PointerValType - Define a class to hold the key that goes into the TypeMap
-//
-namespace llvm {
-class PointerValType {
- const Type *ValTy;
- unsigned AddressSpace;
-public:
- PointerValType(const Type *val, unsigned as) : ValTy(val), AddressSpace(as) {}
-
- static PointerValType get(const PointerType *PT) {
- return PointerValType(PT->getElementType(), PT->getAddressSpace());
- }
-
- static unsigned hashTypeStructure(const PointerType *PT) {
- return getSubElementHash(PT);
- }
-
- bool operator<(const PointerValType &MTV) const {
- if (AddressSpace < MTV.AddressSpace) return true;
- return AddressSpace == MTV.AddressSpace && ValTy < MTV.ValTy;
- }
-};
-}
-
-static ManagedStatic<TypeMap<PointerValType, PointerType> > PointerTypes;
-
PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
assert(ValueType && "Can't get a pointer to <null> type!");
- assert(ValueType != Type::VoidTy &&
+ assert(ValueType->getTypeID() != VoidTyID &&
"Pointer to void is not valid, use i8* instead!");
assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
PointerValType PVT(ValueType, AddressSpace);
PointerType *PT = 0;
- sys::SmartScopedLock<true> L(&*TypeMapLock);
- PT = PointerTypes->get(PVT);
+ LLVMContextImpl *pImpl = ValueType->getContext().pImpl;
+
+ sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
+ PT = pImpl->PointerTypes.get(PVT);
if (!PT) {
// Value not found. Derive a new type!
- PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
+ pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace));
}
#ifdef DEBUG_MERGE_TYPES
- DOUT << "Derived new type: " << *PT << "\n";
+ DEBUG(errs() << "Derived new type: " << *PT << "\n");
#endif
return PT;
}
-PointerType *Type::getPointerTo(unsigned addrs) const {
+const PointerType *Type::getPointerTo(unsigned addrs) const {
return PointerType::get(this, addrs);
}
bool PointerType::isValidElementType(const Type *ElemTy) {
- if (ElemTy == Type::VoidTy || ElemTy == Type::LabelTy)
- return false;
-
- if (const PointerType *PTy = dyn_cast<PointerType>(ElemTy))
- if (PTy->getElementType() == Type::MetadataTy)
- return false;
-
- return true;
+ return ElemTy->getTypeID() != VoidTyID &&
+ ElemTy->getTypeID() != LabelTyID &&
+ ElemTy->getTypeID() != MetadataTyID;
}
@@ -1366,9 +970,10 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
// it. This function is called primarily by the PATypeHandle class.
void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
- AbstractTypeUsersLock->acquire();
+ LLVMContextImpl *pImpl = getContext().pImpl;
+ pImpl->AbstractTypeUsersLock.acquire();
AbstractTypeUsers.push_back(U);
- AbstractTypeUsersLock->release();
+ pImpl->AbstractTypeUsersLock.release();
}
@@ -1378,7 +983,8 @@ void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
// is annihilated, because there is no way to get a reference to it ever again.
//
void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
- AbstractTypeUsersLock->acquire();
+ LLVMContextImpl *pImpl = getContext().pImpl;
+ pImpl->AbstractTypeUsersLock.acquire();
// Search from back to front because we will notify users from back to
  // front.  Also, it is likely that there will be a stack-like behavior to
@@ -1394,20 +1000,20 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i);
#ifdef DEBUG_MERGE_TYPES
- DOUT << " remAbstractTypeUser[" << (void*)this << ", "
- << *this << "][" << i << "] User = " << U << "\n";
+ DEBUG(errs() << " remAbstractTypeUser[" << (void*)this << ", "
+ << *this << "][" << i << "] User = " << U << "\n");
#endif
if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) {
#ifdef DEBUG_MERGE_TYPES
- DOUT << "DELETEing unused abstract type: <" << *this
- << ">[" << (void*)this << "]" << "\n";
+ DEBUG(errs() << "DELETEing unused abstract type: <" << *this
+ << ">[" << (void*)this << "]" << "\n");
#endif
this->destroy();
}
- AbstractTypeUsersLock->release();
+ pImpl->AbstractTypeUsersLock.release();
}
// unlockedRefineAbstractTypeTo - This function is used when it is discovered
@@ -1421,21 +1027,22 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
assert(this != NewType && "Can't refine to myself!");
assert(ForwardType == 0 && "This type has already been refined!");
+ LLVMContextImpl *pImpl = getContext().pImpl;
+
// The descriptions may be out of date. Conservatively clear them all!
- if (AbstractTypeDescriptions.isConstructed())
- AbstractTypeDescriptions->clear();
+ pImpl->AbstractTypeDescriptions.clear();
#ifdef DEBUG_MERGE_TYPES
- DOUT << "REFINING abstract type [" << (void*)this << " "
- << *this << "] to [" << (void*)NewType << " "
- << *NewType << "]!\n";
+ DEBUG(errs() << "REFINING abstract type [" << (void*)this << " "
+ << *this << "] to [" << (void*)NewType << " "
+ << *NewType << "]!\n");
#endif
// Make sure to put the type to be refined to into a holder so that if IT gets
// refined, that we will not continue using a dead reference...
//
PATypeHolder NewTy(NewType);
- // Any PATypeHolders referring to this type will now automatically forward o
+ // Any PATypeHolders referring to this type will now automatically forward to
// the type we are resolved to.
ForwardType = NewType;
if (NewType->isAbstract())
@@ -1458,23 +1065,23 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
// will not cause users to drop off of the use list. If we resolve to ourself
// we succeed!
//
- AbstractTypeUsersLock->acquire();
+ pImpl->AbstractTypeUsersLock.acquire();
while (!AbstractTypeUsers.empty() && NewTy != this) {
AbstractTypeUser *User = AbstractTypeUsers.back();
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
#ifdef DEBUG_MERGE_TYPES
- DOUT << " REFINING user " << OldSize-1 << "[" << (void*)User
- << "] of abstract type [" << (void*)this << " "
- << *this << "] to [" << (void*)NewTy.get() << " "
- << *NewTy << "]!\n";
+ DEBUG(errs() << " REFINING user " << OldSize-1 << "[" << (void*)User
+ << "] of abstract type [" << (void*)this << " "
+ << *this << "] to [" << (void*)NewTy.get() << " "
+ << *NewTy << "]!\n");
#endif
User->refineAbstractType(this, NewTy);
assert(AbstractTypeUsers.size() != OldSize &&
"AbsTyUser did not remove self from user list!");
}
- AbstractTypeUsersLock->release();
+ pImpl->AbstractTypeUsersLock.release();
// If we were successful removing all users from the type, 'this' will be
// deleted when the last PATypeHolder is destroyed or updated from this type.
@@ -1488,7 +1095,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// All recursive calls will go through unlockedRefineAbstractTypeTo,
// to avoid deadlock problems.
- sys::SmartScopedLock<true> L(&*TypeMapLock);
+ sys::SmartScopedLock<true> L(NewType->getContext().pImpl->TypeMapLock);
unlockedRefineAbstractTypeTo(NewType);
}
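// Illustration (not from the patch): refineAbstractTypeTo is the hook behind
// the classic recursive-type idiom -- build with a placeholder OpaqueType,
// then resolve it. A sketch of `struct S { S* };`, assuming an LLVMContext &Ctx:
//
//   PATypeHolder H = OpaqueType::get(Ctx);
//   std::vector<const Type*> Elts;
//   Elts.push_back(PointerType::getUnqual(H.get()));        // S* (S still opaque)
//   StructType *ST = StructType::get(Ctx, Elts, /*isPacked=*/false);
//   cast<OpaqueType>(H.get())->refineAbstractTypeTo(ST);    // S := { S* }
//   const Type *S = H.get();  // H forwarded to the resolved, self-referential type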
@@ -1497,10 +1104,12 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
//
void DerivedType::notifyUsesThatTypeBecameConcrete() {
#ifdef DEBUG_MERGE_TYPES
- DOUT << "typeIsREFINED type: " << (void*)this << " " << *this << "\n";
+ DEBUG(errs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
#endif
- AbstractTypeUsersLock->acquire();
+ LLVMContextImpl *pImpl = getContext().pImpl;
+
+ pImpl->AbstractTypeUsersLock.acquire();
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
while (!AbstractTypeUsers.empty()) {
AbstractTypeUser *ATU = AbstractTypeUsers.back();
@@ -1509,7 +1118,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
assert(AbstractTypeUsers.size() < OldSize-- &&
"AbstractTypeUser did not remove itself from the use list!");
}
- AbstractTypeUsersLock->release();
+ pImpl->AbstractTypeUsersLock.release();
}
// refineAbstractType - Called when a contained type is found to be more
@@ -1518,11 +1127,13 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
//
void FunctionType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- FunctionTypes->RefineAbstractType(this, OldType, NewType);
+ LLVMContextImpl *pImpl = OldType->getContext().pImpl;
+ pImpl->FunctionTypes.RefineAbstractType(this, OldType, NewType);
}
void FunctionType::typeBecameConcrete(const DerivedType *AbsTy) {
- FunctionTypes->TypeBecameConcrete(this, AbsTy);
+ LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
+ pImpl->FunctionTypes.TypeBecameConcrete(this, AbsTy);
}
@@ -1532,11 +1143,13 @@ void FunctionType::typeBecameConcrete(const DerivedType *AbsTy) {
//
void ArrayType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- ArrayTypes->RefineAbstractType(this, OldType, NewType);
+ LLVMContextImpl *pImpl = OldType->getContext().pImpl;
+ pImpl->ArrayTypes.RefineAbstractType(this, OldType, NewType);
}
void ArrayType::typeBecameConcrete(const DerivedType *AbsTy) {
- ArrayTypes->TypeBecameConcrete(this, AbsTy);
+ LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
+ pImpl->ArrayTypes.TypeBecameConcrete(this, AbsTy);
}
// refineAbstractType - Called when a contained type is found to be more
@@ -1545,11 +1158,13 @@ void ArrayType::typeBecameConcrete(const DerivedType *AbsTy) {
//
void VectorType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- VectorTypes->RefineAbstractType(this, OldType, NewType);
+ LLVMContextImpl *pImpl = OldType->getContext().pImpl;
+ pImpl->VectorTypes.RefineAbstractType(this, OldType, NewType);
}
void VectorType::typeBecameConcrete(const DerivedType *AbsTy) {
- VectorTypes->TypeBecameConcrete(this, AbsTy);
+ LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
+ pImpl->VectorTypes.TypeBecameConcrete(this, AbsTy);
}
// refineAbstractType - Called when a contained type is found to be more
@@ -1558,11 +1173,13 @@ void VectorType::typeBecameConcrete(const DerivedType *AbsTy) {
//
void StructType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- StructTypes->RefineAbstractType(this, OldType, NewType);
+ LLVMContextImpl *pImpl = OldType->getContext().pImpl;
+ pImpl->StructTypes.RefineAbstractType(this, OldType, NewType);
}
void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
- StructTypes->TypeBecameConcrete(this, AbsTy);
+ LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
+ pImpl->StructTypes.TypeBecameConcrete(this, AbsTy);
}
// refineAbstractType - Called when a contained type is found to be more
@@ -1571,11 +1188,13 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
//
void PointerType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- PointerTypes->RefineAbstractType(this, OldType, NewType);
+ LLVMContextImpl *pImpl = OldType->getContext().pImpl;
+ pImpl->PointerTypes.RefineAbstractType(this, OldType, NewType);
}
void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
- PointerTypes->TypeBecameConcrete(this, AbsTy);
+ LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
+ pImpl->PointerTypes.TypeBecameConcrete(this, AbsTy);
}
bool SequentialType::indexValid(const Value *V) const {
@@ -1585,19 +1204,6 @@ bool SequentialType::indexValid(const Value *V) const {
}
namespace llvm {
-std::ostream &operator<<(std::ostream &OS, const Type *T) {
- if (T == 0)
- OS << "<null> value!\n";
- else
- T->print(OS);
- return OS;
-}
-
-std::ostream &operator<<(std::ostream &OS, const Type &T) {
- T.print(OS);
- return OS;
-}
-
raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
T.print(OS);
return OS;
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 5ae60e28d7f0..f31ea6693e0b 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -14,8 +14,9 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/RWMutex.h"
#include "llvm/System/Threading.h"
#include <algorithm>
@@ -34,22 +35,22 @@ TypeSymbolTable::~TypeSymbolTable() {
}
}
-std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
+std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
std::string TryName = BaseName;
- sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
const_iterator End = tmap.end();
// See if the name exists
while (tmap.find(TryName) != End) // Loop until we find a free
- TryName = BaseName + utostr(++LastUnique); // name in the symbol table
+ TryName = BaseName.str() + utostr(++LastUnique); // name in the symbol table
return TryName;
}
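// Illustration (not from the patch): getUniqueName only proposes a name; it
// appends a rising counter until the candidate is free. A sketch, with TST a
// hypothetical TypeSymbolTable already holding an entry named "T":
//
//   TST.getUniqueName("T");   // e.g. "T1" -- "T" is taken, so a suffix is added
//   TST.getUniqueName("U");   // "U" -- unused names come back untouched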
// lookup a type by name - returns null on failure
-Type* TypeSymbolTable::lookup(const std::string& Name) const {
- sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+Type* TypeSymbolTable::lookup(const StringRef &Name) const {
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
const_iterator TI = tmap.find(Name);
Type* result = 0;
@@ -58,6 +59,17 @@ Type* TypeSymbolTable::lookup(const std::string& Name) const {
return result;
}
+TypeSymbolTable::iterator TypeSymbolTable::find(const StringRef &Name) {
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
+ return tmap.find(Name);
+}
+
+TypeSymbolTable::const_iterator
+TypeSymbolTable::find(const StringRef &Name) const {
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
+ return tmap.find(Name);
+}
+
// remove - Remove a type from the symbol table...
Type* TypeSymbolTable::remove(iterator Entry) {
TypeSymbolTableLock->writer_acquire();
@@ -67,7 +79,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
#if DEBUG_SYMBOL_TABLE
dump();
- cerr << " Removing Value: " << Result->getName() << "\n";
+ errs() << " Removing Value: " << Result->getDescription() << "\n";
#endif
tmap.erase(Entry);
@@ -78,9 +90,9 @@ Type* TypeSymbolTable::remove(iterator Entry) {
// list...
if (Result->isAbstract()) {
#if DEBUG_ABSTYPE
- cerr << "Removing abstract type from symtab"
- << Result->getDescription()
- << "\n";
+ errs() << "Removing abstract type from symtab"
+ << Result->getDescription()
+ << "\n";
#endif
cast<DerivedType>(Result)->removeAbstractTypeUser(this);
}
@@ -90,17 +102,17 @@ Type* TypeSymbolTable::remove(iterator Entry) {
// insert - Insert a type into the symbol table with the specified name...
-void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
+void TypeSymbolTable::insert(const StringRef &Name, const Type* T) {
assert(T && "Can't insert null type into symbol table!");
TypeSymbolTableLock->writer_acquire();
- if (tmap.insert(make_pair(Name, T)).second) {
+ if (tmap.insert(std::make_pair(Name, T)).second) {
// Type inserted fine with no conflict.
#if DEBUG_SYMBOL_TABLE
dump();
- cerr << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
+ errs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
#endif
} else {
// If there is a name conflict...
@@ -112,8 +124,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
#if DEBUG_SYMBOL_TABLE
dump();
- cerr << " Inserting type: " << UniqueName << ": "
- << T->getDescription() << "\n";
+ errs() << " Inserting type: " << UniqueName << ": "
+ << T->getDescription() << "\n";
#endif
// Insert the tmap entry
@@ -126,7 +138,7 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
if (T->isAbstract()) {
cast<DerivedType>(T)->addAbstractTypeUser(this);
#if DEBUG_ABSTYPE
- cerr << "Added abstract type to ST: " << T->getDescription() << "\n";
+ errs() << "Added abstract type to ST: " << T->getDescription() << "\n";
#endif
}
}
@@ -134,7 +146,7 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
// This function is called when one of the types in the type plane are refined
void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
// Loop over all of the types in the symbol table, replacing any references
// to OldType with references to NewType. Note that there may be multiple
@@ -144,14 +156,14 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I->second == (Type*)OldType) { // FIXME when Types aren't const.
#if DEBUG_ABSTYPE
- cerr << "Removing type " << OldType->getDescription() << "\n";
+ errs() << "Removing type " << OldType->getDescription() << "\n";
#endif
OldType->removeAbstractTypeUser(this);
I->second = (Type*)NewType; // TODO FIXME when types aren't const
if (NewType->isAbstract()) {
#if DEBUG_ABSTYPE
- cerr << "Added type " << NewType->getDescription() << "\n";
+ errs() << "Added type " << NewType->getDescription() << "\n";
#endif
cast<DerivedType>(NewType)->addAbstractTypeUser(this);
}
@@ -165,21 +177,21 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
// Loop over all of the types in the symbol table, dropping any abstract
// type user entries for AbsTy which occur because there are names for the
// type.
- sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
AbsTy->removeAbstractTypeUser(this);
}
static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
- cerr << " '" << T.first << "' = ";
+ errs() << " '" << T.first << "' = ";
T.second->dump();
- cerr << "\n";
+ errs() << "\n";
}
void TypeSymbolTable::dump() const {
- cerr << "TypeSymbolPlane: ";
- sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+ errs() << "TypeSymbolPlane: ";
+ sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
for_each(tmap.begin(), tmap.end(), DumpTypes);
}
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
new file mode 100644
index 000000000000..e7950bd211ff
--- /dev/null
+++ b/lib/VMCore/TypesContext.h
@@ -0,0 +1,424 @@
+//===-- TypesContext.h - Types-related Context Internals ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for creating and managing types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TYPESCONTEXT_H
+#define LLVM_TYPESCONTEXT_H
+
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+
+
+//===----------------------------------------------------------------------===//
+// Derived Type Factory Functions
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+/// getSubElementHash - Generate a hash value for all of the SubType's of this
+/// type. The hash value is guaranteed to be zero if any of the subtypes are
+/// an opaque type. Otherwise we try to mix them in as well as possible, but do
+/// not look at the subtypes' subtypes.
+static unsigned getSubElementHash(const Type *Ty) {
+ unsigned HashVal = 0;
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I) {
+ HashVal *= 32;
+ const Type *SubTy = I->get();
+ HashVal += SubTy->getTypeID();
+ switch (SubTy->getTypeID()) {
+ default: break;
+ case Type::OpaqueTyID: return 0; // Opaque -> hash = 0 no matter what.
+ case Type::IntegerTyID:
+ HashVal ^= (cast<IntegerType>(SubTy)->getBitWidth() << 3);
+ break;
+ case Type::FunctionTyID:
+ HashVal ^= cast<FunctionType>(SubTy)->getNumParams()*2 +
+ cast<FunctionType>(SubTy)->isVarArg();
+ break;
+ case Type::ArrayTyID:
+ HashVal ^= cast<ArrayType>(SubTy)->getNumElements();
+ break;
+ case Type::VectorTyID:
+ HashVal ^= cast<VectorType>(SubTy)->getNumElements();
+ break;
+ case Type::StructTyID:
+ HashVal ^= cast<StructType>(SubTy)->getNumElements();
+ break;
+ case Type::PointerTyID:
+ HashVal ^= cast<PointerType>(SubTy)->getAddressSpace();
+ break;
+ }
+ }
+ return HashVal ? HashVal : 1; // Do not return zero unless opaque subty.
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Type Factory...
+//
+class IntegerValType {
+ uint32_t bits;
+public:
+ IntegerValType(uint16_t numbits) : bits(numbits) {}
+
+ static IntegerValType get(const IntegerType *Ty) {
+ return IntegerValType(Ty->getBitWidth());
+ }
+
+ static unsigned hashTypeStructure(const IntegerType *Ty) {
+ return (unsigned)Ty->getBitWidth();
+ }
+
+ inline bool operator<(const IntegerValType &IVT) const {
+ return bits < IVT.bits;
+ }
+};
+
+// PointerValType - Define a class to hold the key that goes into the TypeMap
+//
+class PointerValType {
+ const Type *ValTy;
+ unsigned AddressSpace;
+public:
+ PointerValType(const Type *val, unsigned as) : ValTy(val), AddressSpace(as) {}
+
+ static PointerValType get(const PointerType *PT) {
+ return PointerValType(PT->getElementType(), PT->getAddressSpace());
+ }
+
+ static unsigned hashTypeStructure(const PointerType *PT) {
+ return getSubElementHash(PT);
+ }
+
+ bool operator<(const PointerValType &MTV) const {
+ if (AddressSpace < MTV.AddressSpace) return true;
+ return AddressSpace == MTV.AddressSpace && ValTy < MTV.ValTy;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Array Type Factory...
+//
+class ArrayValType {
+ const Type *ValTy;
+ uint64_t Size;
+public:
+ ArrayValType(const Type *val, uint64_t sz) : ValTy(val), Size(sz) {}
+
+ static ArrayValType get(const ArrayType *AT) {
+ return ArrayValType(AT->getElementType(), AT->getNumElements());
+ }
+
+ static unsigned hashTypeStructure(const ArrayType *AT) {
+ return (unsigned)AT->getNumElements();
+ }
+
+ inline bool operator<(const ArrayValType &MTV) const {
+ if (Size < MTV.Size) return true;
+ return Size == MTV.Size && ValTy < MTV.ValTy;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Vector Type Factory...
+//
+class VectorValType {
+ const Type *ValTy;
+ unsigned Size;
+public:
+ VectorValType(const Type *val, int sz) : ValTy(val), Size(sz) {}
+
+ static VectorValType get(const VectorType *PT) {
+ return VectorValType(PT->getElementType(), PT->getNumElements());
+ }
+
+ static unsigned hashTypeStructure(const VectorType *PT) {
+ return PT->getNumElements();
+ }
+
+ inline bool operator<(const VectorValType &MTV) const {
+ if (Size < MTV.Size) return true;
+ return Size == MTV.Size && ValTy < MTV.ValTy;
+ }
+};
+
+// StructValType - Define a class to hold the key that goes into the TypeMap
+//
+class StructValType {
+ std::vector<const Type*> ElTypes;
+ bool packed;
+public:
+ StructValType(const std::vector<const Type*> &args, bool isPacked)
+ : ElTypes(args), packed(isPacked) {}
+
+ static StructValType get(const StructType *ST) {
+ std::vector<const Type *> ElTypes;
+ ElTypes.reserve(ST->getNumElements());
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+ ElTypes.push_back(ST->getElementType(i));
+
+ return StructValType(ElTypes, ST->isPacked());
+ }
+
+ static unsigned hashTypeStructure(const StructType *ST) {
+ return ST->getNumElements();
+ }
+
+ inline bool operator<(const StructValType &STV) const {
+ if (ElTypes < STV.ElTypes) return true;
+ else if (ElTypes > STV.ElTypes) return false;
+ else return (int)packed < (int)STV.packed;
+ }
+};
+
+// FunctionValType - Define a class to hold the key that goes into the TypeMap
+//
+class FunctionValType {
+ const Type *RetTy;
+ std::vector<const Type*> ArgTypes;
+ bool isVarArg;
+public:
+ FunctionValType(const Type *ret, const std::vector<const Type*> &args,
+ bool isVA) : RetTy(ret), ArgTypes(args), isVarArg(isVA) {}
+
+ static FunctionValType get(const FunctionType *FT);
+
+ static unsigned hashTypeStructure(const FunctionType *FT) {
+ unsigned Result = FT->getNumParams()*2 + FT->isVarArg();
+ return Result;
+ }
+
+ inline bool operator<(const FunctionValType &MTV) const {
+ if (RetTy < MTV.RetTy) return true;
+ if (RetTy > MTV.RetTy) return false;
+ if (isVarArg < MTV.isVarArg) return true;
+ if (isVarArg > MTV.isVarArg) return false;
+ if (ArgTypes < MTV.ArgTypes) return true;
+ if (ArgTypes > MTV.ArgTypes) return false;
+ return false;
+ }
+};
+
+class TypeMapBase {
+protected:
+ /// TypesByHash - Keep track of types by their structure hash value. Note
+ /// that we only keep track of types that have cycles through themselves in
+ /// this map.
+ ///
+ std::multimap<unsigned, PATypeHolder> TypesByHash;
+
+public:
+ ~TypeMapBase() {
+ // PATypeHolder won't destroy non-abstract types.
+ // We can't destroy them by simply iterating, because
+    // they may contain references to each other.
+ for (std::multimap<unsigned, PATypeHolder>::iterator I
+ = TypesByHash.begin(), E = TypesByHash.end(); I != E; ++I) {
+ Type *Ty = const_cast<Type*>(I->second.Ty);
+ I->second.destroy();
+ // We can't invoke destroy or delete, because the type may
+ // contain references to already freed types.
+ // So we have to destruct the object the ugly way.
+ if (Ty) {
+ Ty->AbstractTypeUsers.clear();
+ static_cast<const Type*>(Ty)->Type::~Type();
+ operator delete(Ty);
+ }
+ }
+ }
+
+ void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) {
+ std::multimap<unsigned, PATypeHolder>::iterator I =
+ TypesByHash.lower_bound(Hash);
+ for (; I != TypesByHash.end() && I->first == Hash; ++I) {
+ if (I->second == Ty) {
+ TypesByHash.erase(I);
+ return;
+ }
+ }
+
+    // This must be due to an opaque type that was resolved.  Switch down to hash
+ // code of zero.
+ assert(Hash && "Didn't find type entry!");
+ RemoveFromTypesByHash(0, Ty);
+ }
+
+ /// TypeBecameConcrete - When Ty gets a notification that TheType just became
+ /// concrete, drop uses and make Ty non-abstract if we should.
+ void TypeBecameConcrete(DerivedType *Ty, const DerivedType *TheType) {
+    // If the element just became concrete, remove 'Ty' from the abstract
+ // type user list for the type. Do this for as many times as Ty uses
+ // OldType.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ if (I->get() == TheType)
+ TheType->removeAbstractTypeUser(Ty);
+
+ // If the type is currently thought to be abstract, rescan all of our
+ // subtypes to see if the type has just become concrete! Note that this
+ // may send out notifications to AbstractTypeUsers that types become
+ // concrete.
+ if (Ty->isAbstract())
+ Ty->PromoteAbstractToConcrete();
+ }
+};
+
+// TypeMap - Make sure that only one instance of a particular type may be
+// created on any given run of the compiler... note that this involves updating
+// our map if an abstract type gets refined somehow.
+//
+template<class ValType, class TypeClass>
+class TypeMap : public TypeMapBase {
+ std::map<ValType, PATypeHolder> Map;
+public:
+ typedef typename std::map<ValType, PATypeHolder>::iterator iterator;
+ ~TypeMap() { print("ON EXIT"); }
+
+ inline TypeClass *get(const ValType &V) {
+ iterator I = Map.find(V);
+ return I != Map.end() ? cast<TypeClass>((Type*)I->second.get()) : 0;
+ }
+
+ inline void add(const ValType &V, TypeClass *Ty) {
+ Map.insert(std::make_pair(V, Ty));
+
+ // If this type has a cycle, remember it.
+ TypesByHash.insert(std::make_pair(ValType::hashTypeStructure(Ty), Ty));
+ print("add");
+ }
+
+ /// RefineAbstractType - This method is called after we have merged a type
+ /// with another one. We must now either merge the type away with
+  /// some other type or reinstall it in the map with its new configuration.
+ void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
+ const Type *NewType) {
+#ifdef DEBUG_MERGE_TYPES
+ DEBUG(errs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
+ << "], " << (void*)NewType << " [" << *NewType << "])\n");
+#endif
+
+ // Otherwise, we are changing one subelement type into another. Clearly the
+ // OldType must have been abstract, making us abstract.
+ assert(Ty->isAbstract() && "Refining a non-abstract type!");
+ assert(OldType != NewType);
+
+ // Make a temporary type holder for the type so that it doesn't disappear on
+ // us when we erase the entry from the map.
+ PATypeHolder TyHolder = Ty;
+
+ // The old record is now out-of-date, because one of the children has been
+ // updated. Remove the obsolete entry from the map.
+ unsigned NumErased = Map.erase(ValType::get(Ty));
+ assert(NumErased && "Element not found!"); NumErased = NumErased;
+
+ // Remember the structural hash for the type before we start hacking on it,
+ // in case we need it later.
+ unsigned OldTypeHash = ValType::hashTypeStructure(Ty);
+
+ // Find the type element we are refining... and change it now!
+ for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i)
+ if (Ty->ContainedTys[i] == OldType)
+ Ty->ContainedTys[i] = NewType;
+ unsigned NewTypeHash = ValType::hashTypeStructure(Ty);
+
+ // If there are no cycles going through this node, we can do a simple,
+ // efficient lookup in the map, instead of an inefficient nasty linear
+ // lookup.
+ if (!TypeHasCycleThroughItself(Ty)) {
+ typename std::map<ValType, PATypeHolder>::iterator I;
+ bool Inserted;
+
+ tie(I, Inserted) = Map.insert(std::make_pair(ValType::get(Ty), Ty));
+ if (!Inserted) {
+ // Refined to a different type altogether?
+ RemoveFromTypesByHash(OldTypeHash, Ty);
+
+ // We already have this type in the table. Get rid of the newly refined
+ // type.
+ TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+ Ty->unlockedRefineAbstractTypeTo(NewTy);
+ return;
+ }
+ } else {
+ // Now we check to see if there is an existing entry in the table which is
+ // structurally identical to the newly refined type. If so, this type
+ // gets refined to the pre-existing type.
+ //
+ std::multimap<unsigned, PATypeHolder>::iterator I, E, Entry;
+ tie(I, E) = TypesByHash.equal_range(NewTypeHash);
+ Entry = E;
+ for (; I != E; ++I) {
+ if (I->second == Ty) {
+ // Remember the position of the old type if we see it in our scan.
+ Entry = I;
+ } else {
+ if (TypesEqual(Ty, I->second)) {
+ TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+
+ // Remove the old entry from TypesByHash. If the hash values differ
+ // now, remove it from the old place. Otherwise, continue scanning
+ // within this hashcode to reduce work.
+ if (NewTypeHash != OldTypeHash) {
+ RemoveFromTypesByHash(OldTypeHash, Ty);
+ } else {
+ if (Entry == E) {
+ // Find the location of Ty in the TypesByHash structure if we
+ // haven't seen it already.
+ while (I->second != Ty) {
+ ++I;
+ assert(I != E && "Structure doesn't contain type??");
+ }
+ Entry = I;
+ }
+ TypesByHash.erase(Entry);
+ }
+ Ty->unlockedRefineAbstractTypeTo(NewTy);
+ return;
+ }
+ }
+ }
+
+ // If there is no existing type of the same structure, we reinsert an
+ // updated record into the map.
+ Map.insert(std::make_pair(ValType::get(Ty), Ty));
+ }
+
+ // If the hash codes differ, update TypesByHash
+ if (NewTypeHash != OldTypeHash) {
+ RemoveFromTypesByHash(OldTypeHash, Ty);
+ TypesByHash.insert(std::make_pair(NewTypeHash, Ty));
+ }
+
+ // If the type is currently thought to be abstract, rescan all of our
+ // subtypes to see if the type has just become concrete! Note that this
+ // may send out notifications to AbstractTypeUsers that types become
+ // concrete.
+ if (Ty->isAbstract())
+ Ty->PromoteAbstractToConcrete();
+ }
+
+ void print(const char *Arg) const {
+#ifdef DEBUG_MERGE_TYPES
+ DEBUG(errs() << "TypeMap<>::" << Arg << " table contents:\n");
+ unsigned i = 0;
+ for (typename std::map<ValType, PATypeHolder>::const_iterator I
+ = Map.begin(), E = Map.end(); I != E; ++I)
+ DEBUG(errs() << " " << (++i) << ". " << (void*)I->second.get() << " "
+ << *I->second.get() << "\n");
+#endif
+ }
+
+ void dump() const { print("dump output"); }
+};
+}
+
+#endif
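Aside: the bucketing scheme above is easier to see in isolation. Below is a minimal standalone sketch (with a hypothetical Ty stand-in, not the real LLVM classes) of the structural-hash uniquing that RemoveFromTypesByHash relies on, including the fall-back to bucket zero for entries whose hash changed when an opaque type was resolved.

    #include <cassert>
    #include <map>

    struct Ty { int Structure; };  // hypothetical stand-in for llvm::Type

    static unsigned hashStructure(const Ty *T) { return T->Structure; }

    struct UniqueTable {
      std::multimap<unsigned, const Ty *> ByHash;

      void add(const Ty *T) {
        ByHash.insert(std::make_pair(hashStructure(T), T));
      }

      // Erase T from the bucket for Hash; if it is not there, it must have
      // been moved to the zero bucket when its opaque operand was resolved.
      void remove(unsigned Hash, const Ty *T) {
        for (std::multimap<unsigned, const Ty *>::iterator
               I = ByHash.lower_bound(Hash);
             I != ByHash.end() && I->first == Hash; ++I)
          if (I->second == T) { ByHash.erase(I); return; }
        assert(Hash && "Didn't find type entry!");
        remove(0, T);
      }
    };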
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index b25415a3d14e..b7fd92f9b066 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -128,7 +128,7 @@ void Use::zap(Use *Start, const Use *Stop, bool del) {
// AugmentedUse layout struct
//===----------------------------------------------------------------------===//
-struct AugmentedUse : Use {
+struct AugmentedUse : public Use {
PointerIntPair<User*, 1, Tag> ref;
AugmentedUse(); // not implemented
};
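The one-line change above is purely cosmetic: for a struct, base classes are public by default, so ': Use' and ': public Use' mean the same thing, and the patch only spells the intent out. An illustrative snippet (not from the patch):

    struct B { };
    struct D1 : B { };         // struct: inheritance is public by default
    struct D2 : public B { };  // identical meaning, stated explicitly
    class  D3 : B { };         // class: inheritance defaults to private

    B *p1 = new D1;            // ok
    B *p2 = new D2;            // ok
    // B *p3 = new D3;         // error: B is an inaccessible base of D3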
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index c952b7888cdd..ba72af635cdc 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -11,17 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "LLVMContextImpl.h"
#include "llvm/Constant.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/InstrTypes.h"
#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
#include "llvm/Module.h"
+#include "llvm/Metadata.h"
#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/RWMutex.h"
#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
@@ -38,23 +44,31 @@ static inline const Type *checkType(const Type *Ty) {
}
Value::Value(const Type *ty, unsigned scid)
- : SubclassID(scid), HasValueHandle(0), SubclassData(0), VTy(checkType(ty)),
+ : SubclassID(scid), HasValueHandle(0), HasMetadata(0),
+ SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)),
UseList(0), Name(0) {
if (isa<CallInst>(this) || isa<InvokeInst>(this))
- assert((VTy->isFirstClassType() || VTy == Type::VoidTy ||
+ assert((VTy->isFirstClassType() ||
+ VTy == Type::getVoidTy(ty->getContext()) ||
isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) &&
"invalid CallInst type!");
else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
- assert((VTy->isFirstClassType() || VTy == Type::VoidTy ||
+ assert((VTy->isFirstClassType() ||
+ VTy == Type::getVoidTy(ty->getContext()) ||
isa<OpaqueType>(ty)) &&
"Cannot create non-first-class values except for constants!");
}
Value::~Value() {
+ if (HasMetadata) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsDeleted(this);
+ }
+
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsDeleted(this);
-
+
#ifndef NDEBUG // Only in -g mode...
// Check to make sure that there are no uses of this value that are still
// around when the value is destroyed. If there are, then we have a dangling
@@ -63,9 +77,9 @@ Value::~Value() {
// a <badref>
//
if (!use_empty()) {
- cerr << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ errs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
- cerr << "Use still stuck around after Def is destroyed:"
+ errs() << "Use still stuck around after Def is destroyed:"
<< **I << "\n";
}
#endif
@@ -75,7 +89,7 @@ Value::~Value() {
// at this point.
if (Name)
Name->Destroy();
-
+
// There should be no uses of this object anymore, remove it.
LeakDetector::removeGarbageObject(this);
}
@@ -128,61 +142,57 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
if (Function *PP = P->getParent())
ST = &PP->getValueSymbolTable();
} else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
- if (Function *P = BB->getParent())
+ if (Function *P = BB->getParent())
ST = &P->getValueSymbolTable();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- if (Module *P = GV->getParent())
+ if (Module *P = GV->getParent())
ST = &P->getValueSymbolTable();
} else if (Argument *A = dyn_cast<Argument>(V)) {
- if (Function *P = A->getParent())
+ if (Function *P = A->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (NamedMDNode *N = dyn_cast<NamedMDNode>(V)) {
+ if (Module *P = N->getParent()) {
ST = &P->getValueSymbolTable();
- } else {
+ }
+ } else if (isa<MDString>(V))
+ return true;
+ else {
assert(isa<Constant>(V) && "Unknown value type!");
return true; // no name is settable for this.
}
return false;
}
-/// getNameStart - Return a pointer to a null terminated string for this name.
-/// Note that names can have null characters within the string as well as at
-/// their end. This always returns a non-null pointer.
-const char *Value::getNameStart() const {
- if (Name == 0) return "";
- return Name->getKeyData();
+StringRef Value::getName() const {
+ // Make sure the empty string is still a C string. For historical reasons,
+ // some clients want to call .data() on the result and expect it to be null
+ // terminated.
+ if (!Name) return StringRef("", 0);
+ return Name->getKey();
}
-/// getNameLen - Return the length of the string, correctly handling nul
-/// characters embedded into them.
-unsigned Value::getNameLen() const {
- return Name ? Name->getKeyLength() : 0;
+std::string Value::getNameStr() const {
+ return getName().str();
}
-/// isName - Return true if this value has the name specified by the provided
-/// nul terminated string.
-bool Value::isName(const char *N) const {
- unsigned InLen = strlen(N);
- return InLen == getNameLen() && memcmp(getNameStart(), N, InLen) == 0;
-}
+void Value::setName(const Twine &NewName) {
+ // Fast path for common IRBuilder case of setName("") when there is no name.
+ if (NewName.isTriviallyEmpty() && !hasName())
+ return;
+ SmallString<256> NameData;
+ NewName.toVector(NameData);
-std::string Value::getNameStr() const {
- if (Name == 0) return "";
- return std::string(Name->getKeyData(),
- Name->getKeyData()+Name->getKeyLength());
-}
+ const char *NameStr = NameData.data();
+ unsigned NameLen = NameData.size();
-void Value::setName(const std::string &name) {
- setName(&name[0], name.size());
-}
+ // Name isn't changing?
+ if (getName() == StringRef(NameStr, NameLen))
+ return;
-void Value::setName(const char *Name) {
- setName(Name, Name ? strlen(Name) : 0);
-}
+ assert(getType() != Type::getVoidTy(getContext()) &&
+ "Cannot assign a name to void values!");
-void Value::setName(const char *NameStr, unsigned NameLen) {
- if (NameLen == 0 && !hasName()) return;
- assert(getType() != Type::VoidTy && "Cannot assign a name to void values!");
-
// Get the symbol table to update for this object.
ValueSymbolTable *ST;
if (getSymTab(this, ST))
@@ -195,32 +205,22 @@ void Value::setName(const char *NameStr, unsigned NameLen) {
Name = 0;
return;
}
-
- if (Name) {
- // Name isn't changing?
- if (NameLen == Name->getKeyLength() &&
- !memcmp(Name->getKeyData(), NameStr, NameLen))
- return;
+
+ if (Name)
Name->Destroy();
- }
-
+
// NOTE: Could optimize for the case where the name is shrinking, to avoid
// deallocating and then reallocating.
-
+
// Create the new name.
Name = ValueName::Create(NameStr, NameStr+NameLen);
Name->setValue(this);
return;
}
-
+
// NOTE: Could optimize for the case where the name is shrinking, to avoid
// deallocating and then reallocating.
if (hasName()) {
- // Name isn't changing?
- if (NameLen == Name->getKeyLength() &&
- !memcmp(Name->getKeyData(), NameStr, NameLen))
- return;
-
// Remove old name.
ST->removeValueName(Name);
Name->Destroy();
@@ -231,12 +231,12 @@ void Value::setName(const char *NameStr, unsigned NameLen) {
}
// Name is changing to something new.
- Name = ST->createValueName(NameStr, NameLen, this);
+ Name = ST->createValueName(StringRef(NameStr, NameLen), this);
}
/// takeName - transfer the name from V to this value, setting V's name to
-/// empty. It is an error to call V->takeName(V).
+/// empty. It is an error to call V->takeName(V).
void Value::takeName(Value *V) {
ValueSymbolTable *ST = 0;
// If this value has a name, drop it.
@@ -245,36 +245,36 @@ void Value::takeName(Value *V) {
if (getSymTab(this, ST)) {
// We can't set a name on this value, but we need to clear V's name if
// it has one.
- if (V->hasName()) V->setName(0, 0);
+ if (V->hasName()) V->setName("");
return; // Cannot set a name on this value (e.g. constant).
}
-
+
// Remove old name.
if (ST)
ST->removeValueName(Name);
Name->Destroy();
Name = 0;
- }
-
+ }
+
// Now we know that this has no name.
-
+
// If V has no name either, we're done.
if (!V->hasName()) return;
-
+
// Get this's symtab if we didn't before.
if (!ST) {
if (getSymTab(this, ST)) {
// Clear V's name.
- V->setName(0, 0);
+ V->setName("");
return; // Cannot set a name on this value (e.g. constant).
}
}
-
+
// Get V's ST; this should always succeed, because V has a name.
ValueSymbolTable *VST;
bool Failure = getSymTab(V, VST);
assert(!Failure && "V has a name, so it should have a ST!"); Failure=Failure;
-
+
// If these values are both in the same symtab, we can do this very fast.
// This works even if both values have no symtab yet.
if (ST == VST) {
@@ -284,16 +284,16 @@ void Value::takeName(Value *V) {
Name->setValue(this);
return;
}
-
+
// Otherwise, things are slightly more complex. Remove V's name from VST and
// then reinsert it into ST.
-
+
if (VST)
VST->removeValueName(V->Name);
Name = V->Name;
V->Name = 0;
Name->setValue(this);
-
+
if (ST)
ST->reinsertValue(this);
}
@@ -309,7 +309,11 @@ void Value::uncheckedReplaceAllUsesWith(Value *New) {
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsRAUWd(this, New);
-
+ if (HasMetadata) {
+ LLVMContext &Context = getContext();
+ Context.pImpl->TheMetadata.ValueIsRAUWd(this, New);
+ }
+
while (!use_empty()) {
Use &U = *UseList;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -320,7 +324,7 @@ void Value::uncheckedReplaceAllUsesWith(Value *New) {
continue;
}
}
-
+
U.set(New);
}
}
@@ -339,23 +343,16 @@ Value *Value::stripPointerCasts() {
return this;
Value *V = this;
do {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
- if (!CE->getOperand(i)->isNullValue())
- return V;
- V = CE->getOperand(0);
- } else if (CE->getOpcode() == Instruction::BitCast) {
- V = CE->getOperand(0);
- } else {
- return V;
- }
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if (!GEP->hasAllZeroIndices())
return V;
- V = GEP->getOperand(0);
- } else if (BitCastInst *CI = dyn_cast<BitCastInst>(V)) {
- V = CI->getOperand(0);
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
} else {
return V;
}
@@ -369,15 +366,14 @@ Value *Value::getUnderlyingObject() {
Value *V = this;
unsigned MaxLookup = 6;
do {
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
- return V;
- V = I->getOperand(0);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() != Instruction::BitCast &&
- CE->getOpcode() != Instruction::GetElementPtr)
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
return V;
- V = CE->getOperand(0);
+ V = GA->getAliasee();
} else {
return V;
}
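Both rewritten walks above share one shape: peel wrappers that do not change the underlying object (GEPs, bitcasts, aliases that cannot be overridden) under a fixed lookup budget so the loop always terminates. A standalone sketch of that pattern, using hypothetical node types rather than the LLVM classes:

    struct Node {
      enum Kind { GEP, BitCast, Alias, Other } K;
      Node *Inner;            // the value this node wraps, if any
      bool MayBeOverridden;   // meaningful for Alias only
    };

    Node *underlyingObject(Node *V) {
      unsigned MaxLookup = 6;  // bound the walk so long chains stay cheap
      do {
        if (V->K == Node::GEP || V->K == Node::BitCast) {
          V = V->Inner;
        } else if (V->K == Node::Alias) {
          if (V->MayBeOverridden)
            return V;          // aliasee may change at link time; stop here
          V = V->Inner;
        } else {
          return V;
        }
      } while (--MaxLookup);
      return V;
    }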
@@ -390,7 +386,7 @@ Value *Value::getUnderlyingObject() {
/// return the value in the PHI node corresponding to PredBB. If not, return
/// ourself. This is useful if you want to know the value something has in a
/// predecessor block.
-Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+Value *Value::DoPHITranslation(const BasicBlock *CurBB,
const BasicBlock *PredBB) {
PHINode *PN = dyn_cast<PHINode>(this);
if (PN && PN->getParent() == CurBB)
@@ -398,22 +394,17 @@ Value *Value::DoPHITranslation(const BasicBlock *CurBB,
return this;
}
+LLVMContext &Value::getContext() const { return VTy->getContext(); }
+
//===----------------------------------------------------------------------===//
// ValueHandleBase Class
//===----------------------------------------------------------------------===//
-/// ValueHandles - This map keeps track of all of the value handles that are
-/// watching a Value*. The Value::HasValueHandle bit is used to know whether or
-/// not a value has an entry in this map.
-typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
-static ManagedStatic<ValueHandlesTy> ValueHandles;
-static ManagedStatic<sys::SmartRWMutex<true> > ValueHandlesLock;
-
/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
/// List is known to point into the existing use list.
void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
assert(List && "Handle list is null?");
-
+
// Splice ourselves into the list.
Next = *List;
*List = this;
@@ -424,43 +415,54 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
}
}
+void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
+ assert(List && "Must insert after existing node");
+
+ Next = List->Next;
+ setPrevPtr(&List->Next);
+ List->Next = this;
+ if (Next)
+ Next->setPrevPtr(&Next);
+}
+
/// AddToUseList - Add this ValueHandle to the use list for VP.
void ValueHandleBase::AddToUseList() {
assert(VP && "Null pointer doesn't have a use list!");
+
+ LLVMContextImpl *pImpl = VP->getContext().pImpl;
+
if (VP->HasValueHandle) {
// If this value already has a ValueHandle, then it must be in the
// ValueHandles map already.
- sys::SmartScopedReader<true> Reader(&*ValueHandlesLock);
- ValueHandleBase *&Entry = (*ValueHandles)[VP];
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP];
assert(Entry != 0 && "Value doesn't have any handles?");
AddToExistingUseList(&Entry);
return;
}
-
+
// Ok, it doesn't have any handles yet, so we must insert it into the
// DenseMap. However, doing this insertion could cause the DenseMap to
// reallocate itself, which would invalidate all of the PrevP pointers that
// point into the old table. Handle this by checking for reallocation and
// updating the stale pointers only if needed.
- sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
- ValueHandlesTy &Handles = *ValueHandles;
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
-
+
ValueHandleBase *&Entry = Handles[VP];
assert(Entry == 0 && "Value really did already have handles?");
AddToExistingUseList(&Entry);
VP->HasValueHandle = true;
-
+
// If reallocation didn't happen or if this was the first insertion, don't
// walk the table.
- if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
+ if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
Handles.size() == 1) {
return;
}
-
+
// Okay, reallocation did happen. Fix the Prev Pointers.
- for (ValueHandlesTy::iterator I = Handles.begin(), E = Handles.end();
- I != E; ++I) {
+ for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
+ E = Handles.end(); I != E; ++I) {
assert(I->second && I->first == I->second->VP && "List invariant broken!");
I->second->setPrevPtr(&I->second);
}
@@ -473,19 +475,19 @@ void ValueHandleBase::RemoveFromUseList() {
// Unlink this from its use list.
ValueHandleBase **PrevPtr = getPrevPtr();
assert(*PrevPtr == this && "List invariant broken");
-
+
*PrevPtr = Next;
if (Next) {
assert(Next->getPrevPtr() == &Next && "List invariant broken");
Next->setPrevPtr(PrevPtr);
return;
}
-
+
// If the Next pointer was null, then it is possible that this was the last
// ValueHandle watching VP. If so, delete its entry from the ValueHandles
// map.
- sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
- ValueHandlesTy &Handles = *ValueHandles;
+ LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
Handles.erase(VP);
VP->HasValueHandle = false;
@@ -498,67 +500,91 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
- ValueHandlesLock->reader_acquire();
- ValueHandleBase *Entry = (*ValueHandles)[V];
- ValueHandlesLock->reader_release();
+ LLVMContextImpl *pImpl = V->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[V];
assert(Entry && "Value bit set but no entries exist");
-
- while (Entry) {
- // Advance pointer to avoid invalidation.
- ValueHandleBase *ThisNode = Entry;
- Entry = Entry->Next;
-
- switch (ThisNode->getKind()) {
+
+ // We use a local ValueHandleBase as an iterator so that
+ // ValueHandles can add and remove themselves from the list without
+ // breaking our iteration. This is not really an AssertingVH; we
+ // just have to give ValueHandleBase some kind.
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
case Assert:
-#ifndef NDEBUG // Only in -g mode...
- cerr << "While deleting: " << *V->getType() << " %" << V->getNameStr()
- << "\n";
-#endif
- cerr << "An asserting value handle still pointed to this value!\n";
- abort();
+ break;
+ case Tracking:
+ // Mark that this value has been deleted by setting it to an invalid Value
+ // pointer.
+ Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
+ break;
case Weak:
// Weak just goes to null, which will unlink it from the list.
- ThisNode->operator=(0);
+ Entry->operator=(0);
break;
case Callback:
// Forward to the subclass's implementation.
- static_cast<CallbackVH*>(ThisNode)->deleted();
+ static_cast<CallbackVH*>(Entry)->deleted();
break;
}
}
-
- // All callbacks and weak references should be dropped by now.
- assert(!V->HasValueHandle && "All references to V were not removed?");
+
+ // All callbacks, weak references, and assertingVHs should be dropped by now.
+ if (V->HasValueHandle) {
+#ifndef NDEBUG // Only in +Asserts mode...
+ errs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ << "\n";
+ if (pImpl->ValueHandles[V]->getKind() == Assert)
+ llvm_unreachable("An asserting value handle still pointed to this"
+ " value!");
+
+#endif
+ llvm_unreachable("All references to V were not removed?");
+ }
}
void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
assert(Old != New && "Changing value into itself!");
-
+
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
- ValueHandlesLock->reader_acquire();
- ValueHandleBase *Entry = (*ValueHandles)[Old];
- ValueHandlesLock->reader_release();
+ LLVMContextImpl *pImpl = Old->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[Old];
+
assert(Entry && "Value bit set but no entries exist");
-
- while (Entry) {
- // Advance pointer to avoid invalidation.
- ValueHandleBase *ThisNode = Entry;
- Entry = Entry->Next;
-
- switch (ThisNode->getKind()) {
+
+ // We use a local ValueHandleBase as an iterator so that
+ // ValueHandles can add and remove themselves from the list without
+ // breaking our iteration. This is not really an AssertingVH; we
+ // just have to give ValueHandleBase some kind.
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
case Assert:
// Asserting handle does not follow RAUW implicitly.
break;
+ case Tracking:
+ // Tracking goes to new value like a WeakVH. Note that this may make it
+ // something incompatible with its templated type. We don't want to have a
+ // virtual (or inline) interface to handle this though, so instead we make
+ // the TrackingVH accessors guarantee that a client never sees this value.
+
+ // FALLTHROUGH
case Weak:
// Weak goes to the new value, which will unlink it from Old's list.
- ThisNode->operator=(New);
+ Entry->operator=(New);
break;
case Callback:
// Forward to the subclass's implementation.
- static_cast<CallbackVH*>(ThisNode)->allUsesReplacedWith(New);
+ static_cast<CallbackVH*>(Entry)->allUsesReplacedWith(New);
break;
}
}
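The two loops above replace the advance-then-visit traversal with a dummy node threaded through the use list, so handles that add or remove themselves during a callback can no longer invalidate the iteration. A stripped-down sketch of that sentinel technique with a hypothetical Handle node; as in the loops above, nodes spliced in behind the cursor are not revisited:

    struct Handle {
      Handle *Next;
      Handle **PrevPtr;
      Handle() : Next(0), PrevPtr(0) {}

      void insertAfter(Handle *Pos) {   // splice *this in right after Pos
        Next = Pos->Next;
        PrevPtr = &Pos->Next;
        Pos->Next = this;
        if (Next) Next->PrevPtr = &Next;
      }
      void unlink() {                   // remove *this from the list
        *PrevPtr = Next;
        if (Next) Next->PrevPtr = PrevPtr;
        Next = 0;
        PrevPtr = 0;
      }
    };

    void visitAll(Handle *Cur, void (*Callback)(Handle *)) {
      Handle Sentinel;                  // stack node used purely as a cursor
      while (Cur) {
        Sentinel.insertAfter(Cur);      // park the cursor right behind Cur
        Callback(Cur);                  // may unlink Cur or splice new nodes;
                                        // the sentinel keeps our spot valid
        Handle *Next = Sentinel.Next;   // resume from wherever it ended up
        Sentinel.unlink();
        Cur = Next;
      }
    }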
@@ -580,7 +606,7 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
if (From == To) return; // Duh what?
assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
- "Cannot call User::replaceUsesofWith on a constant!");
+ "Cannot call User::replaceUsesOfWith on a constant!");
for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
if (getOperand(i) == From) { // Is this operand pointing to oldval?
@@ -590,4 +616,3 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
setOperand(i, To); // Fix it now...
}
}
-
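The setName rework above folds three overloads into one Twine-based entry point with two early-outs. The shape of those fast paths, as a standalone sketch with std::string standing in for Twine/SmallString (hypothetical type, not the real API):

    #include <string>

    struct NamedThing {
      std::string Name;

      void setName(const std::string &NewName) {
        if (NewName.empty() && Name.empty())
          return;          // fast path: clearing an already-empty name
        if (Name == NewName)
          return;          // name isn't changing; skip symbol-table churn
        Name = NewName;    // the real code also updates the symbol table
      }
    };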
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index eee18a164c12..7765a98c1fd1 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -17,36 +17,20 @@
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Class destructor
ValueSymbolTable::~ValueSymbolTable() {
#ifndef NDEBUG // Only do this in -g mode...
for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
- cerr << "Value still in symbol table! Type = '"
- << VI->getValue()->getType()->getDescription() << "' Name = '"
- << VI->getKeyData() << "'\n";
+ errs() << "Value still in symbol table! Type = '"
+ << VI->getValue()->getType()->getDescription() << "' Name = '"
+ << VI->getKeyData() << "'\n";
assert(vmap.empty() && "Values remain in symbol table!");
#endif
}
-// lookup a value - Returns null on failure...
-//
-Value *ValueSymbolTable::lookup(const std::string &Name) const {
- const_iterator VI = vmap.find(Name.data(), Name.data() + Name.size());
- if (VI != vmap.end()) // We found the symbol
- return VI->getValue();
- return 0;
-}
-
-Value *ValueSymbolTable::lookup(const char *NameBegin,
- const char *NameEnd) const {
- const_iterator VI = vmap.find(NameBegin, NameEnd);
- if (VI != vmap.end()) // We found the symbol
- return VI->getValue();
- return 0;
-}
-
// Insert a value into the symbol table with the specified name...
//
void ValueSymbolTable::reinsertValue(Value* V) {
@@ -54,37 +38,38 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// Try inserting the name, assuming it won't conflict.
if (vmap.insert(V->Name)) {
- //DOUT << " Inserted value: " << V->Name << ": " << *V << "\n";
+ //DEBUG(errs() << " Inserted value: " << V->Name << ": " << *V << "\n");
return;
}
// Otherwise, there is a naming conflict. Rename this value.
- SmallString<128> UniqueName(V->getNameStart(), V->getNameEnd());
+ SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
// The name is already used; just free it so we can allocate a new name.
V->Name->Destroy();
unsigned BaseSize = UniqueName.size();
while (1) {
- // Trim any suffix off.
+ // Trim any suffix off and append the next number.
UniqueName.resize(BaseSize);
- UniqueName.append_uint_32(++LastUnique);
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
// Try to insert the vmap entry with this suffix.
ValueName &NewName =
- vmap.GetOrCreateValue(UniqueName.data(),
- UniqueName.data() + UniqueName.size());
+ vmap.GetOrCreateValue(StringRef(UniqueName.data(),
+ UniqueName.size()));
if (NewName.getValue() == 0) {
// Newly inserted name. Success!
NewName.setValue(V);
V->Name = &NewName;
- //DEBUG(DOUT << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
return;
}
}
}
void ValueSymbolTable::removeValueName(ValueName *V) {
- //DEBUG(DOUT << " Removing Value: " << V->getKeyData() << "\n");
+ //DEBUG(errs() << " Removing Value: " << V->getKeyData() << "\n");
// Remove the value from the symbol table.
vmap.remove(V);
}
@@ -92,33 +77,32 @@ void ValueSymbolTable::removeValueName(ValueName *V) {
/// createValueName - This method attempts to create a value name and insert
/// it into the symbol table with the specified name. If it conflicts, it
/// auto-renames the name and returns that instead.
-ValueName *ValueSymbolTable::createValueName(const char *NameStart,
- unsigned NameLen, Value *V) {
+ValueName *ValueSymbolTable::createValueName(const StringRef &Name, Value *V) {
// In the common case, the name is not already in the symbol table.
- ValueName &Entry = vmap.GetOrCreateValue(NameStart, NameStart+NameLen);
+ ValueName &Entry = vmap.GetOrCreateValue(Name);
if (Entry.getValue() == 0) {
Entry.setValue(V);
- //DEBUG(DOUT << " Inserted value: " << Entry.getKeyData() << ": "
+ //DEBUG(errs() << " Inserted value: " << Entry.getKeyData() << ": "
// << *V << "\n");
return &Entry;
}
// Otherwise, there is a naming conflict. Rename this value.
- SmallString<128> UniqueName(NameStart, NameStart+NameLen);
+ SmallString<128> UniqueName(Name.begin(), Name.end());
while (1) {
- // Trim any suffix off.
- UniqueName.resize(NameLen);
- UniqueName.append_uint_32(++LastUnique);
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(Name.size());
+ raw_svector_ostream(UniqueName) << ++LastUnique;
// Try to insert the vmap entry with this suffix.
ValueName &NewName =
- vmap.GetOrCreateValue(UniqueName.data(),
- UniqueName.data() + UniqueName.size());
+ vmap.GetOrCreateValue(StringRef(UniqueName.data(),
+ UniqueName.size()));
if (NewName.getValue() == 0) {
// Newly inserted name. Success!
NewName.setValue(V);
- //DEBUG(DOUT << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
return &NewName;
}
}
@@ -128,10 +112,10 @@ ValueName *ValueSymbolTable::createValueName(const char *NameStart,
// dump - print out the symbol table
//
void ValueSymbolTable::dump() const {
- //DOUT << "ValueSymbolTable:\n";
+ //DEBUG(errs() << "ValueSymbolTable:\n");
for (const_iterator I = begin(), E = end(); I != E; ++I) {
- //DOUT << " '" << I->getKeyData() << "' = ";
+ //DEBUG(errs() << " '" << I->getKeyData() << "' = ");
I->getValue()->dump();
- //DOUT << "\n";
+ //DEBUG(errs() << "\n");
}
}
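Both renaming loops above share the same retry scheme: keep a monotonically increasing counter and append it to the base name until the table accepts the entry. A standalone sketch, with std::set standing in for the symbol table:

    #include <set>
    #include <sstream>
    #include <string>

    std::string uniquify(std::set<std::string> &Table,
                         const std::string &Base, unsigned &LastUnique) {
      if (Table.insert(Base).second)
        return Base;                 // common case: no conflict
      for (;;) {
        std::ostringstream OS;       // rebuild from Base, append next number
        OS << Base << ++LastUnique;
        if (Table.insert(OS.str()).second)
          return OS.str();           // first free suffix wins
      }
    }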
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 2d207eea31db..7f9a6cde2d5c 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -1,4 +1,4 @@
-//===----------- ValueTypes.cpp - Implementation of MVT methods -----------===//
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,62 +13,65 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-MVT MVT::getExtendedIntegerVT(unsigned BitWidth) {
- MVT VT;
- VT.LLVMTy = IntegerType::get(BitWidth);
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
assert(VT.isExtended() && "Type is not extended!");
return VT;
}
-MVT MVT::getExtendedVectorVT(MVT VT, unsigned NumElements) {
- MVT ResultVT;
- ResultVT.LLVMTy = VectorType::get(VT.getTypeForMVT(), NumElements);
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ unsigned NumElements) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
assert(ResultVT.isExtended() && "Type is not extended!");
return ResultVT;
}
-bool MVT::isExtendedFloatingPoint() const {
+bool EVT::isExtendedFloatingPoint() const {
assert(isExtended() && "Type is not extended!");
return LLVMTy->isFPOrFPVector();
}
-bool MVT::isExtendedInteger() const {
+bool EVT::isExtendedInteger() const {
assert(isExtended() && "Type is not extended!");
return LLVMTy->isIntOrIntVector();
}
-bool MVT::isExtendedVector() const {
+bool EVT::isExtendedVector() const {
assert(isExtended() && "Type is not extended!");
return isa<VectorType>(LLVMTy);
}
-bool MVT::isExtended64BitVector() const {
+bool EVT::isExtended64BitVector() const {
return isExtendedVector() && getSizeInBits() == 64;
}
-bool MVT::isExtended128BitVector() const {
+bool EVT::isExtended128BitVector() const {
return isExtendedVector() && getSizeInBits() == 128;
}
-bool MVT::isExtended256BitVector() const {
+bool EVT::isExtended256BitVector() const {
return isExtendedVector() && getSizeInBits() == 256;
}
-MVT MVT::getExtendedVectorElementType() const {
+EVT EVT::getExtendedVectorElementType() const {
assert(isExtended() && "Type is not extended!");
- return MVT::getMVT(cast<VectorType>(LLVMTy)->getElementType());
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
}
-unsigned MVT::getExtendedVectorNumElements() const {
+unsigned EVT::getExtendedVectorNumElements() const {
assert(isExtended() && "Type is not extended!");
return cast<VectorType>(LLVMTy)->getNumElements();
}
-unsigned MVT::getExtendedSizeInBits() const {
+unsigned EVT::getExtendedSizeInBits() const {
assert(isExtended() && "Type is not extended!");
if (const IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
return ITy->getBitWidth();
@@ -78,16 +81,16 @@ unsigned MVT::getExtendedSizeInBits() const {
return 0; // Suppress warnings.
}
-/// getMVTString - This function returns value type as a string, e.g. "i32".
-std::string MVT::getMVTString() const {
- switch (V) {
+/// getEVTString - This function returns value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
default:
if (isVector())
return "v" + utostr(getVectorNumElements()) +
- getVectorElementType().getMVTString();
+ getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
- assert(0 && "Invalid MVT!");
+ llvm_unreachable("Invalid EVT!");
return "?";
case MVT::i1: return "i1";
case MVT::i8: return "i8";
@@ -113,14 +116,12 @@ std::string MVT::getMVTString() const {
case MVT::v8i16: return "v8i16";
case MVT::v16i16: return "v16i16";
case MVT::v2i32: return "v2i32";
- case MVT::v3i32: return "v3i32";
case MVT::v4i32: return "v4i32";
case MVT::v8i32: return "v8i32";
case MVT::v1i64: return "v1i64";
case MVT::v2i64: return "v2i64";
case MVT::v4i64: return "v4i64";
case MVT::v2f32: return "v2f32";
- case MVT::v3f32: return "v3f32";
case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32";
case MVT::v2f64: return "v2f64";
@@ -128,73 +129,72 @@ std::string MVT::getMVTString() const {
}
}
-/// getTypeForMVT - This method returns an LLVM type corresponding to the
-/// specified MVT. For integer types, this returns an unsigned type. Note
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
-const Type *MVT::getTypeForMVT() const {
- switch (V) {
+const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ switch (V.SimpleTy) {
default:
assert(isExtended() && "Type is not extended!");
return LLVMTy;
- case MVT::isVoid: return Type::VoidTy;
- case MVT::i1: return Type::Int1Ty;
- case MVT::i8: return Type::Int8Ty;
- case MVT::i16: return Type::Int16Ty;
- case MVT::i32: return Type::Int32Ty;
- case MVT::i64: return Type::Int64Ty;
- case MVT::i128: return IntegerType::get(128);
- case MVT::f32: return Type::FloatTy;
- case MVT::f64: return Type::DoubleTy;
- case MVT::f80: return Type::X86_FP80Ty;
- case MVT::f128: return Type::FP128Ty;
- case MVT::ppcf128: return Type::PPC_FP128Ty;
- case MVT::v2i8: return VectorType::get(Type::Int8Ty, 2);
- case MVT::v4i8: return VectorType::get(Type::Int8Ty, 4);
- case MVT::v8i8: return VectorType::get(Type::Int8Ty, 8);
- case MVT::v16i8: return VectorType::get(Type::Int8Ty, 16);
- case MVT::v32i8: return VectorType::get(Type::Int8Ty, 32);
- case MVT::v2i16: return VectorType::get(Type::Int16Ty, 2);
- case MVT::v4i16: return VectorType::get(Type::Int16Ty, 4);
- case MVT::v8i16: return VectorType::get(Type::Int16Ty, 16);
- case MVT::v16i16: return VectorType::get(Type::Int16Ty, 8);
- case MVT::v2i32: return VectorType::get(Type::Int32Ty, 2);
- case MVT::v3i32: return VectorType::get(Type::Int32Ty, 3);
- case MVT::v4i32: return VectorType::get(Type::Int32Ty, 4);
- case MVT::v8i32: return VectorType::get(Type::Int32Ty, 8);
- case MVT::v1i64: return VectorType::get(Type::Int64Ty, 1);
- case MVT::v2i64: return VectorType::get(Type::Int64Ty, 2);
- case MVT::v4i64: return VectorType::get(Type::Int64Ty, 4);
- case MVT::v2f32: return VectorType::get(Type::FloatTy, 2);
- case MVT::v3f32: return VectorType::get(Type::FloatTy, 3);
- case MVT::v4f32: return VectorType::get(Type::FloatTy, 4);
- case MVT::v8f32: return VectorType::get(Type::FloatTy, 8);
- case MVT::v2f64: return VectorType::get(Type::DoubleTy, 2);
- case MVT::v4f64: return VectorType::get(Type::DoubleTy, 4);
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
}
}
-/// getMVT - Return the value type corresponding to the specified type. This
+/// getEVT - Return the value type corresponding to the specified type. This
/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
/// are returned as Other, otherwise they are invalid.
-MVT MVT::getMVT(const Type *Ty, bool HandleUnknown){
+EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){
switch (Ty->getTypeID()) {
default:
- if (HandleUnknown) return MVT::Other;
- assert(0 && "Unknown type!");
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
return MVT::isVoid;
case Type::VoidTyID:
return MVT::isVoid;
case Type::IntegerTyID:
- return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
- case Type::FloatTyID: return MVT::f32;
- case Type::DoubleTyID: return MVT::f64;
- case Type::X86_FP80TyID: return MVT::f80;
- case Type::FP128TyID: return MVT::f128;
- case Type::PPC_FP128TyID: return MVT::ppcf128;
- case Type::PointerTyID: return MVT::iPTR;
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
case Type::VectorTyID: {
const VectorType *VTy = cast<VectorType>(Ty);
- return getVectorVT(getMVT(VTy->getElementType(), false),
+ return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
VTy->getNumElements());
}
}
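The MVT-to-EVT split above hinges on one representation trick: common machine types are a plain enum value, and anything else is "extended" and carries a pointer to the IR type it came from, which is why the new entry points thread an LLVMContext through. A stripped-down sketch of that layout, with a hypothetical Type stand-in:

    struct Type;                    // stand-in for llvm::Type

    struct SimpleVT {
      enum Kind { i32, f64, INVALID };
      Kind SimpleTy;
    };

    struct VT {
      SimpleVT V;                   // valid unless SimpleTy == INVALID
      Type *LLVMTy;                 // set only for extended types

      bool isExtended() const { return V.SimpleTy == SimpleVT::INVALID; }

      static VT getExtended(Type *Ty) {  // e.g. i57 or an odd vector
        VT R;
        R.V.SimpleTy = SimpleVT::INVALID;
        R.LLVMTy = Ty;
        return R;
      }
    };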
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 10816e6248bc..75ea4c3e2f28 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -45,26 +45,27 @@
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/MDNode.h"
+#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
+#include "llvm/TypeSymbolTable.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <sstream>
#include <cstdarg>
using namespace llvm;
@@ -85,15 +86,15 @@ namespace { // Anonymous namespace for class
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
if (I->empty() || !I->back().isTerminator()) {
- cerr << "Basic Block does not have terminator!\n";
- WriteAsOperand(*cerr, I, true);
- cerr << "\n";
+ errs() << "Basic Block does not have terminator!\n";
+ WriteAsOperand(errs(), I, true);
+ errs() << "\n";
Broken = true;
}
}
if (Broken)
- abort();
+ llvm_report_error("Broken module, no Basic Block terminator!");
return false;
}
@@ -106,8 +107,55 @@ PreVer("preverify", "Preliminary module verification");
static const PassInfo *const PreVerifyID = &PreVer;
namespace {
- struct VISIBILITY_HIDDEN
- Verifier : public FunctionPass, InstVisitor<Verifier> {
+ class TypeSet : public AbstractTypeUser {
+ public:
+ TypeSet() {}
+
+ /// Insert a type into the set of types.
+ bool insert(const Type *Ty) {
+ if (!Types.insert(Ty))
+ return false;
+ if (Ty->isAbstract())
+ Ty->addAbstractTypeUser(this);
+ return true;
+ }
+
+ // Remove ourselves as abstract type listeners for any types that remain
+ // abstract when the TypeSet is destroyed.
+ ~TypeSet() {
+ for (SmallSetVector<const Type *, 16>::iterator I = Types.begin(),
+ E = Types.end(); I != E; ++I) {
+ const Type *Ty = *I;
+ if (Ty->isAbstract())
+ Ty->removeAbstractTypeUser(this);
+ }
+ }
+
+ // Abstract type user interface.
+
+ /// Remove types from the set when refined. Do not insert the type it was
+ /// refined to because that type hasn't been verified yet.
+ void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
+ Types.remove(OldTy);
+ OldTy->removeAbstractTypeUser(this);
+ }
+
+ /// Stop listening for changes to a type which is no longer abstract.
+ void typeBecameConcrete(const DerivedType *AbsTy) {
+ AbsTy->removeAbstractTypeUser(this);
+ }
+
+ void dump() const {}
+
+ private:
+ SmallSetVector<const Type *, 16> Types;
+
+ // Disallow copying.
+ TypeSet(const TypeSet &);
+ TypeSet &operator=(const TypeSet &);
+ };
+
+ struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
static char ID; // Pass ID, replacement for typeid
bool Broken; // Is this module found to be broken?
bool RealPass; // Are we not being run by a PassManager?
@@ -115,7 +163,9 @@ namespace {
// What to do if verification fails.
Module *Mod; // Module we are verifying right now
DominatorTree *DT; // Dominator Tree, caution can be null!
- std::stringstream msgs; // A stringstream to collect messages
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
/// InstInThisBlock - when verifying a basic block, keep track of all of the
/// instructions we have seen so far. This allows us to do efficient
@@ -123,23 +173,26 @@ namespace {
/// an instruction in the same block.
SmallPtrSet<Instruction*, 16> InstsInThisBlock;
+ /// Types - keep track of the types that have been checked already.
+ TypeSet Types;
+
Verifier()
: FunctionPass(&ID),
Broken(false), RealPass(true), action(AbortProcessAction),
- DT(0), msgs( std::ios::app | std::ios::out ) {}
+ DT(0), MessagesStr(Messages) {}
explicit Verifier(VerifierFailureAction ctn)
: FunctionPass(&ID),
Broken(false), RealPass(true), action(ctn), DT(0),
- msgs( std::ios::app | std::ios::out ) {}
+ MessagesStr(Messages) {}
explicit Verifier(bool AB)
: FunctionPass(&ID),
Broken(false), RealPass(true),
action( AB ? AbortProcessAction : PrintMessageAction), DT(0),
- msgs( std::ios::app | std::ios::out ) {}
+ MessagesStr(Messages) {}
explicit Verifier(DominatorTree &dt)
: FunctionPass(&ID),
Broken(false), RealPass(false), action(PrintMessageAction),
- DT(&dt), msgs( std::ios::app | std::ios::out ) {}
+ DT(&dt), MessagesStr(Messages) {}
bool doInitialization(Module &M) {
@@ -205,19 +258,20 @@ namespace {
///
bool abortIfBroken() {
if (!Broken) return false;
- msgs << "Broken module found, ";
+ MessagesStr << "Broken module found, ";
switch (action) {
- default: assert(0 && "Unknown action");
+ default: llvm_unreachable("Unknown action");
case AbortProcessAction:
- msgs << "compilation aborted!\n";
- cerr << msgs.str();
+ MessagesStr << "compilation aborted!\n";
+ errs() << MessagesStr.str();
+ // Clients should choose a different action if aborting is not desired
abort();
case PrintMessageAction:
- msgs << "verification continues.\n";
- cerr << msgs.str();
+ MessagesStr << "verification continues.\n";
+ errs() << MessagesStr.str();
return false;
case ReturnStatusAction:
- msgs << "compilation terminated.\n";
+ MessagesStr << "compilation terminated.\n";
return true;
}
}
@@ -231,9 +285,9 @@ namespace {
void visitFunction(Function &F);
void visitBasicBlock(BasicBlock &BB);
using InstVisitor<Verifier>::visit;
-
+
void visit(Instruction &I);
-
+
void visitTruncInst(TruncInst &I);
void visitZExtInst(ZExtInst &I);
void visitSExtInst(SExtInst &I);
@@ -280,32 +334,32 @@ namespace {
bool isReturnValue, const Value *V);
void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
const Value *V);
+ void VerifyType(const Type *Ty);
void WriteValue(const Value *V) {
if (!V) return;
if (isa<Instruction>(V)) {
- msgs << *V;
+ MessagesStr << *V;
} else {
- WriteAsOperand(msgs, V, true, Mod);
- msgs << "\n";
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << "\n";
}
}
void WriteType(const Type *T) {
if (!T) return;
- raw_os_ostream RO(msgs);
- RO << ' ';
- WriteTypeSymbolic(RO, T, Mod);
+ MessagesStr << ' ';
+ WriteTypeSymbolic(MessagesStr, T, Mod);
}
// CheckFailed - A check failed, so print out the condition and the message
// that failed. This provides a nice place to put a breakpoint if you want
// to see why something is not correct.
- void CheckFailed(const std::string &Message,
+ void CheckFailed(const Twine &Message,
const Value *V1 = 0, const Value *V2 = 0,
const Value *V3 = 0, const Value *V4 = 0) {
- msgs << Message << "\n";
+ MessagesStr << Message.str() << "\n";
WriteValue(V1);
WriteValue(V2);
WriteValue(V3);
@@ -313,14 +367,23 @@ namespace {
Broken = true;
}
- void CheckFailed( const std::string& Message, const Value* V1,
- const Type* T2, const Value* V3 = 0 ) {
- msgs << Message << "\n";
+ void CheckFailed(const Twine &Message, const Value *V1,
+ const Type *T2, const Value *V3 = 0) {
+ MessagesStr << Message.str() << "\n";
WriteValue(V1);
WriteType(T2);
WriteValue(V3);
Broken = true;
}
+
+ void CheckFailed(const Twine &Message, const Type *T1,
+ const Type *T2 = 0, const Type *T3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ Broken = true;
+ }
};
} // End anonymous namespace
@@ -359,14 +422,14 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
"Global is marked as dllimport, but not external", &GV);
-
+
Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
"Only global variables can have appending linkage!", &GV);
if (GV.hasAppendingLinkage()) {
- GlobalVariable &GVar = cast<GlobalVariable>(GV);
- Assert1(isa<ArrayType>(GVar.getType()->getElementType()),
- "Only global arrays can have appending linkage!", &GV);
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
+ Assert1(GVar && isa<ArrayType>(GVar->getType()->getElementType()),
+ "Only global arrays can have appending linkage!", GVar);
}
}
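The hunk above swaps cast<> for dyn_cast<>: cast<> asserts when the type does not match, while dyn_cast<> returns null, letting the verifier report a diagnostic instead of crashing on a non-GlobalVariable with appending linkage. The general shape of that pattern, sketched with plain C++ RTTI and hypothetical types:

    struct GlobalValue { virtual ~GlobalValue() {} };
    struct GlobalVariable : public GlobalValue { };

    void checkAppendingLinkage(GlobalValue &GV) {
      // cast<>-style assumes the conversion succeeds; a mismatch is fatal.
      // dyn_cast<>-style probes first and can report the failure:
      if (GlobalVariable *GVar = dynamic_cast<GlobalVariable *>(&GV)) {
        (void)GVar;  // ...go on to check the element type, as in the patch
      } else {
        // CheckFailed("Only global arrays can have appending linkage!")
      }
    }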
@@ -376,26 +439,13 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
"Global variable initializer type does not match global "
"variable type!", &GV);
- // Verify that any metadata used in a global initializer points only to
- // other globals.
- if (MDNode *FirstNode = dyn_cast<MDNode>(GV.getInitializer())) {
- SmallVector<const MDNode *, 4> NodesToAnalyze;
- NodesToAnalyze.push_back(FirstNode);
- while (!NodesToAnalyze.empty()) {
- const MDNode *N = NodesToAnalyze.back();
- NodesToAnalyze.pop_back();
-
- for (MDNode::const_elem_iterator I = N->elem_begin(),
- E = N->elem_end(); I != E; ++I)
- if (const Value *V = *I) {
- if (const MDNode *Next = dyn_cast<MDNode>(V))
- NodesToAnalyze.push_back(Next);
- else
- Assert3(isa<Constant>(V),
- "reference to instruction from global metadata node",
- &GV, N, V);
- }
- }
+ // If the global has common linkage, it must have a zero initializer and
+ // cannot be constant.
+ if (GV.hasCommonLinkage()) {
+ Assert1(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
}
} else {
Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
@@ -435,6 +485,8 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
}
void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
+ for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I)
+ VerifyType(I->second);
}
// VerifyParameterAttrs - Check the given attributes for an argument or return
@@ -525,16 +577,17 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
if (Attrs.isEmpty())
return true;
-
+
unsigned LastSlot = Attrs.getNumSlots() - 1;
unsigned LastIndex = Attrs.getSlot(LastSlot).Index;
if (LastIndex <= Params
|| (LastIndex == (unsigned)~0
&& (LastSlot == 0 || Attrs.getSlot(LastSlot - 1).Index <= Params)))
return true;
-
+
return false;
}
+
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(Function &F) {
@@ -542,15 +595,16 @@ void Verifier::visitFunction(Function &F) {
const FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
+ Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
Assert2(FT->getNumParams() == NumArgs,
"# formal arguments must match # of arguments for function type!",
&F, FT);
Assert1(F.getReturnType()->isFirstClassType() ||
- F.getReturnType() == Type::VoidTy ||
+ F.getReturnType()->isVoidTy() ||
isa<StructType>(F.getReturnType()),
"Functions cannot return aggregate values!", &F);
- Assert1(!F.hasStructRetAttr() || F.getReturnType() == Type::VoidTy,
+ Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
"Invalid struct return type!", &F);
const AttrListPtr &Attrs = F.getAttributes();
@@ -574,12 +628,9 @@ void Verifier::visitFunction(Function &F) {
"Varargs functions must have C calling conventions!", &F);
break;
}
-
+
bool isLLVMdotName = F.getName().size() >= 5 &&
F.getName().substr(0, 5) == "llvm.";
- if (!isLLVMdotName)
- Assert1(F.getReturnType() != Type::MetadataTy,
- "Function may not return metadata unless it's an intrinsic", &F);
// Check that the argument values match the function type for this function...
unsigned i = 0;
@@ -591,7 +642,7 @@ void Verifier::visitFunction(Function &F) {
Assert1(I->getType()->isFirstClassType(),
"Function arguments must have first-class types!", I);
if (!isLLVMdotName)
- Assert2(I->getType() != Type::MetadataTy,
+ Assert2(!I->getType()->isMetadataTy(),
"Function takes metadata but isn't an intrinsic", I, &F);
}
@@ -609,9 +660,20 @@ void Verifier::visitFunction(Function &F) {
Assert1(pred_begin(Entry) == pred_end(Entry),
"Entry block to function must not have predecessors!", Entry);
}
+
+ // If this function is actually an intrinsic, verify that it is only used in
+ // direct call/invokes, never having its "address taken".
+ if (F.getIntrinsicID()) {
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E;++UI){
+ User *U = cast<User>(UI);
+ if ((isa<CallInst>(U) || isa<InvokeInst>(U)) && UI.getOperandNo() == 0)
+ continue; // Direct calls/invokes are ok.
+
+ Assert1(0, "Invalid user of intrinsic instruction!", U);
+ }
+ }
}
-
// verifyBasicBlock - Verify that a basic block is well formed...
//
void Verifier::visitBasicBlock(BasicBlock &BB) {
@@ -628,7 +690,6 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
std::sort(Preds.begin(), Preds.end());
PHINode *PN;
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
-
// Ensure that PHI nodes have at least one entry!
Assert1(PN->getNumIncomingValues() != 0,
"PHI nodes must have at least one entry. If the block is dead, "
@@ -676,7 +737,7 @@ void Verifier::visitTerminatorInst(TerminatorInst &I) {
void Verifier::visitReturnInst(ReturnInst &RI) {
Function *F = RI.getParent()->getParent();
unsigned N = RI.getNumOperands();
- if (F->getReturnType() == Type::VoidTy)
+ if (F->getReturnType()->isVoidTy())
Assert2(N == 0,
"Found return instr that returns non-void in Function of void "
"return type!", &RI, F->getReturnType());
@@ -704,7 +765,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
CheckFailed("Function return type does not match operand "
"type of return inst!", &RI, F->getReturnType());
}
-
+
// Check to make sure that the return value has necessary properties for
// terminators...
visitTerminatorInst(RI);
@@ -731,7 +792,6 @@ void Verifier::visitSelectInst(SelectInst &SI) {
visitInstruction(SI);
}
-
/// visitUserOp1 - User defined operators shouldn't live beyond the lifetime of
/// a pass, if any exist, it's an error.
///
@@ -856,8 +916,8 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = SrcTy->getTypeID() == Type::VectorTyID;
- bool DstVec = DestTy->getTypeID() == Type::VectorTyID;
+ bool SrcVec = isa<VectorType>(SrcTy);
+ bool DstVec = isa<VectorType>(DestTy);
Assert1(SrcVec == DstVec,
"SIToFP source and dest must both be vector or scalar", &I);
@@ -954,7 +1014,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
// However, you can't cast pointers to anything but pointers.
Assert1(isa<PointerType>(SrcTy) == isa<PointerType>(DestTy),
"Bitcast requires both operands to be pointer or neither", &I);
- Assert1(SrcBitSize == DestBitSize, "Bitcast requies types of same width", &I);
+ Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
// Disallow aggregates.
Assert1(!SrcTy->isAggregateType(),
@@ -977,11 +1037,15 @@ void Verifier::visitPHINode(PHINode &PN) {
"PHI nodes not grouped at top of basic block!",
&PN, PN.getParent());
- // Check that all of the operands of the PHI node have the same type as the
- // result.
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ // Check that all of the values of the PHI node have the same type as the
+ // result, and that the incoming blocks are really basic blocks.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
"PHI node operands are not the same type as the result!", &PN);
+ Assert1(isa<BasicBlock>(PN.getOperand(
+ PHINode::getOperandNumForIncomingBlock(i))),
+ "PHI node incoming block is not a BasicBlock!", &PN);
+ }
// All other PHI node constraints are checked in the visitBasicBlock method.
@@ -994,9 +1058,9 @@ void Verifier::VerifyCallSite(CallSite CS) {
Assert1(isa<PointerType>(CS.getCalledValue()->getType()),
"Called function must be a pointer!", I);
const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+
Assert1(isa<FunctionType>(FPTy->getElementType()),
"Called function is not pointer to function type!", I);
-
const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
// Verify that the correct number of arguments are being passed
@@ -1036,12 +1100,10 @@ void Verifier::VerifyCallSite(CallSite CS) {
// Verify that there's no metadata unless it's a direct call to an intrinsic.
if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 ||
CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") {
- Assert1(FTy->getReturnType() != Type::MetadataTy,
- "Only intrinsics may return metadata", I);
for (FunctionType::param_iterator PI = FTy->param_begin(),
PE = FTy->param_end(); PI != PE; ++PI)
- Assert1(PI->get() != Type::MetadataTy, "Function has metadata parameter "
- "but isn't an intrinsic", I);
+ Assert1(!PI->get()->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
}
visitInstruction(*I);
@@ -1115,7 +1177,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
"Shift return type must be same as operands!", &B);
break;
default:
- assert(0 && "Unknown BinaryOperator opcode!");
+ llvm_unreachable("Unknown BinaryOperator opcode!");
}
visitInstruction(B);
@@ -1202,20 +1264,21 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
void Verifier::visitLoadInst(LoadInst &LI) {
- const Type *ElTy =
- cast<PointerType>(LI.getOperand(0)->getType())->getElementType();
+ const PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+ Assert1(PTy, "Load operand must be a pointer.", &LI);
+ const Type *ElTy = PTy->getElementType();
Assert2(ElTy == LI.getType(),
"Load result type does not match pointer operand type!", &LI, ElTy);
- Assert1(ElTy != Type::MetadataTy, "Can't load metadata!", &LI);
visitInstruction(LI);
}
void Verifier::visitStoreInst(StoreInst &SI) {
- const Type *ElTy =
- cast<PointerType>(SI.getOperand(1)->getType())->getElementType();
+ const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+ Assert1(PTy, "Load operand must be a pointer.", &SI);
+ const Type *ElTy = PTy->getElementType();
Assert2(ElTy == SI.getOperand(0)->getType(),
- "Stored value type does not match pointer operand type!", &SI, ElTy);
- Assert1(ElTy != Type::MetadataTy, "Can't store metadata!", &SI);
+ "Stored value type does not match pointer operand type!",
+ &SI, ElTy);
visitInstruction(SI);
}
@@ -1259,44 +1322,39 @@ void Verifier::visitInstruction(Instruction &I) {
Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
"Only PHI nodes may reference their own value!", &I);
}
-
+
// Verify that if this is a terminator that it is at the end of the block.
if (isa<TerminatorInst>(I))
Assert1(BB->getTerminator() == &I, "Terminator not at end of block!", &I);
-
// Check that void typed values don't have names
- Assert1(I.getType() != Type::VoidTy || !I.hasName(),
+ Assert1(!I.getType()->isVoidTy() || !I.hasName(),
"Instruction has a name, but provides a void value!", &I);
// Check that the return value of the instruction is either void or a legal
// value type.
- Assert1(I.getType() == Type::VoidTy || I.getType()->isFirstClassType()
- || ((isa<CallInst>(I) || isa<InvokeInst>(I))
- && isa<StructType>(I.getType())),
+ Assert1(I.getType()->isVoidTy() ||
+ I.getType()->isFirstClassType(),
"Instruction returns a non-scalar type!", &I);
- // Check that the instruction doesn't produce metadata or metadata*. Calls
- // all already checked against the callee type.
- Assert1(I.getType() != Type::MetadataTy ||
+ // Check that the instruction doesn't produce metadata. Calls are already
+ // checked against the callee type.
+ Assert1(!I.getType()->isMetadataTy() ||
isa<CallInst>(I) || isa<InvokeInst>(I),
"Invalid use of metadata!", &I);
- if (const PointerType *PTy = dyn_cast<PointerType>(I.getType()))
- Assert1(PTy->getElementType() != Type::MetadataTy,
- "Instructions may not produce pointer to metadata.", &I);
-
-
// Check that all uses of the instruction, if they are instructions
// themselves, actually have parent basic blocks. If the use is not an
// instruction, it is an error!
for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
UI != UE; ++UI) {
- Assert1(isa<Instruction>(*UI), "Use of instruction is not an instruction!",
- *UI);
- Instruction *Used = cast<Instruction>(*UI);
- Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
- " embedded in a basic block!", &I, Used);
+ if (Instruction *Used = dyn_cast<Instruction>(*UI))
+ Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
+ " embedded in a basic block!", &I, Used);
+ else {
+ CheckFailed("Use of instruction is not an instruction!", *UI);
+ return;
+ }
}
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
@@ -1308,11 +1366,6 @@ void Verifier::visitInstruction(Instruction &I) {
Assert1(0, "Instruction operands must be first-class values!", &I);
}
- if (const PointerType *PTy =
- dyn_cast<PointerType>(I.getOperand(i)->getType()))
- Assert1(PTy->getElementType() != Type::MetadataTy,
- "Invalid use of metadata pointer.", &I);
-
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
@@ -1346,7 +1399,9 @@ void Verifier::visitInstruction(Instruction &I) {
// value in the predecessor basic blocks they correspond to.
BasicBlock *UseBlock = BB;
if (isa<PHINode>(I))
- UseBlock = cast<BasicBlock>(I.getOperand(i+1));
+ UseBlock = dyn_cast<BasicBlock>(I.getOperand(i+1));
+ Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
+ Op, &I);
if (isa<PHINode>(I) && UseBlock == OpBlock) {
// Special case of a phi node in the normal destination or the unwind
@@ -1379,9 +1434,9 @@ void Verifier::visitInstruction(Instruction &I) {
} else if (isa<PHINode>(I)) {
// PHI nodes are more difficult than other nodes because they actually
// "use" the value in the predecessor basic blocks they correspond to.
- BasicBlock *PredBB = cast<BasicBlock>(I.getOperand(i+1));
- Assert2(DT->dominates(OpBlock, PredBB) ||
- !DT->isReachableFromEntry(PredBB),
+ BasicBlock *PredBB = dyn_cast<BasicBlock>(I.getOperand(i+1));
+ Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
+ !DT->isReachableFromEntry(PredBB)),
"Instruction does not dominate all uses!", Op, &I);
} else {
if (OpBlock == BB) {
@@ -1402,6 +1457,61 @@ void Verifier::visitInstruction(Instruction &I) {
}
}
InstsInThisBlock.insert(&I);
+
+ VerifyType(I.getType());
+}
+
+/// VerifyType - Verify that a type is well formed.
+///
+void Verifier::VerifyType(const Type *Ty) {
+ if (!Types.insert(Ty)) return;
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+
+ const Type *RetTy = FTy->getReturnType();
+ Assert2(FunctionType::isValidReturnType(RetTy),
+ "Function type with invalid return type", RetTy, FTy);
+ VerifyType(RetTy);
+
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ const Type *ElTy = FTy->getParamType(i);
+ Assert2(FunctionType::isValidArgumentType(ElTy),
+ "Function type with invalid parameter type", ElTy, FTy);
+ VerifyType(ElTy);
+ }
+ } break;
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ const Type *ElTy = STy->getElementType(i);
+ Assert2(StructType::isValidElementType(ElTy),
+ "Structure type with invalid element type", ElTy, STy);
+ VerifyType(ElTy);
+ }
+ } break;
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ Assert1(ArrayType::isValidElementType(ATy->getElementType()),
+ "Array type with invalid element type", ATy);
+ VerifyType(ATy->getElementType());
+ } break;
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ Assert1(PointerType::isValidElementType(PTy->getElementType()),
+ "Pointer type with invalid element type", PTy);
+ VerifyType(PTy->getElementType());
+ } break;
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ Assert1(VectorType::isValidElementType(VTy->getElementType()),
+ "Vector type with invalid element type", VTy);
+ VerifyType(VTy->getElementType());
+ } break;
+ default:
+ break;
+ }
}
// Flags used by TableGen to mark intrinsic parameters with the
@@ -1415,11 +1525,11 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Function *IF = CI.getCalledFunction();
Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
IF);
-
+
#define GET_INTRINSIC_VERIFIER
#include "llvm/Intrinsics.gen"
#undef GET_INTRINSIC_VERIFIER
-
+
switch (ID) {
default:
break;
@@ -1446,7 +1556,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Assert1(isa<Constant>(CI.getOperand(2)),
"llvm.gcroot parameter #2 must be a constant.", &CI);
}
-
+
Assert1(CI.getParent()->getParent()->hasGC(),
"Enclosing function does not use GC.", &CI);
break;
@@ -1468,6 +1578,17 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
"llvm.stackprotector parameter #2 must resolve to an alloca.",
&CI);
break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Assert1(isa<ConstantInt>(CI.getOperand(1)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
+ break;
+ case Intrinsic::invariant_end:
+ Assert1(isa<ConstantInt>(CI.getOperand(2)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ break;
}
}
@@ -1541,9 +1662,9 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
return false;
}
} else {
- if (Ty != FTy->getParamType(Match - 1)) {
+ if (Ty != FTy->getParamType(Match - NumRets)) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not "
- "match parameter %" + utostr(Match - 1) + ".", F);
+ "match parameter %" + utostr(Match - NumRets) + ".", F);
return false;
}
}
@@ -1584,7 +1705,13 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
if (EltTy != Ty)
Suffix += "v" + utostr(NumElts);
- Suffix += MVT::getMVT(EltTy).getMVTString();
+ Suffix += EVT::getEVT(EltTy).getEVTString();
+ } else if (VT == MVT::vAny) {
+ if (!VTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a vector type.", F);
+ return false;
+ }
+ Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString();
} else if (VT == MVT::iPTR) {
if (!isa<PointerType>(Ty)) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
@@ -1597,17 +1724,17 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
// allow either case to be legal.
if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
- MVT::getMVT(PTyp->getElementType()).getMVTString();
+ EVT::getEVT(PTyp->getElementType()).getEVTString();
} else {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
"pointer and a pointer is required.", F);
return false;
}
- } else if (MVT((MVT::SimpleValueType)VT).isVector()) {
- MVT VVT = MVT((MVT::SimpleValueType)VT);
+ } else if (EVT((MVT::SimpleValueType)VT).isVector()) {
+ EVT VVT = EVT((MVT::SimpleValueType)VT);
// If this is a vector argument, verify the number and type of elements.
- if (VVT.getVectorElementType() != MVT::getMVT(EltTy)) {
+ if (VVT.getVectorElementType() != EVT::getEVT(EltTy)) {
CheckFailed("Intrinsic prototype has incorrect vector element type!", F);
return false;
}
@@ -1617,7 +1744,8 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
"vector elements!", F);
return false;
}
- } else if (MVT((MVT::SimpleValueType)VT).getTypeForMVT() != EltTy) {
+ } else if (EVT((MVT::SimpleValueType)VT).getTypeForEVT(Ty->getContext()) !=
+ EltTy) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is wrong!", F);
return false;
} else if (EltTy != Ty) {
@@ -1638,7 +1766,7 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
va_list VA;
va_start(VA, ParamNum);
const FunctionType *FTy = F->getFunctionType();
-
+
// For overloaded intrinsics, the Suffix of the function name must match the
// types of the arguments. This variable keeps track of the expected
// suffix, to be checked at the end.
@@ -1739,10 +1867,8 @@ bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
Verifier *V = new Verifier(action);
PM.add(V);
PM.run(const_cast<Module&>(M));
-
+
if (ErrorInfo && V->Broken)
- *ErrorInfo = V->msgs.str();
+ *ErrorInfo = V->MessagesStr.str();
return V->Broken;
}
-
-// vim: sw=2
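
A note on the VerifyType hunk above: the early return on Types.insert(Ty) is what keeps the recursion finite, since LLVM type graphs may be cyclic and every composite type recurses into its element types. Below is a minimal standalone C++ sketch of that memoized-recursion shape, using hypothetical stand-in types rather than LLVM's real Type classes:

#include <cassert>
#include <cstdio>
#include <set>
#include <vector>

// Hypothetical stand-ins for illustration; LLVM's real Type classes differ.
struct Ty {
  enum Kind { Int, Void, Struct } kind;
  std::vector<const Ty*> elems;   // element types when kind == Struct
};

static bool isValidElementType(const Ty *T) { return T->kind != Ty::Void; }

// Same shape as the Verifier::VerifyType hunk: the visited set makes the
// recursion terminate even on cyclic type graphs, and each composite type
// checks its element types before recursing into them.
static void verifyType(const Ty *T, std::set<const Ty*> &Visited) {
  if (!Visited.insert(T).second) return;   // already checked
  if (T->kind == Ty::Struct)
    for (const Ty *El : T->elems) {
      assert(isValidElementType(El) && "struct with invalid element type");
      verifyType(El, Visited);
    }
}

int main() {
  Ty I = {Ty::Int, {}};
  Ty S = {Ty::Struct, {&I, &I}};
  std::set<const Ty*> Visited;
  verifyType(&S, Visited);
  std::printf("ok\n");
  return 0;
}
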
diff --git a/projects/sample/configure b/projects/sample/configure
index 2efb85397b7c..f6aa8476eab1 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -2320,15 +2320,15 @@ esac
echo "$as_me: executing $ac_dest commands" >&6;}
case $ac_dest in
Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/Makefile Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;;
lib/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/lib/Makefile lib/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;;
lib/sample/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/sample/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/lib/sample/Makefile lib/sample/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/sample/Makefile lib/sample/Makefile ;;
tools/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/tools/Makefile tools/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;;
tools/sample/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/sample/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/tools/sample/Makefile tools/sample/Makefile ;;
+ ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/sample/Makefile tools/sample/Makefile ;;
esac
done
_ACEOF
diff --git a/runtime/Makefile b/runtime/Makefile
index 9937c11e3bc3..1e1045142583 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -10,16 +10,9 @@
LEVEL = ..
include $(LEVEL)/Makefile.config
-ifneq ($(wildcard $(LLVMGCC)),)
+ifndef NO_RUNTIME_LIBS
+
PARALLEL_DIRS := libprofile
-else
-PARALLEL_DIRS :=
-install all ::
- @echo '********' Warning: Your LLVMGCCDIR is set incorrectly. Check
- @echo '********' Warning: llvm/Makefile.config to make sure it matches
- @echo '********' Warning: the directory where the C front-end is
- @echo '********' Warning: installed,and re-run configure if it does not.
-endif
# Disable libprofile: a faulty libtool is generated by autoconf which breaks the
# build on Sparc
@@ -27,9 +20,8 @@ ifeq ($(ARCH), Sparc)
PARALLEL_DIRS := $(filter-out libprofile, $(PARALLEL_DIRS))
endif
+endif
+
include $(LEVEL)/Makefile.common
-# Install target for libraries: Copy into $LLVMGCCDIR/bytecode-libs
-#
install::
-
diff --git a/runtime/libprofile/Makefile b/runtime/libprofile/Makefile
index 8ff46fabdd17..92a85584db17 100644
--- a/runtime/libprofile/Makefile
+++ b/runtime/libprofile/Makefile
@@ -8,12 +8,15 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
+include $(LEVEL)/Makefile.config
+
+ifneq ($(wildcard $(LLVMGCC)),)
BYTECODE_LIBRARY = 1
+endif
SHARED_LIBRARY = 1
LOADABLE_MODULE = 1
LIBRARYNAME = profile_rt
EXTRA_DIST = exported_symbols.lst
EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/exported_symbols.lst
-BYTECODE_DESTINATION = $(CFERuntimeLibDir)
include $(LEVEL)/Makefile.common
diff --git a/runtime/libprofile/OptimalEdgeProfiling.c b/runtime/libprofile/OptimalEdgeProfiling.c
new file mode 100644
index 000000000000..eb7887b2aea9
--- /dev/null
+++ b/runtime/libprofile/OptimalEdgeProfiling.c
@@ -0,0 +1,45 @@
+/*===-- OptimalEdgeProfiling.c - Support library for opt. edge profiling --===*\
+|*
+|* The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+|*===----------------------------------------------------------------------===*|
+|*
+|* This file implements the callback routines for the optimal edge profiling
+|* instrumentation pass. This should be used with the
+|* -insert-opt-edge-profiling LLVM pass.
+|*
+\*===----------------------------------------------------------------------===*/
+
+#include "Profiling.h"
+#include <stdlib.h>
+
+static unsigned *ArrayStart;
+static unsigned NumElements;
+
+/* OptEdgeProfAtExitHandler - When the program exits, just write out the
+ * profiling data.
+ */
+static void OptEdgeProfAtExitHandler() {
+  /* Note that, although the array has a counter for each edge, not all
+   * counters are updated; the unused ones are initialised with -1. When
+   * loading this information, the counters with value -1 have to be
+   * recalculated; it is guaranteed that this is possible.
+   */
+ write_profiling_data(OptEdgeInfo, ArrayStart, NumElements);
+}
+
+
+/* llvm_start_opt_edge_profiling - This is the main entry point of the edge
+ * profiling library. It is responsible for setting up the atexit handler.
+ */
+int llvm_start_opt_edge_profiling(int argc, const char **argv,
+ unsigned *arrayStart, unsigned numElements) {
+ int Ret = save_arguments(argc, argv);
+ ArrayStart = arrayStart;
+ NumElements = numElements;
+ atexit(OptEdgeProfAtExitHandler);
+ return Ret;
+}
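
The new runtime file above follows a common pattern: the instrumented program calls a start routine that records the counter array and registers an atexit handler, and the handler writes the data out when the program exits. A self-contained sketch of that pattern, where dumpAtExit and startEdgeProfiling are illustrative stand-ins for write_profiling_data and llvm_start_opt_edge_profiling:

#include <cstdio>
#include <cstdlib>

// Illustrative stand-ins: dumpAtExit plays the role of write_profiling_data,
// startEdgeProfiling that of llvm_start_opt_edge_profiling.
static unsigned *Counters;
static unsigned NumCounters;

static void dumpAtExit() {
  // Counters left at -1 (i.e. ~0u) were never instrumented; the reader of
  // the profile is expected to recompute them from the counted edges.
  for (unsigned i = 0; i != NumCounters; ++i)
    std::printf("edge %u: %u\n", i, Counters[i]);
}

int startEdgeProfiling(unsigned *arrayStart, unsigned numElements) {
  Counters = arrayStart;
  NumCounters = numElements;
  std::atexit(dumpAtExit);   // dump happens once, when the program exits
  return 0;
}

int main() {
  static unsigned counters[3] = {5, ~0u, 7};
  return startEdgeProfiling(counters, 3);
}
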
diff --git a/runtime/libprofile/exported_symbols.lst b/runtime/libprofile/exported_symbols.lst
index 6f6c3cc840c5..45c6d5efe5d5 100644
--- a/runtime/libprofile/exported_symbols.lst
+++ b/runtime/libprofile/exported_symbols.lst
@@ -1,5 +1,7 @@
llvm_start_func_profiling
llvm_start_block_profiling
+llvm_start_edge_profiling
+llvm_start_opt_edge_profiling
llvm_start_basic_block_tracing
llvm_trace_basic_block
diff --git a/test/Analysis/Andersens/2007-11-19-InlineAsm.ll b/test/Analysis/Andersens/2007-11-19-InlineAsm.ll
index c1ab6c7b1a4f..5ba34997e93d 100644
--- a/test/Analysis/Andersens/2007-11-19-InlineAsm.ll
+++ b/test/Analysis/Andersens/2007-11-19-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -disable-output
+; RUN: opt < %s -anders-aa -disable-output
define void @x(i16 %Y) {
entry:
diff --git a/test/Analysis/Andersens/2008-03-19-External.ll b/test/Analysis/Andersens/2008-03-19-External.ll
index c4f1ff083855..a9731037669b 100644
--- a/test/Analysis/Andersens/2008-03-19-External.ll
+++ b/test/Analysis/Andersens/2008-03-19-External.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -gvn | llvm-dis | not grep undef
+; RUN: opt < %s -anders-aa -gvn -S | not grep undef
; PR2160
declare void @f(i32*)
diff --git a/test/Analysis/Andersens/2008-04-07-Memcpy.ll b/test/Analysis/Andersens/2008-04-07-Memcpy.ll
index 935444991a38..5a50dd56214c 100644
--- a/test/Analysis/Andersens/2008-04-07-Memcpy.ll
+++ b/test/Analysis/Andersens/2008-04-07-Memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -gvn | llvm-dis | not grep undef
+; RUN: opt < %s -anders-aa -gvn -S | not grep undef
; PR2169
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
diff --git a/test/Analysis/Andersens/2008-12-27-BuiltinWrongType.ll b/test/Analysis/Andersens/2008-12-27-BuiltinWrongType.ll
index 5f5da7464db9..da6751139f2e 100644
--- a/test/Analysis/Andersens/2008-12-27-BuiltinWrongType.ll
+++ b/test/Analysis/Andersens/2008-12-27-BuiltinWrongType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa
+; RUN: opt < %s -anders-aa
; PR3262
@.str15 = external global [3 x i8] ; <[3 x i8]*> [#uses=1]
diff --git a/test/Analysis/Andersens/basictest.ll b/test/Analysis/Andersens/basictest.ll
index 0005e09b2e7e..47226dd8da7a 100644
--- a/test/Analysis/Andersens/basictest.ll
+++ b/test/Analysis/Andersens/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -aa-eval 2>/dev/null
+; RUN: opt < %s -anders-aa -aa-eval 2>/dev/null
define void @test1() {
%X = malloc i32*
diff --git a/test/Analysis/Andersens/external.ll b/test/Analysis/Andersens/external.ll
index 8a4be2590ddf..13c12dc558a7 100644
--- a/test/Analysis/Andersens/external.ll
+++ b/test/Analysis/Andersens/external.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -gvn -deadargelim | llvm-dis | grep store | not grep null
+; RUN: opt < %s -anders-aa -gvn -deadargelim -S | grep store | not grep null
; Because the 'internal' function is passed to an external function, we don't
; know what the incoming values will alias. As such, we cannot do the
diff --git a/test/Analysis/Andersens/modreftest.ll b/test/Analysis/Andersens/modreftest.ll
index f86c7f74d17f..e0c2edc4c237 100644
--- a/test/Analysis/Andersens/modreftest.ll
+++ b/test/Analysis/Andersens/modreftest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -gvn -instcombine | llvm-dis \
+; RUN: opt < %s -anders-aa -gvn -instcombine -S \
; RUN: | grep {ret i1 true}
@G = internal global i32* null
diff --git a/test/Analysis/Andersens/modreftest2.ll b/test/Analysis/Andersens/modreftest2.ll
index 0ba91df857a6..562c961e7d46 100644
--- a/test/Analysis/Andersens/modreftest2.ll
+++ b/test/Analysis/Andersens/modreftest2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -gvn | llvm-dis \
+; RUN: opt < %s -anders-aa -gvn -S \
; RUN: | not grep {ret i32 undef}
;; From PR 2160
diff --git a/test/Analysis/Andersens/trivialtest.ll b/test/Analysis/Andersens/trivialtest.ll
index ce37516b3684..f9f938f8f1f9 100644
--- a/test/Analysis/Andersens/trivialtest.ll
+++ b/test/Analysis/Andersens/trivialtest.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | opt -anders-aa -disable-output
+; RUN: opt < %s -anders-aa -disable-output
define void @foo() { ret void }
diff --git a/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll b/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
index 8ba66df8b99f..6b50a168cd0a 100644
--- a/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
+++ b/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
@@ -2,7 +2,7 @@
; is performed. It is not legal to delete the second load instruction because
; the value computed by the first load instruction is changed by the store.
-; RUN: llvm-as < %s | opt -gvn -instcombine | llvm-dis | grep DONOTREMOVE
+; RUN: opt < %s -gvn -instcombine -S | grep DONOTREMOVE
define i32 @test() {
%A = alloca i32
diff --git a/test/Analysis/BasicAA/2003-03-04-GEPCrash.ll b/test/Analysis/BasicAA/2003-03-04-GEPCrash.ll
index 0a15deb9baa6..4f8eabb79308 100644
--- a/test/Analysis/BasicAA/2003-03-04-GEPCrash.ll
+++ b/test/Analysis/BasicAA/2003-03-04-GEPCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -aa-eval -disable-output 2>/dev/null
+; RUN: opt < %s -basicaa -aa-eval -disable-output 2>/dev/null
; Test for a bug in BasicAA which caused a crash when querying equality of P1&P2
define void @test({[2 x i32],[2 x i32]}* %A, i64 %X, i64 %Y) {
%P1 = getelementptr {[2 x i32],[2 x i32]}* %A, i64 0, i32 0, i64 %X
diff --git a/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll b/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
index 4b3cc6a0a647..f7e82951da76 100644
--- a/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
+++ b/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -instcombine | llvm-dis | grep sub
+; RUN: opt < %s -gvn -instcombine -S | grep sub
; BasicAA was incorrectly concluding that P1 and P2 didn't conflict!
diff --git a/test/Analysis/BasicAA/2003-04-25-GEPCrash.ll b/test/Analysis/BasicAA/2003-04-25-GEPCrash.ll
index 845613150eac..97bc38eb69bf 100644
--- a/test/Analysis/BasicAA/2003-04-25-GEPCrash.ll
+++ b/test/Analysis/BasicAA/2003-04-25-GEPCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -aa-eval -disable-output 2>/dev/null
+; RUN: opt < %s -basicaa -aa-eval -disable-output 2>/dev/null
; Test for a bug in BasicAA which caused a crash when querying equality of P1&P2
define void @test([17 x i16]* %mask_bits) {
%P1 = getelementptr [17 x i16]* %mask_bits, i64 0, i64 0
diff --git a/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll b/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
index c9049c8cc439..d439dfc530ba 100644
--- a/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
+++ b/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
%struct..apr_array_header_t = type { i32*, i32, i32, i32, i8* }
%struct..apr_table_t = type { %struct..apr_array_header_t, i32, [32 x i32], [32 x i32] }
diff --git a/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll b/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll
index c673a323001e..0abd3847836c 100644
--- a/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll
+++ b/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -aa-eval -disable-output 2>/dev/null
+; RUN: opt < %s -basicaa -aa-eval -disable-output 2>/dev/null
define i32 @MTConcat([3 x i32]* %a.1) {
%tmp.961 = getelementptr [3 x i32]* %a.1, i64 0, i64 4
diff --git a/test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll b/test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll
index d385961780d3..3e813fa2ca18 100644
--- a/test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll
+++ b/test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -aa-eval -disable-output 2>/dev/null
+; RUN: opt < %s -basicaa -aa-eval -disable-output 2>/dev/null
%struct..RefPoint = type { i32, { i32, i8, i8 } }
%struct..RefRect = type { %struct..RefPoint, %struct..RefPoint }
diff --git a/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll b/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
index e2bb86dcb226..637d8f0db486 100644
--- a/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
+++ b/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
@@ -1,6 +1,6 @@
; In this test, a local alloca cannot alias an incoming argument.
-; RUN: llvm-as < %s | opt -gvn -instcombine | llvm-dis | not grep sub
+; RUN: opt < %s -gvn -instcombine -S | not grep sub
define i32 @test(i32* %P) {
%X = alloca i32
diff --git a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
index 99eae1660de8..911f78cc827e 100644
--- a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
+++ b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
@@ -1,7 +1,7 @@
; This testcase consists of alias relations which should be completely
; resolvable by basicaa.
-; RUN: llvm-as < %s | opt -aa-eval -print-may-aliases -disable-output \
+; RUN: opt < %s -aa-eval -print-may-aliases -disable-output \
; RUN: |& not grep May:
%T = type { i32, [10 x i8] }
diff --git a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
index 639cb0a2f82c..8166b979ddab 100644
--- a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
+++ b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
@@ -1,7 +1,7 @@
; This testcase consists of alias relations which should be completely
; resolvable by basicaa, but require analysis of getelementptr constant exprs.
-; RUN: llvm-as < %s | opt -aa-eval -print-may-aliases -disable-output \
+; RUN: opt < %s -aa-eval -print-may-aliases -disable-output \
; RUN: |& not grep May:
%T = type { i32, [10 x i8] }
diff --git a/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll b/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
index 407932c9d6c5..e1cfd0348f35 100644
--- a/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
+++ b/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | grep {store i32 0}
+; RUN: opt < %s -dse -S | grep {store i32 0}
define void @test({i32,i32 }* %P) {
%Q = getelementptr {i32,i32}* %P, i32 1
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
index 58d4da16e6e6..81248db32881 100644
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
+++ b/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm
+; RUN: opt < %s -licm
%"java/lang/Object" = type { %struct.llvm_java_object_base }
%"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
index d96438fd43d2..0e03db330c04 100644
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
+++ b/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse
+; RUN: opt < %s -dse
%"java/lang/Object" = type { %struct.llvm_java_object_base }
%"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
diff --git a/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll b/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll
index 21c86b774c5a..4564263de015 100644
--- a/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll
+++ b/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine |\
+; RUN: opt < %s -basicaa -gvn -instcombine |\
; RUN: llvm-dis | grep {load i32\\* %A}
declare double* @useit(i32*)
diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
index b8e30198e371..5d08312791f2 100644
--- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
+++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -aa-eval -disable-output |& grep {2 no alias respon}
+; RUN: opt < %s -aa-eval -disable-output |& grep {2 no alias respon}
; TEST that A[1][0] may alias A[0][i].
define void @test(i32 %N) {
diff --git a/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll b/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll
index cc10e4b81bfd..85f53a6cda48 100644
--- a/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll
+++ b/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll b/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
index 08c483d6d76c..917bf2554dca 100644
--- a/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
+++ b/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
@@ -1,7 +1,7 @@
; PR1109
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine | llvm-dis | \
+; RUN: opt < %s -basicaa -gvn -instcombine -S | \
; RUN: grep {sub i32}
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine | llvm-dis | \
+; RUN: opt < %s -basicaa -gvn -instcombine -S | \
; RUN: not grep {ret i32 0}
; END.
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll
index 3b6eb11b6ae4..e6a26e30c023 100644
--- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll
+++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {MayAlias:.*i32\\* %., i32\\* %.} | grep {%x} | grep {%y}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {MayAlias:.*i32\\* %., i32\\* %.} | grep {%x} | grep {%y}
declare i32* @unclear(i32* %a)
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
index 9936afb19560..7f33fa4a2d08 100644
--- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
+++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as %s -o - | opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {9 no alias}
-; RUN: llvm-as %s -o - | opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {6 may alias}
-; RUN: llvm-as %s -o - | opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {MayAlias:.*i32\\* %Ipointer, i32\\* %Jpointer}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {9 no alias}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {6 may alias}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {MayAlias:.*i32\\* %Ipointer, i32\\* %Jpointer}
define void @foo(i32* noalias %p, i32* noalias %q, i32 %i, i32 %j) {
%Ipointer = getelementptr i32* %p, i32 %i
diff --git a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
index 15aaa02a6ef9..035299e0ac82 100644
--- a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
+++ b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
@@ -1,5 +1,5 @@
; PR1600
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine | llvm-dis | \
+; RUN: opt < %s -basicaa -gvn -instcombine -S | \
; RUN: grep {ret i32 0}
; END.
diff --git a/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll b/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
index 61ab80d5475a..78f24b5e305d 100644
--- a/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
+++ b/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn -dce | llvm-dis | grep tmp7
+; RUN: opt < %s -basicaa -gvn -dce -S | grep tmp7
%struct.A = type { i32 }
%struct.B = type { %struct.A }
diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index 5a938cfef05f..f0f1535da746 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -disable-output
+; RUN: opt < %s -gvn -disable-output
; PR1774
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index 2f0c769ee58d..8028afb0d003 100644
--- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -disable-output
+; RUN: opt < %s -gvn -disable-output
; PR1782
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/BasicAA/2008-04-15-Byval.ll b/test/Analysis/BasicAA/2008-04-15-Byval.ll
index ee16909977d1..2069401628d2 100644
--- a/test/Analysis/BasicAA/2008-04-15-Byval.ll
+++ b/test/Analysis/BasicAA/2008-04-15-Byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llvm-dis | grep store
+; RUN: opt < %s -std-compile-opts -S | grep store
; ModuleID = 'small2.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll b/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
index 40d1e32d6730..ba29f3abcffe 100644
--- a/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
+++ b/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -disable-output
+; RUN: opt < %s -gvn -disable-output
; PR2395
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
index d2e823ee8a48..06018ccd5b57 100644
--- a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
+++ b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -aa-eval |& grep {1 no alias response}
+; RUN: opt < %s -aa-eval |& grep {1 no alias response}
declare noalias i32* @_Znwj(i32 %x) nounwind
diff --git a/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll b/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll
index 967a36edcb3a..aaf9061953e7 100644
--- a/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll
+++ b/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -aa-eval -print-all-alias-modref-info -disable-output |& grep {MustAlias:.*%R,.*%r}
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& grep {MustAlias:.*%R,.*%r}
; Make sure that basicaa thinks R and r are must aliases.
define i32 @test(i8 * %P) {
diff --git a/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll b/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
index f3891ec75f08..3ab5d03ca8c4 100644
--- a/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
+++ b/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn | llvm-dis | grep load
+; RUN: opt < %s -basicaa -gvn -S | grep load
declare noalias i32* @noalias()
diff --git a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
new file mode 100644
index 000000000000..3ccbc2f04f37
--- /dev/null
+++ b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
@@ -0,0 +1,16 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8)
+
+define void @foo(i8* %ptr) {
+ %P = getelementptr i8* %ptr, i32 0
+ %Q = getelementptr i8* %ptr, i32 1
+; CHECK: getelementptr
+ %X = load i8* %P
+; CHECK: = load
+ %Y = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %Q, i8 1)
+ %Z = load i8* %P
+; CHECK-NOT: = load
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
new file mode 100644
index 000000000000..771636f42cfa
--- /dev/null
+++ b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& grep {NoAlias:.*%P,.*@Z}
+; If the GEP base doesn't alias Z, then the GEP doesn't alias Z either.
+; rdar://7282591
+
+@Y = common global i32 0
+@Z = common global i32 0
+
+define void @foo(i32 %cond) nounwind ssp {
+entry:
+ %a = alloca i32
+ %tmp = icmp ne i32 %cond, 0
+ br i1 %tmp, label %bb, label %bb1
+
+bb:
+ %b = getelementptr i32* %a, i32 0
+ br label %bb2
+
+bb1:
+ br label %bb2
+
+bb2:
+ %P = phi i32* [ %b, %bb ], [ @Y, %bb1 ]
+ %tmp1 = load i32* @Z, align 4
+ store i32 123, i32* %P, align 4
+ %tmp2 = load i32* @Z, align 4
+ br label %return
+
+return:
+ ret void
+}
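
The rule stated at the top of this new test (a GEP cannot alias anything its base pointer cannot alias) can be sketched as a tiny recursive alias query over a deliberately simplified pointer model; the types and the "distinct objects never alias" rule here are hypothetical, not BasicAA's real interface:

#include <cstdio>

// Hypothetical simplified pointer model, not BasicAA's real interface.
enum AliasResult { NoAlias, MayAlias };

struct Ptr {
  const Ptr *base;   // non-null when this pointer is a GEP off another one
};

static AliasResult alias(const Ptr *A, const Ptr *B);

// A GEP only offsets within its base object, so NoAlias for the base
// implies NoAlias for the GEP itself.
static AliasResult aliasGEP(const Ptr *GEP, const Ptr *B) {
  if (alias(GEP->base, B) == NoAlias)
    return NoAlias;
  return MayAlias;
}

static AliasResult alias(const Ptr *A, const Ptr *B) {
  if (A == B) return MayAlias;        // identical pointers
  if (A->base) return aliasGEP(A, B);
  if (B->base) return aliasGEP(B, A);
  return NoAlias;                     // distinct identified objects (toy rule)
}

int main() {
  Ptr Z = {nullptr}, Y = {nullptr}, P = {&Y};  // P is a GEP off Y
  std::printf("%s\n", alias(&P, &Z) == NoAlias ? "NoAlias" : "MayAlias");
  return 0;
}
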
diff --git a/test/Analysis/BasicAA/byval.ll b/test/Analysis/BasicAA/byval.ll
index f0644198b7db..cdcafdf474f3 100644
--- a/test/Analysis/BasicAA/byval.ll
+++ b/test/Analysis/BasicAA/byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {ret i32 1}
+; RUN: opt < %s -gvn -S | grep {ret i32 1}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
%struct.x = type { i32, i32, i32, i32 }
diff --git a/test/Analysis/BasicAA/cas.ll b/test/Analysis/BasicAA/cas.ll
index 9bbb5e7c373e..4ce7811634e2 100644
--- a/test/Analysis/BasicAA/cas.ll
+++ b/test/Analysis/BasicAA/cas.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn | llvm-dis | grep load | count 1
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0}
@flag0 = internal global i32 zeroinitializer
@turn = internal global i32 zeroinitializer
@@ -6,9 +6,10 @@
define i32 @main() {
%a = load i32* @flag0
- %b = tail call i32 @llvm.atomic.swap.i32.p0i32(i32* @turn, i32 1)
+ %b = tail call i32 @llvm.atomic.swap.i32.p0i32(i32* @turn, i32 1)
%c = load i32* @flag0
- ret i32 %c
+ %d = sub i32 %a, %c
+ ret i32 %d
}
 declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind
\ No newline at end of file
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index e92995be5e69..95f94d096f35 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -aa-eval -print-all-alias-modref-info \
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info \
; RUN: |& grep {MayAlias: double\\* \[%\]p.0.i.0, double\\* \[%\]p3\$}
; PR4267
diff --git a/test/Analysis/BasicAA/featuretest.ll b/test/Analysis/BasicAA/featuretest.ll
index e807f882219f..737ee4535034 100644
--- a/test/Analysis/BasicAA/featuretest.ll
+++ b/test/Analysis/BasicAA/featuretest.ll
@@ -1,7 +1,7 @@
; This testcase tests for various features the basicaa test should be able to
; determine, as noted in the comments.
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine -dce | llvm-dis | not grep REMOVE
+; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | not grep REMOVE
@Global = external global { i32 }
diff --git a/test/Analysis/BasicAA/gcsetest.ll b/test/Analysis/BasicAA/gcsetest.ll
index 1d55ca9a2a94..a903362beb99 100644
--- a/test/Analysis/BasicAA/gcsetest.ll
+++ b/test/Analysis/BasicAA/gcsetest.ll
@@ -2,8 +2,8 @@
; disambiguating some obvious cases. All loads should be removable in
; this testcase.
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine -dce \
-; RUN: | llvm-dis | not grep load
+; RUN: opt < %s -basicaa -gvn -instcombine -dce -S \
+; RUN: | not grep load
@A = global i32 7
@B = global i32 8
diff --git a/test/Analysis/BasicAA/global-size.ll b/test/Analysis/BasicAA/global-size.ll
index ce92a690d410..0a643d4d080d 100644
--- a/test/Analysis/BasicAA/global-size.ll
+++ b/test/Analysis/BasicAA/global-size.ll
@@ -1,7 +1,7 @@
 ; A store or load cannot alias a global if the accessed amount is larger than
; the global.
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine | llvm-dis | not grep load
+; RUN: opt < %s -basicaa -gvn -instcombine -S | not grep load
@B = global i16 8 ; <i16*> [#uses=2]
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 819f956eaa10..8f7c0a75d52e 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -1,5 +1,5 @@
; A very rudimentary test on AliasAnalysis::getModRefInfo.
-; RUN: llvm-as < %s | opt -print-all-alias-modref-info -aa-eval -disable-output |& \
+; RUN: opt < %s -print-all-alias-modref-info -aa-eval -disable-output |& \
; RUN: not grep NoModRef
define i32 @callee() {
diff --git a/test/Analysis/BasicAA/no-escape-call.ll b/test/Analysis/BasicAA/no-escape-call.ll
index ab1fea78e959..ccabce9b7b44 100644
--- a/test/Analysis/BasicAA/no-escape-call.ll
+++ b/test/Analysis/BasicAA/no-escape-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine | llvm-dis | grep {ret i1 true}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i1 true}
; PR2436
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Analysis/BasicAA/nocapture.ll b/test/Analysis/BasicAA/nocapture.ll
index 0ca444c1cacb..7970fbb9a03d 100644
--- a/test/Analysis/BasicAA/nocapture.ll
+++ b/test/Analysis/BasicAA/nocapture.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine | llvm-dis | grep {ret i32 0}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0}
declare i32* @test(i32* nocapture)
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
new file mode 100644
index 000000000000..02889600fb8f
--- /dev/null
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {NoAlias:.*%P,.*@Z}
+; rdar://7282591
+
+@X = common global i32 0
+@Y = common global i32 0
+@Z = common global i32 0
+
+define void @foo(i32 %cond) nounwind ssp {
+entry:
+ %"alloca point" = bitcast i32 0 to i32
+ %tmp = icmp ne i32 %cond, 0
+ br i1 %tmp, label %bb, label %bb1
+
+bb:
+ br label %bb2
+
+bb1:
+ br label %bb2
+
+bb2:
+ %P = phi i32* [ @X, %bb ], [ @Y, %bb1 ]
+ %tmp1 = load i32* @Z, align 4
+ store i32 123, i32* %P, align 4
+ %tmp2 = load i32* @Z, align 4
+ br label %return
+
+return:
+ ret void
+}
diff --git a/test/Analysis/BasicAA/pure-const-dce.ll b/test/Analysis/BasicAA/pure-const-dce.ll
index b01b5c5cb81c..54e6e79a5e50 100644
--- a/test/Analysis/BasicAA/pure-const-dce.ll
+++ b/test/Analysis/BasicAA/pure-const-dce.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn | llvm-dis | grep TestConst | count 2
-; RUN: llvm-as < %s | opt -basicaa -gvn | llvm-dis | grep TestPure | count 3
-; RUN: llvm-as < %s | opt -basicaa -gvn | llvm-dis | grep TestNone | count 4
+; RUN: opt < %s -basicaa -gvn -S | grep TestConst | count 2
+; RUN: opt < %s -basicaa -gvn -S | grep TestPure | count 3
+; RUN: opt < %s -basicaa -gvn -S | grep TestNone | count 4
@g = global i32 0 ; <i32*> [#uses=1]
define i32 @test() {
diff --git a/test/Analysis/BasicAA/store-promote.ll b/test/Analysis/BasicAA/store-promote.ll
new file mode 100644
index 000000000000..d8e7c75142a2
--- /dev/null
+++ b/test/Analysis/BasicAA/store-promote.ll
@@ -0,0 +1,53 @@
+; Test that LICM uses basicaa to do alias analysis, which is capable of
+; disambiguating some obvious cases. If LICM is able to disambiguate the
+; two pointers, then the load should be hoisted, and the store sunk.
+
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+
+@A = global i32 7 ; <i32*> [#uses=3]
+@B = global i32 8 ; <i32*> [#uses=2]
+@C = global [2 x i32] [ i32 4, i32 8 ] ; <[2 x i32]*> [#uses=2]
+
+define i32 @test1(i1 %c) {
+ %Atmp = load i32* @A ; <i32> [#uses=2]
+ br label %Loop
+
+Loop: ; preds = %Loop, %0
+ %ToRemove = load i32* @A ; <i32> [#uses=1]
+ store i32 %Atmp, i32* @B
+ br i1 %c, label %Out, label %Loop
+
+Out: ; preds = %Loop
+ %X = sub i32 %ToRemove, %Atmp ; <i32> [#uses=1]
+ ret i32 %X
+
+; The Loop block should be empty after the load/store are promoted.
+; CHECK: @test1
+; CHECK: load i32* @B
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %c, label %Out, label %Loop
+; CHECK: Out:
+; CHECK: store i32 %Atmp, i32* @B
+}
+
+define i32 @test2(i1 %c) {
+ br label %Loop
+
+Loop: ; preds = %Loop, %0
+ %AVal = load i32* @A ; <i32> [#uses=2]
+ %C0 = getelementptr [2 x i32]* @C, i64 0, i64 0 ; <i32*> [#uses=1]
+ store i32 %AVal, i32* %C0
+ %BVal = load i32* @B ; <i32> [#uses=2]
+ %C1 = getelementptr [2 x i32]* @C, i64 0, i64 1 ; <i32*> [#uses=1]
+ store i32 %BVal, i32* %C1
+ br i1 %c, label %Out, label %Loop
+
+Out: ; preds = %Loop
+ %X = sub i32 %AVal, %BVal ; <i32> [#uses=1]
+ ret i32 %X
+; The Loop block should be empty after the load/store are promoted.
+; CHECK: @test2
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %c, label %Out, label %Loop
+}
+
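The comments in store-promote.ll describe LICM's scalar promotion: when basicaa can prove the loaded and stored pointers are distinct, the load is hoisted above the loop and the store is sunk below it. A rough C++ before/after rendering of what @test1 exercises follows; it assumes a and b point to distinct objects, and the names are made up for illustration, not taken from the pass:

#include <cstdio>

// Hypothetical before/after rendering of the promotion in @test1; this
// illustrates the transformation, not the pass itself.
int test1_before(volatile bool *c, int *a, int *b) {
  int atmp = *a;
  int toRemove = 0;
  do {
    toRemove = *a;   // reloaded on every iteration
    *b = atmp;       // stored on every iteration
  } while (*c);
  return toRemove - atmp;
}

int test1_after(volatile bool *c, int *a, int *b) {
  int atmp = *a;      // load hoisted: nothing in the loop writes *a
  do {
  } while (*c);
  *b = atmp;          // store sunk: only the final store is observable
  return atmp - atmp; // toRemove always equals atmp, so the sub folds to 0
}

int main() {
  int A = 7, B = 8;
  volatile bool c = false;
  std::printf("%d %d\n", test1_before(&c, &A, &B), test1_after(&c, &A, &B));
  return 0;
}
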
diff --git a/test/Analysis/BasicAA/tailcall-modref.ll b/test/Analysis/BasicAA/tailcall-modref.ll
index ac4ea400b0f3..f7d6c57c1bcd 100644
--- a/test/Analysis/BasicAA/tailcall-modref.ll
+++ b/test/Analysis/BasicAA/tailcall-modref.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn -instcombine |\
+; RUN: opt < %s -basicaa -gvn -instcombine |\
; RUN: llvm-dis | grep {ret i32 0}
declare void @foo(i32*)
diff --git a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
index 456ffa25d7bf..6e34209a0e36 100644
--- a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
+++ b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -print-callgraph -disable-output |& \
+; RUN: opt < %s -print-callgraph -disable-output |& \
; RUN: grep {Calls function 'callee'} | count 2
define internal void @callee(...) {
diff --git a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
index ffc27bbe43d1..12849b7aa96c 100644
--- a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
+++ b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -print-callgraph -disable-output |& \
+; RUN: opt < %s -print-callgraph -disable-output |& \
; RUN: grep {Calls function}
@a = global void ()* @f ; <void ()**> [#uses=0]
diff --git a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
index 997ee2a8a0a6..e31f4165b626 100644
--- a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
+++ b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -domtree -break-crit-edges -analyze \
+; RUN: opt < %s -domtree -break-crit-edges -analyze \
; RUN: -domtree | grep {3.*%brtrue }
; PR932
diff --git a/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll b/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll
index 697dad25cce9..96dc73929d1c 100644
--- a/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll
+++ b/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -domtree -break-crit-edges -domtree -disable-output
+; RUN: opt < %s -domtree -break-crit-edges -domtree -disable-output
; PR1110
%struct.OggVorbis_File = type { i8*, i32, i64, i64, %struct.ogg_sync_state, i32, i64*, i64*, i32*, i64*, %struct.vorbis_info*, %struct.vorbis_comment*, i64, i32, i32, i32, double, double, %struct.ogg_stream_state, %struct.vorbis_dsp_state, %struct.vorbis_block, %struct.ov_callbacks }
diff --git a/test/Analysis/Dominators/2007-07-11-SplitBlock.ll b/test/Analysis/Dominators/2007-07-11-SplitBlock.ll
index 3dc6eda6de1d..52fdd2b16dbe 100644
--- a/test/Analysis/Dominators/2007-07-11-SplitBlock.ll
+++ b/test/Analysis/Dominators/2007-07-11-SplitBlock.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -loop-unswitch -disable-output
define i32 @stringSearch_Clib(i32 %count) {
entry:
diff --git a/test/Analysis/Dominators/2007-07-12-SplitBlock.ll b/test/Analysis/Dominators/2007-07-12-SplitBlock.ll
index 1748daf4f393..b46f0c75e10a 100644
--- a/test/Analysis/Dominators/2007-07-12-SplitBlock.ll
+++ b/test/Analysis/Dominators/2007-07-12-SplitBlock.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -licm -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output
define i32 @main(i32 %argc, i8** %argv) {
entry:
diff --git a/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll b/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
index aba6082c62e5..17ace8a950bb 100644
--- a/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
+++ b/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -gvn | llvm-dis | grep call | count 2
+; RUN: opt < %s -globalsmodref-aa -gvn -S | grep call | count 2
@g = internal global i32 0 ; <i32*> [#uses=2]
diff --git a/test/Analysis/GlobalsModRef/aliastest.ll b/test/Analysis/GlobalsModRef/aliastest.ll
index 5ea90fe9a3e6..3e5d11907aa6 100644
--- a/test/Analysis/GlobalsModRef/aliastest.ll
+++ b/test/Analysis/GlobalsModRef/aliastest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -gvn | llvm-dis | not grep load
+; RUN: opt < %s -globalsmodref-aa -gvn -S | not grep load
@X = internal global i32 4 ; <i32*> [#uses=1]
define i32 @test(i32* %P) {
diff --git a/test/Analysis/GlobalsModRef/chaining-analysis.ll b/test/Analysis/GlobalsModRef/chaining-analysis.ll
index 137b2c14d3dc..b1d4593ac992 100644
--- a/test/Analysis/GlobalsModRef/chaining-analysis.ll
+++ b/test/Analysis/GlobalsModRef/chaining-analysis.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -gvn | llvm-dis | not grep load
+; RUN: opt < %s -globalsmodref-aa -gvn -S | not grep load
; This test requires the use of previous analyses to determine that
; doesnotmodX does not modify X (because 'sin' doesn't).
diff --git a/test/Analysis/GlobalsModRef/indirect-global.ll b/test/Analysis/GlobalsModRef/indirect-global.ll
index ff5a0b9839d6..4074909ce785 100644
--- a/test/Analysis/GlobalsModRef/indirect-global.ll
+++ b/test/Analysis/GlobalsModRef/indirect-global.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -gvn -instcombine | llvm-dis | \
+; RUN: opt < %s -globalsmodref-aa -gvn -instcombine -S | \
; RUN: grep {ret i32 0}
@G = internal global i32* null ; <i32**> [#uses=3]
diff --git a/test/Analysis/GlobalsModRef/modreftest.ll b/test/Analysis/GlobalsModRef/modreftest.ll
index ffcb84defad8..257c0ee7deb2 100644
--- a/test/Analysis/GlobalsModRef/modreftest.ll
+++ b/test/Analysis/GlobalsModRef/modreftest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -gvn | llvm-dis | not grep load
+; RUN: opt < %s -globalsmodref-aa -gvn -S | not grep load
@X = internal global i32 4 ; <i32*> [#uses=2]
define i32 @test(i32* %P) {
diff --git a/test/Analysis/GlobalsModRef/purecse.ll b/test/Analysis/GlobalsModRef/purecse.ll
index dc3f6adda143..994aff8d4c68 100644
--- a/test/Analysis/GlobalsModRef/purecse.ll
+++ b/test/Analysis/GlobalsModRef/purecse.ll
@@ -1,5 +1,5 @@
; Test that pure functions are cse'd away
-; RUN: llvm-as < %s | opt -globalsmodref-aa -gvn -instcombine | \
+; RUN: opt < %s -globalsmodref-aa -gvn -instcombine | \
; RUN: llvm-dis | not grep sub
define i32 @pure(i32 %X) {
diff --git a/test/Analysis/LoopDependenceAnalysis/alias.ll b/test/Analysis/LoopDependenceAnalysis/alias.ll
new file mode 100644
index 000000000000..a5f504bafbf8
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/alias.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -disable-output -analyze -lda | FileCheck %s
+
+;; x[5] = x[6] // with x being a pointer passed as argument
+
+define void @f1(i32* nocapture %xptr) nounwind {
+entry:
+ %x.ld.addr = getelementptr i32* %xptr, i64 6
+ %x.st.addr = getelementptr i32* %xptr, i64 5
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x = load i32* %x.ld.addr
+ store i32 %x, i32* %x.st.addr
+; CHECK: 0,1: dep
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; x[5] = x[6] // with x being an array on the stack
+
+define void @foo(...) nounwind {
+entry:
+ %xptr = alloca [256 x i32], align 4
+ %x.ld.addr = getelementptr [256 x i32]* %xptr, i64 0, i64 6
+ %x.st.addr = getelementptr [256 x i32]* %xptr, i64 0, i64 5
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x = load i32* %x.ld.addr
+ store i32 %x, i32* %x.st.addr
+; CHECK: 0,1: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Analysis/LoopDependenceAnalysis/siv-strong.ll b/test/Analysis/LoopDependenceAnalysis/siv-strong.ll
new file mode 100644
index 000000000000..327089501ff8
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/siv-strong.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s -disable-output -analyze -lda | FileCheck %s
+
+@x = common global [256 x i32] zeroinitializer, align 4
+@y = common global [256 x i32] zeroinitializer, align 4
+
+;; for (i = 0; i < 256; i++)
+;; x[i] = x[i] + y[i]
+
+define void @f1(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %x = load i32* %x.addr ; 0
+ %y = load i32* %y.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; for (i = 0; i < 256; i++)
+;; x[i+1] = x[i] + y[i]
+
+define void @f2(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %i.next = add i64 %i, 1
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.next
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; for (i = 0; i < 10; i++)
+;; x[i+20] = x[i] + y[i]
+
+define void @f3(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %i.20 = add i64 %i, 20
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.20
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; for (i = 0; i < 10; i++)
+;; x[10*i+1] = x[10*i] + y[i]
+
+define void @f4(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %i.10 = mul i64 %i, 10
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i.10
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.10
+ %i.10.1 = add i64 %i.10, 1
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.10.1
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
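
These four functions exercise strong SIV subscripts: within each pair, both x references are affine in the induction variable with the same coefficient, so the x store and x load are expected to be reported dependent (`0,2: dep`) while the y load stays independent (`1,2: ind`). A hypothetical C sketch of @f2's distance-1 recurrence (bounded to 255 stores so it stays in bounds in C):

#include <stdio.h>

int main(void) {
    int x[256] = {0}, y[256];
    for (int i = 0; i < 256; i++) y[i] = 1;
    /* Strong SIV with distance 1: the store to x[i+1] is read back
       as x[i] one iteration later, so iterations must run in order;
       the loop computes a running sum of y into x. */
    for (int i = 0; i < 255; i++)
        x[i + 1] = x[i] + y[i];
    printf("%d\n", x[255]); /* prints: 255 */
    return 0;
}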
diff --git a/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll b/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll
new file mode 100644
index 000000000000..3d9f2583b3ab
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll
@@ -0,0 +1,118 @@
+; RUN: opt < %s -disable-output -analyze -lda | FileCheck %s
+
+@x = common global [256 x i32] zeroinitializer, align 4
+@y = common global [256 x i32] zeroinitializer, align 4
+
+;; for (i = 0; i < 256; i++)
+;; x[i] = x[255 - i] + y[i]
+
+define void @f1(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %i.255 = sub i64 255, %i
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.255
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; for (i = 0; i < 100; i++)
+;; x[i] = x[255 - i] + y[i]
+
+define void @f2(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %i.255 = sub i64 255, %i
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.255
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 100
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; // The first iteration (i=0) leads to an out-of-bounds access of x. As the
+;; // result of this access is undefined, _any_ dependence result is safe.
+;; for (i = 0; i < 256; i++)
+;; x[i] = x[256 - i] + y[i]
+
+define void @f3(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %i.256 = sub i64 0, %i
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 1, i64 %i.256
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2:
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; // Slightly contrived but valid IR for the following loop, where all
+;; // accesses in all iterations are within bounds. While this example's first
+;; // (ZIV-)subscript is (0, 1), the accesses are dependent.
+;; for (i = 1; i < 256; i++)
+;; x[i] = x[256 - i] + y[i]
+
+define void @f4(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %i.1 = add i64 1, %i
+ %i.256 = sub i64 -1, %i
+ %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i.1
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 1, i64 %i.256
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.1
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.ld.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.st.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
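
Weak-crossing SIV: the store index i and the load index 255-i have induction-variable coefficients of opposite sign, so the two access sequences sweep toward each other and cross near i = 127.5. A hypothetical C analog of @f1:

#include <stdio.h>

int main(void) {
    int x[256], y[256];
    for (int i = 0; i < 256; i++) { x[i] = i; y[i] = 0; }
    /* Iteration i (for i > 127) reads the element that iteration
       255-i wrote, so the loop carries a dependence, even though no
       single element is read and written in the same iteration
       (i == 255-i would need 2*i = 255, which has no integer
       solution). */
    for (int i = 0; i < 256; i++)
        x[i] = x[255 - i] + y[i];
    printf("%d\n", x[200]); /* prints 200: the value stored at i = 55 */
    return 0;
}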
diff --git a/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll b/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll
new file mode 100644
index 000000000000..4433138b6eb2
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -disable-output -analyze -lda | FileCheck %s
+
+@x = common global [256 x i32] zeroinitializer, align 4
+@y = common global [256 x i32] zeroinitializer, align 4
+
+;; for (i = 0; i < 256; i++)
+;; x[i] = x[42] + y[i]
+
+define void @f1(...) nounwind {
+entry:
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 42
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; for (i = 0; i < 250; i++)
+;; x[i] = x[255] + y[i]
+
+define void @f2(...) nounwind {
+entry:
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 255
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
+ %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
+ %x = load i32* %x.ld.addr ; 0
+ %y = load i32* %y.addr ; 1
+ %r = add i32 %y, %x
+ store i32 %r, i32* %x.addr ; 2
+; CHECK: 0,2: dep
+; CHECK: 1,2: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 250
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
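
Weak-zero SIV: one subscript is loop-invariant (42, or 255) while the other is the induction variable, so the only possible conflict is between the store in the iteration that hits the constant index and the loads around it. In @f2 the trip count stops at 250, so x[255] is never actually stored to and the expected `dep` answer is conservative. A hypothetical C analog of @f1:

#include <stdio.h>

int main(void) {
    int x[256], y[256];
    for (int i = 0; i < 256; i++) { x[i] = 1; y[i] = 1; }
    /* The load index is the constant 42 while the store index varies
       with i; the store in iteration 42 conflicts with the loads of
       x[42] in later iterations, so the loop is not parallel. */
    for (int i = 0; i < 256; i++)
        x[i] = x[42] + y[i];
    printf("%d %d\n", x[10], x[100]); /* prints: 2 3 -- iterations after 42 see the updated x[42] */
    return 0;
}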
diff --git a/test/Analysis/LoopDependenceAnalysis/ziv.ll b/test/Analysis/LoopDependenceAnalysis/ziv.ll
new file mode 100644
index 000000000000..0a93762d4c7a
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/ziv.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -disable-output -analyze -lda | FileCheck %s
+
+@x = common global [256 x i32] zeroinitializer, align 4
+
+;; x[5] = x[6]
+
+define void @f1(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x = load i32* getelementptr ([256 x i32]* @x, i32 0, i64 6)
+ store i32 %x, i32* getelementptr ([256 x i32]* @x, i32 0, i64 5)
+; CHECK: 0,1: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; x[c] = x[c+1] // with c being a loop-invariant constant
+
+define void @f2(i64 %c0) nounwind {
+entry:
+ %c1 = add i64 %c0, 1
+ %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %c0
+ %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %c1
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x = load i32* %x.ld.addr
+ store i32 %x, i32* %x.st.addr
+; CHECK: 0,1: ind
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+;; x[6] = x[6]
+
+define void @f3(...) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %x = load i32* getelementptr ([256 x i32]* @x, i32 0, i64 6)
+ store i32 %x, i32* getelementptr ([256 x i32]* @x, i32 0, i64 6)
+; CHECK: 0,1: dep
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
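
ZIV (zero index variable) subscripts do not involve the induction variable at all, so the dependence question reduces to whether the two loop-invariant indices can be equal: 5 vs. 6 and c vs. c+1 cannot (`ind`), 6 vs. 6 always can (`dep`). A hypothetical C analog of @f2:

#include <stdio.h>

/* ZIV: neither subscript depends on the loop counter. c and c+1
   always differ, matching the "0,1: ind" expectation in f2 above. */
static void f2(int *x, long c) {
    for (long i = 0; i < 256; i++)
        x[c] = x[c + 1];
}

int main(void) {
    int x[256];
    for (int i = 0; i < 256; i++) x[i] = i;
    f2(x, 7);
    printf("%d\n", x[7]); /* prints: 8 */
    return 0;
}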
diff --git a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
index 34b9c4431810..617c23f8e86f 100644
--- a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
+++ b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
@@ -1,7 +1,7 @@
; This testcase was incorrectly computing that the loopentry.7 loop was
; not a child of the loopentry.6 loop.
;
-; RUN: llvm-as < %s | opt -analyze -loops | \
+; RUN: opt < %s -analyze -loops | \
; RUN: grep {^ Loop at depth 4 containing: %loopentry.7<header><latch><exit>}
define void @getAndMoveToFrontDecode() {
diff --git a/test/Analysis/PointerTracking/dg.exp b/test/Analysis/PointerTracking/dg.exp
new file mode 100644
index 000000000000..f2005891a59a
--- /dev/null
+++ b/test/Analysis/PointerTracking/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll
new file mode 100644
index 000000000000..c0b0606af0b5
--- /dev/null
+++ b/test/Analysis/PointerTracking/sizes.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -pointertracking -analyze | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@.str = internal constant [5 x i8] c"1234\00" ; <[5 x i8]*> [#uses=1]
+@test1p = global i8* getelementptr ([5 x i8]* @.str, i32 0, i32 0), align 8 ; <i8**> [#uses=1]
+@test1a = global [5 x i8] c"1234\00", align 1 ; <[5 x i8]*> [#uses=1]
+@test2a = global [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4 ; <[5 x i32]*> [#uses=2]
+@test2p = global i32* getelementptr ([5 x i32]* @test2a, i32 0, i32 0), align 8 ; <i32**> [#uses=1]
+@test0p = common global i32* null, align 8 ; <i32**> [#uses=1]
+@test0i = common global i32 0, align 4 ; <i32*> [#uses=1]
+
+define i32 @foo0() nounwind {
+entry:
+ %tmp = load i32** @test0p ; <i32*> [#uses=1]
+ %conv = bitcast i32* %tmp to i8* ; <i8*> [#uses=1]
+ %call = tail call i32 @bar(i8* %conv) nounwind ; <i32> [#uses=1]
+ %tmp1 = load i8** @test1p ; <i8*> [#uses=1]
+ %call2 = tail call i32 @bar(i8* %tmp1) nounwind ; <i32> [#uses=1]
+ %call3 = tail call i32 @bar(i8* getelementptr ([5 x i8]* @test1a, i32 0, i32 0)) nounwind ; <i32> [#uses=1]
+ %call5 = tail call i32 @bar(i8* bitcast ([5 x i32]* @test2a to i8*)) nounwind ; <i32> [#uses=1]
+ %tmp7 = load i32** @test2p ; <i32*> [#uses=1]
+ %conv8 = bitcast i32* %tmp7 to i8* ; <i8*> [#uses=1]
+ %call9 = tail call i32 @bar(i8* %conv8) nounwind ; <i32> [#uses=1]
+ %call11 = tail call i32 @bar(i8* bitcast (i32* @test0i to i8*)) nounwind ; <i32> [#uses=1]
+ %add = add i32 %call2, %call ; <i32> [#uses=1]
+ %add4 = add i32 %add, %call3 ; <i32> [#uses=1]
+ %add6 = add i32 %add4, %call5 ; <i32> [#uses=1]
+ %add10 = add i32 %add6, %call9 ; <i32> [#uses=1]
+ %add12 = add i32 %add10, %call11 ; <i32> [#uses=1]
+ ret i32 %add12
+}
+
+declare i32 @bar(i8*)
+
+define i32 @foo1(i32 %n) nounwind {
+entry:
+; CHECK: 'foo1':
+ %test4a = alloca [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+; CHECK: %test4a =
+; CHECK: ==> 1 elements, 10 bytes allocated
+ %test6a = alloca [10 x i32], align 4 ; <[10 x i32]*> [#uses=1]
+; CHECK: %test6a =
+; CHECK: ==> 1 elements, 40 bytes allocated
+ %vla = alloca i8, i32 %n, align 1 ; <i8*> [#uses=1]
+; CHECK: %vla =
+; CHECK: ==> %n elements, %n bytes allocated
+ %0 = shl i32 %n, 2 ; <i32> [#uses=1]
+ %vla7 = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1]
+; CHECK: %vla7 =
+; CHECK: ==> (4 * %n) elements, (4 * %n) bytes allocated
+ %call = call i32 @bar(i8* %vla) nounwind ; <i32> [#uses=1]
+ %arraydecay = getelementptr [10 x i8]* %test4a, i64 0, i64 0 ; <i8*> [#uses=1]
+ %call10 = call i32 @bar(i8* %arraydecay) nounwind ; <i32> [#uses=1]
+ %call11 = call i32 @bar(i8* %vla7) nounwind ; <i32> [#uses=1]
+ %ptrconv14 = bitcast [10 x i32]* %test6a to i8* ; <i8*> [#uses=1]
+ %call15 = call i32 @bar(i8* %ptrconv14) nounwind ; <i32> [#uses=1]
+ %add = add i32 %call10, %call ; <i32> [#uses=1]
+ %add12 = add i32 %add, %call11 ; <i32> [#uses=1]
+ %add16 = add i32 %add12, %call15 ; <i32> [#uses=1]
+ ret i32 %add16
+}
+
+define i32 @foo2(i32 %n) nounwind {
+entry:
+ %call = malloc i8, i32 %n ; <i8*> [#uses=1]
+; CHECK: %call =
+; CHECK: ==> %n elements, %n bytes allocated
+ %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; <i8*> [#uses=1]
+; CHECK: %call2 =
+; CHECK: ==> 8 elements, 8 bytes allocated
+ %call4 = tail call i8* @realloc(i8* null, i64 16) nounwind ; <i8*> [#uses=1]
+; CHECK: %call4 =
+; CHECK: ==> 16 elements, 16 bytes allocated
+ %call6 = tail call i32 @bar(i8* %call) nounwind ; <i32> [#uses=1]
+ %call8 = tail call i32 @bar(i8* %call2) nounwind ; <i32> [#uses=1]
+ %call10 = tail call i32 @bar(i8* %call4) nounwind ; <i32> [#uses=1]
+ %add = add i32 %call8, %call6 ; <i32> [#uses=1]
+ %add11 = add i32 %add, %call10 ; <i32> [#uses=1]
+ ret i32 %add11
+}
+
+declare noalias i8* @calloc(i64, i64) nounwind
+
+declare noalias i8* @realloc(i8* nocapture, i64) nounwind
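
The CHECK lines above show what -pointertracking derives for each allocation: an element count and a byte size, symbolic when the size is a runtime value such as %n. A hypothetical C analog of the allocations @foo2 exercises (variable names are illustrative):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    int n = 5;
    /* The pass is expected to report, e.g., "8 elements, 8 bytes
       allocated" for calloc(2, 4) and "%n elements, %n bytes
       allocated" for a malloc of n bytes. */
    char *a = malloc(n);         /* n elements of 1 byte */
    char *b = calloc(2, 4);      /* 2 * 4 = 8 bytes      */
    char *c = realloc(NULL, 16); /* 16 bytes             */
    printf("%p %p %p\n", (void *)a, (void *)b, (void *)c);
    free(a); free(b); free(c);
    return 0;
}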
diff --git a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll
index b272f92499e5..b73b7f03f7e7 100644
--- a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll
+++ b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -postdomfrontier \
+; RUN: opt < %s -analyze -postdomfrontier \
; RUN: -disable-verify
; ModuleID = '2006-09-26-PostDominanceFrontier.bc'
target datalayout = "e-p:64:64"
diff --git a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll
index 51e4c2aeb553..1ec056bc34e0 100644
--- a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll
+++ b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -postdomfrontier -disable-output
+; RUN: opt < %s -postdomfrontier -disable-output
define void @SManager() {
entry:
diff --git a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll
index 4deec98a9ee8..767e5db94ce8 100644
--- a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll
+++ b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -postdomfrontier -disable-output
+; RUN: opt < %s -postdomfrontier -disable-output
define void @args_out_of_range() {
entry:
diff --git a/test/Analysis/PostDominators/pr1098.ll b/test/Analysis/PostDominators/pr1098.ll
index b54a9fe1c75b..afb47769ee49 100644
--- a/test/Analysis/PostDominators/pr1098.ll
+++ b/test/Analysis/PostDominators/pr1098.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -postdomtree -analyze | grep entry
+; RUN: opt < %s -postdomtree -analyze | grep entry
; PR932
define void @foo(i1 %x) {
diff --git a/test/Analysis/Profiling/dg.exp b/test/Analysis/Profiling/dg.exp
new file mode 100644
index 000000000000..1eb4755c4102
--- /dev/null
+++ b/test/Analysis/Profiling/dg.exp
@@ -0,0 +1,4 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+
diff --git a/test/Analysis/Profiling/edge-profiling.ll b/test/Analysis/Profiling/edge-profiling.ll
new file mode 100644
index 000000000000..cbaf47617fb6
--- /dev/null
+++ b/test/Analysis/Profiling/edge-profiling.ll
@@ -0,0 +1,139 @@
+; Test the edge profiling instrumentation.
+; RUN: opt < %s -insert-edge-profiling -S | FileCheck %s
+
+; ModuleID = '<stdin>'
+
+@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
+@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1]
+@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1]
+@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1]
+; CHECK:@EdgeProfCounters
+; CHECK:[19 x i32]
+; CHECK:zeroinitializer
+
+define void @oneblock() nounwind {
+entry:
+; CHECK:entry:
+; CHECK:%OldFuncCounter
+; CHECK:load
+; CHECK:getelementptr
+; CHECK:@EdgeProfCounters
+; CHECK:i32 0
+; CHECK:i32 0
+; CHECK:%NewFuncCounter
+; CHECK:add
+; CHECK:%OldFuncCounter
+; CHECK:store
+; CHECK:%NewFuncCounter
+; CHECK:getelementptr
+; CHECK:@EdgeProfCounters
+ %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @puts(i8*)
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+; CHECK:entry:
+ %argc_addr = alloca i32 ; <i32*> [#uses=4]
+ %argv_addr = alloca i8** ; <i8***> [#uses=1]
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %j = alloca i32 ; <i32*> [#uses=4]
+ %i = alloca i32 ; <i32*> [#uses=4]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+; CHECK:call
+; CHECK:@llvm_start_edge_profiling
+; CHECK:@EdgeProfCounters
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %argc, i32* %argc_addr
+ store i8** %argv, i8*** %argv_addr
+ store i32 0, i32* %i, align 4
+ br label %bb10
+
+bb: ; preds = %bb10
+; CHECK:bb:
+ %1 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
+ %2 = icmp sgt i32 %1, 1 ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb8
+
+bb1: ; preds = %bb
+; CHECK:bb1:
+ store i32 0, i32* %j, align 4
+ br label %bb6
+
+bb2: ; preds = %bb6
+; CHECK:bb2:
+ %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ %4 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
+ %5 = icmp sgt i32 %4, 2 ; <i1> [#uses=1]
+ br i1 %5, label %bb3, label %bb4
+
+bb3: ; preds = %bb2
+; CHECK:bb3:
+ %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %bb5
+
+bb4: ; preds = %bb2
+; CHECK:bb4:
+ %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %bb11
+
+bb5: ; preds = %bb3
+; CHECK:bb5:
+ %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ %9 = load i32* %j, align 4 ; <i32> [#uses=1]
+ %10 = add nsw i32 %9, 1 ; <i32> [#uses=1]
+ store i32 %10, i32* %j, align 4
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb1
+; CHECK:bb6:
+ %11 = load i32* %j, align 4 ; <i32> [#uses=1]
+ %12 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
+ %13 = icmp slt i32 %11, %12 ; <i1> [#uses=1]
+ br i1 %13, label %bb2, label %bb7
+
+bb7: ; preds = %bb6
+; CHECK:bb7:
+ br label %bb9
+
+bb8: ; preds = %bb
+; CHECK:bb8:
+ %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %bb9
+
+bb9: ; preds = %bb8, %bb7
+; CHECK:bb9:
+ %15 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %16 = add nsw i32 %15, 1 ; <i32> [#uses=1]
+ store i32 %16, i32* %i, align 4
+ br label %bb10
+
+bb10: ; preds = %bb9, %entry
+; CHECK:bb10:
+ %17 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %18 = icmp ne i32 %17, 3 ; <i1> [#uses=1]
+ br i1 %18, label %bb, label %bb11
+; CHECK:br
+; CHECK:label %bb10.bb11_crit_edge
+
+; CHECK:bb10.bb11_crit_edge:
+; CHECK:br
+; CHECK:label %bb11
+
+bb11: ; preds = %bb10, %bb4
+; CHECK:bb11:
+ call void @oneblock() nounwind
+ store i32 0, i32* %0, align 4
+ %19 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %19, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %bb11
+; CHECK:return:
+ %retval12 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval12
+}
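
Conceptually, -insert-edge-profiling gives the module one counter per CFG edge (the `[19 x i32] @EdgeProfCounters` array checked above), registers the array via @llvm_start_edge_profiling, bumps a counter wherever an edge is taken, and splits critical edges such as bb10->bb11 so the increment has a block of its own. A hand-written C sketch of the same idea (the enum and array names are illustrative, not the pass's output):

#include <stdio.h>

enum { E_ENTRY_BODY, E_BODY_BODY, E_BODY_EXIT, NUM_EDGES };
static unsigned edge_counters[NUM_EDGES];

int main(void) {
    edge_counters[E_ENTRY_BODY]++;                /* entry -> body, taken once  */
    for (int i = 0; i < 3; i++) {
        if (i > 0)
            edge_counters[E_BODY_BODY]++;         /* body -> body back edge     */
    }
    edge_counters[E_BODY_EXIT]++;                 /* body -> exit, taken once   */
    for (int e = 0; e < NUM_EDGES; e++)
        printf("edge %d: %u\n", e, edge_counters[e]);
    return 0;
}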
diff --git a/test/Analysis/Profiling/profiling-tool-chain.ll b/test/Analysis/Profiling/profiling-tool-chain.ll
new file mode 100644
index 000000000000..5ac31b59bdcb
--- /dev/null
+++ b/test/Analysis/Profiling/profiling-tool-chain.ll
@@ -0,0 +1,212 @@
+; RUN: llvm-as %s -o %t1
+
+; FIXME: The RUX parts of the test are disabled for now; they aren't working on
+; llvm-gcc-x86_64-darwin10-selfhost.
+
+; Test the optimal edge profiling instrumentation.
+; RUN: opt %t1 -insert-optimal-edge-profiling -o %t2
+; RUX: llvm-dis < %t2 | FileCheck --check-prefix=INST %s
+
+; Test the creation, reading, and displaying of profile data.
+; RUX: rm -f llvmprof.out
+; RUX: lli -load %llvmlibsdir/profile_rt%shlibext %t2
+; RUX: lli -load %llvmlibsdir/profile_rt%shlibext %t2 1 2
+; RUX: llvm-prof -print-all-code %t1 | FileCheck --check-prefix=PROF %s
+
+; Also test the loaded profile with the verifier.
+; RUX: opt %t1 -profile-loader -profile-verifier -o %t3
+
+; Test the profile estimator.
+; RUN: opt %t1 -profile-estimator -profile-verifier -o %t3
+
+; PROF: 1. 2/4 oneblock
+; PROF: 2. 2/4 main
+; PROF: 1. 15.7895% 12/76 main() - bb6
+; PROF: 2. 11.8421% 9/76 main() - bb2
+; PROF: 3. 11.8421% 9/76 main() - bb3
+; PROF: 4. 11.8421% 9/76 main() - bb5
+; PROF: 5. 10.5263% 8/76 main() - bb10
+; PROF: 6. 7.89474% 6/76 main() - bb
+; PROF: 7. 7.89474% 6/76 main() - bb9
+; PROF: 8. 3.94737% 3/76 main() - bb1
+; PROF: 9. 3.94737% 3/76 main() - bb7
+; PROF: 10. 3.94737% 3/76 main() - bb8
+; PROF: 11. 2.63158% 2/76 oneblock() - entry
+; PROF: 12. 2.63158% 2/76 main() - entry
+; PROF: 13. 2.63158% 2/76 main() - bb11
+; PROF: 14. 2.63158% 2/76 main() - return
+
+; ModuleID = '<stdin>'
+
+@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
+@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1]
+@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1]
+@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1]
+; INST:@OptEdgeProfCounters
+; INST:[21 x i32]
+; INST:[i32 0,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 0,
+; INST:i32 0,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 0,
+; INST:i32 0,
+; INST:i32 -1,
+; INST:i32 -1,
+; INST:i32 0,
+; INST:i32 -1,
+; INST:i32 -1]
+
+; PROF:;;; %oneblock called 2 times.
+; PROF:;;;
+define void @oneblock() nounwind {
+entry:
+; PROF:entry:
+; PROF: ;;; Basic block executed 2 times.
+ %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @puts(i8*)
+
+; PROF:;;; %main called 2 times.
+; PROF:;;;
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+; PROF:entry:
+; PROF: ;;; Basic block executed 2 times.
+ %argc_addr = alloca i32 ; <i32*> [#uses=4]
+ %argv_addr = alloca i8** ; <i8***> [#uses=1]
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %j = alloca i32 ; <i32*> [#uses=4]
+ %i = alloca i32 ; <i32*> [#uses=4]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+; INST:call
+; INST:@llvm_start_opt_edge_profiling
+; INST:@OptEdgeProfCounters
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %argc, i32* %argc_addr
+ store i8** %argv, i8*** %argv_addr
+ store i32 0, i32* %i, align 4
+ br label %bb10
+; PROF: ;;; Out-edge counts: [2.000000e+00 -> bb10]
+
+bb: ; preds = %bb10
+; PROF:bb:
+; PROF: ;;; Basic block executed 6 times.
+ %1 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
+ %2 = icmp sgt i32 %1, 1 ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb8
+; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb1] [3.000000e+00 -> bb8]
+
+bb1: ; preds = %bb
+; PROF:bb1:
+; PROF: ;;; Basic block executed 3 times.
+ store i32 0, i32* %j, align 4
+ br label %bb6
+; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb6]
+
+bb2: ; preds = %bb6
+; PROF:bb2:
+; PROF: ;;; Basic block executed 9 times.
+ %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ %4 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
+ %5 = icmp sgt i32 %4, 2 ; <i1> [#uses=1]
+ br i1 %5, label %bb3, label %bb4
+; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb3]
+
+bb3: ; preds = %bb2
+; PROF:bb3:
+; PROF: ;;; Basic block executed 9 times.
+ %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %bb5
+; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb5]
+
+bb4: ; preds = %bb2
+; PROF:bb4:
+; PROF: ;;; Never executed!
+ %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %bb11
+
+bb5: ; preds = %bb3
+; PROF:bb5:
+; PROF: ;;; Basic block executed 9 times.
+ %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ %9 = load i32* %j, align 4 ; <i32> [#uses=1]
+ %10 = add nsw i32 %9, 1 ; <i32> [#uses=1]
+ store i32 %10, i32* %j, align 4
+ br label %bb6
+; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb6]
+
+bb6: ; preds = %bb5, %bb1
+; PROF:bb6:
+; PROF: ;;; Basic block executed 12 times.
+ %11 = load i32* %j, align 4 ; <i32> [#uses=1]
+ %12 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
+ %13 = icmp slt i32 %11, %12 ; <i1> [#uses=1]
+ br i1 %13, label %bb2, label %bb7
+; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb2] [3.000000e+00 -> bb7]
+
+bb7: ; preds = %bb6
+; PROF:bb7:
+; PROF: ;;; Basic block executed 3 times.
+ br label %bb9
+; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9]
+
+bb8: ; preds = %bb
+; PROF:bb8:
+; PROF: ;;; Basic block executed 3 times.
+ %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %bb9
+; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9]
+
+bb9: ; preds = %bb8, %bb7
+; PROF:bb9:
+; PROF: ;;; Basic block executed 6 times.
+ %15 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %16 = add nsw i32 %15, 1 ; <i32> [#uses=1]
+ store i32 %16, i32* %i, align 4
+ br label %bb10
+; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb10]
+
+bb10: ; preds = %bb9, %entry
+; PROF:bb10:
+; PROF: ;;; Basic block executed 8 times.
+ %17 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %18 = icmp ne i32 %17, 3 ; <i1> [#uses=1]
+ br i1 %18, label %bb, label %bb11
+; INST:br
+; INST:label %bb10.bb11_crit_edge
+; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb] [2.000000e+00 -> bb11]
+
+; INST:bb10.bb11_crit_edge:
+; INST:br
+; INST:label %bb11
+
+bb11: ; preds = %bb10, %bb4
+; PROF:bb11:
+; PROF: ;;; Basic block executed 2 times.
+ call void @oneblock() nounwind
+ store i32 0, i32* %0, align 4
+ %19 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %19, i32* %retval, align 4
+ br label %return
+; PROF: ;;; Out-edge counts: [2.000000e+00 -> return]
+
+return: ; preds = %bb11
+; PROF:return:
+; PROF: ;;; Basic block executed 2 times.
+ %retval12 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval12
+}
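
The PROF ranking can be sanity-checked by hand: the per-block counts listed above sum to 76, and each percentage is count/76. A small verification sketch:

#include <stdio.h>

int main(void) {
    /* Per-block execution counts from the PROF listing above. */
    int counts[] = {12, 9, 9, 9, 8, 6, 6, 3, 3, 3, 2, 2, 2, 2};
    int total = 0;
    for (unsigned i = 0; i < sizeof counts / sizeof *counts; i++)
        total += counts[i];
    printf("total=%d  bb6=%.4f%%\n", total, 100.0 * 12 / total);
    /* prints: total=76  bb6=15.7895% -- matching "1. 15.7895% 12/76" */
    return 0;
}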
diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
index bf27e7753538..7f82ea435791 100644
--- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {Loop bb: backedge-taken count is 100}
; PR1533
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll
index e3393d5eed62..e67e4d00d625 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -adce -simplifycfg | llvm-dis | grep "icmp s"
+; RUN: opt < %s -indvars -adce -simplifycfg -S | grep "icmp s"
; PR1598
define i32 @f(i32 %a, i32 %b, i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
index 95f932a9a581..f623da1b2757 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
; PR1597
define i32 @f(i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
index e5e47d549f53..817090ffef65 100644
--- a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
+++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 13}
; PR1706
diff --git a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll
index 66ca7551c240..514920f0f6fa 100644
--- a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll
+++ b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep printd | grep 1206807378
+; RUN: opt < %s -indvars -S | grep printd | grep 1206807378
; PR1798
declare void @printd(i32)
diff --git a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
index 01f338a29c27..2b3c982d6b12 100644
--- a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
+++ b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep -e {--> %b}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep -e {--> %b}
; PR1810
define void @fun() {
diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
index b9a53b318bf5..c8e483e7d50f 100644
--- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)}
define void @foo(i32 %n) {
entry:
diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
index b943bc7d4c61..cb9a1829eb7c 100644
--- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))}
; PR2002
define void @foo(i8 %n) {
diff --git a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
index 59b51093f40a..bf9f4a9e8607 100644
--- a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep umax
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep umax
; PR2003
define i32 @foo(i32 %n) {
diff --git a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
index 5453ae3ae80b..8d15b772f1fe 100644
--- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
+++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 61}
; PR2364
diff --git a/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll b/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll
index cbe5c97905ce..d503329292c7 100644
--- a/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll
+++ b/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution 2>/dev/null
+; RUN: opt < %s -analyze -scalar-evolution 2>/dev/null
; PR2433
define i32 @main1(i32 %argc, i8** %argv) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
index 6ba0f25eb061..850b6708f4fe 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& not grep smax
+; RUN: opt < %s -analyze -scalar-evolution -disable-output |& not grep smax
; PR2261
@lut = common global [256 x i8] zeroinitializer, align 32 ; <[256 x i8]*> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
index 3c022e7181bd..59e9fda41e64 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& not grep smax
+; RUN: opt < %s -analyze -scalar-evolution -disable-output |& not grep smax
; PR2070
define i32 @a(i32 %x) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
index 5dcad53f6a60..989ac51226dc 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep Unpredictable
; PR2088
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
index 54c929dcdaf6..803c7d110e72 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 113}
; PR2088
diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll b/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll
index 527b8b0add23..97d0640c6c58 100644
--- a/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | \
; RUN: grep -F "backedge-taken count is (-1 + (-1 * %j))"
; PR2607
diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
index 9051dc7ec515..7f4de9173336 100644
--- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | \
; RUN: grep -F "backedge-taken count is (-2147483632 + ((-1 + (-1 * %x)) smax (-1 + (-1 * %y))))"
; PR2607
diff --git a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
index f8e1cfcd7fbe..fa09895eac32 100644
--- a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
+++ b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: 20028"
; PR2621
diff --git a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
index fbd249fbc040..5a28117eb60b 100644
--- a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
+++ b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: -19168"
; PR2621
diff --git a/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll b/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll
index 1e9d0bfc9c23..9daff991aee9 100644
--- a/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output
+; RUN: opt < %s -analyze -scalar-evolution -disable-output
; PR1827
declare void @use(i32)
diff --git a/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll b/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll
index c0b3a1fe01b2..5a2c36659c72 100644
--- a/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output
+; RUN: opt < %s -analyze -scalar-evolution -disable-output
; PR2602
define i32 @a() nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index 56d1fe7b541d..daeb26a202e3 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output |& \
; RUN: grep {Loop bb: backedge-taken count is (7 + (-1 \\* %argc))}
; XFAIL: *
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
index 8fb1604fd190..9dda78b21f7d 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: | grep {Loop bb: Unpredictable backedge-taken count\\.}
; ScalarEvolution can't compute a trip count because it doesn't know if
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
index d506f9c3f82f..bcbe92f509ae 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& grep {/u 3}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output |& grep {/u 3}
; XFAIL: *
define i32 @f(i32 %x) nounwind readnone {
diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
index 643d2f835b8c..2ee107a4a43a 100644
--- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {backedge-taken count is 255}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {backedge-taken count is 255}
; XFAIL: *
define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
index 995a1d95a8a8..0cfd84c997b4 100644
--- a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {0 smax}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {0 smax}
; XFAIL: *
define i32 @f(i32 %c.idx.val) {
diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
index 8e064c70da9a..4ec358c8a4dc 100644
--- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output |& \
; RUN: grep {(((-1 \\* %i0) + (100005 smax %i0)) /u 5)}
; XFAIL: *
diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
index 950c1d21d910..1fe10689f3fe 100644
--- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& grep {/u 5}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output |& grep {/u 5}
; XFAIL: *
define i8 @foo0(i8 %i0) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
index 65c4cdbb1362..9d13695c3e47 100644
--- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | not grep {/u -1}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | not grep {/u -1}
; PR3275
@g_16 = external global i16 ; <i16*> [#uses=3]
diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
index 6aced23cf950..78a7fd016716 100644
--- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
+++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {(trunc i} | not grep ext
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {(trunc i} | not grep ext
define i16 @test1(i8 %x) {
%A = sext i8 %x to i32
diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
index 5e5128bd577a..6ed261481e2d 100644
--- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
+++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {count is 2}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {count is 2}
; PR3171
%struct.Foo = type { i32 }
diff --git a/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll b/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll
new file mode 100644
index 000000000000..a4358aa63215
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -analyze -scalar-evolution
+; PR4501
+
+define void @test() {
+entry:
+ %0 = load i16* undef, align 1
+ %1 = lshr i16 %0, 8
+ %2 = and i16 %1, 3
+ %3 = zext i16 %2 to i32
+ %4 = load i8* undef, align 1
+ %5 = lshr i8 %4, 4
+ %6 = and i8 %5, 1
+ %7 = zext i8 %6 to i32
+ %t1 = add i32 %3, %7
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
index 27a546f32e82..fcc6fc3297c0 100644
--- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
+++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 100}
; PR1101
diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll
index 94cca83ea74d..90d947f15bba 100644
--- a/test/Analysis/ScalarEvolution/and-xor.ll
+++ b/test/Analysis/ScalarEvolution/and-xor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \
+; RUN: opt < %s -scalar-evolution -analyze -disable-output \
; RUN: | grep {\\--> (zext} | count 2
define i32 @foo(i32 %x) {
diff --git a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll
new file mode 100644
index 000000000000..f638eb340140
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -analyze -scalar-evolution -disable-output
+; PR4537
+
+; ModuleID = 'b.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test() {
+entry:
+ %0 = load i32** undef, align 8 ; <i32*> [#uses=1]
+ %1 = ptrtoint i32* %0 to i64 ; <i64> [#uses=1]
+ %2 = sub i64 undef, %1 ; <i64> [#uses=1]
+ %3 = lshr i64 %2, 3 ; <i64> [#uses=1]
+ %4 = trunc i64 %3 to i32 ; <i32> [#uses=2]
+ br i1 undef, label %bb10, label %bb4.i
+
+bb4.i: ; preds = %bb4.i, %entry
+ %i.0.i6 = phi i32 [ %8, %bb4.i ], [ 0, %entry ] ; <i32> [#uses=2]
+ %5 = sub i32 %4, %i.0.i6 ; <i32> [#uses=1]
+ %6 = sext i32 %5 to i64 ; <i64> [#uses=1]
+ %7 = udiv i64 undef, %6 ; <i64> [#uses=1]
+ %8 = add i32 %i.0.i6, 1 ; <i32> [#uses=2]
+ %phitmp = icmp eq i64 %7, 0 ; <i1> [#uses=1]
+ %.not.i = icmp sge i32 %8, %4 ; <i1> [#uses=1]
+ %or.cond.i = or i1 %phitmp, %.not.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %bb10, label %bb4.i
+
+bb10: ; preds = %bb4.i, %entry
+ unreachable
+}
diff --git a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
new file mode 100644
index 000000000000..31b95e1470b2
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
@@ -0,0 +1,354 @@
+; RUN: opt < %s -iv-users
+; PR4538
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-freebsd8.0"
+module asm ".ident\09\22$FreeBSD: head/sys/kern/vfs_subr.c 195285 2009-07-02 14:19:33Z jamie $\22"
+module asm ".section set_pcpu, \22aw\22, @progbits"
+module asm ".previous"
+ type <{ [40 x i8] }> ; type %0
+ type <{ %struct.vm_object*, %struct.vm_object** }> ; type %1
+ type <{ %struct.vm_object* }> ; type %2
+ type <{ %struct.vm_page*, %struct.vm_page** }> ; type %3
+ type <{ %struct.pv_entry*, %struct.pv_entry** }> ; type %4
+ type <{ %struct.vm_reserv* }> ; type %5
+ type <{ %struct.bufobj*, %struct.bufobj** }> ; type %6
+ type <{ %struct.proc*, %struct.proc** }> ; type %7
+ type <{ %struct.thread*, %struct.thread** }> ; type %8
+ type <{ %struct.prison*, %struct.prison** }> ; type %9
+ type <{ %struct.prison* }> ; type %10
+ type <{ %struct.task* }> ; type %11
+ type <{ %struct.osd*, %struct.osd** }> ; type %12
+ type <{ %struct.proc* }> ; type %13
+ type <{ %struct.ksiginfo*, %struct.ksiginfo** }> ; type %14
+ type <{ %struct.pv_chunk*, %struct.pv_chunk** }> ; type %15
+ type <{ %struct.pgrp*, %struct.pgrp** }> ; type %16
+ type <{ %struct.knote*, %struct.knote** }> ; type %17
+ type <{ %struct.ktr_request*, %struct.ktr_request** }> ; type %18
+ type <{ %struct.mqueue_notifier* }> ; type %19
+ type <{ %struct.turnstile* }> ; type %20
+ type <{ %struct.namecache* }> ; type %21
+ type <{ %struct.namecache*, %struct.namecache** }> ; type %22
+ type <{ %struct.lockf*, %struct.lockf** }> ; type %23
+ type <{ %struct.lockf_entry*, %struct.lockf_entry** }> ; type %24
+ type <{ %struct.lockf_edge*, %struct.lockf_edge** }> ; type %25
+ %struct.__siginfo = type <{ i32, i32, i32, i32, i32, i32, i8*, %union.sigval, %0 }>
+ %struct.__sigset = type <{ [4 x i32] }>
+ %struct.acl = type <{ i32, i32, [4 x i32], [254 x %struct.acl_entry] }>
+ %struct.acl_entry = type <{ i32, i32, i32, i16, i16 }>
+ %struct.au_mask = type <{ i32, i32 }>
+ %struct.au_tid_addr = type <{ i32, i32, [4 x i32] }>
+ %struct.auditinfo_addr = type <{ i32, %struct.au_mask, %struct.au_tid_addr, i32, i64 }>
+ %struct.bintime = type <{ i64, i64 }>
+ %struct.buf = type <{ %struct.bufobj*, i64, i8*, i8*, i32, i8, i8, i8, i8, i64, i64, void (%struct.buf*)*, i64, i64, %struct.buflists, %struct.buf*, %struct.buf*, i32, i8, i8, i8, i8, %struct.buflists, i16, i8, i8, i32, i8, i8, i8, i8, i8, i8, i8, i8, %struct.lock, i64, i64, i8*, i32, i8, i8, i8, i8, i64, %struct.vnode*, i32, i32, %struct.ucred*, %struct.ucred*, i8*, %union.pager_info, i8, i8, i8, i8, %union.anon, [32 x %struct.vm_page*], i32, i8, i8, i8, i8, %struct.workhead, i8*, i8*, i8*, i32, i8, i8, i8, i8 }>
+ %struct.buf_ops = type <{ i8*, i32 (%struct.buf*)*, void (%struct.bufobj*, %struct.buf*)*, i32 (%struct.bufobj*, i32)*, void (%struct.bufobj*, %struct.buf*)* }>
+ %struct.buflists = type <{ %struct.buf*, %struct.buf** }>
+ %struct.bufobj = type <{ %struct.mtx, %struct.bufv, %struct.bufv, i64, i32, i8, i8, i8, i8, %struct.buf_ops*, i32, i8, i8, i8, i8, %struct.vm_object*, %6, i8*, %struct.vnode* }>
+ %struct.bufv = type <{ %struct.buflists, %struct.buf*, i32, i8, i8, i8, i8 }>
+ %struct.callout = type <{ %union.anon, i32, i8, i8, i8, i8, i8*, void (i8*)*, %struct.lock_object*, i32, i32 }>
+ %struct.cdev_privdata = type opaque
+ %struct.cluster_save = type <{ i64, i64, i8*, i32, i8, i8, i8, i8, %struct.buf** }>
+ %struct.componentname = type <{ i64, i64, %struct.thread*, %struct.ucred*, i32, i8, i8, i8, i8, i8*, i8*, i64, i64 }>
+ %struct.cpuset = type opaque
+ %struct.cv = type <{ i8*, i32, i8, i8, i8, i8 }>
+ %struct.fid = type <{ i16, i16, [16 x i8] }>
+ %struct.file = type <{ i8*, %struct.fileops*, %struct.ucred*, %struct.vnode*, i16, i16, i32, i32, i32, i64, %struct.cdev_privdata*, i64, i8* }>
+ %struct.filedesc = type opaque
+ %struct.filedesc_to_leader = type opaque
+ %struct.fileops = type <{ i32 (%struct.file*, %struct.uio*, %struct.ucred*, i32, %struct.thread*)*, i32 (%struct.file*, %struct.uio*, %struct.ucred*, i32, %struct.thread*)*, i32 (%struct.file*, i64, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, i64, i8*, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, i32, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, %struct.knote*)*, i32 (%struct.file*, %struct.stat*, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, %struct.thread*)*, i32, i8, i8, i8, i8 }>
+ %struct.filterops = type <{ i32, i8, i8, i8, i8, i32 (%struct.knote*)*, void (%struct.knote*)*, i32 (%struct.knote*, i64)* }>
+ %struct.flock = type <{ i64, i64, i32, i16, i16, i32, i8, i8, i8, i8 }>
+ %struct.freelst = type <{ %struct.vnode*, %struct.vnode** }>
+ %struct.fsid = type <{ [2 x i32] }>
+ %struct.in6_addr = type opaque
+ %struct.in_addr = type opaque
+ %struct.inode = type opaque
+ %struct.iovec = type <{ i8*, i64 }>
+ %struct.itimers = type opaque
+ %struct.itimerval = type <{ %struct.bintime, %struct.bintime }>
+ %struct.kaioinfo = type opaque
+ %struct.kaudit_record = type opaque
+ %struct.kdtrace_proc = type opaque
+ %struct.kdtrace_thread = type opaque
+ %struct.kevent = type <{ i64, i16, i16, i32, i64, i8* }>
+ %struct.klist = type <{ %struct.knote* }>
+ %struct.knlist = type <{ %struct.klist, void (i8*)*, void (i8*)*, void (i8*)*, void (i8*)*, i8* }>
+ %struct.knote = type <{ %struct.klist, %struct.klist, %struct.knlist*, %17, %struct.kqueue*, %struct.kevent, i32, i32, i64, %union.sigval, %struct.filterops*, i8* }>
+ %struct.kqueue = type opaque
+ %struct.ksiginfo = type <{ %14, %struct.__siginfo, i32, i8, i8, i8, i8, %struct.sigqueue* }>
+ %struct.ktr_request = type opaque
+ %struct.label = type opaque
+ %struct.lock = type <{ %struct.lock_object, i64, i32, i32 }>
+ %struct.lock_list_entry = type opaque
+ %struct.lock_object = type <{ i8*, i32, i32, %struct.witness* }>
+ %struct.lock_owner = type opaque
+ %struct.lock_profile_object = type opaque
+ %struct.lockf = type <{ %23, %struct.mtx, %struct.lockf_entry_list, %struct.lockf_entry_list, i32, i8, i8, i8, i8 }>
+ %struct.lockf_edge = type <{ %25, %25, %struct.lockf_entry*, %struct.lockf_entry* }>
+ %struct.lockf_edge_list = type <{ %struct.lockf_edge* }>
+ %struct.lockf_entry = type <{ i16, i16, i8, i8, i8, i8, i64, i64, %struct.lock_owner*, %struct.vnode*, %struct.inode*, %struct.task*, %24, %struct.lockf_edge_list, %struct.lockf_edge_list, i32, i8, i8, i8, i8 }>
+ %struct.lockf_entry_list = type <{ %struct.lockf_entry* }>
+ %struct.lpohead = type <{ %struct.lock_profile_object* }>
+ %struct.md_page = type <{ %4 }>
+ %struct.mdproc = type <{ %struct.cv*, %struct.system_segment_descriptor }>
+ %struct.mdthread = type <{ i32, i8, i8, i8, i8, i64 }>
+ %struct.mntarg = type opaque
+ %struct.mntlist = type <{ %struct.mount*, %struct.mount** }>
+ %struct.mount = type <{ %struct.mtx, i32, i8, i8, i8, i8, %struct.mntlist, %struct.vfsops*, %struct.vfsconf*, %struct.vnode*, %struct.vnode*, i32, i8, i8, i8, i8, %struct.freelst, i32, i32, i32, i32, i32, i32, %struct.vfsoptlist*, %struct.vfsoptlist*, i32, i8, i8, i8, i8, %struct.statfs, %struct.ucred*, i8*, i64, i32, i8, i8, i8, i8, %struct.netexport*, %struct.label*, i32, i32, i32, i32, %struct.thread*, i8*, %struct.lock }>
+ %struct.mqueue_notifier = type opaque
+ %struct.mtx = type <{ %struct.lock_object, i64 }>
+ %struct.namecache = type opaque
+ %struct.netexport = type opaque
+ %struct.nlminfo = type opaque
+ %struct.osd = type <{ i32, i8, i8, i8, i8, i8**, %12 }>
+ %struct.p_sched = type opaque
+ %struct.pargs = type <{ i32, i32, [1 x i8], i8, i8, i8 }>
+ %struct.pcb = type opaque
+ %struct.pgrp = type <{ %16, %13, %struct.session*, %struct.sigiolst, i32, i32, %struct.mtx }>
+ %struct.plimit = type opaque
+ %struct.pmap = type <{ %struct.mtx, i64*, %15, i32, i8, i8, i8, i8, %struct.bintime, %struct.vm_page* }>
+ %struct.prison = type <{ %9, i32, i32, i32, i32, %10, %9, %struct.prison*, %struct.mtx, %struct.task, %struct.osd, %struct.cpuset*, %struct.vnet*, %struct.vnode*, i32, i32, %struct.in_addr*, %struct.in6_addr*, [4 x i8*], i32, i32, i32, i32, i32, [5 x i32], i64, [256 x i8], [1024 x i8], [256 x i8], [256 x i8], [64 x i8] }>
+ %struct.proc = type <{ %7, %8, %struct.mtx, %struct.ucred*, %struct.filedesc*, %struct.filedesc_to_leader*, %struct.pstats*, %struct.plimit*, %struct.callout, %struct.sigacts*, i32, i32, i32, i8, i8, i8, i8, %7, %7, %struct.proc*, %7, %13, %struct.mtx, %struct.ksiginfo*, %struct.sigqueue, i32, i8, i8, i8, i8, %struct.vmspace*, i32, i8, i8, i8, i8, %struct.itimerval, %struct.rusage, %struct.rusage_ext, %struct.rusage_ext, i32, i32, i32, i8, i8, i8, i8, %struct.vnode*, %struct.ucred*, %struct.vnode*, i32, i8, i8, i8, i8, %struct.sigiolst, i32, i32, i64, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, %struct.nlminfo*, %struct.kaioinfo*, %struct.thread*, i32, i8, i8, i8, i8, %struct.thread*, i32, i32, %struct.itimers*, i32, i32, [20 x i8], i8, i8, i8, i8, %struct.pgrp*, %struct.sysentvec*, %struct.pargs*, i64, i8, i8, i8, i8, i32, i16, i8, i8, i8, i8, i8, i8, %struct.knlist, i32, i8, i8, i8, i8, %struct.mdproc, %struct.callout, i16, i8, i8, i8, i8, i8, i8, %struct.proc*, %struct.proc*, i8*, %struct.label*, %struct.p_sched*, %18, %19, %struct.kdtrace_proc*, %struct.cv }>
+ %struct.pstats = type opaque
+ %struct.pv_chunk = type <{ %struct.pmap*, %15, [3 x i64], [2 x i64], [168 x %struct.pv_entry] }>
+ %struct.pv_entry = type <{ i64, %4 }>
+ %struct.rusage = type <{ %struct.bintime, %struct.bintime, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 }>
+ %struct.rusage_ext = type <{ i64, i64, i64, i64, i64, i64, i64 }>
+ %struct.selfd = type opaque
+ %struct.selfdlist = type <{ %struct.selfd*, %struct.selfd** }>
+ %struct.selinfo = type <{ %struct.selfdlist, %struct.knlist, %struct.mtx* }>
+ %struct.seltd = type opaque
+ %struct.session = type <{ i32, i8, i8, i8, i8, %struct.proc*, %struct.vnode*, %struct.tty*, i32, [24 x i8], i8, i8, i8, i8, %struct.mtx }>
+ %struct.shmmap_state = type opaque
+ %struct.sigacts = type <{ [128 x void (i32)*], [128 x %struct.__sigset], %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, i32, i32, %struct.mtx }>
+ %struct.sigaltstack = type <{ i8*, i64, i32, i8, i8, i8, i8 }>
+ %struct.sigio = type <{ %union.sigval, %struct.sigiolst, %struct.sigio**, %struct.ucred*, i32, i8, i8, i8, i8 }>
+ %struct.sigiolst = type <{ %struct.sigio* }>
+ %struct.sigqueue = type <{ %struct.__sigset, %struct.__sigset, %14, %struct.proc*, i32, i8, i8, i8, i8 }>
+ %struct.sleepqueue = type opaque
+ %struct.sockaddr = type opaque
+ %struct.stat = type <{ i32, i32, i16, i16, i32, i32, i32, %struct.bintime, %struct.bintime, %struct.bintime, i64, i64, i32, i32, i32, i32, %struct.bintime }>
+ %struct.statfs = type <{ i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, [10 x i64], i32, i32, %struct.fsid, [80 x i8], [16 x i8], [88 x i8], [88 x i8] }>
+ %struct.sysctl_req = type <{ %struct.thread*, i32, i8, i8, i8, i8, i8*, i64, i64, i32 (%struct.sysctl_req*, i8*, i64)*, i8*, i64, i64, i32 (%struct.sysctl_req*, i8*, i64)*, i64, i32, i8, i8, i8, i8 }>
+ %struct.sysentvec = type opaque
+ %struct.system_segment_descriptor = type <{ i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }>
+ %struct.task = type <{ %11, i16, i16, i8, i8, i8, i8, void (i8*, i32)*, i8* }>
+ %struct.td_sched = type opaque
+ %struct.thread = type <{ %struct.mtx*, %struct.proc*, %8, %8, %8, %8, %struct.cpuset*, %struct.seltd*, %struct.sleepqueue*, %struct.turnstile*, %struct.umtx_q*, i32, i8, i8, i8, i8, %struct.sigqueue, i32, i32, i32, i32, i32, i8, i8, i8, i8, i8*, i8*, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, %struct.turnstile*, i8*, %20, %struct.lock_list_entry*, i32, i32, %struct.ucred*, i32, i32, %struct.rusage, i64, i64, i32, i32, i32, i32, i32, %struct.__sigset, %struct.__sigset, i32, %struct.sigaltstack, i32, i8, i8, i8, i8, i64, i32, [20 x i8], %struct.file*, i32, i32, %struct.osd, i8, i8, i8, i8, i8, i8, i8, i8, %struct.pcb*, i32, i8, i8, i8, i8, [2 x i64], %struct.callout, %struct.trapframe*, %struct.vm_object*, i64, i32, i8, i8, i8, i8, %struct.vm_object*, i64, i32, i32, %struct.mdthread, %struct.td_sched*, %struct.kaudit_record*, i32, i8, i8, i8, i8, [2 x %struct.lpohead], %struct.kdtrace_thread*, i32, i8, i8, i8, i8, %struct.vnet*, i8* }>
+ %struct.trapframe = type <{ i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i16, i16, i64, i32, i16, i16, i64, i64, i64, i64, i64, i64 }>
+ %struct.tty = type opaque
+ %struct.turnstile = type opaque
+ %struct.ucred = type <{ i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, %struct.uidinfo*, %struct.uidinfo*, %struct.prison*, %struct.vimage*, i32, i8, i8, i8, i8, [2 x i8*], %struct.label*, %struct.auditinfo_addr, i32*, i32, i8, i8, i8, i8 }>
+ %struct.uidinfo = type opaque
+ %struct.uio = type <{ %struct.iovec*, i32, i8, i8, i8, i8, i64, i64, i32, i32, %struct.thread* }>
+ %struct.umtx_q = type opaque
+ %struct.vattr = type <{ i32, i16, i16, i32, i32, i32, i8, i8, i8, i8, i64, i64, i64, %struct.bintime, %struct.bintime, %struct.bintime, %struct.bintime, i64, i64, i32, i8, i8, i8, i8, i64, i64, i32, i8, i8, i8, i8, i64 }>
+ %struct.vfsconf = type <{ i32, [16 x i8], i8, i8, i8, i8, %struct.vfsops*, i32, i32, i32, i8, i8, i8, i8, %struct.vfsoptdecl*, %struct.vfsconfhead }>
+ %struct.vfsconfhead = type <{ %struct.vfsconf*, %struct.vfsconf** }>
+ %struct.vfsops = type <{ i32 (%struct.mount*)*, i32 (%struct.mntarg*, i8*, i32)*, i32 (%struct.mount*, i32)*, i32 (%struct.mount*, i32, %struct.vnode**)*, i32 (%struct.mount*, i32, i32, i8*)*, i32 (%struct.mount*, %struct.statfs*)*, i32 (%struct.mount*, i32)*, i32 (%struct.mount*, i32, i32, %struct.vnode**)*, i32 (%struct.mount*, %struct.fid*, %struct.vnode**)*, i32 (%struct.mount*, %struct.sockaddr*, i32*, %struct.ucred**, i32*, i32**)*, i32 (%struct.vfsconf*)*, i32 (%struct.vfsconf*)*, i32 (%struct.mount*, i32, %struct.vnode*, i32, i8*)*, i32 (%struct.mount*, i32, %struct.sysctl_req*)*, void (%struct.mount*)* }>
+ %struct.vfsopt = type <{ %struct.vfsoptlist, i8*, i8*, i32, i32, i32, i8, i8, i8, i8 }>
+ %struct.vfsoptdecl = type opaque
+ %struct.vfsoptlist = type <{ %struct.vfsopt*, %struct.vfsopt** }>
+ %struct.vimage = type opaque
+ %struct.vm_map = type <{ %struct.vm_map_entry, %struct.mtx, %struct.mtx, i32, i8, i8, i8, i8, i64, i32, i8, i8, i8, i8, %struct.vm_map_entry*, %struct.pmap*, %struct.vm_map_entry* }>
+ %struct.vm_map_entry = type <{ %struct.vm_map_entry*, %struct.vm_map_entry*, %struct.vm_map_entry*, %struct.vm_map_entry*, i64, i64, i64, i64, i64, %union.sigval, i64, i32, i8, i8, i8, i8, i32, i8, i8, i8, i8, i64, %struct.uidinfo* }>
+ %struct.vm_object = type <{ %struct.mtx, %1, %2, %1, %3, %struct.vm_page*, i64, i32, i32, i32, i8, i8, i16, i16, i16, i32, %struct.vm_object*, i64, %1, %5, %struct.vm_page*, i8*, %union.anon, %struct.uidinfo*, i64 }>
+ %struct.vm_page = type <{ %3, %3, %struct.vm_page*, %struct.vm_page*, %struct.vm_object*, i64, i64, %struct.md_page, i8, i8, i16, i8, i8, i16, i32, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }>
+ %struct.vm_reserv = type opaque
+ %struct.vmspace = type <{ %struct.vm_map, %struct.shmmap_state*, i64, i64, i64, i64, i8*, i8*, i8*, i32, i8, i8, i8, i8, %struct.pmap }>
+ %struct.vnet = type opaque
+ %struct.vnode = type <{ i32, i8, i8, i8, i8, i8*, %struct.vop_vector*, i8*, %struct.mount*, %struct.freelst, %union.sigval, %struct.freelst, i32, i8, i8, i8, i8, %21, %22, %struct.namecache*, i64, i64, i64, i32, i8, i8, i8, i8, %struct.lock, %struct.mtx, %struct.lock*, i32, i32, i64, i64, i32, i8, i8, i8, i8, %struct.freelst, %struct.bufobj, %struct.vpollinfo*, %struct.label*, %struct.lockf* }>
+ %struct.vnodeop_desc = type <{ i8*, i32, i8, i8, i8, i8, i32 (%struct.vop_generic_args*)*, i32*, i32, i32, i32, i32 }>
+ %struct.vop_access_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread* }>
+ %struct.vop_aclcheck_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.acl*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_advlock_args = type <{ %struct.vop_generic_args, %struct.vnode*, i8*, i32, i8, i8, i8, i8, %struct.flock*, i32, i8, i8, i8, i8 }>
+ %struct.vop_advlockasync_args = type <{ %struct.vop_generic_args, %struct.vnode*, i8*, i32, i8, i8, i8, i8, %struct.flock*, i32, i8, i8, i8, i8, %struct.task*, i8** }>
+ %struct.vop_bmap_args = type <{ %struct.vop_generic_args, %struct.vnode*, i64, %struct.bufobj**, i64*, i32*, i32* }>
+ %struct.vop_cachedlookup_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname* }>
+ %struct.vop_create_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname*, %struct.vattr* }>
+ %struct.vop_deleteextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_fsync_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.thread* }>
+ %struct.vop_generic_args = type <{ %struct.vnodeop_desc* }>
+ %struct.vop_getattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vattr*, %struct.ucred* }>
+ %struct.vop_getextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.uio*, i64*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_getpages_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vm_page**, i32, i32, i64 }>
+ %struct.vop_getwritemount_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.mount** }>
+ %struct.vop_inactive_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.thread* }>
+ %struct.vop_ioctl_args = type <{ %struct.vop_generic_args, %struct.vnode*, i64, i8*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread* }>
+ %struct.vop_islocked_args = type <{ %struct.vop_generic_args, %struct.vnode* }>
+ %struct.vop_kqfilter_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.knote* }>
+ %struct.vop_link_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode*, %struct.componentname* }>
+ %struct.vop_listextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.uio*, i64*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_lock1_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8 }>
+ %struct.vop_open_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread*, %struct.file* }>
+ %struct.vop_openextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_pathconf_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i64* }>
+ %struct.vop_putpages_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vm_page**, i32, i32, i32*, i64 }>
+ %struct.vop_read_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, i32, i8, i8, i8, i8, %struct.ucred* }>
+ %struct.vop_readdir_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, %struct.ucred*, i32*, i32*, i64** }>
+ %struct.vop_readlink_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, %struct.ucred* }>
+ %struct.vop_reallocblks_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.cluster_save* }>
+ %struct.vop_rename_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode*, %struct.componentname*, %struct.vnode*, %struct.vnode*, %struct.componentname* }>
+ %struct.vop_revoke_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8 }>
+ %struct.vop_setextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.uio*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_setlabel_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.label*, %struct.ucred*, %struct.thread* }>
+ %struct.vop_strategy_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.buf* }>
+ %struct.vop_symlink_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname*, %struct.vattr*, i8* }>
+ %struct.vop_vector = type <{ %struct.vop_vector*, i32 (%struct.vop_generic_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_cachedlookup_args*)*, i32 (%struct.vop_cachedlookup_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_whiteout_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_open_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_getattr_args*)*, i32 (%struct.vop_getattr_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_read_args*)*, i32 (%struct.vop_read_args*)*, i32 (%struct.vop_ioctl_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_kqfilter_args*)*, i32 (%struct.vop_revoke_args*)*, i32 (%struct.vop_fsync_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_rename_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_symlink_args*)*, i32 (%struct.vop_readdir_args*)*, i32 (%struct.vop_readlink_args*)*, i32 (%struct.vop_inactive_args*)*, i32 (%struct.vop_inactive_args*)*, i32 (%struct.vop_lock1_args*)*, i32 (%struct.vop_revoke_args*)*, i32 (%struct.vop_bmap_args*)*, i32 (%struct.vop_strategy_args*)*, i32 (%struct.vop_getwritemount_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_pathconf_args*)*, i32 (%struct.vop_advlock_args*)*, i32 (%struct.vop_advlockasync_args*)*, i32 (%struct.vop_reallocblks_args*)*, i32 (%struct.vop_getpages_args*)*, i32 (%struct.vop_putpages_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_getextattr_args*)*, i32 (%struct.vop_listextattr_args*)*, i32 (%struct.vop_openextattr_args*)*, i32 (%struct.vop_deleteextattr_args*)*, i32 (%struct.vop_setextattr_args*)*, i32 (%struct.vop_setlabel_args*)*, i32 (%struct.vop_vptofh_args*)*, i32 (%struct.vop_vptocnp_args*)* }>
+ %struct.vop_vptocnp_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.ucred*, i8*, i32* }>
+ %struct.vop_vptofh_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.fid* }>
+ %struct.vop_whiteout_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.componentname*, i32, i8, i8, i8, i8 }>
+ %struct.vpollinfo = type <{ %struct.mtx, %struct.selinfo, i16, i16, i8, i8, i8, i8 }>
+ %struct.witness = type opaque
+ %struct.workhead = type <{ %struct.worklist* }>
+ %struct.worklist = type opaque
+ %union.anon = type <{ [16 x i8] }>
+ %union.pager_info = type <{ [4 x i8] }>
+ %union.sigval = type <{ [8 x i8] }>
+
+define i32 @vlrureclaim(%struct.mount* %mp) nounwind {
+entry:
+ br i1 undef, label %if.then11, label %do.end
+
+if.then11: ; preds = %entry
+ br label %do.end
+
+do.end: ; preds = %if.then11, %entry
+ br label %while.cond.outer
+
+while.cond.outer: ; preds = %while.cond.outer.backedge, %do.end
+ %count.0.ph = phi i32 [ undef, %do.end ], [ undef, %while.cond.outer.backedge ] ; <i32> [#uses=1]
+ br label %while.cond
+
+while.cond: ; preds = %next_iter, %while.cond.outer
+ %count.0 = phi i32 [ %dec, %next_iter ], [ %count.0.ph, %while.cond.outer ] ; <i32> [#uses=2]
+ %cmp21 = icmp eq i32 %count.0, 0 ; <i1> [#uses=1]
+ br i1 %cmp21, label %do.body288.loopexit4, label %while.body
+
+while.body: ; preds = %while.cond
+ br label %while.cond27
+
+while.cond27: ; preds = %while.body36, %while.body
+ br i1 undef, label %do.body288.loopexit, label %land.rhs
+
+land.rhs: ; preds = %while.cond27
+ br i1 undef, label %while.body36, label %while.end
+
+while.body36: ; preds = %land.rhs
+ br label %while.cond27
+
+while.end: ; preds = %land.rhs
+ br i1 undef, label %do.body288.loopexit4, label %do.body46
+
+do.body46: ; preds = %while.end
+ br i1 undef, label %if.else64, label %if.then53
+
+if.then53: ; preds = %do.body46
+ br label %if.end72
+
+if.else64: ; preds = %do.body46
+ br label %if.end72
+
+if.end72: ; preds = %if.else64, %if.then53
+ %dec = add i32 %count.0, -1 ; <i32> [#uses=2]
+ br i1 undef, label %next_iter, label %if.end111
+
+if.end111: ; preds = %if.end72
+ br i1 undef, label %lor.lhs.false, label %do.body145
+
+lor.lhs.false: ; preds = %if.end111
+ br i1 undef, label %lor.lhs.false122, label %do.body145
+
+lor.lhs.false122: ; preds = %lor.lhs.false
+ br i1 undef, label %lor.lhs.false128, label %do.body145
+
+lor.lhs.false128: ; preds = %lor.lhs.false122
+ br i1 undef, label %do.body162, label %land.lhs.true
+
+land.lhs.true: ; preds = %lor.lhs.false128
+ br i1 undef, label %do.body145, label %do.body162
+
+do.body145: ; preds = %land.lhs.true, %lor.lhs.false122, %lor.lhs.false, %if.end111
+ br i1 undef, label %if.then156, label %next_iter
+
+if.then156: ; preds = %do.body145
+ br label %next_iter
+
+do.body162: ; preds = %land.lhs.true, %lor.lhs.false128
+ br i1 undef, label %if.then173, label %do.end177
+
+if.then173: ; preds = %do.body162
+ br label %do.end177
+
+do.end177: ; preds = %if.then173, %do.body162
+ br i1 undef, label %do.body185, label %if.then182
+
+if.then182: ; preds = %do.end177
+ br label %next_iter_mntunlocked
+
+do.body185: ; preds = %do.end177
+ br i1 undef, label %if.then196, label %do.end202
+
+if.then196: ; preds = %do.body185
+ br label %do.end202
+
+do.end202: ; preds = %if.then196, %do.body185
+ br i1 undef, label %lor.lhs.false207, label %if.then231
+
+lor.lhs.false207: ; preds = %do.end202
+ br i1 undef, label %lor.lhs.false214, label %if.then231
+
+lor.lhs.false214: ; preds = %lor.lhs.false207
+ br i1 undef, label %do.end236, label %land.lhs.true221
+
+land.lhs.true221: ; preds = %lor.lhs.false214
+ br i1 undef, label %if.then231, label %do.end236
+
+if.then231: ; preds = %land.lhs.true221, %lor.lhs.false207, %do.end202
+ br label %next_iter_mntunlocked
+
+do.end236: ; preds = %land.lhs.true221, %lor.lhs.false214
+ br label %next_iter_mntunlocked
+
+next_iter_mntunlocked: ; preds = %do.end236, %if.then231, %if.then182
+ br i1 undef, label %yield, label %do.body269
+
+next_iter: ; preds = %if.then156, %do.body145, %if.end72
+ %rem2482 = and i32 %dec, 255 ; <i32> [#uses=1]
+ %cmp249 = icmp eq i32 %rem2482, 0 ; <i1> [#uses=1]
+ br i1 %cmp249, label %do.body253, label %while.cond
+
+do.body253: ; preds = %next_iter
+ br i1 undef, label %if.then264, label %yield
+
+if.then264: ; preds = %do.body253
+ br label %yield
+
+yield: ; preds = %if.then264, %do.body253, %next_iter_mntunlocked
+ br label %do.body269
+
+do.body269: ; preds = %yield, %next_iter_mntunlocked
+ br i1 undef, label %if.then280, label %while.cond.outer.backedge
+
+if.then280: ; preds = %do.body269
+ br label %while.cond.outer.backedge
+
+while.cond.outer.backedge: ; preds = %if.then280, %do.body269
+ br label %while.cond.outer
+
+do.body288.loopexit: ; preds = %while.cond27
+ br label %do.body288
+
+do.body288.loopexit4: ; preds = %while.end, %while.cond
+ br label %do.body288
+
+do.body288: ; preds = %do.body288.loopexit4, %do.body288.loopexit
+ br i1 undef, label %if.then299, label %do.end303
+
+if.then299: ; preds = %do.body288
+ br label %do.end303
+
+do.end303: ; preds = %if.then299, %do.body288
+ ret i32 undef
+}
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
index ce7ee7791d58..b733d6acb504 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)}
; We don't want to use a max in the trip count expression in
; this testcase.
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index 9270b6e6c890..0bc9ce8241a8 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep select %t | count 2
-; RUN: grep {icmp ne i32.\* %w } %t
+; RUN: grep {icmp ne i32.\* %w } %t
; Indvars should be able to insert a canonical induction variable
; for the bb6 loop without using a maximum calculation (icmp, select)
diff --git a/test/Analysis/ScalarEvolution/div-overflow.ll b/test/Analysis/ScalarEvolution/div-overflow.ll
index cb64b856a777..0c01044b977f 100644
--- a/test/Analysis/ScalarEvolution/div-overflow.ll
+++ b/test/Analysis/ScalarEvolution/div-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \
+; RUN: opt < %s -scalar-evolution -analyze -disable-output \
; RUN: | grep {\\--> ((-128 \\* %a) /u -128)}
; Don't let ScalarEvolution fold this div away.
diff --git a/test/Analysis/ScalarEvolution/do-loop.ll b/test/Analysis/ScalarEvolution/do-loop.ll
index 85c38e4f1c5a..f8d7da7c9a0a 100644
--- a/test/Analysis/ScalarEvolution/do-loop.ll
+++ b/test/Analysis/ScalarEvolution/do-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep smax
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep smax
; PR1614
define i32 @f(i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index 05674149da82..506401dafea5 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
-; RUN: | grep {\{%d,+,4\}<bb>}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
+; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<bb>}
+
+; ScalarEvolution should be able to understand the loop and eliminate the casts.
define void @foo(i32* nocapture %d, i32 %n) nounwind {
entry:
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll
new file mode 100644
index 000000000000..1e165bf62226
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -0,0 +1,76 @@
+; RUN: opt < %s -S -analyze -scalar-evolution -disable-output | FileCheck %s
+
+; ScalarEvolution should be able to fold away the sign-extensions
+; on this loop with a primary induction variable incremented with
+; a nsw add of 2.
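+; Because the increment carries the nsw flag, the i32 induction variable
+; cannot signed-wrap, so SCEV may rewrite sext({0,+,2}) as the wide
+; addrec {0,+,2} in i64, as the CHECK lines below expect.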
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @foo(i32 %n, double* nocapture %d, double* nocapture %q) nounwind {
+entry:
+ %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb.nph, label %return
+
+bb.nph: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb.nph, %bb1
+ %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=5]
+
+; CHECK: %1 = sext i32 %i.01 to i64
+; CHECK: --> {0,+,2}<bb>
+ %1 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+
+; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
+; CHECK: --> {%d,+,16}<bb>
+ %2 = getelementptr inbounds double* %d, i64 %1 ; <double*> [#uses=1]
+
+ %3 = load double* %2, align 8 ; <double> [#uses=1]
+ %4 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+ %5 = getelementptr inbounds double* %q, i64 %4 ; <double*> [#uses=1]
+ %6 = load double* %5, align 8 ; <double> [#uses=1]
+ %7 = or i32 %i.01, 1 ; <i32> [#uses=1]
+
+; CHECK: %8 = sext i32 %7 to i64
+; CHECK: --> {1,+,2}<bb>
+ %8 = sext i32 %7 to i64 ; <i64> [#uses=1]
+
+; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
+; CHECK: {(8 + %q),+,16}<bb>
+ %9 = getelementptr inbounds double* %q, i64 %8 ; <double*> [#uses=1]
+
+; Artificially repeat the above three instructions, this time using
+; add nsw instead of or.
+ %t7 = add nsw i32 %i.01, 1 ; <i32> [#uses=1]
+
+; CHECK: %t8 = sext i32 %t7 to i64
+; CHECK: --> {1,+,2}<bb>
+ %t8 = sext i32 %t7 to i64 ; <i64> [#uses=1]
+
+; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
+; CHECK: {(8 + %q),+,16}<bb>
+ %t9 = getelementptr inbounds double* %q, i64 %t8 ; <double*> [#uses=1]
+
+ %10 = load double* %9, align 8 ; <double> [#uses=1]
+ %11 = fadd double %6, %10 ; <double> [#uses=1]
+ %12 = fadd double %11, 3.200000e+00 ; <double> [#uses=1]
+ %13 = fmul double %3, %12 ; <double> [#uses=1]
+ %14 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+ %15 = getelementptr inbounds double* %d, i64 %14 ; <double*> [#uses=1]
+ store double %13, double* %15, align 8
+ %16 = add nsw i32 %i.01, 2 ; <i32> [#uses=2]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %17 = icmp slt i32 %16, %n ; <i1> [#uses=1]
+ br i1 %17, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge: ; preds = %bb1
+ br label %return
+
+return: ; preds = %bb1.return_crit_edge, %entry
+ ret void
+}
+
+; CHECK: Loop bb: backedge-taken count is ((-1 + %n) /u 2)
+; CHECK: Loop bb: max backedge-taken count is 1073741823
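+; (With %i.01 stepping by 2 from 0 while %16 = %i.01 + 2 is slt %n, the
+; backedge runs floor((%n - 1) / 2) times, i.e. (-1 + %n) /u 2; since
+; %n <= 2^31 - 1, that is at most 2^30 - 1 = 1073741823.)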
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
new file mode 100644
index 000000000000..c31edabf38ee
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}<bb>} | count 8
+
+; The addrecs in this loop are analyzable only by using nsw information.
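+; The add nsw on %tmp8 guarantees the i32 counter never signed-wraps, so
+; the sexts of %i.01 and %tmp8 can each be folded into 64-bit addrecs.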
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+define void @foo(double* %p) nounwind {
+entry:
+ %tmp = load double* %p, align 8 ; <double> [#uses=1]
+ %tmp1 = fcmp ogt double %tmp, 2.000000e+00 ; <i1> [#uses=1]
+ br i1 %tmp1, label %bb.nph, label %return
+
+bb.nph: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb1, %bb.nph
+ %i.01 = phi i32 [ %tmp8, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=3]
+ %tmp2 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+ %tmp3 = getelementptr double* %p, i64 %tmp2 ; <double*> [#uses=1]
+ %tmp4 = load double* %tmp3, align 8 ; <double> [#uses=1]
+ %tmp5 = fmul double %tmp4, 9.200000e+00 ; <double> [#uses=1]
+ %tmp6 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+ %tmp7 = getelementptr double* %p, i64 %tmp6 ; <double*> [#uses=1]
+ store double %tmp5, double* %tmp7, align 8
+ %tmp8 = add nsw i32 %i.01, 1 ; <i32> [#uses=2]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %phitmp = sext i32 %tmp8 to i64 ; <i64> [#uses=1]
+ %tmp9 = getelementptr double* %p, i64 %phitmp ; <double*> [#uses=1]
+ %tmp10 = load double* %tmp9, align 8 ; <double> [#uses=1]
+ %tmp11 = fcmp ogt double %tmp10, 2.000000e+00 ; <i1> [#uses=1]
+ br i1 %tmp11, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge: ; preds = %bb1
+ br label %return
+
+return: ; preds = %bb1.return_crit_edge, %entry
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/pointer-sign-bits.ll b/test/Analysis/ScalarEvolution/pointer-sign-bits.ll
index 05cb81b3ba6b..4de006c4ed18 100644
--- a/test/Analysis/ScalarEvolution/pointer-sign-bits.ll
+++ b/test/Analysis/ScalarEvolution/pointer-sign-bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output
+; RUN: opt < %s -analyze -scalar-evolution -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
%JavaObject = type { [0 x i32 (...)*]*, i8* }
diff --git a/test/Analysis/ScalarEvolution/pr3909.ll b/test/Analysis/ScalarEvolution/pr3909.ll
index 80720c724afb..10e328ddf7e7 100644
--- a/test/Analysis/ScalarEvolution/pr3909.ll
+++ b/test/Analysis/ScalarEvolution/pr3909.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; PR 3909
diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll
new file mode 100644
index 000000000000..0dcf52977a0a
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -0,0 +1,194 @@
+; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \
+; RUN: |& FileCheck %s
+
+; At the time of this writing, all of these CHECK lines are cases that
+; plain -basicaa misses.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+; p[i] and p[i+1] don't alias.
+
+; CHECK: Function: loop: 3 pointers, 0 call sites
+; CHECK: NoAlias: double* %pi, double* %pi.next
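+; (%pi and %pi.next have SCEVs {%p,+,8} and {(8 + %p),+,8}; their
+; difference is the constant 8, no smaller than either access, so
+; scev-aa can report NoAlias.)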
+
+define void @loop(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %j = icmp sgt i64 %n, 0
+ br i1 %j, label %bb, label %return
+
+bb:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
+ %pi = getelementptr double* %p, i64 %i
+ %i.next = add i64 %i, 1
+ %pi.next = getelementptr double* %p, i64 %i.next
+ %x = load double* %pi
+ %y = load double* %pi.next
+ %z = fmul double %x, %y
+ store double %z, double* %pi
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}
+
+; Slightly more involved: p[j][i], p[j][i+1], and p[j+1][i] don't alias.
+
+; CHECK: Function: nestedloop: 4 pointers, 0 call sites
+; CHECK: NoAlias: double* %pi.j, double* %pi.next.j
+; CHECK: NoAlias: double* %pi.j, double* %pi.j.next
+; CHECK: NoAlias: double* %pi.j.next, double* %pi.next.j
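+; (The row stride is the constant 91, and i and j are shared between the
+; pointers, so the three pairs differ by the constants 1, 91, and 90
+; elements respectively, which is enough for NoAlias on 8-byte accesses.)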
+
+define void @nestedloop(double* nocapture %p, i64 %m) nounwind {
+entry:
+ %k = icmp sgt i64 %m, 0
+ br i1 %k, label %guard, label %return
+
+guard:
+ %l = icmp sgt i64 91, 0
+ br i1 %l, label %outer.loop, label %return
+
+outer.loop:
+ %j = phi i64 [ 0, %guard ], [ %j.next, %outer.latch ]
+ br label %bb
+
+bb:
+ %i = phi i64 [ 0, %outer.loop ], [ %i.next, %bb ]
+ %i.next = add i64 %i, 1
+
+ %e = add i64 %i, %j
+ %pi.j = getelementptr double* %p, i64 %e
+ %f = add i64 %i.next, %j
+ %pi.next.j = getelementptr double* %p, i64 %f
+ %x = load double* %pi.j
+ %y = load double* %pi.next.j
+ %z = fmul double %x, %y
+ store double %z, double* %pi.j
+
+ %o = add i64 %j, 91
+ %g = add i64 %i, %o
+ %pi.j.next = getelementptr double* %p, i64 %g
+ %a = load double* %pi.j.next
+ %b = fmul double %x, %a
+ store double %b, double* %pi.j.next
+
+ %exitcond = icmp eq i64 %i.next, 91
+ br i1 %exitcond, label %outer.latch, label %bb
+
+outer.latch:
+ %j.next = add i64 %j, 91
+ %h = icmp eq i64 %j.next, %m
+ br i1 %h, label %return, label %outer.loop
+
+return:
+ ret void
+}
+
+; Even more involved: same as nestedloop, but with a variable extent.
+; When n is 1, p[j+1][i] does alias p[j][i+1], and there's no way to
+; prove whether n will be greater than 1, so that relation will always
+; be MayAlias. The loop is guarded by an n > 0 test though, so
+; p[j+1][i] and p[j][i] can theoretically be determined to be NoAlias;
+; however, the analysis currently doesn't do that.
+; TODO: Make the analysis smarter and turn that MayAlias into a NoAlias.
+
+; CHECK: Function: nestedloop_more: 4 pointers, 0 call sites
+; CHECK: NoAlias: double* %pi.j, double* %pi.next.j
+; CHECK: MayAlias: double* %pi.j, double* %pi.j.next
+
+define void @nestedloop_more(double* nocapture %p, i64 %n, i64 %m) nounwind {
+entry:
+ %k = icmp sgt i64 %m, 0
+ br i1 %k, label %guard, label %return
+
+guard:
+ %l = icmp sgt i64 %n, 0
+ br i1 %l, label %outer.loop, label %return
+
+outer.loop:
+ %j = phi i64 [ 0, %guard ], [ %j.next, %outer.latch ]
+ br label %bb
+
+bb:
+ %i = phi i64 [ 0, %outer.loop ], [ %i.next, %bb ]
+ %i.next = add i64 %i, 1
+
+ %e = add i64 %i, %j
+ %pi.j = getelementptr double* %p, i64 %e
+ %f = add i64 %i.next, %j
+ %pi.next.j = getelementptr double* %p, i64 %f
+ %x = load double* %pi.j
+ %y = load double* %pi.next.j
+ %z = fmul double %x, %y
+ store double %z, double* %pi.j
+
+ %o = add i64 %j, %n
+ %g = add i64 %i, %o
+ %pi.j.next = getelementptr double* %p, i64 %g
+ %a = load double* %pi.j.next
+ %b = fmul double %x, %a
+ store double %b, double* %pi.j.next
+
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %outer.latch, label %bb
+
+outer.latch:
+ %j.next = add i64 %j, %n
+ %h = icmp eq i64 %j.next, %m
+ br i1 %h, label %return, label %outer.loop
+
+return:
+ ret void
+}
+
+; ScalarEvolution expands field offsets into constants, which allows it to
+; do aggressive analysis. Contrast this with BasicAA, which works by
+; recognizing GEP idioms.
+
+%struct.A = type { %struct.B, i32, i32 }
+%struct.B = type { double }
+
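+; Field offsets make the results below concrete: %B is at offset 0 of
+; %A, while %Z, %C (= &%B[1], just past the lone double), %X, and %Y
+; all compute offset 8, so those four must alias one another.
+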
+; CHECK: Function: foo: 7 pointers, 0 call sites
+; CHECK: NoAlias: %struct.B* %B, i32* %Z
+; CHECK: NoAlias: %struct.B* %B, %struct.B* %C
+; CHECK: MustAlias: %struct.B* %C, i32* %Z
+; CHECK: NoAlias: %struct.B* %B, i32* %X
+; CHECK: MustAlias: i32* %X, i32* %Z
+; CHECK: MustAlias: %struct.B* %C, i32* %Y
+; CHECK: MustAlias: i32* %X, i32* %Y
+
+define void @foo() {
+entry:
+ %A = alloca %struct.A
+ %B = getelementptr %struct.A* %A, i32 0, i32 0
+ %Q = bitcast %struct.B* %B to %struct.A*
+ %Z = getelementptr %struct.A* %Q, i32 0, i32 1
+ %C = getelementptr %struct.B* %B, i32 1
+ %X = bitcast %struct.B* %C to i32*
+ %Y = getelementptr %struct.A* %A, i32 0, i32 1
+ ret void
+}
+
+; CHECK: Function: bar: 7 pointers, 0 call sites
+; CHECK: NoAlias: %struct.B* %N, i32* %P
+; CHECK: NoAlias: %struct.B* %N, %struct.B* %R
+; CHECK: MustAlias: %struct.B* %R, i32* %P
+; CHECK: NoAlias: %struct.B* %N, i32* %W
+; CHECK: MustAlias: i32* %P, i32* %W
+; CHECK: MustAlias: %struct.B* %R, i32* %V
+; CHECK: MustAlias: i32* %V, i32* %W
+
+define void @bar() {
+ %M = alloca %struct.A
+ %N = getelementptr %struct.A* %M, i32 0, i32 0
+ %O = bitcast %struct.B* %N to %struct.A*
+ %P = getelementptr %struct.A* %O, i32 0, i32 1
+ %R = getelementptr %struct.B* %N, i32 1
+ %W = bitcast %struct.B* %R to i32*
+ %V = getelementptr %struct.A* %M, i32 0, i32 1
+ ret void
+}
+
+; CHECK: 13 no alias responses
+; CHECK: 26 may alias responses
+; CHECK: 18 must alias responses
diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll
index 8a88f0f7d967..16128354aeb4 100644
--- a/test/Analysis/ScalarEvolution/sext-inreg.ll
+++ b/test/Analysis/ScalarEvolution/sext-inreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output > %t
+; RUN: opt < %s -analyze -scalar-evolution -disable-output > %t
; RUN: grep {sext i57 \{0,+,199\}<bb> to i64} %t | count 1
; RUN: grep {sext i59 \{0,+,199\}<bb> to i64} %t | count 1
diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll
index 17f2dffdbfcf..8f887c4a57eb 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-0.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \
+; RUN: opt < %s -disable-output -scalar-evolution -analyze \
; RUN: | grep { --> \{-128,+,1\}<bb1> Exits: 127} | count 5
; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the
diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll
index ca6ad0aaba12..02c3206c6fe7 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \
+; RUN: opt < %s -disable-output -scalar-evolution -analyze \
; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<bb1> to i64)} | count 5
; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases
diff --git a/test/Analysis/ScalarEvolution/sext-iv-2.ll b/test/Analysis/ScalarEvolution/sext-iv-2.ll
new file mode 100644
index 000000000000..b25c237958c0
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/sext-iv-2.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | FileCheck %s
+
+; CHECK: %tmp3 = sext i8 %tmp2 to i32
+; CHECK: --> (sext i8 {0,+,1}<bb1> to i32) Exits: -1
+; CHECK: %tmp4 = mul i32 %tmp3, %i.02
+; CHECK: --> ((sext i8 {0,+,1}<bb1> to i32) * {0,+,1}<bb>) Exits: {0,+,-1}<bb>
+
+; These sexts are not foldable.
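+; The inner loop takes %j.01 through 0..255, so the truncated i8 value
+; crosses the signed boundary at 128; sext of that addrec is therefore
+; not itself an addrec, and the exit value is sext(i8 -1) = -1.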
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+@table = common global [32 x [256 x i32]] zeroinitializer, align 32 ; <[32 x [256 x i32]]*> [#uses=2]
+
+define i32 @main() nounwind {
+entry:
+ br i1 false, label %bb5, label %bb.nph3
+
+bb.nph3: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb4, %bb.nph3
+ %i.02 = phi i32 [ %tmp10, %bb4 ], [ 0, %bb.nph3 ] ; <i32> [#uses=3]
+ br i1 false, label %bb3, label %bb.nph
+
+bb.nph: ; preds = %bb
+ br label %bb1
+
+bb1: ; preds = %bb2, %bb.nph
+ %j.01 = phi i32 [ %tmp8, %bb2 ], [ 0, %bb.nph ] ; <i32> [#uses=3]
+ %tmp2 = trunc i32 %j.01 to i8 ; <i8> [#uses=1]
+ %tmp3 = sext i8 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp3, %i.02 ; <i32> [#uses=1]
+ %tmp5 = sext i32 %i.02 to i64 ; <i64> [#uses=1]
+ %tmp6 = sext i32 %j.01 to i64 ; <i64> [#uses=1]
+ %tmp7 = getelementptr [32 x [256 x i32]]* @table, i64 0, i64 %tmp5, i64 %tmp6 ; <i32*> [#uses=1]
+ store i32 %tmp4, i32* %tmp7, align 4
+ %tmp8 = add i32 %j.01, 1 ; <i32> [#uses=2]
+ br label %bb2
+
+bb2: ; preds = %bb1
+ %phitmp1 = icmp sgt i32 %tmp8, 255 ; <i1> [#uses=1]
+ br i1 %phitmp1, label %bb2.bb3_crit_edge, label %bb1
+
+bb2.bb3_crit_edge: ; preds = %bb2
+ br label %bb3
+
+bb3: ; preds = %bb2.bb3_crit_edge, %bb
+ %tmp10 = add i32 %i.02, 1 ; <i32> [#uses=2]
+ br label %bb4
+
+bb4: ; preds = %bb3
+ %phitmp = icmp sgt i32 %tmp10, 31 ; <i1> [#uses=1]
+ br i1 %phitmp, label %bb4.bb5_crit_edge, label %bb
+
+bb4.bb5_crit_edge: ; preds = %bb4
+ br label %bb5
+
+bb5: ; preds = %bb4.bb5_crit_edge, %entry
+ %tmp12 = load i32* getelementptr ([32 x [256 x i32]]* @table, i64 0, i64 9, i64 132), align 16 ; <i32> [#uses=1]
+ %tmp13 = icmp eq i32 %tmp12, -1116 ; <i1> [#uses=1]
+ br i1 %tmp13, label %bb7, label %bb6
+
+bb6: ; preds = %bb5
+ call void @abort() noreturn nounwind
+ unreachable
+
+bb7: ; preds = %bb5
+ br label %return
+
+return: ; preds = %bb7
+ ret i32 0
+}
+
+declare void @abort() noreturn nounwind
diff --git a/test/Analysis/ScalarEvolution/smax.ll b/test/Analysis/ScalarEvolution/smax.ll
index 366dfdee7146..39de8d6c5a79 100644
--- a/test/Analysis/ScalarEvolution/smax.ll
+++ b/test/Analysis/ScalarEvolution/smax.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep smax | count 2
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep smax | count 2
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep \
; RUN: {%. smax %. smax %.}
; PR1614
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index c5be858d1ee9..66cc304918ae 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 10000}
; PR1101
diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll
index 374a5621cebf..bbe64358d4e5 100644
--- a/test/Analysis/ScalarEvolution/trip-count2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | \
; RUN: grep {backedge-taken count is 4}
; PR1101
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 35c86835f3ac..240983178b40 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \
+; RUN: opt < %s -scalar-evolution -analyze -disable-output \
; RUN: | grep {Loop bb3\\.i: Unpredictable backedge-taken count\\.}
; ScalarEvolution can't compute a trip count because it doesn't know if
diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll
index 49c4e133b467..e8d59cf550a8 100644
--- a/test/Analysis/ScalarEvolution/trip-count4.ll
+++ b/test/Analysis/ScalarEvolution/trip-count4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: | grep {sext.*trunc.*Exits: 11}
; ScalarEvolution should be able to compute a loop exit value for %indvar.i8.
diff --git a/test/Analysis/ScalarEvolution/trip-count5.ll b/test/Analysis/ScalarEvolution/trip-count5.ll
index 822dc2638f5a..2512a966ed20 100644
--- a/test/Analysis/ScalarEvolution/trip-count5.ll
+++ b/test/Analysis/ScalarEvolution/trip-count5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output > %t
+; RUN: opt < %s -analyze -scalar-evolution -disable-output > %t
; RUN: grep sext %t | count 2
; RUN: not grep {(sext} %t
diff --git a/test/Analysis/ScalarEvolution/trip-count6.ll b/test/Analysis/ScalarEvolution/trip-count6.ll
index a6674092e799..5833286317ce 100644
--- a/test/Analysis/ScalarEvolution/trip-count6.ll
+++ b/test/Analysis/ScalarEvolution/trip-count6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -disable-output -scalar-evolution \
+; RUN: opt < %s -analyze -disable-output -scalar-evolution \
; RUN: | grep {max backedge-taken count is 1\$}
@mode_table = global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll
index cea826ef1d30..0cd8d7c4a9a3 100644
--- a/test/Analysis/ScalarEvolution/trip-count7.ll
+++ b/test/Analysis/ScalarEvolution/trip-count7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
; RUN: | grep {Loop bb7.i: Unpredictable backedge-taken count\\.}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll
new file mode 100644
index 000000000000..c49f5ceea704
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count8.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
+; RUN: | grep {Loop for\\.body: backedge-taken count is (-1 + \[%\]ecx)}
+; PR4599
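+; (The loop counts %i.01 up from 0 while %inc = %i.01 + 1 is ult %ecx,
+; so with %ecx != 0 the backedge is taken %ecx - 1 times.)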
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i32 @foo(i32 %ecx) nounwind {
+entry:
+ %cmp2 = icmp eq i32 %ecx, 0 ; <i1> [#uses=1]
+ br i1 %cmp2, label %for.end, label %bb.nph
+
+for.cond: ; preds = %for.inc
+ %cmp = icmp ult i32 %inc, %ecx ; <i1> [#uses=1]
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.cond
+ %phitmp = add i32 %i.01, 2 ; <i32> [#uses=1]
+ br label %for.end
+
+bb.nph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.cond
+ %i.01 = phi i32 [ %inc, %for.cond ], [ 0, %bb.nph ] ; <i32> [#uses=3]
+ %call = call i32 @bar(i32 %i.01) nounwind ; <i32> [#uses=0]
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %inc = add i32 %i.01, 1 ; <i32> [#uses=2]
+ br label %for.cond
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %i.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 1, %entry ] ; <i32> [#uses=1]
+ ret i32 %i.0.lcssa
+}
+
+declare i32 @bar(i32)
diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll
index 843052456a80..c8339d7138d9 100644
--- a/test/Analysis/ScalarEvolution/xor-and.ll
+++ b/test/Analysis/ScalarEvolution/xor-and.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -disable-output -analyze \
+; RUN: opt < %s -scalar-evolution -disable-output -analyze \
; RUN: | grep {\\--> (zext i4 (-8 + (trunc i64 (8 \\* %x) to i4)) to i64)}
; ScalarEvolution shouldn't try to analyze %z into something like
diff --git a/test/Analysis/ScalarEvolution/zext-wrap.ll b/test/Analysis/ScalarEvolution/zext-wrap.ll
new file mode 100644
index 000000000000..9ff99be736a0
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/zext-wrap.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -analyze -scalar-evolution -disable-output \
+; RUN: | FileCheck %s
+; PR4569
+
+define i16 @main() nounwind {
+entry:
+ br label %bb.i
+
+bb.i:		; preds = %bb.i, %entry
+ %l_95.0.i1 = phi i8 [ %tmp1, %bb.i ], [ 0, %entry ]
+
+; This cast shouldn't be folded into the addrec.
+; CHECK: %tmp = zext i8 %l_95.0.i1 to i16
+; CHECK: --> (zext i8 {0,+,-1}<bb.i> to i16) Exits: 2
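+; ({0,+,-1} steps 0, 255, 254, ... in i8, wrapping immediately, so the
+; zext cannot be distributed over the addrec; the loop exits when
+; %tmp1 reaches 1, leaving %l_95.0.i1 = 2 and hence Exits: 2.)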
+
+ %tmp = zext i8 %l_95.0.i1 to i16
+
+ %tmp1 = add i8 %l_95.0.i1, -1
+ %phitmp = icmp eq i8 %tmp1, 1
+ br i1 %phitmp, label %bb1.i.func_36.exit_crit_edge, label %bb.i
+
+bb1.i.func_36.exit_crit_edge:
+ ret i16 %tmp
+}
diff --git a/test/Archive/extract.ll b/test/Archive/extract.ll
new file mode 100644
index 000000000000..3649714259b8
--- /dev/null
+++ b/test/Archive/extract.ll
@@ -0,0 +1,16 @@
+; This isn't really an assembly file; it's just here to run the test.
+
+; This test just makes sure that llvm-ar can extract bytecode members
+; from various style archives.
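+; (llvm-ar's "x" operation extracts the named member into the current
+; directory, in the style of system ar, which is why plain diff can
+; compare it against the checked-in copy.)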
+
+; RUN: llvm-ar x %p/GNU.a very_long_bytecode_file_name.bc
+; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null
+
+; RUN: llvm-ar x %p/MacOSX.a very_long_bytecode_file_name.bc
+; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc > /dev/null 2>/dev/null
+
+; RUN: llvm-ar x %p/SVR4.a very_long_bytecode_file_name.bc
+; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null
+
+; RUN: llvm-ar x %p/xpg4.a very_long_bytecode_file_name.bc
+; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null
diff --git a/test/Assembler/2002-01-24-BadSymbolTableAssert.ll b/test/Assembler/2002-01-24-BadSymbolTableAssert.ll
index b2a48f556893..7c49e2bd9935 100644
--- a/test/Assembler/2002-01-24-BadSymbolTableAssert.ll
+++ b/test/Assembler/2002-01-24-BadSymbolTableAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; This testcase failed due to a bad assertion in SymbolTable.cpp, removed in
; the 1.20 revision. Basically the symbol table assumed that if there was an
diff --git a/test/Assembler/2002-01-24-ValueRefineAbsType.ll b/test/Assembler/2002-01-24-ValueRefineAbsType.ll
index fb7c4fbf587f..6e49674a32fb 100644
--- a/test/Assembler/2002-01-24-ValueRefineAbsType.ll
+++ b/test/Assembler/2002-01-24-ValueRefineAbsType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; This testcase used to fail due to a lack of this diff in Value.cpp:
; diff -r1.16 Value.cpp
diff --git a/test/Assembler/2002-02-19-TypeParsing.ll b/test/Assembler/2002-02-19-TypeParsing.ll
index b7cadbdb0568..0df678497841 100644
--- a/test/Assembler/2002-02-19-TypeParsing.ll
+++ b/test/Assembler/2002-02-19-TypeParsing.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%Hosp = type { i32, i32, i32, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* } }
diff --git a/test/Assembler/2002-03-08-NameCollision.ll b/test/Assembler/2002-03-08-NameCollision.ll
index 539dfd671988..b49789b2902d 100644
--- a/test/Assembler/2002-03-08-NameCollision.ll
+++ b/test/Assembler/2002-03-08-NameCollision.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; Method arguments were being checked for collisions at the global scope before
; the method object was created by the parser. Because of this, false
diff --git a/test/Assembler/2002-03-08-NameCollision2.ll b/test/Assembler/2002-03-08-NameCollision2.ll
index 57dc517d3bb0..1f7a4e16f8b3 100644
--- a/test/Assembler/2002-03-08-NameCollision2.ll
+++ b/test/Assembler/2002-03-08-NameCollision2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; Another name collision problem. Here the problem was that if a forward
; declaration for a method was found, this would cause spurious conflicts
diff --git a/test/Assembler/2002-04-04-PureVirtMethCall.ll b/test/Assembler/2002-04-04-PureVirtMethCall.ll
index 4c63e44e9d21..29aed55a3a9f 100644
--- a/test/Assembler/2002-04-04-PureVirtMethCall.ll
+++ b/test/Assembler/2002-04-04-PureVirtMethCall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
type { { \2 *, \4 ** },
{ \2 *, \4 ** }
diff --git a/test/Assembler/2002-04-04-PureVirtMethCall2.ll b/test/Assembler/2002-04-04-PureVirtMethCall2.ll
index 553401f56367..a0968999a92d 100644
--- a/test/Assembler/2002-04-04-PureVirtMethCall2.ll
+++ b/test/Assembler/2002-04-04-PureVirtMethCall2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%t = type { { \2*, \2 },
{ \2*, \2 }
diff --git a/test/Assembler/2002-04-05-TypeParsing.ll b/test/Assembler/2002-04-05-TypeParsing.ll
index 82db75f1ce97..f725944b9210 100644
--- a/test/Assembler/2002-04-05-TypeParsing.ll
+++ b/test/Assembler/2002-04-05-TypeParsing.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%Hosp = type { { \2*, { \2, %Hosp }* }, { \2*, { \2, %Hosp }* } }
diff --git a/test/Assembler/2002-04-07-HexFloatConstants.ll b/test/Assembler/2002-04-07-HexFloatConstants.ll
index 5c54b39b8081..b0d7cc0e43a4 100644
--- a/test/Assembler/2002-04-07-HexFloatConstants.ll
+++ b/test/Assembler/2002-04-07-HexFloatConstants.ll
@@ -5,7 +5,7 @@
; of the bug that was causing the Olden Health benchmark to output incorrect
; results!
;
-; RUN: llvm-as < %s | opt -constprop | llvm-dis > %t.1
+; RUN: opt -constprop -S > %t.1 < %s
; RUN: llvm-as < %s | llvm-dis | llvm-as | opt -constprop | \
; RUN: llvm-dis > %t.2
; RUN: diff %t.1 %t.2
diff --git a/test/Assembler/2002-04-29-NameBinding.ll b/test/Assembler/2002-04-29-NameBinding.ll
index 9665aef14323..7960c20ddcea 100644
--- a/test/Assembler/2002-04-29-NameBinding.ll
+++ b/test/Assembler/2002-04-29-NameBinding.ll
@@ -4,7 +4,7 @@
; Check by running globaldce, which will remove the constant if there are
; no references to it!
;
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | \
+; RUN: opt < %s -globaldce -S | \
; RUN: not grep constant
;
diff --git a/test/Assembler/2002-05-02-InvalidForwardRef.ll b/test/Assembler/2002-05-02-InvalidForwardRef.ll
index 00a0a0153776..234545c2936f 100644
--- a/test/Assembler/2002-05-02-InvalidForwardRef.ll
+++ b/test/Assembler/2002-05-02-InvalidForwardRef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; It looks like the assembler is not forward resolving the function declaration
; correctly.
diff --git a/test/Assembler/2002-05-02-ParseError.ll b/test/Assembler/2002-05-02-ParseError.ll
index b198edfd31f5..5a9817c1eaa8 100644
--- a/test/Assembler/2002-05-02-ParseError.ll
+++ b/test/Assembler/2002-05-02-ParseError.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%T = type i32 *
diff --git a/test/Assembler/2002-07-08-HugePerformanceProblem.ll b/test/Assembler/2002-07-08-HugePerformanceProblem.ll
index b9ebfbe2ad61..52c90af18c8d 100644
--- a/test/Assembler/2002-07-08-HugePerformanceProblem.ll
+++ b/test/Assembler/2002-07-08-HugePerformanceProblem.ll
@@ -1,6 +1,6 @@
; This file takes about 48 __MINUTES__ to assemble using as. This is WAY too
; long. The type resolution code needs to be sped up a lot.
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%ALL_INTERSECTIONS_METHOD = type i32 (%OBJECT*, %RAY*, %ISTACK*)*
%BBOX = type { %BBOX_VECT, %BBOX_VECT }
%BBOX_TREE = type { i16, i16, %BBOX, %BBOX_TREE** }
diff --git a/test/Assembler/2002-07-25-ParserAssertionFailure.ll b/test/Assembler/2002-07-25-ParserAssertionFailure.ll
index 29c7c02ff850..3c5c5546b431 100644
--- a/test/Assembler/2002-07-25-ParserAssertionFailure.ll
+++ b/test/Assembler/2002-07-25-ParserAssertionFailure.ll
@@ -1,6 +1,6 @@
; Make sure we don't get an assertion failure, even though this is a parse
; error
-; RUN: not llvm-as %s -o /dev/null -f |& grep {'@foo' defined with}
+; RUN: not llvm-as %s -o /dev/null |& grep {'@foo' defined with}
%ty = type void (i32)
diff --git a/test/Assembler/2002-08-15-CastAmbiguity.ll b/test/Assembler/2002-08-15-CastAmbiguity.ll
index c10f91103fdc..c71652446d6f 100644
--- a/test/Assembler/2002-08-15-CastAmbiguity.ll
+++ b/test/Assembler/2002-08-15-CastAmbiguity.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
define void @test(i32 %X) {
call void @test( i32 6 )
diff --git a/test/Assembler/2002-08-15-ConstantExprProblem.ll b/test/Assembler/2002-08-15-ConstantExprProblem.ll
index d02c26a80cdb..02b9ea9adb87 100644
--- a/test/Assembler/2002-08-15-ConstantExprProblem.ll
+++ b/test/Assembler/2002-08-15-ConstantExprProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
@.LC0 = internal global [12 x i8] c"hello world\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll b/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll
index 5252be266a16..2ba3f14a48e5 100644
--- a/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll
+++ b/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
@.LC0 = internal global [12 x i8] c"hello world\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/Assembler/2002-08-19-BytecodeReader.ll b/test/Assembler/2002-08-19-BytecodeReader.ll
index e42cda0baa07..e211014eb0e8 100644
--- a/test/Assembler/2002-08-19-BytecodeReader.ll
+++ b/test/Assembler/2002-08-19-BytecodeReader.ll
@@ -1,7 +1,7 @@
; Testcase that seems to break the bytecode reader. This comes from the
; "crafty" spec benchmark.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | llvm-as
+; RUN: opt < %s -instcombine | llvm-dis
%CHESS_POSITION = type { i32, i32 }
@pawn_probes = external global i32 ; <i32*> [#uses=0]
diff --git a/test/Assembler/2002-08-22-DominanceProblem.ll b/test/Assembler/2002-08-22-DominanceProblem.ll
index a841dfa83855..0dc192df2356 100644
--- a/test/Assembler/2002-08-22-DominanceProblem.ll
+++ b/test/Assembler/2002-08-22-DominanceProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; Dominance relationships are not calculated correctly for unreachable blocks,
; which causes the verifier to barf on this input.
diff --git a/test/Assembler/2002-10-08-LargeArrayPerformance.ll b/test/Assembler/2002-10-08-LargeArrayPerformance.ll
index 2c4cba412b1e..34a993214e92 100644
--- a/test/Assembler/2002-10-08-LargeArrayPerformance.ll
+++ b/test/Assembler/2002-10-08-LargeArrayPerformance.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; This testcase comes from the following really simple c file:
;; int foo[30000];
;;; We should not be so slow for such a simple case!
diff --git a/test/Assembler/2002-10-15-NameClash.ll b/test/Assembler/2002-10-15-NameClash.ll
index 8ba5ed2507c1..89346cba9be3 100644
--- a/test/Assembler/2002-10-15-NameClash.ll
+++ b/test/Assembler/2002-10-15-NameClash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
declare i32 @"ArrayRef"([100 x i32] * %Array)
diff --git a/test/Assembler/2002-12-15-GlobalResolve.ll b/test/Assembler/2002-12-15-GlobalResolve.ll
index da049c4f4320..f9ad12e5478f 100644
--- a/test/Assembler/2002-12-15-GlobalResolve.ll
+++ b/test/Assembler/2002-12-15-GlobalResolve.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
@X = external global i32*
@X1 = external global %T*
diff --git a/test/Assembler/2003-01-30-UnsignedString.ll b/test/Assembler/2003-01-30-UnsignedString.ll
index 5eaa9c2c8389..3c14d71621c7 100644
--- a/test/Assembler/2003-01-30-UnsignedString.ll
+++ b/test/Assembler/2003-01-30-UnsignedString.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
@spell_order = global [4 x i8] c"\FF\00\F7\00"
diff --git a/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll b/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll
index 608eb6ae343c..f1a5ed7b56b2 100644
--- a/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll
+++ b/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; There should be absolutely no problem with this testcase.
define i32 @test(i32 %arg1, i32 %arg2) {
diff --git a/test/Assembler/2003-05-15-AssemblerProblem.ll b/test/Assembler/2003-05-15-AssemblerProblem.ll
index 17967a936506..146ce6534d70 100644
--- a/test/Assembler/2003-05-15-AssemblerProblem.ll
+++ b/test/Assembler/2003-05-15-AssemblerProblem.ll
@@ -1,6 +1,6 @@
; This bug was caused by two CPR's existing for the same global variable,
; colliding in the Module level CPR map.
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
define void @test() {
call void (...)* bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* null, i32 0 )
diff --git a/test/Assembler/2003-05-15-SwitchBug.ll b/test/Assembler/2003-05-15-SwitchBug.ll
index af42020ca167..3768d9c9a677 100644
--- a/test/Assembler/2003-05-15-SwitchBug.ll
+++ b/test/Assembler/2003-05-15-SwitchBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; Check minimal switch statement
diff --git a/test/Assembler/2003-05-21-ConstantShiftExpr.ll b/test/Assembler/2003-05-21-ConstantShiftExpr.ll
index 667bc9b93320..40b96514e045 100644
--- a/test/Assembler/2003-05-21-ConstantShiftExpr.ll
+++ b/test/Assembler/2003-05-21-ConstantShiftExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; Test that shift instructions can be used in constant expressions.
global i32 3670016
diff --git a/test/Assembler/2003-05-21-EmptyStructTest.ll b/test/Assembler/2003-05-21-EmptyStructTest.ll
index 6925d2693250..26e83d931c4d 100644
--- a/test/Assembler/2003-05-21-EmptyStructTest.ll
+++ b/test/Assembler/2003-05-21-EmptyStructTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; The old C front-end never generated empty structures; now the new one
; can. For some reason we never handled them in the parser. Weird.
diff --git a/test/Assembler/2003-06-30-RecursiveTypeProblem.ll b/test/Assembler/2003-06-30-RecursiveTypeProblem.ll
index 33f63a621711..5db31140a741 100644
--- a/test/Assembler/2003-06-30-RecursiveTypeProblem.ll
+++ b/test/Assembler/2003-06-30-RecursiveTypeProblem.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%MidFnTy = type void (%MidFnTy*)
diff --git a/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll b/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
index 6f31f16778f6..50cdeedd695e 100644
--- a/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
+++ b/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -instcombine -simplifycfg -S | not grep br
@.str_1 = internal constant [6 x i8] c"_Bool\00" ; <[6 x i8]*> [#uses=2]
diff --git a/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll b/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll
index 10a0280e9060..5fec05d8cbf1 100644
--- a/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll
+++ b/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%T = type i32
@X = global i32* null ; <i32**> [#uses=0]
diff --git a/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll b/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll
index bdb4d546854c..93f9a7081477 100644
--- a/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll
+++ b/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -o /dev/null -f |& grep {use of undefined type named 'struct.D_Scope'}
+; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'struct.D_Scope'}
; END.
@d_reduction_0_dparser_gram = global {
diff --git a/test/Assembler/2004-02-27-SelfUseAssertError.ll b/test/Assembler/2004-02-27-SelfUseAssertError.ll
index ff4c0b43e48a..7052eac5cbd4 100644
--- a/test/Assembler/2004-02-27-SelfUseAssertError.ll
+++ b/test/Assembler/2004-02-27-SelfUseAssertError.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
; %inc2 uses its own value, but that's ok, as it's unreachable!
diff --git a/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll b/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll
index c9363dbcdca9..ab46f887be07 100644
--- a/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll
+++ b/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
define i32* @t1({ float, i32 }* %X) {
%W = getelementptr { float, i32 }* %X, i32 20, i32 1 ; <i32*> [#uses=0]
diff --git a/test/Assembler/2004-10-22-BCWriterUndefBug.ll b/test/Assembler/2004-10-22-BCWriterUndefBug.ll
index 4b2ebeee9342..694b80b78c16 100644
--- a/test/Assembler/2004-10-22-BCWriterUndefBug.ll
+++ b/test/Assembler/2004-10-22-BCWriterUndefBug.ll
@@ -1,5 +1,5 @@
;; The bytecode writer was trying to treat undef values as ConstantArrays when
;; they looked like strings.
-;; RUN: llvm-as %s -o /dev/null -f
+;; RUN: llvm-as %s -o /dev/null
@G = internal global [8 x i8] undef
diff --git a/test/Assembler/2004-11-28-InvalidTypeCrash.ll b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
index 6f264393a598..f9b453b57462 100644
--- a/test/Assembler/2004-11-28-InvalidTypeCrash.ll
+++ b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
@@ -1,4 +1,4 @@
; Test for PR463. This program is erroneous, but should not crash llvm-as.
-; RUN: not llvm-as %s -o /dev/null -f |& grep {invalid type for null constant}
+; RUN: not llvm-as %s -o /dev/null |& grep {invalid type for null constant}
@.FOO = internal global %struct.none zeroinitializer
diff --git a/test/Assembler/2005-01-31-CallingAggregateFunction.ll b/test/Assembler/2005-01-31-CallingAggregateFunction.ll
index 14045138f811..ce769a2e9d7b 100644
--- a/test/Assembler/2005-01-31-CallingAggregateFunction.ll
+++ b/test/Assembler/2005-01-31-CallingAggregateFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
define void @test() {
call {i32} @foo()
diff --git a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
index 1962ae70c03d..a39de1cb6cba 100644
--- a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
+++ b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
@@ -1,5 +1,5 @@
; The assembler should catch an undefined argument type.
-; RUN: not llvm-as %s -o /dev/null -f |& grep {use of undefined type named 'typedef.bc_struct'}
+; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'typedef.bc_struct'}
; %typedef.bc_struct = type opaque
diff --git a/test/Assembler/2007-01-05-Cmp-ConstExpr.ll b/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
index 8c25989d21a3..e3f67ba13afc 100644
--- a/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
+++ b/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
@@ -1,5 +1,5 @@
; Test Case for PR1080
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
@str = internal constant [4 x i8] c"-ga\00" ; <[4 x i8]*> [#uses=2]
diff --git a/test/Assembler/2007-01-16-CrashOnBadCast.ll b/test/Assembler/2007-01-16-CrashOnBadCast.ll
index 33666b8cb2ce..81f5458b2ebe 100644
--- a/test/Assembler/2007-01-16-CrashOnBadCast.ll
+++ b/test/Assembler/2007-01-16-CrashOnBadCast.ll
@@ -1,5 +1,5 @@
; PR1117
-; RUN: not llvm-as %s -o /dev/null -f |& grep {invalid cast opcode for cast from}
+; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from}
define i8* @nada(i64 %X) {
%result = trunc i64 %X to i8*
diff --git a/test/Assembler/2007-01-16-CrashOnBadCast2.ll b/test/Assembler/2007-01-16-CrashOnBadCast2.ll
index 49c539f5024b..c05c60952c59 100644
--- a/test/Assembler/2007-01-16-CrashOnBadCast2.ll
+++ b/test/Assembler/2007-01-16-CrashOnBadCast2.ll
@@ -1,4 +1,4 @@
; PR1117
-; RUN: not llvm-as %s -o /dev/null -f |& grep {invalid cast opcode for cast from}
+; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from}
@X = constant i8* trunc (i64 0 to i8*)
diff --git a/test/Assembler/2007-03-18-InvalidNumberedVar.ll b/test/Assembler/2007-03-18-InvalidNumberedVar.ll
index 12bac61124e7..b2193b170130 100644
--- a/test/Assembler/2007-03-18-InvalidNumberedVar.ll
+++ b/test/Assembler/2007-03-18-InvalidNumberedVar.ll
@@ -1,5 +1,5 @@
; PR 1258
-; RUN: not llvm-as < %s >/dev/null -f |& grep {'%0' defined with type 'i1'}
+; RUN: not llvm-as < %s >/dev/null |& grep {'%0' defined with type 'i1'}
define i32 @test1(i32 %a, i32 %b) {
entry:
diff --git a/test/Assembler/2008-02-20-MultipleReturnValue.ll b/test/Assembler/2008-02-20-MultipleReturnValue.ll
index 5b2ed7e8f480..32c893a9f5f1 100644
--- a/test/Assembler/2008-02-20-MultipleReturnValue.ll
+++ b/test/Assembler/2008-02-20-MultipleReturnValue.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -verify | llvm-dis | llvm-as -disable-output
+; RUN: opt < %s -verify -S | llvm-as -disable-output
define {i32, i8} @foo(i32 %p) {
ret i32 1, i8 2
diff --git a/test/Assembler/2008-09-02-FunctionNotes2.ll b/test/Assembler/2008-09-02-FunctionNotes2.ll
index dbe75be700b7..8a49e8990280 100644
--- a/test/Assembler/2008-09-02-FunctionNotes2.ll
+++ b/test/Assembler/2008-09-02-FunctionNotes2.ll
@@ -1,5 +1,5 @@
; Test function notes
-; RUN: not llvm-as %s -o /dev/null -f |& grep "Attributes noinline alwaysinline are incompatible"
+; RUN: not llvm-as %s -o /dev/null |& grep "Attributes noinline alwaysinline are incompatible"
define void @fn1() alwaysinline noinline {
ret void
}
diff --git a/test/Assembler/2009-02-28-StripOpaqueName.ll b/test/Assembler/2009-02-28-StripOpaqueName.ll
index eef5d3614a81..f61a44cbd15d 100644
--- a/test/Assembler/2009-02-28-StripOpaqueName.ll
+++ b/test/Assembler/2009-02-28-StripOpaqueName.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -strip | llvm-dis | llvm-as | llvm-dis
+; RUN: opt < %s -strip -S | llvm-as | llvm-dis
; Stripping the name from A should not break references to it.
%A = type opaque
diff --git a/test/Assembler/2009-07-24-ZeroArgGEP.ll b/test/Assembler/2009-07-24-ZeroArgGEP.ll
new file mode 100644
index 000000000000..2a3d11477cb1
--- /dev/null
+++ b/test/Assembler/2009-07-24-ZeroArgGEP.ll
@@ -0,0 +1,5 @@
+; RUN: llvm-as %s -o /dev/null
+
+@foo = global i32 0
+@bar = constant i32* getelementptr(i32* @foo)
+
diff --git a/test/Assembler/ConstantExprFold.ll b/test/Assembler/ConstantExprFold.ll
index 89edc24b37ec..d3d374a07cf1 100644
--- a/test/Assembler/ConstantExprFold.ll
+++ b/test/Assembler/ConstantExprFold.ll
@@ -19,6 +19,7 @@ global i64* inttoptr (i64 xor (i64 ptrtoint (i64* @A to i64), i64 0) to i64*) ;
@B = external global %Ty
global i1 icmp slt (i64* @A, i64* getelementptr (i64* @A, i64 1)) ; true
+global i1 icmp ult (i64* @A, i64* getelementptr (i64* @A, i64 1)) ; true
global i1 icmp slt (i64* @A, i64* getelementptr (i64* @A, i64 0)) ; false
global i1 icmp slt (i32* getelementptr (%Ty* @B, i64 0, i32 0),
i32* getelementptr (%Ty* @B, i64 0, i32 1)) ; true
diff --git a/test/Assembler/anon-functions.ll b/test/Assembler/anon-functions.ll
index e08063e655d6..ac06e8ce3055 100644
--- a/test/Assembler/anon-functions.ll
+++ b/test/Assembler/anon-functions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s |llvm-dis | llvm-as | llvm-dis
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
; PR3611
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Assembler/flags.ll b/test/Assembler/flags.ll
new file mode 100644
index 000000000000..324190905975
--- /dev/null
+++ b/test/Assembler/flags.ll
@@ -0,0 +1,212 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+@addr = external global i64
+
+define i64 @add_unsigned(i64 %x, i64 %y) {
+; CHECK: %z = add nuw i64 %x, %y
+ %z = add nuw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sub_unsigned(i64 %x, i64 %y) {
+; CHECK: %z = sub nuw i64 %x, %y
+ %z = sub nuw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @mul_unsigned(i64 %x, i64 %y) {
+; CHECK: %z = mul nuw i64 %x, %y
+ %z = mul nuw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @add_signed(i64 %x, i64 %y) {
+; CHECK: %z = add nsw i64 %x, %y
+ %z = add nsw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sub_signed(i64 %x, i64 %y) {
+; CHECK: %z = sub nsw i64 %x, %y
+ %z = sub nsw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @mul_signed(i64 %x, i64 %y) {
+; CHECK: %z = mul nsw i64 %x, %y
+ %z = mul nsw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @add_plain(i64 %x, i64 %y) {
+; CHECK: %z = add i64 %x, %y
+ %z = add i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sub_plain(i64 %x, i64 %y) {
+; CHECK: %z = sub i64 %x, %y
+ %z = sub i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @mul_plain(i64 %x, i64 %y) {
+; CHECK: %z = mul i64 %x, %y
+ %z = mul i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @add_both(i64 %x, i64 %y) {
+; CHECK: %z = add nuw nsw i64 %x, %y
+ %z = add nuw nsw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sub_both(i64 %x, i64 %y) {
+; CHECK: %z = sub nuw nsw i64 %x, %y
+ %z = sub nuw nsw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @mul_both(i64 %x, i64 %y) {
+; CHECK: %z = mul nuw nsw i64 %x, %y
+ %z = mul nuw nsw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @add_both_reversed(i64 %x, i64 %y) {
+; CHECK: %z = add nuw nsw i64 %x, %y
+ %z = add nsw nuw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sub_both_reversed(i64 %x, i64 %y) {
+; CHECK: %z = sub nuw nsw i64 %x, %y
+ %z = sub nsw nuw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @mul_both_reversed(i64 %x, i64 %y) {
+; CHECK: %z = mul nuw nsw i64 %x, %y
+ %z = mul nsw nuw i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sdiv_exact(i64 %x, i64 %y) {
+; CHECK: %z = sdiv exact i64 %x, %y
+ %z = sdiv exact i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @sdiv_plain(i64 %x, i64 %y) {
+; CHECK: %z = sdiv i64 %x, %y
+ %z = sdiv i64 %x, %y
+ ret i64 %z
+}
+
+define i64* @gep_nw(i64* %p, i64 %x) {
+; CHECK: %z = getelementptr inbounds i64* %p, i64 %x
+ %z = getelementptr inbounds i64* %p, i64 %x
+ ret i64* %z
+}
+
+define i64* @gep_plain(i64* %p, i64 %x) {
+; CHECK: %z = getelementptr i64* %p, i64 %x
+ %z = getelementptr i64* %p, i64 %x
+ ret i64* %z
+}
+
+define i64 @add_both_ce() {
+; CHECK: ret i64 add nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 add nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sub_both_ce() {
+; CHECK: ret i64 sub nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sub nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @mul_both_ce() {
+; CHECK: ret i64 mul nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 mul nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sdiv_exact_ce() {
+; CHECK: ret i64 sdiv exact (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sdiv exact (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64* @gep_nw_ce() {
+; CHECK: ret i64* getelementptr inbounds (i64* @addr, i64 171)
+ ret i64* getelementptr inbounds (i64* @addr, i64 171)
+}
+
+define i64 @add_plain_ce() {
+; CHECK: ret i64 add (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 add (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sub_plain_ce() {
+; CHECK: ret i64 sub (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sub (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @mul_plain_ce() {
+; CHECK: ret i64 mul (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 mul (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sdiv_plain_ce() {
+; CHECK: ret i64 sdiv (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sdiv (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64* @gep_plain_ce() {
+; CHECK: ret i64* getelementptr (i64* @addr, i64 171)
+ ret i64* getelementptr (i64* @addr, i64 171)
+}
+
+define i64 @add_both_reversed_ce() {
+; CHECK: ret i64 add nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 add nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sub_both_reversed_ce() {
+; CHECK: ret i64 sub nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sub nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @mul_both_reversed_ce() {
+; CHECK: ret i64 mul nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 mul nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @add_signed_ce() {
+; CHECK: ret i64 add nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 add nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sub_signed_ce() {
+; CHECK: ret i64 sub nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sub nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @mul_signed_ce() {
+; CHECK: ret i64 mul nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 mul nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @add_unsigned_ce() {
+; CHECK: ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @sub_unsigned_ce() {
+; CHECK: ret i64 sub nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 sub nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @mul_unsigned_ce() {
+; CHECK: ret i64 mul nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+ ret i64 mul nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
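[Editor's note, not part of the patch] A gloss on the new flags.ll: nuw/nsw assert that an operation does not wrap in the unsigned/signed sense (with i8 operands, 200 + 100 wraps to 44; tagging that add nuw instead makes the overflowing result undefined, which is what licenses folding), exact asserts a remainder-free sdiv, and inbounds pins a getelementptr inside its allocated object. The *_reversed cases confirm that the printer canonicalizes "nsw nuw" to "nuw nsw", matching the CHECK lines.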
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index 10e5011397da..803d6d343063 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -1,11 +1,21 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; Verify that over-indexed getelementptrs are folded.
+@A = external global [2 x [3 x [5 x [7 x i32]]]]
+@B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 0, i64 0, i64 2, i64 1, i64 7523)
+; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) ; <i32**> [#uses=0]
+@C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523)
+; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ; <i32**> [#uses=0]
;; Verify that i16 indices work.
@x = external global {i32, i32}
@y = global i32* getelementptr ({i32, i32}* @x, i16 42, i32 0)
+; CHECK: @y = global i32* getelementptr (%0* @x, i16 42, i32 0)
; see if i92 indices work too.
define i32 *@test({i32, i32}* %t, i92 %n) {
+; CHECK: @test
+; CHECK: %B = getelementptr %0* %t, i92 %n, i32 0
%B = getelementptr {i32, i32}* %t, i92 %n, i32 0
ret i32* %B
}
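[Editor's note, not part of the patch] A worked check of the @B fold above: in i32 units the strides of [2 x [3 x [5 x [7 x i32]]]] are 210, 105, 35, 7 and 1, so the over-indexed (0, 0, 2, 1, 7523) reaches 2*35 + 1*7 + 7523 = 7600 elements in, and the folded (36, 0, 1, 0, 5) reaches 36*210 + 1*35 + 5 = 7600 as well: each index is reduced into range with the carry pushed outward into the pointer index.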
diff --git a/test/Assembler/insertextractvalue.ll b/test/Assembler/insertextractvalue.ll
index 3581238aa4c1..2f5521fba872 100644
--- a/test/Assembler/insertextractvalue.ll
+++ b/test/Assembler/insertextractvalue.ll
@@ -21,3 +21,9 @@ define float @dar({{i32},{float, double}}* %p) nounwind {
store {{i32},{float, double}} insertvalue ({{i32},{float, double}} zeroinitializer, double 20.0, 1, 1), {{i32},{float, double}}* %p
ret float extractvalue ({{i32},{float, double}} zeroinitializer, 1, 0)
}
+
+
+; PR4963
+define <{ i32, i32 }> @test57() {
+ ret <{ i32, i32 }> insertvalue (<{ i32, i32 }> zeroinitializer, i32 4, 1)
+}
diff --git a/test/Assembler/msasm.ll b/test/Assembler/msasm.ll
new file mode 100644
index 000000000000..5e32963abd8e
--- /dev/null
+++ b/test/Assembler/msasm.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+define void @test1() nounwind {
+; CHECK: test1
+; CHECK: sideeffect
+; CHECK-NOT: msasm
+ tail call void asm sideeffect "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret void
+; CHECK: ret
+}
+define void @test2() nounwind {
+; CHECK: test2
+; CHECK: sideeffect
+; CHECK: msasm
+ tail call void asm sideeffect msasm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret void
+; CHECK: ret
+}
+define void @test3() nounwind {
+; CHECK: test3
+; CHECK-NOT: sideeffect
+; CHECK: msasm
+ tail call void asm msasm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret void
+; CHECK: ret
+}
+define void @test4() nounwind {
+; CHECK: test4
+; CHECK-NOT: sideeffect
+; CHECK-NOT: msasm
+ tail call void asm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret void
+; CHECK: ret
+}
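[Editor's note, not part of the patch] msasm is the then-new inline-asm flag marking strings written in Microsoft/Intel style; the four functions enumerate the sideeffect/msasm combinations so the CHECK/CHECK-NOT lines can verify each flag round-trips through llvm-as and llvm-dis independently.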
diff --git a/test/Assembler/select.ll b/test/Assembler/select.ll
index b018fbe9c416..2d3f412d256d 100644
--- a/test/Assembler/select.ll
+++ b/test/Assembler/select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
define i32 @test(i1 %C, i32 %V1, i32 %V2) {
diff --git a/test/Assembler/unnamed.ll b/test/Assembler/unnamed.ll
new file mode 100644
index 000000000000..fb4fa6244e5a
--- /dev/null
+++ b/test/Assembler/unnamed.ll
@@ -0,0 +1,51 @@
+; RUN: llvm-as < %s | llvm-dis
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+module asm "this is an inline asm block"
+module asm "this is another inline asm block"
+
+%0 = type { %1, %2 }
+%1 = type { i32 }
+%2 = type { float, double }
+
+@0 = global i32 0
+@1 = global float 3.0
+@2 = global i8* null
+@3 = global x86_fp80 0xK4001E000000000000000
+
+define float @foo(%0* %p) nounwind {
+ %t = load %0* %p ; <%0> [#uses=2]
+ %s = extractvalue %0 %t, 1, 0 ; <float> [#uses=1]
+ %r = insertvalue %0 %t, double 2.000000e+00, 1, 1; <%0> [#uses=1]
+ store %0 %r, %0* %p
+ ret float %s
+}
+
+define float @bar(%0* %p) nounwind {
+ store %0 { %1 { i32 4 }, %2 { float 4.000000e+00, double 2.000000e+01 } }, %0* %p
+ ret float 7.000000e+00
+}
+
+define float @car(%0* %p) nounwind {
+ store %0 { %1 undef, %2 { float undef, double 2.000000e+01 } }, %0* %p
+ ret float undef
+}
+
+define float @dar(%0* %p) nounwind {
+ store %0 { %1 zeroinitializer, %2 { float 0.000000e+00, double 2.000000e+01 } }, %0* %p
+ ret float 0.000000e+00
+}
+
+define i32* @qqq() {
+ ret i32* @0
+}
+define float* @rrr() {
+ ret float* @1
+}
+define i8** @sss() {
+ ret i8** @2
+}
+define x86_fp80* @nnn() {
+ ret x86_fp80* @3
+}
diff --git a/test/Assembler/vector-cmp.ll b/test/Assembler/vector-cmp.ll
index 383c0faf6206..e4d35d9c9828 100644
--- a/test/Assembler/vector-cmp.ll
+++ b/test/Assembler/vector-cmp.ll
@@ -1,16 +1,16 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {global.*vicmp slt}
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {global.*icmp slt}
; PR2317
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9.2.2"
-define <4 x i32> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
- %cmp = vfcmp olt <4 x float> %a, %b ; <4 x i32> [#uses=1]
- ret <4 x i32> %cmp
+ %cmp = fcmp olt <4 x float> %a, %b ; <4 x i32> [#uses=1]
+ ret <4 x i1> %cmp
}
-global <4 x i32> vicmp slt ( <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> ) ;
+global <4 x i1> icmp slt ( <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> ) ;
@B = external global i32;
-global <4 x i32> vicmp slt ( <4 x i32> <i32 ptrtoint (i32 * @B to i32), i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> ) ;
+global <4 x i1> icmp slt ( <4 x i32> <i32 ptrtoint (i32 * @B to i32), i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> ) ;
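[Editor's note, not part of the patch] vicmp/vfcmp were the old vector-compare opcodes, returning an integer mask vector of the operands' width (here <4 x i32>); after their removal, ordinary icmp/fcmp apply elementwise to vectors and yield <4 x i1>, which is why @foo's return type and the two global initializers change along with the grep pattern.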
diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml
index 5a6fde8da24d..e830106c11ff 100644
--- a/test/Bindings/Ocaml/analysis.ml
+++ b/test/Bindings/Ocaml/analysis.ml
@@ -1,4 +1,4 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_analysis.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t
* RUN: ./%t %t.bc
*)
@@ -8,6 +8,8 @@ open Llvm_analysis
(* Note that this takes a moment to link, so it's best to keep the number of
individual tests low. *)
+let context = global_context ()
+
let test x = if not x then exit 1 else ()
let bomb msg =
@@ -15,10 +17,10 @@ let bomb msg =
exit 2
let _ =
- let fty = function_type void_type [| |] in
- let m = create_module "valid_m" in
+ let fty = function_type (void_type context) [| |] in
+ let m = create_module context "valid_m" in
let fn = define_function "valid_fn" fty m in
- let at_entry = builder_at_end (entry_block fn) in
+ let at_entry = builder_at_end context (entry_block fn) in
ignore (build_ret_void at_entry);
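[Editor's note, not part of the patch] Every OCaml hunk in this group applies the same change: constructors that used an implicit global state now take an explicit llcontext. A minimal consolidated sketch of the new shape, assuming the 2.6-era bindings shown above ("demo" and "demo_fn" are illustrative names only):

(* Sketch only: context-threaded module, type, and builder creation. *)
let () =
  let context = Llvm.global_context () in                       (* shared context *)
  let m = Llvm.create_module context "demo" in                  (* modules take it *)
  let fty = Llvm.function_type (Llvm.void_type context) [| |] in (* types take it *)
  let fn = Llvm.define_function "demo_fn" fty m in
  let b = Llvm.builder_at_end context (Llvm.entry_block fn) in  (* builders take it *)
  ignore (Llvm.build_ret_void b);
  Llvm.dispose_module m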
diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml
index 776228fc1648..5c23041c80d3 100644
--- a/test/Bindings/Ocaml/bitreader.ml
+++ b/test/Bindings/Ocaml/bitreader.ml
@@ -1,4 +1,4 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_bitreader.cma llvm_bitwriter.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t
* RUN: ./%t %t.bc
* RUN: llvm-dis < %t.bc | grep caml_int_ty
*)
@@ -6,13 +6,15 @@
(* Note that this takes a moment to link, so it's best to keep the number of
individual tests low. *)
+let context = Llvm.global_context ()
+
let test x = if not x then exit 1 else ()
let _ =
let fn = Sys.argv.(1) in
- let m = Llvm.create_module "ocaml_test_module" in
+ let m = Llvm.create_module context "ocaml_test_module" in
- ignore (Llvm.define_type_name "caml_int_ty" Llvm.i32_type m);
+ ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m);
test (Llvm_bitwriter.write_bitcode_file m fn);
@@ -22,7 +24,7 @@ let _ =
begin
let mb = Llvm.MemoryBuffer.of_file fn in
begin try
- let m = Llvm_bitreader.parse_bitcode mb in
+ let m = Llvm_bitreader.parse_bitcode context mb in
Llvm.dispose_module m
with x ->
Llvm.MemoryBuffer.dispose mb;
@@ -43,7 +45,7 @@ let _ =
begin
let mb = Llvm.MemoryBuffer.of_file fn in
let mp = begin try
- Llvm_bitreader.get_module_provider mb
+ Llvm_bitreader.get_module_provider context mb
with x ->
Llvm.MemoryBuffer.dispose mb;
raise x
@@ -63,7 +65,7 @@ let _ =
try
let mb = Llvm.MemoryBuffer.of_file fn in
let mp = begin try
- Llvm_bitreader.get_module_provider mb
+ Llvm_bitreader.get_module_provider context mb
with x ->
Llvm.MemoryBuffer.dispose mb;
raise x
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
index ec9dbc832997..57caac7cb97d 100644
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ b/test/Bindings/Ocaml/bitwriter.ml
@@ -1,4 +1,4 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_bitwriter.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitwriter.cmxa %s -o %t
* RUN: ./%t %t.bc
* RUN: llvm-dis < %t.bc | grep caml_int_ty
*)
@@ -6,11 +6,13 @@
(* Note that this takes a moment to link, so it's best to keep the number of
individual tests low. *)
+let context = Llvm.global_context ()
+
let test x = if not x then exit 1 else ()
let _ =
- let m = Llvm.create_module "ocaml_test_module" in
+ let m = Llvm.create_module context "ocaml_test_module" in
- ignore (Llvm.define_type_name "caml_int_ty" Llvm.i32_type m);
+ ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m);
test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1))
diff --git a/test/Bindings/Ocaml/dg.exp b/test/Bindings/Ocaml/dg.exp
new file mode 100644
index 000000000000..fb4bd078e37f
--- /dev/null
+++ b/test/Bindings/Ocaml/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if [ llvm_supports_binding ocaml ] then {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,ml}]]
+}
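[Editor's note, not part of the patch] This is the stock DejaGNU driver for a test directory; the llvm_supports_binding guard keeps the .ml tests from running unless the build was configured with the OCaml bindings.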
diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml
index 726a700f0ce3..ce56c50dcb61 100644
--- a/test/Bindings/Ocaml/executionengine.ml
+++ b/test/Bindings/Ocaml/executionengine.ml
@@ -1,4 +1,4 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_target.cma llvm_executionengine.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t
* RUN: ./%t %t.bc
*)
@@ -9,6 +9,12 @@ open Llvm_target
(* Note that this takes a moment to link, so it's best to keep the number of
individual tests low. *)
+let context = global_context ()
+let i8_type = Llvm.i8_type context
+let i32_type = Llvm.i32_type context
+let i64_type = Llvm.i64_type context
+let double_type = Llvm.double_type context
+
let bomb msg =
prerr_endline msg;
exit 2
@@ -19,14 +25,14 @@ let define_main_fn m retval =
define_function "main" (function_type i32_type [| i32_type;
str_arr_type;
str_arr_type |]) m in
- let b = builder_at_end (entry_block fn) in
+ let b = builder_at_end (global_context ()) (entry_block fn) in
ignore (build_ret (const_int i32_type retval) b);
fn
let define_plus m =
let fn = define_function "plus" (function_type i32_type [| i32_type;
i32_type |]) m in
- let b = builder_at_end (entry_block fn) in
+ let b = builder_at_end (global_context ()) (entry_block fn) in
let add = build_add (param fn 0) (param fn 1) "sum" b in
ignore (build_ret add b)
@@ -52,10 +58,10 @@ let test_genericvalue () =
let test_executionengine () =
(* create *)
- let m = create_module "test_module" in
+ let m = create_module (global_context ()) "test_module" in
let main = define_main_fn m 42 in
- let m2 = create_module "test_module2" in
+ let m2 = create_module (global_context ()) "test_module2" in
define_plus m2;
let ee = ExecutionEngine.create (ModuleProvider.create m) in
diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml
index 0a65db996bb4..0a65810105b0 100644
--- a/test/Bindings/Ocaml/scalar_opts.ml
+++ b/test/Bindings/Ocaml/scalar_opts.ml
@@ -1,7 +1,7 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_scalar_opts.cma llvm_target.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t
*)
-(* Note: It takes several seconds for ocamlc to link an executable with
+(* Note: It takes several seconds for ocamlopt to link an executable with
libLLVMCore.a, so it's better to write a big test than a bunch of
little ones. *)
@@ -9,6 +9,8 @@ open Llvm
open Llvm_scalar_opts
open Llvm_target
+let context = global_context ()
+let void_type = Llvm.void_type context
(* Tiny unit test framework - really just to help find which line is busted *)
let suite name f =
@@ -19,7 +21,7 @@ let suite name f =
(*===-- Fixture -----------------------------------------------------------===*)
let filename = Sys.argv.(1)
-let m = create_module filename
+let m = create_module context filename
let mp = ModuleProvider.create m
@@ -30,7 +32,7 @@ let test_transforms () =
let fty = function_type void_type [| |] in
let fn = define_function "fn" fty m in
- ignore (build_ret_void (builder_at_end (entry_block fn)));
+ ignore (build_ret_void (builder_at_end context (entry_block fn)));
let td = TargetData.create (target_triple m) in
diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml
index e6d08ed6db75..3c3b7339fef8 100644
--- a/test/Bindings/Ocaml/target.ml
+++ b/test/Bindings/Ocaml/target.ml
@@ -1,13 +1,16 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_target.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t
*)
-(* Note: It takes several seconds for ocamlc to link an executable with
+(* Note: It takes several seconds for ocamlopt to link an executable with
libLLVMCore.a, so it's better to write a big test than a bunch of
little ones. *)
open Llvm
open Llvm_target
+let context = global_context ()
+let i32_type = Llvm.i32_type context
+let i64_type = Llvm.i64_type context
(* Tiny unit test framework - really just to help find which line is busted *)
let suite name f =
@@ -18,14 +21,14 @@ let suite name f =
(*===-- Fixture -----------------------------------------------------------===*)
let filename = Sys.argv.(1)
-let m = create_module filename
+let m = create_module context filename
(*===-- Target Data -------------------------------------------------------===*)
let test_target_data () =
let td = TargetData.create (target_triple m) in
- let sty = struct_type [| i32_type; i64_type |] in
+ let sty = struct_type context [| i32_type; i64_type |] in
ignore (TargetData.as_string td);
ignore (TargetData.invalidate_struct_layout td sty);
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index 9016d3927f59..9e976d34aa4b 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -1,9 +1,9 @@
-(* RUN: %ocamlc -warn-error A llvm.cma llvm_analysis.cma llvm_bitwriter.cma %s -o %t 2> /dev/null
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t
* RUN: ./%t %t.bc
* RUN: llvm-dis < %t.bc > %t.ll
*)
-(* Note: It takes several seconds for ocamlc to link an executable with
+(* Note: It takes several seconds for ocamlopt to link an executable with
libLLVMCore.a, so it's better to write a big test than a bunch of
little ones. *)
@@ -17,6 +17,16 @@ let suite_name = ref ""
let group_name = ref ""
let case_num = ref 0
let print_checkpoints = false
+let context = global_context ()
+let i1_type = Llvm.i1_type context
+let i8_type = Llvm.i8_type context
+let i16_type = Llvm.i16_type context
+let i32_type = Llvm.i32_type context
+let i64_type = Llvm.i64_type context
+let void_type = Llvm.void_type context
+let float_type = Llvm.float_type context
+let double_type = Llvm.double_type context
+let fp128_type = Llvm.fp128_type context
let group name =
group_name := !suite_name ^ "/" ^ name;
@@ -47,7 +57,7 @@ let suite name f =
(*===-- Fixture -----------------------------------------------------------===*)
let filename = Sys.argv.(1)
-let m = create_module filename
+let m = create_module context filename
let mp = ModuleProvider.create m
@@ -93,7 +103,7 @@ let test_types () =
(* RUN: grep {Ty04.*i42} < %t.ll
*)
group "i42";
- let ty = integer_type 42 in
+ let ty = integer_type context 42 in
insist (define_type_name "Ty04" ty m);
(* RUN: grep {Ty05.*float} < %t.ll
@@ -164,22 +174,22 @@ let test_types () =
(* RUN: grep {Ty12.*opaque} < %t.ll
*)
group "opaque";
- let ty = opaque_type () in
+ let ty = opaque_type context in
insist (define_type_name "Ty12" ty m);
insist (ty == ty);
- insist (ty <> opaque_type ());
+ insist (ty <> opaque_type context);
(* RUN: grep -v {Ty13} < %t.ll
*)
group "delete";
- let ty = opaque_type () in
+ let ty = opaque_type context in
insist (define_type_name "Ty13" ty m);
delete_type_name "Ty13" m;
(* RUN: grep -v {RecursiveTy.*RecursiveTy} < %t.ll
*)
group "recursive";
- let ty = opaque_type () in
+ let ty = opaque_type context in
let th = handle_to_type ty in
refine_type ty (pointer_type ty);
let ty = type_of_handle th in
@@ -212,22 +222,30 @@ let test_constants () =
ignore (define_global "Const03" c m);
insist (i64_type = type_of c);
+ (* RUN: grep {ConstIntString.*i32.*-1} < %t.ll
+ *)
+ group "int string";
+ let c = const_int_of_string i32_type "-1" 10 in
+ ignore (define_global "ConstIntString" c m);
+ insist (i32_type = type_of c);
+
(* RUN: grep {Const04.*"cruel\\\\00world"} < %t.ll
*)
group "string";
- let c = const_string "cruel\000world" in
+ let c = const_string context "cruel\000world" in
ignore (define_global "Const04" c m);
insist ((array_type i8_type 11) = type_of c);
(* RUN: grep {Const05.*"hi\\\\00again\\\\00"} < %t.ll
*)
group "stringz";
- let c = const_stringz "hi\000again" in
+ let c = const_stringz context "hi\000again" in
ignore (define_global "Const05" c m);
insist ((array_type i8_type 9) = type_of c);
(* RUN: grep {ConstSingle.*2.75} < %t.ll
* RUN: grep {ConstDouble.*3.1459} < %t.ll
+ * RUN: grep {ConstDoubleString.*1.25} < %t.ll
*)
begin group "real";
let cs = const_float float_type 2.75 in
@@ -236,6 +254,10 @@ let test_constants () =
let cd = const_float double_type 3.1459 in
ignore (define_global "ConstDouble" cd m);
+ insist (double_type = type_of cd);
+
+ let cd = const_float_of_string double_type "1.25" in
+ ignore (define_global "ConstDoubleString" cd m);
insist (double_type = type_of cd)
end;
@@ -258,20 +280,20 @@ let test_constants () =
one; two; one; two |] in
ignore (define_global "Const08" c m);
insist ((vector_type i16_type 8) = (type_of c));
-
+
(* RUN: grep {Const09.*.i16 1, i16 2, i32 3, i32 4} < %t.ll
*)
group "structure";
- let c = const_struct [| one; two; three; four |] in
+ let c = const_struct context [| one; two; three; four |] in
ignore (define_global "Const09" c m);
- insist ((struct_type [| i16_type; i16_type; i32_type; i32_type |])
+ insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |])
= (type_of c));
(* RUN: grep {Const10.*zeroinit} < %t.ll
*)
group "null";
- let c = const_null (packed_struct_type [| i1_type; i8_type;
- i64_type; double_type |]) in
+ let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type;
+ double_type |]) in
ignore (define_global "Const10" c m);
(* RUN: grep {Const11.*-1} < %t.ll
@@ -343,7 +365,7 @@ let test_constants () =
* RUN: grep {ConstIntToPtr.*inttoptr} < %t.ll
* RUN: grep {ConstBitCast.*bitcast} < %t.ll
*)
- let i128_type = integer_type 128 in
+ let i128_type = integer_type context 128 in
ignore (define_global "ConstTrunc" (const_trunc (const_add foldbomb five)
i8_type) m);
ignore (define_global "ConstSExt" (const_sext foldbomb i128_type) m);
@@ -484,7 +506,7 @@ let test_global_variables () =
insist (is_global_constant g);
begin group "iteration";
- let m = create_module "temp" in
+ let m = create_module context "temp" in
insist (At_end m = global_begin m);
insist (At_start m = global_end m);
@@ -544,7 +566,7 @@ let test_functions () =
let fn = define_function "Fn3" ty m in
insist (not (is_declaration fn));
insist (1 = Array.length (basic_blocks fn));
- ignore (build_unreachable (builder_at_end (entry_block fn)));
+ ignore (build_unreachable (builder_at_end context (entry_block fn)));
(* RUN: grep {define.*Fn4.*Param1.*Param2} < %t.ll
*)
@@ -558,7 +580,7 @@ let test_functions () =
insist (i64_type = type_of params.(1));
set_value_name "Param1" params.(0);
set_value_name "Param2" params.(1);
- ignore (build_unreachable (builder_at_end (entry_block fn)));
+ ignore (build_unreachable (builder_at_end context (entry_block fn)));
(* RUN: grep {fastcc.*Fn5} < %t.ll
*)
@@ -567,7 +589,7 @@ let test_functions () =
insist (CallConv.c = function_call_conv fn);
set_function_call_conv CallConv.fast fn;
insist (CallConv.fast = function_call_conv fn);
- ignore (build_unreachable (builder_at_end (entry_block fn)));
+ ignore (build_unreachable (builder_at_end context (entry_block fn)));
begin group "gc";
(* RUN: grep {Fn6.*gc.*shadowstack} < %t.ll
@@ -579,11 +601,11 @@ let test_functions () =
set_gc None fn;
insist (None = gc fn);
set_gc (Some "shadowstack") fn;
- ignore (build_unreachable (builder_at_end (entry_block fn)));
+ ignore (build_unreachable (builder_at_end context (entry_block fn)));
end;
begin group "iteration";
- let m = create_module "temp" in
+ let m = create_module context "temp" in
insist (At_end m = function_begin m);
insist (At_start m = function_end m);
@@ -613,7 +635,7 @@ let test_functions () =
let test_params () =
begin group "iteration";
- let m = create_module "temp" in
+ let m = create_module context "temp" in
let vf = define_function "void" (function_type void_type [| |]) m in
@@ -660,31 +682,31 @@ let test_basic_blocks () =
*)
group "entry";
let fn = declare_function "X" ty m in
- let bb = append_block "Bb1" fn in
+ let bb = append_block context "Bb1" fn in
insist (bb = entry_block fn);
- ignore (build_unreachable (builder_at_end bb));
+ ignore (build_unreachable (builder_at_end context bb));
(* RUN: grep -v Bb2 < %t.ll
*)
group "delete";
let fn = declare_function "X2" ty m in
- let bb = append_block "Bb2" fn in
+ let bb = append_block context "Bb2" fn in
delete_block bb;
group "insert";
let fn = declare_function "X3" ty m in
- let bbb = append_block "b" fn in
- let bba = insert_block "a" bbb in
+ let bbb = append_block context "b" fn in
+ let bba = insert_block context "a" bbb in
insist ([| bba; bbb |] = basic_blocks fn);
- ignore (build_unreachable (builder_at_end bba));
- ignore (build_unreachable (builder_at_end bbb));
+ ignore (build_unreachable (builder_at_end context bba));
+ ignore (build_unreachable (builder_at_end context bbb));
(* RUN: grep Bb3 < %t.ll
*)
group "name/value";
let fn = define_function "X4" ty m in
let bb = entry_block fn in
- ignore (build_unreachable (builder_at_end bb));
+ ignore (build_unreachable (builder_at_end context bb));
let bbv = value_of_block bb in
set_value_name "Bb3" bbv;
insist ("Bb3" = value_name bbv);
@@ -692,20 +714,20 @@ let test_basic_blocks () =
group "casts";
let fn = define_function "X5" ty m in
let bb = entry_block fn in
- ignore (build_unreachable (builder_at_end bb));
+ ignore (build_unreachable (builder_at_end context bb));
insist (bb = block_of_value (value_of_block bb));
insist (value_is_block (value_of_block bb));
insist (not (value_is_block (const_null i32_type)));
begin group "iteration";
- let m = create_module "temp" in
+ let m = create_module context "temp" in
let f = declare_function "Temp" (function_type i32_type [| |]) m in
insist (At_end f = block_begin f);
insist (At_start f = block_end f);
- let b1 = append_block "One" f in
- let b2 = append_block "Two" f in
+ let b1 = append_block context "One" f in
+ let b2 = append_block context "Two" f in
insist (Before b1 = block_begin f);
insist (Before b2 = block_succ b1);
@@ -729,11 +751,11 @@ let test_basic_blocks () =
let test_instructions () =
begin group "iteration";
- let m = create_module "temp" in
+ let m = create_module context "temp" in
let fty = function_type void_type [| i32_type; i32_type |] in
let f = define_function "f" fty m in
let bb = entry_block f in
- let b = builder_at (At_end bb) in
+ let b = builder_at context (At_end bb) in
insist (At_end bb = instr_begin bb);
insist (At_start bb = instr_end bb);
@@ -766,7 +788,7 @@ let test_builder () =
begin group "parent";
insist (try
- ignore (insertion_block (builder ()));
+ ignore (insertion_block (builder context));
false
with Not_found ->
true);
@@ -774,7 +796,7 @@ let test_builder () =
let fty = function_type void_type [| i32_type |] in
let fn = define_function "BuilderParent" fty m in
let bb = entry_block fn in
- let b = builder_at_end bb in
+ let b = builder_at_end context bb in
let p = param fn 0 in
let sum = build_add p p "sum" b in
ignore (build_ret_void b);
@@ -791,21 +813,21 @@ let test_builder () =
*)
let fty = function_type void_type [| |] in
let fn = declare_function "X6" fty m in
- let b = builder_at_end (append_block "Bb01" fn) in
+ let b = builder_at_end context (append_block context "Bb01" fn) in
ignore (build_ret_void b)
end;
(* The rest of the tests will use one big function. *)
let fty = function_type i32_type [| i32_type; i32_type |] in
let fn = define_function "X7" fty m in
- let atentry = builder_at_end (entry_block fn) in
+ let atentry = builder_at_end context (entry_block fn) in
let p1 = param fn 0 ++ set_value_name "P1" in
let p2 = param fn 1 ++ set_value_name "P2" in
let f1 = build_uitofp p1 float_type "F1" atentry in
let f2 = build_uitofp p2 float_type "F2" atentry in
- let bb00 = append_block "Bb00" fn in
- ignore (build_unreachable (builder_at_end bb00));
+ let bb00 = append_block context "Bb00" fn in
+ ignore (build_unreachable (builder_at_end context bb00));
group "ret"; begin
(* RUN: grep {ret.*P1} < %t.ll
@@ -817,16 +839,16 @@ let test_builder () =
group "br"; begin
(* RUN: grep {br.*Bb02} < %t.ll
*)
- let bb02 = append_block "Bb02" fn in
- let b = builder_at_end bb02 in
+ let bb02 = append_block context "Bb02" fn in
+ let b = builder_at_end context bb02 in
ignore (build_br bb02 b)
end;
group "cond_br"; begin
(* RUN: grep {br.*Inst01.*Bb03.*Bb00} < %t.ll
*)
- let bb03 = append_block "Bb03" fn in
- let b = builder_at_end bb03 in
+ let bb03 = append_block context "Bb03" fn in
+ let b = builder_at_end context bb03 in
let cond = build_trunc p1 i1_type "Inst01" b in
ignore (build_cond_br cond bb03 bb00 b)
end;
@@ -835,12 +857,12 @@ let test_builder () =
(* RUN: grep {switch.*P1.*SwiBlock3} < %t.ll
* RUN: grep {2,.*SwiBlock2} < %t.ll
*)
- let bb1 = append_block "SwiBlock1" fn in
- let bb2 = append_block "SwiBlock2" fn in
- ignore (build_unreachable (builder_at_end bb2));
- let bb3 = append_block "SwiBlock3" fn in
- ignore (build_unreachable (builder_at_end bb3));
- let si = build_switch p1 bb3 1 (builder_at_end bb1) in
+ let bb1 = append_block context "SwiBlock1" fn in
+ let bb2 = append_block context "SwiBlock2" fn in
+ ignore (build_unreachable (builder_at_end context bb2));
+ let bb3 = append_block context "SwiBlock3" fn in
+ ignore (build_unreachable (builder_at_end context bb3));
+ let si = build_switch p1 bb3 1 (builder_at_end context bb1) in
ignore (add_case si (const_int i32_type 2) bb2)
end;
@@ -848,30 +870,30 @@ let test_builder () =
(* RUN: grep {Inst02.*invoke.*P1.*P2} < %t.ll
* RUN: grep {to.*Bb04.*unwind.*Bb00} < %t.ll
*)
- let bb04 = append_block "Bb04" fn in
- let b = builder_at_end bb04 in
+ let bb04 = append_block context "Bb04" fn in
+ let b = builder_at_end context bb04 in
ignore (build_invoke fn [| p1; p2 |] bb04 bb00 "Inst02" b)
end;
group "unwind"; begin
(* RUN: grep {unwind} < %t.ll
*)
- let bb05 = append_block "Bb05" fn in
- let b = builder_at_end bb05 in
+ let bb05 = append_block context "Bb05" fn in
+ let b = builder_at_end context bb05 in
ignore (build_unwind b)
end;
group "unreachable"; begin
(* RUN: grep {unreachable} < %t.ll
*)
- let bb06 = append_block "Bb06" fn in
- let b = builder_at_end bb06 in
+ let bb06 = append_block context "Bb06" fn in
+ let b = builder_at_end context bb06 in
ignore (build_unreachable b)
end;
group "arithmetic"; begin
- let bb07 = append_block "Bb07" fn in
- let b = builder_at_end bb07 in
+ let bb07 = append_block context "Bb07" fn in
+ let b = builder_at_end context bb07 in
(* RUN: grep {Inst03.*add.*P1.*P2} < %t.ll
* RUN: grep {Inst04.*sub.*P1.*Inst03} < %t.ll
@@ -912,12 +934,12 @@ let test_builder () =
end;
group "memory"; begin
- let bb08 = append_block "Bb08" fn in
- let b = builder_at_end bb08 in
+ let bb08 = append_block context "Bb08" fn in
+ let b = builder_at_end context bb08 in
- (* RUN: grep {Inst20.*malloc.*i8 } < %t.ll
+ (* RUN: grep {Inst20.*malloc.*i8 } < %t.ll
* RUN: grep {Inst21.*malloc.*i8.*P1} < %t.ll
- * RUN: grep {Inst22.*alloca.*i32 } < %t.ll
+ * RUN: grep {Inst22.*alloca.*i32 } < %t.ll
* RUN: grep {Inst23.*alloca.*i32.*P2} < %t.ll
* RUN: grep {free.*Inst20} < %t.ll
* RUN: grep {Inst25.*load.*Inst21} < %t.ll
@@ -1021,13 +1043,13 @@ let test_builder () =
group "phi"; begin
(* RUN: grep {PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2} < %t.ll
*)
- let b1 = append_block "PhiBlock1" fn in
- let b2 = append_block "PhiBlock2" fn in
+ let b1 = append_block context "PhiBlock1" fn in
+ let b2 = append_block context "PhiBlock2" fn in
- let jb = append_block "PhiJoinBlock" fn in
- ignore (build_br jb (builder_at_end b1));
- ignore (build_br jb (builder_at_end b2));
- let at_jb = builder_at_end jb in
+ let jb = append_block context "PhiJoinBlock" fn in
+ ignore (build_br jb (builder_at_end context b1));
+ ignore (build_br jb (builder_at_end context b2));
+ let at_jb = builder_at_end context jb in
let phi = build_phi [(p1, b1)] "PhiNode" at_jb in
insist ([(p1, b1)] = incoming phi);
@@ -1042,7 +1064,7 @@ let test_builder () =
(*===-- Module Provider ---------------------------------------------------===*)
let test_module_provider () =
- let m = create_module "test" in
+ let m = create_module context "test" in
let mp = ModuleProvider.create m in
ModuleProvider.dispose mp
@@ -1061,7 +1083,7 @@ let test_pass_manager () =
begin group "function pass manager";
let fty = function_type void_type [| |] in
let fn = define_function "FunctionPassManager" fty m in
- ignore (build_ret_void (builder_at_end (entry_block fn)));
+ ignore (build_ret_void (builder_at_end context (entry_block fn)));
ignore (PassManager.create_function mp
++ PassManager.initialize
diff --git a/test/Bitcode/extractelement.ll b/test/Bitcode/extractelement.ll
index 04cb131f6e7f..d88f811e8e9c 100644
--- a/test/Bitcode/extractelement.ll
+++ b/test/Bitcode/extractelement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis
+; RUN: opt < %s -constprop | llvm-dis
; PR3465
define double @test() {
diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll
index fb509b8e8eee..85b95fe57263 100644
--- a/test/Bitcode/memcpy.ll
+++ b/test/Bitcode/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
define void @test(i32* %P, i32* %Q) {
entry:
diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll
new file mode 100644
index 000000000000..1a59ce6f9dfb
--- /dev/null
+++ b/test/Bitcode/metadata-2.ll
@@ -0,0 +1,87 @@
+; RUN: llvm-as < %s | llvm-dis -o /dev/null
+ type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* } ; type %0
+ type { i64, %object.ModuleInfo* } ; type %1
+ type { i32, void ()* } ; type %2
+ %"ClassInfo[]" = type { i64, %object.ClassInfo** }
+ %"Interface[]" = type { i64, %object.Interface* }
+ %"ModuleInfo[]" = type { i64, %object.ModuleInfo** }
+ %ModuleReference = type { %ModuleReference*, %object.ModuleInfo* }
+ %"OffsetTypeInfo[]" = type { i64, %object.OffsetTypeInfo* }
+ %"byte[]" = type { i64, i8* }
+ %object.ClassInfo = type { %object.ClassInfo.__vtbl*, i8*, %"byte[]", %"byte[]", %"void*[]", %"Interface[]", %object.ClassInfo*, i8*, i8*, i32, i8*, %"OffsetTypeInfo[]", i8*, %object.TypeInfo* }
+ %object.ClassInfo.__vtbl = type { %object.ClassInfo*, %"byte[]" (%object.Object*)*, i64 (%object.Object*)*, i32 (%object.Object*, %object.Object*)*, i32 (%object.Object*, %object.Object*)*, %object.Object* (%object.ClassInfo*)* }
+ %object.Interface = type { %object.ClassInfo*, %"void*[]", i64 }
+ %object.ModuleInfo = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %"ModuleInfo[]", %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* }
+ %object.ModuleInfo.__vtbl = type { %object.ClassInfo*, %"byte[]" (%object.Object*)*, i64 (%object.Object*)*, i32 (%object.Object*, %object.Object*)*, i32 (%object.Object*, %object.Object*)* }
+ %object.Object = type { %object.ModuleInfo.__vtbl*, i8* }
+ %object.OffsetTypeInfo = type { i64, %object.TypeInfo* }
+ %object.TypeInfo = type { %object.TypeInfo.__vtbl*, i8* }
+ %object.TypeInfo.__vtbl = type { %object.ClassInfo*, %"byte[]" (%object.Object*)*, i64 (%object.Object*)*, i32 (%object.Object*, %object.Object*)*, i32 (%object.Object*, %object.Object*)*, i64 (%object.TypeInfo*, i8*)*, i32 (%object.TypeInfo*, i8*, i8*)*, i32 (%object.TypeInfo*, i8*, i8*)*, i64 (%object.TypeInfo*)*, void (%object.TypeInfo*, i8*, i8*)*, %object.TypeInfo* (%object.TypeInfo*)*, %"byte[]" (%object.TypeInfo*)*, i32 (%object.TypeInfo*)*, %"OffsetTypeInfo[]" (%object.TypeInfo*)* }
+ %"void*[]" = type { i64, i8** }
+@_D10ModuleInfo6__vtblZ = external constant %object.ModuleInfo.__vtbl ; <%object.ModuleInfo.__vtbl*> [#uses=1]
+@.str = internal constant [20 x i8] c"tango.core.BitManip\00" ; <[20 x i8]*> [#uses=1]
+@_D5tango4core8BitManip8__ModuleZ = global %0 { %object.ModuleInfo.__vtbl* @_D10ModuleInfo6__vtblZ, i8* null, %"byte[]" { i64 19, i8* getelementptr ([20 x i8]* @.str, i32 0, i32 0) }, %1 zeroinitializer, %"ClassInfo[]" zeroinitializer, i32 4, void ()* null, void ()* null, void ()* null, i8* null, void ()* null } ; <%0*> [#uses=1]
+@_D5tango4core8BitManip11__moduleRefZ = internal global %ModuleReference { %ModuleReference* null, %object.ModuleInfo* bitcast (%0* @_D5tango4core8BitManip8__ModuleZ to %object.ModuleInfo*) } ; <%ModuleReference*> [#uses=2]
+@_Dmodule_ref = external global %ModuleReference* ; <%ModuleReference**> [#uses=2]
+@llvm.global_ctors = appending constant [1 x %2] [%2 { i32 65535, void ()* @_D5tango4core8BitManip16__moduleinfoCtorZ }] ; <[1 x %2]*> [#uses=0]
+
+define fastcc i32 @_D5tango4core8BitManip6popcntFkZi(i32 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = lshr i32 %x_arg, 1 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 1431655765 ; <i32> [#uses=1]
+ %tmp4 = sub i32 %x_arg, %tmp2 ; <i32> [#uses=2]
+ %tmp6 = lshr i32 %tmp4, 2 ; <i32> [#uses=1]
+ %tmp7 = and i32 %tmp6, 858993459 ; <i32> [#uses=1]
+ %tmp9 = and i32 %tmp4, 858993459 ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp7, %tmp9 ; <i32> [#uses=2]
+ %tmp12 = lshr i32 %tmp10, 4 ; <i32> [#uses=1]
+ %tmp14 = add i32 %tmp12, %tmp10 ; <i32> [#uses=1]
+ %tmp16 = and i32 %tmp14, 252645135 ; <i32> [#uses=2]
+ %tmp18 = lshr i32 %tmp16, 8 ; <i32> [#uses=1]
+ %tmp20 = add i32 %tmp18, %tmp16 ; <i32> [#uses=1]
+ %tmp22 = and i32 %tmp20, 16711935 ; <i32> [#uses=2]
+ %tmp24 = lshr i32 %tmp22, 16 ; <i32> [#uses=1]
+ %tmp26 = add i32 %tmp24, %tmp22 ; <i32> [#uses=1]
+ %tmp28 = and i32 %tmp26, 65535 ; <i32> [#uses=1]
+ ret i32 %tmp28
+}
+
+define fastcc i32 @_D5tango4core8BitManip7bitswapFkZk(i32 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = lshr i32 %x_arg, 1 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 1431655765 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %x_arg, 1 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp4, -1431655766 ; <i32> [#uses=1]
+ %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=2]
+ %tmp8 = lshr i32 %tmp6, 2 ; <i32> [#uses=1]
+ %tmp9 = and i32 %tmp8, 858993459 ; <i32> [#uses=1]
+ %tmp11 = shl i32 %tmp6, 2 ; <i32> [#uses=1]
+ %tmp12 = and i32 %tmp11, -858993460 ; <i32> [#uses=1]
+ %tmp13 = or i32 %tmp9, %tmp12 ; <i32> [#uses=2]
+ %tmp15 = lshr i32 %tmp13, 4 ; <i32> [#uses=1]
+ %tmp16 = and i32 %tmp15, 252645135 ; <i32> [#uses=1]
+ %tmp18 = shl i32 %tmp13, 4 ; <i32> [#uses=1]
+ %tmp19 = and i32 %tmp18, -252645136 ; <i32> [#uses=1]
+ %tmp20 = or i32 %tmp16, %tmp19 ; <i32> [#uses=2]
+ %tmp22 = lshr i32 %tmp20, 8 ; <i32> [#uses=1]
+ %tmp23 = and i32 %tmp22, 16711935 ; <i32> [#uses=1]
+ %tmp25 = shl i32 %tmp20, 8 ; <i32> [#uses=1]
+ %tmp26 = and i32 %tmp25, -16711936 ; <i32> [#uses=1]
+ %tmp27 = or i32 %tmp23, %tmp26 ; <i32> [#uses=2]
+ %tmp29 = lshr i32 %tmp27, 16 ; <i32> [#uses=1]
+ %tmp31 = shl i32 %tmp27, 16 ; <i32> [#uses=1]
+ %tmp32 = or i32 %tmp29, %tmp31 ; <i32> [#uses=1]
+ ret i32 %tmp32
+}
+
+define internal void @_D5tango4core8BitManip16__moduleinfoCtorZ() nounwind {
+moduleinfoCtorEntry:
+ %current = load %ModuleReference** @_Dmodule_ref ; <%ModuleReference*> [#uses=1]
+ store %ModuleReference* %current, %ModuleReference** getelementptr (%ModuleReference* @_D5tango4core8BitManip11__moduleRefZ, i32 0, i32 0)
+ store %ModuleReference* @_D5tango4core8BitManip11__moduleRefZ, %ModuleReference** @_Dmodule_ref
+ ret void
+}
+!llvm.ldc.classinfo._D6Object7__ClassZ = !{!0}
+!llvm.ldc.classinfo._D10ModuleInfo7__ClassZ = !{!1}
+!0 = metadata !{%object.Object undef, i1 false, i1 false}
+!1 = metadata !{%object.ModuleInfo undef, i1 false, i1 false}
diff --git a/test/Bitcode/metadata.ll b/test/Bitcode/metadata.ll
new file mode 100644
index 000000000000..19db3eac2160
--- /dev/null
+++ b/test/Bitcode/metadata.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llvm-dis -o /dev/null
+
+!llvm.foo = !{!0}
+!0 = metadata !{i32 42}
+@my.str = internal constant [4 x i8] c"foo\00"
+
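[Editor's note, not part of the patch] In the syntax of this era, !llvm.foo = !{!0} declares named module-level metadata whose operands are numbered nodes, and !0 = metadata !{i32 42} wraps an ordinary constant in a node using the (since-removed) metadata keyword; the test simply checks that such modules survive an assemble/disassemble round trip.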
diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll
index 3ee9f8aa0ebb..6ad09d2e25cd 100644
--- a/test/BugPoint/crash-narrowfunctiontest.ll
+++ b/test/BugPoint/crash-narrowfunctiontest.ll
@@ -1,6 +1,6 @@
; Test that bugpoint can narrow down the testcase to the important function
;
-; RUN: bugpoint %s -bugpoint-crashcalls -silence-passes > /dev/null
+; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
define i32 @foo() { ret i32 1 }
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index e5655974f7e5..fb17c78a140b 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -1,4 +1,4 @@
-; RUN: bugpoint %s -bugpoint-crashcalls -silence-passes
+; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes
; Test to make sure that arguments are removed from the function if they are
; unnecessary.
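[Editor's note, not part of the patch] The added -output-prefix %t in both BugPoint tests appears intended to direct bugpoint's reduced-testcase artifacts to the test's temporary prefix instead of the current directory, keeping concurrent test runs from clobbering one another.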
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 000000000000..627b57d85634
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,31 @@
+include(GetTargetTriple)
+get_target_triple(target)
+
+foreach(c ${LLVM_TARGETS_TO_BUILD})
+ set(TARGETS_BUILT "${TARGETS_BUILT} ${c}")
+endforeach(c)
+set(TARGETS_TO_BUILD ${TARGETS_BUILT})
+
+include(FindPythonInterp)
+if(PYTHONINTERP_FOUND)
+ get_target_property(LLVM_TOOLS_PATH llvm-config RUNTIME_OUTPUT_DIRECTORY)
+
+ configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
+ ${CMAKE_CURRENT_BINARY_DIR}/site.exp)
+
+ add_custom_target(llvm-test
+ COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#"
+ -e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#"
+ -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_PATH}/${CMAKE_CFG_INTDIR}#"
+ -e "s#\@LLVMGCC_DIR\@##"
+ ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in >
+ ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+ COMMAND ${PYTHON_EXECUTABLE}
+ ${LLVM_SOURCE_DIR}/utils/lit/lit.py
+ -sv
+ ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS
+ COMMENT "Running LLVM regression tests")
+
+endif()
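[Editor's note, not part of the patch] The new CMake target rewrites lit.site.cfg.in with sed, then drives utils/lit/lit.py over the build directory; with a Makefile generator the suite would presumably be run as "make llvm-test".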
diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
index caa9a981fc6a..a0235f787061 100644
--- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
+++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
%struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] }
@ld = external global %struct.layer_data* ; <%struct.layer_data**> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 6e11b1691018..81483cb4e7c5 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
index 7317e62e3182..83b26d340062 100644
--- a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
define fastcc i8* @read_sleb128(i8* %p, i32* %val) {
br label %bb
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 07390add5538..33f935e960b1 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 | grep r9
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
; | grep 35
diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
index 32daf839f0fc..b0953dc8b61f 100644
--- a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
+++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
; PR1257
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
index 6d3f6404af84..d741112e2886 100644
--- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR1266
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
index f927ef43ca19..e4635f50279d 100644
--- a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
; PR1279
%struct.rtx_def = type { i16, i8, i8, %struct.u }
diff --git a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
index 55d29933a55c..ea27676a9f0f 100644
--- a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
; PR1279
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
index ef5a1ae40459..f24def31f97a 100644
--- a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-apple-darwin
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin
%struct.H_TBL = type { [17 x i8], [256 x i8], i32 }
%struct.Q_TBL = type { [64 x i16], i32 }
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index e412127eae7b..b543c57e1a85 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {add.*#0}
+; RUN: llc < %s -march=arm | not grep {add.*#0}
define i32 @foo() {
entry:
diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
index 42f5034c70a7..e001cde8351b 100644
--- a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
+++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
; RUN: not grep LPC9
%struct.B = type { i32 }
diff --git a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
index ec70a596bc3a..a89e937d3e10 100644
--- a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "arm-apple-darwin8"
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index f3f82bc4846f..c73b6793da0f 100644
--- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.Connection = type { i32, [10 x i8], i32 }
%struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 }
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
index 11431be9c28c..26864f18a69c 100644
--- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
+++ b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep 1_0
+; RUN: llc < %s | not grep 1_0
; This used to create an extra branch to 'entry', LBB1_0.
; ModuleID = 'bug.bc'
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index c3596e7c7b4f..f2a8ee1a1424 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
; Check that calls to baz and quux are tail-merged.
; PR1628
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 41ab1e52f674..275850581154 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
; Check that calls to baz and quux are tail-merged.
; PR1628
diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
index 58c5f89c619d..b3b0769347f1 100644
--- a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
+++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
define i32 @test3() {
tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
diff --git a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
index 430b3689c0b4..7b15ded44799 100644
--- a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
; PR1406
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 4c4a9336fd91..061bf5e851b0 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=arm | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works.
; PR1628
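
The paired counts above encode the behaviour under test: with tail merging
left at its ARM default, the duplicated calls collapse to a single bl per
callee, while -enable-tail-merge=0 keeps both. Spelled out for one callee
(count is the LLVM test helper that checks the number of matching lines):

    # default: the calls to baz are tail-merged into one bl
    llc < 2007-05-22-tailmerge-3.ll -march=arm | grep bl.*baz | count 1
    # merging disabled: both bl instructions remain
    llc < 2007-05-22-tailmerge-3.ll -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
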
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index de32a26ae9cf..d2eb85d356c5 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep {str.*\\!}
%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
index d21a8f209e96..030486a7c983 100644
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1424
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
index 3cfcdef48f4b..30b72e09a114 100644
--- a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
+++ b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
; PR1609
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index ec170f8eac5b..ff015065ef01 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -regalloc=local
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
; PR1925
%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index b81d5759b6cf..06bc98746076 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
; PR1925
%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index ca34275f79f4..a604c5cd574e 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | not grep 255
+; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255
define i32 @main(i32 %argc, i8** %argv) {
entry:
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 70f1774b4c52..78c622237563 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
@accum = external global { double, double } ; <{ double, double }*> [#uses=1]
@.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
index 610f5ea7cd05..234c7b69e3e7 100644
--- a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
@numBinsY = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 80ccddfcd735..77418be38084 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
index 3cd757fa62ad..33bd4def5b49 100644
--- a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
+++ b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
declare void @foo(i8*, i8*, i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
index 035af08cd40a..71aa6037a137 100644
--- a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
+++ b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.BiContextType = type { i16, i8, i32 }
%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
diff --git a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
index e98126bf87aa..aa61d86e1389 100644
--- a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.Decoders = type { i32**, i16***, i16****, i16***, i16**, i8**, i8** }
@decoders = external global %struct.Decoders ; <%struct.Decoders*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
index aa75970418a6..4cb768ef5b6d 100644
--- a/test/CodeGen/ARM/2008-07-17-Fdiv.ll
+++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define float @f(float %a, float %b) nounwind {
%tmp = fdiv float %a, %b
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index 6ea75eb5c79c..83fde07779bc 100644
--- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR2589
define void @main({ i32 }*) {
diff --git a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
index 0a79e8665a75..adb011277604 100644
--- a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
+++ b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
diff --git a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
index c601b90e0710..5f9d9aea58dd 100644
--- a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
@"\01LC1" = external constant [288 x i8] ; <[288 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
index b3ea6fc5945f..d3bc3e1663bc 100644
--- a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
define void @gcov_exit() nounwind {
entry:
diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
index 164e9643f170..601a516eb09a 100644
--- a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
index 3f17a5150fbe..35ca7b4c9af2 100644
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 154
%"struct.Adv5::Ekin<3>" = type <{ i8 }>
%"struct.Adv5::X::Energyflux<3>" = type { double }
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index 48e663dd8067..4c0c59ccfbc6 100644
--- a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
target triple = "arm-apple-darwin9"
%struct.FILE_POS = type { i8, i8, i16, i32 }
diff --git a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
index d7befa098748..a48f0033acc8 100644
--- a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
+++ b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3610
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
target triple = "arm-elf"
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index bd5b71959442..bc5e6023409f 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
target triple = "arm-apple-darwin9"
@a = external global double ; <double*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 399ed3081f20..0ec17ae23d69 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin9 -mattr=+vfp2
+; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
; rdar://6653182
%struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
index 0ec6d7d4ff73..a1ce384b5345 100644
--- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
%struct.hit_t = type { %struct.v_t, double }
%struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 11c05c6ea7b3..352672274d20 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep {swi 107}
define i32 @_swilseek(i32) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index c00b1fb98606..f6b3d2c0147b 100644
--- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR3795
define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) {
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index c7e343c89203..99907fc697bd 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index f394847362f9..05d2f26be0b7 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
entry:
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index 223fa0f435c9..deb092bbf86e 100644
--- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR3954
define void @foo(...) nounwind {
diff --git a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
index 2bca6e62fc30..670d2045f8ec 100644
--- a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
+; RUN: llc < %s -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
; PR4166
%"byte[]" = type { i32, i8* }
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index d03b7ce87539..75610ffecec2 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
; PR4100
@.str = external constant [30 x i8] ; <[30 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 35d4306e9d14..7046fccb5ee9 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
%struct.List = type { %struct.List*, i32 }
@Node5 = external constant %struct.List ; <%struct.List*> [#uses=1]
@"\01LC" = external constant [7 x i8] ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
index f942c9fc2216..1e2707f7b5bb 100644
--- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep swp
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=thumb | FileCheck %s
; PR4091
define void @foo(i32 %i, i32* %p) nounwind {
+;CHECK: swp r2, r0, [r1]
%asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind
ret void
}
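
Besides dropping llvm-as, this hunk migrates the test from a bare grep to
FileCheck: the expected swp instruction now lives in the file itself as a
CHECK: line, and a second RUN line covers Thumb. Each RUN line amounts to
the following (the filename standing in for %s):

    llc < 2009-05-18-InlineAsmMem.ll -march=arm | FileCheck 2009-05-18-InlineAsmMem.ll
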
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
index 7cd35b9557d0..403e3f6509f3 100644
--- a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
@"\01LC" = external constant [15 x i8] ; <[15 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
index 5eaae7aa9b46..98e002302558 100644
--- a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6
%struct.anon = type { i16, i16 }
%struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
index 45b4bd48f516..27888d75f67a 100644
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] } ; type %0
type { i32, %struct.D_Reduction** } ; type %1
diff --git a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
index c715a189287a..a0f903b0bdf5 100644
--- a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
%struct.term = type { i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
index cbe2385ab27a..b56b68447360 100644
--- a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
+; RUN: llc < %s -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
; PR4419
define float @__ieee754_acosf(float %x) nounwind {
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index 5c8d7b0f6220..e068be74bae4 100644
--- a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
%struct.rtunion = type { i64 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 27cad7ccf6b7..17efe0035419 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@nn = external global i32 ; <i32*> [#uses=1]
@al_len = external global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index 3a14d67247b9..f520be3946ae 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@no_mat = external global i32 ; <i32*> [#uses=1]
@no_mis = external global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
index f94b59dc91bd..eee6ff98c610 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@JJ = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
index bca7f793eef4..93c92b1c93f4 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@r = external global i32 ; <i32*> [#uses=1]
@qr = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
index 0c90592f1d21..277283dc0889 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@XX = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
index dfccefcac7a0..5c0e5fa57b9f 100644
--- a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
+++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@qr = external global i32 ; <i32*> [#uses=1]
@II = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
new file mode 100644
index 000000000000..e1e94b641214
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define void @test(i8* %x) nounwind {
+entry:
+ call void asm sideeffect "pld\09${0:a}", "r,~{cc}"(i8* %x) nounwind
+ ret void
+}
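
The new test above exercises the inline-asm address operand modifier: with
the pointer in, say, r0, "pld\09${0:a}" should print along the lines of
pld [r0]. The RUN line asserts only that instruction selection and printing
succeed, not the exact output.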
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
new file mode 100644
index 000000000000..ee93fde998c1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -0,0 +1,1323 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+
+ %struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
+ %struct.VEC2 = type { double, double, double }
+ %struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
+ %struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
+@avail_edge = internal global %struct.edge_rec* null ; <%struct.edge_rec**> [#uses=6]
+@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[21 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind {
+entry:
+ %delright = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %delleft = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %0 = icmp eq %struct.VERTEX* %tree, null ; <i1> [#uses=1]
+ br i1 %0, label %bb8, label %bb
+
+bb: ; preds = %entry
+ %1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1]
+ %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %3 = icmp eq %struct.VERTEX* %2, null ; <i1> [#uses=1]
+ br i1 %3, label %bb7, label %bb1.i
+
+bb1.i: ; preds = %bb1.i, %bb
+ %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3]
+ %4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %6 = icmp eq %struct.VERTEX* %5, null ; <i1> [#uses=1]
+ br i1 %6, label %get_low.exit, label %bb1.i
+
+get_low.exit: ; preds = %bb1.i
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
+ %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1]
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
+ %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2]
+ %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1]
+ %15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2]
+ br label %bb.i
+
+bb.i: ; preds = %bb4.i, %get_low.exit
+ %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2]
+ %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3]
+ %17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %18 = load %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; <i32> [#uses=1]
+ %20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %21 = load double* %20, align 4 ; <double> [#uses=3]
+ %22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %23 = load double* %22, align 4 ; <double> [#uses=3]
+ br label %bb2.i
+
+bb1.i1: ; preds = %bb2.i
+ %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32 ; <i32> [#uses=2]
+ %25 = add i32 %24, 48 ; <i32> [#uses=1]
+ %26 = and i32 %25, 63 ; <i32> [#uses=1]
+ %27 = and i32 %24, -64 ; <i32> [#uses=1]
+ %28 = or i32 %26, %27 ; <i32> [#uses=1]
+ %29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %32 = ptrtoint %struct.edge_rec* %31 to i32 ; <i32> [#uses=2]
+ %33 = add i32 %32, 16 ; <i32> [#uses=1]
+ %34 = and i32 %33, 63 ; <i32> [#uses=1]
+ %35 = and i32 %32, -64 ; <i32> [#uses=1]
+ %36 = or i32 %34, %35 ; <i32> [#uses=2]
+ %37 = inttoptr i32 %36 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ br label %bb2.i
+
+bb2.i: ; preds = %bb1.i1, %bb.i
+ %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ] ; <i32> [#uses=1]
+ %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn6.in.i = xor i32 %.pn6.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %39 = load double* %38, align 4 ; <double> [#uses=3]
+ %40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %41 = load double* %40, align 4 ; <double> [#uses=3]
+ %42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %43 = load double* %42, align 4 ; <double> [#uses=1]
+ %44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %45 = load double* %44, align 4 ; <double> [#uses=1]
+ %46 = fsub double %39, %21 ; <double> [#uses=1]
+ %47 = fsub double %45, %23 ; <double> [#uses=1]
+ %48 = fmul double %46, %47 ; <double> [#uses=1]
+ %49 = fsub double %43, %21 ; <double> [#uses=1]
+ %50 = fsub double %41, %23 ; <double> [#uses=1]
+ %51 = fmul double %49, %50 ; <double> [#uses=1]
+ %52 = fsub double %48, %51 ; <double> [#uses=1]
+ %53 = fcmp ogt double %52, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %53, label %bb1.i1, label %bb3.i
+
+bb3.i: ; preds = %bb2.i
+ %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 ; <i32> [#uses=1]
+ %55 = xor i32 %54, 32 ; <i32> [#uses=3]
+ %56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %60 = load double* %59, align 4 ; <double> [#uses=1]
+ %61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %62 = load double* %61, align 4 ; <double> [#uses=1]
+ %63 = fsub double %60, %39 ; <double> [#uses=1]
+ %64 = fsub double %23, %41 ; <double> [#uses=1]
+ %65 = fmul double %63, %64 ; <double> [#uses=1]
+ %66 = fsub double %21, %39 ; <double> [#uses=1]
+ %67 = fsub double %62, %41 ; <double> [#uses=1]
+ %68 = fmul double %66, %67 ; <double> [#uses=1]
+ %69 = fsub double %65, %68 ; <double> [#uses=1]
+ %70 = fcmp ogt double %69, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %70, label %bb4.i, label %bb5.i
+
+bb4.i: ; preds = %bb3.i
+ %71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb.i
+
+bb5.i: ; preds = %bb3.i
+ %73 = add i32 %55, 48 ; <i32> [#uses=1]
+ %74 = and i32 %73, 63 ; <i32> [#uses=1]
+ %75 = and i32 %55, -64 ; <i32> [#uses=1]
+ %76 = or i32 %74, %75 ; <i32> [#uses=1]
+ %77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %80 = ptrtoint %struct.edge_rec* %79 to i32 ; <i32> [#uses=2]
+ %81 = add i32 %80, 16 ; <i32> [#uses=1]
+ %82 = and i32 %81, 63 ; <i32> [#uses=1]
+ %83 = and i32 %80, -64 ; <i32> [#uses=1]
+ %84 = or i32 %82, %83 ; <i32> [#uses=1]
+ %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %88 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6]
+ %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
+ %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2]
+ store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
+ %91 = ptrtoint %struct.edge_rec* %88 to i32 ; <i32> [#uses=5]
+ %92 = add i32 %91, 16 ; <i32> [#uses=2]
+ %93 = inttoptr i32 %92 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %94 = add i32 %91, 48 ; <i32> [#uses=1]
+ %95 = inttoptr i32 %94 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
+ %97 = add i32 %91, 32 ; <i32> [#uses=1]
+ %98 = inttoptr i32 %97 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
+ %100 = getelementptr %struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
+ %101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
+ %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %103 = ptrtoint %struct.edge_rec* %102 to i32 ; <i32> [#uses=2]
+ %104 = add i32 %103, 16 ; <i32> [#uses=1]
+ %105 = and i32 %104, 63 ; <i32> [#uses=1]
+ %106 = and i32 %103, -64 ; <i32> [#uses=1]
+ %107 = or i32 %105, %106 ; <i32> [#uses=1]
+ %108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %111 = ptrtoint %struct.edge_rec* %110 to i32 ; <i32> [#uses=2]
+ %112 = add i32 %111, 16 ; <i32> [#uses=1]
+ %113 = and i32 %112, 63 ; <i32> [#uses=1]
+ %114 = and i32 %111, -64 ; <i32> [#uses=1]
+ %115 = or i32 %113, %114 ; <i32> [#uses=1]
+ %116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
+ store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
+ %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
+ store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
+ %123 = xor i32 %91, 32 ; <i32> [#uses=1]
+ %124 = inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %127 = ptrtoint %struct.edge_rec* %126 to i32 ; <i32> [#uses=2]
+ %128 = add i32 %127, 16 ; <i32> [#uses=1]
+ %129 = and i32 %128, 63 ; <i32> [#uses=1]
+ %130 = and i32 %127, -64 ; <i32> [#uses=1]
+ %131 = or i32 %129, %130 ; <i32> [#uses=1]
+ %132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %135 = ptrtoint %struct.edge_rec* %134 to i32 ; <i32> [#uses=2]
+ %136 = add i32 %135, 16 ; <i32> [#uses=1]
+ %137 = and i32 %136, 63 ; <i32> [#uses=1]
+ %138 = and i32 %135, -64 ; <i32> [#uses=1]
+ %139 = or i32 %137, %138 ; <i32> [#uses=1]
+ %140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
+ store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
+ %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2]
+ store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
+ store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
+ %147 = and i32 %92, 63 ; <i32> [#uses=1]
+ %148 = and i32 %91, -64 ; <i32> [#uses=1]
+ %149 = or i32 %147, %148 ; <i32> [#uses=1]
+ %150 = inttoptr i32 %149 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %153 = ptrtoint %struct.edge_rec* %152 to i32 ; <i32> [#uses=2]
+ %154 = add i32 %153, 16 ; <i32> [#uses=1]
+ %155 = and i32 %154, 63 ; <i32> [#uses=1]
+ %156 = and i32 %153, -64 ; <i32> [#uses=1]
+ %157 = or i32 %155, %156 ; <i32> [#uses=1]
+ %158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %164 = icmp eq %struct.VERTEX* %163, %159 ; <i1> [#uses=1]
+ %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3]
+ %165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %167 = icmp eq %struct.VERTEX* %166, %161 ; <i1> [#uses=1]
+ %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3]
+ br label %bb9.i
+
+bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i
+ %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2]
+ %168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; <i32> [#uses=3]
+ %173 = xor i32 %172, 32 ; <i32> [#uses=1]
+ %174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %177 = ptrtoint %struct.edge_rec* %169 to i32 ; <i32> [#uses=1]
+ %178 = xor i32 %177, 32 ; <i32> [#uses=1]
+ %179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %183 = load double* %182, align 4 ; <double> [#uses=2]
+ %184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %185 = load double* %184, align 4 ; <double> [#uses=2]
+ %186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %187 = load double* %186, align 4 ; <double> [#uses=1]
+ %188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %189 = load double* %188, align 4 ; <double> [#uses=1]
+ %190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %191 = load double* %190, align 4 ; <double> [#uses=2]
+ %192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %193 = load double* %192, align 4 ; <double> [#uses=2]
+ %194 = fsub double %183, %191 ; <double> [#uses=1]
+ %195 = fsub double %189, %193 ; <double> [#uses=1]
+ %196 = fmul double %194, %195 ; <double> [#uses=1]
+ %197 = fsub double %187, %191 ; <double> [#uses=1]
+ %198 = fsub double %185, %193 ; <double> [#uses=1]
+ %199 = fmul double %197, %198 ; <double> [#uses=1]
+ %200 = fsub double %196, %199 ; <double> [#uses=1]
+ %201 = fcmp ogt double %200, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %201, label %bb10.i, label %bb13.i
+
+bb10.i: ; preds = %bb9.i
+ %202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb12.i
+
+bb11.i: ; preds = %bb12.i
+ %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32 ; <i32> [#uses=3]
+ %204 = add i32 %203, 16 ; <i32> [#uses=1]
+ %205 = and i32 %204, 63 ; <i32> [#uses=1]
+ %206 = and i32 %203, -64 ; <i32> [#uses=3]
+ %207 = or i32 %205, %206 ; <i32> [#uses=1]
+ %208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %211 = ptrtoint %struct.edge_rec* %210 to i32 ; <i32> [#uses=2]
+ %212 = add i32 %211, 16 ; <i32> [#uses=1]
+ %213 = and i32 %212, 63 ; <i32> [#uses=1]
+ %214 = and i32 %211, -64 ; <i32> [#uses=1]
+ %215 = or i32 %213, %214 ; <i32> [#uses=1]
+ %216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %219 = ptrtoint %struct.edge_rec* %218 to i32 ; <i32> [#uses=2]
+ %220 = add i32 %219, 16 ; <i32> [#uses=1]
+ %221 = and i32 %220, 63 ; <i32> [#uses=1]
+ %222 = and i32 %219, -64 ; <i32> [#uses=1]
+ %223 = or i32 %221, %222 ; <i32> [#uses=1]
+ %224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %227 = ptrtoint %struct.edge_rec* %226 to i32 ; <i32> [#uses=2]
+ %228 = add i32 %227, 16 ; <i32> [#uses=1]
+ %229 = and i32 %228, 63 ; <i32> [#uses=1]
+ %230 = and i32 %227, -64 ; <i32> [#uses=1]
+ %231 = or i32 %229, %230 ; <i32> [#uses=1]
+ %232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
+ store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
+ %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
+ store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
+ %239 = xor i32 %203, 32 ; <i32> [#uses=2]
+ %240 = add i32 %239, 16 ; <i32> [#uses=1]
+ %241 = and i32 %240, 63 ; <i32> [#uses=1]
+ %242 = or i32 %241, %206 ; <i32> [#uses=1]
+ %243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %246 = ptrtoint %struct.edge_rec* %245 to i32 ; <i32> [#uses=2]
+ %247 = add i32 %246, 16 ; <i32> [#uses=1]
+ %248 = and i32 %247, 63 ; <i32> [#uses=1]
+ %249 = and i32 %246, -64 ; <i32> [#uses=1]
+ %250 = or i32 %248, %249 ; <i32> [#uses=1]
+ %251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %255 = ptrtoint %struct.edge_rec* %254 to i32 ; <i32> [#uses=2]
+ %256 = add i32 %255, 16 ; <i32> [#uses=1]
+ %257 = and i32 %256, 63 ; <i32> [#uses=1]
+ %258 = and i32 %255, -64 ; <i32> [#uses=1]
+ %259 = or i32 %257, %258 ; <i32> [#uses=1]
+ %260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %263 = ptrtoint %struct.edge_rec* %262 to i32 ; <i32> [#uses=2]
+ %264 = add i32 %263, 16 ; <i32> [#uses=1]
+ %265 = and i32 %264, 63 ; <i32> [#uses=1]
+ %266 = and i32 %263, -64 ; <i32> [#uses=1]
+ %267 = or i32 %265, %266 ; <i32> [#uses=1]
+ %268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
+ store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
+ %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
+ store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
+ %275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
+ %277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %.pre.i = load double* %182, align 4 ; <double> [#uses=1]
+ %.pre22.i = load double* %184, align 4 ; <double> [#uses=1]
+ br label %bb12.i
+
+bb12.i: ; preds = %bb11.i, %bb10.i
+ %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] ; <%struct.edge_rec*> [#uses=2]
+ %279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ] ; <double> [#uses=3]
+ %280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ] ; <double> [#uses=3]
+ %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn5.in.i = xor i32 %.pn5.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn4.in.i = xor i32 %.pn4.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %281 = load double* %202, align 4 ; <double> [#uses=3]
+ %282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %283 = load double* %282, align 4 ; <double> [#uses=1]
+ %284 = fsub double %283, %280 ; <double> [#uses=2]
+ %285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %286 = load double* %285, align 4 ; <double> [#uses=1]
+ %287 = fsub double %286, %279 ; <double> [#uses=2]
+ %288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %289 = load double* %288, align 4 ; <double> [#uses=1]
+ %290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %291 = load double* %290, align 4 ; <double> [#uses=1]
+ %292 = fsub double %291, %280 ; <double> [#uses=2]
+ %293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %294 = load double* %293, align 4 ; <double> [#uses=1]
+ %295 = fsub double %294, %279 ; <double> [#uses=2]
+ %296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %297 = load double* %296, align 4 ; <double> [#uses=1]
+ %298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %299 = load double* %298, align 4 ; <double> [#uses=1]
+ %300 = fsub double %299, %280 ; <double> [#uses=2]
+ %301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %302 = load double* %301, align 4 ; <double> [#uses=1]
+ %303 = fsub double %302, %279 ; <double> [#uses=2]
+ %304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %305 = load double* %304, align 4 ; <double> [#uses=1]
+ %306 = fsub double %289, %281 ; <double> [#uses=1]
+ %307 = fmul double %292, %303 ; <double> [#uses=1]
+ %308 = fmul double %295, %300 ; <double> [#uses=1]
+ %309 = fsub double %307, %308 ; <double> [#uses=1]
+ %310 = fmul double %306, %309 ; <double> [#uses=1]
+ %311 = fsub double %297, %281 ; <double> [#uses=1]
+ %312 = fmul double %300, %287 ; <double> [#uses=1]
+ %313 = fmul double %303, %284 ; <double> [#uses=1]
+ %314 = fsub double %312, %313 ; <double> [#uses=1]
+ %315 = fmul double %311, %314 ; <double> [#uses=1]
+ %316 = fadd double %315, %310 ; <double> [#uses=1]
+ %317 = fsub double %305, %281 ; <double> [#uses=1]
+ %318 = fmul double %284, %295 ; <double> [#uses=1]
+ %319 = fmul double %287, %292 ; <double> [#uses=1]
+ %320 = fsub double %318, %319 ; <double> [#uses=1]
+ %321 = fmul double %317, %320 ; <double> [#uses=1]
+ %322 = fadd double %321, %316 ; <double> [#uses=1]
+ %323 = fcmp ogt double %322, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %323, label %bb11.i, label %bb13.loopexit.i
+
+bb13.loopexit.i: ; preds = %bb12.i
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
+ %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb13.i
+
+bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
+ %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] ; <%struct.VERTEX*> [#uses=4]
+ %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] ; <%struct.VERTEX*> [#uses=3]
+ %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] ; <%struct.edge_rec*> [#uses=3]
+ %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32 ; <i32> [#uses=2]
+ %327 = add i32 %326, 16 ; <i32> [#uses=1]
+ %328 = and i32 %327, 63 ; <i32> [#uses=1]
+ %329 = and i32 %326, -64 ; <i32> [#uses=1]
+ %330 = or i32 %328, %329 ; <i32> [#uses=1]
+ %331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %332 = getelementptr %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %334 = ptrtoint %struct.edge_rec* %333 to i32 ; <i32> [#uses=2]
+ %335 = add i32 %334, 16 ; <i32> [#uses=1]
+ %336 = and i32 %335, 63 ; <i32> [#uses=1]
+ %337 = and i32 %334, -64 ; <i32> [#uses=1]
+ %338 = or i32 %336, %337 ; <i32> [#uses=3]
+ %339 = xor i32 %338, 32 ; <i32> [#uses=1]
+ %340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %344 = load double* %343, align 4 ; <double> [#uses=1]
+ %345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %346 = load double* %345, align 4 ; <double> [#uses=1]
+ %347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %348 = load double* %347, align 4 ; <double> [#uses=1]
+ %349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %350 = load double* %349, align 4 ; <double> [#uses=1]
+ %351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %352 = load double* %351, align 4 ; <double> [#uses=3]
+ %353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %354 = load double* %353, align 4 ; <double> [#uses=3]
+ %355 = fsub double %344, %352 ; <double> [#uses=1]
+ %356 = fsub double %350, %354 ; <double> [#uses=1]
+ %357 = fmul double %355, %356 ; <double> [#uses=1]
+ %358 = fsub double %348, %352 ; <double> [#uses=1]
+ %359 = fsub double %346, %354 ; <double> [#uses=1]
+ %360 = fmul double %358, %359 ; <double> [#uses=1]
+ %361 = fsub double %357, %360 ; <double> [#uses=1]
+ %362 = fcmp ogt double %361, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %362, label %bb14.i, label %bb17.i
+
+bb14.i: ; preds = %bb13.i
+ %363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb16.i
+
+bb15.i: ; preds = %bb16.i
+ %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32 ; <i32> [#uses=3]
+ %365 = add i32 %364, 16 ; <i32> [#uses=1]
+ %366 = and i32 %365, 63 ; <i32> [#uses=1]
+ %367 = and i32 %364, -64 ; <i32> [#uses=3]
+ %368 = or i32 %366, %367 ; <i32> [#uses=1]
+ %369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %372 = ptrtoint %struct.edge_rec* %371 to i32 ; <i32> [#uses=2]
+ %373 = add i32 %372, 16 ; <i32> [#uses=1]
+ %374 = and i32 %373, 63 ; <i32> [#uses=1]
+ %375 = and i32 %372, -64 ; <i32> [#uses=1]
+ %376 = or i32 %374, %375 ; <i32> [#uses=1]
+ %377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %380 = ptrtoint %struct.edge_rec* %379 to i32 ; <i32> [#uses=2]
+ %381 = add i32 %380, 16 ; <i32> [#uses=1]
+ %382 = and i32 %381, 63 ; <i32> [#uses=1]
+ %383 = and i32 %380, -64 ; <i32> [#uses=1]
+ %384 = or i32 %382, %383 ; <i32> [#uses=1]
+ %385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %388 = ptrtoint %struct.edge_rec* %387 to i32 ; <i32> [#uses=2]
+ %389 = add i32 %388, 16 ; <i32> [#uses=1]
+ %390 = and i32 %389, 63 ; <i32> [#uses=1]
+ %391 = and i32 %388, -64 ; <i32> [#uses=1]
+ %392 = or i32 %390, %391 ; <i32> [#uses=1]
+ %393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
+ store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
+ %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
+ store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
+ %400 = xor i32 %364, 32 ; <i32> [#uses=2]
+ %401 = add i32 %400, 16 ; <i32> [#uses=1]
+ %402 = and i32 %401, 63 ; <i32> [#uses=1]
+ %403 = or i32 %402, %367 ; <i32> [#uses=1]
+ %404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %407 = ptrtoint %struct.edge_rec* %406 to i32 ; <i32> [#uses=2]
+ %408 = add i32 %407, 16 ; <i32> [#uses=1]
+ %409 = and i32 %408, 63 ; <i32> [#uses=1]
+ %410 = and i32 %407, -64 ; <i32> [#uses=1]
+ %411 = or i32 %409, %410 ; <i32> [#uses=1]
+ %412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %416 = ptrtoint %struct.edge_rec* %415 to i32 ; <i32> [#uses=2]
+ %417 = add i32 %416, 16 ; <i32> [#uses=1]
+ %418 = and i32 %417, 63 ; <i32> [#uses=1]
+ %419 = and i32 %416, -64 ; <i32> [#uses=1]
+ %420 = or i32 %418, %419 ; <i32> [#uses=1]
+ %421 = inttoptr i32 %420 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %424 = ptrtoint %struct.edge_rec* %423 to i32 ; <i32> [#uses=2]
+ %425 = add i32 %424, 16 ; <i32> [#uses=1]
+ %426 = and i32 %425, 63 ; <i32> [#uses=1]
+ %427 = and i32 %424, -64 ; <i32> [#uses=1]
+ %428 = or i32 %426, %427 ; <i32> [#uses=1]
+ %429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
+ store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
+ %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
+ store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
+ %436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
+ %438 = add i32 %t.1.in.i, 16 ; <i32> [#uses=1]
+ %439 = and i32 %438, 63 ; <i32> [#uses=1]
+ %440 = and i32 %t.1.in.i, -64 ; <i32> [#uses=1]
+ %441 = or i32 %439, %440 ; <i32> [#uses=1]
+ %442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %443 = getelementptr %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %445 = ptrtoint %struct.edge_rec* %444 to i32 ; <i32> [#uses=2]
+ %446 = add i32 %445, 16 ; <i32> [#uses=1]
+ %447 = and i32 %446, 63 ; <i32> [#uses=1]
+ %448 = and i32 %445, -64 ; <i32> [#uses=1]
+ %449 = or i32 %447, %448 ; <i32> [#uses=2]
+ %.pre25.i = load double* %351, align 4 ; <double> [#uses=1]
+ %.pre26.i = load double* %353, align 4 ; <double> [#uses=1]
+ br label %bb16.i
+
+bb16.i: ; preds = %bb15.i, %bb14.i
+ %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] ; <%struct.edge_rec*> [#uses=2]
+ %450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ] ; <double> [#uses=3]
+ %451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ] ; <double> [#uses=3]
+ %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=3]
+ %.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=1]
+ %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn3.in.i = xor i32 %.pn3.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn.in.i = xor i32 %.pn.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.1.in.i = getelementptr %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %452 = load double* %363, align 4 ; <double> [#uses=3]
+ %453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %454 = load double* %453, align 4 ; <double> [#uses=1]
+ %455 = fsub double %454, %451 ; <double> [#uses=2]
+ %456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %457 = load double* %456, align 4 ; <double> [#uses=1]
+ %458 = fsub double %457, %450 ; <double> [#uses=2]
+ %459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %460 = load double* %459, align 4 ; <double> [#uses=1]
+ %461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %462 = load double* %461, align 4 ; <double> [#uses=1]
+ %463 = fsub double %462, %451 ; <double> [#uses=2]
+ %464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %465 = load double* %464, align 4 ; <double> [#uses=1]
+ %466 = fsub double %465, %450 ; <double> [#uses=2]
+ %467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %468 = load double* %467, align 4 ; <double> [#uses=1]
+ %469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %470 = load double* %469, align 4 ; <double> [#uses=1]
+ %471 = fsub double %470, %451 ; <double> [#uses=2]
+ %472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %473 = load double* %472, align 4 ; <double> [#uses=1]
+ %474 = fsub double %473, %450 ; <double> [#uses=2]
+ %475 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %476 = load double* %475, align 4 ; <double> [#uses=1]
+ %477 = fsub double %460, %452 ; <double> [#uses=1]
+ %478 = fmul double %463, %474 ; <double> [#uses=1]
+ %479 = fmul double %466, %471 ; <double> [#uses=1]
+ %480 = fsub double %478, %479 ; <double> [#uses=1]
+ %481 = fmul double %477, %480 ; <double> [#uses=1]
+ %482 = fsub double %468, %452 ; <double> [#uses=1]
+ %483 = fmul double %471, %458 ; <double> [#uses=1]
+ %484 = fmul double %474, %455 ; <double> [#uses=1]
+ %485 = fsub double %483, %484 ; <double> [#uses=1]
+ %486 = fmul double %482, %485 ; <double> [#uses=1]
+ %487 = fadd double %486, %481 ; <double> [#uses=1]
+ %488 = fsub double %476, %452 ; <double> [#uses=1]
+ %489 = fmul double %455, %466 ; <double> [#uses=1]
+ %490 = fmul double %458, %463 ; <double> [#uses=1]
+ %491 = fsub double %489, %490 ; <double> [#uses=1]
+ %492 = fmul double %488, %491 ; <double> [#uses=1]
+ %493 = fadd double %492, %487 ; <double> [#uses=1]
+ %494 = fcmp ogt double %493, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %494, label %bb15.i, label %bb17.loopexit.i
+
+bb17.loopexit.i: ; preds = %bb16.i
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
+ %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb17.i
+
+bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
+ %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] ; <%struct.edge_rec*> [#uses=3]
+ %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 ; <i32> [#uses=1]
+ %498 = xor i32 %497, 32 ; <i32> [#uses=1]
+ %499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %503 = load double* %502, align 4 ; <double> [#uses=1]
+ %504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %505 = load double* %504, align 4 ; <double> [#uses=1]
+ %506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %507 = load double* %506, align 4 ; <double> [#uses=2]
+ %508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %509 = load double* %508, align 4 ; <double> [#uses=2]
+ %510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %511 = load double* %510, align 4 ; <double> [#uses=3]
+ %512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %513 = load double* %512, align 4 ; <double> [#uses=3]
+ %514 = fsub double %503, %511 ; <double> [#uses=2]
+ %515 = fsub double %509, %513 ; <double> [#uses=1]
+ %516 = fmul double %514, %515 ; <double> [#uses=1]
+ %517 = fsub double %507, %511 ; <double> [#uses=1]
+ %518 = fsub double %505, %513 ; <double> [#uses=2]
+ %519 = fmul double %517, %518 ; <double> [#uses=1]
+ %520 = fsub double %516, %519 ; <double> [#uses=1]
+ %521 = fcmp ogt double %520, 0.000000e+00 ; <i1> [#uses=2]
+ %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 ; <i32> [#uses=3]
+ %523 = xor i32 %522, 32 ; <i32> [#uses=1]
+ %524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %528 = load double* %527, align 4 ; <double> [#uses=4]
+ %529 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %530 = load double* %529, align 4 ; <double> [#uses=4]
+ %531 = fsub double %530, %513 ; <double> [#uses=1]
+ %532 = fmul double %514, %531 ; <double> [#uses=1]
+ %533 = fsub double %528, %511 ; <double> [#uses=1]
+ %534 = fmul double %533, %518 ; <double> [#uses=1]
+ %535 = fsub double %532, %534 ; <double> [#uses=1]
+ %536 = fcmp ogt double %535, 0.000000e+00 ; <i1> [#uses=2]
+ %537 = or i1 %536, %521 ; <i1> [#uses=1]
+ br i1 %537, label %bb21.i, label %do_merge.exit
+
+bb21.i: ; preds = %bb17.i
+ %538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3]
+ br i1 %521, label %bb22.i, label %bb24.i
+
+bb22.i: ; preds = %bb21.i
+ br i1 %536, label %bb23.i, label %bb25.i
+
+bb23.i: ; preds = %bb22.i
+ %542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %543 = load double* %542, align 4 ; <double> [#uses=3]
+ %544 = fsub double %507, %528 ; <double> [#uses=2]
+ %545 = fsub double %509, %530 ; <double> [#uses=2]
+ %546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %547 = load double* %546, align 4 ; <double> [#uses=1]
+ %548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %549 = load double* %548, align 4 ; <double> [#uses=1]
+ %550 = fsub double %549, %528 ; <double> [#uses=2]
+ %551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %552 = load double* %551, align 4 ; <double> [#uses=1]
+ %553 = fsub double %552, %530 ; <double> [#uses=2]
+ %554 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %555 = load double* %554, align 4 ; <double> [#uses=1]
+ %556 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %557 = load double* %556, align 4 ; <double> [#uses=1]
+ %558 = fsub double %557, %528 ; <double> [#uses=2]
+ %559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %560 = load double* %559, align 4 ; <double> [#uses=1]
+ %561 = fsub double %560, %530 ; <double> [#uses=2]
+ %562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %563 = load double* %562, align 4 ; <double> [#uses=1]
+ %564 = fsub double %547, %543 ; <double> [#uses=1]
+ %565 = fmul double %550, %561 ; <double> [#uses=1]
+ %566 = fmul double %553, %558 ; <double> [#uses=1]
+ %567 = fsub double %565, %566 ; <double> [#uses=1]
+ %568 = fmul double %564, %567 ; <double> [#uses=1]
+ %569 = fsub double %555, %543 ; <double> [#uses=1]
+ %570 = fmul double %558, %545 ; <double> [#uses=1]
+ %571 = fmul double %561, %544 ; <double> [#uses=1]
+ %572 = fsub double %570, %571 ; <double> [#uses=1]
+ %573 = fmul double %569, %572 ; <double> [#uses=1]
+ %574 = fadd double %573, %568 ; <double> [#uses=1]
+ %575 = fsub double %563, %543 ; <double> [#uses=1]
+ %576 = fmul double %544, %553 ; <double> [#uses=1]
+ %577 = fmul double %545, %550 ; <double> [#uses=1]
+ %578 = fsub double %576, %577 ; <double> [#uses=1]
+ %579 = fmul double %575, %578 ; <double> [#uses=1]
+ %580 = fadd double %579, %574 ; <double> [#uses=1]
+ %581 = fcmp ogt double %580, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %581, label %bb24.i, label %bb25.i
+
+bb24.i: ; preds = %bb23.i, %bb21.i
+ %582 = add i32 %522, 48 ; <i32> [#uses=1]
+ %583 = and i32 %582, 63 ; <i32> [#uses=1]
+ %584 = and i32 %522, -64 ; <i32> [#uses=1]
+ %585 = or i32 %583, %584 ; <i32> [#uses=1]
+ %586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %589 = ptrtoint %struct.edge_rec* %588 to i32 ; <i32> [#uses=2]
+ %590 = add i32 %589, 16 ; <i32> [#uses=1]
+ %591 = and i32 %590, 63 ; <i32> [#uses=1]
+ %592 = and i32 %589, -64 ; <i32> [#uses=1]
+ %593 = or i32 %591, %592 ; <i32> [#uses=1]
+ %594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %595 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
+ %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4
+ %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4
+ %598 = ptrtoint %struct.edge_rec* %595 to i32 ; <i32> [#uses=5]
+ %599 = add i32 %598, 16 ; <i32> [#uses=1]
+ %600 = inttoptr i32 %599 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %601 = add i32 %598, 48 ; <i32> [#uses=1]
+ %602 = inttoptr i32 %601 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4
+ %604 = add i32 %598, 32 ; <i32> [#uses=1]
+ %605 = inttoptr i32 %604 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4
+ %607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
+ %608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
+ %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %610 = ptrtoint %struct.edge_rec* %609 to i32 ; <i32> [#uses=2]
+ %611 = add i32 %610, 16 ; <i32> [#uses=1]
+ %612 = and i32 %611, 63 ; <i32> [#uses=1]
+ %613 = and i32 %610, -64 ; <i32> [#uses=1]
+ %614 = or i32 %612, %613 ; <i32> [#uses=1]
+ %615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %618 = ptrtoint %struct.edge_rec* %617 to i32 ; <i32> [#uses=2]
+ %619 = add i32 %618, 16 ; <i32> [#uses=1]
+ %620 = and i32 %619, 63 ; <i32> [#uses=1]
+ %621 = and i32 %618, -64 ; <i32> [#uses=1]
+ %622 = or i32 %620, %621 ; <i32> [#uses=1]
+ %623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
+ store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
+ %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
+ store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
+ %630 = xor i32 %598, 32 ; <i32> [#uses=2]
+ %631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %634 = ptrtoint %struct.edge_rec* %633 to i32 ; <i32> [#uses=2]
+ %635 = add i32 %634, 16 ; <i32> [#uses=1]
+ %636 = and i32 %635, 63 ; <i32> [#uses=1]
+ %637 = and i32 %634, -64 ; <i32> [#uses=1]
+ %638 = or i32 %636, %637 ; <i32> [#uses=1]
+ %639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %642 = ptrtoint %struct.edge_rec* %641 to i32 ; <i32> [#uses=2]
+ %643 = add i32 %642, 16 ; <i32> [#uses=1]
+ %644 = and i32 %643, 63 ; <i32> [#uses=1]
+ %645 = and i32 %642, -64 ; <i32> [#uses=1]
+ %646 = or i32 %644, %645 ; <i32> [#uses=1]
+ %647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %649 = load %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
+ store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
+ %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
+ store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
+ %654 = add i32 %630, 48 ; <i32> [#uses=1]
+ %655 = and i32 %654, 63 ; <i32> [#uses=1]
+ %656 = and i32 %598, -64 ; <i32> [#uses=1]
+ %657 = or i32 %655, %656 ; <i32> [#uses=1]
+ %658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %661 = ptrtoint %struct.edge_rec* %660 to i32 ; <i32> [#uses=2]
+ %662 = add i32 %661, 16 ; <i32> [#uses=1]
+ %663 = and i32 %662, 63 ; <i32> [#uses=1]
+ %664 = and i32 %661, -64 ; <i32> [#uses=1]
+ %665 = or i32 %663, %664 ; <i32> [#uses=1]
+ %666 = inttoptr i32 %665 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ br label %bb9.i
+
+bb25.i: ; preds = %bb23.i, %bb22.i
+ %667 = add i32 %172, 16 ; <i32> [#uses=1]
+ %668 = and i32 %667, 63 ; <i32> [#uses=1]
+ %669 = and i32 %172, -64 ; <i32> [#uses=1]
+ %670 = or i32 %668, %669 ; <i32> [#uses=1]
+ %671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %674 = ptrtoint %struct.edge_rec* %673 to i32 ; <i32> [#uses=2]
+ %675 = add i32 %674, 16 ; <i32> [#uses=1]
+ %676 = and i32 %675, 63 ; <i32> [#uses=1]
+ %677 = and i32 %674, -64 ; <i32> [#uses=1]
+ %678 = or i32 %676, %677 ; <i32> [#uses=1]
+ %679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %680 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5]
+ store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4
+ %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4
+ %683 = ptrtoint %struct.edge_rec* %680 to i32 ; <i32> [#uses=4]
+ %684 = add i32 %683, 16 ; <i32> [#uses=1]
+ %685 = inttoptr i32 %684 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %686 = add i32 %683, 48 ; <i32> [#uses=1]
+ %687 = inttoptr i32 %686 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4
+ %689 = add i32 %683, 32 ; <i32> [#uses=1]
+ %690 = inttoptr i32 %689 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4
+ %692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
+ %693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
+ %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %695 = ptrtoint %struct.edge_rec* %694 to i32 ; <i32> [#uses=2]
+ %696 = add i32 %695, 16 ; <i32> [#uses=1]
+ %697 = and i32 %696, 63 ; <i32> [#uses=1]
+ %698 = and i32 %695, -64 ; <i32> [#uses=1]
+ %699 = or i32 %697, %698 ; <i32> [#uses=1]
+ %700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %703 = ptrtoint %struct.edge_rec* %702 to i32 ; <i32> [#uses=2]
+ %704 = add i32 %703, 16 ; <i32> [#uses=1]
+ %705 = and i32 %704, 63 ; <i32> [#uses=1]
+ %706 = and i32 %703, -64 ; <i32> [#uses=1]
+ %707 = or i32 %705, %706 ; <i32> [#uses=1]
+ %708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
+ store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
+ %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
+ store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
+ %715 = xor i32 %683, 32 ; <i32> [#uses=1]
+ %716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %719 = ptrtoint %struct.edge_rec* %718 to i32 ; <i32> [#uses=2]
+ %720 = add i32 %719, 16 ; <i32> [#uses=1]
+ %721 = and i32 %720, 63 ; <i32> [#uses=1]
+ %722 = and i32 %719, -64 ; <i32> [#uses=1]
+ %723 = or i32 %721, %722 ; <i32> [#uses=1]
+ %724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %727 = ptrtoint %struct.edge_rec* %726 to i32 ; <i32> [#uses=2]
+ %728 = add i32 %727, 16 ; <i32> [#uses=1]
+ %729 = and i32 %728, 63 ; <i32> [#uses=1]
+ %730 = and i32 %727, -64 ; <i32> [#uses=1]
+ %731 = or i32 %729, %730 ; <i32> [#uses=1]
+ %732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
+ store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
+ %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
+ store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
+ %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb9.i
+
+do_merge.exit: ; preds = %bb17.i
+ %740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; <i1> [#uses=1]
+ br i1 %742, label %bb5.loopexit, label %bb2
+
+bb2: ; preds = %bb2, %do_merge.exit
+ %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] ; <%struct.edge_rec*> [#uses=1]
+ %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 ; <i32> [#uses=1]
+ %744 = xor i32 %743, 32 ; <i32> [#uses=1]
+ %745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %747 = load %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; <i1> [#uses=1]
+ br i1 %750, label %bb5.loopexit, label %bb2
+
+bb4: ; preds = %bb5.loopexit, %bb4
+ %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1]
+ %751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %753 = ptrtoint %struct.edge_rec* %752 to i32 ; <i32> [#uses=1]
+ %754 = xor i32 %753, 32 ; <i32> [#uses=1]
+ %755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %758 = icmp eq %struct.VERTEX* %757, %extra ; <i1> [#uses=1]
+ br i1 %758, label %bb6, label %bb4
+
+bb5.loopexit: ; preds = %bb2, %do_merge.exit
+ %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1]
+ %759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %761 = icmp eq %struct.VERTEX* %760, %extra ; <i1> [#uses=1]
+ br i1 %761, label %bb6, label %bb4
+
+bb6: ; preds = %bb5.loopexit, %bb4
+ %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] ; <%struct.edge_rec*> [#uses=1]
+ %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32 ; <i32> [#uses=1]
+ %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32 ; <i32> [#uses=1]
+ br label %bb15
+
+bb7: ; preds = %bb
+ %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %764 = icmp eq %struct.VERTEX* %763, null ; <i1> [#uses=1]
+ %765 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
+ %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4
+ %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ br i1 %764, label %bb10, label %bb11
+
+bb8: ; preds = %entry
+ %768 = call arm_apcscc i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ call arm_apcscc void @exit(i32 -1) noreturn nounwind
+ unreachable
+
+bb10: ; preds = %bb7
+ store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4
+ %769 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=5]
+ %770 = add i32 %769, 16 ; <i32> [#uses=1]
+ %771 = inttoptr i32 %770 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %772 = add i32 %769, 48 ; <i32> [#uses=1]
+ %773 = inttoptr i32 %772 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4
+ %775 = add i32 %769, 32 ; <i32> [#uses=1]
+ %776 = inttoptr i32 %775 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4
+ %778 = getelementptr %struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4
+ %779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4
+ %780 = xor i32 %769, 32 ; <i32> [#uses=1]
+ br label %bb15
+
+bb11: ; preds = %bb7
+ store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4
+ %781 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=6]
+ %782 = add i32 %781, 16 ; <i32> [#uses=1]
+ %783 = inttoptr i32 %782 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %784 = add i32 %781, 48 ; <i32> [#uses=1]
+ %785 = inttoptr i32 %784 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4
+ %787 = add i32 %781, 32 ; <i32> [#uses=1]
+ %788 = inttoptr i32 %787 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4
+ %790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4
+ %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4
+ %792 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4
+ %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4
+ %795 = ptrtoint %struct.edge_rec* %792 to i32 ; <i32> [#uses=5]
+ %796 = add i32 %795, 16 ; <i32> [#uses=1]
+ %797 = inttoptr i32 %796 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %798 = add i32 %795, 48 ; <i32> [#uses=2]
+ %799 = inttoptr i32 %798 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4
+ %801 = add i32 %795, 32 ; <i32> [#uses=1]
+ %802 = inttoptr i32 %801 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4
+ %804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4
+ %805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4
+ %806 = xor i32 %781, 32 ; <i32> [#uses=1]
+ %807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %810 = ptrtoint %struct.edge_rec* %809 to i32 ; <i32> [#uses=2]
+ %811 = add i32 %810, 16 ; <i32> [#uses=1]
+ %812 = and i32 %811, 63 ; <i32> [#uses=1]
+ %813 = and i32 %810, -64 ; <i32> [#uses=1]
+ %814 = or i32 %812, %813 ; <i32> [#uses=1]
+ %815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %816 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %817 = ptrtoint %struct.edge_rec* %816 to i32 ; <i32> [#uses=2]
+ %818 = add i32 %817, 16 ; <i32> [#uses=1]
+ %819 = and i32 %818, 63 ; <i32> [#uses=1]
+ %820 = and i32 %817, -64 ; <i32> [#uses=1]
+ %821 = or i32 %819, %820 ; <i32> [#uses=1]
+ %822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
+ store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
+ %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
+ store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
+ %829 = xor i32 %795, 32 ; <i32> [#uses=3]
+ %830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %833 = and i32 %798, 63 ; <i32> [#uses=1]
+ %834 = and i32 %795, -64 ; <i32> [#uses=1]
+ %835 = or i32 %833, %834 ; <i32> [#uses=1]
+ %836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %838 = load %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %839 = ptrtoint %struct.edge_rec* %838 to i32 ; <i32> [#uses=2]
+ %840 = add i32 %839, 16 ; <i32> [#uses=1]
+ %841 = and i32 %840, 63 ; <i32> [#uses=1]
+ %842 = and i32 %839, -64 ; <i32> [#uses=1]
+ %843 = or i32 %841, %842 ; <i32> [#uses=1]
+ %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %846 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7]
+ store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
+ %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4
+ %849 = ptrtoint %struct.edge_rec* %846 to i32 ; <i32> [#uses=6]
+ %850 = add i32 %849, 16 ; <i32> [#uses=2]
+ %851 = inttoptr i32 %850 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %852 = add i32 %849, 48 ; <i32> [#uses=1]
+ %853 = inttoptr i32 %852 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4
+ %855 = add i32 %849, 32 ; <i32> [#uses=1]
+ %856 = inttoptr i32 %855 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4
+ %858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
+ %859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
+ %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %861 = ptrtoint %struct.edge_rec* %860 to i32 ; <i32> [#uses=2]
+ %862 = add i32 %861, 16 ; <i32> [#uses=1]
+ %863 = and i32 %862, 63 ; <i32> [#uses=1]
+ %864 = and i32 %861, -64 ; <i32> [#uses=1]
+ %865 = or i32 %863, %864 ; <i32> [#uses=1]
+ %866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %869 = ptrtoint %struct.edge_rec* %868 to i32 ; <i32> [#uses=2]
+ %870 = add i32 %869, 16 ; <i32> [#uses=1]
+ %871 = and i32 %870, 63 ; <i32> [#uses=1]
+ %872 = and i32 %869, -64 ; <i32> [#uses=1]
+ %873 = or i32 %871, %872 ; <i32> [#uses=1]
+ %874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
+ store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
+ %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
+ store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
+ %881 = xor i32 %849, 32 ; <i32> [#uses=3]
+ %882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6]
+ %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %885 = ptrtoint %struct.edge_rec* %884 to i32 ; <i32> [#uses=2]
+ %886 = add i32 %885, 16 ; <i32> [#uses=1]
+ %887 = and i32 %886, 63 ; <i32> [#uses=1]
+ %888 = and i32 %885, -64 ; <i32> [#uses=1]
+ %889 = or i32 %887, %888 ; <i32> [#uses=1]
+ %890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %891 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %892 = ptrtoint %struct.edge_rec* %891 to i32 ; <i32> [#uses=2]
+ %893 = add i32 %892, 16 ; <i32> [#uses=1]
+ %894 = and i32 %893, 63 ; <i32> [#uses=1]
+ %895 = and i32 %892, -64 ; <i32> [#uses=1]
+ %896 = or i32 %894, %895 ; <i32> [#uses=1]
+ %897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
+ store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
+ %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
+ store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
+ %904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %905 = load double* %904, align 4 ; <double> [#uses=2]
+ %906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %907 = load double* %906, align 4 ; <double> [#uses=2]
+ %908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %909 = load double* %908, align 4 ; <double> [#uses=3]
+ %910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %911 = load double* %910, align 4 ; <double> [#uses=3]
+ %912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %913 = load double* %912, align 4 ; <double> [#uses=3]
+ %914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %915 = load double* %914, align 4 ; <double> [#uses=3]
+ %916 = fsub double %905, %913 ; <double> [#uses=1]
+ %917 = fsub double %911, %915 ; <double> [#uses=1]
+ %918 = fmul double %916, %917 ; <double> [#uses=1]
+ %919 = fsub double %909, %913 ; <double> [#uses=1]
+ %920 = fsub double %907, %915 ; <double> [#uses=1]
+ %921 = fmul double %919, %920 ; <double> [#uses=1]
+ %922 = fsub double %918, %921 ; <double> [#uses=1]
+ %923 = fcmp ogt double %922, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %923, label %bb15, label %bb13
+
+bb13: ; preds = %bb11
+ %924 = fsub double %905, %909 ; <double> [#uses=1]
+ %925 = fsub double %915, %911 ; <double> [#uses=1]
+ %926 = fmul double %924, %925 ; <double> [#uses=1]
+ %927 = fsub double %913, %909 ; <double> [#uses=1]
+ %928 = fsub double %907, %911 ; <double> [#uses=1]
+ %929 = fmul double %927, %928 ; <double> [#uses=1]
+ %930 = fsub double %926, %929 ; <double> [#uses=1]
+ %931 = fcmp ogt double %930, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %931, label %bb15, label %bb14
+
+bb14: ; preds = %bb13
+ %932 = and i32 %850, 63 ; <i32> [#uses=1]
+ %933 = and i32 %849, -64 ; <i32> [#uses=3]
+ %934 = or i32 %932, %933 ; <i32> [#uses=1]
+ %935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %938 = ptrtoint %struct.edge_rec* %937 to i32 ; <i32> [#uses=2]
+ %939 = add i32 %938, 16 ; <i32> [#uses=1]
+ %940 = and i32 %939, 63 ; <i32> [#uses=1]
+ %941 = and i32 %938, -64 ; <i32> [#uses=1]
+ %942 = or i32 %940, %941 ; <i32> [#uses=1]
+ %943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %945 = ptrtoint %struct.edge_rec* %944 to i32 ; <i32> [#uses=2]
+ %946 = add i32 %945, 16 ; <i32> [#uses=1]
+ %947 = and i32 %946, 63 ; <i32> [#uses=1]
+ %948 = and i32 %945, -64 ; <i32> [#uses=1]
+ %949 = or i32 %947, %948 ; <i32> [#uses=1]
+ %950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %953 = ptrtoint %struct.edge_rec* %952 to i32 ; <i32> [#uses=2]
+ %954 = add i32 %953, 16 ; <i32> [#uses=1]
+ %955 = and i32 %954, 63 ; <i32> [#uses=1]
+ %956 = and i32 %953, -64 ; <i32> [#uses=1]
+ %957 = or i32 %955, %956 ; <i32> [#uses=1]
+ %958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
+ store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
+ %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
+ store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
+ %965 = add i32 %881, 16 ; <i32> [#uses=1]
+ %966 = and i32 %965, 63 ; <i32> [#uses=1]
+ %967 = or i32 %966, %933 ; <i32> [#uses=1]
+ %968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %970 = load %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %971 = ptrtoint %struct.edge_rec* %970 to i32 ; <i32> [#uses=2]
+ %972 = add i32 %971, 16 ; <i32> [#uses=1]
+ %973 = and i32 %972, 63 ; <i32> [#uses=1]
+ %974 = and i32 %971, -64 ; <i32> [#uses=1]
+ %975 = or i32 %973, %974 ; <i32> [#uses=1]
+ %976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %978 = ptrtoint %struct.edge_rec* %977 to i32 ; <i32> [#uses=2]
+ %979 = add i32 %978, 16 ; <i32> [#uses=1]
+ %980 = and i32 %979, 63 ; <i32> [#uses=1]
+ %981 = and i32 %978, -64 ; <i32> [#uses=1]
+ %982 = or i32 %980, %981 ; <i32> [#uses=1]
+ %983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %986 = ptrtoint %struct.edge_rec* %985 to i32 ; <i32> [#uses=2]
+ %987 = add i32 %986, 16 ; <i32> [#uses=1]
+ %988 = and i32 %987, 63 ; <i32> [#uses=1]
+ %989 = and i32 %986, -64 ; <i32> [#uses=1]
+ %990 = or i32 %988, %989 ; <i32> [#uses=1]
+ %991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
+ store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
+ %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
+ store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
+ %998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
+ store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
+ br label %bb15
+
+bb15: ; preds = %bb14, %bb13, %bb11, %bb10, %bb6
+ %retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ] ; <i32> [#uses=1]
+ %retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ] ; <i32> [#uses=1]
+ %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64* ; <i64*> [#uses=1]
+ %1001 = zext i32 %retval.0.0 to i64 ; <i64> [#uses=1]
+ %1002 = zext i32 %retval.1.0 to i64 ; <i64> [#uses=1]
+ %1003 = shl i64 %1002, 32 ; <i64> [#uses=1]
+ %1004 = or i64 %1003, %1001 ; <i64> [#uses=1]
+ store i64 %1004, i64* %agg.result162, align 4
+ ret void
+}
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc void @exit(i32) noreturn nounwind
+
+declare arm_apcscc %struct.edge_rec* @alloc_edge() nounwind
diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
new file mode 100644
index 000000000000..b4b989bf38a4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
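+; Reduced testcase for a register scavenger assertion; there is no FileCheck
+; pattern, so the test passes as long as llc does not crash.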
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ store i16 %parts, i16* undef, align 4
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ br i1 undef, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ %0 = load i16* undef, align 4 ; <i16> [#uses=1]
+ %1 = icmp eq i16 %0, 0 ; <i1> [#uses=1]
+ %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; <i8*> [#uses=1]
+ %2 = tail call arm_apcscc i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; <i32> [#uses=0]
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
new file mode 100644
index 000000000000..24f499036ce4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -march=arm
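+; Reduced testcase for a scheduler assertion; compile-only, no FileCheck.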
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ %0 = getelementptr i8* null, i32 10 ; <i8*> [#uses=1]
+ %1 = bitcast i8* %0 to i16* ; <i16*> [#uses=1]
+ %2 = load i16* %1, align 2 ; <i16> [#uses=1]
+ %3 = add i16 %2, 1 ; <i16> [#uses=1]
+ %4 = zext i16 %3 to i32 ; <i32> [#uses=1]
+ %5 = mul i32 %4, 3 ; <i32> [#uses=1]
+ %6 = add i32 %5, -1 ; <i32> [#uses=1]
+ %7 = icmp eq i32 %6, undef ; <i1> [#uses=1]
+ br i1 %7, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
new file mode 100644
index 000000000000..e1d19d1ac2ff
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 -mattr=+vfp3
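+; Many double values are live at once across the phi block below, stressing
+; VFP3 register allocation. The test passes if llc compiles it.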
+
+@a = external global double ; <double*> [#uses=1]
+
+declare double @llvm.exp.f64(double) nounwind readonly
+
+define arm_apcscc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb28, label %bb
+
+bb28: ; preds = %bb
+ %0 = load double* @a, align 4 ; <double> [#uses=2]
+ %1 = fadd double %0, undef ; <double> [#uses=2]
+ br i1 undef, label %bb59, label %bb60
+
+bb59: ; preds = %bb28
+ %2 = fsub double -0.000000e+00, undef ; <double> [#uses=2]
+ br label %bb61
+
+bb60: ; preds = %bb28
+ %3 = tail call double @llvm.exp.f64(double undef) nounwind ; <double> [#uses=1]
+ %4 = fsub double -0.000000e+00, %3 ; <double> [#uses=2]
+ %5 = fsub double -0.000000e+00, undef ; <double> [#uses=1]
+ %6 = fsub double -0.000000e+00, undef ; <double> [#uses=1]
+ br label %bb61
+
+bb61: ; preds = %bb60, %bb59
+ %.pn201 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn111 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn452 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn85 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn238 = phi double [ 0.000000e+00, %bb59 ], [ 0.000000e+00, %bb60 ] ; <double> [#uses=1]
+ %.pn39 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn230 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn228 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn224 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn222 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn218 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1]
+ %.pn214 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn212 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1]
+ %.pn213 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn210 = phi double [ undef, %bb59 ], [ %5, %bb60 ] ; <double> [#uses=1]
+ %.pn202 = phi double [ undef, %bb59 ], [ %6, %bb60 ] ; <double> [#uses=0]
+ %.pn390 = fdiv double %.pn452, undef ; <double> [#uses=0]
+ %.pn145 = fdiv double %.pn238, %1 ; <double> [#uses=0]
+ %.pn138 = fdiv double %.pn230, undef ; <double> [#uses=1]
+ %.pn139 = fdiv double %.pn228, undef ; <double> [#uses=1]
+ %.pn134 = fdiv double %.pn224, %0 ; <double> [#uses=1]
+ %.pn135 = fdiv double %.pn222, %1 ; <double> [#uses=1]
+ %.pn133 = fdiv double %.pn218, undef ; <double> [#uses=0]
+ %.pn128 = fdiv double %.pn214, undef ; <double> [#uses=1]
+ %.pn129 = fdiv double %.pn212, %.pn213 ; <double> [#uses=1]
+ %.pn126 = fdiv double %.pn210, undef ; <double> [#uses=0]
+ %.pn54.in = fmul double undef, %.pn201 ; <double> [#uses=1]
+ %.pn42.in = fmul double undef, undef ; <double> [#uses=1]
+ %.pn76 = fsub double %.pn138, %.pn139 ; <double> [#uses=1]
+ %.pn74 = fsub double %.pn134, %.pn135 ; <double> [#uses=1]
+ %.pn70 = fsub double %.pn128, %.pn129 ; <double> [#uses=1]
+ %.pn54 = fdiv double %.pn54.in, 6.000000e+00 ; <double> [#uses=1]
+ %.pn64 = fmul double undef, 0x3FE5555555555555 ; <double> [#uses=1]
+ %.pn65 = fmul double undef, undef ; <double> [#uses=1]
+ %.pn50 = fmul double undef, %.pn111 ; <double> [#uses=0]
+ %.pn42 = fdiv double %.pn42.in, 6.000000e+00 ; <double> [#uses=1]
+ %.pn40 = fmul double undef, %.pn85 ; <double> [#uses=0]
+ %.pn56 = fadd double %.pn76, undef ; <double> [#uses=1]
+ %.pn57 = fmul double %.pn74, undef ; <double> [#uses=1]
+ %.pn36 = fadd double undef, undef ; <double> [#uses=1]
+ %.pn37 = fmul double %.pn70, undef ; <double> [#uses=1]
+ %.pn33 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1]
+ %.pn29 = fsub double %.pn64, %.pn65 ; <double> [#uses=1]
+ %.pn21 = fadd double undef, undef ; <double> [#uses=1]
+ %.pn27 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1]
+ %.pn11 = fadd double %.pn56, %.pn57 ; <double> [#uses=1]
+ %.pn32 = fmul double %.pn54, undef ; <double> [#uses=1]
+ %.pn26 = fmul double %.pn42, undef ; <double> [#uses=1]
+ %.pn15 = fmul double 0.000000e+00, %.pn39 ; <double> [#uses=1]
+ %.pn7 = fadd double %.pn36, %.pn37 ; <double> [#uses=1]
+ %.pn30 = fsub double %.pn32, %.pn33 ; <double> [#uses=1]
+ %.pn28 = fadd double %.pn30, 0.000000e+00 ; <double> [#uses=1]
+ %.pn24 = fsub double %.pn28, %.pn29 ; <double> [#uses=1]
+ %.pn22 = fsub double %.pn26, %.pn27 ; <double> [#uses=1]
+ %.pn20 = fadd double %.pn24, undef ; <double> [#uses=1]
+ %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1]
+ %.pn16 = fsub double %.pn20, %.pn21 ; <double> [#uses=1]
+ %.pn14 = fsub double %.pn18, undef ; <double> [#uses=1]
+ %.pn12 = fadd double %.pn16, undef ; <double> [#uses=1]
+ %.pn10 = fadd double %.pn14, %.pn15 ; <double> [#uses=1]
+ %.pn8 = fsub double %.pn12, undef ; <double> [#uses=1]
+ %.pn6 = fsub double %.pn10, %.pn11 ; <double> [#uses=1]
+ %.pn4 = fadd double %.pn8, undef ; <double> [#uses=1]
+ %.pn2 = fadd double %.pn6, %.pn7 ; <double> [#uses=1]
+ %N1.0 = fsub double %.pn4, undef ; <double> [#uses=1]
+ %D1.0 = fsub double %.pn2, undef ; <double> [#uses=2]
+ br i1 undef, label %bb62, label %bb64
+
+bb62: ; preds = %bb61
+ %7 = fadd double %D1.0, undef ; <double> [#uses=1]
+ br label %bb64
+
+bb64: ; preds = %bb62, %bb61
+ %.pn = phi double [ undef, %bb62 ], [ %N1.0, %bb61 ] ; <double> [#uses=1]
+ %.pn1 = phi double [ %7, %bb62 ], [ %D1.0, %bb61 ] ; <double> [#uses=1]
+ %x.1 = fdiv double %.pn, %.pn1 ; <double> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
new file mode 100644
index 000000000000..2d4e58d63603
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+; PR4657
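+; Reduced testcase for a register scavenger assertion involving NEON vectors.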
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
+entry:
+ %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %f_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %0 = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <4 x i32> %v, <4 x i32>* %v_addr
+ store i32 %f, i32* %f_addr
+ %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1]
+ %2 = load i32* %f_addr, align 4 ; <i32> [#uses=1]
+ %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1]
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1]
+ %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %5, <4 x i32>* %0, align 16
+ %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %6, <4 x i32>* %retval, align 16
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %retval1
+}
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
new file mode 100644
index 000000000000..65ffed2b80a0
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-elf"
+
+define arm_aapcscc i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1]
+ %0 = icmp eq i32 %asmtmp.i, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb6.i, label %fault_in_pages_writeable.exit
+
+bb6.i: ; preds = %bb5.i
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb7.i
+
+bb7.i: ; preds = %bb6.i
+ unreachable
+
+fault_in_pages_writeable.exit: ; preds = %bb6.i, %bb5.i, %entry
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %1 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
new file mode 100644
index 000000000000..9e5372a79352
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+define arm_aapcscc i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1]
+ br label %fault_in_pages_writeable.exit
+
+fault_in_pages_writeable.exit: ; preds = %bb5.i, %entry
+ %0 = phi i32 [ 0, %entry ], [ %asmtmp.i, %bb5.i ] ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %2 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
new file mode 100644
index 000000000000..18d68f79370c
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=arm
+; PR4528
+
+; Inline asm is allowed to contain operands "=&r", "0".
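+; "=&r" marks an early-clobber output and "0" ties an input to output 0;
+; the register scavenger used to assert on this combination.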
+
+%struct.device_dma_parameters = type { i32, i32 }
+%struct.iovec = type { i8*, i32 }
+
+define arm_aapcscc i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize {
+entry:
+ br label %bb8
+
+bb: ; preds = %bb8
+ br i1 undef, label %bb10, label %bb2
+
+bb2: ; preds = %bb
+  %asmtmp = tail call %struct.device_dma_parameters asm "adds $1, $2, $3; sbcccs $1, $1, $0; movcc $0, #0", "=&r,=&r,r,Ir,0,~{cc}"(i8* undef, i32 undef, i32 0) nounwind ; <%struct.device_dma_parameters> [#uses=1]
+  %asmresult = extractvalue %struct.device_dma_parameters %asmtmp, 0 ; <i32> [#uses=1]
+ %0 = icmp eq i32 %asmresult, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb7, label %bb4
+
+bb4: ; preds = %bb2
+ br i1 undef, label %bb10, label %bb9
+
+bb7: ; preds = %bb2
+ %1 = add i32 %2, 1 ; <i32> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb7, %entry
+ %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; <i32> [#uses=3]
+  %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0 ; <i8**> [#uses=0]
+ %3 = load i32* %nr_segs, align 4 ; <i32> [#uses=1]
+ %4 = icmp ult i32 %2, %3 ; <i1> [#uses=1]
+ br i1 %4, label %bb, label %bb9
+
+bb9: ; preds = %bb8, %bb4
+ store i32 undef, i32* %count, align 4
+ ret i32 0
+
+bb10: ; preds = %bb4, %bb
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
new file mode 100644
index 000000000000..a46482cc7317
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm
+; PR4716
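+; The naked attribute suppresses the prologue/epilogue, which used to
+; trigger a register scavenger assertion here.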
+
+define arm_aapcscc void @_start() nounwind naked {
+entry:
+ tail call arm_aapcscc void @exit(i32 undef) noreturn nounwind
+ unreachable
+}
+
+declare arm_aapcscc void @exit(i32) noreturn nounwind
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
new file mode 100644
index 000000000000..84915c48824a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 -post-RA-scheduler -mcpu=cortex-a8
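+; Compile-only test for kill-flag handling in the post-RA scheduler.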
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
+@g = common global %struct.tree* null
+
+define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
+entry:
+ %t.idx51.val.i = load double* null ; <double> [#uses=1]
+ br i1 undef, label %bb4.i, label %bb.i
+
+bb.i: ; preds = %entry
+ unreachable
+
+bb4.i: ; preds = %entry
+ %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
+ %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
+ %.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1]
+ %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
+ %.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1]
+ %1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2]
+ %2 = fmul double %1, %1 ; <double> [#uses=1]
+ %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2]
+ %4 = fmul double %3, %3 ; <double> [#uses=1]
+ %5 = fadd double %2, %4 ; <double> [#uses=1]
+ %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1]
+ br i1 undef, label %bb7.i4, label %bb6.i
+
+bb6.i: ; preds = %bb4.i
+ br label %bb7.i4
+
+bb7.i4: ; preds = %bb6.i, %bb4.i
+ %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0]
+ unreachable
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
new file mode 100644
index 000000000000..a21ffc38d09e
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+%struct.icstruct = type { [3 x i32], i16 }
+%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+declare arm_apcscc double @floor(double) nounwind readnone
+
+define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
+entry:
+ br i1 %a, label %bb3, label %bb1
+
+bb1: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ br i1 %a, label %bb7, label %bb5
+
+bb5: ; preds = %bb3
+ unreachable
+
+bb7: ; preds = %bb3
+ br i1 %a, label %bb11, label %bb9
+
+bb9: ; preds = %bb7
+ %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb7
+ %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %1
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
new file mode 100644
index 000000000000..e3d8ea60f992
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+%struct.Patient = type { i32, i32, i32, %struct.Village* }
+%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
+entry:
+ br i1 %p, label %bb8, label %bb1
+
+bb1: ; preds = %entry
+ %0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
+ %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
+ %.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
+ store i32 %.c, i32* null
+ %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %1
+ %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %2
+ ret %struct.Village* %0
+
+bb8: ; preds = %entry
+ ret %struct.Village* null
+}
+
+declare double @ldexp(double, i32)
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
new file mode 100644
index 000000000000..9123377e7151
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+@.str = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=0]
+@.str1 = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1]
+@.str2 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ %0 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+ %1 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+ %2 = tail call arm_apcscc i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+ %3 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
new file mode 100644
index 000000000000..0fad533b6c59
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s
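+; The "\01" prefix keeps the literal symbol name, and the linker_private
+; global must be emitted as a weak definition (see the FileCheck lines).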
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
new file mode 100644
index 000000000000..c6ef2561490c
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+define arm_apcscc void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) {
+entry:
+ %0 = lshr <4 x i32> zeroinitializer, <i32 31, i32 31, i32 31, i32 31> ; <<4 x i32>> [#uses=1]
+ %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; <<2 x i32>> [#uses=1]
+ %2 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> undef, <2 x i32> %1) nounwind ; <<2 x i32>> [#uses=1]
+ %3 = extractelement <2 x i32> %2, i32 0 ; <i32> [#uses=1]
+ %not..i = icmp eq i32 %3, undef ; <i1> [#uses=1]
+ br i1 %not..i, label %return, label %bb221
+
+bb221: ; preds = %bb221, %entry
+ br label %bb221
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
new file mode 100644
index 000000000000..bc5bfe9f6098
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+define arm_apcscc void @aaaa(%quuz* %this, i8* %block) {
+entry:
+ br i1 undef, label %bb.nph269, label %bb201
+
+bb.nph269: ; preds = %entry
+ br label %bb12
+
+bb12: ; preds = %bb194, %bb.nph269
+ %0 = fmul <4 x float> undef, undef ; <<4 x float>> [#uses=1]
+ %1 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %3 = fadd <4 x float> undef, %2 ; <<4 x float>> [#uses=1]
+ br i1 undef, label %bb194, label %bb186
+
+bb186: ; preds = %bb12
+ br label %bb194
+
+bb194: ; preds = %bb186, %bb12
+ %besterror.0.0 = phi <4 x float> [ %3, %bb186 ], [ undef, %bb12 ] ; <<4 x float>> [#uses=0]
+ %indvar.next294 = add i32 undef, 1 ; <i32> [#uses=0]
+ br label %bb12
+
+bb201: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
new file mode 100644
index 000000000000..d5178b4bfb3f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() nounwind {
+entry:
+ %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1]
+ %tmp28 = extractelement <2 x float> %0, i32 0 ; <float> [#uses=1]
+ %1 = fcmp une float %tmp28, 4.900000e+01 ; <i1> [#uses=1]
+ br i1 %1, label %bb, label %bb7
+
+bb: ; preds = %entry
+ unreachable
+
+bb7: ; preds = %entry
+ br i1 undef, label %bb8, label %bb9
+
+bb8: ; preds = %bb7
+ unreachable
+
+bb9: ; preds = %bb7
+ ret void
+}
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
new file mode 100644
index 000000000000..266fce6e0c5e
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @aaa() nounwind {
+entry:
+ %0 = fmul <4 x float> undef, <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02, float 0x3EB0C6F7A0000000> ; <<4 x float>> [#uses=1]
+ %tmp31 = extractelement <4 x float> %0, i32 0 ; <float> [#uses=1]
+ %1 = fpext float %tmp31 to double ; <double> [#uses=1]
+ %2 = fsub double 1.000000e+00, %1 ; <double> [#uses=1]
+ %3 = fdiv double %2, 1.000000e+00 ; <double> [#uses=1]
+ %4 = tail call double @fabs(double %3) nounwind readnone ; <double> [#uses=1]
+ %5 = fcmp ogt double %4, 1.000000e-05 ; <i1> [#uses=1]
+ br i1 %5, label %bb, label %bb7
+
+bb: ; preds = %entry
+ unreachable
+
+bb7: ; preds = %entry
+ unreachable
+}
+
+declare double @fabs(double)
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
new file mode 100644
index 000000000000..b6cf880a3001
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
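+; The SjLj exception tables must emit the LSDA label with the expected
+; name; see the FileCheck line near the end of the file.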
+
+%struct.A = type { i32* }
+
+define arm_apcscc void @"\01-[MyFunction Name:]"() {
+entry:
+ %save_filt.1 = alloca i32 ; <i32*> [#uses=2]
+ %save_eptr.0 = alloca i8* ; <i8**> [#uses=2]
+ %a = alloca %struct.A ; <%struct.A*> [#uses=3]
+ %eh_exception = alloca i8* ; <i8**> [#uses=5]
+ %eh_selector = alloca i32 ; <i32*> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call arm_apcscc void @_ZN1AC1Ev(%struct.A* %a)
+ invoke arm_apcscc void @_Z3barv()
+ to label %invcont unwind label %lpad
+
+invcont: ; preds = %entry
+ call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ br label %return
+
+bb: ; preds = %ppad
+ %eh_select = load i32* %eh_selector ; <i32> [#uses=1]
+ store i32 %eh_select, i32* %save_filt.1, align 4
+ %eh_value = load i8** %eh_exception ; <i8*> [#uses=1]
+ store i8* %eh_value, i8** %save_eptr.0, align 4
+ call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ %0 = load i8** %save_eptr.0, align 4 ; <i8*> [#uses=1]
+ store i8* %0, i8** %eh_exception, align 4
+ %1 = load i32* %save_filt.1, align 4 ; <i32> [#uses=1]
+ store i32 %1, i32* %eh_selector, align 4
+ br label %Unwind
+
+return: ; preds = %invcont
+ ret void
+
+lpad: ; preds = %entry
+ %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr1 = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+ store i32 %eh_select2, i32* %eh_selector
+ br label %ppad
+
+ppad: ; preds = %lpad
+ br label %bb
+
+Unwind: ; preds = %bb
+ %eh_ptr3 = load i8** %eh_exception ; <i8*> [#uses=1]
+ call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+ unreachable
+}
+
+define linkonce_odr arm_apcscc void @_ZN1AC1Ev(%struct.A* %this) {
+entry:
+ %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store %struct.A* %this, %struct.A** %this_addr
+ %0 = call arm_apcscc i8* @_Znwm(i32 4) ; <i8*> [#uses=1]
+ %1 = bitcast i8* %0 to i32* ; <i32*> [#uses=1]
+ %2 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
+ %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+ store i32* %1, i32** %3, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare arm_apcscc i8* @_Znwm(i32)
+
+define linkonce_odr arm_apcscc void @_ZN1AD1Ev(%struct.A* %this) nounwind {
+entry:
+ %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store %struct.A* %this, %struct.A** %this_addr
+ %0 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
+ %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
+ %2 = load i32** %1, align 4 ; <i32*> [#uses=1]
+ %3 = bitcast i32* %2 to i8* ; <i8*> [#uses=1]
+ call arm_apcscc void @_ZdlPv(i8* %3) nounwind
+ br label %bb
+
+bb: ; preds = %entry
+ br label %return
+
+return: ; preds = %bb
+ ret void
+}
+;CHECK: L_LSDA_1:
+
+declare arm_apcscc void @_ZdlPv(i8*) nounwind
+
+declare arm_apcscc void @_Z3barv()
+
+declare i8* @llvm.eh.exception() nounwind
+
+declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+
+declare arm_apcscc i32 @__gxx_personality_sj0(...)
+
+declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
new file mode 100644
index 000000000000..e1e60e6317a6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4843
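+; The <4 x i16> shuffle below must lower to a single vzip.16.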
+define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
+;CHECK: v2regbug:
+;CHECK: vzip.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
+ ret <4 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
new file mode 100644
index 000000000000..bf91fe099e6b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,107 @@
+; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
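+; The prologue push must survive post-RA scheduling.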
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-apple-darwin9"
+
+@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
+@nodes = internal global i64 0 ; <i64*> [#uses=4]
+@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
+@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
+@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
+@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
+@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
+@.str6 = private constant [28 x i8] c"score = %d (%c) work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
+@plycnt = internal global i32 0 ; <i32*> [#uses=21]
+@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
+@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
+@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
+@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
+@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
+@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
+@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
+@he = internal global i8* null ; <i8**> [#uses=9]
+@hits = internal global i64 0 ; <i64*> [#uses=8]
+@posed = internal global i64 0 ; <i64*> [#uses=7]
+@ht = internal global i32* null ; <i32**> [#uses=5]
+@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
+@.str117 = private constant [45 x i8] c"- %5.3f < %5.3f = %5.3f > %5.3f + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
+@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @getchar() nounwind
+
+define internal arm_apcscc i32 @transpose() nounwind readonly {
+; CHECK: push
+entry:
+ %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+ %1 = shl i32 %0, 7 ; <i32> [#uses=1]
+ %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+ %3 = or i32 %1, %2 ; <i32> [#uses=1]
+ %4 = shl i32 %3, 7 ; <i32> [#uses=1]
+ %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+ %6 = or i32 %4, %5 ; <i32> [#uses=3]
+ %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+ %8 = shl i32 %7, 7 ; <i32> [#uses=1]
+ %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+ %10 = or i32 %8, %9 ; <i32> [#uses=1]
+ %11 = shl i32 %10, 7 ; <i32> [#uses=1]
+ %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+ %13 = or i32 %11, %12 ; <i32> [#uses=3]
+ %14 = icmp ugt i32 %6, %13 ; <i1> [#uses=2]
+ %.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; <i32> [#uses=1]
+ %.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; <i32> [#uses=1]
+ %.pn2.i = shl i32 %.pn2.in.i, 7 ; <i32> [#uses=1]
+ %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+ %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; <i32> [#uses=1]
+ %.pn.in.i = zext i32 %.pn.in.in.i to i64 ; <i64> [#uses=1]
+ %.pn.i = shl i64 %.pn.in.i, 21 ; <i64> [#uses=1]
+ %.pn1.i = zext i32 %.pn1.in.i to i64 ; <i64> [#uses=1]
+ %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i ; <i64> [#uses=2]
+ %15 = lshr i64 %iftmp.22.0.i, 17 ; <i64> [#uses=1]
+ %16 = trunc i64 %15 to i32 ; <i32> [#uses=2]
+ %17 = urem i64 %iftmp.22.0.i, 1050011 ; <i64> [#uses=1]
+ %18 = trunc i64 %17 to i32 ; <i32> [#uses=1]
+ %19 = urem i32 %16, 179 ; <i32> [#uses=1]
+ %20 = or i32 %19, 131072 ; <i32> [#uses=1]
+ %21 = load i32** @ht, align 4 ; <i32*> [#uses=1]
+ br label %bb5
+
+bb: ; preds = %bb5
+ %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+ %23 = load i32* %22, align 4 ; <i32> [#uses=1]
+ %24 = icmp eq i32 %23, %16 ; <i1> [#uses=1]
+ br i1 %24, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ %25 = load i8** @he, align 4 ; <i8*> [#uses=1]
+ %26 = getelementptr inbounds i8* %25, i32 %x.0 ; <i8*> [#uses=1]
+ %27 = load i8* %26, align 1 ; <i8> [#uses=1]
+ %28 = sext i8 %27 to i32 ; <i32> [#uses=1]
+ ret i32 %28
+
+bb2: ; preds = %bb
+ %29 = add nsw i32 %20, %x.0 ; <i32> [#uses=3]
+ %30 = add i32 %29, -1050011 ; <i32> [#uses=1]
+ %31 = icmp sgt i32 %29, 1050010 ; <i1> [#uses=1]
+ %. = select i1 %31, i32 %30, i32 %29 ; <i32> [#uses=1]
+ %32 = add i32 %33, 1 ; <i32> [#uses=1]
+ br label %bb5
+
+bb5: ; preds = %bb2, %entry
+ %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ] ; <i32> [#uses=2]
+ %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ] ; <i32> [#uses=3]
+ %34 = icmp sgt i32 %33, 7 ; <i1> [#uses=1]
+ br i1 %34, label %bb7, label %bb
+
+bb7: ; preds = %bb5
+ ret i32 -128
+}
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
new file mode 100644
index 000000000000..f654a1664c8b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mattr=+neon < %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() {
+entry:
+ %0 = insertelement <4 x i32> undef, i32 -1, i32 3
+ store <4 x i32> %0, <4 x i32>* undef, align 16
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
new file mode 100644
index 000000000000..98cab9a9149e
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -0,0 +1,20 @@
+; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s
+; pr4939
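+; The ugt compare is the inverted form of ole; codegen must select the
+; unsigned "hi" condition (fstdhi).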
+
+define void @test(double* %x, double* %y) nounwind {
+ %1 = load double* %x, align 4
+ %2 = load double* %y, align 4
+ %3 = fsub double -0.000000e+00, %1
+ %4 = fcmp ugt double %2, %3
+ br i1 %4, label %bb1, label %bb2
+
+bb1:
+;CHECK: fstdhi
+ store double %1, double* %y, align 4
+ br label %bb2
+
+bb2:
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll
new file mode 100644
index 000000000000..10653b51c146
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-10-postdec.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+; Radar 7213850
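+; The pointer-integer sub must stay an explicit sub instruction rather than
+; being folded into a post-decrement addressing mode.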
+
+define i32 @test(i8* %d, i32 %x, i32 %y) nounwind {
+ %1 = ptrtoint i8* %d to i32
+;CHECK: sub
+ %2 = sub i32 %x, %1
+ %3 = add nsw i32 %2, %y
+ store i8 0, i8* %d, align 1
+ ret i32 %3
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
new file mode 100644
index 000000000000..13adb24e2f6f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mattr=+neon < %s
+; PR4965
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%struct.fr = type { [6 x %struct.pl] }
+%struct.obb = type { %"struct.m4", %"struct.p3" }
+%struct.pl = type { %"struct.p3" }
+%"struct.m4" = type { %"struct.p3", %"struct.p3", %"struct.p3", %"struct.p3" }
+%"struct.p3" = type { <4 x float> }
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind {
+entry:
+ %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ %val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
+ %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1]
+ %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb33
+
+bb: ; preds = %bb33
+ %2 = fmul <4 x float> %val.i.i, undef ; <<4 x float>> [#uses=1]
+ %3 = fmul <4 x float> %val2.i.i, undef ; <<4 x float>> [#uses=1]
+ %4 = fadd <4 x float> %3, %2 ; <<4 x float>> [#uses=1]
+ %5 = fmul <4 x float> %val4.i.i, undef ; <<4 x float>> [#uses=1]
+ %6 = fadd <4 x float> %5, %4 ; <<4 x float>> [#uses=1]
+ %7 = bitcast <4 x float> %6 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %8 = and <4 x i32> %7, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
+ %9 = or <4 x i32> %8, undef ; <<4 x i32>> [#uses=1]
+ %10 = bitcast <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = shufflevector <4 x float> %10, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %12 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> undef, %12 ; <<4 x float>> [#uses=1]
+ %14 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=1]
+ %15 = fadd <4 x float> %14, %13 ; <<4 x float>> [#uses=1]
+ %16 = fadd <4 x float> undef, %15 ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %1, %16 ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> zeroinitializer, %17 ; <<4 x float>> [#uses=1]
+ %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=2]
+ %20 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %21 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %22 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %20, <2 x float> %21) nounwind ; <<2 x float>> [#uses=2]
+ %23 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %22, <2 x float> %22) nounwind ; <<2 x float>> [#uses=2]
+ %24 = shufflevector <2 x float> %23, <2 x float> %23, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %25 = fadd <4 x float> %24, zeroinitializer ; <<4 x float>> [#uses=1]
+ %tmp46 = extractelement <4 x float> %25, i32 0 ; <float> [#uses=1]
+ %26 = fcmp olt float %tmp46, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %26, label %bb41, label %bb33
+
+bb33: ; preds = %bb, %entry
+ br i1 undef, label %bb34, label %bb
+
+bb34: ; preds = %bb33
+ ret i8 undef
+
+bb41: ; preds = %bb
+ ret i8 1
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
new file mode 100644
index 000000000000..758b59a4638d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9
+
+define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+ %1 = ptrtoint i8* %pBuffer to i32
+
+ %lsr.iv2641 = inttoptr i32 %1 to float*
+ %tmp29 = add i32 %1, 4
+ %tmp2930 = inttoptr i32 %tmp29 to float*
+ %tmp31 = add i32 %1, 8
+ %tmp3132 = inttoptr i32 %tmp31 to float*
+ %tmp33 = add i32 %1, 12
+ %tmp3334 = inttoptr i32 %tmp33 to float*
+ %tmp35 = add i32 %1, 16
+ %tmp3536 = inttoptr i32 %tmp35 to float*
+ %tmp37 = add i32 %1, 20
+ %tmp3738 = inttoptr i32 %tmp37 to float*
+ %tmp39 = add i32 %1, 24
+ %tmp3940 = inttoptr i32 %tmp39 to float*
+ %2 = load float* %lsr.iv2641, align 4
+ %3 = load float* %tmp2930, align 4
+ %4 = load float* %tmp3132, align 4
+ %5 = load float* %tmp3334, align 4
+ %6 = load float* %tmp3536, align 4
+ %7 = load float* %tmp3738, align 4
+ %8 = load float* %tmp3940, align 4
+ %9 = insertelement <4 x float> undef, float %6, i32 0
+ %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer
+ %11 = insertelement <4 x float> %10, float %7, i32 1
+ %12 = insertelement <4 x float> %11, float %8, i32 2
+ %13 = insertelement <4 x float> undef, float %2, i32 0
+ %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer
+ %15 = insertelement <4 x float> %14, float %3, i32 1
+ %16 = insertelement <4 x float> %15, float %4, i32 2
+ %17 = insertelement <4 x float> %16, float %5, i32 3
+ %18 = fsub <4 x float> zeroinitializer, %12
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer
+ %20 = shufflevector <4 x float> %17, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %21 = shufflevector <2 x float> %20, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+
+ ret <4 x float> %21
+}
diff --git a/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
new file mode 100644
index 000000000000..980f8ce6fa1b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9
+
+; PR4986
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb.preheader
+
+bb.preheader: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb, %bb.preheader
+ %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %4 = fmul <4 x float> undef, %3 ; <<4 x float>> [#uses=1]
+ %5 = extractelement <4 x float> %4, i32 3 ; <float> [#uses=1]
+ store float %5, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc <4 x float> @bar(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+ %1 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float undef, i32 2 ; <<4 x float>> [#uses=1]
+ %4 = insertelement <4 x float> %3, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ ret <4 x float> %6
+}
diff --git a/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
new file mode 100644
index 000000000000..aace4751915d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { <4 x float> }
+%foo = type { %bar, %bar, %bar, %bar }
+
+declare arm_aapcs_vfpcc <4 x float> @bbb(%bar*) nounwind
+
+define arm_aapcs_vfpcc void @aaa(%foo* noalias sret %agg.result, %foo* %tfrm) nounwind {
+entry:
+ %0 = call arm_aapcs_vfpcc <4 x float> @bbb(%bar* undef) nounwind ; <<4 x float>> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
new file mode 100644
index 000000000000..30931a2ffb66
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { %foo, %foo }
+%foo = type { <4 x float> }
+
+declare arm_aapcs_vfpcc float @aaa(%foo* nocapture) nounwind readonly
+
+declare arm_aapcs_vfpcc %bar* @bbb(%bar*, <4 x float>, <4 x float>) nounwind
+
+define arm_aapcs_vfpcc void @ccc(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc %bar* @bbb(%bar* undef, <4 x float> undef, <4 x float> undef) nounwind ; <%bar*> [#uses=0]
+ %1 = call arm_aapcs_vfpcc float @aaa(%foo* undef) nounwind ; <float> [#uses=0]
+ unreachable
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 000000000000..2ff479b21781
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%struct.1 = type { %struct.4, %struct.4 }
+%struct.4 = type { <4 x float> }
+
+define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.0, <4 x float> %legalation.0) nounwind {
+entry:
+ %0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0]
+ %1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0]
+ %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0]
+ ret %struct.1* %this
+}
+
+declare arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @sss1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4*, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
new file mode 100644
index 000000000000..6281775d0616
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR4926
+
+define arm_apcscc void @test_vget_lanep16() nounwind {
+entry:
+ %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_poly16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+; CHECK: fldd
+ %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_poly16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
new file mode 100644
index 000000000000..ea2693ac2e40
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
+; PR5055
+
+module asm ".globl\09__aeabi_f2lz"
+module asm ".set\09__aeabi_f2lz, __fixsfdi"
+module asm ""
+
+define arm_aapcs_vfpcc i64 @__fixsfdi(float %a) nounwind {
+entry:
+ %0 = fcmp olt float %a, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb1
+
+bb: ; preds = %entry
+ %1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
+ %2 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %1) nounwind ; <i64> [#uses=1]
+ %3 = sub i64 0, %2 ; <i64> [#uses=1]
+ ret i64 %3
+
+bb1: ; preds = %entry
+ %4 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %a) nounwind ; <i64> [#uses=1]
+ ret i64 %4
+}
+
+declare arm_aapcs_vfpcc i64 @__fixunssfdi(float)
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
new file mode 100644
index 000000000000..53bd66825953
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -mcpu=arm10tdmi | FileCheck %s
+; PR4687
+
+%0 = type { double, double }
+
+define arm_aapcscc void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
+; CHECK: foo:
+; CHECK: bl __adddf3
+; CHECK-NOT: strd
+; CHECK: mov
+ %x76 = fmul double %y.0, 0.000000e+00 ; <double> [#uses=1]
+ %x77 = fadd double %y.0, 0.000000e+00 ; <double> [#uses=1]
+ %tmpr = fadd double %x.0, %x76 ; <double> [#uses=1]
+ %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
+ store double %tmpr, double* %agg.result.0, align 8
+ %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
+ store double %x77, double* %agg.result.1, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index a3832c0ea3db..9ccff07d456b 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -stats |& grep asm-printer | grep 4
+; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
define i32 @t1(i32 %a) {
%b = mul i32 %a, 9
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 70b2c4d4195b..b2c03147740b 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-linux-gnueabi -o %t -f
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
; RUN: grep set %t | count 5
; RUN: grep globl %t | count 4
; RUN: grep weak %t | count 1
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
index bb336ceebbab..d73abe6a560c 100644
--- a/test/CodeGen/ARM/align.ll
+++ b/test/CodeGen/ARM/align.ll
@@ -1,9 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep align.*1 | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm | grep align.*1 | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
; RUN: grep align.*2 | count 2
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
; RUN: grep align.*3 | count 2
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
; RUN: grep align.*2 | count 4
@a = global i1 true
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
index f7e450f59324..15cf67734cb2 100644
--- a/test/CodeGen/ARM/alloca.ll
+++ b/test/CodeGen/ARM/alloca.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
; RUN: grep {mov r11, sp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
; RUN: grep {mov sp, r11}
define void @f(i32 %a) {
diff --git a/test/CodeGen/ARM/argaddr.ll b/test/CodeGen/ARM/argaddr.ll
index 080827d7f42e..116a32f9c74d 100644
--- a/test/CodeGen/ARM/argaddr.ll
+++ b/test/CodeGen/ARM/argaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
diff --git a/test/CodeGen/ARM/arguments-nosplit-double.ll b/test/CodeGen/ARM/arguments-nosplit-double.ll
index 57ff95c0cb6d..770e41df2c24 100644
--- a/test/CodeGen/ARM/arguments-nosplit-double.ll
+++ b/test/CodeGen/ARM/arguments-nosplit-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
; PR4059
define i32 @f(i64 %z, i32 %a, double %b) {
diff --git a/test/CodeGen/ARM/arguments-nosplit-i64.ll b/test/CodeGen/ARM/arguments-nosplit-i64.ll
index 5464674dbca5..815edfd845ad 100644
--- a/test/CodeGen/ARM/arguments-nosplit-i64.ll
+++ b/test/CodeGen/ARM/arguments-nosplit-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
; PR4058
define i32 @f(i64 %z, i32 %a, i64 %b) {
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index 833e22dc269d..ad5b2d69fab9 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
; RUN: grep {mov r0, r2} | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
; RUN: grep {mov r0, r1} | count 1
define i32 @f(i32 %a, i64 %b) {
diff --git a/test/CodeGen/ARM/arguments2.ll b/test/CodeGen/ARM/arguments2.ll
index eb7e45b4f366..a515ad75a669 100644
--- a/test/CodeGen/ARM/arguments2.ll
+++ b/test/CodeGen/ARM/arguments2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i32 @f(i32 %a, i128 %b) {
%tmp = call i32 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments3.ll b/test/CodeGen/ARM/arguments3.ll
index 97c040521d8b..58f64c6c2f10 100644
--- a/test/CodeGen/ARM/arguments3.ll
+++ b/test/CodeGen/ARM/arguments3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i64 @f(i32 %a, i128 %b) {
%tmp = call i64 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments4.ll b/test/CodeGen/ARM/arguments4.ll
index 63ba64b27f1f..f5f4207b7b37 100644
--- a/test/CodeGen/ARM/arguments4.ll
+++ b/test/CodeGen/ARM/arguments4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define float @f(i32 %a, i128 %b) {
%tmp = call float @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments5.ll b/test/CodeGen/ARM/arguments5.ll
index 2000ff7b4a85..388a8ebee670 100644
--- a/test/CodeGen/ARM/arguments5.ll
+++ b/test/CodeGen/ARM/arguments5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define double @f(i32 %a, i128 %b) {
%tmp = call double @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments6.ll b/test/CodeGen/ARM/arguments6.ll
index a18c621d1437..3f757fee45e4 100644
--- a/test/CodeGen/ARM/arguments6.ll
+++ b/test/CodeGen/ARM/arguments6.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i128 @f(i32 %a, i128 %b) {
%tmp = call i128 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll
index 489ffd41604d..038e417b333a 100644
--- a/test/CodeGen/ARM/arguments7.ll
+++ b/test/CodeGen/ARM/arguments7.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) {
%tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b)
diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll
index 5ff7e09548ea..6999a4d4f656 100644
--- a/test/CodeGen/ARM/arguments8.ll
+++ b/test/CodeGen/ARM/arguments8.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) {
%tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b)
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index 07d928abe81f..690f488d8483 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
define float @f(float %z, double %a, float %b) {
%tmp = call float @g(float %b)
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
index b260b1312daf..2e35e3953f7e 100644
--- a/test/CodeGen/ARM/arm-asm.ll
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @frame_dummy() {
entry:
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
index f1e4c2aeb7fb..273986034c9b 100644
--- a/test/CodeGen/ARM/arm-frameaddr.ll
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | grep r7
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | grep r11
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | grep r7
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | grep r11
; PR4344
; PR4416
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 553c2fb64671..c4b4ec613ee5 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
+; RUN: llc < %s -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
new file mode 100644
index 000000000000..53392de73fcf
--- /dev/null
+++ b/test/CodeGen/ARM/bfc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | grep "bfc " | count 3
+
+; 4278190095 = 0xff00000f
+define i32 @f1(i32 %a) {
+ %tmp = and i32 %a, 4278190095
+ ret i32 %tmp
+}
+
+; 4286578688 = 0xff800000
+define i32 @f2(i32 %a) {
+ %tmp = and i32 %a, 4286578688
+ ret i32 %tmp
+}
+
+; 4095 = 0x00000fff
+define i32 @f3(i32 %a) {
+ %tmp = and i32 %a, 4095
+ ret i32 %tmp
+}
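
The three functions above share the property that makes a single bfc possible: the complement of each AND mask is one contiguous run of set bits (0xff00000f -> 0x00fffff0, 0xff800000 -> 0x007fffff, 0x00000fff -> 0xfffff000), which is exactly the bit-field that bfc clears. As a minimal counter-sketch (the function name @f4 and the mask are illustrative, not part of this test), a mask whose complement is not one contiguous field cannot be matched by a single bfc and would have to lower to an ordinary bic/and sequence:

; 4042322160 = 0xf0f0f0f0 ; complement 0x0f0f0f0f is not a single bit-field
define i32 @f4(i32 %a) {
  %tmp = and i32 %a, 4042322160
  ret i32 %tmp
}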
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
index b4ea433c40cc..b16dcc6755b1 100644
--- a/test/CodeGen/ARM/bic.ll
+++ b/test/CodeGen/ARM/bic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
+; RUN: llc < %s -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
define i32 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
index 0ac4f9a3833d..9e94efe3f9db 100644
--- a/test/CodeGen/ARM/bits.ll
+++ b/test/CodeGen/ARM/bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep and %t | count 1
; RUN: grep orr %t | count 1
; RUN: grep eor %t | count 1
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 437b3189141d..0e3e070a818f 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | not grep bx
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | not grep bx
define void @test(i32 %Ptr, i8* %L) {
entry:
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 6b196653e05a..52246c3f0cd7 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {mov lr, pc}
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep blx
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: llc < %s -march=arm | grep {mov lr, pc}
+; RUN: llc < %s -march=arm -mattr=+v5t | grep blx
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
; RUN: -relocation-model=pic | grep {PLT}
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index 1af6fad099b4..efe29d857d23 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: not grep {bx lr}
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index 3bf2dc0b4f03..294de5ff7278 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | grep "subs r" | count 2
-; RUN: llvm-as < %s | llc -march=arm | grep "adc r"
-; RUN: llvm-as < %s | llc -march=arm | grep "sbc r" | count 2
+; RUN: llc < %s -march=arm | grep "subs r" | count 2
+; RUN: llc < %s -march=arm | grep "adc r"
+; RUN: llc < %s -march=arm | grep "sbc r" | count 2
define i64 @f1(i64 %a, i64 %b) {
entry:
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index 389fb2ce1ee8..d2235c9221ce 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep clz
+; RUN: llc < %s -march=arm -mattr=+v5t | grep clz
declare i32 @llvm.ctlz.i32(i32)
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index fcb8b179c803..5f3ed1d2743c 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
; RUN: grep fcmpes
define void @test3(float* %glob, i32 %X) {
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 095157b592bf..e2d8ddc63fcf 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {mov r0, #255$} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
; RUN: grep {mov r0.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {orr.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {cmp r0, #1, 16} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {orr.*256} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
+; RUN: llc < %s -march=arm | grep {cmp r0, #1, 16} | count 1
define i32 @f1() {
ret i32 0
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 4f4091af4837..0dcf9ddc0bb1 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll
index 5caa5b1266da..fb94626ab7dd 100644
--- a/test/CodeGen/ARM/ctors_dtors.ll
+++ b/test/CodeGen/ARM/ctors_dtors.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep {\\.mod_init_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep {\\.mod_term_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN: grep {\\.section \\.ctors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN: grep {\\.section \\.dtors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN: grep {\\.section \\.init_array,"aw",.init_array}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN: grep {\\.section \\.fini_array,"aw",.fini_array}
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .section __DATA,__mod_term_func,mod_term_funcs
+
+; ELF: .section .ctors,"aw",%progbits
+; ELF: .section .dtors,"aw",%progbits
+
+; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .section .fini_array,"aw",%fini_array
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_init } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
@llvm.global_dtors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_fini } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 1085ec7fa624..2f724e79f104 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep __divsi3 %t
; RUN: grep __udivsi3 %t
; RUN: grep __modsi3 %t
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index e0cd4e15f4e3..92e2d136af68 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll
index 2e9041c6ecab..dc45ce705f44 100644
--- a/test/CodeGen/ARM/extloadi1.ll
+++ b/test/CodeGen/ARM/extloadi1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
@handler_installed.6144.b = external global i1 ; <i1*> [#uses=1]
define void @__mf_sigusr1_respond() {
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
new file mode 100644
index 000000000000..5690a01d750b
--- /dev/null
+++ b/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %dum = fadd float %a, %b
+ %0 = tail call float @fabsf(float %dum)
+ %dum1 = fadd float %0, %b
+ ret float %dum1
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
new file mode 100644
index 000000000000..a01f868d18b2
--- /dev/null
+++ b/test/CodeGen/ARM/fadds.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fadd float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 66acda9c9b91..bf7c305c8959 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep bic | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \
+; RUN: llc < %s -march=arm | grep bic | count 2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
; RUN: grep fneg | count 2
define float @test1(float %x, double %y) {
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
new file mode 100644
index 000000000000..2af250d121d1
--- /dev/null
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fdiv float %a, %b
+ ret float %0
+}
+
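
The five RUN lines in these new scalar-FP tests form a small matrix: plain +vfp2, +neon with -arm-use-neon-fp forced on and off, and the cortex-a8/cortex-a9 defaults (which the CHECK patterns show are NEON-preferred and VFP-preferred respectively). fdivs.ll expects the VFP fdivs in all five configurations because NEON has no floating-point divide, so there is no vdiv.f32 form to prefer.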
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index 777a3d69a191..ebf1d84536e3 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fstd
+; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd
define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
new file mode 100644
index 000000000000..1a1cd0747b49
--- /dev/null
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fadd float %acc, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll
index 315e6238732f..eb72faf8d811 100644
--- a/test/CodeGen/ARM/fmdrr-fmrrd.ll
+++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmdrr
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmrrd
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd
; naive codegen for this is:
; _i:
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
new file mode 100644
index 000000000000..c6e6d4060402
--- /dev/null
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float %0, %acc
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
new file mode 100644
index 000000000000..cb5dadeb2104
--- /dev/null
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
new file mode 100644
index 000000000000..7da443dd93f5
--- /dev/null
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+
+define float @test1(float* %a) {
+entry:
+ %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %1 = fsub float -0.000000e+00, %0 ; <float> [#uses=2]
+ %2 = fpext float %1 to double ; <double> [#uses=1]
+ %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
+ %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
+ ret float %retval
+}
+
+define float @test2(float* %a) {
+entry:
+ %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]
+ %2 = fpext float %1 to double ; <double> [#uses=1]
+ %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
+ %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
+ ret float %retval
+}
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
new file mode 100644
index 000000000000..e57bbbba3b38
--- /dev/null
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float %acc, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
new file mode 100644
index 000000000000..3ae437d69db1
--- /dev/null
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %acc, float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float -0.0, %0
+ %2 = fsub float %1, %acc
+ ret float %2
+}
+
+define float @test2(float %acc, float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fmul float -1.0, %0
+ %2 = fsub float %1, %acc
+ ret float %2
+}
+
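
Note that test1 and test2 above differ only in how the negation of the product is spelled (fsub float -0.0, %0 versus fmul float -1.0, %0); both idioms are expected to reach the same fnmscs selection, which is presumably why the two functions carry an identical CHECK line.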
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 7bbda2d76d5d..613b347cdbf2 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fnmuld
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
define double @t1(double %a, double %b) {
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll
new file mode 100644
index 000000000000..efd87d2dcb89
--- /dev/null
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float -0.0, %0
+ ret float %1
+}
+
+define float @test2(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fmul float -1.0, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll
index 6d6d108f3283..4ac10badea97 100644
--- a/test/CodeGen/ARM/formal.ll
+++ b/test/CodeGen/ARM/formal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
declare void @bar(i64 %x, i64 %y)
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index ba199dbf5608..4e4ef722f97e 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -1,55 +1,71 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fmsr %t | count 4
-; RUN: grep fsitos %t
-; RUN: grep fmrs %t | count 2
-; RUN: grep fsitod %t
-; RUN: grep fmrrd %t | count 3
-; RUN: not grep fmdrr %t
-; RUN: grep fldd %t
-; RUN: grep fuitod %t
-; RUN: grep fuitos %t
-; RUN: grep 1065353216 %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define float @f(i32 %a) {
+;CHECK: f:
+;CHECK: fmsr
+;CHECK-NEXT: fsitos
+;CHECK-NEXT: fmrs
entry:
%tmp = sitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @g(i32 %a) {
+;CHECK: g:
+;CHECK: fmsr
+;CHECK-NEXT: fsitod
+;CHECK-NEXT: fmrrd
entry:
%tmp = sitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
}
define double @uint_to_double(i32 %a) {
+;CHECK: uint_to_double:
+;CHECK: fmsr
+;CHECK-NEXT: fuitod
+;CHECK-NEXT: fmrrd
entry:
%tmp = uitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
}
define float @uint_to_float(i32 %a) {
+;CHECK: uint_to_float:
+;CHECK: fmsr
+;CHECK-NEXT: fuitos
+;CHECK-NEXT: fmrs
entry:
%tmp = uitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @h(double* %v) {
+;CHECK: h:
+;CHECK: fldd
+;CHECK-NEXT: fmrrd
entry:
%tmp = load double* %v ; <double> [#uses=1]
ret double %tmp
}
define float @h2() {
+;CHECK: h2:
+;CHECK: 1065353216
entry:
ret float 1.000000e+00
}
define double @f2(double %a) {
+;CHECK: f2:
+;CHECK-NOT: fmdrr
ret double %a
}
define void @f3() {
+;CHECK: f3:
+;CHECK-NOT: fmdrr
+;CHECK: f4
entry:
%tmp = call double @f5( ) ; <double> [#uses=1]
call void @f4( double %tmp )
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
new file mode 100644
index 000000000000..9ce2ac549b57
--- /dev/null
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
+
+define i32 @test1(float %a, float %b) {
+; VFP2: test1:
+; VFP2: ftosizs s0, s0
+; NEON: test1:
+; NEON: vcvt.s32.f32 d0, d0
+entry:
+ %0 = fadd float %a, %b
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @test2(float %a, float %b) {
+; VFP2: test2:
+; VFP2: ftouizs s0, s0
+; NEON: test2:
+; NEON: vcvt.u32.f32 d0, d0
+entry:
+ %0 = fadd float %a, %b
+ %1 = fptoui float %0 to i32
+ ret i32 %1
+}
+
+define float @test3(i32 %a, i32 %b) {
+; VFP2: test3:
+; VFP2: fuitos s0, s0
+; NEON: test3:
+; NEON: vcvt.f32.u32 d0, d0
+entry:
+ %0 = add i32 %a, %b
+ %1 = uitofp i32 %0 to float
+ ret float %1
+}
+
+define float @test4(i32 %a, i32 %b) {
+; VFP2: test4:
+; VFP2: fsitos s0, s0
+; NEON: test4:
+; NEON: vcvt.f32.s32 d0, d0
+entry:
+ %0 = add i32 %a, %b
+ %1 = sitofp i32 %0 to float
+ ret float %1
+}
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index 568a6c41a0dd..ebeeb184121b 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -1,74 +1,88 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fadds %t
-; RUN: grep faddd %t
-; RUN: grep fmuls %t
-; RUN: grep fmuld %t
-; RUN: grep eor %t
-; RUN: grep fnegd %t
-; RUN: grep fdivs %t
-; RUN: grep fdivd %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define float @f1(float %a, float %b) {
+;CHECK: f1:
+;CHECK: fadds
entry:
%tmp = fadd float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f2(double %a, double %b) {
+;CHECK: f2:
+;CHECK: faddd
entry:
%tmp = fadd double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f3(float %a, float %b) {
+;CHECK: f3:
+;CHECK: fmuls
entry:
%tmp = fmul float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f4(double %a, double %b) {
+;CHECK: f4:
+;CHECK: fmuld
entry:
%tmp = fmul double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f5(float %a, float %b) {
+;CHECK: f5:
+;CHECK: fsubs
entry:
%tmp = fsub float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f6(double %a, double %b) {
+;CHECK: f6:
+;CHECK: fsubd
entry:
%tmp = fsub double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f7(float %a) {
+;CHECK: f7:
+;CHECK: eor
entry:
%tmp1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
ret float %tmp1
}
define double @f8(double %a) {
+;CHECK: f8:
+;CHECK: fnegd
entry:
%tmp1 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
ret double %tmp1
}
define float @f9(float %a, float %b) {
+;CHECK: f9:
+;CHECK: fdivs
entry:
%tmp1 = fdiv float %a, %b ; <float> [#uses=1]
ret float %tmp1
}
define double @f10(double %a, double %b) {
+;CHECK: f10:
+;CHECK: fdivd
entry:
%tmp1 = fdiv double %a, %b ; <double> [#uses=1]
ret double %tmp1
}
define float @f11(float %a) {
+;CHECK: f11:
+;CHECK: bic
entry:
%tmp1 = call float @fabsf( float %a ) ; <float> [#uses=1]
ret float %tmp1
@@ -77,6 +91,8 @@ entry:
declare float @fabsf(float)
define double @f12(double %a) {
+;CHECK: f12:
+;CHECK: fabsd
entry:
%tmp1 = call double @fabs( double %a ) ; <double> [#uses=1]
ret double %tmp1
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index ce0f4029589d..2c9591ca5429 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep movmi %t
-; RUN: grep moveq %t
-; RUN: grep movgt %t
-; RUN: grep movge %t
-; RUN: grep movne %t
-; RUN: grep fcmped %t | count 1
-; RUN: grep fcmpes %t | count 6
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define i32 @f1(float %a) {
+;CHECK: f1:
+;CHECK: fcmpes
+;CHECK: movmi
entry:
%tmp = fcmp olt float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp1 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -15,6 +11,9 @@ entry:
}
define i32 @f2(float %a) {
+;CHECK: f2:
+;CHECK: fcmpes
+;CHECK: moveq
entry:
%tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp2 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -22,6 +21,9 @@ entry:
}
define i32 @f3(float %a) {
+;CHECK: f3:
+;CHECK: fcmpes
+;CHECK: movgt
entry:
%tmp = fcmp ogt float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp3 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -29,6 +31,9 @@ entry:
}
define i32 @f4(float %a) {
+;CHECK: f4:
+;CHECK: fcmpes
+;CHECK: movge
entry:
%tmp = fcmp oge float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp4 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -36,6 +41,9 @@ entry:
}
define i32 @f5(float %a) {
+;CHECK: f5:
+;CHECK: fcmpes
+;CHECK: movls
entry:
%tmp = fcmp ole float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp5 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -43,6 +51,9 @@ entry:
}
define i32 @f6(float %a) {
+;CHECK: f6:
+;CHECK: fcmpes
+;CHECK: movne
entry:
%tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp6 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -50,6 +61,9 @@ entry:
}
define i32 @g1(double %a) {
+;CHECK: g1:
+;CHECK: fcmped
+;CHECK: movmi
entry:
%tmp = fcmp olt double %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp7 = zext i1 %tmp to i32 ; <i32> [#uses=1]
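One detail in the rewritten expectations above that is easy to misread: the ole case (f5) checks movls rather than movle. After fcmpes and the status transfer to CPSR, an unordered comparison sets C and V but clears N and Z; LE (Z set, or N not equal to V) would therefore fire on NaN operands, while LS (C clear, or Z set) does not, making LS the ordered-safe encoding for less-or-equal. A minimal probe of just that mapping, assuming the same llc invocation as the test (the RUN line and function name @ole_only are illustrative, not from the patch):

; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movls | count 1
define i32 @ole_only(float %a) {
entry:
  %tmp = fcmp ole float %a, 1.000000e+00
  %tmp5 = zext i1 %tmp to i32
  ret i32 %tmp5
}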
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 3e749afb400c..67f70e9eb5ed 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep movvs
+; RUN: llc < %s -march=arm | grep moveq
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs
define i32 @f7(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index 218b25f9c1b1..ee3c338e3b30 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -1,81 +1,101 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fcvtsd %t
-; RUN: grep fcvtds %t
-; RUN: grep ftosizs %t
-; RUN: grep ftouizs %t
-; RUN: grep ftosizd %t
-; RUN: grep ftouizd %t
-; RUN: grep fsitos %t
-; RUN: grep fsitod %t
-; RUN: grep fuitos %t
-; RUN: grep fuitod %t
-; RUN: llvm-as < %s | llc -march=arm > %t
-; RUN: grep truncdfsf2 %t
-; RUN: grep extendsfdf2 %t
-; RUN: grep fixsfsi %t
-; RUN: grep fixunssfsi %t
-; RUN: grep fixdfsi %t
-; RUN: grep fixunsdfsi %t
-; RUN: grep floatsisf %t
-; RUN: grep floatsidf %t
-; RUN: grep floatunsisf %t
-; RUN: grep floatunsidf %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -march=arm | FileCheck %s
define float @f1(double %x) {
+;CHECK-VFP: f1:
+;CHECK-VFP: fcvtsd
+;CHECK: f1:
+;CHECK: truncdfsf2
entry:
%tmp1 = fptrunc double %x to float ; <float> [#uses=1]
ret float %tmp1
}
define double @f2(float %x) {
+;CHECK-VFP: f2:
+;CHECK-VFP: fcvtds
+;CHECK: f2:
+;CHECK: extendsfdf2
entry:
%tmp1 = fpext float %x to double ; <double> [#uses=1]
ret double %tmp1
}
define i32 @f3(float %x) {
+;CHECK-VFP: f3:
+;CHECK-VFP: ftosizs
+;CHECK: f3:
+;CHECK: fixsfsi
entry:
%tmp = fptosi float %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define i32 @f4(float %x) {
+;CHECK-VFP: f4:
+;CHECK-VFP: ftouizs
+;CHECK: f4:
+;CHECK: fixunssfsi
entry:
%tmp = fptoui float %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define i32 @f5(double %x) {
+;CHECK-VFP: f5:
+;CHECK-VFP: ftosizd
+;CHECK: f5:
+;CHECK: fixdfsi
entry:
%tmp = fptosi double %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define i32 @f6(double %x) {
+;CHECK-VFP: f6:
+;CHECK-VFP: ftouizd
+;CHECK: f6:
+;CHECK: fixunsdfsi
entry:
%tmp = fptoui double %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define float @f7(i32 %a) {
+;CHECK-VFP: f7:
+;CHECK-VFP: fsitos
+;CHECK: f7:
+;CHECK: floatsisf
entry:
%tmp = sitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @f8(i32 %a) {
+;CHECK-VFP: f8:
+;CHECK-VFP: fsitod
+;CHECK: f8:
+;CHECK: floatsidf
entry:
%tmp = sitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
}
define float @f9(i32 %a) {
+;CHECK-VFP: f9:
+;CHECK-VFP: fuitos
+;CHECK: f9:
+;CHECK: floatunsisf
entry:
%tmp = uitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @f10(i32 %a) {
+;CHECK-VFP: f10:
+;CHECK-VFP: fuitod
+;CHECK: f10:
+;CHECK: floatunsidf
entry:
%tmp = uitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 13653bbe6aa0..fa897bf83f3a 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep {flds.*\\\[} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep {fsts.*\\\[} | count 1
define float @f1(float %a) {
diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll
index 461a2c966ec4..6d487927ee61 100644
--- a/test/CodeGen/ARM/fpow.ll
+++ b/test/CodeGen/ARM/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define double @t(double %x, double %y) nounwind optsize {
entry:
diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll
index ab09ffff6b36..174106bf4faf 100644
--- a/test/CodeGen/ARM/fpowi.ll
+++ b/test/CodeGen/ARM/fpowi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep powidf2
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep powidf2
; PR1287
; ModuleID = '<stdin>'
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 41168acc42a5..0d270b0c0568 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
@i = weak global i32 0 ; <i32*> [#uses=2]
@u = weak global i32 0 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
new file mode 100644
index 000000000000..060dd464f1b8
--- /dev/null
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fsub float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/hardfloat_neon.ll b/test/CodeGen/ARM/hardfloat_neon.ll
new file mode 100644
index 000000000000..4abf04b0a4b6
--- /dev/null
+++ b/test/CodeGen/ARM/hardfloat_neon.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard
+
+define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind {
+ %tmp1 = mul <16 x i8> %A, %B
+ ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) {
+ %tmp = call <16 x i8> @g(<16 x i8> %b)
+ ret <16 x i8> %tmp
+}
+
+declare <16 x i8> @g(<16 x i8>)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 16231da39b7c..ccdc7bf4c140 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu --disable-fp-elim | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
; RUN: grep mov | count 3
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll
index 6cf69aa486d5..90f5308d5ff0 100644
--- a/test/CodeGen/ARM/hidden-vis-2.ll
+++ b/test/CodeGen/ARM/hidden-vis-2.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
@x = weak hidden global i32 0 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; CHECK: t:
+; CHECK: ldr
+; CHECK-NEXT: ldr
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll
index 4477f2a441a1..3bd710ae949f 100644
--- a/test/CodeGen/ARM/hidden-vis-3.ll
+++ b/test/CodeGen/ARM/hidden-vis-3.ll
@@ -1,12 +1,15 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 6
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep long | count 4
+; RUN: llc < %s -mtriple=arm-apple-darwin9 | FileCheck %s
@x = external hidden global i32 ; <i32*> [#uses=1]
@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; CHECK: LCPI1_0:
+; CHECK-NEXT: .long _x
+; CHECK: LCPI1_1:
+; CHECK-NEXT: .long _y
+
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
%1 = load i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/hidden-vis.ll b/test/CodeGen/ARM/hidden-vis.ll
index 93f81ecdae05..3544ae81a0a4 100644
--- a/test/CodeGen/ARM/hidden-vis.ll
+++ b/test/CodeGen/ARM/hidden-vis.ll
@@ -1,18 +1,23 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep .private_extern | count 2
+; RUN: llc < %s -mtriple=arm-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
-%struct.Person = type { i32 }
@a = hidden global i32 0
@b = external global i32
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
-define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) {
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
ret void
}
-declare void @function(i32)
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
-define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) {
+; DARWIN: t2:
+; DARWIN: .private_extern _a
ret void
}
-
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index ede6d7455330..1054f27dbe30 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -stats |& \
+; RUN: llc < %s -march=arm -stats |& \
; RUN: grep {3 .*Number of machine instrs printed}
;; Integer absolute value, should produce something as good as: ARM:
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index 7d429550b3ad..e6aa044564a2 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep bx | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bx | count 1
define i32 @t1(i32 %a, i32 %b) {
%tmp2 = icmp eq i32 %a, 0
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index 394206121218..ce57d736c167 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep bxlt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bxgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bxge | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bxlt | count 1
+; RUN: llc < %s -march=arm | grep bxgt | count 1
+; RUN: llc < %s -march=arm | grep bxge | count 1
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
%tmp2 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 620bcbea1f27..f7ebac6f2bac 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep cmpne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bx | count 2
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep cmpne | count 1
+; RUN: llc < %s -march=arm | grep bx | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
switch i32 %c, label %cond_next [
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
index ce5a679196c5..f28c61b9787f 100644
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep subgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep suble | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep subgt | count 1
+; RUN: llc < %s -march=arm | grep suble | count 1
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
define i32 @t(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index f8d4f82bbe28..e9145ac36ddf 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep blge | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
@x = external global i32* ; <i32**> [#uses=1]
@@ -11,6 +10,8 @@ entry:
}
define void @t1(i32 %a, i32 %b) {
+; CHECK: t1:
+; CHECK: ldmltfd sp!, {r7, pc}
entry:
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 63c4a0819dbf..58241157580c 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,10 +1,6 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep cmpne | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep ldmhi | count 1
define void @foo(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index 6bb4b5609a58..f9cf88f7292e 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep cmpeq | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep moveq | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep ldmeq | count 1
; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 85bd8c7bf1fc..6cb8e7bb69fd 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,7 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep ldmne | count 1
%struct.SString = type { i8*, i32, i32 }
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
index bbd2f2ed6213..05bdc459c83f 100644
--- a/test/CodeGen/ARM/ifcvt9.ll
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define fastcc void @t() nounwind {
entry:
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index ad24eb5dad71..febe6f56b66c 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux
define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
{
diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll
index 998adbae5c94..6f25f9dcb323 100644
--- a/test/CodeGen/ARM/imm.ll
+++ b/test/CodeGen/ARM/imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep CPI
+; RUN: llc < %s -march=arm | not grep CPI
define i32 @test1(i32 %A) {
%B = add i32 %A, -268435441 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll
index 2ceceae0d9d1..45dfcf0b82a5 100644
--- a/test/CodeGen/ARM/inlineasm-imm-arm.ll
+++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; Test ARM-mode "I" constraint, for any Data Processing immediate.
define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll
index 2f7332a5f480..d522348ba999 100644
--- a/test/CodeGen/ARM/inlineasm.ll
+++ b/test/CodeGen/ARM/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
define i32 @test1(i32 %tmp54) {
%tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 ) ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll
index 69394eb5bd49..a99bccf5a654 100644
--- a/test/CodeGen/ARM/inlineasm2.ll
+++ b/test/CodeGen/ARM/inlineasm2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @__ieee754_sqrt(double %x) {
%tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x )
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index f20344301e99..59f0d538d47c 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
; RUN: grep mov | count 3
define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 7e8eb42b690f..5116ac82862a 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {mov r0, r0, lsr #31}
+; RUN: llc < %s -march=arm | grep {mov r0, r0, lsr #31}
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
index b1738a4a38a6..ddf0f0ec7cc0 100644
--- a/test/CodeGen/ARM/large-stack.ll
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @test1() {
%tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 6a054577fc8b..774b3c09bed4 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep ldmia | count 2
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep ldmib | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
; RUN: grep {ldmfd sp\!} | count 3
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index ea9965572319..954fb5b8ad31 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {ldr r0} | count 7
-; RUN: llvm-as < %s | llc -march=arm | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=arm | not grep mvn
-; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsr
+; RUN: llc < %s -march=arm | grep {ldr r0} | count 7
+; RUN: llc < %s -march=arm | grep mov | grep 1
+; RUN: llc < %s -march=arm | not grep mvn
+; RUN: llc < %s -march=arm | grep ldr | grep lsl
+; RUN: llc < %s -march=arm | grep ldr | grep lsr
define i32 @f1(i32* %v) {
entry:
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
index b99c72197740..d29eb022bace 100644
--- a/test/CodeGen/ARM/ldr_ext.ll
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -1,27 +1,36 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsh | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
-define i32 @test1(i8* %v.pntr.s0.u1) {
- %tmp.u = load i8* %v.pntr.s0.u1
+define i32 @test1(i8* %t1) nounwind {
+; CHECK: ldrb
+ %tmp.u = load i8* %t1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
-define i32 @test2(i16* %v.pntr.s0.u1) {
- %tmp.u = load i16* %v.pntr.s0.u1
+define i32 @test2(i16* %t1) nounwind {
+; CHECK: ldrh
+ %tmp.u = load i16* %t1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
-define i32 @test3(i8* %v.pntr.s1.u0) {
- %tmp.s = load i8* %v.pntr.s1.u0
+define i32 @test3(i8* %t0) nounwind {
+; CHECK: ldrsb
+ %tmp.s = load i8* %t0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
-define i32 @test4() {
+define i32 @test4(i16* %t0) nounwind {
+; CHECK: ldrsh
+ %tmp.s = load i16* %t0
+ %tmp1.s = sext i16 %tmp.s to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; CHECK: mov r0, #0
+; CHECK: ldrsh
%tmp.s = load i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
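Besides the RUN-line rewrite, ldr_ext.ll trades a stack of grep | count pipelines for FileCheck, which matches the ; CHECK: patterns against llc's output in source order. That lets each expectation sit next to the function it constrains, which is why the new test4 and test5 cases need no extra RUN lines. A hedged sketch of the idiom, with a hypothetical @demo function in the typed-pointer IR of this era:

; RUN: llc < %s -march=arm | FileCheck %s
define i32 @demo(i8* %p) nounwind {
; CHECK: ldrb
  %v = load i8* %p
  %r = zext i8 %v to i32
  ret i32 %r
}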
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index 44315066c4c0..a3abdb603fa7 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep mov
define i32 @f1() {
%buf = alloca [32 x i32], align 4
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index 0491563fc6a6..97a48e1377e5 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {ldr.*\\\[.*\],} | count 1
define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index 7e447422361e..7c442845682e 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {ldr.*\\!} | count 2
define i32* @test1(i32* %X, i32* %dest) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index f1bee058a0fc..8f7ae55c6eaf 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,12 +1,20 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | grep ldrd
-; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin | not grep ldrd
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi | not grep ldrd
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
; rdar://r6949835
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
+;V6: ldrd r2, [r2]
+
+;V5: ldr r3, [r2]
+;V5-NEXT: ldr r2, [r2, #+4]
+
+;EABI: ldr r3, [r2]
+;EABI-NEXT: ldr r2, [r2, #+4]
+
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
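ldrd.ll shows the multi-subtarget form of the conversion: each RUN line passes -check-prefix, so one file carries separate expectations per triple. ARMv6 Darwin can load the i64 with a single doubleword ldrd, while the armv5 and EABI configurations split it into two ldr instructions at offsets #0 and #+4. The mechanism, with the triples taken from the hunk:

; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
;V6: ldrd
;V5: ldr
;V5-NEXT: ldr

Directives under a given prefix are consulted only by the matching run, and -NEXT pins a match to the line immediately after the previous one.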
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
index 8896ead5a51c..56a4a477f510 100644
--- a/test/CodeGen/ARM/load-global.ll
+++ b/test/CodeGen/ARM/load-global.ll
@@ -1,14 +1,10 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-apple-darwin -relocation-model=static | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | \
; RUN: not grep {L_G\$non_lazy_ptr}
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \
; RUN: grep {L_G\$non_lazy_ptr} | count 2
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
; RUN: grep {ldr.*pc} | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-linux-gnueabi -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | \
; RUN: grep {GOT} | count 1
@G = external global i32
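load-global.ll reads one external global under four relocation models, and the greps encode the expected addressing strategy for each: static Darwin references the global directly (hence the not grep on the stub), dynamic-no-pic goes through an L_G$non_lazy_ptr indirection (counted twice, presumably the load and the stub's definition), PIC Darwin forms the address with a pc-relative load, and PIC on arm-linux-gnueabi indirects through the GOT. In rough outline, with the exact instruction forms left to the greps above:

;   static:          no L_G$non_lazy_ptr reference at all
;   dynamic-no-pic:  ldr via L_G$non_lazy_ptr
;   pic, Darwin:     one ldr ... [pc, ...] to form the address
;   pic, gnueabi:    one reference through the GOT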
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
index 05097328102c..253b0e145f81 100644
--- a/test/CodeGen/ARM/load.ll
+++ b/test/CodeGen/ARM/load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep ldrsb %t
; RUN: grep ldrb %t
; RUN: grep ldrsh %t
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
index 4bab330c7360..c76a5e4d4d1f 100644
--- a/test/CodeGen/ARM/long-setcc.ll
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep cmp | count 1
+; RUN: llc < %s -march=arm | grep cmp | count 1
define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index fe0ee5473305..2fcaac0d9c98 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
; RUN: grep -- {-2147483648} | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep adds | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep adc | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {subs } | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | grep mvn | count 3
+; RUN: llc < %s -march=arm | grep adds | count 1
+; RUN: llc < %s -march=arm | grep adc | count 1
+; RUN: llc < %s -march=arm | grep {subs } | count 1
+; RUN: llc < %s -march=arm | grep sbc | count 1
+; RUN: llc < %s -march=arm | \
; RUN: grep smull | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep umull | count 1
define i64 @f1() {
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 55d0cdc54151..057b5f067f80 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep rrx %t | count 1
; RUN: grep __ashldi3 %t
; RUN: grep __ashrdi3 %t
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 3881e91453b4..507ec2c7bd3e 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -stats |& grep {39.*Number of machine instrs printed}
-; RUN: llvm-as < %s | llc -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
; the constant and do a store. We do *not* want something like this:
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 02902f2debd3..8130019cbfd9 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep lsl | grep -F {lsl #2\]}
+; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
; Should use scaled addressing mode.
define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll
index e98316576d8a..f46c7a5857ab 100644
--- a/test/CodeGen/ARM/mem.ll
+++ b/test/CodeGen/ARM/mem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep strb
-; RUN: llvm-as < %s | llc -march=arm | grep strh
+; RUN: llc < %s -march=arm | grep strb
+; RUN: llc < %s -march=arm | grep strh
define void @f1() {
entry:
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 4bf0b4f6f3b1..ed20c32dc0d5 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldmia
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep stmia
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrb
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrh
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 0b58bf680157..41d5944cb83e 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @f() {
entry:
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
new file mode 100644
index 000000000000..85407fa254b0
--- /dev/null
+++ b/test/CodeGen/ARM/mls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b
+ %tmp2 = sub i32 %c, %tmp1
+ ret i32 %tmp2
+}
+
+; sub doesn't commute, so no mls for this one
+define i32 @f2(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b
+ %tmp2 = sub i32 %tmp1, %c
+ ret i32 %tmp2
+}
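mls.ll is a new test for the ARMv6T2 multiply-and-subtract instruction, whose semantics are mls Rd, Rn, Rm, Ra = Ra - (Rn * Rm). f1 computes c - a*b, which fits that shape directly; f2 computes a*b - c, and since subtraction does not commute no operand ordering makes mls produce it, so it must stay a mul followed by a sub. Expected shape for f1, assuming the usual AAPCS assignment a=r0, b=r1, c=r2:

;   mls r0, r0, r1, r2    ; r0 = r2 - (r0 * r1)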
diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll
index 3543b5de55db..466a8020acce 100644
--- a/test/CodeGen/ARM/mul.ll
+++ b/test/CodeGen/ARM/mul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mul | count 2
-; RUN: llvm-as < %s | llc -march=arm | grep lsl | count 2
+; RUN: llc < %s -march=arm | grep mul | count 2
+; RUN: llc < %s -march=arm | grep lsl | count 2
define i32 @f1(i32 %u) {
%tmp = mul i32 %u, %u
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
new file mode 100644
index 000000000000..93188cdd883f
--- /dev/null
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 9
+ ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 7
+ ret i32 %0
+}
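mul_const.ll pins down strength reduction of constant multiplies into the barrel shifter. The arithmetic behind the two CHECK lines:

;   t1: 9*v = v + 8*v = v + (v << 3)   ->  add r0, r0, r0, lsl #3
;   t2: 7*v = 8*v - v = (v << 3) - v   ->  rsb r0, r0, r0, lsl #3

rsb is reverse subtract, rd = operand2 - rn, which t2 needs because the shifted value is the minuend.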
diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll
index de75e96b8704..148f291e551d 100644
--- a/test/CodeGen/ARM/mulhi.ll
+++ b/test/CodeGen/ARM/mulhi.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep smmul | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep umull | count 1
+; RUN: llc < %s -march=arm | grep umull | count 1
define i32 @smulhi(i32 %x, i32 %y) {
%tmp = sext i32 %x to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
index a7ef907033de..571c21a833ec 100644
--- a/test/CodeGen/ARM/mvn.ll
+++ b/test/CodeGen/ARM/mvn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 8
+; RUN: llc < %s -march=arm | grep mvn | count 8
define i32 @f1() {
entry:
diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll
index 18b516fc1a8c..58927374177a 100644
--- a/test/CodeGen/ARM/neon_arith1.ll
+++ b/test/CodeGen/ARM/neon_arith1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vadd
+; RUN: llc < %s -march=arm -mattr=+neon | grep vadd
define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 8901ba177dac..2796dec5b970 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fldd | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fstd
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep fstd
+; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd
define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index a26904afca30..547bab76356b 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd | count 2
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 151beac3efce..1e2e7aa0c8ff 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep pkhbt | count 5
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep pkhtb | count 4
define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/pr3502.ll b/test/CodeGen/ARM/pr3502.ll
index dee3fc43f973..606d9698b977 100644
--- a/test/CodeGen/ARM/pr3502.ll
+++ b/test/CodeGen/ARM/pr3502.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-none-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi
;pr3502
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index e5eeccb356a5..03376a4c61b7 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi > %t
+; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
; RUN: grep .Lfoo: %t
; RUN: egrep bl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
index 454d36b46f29..ba9699efd597 100644
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2
+; RUN: llc < %s -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 4
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.LOCBOX = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll
index 792b1690add2..5c312eb98a32 100644
--- a/test/CodeGen/ARM/ret0.ll
+++ b/test/CodeGen/ARM/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test() {
ret i32 0
diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll
index 48a1fda35b33..1ab947b1e20d 100644
--- a/test/CodeGen/ARM/ret_arg1.ll
+++ b/test/CodeGen/ARM/ret_arg1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1) {
ret i32 %a1
diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll
index a74870f85870..84477d042c74 100644
--- a/test/CodeGen/ARM/ret_arg2.ll
+++ b/test/CodeGen/ARM/ret_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2) {
ret i32 %a2
diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll
index 9210e7b09f58..f7f9057432d1 100644
--- a/test/CodeGen/ARM/ret_arg3.ll
+++ b/test/CodeGen/ARM/ret_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2, i32 %a3) {
ret i32 %a3
}
diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll
index a9c66e9e98d1..f7b3e4a282b2 100644
--- a/test/CodeGen/ARM/ret_arg4.ll
+++ b/test/CodeGen/ARM/ret_arg4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
ret i32 %a4
diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll
index 620a0175e072..c4f9fb5e0a9b 100644
--- a/test/CodeGen/ARM/ret_arg5.ll
+++ b/test/CodeGen/ARM/ret_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
ret i32 %a5
diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll
index 287d92b9eb6e..2bafea675531 100644
--- a/test/CodeGen/ARM/ret_f32_arg2.ll
+++ b/test/CodeGen/ARM/ret_f32_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define float @test_f32(float %a1, float %a2) {
ret float %a2
diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll
index 3418be93e1e8..c6ce60ecb9c8 100644
--- a/test/CodeGen/ARM/ret_f32_arg5.ll
+++ b/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) {
ret float %a5
diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll
index 66848d5fb49b..386e85f4b9a5 100644
--- a/test/CodeGen/ARM/ret_f64_arg2.ll
+++ b/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @test_f64(double %a1, double %a2) {
ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
index 626ee6fb1374..bdb0a606227b 100644
--- a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mcpu=arm8 -mattr=+vfp2
+; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2
define double @test_double_arg_reg_split(i32 %a1, double %a2) {
ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll
index b03b604beee7..4f841a3cde7b 100644
--- a/test/CodeGen/ARM/ret_f64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) {
ret double %a3
diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll
index ba3ec7fb7517..21443177d3de 100644
--- a/test/CodeGen/ARM/ret_f64_arg_stack.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) {
ret double %a4
diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll
index 0fe98e6b70fc..908c34f8cda6 100644
--- a/test/CodeGen/ARM/ret_i128_arg2.ll
+++ b/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) {
ret i128 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
index b015a96e0bf0..b1a1024acaf1 100644
--- a/test/CodeGen/ARM/ret_i64_arg2.ll
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i64 @test_i64(i64 %a1, i64 %a2) {
ret i64 %a2
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
index 5dfecca319a1..ffc1d2f4b52a 100644
--- a/test/CodeGen/ARM/ret_i64_arg3.ll
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll
index 5bd5cb2a230b..956bce558fc5 100644
--- a/test/CodeGen/ARM/ret_i64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) {
ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll
index 68db8c423461..2b7ae0562884 100644
--- a/test/CodeGen/ARM/ret_void.ll
+++ b/test/CodeGen/ARM/ret_void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @test() {
ret void
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 68f6264e8a06..1c12268ef86c 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep rev16
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep revsh
+; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
+; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh
define i32 @test1(i32 %X) {
%tmp1 = lshr i32 %X, 8 ; <i32> [#uses=3]
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
new file mode 100644
index 000000000000..923f52a86862
--- /dev/null
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+define i32 @f1(i32 %a) {
+entry:
+; CHECK: f1:
+; CHECK: sbfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = ashr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a) {
+entry:
+; CHECK: f2:
+; CHECK: ubfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = lshr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f3(i32 %a) {
+entry:
+; CHECK: f3:
+; CHECK: sbfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = ashr i32 %tmp, 29
+ ret i32 %tmp2
+}
+
+define i32 @f4(i32 %a) {
+entry:
+; CHECK: f4:
+; CHECK: ubfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = lshr i32 %tmp, 29
+ ret i32 %tmp2
+}
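sbfx.ll covers the ARMv6T2 bitfield extracts. A left shift by k followed by a right shift by m >= k isolates a field: the arithmetic-shift form becomes sbfx rd, rn, #lsb, #width (sign-extending) and the logical-shift form becomes ubfx (zero-extending), with lsb = m - k and width = 32 - m. Checking the constants in the test:

;   f1: shl 12, ashr 12  ->  lsb = 0, width = 32 - 12 = 20       ->  sbfx r0, r0, #0, #20
;   f3: shl 24, ashr 29  ->  lsb = 29 - 24 = 5, width = 32 - 29 = 3  ->  sbfx r0, r0, #5, #3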
diff --git a/test/CodeGen/ARM/section.ll b/test/CodeGen/ARM/section.ll
index aa658451675b..7a566d49d322 100644
--- a/test/CodeGen/ARM/section.ll
+++ b/test/CodeGen/ARM/section.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
; RUN: grep {__DTOR_END__:}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
; RUN: grep {\\.section.\\.dtors,"aw",.progbits}
@__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors" ; <[1 x i32]*> [#uses=0]
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 5148a5b86998..85c8b5b8477f 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movlt | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep movle | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movls | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movhi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcpydmi | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
define i32 @f1(i32 %a.s) {
+;CHECK: f1:
+;CHECK: moveq
entry:
%tmp = icmp eq i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -15,6 +11,8 @@ entry:
}
define i32 @f2(i32 %a.s) {
+;CHECK: f2:
+;CHECK: movgt
entry:
%tmp = icmp sgt i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -22,6 +20,8 @@ entry:
}
define i32 @f3(i32 %a.s, i32 %b.s) {
+;CHECK: f3:
+;CHECK: movlt
entry:
%tmp = icmp slt i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -29,6 +29,8 @@ entry:
}
define i32 @f4(i32 %a.s, i32 %b.s) {
+;CHECK: f4:
+;CHECK: movle
entry:
%tmp = icmp sle i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -36,6 +38,8 @@ entry:
}
define i32 @f5(i32 %a.u, i32 %b.u) {
+;CHECK: f5:
+;CHECK: movls
entry:
%tmp = icmp ule i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -43,6 +47,8 @@ entry:
}
define i32 @f6(i32 %a.u, i32 %b.u) {
+;CHECK: f6:
+;CHECK: movhi
entry:
%tmp = icmp ugt i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -50,6 +56,11 @@ entry:
}
define double @f7(double %a, double %b) {
+;CHECK: f7:
+;CHECK: movlt
+;CHECK: movlt
+;CHECK-VFP: f7:
+;CHECK-VFP: fcpydmi
%tmp = fcmp olt double %a, 1.234e+00
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
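The select.ll rewrite also makes the lowering pattern easy to read: each icmp plus select becomes a mov of the false value, a cmp, and one conditionally executed mov whose predicate mirrors the comparison (eq, gt, lt, le, ls, hi). A sketch for f1, with the register choices purely illustrative:

;   mov r1, #3        ; false value
;   cmp r0, #4
;   moveq r1, #2      ; taken only when %a.s == 4

f7 selects a double: without VFP each 32-bit half needs its own predicated mov, hence the two movlt checks, while +vfp2 can materialize the result straight into a d register, which the CHECK-VFP prefix pins down via fcpydmi.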
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 6855e3227b99..7fd91ceea5ad 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mov | count 2
+; RUN: llc < %s -march=arm | grep mov | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
%tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index cae1c44a729d..2bbe9fd2602c 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep add | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep bic | grep asr
+; RUN: llc < %s -march=arm | grep add | grep lsl
+; RUN: llc < %s -march=arm | grep bic | grep asr
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index 7a4e4887cc7b..b7ab2e796f8a 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -mattr=+v5TE
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
; RUN: grep smulbt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
; RUN: grep smultt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
; RUN: grep smlabt | count 1
@x = weak global i16 0 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
new file mode 100644
index 000000000000..f4b27a7603e5
--- /dev/null
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
+entry:
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 6.300000e+01, float* undef, align 4
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 0.000000e+00, float* undef, align 4
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb193, %entry
+ %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+ %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+ %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
+ %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
+ %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
+ %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
+ %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
+ %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+ br i1 undef, label %bb193, label %bb186
+
+bb186: ; preds = %bb4
+ br label %bb193
+
+bb193: ; preds = %bb186, %bb4
+ %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+ %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb4
+}
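spill-q.ll is a regression test reduced from PR4789: with enough NEON values live across the loop, a 128-bit Q register must be spilled. The CHECKs deliberately pin only the shape of the spill and reload against the stack pointer, not which registers are involved; presumably the Q value travels as a D-register pair, roughly:

;   vstmia sp, {...}   ; spill
;   ...
;   vldmia sp, {...}   ; reload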
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index c3dd65a594d6..1dd57ddb9f2f 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep add | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep add | count 1
define void @f1() {
%c = alloca i8, align 1
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index ed5e4c5f5943..22a7ecb4aa28 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index ba813805bacc..801b9cee37d6 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {str .*\\\[.*\],} | count 1
define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index e9f194574e43..f8d3df29c408 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
@b = external global i64*
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index c02663fa4040..e56e3f253e63 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {str.*\\!} | count 2
define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll
index 77c66ec2c7e0..2f1166b64b59 100644
--- a/test/CodeGen/ARM/str_trunc.ll
+++ b/test/CodeGen/ARM/str_trunc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep strb | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep strh | count 1
define void @test1(i32 %v, i16* %ptr) {
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index e9f302c88d1c..4752f17f1e1c 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep sxtab | count 1
define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
new file mode 100644
index 000000000000..848a4dfed054
--- /dev/null
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+
+define i32 @f6(i32 %a) {
+; CHECK:f6
+; CHECK: movw r0, #:lower16:65537123
+; CHECK: movt r0, #:upper16:65537123
+ %tmp = add i32 0, 65537123
+ ret i32 %tmp
+}
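t2-imm.ll checks Thumb-2 materialization of a 32-bit immediate that no single instruction encodes: movw writes the low 16 bits and zeroes the top, movt then writes the high 16 bits, and the add of 0 folds away. The split for this constant is 65537123 = 1000 * 65536 + 1123, so the two relocations resolve to:

;   movw r0, #1123    ; :lower16:65537123
;   movt r0, #1000    ; :upper16:65537123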
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 6476b483d7d9..3143387ead65 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {__aeabi_read_tp}
define i8* @test() {
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 6866a42db495..1087094e5798 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
; RUN: -relocation-model=pic | grep {__tls_get_addr}
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 90e3bcf9040b..328472081e19 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
; RUN: -relocation-model=pic | grep {__tls_get_addr}
@i = external thread_local global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df2913b61cda..df7a4ca02db8 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {tbss}
%struct.anon = type { i32, i32 }
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
index 6111ec9d2f48..3033c2ba3e25 100644
--- a/test/CodeGen/ARM/trunc_ldr.ll
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb.*7 | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1
%struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** }
%struct.B = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 0e85fb69eb3a..2da08b60e86c 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep orr
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep orr
+; RUN: llc < %s -march=arm | not grep mov
define void @bar(i8* %P, i16* %Q) {
entry:
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
index bdeee3fa43fe..c83111e69937 100644
--- a/test/CodeGen/ARM/tst_teq.ll
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep tst
-; RUN: llvm-as < %s | llc -march=arm | grep teq
+; RUN: llc < %s -march=arm | grep tst
+; RUN: llc < %s -march=arm | grep teq
define i32 @f(i32 %a) {
entry:
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
index 055c3c370ee6..32eb225a2ad6 100644
--- a/test/CodeGen/ARM/uint64tof64.ll
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index dad1897463a6..fcaa2b3103e9 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,16 +1,31 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -o %t -f
-; RUN: grep ldrb %t | count 4
-; RUN: grep strb %t | count 4
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=V7
+; rdar://7113725
- %struct.p = type <{ i8, i32 }>
-@t = global %struct.p <{ i8 1, i32 10 }> ; <%struct.p*> [#uses=1]
-@u = weak global %struct.p zeroinitializer ; <%struct.p*> [#uses=1]
-
-define i32 @main() {
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
- %tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1 ; <i32> [#uses=2]
- store i32 %tmp3, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
- ret i32 %tmp3
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+; V7: t:
+; V7: ldr r1
+; V7: str r1
+ %__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1]
+ %__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1]
+ %tmp.i = load i32* %__src1.i, align 1 ; <i32> [#uses=1]
+ store i32 %tmp.i, i32* %__dest2.i, align 1
+ ret void
}
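The rewritten unaligned_load_store.ll separates targets by what the hardware tolerates: an i32 load or store with align 1 must be expanded into four ldrb/strb pairs on the generic ARM target, while armv6-apple-darwin and v7 allow unaligned word access, so a single ldr/str suffices. The body is a 4-byte copy expressed through unaligned i32 accesses; a minimal standalone sketch with a hypothetical @copy4:

define void @copy4(i8* %dst, i8* %src) nounwind {
  %s = bitcast i8* %src to i32*
  %d = bitcast i8* %dst to i32*
  %v = load i32* %s, align 1         ; possibly unaligned word load
  store i32 %v, i32* %d, align 1     ; possibly unaligned word store
  ret void
}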
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
index 149afc4abafe..bd28034b3adb 100644
--- a/test/CodeGen/ARM/unord.ll
+++ b/test/CodeGen/ARM/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep movne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
+; RUN: llc < %s -march=arm | grep movne | count 1
+; RUN: llc < %s -march=arm | grep moveq | count 1
define i32 @f1(float %X, float %Y) {
%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
index 09c74ebbb776..6307795499bf 100644
--- a/test/CodeGen/ARM/uxt_rot.ll
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxth | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1
define i8 @test1(i32 %A.u) zeroext {
%B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/ARM/uxtb.ll b/test/CodeGen/ARM/uxtb.ll
index 73e918b7a5d3..9d6e4bd4dfce 100644
--- a/test/CodeGen/ARM/uxtb.ll
+++ b/test/CodeGen/ARM/uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | \
+; RUN: llc < %s -mtriple=armv6-apple-darwin | \
; RUN: grep uxt | count 10
define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 98ee1e155ba8..e2dca4647bce 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vaba\\.s8} %t | count 2
-; RUN: grep {vaba\\.s16} %t | count 2
-; RUN: grep {vaba\\.s32} %t | count 2
-; RUN: grep {vaba\\.u8} %t | count 2
-; RUN: grep {vaba\\.u16} %t | count 2
-; RUN: grep {vaba\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabas8:
+;CHECK: vaba.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -15,6 +11,8 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
}
define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabas16:
+;CHECK: vaba.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -23,6 +21,8 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabas32:
+;CHECK: vaba.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -31,6 +31,8 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabau8:
+;CHECK: vaba.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -39,6 +41,8 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
}
define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabau16:
+;CHECK: vaba.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -47,6 +51,8 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabau32:
+;CHECK: vaba.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -55,6 +61,8 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQs8:
+;CHECK: vaba.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -63,6 +71,8 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
}
define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQs16:
+;CHECK: vaba.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -71,6 +81,8 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQs32:
+;CHECK: vaba.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -79,6 +91,8 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
}
define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQu8:
+;CHECK: vaba.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -87,6 +101,8 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
}
define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQu16:
+;CHECK: vaba.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -95,6 +111,8 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQu32:
+;CHECK: vaba.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -117,3 +135,71 @@ declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) no
declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabals8:
+;CHECK: vabal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabals16:
+;CHECK: vabal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabals32:
+;CHECK: vabal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabalu8:
+;CHECK: vabal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabalu16:
+;CHECK: vabal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabalu32:
+;CHECK: vabal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
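The vaba.ll additions cover the widening forms of absolute-difference-and-accumulate. vaba keeps the lane width, d[i] = d[i] + |n[i] - m[i]|, while vabal widens the accumulator, which is why the new @vabals8 takes an <8 x i16>* accumulator but <8 x i8>* inputs. In instruction terms, registers illustrative:

;   vaba.s8  d0, d1, d2   ; d0[i] += |d1[i] - d2[i]|, 8-bit lanes
;   vabal.s8 q0, d1, d2   ; q0[i] += |d1[i] - d2[i]|, widened to 16 bits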
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 0fe5ddb94ba4..2b4539361459 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabd\\.s8} %t | count 2
-; RUN: grep {vabd\\.s16} %t | count 2
-; RUN: grep {vabd\\.s32} %t | count 2
-; RUN: grep {vabd\\.u8} %t | count 2
-; RUN: grep {vabd\\.u16} %t | count 2
-; RUN: grep {vabd\\.u32} %t | count 2
-; RUN: grep {vabd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabds8:
+;CHECK: vabd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -15,6 +10,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabds16:
+;CHECK: vabd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -22,6 +19,8 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabds32:
+;CHECK: vabd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -29,6 +28,8 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdu8:
+;CHECK: vabd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -36,6 +37,8 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdu16:
+;CHECK: vabd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -43,6 +46,8 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdu32:
+;CHECK: vabd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -50,13 +55,17 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vabdf32:
+;CHECK: vabd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQs8:
+;CHECK: vabd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -64,6 +73,8 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQs16:
+;CHECK: vabd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -71,6 +82,8 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQs32:
+;CHECK: vabd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -78,6 +91,8 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQu8:
+;CHECK: vabd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -85,6 +100,8 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQu16:
+;CHECK: vabd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -92,6 +109,8 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQu32:
+;CHECK: vabd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -99,9 +118,11 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vabdQf32:
+;CHECK: vabd.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -113,7 +134,7 @@ declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +144,66 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdls8:
+;CHECK: vabdl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdls16:
+;CHECK: vabdl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdls32:
+;CHECK: vabdl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdlu8:
+;CHECK: vabdl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdlu16:
+;CHECK: vabdl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdlu32:
+;CHECK: vabdl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 629baa762a00..18ba61f81e65 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -1,64 +1,131 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabs\\.s8} %t | count 2
-; RUN: grep {vabs\\.s16} %t | count 2
-; RUN: grep {vabs\\.s32} %t | count 2
-; RUN: grep {vabs\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
+;CHECK: vabss8:
+;CHECK: vabs.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
+;CHECK: vabss16:
+;CHECK: vabs.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
+;CHECK: vabss32:
+;CHECK: vabs.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
+;CHECK: vabsf32:
+;CHECK: vabs.f32
%tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+ %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp2
}
define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vabsQs8:
+;CHECK: vabs.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vabsQs16:
+;CHECK: vabs.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vabsQs32:
+;CHECK: vabs.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
+;CHECK: vabsQf32:
+;CHECK: vabs.f32
%tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+ %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp2
}
declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
+define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
+;CHECK: vqabss8:
+;CHECK: vqabs.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
+;CHECK: vqabss16:
+;CHECK: vqabs.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
+;CHECK: vqabss32:
+;CHECK: vqabs.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqabsQs8:
+;CHECK: vqabs.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqabsQs16:
+;CHECK: vqabs.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqabsQs32:
+;CHECK: vqabs.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
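
For reference, the FileCheck idiom used throughout these conversions pairs a single RUN line with per-function CHECK comments; FileCheck matches CHECK strings in order, so anchoring on the function label first forces the opcode check to be satisfied at or after that label. A minimal sketch, with a hypothetical function name and the load-through-pointer style these tests use:

  ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

  define <8 x i8> @abs_example(<8 x i8>* %A) nounwind {
  ;CHECK: abs_example:
  ;CHECK: vabs.s8
    %tmp1 = load <8 x i8>* %A
    %tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
    ret <8 x i8> %tmp2
  }

  declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone

Compared with the old grep-and-count RUN lines, a failure now points at the specific function whose expected opcode did not appear.
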
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index b2b0e2397c72..9fa530750aa1 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vadd\\.i8} %t | count 2
-; RUN: grep {vadd\\.i16} %t | count 2
-; RUN: grep {vadd\\.i32} %t | count 2
-; RUN: grep {vadd\\.i64} %t | count 2
-; RUN: grep {vadd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddi8:
+;CHECK: vadd.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = add <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddi16:
+;CHECK: vadd.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = add <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddi32:
+;CHECK: vadd.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = add <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vaddi64:
+;CHECK: vadd.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = add <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vaddf32:
+;CHECK: vadd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = add <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vaddQi8:
+;CHECK: vadd.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = add <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddQi16:
+;CHECK: vadd.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = add <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddQi32:
+;CHECK: vadd.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = add <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddQi64:
+;CHECK: vadd.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = add <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vaddQf32:
+;CHECK: vadd.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = add <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
+
+define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddhni16:
+;CHECK: vaddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddhni32:
+;CHECK: vaddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddhni64:
+;CHECK: vaddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddls8:
+;CHECK: vaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddls16:
+;CHECK: vaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddls32:
+;CHECK: vaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddlu8:
+;CHECK: vaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddlu16:
+;CHECK: vaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddlu32:
+;CHECK: vaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddws8:
+;CHECK: vaddw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddws16:
+;CHECK: vaddw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddws32:
+;CHECK: vaddw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddwu8:
+;CHECK: vaddw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddwu16:
+;CHECK: vaddw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddwu32:
+;CHECK: vaddw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 4bf79c041922..5f3536cbb9a3 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
@str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00" ; <[43 x i8]*> [#uses=1]
define i32 @main() {
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index 1f2f05bd6086..e4ef9e3c36c1 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {add sp, sp, #16} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
-; RUN: grep {add sp, sp, #12} | count 2
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
define i32 @f(i32 %a, ...) {
entry:
@@ -18,4 +16,8 @@ entry:
return: ; preds = %entry
%retval2 = load i32* %retval ; <i32> [#uses=1]
ret i32 %retval2
+; EABI: add sp, sp, #12
+; EABI: add sp, sp, #16
+; OABI: add sp, sp, #12
+; OABI: add sp, sp, #12
}
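
Two RUN lines over one file are kept apart with FileCheck's -check-prefix flag: each invocation honors only its own comment tag. Schematically, with the prefixes from the test above:

  ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
  ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
  ; The first invocation matches only "EABI:" lines and ignores "OABI:" lines;
  ; the second does the opposite, so one file can carry the expected stack
  ; adjustments for both ABIs side by side.
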
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
new file mode 100644
index 000000000000..e1d23a17b4cb
--- /dev/null
+++ b/test/CodeGen/ARM/vbits.ll
@@ -0,0 +1,507 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_andi8:
+;CHECK: vand
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_andi16:
+;CHECK: vand
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_andi32:
+;CHECK: vand
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_andi64:
+;CHECK: vand
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = and <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_andQi8:
+;CHECK: vand
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_andQi16:
+;CHECK: vand
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_andQi32:
+;CHECK: vand
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_andQi64:
+;CHECK: vand
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = and <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_bici8:
+;CHECK: vbic
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_bici16:
+;CHECK: vbic
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_bici32:
+;CHECK: vbic
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = and <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_bici64:
+;CHECK: vbic
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = and <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_bicQi8:
+;CHECK: vbic
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_bicQi16:
+;CHECK: vbic
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_bicQi32:
+;CHECK: vbic
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = and <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_bicQi64:
+;CHECK: vbic
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = and <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
+;CHECK: v_mvni8:
+;CHECK: vmvn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
+;CHECK: v_mvni16:
+;CHECK: vmvn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
+;CHECK: v_mvni32:
+;CHECK: vmvn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
+;CHECK: v_mvni64:
+;CHECK: vmvn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
+;CHECK: v_mvnQi8:
+;CHECK: vmvn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
+;CHECK: v_mvnQi16:
+;CHECK: vmvn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
+;CHECK: v_mvnQi32:
+;CHECK: vmvn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
+;CHECK: v_mvnQi64:
+;CHECK: vmvn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orri8:
+;CHECK: vorr
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = or <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orri16:
+;CHECK: vorr
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = or <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orri32:
+;CHECK: vorr
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = or <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orri64:
+;CHECK: vorr
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = or <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_orrQi8:
+;CHECK: vorr
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = or <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_orrQi16:
+;CHECK: vorr
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = or <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_orrQi32:
+;CHECK: vorr
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = or <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_orrQi64:
+;CHECK: vorr
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = or <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orni8:
+;CHECK: vorn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orni16:
+;CHECK: vorn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orni32:
+;CHECK: vorn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = or <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orni64:
+;CHECK: vorn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = or <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_ornQi8:
+;CHECK: vorn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_ornQi16:
+;CHECK: vorn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_ornQi32:
+;CHECK: vorn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = or <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_ornQi64:
+;CHECK: vorn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = or <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtsti8:
+;CHECK: vtst.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtsti16:
+;CHECK: vtst.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtsti32:
+;CHECK: vtst.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtstQi8:
+;CHECK: vtst.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtstQi16:
+;CHECK: vtst.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtstQi32:
+;CHECK: vtst.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
+ ret <4 x i32> %tmp5
+}
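
LLVM IR has no bit-clear or or-not instruction, so the vbic/vorn tests above synthesize them from primitives: an xor against an all-ones vector forms the complement, and instruction selection is expected to fold that xor into the second operand of a single VBIC or VORN. A minimal sketch of the VORN case, with a hypothetical function name:

  ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

  define <4 x i16> @orn_example(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  ;CHECK: orn_example:
  ;CHECK: vorn
    %tmp1 = load <4 x i16>* %A
    %tmp2 = load <4 x i16>* %B
    ; ~%tmp2, spelled as xor with -1 in every lane
    %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
    ; %tmp1 | ~%tmp2 is the pattern the backend should match to one VORN
    %tmp4 = or <4 x i16> %tmp1, %tmp3
    ret <4 x i16> %tmp4
  }
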
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 37ddf4de6d32..9f3bb4e1030c 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vbsl %t | count 8
-; Note: function names do not include "vbsl" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: v_bsli8:
+;CHECK: vbsl
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -14,6 +14,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
}
define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: v_bsli16:
+;CHECK: vbsl
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -25,6 +27,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: v_bsli32:
+;CHECK: vbsl
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -36,6 +40,8 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
+;CHECK: v_bsli64:
+;CHECK: vbsl
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = load <1 x i64>* %C
@@ -47,6 +53,8 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
}
define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: v_bslQi8:
+;CHECK: vbsl
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -58,6 +66,8 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
}
define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: v_bslQi16:
+;CHECK: vbsl
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -69,6 +79,8 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
}
define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: v_bslQi32:
+;CHECK: vbsl
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -80,6 +92,8 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
}
define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
+;CHECK: v_bslQi64:
+;CHECK: vbsl
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = load <2 x i64>* %C
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index 77f1890d0865..e4787518e731 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -1,61 +1,81 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep {vceq\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vceqi8:
+;CHECK: vceq.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp eq <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vceqi16:
+;CHECK: vceq.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp eq <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vceqi32:
+;CHECK: vceq.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp eq <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vceqf32:
+;CHECK: vceq.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp oeq <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vceqQi8:
+;CHECK: vceq.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp eq <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vceqQi16:
+;CHECK: vceq.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp eq <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vceqQi32:
+;CHECK: vceq.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp eq <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vceqQf32:
+;CHECK: vceq.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp oeq <4 x float> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
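
An icmp or fcmp on vectors yields <N x i1>, one bit per lane, while the NEON compare instructions produce all-ones or all-zeros lanes of the full element width; the sext bridges the two, since sign-extending an i1 true gives -1, that is, all bits set. A minimal sketch of the pattern, hypothetical function name:

  ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

  define <2 x i32> @cmp_example(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  ;CHECK: cmp_example:
  ;CHECK: vceq.i32
    %tmp1 = load <2 x i32>* %A
    %tmp2 = load <2 x i32>* %B
    ; per-lane compare: <2 x i1>, one bit per element
    %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
    ; sign-extend so a true lane becomes 0xffffffff, matching VCEQ's output
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
    ret <2 x i32> %tmp4
  }
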
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 14c623ea082f..2c161113c113 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcge\\.s8} %t | count 2
-; RUN: grep {vcge\\.s16} %t | count 2
-; RUN: grep {vcge\\.s32} %t | count 2
-; RUN: grep {vcge\\.u8} %t | count 2
-; RUN: grep {vcge\\.u16} %t | count 2
-; RUN: grep {vcge\\.u32} %t | count 2
-; RUN: grep {vcge\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcges8:
+;CHECK: vcge.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp sge <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcges16:
+;CHECK: vcge.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sge <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcges32:
+;CHECK: vcge.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp sge <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgeu8:
+;CHECK: vcge.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp uge <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgeu16:
+;CHECK: vcge.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp uge <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgeu32:
+;CHECK: vcge.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp uge <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgef32:
+;CHECK: vcge.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp oge <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQs8:
+;CHECK: vcge.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp sge <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQs16:
+;CHECK: vcge.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp sge <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQs32:
+;CHECK: vcge.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp sge <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQu8:
+;CHECK: vcge.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp uge <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQu16:
+;CHECK: vcge.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp uge <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQu32:
+;CHECK: vcge.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp uge <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgeQf32:
+;CHECK: vcge.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp oge <4 x float> %tmp1, %tmp2
+ %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgef32:
+;CHECK: vacge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgeQf32:
+;CHECK: vacge.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 3f7e55078733..6b11ba5ce693 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcgt\\.s8} %t | count 2
-; RUN: grep {vcgt\\.s16} %t | count 2
-; RUN: grep {vcgt\\.s32} %t | count 2
-; RUN: grep {vcgt\\.u8} %t | count 2
-; RUN: grep {vcgt\\.u16} %t | count 2
-; RUN: grep {vcgt\\.u32} %t | count 2
-; RUN: grep {vcgt\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgts8:
+;CHECK: vcgt.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp sgt <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgts16:
+;CHECK: vcgt.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sgt <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgts32:
+;CHECK: vcgt.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp sgt <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgtu8:
+;CHECK: vcgt.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp ugt <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgtu16:
+;CHECK: vcgt.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ugt <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgtu32:
+;CHECK: vcgt.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp ugt <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgtf32:
+;CHECK: vcgt.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ogt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQs8:
+;CHECK: vcgt.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp sgt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQs16:
+;CHECK: vcgt.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp sgt <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQs32:
+;CHECK: vcgt.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp sgt <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQu8:
+;CHECK: vcgt.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp ugt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQu16:
+;CHECK: vcgt.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp ugt <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQu32:
+;CHECK: vcgt.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ugt <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgtQf32:
+;CHECK: vcgt.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgtf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgtQf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 981716895894..450f90d03dfe 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,13 +1,16 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcnt\\.8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
@@ -15,3 +18,115 @@ define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
new file mode 100644
index 000000000000..e6733051f269
--- /dev/null
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %tmp3
+}
+
+define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %tmp3
+}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 1cb42bf155cb..f4cc5368d9aa 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,53 +1,140 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcvt\\.s32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.u32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.s32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tos32:
+;CHECK: vcvt.s32.f32
%tmp1 = load <2 x float>* %A
%tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tou32:
+;CHECK: vcvt.u32.f32
%tmp1 = load <2 x float>* %A
%tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_s32tof32:
+;CHECK: vcvt.f32.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
ret <2 x float> %tmp2
}
define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_u32tof32:
+;CHECK: vcvt.f32.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
ret <2 x float> %tmp2
}
define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tos32:
+;CHECK: vcvt.s32.f32
%tmp1 = load <4 x float>* %A
%tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tou32:
+;CHECK: vcvt.u32.f32
%tmp1 = load <4 x float>* %A
%tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_s32tof32:
+;CHECK: vcvt.f32.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_u32tof32:
+;CHECK: vcvt.f32.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
+
+define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+
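[Editorial note] The conversions in this patch all follow one pattern: the old grep-and-count RUN lines are replaced by a single llc-to-FileCheck pipe, with a per-function CHECK label so each instruction match is tied to the function that produced it. A minimal sketch of that pattern, as a hypothetical file that is not part of the patch (the function name is illustrative):

; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <2 x float> @cvt_example(<2 x i32>* %A) nounwind {
;CHECK: cvt_example:
;CHECK: vcvt.f32.s32
	%tmp1 = load <2 x i32>* %A    ; pre-opaque-pointer load syntax, as used throughout these tests
	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
	ret <2 x float> %tmp2
}

FileCheck matches the CHECK lines in order against llc's assembly output, which pins each expected instruction to its function instead of counting occurrences across the whole file.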
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 1c0887a2492d..c9a68cabbc42 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -1,9 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vdup.8 %t | count 4
-; RUN: grep vdup.16 %t | count 4
-; RUN: grep vdup.32 %t | count 8
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_dup8(i8 %A) nounwind {
+;CHECK: v_dup8:
+;CHECK: vdup.8
%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
@@ -16,6 +15,8 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
}
define <4 x i16> @v_dup16(i16 %A) nounwind {
+;CHECK: v_dup16:
+;CHECK: vdup.16
%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
@@ -24,18 +25,24 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
}
define <2 x i32> @v_dup32(i32 %A) nounwind {
+;CHECK: v_dup32:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
ret <2 x i32> %tmp2
}
define <2 x float> @v_dupfloat(float %A) nounwind {
+;CHECK: v_dupfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
ret <2 x float> %tmp2
}
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+;CHECK: v_dupQ8:
+;CHECK: vdup.8
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
@@ -56,6 +63,8 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
}
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+;CHECK: v_dupQ16:
+;CHECK: vdup.16
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
@@ -68,6 +77,8 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
}
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+;CHECK: v_dupQ32:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
@@ -76,6 +87,8 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
}
define <4 x float> @v_dupQfloat(float %A) nounwind {
+;CHECK: v_dupQfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
@@ -86,49 +99,171 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
; Check to make sure it works with shuffles, too.
define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
+;CHECK: v_shuffledup8:
+;CHECK: vdup.8
%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %tmp2
}
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
+;CHECK: v_shuffledup16:
+;CHECK: vdup.16
%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %tmp2
}
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
+;CHECK: v_shuffledup32:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
ret <2 x i32> %tmp2
}
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
+;CHECK: v_shuffledupfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x float> undef, float %A, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
ret <2 x float> %tmp2
}
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
+;CHECK: v_shuffledupQ8:
+;CHECK: vdup.8
%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %tmp2
}
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
+;CHECK: v_shuffledupQ16:
+;CHECK: vdup.16
%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %tmp2
}
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
+;CHECK: v_shuffledupQ32:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %tmp2
}
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
+;CHECK: v_shuffledupQfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x float> undef, float %A, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %tmp2
}
+
+define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupQfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK: vduplane8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK: vduplane16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK: vduplane32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK: vduplanefloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK: vduplaneQ8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK: vduplaneQ16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK: vduplaneQ32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK: vduplaneQfloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %0
+}
+
+define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %0
+}
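[Editorial note] The new cases above exercise the lane-splat form of vdup: a shufflevector whose mask repeats a single constant index broadcasts that lane, and the backend matches it to a vdup with a lane operand for 8-, 16-, and 32-bit elements. The trailing <2 x i64> and <2 x double> functions have 64-bit lanes, for which no vdup encoding exists, presumably why they carry no CHECK lines. A hedged sketch of the idiom (illustrative function, not part of the patch):

define <2 x i32> @dup_lane1(<2 x i32> %v) nounwind {
; the constant mask <1, 1> replicates lane 1 across the result,
; e.g. "vdup.32 d0, d0[1]"
	%tmp = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
	ret <2 x i32> %tmp
}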
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
new file mode 100644
index 000000000000..20d953bfb4a0
--- /dev/null
+++ b/test/CodeGen/ARM/vext.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextRd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: test_vextd16:
+;CHECK: vext
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i16> %tmp3
+}
+
+define arm_apcscc <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: test_vextq32:
+;CHECK: vext
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %tmp3
+}
+
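[Editorial note] The shuffles in the new vext.ll are the IR form of VEXT: the mask selects consecutive elements starting at some offset into the first operand and wrapping into the second, and that starting offset becomes the instruction's immediate. A hedged sketch using an offset the tests above do not use (illustrative, not part of the patch):

define <4 x i16> @ext_by_2(<4 x i16> %a, <4 x i16> %b) nounwind {
; consecutive indices 2,3,4,5 span both operands starting at offset 2,
; i.e. "vext.16 d0, d0, d1, #2"
	%tmp = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	ret <4 x i16> %tmp
}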
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 58c2068bc8f4..6946d02637ea 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -1,96 +1,139 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.f32} %t | count 1
-; RUN: grep {vcgt\\.f32} %t | count 9
-; RUN: grep {vcge\\.f32} %t | count 5
-; RUN: grep vorr %t | count 4
-; RUN: grep vmvn %t | count 7
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; This tests vfcmp operations that do not map directly to NEON instructions.
+; This tests fcmp operations that do not map directly to NEON instructions.
; une is implemented with VCEQ/VMVN
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp une <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; olt is implemented with VCGT
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp olt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ole is implemented with VCGE
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ole <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; uge is implemented with VCGT/VMVN
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp uge <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ule is implemented with VCGT/VMVN
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ule <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ugt is implemented with VCGE/VMVN
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ugt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ult is implemented with VCGE/VMVN
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ult <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ueq is implemented with VCGT/VCGT/VORR/VMVN
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ueq <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; one is implemented with VCGT/VCGT/VORR
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp one <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; uno is implemented with VCGT/VCGE/VORR/VMVN
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp uno <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ord is implemented with VCGT/VCGE/VORR
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ord <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
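[Editorial note] As the comments in vfcmp.ll note, the unordered and compound predicates have no single NEON encoding and are expanded. The simplest case, une as VCEQ followed by VMVN, can be written out manually in IR for comparison; NEON compares produce all-ones/all-zeros lane masks, which is why the rewritten tests sext the <2 x i1> result. A sketch, not part of the patch:

define <2 x i32> @une_expanded(<2 x float> %a, <2 x float> %b) nounwind {
	%eq = fcmp oeq <2 x float> %a, %b                ; vceq.f32
	%mask = sext <2 x i1> %eq to <2 x i32>
	%ne = xor <2 x i32> %mask, <i32 -1, i32 -1>      ; vmvn
	ret <2 x i32> %ne
}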
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index f58da4409356..50000e31e112 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,19 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fabs | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fmscs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcvt | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fuito | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fto.i | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep bmi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcmpezs | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define void @test(float* %P, double* %D) {
%A = load float* %P ; <float> [#uses=1]
@@ -28,16 +13,20 @@ declare float @fabsf(float)
declare double @fabs(double)
define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: fabss
%b = call float @fabsf( float %a ) ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=1]
+;CHECK: fabsd
%B = call double @fabs( double %A ) ; <double> [#uses=1]
store double %B, double* %D
ret void
}
define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
%a = load float* %P ; <float> [#uses=2]
%b = fadd float %a, %a ; <float> [#uses=1]
store float %b, float* %P
@@ -48,9 +37,12 @@ define void @test_add(float* %P, double* %D) {
}
define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: fcvtds
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
+;CHECK: fcvtsd
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
@@ -58,9 +50,11 @@ define void @test_ext_round(float* %P, double* %D) {
}
define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
%a1 = load float* %P1 ; <float> [#uses=1]
%a2 = load float* %P2 ; <float> [#uses=1]
%a3 = load float* %P3 ; <float> [#uses=1]
+;CHECK: fmscs
%X = fmul float %a1, %a2 ; <float> [#uses=1]
%Y = fsub float %X, %a3 ; <float> [#uses=1]
store float %Y, float* %P1
@@ -68,42 +62,55 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
}
define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
%a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: ftosizs
%b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
%a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: ftouizs
%b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
%a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: ftosizd
%b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
%a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: ftouizd
%b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: fuitod
%b1 = uitofp i32 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: fuitod
%b1 = uitofp i8 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
entry:
%tmp = load float* %glob ; <float> [#uses=2]
%tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
@@ -111,6 +118,8 @@ entry:
%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
br i1 %tmp6, label %cond_true, label %cond_false
cond_true: ; preds = %entry
@@ -129,8 +138,10 @@ declare i32 @bar(...)
declare i32 @baz(...)
define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
entry:
%tmp = load float* %glob ; <float> [#uses=1]
+;CHECK: fcmpezs
%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index a361ba2ba97f..f0df7982ef42 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -1,11 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.s8} %t | count 2
-; RUN: grep {vmov\\.s16} %t | count 2
-; RUN: grep {vmov\\.u8} %t | count 2
-; RUN: grep {vmov\\.u16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
+; RUN: llc < %s -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@@ -13,6 +12,8 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
}
define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@@ -20,6 +21,8 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
}
define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@@ -27,6 +30,8 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
}
define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@@ -35,6 +40,8 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
%tmp1 = load <2 x i32>* %A
%tmp2 = add <2 x i32> %tmp1, %tmp1
%tmp3 = extractelement <2 x i32> %tmp2, i32 1
@@ -42,6 +49,8 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
}
define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@@ -49,6 +58,8 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@@ -56,6 +67,8 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
}
define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@@ -63,6 +76,8 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@@ -71,8 +86,127 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
%tmp1 = load <4 x i32>* %A
%tmp2 = add <4 x i32> %tmp1, %tmp1
%tmp3 = extractelement <4 x i32> %tmp2, i32 1
ret i32 %tmp3
}
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
+ %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+ %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+ ret <4 x i32> %tmp2
+}
+
+define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: fcpys
+;CHECK: fcpys
+entry:
+ %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
+ ret <2 x float> %0
+}
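[Editorial note] In the lane-extraction tests above, the extend on the extracted element is what selects the instruction: sext picks the signed form (vmov.s8/vmov.s16), zext picks the unsigned form (vmov.u8/vmov.u16), and a full-width i32 lane needs no extend and uses vmov.32. The vector add in the lanei32 cases exists only to keep llc from folding the load and extract into a plain scalar load, as the test comments say. A minimal sketch of the contrast (illustrative, not part of the patch):

define i32 @lane_signed(<4 x i16> %v) nounwind {
	%lane = extractelement <4 x i16> %v, i32 1
	%wide = sext i16 %lane to i32    ; vmov.s16; a zext here would select vmov.u16
	ret i32 %wide
}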
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 5e7503dc71cf..379e062838f6 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhadd\\.s8} %t | count 2
-; RUN: grep {vhadd\\.s16} %t | count 2
-; RUN: grep {vhadd\\.s32} %t | count 2
-; RUN: grep {vhadd\\.u8} %t | count 2
-; RUN: grep {vhadd\\.u16} %t | count 2
-; RUN: grep {vhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -105,3 +123,127 @@ declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind rea
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQs32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
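[Editorial note] The halving adds tested above compute (a + b) >> 1 with the intermediate sum taken at full precision, so a + b cannot overflow; the vrhadd variants additionally add 1 before the shift, rounding to nearest. A reference expansion in plain IR that widens to avoid overflow, offered as a sketch under that reading of the instruction semantics and not part of the patch:

define <8 x i8> @vhadds8_reference(<8 x i8> %a, <8 x i8> %b) nounwind {
	%wa = sext <8 x i8> %a to <8 x i16>
	%wb = sext <8 x i8> %b to <8 x i16>
	%sum = add <8 x i16> %wa, %wb
	%half = ashr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
	%res = trunc <8 x i16> %half to <8 x i8>
	ret <8 x i8> %res
}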
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index 32a66e547945..0f0d0279a521 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhsub\\.s8} %t | count 2
-; RUN: grep {vhsub\\.s16} %t | count 2
-; RUN: grep {vhsub\\.s32} %t | count 2
-; RUN: grep {vhsub\\.u8} %t | count 2
-; RUN: grep {vhsub\\.u16} %t | count 2
-; RUN: grep {vhsub\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubs8:
+;CHECK: vhsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubs16:
+;CHECK: vhsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubs32:
+;CHECK: vhsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubu8:
+;CHECK: vhsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubu16:
+;CHECK: vhsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubu32:
+;CHECK: vhsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQs8:
+;CHECK: vhsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQs16:
+;CHECK: vhsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQs32:
+;CHECK: vhsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQu8:
+;CHECK: vhsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQu16:
+;CHECK: vhsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQu32:
+;CHECK: vhsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
index 86858f929348..2d8cb893bd86 100644
--- a/test/CodeGen/ARM/vicmp.ll
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -1,85 +1,113 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep vmvn %t | count 6
-; RUN: grep {vcgt\\.s8} %t | count 1
-; RUN: grep {vcge\\.s16} %t | count 1
-; RUN: grep {vcgt\\.u16} %t | count 1
-; RUN: grep {vcge\\.u32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; This tests vicmp operations that do not map directly to NEON instructions.
+; This tests icmp operations that do not map directly to NEON instructions.
; Not-equal (ne) operations are implemented by VCEQ/VMVN. Less-than (lt/ult)
; and less-than-or-equal (le/ule) are implemented by swapping the arguments
; to VCGT and VCGE. Test all the operand types for not-equal but only sample
; the other operations.
define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcnei8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp ne <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcnei16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ne <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcnei32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp ne <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcneQi8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp ne <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcneQi16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp ne <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcneQi32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ne <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcltQs8:
+;CHECK: vcgt.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp slt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcles16:
+;CHECK: vcge.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sle <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcltu16:
+;CHECK: vcgt.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ult <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcleQu32:
+;CHECK: vcge.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ule <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
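[Editorial note] The swapped-operand lowering described in the vicmp.ll header comment can be written out directly: a lt b holds exactly when b gt a, so the backend emits VCGT with the operands reversed rather than needing a separate VCLT. A hedged equivalent of the vcltu16 test above (illustrative, not part of the patch):

define <4 x i16> @ult_as_swapped_ugt(<4 x i16> %a, <4 x i16> %b) nounwind {
	%cmp = icmp ugt <4 x i16> %b, %a    ; same lane mask as "icmp ult %a, %b"
	%mask = sext <4 x i1> %cmp to <4 x i16>
	ret <4 x i16> %mask
}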
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
new file mode 100644
index 000000000000..f5383aafb2bb
--- /dev/null
+++ b/test/CodeGen/ARM/vld1.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vld1i8(i8* %A) nounwind {
+;CHECK: vld1i8:
+;CHECK: vld1.8
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
+ ret <8 x i8> %tmp1
+}
+
+define <4 x i16> @vld1i16(i16* %A) nounwind {
+;CHECK: vld1i16:
+;CHECK: vld1.16
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
+ ret <4 x i16> %tmp1
+}
+
+define <2 x i32> @vld1i32(i32* %A) nounwind {
+;CHECK: vld1i32:
+;CHECK: vld1.32
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
+ ret <2 x i32> %tmp1
+}
+
+define <2 x float> @vld1f(float* %A) nounwind {
+;CHECK: vld1f:
+;CHECK: vld1.32
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
+ ret <2 x float> %tmp1
+}
+
+define <1 x i64> @vld1i64(i64* %A) nounwind {
+;CHECK: vld1i64:
+;CHECK: vld1.64
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
+ ret <1 x i64> %tmp1
+}
+
+define <16 x i8> @vld1Qi8(i8* %A) nounwind {
+;CHECK: vld1Qi8:
+;CHECK: vld1.8
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @vld1Qi16(i16* %A) nounwind {
+;CHECK: vld1Qi16:
+;CHECK: vld1.16
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
+ ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @vld1Qi32(i32* %A) nounwind {
+;CHECK: vld1Qi32:
+;CHECK: vld1.32
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
+ ret <4 x i32> %tmp1
+}
+
+define <4 x float> @vld1Qf(float* %A) nounwind {
+;CHECK: vld1Qf:
+;CHECK: vld1.32
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
+ ret <4 x float> %tmp1
+}
+
+define <2 x i64> @vld1Qi64(i64* %A) nounwind {
+;CHECK: vld1Qi64:
+;CHECK: vld1.64
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
+ ret <2 x i64> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
new file mode 100644
index 000000000000..23f7d2ca0cd3
--- /dev/null
+++ b/test/CodeGen/ARM/vld2.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2i8(i8* %A) nounwind {
+;CHECK: vld2i8:
+;CHECK: vld2.8
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld2i16(i16* %A) nounwind {
+;CHECK: vld2i16:
+;CHECK: vld2.16
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld2i32(i32* %A) nounwind {
+;CHECK: vld2i32:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld2f(float* %A) nounwind {
+;CHECK: vld2f:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld2i64(i64* %A) nounwind {
+;CHECK: vld2i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld2Qi8(i8* %A) nounwind {
+;CHECK: vld2Qi8:
+;CHECK: vld2.8
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld2Qi16(i16* %A) nounwind {
+;CHECK: vld2Qi16:
+;CHECK: vld2.16
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld2Qi32(i32* %A) nounwind {
+;CHECK: vld2Qi32:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld2Qf(float* %A) nounwind {
+;CHECK: vld2Qf:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly
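[Editorial note] The vld2 intrinsics return both de-interleaved registers as a first-class struct, and each half is pulled out with extractvalue; the <1 x i64> case CHECKs for vld1.64 because 64-bit lanes cannot be interleaved, so the load degenerates to a plain two-register vld1. A hedged usage sketch, restating the struct type from the test above (illustrative function name, not part of the patch):

%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }

declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly

define <8 x i8> @vld2_odd_elements(i8* %p) nounwind {
	%pair = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %p)
	%odd = extractvalue %struct.__neon_int8x8x2_t %pair, 1    ; elements 1, 3, 5, ...
	ret <8 x i8> %odd
}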
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
new file mode 100644
index 000000000000..207dc6a22e45
--- /dev/null
+++ b/test/CodeGen/ARM/vld3.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3i8(i8* %A) nounwind {
+;CHECK: vld3i8:
+;CHECK: vld3.8
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld3i16(i16* %A) nounwind {
+;CHECK: vld3i16:
+;CHECK: vld3.16
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld3i32(i32* %A) nounwind {
+;CHECK: vld3i32:
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld3f(float* %A) nounwind {
+;CHECK: vld3f:
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld3i64(i64* %A) nounwind {
+;CHECK: vld3i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld3Qi8(i8* %A) nounwind {
+;CHECK: vld3Qi8:
+;CHECK: vld3.8
+;CHECK: vld3.8
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld3Qi16(i16* %A) nounwind {
+;CHECK: vld3Qi16:
+;CHECK: vld3.16
+;CHECK: vld3.16
+ %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld3Qi32(i32* %A) nounwind {
+;CHECK: vld3Qi32:
+;CHECK: vld3.32
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld3Qf(float* %A) nounwind {
+;CHECK: vld3Qf:
+;CHECK: vld3.32
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
new file mode 100644
index 000000000000..0624f2977ea4
--- /dev/null
+++ b/test/CodeGen/ARM/vld4.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4i8(i8* %A) nounwind {
+;CHECK: vld4i8:
+;CHECK: vld4.8
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld4i16(i16* %A) nounwind {
+;CHECK: vld4i16:
+;CHECK: vld4.16
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld4i32(i32* %A) nounwind {
+;CHECK: vld4i32:
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld4f(float* %A) nounwind {
+;CHECK: vld4f:
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld4i64(i64* %A) nounwind {
+;CHECK: vld4i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld4Qi8(i8* %A) nounwind {
+;CHECK: vld4Qi8:
+;CHECK: vld4.8
+;CHECK: vld4.8
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld4Qi16(i16* %A) nounwind {
+;CHECK: vld4Qi16:
+;CHECK: vld4.16
+;CHECK: vld4.16
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld4Qi32(i32* %A) nounwind {
+;CHECK: vld4Qi32:
+;CHECK: vld4.32
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld4Qf(float* %A) nounwind {
+;CHECK: vld4Qf:
+;CHECK: vld4.32
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
new file mode 100644
index 000000000000..53881a3f924e
--- /dev/null
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld2lanei8:
+;CHECK: vld2.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld2lanei16:
+;CHECK: vld2.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32:
+;CHECK: vld2.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld2lanef:
+;CHECK: vld2.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld2laneQi16:
+;CHECK: vld2.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld2laneQi32:
+;CHECK: vld2.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld2laneQf:
+;CHECK: vld2.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld3lanei8:
+;CHECK: vld3.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i8> %tmp3, %tmp4
+ %tmp7 = add <8 x i8> %tmp5, %tmp6
+ ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld3lanei16:
+;CHECK: vld3.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i16> %tmp3, %tmp4
+ %tmp7 = add <4 x i16> %tmp5, %tmp6
+ ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld3lanei32:
+;CHECK: vld3.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x i32> %tmp3, %tmp4
+ %tmp7 = add <2 x i32> %tmp5, %tmp6
+ ret <2 x i32> %tmp7
+}
+
+define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld3lanef:
+;CHECK: vld3.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x float> %tmp3, %tmp4
+ %tmp7 = add <2 x float> %tmp5, %tmp6
+ ret <2 x float> %tmp7
+}
+
+define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld3laneQi16:
+;CHECK: vld3.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i16> %tmp3, %tmp4
+ %tmp7 = add <8 x i16> %tmp5, %tmp6
+ ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld3laneQi32:
+;CHECK: vld3.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i32> %tmp3, %tmp4
+ %tmp7 = add <4 x i32> %tmp5, %tmp6
+ ret <4 x i32> %tmp7
+}
+
+define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld3laneQf:
+;CHECK: vld3.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x float> %tmp3, %tmp4
+ %tmp7 = add <4 x float> %tmp5, %tmp6
+ ret <4 x float> %tmp7
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8:
+;CHECK: vld4.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i8> %tmp3, %tmp4
+ %tmp8 = add <8 x i8> %tmp5, %tmp6
+ %tmp9 = add <8 x i8> %tmp7, %tmp8
+ ret <8 x i8> %tmp9
+}
+
+define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld4lanei16:
+;CHECK: vld4.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i16> %tmp3, %tmp4
+ %tmp8 = add <4 x i16> %tmp5, %tmp6
+ %tmp9 = add <4 x i16> %tmp7, %tmp8
+ ret <4 x i16> %tmp9
+}
+
+define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld4lanei32:
+;CHECK: vld4.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x i32> %tmp3, %tmp4
+ %tmp8 = add <2 x i32> %tmp5, %tmp6
+ %tmp9 = add <2 x i32> %tmp7, %tmp8
+ ret <2 x i32> %tmp9
+}
+
+define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld4lanef:
+;CHECK: vld4.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x float> %tmp3, %tmp4
+ %tmp8 = add <2 x float> %tmp5, %tmp6
+ %tmp9 = add <2 x float> %tmp7, %tmp8
+ ret <2 x float> %tmp9
+}
+
+define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld4laneQi16:
+;CHECK: vld4.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i16> %tmp3, %tmp4
+ %tmp8 = add <8 x i16> %tmp5, %tmp6
+ %tmp9 = add <8 x i16> %tmp7, %tmp8
+ ret <8 x i16> %tmp9
+}
+
+define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld4laneQi32:
+;CHECK: vld4.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i32> %tmp3, %tmp4
+ %tmp8 = add <4 x i32> %tmp5, %tmp6
+ %tmp9 = add <4 x i32> %tmp7, %tmp8
+ ret <4 x i32> %tmp9
+}
+
+define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld4laneQf:
+;CHECK: vld4.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x float> %tmp3, %tmp4
+ %tmp8 = add <4 x float> %tmp5, %tmp6
+ %tmp9 = add <4 x float> %tmp7, %tmp8
+ ret <4 x float> %tmp9
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
new file mode 100644
index 000000000000..e3527c1a4d9b
--- /dev/null
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -0,0 +1,293 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmins8:
+;CHECK: vmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmins16:
+;CHECK: vmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmins32:
+;CHECK: vmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vminu8:
+;CHECK: vmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vminu16:
+;CHECK: vmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vminu32:
+;CHECK: vmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vminf32:
+;CHECK: vmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQs8:
+;CHECK: vmin.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQs16:
+;CHECK: vmin.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQs32:
+;CHECK: vmin.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQu8:
+;CHECK: vmin.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQu16:
+;CHECK: vmin.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQu32:
+;CHECK: vmin.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vminQf32:
+;CHECK: vmin.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxs8:
+;CHECK: vmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxs16:
+;CHECK: vmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxs32:
+;CHECK: vmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxu8:
+;CHECK: vmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxu16:
+;CHECK: vmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxu32:
+;CHECK: vmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmaxf32:
+;CHECK: vmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQs8:
+;CHECK: vmax.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQs16:
+;CHECK: vmax.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQs32:
+;CHECK: vmax.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQu8:
+;CHECK: vmax.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQu16:
+;CHECK: vmax.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQu32:
+;CHECK: vmax.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmaxQf32:
+;CHECK: vmax.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index ed77e11a7c47..840521827413 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmla\\.i8} %t | count 2
-; RUN: grep {vmla\\.i16} %t | count 2
-; RUN: grep {vmla\\.i32} %t | count 2
-; RUN: grep {vmla\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlai8:
+;CHECK: vmla.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
}
 
define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlai16:
+;CHECK: vmla.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
 
define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlai32:
+;CHECK: vmla.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
 
define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlaf32:
+;CHECK: vmla.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
}
 
define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlaQi8:
+;CHECK: vmla.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
}
 
define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlaQi16:
+;CHECK: vmla.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
 
define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlaQi32:
+;CHECK: vmla.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
}
 
define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlaQf32:
+;CHECK: vmla.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
%tmp5 = add <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
+
+define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlals8:
+;CHECK: vmlal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlals16:
+;CHECK: vmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlals32:
+;CHECK: vmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlalu8:
+;CHECK: vmlal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlalu16:
+;CHECK: vmlal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlalu32:
+;CHECK: vmlal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes16
+; CHECK: vmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes32
+; CHECK: vmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu16
+; CHECK: vmlal.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu32
+; CHECK: vmlal.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index d519b7e70e1e..c89552e6f9ea 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmls\\.i8} %t | count 2
-; RUN: grep {vmls\\.i16} %t | count 2
-; RUN: grep {vmls\\.i32} %t | count 2
-; RUN: grep {vmls\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlsi8:
+;CHECK: vmls.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
}
 
define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsi16:
+;CHECK: vmls.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
 
define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsi32:
+;CHECK: vmls.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
 
define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlsf32:
+;CHECK: vmls.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
}
 
define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlsQi8:
+;CHECK: vmls.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
}
 
define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlsQi16:
+;CHECK: vmls.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
 
define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlsQi32:
+;CHECK: vmls.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
}
 
define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlsQf32:
+;CHECK: vmls.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
%tmp5 = sub <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
+
+define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlsls8:
+;CHECK: vmlsl.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsls16:
+;CHECK: vmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsls32:
+;CHECK: vmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlslu8:
+;CHECK: vmlsl.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlslu16:
+;CHECK: vmlsl.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlslu32:
+;CHECK: vmlsl.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes16
+; CHECK: vmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes32
+; CHECK: vmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu16
+; CHECK: vmlsl.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu32
+; CHECK: vmlsl.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index af9c8e25989c..ed69f970c611 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -1,101 +1,303 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vmov.i8 %t | count 2
-; RUN: grep vmov.i16 %t | count 4
-; RUN: grep vmov.i32 %t | count 12
-; RUN: grep vmov.i64 %t | count 2
-; Note: function names do not include "vmov" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
define <8 x i8> @v_movi8() nounwind {
+;CHECK: v_movi8:
+;CHECK: vmov.i8
ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
 
define <4 x i16> @v_movi16a() nounwind {
+;CHECK: v_movi16a:
+;CHECK: vmov.i16
ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
}
 
; 0x1000 = 4096
define <4 x i16> @v_movi16b() nounwind {
+;CHECK: v_movi16b:
+;CHECK: vmov.i16
ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
}
 
define <2 x i32> @v_movi32a() nounwind {
+;CHECK: v_movi32a:
+;CHECK: vmov.i32
ret <2 x i32> < i32 32, i32 32 >
}
 
; 0x2000 = 8192
define <2 x i32> @v_movi32b() nounwind {
+;CHECK: v_movi32b:
+;CHECK: vmov.i32
ret <2 x i32> < i32 8192, i32 8192 >
}
 
; 0x200000 = 2097152
define <2 x i32> @v_movi32c() nounwind {
+;CHECK: v_movi32c:
+;CHECK: vmov.i32
ret <2 x i32> < i32 2097152, i32 2097152 >
}
 
; 0x20000000 = 536870912
define <2 x i32> @v_movi32d() nounwind {
+;CHECK: v_movi32d:
+;CHECK: vmov.i32
ret <2 x i32> < i32 536870912, i32 536870912 >
}
 
; 0x20ff = 8447
define <2 x i32> @v_movi32e() nounwind {
+;CHECK: v_movi32e:
+;CHECK: vmov.i32
ret <2 x i32> < i32 8447, i32 8447 >
}
 
; 0x20ffff = 2162687
define <2 x i32> @v_movi32f() nounwind {
+;CHECK: v_movi32f:
+;CHECK: vmov.i32
ret <2 x i32> < i32 2162687, i32 2162687 >
}
 
; 0xff0000ff0000ffff = 18374687574888349695
define <1 x i64> @v_movi64() nounwind {
+;CHECK: v_movi64:
+;CHECK: vmov.i64
ret <1 x i64> < i64 18374687574888349695 >
}
 
define <16 x i8> @v_movQi8() nounwind {
+;CHECK: v_movQi8:
+;CHECK: vmov.i8
ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
 
define <8 x i16> @v_movQi16a() nounwind {
+;CHECK: v_movQi16a:
+;CHECK: vmov.i16
ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
}
 
; 0x1000 = 4096
define <8 x i16> @v_movQi16b() nounwind {
+;CHECK: v_movQi16b:
+;CHECK: vmov.i16
ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
}
 
define <4 x i32> @v_movQi32a() nounwind {
+;CHECK: v_movQi32a:
+;CHECK: vmov.i32
ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
}
 
; 0x2000 = 8192
define <4 x i32> @v_movQi32b() nounwind {
+;CHECK: v_movQi32b:
+;CHECK: vmov.i32
ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
}
 
; 0x200000 = 2097152
define <4 x i32> @v_movQi32c() nounwind {
+;CHECK: v_movQi32c:
+;CHECK: vmov.i32
ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
}
 
; 0x20000000 = 536870912
define <4 x i32> @v_movQi32d() nounwind {
+;CHECK: v_movQi32d:
+;CHECK: vmov.i32
ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
}
 
; 0x20ff = 8447
define <4 x i32> @v_movQi32e() nounwind {
+;CHECK: v_movQi32e:
+;CHECK: vmov.i32
ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}
 
; 0x20ffff = 2162687
define <4 x i32> @v_movQi32f() nounwind {
+;CHECK: v_movQi32f:
+;CHECK: vmov.i32
ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}
 
; 0xff0000ff0000ffff = 18374687574888349695
define <2 x i64> @v_movQi64() nounwind {
+;CHECK: v_movQi64:
+;CHECK: vmov.i64
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
 
+define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
+;CHECK: vmovls8:
+;CHECK: vmovl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
+;CHECK: vmovls16:
+;CHECK: vmovl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
+;CHECK: vmovls32:
+;CHECK: vmovl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
+;CHECK: vmovlu8:
+;CHECK: vmovl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
+;CHECK: vmovlu16:
+;CHECK: vmovl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
+;CHECK: vmovlu32:
+;CHECK: vmovl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
+
+define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
+;CHECK: vmovni16:
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
+;CHECK: vmovni32:
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
+;CHECK: vmovni64:
+;CHECK: vmovn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovns16:
+;CHECK: vqmovn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovns32:
+;CHECK: vqmovn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovns64:
+;CHECK: vqmovn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovnu16:
+;CHECK: vqmovn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovnu32:
+;CHECK: vqmovn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovnu64:
+;CHECK: vqmovn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovuns16:
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovuns32:
+;CHECK: vqmovun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovuns64:
+;CHECK: vqmovun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index eb9ae7b95c2d..325da5deabe5 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmul\\.i8} %t | count 2
-; RUN: grep {vmul\\.i16} %t | count 2
-; RUN: grep {vmul\\.i32} %t | count 2
-; RUN: grep {vmul\\.f32} %t | count 2
-; RUN: grep {vmul\\.p8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmuli8:
+;CHECK: vmul.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = mul <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmuli16:
+;CHECK: vmul.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = mul <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmuli32:
+;CHECK: vmul.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = mul <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmulf32:
+;CHECK: vmul.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = mul <2 x float> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulp8:
+;CHECK: vmul.p8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -41,6 +46,8 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQi8:
+;CHECK: vmul.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = mul <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmulQi16:
+;CHECK: vmul.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = mul <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmulQi32:
+;CHECK: vmul.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = mul <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmulQf32:
+;CHECK: vmul.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = mul <4 x float> %tmp1, %tmp2
@@ -69,6 +82,8 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
}
define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQp8:
+;CHECK: vmul.p8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,3 +92,166 @@ define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+
+define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanef32:
+; CHECK: vmul.f32 d0, d0, d1[0]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
+ %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
+ ret <2 x float> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes16:
+; CHECK: vmul.i16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes32:
+; CHECK: vmul.i32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanef32:
+; CHECK: vmul.f32 q0, q0, d2[1]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes16:
+; CHECK: vmul.i16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes32:
+; CHECK: vmul.i32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8:
+;CHECK: vmull.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16:
+;CHECK: vmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32:
+;CHECK: vmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8:
+;CHECK: vmull.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16:
+;CHECK: vmull.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32:
+;CHECK: vmull.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullp8:
+;CHECK: vmull.p8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16
+; CHECK: vmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32
+; CHECK: vmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16
+; CHECK: vmull.u16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32
+; CHECK: vmull.u32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 9fa527f52fcc..7764e87c6ac6 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -1,53 +1,121 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vneg\\.s8} %t | count 2
-; RUN: grep {vneg\\.s16} %t | count 2
-; RUN: grep {vneg\\.s32} %t | count 2
-; RUN: grep {vneg\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vnegs8:
+;CHECK: vneg.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = sub <8 x i8> zeroinitializer, %tmp1
ret <8 x i8> %tmp2
}
define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vnegs16:
+;CHECK: vneg.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = sub <4 x i16> zeroinitializer, %tmp1
ret <4 x i16> %tmp2
}
define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vnegs32:
+;CHECK: vneg.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = sub <2 x i32> zeroinitializer, %tmp1
ret <2 x i32> %tmp2
}
define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
+;CHECK: vnegf32:
+;CHECK: vneg.f32
%tmp1 = load <2 x float>* %A
%tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <2 x float> %tmp2
}
define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vnegQs8:
+;CHECK: vneg.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = sub <16 x i8> zeroinitializer, %tmp1
ret <16 x i8> %tmp2
}
define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vnegQs16:
+;CHECK: vneg.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = sub <8 x i16> zeroinitializer, %tmp1
ret <8 x i16> %tmp2
}
define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vnegQs32:
+;CHECK: vneg.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = sub <4 x i32> zeroinitializer, %tmp1
ret <4 x i32> %tmp2
}
define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
+;CHECK: vnegQf32:
+;CHECK: vneg.f32
%tmp1 = load <4 x float>* %A
%tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <4 x float> %tmp2
}
+
+define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vqnegs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vqnegs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vqnegs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqnegQs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqnegQs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqnegQs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index c41c532988e8..7296e936cd73 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadal\\.s8} %t | count 2
-; RUN: grep {vpadal\\.s16} %t | count 2
-; RUN: grep {vpadal\\.s32} %t | count 2
-; RUN: grep {vpadal\\.u8} %t | count 2
-; RUN: grep {vpadal\\.u16} %t | count 2
-; RUN: grep {vpadal\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadals8:
+;CHECK: vpadal.s8
%tmp1 = load <4 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
}
define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadals16:
+;CHECK: vpadal.s16
%tmp1 = load <2 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
}
define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadals32:
+;CHECK: vpadal.s32
%tmp1 = load <1 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
}
define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadalu8:
+;CHECK: vpadal.u8
%tmp1 = load <4 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
}
define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadalu16:
+;CHECK: vpadal.u16
%tmp1 = load <2 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
}
define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadalu32:
+;CHECK: vpadal.u32
%tmp1 = load <1 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQs8:
+;CHECK: vpadal.s8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
}
define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQs16:
+;CHECK: vpadal.s16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
}
define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQs32:
+;CHECK: vpadal.s32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
}
define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQu8:
+;CHECK: vpadal.u8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
}
define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQu16:
+;CHECK: vpadal.u16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
}
define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQu32:
+;CHECK: vpadal.u32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index baff49227e64..212557394518 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,39 +1,155 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadd\\.i8} %t | count 1
-; RUN: grep {vpadd\\.i16} %t | count 1
-; RUN: grep {vpadd\\.i32} %t | count 1
-; RUN: grep {vpadd\\.f32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpaddi8:
+;CHECK: vpadd.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpaddi16:
+;CHECK: vpadd.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpaddi32:
+;CHECK: vpadd.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpaddf32:
+;CHECK: vpadd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
-declare <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddls8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddls16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddls32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddlu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddlu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddlu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQs8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQs16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQs32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
new file mode 100644
index 000000000000..b75bcc99f6b6
--- /dev/null
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmins8:
+;CHECK: vpmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmins16:
+;CHECK: vpmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmins32:
+;CHECK: vpmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpminu8:
+;CHECK: vpmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpminu16:
+;CHECK: vpmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpminu32:
+;CHECK: vpmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpminf32:
+;CHECK: vpmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxs8:
+;CHECK: vpmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxs16:
+;CHECK: vpmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxs32:
+;CHECK: vpmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxu8:
+;CHECK: vpmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxu16:
+;CHECK: vpmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxu32:
+;CHECK: vpmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpmaxf32:
+;CHECK: vpmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index c9e235995360..a1669b60ab56 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqadd\\.s8} %t | count 2
-; RUN: grep {vqadd\\.s16} %t | count 2
-; RUN: grep {vqadd\\.s32} %t | count 2
-; RUN: grep {vqadd\\.s64} %t | count 2
-; RUN: grep {vqadd\\.u8} %t | count 2
-; RUN: grep {vqadd\\.u16} %t | count 2
-; RUN: grep {vqadd\\.u32} %t | count 2
-; RUN: grep {vqadd\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqadds8:
+;CHECK: vqadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqadds16:
+;CHECK: vqadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqadds32:
+;CHECK: vqadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqadds64:
+;CHECK: vqadd.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqaddu8:
+;CHECK: vqadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqaddu16:
+;CHECK: vqadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqaddu32:
+;CHECK: vqadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqaddu64:
+;CHECK: vqadd.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQs8:
+;CHECK: vqadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQs16:
+;CHECK: vqadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQs32:
+;CHECK: vqadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQs64:
+;CHECK: vqadd.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQu8:
+;CHECK: vqadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQu16:
+;CHECK: vqadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQu32:
+;CHECK: vqadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQu64:
+;CHECK: vqadd.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
new file mode 100644
index 000000000000..8dcc7f73633c
--- /dev/null
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -0,0 +1,281 @@
+; RUN: llc -mattr=+neon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulhs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulhs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqdmulhQs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqdmulhQs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes16
+; CHECK: vqdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes32
+; CHECK: vqdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes16
+; CHECK: vqdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes32
+; CHECK: vqdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrdmulhs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrdmulhs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrdmulhQs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrdmulhQs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes16
+; CHECK: vqrdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes32
+; CHECK: vqrdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes16
+; CHECK: vqrdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes32
+; CHECK: vqrdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulls16:
+;CHECK: vqdmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulls32:
+;CHECK: vqdmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes16
+; CHECK: vqdmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes32
+; CHECK: vqdmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlals16:
+;CHECK: vqdmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlals32:
+;CHECK: vqdmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes16
+; CHECK: vqdmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes32
+; CHECK: vqdmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlsls16:
+;CHECK: vqdmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlsls32:
+;CHECK: vqdmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes16
+; CHECK: vqdmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes32
+; CHECK: vqdmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index 60b04bd5830e..e4d29a337cf0 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqshl\\.s8} %t | count 4
-; RUN: grep {vqshl\\.s16} %t | count 4
-; RUN: grep {vqshl\\.s32} %t | count 4
-; RUN: grep {vqshl\\.s64} %t | count 4
-; RUN: grep {vqshl\\.u8} %t | count 4
-; RUN: grep {vqshl\\.u16} %t | count 4
-; RUN: grep {vqshl\\.u32} %t | count 4
-; RUN: grep {vqshl\\.u64} %t | count 4
-; RUN: grep {vqshl\\.s8.*#7} %t | count 2
-; RUN: grep {vqshl\\.s16.*#15} %t | count 2
-; RUN: grep {vqshl\\.s32.*#31} %t | count 2
-; RUN: grep {vqshl\\.s64.*#63} %t | count 2
-; RUN: grep {vqshl\\.u8.*#7} %t | count 2
-; RUN: grep {vqshl\\.u16.*#15} %t | count 2
-; RUN: grep {vqshl\\.u32.*#31} %t | count 2
-; RUN: grep {vqshl\\.u64.*#63} %t | count 2
-; RUN: grep {vqshlu\\.s8} %t | count 2
-; RUN: grep {vqshlu\\.s16} %t | count 2
-; RUN: grep {vqshlu\\.s32} %t | count 2
-; RUN: grep {vqshlu\\.s64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshls8:
+;CHECK: vqshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -28,6 +10,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshls16:
+;CHECK: vqshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -35,6 +19,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshls32:
+;CHECK: vqshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -42,6 +28,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshls64:
+;CHECK: vqshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -49,6 +37,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshlu8:
+;CHECK: vqshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -56,6 +46,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshlu16:
+;CHECK: vqshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -63,6 +55,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshlu32:
+;CHECK: vqshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,6 +64,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshlu64:
+;CHECK: vqshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -77,6 +73,8 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQs8:
+;CHECK: vqshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -84,6 +82,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQs16:
+;CHECK: vqshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,6 +91,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQs32:
+;CHECK: vqshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -98,6 +100,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQs64:
+;CHECK: vqshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -105,6 +109,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQu8:
+;CHECK: vqshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -112,6 +118,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQu16:
+;CHECK: vqshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -119,6 +127,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQu32:
+;CHECK: vqshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -126,6 +136,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQu64:
+;CHECK: vqshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -133,144 +145,192 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
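; The immediate-shift tests below also pin down the shift amount: within a
; CHECK line, {{...}} is a FileCheck regular expression, so a pattern like
; vqshl.s8{{.*#7}} requires the mnemonic and the #7 immediate operand to
; appear on the same line of the generated assembly.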
define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshls_n8:
+;CHECK: vqshl.s8{{.*#7}}
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshls_n16:
+;CHECK: vqshl.s16{{.*#15}}
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshls_n32:
+;CHECK: vqshl.s32{{.*#31}}
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshls_n64:
+;CHECK: vqshl.s64{{.*#63}}
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlu_n8:
+;CHECK: vqshl.u8{{.*#7}}
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlu_n16:
+;CHECK: vqshl.u16{{.*#15}}
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlu_n32:
+;CHECK: vqshl.u32{{.*#31}}
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlu_n64:
+;CHECK: vqshl.u64{{.*#63}}
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlsu_n8:
+;CHECK: vqshlu.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlsu_n16:
+;CHECK: vqshlu.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlsu_n32:
+;CHECK: vqshlu.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlsu_n64:
+;CHECK: vqshlu.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQs_n8:
+;CHECK: vqshl.s8{{.*#7}}
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQs_n16:
+;CHECK: vqshl.s16{{.*#15}}
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQs_n32:
+;CHECK: vqshl.s32{{.*#31}}
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQs_n64:
+;CHECK: vqshl.s64{{.*#63}}
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQu_n8:
+;CHECK: vqshl.u8{{.*#7}}
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQu_n16:
+;CHECK: vqshl.u16{{.*#15}}
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQu_n32:
+;CHECK: vqshl.u32{{.*#31}}
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQu_n64:
+;CHECK: vqshl.u64{{.*#63}}
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQsu_n8:
+;CHECK: vqshlu.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQsu_n16:
+;CHECK: vqshlu.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQsu_n32:
+;CHECK: vqshlu.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQsu_n64:
+;CHECK: vqshlu.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
@@ -305,3 +365,167 @@ declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshls8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshls16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshls32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshls64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshlu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshlu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshlu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshlu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQs8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQs16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQs32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQs64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
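; The vqrshl tests added above cover the rounding variants of the saturating
; register shift: for a negative (rightward) shift amount s, VQRSHL adds a
; rounding bias of 1 << (-s - 1) before shifting, where plain VQSHL truncates.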
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
index 6bd607abb4d0..5da79432bb42 100644
--- a/test/CodeGen/ARM/vqshrn.ll
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -1,63 +1,72 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqshrn\\.s16} %t | count 1
-; RUN: grep {vqshrn\\.s32} %t | count 1
-; RUN: grep {vqshrn\\.s64} %t | count 1
-; RUN: grep {vqshrn\\.u16} %t | count 1
-; RUN: grep {vqshrn\\.u32} %t | count 1
-; RUN: grep {vqshrn\\.u64} %t | count 1
-; RUN: grep {vqshrun\\.s16} %t | count 1
-; RUN: grep {vqshrun\\.s32} %t | count 1
-; RUN: grep {vqshrun\\.s64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
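; The grep-based RUN lines above are replaced by a single FileCheck
; invocation: llc pipes its assembly into FileCheck, which re-reads this
; file and verifies each ;CHECK line in order, anchoring every mnemonic to
; the function label that precedes it instead of counting matches globally.
; Note that the shift amounts in these narrowing tests are negative: the
; vqshiftn intrinsics encode a right shift by n as a per-element constant
; of -n.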
define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrns8:
+;CHECK: vqshrn.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrns16:
+;CHECK: vqshrn.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrns32:
+;CHECK: vqshrn.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrnu8:
+;CHECK: vqshrn.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrnu16:
+;CHECK: vqshrn.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrnu32:
+;CHECK: vqshrn.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshruns8:
+;CHECK: vqshrun.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshruns16:
+;CHECK: vqshrun.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshruns32:
+;CHECK: vqshrun.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
@@ -74,3 +83,87 @@ declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind
declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrns8:
+;CHECK: vqrshrn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrns16:
+;CHECK: vqrshrn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrns32:
+;CHECK: vqrshrn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrnu8:
+;CHECK: vqrshrn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrnu16:
+;CHECK: vqrshrn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrnu32:
+;CHECK: vqrshrn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshruns8:
+;CHECK: vqrshrun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshruns16:
+;CHECK: vqrshrun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshruns32:
+;CHECK: vqrshrun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
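; As with vqrshl, the rounding narrows add a bias of 1 << (n - 1) before
; shifting right by n and saturating to the half-width type. For example,
; vqrshrn.s16 by 8 computes sat_i8((x + 128) >> 8), so an input of 384
; rounds to 2 where the non-rounding vqshrn would produce 1.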
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
index 07052f78dbea..4231fca37e37 100644
--- a/test/CodeGen/ARM/vqsub.ll
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqsub\\.s8} %t | count 2
-; RUN: grep {vqsub\\.s16} %t | count 2
-; RUN: grep {vqsub\\.s32} %t | count 2
-; RUN: grep {vqsub\\.s64} %t | count 2
-; RUN: grep {vqsub\\.u8} %t | count 2
-; RUN: grep {vqsub\\.u16} %t | count 2
-; RUN: grep {vqsub\\.u32} %t | count 2
-; RUN: grep {vqsub\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
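; Saturating subtraction clamps to the bounds of the element type instead of
; wrapping: vqsub.u8 on 10 - 20 yields 0, and vqsub.s8 on -100 - 100 yields
; -128. The tests below only verify instruction selection, not these values.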
define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubs8:
+;CHECK: vqsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubs16:
+;CHECK: vqsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubs32:
+;CHECK: vqsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubs64:
+;CHECK: vqsub.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubu8:
+;CHECK: vqsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubu16:
+;CHECK: vqsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubu32:
+;CHECK: vqsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubu64:
+;CHECK: vqsub.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQs8:
+;CHECK: vqsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQs16:
+;CHECK: vqsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQs32:
+;CHECK: vqsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQs64:
+;CHECK: vqsub.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQu8:
+;CHECK: vqsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQu16:
+;CHECK: vqsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQu32:
+;CHECK: vqsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQu64:
+;CHECK: vqsub.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
new file mode 100644
index 000000000000..99989e9d6144
--- /dev/null
+++ b/test/CodeGen/ARM/vrec.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
+;CHECK: vrecpei32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrecpeQi32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
+;CHECK: vrecpef32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
+;CHECK: vrecpeQf32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrecpsf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrecpsQf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
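; How the two intrinsics are meant to compose: vrecpe gives a rough 1/a
; estimate and vrecps returns the Newton-Raphson correction factor
; (2 - a*b), so one refinement step is x1 = x0 * vrecps(a, x0). A minimal
; sketch under that assumption; @recip_step is a hypothetical helper, not
; one of the tests in this file:
define <2 x float> @recip_step(<2 x float> %a) nounwind { ; hypothetical
  %x0 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
  %c = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %x0)
  %x1 = fmul <2 x float> %x0, %c          ; roughly doubles the accurate bits
  ret <2 x float> %x1
}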
+
+define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
+;CHECK: vrsqrtei32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrsqrteQi32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
+;CHECK: vrsqrtef32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
+;CHECK: vrsqrteQf32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrsqrtsf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
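; vrsqrte/vrsqrts pair up the same way: vrsqrte estimates 1/sqrt(a) and
; vrsqrts returns the correction (3 - a*b)/2, giving the refinement
; x1 = x0 * vrsqrts(a, x0*x0). A minimal sketch under that assumption;
; @rsqrt_step is a hypothetical helper, not one of the tests in this file:
define <2 x float> @rsqrt_step(<2 x float> %a) nounwind { ; hypothetical
  %x0 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
  %xx = fmul <2 x float> %x0, %x0
  %c = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %xx)
  %x1 = fmul <2 x float> %x0, %c
  ret <2 x float> %x1
}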
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
new file mode 100644
index 000000000000..f0a04a441645
--- /dev/null
+++ b/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8:
+;CHECK: vrev64.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev64D16:
+;CHECK: vrev64.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK: test_vrev64D32:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %tmp2
+}
+
+define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK: test_vrev64Df:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x float> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev64Q8:
+;CHECK: vrev64.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev64Q16:
+;CHECK: vrev64.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK: test_vrev64Q32:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %tmp2
+}
+
+define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK: test_vrev64Qf:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev32D8:
+;CHECK: vrev32.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev32D16:
+;CHECK: vrev32.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev32Q8:
+;CHECK: vrev32.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16:
+;CHECK: vrev32.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev16D8:
+;CHECK: vrev16.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev16Q8:
+;CHECK: vrev16.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i8> %tmp2
+}
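; No intrinsics in this file: the element reversals are written as plain
; shufflevector masks, and instruction selection picks the vrev width from
; the span being reversed -- reversing the elements within each 64-bit
; doubleword gives vrev64, within each 32-bit word vrev32, and within each
; 16-bit halfword (mask <1, 0, 3, 2, ...> on i8 elements) vrev16.8.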
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index 8c5c4aad18d8..f3cbec7457d0 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,30 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 4
-; RUN: grep {vshl\\.u16} %t | count 4
-; RUN: grep {vshl\\.u32} %t | count 4
-; RUN: grep {vshl\\.u64} %t | count 4
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vneg\\.s8} %t | count 4
-; RUN: grep {vneg\\.s16} %t | count 4
-; RUN: grep {vneg\\.s32} %t | count 4
-; RUN: grep {vsub\\.i64} %t | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
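; NEON only shifts left by a register amount, so the variable right shifts
; below are lowered as a left shift by the negated amount: lshr and ashr
; become vneg.sN followed by vshl.uN or vshl.sN, and the 64-bit cases use
; vsub.i64 (a subtract from zero) because vneg has no 64-bit form. Immediate
; right shifts still select vshr directly, as the vlshri/vashri tests check.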
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = shl <8 x i8> %tmp1, %tmp2
@@ -32,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = shl <4 x i16> %tmp1, %tmp2
@@ -39,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = shl <2 x i32> %tmp1, %tmp2
@@ -46,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = shl <1 x i64> %tmp1, %tmp2
@@ -53,30 +37,40 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <8 x i8> %tmp2
}
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
ret <4 x i16> %tmp2
}
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
ret <2 x i32> %tmp2
}
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
ret <1 x i64> %tmp2
}
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = shl <16 x i8> %tmp1, %tmp2
@@ -84,6 +78,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = shl <8 x i16> %tmp1, %tmp2
@@ -91,6 +87,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = shl <4 x i32> %tmp1, %tmp2
@@ -98,6 +96,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = shl <2 x i64> %tmp1, %tmp2
@@ -105,30 +105,41 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <16 x i8> %tmp2
}
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
ret <2 x i64> %tmp2
}
define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vlshru8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp1, %tmp2
@@ -136,6 +147,9 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vlshru16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp1, %tmp2
@@ -143,6 +157,9 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vlshru32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp1, %tmp2
@@ -150,6 +167,9 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vlshru64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = lshr <1 x i64> %tmp1, %tmp2
@@ -157,30 +177,41 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
+;CHECK: vlshri8:
+;CHECK: vshr.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <8 x i8> %tmp2
}
define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
+;CHECK: vlshri16:
+;CHECK: vshr.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
ret <4 x i16> %tmp2
}
define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
+;CHECK: vlshri32:
+;CHECK: vshr.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
ret <2 x i32> %tmp2
}
define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
+;CHECK: vlshri64:
+;CHECK: vshr.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
ret <1 x i64> %tmp2
}
define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vlshrQu8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp1, %tmp2
@@ -188,6 +219,9 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vlshrQu16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp1, %tmp2
@@ -195,6 +229,9 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vlshrQu32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp1, %tmp2
@@ -202,6 +239,9 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vlshrQu64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp1, %tmp2
@@ -209,30 +249,48 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vlshrQi8:
+;CHECK: vshr.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <16 x i8> %tmp2
}
define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vlshrQi16:
+;CHECK: vshr.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
ret <8 x i16> %tmp2
}
define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vlshrQi32:
+;CHECK: vshr.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
ret <4 x i32> %tmp2
}
define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vlshrQi64:
+;CHECK: vshr.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
ret <2 x i64> %tmp2
}
+; Example that requires splitting and expanding a vector shift.
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
+entry:
+ %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %shr
+}
+
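; The update() function above carries no CHECK lines on purpose: it is a
; compile-only regression test for the case its comment describes, where
; the <2 x i64> constant shift is presumably legalized by splitting the
; vector and expanding the shift rather than by a single NEON pattern.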
define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vashrs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp1, %tmp2
@@ -240,6 +298,9 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vashrs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp1, %tmp2
@@ -247,6 +308,9 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vashrs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp1, %tmp2
@@ -254,6 +318,9 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vashrs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = ashr <1 x i64> %tmp1, %tmp2
@@ -261,30 +328,41 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
+;CHECK: vashri8:
+;CHECK: vshr.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <8 x i8> %tmp2
}
define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
+;CHECK: vashri16:
+;CHECK: vshr.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
ret <4 x i16> %tmp2
}
define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
+;CHECK: vashri32:
+;CHECK: vshr.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
ret <2 x i32> %tmp2
}
define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
+;CHECK: vashri64:
+;CHECK: vshr.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
ret <1 x i64> %tmp2
}
define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vashrQs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp1, %tmp2
@@ -292,6 +370,9 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vashrQs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp1, %tmp2
@@ -299,6 +380,9 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vashrQs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp1, %tmp2
@@ -306,6 +390,9 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vashrQs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp1, %tmp2
@@ -313,24 +400,32 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vashrQi8:
+;CHECK: vshr.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <16 x i8> %tmp2
}
define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vashrQi16:
+;CHECK: vshr.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
ret <8 x i16> %tmp2
}
define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vashrQi32:
+;CHECK: vshr.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
ret <4 x i32> %tmp2
}
define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vashrQi64:
+;CHECK: vshr.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
ret <2 x i64> %tmp2
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
index cb7cbb89ecdb..3a4f8574e397 100644
--- a/test/CodeGen/ARM/vshiftins.ll
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsli\\.8} %t | count 2
-; RUN: grep {vsli\\.16} %t | count 2
-; RUN: grep {vsli\\.32} %t | count 2
-; RUN: grep {vsli\\.64} %t | count 2
-; RUN: grep {vsri\\.8} %t | count 2
-; RUN: grep {vsri\\.16} %t | count 2
-; RUN: grep {vsri\\.32} %t | count 2
-; RUN: grep {vsri\\.64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
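; vsli and vsri share the llvm.arm.neon.vshiftins intrinsic; the sign of the
; constant shift vector selects the instruction, with positive amounts
; giving shift-left-insert (vsli) and negative amounts shift-right-insert
; (vsri), mirroring the convention used by the other shift intrinsics.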
define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsli8:
+;CHECK: vsli.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -16,6 +10,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsli16:
+;CHECK: vsli.16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -23,6 +19,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsli32:
+;CHECK: vsli.32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >)
@@ -30,6 +28,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsli64:
+;CHECK: vsli.64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >)
@@ -37,6 +37,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsliQ8:
+;CHECK: vsli.8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -44,6 +46,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsliQ16:
+;CHECK: vsli.16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -51,6 +55,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsliQ32:
+;CHECK: vsli.32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -58,6 +64,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsliQ64:
+;CHECK: vsli.64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >)
@@ -65,6 +73,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsri8:
+;CHECK: vsri.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -72,6 +82,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsri16:
+;CHECK: vsri.16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -79,6 +91,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsri32:
+;CHECK: vsri.32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -86,6 +100,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsri64:
+;CHECK: vsri.64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -93,6 +109,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsriQ8:
+;CHECK: vsri.8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -100,6 +118,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsriQ16:
+;CHECK: vsri.16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -107,6 +127,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsriQ32:
+;CHECK: vsri.32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -114,6 +136,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsriQ64:
+;CHECK: vsri.64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
index 993126ea578c..818e71b8ff89 100644
--- a/test/CodeGen/ARM/vshl.ll
+++ b/test/CodeGen/ARM/vshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 2
-; RUN: grep {vshl\\.u16} %t | count 2
-; RUN: grep {vshl\\.u32} %t | count 2
-; RUN: grep {vshl\\.u64} %t | count 2
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -28,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -35,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -42,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -49,6 +37,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshlu8:
+;CHECK: vshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -56,6 +46,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshlu16:
+;CHECK: vshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -63,6 +55,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshlu32:
+;CHECK: vshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,6 +64,8 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshlu64:
+;CHECK: vshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -77,6 +73,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -84,6 +82,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,6 +91,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -98,6 +100,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -105,6 +109,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQu8:
+;CHECK: vshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -112,6 +118,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQu16:
+;CHECK: vshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -119,6 +127,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQu32:
+;CHECK: vshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -126,6 +136,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQu64:
+;CHECK: vshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -136,48 +148,64 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; Test a mix of signed and unsigned intrinsics.
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
@@ -186,96 +214,128 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
; Right shift by immediate:
define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vshrs8:
+;CHECK: vshr.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vshrs16:
+;CHECK: vshr.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vshrs32:
+;CHECK: vshr.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vshrs64:
+;CHECK: vshr.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
+;CHECK: vshru8:
+;CHECK: vshr.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
+;CHECK: vshru16:
+;CHECK: vshr.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
+;CHECK: vshru32:
+;CHECK: vshr.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
+;CHECK: vshru64:
+;CHECK: vshr.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQs8:
+;CHECK: vshr.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQs16:
+;CHECK: vshr.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQs32:
+;CHECK: vshr.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQs64:
+;CHECK: vshr.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
}
define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQu8:
+;CHECK: vshr.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQu16:
+;CHECK: vshr.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQu32:
+;CHECK: vshr.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQu64:
+;CHECK: vshr.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
@@ -300,3 +360,295 @@ declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind re
declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshls8:
+;CHECK: vrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshls16:
+;CHECK: vrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshls32:
+;CHECK: vrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshls64:
+;CHECK: vrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshlu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshlu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshlu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshlu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQs8:
+;CHECK: vrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQs16:
+;CHECK: vrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQs32:
+;CHECK: vrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQs64:
+;CHECK: vrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vrshrs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vrshrs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vrshrs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vrshrs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
+;CHECK: vrshru8:
+;CHECK: vrshr.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
+;CHECK: vrshru16:
+;CHECK: vrshr.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
+;CHECK: vrshru32:
+;CHECK: vrshr.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
+;CHECK: vrshru64:
+;CHECK: vrshr.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQu8:
+;CHECK: vrshr.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQu16:
+;CHECK: vrshr.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQu32:
+;CHECK: vrshr.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQu64:
+;CHECK: vrshr.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
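
A reading note on the shift intrinsics used above: vshifts/vshiftu (and their vrshift counterparts) encode the direction in the sign of a constant shift operand. A positive per-element constant selects a left shift by immediate (signedness is then irrelevant, printed as vshl.iN), a negative constant selects a right shift by the magnitude (vshr.sN or vshr.uN depending on the intrinsic), and a non-constant operand becomes the register form vshl.sN/vshl.uN. A sketch, assuming that convention:

    ; register count  => vshl.s8
    %a = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %x, <8 x i8> %y)
    ; constant +7     => vshl.i8 with immediate #7
    %b = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %x, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
    ; constant -8     => vshr.s8 with immediate #8
    %c = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %x, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)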
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
index f81c09a7b9d3..8e85b98f49b1 100644
--- a/test/CodeGen/ARM/vshll.ll
+++ b/test/CodeGen/ARM/vshll.ll
@@ -1,45 +1,48 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshll\\.s8} %t | count 1
-; RUN: grep {vshll\\.s16} %t | count 1
-; RUN: grep {vshll\\.s32} %t | count 1
-; RUN: grep {vshll\\.u8} %t | count 1
-; RUN: grep {vshll\\.u16} %t | count 1
-; RUN: grep {vshll\\.u32} %t | count 1
-; RUN: grep {vshll\\.i8} %t | count 1
-; RUN: grep {vshll\\.i16} %t | count 1
-; RUN: grep {vshll\\.i32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
+;CHECK: vshlls8:
+;CHECK: vshll.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
+;CHECK: vshlls16:
+;CHECK: vshll.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
+;CHECK: vshlls32:
+;CHECK: vshll.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i64> %tmp2
}
define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
+;CHECK: vshllu8:
+;CHECK: vshll.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
+;CHECK: vshllu16:
+;CHECK: vshll.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
+;CHECK: vshllu32:
+;CHECK: vshll.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i64> %tmp2
@@ -48,18 +51,24 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
; The following tests use the maximum shift count, so the signedness is
; irrelevant. Test both signed and unsigned versions.
define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
+;CHECK: vshlli8:
+;CHECK: vshll.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
+;CHECK: vshlli16:
+;CHECK: vshll.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
+;CHECK: vshlli32:
+;CHECK: vshll.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >)
ret <2 x i64> %tmp2
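
As the comment in this file notes, a widening shift by the full element width pushes every original bit past the extension bits, so the sign of the extension cannot affect the result; llc prints the type-neutral vshll.iN form, and the tests deliberately alternate signed and unsigned intrinsics per width. For example, both of these should lower to the same vshll.i8 with immediate #8 (a sketch using the intrinsics declared above):

    %s = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %x, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
    %u = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %x, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)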
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index bc640cbbca61..e2544f424a2c 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -1,21 +1,24 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshrn\\.i16} %t | count 1
-; RUN: grep {vshrn\\.i32} %t | count 1
-; RUN: grep {vshrn\\.i64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vshrns8:
+;CHECK: vshrn.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vshrns16:
+;CHECK: vshrn.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vshrns32:
+;CHECK: vshrn.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
@@ -24,3 +27,31 @@ define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vrshrns8:
+;CHECK: vrshrn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vrshrns16:
+;CHECK: vrshrn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vrshrns32:
+;CHECK: vrshrn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
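
The new vrshrn tests cover the rounding variant of the narrowing shift. A rounding right shift adds half of the discarded range before shifting, so per lane vrshrn.i16 with #8 computes roughly the following (a scalar sketch of the semantics, not code from the patch):

    %sum = add i16 %x, 128          ; 1 << (8 - 1), the rounding constant
    %sh  = lshr i16 %sum, 8         ; shift right by the immediate
    %res = trunc i16 %sh to i8      ; narrow to the half-width result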
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
index e2829dcddae6..acb672d00fa2 100644
--- a/test/CodeGen/ARM/vsra.ll
+++ b/test/CodeGen/ARM/vsra.ll
@@ -1,22 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsra\\.s8} %t | count 2
-; RUN: grep {vsra\\.s16} %t | count 2
-; RUN: grep {vsra\\.s32} %t | count 2
-; RUN: grep {vsra\\.s64} %t | count 2
-; RUN: grep {vsra\\.u8} %t | count 2
-; RUN: grep {vsra\\.u16} %t | count 2
-; RUN: grep {vsra\\.u32} %t | count 2
-; RUN: grep {vsra\\.u64} %t | count 2
-; RUN: grep {vrsra\\.s8} %t | count 2
-; RUN: grep {vrsra\\.s16} %t | count 2
-; RUN: grep {vrsra\\.s32} %t | count 2
-; RUN: grep {vrsra\\.s64} %t | count 2
-; RUN: grep {vrsra\\.u8} %t | count 2
-; RUN: grep {vrsra\\.u16} %t | count 2
-; RUN: grep {vrsra\\.u32} %t | count 2
-; RUN: grep {vrsra\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsras8:
+;CHECK: vsra.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -25,6 +11,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsras16:
+;CHECK: vsra.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
@@ -33,6 +21,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsras32:
+;CHECK: vsra.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 >
@@ -41,6 +31,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsras64:
+;CHECK: vsra.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = ashr <1 x i64> %tmp2, < i64 64 >
@@ -49,6 +41,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQs8:
+;CHECK: vsra.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -57,6 +51,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQs16:
+;CHECK: vsra.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -65,6 +61,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQs32:
+;CHECK: vsra.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
@@ -73,6 +71,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQs64:
+;CHECK: vsra.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 >
@@ -81,6 +81,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsrau8:
+;CHECK: vsra.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -89,6 +91,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsrau16:
+;CHECK: vsra.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
@@ -97,6 +101,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsrau32:
+;CHECK: vsra.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 >
@@ -105,6 +111,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsrau64:
+;CHECK: vsra.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = lshr <1 x i64> %tmp2, < i64 64 >
@@ -113,6 +121,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQu8:
+;CHECK: vsra.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -121,6 +131,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQu16:
+;CHECK: vsra.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -129,6 +141,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQu32:
+;CHECK: vsra.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
@@ -137,6 +151,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQu64:
+;CHECK: vsra.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 >
@@ -145,6 +161,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsras8:
+;CHECK: vrsra.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -153,6 +171,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsras16:
+;CHECK: vrsra.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -161,6 +181,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsras32:
+;CHECK: vrsra.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -169,6 +191,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsras64:
+;CHECK: vrsra.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -177,6 +201,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsrau8:
+;CHECK: vrsra.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -185,6 +211,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsrau16:
+;CHECK: vrsra.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -193,6 +221,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsrau32:
+;CHECK: vrsra.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -201,6 +231,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsrau64:
+;CHECK: vrsra.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -209,6 +241,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQs8:
+;CHECK: vrsra.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -217,6 +251,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQs16:
+;CHECK: vrsra.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -225,6 +261,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQs32:
+;CHECK: vrsra.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -233,6 +271,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQs64:
+;CHECK: vrsra.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
@@ -241,6 +281,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQu8:
+;CHECK: vrsra.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -249,6 +291,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQu16:
+;CHECK: vrsra.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -257,6 +301,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQu32:
+;CHECK: vrsra.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -265,6 +311,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQu64:
+;CHECK: vrsra.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
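
Each vsra test pairs the vector shift shown above with an add that the hunk context hides; instruction selection folds the two into a single shift-right-and-accumulate. Reconstructed from the surrounding lines, the first test has this shape (a sketch; only the add and ret are inferred):

    define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    ;CHECK: vsras8:
    ;CHECK: vsra.s8
      %tmp1 = load <8 x i8>* %A
      %tmp2 = load <8 x i8>* %B
      %tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
      %tmp4 = add <8 x i8> %tmp1, %tmp3   ; folded with the ashr into vsra.s8
      ret <8 x i8> %tmp4
    }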
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
new file mode 100644
index 000000000000..602b124ffad9
--- /dev/null
+++ b/test/CodeGen/ARM/vst1.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst1i8:
+;CHECK: vst1.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst1i16:
+;CHECK: vst1.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst1i32:
+;CHECK: vst1.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1f:
+;CHECK: vst1.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst1i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1Qi8:
+;CHECK: vst1.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1Qi16:
+;CHECK: vst1.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1Qi32:
+;CHECK: vst1.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1Qf:
+;CHECK: vst1.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
+ ret void
+}
+
+define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
+;CHECK: vst1Qi64:
+;CHECK: vst1.64
+ %tmp1 = load <2 x i64>* %B
+ call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
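
Note that the float tests (vst1f, vst1Qf) check for vst1.32: the store mnemonic encodes only the element size, so <2 x float> and <4 x float> share the .32 suffix with the i32 vectors.

    ;CHECK: vst1f:
    ;CHECK: vst1.32   ; same suffix as the <2 x i32> case; size, not element type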
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
new file mode 100644
index 000000000000..17d6bee0f56c
--- /dev/null
+++ b/test/CodeGen/ARM/vst2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2i8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2i16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2i32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2f:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst2Qi8:
+;CHECK: vst2.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2Qi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2Qi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2Qf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind
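
The vst2i64 test checks for vst1.64 rather than vst2.64: the interleaving VST2/VST3/VST4 encodings have no 64-bit element form, so the <1 x i64> variants (here and in the vst3/vst4 files below) legalize to a plain multi-register vst1.64 store. The CHECK lines reflect that:

    ;CHECK: vst2i64:
    ;CHECK: vst1.64   ; no 64-bit-element vst2 exists; the d registers go out via vst1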
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
new file mode 100644
index 000000000000..a831a0c08ce9
--- /dev/null
+++ b/test/CodeGen/ARM/vst3.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3i8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3i16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3f:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst3i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst3Qi8:
+;CHECK: vst3.8
+;CHECK: vst3.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16:
+;CHECK: vst3.16
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3Qi32:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3Qf:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind
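
The 128-bit vst3/vst4 tests expect their mnemonic twice because a quad-register structured store is emitted as two d-register instructions covering the halves of each q register. The doubled CHECK lines encode that expectation (the register lists below are illustrative):

    ;CHECK: vst3Qi8:
    ;CHECK: vst3.8    ; e.g. vst3.8 {d0, d2, d4}, [r0]
    ;CHECK: vst3.8    ; e.g. vst3.8 {d1, d3, d5}, [r0]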
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
new file mode 100644
index 000000000000..d92c017c30b2
--- /dev/null
+++ b/test/CodeGen/ARM/vst4.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4i8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4i16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4i32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4f:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst4i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst4Qi8:
+;CHECK: vst4.8
+;CHECK: vst4.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4Qi16:
+;CHECK: vst4.16
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4Qi32:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
new file mode 100644
index 000000000000..3bfb14f17b77
--- /dev/null
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -0,0 +1,197 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2lanei8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2lanei16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2lanei32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2lanef:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2laneQi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2laneQi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2laneQf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
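; Note: in all of these lane-store intrinsics the trailing i32 operand is the
; constant lane index, and it surfaces in the assembly as a per-register lane
; selector. A sketch of the mapping (assembly operands are illustrative):
;   call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %t, <2 x i32> %t, i32 1)
;   ==> vst2.32 {d0[1], d1[1]}, [r0]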
+define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3lanei8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3lanei16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3lanei32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3lanef:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3laneQi16:
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+ ret void
+}
+
+define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32:
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+ ret void
+}
+
+define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3laneQf:
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2f32(float*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst3lane.v8i16(i16*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i32(i32*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4f32(float*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+
+
+define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4lanei16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4lanei32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4lanef:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4laneQi16:
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+ ret void
+}
+
+define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4laneQi32:
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4laneQf:
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2f32(float*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst4lane.v8i16(i16*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i32(i32*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4f32(float*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 85dea41835f8..8f0055fd4103 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsub\\.i8} %t | count 2
-; RUN: grep {vsub\\.i16} %t | count 2
-; RUN: grep {vsub\\.i32} %t | count 2
-; RUN: grep {vsub\\.i64} %t | count 2
-; RUN: grep {vsub\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubi8:
+;CHECK: vsub.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = sub <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubi16:
+;CHECK: vsub.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = sub <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubi32:
+;CHECK: vsub.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = sub <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsubi64:
+;CHECK: vsub.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = sub <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vsubf32:
+;CHECK: vsub.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = sub <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsubQi8:
+;CHECK: vsub.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = sub <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubQi16:
+;CHECK: vsub.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = sub <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubQi32:
+;CHECK: vsub.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = sub <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubQi64:
+;CHECK: vsub.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = sub <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vsubQf32:
+;CHECK: vsub.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = sub <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
+
+define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubhni16:
+;CHECK: vsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubhni32:
+;CHECK: vsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubhni64:
+;CHECK: vsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsubhni16:
+;CHECK: vrsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsubhni32:
+;CHECK: vrsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsubhni64:
+;CHECK: vrsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubls8:
+;CHECK: vsubl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubls16:
+;CHECK: vsubl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubls32:
+;CHECK: vsubl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsublu8:
+;CHECK: vsubl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsublu16:
+;CHECK: vsubl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsublu32:
+;CHECK: vsubl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubws8:
+;CHECK: vsubw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubws16:
+;CHECK: vsubw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubws32:
+;CHECK: vsubw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubwu8:
+;CHECK: vsubw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubwu16:
+;CHECK: vsubw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubwu32:
+;CHECK: vsubw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
new file mode 100644
index 000000000000..926498739e8a
--- /dev/null
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+
+define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtbl1:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
+;CHECK: vtbl2:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
+;CHECK: vtbl3:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
+;CHECK: vtbl4:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx1:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx2:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = load <8 x i8>* %C
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx3:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = load <8 x i8>* %C
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx4:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = load <8 x i8>* %C
+ %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+ ret <8 x i8> %tmp8
+}
+
+declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
new file mode 100644
index 000000000000..5122b0981e96
--- /dev/null
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtrni16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtrni32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vtrnf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtrnQi8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtrnQi32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vtrnQf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
new file mode 100644
index 000000000000..e531718d94aa
--- /dev/null
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vuzpi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.
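+; (Hence no vuzpi32/vuzpf test: for <2 x i32> the "even" mask <0, 2> and the
+; "odd" mask <1, 3> are exactly the VTRN.32 transpose masks, so codegen would
+; select vtrn.32 anyway. A hypothetical sketch of such a test, kept commented
+; out since it would only re-test VTRN; the function name is illustrative only:
+;define <2 x i32> @vuzpi32_sketch(<2 x i32>* %A, <2 x i32>* %B) {
+;	%tmp1 = load <2 x i32>* %A
+;	%tmp2 = load <2 x i32>* %B
+;	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+;	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+;	%tmp5 = add <2 x i32> %tmp3, %tmp4
+;	ret <2 x i32> %tmp5
+;}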
+
+define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vuzpQi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vuzpQi32:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vuzpQf:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
new file mode 100644
index 000000000000..32f7e0d02c44
--- /dev/null
+++ b/test/CodeGen/ARM/vzip.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vzipi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
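+; (Same reason there is no vzipi32/vzipf test: the <2 x i32> interleave masks
+; <0, 2> and <1, 3> coincide with the transpose masks, so vtrn.32 is selected.
+; A hypothetical sketch, kept commented out; the function name is illustrative:
+;define <2 x i32> @vzipi32_sketch(<2 x i32>* %A, <2 x i32>* %B) {
+;	%tmp1 = load <2 x i32>* %A
+;	%tmp2 = load <2 x i32>* %B
+;	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+;	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+;	%tmp5 = add <2 x i32> %tmp3, %tmp4
+;	ret <2 x i32> %tmp5
+;}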
+
+define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vzipQi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vzipQi32:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vzipQf:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll
index dadd1b976798..5ac4b8c061d8 100644
--- a/test/CodeGen/ARM/weak.ll
+++ b/test/CodeGen/ARM/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*h
+; RUN: llc < %s -march=arm | grep .weak.*f
+; RUN: llc < %s -march=arm | grep .weak.*h
define weak i32 @f() {
entry:
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index a57a76707ce6..cf327bbf5c87 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak
+; RUN: llc < %s -march=arm | grep .weak
define i32 @f(i32 %a) {
entry:
diff --git a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
index c96b14ac97e5..87d992836bc3 100644
--- a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
+++ b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
@@ -1,5 +1,5 @@
; There should be exactly two calls here (memset and malloc), no more.
-; RUN: llvm-as < %s | llc -march=alpha | grep jsr | count 2
+; RUN: llc < %s -march=alpha | grep jsr | count 2
%typedef.bc_struct = type opaque
declare void @llvm.memset.i64(i8*, i8, i64, i32)
diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
index b45c2a44388e..4b3d022c1d8d 100644
--- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
+++ b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
@@ -1,5 +1,5 @@
; This shouldn't crash
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
@.str_4 = external global [44 x i8] ; <[44 x i8]*> [#uses=0]
diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
index f89997e0bf6b..65d2a8d02ac8 100644
--- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
+++ b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
@@ -1,5 +1,5 @@
; The global symbol should be legalized
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
%struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
index 05ebe1eb888b..45587f08fd6c 100644
--- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
+++ b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
@@ -1,5 +1,5 @@
; This shouldn't crash
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll
index f3ff5b1750fe..2b28903c5014 100644
--- a/test/CodeGen/Alpha/2006-04-04-zextload.ll
+++ b/test/CodeGen/Alpha/2006-04-04-zextload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
index 6b5504757913..5d31bc3798dc 100644
--- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
+++ b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll
index 3f42eda4beb5..14e0bccc8482 100644
--- a/test/CodeGen/Alpha/2006-11-01-vastart.ll
+++ b/test/CodeGen/Alpha/2006-11-01-vastart.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
index 3eac13d2b7ac..b537e250ad86 100644
--- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
+++ b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
;FIXME: this should produce no mul inst. But not crashing will have to do for now
diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
index 9d814da982d4..1a4b40e2da2c 100644
--- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
+++ b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll
index e6e57464cb21..8b9b603fe6fe 100644
--- a/test/CodeGen/Alpha/2008-11-12-Add128.ll
+++ b/test/CodeGen/Alpha/2008-11-12-Add128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3044
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
new file mode 100644
index 000000000000..cfbf7fcdfd90
--- /dev/null
+++ b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=alpha
+
+define i1 @a(float %x) {
+ %r = fcmp ult float %x, 1.0
+ ret i1 %r
+}
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
index 260584b79048..24a74188f8c0 100644
--- a/test/CodeGen/Alpha/add.ll
+++ b/test/CodeGen/Alpha/add.ll
@@ -1,6 +1,6 @@
;test all the shifted and signextending adds and subs with and without consts
;
-; RUN: llvm-as < %s | llc -march=alpha -o %t.s -f
+; RUN: llc < %s -march=alpha -o %t.s
; RUN: grep { addl} %t.s | count 2
; RUN: grep { addq} %t.s | count 2
; RUN: grep { subl} %t.s | count 2
diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll
index 61d020890e89..fa3b949fc7b8 100644
--- a/test/CodeGen/Alpha/add128.ll
+++ b/test/CodeGen/Alpha/add128.ll
@@ -1,6 +1,6 @@
;test for ADDC and ADDE expansion
;
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
define i128 @add128(i128 %x, i128 %y) {
entry:
diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll
index 6e635119e569..9f0035097b0e 100644
--- a/test/CodeGen/Alpha/bic.ll
+++ b/test/CodeGen/Alpha/bic.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the bic instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep {bic}
+; RUN: llc < %s -march=alpha | grep {bic}
define i64 @bar(i64 %x, i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll
index d4618577a044..14f6b46c5490 100644
--- a/test/CodeGen/Alpha/bsr.ll
+++ b/test/CodeGen/Alpha/bsr.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens the bsr instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep bsr
+; RUN: llc < %s -march=alpha | grep bsr
define internal i64 @abc(i32 %x) {
%tmp.2 = add i32 %x, -1 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll
index ee8cda840e0a..24e97a92b86b 100644
--- a/test/CodeGen/Alpha/call_adj.ll
+++ b/test/CodeGen/Alpha/call_adj.ll
@@ -1,5 +1,5 @@
;All this should do is not crash
-;RUN: llvm-as < %s | llc -march=alpha
+;RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll
index 08e1dad2c0e7..9b655f03efdc 100644
--- a/test/CodeGen/Alpha/cmov.ll
+++ b/test/CodeGen/Alpha/cmov.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=alpha | not grep cmovlt
-; RUN: llvm-as < %s | llc -march=alpha | grep cmoveq
+; RUN: llc < %s -march=alpha | not grep cmovlt
+; RUN: llc < %s -march=alpha | grep cmoveq
define i64 @cmov_lt(i64 %a, i64 %c) {
entry:
diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll
index 9b83215181c9..e88d2eec75e1 100644
--- a/test/CodeGen/Alpha/cmpbge.ll
+++ b/test/CodeGen/Alpha/cmpbge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep cmpbge | count 2
+; RUN: llc < %s -march=alpha | grep cmpbge | count 2
define i1 @test1(i64 %A, i64 %B) {
%C = and i64 %A, 255 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll
index 83d97b5833c4..aa1588aa39e8 100644
--- a/test/CodeGen/Alpha/ctlz.ll
+++ b/test/CodeGen/Alpha/ctlz.ll
@@ -1,8 +1,8 @@
; Make sure this testcase codegens to the ctlz instruction
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | not grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | not grep -i ctlz
+; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz
+; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz
+; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz
+; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz
declare i8 @llvm.ctlz.i8(i8)
diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll
index 56027dd3ea7b..230e096b08d2 100644
--- a/test/CodeGen/Alpha/ctlz_e.ll
+++ b/test/CodeGen/Alpha/ctlz_e.ll
@@ -1,5 +1,5 @@
; Make sure this testcase does not use ctpop
-; RUN: llvm-as < %s | llc -march=alpha | not grep -i ctpop
+; RUN: llc < %s -march=alpha | not grep -i ctpop
declare i64 @llvm.ctlz.i64(i64)
diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll
index a528d728be06..f887882cec2f 100644
--- a/test/CodeGen/Alpha/ctpop.ll
+++ b/test/CodeGen/Alpha/ctpop.ll
@@ -1,10 +1,10 @@
; Make sure this testcase codegens to the ctpop instruction
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | \
+; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop
+; RUN: llc < %s -march=alpha -mattr=+CIX | \
; RUN: grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | \
+; RUN: llc < %s -march=alpha -mcpu=ev6 | \
; RUN: not grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | \
+; RUN: llc < %s -march=alpha -mattr=-CIX | \
; RUN: not grep -i ctpop
declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll
index 2539d7247448..b3413d6b5dce 100644
--- a/test/CodeGen/Alpha/eqv.ll
+++ b/test/CodeGen/Alpha/eqv.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the eqv instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep eqv
+; RUN: llc < %s -march=alpha | grep eqv
define i64 @bar(i64 %x, i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
index 7af813454072..ffeafbd75938 100644
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ b/test/CodeGen/Alpha/i32_sub_1.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the subl instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep -i {subl \$16,1,\$0}
+; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
define i32 @foo(i32 signext %x) signext {
diff --git a/test/CodeGen/Alpha/illegal-element-type.ll b/test/CodeGen/Alpha/illegal-element-type.ll
index c95d57153db2..4cf80dee57b7 100644
--- a/test/CodeGen/Alpha/illegal-element-type.ll
+++ b/test/CodeGen/Alpha/illegal-element-type.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=alphaev6-unknown-linux-gnu
+; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu
define void @foo() {
entry:
diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll
index d7b61163e7b8..917c9327dc16 100644
--- a/test/CodeGen/Alpha/jmp_table.ll
+++ b/test/CodeGen/Alpha/jmp_table.ll
@@ -1,9 +1,9 @@
; try to check that we have the most important instructions, which shouldn't
; appear otherwise
-; RUN: llvm-as < %s | llc -march=alpha | grep jmp
-; RUN: llvm-as < %s | llc -march=alpha | grep gprel32
-; RUN: llvm-as < %s | llc -march=alpha | grep ldl
-; RUN: llvm-as < %s | llc -march=alpha | grep rodata
+; RUN: llc < %s -march=alpha | grep jmp
+; RUN: llc < %s -march=alpha | grep gprel32
+; RUN: llc < %s -march=alpha | grep ldl
+; RUN: llc < %s -march=alpha | grep rodata
; END.
target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
index 50c245ff3d9e..93e8b1b04465 100644
--- a/test/CodeGen/Alpha/mb.ll
+++ b/test/CodeGen/Alpha/mb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep mb
+; RUN: llc < %s -march=alpha | grep mb
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll
index b069fea4a5ca..daf8409409dd 100644
--- a/test/CodeGen/Alpha/mul128.ll
+++ b/test/CodeGen/Alpha/mul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
define i128 @__mulvdi3(i128 %a, i128 %b) nounwind {
entry:
diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll
index 5af73a1cc774..4075dd6289eb 100644
--- a/test/CodeGen/Alpha/mul5.ll
+++ b/test/CodeGen/Alpha/mul5.ll
@@ -1,5 +1,5 @@
; Make sure this testcase does not use mulq
-; RUN: llvm-as < %s | llc -march=alpha | not grep -i mul
+; RUN: llc < %s -march=alpha | not grep -i mul
define i64 @foo1(i64 %x) {
entry:
diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll
index ddaed4a0c6e2..0db767f68e51 100644
--- a/test/CodeGen/Alpha/neg1.ll
+++ b/test/CodeGen/Alpha/neg1.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the lda -1 instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep {\\-1}
+; RUN: llc < %s -march=alpha | grep {\\-1}
define i64 @bar() {
entry:
diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll
index cea9f6bc95f5..4f0a5c2946ef 100644
--- a/test/CodeGen/Alpha/not.ll
+++ b/test/CodeGen/Alpha/not.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the eqv instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep eqv
+; RUN: llc < %s -march=alpha | grep eqv
define i64 @bar(i64 %x) {
entry:
diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll
index b8d350dc100e..f930e345ce42 100644
--- a/test/CodeGen/Alpha/ornot.ll
+++ b/test/CodeGen/Alpha/ornot.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the ornot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep ornot
+; RUN: llc < %s -march=alpha | grep ornot
define i64 @bar(i64 %x, i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
index 2d9ed1e413db..96ab4eb400ea 100644
--- a/test/CodeGen/Alpha/private.ll
+++ b/test/CodeGen/Alpha/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=alpha > %t
+; RUN: llc < %s -march=alpha > %t
; RUN: grep \\\$foo: %t
; RUN: grep bsr.*\\\$\\\$foo %t
; RUN: grep \\\$baz: %t
diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll
index 193a47f7ce3f..d6665b5d8d6f 100644
--- a/test/CodeGen/Alpha/rpcc.ll
+++ b/test/CodeGen/Alpha/rpcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep rpcc
+; RUN: llc < %s -march=alpha | grep rpcc
declare i64 @llvm.readcyclecounter()
diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll
index 2344833dc5b3..3042ef3d0237 100644
--- a/test/CodeGen/Alpha/srl_and.ll
+++ b/test/CodeGen/Alpha/srl_and.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i64 @foo(i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll
index cb18559e532c..d26404bfe024 100644
--- a/test/CodeGen/Alpha/sub128.ll
+++ b/test/CodeGen/Alpha/sub128.ll
@@ -1,6 +1,6 @@
;test for SUBC and SUBE expansion
;
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
define i128 @sub128(i128 %x, i128 %y) {
entry:
diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll
index e00e6d7bfe26..ff04de9ef467 100644
--- a/test/CodeGen/Alpha/weak.ll
+++ b/test/CodeGen/Alpha/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*h
+; RUN: llc < %s -march=alpha | grep .weak.*f
+; RUN: llc < %s -march=alpha | grep .weak.*h
define weak i32 @f() {
entry:
diff --git a/test/CodeGen/Alpha/wmb.ll b/test/CodeGen/Alpha/wmb.ll
index f745cd52ba3d..a3e2ccf57256 100644
--- a/test/CodeGen/Alpha/wmb.ll
+++ b/test/CodeGen/Alpha/wmb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep wmb
+; RUN: llc < %s -march=alpha | grep wmb
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
index 7fec19bdf3f5..d00984acf7f3 100644
--- a/test/CodeGen/Alpha/zapnot.ll
+++ b/test/CodeGen/Alpha/zapnot.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i16 @foo(i64 %y) zeroext {
diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll
index 6a33ca2ab21f..cd3caae41d5a 100644
--- a/test/CodeGen/Alpha/zapnot2.ll
+++ b/test/CodeGen/Alpha/zapnot2.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i64 @bar(i64 %x) {
entry:
diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll
index 26aab37d7bb9..f02961f1eaec 100644
--- a/test/CodeGen/Alpha/zapnot3.ll
+++ b/test/CodeGen/Alpha/zapnot3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
;demanded bits mess up this mask in a hard to fix way
;define i64 @foo(i64 %y) {
diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll
index 1be3ca2e3c72..89beeef2d810 100644
--- a/test/CodeGen/Alpha/zapnot4.ll
+++ b/test/CodeGen/Alpha/zapnot4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i64 @foo(i64 %y) {
%tmp = shl i64 %y, 3 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
new file mode 100644
index 000000000000..3ee5e8df9972
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
+
+; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
+; super-register is illegally extended.
+
+define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
+ %y1 = add i16 %x1, 1
+ %y2 = add i16 %x2, 2
+ %y3 = add i16 %x3, 3
+ %y4 = add i16 %x4, 4
+ %z12 = add i16 %y1, %y2
+ %z34 = add i16 %y3, %y4
+ %p = add i16 %z12, %z34
+ ret i16 %p
+}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
new file mode 100644
index 000000000000..e5d1637a50cb
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+declare i16 @llvm.cttz.i16(i16) nounwind readnone
+
+declare i8 @llvm.cttz.i8(i8) nounwind readnone
+
+define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
+ %a = call i8 @llvm.cttz.i8(i8 %A) ; <i8> [#uses=1]
+ %b = call i16 @llvm.cttz.i16(i16 %B) ; <i16> [#uses=1]
+ %d = call i64 @llvm.cttz.i64(i64 %D) ; <i64> [#uses=1]
+ store i8 %a, i8* %AP
+ store i16 %b, i16* %BP
+ store i64 %d, i64* %DP
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
new file mode 100644
index 000000000000..0b731dccd19f
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; When joining live intervals of sub-registers, an MBB live-in list is not
+; updated properly. The register scavenger asserts on an undefined register.
+
+define i32 @foo(i8 %bar) {
+entry:
+ switch i8 %bar, label %bb1203 [
+ i8 117, label %bb1204
+ i8 85, label %bb1204
+ i8 106, label %bb1204
+ ]
+
+bb1203: ; preds = %entry
+ ret i32 1
+
+bb1204: ; preds = %entry, %entry, %entry
+ ret i32 2
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
new file mode 100644
index 000000000000..dcc3ea0dec88
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; LocalRewriter can forget to transfer a <def,dead> flag when setting up call
+; argument registers. This then causes register scavenger asserts.
+
+declare i32 @printf(i8*, i32, float)
+
+define i32 @testissue(i32 %i, float %x, float %y) {
+ br label %bb1
+
+bb1: ; preds = %bb1, %0
+ %x2 = fmul float %x, 5.000000e-01 ; <float> [#uses=1]
+ %y2 = fmul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1]
+ %z2 = fadd float %x2, %y2 ; <float> [#uses=1]
+ %z3 = fadd float undef, %z2 ; <float> [#uses=1]
+ %i1 = shl i32 %i, 3 ; <i32> [#uses=1]
+ %j1 = add i32 %i, 7 ; <i32> [#uses=1]
+ %m1 = add i32 %i1, %j1 ; <i32> [#uses=2]
+ %b = icmp sle i32 %m1, 6 ; <i1> [#uses=1]
+ br i1 %b, label %bb1, label %bb2
+
+bb2: ; preds = %bb1
+ %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
new file mode 100644
index 000000000000..f21da52315fa
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+; XFAIL: *
+
+; An undef argument causes a setugt node to escape instruction selection.
+
+define void @bugt() {
+cond_next305:
+ %tmp306307 = trunc i32 undef to i8 ; <i8> [#uses=1]
+ %tmp308 = icmp ugt i8 %tmp306307, 6 ; <i1> [#uses=1]
+ br i1 %tmp308, label %bb311, label %bb314
+
+bb311: ; preds = %cond_next305
+ unreachable
+
+bb314: ; preds = %cond_next305
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
new file mode 100644
index 000000000000..e982e437d687
--- /dev/null
+++ b/test/CodeGen/Blackfin/add-overflow.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+ type { i24, i1 } ; type %0
+
+define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
+entry:
+ %t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2) ; <%0> [#uses=1]
+ %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
+ br i1 %obit, label %carry, label %normal
+
+normal: ; preds = %entry
+ ret i1 true
+
+carry: ; preds = %entry
+ ret i1 false
+}
+
+declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
new file mode 100644
index 000000000000..3311c03199ee
--- /dev/null
+++ b/test/CodeGen/Blackfin/add.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @add(i32 %A, i32 %B) {
+ %R = add i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
new file mode 100644
index 000000000000..dd5610120b4d
--- /dev/null
+++ b/test/CodeGen/Blackfin/addsub-i128.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; These functions have just the right size to annoy the register scavenger: They
+; use all the scratch registers, but not all the callee-saved registers.
+
+define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+ %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1]
+ %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1]
+ %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1]
+ %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1]
+ %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1]
+ %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1]
+ %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1]
+ %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1]
+ %tmp15 = add i128 %tmp12, %tmp5 ; <i128> [#uses=2]
+ %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1]
+ store i64 %tmp1617, i64* %RL
+ %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1]
+ %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1]
+ store i64 %tmp2122, i64* %RH
+ ret void
+}
+
+define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+ %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1]
+ %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1]
+ %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1]
+ %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1]
+ %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1]
+ %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1]
+ %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1]
+ %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1]
+ %tmp15 = sub i128 %tmp5, %tmp12 ; <i128> [#uses=2]
+ %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1]
+ store i64 %tmp1617, i64* %RL
+ %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1]
+ %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1]
+ store i64 %tmp2122, i64* %RH
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
new file mode 100644
index 000000000000..c63adaba06cf
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i1.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @add(i1 %A, i1 %B) {
+ %R = add i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @sub(i1 %A, i1 %B) {
+ %R = sub i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @mul(i1 %A, i1 %B) {
+ %R = mul i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @sdiv(i1 %A, i1 %B) {
+ %R = sdiv i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @udiv(i1 %A, i1 %B) {
+ %R = udiv i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @srem(i1 %A, i1 %B) {
+ %R = srem i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @urem(i1 %A, i1 %B) {
+ %R = urem i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @and(i1 %A, i1 %B) {
+ %R = and i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @or(i1 %A, i1 %B) {
+ %R = or i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @xor(i1 %A, i1 %B) {
+ %R = xor i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
new file mode 100644
index 000000000000..541e9a8dc948
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i16.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @add(i16 %A, i16 %B) {
+ %R = add i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @sub(i16 %A, i16 %B) {
+ %R = sub i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @mul(i16 %A, i16 %B) {
+ %R = mul i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @sdiv(i16 %A, i16 %B) {
+ %R = sdiv i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @udiv(i16 %A, i16 %B) {
+ %R = udiv i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @srem(i16 %A, i16 %B) {
+ %R = srem i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @urem(i16 %A, i16 %B) {
+ %R = urem i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
new file mode 100644
index 000000000000..4b5dbfcb957e
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i32.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @add(i32 %A, i32 %B) {
+ %R = add i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @sub(i32 %A, i32 %B) {
+ %R = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @mul(i32 %A, i32 %B) {
+ %R = mul i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @sdiv(i32 %A, i32 %B) {
+ %R = sdiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @udiv(i32 %A, i32 %B) {
+ %R = udiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @srem(i32 %A, i32 %B) {
+ %R = srem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @urem(i32 %A, i32 %B) {
+ %R = urem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @and(i32 %A, i32 %B) {
+ %R = and i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @or(i32 %A, i32 %B) {
+ %R = or i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @xor(i32 %A, i32 %B) {
+ %R = xor i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
new file mode 100644
index 000000000000..d4dd8e2703bf
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i64.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i64 @add(i64 %A, i64 %B) {
+ %R = add i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @sub(i64 %A, i64 %B) {
+ %R = sub i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @mul(i64 %A, i64 %B) {
+ %R = mul i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @sdiv(i64 %A, i64 %B) {
+ %R = sdiv i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @udiv(i64 %A, i64 %B) {
+ %R = udiv i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @srem(i64 %A, i64 %B) {
+ %R = srem i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @urem(i64 %A, i64 %B) {
+ %R = urem i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @and(i64 %A, i64 %B) {
+ %R = and i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @or(i64 %A, i64 %B) {
+ %R = or i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @xor(i64 %A, i64 %B) {
+ %R = xor i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
new file mode 100644
index 000000000000..2c7ce9d1015a
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i8.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin
+
+define i8 @add(i8 %A, i8 %B) {
+ %R = add i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @sub(i8 %A, i8 %B) {
+ %R = sub i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @mul(i8 %A, i8 %B) {
+ %R = mul i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @sdiv(i8 %A, i8 %B) {
+ %R = sdiv i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @udiv(i8 %A, i8 %B) {
+ %R = udiv i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @srem(i8 %A, i8 %B) {
+ %R = srem i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @urem(i8 %A, i8 %B) {
+ %R = urem i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @and(i8 %A, i8 %B) {
+ %R = and i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @or(i8 %A, i8 %B) {
+ %R = or i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @xor(i8 %A, i8 %B) {
+ %R = xor i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
new file mode 100644
index 000000000000..85040df0fde5
--- /dev/null
+++ b/test/CodeGen/Blackfin/basictest.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define void @void(i32, i32) {
+ add i32 0, 0 ; <i32>:3 [#uses=2]
+ sub i32 0, 4 ; <i32>:4 [#uses=2]
+ br label %5
+
+; <label>:5 ; preds = %5, %2
+ add i32 %0, %1 ; <i32>:6 [#uses=2]
+ sub i32 %6, %4 ; <i32>:7 [#uses=1]
+ icmp sle i32 %7, %3 ; <i1>:8 [#uses=1]
+ br i1 %8, label %9, label %5
+
+; <label>:9 ; preds = %5
+ add i32 %0, %1 ; <i32>:10 [#uses=0]
+ sub i32 %6, %4 ; <i32>:11 [#uses=1]
+ icmp sle i32 %11, %3 ; <i1>:12 [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/burg.ll b/test/CodeGen/Blackfin/burg.ll
new file mode 100644
index 000000000000..8cc3713b7e73
--- /dev/null
+++ b/test/CodeGen/Blackfin/burg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+ %IntList = type %struct.intlist*
+ %ReadFn = type i32 ()*
+ %YYSTYPE = type { %IntList }
+ %struct.intlist = type { i32, %IntList }
+@yyval = external global %YYSTYPE ; <%YYSTYPE*> [#uses=1]
+
+define i32 @yyparse() {
+bb0:
+ %reg254 = load i16* null ; <i16> [#uses=1]
+ %reg254-idxcast = sext i16 %reg254 to i64 ; <i64> [#uses=1]
+ %reg254-idxcast-scale = mul i64 %reg254-idxcast, -1 ; <i64> [#uses=1]
+ %reg254-idxcast-scale-offset = add i64 %reg254-idxcast-scale, 1 ; <i64> [#uses=1]
+ %reg261.idx1 = getelementptr %YYSTYPE* null, i64 %reg254-idxcast-scale-offset, i32 0 ; <%IntList*> [#uses=1]
+ %reg261 = load %IntList* %reg261.idx1 ; <%IntList> [#uses=1]
+ store %IntList %reg261, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
+ unreachable
+}
diff --git a/test/CodeGen/Blackfin/cmp-small-imm.ll b/test/CodeGen/Blackfin/cmp-small-imm.ll
new file mode 100644
index 000000000000..e1732a8f806b
--- /dev/null
+++ b/test/CodeGen/Blackfin/cmp-small-imm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @cmp3(i32 %A) {
+ %R = icmp uge i32 %A, 2
+ ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/cmp64.ll b/test/CodeGen/Blackfin/cmp64.ll
new file mode 100644
index 000000000000..ef5bf45861dd
--- /dev/null
+++ b/test/CodeGen/Blackfin/cmp64.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin
+
+; This test tries to use a JustCC register as a data operand for MOVEcc. It
+; calls copyRegToReg(JustCC -> DP), failing because JustCC can only be copied to
+; D. The proper solution would be to restrict the virtual register to D only.
+
+define i32 @main() {
+entry:
+ br label %loopentry
+
+loopentry:
+ %done = icmp sle i64 undef, 5
+ br i1 %done, label %loopentry, label %exit.1
+
+exit.1:
+ ret i32 0
+}
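+
+; A hedged sketch of the offending pattern (illustrative names, not the
+; exact DAG): lowering the i64 compare produces something like
+;   %c = icmp ... i64 ...            ; condition lands in a JustCC register
+;   %r = select i1 %c, ...           ; becomes MOVEcc, which wants %c as a
+;                                    ; data operand and so requests the
+;                                    ; unsupported JustCC -> DP copy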
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
new file mode 100644
index 000000000000..e9b66ebe5772
--- /dev/null
+++ b/test/CodeGen/Blackfin/ct32.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.ctpop.i32(i32)
+
+define i32 @ctlztest(i32 %B) {
+ %b = call i32 @llvm.ctlz.i32( i32 %B )
+ ret i32 %b;
+}
+
+define i32 @cttztest(i32 %B) {
+ %b = call i32 @llvm.cttz.i32( i32 %B )
+ ret i32 %b;
+}
+
+define i32 @ctpoptest(i32 %B) {
+ %b = call i32 @llvm.ctpop.i32( i32 %B )
+ ret i32 %b;
+}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
new file mode 100644
index 000000000000..ac4bdcffbe95
--- /dev/null
+++ b/test/CodeGen/Blackfin/ct64.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bfin
+
+declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctlztest(i64 %B) {
+ %b = call i64 @llvm.ctlz.i64( i64 %B )
+ ret i64 %b;
+}
+
+define i64 @cttztest(i64 %B) {
+ %b = call i64 @llvm.cttz.i64( i64 %B )
+ ret i64 %b;
+}
+
+define i64 @ctpoptest(i64 %B) {
+ %b = call i64 @llvm.ctpop.i64( i64 %B )
+ ret i64 %b;
+}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
new file mode 100644
index 000000000000..56a65c05853e
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctlz16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.ctlz.i16(i16)
+
+define i16 @ctlztest(i16 %B) {
+ %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+define i16 @ctlztest_z(i16 zeroext %B) {
+ %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
+define i16 @ctlztest_s(i16 signext %B) {
+ %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/ctlz64.ll b/test/CodeGen/Blackfin/ctlz64.ll
new file mode 100644
index 000000000000..3e22f8843553
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctlz64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+@.str = external constant [14 x i8] ; <[14 x i8]*> [#uses=1]
+
+define i32 @main(i64 %arg) nounwind {
+entry:
+ %tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg) ; <i64> [#uses=1]
+ %tmp48 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1]
+ %tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* noalias, ...) nounwind
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
new file mode 100644
index 000000000000..cbbb3d9831a8
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctpop16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.ctpop.i16(i16)
+
+define i16 @ctpoptest(i16 %B) {
+ %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+define i16 @ctpoptest_z(i16 zeroext %B) {
+ %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
+define i16 @ctpoptest_s(i16 signext %B) {
+ %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
new file mode 100644
index 000000000000..05fe9bfd4469
--- /dev/null
+++ b/test/CodeGen/Blackfin/cttz16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.cttz.i16(i16)
+
+define i16 @cttztest(i16 %B) {
+ %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+define i16 @cttztest_z(i16 zeroext %B) {
+ %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
+define i16 @cttztest_s(i16 signext %B) {
+ %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/cycles.ll b/test/CodeGen/Blackfin/cycles.ll
new file mode 100644
index 000000000000..6451c747bd70
--- /dev/null
+++ b/test/CodeGen/Blackfin/cycles.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin | FileCheck %s
+
+declare i64 @llvm.readcyclecounter()
+
+; CHECK: cycles
+; CHECK: cycles2
+define i64 @cyc64() {
+ %tmp.1 = call i64 @llvm.readcyclecounter()
+ ret i64 %tmp.1
+}
+
+; CHECK: cycles
+define i32 @cyc32() {
+ %tmp.1 = call i64 @llvm.readcyclecounter()
+ %s = trunc i64 %tmp.1 to i32
+ ret i32 %s
+}
diff --git a/test/CodeGen/Blackfin/dg.exp b/test/CodeGen/Blackfin/dg.exp
new file mode 100644
index 000000000000..5fdbe5feb087
--- /dev/null
+++ b/test/CodeGen/Blackfin/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target Blackfin] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
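+
+# The glob above gathers every *.ll, *.c, and *.cpp file in this directory,
+# but only when this LLVM build includes the Blackfin target; otherwise the
+# whole directory is skipped.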
diff --git a/test/CodeGen/Blackfin/double-cast.ll b/test/CodeGen/Blackfin/double-cast.ll
new file mode 100644
index 000000000000..815ca797d752
--- /dev/null
+++ b/test/CodeGen/Blackfin/double-cast.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %1 = call i32 (i8*, ...)* @printf(i8* undef, double undef)
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/frameindex.ll b/test/CodeGen/Blackfin/frameindex.ll
new file mode 100644
index 000000000000..7e677fbf18cf
--- /dev/null
+++ b/test/CodeGen/Blackfin/frameindex.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32)
+
+define void @foo() {
+bb0:
+ %V = alloca [256 x i32], i32 256 ; <[256 x i32]*> [#uses=1]
+ %0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i17mem.ll b/test/CodeGen/Blackfin/i17mem.ll
new file mode 100644
index 000000000000..bc5ade7416fa
--- /dev/null
+++ b/test/CodeGen/Blackfin/i17mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i17_l = external global i17 ; <i17*> [#uses=1]
+@i17_s = external global i17 ; <i17*> [#uses=1]
+
+define void @i17_ls() nounwind {
+ %tmp = load i17* @i17_l ; <i17> [#uses=1]
+ store i17 %tmp, i17* @i17_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i1mem.ll b/test/CodeGen/Blackfin/i1mem.ll
new file mode 100644
index 000000000000..cb03e3d7fcb0
--- /dev/null
+++ b/test/CodeGen/Blackfin/i1mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i1_l = external global i1 ; <i1*> [#uses=1]
+@i1_s = external global i1 ; <i1*> [#uses=1]
+
+define void @i1_ls() nounwind {
+ %tmp = load i1* @i1_l ; <i1> [#uses=1]
+ store i1 %tmp, i1* @i1_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i1ops.ll b/test/CodeGen/Blackfin/i1ops.ll
new file mode 100644
index 000000000000..6b5612cc4997
--- /dev/null
+++ b/test/CodeGen/Blackfin/i1ops.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @adj(i32 %d.1, i32 %ct.1) {
+entry:
+ %tmp.22.not = trunc i32 %ct.1 to i1 ; <i1> [#uses=1]
+ %tmp.221 = xor i1 %tmp.22.not, true ; <i1> [#uses=1]
+ %tmp.26 = or i1 false, %tmp.221 ; <i1> [#uses=1]
+ %tmp.27 = zext i1 %tmp.26 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp.27
+}
diff --git a/test/CodeGen/Blackfin/i216mem.ll b/test/CodeGen/Blackfin/i216mem.ll
new file mode 100644
index 000000000000..9f8cf48e8756
--- /dev/null
+++ b/test/CodeGen/Blackfin/i216mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i216_l = external global i216 ; <i216*> [#uses=1]
+@i216_s = external global i216 ; <i216*> [#uses=1]
+
+define void @i216_ls() nounwind {
+ %tmp = load i216* @i216_l ; <i216> [#uses=1]
+ store i216 %tmp, i216* @i216_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i248mem.ll b/test/CodeGen/Blackfin/i248mem.ll
new file mode 100644
index 000000000000..db23f541adcb
--- /dev/null
+++ b/test/CodeGen/Blackfin/i248mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin
+@i248_l = external global i248 ; <i248*> [#uses=1]
+@i248_s = external global i248 ; <i248*> [#uses=1]
+
+define void @i248_ls() nounwind {
+ %tmp = load i248* @i248_l ; <i248> [#uses=1]
+ store i248 %tmp, i248* @i248_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i256mem.ll b/test/CodeGen/Blackfin/i256mem.ll
new file mode 100644
index 000000000000..bc5ade7416fa
--- /dev/null
+++ b/test/CodeGen/Blackfin/i256mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i17_l = external global i17 ; <i17*> [#uses=1]
+@i17_s = external global i17 ; <i17*> [#uses=1]
+
+define void @i17_ls() nounwind {
+ %tmp = load i17* @i17_l ; <i17> [#uses=1]
+ store i17 %tmp, i17* @i17_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i256param.ll b/test/CodeGen/Blackfin/i256param.ll
new file mode 100644
index 000000000000..df74c9a6e0e8
--- /dev/null
+++ b/test/CodeGen/Blackfin/i256param.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i256_s = external global i256 ; <i256*> [#uses=1]
+
+define void @i256_ls(i256 %x) nounwind {
+ store i256 %x, i256* @i256_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i56param.ll b/test/CodeGen/Blackfin/i56param.ll
new file mode 100644
index 000000000000..ca0256391b1f
--- /dev/null
+++ b/test/CodeGen/Blackfin/i56param.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i56_l = external global i56 ; <i56*> [#uses=1]
+@i56_s = external global i56 ; <i56*> [#uses=1]
+
+define void @i56_ls(i56 %x) nounwind {
+ store i56 %x, i56* @i56_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i8mem.ll b/test/CodeGen/Blackfin/i8mem.ll
new file mode 100644
index 000000000000..ea3a67e4994c
--- /dev/null
+++ b/test/CodeGen/Blackfin/i8mem.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin
+
+@i8_l = external global i8 ; <i8*> [#uses=1]
+@i8_s = external global i8 ; <i8*> [#uses=1]
+
+define void @i8_ls() nounwind {
+ %tmp = load i8* @i8_l ; <i8> [#uses=1]
+ store i8 %tmp, i8* @i8_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/inline-asm.ll b/test/CodeGen/Blackfin/inline-asm.ll
new file mode 100644
index 000000000000..d623f6bd95aa
--- /dev/null
+++ b/test/CodeGen/Blackfin/inline-asm.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=bfin | FileCheck %s
+
+; Standard "r"
+; CHECK: r0 = r0 + r1;
+define i32 @add_r(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "d"
+; CHECK: r0 = r0 - r1;
+define i32 @add_d(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "a" for P-regs
+; CHECK: p0 = (p0 + p1) << 1;
+define i32 @add_a(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "z" for P0, P1, P2. This is not a real regclass
+; CHECK: p0 = (p0 + p1) << 2;
+define i32 @add_Z(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "C" for CC. This is a single register
+; CHECK: cc = p0 < p1;
+; CHECK: r0 = cc;
+define i32 @add_C(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
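+; Constraint letters can also be mixed per operand. A hedged sketch reusing
+; the letters above ("d" = D regs, "a" = P regs); this is not part of the
+; original test, so it is left commented out:
+;   %R = call i32 asm "$0 = $1 + $2;", "=d,d,a"(i32 %A, i32 %B) nounwind
+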
diff --git a/test/CodeGen/Blackfin/int-setcc.ll b/test/CodeGen/Blackfin/int-setcc.ll
new file mode 100644
index 000000000000..6bd9f86a999c
--- /dev/null
+++ b/test/CodeGen/Blackfin/int-setcc.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define fastcc void @Evaluate() {
+entry:
+ br i1 false, label %cond_false186, label %cond_true
+
+cond_true: ; preds = %entry
+ ret void
+
+cond_false186: ; preds = %entry
+ br i1 false, label %cond_true293, label %bb203
+
+bb203: ; preds = %cond_false186
+ ret void
+
+cond_true293: ; preds = %cond_false186
+ br i1 false, label %cond_true298, label %cond_next317
+
+cond_true298: ; preds = %cond_true293
+ br i1 false, label %cond_next518, label %cond_true397.preheader
+
+cond_next317: ; preds = %cond_true293
+ ret void
+
+cond_true397.preheader: ; preds = %cond_true298
+ ret void
+
+cond_next518: ; preds = %cond_true298
+ br i1 false, label %bb1069, label %cond_true522
+
+cond_true522: ; preds = %cond_next518
+ ret void
+
+bb1069: ; preds = %cond_next518
+ br i1 false, label %cond_next1131, label %bb1096
+
+bb1096: ; preds = %bb1069
+ ret void
+
+cond_next1131: ; preds = %bb1069
+ br i1 false, label %cond_next1207, label %cond_true1150
+
+cond_true1150: ; preds = %cond_next1131
+ ret void
+
+cond_next1207: ; preds = %cond_next1131
+ br i1 false, label %cond_next1219, label %cond_true1211
+
+cond_true1211: ; preds = %cond_next1207
+ ret void
+
+cond_next1219: ; preds = %cond_next1207
+ br i1 false, label %cond_true1223, label %cond_next1283
+
+cond_true1223: ; preds = %cond_next1219
+ br i1 false, label %cond_true1254, label %cond_true1264
+
+cond_true1254: ; preds = %cond_true1223
+ br i1 false, label %bb1567, label %cond_true1369.preheader
+
+cond_true1264: ; preds = %cond_true1223
+ ret void
+
+cond_next1283: ; preds = %cond_next1219
+ ret void
+
+cond_true1369.preheader: ; preds = %cond_true1254
+ ret void
+
+bb1567: ; preds = %cond_true1254
+ %tmp1605 = load i8* null ; <i8> [#uses=1]
+ %tmp1606 = icmp eq i8 %tmp1605, 0 ; <i1> [#uses=1]
+ br i1 %tmp1606, label %cond_next1637, label %cond_true1607
+
+cond_true1607: ; preds = %bb1567
+ ret void
+
+cond_next1637: ; preds = %bb1567
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/invalid-apint.ll b/test/CodeGen/Blackfin/invalid-apint.ll
new file mode 100644
index 000000000000..a8c01ba65f88
--- /dev/null
+++ b/test/CodeGen/Blackfin/invalid-apint.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin
+
+; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"),
+; function trunc, file APInt.cpp, line 956.
+
+@str2 = external global [29 x i8]
+
+define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
+entry:
+ %tmp17 = sext i8 %a13 to i32
+ %tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0)
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
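+
+; A hedged reading of the crash above: the small integer and float arguments
+; are promoted when passed through "...", and legalizing one of the promoted
+; values asked APInt::trunc for a truncate to a width that was not strictly
+; smaller, which the quoted assertion rejects. Illustrative only; not
+; verified against the original failure.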
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
new file mode 100644
index 000000000000..5f49e9d193e4
--- /dev/null
+++ b/test/CodeGen/Blackfin/jumptable.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
+
+; CHECK: .section .rodata
+; CHECK: JTI1_0:
+; CHECK: .long .BB1_1
+
+define i32 @oper(i32 %op, i32 %A, i32 %B) {
+entry:
+ switch i32 %op, label %bbx [
+ i32 1 , label %bb1
+ i32 2 , label %bb2
+ i32 3 , label %bb3
+ i32 4 , label %bb4
+ i32 5 , label %bb5
+ i32 6 , label %bb6
+ i32 7 , label %bb7
+ i32 8 , label %bb8
+ i32 9 , label %bb9
+ i32 10, label %bb10
+ ]
+bb1:
+ %R1 = add i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R1
+bb2:
+ %R2 = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R2
+bb3:
+ %R3 = mul i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R3
+bb4:
+ %R4 = sdiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R4
+bb5:
+ %R5 = udiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R5
+bb6:
+ %R6 = srem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R6
+bb7:
+ %R7 = urem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R7
+bb8:
+ %R8 = and i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R8
+bb9:
+ %R9 = or i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R9
+bb10:
+ %R10 = xor i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R10
+bbx:
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/large-switch.ll b/test/CodeGen/Blackfin/large-switch.ll
new file mode 100644
index 000000000000..02d32ef85f12
--- /dev/null
+++ b/test/CodeGen/Blackfin/large-switch.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -march=bfin
+
+; The switch expansion uses a dynamic shl, and it produces a jumptable
+
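+; A hedged sketch of what that lowering can look like (illustrative, not the
+; actual output): cases sharing a destination are grouped into a mask and
+; tested with a dynamic shift,
+;   %off = sub i32 %op, %low    ; rebase to the subrange's smallest case
+;   %bit = shl i32 1, %off      ; the dynamic shl
+;   %hit = and i32 %bit, %mask  ; %mask marks cases with a common target
+;   %go  = icmp ne i32 %hit, 0
+; while the dense middle ranges become the jumptable.
+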
+define void @athlon_fp_unit_ready_cost() {
+entry:
+ switch i32 0, label %UnifiedReturnBlock [
+ i32 -1, label %bb2063
+ i32 19, label %bb2035
+ i32 20, label %bb2035
+ i32 21, label %bb2035
+ i32 23, label %bb2035
+ i32 24, label %bb2035
+ i32 27, label %bb2035
+ i32 32, label %bb2035
+ i32 33, label %bb1994
+ i32 35, label %bb2035
+ i32 36, label %bb1994
+ i32 90, label %bb1948
+ i32 94, label %bb1948
+ i32 95, label %bb1948
+ i32 133, label %bb1419
+ i32 135, label %bb1238
+ i32 136, label %bb1238
+ i32 137, label %bb1238
+ i32 138, label %bb1238
+ i32 139, label %bb1201
+ i32 140, label %bb1201
+ i32 141, label %bb1154
+ i32 142, label %bb1126
+ i32 144, label %bb1201
+ i32 145, label %bb1126
+ i32 146, label %bb1201
+ i32 147, label %bb1126
+ i32 148, label %bb1201
+ i32 149, label %bb1126
+ i32 150, label %bb1201
+ i32 151, label %bb1126
+ i32 152, label %bb1096
+ i32 153, label %bb1096
+ i32 154, label %bb1096
+ i32 157, label %bb1096
+ i32 158, label %bb1096
+ i32 159, label %bb1096
+ i32 162, label %bb1096
+ i32 163, label %bb1096
+ i32 164, label %bb1096
+ i32 167, label %bb1201
+ i32 168, label %bb1201
+ i32 170, label %bb1201
+ i32 171, label %bb1201
+ i32 173, label %bb1201
+ i32 174, label %bb1201
+ i32 176, label %bb1201
+ i32 177, label %bb1201
+ i32 179, label %bb993
+ i32 180, label %bb993
+ i32 181, label %bb993
+ i32 182, label %bb993
+ i32 183, label %bb993
+ i32 184, label %bb993
+ i32 365, label %bb1126
+ i32 366, label %bb1126
+ i32 367, label %bb1126
+ i32 368, label %bb1126
+ i32 369, label %bb1126
+ i32 370, label %bb1126
+ i32 371, label %bb1126
+ i32 372, label %bb1126
+ i32 373, label %bb1126
+ i32 384, label %bb1126
+ i32 385, label %bb1126
+ i32 386, label %bb1126
+ i32 387, label %bb1126
+ i32 388, label %bb1126
+ i32 389, label %bb1126
+ i32 390, label %bb1126
+ i32 391, label %bb1126
+ i32 392, label %bb1126
+ i32 525, label %bb919
+ i32 526, label %bb839
+ i32 528, label %bb919
+ i32 529, label %bb839
+ i32 532, label %cond_next6.i97
+ i32 533, label %cond_next6.i81
+ i32 534, label %bb495
+ i32 536, label %cond_next6.i81
+ i32 537, label %cond_next6.i81
+ i32 538, label %bb396
+ i32 539, label %bb288
+ i32 541, label %bb396
+ i32 542, label %bb396
+ i32 543, label %bb396
+ i32 544, label %bb396
+ i32 545, label %bb189
+ i32 546, label %cond_next6.i
+ i32 547, label %bb189
+ i32 548, label %cond_next6.i
+ i32 549, label %bb189
+ i32 550, label %cond_next6.i
+ i32 551, label %bb189
+ i32 552, label %cond_next6.i
+ i32 553, label %bb189
+ i32 554, label %cond_next6.i
+ i32 555, label %bb189
+ i32 556, label %cond_next6.i
+ i32 557, label %bb189
+ i32 558, label %cond_next6.i
+ i32 618, label %bb40
+ i32 619, label %bb18
+ i32 620, label %bb40
+ i32 621, label %bb10
+ i32 622, label %bb10
+ ]
+
+bb10:
+ ret void
+
+bb18:
+ ret void
+
+bb40:
+ ret void
+
+cond_next6.i:
+ ret void
+
+bb189:
+ ret void
+
+bb288:
+ ret void
+
+bb396:
+ ret void
+
+bb495:
+ ret void
+
+cond_next6.i81:
+ ret void
+
+cond_next6.i97:
+ ret void
+
+bb839:
+ ret void
+
+bb919:
+ ret void
+
+bb993:
+ ret void
+
+bb1096:
+ ret void
+
+bb1126:
+ ret void
+
+bb1154:
+ ret void
+
+bb1201:
+ ret void
+
+bb1238:
+ ret void
+
+bb1419:
+ ret void
+
+bb1948:
+ ret void
+
+bb1994:
+ ret void
+
+bb2035:
+ ret void
+
+bb2063:
+ ret void
+
+UnifiedReturnBlock:
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/load-i16.ll b/test/CodeGen/Blackfin/load-i16.ll
new file mode 100644
index 000000000000..eb18d410d088
--- /dev/null
+++ b/test/CodeGen/Blackfin/load-i16.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; This somewhat contrived function heavily exercises register classes
+; It can trick -join-cross-class-copies into making illegal joins
+
+define void @f(i16** nocapture %p) nounwind readonly {
+entry:
+ %tmp1 = load i16** %p ; <i16*> [#uses=1]
+ %tmp2 = load i16* %tmp1 ; <i16> [#uses=1]
+ %ptr = getelementptr i16* %tmp1, i16 %tmp2
+ store i16 %tmp2, i16* %ptr
+ ret void
+}
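+
+; Why this exercises register classes: %tmp1 must live in a pointer register
+; to feed both loads and the getelementptr, while %tmp2 is used both as a
+; data value (the store) and as an index, so a cross-class coalescer can be
+; tempted into an illegal join. (A hedged reading of the comment above.)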
diff --git a/test/CodeGen/Blackfin/logic-i16.ll b/test/CodeGen/Blackfin/logic-i16.ll
new file mode 100644
index 000000000000..e44672ff4200
--- /dev/null
+++ b/test/CodeGen/Blackfin/logic-i16.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @and(i16 %A, i16 %B) {
+ %R = and i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @or(i16 %A, i16 %B) {
+ %R = or i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @xor(i16 %A, i16 %B) {
+ %R = xor i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
new file mode 100644
index 000000000000..8c52874e773b
--- /dev/null
+++ b/test/CodeGen/Blackfin/many-args.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+ type { i32, float, float, float, float, float, float, float, float, float, float } ; type %0
+ %struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+entry:
+ %tmp.218 = load float* null ; <float> [#uses=1]
+ %tmp.219 = getelementptr %0* null, i64 0, i32 6 ; <float*> [#uses=1]
+ %tmp.220 = load float* %tmp.219 ; <float> [#uses=1]
+ %tmp.221 = getelementptr %0* null, i64 0, i32 7 ; <float*> [#uses=1]
+ %tmp.222 = load float* %tmp.221 ; <float> [#uses=1]
+ %tmp.223 = getelementptr %0* null, i64 0, i32 8 ; <float*> [#uses=1]
+ %tmp.224 = load float* %tmp.223 ; <float> [#uses=1]
+ %tmp.225 = getelementptr %0* null, i64 0, i32 9 ; <float*> [#uses=1]
+ %tmp.226 = load float* %tmp.225 ; <float> [#uses=1]
+ %tmp.227 = getelementptr %0* null, i64 0, i32 10 ; <float*> [#uses=1]
+ %tmp.228 = load float* %tmp.227 ; <float> [#uses=1]
+ call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228)
+ ret i32 0
+}
+
+declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/Blackfin/mulhu.ll b/test/CodeGen/Blackfin/mulhu.ll
new file mode 100644
index 000000000000..72bacee33eb2
--- /dev/null
+++ b/test/CodeGen/Blackfin/mulhu.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+ %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
+ %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+ %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
+ %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
+ %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
+ %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+ %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+ %struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* }
+ %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
+ %struct.def_operand_ptr = type { %struct.tree_node** }
+ %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+ %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
+ %struct.edge_def_insns = type { %struct.rtx_def* }
+ %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
+ %struct.eh_status = type opaque
+ %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+ %struct.et_node = type opaque
+ %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+ %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
+ %struct.initial_value_struct = type opaque
+ %struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 }
+ %struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 }
+ %struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* }
+ %struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 }
+ %struct.lang_decl = type opaque
+ %struct.language_function = type opaque
+ %struct.location_t = type { i8*, i32 }
+ %struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 }
+ %struct.lpt_decision = type { i32, i32 }
+ %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
+ %struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
+ %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+ %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
+ %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+ %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
+ %struct.stack_local_entry = type opaque
+ %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
+ %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
+ %struct.temp_slot = type opaque
+ %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
+ %struct.tree_ann_d = type { %struct.stmt_ann_d }
+ %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
+ %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+ %struct.tree_decl_u1 = type { i64 }
+ %struct.tree_decl_u2 = type { %struct.function* }
+ %struct.tree_node = type { %struct.tree_decl }
+ %struct.u = type { [1 x i64] }
+ %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
+ %struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
+ %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+ %struct.varasm_status = type opaque
+ %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
+ %struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 }
+ %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
+
+define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) {
+entry:
+ switch i32 0, label %bb91 [
+ i32 0, label %bb
+ i32 1, label %bb6
+ i32 3, label %cond_next135
+ ]
+
+bb: ; preds = %entry
+ ret i1 false
+
+bb6: ; preds = %entry
+ br i1 false, label %bb87, label %cond_next27
+
+cond_next27: ; preds = %bb6
+ br i1 false, label %cond_true30, label %cond_next55
+
+cond_true30: ; preds = %cond_next27
+ br i1 false, label %cond_next41, label %cond_true35
+
+cond_true35: ; preds = %cond_true30
+ ret i1 false
+
+cond_next41: ; preds = %cond_true30
+ %tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null) ; <i32> [#uses=1]
+ %tmp46 = udiv i32 %tmp44, 5 ; <i32> [#uses=1]
+ call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null)
+ br label %bb87
+
+cond_next55: ; preds = %cond_next27
+ ret i1 false
+
+bb87: ; preds = %cond_next41, %bb6
+ ret i1 false
+
+bb91: ; preds = %entry
+ ret i1 false
+
+cond_next135: ; preds = %entry
+ ret i1 false
+}
+
+declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*)
+
+declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/test/CodeGen/Blackfin/printf.ll b/test/CodeGen/Blackfin/printf.ll
new file mode 100644
index 000000000000..9e54b73c8772
--- /dev/null
+++ b/test/CodeGen/Blackfin/printf.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@.str_1 = external constant [42 x i8] ; <[42 x i8]*> [#uses=1]
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+entry:
+ %tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0)
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/printf2.ll b/test/CodeGen/Blackfin/printf2.ll
new file mode 100644
index 000000000000..7ac7e8032bb6
--- /dev/null
+++ b/test/CodeGen/Blackfin/printf2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef)
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
new file mode 100644
index 000000000000..c247aca0a5b0
--- /dev/null
+++ b/test/CodeGen/Blackfin/promote-logic.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin > %t
+
+; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
+; operation after LegalizeOps.
+
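+; A hedged sketch of the fold (SimplifyBinOpWithSameOpcodeHands hoists a
+; shared opcode through the logic op):
+;   (or (zext i16 %a to i32), (zext i16 %b to i32))
+;     -> (zext (or i16 %a, %b) to i32)
+; Run after LegalizeOps, that rewrite can materialize an i16 OR the target
+; never legalized.
+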
+define void @mng_display_bgr565() {
+entry:
+ br i1 false, label %bb.preheader, label %return
+
+bb.preheader:
+ br i1 false, label %cond_true48, label %cond_next80
+
+cond_true48:
+ %tmp = load i8* null
+ %tmp51 = zext i8 %tmp to i16
+ %tmp99 = load i8* null
+ %tmp54 = bitcast i8 %tmp99 to i8
+ %tmp54.upgrd.1 = zext i8 %tmp54 to i32
+ %tmp55 = lshr i32 %tmp54.upgrd.1, 3
+ %tmp55.upgrd.2 = trunc i32 %tmp55 to i16
+ %tmp52 = shl i16 %tmp51, 5
+ %tmp56 = and i16 %tmp55.upgrd.2, 28
+ %tmp57 = or i16 %tmp56, %tmp52
+ %tmp60 = zext i16 %tmp57 to i32
+ %tmp62 = xor i32 0, 65535
+ %tmp63 = mul i32 %tmp60, %tmp62
+ %tmp65 = add i32 0, %tmp63
+ %tmp69 = add i32 0, %tmp65
+ %tmp70 = lshr i32 %tmp69, 16
+ %tmp70.upgrd.3 = trunc i32 %tmp70 to i16
+ %tmp75 = lshr i16 %tmp70.upgrd.3, 8
+ %tmp75.upgrd.4 = trunc i16 %tmp75 to i8
+ %tmp76 = lshr i8 %tmp75.upgrd.4, 5
+ store i8 %tmp76, i8* null
+ ret void
+
+cond_next80:
+ ret void
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/promote-setcc.ll b/test/CodeGen/Blackfin/promote-setcc.ll
new file mode 100644
index 000000000000..d344fadbf3d2
--- /dev/null
+++ b/test/CodeGen/Blackfin/promote-setcc.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=bfin > %t
+
+; The DAG combiner may sometimes create illegal i16 SETCC operations when run
+; after LegalizeOps. Try to tease out all the optimizations in
+; TargetLowering::SimplifySetCC.
+
+@x = external global i16
+@y = external global i16
+
+declare i16 @llvm.ctlz.i16(i16)
+
+; Case (srl (ctlz x), 4) == const
+; Note: ctlz is promoted, so this test does not catch the DAG combiner
+define i1 @srl_ctlz_const() {
+ %x = load i16* @x
+ %c = call i16 @llvm.ctlz.i16(i16 %x)
+ %s = lshr i16 %c, 4
+ %r = icmp eq i16 %s, 1
+ ret i1 %r
+}
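+; If the fold fires, the whole function above should reduce to
+; (icmp eq i16 %x, 0): ctlz of an i16 is 16 only when %x is zero, and 16 is
+; the only possible result whose lshr by 4 equals 1. (A hedged expectation,
+; not a CHECK line.)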
+
+; Case (zext x) == const
+define i1 @zext_const() {
+ %x = load i16* @x
+ %r = icmp ugt i16 %x, 1
+ ret i1 %r
+}
+
+; Case (sext x) == const
+define i1 @sext_const() {
+ %x = load i16* @x
+ %y = add i16 %x, 1
+ %x2 = sext i16 %y to i32
+ %r = icmp ne i32 %x2, -1
+ ret i1 %r
+}
+
diff --git a/test/CodeGen/Blackfin/sdiv.ll b/test/CodeGen/Blackfin/sdiv.ll
new file mode 100644
index 000000000000..1426655ba0b9
--- /dev/null
+++ b/test/CodeGen/Blackfin/sdiv.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @sdiv(i32 %A, i32 %B) {
+ %R = sdiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/simple-select.ll b/test/CodeGen/Blackfin/simple-select.ll
new file mode 100644
index 000000000000..0f7f270967a6
--- /dev/null
+++ b/test/CodeGen/Blackfin/simple-select.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+declare i1 @foo()
+
+define i32 @test(i32* %A, i32* %B) {
+ %a = load i32* %A
+ %b = load i32* %B
+ %cond = call i1 @foo()
+ %c = select i1 %cond, i32 %a, i32 %b
+ ret i32 %c
+}
diff --git a/test/CodeGen/Blackfin/switch.ll b/test/CodeGen/Blackfin/switch.ll
new file mode 100644
index 000000000000..3680ec6e554b
--- /dev/null
+++ b/test/CodeGen/Blackfin/switch.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define i32 @foo(i32 %A, i32 %B, i32 %C) {
+entry:
+ switch i32 %A, label %out [
+ i32 1, label %bb
+ i32 0, label %bb13
+ ]
+
+bb: ; preds = %entry
+ ret i32 1
+
+bb13: ; preds = %entry
+ ret i32 1
+
+out: ; preds = %entry
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/switch2.ll b/test/CodeGen/Blackfin/switch2.ll
new file mode 100644
index 000000000000..7877bce9c372
--- /dev/null
+++ b/test/CodeGen/Blackfin/switch2.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define i8* @FindChar(i8* %CurPtr) {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %tmp = load i8* null ; <i8> [#uses=1]
+ switch i8 %tmp, label %bb [
+ i8 0, label %bb7
+ i8 120, label %bb7
+ ]
+
+bb7: ; preds = %bb, %bb
+ ret i8* null
+}
diff --git a/test/CodeGen/Blackfin/sync-intr.ll b/test/CodeGen/Blackfin/sync-intr.ll
new file mode 100644
index 000000000000..75084f01e560
--- /dev/null
+++ b/test/CodeGen/Blackfin/sync-intr.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
+
+define void @f() nounwind {
+entry:
+ ; CHECK: csync;
+ call void @llvm.bfin.csync()
+ ; CHECK: ssync;
+ call void @llvm.bfin.ssync()
+ ret void
+}
+
+declare void @llvm.bfin.csync() nounwind
+declare void @llvm.bfin.ssync() nounwind
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
index c69b9b0c2cd7..0b06041f5713 100644
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Make sure that global variables do not collide if they have the same name,
; but different types.
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
index 29081579ac71..a9f54e467d7e 100644
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This case was emitting code that looked like this:
; ...
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
index 297807ee32d1..2afb1a02bbad 100644
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Test const pointer refs & forward references
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
index ead1bce818f2..b71cf07dbf0c 100644
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
global i32* bitcast (float* @2 to i32*) ;; Forward numeric reference
global float* @2 ;; Duplicate forward numeric reference
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
index 7ae13ec54c30..b5a1f0b28b2c 100644
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@sptr1 = global [11 x i8]* @somestr ;; Forward ref to a constant
@somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
index 25f63a0da6b3..10b9fe22847c 100644
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@fptr = global void ()* @f ;; Forward ref method defn
declare void @f() ;; External method
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
index 528b8de87edf..0827423e1ad0 100644
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@array = constant [2 x i32] [ i32 12, i32 52 ] ; <[2 x i32]*> [#uses=1]
@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
index e9df0c29e1a9..3b2085c950c4 100644
--- a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
+++ b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@MyIntList = external global { \2*, i32 }
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
index ccffe688992c..59aafd55d4c1 100644
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; The C Writer bombs on this testcase because it tries to print the prototype
; for the test function, which tries to print the argument name. The function
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
index bf592ce3f6fb..6c4d62905b13 100644
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Indirect function call test... found by Joel & Brian
;
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
index e04890519d9e..1187a374601e 100644
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This testcase fails because the C backend does not arrange to output the
; contents of a structure type before it outputs the structure type itself.
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
index ebb1c0fae265..021adb9c8873 100644
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
define void @test() {
%X = alloca [4 x i32] ; <[4 x i32]*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
index 69f45753c50e..e915cd2fb3f4 100644
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare void @foo(...)
diff --git a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
index 2f6d9beb74a5..2563d8cb51e6 100644
--- a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
+++ b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%MPI_Comm = type %struct.Comm*
%struct.Comm = type opaque
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
index d40cbdaa9ad0..2cdd15cf185b 100644
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ b/test/CodeGen/CBackend/2002-10-16-External.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@bob = external global i32 ; <i32*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
index a17b8db41c8b..54e0aa6c0bb9 100644
--- a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
+++ b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%BitField = type i32
%tokenptr = type i32*
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
index 2dd281a4beeb..82d594fc7e20 100644
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00" ; <[18 x i8]*> [#uses=1]
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
index 4a7170dbbd17..92d582d7f36d 100644
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Apparently this constant was unsigned in ISO C 90, but not in C 99.
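; (The classic instance: a decimal literal such as 2147483648 had type
; unsigned long under ISO C 90 but is a signed long long under C 99, so the
; backend must choose its suffix carefully. A hedged example, assuming the
; usual case; the exact constant lives in the body of this test.)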
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
index 2a4e839d737c..a42dc27a1e70 100644
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This testcase breaks the C backend, because gcc doesn't like (...) functions
; with no arguments at all.
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
index fb7e2ba69fd5..19c784022926 100644
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; The C backend was dying when there was no typename for a struct type!
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
index 6b7f9f0d378a..048e045b31e6 100644
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%X = type { i32, float }
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
index c6128d6df7a7..6197b301fd4a 100644
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Make sure hex constant does not continue into a valid hexadecimal letter/number
@version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
index fd6821174bf2..f6177ea7db36 100644
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
index 9fe98e22063a..f0b1bbc7f03b 100644
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare i32 @callee(i32, i32)
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
index ef3b579b5b47..4bd1da25b355 100644
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c | grep common | grep X
+; RUN: llc < %s -march=c | grep common | grep X
@X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
index 077f16cfac92..0fbb3feef137 100644
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This is a non-normal FP value: it's a nan.
@NAN = global { float } { float 0x7FF8000000000000 } ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
index e67ba2e5b164..9195634b0fc4 100644
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
index 41f3f1ef0fb3..b4389ffab18c 100644
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; reduced from DOOM.
%union._XEvent = type { i32 }
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
index e8da787d27fc..6a2629124042 100644
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@y = weak global i8 0 ; <i8*> [#uses=1]
define i32 @testcaseshr() {
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
index 911d6d4e9f5f..142fbd84dd8d 100644
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep builtin_return_address
+; RUN: llc < %s -march=c | grep builtin_return_address
declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
index 1629debc58fb..d1c6861c58d0 100644
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
@@ -4,7 +4,7 @@
; this testcase for example, which caused the CBE to mangle one, screwing
; everything up. :( Test that this does not happen anymore.
;
-; RUN: llvm-as < %s | llc -march=c | not grep _memcpy
+; RUN: llc < %s -march=c | not grep _memcpy
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
index 6f23915a75c0..6fceb0865741 100644
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
@@ -1,5 +1,5 @@
; This is a non-normal FP value
-; RUN: llvm-as < %s | llc -march=c | grep FPConstant | grep static
+; RUN: llc < %s -march=c | grep FPConstant | grep static
define float @func() {
ret float 0xFFF0000000000000
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
index 2d62231b74e3..cf59634e82c1 100644
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep func1 | grep WEAK
+; RUN: llc < %s -march=c | grep func1 | grep WEAK
define linkonce i32 @func1() {
ret i32 5
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
index ae7ba5305c00..3ee23d1a909a 100644
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
index a8ee438cc431..af8f441c2229 100644
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
@@ -1,7 +1,7 @@
; The CBE should not emit code that casts the function pointer. This causes
; GCC to get testy and insert trap instructions instead of doing the right
; thing. :(
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare void @external(i8*)
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
index 8acab764d780..78e9bacd9e77 100644
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | not grep extern.*msg
+; RUN: llc < %s -march=c | not grep extern.*msg
; PR472
@msg = internal global [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
index 9acaa726dbe6..57a9adc7e89a 100644
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
define i32 @foo() {
ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
index 12c87901f54e..dd505af4831b 100644
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep volatile
+; RUN: llc < %s -march=c | grep volatile
define void @test(i32* %P) {
%X = volatile load i32* %P ; <i32> [#uses=1]
diff --git a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
index 162e3d358fdb..1c5f5061df63 100644
--- a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
+++ b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%JNIEnv = type %struct.JNINa*
%struct.JNINa = type { i8*, i8*, i8*, void (%JNIEnv*)* }
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
index 55d43e2bb020..808b8f91407a 100644
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | not grep -- --65535
+; RUN: llc < %s -march=c | not grep -- --65535
; PR596
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
index 7c55019d2350..6e650eb293fc 100644
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep fmod
+; RUN: llc < %s -march=c | grep fmod
define double @test(double %A, double %B) {
%C = frem double %A, %B ; <double> [#uses=1]
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
index 37f311d4b93b..99de837dc79a 100644
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {\\* *volatile *\\*}
+; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
@G = external global void ()* ; <void ()**> [#uses=2]
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
index f8393a3fbc80..c9df800d72d5 100644
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | \
+; RUN: llc < %s -march=c | \
; RUN: grep __BITCAST | count 14
define i32 @test1(float %F) {
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
index 63dd9da0b11d..da36e78e0b05 100644
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
@@ -1,5 +1,5 @@
; For PR1099
-; RUN: llvm-as < %s | llc -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
+; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
index 42fa0d897f66..8a5f2532e701 100644
--- a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
+++ b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
@@ -1,5 +1,5 @@
; PR918
-; RUN: llvm-as < %s | llc -march=c | not grep {l_structtype_s l_fixarray_array3}
+; RUN: llc < %s -march=c | not grep {l_structtype_s l_fixarray_array3}
%structtype_s = type { i32 }
%fixarray_array3 = type [3 x %structtype_s]
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
index 8fe06b77d787..4f699b792e20 100644
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=c | grep __builtin_stack_save
-; RUN: llvm-as < %s | llc -march=c | grep __builtin_stack_restore
+; RUN: llc < %s -march=c | grep __builtin_stack_save
+; RUN: llc < %s -march=c | grep __builtin_stack_restore
; PR1028
declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
index f253b30136f0..7d508e424051 100644
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ b/test/CodeGen/CBackend/2007-02-05-memset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; PR1181
target datalayout = "e-p:64:64"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
index eb5cb8644652..7e1ff2a9dfa0 100644
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
@@ -1,7 +1,7 @@
; PR1164
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llvm-as < %s | llc -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
+; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
+; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
+; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
@G = global i32 123
@ltmp_0_1 = global i32 123
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
index 605761611184..c8bfdd6bcfc6 100644
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {packed}
+; RUN: llc < %s -march=c | grep {packed}
%struct.p = type <{ i16 }>
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
index 269126d75983..6e0cf6829296 100644
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | \
+; RUN: llc < %s -march=c | \
; RUN: grep {struct __attribute__ ((packed, aligned(} | count 4
define void @test(i32* %P) {
diff --git a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
index 16bf23e4d88e..8db3167e54d4 100644
--- a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
+++ b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare {i32, i32} @foo()
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
index 52e025900780..e9fa552433a5 100644
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_t.*&1}
+; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
define i32 @test(i32 %r) {
%s = icmp eq i32 %r, 0
%t = add i1 %s, %s
diff --git a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
index a2c10469bd7f..054a3cad900d 100644
--- a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
+++ b/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {"m"(llvm_cbe_newcw))}
+; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
; PR2407
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
index 32d635ad720e..b72b57343cd0 100644
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; PR2907
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9.5"
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
index 68849b20c769..7dec3d9e09c2 100644
--- a/test/CodeGen/CBackend/fneg.ll
+++ b/test/CodeGen/CBackend/fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
define void @func() nounwind {
entry:
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
index a16f91bfad89..bf8477b7e6dd 100644
--- a/test/CodeGen/CBackend/pr2408.ll
+++ b/test/CodeGen/CBackend/pr2408.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {\\* ((unsigned int )}
+; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
; PR2408
define i32 @a(i32 %a) {
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
index d01e99288f7e..b7b76775f6c6 100644
--- a/test/CodeGen/CBackend/vectors.ll
+++ b/test/CodeGen/CBackend/vectors.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@.str15 = external global [2 x i8]
define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll
index 75b96e6c373f..71fea12d9c2c 100644
--- a/test/CodeGen/CPP/2007-06-16-Funcname.ll
+++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppfname=WAKKA | not grep makeLLVMModule
+; RUN: llc < %s -march=cpp -cppfname=WAKKA | not grep makeLLVMModule
; PR1515
define void @foo() {
diff --git a/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
index 9d132ec00f7c..0b2d882971a3 100644
--- a/test/CodeGen/CPP/2009-05-01-Long-Double.ll
+++ b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -f -o %t
+; RUN: llc < %s -march=cpp -cppgen=program -o %t
define x86_fp80 @some_func() nounwind {
entry:
diff --git a/test/CodeGen/CPP/2009-05-04-CondBr.ll b/test/CodeGen/CPP/2009-05-04-CondBr.ll
index 6c3f984282c3..feb2cf765e7d 100644
--- a/test/CodeGen/CPP/2009-05-04-CondBr.ll
+++ b/test/CodeGen/CPP/2009-05-04-CondBr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -f -o %t
+; RUN: llc < %s -march=cpp -cppgen=program -o %t
; RUN: grep "BranchInst::Create(label_if_then, label_if_end, int1_cmp, label_entry);" %t
define i32 @some_func(i32 %a) nounwind {
diff --git a/test/CodeGen/CPP/llvm2cpp.ll b/test/CodeGen/CPP/llvm2cpp.ll
index 651a65bbea92..447f332b269e 100644
--- a/test/CodeGen/CPP/llvm2cpp.ll
+++ b/test/CodeGen/CPP/llvm2cpp.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s | llvm-dis > /dev/null
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -o -
+; RUN: llc < %s -march=cpp -cppgen=program -o -
@X = global i32 4, align 16 ; <i32*> [#uses=0]
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
index 75e0ed0cd2fc..35422311c574 100644
--- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
+++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cellspu -o - | grep brz
+; RUN: llc < %s -march=cellspu -o - | grep brz
; PR3274
target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index a18b6f8d05fc..139e97b967a7 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep and %t1.s | count 234
; RUN: grep andc %t1.s | count 85
; RUN: grep andi %t1.s | count 37
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index a305a2354041..960d2feadeda 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep brsl %t1.s | count 1
; RUN: grep brasl %t1.s | count 1
; RUN: grep stqd %t1.s | count 80
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index 9be714ebc9b8..639c794424f3 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep bisl %t1.s | count 7
; RUN: grep ila %t1.s | count 1
; RUN: grep rotqby %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
index 3c7ee7aeea2b..e1a6cd829260 100644
--- a/test/CodeGen/CellSPU/ctpop.ll
+++ b/test/CodeGen/CellSPU/ctpop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep cntb %t1.s | count 3
; RUN: grep andi %t1.s | count 3
; RUN: grep rotmi %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index d4802ae8f545..b0a372beba0d 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep dfa %t1.s | count 2
; RUN: grep dfs %t1.s | count 2
; RUN: grep dfm %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
index 540695677205..22c8c3bff940 100644
--- a/test/CodeGen/CellSPU/eqv.ll
+++ b/test/CodeGen/CellSPU/eqv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep eqv %t1.s | count 18
; RUN: grep xshw %t1.s | count 6
; RUN: grep xsbh %t1.s | count 3
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
index bcd2f42aa77e..0ac971c58c5b 100644
--- a/test/CodeGen/CellSPU/extract_elt.ll
+++ b/test/CodeGen/CellSPU/extract_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep shufb %t1.s | count 39
; RUN: grep ilhu %t1.s | count 27
; RUN: grep iohl %t1.s | count 27
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index 27a659e82930..f07fe6fdab28 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep fceq %t1.s | count 1
; RUN: grep fcmeq %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll
index 1906bfe7ddaa..2b61fa6d2dc2 100644
--- a/test/CodeGen/CellSPU/fcmp64.ll
+++ b/test/CodeGen/CellSPU/fcmp64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
entry:
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
index d121c3f8c907..9921626b79cb 100644
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ b/test/CodeGen/CellSPU/fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep frest %t1.s | count 2
; RUN: grep -w fi %t1.s | count 2
; RUN: grep -w fm %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 5bd66f4aaef3..1e5e3b341440 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep 32768 %t1.s | count 2
; RUN: grep xor %t1.s | count 4
; RUN: grep and %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index dd6782772a5d..3553cbbf7b5c 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep xswd %t1.s | count 3
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll
index 23a036e37443..57a2aa894725 100644
--- a/test/CodeGen/CellSPU/i8ops.ll
+++ b/test/CodeGen/CellSPU/i8ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; ModuleID = 'i8ops.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
index 56d1b8fb41b2..32b12617cfc3 100644
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ b/test/CodeGen/CellSPU/icmp16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ilh %t1.s | count 15
; RUN: grep ceqh %t1.s | count 29
; RUN: grep ceqhi %t1.s | count 13
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
index 4f74b0dd0429..ccbb5f7cde58 100644
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ b/test/CodeGen/CellSPU/icmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ila %t1.s | count 6
; RUN: grep ceq %t1.s | count 28
; RUN: grep ceqi %t1.s | count 12
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
index b26252cedb30..9dd2cdc0dea9 100644
--- a/test/CodeGen/CellSPU/icmp64.ll
+++ b/test/CodeGen/CellSPU/icmp64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ceq %t1.s | count 20
; RUN: grep cgti %t1.s | count 12
; RUN: grep cgt %t1.s | count 16
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
index d246481f03a1..5517d104ab9f 100644
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ b/test/CodeGen/CellSPU/icmp8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ceqb %t1.s | count 24
; RUN: grep ceqbi %t1.s | count 12
; RUN: grep clgtb %t1.s | count 11
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
index 9a461cbb85a6..077d07169e45 100644
--- a/test/CodeGen/CellSPU/immed16.ll
+++ b/test/CodeGen/CellSPU/immed16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep "ilh" %t1.s | count 11
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
index bf471b1eb1ce..119f526847ce 100644
--- a/test/CodeGen/CellSPU/immed32.ll
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ilhu %t1.s | count 8
; RUN: grep iohl %t1.s | count 6
; RUN: grep -w il %t1.s | count 3
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
index bbda3ff329cb..fd483651756e 100644
--- a/test/CodeGen/CellSPU/immed64.ll
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep lqa %t1.s | count 13
; RUN: grep ilhu %t1.s | count 15
; RUN: grep ila %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
index ee3076594ad6..984c017c96d1 100644
--- a/test/CodeGen/CellSPU/int2fp.ll
+++ b/test/CodeGen/CellSPU/int2fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep csflt %t1.s | count 5
; RUN: grep cuflt %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
index 87ad18211a25..b0f6a6247e41 100644
--- a/test/CodeGen/CellSPU/intrinsics_branch.ll
+++ b/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ceq %t1.s | count 30
; RUN: grep ceqb %t1.s | count 10
; RUN: grep ceqhi %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
index c18f8deb385e..81373470d069 100644
--- a/test/CodeGen/CellSPU/intrinsics_float.ll
+++ b/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep fa %t1.s | count 5
; RUN: grep fs %t1.s | count 5
; RUN: grep fm %t1.s | count 15
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
index 843340b74542..a29ee4c2405d 100644
--- a/test/CodeGen/CellSPU/intrinsics_logical.ll
+++ b/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep and %t1.s | count 20
; RUN: grep andc %t1.s | count 5
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index 3b9746c8080a..8e5422c58eb6 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep {lqd.*0(\$3)} %t1.s | count 1
-; RUN: grep {lqd.*16(\$3)} %t1.s | count 1
+; RUN: llc < %s -march=cellspu | FileCheck %s
; ModuleID = 'loads.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -10,11 +8,13 @@ define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
entry:
%tmp1 = load <4 x float>* %a
ret <4 x float> %tmp1
+; CHECK: lqd $3, 0($3)
}
define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
- %tmp1 = load <4 x float>* %arrayidx ; <<4 x float>> [#uses=1]
+ %arrayidx = getelementptr <4 x float>* %a, i32 1
+ %tmp1 = load <4 x float>* %arrayidx
ret <4 x float> %tmp1
+; CHECK: lqd $3, 16($3)
}
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
index 755b99be9cdd..d15da12649ea 100644
--- a/test/CodeGen/CellSPU/mul-with-overflow.ll
+++ b/test/CodeGen/CellSPU/mul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cellspu
+; RUN: llc < %s -march=cellspu
declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
define i1 @a(i16 %x) zeroext nounwind {
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
index 085ce555dc25..031d6c37ce70 100644
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep mpy %t1.s | count 44
; RUN: grep mpyu %t1.s | count 4
; RUN: grep mpyh %t1.s | count 10
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index 841a3ec54d6f..e1419232ece7 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep nand %t1.s | count 90
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 4e9da8f12972..8aa1e998bd0e 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep and %t1.s | count 2
; RUN: grep orc %t1.s | count 85
; RUN: grep ori %t1.s | count 30
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
index 91567ce82803..7452276ccc8c 100644
--- a/test/CodeGen/CellSPU/private.ll
+++ b/test/CodeGen/CellSPU/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=cellspu > %t
+; RUN: llc < %s -march=cellspu > %t
; RUN: grep .Lfoo: %t
; RUN: grep brsl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index e308172486a5..a504c002ae12 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s
+; RUN: llc < %s -march=cellspu -o %t1.s
; RUN: grep rot %t1.s | count 85
; RUN: grep roth %t1.s | count 8
; RUN: grep roti.*5 %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index e83e47606c28..c804256f513b 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep selb %t1.s | count 56
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
new file mode 100644
index 000000000000..0c0b3599b110
--- /dev/null
+++ b/test/CodeGen/CellSPU/sext128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=cellspu | FileCheck %s
+
+; ModuleID = 'sext128.bc'
+target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
+target triple = "spu"
+
+define i128 @sext_i64_i128(i64 %a) {
+entry:
+ %0 = sext i64 %a to i128
+ ret i128 %0
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 66051
+; CHECK: long 67438087
+; CHECK: rotmai
+; CHECK: lqa
+; CHECK: shufb
+}
+
+define i128 @sext_i32_i128(i32 %a) {
+entry:
+ %0 = sext i32 %a to i128
+ ret i128 %0
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 66051
+; CHECK: rotmai
+; CHECK: lqa
+; CHECK: shufb
+}
+
+define i128 @sext_i32_i128a(float %a) {
+entry:
+ %0 = call i32 @myfunc(float %a)
+ %1 = sext i32 %0 to i128
+ ret i128 %1
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 66051
+; CHECK: rotmai
+; CHECK: lqa
+; CHECK: shufb
+}
+
+declare i32 @myfunc(float)
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 3c26baa7c7ab..0264fc830ea8 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep {shlh } %t1.s | count 9
; RUN: grep {shlhi } %t1.s | count 3
; RUN: grep {shl } %t1.s | count 9
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
index d77dd9216cd7..80bf47ccf5d9 100644
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu -enable-unsafe-fp-math > %t1.s
+; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s
; RUN: grep fa %t1.s | count 2
; RUN: grep fs %t1.s | count 2
; RUN: grep fm %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index f2f35ef4dbc4..05f44f4be046 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep {stqd.*0(\$3)} %t1.s | count 4
; RUN: grep {stqd.*16(\$3)} %t1.s | count 4
; RUN: grep 16256 %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index 82d319dd1050..8ee7d932251a 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep lqa %t1.s | count 5
; RUN: grep lqd %t1.s | count 11
; RUN: grep rotqbyi %t1.s | count 7
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
index db22564f4341..d16185238af0 100644
--- a/test/CodeGen/CellSPU/trunc.ll
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep shufb %t1.s | count 19
; RUN: grep {ilhu.*1799} %t1.s | count 1
; RUN: grep {ilhu.*771} %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll
index 4b29adc80921..24c05c684084 100644
--- a/test/CodeGen/CellSPU/vec_const.ll
+++ b/test/CodeGen/CellSPU/vec_const.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep -w il %t1.s | count 3
; RUN: grep ilhu %t1.s | count 8
; RUN: grep -w ilh %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
index 6abbd9ac797d..9a00c1f29f8f 100644
--- a/test/CodeGen/CellSPU/vecinsert.ll
+++ b/test/CodeGen/CellSPU/vecinsert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep cbd %t1.s | count 5
; RUN: grep chd %t1.s | count 5
; RUN: grep cwd %t1.s | count 10
diff --git a/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll b/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
index f6d95cbd53fc..dd382cfcb24d 100644
--- a/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
+++ b/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc
+; RUN: llc < %s
; This caused the backend to assert out with:
; SparcInstrInfo.cpp:103: failed assertion `0 && "Unexpected unsigned type"'
diff --git a/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll b/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
index d77b9e15596e..751ed407456d 100644
--- a/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
+++ b/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Compiling this file produces:
; Sparc.cpp:91: failed assertion `(offset - OFFSET) % getStackFrameSizeAlignment() == 0'
diff --git a/test/CodeGen/Generic/2003-05-27-phifcmpd.ll b/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
index cf17ef455418..6fb17991e739 100644
--- a/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
+++ b/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(i32 %p.1, double %tmp.212) {
entry:
diff --git a/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll b/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
index 03b2a1684670..14bb00048d20 100644
--- a/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
+++ b/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(double %tmp.212) {
%tmp.213 = fcmp une double %tmp.212, 0.000000e+00 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll b/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
index b456eebea2c2..cc0eb5cd1374 100644
--- a/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
+++ b/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(double %tmp.212) {
entry:
diff --git a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
index 595700ad183e..c6fbdaef8293 100644
--- a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
+++ b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/External/SPEC/CINT2000/175.vpr.llvm.bc
diff --git a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
index 41c90bd29d38..10d3a11a5190 100644
--- a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/External/SPEC/CINT2000/254.gap.llvm.bc
diff --git a/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll b/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
index 43bff82ef7cc..f7c3e42dc487 100644
--- a/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/SingleSource/richards_benchmark.c
diff --git a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
index d66ea186e873..1d1aad5f27e2 100644
--- a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
+++ b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/MultiSource/Olden-perimeter/maketree.c
diff --git a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
index 80738d54370c..64312ba09a50 100644
--- a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
+++ b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [42 x i8] c" ui = %u (0x%x)\09\09UL-ui = %lld (0x%llx)\0A\00" ; <[42 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
index 4532b760c707..8019caa832d7 100644
--- a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
+++ b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: Jul 8, 2003.
;; From: test/Programs/MultiSource/Olden-perimeter
diff --git a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
index 54880db39289..4e6fe1cf8bf5 100644
--- a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
+++ b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: Jul 29, 2003.
;; From: test/Programs/MultiSource/Ptrdist-bc
diff --git a/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll b/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
index 10d40693d8e5..393062abf78e 100644
--- a/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
+++ b/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-correct-eh-support
+; RUN: llc < %s -enable-correct-eh-support
define i32 @test() {
unwind
diff --git a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
index 1f58ce11fe71..d4a4cf88ce0c 100644
--- a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
+++ b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@global_long_1 = linkonce global i64 7 ; <i64*> [#uses=1]
@global_long_2 = linkonce global i64 49 ; <i64*> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll b/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
index ed8b2a22607d..7fd23612fb5f 100644
--- a/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
+++ b/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @intersect_pixel() {
entry:
diff --git a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
index 37aaa3237caa..353e411b0887 100644
--- a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
+++ b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 }
@.str_67 = external global [4 x i8] ; <[4 x i8]*> [#uses=1]
@.str_87 = external global [17 x i8] ; <[17 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
index ab3a31d3c077..733202c8a96b 100644
--- a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
+++ b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that llvm.memcpy works with a i64 length operand on all targets.
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll b/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
index b2bea1c6be3b..08060bf3d6f1 100644
--- a/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
+++ b/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @test() {
%X = alloca { } ; <{ }*> [#uses=0]
diff --git a/test/CodeGen/Generic/2005-10-21-longlonggtu.ll b/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
index b355b026531e..53a9cd0f2659 100644
--- a/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
+++ b/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define float @t(i64 %u_arg) {
%u = bitcast i64 %u_arg to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-12-01-Crash.ll b/test/CodeGen/Generic/2005-12-01-Crash.ll
index ee72ee1317b0..a9eeddedc54d 100644
--- a/test/CodeGen/Generic/2005-12-01-Crash.ll
+++ b/test/CodeGen/Generic/2005-12-01-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@str = external global [36 x i8] ; <[36 x i8]*> [#uses=0]
@str.upgrd.1 = external global [29 x i8] ; <[29 x i8]*> [#uses=0]
@str1 = external global [29 x i8] ; <[29 x i8]*> [#uses=0]
diff --git a/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll b/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
index bd2e043c96f4..349540fb384e 100644
--- a/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
+++ b/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i64 @test(i64 %A) {
%B = trunc i64 %A to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll b/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
index 1a555b355067..42e8ed02ca50 100644
--- a/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
+++ b/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; ModuleID = '2006-01-12-BadSetCCFold.ll'
%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
diff --git a/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll b/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
index b1e08c759c08..f06d3412a9d5 100644
--- a/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
+++ b/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This crashed the PPC backend.
define void @test() {
diff --git a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
index bacf8b5e2f9e..5508272b5551 100644
--- a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
+++ b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@G = external global i32 ; <i32*> [#uses=1]
define void @encode_one_frame(i64 %tmp.2i) {
diff --git a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
index 9607ebee1cc8..2a6cc0c9cdd2 100644
--- a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
+++ b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Infinite loop in the dag combiner, reduced from 176.gcc.
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
%struct.anon = type { i32 }
diff --git a/test/CodeGen/Generic/2006-04-11-vecload.ll b/test/CodeGen/Generic/2006-04-11-vecload.ll
index cc96d8f1ab6a..a68ed838c24f 100644
--- a/test/CodeGen/Generic/2006-04-11-vecload.ll
+++ b/test/CodeGen/Generic/2006-04-11-vecload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
; The vload was getting memoized to the previous scalar load!
diff --git a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
index b99aa98fe9e8..8465b829e29f 100644
--- a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
+++ b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR748
@G = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll b/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
index 6b9bf11860f9..22d8f99beea4 100644
--- a/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
+++ b/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @test(i32 %tmp93) {
%tmp98 = shl i32 %tmp93, 31 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
index 59ed2953afad..1a9fa9f5de6b 100644
--- a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
+++ b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.SYMBOL_TABLE_ENTRY = type { [9 x i8], [9 x i8], i32, i32, i32, %struct.SYMBOL_TABLE_ENTRY* }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll b/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
index b644bd2be78f..a3720a9e3ce7 100644
--- a/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
+++ b/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
define float @test(i32 %tmp12771278) {
switch i32 %tmp12771278, label %bb1279 [
diff --git a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
index 1aa3c62f955b..bd922b3aa851 100644
--- a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
+++ b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
%struct.cl_perfunc_opts = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32 }
@cl_pf_opts = external global %struct.cl_perfunc_opts ; <%struct.cl_perfunc_opts*> [#uses=2]
diff --git a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
index 8e8f18639bdb..c4f2fb0c4726 100644
--- a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
+++ b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.rtunion = type { i64 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
@ix86_cpu = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-07-03-schedulers.ll b/test/CodeGen/Generic/2006-07-03-schedulers.ll
index 597ee56609ba..756bd5ddb1ae 100644
--- a/test/CodeGen/Generic/2006-07-03-schedulers.ll
+++ b/test/CodeGen/Generic/2006-07-03-schedulers.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -pre-RA-sched=default
-; RUN: llvm-as < %s | llc -pre-RA-sched=list-burr
-; RUN: llvm-as < %s | llc -pre-RA-sched=fast
+; RUN: llc < %s -pre-RA-sched=default
+; RUN: llc < %s -pre-RA-sched=list-burr
+; RUN: llc < %s -pre-RA-sched=fast
; PR859
; The top-down schedulers are excluded here because they don't yet support
diff --git a/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll b/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
index 7f8af5dda498..cbe8b15a2e83 100644
--- a/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
+++ b/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index c6d0dfee3815..4b332b32cf12 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -regalloc=local
+; RUN: llc < %s -regalloc=local
%struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=2]
diff --git a/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll b/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
index 2134d3302bc2..3d592b3a38aa 100644
--- a/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
+++ b/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo() {
br label %cond_true813.i
diff --git a/test/CodeGen/Generic/2006-10-27-CondFolding.ll b/test/CodeGen/Generic/2006-10-27-CondFolding.ll
index b3cfb9941bbb..51902c867f60 100644
--- a/test/CodeGen/Generic/2006-10-27-CondFolding.ll
+++ b/test/CodeGen/Generic/2006-10-27-CondFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @start_pass_huff(i32 %gather_statistics) {
entry:
diff --git a/test/CodeGen/Generic/2006-10-29-Crash.ll b/test/CodeGen/Generic/2006-10-29-Crash.ll
index cabec54d1610..7dcb52cf00f7 100644
--- a/test/CodeGen/Generic/2006-10-29-Crash.ll
+++ b/test/CodeGen/Generic/2006-10-29-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @form_component_prediction(i32 %dy) {
entry:
diff --git a/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll b/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
index a773759f36ea..ad3e49f8f922 100644
--- a/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
+++ b/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep adc
+; RUN: llc < %s -march=x86 | not grep adc
; PR987
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
index 95ef53c62ec5..26d0f4f96ae8 100644
--- a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
+++ b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1011
%struct.mng_data = type { i8* (%struct.mng_data*, i32)*, i32, i32, i32, i8, i8, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll b/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
index 91ac3b9909dc..50a244b9e05b 100644
--- a/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
+++ b/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR1049
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
index 49203d95d4a0..255b12092a77 100644
--- a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
+++ b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1114
declare i1 @foo()
diff --git a/test/CodeGen/Generic/2007-02-16-BranchFold.ll b/test/CodeGen/Generic/2007-02-16-BranchFold.ll
index 0a8e49e56e49..6bf5631b4e34 100644
--- a/test/CodeGen/Generic/2007-02-16-BranchFold.ll
+++ b/test/CodeGen/Generic/2007-02-16-BranchFold.ll
@@ -1,5 +1,5 @@
; PR 1200
-; RUN: llvm-as < %s | llc -enable-tail-merge=0 | not grep jmp
+; RUN: llc < %s -enable-tail-merge=0 | not grep jmp
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll b/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
index 8b7db478181e..a8f0e576b95e 100644
--- a/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
+++ b/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
@@ -1,5 +1,5 @@
; PR1219
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl \$1, %eax}
+; RUN: llc < %s -march=x86 | grep {movl \$1, %eax}
define i32 @test(i1 %X) {
old_entry1:
diff --git a/test/CodeGen/Generic/2007-02-25-invoke.ll b/test/CodeGen/Generic/2007-02-25-invoke.ll
index 6dba99e21f79..6e20eaae3bde 100644
--- a/test/CodeGen/Generic/2007-02-25-invoke.ll
+++ b/test/CodeGen/Generic/2007-02-25-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1224
diff --git a/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll b/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
index 9cbf3146eb80..339f0f71ed5a 100644
--- a/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
+++ b/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; XFAIL: sparc-sun-solaris2
; PR1308
; PR1557
diff --git a/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll b/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
index 1418bbf16dfd..a0b1403cf8d1 100644
--- a/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
+++ b/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
; PR 1323
; ModuleID = 'test.bc'
diff --git a/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll b/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
index 5490687e1ba8..00337b930145 100644
--- a/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
+++ b/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 8388635
-; RUN: llvm-as < %s | llc -march=x86-64 | grep 4294981120
+; RUN: llc < %s -march=x86 | grep 8388635
+; RUN: llc < %s -march=x86-64 | grep 4294981120
; PR 1325
; ModuleID = 'bugpoint.test.bc'
diff --git a/test/CodeGen/Generic/2007-04-17-lsr-crash.ll b/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
index 4257e9f4c2dc..98f87e5c514c 100644
--- a/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
+++ b/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo(i32 %inTextSize) {
entry:
diff --git a/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll b/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
index 16d7a1654d93..3e8857f37cb9 100644
--- a/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
+++ b/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep je | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 | grep 4297064449
+; RUN: llc < %s -march=x86 | grep je | count 3
+; RUN: llc < %s -march=x86-64 | grep 4297064449
; PR 1325+
define i32 @foo(i8 %bar) {
diff --git a/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll b/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
index 0ea13a2ad2e8..af522dc4c58d 100644
--- a/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
+++ b/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that we can have an "X" output constraint.
diff --git a/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll b/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
index 9424ea7a29fd..f2c9b7f849b6 100644
--- a/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
+++ b/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct..0anon = type { [100 x i32] }
diff --git a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
index 71b4c857d079..568b88f4df19 100644
--- a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
+++ b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1228
"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
diff --git a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
index 8a427902f721..533aa4a8d9b0 100644
--- a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
+++ b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh -march=x86
+; RUN: llc < %s -enable-eh -march=x86
%struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
@program_error = external global %struct.exception ; <%struct.exception*> [#uses=1]
diff --git a/test/CodeGen/Generic/2007-05-05-Personality.ll b/test/CodeGen/Generic/2007-05-05-Personality.ll
index 0fa0e2ff6b22..27493261d569 100644
--- a/test/CodeGen/Generic/2007-05-05-Personality.ll
+++ b/test/CodeGen/Generic/2007-05-05-Personality.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR
@error = external global i8 ; <i8*> [#uses=2]
diff --git a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
index a61108a0012a..b989819f4039 100644
--- a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
+++ b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
diff --git a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
index 0b98ebe4b665..33a36452b2e5 100644
--- a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
+++ b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-eh -asm-verbose -o - | \
+; RUN: llc < %s -march=x86 -enable-eh -asm-verbose -o - | \
; RUN: grep -A 3 {Llabel138.*Region start} | grep {3.*Action}
; PR1422
; PR1508
diff --git a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
index cedee6ffbe0d..e220be6389dc 100644
--- a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
+++ b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o -
+; RUN: llc < %s -o -
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
index 98871d0e3a45..bd26481bd306 100644
--- a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
+++ b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 41fdb71ddf49..fc9164f7c4aa 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
; PR1833
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
index 4b2544446939..314bb05c6784 100644
--- a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
+++ b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://5707064
define i32 @f(i16* %pc) {
diff --git a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
index 8ed4139bd64f..70c3aaabedc1 100644
--- a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
+++ b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@letters.3100 = external constant [63 x i8] ; <[63 x i8]*> [#uses=2]
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 4639b6f97703..288bfd245da9 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str = internal constant [14 x i8] c"%lld %d %d %d\00"
diff --git a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
index 9acb852bced0..8bf82dfe186d 100644
--- a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
+++ b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() nounwind {
entry:
diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
index ef60f92fa05a..da1aeb556a39 100644
--- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
+++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1133
define void @test(i32* %X) nounwind {
entry:
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index 01693079a3ef..97db667dc13a 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://5763967
define void @test() {
diff --git a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
index b2112f3ad39b..10b3d444c682 100644
--- a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
+++ b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2096
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
diff --git a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
index a60d101fee25..4f95dfe8a730 100644
--- a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
+++ b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2603
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
index c5844027b44d..6281ada73fc6 100644
--- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
+++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3806
%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
diff --git a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
index 40ad3deaeea7..9a9c1a110d61 100644
--- a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
+++ b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -soft-float
+; RUN: llc < %s -soft-float
; PR3899
@m = external global <2 x double>;
diff --git a/test/CodeGen/Generic/2009-04-10-SinkCrash.ll b/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
index 3637a06c217f..125f87594b85 100644
--- a/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
+++ b/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(i32 %p.1, double %tmp.212) nounwind {
entry:
diff --git a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
index 405a6a8d6e90..577b547007d0 100644
--- a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
+++ b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6836460
define i32 @test(i128* %P) nounwind {
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
index 59e7d0c7a8f5..112cac4f9644 100644
--- a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
+++ b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4317
declare i32 @b()
diff --git a/test/CodeGen/Generic/APIntLoadStore.ll b/test/CodeGen/Generic/APIntLoadStore.ll
index 57ddae2c1b02..7c71a33fc3fd 100644
--- a/test/CodeGen/Generic/APIntLoadStore.ll
+++ b/test/CodeGen/Generic/APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_l = external global i1 ; <i1*> [#uses=1]
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_l = external global i2 ; <i2*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntParam.ll b/test/CodeGen/Generic/APIntParam.ll
index f80f71b17618..8aa0b494c26b 100644
--- a/test/CodeGen/Generic/APIntParam.ll
+++ b/test/CodeGen/Generic/APIntParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_s = external global i2 ; <i2*> [#uses=1]
@i3_s = external global i3 ; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntSextParam.ll b/test/CodeGen/Generic/APIntSextParam.ll
index 9fb06cb1bfb1..acc0eebcada8 100644
--- a/test/CodeGen/Generic/APIntSextParam.ll
+++ b/test/CodeGen/Generic/APIntSextParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_s = external global i2 ; <i2*> [#uses=1]
@i3_s = external global i3 ; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntZextParam.ll b/test/CodeGen/Generic/APIntZextParam.ll
index ea7743ecd086..173b9fd74ca4 100644
--- a/test/CodeGen/Generic/APIntZextParam.ll
+++ b/test/CodeGen/Generic/APIntZextParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_s = external global i2 ; <i2*> [#uses=1]
@i3_s = external global i3 ; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/BasicInstrs.ll b/test/CodeGen/Generic/BasicInstrs.ll
index e65cbf772fd9..578431e8efa4 100644
--- a/test/CodeGen/Generic/BasicInstrs.ll
+++ b/test/CodeGen/Generic/BasicInstrs.ll
@@ -1,7 +1,7 @@
; New testcase: it contains a bunch of simple instructions that should be
; handled by a code generator.
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @add(i32 %A, i32 %B) {
%R = add i32 %A, %B ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/BurgBadRegAlloc.ll b/test/CodeGen/Generic/BurgBadRegAlloc.ll
index 3ccc9a0aeb69..99d856aea990 100644
--- a/test/CodeGen/Generic/BurgBadRegAlloc.ll
+++ b/test/CodeGen/Generic/BurgBadRegAlloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Register allocation is doing a very poor job on this routine from yyparse
;; in Burg:
diff --git a/test/CodeGen/Generic/ConstantExprLowering.ll b/test/CodeGen/Generic/ConstantExprLowering.ll
index d26541596da5..428d712462d6 100644
--- a/test/CodeGen/Generic/ConstantExprLowering.ll
+++ b/test/CodeGen/Generic/ConstantExprLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [16 x i8] c"%d %d %d %d %d\0A\00" ; <[16 x i8]*> [#uses=1]
@XA = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/GC/alloc_loop.ll b/test/CodeGen/Generic/GC/alloc_loop.ll
index b1fee68abab8..fb78ba2cd10b 100644
--- a/test/CodeGen/Generic/GC/alloc_loop.ll
+++ b/test/CodeGen/Generic/GC/alloc_loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare i8* @llvm_gc_allocate(i32)
diff --git a/test/CodeGen/Generic/GC/argpromotion.ll b/test/CodeGen/Generic/GC/argpromotion.ll
index 5df947a9fb12..dda376d6168e 100644
--- a/test/CodeGen/Generic/GC/argpromotion.ll
+++ b/test/CodeGen/Generic/GC/argpromotion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -argpromotion
+; RUN: opt < %s -anders-aa -argpromotion
declare void @llvm.gcroot(i8**, i8*)
diff --git a/test/CodeGen/Generic/GC/deadargelim.ll b/test/CodeGen/Generic/GC/deadargelim.ll
index c5a56f600dd0..176019020ad4 100644
--- a/test/CodeGen/Generic/GC/deadargelim.ll
+++ b/test/CodeGen/Generic/GC/deadargelim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim
+; RUN: opt < %s -deadargelim
declare void @llvm.gcroot(i8**, i8*)
diff --git a/test/CodeGen/Generic/GC/frame_size.ll b/test/CodeGen/Generic/GC/frame_size.ll
index 75626c18c5b1..31783cdb97ef 100644
--- a/test/CodeGen/Generic/GC/frame_size.ll
+++ b/test/CodeGen/Generic/GC/frame_size.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -asm-verbose | grep {frame size} | grep -v 0x0
+; RUN: llc < %s -asm-verbose | grep {frame size} | grep -v 0x0
declare void @llvm.gcroot(i8** %value, i8* %tag)
declare void @g() gc "ocaml"
diff --git a/test/CodeGen/Generic/GC/inline.ll b/test/CodeGen/Generic/GC/inline.ll
index 157e19d2d929..9da33aef8dd3 100644
--- a/test/CodeGen/Generic/GC/inline.ll
+++ b/test/CodeGen/Generic/GC/inline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep example
+; RUN: opt < %s -inline -S | grep example
%IntArray = type { i32, [0 x i32*] }
diff --git a/test/CodeGen/Generic/GC/inline2.ll b/test/CodeGen/Generic/GC/inline2.ll
index b45ef7c47f52..15947056ee39 100644
--- a/test/CodeGen/Generic/GC/inline2.ll
+++ b/test/CodeGen/Generic/GC/inline2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep sample
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep example
+; RUN: opt < %s -inline -S | grep sample
+; RUN: opt < %s -inline -S | grep example
%IntArray = type { i32, [0 x i32*] }
diff --git a/test/CodeGen/Generic/GC/lower_gcroot.ll b/test/CodeGen/Generic/GC/lower_gcroot.ll
index bd5a2bd14b4e..c2d418ac50ef 100644
--- a/test/CodeGen/Generic/GC/lower_gcroot.ll
+++ b/test/CodeGen/Generic/GC/lower_gcroot.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%Env = type i8*
diff --git a/test/CodeGen/Generic/GC/redundant_init.ll b/test/CodeGen/Generic/GC/redundant_init.ll
index 44996034748b..10c70e731052 100644
--- a/test/CodeGen/Generic/GC/redundant_init.ll
+++ b/test/CodeGen/Generic/GC/redundant_init.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: ignore grep {movl..0} | count 0
%struct.obj = type { i8*, %struct.obj* }
diff --git a/test/CodeGen/Generic/GC/simple_ocaml.ll b/test/CodeGen/Generic/GC/simple_ocaml.ll
index a33e0351f7fd..f765dc029da5 100644
--- a/test/CodeGen/Generic/GC/simple_ocaml.ll
+++ b/test/CodeGen/Generic/GC/simple_ocaml.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep caml.*__frametable
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl .0}
+; RUN: llc < %s | grep caml.*__frametable
+; RUN: llc < %s -march=x86 | grep {movl .0}
%struct.obj = type { i8*, %struct.obj* }
diff --git a/test/CodeGen/Generic/Makefile b/test/CodeGen/Generic/Makefile
index d228f69a8566..26ebc316a215 100644
--- a/test/CodeGen/Generic/Makefile
+++ b/test/CodeGen/Generic/Makefile
@@ -1,10 +1,10 @@
# Makefile for running ad-hoc custom LLVM tests
#
%.bc: %.ll
- llvm-as -f $<
+ llvm-as $<
%.llc.s: %.bc
- llc -f $< -o $@
+ llc $< -o $@
%.gcc.s: %.c
gcc -O0 -S $< -o $@
diff --git a/test/CodeGen/Generic/SwitchLowering.ll b/test/CodeGen/Generic/SwitchLowering.ll
index 9fdfd8d5f63e..29a0e82bf59f 100644
--- a/test/CodeGen/Generic/SwitchLowering.ll
+++ b/test/CodeGen/Generic/SwitchLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
; PR964
define i8* @FindChar(i8* %CurPtr) {
diff --git a/test/CodeGen/Generic/add-with-overflow-24.ll b/test/CodeGen/Generic/add-with-overflow-24.ll
index debdeb25af07..63f5a222a003 100644
--- a/test/CodeGen/Generic/add-with-overflow-24.ll
+++ b/test/CodeGen/Generic/add-with-overflow-24.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/Generic/add-with-overflow.ll b/test/CodeGen/Generic/add-with-overflow.ll
index 5c3d540c5cc2..0c2c9608deb9 100644
--- a/test/CodeGen/Generic/add-with-overflow.ll
+++ b/test/CodeGen/Generic/add-with-overflow.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llc < %s
+; RUN: llc < %s -fast-isel
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/Generic/addc-fold2.ll b/test/CodeGen/Generic/addc-fold2.ll
index 8f3cdd0793d4..34f5ac1b9814 100644
--- a/test/CodeGen/Generic/addc-fold2.ll
+++ b/test/CodeGen/Generic/addc-fold2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add
-; RUN: llvm-as < %s | llc -march=x86 | not grep adc
+; RUN: llc < %s -march=x86 | grep add
+; RUN: llc < %s -march=x86 | not grep adc
define i64 @test(i64 %A, i32 %B) {
%tmp12 = zext i32 %B to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/asm-large-immediate.ll b/test/CodeGen/Generic/asm-large-immediate.ll
index 70649133712c..605665bef6d1 100644
--- a/test/CodeGen/Generic/asm-large-immediate.ll
+++ b/test/CodeGen/Generic/asm-large-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep 68719476738
+; RUN: llc < %s | grep 68719476738
define void @test() {
entry:
diff --git a/test/CodeGen/Generic/badCallArgLRLLVM.ll b/test/CodeGen/Generic/badCallArgLRLLVM.ll
index 56384748ac05..4ed88df4a538 100644
--- a/test/CodeGen/Generic/badCallArgLRLLVM.ll
+++ b/test/CodeGen/Generic/badCallArgLRLLVM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This caused a problem because the argument of a call was defined by
; the return value of another call that appears later in the code.
diff --git a/test/CodeGen/Generic/badFoldGEP.ll b/test/CodeGen/Generic/badFoldGEP.ll
index 8de12512d145..2d4474bdf930 100644
--- a/test/CodeGen/Generic/badFoldGEP.ll
+++ b/test/CodeGen/Generic/badFoldGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; GetMemInstArgs() folded the two getElementPtr instructions together,
;; producing an illegal getElementPtr. That's because the type generated
diff --git a/test/CodeGen/Generic/badarg6.ll b/test/CodeGen/Generic/badarg6.ll
index 1ff7df42a348..d6e5ac5791e4 100644
--- a/test/CodeGen/Generic/badarg6.ll
+++ b/test/CodeGen/Generic/badarg6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; On this code, llc did not pass the sixth argument (%reg321) to printf.
; It passed the first five in %o0 - %o4, but never initialized %o5.
diff --git a/test/CodeGen/Generic/badlive.ll b/test/CodeGen/Generic/badlive.ll
index 0114fb0fa060..43b03e31fa83 100644
--- a/test/CodeGen/Generic/badlive.ll
+++ b/test/CodeGen/Generic/badlive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() {
bb0:
diff --git a/test/CodeGen/Generic/bool-to-double.ll b/test/CodeGen/Generic/bool-to-double.ll
index d6c9e5239516..81350a40b4db 100644
--- a/test/CodeGen/Generic/bool-to-double.ll
+++ b/test/CodeGen/Generic/bool-to-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define double @test(i1 %X) {
%Y = uitofp i1 %X to double ; <double> [#uses=1]
ret double %Y
diff --git a/test/CodeGen/Generic/bool-vector.ll b/test/CodeGen/Generic/bool-vector.ll
index e0f2a70886a7..4758697286a2 100644
--- a/test/CodeGen/Generic/bool-vector.ll
+++ b/test/CodeGen/Generic/bool-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1845
define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) {
diff --git a/test/CodeGen/Generic/call-ret0.ll b/test/CodeGen/Generic/call-ret0.ll
index 7ab966ba9070..a8e00cd54ef7 100644
--- a/test/CodeGen/Generic/call-ret0.ll
+++ b/test/CodeGen/Generic/call-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @foo(i32 %x) {
ret i32 %x
}
diff --git a/test/CodeGen/Generic/call-ret42.ll b/test/CodeGen/Generic/call-ret42.ll
index ac9bd9235794..95cc28625aa6 100644
--- a/test/CodeGen/Generic/call-ret42.ll
+++ b/test/CodeGen/Generic/call-ret42.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @foo(i32 %x) {
ret i32 42
diff --git a/test/CodeGen/Generic/call-void.ll b/test/CodeGen/Generic/call-void.ll
index b88268992396..9ed417941593 100644
--- a/test/CodeGen/Generic/call-void.ll
+++ b/test/CodeGen/Generic/call-void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo() {
ret void
diff --git a/test/CodeGen/Generic/call2-ret0.ll b/test/CodeGen/Generic/call2-ret0.ll
index 8c7e8920f259..4e57ef804f22 100644
--- a/test/CodeGen/Generic/call2-ret0.ll
+++ b/test/CodeGen/Generic/call2-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @bar(i32 %x) {
ret i32 0
diff --git a/test/CodeGen/Generic/cast-fp.ll b/test/CodeGen/Generic/cast-fp.ll
index 5f05d85ea854..590b7ceee4bf 100644
--- a/test/CodeGen/Generic/cast-fp.ll
+++ b/test/CodeGen/Generic/cast-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_fstr = internal constant [8 x i8] c"a = %f\0A\00" ; <[8 x i8]*> [#uses=1]
@a_lstr = internal constant [10 x i8] c"a = %lld\0A\00" ; <[10 x i8]*> [#uses=1]
@a_dstr = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/constindices.ll b/test/CodeGen/Generic/constindices.ll
index 6366fd59598f..7deb30f43d12 100644
--- a/test/CodeGen/Generic/constindices.ll
+++ b/test/CodeGen/Generic/constindices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that a sequence of constant indices is folded correctly
; into the equivalent offset at compile-time.
diff --git a/test/CodeGen/Generic/debug-info.ll b/test/CodeGen/Generic/debug-info.ll
index d1bb66d1529d..20d9f913c11c 100644
--- a/test/CodeGen/Generic/debug-info.ll
+++ b/test/CodeGen/Generic/debug-info.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%lldb.compile_unit = type { i32, i16, i16, i8*, i8*, i8*, { }* }
@d.compile_unit7 = external global %lldb.compile_unit ; <%lldb.compile_unit*> [#uses=1]
diff --git a/test/CodeGen/Generic/div-neg-power-2.ll b/test/CodeGen/Generic/div-neg-power-2.ll
index 3bc4899d8fdd..246cd033e279 100644
--- a/test/CodeGen/Generic/div-neg-power-2.ll
+++ b/test/CodeGen/Generic/div-neg-power-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @test(i32 %X) {
%Y = sdiv i32 %X, -2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/empty-load-store.ll b/test/CodeGen/Generic/empty-load-store.ll
index d7bb37194e49..bca73054447c 100644
--- a/test/CodeGen/Generic/empty-load-store.ll
+++ b/test/CodeGen/Generic/empty-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2612
@current_foo = internal global { } zeroinitializer
diff --git a/test/CodeGen/Generic/externally_available.ll b/test/CodeGen/Generic/externally_available.ll
index 73b6b9825d2c..7976cc971880 100644
--- a/test/CodeGen/Generic/externally_available.ll
+++ b/test/CodeGen/Generic/externally_available.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep test_
+; RUN: llc < %s | not grep test_
; test_function should not be emitted to the .s file.
define available_externally i32 @test_function() {
diff --git a/test/CodeGen/Generic/fastcall.ll b/test/CodeGen/Generic/fastcall.ll
index 65e66c7ef061..35e04f1863a3 100644
--- a/test/CodeGen/Generic/fastcall.ll
+++ b/test/CodeGen/Generic/fastcall.ll
@@ -1,5 +1,5 @@
; Test that fastcc works. Test from bug 2770.
-; RUN: llvm-as < %s | llc -relocation-model=pic
+; RUN: llc < %s -relocation-model=pic
%struct.__gcov_var = type { i32 }
diff --git a/test/CodeGen/Generic/fneg-fabs.ll b/test/CodeGen/Generic/fneg-fabs.ll
index 2709fa1afd68..2f2f59762cb9 100644
--- a/test/CodeGen/Generic/fneg-fabs.ll
+++ b/test/CodeGen/Generic/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define double @fneg(double %X) {
%Y = fsub double -0.000000e+00, %X ; <double> [#uses=1]
diff --git a/test/CodeGen/Generic/fp-to-int-invalid.ll b/test/CodeGen/Generic/fp-to-int-invalid.ll
index 73176b14391e..cdcc3a277b6e 100644
--- a/test/CodeGen/Generic/fp-to-int-invalid.ll
+++ b/test/CodeGen/Generic/fp-to-int-invalid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4057
define void @test_cast_float_to_char(i8* %result) nounwind {
entry:
diff --git a/test/CodeGen/Generic/fp_to_int.ll b/test/CodeGen/Generic/fp_to_int.ll
index 609de6546cfd..ad944132d338 100644
--- a/test/CodeGen/Generic/fp_to_int.ll
+++ b/test/CodeGen/Generic/fp_to_int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i8 @test1(double %X) {
%tmp.1 = fptosi double %X to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/fpowi-promote.ll b/test/CodeGen/Generic/fpowi-promote.ll
index 55c2d2ad0c6f..82628ef6093b 100644
--- a/test/CodeGen/Generic/fpowi-promote.ll
+++ b/test/CodeGen/Generic/fpowi-promote.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386
+; RUN: llc < %s
+; RUN: llc < %s -march=x86 -mcpu=i386
; PR1239
diff --git a/test/CodeGen/Generic/fwdtwice.ll b/test/CodeGen/Generic/fwdtwice.ll
index 05e831af6be2..6b38f04673de 100644
--- a/test/CodeGen/Generic/fwdtwice.ll
+++ b/test/CodeGen/Generic/fwdtwice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;;
;; Test the sequence:
diff --git a/test/CodeGen/Generic/getresult-undef.ll b/test/CodeGen/Generic/getresult-undef.ll
index 7905ff52ef61..c675535335a0 100644
--- a/test/CodeGen/Generic/getresult-undef.ll
+++ b/test/CodeGen/Generic/getresult-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define double @foo() {
%t = getresult {double, double} undef, 1
diff --git a/test/CodeGen/Generic/global-ret0.ll b/test/CodeGen/Generic/global-ret0.ll
index 8fcef33a34d7..74bff876f882 100644
--- a/test/CodeGen/Generic/global-ret0.ll
+++ b/test/CodeGen/Generic/global-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@g = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/hello.ll b/test/CodeGen/Generic/hello.ll
index 705423f973cf..705945cf1983 100644
--- a/test/CodeGen/Generic/hello.ll
+++ b/test/CodeGen/Generic/hello.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [7 x i8] c"hello\0A\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/i128-addsub.ll b/test/CodeGen/Generic/i128-addsub.ll
index 10f0acc36308..e7cbf4aaf785 100644
--- a/test/CodeGen/Generic/i128-addsub.ll
+++ b/test/CodeGen/Generic/i128-addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
entry:
diff --git a/test/CodeGen/Generic/i128-arith.ll b/test/CodeGen/Generic/i128-arith.ll
index 9a670847b575..cf10463143c9 100644
--- a/test/CodeGen/Generic/i128-arith.ll
+++ b/test/CodeGen/Generic/i128-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i64 @foo(i64 %x, i64 %y, i32 %amt) {
%tmp0 = zext i64 %x to i128
diff --git a/test/CodeGen/Generic/inline-asm-special-strings.ll b/test/CodeGen/Generic/inline-asm-special-strings.ll
index e52e0be74bc3..d18221ef934d 100644
--- a/test/CodeGen/Generic/inline-asm-special-strings.ll
+++ b/test/CodeGen/Generic/inline-asm-special-strings.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "foo 0 0"
+; RUN: llc < %s | grep "foo 0 0"
define void @bar() nounwind {
tail call void asm sideeffect "foo ${:uid} ${:uid}", ""() nounwind
diff --git a/test/CodeGen/Generic/intrinsics.ll b/test/CodeGen/Generic/intrinsics.ll
index 373bec9adc05..9a42c3ef32a1 100644
--- a/test/CodeGen/Generic/intrinsics.ll
+++ b/test/CodeGen/Generic/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; SQRT
declare float @llvm.sqrt.f32(float)
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index e3acf0c2be13..8448565a2b82 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This testcase is invalid (the alignment specified for memcpy is
; greater than the alignment guaranteed for Qux or C.0.1173), but it
diff --git a/test/CodeGen/Generic/isunord.ll b/test/CodeGen/Generic/isunord.ll
index 589f49611791..ebbba010793b 100644
--- a/test/CodeGen/Generic/isunord.ll
+++ b/test/CodeGen/Generic/isunord.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc
-; XFAIL: ia64
+; RUN: llc < %s
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 66f409ed27d5..1db75497592f 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -1,5 +1,5 @@
; Make sure this testcase is supported by all code generators
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index f21b645615ad..282e973ff9ad 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare { i64, double } @wild()
diff --git a/test/CodeGen/Generic/negintconst.ll b/test/CodeGen/Generic/negintconst.ll
index a2b3d69ee555..67d775e16882 100644
--- a/test/CodeGen/Generic/negintconst.ll
+++ b/test/CodeGen/Generic/negintconst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that a negative constant smaller than 64 bits (e.g., int)
; is correctly implemented with sign-extension.
diff --git a/test/CodeGen/Generic/nested-select.ll b/test/CodeGen/Generic/nested-select.ll
index a723a4d74268..f81fed332df0 100644
--- a/test/CodeGen/Generic/nested-select.ll
+++ b/test/CodeGen/Generic/nested-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o /dev/null -f
+; RUN: llc < %s -o /dev/null
; Test that select of a select works
diff --git a/test/CodeGen/Generic/phi-immediate-factoring.ll b/test/CodeGen/Generic/phi-immediate-factoring.ll
index e0f675976d91..9f9f92115c79 100644
--- a/test/CodeGen/Generic/phi-immediate-factoring.ll
+++ b/test/CodeGen/Generic/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
; PR1296
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl \$1} | count 1
+; RUN: llc < %s -march=x86 | grep {movl \$1} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/Generic/pr2625.ll b/test/CodeGen/Generic/pr2625.ll
index c1f585de7376..3e3dc4b2d2bc 100644
--- a/test/CodeGen/Generic/pr2625.ll
+++ b/test/CodeGen/Generic/pr2625.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2625
define i32 @main({ i32, { i32 } }*) {
diff --git a/test/CodeGen/Generic/pr3288.ll b/test/CodeGen/Generic/pr3288.ll
index ff0384db6db5..b62710f31ecb 100644
--- a/test/CodeGen/Generic/pr3288.ll
+++ b/test/CodeGen/Generic/pr3288.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3288
define void @a() {
diff --git a/test/CodeGen/Generic/print-add.ll b/test/CodeGen/Generic/print-add.ll
index 4f1cb5e736af..95608dc60b50 100644
--- a/test/CodeGen/Generic/print-add.ll
+++ b/test/CodeGen/Generic/print-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index 1e27061941a4..d129ff85870e 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %f\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %f\0A\00" ; <[8 x i8]*> [#uses=1]
@add_str = internal constant [12 x i8] c"a + b = %f\0A\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-arith-int.ll b/test/CodeGen/Generic/print-arith-int.ll
index cf275151571e..ce938cf05b98 100644
--- a/test/CodeGen/Generic/print-arith-int.ll
+++ b/test/CodeGen/Generic/print-arith-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@add_str = internal constant [12 x i8] c"a + b = %d\0A\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-int.ll b/test/CodeGen/Generic/print-int.ll
index 58f5047ceb79..7ca4b3de48a3 100644
--- a/test/CodeGen/Generic/print-int.ll
+++ b/test/CodeGen/Generic/print-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-mul-exp.ll b/test/CodeGen/Generic/print-mul-exp.ll
index 06667754a323..90fc55b25838 100644
--- a/test/CodeGen/Generic/print-mul-exp.ll
+++ b/test/CodeGen/Generic/print-mul-exp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@a_mul_str = internal constant [13 x i8] c"a * %d = %d\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-mul.ll b/test/CodeGen/Generic/print-mul.ll
index 1d9452a58434..0707f3c2318c 100644
--- a/test/CodeGen/Generic/print-mul.ll
+++ b/test/CodeGen/Generic/print-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-shift.ll b/test/CodeGen/Generic/print-shift.ll
index 8992e8df0c01..6c5d222209be 100644
--- a/test/CodeGen/Generic/print-shift.ll
+++ b/test/CodeGen/Generic/print-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/ret0.ll b/test/CodeGen/Generic/ret0.ll
index 489f31c3730d..9e628a1a1409 100644
--- a/test/CodeGen/Generic/ret0.ll
+++ b/test/CodeGen/Generic/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() {
ret i32 0
diff --git a/test/CodeGen/Generic/ret42.ll b/test/CodeGen/Generic/ret42.ll
index 0cbe1763faad..f5cd33dc0b21 100644
--- a/test/CodeGen/Generic/ret42.ll
+++ b/test/CodeGen/Generic/ret42.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() {
ret i32 42
diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll
index 85e68d19c45a..b653e2a46dcf 100644
--- a/test/CodeGen/Generic/select-cc.ll
+++ b/test/CodeGen/Generic/select-cc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2504
define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind {
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index a532703d9417..63052c1a2845 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%Domain = type { i8*, i32, i32*, i32, i32, i32*, %Domain* }
@AConst = constant i32 123 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/shift-int64.ll b/test/CodeGen/Generic/shift-int64.ll
index 31be2d634e99..670ef20e084b 100644
--- a/test/CodeGen/Generic/shift-int64.ll
+++ b/test/CodeGen/Generic/shift-int64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i64 @test_imm(i64 %X) {
%Y = ashr i64 %X, 17 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/spillccr.ll b/test/CodeGen/Generic/spillccr.ll
index 854513331742..0a774c64f82e 100644
--- a/test/CodeGen/Generic/spillccr.ll
+++ b/test/CodeGen/Generic/spillccr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc
+; RUN: llc < %s
; July 6, 2002 -- LLC Regression test
; This test case checks if the integer CC register %xcc (or %ccr)
diff --git a/test/CodeGen/Generic/stack-protector.ll b/test/CodeGen/Generic/stack-protector.ll
index a11a7149a36c..a59c649781d4 100644
--- a/test/CodeGen/Generic/stack-protector.ll
+++ b/test/CodeGen/Generic/stack-protector.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -o - | grep {__stack_chk_guard}
-; RUN: llvm-as < %s | llc -o - | grep {__stack_chk_fail}
+; RUN: llc < %s -o - | grep {__stack_chk_guard}
+; RUN: llc < %s -o - | grep {__stack_chk_fail}
@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/stacksave-restore.ll b/test/CodeGen/Generic/stacksave-restore.ll
index fd3dd67298ed..b124b5f9b7d5 100644
--- a/test/CodeGen/Generic/stacksave-restore.ll
+++ b/test/CodeGen/Generic/stacksave-restore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/Generic/storetrunc-fp.ll b/test/CodeGen/Generic/storetrunc-fp.ll
index 0f7bb0b85ee9..7f7c7f71b3ad 100644
--- a/test/CodeGen/Generic/storetrunc-fp.ll
+++ b/test/CodeGen/Generic/storetrunc-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo(double %a, double %b, float* %fp) {
%c = fadd double %a, %b
diff --git a/test/CodeGen/Generic/switch-crit-edge-constant.ll b/test/CodeGen/Generic/switch-crit-edge-constant.ll
index d71fe56eb49d..1f2ab0dbcbe9 100644
--- a/test/CodeGen/Generic/switch-crit-edge-constant.ll
+++ b/test/CodeGen/Generic/switch-crit-edge-constant.ll
@@ -1,5 +1,5 @@
; PR925
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep mov.*str1 | count 1
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/Generic/switch-lower-feature-2.ll b/test/CodeGen/Generic/switch-lower-feature-2.ll
index 5e532a8db3ed..d6e56471c364 100644
--- a/test/CodeGen/Generic/switch-lower-feature-2.ll
+++ b/test/CodeGen/Generic/switch-lower-feature-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t -f
+; RUN: llc < %s -march=x86 -o %t
; RUN: grep jb %t | count 1
; RUN: grep \\\$6 %t | count 2
; RUN: grep 1024 %t | count 1
diff --git a/test/CodeGen/Generic/switch-lower-feature.ll b/test/CodeGen/Generic/switch-lower-feature.ll
index 05234012547c..65fdf5add790 100644
--- a/test/CodeGen/Generic/switch-lower-feature.ll
+++ b/test/CodeGen/Generic/switch-lower-feature.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep {\$7} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep {\$6} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep 1024 | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep jb | count 2
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep je | count 1
+; RUN: llc < %s -march=x86 -o - | grep {\$7} | count 1
+; RUN: llc < %s -march=x86 -o - | grep {\$6} | count 1
+; RUN: llc < %s -march=x86 -o - | grep 1024 | count 1
+; RUN: llc < %s -march=x86 -o - | grep jb | count 2
+; RUN: llc < %s -march=x86 -o - | grep je | count 1
define i32 @main(i32 %tmp158) {
entry:
diff --git a/test/CodeGen/Generic/switch-lower.ll b/test/CodeGen/Generic/switch-lower.ll
index b1aad3f45140..eb240edc7c67 100644
--- a/test/CodeGen/Generic/switch-lower.ll
+++ b/test/CodeGen/Generic/switch-lower.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1197
diff --git a/test/CodeGen/Generic/trap.ll b/test/CodeGen/Generic/trap.ll
index 4dfc1a6450b9..67d1a7a347f3 100644
--- a/test/CodeGen/Generic/trap.ll
+++ b/test/CodeGen/Generic/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @test() noreturn nounwind {
entry:
tail call void @llvm.trap( )
diff --git a/test/CodeGen/Generic/v-split.ll b/test/CodeGen/Generic/v-split.ll
index 44601d0c4024..634b5621aa99 100644
--- a/test/CodeGen/Generic/v-split.ll
+++ b/test/CodeGen/Generic/v-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%f8 = type <8 x float>
define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
diff --git a/test/CodeGen/Generic/vector-casts.ll b/test/CodeGen/Generic/vector-casts.ll
index 12104a32eecd..a26918b8f242 100644
--- a/test/CodeGen/Generic/vector-casts.ll
+++ b/test/CodeGen/Generic/vector-casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2671
define void @a(<2 x double>* %p, <2 x i8>* %q) {
diff --git a/test/CodeGen/Generic/vector-constantexpr.ll b/test/CodeGen/Generic/vector-constantexpr.ll
index 441c4a0e71dc..d8e0258221c8 100644
--- a/test/CodeGen/Generic/vector-constantexpr.ll
+++ b/test/CodeGen/Generic/vector-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @""(float* %inregs, float* %outregs) {
%a_addr.i = alloca <4 x float> ; <<4 x float>*> [#uses=1]
diff --git a/test/CodeGen/Generic/vector-identity-shuffle.ll b/test/CodeGen/Generic/vector-identity-shuffle.ll
index 61b44af118b9..332d6d8c2536 100644
--- a/test/CodeGen/Generic/vector-identity-shuffle.ll
+++ b/test/CodeGen/Generic/vector-identity-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @test(<4 x float>* %tmp2.i) {
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index f283256d10ac..a0f9a02d4cbb 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -1,5 +1,5 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%d8 = type <8 x double>
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index 245c2f908bde..f339373ffc75 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4136
target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
diff --git a/test/CodeGen/MSP430/2009-05-17-Rot.ll b/test/CodeGen/MSP430/2009-05-17-Rot.ll
index c25a906308af..2ae005259d4f 100644
--- a/test/CodeGen/MSP430/2009-05-17-Rot.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430
+; RUN: llc < %s -march=msp430
define i16 @rol1u16(i16 %x.arg) nounwind {
%retval = alloca i16
diff --git a/test/CodeGen/MSP430/2009-05-17-Shift.ll b/test/CodeGen/MSP430/2009-05-17-Shift.ll
index b048bb3fd32a..25aff60c2b3f 100644
--- a/test/CodeGen/MSP430/2009-05-17-Shift.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430 | grep rra | count 1
+; RUN: llc < %s -march=msp430 | grep rra | count 1
define i16 @lsr2u16(i16 %x.arg) nounwind {
%retval = alloca i16
diff --git a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
index 70f1d996e7ca..54eb7ff5c0bf 100644
--- a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
+++ b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430
+; RUN: llc < %s -march=msp430
define i16 @test(double %d) nounwind {
entry:
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
new file mode 100644
index 000000000000..088d3e1e7b37
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s
+; PR4769
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i16 @foo() nounwind readnone {
+entry:
+ %result = alloca i16, align 1 ; <i16*> [#uses=2]
+ volatile store i16 0, i16* %result
+ %tmp = volatile load i16* %result ; <i16> [#uses=1]
+ ret i16 %tmp
+}
+
+define i16 @main() nounwind {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %call = call i16 @bar() nounwind ; <i16> [#uses=1]
+ %tobool = icmp eq i16 %call, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %result.i = alloca i16, align 1 ; <i16*> [#uses=2]
+ volatile store i16 0, i16* %result.i
+ %tmp.i = volatile load i16* %result.i ; <i16> [#uses=0]
+ ret i16 0
+}
+
+declare i16 @bar()
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
new file mode 100644
index 000000000000..cc574c7290ab
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep 0x0021 | count 2
+; PR4776
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-unknown-unknown"
+
+@"\010x0021" = common global i8 0, align 1 ; <i8*> [#uses=2]
+
+define zeroext i8 @foo(i8 zeroext %x) nounwind {
+entry:
+ %retval = alloca i8 ; <i8*> [#uses=2]
+ %x.addr = alloca i8 ; <i8*> [#uses=2]
+ %tmp = alloca i8, align 1 ; <i8*> [#uses=2]
+ store i8 %x, i8* %x.addr
+ %tmp1 = volatile load i8* @"\010x0021" ; <i8> [#uses=1]
+ store i8 %tmp1, i8* %tmp
+ %tmp2 = load i8* %x.addr ; <i8> [#uses=1]
+ volatile store i8 %tmp2, i8* @"\010x0021"
+ %tmp3 = load i8* %tmp ; <i8> [#uses=1]
+ store i8 %tmp3, i8* %retval
+ %0 = load i8* %retval ; <i8> [#uses=1]
+ ret i8 %0
+}
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
new file mode 100644
index 000000000000..856eb9db3f6b
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=msp430 < %s
+; PR4779
+define void @foo() nounwind {
+entry:
+ %r = alloca i8 ; <i8*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ volatile load i8* %r, align 1 ; <i8>:0 [#uses=1]
+ or i8 %0, 1 ; <i8>:1 [#uses=1]
+ volatile store i8 %1, i8* %r, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/MSP430/Inst16mi.ll b/test/CodeGen/MSP430/Inst16mi.ll
new file mode 100644
index 000000000000..33d7aa495d3c
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mi.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.w #2, &foo
+ store i16 2, i16 * @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.w #2, &foo
+ %1 = load i16* @foo
+ %2 = add i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.w #2, &foo
+ %1 = load i16* @foo
+ %2 = and i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.w #2, &foo
+ %1 = load i16* @foo
+ %2 = or i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.w #2, &foo
+ %1 = load i16* @foo
+ %2 = xor i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
new file mode 100644
index 000000000000..510afe373494
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+@bar = common global i16 0, align 2
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.w &bar, &foo
+ %1 = load i16* @bar
+ store i16 %1, i16* @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = add i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = and i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = or i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = xor i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
new file mode 100644
index 000000000000..53334aa748e9
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define void @mov(i16 %a) nounwind {
+; CHECK: mov:
+; CHECK: mov.w r15, &foo
+ store i16 %a, i16* @foo
+ ret void
+}
+
+define void @add(i16 %a) nounwind {
+; CHECK: add:
+; CHECK: add.w r15, &foo
+ %1 = load i16* @foo
+ %2 = add i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
+define void @and(i16 %a) nounwind {
+; CHECK: and:
+; CHECK: and.w r15, &foo
+ %1 = load i16* @foo
+ %2 = and i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
+define void @bis(i16 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.w r15, &foo
+ %1 = load i16* @foo
+ %2 = or i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
+define void @xor(i16 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.w r15, &foo
+ %1 = load i16* @foo
+ %2 = xor i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
new file mode 100644
index 000000000000..d0cb0d19b938
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define i16 @add(i16 %a) nounwind {
+; CHECK: add:
+; CHECK: add.w &foo, r15
+ %1 = load i16* @foo
+ %2 = add i16 %a, %1
+ ret i16 %2
+}
+
+define i16 @and(i16 %a) nounwind {
+; CHECK: and:
+; CHECK: and.w &foo, r15
+ %1 = load i16* @foo
+ %2 = and i16 %a, %1
+ ret i16 %2
+}
+
+
+define i16 @bis(i16 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.w &foo, r15
+ %1 = load i16* @foo
+ %2 = or i16 %a, %1
+ ret i16 %2
+}
+
+define i16 @xor(i16 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.w &foo, r15
+ %1 = load i16* @foo
+ %2 = xor i16 %a, %1
+ ret i16 %2
+}
+
diff --git a/test/CodeGen/MSP430/Inst16rr.ll b/test/CodeGen/MSP430/Inst16rr.ll
new file mode 100644
index 000000000000..6619c5182364
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16rr.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i16 @mov(i16 %a, i16 %b) nounwind {
+; CHECK: mov:
+; CHECK: mov.w r14, r15
+ ret i16 %b
+}
+
+define i16 @add(i16 %a, i16 %b) nounwind {
+; CHECK: add:
+; CHECK: add.w r14, r15
+ %1 = add i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @and(i16 %a, i16 %b) nounwind {
+; CHECK: and:
+; CHECK: and.w r14, r15
+ %1 = and i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @bis(i16 %a, i16 %b) nounwind {
+; CHECK: bis:
+; CHECK: bis.w r14, r15
+ %1 = or i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @xor(i16 %a, i16 %b) nounwind {
+; CHECK: xor:
+; CHECK: xor.w r14, r15
+ %1 = xor i16 %a, %b
+ ret i16 %1
+}
diff --git a/test/CodeGen/MSP430/Inst8mi.ll b/test/CodeGen/MSP430/Inst8mi.ll
new file mode 100644
index 000000000000..ef318ce1590d
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mi.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.b #2, &foo
+ store i8 2, i8 * @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.b #2, &foo
+ %1 = load i8* @foo
+ %2 = add i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.b #2, &foo
+ %1 = load i8* @foo
+ %2 = and i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.b #2, &foo
+ %1 = load i8* @foo
+ %2 = or i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.b #2, &foo
+ %1 = load i8* @foo
+ %2 = xor i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8mm.ll b/test/CodeGen/MSP430/Inst8mm.ll
new file mode 100644
index 000000000000..a2987ac9b46d
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mm.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+@foo = common global i8 0, align 1
+@bar = common global i8 0, align 1
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.b &bar, &foo
+ %1 = load i8* @bar
+ store i8 %1, i8* @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = add i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = and i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = or i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = xor i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
new file mode 100644
index 000000000000..04c681ef29f2
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define void @mov(i8 %a) nounwind {
+; CHECK: mov:
+; CHECK: mov.b r15, &foo
+ store i8 %a, i8* @foo
+ ret void
+}
+
+define void @and(i8 %a) nounwind {
+; CHECK: and:
+; CHECK: and.b r15, &foo
+ %1 = load i8* @foo
+ %2 = and i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
+define void @add(i8 %a) nounwind {
+; CHECK: add:
+; CHECK: add.b r15, &foo
+ %1 = load i8* @foo
+ %2 = add i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
+define void @bis(i8 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.b r15, &foo
+ %1 = load i8* @foo
+ %2 = or i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
+define void @xor(i8 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.b r15, &foo
+ %1 = load i8* @foo
+ %2 = xor i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
new file mode 100644
index 000000000000..62a5d4b9088b
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define i8 @add(i8 %a) nounwind {
+; CHECK: add:
+; CHECK: add.b &foo, r15
+ %1 = load i8* @foo
+ %2 = add i8 %a, %1
+ ret i8 %2
+}
+
+define i8 @and(i8 %a) nounwind {
+; CHECK: and:
+; CHECK: and.b &foo, r15
+ %1 = load i8* @foo
+ %2 = and i8 %a, %1
+ ret i8 %2
+}
+
+
+define i8 @bis(i8 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.b &foo, r15
+ %1 = load i8* @foo
+ %2 = or i8 %a, %1
+ ret i8 %2
+}
+
+define i8 @xor(i8 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.b &foo, r15
+ %1 = load i8* @foo
+ %2 = xor i8 %a, %1
+ ret i8 %2
+}
+
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
new file mode 100644
index 000000000000..90ea94516abd
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i8 @mov(i8 %a, i8 %b) nounwind {
+; CHECK: mov:
+; CHECK: mov.b r14, r15
+ ret i8 %b
+}
+
+define i8 @add(i8 %a, i8 %b) nounwind {
+; CHECK: add:
+; CHECK: add.b r14, r15
+ %1 = add i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @and(i8 %a, i8 %b) nounwind {
+; CHECK: and:
+; CHECK: and.w r14, r15
+ %1 = and i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @bis(i8 %a, i8 %b) nounwind {
+; CHECK: bis:
+; CHECK: bis.w r14, r15
+ %1 = or i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @xor(i8 %a, i8 %b) nounwind {
+; CHECK: xor:
+; CHECK: xor.w r14, r15
+ %1 = xor i8 %a, %b
+ ret i8 %1
+}
+
diff --git a/test/CodeGen/MSP430/inline-asm.ll b/test/CodeGen/MSP430/inline-asm.ll
new file mode 100644
index 000000000000..2cc25a4835d6
--- /dev/null
+++ b/test/CodeGen/MSP430/inline-asm.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define void @imm() nounwind {
+ call void asm sideeffect "bic\09$0,r2", "i"(i16 32) nounwind
+ ret void
+}
+
+define void @reg(i16 %a) nounwind {
+ call void asm sideeffect "bic\09$0,r2", "r"(i16 %a) nounwind
+ ret void
+}
+
+@foo = global i16 0, align 2
+
+define void @immmem() nounwind {
+ call void asm sideeffect "bic\09$0,r2", "i"(i16* getelementptr(i16* @foo, i32 1)) nounwind
+ ret void
+}
+
+define void @mem() nounwind {
+ call void asm sideeffect "bic\09$0,r2", "m"(i16* @foo) nounwind
+ ret void
+}
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9cd7c80577a1..8e7b70e2216f 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep subu %t | count 2
; RUN: grep addu %t | count 4
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index 53ceaf360653..b2aaa00754b7 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll
index 2d1101a25302..6bb6bd862b25 100644
--- a/test/CodeGen/Mips/2008-07-05-ByVal.ll
+++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {lw.*(\$4)} | count 2
+; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll
index f8eca85efafb..808ce16910ee 100644
--- a/test/CodeGen/Mips/2008-07-06-fadd64.ll
+++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __adddf3
+; RUN: llc < %s -march=mips | grep __adddf3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
index e0e7d767b1d8..7ac0f5f840db 100644
--- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll
+++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __extendsfdf2
+; RUN: llc < %s -march=mips | grep __extendsfdf2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index f2f0374c3168..ca996367733e 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep trunc.w.s | count 3
+; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
index ab6a9c8edae8..20de18a0164c 100644
--- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
+++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep __floatsidf %t | count 1
; RUN: grep __floatunsidf %t | count 1
; RUN: grep __fixdfsi %t | count 1
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index 4d218cf6b4bc..f6b2045444a5 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {rodata.str1.4,"aMS",@progbits} %t | count 1
; RUN: grep {r.data,} %t | count 1
; RUN: grep {\%hi} %t | count 2
diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index 0e3f86479539..26eb4db26d4d 100644
--- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mips-ssection-threshold=8 -march=mips -f -o %t0
-; RUN: llvm-as < %s | llc -mips-ssection-threshold=0 -march=mips -f -o %t1
+; RUN: llc < %s -mips-ssection-threshold=8 -march=mips -o %t0
+; RUN: llc < %s -mips-ssection-threshold=0 -march=mips -o %t1
; RUN: grep {sdata} %t0 | count 1
; RUN: grep {sbss} %t0 | count 1
; RUN: grep {gp_rel} %t0 | count 2
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index fc03bb5fef0d..59599b399c29 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep seh %t | count 1
; RUN: grep seb %t | count 1
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 2af7ab17c2cd..21ff96005421 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 4580215b38f6..80101fa25b3e 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {c\\..*\\.s} %t | count 3
; RUN: grep {bc1\[tf\]} %t | count 3
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index 5d03a1986b2d..042cad60e2b0 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {b\[ne\]\[eq\]} | count 1
+; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
index de11ac77c01e..77680bccf976 100644
--- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll
+++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep abs.s %t | count 1
; RUN: grep neg.s %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index fea5730f73d1..cd35ccaee83d 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep mfhi %t | count 1
; RUN: grep mflo %t | count 1
; RUN: grep multu %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index b1e999cea1f3..c41d5213c178 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -1,5 +1,5 @@
; Double return in abicall (default)
-; RUN: llvm-as < %s | llc -march=mips
+; RUN: llc < %s -march=mips
; PR2615
define double @main(...) {
diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll
index 9d18f47bce20..2f33e9bea73f 100644
--- a/test/CodeGen/Mips/2008-08-03-fabs64.ll
+++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {lui.*32767} %t | count 1
; RUN: grep {ori.*65535} %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f7a64c32f27b..ca90b500f050 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep mtc1 %t | count 1
; RUN: grep mfc1 %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 34596ea029a0..79e49a3d682e 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {subu.*sp} | count 2
+; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll
index e276f5e90e5b..54d454cc3ade 100644
--- a/test/CodeGen/Mips/2008-08-07-CC.ll
+++ b/test/CodeGen/Mips/2008-08-07-CC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f
+; RUN: llc < %s -march=mips
; Mips must ignore fastcc
target datalayout =
diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll
index fd41ff1f8cf2..f3bb965cdb69 100644
--- a/test/CodeGen/Mips/2008-08-07-FPRound.ll
+++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __truncdfsf2 | count 1
+; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-08-08-bswap.ll b/test/CodeGen/Mips/2008-08-08-bswap.ll
index 71c2b85d8df1..83289d97cfd7 100644
--- a/test/CodeGen/Mips/2008-08-08-bswap.ll
+++ b/test/CodeGen/Mips/2008-08-08-bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep wsbw | count 1
+; RUN: llc < %s | grep wsbw | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "psp"
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index e468b6da5a6b..1da1db24bf5a 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep clz | count 1
+; RUN: llc < %s -march=mips | grep clz | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
index 783850a7827e..18f5b3d7ff7b 100644
--- a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
+++ b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips
+; RUN: llc < %s -march=mips
; PR2794
define i32 @main(i8*) nounwind {
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index 1f7440afedd6..f5188434670b 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2667
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "psp"
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 00a969d0620a..a1b45c2a63e1 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=mips > %t
+; RUN: llc < %s -march=mips > %t
; RUN: grep \\\$foo: %t
; RUN: grep call.*\\\$foo %t
; RUN: grep \\\$baz: %t
diff --git a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
new file mode 100644
index 000000000000..d7072dd9b5d7
--- /dev/null
+++ b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=pic16 | grep {movf \\+@i + 0, \\+W}
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-f32:32:32"
+target triple = "pic16-"
+@i = global i32 -10, align 1 ; <i32*> [#uses=1]
+@j = global i32 -20, align 1 ; <i32*> [#uses=1]
+@pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=3]
+@main.auto.k = internal global i32 0 ; <i32*> [#uses=2]
+
+define void @main() nounwind {
+entry:
+ %tmp = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32* @j ; <i32> [#uses=1]
+ %add = add i32 %tmp, %tmp1 ; <i32> [#uses=1]
+ store i32 %add, i32* @main.auto.k
+ %tmp2 = load i32* @main.auto.k ; <i32> [#uses=1]
+ %add3 = add i32 %tmp2, 32 ; <i32> [#uses=1]
+ %conv = trunc i32 %add3 to i8 ; <i8> [#uses=1]
+ %tmp4 = load i8** @pc ; <i8*> [#uses=1]
+ store i8 %conv, i8* %tmp4
+ %tmp5 = load i8** @pc ; <i8*> [#uses=1]
+ %tmp6 = load i8* %tmp5 ; <i8> [#uses=1]
+ %conv7 = sext i8 %tmp6 to i16 ; <i16> [#uses=1]
+ %sub = sub i16 %conv7, 1 ; <i16> [#uses=1]
+ %conv8 = trunc i16 %sub to i8 ; <i8> [#uses=1]
+ %tmp9 = load i8** @pc ; <i8*> [#uses=1]
+ store i8 %conv8, i8* %tmp9
+ ret void
+}
diff --git a/test/CodeGen/PIC16/dg.exp b/test/CodeGen/PIC16/dg.exp
new file mode 100644
index 000000000000..b08b9858e048
--- /dev/null
+++ b/test/CodeGen/PIC16/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target PIC16] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/PIC16/global-in-user-section.ll b/test/CodeGen/PIC16/global-in-user-section.ll
new file mode 100644
index 000000000000..74c9d9d256cd
--- /dev/null
+++ b/test/CodeGen/PIC16/global-in-user-section.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=pic16 | FileCheck %s
+
+@G1 = common global i16 0, section "usersection", align 1
+; CHECK: usersection UDATA
+; CHECK: @G1 RES 2
diff --git a/test/CodeGen/PIC16/globals.ll b/test/CodeGen/PIC16/globals.ll
new file mode 100644
index 000000000000..959eb254d766
--- /dev/null
+++ b/test/CodeGen/PIC16/globals.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=pic16 | FileCheck %s
+
+@G1 = global i32 4712, section "Address=412"
+; CHECK: @G1.412.idata.0.# IDATA 412
+; CHECK: @G1 dl 4712
+
+@G2 = global i32 0, section "Address=412"
+; CHECK: @G2.412.udata.0.# UDATA 412
+; CHECK: @G2 RES 4
+
+@G3 = addrspace(1) constant i32 4712, section "Address=412"
+; CHECK: @G3.412.romdata.1.# ROMDATA 412
+; CHECK: @G3 rom_dl 4712
+
diff --git a/test/CodeGen/PIC16/sext.ll b/test/CodeGen/PIC16/sext.ll
new file mode 100644
index 000000000000..b49925ffb7c3
--- /dev/null
+++ b/test/CodeGen/PIC16/sext.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=pic16
+
+@main.auto.c = internal global i8 0 ; <i8*> [#uses=1]
+
+define i16 @main() nounwind {
+entry:
+ %tmp = load i8* @main.auto.c ; <i8> [#uses=1]
+ %conv = sext i8 %tmp to i16 ; <i16> [#uses=1]
+ ret i16 %conv
+}
diff --git a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
index 70f294a78d86..f95465cfc537 100644
--- a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
+++ b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @test() {
%tr1 = lshr i32 1, 0 ; <i32> [#uses=0]
ret void
diff --git a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
index 93a91234b707..c3bfa49115b9 100644
--- a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
+++ b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @main() {
%tr4 = shl i64 1, 0 ; <i64> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
index 1a1aca4b5d11..dea654ac0c0b 100644
--- a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
+++ b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @main() {
%shamt = add i8 0, 1 ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
index 3e490b1dc7a2..fc190a486e6b 100644
--- a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
+++ b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep .comm.*X,0
+; RUN: llc < %s -march=ppc32 | not grep .comm.*X,0
@X = linkonce global { } zeroinitializer ; <{ }*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
index f84caaf1d499..ad02ece900c8 100644
--- a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i32 @main() {
%setle = icmp sle i64 1, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
index 7b3e9b4f092f..671bf804ed32 100644
--- a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
+++ b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i64 @test() {
ret i64 undef
diff --git a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
index 8e8fee2888fb..95012c30fc5f 100644
--- a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
@@ -1,6 +1,6 @@
; this should not crash the ppc backend
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i32 @test(i32 %j.0.0.i) {
diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
index 428dd0c3e3fd..5d1df468a66d 100644
--- a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
+++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -1,6 +1,6 @@
; This function should have exactly one call to fixdfdi, no more!
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
; RUN: grep {bl .*fixdfdi} | count 1
define double @test2(double %tmp.7705) {
diff --git a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
index 54f24c612660..8a5d3b0fa2c2 100644
--- a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
+++ b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
@@ -1,7 +1,7 @@
; This was erroneously being turned into an rlwinm instruction.
; The sign bit does matter in this case.
-; RUN: llvm-as < %s | llc -march=ppc32 | grep srawi
+; RUN: llc < %s -march=ppc32 | grep srawi
define i32 @test(i32 %X) {
%Y = and i32 %X, -2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
index d56cffcf4ab1..047a12bedd81 100644
--- a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
+++ b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index 1b3bde8fb12e..97bb48e96e56 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep {, f1}
+; RUN: llc < %s | not grep {, f1}
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
index 86ad71861773..fbf254082ee0 100644
--- a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
+++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @iterative_hash_host_wide_int() {
%zero = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
index 8500260fafce..172e34849d1d 100644
--- a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
+++ b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define double @CalcSpeed(float %tmp127) {
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index a536fa162c03..969772ee2bee 100644
--- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
; RUN: grep {vspltish v.*, 10}
define void @test(<8 x i16>* %P) {
diff --git a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
index b79cce2ead00..d2256642fbf0 100644
--- a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
+++ b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; END.
define void @test(i8* %stack) {
diff --git a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
index e1033c3f808a..0205d10a795c 100644
--- a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; END.
%struct.attr_desc = type { i8*, %struct.attr_desc*, %struct.attr_value*, %struct.attr_value*, i32 }
diff --git a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
index 33807ca012b5..1b8b064ee914 100644
--- a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
+++ b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin | grep extsw | count 2
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep extsw | count 2
@lens = external global i8* ; <i8**> [#uses=1]
@vals = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
index c25cf215bc34..65dd568b1ee3 100644
--- a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
+++ b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
-define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) {
+define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) nounwind {
%tmp93 = load i16* null ; <i16> [#uses=1]
%tmp99 = call i16 @llvm.bswap.i16( i16 %tmp93 ) ; <i16> [#uses=1]
store i16 %tmp99, i16* %ui16
diff --git a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
index 1043e45efb11..a947e5cd9c58 100644
--- a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
+++ b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsldoi
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vor
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsldoi
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vor
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) {
%tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
index aff4edeba54b..cb76b5c70cf0 100644
--- a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
+++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct..0anon = type { i32 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
diff --git a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
index 5210dd1cb1a8..f748a8bf1d6c 100644
--- a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
+++ b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
target datalayout = "E-p:64:64"
target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
index 7a65c00f104f..57ed250abc09 100644
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -combiner-alias-analysis | grep f5
+; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
index 6621cec7f43e..002a0644183a 100644
--- a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep IMPLICIT_DEF
+; RUN: llc < %s -march=ppc32 | not grep IMPLICIT_DEF
define void @foo(i64 %X) {
entry:
diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
index 313568c1e4be..3d462b4d1461 100644
--- a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep xor
+; RUN: llc < %s -march=ppc32 | grep xor
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
index 6dc1ff037eb3..3284f0a624f6 100644
--- a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define i32* @foo(i32 %n) {
%A = alloca i32, i32 %n ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
index 80ef479fb076..49b3b9d18fae 100644
--- a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
+++ b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
+; RUN: llc < %s -march=ppc32 | grep rlwimi
define void @test(i16 %div.0.i.i.i.i, i32 %L_num.0.i.i.i.i, i32 %tmp1.i.i206.i.i, i16* %P) {
%X = shl i16 %div.0.i.i.i.i, 1 ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
index 7680c215c217..61b996761898 100644
--- a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
+++ b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
define void @glgRunProcessor15() {
%tmp26355.i = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000 >, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
index be3b86308f13..ba863047be99 100644
--- a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc64
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s
define void @bitap() {
entry:
diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
index 058166ff93db..6d9a3fa7b106 100644
--- a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc64
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s
@qsz.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
index 19fedf9f5962..805528cf2efd 100644
--- a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
+++ b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsb
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh
+; RUN: llc < %s -march=ppc32 | grep extsb
+; RUN: llc < %s -march=ppc32 | grep extsh
define i32 @p1(i8 %c, i16 %s) {
entry:
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index d9374edfe865..7b00ac69b91a 100644
--- a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep cntlzw
-define i32 @foo() {
+define i32 @foo() nounwind {
entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=2]
%temp = alloca i32, align 4 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
index f2c951ec21d5..0c454729290d 100644
--- a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
+++ b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
define i16 @test(i8* %d1, i16* %d2) {
%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 ) ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
index d4764622af68..fe5145d15230 100644
--- a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
+++ b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
; Test two things: 1) that a frameidx can be rewritten in an inline asm
; 2) that inline asms can handle reg+imm addr modes.
diff --git a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
index 97f6a018b30b..621d43b5c22d 100644
--- a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
+++ b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
; RUN: grep align.*3
@X = global <{i32, i32}> <{ i32 1, i32 123 }>
diff --git a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
index 5a3d3b5d9c1c..f48f3656ddfe 100644
--- a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
+++ b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
index 3eef9c551b75..0473857ae70f 100644
--- a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
+++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep mflr | count 1
+; RUN: llc < %s | grep mflr | count 1
target datalayout = "e-p:32:32"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index 098e7484e173..e93395a67ec6 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -mcpu=g5 | grep cntlzd
+; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd
define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
%tmp19 = load i64* %t
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index 637208b610a4..d43916d4f3c1 100644
--- a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
define void @test(<4 x float>*, { { i16, i16, i32 } }*) {
xOperationInitMasks.exit:
diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
index 656b83192e00..86fd9475029d 100644
--- a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
+++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
; PR1351
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index ba0f8fe1b77d..f2fdedf20072 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc | grep {subfc r3,r5,r4}
-; RUN: llvm-as < %s | llc | grep {subfze r4,r2}
-; RUN: llvm-as < %s | llc -regalloc=local | grep {subfc r5,r2,r4}
-; RUN: llvm-as < %s | llc -regalloc=local | grep {subfze r2,r3}
+; RUN: llc < %s | grep {subfc r3,r5,r4}
+; RUN: llc < %s | grep {subfze r4,r2}
+; RUN: llc < %s -regalloc=local | grep {subfc r5,r2,r4}
+; RUN: llc < %s -regalloc=local | grep {subfze r2,r3}
; The first argument of subfc must not be the same as any other register.
; PR1357
diff --git a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
index 989a7516aa4d..1df51406fac9 100644
--- a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
+++ b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1382
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
index b64de683f837..e4e931492ac4 100644
--- a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
%struct..0anon = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 5a86418f7cb7..42f215281a8b 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*quux | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
+; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 2
+; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 2
+; RUN: llc < %s -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
; Check that tail merging is not the default on ppc, and that -enable-tail-merge works.
; ModuleID = 'tail.c'
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
index ae853f67e200..2938c70c48bf 100644
--- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -1,7 +1,7 @@
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
-; RUN: llvm-as < %s | llc -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
+; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
; PR1473
define i8 @foo(i16 zeroext %a) zeroext {
diff --git a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
index 58260ec6b739..6de7a09128f0 100644
--- a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
+++ b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+altivec
+; RUN: llc < %s -march=ppc32 -mattr=+altivec
%struct.XATest = type { float, i16, i8, i8 }
%struct.XArrayRange = type { i8, i8, i8, i8 }
diff --git a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
index 34df7bb7d057..06f40d98c68c 100644
--- a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
; PR1596
%struct._obstack_chunk = type { i8* }
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 9c8fa97be967..82ef2b82cbe6 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 | grep dst | count 4
define hidden void @_Z4borkPc(i8* %image) {
entry:
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index c5e7a4d38a00..ea7de9847ea7 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep lwzx
+; RUN: llc < %s -march=ppc64 | grep lwzx
%struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] }
%struct.__mutex_t = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index f6bd3337aef7..898c470b1726 100644
--- a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc | grep stfd | count 3
-; RUN: llvm-as < %s | llc | grep stfs | count 1
-; RUN: llvm-as < %s | llc | grep lfd | count 2
-; RUN: llvm-as < %s | llc | grep lfs | count 2
+; RUN: llc < %s | grep stfd | count 3
+; RUN: llc < %s | grep stfs | count 1
+; RUN: llc < %s | grep lfd | count 2
+; RUN: llc < %s | grep lfs | count 2
; ModuleID = 'foo.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll b/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
index bb7aba45a963..d12698b9a00f 100644
--- a/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
%struct.TCMalloc_SpinLock = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
index f4b87cf4517b..5cfe54e1582b 100644
--- a/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin
declare void @cxa_atexit_check_1(i8*)
diff --git a/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll b/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
index e71a8fb0f160..c4152b4fc8de 100644
--- a/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
+++ b/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; rdar://5538377
%struct.disk_unsigned = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
index bd11b5d5b7b2..84fadd1b0461 100644
--- a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
+++ b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -mattr=+altivec
+; RUN: llc < %s -march=ppc64 -mattr=+altivec
%struct.inoutprops = type <{ i8, [3 x i8] }>
define void @bork(float* %argA, float* %argB, float* %res, i8 %inoutspec.0) {
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index bca6e5a8fbdb..ee614782952d 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
%struct.NSError = type opaque
%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index 80ef6f19f727..5a07a9b7acf0 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
%struct.NSError = type opaque
%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll b/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
index e49d59acfe5f..a9f242ba5b16 100644
--- a/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
%struct.HDescriptor = type <{ i32, i32 }>
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index a0649e08076e..439ef14d8b24 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
;; Formerly crashed, see PR 1508
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index aca0faaa4e41..d1f028586160 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s
+; RUN: llc < %s -march=ppc32 -mcpu=g3
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; PR1811
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>*
diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
index 38ae87ce8c00..db2ab877ff7d 100644
--- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep nop
+; RUN: llc < %s -march=ppc32 | grep nop
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
index 5b9cd1d8408f..791e9e610655 100644
--- a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
%struct.Handle = type { %struct.oopDesc** }
%struct.JNI_ArgumentPusher = type { %struct.SignatureIterator, %struct.JavaCallArguments* }
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index 5edf6b761fe8..cfa1b10d32ee 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=local
define i32 @bork(i64 %foo, i64 %bar) {
entry:
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index 8101a35a4fb4..e50fac4472a9 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
declare i8* @bar(i32)
diff --git a/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll b/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
index 919de33234b8..222dde45353b 100644
--- a/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
+++ b/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
@.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
define fastcc void @ParseContent(i8* %buf, i32 %bufsize) {
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index eaeccc5f27a4..9f35b8346c68 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
%struct._cpp_strbuf = type { i8*, i32, i32 }
%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index 061c585c7476..dd425f59822b 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
define i16 @test(i8* %d1, i16* %d2) {
%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
index 395c986a8412..a8fef05b1ad8 100644
--- a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define fastcc i8* @page_rec_get_next(i8* %rec) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
index 67c167aca127..8776d9a3eda5 100644
--- a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
%struct..0objc_object = type { %struct.objc_class* }
%struct.NSArray = type { %struct..0objc_object }
diff --git a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
index 0b748d20b7ca..8e5bf567b126 100644
--- a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
define i32 @t(i64 %byteStart, i32 %activeIndex) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll b/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
index 410736d5872d..270633786077 100644
--- a/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
define fastcc i64 @nonzero_bits1() nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
index 357ab100d2d4..839098ef5c2f 100644
--- a/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
; Avoid reading memory that's already freed.
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64)* @_Z13GetSectorSizey to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index a390e522686d..7b6d4916c1a8 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
@_ZL10DeviceCode = internal global i16 0 ; <i16*> [#uses=1]
@.str19 = internal constant [64 x i8] c"unlock_then_erase_sector: failed to erase block (status= 0x%x)\0A\00" ; <[64 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll b/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
index 5c40b9e0aed9..d42c814a46a2 100644
--- a/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
+++ b/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
target triple = "powerpc-apple-darwin9.2.2"
define i256 @func(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind readnone {
diff --git a/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll b/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
index d337e3773098..6b40b2462daf 100644
--- a/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @t() nounwind {
call void null( ppc_fp128 undef )
diff --git a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
index 92b5ca26b2e6..862559b109cf 100644
--- a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
+++ b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
@g = external global ppc_fp128
@h = external global ppc_fp128
diff --git a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
index d3238d23c0ee..83c5511878ca 100644
--- a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; <rdar://problem/6020042>
define i32 @bork() nounwind {
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index b6b9c89730a3..8802b97d2a6a 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vadduhm
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsubuhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
define <4 x i32> @test() nounwind {
ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
index 7060fe560e94..4a834f93a205 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
%struct.BiPartSrcDescriptor = type <{ %"struct.BiPartSrcDescriptor::$_105" }>
diff --git a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
index f55ffac45b08..17737d9d3b2d 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
index 32e36427c5e6..5cd8c348b4db 100644
--- a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
index a7f8181fd906..dc1e9369825a 100644
--- a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
+++ b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
index 2ccca25e2a27..c9c05e1cc363 100644
--- a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
+++ b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin | grep lwz | grep 228
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep lwz | grep 228
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index b625cebaca41..97844dd7486a 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
%struct.CGLDI = type { %struct.cgli*, i32, i32, i32, i32, i32, i8*, i32, void (%struct.CGLSI*, i32, %struct.CGLDI*)*, i8*, %struct.vv_t }
%struct.cgli = type { i32, %struct.cgli*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, i32, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i8*, i32*, %struct._cgro*, %struct._cgro*, float, float, float, float, i32, i8*, float, i8*, [16 x i32] }
diff --git a/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll b/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
index 00ca81161027..91c36efc522e 100644
--- a/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
+++ b/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; XFAIL: *
; PR2356
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
diff --git a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index c760b41b3047..f474a6d7cc22 100644
--- a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 071c78833bae..f4c06fba6dfe 100644
--- a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -o - | not grep fixunstfsi
+; RUN: llc < %s -march=ppc32 -o - | not grep fixunstfsi
define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
index af9a54ee9d13..83f3f6f8a762 100644
--- a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
+++ b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; PR2986
@argc = external global i32 ; <i32*> [#uses=1]
@buffer = external global [32 x i8], align 4 ; <[32 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
index 0ad5b06c8032..20683b9019e0 100644
--- a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
+++ b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2988
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin10.0"
diff --git a/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll b/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
index f5b3e93d6170..9ed7f6f82dc4 100644
--- a/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
+++ b/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9.5
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9.5
define void @__multc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-12-12-EH.ll b/test/CodeGen/PowerPC/2008-12-12-EH.ll
index 21218f55f039..b56c22abc6dd 100644
--- a/test/CodeGen/PowerPC/2008-12-12-EH.ll
+++ b/test/CodeGen/PowerPC/2008-12-12-EH.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | grep ^.L_Z1fv.eh
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
define void @_Z1fv() {
entry:
diff --git a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
index 0cf55188278e..d49d58deeaff 100644
--- a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
+++ b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9.5
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9.5
; rdar://6499616
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
index a898de0b4853..172531e5db49 100644
--- a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
+++ b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin10
+; RUN: llc < %s -mtriple=powerpc-apple-darwin10
; rdar://6692215
define fastcc void @_qsort(i8* %a, i32 %n, i32 %es, i32 (i8*, i8*)* %cmp, i32 %depth_limit) nounwind optsize ssp {
diff --git a/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll b/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
index 4ea43ec505e3..29d115dc6a44 100644
--- a/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
+++ b/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin10
+; RUN: llc < %s -mtriple=powerpc-apple-darwin10
; PR4280
define i32 @__fixunssfsi(float %a) nounwind readnone {
diff --git a/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll b/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
new file mode 100644
index 000000000000..f64e3dcf7328
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc32 -verify-machineinstrs
+
+; The machine code verifier will call isRegTiedToDefOperand() on /all/
+; register-use operands, so we must make sure that the operand flag is found
+; correctly.
+
+; This test case is actually not specific to PowerPC, but the (imm, reg) format
+; of PowerPC "m" operands triggers this bug.
+
+define void @memory_asm_operand(i32 %a) {
+ ; "m" operand will be represented as:
+ ; INLINEASM <es:fake $0>, 10, %R2, 20, -4, %R1
+ ; It is difficult to find the flag operand (20) when starting from %R1
+ call i32 asm "lbzx $0, $1", "=r,m" (i32 %a)
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
new file mode 100644
index 000000000000..5d09696933b5
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin10.0"
+; It is wrong on powerpc to substitute reg+reg for $0; the stw opcode
+; would have to change.
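+; As an illustrative sketch (hypothetical registers, not checked output): an
+; "o"-constrained operand must stay in d-form, e.g. "stw r3, 0(r2)"; folding a
+; reg+reg address would require the x-form opcode "stwx r3, r2, r4" instead,
+; which the fixed asm string cannot provide.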
+
+@x = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
+
+define void @foo(i32 %y) nounwind ssp {
+entry:
+; CHECK: foo
+; CHECK: add r2
+; CHECK: 0(r2)
+ %y_addr = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %y, i32* %y_addr
+ %0 = load i32* %y_addr, align 4 ; <i32> [#uses=1]
+ %1 = getelementptr inbounds [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1]
+ call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll b/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
new file mode 100644
index 000000000000..12c4c993ab51
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=ppc32 -mtriple=ppc-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/PowerPC/2009-09-18-carrybit.ll b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
new file mode 100644
index 000000000000..6c23a6162c9d
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
@@ -0,0 +1,62 @@
+; RUN: llc -march=ppc32 < %s | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin9.6"
+
+define i64 @foo(i64 %r.0.ph, i64 %q.0.ph, i32 %sr1.1.ph) nounwind {
+entry:
+; CHECK: foo:
+; CHECK: subfc
+; CHECK: subfe
+; CHECK: subfc
+; CHECK: subfe
+ %tmp0 = add i64 %r.0.ph, -1 ; <i64> [#uses=1]
+ br label %bb40
+
+bb40: ; preds = %bb40, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb40 ] ; <i32> [#uses=1]
+ %carry.0274 = phi i32 [ 0, %entry ], [ %tmp122, %bb40 ] ; <i32> [#uses=1]
+ %r.0273 = phi i64 [ %r.0.ph, %entry ], [ %tmp124, %bb40 ] ; <i64> [#uses=2]
+ %q.0272 = phi i64 [ %q.0.ph, %entry ], [ %ins169, %bb40 ] ; <i64> [#uses=3]
+ %tmp1 = lshr i64 %r.0273, 31 ; <i64> [#uses=1]
+ %tmp2 = trunc i64 %tmp1 to i32 ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp2, -2 ; <i32> [#uses=1]
+ %tmp213 = trunc i64 %r.0273 to i32 ; <i32> [#uses=2]
+ %tmp106 = lshr i32 %tmp213, 31 ; <i32> [#uses=1]
+ %tmp107 = or i32 %tmp3, %tmp106 ; <i32> [#uses=1]
+ %tmp215 = zext i32 %tmp107 to i64 ; <i64> [#uses=1]
+ %tmp216 = shl i64 %tmp215, 32 ; <i64> [#uses=1]
+ %tmp108 = shl i32 %tmp213, 1 ; <i32> [#uses=1]
+ %tmp109 = lshr i64 %q.0272, 63 ; <i64> [#uses=1]
+ %tmp110 = trunc i64 %tmp109 to i32 ; <i32> [#uses=1]
+ %tmp111 = or i32 %tmp108, %tmp110 ; <i32> [#uses=1]
+ %tmp222 = zext i32 %tmp111 to i64 ; <i64> [#uses=1]
+ %ins224 = or i64 %tmp216, %tmp222 ; <i64> [#uses=2]
+ %tmp112 = lshr i64 %q.0272, 31 ; <i64> [#uses=1]
+ %tmp113 = trunc i64 %tmp112 to i32 ; <i32> [#uses=1]
+ %tmp114 = and i32 %tmp113, -2 ; <i32> [#uses=1]
+ %tmp158 = trunc i64 %q.0272 to i32 ; <i32> [#uses=2]
+ %tmp115 = lshr i32 %tmp158, 31 ; <i32> [#uses=1]
+ %tmp116 = or i32 %tmp114, %tmp115 ; <i32> [#uses=1]
+ %tmp160 = zext i32 %tmp116 to i64 ; <i64> [#uses=1]
+ %tmp161 = shl i64 %tmp160, 32 ; <i64> [#uses=1]
+ %tmp117 = shl i32 %tmp158, 1 ; <i32> [#uses=1]
+ %tmp118 = or i32 %tmp117, %carry.0274 ; <i32> [#uses=1]
+ %tmp167 = zext i32 %tmp118 to i64 ; <i64> [#uses=1]
+ %ins169 = or i64 %tmp161, %tmp167 ; <i64> [#uses=2]
+ %tmp119 = sub i64 %tmp0, %ins224 ; <i64> [#uses=1]
+ %tmp120 = ashr i64 %tmp119, 63 ; <i64> [#uses=2]
+ %tmp121 = trunc i64 %tmp120 to i32 ; <i32> [#uses=1]
+ %tmp122 = and i32 %tmp121, 1 ; <i32> [#uses=2]
+ %tmp123 = and i64 %tmp120, %q.0.ph ; <i64> [#uses=1]
+ %tmp124 = sub i64 %ins224, %tmp123 ; <i64> [#uses=2]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %sr1.1.ph ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb41.bb42_crit_edge, label %bb40
+
+bb41.bb42_crit_edge: ; preds = %bb40
+ %phitmp278 = zext i32 %tmp122 to i64 ; <i64> [#uses=1]
+ %tmp125 = shl i64 %ins169, 1 ; <i64> [#uses=1]
+ %tmp126 = or i64 %phitmp278, %tmp125 ; <i64> [#uses=2]
+ ret i64 %tmp126
+}
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
index f3246fda15cb..03905a36dcfb 100644
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ b/test/CodeGen/PowerPC/Atomics-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; ModuleID = 'Atomics.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll
index c3de7102b038..1dc4310761c3 100644
--- a/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/test/CodeGen/PowerPC/Atomics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
; ModuleID = 'Atomics.c'
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 45c13a7bfe41..25fc626550d2 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -1,35 +1,28 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | \
-; RUN: grep {stwu r1, -80(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | \
-; RUN: grep {stwu r1, -80(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stdu r1, -112(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stdu r1, -112(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r31, 40(r1)}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+
+; CHECK-PPC32: stw r31, 20(r1)
+; CHECK-PPC32: lwz r1, 0(r1)
+; CHECK-PPC32: lwz r31, 20(r1)
+; CHECK-PPC32-NOFP: stw r31, 20(r1)
+; CHECK-PPC32-NOFP: lwz r1, 0(r1)
+; CHECK-PPC32-NOFP: lwz r31, 20(r1)
+; CHECK-PPC32-RS: stwu r1, -80(r1)
+; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1)
+
+; CHECK-PPC64: std r31, 40(r1)
+; CHECK-PPC64: stdu r1, -112(r1)
+; CHECK-PPC64: ld r1, 0(r1)
+; CHECK-PPC64: ld r31, 40(r1)
+; CHECK-PPC64-NOFP: std r31, 40(r1)
+; CHECK-PPC64-NOFP: stdu r1, -112(r1)
+; CHECK-PPC64-NOFP: ld r1, 0(r1)
+; CHECK-PPC64-NOFP: ld r31, 40(r1)
define i32* @f1(i32 %n) {
%tmp = alloca i32, i32 %n ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 0a15d2233e79..fda2e4ff9ce9 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -1,77 +1,52 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ori r0, r0, 32704}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stwux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ori r0, r0, 32704}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stwux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ori r0, r0, 32656}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stdux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ori r0, r0, 32656}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stdux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r31, 40(r1)}
+; RUN: llvm-as < %s > %t.bc
+; RUN: llc < %t.bc -march=ppc32 | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %t.bc -march=ppc32 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-FP
-define i32* @f1() {
+; RUN: llc < %t.bc -march=ppc64 | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %t.bc -march=ppc64 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-FP
+
+
+target triple = "powerpc-apple-darwin8"
+
+define i32* @f1() nounwind {
%tmp = alloca i32, i32 8191 ; <i32*> [#uses=1]
ret i32* %tmp
}
+; PPC32-NOFP: _f1:
+; PPC32-NOFP: lis r0, -1
+; PPC32-NOFP: ori r0, r0, 32704
+; PPC32-NOFP: stwux r1, r1, r0
+; PPC32-NOFP: addi r3, r1, 68
+; PPC32-NOFP: lwz r1, 0(r1)
+; PPC32-NOFP: blr
+
+; PPC32-FP: _f1:
+; PPC32-FP: stw r31, 20(r1)
+; PPC32-FP: lis r0, -1
+; PPC32-FP: ori r0, r0, 32704
+; PPC32-FP: stwux r1, r1, r0
+; ...
+; PPC32-FP: lwz r1, 0(r1)
+; PPC32-FP: lwz r31, 20(r1)
+; PPC32-FP: blr
+
+
+; PPC64-NOFP: _f1:
+; PPC64-NOFP: lis r0, -1
+; PPC64-NOFP: ori r0, r0, 32656
+; PPC64-NOFP: stdux r1, r1, r0
+; PPC64-NOFP: addi r3, r1, 116
+; PPC64-NOFP: ld r1, 0(r1)
+; PPC64-NOFP: blr
+
+
+; PPC64-FP: _f1:
+; PPC64-FP: std r31, 40(r1)
+; PPC64-FP: lis r0, -1
+; PPC64-FP: ori r0, r0, 32656
+; PPC64-FP: stdux r1, r1, r0
+; ...
+; PPC64-FP: ld r1, 0(r1)
+; PPC64-FP: ld r31, 40(r1)
+; PPC64-FP: blr
diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll
index 11b64703ebd8..c2e1d6bddc5d 100644
--- a/test/CodeGen/PowerPC/Frames-leaf.ll
+++ b/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -1,34 +1,34 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {stwu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {stwu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {stdu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {stdu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {ld r31, 40(r1)}
define i32* @f1() {
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index 4ea3afba8831..6875704cf30d 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,26 +1,22 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
; RUN: not grep {stw r31, 20(r1)} %t1
; RUN: grep {stwu r1, -16448(r1)} %t1
; RUN: grep {addi r1, r1, 16448} %t1
-; RUN: llvm-as < %s | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
-; RUN: -o %t2 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN: -o %t2
; RUN: grep {stw r31, 20(r1)} %t2
; RUN: grep {stwu r1, -16448(r1)} %t2
; RUN: grep {addi r1, r1, 16448} %t2
; RUN: grep {lwz r31, 20(r1)} %t2
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -f
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
; RUN: not grep {std r31, 40(r1)} %t3
; RUN: grep {stdu r1, -16496(r1)} %t3
; RUN: grep {addi r1, r1, 16496} %t3
; RUN: not grep {ld r31, 40(r1)} %t3
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
-; RUN: -o %t4 -f
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN: -o %t4
; RUN: grep {std r31, 40(r1)} %t4
; RUN: grep {stdu r1, -16496(r1)} %t4
; RUN: grep {addi r1, r1, 16496} %t4
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 17053796bc79..0f7acacbfac9 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
; RUN: grep {stw r3, 32751}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
; RUN: grep {stw r3, 32751}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
; RUN: grep {std r2, 9024}
define void @test() {
diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll
index 406053bee27f..09a7fbd7a69f 100644
--- a/test/CodeGen/PowerPC/addc.ll
+++ b/test/CodeGen/PowerPC/addc.ll
@@ -1,5 +1,5 @@
; All of these should be codegen'd without loading immediates
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep addc %t | count 1
; RUN: grep adde %t | count 1
; RUN: grep addze %t | count 1
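; As a sketch of the expected carry chain (register numbers hypothetical): a
; 64-bit add on ppc32 splits into "addc r4, r4, r6" for the low halves
; (setting CA) and "adde r3, r3, r5" for the high halves (consuming CA), with
; "addze" covering the case where the high-half addend is zero.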
diff --git a/test/CodeGen/PowerPC/addi-reassoc.ll b/test/CodeGen/PowerPC/addi-reassoc.ll
index bee8660a8a9e..2b71ce65f6bc 100644
--- a/test/CodeGen/PowerPC/addi-reassoc.ll
+++ b/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep addi
+; RUN: llc < %s -march=ppc32 | not grep addi
%struct.X = type { [5 x i8] }
diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll
index 7ffbe3676704..e619faa75173 100644
--- a/test/CodeGen/PowerPC/align.ll
+++ b/test/CodeGen/PowerPC/align.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep align.4 | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep align.2 | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep align.3 | count 1
@A = global <4 x i32> < i32 10, i32 20, i32 30, i32 40 > ; <<4 x i32>*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll
index f0bb5ea40157..0484f882ec72 100644
--- a/test/CodeGen/PowerPC/and-branch.ll
+++ b/test/CodeGen/PowerPC/and-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mfcr
+; RUN: llc < %s -march=ppc32 | not grep mfcr
define void @foo(i32 %X, i32 %Y, i32 %Z) {
entry:
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll
index eef8f51f7a19..36853614c40a 100644
--- a/test/CodeGen/PowerPC/and-elim.ll
+++ b/test/CodeGen/PowerPC/and-elim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwin
+; RUN: llc < %s -march=ppc32 | not grep rlwin
define void @test(i8* %P) {
%W = load i8* %P
diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll
index 9c806494be3b..64a45e50c0a9 100644
--- a/test/CodeGen/PowerPC/and-imm.ll
+++ b/test/CodeGen/PowerPC/and-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {ori\\|lis}
+; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis}
; andi. r3, r3, 32769
define i32 @test(i32 %X) {
diff --git a/test/CodeGen/PowerPC/and_add.ll b/test/CodeGen/PowerPC/and_add.ll
index f103e7c0df06..517e775172c3 100644
--- a/test/CodeGen/PowerPC/and_add.ll
+++ b/test/CodeGen/PowerPC/and_add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep slwi %t
; RUN: not grep addi %t
; RUN: not grep rlwinm %t
diff --git a/test/CodeGen/PowerPC/and_sext.ll b/test/CodeGen/PowerPC/and_sext.ll
index e0e498def057..c6d234ea665f 100644
--- a/test/CodeGen/PowerPC/and_sext.ll
+++ b/test/CodeGen/PowerPC/and_sext.ll
@@ -1,6 +1,6 @@
; These tests should not contain a sign extend.
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsh
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
+; RUN: llc < %s -march=ppc32 | not grep extsh
+; RUN: llc < %s -march=ppc32 | not grep extsb
define i32 @test1(i32 %mode.0.i.0) {
%tmp.79 = trunc i32 %mode.0.i.0 to i16
diff --git a/test/CodeGen/PowerPC/and_sra.ll b/test/CodeGen/PowerPC/and_sra.ll
index c780605c9753..e6c02d80452d 100644
--- a/test/CodeGen/PowerPC/and_sra.ll
+++ b/test/CodeGen/PowerPC/and_sra.ll
@@ -1,5 +1,5 @@
; Neither of these functions should contain algebraic right shifts
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi
+; RUN: llc < %s -march=ppc32 | not grep srawi
define i32 @test1(i32 %mode.0.i.0) {
%tmp.79 = bitcast i32 %mode.0.i.0 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index f6bb2983d565..ec4e42defdcb 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lwarx | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | grep stwcx. | count 4
+; RUN: llc < %s -march=ppc32 | grep lwarx | count 3
+; RUN: llc < %s -march=ppc32 | grep stwcx. | count 4
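+; Sketch of the expected lowering (registers hypothetical): each atomic op
+; becomes a load-reserve/store-conditional retry loop, e.g.
+;   loop: lwarx  r5, 0, r3      ; load word and reserve
+;         add    r5, r5, r4     ; apply the operation
+;         stwcx. r5, 0, r3      ; store iff reservation still held
+;         bne-   loop           ; reservation lost -> retry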
define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
%tmp = call i32 @llvm.atomic.load.add.i32( i32* %mem, i32 %val )
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 77b7b08d8c22..6d9daef9285c 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep ldarx | count 3
-; RUN: llvm-as < %s | llc -march=ppc64 | grep stdcx. | count 4
+; RUN: llc < %s -march=ppc64 | grep ldarx | count 3
+; RUN: llc < %s -march=ppc64 | grep stdcx. | count 4
define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
%tmp = call i64 @llvm.atomic.load.add.i64( i64* %mem, i64 %val )
diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll
index cfad6ea454ad..fdead7dd8b34 100644
--- a/test/CodeGen/PowerPC/available-externally.ll
+++ b/test/CodeGen/PowerPC/available-externally.ll
@@ -1,69 +1,71 @@
-; RUN: llvm-as < %s | llc | grep {bl L_exact_log2.stub}
+; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
; PR4482
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "powerpc-apple-darwin8"
define i32 @foo(i64 %x) nounwind {
entry:
- %x_addr = alloca i64 ; <i64*> [#uses=2]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i64 %x, i64* %x_addr
- %1 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %2 = call i32 @exact_log2(i64 %1) nounwind ; <i32> [#uses=1]
- store i32 %2, i32* %0, align 4
- %3 = load i32* %0, align 4 ; <i32> [#uses=1]
- store i32 %3, i32* %retval, align 4
- br label %return
-
-return: ; preds = %entry
- %retval1 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval1
+; STATIC: _foo:
+; STATIC: bl _exact_log2
+; STATIC: blr
+; STATIC: .subsections_via_symbols
+
+; PIC: _foo:
+; PIC: bl L_exact_log2$stub
+; PIC: blr
+
+; DYNAMIC: _foo:
+; DYNAMIC: bl L_exact_log2$stub
+; DYNAMIC: blr
+
+ %A = call i32 @exact_log2(i64 %x) nounwind
+ ret i32 %A
}
define available_externally i32 @exact_log2(i64 %x) nounwind {
entry:
- %x_addr = alloca i64 ; <i64*> [#uses=6]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %iftmp.0 = alloca i32 ; <i32*> [#uses=3]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i64 %x, i64* %x_addr
- %1 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %2 = sub i64 0, %1 ; <i64> [#uses=1]
- %3 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %4 = and i64 %2, %3 ; <i64> [#uses=1]
- %5 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %6 = icmp ne i64 %4, %5 ; <i1> [#uses=1]
- br i1 %6, label %bb2, label %bb
-
-bb: ; preds = %entry
- %7 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %8 = icmp eq i64 %7, 0 ; <i1> [#uses=1]
- br i1 %8, label %bb2, label %bb1
-
-bb1: ; preds = %bb
- %9 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %10 = call i64 @llvm.cttz.i64(i64 %9) ; <i64> [#uses=1]
- %11 = trunc i64 %10 to i32 ; <i32> [#uses=1]
- store i32 %11, i32* %iftmp.0, align 4
- br label %bb3
-
-bb2: ; preds = %bb, %entry
- store i32 -1, i32* %iftmp.0, align 4
- br label %bb3
-
-bb3: ; preds = %bb2, %bb1
- %12 = load i32* %iftmp.0, align 4 ; <i32> [#uses=1]
- store i32 %12, i32* %0, align 4
- %13 = load i32* %0, align 4 ; <i32> [#uses=1]
- store i32 %13, i32* %retval, align 4
- br label %return
-
-return: ; preds = %bb3
- %retval4 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval4
+ ret i32 42
}
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+; PIC: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+; PIC: L_exact_log2$stub:
+; PIC: .indirect_symbol _exact_log2
+; PIC: mflr r0
+; PIC: bcl 20,31,L_exact_log2$stub$tmp
+
+; PIC: L_exact_log2$stub$tmp:
+; PIC: mflr r11
+; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC: mtlr r0
+; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC: mtctr r12
+; PIC: bctr
+
+; PIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; PIC: L_exact_log2$lazy_ptr:
+; PIC: .indirect_symbol _exact_log2
+; PIC: .long dyld_stub_binding_helper
+
+; PIC: .subsections_via_symbols
+
+
+; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+; DYNAMIC: L_exact_log2$stub:
+; DYNAMIC: .indirect_symbol _exact_log2
+; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC: mtctr r12
+; DYNAMIC: bctr
+
+; DYNAMIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; DYNAMIC: L_exact_log2$lazy_ptr:
+; DYNAMIC: .indirect_symbol _exact_log2
+; DYNAMIC: .long dyld_stub_binding_helper
+
+
diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll
index d23935756dc2..009f46811e78 100644
--- a/test/CodeGen/PowerPC/big-endian-actual-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {addc 4, 4, 6}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {adde 3, 3, 5}
define i64 @foo(i64 %x, i64 %y) {
diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll
index ab136f65d274..fe85404cb94f 100644
--- a/test/CodeGen/PowerPC/big-endian-call-result.ll
+++ b/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {addic 4, 4, 1}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {addze 3, 3}
declare i64 @foo()
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 08589f499965..e46e1ec8d775 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {li 6, 3}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {li 4, 2}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {li 3, 0}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {mr 5, 3}
declare void @bar(i64 %x, i64 %y)
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index 4aa55a39e5de..cc02e406aa61 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {b LBB.*} | count 4
target datalayout = "E-p:32:32"
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index e450eb8c2378..7eb3bbb8d308 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc32 | not grep rlwinm
+; RUN: llc < %s -march=ppc32 | not grep rlwimi
+; RUN: llc < %s -march=ppc64 | \
; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4
-; RUN: llvm-as < %s | llc -march=ppc64 | not grep rlwinm
-; RUN: llvm-as < %s | llc -march=ppc64 | not grep rlwimi
+; RUN: llc < %s -march=ppc64 | not grep rlwinm
+; RUN: llc < %s -march=ppc64 | not grep rlwimi
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
%tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 20ff3dbc4f7b..0454c584bcfe 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,11 +1,9 @@
; There should be exactly one vxor here.
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
; RUN: grep vxor | count 1
; There should be exactly one vsplti here.
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
; RUN: grep vsplti | count 1
define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
index 034c14147a29..0db184f72855 100644
--- a/test/CodeGen/PowerPC/calls.ll
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -1,10 +1,10 @@
; Test various forms of calls.
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bl } | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bctrl} | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bla } | count 1
declare void @foo()
diff --git a/test/CodeGen/PowerPC/cmp-cmp.ll b/test/CodeGen/PowerPC/cmp-cmp.ll
index 07964d5aa315..35a5e427853e 100644
--- a/test/CodeGen/PowerPC/cmp-cmp.ll
+++ b/test/CodeGen/PowerPC/cmp-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mfcr
+; RUN: llc < %s -march=ppc32 | not grep mfcr
define void @test(i64 %X) {
%tmp1 = and i64 %X, 3 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/compare-duplicate.ll b/test/CodeGen/PowerPC/compare-duplicate.ll
index df2dfdc17b64..f5108c37a8ad 100644
--- a/test/CodeGen/PowerPC/compare-duplicate.ll
+++ b/test/CodeGen/PowerPC/compare-duplicate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep slwi
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep slwi
define i32 @test(i32 %A, i32 %B) {
%C = sub i32 %B, %A
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index b0ef2d3f9464..5ba050060fcb 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {cmpwi cr0, r3, -1}
define i32 @test(i32 %x) {
diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll
index b58f59a7eb55..8901e02d3b80 100644
--- a/test/CodeGen/PowerPC/constants.ll
+++ b/test/CodeGen/PowerPC/constants.ll
@@ -1,9 +1,9 @@
; All of these routines should perform optimal loads of constants.
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep lis | count 5
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep ori | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {li } | count 4
define i32 @f1() {
diff --git a/test/CodeGen/PowerPC/cr_spilling.ll b/test/CodeGen/PowerPC/cr_spilling.ll
index 4584c7118237..b21586873612 100644
--- a/test/CodeGen/PowerPC/cr_spilling.ll
+++ b/test/CodeGen/PowerPC/cr_spilling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
+; RUN: llc < %s -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
; PR1638
@.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 2c51e8afa558..ab493a068a32 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,5 +1,5 @@
; Make sure this testcase does not use ctpop
-; RUN: llvm-as < %s | llc -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 | grep -i cntlzw
declare i32 @llvm.cttz.i32(i32)
diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll
index ceebc7099e4e..af233697403d 100644
--- a/test/CodeGen/PowerPC/darwin-labels.ll
+++ b/test/CodeGen/PowerPC/darwin-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {foo bar":}
+; RUN: llc < %s | grep {foo bar":}
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/delete-node.ll b/test/CodeGen/PowerPC/delete-node.ll
index 0b1d734f8a8c..a26c21154824 100644
--- a/test/CodeGen/PowerPC/delete-node.ll
+++ b/test/CodeGen/PowerPC/delete-node.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; The DAGCombiner leaves behind a dead node in this testcase. Currently
; ISel is ignoring dead nodes, though it would be preferable for
diff --git a/test/CodeGen/PowerPC/div-2.ll b/test/CodeGen/PowerPC/div-2.ll
index 26e622178408..2fc916ff005f 100644
--- a/test/CodeGen/PowerPC/div-2.ll
+++ b/test/CodeGen/PowerPC/div-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi
-; RUN: llvm-as < %s | llc -march=ppc32 | grep blr
+; RUN: llc < %s -march=ppc32 | not grep srawi
+; RUN: llc < %s -march=ppc32 | grep blr
define i32 @test1(i32 %X) {
%Y = and i32 %X, 15 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index 7be8a34be7ef..558fd1b3199b 100644
--- a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep eqv | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep andc | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep orc | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep nor | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep nand | count 1
define i32 @EQV1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/PowerPC/extsh.ll b/test/CodeGen/PowerPC/extsh.ll
index 5eca8cea3606..506ff86051ff 100644
--- a/test/CodeGen/PowerPC/extsh.ll
+++ b/test/CodeGen/PowerPC/extsh.ll
@@ -1,5 +1,5 @@
; This should turn into a single extsh
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh | count 1
+; RUN: llc < %s -march=ppc32 | grep extsh | count 1
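; In IR terms, (ashr (shl X, 16), 16) is an in-place 16-bit sign extension,
; which PPC can do with the single instruction "extsh r3, r3" (sketch;
; register number hypothetical) instead of a shift pair.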
define i32 @test(i32 %X) {
%tmp.81 = shl i32 %X, 16 ; <i32> [#uses=1]
%tmp.82 = ashr i32 %tmp.81, 16 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll
index 54e49b009ace..6ef740f835cb 100644
--- a/test/CodeGen/PowerPC/fabs.ll
+++ b/test/CodeGen/PowerPC/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
define double @fabs(double %f) {
entry:
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 4a6fe70574f4..815c72c1f8a7 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: egrep {fn?madd|fn?msub} | count 8
define double @test_FMADD1(double %A, double %B, double %C) {
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index 6c10dfbd44b0..bbd5c7159edc 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fnabs
+; RUN: llc < %s -march=ppc32 | grep fnabs
declare double @fabs(double)
diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll
index 9579a748e98e..0bd31bb082cd 100644
--- a/test/CodeGen/PowerPC/fneg.ll
+++ b/test/CodeGen/PowerPC/fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+; RUN: llc < %s -march=ppc32 | not grep fneg
define double @test1(double %a, double %b, double %c, double %d) {
entry:
diff --git a/test/CodeGen/PowerPC/fold-li.ll b/test/CodeGen/PowerPC/fold-li.ll
index 2ac79f149131..92d8da500e84 100644
--- a/test/CodeGen/PowerPC/fold-li.ll
+++ b/test/CodeGen/PowerPC/fold-li.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep -v align | not grep li
;; Test that immediates are folded into these instructions correctly.
diff --git a/test/CodeGen/PowerPC/fp-branch.ll b/test/CodeGen/PowerPC/fp-branch.ll
index 3db6ced572fe..673da027e229 100644
--- a/test/CodeGen/PowerPC/fp-branch.ll
+++ b/test/CodeGen/PowerPC/fp-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fcmp | count 1
+; RUN: llc < %s -march=ppc32 | grep fcmp | count 1
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/PowerPC/fp-int-fp.ll b/test/CodeGen/PowerPC/fp-int-fp.ll
index 1b78b01e6c93..18f7f83852a2 100644
--- a/test/CodeGen/PowerPC/fp-int-fp.ll
+++ b/test/CodeGen/PowerPC/fp-int-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep r1
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep r1
define double @test1(double %X) {
%Y = fptosi double %X to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fp_to_uint.ll b/test/CodeGen/PowerPC/fp_to_uint.ll
index 43502bbb3ef0..1360b62d273b 100644
--- a/test/CodeGen/PowerPC/fp_to_uint.ll
+++ b/test/CodeGen/PowerPC/fp_to_uint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fctiwz | count 1
+; RUN: llc < %s -march=ppc32 | grep fctiwz | count 1
define i16 @foo(float %a) {
entry:
diff --git a/test/CodeGen/PowerPC/fpcopy.ll b/test/CodeGen/PowerPC/fpcopy.ll
index 7d8059645ad2..7b9446baac07 100644
--- a/test/CodeGen/PowerPC/fpcopy.ll
+++ b/test/CodeGen/PowerPC/fpcopy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fmr
+; RUN: llc < %s -march=ppc32 | not grep fmr
define double @test(float %F) {
%F.upgrd.1 = fpext float %F to double ; <double> [#uses=1]
diff --git a/test/CodeGen/PowerPC/frounds.ll b/test/CodeGen/PowerPC/frounds.ll
index 0d8e621f354f..8eeadc3a3469 100644
--- a/test/CodeGen/PowerPC/frounds.ll
+++ b/test/CodeGen/PowerPC/frounds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i32 @foo() {
entry:
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
index 1260c602f9d9..74a8725eb12e 100644
--- a/test/CodeGen/PowerPC/fsqrt.ll
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -1,17 +1,13 @@
; fsqrt should be generated when the fsqrt feature is enabled, but not
; otherwise.
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
; RUN: grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN: grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: grep {fsqrt f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
; RUN: not grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
; RUN: not grep {fsqrt f1, f1}
declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/PowerPC/hello.ll b/test/CodeGen/PowerPC/hello.ll
index 1d7275f238bb..ea27e9257a65 100644
--- a/test/CodeGen/PowerPC/hello.ll
+++ b/test/CodeGen/PowerPC/hello.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
; PR1399
@.str = internal constant [13 x i8] c"Hello World!\00"
diff --git a/test/CodeGen/PowerPC/hidden-vis-2.ll b/test/CodeGen/PowerPC/hidden-vis-2.ll
index 4c9ae552f7c7..e9e2c0a93a0d 100644
--- a/test/CodeGen/PowerPC/hidden-vis-2.ll
+++ b/test/CodeGen/PowerPC/hidden-vis-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6
@x = external hidden global i32 ; <i32*> [#uses=1]
@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/hidden-vis.ll b/test/CodeGen/PowerPC/hidden-vis.ll
index e04c89aebcc2..b2cc1431ebde 100644
--- a/test/CodeGen/PowerPC/hidden-vis.ll
+++ b/test/CodeGen/PowerPC/hidden-vis.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr
@x = weak hidden global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/i128-and-beyond.ll b/test/CodeGen/PowerPC/i128-and-beyond.ll
index 9e0d6c30b8c7..51bcab244114 100644
--- a/test/CodeGen/PowerPC/i128-and-beyond.ll
+++ b/test/CodeGen/PowerPC/i128-and-beyond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep 4294967295 | count 28
+; RUN: llc < %s -march=ppc32 | grep 4294967295 | count 28
; These static initializers are too big to hand off to assemblers
; as monolithic blobs.
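; Presumably each wide initializer is emitted as individual 32-bit words,
; e.g. ".long 4294967295" repeated once per all-ones word, which is what the
; grep above is counting.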
diff --git a/test/CodeGen/PowerPC/i64_fp.ll b/test/CodeGen/PowerPC/i64_fp.ll
index 5ff2684d7b00..d53c94878409 100644
--- a/test/CodeGen/PowerPC/i64_fp.ll
+++ b/test/CodeGen/PowerPC/i64_fp.ll
@@ -1,21 +1,21 @@
; fcfid and fctidz should be generated when the 64bit feature is enabled, but not
; otherwise.
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
; RUN: grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
; RUN: grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
; RUN: not grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
; RUN: not grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
; RUN: not grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
; RUN: not grep fctidz
define double @X(double %Y) {
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index 677b41bb12e1..a43f09c7d561 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: llc < %s -march=ppc32 -stats |& \
; RUN: grep {4 .*Number of machine instrs printed}
;; Integer absolute value, should produce something as good as:
diff --git a/test/CodeGen/PowerPC/illegal-element-type.ll b/test/CodeGen/PowerPC/illegal-element-type.ll
index 54a06656b1b9..58bd0558e2ba 100644
--- a/test/CodeGen/PowerPC/illegal-element-type.ll
+++ b/test/CodeGen/PowerPC/illegal-element-type.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
+; RUN: llc < %s -march=ppc32 -mcpu=g3
define void @foo() {
entry:
diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll
index c0a397982adf..e1ff82d5f9b7 100644
--- a/test/CodeGen/PowerPC/inlineasm-copy.ll
+++ b/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mr
+; RUN: llc < %s -march=ppc32 | not grep mr
define i32 @test(i32 %Y, i32 %X) {
entry:
diff --git a/test/CodeGen/PowerPC/int-fp-conv-0.ll b/test/CodeGen/PowerPC/int-fp-conv-0.ll
index 82a182685143..983d2b823f10 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-0.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 > %t
+; RUN: llc < %s -march=ppc64 > %t
; RUN: grep __floattitf %t
; RUN: grep __fixunstfti %t
diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll
index 583408c0eae2..6c8272351924 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-1.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep __floatditf
+; RUN: llc < %s -march=ppc64 | grep __floatditf
define i64 @__fixunstfdi(ppc_fp128 %a) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/invalid-memcpy.ll b/test/CodeGen/PowerPC/invalid-memcpy.ll
index 6df968dddae5..3b1f3060a1c0 100644
--- a/test/CodeGen/PowerPC/invalid-memcpy.ll
+++ b/test/CodeGen/PowerPC/invalid-memcpy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
; This testcase is invalid (the alignment specified for memcpy is
; greater than the alignment guaranteed for Qux or C.0.1173, but it
diff --git a/test/CodeGen/PowerPC/inverted-bool-compares.ll b/test/CodeGen/PowerPC/inverted-bool-compares.ll
index f8c5f11180ca..aa7e4d686024 100644
--- a/test/CodeGen/PowerPC/inverted-bool-compares.ll
+++ b/test/CodeGen/PowerPC/inverted-bool-compares.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
+; RUN: llc < %s -march=ppc32 | not grep xori
define i32 @test(i1 %B, i32* %P) {
br i1 %B, label %T, label %F
diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll
index 192d7384e953..4161e3438a4b 100644
--- a/test/CodeGen/PowerPC/ispositive.ll
+++ b/test/CodeGen/PowerPC/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {srwi r3, r3, 31}
define i32 @test1(i32 %X) {
diff --git a/test/CodeGen/PowerPC/itofp128.ll b/test/CodeGen/PowerPC/itofp128.ll
index 4d745111b04b..6d9ef9590399 100644
--- a/test/CodeGen/PowerPC/itofp128.ll
+++ b/test/CodeGen/PowerPC/itofp128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin9.2.0"
diff --git a/test/CodeGen/PowerPC/lha.ll b/test/CodeGen/PowerPC/lha.ll
index e8f73eea2e2e..3a100c1aae6d 100644
--- a/test/CodeGen/PowerPC/lha.ll
+++ b/test/CodeGen/PowerPC/lha.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lha
+; RUN: llc < %s -march=ppc32 | grep lha
define i32 @test(i16* %a) {
%tmp.1 = load i16* %a ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/load-constant-addr.ll b/test/CodeGen/PowerPC/load-constant-addr.ll
index d2be04efd036..f1d061c1ad5a 100644
--- a/test/CodeGen/PowerPC/load-constant-addr.ll
+++ b/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -1,6 +1,6 @@
; Should fold the ori into the lfs.
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lfs
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep ori
+; RUN: llc < %s -march=ppc32 | grep lfs
+; RUN: llc < %s -march=ppc32 | not grep ori
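; The address splits as 186018016 = (2838 << 16) + 26848, and 26848 fits in a
; signed 16-bit displacement, so a sketch of the expected code (hypothetical
; registers) is "lis r2, 2838" then "lfs f1, 26848(r2)", with no separate ori.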
define float @test() {
%tmp.i = load float* inttoptr (i32 186018016 to float*) ; <float> [#uses=1]
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index 7b907250e1fc..94c2526cf5b9 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep cntlzw
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {li }
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {mr }
+; RUN: llc < %s -march=ppc32 | grep cntlzw
+; RUN: llc < %s -march=ppc32 | not grep xori
+; RUN: llc < %s -march=ppc32 | not grep {li }
+; RUN: llc < %s -march=ppc32 | not grep {mr }
define i1 @test(i64 %x) {
%tmp = icmp ult i64 %x, 4294967296
diff --git a/test/CodeGen/PowerPC/longdbl-truncate.ll b/test/CodeGen/PowerPC/longdbl-truncate.ll
index a87382405a5d..e5f63c644185 100644
--- a/test/CodeGen/PowerPC/longdbl-truncate.ll
+++ b/test/CodeGen/PowerPC/longdbl-truncate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/mask64.ll b/test/CodeGen/PowerPC/mask64.ll
index 69d2200212f9..139621af1f22 100644
--- a/test/CodeGen/PowerPC/mask64.ll
+++ b/test/CodeGen/PowerPC/mask64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin9.2.0"
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index fd0e1d4a2ea8..5661ef9768d1 100644
--- a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep li.*16
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep addi
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep li.*16
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep addi
; Codegen lvx (R+16) as t = li 16, lvx t,R
; This shares the 16 between the two loads.
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index a1527629980b..b267719421a3 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc32 -enable-ppc-preinc | \
; RUN: not grep addi
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \
; RUN: not grep addi
@Glob = global i64 4 ; <i64*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/mul-neg-power-2.ll b/test/CodeGen/PowerPC/mul-neg-power-2.ll
index 90446d707db7..9688d6e3d519 100644
--- a/test/CodeGen/PowerPC/mul-neg-power-2.ll
+++ b/test/CodeGen/PowerPC/mul-neg-power-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mul
+; RUN: llc < %s -march=ppc32 | not grep mul
define i32 @test1(i32 %a) {
%tmp.1 = mul i32 %a, -2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/mul-with-overflow.ll b/test/CodeGen/PowerPC/mul-with-overflow.ll
index 0276846d7cbb..f03e3cb5cd47 100644
--- a/test/CodeGen/PowerPC/mul-with-overflow.ll
+++ b/test/CodeGen/PowerPC/mul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll
index 3b0daad227e9..9ab8d997c0d0 100644
--- a/test/CodeGen/PowerPC/mulhs.ll
+++ b/test/CodeGen/PowerPC/mulhs.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: not grep mulhwu %t
; RUN: not grep srawi %t
; RUN: not grep add %t
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
index 3f75f7d28ed6..b9317f90c1da 100644
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ b/test/CodeGen/PowerPC/multiple-return-values.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
define {i64, float} @bar(i64 %a, float %b) {
%y = add i64 %a, 7
diff --git a/test/CodeGen/PowerPC/neg.ll b/test/CodeGen/PowerPC/neg.ll
index c13559903901..c673912d2ef1 100644
--- a/test/CodeGen/PowerPC/neg.ll
+++ b/test/CodeGen/PowerPC/neg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep neg
+; RUN: llc < %s -march=ppc32 | grep neg
define i32 @test(i32 %X) {
%Y = sub i32 0, %X ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/no-dead-strip.ll b/test/CodeGen/PowerPC/no-dead-strip.ll
index e7ceaaeab678..34594132530d 100644
--- a/test/CodeGen/PowerPC/no-dead-strip.ll
+++ b/test/CodeGen/PowerPC/no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {no_dead_strip.*_X}
+; RUN: llc < %s | grep {no_dead_strip.*_X}
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/or-addressing-mode.ll b/test/CodeGen/PowerPC/or-addressing-mode.ll
index 9b6e9551bf04..e50374e30696 100644
--- a/test/CodeGen/PowerPC/or-addressing-mode.ll
+++ b/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep ori
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep rlwimi
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep ori
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep rlwimi
define i32 @test1(i8* %P) {
%tmp.2.i = ptrtoint i8* %P to i32 ; <i32> [#uses=2]
diff --git a/test/CodeGen/PowerPC/ppcf128-1-opt.ll b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
index e3c5ab122545..2fc17209ccfd 100644
--- a/test/CodeGen/PowerPC/ppcf128-1-opt.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
; ModuleID = '<stdin>'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index a487de7fd577..1047fe5d3ba9 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc > %t
+; RUN: opt < %s -std-compile-opts | llc > %t
; ModuleID = 'ld3.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-2.ll b/test/CodeGen/PowerPC/ppcf128-2.ll
index 43182266e731..7eee3542d3bc 100644
--- a/test/CodeGen/PowerPC/ppcf128-2.ll
+++ b/test/CodeGen/PowerPC/ppcf128-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define i64 @__fixtfdi(ppc_fp128 %a) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/ppcf128-3.ll b/test/CodeGen/PowerPC/ppcf128-3.ll
index 3a51f4d3dd60..5043b622584b 100644
--- a/test/CodeGen/PowerPC/ppcf128-3.ll
+++ b/test/CodeGen/PowerPC/ppcf128-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
%struct.stp_sequence = type { double, double }
define i32 @stp_sequence_set_short_data(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
diff --git a/test/CodeGen/PowerPC/ppcf128-4.ll b/test/CodeGen/PowerPC/ppcf128-4.ll
index 16d61780a46c..104a25eb43f2 100644
--- a/test/CodeGen/PowerPC/ppcf128-4.ll
+++ b/test/CodeGen/PowerPC/ppcf128-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define ppc_fp128 @__floatditf(i64 %u) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/pr3711_widen_bit.ll b/test/CodeGen/PowerPC/pr3711_widen_bit.ll
index e601e968341f..7abdedad9805 100644
--- a/test/CodeGen/PowerPC/pr3711_widen_bit.ll
+++ b/test/CodeGen/PowerPC/pr3711_widen_bit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; Test that causes an abort when expanding a bit convert due to missing support
; for widening.
diff --git a/test/CodeGen/PowerPC/private.ll b/test/CodeGen/PowerPC/private.ll
index 0f0d13492a08..d6e67708ac25 100644
--- a/test/CodeGen/PowerPC/private.ll
+++ b/test/CodeGen/PowerPC/private.ll
@@ -1,25 +1,23 @@
; Test to make sure that the 'private' linkage type is used correctly.
;
-; RUN: llvm-as < %s | llc -mtriple=powerpc-unknown-linux-gnu > %t
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu > %t
; RUN: grep .Lfoo: %t
; RUN: grep bl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
; RUN: grep lis.*\.Lbaz %t
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin > %t
+; RUN: llc < %s -mtriple=powerpc-apple-darwin > %t
; RUN: grep L_foo: %t
; RUN: grep bl.*\L_foo %t
; RUN: grep L_baz: %t
; RUN: grep lis.*\L_baz %t
-declare void @foo()
-
-define private void @foo() {
+define private void @foo() nounwind {
ret void
}
@baz = private global i32 4;
-define i32 @bar() {
+define i32 @bar() nounwind {
call void @foo()
%1 = load i32* @baz, align 4
ret i32 %1
diff --git a/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
index b86ed1a6a76e..e0ddb4250fd2 100644
--- a/test/CodeGen/PowerPC/reg-coalesce-simple.ll
+++ b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep or
+; RUN: llc < %s -march=ppc32 | not grep or
%struct.foo = type { i32, i32, [0 x i8] }
diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll
index f4cad34addad..9f8647d08762 100644
--- a/test/CodeGen/PowerPC/retaddr.ll
+++ b/test/CodeGen/PowerPC/retaddr.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep mflr
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lwz
-; RUN: llvm-as < %s | llc -march=ppc64 | grep {ld r., 16(r1)}
+; RUN: llc < %s -march=ppc32 | grep mflr
+; RUN: llc < %s -march=ppc32 | grep lwz
+; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)}
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/return-val-i128.ll b/test/CodeGen/PowerPC/return-val-i128.ll
index 27a5004bd12a..e14a43809a7b 100644
--- a/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/test/CodeGen/PowerPC/return-val-i128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define i128 @__fixsfdi(float %a) {
entry:
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
index f8a42b514203..6410c63234d2 100644
--- a/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {or }
+; RUN: llc < %s -march=ppc32 | grep rlwimi
+; RUN: llc < %s -march=ppc32 | not grep {or }
; Make sure there are no register-register copies here.
diff --git a/test/CodeGen/PowerPC/rlwimi.ll b/test/CodeGen/PowerPC/rlwimi.ll
index 5e310bb6a6ef..556ca3d4a8c0 100644
--- a/test/CodeGen/PowerPC/rlwimi.ll
+++ b/test/CodeGen/PowerPC/rlwimi.ll
@@ -1,6 +1,6 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep and
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi | count 8
+; RUN: llc < %s -march=ppc32 | not grep and
+; RUN: llc < %s -march=ppc32 | grep rlwimi | count 8
define i32 @test1(i32 %x, i32 %y) {
entry:
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
index 33eaacf8b4fa..59a36555bf86 100644
--- a/test/CodeGen/PowerPC/rlwimi2.ll
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep rlwimi %t | count 3
; RUN: grep srwi %t | count 1
; RUN: not grep slwi %t
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index fedcfbfdb2c5..05d37bf1625f 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: llc < %s -march=ppc32 -stats |& \
; RUN: grep {Number of machine instrs printed} | grep 12
define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) {
diff --git a/test/CodeGen/PowerPC/rlwinm.ll b/test/CodeGen/PowerPC/rlwinm.ll
index 9d34865be5a2..699f6e78356e 100644
--- a/test/CodeGen/PowerPC/rlwinm.ll
+++ b/test/CodeGen/PowerPC/rlwinm.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: not grep and %t
; RUN: not grep srawi %t
; RUN: not grep srwi %t
diff --git a/test/CodeGen/PowerPC/rlwinm2.ll b/test/CodeGen/PowerPC/rlwinm2.ll
index 06ceaa2a9cdc..46542d8e09bd 100644
--- a/test/CodeGen/PowerPC/rlwinm2.ll
+++ b/test/CodeGen/PowerPC/rlwinm2.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlw[i]nm instructions
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: not grep and %t
; RUN: not grep srawi %t
; RUN: not grep srwi %t
diff --git a/test/CodeGen/PowerPC/rotl-2.ll b/test/CodeGen/PowerPC/rotl-2.ll
index df104599fe3e..d32ef59be6c4 100644
--- a/test/CodeGen/PowerPC/rotl-2.ll
+++ b/test/CodeGen/PowerPC/rotl-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep or
+; RUN: llc < %s -march=ppc32 | grep rlwinm | count 4
+; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32 | not grep or
define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
%shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/rotl-64.ll b/test/CodeGen/PowerPC/rotl-64.ll
index 3963d9a9d71a..674c9e4cc951 100644
--- a/test/CodeGen/PowerPC/rotl-64.ll
+++ b/test/CodeGen/PowerPC/rotl-64.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep rldicl
-; RUN: llvm-as < %s | llc -march=ppc64 | grep rldcl
+; RUN: llc < %s -march=ppc64 | grep rldicl
+; RUN: llc < %s -march=ppc64 | grep rldcl
; PR1613
define i64 @t1(i64 %A) {
diff --git a/test/CodeGen/PowerPC/rotl.ll b/test/CodeGen/PowerPC/rotl.ll
index aab5c8316a3d..56fc4a8c911f 100644
--- a/test/CodeGen/PowerPC/rotl.ll
+++ b/test/CodeGen/PowerPC/rotl.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | count 2
+; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32 | grep rlwinm | count 2
define i32 @rotlw(i32 %x, i32 %sh) {
entry:
diff --git a/test/CodeGen/PowerPC/sections.ll b/test/CodeGen/PowerPC/sections.ll
new file mode 100644
index 000000000000..1af370935e23
--- /dev/null
+++ b/test/CodeGen/PowerPC/sections.ll
@@ -0,0 +1,8 @@
+; Test to make sure that bss sections are printed with the '.section' directive.
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+
+@A = global i32 0
+
+; CHECK: .section .bss,"aw",@nobits
+; CHECK: .global A
+
diff --git a/test/CodeGen/PowerPC/select-cc.ll b/test/CodeGen/PowerPC/select-cc.ll
index f9464c4b0516..ccc64898a34f 100644
--- a/test/CodeGen/PowerPC/select-cc.ll
+++ b/test/CodeGen/PowerPC/select-cc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; PR3011
define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind {
diff --git a/test/CodeGen/PowerPC/select_lt0.ll b/test/CodeGen/PowerPC/select_lt0.ll
index 86eb201c5796..95ba84ac6e24 100644
--- a/test/CodeGen/PowerPC/select_lt0.ll
+++ b/test/CodeGen/PowerPC/select_lt0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep cmp
+; RUN: llc < %s -march=ppc32 | not grep cmp
define i32 @seli32_1(i32 %a) {
entry:
diff --git a/test/CodeGen/PowerPC/setcc_no_zext.ll b/test/CodeGen/PowerPC/setcc_no_zext.ll
index c31f35ce9af3..9b2036e1dc52 100644
--- a/test/CodeGen/PowerPC/setcc_no_zext.ll
+++ b/test/CodeGen/PowerPC/setcc_no_zext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
+; RUN: llc < %s -march=ppc32 | not grep rlwinm
define i32 @setcc_one_or_zero(i32* %a) {
entry:
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index 0f0afe9e665a..688b29aa124f 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {srwi r., r., 5}
define i32 @eq0(i32 %a) {
diff --git a/test/CodeGen/PowerPC/shift128.ll b/test/CodeGen/PowerPC/shift128.ll
index cf5b3fc6ff0c..8e518c12795e 100644
--- a/test/CodeGen/PowerPC/shift128.ll
+++ b/test/CodeGen/PowerPC/shift128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep sld | count 5
+; RUN: llc < %s -march=ppc64 | grep sld | count 5
define i128 @foo_lshr(i128 %x, i128 %y) {
%r = lshr i128 %x, %y
diff --git a/test/CodeGen/PowerPC/shl_elim.ll b/test/CodeGen/PowerPC/shl_elim.ll
index 3dc47729860b..f177c4a3f482 100644
--- a/test/CodeGen/PowerPC/shl_elim.ll
+++ b/test/CodeGen/PowerPC/shl_elim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep slwi
+; RUN: llc < %s -march=ppc32 | not grep slwi
define i32 @test1(i64 %a) {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/shl_sext.ll b/test/CodeGen/PowerPC/shl_sext.ll
index 61e5cdb11a42..1f35eb4c55a6 100644
--- a/test/CodeGen/PowerPC/shl_sext.ll
+++ b/test/CodeGen/PowerPC/shl_sext.ll
@@ -1,5 +1,5 @@
; This test should not contain a sign extend
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
+; RUN: llc < %s -march=ppc32 | not grep extsb
define i32 @test(i32 %mode.0.i.0) {
%tmp.79 = trunc i32 %mode.0.i.0 to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/sign_ext_inreg1.ll b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
index 0e67f7703882..2679c8e6ae8e 100644
--- a/test/CodeGen/PowerPC/sign_ext_inreg1.ll
+++ b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep srwi
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
+; RUN: llc < %s -march=ppc32 | grep srwi
+; RUN: llc < %s -march=ppc32 | not grep rlwimi
define i32 @baz(i64 %a) {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index e211e86875a2..31bcee6bc811 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {extsh\\|rlwinm}
+; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm}
declare i16 @foo() signext
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index 5c4a834be445..c49b25cc2303 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,6 +1,6 @@
; This cannot be a stfiwx
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
define void @test(float %F, i8* %P) {
%I = fptosi float %F to i32
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
index c4afb63531b1..d1c3f5234a26 100644
--- a/test/CodeGen/PowerPC/stfiwx.ll
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1
; RUN: grep stfiwx %t1
; RUN: not grep r1 %t1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
-; RUN: -o %t2 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
+; RUN: -o %t2
; RUN: not grep stfiwx %t2
; RUN: grep r1 %t2
diff --git a/test/CodeGen/PowerPC/store-load-fwd.ll b/test/CodeGen/PowerPC/store-load-fwd.ll
index 5cc478448ff7..25663c1ac68e 100644
--- a/test/CodeGen/PowerPC/store-load-fwd.ll
+++ b/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep lwz
+; RUN: llc < %s -march=ppc32 | not grep lwz
define i32 @test(i32* %P) {
store i32 1, i32* %P
diff --git a/test/CodeGen/PowerPC/subc.ll b/test/CodeGen/PowerPC/subc.ll
index 4ac95961f079..5914dcad94bc 100644
--- a/test/CodeGen/PowerPC/subc.ll
+++ b/test/CodeGen/PowerPC/subc.ll
@@ -1,5 +1,5 @@
; All of these should be codegen'd without loading immediates
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep subfc %t | count 1
; RUN: grep subfe %t | count 1
; RUN: grep subfze %t | count 1
diff --git a/test/CodeGen/PowerPC/tailcall1-64.ll b/test/CodeGen/PowerPC/tailcall1-64.ll
index f39b40bdab81..e9c83a548807 100644
--- a/test/CodeGen/PowerPC/tailcall1-64.ll
+++ b/test/CodeGen/PowerPC/tailcall1-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -tailcallopt | grep TC_RETURNd8
+; RUN: llc < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
diff --git a/test/CodeGen/PowerPC/tailcall1.ll b/test/CodeGen/PowerPC/tailcall1.ll
index 1fc4b94ddcf9..08f3392c9d77 100644
--- a/test/CodeGen/PowerPC/tailcall1.ll
+++ b/test/CodeGen/PowerPC/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -tailcallopt | grep TC_RETURN
+; RUN: llc < %s -march=ppc32 -tailcallopt | grep TC_RETURN
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
diff --git a/test/CodeGen/PowerPC/tailcallpic1.ll b/test/CodeGen/PowerPC/tailcallpic1.ll
index 678d366fb6a6..f3f5028cf4a9 100644
--- a/test/CodeGen/PowerPC/tailcallpic1.ll
+++ b/test/CodeGen/PowerPC/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
+; RUN: llc < %s -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
diff --git a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
new file mode 100644
index 000000000000..8a1288afa40c
--- /dev/null
+++ b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
@@ -0,0 +1,583 @@
+; RUN: llc < %s
+; PR4534
+
+; ModuleID = 'tango.net.ftp.FtpClient.bc'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin9.6.0"
+ %"byte[]" = type { i32, i8* }
+@.str167 = external constant [11 x i8] ; <[11 x i8]*> [#uses=1]
+@.str170 = external constant [11 x i8] ; <[11 x i8]*> [#uses=2]
+@.str171 = external constant [5 x i8] ; <[5 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%"byte[]")* @foo to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @foo(%"byte[]" %line_arg) {
+entry:
+ %line_arg830 = extractvalue %"byte[]" %line_arg, 0 ; <i32> [#uses=12]
+ %line_arg831 = extractvalue %"byte[]" %line_arg, 1 ; <i8*> [#uses=17]
+ %t5 = load i8* %line_arg831 ; <i8> [#uses=1]
+ br label %forcondi
+
+forcondi: ; preds = %forbodyi, %entry
+ %l.0i = phi i32 [ 10, %entry ], [ %t4i, %forbodyi ] ; <i32> [#uses=2]
+ %p.0i = phi i8* [ getelementptr ([11 x i8]* @.str167, i32 0, i32 -1), %entry ], [ %t7i, %forbodyi ] ; <i8*> [#uses=1]
+ %t4i = add i32 %l.0i, -1 ; <i32> [#uses=1]
+ %t5i = icmp eq i32 %l.0i, 0 ; <i1> [#uses=1]
+ br i1 %t5i, label %forcond.i, label %forbodyi
+
+forbodyi: ; preds = %forcondi
+ %t7i = getelementptr i8* %p.0i, i32 1 ; <i8*> [#uses=2]
+ %t8i = load i8* %t7i ; <i8> [#uses=1]
+ %t12i = icmp eq i8 %t8i, %t5 ; <i1> [#uses=1]
+ br i1 %t12i, label %forcond.i, label %forcondi
+
+forcond.i: ; preds = %forbody.i, %forbodyi, %forcondi
+ %storemerge.i = phi i32 [ %t106.i, %forbody.i ], [ 1, %forcondi ], [ 1, %forbodyi ] ; <i32> [#uses=1]
+ %t77.i286 = phi i1 [ %phit3, %forbody.i ], [ false, %forcondi ], [ false, %forbodyi ] ; <i1> [#uses=1]
+ br i1 %t77.i286, label %forcond.i295, label %forbody.i
+
+forbody.i: ; preds = %forcond.i
+ %t106.i = add i32 %storemerge.i, 1 ; <i32> [#uses=2]
+ %phit3 = icmp ugt i32 %t106.i, 3 ; <i1> [#uses=1]
+ br label %forcond.i
+
+forcond.i295: ; preds = %forbody.i301, %forcond.i
+ %storemerge.i292 = phi i32 [ %t106.i325, %forbody.i301 ], [ 4, %forcond.i ] ; <i32> [#uses=1]
+ %t77.i293 = phi i1 [ %phit2, %forbody.i301 ], [ false, %forcond.i ] ; <i1> [#uses=1]
+ br i1 %t77.i293, label %forcond.i332, label %forbody.i301
+
+forbody.i301: ; preds = %forcond.i295
+ %t106.i325 = add i32 %storemerge.i292, 1 ; <i32> [#uses=2]
+ %phit2 = icmp ugt i32 %t106.i325, 6 ; <i1> [#uses=1]
+ br label %forcond.i295
+
+forcond.i332: ; preds = %forbody.i338, %forcond.i295
+ %storemerge.i329 = phi i32 [ %t106.i362, %forbody.i338 ], [ 7, %forcond.i295 ] ; <i32> [#uses=3]
+ %t77.i330 = phi i1 [ %phit1, %forbody.i338 ], [ false, %forcond.i295 ] ; <i1> [#uses=1]
+ br i1 %t77.i330, label %wcond.i370, label %forbody.i338
+
+forbody.i338: ; preds = %forcond.i332
+ %t106.i362 = add i32 %storemerge.i329, 1 ; <i32> [#uses=2]
+ %phit1 = icmp ugt i32 %t106.i362, 9 ; <i1> [#uses=1]
+ br label %forcond.i332
+
+wcond.i370: ; preds = %wbody.i372, %forcond.i332
+ %.frame.0.11 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ] ; <i32> [#uses=2]
+ %t3.i368 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ] ; <i32> [#uses=5]
+ %t4.i369 = icmp ult i32 %t3.i368, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i369, label %andand.i378, label %wcond22.i383
+
+wbody.i372: ; preds = %andand.i378
+ %t18.i371.c = add i32 %t3.i368, 1 ; <i32> [#uses=2]
+ br label %wcond.i370
+
+andand.i378: ; preds = %wcond.i370
+ %t11.i375 = getelementptr i8* %line_arg831, i32 %t3.i368 ; <i8*> [#uses=1]
+ %t12.i376 = load i8* %t11.i375 ; <i8> [#uses=1]
+ %t14.i377 = icmp eq i8 %t12.i376, 32 ; <i1> [#uses=1]
+ br i1 %t14.i377, label %wbody.i372, label %wcond22.i383
+
+wcond22.i383: ; preds = %wbody23.i385, %andand.i378, %wcond.i370
+ %.frame.0.10 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %.frame.0.11, %wcond.i370 ], [ %.frame.0.11, %andand.i378 ] ; <i32> [#uses=2]
+ %t49.i381 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %t3.i368, %wcond.i370 ], [ %t3.i368, %andand.i378 ] ; <i32> [#uses=5]
+ %t32.i382 = icmp ult i32 %t49.i381, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i382, label %andand33.i391, label %wcond54.i396
+
+wbody23.i385: ; preds = %andand33.i391
+ %t50.i384 = add i32 %t49.i381, 1 ; <i32> [#uses=2]
+ br label %wcond22.i383
+
+andand33.i391: ; preds = %wcond22.i383
+ %t42.i388 = getelementptr i8* %line_arg831, i32 %t49.i381 ; <i8*> [#uses=1]
+ %t43.i389 = load i8* %t42.i388 ; <i8> [#uses=1]
+ %t45.i390 = icmp eq i8 %t43.i389, 32 ; <i1> [#uses=1]
+ br i1 %t45.i390, label %wcond54.i396, label %wbody23.i385
+
+wcond54.i396: ; preds = %wbody55.i401, %andand33.i391, %wcond22.i383
+ %.frame.0.9 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %.frame.0.10, %wcond22.i383 ], [ %.frame.0.10, %andand33.i391 ] ; <i32> [#uses=2]
+ %t81.i394 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %t49.i381, %wcond22.i383 ], [ %t49.i381, %andand33.i391 ] ; <i32> [#uses=3]
+ %t64.i395 = icmp ult i32 %t81.i394, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i395, label %andand65.i407, label %wcond.i716
+
+wbody55.i401: ; preds = %andand65.i407
+ %t82.i400 = add i32 %t81.i394, 1 ; <i32> [#uses=2]
+ br label %wcond54.i396
+
+andand65.i407: ; preds = %wcond54.i396
+ %t74.i404 = getelementptr i8* %line_arg831, i32 %t81.i394 ; <i8*> [#uses=1]
+ %t75.i405 = load i8* %t74.i404 ; <i8> [#uses=1]
+ %t77.i406 = icmp eq i8 %t75.i405, 32 ; <i1> [#uses=1]
+ br i1 %t77.i406, label %wbody55.i401, label %wcond.i716
+
+wcond.i716: ; preds = %wbody.i717, %andand65.i407, %wcond54.i396
+ %.frame.0.0 = phi i32 [ %t18.i.c829, %wbody.i717 ], [ %.frame.0.9, %wcond54.i396 ], [ %.frame.0.9, %andand65.i407 ] ; <i32> [#uses=7]
+ %t4.i715 = icmp ult i32 %.frame.0.0, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i715, label %andand.i721, label %wcond22.i724
+
+wbody.i717: ; preds = %andand.i721
+ %t18.i.c829 = add i32 %.frame.0.0, 1 ; <i32> [#uses=1]
+ br label %wcond.i716
+
+andand.i721: ; preds = %wcond.i716
+ %t11.i718 = getelementptr i8* %line_arg831, i32 %.frame.0.0 ; <i8*> [#uses=1]
+ %t12.i719 = load i8* %t11.i718 ; <i8> [#uses=1]
+ %t14.i720 = icmp eq i8 %t12.i719, 32 ; <i1> [#uses=1]
+ br i1 %t14.i720, label %wbody.i717, label %wcond22.i724
+
+wcond22.i724: ; preds = %wbody23.i726, %andand.i721, %wcond.i716
+ %.frame.0.1 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ] ; <i32> [#uses=2]
+ %t49.i722 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ] ; <i32> [#uses=5]
+ %t32.i723 = icmp ult i32 %t49.i722, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i723, label %andand33.i731, label %wcond54.i734
+
+wbody23.i726: ; preds = %andand33.i731
+ %t50.i725 = add i32 %t49.i722, 1 ; <i32> [#uses=2]
+ br label %wcond22.i724
+
+andand33.i731: ; preds = %wcond22.i724
+ %t42.i728 = getelementptr i8* %line_arg831, i32 %t49.i722 ; <i8*> [#uses=1]
+ %t43.i729 = load i8* %t42.i728 ; <i8> [#uses=1]
+ %t45.i730 = icmp eq i8 %t43.i729, 32 ; <i1> [#uses=1]
+ br i1 %t45.i730, label %wcond54.i734, label %wbody23.i726
+
+wcond54.i734: ; preds = %wbody55.i736, %andand33.i731, %wcond22.i724
+ %.frame.0.2 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %.frame.0.1, %wcond22.i724 ], [ %.frame.0.1, %andand33.i731 ] ; <i32> [#uses=2]
+ %t81.i732 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %t49.i722, %wcond22.i724 ], [ %t49.i722, %andand33.i731 ] ; <i32> [#uses=3]
+ %t64.i733 = icmp ult i32 %t81.i732, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i733, label %andand65.i740, label %wcond.i750
+
+wbody55.i736: ; preds = %andand65.i740
+ %t82.i735 = add i32 %t81.i732, 1 ; <i32> [#uses=2]
+ br label %wcond54.i734
+
+andand65.i740: ; preds = %wcond54.i734
+ %t74.i737 = getelementptr i8* %line_arg831, i32 %t81.i732 ; <i8*> [#uses=1]
+ %t75.i738 = load i8* %t74.i737 ; <i8> [#uses=1]
+ %t77.i739 = icmp eq i8 %t75.i738, 32 ; <i1> [#uses=1]
+ br i1 %t77.i739, label %wbody55.i736, label %wcond.i750
+
+wcond.i750: ; preds = %wbody.i752, %andand65.i740, %wcond54.i734
+ %.frame.0.3 = phi i32 [ %t18.i751.c, %wbody.i752 ], [ %.frame.0.2, %wcond54.i734 ], [ %.frame.0.2, %andand65.i740 ] ; <i32> [#uses=11]
+ %t4.i749 = icmp ult i32 %.frame.0.3, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i749, label %andand.i758, label %wcond22.i761
+
+wbody.i752: ; preds = %andand.i758
+ %t18.i751.c = add i32 %.frame.0.3, 1 ; <i32> [#uses=1]
+ br label %wcond.i750
+
+andand.i758: ; preds = %wcond.i750
+ %t11.i755 = getelementptr i8* %line_arg831, i32 %.frame.0.3 ; <i8*> [#uses=1]
+ %t12.i756 = load i8* %t11.i755 ; <i8> [#uses=1]
+ %t14.i757 = icmp eq i8 %t12.i756, 32 ; <i1> [#uses=1]
+ br i1 %t14.i757, label %wbody.i752, label %wcond22.i761
+
+wcond22.i761: ; preds = %wbody23.i763, %andand.i758, %wcond.i750
+ %.frame.0.4 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ] ; <i32> [#uses=2]
+ %t49.i759 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ] ; <i32> [#uses=7]
+ %t32.i760 = icmp ult i32 %t49.i759, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i760, label %andand33.i769, label %wcond54.i773
+
+wbody23.i763: ; preds = %andand33.i769
+ %t50.i762 = add i32 %t49.i759, 1 ; <i32> [#uses=2]
+ br label %wcond22.i761
+
+andand33.i769: ; preds = %wcond22.i761
+ %t42.i766 = getelementptr i8* %line_arg831, i32 %t49.i759 ; <i8*> [#uses=1]
+ %t43.i767 = load i8* %t42.i766 ; <i8> [#uses=1]
+ %t45.i768 = icmp eq i8 %t43.i767, 32 ; <i1> [#uses=1]
+ br i1 %t45.i768, label %wcond54.i773, label %wbody23.i763
+
+wcond54.i773: ; preds = %wbody55.i775, %andand33.i769, %wcond22.i761
+ %.frame.0.5 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %.frame.0.4, %wcond22.i761 ], [ %.frame.0.4, %andand33.i769 ] ; <i32> [#uses=1]
+ %t81.i770 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %t49.i759, %wcond22.i761 ], [ %t49.i759, %andand33.i769 ] ; <i32> [#uses=3]
+ %t64.i771 = icmp ult i32 %t81.i770, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i771, label %andand65.i780, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+
+wbody55.i775: ; preds = %andand65.i780
+ %t82.i774 = add i32 %t81.i770, 1 ; <i32> [#uses=2]
+ br label %wcond54.i773
+
+andand65.i780: ; preds = %wcond54.i773
+ %t74.i777 = getelementptr i8* %line_arg831, i32 %t81.i770 ; <i8*> [#uses=1]
+ %t75.i778 = load i8* %t74.i777 ; <i8> [#uses=1]
+ %t77.i779 = icmp eq i8 %t75.i778, 32 ; <i1> [#uses=1]
+ br i1 %t77.i779, label %wbody55.i775, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+
+Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786: ; preds = %andand65.i780, %wcond54.i773
+ %t89.i782 = getelementptr i8* %line_arg831, i32 %.frame.0.3 ; <i8*> [#uses=4]
+ %t90.i783 = sub i32 %t49.i759, %.frame.0.3 ; <i32> [#uses=2]
+ br label %wcond.i792
+
+wcond.i792: ; preds = %wbody.i794, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+ %.frame.0.6 = phi i32 [ %.frame.0.5, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786 ], [ %t18.i793.c, %wbody.i794 ] ; <i32> [#uses=9]
+ %t4.i791 = icmp ult i32 %.frame.0.6, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i791, label %andand.i800, label %wcond22.i803
+
+wbody.i794: ; preds = %andand.i800
+ %t18.i793.c = add i32 %.frame.0.6, 1 ; <i32> [#uses=1]
+ br label %wcond.i792
+
+andand.i800: ; preds = %wcond.i792
+ %t11.i797 = getelementptr i8* %line_arg831, i32 %.frame.0.6 ; <i8*> [#uses=1]
+ %t12.i798 = load i8* %t11.i797 ; <i8> [#uses=1]
+ %t14.i799 = icmp eq i8 %t12.i798, 32 ; <i1> [#uses=1]
+ br i1 %t14.i799, label %wbody.i794, label %wcond22.i803
+
+wcond22.i803: ; preds = %wbody23.i805, %andand.i800, %wcond.i792
+ %t49.i801 = phi i32 [ %t50.i804, %wbody23.i805 ], [ %.frame.0.6, %wcond.i792 ], [ %.frame.0.6, %andand.i800 ] ; <i32> [#uses=7]
+ %t32.i802 = icmp ult i32 %t49.i801, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i802, label %andand33.i811, label %wcond54.i815
+
+wbody23.i805: ; preds = %andand33.i811
+ %t50.i804 = add i32 %t49.i801, 1 ; <i32> [#uses=1]
+ br label %wcond22.i803
+
+andand33.i811: ; preds = %wcond22.i803
+ %t42.i808 = getelementptr i8* %line_arg831, i32 %t49.i801 ; <i8*> [#uses=1]
+ %t43.i809 = load i8* %t42.i808 ; <i8> [#uses=1]
+ %t45.i810 = icmp eq i8 %t43.i809, 32 ; <i1> [#uses=1]
+ br i1 %t45.i810, label %wcond54.i815, label %wbody23.i805
+
+wcond54.i815: ; preds = %wbody55.i817, %andand33.i811, %wcond22.i803
+ %t81.i812 = phi i32 [ %t82.i816, %wbody55.i817 ], [ %t49.i801, %wcond22.i803 ], [ %t49.i801, %andand33.i811 ] ; <i32> [#uses=3]
+ %t64.i813 = icmp ult i32 %t81.i812, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i813, label %andand65.i822, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+
+wbody55.i817: ; preds = %andand65.i822
+ %t82.i816 = add i32 %t81.i812, 1 ; <i32> [#uses=1]
+ br label %wcond54.i815
+
+andand65.i822: ; preds = %wcond54.i815
+ %t74.i819 = getelementptr i8* %line_arg831, i32 %t81.i812 ; <i8*> [#uses=1]
+ %t75.i820 = load i8* %t74.i819 ; <i8> [#uses=1]
+ %t77.i821 = icmp eq i8 %t75.i820, 32 ; <i1> [#uses=1]
+ br i1 %t77.i821, label %wbody55.i817, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+
+Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828: ; preds = %andand65.i822, %wcond54.i815
+ %t89.i824 = getelementptr i8* %line_arg831, i32 %.frame.0.6 ; <i8*> [#uses=4]
+ %t90.i825 = sub i32 %t49.i801, %.frame.0.6 ; <i32> [#uses=2]
+ %t63 = load i8* %t89.i824 ; <i8> [#uses=2]
+ br label %forcondi622
+
+forcondi622: ; preds = %forbodyi626, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+ %l.0i618 = phi i32 [ 10, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t4i620, %forbodyi626 ] ; <i32> [#uses=2]
+ %p.0i619 = phi i8* [ getelementptr ([11 x i8]* @.str170, i32 0, i32 -1), %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t7i623, %forbodyi626 ] ; <i8*> [#uses=1]
+ %t4i620 = add i32 %l.0i618, -1 ; <i32> [#uses=1]
+ %t5i621 = icmp eq i32 %l.0i618, 0 ; <i1> [#uses=1]
+ br i1 %t5i621, label %if65, label %forbodyi626
+
+forbodyi626: ; preds = %forcondi622
+ %t7i623 = getelementptr i8* %p.0i619, i32 1 ; <i8*> [#uses=3]
+ %t8i624 = load i8* %t7i623 ; <i8> [#uses=1]
+ %t12i625 = icmp eq i8 %t8i624, %t63 ; <i1> [#uses=1]
+ br i1 %t12i625, label %ifi630, label %forcondi622
+
+ifi630: ; preds = %forbodyi626
+ %t15i627 = ptrtoint i8* %t7i623 to i32 ; <i32> [#uses=1]
+ %t17i629 = sub i32 %t15i627, ptrtoint ([11 x i8]* @.str170 to i32) ; <i32> [#uses=1]
+ %phit636 = icmp eq i32 %t17i629, 10 ; <i1> [#uses=1]
+ br i1 %phit636, label %if65, label %e67
+
+if65: ; preds = %ifi630, %forcondi622
+ %t4i532 = icmp eq i32 %t49.i759, %.frame.0.3 ; <i1> [#uses=1]
+ br i1 %t4i532, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %forcondi539
+
+forcondi539: ; preds = %zi546, %if65
+ %sign.1.i533 = phi i1 [ %sign.0.i543, %zi546 ], [ false, %if65 ] ; <i1> [#uses=2]
+ %l.0i534 = phi i32 [ %t33i545, %zi546 ], [ %t90.i783, %if65 ] ; <i32> [#uses=3]
+ %p.0i535 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ] ; <i8*> [#uses=6]
+ %c.0.ini536 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ] ; <i8*> [#uses=1]
+ %c.0i537 = load i8* %c.0.ini536 ; <i8> [#uses=2]
+ %t8i538 = icmp eq i32 %l.0i534, 0 ; <i1> [#uses=1]
+ br i1 %t8i538, label %endfori550, label %forbodyi540
+
+forbodyi540: ; preds = %forcondi539
+ switch i8 %c.0i537, label %endfori550 [
+ i8 32, label %zi546
+ i8 9, label %zi546
+ i8 45, label %if20i541
+ i8 43, label %if26i542
+ ]
+
+if20i541: ; preds = %forbodyi540
+ br label %zi546
+
+if26i542: ; preds = %forbodyi540
+ br label %zi546
+
+zi546: ; preds = %if26i542, %if20i541, %forbodyi540, %forbodyi540
+ %sign.0.i543 = phi i1 [ false, %if26i542 ], [ true, %if20i541 ], [ %sign.1.i533, %forbodyi540 ], [ %sign.1.i533, %forbodyi540 ] ; <i1> [#uses=1]
+ %t30i544 = getelementptr i8* %p.0i535, i32 1 ; <i8*> [#uses=2]
+ %t33i545 = add i32 %l.0i534, -1 ; <i32> [#uses=1]
+ br label %forcondi539
+
+endfori550: ; preds = %forbodyi540, %forcondi539
+ %t37i547 = icmp eq i8 %c.0i537, 48 ; <i1> [#uses=1]
+ %t39i548 = icmp sgt i32 %l.0i534, 1 ; <i1> [#uses=1]
+ %or.condi549 = and i1 %t37i547, %t39i548 ; <i1> [#uses=1]
+ br i1 %or.condi549, label %if40i554, label %endif41i564
+
+if40i554: ; preds = %endfori550
+ %t43i551 = getelementptr i8* %p.0i535, i32 1 ; <i8*> [#uses=2]
+ %t44i552 = load i8* %t43i551 ; <i8> [#uses=1]
+ %t45i553 = zext i8 %t44i552 to i32 ; <i32> [#uses=1]
+ switch i32 %t45i553, label %endif41i564 [
+ i32 120, label %case46i556
+ i32 88, label %case46i556
+ i32 98, label %case51i558
+ i32 66, label %case51i558
+ i32 111, label %case56i560
+ i32 79, label %case56i560
+ ]
+
+case46i556: ; preds = %if40i554, %if40i554
+ %t48i555 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i564
+
+case51i558: ; preds = %if40i554, %if40i554
+ %t53i557 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i564
+
+case56i560: ; preds = %if40i554, %if40i554
+ %t58i559 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i564
+
+endif41i564: ; preds = %case56i560, %case51i558, %case46i556, %if40i554, %endfori550
+ %r.0i561 = phi i32 [ 0, %if40i554 ], [ 8, %case56i560 ], [ 2, %case51i558 ], [ 16, %case46i556 ], [ 0, %endfori550 ] ; <i32> [#uses=2]
+ %p.2i562 = phi i8* [ %t43i551, %if40i554 ], [ %t58i559, %case56i560 ], [ %t53i557, %case51i558 ], [ %t48i555, %case46i556 ], [ %p.0i535, %endfori550 ] ; <i8*> [#uses=2]
+ %t63i563 = icmp eq i32 %r.0i561, 0 ; <i1> [#uses=1]
+ br i1 %t63i563, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %if70i568
+
+if70i568: ; preds = %endif41i564
+ br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
+
+Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576: ; preds = %if70i568, %endif41i564, %if65
+ %radix.0.i570 = phi i32 [ 0, %if65 ], [ %r.0i561, %if70i568 ], [ 10, %endif41i564 ] ; <i32> [#uses=2]
+ %p.1i571 = phi i8* [ %p.2i562, %if70i568 ], [ %t89.i782, %if65 ], [ %p.2i562, %endif41i564 ] ; <i8*> [#uses=1]
+ %t84i572 = ptrtoint i8* %p.1i571 to i32 ; <i32> [#uses=1]
+ %t85i573 = ptrtoint i8* %t89.i782 to i32 ; <i32> [#uses=1]
+ %t86i574 = sub i32 %t84i572, %t85i573 ; <i32> [#uses=2]
+ %t6.i575 = sub i32 %t90.i783, %t86i574 ; <i32> [#uses=1]
+ %t59i604 = zext i32 %radix.0.i570 to i64 ; <i64> [#uses=1]
+ br label %fcondi581
+
+fcondi581: ; preds = %if55i610, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
+ %value.0i577 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t65i607, %if55i610 ] ; <i64> [#uses=1]
+ %fkey.0i579 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t70i609, %if55i610 ] ; <i32> [#uses=3]
+ %t3i580 = icmp ult i32 %fkey.0i579, %t6.i575 ; <i1> [#uses=1]
+ br i1 %t3i580, label %fbodyi587, label %wcond.i422
+
+fbodyi587: ; preds = %fcondi581
+ %t5.s.i582 = add i32 %t86i574, %fkey.0i579 ; <i32> [#uses=1]
+ %t89.i782.s = add i32 %.frame.0.3, %t5.s.i582 ; <i32> [#uses=1]
+ %t5i583 = getelementptr i8* %line_arg831, i32 %t89.i782.s ; <i8*> [#uses=1]
+ %t6i584 = load i8* %t5i583 ; <i8> [#uses=6]
+ %t6.off84i585 = add i8 %t6i584, -48 ; <i8> [#uses=1]
+ %or.cond.i28.i586 = icmp ugt i8 %t6.off84i585, 9 ; <i1> [#uses=1]
+ br i1 %or.cond.i28.i586, label %ei590, label %endifi603
+
+ei590: ; preds = %fbodyi587
+ %t6.off83i588 = add i8 %t6i584, -97 ; <i8> [#uses=1]
+ %or.cond81i589 = icmp ugt i8 %t6.off83i588, 25 ; <i1> [#uses=1]
+ br i1 %or.cond81i589, label %e24i595, label %if22i592
+
+if22i592: ; preds = %ei590
+ %t27i591 = add i8 %t6i584, -39 ; <i8> [#uses=1]
+ br label %endifi603
+
+e24i595: ; preds = %ei590
+ %t6.offi593 = add i8 %t6i584, -65 ; <i8> [#uses=1]
+ %or.cond82i594 = icmp ugt i8 %t6.offi593, 25 ; <i1> [#uses=1]
+ br i1 %or.cond82i594, label %wcond.i422, label %if39i597
+
+if39i597: ; preds = %e24i595
+ %t44.i29.i596 = add i8 %t6i584, -7 ; <i8> [#uses=1]
+ br label %endifi603
+
+endifi603: ; preds = %if39i597, %if22i592, %fbodyi587
+ %c.0.i30.i598 = phi i8 [ %t27i591, %if22i592 ], [ %t44.i29.i596, %if39i597 ], [ %t6i584, %fbodyi587 ] ; <i8> [#uses=1]
+ %t48.i31.i599 = zext i8 %c.0.i30.i598 to i32 ; <i32> [#uses=1]
+ %t49i600 = add i32 %t48.i31.i599, 208 ; <i32> [#uses=1]
+ %t52i601 = and i32 %t49i600, 255 ; <i32> [#uses=2]
+ %t54i602 = icmp ult i32 %t52i601, %radix.0.i570 ; <i1> [#uses=1]
+ br i1 %t54i602, label %if55i610, label %wcond.i422
+
+if55i610: ; preds = %endifi603
+ %t61i605 = mul i64 %value.0i577, %t59i604 ; <i64> [#uses=1]
+ %t64i606 = zext i32 %t52i601 to i64 ; <i64> [#uses=1]
+ %t65i607 = add i64 %t61i605, %t64i606 ; <i64> [#uses=1]
+ %t70i609 = add i32 %fkey.0i579, 1 ; <i32> [#uses=1]
+ br label %fcondi581
+
+e67: ; preds = %ifi630
+ %t4i447 = icmp eq i32 %t49.i801, %.frame.0.6 ; <i1> [#uses=1]
+ br i1 %t4i447, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %forcondi454
+
+forcondi454: ; preds = %zi461, %e67
+ %c.0i452 = phi i8 [ %c.0i452.pre, %zi461 ], [ %t63, %e67 ] ; <i8> [#uses=2]
+ %sign.1.i448 = phi i1 [ %sign.0.i458, %zi461 ], [ false, %e67 ] ; <i1> [#uses=2]
+ %l.0i449 = phi i32 [ %t33i460, %zi461 ], [ %t90.i825, %e67 ] ; <i32> [#uses=3]
+ %p.0i450 = phi i8* [ %t30i459, %zi461 ], [ %t89.i824, %e67 ] ; <i8*> [#uses=5]
+ %t8i453 = icmp eq i32 %l.0i449, 0 ; <i1> [#uses=1]
+ br i1 %t8i453, label %endfori465, label %forbodyi455
+
+forbodyi455: ; preds = %forcondi454
+ switch i8 %c.0i452, label %endfori465 [
+ i8 32, label %zi461
+ i8 9, label %zi461
+ i8 45, label %if20i456
+ i8 43, label %if26i457
+ ]
+
+if20i456: ; preds = %forbodyi455
+ br label %zi461
+
+if26i457: ; preds = %forbodyi455
+ br label %zi461
+
+zi461: ; preds = %if26i457, %if20i456, %forbodyi455, %forbodyi455
+ %sign.0.i458 = phi i1 [ false, %if26i457 ], [ true, %if20i456 ], [ %sign.1.i448, %forbodyi455 ], [ %sign.1.i448, %forbodyi455 ] ; <i1> [#uses=1]
+ %t30i459 = getelementptr i8* %p.0i450, i32 1 ; <i8*> [#uses=2]
+ %t33i460 = add i32 %l.0i449, -1 ; <i32> [#uses=1]
+ %c.0i452.pre = load i8* %t30i459 ; <i8> [#uses=1]
+ br label %forcondi454
+
+endfori465: ; preds = %forbodyi455, %forcondi454
+ %t37i462 = icmp eq i8 %c.0i452, 48 ; <i1> [#uses=1]
+ %t39i463 = icmp sgt i32 %l.0i449, 1 ; <i1> [#uses=1]
+ %or.condi464 = and i1 %t37i462, %t39i463 ; <i1> [#uses=1]
+ br i1 %or.condi464, label %if40i469, label %endif41i479
+
+if40i469: ; preds = %endfori465
+ %t43i466 = getelementptr i8* %p.0i450, i32 1 ; <i8*> [#uses=2]
+ %t44i467 = load i8* %t43i466 ; <i8> [#uses=1]
+ %t45i468 = zext i8 %t44i467 to i32 ; <i32> [#uses=1]
+ switch i32 %t45i468, label %endif41i479 [
+ i32 120, label %case46i471
+ i32 111, label %case56i475
+ ]
+
+case46i471: ; preds = %if40i469
+ %t48i470 = getelementptr i8* %p.0i450, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i479
+
+case56i475: ; preds = %if40i469
+ %t58i474 = getelementptr i8* %p.0i450, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i479
+
+endif41i479: ; preds = %case56i475, %case46i471, %if40i469, %endfori465
+ %r.0i476 = phi i32 [ 0, %if40i469 ], [ 8, %case56i475 ], [ 16, %case46i471 ], [ 0, %endfori465 ] ; <i32> [#uses=2]
+ %p.2i477 = phi i8* [ %t43i466, %if40i469 ], [ %t58i474, %case56i475 ], [ %t48i470, %case46i471 ], [ %p.0i450, %endfori465 ] ; <i8*> [#uses=2]
+ %t63i478 = icmp eq i32 %r.0i476, 0 ; <i1> [#uses=1]
+ br i1 %t63i478, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %if70i483
+
+if70i483: ; preds = %endif41i479
+ br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
+
+Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491: ; preds = %if70i483, %endif41i479, %e67
+ %radix.0.i485 = phi i32 [ 0, %e67 ], [ %r.0i476, %if70i483 ], [ 10, %endif41i479 ] ; <i32> [#uses=2]
+ %p.1i486 = phi i8* [ %p.2i477, %if70i483 ], [ %t89.i824, %e67 ], [ %p.2i477, %endif41i479 ] ; <i8*> [#uses=1]
+ %t84i487 = ptrtoint i8* %p.1i486 to i32 ; <i32> [#uses=1]
+ %t85i488 = ptrtoint i8* %t89.i824 to i32 ; <i32> [#uses=1]
+ %t86i489 = sub i32 %t84i487, %t85i488 ; <i32> [#uses=2]
+ %ttt = sub i32 %t90.i825, %t86i489 ; <i32> [#uses=1]
+ %t59i519 = zext i32 %radix.0.i485 to i64 ; <i64> [#uses=1]
+ br label %fcondi496
+
+fcondi496: ; preds = %if55i525, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
+ %value.0i492 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t65i522, %if55i525 ] ; <i64> [#uses=1]
+ %fkey.0i494 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t70i524, %if55i525 ] ; <i32> [#uses=3]
+ %t3i495 = icmp ult i32 %fkey.0i494, %ttt ; <i1> [#uses=1]
+ br i1 %t3i495, label %fbodyi502, label %wcond.i422
+
+fbodyi502: ; preds = %fcondi496
+ %t5.s.i497 = add i32 %t86i489, %fkey.0i494 ; <i32> [#uses=1]
+ %t89.i824.s = add i32 %.frame.0.6, %t5.s.i497 ; <i32> [#uses=1]
+ %t5i498 = getelementptr i8* %line_arg831, i32 %t89.i824.s ; <i8*> [#uses=1]
+ %t6i499 = load i8* %t5i498 ; <i8> [#uses=6]
+ %t6.off84i500 = add i8 %t6i499, -48 ; <i8> [#uses=1]
+ %or.cond.i28.i501 = icmp ugt i8 %t6.off84i500, 9 ; <i1> [#uses=1]
+ br i1 %or.cond.i28.i501, label %ei505, label %endifi518
+
+ei505: ; preds = %fbodyi502
+ %t6.off83i503 = add i8 %t6i499, -97 ; <i8> [#uses=1]
+ %or.cond81i504 = icmp ugt i8 %t6.off83i503, 25 ; <i1> [#uses=1]
+ br i1 %or.cond81i504, label %e24i510, label %if22i507
+
+if22i507: ; preds = %ei505
+ %t27i506 = add i8 %t6i499, -39 ; <i8> [#uses=1]
+ br label %endifi518
+
+e24i510: ; preds = %ei505
+ %t6.offi508 = add i8 %t6i499, -65 ; <i8> [#uses=1]
+ %or.cond82i509 = icmp ugt i8 %t6.offi508, 25 ; <i1> [#uses=1]
+ br i1 %or.cond82i509, label %wcond.i422, label %if39i512
+
+if39i512: ; preds = %e24i510
+ %t44.i29.i511 = add i8 %t6i499, -7 ; <i8> [#uses=1]
+ br label %endifi518
+
+endifi518: ; preds = %if39i512, %if22i507, %fbodyi502
+ %c.0.i30.i513 = phi i8 [ %t27i506, %if22i507 ], [ %t44.i29.i511, %if39i512 ], [ %t6i499, %fbodyi502 ] ; <i8> [#uses=1]
+ %t48.i31.i514 = zext i8 %c.0.i30.i513 to i32 ; <i32> [#uses=1]
+ %t49i515 = add i32 %t48.i31.i514, 208 ; <i32> [#uses=1]
+ %t52i516 = and i32 %t49i515, 255 ; <i32> [#uses=2]
+ %t54i517 = icmp ult i32 %t52i516, %radix.0.i485 ; <i1> [#uses=1]
+ br i1 %t54i517, label %if55i525, label %wcond.i422
+
+if55i525: ; preds = %endifi518
+ %t61i520 = mul i64 %value.0i492, %t59i519 ; <i64> [#uses=1]
+ %t64i521 = zext i32 %t52i516 to i64 ; <i64> [#uses=1]
+ %t65i522 = add i64 %t61i520, %t64i521 ; <i64> [#uses=1]
+ %t70i524 = add i32 %fkey.0i494, 1 ; <i32> [#uses=1]
+ br label %fcondi496
+
+wcond.i422: ; preds = %e40.i, %endifi518, %e24i510, %fcondi496, %endifi603, %e24i595, %fcondi581
+ %sarg60.pn.i = phi i8* [ %p.0.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ] ; <i8*> [#uses=3]
+ %start_arg.pn.i = phi i32 [ %t49.i443, %e40.i ], [ 0, %fcondi496 ], [ 0, %e24i510 ], [ 0, %endifi518 ], [ 0, %endifi603 ], [ 0, %e24i595 ], [ 0, %fcondi581 ] ; <i32> [#uses=3]
+ %extent.0.i = phi i32 [ %t51.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ] ; <i32> [#uses=3]
+ %p.0.i = getelementptr i8* %sarg60.pn.i, i32 %start_arg.pn.i ; <i8*> [#uses=2]
+ %p.0.s63.i = add i32 %start_arg.pn.i, -1 ; <i32> [#uses=1]
+ %t2i424 = getelementptr i8* %sarg60.pn.i, i32 %p.0.s63.i ; <i8*> [#uses=1]
+ br label %forcondi430
+
+forcondi430: ; preds = %forbodyi434, %wcond.i422
+ %l.0i426 = phi i32 [ %extent.0.i, %wcond.i422 ], [ %t4i428, %forbodyi434 ] ; <i32> [#uses=2]
+ %p.0i427 = phi i8* [ %t2i424, %wcond.i422 ], [ %t7i431, %forbodyi434 ] ; <i8*> [#uses=1]
+ %t4i428 = add i32 %l.0i426, -1 ; <i32> [#uses=1]
+ %t5i429 = icmp eq i32 %l.0i426, 0 ; <i1> [#uses=1]
+ br i1 %t5i429, label %e.i441, label %forbodyi434
+
+forbodyi434: ; preds = %forcondi430
+ %t7i431 = getelementptr i8* %p.0i427, i32 1 ; <i8*> [#uses=3]
+ %t8i432 = load i8* %t7i431 ; <i8> [#uses=1]
+ %t12i433 = icmp eq i8 %t8i432, 32 ; <i1> [#uses=1]
+ br i1 %t12i433, label %ifi438, label %forcondi430
+
+ifi438: ; preds = %forbodyi434
+ %t15i435 = ptrtoint i8* %t7i431 to i32 ; <i32> [#uses=1]
+ %t16i436 = ptrtoint i8* %p.0.i to i32 ; <i32> [#uses=1]
+ %t17i437 = sub i32 %t15i435, %t16i436 ; <i32> [#uses=1]
+ br label %e.i441
+
+e.i441: ; preds = %ifi438, %forcondi430
+ %t2561.i = phi i32 [ %t17i437, %ifi438 ], [ %extent.0.i, %forcondi430 ] ; <i32> [#uses=2]
+ %p.0.s.i = add i32 %start_arg.pn.i, %t2561.i ; <i32> [#uses=1]
+ %t32.s.i = add i32 %p.0.s.i, -1 ; <i32> [#uses=1]
+ %t2i.i = getelementptr i8* %sarg60.pn.i, i32 %t32.s.i ; <i8*> [#uses=1]
+ br label %forbodyi.i
+
+forbodyi.i: ; preds = %forbodyi.i, %e.i441
+ %p.0i.i = phi i8* [ %t2i.i, %e.i441 ], [ %t7i.i, %forbodyi.i ] ; <i8*> [#uses=1]
+ %s2.0i.i = phi i8* [ getelementptr ([5 x i8]* @.str171, i32 0, i32 0), %e.i441 ], [ %t11i.i, %forbodyi.i ] ; <i8*> [#uses=2]
+ %t7i.i = getelementptr i8* %p.0i.i, i32 1 ; <i8*> [#uses=2]
+ %t8i.i = load i8* %t7i.i ; <i8> [#uses=1]
+ %t11i.i = getelementptr i8* %s2.0i.i, i32 1 ; <i8*> [#uses=1]
+ %t12i.i = load i8* %s2.0i.i ; <i8> [#uses=1]
+ %t14i.i = icmp eq i8 %t8i.i, %t12i.i ; <i1> [#uses=1]
+ br i1 %t14i.i, label %forbodyi.i, label %e40.i
+
+e40.i: ; preds = %forbodyi.i
+ %t49.i443 = add i32 %t2561.i, 1 ; <i32> [#uses=2]
+ %t51.i = sub i32 %extent.0.i, %t49.i443 ; <i32> [#uses=1]
+ br label %wcond.i422
+}
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index 530c7826ea83..bc05bb176352 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep {__trampoline_setup}
+; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup}
module asm "\09.lazy_reference .objc_class_name_NSImageRep"
module asm "\09.objc_class_name_NSBitmapImageRep=0"
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index d211b3b76f52..ef9791277dcd 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fmul | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 | grep fmul | count 2
+; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \
; RUN: grep fmul | count 1
define double @foo(double %X) {
diff --git a/test/CodeGen/PowerPC/vcmp-fold.ll b/test/CodeGen/PowerPC/vcmp-fold.ll
index 815bb0aedff5..7a42c27d2b4a 100644
--- a/test/CodeGen/PowerPC/vcmp-fold.ll
+++ b/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -1,6 +1,6 @@
; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single
; "vcmpbfp.".
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) {
diff --git a/test/CodeGen/PowerPC/vec_br_cmp.ll b/test/CodeGen/PowerPC/vec_br_cmp.ll
index 6d799676b77b..c34d850c0ac7 100644
--- a/test/CodeGen/PowerPC/vec_br_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
; RUN: grep vcmpeqfp. %t
; RUN: not grep mfcr %t
diff --git a/test/CodeGen/PowerPC/vec_call.ll b/test/CodeGen/PowerPC/vec_call.ll
index 8e7a08ebb7d7..4511315c3bfa 100644
--- a/test/CodeGen/PowerPC/vec_call.ll
+++ b/test/CodeGen/PowerPC/vec_call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) {
%C = add <4 x i32> %A, %B ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index c4b42b9e9b8a..32c6f4809cb4 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep CPI
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
%tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_fneg.ll b/test/CodeGen/PowerPC/vec_fneg.ll
index 9fdbffd33ed5..e01e65979f6f 100644
--- a/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/test/CodeGen/PowerPC/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsubfp
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubfp
define void @t(<4 x float>* %A) {
%tmp2 = load <4 x float>* %A
diff --git a/test/CodeGen/PowerPC/vec_insert.ll b/test/CodeGen/PowerPC/vec_insert.ll
index 04bbe6574f62..185454cbd31d 100644
--- a/test/CodeGen/PowerPC/vec_insert.ll
+++ b/test/CodeGen/PowerPC/vec_insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep sth
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep sth
define <8 x i16> @insert(<8 x i16> %foo, i16 %a) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index 15376caebefa..d7ed64a5b1cf 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index b061fa9a54ee..80f4de4a1728 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep mullw
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm
define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
%tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
index 5bb1b6083417..2c3594d224fe 100644
--- a/test/CodeGen/PowerPC/vec_perf_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_shift.ll b/test/CodeGen/PowerPC/vec_shift.ll
index 0cc699cee42c..646fb5f3866c 100644
--- a/test/CodeGen/PowerPC/vec_shift.ll
+++ b/test/CodeGen/PowerPC/vec_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; PR3628
define void @update(<4 x i32> %val, <4 x i32>* %dst) nounwind {
diff --git a/test/CodeGen/PowerPC/vec_shuffle.ll b/test/CodeGen/PowerPC/vec_shuffle.ll
index 1289dca2d211..82706321c1c1 100644
--- a/test/CodeGen/PowerPC/vec_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -instcombine | \
+; RUN: opt < %s -instcombine | \
; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
; RUN: grep vsldoi %t | count 2
; RUN: grep vmrgh %t | count 7
; RUN: grep vmrgl %t | count 6
diff --git a/test/CodeGen/PowerPC/vec_splat.ll b/test/CodeGen/PowerPC/vec_splat.ll
index 7b7e4fe33477..61237284d36c 100644
--- a/test/CodeGen/PowerPC/vec_splat.ll
+++ b/test/CodeGen/PowerPC/vec_splat.ll
@@ -1,7 +1,7 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g3 | \
; RUN: grep stfs | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
; RUN: grep vspltw %t | count 2
; RUN: grep vsplti %t | count 3
; RUN: grep vsplth %t | count 1
diff --git a/test/CodeGen/PowerPC/vec_vrsave.ll b/test/CodeGen/PowerPC/vec_vrsave.ll
index 06769f6bf0f8..2a03d5819b83 100644
--- a/test/CodeGen/PowerPC/vec_vrsave.ll
+++ b/test/CodeGen/PowerPC/vec_vrsave.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
; RUN: grep vrlw %t
; RUN: not grep spr %t
; RUN: not grep vrsave %t
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
index 7350e91b7741..f862b2cb4c4b 100644
--- a/test/CodeGen/PowerPC/vec_zero.ll
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vxor
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vxor
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
index aefd2661a8bc..dfa2e35435a8 100644
--- a/test/CodeGen/PowerPC/vector-identity-shuffle.ll
+++ b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep test:
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep test:
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
define void @test(<4 x float>* %tmp2.i) {
%tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index a6c17b4bccf6..ee4da315f927 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g3 > %t
%d8 = type <8 x double>
%f1 = type <1 x float>
diff --git a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
index 76f140ceaf85..082f9f40f289 100644
--- a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
+++ b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
define void @execute_list() {
%tmp.33.i = fdiv float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1]
diff --git a/test/CodeGen/SPARC/2007-05-09-JumpTables.ll b/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
index a014acefa904..41ad3b27b5a7 100644
--- a/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
+++ b/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; We cannot emit jump tables on Sparc, but we should correctly handle this case.
diff --git a/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll b/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
index d1ca44dbb112..77c20028824f 100644
--- a/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
+++ b/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; PR1540
declare float @sinf(float)
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index f9f4c21fe1ea..e8315f17ebb6 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; PR 1557
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
index aaa7bde68383..c12e9c13409b 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; PR 1557
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/2009-08-28-PIC.ll b/test/CodeGen/SPARC/2009-08-28-PIC.ll
new file mode 100644
index 000000000000..a2ba0d02d45c
--- /dev/null
+++ b/test/CodeGen/SPARC/2009-08-28-PIC.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=sparc --relocation-model=pic < %s | grep _GLOBAL_OFFSET_TABLE_
+
+@foo = global i32 0 ; <i32*> [#uses=1]
+
+define i32 @func() nounwind readonly {
+entry:
+ %0 = load i32* @foo, align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
diff --git a/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll b/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
new file mode 100644
index 000000000000..0167d3237aad
--- /dev/null
+++ b/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
@@ -0,0 +1,6 @@
+; RUN: llc -march=sparc < %s | grep weak
+
+define weak i32 @func() nounwind {
+entry:
+ ret i32 0
+}
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 5c3e07543b9d..9c2c16a6947c 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
define i32 @test(i32 %X) {
%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/SPARC/ctpop.ll b/test/CodeGen/SPARC/ctpop.ll
index d603baa465de..37d1c5a5706d 100644
--- a/test/CodeGen/SPARC/ctpop.ll
+++ b/test/CodeGen/SPARC/ctpop.ll
@@ -1,9 +1,7 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=sparc -mattr=v9 -enable-sparc-v9-insts
-; RUN: llvm-as < %s | llc -march=sparc -mattr=-v9 | \
+; RUN: llc < %s -march=sparc -mattr=v9 -enable-sparc-v9-insts
+; RUN: llc < %s -march=sparc -mattr=-v9 | \
; RUN: not grep popc
-; RUN: llvm-as < %s | \
-; RUN: llc -march=sparc -mattr=v9 -enable-sparc-v9-insts | grep popc
+; RUN: llc < %s -march=sparc -mattr=v9 -enable-sparc-v9-insts | grep popc
declare i32 @llvm.ctpop.i32(i32)
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index a9850b7def46..8fa3e7e52d8d 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=sparc > %t
+; RUN: llc < %s -march=sparc > %t
; RUN: grep .foo: %t
; RUN: grep call.*\.foo %t
; RUN: grep .baz: %t
diff --git a/test/CodeGen/SPARC/xnor.ll b/test/CodeGen/SPARC/xnor.ll
index 9d8994c00613..6ff66bd6fcc6 100644
--- a/test/CodeGen/SPARC/xnor.ll
+++ b/test/CodeGen/SPARC/xnor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc | \
+; RUN: llc < %s -march=sparc | \
; RUN: grep xnor | count 2
define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
new file mode 100644
index 000000000000..de23795ab08a
--- /dev/null
+++ b/test/CodeGen/SystemZ/00-RetVoid.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+
+define void @foo() {
+entry:
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
new file mode 100644
index 000000000000..9ab2097a0c89
--- /dev/null
+++ b/test/CodeGen/SystemZ/01-RetArg.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ ret i64 %b
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/01-RetImm.ll b/test/CodeGen/SystemZ/01-RetImm.ll
new file mode 100644
index 000000000000..8b99e68dc7e1
--- /dev/null
+++ b/test/CodeGen/SystemZ/01-RetImm.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=systemz | grep lghi | count 1
+; RUN: llc < %s -march=systemz | grep llill | count 1
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep llihl | count 1
+; RUN: llc < %s -march=systemz | grep llihh | count 1
+; RUN: llc < %s -march=systemz | grep lgfi | count 1
+; RUN: llc < %s -march=systemz | grep llilf | count 1
+; RUN: llc < %s -march=systemz | grep llihf | count 1
+
+
+define i64 @foo1() {
+entry:
+ ret i64 1
+}
+
+define i64 @foo2() {
+entry:
+ ret i64 65535
+}
+
+define i64 @foo3() {
+entry:
+ ret i64 131072
+}
+
+define i64 @foo4() {
+entry:
+ ret i64 8589934592
+}
+
+define i64 @foo5() {
+entry:
+ ret i64 562949953421312
+}
+
+define i64 @foo6() {
+entry:
+ ret i64 65537
+}
+
+define i64 @foo7() {
+entry:
+ ret i64 4294967295
+}
+
+define i64 @foo8() {
+entry:
+ ret i64 281483566645248
+}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
new file mode 100644
index 000000000000..04022a063f16
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-MemArith.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=systemz | FileCheck %s
+
+define i32 @foo1(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo1:
+; CHECK: a %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = add i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo2(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo2:
+; CHECK: ay %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = add i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo3(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo3:
+; CHECK: ag %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = add i64 %a, %c
+ ret i64 %d
+}
+
+define i32 @foo4(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo4:
+; CHECK: n %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = and i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo5(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo5:
+; CHECK: ny %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = and i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo6(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo6:
+; CHECK: ng %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = and i64 %a, %c
+ ret i64 %d
+}
+
+define i32 @foo7(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo7:
+; CHECK: o %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = or i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo8(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo8:
+; CHECK: oy %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = or i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo9(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo9:
+; CHECK: og %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = or i64 %a, %c
+ ret i64 %d
+}
+
+define i32 @foo10(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo10:
+; CHECK: x %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = xor i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo11(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo11:
+; CHECK: xy %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = xor i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo12(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo12:
+; CHECK: xg %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = xor i64 %a, %c
+ ret i64 %d
+}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
new file mode 100644
index 000000000000..9ff9b6ac3833
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAdd.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = add i64 %a, %b
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
new file mode 100644
index 000000000000..6d73e4d42ab2
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAddImm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = add i64 %a, 1
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
new file mode 100644
index 000000000000..1492f9dbee75
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAnd.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, %b
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAndImm.ll b/test/CodeGen/SystemZ/02-RetAndImm.ll
new file mode 100644
index 000000000000..53c5e54528bd
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAndImm.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 4
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep llihl | count 1
+; RUN: llc < %s -march=systemz | grep llihh | count 1
+
+define i64 @foo1(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 1
+ ret i64 %c
+}
+
+define i64 @foo2(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 131072
+ ret i64 %c
+}
+
+define i64 @foo3(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 8589934592
+ ret i64 %c
+}
+
+define i64 @foo4(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 562949953421312
+ ret i64 %c
+}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
new file mode 100644
index 000000000000..7f3380dc16ac
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetNeg.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz | grep lcgr | count 1
+
+define i64 @foo(i64 %a) {
+entry:
+ %c = sub i64 0, %a
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
new file mode 100644
index 000000000000..1e8134d2ddcc
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetOr.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, %b
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetOrImm.ll b/test/CodeGen/SystemZ/02-RetOrImm.ll
new file mode 100644
index 000000000000..68cd24d07f44
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetOrImm.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=systemz | grep oill | count 1
+; RUN: llc < %s -march=systemz | grep oilh | count 1
+; RUN: llc < %s -march=systemz | grep oihl | count 1
+; RUN: llc < %s -march=systemz | grep oihh | count 1
+
+define i64 @foo1(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 1
+ ret i64 %c
+}
+
+define i64 @foo2(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 131072
+ ret i64 %c
+}
+
+define i64 @foo3(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 8589934592
+ ret i64 %c
+}
+
+define i64 @foo4(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 562949953421312
+ ret i64 %c
+}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
new file mode 100644
index 000000000000..1c4514f36c93
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetSub.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = sub i64 %a, %b
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
new file mode 100644
index 000000000000..4f91cb073997
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetSubImm.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = sub i64 %a, 1
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
new file mode 100644
index 000000000000..a9439bf452a7
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetXor.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = xor i64 %a, %b
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
new file mode 100644
index 000000000000..ea4b8290df3b
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetXorImm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = xor i64 %a, 1
+ ret i64 %c
+} \ No newline at end of file
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
new file mode 100644
index 000000000000..0a812715ae57
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep ahi | count 3
+; RUN: llc < %s -march=systemz | grep afi | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 4
+; RUN: llc < %s -march=systemz | grep llgfr | count 2
+
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = add i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = add i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = add i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) zeroext {
+entry:
+ %c = add i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) signext {
+entry:
+ %c = add i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = add i32 %a, 131072
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
new file mode 100644
index 000000000000..2787083f162b
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=systemz | grep ar | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 2
+; RUN: llc < %s -march=systemz | grep llgfr | count 1
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
new file mode 100644
index 000000000000..32673dd014c5
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = and i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = and i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = and i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = and i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = and i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = and i32 %a, 131072
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
new file mode 100644
index 000000000000..ed5e5269525b
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = and i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = and i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = and i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
new file mode 100644
index 000000000000..0c9bb14eef3e
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=systemz | grep lgr | count 2
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ ret i32 %b
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ ret i32 %b
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ ret i32 %b
+}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
new file mode 100644
index 000000000000..343e30b72138
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep lghi | count 2
+; RUN: llc < %s -march=systemz | grep llill | count 1
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep lgfi | count 1
+; RUN: llc < %s -march=systemz | grep llilf | count 2
+
+
+define i32 @foo1() {
+entry:
+ ret i32 1
+}
+
+define i32 @foo2() {
+entry:
+ ret i32 65535
+}
+
+define i32 @foo3() {
+entry:
+ ret i32 131072
+}
+
+define i32 @foo4() {
+entry:
+ ret i32 65537
+}
+
+define i32 @foo5() {
+entry:
+ ret i32 4294967295
+}
+
+define i32 @foo6() zeroext {
+entry:
+ ret i32 4294967295
+}
+
+define i32 @foo7() signext {
+entry:
+ ret i32 4294967295
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
new file mode 100644
index 000000000000..87ebcc1f0a4f
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=systemz | grep lcr | count 1
+
+define i32 @foo(i32 %a) {
+entry:
+ %c = sub i32 0, %a
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
new file mode 100644
index 000000000000..6d118b5e3d40
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -march=systemz | grep oill | count 3
+; RUN: llc < %s -march=systemz | grep oilh | count 3
+; RUN: llc < %s -march=systemz | grep oilf | count 3
+; RUN: llc < %s -march=systemz | grep llgfr | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo7(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo8(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo9(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, 123456
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
new file mode 100644
index 000000000000..4d7661acb716
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=systemz | grep ogr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
new file mode 100644
index 000000000000..11ca796c7b17
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep ahi | count 3
+; RUN: llc < %s -march=systemz | grep afi | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 4
+; RUN: llc < %s -march=systemz | grep llgfr | count 2
+
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = sub i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = sub i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = sub i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = sub i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = sub i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = sub i32 %a, 131072
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
new file mode 100644
index 000000000000..b3e1ac26e08c
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=systemz | grep sr | count 3
+; RUN: llc < %s -march=systemz | grep llgfr | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 2
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
new file mode 100644
index 000000000000..0033126369e6
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=systemz | grep xilf | count 9
+; RUN: llc < %s -march=systemz | grep llgfr | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo7(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo8(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo9(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, 123456
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
new file mode 100644
index 000000000000..a9af23197ef8
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=systemz | grep xgr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/04-RetShifts.ll b/test/CodeGen/SystemZ/04-RetShifts.ll
new file mode 100644
index 000000000000..cccdc4737f76
--- /dev/null
+++ b/test/CodeGen/SystemZ/04-RetShifts.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -march=systemz | grep sra | count 6
+; RUN: llc < %s -march=systemz | grep srag | count 3
+; RUN: llc < %s -march=systemz | grep srl | count 6
+; RUN: llc < %s -march=systemz | grep srlg | count 3
+; RUN: llc < %s -march=systemz | grep sll | count 6
+; RUN: llc < %s -march=systemz | grep sllg | count 3
+
+define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %add = add i32 %idx, 1 ; <i32> [#uses=1]
+ %shr = ashr i32 %a, %add ; <i32> [#uses=1]
+ ret i32 %shr
+}
+
+define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %add = add i32 %idx, 1 ; <i32> [#uses=1]
+ %shr = shl i32 %a, %add ; <i32> [#uses=1]
+ ret i32 %shr
+}
+
+define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %add = add i32 %idx, 1 ; <i32> [#uses=1]
+ %shr = lshr i32 %a, %add ; <i32> [#uses=1]
+ ret i32 %shr
+}
+
+define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %add = add i64 %idx, 1 ; <i64> [#uses=1]
+ %shr = ashr i64 %a, %add ; <i64> [#uses=1]
+ ret i64 %shr
+}
+
+define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %add = add i64 %idx, 1 ; <i64> [#uses=1]
+ %shr = shl i64 %a, %add ; <i64> [#uses=1]
+ ret i64 %shr
+}
+
+define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %add = add i64 %idx, 1 ; <i64> [#uses=1]
+ %shr = lshr i64 %a, %add ; <i64> [#uses=1]
+ ret i64 %shr
+}
+
+define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = ashr i32 %a, 1
+ ret i32 %shr
+}
+
+define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = shl i32 %a, 1
+ ret i32 %shr
+}
+
+define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = lshr i32 %a, 1
+ ret i32 %shr
+}
+
+define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = ashr i32 %a, %idx
+ ret i32 %shr
+}
+
+define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = shl i32 %a, %idx
+ ret i32 %shr
+}
+
+define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = lshr i32 %a, %idx
+ ret i32 %shr
+}
+
+define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = ashr i64 %a, 1
+ ret i64 %shr
+}
+
+define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = shl i64 %a, 1
+ ret i64 %shr
+}
+
+define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = lshr i64 %a, 1
+ ret i64 %shr
+}
+
+define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = ashr i64 %a, %idx
+ ret i64 %shr
+}
+
+define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = shl i64 %a, %idx
+ ret i64 %shr
+}
+
+define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = lshr i64 %a, %idx
+ ret i64 %shr
+}
+
diff --git a/test/CodeGen/SystemZ/05-LoadAddr.ll b/test/CodeGen/SystemZ/05-LoadAddr.ll
new file mode 100644
index 000000000000..cf0264283939
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-LoadAddr.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep lay | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64* @foo(i64* %a, i64 %idx) nounwind readnone {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ ret i64* %add.ptr2
+}
diff --git a/test/CodeGen/SystemZ/05-MemImmStores.ll b/test/CodeGen/SystemZ/05-MemImmStores.ll
new file mode 100644
index 000000000000..3cf21ccd931a
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemImmStores.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1
+; RUN: llc < %s -mattr=+z10 | grep mvhi | count 1
+; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1
+; RUN: llc < %s | grep mvi | count 2
+; RUN: llc < %s | grep mviy | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo1(i64* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i64* %a, i64 1 ; <i64*> [#uses=1]
+ store i64 1, i64* %add.ptr
+ ret void
+}
+
+define void @foo2(i32* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i32* %a, i64 1 ; <i32*> [#uses=1]
+ store i32 2, i32* %add.ptr
+ ret void
+}
+
+define void @foo3(i16* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %a, i64 1 ; <i16*> [#uses=1]
+ store i16 3, i16* %add.ptr
+ ret void
+}
+
+define void @foo4(i8* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i8* %a, i64 1 ; <i8*> [#uses=1]
+ store i8 4, i8* %add.ptr
+ ret void
+}
+
+define void @foo5(i8* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i8* %a, i64 -1 ; <i8*> [#uses=1]
+ store i8 4, i8* %add.ptr
+ ret void
+}
+
+define void @foo6(i16* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %a, i64 -1 ; <i16*> [#uses=1]
+ store i16 3, i16* %add.ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
new file mode 100644
index 000000000000..cf12063e5d4c
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s | grep ly | count 2
+; RUN: llc < %s | grep sty | count 2
+; RUN: llc < %s | grep {l %} | count 2
+; RUN: llc < %s | grep {st %} | count 2
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
+entry:
+ %tmp1 = load i32* %foo ; <i32> [#uses=1]
+ store i32 %tmp1, i32* %bar
+ ret void
+}
+
+define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i32* %foo, i64 1 ; <i32*> [#uses=1]
+ %tmp1 = load i32* %add.ptr ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %tmp1, i32* %add.ptr5
+ ret void
+}
+
+define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %sub.ptr = getelementptr i32* %foo, i64 -1 ; <i32*> [#uses=1]
+ %tmp1 = load i32* %sub.ptr ; <i32> [#uses=1]
+ %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %tmp1, i32* %add.ptr
+ ret void
+}
+
+define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i32* %foo, i64 8192 ; <i32*> [#uses=1]
+ %tmp1 = load i32* %add.ptr ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %tmp1, i32* %add.ptr5
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
new file mode 100644
index 000000000000..1e6232a62550
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s | grep {sthy.%} | count 2
+; RUN: llc < %s | grep {lhy.%} | count 2
+; RUN: llc < %s | grep {lh.%} | count 6
+; RUN: llc < %s | grep {sth.%} | count 2
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
+entry:
+ %tmp1 = load i16* %foo ; <i16> [#uses=1]
+ store i16 %tmp1, i16* %bar
+ ret void
+}
+
+define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1]
+ store i16 %tmp1, i16* %add.ptr5
+ ret void
+}
+
+define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1]
+ %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum ; <i16*> [#uses=1]
+ store i16 %tmp1, i16* %add.ptr
+ ret void
+}
+
+define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1]
+ store i16 %tmp1, i16* %add.ptr5
+ ret void
+}
+
+define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind {
+entry:
+ %tmp1 = load i16* %foo ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ store i32 %conv, i32* %bar
+ ret void
+}
+
+define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %conv, i32* %add.ptr5
+ ret void
+}
+
+define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %conv, i32* %add.ptr
+ ret void
+}
+
+define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %conv, i32* %add.ptr5
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemRegLoads.ll b/test/CodeGen/SystemZ/05-MemRegLoads.ll
new file mode 100644
index 000000000000..f690a4889962
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemRegLoads.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=systemz | not grep aghi
+; RUN: llc < %s -march=systemz | grep llgf | count 1
+; RUN: llc < %s -march=systemz | grep llgh | count 1
+; RUN: llc < %s -march=systemz | grep llgc | count 1
+; RUN: llc < %s -march=systemz | grep lgf | count 2
+; RUN: llc < %s -march=systemz | grep lgh | count 2
+; RUN: llc < %s -march=systemz | grep lgb | count 1
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1]
+ ret i64 %tmp3
+}
+
+define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1]
+ ret i16 %tmp3
+}
+
+define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1]
+ ret i8 %tmp3
+}
+
+define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1]
+ ret i64 %tmp3
+}
+
+define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1]
+ ret i16 %tmp3
+}
+
+define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1]
+ ret i8 %tmp3
+}
diff --git a/test/CodeGen/SystemZ/05-MemRegStores.ll b/test/CodeGen/SystemZ/05-MemRegStores.ll
new file mode 100644
index 000000000000..b851c3fa6e00
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemRegStores.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s | not grep aghi
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+
+; CHECK: foo1:
+; CHECK: stg %r4, 8(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ store i64 %val, i64* %add.ptr2
+ ret void
+}
+
+define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind {
+entry:
+; CHECK: foo2:
+; CHECK: st %r4, 4(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ store i32 %val, i32* %add.ptr2
+ ret void
+}
+
+define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind {
+entry:
+; CHECK: foo3:
+; CHECK: sth %r4, 2(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ store i16 %val, i16* %add.ptr2
+ ret void
+}
+
+define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind {
+entry:
+; CHECK: foo4:
+; CHECK: stc %r4, 1(%r3,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ store i8 %val, i8* %add.ptr2
+ ret void
+}
+
+define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo5:
+; CHECK: stc %r4, 1(%r3,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ %conv = trunc i64 %val to i8 ; <i8> [#uses=1]
+ store i8 %conv, i8* %add.ptr2
+ ret void
+}
+
+define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo6:
+; CHECK: sth %r4, 2(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ %conv = trunc i64 %val to i16 ; <i16> [#uses=1]
+ store i16 %conv, i16* %add.ptr2
+ ret void
+}
+
+define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo7:
+; CHECK: st %r4, 4(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ %conv = trunc i64 %val to i32 ; <i32> [#uses=1]
+ store i32 %conv, i32* %add.ptr2
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/06-CallViaStack.ll b/test/CodeGen/SystemZ/06-CallViaStack.ll
new file mode 100644
index 000000000000..e904f49de15f
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-CallViaStack.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | grep 168 | count 1
+; RUN: llc < %s | grep 160 | count 3
+; RUN: llc < %s | grep 328 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind {
+entry:
+ %a = alloca i64, align 8 ; <i64*> [#uses=3]
+ store i64 %g, i64* %a
+ call void @bar(i64* %a) nounwind
+ %tmp1 = load i64* %a ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
+
+declare void @bar(i64*)
diff --git a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
new file mode 100644
index 000000000000..c71da9b4418c
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | grep 160 | count 1
+; RUN: llc < %s | grep 168 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
+entry:
+ ret i64 %f
+}
+
+define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
+entry:
+ %conv = ptrtoint i64* %g to i64 ; <i64> [#uses=1]
+ ret i64 %conv
+}
diff --git a/test/CodeGen/SystemZ/06-LocalFrame.ll b/test/CodeGen/SystemZ/06-LocalFrame.ll
new file mode 100644
index 000000000000..d89b0dfc76c9
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-LocalFrame.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s | grep 160 | count 1
+; RUN: llc < %s | grep 328 | count 1
+; RUN: llc < %s | grep 168 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone {
+entry:
+ %g = alloca i64, align 8 ; <i64*> [#uses=1]
+ %add.ptr = getelementptr i64* %g, i64 %f ; <i64*> [#uses=1]
+ ret i64* %add.ptr
+}
diff --git a/test/CodeGen/SystemZ/06-SimpleCall.ll b/test/CodeGen/SystemZ/06-SimpleCall.ll
new file mode 100644
index 000000000000..fd4b5029c731
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-SimpleCall.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ tail call void @bar() nounwind
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/SystemZ/07-BrCond.ll b/test/CodeGen/SystemZ/07-BrCond.ll
new file mode 100644
index 000000000000..859971713aa3
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrCond.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s | grep je | count 1
+; RUN: llc < %s | grep jne | count 1
+; RUN: llc < %s | grep jhe | count 2
+; RUN: llc < %s | grep jle | count 2
+; RUN: llc < %s | grep jh | count 4
+; RUN: llc < %s | grep jl | count 4
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-BrCond32.ll b/test/CodeGen/SystemZ/07-BrCond32.ll
new file mode 100644
index 000000000000..8ece5ac09840
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrCond32.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s | grep je | count 1
+; RUN: llc < %s | grep jne | count 1
+; RUN: llc < %s | grep jhe | count 2
+; RUN: llc < %s | grep jle | count 2
+; RUN: llc < %s | grep jh | count 4
+; RUN: llc < %s | grep jl | count 4
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
new file mode 100644
index 000000000000..e0bc302c7314
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrUnCond.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-linux"
+
+define void @foo() noreturn nounwind {
+entry:
+ tail call void @baz() nounwind
+ br label %l1
+
+l1: ; preds = %entry, %l1
+ tail call void @bar() nounwind
+ br label %l1
+}
+
+declare void @bar()
+
+declare void @baz()
diff --git a/test/CodeGen/SystemZ/07-CmpImm.ll b/test/CodeGen/SystemZ/07-CmpImm.ll
new file mode 100644
index 000000000000..4d0ebda0c035
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-CmpImm.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s | grep cgfi | count 8
+; RUN: llc < %s | grep clgfi | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i64 %a) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i64 %a) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i64 %a) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i64 %a) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i64 %a) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i64 %a) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i64 %a) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-CmpImm32.ll b/test/CodeGen/SystemZ/07-CmpImm32.ll
new file mode 100644
index 000000000000..add34faafd3f
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-CmpImm32.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s | grep jl | count 3
+; RUN: llc < %s | grep jh | count 3
+; RUN: llc < %s | grep je | count 2
+; RUN: llc < %s | grep jne | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i32 %a) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i32 %a) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i32 %a) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i32 %a) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i32 %a) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i32 %a) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i32 %a) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-SelectCC.ll b/test/CodeGen/SystemZ/07-SelectCC.ll
new file mode 100644
index 000000000000..aa4b36e7d5d4
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-SelectCC.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep clgr
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
+ %cond = select i1 %cmp, i64 %a, i64 %b ; <i64> [#uses=1]
+ ret i64 %cond
+}
diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll
new file mode 100644
index 000000000000..ff1e441882a0
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-DivRem.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s | grep dsgr | count 2
+; RUN: llc < %s | grep dsgfr | count 2
+; RUN: llc < %s | grep dlr | count 2
+; RUN: llc < %s | grep dlgr | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @div(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %div = sdiv i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %div
+}
+
+define i32 @div1(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %div = sdiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %div
+}
+
+define i64 @div2(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %div = udiv i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %div
+}
+
+define i32 @div3(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %div = udiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %div
+}
+
+define i64 @rem(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %rem = srem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %rem
+}
+
+define i32 @rem1(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %rem = srem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %rem
+}
+
+define i64 @rem2(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %rem = urem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %rem
+}
+
+define i32 @rem3(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %rem = urem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %rem
+}
diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
new file mode 100644
index 000000000000..d6ec0e7440ac
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s | grep {dsgf.%} | count 2
+; RUN: llc < %s | grep {dsg.%} | count 2
+; RUN: llc < %s | grep {dl.%} | count 2
+; RUN: llc < %s | grep dlg | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @div(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = sdiv i64 %a, %b1
+ ret i64 %div
+}
+
+define i64 @div1(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = udiv i64 %a, %b1
+ ret i64 %div
+}
+
+define i64 @rem(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = srem i64 %a, %b1
+ ret i64 %div
+}
+
+define i64 @rem1(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = urem i64 %a, %b1
+ ret i64 %div
+}
+
+define i32 @div2(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = sdiv i32 %a, %b1
+ ret i32 %div
+}
+
+define i32 @div3(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = udiv i32 %a, %b1
+ ret i32 %div
+}
+
+define i32 @rem2(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = srem i32 %a, %b1
+ ret i32 %div
+}
+
+define i32 @rem3(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = urem i32 %a, %b1
+ ret i32 %div
+}
+
diff --git a/test/CodeGen/SystemZ/08-SimpleMuls.ll b/test/CodeGen/SystemZ/08-SimpleMuls.ll
new file mode 100644
index 000000000000..1ab88d6ee7dd
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-SimpleMuls.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | grep msgr | count 2
+; RUN: llc < %s | grep msr | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %mul = mul i64 %b, %a ; <i64> [#uses=1]
+ ret i64 %mul
+}
+
+define i64 @foo2(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %mul = mul i64 %b, %a ; <i64> [#uses=1]
+ ret i64 %mul
+}
+
+define i32 @foo3(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %mul = mul i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %mul
+}
+
+define i32 @foo4(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %mul = mul i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %mul
+}
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
new file mode 100644
index 000000000000..27189ab41567
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo(i64 %N) nounwind {
+entry:
+ %N3 = trunc i64 %N to i32 ; <i32> [#uses=1]
+ %vla = alloca i8, i32 %N3, align 2 ; <i8*> [#uses=1]
+ call void @bar(i8* %vla) nounwind
+ ret void
+}
+
+declare void @bar(i8*)
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
new file mode 100644
index 000000000000..6e0c1ab2c165
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-Globals.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | grep larl | count 3
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-linux"
+@bar = common global i64 0, align 8 ; <i64*> [#uses=3]
+
+define i64 @foo() nounwind readonly {
+entry:
+ %tmp = load i64* @bar ; <i64> [#uses=1]
+ ret i64 %tmp
+}
+
+define i64* @foo2() nounwind readnone {
+entry:
+ ret i64* @bar
+}
+
+define i64* @foo3(i64 %idx) nounwind readnone {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ ret i64* %add.ptr2
+}
diff --git a/test/CodeGen/SystemZ/09-Switches.ll b/test/CodeGen/SystemZ/09-Switches.ll
new file mode 100644
index 000000000000..32aaa62a58cf
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-Switches.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=systemz | grep larl
+
+define i32 @main(i32 %tmp158) {
+entry:
+ switch i32 %tmp158, label %bb336 [
+ i32 -2147483648, label %bb338
+ i32 -2147483647, label %bb338
+ i32 -2147483646, label %bb338
+ i32 120, label %bb338
+ i32 121, label %bb339
+ i32 122, label %bb340
+ i32 123, label %bb341
+ i32 124, label %bb342
+ i32 125, label %bb343
+ i32 126, label %bb336
+ i32 1024, label %bb338
+ i32 0, label %bb338
+ i32 1, label %bb338
+ i32 2, label %bb338
+ i32 3, label %bb338
+ i32 4, label %bb338
+ i32 5, label %bb338
+ ]
+bb336:
+ ret i32 10
+bb338:
+ ret i32 11
+bb339:
+ ret i32 12
+bb340:
+ ret i32 13
+bb341:
+ ret i32 14
+bb342:
+ ret i32 15
+bb343:
+ ret i32 18
+
+}
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
new file mode 100644
index 000000000000..cc325389d787
--- /dev/null
+++ b/test/CodeGen/SystemZ/10-FuncsPic.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3
+; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+@ptr = external global void (...)* ; <void (...)**> [#uses=2]
+
+define void @foo1() nounwind {
+entry:
+ store void (...)* @func, void (...)** @ptr
+ ret void
+}
+
+declare void @func(...)
+
+define void @foo2() nounwind {
+entry:
+ tail call void (...)* @func() nounwind
+ ret void
+}
+
+define void @foo3() nounwind {
+entry:
+ %tmp = load void (...)** @ptr ; <void (...)*> [#uses=1]
+ tail call void (...)* %tmp() nounwind
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
new file mode 100644
index 000000000000..a77671e2ba7b
--- /dev/null
+++ b/test/CodeGen/SystemZ/10-GlobalsPic.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+@src = external global i32 ; <i32*> [#uses=2]
+@dst = external global i32 ; <i32*> [#uses=2]
+@ptr = external global i32* ; <i32**> [#uses=2]
+
+define void @foo1() nounwind {
+entry:
+ %tmp = load i32* @src ; <i32> [#uses=1]
+ store i32 %tmp, i32* @dst
+ ret void
+}
+
+define void @foo2() nounwind {
+entry:
+ store i32* @dst, i32** @ptr
+ ret void
+}
+
+define void @foo3() nounwind {
+entry:
+ %tmp = load i32* @src ; <i32> [#uses=1]
+ %tmp1 = load i32** @ptr ; <i32*> [#uses=1]
+ %arrayidx = getelementptr i32* %tmp1, i64 1 ; <i32*> [#uses=1]
+ store i32 %tmp, i32* %arrayidx
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
new file mode 100644
index 000000000000..609d9dcf59c5
--- /dev/null
+++ b/test/CodeGen/SystemZ/11-BSwap.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+
+define i16 @foo(i16 zeroext %a) zeroext {
+ %res = tail call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %res
+}
+
+define i32 @foo2(i32 zeroext %a) zeroext {
+; CHECK: foo2:
+; CHECK: lrvr %r1, %r2
+ %res = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %res
+}
+
+define i64 @foo3(i64 %a) zeroext {
+; CHECK: foo3:
+; CHECK: lrvgr %r2, %r2
+ %res = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %res
+}
+
+define i16 @foo4(i16* %b) zeroext {
+ %a = load i16* %b
+ %res = tail call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %res
+}
+
+define i32 @foo5(i32* %b) zeroext {
+; CHECK: foo5:
+; CHECK: lrv %r1, 0(%r2)
+ %a = load i32* %b
+ %res = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %res
+}
+
+define i64 @foo6(i64* %b) {
+; CHECK: foo6:
+; CHECK: lrvg %r2, 0(%r2)
+ %a = load i64* %b
+ %res = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %res
+}
+
+define void @foo7(i16 %a, i16* %b) {
+ %res = tail call i16 @llvm.bswap.i16(i16 %a)
+ store i16 %res, i16* %b
+ ret void
+}
+
+define void @foo8(i32 %a, i32* %b) {
+; CHECK: foo8:
+; CHECK: strv %r2, 0(%r3)
+ %res = tail call i32 @llvm.bswap.i32(i32 %a)
+ store i32 %res, i32* %b
+ ret void
+}
+
+define void @foo9(i64 %a, i64* %b) {
+; CHECK: foo9:
+; CHECK: strvg %r2, 0(%r3)
+ %res = tail call i64 @llvm.bswap.i64(i64 %a)
+ store i64 %res, i64* %b
+ ret void
+}
+
+declare i16 @llvm.bswap.i16(i16) nounwind readnone
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
new file mode 100644
index 000000000000..65f8e14a9ee1
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i32 @main() nounwind {
+entry:
+ %call = call i32 (...)* @random() nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @random(...)
diff --git a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
new file mode 100644
index 000000000000..3cfa97dfc2ba
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=systemz | grep nilf | count 1
+; RUN: llc < %s -march=systemz | grep nill | count 1
+
+define i32 @gnu_dev_major(i64 %__dev) nounwind readnone {
+entry:
+ %shr = lshr i64 %__dev, 8 ; <i64> [#uses=1]
+ %shr8 = trunc i64 %shr to i32 ; <i32> [#uses=1]
+ %shr2 = lshr i64 %__dev, 32 ; <i64> [#uses=1]
+ %conv = trunc i64 %shr2 to i32 ; <i32> [#uses=1]
+ %and3 = and i32 %conv, -4096 ; <i32> [#uses=1]
+ %and6 = and i32 %shr8, 4095 ; <i32> [#uses=1]
+ %conv5 = or i32 %and6, %and3 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
new file mode 100644
index 000000000000..3317864c0147
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=systemz | grep rll
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %shl = shl i32 %x, 0 ; <i32> [#uses=1]
+ %sub = sub i32 32, 0 ; <i32> [#uses=1]
+ %shr = lshr i32 %x, %sub ; <i32> [#uses=1]
+ %or = or i32 %shr, %shl ; <i32> [#uses=1]
+ ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
new file mode 100644
index 000000000000..5f6ec50df6c4
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-ibm-linux"
+ %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+ %struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }>
+
+define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind {
+entry:
+ %cmp17.i = icmp slt i32 undef, %startpos ; <i1> [#uses=1]
+ %or.cond.i = or i1 undef, %cmp17.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i
+
+if.then20.i: ; preds = %entry
+ ret i32 -2
+
+byte_re_search_2.exit: ; preds = %entry
+ ret i32 -1
+}
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
new file mode 100644
index 000000000000..99d0ee7b03d9
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
+entry:
+ br label %for.body38
+
+for.body38: ; preds = %for.body38, %entry
+ br i1 undef, label %for.cond220, label %for.body38
+
+for.cond220: ; preds = %for.cond220, %for.body38
+ br i1 false, label %for.cond220, label %for.end297
+
+for.end297: ; preds = %for.cond220
+ %tmp334 = load i8* undef ; <i8> [#uses=1]
+ %conv343 = zext i8 %tmp334 to i32 ; <i32> [#uses=1]
+ %sub344 = add i32 %conv343, -1 ; <i32> [#uses=1]
+ %shl345 = shl i32 1, %sub344 ; <i32> [#uses=1]
+ %conv346 = sext i32 %shl345 to i64 ; <i64> [#uses=1]
+ br label %for.body356
+
+for.body356: ; preds = %for.body356, %for.end297
+ %mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ] ; <i64> [#uses=0]
+ br label %for.body356
+}
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
new file mode 100644
index 000000000000..a35167fba04f
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
+entry:
+ br i1 undef, label %for.body, label %return
+
+for.body: ; preds = %entry
+ %add = add i32 0, %col ; <i32> [#uses=1]
+ %sh_prom = zext i32 %add to i64 ; <i64> [#uses=1]
+ %shl = shl i64 1, %sh_prom ; <i64> [#uses=1]
+ br i1 undef, label %if.then13, label %if.else
+
+if.then13: ; preds = %for.body
+ ret i32 0
+
+if.else: ; preds = %for.body
+ %or34 = or i64 undef, %shl ; <i64> [#uses=0]
+ ret i32 0
+
+return: ; preds = %entry
+ ret i32 1
+}
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
new file mode 100644
index 000000000000..6a76a8e0cb1f
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep 168
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
+
+declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
+
+define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
+newFuncRoot:
+ br label %bb3
+
+bb4.exitStub: ; preds = %bb3
+ store double %call, double* %call.out
+ ret void
+
+bb3: ; preds = %newFuncRoot
+ tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind
+ %call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft) ; <double> [#uses=1]
+ br label %bb4.exitStub
+}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
new file mode 100644
index 000000000000..564d3438310a
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define float @foo(i32 signext %a) {
+entry:
+ %b = bitcast i32 %a to float
+ ret float %b
+}
+
+define i32 @bar(float %a) {
+entry:
+ %b = bitcast float %a to i32
+ ret i32 %b
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
new file mode 100644
index 000000000000..a91e29ea4f9d
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define signext i32 @dfg_parse() nounwind {
+entry:
+ br i1 undef, label %if.then2208, label %if.else2360
+
+if.then2208: ; preds = %entry
+ br i1 undef, label %bb.nph3189, label %for.end2270
+
+bb.nph3189: ; preds = %if.then2208
+ unreachable
+
+for.end2270: ; preds = %if.then2208
+ %call2279 = call i64 @strlen(i8* undef) nounwind ; <i64> [#uses=1]
+ %add2281 = add i64 0, %call2279 ; <i64> [#uses=1]
+ %tmp2283 = trunc i64 %add2281 to i32 ; <i32> [#uses=1]
+ %tmp2284 = alloca i8, i32 %tmp2283, align 2 ; <i8*> [#uses=1]
+ %yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13 ; <i8*> [#uses=1]
+ store i8 117, i8* %yyd.0.i2561.13
+ br label %while.cond.i2558
+
+while.cond.i2558: ; preds = %while.cond.i2558, %for.end2270
+ br label %while.cond.i2558
+
+if.else2360: ; preds = %entry
+ unreachable
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
new file mode 100644
index 000000000000..f7686f14da9e
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
+target triple = "s390x-ibm-linux-gnu"
+
+@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1]
+
+define internal void @frame_dummy() nounwind {
+entry:
+ %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2]
+ %0 = icmp eq void (i8*)* %asmtmp, null ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb3
+
+bb3: ; preds = %entry
+ tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
new file mode 100644
index 000000000000..fde7d9d281c2
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
+target triple = "s390x-ibm-linux-gnu"
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+; CHECK: cpsdr %f0, %f2, %f0
+ %0 = tail call double @copysign(double %a, double %b) nounwind readnone
+ ret double %0
+}
+
+define float @bar(float %a, float %b) nounwind {
+entry:
+; CHECK: cpsdr %f0, %f2, %f0
+ %0 = tail call float @copysignf(float %a, float %b) nounwind readnone
+ ret float %0
+}
+
+
+declare double @copysign(double, double) nounwind readnone
+declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/SystemZ/dg.exp b/test/CodeGen/SystemZ/dg.exp
new file mode 100644
index 000000000000..e9624bac68e9
--- /dev/null
+++ b/test/CodeGen/SystemZ/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target SystemZ] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 19c156d47f43..1e61b235a2bb 100644
--- a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin
+; RUN: llc < %s -mtriple=thumb-apple-darwin

%struct.rtx_def = type { i8 }
@str = external global [7 x i8]
diff --git a/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
index ee52cf0f4e7b..be2b839c21d7 100644
--- a/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
+++ b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin
+; RUN: llc < %s -mtriple=thumb-apple-darwin

%struct.color_sample = type { i32 }
%struct.ref = type { %struct.color_sample, i16, i16 }
diff --git a/test/CodeGen/Thumb/2007-03-06-AddR7.ll b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
index ad3e195a0dd7..8d139e92bd3b 100644
--- a/test/CodeGen/Thumb/2007-03-06-AddR7.ll
+++ b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6,+vfp2 | not grep {add r., r7, #2 \\* 4}

%struct.__fooAllocator = type opaque
diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 159be4eca334..2074bfd5d7b9 100644
--- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep r11
+; RUN: llc < %s | not grep r11

target triple = "thumb-linux-gnueabi"
%struct.__sched_param = type { i32 }
diff --git a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
index 9b2aba94ec8d..5c883b3930dc 100644
--- a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
+++ b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep r0 | count 1
+; RUN: llc < %s -march=thumb | grep r0 | count 1

define i32 @a(i32 %x, i32 %y) nounwind readnone {
entry:
diff --git a/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
new file mode 100644
index 000000000000..471a82f271e0
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv6-elf | not grep "subs sp"
+; PR4567
+
+define arm_apcscc i8* @__gets_chk(i8* %s, i32 %slen) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ ret i8* undef
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=0]
+ br label %bb4
+
+bb3: ; preds = %bb1
+ %1 = malloc i8, i32 undef ; <i8*> [#uses=0]
+ br label %bb4
+
+bb4: ; preds = %bb3, %bb2
+ br i1 undef, label %bb5, label %bb6
+
+bb5: ; preds = %bb4
+ %2 = call arm_apcscc i8* @gets(i8* %s) nounwind ; <i8*> [#uses=1]
+ ret i8* %2
+
+bb6: ; preds = %bb4
+ unreachable
+}
+
+declare arm_apcscc i8* @gets(i8*) nounwind
diff --git a/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
new file mode 100644
index 000000000000..6e035d0f70e6
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10
+
+@Time.2535 = external global i64 ; <i64*> [#uses=2]
+
+define arm_apcscc i64 @millisecs() nounwind {
+entry:
+ %0 = load i64* @Time.2535, align 4 ; <i64> [#uses=2]
+ %1 = add i64 %0, 1 ; <i64> [#uses=1]
+ store i64 %1, i64* @Time.2535, align 4
+ ret i64 %0
+}
diff --git a/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll b/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
new file mode 100644
index 000000000000..f195348e1403
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim
+
+ %struct.LinkList = type { i32, %struct.LinkList* }
+ %struct.List = type { i32, i32* }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ %ll = alloca %struct.LinkList*, align 4 ; <%struct.LinkList**> [#uses=1]
+ %0 = call arm_apcscc i32 @ReadList(%struct.LinkList** %ll, %struct.List** null) nounwind ; <i32> [#uses=1]
+ switch i32 %0, label %bb5 [
+ i32 7, label %bb4
+ i32 42, label %bb3
+ ]
+
+bb3: ; preds = %entry
+ ret i32 1
+
+bb4: ; preds = %entry
+ ret i32 0
+
+bb5: ; preds = %entry
+ ret i32 1
+}
+
+declare arm_apcscc i32 @ReadList(%struct.LinkList** nocapture, %struct.List** nocapture) nounwind
diff --git a/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
new file mode 100644
index 000000000000..ef4b5ce67c69
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
@@ -0,0 +1,737 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+
+ %struct.BF_KEY = type { [18 x i32], [1024 x i32] }
+
+define arm_apcscc void @BF_encrypt(i32* nocapture %data, %struct.BF_KEY* nocapture %key, i32 %encrypt) nounwind {
+entry:
+ %0 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2]
+ %1 = load i32* %data, align 4 ; <i32> [#uses=2]
+ %2 = load i32* undef, align 4 ; <i32> [#uses=2]
+ br i1 undef, label %bb1, label %bb
+
+bb: ; preds = %entry
+ %3 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %4 = xor i32 %3, %1 ; <i32> [#uses=4]
+ %5 = load i32* null, align 4 ; <i32> [#uses=1]
+ %6 = lshr i32 %4, 24 ; <i32> [#uses=1]
+ %7 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1]
+ %8 = load i32* %7, align 4 ; <i32> [#uses=1]
+ %9 = lshr i32 %4, 16 ; <i32> [#uses=1]
+ %10 = or i32 %9, 256 ; <i32> [#uses=1]
+ %11 = and i32 %10, 511 ; <i32> [#uses=1]
+ %12 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1]
+ %13 = load i32* %12, align 4 ; <i32> [#uses=1]
+ %14 = add i32 %13, %8 ; <i32> [#uses=1]
+ %15 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1]
+ %16 = load i32* %15, align 4 ; <i32> [#uses=1]
+ %17 = xor i32 %14, %16 ; <i32> [#uses=1]
+ %18 = or i32 %4, 768 ; <i32> [#uses=1]
+ %19 = and i32 %18, 1023 ; <i32> [#uses=1]
+ %20 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1]
+ %21 = load i32* %20, align 4 ; <i32> [#uses=1]
+ %22 = add i32 %17, %21 ; <i32> [#uses=1]
+ %23 = xor i32 %5, %2 ; <i32> [#uses=1]
+ %24 = xor i32 %23, %22 ; <i32> [#uses=5]
+ %25 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1]
+ %26 = load i32* %25, align 4 ; <i32> [#uses=1]
+ %27 = lshr i32 %24, 24 ; <i32> [#uses=1]
+ %28 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1]
+ %29 = load i32* %28, align 4 ; <i32> [#uses=1]
+ %30 = lshr i32 %24, 16 ; <i32> [#uses=1]
+ %31 = or i32 %30, 256 ; <i32> [#uses=1]
+ %32 = and i32 %31, 511 ; <i32> [#uses=1]
+ %33 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1]
+ %34 = load i32* %33, align 4 ; <i32> [#uses=1]
+ %35 = add i32 %34, %29 ; <i32> [#uses=1]
+ %36 = lshr i32 %24, 8 ; <i32> [#uses=1]
+ %37 = or i32 %36, 512 ; <i32> [#uses=1]
+ %38 = and i32 %37, 767 ; <i32> [#uses=1]
+ %39 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1]
+ %40 = load i32* %39, align 4 ; <i32> [#uses=1]
+ %41 = xor i32 %35, %40 ; <i32> [#uses=1]
+ %42 = or i32 %24, 768 ; <i32> [#uses=1]
+ %43 = and i32 %42, 1023 ; <i32> [#uses=1]
+ %44 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1]
+ %45 = load i32* %44, align 4 ; <i32> [#uses=1]
+ %46 = add i32 %41, %45 ; <i32> [#uses=1]
+ %47 = xor i32 %26, %4 ; <i32> [#uses=1]
+ %48 = xor i32 %47, %46 ; <i32> [#uses=5]
+ %49 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+ %50 = load i32* %49, align 4 ; <i32> [#uses=1]
+ %51 = lshr i32 %48, 24 ; <i32> [#uses=1]
+ %52 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1]
+ %53 = load i32* %52, align 4 ; <i32> [#uses=1]
+ %54 = lshr i32 %48, 16 ; <i32> [#uses=1]
+ %55 = or i32 %54, 256 ; <i32> [#uses=1]
+ %56 = and i32 %55, 511 ; <i32> [#uses=1]
+ %57 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1]
+ %58 = load i32* %57, align 4 ; <i32> [#uses=1]
+ %59 = add i32 %58, %53 ; <i32> [#uses=1]
+ %60 = lshr i32 %48, 8 ; <i32> [#uses=1]
+ %61 = or i32 %60, 512 ; <i32> [#uses=1]
+ %62 = and i32 %61, 767 ; <i32> [#uses=1]
+ %63 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1]
+ %64 = load i32* %63, align 4 ; <i32> [#uses=1]
+ %65 = xor i32 %59, %64 ; <i32> [#uses=1]
+ %66 = or i32 %48, 768 ; <i32> [#uses=1]
+ %67 = and i32 %66, 1023 ; <i32> [#uses=1]
+ %68 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1]
+ %69 = load i32* %68, align 4 ; <i32> [#uses=1]
+ %70 = add i32 %65, %69 ; <i32> [#uses=1]
+ %71 = xor i32 %50, %24 ; <i32> [#uses=1]
+ %72 = xor i32 %71, %70 ; <i32> [#uses=5]
+ %73 = load i32* null, align 4 ; <i32> [#uses=1]
+ %74 = lshr i32 %72, 24 ; <i32> [#uses=1]
+ %75 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1]
+ %76 = load i32* %75, align 4 ; <i32> [#uses=1]
+ %77 = lshr i32 %72, 16 ; <i32> [#uses=1]
+ %78 = or i32 %77, 256 ; <i32> [#uses=1]
+ %79 = and i32 %78, 511 ; <i32> [#uses=1]
+ %80 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1]
+ %81 = load i32* %80, align 4 ; <i32> [#uses=1]
+ %82 = add i32 %81, %76 ; <i32> [#uses=1]
+ %83 = lshr i32 %72, 8 ; <i32> [#uses=1]
+ %84 = or i32 %83, 512 ; <i32> [#uses=1]
+ %85 = and i32 %84, 767 ; <i32> [#uses=1]
+ %86 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1]
+ %87 = load i32* %86, align 4 ; <i32> [#uses=1]
+ %88 = xor i32 %82, %87 ; <i32> [#uses=1]
+ %89 = or i32 %72, 768 ; <i32> [#uses=1]
+ %90 = and i32 %89, 1023 ; <i32> [#uses=1]
+ %91 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1]
+ %92 = load i32* %91, align 4 ; <i32> [#uses=1]
+ %93 = add i32 %88, %92 ; <i32> [#uses=1]
+ %94 = xor i32 %73, %48 ; <i32> [#uses=1]
+ %95 = xor i32 %94, %93 ; <i32> [#uses=5]
+ %96 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %97 = lshr i32 %95, 24 ; <i32> [#uses=1]
+ %98 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1]
+ %99 = load i32* %98, align 4 ; <i32> [#uses=1]
+ %100 = lshr i32 %95, 16 ; <i32> [#uses=1]
+ %101 = or i32 %100, 256 ; <i32> [#uses=1]
+ %102 = and i32 %101, 511 ; <i32> [#uses=1]
+ %103 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1]
+ %104 = load i32* %103, align 4 ; <i32> [#uses=1]
+ %105 = add i32 %104, %99 ; <i32> [#uses=1]
+ %106 = lshr i32 %95, 8 ; <i32> [#uses=1]
+ %107 = or i32 %106, 512 ; <i32> [#uses=1]
+ %108 = and i32 %107, 767 ; <i32> [#uses=1]
+ %109 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1]
+ %110 = load i32* %109, align 4 ; <i32> [#uses=1]
+ %111 = xor i32 %105, %110 ; <i32> [#uses=1]
+ %112 = or i32 %95, 768 ; <i32> [#uses=1]
+ %113 = and i32 %112, 1023 ; <i32> [#uses=1]
+ %114 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1]
+ %115 = load i32* %114, align 4 ; <i32> [#uses=1]
+ %116 = add i32 %111, %115 ; <i32> [#uses=1]
+ %117 = xor i32 %96, %72 ; <i32> [#uses=1]
+ %118 = xor i32 %117, %116 ; <i32> [#uses=5]
+ %119 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+ %120 = load i32* %119, align 4 ; <i32> [#uses=1]
+ %121 = lshr i32 %118, 24 ; <i32> [#uses=1]
+ %122 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1]
+ %123 = load i32* %122, align 4 ; <i32> [#uses=1]
+ %124 = lshr i32 %118, 16 ; <i32> [#uses=1]
+ %125 = or i32 %124, 256 ; <i32> [#uses=1]
+ %126 = and i32 %125, 511 ; <i32> [#uses=1]
+ %127 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1]
+ %128 = load i32* %127, align 4 ; <i32> [#uses=1]
+ %129 = add i32 %128, %123 ; <i32> [#uses=1]
+ %130 = lshr i32 %118, 8 ; <i32> [#uses=1]
+ %131 = or i32 %130, 512 ; <i32> [#uses=1]
+ %132 = and i32 %131, 767 ; <i32> [#uses=1]
+ %133 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1]
+ %134 = load i32* %133, align 4 ; <i32> [#uses=1]
+ %135 = xor i32 %129, %134 ; <i32> [#uses=1]
+ %136 = or i32 %118, 768 ; <i32> [#uses=1]
+ %137 = and i32 %136, 1023 ; <i32> [#uses=1]
+ %138 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1]
+ %139 = load i32* %138, align 4 ; <i32> [#uses=1]
+ %140 = add i32 %135, %139 ; <i32> [#uses=1]
+ %141 = xor i32 %120, %95 ; <i32> [#uses=1]
+ %142 = xor i32 %141, %140 ; <i32> [#uses=5]
+ %143 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1]
+ %144 = load i32* %143, align 4 ; <i32> [#uses=1]
+ %145 = lshr i32 %142, 24 ; <i32> [#uses=1]
+ %146 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1]
+ %147 = load i32* %146, align 4 ; <i32> [#uses=1]
+ %148 = lshr i32 %142, 16 ; <i32> [#uses=1]
+ %149 = or i32 %148, 256 ; <i32> [#uses=1]
+ %150 = and i32 %149, 511 ; <i32> [#uses=1]
+ %151 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1]
+ %152 = load i32* %151, align 4 ; <i32> [#uses=1]
+ %153 = add i32 %152, %147 ; <i32> [#uses=1]
+ %154 = lshr i32 %142, 8 ; <i32> [#uses=1]
+ %155 = or i32 %154, 512 ; <i32> [#uses=1]
+ %156 = and i32 %155, 767 ; <i32> [#uses=1]
+ %157 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1]
+ %158 = load i32* %157, align 4 ; <i32> [#uses=1]
+ %159 = xor i32 %153, %158 ; <i32> [#uses=1]
+ %160 = or i32 %142, 768 ; <i32> [#uses=1]
+ %161 = and i32 %160, 1023 ; <i32> [#uses=1]
+ %162 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1]
+ %163 = load i32* %162, align 4 ; <i32> [#uses=1]
+ %164 = add i32 %159, %163 ; <i32> [#uses=1]
+ %165 = xor i32 %144, %118 ; <i32> [#uses=1]
+ %166 = xor i32 %165, %164 ; <i32> [#uses=5]
+ %167 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %168 = lshr i32 %166, 24 ; <i32> [#uses=1]
+ %169 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1]
+ %170 = load i32* %169, align 4 ; <i32> [#uses=1]
+ %171 = lshr i32 %166, 16 ; <i32> [#uses=1]
+ %172 = or i32 %171, 256 ; <i32> [#uses=1]
+ %173 = and i32 %172, 511 ; <i32> [#uses=1]
+ %174 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1]
+ %175 = load i32* %174, align 4 ; <i32> [#uses=1]
+ %176 = add i32 %175, %170 ; <i32> [#uses=1]
+ %177 = lshr i32 %166, 8 ; <i32> [#uses=1]
+ %178 = or i32 %177, 512 ; <i32> [#uses=1]
+ %179 = and i32 %178, 767 ; <i32> [#uses=1]
+ %180 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1]
+ %181 = load i32* %180, align 4 ; <i32> [#uses=1]
+ %182 = xor i32 %176, %181 ; <i32> [#uses=1]
+ %183 = or i32 %166, 768 ; <i32> [#uses=1]
+ %184 = and i32 %183, 1023 ; <i32> [#uses=1]
+ %185 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1]
+ %186 = load i32* %185, align 4 ; <i32> [#uses=1]
+ %187 = add i32 %182, %186 ; <i32> [#uses=1]
+ %188 = xor i32 %167, %142 ; <i32> [#uses=1]
+ %189 = xor i32 %188, %187 ; <i32> [#uses=5]
+ %190 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+ %191 = load i32* %190, align 4 ; <i32> [#uses=1]
+ %192 = lshr i32 %189, 24 ; <i32> [#uses=1]
+ %193 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1]
+ %194 = load i32* %193, align 4 ; <i32> [#uses=1]
+ %195 = lshr i32 %189, 16 ; <i32> [#uses=1]
+ %196 = or i32 %195, 256 ; <i32> [#uses=1]
+ %197 = and i32 %196, 511 ; <i32> [#uses=1]
+ %198 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1]
+ %199 = load i32* %198, align 4 ; <i32> [#uses=1]
+ %200 = add i32 %199, %194 ; <i32> [#uses=1]
+ %201 = lshr i32 %189, 8 ; <i32> [#uses=1]
+ %202 = or i32 %201, 512 ; <i32> [#uses=1]
+ %203 = and i32 %202, 767 ; <i32> [#uses=1]
+ %204 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1]
+ %205 = load i32* %204, align 4 ; <i32> [#uses=1]
+ %206 = xor i32 %200, %205 ; <i32> [#uses=1]
+ %207 = or i32 %189, 768 ; <i32> [#uses=1]
+ %208 = and i32 %207, 1023 ; <i32> [#uses=1]
+ %209 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1]
+ %210 = load i32* %209, align 4 ; <i32> [#uses=1]
+ %211 = add i32 %206, %210 ; <i32> [#uses=1]
+ %212 = xor i32 %191, %166 ; <i32> [#uses=1]
+ %213 = xor i32 %212, %211 ; <i32> [#uses=5]
+ %214 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+ %215 = load i32* %214, align 4 ; <i32> [#uses=1]
+ %216 = lshr i32 %213, 24 ; <i32> [#uses=1]
+ %217 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1]
+ %218 = load i32* %217, align 4 ; <i32> [#uses=1]
+ %219 = lshr i32 %213, 16 ; <i32> [#uses=1]
+ %220 = or i32 %219, 256 ; <i32> [#uses=1]
+ %221 = and i32 %220, 511 ; <i32> [#uses=1]
+ %222 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1]
+ %223 = load i32* %222, align 4 ; <i32> [#uses=1]
+ %224 = add i32 %223, %218 ; <i32> [#uses=1]
+ %225 = lshr i32 %213, 8 ; <i32> [#uses=1]
+ %226 = or i32 %225, 512 ; <i32> [#uses=1]
+ %227 = and i32 %226, 767 ; <i32> [#uses=1]
+ %228 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1]
+ %229 = load i32* %228, align 4 ; <i32> [#uses=1]
+ %230 = xor i32 %224, %229 ; <i32> [#uses=1]
+ %231 = or i32 %213, 768 ; <i32> [#uses=1]
+ %232 = and i32 %231, 1023 ; <i32> [#uses=1]
+ %233 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1]
+ %234 = load i32* %233, align 4 ; <i32> [#uses=1]
+ %235 = add i32 %230, %234 ; <i32> [#uses=1]
+ %236 = xor i32 %215, %189 ; <i32> [#uses=1]
+ %237 = xor i32 %236, %235 ; <i32> [#uses=5]
+ %238 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1]
+ %239 = load i32* %238, align 4 ; <i32> [#uses=1]
+ %240 = lshr i32 %237, 24 ; <i32> [#uses=1]
+ %241 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1]
+ %242 = load i32* %241, align 4 ; <i32> [#uses=1]
+ %243 = lshr i32 %237, 16 ; <i32> [#uses=1]
+ %244 = or i32 %243, 256 ; <i32> [#uses=1]
+ %245 = and i32 %244, 511 ; <i32> [#uses=1]
+ %246 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1]
+ %247 = load i32* %246, align 4 ; <i32> [#uses=1]
+ %248 = add i32 %247, %242 ; <i32> [#uses=1]
+ %249 = lshr i32 %237, 8 ; <i32> [#uses=1]
+ %250 = or i32 %249, 512 ; <i32> [#uses=1]
+ %251 = and i32 %250, 767 ; <i32> [#uses=1]
+ %252 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1]
+ %253 = load i32* %252, align 4 ; <i32> [#uses=1]
+ %254 = xor i32 %248, %253 ; <i32> [#uses=1]
+ %255 = or i32 %237, 768 ; <i32> [#uses=1]
+ %256 = and i32 %255, 1023 ; <i32> [#uses=1]
+ %257 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1]
+ %258 = load i32* %257, align 4 ; <i32> [#uses=1]
+ %259 = add i32 %254, %258 ; <i32> [#uses=1]
+ %260 = xor i32 %239, %213 ; <i32> [#uses=1]
+ %261 = xor i32 %260, %259 ; <i32> [#uses=5]
+ %262 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %263 = lshr i32 %261, 24 ; <i32> [#uses=1]
+ %264 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1]
+ %265 = load i32* %264, align 4 ; <i32> [#uses=1]
+ %266 = lshr i32 %261, 16 ; <i32> [#uses=1]
+ %267 = or i32 %266, 256 ; <i32> [#uses=1]
+ %268 = and i32 %267, 511 ; <i32> [#uses=1]
+ %269 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1]
+ %270 = load i32* %269, align 4 ; <i32> [#uses=1]
+ %271 = add i32 %270, %265 ; <i32> [#uses=1]
+ %272 = lshr i32 %261, 8 ; <i32> [#uses=1]
+ %273 = or i32 %272, 512 ; <i32> [#uses=1]
+ %274 = and i32 %273, 767 ; <i32> [#uses=1]
+ %275 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1]
+ %276 = load i32* %275, align 4 ; <i32> [#uses=1]
+ %277 = xor i32 %271, %276 ; <i32> [#uses=1]
+ %278 = or i32 %261, 768 ; <i32> [#uses=1]
+ %279 = and i32 %278, 1023 ; <i32> [#uses=1]
+ %280 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1]
+ %281 = load i32* %280, align 4 ; <i32> [#uses=1]
+ %282 = add i32 %277, %281 ; <i32> [#uses=1]
+ %283 = xor i32 %262, %237 ; <i32> [#uses=1]
+ %284 = xor i32 %283, %282 ; <i32> [#uses=4]
+ %285 = load i32* null, align 4 ; <i32> [#uses=1]
+ %286 = lshr i32 %284, 24 ; <i32> [#uses=1]
+ %287 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1]
+ %288 = load i32* %287, align 4 ; <i32> [#uses=1]
+ %289 = lshr i32 %284, 16 ; <i32> [#uses=1]
+ %290 = or i32 %289, 256 ; <i32> [#uses=1]
+ %291 = and i32 %290, 511 ; <i32> [#uses=1]
+ %292 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %291; <i32*> [#uses=1]
+ %293 = load i32* %292, align 4 ; <i32> [#uses=1]
+ %294 = add i32 %293, %288 ; <i32> [#uses=1]
+ %295 = lshr i32 %284, 8 ; <i32> [#uses=1]
+ %296 = or i32 %295, 512 ; <i32> [#uses=1]
+ %297 = and i32 %296, 767 ; <i32> [#uses=1]
+ %298 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1]
+ %299 = load i32* %298, align 4 ; <i32> [#uses=1]
+ %300 = xor i32 %294, %299 ; <i32> [#uses=1]
+ %301 = or i32 %284, 768 ; <i32> [#uses=1]
+ %302 = and i32 %301, 1023 ; <i32> [#uses=1]
+ %303 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1]
+ %304 = load i32* %303, align 4 ; <i32> [#uses=1]
+ %305 = add i32 %300, %304 ; <i32> [#uses=1]
+ %306 = xor i32 %285, %261 ; <i32> [#uses=1]
+ %307 = xor i32 %306, %305 ; <i32> [#uses=1]
+ %308 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+ %309 = load i32* %308, align 4 ; <i32> [#uses=1]
+ %310 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1]
+ %311 = load i32* %310, align 4 ; <i32> [#uses=1]
+ %312 = or i32 0, 256 ; <i32> [#uses=1]
+ %313 = and i32 %312, 511 ; <i32> [#uses=1]
+ %314 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1]
+ %315 = load i32* %314, align 4 ; <i32> [#uses=1]
+ %316 = add i32 %315, %311 ; <i32> [#uses=1]
+ %317 = or i32 0, 512 ; <i32> [#uses=1]
+ %318 = and i32 %317, 767 ; <i32> [#uses=1]
+ %319 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1]
+ %320 = load i32* %319, align 4 ; <i32> [#uses=1]
+ %321 = xor i32 %316, %320 ; <i32> [#uses=1]
+ %322 = or i32 0, 768 ; <i32> [#uses=1]
+ %323 = and i32 %322, 1023 ; <i32> [#uses=1]
+ %324 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1]
+ %325 = load i32* %324, align 4 ; <i32> [#uses=1]
+ %326 = add i32 %321, %325 ; <i32> [#uses=1]
+ %327 = xor i32 %309, %307 ; <i32> [#uses=1]
+ %328 = xor i32 %327, %326 ; <i32> [#uses=5]
+ %329 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 17; <i32*> [#uses=1]
+ br label %bb2
+
+bb1: ; preds = %entry
+ %330 = load i32* null, align 4 ; <i32> [#uses=1]
+ %331 = xor i32 %330, %1 ; <i32> [#uses=4]
+ %332 = load i32* null, align 4 ; <i32> [#uses=1]
+ %333 = lshr i32 %331, 24 ; <i32> [#uses=1]
+ %334 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1]
+ %335 = load i32* %334, align 4 ; <i32> [#uses=1]
+ %336 = load i32* null, align 4 ; <i32> [#uses=1]
+ %337 = add i32 %336, %335 ; <i32> [#uses=1]
+ %338 = lshr i32 %331, 8 ; <i32> [#uses=1]
+ %339 = or i32 %338, 512 ; <i32> [#uses=1]
+ %340 = and i32 %339, 767 ; <i32> [#uses=1]
+ %341 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1]
+ %342 = load i32* %341, align 4 ; <i32> [#uses=1]
+ %343 = xor i32 %337, %342 ; <i32> [#uses=1]
+ %344 = or i32 %331, 768 ; <i32> [#uses=1]
+ %345 = and i32 %344, 1023 ; <i32> [#uses=1]
+ %346 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1]
+ %347 = load i32* %346, align 4 ; <i32> [#uses=1]
+ %348 = add i32 %343, %347 ; <i32> [#uses=1]
+ %349 = xor i32 %332, %2 ; <i32> [#uses=1]
+ %350 = xor i32 %349, %348 ; <i32> [#uses=5]
+ %351 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+ %352 = load i32* %351, align 4 ; <i32> [#uses=1]
+ %353 = lshr i32 %350, 24 ; <i32> [#uses=1]
+ %354 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1]
+ %355 = load i32* %354, align 4 ; <i32> [#uses=1]
+ %356 = lshr i32 %350, 16 ; <i32> [#uses=1]
+ %357 = or i32 %356, 256 ; <i32> [#uses=1]
+ %358 = and i32 %357, 511 ; <i32> [#uses=1]
+ %359 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1]
+ %360 = load i32* %359, align 4 ; <i32> [#uses=1]
+ %361 = add i32 %360, %355 ; <i32> [#uses=1]
+ %362 = lshr i32 %350, 8 ; <i32> [#uses=1]
+ %363 = or i32 %362, 512 ; <i32> [#uses=1]
+ %364 = and i32 %363, 767 ; <i32> [#uses=1]
+ %365 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1]
+ %366 = load i32* %365, align 4 ; <i32> [#uses=1]
+ %367 = xor i32 %361, %366 ; <i32> [#uses=1]
+ %368 = or i32 %350, 768 ; <i32> [#uses=1]
+ %369 = and i32 %368, 1023 ; <i32> [#uses=1]
+ %370 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1]
+ %371 = load i32* %370, align 4 ; <i32> [#uses=1]
+ %372 = add i32 %367, %371 ; <i32> [#uses=1]
+ %373 = xor i32 %352, %331 ; <i32> [#uses=1]
+ %374 = xor i32 %373, %372 ; <i32> [#uses=5]
+ %375 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1]
+ %376 = load i32* %375, align 4 ; <i32> [#uses=1]
+ %377 = lshr i32 %374, 24 ; <i32> [#uses=1]
+ %378 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1]
+ %379 = load i32* %378, align 4 ; <i32> [#uses=1]
+ %380 = lshr i32 %374, 16 ; <i32> [#uses=1]
+ %381 = or i32 %380, 256 ; <i32> [#uses=1]
+ %382 = and i32 %381, 511 ; <i32> [#uses=1]
+ %383 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1]
+ %384 = load i32* %383, align 4 ; <i32> [#uses=1]
+ %385 = add i32 %384, %379 ; <i32> [#uses=1]
+ %386 = lshr i32 %374, 8 ; <i32> [#uses=1]
+ %387 = or i32 %386, 512 ; <i32> [#uses=1]
+ %388 = and i32 %387, 767 ; <i32> [#uses=1]
+ %389 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1]
+ %390 = load i32* %389, align 4 ; <i32> [#uses=1]
+ %391 = xor i32 %385, %390 ; <i32> [#uses=1]
+ %392 = or i32 %374, 768 ; <i32> [#uses=1]
+ %393 = and i32 %392, 1023 ; <i32> [#uses=1]
+ %394 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1]
+ %395 = load i32* %394, align 4 ; <i32> [#uses=1]
+ %396 = add i32 %391, %395 ; <i32> [#uses=1]
+ %397 = xor i32 %376, %350 ; <i32> [#uses=1]
+ %398 = xor i32 %397, %396 ; <i32> [#uses=5]
+ %399 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1]
+ %400 = load i32* %399, align 4 ; <i32> [#uses=1]
+ %401 = lshr i32 %398, 24 ; <i32> [#uses=1]
+ %402 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1]
+ %403 = load i32* %402, align 4 ; <i32> [#uses=1]
+ %404 = lshr i32 %398, 16 ; <i32> [#uses=1]
+ %405 = or i32 %404, 256 ; <i32> [#uses=1]
+ %406 = and i32 %405, 511 ; <i32> [#uses=1]
+ %407 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1]
+ %408 = load i32* %407, align 4 ; <i32> [#uses=1]
+ %409 = add i32 %408, %403 ; <i32> [#uses=1]
+ %410 = lshr i32 %398, 8 ; <i32> [#uses=1]
+ %411 = or i32 %410, 512 ; <i32> [#uses=1]
+ %412 = and i32 %411, 767 ; <i32> [#uses=1]
+ %413 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1]
+ %414 = load i32* %413, align 4 ; <i32> [#uses=1]
+ %415 = xor i32 %409, %414 ; <i32> [#uses=1]
+ %416 = or i32 %398, 768 ; <i32> [#uses=1]
+ %417 = and i32 %416, 1023 ; <i32> [#uses=1]
+ %418 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1]
+ %419 = load i32* %418, align 4 ; <i32> [#uses=1]
+ %420 = add i32 %415, %419 ; <i32> [#uses=1]
+ %421 = xor i32 %400, %374 ; <i32> [#uses=1]
+ %422 = xor i32 %421, %420 ; <i32> [#uses=5]
+ %423 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1]
+ %424 = load i32* %423, align 4 ; <i32> [#uses=1]
+ %425 = lshr i32 %422, 24 ; <i32> [#uses=1]
+ %426 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1]
+ %427 = load i32* %426, align 4 ; <i32> [#uses=1]
+ %428 = lshr i32 %422, 16 ; <i32> [#uses=1]
+ %429 = or i32 %428, 256 ; <i32> [#uses=1]
+ %430 = and i32 %429, 511 ; <i32> [#uses=1]
+ %431 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1]
+ %432 = load i32* %431, align 4 ; <i32> [#uses=1]
+ %433 = add i32 %432, %427 ; <i32> [#uses=1]
+ %434 = lshr i32 %422, 8 ; <i32> [#uses=1]
+ %435 = or i32 %434, 512 ; <i32> [#uses=1]
+ %436 = and i32 %435, 767 ; <i32> [#uses=1]
+ %437 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %436; <i32*> [#uses=1]
+ %438 = load i32* %437, align 4 ; <i32> [#uses=1]
+ %439 = xor i32 %433, %438 ; <i32> [#uses=1]
+ %440 = or i32 %422, 768 ; <i32> [#uses=1]
+ %441 = and i32 %440, 1023 ; <i32> [#uses=1]
+ %442 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1]
+ %443 = load i32* %442, align 4 ; <i32> [#uses=1]
+ %444 = add i32 %439, %443 ; <i32> [#uses=1]
+ %445 = xor i32 %424, %398 ; <i32> [#uses=1]
+ %446 = xor i32 %445, %444 ; <i32> [#uses=5]
+ %447 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %448 = lshr i32 %446, 24 ; <i32> [#uses=1]
+ %449 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1]
+ %450 = load i32* %449, align 4 ; <i32> [#uses=1]
+ %451 = lshr i32 %446, 16 ; <i32> [#uses=1]
+ %452 = or i32 %451, 256 ; <i32> [#uses=1]
+ %453 = and i32 %452, 511 ; <i32> [#uses=1]
+ %454 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1]
+ %455 = load i32* %454, align 4 ; <i32> [#uses=1]
+ %456 = add i32 %455, %450 ; <i32> [#uses=1]
+ %457 = lshr i32 %446, 8 ; <i32> [#uses=1]
+ %458 = or i32 %457, 512 ; <i32> [#uses=1]
+ %459 = and i32 %458, 767 ; <i32> [#uses=1]
+ %460 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1]
+ %461 = load i32* %460, align 4 ; <i32> [#uses=1]
+ %462 = xor i32 %456, %461 ; <i32> [#uses=1]
+ %463 = or i32 %446, 768 ; <i32> [#uses=1]
+ %464 = and i32 %463, 1023 ; <i32> [#uses=1]
+ %465 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1]
+ %466 = load i32* %465, align 4 ; <i32> [#uses=1]
+ %467 = add i32 %462, %466 ; <i32> [#uses=1]
+ %468 = xor i32 %447, %422 ; <i32> [#uses=1]
+ %469 = xor i32 %468, %467 ; <i32> [#uses=5]
+ %470 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+ %471 = load i32* %470, align 4 ; <i32> [#uses=1]
+ %472 = lshr i32 %469, 24 ; <i32> [#uses=1]
+ %473 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1]
+ %474 = load i32* %473, align 4 ; <i32> [#uses=1]
+ %475 = lshr i32 %469, 16 ; <i32> [#uses=1]
+ %476 = or i32 %475, 256 ; <i32> [#uses=1]
+ %477 = and i32 %476, 511 ; <i32> [#uses=1]
+ %478 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1]
+ %479 = load i32* %478, align 4 ; <i32> [#uses=1]
+ %480 = add i32 %479, %474 ; <i32> [#uses=1]
+ %481 = lshr i32 %469, 8 ; <i32> [#uses=1]
+ %482 = or i32 %481, 512 ; <i32> [#uses=1]
+ %483 = and i32 %482, 767 ; <i32> [#uses=1]
+ %484 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1]
+ %485 = load i32* %484, align 4 ; <i32> [#uses=1]
+ %486 = xor i32 %480, %485 ; <i32> [#uses=1]
+ %487 = or i32 %469, 768 ; <i32> [#uses=1]
+ %488 = and i32 %487, 1023 ; <i32> [#uses=1]
+ %489 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1]
+ %490 = load i32* %489, align 4 ; <i32> [#uses=1]
+ %491 = add i32 %486, %490 ; <i32> [#uses=1]
+ %492 = xor i32 %471, %446 ; <i32> [#uses=1]
+ %493 = xor i32 %492, %491 ; <i32> [#uses=5]
+ %494 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+ %495 = load i32* %494, align 4 ; <i32> [#uses=1]
+ %496 = lshr i32 %493, 24 ; <i32> [#uses=1]
+ %497 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1]
+ %498 = load i32* %497, align 4 ; <i32> [#uses=1]
+ %499 = lshr i32 %493, 16 ; <i32> [#uses=1]
+ %500 = or i32 %499, 256 ; <i32> [#uses=1]
+ %501 = and i32 %500, 511 ; <i32> [#uses=1]
+ %502 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1]
+ %503 = load i32* %502, align 4 ; <i32> [#uses=1]
+ %504 = add i32 %503, %498 ; <i32> [#uses=1]
+ %505 = lshr i32 %493, 8 ; <i32> [#uses=1]
+ %506 = or i32 %505, 512 ; <i32> [#uses=1]
+ %507 = and i32 %506, 767 ; <i32> [#uses=1]
+ %508 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1]
+ %509 = load i32* %508, align 4 ; <i32> [#uses=1]
+ %510 = xor i32 %504, %509 ; <i32> [#uses=1]
+ %511 = or i32 %493, 768 ; <i32> [#uses=1]
+ %512 = and i32 %511, 1023 ; <i32> [#uses=1]
+ %513 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1]
+ %514 = load i32* %513, align 4 ; <i32> [#uses=1]
+ %515 = add i32 %510, %514 ; <i32> [#uses=1]
+ %516 = xor i32 %495, %469 ; <i32> [#uses=1]
+ %517 = xor i32 %516, %515 ; <i32> [#uses=5]
+ %518 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1]
+ %519 = load i32* %518, align 4 ; <i32> [#uses=1]
+ %520 = lshr i32 %517, 24 ; <i32> [#uses=1]
+ %521 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1]
+ %522 = load i32* %521, align 4 ; <i32> [#uses=1]
+ %523 = lshr i32 %517, 16 ; <i32> [#uses=1]
+ %524 = or i32 %523, 256 ; <i32> [#uses=1]
+ %525 = and i32 %524, 511 ; <i32> [#uses=1]
+ %526 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1]
+ %527 = load i32* %526, align 4 ; <i32> [#uses=1]
+ %528 = add i32 %527, %522 ; <i32> [#uses=1]
+ %529 = lshr i32 %517, 8 ; <i32> [#uses=1]
+ %530 = or i32 %529, 512 ; <i32> [#uses=1]
+ %531 = and i32 %530, 767 ; <i32> [#uses=1]
+ %532 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1]
+ %533 = load i32* %532, align 4 ; <i32> [#uses=1]
+ %534 = xor i32 %528, %533 ; <i32> [#uses=1]
+ %535 = or i32 %517, 768 ; <i32> [#uses=1]
+ %536 = and i32 %535, 1023 ; <i32> [#uses=1]
+ %537 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1]
+ %538 = load i32* %537, align 4 ; <i32> [#uses=1]
+ %539 = add i32 %534, %538 ; <i32> [#uses=1]
+ %540 = xor i32 %519, %493 ; <i32> [#uses=1]
+ %541 = xor i32 %540, %539 ; <i32> [#uses=5]
+ %542 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %543 = lshr i32 %541, 24 ; <i32> [#uses=1]
+ %544 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1]
+ %545 = load i32* %544, align 4 ; <i32> [#uses=1]
+ %546 = lshr i32 %541, 16 ; <i32> [#uses=1]
+ %547 = or i32 %546, 256 ; <i32> [#uses=1]
+ %548 = and i32 %547, 511 ; <i32> [#uses=1]
+ %549 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1]
+ %550 = load i32* %549, align 4 ; <i32> [#uses=1]
+ %551 = add i32 %550, %545 ; <i32> [#uses=1]
+ %552 = lshr i32 %541, 8 ; <i32> [#uses=1]
+ %553 = or i32 %552, 512 ; <i32> [#uses=1]
+ %554 = and i32 %553, 767 ; <i32> [#uses=1]
+ %555 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1]
+ %556 = load i32* %555, align 4 ; <i32> [#uses=1]
+ %557 = xor i32 %551, %556 ; <i32> [#uses=1]
+ %558 = or i32 %541, 768 ; <i32> [#uses=1]
+ %559 = and i32 %558, 1023 ; <i32> [#uses=1]
+ %560 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1]
+ %561 = load i32* %560, align 4 ; <i32> [#uses=1]
+ %562 = add i32 %557, %561 ; <i32> [#uses=1]
+ %563 = xor i32 %542, %517 ; <i32> [#uses=1]
+ %564 = xor i32 %563, %562 ; <i32> [#uses=5]
+ %565 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+ %566 = load i32* %565, align 4 ; <i32> [#uses=1]
+ %567 = lshr i32 %564, 24 ; <i32> [#uses=1]
+ %568 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1]
+ %569 = load i32* %568, align 4 ; <i32> [#uses=1]
+ %570 = lshr i32 %564, 16 ; <i32> [#uses=1]
+ %571 = or i32 %570, 256 ; <i32> [#uses=1]
+ %572 = and i32 %571, 511 ; <i32> [#uses=1]
+ %573 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1]
+ %574 = load i32* %573, align 4 ; <i32> [#uses=1]
+ %575 = add i32 %574, %569 ; <i32> [#uses=1]
+ %576 = lshr i32 %564, 8 ; <i32> [#uses=1]
+ %577 = or i32 %576, 512 ; <i32> [#uses=1]
+ %578 = and i32 %577, 767 ; <i32> [#uses=1]
+ %579 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1]
+ %580 = load i32* %579, align 4 ; <i32> [#uses=1]
+ %581 = xor i32 %575, %580 ; <i32> [#uses=1]
+ %582 = or i32 %564, 768 ; <i32> [#uses=1]
+ %583 = and i32 %582, 1023 ; <i32> [#uses=1]
+ %584 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %583; <i32*> [#uses=1]
+ %585 = load i32* %584, align 4 ; <i32> [#uses=1]
+ %586 = add i32 %581, %585 ; <i32> [#uses=1]
+ %587 = xor i32 %566, %541 ; <i32> [#uses=1]
+ %588 = xor i32 %587, %586 ; <i32> [#uses=5]
+ %589 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1]
+ %590 = load i32* %589, align 4 ; <i32> [#uses=1]
+ %591 = lshr i32 %588, 24 ; <i32> [#uses=1]
+ %592 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1]
+ %593 = load i32* %592, align 4 ; <i32> [#uses=1]
+ %594 = lshr i32 %588, 16 ; <i32> [#uses=1]
+ %595 = or i32 %594, 256 ; <i32> [#uses=1]
+ %596 = and i32 %595, 511 ; <i32> [#uses=1]
+ %597 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1]
+ %598 = load i32* %597, align 4 ; <i32> [#uses=1]
+ %599 = add i32 %598, %593 ; <i32> [#uses=1]
+ %600 = lshr i32 %588, 8 ; <i32> [#uses=1]
+ %601 = or i32 %600, 512 ; <i32> [#uses=1]
+ %602 = and i32 %601, 767 ; <i32> [#uses=1]
+ %603 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1]
+ %604 = load i32* %603, align 4 ; <i32> [#uses=1]
+ %605 = xor i32 %599, %604 ; <i32> [#uses=1]
+ %606 = or i32 %588, 768 ; <i32> [#uses=1]
+ %607 = and i32 %606, 1023 ; <i32> [#uses=1]
+ %608 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1]
+ %609 = load i32* %608, align 4 ; <i32> [#uses=1]
+ %610 = add i32 %605, %609 ; <i32> [#uses=1]
+ %611 = xor i32 %590, %564 ; <i32> [#uses=1]
+ %612 = xor i32 %611, %610 ; <i32> [#uses=5]
+ %613 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1]
+ %614 = load i32* %613, align 4 ; <i32> [#uses=1]
+ %615 = lshr i32 %612, 24 ; <i32> [#uses=1]
+ %616 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1]
+ %617 = load i32* %616, align 4 ; <i32> [#uses=1]
+ %618 = lshr i32 %612, 16 ; <i32> [#uses=1]
+ %619 = or i32 %618, 256 ; <i32> [#uses=1]
+ %620 = and i32 %619, 511 ; <i32> [#uses=1]
+ %621 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1]
+ %622 = load i32* %621, align 4 ; <i32> [#uses=1]
+ %623 = add i32 %622, %617 ; <i32> [#uses=1]
+ %624 = lshr i32 %612, 8 ; <i32> [#uses=1]
+ %625 = or i32 %624, 512 ; <i32> [#uses=1]
+ %626 = and i32 %625, 767 ; <i32> [#uses=1]
+ %627 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1]
+ %628 = load i32* %627, align 4 ; <i32> [#uses=1]
+ %629 = xor i32 %623, %628 ; <i32> [#uses=1]
+ %630 = or i32 %612, 768 ; <i32> [#uses=1]
+ %631 = and i32 %630, 1023 ; <i32> [#uses=1]
+ %632 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1]
+ %633 = load i32* %632, align 4 ; <i32> [#uses=1]
+ %634 = add i32 %629, %633 ; <i32> [#uses=1]
+ %635 = xor i32 %614, %588 ; <i32> [#uses=1]
+ %636 = xor i32 %635, %634 ; <i32> [#uses=5]
+ %637 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+ %638 = load i32* %637, align 4 ; <i32> [#uses=1]
+ %639 = lshr i32 %636, 24 ; <i32> [#uses=1]
+ %640 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1]
+ %641 = load i32* %640, align 4 ; <i32> [#uses=1]
+ %642 = lshr i32 %636, 16 ; <i32> [#uses=1]
+ %643 = or i32 %642, 256 ; <i32> [#uses=1]
+ %644 = and i32 %643, 511 ; <i32> [#uses=1]
+ %645 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1]
+ %646 = load i32* %645, align 4 ; <i32> [#uses=1]
+ %647 = add i32 %646, %641 ; <i32> [#uses=1]
+ %648 = lshr i32 %636, 8 ; <i32> [#uses=1]
+ %649 = or i32 %648, 512 ; <i32> [#uses=1]
+ %650 = and i32 %649, 767 ; <i32> [#uses=1]
+ %651 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1]
+ %652 = load i32* %651, align 4 ; <i32> [#uses=1]
+ %653 = xor i32 %647, %652 ; <i32> [#uses=1]
+ %654 = or i32 %636, 768 ; <i32> [#uses=1]
+ %655 = and i32 %654, 1023 ; <i32> [#uses=1]
+ %656 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1]
+ %657 = load i32* %656, align 4 ; <i32> [#uses=1]
+ %658 = add i32 %653, %657 ; <i32> [#uses=1]
+ %659 = xor i32 %638, %612 ; <i32> [#uses=1]
+ %660 = xor i32 %659, %658 ; <i32> [#uses=5]
+ %661 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %662 = lshr i32 %660, 24 ; <i32> [#uses=1]
+ %663 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1]
+ %664 = load i32* %663, align 4 ; <i32> [#uses=1]
+ %665 = lshr i32 %660, 16 ; <i32> [#uses=1]
+ %666 = or i32 %665, 256 ; <i32> [#uses=1]
+ %667 = and i32 %666, 511 ; <i32> [#uses=1]
+ %668 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1]
+ %669 = load i32* %668, align 4 ; <i32> [#uses=1]
+ %670 = add i32 %669, %664 ; <i32> [#uses=1]
+ %671 = lshr i32 %660, 8 ; <i32> [#uses=1]
+ %672 = or i32 %671, 512 ; <i32> [#uses=1]
+ %673 = and i32 %672, 767 ; <i32> [#uses=1]
+ %674 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1]
+ %675 = load i32* %674, align 4 ; <i32> [#uses=1]
+ %676 = xor i32 %670, %675 ; <i32> [#uses=1]
+ %677 = or i32 %660, 768 ; <i32> [#uses=1]
+ %678 = and i32 %677, 1023 ; <i32> [#uses=1]
+ %679 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1]
+ %680 = load i32* %679, align 4 ; <i32> [#uses=1]
+ %681 = add i32 %676, %680 ; <i32> [#uses=1]
+ %682 = xor i32 %661, %636 ; <i32> [#uses=1]
+ %683 = xor i32 %682, %681 ; <i32> [#uses=5]
+ %684 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 1; <i32*> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb1, %bb
+ %.pn2.in = phi i32* [ %329, %bb ], [ %0, %bb1 ]; <i32*> [#uses=1]
+ %.pn3 = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn15.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn14.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn13.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn10.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn4.in = phi i32* [ null, %bb ], [ %684, %bb1 ]; <i32*> [#uses=1]
+ %.pn5 = phi i32 [ 0, %bb ], [ %660, %bb1 ]; <i32> [#uses=1]
+ %.pn14.in.in = lshr i32 %.pn14.in.in.in, 16; <i32> [#uses=1]
+ %.pn14.in = or i32 %.pn14.in.in, 256 ; <i32> [#uses=1]
+ %.pn13.in.in = lshr i32 %.pn13.in.in.in, 8; <i32> [#uses=1]
+ %.pn15 = lshr i32 %.pn15.in, 24 ; <i32> [#uses=1]
+ %.pn14 = and i32 %.pn14.in, 511 ; <i32> [#uses=1]
+ %.pn13.in = or i32 %.pn13.in.in, 512 ; <i32> [#uses=1]
+ %.pn11.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn15; <i32*> [#uses=1]
+ %.pn12.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1]
+ %.pn13 = and i32 %.pn13.in, 767 ; <i32> [#uses=1]
+ %.pn10.in = or i32 %.pn10.in.in, 768 ; <i32> [#uses=1]
+ %.pn11 = load i32* %.pn11.in ; <i32> [#uses=1]
+ %.pn12 = load i32* %.pn12.in ; <i32> [#uses=1]
+ %.pn9.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1]
+ %.pn10 = and i32 %.pn10.in, 1023 ; <i32> [#uses=1]
+ %.pn8 = add i32 %.pn12, %.pn11 ; <i32> [#uses=1]
+ %.pn9 = load i32* %.pn9.in ; <i32> [#uses=1]
+ %.pn7.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1]
+ %.pn6 = xor i32 %.pn8, %.pn9 ; <i32> [#uses=1]
+ %.pn7 = load i32* %.pn7.in ; <i32> [#uses=1]
+ %.pn4 = load i32* %.pn4.in ; <i32> [#uses=1]
+ %.pn2 = load i32* %.pn2.in ; <i32> [#uses=1]
+ %.pn = add i32 %.pn6, %.pn7 ; <i32> [#uses=1]
+ %r.0 = xor i32 %.pn2, %.pn3 ; <i32> [#uses=1]
+ %.pn1 = xor i32 %.pn, %.pn5 ; <i32> [#uses=1]
+ %l.0 = xor i32 %.pn1, %.pn4 ; <i32> [#uses=1]
+ store i32 %l.0, i32* undef, align 4
+ store i32 %r.0, i32* %data, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
new file mode 100644
index 000000000000..b6e67b1bee00
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+
+ %struct.vorbis_comment = type { i8**, i32*, i32, i8* }
+@.str16 = external constant [2 x i8], align 1 ; <[2 x i8]*> [#uses=1]
+
+declare arm_apcscc i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+
+declare arm_apcscc i8* @__strcat_chk(i8*, i8*, i32) nounwind
+
+define arm_apcscc i8* @vorbis_comment_query(%struct.vorbis_comment* nocapture %vc, i8* %tag, i32 %count) nounwind {
+entry:
+ %0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=2]
+ %1 = call arm_apcscc i8* @__strcpy_chk(i8* %0, i8* %tag, i32 -1) nounwind; <i8*> [#uses=0]
+ %2 = call arm_apcscc i8* @__strcat_chk(i8* %0, i8* getelementptr ([2 x i8]* @.str16, i32 0, i32 0), i32 -1) nounwind; <i8*> [#uses=0]
+ %3 = getelementptr %struct.vorbis_comment* %vc, i32 0, i32 0; <i8***> [#uses=1]
+ br label %bb11
+
+bb6: ; preds = %bb11
+ %4 = load i8*** %3, align 4 ; <i8**> [#uses=1]
+ %scevgep = getelementptr i8** %4, i32 %8 ; <i8**> [#uses=1]
+ %5 = load i8** %scevgep, align 4 ; <i8*> [#uses=1]
+ br label %bb3.i
+
+bb3.i: ; preds = %bb3.i, %bb6
+ %scevgep7.i = getelementptr i8* %5, i32 0 ; <i8*> [#uses=1]
+ %6 = load i8* %scevgep7.i, align 1 ; <i8> [#uses=0]
+ br i1 undef, label %bb3.i, label %bb10
+
+bb10: ; preds = %bb3.i
+ %7 = add i32 %8, 1 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb10, %entry
+ %8 = phi i32 [ %7, %bb10 ], [ 0, %entry ] ; <i32> [#uses=3]
+ %9 = icmp sgt i32 undef, %8 ; <i1> [#uses=1]
+ br i1 %9, label %bb6, label %bb13
+
+bb13: ; preds = %bb11
+ ret i8* null
+}
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
new file mode 100644
index 000000000000..1627f61b39a7
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 | FileCheck %s
+; rdar://7157006
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+%struct.asl_file_t = type { i32, i32, i32, %struct.file_string_t*, i64, i64, i64, i64, i64, i64, i32, %struct.FILE*, i8*, i8* }
+%struct.file_string_t = type { i64, i32, %struct.file_string_t*, [0 x i8] }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.asl_file_t*, i64, i64*)* @t to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize {
+; CHECK: t:
+; CHECK: adds r4, #8
+entry:
+ %val = alloca i64, align 4 ; <i64*> [#uses=3]
+ %0 = icmp eq %struct.asl_file_t* %s, null ; <i1> [#uses=1]
+ br i1 %0, label %bb13, label %bb1
+
+bb1: ; preds = %entry
+ %1 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2]
+ %2 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=2]
+ %3 = icmp eq %struct.FILE* %2, null ; <i1> [#uses=1]
+ br i1 %3, label %bb13, label %bb3
+
+bb3: ; preds = %bb1
+ %4 = add nsw i64 %off, 8 ; <i64> [#uses=1]
+ %5 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1]
+ %6 = load i32* %5, align 4 ; <i32> [#uses=1]
+ %7 = zext i32 %6 to i64 ; <i64> [#uses=1]
+ %8 = icmp sgt i64 %4, %7 ; <i1> [#uses=1]
+ br i1 %8, label %bb13, label %bb5
+
+bb5: ; preds = %bb3
+ %9 = call arm_apcscc i32 @fseeko(%struct.FILE* %2, i64 %off, i32 0) nounwind ; <i32> [#uses=1]
+ %10 = icmp eq i32 %9, 0 ; <i1> [#uses=1]
+ br i1 %10, label %bb7, label %bb13
+
+bb7: ; preds = %bb5
+ store i64 0, i64* %val, align 4
+ %11 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=1]
+ %val8 = bitcast i64* %val to i8* ; <i8*> [#uses=1]
+ %12 = call arm_apcscc i32 @fread(i8* noalias %val8, i32 8, i32 1, %struct.FILE* noalias %11) nounwind ; <i32> [#uses=1]
+ %13 = icmp eq i32 %12, 1 ; <i1> [#uses=1]
+ br i1 %13, label %bb10, label %bb13
+
+bb10: ; preds = %bb7
+ %14 = icmp eq i64* %out, null ; <i1> [#uses=1]
+ br i1 %14, label %bb13, label %bb11
+
+bb11: ; preds = %bb10
+ %15 = load i64* %val, align 4 ; <i64> [#uses=1]
+ %16 = call arm_apcscc i64 @asl_core_ntohq(i64 %15) nounwind ; <i64> [#uses=1]
+ store i64 %16, i64* %out, align 4
+ ret i32 0
+
+bb13: ; preds = %bb10, %bb7, %bb5, %bb3, %bb1, %entry
+ %.0 = phi i32 [ 2, %entry ], [ 2, %bb1 ], [ 7, %bb3 ], [ 7, %bb5 ], [ 7, %bb7 ], [ 0, %bb10 ] ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+declare arm_apcscc i32 @fseeko(%struct.FILE* nocapture, i64, i32) nounwind
+
+declare arm_apcscc i32 @fread(i8* noalias nocapture, i32, i32, %struct.FILE* noalias nocapture) nounwind
+
+declare arm_apcscc i64 @asl_core_ntohq(i64)
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
new file mode 100644
index 000000000000..1e3c070a8751
--- /dev/null
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -0,0 +1,288 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0}
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.adpcm_state = type { i16, i8 }
+@stepsizeTable = internal constant [89 x i32] [i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 17, i32 19, i32 21, i32 23, i32 25, i32 28, i32 31, i32 34, i32 37, i32 41, i32 45, i32 50, i32 55, i32 60, i32 66, i32 73, i32 80, i32 88, i32 97, i32 107, i32 118, i32 130, i32 143, i32 157, i32 173, i32 190, i32 209, i32 230, i32 253, i32 279, i32 307, i32 337, i32 371, i32 408, i32 449, i32 494, i32 544, i32 598, i32 658, i32 724, i32 796, i32 876, i32 963, i32 1060, i32 1166, i32 1282, i32 1411, i32 1552, i32 1707, i32 1878, i32 2066, i32 2272, i32 2499, i32 2749, i32 3024, i32 3327, i32 3660, i32 4026, i32 4428, i32 4871, i32 5358, i32 5894, i32 6484, i32 7132, i32 7845, i32 8630, i32 9493, i32 10442, i32 11487, i32 12635, i32 13899, i32 15289, i32 16818, i32 18500, i32 20350, i32 22385, i32 24623, i32 27086, i32 29794, i32 32767] ; <[89 x i32]*> [#uses=4]
+@indexTable = internal constant [16 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8, i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8] ; <[16 x i32]*> [#uses=2]
+@abuf = common global [500 x i8] zeroinitializer ; <[500 x i8]*> [#uses=1]
+@.str = private constant [11 x i8] c"input file\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[11 x i8]*> [#uses=1]
+@sbuf = common global [1000 x i16] zeroinitializer ; <[1000 x i16]*> [#uses=1]
+@state = common global %struct.adpcm_state zeroinitializer ; <%struct.adpcm_state*> [#uses=3]
+@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1]
+@.str1 = private constant [28 x i8] c"Final valprev=%d, index=%d\0A\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[28 x i8]*> [#uses=1]
+
+define arm_apcscc void @adpcm_coder(i16* nocapture %indata, i8* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
+entry:
+ %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
+ %1 = load i16* %0, align 2 ; <i16> [#uses=1]
+ %2 = sext i16 %1 to i32 ; <i32> [#uses=2]
+ %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
+ %4 = load i8* %3, align 2 ; <i8> [#uses=1]
+ %5 = sext i8 %4 to i32 ; <i32> [#uses=3]
+ %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb, label %bb27
+
+bb: ; preds = %bb25, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb25 ] ; <i32> [#uses=2]
+ %outp.136 = phi i8* [ %outdata, %entry ], [ %outp.0, %bb25 ] ; <i8*> [#uses=3]
+ %bufferstep.035 = phi i32 [ 1, %entry ], [ %tmp, %bb25 ] ; <i32> [#uses=3]
+ %outputbuffer.134 = phi i32 [ undef, %entry ], [ %outputbuffer.0, %bb25 ] ; <i32> [#uses=2]
+ %index.033 = phi i32 [ %5, %entry ], [ %index.2, %bb25 ] ; <i32> [#uses=1]
+ %valpred.132 = phi i32 [ %2, %entry ], [ %valpred.2, %bb25 ] ; <i32> [#uses=2]
+ %step.031 = phi i32 [ %7, %entry ], [ %36, %bb25 ] ; <i32> [#uses=5]
+ %inp.038 = getelementptr i16* %indata, i32 %indvar ; <i16*> [#uses=1]
+ %9 = load i16* %inp.038, align 2 ; <i16> [#uses=1]
+ %10 = sext i16 %9 to i32 ; <i32> [#uses=1]
+ %11 = sub i32 %10, %valpred.132 ; <i32> [#uses=3]
+ %12 = icmp slt i32 %11, 0 ; <i1> [#uses=1]
+ %iftmp.1.0 = select i1 %12, i32 8, i32 0 ; <i32> [#uses=2]
+ %13 = sub i32 0, %11 ; <i32> [#uses=1]
+ %14 = icmp eq i32 %iftmp.1.0, 0 ; <i1> [#uses=2]
+ %. = select i1 %14, i32 %11, i32 %13 ; <i32> [#uses=2]
+ %15 = ashr i32 %step.031, 3 ; <i32> [#uses=1]
+ %16 = icmp slt i32 %., %step.031 ; <i1> [#uses=2]
+ %delta.0 = select i1 %16, i32 0, i32 4 ; <i32> [#uses=2]
+ %17 = select i1 %16, i32 0, i32 %step.031 ; <i32> [#uses=2]
+ %diff.1 = sub i32 %., %17 ; <i32> [#uses=2]
+ %18 = ashr i32 %step.031, 1 ; <i32> [#uses=2]
+ %19 = icmp slt i32 %diff.1, %18 ; <i1> [#uses=2]
+ %20 = or i32 %delta.0, 2 ; <i32> [#uses=1]
+ %21 = select i1 %19, i32 %delta.0, i32 %20 ; <i32> [#uses=1]
+ %22 = select i1 %19, i32 0, i32 %18 ; <i32> [#uses=2]
+ %diff.2 = sub i32 %diff.1, %22 ; <i32> [#uses=1]
+ %23 = ashr i32 %step.031, 2 ; <i32> [#uses=2]
+ %24 = icmp slt i32 %diff.2, %23 ; <i1> [#uses=2]
+ %25 = zext i1 %24 to i32 ; <i32> [#uses=1]
+ %26 = select i1 %24, i32 0, i32 %23 ; <i32> [#uses=1]
+ %vpdiff.0 = add i32 %17, %15 ; <i32> [#uses=1]
+ %vpdiff.1 = add i32 %vpdiff.0, %22 ; <i32> [#uses=1]
+ %vpdiff.2 = add i32 %vpdiff.1, %26 ; <i32> [#uses=2]
+ %tmp30 = sub i32 0, %vpdiff.2 ; <i32> [#uses=1]
+ %valpred.0.p = select i1 %14, i32 %vpdiff.2, i32 %tmp30 ; <i32> [#uses=1]
+ %valpred.0 = add i32 %valpred.0.p, %valpred.132 ; <i32> [#uses=3]
+ %27 = icmp sgt i32 %valpred.0, 32767 ; <i1> [#uses=1]
+ br i1 %27, label %bb18, label %bb16
+
+bb16: ; preds = %bb
+ %28 = icmp slt i32 %valpred.0, -32768 ; <i1> [#uses=1]
+ br i1 %28, label %bb17, label %bb18
+
+bb17: ; preds = %bb16
+ br label %bb18
+
+bb18: ; preds = %bb17, %bb16, %bb
+ %valpred.2 = phi i32 [ -32768, %bb17 ], [ 32767, %bb ], [ %valpred.0, %bb16 ] ; <i32> [#uses=2]
+ %delta.1 = or i32 %21, %iftmp.1.0 ; <i32> [#uses=1]
+ %delta.2 = or i32 %delta.1, %25 ; <i32> [#uses=1]
+ %29 = xor i32 %delta.2, 1 ; <i32> [#uses=3]
+ %30 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %29 ; <i32*> [#uses=1]
+ %31 = load i32* %30, align 4 ; <i32> [#uses=1]
+ %32 = add i32 %31, %index.033 ; <i32> [#uses=2]
+ %33 = icmp slt i32 %32, 0 ; <i1> [#uses=1]
+ %index.1 = select i1 %33, i32 0, i32 %32 ; <i32> [#uses=2]
+ %34 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1]
+ %index.2 = select i1 %34, i32 88, i32 %index.1 ; <i32> [#uses=3]
+ %35 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
+ %36 = load i32* %35, align 4 ; <i32> [#uses=1]
+ %37 = icmp eq i32 %bufferstep.035, 0 ; <i1> [#uses=1]
+ br i1 %37, label %bb24, label %bb23
+
+bb23: ; preds = %bb18
+ %38 = shl i32 %29, 4 ; <i32> [#uses=1]
+ %39 = and i32 %38, 240 ; <i32> [#uses=1]
+ br label %bb25
+
+bb24: ; preds = %bb18
+ %40 = trunc i32 %29 to i8 ; <i8> [#uses=1]
+ %41 = and i8 %40, 15 ; <i8> [#uses=1]
+ %42 = trunc i32 %outputbuffer.134 to i8 ; <i8> [#uses=1]
+ %43 = or i8 %41, %42 ; <i8> [#uses=1]
+ store i8 %43, i8* %outp.136, align 1
+ %44 = getelementptr i8* %outp.136, i32 1 ; <i8*> [#uses=1]
+ br label %bb25
+
+bb25: ; preds = %bb24, %bb23
+ %outputbuffer.0 = phi i32 [ %39, %bb23 ], [ %outputbuffer.134, %bb24 ] ; <i32> [#uses=2]
+ %outp.0 = phi i8* [ %outp.136, %bb23 ], [ %44, %bb24 ] ; <i8*> [#uses=2]
+ %tmp = xor i32 %bufferstep.035, 1 ; <i32> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %len ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb26.bb27_crit_edge, label %bb
+
+bb26.bb27_crit_edge: ; preds = %bb25
+ %phitmp44 = icmp eq i32 %bufferstep.035, 1 ; <i1> [#uses=1]
+ br label %bb27
+
+bb27: ; preds = %bb26.bb27_crit_edge, %entry
+ %outp.1.lcssa = phi i8* [ %outp.0, %bb26.bb27_crit_edge ], [ %outdata, %entry ] ; <i8*> [#uses=1]
+ %bufferstep.0.lcssa = phi i1 [ %phitmp44, %bb26.bb27_crit_edge ], [ false, %entry ] ; <i1> [#uses=1]
+ %outputbuffer.1.lcssa = phi i32 [ %outputbuffer.0, %bb26.bb27_crit_edge ], [ undef, %entry ] ; <i32> [#uses=1]
+ %index.0.lcssa = phi i32 [ %index.2, %bb26.bb27_crit_edge ], [ %5, %entry ] ; <i32> [#uses=1]
+ %valpred.1.lcssa = phi i32 [ %valpred.2, %bb26.bb27_crit_edge ], [ %2, %entry ] ; <i32> [#uses=1]
+ br i1 %bufferstep.0.lcssa, label %bb28, label %bb29
+
+bb28: ; preds = %bb27
+ %45 = trunc i32 %outputbuffer.1.lcssa to i8 ; <i8> [#uses=1]
+ store i8 %45, i8* %outp.1.lcssa, align 1
+ br label %bb29
+
+bb29: ; preds = %bb28, %bb27
+ %46 = trunc i32 %valpred.1.lcssa to i16 ; <i16> [#uses=1]
+ store i16 %46, i16* %0, align 2
+ %47 = trunc i32 %index.0.lcssa to i8 ; <i8> [#uses=1]
+ store i8 %47, i8* %3, align 2
+ ret void
+}
+
+define arm_apcscc void @adpcm_decoder(i8* nocapture %indata, i16* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
+entry:
+ %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
+ %1 = load i16* %0, align 2 ; <i16> [#uses=1]
+ %2 = sext i16 %1 to i32 ; <i32> [#uses=2]
+ %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
+ %4 = load i8* %3, align 2 ; <i8> [#uses=1]
+ %5 = sext i8 %4 to i32 ; <i32> [#uses=3]
+ %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb, label %bb22
+
+bb: ; preds = %bb20, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb20 ] ; <i32> [#uses=2]
+ %inp.131 = phi i8* [ %indata, %entry ], [ %inp.0, %bb20 ] ; <i8*> [#uses=3]
+ %bufferstep.028 = phi i32 [ 0, %entry ], [ %tmp, %bb20 ] ; <i32> [#uses=2]
+ %inputbuffer.127 = phi i32 [ undef, %entry ], [ %inputbuffer.0, %bb20 ] ; <i32> [#uses=2]
+ %index.026 = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1]
+ %valpred.125 = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1]
+ %step.024 = phi i32 [ %7, %entry ], [ %35, %bb20 ] ; <i32> [#uses=4]
+ %outp.030 = getelementptr i16* %outdata, i32 %indvar ; <i16*> [#uses=1]
+ %9 = icmp eq i32 %bufferstep.028, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb2, label %bb3
+
+bb2: ; preds = %bb
+ %10 = load i8* %inp.131, align 1 ; <i8> [#uses=1]
+ %11 = sext i8 %10 to i32 ; <i32> [#uses=2]
+ %12 = getelementptr i8* %inp.131, i32 1 ; <i8*> [#uses=1]
+ %13 = ashr i32 %11, 4 ; <i32> [#uses=1]
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ %inputbuffer.0 = phi i32 [ %11, %bb2 ], [ %inputbuffer.127, %bb ] ; <i32> [#uses=1]
+ %delta.0.in = phi i32 [ %13, %bb2 ], [ %inputbuffer.127, %bb ] ; <i32> [#uses=5]
+ %inp.0 = phi i8* [ %12, %bb2 ], [ %inp.131, %bb ] ; <i8*> [#uses=1]
+ %delta.0 = and i32 %delta.0.in, 15 ; <i32> [#uses=1]
+ %tmp = xor i32 %bufferstep.028, 1 ; <i32> [#uses=1]
+ %14 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %delta.0 ; <i32*> [#uses=1]
+ %15 = load i32* %14, align 4 ; <i32> [#uses=1]
+ %16 = add i32 %15, %index.026 ; <i32> [#uses=2]
+ %17 = icmp slt i32 %16, 0 ; <i1> [#uses=1]
+ %index.1 = select i1 %17, i32 0, i32 %16 ; <i32> [#uses=2]
+ %18 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1]
+ %index.2 = select i1 %18, i32 88, i32 %index.1 ; <i32> [#uses=3]
+ %19 = and i32 %delta.0.in, 8 ; <i32> [#uses=1]
+ %20 = ashr i32 %step.024, 3 ; <i32> [#uses=1]
+ %21 = and i32 %delta.0.in, 4 ; <i32> [#uses=1]
+ %22 = icmp eq i32 %21, 0 ; <i1> [#uses=1]
+ %23 = select i1 %22, i32 0, i32 %step.024 ; <i32> [#uses=1]
+ %vpdiff.0 = add i32 %23, %20 ; <i32> [#uses=2]
+ %24 = and i32 %delta.0.in, 2 ; <i32> [#uses=1]
+ %25 = icmp eq i32 %24, 0 ; <i1> [#uses=1]
+ br i1 %25, label %bb11, label %bb10
+
+bb10: ; preds = %bb3
+ %26 = ashr i32 %step.024, 1 ; <i32> [#uses=1]
+ %27 = add i32 %vpdiff.0, %26 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb3
+ %vpdiff.1 = phi i32 [ %27, %bb10 ], [ %vpdiff.0, %bb3 ] ; <i32> [#uses=2]
+ %28 = and i32 %delta.0.in, 1 ; <i32> [#uses=1]
+ %toBool = icmp eq i32 %28, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %bb13, label %bb12
+
+bb12: ; preds = %bb11
+ %29 = ashr i32 %step.024, 2 ; <i32> [#uses=1]
+ %30 = add i32 %vpdiff.1, %29 ; <i32> [#uses=1]
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb11
+ %vpdiff.2 = phi i32 [ %30, %bb12 ], [ %vpdiff.1, %bb11 ] ; <i32> [#uses=2]
+ %31 = icmp eq i32 %19, 0 ; <i1> [#uses=1]
+ %tmp23 = sub i32 0, %vpdiff.2 ; <i32> [#uses=1]
+ %valpred.0.p = select i1 %31, i32 %vpdiff.2, i32 %tmp23 ; <i32> [#uses=1]
+ %valpred.0 = add i32 %valpred.0.p, %valpred.125 ; <i32> [#uses=3]
+ %32 = icmp sgt i32 %valpred.0, 32767 ; <i1> [#uses=1]
+ br i1 %32, label %bb20, label %bb18
+
+bb18: ; preds = %bb13
+ %33 = icmp slt i32 %valpred.0, -32768 ; <i1> [#uses=1]
+ br i1 %33, label %bb19, label %bb20
+
+bb19: ; preds = %bb18
+ br label %bb20
+
+bb20: ; preds = %bb19, %bb18, %bb13
+ %valpred.2 = phi i32 [ -32768, %bb19 ], [ 32767, %bb13 ], [ %valpred.0, %bb18 ] ; <i32> [#uses=3]
+ %34 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
+ %35 = load i32* %34, align 4 ; <i32> [#uses=1]
+ %36 = trunc i32 %valpred.2 to i16 ; <i16> [#uses=1]
+ store i16 %36, i16* %outp.030, align 2
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %len ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb22, label %bb
+
+bb22: ; preds = %bb20, %entry
+ %index.0.lcssa = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1]
+ %valpred.1.lcssa = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1]
+ %37 = trunc i32 %valpred.1.lcssa to i16 ; <i16> [#uses=1]
+ store i16 %37, i16* %0, align 2
+ %38 = trunc i32 %index.0.lcssa to i8 ; <i8> [#uses=1]
+ store i8 %38, i8* %3, align 2
+ ret void
+}
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb3, %entry
+ %0 = tail call arm_apcscc i32 (...)* @read(i32 0, i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4]
+ %1 = icmp slt i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ tail call arm_apcscc void @perror(i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0)) nounwind
+ ret i32 1
+
+bb2: ; preds = %bb
+ %2 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb4, label %bb3
+
+bb3: ; preds = %bb2
+ %3 = shl i32 %0, 1 ; <i32> [#uses=1]
+ tail call arm_apcscc void @adpcm_decoder(i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind
+ %4 = shl i32 %0, 2 ; <i32> [#uses=1]
+ %5 = tail call arm_apcscc i32 (...)* @write(i32 1, i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+ br label %bb
+
+bb4: ; preds = %bb2
+ %6 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
+ %7 = load i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4 ; <i16> [#uses=1]
+ %8 = sext i16 %7 to i32 ; <i32> [#uses=1]
+ %9 = load i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2 ; <i8> [#uses=1]
+ %10 = sext i8 %9 to i32 ; <i32> [#uses=1]
+ %11 = tail call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare arm_apcscc i32 @read(...)
+
+declare arm_apcscc void @perror(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @write(...)
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index cd76250bf0a7..acfdc917ddf0 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb | not grep {ldr sp}
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin | \
+; RUN: llc < %s -march=thumb | not grep {ldr sp}
+; RUN: llc < %s -mtriple=thumb-apple-darwin | \
; RUN: not grep {sub.*r7}
-; RUN: llvm-as < %s | llc -march=thumb | grep 4294967280
+; RUN: llc < %s -march=thumb | grep 4294967280
 
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/Thumb/fpconv.ll b/test/CodeGen/Thumb/fpconv.ll
index 2003131fbb73..7da36ddf58ed 100644
--- a/test/CodeGen/Thumb/fpconv.ll
+++ b/test/CodeGen/Thumb/fpconv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
 
define float @f1(double %x) {
entry:
diff --git a/test/CodeGen/Thumb/fpow.ll b/test/CodeGen/Thumb/fpow.ll
index e5b92ad94ef8..be3dc0b3c1f8 100644
--- a/test/CodeGen/Thumb/fpow.ll
+++ b/test/CodeGen/Thumb/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
 
define double @t(double %x, double %y) nounwind optsize {
entry:
diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll
index 270e331cb52f..0cac7554be03 100644
--- a/test/CodeGen/Thumb/frame_thumb.ll
+++ b/test/CodeGen/Thumb/frame_thumb.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin \
+; RUN: llc < %s -mtriple=thumb-apple-darwin \
; RUN: -disable-fp-elim | not grep {r11}
-; RUN: llvm-as < %s | llc -mtriple=thumb-linux-gnueabi \
+; RUN: llc < %s -mtriple=thumb-linux-gnueabi \
; RUN: -disable-fp-elim | not grep {r11}
 
define i32 @f() {
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index 13084f6870ee..d7cdcd8149af 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -stats |& \
+; RUN: llc < %s -march=thumb -stats |& \
; RUN: grep {4 .*Number of machine instrs printed}
 
;; Integer absolute value, should produce something as good as:
diff --git a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
index 2c872e7e310f..5c8a52af59e4 100644
--- a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
 
; Test Thumb-mode "I" constraint, for ADD immediate.
define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/Thumb/ispositive.ll b/test/CodeGen/Thumb/ispositive.ll
index 91f5970ae9cb..eac3ef28377b 100644
--- a/test/CodeGen/Thumb/ispositive.ll
+++ b/test/CodeGen/Thumb/ispositive.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep {lsr r0, r0, #31}
+; RUN: llc < %s -march=thumb | FileCheck %s
 
define i32 @test1(i32 %X) {
entry:
+; CHECK: test1:
+; CHECK: lsrs r0, r0, #31
icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
zext i1 %0 to i32 ; <i32>:1 [#uses=1]
ret i32 %1
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index f7c9ed07009f..02de36af1cc7 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep {ldr.*LCP} | count 5
+; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
 
define void @test1() {
%tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll
index 4b2a7b201b55..9a28124b84ce 100644
--- a/test/CodeGen/Thumb/ldr_ext.ll
+++ b/test/CodeGen/Thumb/ldr_ext.ll
@@ -1,27 +1,56 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s -check-prefix=V6
 
-define i32 @test1(i8* %v.pntr.s0.u1) {
- %tmp.u = load i8* %v.pntr.s0.u1
+; rdar://7176514
+
+define i32 @test1(i8* %t1) nounwind {
+; V5: ldrb
+
+; V6: ldrb
+ %tmp.u = load i8* %t1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
 
-define i32 @test2(i16* %v.pntr.s0.u1) {
- %tmp.u = load i16* %v.pntr.s0.u1
+define i32 @test2(i16* %t1) nounwind {
+; V5: ldrh
+
+; V6: ldrh
+ %tmp.u = load i16* %t1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
 
-define i32 @test3(i8* %v.pntr.s1.u0) {
- %tmp.s = load i8* %v.pntr.s1.u0
+define i32 @test3(i8* %t0) nounwind {
+; V5: ldrb
+; V5: lsls
+; V5: asrs
+
+; V6: ldrb
+; V6: sxtb
+ %tmp.s = load i8* %t0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
 
-define i32 @test4() {
+define i32 @test4(i16* %t0) nounwind {
+; V5: ldrh
+; V5: lsls
+; V5: asrs
+
+; V6: ldrh
+; V6: sxth
+ %tmp.s = load i16* %t0
+ %tmp1.s = sext i16 %tmp.s to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; V5: movs r0, #0
+; V5: ldrsh
+
+; V6: movs r0, #0
+; V6: ldrsh
%tmp.s = load i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
index 0043fb502a32..81782cda4a90 100644
--- a/test/CodeGen/Thumb/ldr_frame.ll
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep cpy | count 2
+; RUN: llc < %s -march=thumb | FileCheck %s
 
define i32 @f1() {
+; CHECK: f1:
+; CHECK: ldr r0
%buf = alloca [32 x i32], align 4
%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
%tmp1 = load i32* %tmp
@@ -8,6 +10,9 @@ define i32 @f1() {
}
 
define i32 @f2() {
+; CHECK: f2:
+; CHECK: mov r0
+; CHECK: ldrb
%buf = alloca [32 x i8], align 4
%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
%tmp1 = load i8* %tmp
@@ -16,6 +21,8 @@ define i32 @f2() {
}
 
define i32 @f3() {
+; CHECK: f3:
+; CHECK: ldr r0
%buf = alloca [32 x i32], align 4
%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
%tmp1 = load i32* %tmp
@@ -23,6 +30,9 @@ define i32 @f3() {
}
 
define i32 @f4() {
+; CHECK: f4:
+; CHECK: mov r0
+; CHECK: ldrb
%buf = alloca [32 x i8], align 4
%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
%tmp1 = load i8* %tmp
diff --git a/test/CodeGen/Thumb/long-setcc.ll b/test/CodeGen/Thumb/long-setcc.ll
index df6d137a088f..8f2d98fc43c9 100644
--- a/test/CodeGen/Thumb/long-setcc.ll
+++ b/test/CodeGen/Thumb/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep cmp | count 1
+; RUN: llc < %s -march=thumb | grep cmp | count 1
 
 
define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index 22874437eb02..e3ef44a87586 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
; RUN: grep mvn | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
; RUN: grep adc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
; RUN: grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __muldi3
+; RUN: llc < %s -march=thumb | grep __muldi3
 
define i64 @f1() {
entry:
diff --git a/test/CodeGen/Thumb/long_shift.ll b/test/CodeGen/Thumb/long_shift.ll
new file mode 100644
index 000000000000..24317141fca6
--- /dev/null
+++ b/test/CodeGen/Thumb/long_shift.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=thumb
+
+define i64 @f0(i64 %A, i64 %B) {
+ %tmp = bitcast i64 %A to i64
+ %tmp2 = lshr i64 %B, 1
+ %tmp3 = sub i64 %tmp, %tmp2
+ ret i64 %tmp3
+}
+
+define i32 @f1(i64 %x, i64 %y) {
+ %a = shl i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) {
+ %a = ashr i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) {
+ %a = lshr i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/Thumb/mul.ll b/test/CodeGen/Thumb/mul.ll
new file mode 100644
index 000000000000..c1a2fb29477d
--- /dev/null
+++ b/test/CodeGen/Thumb/mul.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=thumb | grep mul | count 3
+; RUN: llc < %s -march=thumb | grep lsl | count 1
+
+define i32 @f1(i32 %u) {
+ %tmp = mul i32 %u, %u
+ ret i32 %tmp
+}
+
+define i32 @f2(i32 %u, i32 %v) {
+ %tmp = mul i32 %u, %v
+ ret i32 %tmp
+}
+
+define i32 @f3(i32 %u) {
+ %tmp = mul i32 %u, 5
+ ret i32 %tmp
+}
+
+define i32 @f4(i32 %u) {
+ %tmp = mul i32 %u, 4
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
new file mode 100644
index 000000000000..c5e86ad45bc3
--- /dev/null
+++ b/test/CodeGen/Thumb/pop.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; rdar://7268481
+
+define arm_apcscc void @t(i8* %a, ...) nounwind {
+; CHECK: t:
+; CHECK: pop {r3}
+; CHECK-NEXT: add sp, #3 * 4
+; CHECK-NEXT: bx r3
+entry:
+ %a.addr = alloca i8*
+ store i8* %a, i8** %a.addr
+ ret void
+}
diff --git a/test/CodeGen/Thumb/push.ll b/test/CodeGen/Thumb/push.ll
new file mode 100644
index 000000000000..63773c4f6c9f
--- /dev/null
+++ b/test/CodeGen/Thumb/push.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-fp-elim | FileCheck %s
+; rdar://7268481
+
+define arm_apcscc void @t() nounwind {
+; CHECK: t:
+; CHECK-NEXT: push {r7}
+entry:
+ call void asm sideeffect ".long 0xe7ffdefe", ""() nounwind
+ ret void
+}
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index ae75549d723e..7a183b0f9e26 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep blt | count 3
-; RUN: llvm-as < %s | llc -march=thumb | grep ble | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bls | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bhi | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __ltdf2
+; RUN: llc < %s -march=thumb | grep beq | count 1
+; RUN: llc < %s -march=thumb | grep bgt | count 1
+; RUN: llc < %s -march=thumb | grep blt | count 3
+; RUN: llc < %s -march=thumb | grep ble | count 1
+; RUN: llc < %s -march=thumb | grep bls | count 1
+; RUN: llc < %s -march=thumb | grep bhi | count 1
+; RUN: llc < %s -march=thumb | grep __ltdf2
 
define i32 @f1(i32 %a.s) {
entry:
diff --git a/test/CodeGen/Thumb/stack-frame.ll b/test/CodeGen/Thumb/stack-frame.ll
index 756d257c2ae9..b103b331b797 100644
--- a/test/CodeGen/Thumb/stack-frame.ll
+++ b/test/CodeGen/Thumb/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -march=thumb | grep add | count 1
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -march=thumb | grep add | count 1
 
define void @f1() {
%c = alloca i8, align 1
diff --git a/test/CodeGen/Thumb/thumb-imm.ll b/test/CodeGen/Thumb/thumb-imm.ll
index 2be393a95cac..74a57ff271be 100644
--- a/test/CodeGen/Thumb/thumb-imm.ll
+++ b/test/CodeGen/Thumb/thumb-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | not grep CPI
+; RUN: llc < %s -march=thumb | not grep CPI
 
 
define i32 @test1() {
diff --git a/test/CodeGen/Thumb/tst_teq.ll b/test/CodeGen/Thumb/tst_teq.ll
index 0456951e1050..21ada3ed83a0 100644
--- a/test/CodeGen/Thumb/tst_teq.ll
+++ b/test/CodeGen/Thumb/tst_teq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep tst
+; RUN: llc < %s -march=thumb | grep tst
 
define i32 @f(i32 %a) {
entry:
diff --git a/test/CodeGen/Thumb/unord.ll b/test/CodeGen/Thumb/unord.ll
index 4202d269c0e7..39458ae7b7bc 100644
--- a/test/CodeGen/Thumb/unord.ll
+++ b/test/CodeGen/Thumb/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep bne | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
+; RUN: llc < %s -march=thumb | grep bne | count 1
+; RUN: llc < %s -march=thumb | grep beq | count 1
 
define i32 @f1(float %X, float %Y) {
%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index a18010f2fadd..16a9c4442d8a 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=thumb-linux | grep pop | count 1
-; RUN: llvm-as < %s | llc -mtriple=thumb-darwin | grep pop | count 2
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1
+; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2
 
@str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
new file mode 100644
index 000000000000..8f2283f74865
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv6t2-elf"
+ %struct.dwarf_cie = type <{ i32, i32, i8, [0 x i8], [3 x i8] }>
+
+declare arm_apcscc i8* @read_sleb128(i8*, i32* nocapture) nounwind
+
+define arm_apcscc i32 @get_cie_encoding(%struct.dwarf_cie* %cie) nounwind {
+entry:
+ br i1 undef, label %bb1, label %bb13
+
+bb1: ; preds = %entry
+ %tmp38 = add i32 undef, 10 ; <i32> [#uses=1]
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %bb1
+ %indvar.i = phi i32 [ 0, %bb1 ], [ %2, %bb.i ] ; <i32> [#uses=3]
+ %tmp39 = add i32 %indvar.i, %tmp38 ; <i32> [#uses=1]
+ %p_addr.0.i = getelementptr i8* undef, i32 %tmp39 ; <i8*> [#uses=1]
+ %0 = load i8* %p_addr.0.i, align 1 ; <i8> [#uses=1]
+ %1 = icmp slt i8 %0, 0 ; <i1> [#uses=1]
+ %2 = add i32 %indvar.i, 1 ; <i32> [#uses=1]
+ br i1 %1, label %bb.i, label %read_uleb128.exit
+
+read_uleb128.exit: ; preds = %bb.i
+ %.sum40 = add i32 %indvar.i, undef ; <i32> [#uses=1]
+ %.sum31 = add i32 %.sum40, 2 ; <i32> [#uses=1]
+ %scevgep.i = getelementptr %struct.dwarf_cie* %cie, i32 0, i32 3, i32 %.sum31 ; <i8*> [#uses=1]
+ %3 = call arm_apcscc i8* @read_sleb128(i8* %scevgep.i, i32* undef) ; <i8*> [#uses=0]
+ unreachable
+
+bb13: ; preds = %entry
+ ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
new file mode 100644
index 000000000000..ec649c37bbe7
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; rdar://7076238
+
+@"\01LC" = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=1]
+
+define arm_apcscc i32 @t(i32, ...) nounwind {
+entry:
+; CHECK: t:
+; CHECK: add r7, sp, #3 * 4
+ %1 = load i8** undef, align 4 ; <i8*> [#uses=3]
+ %2 = getelementptr i8* %1, i32 4 ; <i8*> [#uses=1]
+ %3 = getelementptr i8* %1, i32 8 ; <i8*> [#uses=1]
+ %4 = bitcast i8* %2 to i32* ; <i32*> [#uses=1]
+ %5 = load i32* %4, align 4 ; <i32> [#uses=1]
+ %6 = trunc i32 %5 to i8 ; <i8> [#uses=1]
+ %7 = getelementptr i8* %1, i32 12 ; <i8*> [#uses=1]
+ %8 = bitcast i8* %3 to i32* ; <i32*> [#uses=1]
+ %9 = load i32* %8, align 4 ; <i32> [#uses=1]
+ %10 = trunc i32 %9 to i16 ; <i16> [#uses=1]
+ %11 = bitcast i8* %7 to i32* ; <i32*> [#uses=1]
+ %12 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %13 = trunc i32 %12 to i16 ; <i16> [#uses=1]
+ %14 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %15 = sext i8 %6 to i32 ; <i32> [#uses=2]
+ %16 = sext i16 %10 to i32 ; <i32> [#uses=2]
+ %17 = sext i16 %13 to i32 ; <i32> [#uses=2]
+ %18 = call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0]
+ %19 = add i32 0, %15 ; <i32> [#uses=1]
+ %20 = add i32 %19, %16 ; <i32> [#uses=1]
+ %21 = add i32 %20, %14 ; <i32> [#uses=1]
+ %22 = add i32 %21, %17 ; <i32> [#uses=1]
+ %23 = add i32 %22, 0 ; <i32> [#uses=1]
+ ret i32 %23
+}
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll b/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
new file mode 100644
index 000000000000..4d21f9ba6302
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2
+; rdar://7083961
+
+define arm_apcscc i32 @value(i64 %b1, i64 %b2) nounwind readonly {
+entry:
+ %0 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ %mod.0.ph.ph = select i1 %0, float -1.000000e+00, float 1.000000e+00 ; <float> [#uses=1]
+ br label %bb7
+
+bb7: ; preds = %bb7, %entry
+ br i1 undef, label %bb86.preheader, label %bb7
+
+bb86.preheader: ; preds = %bb7
+ %1 = fmul float %mod.0.ph.ph, 5.000000e+00 ; <float> [#uses=0]
+ br label %bb79
+
+bb79: ; preds = %bb79, %bb86.preheader
+ br i1 undef, label %bb119, label %bb79
+
+bb119: ; preds = %bb79
+ ret i32 undef
+}
diff --git a/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
new file mode 100644
index 000000000000..f74d12ed2787
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
+ %struct.JQUANT_TBL = type { [64 x i16], i32 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.anon = type { [8 x i32], [48 x i8] }
+ %struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
+ %struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
+ %struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
+ %struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
+ %struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
+ %struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
+ %struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
+ %struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
+ %struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
+ %struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
+ %struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+ %struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
+ %struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
+ %struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
+ %struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
+ %struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
+ %struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+ %struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
+ %struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
+
+define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
+entry:
+ %workspace = alloca [64 x float], align 4 ; <[64 x float]*> [#uses=11]
+ %0 = load i8** undef, align 4 ; <i8*> [#uses=5]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=11]
+ %tmp39 = add i32 %indvar, 8 ; <i32> [#uses=0]
+ %tmp41 = add i32 %indvar, 16 ; <i32> [#uses=2]
+ %scevgep42 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp41 ; <float*> [#uses=1]
+ %tmp43 = add i32 %indvar, 24 ; <i32> [#uses=1]
+ %scevgep44 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp43 ; <float*> [#uses=1]
+ %tmp45 = add i32 %indvar, 32 ; <i32> [#uses=1]
+ %scevgep46 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp45 ; <float*> [#uses=1]
+ %tmp47 = add i32 %indvar, 40 ; <i32> [#uses=1]
+ %scevgep48 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp47 ; <float*> [#uses=1]
+ %tmp49 = add i32 %indvar, 48 ; <i32> [#uses=1]
+ %scevgep50 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp49 ; <float*> [#uses=1]
+ %tmp51 = add i32 %indvar, 56 ; <i32> [#uses=1]
+ %scevgep52 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp51 ; <float*> [#uses=1]
+ %wsptr.119 = getelementptr [64 x float]* %workspace, i32 0, i32 %indvar ; <float*> [#uses=1]
+ %tmp54 = shl i32 %indvar, 2 ; <i32> [#uses=1]
+ %scevgep76 = getelementptr i8* undef, i32 %tmp54 ; <i8*> [#uses=1]
+ %quantptr.118 = bitcast i8* %scevgep76 to float* ; <float*> [#uses=1]
+ %scevgep79 = getelementptr i16* %coef_block, i32 %tmp41 ; <i16*> [#uses=0]
+ %inptr.117 = getelementptr i16* %coef_block, i32 %indvar ; <i16*> [#uses=1]
+ %1 = load i16* null, align 2 ; <i16> [#uses=1]
+ %2 = load i16* undef, align 2 ; <i16> [#uses=1]
+ %3 = load i16* %inptr.117, align 2 ; <i16> [#uses=1]
+ %4 = sitofp i16 %3 to float ; <float> [#uses=1]
+ %5 = load float* %quantptr.118, align 4 ; <float> [#uses=1]
+ %6 = fmul float %4, %5 ; <float> [#uses=1]
+ %7 = fsub float %6, undef ; <float> [#uses=2]
+ %8 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %9 = fsub float %8, 0.000000e+00 ; <float> [#uses=2]
+ %10 = fadd float undef, 0.000000e+00 ; <float> [#uses=2]
+ %11 = fadd float %7, %9 ; <float> [#uses=2]
+ %12 = fsub float %7, %9 ; <float> [#uses=2]
+ %13 = sitofp i16 %1 to float ; <float> [#uses=1]
+ %14 = fmul float %13, undef ; <float> [#uses=2]
+ %15 = sitofp i16 %2 to float ; <float> [#uses=1]
+ %16 = load float* undef, align 4 ; <float> [#uses=1]
+ %17 = fmul float %15, %16 ; <float> [#uses=1]
+ %18 = fadd float %14, undef ; <float> [#uses=2]
+ %19 = fsub float %14, undef ; <float> [#uses=2]
+ %20 = fadd float undef, %17 ; <float> [#uses=2]
+ %21 = fadd float %20, %18 ; <float> [#uses=3]
+ %22 = fsub float %20, %18 ; <float> [#uses=1]
+ %23 = fmul float %22, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %24 = fadd float %19, undef ; <float> [#uses=1]
+ %25 = fmul float %24, 0x3FFD906BC0000000 ; <float> [#uses=2]
+ %26 = fmul float undef, 0x3FF1517A80000000 ; <float> [#uses=1]
+ %27 = fsub float %26, %25 ; <float> [#uses=1]
+ %28 = fmul float %19, 0xC004E7AEA0000000 ; <float> [#uses=1]
+ %29 = fadd float %28, %25 ; <float> [#uses=1]
+ %30 = fsub float %29, %21 ; <float> [#uses=3]
+ %31 = fsub float %23, %30 ; <float> [#uses=3]
+ %32 = fadd float %27, %31 ; <float> [#uses=1]
+ %33 = fadd float %10, %21 ; <float> [#uses=1]
+ store float %33, float* %wsptr.119, align 4
+ %34 = fsub float %10, %21 ; <float> [#uses=1]
+ store float %34, float* %scevgep52, align 4
+ %35 = fadd float %11, %30 ; <float> [#uses=1]
+ store float %35, float* null, align 4
+ %36 = fsub float %11, %30 ; <float> [#uses=1]
+ store float %36, float* %scevgep50, align 4
+ %37 = fadd float %12, %31 ; <float> [#uses=1]
+ store float %37, float* %scevgep42, align 4
+ %38 = fsub float %12, %31 ; <float> [#uses=1]
+ store float %38, float* %scevgep48, align 4
+ %39 = fadd float undef, %32 ; <float> [#uses=1]
+ store float %39, float* %scevgep46, align 4
+ store float undef, float* %scevgep44, align 4
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb6, label %bb
+
+bb6: ; preds = %bb
+ %.sum10 = add i32 %output_col, 1 ; <i32> [#uses=1]
+ %.sum8 = add i32 %output_col, 6 ; <i32> [#uses=1]
+ %.sum6 = add i32 %output_col, 2 ; <i32> [#uses=1]
+ %.sum = add i32 %output_col, 3 ; <i32> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb8, %bb6
+ %ctr.116 = phi i32 [ 0, %bb6 ], [ %88, %bb8 ] ; <i32> [#uses=3]
+ %scevgep = getelementptr i8** %output_buf, i32 %ctr.116 ; <i8**> [#uses=1]
+ %tmp = shl i32 %ctr.116, 3 ; <i32> [#uses=5]
+ %tmp2392 = or i32 %tmp, 4 ; <i32> [#uses=1]
+ %scevgep24 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2392 ; <float*> [#uses=1]
+ %tmp2591 = or i32 %tmp, 2 ; <i32> [#uses=1]
+ %scevgep26 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2591 ; <float*> [#uses=1]
+ %tmp2790 = or i32 %tmp, 6 ; <i32> [#uses=1]
+ %scevgep28 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2790 ; <float*> [#uses=1]
+ %tmp3586 = or i32 %tmp, 7 ; <i32> [#uses=0]
+ %wsptr.215 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp ; <float*> [#uses=1]
+ %40 = load i8** %scevgep, align 4 ; <i8*> [#uses=4]
+ %41 = load float* %wsptr.215, align 4 ; <float> [#uses=1]
+ %42 = load float* %scevgep24, align 4 ; <float> [#uses=1]
+ %43 = fadd float %41, %42 ; <float> [#uses=1]
+ %44 = load float* %scevgep26, align 4 ; <float> [#uses=1]
+ %45 = load float* %scevgep28, align 4 ; <float> [#uses=1]
+ %46 = fadd float %44, %45 ; <float> [#uses=1]
+ %47 = fsub float %43, %46 ; <float> [#uses=2]
+ %48 = fsub float undef, 0.000000e+00 ; <float> [#uses=1]
+ %49 = fadd float 0.000000e+00, undef ; <float> [#uses=1]
+ %50 = fptosi float %49 to i32 ; <i32> [#uses=1]
+ %51 = add i32 %50, 4 ; <i32> [#uses=1]
+ %52 = lshr i32 %51, 3 ; <i32> [#uses=1]
+ %53 = and i32 %52, 1023 ; <i32> [#uses=1]
+ %.sum14 = add i32 %53, 128 ; <i32> [#uses=1]
+ %54 = getelementptr i8* %0, i32 %.sum14 ; <i8*> [#uses=1]
+ %55 = load i8* %54, align 1 ; <i8> [#uses=1]
+ store i8 %55, i8* null, align 1
+ %56 = getelementptr i8* %40, i32 %.sum10 ; <i8*> [#uses=1]
+ store i8 0, i8* %56, align 1
+ %57 = load i8* null, align 1 ; <i8> [#uses=1]
+ %58 = getelementptr i8* %40, i32 %.sum8 ; <i8*> [#uses=1]
+ store i8 %57, i8* %58, align 1
+ %59 = fadd float undef, %48 ; <float> [#uses=1]
+ %60 = fptosi float %59 to i32 ; <i32> [#uses=1]
+ %61 = add i32 %60, 4 ; <i32> [#uses=1]
+ %62 = lshr i32 %61, 3 ; <i32> [#uses=1]
+ %63 = and i32 %62, 1023 ; <i32> [#uses=1]
+ %.sum7 = add i32 %63, 128 ; <i32> [#uses=1]
+ %64 = getelementptr i8* %0, i32 %.sum7 ; <i8*> [#uses=1]
+ %65 = load i8* %64, align 1 ; <i8> [#uses=1]
+ %66 = getelementptr i8* %40, i32 %.sum6 ; <i8*> [#uses=1]
+ store i8 %65, i8* %66, align 1
+ %67 = fptosi float undef to i32 ; <i32> [#uses=1]
+ %68 = add i32 %67, 4 ; <i32> [#uses=1]
+ %69 = lshr i32 %68, 3 ; <i32> [#uses=1]
+ %70 = and i32 %69, 1023 ; <i32> [#uses=1]
+ %.sum5 = add i32 %70, 128 ; <i32> [#uses=1]
+ %71 = getelementptr i8* %0, i32 %.sum5 ; <i8*> [#uses=1]
+ %72 = load i8* %71, align 1 ; <i8> [#uses=1]
+ store i8 %72, i8* undef, align 1
+ %73 = fadd float %47, undef ; <float> [#uses=1]
+ %74 = fptosi float %73 to i32 ; <i32> [#uses=1]
+ %75 = add i32 %74, 4 ; <i32> [#uses=1]
+ %76 = lshr i32 %75, 3 ; <i32> [#uses=1]
+ %77 = and i32 %76, 1023 ; <i32> [#uses=1]
+ %.sum3 = add i32 %77, 128 ; <i32> [#uses=1]
+ %78 = getelementptr i8* %0, i32 %.sum3 ; <i8*> [#uses=1]
+ %79 = load i8* %78, align 1 ; <i8> [#uses=1]
+ store i8 %79, i8* undef, align 1
+ %80 = fsub float %47, undef ; <float> [#uses=1]
+ %81 = fptosi float %80 to i32 ; <i32> [#uses=1]
+ %82 = add i32 %81, 4 ; <i32> [#uses=1]
+ %83 = lshr i32 %82, 3 ; <i32> [#uses=1]
+ %84 = and i32 %83, 1023 ; <i32> [#uses=1]
+ %.sum1 = add i32 %84, 128 ; <i32> [#uses=1]
+ %85 = getelementptr i8* %0, i32 %.sum1 ; <i8*> [#uses=1]
+ %86 = load i8* %85, align 1 ; <i8> [#uses=1]
+ %87 = getelementptr i8* %40, i32 %.sum ; <i8*> [#uses=1]
+ store i8 %86, i8* %87, align 1
+ %88 = add i32 %ctr.116, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %88, 8 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb8
+
+return: ; preds = %bb8
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
new file mode 100644
index 000000000000..a8e86d55e786
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+@csize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1]
+@vsize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1]
+@cll = external global [20 x [10 x i8]] ; <[20 x [10 x i8]]*> [#uses=1]
+@lefline = external global [100 x [20 x i32]] ; <[100 x [20 x i32]]*> [#uses=1]
+@sep = external global [20 x i32] ; <[20 x i32]*> [#uses=1]
+
+define arm_apcscc void @main(i32 %argc, i8** %argv) noreturn nounwind {
+; CHECK: main:
+; CHECK: ldrb
+entry:
+ %nb.i.i.i = alloca [25 x i8], align 1 ; <[25 x i8]*> [#uses=0]
+ %line.i.i.i = alloca [200 x i8], align 1 ; <[200 x i8]*> [#uses=1]
+ %line.i = alloca [1024 x i8], align 1 ; <[1024 x i8]*> [#uses=0]
+ br i1 undef, label %bb.i.i, label %bb4.preheader.i
+
+bb.i.i: ; preds = %entry
+ unreachable
+
+bb4.preheader.i: ; preds = %entry
+ br i1 undef, label %tbl.exit, label %bb.i.preheader
+
+bb.i.preheader: ; preds = %bb4.preheader.i
+ %line3.i.i.i = getelementptr [200 x i8]* %line.i.i.i, i32 0, i32 0 ; <i8*> [#uses=1]
+ br label %bb.i
+
+bb.i: ; preds = %bb4.backedge.i, %bb.i.preheader
+ br i1 undef, label %bb3.i, label %bb4.backedge.i
+
+bb3.i: ; preds = %bb.i
+ br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i
+
+bb.i183.i.i: ; preds = %bb.i183.i.i, %bb3.i
+ br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i
+
+bb2.i184.i.i: ; preds = %bb.i183.i.i, %bb3.i
+ br i1 undef, label %bb5.i185.i.i, label %bb35.preheader.i.i.i
+
+bb35.preheader.i.i.i: ; preds = %bb2.i184.i.i
+ %0 = load i8* %line3.i.i.i, align 1 ; <i8> [#uses=1]
+ %1 = icmp eq i8 %0, 59 ; <i1> [#uses=1]
+ br i1 %1, label %bb36.i.i.i, label %bb9.i186.i.i
+
+bb5.i185.i.i: ; preds = %bb2.i184.i.i
+ br label %bb.i171.i.i
+
+bb9.i186.i.i: ; preds = %bb35.preheader.i.i.i
+ unreachable
+
+bb36.i.i.i: ; preds = %bb35.preheader.i.i.i
+ br label %bb.i171.i.i
+
+bb.i171.i.i: ; preds = %bb3.i176.i.i, %bb36.i.i.i, %bb5.i185.i.i
+ %2 = phi i32 [ %4, %bb3.i176.i.i ], [ 0, %bb36.i.i.i ], [ 0, %bb5.i185.i.i ] ; <i32> [#uses=6]
+ %scevgep16.i.i.i = getelementptr [20 x i32]* @sep, i32 0, i32 %2 ; <i32*> [#uses=1]
+ %scevgep18.i.i.i = getelementptr [20 x [10 x i8]]* @cll, i32 0, i32 %2, i32 0 ; <i8*> [#uses=0]
+ store i32 -1, i32* %scevgep16.i.i.i, align 4
+ br label %bb1.i175.i.i
+
+bb1.i175.i.i: ; preds = %bb1.i175.i.i, %bb.i171.i.i
+ %i.03.i172.i.i = phi i32 [ 0, %bb.i171.i.i ], [ %3, %bb1.i175.i.i ] ; <i32> [#uses=4]
+ %scevgep11.i.i.i = getelementptr [100 x [20 x i32]]* @lefline, i32 0, i32 %i.03.i172.i.i, i32 %2 ; <i32*> [#uses=1]
+ %scevgep12.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @vsize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=1]
+ %scevgep13.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @csize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=0]
+ store i8 0, i8* %scevgep12.i.i.i, align 1
+ store i32 0, i32* %scevgep11.i.i.i, align 4
+ store i32 108, i32* undef, align 4
+ %3 = add i32 %i.03.i172.i.i, 1 ; <i32> [#uses=2]
+ %exitcond.i174.i.i = icmp eq i32 %3, 100 ; <i1> [#uses=1]
+ br i1 %exitcond.i174.i.i, label %bb3.i176.i.i, label %bb1.i175.i.i
+
+bb3.i176.i.i: ; preds = %bb1.i175.i.i
+ %4 = add i32 %2, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb5.i177.i.i, label %bb.i171.i.i
+
+bb5.i177.i.i: ; preds = %bb3.i176.i.i
+ unreachable
+
+bb4.backedge.i: ; preds = %bb.i
+ br i1 undef, label %tbl.exit, label %bb.i
+
+tbl.exit: ; preds = %bb4.backedge.i, %bb4.preheader.i
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
new file mode 100644
index 000000000000..6cbfd0d8d4dc
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
+
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 } ; type %0
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 } ; type %1
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 } ; type %2
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 } ; type %3
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 } ; type %4
+ %"struct.std::CharVectorType" = type { %"struct.std::_Vector_base<char,std::allocator<char> >" }
+ %"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
+ %"struct.std::_Bit_iterator_base" = type { i32*, i32 }
+ %"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
+ %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i32* }
+ %"struct.std::_Vector_base<char,std::allocator<char> >" = type { %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" }
+ %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" = type { i8*, i8*, i8* }
+ %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" }
+ %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" = type { i16*, i16*, i16* }
+ %"struct.std::basic_ostream<char,std::char_traits<char> >.base" = type { i32 (...)** }
+ %"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" }
+ %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" }
+ %"struct.xalanc_1_8::FormatterListener" = type { %"struct.std::basic_ostream<char,std::char_traits<char> >.base", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32 }
+ %"struct.xalanc_1_8::FormatterToXML" = type { %"struct.xalanc_1_8::FormatterListener", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, %"struct.xalanc_1_8::XalanOutputStream"*, i16, [256 x i16], [256 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", i32, i32, %"struct.std::vector<bool,std::allocator<bool> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32, %"struct.std::CharVectorType", %"struct.std::vector<bool,std::allocator<bool> >", %0, %1, %2, %3, %0, %1, %2, %3, %4, i16*, i32 }
+ %"struct.xalanc_1_8::XalanDOMString" = type { %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32 }
+ %"struct.xalanc_1_8::XalanOutputStream" = type { i32 (...)**, i32, %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32, %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, %"struct.std::CharVectorType" }
+
+declare arm_apcscc void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"*)
+
+define arm_apcscc void @_ZN10xalanc_1_814FormatterToXML5cdataEPKtj(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length) {
+entry:
+ %0 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 13 ; <i8*> [#uses=1]
+ br i1 undef, label %bb4, label %bb
+
+bb: ; preds = %entry
+ store i8 0, i8* %0, align 1
+ %1 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
+ %2 = load i32 (...)*** %1, align 4 ; <i32 (...)**> [#uses=1]
+ %3 = getelementptr i32 (...)** %2, i32 11 ; <i32 (...)**> [#uses=1]
+ %4 = load i32 (...)** %3, align 4 ; <i32 (...)*> [#uses=1]
+ %5 = bitcast i32 (...)* %4 to void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)* ; <void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*> [#uses=1]
+ tail call arm_apcscc void %5(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length)
+ ret void
+
+bb4: ; preds = %entry
+ tail call arm_apcscc void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"* %this)
+ tail call arm_apcscc void undef(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 0, i32 %length, i8 zeroext undef)
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
new file mode 100644
index 000000000000..ebe9d469f229
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi
+; PR4681
+
+ %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+ %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str2 = external constant [30 x i8], align 1 ; <[30 x i8]*> [#uses=1]
+
+define arm_aapcscc i32 @__mf_heuristic_check(i32 %ptr, i32 %ptr_high) nounwind {
+entry:
+ br i1 undef, label %bb1, label %bb
+
+bb: ; preds = %entry
+ unreachable
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb9, label %bb2
+
+bb2: ; preds = %bb1
+ %0 = call i8* @llvm.frameaddress(i32 0) ; <i8*> [#uses=1]
+ %1 = call arm_aapcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0]
+ unreachable
+
+bb9: ; preds = %bb1
+ ret i32 undef
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+declare arm_aapcscc i32 @fprintf(%struct.FILE* noalias nocapture, i8* noalias nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
new file mode 100644
index 000000000000..319d29b790e8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim
+
+ type { %struct.GAP } ; type %0
+ type { i16, i8, i8 } ; type %1
+ type { [2 x i32], [2 x i32] } ; type %2
+ type { %struct.rec* } ; type %3
+ type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.FILE_POS = type { i8, i8, i16, i32 }
+ %struct.FIRST_UNION = type { %struct.FILE_POS }
+ %struct.FOURTH_UNION = type { %struct.STYLE }
+ %struct.GAP = type { i8, i8, i16 }
+ %struct.LIST = type { %struct.rec*, %struct.rec* }
+ %struct.SECOND_UNION = type { %1 }
+ %struct.STYLE = type { %0, %0, i16, i16, i32 }
+ %struct.THIRD_UNION = type { %2 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+ %struct.rec = type { %struct.head_type }
+@.str24239 = external constant [20 x i8], align 1 ; <[20 x i8]*> [#uses=1]
+@no_file_pos = external global %4 ; <%4*> [#uses=1]
+@zz_tmp = external global %struct.rec* ; <%struct.rec**> [#uses=1]
+@.str81872 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+@out_fp = external global %struct.FILE* ; <%struct.FILE**> [#uses=2]
+@cpexists = external global i32 ; <i32*> [#uses=2]
+@.str212784 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@.str1822946 = external constant [8 x i8], align 1 ; <[8 x i8]*> [#uses=1]
+@.str1842948 = external constant [11 x i8], align 1 ; <[11 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fwrite"(i8*, i32, i32, i8*)
+
+declare arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind
+
+declare arm_apcscc void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
+
+declare arm_apcscc i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+define arm_apcscc void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
+entry:
+ br label %bb5
+
+bb5: ; preds = %bb5, %entry
+ %.pn = phi %struct.rec* [ %y.0, %bb5 ], [ undef, %entry ] ; <%struct.rec*> [#uses=1]
+ %y.0.in = getelementptr %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ %y.0 = load %struct.rec** %y.0.in ; <%struct.rec*> [#uses=2]
+ br i1 undef, label %bb5, label %bb6
+
+bb6: ; preds = %bb5
+ %0 = call arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext 0, %struct.rec** undef, %struct.FILE_POS* null, i32* undef) nounwind ; <%struct.FILE*> [#uses=1]
+ br i1 false, label %bb.i, label %FontHalfXHeight.exit
+
+bb.i: ; preds = %bb6
+ br label %FontHalfXHeight.exit
+
+FontHalfXHeight.exit: ; preds = %bb.i, %bb6
+ br i1 undef, label %bb.i1, label %FontSize.exit
+
+bb.i1: ; preds = %FontHalfXHeight.exit
+ br label %FontSize.exit
+
+FontSize.exit: ; preds = %bb.i1, %FontHalfXHeight.exit
+ %1 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %2 = icmp ult i32 0, undef ; <i1> [#uses=1]
+ br i1 %2, label %bb.i5, label %FontName.exit
+
+bb.i5: ; preds = %FontSize.exit
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ br label %FontName.exit
+
+FontName.exit: ; preds = %bb.i5, %FontSize.exit
+ %3 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind ; <i32> [#uses=0]
+ %4 = call arm_apcscc i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
+ %5 = sub i32 %colmark, undef ; <i32> [#uses=1]
+ %6 = sub i32 %rowmark, undef ; <i32> [#uses=1]
+ %7 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %8 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ %9 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
+ %10 = load i32* %9, align 4 ; <i32> [#uses=1]
+ %11 = sub i32 0, %10 ; <i32> [#uses=1]
+ %12 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %13 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ br label %bb100.outer.outer
+
+bb100.outer.outer: ; preds = %bb79.critedge, %bb1.i3, %FontName.exit
+ %x_addr.0.ph.ph = phi %struct.rec* [ %x, %FontName.exit ], [ null, %bb79.critedge ], [ null, %bb1.i3 ] ; <%struct.rec*> [#uses=1]
+ %14 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
+ br label %bb100.outer
+
+bb.i80: ; preds = %bb3.i85
+ br i1 undef, label %bb2.i84, label %bb2.i51
+
+bb2.i84: ; preds = %bb100.outer, %bb.i80
+ br i1 undef, label %bb3.i77, label %bb3.i85
+
+bb3.i85: ; preds = %bb2.i84
+ br i1 false, label %StringBeginsWith.exit88, label %bb.i80
+
+StringBeginsWith.exit88: ; preds = %bb3.i85
+ br i1 undef, label %bb3.i77, label %bb2.i51
+
+bb2.i.i68: ; preds = %bb3.i77
+ br label %bb3.i77
+
+bb3.i77: ; preds = %bb2.i.i68, %StringBeginsWith.exit88, %bb2.i84
+ br i1 false, label %bb1.i58, label %bb2.i.i68
+
+bb1.i58: ; preds = %bb3.i77
+ unreachable
+
+bb.i47: ; preds = %bb3.i52
+ br i1 undef, label %bb2.i51, label %bb2.i.i15.critedge
+
+bb2.i51: ; preds = %bb.i47, %StringBeginsWith.exit88, %bb.i80
+ %15 = load i8* undef, align 1 ; <i8> [#uses=0]
+ br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
+
+bb3.i52: ; preds = %bb2.i51
+ br i1 false, label %StringBeginsWith.exit55, label %bb.i47
+
+StringBeginsWith.exit55thread-split: ; preds = %bb2.i51
+ br label %StringBeginsWith.exit55
+
+StringBeginsWith.exit55: ; preds = %StringBeginsWith.exit55thread-split, %bb3.i52
+ br label %bb2.i41
+
+bb2.i41: ; preds = %bb2.i41, %StringBeginsWith.exit55
+ br label %bb2.i41
+
+bb2.i.i15.critedge: ; preds = %bb.i47
+ %16 = call arm_apcscc i8* @fgets(i8* undef, i32 512, %struct.FILE* %0) nounwind ; <i8*> [#uses=0]
+ %iftmp.560.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
+ br label %bb100.outer
+
+bb2.i8: ; preds = %bb100.outer
+ br i1 undef, label %bb1.i3, label %bb79.critedge
+
+bb1.i3: ; preds = %bb2.i8
+ br label %bb100.outer.outer
+
+bb79.critedge: ; preds = %bb2.i8
+ store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+ br label %bb100.outer.outer
+
+bb100.outer: ; preds = %bb2.i.i15.critedge, %bb100.outer.outer
+ %state.0.ph = phi i32 [ 0, %bb100.outer.outer ], [ %iftmp.560.0, %bb2.i.i15.critedge ] ; <i32> [#uses=1]
+ %cond = icmp eq i32 %state.0.ph, 1 ; <i1> [#uses=1]
+ br i1 %cond, label %bb2.i8, label %bb2.i84
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
new file mode 100644
index 000000000000..a62b61290a5a
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -0,0 +1,508 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim -O3
+
+ type { i16, i8, i8 } ; type %0
+ type { [2 x i32], [2 x i32] } ; type %1
+ type { %struct.GAP } ; type %2
+ type { %struct.rec* } ; type %3
+ type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
+ type { i8, i8, i8, i8 } ; type %5
+ %struct.COMPOSITE = type { i8, i16, i16 }
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.FILE_POS = type { i8, i8, i16, i32 }
+ %struct.FIRST_UNION = type { %struct.FILE_POS }
+ %struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* }
+ %struct.FOURTH_UNION = type { %struct.STYLE }
+ %struct.GAP = type { i8, i8, i16 }
+ %struct.LIST = type { %struct.rec*, %struct.rec* }
+ %struct.SECOND_UNION = type { %0 }
+ %struct.STYLE = type { %2, %2, i16, i16, i32 }
+ %struct.THIRD_UNION = type { %1 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+ %struct.metrics = type { i16, i16, i16, i16, i16 }
+ %struct.rec = type { %struct.head_type }
+@.str24239 = external constant [20 x i8], align 1 ; <[20 x i8]*> [#uses=1]
+@no_file_pos = external global %4 ; <%4*> [#uses=1]
+@.str19294 = external constant [9 x i8], align 1 ; <[9 x i8]*> [#uses=1]
+@zz_lengths = external global [150 x i8] ; <[150 x i8]*> [#uses=1]
+@next_free.4772 = external global i8** ; <i8***> [#uses=3]
+@top_free.4773 = external global i8** ; <i8***> [#uses=2]
+@.str1575 = external constant [32 x i8], align 1 ; <[32 x i8]*> [#uses=1]
+@zz_free = external global [524 x %struct.rec*] ; <[524 x %struct.rec*]*> [#uses=2]
+@zz_hold = external global %struct.rec* ; <%struct.rec**> [#uses=5]
+@zz_tmp = external global %struct.rec* ; <%struct.rec**> [#uses=2]
+@zz_res = external global %struct.rec* ; <%struct.rec**> [#uses=2]
+@xx_link = external global %struct.rec* ; <%struct.rec**> [#uses=2]
+@font_count = external global i32 ; <i32*> [#uses=1]
+@.str81872 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+@.str101874 = external constant [30 x i8], align 1 ; <[30 x i8]*> [#uses=1]
+@.str111875 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@.str141878 = external constant [27 x i8], align 1 ; <[27 x i8]*> [#uses=1]
+@out_fp = external global %struct.FILE* ; <%struct.FILE**> [#uses=3]
+@.str192782 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@cpexists = external global i32 ; <i32*> [#uses=2]
+@.str212784 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@currentfont = external global i32 ; <i32*> [#uses=3]
+@wordcount = external global i32 ; <i32*> [#uses=1]
+@needs = external global %struct.rec* ; <%struct.rec**> [#uses=1]
+@.str742838 = external constant [6 x i8], align 1 ; <[6 x i8]*> [#uses=1]
+@.str752839 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+@.str1802944 = external constant [40 x i8], align 1 ; <[40 x i8]*> [#uses=1]
+@.str1822946 = external constant [8 x i8], align 1 ; <[8 x i8]*> [#uses=1]
+@.str1842948 = external constant [11 x i8], align 1 ; <[11 x i8]*> [#uses=1]
+@.str1852949 = external constant [23 x i8], align 1 ; <[23 x i8]*> [#uses=1]
+@.str1872951 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@.str1932957 = external constant [26 x i8], align 1 ; <[26 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fwrite"(i8*, i32, i32, i8*)
+
+declare arm_apcscc i32 @remove(i8* nocapture) nounwind
+
+declare arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind
+
+declare arm_apcscc %struct.rec* @MakeWord(i32, i8* nocapture, %struct.FILE_POS*) nounwind
+
+declare arm_apcscc void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fputs"(i8*, %struct.FILE*)
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare arm_apcscc i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+define arm_apcscc void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
+entry:
+ %buff = alloca [512 x i8], align 4 ; <[512 x i8]*> [#uses=5]
+ %0 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=2]
+ %1 = load i8* %0, align 4 ; <i8> [#uses=1]
+ %2 = add i8 %1, -94 ; <i8> [#uses=1]
+ %3 = icmp ugt i8 %2, 1 ; <i1> [#uses=1]
+ br i1 %3, label %bb, label %bb1
+
+bb: ; preds = %entry
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %4 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %5 = bitcast %struct.SECOND_UNION* %4 to %5* ; <%5*> [#uses=1]
+ %6 = getelementptr %5* %5, i32 0, i32 1 ; <i8*> [#uses=1]
+ %7 = load i8* %6, align 1 ; <i8> [#uses=1]
+ %8 = icmp eq i8 %7, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1
+ %9 = load %struct.rec** undef, align 4 ; <%struct.rec*> [#uses=0]
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb3
+ %y.0 = load %struct.rec** null ; <%struct.rec*> [#uses=2]
+ br i1 false, label %bb5, label %bb6
+
+bb6: ; preds = %bb5
+ %10 = load i8* %0, align 4 ; <i8> [#uses=1]
+ %11 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=1]
+ %12 = call arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext %10, %struct.rec** null, %struct.FILE_POS* %11, i32* undef) nounwind ; <%struct.FILE*> [#uses=4]
+ br i1 false, label %bb7, label %bb8
+
+bb7: ; preds = %bb6
+ unreachable
+
+bb8: ; preds = %bb6
+ %13 = and i32 undef, 4095 ; <i32> [#uses=2]
+ %14 = load i32* @currentfont, align 4 ; <i32> [#uses=0]
+ br i1 false, label %bb10, label %bb9
+
+bb9: ; preds = %bb8
+ %15 = icmp ult i32 0, %13 ; <i1> [#uses=1]
+ br i1 %15, label %bb.i, label %FontHalfXHeight.exit
+
+bb.i: ; preds = %bb9
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8]* @.str111875, i32 0, i32 0)) nounwind
+ %.pre186 = load i32* @currentfont, align 4 ; <i32> [#uses=1]
+ br label %FontHalfXHeight.exit
+
+FontHalfXHeight.exit: ; preds = %bb.i, %bb9
+ %16 = phi i32 [ %.pre186, %bb.i ], [ %13, %bb9 ] ; <i32> [#uses=1]
+ br i1 false, label %bb.i1, label %bb1.i
+
+bb.i1: ; preds = %FontHalfXHeight.exit
+ br label %bb1.i
+
+bb1.i: ; preds = %bb.i1, %FontHalfXHeight.exit
+ br i1 undef, label %bb2.i, label %FontSize.exit
+
+bb2.i: ; preds = %bb1.i
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 37, i32 61, i8* getelementptr ([30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
+ unreachable
+
+FontSize.exit: ; preds = %bb1.i
+ %17 = getelementptr %struct.FONT_INFO* undef, i32 %16, i32 5 ; <%struct.rec**> [#uses=0]
+ %18 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %19 = load i32* @currentfont, align 4 ; <i32> [#uses=2]
+ %20 = load i32* @font_count, align 4 ; <i32> [#uses=1]
+ %21 = icmp ult i32 %20, %19 ; <i1> [#uses=1]
+ br i1 %21, label %bb.i5, label %FontName.exit
+
+bb.i5: ; preds = %FontSize.exit
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ br label %FontName.exit
+
+FontName.exit: ; preds = %bb.i5, %FontSize.exit
+ %22 = phi %struct.FONT_INFO* [ undef, %bb.i5 ], [ undef, %FontSize.exit ] ; <%struct.FONT_INFO*> [#uses=1]
+ %23 = getelementptr %struct.FONT_INFO* %22, i32 %19, i32 5 ; <%struct.rec**> [#uses=0]
+ %24 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0]
+ br label %bb10
+
+bb10: ; preds = %FontName.exit, %bb8
+ %25 = call arm_apcscc i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
+ %26 = sub i32 %rowmark, undef ; <i32> [#uses=1]
+ %27 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %28 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ %29 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0]
+ %30 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ %31 = load i32* %30, align 4 ; <i32> [#uses=1]
+ %32 = sub i32 0, %31 ; <i32> [#uses=1]
+ %33 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %34 = sub i32 0, %33 ; <i32> [#uses=1]
+ %35 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %36 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ %37 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ %38 = getelementptr %struct.rec* %37, i32 0, i32 0, i32 4 ; <%struct.FOURTH_UNION*> [#uses=1]
+ %39 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0]
+ %buff14 = getelementptr [512 x i8]* %buff, i32 0, i32 0 ; <i8*> [#uses=5]
+ %40 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
+ %iftmp.506.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
+ %41 = getelementptr [512 x i8]* %buff, i32 0, i32 26 ; <i8*> [#uses=1]
+ br label %bb100.outer.outer
+
+bb100.outer.outer: ; preds = %bb83, %bb10
+ %state.0.ph.ph = phi i32 [ %iftmp.506.0, %bb10 ], [ undef, %bb83 ] ; <i32> [#uses=1]
+ %x_addr.0.ph.ph = phi %struct.rec* [ %x, %bb10 ], [ %71, %bb83 ] ; <%struct.rec*> [#uses=1]
+ %42 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
+ br label %bb100.outer
+
+bb.i80: ; preds = %bb3.i85
+ %43 = icmp eq i8 %44, %46 ; <i1> [#uses=1]
+ %indvar.next.i79 = add i32 %indvar.i81, 1 ; <i32> [#uses=1]
+ br i1 %43, label %bb2.i84, label %bb2.i51
+
+bb2.i84: ; preds = %bb100.outer, %bb.i80
+ %indvar.i81 = phi i32 [ %indvar.next.i79, %bb.i80 ], [ 0, %bb100.outer ] ; <i32> [#uses=3]
+ %pp.0.i82 = getelementptr [27 x i8]* @.str141878, i32 0, i32 %indvar.i81 ; <i8*> [#uses=2]
+ %sp.0.i83 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i81 ; <i8*> [#uses=1]
+ %44 = load i8* %sp.0.i83, align 1 ; <i8> [#uses=2]
+ %45 = icmp eq i8 %44, 0 ; <i1> [#uses=1]
+ br i1 %45, label %StringBeginsWith.exit88thread-split, label %bb3.i85
+
+bb3.i85: ; preds = %bb2.i84
+ %46 = load i8* %pp.0.i82, align 1 ; <i8> [#uses=3]
+ %47 = icmp eq i8 %46, 0 ; <i1> [#uses=1]
+ br i1 %47, label %StringBeginsWith.exit88, label %bb.i80
+
+StringBeginsWith.exit88thread-split: ; preds = %bb2.i84
+ %.pr = load i8* %pp.0.i82 ; <i8> [#uses=1]
+ br label %StringBeginsWith.exit88
+
+StringBeginsWith.exit88: ; preds = %StringBeginsWith.exit88thread-split, %bb3.i85
+ %48 = phi i8 [ %.pr, %StringBeginsWith.exit88thread-split ], [ %46, %bb3.i85 ] ; <i8> [#uses=1]
+ %phitmp91 = icmp eq i8 %48, 0 ; <i1> [#uses=1]
+ br i1 %phitmp91, label %bb3.i77, label %bb2.i51
+
+bb2.i.i68: ; preds = %bb3.i77
+ br i1 false, label %bb2.i51, label %bb2.i75
+
+bb2.i75: ; preds = %bb2.i.i68
+ br label %bb3.i77
+
+bb3.i77: ; preds = %bb2.i75, %StringBeginsWith.exit88
+ %sp.0.i76 = getelementptr [512 x i8]* %buff, i32 0, i32 undef ; <i8*> [#uses=1]
+ %49 = load i8* %sp.0.i76, align 1 ; <i8> [#uses=1]
+ %50 = icmp eq i8 %49, 0 ; <i1> [#uses=1]
+ br i1 %50, label %bb24, label %bb2.i.i68
+
+bb24: ; preds = %bb3.i77
+ %51 = call arm_apcscc %struct.rec* @MakeWord(i32 11, i8* %41, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=0]
+ %52 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
+ %53 = zext i8 %52 to i32 ; <i32> [#uses=2]
+ %54 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %53 ; <%struct.rec**> [#uses=2]
+ %55 = load %struct.rec** %54, align 4 ; <%struct.rec*> [#uses=3]
+ %56 = icmp eq %struct.rec* %55, null ; <i1> [#uses=1]
+ br i1 %56, label %bb27, label %bb28
+
+bb27: ; preds = %bb24
+ br i1 undef, label %bb.i56, label %GetMemory.exit62
+
+bb.i56: ; preds = %bb27
+ br i1 undef, label %bb1.i58, label %bb2.i60
+
+bb1.i58: ; preds = %bb.i56
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ br label %bb2.i60
+
+bb2.i60: ; preds = %bb1.i58, %bb.i56
+ %.pre1.i59 = phi i8** [ undef, %bb1.i58 ], [ undef, %bb.i56 ] ; <i8**> [#uses=1]
+ store i8** undef, i8*** @top_free.4773, align 4
+ br label %GetMemory.exit62
+
+GetMemory.exit62: ; preds = %bb2.i60, %bb27
+ %57 = phi i8** [ %.pre1.i59, %bb2.i60 ], [ undef, %bb27 ] ; <i8**> [#uses=1]
+ %58 = getelementptr i8** %57, i32 %53 ; <i8**> [#uses=1]
+ store i8** %58, i8*** @next_free.4772, align 4
+ store %struct.rec* undef, %struct.rec** @zz_hold, align 4
+ br label %bb29
+
+bb28: ; preds = %bb24
+ store %struct.rec* %55, %struct.rec** @zz_hold, align 4
+ %59 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %59, %struct.rec** %54, align 4
+ br label %bb29
+
+bb29: ; preds = %bb28, %GetMemory.exit62
+ %.pre184 = phi %struct.rec* [ %55, %bb28 ], [ undef, %GetMemory.exit62 ] ; <%struct.rec*> [#uses=3]
+ store i8 0, i8* undef
+ store %struct.rec* %.pre184, %struct.rec** @xx_link, align 4
+ br i1 undef, label %bb35, label %bb31
+
+bb31: ; preds = %bb29
+ store %struct.rec* %.pre184, %struct.rec** undef
+ br label %bb35
+
+bb35: ; preds = %bb31, %bb29
+ br i1 undef, label %bb41, label %bb37
+
+bb37: ; preds = %bb35
+ %60 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %60, %struct.rec** undef
+ store %struct.rec* undef, %struct.rec** null
+ store %struct.rec* %.pre184, %struct.rec** null, align 4
+ br label %bb41
+
+bb41: ; preds = %bb37, %bb35
+ %61 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=1]
+ %62 = icmp eq i8* %61, null ; <i1> [#uses=1]
+ %iftmp.554.0 = select i1 %62, i32 2, i32 1 ; <i32> [#uses=1]
+ br label %bb100.outer
+
+bb.i47: ; preds = %bb3.i52
+ %63 = icmp eq i8 %64, %65 ; <i1> [#uses=1]
+ br i1 %63, label %bb2.i51, label %bb2.i41
+
+bb2.i51: ; preds = %bb.i47, %bb2.i.i68, %StringBeginsWith.exit88, %bb.i80
+ %pp.0.i49 = getelementptr [17 x i8]* @.str1872951, i32 0, i32 0 ; <i8*> [#uses=1]
+ %64 = load i8* null, align 1 ; <i8> [#uses=1]
+ br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
+
+bb3.i52: ; preds = %bb2.i51
+ %65 = load i8* %pp.0.i49, align 1 ; <i8> [#uses=1]
+ br i1 false, label %StringBeginsWith.exit55, label %bb.i47
+
+StringBeginsWith.exit55thread-split: ; preds = %bb2.i51
+ br label %StringBeginsWith.exit55
+
+StringBeginsWith.exit55: ; preds = %StringBeginsWith.exit55thread-split, %bb3.i52
+ br i1 false, label %bb49, label %bb2.i41
+
+bb49: ; preds = %StringBeginsWith.exit55
+ br label %bb2.i41
+
+bb2.i41: ; preds = %bb2.i41, %bb49, %StringBeginsWith.exit55, %bb.i47
+ br i1 false, label %bb2.i41, label %bb2.i.i15
+
+bb2.i.i15: ; preds = %bb2.i41
+ %pp.0.i.i13 = getelementptr [6 x i8]* @.str742838, i32 0, i32 0 ; <i8*> [#uses=1]
+ br i1 false, label %StringBeginsWith.exitthread-split.i18, label %bb3.i.i16
+
+bb3.i.i16: ; preds = %bb2.i.i15
+ %66 = load i8* %pp.0.i.i13, align 1 ; <i8> [#uses=1]
+ br label %StringBeginsWith.exit.i20
+
+StringBeginsWith.exitthread-split.i18: ; preds = %bb2.i.i15
+ br label %StringBeginsWith.exit.i20
+
+StringBeginsWith.exit.i20: ; preds = %StringBeginsWith.exitthread-split.i18, %bb3.i.i16
+ %67 = phi i8 [ undef, %StringBeginsWith.exitthread-split.i18 ], [ %66, %bb3.i.i16 ] ; <i8> [#uses=1]
+ %phitmp.i19 = icmp eq i8 %67, 0 ; <i1> [#uses=1]
+ br i1 %phitmp.i19, label %bb58, label %bb2.i6.i26
+
+bb2.i6.i26: ; preds = %bb2.i6.i26, %StringBeginsWith.exit.i20
+ %indvar.i3.i23 = phi i32 [ %indvar.next.i1.i21, %bb2.i6.i26 ], [ 0, %StringBeginsWith.exit.i20 ] ; <i32> [#uses=3]
+ %sp.0.i5.i25 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=0]
+ %pp.0.i4.i24 = getelementptr [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=1]
+ %68 = load i8* %pp.0.i4.i24, align 1 ; <i8> [#uses=0]
+ %indvar.next.i1.i21 = add i32 %indvar.i3.i23, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb2.i6.i26, label %bb55
+
+bb55: ; preds = %bb2.i6.i26
+ %69 = call arm_apcscc i32 @"\01_fputs"(i8* %buff14, %struct.FILE* undef) nounwind ; <i32> [#uses=0]
+ unreachable
+
+bb58: ; preds = %StringBeginsWith.exit.i20
+ %70 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
+ %iftmp.560.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
+ br label %bb100.outer
+
+bb.i7: ; preds = %bb3.i
+ br i1 false, label %bb2.i8, label %bb2.i.i
+
+bb2.i8: ; preds = %bb100.outer, %bb.i7
+ br i1 undef, label %StringBeginsWith.exitthread-split, label %bb3.i
+
+bb3.i: ; preds = %bb2.i8
+ br i1 undef, label %StringBeginsWith.exit, label %bb.i7
+
+StringBeginsWith.exitthread-split: ; preds = %bb2.i8
+ br label %StringBeginsWith.exit
+
+StringBeginsWith.exit: ; preds = %StringBeginsWith.exitthread-split, %bb3.i
+ %phitmp93 = icmp eq i8 undef, 0 ; <i1> [#uses=1]
+ br i1 %phitmp93, label %bb66, label %bb2.i.i
+
+bb66: ; preds = %StringBeginsWith.exit
+ %71 = call arm_apcscc %struct.rec* @MakeWord(i32 11, i8* undef, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=4]
+ %72 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
+ %73 = zext i8 %72 to i32 ; <i32> [#uses=2]
+ %74 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %73 ; <%struct.rec**> [#uses=2]
+ %75 = load %struct.rec** %74, align 4 ; <%struct.rec*> [#uses=3]
+ %76 = icmp eq %struct.rec* %75, null ; <i1> [#uses=1]
+ br i1 %76, label %bb69, label %bb70
+
+bb69: ; preds = %bb66
+ br i1 undef, label %bb.i2, label %GetMemory.exit
+
+bb.i2: ; preds = %bb69
+ %77 = call arm_apcscc noalias i8* @calloc(i32 1020, i32 4) nounwind ; <i8*> [#uses=1]
+ %78 = bitcast i8* %77 to i8** ; <i8**> [#uses=3]
+ store i8** %78, i8*** @next_free.4772, align 4
+ br i1 undef, label %bb1.i3, label %bb2.i4
+
+bb1.i3: ; preds = %bb.i2
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ br label %bb2.i4
+
+bb2.i4: ; preds = %bb1.i3, %bb.i2
+ %.pre1.i = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1]
+ %79 = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1]
+ %80 = getelementptr i8** %79, i32 1020 ; <i8**> [#uses=1]
+ store i8** %80, i8*** @top_free.4773, align 4
+ br label %GetMemory.exit
+
+GetMemory.exit: ; preds = %bb2.i4, %bb69
+ %81 = phi i8** [ %.pre1.i, %bb2.i4 ], [ undef, %bb69 ] ; <i8**> [#uses=2]
+ %82 = bitcast i8** %81 to %struct.rec* ; <%struct.rec*> [#uses=3]
+ %83 = getelementptr i8** %81, i32 %73 ; <i8**> [#uses=1]
+ store i8** %83, i8*** @next_free.4772, align 4
+ store %struct.rec* %82, %struct.rec** @zz_hold, align 4
+ br label %bb71
+
+bb70: ; preds = %bb66
+ %84 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %84, %struct.rec** %74, align 4
+ br label %bb71
+
+bb71: ; preds = %bb70, %GetMemory.exit
+ %.pre185 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=8]
+ %85 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=1]
+ %86 = getelementptr %struct.rec* %85, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=0]
+ %87 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=0]
+ %88 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* %.pre185, %struct.rec** @xx_link, align 4
+ store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
+ %89 = load %struct.rec** @needs, align 4 ; <%struct.rec*> [#uses=2]
+ store %struct.rec* %89, %struct.rec** @zz_hold, align 4
+ br i1 false, label %bb77, label %bb73
+
+bb73: ; preds = %bb71
+ %90 = getelementptr %struct.rec* %89, i32 0, i32 0, i32 0, i32 0, i32 0 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+ store %struct.rec* %.pre185, %struct.rec** %90
+ store %struct.rec* %.pre185, %struct.rec** undef, align 4
+ br label %bb77
+
+bb77: ; preds = %bb73, %bb71
+ store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
+ store %struct.rec* %71, %struct.rec** @zz_hold, align 4
+ br i1 undef, label %bb83, label %bb79
+
+bb79: ; preds = %bb77
+ %91 = getelementptr %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+ %92 = load %struct.rec** %88, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %92, %struct.rec** %91
+ %93 = getelementptr %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* %71, %struct.rec** %93, align 4
+ store %struct.rec* %.pre185, %struct.rec** undef, align 4
+ br label %bb83
+
+bb83: ; preds = %bb79, %bb77
+ br label %bb100.outer.outer
+
+bb.i.i: ; preds = %bb3.i.i
+ br i1 undef, label %bb2.i.i, label %bb2.i6.i
+
+bb2.i.i: ; preds = %bb.i.i, %StringBeginsWith.exit, %bb.i7
+ br i1 undef, label %StringBeginsWith.exitthread-split.i, label %bb3.i.i
+
+bb3.i.i: ; preds = %bb2.i.i
+ br i1 undef, label %StringBeginsWith.exit.i, label %bb.i.i
+
+StringBeginsWith.exitthread-split.i: ; preds = %bb2.i.i
+ br label %StringBeginsWith.exit.i
+
+StringBeginsWith.exit.i: ; preds = %StringBeginsWith.exitthread-split.i, %bb3.i.i
+ br i1 false, label %bb94, label %bb2.i6.i
+
+bb.i2.i: ; preds = %bb3.i7.i
+ br i1 false, label %bb2.i6.i, label %bb91
+
+bb2.i6.i: ; preds = %bb.i2.i, %StringBeginsWith.exit.i, %bb.i.i
+ br i1 undef, label %strip_out.exitthread-split, label %bb3.i7.i
+
+bb3.i7.i: ; preds = %bb2.i6.i
+ %94 = load i8* undef, align 1 ; <i8> [#uses=1]
+ br i1 undef, label %strip_out.exit, label %bb.i2.i
+
+strip_out.exitthread-split: ; preds = %bb2.i6.i
+ %.pr100 = load i8* undef ; <i8> [#uses=1]
+ br label %strip_out.exit
+
+strip_out.exit: ; preds = %strip_out.exitthread-split, %bb3.i7.i
+ %95 = phi i8 [ %.pr100, %strip_out.exitthread-split ], [ %94, %bb3.i7.i ] ; <i8> [#uses=0]
+ br i1 undef, label %bb94, label %bb91
+
+bb91: ; preds = %strip_out.exit, %bb.i2.i
+ unreachable
+
+bb94: ; preds = %strip_out.exit, %StringBeginsWith.exit.i
+ %96 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb100.outer: ; preds = %bb58, %bb41, %bb100.outer.outer
+ %state.0.ph = phi i32 [ %state.0.ph.ph, %bb100.outer.outer ], [ %iftmp.560.0, %bb58 ], [ %iftmp.554.0, %bb41 ] ; <i32> [#uses=1]
+ switch i32 %state.0.ph, label %bb2.i84 [
+ i32 2, label %bb101.split
+ i32 1, label %bb2.i8
+ ]
+
+bb101.split: ; preds = %bb100.outer
+ %97 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ br i1 %97, label %bb103, label %bb102
+
+bb102: ; preds = %bb101.split
+ %98 = call arm_apcscc i32 @remove(i8* getelementptr ([9 x i8]* @.str19294, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ unreachable
+
+bb103: ; preds = %bb101.split
+ %99 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %100 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @wordcount, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
new file mode 100644
index 000000000000..3cbb212b628b
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | grep fcpys | count 1
+; rdar://7117307
+
+ %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+ %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+ %struct.Patient = type { i32, i32, i32, %struct.Village* }
+ %struct.Results = type { float, float, float }
+ %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc void @get_results(%struct.Results* noalias nocapture sret %agg.result, %struct.Village* %village) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb6.preheader
+
+bb6.preheader: ; preds = %entry
+ call void @llvm.memcpy.i32(i8* undef, i8* undef, i32 12, i32 4)
+ br i1 undef, label %bb15, label %bb13
+
+bb: ; preds = %entry
+ ret void
+
+bb13: ; preds = %bb13, %bb6.preheader
+ %0 = fadd float undef, undef ; <float> [#uses=1]
+ %1 = fadd float undef, 1.000000e+00 ; <float> [#uses=1]
+ br i1 undef, label %bb15, label %bb13
+
+bb15: ; preds = %bb13, %bb6.preheader
+ %r1.0.0.lcssa = phi float [ 0.000000e+00, %bb6.preheader ], [ %1, %bb13 ] ; <float> [#uses=1]
+ %r1.1.0.lcssa = phi float [ undef, %bb6.preheader ], [ %0, %bb13 ] ; <float> [#uses=0]
+ store float %r1.0.0.lcssa, float* undef, align 4
+ ret void
+}
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
new file mode 100644
index 000000000000..acf562c74a2a
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; rdar://7117307
+
+ %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+ %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+ %struct.Patient = type { i32, i32, i32, %struct.Village* }
+ %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.List* @sim(%struct.Village* %village) nounwind {
+entry:
+ br i1 undef, label %bb14, label %bb3.preheader
+
+bb3.preheader: ; preds = %entry
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb3.preheader
+ br i1 undef, label %bb11, label %bb5
+
+bb11: ; preds = %bb5
+ %0 = fmul float undef, 0x41E0000000000000 ; <float> [#uses=1]
+ %1 = fptosi float %0 to i32 ; <i32> [#uses=1]
+ store i32 %1, i32* undef, align 4
+ br i1 undef, label %generate_patient.exit, label %generate_patient.exit.thread
+
+generate_patient.exit.thread: ; preds = %bb11
+ ret %struct.List* null
+
+generate_patient.exit: ; preds = %bb11
+ br i1 undef, label %bb14, label %bb12
+
+bb12: ; preds = %generate_patient.exit
+ br i1 undef, label %bb.i, label %bb1.i
+
+bb.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb1.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb14: ; preds = %generate_patient.exit, %entry
+ ret %struct.List* undef
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
new file mode 100644
index 000000000000..3ada02676bfc
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; rdar://7117307
+
+ %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+ %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+ %struct.Patient = type { i32, i32, i32, %struct.Village* }
+ %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.List* @sim(%struct.Village* %village) nounwind {
+entry:
+ br i1 undef, label %bb14, label %bb3.preheader
+
+bb3.preheader: ; preds = %entry
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb3.preheader
+ br i1 undef, label %bb11, label %bb5
+
+bb11: ; preds = %bb5
+ %0 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %1 = xor i32 %0, 123459876 ; <i32> [#uses=1]
+ %2 = sdiv i32 %1, 127773 ; <i32> [#uses=1]
+ %3 = mul i32 %2, 2836 ; <i32> [#uses=1]
+ %4 = sub i32 0, %3 ; <i32> [#uses=1]
+ %5 = xor i32 %4, 123459876 ; <i32> [#uses=1]
+ %idum_addr.0.i.i = select i1 undef, i32 undef, i32 %5 ; <i32> [#uses=1]
+ %6 = sitofp i32 %idum_addr.0.i.i to double ; <double> [#uses=1]
+ %7 = fmul double %6, 0x3E00000000200000 ; <double> [#uses=1]
+ %8 = fptrunc double %7 to float ; <float> [#uses=2]
+ %9 = fmul float %8, 0x41E0000000000000 ; <float> [#uses=1]
+ %10 = fptosi float %9 to i32 ; <i32> [#uses=1]
+ store i32 %10, i32* undef, align 4
+ %11 = fpext float %8 to double ; <double> [#uses=1]
+ %12 = fcmp ogt double %11, 6.660000e-01 ; <i1> [#uses=1]
+ br i1 %12, label %generate_patient.exit, label %generate_patient.exit.thread
+
+generate_patient.exit.thread: ; preds = %bb11
+ ret %struct.List* null
+
+generate_patient.exit: ; preds = %bb11
+ br i1 undef, label %bb14, label %bb12
+
+bb12: ; preds = %generate_patient.exit
+ br i1 undef, label %bb.i, label %bb1.i
+
+bb.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb1.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb14: ; preds = %generate_patient.exit, %entry
+ ret %struct.List* undef
+}
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
new file mode 100644
index 000000000000..03f9facfa955
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
+; PR4659
+; PR4682
+
+define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
+entry:
+; CHECK: __gcov_execlp:
+; CHECK: mov sp, r7
+; CHECK: sub sp, #1 * 4
+ call arm_aapcscc void @__gcov_flush() nounwind
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb5, label %bb
+
+bb5: ; preds = %bb, %entry
+ %0 = alloca i8*, i32 undef, align 4 ; <i8**> [#uses=1]
+ %1 = call arm_aapcscc i32 @execvp(i8* %path, i8** %0) nounwind ; <i32> [#uses=1]
+ ret i32 %1
+}
+
+declare hidden arm_aapcscc void @__gcov_flush()
+
+declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
new file mode 100644
index 000000000000..93f5a0f6c41f
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2
+; PR4686
+
+ %a = type { i32 (...)** }
+ %b = type { %a }
+ %c = type { float, float, float, float }
+
+declare arm_aapcs_vfpcc float @bar(%c*)
+
+define arm_aapcs_vfpcc void @foo(%b* %x, %c* %y) {
+entry:
+ %0 = call arm_aapcs_vfpcc float @bar(%c* %y) ; <float> [#uses=0]
+ %1 = fadd float undef, undef ; <float> [#uses=1]
+ store float %1, float* undef, align 8
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
new file mode 100644
index 000000000000..090ed2d81f60
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -arm-use-neon-fp
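+; Compile-only test: with no FileCheck or grep stage, this passes as long as llc
+; finishes codegen without crashing.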
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
+ %struct.JQUANT_TBL = type { [64 x i16], i32 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.anon = type { [8 x i32], [48 x i8] }
+ %struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
+ %struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
+ %struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
+ %struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
+ %struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
+ %struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
+ %struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
+ %struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
+ %struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
+ %struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
+ %struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+ %struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
+ %struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
+ %struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
+ %struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
+ %struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
+ %struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+ %struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
+ %struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
+
+define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %1 = fmul float undef, %0 ; <float> [#uses=2]
+ %tmp73 = add i32 0, 224 ; <i32> [#uses=1]
+ %scevgep74 = getelementptr i8* null, i32 %tmp73 ; <i8*> [#uses=1]
+ %scevgep7475 = bitcast i8* %scevgep74 to float* ; <float*> [#uses=1]
+ %2 = load float* null, align 4 ; <float> [#uses=1]
+ %3 = fmul float 0.000000e+00, %2 ; <float> [#uses=2]
+ %4 = fadd float %1, %3 ; <float> [#uses=1]
+ %5 = fsub float %1, %3 ; <float> [#uses=2]
+ %6 = fadd float undef, 0.000000e+00 ; <float> [#uses=2]
+ %7 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %8 = fsub float %7, %6 ; <float> [#uses=2]
+ %9 = fsub float %4, %6 ; <float> [#uses=1]
+ %10 = fadd float %5, %8 ; <float> [#uses=2]
+ %11 = fsub float %5, %8 ; <float> [#uses=1]
+ %12 = sitofp i16 undef to float ; <float> [#uses=1]
+ %13 = fmul float %12, 0.000000e+00 ; <float> [#uses=2]
+ %14 = sitofp i16 undef to float ; <float> [#uses=1]
+ %15 = load float* %scevgep7475, align 4 ; <float> [#uses=1]
+ %16 = fmul float %14, %15 ; <float> [#uses=2]
+ %17 = fadd float undef, undef ; <float> [#uses=2]
+ %18 = fadd float %13, %16 ; <float> [#uses=2]
+ %19 = fsub float %13, %16 ; <float> [#uses=1]
+ %20 = fadd float %18, %17 ; <float> [#uses=2]
+ %21 = fsub float %18, %17 ; <float> [#uses=1]
+ %22 = fmul float %21, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %23 = fmul float undef, 0x3FFD906BC0000000 ; <float> [#uses=2]
+ %24 = fmul float %19, 0x3FF1517A80000000 ; <float> [#uses=1]
+ %25 = fsub float %24, %23 ; <float> [#uses=1]
+ %26 = fadd float undef, %23 ; <float> [#uses=1]
+ %27 = fsub float %26, %20 ; <float> [#uses=3]
+ %28 = fsub float %22, %27 ; <float> [#uses=2]
+ %29 = fadd float %25, %28 ; <float> [#uses=1]
+ %30 = fadd float undef, %20 ; <float> [#uses=1]
+ store float %30, float* undef, align 4
+ %31 = fadd float %10, %27 ; <float> [#uses=1]
+ store float %31, float* undef, align 4
+ %32 = fsub float %10, %27 ; <float> [#uses=1]
+ store float %32, float* undef, align 4
+ %33 = fadd float %11, %28 ; <float> [#uses=1]
+ store float %33, float* undef, align 4
+ %34 = fsub float %9, %29 ; <float> [#uses=1]
+ store float %34, float* undef, align 4
+ br label %bb
+}
diff --git a/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
new file mode 100644
index 000000000000..a0f99187a4a6
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2
+; PR4686
+
+@g_d = external global double ; <double*> [#uses=1]
+
+define arm_aapcscc void @foo(float %yIncr) {
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc float @bar() ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ store double %1, double* @g_d, align 8
+ br label %bb4
+
+bb4: ; preds = %bb, %entry
+ unreachable
+}
+
+declare arm_aapcs_vfpcc float @bar()
diff --git a/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
new file mode 100644
index 000000000000..cbe250b6df7a
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+vfp2
+
+define arm_apcscc float @t1(i32 %v0) nounwind {
+entry:
+ store i32 undef, i32* undef, align 4
+ %0 = load [4 x i8]** undef, align 4 ; <[4 x i8]*> [#uses=1]
+ %1 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %2 = zext i8 %1 to i32 ; <i32> [#uses=1]
+ %3 = getelementptr [4 x i8]* %0, i32 %v0, i32 0 ; <i8*> [#uses=1]
+ %4 = load i8* %3, align 1 ; <i8> [#uses=1]
+ %5 = zext i8 %4 to i32 ; <i32> [#uses=1]
+ %6 = sub i32 %5, %2 ; <i32> [#uses=1]
+ %7 = sitofp i32 %6 to float ; <float> [#uses=1]
+ ret float %7
+}
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
new file mode 100644
index 000000000000..e84e86702493
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+
+%struct.pix_pos = type { i32, i32, i32, i32, i32, i32 }
+
+@getNeighbour = external global void (i32, i32, i32, i32, %struct.pix_pos*)*, align 4 ; <void (i32, i32, i32, i32, %struct.pix_pos*)**> [#uses=2]
+
+define arm_apcscc void @t() nounwind {
+; CHECK: t:
+; CHECK: ittt eq
+; CHECK-NEXT: addeq
+; CHECK-NEXT: movweq
+; CHECK-NEXT: movteq
+entry:
+ %pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2]
+ br i1 undef, label %land.rhs, label %lor.end
+
+land.rhs: ; preds = %entry
+ br label %lor.end
+
+lor.end: ; preds = %land.rhs, %entry
+ switch i32 0, label %if.end371 [
+ i32 10, label %if.then366
+ i32 14, label %if.then366
+ ]
+
+if.then366: ; preds = %lor.end, %lor.end
+ unreachable
+
+if.end371: ; preds = %lor.end
+ %arrayidx56.2.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 2 ; <%struct.pix_pos*> [#uses=1]
+ %arrayidx56.3.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 3 ; <%struct.pix_pos*> [#uses=1]
+ br i1 undef, label %for.body1857, label %for.end4557
+
+for.body1857: ; preds = %if.end371
+ br i1 undef, label %if.then1867, label %for.cond1933
+
+if.then1867: ; preds = %for.body1857
+ unreachable
+
+for.cond1933: ; preds = %for.body1857
+ br i1 undef, label %for.body1940, label %if.then4493
+
+for.body1940: ; preds = %for.cond1933
+ %shl = shl i32 undef, 2 ; <i32> [#uses=1]
+ %shl1959 = shl i32 undef, 2 ; <i32> [#uses=4]
+ br i1 undef, label %if.then1992, label %if.else2003
+
+if.then1992: ; preds = %for.body1940
+ %tmp14.i302 = load i32* undef ; <i32> [#uses=4]
+ %add.i307452 = or i32 %shl1959, 1 ; <i32> [#uses=1]
+ %sub.i308 = add i32 %shl, -1 ; <i32> [#uses=4]
+ call arm_apcscc void undef(i32 %tmp14.i302, i32 %sub.i308, i32 %shl1959, i32 0, %struct.pix_pos* undef) nounwind
+ %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+ call arm_apcscc void %tmp49.i309(i32 %tmp14.i302, i32 %sub.i308, i32 %add.i307452, i32 0, %struct.pix_pos* null) nounwind
+ %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+ call arm_apcscc void %tmp49.1.i(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.2.i) nounwind
+ call arm_apcscc void undef(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.3.i) nounwind
+ unreachable
+
+if.else2003: ; preds = %for.body1940
+ switch i32 undef, label %if.then2015 [
+ i32 10, label %if.then4382
+ i32 14, label %if.then4382
+ ]
+
+if.then2015: ; preds = %if.else2003
+ br i1 undef, label %if.else2298, label %if.then2019
+
+if.then2019: ; preds = %if.then2015
+ br i1 undef, label %if.then2065, label %if.else2081
+
+if.then2065: ; preds = %if.then2019
+ br label %if.end2128
+
+if.else2081: ; preds = %if.then2019
+ br label %if.end2128
+
+if.end2128: ; preds = %if.else2081, %if.then2065
+ unreachable
+
+if.else2298: ; preds = %if.then2015
+ br i1 undef, label %land.lhs.true2813, label %cond.end2841
+
+land.lhs.true2813: ; preds = %if.else2298
+ br i1 undef, label %cond.end2841, label %cond.true2824
+
+cond.true2824: ; preds = %land.lhs.true2813
+ br label %cond.end2841
+
+cond.end2841: ; preds = %cond.true2824, %land.lhs.true2813, %if.else2298
+ br i1 undef, label %for.cond2882.preheader, label %for.cond2940.preheader
+
+for.cond2882.preheader: ; preds = %cond.end2841
+ %mul3693 = shl i32 undef, 1 ; <i32> [#uses=2]
+ br i1 undef, label %if.then3689, label %if.else3728
+
+for.cond2940.preheader: ; preds = %cond.end2841
+ br label %for.inc3040
+
+for.inc3040: ; preds = %for.inc3040, %for.cond2940.preheader
+ br label %for.inc3040
+
+if.then3689: ; preds = %for.cond2882.preheader
+ %add3695 = add nsw i32 %mul3693, %shl1959 ; <i32> [#uses=1]
+ %mul3697 = shl i32 %add3695, 2 ; <i32> [#uses=2]
+ %arrayidx3705 = getelementptr inbounds i16* undef, i32 1 ; <i16*> [#uses=1]
+ %tmp3706 = load i16* %arrayidx3705 ; <i16> [#uses=1]
+ %conv3707 = sext i16 %tmp3706 to i32 ; <i32> [#uses=1]
+ %add3708 = add nsw i32 %conv3707, %mul3697 ; <i32> [#uses=1]
+ %arrayidx3724 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
+ %tmp3725 = load i16* %arrayidx3724 ; <i16> [#uses=1]
+ %conv3726 = sext i16 %tmp3725 to i32 ; <i32> [#uses=1]
+ %add3727 = add nsw i32 %conv3726, %mul3697 ; <i32> [#uses=1]
+ br label %if.end3770
+
+if.else3728: ; preds = %for.cond2882.preheader
+ %mul3733 = add i32 %shl1959, 1073741816 ; <i32> [#uses=1]
+ %add3735 = add nsw i32 %mul3733, %mul3693 ; <i32> [#uses=1]
+ %mul3737 = shl i32 %add3735, 2 ; <i32> [#uses=2]
+ %tmp3746 = load i16* undef ; <i16> [#uses=1]
+ %conv3747 = sext i16 %tmp3746 to i32 ; <i32> [#uses=1]
+ %add3748 = add nsw i32 %conv3747, %mul3737 ; <i32> [#uses=1]
+ %arrayidx3765 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
+ %tmp3766 = load i16* %arrayidx3765 ; <i16> [#uses=1]
+ %conv3767 = sext i16 %tmp3766 to i32 ; <i32> [#uses=1]
+ %add3768 = add nsw i32 %conv3767, %mul3737 ; <i32> [#uses=1]
+ br label %if.end3770
+
+if.end3770: ; preds = %if.else3728, %if.then3689
+ %vec2_y.1 = phi i32 [ %add3727, %if.then3689 ], [ %add3768, %if.else3728 ] ; <i32> [#uses=0]
+ %vec1_y.2 = phi i32 [ %add3708, %if.then3689 ], [ %add3748, %if.else3728 ] ; <i32> [#uses=0]
+ unreachable
+
+if.then4382: ; preds = %if.else2003, %if.else2003
+ switch i32 undef, label %if.then4394 [
+ i32 10, label %if.else4400
+ i32 14, label %if.else4400
+ ]
+
+if.then4394: ; preds = %if.then4382
+ unreachable
+
+if.else4400: ; preds = %if.then4382, %if.then4382
+ br label %for.cond4451.preheader
+
+for.cond4451.preheader: ; preds = %for.cond4451.preheader, %if.else4400
+ br label %for.cond4451.preheader
+
+if.then4493: ; preds = %for.cond1933
+ unreachable
+
+for.end4557: ; preds = %if.end371
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
index 3450c5aea405..de6f6e260de3 100644
--- a/test/CodeGen/Thumb2/carry.ll
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -1,15 +1,21 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "subs r" | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "adc r"
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "sbc r" | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
entry:
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
%tmp = sub i64 %a, %b
ret i64 %tmp
}
define i64 @f2(i64 %a, i64 %b) {
entry:
+; CHECK: f2:
+; CHECK: adds r0, r0, r0
+; CHECK: adcs r1, r1
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
%tmp1 = shl i64 %a, 1
%tmp2 = sub i64 %tmp1, %b
ret i64 %tmp2
diff --git a/test/CodeGen/Thumb2/frameless.ll b/test/CodeGen/Thumb2/frameless.ll
new file mode 100644
index 000000000000..c3c8cf1dd141
--- /dev/null
+++ b/test/CodeGen/Thumb2/frameless.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep mov
+; RUN: llc < %s -mtriple=thumbv7-linux -disable-fp-elim | not grep mov
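+; Even with -disable-fp-elim, a leaf function that uses no stack should stay
+; frameless, so no frame-pointer mov should be emitted.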
+
+define arm_apcscc void @t() nounwind readnone {
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/frameless2.ll b/test/CodeGen/Thumb2/frameless2.ll
new file mode 100644
index 000000000000..7cc7b1914287
--- /dev/null
+++ b/test/CodeGen/Thumb2/frameless2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep r7
+
+%struct.noise3 = type { [3 x [17 x i32]] }
+%struct.noiseguard = type { i32, i32, i32 }
+
+define arm_apcscc void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
+entry:
+	%0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2 ; <i32*> [#uses=1]
+ %1 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %1, i32* undef, align 4
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
new file mode 100644
index 000000000000..865b17b7f1f4
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define void @test1() {
+; CHECK: test1:
+; CHECK: sub sp, #64 * 4
+ %tmp = alloca [ 64 x i32 ] , align 4
+ ret void
+}
+
+define void @test2() {
+; CHECK: test2:
+; CHECK: sub.w sp, sp, #4160
+; CHECK: sub sp, #2 * 4
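+; The 4168-byte frame is carved out as 4160 + 8: one wide-immediate sub plus a
+; narrow 2-word sub.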
+ %tmp = alloca [ 4168 x i8 ] , align 4
+ ret void
+}
+
+define i32 @test3() {
+; CHECK: test3:
+; CHECK: sub.w sp, sp, #805306368
+; CHECK: sub sp, #4 * 4
+ %retval = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %a = alloca [805306369 x i8], align 16
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
index 1b1fe7b1b5fc..4fd4525b0455 100644
--- a/test/CodeGen/Thumb2/load-global.ll
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -1,19 +1,23 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-apple-darwin -relocation-model=static | \
-; RUN: not grep {L_G\$non_lazy_ptr}
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | \
-; RUN: grep {L_G\$non_lazy_ptr} | count 2
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | \
-; RUN: grep {ldr.*pc} | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | \
-; RUN: grep {GOT} | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
@G = external global i32
define i32 @test1() {
+; STATIC: _test1:
+; STATIC: .long _G
+
+; DYNAMIC: _test1:
+; DYNAMIC: .long L_G$non_lazy_ptr
+
+; PIC: _test1
+; PIC: add r0, pc
+; PIC: .long L_G$non_lazy_ptr-(LPC0+4)
+
+; LINUX: test1
+; LINUX: .long G(GOT)
%tmp = load i32* @G
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/mul_const.ll b/test/CodeGen/Thumb2/mul_const.ll
new file mode 100644
index 000000000000..9a2ec93a5adc
--- /dev/null
+++ b/test/CodeGen/Thumb2/mul_const.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; rdar://7069502
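+; x*9 should lower to x + (x << 3) and x*7 to (x << 3) - x; no mul is needed.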
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add.w r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 9
+ ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 7
+ ret i32 %0
+}
diff --git a/test/CodeGen/Thumb2/pic-load.ll b/test/CodeGen/Thumb2/pic-load.ll
new file mode 100644
index 000000000000..1f8aea912f6f
--- /dev/null
+++ b/test/CodeGen/Thumb2/pic-load.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -relocation-model=pic | FileCheck %s
+
+ %struct.anon = type { void ()* }
+ %struct.one_atexit_routine = type { %struct.anon, i32, i8* }
+@__dso_handle = external global { } ; <{ }*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (void ()*)* @atexit to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define hidden arm_apcscc i32 @atexit(void ()* %func) nounwind {
+entry:
+; CHECK: atexit:
+; CHECK: add r0, pc
+ %r = alloca %struct.one_atexit_routine, align 4 ; <%struct.one_atexit_routine*> [#uses=3]
+ %0 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0 ; <void ()**> [#uses=1]
+ store void ()* %func, void ()** %0, align 4
+ %1 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 0, i32* %1, align 4
+ %2 = call arm_apcscc i32 @atexit_common(%struct.one_atexit_routine* %r, i8* bitcast ({ }* @__dso_handle to i8*)) nounwind ; <i32> [#uses=1]
+ ret i32 %2
+}
+
+declare arm_apcscc i32 @atexit_common(%struct.one_atexit_routine*, i8*) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-adc.ll b/test/CodeGen/Thumb2/thumb2-adc.ll
index c1565b300960..702df91c8595 100644
--- a/test/CodeGen/Thumb2/thumb2-adc.ll
+++ b/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -1,32 +1,48 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adc\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 734439407618 = 0x000000ab00000002
define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 734439407618
ret i64 %tmp
}
; 5066626890203138 = 0x0012001200000002
define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 5066626890203138
ret i64 %tmp
}
; 3747052064576897026 = 0x3400340000000002
define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 3747052064576897026
ret i64 %tmp
}
; 6221254862626095106 = 0x5656565600000002
define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 6221254862626095106
ret i64 %tmp
}
; 287104476244869122 = 0x03fc000000000002
define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 287104476244869122
ret i64 %tmp
}
+define i64 @f6(i64 %a, i64 %b) {
+; CHECK: f6:
+; CHECK: adds r0, r0, r2
+ %tmp = add i64 %a, %b
+ ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index d4f408ff76e7..d42ea7138e46 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #255
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #256
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #257
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4094
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4095
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4096
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #255
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #256
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #257
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4094
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4095
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4096
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
define i32 @t2ADDrc_255(i32 %lhs) {
%Rd = add i32 %lhs, 255;
diff --git a/test/CodeGen/Thumb2/thumb2-add2.ll b/test/CodeGen/Thumb2/thumb2-add2.ll
index be89508c7ef2..e496654706ec 100644
--- a/test/CodeGen/Thumb2/thumb2-add2.ll
+++ b/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #171
%tmp = add i32 %a, 171
ret i32 %tmp
}
; 1179666 = 0x00120012
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: add.w r0, r0, #1179666
%tmp = add i32 %a, 1179666
ret i32 %tmp
}
; 872428544 = 0x34003400
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: add.w r0, r0, #872428544
%tmp = add i32 %a, 872428544
ret i32 %tmp
}
; 1448498774 = 0x56565656
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: add.w r0, r0, #1448498774
%tmp = add i32 %a, 1448498774
ret i32 %tmp
}
; 510 = 0x000001fe
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: add.w r0, r0, #510
%tmp = add i32 %a, 510
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll
index 1e6341e882fd..8d472cb110b8 100644
--- a/test/CodeGen/Thumb2/thumb2-add3.ll
+++ b/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
define i32 @f1(i32 %a) {
%tmp = add i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-add4.ll b/test/CodeGen/Thumb2/thumb2-add4.ll
index b74a33c90a10..b94e84daee1b 100644
--- a/test/CodeGen/Thumb2/thumb2-add4.ll
+++ b/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -1,31 +1,47 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
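+; Each 64-bit immediate add should split into an adds on the low word plus an
+; adc of #0 on the high word.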
; 171 = 0x000000ab
define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #171
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 171
ret i64 %tmp
}
; 1179666 = 0x00120012
define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: adds.w r0, r0, #1179666
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 1179666
ret i64 %tmp
}
; 872428544 = 0x34003400
define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: adds.w r0, r0, #872428544
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 872428544
ret i64 %tmp
}
; 1448498774 = 0x56565656
define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: adds.w r0, r0, #1448498774
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 1448498774
ret i64 %tmp
}
; 66846720 = 0x03fc0000
define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: adds.w r0, r0, #66846720
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 66846720
ret i64 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-add5.ll b/test/CodeGen/Thumb2/thumb2-add5.ll
index 22452143d958..8b3a4f6d12a8 100644
--- a/test/CodeGen/Thumb2/thumb2-add5.ll
+++ b/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: add r0, r1
%tmp = add i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: add.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = add i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: add.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = add i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: add.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = add i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: add.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-add6.ll b/test/CodeGen/Thumb2/thumb2-add6.ll
index 9dd3efcacd58..0ecaa793909f 100644
--- a/test/CodeGen/Thumb2/thumb2-add6.ll
+++ b/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: adds r0, r0, r2
+; CHECK: adcs r1, r3
%tmp = add i64 %a, %b
ret i64 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-and.ll b/test/CodeGen/Thumb2/thumb2-and.ll
index ab191d56843a..8e2245a85926 100644
--- a/test/CodeGen/Thumb2/thumb2-and.ll
+++ b/test/CodeGen/Thumb2/thumb2-and.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: ands r0, r1
%tmp = and i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: and.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: and.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: and.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: and.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 266d256fce51..1e2666f40368 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-asr.ll b/test/CodeGen/Thumb2/thumb2-asr.ll
index 4edf92be1339..a0a60e68989f 100644
--- a/test/CodeGen/Thumb2/thumb2-asr.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: asrs r0, r1
%tmp = ashr i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-asr2.ll b/test/CodeGen/Thumb2/thumb2-asr2.ll
index 700794873f3f..9c8634f7097c 100644
--- a/test/CodeGen/Thumb2/thumb2-asr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#17} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: asrs r0, r0, #17
%tmp = ashr i32 %a, 17
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
new file mode 100644
index 000000000000..e1f9cdbf8c64
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep it
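+; The compare should feed a plain conditional branch; no IT block is needed here.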
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) {
+; CHECK: t1
+; CHECK: beq
+ %tmp2 = icmp eq i32 %a, 0
+ br i1 %tmp2, label %cond_false, label %cond_true
+
+cond_true:
+ %tmp5 = add i32 %b, 1
+ %tmp6 = and i32 %tmp5, %c
+ ret i32 %tmp6
+
+cond_false:
+ %tmp7 = add i32 %b, -1
+ %tmp8 = xor i32 %tmp7, %c
+ ret i32 %tmp8
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
index 1e5016c91294..d33cf7ebdb27 100644
--- a/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
; 4278190095 = 0xff00000f
define i32 @f1(i32 %a) {
@@ -17,3 +17,10 @@
%tmp = and i32 %a, 4095
ret i32 %tmp
}
+
+; 2147483646 = 0x7ffffffe is not implementable with BFC
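+; (~0x7ffffffe = 0x80000001, which sets bits 31 and 0 and is therefore not a
+; contiguous run of bits)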
+define i32 @f4(i32 %a) {
+ %tmp = and i32 %a, 2147483646
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bic.ll b/test/CodeGen/Thumb2/thumb2-bic.ll
index f5a3d2038d07..4e35383997d9 100644
--- a/test/CodeGen/Thumb2/thumb2-bic.ll
+++ b/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -1,34 +1,40 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: bics r0, r1
%tmp = xor i32 %b, 4294967295
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: bics r0, r1
%tmp = xor i32 %b, 4294967295
%tmp1 = and i32 %tmp, %a
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: bics r0, r1
%tmp = xor i32 4294967295, %b
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: bics r0, r1
%tmp = xor i32 4294967295, %b
%tmp1 = and i32 %tmp, %a
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: bic.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = xor i32 4294967295, %tmp
%tmp2 = and i32 %a, %tmp1
@@ -36,6 +42,8 @@ define i32 @f5(i32 %a, i32 %b) {
}
define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: bic.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = xor i32 %tmp, 4294967295
%tmp2 = and i32 %tmp1, %a
@@ -43,6 +51,8 @@ define i32 @f6(i32 %a, i32 %b) {
}
define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: bic.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = xor i32 %tmp, 4294967295
%tmp2 = and i32 %a, %tmp1
@@ -50,6 +60,8 @@ define i32 @f7(i32 %a, i32 %b) {
}
define i32 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: bic.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
@@ -57,3 +69,37 @@ define i32 @f8(i32 %a, i32 %b) {
%tmp2 = and i32 %tmp1, %a
ret i32 %tmp2
}
+
+; ~0x000000bb = 4294967108
+define i32 @f9(i32 %a) {
+ %tmp = and i32 %a, 4294967108
+ ret i32 %tmp
+
+; CHECK: f9:
+; CHECK: bic r0, r0, #187
+}
+
+; ~0x00aa00aa = 4283826005
+define i32 @f10(i32 %a) {
+ %tmp = and i32 %a, 4283826005
+ ret i32 %tmp
+
+; CHECK: f10:
+; CHECK: bic r0, r0, #11141290
+}
+
+; ~0xcc00cc00 = 872363007
+define i32 @f11(i32 %a) {
+ %tmp = and i32 %a, 872363007
+ ret i32 %tmp
+; CHECK: f11:
+; CHECK: bic r0, r0, #-872363008
+}
+
+; ~0x00110000 = 4293853183
+define i32 @f12(i32 %a) {
+ %tmp = and i32 %a, 4293853183
+ ret i32 %tmp
+; CHECK: f12:
+; CHECK: bic r0, r0, #1114112
+}
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
new file mode 100644
index 000000000000..b46cb5f7c70e
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
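+; The branches below should be laid out so the store block falls through: bne, bge, bhs, blo.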
+
+define void @f1(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f1:
+; CHECK: bne LBB
+ %tmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @f2(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f2:
+; CHECK: bge LBB
+ %tmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @f3(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f3:
+; CHECK: bhs LBB
+ %tmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @f4(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f4:
+; CHECK: blo LBB
+ %tmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %return, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-call.ll b/test/CodeGen/Thumb2/thumb2-call.ll
new file mode 100644
index 000000000000..7dc6b2601b20
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-call.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() {
+; DARWIN: f:
+; DARWIN: blx _g
+
+; LINUX: f:
+; LINUX: bl g
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @h() {
+; DARWIN: h:
+; DARWIN: blx r0
+
+; LINUX: h:
+; LINUX: blx r0
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index e5f94a6c4929..0bed0585b5d1 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
define i32 @f1(i32 %a) {
%tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index ffe8b980e895..401c56a72139 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i1 @f1(i32 %a, i32 %b) {
%nb = sub i32 0, %b
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
index 9763dea045cf..c1fcac00e643 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "cmn " | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "cmn\\.w " | grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
; -0x000000bb = 4294967109
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 63f20cd98370..d4773bb5809b 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*#\[0-9\]*$} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 0x000000bb = 187
define i1 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: cmp r0, #187
%tmp = icmp ne i32 %a, 187
ret i1 %tmp
}
; 0x00aa00aa = 11141290
define i1 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: cmp.w r0, #11141290
%tmp = icmp eq i32 %a, 11141290
ret i1 %tmp
}
; 0xcc00cc00 = 3422604288
define i1 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: cmp.w r0, #-872363008
%tmp = icmp ne i32 %a, 3422604288
ret i1 %tmp
}
; 0xdddddddd = 3722304989
define i1 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: cmp.w r0, #-572662307
%tmp = icmp ne i32 %a, 3722304989
ret i1 %tmp
}
; 0x00110000 = 1114112
define i1 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: cmp.w r0, #1114112
%tmp = icmp eq i32 %a, 1114112
ret i1 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 368a3b3fed14..55c321dc2b31 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: cmp r0, r1
%tmp = icmp ne i32 %a, %b
ret i1 %tmp
}
define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: cmp r0, r1
%tmp = icmp eq i32 %a, %b
ret i1 %tmp
}
define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: cmp.w r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = icmp eq i32 %tmp, %a
ret i1 %tmp1
}
define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: cmp.w r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = icmp ne i32 %tmp, %a
ret i1 %tmp1
}
define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: cmp.w r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = icmp eq i32 %a, %tmp
ret i1 %tmp1
}
define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: cmp.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-eor.ll b/test/CodeGen/Thumb2/thumb2-eor.ll
index 56bb46a5457f..b7e276673c42 100644
--- a/test/CodeGen/Thumb2/thumb2-eor.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: eors r0, r1
%tmp = xor i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: eor.w r0, r1, r0
%tmp = xor i32 %b, %a
ret i32 %tmp
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: eor.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = xor i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: eor.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = xor i32 %tmp, %a
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: eor.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = xor i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: eor.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
index 11784ca02c14..185634cdd6fc 100644
--- a/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "eor " | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "eor " | grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 5
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
new file mode 100644
index 000000000000..71199abc5728
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: cmpne
+ switch i32 %c, label %cond_next [
+ i32 1, label %cond_true
+ i32 7, label %cond_true
+ ]
+
+cond_true:
+ %tmp12 = add i32 %a, 1
+ %tmp1518 = add i32 %tmp12, %b
+ ret i32 %tmp1518
+
+cond_next:
+ %tmp15 = add i32 %b, %a
+ ret i32 %tmp15
+}
+
+; FIXME: Check the number of unconditional branches once branch folding is added after if-conversion.
+define i32 @t2(i32 %a, i32 %b) {
+entry:
+; CHECK: t2:
+; CHECK: ite le
+; CHECK: suble
+; CHECK: subgt
+ %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer: ; preds = %cond_false, %entry
+ %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
+ %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %bb.outer
+ %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
+ %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
+ %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
+ %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
+ br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %bb
+ %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
+ %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp1437, label %bb17, label %bb
+
+cond_false: ; preds = %bb
+ %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
+ %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17: ; preds = %cond_false, %cond_true, %entry
+ %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %a_addr.026.1
+}
+
+@x = external global i32* ; <i32**> [#uses=1]
+
+define void @foo(i32 %a) {
+entry:
+ %tmp = load i32** @x ; <i32*> [#uses=1]
+ store i32 %a, i32* %tmp
+ ret void
+}
+
+define void @t3(i32 %a, i32 %b) {
+entry:
+; CHECK: t3:
+; CHECK: it lt
+; CHECK: poplt {r7, pc}
+ %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ tail call void @foo( i32 %b )
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
new file mode 100644
index 000000000000..d917ffe56bbc
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define void @foo(i32 %X, i32 %Y) {
+entry:
+; CHECK: foo:
+; CHECK: it ne
+; CHECK: cmpne
+; CHECK: it hi
+; CHECK: pophi {r7, pc}
+ %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
+ %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
+ %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
+ br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare i32 @bar(...)
+
+; FIXME: Need post-ifcvt branch folding to get rid of the extra br at the end of BB1.
+
+ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+entry:
+; CHECK: CountTree:
+; CHECK: it eq
+; CHECK: cmpeq
+; CHECK: bne
+; CHECK: itt eq
+; CHECK: moveq
+; CHECK: popeq
+ br label %tailrecurse
+
+tailrecurse: ; preds = %bb, %entry
+ %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
+ %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
+ %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
+ %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1]
+ %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1]
+ %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1]
+ br i1 %bothcond2, label %return, label %bb
+
+bb: ; preds = %tailrecurse
+ %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0]
+ br label %tailrecurse
+
+return: ; preds = %tailrecurse
+ ret i32 0
+}
+
+ %struct.SString = type { i8*, i32, i32 }
+
+declare void @abort()
+
+define fastcc void @t1(%struct.SString* %word, i8 signext %c) {
+entry:
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: popne {r7, pc}
+ %tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ tail call void @abort( )
+ unreachable
+
+cond_false: ; preds = %entry
+ ret void
+}
+
+define fastcc void @t2() nounwind {
+entry:
+; CHECK: t2:
+; CHECK: cmp r0, #0
+; CHECK: beq
+ br i1 undef, label %bb.i.i3, label %growMapping.exit
+
+bb.i.i3: ; preds = %entry
+ unreachable
+
+growMapping.exit: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
new file mode 100644
index 000000000000..1d45d3ce7fe8
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; There shouldn't be an unconditional branch at the end of bb52.
+; rdar://7184787
+
+@posed = external global i64 ; <i64*> [#uses=1]
+
+define i1 @ab_bb52(i64 %.reload78, i64* %.out, i64* %.out1) nounwind {
+newFuncRoot:
+ br label %bb52
+
+bb52.bb55_crit_edge.exitStub: ; preds = %bb52
+ store i64 %0, i64* %.out
+ store i64 %2, i64* %.out1
+ ret i1 true
+
+bb52.bb53_crit_edge.exitStub: ; preds = %bb52
+ store i64 %0, i64* %.out
+ store i64 %2, i64* %.out1
+ ret i1 false
+
+bb52: ; preds = %newFuncRoot
+; CHECK: movne
+; CHECK: moveq
+; CHECK: pop
+; CHECK-NEXT: LBB1_2:
+ %0 = load i64* @posed, align 4 ; <i64> [#uses=3]
+ %1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
+ %2 = ashr i64 %1, 1 ; <i64> [#uses=3]
+ %3 = icmp eq i64 %2, 0 ; <i1> [#uses=1]
+ br i1 %3, label %bb52.bb55_crit_edge.exitStub, label %bb52.bb53_crit_edge.exitStub
+}
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
new file mode 100644
index 000000000000..7d093ecce201
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -0,0 +1,120 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep tbb
+
+; Do not use tbb / tbh if any destination is before the jumptable.
+; rdar://7102917
+
+define i16 @main__getopt_internal_2E_exit_2E_ce(i32) nounwind {
+newFuncRoot:
+ br label %_getopt_internal.exit.ce
+
+codeRepl127.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 0
+
+parse_options.exit.loopexit.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 1
+
+bb1.i.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 2
+
+bb90.i.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 3
+
+codeRepl104.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 4
+
+codeRepl113.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 5
+
+codeRepl51.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 6
+
+codeRepl70.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 7
+
+codeRepl119.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 8
+
+codeRepl93.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 9
+
+codeRepl101.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 10
+
+codeRepl120.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 11
+
+codeRepl89.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 12
+
+codeRepl45.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 13
+
+codeRepl58.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 14
+
+codeRepl46.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 15
+
+codeRepl50.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 16
+
+codeRepl52.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 17
+
+codeRepl53.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 18
+
+codeRepl61.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 19
+
+codeRepl85.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 20
+
+codeRepl97.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 21
+
+codeRepl79.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 22
+
+codeRepl102.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 23
+
+codeRepl54.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 24
+
+codeRepl57.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 25
+
+codeRepl103.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 26
+
+_getopt_internal.exit.ce: ; preds = %newFuncRoot
+ switch i32 %0, label %codeRepl127.exitStub [
+ i32 -1, label %parse_options.exit.loopexit.exitStub
+ i32 0, label %bb1.i.exitStub
+ i32 63, label %bb90.i.exitStub
+ i32 66, label %codeRepl104.exitStub
+ i32 67, label %codeRepl113.exitStub
+ i32 71, label %codeRepl51.exitStub
+ i32 77, label %codeRepl70.exitStub
+ i32 78, label %codeRepl119.exitStub
+ i32 80, label %codeRepl93.exitStub
+ i32 81, label %codeRepl101.exitStub
+ i32 82, label %codeRepl120.exitStub
+ i32 88, label %codeRepl89.exitStub
+ i32 97, label %codeRepl45.exitStub
+ i32 98, label %codeRepl58.exitStub
+ i32 99, label %codeRepl46.exitStub
+ i32 100, label %codeRepl50.exitStub
+ i32 104, label %codeRepl52.exitStub
+ i32 108, label %codeRepl53.exitStub
+ i32 109, label %codeRepl61.exitStub
+ i32 110, label %codeRepl85.exitStub
+ i32 111, label %codeRepl97.exitStub
+ i32 113, label %codeRepl79.exitStub
+ i32 114, label %codeRepl102.exitStub
+ i32 115, label %codeRepl54.exitStub
+ i32 116, label %codeRepl57.exitStub
+ i32 118, label %codeRepl103.exitStub
+ ]
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
new file mode 100644
index 000000000000..da2874d1e0c4
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: push {r7, lr}
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: push {r7, lr}
+; CHECK: ldmia
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
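+
+; t2 loads three consecutive words (X[2]..X[4]), which the load/store
+; optimizer is expected to merge into the single ldmia checked above.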
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: push {r7, lr}
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index 19c75849e110..94888fd94050 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldr r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldr | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32* %v) {
entry:
+; CHECK: f1:
+; CHECK: ldr r0, [r0]
%tmp = load i32* %v
ret i32 %tmp
}
define i32 @f2(i32* %v) {
entry:
+; CHECK: f2:
+; CHECK: ldr.w r0, [r0, #+4092]
%tmp2 = getelementptr i32* %v, i32 1023
%tmp = load i32* %tmp2
ret i32 %tmp
@@ -19,6 +19,9 @@ entry:
define i32 @f3(i32* %v) {
entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldr r0, [r0, r1]
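+; (An i32 index of 1024 is a byte offset of 4096, one past the 4095 limit of
+; the 12-bit immediate form, so the offset has to be materialized in r1.)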
%tmp2 = getelementptr i32* %v, i32 1024
%tmp = load i32* %tmp2
ret i32 %tmp
@@ -26,6 +29,8 @@ entry:
define i32 @f4(i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: ldr r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
%tmp3 = load i32* %tmp2
@@ -34,6 +39,8 @@ entry:
define i32 @f5(i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: ldr r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
%tmp3 = load i32* %tmp2
@@ -42,6 +49,8 @@ entry:
define i32 @f6(i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: ldr.w r0, [r0, r1, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
@@ -51,6 +60,10 @@ entry:
define i32 @f7(i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldr r0, [r0, r1]
+
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index d48ecef1c113..9e6aef4e0974 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrh | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
define i32 @test1(i8* %v.pntr.s0.u1) {
%tmp.u = load i8* %v.pntr.s0.u1
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 79ffa8293521..d1af4ba47fe0 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {ldr.*\\\[.*\],} | count 1
define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index f773e6331bfe..9cc3f4a2eda5 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {ldr.*\\!} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {ldrsb.*\\!} | count 1
define i32* @test1(i32* %X, i32* %dest) {
diff --git a/test/CodeGen/Thumb2/thumb2-ldrb.ll b/test/CodeGen/Thumb2/thumb2-ldrb.ll
index 5bacb8eb2b4c..bf1009743afc 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldrb r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrb | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldrb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i8 @f1(i8* %v) {
entry:
+; CHECK: f1:
+; CHECK: ldrb r0, [r0]
%tmp = load i8* %v
ret i8 %tmp
}
define i8 @f2(i8* %v) {
entry:
+; CHECK: f2:
+; CHECK: ldrb r0, [r0, #-1]
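+; (The GEP index has type i8, so 1023 truncates to -1 and the load really is
+; at byte offset -1.)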
%tmp2 = getelementptr i8* %v, i8 1023
%tmp = load i8* %tmp2
ret i8 %tmp
@@ -19,6 +19,9 @@ entry:
define i8 @f3(i32 %base) {
entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldrb r0, [r0, r1]
%tmp1 = add i32 %base, 4096
%tmp2 = inttoptr i32 %tmp1 to i8*
%tmp3 = load i8* %tmp2
@@ -27,6 +30,8 @@ entry:
define i8 @f4(i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: ldrb r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i8*
%tmp3 = load i8* %tmp2
@@ -35,6 +40,8 @@ entry:
define i8 @f5(i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: ldrb r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i8*
%tmp3 = load i8* %tmp2
@@ -43,6 +50,8 @@ entry:
define i8 @f6(i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: ldrb.w r0, [r0, r1, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
@@ -52,6 +61,9 @@ entry:
define i8 @f7(i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldrb r0, [r0, r1]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
new file mode 100644
index 000000000000..22d4e88ed17d
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+
+@b = external global i64*
+
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+;CHECK: ldrd r2, [r2]
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index 15f803e11086..f1fb79c35ed0 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldrh r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrh | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldrh
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i16 @f1(i16* %v) {
entry:
+; CHECK: f1:
+; CHECK: ldrh r0, [r0]
%tmp = load i16* %v
ret i16 %tmp
}
define i16 @f2(i16* %v) {
entry:
+; CHECK: f2:
+; CHECK: ldrh.w r0, [r0, #+2046]
%tmp2 = getelementptr i16* %v, i16 1023
%tmp = load i16* %tmp2
ret i16 %tmp
@@ -19,6 +19,9 @@ entry:
define i16 @f3(i16* %v) {
entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldrh r0, [r0, r1]
%tmp2 = getelementptr i16* %v, i16 2048
%tmp = load i16* %tmp2
ret i16 %tmp
@@ -26,6 +29,8 @@ entry:
define i16 @f4(i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: ldrh r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i16*
%tmp3 = load i16* %tmp2
@@ -34,6 +39,8 @@ entry:
define i16 @f5(i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: ldrh r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i16*
%tmp3 = load i16* %tmp2
@@ -42,6 +49,8 @@ entry:
define i16 @f6(i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: ldrh.w r0, [r0, r1, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
@@ -51,6 +60,9 @@ entry:
define i16 @f7(i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldrh r0, [r0, r1]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
diff --git a/test/CodeGen/Thumb2/thumb2-lsl.ll b/test/CodeGen/Thumb2/thumb2-lsl.ll
index 666963a4b499..6b0818a34b9b 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lsls r0, r0, #5
%tmp = shl i32 %a, 5
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsl2.ll b/test/CodeGen/Thumb2/thumb2-lsl2.ll
index eb7a2795343d..f283eef89a37 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lsls r0, r1
%tmp = shl i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr.ll b/test/CodeGen/Thumb2/thumb2-lsr.ll
index cf4d2f81c55d..7cbee54f381f 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lsrs r0, r0, #13
%tmp = lshr i32 %a, 13
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr2.ll b/test/CodeGen/Thumb2/thumb2-lsr2.ll
index 01fd56d52c17..87800f9d73fb 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lsrs r0, r1
%tmp = lshr i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr3.ll b/test/CodeGen/Thumb2/thumb2-lsr3.ll
new file mode 100644
index 000000000000..5cfd3f5198b7
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i1 @test1(i64 %poscnt, i32 %work) {
+entry:
+; CHECK: rrx r0, r0
+; CHECK: lsrs.w r1, r1, #1
+ %0 = lshr i64 %poscnt, 1
+ %1 = icmp eq i64 %0, 0
+ ret i1 %1
+}
+
+define i1 @test2(i64 %poscnt, i32 %work) {
+entry:
+; CHECK: rrx r0, r0
+; CHECK: asrs.w r1, r1, #1
+ %0 = ashr i64 %poscnt, 1
+ %1 = icmp eq i64 %0, 0
+ ret i1 %1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
index 0772d7f69ad5..be66425d7e66 100644
--- a/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 6d1640f340ae..782def966615 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 0c4c59689b60..e9fdec8820ea 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -1,127 +1,147 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #11206827
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2868947712
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2880154539
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #251658240
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #3948544
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #258
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #4026531840
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; Test #<const>
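+; A Thumb-2 modified immediate is one of 0x000000XY, 0x00XY00XY, 0xXY00XY00,
+; 0xXYXYXYXY, or an 8-bit value rotated into place, so each _ok_ case below
+; fits a single instruction while the _fail_ cases expect the movw/movt pair
+; used to materialize everything else.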
; var 2.1 - 0x00ab00ab
define i32 @t2_const_var2_1_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_1_ok_1:
+;CHECK: #11206827
%ret = add i32 %lhs, 11206827 ; 0x00ab00ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 11206843 ; 0x00ab00bb
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_2:
+;CHECK: movt
%ret = add i32 %lhs, 27984043 ; 0x01ab00ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_3:
+;CHECK: movt
%ret = add i32 %lhs, 27984299 ; 0x01ab01ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_4:
+;CHECK: movt
%ret = add i32 %lhs, 28027649 ; 0x01abab01
ret i32 %ret
}
; var 2.2 - 0xab00ab00
define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_2_ok_1:
+;CHECK: #-1426019584
%ret = add i32 %lhs, 2868947712 ; 0xab00ab00
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 2868951552 ; 0xab00ba00
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_2:
+;CHECK: movt
%ret = add i32 %lhs, 2868947728 ; 0xab00ab10
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_3:
+;CHECK: movt
%ret = add i32 %lhs, 2869996304 ; 0xab10ab10
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_4:
+;CHECK: movt
%ret = add i32 %lhs, 279685904 ; 0x10abab10
ret i32 %ret
}
; var 2.3 - 0xabababab
define i32 @t2_const_var2_3_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_3_ok_1:
+;CHECK: #-1414812757
%ret = add i32 %lhs, 2880154539 ; 0xabababab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 2880154554 ; 0xabababba
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_2:
+;CHECK: movt
%ret = add i32 %lhs, 2880158379 ; 0xababbaab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_3:
+;CHECK: movt
%ret = add i32 %lhs, 2881137579 ; 0xabbaabab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_4:
+;CHECK: movt
%ret = add i32 %lhs, 3131812779 ; 0xbaababab
ret i32 %ret
}
; var 3 - 0x0F000000
define i32 @t2_const_var3_1_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_1_ok_1:
+;CHECK: #251658240
%ret = add i32 %lhs, 251658240 ; 0x0F000000
ret i32 %ret
}
define i32 @t2_const_var3_2_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_2_ok_1:
+;CHECK: #3948544
%ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000
ret i32 %ret
}
define i32 @t2_const_var3_2_fail_1(i32 %lhs) {
+;CHECK: t2_const_var3_2_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000
ret i32 %ret
}
define i32 @t2_const_var3_3_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_3_ok_1:
+;CHECK: #258
%ret = add i32 %lhs, 258 ; 0b00000000000000000000000100000010
ret i32 %ret
}
define i32 @t2_const_var3_4_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_4_ok_1:
+;CHECK: #-268435456
%ret = add i32 %lhs, 4026531840 ; 0xF0000000
ret i32 %ret
}
-
diff --git a/test/CodeGen/Thumb2/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll
index d2f8c0b91a58..a02f4f087365 100644
--- a/test/CodeGen/Thumb2/thumb2-mov2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov2.ll
@@ -1,10 +1,11 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @t2MOVTi16_ok_1(i32 %a) {
+; CHECK: t2MOVTi16_ok_1:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
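+; (movt writes the top halfword of a register and leaves the bottom 16 bits
+; unchanged, so (1234 << 16) | lo is built as a mov of lo followed by
+; movt #1234.)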
%1 = and i32 %a, 65535
%2 = shl i32 1234, 16
%3 = or i32 %1, %2
@@ -13,6 +14,11 @@ define i32 @t2MOVTi16_ok_1(i32 %a) {
}
define i32 @t2MOVTi16_test_1(i32 %a) {
+; CHECK: t2MOVTi16_test_1:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
 %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -24,6 +30,11 @@ define i32 @t2MOVTi16_test_1(i32 %a) {
}
define i32 @t2MOVTi16_test_2(i32 %a) {
+; CHECK: t2MOVTi16_test_2:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
 %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -36,6 +47,11 @@ define i32 @t2MOVTi16_test_2(i32 %a) {
}
define i32 @t2MOVTi16_test_3(i32 %a) {
+; CHECK: t2MOVTi16_test_3:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
 %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -50,6 +66,11 @@ define i32 @t2MOVTi16_test_3(i32 %a) {
}
define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
+; CHECK: t2MOVTi16_test_nomatch_1:
+; CHECK: movw r1, #16384
+; CHECK-NEXT: movt r1, #154
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #154
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
 %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -58,7 +79,6 @@ define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
%6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
%7 = lshr i32 %6, 3
%8 = or i32 %5, %7
-
ret i32 %8
}
diff --git a/test/CodeGen/Thumb2/thumb2-mov3.ll b/test/CodeGen/Thumb2/thumb2-mov3.ll
index 74418c1000c9..46af6fb16c49 100644
--- a/test/CodeGen/Thumb2/thumb2-mov3.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov3.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mov\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: movs r0, #171
%tmp = add i32 0, 171
ret i32 %tmp
}
; 1179666 = 0x00120012
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mov.w r0, #1179666
%tmp = add i32 0, 1179666
ret i32 %tmp
}
; 872428544 = 0x34003400
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mov.w r0, #872428544
%tmp = add i32 0, 872428544
ret i32 %tmp
}
; 1448498774 = 0x56565656
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: mov.w r0, #1448498774
%tmp = add i32 0, 1448498774
ret i32 %tmp
}
; 66846720 = 0x03fc0000
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mov.w r0, #66846720
%tmp = add i32 0, 66846720
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mov4.ll b/test/CodeGen/Thumb2/thumb2-mov4.ll
index 74c522f94f07..06fa238263ab 100644
--- a/test/CodeGen/Thumb2/thumb2-mov4.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
define i32 @f6(i32 %a) {
%tmp = add i32 0, 65535
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index e976e66c0013..b1515b514820 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mul\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK: f1:
+; CHECK: muls r0, r1
%tmp = mul i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll
new file mode 100644
index 000000000000..5d47770aed3e
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep smmul | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep umull | count 1
+
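+; smmul returns the top 32 bits of a signed 64-bit product in one
+; instruction; there is no unsigned counterpart, so the unsigned case is
+; matched with umull and only the high result register is kept.
+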
+define i32 @smulhi(i32 %x, i32 %y) {
+ %tmp = sext i32 %x to i64 ; <i64> [#uses=1]
+ %tmp1 = sext i32 %y to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
+ %tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3.upgrd.1
+}
+
+define i32 @umulhi(i32 %x, i32 %y) {
+ %tmp = zext i32 %x to i64 ; <i64> [#uses=1]
+ %tmp1 = zext i32 %y to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
+ %tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3.upgrd.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn.ll b/test/CodeGen/Thumb2/thumb2-mvn.ll
index 95694d67912e..a8c8f831c75a 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -1,27 +1,33 @@
-; RUN: llvm-as < %s | llc | grep {mvn\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
-
-target triple = "thumbv7-apple-darwin"
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: mvn r0, #187
%tmp = xor i32 4294967295, 187
ret i32 %tmp
}
; 0x00aa00aa = 11141290
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mvn r0, #11141290
%tmp = xor i32 4294967295, 11141290
ret i32 %tmp
}
; 0xcc00cc00 = 3422604288
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mvn r0, #-872363008
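+; (The asm printer prints immediates as signed i32, so 0xcc00cc00 shows up
+; as -872363008.)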
%tmp = xor i32 4294967295, 3422604288
ret i32 %tmp
}
; 0x00110000 = 1114112
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mvn r0, #1114112
%tmp = xor i32 4294967295, 1114112
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn2.ll b/test/CodeGen/Thumb2/thumb2-mvn2.ll
index df9b11bed917..375d0aad5021 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: mvns r0, r0
%tmp = xor i32 4294967295, %a
ret i32 %tmp
}
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mvns r0, r0
%tmp = xor i32 %a, 4294967295
ret i32 %tmp
}
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mvn.w r0, r0, lsl #5
%tmp = shl i32 %a, 5
%tmp1 = xor i32 %tmp, 4294967295
ret i32 %tmp1
}
define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: mvn.w r0, r0, lsr #6
%tmp = lshr i32 %a, 6
%tmp1 = xor i32 %tmp, 4294967295
ret i32 %tmp1
}
define i32 @f7(i32 %a) {
+; CHECK: f7:
+; CHECK: mvn.w r0, r0, asr #7
%tmp = ashr i32 %a, 7
%tmp1 = xor i32 %tmp, 4294967295
ret i32 %tmp1
}
define i32 @f8(i32 %a) {
+; CHECK: f8:
+; CHECK: mvn.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-neg.ll b/test/CodeGen/Thumb2/thumb2-neg.ll
index 8f938d579b83..6bf11ec90621 100644
--- a/test/CodeGen/Thumb2/thumb2-neg.ll
+++ b/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#0} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rsbs r0, r0, #0
%tmp = sub i32 0, %a
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll
index 92c4564841b9..d4222c2b2dac 100644
--- a/test/CodeGen/Thumb2/thumb2-orn.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i32 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll
index 7758edd1d693..7b018826a621 100644
--- a/test/CodeGen/Thumb2/thumb2-orn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} |\
+; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-orr.ll b/test/CodeGen/Thumb2/thumb2-orr.ll
index 989165804959..89ab7b1edf70 100644
--- a/test/CodeGen/Thumb2/thumb2-orr.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: orrs r0, r1
%tmp2 = or i32 %a, %b
ret i32 %tmp2
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: orr.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp2 = or i32 %a, %tmp
ret i32 %tmp2
}
define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: orr.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp2 = or i32 %a, %tmp
ret i32 %tmp2
}
define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: orr.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp2 = or i32 %a, %tmp
ret i32 %tmp2
}
define i32 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: orr.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll
index 6f2b62c00c6e..759a5b8dd894 100644
--- a/test/CodeGen/Thumb2/thumb2-orr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1145324612\\|#1114112} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#-872363008\\|#1145324612\\|#1114112} | count 5
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
new file mode 100644
index 000000000000..a9822498fe08
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep pkhbt | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep pkhtb | count 4
+
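+; pkhbt keeps the bottom halfword of its first source and inserts the
+; (optionally lsl-shifted) top halfword of the second; pkhtb keeps the top
+; halfword of the first and inserts the (optionally asr-shifted) bottom
+; halfword of the second. Each or-of-masked-halves below should collapse to
+; one pack instruction, e.g. test1 to roughly: pkhbt r0, r0, r1, lsl #16
+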
+define i32 @test1(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test1a(i32 %X, i32 %Y) {
+ %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test2(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp3, -65536 ; <i32> [#uses=1]
+ %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp57
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+ %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test4(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1]
+ %tmp46 = or i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp46
+}
+
+define i32 @test5(i32 %X, i32 %Y) {
+ %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1]
+ %tmp4 = lshr i32 %tmp2, 16 ; <i32> [#uses=2]
+ %tmp5 = or i32 %tmp4, %tmp17 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test5a(i32 %X, i32 %Y) {
+ %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp39 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp39, %tmp110 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test6(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1]
+ %tmp38 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp38, 65535 ; <i32> [#uses=1]
+ %tmp59 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp59
+}
+
+define i32 @test7(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp3, 65535 ; <i32> [#uses=1]
+ %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp57
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index 4009da33b260..27b1672e554a 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,8 +1,23 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep {rev\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rev r0, r0
%tmp = tail call i32 @llvm.bswap.i32(i32 %a)
ret i32 %tmp
}
declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+define i32 @f2(i32 %X) {
+; CHECK: f2:
+; CHECK: revsh r0, r0
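+; (revsh byte-swaps the bottom halfword and sign-extends it to 32 bits,
+; which is exactly the shl/lshr/or/sext pattern below.)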
+ %tmp1 = lshr i32 %X, 8
+ %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+ %tmp3 = trunc i32 %X to i16
+ %tmp2 = and i16 %tmp1.upgrd.1, 255
+ %tmp4 = shl i16 %tmp3, 8
+ %tmp5 = or i16 %tmp2, %tmp4
+ %tmp5.upgrd.2 = sext i16 %tmp5 to i32
+ ret i32 %tmp5.upgrd.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev16.ll b/test/CodeGen/Thumb2/thumb2-rev16.ll
new file mode 100644
index 000000000000..39b6ac3f0027
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -0,0 +1,32 @@
+; XFAIL: *
+; FIXME: rev16 pattern is not matching
+
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+
+; 0xff00ff00 = 4278255360
+; 0x00ff00ff = 16711935
+define i32 @f1(i32 %a) {
+ %l8 = shl i32 %a, 8
+ %r8 = lshr i32 %a, 8
+ %mask_l8 = and i32 %l8, 4278255360
+ %mask_r8 = and i32 %r8, 16711935
+ %tmp = or i32 %mask_l8, %mask_r8
+ ret i32 %tmp
+}
+
+; 0xff000000 = 4278190080
+; 0x00ff0000 = 16711680
+; 0x0000ff00 = 65280
+; 0x000000ff = 255
+define i32 @f2(i32 %a) {
+ %l8 = shl i32 %a, 8
+ %r8 = lshr i32 %a, 8
+ %masklo_l8 = and i32 %l8, 65280
+ %maskhi_l8 = and i32 %l8, 4278190080
+ %masklo_r8 = and i32 %r8, 255
+ %maskhi_r8 = and i32 %r8, 16711680
+ %tmp1 = or i32 %masklo_l8, %masklo_r8
+ %tmp2 = or i32 %maskhi_l8, %maskhi_r8
+ %tmp = or i32 %tmp1, %tmp2
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 305ab994518d..01adb528087b 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {ror\\.w\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
define i32 @f1(i32 %a) {
%l8 = shl i32 %a, 10
diff --git a/test/CodeGen/Thumb2/thumb2-ror2.ll b/test/CodeGen/Thumb2/thumb2-ror2.ll
index dd19b0afb18f..ffd1dd7dc613 100644
--- a/test/CodeGen/Thumb2/thumb2-ror2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: rors r0, r1
%db = sub i32 32, %b
%l8 = shl i32 %a, %b
%r8 = lshr i32 %a, %db
diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll
index 57796873b2d1..4611e9435034 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i32 @f2(i32 %a, i32 %b) {
%tmp = shl i32 %b, 5
diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll
index 957d1d0717e4..84a379677ad4 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb2.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
new file mode 100644
index 000000000000..ad962919edce
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
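+; (An i64 subtract lowers to a subs of the low words whose borrow feeds an
+; sbc of the high words; only the low-word subs is pinned down here.)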
+ %tmp = sub i64 %a, %b
+ ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select.ll b/test/CodeGen/Thumb2/thumb2-select.ll
new file mode 100644
index 000000000000..2dcf8aaa24c5
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-select.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i32 @f1(i32 %a.s) {
+entry:
+; CHECK: f1:
+; CHECK: it eq
+; CHECK: moveq
+
+ %tmp = icmp eq i32 %a.s, 4
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
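+
+; A select between two constants lowers to a compare, an unconditional mov
+; of one value, and an IT-predicated mov of the other; for f1 that is
+; roughly (an illustrative sketch, not the checked output):
+;   cmp   r0, #4
+;   mov.w r0, #3
+;   it    eq
+;   moveq r0, #2
+; The functions below cycle through the signed and unsigned condition codes.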
+
+define i32 @f2(i32 %a.s) {
+entry:
+; CHECK: f2:
+; CHECK: it gt
+; CHECK: movgt
+ %tmp = icmp sgt i32 %a.s, 4
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f3(i32 %a.s, i32 %b.s) {
+entry:
+; CHECK: f3:
+; CHECK: it lt
+; CHECK: movlt
+ %tmp = icmp slt i32 %a.s, %b.s
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f4(i32 %a.s, i32 %b.s) {
+entry:
+; CHECK: f4:
+; CHECK: it le
+; CHECK: movle
+
+ %tmp = icmp sle i32 %a.s, %b.s
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f5(i32 %a.u, i32 %b.u) {
+entry:
+; CHECK: f5:
+; CHECK: it ls
+; CHECK: movls
+ %tmp = icmp ule i32 %a.u, %b.u
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f6(i32 %a.u, i32 %b.u) {
+entry:
+; CHECK: f6:
+; CHECK: it hi
+; CHECK: movhi
+ %tmp = icmp ugt i32 %a.u, %b.u
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f7:
+; CHECK: it hi
+; CHECK: lsrhi.w
+ %tmp1 = icmp ugt i32 %a, %b
+ %tmp2 = udiv i32 %c, 3
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
+ ret i32 %tmp3
+}
+
+define i32 @f8(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f8:
+; CHECK: it lo
+; CHECK: lsllo.w
+ %tmp1 = icmp ult i32 %a, %b
+ %tmp2 = mul i32 %c, 4
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
+ ret i32 %tmp3
+}
+
+define i32 @f9(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f9:
+; CHECK: it ge
+; CHECK: rorge.w
+ %tmp1 = icmp sge i32 %a, %b
+ %tmp2 = shl i32 %c, 10
+ %tmp3 = lshr i32 %c, 22
+ %tmp4 = or i32 %tmp2, %tmp3
+ %tmp5 = select i1 %tmp1, i32 %tmp4, i32 3
+ ret i32 %tmp5
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
new file mode 100644
index 000000000000..b4274adb5823
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mvn | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep it | count 3
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
+ %tmp3 = add i32 %tmp2, %b
+ ret i32 %tmp3
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 2147483648
+ %tmp3 = add i32 %tmp2, %b
+ ret i32 %tmp3
+}
+
+define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 10
+ %tmp3 = sub i32 %b, %tmp2
+ ret i32 %tmp3
+}
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 9bd6e43101a8..7746cd3f584b 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep asr
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ror
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mov
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsl
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep asr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ror
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep mov
define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
%A = shl i32 %Y, 16
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
new file mode 100644
index 000000000000..66cc88402fc5
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep smlabt | count 1
+
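+; smlabt multiplies the bottom halfword of one operand by the top halfword
+; of the other and adds a 32-bit accumulator, all in one instruction; that
+; is the sext * ashr-16 + add pattern below.
+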
+define i32 @f3(i32 %a, i16 %x, i32 %y) {
+ %tmp = sext i16 %x to i32 ; <i32> [#uses=1]
+ %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
+ %tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
new file mode 100644
index 000000000000..cdbf4ca7bf67
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep smulbt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep smultt | count 1
+
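+; smulbt multiplies the bottom halfword of its first operand by the top
+; halfword of its second; smultt multiplies the two top halfwords. The
+; sext / ashr-by-16 operands below match these 16x16->32 multiplies.
+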
+@x = weak global i16 0 ; <i16*> [#uses=1]
+@y = weak global i16 0 ; <i16*> [#uses=0]
+
+define i32 @f1(i32 %y) {
+ %tmp = load i16* @x ; <i16> [#uses=1]
+ %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
+ %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+ %tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
new file mode 100644
index 000000000000..0a7221c61749
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
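+; (The vld1 results live in 128-bit Q registers; the register pressure below
+; forces some to be spilled, and the vstmia/vldmia pair checks that the
+; spill and reload go through the stack. PR4789 tracked a bug in this path.)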
+entry:
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 6.300000e+01, float* undef, align 4
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 0.000000e+00, float* undef, align 4
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb193, %entry
+ %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+ %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+ %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
+ %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
+ %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
+ %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
+ %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
+ %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+ br i1 undef, label %bb193, label %bb186
+
+bb186: ; preds = %bb4
+ br label %bb193
+
+bb193: ; preds = %bb186, %bb4
+ %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+ %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb4
+}
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index 4097a6c1579a..3eeec8c3850f 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32* %v) {
+; CHECK: f1:
+; CHECK: str r0, [r1]
store i32 %a, i32* %v
ret i32 %a
}
define i32 @f2(i32 %a, i32* %v) {
+; CHECK: f2:
+; CHECK: str.w r0, [r1, #+4092]
%tmp2 = getelementptr i32* %v, i32 1023
store i32 %a, i32* %tmp2
ret i32 %a
}
define i32 @f2a(i32 %a, i32* %v) {
+; CHECK: f2a:
+; CHECK: str r0, [r1, #-128]
%tmp2 = getelementptr i32* %v, i32 -32
store i32 %a, i32* %tmp2
ret i32 %a
}
define i32 @f3(i32 %a, i32* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: str r0, [r1, r2]
%tmp2 = getelementptr i32* %v, i32 1024
store i32 %a, i32* %tmp2
ret i32 %a
@@ -30,6 +34,8 @@ define i32 @f3(i32 %a, i32* %v) {
define i32 @f4(i32 %a, i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: str r0, [r1, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
store i32 %a, i32* %tmp2
@@ -38,6 +44,8 @@ entry:
define i32 @f5(i32 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: str r0, [r1, r2]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
store i32 %a, i32* %tmp2
@@ -46,6 +54,8 @@ entry:
define i32 @f6(i32 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: str.w r0, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
@@ -55,6 +65,9 @@ entry:
define i32 @f7(i32 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: str r0, [r1, r2]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index 536011c4de7d..bee58105daeb 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {str .*\\\[.*\],} | count 1
define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 1e93b70df5ac..6c804eea634c 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {str.*\\!} | count 2
define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index d8401cd68471..1ebb938b1a88 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i8 @f1(i8 %a, i8* %v) {
+; CHECK: f1:
+; CHECK: strb r0, [r1]
store i8 %a, i8* %v
ret i8 %a
}
define i8 @f2(i8 %a, i8* %v) {
+; CHECK: f2:
+; CHECK: strb.w r0, [r1, #+4092]
%tmp2 = getelementptr i8* %v, i32 4092
store i8 %a, i8* %tmp2
ret i8 %a
}
define i8 @f2a(i8 %a, i8* %v) {
+; CHECK: f2a:
+; CHECK: strb r0, [r1, #-128]
%tmp2 = getelementptr i8* %v, i32 -128
store i8 %a, i8* %tmp2
ret i8 %a
}
define i8 @f3(i8 %a, i8* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: strb r0, [r1, r2]
%tmp2 = getelementptr i8* %v, i32 4096
store i8 %a, i8* %tmp2
ret i8 %a
@@ -30,6 +34,8 @@ define i8 @f3(i8 %a, i8* %v) {
define i8 @f4(i8 %a, i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: strb r0, [r1, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i8*
store i8 %a, i8* %tmp2
@@ -38,6 +44,8 @@ entry:
define i8 @f5(i8 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: strb r0, [r1, r2]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i8*
store i8 %a, i8* %tmp2
@@ -46,6 +54,8 @@ entry:
define i8 @f6(i8 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: strb.w r0, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
@@ -55,6 +65,9 @@ entry:
define i8 @f7(i8 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: strb r0, [r1, r2]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index 80dedf0c28dd..b0eb8c12f594 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i16 @f1(i16 %a, i16* %v) {
+; CHECK: f1:
+; CHECK: strh r0, [r1]
store i16 %a, i16* %v
ret i16 %a
}
define i16 @f2(i16 %a, i16* %v) {
+; CHECK: f2:
+; CHECK: strh.w r0, [r1, #+4092]
%tmp2 = getelementptr i16* %v, i32 2046
store i16 %a, i16* %tmp2
ret i16 %a
}
define i16 @f2a(i16 %a, i16* %v) {
+; CHECK: f2a:
+; CHECK: strh r0, [r1, #-128]
%tmp2 = getelementptr i16* %v, i32 -64
store i16 %a, i16* %tmp2
ret i16 %a
}
define i16 @f3(i16 %a, i16* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: strh r0, [r1, r2]
%tmp2 = getelementptr i16* %v, i32 2048
store i16 %a, i16* %tmp2
ret i16 %a
@@ -30,6 +34,8 @@ define i16 @f3(i16 %a, i16* %v) {
define i16 @f4(i16 %a, i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: strh r0, [r1, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i16*
store i16 %a, i16* %tmp2
@@ -38,6 +44,8 @@ entry:
define i16 @f5(i16 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: strh r0, [r1, r2]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i16*
store i16 %a, i16* %tmp2
@@ -46,6 +54,8 @@ entry:
define i16 @f6(i16 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: strh.w r0, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
@@ -55,6 +65,9 @@ entry:
define i16 @f7(i16 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: strh r0, [r1, r2]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
diff --git a/test/CodeGen/Thumb2/thumb2-sub.ll b/test/CodeGen/Thumb2/thumb2-sub.ll
index cf8270412d10..95335a2ee2cc 100644
--- a/test/CodeGen/Thumb2/thumb2-sub.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -1,31 +1,49 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\[w\]\\?\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: subs r0, #171
%tmp = sub i32 %a, 171
ret i32 %tmp
}
; 1179666 = 0x00120012
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: sub.w r0, r0, #1179666
%tmp = sub i32 %a, 1179666
ret i32 %tmp
}
; 872428544 = 0x34003400
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: sub.w r0, r0, #872428544
%tmp = sub i32 %a, 872428544
ret i32 %tmp
}
; 1448498774 = 0x56565656
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: sub.w r0, r0, #1448498774
%tmp = sub i32 %a, 1448498774
ret i32 %tmp
}
; 510 = 0x000001fe
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: sub.w r0, r0, #510
%tmp = sub i32 %a, 510
ret i32 %tmp
}
+
+; Don't change this to an add.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: subs r0, #1
+ %tmp = sub i32 %a, 1
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-sub2.ll b/test/CodeGen/Thumb2/thumb2-sub2.ll
index c7ebd22a8a1f..6813f76d8932 100644
--- a/test/CodeGen/Thumb2/thumb2-sub2.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
define i32 @f1(i32 %a) {
%tmp = sub i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-sub4.ll b/test/CodeGen/Thumb2/thumb2-sub4.ll
index fd283fdc8ef9..a040d170f935 100644
--- a/test/CodeGen/Thumb2/thumb2-sub4.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r1
%tmp = sub i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: sub.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = sub i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: sub.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = sub i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: sub.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = sub i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: sub.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll
index 3e9ec2569738..c3b56bc09c85 100644
--- a/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {subs\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
%tmp = sub i64 %a, %b
ret i64 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 4afe35402875..33ed543d6b6a 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep sxtab | count 1
define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
new file mode 100644
index 000000000000..5dc3cc3ce70a
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+
+define void @bar(i32 %n.u) {
+entry:
+; CHECK: bar:
+; CHECK: tbb
+; CHECK: .align 1
+
+ switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+ tail call void(...)* @foo1()
+ ret void
+bb1:
+ tail call void(...)* @foo2()
+ ret void
+bb2:
+ tail call void(...)* @foo6()
+ ret void
+bb3:
+ tail call void(...)* @foo3()
+ ret void
+bb4:
+ tail call void(...)* @foo4()
+ ret void
+bb5:
+ tail call void(...)* @foo5()
+ ret void
+bb6:
+ tail call void(...)* @foo1()
+ ret void
+bb7:
+ tail call void(...)* @foo2()
+ ret void
+bb8:
+ tail call void(...)* @foo6()
+ ret void
+bb9:
+ tail call void(...)* @foo3()
+ ret void
+bb10:
+ tail call void(...)* @foo4()
+ ret void
+bb11:
+ tail call void(...)* @foo5()
+ ret void
+bb12:
+ tail call void(...)* @foo6()
+ ret void
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
new file mode 100644
index 000000000000..c5cb6f33e2ed
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+
+; Thumb2 target should reorder the bb's in order to use tbb / tbh.
+
+; XFAIL: *
+
+ %struct.R_flstr = type { i32, i32, i8* }
+ %struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* }
+@_C_nextcmd = external global i32 ; <i32*> [#uses=3]
+@.str31 = external constant [28 x i8], align 1 ; <[28 x i8]*> [#uses=1]
+@_T_gtol = external global %struct._T_tstr* ; <%struct._T_tstr**> [#uses=2]
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+declare arm_apcscc void @Z_fatal(i8*) noreturn nounwind
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+; CHECK: main:
+; CHECK: tbh
+entry:
+ br label %bb42.i
+
+bb1.i2: ; preds = %bb42.i
+ br label %bb40.i
+
+bb5.i: ; preds = %bb42.i
+ %0 = or i32 %_Y_flags.1, 32 ; <i32> [#uses=1]
+ br label %bb40.i
+
+bb7.i: ; preds = %bb42.i
+ call arm_apcscc void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 0, i8* null) nounwind
+ unreachable
+
+bb15.i: ; preds = %bb42.i
+ call arm_apcscc void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 2, i8* null) nounwind
+ unreachable
+
+bb23.i: ; preds = %bb42.i
+ %1 = call arm_apcscc i32 @strlen(i8* null) nounwind readonly ; <i32> [#uses=0]
+ unreachable
+
+bb33.i: ; preds = %bb42.i
+ store i32 0, i32* @_C_nextcmd, align 4
+ %2 = call arm_apcscc noalias i8* @calloc(i32 21, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb34.i: ; preds = %bb42.i
+ %3 = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
+ %4 = add i32 %3, 1 ; <i32> [#uses=1]
+ store i32 %4, i32* @_C_nextcmd, align 4
+ %5 = call arm_apcscc noalias i8* @calloc(i32 22, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb35.i: ; preds = %bb42.i
+ %6 = call arm_apcscc noalias i8* @calloc(i32 20, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb37.i: ; preds = %bb42.i
+ %7 = call arm_apcscc noalias i8* @calloc(i32 14, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb39.i: ; preds = %bb42.i
+ call arm_apcscc void @Z_fatal(i8* getelementptr ([28 x i8]* @.str31, i32 0, i32 0)) nounwind
+ unreachable
+
+bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2
+ %_Y_flags.0 = phi i32 [ 0, %bb1.i2 ], [ %0, %bb5.i ], [ %_Y_flags.1, %bb42.i ] ; <i32> [#uses=1]
+ %_Y_eflag.b.0 = phi i1 [ %_Y_eflag.b.1, %bb1.i2 ], [ %_Y_eflag.b.1, %bb5.i ], [ true, %bb42.i ] ; <i1> [#uses=1]
+ br label %bb42.i
+
+bb42.i: ; preds = %bb40.i, %entry
+ %_Y_eflag.b.1 = phi i1 [ false, %entry ], [ %_Y_eflag.b.0, %bb40.i ] ; <i1> [#uses=2]
+ %_Y_flags.1 = phi i32 [ 0, %entry ], [ %_Y_flags.0, %bb40.i ] ; <i32> [#uses=2]
+ switch i32 undef, label %bb39.i [
+ i32 67, label %bb33.i
+ i32 70, label %bb35.i
+ i32 77, label %bb37.i
+ i32 83, label %bb34.i
+ i32 97, label %bb7.i
+ i32 100, label %bb5.i
+ i32 101, label %bb40.i
+ i32 102, label %bb23.i
+ i32 105, label %bb15.i
+ i32 116, label %bb1.i2
+ ]
+}
+
+declare arm_apcscc void @_T_addtol(%struct._T_tstr** nocapture, i32, i8*) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index c3c20943dda6..634d318c85c4 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \
+; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10
; 0x000000bb = 187
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index fe2b2c8b15d3..c6867d99de76 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i1 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 9e2d3e5ec1c9..525a817fe37e 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {tst\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \
+; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10
; 0x000000bb = 187
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index c0f404c89f6d..db202dd2cbcd 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,34 +1,40 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp ne i32 %tmp, 0
ret i1 %tmp1
}
define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp eq i32 %tmp, 0
ret i1 %tmp1
}
define i1 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp ne i32 0, %tmp
ret i1 %tmp1
}
define i1 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp eq i32 0, %tmp
ret i1 %tmp1
}
define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: tst.w r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -36,6 +42,8 @@ define i1 @f6(i32 %a, i32 %b) {
}
define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: tst.w r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -43,6 +51,8 @@ define i1 @f7(i32 %a, i32 %b) {
}
define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: tst.w r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -50,6 +60,8 @@ define i1 @f8(i32 %a, i32 %b) {
}
define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: tst.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 0d1cc183de32..37919dde1dcc 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxth | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtab | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxth | count 1
define i8 @test1(i32 %A.u) zeroext {
%B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 28a5fe4d2ee9..4022d95ed475 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep uxt | count 10
define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll
index 6abb6eba630d..1e555571c054 100644
--- a/test/CodeGen/Thumb2/tls1.ll
+++ b/test/CodeGen/Thumb2/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
; RUN: grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
; RUN: grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \
; RUN: -relocation-model=pic | grep {__tls_get_addr}
diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll
index 3396b0ba43f3..b8a0657c9069 100644
--- a/test/CodeGen/Thumb2/tls2.ll
+++ b/test/CodeGen/Thumb2/tls2.ll
@@ -1,19 +1,29 @@
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \
-; RUN: -relocation-model=pic | grep {__tls_get_addr}
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | FileCheck %s -check-prefix=CHECK-NOT-PIC
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC
@i = external thread_local global i32 ; <i32*> [#uses=2]
define i32 @f() {
entry:
+; CHECK-NOT-PIC: f:
+; CHECK-NOT-PIC: add r0, pc
+; CHECK-NOT-PIC: ldr r1, [r0]
+; CHECK-NOT-PIC: i(gottpoff)
+
+; CHECK-PIC: f:
+; CHECK-PIC: bl __tls_get_addr(PLT)
%tmp1 = load i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32* @g() {
entry:
+; CHECK-NOT-PIC: g:
+; CHECK-NOT-PIC: add r0, pc
+; CHECK-NOT-PIC: ldr r1, [r0]
+; CHECK-NOT-PIC: i(gottpoff)
+
+; CHECK-PIC: g:
+; CHECK-PIC: bl __tls_get_addr(PLT)
ret i32* @i
}
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 2b4242aaa15e..24848602baf8 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -3,7 +3,7 @@
; it makes a ton of annoying overlapping live ranges. This code should not
; cause spills!
;
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep spilled
+; RUN: llc < %s -march=x86 -stats |& not grep spilled
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
index a4d558949e30..5c40eeaa1ead 100644
--- a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
+++ b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test() {
entry:
diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
index 4de3c79fdcbb..8b0a18550da1 100644
--- a/test/CodeGen/X86/2003-11-03-GlobalBool.ll
+++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: not grep {.byte\[\[:space:\]\]*true}
@X = global i1 true ; <i1*> [#uses=0]
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
index 56bb21caf3ca..f15a1b441816 100644
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
@A = global [32 x i32] zeroinitializer
@B = global [32 x i32] zeroinitializer
diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
index f48b1d3adf01..fea2b54d7630 100644
--- a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {pushl %ebp} | count 1
+; RUN: llc < %s -march=x86 | grep {popl %ebp} | count 1
declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
index b25dfaf5d90e..f986ebd35f85 100644
--- a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
+++ b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -i ESP | not grep sub
+; RUN: llc < %s -march=x86 | grep -i ESP | not grep sub
define i32 @test(i32 %X) {
ret i32 %X
diff --git a/test/CodeGen/X86/2004-02-22-Casts.ll b/test/CodeGen/X86/2004-02-22-Casts.ll
index 40d5f39df642..dabf7d3c15b6 100644
--- a/test/CodeGen/X86/2004-02-22-Casts.ll
+++ b/test/CodeGen/X86/2004-02-22-Casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @test1(double %X) {
%V = fcmp one double %X, 0.000000e+00 ; <i1> [#uses=1]
ret i1 %V
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll
index 5021fd89dfe4..b6631b62118a 100644
--- a/test/CodeGen/X86/2004-03-30-Select-Max.ll
+++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep {j\[lgbe\]}
+; RUN: llc < %s -march=x86 | not grep {j\[lgbe\]}
define i32 @max(i32 %A, i32 %B) {
%gt = icmp sgt i32 %A, %B ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
index 633a61564558..c62fee1bd263 100644
--- a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
+++ b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
@@ -2,7 +2,7 @@
; overlapping live intervals. When two overlapping intervals have the same
; value, they can be joined, though.
;
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=linearscan | \
+; RUN: llc < %s -march=x86 -regalloc=linearscan | \
; RUN: not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
define i64 @test(i64 %x) {
diff --git a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
index 858605c231bc..f8ed016f99b6 100644
--- a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
+++ b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define double @test(double %d) {
%X = select i1 false, double %d, double %d ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
index 1a51bee404d0..036aa6a77f40 100644
--- a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
+++ b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @T(double %X) {
%V = fcmp oeq double %X, %X ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
index 9ee773c91a24..db3af0139cee 100644
--- a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
+++ b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @test(i1 %C, i1 %D, i32 %X, i32 %Y) {
%E = icmp slt i32 %X, %Y ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
index 37cff57f30e2..32fafc61e8de 100644
--- a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
+++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -3,7 +3,7 @@
; is invalid code (there is no correct way to order the instruction). Check
; that we do not fold the load into the sub.
-; RUN: llvm-as < %s | llc -march=x86 | not grep sub.*GLOBAL
+; RUN: llc < %s -march=x86 | not grep sub.*GLOBAL
@GLOBAL = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
index 762047b7d8c4..30a6ac6fbdf1 100644
--- a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
+++ b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep 18446744073709551612
+; RUN: llc < %s -march=x86 | not grep 18446744073709551612
@A = external global i32 ; <i32*> [#uses=1]
@Y = global i32* getelementptr (i32* @A, i32 -1) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
index 04035aca998f..5266009c55a5 100644
--- a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
+++ b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=generic
+; RUN: llc < %s -march=x86 -mcpu=generic
; Make sure LLC doesn't crash in the stackifier due to FP PHI nodes.
define void @radfg_() {
diff --git a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index 817b281243e7..d906da43fe11 100644
--- a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep shld | count 1
;
; Check that the isel does not fold the shld, which already folds a load
diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
index 51d2fb2fe27b..dc69ef83103f 100644
--- a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 | not grep {subl.*%esp}
define i32 @f(i32 %a, i32 %b) {
%tmp.2 = mul i32 %a, %a ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
index c410c4668a9b..0421896922b9 100644
--- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: grep asm-printer | grep 7
define i32 @g(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
index 743790cad033..c106f57e9384 100644
--- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
+++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
; END.
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 4a0b5c37e261..8783a11c060b 100644
--- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
; RUN: grep {movl _last} %t | count 1
; RUN: grep {cmpl.*_last} %t | count 1
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index f28366699c3d..49f3a95705ad 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stats |& \
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
; RUN: not grep {Number of register spills}
; END.
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 72dab39888f1..7d0a6ab0a04c 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -relocation-model=static -stats |& \
+; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \
; RUN: grep asm-printer | grep 14
;
@size20 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 48ed2b9cb498..23954d76a5d6 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: grep asm-printer | grep 13
define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
diff --git a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
index 900abe55cd21..8421483ecb55 100644
--- a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
+++ b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -1,7 +1,7 @@
; Coalescing from R32 to a subset R32_. Once another register coalescer bug is
; fixed, the movb should go away as well.
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep movl
@B = external global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
index c39b377cc733..d58d638562c9 100644
--- a/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -relocation-model=static | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp}
@A = external global i16* ; <i16**> [#uses=1]
@B = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 6c0e76b34ade..89b127cccf82 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
; RUN: grep {asm-printer} | grep 31
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-05-17-VectorArg.ll b/test/CodeGen/X86/2006-05-17-VectorArg.ll
index 217cbe1059f2..b36d61e0f31b 100644
--- a/test/CodeGen/X86/2006-05-17-VectorArg.ll
+++ b/test/CodeGen/X86/2006-05-17-VectorArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <4 x float> @opRSQ(<4 x float> %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index ae18c90d8c17..083d06805f2f 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep setnp
-; RUN: llvm-as < %s | llc -march=x86 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 | grep setnp
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math | \
; RUN: not grep setnp
define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index 78838d1141a4..0288278d626e 100644
--- a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test() {
br i1 false, label %cond_next33, label %cond_true12
diff --git a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
index 760fe3650e90..4ea364d57e51 100644
--- a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
+++ b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR825
define i64 @test() {
diff --git a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
index 1db3921ecdb1..568fbbcc4f4f 100644
--- a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
+++ b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR828
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-07-19-ATTAsm.ll b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
index 78167f631e1b..c8fd10f7009c 100644
--- a/test/CodeGen/X86/2006-07-19-ATTAsm.ll
+++ b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att
; PR834
; END.
diff --git a/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
index 08510a8a6528..cac47cdab6de 100644
--- a/test/CodeGen/X86/2006-07-20-InlineAsm.ll
+++ b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR833
@G = weak global i32 0 ; <i32*> [#uses=3]
diff --git a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
index a82612b5a62a..deae086cf76c 100644
--- a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
+++ b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -- 4294967240
+; RUN: llc < %s -march=x86 | grep -- 4294967240
; PR853
@X = global i32* inttoptr (i64 -56 to i32*) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index 2a521ad73885..3159cec8553e 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,5 +1,5 @@
; PR850
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att > %t
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
; RUN: grep {movl 4(%eax),%ebp} %t
; RUN: grep {movl 0(%eax), %ebx} %t
diff --git a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
index 194cd6681bfa..aea707ee8fe4 100644
--- a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
%struct.foo = type opaque
define fastcc i32 @test(%struct.foo* %v, %struct.foo* %vi) {
diff --git a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
index f2a8855245cc..5fee326d530d 100644
--- a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.expr = type { %struct.rtx_def*, i32, %struct.expr*, %struct.occr*, %struct.occr*, %struct.rtx_def* }
%struct.hash_table = type { %struct.expr**, i32, i32, i32 }
%struct.occr = type { %struct.occr*, %struct.rtx_def*, i8, i8 }
diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
index c1d81d52b932..a19d8f7092c3 100644
--- a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
+++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | \
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
; RUN: not grep {movl %eax, %edx}
define i32 @foo(i32 %t, i32 %C) {
diff --git a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
index dd21c0455d6d..1e890bbc02e5 100644
--- a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8"
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
index cc988f26618c..795d4647a3f6 100644
--- a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
+++ b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR933
define fastcc i1 @test() {
diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
index e8055f5f901f..bf9fa5782b06 100644
--- a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
+++ b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=sse | grep movaps
; Test that the load is NOT folded into the intrinsic, which would zero the top
; elts of the loaded vector.
diff --git a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
index d627d1bf214c..fbb14ee16151 100644
--- a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define void @_ZN13QFSFileEngine4readEPcx() {
%tmp201 = load i32* null ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
index 5dc1cb3d9a2d..b1f04518acaa 100644
--- a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
+++ b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep shrl
+; RUN: llc < %s -march=x86 | grep shrl
; Bug in FindModifiedNodeSlot causes the tmp14 load to become a zextload, and the
; shr 31 is then optimized away.
@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
index 31eb070e85b9..3b987ac79f94 100644
--- a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.function = type opaque
%struct.lang_decl = type opaque
%struct.location_t = type { i8*, i32 }
diff --git a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
index 2b53f26f578e..6ed2e7bb5751 100644
--- a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
@str = external global [18 x i8] ; <[18 x i8]*> [#uses=1]
define void @test() {
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index 1ff687a1b8b7..88e8b4a4fd92 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -1,11 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86 -asm-verbose | %prcontext je 1 | \
-; RUN: grep BB1_1:
+; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
@str = internal constant [14 x i8] c"Hello world!\0A\00" ; <[14 x i8]*> [#uses=1]
@str.upgrd.1 = internal constant [13 x i8] c"Blah world!\0A\00" ; <[13 x i8]*> [#uses=1]
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @test(i32 %argc, i8** %argv) nounwind {
entry:
+; CHECK: cmpl $2
+; CHECK-NEXT: je
+; CHECK-NEXT: %entry
+
switch i32 %argc, label %UnifiedReturnBlock [
i32 1, label %bb
i32 2, label %bb2
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 1a92852f06fe..91210ea90c69 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {subl \$4, %esp}
+; RUN: llc < %s -march=x86 | grep {subl \$4, %esp}
target triple = "i686-pc-linux-gnu"
@str = internal constant [9 x i8] c"%f+%f*i\0A\00" ; <[9 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index f0067c7e489c..e839d7295adc 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -1,9 +1,9 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep movb %t | count 2
; RUN: grep {movzb\[wl\]} %t
-define void @handle_vector_size_attribute() {
+define void @handle_vector_size_attribute() nounwind {
entry:
%tmp69 = load i32* null ; <i32> [#uses=1]
switch i32 %tmp69, label %bb84 [
diff --git a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
index 1222a3743686..ea2e6db61e1a 100644
--- a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
+++ b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep test.*1
+; RUN: llc < %s -march=x86 | grep test.*1
; PR1016
define i32 @test(i32 %A, i32 %B, i32 %C) {
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
index a58bedc28d75..8c1573f130ba 100644
--- a/test/CodeGen/X86/2006-11-28-Memcpy.ll
+++ b/test/CodeGen/X86/2006-11-28-Memcpy.ll
@@ -1,8 +1,6 @@
; PR1022, PR1023
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN: grep 3721182122 | count 2
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN: grep -E {movl _?bytes2} | count 1
+; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
+; RUN: llc < %s -march=x86 | grep -E {movl _?bytes2} | count 1
@fmt = constant [4 x i8] c"%x\0A\00" ; <[4 x i8]*> [#uses=2]
@bytes = constant [4 x i8] c"\AA\BB\CC\DD" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
index 17234b827e4c..f81b303e3b80 100644
--- a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
+++ b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel
; PR1061
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 3b365f35cb22..e1bae3251a22 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,8 +1,7 @@
; PR1075
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | \
-; RUN: %prcontext {mulss LCPI1_3} 1 | grep mulss | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-define float @foo(float %x) {
+define float @foo(float %x) nounwind {
%tmp1 = fmul float %x, 3.000000e+00
%tmp3 = fmul float %x, 5.000000e+00
%tmp5 = fmul float %x, 7.000000e+00
@@ -11,4 +10,10 @@ define float @foo(float %x) {
%tmp12 = fadd float %tmp10, %tmp5
%tmp14 = fadd float %tmp12, %tmp7
ret float %tmp14
+
+; CHECK: mulss LCPI1_2(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: mulss LCPI1_3(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: ret
}
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index c03d982aeb15..5e7c0a7ee2b7 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep leaq %t
; RUN: not grep {,%rsp)} %t
; PR1103
diff --git a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
index b1c86f4138a3..e83e2e54e455 100644
--- a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
+++ b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; Test 'ri' constraint.
define void @run_init_process() {
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index 26d3e367195c..93e880854985 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {orl \$1, %eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {leal 3(,%eax,8)}
+; RUN: llc < %s -march=x86 | grep {orl \$1, %eax}
+; RUN: llc < %s -march=x86 | grep {leal 3(,%eax,8)}
;; This example can't fold the or into an LEA.
define i32 @test(float ** %tmp2, i32 %tmp12) {
diff --git a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
index 365768afe794..954c95d69611 100644
--- a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
; PR1027
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
diff --git a/test/CodeGen/X86/2007-02-25-FastCCStack.ll b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
index 3b1eb1fdb66b..2e2b56d04a25 100644
--- a/test/CodeGen/X86/2007-02-25-FastCCStack.ll
+++ b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium3
+; RUN: llc < %s -march=x86 -mcpu=pentium3
define internal fastcc double @ggc_rlimit_bound(double %limit) {
ret double %limit
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index 721b6e7e2094..112d1ab65e7b 100644
--- a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin8 -mattr=+sse2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
define void @test() nounwind {
test.exit:
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index 4c69ec733dd4..4cac9b4c4a21 100644
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-darwin | \
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
; RUN: grep push | count 3
define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) {
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index c98c89a537a3..9580726ce02a 100644
--- a/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; ModuleID = 'a.bc'
diff --git a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
index 6965849e3231..70936fbc9281 100644
--- a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR1259
define void @test() {
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
index babcf6a0e805..44d68dd0493e 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test(i16 %tmp40414244) {
%tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 %tmp40414244 )
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
index 9bdb2493508d..3312e01b3d8e 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {mov %gs:72, %eax}
+; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index 6e1adf834624..c1b1ad1c730d 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah -march=x86 | \
+; RUN: llc < %s -mcpu=yonah -march=x86 | \
; RUN: grep {cmpltsd %xmm0, %xmm0}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
index e440cdb6cfd7..30453d5266b9 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {psrlw \$8, %xmm0}
+; RUN: llc < %s -march=x86 | grep {psrlw \$8, %xmm0}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
index 7ce0584c5450..9676f143bca6 100644
--- a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
+++ b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
@data = external global [339 x i64]
diff --git a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
index 840fc7d513a0..9f09e88664c6 100644
--- a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1314
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.bc_struct = type { i32, i32, i32, i32, %struct.bc_struct*, i8*, i8* }
@_programStartTime = external global %struct.CycleCount ; <%struct.CycleCount*> [#uses=1]
-define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) {
+define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind {
entry:
%tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; <i64> [#uses=0]
%tmp221 = sdiv i32 10, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
index 514d6656cd2a..f48c13259c42 100644
--- a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
+++ b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
index f9671a4daaed..4604f46c533f 100644
--- a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
index 74e6e72a4aa6..7528129971ab 100644
--- a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
+++ b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep 4294967112
+; RUN: llc < %s -march=x86-64 | not grep 4294967112
; PR1348
%struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] }
diff --git a/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
index 3e08e50f09de..e38992d8b304 100644
--- a/test/CodeGen/X86/2007-04-24-VectorCrash.ll
+++ b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index ac85a9d72bbd..113d0eb8647f 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -o - -march=x86 -mattr=+mmx | grep paddq | count 2
-; RUN: llvm-as < %s | llc -o - -march=x86 -mattr=+mmx | grep movq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep paddq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep movq | count 2
define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) {
entry:
diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
index cbd6a73dbee7..85a2ecc959ab 100644
--- a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
+++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep {bsrl.*10}
+; RUN: llc < %s | not grep {bsrl.*10}
; PR1356
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
index b0bcf5c155aa..e58b1932197d 100644
--- a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
+++ b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 -mattr=+sse
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse
; PR1371
@str = external global [18 x i8] ; <[18 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index ff7aac0239d8..a3ff2f60c8d7 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
; PR1398
%struct.S = type { i32, i32 }
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
index 61f8b2ce58f2..8ef253822bd9 100644
--- a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.XDesc = type <{ i32, %struct.OpaqueXDataStorageType** }>
%struct.OpaqueXDataStorageType = type opaque
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index d9836e4a8d5d..2093b8f68744 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index 64ccef3917a3..989dfc5bdb2c 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpckhwd
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
diff --git a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
index 5d090759092e..321e11651b60 100644
--- a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
+++ b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep GOTPCREL
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep ".align.*3"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep GOTPCREL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep ".align.*3"
%struct.A = type { [1024 x i8] }
@_ZN1A1aE = global %struct.A zeroinitializer, align 32 ; <%struct.A*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-06-04-tailmerge4.ll b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
index 0ad539664c99..baf2377c5a02 100644
--- a/test/CodeGen/X86/2007-06-04-tailmerge4.ll
+++ b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh -asm-verbose | grep invcont131
+; RUN: llc < %s -enable-eh -asm-verbose | grep invcont131
; PR 1496: tail merge was incorrectly removing this block
; ModuleID = 'report.1.bc'
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
index 3e7776a62ab1..36a97ef9c3cf 100644
--- a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
+++ b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
@@ -1,5 +1,5 @@
; PR1495
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll
index 7756d060ff25..2680b1543fbb 100644
--- a/test/CodeGen/X86/2007-06-14-branchfold.ll
+++ b/test/CodeGen/X86/2007-06-14-branchfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i686 | not grep jmp
+; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp
; check that branch folding understands FP_REG_KILL is not a branch
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index e608ac3ecb97..6128d8b92d11 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep paddusw
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) {
diff --git a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
index af11f127cfb5..9d42c49317fd 100644
--- a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
+++ b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
define void @test() {
%tmp1 = call <8 x i16> @llvm.x86.sse2.pmins.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) )
diff --git a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
index bcd265aeddaa..d2d6388c0782 100644
--- a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define void @test() {
entry:
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index 66a58c73e824..dc11eec9c17f 100644
--- a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define void @test(<4 x float>* %arg) {
%tmp89 = getelementptr <4 x float>* %arg, i64 3
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 18850b135ccf..2c513f17811a 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rsi, %mm0}
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rsi, %mm0}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index 7f09b5275a05..d611677942c2 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
; PR1545
@.str97 = external constant [56 x i8] ; <[56 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index c0bd282e0191..8625b2771738 100644
--- a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse | grep {movq (%rdi), %rax}
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse | grep {movq 8(%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq (%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq 8(%rdi), %rax}
define i64 @foo_0(<2 x i64>* %val) {
entry:
%val12 = getelementptr <2 x i64>* %val, i32 0, i32 0 ; <i64*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
index 8eda0ab9bc4e..3cd8052a732c 100644
--- a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movl
+; RUN: llc < %s -march=x86 | not grep movl
define i8 @t(i8 zeroext %x, i8 zeroext %y) zeroext {
%tmp2 = add i8 %x, 2
diff --git a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index e9ea843ba3c3..7768f36efae5 100644
--- a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep "movb %ah, %r"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep "movb %ah, %r"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, [4 x i8], i64 }
%struct.PyBoolScalarObject = type { i64, %struct._typeobject*, i8 }
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
index b62d2c61bba7..e93092f355c5 100644
--- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movsbl}
+; RUN: llc < %s -march=x86 | grep {movsbl}
@X = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
index f6ed0fe7a5ff..c90a85f16949 100644
--- a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
+++ b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep drectve
+; RUN: llc < %s -march=x86 | not grep drectve
; PR1607
%hlvm_programs_element = type { i8*, i32 (i32, i8**)* }
diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
index edcb8232fde1..d6ea5109d1fb 100644
--- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
+++ b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
%struct..0anon = type { i32 }
%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index b6a5fc97b4bb..5acb05134c7c 100644
--- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
index 4f95b7603bae..c5d2a46f92c2 100644
--- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep weak | count 2
+; RUN: llc < %s -march=x86 | grep weak | count 2
@__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
declare extern_weak i32 @pthread_once(i32*, void ()*)
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
index 6a313be18885..56ee2a314990 100644
--- a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin -enable-eh | grep {isNullOrNil].eh"} | count 2
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -enable-eh | grep {isNullOrNil].eh"} | count 2
%struct.NSString = type { }
%struct._objc__method_prototype_list = type opaque
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
index 835e4caf0aaf..0ae1897e60e9 100644
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep 170
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
entry:
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index be51c04a3837..4a56ee446a0f 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep powixf2
-; RUN: llvm-as < %s | llc | grep fsqrt
+; RUN: llc < %s | grep powixf2
+; RUN: llc < %s | grep fsqrt
; ModuleID = 'yyy.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
index a733bb31646c..6fc8ec907eac 100644
--- a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
+++ b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep pushf
+; RUN: llc < %s -march=x86 | not grep pushf
%struct.gl_texture_image = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8* }
%struct.gl_texture_object = type { i32, i32, i32, float, [4 x i32], i32, i32, i32, i32, i32, float, [11 x %struct.gl_texture_image*], [1024 x i8], i32, i32, i32, i8, i8*, i8, void (%struct.gl_texture_object*, i32, float*, float*, float*, float*, i8*, i8*, i8*, i8*)*, %struct.gl_texture_object* }
diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
index e9fbe797f5bc..67323e87eff5 100644
--- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
+++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | grep lea
%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index e2fdbb32bde3..fc11347224be 100644
--- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movb
+; RUN: llc < %s -march=x86 | not grep movb
define i16 @f(i32* %bp, i32* %ss) signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index fd914a1687b7..ea1bbc464693 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep addss | not grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep addss | not grep esp
define fastcc void @fht(float* %fz, i16 signext %n) {
entry:
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index 3016a013f2c9..a3872ad47e98 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sarl | not grep esp
+; RUN: llc < %s -march=x86 | grep sarl | not grep esp
define i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
index 6cac558e427d..8a55935cc1f8 100644
--- a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
%struct._Unwind_Context = type { }
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index 4ea42440e1e2..1e4ae8464586 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-linux-gnu
; PR1729
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
index a414ef0d8626..fbcac50875c2 100644
--- a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
define i64 @__ashldi3(i64 %u, i64 %b) {
entry:
diff --git a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
index 5332fa1007ed..6d0cb475b1f1 100644
--- a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
+++ b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep movb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep movb | not grep x
; PR1734
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
index 2fcf76be5c78..3f9845c3c3ec 100644
--- a/test/CodeGen/X86/2007-10-16-fp80_select.ll
+++ b/test/CodeGen/X86/2007-10-16-fp80_select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
index f3cdfee7545f..c0bb55ed14ef 100644
--- a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
+++ b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep addb | not grep x
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep addb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
; PR1734
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index e649999bb0a8..600bd1f17849 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
define i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
index 450911ae8199..984094d86a27 100644
--- a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
+++ b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1748
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
index 9013e9020efa..86d3bbf4f4e3 100644
--- a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
+++ b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
define i16 @t() signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-30-LSRCrash.ll b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
index 1c912a014049..42db98b44750 100644
--- a/test/CodeGen/X86/2007-10-30-LSRCrash.ll
+++ b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @unique(i8* %full, i32 %p, i32 %len, i32 %mode, i32 %verbos, i32 %flags) {
entry:
diff --git a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
index f73a9105cef4..1b8e67dcc9b3 100644
--- a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
+++ b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2
+; RUN: llc < %s -march=x86 -mattr=sse2
; ModuleID = 'yyy.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-11-01-ISelCrash.ll b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
index 704efd0ef800..019c6a8cc0d9 100644
--- a/test/CodeGen/X86/2007-11-01-ISelCrash.ll
+++ b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%"struct.K::JL" = type <{ i8 }>
%struct.jv = type { i64 }
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
index 4ae4d2f9e8d9..4e11cda92e6d 100644
--- a/test/CodeGen/X86/2007-11-02-BadAsm.ll
+++ b/test/CodeGen/X86/2007-11-02-BadAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl | not grep rax
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl | not grep rax
%struct.color_sample = type { i64 }
%struct.gs_matrix = type { float, i64, float, i64, float, i64, float, i64, float, i64, float, i64 }
diff --git a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
index ffa6e44d1cb6..27ec8260d06b 100644
--- a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
+++ b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1763
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
index 889b122bb066..404561848b71 100644
--- a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR1766
%struct.dentry = type { %struct.dentry_operations* }
diff --git a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
index 7e41f3679063..6b871aa3a4d4 100644
--- a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR1767
define void @xor_sse_2(i64 %bytes, i64* %p1, i64* %p2) {
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index de33c617d050..8e586a7059eb 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static | grep {foo _str$}
+; RUN: llc < %s -relocation-model=static | grep {foo _str$}
; PR1761
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index a4e44e1f4e1d..f6db0d0379e7 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
define float @foo(i32* %x, float* %y, i32 %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/2007-11-07-MulBy4.ll b/test/CodeGen/X86/2007-11-07-MulBy4.ll
index d7fb684a6ba4..d5b630b59d9f 100644
--- a/test/CodeGen/X86/2007-11-07-MulBy4.ll
+++ b/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep imul
+; RUN: llc < %s -march=x86 | not grep imul
%struct.eebb = type { %struct.eebb*, i16* }
%struct.hf = type { %struct.hf*, i16*, i8*, i32, i32, %struct.eebb*, i32, i32, i8*, i8*, i8*, i8*, i16*, i8*, i16*, %struct.ri, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [30 x i32], %struct.eebb, i32, i8* }
diff --git a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
index b5635b38cfc9..9c004f946b4a 100644
--- a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
+++ b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb
%struct.double_int = type { i64, i64 }
%struct.tree_common = type <{ i8, [3 x i8] }>
@@ -6,7 +7,7 @@
%struct.tree_node = type { %struct.tree_int_cst }
@tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1]
-define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) {
+define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
entry:
%tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; <i32*> [#uses=1]
br i1 false, label %UnifiedReturnBlock, label %bb21
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 46422bcf2c50..0626d28eefee 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
; RUN: grep {1 .*folded into instructions}
; Increment in loop bb.128.i adjusted to 2, to prevent loop reversal from
; kicking in.
diff --git a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
index 0d43a6e73f87..debb46121698 100644
--- a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
+++ b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: grep {1 .*folded into instructions}
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 4
+; RUN: llc < %s -march=x86 | grep cmp | count 4
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
index cb7a3dcd33cb..ca995cc3f65e 100644
--- a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
+++ b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
+; RUN: llc < %s -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
%struct.__sbuf = type { i8*, i32 }
%struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
index 8ad77051bedd..455de91d30ab 100644
--- a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
+++ b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu
; PR1799
%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 6309f3c51052..265d96854851 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
; PR1872
%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
index fddfd4f3a486..7aec613e2abb 100644
--- a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
+++ b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 8a1520c1fe41..b040095195c8 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep pushf
+; RUN: llc < %s -march=x86 | not grep pushf
%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
diff --git a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
index 962d6ecc24e9..6997d535ff92 100644
--- a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
+++ b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o - | grep sinl
+; RUN: llc < %s -o - | grep sinl
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 38020c1e3ea8..d795610607ee 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -regalloc=local
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local
define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
entry:
diff --git a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
index 4feb078671fb..e91f52ef0569 100644
--- a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
+++ b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep IMPLICIT_DEF
+; RUN: llc < %s -march=x86 | not grep IMPLICIT_DEF
%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
index 4510edb9d7db..704b2bab4a26 100644
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ b/test/CodeGen/X86/2008-01-16-Trampoline.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
index ffb82ae7f2b8..b936686798f0 100644
--- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep nop
+; RUN: llc < %s -march=x86 | grep nop
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
index 6885cf14cf11..443a32de3b42 100644
--- a/test/CodeGen/X86/2008-02-05-ISelCrash.ll
+++ b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR1975
@nodes = external global i64 ; <i64*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index 6db6537aed26..d2d5149de3aa 100644
--- a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xor | grep CPI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xor | grep CPI
define void @casin({ double, double }* sret %agg.result, double %z.0, double %z.1) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
index 230af57fea8c..b772d77f6405 100644
--- a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep andpd | not grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep andpd | not grep esp
declare double @llvm.sqrt.f64(double) nounwind readnone
diff --git a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
index 5bf84560a37c..1983f1d19c6f 100644
--- a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
+++ b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and
+; RUN: llc < %s -march=x86 | grep and
define i32 @test(i1 %A) {
%B = zext i1 %A to i32 ; <i32> [#uses=1]
%C = sub i32 0, %B ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 47c8677d385b..9b52c5c06990 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 557d00c62937..5115e48365fc 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {a:} | not grep ax
-; RUN: llvm-as < %s | llc | grep {b:} | not grep ax
+; RUN: llc < %s | grep {a:} | not grep ax
+; RUN: llc < %s | grep {b:} | not grep ax
; PR2078
; The clobber list says that "ax" is clobbered. Make sure that eax isn't
; allocated to the input/output register.
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index 8cf36425f22d..6b1eefe5750a 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -regalloc=local -march=x86 -mattr=+mmx | grep esi
+; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi
; PR2082
; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
; registers.
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
index f78d52651ded..8d6bb0df1f6d 100644
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
+; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3
; rdar://5761454
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
index ff7cf5e94e25..1d31859f46cc 100644
--- a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mattr=+sse2
; PR2076
define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind {
diff --git a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
index 5d60bde85614..6615b8c62075 100644
--- a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.XX = type <{ i8 }>
%struct.YY = type { i64 }
diff --git a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
index 3ba31f4ad900..0b4eb3a3b9b2 100644
--- a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
+++ b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index fe0ee8a8faaf..ad7950ccd8e3 100644
--- a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.CompAtom = type <{ %struct.Position, float, i32 }>
%struct.Lattice = type { %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-02-27-PEICrash.ll b/test/CodeGen/X86/2008-02-27-PEICrash.ll
index 055eabb43a63..d842967561ab 100644
--- a/test/CodeGen/X86/2008-02-27-PEICrash.ll
+++ b/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define i64 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
index 2d7182e733fb..70a83b5c9f57 100644
--- a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
+++ b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=i386
; PR2122
define float @func(float %a, float %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-03-07-APIntBug.ll b/test/CodeGen/X86/2008-03-07-APIntBug.ll
index 5d1ccad745ad..84e4827d0416 100644
--- a/test/CodeGen/X86/2008-03-07-APIntBug.ll
+++ b/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | not grep 255
+; RUN: llc < %s -march=x86 -mcpu=i386 | not grep 255
%struct.CONSTRAINT = type { i32, i32, i32, i32 }
%struct.FIRST_UNION = type { %struct.anon }
diff --git a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
index 10989885f0f1..cd2d609b5356 100644
--- a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
+++ b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
; PR2134
declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index 0f83b399ad7c..e673d315a435 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep TLSGD | count 2
+; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2
; PR2137
; ModuleID = '1.c'
diff --git a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
index 4a896e9f33e1..c6ba22ea3da6 100644
--- a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
+++ b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i16 @t(i32 %depth) signext nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index 544c9b5819ec..8946415108f4 100644
--- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
; PR2138
%struct.__locale_struct = type { [13 x %struct.locale_data*], i16*, i32*, i32*, [13 x i8*] }
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
index 4b6758d6833c..ccc4d754c1f5 100644
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
%struct..0objc_object = type opaque
%struct.OhBoy = type { }
diff --git a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
index 2fad32a36c3f..eaa883c963f2 100644
--- a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 6cf731b0e9b7..4dc3a10f4647 100644
--- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
index 53bb054795ec..2d868e0f612a 100644
--- a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
+++ b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define void @t() {
entry:
diff --git a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
index 83e1d60fcbaf..305968ac3778 100644
--- a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
+++ b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
; Don't fold re-materialized load into a two address instruction
%"struct.Smarts::Runnable" = type { i32 (...)**, i32 }
diff --git a/test/CodeGen/X86/2008-04-02-unnamedEH.ll b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
index fff75ff660a7..a9f368b6eaa5 100644
--- a/test/CodeGen/X86/2008-04-02-unnamedEH.ll
+++ b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc | grep unnamed_1_0.eh
-; ModuleID = '<stdin>'
+; RUN: llc < %s | grep unnamed_1.eh
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index f5de113b9ea9..dc8c097efc50 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
define i32 @t2() nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
index fea54c4e5ecf..41fbdd19f2b2 100644
--- a/test/CodeGen/X86/2008-04-09-BranchFolding.ll
+++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep jmp
+; RUN: llc < %s -march=x86 | not grep jmp
%struct..0anon = type { i32 }
%struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* }
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 4bb8c6d27a71..83eb61aed433 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
%struct.CGPoint = type { double, double }
%struct.NSArray = type { %struct.NSObject }
diff --git a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
index 30accad5863b..3ccc0fe16340 100644
--- a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define void @Hubba(i8* %saveunder, i32 %firstBlob, i32 %select) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 3e0662aed88d..6e8891bfd5b8 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index c69ff332c2c5..ac482850b831 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep xorl | grep {%e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e}
; Make sure xorl operands are 32-bit registers.
%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
index 09fdc707b854..6389267aa4e8 100644
--- a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
+++ b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep 120
+; RUN: llc < %s -march=x86 | not grep 120
; Don't accidentally add the offset twice for trailing bytes.
%struct.S63 = type { [63 x i8] }
diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
index 838c2ea57987..4eaca17c8861 100644
--- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mattr=+sse41
+; RUN: llc < %s -mattr=+sse41
; rdar://5886601
; gcc testsuite: gcc.target/i386/sse4_1-pblendw.c
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
index 82721a53b8b4..38d6aa6d172a 100644
--- a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
+++ b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {1 \$2 3}
+; RUN: llc < %s | grep {1 \$2 3}
; rdar://5720231
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index f93ad9ae7151..5b97eb71cbfd 100644
--- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl > %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t
; RUN: not grep {r\[abcd\]x} %t
; RUN: not grep {r\[ds\]i} %t
; RUN: not grep {r\[bs\]p} %t
diff --git a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
index 6613fafcce82..6e8e98d865bd 100644
--- a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
+++ b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i64 @t(i64 %maxIdleDuration) nounwind {
call void asm sideeffect "wrmsr", "{cx},A,~{dirflag},~{fpsr},~{flags}"( i32 416, i64 0 ) nounwind
diff --git a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
index d7b5f25de6c9..a708224dd0d9 100644
--- a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
+++ b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86 | grep jnp
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jnp
; rdar://5902801
declare void @test2()
diff --git a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
index c0b196113137..cea0076076d6 100644
--- a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
+++ b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.V = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x i32>, float*, float*, float*, float*, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
index 9bcd1f374dd6..5ceb5464d2b0 100644
--- a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
+++ b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define fastcc void @glgVectorFloatConversion() nounwind {
%tmp12745 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 8751328249d5..1f95a2409fe7 100644
--- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep abort | count 1
+; RUN: llc < %s | grep abort | count 1
; Calls to abort should all be merged
; ModuleID = '5898899.c'
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index 9ecd5814de4f..9cf50f4bfc58 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0 -fast-isel=false | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false | grep mov | count 5
; PR2343
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index c9e30d8f80a2..19a73543c65e 100644
--- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movups | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2
define void @a(<4 x float>* %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
index 68f6ccea4ee6..32bf8d494165 100644
--- a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR2289
define void @_ada_ca11001() {
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 02db2ed93cd2..f1a19ec147a8 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local
@_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
index d28276141689..236b7cd6121f 100644
--- a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
+++ b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
%struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
%struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 0cde7cf269ea..90af3870bd44 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movsd
-; RUN: llvm-as < %s | llc -march=x86 | grep movw
-; RUN: llvm-as < %s | llc -march=x86 | grep addw
+; RUN: llc < %s -march=x86 | not grep movsd
+; RUN: llc < %s -march=x86 | grep movw
+; RUN: llc < %s -march=x86 | grep addw
; These transforms are turned off for volatile loads and stores.
; Check that they weren't turned off for all loads and stores!
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 2b64212dfb87..500cd1f08cfa 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
index 75513b665a0b..4d4819ab05d5 100644
--- a/test/CodeGen/X86/2008-06-16-SubregsBug.ll
+++ b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 4
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4
define i16 @test(i16* %tmp179) nounwind {
%tmp180 = load i16* %tmp179, align 2 ; <i16> [#uses=2]
diff --git a/test/CodeGen/X86/2008-06-18-BadShuffle.ll b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
index ba0a1f90ab92..66f9065799e5 100644
--- a/test/CodeGen/X86/2008-06-18-BadShuffle.ll
+++ b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
; Test to make sure we actually insert the bottom element of the vector
define <8 x i16> @a(<8 x i16> %a) nounwind {
diff --git a/test/CodeGen/X86/2008-06-25-VecISelBug.ll b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
index f369986fbcea..72d190758f8d 100644
--- a/test/CodeGen/X86/2008-06-25-VecISelBug.ll
+++ b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep pslldq
define void @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
index 3586f87776a3..46341fc87103 100644
--- a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
+++ b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
%struct.ogg_stream_state = type { i8*, i32, i32, i32, i32*, i64*, i32, i32, i32, i32, [282 x i8], i32, i32, i32, i32, i32, i64, i64 }
%struct.res_state = type { i32, i32, i32, i32, float*, float*, i32, i32 }
diff --git a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
index 5fb3e5780b94..1a786ef7a90f 100644
--- a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
+++ b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep ax
+; RUN: llc < %s | grep ax
; PR2024
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2008-07-11-SHLBy1.ll b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
index 5b94a351cff9..ff2b05fb08eb 100644
--- a/test/CodeGen/X86/2008-07-11-SHLBy1.ll
+++ b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -o - | not grep shr
+; RUN: llc < %s -march=x86-64 -o - | not grep shr
define i128 @sl(i128 %x) {
%t = shl i128 %x, 1
ret i128 %t
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
index 1d9463886501..f75e605168ec 100644
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@@ -1,7 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static -disable-fp-elim |\
-; RUN: %prcontext 65534 1 | grep movl | count 1
+; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim | FileCheck %s
; PR2536
+
+; CHECK: movw %cx
+; CHECK-NEXT: andl $65534, %
+; CHECK-NEXT: movl %
+; CHECK-NEXT: movl $17
+
@g_5 = external global i16 ; <i16*> [#uses=2]
@g_107 = external global i16 ; <i16*> [#uses=1]
@g_229 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
index aa9ee507f80c..f56604b75bd7 100644
--- a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
%struct.SV = type { i8*, i64, i64 }
@"\01LC25" = external constant [8 x i8] ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index ae30385e13e6..98919ee5221a 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
; PR2539
external global <4 x float>, align 1 ; <<4 x float>*>:0 [#uses=2]
diff --git a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
index a18564f4f979..0f6714579bcc 100644
--- a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
+++ b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2566
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-23-VSetCC.ll b/test/CodeGen/X86/2008-07-23-VSetCC.ll
index da6c089c460f..684ca5c89fd2 100644
--- a/test/CodeGen/X86/2008-07-23-VSetCC.ll
+++ b/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -1,11 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium
+; RUN: llc < %s -march=x86 -mcpu=pentium
; PR2575
define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
br i1 false, label %bb.nph, label %._crit_edge
bb.nph: ; preds = %bb.nph, %0
- vicmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 > ; <<4 x i32>>:1 [#uses=1]
+ %X = icmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 > ; <<4 x i32>>:1 [#uses=1]
+ sext <4 x i1> %X to <4 x i32>
extractelement <4 x i32> %1, i32 3 ; <i32>:2 [#uses=1]
lshr i32 %2, 31 ; <i32>:3 [#uses=1]
trunc i32 %3 to i1 ; <i1>:4 [#uses=1]
@@ -27,4 +28,5 @@ bb.nph: ; preds = %bb.nph, %0
ret void
}
+
declare float @fmaxf(float, float)
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index 2ebbe6ea5226..1d166f488158 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 56
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 59
; PR2568
@g_3 = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
index 9371c2a6383b..4428035cc827 100644
--- a/test/CodeGen/X86/2008-08-06-RewriterBug.ll
+++ b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2596
@data = external global [400 x i64] ; <[400 x i64]*> [#uses=5]
diff --git a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
index b09211d9efe0..32f6ca0ce086 100644
--- a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
+++ b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movzbl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl
define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
index 00bcdf82e8dd..8475e8d354e5 100644
--- a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xadd
+; RUN: llc < %s -march=x86-64 | FileCheck %s
@var = external global i64 ; <i64*> [#uses=1]
define i32 @main() nounwind {
entry:
+; CHECK: main:
+; CHECK: lock
+; CHECK: decq
tail call i64 @llvm.atomic.load.sub.i64.p0i64( i64* @var, i64 1 ) ; <i64>:0 [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index 2c6828bbd0aa..c76dd7de1256 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
%struct.QBasicAtomic = type { i32 }
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
index 4e3533287dbc..eacb4a51c215 100644
--- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
+++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
@@ -1,7 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movd | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movd | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq
; PR2677
+
%struct.Bigint = type { %struct.Bigint*, i32, i32, i32, i32, [1 x i32] }
define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index f793b524e61f..101b3c5cfdbb 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mcpu=core2 | grep pxor | count 2
-; RUN: llvm-as < %s | llc -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movapd
; PR2715
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index e22b647a13f0..b92c789a30c7 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,6 +1,6 @@
; Check that eh_return & unwind_init were properly lowered
-; RUN: llvm-as < %s | llc | grep %ebp | count 9
-; RUN: llvm-as < %s | llc | grep %ecx | count 5
+; RUN: llc < %s | grep %ebp | count 7
+; RUN: llc < %s | grep %ecx | count 5
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i386-pc-linux"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 7d01824400c8..00ab73569c4b 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,6 +1,6 @@
; Check that eh_return & unwind_init were properly lowered
-; RUN: llvm-as < %s | llc | grep %rbp | count 7
-; RUN: llvm-as < %s | llc | grep %rcx | count 3
+; RUN: llc < %s | grep %rbp | count 5
+; RUN: llc < %s | grep %rcx | count 3
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index ffe10d439bc7..60be0d51e7e7 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
; PR2687
define <2 x double> @a(<2 x i32> %x) nounwind {
diff --git a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
index 30a2b15c8dea..b3312d9464d1 100644
--- a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
+++ b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; PR2757
@g_3 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
index 02dd04dc133c..108f24307ea9 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2783
@g_15 = external global i16 ; <i16*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 94033449114f..534f99033372 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2748
@g_73 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index ed8d345aad3d..74429c382e71 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %eax, %eax"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %edx, %edx"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%eax), %eax"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%edx), %edx"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %eax, %eax"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %edx, %edx"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
; %0 must not be put in EAX or EDX.
; In the first asm, $0 and $2 must not be put in EAX.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 62e3233f9b3a..f5bd307139d6 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
+; RUN: llc < %s -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
+; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers
; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
; operand. There are many combinations that work; this is what llc puts out now.
diff --git a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
index 47feb83c9272..a8f2912a70af 100644
--- a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
+++ b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; PR2808
@g_3 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
index d103f144e284..c92a8f463571 100644
--- a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
+++ b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movs | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fld | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movs | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fld | count 2
; check 'inreg' attribute for sse_regparm
define double @foo1() inreg nounwind {
diff --git a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
index b1f5ab590717..f1ada28bcfcb 100644
--- a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
+++ b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i32, i32, i32, [18 x i8] }
%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i32, i32, [3 x i32] }
diff --git a/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
index d4da01a508fd..c36cf39fb341 100644
--- a/test/CodeGen/X86/2008-09-29-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
%struct..0objc_selector = type opaque
%struct.NSString = type opaque
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 4f6eb59773fb..935c4c55f046 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movz
+; RUN: llc < %s -march=x86 | not grep movz
; PR2835
@g_407 = internal global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
index e74280cd3a17..b48c4adaa26c 100644
--- a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
+++ b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
;; This version includes 64-bit version of binary operators (in 32-bit mode).
;; Swap, cmp-and-swap not supported yet in this mode.
; ModuleID = 'Atomics.c'
diff --git a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
index bd1ad59797ab..7f7b1a436d24 100644
--- a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
+++ b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2
; PR2850
@tmp_V2i = common global <2 x i32> zeroinitializer ; <<2 x i32>*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
index 837aad530407..a135cd497876 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
@@ -1,7 +1,7 @@
; ModuleID = 'nan.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
; This NaN should be shortened to a double (not a float).
declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
index d2e9b457517e..bd48105f129a 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -1,7 +1,7 @@
; ModuleID = 'nan.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
; it is not safe to shorten any of these NaNs.
declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
diff --git a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
index 48089861bc32..bc5761288c9b 100644
--- a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
+++ b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
define <4 x float> @f(float %w) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-10-11-CallCrash.ll b/test/CodeGen/X86/2008-10-11-CallCrash.ll
index 979b7875fec6..efc6125cfc2d 100644
--- a/test/CodeGen/X86/2008-10-11-CallCrash.ll
+++ b/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2735
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
index 608372e5a890..4d3f8c2071b5 100644
--- a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2775
define i32 @func_77(i8 zeroext %p_79) nounwind {
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
index 4318f1d28c72..b8ca364d1798 100644
--- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edx}
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
index e1dc7b6bb27c..de4c1e70b8d8 100644
--- a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
+++ b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2762
define void @foo(<4 x i32>* %p, <4 x double>* %q) {
%n = load <4 x i32>* %p
diff --git a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
index eb2ec3760b9e..b2e6061ff91c 100644
--- a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
+++ b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
define void @test(i64 %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
index 33e8c49277f4..353d1c75216b 100644
--- a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
+++ b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
; from gcc.c-torture/compile/920520-1.c
diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
index d6ae05e3798e..421b931ecd5a 100644
--- a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
+++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
define void @f(float %wt) {
entry:
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index ad13b8528372..afeb358da572 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index d8b0e706d2ab..784bc72f42e9 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -2,8 +2,8 @@
; Until it does, we shouldn't use movaps to access the stack. On targets with
; sufficiently aligned stack (e.g. darwin) we should.
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
index 41776b2a38e9..7ad94f149e1f 100644
--- a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
+++ b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2977
define i8* @ap_php_conv_p2(){
entry:
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
index 36a054a3e6f8..507799b7304f 100644
--- a/test/CodeGen/X86/2008-11-03-F80VAARG.ll
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o - | not grep 10
+; RUN: llc < %s -march=x86 -o - | not grep 10
declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index 7acc7cad3cfd..f8f317c2dd46 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep testb
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
index 7487548e820b..1dc97fc52a46 100644
--- a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
+++ b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu
; PR 1779
; Using 'A' constraint and a tied constraint together used to crash.
; ModuleID = '<stdin>'
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
index fe1870e1d84c..2e114ab5ae88 100644
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
-; ModuleID = '<stdin>'
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
index faf7cd4b2204..7c811afa51d3 100644
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551
-; ModuleID = '<stdin>'
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
index 6c26b6818e4f..6dca141639e4 100644
--- a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
+++ b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
index 81b25da8a8de..d96d806388c9 100644
--- a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
+++ b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR3124
%struct.cpuinfo_x86 = type { i8, i8, i8, i8, i32, i8, i8, i8, i32, i32, [9 x i32], [16 x i8], [64 x i8], i32, i32, i32, i64, %struct.cpumask_t, i16, i16, i16, i16, i16, i16, i16, i16, i32 }
diff --git a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
index ca5a80ccd82b..1f8bd45da14d 100644
--- a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
+++ b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep lea
; The inner loop should use [reg] addressing, not [reg+reg] addressing.
; rdar://6403965
diff --git a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
index 01e0f7eb81de..4b72cb919ffa 100644
--- a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
+++ b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3117
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
index 48bb4e438328..fe5bff3e3459 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
index ba7dfbbcecc1..4cb1b42693b9 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
index 5fb639d5fc33..d5a676a7dbba 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep sub | grep -v subsections | count 1
+; RUN: llc < %s -march=x86 | grep add | count 2
+; RUN: llc < %s -march=x86 | grep sub | grep -v subsections | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; this should be rearranged to have two +s and one -
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
index b6b5cbda4bd1..7fd2e6f2948f 100644
--- a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
+; RUN: llc < %s -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
%struct.XXActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
new file mode 100644
index 000000000000..e97b63db14d9
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh
+
+define void @_Z1fv() {
+entry:
+ br label %return
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
index 46b70188c8fe..6c70c5ba5322 100644
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ b/test/CodeGen/X86/2008-12-16-BadShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep shrl
+; RUN: llc < %s | not grep shrl
; Note: this test is really trying to make sure that the shift
; returns the right result; shrl is most likely wrong,
; but if CodeGen starts legitimately using an shrl here,
diff --git a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
index 193d290e33ff..3080d0855727 100644
--- a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
+++ b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index c7fdfb269207..13a9080c1401 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,7 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 2 | grep mov
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
; PR3149
; Make sure the copy after inline asm is not coalesced away.
+; CHECK: ## InlineAsm End
+; CHECK-NEXT: BB1_2:
+; CHECK-NEXT: movl %esi, %eax
+
+
@"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00" ; <[7 x i8]*> [#uses=1]
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
index 24be521842f0..75773e0959c2 100644
--- a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
+++ b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; -(-a) - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
index e53a91ec3a11..2edcaea80ce7 100644
--- a/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
+; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
@X = external global [0 x i32]
diff --git a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
index 13cb9db8eeb8..bae928336baa 100644
--- a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
+++ b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 4
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 4
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
index 7c800d4e287c..27a7113ffd56 100644
--- a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
; PR3311
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
index ecf71f64cf99..9c71469b5b20 100644
--- a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx -enable-legalize-types-checking
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
index ff20dc1e3004..99bef6ce3fc9 100644
--- a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
+++ b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; rdar://6501631
%CF = type { %Register }
diff --git a/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
index 340608af35a8..2eab5f1773ac 100644
--- a/test/CodeGen/X86/2009-01-16-UIntToFP.ll
+++ b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
index 8857df38926d..f895336491e2 100644
--- a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
+++ b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6505632
; reduced from 483.xalancbmk
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index b12e4137dbd5..0583ef190919 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
; PR3402
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-01-26-WrongCheck.ll b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
index db9dbb67def4..117ff47657f4 100644
--- a/test/CodeGen/X86/2009-01-26-WrongCheck.ll
+++ b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -enable-legalize-types-checking
; PR3393
define void @foo(i32 inreg %x) {
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
index b0c27d8903e7..8684f4a19ca4 100644
--- a/test/CodeGen/X86/2009-01-27-NullStrings.ll
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -1,38 +1,7 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep {\\.cstring} | count 1
- %struct.A = type { }
- %struct.NSString = type opaque
- %struct.__builtin_CFString = type { i32*, i32, i8*, i32 }
- %struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
- %struct._objc_symtab = type { i32, %struct.objc_selector**, i16, i16 }
- %struct.objc_object = type opaque
- %struct.objc_selector = type opaque
-@"\01L_unnamed_cfstring_0" = internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([1 x i8]* @"\01LC", i32 0, i32 0), i32 0 }, section "__DATA, __cfstring" ; <%struct.__builtin_CFString*> [#uses=1]
-@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
-@"\01LC" = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_SELECTOR_REFERENCES_0" = internal global %struct.objc_selector* bitcast ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <%struct.objc_selector**> [#uses=2]
-@"\01L_OBJC_SYMBOLS" = internal global %struct._objc_symtab zeroinitializer, section "__OBJC,__symbols,regular,no_dead_strip", align 4 ; <%struct._objc_symtab*> [#uses=2]
-@"\01L_OBJC_METH_VAR_NAME_0" = internal global [6 x i8] c"bork:\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[6 x i8]*> [#uses=2]
-@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1]
-@"\01L_OBJC_CLASS_NAME_0" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1 ; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), %struct._objc_symtab* @"\01L_OBJC_SYMBOLS" }, section "__OBJC,__module_info,regular,no_dead_strip", align 4 ; <%struct._objc_module*> [#uses=1]
-@llvm.used = appending global [6 x i8*] [ i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0" to i8*), i8* bitcast (%struct._objc_symtab* @"\01L_OBJC_SYMBOLS" to i8*), i8* getelementptr ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*) ], section "llvm.metadata" ; <[6 x i8*]*> [#uses=0]
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
+; CHECK: .section __TEXT,__cstring,cstring_literals
-define void @func(%struct.A* %a) nounwind {
-entry:
- %a_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
- %a.0 = alloca %struct.objc_object* ; <%struct.objc_object**> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store %struct.A* %a, %struct.A** %a_addr
- %0 = load %struct.A** %a_addr, align 4 ; <%struct.A*> [#uses=1]
- %1 = bitcast %struct.A* %0 to %struct.objc_object* ; <%struct.objc_object*> [#uses=1]
- store %struct.objc_object* %1, %struct.objc_object** %a.0, align 4
- %2 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4 ; <%struct.objc_selector*> [#uses=1]
- %3 = load %struct.objc_object** %a.0, align 4 ; <%struct.objc_object*> [#uses=1]
- call void bitcast (%struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* @objc_msgSend to void (%struct.objc_object*, %struct.objc_selector*, %struct.NSString*)*)(%struct.objc_object* %3, %struct.objc_selector* %2, %struct.NSString* bitcast (%struct.__builtin_CFString* @"\01L_unnamed_cfstring_0" to %struct.NSString*)) nounwind
- br label %return
+@x = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-return: ; preds = %entry
- ret void
-}
+@y = global [1 x i8]* @x
-declare %struct.objc_object* @objc_msgSend(%struct.objc_object*, %struct.objc_selector*, ...)
diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
index b7f37c9d3102..ce3ea828ec0c 100644
--- a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
; rdar://6538384
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/2009-01-31-BigShift.ll b/test/CodeGen/X86/2009-01-31-BigShift.ll
index 360b4f0e46bf..4eb0ec1485b7 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
; PR3401
define void @x(i288 %i) nounwind {
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 2b5b18957830..9d240844afba 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {mov.*56}
+; RUN: llc < %s -march=x86 | grep {mov.*56}
; PR3449
define void @test(<8 x double>* %P, i64* %Q) nounwind {
diff --git a/test/CodeGen/X86/2009-01-31-BigShift3.ll b/test/CodeGen/X86/2009-01-31-BigShift3.ll
index c92c86a092a1..1b531e370437 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift3.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3450
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-01-LargeMask.ll b/test/CodeGen/X86/2009-02-01-LargeMask.ll
index f2a964f208ce..c4042e6c9c68 100644
--- a/test/CodeGen/X86/2009-02-01-LargeMask.ll
+++ b/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3453
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
index 5f97ee7a70cd..e75af13a600b 100644
--- a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
+++ b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3411
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
index 1f29bdbe37eb..6ba046a80c22 100644
--- a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
+++ b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep { - 92}
+; RUN: llc < %s | grep { - 92}
; PR3481
; The offset should print as -92, not +17179869092
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
index 39cad73d4c09..0ffa8fdc30dd 100644
--- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
newFuncRoot:
diff --git a/test/CodeGen/X86/2009-02-07-CoalescerBug.ll b/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
index 784c97a22619..2d0bbe607279 100644
--- a/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -stats |& grep {Number of valno def marked dead} | grep 1
+; RUN: llc < %s -march=x86 -relocation-model=pic -stats |& grep {Number of valno def marked dead} | grep 1
; rdar://6566708
target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
index cd30c1e7e40e..908cc08991d8 100644
--- a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3486
define i32 @foo(i8 signext %p_26) nounwind {
diff --git a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
index 7b73a86a72ec..1284b0d1b7b2 100644
--- a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
+++ b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3537
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index b0c4449610ac..72c7ee93a9d2 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s
+; RUN: llc < %s -march=x86-64
; PR3538
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
index ddd15f7c81e8..2e148ad6b18c 100644
--- a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
+++ b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {\$-81920} | count 3
-; RUN: llvm-as < %s | llc -march=x86 | grep {\$4294885376} | count 1
+; RUN: llc < %s -march=x86 | grep {\$-81920} | count 3
+; RUN: llc < %s -march=x86 | grep {\$4294885376} | count 1
; ModuleID = 'shant.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
index 1d10319e86d9..4f8a5e7b3e30 100644
--- a/test/CodeGen/X86/2009-02-12-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-apple-darwin8
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8
; PR3561
define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind {
diff --git a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
index 54fcd430e98c..58a7f9fb7593 100644
--- a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
+++ b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
define i32 @main() nounwind {
bb4.i.thread:
diff --git a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
index a6bb7b8615fd..b3dd13c50f92 100644
--- a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
+++ b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep weak | count 3
+; RUN: llc < %s | grep weak | count 3
; PR3629
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 3dbfa80e00e6..7ea699833ba8 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted
; rdar://6608609
define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 8bf6c23d59db..cb1b1efae3e2 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
; rdar://6627786
target triple = "x86_64-apple-darwin10.0"
diff --git a/test/CodeGen/X86/2009-03-03-BTHang.ll b/test/CodeGen/X86/2009-03-03-BTHang.ll
index 0f338d8eadff..bb9592577435 100644
--- a/test/CodeGen/X86/2009-03-03-BTHang.ll
+++ b/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; rdar://6642541
%struct.HandleBlock = type { [30 x i32], [990 x i8*], %struct.HandleBlockTrailer }
diff --git a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
index 6f16ced1c674..9deecebe9453 100644
--- a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
+++ b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3686
; rdar://6661799
diff --git a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
index ccedaae9322d..411a0c92830a 100644
--- a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
+++ b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
index 28302c0f7b0b..39caddcf9342 100644
--- a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
+++ b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
; This should do a single load into the fp stack for the return, not diddle with xmm registers.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
index d7b5269eaeb9..896c9686cc4e 100644
--- a/test/CodeGen/X86/2009-03-09-APIntCrash.ll
+++ b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR3763
%struct.__block_descriptor = type { i64, i64 }
diff --git a/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
index 2ccd7714233e..4224210e58f0 100644
--- a/test/CodeGen/X86/2009-03-09-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
; PR3706
define void @__mulxc3(x86_fp80 %b) nounwind {
diff --git a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
index 3d979e9d7397..90dff8878a78 100644
--- a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
; rdar://r6661945
%struct.WINDOW = type { i16, i16, i16, i16, i16, i16, i16, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.ldat*, i16, i16, i32, i32, %struct.WINDOW*, %struct.pdat, i16, %struct.cchar_t }
diff --git a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
index 1f5631764b57..d5ba93e10495 100644
--- a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
@lookupTable5B = external global [64 x i32], align 32 ; <[64 x i32]*> [#uses=1]
@lookupTable3B = external global [16 x i32], align 32 ; <[16 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index ec060e4ef4a5..3564f01a7c43 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
; rdar://6668548
declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index b01556de4828..878fa51d5dc3 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -A 2 {call.*f} | grep movl
+; RUN: llc < %s -march=x86 | grep -A 2 {call.*f} | grep movl
; Check the register copy comes after the call to f and before the call to g
; PR3784
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index 091aab41d291..adbd241cd98f 100644
--- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel
+; RUN: llc < %s -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel
; Check that register copies in the landing pad come after the EH_LABEL
declare i32 @f()
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
index 09782a26fec9..80e7639e7c29 100644
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
+; RUN: llc < %s -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
; rdar://6682365
; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
diff --git a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
index b5298aee3065..06dfdc0c767f 100644
--- a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -O0
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0
define fastcc void @optimize_bit_field() nounwind {
bb4:
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index b30d41eb05ba..b5873bae5f05 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
; RUN: not grep spill %t
; RUN: not grep {%rsp} %t
; RUN: not grep {%rbp} %t
diff --git a/test/CodeGen/X86/2009-03-23-i80-fp80.ll b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
index 0619e1203968..e542325b6369 100644
--- a/test/CodeGen/X86/2009-03-23-i80-fp80.ll
+++ b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep 302245289961712575840256
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep K40018000000000000000
+; RUN: opt < %s -instcombine -S | grep 302245289961712575840256
+; RUN: opt < %s -instcombine -S | grep K40018000000000000000
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index 2c330db713e8..f40fddc5a36d 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t -f
+; RUN: llc < %s -march=x86 -o %t
; RUN: not grep and %t
; RUN: not grep shr %t
; rdar://6661955
diff --git a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
index 0e31942e468d..f4864793ba2f 100644
--- a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
+++ b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -no-implicit-float
+; RUN: llc < %s -march=x86 -mattr=+sse2
-define double @t(double %x) nounwind ssp {
+define double @t(double %x) nounwind ssp noimplicitfloat {
entry:
br i1 false, label %return, label %bb3
diff --git a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
index 1d4d2b67783c..97bbd93f83f1 100644
--- a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6774324
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
diff --git a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index bf1c8df377db..27f11cf6bc6e 100644
--- a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llc < %s -fast-isel
; radr://6772169
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10"
diff --git a/test/CodeGen/X86/2009-04-12-picrel.ll b/test/CodeGen/X86/2009-04-12-picrel.ll
index 73062ab6263e..f1942801c7af 100644
--- a/test/CodeGen/X86/2009-04-12-picrel.ll
+++ b/test/CodeGen/X86/2009-04-12-picrel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
; RUN: grep leaq %t | count 1
@dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
index d6f4b9444b59..ff8cf0ac229e 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; rdar://6781755
; PR3934
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
index 7f94c6ca947e..4362ba437541 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6781755
; PR3934
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 0d66f6984fe2..bfa3eaa565df 100644
--- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
; rdar://6787136
%struct.X = type { i8, [32 x i8] }
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index 3e60f6bbac8e..f46eed4769f7 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
; XFAIL: *
; 69408 removed the opportunity for this optimization to work
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
index 985eb2147247..4d25b0f98319 100644
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
; rdar://6802189
; Test if linearscan is unfavoring registers for allocation to allow more reuse
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 750dba772142..c6e6e50641c5 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,7 +1,13 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 | \
-; RUN: %prcontext {14} 2 | grep {(%ebp)} | count 1
+; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic \
+; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 < %s | \
+; RUN: FileCheck %s
; rdar://6808032
+; CHECK: pextrw $14
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: (%ebp)
+; CHECK-NEXT: pinsrw
+
define void @update(i8** %args_list) nounwind {
entry:
%cmp.i = icmp eq i32 0, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 2835c2decfca..c1ec45fc007e 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
; RUN: grep {leal.*TLSGD.*___tls_get_addr} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
; PR4004
diff --git a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
index 981d3277d343..94d3eb21cecc 100644
--- a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
; rdar://6806252
define i64 @test(i32* %tmp13) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
index b804a5b40a85..7981a52e740a 100644
--- a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR4034
%struct.BiContextType = type { i16, i8 }
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
index 1b757b16d73e..d77e528fa7c1 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
; PR4056
define void @int163(i32 %p_4, i32 %p_5) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
index 70cb4ff3c825..f02565403e87 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
; PR4051
define void @int163(i32 %p_4, i32 %p_5) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
index 0fb000c3a073..0a2fcdbf6c08 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
; PR4076
type { i8, i8, i8 } ; type %0
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index fc31c0b416d0..a2fd2e4c51c9 100644
--- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl.*%ebx, 8(%esi)}
+; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.0"
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index 767eb3118d96..6843723052c1 100644
--- a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10
+; RUN: llc < %s -mtriple=i386-apple-darwin10
; rdar://6837009
type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 } ; type %0
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index c02c045ba5de..d1f9cf83307c 100644
--- a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
; PR4099
type { [62 x %struct.Bitvec*] } ; type %0
diff --git a/test/CodeGen/X86/2009-04-scale.ll b/test/CodeGen/X86/2009-04-scale.ll
index 0766dc79e020..e4c756cfdd44 100644
--- a/test/CodeGen/X86/2009-04-scale.ll
+++ b/test/CodeGen/X86/2009-04-scale.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-unknown-linux-gnu
+; RUN: llc < %s -march=x86 -mtriple=i386-unknown-linux-gnu
; PR3995
%struct.vtable = type { i32 (...)** }
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
index 284c6e250d79..738b5fbb7048 100644
--- a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static > %t
+; RUN: llc < %s -relocation-model=static > %t
; RUN: grep "1: ._pv_cpu_ops+8" %t
; RUN: grep "2: ._G" %t
; PR4152
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index 817872598eaf..a5e28c074867 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR4188
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
index 42bf9e991e6e..6e062fb25089 100644
--- a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
+++ b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR3886
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
diff --git a/test/CodeGen/X86/2009-05-23-available_externally.ll b/test/CodeGen/X86/2009-05-23-available_externally.ll
index f4881bab45cf..94773d91ea17 100644
--- a/test/CodeGen/X86/2009-05-23-available_externally.ll
+++ b/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep atoi | grep PLT
+; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT
; PR4253
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 6f2bef4fca10..8a0b244a23fa 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep -E {sar|shl|mov|or} | count 4
+; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4
; Check that the shr(shl X, 56), 48) is not mistakenly turned into
; a shr (X, -8) that gets subsequently "optimized away" as undef
; PR4254
diff --git a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
index 7bdfcb31035c..2fd42f40d891 100644
--- a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
+++ b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.tempsym_t = type { i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
index 373f91f06f61..af552d4ce20d 100644
--- a/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep {movzbl %\[abcd\]h,}
+; RUN: llc < %s -march=x86-64 | not grep {movzbl %\[abcd\]h,}
define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind {
newFuncRoot:
diff --git a/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
index ea33b16f823f..779f9857de7f 100644
--- a/test/CodeGen/X86/2009-06-02-RewriterBug.ll
+++ b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
; PR4225
define void @sha256_block1(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind {
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c628b8affdd9..e6f3008c2476 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
target triple = "x86_64-mingw64"
define x86_fp80 @a(i64 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 33d797297be8..cb64bf22c981 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -o %t1 -f
-; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
-; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+; RUN: llc < %s -o %t1
+; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
+; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
target triple = "x86_64-mingw64"
define i32 @a() nounwind {
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index fa90fa9426d6..9415732de025 100644
--- a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
type { %struct.GAP } ; type %0
type { i16, i8, i8 } ; type %1
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 94df530ec0e6..336f17e2a325 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
define <8 x i8> @a(i8 zeroext %x) nounwind {
%r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 220423aa986a..5c514805e485 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
; RUN: grep movzwl %t1 | count 2
; RUN: grep movzbl %t1 | count 2
; RUN: grep movd %t1 | count 4
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
index 2e3f195ff947..8bb3dc63a3b9 100644
--- a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
index 589a8800ede7..e361804d61ba 100644
--- a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
+++ b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; RUN: llc < %s -march=x86 -mattr=+sse
; PR2598
define <2 x float> @a(<2 x i32> %i) nounwind {
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
index a46fd1a2e76f..92419fcb8b81 100644
--- a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index c3687a533e05..07ef53e09d8e 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep movl | count 2
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
define i64 @a(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
index 001b7fc5a4af..673e936e2178 100644
--- a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
+++ b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
; calling convention out of sync with standard c calling convention on x86_64)
diff --git a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
index 095e6a103621..feb578098cae 100644
--- a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
+++ b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | not grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | not grep TAILCALL
; Bug 4396. This tail call can NOT be optimized.
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
index d6ff5b6803e3..228cd48119e3 100644
--- a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
; PR2484
define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {
diff --git a/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
new file mode 100644
index 000000000000..fcc71aef23ae
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=x86 -mtriple=x86_64-unknown-freebsd7.2
+; PR4478
+
+ %struct.sockaddr = type <{ i8, i8, [14 x i8] }>
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %sw.bb6, %entry
+ switch i32 undef, label %sw.default [
+ i32 -1, label %while.end
+ i32 119, label %sw.bb6
+ ]
+
+sw.bb6: ; preds = %while.cond
+ br i1 undef, label %if.then, label %while.cond
+
+if.then: ; preds = %sw.bb6
+ ret i32 1
+
+sw.default: ; preds = %while.cond
+ ret i32 1
+
+while.end: ; preds = %while.cond
+ br i1 undef, label %if.then15, label %if.end16
+
+if.then15: ; preds = %while.end
+ ret i32 1
+
+if.end16: ; preds = %while.end
+ br i1 undef, label %lor.lhs.false, label %if.then21
+
+lor.lhs.false: ; preds = %if.end16
+ br i1 undef, label %if.end22, label %if.then21
+
+if.then21: ; preds = %lor.lhs.false, %if.end16
+ ret i32 1
+
+if.end22: ; preds = %lor.lhs.false
+ br i1 undef, label %lor.lhs.false27, label %if.then51
+
+lor.lhs.false27: ; preds = %if.end22
+ br i1 undef, label %lor.lhs.false39, label %if.then51
+
+lor.lhs.false39: ; preds = %lor.lhs.false27
+ br i1 undef, label %if.end52, label %if.then51
+
+if.then51: ; preds = %lor.lhs.false39, %lor.lhs.false27, %if.end22
+ ret i32 1
+
+if.end52: ; preds = %lor.lhs.false39
+ br i1 undef, label %if.then57, label %if.end58
+
+if.then57: ; preds = %if.end52
+ ret i32 1
+
+if.end58: ; preds = %if.end52
+ br i1 undef, label %if.then64, label %if.end65
+
+if.then64: ; preds = %if.end58
+ ret i32 1
+
+if.end65: ; preds = %if.end58
+ br i1 undef, label %if.then71, label %if.end72
+
+if.then71: ; preds = %if.end65
+ ret i32 1
+
+if.end72: ; preds = %if.end65
+ br i1 undef, label %if.then83, label %if.end84
+
+if.then83: ; preds = %if.end72
+ ret i32 1
+
+if.end84: ; preds = %if.end72
+ br i1 undef, label %if.then101, label %if.end102
+
+if.then101: ; preds = %if.end84
+ ret i32 1
+
+if.end102: ; preds = %if.end84
+ br i1 undef, label %if.then113, label %if.end114
+
+if.then113: ; preds = %if.end102
+ ret i32 1
+
+if.end114: ; preds = %if.end102
+ br i1 undef, label %if.then209, label %if.end210
+
+if.then209: ; preds = %if.end114
+ ret i32 1
+
+if.end210: ; preds = %if.end114
+ br i1 undef, label %if.then219, label %if.end220
+
+if.then219: ; preds = %if.end210
+ ret i32 1
+
+if.end220: ; preds = %if.end210
+ br i1 undef, label %if.end243, label %lor.lhs.false230
+
+lor.lhs.false230: ; preds = %if.end220
+ unreachable
+
+if.end243: ; preds = %if.end220
+ br i1 undef, label %if.then249, label %if.end250
+
+if.then249: ; preds = %if.end243
+ ret i32 1
+
+if.end250: ; preds = %if.end243
+ br i1 undef, label %if.end261, label %if.then260
+
+if.then260: ; preds = %if.end250
+ ret i32 1
+
+if.end261: ; preds = %if.end250
+ br i1 undef, label %if.then270, label %if.end271
+
+if.then270: ; preds = %if.end261
+ ret i32 1
+
+if.end271: ; preds = %if.end261
+ %call.i = call i32 @arc4random() nounwind ; <i32> [#uses=1]
+ %rem.i = urem i32 %call.i, 16383 ; <i32> [#uses=1]
+ %rem1.i = trunc i32 %rem.i to i16 ; <i16> [#uses=1]
+ %conv2.i = or i16 %rem1.i, -16384 ; <i16> [#uses=1]
+ %0 = call i16 asm "xchgb ${0:h}, ${0:b}", "=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv2.i) nounwind ; <i16> [#uses=1]
+ store i16 %0, i16* undef
+ %call281 = call i32 @bind(i32 undef, %struct.sockaddr* undef, i32 16) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @bind(i32, %struct.sockaddr*, i32)
+
+declare i32 @arc4random()
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
new file mode 100644
index 000000000000..eb9378b9527b
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -disable-mmx
+
+define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {
+ %D = icmp sgt <2 x i32> %A, %B
+ %E = zext <2 x i1> %D to <2 x i32>
+ store <2 x i32> %E, <2 x i32>* %C
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
new file mode 100644
index 000000000000..0fdfdcb8a30a
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+; PR3037
+
+define void @entry(<4 x i8>* %dest) {
+ %1 = xor <4 x i1> zeroinitializer, < i1 true, i1 true, i1 true, i1 true >
+ %2 = extractelement <4 x i1> %1, i32 3
+ %3 = zext i1 %2 to i8
+ %4 = insertelement <4 x i8> zeroinitializer, i8 %3, i32 3
+ store <4 x i8> %4, <4 x i8>* %dest, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
new file mode 100644
index 000000000000..eabaf775edef
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -0,0 +1,958 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+ %struct.ANY = type { i8* }
+ %struct.AV = type { %struct.XPVAV*, i32, i32 }
+ %struct.CLONE_PARAMS = type { %struct.AV*, i64, %struct.PerlInterpreter* }
+ %struct.CV = type { %struct.XPVCV*, i32, i32 }
+ %struct.DIR = type { i32, i64, i64, i8*, i32, i64, i64, i32, %struct.__darwin_pthread_mutex_t, %struct._telldir* }
+ %struct.GP = type { %struct.SV*, i32, %struct.io*, %struct.CV*, %struct.AV*, %struct.HV*, %struct.GV*, %struct.CV*, i32, i32, i32, i8* }
+ %struct.GV = type { %struct.XPVGV*, i32, i32 }
+ %struct.HE = type { %struct.HE*, %struct.HEK*, %struct.SV* }
+ %struct.HEK = type { i32, i32, [1 x i8] }
+ %struct.HV = type { %struct.XPVHV*, i32, i32 }
+ %struct.MAGIC = type { %struct.MAGIC*, %struct.MGVTBL*, i16, i8, i8, %struct.SV*, i8*, i32 }
+ %struct.MGVTBL = type { i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*, %struct.SV*, i8*, i32)*, i32 (%struct.MAGIC*, %struct.CLONE_PARAMS*)* }
+ %struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8 }
+ %struct.PMOP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8, %struct.OP*, %struct.OP*, %struct.OP*, %struct.OP*, %struct.PMOP*, %struct.REGEXP*, i32, i32, i8, %struct.HV* }
+ %struct.PerlIO_funcs = type { i64, i8*, i64, i32, i64 (%struct.PerlIOl**, i8*, %struct.SV*, %struct.PerlIO_funcs*)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIO_funcs*, %struct.PerlIO_list_t*, i64, i8*, i32, i32, i32, %struct.PerlIOl**, i32, %struct.SV**)*, i64 (%struct.PerlIOl**)*, %struct.SV* (%struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIOl**, %struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i64, i32)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**, i8*, i64)* }
+ %struct.PerlIO_list_t = type { i64, i64, i64, %struct.PerlIO_pair_t* }
+ %struct.PerlIO_pair_t = type { %struct.PerlIO_funcs*, %struct.SV* }
+ %struct.PerlIOl = type { %struct.PerlIOl*, %struct.PerlIO_funcs*, i32 }
+ %struct.PerlInterpreter = type { i8 }
+ %struct.REGEXP = type { i32*, i32*, %struct.regnode*, %struct.reg_substr_data*, i8*, %struct.reg_data*, i8*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, [1 x %struct.regnode] }
+ %struct.SV = type { i8*, i32, i32 }
+ %struct.XPVAV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.SV**, %struct.SV*, i8 }
+ %struct.XPVCV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.HV*, %struct.OP*, %struct.OP*, void (%struct.CV*)*, %struct.ANY, %struct.GV*, i8*, i64, %struct.AV*, %struct.CV*, i16, i32 }
+ %struct.XPVGV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.GP*, i8*, i64, %struct.HV*, i8 }
+ %struct.XPVHV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, i32, %struct.HE*, %struct.PMOP*, i8* }
+ %struct.XPVIO = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.PerlIOl**, %struct.PerlIOl**, %struct.anon, i64, i64, i64, i64, i8*, %struct.GV*, i8*, %struct.GV*, i8*, %struct.GV*, i16, i8, i8 }
+ %struct.__darwin_pthread_mutex_t = type { i64, [56 x i8] }
+ %struct._telldir = type opaque
+ %struct.anon = type { %struct.DIR* }
+ %struct.io = type { %struct.XPVIO*, i32, i32 }
+ %struct.reg_data = type { i32, i8*, [1 x i8*] }
+ %struct.reg_substr_data = type { [3 x %struct.reg_substr_datum] }
+ %struct.reg_substr_datum = type { i32, i32, %struct.SV*, %struct.SV* }
+ %struct.regnode = type { i8, i8, i16 }
+
+define i32 @Perl_yylex() nounwind ssp {
+entry:
+ br i1 undef, label %bb21, label %bb
+
+bb: ; preds = %entry
+ unreachable
+
+bb21: ; preds = %entry
+ switch i32 undef, label %bb103 [
+ i32 1, label %bb101
+ i32 4, label %bb75
+ i32 6, label %bb68
+ i32 7, label %bb67
+ i32 8, label %bb25
+ ]
+
+bb25: ; preds = %bb21
+ ret i32 41
+
+bb67: ; preds = %bb21
+ ret i32 40
+
+bb68: ; preds = %bb21
+ br i1 undef, label %bb69, label %bb70
+
+bb69: ; preds = %bb68
+ ret i32 undef
+
+bb70: ; preds = %bb68
+ unreachable
+
+bb75: ; preds = %bb21
+ unreachable
+
+bb101: ; preds = %bb21
+ unreachable
+
+bb103: ; preds = %bb21
+ switch i32 undef, label %bb104 [
+ i32 0, label %bb126
+ i32 4, label %fake_eof
+ i32 26, label %fake_eof
+ i32 34, label %bb1423
+ i32 36, label %bb1050
+ i32 37, label %bb534
+ i32 39, label %bb1412
+ i32 41, label %bb643
+ i32 44, label %bb544
+ i32 48, label %bb1406
+ i32 49, label %bb1406
+ i32 50, label %bb1406
+ i32 51, label %bb1406
+ i32 52, label %bb1406
+ i32 53, label %bb1406
+ i32 54, label %bb1406
+ i32 55, label %bb1406
+ i32 56, label %bb1406
+ i32 57, label %bb1406
+ i32 59, label %bb639
+ i32 65, label %keylookup
+ i32 66, label %keylookup
+ i32 67, label %keylookup
+ i32 68, label %keylookup
+ i32 69, label %keylookup
+ i32 70, label %keylookup
+ i32 71, label %keylookup
+ i32 72, label %keylookup
+ i32 73, label %keylookup
+ i32 74, label %keylookup
+ i32 75, label %keylookup
+ i32 76, label %keylookup
+ i32 77, label %keylookup
+ i32 78, label %keylookup
+ i32 79, label %keylookup
+ i32 80, label %keylookup
+ i32 81, label %keylookup
+ i32 82, label %keylookup
+ i32 83, label %keylookup
+ i32 84, label %keylookup
+ i32 85, label %keylookup
+ i32 86, label %keylookup
+ i32 87, label %keylookup
+ i32 88, label %keylookup
+ i32 89, label %keylookup
+ i32 90, label %keylookup
+ i32 92, label %bb1455
+ i32 95, label %keylookup
+ i32 96, label %bb1447
+ i32 97, label %keylookup
+ i32 98, label %keylookup
+ i32 99, label %keylookup
+ i32 100, label %keylookup
+ i32 101, label %keylookup
+ i32 102, label %keylookup
+ i32 103, label %keylookup
+ i32 104, label %keylookup
+ i32 105, label %keylookup
+ i32 106, label %keylookup
+ i32 107, label %keylookup
+ i32 108, label %keylookup
+ i32 109, label %keylookup
+ i32 110, label %keylookup
+ i32 111, label %keylookup
+ i32 112, label %keylookup
+ i32 113, label %keylookup
+ i32 114, label %keylookup
+ i32 115, label %keylookup
+ i32 116, label %keylookup
+ i32 117, label %keylookup
+ i32 118, label %keylookup
+ i32 119, label %keylookup
+ i32 120, label %keylookup
+ i32 121, label %keylookup
+ i32 122, label %keylookup
+ i32 126, label %bb544
+ ]
+
+bb104: ; preds = %bb103
+ unreachable
+
+bb126: ; preds = %bb103
+ ret i32 0
+
+fake_eof: ; preds = %bb1841, %bb103, %bb103
+ unreachable
+
+bb534: ; preds = %bb103
+ unreachable
+
+bb544: ; preds = %bb103, %bb103
+ ret i32 undef
+
+bb639: ; preds = %bb103
+ unreachable
+
+bb643: ; preds = %bb103
+ unreachable
+
+bb1050: ; preds = %bb103
+ unreachable
+
+bb1406: ; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+ unreachable
+
+bb1412: ; preds = %bb103
+ unreachable
+
+bb1423: ; preds = %bb103
+ unreachable
+
+bb1447: ; preds = %bb103
+ unreachable
+
+bb1455: ; preds = %bb103
+ unreachable
+
+keylookup: ; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+ br i1 undef, label %bb1498, label %bb1496
+
+bb1496: ; preds = %keylookup
+ br i1 undef, label %bb1498, label %bb1510.preheader
+
+bb1498: ; preds = %bb1496, %keylookup
+ unreachable
+
+bb1510.preheader: ; preds = %bb1496
+ br i1 undef, label %bb1511, label %bb1518
+
+bb1511: ; preds = %bb1510.preheader
+ br label %bb1518
+
+bb1518: ; preds = %bb1511, %bb1510.preheader
+ switch i32 undef, label %bb741.i4285 [
+ i32 95, label %bb744.i4287
+ i32 115, label %bb852.i4394
+ ]
+
+bb741.i4285: ; preds = %bb1518
+ br label %Perl_keyword.exit4735
+
+bb744.i4287: ; preds = %bb1518
+ br label %Perl_keyword.exit4735
+
+bb852.i4394: ; preds = %bb1518
+ br i1 undef, label %bb861.i4404, label %bb856.i4399
+
+bb856.i4399: ; preds = %bb852.i4394
+ br label %Perl_keyword.exit4735
+
+bb861.i4404: ; preds = %bb852.i4394
+ br label %Perl_keyword.exit4735
+
+Perl_keyword.exit4735: ; preds = %bb861.i4404, %bb856.i4399, %bb744.i4287, %bb741.i4285
+ br i1 undef, label %bb1544, label %reserved_word
+
+bb1544: ; preds = %Perl_keyword.exit4735
+ br i1 undef, label %bb1565, label %bb1545
+
+bb1545: ; preds = %bb1544
+ br i1 undef, label %bb1563, label %bb1558
+
+bb1558: ; preds = %bb1545
+ %0 = load %struct.SV** undef ; <%struct.SV*> [#uses=1]
+ %1 = bitcast %struct.SV* %0 to %struct.GV* ; <%struct.GV*> [#uses=5]
+ br i1 undef, label %bb1563, label %bb1559
+
+bb1559: ; preds = %bb1558
+ br i1 undef, label %bb1560, label %bb1563
+
+bb1560: ; preds = %bb1559
+ br i1 undef, label %bb1563, label %bb1561
+
+bb1561: ; preds = %bb1560
+ br i1 undef, label %bb1562, label %bb1563
+
+bb1562: ; preds = %bb1561
+ br label %bb1563
+
+bb1563: ; preds = %bb1562, %bb1561, %bb1560, %bb1559, %bb1558, %bb1545
+ %gv19.3 = phi %struct.GV* [ %1, %bb1562 ], [ undef, %bb1545 ], [ %1, %bb1558 ], [ %1, %bb1559 ], [ %1, %bb1560 ], [ %1, %bb1561 ] ; <%struct.GV*> [#uses=0]
+ br i1 undef, label %bb1565, label %reserved_word
+
+bb1565: ; preds = %bb1563, %bb1544
+ br i1 undef, label %bb1573, label %bb1580
+
+bb1573: ; preds = %bb1565
+ br label %bb1580
+
+bb1580: ; preds = %bb1573, %bb1565
+ br i1 undef, label %bb1595, label %reserved_word
+
+bb1595: ; preds = %bb1580
+ br i1 undef, label %reserved_word, label %bb1597
+
+bb1597: ; preds = %bb1595
+ br i1 undef, label %reserved_word, label %bb1602
+
+bb1602: ; preds = %bb1597
+ br label %reserved_word
+
+reserved_word: ; preds = %bb1602, %bb1597, %bb1595, %bb1580, %bb1563, %Perl_keyword.exit4735
+ switch i32 undef, label %bb2012 [
+ i32 1, label %bb1819
+ i32 2, label %bb1830
+ i32 4, label %bb1841
+ i32 5, label %bb1841
+ i32 8, label %bb1880
+ i32 14, label %bb1894
+ i32 16, label %bb1895
+ i32 17, label %bb1896
+ i32 18, label %bb1897
+ i32 19, label %bb1898
+ i32 20, label %bb1899
+ i32 22, label %bb1906
+ i32 23, label %bb1928
+ i32 24, label %bb2555
+ i32 26, label %bb1929
+ i32 31, label %bb1921
+ i32 32, label %bb1930
+ i32 33, label %bb1905
+ i32 34, label %bb1936
+ i32 35, label %bb1927
+ i32 37, label %bb1962
+ i32 40, label %bb1951
+ i32 41, label %bb1946
+ i32 42, label %bb1968
+ i32 44, label %bb1969
+ i32 45, label %bb1970
+ i32 46, label %bb2011
+ i32 47, label %bb2006
+ i32 48, label %bb2007
+ i32 49, label %bb2009
+ i32 50, label %bb2010
+ i32 51, label %bb2008
+ i32 53, label %bb1971
+ i32 54, label %bb1982
+ i32 55, label %bb2005
+ i32 59, label %bb2081
+ i32 61, label %bb2087
+ i32 64, label %bb2080
+ i32 65, label %really_sub
+ i32 66, label %bb2079
+ i32 67, label %bb2089
+ i32 69, label %bb2155
+ i32 72, label %bb2137
+ i32 74, label %bb2138
+ i32 75, label %bb2166
+ i32 76, label %bb2144
+ i32 78, label %bb2145
+ i32 81, label %bb2102
+ i32 82, label %bb2108
+ i32 84, label %bb2114
+ i32 85, label %bb2115
+ i32 86, label %bb2116
+ i32 89, label %bb2146
+ i32 90, label %bb2147
+ i32 91, label %bb2148
+ i32 93, label %bb2154
+ i32 94, label %bb2167
+ i32 96, label %bb2091
+ i32 97, label %bb2090
+ i32 98, label %bb2088
+ i32 100, label %bb2173
+ i32 101, label %bb2174
+ i32 102, label %bb2175
+ i32 103, label %bb2180
+ i32 104, label %bb2181
+ i32 106, label %bb2187
+ i32 107, label %bb2188
+ i32 110, label %bb2206
+ i32 112, label %bb2217
+ i32 113, label %bb2218
+ i32 114, label %bb2199
+ i32 119, label %bb2205
+ i32 120, label %bb2229
+ i32 121, label %bb2233
+ i32 122, label %bb2234
+ i32 123, label %bb2235
+ i32 124, label %bb2236
+ i32 125, label %bb2237
+ i32 126, label %bb2238
+ i32 127, label %bb2239
+ i32 128, label %bb2268
+ i32 129, label %bb2267
+ i32 133, label %bb2276
+ i32 134, label %bb2348
+ i32 135, label %bb2337
+ i32 137, label %bb2239
+ i32 138, label %bb2367
+ i32 139, label %bb2368
+ i32 140, label %bb2369
+ i32 141, label %bb2357
+ i32 143, label %bb2349
+ i32 144, label %bb2350
+ i32 146, label %bb2356
+ i32 147, label %bb2370
+ i32 148, label %bb2445
+ i32 149, label %bb2453
+ i32 151, label %bb2381
+ i32 152, label %bb2457
+ i32 154, label %bb2516
+ i32 156, label %bb2522
+ i32 158, label %bb2527
+ i32 159, label %bb2537
+ i32 160, label %bb2503
+ i32 162, label %bb2504
+ i32 163, label %bb2464
+ i32 165, label %bb2463
+ i32 166, label %bb2538
+ i32 168, label %bb2515
+ i32 170, label %bb2549
+ i32 172, label %bb2566
+ i32 173, label %bb2595
+ i32 174, label %bb2565
+ i32 175, label %bb2567
+ i32 176, label %bb2568
+ i32 177, label %bb2569
+ i32 178, label %bb2570
+ i32 179, label %bb2594
+ i32 182, label %bb2571
+ i32 183, label %bb2572
+ i32 185, label %bb2593
+ i32 186, label %bb2583
+ i32 187, label %bb2596
+ i32 189, label %bb2602
+ i32 190, label %bb2603
+ i32 191, label %bb2604
+ i32 192, label %bb2605
+ i32 193, label %bb2606
+ i32 196, label %bb2617
+ i32 197, label %bb2618
+ i32 198, label %bb2619
+ i32 199, label %bb2627
+ i32 200, label %bb2625
+ i32 201, label %bb2626
+ i32 206, label %really_sub
+ i32 207, label %bb2648
+ i32 208, label %bb2738
+ i32 209, label %bb2739
+ i32 210, label %bb2740
+ i32 211, label %bb2742
+ i32 212, label %bb2741
+ i32 213, label %bb2737
+ i32 214, label %bb2743
+ i32 217, label %bb2758
+ i32 219, label %bb2764
+ i32 220, label %bb2765
+ i32 221, label %bb2744
+ i32 222, label %bb2766
+ i32 226, label %bb2785
+ i32 227, label %bb2783
+ i32 228, label %bb2784
+ i32 229, label %bb2790
+ i32 230, label %bb2797
+ i32 232, label %bb2782
+ i32 234, label %bb2791
+ i32 236, label %bb2815
+ i32 237, label %bb2818
+ i32 238, label %bb2819
+ i32 239, label %bb2820
+ i32 240, label %bb2817
+ i32 241, label %bb2816
+ i32 242, label %bb2821
+ i32 243, label %bb2826
+ i32 244, label %bb2829
+ i32 245, label %bb2830
+ ]
+
+bb1819: ; preds = %reserved_word
+ unreachable
+
+bb1830: ; preds = %reserved_word
+ unreachable
+
+bb1841: ; preds = %reserved_word, %reserved_word
+ br i1 undef, label %fake_eof, label %bb1842
+
+bb1842: ; preds = %bb1841
+ unreachable
+
+bb1880: ; preds = %reserved_word
+ unreachable
+
+bb1894: ; preds = %reserved_word
+ ret i32 undef
+
+bb1895: ; preds = %reserved_word
+ ret i32 301
+
+bb1896: ; preds = %reserved_word
+ ret i32 undef
+
+bb1897: ; preds = %reserved_word
+ ret i32 undef
+
+bb1898: ; preds = %reserved_word
+ ret i32 undef
+
+bb1899: ; preds = %reserved_word
+ ret i32 undef
+
+bb1905: ; preds = %reserved_word
+ ret i32 278
+
+bb1906: ; preds = %reserved_word
+ unreachable
+
+bb1921: ; preds = %reserved_word
+ ret i32 288
+
+bb1927: ; preds = %reserved_word
+ ret i32 undef
+
+bb1928: ; preds = %reserved_word
+ ret i32 undef
+
+bb1929: ; preds = %reserved_word
+ ret i32 undef
+
+bb1930: ; preds = %reserved_word
+ ret i32 undef
+
+bb1936: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb1937
+
+bb1937: ; preds = %bb1936
+ ret i32 undef
+
+bb1946: ; preds = %reserved_word
+ unreachable
+
+bb1951: ; preds = %reserved_word
+ ret i32 undef
+
+bb1962: ; preds = %reserved_word
+ ret i32 undef
+
+bb1968: ; preds = %reserved_word
+ ret i32 280
+
+bb1969: ; preds = %reserved_word
+ ret i32 276
+
+bb1970: ; preds = %reserved_word
+ ret i32 277
+
+bb1971: ; preds = %reserved_word
+ ret i32 288
+
+bb1982: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb1986
+
+bb1986: ; preds = %bb1982
+ ret i32 undef
+
+bb2005: ; preds = %reserved_word
+ ret i32 undef
+
+bb2006: ; preds = %reserved_word
+ ret i32 282
+
+bb2007: ; preds = %reserved_word
+ ret i32 282
+
+bb2008: ; preds = %reserved_word
+ ret i32 282
+
+bb2009: ; preds = %reserved_word
+ ret i32 282
+
+bb2010: ; preds = %reserved_word
+ ret i32 282
+
+bb2011: ; preds = %reserved_word
+ ret i32 282
+
+bb2012: ; preds = %reserved_word
+ unreachable
+
+bb2079: ; preds = %reserved_word
+ ret i32 undef
+
+bb2080: ; preds = %reserved_word
+ ret i32 282
+
+bb2081: ; preds = %reserved_word
+ ret i32 undef
+
+bb2087: ; preds = %reserved_word
+ ret i32 undef
+
+bb2088: ; preds = %reserved_word
+ ret i32 287
+
+bb2089: ; preds = %reserved_word
+ ret i32 287
+
+bb2090: ; preds = %reserved_word
+ ret i32 undef
+
+bb2091: ; preds = %reserved_word
+ ret i32 280
+
+bb2102: ; preds = %reserved_word
+ ret i32 282
+
+bb2108: ; preds = %reserved_word
+ ret i32 undef
+
+bb2114: ; preds = %reserved_word
+ ret i32 undef
+
+bb2115: ; preds = %reserved_word
+ ret i32 282
+
+bb2116: ; preds = %reserved_word
+ ret i32 282
+
+bb2137: ; preds = %reserved_word
+ ret i32 undef
+
+bb2138: ; preds = %reserved_word
+ ret i32 282
+
+bb2144: ; preds = %reserved_word
+ ret i32 undef
+
+bb2145: ; preds = %reserved_word
+ ret i32 282
+
+bb2146: ; preds = %reserved_word
+ ret i32 undef
+
+bb2147: ; preds = %reserved_word
+ ret i32 undef
+
+bb2148: ; preds = %reserved_word
+ ret i32 282
+
+bb2154: ; preds = %reserved_word
+ ret i32 undef
+
+bb2155: ; preds = %reserved_word
+ ret i32 282
+
+bb2166: ; preds = %reserved_word
+ ret i32 282
+
+bb2167: ; preds = %reserved_word
+ ret i32 undef
+
+bb2173: ; preds = %reserved_word
+ ret i32 274
+
+bb2174: ; preds = %reserved_word
+ ret i32 undef
+
+bb2175: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2176
+
+bb2176: ; preds = %bb2175
+ ret i32 undef
+
+bb2180: ; preds = %reserved_word
+ ret i32 undef
+
+bb2181: ; preds = %reserved_word
+ ret i32 undef
+
+bb2187: ; preds = %reserved_word
+ ret i32 undef
+
+bb2188: ; preds = %reserved_word
+ ret i32 280
+
+bb2199: ; preds = %reserved_word
+ ret i32 295
+
+bb2205: ; preds = %reserved_word
+ ret i32 287
+
+bb2206: ; preds = %reserved_word
+ ret i32 287
+
+bb2217: ; preds = %reserved_word
+ ret i32 undef
+
+bb2218: ; preds = %reserved_word
+ ret i32 undef
+
+bb2229: ; preds = %reserved_word
+ unreachable
+
+bb2233: ; preds = %reserved_word
+ ret i32 undef
+
+bb2234: ; preds = %reserved_word
+ ret i32 undef
+
+bb2235: ; preds = %reserved_word
+ ret i32 undef
+
+bb2236: ; preds = %reserved_word
+ ret i32 undef
+
+bb2237: ; preds = %reserved_word
+ ret i32 undef
+
+bb2238: ; preds = %reserved_word
+ ret i32 undef
+
+bb2239: ; preds = %reserved_word, %reserved_word
+ unreachable
+
+bb2267: ; preds = %reserved_word
+ ret i32 280
+
+bb2268: ; preds = %reserved_word
+ ret i32 288
+
+bb2276: ; preds = %reserved_word
+ unreachable
+
+bb2337: ; preds = %reserved_word
+ ret i32 300
+
+bb2348: ; preds = %reserved_word
+ ret i32 undef
+
+bb2349: ; preds = %reserved_word
+ ret i32 undef
+
+bb2350: ; preds = %reserved_word
+ ret i32 undef
+
+bb2356: ; preds = %reserved_word
+ ret i32 undef
+
+bb2357: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2358
+
+bb2358: ; preds = %bb2357
+ ret i32 undef
+
+bb2367: ; preds = %reserved_word
+ ret i32 undef
+
+bb2368: ; preds = %reserved_word
+ ret i32 270
+
+bb2369: ; preds = %reserved_word
+ ret i32 undef
+
+bb2370: ; preds = %reserved_word
+ unreachable
+
+bb2381: ; preds = %reserved_word
+ unreachable
+
+bb2445: ; preds = %reserved_word
+ unreachable
+
+bb2453: ; preds = %reserved_word
+ unreachable
+
+bb2457: ; preds = %reserved_word
+ unreachable
+
+bb2463: ; preds = %reserved_word
+ ret i32 286
+
+bb2464: ; preds = %reserved_word
+ unreachable
+
+bb2503: ; preds = %reserved_word
+ ret i32 280
+
+bb2504: ; preds = %reserved_word
+ ret i32 undef
+
+bb2515: ; preds = %reserved_word
+ ret i32 undef
+
+bb2516: ; preds = %reserved_word
+ ret i32 undef
+
+bb2522: ; preds = %reserved_word
+ unreachable
+
+bb2527: ; preds = %reserved_word
+ unreachable
+
+bb2537: ; preds = %reserved_word
+ ret i32 undef
+
+bb2538: ; preds = %reserved_word
+ ret i32 undef
+
+bb2549: ; preds = %reserved_word
+ unreachable
+
+bb2555: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2556
+
+bb2556: ; preds = %bb2555
+ ret i32 undef
+
+bb2565: ; preds = %reserved_word
+ ret i32 undef
+
+bb2566: ; preds = %reserved_word
+ ret i32 undef
+
+bb2567: ; preds = %reserved_word
+ ret i32 undef
+
+bb2568: ; preds = %reserved_word
+ ret i32 undef
+
+bb2569: ; preds = %reserved_word
+ ret i32 undef
+
+bb2570: ; preds = %reserved_word
+ ret i32 undef
+
+bb2571: ; preds = %reserved_word
+ ret i32 undef
+
+bb2572: ; preds = %reserved_word
+ ret i32 undef
+
+bb2583: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2584
+
+bb2584: ; preds = %bb2583
+ ret i32 undef
+
+bb2593: ; preds = %reserved_word
+ ret i32 282
+
+bb2594: ; preds = %reserved_word
+ ret i32 282
+
+bb2595: ; preds = %reserved_word
+ ret i32 undef
+
+bb2596: ; preds = %reserved_word
+ ret i32 undef
+
+bb2602: ; preds = %reserved_word
+ ret i32 undef
+
+bb2603: ; preds = %reserved_word
+ ret i32 undef
+
+bb2604: ; preds = %reserved_word
+ ret i32 undef
+
+bb2605: ; preds = %reserved_word
+ ret i32 undef
+
+bb2606: ; preds = %reserved_word
+ ret i32 undef
+
+bb2617: ; preds = %reserved_word
+ ret i32 undef
+
+bb2618: ; preds = %reserved_word
+ ret i32 undef
+
+bb2619: ; preds = %reserved_word
+ unreachable
+
+bb2625: ; preds = %reserved_word
+ ret i32 undef
+
+bb2626: ; preds = %reserved_word
+ ret i32 undef
+
+bb2627: ; preds = %reserved_word
+ ret i32 undef
+
+bb2648: ; preds = %reserved_word
+ ret i32 undef
+
+really_sub: ; preds = %reserved_word, %reserved_word
+ unreachable
+
+bb2737: ; preds = %reserved_word
+ ret i32 undef
+
+bb2738: ; preds = %reserved_word
+ ret i32 undef
+
+bb2739: ; preds = %reserved_word
+ ret i32 undef
+
+bb2740: ; preds = %reserved_word
+ ret i32 undef
+
+bb2741: ; preds = %reserved_word
+ ret i32 undef
+
+bb2742: ; preds = %reserved_word
+ ret i32 undef
+
+bb2743: ; preds = %reserved_word
+ ret i32 undef
+
+bb2744: ; preds = %reserved_word
+ unreachable
+
+bb2758: ; preds = %reserved_word
+ ret i32 undef
+
+bb2764: ; preds = %reserved_word
+ ret i32 282
+
+bb2765: ; preds = %reserved_word
+ ret i32 282
+
+bb2766: ; preds = %reserved_word
+ ret i32 undef
+
+bb2782: ; preds = %reserved_word
+ ret i32 273
+
+bb2783: ; preds = %reserved_word
+ ret i32 275
+
+bb2784: ; preds = %reserved_word
+ ret i32 undef
+
+bb2785: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2786
+
+bb2786: ; preds = %bb2785
+ ret i32 undef
+
+bb2790: ; preds = %reserved_word
+ ret i32 undef
+
+bb2791: ; preds = %reserved_word
+ ret i32 undef
+
+bb2797: ; preds = %reserved_word
+ ret i32 undef
+
+bb2815: ; preds = %reserved_word
+ ret i32 undef
+
+bb2816: ; preds = %reserved_word
+ ret i32 272
+
+bb2817: ; preds = %reserved_word
+ ret i32 undef
+
+bb2818: ; preds = %reserved_word
+ ret i32 282
+
+bb2819: ; preds = %reserved_word
+ ret i32 undef
+
+bb2820: ; preds = %reserved_word
+ ret i32 282
+
+bb2821: ; preds = %reserved_word
+ unreachable
+
+bb2826: ; preds = %reserved_word
+ unreachable
+
+bb2829: ; preds = %reserved_word
+ ret i32 300
+
+bb2830: ; preds = %reserved_word
+ unreachable
+
+bb2834: ; preds = %bb2785, %bb2583, %bb2555, %bb2357, %bb2175, %bb1982, %bb1936
+ ret i32 283
+}
diff --git a/test/CodeGen/X86/2009-07-16-CoalescerBug.ll b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
new file mode 100644
index 000000000000..48af440df2d6
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; rdar://7059496
+
+ %struct.brinfo = type <{ %struct.brinfo*, %struct.brinfo*, i8*, i32, i32, i32, i8, i8, i8, i8 }>
+ %struct.cadata = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, %struct.cmatcher*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8 }>
+ %struct.cline = type <{ %struct.cline*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i32, %struct.cline*, %struct.cline*, i32, i32 }>
+ %struct.cmatch = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8, i32*, i32*, i8*, i8*, i32, i32, i32, i32, i16, i8, i8, i16, i8, i8 }>
+ %struct.cmatcher = type <{ i32, i8, i8, i8, i8, %struct.cmatcher*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8 }>
+ %struct.cpattern = type <{ %struct.cpattern*, i32, i8, i8, i8, i8, %union.anon }>
+ %struct.patprog = type <{ i64, i64, i64, i64, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }>
+ %union.anon = type <{ [8 x i8] }>
+
+define i32 @addmatches(%struct.cadata* %dat, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 undef, label %if.else, label %if.then91
+
+if.then91: ; preds = %entry
+ br label %if.end96
+
+if.else: ; preds = %entry
+ br label %if.end96
+
+if.end96: ; preds = %if.else, %if.then91
+ br i1 undef, label %lor.lhs.false, label %if.then105
+
+lor.lhs.false: ; preds = %if.end96
+ br i1 undef, label %if.else139, label %if.then105
+
+if.then105: ; preds = %lor.lhs.false, %if.end96
+ unreachable
+
+if.else139: ; preds = %lor.lhs.false
+ br i1 undef, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %if.else139
+ unreachable
+
+land.end: ; preds = %if.else139
+ br i1 undef, label %land.lhs.true285, label %if.then315
+
+land.lhs.true285: ; preds = %land.end
+ br i1 undef, label %if.end324, label %if.then322
+
+if.then315: ; preds = %land.end
+ unreachable
+
+if.then322: ; preds = %land.lhs.true285
+ unreachable
+
+if.end324: ; preds = %land.lhs.true285
+ br i1 undef, label %if.end384, label %if.then358
+
+if.then358: ; preds = %if.end324
+ unreachable
+
+if.end384: ; preds = %if.end324
+ br i1 undef, label %if.end394, label %land.lhs.true387
+
+land.lhs.true387: ; preds = %if.end384
+ unreachable
+
+if.end394: ; preds = %if.end384
+ br i1 undef, label %if.end498, label %land.lhs.true399
+
+land.lhs.true399: ; preds = %if.end394
+ br i1 undef, label %if.end498, label %if.then406
+
+if.then406: ; preds = %land.lhs.true399
+ unreachable
+
+if.end498: ; preds = %land.lhs.true399, %if.end394
+ br i1 undef, label %if.end514, label %if.then503
+
+if.then503: ; preds = %if.end498
+ unreachable
+
+if.end514: ; preds = %if.end498
+ br i1 undef, label %if.end585, label %if.then520
+
+if.then520: ; preds = %if.end514
+ br i1 undef, label %lor.lhs.false547, label %if.then560
+
+lor.lhs.false547: ; preds = %if.then520
+ unreachable
+
+if.then560: ; preds = %if.then520
+ br i1 undef, label %if.end585, label %land.lhs.true566
+
+land.lhs.true566: ; preds = %if.then560
+ br i1 undef, label %if.end585, label %if.then573
+
+if.then573: ; preds = %land.lhs.true566
+ unreachable
+
+if.end585: ; preds = %land.lhs.true566, %if.then560, %if.end514
+ br i1 undef, label %cond.true593, label %cond.false599
+
+cond.true593: ; preds = %if.end585
+ unreachable
+
+cond.false599: ; preds = %if.end585
+ br i1 undef, label %if.end647, label %if.then621
+
+if.then621: ; preds = %cond.false599
+ br i1 undef, label %cond.true624, label %cond.false630
+
+cond.true624: ; preds = %if.then621
+ br label %if.end647
+
+cond.false630: ; preds = %if.then621
+ unreachable
+
+if.end647: ; preds = %cond.true624, %cond.false599
+ br i1 undef, label %if.end723, label %if.then701
+
+if.then701: ; preds = %if.end647
+ br label %if.end723
+
+if.end723: ; preds = %if.then701, %if.end647
+ br i1 undef, label %if.else1090, label %if.then729
+
+if.then729: ; preds = %if.end723
+ br i1 undef, label %if.end887, label %if.then812
+
+if.then812: ; preds = %if.then729
+ unreachable
+
+if.end887: ; preds = %if.then729
+ br i1 undef, label %if.end972, label %if.then893
+
+if.then893: ; preds = %if.end887
+ br i1 undef, label %if.end919, label %if.then903
+
+if.then903: ; preds = %if.then893
+ unreachable
+
+if.end919: ; preds = %if.then893
+ br label %if.end972
+
+if.end972: ; preds = %if.end919, %if.end887
+ %sline.0 = phi %struct.cline* [ undef, %if.end919 ], [ null, %if.end887 ] ; <%struct.cline*> [#uses=5]
+ %bcs.0 = phi i32 [ undef, %if.end919 ], [ 0, %if.end887 ] ; <i32> [#uses=5]
+ br i1 undef, label %if.end1146, label %land.lhs.true975
+
+land.lhs.true975: ; preds = %if.end972
+ br i1 undef, label %if.end1146, label %if.then980
+
+if.then980: ; preds = %land.lhs.true975
+ br i1 undef, label %cond.false1025, label %cond.false1004
+
+cond.false1004: ; preds = %if.then980
+ unreachable
+
+cond.false1025: ; preds = %if.then980
+ br i1 undef, label %if.end1146, label %if.then1071
+
+if.then1071: ; preds = %cond.false1025
+ br i1 undef, label %if.then1074, label %if.end1081
+
+if.then1074: ; preds = %if.then1071
+ br label %if.end1081
+
+if.end1081: ; preds = %if.then1074, %if.then1071
+ %call1083 = call %struct.patprog* @patcompile(i8* undef, i32 0, i8** null) nounwind ssp ; <%struct.patprog*> [#uses=2]
+ br i1 undef, label %if.end1146, label %if.then1086
+
+if.then1086: ; preds = %if.end1081
+ br label %if.end1146
+
+if.else1090: ; preds = %if.end723
+ br i1 undef, label %if.end1146, label %land.lhs.true1093
+
+land.lhs.true1093: ; preds = %if.else1090
+ br i1 undef, label %if.end1146, label %if.then1098
+
+if.then1098: ; preds = %land.lhs.true1093
+ unreachable
+
+if.end1146: ; preds = %land.lhs.true1093, %if.else1090, %if.then1086, %if.end1081, %cond.false1025, %land.lhs.true975, %if.end972
+ %cp.0 = phi %struct.patprog* [ %call1083, %if.then1086 ], [ null, %if.end972 ], [ null, %land.lhs.true975 ], [ null, %cond.false1025 ], [ %call1083, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ] ; <%struct.patprog*> [#uses=1]
+ %sline.1 = phi %struct.cline* [ %sline.0, %if.then1086 ], [ %sline.0, %if.end972 ], [ %sline.0, %land.lhs.true975 ], [ %sline.0, %cond.false1025 ], [ %sline.0, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ] ; <%struct.cline*> [#uses=1]
+ %bcs.1 = phi i32 [ %bcs.0, %if.then1086 ], [ %bcs.0, %if.end972 ], [ %bcs.0, %land.lhs.true975 ], [ %bcs.0, %cond.false1025 ], [ %bcs.0, %if.end1081 ], [ 0, %if.else1090 ], [ 0, %land.lhs.true1093 ] ; <i32> [#uses=1]
+ br i1 undef, label %if.end1307, label %do.body1270
+
+do.body1270: ; preds = %if.end1146
+ unreachable
+
+if.end1307: ; preds = %if.end1146
+ br i1 undef, label %if.end1318, label %if.then1312
+
+if.then1312: ; preds = %if.end1307
+ unreachable
+
+if.end1318: ; preds = %if.end1307
+ br i1 undef, label %for.cond1330.preheader, label %if.then1323
+
+if.then1323: ; preds = %if.end1318
+ unreachable
+
+for.cond1330.preheader: ; preds = %if.end1318
+ %call1587 = call i8* @comp_match(i8* undef, i8* undef, i8* undef, %struct.patprog* %cp.0, %struct.cline** undef, i32 0, %struct.brinfo** undef, i32 0, %struct.brinfo** undef, i32 %bcs.1, i32* undef) nounwind ssp ; <i8*> [#uses=0]
+ %call1667 = call %struct.cmatch* @add_match_data(i32 0, i8* undef, i8* undef, %struct.cline* undef, i8* undef, i8* null, i8* undef, i8* undef, i8* undef, i8* undef, %struct.cline* null, i8* undef, %struct.cline* %sline.1, i8* undef, i32 undef, i32 undef) ssp ; <%struct.cmatch*> [#uses=0]
+ unreachable
+}
+
+declare %struct.patprog* @patcompile(i8*, i32, i8**) ssp
+
+declare i8* @comp_match(i8*, i8*, i8*, %struct.patprog*, %struct.cline**, i32, %struct.brinfo**, i32, %struct.brinfo**, i32, i32*) ssp
+
+declare %struct.cmatch* @add_match_data(i32, i8*, i8*, %struct.cline*, i8*, i8*, i8*, i8*, i8*, i8*, %struct.cline*, i8*, %struct.cline*, i8*, i32, i32) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
new file mode 100644
index 000000000000..e21c8923df4a
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK: pavgw LCPI1_4(%rip)
+
+; rdar://7057804
+
+define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp {
+entry:
+ %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1]
+ %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3]
+ %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1]
+ %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4]
+ %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1]
+ %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1]
+ %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170> ; <<8 x i16>> [#uses=1]
+ %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1]
+ %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170> ; <<8 x i16>> [#uses=1]
+ %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1]
+ %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %50 = getelementptr i16* %out8x8, i64 8 ; <i16*> [#uses=1]
+ %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %49, <2 x i64>* %51, align 16
+ %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %53 = getelementptr i16* %out8x8, i64 16 ; <i16*> [#uses=1]
+ %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %52, <2 x i64>* %54, align 16
+ %55 = getelementptr i16* %out8x8, i64 24 ; <i16*> [#uses=1]
+ %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %48, <2 x i64>* %56, align 16
+ %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %58 = getelementptr i16* %out8x8, i64 40 ; <i16*> [#uses=1]
+ %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %57, <2 x i64>* %59, align 16
+ %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %61 = getelementptr i16* %out8x8, i64 48 ; <i16*> [#uses=1]
+ %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %60, <2 x i64>* %62, align 16
+ %63 = getelementptr i16* %out8x8, i64 56 ; <i16*> [#uses=1]
+ %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %22, <2 x i64>* %64, align 16
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
new file mode 100644
index 000000000000..3e5bd348ecd9
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
+; PR4552
+
+target triple = "i386-pc-linux-gnu"
+@g_8 = internal global i32 0 ; <i32*> [#uses=1]
+@g_72 = internal global i32 0 ; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
+entry:
+ %g_72.promoted = load i32* @g_72 ; <i32> [#uses=1]
+ %g_8.promoted = load i32* @g_8 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %func_40.exit, %entry
+ %g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ] ; <i32> [#uses=3]
+ %g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ] ; <i32> [#uses=3]
+ %retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8 ; <i8> [#uses=2]
+ %0 = trunc i32 %g_72.tmp.1 to i8 ; <i8> [#uses=2]
+ %1 = mul i8 %retval12.i4.i.i, %0 ; <i8> [#uses=1]
+ %2 = icmp eq i8 %1, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb2.i.i, label %bb.i.i
+
+bb.i.i: ; preds = %bb
+ %3 = sext i8 %0 to i32 ; <i32> [#uses=1]
+ %4 = and i32 %3, 50295 ; <i32> [#uses=1]
+ %5 = icmp eq i32 %4, 0 ; <i1> [#uses=1]
+ br i1 %5, label %bb2.i.i, label %func_55.exit.i
+
+bb2.i.i: ; preds = %bb.i.i, %bb
+ br label %func_55.exit.i
+
+func_55.exit.i: ; preds = %bb2.i.i, %bb.i.i
+ %g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1]
+ %6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1]
+ %7 = trunc i32 %6 to i8 ; <i8> [#uses=2]
+ %8 = mul i8 %7, %retval12.i4.i.i ; <i8> [#uses=1]
+ %9 = icmp eq i8 %8, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb2.i4.i, label %bb.i3.i
+
+bb.i3.i: ; preds = %func_55.exit.i
+ %10 = sext i8 %7 to i32 ; <i32> [#uses=1]
+ %11 = and i32 %10, 50295 ; <i32> [#uses=1]
+ %12 = icmp eq i32 %11, 0 ; <i1> [#uses=1]
+ br i1 %12, label %bb2.i4.i, label %func_40.exit
+
+bb2.i4.i: ; preds = %bb.i3.i, %func_55.exit.i
+ br label %func_40.exit
+
+func_40.exit: ; preds = %bb2.i4.i, %bb.i3.i
+ %g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ] ; <i32> [#uses=1]
+ %phitmp = icmp sgt i32 %g_8.tmp.1, 0 ; <i1> [#uses=1]
+ %g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1 ; <i32> [#uses=1]
+ br label %bb
+}
diff --git a/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
new file mode 100644
index 000000000000..a0095ab2064c
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64
+; PR4583
+
+define i32 @atomic_cmpset_long(i64* %dst, i64 %exp, i64 %src) nounwind ssp noredzone noimplicitfloat {
+entry:
+ %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* undef, i64 undef, i64 undef, i64* undef) nounwind ; <i8> [#uses=0]
+ br label %1
+
+; <label>:1 ; preds = %entry
+ ret i32 undef
+}
diff --git a/test/CodeGen/X86/2009-07-20-CoalescerBug.ll b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
new file mode 100644
index 000000000000..e99edd60bd5e
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; PR4587
+; rdar://7072590
+
+ %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+
+define fastcc i32 @regex_compile(i8* %pattern, i64 %size, i64 %syntax, %struct.re_pattern_buffer* nocapture %bufp) nounwind ssp {
+entry:
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %entry
+ %tmp35 = getelementptr %struct.re_pattern_buffer* %bufp, i64 0, i32 3 ; <i64*> [#uses=1]
+ store i64 %syntax, i64* %tmp35
+ store i32 undef, i32* undef
+ br i1 undef, label %if.then66, label %if.end102
+
+if.then66: ; preds = %if.end
+ br i1 false, label %if.else, label %if.then70
+
+if.then70: ; preds = %if.then66
+ %call74 = call i8* @xrealloc(i8* undef, i64 32) nounwind ssp ; <i8*> [#uses=0]
+ unreachable
+
+if.else: ; preds = %if.then66
+ br i1 false, label %do.body86, label %if.end99
+
+do.body86: ; preds = %if.else
+ br i1 false, label %do.end, label %if.then90
+
+if.then90: ; preds = %do.body86
+ unreachable
+
+do.end: ; preds = %do.body86
+ ret i32 12
+
+if.end99: ; preds = %if.else
+ br label %if.end102
+
+if.end102: ; preds = %if.end99, %if.end
+ br label %while.body
+
+while.body: ; preds = %if.end1126, %sw.bb532, %while.body, %if.end102
+ %laststart.2 = phi i8* [ null, %if.end102 ], [ %laststart.7.ph, %if.end1126 ], [ %laststart.2, %sw.bb532 ], [ %laststart.2, %while.body ] ; <i8*> [#uses=6]
+ %b.1 = phi i8* [ undef, %if.end102 ], [ %ctg29688, %if.end1126 ], [ %b.1, %sw.bb532 ], [ %b.1, %while.body ] ; <i8*> [#uses=5]
+ br i1 undef, label %while.body, label %if.end127
+
+if.end127: ; preds = %while.body
+ switch i32 undef, label %sw.bb532 [
+ i32 123, label %handle_interval
+ i32 92, label %do.body3527
+ ]
+
+sw.bb532: ; preds = %if.end127
+ br i1 undef, label %while.body, label %if.end808
+
+if.end808: ; preds = %sw.bb532
+ br i1 undef, label %while.cond1267.preheader, label %if.then811
+
+while.cond1267.preheader: ; preds = %if.end808
+ br i1 false, label %return, label %if.end1294
+
+if.then811: ; preds = %if.end808
+ %call817 = call fastcc i8* @skip_one_char(i8* %laststart.2) ssp ; <i8*> [#uses=0]
+ br i1 undef, label %cond.end834, label %lor.lhs.false827
+
+lor.lhs.false827: ; preds = %if.then811
+ br label %cond.end834
+
+cond.end834: ; preds = %lor.lhs.false827, %if.then811
+ br i1 undef, label %land.lhs.true838, label %while.cond979.preheader
+
+land.lhs.true838: ; preds = %cond.end834
+ br i1 undef, label %if.then842, label %while.cond979.preheader
+
+if.then842: ; preds = %land.lhs.true838
+ %conv851 = trunc i64 undef to i32 ; <i32> [#uses=1]
+ br label %while.cond979.preheader
+
+while.cond979.preheader: ; preds = %if.then842, %land.lhs.true838, %cond.end834
+ %startoffset.0.ph = phi i32 [ 0, %cond.end834 ], [ 0, %land.lhs.true838 ], [ %conv851, %if.then842 ] ; <i32> [#uses=2]
+ %laststart.7.ph = phi i8* [ %laststart.2, %cond.end834 ], [ %laststart.2, %land.lhs.true838 ], [ %laststart.2, %if.then842 ] ; <i8*> [#uses=3]
+ %b.4.ph = phi i8* [ %b.1, %cond.end834 ], [ %b.1, %land.lhs.true838 ], [ %b.1, %if.then842 ] ; <i8*> [#uses=3]
+ %ctg29688 = getelementptr i8* %b.4.ph, i64 6 ; <i8*> [#uses=1]
+ br label %while.cond979
+
+while.cond979: ; preds = %if.end1006, %while.cond979.preheader
+ %cmp991 = icmp ugt i64 undef, 0 ; <i1> [#uses=1]
+ br i1 %cmp991, label %do.body994, label %while.end1088
+
+do.body994: ; preds = %while.cond979
+ br i1 undef, label %return, label %if.end1006
+
+if.end1006: ; preds = %do.body994
+ %cmp1014 = icmp ugt i64 undef, 32768 ; <i1> [#uses=1]
+ %storemerge10953 = select i1 %cmp1014, i64 32768, i64 undef ; <i64> [#uses=1]
+ store i64 %storemerge10953, i64* undef
+ br i1 false, label %return, label %while.cond979
+
+while.end1088: ; preds = %while.cond979
+ br i1 undef, label %if.then1091, label %if.else1101
+
+if.then1091: ; preds = %while.end1088
+ store i8 undef, i8* undef
+ %idx.ext1132.pre = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1]
+ %add.ptr1133.pre = getelementptr i8* %laststart.7.ph, i64 %idx.ext1132.pre ; <i8*> [#uses=1]
+ %sub.ptr.lhs.cast1135.pre = ptrtoint i8* %add.ptr1133.pre to i64 ; <i64> [#uses=1]
+ br label %if.end1126
+
+if.else1101: ; preds = %while.end1088
+ %cond1109 = select i1 undef, i32 18, i32 14 ; <i32> [#uses=1]
+ %idx.ext1112 = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1]
+ %add.ptr1113 = getelementptr i8* %laststart.7.ph, i64 %idx.ext1112 ; <i8*> [#uses=2]
+ %sub.ptr.rhs.cast1121 = ptrtoint i8* %add.ptr1113 to i64 ; <i64> [#uses=1]
+ call fastcc void @insert_op1(i32 %cond1109, i8* %add.ptr1113, i32 undef, i8* %b.4.ph) ssp
+ br label %if.end1126
+
+if.end1126: ; preds = %if.else1101, %if.then1091
+ %sub.ptr.lhs.cast1135.pre-phi = phi i64 [ %sub.ptr.rhs.cast1121, %if.else1101 ], [ %sub.ptr.lhs.cast1135.pre, %if.then1091 ] ; <i64> [#uses=1]
+ %add.ptr1128 = getelementptr i8* %b.4.ph, i64 3 ; <i8*> [#uses=1]
+ %sub.ptr.rhs.cast1136 = ptrtoint i8* %add.ptr1128 to i64 ; <i64> [#uses=1]
+ %sub.ptr.sub1137 = sub i64 %sub.ptr.lhs.cast1135.pre-phi, %sub.ptr.rhs.cast1136 ; <i64> [#uses=1]
+ %sub.ptr.sub11378527 = trunc i64 %sub.ptr.sub1137 to i32 ; <i32> [#uses=1]
+ %conv1139 = add i32 %sub.ptr.sub11378527, -3 ; <i32> [#uses=1]
+ store i8 undef, i8* undef
+ %shr10.i8599 = lshr i32 %conv1139, 8 ; <i32> [#uses=1]
+ %conv6.i8600 = trunc i32 %shr10.i8599 to i8 ; <i8> [#uses=1]
+ store i8 %conv6.i8600, i8* undef
+ br label %while.body
+
+if.end1294: ; preds = %while.cond1267.preheader
+ ret i32 12
+
+do.body3527: ; preds = %if.end127
+ br i1 undef, label %do.end3536, label %if.then3531
+
+if.then3531: ; preds = %do.body3527
+ unreachable
+
+do.end3536: ; preds = %do.body3527
+ ret i32 5
+
+handle_interval: ; preds = %if.end127
+ br i1 undef, label %do.body4547, label %cond.false4583
+
+do.body4547: ; preds = %handle_interval
+ br i1 undef, label %do.end4556, label %if.then4551
+
+if.then4551: ; preds = %do.body4547
+ unreachable
+
+do.end4556: ; preds = %do.body4547
+ ret i32 9
+
+cond.false4583: ; preds = %handle_interval
+ unreachable
+
+return: ; preds = %if.end1006, %do.body994, %while.cond1267.preheader, %entry
+ ret i32 undef
+}
+
+declare i8* @xrealloc(i8*, i64) ssp
+
+declare fastcc i8* @skip_one_char(i8*) nounwind readonly ssp
+
+declare fastcc void @insert_op1(i32, i8*, i32, i8*) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
new file mode 100644
index 000000000000..e83b3a7db592
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86
+
+@bsBuff = internal global i32 0 ; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @bsGetUInt32 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc i32 @bsGetUInt32() nounwind ssp {
+entry:
+ %bsBuff.promoted44 = load i32* @bsBuff ; <i32> [#uses=1]
+ %0 = add i32 0, -8 ; <i32> [#uses=1]
+ %1 = lshr i32 %bsBuff.promoted44, %0 ; <i32> [#uses=1]
+ %2 = shl i32 %1, 8 ; <i32> [#uses=1]
+ br label %bb3.i17
+
+bb3.i9: ; preds = %bb3.i17
+ br i1 false, label %bb2.i16, label %bb1.i15
+
+bb1.i15: ; preds = %bb3.i9
+ unreachable
+
+bb2.i16: ; preds = %bb3.i9
+ br label %bb3.i17
+
+bb3.i17: ; preds = %bb2.i16, %entry
+ br i1 false, label %bb3.i9, label %bsR.exit18
+
+bsR.exit18: ; preds = %bb3.i17
+ %3 = or i32 0, %2 ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
new file mode 100644
index 000000000000..b9b09a3f0004
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; PR4669
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+
+define <1 x i64> @test(i64 %t) {
+entry:
+ %t1 = insertelement <1 x i64> undef, i64 %t, i32 0
+ %t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
+ ret <1 x i64> %t2
+}
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
new file mode 100644
index 000000000000..b329c9163c9f
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O3
+; PR4626
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_3 = common global i8 0, align 1 ; <i8*> [#uses=2]
+
+define signext i8 @safe_mul_func_int16_t_s_s(i32 %_si1, i8 signext %_si2) nounwind readnone {
+entry:
+ %tobool = icmp eq i32 %_si1, 0 ; <i1> [#uses=1]
+ %cmp = icmp sgt i8 %_si2, 0 ; <i1> [#uses=2]
+ %or.cond = or i1 %cmp, %tobool ; <i1> [#uses=1]
+ br i1 %or.cond, label %lor.rhs, label %land.lhs.true3
+
+land.lhs.true3: ; preds = %entry
+ %conv5 = sext i8 %_si2 to i32 ; <i32> [#uses=1]
+ %cmp7 = icmp slt i32 %conv5, %_si1 ; <i1> [#uses=1]
+ br i1 %cmp7, label %cond.end, label %lor.rhs
+
+lor.rhs: ; preds = %land.lhs.true3, %entry
+ %cmp10.not = icmp slt i32 %_si1, 1 ; <i1> [#uses=1]
+ %or.cond23 = and i1 %cmp, %cmp10.not ; <i1> [#uses=1]
+ br i1 %or.cond23, label %lor.end, label %cond.false
+
+lor.end: ; preds = %lor.rhs
+ %tobool19 = icmp ne i8 %_si2, 0 ; <i1> [#uses=2]
+ %lor.ext = zext i1 %tobool19 to i32 ; <i32> [#uses=1]
+ br i1 %tobool19, label %cond.end, label %cond.false
+
+cond.false: ; preds = %lor.end, %lor.rhs
+ %conv21 = sext i8 %_si2 to i32 ; <i32> [#uses=1]
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %lor.end, %land.lhs.true3
+ %cond = phi i32 [ %conv21, %cond.false ], [ 1, %land.lhs.true3 ], [ %lor.ext, %lor.end ] ; <i32> [#uses=1]
+ %conv22 = trunc i32 %cond to i8 ; <i8> [#uses=1]
+ ret i8 %conv22
+}
+
+define i32 @func_34(i8 signext %p_35) nounwind readonly {
+entry:
+ %tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false: ; preds = %entry
+ %tmp1 = load i8* @g_3 ; <i8> [#uses=1]
+ %tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tobool3, label %return, label %if.then
+
+if.then: ; preds = %lor.lhs.false, %entry
+ %tmp4 = load i8* @g_3 ; <i8> [#uses=1]
+ %conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+
+return: ; preds = %lor.lhs.false
+ ret i32 0
+}
+
+define void @foo(i32 %p_5) noreturn nounwind {
+entry:
+ %cmp = icmp sgt i32 %p_5, 0 ; <i1> [#uses=2]
+ %call = tail call i32 @safe() nounwind ; <i32> [#uses=1]
+ %conv1 = trunc i32 %call to i8 ; <i8> [#uses=3]
+ %tobool.i = xor i1 %cmp, true ; <i1> [#uses=3]
+ %cmp.i = icmp sgt i8 %conv1, 0 ; <i1> [#uses=3]
+ %or.cond.i = or i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i: ; preds = %entry
+ %xor = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %conv5.i = sext i8 %conv1 to i32 ; <i32> [#uses=1]
+ %cmp7.i = icmp slt i32 %conv5.i, %xor ; <i1> [#uses=1]
+ %cmp7.i.not = xor i1 %cmp7.i, true ; <i1> [#uses=1]
+ %or.cond23.i = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ %or.cond = and i1 %cmp7.i.not, %or.cond23.i ; <i1> [#uses=1]
+ br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i: ; preds = %entry
+ %or.cond23.i.old = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i: ; preds = %lor.rhs.i, %land.lhs.true3.i
+ %tobool19.i = icmp eq i8 %conv1, 0 ; <i1> [#uses=0]
+ br label %for.inc
+
+for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+ br label %for.inc
+}
+
+declare i32 @safe()
+
+define i32 @func_35(i8 signext %p_35) nounwind readonly {
+entry:
+ %tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false: ; preds = %entry
+ %tmp1 = load i8* @g_3 ; <i8> [#uses=1]
+ %tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tobool3, label %return, label %if.then
+
+if.then: ; preds = %lor.lhs.false, %entry
+ %tmp4 = load i8* @g_3 ; <i8> [#uses=1]
+ %conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+
+return: ; preds = %lor.lhs.false
+ ret i32 0
+}
+
+define void @bar(i32 %p_5) noreturn nounwind {
+entry:
+ %cmp = icmp sgt i32 %p_5, 0 ; <i1> [#uses=2]
+ %call = tail call i32 @safe() nounwind ; <i32> [#uses=1]
+ %conv1 = trunc i32 %call to i8 ; <i8> [#uses=3]
+ %tobool.i = xor i1 %cmp, true ; <i1> [#uses=3]
+ %cmp.i = icmp sgt i8 %conv1, 0 ; <i1> [#uses=3]
+ %or.cond.i = or i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i: ; preds = %entry
+ %xor = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %conv5.i = sext i8 %conv1 to i32 ; <i32> [#uses=1]
+ %cmp7.i = icmp slt i32 %conv5.i, %xor ; <i1> [#uses=1]
+ %cmp7.i.not = xor i1 %cmp7.i, true ; <i1> [#uses=1]
+ %or.cond23.i = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ %or.cond = and i1 %cmp7.i.not, %or.cond23.i ; <i1> [#uses=1]
+ br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i: ; preds = %entry
+ %or.cond23.i.old = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i: ; preds = %lor.rhs.i, %land.lhs.true3.i
+ %tobool19.i = icmp eq i8 %conv1, 0 ; <i1> [#uses=0]
+ br label %for.inc
+
+for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+ br label %for.inc
+}
+
+declare i32 @safe()
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
new file mode 100644
index 000000000000..cc2f3d824bbe
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s
+; PR4668
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @x(i32 %qscale) nounwind {
+entry:
+ %temp_block = alloca [64 x i16], align 16 ; <[64 x i16]*> [#uses=0]
+ %tmp = call i32 asm sideeffect "xor %edx, %edx", "={dx},~{dirflag},~{fpsr},~{flags}"() nounwind ; <i32> [#uses=1]
+ br i1 undef, label %if.end78, label %if.then28
+
+if.then28: ; preds = %entry
+ br label %if.end78
+
+if.end78: ; preds = %if.then28, %entry
+ %level.1 = phi i32 [ %tmp, %if.then28 ], [ 0, %entry ] ; <i32> [#uses=1]
+ %add.ptr1 = getelementptr [64 x i16]* null, i32 0, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr2 = getelementptr [64 x i16]* null, i32 1, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr3 = getelementptr [64 x i16]* null, i32 2, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr4 = getelementptr [64 x i16]* null, i32 3, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr5 = getelementptr [64 x i16]* null, i32 4, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr6 = getelementptr [64 x i16]* null, i32 5, i32 %qscale ; <i16*> [#uses=1]
+ %tmp1 = call i32 asm sideeffect "nop", "={ax},r,r,r,r,r,0,~{dirflag},~{fpsr},~{flags}"(i16* %add.ptr6, i16* %add.ptr5, i16* %add.ptr4, i16* %add.ptr3, i16* %add.ptr2, i16* %add.ptr1) nounwind ; <i32> [#uses=0]
+ ret i32 %level.1
+}
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
new file mode 100644
index 000000000000..9456d91efaab
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-mingw64 | grep movabsq
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define <4 x float> @RecursiveTestFunc1(i8*) {
+EntryBlock:
+ %1 = call <4 x float> inttoptr (i64 5367207198 to <4 x float> (i8*, float, float, float, float)*)(i8* %0, float 8.000000e+00, float 5.000000e+00, float 3.000000e+00, float 4.000000e+00) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
diff --git a/test/CodeGen/X86/2009-08-12-badswitch.ll b/test/CodeGen/X86/2009-08-12-badswitch.ll
new file mode 100644
index 000000000000..a94fce04ee01
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-12-badswitch.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s | grep LJT
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10"
+
+declare void @f1() nounwind readnone
+declare void @f2() nounwind readnone
+declare void @f3() nounwind readnone
+declare void @f4() nounwind readnone
+declare void @f5() nounwind readnone
+declare void @f6() nounwind readnone
+declare void @f7() nounwind readnone
+declare void @f8() nounwind readnone
+declare void @f9() nounwind readnone
+declare void @f10() nounwind readnone
+declare void @f11() nounwind readnone
+declare void @f12() nounwind readnone
+declare void @f13() nounwind readnone
+declare void @f14() nounwind readnone
+declare void @f15() nounwind readnone
+declare void @f16() nounwind readnone
+declare void @f17() nounwind readnone
+declare void @f18() nounwind readnone
+declare void @f19() nounwind readnone
+declare void @f20() nounwind readnone
+declare void @f21() nounwind readnone
+declare void @f22() nounwind readnone
+declare void @f23() nounwind readnone
+declare void @f24() nounwind readnone
+declare void @f25() nounwind readnone
+declare void @f26() nounwind readnone
+
+define internal fastcc i32 @foo(i64 %bar) nounwind ssp {
+entry:
+ br label %bb49
+
+bb49:
+ switch i64 %bar, label %RETURN [
+ i64 2, label %RRETURN_2
+ i64 3, label %RRETURN_6
+ i64 4, label %RRETURN_7
+ i64 5, label %RRETURN_14
+ i64 6, label %RRETURN_15
+ i64 7, label %RRETURN_16
+ i64 8, label %RRETURN_17
+ i64 9, label %RRETURN_18
+ i64 10, label %RRETURN_19
+ i64 11, label %RRETURN_20
+ i64 12, label %RRETURN_21
+ i64 13, label %RRETURN_22
+ i64 14, label %RRETURN_24
+ i64 15, label %RRETURN_26
+ i64 16, label %RRETURN_27
+ i64 17, label %RRETURN_28
+ i64 18, label %RRETURN_29
+ i64 19, label %RRETURN_30
+ i64 20, label %RRETURN_31
+ i64 21, label %RRETURN_38
+ i64 22, label %RRETURN_40
+ i64 23, label %RRETURN_42
+ i64 24, label %RRETURN_44
+ i64 25, label %RRETURN_48
+ i64 26, label %RRETURN_52
+ i64 27, label %RRETURN_1
+ ]
+
+RETURN:
+ call void @f1()
+ br label %EXIT
+
+RRETURN_2: ; preds = %bb49
+ call void @f2()
+ br label %EXIT
+
+RRETURN_6: ; preds = %bb49
+ call void @f2()
+ br label %EXIT
+
+RRETURN_7: ; preds = %bb49
+ call void @f3()
+ br label %EXIT
+
+RRETURN_14: ; preds = %bb49
+ call void @f4()
+ br label %EXIT
+
+RRETURN_15: ; preds = %bb49
+ call void @f5()
+ br label %EXIT
+
+RRETURN_16: ; preds = %bb49
+ call void @f6()
+ br label %EXIT
+
+RRETURN_17: ; preds = %bb49
+ call void @f7()
+ br label %EXIT
+
+RRETURN_18: ; preds = %bb49
+ call void @f8()
+ br label %EXIT
+
+RRETURN_19: ; preds = %bb49
+ call void @f9()
+ br label %EXIT
+
+RRETURN_20: ; preds = %bb49
+ call void @f10()
+ br label %EXIT
+
+RRETURN_21: ; preds = %bb49
+ call void @f11()
+ br label %EXIT
+
+RRETURN_22: ; preds = %bb49
+ call void @f12()
+ br label %EXIT
+
+RRETURN_24: ; preds = %bb49
+ call void @f13()
+ br label %EXIT
+
+RRETURN_26: ; preds = %bb49
+ call void @f14()
+ br label %EXIT
+
+RRETURN_27: ; preds = %bb49
+ call void @f15()
+ br label %EXIT
+
+RRETURN_28: ; preds = %bb49
+ call void @f16()
+ br label %EXIT
+
+RRETURN_29: ; preds = %bb49
+ call void @f17()
+ br label %EXIT
+
+RRETURN_30: ; preds = %bb49
+ call void @f18()
+ br label %EXIT
+
+RRETURN_31: ; preds = %bb49
+ call void @f19()
+ br label %EXIT
+
+RRETURN_38: ; preds = %bb49
+ call void @f20()
+ br label %EXIT
+
+RRETURN_40: ; preds = %bb49
+ call void @f21()
+ br label %EXIT
+
+RRETURN_42: ; preds = %bb49
+ call void @f22()
+ br label %EXIT
+
+RRETURN_44: ; preds = %bb49
+ call void @f23()
+ br label %EXIT
+
+RRETURN_48: ; preds = %bb49
+ call void @f24()
+ br label %EXIT
+
+RRETURN_52: ; preds = %bb49
+ call void @f25()
+ br label %EXIT
+
+RRETURN_1: ; preds = %bb49
+ call void @f26()
+ br label %EXIT
+
+EXIT:
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
new file mode 100644
index 000000000000..6b0d6d9790de
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s
+target triple = "x86_64-mingw"
+
+; ModuleID = 'mm.bc'
+ type opaque ; type %0
+ type opaque ; type %1
+
+define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly {
+indexCheckBlock:
+ %indexCmp = icmp ugt i32 %index, 16 ; <i1> [#uses=1]
+ br i1 %indexCmp, label %zeroReturnBlock, label %primitiveTextureFetchBlock
+
+primitiveTextureFetchBlock: ; preds = %indexCheckBlock
+ %pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8* ; <i8*> [#uses=1]
+ %pointerArithmeticTmp1 = getelementptr i8* %pointerArithmeticTmp, i64 1808 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1** ; <%1**> [#uses=1]
+ %primitivePtr = load %1** %pointerArithmeticTmp2 ; <%1*> [#uses=1]
+ %pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8* ; <i8*> [#uses=1]
+ %pointerArithmeticTmp4 = getelementptr i8* %pointerArithmeticTmp3, i64 19408 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1** ; <%1**> [#uses=1]
+ %primitiveTexturePtr = getelementptr %1** %pointerArithmeticTmp5, i32 %index ; <%1**> [#uses=1]
+ %primitiveTexturePtr6 = load %1** %primitiveTexturePtr ; <%1*> [#uses=2]
+ br label %textureCheckBlock
+
+textureCheckBlock: ; preds = %primitiveTextureFetchBlock
+ %texturePtrInt = ptrtoint %1* %primitiveTexturePtr6 to i64 ; <i64> [#uses=1]
+ %testTextureNULL = icmp eq i64 %texturePtrInt, 0 ; <i1> [#uses=1]
+ br i1 %testTextureNULL, label %zeroReturnBlock, label %rhoCalculateBlock
+
+rhoCalculateBlock: ; preds = %textureCheckBlock
+ %pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8* ; <i8*> [#uses=1]
+ %pointerArithmeticTmp8 = getelementptr i8* %pointerArithmeticTmp7, i64 640 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %dimensionsPtr = load <4 x float>* %pointerArithmeticTmp9, align 1 ; <<4 x float>> [#uses=2]
+ %texDiffDX = fsub <4 x float> %texCoordDX, %texCoord ; <<4 x float>> [#uses=1]
+ %texDiffDY = fsub <4 x float> %texCoordDY, %texCoord ; <<4 x float>> [#uses=1]
+ %ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr ; <<4 x float>> [#uses=2]
+ %ddx10 = fmul <4 x float> %texDiffDY, %dimensionsPtr ; <<4 x float>> [#uses=2]
+ %ddxSquared = fmul <4 x float> %ddx, %ddx ; <<4 x float>> [#uses=3]
+ %0 = shufflevector <4 x float> %ddxSquared, <4 x float> %ddxSquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; <<4 x float>> [#uses=1]
+ %dxSquared = fadd <4 x float> %ddxSquared, %0 ; <<4 x float>> [#uses=1]
+ %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dxSquared) ; <<4 x float>> [#uses=1]
+ %ddySquared = fmul <4 x float> %ddx10, %ddx10 ; <<4 x float>> [#uses=3]
+ %2 = shufflevector <4 x float> %ddySquared, <4 x float> %ddySquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; <<4 x float>> [#uses=1]
+ %dySquared = fadd <4 x float> %ddySquared, %2 ; <<4 x float>> [#uses=1]
+ %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dySquared) ; <<4 x float>> [#uses=1]
+ %4 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %3) ; <<4 x float>> [#uses=1]
+ %rho = extractelement <4 x float> %4, i32 0 ; <float> [#uses=1]
+ ret float %rho
+
+zeroReturnBlock: ; preds = %textureCheckBlock, %indexCheckBlock
+ ret float 0.000000e+00
+}
+
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
new file mode 100644
index 000000000000..5f6cf3b9e0bb
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-pc-linux | FileCheck %s
+
+@a = external global i96, align 4
+@b = external global i64, align 8
+
+define void @c() nounwind {
+; CHECK: movl a+8, %eax
+ %srcval1 = load i96* @a, align 4
+ %sroa.store.elt2 = lshr i96 %srcval1, 64
+ %tmp = trunc i96 %sroa.store.elt2 to i64
+; CHECK: movl %eax, b
+; CHECK: movl $0, b+4
+ store i64 %tmp, i64* @b, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
new file mode 100644
index 000000000000..790fd88c46dd
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86
+; PR4753
+
+; This function triggers a sub-register reuse that must be undone.
+
+@uint8 = external global i32 ; <i32*> [#uses=3]
+
+declare signext i8 @foo(i32, i8 signext) nounwind readnone
+
+declare signext i8 @bar(i32, i8 signext) nounwind readnone
+
+define i32 @uint80(i8 signext %p_52) nounwind {
+entry:
+ %0 = sext i8 %p_52 to i16 ; <i16> [#uses=1]
+ %1 = tail call i32 @func_24(i16 zeroext %0, i8 signext ptrtoint (i8 (i32, i8)* @foo to i8)) nounwind; <i32> [#uses=1]
+ %2 = trunc i32 %1 to i8 ; <i8> [#uses=1]
+ %3 = or i8 %2, 1 ; <i8> [#uses=1]
+ %4 = tail call i32 @safe(i32 1) nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 @func_24(i16 zeroext 0, i8 signext undef) nounwind; <i32> [#uses=1]
+ %6 = trunc i32 %5 to i8 ; <i8> [#uses=1]
+ %7 = xor i8 %3, %p_52 ; <i8> [#uses=1]
+ %8 = xor i8 %7, %6 ; <i8> [#uses=1]
+ %9 = icmp ne i8 %p_52, 0 ; <i1> [#uses=1]
+ %10 = zext i1 %9 to i8 ; <i8> [#uses=1]
+ %11 = tail call i32 @func_24(i16 zeroext ptrtoint (i8 (i32, i8)* @bar to i16), i8 signext %10) nounwind; <i32> [#uses=1]
+ %12 = tail call i32 @func_24(i16 zeroext 0, i8 signext 1) nounwind; <i32> [#uses=0]
+ br i1 undef, label %bb2, label %bb
+
+bb: ; preds = %entry
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %bb, %entry
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ %iftmp.2.0 = phi i32 [ 0, %bb2 ], [ 1, %bb ] ; <i32> [#uses=1]
+ %13 = icmp ne i32 %11, %iftmp.2.0 ; <i1> [#uses=1]
+ %14 = tail call i32 @safe(i32 -2) nounwind ; <i32> [#uses=0]
+ %15 = zext i1 %13 to i8 ; <i8> [#uses=1]
+ %16 = tail call signext i8 @func_53(i8 signext undef, i8 signext 1, i8 signext %15, i8 signext %8) nounwind; <i8> [#uses=0]
+ br i1 undef, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ %17 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb3
+ %18 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %19 = sext i8 undef to i16 ; <i16> [#uses=1]
+ %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
+ br i1 undef, label %return, label %bb6.preheader
+
+bb6.preheader: ; preds = %bb5
+ %21 = sext i8 %p_52 to i32 ; <i32> [#uses=1]
+ %22 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
+ unreachable
+
+return: ; preds = %bb5
+ ret i32 undef
+}
+
+declare i32 @func_24(i16 zeroext, i8 signext)
+
+declare i32 @safe(i32)
+
+declare signext i8 @func_53(i8 signext, i8 signext, i8 signext, i8 signext)
+
+declare i32 @safefuncts(...)
diff --git a/test/CodeGen/X86/2009-08-23-linkerprivate.ll b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
new file mode 100644
index 000000000000..3da8f00a6043
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
new file mode 100644
index 000000000000..55432be1c2c9
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s
+; PR4689
+
+%struct.__s = type { [8 x i8] }
+%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* }
+%struct.pcpu = type { i32*, i32*, i32*, i32*, %struct.pcb*, i64, i32, i32, i32, i32 }
+
+define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat {
+; CHECK: hammer_time:
+; CHECK: movq $Xrsvd, %rax
+; CHECK: movq $Xrsvd, %rdi
+; CHECK: movq $Xrsvd, %r8
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ br label %for.body
+
+for.body: ; preds = %for.inc, %if.end
+ switch i32 undef, label %if.then76 [
+ i32 9, label %for.inc
+ i32 10, label %for.inc
+ i32 11, label %for.inc
+ i32 12, label %for.inc
+ ]
+
+if.then76: ; preds = %for.body
+ unreachable
+
+for.inc: ; preds = %for.body, %for.body, %for.body, %for.body
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind
+ br label %for.body170
+
+for.body170: ; preds = %for.body170, %for.end
+ store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef
+ br i1 undef, label %for.end175, label %for.body170
+
+for.end175: ; preds = %for.body170
+ unreachable
+}
+
+declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
new file mode 100644
index 000000000000..9e58872b73c8
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+; It's not legal to fold a load from a 32-bit stack slot into a 64-bit
+; instruction. If done, the instruction does a 64-bit load and that's not
+; safe. This can happen when a subreg_to_reg 0 has been coalesced. One
+; exception is when the instruction that folds the load is a move; then we
+; can simply turn it into a 32-bit load from the stack slot.
+; rdar://7170444
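+;
+; For illustration (not checked below): with the 32-bit stack argument at
+; 16(%rbp), folding the reload into a move gives "movl 16(%rbp), %edx",
+; a safe 32-bit load, whereas folding it into a 64-bit instruction such as
+; "addq 16(%rbp), %rdx" would read 8 bytes from a 4-byte slot.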
+
+%struct.ComplexType = type { i32 }
+
+define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp {
+entry:
+; CHECK: _t:
+; CHECK: movl 16(%rbp),
+; CHECK: movl 16(%rbp), %edx
+ %0 = zext i32 %argumentsLength to i64 ; <i64> [#uses=1]
+ %1 = zext i32 %clientPort to i64 ; <i64> [#uses=1]
+ %2 = inttoptr i64 %1 to %struct.ComplexType* ; <%struct.ComplexType*> [#uses=1]
+ %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID)
+ to label %invcont unwind label %lpad ; <i8*> [#uses=1]
+
+invcont: ; preds = %entry
+ %4 = add i32 %requestID, %pluginID ; <i32> [#uses=0]
+ %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef)
+ to label %invcont1 unwind label %lpad ; <i8> [#uses=0]
+
+invcont1: ; preds = %invcont
+ %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
+ to label %invcont2 unwind label %lpad
+
+invcont2: ; preds = %invcont1
+ ret i32 0
+
+lpad: ; preds = %invcont1, %invcont, %entry
+ %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @vm_deallocate(i32, i64, i64)
+
+declare i8* @pluginInstance(i8*, i32)
+
+declare zeroext i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*)
+
+declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32)
diff --git a/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
new file mode 100644
index 000000000000..18b5a179c9ef
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10
+; PR4910
+
+%0 = type { i32, i32, i32, i32 }
+
+@boot_cpu_id = external global i32 ; <i32*> [#uses=1]
+@cpu_logical = common global i32 0, align 4 ; <i32*> [#uses=1]
+
+define void @topo_probe_0xb() nounwind ssp {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc38, %entry
+ %0 = phi i32 [ 0, %entry ], [ %inc40, %for.inc38 ] ; <i32> [#uses=3]
+ %cmp = icmp slt i32 %0, 3 ; <i1> [#uses=1]
+ br i1 %cmp, label %for.body, label %for.end41
+
+for.body: ; preds = %for.cond
+ %1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,{cx},~{dirflag},~{fpsr},~{flags}"(i32 11, i32 %0) nounwind ; <%0> [#uses=3]
+ %asmresult.i = extractvalue %0 %1, 0 ; <i32> [#uses=1]
+ %asmresult10.i = extractvalue %0 %1, 2 ; <i32> [#uses=1]
+ %and = and i32 %asmresult.i, 31 ; <i32> [#uses=2]
+ %shr42 = lshr i32 %asmresult10.i, 8 ; <i32> [#uses=1]
+ %and12 = and i32 %shr42, 255 ; <i32> [#uses=2]
+ %cmp14 = icmp eq i32 %and12, 0 ; <i1> [#uses=1]
+ br i1 %cmp14, label %for.end41, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %for.body
+ %asmresult9.i = extractvalue %0 %1, 1 ; <i32> [#uses=1]
+ %and7 = and i32 %asmresult9.i, 65535 ; <i32> [#uses=1]
+ %cmp16 = icmp eq i32 %and7, 0 ; <i1> [#uses=1]
+ br i1 %cmp16, label %for.end41, label %for.cond17.preheader
+
+for.cond17.preheader: ; preds = %lor.lhs.false
+ %tmp24 = load i32* @boot_cpu_id ; <i32> [#uses=1]
+ %shr26 = ashr i32 %tmp24, %and ; <i32> [#uses=1]
+ br label %for.body20
+
+for.body20: ; preds = %for.body20, %for.cond17.preheader
+ %2 = phi i32 [ 0, %for.cond17.preheader ], [ %inc32, %for.body20 ] ; <i32> [#uses=2]
+ %cnt.143 = phi i32 [ 0, %for.cond17.preheader ], [ %inc.cnt.1, %for.body20 ] ; <i32> [#uses=1]
+ %shr23 = ashr i32 %2, %and ; <i32> [#uses=1]
+ %cmp27 = icmp eq i32 %shr23, %shr26 ; <i1> [#uses=1]
+ %inc = zext i1 %cmp27 to i32 ; <i32> [#uses=1]
+ %inc.cnt.1 = add i32 %inc, %cnt.143 ; <i32> [#uses=2]
+ %inc32 = add nsw i32 %2, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %inc32, 255 ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body20
+
+for.end: ; preds = %for.body20
+ %cmp34 = icmp eq i32 %and12, 1 ; <i1> [#uses=1]
+ br i1 %cmp34, label %if.then35, label %for.inc38
+
+if.then35: ; preds = %for.end
+ store i32 %inc.cnt.1, i32* @cpu_logical
+ br label %for.inc38
+
+for.inc38: ; preds = %for.end, %if.then35
+ %inc40 = add nsw i32 %0, 1 ; <i32> [#uses=1]
+ br label %for.cond
+
+for.end41: ; preds = %lor.lhs.false, %for.body, %for.cond
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
new file mode 100644
index 000000000000..646806e5dbb2
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+
+; PR4958
+
+define i32 @main() nounwind ssp {
+entry:
+; CHECK: main:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ br label %bb
+
+bb: ; preds = %bb1, %entry
+; CHECK: movl %e
+; CHECK-NEXT: addl $1
+; CHECK-NEXT: movl %e
+; CHECK-NEXT: adcl $0
+ %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ] ; <i64> [#uses=1]
+ %0 = add nsw i64 %i.0, 1 ; <i64> [#uses=2]
+ %1 = icmp sgt i32 0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %2 = icmp sle i64 %0, 1 ; <i1> [#uses=1]
+ br i1 %2, label %bb, label %bb2
+
+bb2: ; preds = %bb1, %bb
+ br label %return
+
+return: ; preds = %bb2
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
new file mode 100644
index 000000000000..4f44caea74c9
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '4964.c'
+; PR4964
+; Registers other than RAX and RCX are OK, but they must be different.
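+; (Note: the "=&r" constraint below marks operand $1 earlyclobber, i.e. it is
+; written before the inputs are consumed, so the allocator may not give it
+; the same register as the "rm" input.)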
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+ type { i64, i64 } ; type %0
+
+define i64 @flsst(i64 %find) nounwind ssp {
+entry:
+; CHECK: FOO %rax %rcx
+ %asmtmp = tail call %0 asm sideeffect "FOO $0 $1 $2", "=r,=&r,rm,~{dirflag},~{fpsr},~{flags},~{cc}"(i64 %find) nounwind ; <%0> [#uses=1]
+ %asmresult = extractvalue %0 %asmtmp, 0 ; <i64> [#uses=1]
+ ret i64 %asmresult
+}
diff --git a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
new file mode 100644
index 000000000000..80b883582ce5
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
+
+define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
+; CHECK: dot:
+; CHECK: decl %
+; CHECK-NEXT: jne
+entry:
+ %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb2
+
+bb: ; preds = %bb, %entry
+ %i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+ %sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ] ; <i32> [#uses=1]
+ %1 = mul i32 %i.03, %As ; <i32> [#uses=1]
+ %2 = getelementptr i16* %A, i32 %1 ; <i16*> [#uses=1]
+ %3 = load i16* %2, align 2 ; <i16> [#uses=1]
+ %4 = sext i16 %3 to i32 ; <i32> [#uses=1]
+ %5 = mul i32 %i.03, %Bs ; <i32> [#uses=1]
+ %6 = getelementptr i16* %B, i32 %5 ; <i16*> [#uses=1]
+ %7 = load i16* %6, align 2 ; <i16> [#uses=1]
+ %8 = sext i16 %7 to i32 ; <i32> [#uses=1]
+ %9 = mul i32 %8, %4 ; <i32> [#uses=1]
+ %10 = add i32 %9, %sum.04 ; <i32> [#uses=2]
+ %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
+
+bb1.bb2_crit_edge: ; preds = %bb
+ %phitmp = trunc i32 %10 to i16 ; <i16> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %entry, %bb1.bb2_crit_edge
+ %sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i16> [#uses=1]
+ store i16 %sum.0.lcssa, i16* %C, align 2
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
new file mode 100644
index 000000000000..33f35f881e85
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ ret i32 3
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ ret i32 3
+
+bb3: ; preds = %bb1
+ br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+bb.i18: ; preds = %bb.i18, %bb3
+ br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+quantum_getwidth.exit: ; preds = %bb.i18, %bb3
+ br i1 undef, label %bb4, label %bb6.preheader
+
+bb4: ; preds = %quantum_getwidth.exit
+ unreachable
+
+bb6.preheader: ; preds = %quantum_getwidth.exit
+ br i1 undef, label %bb.i1, label %bb1.i2
+
+bb.i1: ; preds = %bb6.preheader
+ unreachable
+
+bb1.i2: ; preds = %bb6.preheader
+ br i1 undef, label %bb2.i, label %bb3.i4
+
+bb2.i: ; preds = %bb1.i2
+ unreachable
+
+bb3.i4: ; preds = %bb1.i2
+ br i1 undef, label %quantum_new_qureg.exit, label %bb4.i
+
+bb4.i: ; preds = %bb3.i4
+ unreachable
+
+quantum_new_qureg.exit: ; preds = %bb3.i4
+ br i1 undef, label %bb9, label %bb11.thread
+
+bb11.thread: ; preds = %quantum_new_qureg.exit
+ %.cast.i = zext i32 undef to i64 ; <i64> [#uses=1]
+ br label %bb.i37
+
+bb9: ; preds = %quantum_new_qureg.exit
+ unreachable
+
+bb.i37: ; preds = %bb.i37, %bb11.thread
+ %0 = load i64* undef, align 8 ; <i64> [#uses=1]
+ %1 = shl i64 %0, %.cast.i ; <i64> [#uses=1]
+ store i64 %1, i64* undef, align 8
+ br i1 undef, label %bb.i37, label %quantum_addscratch.exit
+
+quantum_addscratch.exit: ; preds = %bb.i37
+ br i1 undef, label %bb12.preheader, label %bb14
+
+bb12.preheader: ; preds = %quantum_addscratch.exit
+ unreachable
+
+bb14: ; preds = %quantum_addscratch.exit
+ br i1 undef, label %bb17, label %bb.nph
+
+bb.nph: ; preds = %bb14
+ unreachable
+
+bb17: ; preds = %bb14
+ br i1 undef, label %bb1.i7, label %quantum_measure.exit
+
+bb1.i7: ; preds = %bb17
+ br label %quantum_measure.exit
+
+quantum_measure.exit: ; preds = %bb1.i7, %bb17
+ switch i32 undef, label %bb21 [
+ i32 -1, label %bb18
+ i32 0, label %bb20
+ ]
+
+bb18: ; preds = %quantum_measure.exit
+ unreachable
+
+bb20: ; preds = %quantum_measure.exit
+ unreachable
+
+bb21: ; preds = %quantum_measure.exit
+ br i1 undef, label %quantum_frac_approx.exit, label %bb1.i
+
+bb1.i: ; preds = %bb21
+ unreachable
+
+quantum_frac_approx.exit: ; preds = %bb21
+ br i1 undef, label %bb25, label %bb26
+
+bb25: ; preds = %quantum_frac_approx.exit
+ unreachable
+
+bb26: ; preds = %quantum_frac_approx.exit
+ br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+bb.i: ; preds = %bb.i, %bb26
+ br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+quantum_gcd.exit: ; preds = %bb.i, %bb26
+ br i1 undef, label %bb32, label %bb33
+
+bb32: ; preds = %quantum_gcd.exit
+ br i1 undef, label %bb.i.i, label %quantum_delete_qureg.exit
+
+bb.i.i: ; preds = %bb32
+ ret i32 0
+
+quantum_delete_qureg.exit: ; preds = %bb32
+ ret i32 0
+
+bb33: ; preds = %quantum_gcd.exit
+ unreachable
+}
diff --git a/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 000000000000..d37d4b8bd427
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+; rdar://7247745
+
+%struct._lck_mtx_ = type { %union.anon }
+%struct._lck_rw_t_internal_ = type <{ i16, i8, i8, i32, i32, i32 }>
+%struct.anon = type { i64, i64, [2 x i8], i8, i8, i32 }
+%struct.memory_object = type { i32, i32, %struct.memory_object_pager_ops* }
+%struct.memory_object_control = type { i32, i32, %struct.vm_object* }
+%struct.memory_object_pager_ops = type { void (%struct.memory_object*)*, void (%struct.memory_object*)*, i32 (%struct.memory_object*, %struct.memory_object_control*, i32)*, i32 (%struct.memory_object*)*, i32 (%struct.memory_object*, i64, i32, i32, i32*)*, i32 (%struct.memory_object*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.memory_object*, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i32)*, i32 (%struct.memory_object*)*, i8* }
+%struct.queue_entry = type { %struct.queue_entry*, %struct.queue_entry* }
+%struct.upl = type { %struct._lck_mtx_, i32, i32, %struct.vm_object*, i64, i32, i64, %struct.vm_object*, i32, i8* }
+%struct.upl_page_info = type <{ i32, i8, [3 x i8] }>
+%struct.vm_object = type { %struct.queue_entry, %struct._lck_rw_t_internal_, i64, %struct.vm_page*, i32, i32, i32, i32, %struct.vm_object*, %struct.vm_object*, i64, %struct.memory_object*, i64, %struct.memory_object_control*, i32, i16, i16, [2 x i8], i8, i8, %struct.queue_entry, %struct.queue_entry, i64, i32, i32, i32, i8*, i64, i8, i8, [2 x i8], %struct.queue_entry }
+%struct.vm_page = type { %struct.queue_entry, %struct.queue_entry, %struct.vm_page*, %struct.vm_object*, i64, [2 x i8], i8, i8, i32, i8, i8, i8, i8, i32 }
+%union.anon = type { %struct.anon }
+
+declare i64 @OSAddAtomic64(i64, i64*) noredzone noimplicitfloat
+
+define i32 @upl_commit_range(%struct.upl* %upl, i32 %offset, i32 %size, i32 %flags, %struct.upl_page_info* %page_list, i32 %count, i32* nocapture %empty) nounwind noredzone noimplicitfloat {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.end: ; preds = %entry
+ br i1 undef, label %if.end143, label %if.then136
+
+if.then136: ; preds = %if.end
+ unreachable
+
+if.end143: ; preds = %if.end
+ br i1 undef, label %if.else155, label %if.then153
+
+if.then153: ; preds = %if.end143
+ br label %while.cond
+
+if.else155: ; preds = %if.end143
+ unreachable
+
+while.cond: ; preds = %if.end1039, %if.then153
+ br i1 undef, label %if.then1138, label %while.body
+
+while.body: ; preds = %while.cond
+ br i1 undef, label %if.end260, label %if.then217
+
+if.then217: ; preds = %while.body
+ br i1 undef, label %if.end260, label %if.then230
+
+if.then230: ; preds = %if.then217
+ br i1 undef, label %if.then246, label %if.end260
+
+if.then246: ; preds = %if.then230
+ br label %if.end260
+
+if.end260: ; preds = %if.then246, %if.then230, %if.then217, %while.body
+ br i1 undef, label %if.end296, label %if.then266
+
+if.then266: ; preds = %if.end260
+ unreachable
+
+if.end296: ; preds = %if.end260
+ br i1 undef, label %if.end1039, label %if.end306
+
+if.end306: ; preds = %if.end296
+ br i1 undef, label %if.end796, label %if.then616
+
+if.then616: ; preds = %if.end306
+ br i1 undef, label %commit_next_page, label %do.body716
+
+do.body716: ; preds = %if.then616
+ %call721 = call i64 @OSAddAtomic64(i64 1, i64* undef) nounwind noredzone noimplicitfloat ; <i64> [#uses=0]
+ call void asm sideeffect "movq\090x0($0),%rdi\0A\09movq\090x8($0),%rsi\0A\09.section __DATA, __data\0A\09.globl __dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec\0A\09__dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec:.quad 1f\0A\09.text\0A\091:nop\0A\09nop\0A\09nop\0A\09", "r,~{memory},~{di},~{si},~{dirflag},~{fpsr},~{flags}"(i64* undef) nounwind
+ br label %commit_next_page
+
+if.end796: ; preds = %if.end306
+ unreachable
+
+commit_next_page: ; preds = %do.body716, %if.then616
+ br i1 undef, label %if.end1039, label %if.then1034
+
+if.then1034: ; preds = %commit_next_page
+ br label %if.end1039
+
+if.end1039: ; preds = %if.then1034, %commit_next_page, %if.end296
+ br label %while.cond
+
+if.then1138: ; preds = %while.cond
+ unreachable
+
+if.then: ; preds = %entry
+ ret i32 4
+}
diff --git a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
new file mode 100644
index 000000000000..ef10ae59ab6b
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
@@ -0,0 +1,264 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 1
+; rdar://7274692
+
+%0 = type { [125 x i32] }
+%1 = type { i32 }
+%struct..5sPragmaType = type { i8*, i32 }
+%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+%struct.AuxData = type { i8*, void (i8*)* }
+%struct.Bitvec = type { i32, i32, i32, %0 }
+%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+%struct.Context = type { i64, i32, %struct.Fifo }
+%struct.CountCtx = type { i64 }
+%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
+%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct..5sPragmaType, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct..5sPragmaType, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct..5sPragmaType, i32, i64 }
+%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
+%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
+%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
+%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
+%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
+%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
+%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+%struct._OvflCell = type { i8*, i16 }
+%struct._RuneCharClass = type { [14 x i8], i32 }
+%struct._RuneEntry = type { i32, i32, i32, i32* }
+%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
+%struct._RuneRange = type { i32, %struct._RuneEntry* }
+%struct.__sFILEX = type opaque
+%struct._ht = type { i32, %struct.HashElem* }
+%struct.callback_data = type { %struct.sqlite3*, i32, i32, %struct.FILE*, i32, i32, i32, i8*, [20 x i8], [100 x i32], [100 x i32], [20 x i8], %struct.previous_mode_data, [1024 x i8], i8* }
+%struct.previous_mode_data = type { i32, i32, i32, [100 x i32] }
+%struct.sColMap = type { i32, i8* }
+%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %union.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+%struct.sqlite3InitInfo = type { i32, i32, i8 }
+%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+%struct.sqlite3_mutex = type opaque
+%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+%union.anon = type { double }
+
+@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=2]
+@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1]
+@.str10 = internal constant [16 x i8] c"Out of memory!\0A\00", align 1 ; <[16 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.callback_data*, i8*)* @set_table_name to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @set_table_name(%struct.callback_data* nocapture %p, i8* %zName) nounwind ssp {
+entry:
+ %0 = getelementptr inbounds %struct.callback_data* %p, i32 0, i32 7 ; <i8**> [#uses=3]
+ %1 = load i8** %0, align 4 ; <i8*> [#uses=2]
+ %2 = icmp eq i8* %1, null ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb
+
+bb: ; preds = %entry
+ free i8* %1
+ store i8* null, i8** %0, align 4
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %3 = icmp eq i8* %zName, null ; <i1> [#uses=1]
+ br i1 %3, label %return, label %bb2
+
+bb2: ; preds = %bb1
+ %4 = load i8* %zName, align 1 ; <i8> [#uses=2]
+ %5 = zext i8 %4 to i32 ; <i32> [#uses=2]
+ %6 = icmp sgt i8 %4, -1 ; <i1> [#uses=1]
+ br i1 %6, label %bb.i.i, label %bb1.i.i
+
+bb.i.i: ; preds = %bb2
+ %7 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %5 ; <i32*> [#uses=1]
+ %8 = load i32* %7, align 4 ; <i32> [#uses=1]
+ %9 = and i32 %8, 256 ; <i32> [#uses=1]
+ br label %isalpha.exit
+
+bb1.i.i: ; preds = %bb2
+ %10 = tail call i32 @__maskrune(i32 %5, i32 256) nounwind ; <i32> [#uses=1]
+ br label %isalpha.exit
+
+isalpha.exit: ; preds = %bb1.i.i, %bb.i.i
+ %storemerge.in.in.i.i = phi i32 [ %9, %bb.i.i ], [ %10, %bb1.i.i ] ; <i32> [#uses=1]
+ %storemerge.in.i.i = icmp eq i32 %storemerge.in.in.i.i, 0 ; <i1> [#uses=1]
+ br i1 %storemerge.in.i.i, label %bb3, label %bb5
+
+bb3: ; preds = %isalpha.exit
+ %11 = load i8* %zName, align 1 ; <i8> [#uses=2]
+ %12 = icmp eq i8 %11, 95 ; <i1> [#uses=1]
+ br i1 %12, label %bb5, label %bb12.preheader
+
+bb5: ; preds = %bb3, %isalpha.exit
+ %.pre = load i8* %zName, align 1 ; <i8> [#uses=1]
+ br label %bb12.preheader
+
+bb12.preheader: ; preds = %bb5, %bb3
+ %13 = phi i8 [ %.pre, %bb5 ], [ %11, %bb3 ] ; <i8> [#uses=1]
+ %needQuote.1.ph = phi i32 [ 0, %bb5 ], [ 1, %bb3 ] ; <i32> [#uses=2]
+ %14 = icmp eq i8 %13, 0 ; <i1> [#uses=1]
+ br i1 %14, label %bb13, label %bb7
+
+bb7: ; preds = %bb11, %bb12.preheader
+ %i.011 = phi i32 [ %tmp17, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=2]
+ %n.110 = phi i32 [ %26, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=3]
+ %needQuote.19 = phi i32 [ %needQuote.0, %bb11 ], [ %needQuote.1.ph, %bb12.preheader ] ; <i32> [#uses=2]
+ %scevgep16 = getelementptr i8* %zName, i32 %i.011 ; <i8*> [#uses=2]
+ %tmp17 = add i32 %i.011, 1 ; <i32> [#uses=2]
+ %scevgep18 = getelementptr i8* %zName, i32 %tmp17 ; <i8*> [#uses=1]
+ %15 = load i8* %scevgep16, align 1 ; <i8> [#uses=2]
+ %16 = zext i8 %15 to i32 ; <i32> [#uses=2]
+ %17 = icmp sgt i8 %15, -1 ; <i1> [#uses=1]
+ br i1 %17, label %bb.i.i2, label %bb1.i.i3
+
+bb.i.i2: ; preds = %bb7
+ %18 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %16 ; <i32*> [#uses=1]
+ %19 = load i32* %18, align 4 ; <i32> [#uses=1]
+ %20 = and i32 %19, 1280 ; <i32> [#uses=1]
+ br label %isalnum.exit
+
+bb1.i.i3: ; preds = %bb7
+ %21 = tail call i32 @__maskrune(i32 %16, i32 1280) nounwind ; <i32> [#uses=1]
+ br label %isalnum.exit
+
+isalnum.exit: ; preds = %bb1.i.i3, %bb.i.i2
+ %storemerge.in.in.i.i4 = phi i32 [ %20, %bb.i.i2 ], [ %21, %bb1.i.i3 ] ; <i32> [#uses=1]
+ %storemerge.in.i.i5 = icmp eq i32 %storemerge.in.in.i.i4, 0 ; <i1> [#uses=1]
+ br i1 %storemerge.in.i.i5, label %bb8, label %bb11
+
+bb8: ; preds = %isalnum.exit
+ %22 = load i8* %scevgep16, align 1 ; <i8> [#uses=2]
+ %23 = icmp eq i8 %22, 95 ; <i1> [#uses=1]
+ br i1 %23, label %bb11, label %bb9
+
+bb9: ; preds = %bb8
+ %24 = icmp eq i8 %22, 39 ; <i1> [#uses=1]
+ %25 = zext i1 %24 to i32 ; <i32> [#uses=1]
+ %.n.1 = add i32 %n.110, %25 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb8, %isalnum.exit
+ %needQuote.0 = phi i32 [ 1, %bb9 ], [ %needQuote.19, %isalnum.exit ], [ %needQuote.19, %bb8 ] ; <i32> [#uses=2]
+ %n.0 = phi i32 [ %.n.1, %bb9 ], [ %n.110, %isalnum.exit ], [ %n.110, %bb8 ] ; <i32> [#uses=1]
+ %26 = add nsw i32 %n.0, 1 ; <i32> [#uses=2]
+ %27 = load i8* %scevgep18, align 1 ; <i8> [#uses=1]
+ %28 = icmp eq i8 %27, 0 ; <i1> [#uses=1]
+ br i1 %28, label %bb13, label %bb7
+
+bb13: ; preds = %bb11, %bb12.preheader
+ %n.1.lcssa = phi i32 [ 0, %bb12.preheader ], [ %26, %bb11 ] ; <i32> [#uses=2]
+ %needQuote.1.lcssa = phi i32 [ %needQuote.1.ph, %bb12.preheader ], [ %needQuote.0, %bb11 ] ; <i32> [#uses=1]
+ %29 = add nsw i32 %n.1.lcssa, 2 ; <i32> [#uses=1]
+ %30 = icmp eq i32 %needQuote.1.lcssa, 0 ; <i1> [#uses=3]
+ %n.1. = select i1 %30, i32 %n.1.lcssa, i32 %29 ; <i32> [#uses=1]
+ %31 = add nsw i32 %n.1., 1 ; <i32> [#uses=1]
+ %32 = malloc i8, i32 %31 ; <i8*> [#uses=7]
+ store i8* %32, i8** %0, align 4
+ %33 = icmp eq i8* %32, null ; <i1> [#uses=1]
+ br i1 %33, label %bb16, label %bb17
+
+bb16: ; preds = %bb13
+ %34 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
+ %35 = bitcast %struct.FILE* %34 to i8* ; <i8*> [#uses=1]
+ %36 = tail call i32 @"\01_fwrite$UNIX2003"(i8* getelementptr inbounds ([16 x i8]* @.str10, i32 0, i32 0), i32 1, i32 15, i8* %35) nounwind ; <i32> [#uses=0]
+ tail call void @exit(i32 1) noreturn nounwind
+ unreachable
+
+bb17: ; preds = %bb13
+ br i1 %30, label %bb23.preheader, label %bb18
+
+bb18: ; preds = %bb17
+ store i8 39, i8* %32, align 4
+ br label %bb23.preheader
+
+bb23.preheader: ; preds = %bb18, %bb17
+ %n.3.ph = phi i32 [ 1, %bb18 ], [ 0, %bb17 ] ; <i32> [#uses=2]
+ %37 = load i8* %zName, align 1 ; <i8> [#uses=1]
+ %38 = icmp eq i8 %37, 0 ; <i1> [#uses=1]
+ br i1 %38, label %bb24, label %bb20
+
+bb20: ; preds = %bb22, %bb23.preheader
+ %storemerge18 = phi i32 [ %tmp, %bb22 ], [ 0, %bb23.preheader ] ; <i32> [#uses=2]
+ %n.37 = phi i32 [ %n.4, %bb22 ], [ %n.3.ph, %bb23.preheader ] ; <i32> [#uses=3]
+ %scevgep = getelementptr i8* %zName, i32 %storemerge18 ; <i8*> [#uses=1]
+ %tmp = add i32 %storemerge18, 1 ; <i32> [#uses=2]
+ %scevgep15 = getelementptr i8* %zName, i32 %tmp ; <i8*> [#uses=1]
+ %39 = load i8* %scevgep, align 1 ; <i8> [#uses=2]
+ %40 = getelementptr inbounds i8* %32, i32 %n.37 ; <i8*> [#uses=1]
+ store i8 %39, i8* %40, align 1
+ %41 = add nsw i32 %n.37, 1 ; <i32> [#uses=2]
+ %42 = icmp eq i8 %39, 39 ; <i1> [#uses=1]
+ br i1 %42, label %bb21, label %bb22
+
+bb21: ; preds = %bb20
+ %43 = getelementptr inbounds i8* %32, i32 %41 ; <i8*> [#uses=1]
+ store i8 39, i8* %43, align 1
+ %44 = add nsw i32 %n.37, 2 ; <i32> [#uses=1]
+ br label %bb22
+
+bb22: ; preds = %bb21, %bb20
+ %n.4 = phi i32 [ %44, %bb21 ], [ %41, %bb20 ] ; <i32> [#uses=2]
+ %45 = load i8* %scevgep15, align 1 ; <i8> [#uses=1]
+ %46 = icmp eq i8 %45, 0 ; <i1> [#uses=1]
+ br i1 %46, label %bb24, label %bb20
+
+bb24: ; preds = %bb22, %bb23.preheader
+ %n.3.lcssa = phi i32 [ %n.3.ph, %bb23.preheader ], [ %n.4, %bb22 ] ; <i32> [#uses=3]
+ br i1 %30, label %bb26, label %bb25
+
+bb25: ; preds = %bb24
+ %47 = getelementptr inbounds i8* %32, i32 %n.3.lcssa ; <i8*> [#uses=1]
+ store i8 39, i8* %47, align 1
+ %48 = add nsw i32 %n.3.lcssa, 1 ; <i32> [#uses=1]
+ br label %bb26
+
+bb26: ; preds = %bb25, %bb24
+ %n.5 = phi i32 [ %48, %bb25 ], [ %n.3.lcssa, %bb24 ] ; <i32> [#uses=1]
+ %49 = getelementptr inbounds i8* %32, i32 %n.5 ; <i8*> [#uses=1]
+ store i8 0, i8* %49, align 1
+ ret void
+
+return: ; preds = %bb1
+ ret void
+}
+
+declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*)
+
+declare void @exit(i32) noreturn nounwind
+
+declare i32 @__maskrune(i32, i32)
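
[Editor's note, not part of the patch] The RUN line above relies on llc's -stats output: piping stderr into grep for the machine-licm counter and then for the value 1 asserts that machine LICM hoisted exactly one instruction out of the loop (the test does not spell out which one). A reduced sketch of the kind of hoistable, loop-invariant instruction involved, with an invented global and function name:

@g = external global i32

define i32 @licm_demo(i32 %n) nounwind {
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  %inv = load i32* @g                  ; loop-invariant: a hoisting candidate
  %acc.next = add i32 %acc, %inv
  %i.next = add i32 %i, 1
  %done = icmp eq i32 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}
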
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
index 7313670a1c33..de930d512678 100644
--- a/test/CodeGen/X86/20090313-signext.ll
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=pic > %t
+; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t
; RUN: grep {movswl %ax, %edi} %t
; RUN: grep {movw (%rax), %ax} %t
; XFAIL: *
diff --git a/test/CodeGen/X86/Atomics-32.ll b/test/CodeGen/X86/Atomics-32.ll
index 2a3e2285800f..0e9b73ea1090 100644
--- a/test/CodeGen/X86/Atomics-32.ll
+++ b/test/CodeGen/X86/Atomics-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
;; Note the 64-bit variants are not supported yet (in 32-bit mode).
; ModuleID = 'Atomics.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index 37b2e338eff6..ac174b9f9a3f 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; ModuleID = 'Atomics.c'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 513599c58bcd..a6fd2d8fe134 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,186 +1,16 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small > %t
-; RUN: grep leal %t | count 33
-; RUN: grep movl %t | count 239
-; RUN: grep addl %t | count 20
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep %rip %t
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=pic -code-model=small > %t
-; RUN: grep leal %t | count 43
-; RUN: grep movl %t | count 377
-; RUN: grep addl %t | count 179
-; RUN: grep subl %t | count 6
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: grep _GLOBAL_OFFSET_TABLE_ %t | count 148
-; RUN: grep @GOT %t | count 207
-; RUN: grep @GOTOFF %t | count 58
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: grep @PLT %t | count 20
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 91
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 70
-; RUN: grep movq %t | count 56
-; RUN: grep addq %t | count 20
-; RUN: grep subq %t | count 14
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 139
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 98
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 59
-; RUN: grep movq %t | count 195
-; RUN: grep addq %t | count 36
-; RUN: grep subq %t | count 11
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 149
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 149
-; RUN: not grep @GOTPLT %t
-; RUN: grep @PLT %t | count 20
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 207
-
-
-
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small > %t
-; RUN: grep leal %t | count 33
-; RUN: grep movl %t | count 239
-; RUN: grep addl %t | count 20
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep %rip %t
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small > %t
-; RUN: grep leal %t | count 31
-; RUN: grep movl %t | count 312
-; RUN: grep addl %t | count 32
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small > %t
-; RUN: grep leal %t | count 57
-; RUN: grep movl %t | count 292
-; RUN: grep addl %t | count 32
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 95
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 89
-; RUN: grep movq %t | count 142
-; RUN: grep addq %t | count 30
-; RUN: grep subq %t | count 12
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 92
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 92
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 208
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 95
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 89
-; RUN: grep movq %t | count 142
-; RUN: grep addq %t | count 30
-; RUN: grep subq %t | count 12
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 92
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 92
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 208
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
@src = external global [131072 x i32]
@dst = external global [131072 x i32]
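
[Editor's note, not part of the patch] The rewrite above replaces roughly 170 brittle grep-and-count RUN lines with one FileCheck invocation per target configuration, each selecting its own patterns via -check-prefix; the expected instructions then live next to each function as prefixed comments, as in the hunks that follow. A minimal sketch of the pattern for one configuration (function and global names invented; the expected lines are indicative, not verified output):

; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC

@v = external global i32

define i32 @getv() nounwind {
entry:
  %0 = load i32* @v, align 4
  ret i32 %0
; LINUX-64-PIC: getv:
; LINUX-64-PIC: movq v@GOTPCREL(%rip), %rax
; LINUX-64-PIC-NEXT: movl (%rax), %eax
; LINUX-64-PIC-NEXT: ret
}
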
@@ -206,6 +36,71 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
ret void
+
+; LINUX-64-STATIC: foo00:
+; LINUX-64-STATIC: movl src(%rip), %eax
+; LINUX-64-STATIC: movl %eax, dst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo00:
+; LINUX-32-STATIC: movl src, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo00:
+; LINUX-32-PIC: movl src, %eax
+; LINUX-32-PIC-NEXT: movl %eax, dst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo00:
+; DARWIN-32-STATIC: movl _src, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo00:
+; DARWIN-32-PIC: call L1$pb
+; DARWIN-32-PIC-NEXT: L1$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L1$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L1$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @fxo00() nounwind {
@@ -213,18 +108,191 @@ entry:
%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
ret void
+
+; LINUX-64-STATIC: fxo00:
+; LINUX-64-STATIC: movl xsrc(%rip), %eax
+; LINUX-64-STATIC: movl %eax, xdst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo00:
+; LINUX-32-STATIC: movl xsrc, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, xdst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: fxo00:
+; LINUX-32-PIC: movl xsrc, %eax
+; LINUX-32-PIC-NEXT: movl %eax, xdst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: fxo00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _fxo00:
+; DARWIN-32-STATIC: movl _xsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _xdst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _fxo00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _fxo00:
+; DARWIN-32-PIC: call L2$pb
+; DARWIN-32-PIC-NEXT: L2$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L2$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L2$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _fxo00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _fxo00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _fxo00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo01() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: foo01:
+; LINUX-64-STATIC: movq $dst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo01:
+; LINUX-32-STATIC: movl $dst, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo01:
+; LINUX-32-PIC: movl $dst, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo01:
+; DARWIN-32-STATIC: movl $_dst, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo01:
+; DARWIN-32-PIC: call L3$pb
+; DARWIN-32-PIC-NEXT: L3$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L3$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @fxo01() nounwind {
entry:
store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: fxo01:
+; LINUX-64-STATIC: movq $xdst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo01:
+; LINUX-32-STATIC: movl $xdst, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: fxo01:
+; LINUX-32-PIC: movl $xdst, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: fxo01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _fxo01:
+; DARWIN-32-STATIC: movl $_xdst, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _fxo01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _fxo01:
+; DARWIN-32-PIC: call L4$pb
+; DARWIN-32-PIC-NEXT: L4$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L4$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _fxo01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _fxo01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _fxo01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo02() nounwind {
@@ -233,6 +301,80 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
ret void
+; LINUX-64-STATIC: foo02:
+; LINUX-64-STATIC: movl src(%rip), %
+; LINUX-64-STATIC: movq ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo02:
+; LINUX-32-STATIC: movl src, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo02:
+; LINUX-32-PIC: movl src, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo02:
+; DARWIN-32-STATIC: movl _src, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo02:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo02:
+; DARWIN-32-PIC: call L5$pb
+; DARWIN-32-PIC-NEXT: L5$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L5$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @fxo02() nounwind {
@@ -240,7 +382,81 @@ entry:
%0 = load i32** @ptr, align 8
%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
+; LINUX-64-STATIC: fxo02:
+; LINUX-64-STATIC: movl xsrc(%rip), %
+; LINUX-64-STATIC: movq ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo02:
+; LINUX-32-STATIC: movl xsrc, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: fxo02:
+; LINUX-32-PIC: movl xsrc, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: fxo02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _fxo02:
+; DARWIN-32-STATIC: movl _xsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _fxo02:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _fxo02:
+; DARWIN-32-PIC: call L6$pb
+; DARWIN-32-PIC-NEXT: L6$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L6$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L6$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _fxo02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _fxo02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _fxo02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo03() nounwind {
@@ -248,12 +464,114 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
ret void
+; LINUX-64-STATIC: foo03:
+; LINUX-64-STATIC: movl dsrc(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ddst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo03:
+; LINUX-32-STATIC: movl dsrc, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ddst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo03:
+; LINUX-32-PIC: movl dsrc, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ddst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo03:
+; DARWIN-32-STATIC: movl _dsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ddst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo03:
+; DARWIN-32-DYNAMIC: movl _dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ddst
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo03:
+; DARWIN-32-PIC: call L7$pb
+; DARWIN-32-PIC-NEXT: L7$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dsrc-L7$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _ddst-L7$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo03:
+; DARWIN-64-STATIC: movl _dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ddst(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo03:
+; DARWIN-64-DYNAMIC: movl _dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo03:
+; DARWIN-64-PIC: movl _dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ddst(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo04() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: foo04:
+; LINUX-64-STATIC: movq $ddst, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo04:
+; LINUX-32-STATIC: movl $ddst, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo04:
+; LINUX-32-PIC: movl $ddst, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo04:
+; DARWIN-32-STATIC: movl $_ddst, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo04:
+; DARWIN-32-DYNAMIC: movl $_ddst, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo04:
+; DARWIN-32-PIC: call L8$pb
+; DARWIN-32-PIC-NEXT: L8$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L8$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L8$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo05() nounwind {
@@ -262,6 +580,70 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
store i32 %1, i32* %0, align 4
ret void
+; LINUX-64-STATIC: foo05:
+; LINUX-64-STATIC: movl dsrc(%rip), %
+; LINUX-64-STATIC: movq dptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo05:
+; LINUX-32-STATIC: movl dsrc, %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo05:
+; LINUX-32-PIC: movl dsrc, %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo05:
+; DARWIN-32-STATIC: movl _dsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo05:
+; DARWIN-32-DYNAMIC: movl _dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo05:
+; DARWIN-32-PIC: call L9$pb
+; DARWIN-32-PIC-NEXT: L9$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dsrc-L9$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L9$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo05:
+; DARWIN-64-STATIC: movl _dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo05:
+; DARWIN-64-DYNAMIC: movl _dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo05:
+; DARWIN-64-PIC: movl _dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo06() nounwind {
@@ -269,12 +651,111 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
ret void
+; LINUX-64-STATIC: foo06:
+; LINUX-64-STATIC: movl lsrc(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ldst(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo06:
+; LINUX-32-STATIC: movl lsrc, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ldst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo06:
+; LINUX-32-PIC: movl lsrc, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ldst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo06:
+; LINUX-64-PIC: movl lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: movl %eax, ldst(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo06:
+; DARWIN-32-STATIC: movl _lsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ldst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo06:
+; DARWIN-32-DYNAMIC: movl _lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ldst
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo06:
+; DARWIN-32-PIC: call L10$pb
+; DARWIN-32-PIC-NEXT: L10$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lsrc-L10$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _ldst-L10$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo06:
+; DARWIN-64-STATIC: movl _lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ldst(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo06:
+; DARWIN-64-DYNAMIC: movl _lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo06:
+; DARWIN-64-PIC: movl _lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ldst(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo07() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: foo07:
+; LINUX-64-STATIC: movq $ldst, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo07:
+; LINUX-32-STATIC: movl $ldst, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo07:
+; LINUX-32-PIC: movl $ldst, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo07:
+; DARWIN-32-STATIC: movl $_ldst, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo07:
+; DARWIN-32-DYNAMIC: movl $_ldst, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo07:
+; DARWIN-32-PIC: call L11$pb
+; DARWIN-32-PIC-NEXT: L11$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L11$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L11$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
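; Editorial gloss (not part of the original commit): storing the address
; of a global shows the same split -- static code can embed the address
; as a link-time immediate ("movl $_ldst, _lptr" / "movq $ldst, lptr"),
; whereas PIC code cannot use an absolute immediate and must compute the
; address RIP-relatively with "leaq _ldst(%rip), %rax" before storing it.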
define void @foo08() nounwind {
@@ -283,6 +764,68 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
ret void
+; LINUX-64-STATIC: foo08:
+; LINUX-64-STATIC: movl lsrc(%rip), %
+; LINUX-64-STATIC: movq lptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo08:
+; LINUX-32-STATIC: movl lsrc, %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo08:
+; LINUX-32-PIC: movl lsrc, %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo08:
+; LINUX-64-PIC: movl lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo08:
+; DARWIN-32-STATIC: movl _lsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo08:
+; DARWIN-32-DYNAMIC: movl _lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo08:
+; DARWIN-32-PIC: call L12$pb
+; DARWIN-32-PIC-NEXT: L12$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lsrc-L12$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L12$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo08:
+; DARWIN-64-STATIC: movl _lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo08:
+; DARWIN-64-DYNAMIC: movl _lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo08:
+; DARWIN-64-PIC: movl _lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux00() nounwind {
@@ -290,6 +833,70 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
ret void
+; LINUX-64-STATIC: qux00:
+; LINUX-64-STATIC: movl src+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, dst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux00:
+; LINUX-32-STATIC: movl src+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux00:
+; LINUX-32-PIC: movl src+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, dst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux00:
+; DARWIN-32-STATIC: movl _src+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux00:
+; DARWIN-32-PIC: call L13$pb
+; DARWIN-32-PIC-NEXT: L13$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L13$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L13$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qxx00() nounwind {
@@ -297,18 +904,202 @@ entry:
%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
ret void
+; LINUX-64-STATIC: qxx00:
+; LINUX-64-STATIC: movl xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, xdst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx00:
+; LINUX-32-STATIC: movl xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, xdst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qxx00:
+; LINUX-32-PIC: movl xsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, xdst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qxx00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qxx00:
+; DARWIN-32-STATIC: movl _xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _xdst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qxx00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qxx00:
+; DARWIN-32-PIC: call L14$pb
+; DARWIN-32-PIC-NEXT: L14$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L14$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L14$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qxx00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qxx00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qxx00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux01() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: qux01:
+; LINUX-64-STATIC: movq $dst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux01:
+; LINUX-32-STATIC: movl $dst+64, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux01:
+; LINUX-32-PIC: movl $dst+64, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux01:
+; DARWIN-32-STATIC: movl $_dst+64, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux01:
+; DARWIN-32-PIC: call L15$pb
+; DARWIN-32-PIC-NEXT: L15$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L15$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: addl $64, %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
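; Editorial gloss on the qux/qxx tests (not part of the original
; commit): these access element 16, i.e. byte offset 64, of each array.
; In static code the offset folds into the relocation ("movl $dst+64,
; ptr"), while under 64-bit PIC the base must first be loaded from the
; GOT and the offset added separately ("addq $64, %rax"), since a
; displacement cannot be folded into a GOT entry.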
define void @qxx01() nounwind {
entry:
store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: qxx01:
+; LINUX-64-STATIC: movq $xdst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx01:
+; LINUX-32-STATIC: movl $xdst+64, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qxx01:
+; LINUX-32-PIC: movl $xdst+64, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qxx01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qxx01:
+; DARWIN-32-STATIC: movl $_xdst+64, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qxx01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qxx01:
+; DARWIN-32-PIC: call L16$pb
+; DARWIN-32-PIC-NEXT: L16$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L16$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: addl $64, %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qxx01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qxx01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qxx01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux02() nounwind {
@@ -317,7 +1108,81 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux02:
+; LINUX-64-STATIC: movl src+64(%rip), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux02:
+; LINUX-32-STATIC: movl src+64, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qux02:
+; LINUX-32-PIC: movl src+64, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux02:
+; DARWIN-32-STATIC: movl _src+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux02:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux02:
+; DARWIN-32-PIC: call L17$pb
+; DARWIN-32-PIC-NEXT: L17$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L17$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qxx02() nounwind {
@@ -326,7 +1191,81 @@ entry:
%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qxx02:
+; LINUX-64-STATIC: movl xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx02:
+; LINUX-32-STATIC: movl xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qxx02:
+; LINUX-32-PIC: movl xsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qxx02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qxx02:
+; DARWIN-32-STATIC: movl _xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qxx02:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qxx02:
+; DARWIN-32-PIC: call L18$pb
+; DARWIN-32-PIC-NEXT: L18$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L18$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L18$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qxx02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qxx02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qxx02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux03() nounwind {
@@ -334,12 +1273,115 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
ret void
+; LINUX-64-STATIC: qux03:
+; LINUX-64-STATIC: movl dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ddst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux03:
+; LINUX-32-STATIC: movl dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ddst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux03:
+; LINUX-32-PIC: movl dsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ddst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux03:
+; DARWIN-32-STATIC: movl _dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ddst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux03:
+; DARWIN-32-DYNAMIC: movl _dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ddst+64
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux03:
+; DARWIN-32-PIC: call L19$pb
+; DARWIN-32-PIC-NEXT: L19$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L19$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L19$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux03:
+; DARWIN-64-STATIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+64(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux03:
+; DARWIN-64-DYNAMIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux03:
+; DARWIN-64-PIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ddst+64(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux04() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: qux04:
+; LINUX-64-STATIC: movq $ddst+64, dptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux04:
+; LINUX-32-STATIC: movl $ddst+64, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux04:
+; LINUX-32-PIC: movl $ddst+64, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux04:
+; DARWIN-32-STATIC: movl $_ddst+64, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux04:
+; DARWIN-32-DYNAMIC: movl $_ddst+64, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux04:
+; DARWIN-32-PIC: call L20$pb
+; DARWIN-32-PIC-NEXT: L20$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L20$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L20$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux04:
+; DARWIN-64-STATIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux04:
+; DARWIN-64-DYNAMIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux04:
+; DARWIN-64-PIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux05() nounwind {
@@ -348,7 +1390,71 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux05:
+; LINUX-64-STATIC: movl dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux05:
+; LINUX-32-STATIC: movl dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qux05:
+; LINUX-32-PIC: movl dsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux05:
+; DARWIN-32-STATIC: movl _dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux05:
+; DARWIN-32-DYNAMIC: movl _dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux05:
+; DARWIN-32-PIC: call L21$pb
+; DARWIN-32-PIC-NEXT: L21$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L21$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L21$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux05:
+; DARWIN-64-STATIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux05:
+; DARWIN-64-DYNAMIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux05:
+; DARWIN-64-PIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux06() nounwind {
@@ -356,12 +1462,111 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
ret void
+; LINUX-64-STATIC: qux06:
+; LINUX-64-STATIC: movl lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ldst+64
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux06:
+; LINUX-32-STATIC: movl lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ldst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux06:
+; LINUX-32-PIC: movl lsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ldst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux06:
+; LINUX-64-PIC: movl lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: movl %eax, ldst+64(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux06:
+; DARWIN-32-STATIC: movl _lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ldst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux06:
+; DARWIN-32-DYNAMIC: movl _lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ldst+64
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux06:
+; DARWIN-32-PIC: call L22$pb
+; DARWIN-32-PIC-NEXT: L22$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L22$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L22$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux06:
+; DARWIN-64-STATIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+64(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux06:
+; DARWIN-64-DYNAMIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux06:
+; DARWIN-64-PIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ldst+64(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux07() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: qux07:
+; LINUX-64-STATIC: movq $ldst+64, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux07:
+; LINUX-32-STATIC: movl $ldst+64, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux07:
+; LINUX-32-PIC: movl $ldst+64, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux07:
+; LINUX-64-PIC: leaq ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux07:
+; DARWIN-32-STATIC: movl $_ldst+64, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux07:
+; DARWIN-32-DYNAMIC: movl $_ldst+64, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux07:
+; DARWIN-32-PIC: call L23$pb
+; DARWIN-32-PIC-NEXT: L23$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L23$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L23$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux07:
+; DARWIN-64-STATIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux07:
+; DARWIN-64-DYNAMIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux07:
+; DARWIN-64-PIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux08() nounwind {
@@ -370,7 +1575,69 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux08:
+; LINUX-64-STATIC: movl lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux08:
+; LINUX-32-STATIC: movl lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qux08:
+; LINUX-32-PIC: movl lsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux08:
+; LINUX-64-PIC: movl lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux08:
+; DARWIN-32-STATIC: movl _lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux08:
+; DARWIN-32-DYNAMIC: movl _lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux08:
+; DARWIN-32-PIC: call L24$pb
+; DARWIN-32-PIC-NEXT: L24$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L24$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L24$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux08:
+; DARWIN-64-STATIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux08:
+; DARWIN-64-DYNAMIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux08:
+; DARWIN-64-PIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind00(i64 %i) nounwind {
@@ -380,6 +1647,75 @@ entry:
%2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ind00:
+; LINUX-64-STATIC: movl src(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, dst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, dst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, dst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _dst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind00:
+; DARWIN-32-PIC: call L25$pb
+; DARWIN-32-PIC-NEXT: L25$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L25$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L25$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ixd00(i64 %i) nounwind {
@@ -389,6 +1725,75 @@ entry:
%2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ixd00:
+; LINUX-64-STATIC: movl xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, xdst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, xdst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ixd00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, xdst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ixd00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ixd00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _xdst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ixd00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ixd00:
+; DARWIN-32-PIC: call L26$pb
+; DARWIN-32-PIC-NEXT: L26$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L26$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L26$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ixd00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ixd00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ixd00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind01(i64 %i) nounwind {
@@ -396,6 +1801,75 @@ entry:
%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: ind01:
+; LINUX-64-STATIC: leaq dst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind01:
+; LINUX-64-PIC: shlq $2, %rdi
+; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rdi, (%rax)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: shll $2, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind01:
+; DARWIN-32-PIC: call L27$pb
+; DARWIN-32-PIC-NEXT: L27$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: shll $2, %ecx
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L27$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind01:
+; DARWIN-64-STATIC: shlq $2, %rdi
+; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind01:
+; DARWIN-64-DYNAMIC: shlq $2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind01:
+; DARWIN-64-PIC: shlq $2, %rdi
+; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-PIC-NEXT: ret
}
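; Editorial gloss (not part of the original commit): with a runtime
; index, static code uses a scaled-index operand directly ("leaq
; dst(,%rdi,4), %rax"), but 64-bit PIC code builds the address by hand
; -- "shlq $2, %rdi" scales the index and "addq dst@GOTPCREL(%rip),
; %rdi" adds the GOT-loaded base -- because the base address is only
; known at run time.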
define void @ixd01(i64 %i) nounwind {
@@ -403,6 +1877,75 @@ entry:
%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: ixd01:
+; LINUX-64-STATIC: leaq xdst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ixd01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ixd01:
+; LINUX-64-PIC: shlq $2, %rdi
+; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rdi, (%rax)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ixd01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ixd01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: shll $2, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ixd01:
+; DARWIN-32-PIC: call L28$pb
+; DARWIN-32-PIC-NEXT: L28$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: shll $2, %ecx
+; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L28$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ixd01:
+; DARWIN-64-STATIC: shlq $2, %rdi
+; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ixd01:
+; DARWIN-64-DYNAMIC: shlq $2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ixd01:
+; DARWIN-64-PIC: shlq $2, %rdi
+; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind02(i64 %i) nounwind {
@@ -413,6 +1956,85 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ind02:
+; LINUX-64-STATIC: movl src(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind02:
+; DARWIN-32-PIC: call L29$pb
+; DARWIN-32-PIC-NEXT: L29$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L29$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ixd02(i64 %i) nounwind {
@@ -423,6 +2045,85 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ixd02:
+; LINUX-64-STATIC: movl xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ixd02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ixd02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ixd02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ixd02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ixd02:
+; DARWIN-32-PIC: call L30$pb
+; DARWIN-32-PIC-NEXT: L30$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L30$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L30$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ixd02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ixd02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ixd02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind03(i64 %i) nounwind {
@@ -432,6 +2133,71 @@ entry:
%2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ind03:
+; LINUX-64-STATIC: movl dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ddst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ddst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ddst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ddst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ddst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind03:
+; DARWIN-32-PIC: call L31$pb
+; DARWIN-32-PIC-NEXT: L31$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dsrc-L31$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, _ddst-L31$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
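; Editorial gloss (an assumption about the global declarations earlier
; in this test, which are not shown here): the "d"- and "l"-prefixed
; globals appear to have hidden or internal visibility, so even PIC
; code can address them directly with "leaq _dsrc(%rip), %rax" rather
; than indirecting through the GOT as the default-visibility "src" and
; "dst" globals must.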
define void @ind04(i64 %i) nounwind {
@@ -439,6 +2205,68 @@ entry:
%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
store i32* %0, i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: ind04:
+; LINUX-64-STATIC: leaq ddst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind04:
+; LINUX-64-PIC: shlq $2, %rdi
+; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rdi, (%rax)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind04:
+; DARWIN-32-PIC: call L32$pb
+; DARWIN-32-PIC-NEXT: L32$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal _ddst-L32$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L32$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind05(i64 %i) nounwind {
@@ -449,6 +2277,78 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ind05:
+; LINUX-64-STATIC: movl dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl dptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl dptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _dptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind05:
+; DARWIN-32-PIC: call L33$pb
+; DARWIN-32-PIC-NEXT: L33$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dsrc-L33$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L33$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind05:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind05:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind05:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind06(i64 %i) nounwind {
@@ -458,6 +2358,71 @@ entry:
%2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ind06:
+; LINUX-64-STATIC: movl lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ldst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ldst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ldst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ldst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ldst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind06:
+; DARWIN-32-PIC: call L34$pb
+; DARWIN-32-PIC-NEXT: L34$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lsrc-L34$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, _ldst-L34$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind07(i64 %i) nounwind {
@@ -465,6 +2430,67 @@ entry:
%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
store i32* %0, i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: ind07:
+; LINUX-64-STATIC: leaq ldst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind07:
+; DARWIN-32-PIC: call L35$pb
+; DARWIN-32-PIC-NEXT: L35$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal _ldst-L35$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L35$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind08(i64 %i) nounwind {
@@ -475,6 +2501,77 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ind08:
+; LINUX-64-STATIC: movl lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl lptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl lptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind08:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _lptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind08:
+; DARWIN-32-PIC: call L36$pb
+; DARWIN-32-PIC-NEXT: L36$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lsrc-L36$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L36$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind08:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind08:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind08:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off00(i64 %i) nounwind {
@@ -485,6 +2582,75 @@ entry:
%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: off00:
+; LINUX-64-STATIC: movl src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, dst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, dst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, dst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _dst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off00:
+; DARWIN-32-PIC: call L37$pb
+; DARWIN-32-PIC-NEXT: L37$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L37$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L37$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @oxf00(i64 %i) nounwind {
@@ -495,6 +2661,75 @@ entry:
%3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: oxf00:
+; LINUX-64-STATIC: movl xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, xdst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, xdst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: oxf00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, xdst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: oxf00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _oxf00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _xdst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _oxf00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _oxf00:
+; DARWIN-32-PIC: call L38$pb
+; DARWIN-32-PIC-NEXT: L38$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L38$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L38$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _oxf00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _oxf00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _oxf00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off01(i64 %i) nounwind {
@@ -503,6 +2738,75 @@ entry:
%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: off01:
+; LINUX-64-STATIC: leaq dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off01:
+; DARWIN-32-PIC: call L39$pb
+; DARWIN-32-PIC-NEXT: L39$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L39$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: leal 64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @oxf01(i64 %i) nounwind {
@@ -511,6 +2815,75 @@ entry:
%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: oxf01:
+; LINUX-64-STATIC: leaq xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: oxf01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: oxf01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _oxf01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _oxf01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _oxf01:
+; DARWIN-32-PIC: call L40$pb
+; DARWIN-32-PIC-NEXT: L40$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L40$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: leal 64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _oxf01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _oxf01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _oxf01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off02(i64 %i) nounwind {
@@ -522,6 +2895,85 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: off02:
+; LINUX-64-STATIC: movl src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off02:
+; DARWIN-32-PIC: call L41$pb
+; DARWIN-32-PIC-NEXT: L41$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L41$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @oxf02(i64 %i) nounwind {
@@ -533,6 +2985,85 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: oxf02:
+; LINUX-64-STATIC: movl xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: oxf02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: oxf02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _oxf02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _oxf02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _oxf02:
+; DARWIN-32-PIC: call L42$pb
+; DARWIN-32-PIC-NEXT: L42$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L42$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L42$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _oxf02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _oxf02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _oxf02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off03(i64 %i) nounwind {
@@ -543,6 +3074,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: off03:
+; LINUX-64-STATIC: movl dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ddst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ddst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ddst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ddst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ddst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off03:
+; DARWIN-32-PIC: call L43$pb
+; DARWIN-32-PIC-NEXT: L43$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L43$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L43$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off04(i64 %i) nounwind {
@@ -551,6 +3147,68 @@ entry:
%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
store i32* %0, i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: off04:
+; LINUX-64-STATIC: leaq ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off04:
+; DARWIN-32-PIC: call L44$pb
+; DARWIN-32-PIC-NEXT: L44$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L44$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L44$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off05(i64 %i) nounwind {
@@ -562,6 +3220,78 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: off05:
+; LINUX-64-STATIC: movl dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl dptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl dptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _dptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off05:
+; DARWIN-32-PIC: call L45$pb
+; DARWIN-32-PIC-NEXT: L45$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L45$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L45$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off05:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off05:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off05:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off06(i64 %i) nounwind {
@@ -572,6 +3302,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: off06:
+; LINUX-64-STATIC: movl lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ldst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ldst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ldst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ldst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ldst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off06:
+; DARWIN-32-PIC: call L46$pb
+; DARWIN-32-PIC-NEXT: L46$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L46$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L46$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off07(i64 %i) nounwind {
@@ -580,6 +3375,67 @@ entry:
%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
store i32* %0, i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: off07:
+; LINUX-64-STATIC: leaq ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off07:
+; DARWIN-32-PIC: call L47$pb
+; DARWIN-32-PIC-NEXT: L47$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L47$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L47$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off08(i64 %i) nounwind {
@@ -591,6 +3447,77 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: off08:
+; LINUX-64-STATIC: movl lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl lptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl lptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off08:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _lptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off08:
+; DARWIN-32-PIC: call L48$pb
+; DARWIN-32-PIC-NEXT: L48$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L48$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L48$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off08:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off08:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off08:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo00(i64 %i) nounwind {
@@ -598,12 +3525,136 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
ret void
+; LINUX-64-STATIC: moo00:
+; LINUX-64-STATIC: movl src+262144(%rip), %eax
+; LINUX-64-STATIC: movl %eax, dst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo00:
+; LINUX-32-STATIC: movl src+262144, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dst+262144
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo00:
+; LINUX-32-PIC: movl src+262144, %eax
+; LINUX-32-PIC-NEXT: movl %eax, dst+262144
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo00:
+; DARWIN-32-STATIC: movl _src+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dst+262144
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo00:
+; DARWIN-32-PIC: call L49$pb
+; DARWIN-32-PIC-NEXT: L49$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L49$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L49$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo01(i64 %i) nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: moo01:
+; LINUX-64-STATIC: movq $dst+262144, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo01:
+; LINUX-32-STATIC: movl $dst+262144, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo01:
+; LINUX-32-PIC: movl $dst+262144, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo01:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo01:
+; DARWIN-32-STATIC: movl $_dst+262144, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo01:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo01:
+; DARWIN-32-PIC: call L50$pb
+; DARWIN-32-PIC-NEXT: L50$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl $262144, %ecx
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L50$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo01:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo01:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo01:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo02(i64 %i) nounwind {
@@ -613,6 +3664,80 @@ entry:
%2 = getelementptr i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: moo02:
+; LINUX-64-STATIC: movl src+262144(%rip), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo02:
+; LINUX-32-STATIC: movl src+262144, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo02:
+; LINUX-32-PIC: movl src+262144, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo02:
+; DARWIN-32-STATIC: movl _src+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo02:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo02:
+; DARWIN-32-PIC: call L51$pb
+; DARWIN-32-PIC-NEXT: L51$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L51$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L51$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo03(i64 %i) nounwind {
@@ -620,12 +3745,115 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
ret void
+; LINUX-64-STATIC: moo03:
+; LINUX-64-STATIC: movl dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ddst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo03:
+; LINUX-32-STATIC: movl dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ddst+262144
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo03:
+; LINUX-32-PIC: movl dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ddst+262144
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo03:
+; DARWIN-32-STATIC: movl _dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ddst+262144
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo03:
+; DARWIN-32-DYNAMIC: movl _dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ddst+262144
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo03:
+; DARWIN-32-PIC: call L52$pb
+; DARWIN-32-PIC-NEXT: L52$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L52$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L52$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo03:
+; DARWIN-64-STATIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo03:
+; DARWIN-64-DYNAMIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo03:
+; DARWIN-64-PIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ddst+262144(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo04(i64 %i) nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: moo04:
+; LINUX-64-STATIC: movq $ddst+262144, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo04:
+; LINUX-32-STATIC: movl $ddst+262144, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo04:
+; LINUX-32-PIC: movl $ddst+262144, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo04:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo04:
+; DARWIN-32-STATIC: movl $_ddst+262144, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo04:
+; DARWIN-32-DYNAMIC: movl $_ddst+262144, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo04:
+; DARWIN-32-PIC: call L53$pb
+; DARWIN-32-PIC-NEXT: L53$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L53$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L53$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo04:
+; DARWIN-64-STATIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo04:
+; DARWIN-64-DYNAMIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo04:
+; DARWIN-64-PIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo05(i64 %i) nounwind {
@@ -635,6 +3863,70 @@ entry:
%2 = getelementptr i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: moo05:
+; LINUX-64-STATIC: movl dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo05:
+; LINUX-32-STATIC: movl dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo05:
+; LINUX-32-PIC: movl dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo05:
+; DARWIN-32-STATIC: movl _dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo05:
+; DARWIN-32-DYNAMIC: movl _dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo05:
+; DARWIN-32-PIC: call L54$pb
+; DARWIN-32-PIC-NEXT: L54$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L54$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L54$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo05:
+; DARWIN-64-STATIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo05:
+; DARWIN-64-DYNAMIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo05:
+; DARWIN-64-PIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo06(i64 %i) nounwind {
@@ -642,12 +3934,111 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
ret void
+; LINUX-64-STATIC: moo06:
+; LINUX-64-STATIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ldst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo06:
+; LINUX-32-STATIC: movl lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ldst+262144
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo06:
+; LINUX-32-PIC: movl lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ldst+262144
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo06:
+; LINUX-64-PIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: movl %eax, ldst+262144(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo06:
+; DARWIN-32-STATIC: movl _lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ldst+262144
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo06:
+; DARWIN-32-DYNAMIC: movl _lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ldst+262144
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo06:
+; DARWIN-32-PIC: call L55$pb
+; DARWIN-32-PIC-NEXT: L55$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L55$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L55$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo06:
+; DARWIN-64-STATIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo06:
+; DARWIN-64-DYNAMIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo06:
+; DARWIN-64-PIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ldst+262144(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo07(i64 %i) nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: moo07:
+; LINUX-64-STATIC: movq $ldst+262144, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo07:
+; LINUX-32-STATIC: movl $ldst+262144, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo07:
+; LINUX-32-PIC: movl $ldst+262144, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo07:
+; LINUX-64-PIC: leaq ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo07:
+; DARWIN-32-STATIC: movl $_ldst+262144, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo07:
+; DARWIN-32-DYNAMIC: movl $_ldst+262144, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo07:
+; DARWIN-32-PIC: call L56$pb
+; DARWIN-32-PIC-NEXT: L56$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L56$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L56$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo07:
+; DARWIN-64-STATIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo07:
+; DARWIN-64-DYNAMIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo07:
+; DARWIN-64-PIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo08(i64 %i) nounwind {
@@ -657,6 +4048,68 @@ entry:
%2 = getelementptr i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: moo08:
+; LINUX-64-STATIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo08:
+; LINUX-32-STATIC: movl lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo08:
+; LINUX-32-PIC: movl lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo08:
+; LINUX-64-PIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo08:
+; DARWIN-32-STATIC: movl _lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo08:
+; DARWIN-32-DYNAMIC: movl _lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo08:
+; DARWIN-32-PIC: call L57$pb
+; DARWIN-32-PIC-NEXT: L57$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L57$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L57$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo08:
+; DARWIN-64-STATIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo08:
+; DARWIN-64-DYNAMIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo08:
+; DARWIN-64-PIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big00(i64 %i) nounwind {
@@ -667,6 +4120,75 @@ entry:
%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: big00:
+; LINUX-64-STATIC: movl src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, dst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, dst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, dst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _dst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big00:
+; DARWIN-32-PIC: call L58$pb
+; DARWIN-32-PIC-NEXT: L58$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L58$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L58$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big01(i64 %i) nounwind {
@@ -675,6 +4197,75 @@ entry:
%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: big01:
+; LINUX-64-STATIC: leaq dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big01:
+; DARWIN-32-PIC: call L59$pb
+; DARWIN-32-PIC-NEXT: L59$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L59$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: leal 262144(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big02(i64 %i) nounwind {
@@ -686,6 +4277,85 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: big02:
+; LINUX-64-STATIC: movl src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big02:
+; DARWIN-32-PIC: call L60$pb
+; DARWIN-32-PIC-NEXT: L60$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L60$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L60$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big03(i64 %i) nounwind {
@@ -696,6 +4366,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: big03:
+; LINUX-64-STATIC: movl dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ddst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ddst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ddst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big03:
+; DARWIN-32-PIC: call L61$pb
+; DARWIN-32-PIC-NEXT: L61$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L61$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L61$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big04(i64 %i) nounwind {
@@ -704,6 +4439,68 @@ entry:
%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
store i32* %0, i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: big04:
+; LINUX-64-STATIC: leaq ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big04:
+; DARWIN-32-PIC: call L62$pb
+; DARWIN-32-PIC-NEXT: L62$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L62$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L62$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big05(i64 %i) nounwind {
@@ -715,6 +4512,78 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: big05:
+; LINUX-64-STATIC: movl dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl dptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl dptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _dptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big05:
+; DARWIN-32-PIC: call L63$pb
+; DARWIN-32-PIC-NEXT: L63$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L63$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L63$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big05:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big05:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big05:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big06(i64 %i) nounwind {
@@ -725,6 +4594,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: big06:
+; LINUX-64-STATIC: movl lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ldst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ldst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ldst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big06:
+; DARWIN-32-PIC: call L64$pb
+; DARWIN-32-PIC-NEXT: L64$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L64$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L64$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big07(i64 %i) nounwind {
@@ -733,6 +4667,67 @@ entry:
%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
store i32* %0, i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: big07:
+; LINUX-64-STATIC: leaq ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big07:
+; DARWIN-32-PIC: call L65$pb
+; DARWIN-32-PIC-NEXT: L65$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L65$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L65$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big08(i64 %i) nounwind {
@@ -744,81 +4739,782 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: big08:
+; LINUX-64-STATIC: movl lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl lptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl lptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big08:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _lptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big08:
+; DARWIN-32-PIC: call L66$pb
+; DARWIN-32-PIC-NEXT: L66$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L66$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L66$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big08:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big08:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big08:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar00() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: bar00:
+; LINUX-64-STATIC: movl $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar00:
+; LINUX-32-STATIC: movl $src, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar00:
+; LINUX-32-PIC: movl $src, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar00:
+; DARWIN-32-STATIC: movl $_src, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar00:
+; DARWIN-32-PIC: call L67$pb
+; DARWIN-32-PIC-NEXT: L67$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L67$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxr00() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: bxr00:
+; LINUX-64-STATIC: movl $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr00:
+; LINUX-32-STATIC: movl $xsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxr00:
+; LINUX-32-PIC: movl $xsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxr00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxr00:
+; DARWIN-32-STATIC: movl $_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxr00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxr00:
+; DARWIN-32-PIC: call L68$pb
+; DARWIN-32-PIC-NEXT: L68$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L68$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxr00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxr00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxr00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar01() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: bar01:
+; LINUX-64-STATIC: movl $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar01:
+; LINUX-32-STATIC: movl $dst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar01:
+; LINUX-32-PIC: movl $dst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar01:
+; DARWIN-32-STATIC: movl $_dst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar01:
+; DARWIN-32-PIC: call L69$pb
+; DARWIN-32-PIC-NEXT: L69$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L69$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxr01() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: bxr01:
+; LINUX-64-STATIC: movl $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr01:
+; LINUX-32-STATIC: movl $xdst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxr01:
+; LINUX-32-PIC: movl $xdst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxr01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxr01:
+; DARWIN-32-STATIC: movl $_xdst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxr01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxr01:
+; DARWIN-32-PIC: call L70$pb
+; DARWIN-32-PIC-NEXT: L70$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L70$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxr01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxr01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxr01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar02() nounwind {
entry:
ret i8* bitcast (i32** @ptr to i8*)
+; LINUX-64-STATIC: bar02:
+; LINUX-64-STATIC: movl $ptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar02:
+; LINUX-32-STATIC: movl $ptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar02:
+; LINUX-32-PIC: movl $ptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar02:
+; DARWIN-32-STATIC: movl $_ptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar02:
+; DARWIN-32-PIC: call L71$pb
+; DARWIN-32-PIC-NEXT: L71$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L71$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar03() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: bar03:
+; LINUX-64-STATIC: movl $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar03:
+; LINUX-32-STATIC: movl $dsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar03:
+; LINUX-32-PIC: movl $dsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar03:
+; DARWIN-32-STATIC: movl $_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar03:
+; DARWIN-32-DYNAMIC: movl $_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar03:
+; DARWIN-32-PIC: call L72$pb
+; DARWIN-32-PIC-NEXT: L72$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dsrc-L72$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar04() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: bar04:
+; LINUX-64-STATIC: movl $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar04:
+; LINUX-32-STATIC: movl $ddst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar04:
+; LINUX-32-PIC: movl $ddst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar04:
+; DARWIN-32-STATIC: movl $_ddst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar04:
+; DARWIN-32-DYNAMIC: movl $_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar04:
+; DARWIN-32-PIC: call L73$pb
+; DARWIN-32-PIC-NEXT: L73$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L73$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar05() nounwind {
entry:
ret i8* bitcast (i32** @dptr to i8*)
+; LINUX-64-STATIC: bar05:
+; LINUX-64-STATIC: movl $dptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar05:
+; LINUX-32-STATIC: movl $dptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar05:
+; LINUX-32-PIC: movl $dptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar05:
+; DARWIN-32-STATIC: movl $_dptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar05:
+; DARWIN-32-DYNAMIC: movl $_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar05:
+; DARWIN-32-PIC: call L74$pb
+; DARWIN-32-PIC-NEXT: L74$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dptr-L74$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar05:
+; DARWIN-64-STATIC: leaq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar05:
+; DARWIN-64-DYNAMIC: leaq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar05:
+; DARWIN-64-PIC: leaq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar06() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: bar06:
+; LINUX-64-STATIC: movl $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar06:
+; LINUX-32-STATIC: movl $lsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar06:
+; LINUX-32-PIC: movl $lsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar06:
+; DARWIN-32-STATIC: movl $_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar06:
+; DARWIN-32-DYNAMIC: movl $_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar06:
+; DARWIN-32-PIC: call L75$pb
+; DARWIN-32-PIC-NEXT: L75$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lsrc-L75$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar07() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: bar07:
+; LINUX-64-STATIC: movl $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar07:
+; LINUX-32-STATIC: movl $ldst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar07:
+; LINUX-32-PIC: movl $ldst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar07:
+; DARWIN-32-STATIC: movl $_ldst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar07:
+; DARWIN-32-DYNAMIC: movl $_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar07:
+; DARWIN-32-PIC: call L76$pb
+; DARWIN-32-PIC-NEXT: L76$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L76$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar08() nounwind {
entry:
ret i8* bitcast (i32** @lptr to i8*)
+; LINUX-64-STATIC: bar08:
+; LINUX-64-STATIC: movl $lptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar08:
+; LINUX-32-STATIC: movl $lptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar08:
+; LINUX-32-PIC: movl $lptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar08:
+; LINUX-64-PIC: leaq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar08:
+; DARWIN-32-STATIC: movl $_lptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar08:
+; DARWIN-32-DYNAMIC: movl $_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar08:
+; DARWIN-32-PIC: call L77$pb
+; DARWIN-32-PIC-NEXT: L77$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lptr-L77$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar08:
+; DARWIN-64-STATIC: leaq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar08:
+; DARWIN-64-DYNAMIC: leaq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar08:
+; DARWIN-64-PIC: leaq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har00() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: har00:
+; LINUX-64-STATIC: movl $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har00:
+; LINUX-32-STATIC: movl $src, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har00:
+; LINUX-32-PIC: movl $src, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har00:
+; DARWIN-32-STATIC: movl $_src, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har00:
+; DARWIN-32-PIC: call L78$pb
+; DARWIN-32-PIC-NEXT: L78$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L78$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @hxr00() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: hxr00:
+; LINUX-64-STATIC: movl $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr00:
+; LINUX-32-STATIC: movl $xsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: hxr00:
+; LINUX-32-PIC: movl $xsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: hxr00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _hxr00:
+; DARWIN-32-STATIC: movl $_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _hxr00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _hxr00:
+; DARWIN-32-PIC: call L79$pb
+; DARWIN-32-PIC-NEXT: L79$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L79$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _hxr00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _hxr00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _hxr00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har01() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: har01:
+; LINUX-64-STATIC: movl $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har01:
+; LINUX-32-STATIC: movl $dst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har01:
+; LINUX-32-PIC: movl $dst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har01:
+; DARWIN-32-STATIC: movl $_dst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har01:
+; DARWIN-32-PIC: call L80$pb
+; DARWIN-32-PIC-NEXT: L80$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L80$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @hxr01() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: hxr01:
+; LINUX-64-STATIC: movl $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr01:
+; LINUX-32-STATIC: movl $xdst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: hxr01:
+; LINUX-32-PIC: movl $xdst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: hxr01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _hxr01:
+; DARWIN-32-STATIC: movl $_xdst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _hxr01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _hxr01:
+; DARWIN-32-PIC: call L81$pb
+; DARWIN-32-PIC-NEXT: L81$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L81$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _hxr01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _hxr01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _hxr01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har02() nounwind {
@@ -826,16 +5522,148 @@ entry:
%0 = load i32** @ptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
+; LINUX-64-STATIC: har02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har02:
+; LINUX-32-STATIC: movl ptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har02:
+; LINUX-32-PIC: movl ptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har02:
+; DARWIN-32-STATIC: movl _ptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har02:
+; DARWIN-32-PIC: call L82$pb
+; DARWIN-32-PIC-NEXT: L82$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L82$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har03() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: har03:
+; LINUX-64-STATIC: movl $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har03:
+; LINUX-32-STATIC: movl $dsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har03:
+; LINUX-32-PIC: movl $dsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har03:
+; DARWIN-32-STATIC: movl $_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har03:
+; DARWIN-32-DYNAMIC: movl $_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har03:
+; DARWIN-32-PIC: call L83$pb
+; DARWIN-32-PIC-NEXT: L83$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dsrc-L83$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har04() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: har04:
+; LINUX-64-STATIC: movl $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har04:
+; LINUX-32-STATIC: movl $ddst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har04:
+; LINUX-32-PIC: movl $ddst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har04:
+; DARWIN-32-STATIC: movl $_ddst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har04:
+; DARWIN-32-DYNAMIC: movl $_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har04:
+; DARWIN-32-PIC: call L84$pb
+; DARWIN-32-PIC-NEXT: L84$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L84$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har05() nounwind {
@@ -843,16 +5671,143 @@ entry:
%0 = load i32** @dptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
+; LINUX-64-STATIC: har05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har05:
+; LINUX-32-STATIC: movl dptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har05:
+; LINUX-32-PIC: movl dptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har05:
+; DARWIN-32-STATIC: movl _dptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har05:
+; DARWIN-32-DYNAMIC: movl _dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har05:
+; DARWIN-32-PIC: call L85$pb
+; DARWIN-32-PIC-NEXT: L85$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L85$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har06() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: har06:
+; LINUX-64-STATIC: movl $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har06:
+; LINUX-32-STATIC: movl $lsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har06:
+; LINUX-32-PIC: movl $lsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har06:
+; DARWIN-32-STATIC: movl $_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har06:
+; DARWIN-32-DYNAMIC: movl $_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har06:
+; DARWIN-32-PIC: call L86$pb
+; DARWIN-32-PIC-NEXT: L86$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lsrc-L86$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har07() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: har07:
+; LINUX-64-STATIC: movl $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har07:
+; LINUX-32-STATIC: movl $ldst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har07:
+; LINUX-32-PIC: movl $ldst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har07:
+; DARWIN-32-STATIC: movl $_ldst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har07:
+; DARWIN-32-DYNAMIC: movl $_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har07:
+; DARWIN-32-PIC: call L87$pb
+; DARWIN-32-PIC-NEXT: L87$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L87$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har08() nounwind {
@@ -860,26 +5815,260 @@ entry:
%0 = load i32** @lptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
+; LINUX-64-STATIC: har08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har08:
+; LINUX-32-STATIC: movl lptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har08:
+; LINUX-32-PIC: movl lptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har08:
+; DARWIN-32-STATIC: movl _lptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har08:
+; DARWIN-32-DYNAMIC: movl _lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har08:
+; DARWIN-32-PIC: call L88$pb
+; DARWIN-32-PIC-NEXT: L88$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L88$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat00() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat00:
+; LINUX-64-STATIC: movl $src+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat00:
+; LINUX-32-STATIC: movl $src+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat00:
+; LINUX-32-PIC: movl $src+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat00:
+; DARWIN-32-STATIC: movl $_src+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat00:
+; DARWIN-32-PIC: call L89$pb
+; DARWIN-32-PIC-NEXT: L89$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L89$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxt00() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt00:
+; LINUX-64-STATIC: movl $xsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt00:
+; LINUX-32-STATIC: movl $xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxt00:
+; LINUX-32-PIC: movl $xsrc+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxt00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxt00:
+; DARWIN-32-STATIC: movl $_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxt00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxt00:
+; DARWIN-32-PIC: call L90$pb
+; DARWIN-32-PIC-NEXT: L90$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L90$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxt00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxt00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxt00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat01:
+; LINUX-64-STATIC: movl $dst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat01:
+; LINUX-32-STATIC: movl $dst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat01:
+; LINUX-32-PIC: movl $dst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat01:
+; DARWIN-32-STATIC: movl $_dst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat01:
+; DARWIN-32-PIC: call L91$pb
+; DARWIN-32-PIC-NEXT: L91$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L91$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxt01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt01:
+; LINUX-64-STATIC: movl $xdst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt01:
+; LINUX-32-STATIC: movl $xdst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxt01:
+; LINUX-32-PIC: movl $xdst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxt01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxt01:
+; DARWIN-32-STATIC: movl $_xdst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxt01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxt01:
+; DARWIN-32-PIC: call L92$pb
+; DARWIN-32-PIC-NEXT: L92$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L92$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxt01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxt01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxt01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat02() nounwind {
@@ -888,16 +6077,160 @@ entry:
%1 = getelementptr i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bat02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: addq $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat02:
+; LINUX-32-STATIC: movl ptr, %eax
+; LINUX-32-STATIC-NEXT: addl $64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat02:
+; LINUX-32-PIC: movl ptr, %eax
+; LINUX-32-PIC-NEXT: addl $64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat02:
+; DARWIN-32-STATIC: movl _ptr, %eax
+; DARWIN-32-STATIC-NEXT: addl $64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat02:
+; DARWIN-32-PIC: call L93$pb
+; DARWIN-32-PIC-NEXT: L93$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L93$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat03() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat03:
+; LINUX-64-STATIC: movl $dsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat03:
+; LINUX-32-STATIC: movl $dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat03:
+; LINUX-32-PIC: movl $dsrc+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat03:
+; DARWIN-32-STATIC: movl $_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat03:
+; DARWIN-32-DYNAMIC: movl $_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat03:
+; DARWIN-32-PIC: call L94$pb
+; DARWIN-32-PIC-NEXT: L94$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L94$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat03:
+; DARWIN-64-STATIC: leaq _dsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat03:
+; DARWIN-64-DYNAMIC: leaq _dsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat03:
+; DARWIN-64-PIC: leaq _dsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
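+; Note: @dsrc is evidently defined in this module: Darwin dynamic code can
+; use a plain absolute immediate (32-bit) or rip-relative lea (64-bit), and
+; even DARWIN-64-PIC addresses it rip-relative, while LINUX-64-PIC still
+; goes through the GOT because ELF symbols are preemptible by default.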
define i8* @bat04() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat04:
+; LINUX-64-STATIC: movl $ddst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat04:
+; LINUX-32-STATIC: movl $ddst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat04:
+; LINUX-32-PIC: movl $ddst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat04:
+; DARWIN-32-STATIC: movl $_ddst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat04:
+; DARWIN-32-DYNAMIC: movl $_ddst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat04:
+; DARWIN-32-PIC: call L95$pb
+; DARWIN-32-PIC-NEXT: L95$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L95$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat04:
+; DARWIN-64-STATIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat04:
+; DARWIN-64-DYNAMIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat04:
+; DARWIN-64-PIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat05() nounwind {
@@ -906,16 +6239,153 @@ entry:
%1 = getelementptr i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bat05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: addq $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat05:
+; LINUX-32-STATIC: movl dptr, %eax
+; LINUX-32-STATIC-NEXT: addl $64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat05:
+; LINUX-32-PIC: movl dptr, %eax
+; LINUX-32-PIC-NEXT: addl $64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat05:
+; DARWIN-32-STATIC: movl _dptr, %eax
+; DARWIN-32-STATIC-NEXT: addl $64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat05:
+; DARWIN-32-DYNAMIC: movl _dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat05:
+; DARWIN-32-PIC: call L96$pb
+; DARWIN-32-PIC-NEXT: L96$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L96$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat06() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat06:
+; LINUX-64-STATIC: movl $lsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat06:
+; LINUX-32-STATIC: movl $lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat06:
+; LINUX-32-PIC: movl $lsrc+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat06:
+; LINUX-64-PIC: leaq lsrc+64(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat06:
+; DARWIN-32-STATIC: movl $_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat06:
+; DARWIN-32-DYNAMIC: movl $_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat06:
+; DARWIN-32-PIC: call L97$pb
+; DARWIN-32-PIC-NEXT: L97$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L97$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat06:
+; DARWIN-64-STATIC: leaq _lsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat06:
+; DARWIN-64-DYNAMIC: leaq _lsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat06:
+; DARWIN-64-PIC: leaq _lsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat07() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat07:
+; LINUX-64-STATIC: movl $ldst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat07:
+; LINUX-32-STATIC: movl $ldst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat07:
+; LINUX-32-PIC: movl $ldst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat07:
+; LINUX-64-PIC: leaq ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat07:
+; DARWIN-32-STATIC: movl $_ldst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat07:
+; DARWIN-32-DYNAMIC: movl $_ldst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat07:
+; DARWIN-32-PIC: call L98$pb
+; DARWIN-32-PIC-NEXT: L98$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L98$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat07:
+; DARWIN-64-STATIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat07:
+; DARWIN-64-DYNAMIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat07:
+; DARWIN-64-PIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
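+; Note: judging from the direct rip-relative leas, @lsrc and @ldst are
+; internal/local globals that can never be preempted, so even LINUX-64-PIC
+; skips the GOT load for them.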
define i8* @bat08() nounwind {
@@ -924,21 +6394,217 @@ entry:
%1 = getelementptr i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bat08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: addq $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat08:
+; LINUX-32-STATIC: movl lptr, %eax
+; LINUX-32-STATIC-NEXT: addl $64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat08:
+; LINUX-32-PIC: movl lptr, %eax
+; LINUX-32-PIC-NEXT: addl $64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat08:
+; DARWIN-32-STATIC: movl _lptr, %eax
+; DARWIN-32-STATIC-NEXT: addl $64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat08:
+; DARWIN-32-DYNAMIC: movl _lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat08:
+; DARWIN-32-PIC: call L99$pb
+; DARWIN-32-PIC-NEXT: L99$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L99$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
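+; Note: likewise the local pointer @lptr is fetched with a plain
+; rip-relative movq under both 64-bit PIC models; no GOT entry is needed.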
define i8* @bam00() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam00:
+; LINUX-64-STATIC: movl $src+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam00:
+; LINUX-32-STATIC: movl $src+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam00:
+; LINUX-32-PIC: movl $src+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam00:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam00:
+; DARWIN-32-STATIC: movl $_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam00:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam00:
+; DARWIN-32-PIC: call L100$pb
+; DARWIN-32-PIC-NEXT: L100$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl L_src$non_lazy_ptr-L100$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam00:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam00:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam00:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
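+; Note: for the 256 KiB offset (65536 x i32) the PIC models materialize
+; $262144 as an immediate and add the GOT- or non-lazy-pointer-loaded base
+; onto it, rather than folding the offset into the address computation.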
define i8* @bam01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam01:
+; LINUX-64-STATIC: movl $dst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam01:
+; LINUX-32-STATIC: movl $dst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam01:
+; LINUX-32-PIC: movl $dst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam01:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam01:
+; DARWIN-32-STATIC: movl $_dst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam01:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam01:
+; DARWIN-32-PIC: call L101$pb
+; DARWIN-32-PIC-NEXT: L101$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L101$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam01:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam01:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam01:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxm01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bxm01:
+; LINUX-64-STATIC: movl $xdst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxm01:
+; LINUX-32-STATIC: movl $xdst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxm01:
+; LINUX-32-PIC: movl $xdst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxm01:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxm01:
+; DARWIN-32-STATIC: movl $_xdst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxm01:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxm01:
+; DARWIN-32-PIC: call L102$pb
+; DARWIN-32-PIC-NEXT: L102$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L102$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxm01:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxm01:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxm01:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam02() nounwind {
@@ -947,16 +6613,160 @@ entry:
%1 = getelementptr i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bam02:
+; LINUX-64-STATIC: movl $262144, %eax
+; LINUX-64-STATIC: addq ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam02:
+; LINUX-32-STATIC: movl $262144, %eax
+; LINUX-32-STATIC-NEXT: addl ptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam02:
+; LINUX-32-PIC: movl $262144, %eax
+; LINUX-32-PIC-NEXT: addl ptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq (%rcx), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam02:
+; DARWIN-32-STATIC: movl $262144, %eax
+; DARWIN-32-STATIC-NEXT: addl _ptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl (%ecx), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam02:
+; DARWIN-32-PIC: call L103$pb
+; DARWIN-32-PIC-NEXT: L103$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L103$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl (%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq (%rcx), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq (%rcx), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq (%rcx), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam03() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam03:
+; LINUX-64-STATIC: movl $dsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam03:
+; LINUX-32-STATIC: movl $dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam03:
+; LINUX-32-PIC: movl $dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam03:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam03:
+; DARWIN-32-STATIC: movl $_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam03:
+; DARWIN-32-DYNAMIC: movl $_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam03:
+; DARWIN-32-PIC: call L104$pb
+; DARWIN-32-PIC-NEXT: L104$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L104$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam03:
+; DARWIN-64-STATIC: leaq _dsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam03:
+; DARWIN-64-DYNAMIC: leaq _dsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam03:
+; DARWIN-64-PIC: leaq _dsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam04() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam04:
+; LINUX-64-STATIC: movl $ddst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam04:
+; LINUX-32-STATIC: movl $ddst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam04:
+; LINUX-32-PIC: movl $ddst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam04:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam04:
+; DARWIN-32-STATIC: movl $_ddst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam04:
+; DARWIN-32-DYNAMIC: movl $_ddst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam04:
+; DARWIN-32-PIC: call L105$pb
+; DARWIN-32-PIC-NEXT: L105$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L105$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam04:
+; DARWIN-64-STATIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam04:
+; DARWIN-64-DYNAMIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam04:
+; DARWIN-64-PIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam05() nounwind {
@@ -965,16 +6775,153 @@ entry:
%1 = getelementptr i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bam05:
+; LINUX-64-STATIC: movl $262144, %eax
+; LINUX-64-STATIC: addq dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam05:
+; LINUX-32-STATIC: movl $262144, %eax
+; LINUX-32-STATIC-NEXT: addl dptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam05:
+; LINUX-32-PIC: movl $262144, %eax
+; LINUX-32-PIC-NEXT: addl dptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq (%rcx), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam05:
+; DARWIN-32-STATIC: movl $262144, %eax
+; DARWIN-32-STATIC-NEXT: addl _dptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam05:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl _dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam05:
+; DARWIN-32-PIC: call L106$pb
+; DARWIN-32-PIC-NEXT: L106$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl _dptr-L106$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam05:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam05:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam05:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam06() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam06:
+; LINUX-64-STATIC: movl $lsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam06:
+; LINUX-32-STATIC: movl $lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam06:
+; LINUX-32-PIC: movl $lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam06:
+; LINUX-64-PIC: leaq lsrc+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam06:
+; DARWIN-32-STATIC: movl $_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam06:
+; DARWIN-32-DYNAMIC: movl $_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam06:
+; DARWIN-32-PIC: call L107$pb
+; DARWIN-32-PIC-NEXT: L107$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L107$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam06:
+; DARWIN-64-STATIC: leaq _lsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam06:
+; DARWIN-64-DYNAMIC: leaq _lsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam06:
+; DARWIN-64-PIC: leaq _lsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam07() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam07:
+; LINUX-64-STATIC: movl $ldst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam07:
+; LINUX-32-STATIC: movl $ldst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam07:
+; LINUX-32-PIC: movl $ldst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam07:
+; LINUX-64-PIC: leaq ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam07:
+; DARWIN-32-STATIC: movl $_ldst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam07:
+; DARWIN-32-DYNAMIC: movl $_ldst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam07:
+; DARWIN-32-PIC: call L108$pb
+; DARWIN-32-PIC-NEXT: L108$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L108$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam07:
+; DARWIN-64-STATIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam07:
+; DARWIN-64-DYNAMIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam07:
+; DARWIN-64-PIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam08() nounwind {
@@ -983,6 +6930,58 @@ entry:
%1 = getelementptr i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bam08:
+; LINUX-64-STATIC: movl $262144, %eax
+; LINUX-64-STATIC: addq lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam08:
+; LINUX-32-STATIC: movl $262144, %eax
+; LINUX-32-STATIC-NEXT: addl lptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam08:
+; LINUX-32-PIC: movl $262144, %eax
+; LINUX-32-PIC-NEXT: addl lptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam08:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam08:
+; DARWIN-32-STATIC: movl $262144, %eax
+; DARWIN-32-STATIC-NEXT: addl _lptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam08:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl _lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam08:
+; DARWIN-32-PIC: call L109$pb
+; DARWIN-32-PIC-NEXT: L109$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl _lptr-L109$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam08:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam08:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam08:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat00(i64 %i) nounwind {
@@ -991,6 +6990,59 @@ entry:
%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat00:
+; LINUX-64-STATIC: leaq src+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal src+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal src+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _src+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat00:
+; DARWIN-32-PIC: call L110$pb
+; DARWIN-32-PIC-NEXT: L110$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L110$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
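+; Note: with a variable index the static models fold symbol, offset, and
+; scaled index into a single lea (src+64(,%rdi,4)); the PIC models first
+; fetch the base from the GOT or non-lazy pointer, then fold only the
+; offset and index into a second lea.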
define i8* @cxt00(i64 %i) nounwind {
@@ -999,6 +7051,59 @@ entry:
%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxt00:
+; LINUX-64-STATIC: leaq xsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxt00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxt00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxt00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxt00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxt00:
+; DARWIN-32-PIC: call L111$pb
+; DARWIN-32-PIC-NEXT: L111$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L111$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxt00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxt00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxt00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat01(i64 %i) nounwind {
@@ -1007,6 +7112,59 @@ entry:
%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat01:
+; LINUX-64-STATIC: leaq dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat01:
+; DARWIN-32-PIC: call L112$pb
+; DARWIN-32-PIC-NEXT: L112$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L112$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxt01(i64 %i) nounwind {
@@ -1015,6 +7173,59 @@ entry:
%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxt01:
+; LINUX-64-STATIC: leaq xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxt01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxt01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxt01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxt01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxt01:
+; DARWIN-32-PIC: call L113$pb
+; DARWIN-32-PIC-NEXT: L113$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L113$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxt01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxt01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxt01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat02(i64 %i) nounwind {
@@ -1024,6 +7235,69 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cat02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat02:
+; DARWIN-32-PIC: call L114$pb
+; DARWIN-32-PIC-NEXT: L114$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L114$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
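+; Note: for the pointer global @ptr the indexed forms load the pointer
+; value first, then compute base+64+4*index in one lea.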
define i8* @cat03(i64 %i) nounwind {
@@ -1032,6 +7306,57 @@ entry:
%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat03:
+; LINUX-64-STATIC: leaq dsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _dsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat03:
+; DARWIN-32-PIC: call L115$pb
+; DARWIN-32-PIC-NEXT: L115$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L115$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
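+; Note: the two-lea sequence on Darwin 64-bit is forced by the encoding:
+; a rip-relative address cannot also take an index register, so the base
+; is materialized first and the indexed lea follows.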
define i8* @cat04(i64 %i) nounwind {
@@ -1040,6 +7365,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat04:
+; LINUX-64-STATIC: leaq ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat04:
+; DARWIN-32-PIC: call L116$pb
+; DARWIN-32-PIC-NEXT: L116$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L116$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat05(i64 %i) nounwind {
@@ -1049,6 +7425,64 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cat05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat05:
+; DARWIN-32-PIC: call L117$pb
+; DARWIN-32-PIC-NEXT: L117$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L117$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat06(i64 %i) nounwind {
@@ -1057,6 +7491,57 @@ entry:
%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat06:
+; LINUX-64-STATIC: leaq lsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal lsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal lsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _lsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _lsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat06:
+; DARWIN-32-PIC: call L118$pb
+; DARWIN-32-PIC-NEXT: L118$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L118$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat07(i64 %i) nounwind {
@@ -1065,6 +7550,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat07:
+; LINUX-64-STATIC: leaq ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat07:
+; DARWIN-32-PIC: call L119$pb
+; DARWIN-32-PIC-NEXT: L119$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L119$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat08(i64 %i) nounwind {
@@ -1074,6 +7610,63 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cat08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat08:
+; DARWIN-32-PIC: call L120$pb
+; DARWIN-32-PIC-NEXT: L120$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L120$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam00(i64 %i) nounwind {
@@ -1082,6 +7675,59 @@ entry:
%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam00:
+; LINUX-64-STATIC: leaq src+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal src+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal src+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _src+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam00:
+; DARWIN-32-PIC: call L121$pb
+; DARWIN-32-PIC-NEXT: L121$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L121$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxm00(i64 %i) nounwind {
@@ -1090,6 +7736,59 @@ entry:
%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxm00:
+; LINUX-64-STATIC: leaq xsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxm00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxm00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxm00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxm00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxm00:
+; DARWIN-32-PIC: call L122$pb
+; DARWIN-32-PIC-NEXT: L122$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L122$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxm00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxm00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxm00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam01(i64 %i) nounwind {
@@ -1098,6 +7797,59 @@ entry:
%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam01:
+; LINUX-64-STATIC: leaq dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam01:
+; DARWIN-32-PIC: call L123$pb
+; DARWIN-32-PIC-NEXT: L123$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L123$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxm01(i64 %i) nounwind {
@@ -1106,6 +7858,59 @@ entry:
%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxm01:
+; LINUX-64-STATIC: leaq xdst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxm01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxm01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxm01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxm01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxm01:
+; DARWIN-32-PIC: call L124$pb
+; DARWIN-32-PIC-NEXT: L124$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L124$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxm01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxm01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxm01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam02(i64 %i) nounwind {
@@ -1115,6 +7920,69 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cam02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam02:
+; DARWIN-32-PIC: call L125$pb
+; DARWIN-32-PIC-NEXT: L125$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L125$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam03(i64 %i) nounwind {
@@ -1123,6 +7991,57 @@ entry:
%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam03:
+; LINUX-64-STATIC: leaq dsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _dsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam03:
+; DARWIN-32-PIC: call L126$pb
+; DARWIN-32-PIC-NEXT: L126$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L126$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam04(i64 %i) nounwind {
@@ -1131,6 +8050,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam04:
+; LINUX-64-STATIC: leaq ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam04:
+; DARWIN-32-PIC: call L127$pb
+; DARWIN-32-PIC-NEXT: L127$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L127$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam05(i64 %i) nounwind {
@@ -1140,6 +8110,64 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cam05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam05:
+; DARWIN-32-PIC: call L128$pb
+; DARWIN-32-PIC-NEXT: L128$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L128$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam06(i64 %i) nounwind {
@@ -1148,6 +8176,57 @@ entry:
%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam06:
+; LINUX-64-STATIC: leaq lsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal lsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal lsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _lsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _lsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam06:
+; DARWIN-32-PIC: call L129$pb
+; DARWIN-32-PIC-NEXT: L129$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L129$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam07(i64 %i) nounwind {
@@ -1156,6 +8235,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam07:
+; LINUX-64-STATIC: leaq ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam07:
+; DARWIN-32-PIC: call L130$pb
+; DARWIN-32-PIC-NEXT: L130$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L130$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam08(i64 %i) nounwind {
@@ -1165,6 +8295,63 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cam08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam08:
+; DARWIN-32-PIC: call L131$pb
+; DARWIN-32-PIC-NEXT: L131$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L131$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define void @lcallee() nounwind {
@@ -1177,6 +8364,123 @@ entry:
tail call void @x() nounwind
tail call void @x() nounwind
ret void
+; LINUX-64-STATIC: lcallee:
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcallee:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: lcallee:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: lcallee:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _lcallee:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _lcallee:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _lcallee:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _lcallee:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _lcallee:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _lcallee:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
declare void @x()
@@ -1191,6 +8495,123 @@ entry:
tail call void @y() nounwind
tail call void @y() nounwind
ret void
+; LINUX-64-STATIC: dcallee:
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcallee:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dcallee:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dcallee:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dcallee:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dcallee:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dcallee:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dcallee:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dcallee:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dcallee:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
declare void @y()
@@ -1198,6 +8619,48 @@ declare void @y()
define void ()* @address() nounwind {
entry:
ret void ()* @callee
+; LINUX-64-STATIC: address:
+; LINUX-64-STATIC: movl $callee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: address:
+; LINUX-32-STATIC: movl $callee, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: address:
+; LINUX-32-PIC: movl $callee, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: address:
+; LINUX-64-PIC: movq callee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _address:
+; DARWIN-32-STATIC: movl $_callee, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _address:
+; DARWIN-32-DYNAMIC: movl L_callee$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _address:
+; DARWIN-32-PIC: call L134$pb
+; DARWIN-32-PIC-NEXT: L134$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L134$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _address:
+; DARWIN-64-STATIC: movq _callee@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _address:
+; DARWIN-64-DYNAMIC: movq _callee@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _address:
+; DARWIN-64-PIC: movq _callee@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
declare void @callee()
@@ -1205,11 +8668,95 @@ declare void @callee()
define void ()* @laddress() nounwind {
entry:
ret void ()* @lcallee
+; LINUX-64-STATIC: laddress:
+; LINUX-64-STATIC: movl $lcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: laddress:
+; LINUX-32-STATIC: movl $lcallee, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: laddress:
+; LINUX-32-PIC: movl $lcallee, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: laddress:
+; LINUX-64-PIC: movq lcallee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _laddress:
+; DARWIN-32-STATIC: movl $_lcallee, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _laddress:
+; DARWIN-32-DYNAMIC: movl $_lcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _laddress:
+; DARWIN-32-PIC: call L135$pb
+; DARWIN-32-PIC-NEXT: L135$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lcallee-L135$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _laddress:
+; DARWIN-64-STATIC: leaq _lcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _laddress:
+; DARWIN-64-DYNAMIC: leaq _lcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _laddress:
+; DARWIN-64-PIC: leaq _lcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define void ()* @daddress() nounwind {
entry:
ret void ()* @dcallee
+; LINUX-64-STATIC: daddress:
+; LINUX-64-STATIC: movl $dcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: daddress:
+; LINUX-32-STATIC: movl $dcallee, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: daddress:
+; LINUX-32-PIC: movl $dcallee, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: daddress:
+; LINUX-64-PIC: leaq dcallee(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _daddress:
+; DARWIN-32-STATIC: movl $_dcallee, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _daddress:
+; DARWIN-32-DYNAMIC: movl $_dcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _daddress:
+; DARWIN-32-PIC: call L136$pb
+; DARWIN-32-PIC-NEXT: L136$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dcallee-L136$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _daddress:
+; DARWIN-64-STATIC: leaq _dcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _daddress:
+; DARWIN-64-DYNAMIC: leaq _dcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _daddress:
+; DARWIN-64-PIC: leaq _dcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define void @caller() nounwind {
@@ -1217,6 +8764,73 @@ entry:
tail call void @callee() nounwind
tail call void @callee() nounwind
ret void
+; LINUX-64-STATIC: caller:
+; LINUX-64-STATIC: call callee
+; LINUX-64-STATIC: call callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: caller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: caller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: caller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call callee@PLT
+; LINUX-64-PIC-NEXT: call callee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _caller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _caller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _caller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _caller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _callee
+; DARWIN-64-STATIC-NEXT: call _callee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _caller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _callee
+; DARWIN-64-DYNAMIC-NEXT: call _callee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _caller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _callee
+; DARWIN-64-PIC-NEXT: call _callee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @dcaller() nounwind {
@@ -1224,6 +8838,73 @@ entry:
tail call void @dcallee() nounwind
tail call void @dcallee() nounwind
ret void
+; LINUX-64-STATIC: dcaller:
+; LINUX-64-STATIC: call dcallee
+; LINUX-64-STATIC: call dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call dcallee
+; LINUX-64-PIC-NEXT: call dcallee
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _dcallee
+; DARWIN-64-STATIC-NEXT: call _dcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _dcallee
+; DARWIN-64-DYNAMIC-NEXT: call _dcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _dcallee
+; DARWIN-64-PIC-NEXT: call _dcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @lcaller() nounwind {
@@ -1231,24 +8912,262 @@ entry:
tail call void @lcallee() nounwind
tail call void @lcallee() nounwind
ret void
+; LINUX-64-STATIC: lcaller:
+; LINUX-64-STATIC: call lcallee
+; LINUX-64-STATIC: call lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: lcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: lcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call lcallee@PLT
+; LINUX-64-PIC-NEXT: call lcallee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _lcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _lcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _lcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _lcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _lcallee
+; DARWIN-64-STATIC-NEXT: call _lcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _lcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _lcallee
+; DARWIN-64-DYNAMIC-NEXT: call _lcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _lcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _lcallee
+; DARWIN-64-PIC-NEXT: call _lcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @tailcaller() nounwind {
entry:
tail call void @callee() nounwind
ret void
+; LINUX-64-STATIC: tailcaller:
+; LINUX-64-STATIC: call callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: tailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: tailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: tailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call callee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _tailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _tailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _tailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _tailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _callee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _tailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _callee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _tailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _callee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @dtailcaller() nounwind {
entry:
tail call void @dcallee() nounwind
ret void
+; LINUX-64-STATIC: dtailcaller:
+; LINUX-64-STATIC: call dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dtailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dtailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dtailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call dcallee
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dtailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dtailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dtailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dtailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _dcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dtailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _dcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dtailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _dcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @ltailcaller() nounwind {
entry:
tail call void @lcallee() nounwind
ret void
+; LINUX-64-STATIC: ltailcaller:
+; LINUX-64-STATIC: call lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ltailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ltailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ltailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call lcallee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ltailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ltailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ltailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ltailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _lcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ltailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _lcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ltailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _lcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @icaller() nounwind {
@@ -1258,6 +9177,86 @@ entry:
%1 = load void ()** @ifunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: icaller:
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: icaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: icaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: icaller:
+; LINUX-64-PIC: pushq %rbx
+; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: popq %rbx
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _icaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _icaller:
+; DARWIN-32-DYNAMIC: pushl %esi
+; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: popl %esi
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _icaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L143$pb
+; DARWIN-32-PIC-NEXT: L143$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L143$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _icaller:
+; DARWIN-64-STATIC: pushq %rbx
+; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: popq %rbx
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _icaller:
+; DARWIN-64-DYNAMIC: pushq %rbx
+; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: popq %rbx
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _icaller:
+; DARWIN-64-PIC: pushq %rbx
+; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: popq %rbx
+; DARWIN-64-PIC-NEXT: ret
}
define void @dicaller() nounwind {
@@ -1267,6 +9266,79 @@ entry:
%1 = load void ()** @difunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: dicaller:
+; LINUX-64-STATIC: call *difunc
+; LINUX-64-STATIC: call *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dicaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dicaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dicaller:
+; LINUX-64-PIC: pushq %rbx
+; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: popq %rbx
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dicaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dicaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dicaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L144$pb
+; DARWIN-32-PIC-NEXT: L144$pb:
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: call *_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dicaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dicaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dicaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-PIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @licaller() nounwind {
@@ -1276,6 +9348,78 @@ entry:
%1 = load void ()** @lifunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: licaller:
+; LINUX-64-STATIC: call *lifunc
+; LINUX-64-STATIC: call *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: licaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: licaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: licaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call *lifunc(%rip)
+; LINUX-64-PIC-NEXT: call *lifunc(%rip)
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _licaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _licaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _licaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L145$pb
+; DARWIN-32-PIC-NEXT: L145$pb:
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: call *_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _licaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _licaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _licaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @itailcaller() nounwind {
@@ -1285,6 +9429,86 @@ entry:
%1 = load void ()** @ifunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: itailcaller:
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: itailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: itailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: itailcaller:
+; LINUX-64-PIC: pushq %rbx
+; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: popq %rbx
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _itailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _itailcaller:
+; DARWIN-32-DYNAMIC: pushl %esi
+; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: popl %esi
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _itailcaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L146$pb
+; DARWIN-32-PIC-NEXT: L146$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L146$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _itailcaller:
+; DARWIN-64-STATIC: pushq %rbx
+; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: popq %rbx
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _itailcaller:
+; DARWIN-64-DYNAMIC: pushq %rbx
+; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: popq %rbx
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _itailcaller:
+; DARWIN-64-PIC: pushq %rbx
+; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: popq %rbx
+; DARWIN-64-PIC-NEXT: ret
}
define void @ditailcaller() nounwind {
@@ -1292,6 +9516,66 @@ entry:
%0 = load void ()** @difunc, align 8
tail call void %0() nounwind
ret void
+; LINUX-64-STATIC: ditailcaller:
+; LINUX-64-STATIC: call *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ditailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ditailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ditailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: call *(%rax)
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ditailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ditailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ditailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L147$pb
+; DARWIN-32-PIC-NEXT: L147$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: call *_difunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ditailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ditailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ditailcaller:
+; DARWIN-64-PIC: call *_difunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @litailcaller() nounwind {
@@ -1299,4 +9583,64 @@ entry:
%0 = load void ()** @lifunc, align 8
tail call void %0() nounwind
ret void
+; LINUX-64-STATIC: litailcaller:
+; LINUX-64-STATIC: call *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: litailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: litailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: litailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call *lifunc(%rip)
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _litailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _litailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _litailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L148$pb
+; DARWIN-32-PIC-NEXT: L148$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: call *_lifunc-L148$pb(%eax)
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _litailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _litailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _litailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
diff --git a/test/CodeGen/X86/add-trick32.ll b/test/CodeGen/X86/add-trick32.ll
index 42909b4b5874..e86045db0abb 100644
--- a/test/CodeGen/X86/add-trick32.ll
+++ b/test/CodeGen/X86/add-trick32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: not grep add %t
; RUN: grep subl %t | count 1
diff --git a/test/CodeGen/X86/add-trick64.ll b/test/CodeGen/X86/add-trick64.ll
index 5466d9d441b1..2f1fceea5ea4 100644
--- a/test/CodeGen/X86/add-trick64.ll
+++ b/test/CodeGen/X86/add-trick64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep add %t
; RUN: grep subq %t | count 2
diff --git a/test/CodeGen/X86/add-with-overflow.ll b/test/CodeGen/X86/add-with-overflow.ll
index d015cebbbdf2..0f705dc02088 100644
--- a/test/CodeGen/X86/add-with-overflow.ll
+++ b/test/CodeGen/X86/add-with-overflow.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep {jb} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {jo} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {jb} | count 2
+; RUN: llc < %s -march=x86 | grep {jo} | count 2
+; RUN: llc < %s -march=x86 | grep {jb} | count 2
+; RUN: llc < %s -march=x86 -O0 | grep {jo} | count 2
+; RUN: llc < %s -march=x86 -O0 | grep {jb} | count 2
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 3aadd05d05e9..0b26859b04c7 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t -f
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t
; RUN: grep set %t | count 7
; RUN: grep globl %t | count 6
; RUN: grep weak %t | count 1
diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll
index b2dc77d8be12..c0f3a81c4d67 100644
--- a/test/CodeGen/X86/aligned-comm.ll
+++ b/test/CodeGen/X86/aligned-comm.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep {array,16512,7}
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep {array,16512,7}
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin8 | not grep {7}
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin8 | not grep {7}
; Darwin 9+ should get alignment on common symbols. Darwin8 does
; not support this.
diff --git a/test/CodeGen/X86/all-ones-vector.ll b/test/CodeGen/X86/all-ones-vector.ll
index 01c0e36ea244..10fecadaa023 100644
--- a/test/CodeGen/X86/all-ones-vector.ll
+++ b/test/CodeGen/X86/all-ones-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
define <4 x i32> @ioo() nounwind {
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 0bd97c23e87b..f45e9b84b264 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
declare void @bar(<2 x i64>* %n)
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
index 3501047abc16..7733b8a5baaa 100644
--- a/test/CodeGen/X86/and-or-fold.ll
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86 | grep and | count 1
; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1
; in this case.
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
index bdc845448f5f..b5ac23b24128 100644
--- a/test/CodeGen/X86/and-su.ll
+++ b/test/CodeGen/X86/and-su.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%} | count 1
+; RUN: llc < %s -march=x86 | grep {(%} | count 1
; Don't duplicate the load.
diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll
index e8c3cf0e71b2..0cf169eb28d8 100644
--- a/test/CodeGen/X86/anyext-uses.ll
+++ b/test/CodeGen/X86/anyext-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep mov %t | count 8
; RUN: not grep implicit %t
diff --git a/test/CodeGen/X86/anyext.ll b/test/CodeGen/X86/anyext.ll
new file mode 100644
index 000000000000..106fe83661b4
--- /dev/null
+++ b/test/CodeGen/X86/anyext.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 | grep movzbl | count 2
+
+; Use movzbl to avoid partial-register updates.
+
+define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
+ %q = trunc i32 %p to i8
+ %r = udiv i8 %q, %x
+ %s = zext i8 %r to i32
+ %t = and i32 %s, 1
+ ret i32 %t
+}
+define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
+ %q = trunc i32 %p to i16
+ %r = udiv i16 %q, %x
+ %s = zext i16 %r to i32
+ %t = and i32 %s, 1
+ ret i32 %t
+}
diff --git a/test/CodeGen/X86/arg-cast.ll b/test/CodeGen/X86/arg-cast.ll
index 2e2bc3cc8f21..c11151446bc5 100644
--- a/test/CodeGen/X86/arg-cast.ll
+++ b/test/CodeGen/X86/arg-cast.ll
@@ -1,7 +1,7 @@
; This should compile to movl $2147483647, %eax + andl only.
-; RUN: llvm-as < %s | llc | grep andl
-; RUN: llvm-as < %s | llc | not grep movsd
-; RUN: llvm-as < %s | llc | grep esp | not grep add
+; RUN: llc < %s | grep andl
+; RUN: llc < %s | not grep movsd
+; RUN: llc < %s | grep esp | not grep add
; rdar://5736574
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/asm-block-labels.ll b/test/CodeGen/X86/asm-block-labels.ll
index 284a9fb00fde..a43d43023196 100644
--- a/test/CodeGen/X86/asm-block-labels.ll
+++ b/test/CodeGen/X86/asm-block-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc
+; RUN: opt < %s -std-compile-opts | llc
; ModuleID = 'block12.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
index 333c7689ab4a..96da224c8521 100644
--- a/test/CodeGen/X86/asm-global-imm.ll
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep {test1 \$_GV}
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep {test2 _GV}
; PR882
diff --git a/test/CodeGen/X86/asm-indirect-mem.ll b/test/CodeGen/X86/asm-indirect-mem.ll
index 7f3353f6be65..c57aa995e8a8 100644
--- a/test/CodeGen/X86/asm-indirect-mem.ll
+++ b/test/CodeGen/X86/asm-indirect-mem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2267
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-modifier-P.ll b/test/CodeGen/X86/asm-modifier-P.ll
new file mode 100644
index 000000000000..6139da8c3685
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier-P.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC-32
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-32
+; RUN: llc < %s -march=x86-64 -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-64
+; RUN: llc < %s -march=x86-64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC-64
+; PR3379
+; XFAIL: *
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@G = external global i32 ; <i32*> [#uses=1]
+
+declare void @bar(...)
+
+; extern int G;
+; void test1() {
+; asm("frob %0 x" : : "m"(G));
+; asm("frob %P0 x" : : "m"(G));
+;}
+
+define void @test1() nounwind {
+entry:
+; P suffix removes (rip) in -static 64-bit mode.
+
+; CHECK-PIC-64: test1:
+; CHECK-PIC-64: movq G@GOTPCREL(%rip), %rax
+; CHECK-PIC-64: frob (%rax) x
+; CHECK-PIC-64: frob (%rax) x
+
+; CHECK-STATIC-64: test1:
+; CHECK-STATIC-64: frob G(%rip) x
+; CHECK-STATIC-64: frob G x
+
+; CHECK-PIC-32: test1:
+; CHECK-PIC-32: frob G x
+; CHECK-PIC-32: frob G x
+
+; CHECK-STATIC-32: test1:
+; CHECK-STATIC-32: frob G x
+; CHECK-STATIC-32: frob G x
+
+ call void asm "frob $0 x", "*m"(i32* @G) nounwind
+ call void asm "frob ${0:P} x", "*m"(i32* @G) nounwind
+ ret void
+}
+
+define void @test3() nounwind {
+entry:
+; CHECK-STATIC-64: test3:
+; CHECK-STATIC-64: call bar
+; CHECK-STATIC-64: call test3
+; CHECK-STATIC-64: call $bar
+; CHECK-STATIC-64: call $test3
+
+; CHECK-STATIC-32: test3:
+; CHECK-STATIC-32: call bar
+; CHECK-STATIC-32: call test3
+; CHECK-STATIC-32: call $bar
+; CHECK-STATIC-32: call $test3
+
+; CHECK-PIC-64: test3:
+; CHECK-PIC-64: call bar@PLT
+; CHECK-PIC-64: call test3@PLT
+; CHECK-PIC-64: call $bar
+; CHECK-PIC-64: call $test3
+
+; CHECK-PIC-32: test3:
+; CHECK-PIC-32: call bar@PLT
+; CHECK-PIC-32: call test3@PLT
+; CHECK-PIC-32: call $bar
+; CHECK-PIC-32: call $test3
+
+
+; asm(" blah %P0" : : "X"(bar));
+ tail call void asm sideeffect "call ${0:P}", "X"(void (...)* @bar) nounwind
+ tail call void asm sideeffect "call ${0:P}", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+ tail call void asm sideeffect "call $0", "X"(void (...)* @bar) nounwind
+ tail call void asm sideeffect "call $0", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/asm-modifier.ll b/test/CodeGen/X86/asm-modifier.ll
new file mode 100644
index 000000000000..44f972ec7198
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'asm.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @test1() nounwind {
+entry:
+; CHECK: test1:
+; CHECK: movw %gs:6, %ax
+ %asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1]
+ %0 = zext i16 %asmtmp.i to i32 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define zeroext i16 @test2(i32 %address) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: movw %gs:(%eax), %ax
+ %asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1]
+ ret i16 %asmtmp
+}
+
+@n = global i32 42 ; <i32*> [#uses=3]
+@y = common global i32 0 ; <i32*> [#uses=3]
+
+define void @test3() nounwind {
+entry:
+; CHECK: test3:
+; CHECK: movl _n, %eax
+ call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @n) nounwind
+ ret void
+}
+
+define void @test4() nounwind {
+entry:
+; CHECK: test4:
+; CHECK: movl L_y$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+ call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @y) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
new file mode 100644
index 000000000000..d00f8e861c21
--- /dev/null
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://7103704
+
+define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @inc4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @add8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add8:
+; CHECK: addq $2
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 2) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add4:
+; CHECK: addq
+ %0 = sext i32 %v to i64 ; <i64> [#uses=1]
+ %1 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 %0) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @inc3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc3:
+; CHECK: incb
+ %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 1) ; <i8> [#uses=0]
+ ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @add7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add7:
+; CHECK: addb $2
+ %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 2) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add3:
+; CHECK: addb
+ %0 = trunc i32 %v to i8 ; <i8> [#uses=1]
+ %1 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 %0) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @inc2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc2:
+; CHECK: incw
+ %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 1) ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @add6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add6:
+; CHECK: addw $2
+ %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 2) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add2:
+; CHECK: addw
+ %0 = trunc i32 %v to i16 ; <i16> [#uses=1]
+ %1 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @inc1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc1:
+; CHECK: incl
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 1) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @add5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add5:
+; CHECK: addl $2
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add1:
+; CHECK: addl
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @dec4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec4:
+; CHECK: decq
+ %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @sub8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub8:
+; CHECK: subq $2
+ %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 2) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub4:
+; CHECK: subq
+ %0 = sext i32 %v to i64 ; <i64> [#uses=1]
+ %1 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 %0) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @dec3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec3:
+; CHECK: decb
+ %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 1) ; <i8> [#uses=0]
+ ret void
+}
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @sub7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub7:
+; CHECK: subb $2
+ %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 2) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub3:
+; CHECK: subb
+ %0 = trunc i32 %v to i8 ; <i8> [#uses=1]
+ %1 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 %0) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @dec2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec2:
+; CHECK: decw
+ %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 1) ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @sub6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub6:
+; CHECK: subw $2
+ %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 2) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub2:
+; CHECK: subw
+ %0 = trunc i32 %v to i16 ; <i16> [#uses=1]
+ %1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @dec1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec1:
+; CHECK: decl
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 1) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @sub5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub5:
+; CHECK: subl $2
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 2) ; <i32> [#uses=0]
+ ret void
+}
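
A note on what this file is actually pinning down: every call discards the intrinsic's return value, which is what lets llc select the plain lock-prefixed add/sub/inc/dec memory forms the CHECK lines expect. When the old value is used, a bare add no longer suffices. A sketch of the contrast, as a hypothetical companion test:

; RUN: llc < %s -march=x86-64 | FileCheck %s
declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
define i32 @add_used(i32* %p) nounwind {
; The fetched value is returned, so expect an exchanging form
; (lock xaddl) rather than a bare addl.
; CHECK: add_used:
; CHECK: xaddl
  %old = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2)
  ret i32 %old
}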
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 6871a08b29e6..3ef1887083d0 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t1 -f
+; RUN: llc < %s -march=x86 -o %t1
; RUN: grep "lock" %t1 | count 17
; RUN: grep "xaddl" %t1 | count 4
; RUN: grep "cmpxchgl" %t1 | count 13
diff --git a/test/CodeGen/X86/attribute-sections.ll b/test/CodeGen/X86/attribute-sections.ll
new file mode 100644
index 000000000000..30353346b5c9
--- /dev/null
+++ b/test/CodeGen/X86/attribute-sections.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+declare i32 @foo()
+@G0 = global i32 ()* @foo, section ".init_array"
+
+; LINUX: .section .init_array,"aw"
+; LINUX: .globl G0
+
+@G1 = global i32 ()* @foo, section ".fini_array"
+
+; LINUX: .section .fini_array,"aw"
+; LINUX: .globl G1
+
+@G2 = global i32 ()* @foo, section ".preinit_array"
+
+; LINUX: .section .preinit_array,"aw"
+; LINUX: .globl G2
+
diff --git a/test/CodeGen/X86/avoid-lea-scale2.ll b/test/CodeGen/X86/avoid-lea-scale2.ll
new file mode 100644
index 000000000000..8003de262d2c
--- /dev/null
+++ b/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64 | grep {leal.*-2(\[%\]rdi,\[%\]rdi)}
+
+define i32 @foo(i32 %x) nounwind readnone {
+ %t0 = shl i32 %x, 1
+ %t1 = add i32 %t0, -2
+ ret i32 %t1
+}
+
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
index 9f0aeb32c417..03e69e7a1a49 100644
--- a/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep align | count 3
+; RUN: llc < %s -march=x86 | grep align | count 3
@x = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index dfc58181d904..3e68f9486cfa 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep align | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep align | count 1
@A = common global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/bitcast-int-to-vector.ll b/test/CodeGen/X86/bitcast-int-to-vector.ll
index 370bec09848f..4c25979dcd5e 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @foo(i64 %a)
{
diff --git a/test/CodeGen/X86/bitcast.ll b/test/CodeGen/X86/bitcast.ll
index f575409f2149..c34c6753bfed 100644
--- a/test/CodeGen/X86/bitcast.ll
+++ b/test/CodeGen/X86/bitcast.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
; PR1033
define i64 @test1(double %t) {
diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll
index 3e2693157802..48922b5f5a13 100644
--- a/test/CodeGen/X86/bitcast2.ll
+++ b/test/CodeGen/X86/bitcast2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep rsp
+; RUN: llc < %s -march=x86-64 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 | not grep rsp
define i64 @test1(double %A) {
%B = bitcast double %A to i64
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index b9ce10f44198..6b245c103e20 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-post-RA-scheduler=false -break-anti-dependencies=false > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=false > %t
; RUN: grep {%xmm0} %t | count 14
; RUN: not grep {%xmm1} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-post-RA-scheduler=false -break-anti-dependencies > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies > %t
; RUN: grep {%xmm0} %t | count 7
; RUN: grep {%xmm1} %t | count 7
diff --git a/test/CodeGen/X86/bss_pagealigned.ll b/test/CodeGen/X86/bss_pagealigned.ll
new file mode 100644
index 000000000000..4a1049bc560d
--- /dev/null
+++ b/test/CodeGen/X86/bss_pagealigned.ll
@@ -0,0 +1,21 @@
+; RUN: llc --code-model=kernel -march=x86-64 <%s | FileCheck %s
+; PR4933
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+%struct.kmem_cache_order_objects = type { i64 }
+declare i8* @memset(i8*, i32, i64)
+define void @unxlate_dev_mem_ptr(i64 %phis, i8* %addr) nounwind {
+ %pte.addr.i = alloca %struct.kmem_cache_order_objects*
+ %call8 = call i8* @memset(i8* bitcast ([512 x %struct.kmem_cache_order_objects]* @bm_pte to i8*), i32 0, i64 4096)
+; CHECK: movq $bm_pte, %rdi
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movl $4096, %edx
+; CHECK-NEXT: call memset
+ ret void
+}
+@bm_pte = internal global [512 x %struct.kmem_cache_order_objects] zeroinitializer, section ".bss.page_aligned", align 4096
+; CHECK: .section .bss.page_aligned,"aw",@nobits
+; CHECK-NEXT: .align 4096
+; CHECK-NEXT: bm_pte:
+; CHECK-NEXT: .zero 4096
+; CHECK-NEXT: .size bm_pte, 4096
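
Two separate facts are encoded in the CHECKs above, stated here for the reader: under -code-model=kernel a symbol's absolute address fits a sign-extended 32-bit immediate, hence movq $bm_pte, %rdi; and a zero-initialized global with an explicit section is still emitted as nobits, i.e. .section .bss.page_aligned,"aw",@nobits reserves allocatable, writable, zeroed memory without occupying space in the object file. A minimal sketch of the section-placement half (hypothetical test):

; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
@buf = internal global [4096 x i8] zeroinitializer, section ".bss.page_aligned", align 4096
; CHECK: .section .bss.page_aligned,"aw",@nobits
; CHECK: .zero 4096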
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 91f8310361ad..5bf58fa1d505 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep APP %t
; RUN: grep bswapq %t | count 2
; RUN: grep bswapl %t | count 1
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index 592e25bae331..0a72c1c47845 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,8 +1,8 @@
; bswap should be constant folded when it is passed a constant argument
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep bswapl | count 3
-; RUN: llvm-as < %s | llc -march=x86 | grep rolw | count 1
+; RUN: llc < %s -march=x86 | grep rolw | count 1
declare i16 @llvm.bswap.i16(i16)
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index a76242c977ce..ec447e5e9c81 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep btl | count 28
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium4 | grep btl | not grep esp
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn | grep btl | not grep esp
+; RUN: llc < %s -march=x86 | grep btl | count 28
+; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp
+; RUN: llc < %s -march=x86 -mcpu=penryn | grep btl | not grep esp
; PR3253
; The register+memory form of the BT instruction should be usable on
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index a75214a6b084..af36e1bb8cb4 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq 8(%rsp), %rax}
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86-64 | grep {movq 8(%rsp), %rax}
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {movl 8(%esp), %edx} %t
; RUN: grep {movl 4(%esp), %eax} %t
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index f85c8ffbe4fe..71129f5f6c9b 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 707a4c5d2785..504e0bed7916 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32,
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 5576c361ae16..4db9d650b439 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
i16, i16, i16, i16, i16, i16, i16, i16,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index c6f4588dd45d..69c115b97326 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index 47269d21d930..b060369a182e 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add | not grep 16
+; RUN: llc < %s -march=x86 | grep add | not grep 16
%struct.W = type { x86_fp80, x86_fp80 }
@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 6b64c6ce4dab..0da93bad04e1 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
%struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
<2 x i64> }
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 6e9c70dd42fe..87785bc3f3f4 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
-; RUN: llvm-as < %s | llc -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
; Call to immediate is not safe on x86-64 unless we *know* that the
; call will be within 32-bits pcrel from the dest immediate.
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {call.*\*%rax}
+; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax}
; PR3666
; PR3773
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index ad9b796a85d0..7bae5cd2464d 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -disable-fp-elim | grep subl | count 1
+; RUN: llc < %s -march=x86 -disable-fp-elim | grep subl | count 1
%struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
%struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
index 87194d61c37a..d520a6ff13b2 100644
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ b/test/CodeGen/X86/change-compare-stride-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
-; RUN: grep {cmpl \$4294966818,} %t
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {cmpl \$-478,} %t
; RUN: not grep inc %t
; RUN: not grep {leal 1(} %t
; RUN: not grep {leal -1(} %t
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 49b691f4a75b..a9ddbdb7f745 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep {cmpq \$-478,} %t
; RUN: not grep inc %t
; RUN: not grep {leal 1(} %t
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index c3b3b412f2a9..3f27187d44a8 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep bsr | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep bsf
-; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 3
+; RUN: llc < %s -march=x86 | grep bsr | count 2
+; RUN: llc < %s -march=x86 | grep bsf
+; RUN: llc < %s -march=x86 | grep cmov | count 3
define i32 @t1(i32 %x) nounwind {
%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
new file mode 100644
index 000000000000..f3c9a7addf83
--- /dev/null
+++ b/test/CodeGen/X86/cmov.ll
@@ -0,0 +1,157 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test1:
+; CHECK: btl
+; CHECK-NEXT: movl $12, %eax
+; CHECK-NEXT: cmovae (%rcx), %eax
+; CHECK-NEXT: ret
+
+ %0 = lshr i32 %x, %n ; <i32> [#uses=1]
+ %1 = and i32 %0, 1 ; <i32> [#uses=1]
+ %toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1]
+ %v = load i32* %vp
+ %.0 = select i1 %toBool, i32 %v, i32 12 ; <i32> [#uses=1]
+ ret i32 %.0
+}
+define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test2:
+; CHECK: btl
+; CHECK-NEXT: movl $12, %eax
+; CHECK-NEXT: cmovb (%rcx), %eax
+; CHECK-NEXT: ret
+
+ %0 = lshr i32 %x, %n ; <i32> [#uses=1]
+ %1 = and i32 %0, 1 ; <i32> [#uses=1]
+ %toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1]
+ %v = load i32* %vp
+ %.0 = select i1 %toBool, i32 12, i32 %v ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+
+; LLVM doesn't model x86's 32-bit cmov as zeroing the high 32 bits of
+; the destination when the condition is false, so an explicit
+; zero-extend (movl) is needed after the cmov.
+
+declare void @bar(i64) nounwind
+
+define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
+; CHECK: test3:
+; CHECK: cmovne %edi, %esi
+; CHECK-NEXT: movl %esi, %edi
+
+ %c = trunc i64 %a to i32
+ %d = trunc i64 %b to i32
+ %e = select i1 %p, i32 %c, i32 %d
+ %f = zext i32 %e to i64
+ call void @bar(i64 %f)
+ ret void
+}
+
+
+
+; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional
+; move without recomputing EFLAGS, because the expansion of the conditional
+; move with control flow may clobber EFLAGS (e.g., with xor, to set the
+; register to zero).
+
+; The test is a little awkward; the important part is that there's a
+; test instruction before the setne.
+; PR4814
+
+
+@g_3 = external global i8 ; <i8*> [#uses=1]
+@g_96 = external global i8 ; <i8*> [#uses=2]
+@g_100 = external global i8 ; <i8*> [#uses=2]
+@_2E_str = external constant [15 x i8], align 1 ; <[15 x i8]*> [#uses=1]
+
+define i32 @test4() nounwind {
+entry:
+ %0 = load i8* @g_3, align 1 ; <i8> [#uses=2]
+ %1 = sext i8 %0 to i32 ; <i32> [#uses=1]
+ %.lobit.i = lshr i8 %0, 7 ; <i8> [#uses=1]
+ %tmp.i = zext i8 %.lobit.i to i32 ; <i32> [#uses=1]
+ %tmp.not.i = xor i32 %tmp.i, 1 ; <i32> [#uses=1]
+ %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i ; <i32> [#uses=1]
+ %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1]
+ %2 = icmp eq i8 %retval56.i.i, 0 ; <i1> [#uses=2]
+ %g_96.promoted.i = load i8* @g_96 ; <i8> [#uses=3]
+ %3 = icmp eq i8 %g_96.promoted.i, 0 ; <i1> [#uses=2]
+ br i1 %3, label %func_4.exit.i, label %bb.i.i.i
+
+bb.i.i.i: ; preds = %entry
+ %4 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ br label %func_4.exit.i
+
+; CHECK: test4:
+; CHECK: g_100
+; CHECK: testb
+; CHECK: testb %al, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb
+
+func_4.exit.i: ; preds = %bb.i.i.i, %entry
+ %.not.i = xor i1 %2, true ; <i1> [#uses=1]
+ %brmerge.i = or i1 %3, %.not.i ; <i1> [#uses=1]
+ %.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0 ; <i8> [#uses=1]
+ br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
+
+bb.i.i: ; preds = %func_4.exit.i
+ %5 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ br label %func_1.exit
+
+func_1.exit: ; preds = %bb.i.i, %func_4.exit.i
+ %g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2]
+ store i8 %g_96.tmp.0.i, i8* @g_96
+ %6 = zext i8 %g_96.tmp.0.i to i32 ; <i32> [#uses=1]
+ %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+
+; Should compile to setcc | -2.
+; rdar://6668608
+define i32 @test5(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test5:
+; CHECK: setg %al
+; CHECK: movzbl %al, %eax
+; CHECK: orl $-2, %eax
+; CHECK: ret
+
+ %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %1, i32 -1, i32 -2 ; <i32> [#uses=1]
+ ret i32 %iftmp.0.0
+}
+
+define i32 @test6(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test6:
+; CHECK: setl %al
+; CHECK: movzbl %al, %eax
+; CHECK: leal 4(%rax,%rax,8), %eax
+; CHECK: ret
+ %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %1, i32 4, i32 13 ; <i32> [#uses=1]
+ ret i32 %iftmp.0.0
+}
+
+
+; Don't try to use a 16-bit conditional move to do an 8-bit select,
+; because it isn't worth it. Just use a branch instead.
+define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
+; CHECK: test7:
+; CHECK: testb $1, %dil
+; CHECK-NEXT: jne LBB
+
+ %d = select i1 %c, i8 %a, i8 %b
+ ret i8 %d
+}
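
On test5 and test6: both selects have two constant arms that differ by a simple function of the i1, so no conditional move is needed at all. setg materializes the i1 as 0 or 1, and since -2 is 0xFFFFFFFE, or-ing the bit in gives -2 | 0 = -2 and -2 | 1 = -1, reproducing select(x > 41, -1, -2); test6 instead computes 4 + 9*b via leal 4(%rax,%rax,8), where b = zext(x < 42) picks 4 or 13. The identity for test5 in IR, as a sketch (the two functions below compute the same value):

define i32 @sel(i32 %x) nounwind readnone {
  %c = icmp sgt i32 %x, 41
  %r = select i1 %c, i32 -1, i32 -2
  ret i32 %r
}
define i32 @orform(i32 %x) nounwind readnone {
  %c = icmp sgt i32 %x, 41
  %z = zext i1 %c to i32         ; 0 or 1
  %r = or i32 %z, -2             ; -2 or -1
  ret i32 %r
}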
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
index 91c8a87ea541..898c09b82f5e 100644
--- a/test/CodeGen/X86/cmp-test.ll
+++ b/test/CodeGen/X86/cmp-test.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep test | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep test | count 1
define i32 @f1(i32 %X, i32* %y) {
%tmp = load i32* %y ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
index f66f90c0b0f3..de893745bae9 100644
--- a/test/CodeGen/X86/cmp0.ll
+++ b/test/CodeGen/X86/cmp0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | grep -v cmp
define i64 @foo(i64 %x) {
%t = icmp eq i64 %x, 0
diff --git a/test/CodeGen/X86/cmp1.ll b/test/CodeGen/X86/cmp1.ll
index 241618c531ab..d4aa399ae95d 100644
--- a/test/CodeGen/X86/cmp1.ll
+++ b/test/CodeGen/X86/cmp1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | grep -v cmp
define i64 @foo(i64 %x) {
%t = icmp slt i64 %x, 1
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
index 2c046ffc0841..9a8e00c8bca0 100644
--- a/test/CodeGen/X86/cmp2.ll
+++ b/test/CodeGen/X86/cmp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
define i32 @test(double %A) nounwind {
entry:
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
new file mode 100644
index 000000000000..0fe4e56c97ca
--- /dev/null
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | grep {movl %esp, %eax}
+; PR4572
+
+; Don't coalesce with %esp if it would end up putting %esp in
+; the index position of an address, because that can't be
+; encoded on x86. It would actually be slightly better to
+; swap the address operands though, since there's no scale.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-mingw32"
+ %"struct.std::valarray<unsigned int>" = type { i32, i32* }
+
+define void @_ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_(i32 %__o, %"struct.std::valarray<unsigned int>"* nocapture %__l, %"struct.std::valarray<unsigned int>"* nocapture %__s, %"struct.std::valarray<unsigned int>"* nocapture %__i) nounwind {
+entry:
+ %0 = alloca i32, i32 undef, align 4 ; <i32*> [#uses=1]
+ br i1 undef, label %return, label %bb4
+
+bb4: ; preds = %bb7.backedge, %entry
+ %indvar = phi i32 [ %indvar.next, %bb7.backedge ], [ 0, %entry ] ; <i32> [#uses=2]
+ %scevgep24.sum = sub i32 undef, %indvar ; <i32> [#uses=2]
+ %scevgep25 = getelementptr i32* %0, i32 %scevgep24.sum ; <i32*> [#uses=1]
+ %scevgep27 = getelementptr i32* undef, i32 %scevgep24.sum ; <i32*> [#uses=1]
+ %1 = load i32* %scevgep27, align 4 ; <i32> [#uses=0]
+ br i1 undef, label %bb7.backedge, label %bb5
+
+bb5: ; preds = %bb4
+ store i32 0, i32* %scevgep25, align 4
+ br label %bb7.backedge
+
+bb7.backedge: ; preds = %bb5, %bb4
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br label %bb4
+
+return: ; preds = %entry
+ ret void
+}
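
The restriction this test guards is an encoding fact worth spelling out: in the x86 SIB byte, the index field value 100b means "no index", so %esp can serve as a base but never as an index. Since the access in question has no scale, swapping base and index, as the comment suggests, would be a legal alternative to copying %esp into %eax first. Illustrated as asm comments (a sketch, not generated output):

;   movl (%esp,%eax), %ecx    ; encodable: base=%esp, index=%eax
;   movl (%eax,%esp), %ecx    ; not encodable: %esp cannot be the index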
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 99394240c7c8..8aa0bfdd51fb 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
; PR1877
@NNTOT = weak global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index c67e0f582496..5d10bbad09ef 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep paddw | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86-64 | grep paddw | count 2
+; RUN: llc < %s -march=x86-64 | not grep mov
; The 2-addr pass should ensure that identical code is produced for these functions
; no extra copy should be generated.
diff --git a/test/CodeGen/X86/coalescer-commute3.ll b/test/CodeGen/X86/coalescer-commute3.ll
index 7d4a80ab70f2..e5bd448a4158 100644
--- a/test/CodeGen/X86/coalescer-commute3.ll
+++ b/test/CodeGen/X86/coalescer-commute3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
index 9628f93e7916..02a97813fdcd 100644
--- a/test/CodeGen/X86/coalescer-commute4.ll
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
; PR1501
define float @foo(i32* %x, float* %y, i32 %c) nounwind {
diff --git a/test/CodeGen/X86/coalescer-commute5.ll b/test/CodeGen/X86/coalescer-commute5.ll
index c730ea76e983..510d115f4ad7 100644
--- a/test/CodeGen/X86/coalescer-commute5.ll
+++ b/test/CodeGen/X86/coalescer-commute5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
define i32 @t() {
entry:
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
new file mode 100644
index 000000000000..7d6f399930fd
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | not grep movaps
+; rdar://6509240
+
+ type { %struct.TValue } ; type %0
+ type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1
+ %struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
+ %struct.GCObject = type { %struct.lua_State }
+ %struct.L_Umaxalign = type { double }
+ %struct.Mbuffer = type { i8*, i32, i32 }
+ %struct.Node = type { %struct.TValue, %struct.TKey }
+ %struct.TKey = type { %1 }
+ %struct.TString = type { %struct.anon }
+ %struct.TValue = type { %struct.L_Umaxalign, i32 }
+ %struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 }
+ %struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 }
+ %struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 }
+ %struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] }
+ %struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+ %struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 }
+ %struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 }
+ %struct.stringtable = type { %struct.GCObject**, i32, i32 }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp {
+entry:
+ %0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; <i32> [#uses=1]
+ %1 = uitofp i32 %0 to double ; <double> [#uses=1]
+ %2 = fdiv double %1, 1.000000e+06 ; <double> [#uses=1]
+ %3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3]
+ %4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2]
+ %5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ store double %2, double* %5, align 4
+ %6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 3, i32* %6, align 4
+ %7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1]
+ %8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1]
+ store %struct.TValue* %8, %struct.TValue** %3, align 4
+ ret i32 1
+}
+
+declare i32 @"\01_clock$UNIX2003"()
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index ab029f45658c..4db520fee747 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep xor | count 3
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xor | count 3
@val = internal global i64 0 ; <i64*> [#uses=1]
@"\01LC" = internal constant [7 x i8] c"0x%lx\0A\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/code_placement.ll b/test/CodeGen/X86/code_placement.ll
index 55167950d1a0..97471835a4c9 100644
--- a/test/CodeGen/X86/code_placement.ll
+++ b/test/CodeGen/X86/code_placement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | %prcontext jmp 1 | grep align
+; RUN: llc -march=x86 < %s | FileCheck %s
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
@@ -12,6 +12,8 @@ entry:
%tmp15 = add i32 %r, -1 ; <i32> [#uses=1]
%tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2]
br label %bb
+; CHECK: jmp
+; CHECK-NEXT: align
bb: ; preds = %bb1, %entry
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3]
diff --git a/test/CodeGen/X86/codegen-prepare-cast.ll b/test/CodeGen/X86/codegen-prepare-cast.ll
index ae3eb5f6d68d..2a8ead8c4909 100644
--- a/test/CodeGen/X86/codegen-prepare-cast.ll
+++ b/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR4297
target datalayout =
diff --git a/test/CodeGen/X86/codemodel.ll b/test/CodeGen/X86/codemodel.ll
new file mode 100644
index 000000000000..b6ca1cedc22e
--- /dev/null
+++ b/test/CodeGen/X86/codemodel.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -code-model=small | FileCheck -check-prefix CHECK-SMALL %s
+; RUN: llc < %s -code-model=kernel | FileCheck -check-prefix CHECK-KERNEL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@data = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK-SMALL: foo:
+; CHECK-SMALL: movl data(%rip), %eax
+; CHECK-KERNEL: foo:
+; CHECK-KERNEL: movl data, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo2() nounwind readonly {
+entry:
+; CHECK-SMALL: foo2:
+; CHECK-SMALL: movl data+40(%rip), %eax
+; CHECK-KERNEL: foo2:
+; CHECK-KERNEL: movl data+40, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo3() nounwind readonly {
+entry:
+; CHECK-SMALL: foo3:
+; CHECK-SMALL: movl data-40(%rip), %eax
+; CHECK-KERNEL: foo3:
+; CHECK-KERNEL: movq $-40, %rax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo4() nounwind readonly {
+entry:
+; FIXME: We really can use movabsl here!
+; CHECK-SMALL: foo4:
+; CHECK-SMALL: movl $16777216, %eax
+; CHECK-SMALL: movl data(%rax), %eax
+; CHECK-KERNEL: foo4:
+; CHECK-KERNEL: movl data+16777216, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo1() nounwind readonly {
+entry:
+; CHECK-SMALL: foo1:
+; CHECK-SMALL: movl data+16777212(%rip), %eax
+; CHECK-KERNEL: foo1:
+; CHECK-KERNEL: movl data+16777212, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+define i32 @foo5() nounwind readonly {
+entry:
+; CHECK-SMALL: foo5:
+; CHECK-SMALL: movl data-16777216(%rip), %eax
+; CHECK-KERNEL: foo5:
+; CHECK-KERNEL: movq $-16777216, %rax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
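
The two prefixes above pin down the addressing rules of the models: with -code-model=small everything is assumed within 2GB of %rip, so loads are RIP-relative as long as the displacement fits a signed 32 bits (foo1's offset is 4*4194303 = 16777212, comfortably in range), while -code-model=kernel links the image into the top 2GB of the address space, so a plain sign-extended 32-bit absolute like movl data+40, %eax is already correct. A minimal sketch of the kernel-model half (hypothetical test):

; RUN: llc < %s -code-model=kernel | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
@g = external global i32
define i32 @rd() nounwind readonly {
; CHECK: movl g, %eax
  %v = load i32* @g, align 4
  ret i32 %v
}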
diff --git a/test/CodeGen/X86/combine-lds.ll b/test/CodeGen/X86/combine-lds.ll
index a78a042d7ec3..b49d081a64f1 100644
--- a/test/CodeGen/X86/combine-lds.ll
+++ b/test/CodeGen/X86/combine-lds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fldl | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fldl | count 1
define double @doload64(i64 %x) nounwind {
%tmp717 = bitcast i64 %x to double
diff --git a/test/CodeGen/X86/combiner-aa-0.ll b/test/CodeGen/X86/combiner-aa-0.ll
new file mode 100644
index 000000000000..a61ef7acd13c
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-0.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 -combiner-global-alias-analysis -combiner-alias-analysis
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+ %struct.Hash_Key = type { [4 x i32], i32 }
+@g_flipV_hashkey = external global %struct.Hash_Key, align 16 ; <%struct.Hash_Key*> [#uses=1]
+
+define void @foo() nounwind {
+ %t0 = load i32* undef, align 16 ; <i32> [#uses=1]
+ %t1 = load i32* null, align 4 ; <i32> [#uses=1]
+ %t2 = srem i32 %t0, 32 ; <i32> [#uses=1]
+ %t3 = shl i32 1, %t2 ; <i32> [#uses=1]
+ %t4 = xor i32 %t3, %t1 ; <i32> [#uses=1]
+ store i32 %t4, i32* null, align 4
+ %t5 = getelementptr %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0 ; <i32*> [#uses=2]
+ %t6 = load i32* %t5, align 4 ; <i32> [#uses=1]
+ %t7 = shl i32 1, undef ; <i32> [#uses=1]
+ %t8 = xor i32 %t7, %t6 ; <i32> [#uses=1]
+ store i32 %t8, i32* %t5, align 4
+ unreachable
+}
diff --git a/test/CodeGen/X86/combiner-aa-1.ll b/test/CodeGen/X86/combiner-aa-1.ll
new file mode 100644
index 000000000000..58a7129b6005
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s --combiner-alias-analysis --combiner-global-alias-analysis
+; PR4880
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+%struct.alst_node = type { %struct.node }
+%struct.arg_node = type { %struct.node, i8*, %struct.alst_node* }
+%struct.arglst_node = type { %struct.alst_node, %struct.arg_node*, %struct.arglst_node* }
+%struct.lam_node = type { %struct.alst_node, %struct.arg_node*, %struct.alst_node* }
+%struct.node = type { i32 (...)**, %struct.node* }
+
+define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
+comb_entry:
+ %.SV59 = alloca %struct.node* ; <%struct.node**> [#uses=1]
+ %0 = load i32 (...)*** null, align 4 ; <i32 (...)**> [#uses=1]
+ %1 = getelementptr inbounds i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
+ %2 = load i32 (...)** %1, align 4 ; <i32 (...)*> [#uses=1]
+ store %struct.node* undef, %struct.node** %.SV59
+ %3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
+ %4 = tail call i32 %3(%struct.node* undef) ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
index 12c0e03f6f48..d810cb1eff78 100644
--- a/test/CodeGen/X86/commute-intrinsic.ll
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
@a = external global <2 x i64> ; <<2 x i64>*> [#uses=1]
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 224f5d5e5c54..56ea26b658d8 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,7 +2,7 @@
; insertion of register-register copies.
; Make sure there are only 3 mov's for each testcase
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {\\\<mov\\\>} | count 6
diff --git a/test/CodeGen/X86/compare-add.ll b/test/CodeGen/X86/compare-add.ll
index aa69a31a48fc..358ee59c95a5 100644
--- a/test/CodeGen/X86/compare-add.ll
+++ b/test/CodeGen/X86/compare-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep add
+; RUN: llc < %s -march=x86 | not grep add
define i1 @X(i32 %X) {
%Y = add i32 %X, 14 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
new file mode 100644
index 000000000000..2be90c9764c2
--- /dev/null
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
+; and negative infinity, because those are more efficient on x86.
+
+; CHECK: oeq_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp oeq float %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: oeq_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp oeq double %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
+
+; CHECK: une_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp une float %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: une_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp une double %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
+
+; CHECK: oeq_neg_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp oeq float %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: oeq_neg_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp oeq double %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
+
+; CHECK: une_neg_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_neg_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp une float %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: une_neg_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_neg_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp une double %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
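
The equivalence behind this file, for the record: +infinity is the maximum of the ordered floats, so x == +inf holds exactly when x >= +inf (both false on NaN), and x != +inf holds exactly when x < +inf or unordered; the mirrored statements hold at -inf. That turns an equality test, which on x86 needs both ZF and PF from ucomiss/ucomisd, into a single carry-flag branch (jae/jb), which is the shape the CHECK lines assert:

;   fcmp oeq x, +inf  ==  fcmp oge x, +inf   (NaN -> false for both)
;   fcmp une x, +inf  ==  fcmp ult x, +inf   (NaN -> true for both)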
diff --git a/test/CodeGen/X86/compare_folding.ll b/test/CodeGen/X86/compare_folding.ll
index c6cda4a5b979..84c152d77215 100644
--- a/test/CodeGen/X86/compare_folding.ll
+++ b/test/CodeGen/X86/compare_folding.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | \
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
; RUN: grep movsd | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | \
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
; RUN: grep ucomisd
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
new file mode 100644
index 000000000000..be8de5e09f8a
--- /dev/null
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1
+; We should have a .no_dead_strip directive for Z but not for X/Y.
+
+@X = internal global i8 4
+@Y = internal global i32 123
+@Z = internal global i8 4
+
+@llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
+@llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 05adb50b294f..7e7acaa98a76 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep mov | count 2
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
entry:
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
index 6e3156beb0f6..ca8cc1464c77 100644
--- a/test/CodeGen/X86/const-select.ll
+++ b/test/CodeGen/X86/const-select.ll
@@ -2,7 +2,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
-; RUN: llvm-as < %s | llc | grep {LCPI1_0(,%eax,4)}
+; RUN: llc < %s | grep {LCPI1_0(,%eax,4)}
define float @f(i32 %x) nounwind readnone {
entry:
%0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
@@ -10,7 +10,7 @@ entry:
ret float %iftmp.0.0
}
-; RUN: llvm-as < %s | llc | grep {movsbl.*(%e.x,%e.x,4), %eax}
+; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
entry:
%0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 80be8545d59c..05388f9b2a96 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep LCPI | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
+; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12
declare float @qux(float %y)
diff --git a/test/CodeGen/X86/constpool.ll b/test/CodeGen/X86/constpool.ll
index 60d51e56c3b4..2aac486323a8 100644
--- a/test/CodeGen/X86/constpool.ll
+++ b/test/CodeGen/X86/constpool.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -fast-isel
-; RUN: llvm-as < %s | llc -march=x86-64
-; RUN: llvm-as < %s | llc -fast-isel -march=x86-64
+; RUN: llc < %s
+; RUN: llc < %s -fast-isel
+; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -fast-isel -march=x86-64
; PR4466
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 579e30ceadd0..2b4b83259b82 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -o %t -f -stats -info-output-file - | \
+; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
; RUN: grep {asm-printer} | grep {Number of machine instrs printed} | grep 5
; RUN: grep {leal 1(\%rsi),} %t
diff --git a/test/CodeGen/X86/copysign-zero.ll b/test/CodeGen/X86/copysign-zero.ll
index a08fa6519d71..47522d808058 100644
--- a/test/CodeGen/X86/copysign-zero.ll
+++ b/test/CodeGen/X86/copysign-zero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | not grep orpd
-; RUN: llvm-as < %s | llc | grep andpd | count 1
+; RUN: llc < %s | not grep orpd
+; RUN: llc < %s | grep andpd | count 1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
index 4539ef623de5..4fe554de75a0 100644
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ b/test/CodeGen/X86/critical-edge-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
+; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
%CC = type { %Register }
%II = type { %"struct.XX::II::$_74" }
diff --git a/test/CodeGen/X86/cstring.ll b/test/CodeGen/X86/cstring.ll
index 27d6181db8bc..5b5a7662ffff 100644
--- a/test/CodeGen/X86/cstring.ll
+++ b/test/CodeGen/X86/cstring.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep comm
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep comm
; rdar://6479858
@str1 = internal constant [1 x i8] zeroinitializer
diff --git a/test/CodeGen/X86/dag-rauw-cse.ll b/test/CodeGen/X86/dag-rauw-cse.ll
index ba84711c03eb..edcfeb78a4d0 100644
--- a/test/CodeGen/X86/dag-rauw-cse.ll
+++ b/test/CodeGen/X86/dag-rauw-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {orl \$1}
+; RUN: llc < %s -march=x86 | grep {orl \$1}
; PR3018
define i32 @test(i32 %A) nounwind {
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index b96fdfc03c68..c0ee2ac3386b 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
; RUN: grep unpcklpd %t | count 1
; RUN: grep movapd %t | count 1
; RUN: grep movaps %t | count 1
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index a673ebf47de5..c3c7990d19eb 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
entry:
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index c292140e108d..a3c1e6f0c554 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep __bzero
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
declare void @llvm.memset.i32(i8*, i8, i32, i32)
diff --git a/test/CodeGen/X86/darwin-no-dead-strip.ll b/test/CodeGen/X86/darwin-no-dead-strip.ll
index 63325b7a6ae0..452d1f8ce392 100644
--- a/test/CodeGen/X86/darwin-no-dead-strip.ll
+++ b/test/CodeGen/X86/darwin-no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep no_dead_strip
+; RUN: llc < %s | grep no_dead_strip
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8.7.2"
diff --git a/test/CodeGen/X86/darwin-quote.ll b/test/CodeGen/X86/darwin-quote.ll
new file mode 100644
index 000000000000..8fddc118f61e
--- /dev/null
+++ b/test/CodeGen/X86/darwin-quote.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+
+define internal i64 @baz() nounwind {
+ %tmp = load i64* @"+x"
+ ret i64 %tmp
+; CHECK: _baz:
+; CHECK: movl "L_+x$non_lazy_ptr", %ecx
+}
+
+
+@"+x" = external global i64
+
+; CHECK: "L_+x$non_lazy_ptr":
+; CHECK: .indirect_symbol "_+x"
diff --git a/test/CodeGen/X86/darwin-stub.ll b/test/CodeGen/X86/darwin-stub.ll
index 79eb31ac0fd4..b4d2e1aa566d 100644
--- a/test/CodeGen/X86/darwin-stub.ll
+++ b/test/CodeGen/X86/darwin-stub.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stub
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | not grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | not grep stub
@"\01LC" = internal constant [13 x i8] c"Hello World!\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
index aa690f7f4857..f0ada41338b2 100644
--- a/test/CodeGen/X86/div_const.ll
+++ b/test/CodeGen/X86/div_const.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 365384439
+; RUN: llc < %s -march=x86 | grep 365384439
define i32 @f9188_mul365384439_shift27(i32 %A) {
%tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
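
The grepped constant is the magic multiplier from the standard Granlund-Montgomery strength reduction of unsigned division: for d = 1577682821 the chosen factor is M = ceil(2^59 / d) = 365384439, and for every unsigned 32-bit A the quotient is the high half of the 32x32->64 product shifted right by 27, which is where the mul365384439_shift27 in the function name comes from. In sketch form:

;   d = 1577682821,  M = ceil(2^59 / d) = 365384439
;   A udiv d  ==  (zext(A) * M) >> 59  ==  mulhi32(A, M) >> 27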
diff --git a/test/CodeGen/X86/divrem.ll b/test/CodeGen/X86/divrem.ll
index a611eddc7682..e86b52fe82d5 100644
--- a/test/CodeGen/X86/divrem.ll
+++ b/test/CodeGen/X86/divrem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep div | count 8
define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) {
%r = sdiv i64 %x, %y
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
new file mode 100644
index 000000000000..c634c7e1fd42
--- /dev/null
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s
+
+declare dllimport void @foo()
+
+define void @bar() nounwind {
+; CHECK: call *__imp__foo
+ call void @foo()
+ ret void
+}
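
The expected __imp__foo spelling stacks two conventions: i386 targets prefix C-level symbols with an underscore (foo becomes _foo), and a dllimport function is reached through an import-table pointer named by prepending __imp_, so the call is an indirect call through that slot. Conceptually the test's call lowers like this hand-written indirection (a hypothetical sketch, not what the backend literally builds as IR):

@"\01__imp__foo" = external global void ()*
define void @bar2() nounwind {
  %fp = load void ()** @"\01__imp__foo"
  tail call void %fp()
  ret void
}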
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 885700ef82a5..3b263194a5a8 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -1,12 +1,13 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$bar)} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$qux)} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$hen)} | count 1
+; RUN: llc < %s -march=x86 -mtriple=i386-linux | FileCheck %s
; PR1339
@"$bar" = global i32 zeroinitializer
@"$qux" = external global i32
define i32 @"$foo"() nounwind {
+; CHECK: movl ($bar),
+; CHECK: addl ($qux),
+; CHECK: call ($hen)
%m = load i32* @"$bar"
%n = load i32* @"$qux"
%t = add i32 %m, %n
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index 049a32cea717..1df092018dd8 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 | not egrep {\\\$4294967289|-7\\(}
-; RUN: llvm-as < %s | llc -march=x86 | egrep {\\\$4294967280|-16\\(}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {\\-16}
+; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -march=x86-64 | grep {\\-16}
-define void @t() {
+define void @t() nounwind {
A:
br label %entry
diff --git a/test/CodeGen/X86/empty-struct-return-type.ll b/test/CodeGen/X86/empty-struct-return-type.ll
new file mode 100644
index 000000000000..34cd5d925052
--- /dev/null
+++ b/test/CodeGen/X86/empty-struct-return-type.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep call
+; PR4688
+
+; Return types can be empty structs, which can be awkward.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_ZN15QtSharedPointer22internalSafetyCheckAddEPVKv(i8* %ptr) {
+entry:
+ %0 = call { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** undef) ; <{ }> [#uses=0]
+ ret void
+}
+
+declare hidden { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** nocapture) nounwind
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 5a378e19c49a..52dcb61d87f8 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl %ebp}
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | grep {movl %ebp}
declare void @bar(<2 x i64>* %n)
diff --git a/test/CodeGen/X86/extend.ll b/test/CodeGen/X86/extend.ll
index a54b6f112d88..9553b1b578b1 100644
--- a/test/CodeGen/X86/extend.ll
+++ b/test/CodeGen/X86/extend.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
@G1 = internal global i8 0 ; <i8*> [#uses=1]
@G2 = internal global i8 0 ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/extern_weak.ll b/test/CodeGen/X86/extern_weak.ll
index 0cc56302b70f..01e32aae08ca 100644
--- a/test/CodeGen/X86/extern_weak.ll
+++ b/test/CodeGen/X86/extern_weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep weak_reference | count 2
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep weak_reference | count 2
@Y = global i32 (i8*)* @X ; <i32 (i8*)**> [#uses=0]
diff --git a/test/CodeGen/X86/extmul128.ll b/test/CodeGen/X86/extmul128.ll
index df487659edb5..9b598299e536 100644
--- a/test/CodeGen/X86/extmul128.ll
+++ b/test/CodeGen/X86/extmul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mul | count 2
+; RUN: llc < %s -march=x86-64 | grep mul | count 2
define i128 @i64_sext_i128(i64 %a, i64 %b) {
%aa = sext i64 %a to i128
diff --git a/test/CodeGen/X86/extmul64.ll b/test/CodeGen/X86/extmul64.ll
index 635da48133b6..9e20ded1111f 100644
--- a/test/CodeGen/X86/extmul64.ll
+++ b/test/CodeGen/X86/extmul64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 2
+; RUN: llc < %s -march=x86 | grep mul | count 2
define i64 @i32_sext_i64(i32 %a, i32 %b) {
%aa = sext i32 %a to i64
diff --git a/test/CodeGen/X86/extract-combine.ll b/test/CodeGen/X86/extract-combine.ll
index 842ec24e0ec8..2040e872f7fe 100644
--- a/test/CodeGen/X86/extract-combine.ll
+++ b/test/CodeGen/X86/extract-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -o %t
; RUN: not grep unpcklps %t
define i32 @foo() nounwind {
diff --git a/test/CodeGen/X86/extract-extract.ll b/test/CodeGen/X86/extract-extract.ll
new file mode 100644
index 000000000000..ad79ab9ae20f
--- /dev/null
+++ b/test/CodeGen/X86/extract-extract.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 >/dev/null
+; PR4699
+
+; Handle this extractvalue-of-extractvalue case without getting in
+; trouble with CSE in DAGCombine.
+
+ %cc = type { %crd }
+ %cr = type { i32 }
+ %crd = type { i64, %cr* }
+ %pp = type { %cc }
+
+define fastcc void @foo(%pp* nocapture byval %p_arg) {
+entry:
+ %tmp2 = getelementptr %pp* %p_arg, i64 0, i32 0 ; <%cc*> [#uses=
+ %tmp3 = load %cc* %tmp2 ; <%cc> [#uses=1]
+ %tmp34 = extractvalue %cc %tmp3, 0 ; <%crd> [#uses=1]
+ %tmp345 = extractvalue %crd %tmp34, 0 ; <i64> [#uses=1]
+ %.ptr.i = load %cr** undef ; <%cr*> [#uses=0]
+ %tmp15.i = shl i64 %tmp345, 3 ; <i64> [#uses=0]
+ store %cr* undef, %cr** undef
+ ret void
+}
+
+
diff --git a/test/CodeGen/X86/extractelement-from-arg.ll b/test/CodeGen/X86/extractelement-from-arg.ll
index 44704b6adb39..4ea37f0c46d3 100644
--- a/test/CodeGen/X86/extractelement-from-arg.ll
+++ b/test/CodeGen/X86/extractelement-from-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
define void @test(float* %R, <4 x float> %X) nounwind {
%tmp = extractelement <4 x float> %X, i32 3
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index 601690ef7cab..ee57d9b76295 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as %s -o - | llc -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
-; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
define i32 @t(<2 x i64>* %val) nounwind {
%tmp2 = load <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll
index b00c8e49e1c8..12a2ef30e17e 100644
--- a/test/CodeGen/X86/extractelement-shuffle.ll
+++ b/test/CodeGen/X86/extractelement-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Examples that exhibit a bug in DAGCombine. The case is triggered by the
; following program. The bug is that DAGCombine assumes the bit convert
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
index 484d2c4e5e10..14778f097ef5 100644
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn > %t
+; RUN: llc < %s -march=x86 -mcpu=penryn > %t
; RUN: not grep movd %t
; RUN: grep {movss %xmm} %t | count 1
; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 7ac8e048edbc..54947c394b5e 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,8 +1,7 @@
; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
; RUN: count 2
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
; RUN: grep fabs\$ | count 3
declare float @fabsf(float)
diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll
index 941f7087f624..5e88ed7f00d6 100644
--- a/test/CodeGen/X86/fast-cc-callee-pops.ll
+++ b/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret 20}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret 20}
; Check that a fastcc function pops its stack variables before returning.
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index 3f3aa468675b..e15182120094 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {add ESP, 8}
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
index c8621a7780bd..fe96c0c8be2a 100644
--- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {mov EDX, 1}
; Check that fastcc passes arguments in registers.
diff --git a/test/CodeGen/X86/fast-isel-bail.ll b/test/CodeGen/X86/fast-isel-bail.ll
index fb4f37ef90be..9072c5c7b593 100644
--- a/test/CodeGen/X86/fast-isel-bail.ll
+++ b/test/CodeGen/X86/fast-isel-bail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0
+; RUN: llc < %s -march=x86 -O0
; This file contains regression tests for cases where FastISel needs
; to bail out gracefully and let SelectionDAGISel take over.
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
new file mode 100644
index 000000000000..f2696ce814da
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx | FileCheck %s
+; PR4684
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.8"
+
+declare void @func2(<1 x i64>)
+
+define void @func1() nounwind {
+
+; This isn't spectacular, but it's MMX code at -O0...
+; CHECK: movl $2, %eax
+; CHECK: movd %rax, %mm0
+; CHECK: movd %mm0, %rdi
+
+ call void @func2(<1 x i64> <i64 2>)
+ ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 9945746807cf..5fcdbbbe53b2 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -march=x86 | grep and
+; RUN: llc < %s -fast-isel -march=x86 | grep and
define i32 @t() nounwind {
tak:
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index ac2595a7461d..84d10f32c294 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel | grep {LCPI1_0(%rip)}
+; RUN: llc < %s -fast-isel | grep {LCPI1_0(%rip)}
; Make sure fast isel uses rip-relative addressing when required.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
new file mode 100644
index 000000000000..5ffd48bce655
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64 | FileCheck %s
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+
+; CHECK: doo:
+; CHECK: xor
+define double @doo(double %x) nounwind {
+ %y = fsub double -0.0, %x
+ ret double %y
+}
+
+; CHECK: foo:
+; CHECK: xor
+define float @foo(float %x) nounwind {
+ %y = fsub float -0.0, %x
+ ret float %y
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
new file mode 100644
index 000000000000..5b8acecc3c18
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32
+
+; GEP indices are interpreted as signed integers, so they
+; should be sign-extended to 64 bits on 64-bit targets.
+; PR3181
+define i32 @test1(i32 %t3, i32* %t1) nounwind {
+ %t9 = getelementptr i32* %t1, i32 %t3 ; <i32*> [#uses=1]
+ %t15 = load i32* %t9 ; <i32> [#uses=1]
+ ret i32 %t15
+; X32: test1:
+; X32: movl (%ecx,%eax,4), %eax
+; X32: ret
+
+; X64: test1:
+; X64: movslq %edi, %rax
+; X64: movl (%rsi,%rax,4), %eax
+; X64: ret
+
+}
+define i32 @test2(i64 %t3, i32* %t1) nounwind {
+ %t9 = getelementptr i32* %t1, i64 %t3 ; <i32*> [#uses=1]
+ %t15 = load i32* %t9 ; <i32> [#uses=1]
+ ret i32 %t15
+; X32: test2:
+; X32: movl (%eax,%ecx,4), %eax
+; X32: ret
+
+; X64: test2:
+; X64: movl (%rsi,%rdi,4), %eax
+; X64: ret
+}
+
+
+
+; PR4984
+define i8 @test3(i8* %start) nounwind {
+entry:
+ %A = getelementptr i8* %start, i64 -2 ; <i8*> [#uses=1]
+ %B = load i8* %A, align 1 ; <i8> [#uses=1]
+ ret i8 %B
+
+
+; X32: test3:
+; X32: movl 4(%esp), %eax
+; X32: movb -2(%eax), %al
+; X32: ret
+
+; X64: test3:
+; X64: movb -2(%rdi), %al
+; X64: ret
+
+}
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index b2f885095ece..34f8b382522f 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel | grep {_kill@GOTPCREL(%rip)}
+; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10.0"
@f = global i8 (...)* @kill ; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index e1ff7921a11a..d0665783ce64 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -fast-isel | grep {andb \$1, %}
+; RUN: llc < %s -march=x86 -fast-isel | grep {andb \$1, %}
declare i64 @bar(i64)
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index dfee4f2a11ea..35ec1e7115b2 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -fast-isel -mtriple=i386-apple-darwin | \
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
; RUN: grep lazy_ptr, | count 2
-; RUN: llvm-as < %s | llc -fast-isel -march=x86 -relocation-model=static | \
+; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
; RUN: grep lea
@src = external global i32
diff --git a/test/CodeGen/X86/fast-isel-phys.ll b/test/CodeGen/X86/fast-isel-phys.ll
index 91dcca57cc2b..158ef551ce42 100644
--- a/test/CodeGen/X86/fast-isel-phys.ll
+++ b/test/CodeGen/X86/fast-isel-phys.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -fast-isel-abort -march=x86
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86
define i8 @t2(i8 %a, i8 %c) nounwind {
%tmp = shl i8 %a, %c
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
index 7d8c9f5e002c..35f7a72a285c 100644
--- a/test/CodeGen/X86/fast-isel-shift-imm.ll
+++ b/test/CodeGen/X86/fast-isel-shift-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {sarl \$80, %eax}
+; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax}
; PR3242
define i32 @foo(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index 6f4d2026814f..c3e527c4e5b4 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -tailcallopt -march=x86 | not grep add
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
; PR4154
; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index 4dd14e6b2163..a5e6642e09c1 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
+; RUN: llc < %s -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
; PR3654
@v = thread_local global i32 0
diff --git a/test/CodeGen/X86/fast-isel-trunc.ll b/test/CodeGen/X86/fast-isel-trunc.ll
index 039f114737bb..69b26c5442e4 100644
--- a/test/CodeGen/X86/fast-isel-trunc.ll
+++ b/test/CodeGen/X86/fast-isel-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -fast-isel -fast-isel-abort
-; RUN: llvm-as < %s | llc -march=x86-64 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort
define i8 @t1(i32 %x) signext nounwind {
%tmp1 = trunc i32 %x to i8
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index a9a016b7d0f3..3dcd736a1404 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
; This tests very minimal fast-isel functionality.
@@ -64,3 +64,12 @@ define i8* @inttoptr_i32(i32 %p) nounwind {
%t = inttoptr i32 %p to i8*
ret i8* %t
}
+
+define void @store_i1(i1* %p, i1 %t) nounwind {
+ store i1 %t, i1* %p
+ ret void
+}
+define i1 @load_i1(i1* %p) nounwind {
+ %t = load i1* %p
+ ret i1 %t
+}
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index d2db2795512d..2b48f5f371d9 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=mingw32 | \
+; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \
; RUN: grep {@12}
; Check that a fastcall function gets correct mangling
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
index 40c753ee3041..d044a2ad9e84 100644
--- a/test/CodeGen/X86/fastcc-2.ll
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
define i32 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index 94da50584c7b..52b3e57b96bc 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
+; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
; PR3122
; rdar://6400815
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index 7fc111bbc299..d45741884c7d 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt=false | grep ret | not grep 4
+; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
%struct.foo = type { [4 x i32] }
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
index f18f34deb190..d538264c6d7c 100644
--- a/test/CodeGen/X86/fastcc.ll
+++ b/test/CodeGen/X86/fastcc.ll
@@ -1,5 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | grep ecx | grep 0
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | grep xmm0 | grep 8
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+; CHECK: movsd %xmm0, 8(%esp)
+; CHECK: xorl %ecx, %ecx
@d = external global double ; <double*> [#uses=1]
@c = external global double ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
index c4f9587335e7..60205305a977 100644
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
+; RUN: llc < %s -march=x86 | grep sar | count 1
+; RUN: llc < %s -march=x86-64 | not grep sar
define i32 @test(i32 %f12) {
%tmp7.25 = lshr i32 %f12, 16
diff --git a/test/CodeGen/X86/fildll.ll b/test/CodeGen/X86/fildll.ll
index 65944fdaee4c..c5a3765c717b 100644
--- a/test/CodeGen/X86/fildll.ll
+++ b/test/CodeGen/X86/fildll.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
define fastcc double @sint64_to_fp(i64 %X) {
%R = sitofp i64 %X to double ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
index 73aa713de52e..03bad6594128 100644
--- a/test/CodeGen/X86/fmul-zero.ll
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -enable-unsafe-fp-math | not grep mulps
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mulps
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | not grep mulps
+; RUN: llc < %s -march=x86-64 | grep mulps
define void @test14(<4 x float>*) nounwind {
load <4 x float>* %0, align 1
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
index 2828ad22efbd..5e80ea547890 100644
--- a/test/CodeGen/X86/fold-add.ll
+++ b/test/CodeGen/X86/fold-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {cmpb \$0, (%r.\*,%r.\*)}
+; RUN: llc < %s -march=x86-64 | grep {cmpb \$0, (%r.\*,%r.\*)}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.6"
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 705b79549644..9f79f7723b33 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
define i32 @t1(i8* %X, i32 %i) {
entry:
diff --git a/test/CodeGen/X86/fold-call-2.ll b/test/CodeGen/X86/fold-call-2.ll
index 349f986830a0..7a2b03833ae9 100644
--- a/test/CodeGen/X86/fold-call-2.ll
+++ b/test/CodeGen/X86/fold-call-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 1
@f = external global void ()* ; <void ()**> [#uses=1]
diff --git a/test/CodeGen/X86/fold-call-3.ll b/test/CodeGen/X86/fold-call-3.ll
index 824ae003da25..337a7edb1736 100644
--- a/test/CodeGen/X86/fold-call-3.ll
+++ b/test/CodeGen/X86/fold-call-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep call | grep 560
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep call | grep 560
; rdar://6522427
%"struct.clang::Action" = type { %"struct.clang::ActionBase" }
diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll
index 53991717c674..603e9ad66caa 100644
--- a/test/CodeGen/X86/fold-call.ll
+++ b/test/CodeGen/X86/fold-call.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
declare void @bar()
diff --git a/test/CodeGen/X86/fold-imm.ll b/test/CodeGen/X86/fold-imm.ll
index 1623f31d7402..f1fcbcfd13b4 100644
--- a/test/CodeGen/X86/fold-imm.ll
+++ b/test/CodeGen/X86/fold-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep inc
-; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 4
+; RUN: llc < %s -march=x86 | grep inc
+; RUN: llc < %s -march=x86 | grep add | grep 4
define i32 @test(i32 %X) nounwind {
entry:
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 6e3da5c5ee82..eb182da10129 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
@stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
index 312427af7096..0351ecab117b 100644
--- a/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
@B = external global [1000 x i8], align 32
@A = external global [1000 x i8], align 32
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index f558aca42005..ef5202f554c5 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI1_2 | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI1_2 | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index e2141ebf6851..cc4198d7caf0 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep pcmpeqd %t | count 1
; RUN: grep xor %t | count 1
; RUN: not grep LCP %t
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 2b75781218bc..49f879504e06 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; This testcase should need to spill the -1 value on x86-32,
; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
index 1016b1081aae..2605123d6dd4 100644
--- a/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movslq | count 1
+; RUN: llc < %s -march=x86-64 | grep movslq | count 1
; PR4050
type { i64 } ; type %0
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index 32ba2171450b..cafc61a41ff2 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,6 +1,6 @@
;; Test that this FP immediate is stored in the constant pool as a float.
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3 | \
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
; RUN: grep {.long.1123418112}
define double @D() {
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 15606c34886b..08ea77d75f26 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
; These operations should be done in integer registers, eliminating constant
; pool loads, movd's etc.
diff --git a/test/CodeGen/X86/fp-stack-2results.ll b/test/CodeGen/X86/fp-stack-2results.ll
index f47fd7472ecb..321e267cb2fa 100644
--- a/test/CodeGen/X86/fp-stack-2results.ll
+++ b/test/CodeGen/X86/fp-stack-2results.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fldz
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fld1
+; RUN: llc < %s -march=x86 | grep fldz
+; RUN: llc < %s -march=x86-64 | grep fld1
; This is basically this code on x86-64:
; _Complex long double test() { return 1.0; }
diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll
new file mode 100644
index 000000000000..4768ea2019d1
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -O0 -fast-isel -regalloc=local -o -
+; PR4767
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10"
+
+define void @fn(x86_fp80 %x) nounwind ssp {
+entry:
+ %x.addr = alloca x86_fp80 ; <x86_fp80*> [#uses=5]
+ store x86_fp80 %x, x86_fp80* %x.addr
+ br i1 false, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %tmp = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp1 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %cmp = fcmp oeq x86_fp80 %tmp, %tmp1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+cond.false: ; preds = %entry
+ %tmp2 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp3 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %cmp4 = fcmp une x86_fp80 %tmp2, %tmp3 ; <i1> [#uses=1]
+ br i1 %cmp4, label %if.then, label %if.end
+
+if.then: ; preds = %cond.false, %cond.true
+ br label %if.end
+
+if.end: ; preds = %if.then, %cond.false, %cond.true
+ ret void
+}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4e61d0fbe7dc..4bdf4590b07c 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | \
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
; RUN: grep {fucomi.*st.\[12\]}
; PR1012
diff --git a/test/CodeGen/X86/fp-stack-direct-ret.ll b/test/CodeGen/X86/fp-stack-direct-ret.ll
index 78be2a39defb..5a28bb50a343 100644
--- a/test/CodeGen/X86/fp-stack-direct-ret.ll
+++ b/test/CodeGen/X86/fp-stack-direct-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep fstp
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep movsd
+; RUN: llc < %s -march=x86 | not grep fstp
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movsd
declare double @foo()
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index 5254e1c89f61..f220b24f90b0 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah | grep cvtss2sd
-; RUN: llvm-as < %s | llc -mcpu=yonah | grep fstps
-; RUN: llvm-as < %s | llc -mcpu=yonah | not grep cvtsd2ss
+; RUN: llc < %s -mcpu=yonah | grep cvtss2sd
+; RUN: llc < %s -mcpu=yonah | grep fstps
+; RUN: llc < %s -mcpu=yonah | not grep cvtsd2ss
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/fp-stack-ret-store.ll b/test/CodeGen/X86/fp-stack-ret-store.ll
index 56392deb300d..05dfc545db17 100644
--- a/test/CodeGen/X86/fp-stack-ret-store.ll
+++ b/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah | not grep movss
+; RUN: llc < %s -mcpu=yonah | not grep movss
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index 3e6ad54e73b3..c83a0cbf69e0 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
+; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
; RUN: grep fldl %t | count 1
; RUN: not grep xmm %t
; RUN: grep {sub.*esp} %t | count 1
diff --git a/test/CodeGen/X86/fp-stack-retcopy.ll b/test/CodeGen/X86/fp-stack-retcopy.ll
index 997f8df20fea..67dcb1871df4 100644
--- a/test/CodeGen/X86/fp-stack-retcopy.ll
+++ b/test/CodeGen/X86/fp-stack-retcopy.ll
@@ -1,5 +1,5 @@
; This should not copy the result of foo into an xmm register.
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
; rdar://5689903
declare double @foo()
diff --git a/test/CodeGen/X86/fp-stack-set-st1.ll b/test/CodeGen/X86/fp-stack-set-st1.ll
index 00a73aeb416f..894897a2a5f0 100644
--- a/test/CodeGen/X86/fp-stack-set-st1.ll
+++ b/test/CodeGen/X86/fp-stack-set-st1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fxch | count 2
+; RUN: llc < %s -march=x86 | grep fxch | count 2
define i32 @main() nounwind {
entry:
diff --git a/test/CodeGen/X86/fp2sint.ll b/test/CodeGen/X86/fp2sint.ll
index 80f7efbe4dec..167544488713 100644
--- a/test/CodeGen/X86/fp2sint.ll
+++ b/test/CodeGen/X86/fp2sint.ll
@@ -1,6 +1,6 @@
;; LowerFP_TO_SINT should not create a stack object if it's not needed.
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep add
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep add
define i32 @main(i32 %argc, i8** %argv) {
cond_false.i.i.i: ; preds = %bb.i5
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
index f2017b961fb5..8e823ede56a0 100644
--- a/test/CodeGen/X86/fp_constant_op.ll
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \
; RUN: grep {fadd\\|fsub\\|fdiv\\|fmul} | not grep -i ST
; Test that the load of the constant is folded into the operation.
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
index 54523265e91e..a160ac694429 100644
--- a/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fild | not grep ESP
+; RUN: llc < %s -march=x86 | grep fild | not grep ESP
define double @short(i16* %P) {
%V = load i16* %P ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 655ad3df3238..0145069b8cd6 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul}
; Test that the load of the memory location is folded into the operation.
diff --git a/test/CodeGen/X86/fsxor-alignment.ll b/test/CodeGen/X86/fsxor-alignment.ll
index 4d25fca1eb11..6a8dbcfaa7c3 100644
--- a/test/CodeGen/X86/fsxor-alignment.ll
+++ b/test/CodeGen/X86/fsxor-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
; RUN: grep -v sp | grep xorps | count 2
; Don't fold the incoming stack arguments into the xorps instructions used
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 4a85779ebf0a..68575bc401d7 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-full-lsr >%t
+; RUN: llc < %s -march=x86 -enable-full-lsr >%t
; RUN: grep {addl \\\$4,} %t | count 3
; RUN: not grep {,%} %t
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
index aaa2f84b88c9..9f6d3f75cf84 100644
--- a/test/CodeGen/X86/ga-offset.ll
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: not grep lea %t
; RUN: not grep add %t
; RUN: grep mov %t | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static > %t
; RUN: not grep lea %t
; RUN: not grep add %t
; RUN: grep mov %t | count 1
diff --git a/test/CodeGen/X86/global-sections-tls.ll b/test/CodeGen/X86/global-sections-tls.ll
new file mode 100644
index 000000000000..2c2303042bc4
--- /dev/null
+++ b/test/CodeGen/X86/global-sections-tls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+; PR4639
+@G1 = internal thread_local global i32 0 ; <i32*> [#uses=1]
+; LINUX: .section .tbss,"awT",@nobits
+; LINUX: G1:
+
+
+define i32* @foo() nounwind readnone {
+entry:
+ ret i32* @G1
+}
+
+
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
new file mode 100644
index 000000000000..38948a7dc912
--- /dev/null
+++ b/test/CodeGen/X86/global-sections.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+
+
+; int G1;
+@G1 = common global i32 0
+
+; LINUX: .type G1,@object
+; LINUX: .section .gnu.linkonce.b.G1,"aw",@nobits
+; LINUX: .comm G1,4,4
+
+; DARWIN: .comm _G1,4,2
+
+
+
+
+; const int G2 __attribute__((weak)) = 42;
+@G2 = weak_odr constant i32 42
+
+
+; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
+
+; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: _G2:
+; DARWIN: .long 42
+
+
+; int * const G3 = &G1;
+@G3 = constant i32* @G1
+
+; DARWIN: .section __DATA,__const
+; DARWIN: .globl _G3
+; DARWIN: _G3:
+; DARWIN: .long _G1
+
+
+; _Complex long long const G4 = 34;
+@G4 = constant {i64,i64} { i64 34, i64 0 }
+
+; DARWIN: .section __TEXT,__const
+; DARWIN: _G4:
+; DARWIN: .long 34
+
+
+; int G5 = 47;
+@G5 = global i32 47
+
+; LINUX: .data
+; LINUX: .globl G5
+; LINUX: G5:
+; LINUX: .long 47
+
+; DARWIN: .section __DATA,__data
+; DARWIN: .globl _G5
+; DARWIN: _G5:
+; DARWIN: .long 47
+
+
+; PR4584
+@"foo bar" = linkonce global i32 42
+
+; LINUX: .type foo_20_bar,@object
+; LINUX:.section .gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .weak foo_20_bar
+; LINUX: foo_20_bar:
+
+; DARWIN: .section __DATA,__datacoal_nt,coalesced
+; DARWIN: .globl "_foo bar"
+; DARWIN: .weak_definition "_foo bar"
+; DARWIN: "_foo bar":
+
+; PR4650
+@G6 = weak_odr constant [1 x i8] c"\01"
+
+; LINUX: .type G6,@object
+; LINUX: .section .gnu.linkonce.r.G6,"a",@progbits
+; LINUX: .weak G6
+; LINUX: G6:
+; LINUX: .ascii "\001"
+; LINUX: .size G6, 1
+
+; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: .globl _G6
+; DARWIN: .weak_definition _G6
+; DARWIN:_G6:
+; DARWIN: .ascii "\001"
+
+
+@G7 = constant [10 x i8] c"abcdefghi\00"
+
+; DARWIN: __TEXT,__cstring,cstring_literals
+; DARWIN: .globl _G7
+; DARWIN: _G7:
+; DARWIN: .asciz "abcdefghi"
+
+; LINUX: .section .rodata.str1.1,"aMS",@progbits,1
+; LINUX: .globl G7
+; LINUX: G7:
+; LINUX: .asciz "abcdefghi"
+
+
+@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+
+; DARWIN: .section __TEXT,__ustring
+; DARWIN: .globl _G8
+; DARWIN: _G8:
+
+; LINUX: .section .rodata.str2.2,"aMS",@progbits,2
+; LINUX: .globl G8
+; LINUX:G8:
+
+@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+
+; DARWIN: .section __TEXT,__const
+; DARWIN: .globl _G9
+; DARWIN: _G9:
+
+; LINUX: .section .rodata.str4.4,"aMS",@progbits,4
+; LINUX: .globl G9
+; LINUX:G9
+
+
+
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 41d91285ddbe..76ffd66524b9 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
; Use h-register extract and zero-extend.
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index b38e0e478e99..98817f3fb59f 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
; Use h-register extract and zero-extend.
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
index e8672422a7b0..d30e6b334e8b 100644
--- a/test/CodeGen/X86/h-register-store.ll
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep mov %t | count 6
; RUN: grep {movb %ah, (%rsi)} %t | count 3
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep mov %t | count 3
; RUN: grep {movb %ah, (%e} %t | count 3
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index 2777be9cc3e0..878fd93b737c 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {incb %ah} %t | count 3
; RUN: grep {movzbl %ah,} %t | count 3
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 789f3dd18f08..e97ebab69712 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep {movzbl %\[abcd\]h,} %t | count 8
; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
index 554158323974..16e13f839664 100644
--- a/test/CodeGen/X86/h-registers-2.ll
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {movzbl %\[abcd\]h,} %t | count 1
; RUN: grep {shll \$3,} %t | count 1
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
index d353a2233797..8a0b07b31c27 100644
--- a/test/CodeGen/X86/h-registers-3.ll
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
define zeroext i8 @foo() nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/hidden-vis-2.ll b/test/CodeGen/X86/hidden-vis-2.ll
index e000547f44f2..74554d15e2f6 100644
--- a/test/CodeGen/X86/hidden-vis-2.ll
+++ b/test/CodeGen/X86/hidden-vis-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep mov | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 | not grep GOT
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep mov | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | not grep GOT
@x = weak hidden global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/hidden-vis-3.ll b/test/CodeGen/X86/hidden-vis-3.ll
index 81dc76e14889..4be881e84d68 100644
--- a/test/CodeGen/X86/hidden-vis-3.ll
+++ b/test/CodeGen/X86/hidden-vis-3.ll
@@ -1,13 +1,17 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep mov | count 3
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep long | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 | not grep GOT
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
@x = external hidden global i32 ; <i32*> [#uses=1]
@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; X32: _t:
+; X32: movl _y, %eax
+
+; X64: _t:
+; X64: movl _y(%rip), %eax
+
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
%1 = load i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/hidden-vis-4.ll b/test/CodeGen/X86/hidden-vis-4.ll
index e6936de10360..a8aede52accd 100644
--- a/test/CodeGen/X86/hidden-vis-4.ll
+++ b/test/CodeGen/X86/hidden-vis-4.ll
@@ -1,11 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep long
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep comm
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s
@x = common hidden global i32 0 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; CHECK: t:
+; CHECK: movl _x, %eax
+; CHECK: .comm _x,4
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/hidden-vis.ll b/test/CodeGen/X86/hidden-vis.ll
index 058850c7b75c..a948bdfe6875 100644
--- a/test/CodeGen/X86/hidden-vis.ll
+++ b/test/CodeGen/X86/hidden-vis.ll
@@ -1,20 +1,24 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | \
-; RUN: grep .hidden | count 2
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin8.8.0 | \
-; RUN: grep .private_extern | count 2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN
-%struct.Person = type { i32 }
@a = hidden global i32 0
@b = external global i32
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
-define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) {
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
ret void
}
-declare void @function(i32)
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
-define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) {
+; DARWIN: t2:
+; DARWIN: .private_extern _a
ret void
}
diff --git a/test/CodeGen/X86/i128-and-beyond.ll b/test/CodeGen/X86/i128-and-beyond.ll
index db94b0ec05e6..907a6b8de2fe 100644
--- a/test/CodeGen/X86/i128-and-beyond.ll
+++ b/test/CodeGen/X86/i128-and-beyond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep 18446744073709551615 | count 14
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep 18446744073709551615 | count 14
; These static initializers are too big to hand off to assemblers
; as monolithic blobs.
diff --git a/test/CodeGen/X86/i128-immediate.ll b/test/CodeGen/X86/i128-immediate.ll
index 69399336e30e..c47569e700f5 100644
--- a/test/CodeGen/X86/i128-immediate.ll
+++ b/test/CodeGen/X86/i128-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | count 2
+; RUN: llc < %s -march=x86-64 | grep movq | count 2
define i128 @__addvti3() {
ret i128 -1
diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll
index f8c732ec68e4..e9d30d67019e 100644
--- a/test/CodeGen/X86/i128-mul.ll
+++ b/test/CodeGen/X86/i128-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR1198
define i64 @foo(i64 %x, i64 %y) {
diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll
index 179a0134331b..277f4283328b 100644
--- a/test/CodeGen/X86/i128-ret.ll
+++ b/test/CodeGen/X86/i128-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq 8(%rdi), %rdx}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq (%rdi), %rax}
+; RUN: llc < %s -march=x86-64 | grep {movq 8(%rdi), %rdx}
+; RUN: llc < %s -march=x86-64 | grep {movq (%rdi), %rax}
define i128 @test(i128 *%P) {
%A = load i128* %P
diff --git a/test/CodeGen/X86/i256-add.ll b/test/CodeGen/X86/i256-add.ll
index 280ed6b1b33b..5a7a7a7fe84a 100644
--- a/test/CodeGen/X86/i256-add.ll
+++ b/test/CodeGen/X86/i256-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep adcl %t | count 7
; RUN: grep sbbl %t | count 7
diff --git a/test/CodeGen/X86/i2k.ll b/test/CodeGen/X86/i2k.ll
index 712302da76d0..6116c2e71658 100644
--- a/test/CodeGen/X86/i2k.ll
+++ b/test/CodeGen/X86/i2k.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind {
%a = load i2011* %x
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
index 0d2b29c0b420..847e2095f4c5 100644
--- a/test/CodeGen/X86/i64-mem-copy.ll
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
+; RUN: llc < %s -march=x86-64 | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
; Uses movsd to load / store i64 values if sse2 is available.
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index ca751ae1d2e4..6a79ee879253 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -stats |& \
+; RUN: llc < %s -march=x86-64 -stats |& \
; RUN: grep {6 .*Number of machine instrs printed}
;; Integer absolute value, should produce something at least as good as:
diff --git a/test/CodeGen/X86/illegal-asm.ll b/test/CodeGen/X86/illegal-asm.ll
index 03cc507f23f2..43128dcf010b 100644
--- a/test/CodeGen/X86/illegal-asm.ll
+++ b/test/CodeGen/X86/illegal-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim
-; RUN: llvm-as < %s | llc -mtriple=i386-linux -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-linux -disable-fp-elim
; XFAIL: *
; Expected to run out of registers during allocation.
; PR3864
diff --git a/test/CodeGen/X86/illegal-insert.ll b/test/CodeGen/X86/illegal-insert.ll
index 59773b249104..dbf1b14684c2 100644
--- a/test/CodeGen/X86/illegal-insert.ll
+++ b/test/CodeGen/X86/illegal-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
define <4 x double> @foo0(<4 x double> %t) {
%r = insertelement <4 x double> %t, double 2.3, i32 0
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index 5ed6ddb55129..cecf77af4de1 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {mulpd %xmm3, %xmm1}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {mulpd %xmm2, %xmm0}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps %xmm3, %xmm1}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0}
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
%y = fmul <4 x double> %x, %z
diff --git a/test/CodeGen/X86/imp-def-copies.ll b/test/CodeGen/X86/imp-def-copies.ll
index 3d2f65653e7a..91178403876f 100644
--- a/test/CodeGen/X86/imp-def-copies.ll
+++ b/test/CodeGen/X86/imp-def-copies.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
%struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* }
%struct.gs_fixed_point = type { i32, i32 }
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll
index 0a2df1c977bc..1cb54b37b0e1 100644
--- a/test/CodeGen/X86/imul-lea-2.ll
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 | grep shl | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep imul
+; RUN: llc < %s -march=x86-64 | grep lea | count 3
+; RUN: llc < %s -march=x86-64 | grep shl | count 1
+; RUN: llc < %s -march=x86-64 | not grep imul
define i64 @t1(i64 %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll
index 6403a2668a39..4e8e2af0f2fe 100644
--- a/test/CodeGen/X86/imul-lea.ll
+++ b/test/CodeGen/X86/imul-lea.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | grep lea
declare i32 @foo()
diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll
index 619629407fe6..4a2c7fc5ebac 100644
--- a/test/CodeGen/X86/inline-asm-2addr.ll
+++ b/test/CodeGen/X86/inline-asm-2addr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movq
+; RUN: llc < %s -march=x86-64 | not grep movq
define i64 @t(i64 %a, i64 %b) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/inline-asm-R-constraint.ll b/test/CodeGen/X86/inline-asm-R-constraint.ll
new file mode 100644
index 000000000000..66c27ac87712
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-R-constraint.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+; 7282062
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp {
+entry:
+; CHECK: udiv8:
+; CHECK-NOT: movb %ah, (%r8)
+ %a_addr = alloca i16, align 2 ; <i16*> [#uses=2]
+ %b_addr = alloca i8, align 1 ; <i8*> [#uses=2]
+ store i16 %a, i16* %a_addr
+ store i8 %b, i8* %b_addr
+ call void asm "\09\09movw\09$2, %ax\09\09\0A\09\09divb\09$3\09\09\09\0A\09\09movb\09%al, $0\09\0A\09\09movb %ah, ($4)", "=*m,=*m,*m,*m,R,~{dirflag},~{fpsr},~{flags},~{ax}"(i8* %quotient, i8* %remainder, i16* %a_addr, i8* %b_addr, i8* %remainder) nounwind
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll
index 3c536b716f62..51ea843712d1 100644
--- a/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext test 1 | grep j
+; RUN: llc -march=x86-64 < %s | FileCheck %s
; PR3701
define i64 @t(i64* %arg) nounwind {
@@ -7,6 +7,8 @@ define i64 @t(i64* %arg) nounwind {
; <label>:1 ; preds = %0
%2 = icmp eq i64* null, %arg ; <i1> [#uses=1]
%3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%struct.thread*> [#uses=0]
+; CHECK: test
+; CHECK-NEXT: j
br i1 %2, label %4, label %5
; <label>:4 ; preds = %1
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 31d94d89c376..09b09295153e 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define x86_fp80 @test1() {
%tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"()
diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll
index 968561826b5c..ffa6ee6e019e 100644
--- a/test/CodeGen/X86/inline-asm-fpstack2.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {fld %%st(0)} %t
; PR4185
diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll
index ac89a1d9ad51..17945fe4149e 100644
--- a/test/CodeGen/X86/inline-asm-fpstack3.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {fld %%st(0)} %t
; PR4459
diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll
index c9122fad6cf7..bae2970db4ab 100644
--- a/test/CodeGen/X86/inline-asm-fpstack4.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR4484
declare x86_fp80 @ceil()
diff --git a/test/CodeGen/X86/inline-asm-fpstack5.ll b/test/CodeGen/X86/inline-asm-fpstack5.ll
index 64f3788f45dd..8b219cf92773 100644
--- a/test/CodeGen/X86/inline-asm-fpstack5.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR4485
define void @test(x86_fp80* %a) {
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
index 97eac388677b..5e76b6c0580e 100644
--- a/test/CodeGen/X86/inline-asm-modifier-n.ll
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep { 37}
+; RUN: llc < %s -march=x86 | grep { 37}
; rdar://7008959
define void @bork() nounwind {
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index ca39c120585a..78d7e776cf22 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -1,8 +1,8 @@
; PR2094
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movslq
-; RUN: llvm-as < %s | llc -march=x86-64 | grep addps
-; RUN: llvm-as < %s | llc -march=x86-64 | grep paddd
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movq
+; RUN: llc < %s -march=x86-64 | grep movslq
+; RUN: llc < %s -march=x86-64 | grep addps
+; RUN: llc < %s -march=x86-64 | grep paddd
+; RUN: llc < %s -march=x86-64 | not grep movq
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll
index 01f1397830a4..46966f5370d3 100644
--- a/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu
; PR3391
@pci_indirect = external global { } ; <{ }*> [#uses=1]
diff --git a/test/CodeGen/X86/inline-asm-pic.ll b/test/CodeGen/X86/inline-asm-pic.ll
index 04ad48d29211..0b5ff08c3f32 100644
--- a/test/CodeGen/X86/inline-asm-pic.ll
+++ b/test/CodeGen/X86/inline-asm-pic.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic | grep call
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep call
@main_q = internal global i8* null ; <i8**> [#uses=1]
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
new file mode 100644
index 000000000000..ab44206f8065
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; rdar://7066579
+
+ type { i64, i64, i64, i64, i64 } ; type %0
+
+define void @t() nounwind {
+entry:
+ %asmtmp = call %0 asm sideeffect "mov %cr0, $0 \0Amov %cr2, $1 \0Amov %cr3, $2 \0Amov %cr4, $3 \0Amov %cr8, $0 \0A", "=q,=q,=q,=q,=q,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%0> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 6df2c48415bc..1f4a13f54b75 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -O0 | grep {movl %edx, 4(%esp)} | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 | grep {movl %edx, 12(%esp)} | count 2
; rdar://6992609
target triple = "i386-apple-darwin9.0"
diff --git a/test/CodeGen/X86/inline-asm-x-scalar.ll b/test/CodeGen/X86/inline-asm-x-scalar.ll
index aafbbd1fd025..5a9628b3df74 100644
--- a/test/CodeGen/X86/inline-asm-x-scalar.ll
+++ b/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
define void @test1() {
tail call void asm sideeffect "ucomiss $0", "x"( float 0x41E0000000000000)
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index 02988fcc29f3..c66d7a8bd11b 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test1() nounwind {
; Dest is AX, dest type = i32.
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index 863cda94c5fc..2243f93f3ddd 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep mov | count 3
define fastcc i32 @sqlite3ExprResolveNames() nounwind {
entry:
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
index 5c0b0d3d3e95..f2c9cc72719c 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movw
+; RUN: llc < %s -march=x86-64 | not grep movw
define i16 @test5(i16 %f12) nounwind {
%f11 = shl i16 %f12, 2 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
index ee3ac66abef1..e44308583297 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 11
+; RUN: llc < %s -march=x86-64 | grep mov | count 11
%struct.COMPOSITE = type { i8, i16, i16 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@@ -17,7 +17,7 @@
%struct.metrics = type { i16, i16, i16, i16, i16 }
%struct.rec = type { %struct.head_type }
-define void @FontChange(i1 %foo) {
+define void @FontChange(i1 %foo) nounwind {
entry:
br i1 %foo, label %bb298, label %bb49
bb49: ; preds = %entry
diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll
index 0eef5173b858..34a29ca7d939 100644
--- a/test/CodeGen/X86/insertelement-copytoregs.ll
+++ b/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v IMPLICIT_DEF
+; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF
define void @foo(<2 x float>* %p) {
%t = insertelement <2 x float> undef, float 0.0, i32 0
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
index 95e17b40bc8b..18aade2bb302 100644
--- a/test/CodeGen/X86/insertelement-legalize.ll
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86 -disable-mmx
; Test to check that we properly legalize an insert vector element
define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
diff --git a/test/CodeGen/X86/invalid-shift-immediate.ll b/test/CodeGen/X86/invalid-shift-immediate.ll
index 5c47f5ee685f..77a9f7eda783 100644
--- a/test/CodeGen/X86/invalid-shift-immediate.ll
+++ b/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2098
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index 4e68b7757ff5..0f94b233bcfb 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 | \
; RUN: grep {movl \$4, (.*,.*,4)}
define i32 @test(i32* %X, i32 %B) {
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
index 9d9c747fa495..5ed0e00fd873 100644
--- a/test/CodeGen/X86/isel-sink2.ll
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {movb.7(%...)} %t
; RUN: not grep leal %t
diff --git a/test/CodeGen/X86/isel-sink3.ll b/test/CodeGen/X86/isel-sink3.ll
index 4e678c42cf77..8d3d97a930be 100644
--- a/test/CodeGen/X86/isel-sink3.ll
+++ b/test/CodeGen/X86/isel-sink3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {addl.\$4, %ecx}
-; RUN: llvm-as < %s | llc | not grep leal
+; RUN: llc < %s | grep {addl.\$4, %ecx}
+; RUN: llc < %s | not grep leal
; This should not sink %1 into bb1; that would increase reg pressure.
; rdar://6399178
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 7acc5ccf20e7..507a328c3ffd 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: not grep cmp %t
; RUN: not grep xor %t
; RUN: grep jne %t | count 1
diff --git a/test/CodeGen/X86/isnan.ll b/test/CodeGen/X86/isnan.ll
index 65916ff57724..4d465c0c7aa8 100644
--- a/test/CodeGen/X86/isnan.ll
+++ b/test/CodeGen/X86/isnan.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep call
+; RUN: llc < %s -march=x86 | not grep call
declare i1 @llvm.isunordered.f64(double)
diff --git a/test/CodeGen/X86/isnan2.ll b/test/CodeGen/X86/isnan2.ll
index 18fe29a883e0..7753346fd940 100644
--- a/test/CodeGen/X86/isnan2.ll
+++ b/test/CodeGen/X86/isnan2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep pxor
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep pxor
; This should not need to materialize 0.0 to evaluate the condition.
diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll
index 3799b9c70b07..8adf723aabc3 100644
--- a/test/CodeGen/X86/ispositive.ll
+++ b/test/CodeGen/X86/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {shrl.*31}
+; RUN: llc < %s -march=x86 | grep {shrl.*31}
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index a48f0616291f..c695c29e068f 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
+; RUN: llc < %s -march=x86-64 -o %t
; RUN: grep inc %t | count 1
; RUN: grep dec %t | count 2
; RUN: grep addq %t | count 13
; RUN: not grep addb %t
-; RUN: grep leaq %t | count 8
-; RUN: grep leal %t | count 4
+; RUN: grep leaq %t | count 9
+; RUN: grep leal %t | count 3
; RUN: grep movq %t | count 5
; IV users in each of the loops from other loops shouldn't cause LSR
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index cb7d6271f958..5e8e16217363 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep jns
+; RUN: llc < %s -march=x86 | grep jns
define i32 @f(i32 %X) {
entry:
diff --git a/test/CodeGen/X86/ldzero.ll b/test/CodeGen/X86/ldzero.ll
index 2db78a2145b6..dab04bc353c6 100644
--- a/test/CodeGen/X86/ldzero.ll
+++ b/test/CodeGen/X86/ldzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; verify PR 1700 is still fixed
; ModuleID = 'hh.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index a33b71c851ec..69303507d6e6 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {lea EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]}
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep add
define i32 @test1(i32 %A, i32 %B) {
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
index 39122bbdf5f3..44413d60785e 100644
--- a/test/CodeGen/X86/lea-3.ll
+++ b/test/CodeGen/X86/lea-3.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal (%rdi,%rdi,2), %eax}
+; RUN: llc < %s -march=x86-64 | grep {leal (%rdi,%rdi,2), %eax}
define i32 @test(i32 %a) {
%tmp2 = mul i32 %a, 3 ; <i32> [#uses=1]
ret i32 %tmp2
}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leaq (,%rdi,4), %rax}
+; RUN: llc < %s -march=x86-64 | grep {leaq (,%rdi,4), %rax}
define i64 @test2(i64 %a) {
%tmp2 = shl i64 %a, 2
%tmp3 = or i64 %tmp2, %a
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
index 8f0835f642fd..2171204c01d1 100644
--- a/test/CodeGen/X86/lea-4.ll
+++ b/test/CodeGen/X86/lea-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 2
+; RUN: llc < %s -march=x86-64 | grep lea | count 2
define zeroext i16 @t1(i32 %on_off) nounwind {
entry:
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
index 390e35adfaf5..3f32fd27c5c1 100644
--- a/test/CodeGen/X86/lea-recursion.ll
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 12
+; RUN: llc < %s -march=x86-64 | grep lea | count 12
; This testcase was written to demonstrate an instruction-selection problem,
; however it also happens to expose a limitation in the DAGCombiner's
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 30a477ad120c..22a96448f029 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -1,9 +1,34 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86 | not grep orl
+; RUN: llc < %s -march=x86-64 | FileCheck %s
-define i32 @test(i32 %x) {
- %tmp1 = shl i32 %x, 3 ; <i32> [#uses=1]
- %tmp2 = add i32 %tmp1, 7 ; <i32> [#uses=1]
+define i32 @test1(i32 %x) nounwind {
+ %tmp1 = shl i32 %x, 3
+ %tmp2 = add i32 %tmp1, 7
ret i32 %tmp2
+; CHECK: test1:
+; CHECK: leal 7(,%rdi,8), %eax
}
+
+; ISel the add of -4 with a neg and use an lea for the rest of the
+; arithmetic.
+define i32 @test2(i32 %x_offs) nounwind readnone {
+entry:
+ %t0 = icmp sgt i32 %x_offs, 4
+ br i1 %t0, label %bb.nph, label %bb2
+
+bb.nph:
+ %tmp = add i32 %x_offs, -5
+ %tmp6 = lshr i32 %tmp, 2
+ %tmp7 = mul i32 %tmp6, -4
+ %tmp8 = add i32 %tmp7, %x_offs
+ %tmp9 = add i32 %tmp8, -4
+ ret i32 %tmp9
+
+bb2:
+ ret i32 %x_offs
+; CHECK: test2:
+; CHECK: leal -5(%rdi), %eax
+; CHECK: andl $-4, %eax
+; CHECK: negl %eax
+; CHECK: leal -4(%rdi,%rax), %eax
+}
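
The rewritten lea.ll above pins down shift-plus-add folding into a single lea: x86's disp(base,index,scale) addressing computes base + index*scale + disp, so 8*x + 7 needs neither a separate shift nor an add. A standalone sketch of the pattern (hypothetical function name; the expected leal line assumes the same lowering the CHECK lines above rely on):

    ; RUN: llc < %s -march=x86-64 | FileCheck %s
    define i32 @mul8add7(i32 %x) nounwind {
      %s = shl i32 %x, 3        ; 8*x
      %r = add i32 %s, 7        ; 8*x + 7
      ret i32 %r
    ; CHECK: mul8add7:
    ; CHECK: leal 7(,%rdi,8), %eax
    }
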
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 97654b201ba0..574b46acea60 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 -disable-mmx -o %t -f
-; RUN: grep divdi3 %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
+; RUN: grep {call.*divdi3} %t | count 2
; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +12,4 @@
define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
%div.r = sdiv <2 x i64> %num, %div
ret <2 x i64> %div.r
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/lfence.ll b/test/CodeGen/X86/lfence.ll
index 0721d7305440..7a96ca30e753 100644
--- a/test/CodeGen/X86/lfence.ll
+++ b/test/CodeGen/X86/lfence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep lfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/limited-prec.ll b/test/CodeGen/X86/limited-prec.ll
index 6afaea429b86..7bf4ac28fdf9 100644
--- a/test/CodeGen/X86/limited-prec.ll
+++ b/test/CodeGen/X86/limited-prec.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -limit-float-precision=6 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=6 -march=x86 | \
; RUN: not grep exp | not grep log | not grep pow
-; RUN: llvm-as < %s | llc -limit-float-precision=12 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=12 -march=x86 | \
; RUN: not grep exp | not grep log | not grep pow
-; RUN: llvm-as < %s | llc -limit-float-precision=18 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=18 -march=x86 | \
; RUN: not grep exp | not grep log | not grep pow
define float @f1(float %x) nounwind noinline {
diff --git a/test/CodeGen/X86/live-out-reg-info.ll b/test/CodeGen/X86/live-out-reg-info.ll
index b6fb7dfc72c6..7132777b697c 100644
--- a/test/CodeGen/X86/live-out-reg-info.ll
+++ b/test/CodeGen/X86/live-out-reg-info.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep testl
+; RUN: llc < %s -march=x86-64 | grep {testb \[$\]1,}
; Make sure dagcombine doesn't eliminate the comparison due
; to an off-by-one bug with ComputeMaskedBits information.
diff --git a/test/CodeGen/X86/local-liveness.ll b/test/CodeGen/X86/local-liveness.ll
index 18d999b7d47e..321f208e75ca 100644
--- a/test/CodeGen/X86/local-liveness.ll
+++ b/test/CodeGen/X86/local-liveness.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep {subl %eax, %edx}
+; RUN: llc < %s -march=x86 -regalloc=local | grep {subl %eax, %edx}
; Local regalloc shouldn't assume that both the uses of the
; sub instruction are kills, because one of them is tied
diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll
index 8d9ebfb276f1..e0165fb01b53 100644
--- a/test/CodeGen/X86/long-setcc.ll
+++ b/test/CodeGen/X86/long-setcc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep shr | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep shr | count 1
+; RUN: llc < %s -march=x86 | grep xor | count 1
define i1 @t1(i64 %x) nounwind {
%B = icmp slt i64 %x, 0
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
index a8e2c31d9481..9a4c8f21237b 100644
--- a/test/CodeGen/X86/longlong-deadload.ll
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep '4{(%...)}
+; RUN: llc < %s -march=x86 | not grep '4{(%...)}
; This should not load or store the top part of *P.
define void @test(i64* %P) nounwind {
diff --git a/test/CodeGen/X86/loop-hoist.ll b/test/CodeGen/X86/loop-hoist.ll
index 73284a488ede..b52066dac62e 100644
--- a/test/CodeGen/X86/loop-hoist.ll
+++ b/test/CodeGen/X86/loop-hoist.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 |\
-; RUN: grep L_Arr.non_lazy_ptr
-; RUN: llvm-as < %s | \
-; RUN: llc -disable-post-RA-scheduler=true \
-; RUN: -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 |\
-; RUN: %prcontext L_Arr.non_lazy_ptr 1 | grep {4(%esp)}
+; LSR should hoist the load from the "Arr" stub out of the loop.
+
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK: L_Arr$non_lazy_ptr
+; CHECK: LBB1_1: ## %cond_true
@Arr = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index 8ea5bdb208e3..30b511434948 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic | \
+; RUN: llc < %s -march=x86 -relocation-model=pic | \
; RUN: grep {, 4} | count 1
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
;
; Make sure the common loop invariant A is hoisted up to the preheader,
; since too many registers are needed to subsume it into the addressing modes.
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
index b6bb81471bcd..70c91340c948 100644
--- a/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
; RUN: grep {A+} | count 2
;
; Make sure the common loop invariant A is not hoisted up to the preheader,
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
index 873710112b68..4cb56ca9ed24 100644
--- a/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep {A+} | count 2
;
; Make sure the common loop invariant A is not hoisted up to the preheader,
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index 507a9e5a2fa7..a1f38a7edc02 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
;
; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader.
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
index 4e95bdddb5b2..e340edd65060 100644
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | grep 240
-; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+; RUN: llc < %s -march=x86 | grep cmp | grep 240
+; RUN: llc < %s -march=x86 | grep inc | count 1
define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) {
entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 711f223749ce..87b606f558a4 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | grep 64
-; RUN: llvm-as < %s | llc -march=x86 | not grep inc
+; RUN: llc < %s -march=x86 | grep cmp | grep 64
+; RUN: llc < %s -march=x86 | not grep inc
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index 6e037e2aca31..4ec2a0299251 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+; RUN: llc < %s -march=x86 | grep inc | count 1
@X = weak global i16 0 ; <i16*> [#uses=1]
@Y = weak global i16 0 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll
index fa8b57aababb..81da82ec3f7c 100644
--- a/test/CodeGen/X86/loop-strength-reduce6.ll
+++ b/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep inc
+; RUN: llc < %s -march=x86-64 | not grep inc
define fastcc i32 @decodeMP3(i32 %isize, i32* %done) {
entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce7.ll b/test/CodeGen/X86/loop-strength-reduce7.ll
index b6a130a86190..4b565a67fb2d 100644
--- a/test/CodeGen/X86/loop-strength-reduce7.ll
+++ b/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep imul
+; RUN: llc < %s -march=x86 | not grep imul
target triple = "i386-apple-darwin9.6"
%struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index 1846c7d4467c..e14cd8a99e35 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep leal | not grep 16
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index c998268600cb..474450acc9b0 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,4 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK: decq
+; CHECK-NEXT: jne
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/lsr-negative-stride.ll b/test/CodeGen/X86/lsr-negative-stride.ll
index 28d041f0603f..b08356c8d309 100644
--- a/test/CodeGen/X86/lsr-negative-stride.ll
+++ b/test/CodeGen/X86/lsr-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: not grep neg %t
; RUN: not grep sub.*esp %t
; RUN: not grep esi %t
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 00e1d694ef40..40589892bb6f 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep inc %t | count 1
; RUN: not grep incw %t
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0bf347c64271..bc493bd8f724 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movz %t
; RUN: not grep sar %t
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
index 639a7a6a3bb0..f23c02019548 100644
--- a/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep and %t | count 6
; RUN: grep movzb %t | count 6
; RUN: grep sar %t | count 12
diff --git a/test/CodeGen/X86/maskmovdqu.ll b/test/CodeGen/X86/maskmovdqu.ll
index 4d1ed1dc226f..7796f0e9a19e 100644
--- a/test/CodeGen/X86/maskmovdqu.ll
+++ b/test/CodeGen/X86/maskmovdqu.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep -i EDI
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep -i RDI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -i EDI
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI
; rdar://6573467
define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 0fccc35f3d27..2dc939e666ff 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
%struct.ParmT = type { [25 x i8], i8, i8* }
@.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 97a2dd57c710..24530cd27e4b 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep call.*memcpy | count 2
+; RUN: llc < %s -march=x86-64 | grep call.*memcpy | count 2
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
index a2b452dbdfc5..d4050689f594 100644
--- a/test/CodeGen/X86/memmove-0.ll
+++ b/test/CodeGen/X86/memmove-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
index 3b2debc247dd..2057be88174d 100644
--- a/test/CodeGen/X86/memmove-1.ll
+++ b/test/CodeGen/X86/memmove-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
index 37bbe0b54133..68a9f4dfb9cb 100644
--- a/test/CodeGen/X86/memmove-2.ll
+++ b/test/CodeGen/X86/memmove-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
index 2e692c7f60b7..d8a419c07457 100644
--- a/test/CodeGen/X86/memmove-3.ll
+++ b/test/CodeGen/X86/memmove-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
index f23c7d5cb854..027db1f48395 100644
--- a/test/CodeGen/X86/memmove-4.ll
+++ b/test/CodeGen/X86/memmove-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep call
+; RUN: llc < %s | not grep call
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 2ad665cda75c..7deb52f8078e 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep rep
-; RUN: llvm-as < %s | llc -march=x86 | grep memset
+; RUN: llc < %s | not grep rep
+; RUN: llc < %s | grep memset
+
+target triple = "i386"
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index 564174c18880..cf7464d03bf2 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
%struct.x = type { i16, i16 }
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index d76d4d479246..da8fc51da8e1 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10
define void @bork() nounwind {
entry:
diff --git a/test/CodeGen/X86/mfence.ll b/test/CodeGen/X86/mfence.ll
index 6abdbcedf266..a1b22834d1aa 100644
--- a/test/CodeGen/X86/mfence.ll
+++ b/test/CodeGen/X86/mfence.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep sfence
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lfence
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 1df0e3a3e6b0..7dcd84d8a157 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -1,14 +1,12 @@
-; RUN: llvm-as < %s | llc -o %t -f
-; RUN: grep __alloca %t | count 2
-; RUN: grep 4294967288 %t
-; RUN: grep {pushl %eax} %t
-; RUN: grep 8028 %t | count 2
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
-define void @foo1(i32 %N) {
+define void @foo1(i32 %N) nounwind {
entry:
+; CHECK: _foo1:
+; CHECK: call __alloca
%tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1]
call void @bar1( i32* %tmp14 )
ret void
@@ -16,8 +14,13 @@ entry:
declare void @bar1(i32*)
-define void @foo2(i32 inreg %N) {
+define void @foo2(i32 inreg %N) nounwind {
entry:
+; CHECK: _foo2:
+; CHECK: andl $-16, %esp
+; CHECK: pushl %eax
+; CHECK: call __alloca
+; CHECK: movl 8028(%esp), %eax
%A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1]
%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
call void @bar2( i32* %A2.sub, i32 %N )
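
The converted mingw-alloca.ll checks two MinGW stack conventions: a variable-sized alloca must go through the __alloca probe helper, and the large 16-byte-aligned array forces frame realignment (andl $-16, %esp) before the probed allocation. A minimal sketch of the first property (hypothetical function name; it assumes the i386-pc-mingw32 lowering the test above relies on):

    ; RUN: llc < %s | FileCheck %s
    target triple = "i386-pc-mingw32"
    define i8* @dynbuf(i32 %n) nounwind {
    ; CHECK: _dynbuf:
    ; CHECK: call __alloca
      %p = alloca i8, i32 %n
      ret i8* %p
    }
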
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 9496cbb8bbb8..426e98e019bc 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
;
; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
; On Darwin x86-32, v1i64 values are passed in memory.
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index aac614aa7b11..c42af082364c 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
@g_v8qi = external global <8 x i8>
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index 501786ebc225..e4dfdbfe1bb1 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
;; A basic sanity check to make sure that MMX arithmetic actually compiles.
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index c6bb48927b69..1fd8f67a0ccc 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 4
+; RUN: llc < %s -march=x86-64 | grep movd | count 4
define i64 @foo(<1 x i64>* %p) {
%t = load <1 x i64>* %p
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 2047ce75e570..3607043e94fc 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
+; RUN: llc < %s -march=x86-64 | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs
; increases the number of places that need to use emms.
diff --git a/test/CodeGen/X86/mmx-emms.ll b/test/CodeGen/X86/mmx-emms.ll
index 60ba84d8728d..5ff2588da699 100644
--- a/test/CodeGen/X86/mmx-emms.ll
+++ b/test/CodeGen/X86/mmx-emms.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep emms
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms
define void @foo() {
entry:
call void @llvm.x86.mmx.emms( )
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index 0aa476dba80e..a063ee1d6cf4 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
define <2 x i32> @qux(i32 %A) nounwind {
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index f1d04fa46cad..3af09f4998d3 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
; PR2562
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 126fc9d13be9..0af7e017b626 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
define void @bork(<1 x i64>* %x) {
entry:
diff --git a/test/CodeGen/X86/mmx-s2v.ll b/test/CodeGen/X86/mmx-s2v.ll
index 4ec2403e3417..c98023c0f417 100644
--- a/test/CodeGen/X86/mmx-s2v.ll
+++ b/test/CodeGen/X86/mmx-s2v.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
; PR2574
define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index 277cf075cb93..dd0aa2ca31f4 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psrlw
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psrad
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psrlw
define i64 @t1(<1 x i64> %mm1) nounwind {
entry:
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index 4b91cb901939..e3125c7345b8 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
; PR1427
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 4dd1e47394fd..8253c200323c 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
%struct.vS1024 = type { [8 x <4 x i32>] }
%struct.vS512 = type { [4 x <4 x i32>] }
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index 95f95794531f..d21e2404882d 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/movfs.ll b/test/CodeGen/X86/movfs.ll
index af102d49569f..823e98689e7d 100644
--- a/test/CodeGen/X86/movfs.ll
+++ b/test/CodeGen/X86/movfs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fs
+; RUN: llc < %s -march=x86 | grep fs
define i32 @foo() nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index f621849e5b06..b04048b92c13 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep gs
+; RUN: llc < %s -march=x86 | grep gs
define i32 @foo() nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
index 487614f74ddf..eca9e6f436c2 100644
--- a/test/CodeGen/X86/mul-legalize.ll
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 24576
+; RUN: llc < %s -march=x86 | grep 24576
; PR2135
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/mul-remat.ll b/test/CodeGen/X86/mul-remat.ll
index ffc8cc0ba6bc..3fa005079de7 100644
--- a/test/CodeGen/X86/mul-remat.ll
+++ b/test/CodeGen/X86/mul-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
; PR1874
define i32 @test(i32 %a, i32 %b) {
diff --git a/test/CodeGen/X86/mul-shift-reassoc.ll b/test/CodeGen/X86/mul-shift-reassoc.ll
index f0ecb5bd08ee..3777d8b8cfb4 100644
--- a/test/CodeGen/X86/mul-shift-reassoc.ll
+++ b/test/CodeGen/X86/mul-shift-reassoc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
-; RUN: llvm-as < %s | llc -march=x86 | not grep add
+; RUN: llc < %s -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | not grep add
define i32 @test(i32 %X, i32 %Y) {
; Push the shl through the mul to allow an LEA to be formed, instead
diff --git a/test/CodeGen/X86/mul128.ll b/test/CodeGen/X86/mul128.ll
index c0ce6b309315..6825b99f2425 100644
--- a/test/CodeGen/X86/mul128.ll
+++ b/test/CodeGen/X86/mul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mul | count 3
+; RUN: llc < %s -march=x86-64 | grep mul | count 3
define i128 @foo(i128 %t, i128 %u) {
%k = mul i128 %t, %u
diff --git a/test/CodeGen/X86/mul64.ll b/test/CodeGen/X86/mul64.ll
index cd0f802a711e..5a25c5d0e9de 100644
--- a/test/CodeGen/X86/mul64.ll
+++ b/test/CodeGen/X86/mul64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 3
+; RUN: llc < %s -march=x86 | grep mul | count 3
define i64 @foo(i64 %t, i64 %u) {
%k = mul i64 %t, %u
diff --git a/test/CodeGen/X86/multiple-return-values-cross-block.ll b/test/CodeGen/X86/multiple-return-values-cross-block.ll
index f632b8744335..e9837d0ebbf5 100644
--- a/test/CodeGen/X86/multiple-return-values-cross-block.ll
+++ b/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
declare {x86_fp80, x86_fp80} @test()
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
index 5f7a83f88458..018d997599a9 100644
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ b/test/CodeGen/X86/multiple-return-values.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define {i64, float} @bar(i64 %a, float %b) {
%y = add i64 %a, 7
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 96cac0dc329a..0b56644f125a 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc > %t
+; RUN: opt < %s -std-compile-opts | llc > %t
; RUN: grep 2147027116 %t | count 3
; RUN: grep 2147228864 %t | count 3
; RUN: grep 2146502828 %t | count 3
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
index 0ee11b495585..18f110821bd5 100644
--- a/test/CodeGen/X86/narrow_op-1.ll
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | grep 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | grep 16842752
+; RUN: llc < %s -march=x86-64 | grep orb | count 1
+; RUN: llc < %s -march=x86-64 | grep orb | grep 1
+; RUN: llc < %s -march=x86-64 | grep orl | count 1
+; RUN: llc < %s -march=x86-64 | grep orl | grep 16842752
%struct.bf = type { i64, i16, i16, i32 }
@bfi = common global %struct.bf zeroinitializer, align 16
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
index b441794f42f9..796ef7a29e49 100644
--- a/test/CodeGen/X86/narrow_op-2.ll
+++ b/test/CodeGen/X86/narrow_op-2.ll
@@ -1,12 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 254
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 253
+; RUN: llc < %s -march=x86-64 | FileCheck %s
%struct.bf = type { i64, i16, i16, i32 }
@bfi = external global %struct.bf*
define void @t1() nounwind ssp {
entry:
+
+; CHECK: andb $-2, 10(
+; CHECK: andb $-3, 10(
+
%0 = load %struct.bf** @bfi, align 8
%1 = getelementptr %struct.bf* %0, i64 0, i32 1
%2 = bitcast i16* %1 to i32*
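
The FileCheck lines added to narrow_op-2.ll verify store narrowing: clearing a single bit of a wide bitfield load/op/store sequence should shrink to a byte-wide andb with a remapped immediate and offset rather than a full-width operation. Sketched in isolation (hypothetical global name; the exact addressing in llc's output may vary, so only the narrowed opcode and immediate are checked):

    ; RUN: llc < %s -march=x86-64 | FileCheck %s
    @g = external global i32
    define void @clearbit0() nounwind {
    ; CHECK: andb $-2,
      %v = load i32* @g, align 4
      %m = and i32 %v, -2          ; only the low byte is affected
      store i32 %m, i32* @g, align 4
      ret void
    }
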
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
index 1a7ee085b5de..57164f2bcaf9 100644
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; RUN: grep xorps %t | count 1
; Test that when -enable-unsafe-fp-math is off, we don't do the optimization
@@ -9,4 +9,4 @@ entry:
%sub = fsub float %a, %b ; <float> [#uses=1]
%neg = fsub float -0.000000e+00, %sub ; <float> [#uses=1]
ret float %neg
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index 689639f5f06d..c3f412e09ae8 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86 | not grep xor
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | not grep xor
; PR3374
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 8cc1bec2d1f9..7842eb8456eb 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86-64 | \
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
; RUN: not egrep {addsd|subsd|xor}
declare double @sin(double %f)
@@ -6,7 +6,7 @@ declare double @sin(double %f)
define double @foo(double %e)
{
%f = fsub double 0.0, %e
- %g = call double @sin(double %f)
+ %g = call double @sin(double %f) readonly
%h = fsub double 0.0, %g
ret double %h
}
diff --git a/test/CodeGen/X86/negative-subscript.ll b/test/CodeGen/X86/negative-subscript.ll
index f2bd315bd867..28f7d6b2dbae 100644
--- a/test/CodeGen/X86/negative-subscript.ll
+++ b/test/CodeGen/X86/negative-subscript.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; rdar://6559995
@a = external global [255 x i8*], align 32
diff --git a/test/CodeGen/X86/negative_zero.ll b/test/CodeGen/X86/negative_zero.ll
index 3c47b8f1fddd..29474c21f244 100644
--- a/test/CodeGen/X86/negative_zero.ll
+++ b/test/CodeGen/X86/negative_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3 | grep fchs
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs
define double @T() {
diff --git a/test/CodeGen/X86/nobt.ll b/test/CodeGen/X86/nobt.ll
index 55294280f5c8..35090e372916 100644
--- a/test/CodeGen/X86/nobt.ll
+++ b/test/CodeGen/X86/nobt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep btl
+; RUN: llc < %s -march=x86 | not grep btl
; This tests some cases where BT must not be generated. See also bt.ll.
; Fixes 20040709-[12].c in the gcc testsuite.
diff --git a/test/CodeGen/X86/nofence.ll b/test/CodeGen/X86/nofence.ll
index 132ac9437da9..244d2e9780de 100644
--- a/test/CodeGen/X86/nofence.ll
+++ b/test/CodeGen/X86/nofence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep fence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep fence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/omit-label.ll b/test/CodeGen/X86/omit-label.ll
index 457b66b35dca..0ec03ebace89 100644
--- a/test/CodeGen/X86/omit-label.ll
+++ b/test/CodeGen/X86/omit-label.ll
@@ -1,7 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep BB1_1:
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-linux-gnu | FileCheck %s
; PR4126
+; PR4732
-; Don't omit this label's definition.
+; Don't omit these labels' definitions.
+
+; CHECK: bux:
+; CHECK: LBB1_1:
define void @bux(i32 %p_53) nounwind optsize {
entry:
@@ -21,3 +25,33 @@ bb3: ; preds = %bb.i, %entry
}
declare i32 @baz(...)
+
+; Don't omit this label in the assembly output.
+; CHECK: int321:
+; CHECK: LBB2_1
+; CHECK: LBB2_1
+; CHECK: LBB2_1:
+
+define void @int321(i8 signext %p_103, i32 %uint8p_104) nounwind readnone {
+entry:
+ %tobool = icmp eq i8 %p_103, 0 ; <i1> [#uses=1]
+ %cmp.i = icmp sgt i8 %p_103, 0 ; <i1> [#uses=1]
+ %or.cond = and i1 %tobool, %cmp.i ; <i1> [#uses=1]
+ br i1 %or.cond, label %land.end.i, label %for.cond.preheader
+
+land.end.i: ; preds = %entry
+ %conv3.i = sext i8 %p_103 to i32 ; <i32> [#uses=1]
+ %div.i = sdiv i32 1, %conv3.i ; <i32> [#uses=1]
+ %tobool.i = icmp eq i32 %div.i, -2147483647 ; <i1> [#uses=0]
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %land.end.i, %entry
+ %cmp = icmp sgt i8 %p_103, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %for.end.split, label %for.cond
+
+for.cond: ; preds = %for.cond.preheader, %for.cond
+ br label %for.cond
+
+for.end.split: ; preds = %for.cond.preheader
+ ret void
+}
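
The point of omit-label.ll is that a basic-block label may look removable in the stream yet still be a branch target, as with the self-loop at for.cond above, so the assembly printer must keep its definition. A stripped-down version of the hazard (hypothetical function name; the LBB spelling assumes the usual private-label prefix):

    ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
    ; The backward branch makes the loop label a real target, so its
    ; definition must stay in the output for the jmp to resolve.
    define void @spin() nounwind {
    entry:
      br label %loop
    loop:
      br label %loop
    ; CHECK: LBB
    ; CHECK: jmp
    }
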
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
index 322850c5523f..fa2aef517477 100644
--- a/test/CodeGen/X86/opt-ext-uses.ll
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movw | count 1
+; RUN: llc < %s -march=x86 | grep movw | count 1
define i16 @t() signext {
entry:
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 90c14565e9a6..162c7a568fdf 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep cmov
+; RUN: llc < %s -march=x86 | not grep cmov
; LSR should be able to eliminate the max computations by
; making the loops use slt/ult comparisons instead of ne comparisons.
diff --git a/test/CodeGen/X86/optimize-max-1.ll b/test/CodeGen/X86/optimize-max-1.ll
index 084e1818f5dd..ad6c24dce009 100644
--- a/test/CodeGen/X86/optimize-max-1.ll
+++ b/test/CodeGen/X86/optimize-max-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep cmov
+; RUN: llc < %s -march=x86-64 | not grep cmov
; LSR should be able to eliminate both smax and umax expressions
; in loop trip counts.
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
index effc3fc737d9..8851c5b1a305 100644
--- a/test/CodeGen/X86/optimize-max-2.ll
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep cmov %t | count 2
; RUN: grep jne %t | count 1
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index 20886d5793ca..9ebf8901b77c 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep set
+; RUN: llc < %s -march=x86 | not grep set
define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
entry:
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index 7584a70b5a7d..c1fc041e7d9b 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -6,7 +6,7 @@
; Check that the shift gets turned into an LEA.
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep {mov E.X, E.X}
@G = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/packed_struct.ll b/test/CodeGen/X86/packed_struct.ll
index 2a781e7e546b..da6e8f8745fe 100644
--- a/test/CodeGen/X86/packed_struct.ll
+++ b/test/CodeGen/X86/packed_struct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep foos+5 %t
; RUN: grep foos+1 %t
; RUN: grep foos+9 %t
@@ -15,7 +15,7 @@ target triple = "i686-pc-linux-gnu"
@foos = external global %struct.anon ; <%struct.anon*> [#uses=3]
@bara = weak global [4 x <{ i32, i8 }>] zeroinitializer ; <[4 x <{ i32, i8 }>]*> [#uses=2]
-define i32 @foo() {
+define i32 @foo() nounwind {
entry:
%tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2) ; <i32> [#uses=1]
@@ -25,7 +25,7 @@ entry:
ret i32 %tmp7
}
-define i8 @bar() {
+define i8 @bar() nounwind {
entry:
%tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1) ; <i8> [#uses=1]
%tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1) ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
index 8dcd23ae735d..e521d8e37854 100644
--- a/test/CodeGen/X86/peep-test-0.ll
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep cmp %t
; RUN: not grep test %t
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
index 85e3bf251133..f83f0f6aa6ff 100644
--- a/test/CodeGen/X86/peep-test-1.ll
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep dec %t | count 1
; RUN: not grep test %t
; RUN: not grep cmp %t
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
index 788f610365cc..274517297592 100644
--- a/test/CodeGen/X86/peep-test-2.ll
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep testl
+; RUN: llc < %s -march=x86 | grep testl
; It's tempting to eliminate the testl instruction here and just use the
; EFLAGS value from the incl; however, it can't be known whether the add
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
new file mode 100644
index 000000000000..13a69edea57f
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; rdar://7226797
+
+; LLVM should omit the testl and use the flags result from the orl.
+
+; CHECK: or:
+define void @or(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: je
+ %3 = or i32 %2, %1 ; <i32> [#uses=1]
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* %A, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+; CHECK: xor:
+define void @xor(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+; CHECK: xorl $1, %e
+; CHECK-NEXT: je
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+ %3 = xor i32 %2, %1 ; <i32> [#uses=1]
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* %A, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+; CHECK: and:
+define void @and(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+ store i8 0, i8* %p
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+; CHECK: andl $3, %
+; CHECK-NEXT: movb %
+; CHECK-NEXT: je
+ %3 = and i32 %2, %1 ; <i32> [#uses=1]
+ %t = trunc i32 %3 to i8
+ store i8 %t, i8* %p
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* null, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+; Just like @and, but without the trunc+store. This should use a testl
+; instead of an andl.
+; CHECK: test:
+define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+ store i8 0, i8* %p
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+; CHECK: testb $3, %
+; CHECK-NEXT: je
+ %3 = and i32 %2, %1 ; <i32> [#uses=1]
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* null, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
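The peephole these four functions pin down: x86 arithmetic and logical instructions (orl, xorl, andl) already set ZF according to their result, so an icmp-eq-zero of that result can branch on the existing flags and the separate testl disappears. @and keeps its andl because the result is also truncated and stored; @test has no other use of the result, so the and itself degrades to a testb. A hand-written before/after for the @or case (illustrative, not llc output):

;   before:  orl   %ecx, %edx
;            testl %edx, %edx    ; redundant: orl already set ZF
;            je    .LBB0_3
;   after:   orl   %ecx, %edx
;            je    .LBB0_3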
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index e6c88bbff9d5..e4ab2b5e05a4 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd \$3, %xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd \$3, %xmm0, %xmm0}
define float @foo(<8 x float> %a) nounwind {
%c = extractelement <8 x float> %a, i32 3
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 77332d02a933..5e18044e7e1b 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2
define float @foo(<4 x float> %a) {
%b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
new file mode 100644
index 000000000000..5acf04cc06c1
--- /dev/null
+++ b/test/CodeGen/X86/personality.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+ invoke void @_Z1gv( )
+ to label %return unwind label %unwind
+
+unwind: ; preds = %entry
+ br i1 false, label %eh_then, label %cleanup20
+
+eh_then: ; preds = %unwind
+ invoke void @__cxa_end_catch( )
+ to label %return unwind label %unwind10
+
+unwind10: ; preds = %eh_then
+ %eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 ) ; <i32> [#uses=2]
+ %tmp18 = icmp slt i64 %eh_select13, 0 ; <i1> [#uses=1]
+ br i1 %tmp18, label %filter, label %cleanup20
+
+filter: ; preds = %unwind10
+ unreachable
+
+cleanup20: ; preds = %unwind10, %unwind
+ %eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ] ; <i32> [#uses=0]
+ ret void
+
+return: ; preds = %eh_then, %entry
+ ret void
+}
+
+declare void @_Z1gv()
+
+declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
+
+declare void @__gxx_personality_v0()
+
+declare void @__cxa_end_catch()
+
+; X64: Leh_frame_common_begin:
+; X64: .long ___gxx_personality_v0@GOTPCREL+4
+
+; X32: Leh_frame_common_begin:
+; X32: .long L___gxx_personality_v0$non_lazy_ptr-
+; ....
+
+; X32: .section __IMPORT,__pointers,non_lazy_symbol_pointers
+; X32: L___gxx_personality_v0$non_lazy_ptr:
+; X32: .indirect_symbol ___gxx_personality_v0
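The two prefixes capture how the personality routine is referenced from the shared exception-handling frame. PIC code cannot embed the dylib symbol's absolute address, so x86-64 Darwin points the frame at the symbol's GOT entry (@GOTPCREL) while i386 Darwin uses a non-lazy pointer slot that dyld fills in at load time. A schematic of that i386 slot (illustrative):

; L___gxx_personality_v0$non_lazy_ptr:
;         .indirect_symbol ___gxx_personality_v0
;         .long 0            ; dyld overwrites this with the real address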
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 7ca3ea8e9146..23c509c9936b 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 5
; PR2659
define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 3bbc55da16ab..2c855ce8da63 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
%struct.dpoint = type { double, double }
diff --git a/test/CodeGen/X86/pic-load-remat.ll b/test/CodeGen/X86/pic-load-remat.ll
index cb4e64044ded..77297521cd0d 100644
--- a/test/CodeGen/X86/pic-load-remat.ll
+++ b/test/CodeGen/X86/pic-load-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
define void @f() nounwind {
entry:
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
new file mode 100644
index 000000000000..3a547f95f83f
--- /dev/null
+++ b/test/CodeGen/X86/pic.ll
@@ -0,0 +1,208 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false | FileCheck %s -check-prefix=LINUX
+
+@ptr = external global i32*
+@dst = external global i32
+@src = external global i32
+
+define void @test1() nounwind {
+entry:
+ store i32* @dst, i32** @ptr
+ %tmp.s = load i32* @src
+ store i32 %tmp.s, i32* @dst
+ ret void
+
+; LINUX: test1:
+; LINUX: call .L1$pb
+; LINUX-NEXT: .L1$pb:
+; LINUX-NEXT: popl
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref1-.L1$pb),
+; LINUX: movl dst@GOT(%eax),
+; LINUX: movl ptr@GOT(%eax),
+; LINUX: movl src@GOT(%eax),
+; LINUX: ret
+}
+
+@ptr2 = global i32* null
+@dst2 = global i32 0
+@src2 = global i32 0
+
+define void @test2() nounwind {
+entry:
+ store i32* @dst2, i32** @ptr2
+ %tmp.s = load i32* @src2
+ store i32 %tmp.s, i32* @dst2
+ ret void
+
+; LINUX: test2:
+; LINUX: call .L2$pb
+; LINUX-NEXT: .L2$pb:
+; LINUX-NEXT: popl
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref2-.L2$pb), %eax
+; LINUX: movl dst2@GOT(%eax),
+; LINUX: movl ptr2@GOT(%eax),
+; LINUX: movl src2@GOT(%eax),
+; LINUX: ret
+
+}
+
+declare i8* @malloc(i32)
+
+define void @test3() nounwind {
+entry:
+ %ptr = call i8* @malloc(i32 40)
+ ret void
+; LINUX: test3:
+; LINUX: pushl %ebx
+; LINUX-NEXT: subl $8, %esp
+; LINUX-NEXT: call .L3$pb
+; LINUX-NEXT: .L3$pb:
+; LINUX-NEXT: popl %ebx
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref3-.L3$pb), %ebx
+; LINUX: movl $40, (%esp)
+; LINUX: call malloc@PLT
+; LINUX: addl $8, %esp
+; LINUX: popl %ebx
+; LINUX: ret
+}
+
+@pfoo = external global void(...)*
+
+define void @test4() nounwind {
+entry:
+ %tmp = call void(...)*(...)* @afoo()
+ store void(...)* %tmp, void(...)** @pfoo
+ %tmp1 = load void(...)** @pfoo
+ call void(...)* %tmp1()
+ ret void
+; LINUX: test4:
+; LINUX: call .L4$pb
+; LINUX-NEXT: .L4$pb:
+; LINUX: popl
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref4-.L4$pb),
+; LINUX: movl pfoo@GOT(%esi),
+; LINUX: call afoo@PLT
+; LINUX: call *
+}
+
+declare void(...)* @afoo(...)
+
+define void @test5() nounwind {
+entry:
+ call void(...)* @foo()
+ ret void
+; LINUX: test5:
+; LINUX: call .L5$pb
+; LINUX: popl %ebx
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref5-.L5$pb), %ebx
+; LINUX: call foo@PLT
+}
+
+declare void @foo(...)
+
+
+@ptr6 = internal global i32* null
+@dst6 = internal global i32 0
+@src6 = internal global i32 0
+
+define void @test6() nounwind {
+entry:
+ store i32* @dst6, i32** @ptr6
+ %tmp.s = load i32* @src6
+ store i32 %tmp.s, i32* @dst6
+ ret void
+
+; LINUX: test6:
+; LINUX: call .L6$pb
+; LINUX-NEXT: .L6$pb:
+; LINUX-NEXT: popl %eax
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref6-.L6$pb), %eax
+; LINUX: leal dst6@GOTOFF(%eax), %ecx
+; LINUX: movl %ecx, ptr6@GOTOFF(%eax)
+; LINUX: movl src6@GOTOFF(%eax), %ecx
+; LINUX: movl %ecx, dst6@GOTOFF(%eax)
+; LINUX: ret
+}
+
+
+;; Test constant pool references.
+define double @test7(i32 %a.u) nounwind {
+entry:
+ %tmp = icmp eq i32 %a.u,0
+ %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
+ ret double %retval
+
+; LINUX: .LCPI7_0:
+
+; LINUX: test7:
+; LINUX: call .L7$pb
+; LINUX: .L7$pb:
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref7-.L7$pb),
+; LINUX: fldl .LCPI7_0@GOTOFF(
+}
+
+
+;; Test jump table references.
+define void @test8(i32 %n.u) nounwind {
+entry:
+ switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+ tail call void(...)* @foo1()
+ ret void
+bb1:
+ tail call void(...)* @foo2()
+ ret void
+bb2:
+ tail call void(...)* @foo6()
+ ret void
+bb3:
+ tail call void(...)* @foo3()
+ ret void
+bb4:
+ tail call void(...)* @foo4()
+ ret void
+bb5:
+ tail call void(...)* @foo5()
+ ret void
+bb6:
+ tail call void(...)* @foo1()
+ ret void
+bb7:
+ tail call void(...)* @foo2()
+ ret void
+bb8:
+ tail call void(...)* @foo6()
+ ret void
+bb9:
+ tail call void(...)* @foo3()
+ ret void
+bb10:
+ tail call void(...)* @foo4()
+ ret void
+bb11:
+ tail call void(...)* @foo5()
+ ret void
+bb12:
+ tail call void(...)* @foo6()
+ ret void
+
+; LINUX: test8:
+; LINUX: call .L8$pb
+; LINUX: .L8$pb:
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref8-.L8$pb),
+; LINUX: addl .LJTI8_0@GOTOFF(
+; LINUX: jmpl *%ecx
+
+; LINUX: .LJTI8_0:
+; LINUX: .long .LBB8_2@GOTOFF
+; LINUX: .long .LBB8_2@GOTOFF
+; LINUX: .long .LBB8_7@GOTOFF
+; LINUX: .long .LBB8_3@GOTOFF
+; LINUX: .long .LBB8_7@GOTOFF
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
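Every function in this file opens with the same i686 ELF PIC prologue, which is worth reading once in isolation: IA-32 has no PC-relative addressing, so the GOT base is materialized with a call/pop pair. From there, external globals are loaded through GOT slots, internal ones (test6) are addressed as @GOTOFF displacements from the base, external calls go through the PLT, and jump-table entries (test8) are likewise emitted @GOTOFF so the table stays position-independent. The canonical base sequence (schematic):

;   call  .L0$pb                 ; pushes the return address, i.e. .L0$pb
; .L0$pb:
;   popl  %ebx                   ; %ebx = runtime address of .L0$pb
;   addl  $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref0-.L0$pb), %ebx   ; %ebx -> GOT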
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 04245d149a8c..b3750c1e8e67 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep 'lJTI'
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
; rdar://6971437
declare void @_Z3bari(i32)
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index e00d1e50e49b..e2746a8c0638 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 12
diff --git a/test/CodeGen/X86/postalloc-coalescing.ll b/test/CodeGen/X86/postalloc-coalescing.ll
index 9c44a5a7075d..a171436543c6 100644
--- a/test/CodeGen/X86/postalloc-coalescing.ll
+++ b/test/CodeGen/X86/postalloc-coalescing.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | grep mov | count 3
define fastcc i32 @_Z18yy_get_next_bufferv() {
entry:
diff --git a/test/CodeGen/X86/pr1462.ll b/test/CodeGen/X86/pr1462.ll
index 7f9037a137df..62549a50356a 100644
--- a/test/CodeGen/X86/pr1462.ll
+++ b/test/CodeGen/X86/pr1462.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1462
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-
diff --git a/test/CodeGen/X86/pr1489.ll b/test/CodeGen/X86/pr1489.ll
index 10fa96a3b81d..c9e24bfb13fa 100644
--- a/test/CodeGen/X86/pr1489.ll
+++ b/test/CodeGen/X86/pr1489.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llc -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
-; RUN: llvm-as < %s | llc -disable-fp-elim -O0 -mcpu=i486 | grep 3058016715 | count 1
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep -- -1236950581 | count 1
;; magic constants are 3.999f and half of 3.999
; ModuleID = '1489.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
@.str = internal constant [13 x i8] c"%d %d %d %d\0A\00" ; <[13 x i8]*> [#uses=1]
-define i32 @quux() {
+define i32 @quux() nounwind {
entry:
%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -16,7 +16,7 @@ entry:
declare i32 @lrintf(float)
-define i32 @foo() {
+define i32 @foo() nounwind {
entry:
%tmp1 = tail call i32 @lrint( double 3.999000e+00 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -26,7 +26,7 @@ entry:
declare i32 @lrint(double)
-define i32 @bar() {
+define i32 @bar() nounwind {
entry:
%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -34,7 +34,7 @@ entry:
ret i32 %tmp23
}
-define i32 @baz() {
+define i32 @baz() nounwind {
entry:
%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -42,7 +42,7 @@ entry:
ret i32 %tmp23
}
-define i32 @main() {
+define i32 @main() nounwind {
entry:
%tmp = tail call i32 @baz( ) ; <i32> [#uses=1]
%tmp1 = tail call i32 @bar( ) ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/pr1505.ll b/test/CodeGen/X86/pr1505.ll
index e9e3d9060958..883a806f38de 100644
--- a/test/CodeGen/X86/pr1505.ll
+++ b/test/CodeGen/X86/pr1505.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | not grep fldl
+; RUN: llc < %s -mcpu=i486 | not grep fldl
; PR1505
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index c70e32760216..12736cda4cd2 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 4
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstps | count 3
+; RUN: llc < %s -mcpu=i486 | grep fstpl | count 4
+; RUN: llc < %s -mcpu=i486 | grep fstps | count 3
; PR1505
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/pr2177.ll b/test/CodeGen/X86/pr2177.ll
index b03c99095725..e941bf7fdabe 100644
--- a/test/CodeGen/X86/pr2177.ll
+++ b/test/CodeGen/X86/pr2177.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2177
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index f65725db8bdc..f97663c6c1ff 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {addl \$3, (%eax)} | count 4
+; RUN: llc < %s | grep {addl \$3, (%eax)} | count 4
; PR2182
target datalayout =
diff --git a/test/CodeGen/X86/pr2326.ll b/test/CodeGen/X86/pr2326.ll
index 6cf750c6d4b0..f82dcb5d678f 100644
--- a/test/CodeGen/X86/pr2326.ll
+++ b/test/CodeGen/X86/pr2326.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sete
+; RUN: llc < %s -march=x86 | grep sete
; PR2326
define i32 @func_59(i32 %p_60) nounwind {
diff --git a/test/CodeGen/X86/pr2623.ll b/test/CodeGen/X86/pr2623.ll
index 51c86b75dd2d..5d0eb5da2155 100644
--- a/test/CodeGen/X86/pr2623.ll
+++ b/test/CodeGen/X86/pr2623.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2623
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 96976b8e466a..afd71143c458 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
; PR2656
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index 00e6e7bd8303..0760e4c7fd5b 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
; PR2659
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2849.ll b/test/CodeGen/X86/pr2849.ll
index 673598fe7249..0fec4813e109 100644
--- a/test/CodeGen/X86/pr2849.ll
+++ b/test/CodeGen/X86/pr2849.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2849
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2924.ll b/test/CodeGen/X86/pr2924.ll
index 2cab56311659..b9e8dc1740d9 100644
--- a/test/CodeGen/X86/pr2924.ll
+++ b/test/CodeGen/X86/pr2924.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2924
target datalayout =
diff --git a/test/CodeGen/X86/pr2982.ll b/test/CodeGen/X86/pr2982.ll
index f5dc1f4b9a41..3f9a5953153b 100644
--- a/test/CodeGen/X86/pr2982.ll
+++ b/test/CodeGen/X86/pr2982.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2982
target datalayout =
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
index 73f51018817a..18df97c72302 100644
--- a/test/CodeGen/X86/pr3154.ll
+++ b/test/CodeGen/X86/pr3154.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mattr=+sse2
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
; PR3154
define void @ff_flac_compute_autocorr_sse2(i32* %data, i32 %len, i32 %lag, double* %autoc) nounwind {
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
index fdc814ef3376..38c9f324ccac 100644
--- a/test/CodeGen/X86/pr3216.ll
+++ b/test/CodeGen/X86/pr3216.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {sar. \$5}
+; RUN: llc < %s -march=x86 | grep {sar. \$5}
@foo = global i8 127
diff --git a/test/CodeGen/X86/pr3241.ll b/test/CodeGen/X86/pr3241.ll
index 665a763f34f1..2f7917b77c39 100644
--- a/test/CodeGen/X86/pr3241.ll
+++ b/test/CodeGen/X86/pr3241.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3241
@g_620 = external global i32
diff --git a/test/CodeGen/X86/pr3243.ll b/test/CodeGen/X86/pr3243.ll
index 7be887b38e48..483b5bf3a2a6 100644
--- a/test/CodeGen/X86/pr3243.ll
+++ b/test/CodeGen/X86/pr3243.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3243
declare signext i16 @safe_mul_func_int16_t_s_s(i16 signext, i32) nounwind readnone optsize
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
index 0765f86405c5..2598c2f976b2 100644
--- a/test/CodeGen/X86/pr3244.ll
+++ b/test/CodeGen/X86/pr3244.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3244
@g_62 = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/pr3250.ll b/test/CodeGen/X86/pr3250.ll
index dce154f1855c..cccbf54bcc6b 100644
--- a/test/CodeGen/X86/pr3250.ll
+++ b/test/CodeGen/X86/pr3250.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3250
declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind
diff --git a/test/CodeGen/X86/pr3317.ll b/test/CodeGen/X86/pr3317.ll
index aa5ee7ce7c8d..9d6626b324d5 100644
--- a/test/CodeGen/X86/pr3317.ll
+++ b/test/CodeGen/X86/pr3317.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3317
%ArraySInt16 = type { %JavaObject, i8*, [0 x i16] }
diff --git a/test/CodeGen/X86/pr3366.ll b/test/CodeGen/X86/pr3366.ll
index a6f3e92676ae..f813e2e58801 100644
--- a/test/CodeGen/X86/pr3366.ll
+++ b/test/CodeGen/X86/pr3366.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movzbl
+; RUN: llc < %s -march=x86 | grep movzbl
; PR3366
define void @_ada_c34002a() nounwind {
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index d4a98103ecc5..f7af927d6136 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep fstpt
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep fstpt
; PR3457
; rdar://6548010
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
index f67ff75d46ae..1372a1522bd4 100644
--- a/test/CodeGen/X86/pr3495-2.ll
+++ b/test/CodeGen/X86/pr3495-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
+; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
target triple = "i386-apple-darwin9.6"
%struct.constraintVCGType = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index ca6204c101e9..4b62bf40da4b 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
+; RUN: llc < %s -march=x86 -stats |& grep {Number of reloads omited} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of available reloads turned into copies} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 40
; PR3495
; The loop reversal kicks in once here, resulting in one fewer instruction.
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index f743700fd251..7cdeaa099271 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
; PR3522
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
index 4f9a5820e043..e89b507414eb 100644
--- a/test/CodeGen/X86/pre-split1.ll
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
; XFAIL: *
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
index 60297e9a5dc6..db039bd97acd 100644
--- a/test/CodeGen/X86/pre-split10.ll
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
define i32 @main(i32 %argc, i8** %argv) nounwind {
entry:
diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll
new file mode 100644
index 000000000000..0a9f4e33f34c
--- /dev/null
+++ b/test/CodeGen/X86/pre-split11.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
+
+@.str = private constant [28 x i8] c"\0A\0ADOUBLE D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str1 = private constant [37 x i8] c"double to long l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+@.str2 = private constant [35 x i8] c"double to uint ui1 = %u\09\09(0x%x)\0A\00", align 8 ; <[35 x i8]*> [#uses=1]
+@.str3 = private constant [37 x i8] c"double to ulong ul1 = %lu\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+; CHECK: movsd %xmm0, (%rsp)
+entry:
+ %0 = icmp sgt i32 %argc, 4 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb2
+
+bb: ; preds = %entry
+ %1 = getelementptr inbounds i8** %argv, i64 4 ; <i8**> [#uses=1]
+ %2 = load i8** %1, align 8 ; <i8*> [#uses=1]
+ %3 = tail call double @atof(i8* %2) nounwind ; <double> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb, %entry
+ %storemerge = phi double [ %3, %bb ], [ 2.000000e+00, %entry ] ; <double> [#uses=4]
+ %4 = fptoui double %storemerge to i32 ; <i32> [#uses=2]
+ %5 = fptoui double %storemerge to i64 ; <i64> [#uses=2]
+ %6 = fptosi double %storemerge to i64 ; <i64> [#uses=2]
+ %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), double %storemerge) nounwind ; <i32> [#uses=0]
+ %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str1, i64 0, i64 0), i64 %6, i64 %6) nounwind ; <i32> [#uses=0]
+ %9 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0), i32 %4, i32 %4) nounwind ; <i32> [#uses=0]
+ %10 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str3, i64 0, i64 0), i64 %5, i64 %5) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare double @atof(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind
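-pre-alloc-split splits long live intervals at likely spill points before register allocation runs. Here %storemerge is live across three fp-to-int conversions and four varargs printf calls that clobber the XMM registers, so the expected split materializes as a stack spill; the single CHECK pins that store. A sketch of the shape being checked for (illustrative, not llc output):

;   movsd %xmm0, (%rsp)     ; spill at the split point
;   ...                     ; calls clobber the XMM registers
;   movsd (%rsp), %xmm0     ; reload for the next use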
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
index 2009ad8b66d8..ba902f95513d 100644
--- a/test/CodeGen/X86/pre-split2.ll
+++ b/test/CodeGen/X86/pre-split2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | count 2
define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
index f34f1447edda..2e314207c3e3 100644
--- a/test/CodeGen/X86/pre-split3.ll
+++ b/test/CodeGen/X86/pre-split3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
index a570f7304f37..10cef276c62f 100644
--- a/test/CodeGen/X86/pre-split4.ll
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
define i32 @main(i32 %argc, i8** %argv) nounwind {
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
index b83003f30fea..8def460809f2 100644
--- a/test/CodeGen/X86/pre-split5.ll
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
target triple = "i386-apple-darwin9.5"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
index e771b8067c21..d38e63088d1c 100644
--- a/test/CodeGen/X86/pre-split6.ll
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd 8} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd 8} | count 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
index cd9d205a7138..0b81c0bc09fe 100644
--- a/test/CodeGen/X86/pre-split7.ll
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
@object_distance = external global double, align 8 ; <double*> [#uses=1]
@axis_slope_angle = external global double, align 8 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index 22598195ed12..ea4b9496b3c3 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index 1be960f53a54..c27d925d43e4 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index d6517f7ef5b1..fac5915aae88 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse > %t
+; RUN: llc < %s -march=x86 -mattr=+sse > %t
; RUN: grep prefetchnta %t
; RUN: grep prefetcht0 %t
; RUN: grep prefetcht1 %t
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
index 747812856773..8aa744ead8ca 100644
--- a/test/CodeGen/X86/private-2.ll
+++ b/test/CodeGen/X86/private-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
; Quote should be outside of private prefix.
; rdar://6855766x
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index caf1035c3433..22b6f35a70ef 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -1,9 +1,9 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep .Lfoo:
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep .Lbaz:
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lfoo:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
declare void @foo()
diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll
new file mode 100644
index 000000000000..72a428ea3208
--- /dev/null
+++ b/test/CodeGen/X86/ptrtoint-constexpr.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i386-linux | FileCheck %s
+ %union.x = type { i64 }
+
+; CHECK: .globl r
+; CHECK: r:
+; CHECK: .quad ((r) & 4294967295)
+
+@r = global %union.x { i64 ptrtoint (%union.x* @r to i64) }, align 4
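On i386 a pointer is 32 bits, so ptrtoint of @r's own address to i64 is a zero-extension of the symbol value, and the asm printer has to express that relocation as an assembler expression rather than a plain .quad r. Schematically (illustrative):

;   i64 ptrtoint (%union.x* @r to i64)   ==>   .quad ((r) & 4294967295)
;   the mask zero-extends the 32-bit symbol address into the 64-bit field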
diff --git a/test/CodeGen/X86/rdtsc.ll b/test/CodeGen/X86/rdtsc.ll
index f5d947fcbabb..f21a44c36073 100644
--- a/test/CodeGen/X86/rdtsc.ll
+++ b/test/CodeGen/X86/rdtsc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep rdtsc
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rdtsc
+; RUN: llc < %s -march=x86 | grep rdtsc
+; RUN: llc < %s -march=x86-64 | grep rdtsc
declare i64 @llvm.readcyclecounter()
define i64 @foo() {
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index 60e16b05ca75..1ffb4e3c78f6 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
-; RUN: not grep subq %t
-; RUN: not grep addq %t
-; RUN: grep {\\-4(%%rsp)} %t | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-red-zone > %t
-; RUN: grep subq %t | count 1
-; RUN: grep addq %t | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; First without noredzone.
+; CHECK: f0:
+; CHECK: -4(%rsp)
+; CHECK: -4(%rsp)
+; CHECK: ret
define x86_fp80 @f0(float %f) nounwind readnone {
entry:
%0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1]
ret x86_fp80 %0
}
+
+; Then with noredzone.
+; CHECK: f1:
+; CHECK: subq $4, %rsp
+; CHECK: (%rsp)
+; CHECK: (%rsp)
+; CHECK: addq $4, %rsp
+; CHECK: ret
+define x86_fp80 @f1(float %f) nounwind readnone noredzone {
+entry:
+ %0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %0
+}
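The contrast here is the x86-64 SysV red zone: the 128 bytes below %rsp that a leaf function may use without moving the stack pointer. f0 therefore stores and reloads at -4(%rsp) with no subq/addq at all, while the noredzone attribute on f1 forces an explicit frame. Roughly (illustrative, not llc output):

;   f0:  movss %xmm0, -4(%rsp)   ; scratch slot inside the red zone
;        flds  -4(%rsp)          ; reload as x86_fp80, still no adjustment
;   f1:  subq  $4, %rsp          ; noredzone: must claim the space first
;        ...
;        addq  $4, %rsp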
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index dea7d7eb0ea4..9557d17150ec 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep subq %t | count 1
; RUN: grep addq %t | count 1
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index 6d8cfbb781f9..e0b5f7a870bb 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -1,7 +1,7 @@
;; Both functions in this testcase should codegen to the same function, and
;; neither of them should require spilling anything to the stack.
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: not grep {Number of register spills}
;; This can be compiled to use three registers if the loads are not
diff --git a/test/CodeGen/X86/rem-2.ll b/test/CodeGen/X86/rem-2.ll
index 3e17fc0b4309..1b2af4b87a32 100644
--- a/test/CodeGen/X86/rem-2.ll
+++ b/test/CodeGen/X86/rem-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep cltd
+; RUN: llc < %s -march=x86 | not grep cltd
define i32 @test(i32 %X) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll
index bba1f9b96bb4..394070ecdf23 100644
--- a/test/CodeGen/X86/rem.ll
+++ b/test/CodeGen/X86/rem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep div
+; RUN: llc < %s -march=x86 | not grep div
define i32 @test1(i32 %X) {
%tmp1 = srem i32 %X, 255 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index 8dfed5ed52e2..3e813209d410 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
declare void @bar() nounwind
diff --git a/test/CodeGen/X86/remat-mov-1.ll b/test/CodeGen/X86/remat-mov-1.ll
index 98b7bb45e9e7..d71b7a5b910a 100644
--- a/test/CodeGen/X86/remat-mov-1.ll
+++ b/test/CodeGen/X86/remat-mov-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 4294967295 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep -- -1 | grep mov | count 2
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
new file mode 100644
index 000000000000..790ae83c2b2b
--- /dev/null
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
+; RUN: not grep xor %t
+; RUN: not grep movap %t
+; RUN: grep {\\.zero} %t
+
+; Remat should be able to fold the zero constant into the div instructions
+; as a constant-pool load.
+
+define void @foo(double* nocapture %x, double* nocapture %y) nounwind {
+entry:
+ %tmp1 = load double* %x ; <double> [#uses=1]
+ %arrayidx4 = getelementptr inbounds double* %x, i64 1 ; <double*> [#uses=1]
+ %tmp5 = load double* %arrayidx4 ; <double> [#uses=1]
+ %arrayidx8 = getelementptr inbounds double* %x, i64 2 ; <double*> [#uses=1]
+ %tmp9 = load double* %arrayidx8 ; <double> [#uses=1]
+ %arrayidx12 = getelementptr inbounds double* %x, i64 3 ; <double*> [#uses=1]
+ %tmp13 = load double* %arrayidx12 ; <double> [#uses=1]
+ %arrayidx16 = getelementptr inbounds double* %x, i64 4 ; <double*> [#uses=1]
+ %tmp17 = load double* %arrayidx16 ; <double> [#uses=1]
+ %arrayidx20 = getelementptr inbounds double* %x, i64 5 ; <double*> [#uses=1]
+ %tmp21 = load double* %arrayidx20 ; <double> [#uses=1]
+ %arrayidx24 = getelementptr inbounds double* %x, i64 6 ; <double*> [#uses=1]
+ %tmp25 = load double* %arrayidx24 ; <double> [#uses=1]
+ %arrayidx28 = getelementptr inbounds double* %x, i64 7 ; <double*> [#uses=1]
+ %tmp29 = load double* %arrayidx28 ; <double> [#uses=1]
+ %arrayidx32 = getelementptr inbounds double* %x, i64 8 ; <double*> [#uses=1]
+ %tmp33 = load double* %arrayidx32 ; <double> [#uses=1]
+ %arrayidx36 = getelementptr inbounds double* %x, i64 9 ; <double*> [#uses=1]
+ %tmp37 = load double* %arrayidx36 ; <double> [#uses=1]
+ %arrayidx40 = getelementptr inbounds double* %x, i64 10 ; <double*> [#uses=1]
+ %tmp41 = load double* %arrayidx40 ; <double> [#uses=1]
+ %arrayidx44 = getelementptr inbounds double* %x, i64 11 ; <double*> [#uses=1]
+ %tmp45 = load double* %arrayidx44 ; <double> [#uses=1]
+ %arrayidx48 = getelementptr inbounds double* %x, i64 12 ; <double*> [#uses=1]
+ %tmp49 = load double* %arrayidx48 ; <double> [#uses=1]
+ %arrayidx52 = getelementptr inbounds double* %x, i64 13 ; <double*> [#uses=1]
+ %tmp53 = load double* %arrayidx52 ; <double> [#uses=1]
+ %arrayidx56 = getelementptr inbounds double* %x, i64 14 ; <double*> [#uses=1]
+ %tmp57 = load double* %arrayidx56 ; <double> [#uses=1]
+ %arrayidx60 = getelementptr inbounds double* %x, i64 15 ; <double*> [#uses=1]
+ %tmp61 = load double* %arrayidx60 ; <double> [#uses=1]
+ %arrayidx64 = getelementptr inbounds double* %x, i64 16 ; <double*> [#uses=1]
+ %tmp65 = load double* %arrayidx64 ; <double> [#uses=1]
+ %div = fdiv double %tmp1, 0.000000e+00 ; <double> [#uses=1]
+ store double %div, double* %y
+ %div70 = fdiv double %tmp5, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx72 = getelementptr inbounds double* %y, i64 1 ; <double*> [#uses=1]
+ store double %div70, double* %arrayidx72
+ %div74 = fdiv double %tmp9, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx76 = getelementptr inbounds double* %y, i64 2 ; <double*> [#uses=1]
+ store double %div74, double* %arrayidx76
+ %div78 = fdiv double %tmp13, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx80 = getelementptr inbounds double* %y, i64 3 ; <double*> [#uses=1]
+ store double %div78, double* %arrayidx80
+ %div82 = fdiv double %tmp17, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx84 = getelementptr inbounds double* %y, i64 4 ; <double*> [#uses=1]
+ store double %div82, double* %arrayidx84
+ %div86 = fdiv double %tmp21, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx88 = getelementptr inbounds double* %y, i64 5 ; <double*> [#uses=1]
+ store double %div86, double* %arrayidx88
+ %div90 = fdiv double %tmp25, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx92 = getelementptr inbounds double* %y, i64 6 ; <double*> [#uses=1]
+ store double %div90, double* %arrayidx92
+ %div94 = fdiv double %tmp29, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx96 = getelementptr inbounds double* %y, i64 7 ; <double*> [#uses=1]
+ store double %div94, double* %arrayidx96
+ %div98 = fdiv double %tmp33, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx100 = getelementptr inbounds double* %y, i64 8 ; <double*> [#uses=1]
+ store double %div98, double* %arrayidx100
+ %div102 = fdiv double %tmp37, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx104 = getelementptr inbounds double* %y, i64 9 ; <double*> [#uses=1]
+ store double %div102, double* %arrayidx104
+ %div106 = fdiv double %tmp41, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx108 = getelementptr inbounds double* %y, i64 10 ; <double*> [#uses=1]
+ store double %div106, double* %arrayidx108
+ %div110 = fdiv double %tmp45, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx112 = getelementptr inbounds double* %y, i64 11 ; <double*> [#uses=1]
+ store double %div110, double* %arrayidx112
+ %div114 = fdiv double %tmp49, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx116 = getelementptr inbounds double* %y, i64 12 ; <double*> [#uses=1]
+ store double %div114, double* %arrayidx116
+ %div118 = fdiv double %tmp53, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx120 = getelementptr inbounds double* %y, i64 13 ; <double*> [#uses=1]
+ store double %div118, double* %arrayidx120
+ %div122 = fdiv double %tmp57, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx124 = getelementptr inbounds double* %y, i64 14 ; <double*> [#uses=1]
+ store double %div122, double* %arrayidx124
+ %div126 = fdiv double %tmp61, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx128 = getelementptr inbounds double* %y, i64 15 ; <double*> [#uses=1]
+ store double %div126, double* %arrayidx128
+ %div130 = fdiv double %tmp65, 0.000000e+00 ; <double> [#uses=1]
+ %arrayidx132 = getelementptr inbounds double* %y, i64 16 ; <double*> [#uses=1]
+ store double %div130, double* %arrayidx132
+ ret void
+}
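The point of this test: 0.0 appears as a divisor twice, far apart, under heavy register pressure. Rather than materializing it once (xorps) and keeping a register live across all sixteen divides, or spilling and reloading it, rematerialization should re-create it where needed; since it feeds a divsd, the cheapest form is a constant-pool load folded straight into the instruction, hence the greps for no xor/movap and for the .zero pool entry. Roughly (illustrative, pool label name assumed):

;   instead of:  xorps %xmm1, %xmm1          ; ties up a register
;                divsd %xmm1, %xmm0
;   expect:      divsd .LCPI0_0(%rip), %xmm0 ; zero folded from the pool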
diff --git a/test/CodeGen/X86/ret-addr.ll b/test/CodeGen/X86/ret-addr.ll
index 06a10c6a30f0..b7b57ab3b842 100644
--- a/test/CodeGen/X86/ret-addr.ll
+++ b/test/CodeGen/X86/ret-addr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -disable-fp-elim -march=x86 | not grep xor
-; RUN: llvm-as < %s | llc -disable-fp-elim -march=x86-64 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86-64 | not grep xor
define i8* @h() nounwind readnone optsize {
entry:
diff --git a/test/CodeGen/X86/ret-i64-0.ll b/test/CodeGen/X86/ret-i64-0.ll
index c59e4cf9439e..bca0f056b90d 100644
--- a/test/CodeGen/X86/ret-i64-0.ll
+++ b/test/CodeGen/X86/ret-i64-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llc < %s -march=x86 | grep xor | count 2
define i64 @foo() nounwind {
ret i64 0
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 178ff4e8f7e0..04b57dd8d6c0 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx,+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2
; rdar://6602459
@g_v1di = external global <1 x i64>
diff --git a/test/CodeGen/X86/rip-rel-address.ll b/test/CodeGen/X86/rip-rel-address.ll
index 2c0926a65443..24ff07b4b219 100644
--- a/test/CodeGen/X86/rip-rel-address.ll
+++ b/test/CodeGen/X86/rip-rel-address.ll
@@ -1,7 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static | grep {a(%rip)}
+; RUN: llc < %s -march=x86-64 -relocation-model=pic -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=PIC64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=STATIC64
+
+; Use %rip-relative addressing even in static mode on x86-64, because
+; it has a smaller encoding.
@a = internal global double 3.4
define double @foo() nounwind {
%a = load double* @a
ret double %a
+
+; PIC64: movsd _a(%rip), %xmm0
+; STATIC64: movsd a(%rip), %xmm0
}
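The encoding argument in the comment is concrete: in 64-bit mode a RIP-relative operand is ModRM with mod=00, r/m=101 plus a 4-byte displacement, while an absolute disp32 operand additionally needs a SIB byte, so a(%rip) is one byte shorter than a flat reference to a. Schematically (byte layout, illustrative):

;   movsd a(%rip), %xmm0    ; opcode + ModRM + disp32
;   movsd a, %xmm0          ; opcode + ModRM + SIB + disp32  (one byte more)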
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index b800e098ce25..276f8bb48d06 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -relocation-model=static | grep rodata | count 3
-; RUN: llvm-as < %s | llc -relocation-model=static | grep -F "rodata.cst" | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep rodata | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.ro" | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel" | count 4
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.local" | count 1
+; RUN: llc < %s -relocation-model=static | grep rodata | count 3
+; RUN: llc < %s -relocation-model=static | grep -F "rodata.cst" | count 2
+; RUN: llc < %s -relocation-model=pic | grep rodata | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro" | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel" | count 4
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.local" | count 1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
index c196ce2cc139..42ece47b0300 100644
--- a/test/CodeGen/X86/rot16.ll
+++ b/test/CodeGen/X86/rot16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep rol %t | count 3
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index 7cebcb86ce12..655ed272837a 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep rol %t | count 3
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
index 2408359a141d..4e082bb860b4 100644
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep rol %t | count 3
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll
index c567c0d33cf2..1e20273194d5 100644
--- a/test/CodeGen/X86/rotate.ll
+++ b/test/CodeGen/X86/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {ro\[rl\]} | count 12
define i32 @rotl32(i32 %A, i8 %Amt) {
diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll
index 40e954cbdd01..2eea3999e7b8 100644
--- a/test/CodeGen/X86/rotate2.ll
+++ b/test/CodeGen/X86/rotate2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rol | count 2
+; RUN: llc < %s -march=x86-64 | grep rol | count 2
define i64 @test1(i64 %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/scalar-extract.ll b/test/CodeGen/X86/scalar-extract.ll
index 172c424a782f..284583840933 100644
--- a/test/CodeGen/X86/scalar-extract.ll
+++ b/test/CodeGen/X86/scalar-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+mmx -o %t
; RUN: not grep movq %t
; Check that widening doesn't introduce a mmx register in this case when
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index 6a6283a10dab..fe40758d8ecd 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -1,20 +1,20 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep min | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep max | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep min | count 1
+; RUN: llc < %s -march=x86-64 | grep max | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
declare float @bar()
-define float @foo(float %a)
+define float @foo(float %a) nounwind
{
%s = call float @bar()
%t = fcmp olt float %s, %a
%u = select i1 %t, float %s, float %a
ret float %u
}
-define float @hem(float %a)
+define float @hem(float %a) nounwind
{
%s = call float @bar()
- %t = fcmp uge float %s, %a
+ %t = fcmp ogt float %s, %a
%u = select i1 %t, float %s, float %a
ret float %u
}
diff --git a/test/CodeGen/X86/scalar_sse_minmax.ll b/test/CodeGen/X86/scalar_sse_minmax.ll
index 8c030b88440d..bc4ab5d836c7 100644
--- a/test/CodeGen/X86/scalar_sse_minmax.ll
+++ b/test/CodeGen/X86/scalar_sse_minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
; RUN: grep mins | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
; RUN: grep maxs | count 2
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/X86/scalarize-bitcast.ll b/test/CodeGen/X86/scalarize-bitcast.ll
index a07f9396040e..f6b29ecfbb60 100644
--- a/test/CodeGen/X86/scalarize-bitcast.ll
+++ b/test/CodeGen/X86/scalarize-bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR3886
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
index b253dd975ff0..81c919f8dfff 100644
--- a/test/CodeGen/X86/scev-interchange.ll
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
- %struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
%"struct.DataOutBase::GmvFlags" = type { i32 }
%"struct.FE_DGPNonparametric<3>" = type { [1156 x i8], i32, %"struct.PolynomialSpace<1>" }
- %"struct.FE_Q<3>" = type { %"struct.FE_DGPNonparametric<3>", %"struct.std::vector<int,std::allocator<int> >" }
%"struct.FiniteElementData<1>" = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.Line = type { [2 x i32] }
%"struct.PolynomialSpace<1>" = type { %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >", i32, %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >" }
@@ -12,9 +10,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
%"struct.TableBase<2,double>" = type { %struct.Subscriptor, double*, i32, %"struct.TableIndices<2>" }
%"struct.TableIndices<2>" = type { %struct.Line }
- %struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
- %struct.pthread_attr_t = type { i64, [48 x i8] }
- %struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
%"struct.std::_Bit_iterator_base" = type { i64*, i32 }
%"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
@@ -34,21 +29,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" }
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_init ; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.DataOutBase::GmvFlags"*, i32)* @pthread_mutexattr_settype ; <i32 (%"struct.DataOutBase::GmvFlags"*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_destroy ; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-
declare void @_Unwind_Resume(i8*)
declare i8* @_Znwm(i64)
@@ -71,7 +51,7 @@ declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vecto
declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32)
-define fastcc void @_ZN4FE_QILi3EEC1Ej(%"struct.FE_Q<3>"* %this, i32 %degree) {
+define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) {
entry:
invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef)
to label %invcont.i unwind label %lpad.i
@@ -356,31 +336,3 @@ lpad204.i: ; preds = %invcont86.i
}
declare fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias nocapture sret, i32)
-
-declare i32 @pthread_once(i32*, void ()*)
-
-declare i8* @pthread_getspecific(i32)
-
-declare i32 @pthread_setspecific(i32, i8*)
-
-declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare i32 @pthread_cancel(i64)
-
-declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)
-
-declare i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare i32 @pthread_key_delete(i32)
-
-declare i32 @pthread_mutexattr_init(%"struct.DataOutBase::GmvFlags"*)
-
-declare i32 @pthread_mutexattr_settype(%"struct.DataOutBase::GmvFlags"*, i32)
-
-declare i32 @pthread_mutexattr_destroy(%"struct.DataOutBase::GmvFlags"*)
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
index 70785e9978fb..c38a02080523 100644
--- a/test/CodeGen/X86/select-zero-one.ll
+++ b/test/CodeGen/X86/select-zero-one.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep cmov
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movzbl | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
@r1 = weak global i32 0
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index e5d610125372..95ed9e97cdfd 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep set
+; RUN: llc < %s -march=x86 -mcpu=pentium
+; RUN: llc < %s -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep set
define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
%X = select i1 %A, i1 %B, i1 %C ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll
index 25a2b7e0b493..4a9c1bacc5f2 100644
--- a/test/CodeGen/X86/setoeq.ll
+++ b/test/CodeGen/X86/setoeq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep set | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep and
+; RUN: llc < %s -march=x86 | grep set | count 2
+; RUN: llc < %s -march=x86 | grep and
define zeroext i8 @t(double %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/setuge.ll b/test/CodeGen/X86/setuge.ll
index 3f1d882754ee..4ca2f1871c0f 100644
--- a/test/CodeGen/X86/setuge.ll
+++ b/test/CodeGen/X86/setuge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep set
+; RUN: llc < %s -march=x86 | not grep set
declare i1 @llvm.isunordered.f32(float, float)
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index a6d1080bd84a..c9b39d3a489e 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | grep movsbl
define i32 @foo(i32 %X) nounwind {
entry:
diff --git a/test/CodeGen/X86/sext-ret-val.ll b/test/CodeGen/X86/sext-ret-val.ll
index 946e6c78892e..da1a1871e7e8 100644
--- a/test/CodeGen/X86/sext-ret-val.ll
+++ b/test/CodeGen/X86/sext-ret-val.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movzbl | count 1
+; RUN: llc < %s -march=x86 | grep movzbl | count 1
; rdar://6699246
define signext i8 @t1(i8* %A) nounwind readnone ssp {
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
index 839ebc2b6c17..4aca0407b36f 100644
--- a/test/CodeGen/X86/sext-select.ll
+++ b/test/CodeGen/X86/sext-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movsw
+; RUN: llc < %s -march=x86 | grep movsw
; PR2139
declare void @abort()
diff --git a/test/CodeGen/X86/sext-trunc.ll b/test/CodeGen/X86/sext-trunc.ll
index 97b466682702..2eaf42577c70 100644
--- a/test/CodeGen/X86/sext-trunc.ll
+++ b/test/CodeGen/X86/sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep movsbl %t
; RUN: not grep movz %t
; RUN: not grep and %t
diff --git a/test/CodeGen/X86/sfence.ll b/test/CodeGen/X86/sfence.ll
index fc75ccbcb629..478287919ec4 100644
--- a/test/CodeGen/X86/sfence.ll
+++ b/test/CodeGen/X86/sfence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep sfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index b6d78a485783..fd278c2239f0 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep and
+; RUN: llc < %s -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86-64 | not grep and
define i32 @t1(i32 %t, i32 %val) nounwind {
%shamt = and i32 %t, 31
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index 4662628b672e..d38f9a88fcd6 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {shld.*CL}
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep {mov CL, BL}
; PR687
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
index deb4ed1f309b..4cba1834bf6c 100644
--- a/test/CodeGen/X86/shift-codegen.ll
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 | \
+; RUN: llc < %s -relocation-model=static -march=x86 | \
; RUN: grep {shll \$3} | count 2
; This should produce two shll instructions, not any lea's.
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index 543bb2237875..e443ac19a80f 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep shrl
+; RUN: llc < %s | not grep shrl
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll
index 24017fe2178a..5adee7c76941 100644
--- a/test/CodeGen/X86/shift-double.ll
+++ b/test/CodeGen/X86/shift-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {sh\[lr\]d} | count 5
define i64 @test1(i64 %X, i8 %C) {
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d26823220ff8..872817fd4953 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep {s\[ah\]\[rl\]l} | count 1
define i32* @test1(i32* %P, i32 %X) {
diff --git a/test/CodeGen/X86/shift-i128.ll b/test/CodeGen/X86/shift-i128.ll
index fc22a3c69139..c4d15ae9053e 100644
--- a/test/CodeGen/X86/shift-i128.ll
+++ b/test/CodeGen/X86/shift-i128.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
define void @t(i128 %x, i128 %a, i128* nocapture %r) nounwind {
entry:
diff --git a/test/CodeGen/X86/shift-i256.ll b/test/CodeGen/X86/shift-i256.ll
index 4a29b8626c6e..d5f65a6ed18c 100644
--- a/test/CodeGen/X86/shift-i256.ll
+++ b/test/CodeGen/X86/shift-i256.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/shift-one.ll b/test/CodeGen/X86/shift-one.ll
index dd49b7e04cf1..0f80f90c773e 100644
--- a/test/CodeGen/X86/shift-one.ll
+++ b/test/CodeGen/X86/shift-one.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep leal
+; RUN: llc < %s -march=x86 | not grep leal
@x = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll
new file mode 100644
index 000000000000..ce4f538f4de4
--- /dev/null
+++ b/test/CodeGen/X86/shift-parts.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | grep shrdq
+; PR4736
+
+%0 = type { i32, i8, [35 x i8] }
+
+@g_144 = external global %0, align 8 ; <%0*> [#uses=1]
+
+define i32 @int87(i32 %uint64p_8) nounwind {
+entry:
+ %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
+ br label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; <i320> [#uses=1]
+ %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; <i320> [#uses=1]
+ %call3.in = trunc i320 %call3.in.in.in to i32 ; <i32> [#uses=1]
+ %tobool = icmp eq i32 %call3.in, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %for.cond, label %if.then
+
+if.then: ; preds = %for.cond
+ ret i32 1
+}
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index d3616f4ac5de..445889166bd5 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl 8(.esp), %eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {shrl .eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {movswl .ax, .eax}
+; RUN: llc < %s -march=x86 | grep {movl 8(.esp), %eax}
+; RUN: llc < %s -march=x86 | grep {shrl .eax}
+; RUN: llc < %s -march=x86 | grep {movswl .ax, .eax}
define i32 @test1(i64 %a) {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/shrink-fp-const1.ll b/test/CodeGen/X86/shrink-fp-const1.ll
index 3406aeeeb5c5..49b9fa3c4129 100644
--- a/test/CodeGen/X86/shrink-fp-const1.ll
+++ b/test/CodeGen/X86/shrink-fp-const1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
; PR1264
define double @foo(double %x) {
diff --git a/test/CodeGen/X86/shrink-fp-const2.ll b/test/CodeGen/X86/shrink-fp-const2.ll
index 7e48b1bba8f1..3d5203be09a0 100644
--- a/test/CodeGen/X86/shrink-fp-const2.ll
+++ b/test/CodeGen/X86/shrink-fp-const2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep flds
+; RUN: llc < %s -march=x86 | grep flds
; This should be a flds, not fldt.
define x86_fp80 @test2() nounwind {
entry:
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
index 27215956b64d..13f932982f14 100644
--- a/test/CodeGen/X86/sincos.ll
+++ b/test/CodeGen/X86/sincos.ll
@@ -1,50 +1,48 @@
; Make sure this testcase codegens to the sin and cos instructions, not calls
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
; RUN: grep sin\$ | count 3
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
; RUN: grep cos\$ | count 3
-declare float @sinf(float)
+declare float @sinf(float) readonly
-declare double @sin(double)
+declare double @sin(double) readonly
-declare x86_fp80 @sinl(x86_fp80)
+declare x86_fp80 @sinl(x86_fp80) readonly
define float @test1(float %X) {
- %Y = call float @sinf(float %X)
+ %Y = call float @sinf(float %X) readonly
ret float %Y
}
define double @test2(double %X) {
- %Y = call double @sin(double %X)
+ %Y = call double @sin(double %X) readonly
ret double %Y
}
define x86_fp80 @test3(x86_fp80 %X) {
- %Y = call x86_fp80 @sinl(x86_fp80 %X)
+ %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
ret x86_fp80 %Y
}
-declare float @cosf(float)
+declare float @cosf(float) readonly
-declare double @cos(double)
+declare double @cos(double) readonly
-declare x86_fp80 @cosl(x86_fp80)
+declare x86_fp80 @cosl(x86_fp80) readonly
define float @test4(float %X) {
- %Y = call float @cosf(float %X)
+ %Y = call float @cosf(float %X) readonly
ret float %Y
}
define double @test5(double %X) {
- %Y = call double @cos(double %X)
+ %Y = call double @cos(double %X) readonly
ret double %Y
}
define x86_fp80 @test6(x86_fp80 %X) {
- %Y = call x86_fp80 @cosl(x86_fp80 %X)
+ %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly
ret x86_fp80 %Y
}
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
new file mode 100644
index 000000000000..0f4e63f9c674
--- /dev/null
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Currently, floating-point selects are lowered to CFG triangles.
+; This means that one side of the select is always unconditionally
+; evaluated; however, with MachineSink we can sink the other side so
+; that it's conditionally evaluated.
+
+; CHECK: foo:
+; CHECK-NEXT: divsd
+; CHECK: testb $1, %dil
+; CHECK-NEXT: jne
+; CHECK: divsd
+
+define double @foo(double %x, double %y, i1 %c) nounwind {
+ %a = fdiv double %x, 3.2
+ %b = fdiv double %y, 3.3
+ %z = select i1 %c, double %a, double %b
+ ret double %z
+}
+
+; Hoist floating-point constant-pool loads out of loops.
+
+; CHECK: bar:
+; CHECK: movsd
+; CHECK: align
+define void @bar(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %0 = icmp sgt i64 %n, 0
+ br i1 %0, label %bb, label %return
+
+bb:
+ %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ]
+ %scevgep = getelementptr double* %p, i64 %i.03
+ %1 = load double* %scevgep, align 8
+ %2 = fdiv double 3.200000e+00, %1
+ store double %2, double* %scevgep, align 8
+ %3 = add nsw i64 %i.03, 1
+ %exitcond = icmp eq i64 %3, %n
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index 8b87f7449cde..9ec9182e5e3c 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep movs
+; RUN: llc < %s | not grep movs
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/smul-with-overflow-2.ll b/test/CodeGen/X86/smul-with-overflow-2.ll
index c3dbfd796f20..7c23adba406c 100644
--- a/test/CodeGen/X86/smul-with-overflow-2.ll
+++ b/test/CodeGen/X86/smul-with-overflow-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep add | count 3
+; RUN: llc < %s -march=x86 | grep mul | count 1
+; RUN: llc < %s -march=x86 | grep add | count 3
define i32 @t1(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/smul-with-overflow-3.ll b/test/CodeGen/X86/smul-with-overflow-3.ll
index aa5e67a02998..49c31f56ae83 100644
--- a/test/CodeGen/X86/smul-with-overflow-3.ll
+++ b/test/CodeGen/X86/smul-with-overflow-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jno} | count 1
+; RUN: llc < %s -march=x86 | grep {jno} | count 1
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 6aefc03a3920..6d125e415e04 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index 0c697def1ec2..a52135dc9087 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -soft-float | not grep xmm
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86 -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
diff --git a/test/CodeGen/X86/split-eh-lpad-edges.ll b/test/CodeGen/X86/split-eh-lpad-edges.ll
index 281ee7782da1..fd40a7f70378 100644
--- a/test/CodeGen/X86/split-eh-lpad-edges.ll
+++ b/test/CodeGen/X86/split-eh-lpad-edges.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep jmp
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep jmp
; rdar://6647639
%struct.FetchPlanHeader = type { i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)*, %struct.__attributeDescriptionFlags }
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
index 0b7804da4e71..07d4d52f97a3 100644
--- a/test/CodeGen/X86/split-select.ll
+++ b/test/CodeGen/X86/split-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep test | count 1
+; RUN: llc < %s -march=x86-64 | grep test | count 1
define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
%x = select i1 %c, <2 x i16> %a, <2 x i16> %b
diff --git a/test/CodeGen/X86/split-vector-rem.ll b/test/CodeGen/X86/split-vector-rem.ll
index 8c88769be78f..681c6b0beaa0 100644
--- a/test/CodeGen/X86/split-vector-rem.ll
+++ b/test/CodeGen/X86/split-vector-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 16
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fmodf | count 8
+; RUN: llc < %s -march=x86-64 | grep div | count 16
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 8
define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
%m = srem <8 x i32> %t, %u
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
index 30e5af41123d..b9455300bdbb 100644
--- a/test/CodeGen/X86/sret.ll
+++ b/test/CodeGen/X86/sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep ret | grep 4
+; RUN: llc < %s -march=x86 | grep ret | grep 4
%struct.foo = type { [4 x i32] }
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index 5a888b2e784b..b12a87d614d2 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-1.ll b/test/CodeGen/X86/sse-align-1.ll
index 0edc6e094580..c7a5cd559120 100644
--- a/test/CodeGen/X86/sse-align-1.ll
+++ b/test/CodeGen/X86/sse-align-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
define <4 x float> @foo(<4 x float>* %p) nounwind {
%t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-10.ll b/test/CodeGen/X86/sse-align-10.ll
index 1a23eb2ae3d1..0f9169712556 100644
--- a/test/CodeGen/X86/sse-align-10.ll
+++ b/test/CodeGen/X86/sse-align-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
define <2 x i64> @bar(<2 x i64>* %p) nounwind {
%t = load <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index a10b102c6b95..aa1b4370bccf 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=linux | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=linux | grep movups
define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
entry:
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 297f1c458db9..4f025b916fd9 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep unpck %t | count 2
; RUN: grep shuf %t | count 2
; RUN: grep ps %t | count 4
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index ba693a200151..102c3fb06cd7 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 5bbcd59e0e9f..c42f7f0bad99 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
store <4 x float> %x, <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-4.ll b/test/CodeGen/X86/sse-align-4.ll
index f7e5fe3d684b..4c59934917f3 100644
--- a/test/CodeGen/X86/sse-align-4.ll
+++ b/test/CodeGen/X86/sse-align-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
store <4 x float> %x, <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-align-5.ll b/test/CodeGen/X86/sse-align-5.ll
index 19e0eaf8fff8..21cd2311b916 100644
--- a/test/CodeGen/X86/sse-align-5.ll
+++ b/test/CodeGen/X86/sse-align-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
define <2 x i64> @bar(<2 x i64>* %p) nounwind {
%t = load <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-align-6.ll b/test/CodeGen/X86/sse-align-6.ll
index dace291730f7..0bbf4228a40b 100644
--- a/test/CodeGen/X86/sse-align-6.ll
+++ b/test/CodeGen/X86/sse-align-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
%t = load <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-7.ll b/test/CodeGen/X86/sse-align-7.ll
index 7fb65b5f9e85..5784481c5ae9 100644
--- a/test/CodeGen/X86/sse-align-7.ll
+++ b/test/CodeGen/X86/sse-align-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
store <2 x i64> %x, <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-align-8.ll b/test/CodeGen/X86/sse-align-8.ll
index 17a3d2987fff..cfeff8161c5c 100644
--- a/test/CodeGen/X86/sse-align-8.ll
+++ b/test/CodeGen/X86/sse-align-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
store <2 x i64> %x, <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-9.ll b/test/CodeGen/X86/sse-align-9.ll
index 24b437ab3534..cb26b9535a81 100644
--- a/test/CodeGen/X86/sse-align-9.ll
+++ b/test/CodeGen/X86/sse-align-9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
define <4 x float> @foo(<4 x float>* %p) nounwind {
%t = load <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index d8c32831a1e9..0e0e4a9a86cf 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep test
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep test
define float @tst1(float %a, float %b) {
%tmp = tail call float @copysignf( float %b, float %a )
diff --git a/test/CodeGen/X86/sse-load-ret.ll b/test/CodeGen/X86/sse-load-ret.ll
index cbf3eb0e5f0d..1ebcb1a6fa64 100644
--- a/test/CodeGen/X86/sse-load-ret.ll
+++ b/test/CodeGen/X86/sse-load-ret.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mcpu=yonah | not grep movss
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mcpu=yonah | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
define double @test1(double* %P) {
%X = load double* %P ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
new file mode 100644
index 000000000000..17ffb5e464aa
--- /dev/null
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -0,0 +1,392 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Some of these patterns can be matched as SSE min or max. Some of
+; them can be matched provided that the operands are swapped.
+; Some of them can't be matched at all and require a comparison
+; and a conditional branch.
+
+; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; x_ : use 0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+; CHECK: ogt:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt(double %x, double %y) nounwind {
+ %c = fcmp ogt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: olt:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt(double %x, double %y) nounwind {
+ %c = fcmp olt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ogt_inverse:
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt_inverse(double %x, double %y) nounwind {
+ %c = fcmp ogt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: olt_inverse:
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt_inverse(double %x, double %y) nounwind {
+ %c = fcmp olt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: oge:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge(double %x, double %y) nounwind {
+ %c = fcmp oge double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ole:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole(double %x, double %y) nounwind {
+ %c = fcmp ole double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: oge_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge_inverse(double %x, double %y) nounwind {
+ %c = fcmp oge double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ole_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole_inverse(double %x, double %y) nounwind {
+ %c = fcmp ole double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: x_ogt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt(double %x) nounwind {
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_olt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt(double %x) nounwind {
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ogt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt_inverse(double %x) nounwind {
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_olt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt_inverse(double %x) nounwind {
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_oge:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge(double %x) nounwind {
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ole:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole(double %x) nounwind {
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_oge_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge_inverse(double %x) nounwind {
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ole_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole_inverse(double %x) nounwind {
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: ugt:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt(double %x, double %y) nounwind {
+ %c = fcmp ugt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ult:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult(double %x, double %y) nounwind {
+ %c = fcmp ult double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ugt_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt_inverse(double %x, double %y) nounwind {
+ %c = fcmp ugt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ult_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult_inverse(double %x, double %y) nounwind {
+ %c = fcmp ult double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: uge:
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge(double %x, double %y) nounwind {
+ %c = fcmp uge double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ule:
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule(double %x, double %y) nounwind {
+ %c = fcmp ule double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: uge_inverse:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge_inverse(double %x, double %y) nounwind {
+ %c = fcmp uge double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ule_inverse:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule_inverse(double %x, double %y) nounwind {
+ %c = fcmp ule double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: x_ugt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt(double %x) nounwind {
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ult:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult(double %x) nounwind {
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ugt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt_inverse(double %x) nounwind {
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ult_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult_inverse(double %x) nounwind {
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_uge:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge(double %x) nounwind {
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ule:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule(double %x) nounwind {
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_uge_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge_inverse(double %x) nounwind {
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ule_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule_inverse(double %x) nounwind {
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; Test a few more misc. cases.
+
+; CHECK: clampTo3k_a:
+; CHECK: minsd
+define double @clampTo3k_a(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_b:
+; CHECK: minsd
+define double @clampTo3k_b(double %x) nounwind readnone {
+entry:
+ %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_c:
+; CHECK: maxsd
+define double @clampTo3k_c(double %x) nounwind readnone {
+entry:
+ %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_d:
+; CHECK: maxsd
+define double @clampTo3k_d(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_e:
+; CHECK: maxsd
+define double @clampTo3k_e(double %x) nounwind readnone {
+entry:
+ %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_f:
+; CHECK: maxsd
+define double @clampTo3k_f(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_g:
+; CHECK: minsd
+define double @clampTo3k_g(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_h:
+; CHECK: minsd
+define double @clampTo3k_h(double %x) nounwind readnone {
+entry:
+ %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
diff --git a/test/CodeGen/X86/sse-varargs.ll b/test/CodeGen/X86/sse-varargs.ll
index 806126da2faf..da38f0e148f6 100644
--- a/test/CodeGen/X86/sse-varargs.ll
+++ b/test/CodeGen/X86/sse-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xmm | grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xmm | grep esp
define i32 @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
new file mode 100644
index 000000000000..9f926f2bee7b
--- /dev/null
+++ b/test/CodeGen/X86/sse2.ll
@@ -0,0 +1,34 @@
+; Tests for SSE2 and below, without SSE3+.
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 | FileCheck %s
+
+define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+ %tmp3 = load <2 x double>* %A, align 16
+ %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+ %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+ store <2 x double> %tmp9, <2 x double>* %r, align 16
+ ret void
+
+; CHECK: t1:
+; CHECK: movl 8(%esp), %eax
+; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: movlpd 12(%esp), %xmm0
+; CHECK-NEXT: movl 4(%esp), %eax
+; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: ret
+}
+
+define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+ %tmp3 = load <2 x double>* %A, align 16
+ %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+ %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
+ store <2 x double> %tmp9, <2 x double>* %r, align 16
+ ret void
+
+; CHECK: t2:
+; CHECK: movl 8(%esp), %eax
+; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: movhpd 12(%esp), %xmm0
+; CHECK-NEXT: movl 4(%esp), %eax
+; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
new file mode 100644
index 000000000000..703635c0f53a
--- /dev/null
+++ b/test/CodeGen/X86/sse3.ll
@@ -0,0 +1,273 @@
+; These are tests for SSE3 codegen. Yonah has SSE3 and earlier but not SSSE3+.
+
+; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9\
+; RUN: | FileCheck %s --check-prefix=X64
+
+; Test for v8xi16 lowering where we extract the first element of the vector and
+; place it in the second element of the result.
+
+define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
+entry:
+ %tmp3 = load <8 x i16>* %old
+ %tmp6 = shufflevector <8 x i16> %tmp3,
+ <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+ <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+ store <8 x i16> %tmp6, <8 x i16>* %dest
+ ret void
+
+; X64: t0:
+; X64: movddup (%rsi), %xmm0
+; X64: pshuflw $0, %xmm0, %xmm0
+; X64: xorl %eax, %eax
+; X64: pinsrw $0, %eax, %xmm0
+; X64: movaps %xmm0, (%rdi)
+; X64: ret
+}
+
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp3
+
+; X64: t1:
+; X64: movl (%rsi), %eax
+; X64: movaps (%rdi), %xmm0
+; X64: pinsrw $0, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
+; X64: t2:
+; X64: pextrw $1, %xmm1, %eax
+; X64: pinsrw $0, %eax, %xmm0
+; X64: pinsrw $3, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+ ret <8 x i16> %tmp
+; X64: t3:
+; X64: pextrw $5, %xmm0, %eax
+; X64: pshuflw $44, %xmm0, %xmm0
+; X64: pshufhw $27, %xmm0, %xmm0
+; X64: pinsrw $3, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+ ret <8 x i16> %tmp
+; X64: t4:
+; X64: pextrw $7, %xmm0, %eax
+; X64: pshufhw $100, %xmm0, %xmm1
+; X64: pinsrw $1, %eax, %xmm1
+; X64: pextrw $1, %xmm0, %eax
+; X64: movaps %xmm1, %xmm0
+; X64: pinsrw $4, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
+ ret <8 x i16> %tmp
+; X64: t5:
+; X64: movlhps %xmm1, %xmm0
+; X64: pshufd $114, %xmm0, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
+; X64: t6:
+; X64: movss %xmm1, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
+ ret <8 x i16> %tmp
+; X64: t7:
+; X64: pshuflw $-80, %xmm0, %xmm0
+; X64: pshufhw $-56, %xmm0, %xmm0
+; X64: ret
+}
+
+define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
+ %tmp = load <2 x i64>* %A
+ %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
+ %tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
+ %tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
+ %tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
+ %tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
+ %tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
+ %tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
+ %tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
+ %tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
+ %tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
+ %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
+ %tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
+ %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
+ %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
+ %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
+ %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
+ %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
+ %tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
+ store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
+ ret void
+; X64: t8:
+; X64: pshuflw $-58, (%rsi), %xmm0
+; X64: pshufhw $-58, %xmm0, %xmm0
+; X64: movaps %xmm0, (%rdi)
+; X64: ret
+}
+
+define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
+ %tmp = load <4 x float>* %r
+ %tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
+ %tmp.upgrd.4 = load double* %tmp.upgrd.3
+ %tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
+ %tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
+ %tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
+ %tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0
+ %tmp7 = extractelement <4 x float> %tmp, i32 1
+ %tmp8 = extractelement <4 x float> %tmp6, i32 0
+ %tmp9 = extractelement <4 x float> %tmp6, i32 1
+ %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
+ %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
+ store <4 x float> %tmp13, <4 x float>* %r
+ ret void
+; X64: t9:
+; X64: movsd (%rsi), %xmm0
+; X64: movhps %xmm0, (%rdi)
+; X64: ret
+}
+
+
+
+; FIXME: This testcase produces icky code. It can be made much better!
+; PR2585
+
+@g1 = external constant <4 x i32>
+@g2 = external constant <4 x i16>
+
+define internal void @t10() nounwind {
+ load <4 x i32>* @g1, align 16
+ bitcast <4 x i32> %1 to <8 x i16>
+ shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
+ bitcast <8 x i16> %3 to <2 x i64>
+ extractelement <2 x i64> %4, i32 0
+ bitcast i64 %5 to <4 x i16>
+ store <4 x i16> %6, <4 x i16>* @g2, align 8
+ ret void
+; X64: t10:
+; X64: movq _g1@GOTPCREL(%rip), %rax
+; X64: movaps (%rax), %xmm0
+; X64: pextrw $4, %xmm0, %eax
+; X64: movaps %xmm0, %xmm1
+; X64: movlhps %xmm1, %xmm1
+; X64: pshuflw $8, %xmm1, %xmm1
+; X64: pinsrw $2, %eax, %xmm1
+; X64: pextrw $6, %xmm0, %eax
+; X64: pinsrw $3, %eax, %xmm1
+; X64: movq _g2@GOTPCREL(%rip), %rax
+; X64: movq %xmm1, (%rax)
+; X64: ret
+}
+
+
+; Pack various elements via shuffles.
+define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ ret <8 x i16> %tmp7
+
+; X64: t11:
+; X64: movd %xmm1, %eax
+; X64: movlhps %xmm0, %xmm0
+; X64: pshuflw $1, %xmm0, %xmm0
+; X64: pinsrw $1, %eax, %xmm0
+; X64: ret
+}
+
+
+define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
+ ret <8 x i16> %tmp9
+
+; X64: t12:
+; X64: pextrw $3, %xmm1, %eax
+; X64: movlhps %xmm0, %xmm0
+; X64: pshufhw $3, %xmm0, %xmm0
+; X64: pinsrw $5, %eax, %xmm0
+; X64: ret
+}
+
+
+define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
+ ret <8 x i16> %tmp9
+; X64: t13:
+; X64: punpcklqdq %xmm0, %xmm1
+; X64: pextrw $3, %xmm1, %eax
+; X64: pshufd $52, %xmm1, %xmm0
+; X64: pinsrw $4, %eax, %xmm0
+; X64: ret
+}
+
+
+define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
+ ret <8 x i16> %tmp9
+; X64: t14:
+; X64: punpcklqdq %xmm0, %xmm1
+; X64: pshufhw $8, %xmm1, %xmm0
+; X64: ret
+}
+
+
+
+define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+ ret <8 x i16> %tmp8
+; X64: t15:
+; X64: pextrw $7, %xmm0, %eax
+; X64: punpcklqdq %xmm1, %xmm0
+; X64: pshuflw $-128, %xmm0, %xmm0
+; X64: pinsrw $2, %eax, %xmm0
+; X64: ret
+}
+
+
+; Test yonah where we convert a shuffle to pextrw and pinsrw
+define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
+entry:
+ %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ ret <16 x i8> %tmp9
+; X64: t16:
+; X64: movaps LCPI17_0(%rip), %xmm1
+; X64: movd %xmm1, %eax
+; X64: pinsrw $0, %eax, %xmm1
+; X64: pextrw $8, %xmm0, %eax
+; X64: pinsrw $1, %eax, %xmm1
+; X64: pextrw $1, %xmm1, %ecx
+; X64: movd %xmm1, %edx
+; X64: pinsrw $0, %edx, %xmm1
+; X64: movzbl %cl, %ecx
+; X64: andw $-256, %ax
+; X64: orw %cx, %ax
+; X64: movaps %xmm1, %xmm0
+; X64: pinsrw $1, %eax, %xmm0
+; X64: ret
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
new file mode 100644
index 000000000000..a734c05b8686
--- /dev/null
+++ b/test/CodeGen/X86/sse41.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+
+@g16 = external global i16
+
+define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
+ %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+ ret <4 x i32> %tmp1
+; X32: pinsrd_1:
+; X32: pinsrd $1, 4(%esp), %xmm0
+
+; X64: pinsrd_1:
+; X64: pinsrd $1, %edi, %xmm0
+}
+
+define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
+ %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
+ ret <16 x i8> %tmp1
+; X32: pinsrb_1:
+; X32: pinsrb $1, 4(%esp), %xmm0
+
+; X64: pinsrb_1:
+; X64: pinsrb $1, %edi, %xmm0
+}
+
+
+define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
+entry:
+ %0 = load i32* %p, align 4
+ %1 = insertelement <4 x i32> undef, i32 %0, i32 0
+ %2 = insertelement <4 x i32> %1, i32 0, i32 1
+ %3 = insertelement <4 x i32> %2, i32 0, i32 2
+ %4 = insertelement <4 x i32> %3, i32 0, i32 3
+ %5 = bitcast <4 x i32> %4 to <16 x i8>
+ %6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
+ %7 = bitcast <4 x i32> %6 to <2 x i64>
+ ret <2 x i64> %7
+
+; X32: _pmovsxbd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxbd (%eax), %xmm0
+
+; X64: _pmovsxbd_1:
+; X64: pmovsxbd (%rdi), %xmm0
+}
+
+define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
+entry:
+ %0 = load i64* %p ; <i64> [#uses=1]
+ %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1]
+ %1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1]
+ %3 = bitcast <4 x i32> %2 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %3
+
+; X32: _pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd (%eax), %xmm0
+
+; X64: _pmovsxwd_1:
+; X64: pmovsxwd (%rdi), %xmm0
+}
+
+
+
+
+define <2 x i64> @pmovzxbq_1() nounwind {
+entry:
+ %0 = load i16* @g16, align 2 ; <i16> [#uses=1]
+ %1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1]
+ %2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1]
+ %3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %3
+
+; X32: _pmovzxbq_1:
+; X32: movl L_g16$non_lazy_ptr, %eax
+; X32: pmovzxbq (%eax), %xmm0
+
+; X64: _pmovzxbq_1:
+; X64: movq _g16@GOTPCREL(%rip), %rax
+; X64: pmovzxbq (%rax), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+
+
+define i32 @extractps_1(<4 x float> %v) nounwind {
+ %s = extractelement <4 x float> %v, i32 3
+ %i = bitcast float %s to i32
+ ret i32 %i
+
+; X32: _extractps_1:
+; X32: extractps $3, %xmm0, %eax
+
+; X64: _extractps_1:
+; X64: extractps $3, %xmm0, %eax
+}
+define i32 @extractps_2(<4 x float> %v) nounwind {
+ %t = bitcast <4 x float> %v to <4 x i32>
+ %s = extractelement <4 x i32> %t, i32 3
+ ret i32 %s
+
+; X32: _extractps_2:
+; X32: extractps $3, %xmm0, %eax
+
+; X64: _extractps_2:
+; X64: extractps $3, %xmm0, %eax
+}
+
+
+; The non-store form of extractps puts its result into a GPR.
+; This makes it suitable for an extract from a <4 x float> that
+; is bitcasted to i32, but unsuitable for much of anything else.
+
+define float @ext_1(<4 x float> %v) nounwind {
+ %s = extractelement <4 x float> %v, i32 3
+ %t = fadd float %s, 1.0
+ ret float %t
+
+; X32: _ext_1:
+; X32: pshufd $3, %xmm0, %xmm0
+; X32: addss LCPI8_0, %xmm0
+
+; X64: _ext_1:
+; X64: pshufd $3, %xmm0, %xmm0
+; X64: addss LCPI8_0(%rip), %xmm0
+}
+define float @ext_2(<4 x float> %v) nounwind {
+ %s = extractelement <4 x float> %v, i32 3
+ ret float %s
+
+; X32: _ext_2:
+; X32: pshufd $3, %xmm0, %xmm0
+
+; X64: _ext_2:
+; X64: pshufd $3, %xmm0, %xmm0
+}
+define i32 @ext_3(<4 x i32> %v) nounwind {
+ %i = extractelement <4 x i32> %v, i32 3
+ ret i32 %i
+
+; X32: _ext_3:
+; X32: pextrd $3, %xmm0, %eax
+
+; X64: _ext_3:
+; X64: pextrd $3, %xmm0, %eax
+}
+
+define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
+ ret <4 x float> %tmp1
+; X32: _insertps_1:
+; X32: insertps $1, %xmm1, %xmm0
+
+; X64: _insertps_1:
+; X64: insertps $1, %xmm1, %xmm0
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind {
+ %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+ ret <4 x float> %tmp1
+; X32: _insertps_2:
+; X32: insertps $0, 4(%esp), %xmm0
+
+; X64: _insertps_2:
+; X64: insertps $0, %xmm1, %xmm0
+}
+
+define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp2 = extractelement <4 x float> %t2, i32 0
+ %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
+ ret <4 x float> %tmp1
+; X32: _insertps_3:
+; X32: insertps $0, %xmm1, %xmm0
+
+; X64: _insertps_3:
+; X64: insertps $0, %xmm1, %xmm0
+}
+
+define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+ ret i32 %tmp1
+; X32: _ptestz_1:
+; X32: ptest %xmm1, %xmm0
+; X32: sete %al
+
+; X64: _ptestz_1:
+; X64: ptest %xmm1, %xmm0
+; X64: sete %al
+}
+
+define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+ ret i32 %tmp1
+; X32: _ptestz_2:
+; X32: ptest %xmm1, %xmm0
+; X32: setb %al
+
+; X64: _ptestz_2:
+; X64: ptest %xmm1, %xmm0
+; X64: setb %al
+}
+
+define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+ ret i32 %tmp1
+; X32: _ptestz_3:
+; X32: ptest %xmm1, %xmm0
+; X32: seta %al
+
+; X64: _ptestz_3:
+; X64: ptest %xmm1, %xmm0
+; X64: seta %al
+}
+
+
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
new file mode 100644
index 000000000000..c9c4d012102a
--- /dev/null
+++ b/test/CodeGen/X86/sse42.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+
+declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind
+
+define i32 @crc32_8(i32 %a, i8 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
+ ret i32 %tmp
+; X32: _crc32_8:
+; X32: crc32 8(%esp), %eax
+
+; X64: _crc32_8:
+; X64: crc32 %sil, %eax
+}
+
+
+define i32 @crc32_16(i32 %a, i16 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
+ ret i32 %tmp
+; X32: _crc32_16:
+; X32: crc32 8(%esp), %eax
+
+; X64: _crc32_16:
+; X64: crc32 %si, %eax
+}
+
+
+define i32 @crc32_32(i32 %a, i32 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
+ ret i32 %tmp
+; X32: _crc32_32:
+; X32: crc32 8(%esp), %eax
+
+; X64: _crc32_32:
+; X64: crc32 %esi, %eax
+}
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index 547763e4a793..dc3d6fe6797d 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
+; RUN: llc < %s -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
; RUN: grep fail | count 1
declare float @test_f(float %f)
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index dda6f0d928c9..cb65e9b50fe2 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
+; RUN: llc < %s -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
; fold the load into the andpd.
diff --git a/test/CodeGen/X86/stack-color-with-reg-2.ll b/test/CodeGen/X86/stack-color-with-reg-2.ll
index bc4182f65dcb..c1f267229351 100644
--- a/test/CodeGen/X86/stack-color-with-reg-2.ll
+++ b/test/CodeGen/X86/stack-color-with-reg-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
%"struct..0$_67" = type { i32, %"struct.llvm::MachineOperand"**, %"struct.llvm::MachineOperand"* }
%"struct..1$_69" = type { i32 }
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 72a985a6c29b..672f77eef02c 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep stackcoloring %t | grep "loads eliminated"
-; RUN: grep stackcoloring %t | grep "stores eliminated"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
+; RUN: grep asm-printer %t | grep 179
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
new file mode 100644
index 000000000000..9778fa138948
--- /dev/null
+++ b/test/CodeGen/X86/stdarg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep {testb \[%\]al, \[%\]al}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define void @foo(i32 %x, ...) nounwind {
+entry:
+ %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
+ %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
+ call void @llvm.va_start(i8* %ap12)
+ %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+ call void @bar(%struct.__va_list_tag* %ap3) nounwind
+ call void @llvm.va_end(i8* %ap12)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
new file mode 100644
index 000000000000..37f86c60fae5
--- /dev/null
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; Don't crash on an empty struct member.
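+; The [0 x i32] member occupies no storage, so the aggregate store legalizes
+; to just the two i32 stores checked below.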
+
+; CHECK: movl $2, 4(%esp)
+; CHECK: movl $1, (%esp)
+
+%testType = type {i32, [0 x i32], i32}
+
+define void @foo() nounwind {
+ %1 = alloca %testType
+ volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+ ret void
+}
diff --git a/test/CodeGen/X86/store-fp-constant.ll b/test/CodeGen/X86/store-fp-constant.ll
index 70cb046600bc..206886bb608f 100644
--- a/test/CodeGen/X86/store-fp-constant.ll
+++ b/test/CodeGen/X86/store-fp-constant.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep rodata
-; RUN: llvm-as < %s | llc -march=x86 | not grep literal
+; RUN: llc < %s -march=x86 | not grep rodata
+; RUN: llc < %s -march=x86 | not grep literal
;
; Check that no FP constants in this testcase end up in the
; constant pool.
diff --git a/test/CodeGen/X86/store-global-address.ll b/test/CodeGen/X86/store-global-address.ll
index 0695eee9a888..c8d4cbceea3d 100644
--- a/test/CodeGen/X86/store-global-address.ll
+++ b/test/CodeGen/X86/store-global-address.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movl | count 1
+; RUN: llc < %s -march=x86 | grep movl | count 1
@dst = global i32 0 ; <i32*> [#uses=1]
@ptr = global i32* null ; <i32**> [#uses=1]
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
index acef17463878..66d0e47c6d48 100644
--- a/test/CodeGen/X86/store_op_load_fold.ll
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
;
; Test that the add and load are folded into the store instruction.
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 09aaba155d99..0ccfe470db5f 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {and DWORD PTR} | count 2
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/storetrunc-fp.ll b/test/CodeGen/X86/storetrunc-fp.ll
index 945cf48f9bde..03ad093ba860 100644
--- a/test/CodeGen/X86/storetrunc-fp.ll
+++ b/test/CodeGen/X86/storetrunc-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep flds
+; RUN: llc < %s -march=x86 | not grep flds
define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) {
%c = fadd x86_fp80 %a, %b
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index cc26487cf264..7aae9eb1ab96 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
; P should be sunk into the loop and folded into the address mode. There
; shouldn't be any lea instructions inside the loop.
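+; (An x86 scaled index only supports scales of 1, 2, 4, and 8, so a
+; stride-of-nine address can't be formed with a lea.)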
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 277a4430acaa..a99a9c95a4cc 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
@B = external global [1000 x float], align 32
@A = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 98f02524d7a1..19f4079abb5f 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep {jb} | count 1
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jb} | count 1
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/subreg-to-reg-0.ll b/test/CodeGen/X86/subreg-to-reg-0.ll
index 6b60f6526595..d718c85a1d1f 100644
--- a/test/CodeGen/X86/subreg-to-reg-0.ll
+++ b/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
; Do eliminate the zero-extension instruction and rely on
; x86-64's implicit zero-extension!
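+; (On x86-64, writes to a 32-bit register implicitly zero the upper 32 bits,
+; which is what makes the explicit zext instruction redundant here.)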
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index aa26f06aba96..a297728aee89 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal .*), %e.\*} | count 1
+; RUN: llc < %s -march=x86-64 | grep {leal .*), %e.\*} | count 1
; Don't eliminate or coalesce away the explicit zero-extension!
; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-2.ll b/test/CodeGen/X86/subreg-to-reg-2.ll
index d0b40cd5d471..49d2e88d2c8d 100644
--- a/test/CodeGen/X86/subreg-to-reg-2.ll
+++ b/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl
; rdar://6707985
%XXOO = type { %"struct.XXC::XXCC", i8*, %"struct.XXC::XXOO::$_71" }
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll
index 6634538c2afd..931ae758ac5c 100644
--- a/test/CodeGen/X86/subreg-to-reg-3.ll
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep imull
+; RUN: llc < %s -march=x86-64 | grep imull
; Don't eliminate or coalesce away the explicit zero-extension!
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
index bb6af3988c95..0ea5541c89dc 100644
--- a/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep leaq %t
; RUN: not grep incq %t
; RUN: not grep decq %t
diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll
index 81b262ace84d..ba4c307d1090 100644
--- a/test/CodeGen/X86/subreg-to-reg-5.ll
+++ b/test/CodeGen/X86/subreg-to-reg-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep addl %t
; RUN: not egrep {movl|movq} %t
diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll
index f18eef7d1970..76430cd783e3 100644
--- a/test/CodeGen/X86/subreg-to-reg-6.ll
+++ b/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
define i64 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
index f3c701ff5f92..55425bc7da5c 100644
--- a/test/CodeGen/X86/switch-zextload.ll
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
; Do zextload, instead of a load and a separate zext.
diff --git a/test/CodeGen/X86/swizzle.ll b/test/CodeGen/X86/swizzle.ll
index d00bb9a0fadb..23e0c2453d64 100644
--- a/test/CodeGen/X86/swizzle.ll
+++ b/test/CodeGen/X86/swizzle.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlps
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movups
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movlps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movups
; rdar://6523650
%struct.vector4_t = type { <4 x float> }
diff --git a/test/CodeGen/X86/tailcall-i1.ll b/test/CodeGen/X86/tailcall-i1.ll
index 0ec6a7780722..8ef1f11383be 100644
--- a/test/CodeGen/X86/tailcall-i1.ll
+++ b/test/CodeGen/X86/tailcall-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc i1 @i1test(i32, i32, i32, i32) {
entry:
%4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index ff960b8ce1ff..110472c8b9f3 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12
+; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12
; Linux has 8-byte stack alignment, so the params cause a stack size of 20 when
; tailcallopt is enabled; ensure that a normal fastcc call has a matching stack size.
diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll
index e94d7d8befaa..d8be4b2e2dfd 100644
--- a/test/CodeGen/X86/tailcall-structret.ll
+++ b/test/CodeGen/X86/tailcall-structret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
entry:
%2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
diff --git a/test/CodeGen/X86/tailcall-void.ll b/test/CodeGen/X86/tailcall-void.ll
index 27b2a2856ada..4e578d1b6410 100644
--- a/test/CodeGen/X86/tailcall-void.ll
+++ b/test/CodeGen/X86/tailcall-void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc void @i1test(i32, i32, i32, i32) {
entry:
tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll
index deedb86e95b8..a4f87c021a95 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 916be566a14a..7002560c82a0 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7b65863f00b0..7c685b85807e 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep TAILCALL
; Expect 2 rep;movs because of tail call byval lowering.
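+; (Likely one copy into a temporary stack slot and one into the tail call's
+; argument area, since the source and destination may overlap.)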
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep rep | wc -l | grep 2
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep rep | wc -l | grep 2
; A sequence of copyto/copyfrom virtual registers is used to deal with byval
; lowering appearing after moving arguments to registers. The following two
; checks verify that the register allocator changes those sequences to direct
; moves to the argument register where it can (i.e. for registers that are not
; used in byval lowering: not rsi, rdi, or rcx).
; Expect argument 4 to be moved directly to register edx.
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep movl | grep {7} | grep edx
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep movl | grep {7} | grep edx
; Expect argument 6 to be moved directly to register r8.
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep movl | grep {17} | grep r8
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep movl | grep {17} | grep r8
%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll
index f6149351038b..c0b609ac956e 100644
--- a/test/CodeGen/X86/tailcallfp.ll
+++ b/test/CodeGen/X86/tailcallfp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
%Y = tail call fastcc i32 %FP(double 0.0, i32 %X)
ret i32 %Y
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 151701ed439d..be4f96cfb5e6 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
+; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
index 54074eb0ba2a..60e3be5c50fd 100644
--- a/test/CodeGen/X86/tailcallpic1.ll
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
+; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
index 60818e4f62c6..eaa76312396c 100644
--- a/test/CodeGen/X86/tailcallpic2.ll
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
+; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c81327e5143a..73c59bb639a6 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,14 +1,17 @@
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 | grep TAILCALL
+; RUN: llc < %s -tailcallopt -march=x86-64 | FileCheck %s
+
; Check that lowered arguments on the stack do not overwrite each other.
-; Move param %in1 to temp register (%eax).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl 40(%rsp), %eax}
-; Add %in1 %p1 to another temporary register (%r9d).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r10d}
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r10d}
+; Add %in1 and %p1 into a temporary register (%eax).
+; CHECK: movl %edi, %eax
+; CHECK: addl 32(%rsp), %eax
+; Move param %in1 to temp register (%r10d).
+; CHECK: movl 40(%rsp), %r10d
; Move result of addition to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r10d, 40(%rsp)}
+; CHECK: movl %eax, 40(%rsp)
; Move param %in2 to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %eax, 32(%rsp)}
+; CHECK: movl %r10d, 32(%rsp)
+; Eventually, do a TAILCALL
+; CHECK: TAILCALL
declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b)
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index a24a9a0940a0..772ff6c3e766 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
; rdar://5752025
; We don't want to fold the and into the test, because the and clobbers its
diff --git a/test/CodeGen/X86/test-shrink-bug.ll b/test/CodeGen/X86/test-shrink-bug.ll
new file mode 100644
index 000000000000..64631ea5fc9b
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink-bug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+
+; Codegen shouldn't reduce the comparison down to testb $-1, %al
+; because that changes the result of the signed test.
+; PR5132
+; CHECK: testw $255, %ax
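+;
+; With g_14 = -6 the low byte is 0xFA: "testw $255, %ax" computes 0x00FA
+; and leaves SF clear, while a narrowed "testb $-1, %al" computes 0xFA and
+; sets SF, flipping any signed condition that reads it.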
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@g_14 = global i8 -6, align 1 ; <i8*> [#uses=1]
+
+declare i32 @func_16(i8 signext %p_19, i32 %p_20) nounwind
+
+define i32 @func_35(i64 %p_38) nounwind ssp {
+entry:
+ %tmp = load i8* @g_14 ; <i8> [#uses=2]
+ %conv = zext i8 %tmp to i32 ; <i32> [#uses=1]
+ %cmp = icmp sle i32 1, %conv ; <i1> [#uses=1]
+ %conv2 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %call = call i32 @func_16(i8 signext %tmp, i32 %conv2) ssp ; <i32> [#uses=1]
+ ret i32 1
+}
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll
new file mode 100644
index 000000000000..1d636930641f
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -0,0 +1,158 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
+
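+; Each function masks one value and branches on the result. The masks are
+; chosen so that the test can shrink to a narrower immediate: 2048 is bit 3
+; of the high byte (testb $8, %ah), 8 is bit 3 of the low byte, 0x8080
+; (32896) still needs a 16-bit test, and 0x10008080 (268468352) needs the
+; full 32-bit test.
+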
+; CHECK-64: g64xh:
+; CHECK-64: testb $8, %ah
+; CHECK-64: ret
+; CHECK-32: g64xh:
+; CHECK-32: testb $8, %ah
+; CHECK-32: ret
+define void @g64xh(i64 inreg %x) nounwind {
+ %t = and i64 %x, 2048
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g64xl:
+; CHECK-64: testb $8, %dil
+; CHECK-64: ret
+; CHECK-32: g64xl:
+; CHECK-32: testb $8, %al
+; CHECK-32: ret
+define void @g64xl(i64 inreg %x) nounwind {
+ %t = and i64 %x, 8
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g32xh:
+; CHECK-64: testb $8, %ah
+; CHECK-64: ret
+; CHECK-32: g32xh:
+; CHECK-32: testb $8, %ah
+; CHECK-32: ret
+define void @g32xh(i32 inreg %x) nounwind {
+ %t = and i32 %x, 2048
+ %s = icmp eq i32 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g32xl:
+; CHECK-64: testb $8, %dil
+; CHECK-64: ret
+; CHECK-32: g32xl:
+; CHECK-32: testb $8, %al
+; CHECK-32: ret
+define void @g32xl(i32 inreg %x) nounwind {
+ %t = and i32 %x, 8
+ %s = icmp eq i32 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g16xh:
+; CHECK-64: testb $8, %ah
+; CHECK-64: ret
+; CHECK-32: g16xh:
+; CHECK-32: testb $8, %ah
+; CHECK-32: ret
+define void @g16xh(i16 inreg %x) nounwind {
+ %t = and i16 %x, 2048
+ %s = icmp eq i16 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g16xl:
+; CHECK-64: testb $8, %dil
+; CHECK-64: ret
+; CHECK-32: g16xl:
+; CHECK-32: testb $8, %al
+; CHECK-32: ret
+define void @g16xl(i16 inreg %x) nounwind {
+ %t = and i16 %x, 8
+ %s = icmp eq i16 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g64x16:
+; CHECK-64: testw $-32640, %di
+; CHECK-64: ret
+; CHECK-32: g64x16:
+; CHECK-32: testw $-32640, %ax
+; CHECK-32: ret
+define void @g64x16(i64 inreg %x) nounwind {
+ %t = and i64 %x, 32896
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g32x16:
+; CHECK-64: testw $-32640, %di
+; CHECK-64: ret
+; CHECK-32: g32x16:
+; CHECK-32: testw $-32640, %ax
+; CHECK-32: ret
+define void @g32x16(i32 inreg %x) nounwind {
+ %t = and i32 %x, 32896
+ %s = icmp eq i32 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g64x32:
+; CHECK-64: testl $268468352, %edi
+; CHECK-64: ret
+; CHECK-32: g64x32:
+; CHECK-32: testl $268468352, %eax
+; CHECK-32: ret
+define void @g64x32(i64 inreg %x) nounwind {
+ %t = and i64 %x, 268468352
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
index dbbef0a894f2..3d5f672f98fc 100644
--- a/test/CodeGen/X86/testl-commute.ll
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {testl.*\(%r.i\), %} | count 3
+; RUN: llc < %s | grep {testl.*\(%r.i\), %} | count 3
; rdar://5671654
; The loads should fold into the testl instructions, no matter how
; the inputs are commuted.
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
new file mode 100644
index 000000000000..4cad8376d8d9
--- /dev/null
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
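+;
+; Under -relocation-model=pic, thread-local accesses use the general-dynamic
+; TLS model: the address is obtained by calling ___tls_get_addr (one leading
+; underscore fewer on x86-64) with a TLSGD-relocated operand, as checked below.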
+
+@i = thread_local global i32 15
+
+define i32 @f1() {
+entry:
+ %tmp1 = load i32* @i
+ ret i32 %tmp1
+}
+
+; X32: f1:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f1:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+@i2 = external thread_local global i32
+
+define i32* @f2() {
+entry:
+ ret i32* @i
+}
+
+; X32: f2:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f2:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+
+define i32 @f3() {
+entry:
+ %tmp1 = load i32* @i ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+; X32: f3:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f3:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+define i32* @f4() nounwind {
+entry:
+ ret i32* @i
+}
+
+; X32: f4:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f4:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
index 85ff360a5508..0cae5c4f2888 100644
--- a/test/CodeGen/X86/tls1.ll
+++ b/test/CodeGen/X86/tls1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = thread_local global i32 15
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
index 2f5f02b9ac96..fb61596d09ca 100644
--- a/test/CodeGen/X86/tls10.ll
+++ b/test/CodeGen/X86/tls10.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
index b6aed9aaa04d..a2c1a1f75deb 100644
--- a/test/CodeGen/X86/tls11.ll
+++ b/test/CodeGen/X86/tls11.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movw %gs:i@NTPOFF, %ax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movw %fs:i@TPOFF, %ax} %t2
@i = thread_local global i16 15
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
index b5288391f03f..c29f6adacd20 100644
--- a/test/CodeGen/X86/tls12.ll
+++ b/test/CodeGen/X86/tls12.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movb %gs:i@NTPOFF, %al} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movb %fs:i@TPOFF, %al} %t2
@i = thread_local global i8 15
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
index ec23a41113ee..08778ec2ce8b 100644
--- a/test/CodeGen/X86/tls13.ll
+++ b/test/CodeGen/X86/tls13.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movswl %gs:i@NTPOFF, %eax} %t
; RUN: grep {movzwl %gs:j@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movswl %fs:i@TPOFF, %edi} %t2
; RUN: grep {movzwl %fs:j@TPOFF, %edi} %t2
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
index 941601eb4f9b..88426dd43d50 100644
--- a/test/CodeGen/X86/tls14.ll
+++ b/test/CodeGen/X86/tls14.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movsbl %gs:i@NTPOFF, %eax} %t
; RUN: grep {movzbl %gs:j@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movsbl %fs:i@TPOFF, %edi} %t2
; RUN: grep {movzbl %fs:j@TPOFF, %edi} %t2
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
index 62f3677629be..7abf070d3fd2 100644
--- a/test/CodeGen/X86/tls15.ll
+++ b/test/CodeGen/X86/tls15.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t | count 1
; RUN: grep {leal i@NTPOFF(%eax), %ecx} %t
; RUN: grep {leal j@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2 | count 1
; RUN: grep {leaq i@TPOFF(%rax), %rcx} %t2
; RUN: grep {leaq j@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
index baa51bbb6ead..5a94296afefc 100644
--- a/test/CodeGen/X86/tls2.ll
+++ b/test/CodeGen/X86/tls2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
index 061849901fcf..7327cc41777e 100644
--- a/test/CodeGen/X86/tls3.ll
+++ b/test/CodeGen/X86/tls3.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl i@INDNTPOFF, %eax} %t
; RUN: grep {movl %gs:(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq i@GOTTPOFF(%rip), %rax} %t2
; RUN: grep {movl %fs:(%rax), %eax} %t2
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
index 33f221b8ad3a..d2e40e389bd5 100644
--- a/test/CodeGen/X86/tls4.ll
+++ b/test/CodeGen/X86/tls4.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {addl i@INDNTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {addq i@GOTTPOFF(%rip), %rax} %t2
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
index ff7b9e0a5ffe..4d2cc02b5028 100644
--- a/test/CodeGen/X86/tls5.ll
+++ b/test/CodeGen/X86/tls5.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = internal thread_local global i32 15
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
index ab53929206a1..505106ee14ed 100644
--- a/test/CodeGen/X86/tls6.ll
+++ b/test/CodeGen/X86/tls6.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
index 6a7739bc1a31..e9116e772090 100644
--- a/test/CodeGen/X86/tls7.ll
+++ b/test/CodeGen/X86/tls7.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = hidden thread_local global i32 15
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
index fd9d472bb66c..375af94920f5 100644
--- a/test/CodeGen/X86/tls8.ll
+++ b/test/CodeGen/X86/tls8.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index bc0a6f0bbe61..214146fe998c 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = external hidden thread_local global i32
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 9a013ffbe565..03ae6bfc869e 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep ud2
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2
define i32 @test() noreturn nounwind {
entry:
tail call void @llvm.trap( )
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 25a1191d8f14..374d404a968c 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -1,7 +1,7 @@
; An integer truncation to i1 should be done with an and instruction to make
; sure only the LSBit survives. Test that this is the case both for a returned
; value and as the operand of a branch.
-; RUN: llvm-as < %s | llc -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \
+; RUN: llc < %s -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \
; RUN: count 5
define i1 @test1(i32 %X) zeroext {
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 3fe4cd1b781a..6f16a2548aa6 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
; rdar://6480363
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 5293b7787960..d0e13f61f2d0 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 5
; rdar://6523745
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/twoaddr-delete.ll b/test/CodeGen/X86/twoaddr-delete.ll
index bbf4e62363a1..77e3c75c6dd0 100644
--- a/test/CodeGen/X86/twoaddr-delete.ll
+++ b/test/CodeGen/X86/twoaddr-delete.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {twoaddrinstr} | grep {Number of dead instructions deleted}
+; RUN: llc < %s -march=x86 -stats |& grep {twoaddrinstr} | grep {Number of dead instructions deleted}
%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 765588059f75..077fee077392 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind {
entry:
diff --git a/test/CodeGen/X86/twoaddr-remat.ll b/test/CodeGen/X86/twoaddr-remat.ll
index b74b70cedb76..4940c78371d9 100644
--- a/test/CodeGen/X86/twoaddr-remat.ll
+++ b/test/CodeGen/X86/twoaddr-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 59796 | count 3
+; RUN: llc < %s -march=x86 | grep 59796 | count 3
%Args = type %Value*
%Exec = type opaque*
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index d6304370e293..da5105d8a4ea 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
; rdar://6504833
define float @f(i32 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll
index 148437f3ddbf..41ee1947edc4 100644
--- a/test/CodeGen/X86/uint_to_fp.ll
+++ b/test/CodeGen/X86/uint_to_fp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep {sub.*esp}
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep cvtsi2ss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss
; rdar://6034396
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index 547e179bb219..7416051693be 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+; RUN: llc < %s -march=x86 | grep {jc} | count 1
; XFAIL: *
; FIXME: umul-with-overflow not supported yet.
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index 9e69154f10f9..d522bd80acfd 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "\\\\\\\<mul"
+; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
diff --git a/test/CodeGen/X86/urem-i8-constant.ll b/test/CodeGen/X86/urem-i8-constant.ll
index bc93684877b5..e3cb69ca591f 100644
--- a/test/CodeGen/X86/urem-i8-constant.ll
+++ b/test/CodeGen/X86/urem-i8-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 111
+; RUN: llc < %s -march=x86 | grep 111
define i8 @foo(i8 %tmp325) {
%t546 = urem i8 %tmp325, 37
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
index bd6045c068e2..b5ebaa74bd07 100644
--- a/test/CodeGen/X86/v4f32-immediate.ll
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps
define <4 x float> @foo() {
ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
index b0cdf496d5fc..4817db22c355 100644
--- a/test/CodeGen/X86/variable-sized-darwin-bzero.ll
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
declare void @llvm.memset.i64(i8*, i8, i64, i32)
diff --git a/test/CodeGen/X86/variadic-node-pic.ll b/test/CodeGen/X86/variadic-node-pic.ll
index 4d76445b2f95..1182a306abd0 100644
--- a/test/CodeGen/X86/variadic-node-pic.ll
+++ b/test/CodeGen/X86/variadic-node-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -code-model=large
+; RUN: llc < %s -relocation-model=pic -code-model=large
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/vec_add.ll b/test/CodeGen/X86/vec_add.ll
index 72415a3ab28b..7c77d11a7b54 100644
--- a/test/CodeGen/X86/vec_add.ll
+++ b/test/CodeGen/X86/vec_add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
entry:
diff --git a/test/CodeGen/X86/vec_align.ll b/test/CodeGen/X86/vec_align.ll
index d88104d79e82..e27311561b2c 100644
--- a/test/CodeGen/X86/vec_align.ll
+++ b/test/CodeGen/X86/vec_align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah -relocation-model=static | grep movaps | count 2
+; RUN: llc < %s -mcpu=yonah -relocation-model=static | grep movaps | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index ebdac7d3de9b..b3efc7b16b7d 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
; RUN: grep {subl.*60}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
; RUN: grep {movaps.*32}
diff --git a/test/CodeGen/X86/vec_clear.ll b/test/CodeGen/X86/vec_clear.ll
index 514de953efec..166d4363ec8d 100644
--- a/test/CodeGen/X86/vec_clear.ll
+++ b/test/CodeGen/X86/vec_clear.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
; RUN: not grep and %t
; RUN: not grep psrldq %t
; RUN: grep xorps %t
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
new file mode 100644
index 000000000000..c8c7257cbb9c
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+
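+; SSE2 only has signed greater-than and equality compares on vector integers,
+; so the remaining predicates are synthesized: sge as the inverse of a swapped
+; gt (the xorps inverts through an all-ones mask), slt as gt with the operands
+; swapped, and ugt presumably by flipping the sign bits so that a signed
+; compare gives the unsigned result.
+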
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: pcmpgtd
+; CHECK: ret
+
+ %C = icmp sgt <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: pcmp
+; CHECK: pcmp
+; CHECK: xorps
+; CHECK: ret
+ %C = icmp sge <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: pcmpgtd
+; CHECK: movaps
+; CHECK: ret
+ %C = icmp slt <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: movaps
+; CHECK: pcmpgtd
+; CHECK: ret
+ %C = icmp ugt <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f057c9a39a63..f0158d643c17 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index d6726be1db6a..dab5dd144f06 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; RUN: grep extractps %t | count 1
; RUN: grep pextrd %t | count 1
; RUN: not grep pshufd %t
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index ee7567cf7609..b0137304e8a9 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
; RUN: grep movss %t | count 3
; RUN: grep movhlps %t | count 1
; RUN: grep pshufd %t | count 1
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index a801472622f2..d49c70e56391 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <4 x float> @t1(<4 x float> %Q) {
%tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
diff --git a/test/CodeGen/X86/vec_i64.ll b/test/CodeGen/X86/vec_i64.ll
index 3939af57c8ce..462e16e13023 100644
--- a/test/CodeGen/X86/vec_i64.ll
+++ b/test/CodeGen/X86/vec_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movq %t | count 2
; Used movq to load i64 into a v2i64 when the top i64 is 0.
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
index c7eb221635d6..29511934af01 100644
--- a/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
; Inserts and extracts with variable indices must be lowered
; to memory accesses.
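+; (The grep pattern is the computed element address: the slot's base in
+; %esp plus 4 bytes per i32 of the index in %eax.)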
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index 788283957585..bf43deb1d19a 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | \
+; RUN: opt < %s -scalarrepl -instcombine | \
; RUN: llc -march=x86 -mcpu=yonah | not grep sub.*esp
; This checks that various insert/extract idioms work without going to the
diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll
index 8d0bcc4fbf34..b08044bb869b 100644
--- a/test/CodeGen/X86/vec_insert-2.ll
+++ b/test/CodeGen/X86/vec_insert-2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
%tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll
index e43eca4b875f..a18cd86489cc 100644
--- a/test/CodeGen/X86/vec_insert-3.ll
+++ b/test/CodeGen/X86/vec_insert-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
%tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 1a9768a98e6c..291fc0454c9c 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep psllq %t | grep 32
; RUN: grep pslldq %t | grep 12
; RUN: grep psrldq %t | grep 8
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index 5ef270f90820..54aa43f0c35d 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
define <4 x float> @t3(<4 x float>* %P) nounwind {
%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 8cfc63aa6bf1..9ede10f63d3e 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_insert-8.ll b/test/CodeGen/X86/vec_insert-8.ll
index 0f6924c66f9e..650951cc9e5e 100644
--- a/test/CodeGen/X86/vec_insert-8.ll
+++ b/test/CodeGen/X86/vec_insert-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; Tests variable insert and extract of a <4 x i32>.
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
index 3a9464ceff12..a7274a900044 100644
--- a/test/CodeGen/X86/vec_insert.ll
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
define void @test(<4 x float>* %F, i32 %I) {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_insert_4.ll b/test/CodeGen/X86/vec_insert_4.ll
index a0aa0c0bfea0..2c31e56b4af6 100644
--- a/test/CodeGen/X86/vec_insert_4.ll
+++ b/test/CodeGen/X86/vec_insert_4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep 1084227584 | count 1
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
index 67122763ec9b..8812c4f820c6 100644
--- a/test/CodeGen/X86/vec_loadsingles.ll
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
index f8957629f8a1..1dc0b163aeb3 100644
--- a/test/CodeGen/X86/vec_logical.ll
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep xorps %t | count 2
; RUN: grep andnps %t
; RUN: grep movaps %t | count 2
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 106966fd5212..66762b4a0604 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep xorps %t | count 1
; RUN: grep movaps %t | count 1
; RUN: not grep shuf %t
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
index ecb825b00bd8..033e9f7027f9 100644
--- a/test/CodeGen/X86/vec_select.ll
+++ b/test/CodeGen/X86/vec_select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; RUN: llc < %s -march=x86 -mattr=+sse
define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-2.ll b/test/CodeGen/X86/vec_set-2.ll
index ae9530db8df5..a8f1187084d6 100644
--- a/test/CodeGen/X86/vec_set-2.ll
+++ b/test/CodeGen/X86/vec_set-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
define <4 x float> @test1(float %a) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index 546ca0bcf307..ada17e0092a8 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep pshufd %t | count 2
define <4 x float> @test(float %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set-4.ll b/test/CodeGen/X86/vec_set-4.ll
index da7ef80a3a5e..332c8b70760f 100644
--- a/test/CodeGen/X86/vec_set-4.ll
+++ b/test/CodeGen/X86/vec_set-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pinsrw | count 2
define <2 x i64> @test(i16 %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-5.ll b/test/CodeGen/X86/vec_set-5.ll
index d3329701119b..f811a7404a27 100644
--- a/test/CodeGen/X86/vec_set-5.ll
+++ b/test/CodeGen/X86/vec_set-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movq %t | count 2
diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll
index c7b6747a86f7..0713d956ee44 100644
--- a/test/CodeGen/X86/vec_set-6.ll
+++ b/test/CodeGen/X86/vec_set-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movss %t | count 1
; RUN: grep movq %t | count 1
; RUN: grep shufps %t | count 1
diff --git a/test/CodeGen/X86/vec_set-7.ll b/test/CodeGen/X86/vec_set-7.ll
index 6f98c510efca..d993178a9892 100644
--- a/test/CodeGen/X86/vec_set-7.ll
+++ b/test/CodeGen/X86/vec_set-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
define <2 x i64> @test(<2 x i64>* %p) nounwind {
%tmp = bitcast <2 x i64>* %p to double*
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index cca436bf6433..9697f1186d45 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movsd
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
+; RUN: llc < %s -march=x86-64 | not grep movsd
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
define <2 x i64> @test(i64 %i) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 5c1b8f5dacb8..3656e5f6ca47 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
define <2 x i64> @test3(i64 %A) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-A.ll b/test/CodeGen/X86/vec_set-A.ll
index f33263f1aef5..f05eecf8c3ae 100644
--- a/test/CodeGen/X86/vec_set-A.ll
+++ b/test/CodeGen/X86/vec_set-A.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
define <2 x i64> @test1() nounwind {
entry:
ret <2 x i64> < i64 1, i64 0 >
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
index d318964686c9..f5b3e8baa33a 100644
--- a/test/CodeGen/X86/vec_set-B.ll
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movaps
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep esp | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2
; These should both generate something like this:
;_test3:
diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll
index fc86853e10fd..7636ac3b3741 100644
--- a/test/CodeGen/X86/vec_set-C.ll
+++ b/test/CodeGen/X86/vec_set-C.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd
define <2 x i64> @t1(i64 %x) nounwind {
%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-D.ll b/test/CodeGen/X86/vec_set-D.ll
index 71bdd849953b..3d6369e1c76a 100644
--- a/test/CodeGen/X86/vec_set-D.ll
+++ b/test/CodeGen/X86/vec_set-D.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
define <4 x i32> @t(i32 %x, i32 %y) nounwind {
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-E.ll b/test/CodeGen/X86/vec_set-E.ll
index ee63234cc978..d78be669fc7f 100644
--- a/test/CodeGen/X86/vec_set-E.ll
+++ b/test/CodeGen/X86/vec_set-E.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
define <4 x float> @t(float %X) nounwind {
%tmp11 = insertelement <4 x float> undef, float %X, i32 0
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index db83eb2e8531..4f0acb2d151d 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_set-G.ll b/test/CodeGen/X86/vec_set-G.ll
index f81907cb69f2..4a542feafaff 100644
--- a/test/CodeGen/X86/vec_set-G.ll
+++ b/test/CodeGen/X86/vec_set-G.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
define fastcc void @t(<4 x float> %A) nounwind {
%tmp41896 = extractelement <4 x float> %A, i32 0 ; <float> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-H.ll b/test/CodeGen/X86/vec_set-H.ll
index ea7b85355c23..5037e36d3fd5 100644
--- a/test/CodeGen/X86/vec_set-H.ll
+++ b/test/CodeGen/X86/vec_set-H.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movz
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movz
define <2 x i64> @doload64(i16 signext %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
index e1c44d0a0f4b..64f36f99e4d2 100644
--- a/test/CodeGen/X86/vec_set-I.ll
+++ b/test/CodeGen/X86/vec_set-I.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xorp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp
define void @t1() nounwind {
%tmp298.i.i = load <4 x float>* null, align 16
diff --git a/test/CodeGen/X86/vec_set-J.ll b/test/CodeGen/X86/vec_set-J.ll
index 488d36073416..d90ab85b8cf7 100644
--- a/test/CodeGen/X86/vec_set-J.ll
+++ b/test/CodeGen/X86/vec_set-J.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
; PR2472
define <4 x i32> @a(<4 x i32> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 77636eda1c02..c316df887c16 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep punpckl | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpckl | count 7
define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
%tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shift.ll b/test/CodeGen/X86/vec_shift.ll
index 9c595bc6ef06..ddf0469b72a7 100644
--- a/test/CodeGen/X86/vec_shift.ll
+++ b/test/CodeGen/X86/vec_shift.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllw
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psrlq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psrlq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shift2.ll b/test/CodeGen/X86/vec_shift2.ll
index b73f5f490006..c5f9dc4ace32 100644
--- a/test/CodeGen/X86/vec_shift2.ll
+++ b/test/CodeGen/X86/vec_shift2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep CPI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep CPI
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
diff --git a/test/CodeGen/X86/vec_shift3.ll b/test/CodeGen/X86/vec_shift3.ll
index 2641c5d59674..1ebf455c0555 100644
--- a/test/CodeGen/X86/vec_shift3.ll
+++ b/test/CodeGen/X86/vec_shift3.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 2
define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
index 297469d92024..a63e3868ad75 100644
--- a/test/CodeGen/X86/vec_shuffle-10.ll
+++ b/test/CodeGen/X86/vec_shuffle-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep unpcklps %t | count 1
; RUN: grep pshufd %t | count 1
; RUN: not grep {sub.*esp} %t
diff --git a/test/CodeGen/X86/vec_shuffle-11.ll b/test/CodeGen/X86/vec_shuffle-11.ll
index 463858f1b65b..640745ae2645 100644
--- a/test/CodeGen/X86/vec_shuffle-11.ll
+++ b/test/CodeGen/X86/vec_shuffle-11.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
define <4 x i32> @test() nounwind {
%tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 ) ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
index 6e8d0b8077da..f0cfc44ab19a 100644
--- a/test/CodeGen/X86/vec_shuffle-14.ll
+++ b/test/CodeGen/X86/vec_shuffle-14.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movq | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
define <4 x i32> @t1(i32 %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-15.ll b/test/CodeGen/X86/vec_shuffle-15.ll
index 062f77c279e4..5a9b8fd34579 100644
--- a/test/CodeGen/X86/vec_shuffle-15.ll
+++ b/test/CodeGen/X86/vec_shuffle-15.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind {
%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index b3a5b769e67e..470f676d4627 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t
; RUN: grep shufps %t | count 4
; RUN: grep movaps %t | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
; RUN: grep pshufd %t | count 4
; RUN: not grep shufps %t
; RUN: not grep mov %t
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index 992d79184b87..9c33abb4421a 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi, %xmm0}
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep xor
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi, %xmm0}
+; RUN: llc < %s -march=x86-64 | not grep xor
; PR2108
define <2 x i64> @doload64(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-18.ll b/test/CodeGen/X86/vec_shuffle-18.ll
index 85392632a29e..1104a4a8856b 100644
--- a/test/CodeGen/X86/vec_shuffle-18.ll
+++ b/test/CodeGen/X86/vec_shuffle-18.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
%struct.vector4_t = type { <4 x float> }
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 4e7db20db564..9fc09dfdd2b8 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
; PR2485
define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 71890844894f..6d1bac0743d4 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
index d19f110fc100..5307ced4899b 100644
--- a/test/CodeGen/X86/vec_shuffle-22.ll
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium-m -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=pentium-m -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep pshufd %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movddup %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 7e8aa5dc4bf6..05a3a1e9d276 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
define i32 @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 170ba35173f3..7562f1d89594 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck
define i32 @t() nounwind optsize {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
index 18922aaac10e..2aa2d252849c 100644
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep unpcklps %t | count 3
; RUN: grep unpckhps %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index abd6e90b7907..8cc15d1e7c27 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep unpcklps %t | count 1
; RUN: grep unpckhps %t | count 3
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 231ac0c3b834..d700ccbf5303 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep addps %t | count 2
; RUN: grep mulps %t | count 2
; RUN: grep subps %t | count 2
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
index f7e5001e64d1..343685bf8ad2 100644
--- a/test/CodeGen/X86/vec_shuffle-28.ll
+++ b/test/CodeGen/X86/vec_shuffle-28.ll
@@ -1,8 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep movd %t | count 1
-; RUN: grep pshuflw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep pshufb %t | count 1
; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 018b4cf1a06b..556f1037d0c5 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movhlps %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
index 50a3df8f0b2a..3f69150ac533 100644
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f
-; RUN: grep pshufhw %t | grep 161 | count 1
+; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: grep pshufhw %t | grep -- -95 | count 1
; RUN: grep shufps %t | count 1
; RUN: not grep pslldq %t
; Test case when creating pshufhw, we incorrectly set the higher order bit
; for an undef,
-define void @test(<8 x i16>* %dest, <8 x i16> %in) {
+define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind {
entry:
%0 = load <8 x i16>* %dest
%1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14>
@@ -14,7 +14,7 @@ entry:
}
; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq
-define void @test2(<4 x i32>* %dest, <4 x i32> %in) {
+define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind {
entry:
%0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
store <4 x i32> %0, <4 x i32>* %dest
diff --git a/test/CodeGen/X86/vec_shuffle-31.ll b/test/CodeGen/X86/vec_shuffle-31.ll
index efcd0300e35f..bb06e15425bb 100644
--- a/test/CodeGen/X86/vec_shuffle-31.ll
+++ b/test/CodeGen/X86/vec_shuffle-31.ll
@@ -1,9 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep pextrw %t | count 1
-; RUN: grep movlhps %t | count 1
-; RUN: grep pshufhw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep pshufb %t | count 1
define <8 x i16> @shuf3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
diff --git a/test/CodeGen/X86/vec_shuffle-34.ll b/test/CodeGen/X86/vec_shuffle-34.ll
index 99c95d1623e7..d057b3fa7ea8 100644
--- a/test/CodeGen/X86/vec_shuffle-34.ll
+++ b/test/CodeGen/X86/vec_shuffle-34.ll
@@ -1,10 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep pextrw %t | count 1
-; RUN: grep punpcklqdq %t | count 1
-; RUN: grep pshuflw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
-; RUN: grep pshufb %t | count 2
+; RUN: llc < %s -march=x86 -mcpu=core2 | grep pshufb | count 2
define <8 x i16> @shuf2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
index 7be846884903..7f0fcb5969e4 100644
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stack-alignment=16 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
; RUN: grep pextrw %t | count 13
; RUN: grep pinsrw %t | count 14
; RUN: grep rolw %t | count 13
; RUN: not grep esp %t
; RUN: not grep ebp %t
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -stack-alignment=16 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -stack-alignment=16 -o %t
; RUN: grep pshufb %t | count 3
define <16 x i8> @shuf1(<16 x i8> %T0) nounwind readnone {
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 005118705856..8a93a7eeee3b 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep pshufb %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
index 3c03baa5062c..829fedf97cc5 100644
--- a/test/CodeGen/X86/vec_shuffle-4.ll
+++ b/test/CodeGen/X86/vec_shuffle-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep shuf %t | count 2
; RUN: not grep unpck %t
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
index e356f2456b75..c24167a6150d 100644
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movhlps %t | count 1
; RUN: grep shufps %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
index f7c9f2daa39d..f034b0aa7102 100644
--- a/test/CodeGen/X86/vec_shuffle-6.ll
+++ b/test/CodeGen/X86/vec_shuffle-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movapd %t | count 1
; RUN: grep movaps %t | count 1
; RUN: grep movups %t | count 2
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
index fbcfac5b57a4..4cdca09c72f5 100644
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ b/test/CodeGen/X86/vec_shuffle-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep xorps %t | count 1
; RUN: not grep shufps %t
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
index 73d75e63914d..964ce7b2892b 100644
--- a/test/CodeGen/X86/vec_shuffle-8.ll
+++ b/test/CodeGen/X86/vec_shuffle-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 | \
; RUN: not grep shufps
define void @test(<4 x float>* %res, <4 x float>* %A) {
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
index 68577d455f75..2bef24d443eb 100644
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ b/test/CodeGen/X86/vec_shuffle-9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep punpck %t | count 2
; RUN: not grep pextrw %t
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index f43aa1d4e714..c05b79a54a15 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep shufp %t | count 1
; RUN: grep movupd %t | count 1
; RUN: grep pshufhw %t | count 1
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index c6e3dddd5fa6..cde5ae99563e 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1
define void @test(<2 x i64>* %P, i8 %x) nounwind {
%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index 1f1a214479f4..649b85c5dadd 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep punpcklwd %t | count 4
; RUN: grep punpckhwd %t | count 4
; RUN: grep "pshufd" %t | count 8
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
index 220e1cd34d57..d9941e65bde3 100644
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ b/test/CodeGen/X86/vec_splat-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep punpcklbw %t | count 16
; RUN: grep punpckhbw %t | count 16
; RUN: grep "pshufd" %t | count 16
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 89914fda63ba..a87fbd0dc655 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup
define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 69900a686e25..b1613fb3a374 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t
; RUN: grep minss %t | grep CPI | count 2
; RUN: grep CPI %t | not grep movss
diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll
index efdf5649a14b..e42b5384b081 100644
--- a/test/CodeGen/X86/vec_zero-2.ll
+++ b/test/CodeGen/X86/vec_zero-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define i32 @t() {
entry:
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 0a7a54341269..ae5af586cdc3 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xorps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 0ccf745e524e..296378c6e9f5 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
@M1 = external global <1 x i64>
@M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/vector-intrinsics.ll b/test/CodeGen/X86/vector-intrinsics.ll
index 32916589879d..edf58b9da111 100644
--- a/test/CodeGen/X86/vector-intrinsics.ll
+++ b/test/CodeGen/X86/vector-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep call | count 16
+; RUN: llc < %s -march=x86-64 | grep call | count 16
declare <4 x double> @llvm.sin.v4f64(<4 x double> %p)
declare <4 x double> @llvm.cos.v4f64(<4 x double> %p)
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
index cfdd34ee1c9b..51cd872643f2 100644
--- a/test/CodeGen/X86/vector-rem.ll
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fmodf | count 4
+; RUN: llc < %s -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 4
define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) {
%m = srem <4 x i32> %t, %u
diff --git a/test/CodeGen/X86/vector-variable-idx.ll b/test/CodeGen/X86/vector-variable-idx.ll
index 82927e96983d..2a4d18c141a3 100644
--- a/test/CodeGen/X86/vector-variable-idx.ll
+++ b/test/CodeGen/X86/vector-variable-idx.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movss | count 2
+; RUN: llc < %s -march=x86-64 | grep movss | count 2
; PR2676
define float @foo(<4 x float> %p, i32 %t) {
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 8e1de2f62cb0..3fff8497dfda 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 > %t
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah > %t
+; RUN: llc < %s -march=x86 -mcpu=i386 > %t
+; RUN: llc < %s -march=x86 -mcpu=yonah > %t
%d8 = type <8 x double>
%f1 = type <1 x float>
diff --git a/test/CodeGen/X86/vfcmp.ll b/test/CodeGen/X86/vfcmp.ll
index 85b82a0ac8e8..f5f5293622b2 100644
--- a/test/CodeGen/X86/vfcmp.ll
+++ b/test/CodeGen/X86/vfcmp.ll
@@ -1,8 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2620
-define void @t(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
- vfcmp olt <2 x double> zeroinitializer, zeroinitializer ; <<2 x i64>>:1 [#uses=1]
+
+define void @t2(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
+ %A = fcmp olt <2 x double> zeroinitializer, zeroinitializer ; <<2 x i64>>:1 [#uses=1]
+ sext <2 x i1> %A to <2 x i64>
extractelement <2 x i64> %1, i32 1 ; <i64>:2 [#uses=1]
lshr i64 %2, 63 ; <i64>:3 [#uses=1]
trunc i64 %3 to i1 ; <i1>:4 [#uses=1]
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index f919b5de4961..5e1e0c858e65 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 | grep movsd | count 5
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
@x = external global double
diff --git a/test/CodeGen/X86/vortex-bug.ll b/test/CodeGen/X86/vortex-bug.ll
index d62bb24e33df..40f11175b20a 100644
--- a/test/CodeGen/X86/vortex-bug.ll
+++ b/test/CodeGen/X86/vortex-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.blktkntype = type { i32, i32 }
%struct.fieldstruc = type { [128 x i8], %struct.blktkntype*, i32, i32 }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index d7a20e46c18e..ae845e0a33d1 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -1,13 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psllq %t | count 2
-; RUN: grep pslld %t | count 2
-; RUN: grep psllw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: psllq
%shl = shl <2 x i64> %val, < i64 32, i64 32 >
store <2 x i64> %shl, <2 x i64>* %dst
ret void
@@ -15,6 +14,9 @@ entry:
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psllq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%shl = shl <2 x i64> %val, %1
@@ -25,6 +27,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: pslld
%shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
store <4 x i32> %shl, <4 x i32>* %dst
ret void
@@ -32,6 +36,9 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: pslld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -43,13 +50,20 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: psllw
%shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
store <8 x i16> %shl, <8 x i16>* %dst
ret void
}
+; Make sure the shift amount is properly zero extended.
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psllw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 0807174420e8..36feb11603d8 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -1,13 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psrlq %t | count 2
-; RUN: grep psrld %t | count 2
-; RUN: grep psrlw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: psrlq
%lshr = lshr <2 x i64> %val, < i64 32, i64 32 >
store <2 x i64> %lshr, <2 x i64>* %dst
ret void
@@ -15,6 +14,9 @@ entry:
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psrlq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%lshr = lshr <2 x i64> %val, %1
@@ -24,6 +26,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: psrld
%lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 >
store <4 x i32> %lshr, <4 x i32>* %dst
ret void
@@ -31,6 +35,9 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -43,13 +50,20 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: psrlw
%lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
store <8 x i16> %lshr, <8 x i16>* %dst
ret void
}
+; properly zero extend the shift amount
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psrlw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
@@ -61,4 +75,4 @@ entry:
%lshr = lshr <8 x i16> %val, %7
store <8 x i16> %lshr, <8 x i16>* %dst
ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index eea8ad1c798e..20d3f48a1a67 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -1,13 +1,15 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psrad %t | count 2
-; RUN: grep psraw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
; Note that x86 does have ashr
+
+; shift1a can't use a packed shift
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: sarl
%ashr = ashr <2 x i64> %val, < i64 32, i64 32 >
store <2 x i64> %ashr, <2 x i64>* %dst
ret void
@@ -15,6 +17,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: psrad $5
%ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
store <4 x i32> %ashr, <4 x i32>* %dst
ret void
@@ -22,6 +26,9 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrad
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -33,6 +40,8 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: psraw $5
%ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
store <8 x i16> %ashr, <8 x i16>* %dst
ret void
@@ -40,6 +49,10 @@ entry:
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psraw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
@@ -51,4 +64,4 @@ entry:
%ashr = ashr <8 x i16> %val, %7
store <8 x i16> %ashr, <8 x i16>* %dst
ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 03ab95c0e105..9773cbed0ae3 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,21 +1,23 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psllq %t | count 1
-; RUN: grep pslld %t | count 3
-; RUN: grep psllw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: psllq
%shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%shl = shl <2 x i64> %val, %shamt
store <2 x i64> %shl, <2 x i64>* %dst
ret void
}
+; shift1b can't use a packed shift
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
entry:
+; CHECK: shift1b:
+; CHECK: shll
%shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
%shl = shl <2 x i64> %val, %shamt
store <2 x i64> %shl, <2 x i64>* %dst
@@ -24,6 +26,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: pslld
%shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%shl = shl <4 x i32> %val, %shamt
store <4 x i32> %shl, <4 x i32>* %dst
@@ -32,6 +36,8 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: pslld
%shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 1, i32 1>
%shl = shl <4 x i32> %val, %shamt
store <4 x i32> %shl, <4 x i32>* %dst
@@ -40,6 +46,8 @@ entry:
define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
entry:
+; CHECK: shift2c:
+; CHECK: pslld
%shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%shl = shl <4 x i32> %val, %shamt
store <4 x i32> %shl, <4 x i32>* %dst
@@ -48,6 +56,9 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: movzwl
+; CHECK: psllw
%shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
%shl = shl <8 x i16> %val, %shamt
store <8 x i16> %shl, <8 x i16>* %dst
@@ -56,6 +67,9 @@ entry:
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: psllw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
new file mode 100644
index 000000000000..a543f382b513
--- /dev/null
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; When loading the shift amount from memory, avoid generating the splat.
+
+define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5a:
+; CHECK: movd
+; CHECK-NEXT: pslld
+ %amt = load i32* %pamt
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shl = shl <4 x i32> %val, %shamt
+ store <4 x i32> %shl, <4 x i32>* %dst
+ ret void
+}
+
+
+define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5b:
+; CHECK: movd
+; CHECK-NEXT: psrad
+ %amt = load i32* %pamt
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shr = ashr <4 x i32> %val, %shamt
+ store <4 x i32> %shr, <4 x i32>* %dst
+ ret void
+}
+
+
+define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5c:
+; CHECK: movd
+; CHECK-NEXT: pslld
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shl = shl <4 x i32> %val, %shamt
+ store <4 x i32> %shl, <4 x i32>* %dst
+ ret void
+}
+
+
+define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5d:
+; CHECK: movd
+; CHECK-NEXT: psrad
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shr = ashr <4 x i32> %val, %shamt
+ store <4 x i32> %shr, <4 x i32>* %dst
+ ret void
+}
diff --git a/test/CodeGen/X86/vshift_scalar.ll b/test/CodeGen/X86/vshift_scalar.ll
index 8895cdf8aff6..9dd8478caaed 100644
--- a/test/CodeGen/X86/vshift_scalar.ll
+++ b/test/CodeGen/X86/vshift_scalar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Legalization test that requires scalarizing a vector.
diff --git a/test/CodeGen/X86/vshift_split.ll b/test/CodeGen/X86/vshift_split.ll
index 8f485ddd9a6f..359d36d8af69 100644
--- a/test/CodeGen/X86/vshift_split.ll
+++ b/test/CodeGen/X86/vshift_split.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=x86 -mattr=+sse2
; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
entry:
- %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
+ %shr = lshr <2 x i64> %val, < i64 2, i64 3 >
ret <2 x i64> %shr
}
diff --git a/test/CodeGen/X86/vshift_split2.ll b/test/CodeGen/X86/vshift_split2.ll
index e9438492a0fb..0f8c2b896e2b 100644
--- a/test/CodeGen/X86/vshift_split2.ll
+++ b/test/CodeGen/X86/vshift_split2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
; Legalization example that requires splitting a large vector into smaller pieces.
diff --git a/test/CodeGen/X86/weak.ll b/test/CodeGen/X86/weak.ll
index 28638afd57ef..8590e8d0001e 100644
--- a/test/CodeGen/X86/weak.ll
+++ b/test/CodeGen/X86/weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
@a = extern_weak global i32 ; <i32*> [#uses=1]
@b = global i32* @a ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/wide-integer-fold.ll b/test/CodeGen/X86/wide-integer-fold.ll
new file mode 100644
index 000000000000..b3b4d24ab3ac
--- /dev/null
+++ b/test/CodeGen/X86/wide-integer-fold.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK: movq $-65535, %rax
+
+; DAGCombiner should fold this to a simple constant.
+
+define i64 @foo(i192 %a) nounwind {
+ %t = or i192 %a, -22300404916163702203072254898040925442801665
+ %s = and i192 %t, -22300404916163702203072254898040929737768960
+ %u = lshr i192 %s, 128
+ %v = trunc i192 %u to i64
+ ret i64 %v
+}
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 419078174d1a..8f607f5ed593 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddb %t | count 1
; RUN: grep pextrb %t | count 1
; RUN: not grep pextrw %t
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index de6cd0871be7..e2420f0ff19c 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddb %t | count 1
; RUN: grep pand %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index fbba4457e226..a22d2547566f 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddw %t | count 1
; RUN: grep movd %t | count 2
; RUN: grep pextrw %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index e19ab6574aad..898bff01378a 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep psubw %t | count 1
; RUN: grep pmullw %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index 6ff099dd8f9f..1ecf09d9ff32 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep pmulld %t | count 1
; RUN: grep psubd %t | count 1
; RUN: grep movaps %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 7b0bb33c0024..358325885f2a 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep mulps %t | count 1
; RUN: grep addps %t | count 1
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index ed8d27cde649..441a36048633 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddw %t | count 1
; RUN: grep movd %t | count 1
; RUN: grep pextrd %t | count 1
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 3b45ce308d24..ded5707aed40 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep pextrd %t | count 5
; RUN: grep movd %t | count 3
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 33cc41f73fe3..67a760f5df09 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddd %t | count 1
; RUN: grep pextrd %t | count 2
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index b090cb1614ce..614eeedbe79d 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep sarb %t | count 8
; v8i8 that is widen to v16i8 then split
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index 76969429befe..92618d6fe157 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; bitcast a i64 to v2i32
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 0fa1b7a7604a..386f749a5066 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx -o %t
; RUN: grep movd %t | count 1
; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index a4aab7bb1da6..ccc8b4ff06e6 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep pshufd %t | count 1
; RUN: grep paddd %t | count 1
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index 191a261ccf18..9b7ab74eb2e1 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; sign extension v2i32 to v2i16
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index 154788d667ba..4ec76a908e81 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; grep cvtsi2ss %t | count 1
; sign to float v2i16 to v2f32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 1ea5788ab3e4..61a26a8b80bd 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
new file mode 100644
index 000000000000..f6c4af03209b
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR4891
+
+; Both loads should happen before either store.
+
+; CHECK: movl (%rdi), %eax
+; CHECK: movl (%rsi), %ecx
+; CHECK: movl %ecx, (%rdi)
+; CHECK: movl %eax, (%rsi)
+
+define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
+entry:
+ %0 = load <2 x i16>* %b, align 2 ; <<2 x i16>> [#uses=1]
+ %1 = load i32* %c, align 4 ; <i32> [#uses=1]
+ %tmp1 = bitcast i32 %1 to <2 x i16> ; <<2 x i16>> [#uses=1]
+ store <2 x i16> %tmp1, <2 x i16>* %b, align 2
+ %tmp5 = bitcast <2 x i16> %0 to <1 x i32> ; <<1 x i32>> [#uses=1]
+ %tmp3 = extractelement <1 x i32> %tmp5, i32 0 ; <i32> [#uses=1]
+ store i32 %tmp3, i32* %c, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
new file mode 100644
index 000000000000..2d34b31314d5
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -0,0 +1,45 @@
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; PR4891
+
+; This load should be before the call, not after.
+
+; CHECK: movq compl+128(%rip), %xmm0
+; CHECK: movaps %xmm0, (%rsp)
+; CHECK: call killcommon
+
+@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]
+
+declare void @killcommon(i32* noalias)
+
+define void @reset(<2 x float>* noalias %garbage1) {
+"file complex.c, line 27, bb1":
+ %changed = alloca i32, align 4 ; <i32*> [#uses=3]
+ br label %"file complex.c, line 27, bb13"
+
+"file complex.c, line 27, bb13": ; preds = %"file complex.c, line 27, bb1"
+ store i32 0, i32* %changed, align 4
+ %r2 = getelementptr float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
+ %r3 = bitcast float* %r2 to <2 x float>* ; <<2 x float>*> [#uses=1]
+ %r4 = load <2 x float>* %r3, align 4 ; <<2 x float>> [#uses=1]
+ call void @killcommon(i32* %changed)
+ br label %"file complex.c, line 34, bb4"
+
+"file complex.c, line 34, bb4": ; preds = %"file complex.c, line 27, bb13"
+ %r5 = load i32* %changed, align 4 ; <i32> [#uses=1]
+ %r6 = icmp eq i32 %r5, 0 ; <i1> [#uses=1]
+ %r7 = zext i1 %r6 to i32 ; <i32> [#uses=1]
+ %r8 = icmp ne i32 %r7, 0 ; <i1> [#uses=1]
+ br i1 %r8, label %"file complex.c, line 34, bb7", label %"file complex.c, line 27, bb5"
+
+"file complex.c, line 27, bb5": ; preds = %"file complex.c, line 34, bb4"
+ br label %"file complex.c, line 35, bb6"
+
+"file complex.c, line 35, bb6": ; preds = %"file complex.c, line 27, bb5"
+ %r11 = ptrtoint <2 x float>* %garbage1 to i64 ; <i64> [#uses=1]
+ %r12 = inttoptr i64 %r11 to <2 x float>* ; <<2 x float>*> [#uses=1]
+ store <2 x float> %r4, <2 x float>* %r12, align 4
+ br label %"file complex.c, line 34, bb7"
+
+"file complex.c, line 34, bb7": ; preds = %"file complex.c, line 35, bb6", %"file complex.c, line 34, bb4"
+ ret void
+}
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index 3d757b8a8a50..aca0b67cb663 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; widening select v6i32 and then a sub
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index a676f33d6c68..15da87005c92 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; widening shuffle v3float and then a add
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index c2dfa3d272c3..617cc1de4ba8 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; widening shuffle v3float and then a add
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 3d61e5dbe5a7..3c7389111267 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl.*%edi, %eax}
+; RUN: llc < %s | grep {movl.*%edi, %eax}
; This should be a single mov, not a load of immediate + andq.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll
index 22a095b0d9b5..ec8dd8edb634 100644
--- a/test/CodeGen/X86/x86-64-arg.ll
+++ b/test/CodeGen/X86/x86-64-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl %edi, %eax}
+; RUN: llc < %s | grep {movl %edi, %eax}
; The input value is already sign extended, don't re-extend it.
; This testcase corresponds to:
; int test(short X) { return (int)X; }
diff --git a/test/CodeGen/X86/x86-64-asm.ll b/test/CodeGen/X86/x86-64-asm.ll
index 8ccf8b67448b..2640e593ec18 100644
--- a/test/CodeGen/X86/x86-64-asm.ll
+++ b/test/CodeGen/X86/x86-64-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1029
target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
index 15a30de21c6f..79316f29de37 100644
--- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | not grep rsp
-; RUN: llvm-as < %s | llc | grep cvttsd2siq
+; RUN: llc < %s | not grep rsp
+; RUN: llc < %s | grep cvttsd2siq
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-disp.ll b/test/CodeGen/X86/x86-64-disp.ll
index 4a8f6cdfb60d..d8059ebb1c19 100644
--- a/test/CodeGen/X86/x86-64-disp.ll
+++ b/test/CodeGen/X86/x86-64-disp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
; Fold an offset into an address even if it's not a 32-bit
; signed integer.
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
index 80060996f32b..57163d3c6839 100644
--- a/test/CodeGen/X86/x86-64-frameaddr.ll
+++ b/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | grep rbp
+; RUN: llc < %s -march=x86-64 | grep movq | grep rbp
define i64* @stack_end_address() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-64-gv-offset.ll b/test/CodeGen/X86/x86-64-gv-offset.ll
index b89e1b95368d..365e4af63fc1 100644
--- a/test/CodeGen/X86/x86-64-gv-offset.ll
+++ b/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep lea
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep lea
%struct.x = type { float, double }
@X = global %struct.x { float 1.000000e+00, double 2.000000e+00 }, align 16 ; <%struct.x*> [#uses=2]
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
index 4beb5c21acab..b4f1fa666720 100644
--- a/test/CodeGen/X86/x86-64-malloc.ll
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {shll.*3, %edi}
+; RUN: llc < %s -march=x86-64 | grep {shll.*3, %edi}
; PR3829
; The generated code should multiply by 3 (sizeof i8*) as an i32,
; not as an i64!
diff --git a/test/CodeGen/X86/x86-64-mem.ll b/test/CodeGen/X86/x86-64-mem.ll
index 7497362a1546..d15f516cddee 100644
--- a/test/CodeGen/X86/x86-64-mem.ll
+++ b/test/CodeGen/X86/x86-64-mem.ll
@@ -1,10 +1,9 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -o %t1
; RUN: grep GOTPCREL %t1 | count 4
; RUN: grep %%rip %t1 | count 6
; RUN: grep movq %t1 | count 6
; RUN: grep leaq %t1 | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=static -o %t2 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static -o %t2
; RUN: grep movl %t2 | count 2
; RUN: grep movq %t2 | count 2
diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll
index f5303c6ad2d4..b21918ef80d4 100644
--- a/test/CodeGen/X86/x86-64-pic-1.ll
+++ b/test/CodeGen/X86/x86-64-pic-1.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call f@PLT} %t1
define void @g() {
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index bc0d0c09f4d0..0f65e5744959 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call g@PLT} %t1
@g = alias weak i32 ()* @f
diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll
index f7e0def2d06b..ef816853326e 100644
--- a/test/CodeGen/X86/x86-64-pic-11.ll
+++ b/test/CodeGen/X86/x86-64-pic-11.ll
@@ -1,8 +1,7 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call __fixunsxfti@PLT} %t1
-define i128 @f(x86_fp80 %a) {
+define i128 @f(x86_fp80 %a) nounwind {
entry:
%tmp78 = fptoui x86_fp80 %a to i128
ret i128 %tmp78
diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll
index 39aecbadc487..a52c564f9683 100644
--- a/test/CodeGen/X86/x86-64-pic-2.ll
+++ b/test/CodeGen/X86/x86-64-pic-2.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call f} %t1
; RUN: not grep {call f@PLT} %t1
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index 0f5f4b706ab4..246c00f74119 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call f} %t1
; RUN: not grep {call f@PLT} %t1
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
index f8dfa927828a..90fc1194a33b 100644
--- a/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movq a@GOTPCREL(%rip),} %t1
@a = global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
index 694755da5381..6369bde6943d 100644
--- a/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movl a(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index 965a550108b2..6e19ad35bcf4 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movl a(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
index 95b7197ff174..4d98ee614026 100644
--- a/test/CodeGen/X86/x86-64-pic-7.ll
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movq f@GOTPCREL(%rip),} %t1
define void ()* @g() nounwind {
diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll
index 369e0cf365ac..d3b567c61076 100644
--- a/test/CodeGen/X86/x86-64-pic-8.ll
+++ b/test/CodeGen/X86/x86-64-pic-8.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {leaq f(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
index 175ec4e5ef95..076103133fa9 100644
--- a/test/CodeGen/X86/x86-64-pic-9.ll
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {leaq f(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-ret0.ll b/test/CodeGen/X86/x86-64-ret0.ll
index d4252e7d6e44..c74f6d803b1c 100644
--- a/test/CodeGen/X86/x86-64-ret0.ll
+++ b/test/CodeGen/X86/x86-64-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
define i32 @f() nounwind {
tail call void @t( i32 1 ) nounwind
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
index 369527fd29cf..7f96543ba49d 100644
--- a/test/CodeGen/X86/x86-64-shortint.ll
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep movswl
+; RUN: llc < %s | grep movswl
target datalayout = "e-p:64:64"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index 9298661998b0..7b5f189faa0f 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -1,9 +1,11 @@
-; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax}
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
%struct.foo = type { [4 x i64] }
+; CHECK: bar:
+; CHECK: movq %rdi, %rax
define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind {
entry:
%d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2]
@@ -52,3 +54,10 @@ entry:
return: ; preds = %entry
ret void
}
+
+; CHECK: foo:
+; CHECK: movq %rdi, %rax
+define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
+ store { i64 } { i64 0 }, { i64 }* %agg.result
+ ret void
+}
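
The new FileCheck lines above pin down the x86-64 ABI rule under test: a function taking an sret argument must also return that hidden pointer in %rax, so both @bar and the added @foo are expected to begin by copying %rdi into %rax. A minimal sketch of the same property, with a hypothetical function name:

    ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
    %pair = type { i64, i64 }
    ; CHECK: copy_ret:
    ; CHECK: movq %rdi, %rax
    define void @copy_ret(%pair* noalias sret %out) nounwind {
      store %pair zeroinitializer, %pair* %out
      ret void
    }
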
diff --git a/test/CodeGen/X86/x86-64-varargs.ll b/test/CodeGen/X86/x86-64-varargs.ll
index 2964dd3969f0..428f4493b069 100644
--- a/test/CodeGen/X86/x86-64-varargs.ll
+++ b/test/CodeGen/X86/x86-64-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
@.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00" ; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/x86-frameaddr.ll b/test/CodeGen/X86/x86-frameaddr.ll
index b9d6d13880b5..d5958745dfff 100644
--- a/test/CodeGen/X86/x86-frameaddr.ll
+++ b/test/CodeGen/X86/x86-frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | grep ebp
+; RUN: llc < %s -march=x86 | grep mov | grep ebp
define i8* @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-frameaddr2.ll b/test/CodeGen/X86/x86-frameaddr2.ll
index f50ab072c33e..c5091154152b 100644
--- a/test/CodeGen/X86/x86-frameaddr2.ll
+++ b/test/CodeGen/X86/x86-frameaddr2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | grep mov | count 3
define i8* @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-store-gv-addr.ll b/test/CodeGen/X86/x86-store-gv-addr.ll
index 799340d35dd2..089517aadb12 100644
--- a/test/CodeGen/X86/x86-store-gv-addr.ll
+++ b/test/CodeGen/X86/x86-store-gv-addr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
@v = external global i32, align 8
@v_addr = external global i32*, align 8
diff --git a/test/CodeGen/X86/xmm-r64.ll b/test/CodeGen/X86/xmm-r64.ll
index f7d2143664ef..2a6b5c71aa4f 100644
--- a/test/CodeGen/X86/xmm-r64.ll
+++ b/test/CodeGen/X86/xmm-r64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
define <4 x i32> @test() {
%tmp1039 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
new file mode 100644
index 000000000000..7bd06bba4c3e
--- /dev/null
+++ b/test/CodeGen/X86/xor.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define <4 x i32> @test1() nounwind {
+ %tmp = xor <4 x i32> undef, undef
+ ret <4 x i32> %tmp
+
+; X32: test1:
+; X32: xorps %xmm0, %xmm0
+; X32: ret
+}
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define i32 @test2() nounwind {
+ %tmp = xor i32 undef, undef
+ ret i32 %tmp
+; X32: test2:
+; X32: xorl %eax, %eax
+; X32: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind {
+entry:
+ %tmp1not = xor i32 %b, -2
+ %tmp3 = and i32 %tmp1not, %a
+ %tmp4 = lshr i32 %tmp3, 1
+ ret i32 %tmp4
+
+; X64: test3:
+; X64: notl %esi
+; X64: andl %edi, %esi
+; X64: movl %esi, %eax
+; X64: shrl %eax
+; X64: ret
+
+; X32: test3:
+; X32: movl 8(%esp), %eax
+; X32: notl %eax
+; X32: andl 4(%esp), %eax
+; X32: shrl %eax
+; X32: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483647
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+
+; X64: test4:
+; X64: notl [[REG:%[a-z]+]]
+; X64: andl {{.*}}[[REG]]
+; X32: test4:
+; X32: notl [[REG:%[a-z]+]]
+; X32: andl {{.*}}[[REG]]
+}
+
+define i16 @test5(i16 %a, i16 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i16 %a_addr.0, %b_addr.0
+ %tmp4not = xor i16 %tmp3, 32767
+ %tmp6 = and i16 %tmp4not, %b_addr.0
+ %tmp8 = shl i16 %tmp6, 1
+ %tmp10 = icmp eq i16 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i16 %tmp3
+; X64: test5:
+; X64: notw [[REG:%[a-z]+]]
+; X64: andw {{.*}}[[REG]]
+; X32: test5:
+; X32: notw [[REG:%[a-z]+]]
+; X32: andw {{.*}}[[REG]]
+}
+
+define i8 @test6(i8 %a, i8 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i8 %a_addr.0, %b_addr.0
+ %tmp4not = xor i8 %tmp3, 127
+ %tmp6 = and i8 %tmp4not, %b_addr.0
+ %tmp8 = shl i8 %tmp6, 1
+ %tmp10 = icmp eq i8 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i8 %tmp3
+; X64: test6:
+; X64: notb [[REG:%[a-z]+]]
+; X64: andb {{.*}}[[REG]]
+; X32: test6:
+; X32: notb [[REG:%[a-z]+]]
+; X32: andb {{.*}}[[REG]]
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483646
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+; X64: test7:
+; X64: xorl $2147483646, [[REG:%[a-z]+]]
+; X64: andl {{.*}}[[REG]]
+; X32: test7:
+; X32: xorl $2147483646, [[REG:%[a-z]+]]
+; X32: andl {{.*}}[[REG]]
+}
+
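
The new xor.ll covers two folds. First, xor undef, undef may legally produce anything, but the expected lowering is the zero idiom (xorps/xorl). Second, test3 through test6 exercise demanded-bits reasoning: the xor constant is all-ones in every bit the following and/shift actually uses, so the xor may be strengthened to a plain not; test7's 2147483646 clears a demanded bit, so it must stay an explicit xorl. Restating test3 with the reasoning spelled out:

    ; Only bits 31..1 of %t survive the final shift, and -2 (0xFFFFFFFE)
    ; is all-ones on exactly those bits, so the xor acts as a full "not".
    define i32 @demanded(i32 %a, i32 %b) nounwind {
      %t = xor i32 %b, -2
      %m = and i32 %t, %a
      %r = lshr i32 %m, 1
      ret i32 %r
    }
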
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 7640ba5aca41..3e3bb95d06f7 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -1,16 +1,40 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xor | count 4
-; RUN: llvm-as < %s | llc -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12
-; RUN: llvm-as < %s | llc -march=x86 | grep fldz
-; RUN: llvm-as < %s | llc -march=x86 | not grep fldl
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
declare void @bar(double %x)
declare void @barf(float %x)
define double @foo() nounwind {
+
call void @bar(double 0.0)
ret double 0.0
+
+;CHECK-32: foo:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foo:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
}
+
+
define float @foof() nounwind {
call void @barf(float 0.0)
ret float 0.0
+
+;CHECK-32: foof:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foof:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
}
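
What zero-remat.ll now spells out with FileCheck: floating-point zero should be rematerialized at each use rather than spilled across the call, via the pxor zero idiom on x86-64/SSE and fldz on x87. The surviving -stats line pins the asm-printer instruction count at 12 so a spill/reload regression is caught. A condensed restatement of @foo under the same assumptions:

    ; RUN: llc < %s -march=x86-64 | FileCheck %s
    declare void @use(double)
    ; CHECK: pxor
    ; CHECK: call
    ; CHECK: pxor
    define double @zero() nounwind {
      call void @use(double 0.0)
      ret double 0.0
    }
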
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
index 1a734642d031..ae6221af9d81 100644
--- a/test/CodeGen/X86/zext-inreg-0.ll
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movzbq %t
; RUN: not grep movzwq %t
diff --git a/test/CodeGen/X86/zext-inreg-1.ll b/test/CodeGen/X86/zext-inreg-1.ll
index bc8e482d562d..17fe374e01ec 100644
--- a/test/CodeGen/X86/zext-inreg-1.ll
+++ b/test/CodeGen/X86/zext-inreg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
; These tests differ from the ones in zext-inreg-0.ll in that
; on x86-64 they do require 'and' instructions.
diff --git a/test/CodeGen/XCore/2008-11-17-Shl64.ll b/test/CodeGen/XCore/2008-11-17-Shl64.ll
index 97ea41b8d0c0..04b1b5a0016e 100644
--- a/test/CodeGen/XCore/2008-11-17-Shl64.ll
+++ b/test/CodeGen/XCore/2008-11-17-Shl64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; PR3080
define i64 @test(i64 %a) {
%result = shl i64 %a, 1
diff --git a/test/CodeGen/XCore/2009-01-08-Crash.ll b/test/CodeGen/XCore/2009-01-08-Crash.ll
index 6f5fb7c6871e..a31ea1e2e9be 100644
--- a/test/CodeGen/XCore/2009-01-08-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-08-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
;; This caused a compilation failure since the
;; address arithmetic was folded into the LDWSP instruction,
;; resulting in a negative offset which eliminateFrameIndex was
diff --git a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
index b9333c94abe3..b2bbcb1183d1 100644
--- a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; PR3324
define double @f1(double %a, double %b, double %c, double %d, double %e, double %f, double %g) nounwind {
entry:
diff --git a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
index e834d66df241..a6b9699987eb 100644
--- a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
+++ b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore
+; RUN: llc < %s -march=xcore
; PR3898
define i32 @vector_param(<2 x double> %x) nounwind {
diff --git a/test/CodeGen/XCore/2009-07-15-store192.ll b/test/CodeGen/XCore/2009-07-15-store192.ll
new file mode 100644
index 000000000000..5278af8ac229
--- /dev/null
+++ b/test/CodeGen/XCore/2009-07-15-store192.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=xcore > %t1.s
+define void @store32(i8* %p) nounwind {
+entry:
+ %0 = bitcast i8* %p to i192*
+ store i192 0, i192* %0, align 4
+ ret void
+}
diff --git a/test/CodeGen/XCore/addsub64.ll b/test/CodeGen/XCore/addsub64.ll
index 41224fca1cc2..a1494adfcc46 100644
--- a/test/CodeGen/XCore/addsub64.ll
+++ b/test/CodeGen/XCore/addsub64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore -mcpu=xs1b-generic > %t1.s
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic > %t1.s
; RUN: grep ladd %t1.s | count 2
; RUN: grep lsub %t1.s | count 2
define i64 @add64(i64 %a, i64 %b) {
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
new file mode 100644
index 000000000000..d585e8b10d98
--- /dev/null
+++ b/test/CodeGen/XCore/ashr.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+define i32 @ashr(i32 %a, i32 %b) {
+ %1 = ashr i32 %a, %b
+ ret i32 %1
+}
+; CHECK: ashr:
+; CHECK-NEXT: ashr r0, r0, r1
+
+define i32 @ashri1(i32 %a) {
+ %1 = ashr i32 %a, 24
+ ret i32 %1
+}
+; CHECK: ashri1:
+; CHECK-NEXT: ashr r0, r0, 24
+
+define i32 @ashri2(i32 %a) {
+ %1 = ashr i32 %a, 31
+ ret i32 %1
+}
+; CHECK: ashri2:
+; CHECK-NEXT: ashr r0, r0, 32
+
+define i32 @f1(i32 %a) {
+ %1 = icmp slt i32 %a, 0
+ br i1 %1, label %less, label %not_less
+less:
+ ret i32 10
+not_less:
+ ret i32 17
+}
+; CHECK: f1:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
+
+define i32 @f2(i32 %a) {
+ %1 = icmp sge i32 %a, 0
+ br i1 %1, label %greater, label %not_greater
+greater:
+ ret i32 10
+not_greater:
+ ret i32 17
+}
+; CHECK: f2:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bt r0
+
+define i32 @f3(i32 %a) {
+ %1 = icmp slt i32 %a, 0
+ %2 = select i1 %1, i32 10, i32 17
+ ret i32 %2
+}
+; CHECK: f3:
+; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ldc r0, 10
+; CHECK-NEXT: bt r1
+; CHECK: ldc r0, 17
+
+define i32 @f4(i32 %a) {
+ %1 = icmp sge i32 %a, 0
+ %2 = select i1 %1, i32 10, i32 17
+ ret i32 %2
+}
+; CHECK: f4:
+; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ldc r0, 17
+; CHECK-NEXT: bt r1
+; CHECK: ldc r0, 10
+
+define i32 @f5(i32 %a) {
+ %1 = icmp sge i32 %a, 0
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+; CHECK: f5:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: eq r0, r0, 0
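
Several of the checks above lean on an XCore-specific encoding detail: the immediate form of ashr accepts a shift amount of 32, which fills the result with copies of the sign bit. For an i32 that is exactly the behaviour of an ashr by 31, so the backend is expected to use it both for sign extraction (ashri2) and as an is-negative test feeding bt/bf (f1 through f5). A minimal restatement:

    ; ashr by 31 and by 32 agree on i32: both yield 0 or -1.
    define i32 @isneg(i32 %a) nounwind {
      %s = ashr i32 %a, 31   ; expected lowering: ashr r0, r0, 32
      ret i32 %s
    }
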
diff --git a/test/CodeGen/XCore/basictest.ll b/test/CodeGen/XCore/basictest.ll
index 803ffcb74cc1..de5eaff08073 100644
--- a/test/CodeGen/XCore/basictest.ll
+++ b/test/CodeGen/XCore/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore
+; RUN: llc < %s -march=xcore
define i32 @test(i32 %X) {
%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/XCore/bitrev.ll b/test/CodeGen/XCore/bitrev.ll
index 38f394869763..09202d365678 100644
--- a/test/CodeGen/XCore/bitrev.ll
+++ b/test/CodeGen/XCore/bitrev.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep bitrev %t1.s | count 1
declare i32 @llvm.xcore.bitrev(i32)
diff --git a/test/CodeGen/XCore/constants.ll b/test/CodeGen/XCore/constants.ll
new file mode 100644
index 000000000000..95fa11e77470
--- /dev/null
+++ b/test/CodeGen/XCore/constants.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
+; CHECK: .LCPI1_0:
+; CHECK: .long 12345678
+; CHECK: f:
+; CHECK: ldw r0, cp[.LCPI1_0]
+define i32 @f() {
+entry:
+ ret i32 12345678
+}
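
The new constants.ll documents how an XCore constant too wide for an ldc immediate is materialized: it is emitted into the constant-pool section (.cp.rodata.cst4 here) and loaded through the cp register. A sketch contrasting the two cases, assuming small values still fit the ldc immediate form:

    define i32 @small() nounwind {
      ret i32 42         ; expected: ldc r0, 42
    }
    define i32 @large() nounwind {
      ret i32 12345678   ; expected: ldw r0, cp[<pool entry>]
    }
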
diff --git a/test/CodeGen/XCore/cos.ll b/test/CodeGen/XCore/cos.ll
index 334f0d50561d..8211f85b9bc2 100644
--- a/test/CodeGen/XCore/cos.ll
+++ b/test/CodeGen/XCore/cos.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl cosf" %t1.s | count 1
; RUN: grep "bl cos" %t1.s | count 2
declare double @llvm.cos.f64(double)
diff --git a/test/CodeGen/XCore/exp.ll b/test/CodeGen/XCore/exp.ll
index 8412e7a59956..d23d484ed62e 100644
--- a/test/CodeGen/XCore/exp.ll
+++ b/test/CodeGen/XCore/exp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl expf" %t1.s | count 1
; RUN: grep "bl exp" %t1.s | count 2
declare double @llvm.exp.f64(double)
diff --git a/test/CodeGen/XCore/exp2.ll b/test/CodeGen/XCore/exp2.ll
index a53b767ad0d0..4c4d17f4bbf7 100644
--- a/test/CodeGen/XCore/exp2.ll
+++ b/test/CodeGen/XCore/exp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl exp2f" %t1.s | count 1
; RUN: grep "bl exp2" %t1.s | count 2
declare double @llvm.exp2.f64(double)
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index 3fb7b0186940..e3dd3dd45c23 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
-; RUN: grep "xor" %t1.s | count 1
+; RUN: llc < %s -march=xcore | grep "xor" | count 1
define i1 @test(double %F) nounwind {
entry:
%0 = fsub double -0.000000e+00, %F
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index 810e8ad6e75c..ecab65c0e92e 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "get r11, id" %t1.s | count 1
declare i32 @llvm.xcore.getid()
diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll
new file mode 100644
index 000000000000..342e5932dd10
--- /dev/null
+++ b/test/CodeGen/XCore/globals.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+define i32 *@addr_G1() {
+entry:
+; CHECK: addr_G1:
+; CHECK: ldaw r0, dp[G1]
+ ret i32* @G1
+}
+
+define i32 *@addr_G2() {
+entry:
+; CHECK: addr_G2:
+; CHECK: ldaw r0, dp[G2]
+ ret i32* @G2
+}
+
+define i32 *@addr_G3() {
+entry:
+; CHECK: addr_G3:
+; CHECK: ldaw r11, cp[G3]
+; CHECK: mov r0, r11
+ ret i32* @G3
+}
+
+define i32 **@addr_G4() {
+entry:
+; CHECK: addr_G4:
+; CHECK: ldaw r0, dp[G4]
+ ret i32** @G4
+}
+
+define i32 **@addr_G5() {
+entry:
+; CHECK: addr_G5:
+; CHECK: ldaw r11, cp[G5]
+; CHECK: mov r0, r11
+ ret i32** @G5
+}
+
+define i32 **@addr_G6() {
+entry:
+; CHECK: addr_G6:
+; CHECK: ldaw r0, dp[G6]
+ ret i32** @G6
+}
+
+define i32 **@addr_G7() {
+entry:
+; CHECK: addr_G7:
+; CHECK: ldaw r11, cp[G7]
+; CHECK: mov r0, r11
+ ret i32** @G7
+}
+
+define i32 *@addr_G8() {
+entry:
+; CHECK: addr_G8:
+; CHECK: ldaw r0, dp[G8]
+ ret i32* @G8
+}
+
+@G1 = global i32 4712
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G1:
+
+@G2 = global i32 0
+; CHECK: .section .dp.bss,"awd",@nobits
+; CHECK: G2:
+
+@G3 = constant i32 9401
+; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
+; CHECK: G3:
+
+@G4 = global i32* @G1
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G4:
+
+@G5 = constant i32* @G1
+; CHECK: .section .cp.rodata,"ac",@progbits
+; CHECK: G5:
+
+@G6 = global i32* @G8
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G6:
+
+@G7 = constant i32* @G8
+; CHECK: .section .cp.rodata,"ac",@progbits
+; CHECK: G7:
+
+@G8 = internal global i32 9312
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G8:
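
Taken together, the globals.ll checks encode the XCore section-assignment policy: anything writable, including internal globals and pointers to constants, lands in data-pointer sections (.dp.data or .dp.bss) and is addressed as ldaw r0, dp[...]; true constants land in constant-pool sections (.cp.rodata*) and, judging by the checks, the cp form of ldaw can only target r11, hence the extra mov r0, r11. A two-global sketch of the split:

    @rw = global i32 1      ; expected section: .dp.data
    @ro = constant i32 2    ; expected section: .cp.rodata.cst4
    define i32* @addr_rw() {
      ret i32* @rw          ; expected: ldaw r0, dp[rw]
    }
    define i32* @addr_ro() {
      ret i32* @ro          ; expected: ldaw r11, cp[ro], then mov r0, r11
    }
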
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
new file mode 100644
index 000000000000..adfea212a279
--- /dev/null
+++ b/test/CodeGen/XCore/load.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: not grep add %t1.s
+; RUN: not grep ldaw %t1.s
+; RUN: not grep lda16 %t1.s
+; RUN: not grep zext %t1.s
+; RUN: not grep sext %t1.s
+; RUN: grep "ldw" %t1.s | count 2
+; RUN: grep "ld16s" %t1.s | count 1
+; RUN: grep "ld8u" %t1.s | count 1
+
+define i32 @load32(i32* %p, i32 %offset) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 %offset
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+define i32 @load32_imm(i32* %p) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 11
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+define i32 @load16(i16* %p, i32 %offset) nounwind {
+entry:
+ %0 = getelementptr i16* %p, i32 %offset
+ %1 = load i16* %0, align 2
+ %2 = sext i16 %1 to i32
+ ret i32 %2
+}
+
+define i32 @load8(i8* %p, i32 %offset) nounwind {
+entry:
+ %0 = getelementptr i8* %p, i32 %offset
+ %1 = load i8* %0, align 1
+ %2 = zext i8 %1 to i32
+ ret i32 %2
+}
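
The load.ll greps assert that nothing leaks out of the load: no add/ldaw/lda16 means the getelementptr scaling is folded into the ldw/ld16s register-offset addressing modes, and no sext/zext in the output means the widening is folded into the ld16s and ld8u extending loads. Restating the byte case with the expectation inline:

    define i32 @byte_at(i8* %p, i32 %i) nounwind {
      %a = getelementptr i8* %p, i32 %i
      %b = load i8* %a, align 1
      %c = zext i8 %b to i32   ; whole sequence expected to be one ld8u
      ret i32 %c
    }
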
diff --git a/test/CodeGen/XCore/log.ll b/test/CodeGen/XCore/log.ll
index 88d9d7ffcd39..a08471f48e4a 100644
--- a/test/CodeGen/XCore/log.ll
+++ b/test/CodeGen/XCore/log.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl logf" %t1.s | count 1
; RUN: grep "bl log" %t1.s | count 2
declare double @llvm.log.f64(double)
diff --git a/test/CodeGen/XCore/log10.ll b/test/CodeGen/XCore/log10.ll
index f844d8fc6a24..a72b8bfaf6b9 100644
--- a/test/CodeGen/XCore/log10.ll
+++ b/test/CodeGen/XCore/log10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl log10f" %t1.s | count 1
; RUN: grep "bl log10" %t1.s | count 2
declare double @llvm.log10.f64(double)
diff --git a/test/CodeGen/XCore/log2.ll b/test/CodeGen/XCore/log2.ll
index b8a3dbd2317f..d257433a01a7 100644
--- a/test/CodeGen/XCore/log2.ll
+++ b/test/CodeGen/XCore/log2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl log2f" %t1.s | count 1
; RUN: grep "bl log2" %t1.s | count 2
declare double @llvm.log2.f64(double)
diff --git a/test/CodeGen/XCore/pow.ll b/test/CodeGen/XCore/pow.ll
index a7b6318c1091..b461185b7fde 100644
--- a/test/CodeGen/XCore/pow.ll
+++ b/test/CodeGen/XCore/pow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl powf" %t1.s | count 1
; RUN: grep "bl pow" %t1.s | count 2
declare double @llvm.pow.f64(double, double)
diff --git a/test/CodeGen/XCore/powi.ll b/test/CodeGen/XCore/powi.ll
index 30e6d7ea88f3..de31cbed00c0 100644
--- a/test/CodeGen/XCore/powi.ll
+++ b/test/CodeGen/XCore/powi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl __powidf2" %t1.s | count 1
; RUN: grep "bl __powisf2" %t1.s | count 1
declare double @llvm.powi.f64(double, i32)
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 888ccdf297d5..9a2f5b32dc39 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that 'private' linkage is handled correctly.
;
-; RUN: llvm-as < %s | llc -march=xcore > %t
+; RUN: llc < %s -march=xcore > %t
; RUN: grep .Lfoo: %t
; RUN: grep bl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/XCore/sext.ll b/test/CodeGen/XCore/sext.ll
new file mode 100644
index 000000000000..9cd4ad66a5cd
--- /dev/null
+++ b/test/CodeGen/XCore/sext.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+define i32 @sext1(i32 %a) {
+ %1 = trunc i32 %a to i1
+ %2 = sext i1 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext1:
+; CHECK: sext r0, 1
+
+define i32 @sext2(i32 %a) {
+ %1 = trunc i32 %a to i2
+ %2 = sext i2 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext2:
+; CHECK: sext r0, 2
+
+define i32 @sext8(i32 %a) {
+ %1 = trunc i32 %a to i8
+ %2 = sext i8 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext8:
+; CHECK: sext r0, 8
+
+define i32 @sext16(i32 %a) {
+ %1 = trunc i32 %a to i16
+ %2 = sext i16 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext16:
+; CHECK: sext r0, 16
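
XCore has a native sext instruction that sign-extends in place from an arbitrary bit position, so each trunc-then-sext round trip above collapses to a single instruction instead of a shift pair. The width is not limited to the tested 1/2/8/16; a hypothetical odd width works the same way:

    define i32 @sext_from_6(i32 %a) nounwind {
      %t = trunc i32 %a to i6
      %r = sext i6 %t to i32   ; expected: sext r0, 6
      ret i32 %r
    }
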
diff --git a/test/CodeGen/XCore/sin.ll b/test/CodeGen/XCore/sin.ll
index 41aab675953f..ced026f1d3e1 100644
--- a/test/CodeGen/XCore/sin.ll
+++ b/test/CodeGen/XCore/sin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl sinf" %t1.s | count 1
; RUN: grep "bl sin" %t1.s | count 2
declare double @llvm.sin.f64(double)
diff --git a/test/CodeGen/XCore/sqrt.ll b/test/CodeGen/XCore/sqrt.ll
index 221d1ac1a781..364d1a14c6ae 100644
--- a/test/CodeGen/XCore/sqrt.ll
+++ b/test/CodeGen/XCore/sqrt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl sqrtf" %t1.s | count 1
; RUN: grep "bl sqrt" %t1.s | count 2
declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
new file mode 100644
index 000000000000..2213743ff897
--- /dev/null
+++ b/test/CodeGen/XCore/store.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: not grep add %t1.s
+; RUN: not grep ldaw %t1.s
+; RUN: not grep lda16 %t1.s
+; RUN: grep "stw" %t1.s | count 2
+; RUN: grep "st16" %t1.s | count 1
+; RUN: grep "st8" %t1.s | count 1
+
+define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 %offset
+ store i32 %val, i32* %0, align 4
+ ret void
+}
+
+define void @store32_imm(i32* %p, i32 %val) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 11
+ store i32 %val, i32* %0, align 4
+ ret void
+}
+
+define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
+entry:
+ %0 = getelementptr i16* %p, i32 %offset
+ store i16 %val, i16* %0, align 2
+ ret void
+}
+
+define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
+entry:
+ %0 = getelementptr i8* %p, i32 %offset
+ store i8 %val, i8* %0, align 1
+ ret void
+}
diff --git a/test/CodeGen/XCore/tls.ll b/test/CodeGen/XCore/tls.ll
new file mode 100644
index 000000000000..ed41afae0996
--- /dev/null
+++ b/test/CodeGen/XCore/tls.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+define i32 *@addr_G() {
+entry:
+; CHECK: addr_G:
+; CHECK: get r11, id
+ ret i32* @G
+}
+
+@G = thread_local global i32 15
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G:
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
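
The eight .long 15 checks are the interesting part of tls.ll: this backend lowers thread_local by replicating the initializer once per hardware thread (XS1 cores run up to eight threads) and computing the address from the current thread id, which is why @addr_G is expected to begin with get r11, id. A sketch of the shape being tested:

    @counter = thread_local global i32 0   ; expected: 8 replicated copies
    define i32* @counter_addr() {
      ret i32* @counter   ; expected: get r11, id, then index into the copies
    }
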
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index b3d3bc2270e3..45f886d332aa 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "ecallf" %t1.s | count 1
; RUN: grep "ldc" %t1.s | count 1
define i32 @test() noreturn nounwind {
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
new file mode 100644
index 000000000000..0ee8e1c32667
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl __misaligned_load" %t1.s | count 1
+; RUN: grep ld16s %t1.s | count 2
+; RUN: grep ldw %t1.s | count 2
+; RUN: grep shl %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+; RUN: grep zext %t1.s | count 1
+; RUN: grep "or " %t1.s | count 2
+
+; Byte-aligned load. Expands to a call to __misaligned_load.
+define i32 @align1(i32* %p) nounwind {
+entry:
+ %0 = load i32* %p, align 1 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+; Halfword-aligned load. Expands to two 16-bit loads.
+define i32 @align2(i32* %p) nounwind {
+entry:
+ %0 = load i32* %p, align 2 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+@a = global [5 x i8] zeroinitializer, align 4
+
+; Constant offset from a word-aligned base. Expands to two 32-bit loads.
+define i32 @align3() nounwind {
+entry:
+ %0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
+ ret i32 %0
+}
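
unaligned_load.ll pins down three strategies, keyed by what alignment is provable: with nothing known (align 1) the load becomes a __misaligned_load libcall; at align 2 it becomes two ld16s halves reassembled with zext, shl and or; and at a constant offset from a word-aligned global it becomes two aligned word loads plus shifts. The align-2 reassembly, expressed at the IR level as a rough model of what the backend emits:

    define i32 @expand_align2(i32* %p) nounwind {
      %h = bitcast i32* %p to i16*
      %lo16 = load i16* %h, align 2      ; low halfword (ld16s)
      %hp = getelementptr i16* %h, i32 1
      %hi16 = load i16* %hp, align 2     ; high halfword (ld16s)
      %lo = zext i16 %lo16 to i32
      %hiz = zext i16 %hi16 to i32
      %hi = shl i32 %hiz, 16
      %r = or i32 %hi, %lo               ; little-endian reassembly
      ret i32 %r
    }
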
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
new file mode 100644
index 000000000000..62078e6f6077
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl __misaligned_store" %t1.s | count 1
+; RUN: grep st16 %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+
+; Byte-aligned store. Expands to a call to __misaligned_store.
+define void @align1(i32* %p, i32 %val) nounwind {
+entry:
+ store i32 %val, i32* %p, align 1
+ ret void
+}
+
+; Halfword-aligned store. Expands to two 16-bit stores.
+define void @align2(i32* %p, i32 %val) nounwind {
+entry:
+ store i32 %val, i32* %p, align 2
+ ret void
+}
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
new file mode 100644
index 000000000000..493ca6a975f8
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl memmove" %t1.s | count 1
+; RUN: grep "ldc r., 8" %t1.s | count 1
+
+; Unaligned load/store pair. Should be combined into a memmove
+; of size 8.
+define void @f(i64* %dst, i64* %src) nounwind {
+entry:
+ %0 = load i64* %src, align 1
+ store i64 %0, i64* %dst, align 1
+ ret void
+}
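
The combine under test: an i64 load/store pair that is only byte aligned is recognized as a block copy and replaced with a single 8-byte memmove (memmove rather than memcpy, since the two pointers are not known not to overlap); the "ldc r., 8" grep pins the materialization of the length argument. Roughly what the combiner produces, written out at the IR level with the memmove intrinsic signature of this era:

    declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
    define void @f_expanded(i64* %dst, i64* %src) nounwind {
      %d = bitcast i64* %dst to i8*
      %s = bitcast i64* %src to i8*
      call void @llvm.memmove.i32(i8* %d, i8* %s, i32 8, i32 1)
      ret void
    }
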
diff --git a/test/DebugInfo/2008-10-17-C++DebugCrash.ll b/test/DebugInfo/2008-10-17-C++DebugCrash.ll
index b56b9497f424..7f60e95ff5e1 100644
--- a/test/DebugInfo/2008-10-17-C++DebugCrash.ll
+++ b/test/DebugInfo/2008-10-17-C++DebugCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2885
;; NOTE: This generates bad debug info in this case! But that's better than
diff --git a/test/DebugInfo/2008-11-05-InlinedFuncStart.ll b/test/DebugInfo/2008-11-05-InlinedFuncStart.ll
index c494190b4ec8..b6487188cbc9 100644
--- a/test/DebugInfo/2008-11-05-InlinedFuncStart.ll
+++ b/test/DebugInfo/2008-11-05-InlinedFuncStart.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s
+; RUN: llc %s -o - -O0
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
diff --git a/test/DebugInfo/2009-01-15-RecordVariableCrash.ll b/test/DebugInfo/2009-01-15-RecordVariableCrash.ll
index 68268bac49fc..cee4d724bf4b 100644
--- a/test/DebugInfo/2009-01-15-RecordVariableCrash.ll
+++ b/test/DebugInfo/2009-01-15-RecordVariableCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -f -o /dev/null -verify-dom-info
+; RUN: llc %s -o /dev/null -verify-dom-info -verify-loop-info
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32, i8*, i8* }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
diff --git a/test/DebugInfo/2009-01-15-dbg_declare.ll b/test/DebugInfo/2009-01-15-dbg_declare.ll
index 3f78d0d74953..ab404afbd8a7 100644
--- a/test/DebugInfo/2009-01-15-dbg_declare.ll
+++ b/test/DebugInfo/2009-01-15-dbg_declare.ll
@@ -1,6 +1,5 @@
+; RUN: llc %s -o /dev/null
-; RUN: llvm-as < %s | llc -f -o /dev/null
-target triple = "powerpc-apple-darwin9.5"
%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }*, i8*, i8* }
@llvm.dbg.variable24 = external constant %llvm.dbg.variable.type ; <%llvm.dbg.variable.type*> [#uses=1]
diff --git a/test/DebugInfo/2009-01-15-member.ll b/test/DebugInfo/2009-01-15-member.ll
index 7eb81f8f3dd8..a0fb0dbf5ea0 100644
--- a/test/DebugInfo/2009-01-15-member.ll
+++ b/test/DebugInfo/2009-01-15-member.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -f -o /dev/null
+; RUN: llc %s -o /dev/null
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32, i8*, i8* }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
@@ -27,4 +27,4 @@
@llvm.dbg.composite11 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, i64 64, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
@llvm.dbg.global_variables = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 52 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
@.str12 = internal constant [3 x i8] c"s2\00", section "llvm.metadata" ; <[3 x i8]*> [#uses=1]
-@llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type { i32 458804, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.global_variables to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str12, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str12, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 6, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite11 to { }*), i1 false, i1 true, { }* bitcast (%struct.s* @s2 to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.global_variable.type*> [#uses=0]
\ No newline at end of file
+@llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type { i32 458804, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.global_variables to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str12, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str12, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 6, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite11 to { }*), i1 false, i1 true, { }* bitcast (%struct.s* @s2 to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.global_variable.type*> [#uses=0]
diff --git a/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll b/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll
index 1ad776d64fcc..27bcef7b7089 100644
--- a/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll
+++ b/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -f -o /dev/null
+; RUN: llc %s -o /dev/null
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
diff --git a/test/DebugInfo/2009-02-27-licm.ll b/test/DebugInfo/2009-02-27-licm.ll
index cf4872d2902c..b490a28e5dbd 100644
--- a/test/DebugInfo/2009-02-27-licm.ll
+++ b/test/DebugInfo/2009-02-27-licm.ll
@@ -1,4 +1,4 @@
-;RUN: llvm-as <%s | opt -licm | llvm-dis | grep {load } | count 4
+;RUN: opt < %s -licm -S | grep {load } | count 4
; ModuleID = '2009-02-27-licm.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/DebugInfo/2009-03-03-cheapdse.ll b/test/DebugInfo/2009-03-03-cheapdse.ll
index d9b18aa85900..9f47f16153de 100644
--- a/test/DebugInfo/2009-03-03-cheapdse.ll
+++ b/test/DebugInfo/2009-03-03-cheapdse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep store | count 5
+; RUN: opt < %s -instcombine -S | grep store | count 5
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/DebugInfo/2009-03-03-deadstore.ll b/test/DebugInfo/2009-03-03-deadstore.ll
index 795393d87e0e..0705c155f9f7 100644
--- a/test/DebugInfo/2009-03-03-deadstore.ll
+++ b/test/DebugInfo/2009-03-03-deadstore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep alloca
+; RUN: opt < %s -instcombine -S | not grep alloca
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/DebugInfo/2009-03-03-store-to-load-forward.ll b/test/DebugInfo/2009-03-03-store-to-load-forward.ll
index 72866ec6d700..75d3a6943393 100644
--- a/test/DebugInfo/2009-03-03-store-to-load-forward.ll
+++ b/test/DebugInfo/2009-03-03-store-to-load-forward.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep alloca
+; RUN: opt < %s -instcombine -S | not grep alloca
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/DebugInfo/2009-03-05-gvn.ll b/test/DebugInfo/2009-03-05-gvn.ll
index 394982ce9195..f363132c5293 100644
--- a/test/DebugInfo/2009-03-05-gvn.ll
+++ b/test/DebugInfo/2009-03-05-gvn.ll
@@ -1,4 +1,4 @@
-;RUN: llvm-as <%s | opt -gvn | llvm-dis | grep {load } | count 1
+; RUN: opt < %s -gvn -S | grep {load } | count 1
; ModuleID = 'db2-before.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/DebugInfo/2009-06-12-Inline.ll b/test/DebugInfo/2009-06-12-Inline.ll
index 87c42d5a6837..de4046867644 100644
--- a/test/DebugInfo/2009-06-12-Inline.ll
+++ b/test/DebugInfo/2009-06-12-Inline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -f -o /dev/null
+; RUN: llc %s -o /dev/null
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
diff --git a/test/DebugInfo/2009-06-12-InlineFuncStart.ll b/test/DebugInfo/2009-06-12-InlineFuncStart.ll
index 32e20ff8c912..03837a0bfade 100644
--- a/test/DebugInfo/2009-06-12-InlineFuncStart.ll
+++ b/test/DebugInfo/2009-06-12-InlineFuncStart.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s
+; RUN: llc %s -o - -O0
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
diff --git a/test/DebugInfo/2009-06-15-InlineFuncStart.ll b/test/DebugInfo/2009-06-15-InlineFuncStart.ll
index 2ece6a3bf8ce..43d5cd1e7fde 100644
--- a/test/DebugInfo/2009-06-15-InlineFuncStart.ll
+++ b/test/DebugInfo/2009-06-15-InlineFuncStart.ll
@@ -1,8 +1,8 @@
; Test inlined function handling. This test case is copied from
; 2009-06-12-InlineFuncStart.ll with one change. In function main, bb1
; does not have an llvm.dbg.stoppoint intrinsic before llvm.dbg.func.start.
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s
+; RUN: llc %s -o - -O0
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
diff --git a/test/DebugInfo/2009-06-15-abstract_origin.ll b/test/DebugInfo/2009-06-15-abstract_origin.ll
index 3029da02a266..d442a30a064d 100644
--- a/test/DebugInfo/2009-06-15-abstract_origin.ll
+++ b/test/DebugInfo/2009-06-15-abstract_origin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -asm-verbose -O0 | not grep ".long 0x0 ## DW_AT_abstract_origin"
+; RUN: llc %s -o - -asm-verbose -O0 | not grep ".long 0x0 ## DW_AT_abstract_origin"
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
diff --git a/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll b/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll
new file mode 100644
index 000000000000..fc2810786698
--- /dev/null
+++ b/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s
+
+%struct.TConstantDictionary = type { %struct.__CFDictionary* }
+%struct.TSharedGlobalSet_AS = type { [52 x i32], [20 x i32], [22 x i32], [8 x i32], [20 x i32], [146 x i32] }
+%struct.__CFDictionary = type opaque
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @func to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define void @func() ssp {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !13)
+ tail call void @llvm.dbg.stoppoint(i32 1001, i32 0, metadata !1)
+ %0 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1]
+ %1 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %0, i32 0, i32 4, i32 4 ; <i32*> [#uses=1]
+ %2 = bitcast i32* %1 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1]
+ tail call void @g2(%struct.TConstantDictionary* %2) ssp
+ tail call void @llvm.dbg.stoppoint(i32 1002, i32 0, metadata !1)
+ %3 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1]
+ %4 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %3, i32 0, i32 4, i32 3 ; <i32*> [#uses=1]
+ %5 = bitcast i32* %4 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1]
+ tail call void @g4(%struct.TConstantDictionary* %5) ssp
+ tail call void @llvm.dbg.stoppoint(i32 1003, i32 0, metadata !1)
+ %6 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1]
+ %7 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %6, i32 0, i32 4, i32 2 ; <i32*> [#uses=1]
+ %8 = bitcast i32* %7 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1]
+ tail call void @g3(%struct.TConstantDictionary* %8) ssp
+ tail call void @llvm.dbg.stoppoint(i32 1004, i32 0, metadata !1)
+ %9 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1]
+ %10 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %9, i32 0, i32 4, i32 1 ; <i32*> [#uses=1]
+ %11 = bitcast i32* %10 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1]
+ tail call void @g4(%struct.TConstantDictionary* %11) ssp
+ tail call void @llvm.dbg.stoppoint(i32 1005, i32 0, metadata !1)
+ tail call void @g5()
+ tail call void @llvm.dbg.stoppoint(i32 1006, i32 0, metadata !1)
+ tail call void @llvm.dbg.region.end(metadata !13)
+ ret void
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+declare %struct.TSharedGlobalSet_AS* @g1() nounwind readonly ssp
+
+declare void @g2(%struct.TConstantDictionary* nocapture) ssp align 2
+
+declare void @g3(%struct.TConstantDictionary* nocapture) ssp align 2
+
+declare void @g4(%struct.TConstantDictionary* nocapture) ssp align 2
+
+declare void @g5()
+
+!llvm.dbg.gv = !{!0, !9, !10, !11, !12}
+
+!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.408", metadata !"C.408", metadata !"_ZZ7UASInitmmmmmmmmmE5C.408", metadata !1, i32 874, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ]
+!1 = metadata !{i32 458769, i32 0, i32 4, metadata !"func.cp", metadata !"/tmp/func", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build 2311)", i1 false, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!2 = metadata !{i32 458753, metadata !3, metadata !"", metadata !3, i32 0, i64 16, i64 16, i64 0, i32 0, metadata !4, metadata !7, i32 0}; [DW_TAG_array_type ]
+!3 = metadata !{i32 458769, i32 0, i32 4, metadata !"testcase.ii", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build 2311)", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!4 = metadata !{i32 458774, metadata !3, metadata !"UniChar", metadata !5, i32 417, i64 0, i64 0, i64 0, i32 0, metadata !6}; [DW_TAG_typedef ]
+!5 = metadata !{i32 458769, i32 0, i32 4, metadata !"MacTypes.h", metadata !"/System/Library/Frameworks/CoreServices.framework/Headers/../Frameworks/CarbonCore.framework/Headers", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build 2311)", i1 false, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!6 = metadata !{i32 458788, metadata !3, metadata !"short unsigned int", metadata !3, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7}; [DW_TAG_base_type ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 458785, i64 0, i64 0}; [DW_TAG_subrange_type ]
+!9 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.409", metadata !"C.409", metadata !"_ZZ7UASInitmmmmmmmmmE5C.409", metadata !1, i32 877, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ]
+!10 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.410", metadata !"C.410", metadata !"_ZZ7UASInitmmmmmmmmmE5C.410", metadata !1, i32 880, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ]
+!11 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.411", metadata !"C.411", metadata !"_ZZ7UASInitmmmmmmmmmE5C.411", metadata !1, i32 924, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ]
+!12 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.412", metadata !"C.412", metadata !"_ZZ7UASInitmmmmmmmmmE5C.412", metadata !1, i32 928, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ]
+!13 = metadata !{i32 458798, i32 0, metadata !3, metadata !"UASShutdown", metadata !"UASShutdown", metadata !"_Z11UASShutdownv", metadata !1, i32 999, metadata !14, i1 false, i1 true}; [DW_TAG_subprogram ]
+!14 = metadata !{i32 458773, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0}; [DW_TAG_subroutine_type ]
+!15 = metadata !{null}
diff --git a/test/DebugInfo/deaddebuglabel.ll b/test/DebugInfo/deaddebuglabel.ll
index edab7e84be56..a9af12b3850f 100644
--- a/test/DebugInfo/deaddebuglabel.ll
+++ b/test/DebugInfo/deaddebuglabel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0 | grep "label" | count 8
+; RUN: llc %s -o - -O0 | grep "label" | count 8
; PR2614
; XFAIL: *
diff --git a/test/DebugInfo/funccall.ll b/test/DebugInfo/funccall.ll
index 47fe4c2498c5..e44b0298ced8 100644
--- a/test/DebugInfo/funccall.ll
+++ b/test/DebugInfo/funccall.ll
@@ -1,4 +1,4 @@
-;; RUN: llvm-as < %s | llc
+;; RUN: llc < %s
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
diff --git a/test/DebugInfo/globalGetElementPtr.ll b/test/DebugInfo/globalGetElementPtr.ll
index 89e061715cc0..155deb722555 100644
--- a/test/DebugInfo/globalGetElementPtr.ll
+++ b/test/DebugInfo/globalGetElementPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; ModuleID = 'foo.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll
index 6c8275adb665..c5fe7ad6c803 100644
--- a/test/DebugInfo/printdbginfo2.ll
+++ b/test/DebugInfo/printdbginfo2.ll
@@ -1,74 +1,73 @@
-; RUN: llvm-as < %s | opt -print-dbginfo -disable-output > %t1
-; RUN: grep {%b is variable b of type x declared at x.c:7} %t1
-; RUN: grep {%2 is variable b of type x declared at x.c:7} %t1
-; RUN: grep {@c.1442 is variable c of type int declared at x.c:4} %t1
- type { } ; type %0
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
- %llvm.dbg.composite.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0*, %0*, i32 }
- %llvm.dbg.derivedtype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0* }
- %llvm.dbg.global_variable.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1, %0* }
- %llvm.dbg.subprogram.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1 }
- %llvm.dbg.subrange.type = type { i32, i64, i64 }
- %llvm.dbg.variable.type = type { i32, %0*, i8*, %0*, i32, %0* }
- %struct..0x = type { i32 }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"x.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [27 x i8] c"/home/edwin/llvm-svn/llvm/\00", section "llvm.metadata" ; <[27 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([27 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([1 x %0*]* @llvm.dbg.array to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str4 = internal constant [5 x i8] c"main\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 2, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@.str7 = internal constant [2 x i8] c"a\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to %0*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 6, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array8 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-@llvm.dbg.composite9 = internal constant %llvm.dbg.composite.type { i32 458771, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to %0*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 5, i64 32, i64 32, i64 0, i32 0, %0* null, %0* bitcast ([1 x %0*]* @llvm.dbg.array8 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str10 = internal constant [2 x i8] c"b\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459008, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to %0*), i8* getelementptr ([2 x i8]* @.str10, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 7, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite9 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
-@llvm.dbg.subrange = internal constant %llvm.dbg.subrange.type { i32 458785, i64 0, i64 3 }, section "llvm.metadata" ; <%llvm.dbg.subrange.type*> [#uses=1]
-@llvm.dbg.array11 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-@llvm.dbg.composite12 = internal constant %llvm.dbg.composite.type { i32 458753, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 128, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast ([1 x %0*]* @llvm.dbg.array11 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.variable13 = internal constant %llvm.dbg.variable.type { i32 459008, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to %0*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 3, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite12 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
-@c.1442 = internal global i32 5 ; <i32*> [#uses=2]
-@llvm.dbg.global_variables = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 52 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str14 = internal constant [7 x i8] c"c.1442\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@.str15 = internal constant [2 x i8] c"c\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type { i32 458804, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.global_variables to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([7 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str15, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 4, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), i1 true, i1 true, %0* bitcast (i32* @c.1442 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.global_variable.type*> [#uses=0]
+; RUN: opt < %s -print-dbginfo -disable-output | FileCheck %s
+; grep {%b is variable b of type x declared at x.c:7} %t1
+; grep {%2 is variable b of type x declared at x.c:7} %t1
+; grep {@c.1442 is variable c of type int declared at x.c:4} %t1
+
+%struct.foo = type { i32 }
+
+@main.c = internal global i32 5 ; <i32*> [#uses=1]
define i32 @main() nounwind {
entry:
- %b = alloca %struct..0x ; <%struct..0x*> [#uses=2]
- %a = alloca [4 x i32] ; <[4 x i32]*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to %0*))
- %0 = bitcast %struct..0x* %b to %0* ; <%0*> [#uses=1]
- call void @llvm.dbg.declare(%0* %0, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to %0*))
- %1 = bitcast [4 x i32]* %a to %0* ; <%0*> [#uses=1]
- call void @llvm.dbg.declare(%0* %1, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable13 to %0*))
- call void @llvm.dbg.stoppoint(i32 8, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %2 = getelementptr %struct..0x* %b, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 5, i32* %2, align 4
- call void @llvm.dbg.stoppoint(i32 9, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %3 = load i32* @c.1442, align 4 ; <i32> [#uses=1]
- br label %return
+; CHECK:; (x.c:6:3)
+ %retval = alloca i32 ; <i32*> [#uses=3]
+ %b = alloca %struct.foo, align 4 ; <%struct.foo*> [#uses=2]
+; CHECK:; %b is variable b of type foo declared at x.c:7
+ %a = alloca [4 x i32], align 4 ; <[4 x i32]*> [#uses=1]
+; CHECK:; %a is variable a of type declared at x.c:8
+ call void @llvm.dbg.func.start(metadata !3)
+; CHECK:; fully qualified function name: main return type: int at line 5
+ store i32 0, i32* %retval
+ call void @llvm.dbg.stoppoint(i32 6, i32 3, metadata !1)
+; CHECK:; x.c:7:3
+ call void @llvm.dbg.stoppoint(i32 7, i32 3, metadata !1)
+ %0 = bitcast %struct.foo* %b to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %0, metadata !4)
+; CHECK:; %0 is variable b of type foo declared at x.c:7
+ call void @llvm.dbg.stoppoint(i32 8, i32 3, metadata !1)
+; CHECK:; x.c:8:3
+ %1 = bitcast [4 x i32]* %a to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %1, metadata !8)
+; CHECK:; %1 is variable a of type declared at x.c:8
+ call void @llvm.dbg.stoppoint(i32 9, i32 3, metadata !1)
+; CHECK:; x.c:9:3
+ %tmp = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 ; <i32*> [#uses=1]
+; CHECK:; %tmp is variable b of type foo declared at x.c:7
+ store i32 5, i32* %tmp
+; CHECK:; x.c:10:3
+ call void @llvm.dbg.stoppoint(i32 10, i32 3, metadata !1)
+ %tmp1 = load i32* @main.c ; <i32> [#uses=1]
+; CHECK:; @main.c is variable c of type int declared at x.c:6
+ store i32 %tmp1, i32* %retval
+ br label %2
 
-return: ; preds = %entry
- call void @llvm.dbg.stoppoint(i32 9, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to %0*))
- ret i32 %3
+; <label>:2 ; preds = %entry
+ call void @llvm.dbg.stoppoint(i32 11, i32 1, metadata !1)
+; CHECK:; (x.c:11:1)
+ call void @llvm.dbg.region.end(metadata !3)
+ %3 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %3
}
-declare void @llvm.dbg.func.start(%0*) nounwind readnone
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
+
+declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
 
-declare void @llvm.dbg.declare(%0*, %0*) nounwind readnone
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
 
-declare void @llvm.dbg.stoppoint(i32, i32, %0*) nounwind readnone
+!llvm.dbg.gv = !{!0}
 
-declare void @llvm.dbg.region.end(%0*) nounwind readnone
+!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"c", metadata !"c", metadata !"", metadata !1, i32 6, metadata !2, i1 true, i1 true, i32* @main.c}
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"x.c", metadata !"/home/edwin/llvm-git/llvm/test/DebugInfo", metadata !"clang 1.0", i1 true, i1 false, metadata !"", i32 0}
+!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!3 = metadata !{i32 458798, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 5, metadata !2, i1 false, i1 true}
+!4 = metadata !{i32 459008, metadata !3, metadata !"b", metadata !1, i32 7, metadata !5}
+!5 = metadata !{i32 458771, metadata !1, metadata !"foo", metadata !1, i32 1, i64 32, i64 32, i64 0, i32 0, null, metadata !6, i32 0}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 458765, metadata !1, metadata !"a", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !2}
+!8 = metadata !{i32 459008, metadata !3, metadata !"a", metadata !1, i32 8, metadata !9}
+!9 = metadata !{i32 458753, metadata !1, metadata !"", null, i32 0, i64 128, i64 32, i64 0, i32 0, metadata !2, metadata !10, i32 0}
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 458785, i64 0, i64 3}
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index a51e3a110c38..455196923e84 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
@.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
index 89106b5b2aa3..5d37e9664162 100644
--- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @foo(i32 %X, i32 %Y, double %A) {
diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll
index 07cb1d3c9264..653cf79a52a3 100644
--- a/test/ExecutionEngine/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o %t.bc -f
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/2003-01-04-PhiTest.ll b/test/ExecutionEngine/2003-01-04-PhiTest.ll
index 649ed0b24277..b5c9d8132432 100644
--- a/test/ExecutionEngine/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/2003-01-09-SARTest.ll b/test/ExecutionEngine/2003-01-09-SARTest.ll
index a9df7f2036b6..81478972d57f 100644
--- a/test/ExecutionEngine/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; We were accidentally inverting the signedness of right shifts. Whoops.
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index 30f93309d5cf..d996fa53d9c5 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
index 7529eb4770a0..a55d74df0d44 100644
--- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @bar(i8* %X) {
diff --git a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
index 7b48f579af01..5a13b21b5f49 100644
--- a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
target datalayout = "e-p:32:32"
diff --git a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
index 6dee717f4b63..6e2da70f736b 100644
--- a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; Testcase distilled from 256.bzip2.
diff --git a/test/ExecutionEngine/2003-06-05-PHIBug.ll b/test/ExecutionEngine/2003-06-05-PHIBug.ll
index 2cd9c1b21105..50b48da49445 100644
--- a/test/ExecutionEngine/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/2003-06-05-PHIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; Testcase distilled from 256.bzip2.
diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
index 8a96377aefda..6c90b33cb431 100644
--- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; This testcase failed to work because two variable sized allocas confused the
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
index 23efbeed0fc9..3a4a4e439fb8 100644
--- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
;
diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
index 25a24f5e5c41..b165a1cf30e3 100644
--- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; This testcase exposes a bug in the local register allocator where it runs out
diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index e289e10c0e4c..aa9d7e7d3632 100644
--- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
@A = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index 02b65b13f300..e7e434f271c6 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli -force-interpreter=true %t.bc | grep 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/ExecutionEngine/hello.ll b/test/ExecutionEngine/hello.ll
index 3cd29f62a400..fad36ed58361 100644
--- a/test/ExecutionEngine/hello.ll
+++ b/test/ExecutionEngine/hello.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
@.LC0 = internal global [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/hello2.ll b/test/ExecutionEngine/hello2.ll
index a6a6194ca4d6..7ca0d8827d54 100644
--- a/test/ExecutionEngine/hello2.ll
+++ b/test/ExecutionEngine/hello2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
@X = global i32 7 ; <i32*> [#uses=0]
diff --git a/test/ExecutionEngine/simplesttest.ll b/test/ExecutionEngine/simplesttest.ll
index fa69533fa9a3..5d9cf767bcb3 100644
--- a/test/ExecutionEngine/simplesttest.ll
+++ b/test/ExecutionEngine/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/simpletest.ll b/test/ExecutionEngine/simpletest.ll
index 0ed5b44c1b3a..53fb79c2c768 100644
--- a/test/ExecutionEngine/simpletest.ll
+++ b/test/ExecutionEngine/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @bar() {
diff --git a/test/ExecutionEngine/stubs.ll b/test/ExecutionEngine/stubs.ll
new file mode 100644
index 000000000000..525d135ff45c
--- /dev/null
+++ b/test/ExecutionEngine/stubs.ll
@@ -0,0 +1,35 @@
+; RUN: llvm-as < %s | lli -disable-lazy-compilation=false
+
+define i32 @main() nounwind {
+entry:
+ call void @lazily_compiled_address_is_consistent()
+ ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+ store i1 ()* @test, i1 ()** @funcPtr
+ %pass = tail call i1 @test() ; <i32> [#uses=1]
+ br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+ ret void
+fail_block:
+ call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+ call void @exit(i32 1)
+ unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+ %tmp = load i1 ()** @funcPtr
+ %eq = icmp eq i1 ()* %tmp, @test
+ ret i1 %eq
+}
+
+declare i32 @puts(i8*) noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/test-arith.ll b/test/ExecutionEngine/test-arith.ll
index d99e30f51740..8c51e6b2e224 100644
--- a/test/ExecutionEngine/test-arith.ll
+++ b/test/ExecutionEngine/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/test-branch.ll b/test/ExecutionEngine/test-branch.ll
index d7251f8e6b8d..dd8db5465f0b 100644
--- a/test/ExecutionEngine/test-branch.ll
+++ b/test/ExecutionEngine/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; test unconditional branch
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index a1b3cd66a3b7..4464ebd39bff 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
declare void @exit(i32)
diff --git a/test/ExecutionEngine/test-cast.ll b/test/ExecutionEngine/test-cast.ll
index 1458f6cc66cd..82d4949782a8 100644
--- a/test/ExecutionEngine/test-cast.ll
+++ b/test/ExecutionEngine/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @foo() {
diff --git a/test/ExecutionEngine/test-constantexpr.ll b/test/ExecutionEngine/test-constantexpr.ll
index 3623cf4bd264..cd5c635331d3 100644
--- a/test/ExecutionEngine/test-constantexpr.ll
+++ b/test/ExecutionEngine/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; This tests to make sure that we can evaluate weird constant expressions
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index 2e8ecd5a740f..4ebcf6f7aa7c 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define double @test(double* %DP, double %Arg) {
diff --git a/test/ExecutionEngine/test-loadstore.ll b/test/ExecutionEngine/test-loadstore.ll
index 298ea96fd728..ba0f0baf8d6d 100644
--- a/test/ExecutionEngine/test-loadstore.ll
+++ b/test/ExecutionEngine/test-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
diff --git a/test/ExecutionEngine/test-logical.ll b/test/ExecutionEngine/test-logical.ll
index f30c33431731..e560e52d568d 100644
--- a/test/ExecutionEngine/test-logical.ll
+++ b/test/ExecutionEngine/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/test-loop.ll b/test/ExecutionEngine/test-loop.ll
index 78fc3144aab0..7cd69e2943bc 100644
--- a/test/ExecutionEngine/test-loop.ll
+++ b/test/ExecutionEngine/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/test-malloc.ll b/test/ExecutionEngine/test-malloc.ll
index bc857ed5edca..8f79d974edb5 100644
--- a/test/ExecutionEngine/test-malloc.ll
+++ b/test/ExecutionEngine/test-malloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/test-phi.ll b/test/ExecutionEngine/test-phi.ll
index 69d1b0866f8a..f1aaefa50585 100644
--- a/test/ExecutionEngine/test-phi.ll
+++ b/test/ExecutionEngine/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; test phi node
diff --git a/test/ExecutionEngine/test-ret.ll b/test/ExecutionEngine/test-ret.ll
index e684fd5e3aa3..eae91f553752 100644
--- a/test/ExecutionEngine/test-ret.ll
+++ b/test/ExecutionEngine/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
; test return instructions
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index b917693abd46..4264e2c593fc 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
diff --git a/test/ExecutionEngine/test-setcond-int.ll b/test/ExecutionEngine/test-setcond-int.ll
index f80c2477a8ac..772f4fa70a4b 100644
--- a/test/ExecutionEngine/test-setcond-int.ll
+++ b/test/ExecutionEngine/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/test-shift.ll b/test/ExecutionEngine/test-shift.ll
index 330de0bfbc94..2791b8534a58 100644
--- a/test/ExecutionEngine/test-shift.ll
+++ b/test/ExecutionEngine/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -f -o %t.bc
+; RUN: llvm-as %s -o %t.bc
; RUN: lli %t.bc > /dev/null
define i32 @main() {
diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll
new file mode 100644
index 000000000000..56fc349d3e60
--- /dev/null
+++ b/test/Feature/NamedMDNode.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llvm-dis | grep "llvm.stuff = "
+
+;; Simple NamedMDNode
+!0 = metadata !{i32 42}
+!1 = metadata !{metadata !"foo"}
+!llvm.stuff = !{!0, !1}
diff --git a/test/Feature/NamedMDNode2.ll b/test/Feature/NamedMDNode2.ll
new file mode 100644
index 000000000000..0524dd27a4a9
--- /dev/null
+++ b/test/Feature/NamedMDNode2.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s -o /dev/null
+; PR4654
+
+
+@foo = constant i1 false
+!0 = metadata !{i1 false}
+!a = !{!0}
diff --git a/test/Feature/float.ll b/test/Feature/float.ll
index 632cfb741fa3..6c6c5dd53970 100644
--- a/test/Feature/float.ll
+++ b/test/Feature/float.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llvm-dis > t1.ll
-; RUN: llvm-as t1.ll -o - | llvm-dis > t2.ll
-; RUN: diff t1.ll t2.ll
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
@F1 = global float 0x4010000000000000
@D1 = global double 0x4010000000000000
diff --git a/test/Feature/globalredefinition3.ll b/test/Feature/globalredefinition3.ll
index 0183e5a04f90..5a5b3f1f89ad 100644
--- a/test/Feature/globalredefinition3.ll
+++ b/test/Feature/globalredefinition3.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -o /dev/null -f |& grep {redefinition of global '@B'}
+; RUN: not llvm-as %s -o /dev/null |& grep {redefinition of global '@B'}
@B = global i32 7
@B = global i32 7
diff --git a/test/Feature/inlineasm.ll b/test/Feature/inlineasm.ll
index e4318f775b6b..6be5722abfca 100644
--- a/test/Feature/inlineasm.ll
+++ b/test/Feature/inlineasm.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llvm-dis > t1.ll
-; RUN: llvm-as t1.ll -o - | llvm-dis > t2.ll
-; RUN: diff t1.ll t2.ll
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
module asm "this is an inline asm block"
module asm "this is another inline asm block"
diff --git a/test/Feature/load_module.ll b/test/Feature/load_module.ll
index 356eceb285ea..e2e222f4edcf 100644
--- a/test/Feature/load_module.ll
+++ b/test/Feature/load_module.ll
@@ -1,6 +1,6 @@
; PR1318
-; RUN: llvm-as < %s | opt -load=%llvmlibsdir/LLVMHello%shlibext -hello \
-; RUN: -disable-output - |& grep Hello
+; RUN: opt < %s -load=%llvmlibsdir/LLVMHello%shlibext -hello \
+; RUN: -disable-output |& grep Hello
@junk = global i32 0
diff --git a/test/Feature/md_on_instruction.ll b/test/Feature/md_on_instruction.ll
new file mode 100644
index 000000000000..d765cd8fa1e0
--- /dev/null
+++ b/test/Feature/md_on_instruction.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s -disable-output
+
+define i32 @foo() nounwind ssp {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ call void @llvm.dbg.func.start(metadata !0)
+ store i32 42, i32* %retval, !dbg !3
+ br label %0, !dbg !3
+
+; <label>:0 ; preds = %entry
+ call void @llvm.dbg.region.end(metadata !0)
+ %1 = load i32* %retval, !dbg !3 ; <i32> [#uses=1]
+ ret i32 %1, !dbg !3
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 1, metadata !2, i1 false, i1 true}
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang 1.0", i1 true, i1 false, metadata !"", i32 0}
+!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!3 = metadata !{i32 1, i32 13, metadata !1, metadata !1}
diff --git a/test/Feature/md_on_instruction2.ll b/test/Feature/md_on_instruction2.ll
new file mode 100644
index 000000000000..da9e49ebfb2f
--- /dev/null
+++ b/test/Feature/md_on_instruction2.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llvm-dis | grep " !dbg " | count 4
+define i32 @foo() nounwind ssp {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ call void @llvm.dbg.func.start(metadata !0)
+ store i32 42, i32* %retval, !dbg !3
+ br label %0, !dbg !3
+
+; <label>:0 ; preds = %entry
+ call void @llvm.dbg.region.end(metadata !0)
+ %1 = load i32* %retval, !dbg !3 ; <i32> [#uses=1]
+ ret i32 %1, !dbg !3
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 1, metadata !2, i1 false, i1 true}
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang 1.0", i1 true, i1 false, metadata !"", i32 0}
+!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!3 = metadata !{i32 1, i32 13, metadata !1, metadata !1}
diff --git a/test/Feature/memorymarkers.ll b/test/Feature/memorymarkers.ll
new file mode 100644
index 000000000000..06b8376678fa
--- /dev/null
+++ b/test/Feature/memorymarkers.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-as -disable-output < %s
+
+%"struct.std::pair<int,int>" = type { i32, i32 }
+
+declare void @_Z3barRKi(i32*)
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+declare {}* @llvm.invariant.start(i64, i8* nocapture) readonly nounwind
+declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind
+
+define i32 @_Z4foo2v() nounwind {
+entry:
+ %x = alloca %"struct.std::pair<int,int>"
+ %y = bitcast %"struct.std::pair<int,int>"* %x to i8*
+
+ ;; Constructor starts here (this isn't needed since it is immediately
+ ;; preceded by an alloca, but shown for completeness).
+ call void @llvm.lifetime.start(i64 8, i8* %y)
+
+ %0 = getelementptr %"struct.std::pair<int,int>"* %x, i32 0, i32 0
+ store i32 4, i32* %0, align 8
+ %1 = getelementptr %"struct.std::pair<int,int>"* %x, i32 0, i32 1
+ store i32 5, i32* %1, align 4
+
+ ;; Constructor has finished here.
+ %inv = call {}* @llvm.invariant.start(i64 8, i8* %y)
+ call void @_Z3barRKi(i32* %0) nounwind
+ %2 = load i32* %0, align 8
+
+ ;; Destructor is run here.
+ call void @llvm.invariant.end({}* %inv, i64 8, i8* %y)
+ ;; Destructor is done here.
+ call void @llvm.lifetime.end(i64 8, i8* %y)
+ ret i32 %2
+}
diff --git a/test/Feature/weak_constant.ll b/test/Feature/weak_constant.ll
index d27adfefa57f..9025aaac797f 100644
--- a/test/Feature/weak_constant.ll
+++ b/test/Feature/weak_constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llvm-dis > %t
+; RUN: opt < %s -std-compile-opts -S > %t
; RUN: grep undef %t | count 1
; RUN: grep 5 %t | count 1
; RUN: grep 7 %t | count 1
diff --git a/test/FrontendC++/2003-08-20-ExceptionFail.cpp b/test/FrontendC++/2003-08-20-ExceptionFail.cpp
index fd1c6ad4c2a5..f071c3c0e80c 100644
--- a/test/FrontendC++/2003-08-20-ExceptionFail.cpp
+++ b/test/FrontendC++/2003-08-20-ExceptionFail.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
void foo();
diff --git a/test/FrontendC++/2003-08-21-EmptyClass.cpp b/test/FrontendC++/2003-08-21-EmptyClass.cpp
index 2f90b3a10531..5dbfa33e03c3 100644
--- a/test/FrontendC++/2003-08-21-EmptyClass.cpp
+++ b/test/FrontendC++/2003-08-21-EmptyClass.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// This tests compilation of EMPTY_CLASS_EXPR's
diff --git a/test/FrontendC++/2003-08-27-TypeNamespaces.cpp b/test/FrontendC++/2003-08-27-TypeNamespaces.cpp
index cd7247e6085b..dec97180a420 100644
--- a/test/FrontendC++/2003-08-27-TypeNamespaces.cpp
+++ b/test/FrontendC++/2003-08-27-TypeNamespaces.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
namespace foo {
diff --git a/test/FrontendC++/2003-08-28-ForwardType.cpp b/test/FrontendC++/2003-08-28-ForwardType.cpp
index 38c4e2d84a07..9330e94aec57 100644
--- a/test/FrontendC++/2003-08-28-ForwardType.cpp
+++ b/test/FrontendC++/2003-08-28-ForwardType.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// Default placement versions of operator new.
#include <new>
diff --git a/test/FrontendC++/2003-08-28-SaveExprBug.cpp b/test/FrontendC++/2003-08-28-SaveExprBug.cpp
index 2be35d8d522d..98c5f5d8d659 100644
--- a/test/FrontendC++/2003-08-28-SaveExprBug.cpp
+++ b/test/FrontendC++/2003-08-28-SaveExprBug.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
char* eback();
diff --git a/test/FrontendC++/2003-08-31-StructLayout.cpp b/test/FrontendC++/2003-08-31-StructLayout.cpp
index 99d668266220..a45ad030e3cc 100644
--- a/test/FrontendC++/2003-08-31-StructLayout.cpp
+++ b/test/FrontendC++/2003-08-31-StructLayout.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// There is a HOLE in the derived2 object due to not wanting to place the two
// baseclass instances at the same offset!
diff --git a/test/FrontendC++/2003-09-22-CompositeExprValue.cpp b/test/FrontendC++/2003-09-22-CompositeExprValue.cpp
index a8208adc5127..3bd707ed8657 100644
--- a/test/FrontendC++/2003-09-22-CompositeExprValue.cpp
+++ b/test/FrontendC++/2003-09-22-CompositeExprValue.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct duration {
duration operator/=(int c) {
diff --git a/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp b/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp
index 4873123d1241..72997c524b85 100644
--- a/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp
+++ b/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// Non-POD classes cannot be passed into a function by component, because their
// dtors must be run. Instead, pass them in by reference. The C++ front-end
diff --git a/test/FrontendC++/2003-09-30-CommaExprBug.cpp b/test/FrontendC++/2003-09-30-CommaExprBug.cpp
index afe470cd11b0..365795dafde4 100644
--- a/test/FrontendC++/2003-09-30-CommaExprBug.cpp
+++ b/test/FrontendC++/2003-09-30-CommaExprBug.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
class Empty {};
diff --git a/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp b/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp
index 40c9c87ae1e0..63f62f28dbf1 100644
--- a/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp
+++ b/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct C {};
diff --git a/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp b/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp
index e07eb425d286..a1eee71f52c5 100644
--- a/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp
+++ b/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// Test with an opaque type
diff --git a/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp b/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp
index b1c54b89d599..94c11998963b 100644
--- a/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp
+++ b/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// The C++ front-end thinks the two foo's are different, the LLVM emitter
// thinks they are the same. The disconnect causes problems.
diff --git a/test/FrontendC++/2003-10-17-BoolBitfields.cpp b/test/FrontendC++/2003-10-17-BoolBitfields.cpp
index 547a367d34e6..103945df8aeb 100644
--- a/test/FrontendC++/2003-10-17-BoolBitfields.cpp
+++ b/test/FrontendC++/2003-10-17-BoolBitfields.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct test {
bool A : 1;
diff --git a/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp b/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp
index f9fc80ee9407..abda017ab96b 100644
--- a/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp
+++ b/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
template<class T>
diff --git a/test/FrontendC++/2003-11-04-ArrayConstructors.cpp b/test/FrontendC++/2003-11-04-ArrayConstructors.cpp
index 4df4f9b6727b..4ab33988ebb8 100644
--- a/test/FrontendC++/2003-11-04-ArrayConstructors.cpp
+++ b/test/FrontendC++/2003-11-04-ArrayConstructors.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct Foo {
diff --git a/test/FrontendC++/2003-11-04-CatchLabelName.cpp b/test/FrontendC++/2003-11-04-CatchLabelName.cpp
index 8acf88ddbaf3..7dbe788f4e17 100644
--- a/test/FrontendC++/2003-11-04-CatchLabelName.cpp
+++ b/test/FrontendC++/2003-11-04-CatchLabelName.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#include <string>
diff --git a/test/FrontendC++/2003-11-18-EnumArray.cpp b/test/FrontendC++/2003-11-18-EnumArray.cpp
index 6eaf9d66f2dc..bb1b3bf301e5 100644
--- a/test/FrontendC++/2003-11-18-EnumArray.cpp
+++ b/test/FrontendC++/2003-11-18-EnumArray.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
enum TchkType {
tchkNum, tchkString, tchkSCN, tchkNone
diff --git a/test/FrontendC++/2003-11-18-PtrMemConstantInitializer.cpp b/test/FrontendC++/2003-11-18-PtrMemConstantInitializer.cpp
index ae76a6c6379d..72609e7ccb46 100644
--- a/test/FrontendC++/2003-11-18-PtrMemConstantInitializer.cpp
+++ b/test/FrontendC++/2003-11-18-PtrMemConstantInitializer.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct Gfx {
void opMoveSetShowText();
diff --git a/test/FrontendC++/2003-11-25-ReturningOpaqueByValue.cpp b/test/FrontendC++/2003-11-25-ReturningOpaqueByValue.cpp
index 83fe1b3e810c..5ea0a2c4aa8f 100644
--- a/test/FrontendC++/2003-11-25-ReturningOpaqueByValue.cpp
+++ b/test/FrontendC++/2003-11-25-ReturningOpaqueByValue.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#include <vector>
std::vector<int> my_method ();
diff --git a/test/FrontendC++/2003-11-27-MultipleInheritanceThunk.cpp b/test/FrontendC++/2003-11-27-MultipleInheritanceThunk.cpp
index 16026c34b51c..99cfc8d21dff 100644
--- a/test/FrontendC++/2003-11-27-MultipleInheritanceThunk.cpp
+++ b/test/FrontendC++/2003-11-27-MultipleInheritanceThunk.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct CallSite {
diff --git a/test/FrontendC++/2003-11-29-DuplicatedCleanupTest.cpp b/test/FrontendC++/2003-11-29-DuplicatedCleanupTest.cpp
index 8131baafae7e..8df95cb1ee60 100644
--- a/test/FrontendC++/2003-11-29-DuplicatedCleanupTest.cpp
+++ b/test/FrontendC++/2003-11-29-DuplicatedCleanupTest.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
void doesntThrow() throw();
diff --git a/test/FrontendC++/2003-12-08-ArrayOfPtrToMemberFunc.cpp b/test/FrontendC++/2003-12-08-ArrayOfPtrToMemberFunc.cpp
index d5122340225c..b87e7869ed72 100644
--- a/test/FrontendC++/2003-12-08-ArrayOfPtrToMemberFunc.cpp
+++ b/test/FrontendC++/2003-12-08-ArrayOfPtrToMemberFunc.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct Evil {
void fun ();
diff --git a/test/FrontendC++/2004-03-08-ReinterpretCastCopy.cpp b/test/FrontendC++/2004-03-08-ReinterpretCastCopy.cpp
index 755d7c7621f3..35880ab36302 100644
--- a/test/FrontendC++/2004-03-08-ReinterpretCastCopy.cpp
+++ b/test/FrontendC++/2004-03-08-ReinterpretCastCopy.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct A {
virtual void Method() = 0;
diff --git a/test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp b/test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp
index 9bc70c84ab42..c2e52f66dce3 100644
--- a/test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp
+++ b/test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
// Testcase from Bug 291
diff --git a/test/FrontendC++/2004-06-08-LateTemplateInstantiation.cpp b/test/FrontendC++/2004-06-08-LateTemplateInstantiation.cpp
index 16d8e5edf51e..4ad4c7d061ad 100644
--- a/test/FrontendC++/2004-06-08-LateTemplateInstantiation.cpp
+++ b/test/FrontendC++/2004-06-08-LateTemplateInstantiation.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
diff --git a/test/FrontendC++/2004-09-27-CompilerCrash.cpp b/test/FrontendC++/2004-09-27-CompilerCrash.cpp
index f507c2391508..f52baaf7058c 100644
--- a/test/FrontendC++/2004-09-27-CompilerCrash.cpp
+++ b/test/FrontendC++/2004-09-27-CompilerCrash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
struct Pass {} ;
template<typename PassName>
diff --git a/test/FrontendC++/2006-11-06-StackTrace.cpp b/test/FrontendC++/2006-11-06-StackTrace.cpp
index f6a4428e4c8d..bbb9af199add 100644
--- a/test/FrontendC++/2006-11-06-StackTrace.cpp
+++ b/test/FrontendC++/2006-11-06-StackTrace.cpp
@@ -1,7 +1,7 @@
// This is a regression test on debug info to make sure that we can get a
// meaningful stack trace from a C++ program.
// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
-// RUN: llc --disable-fp-elim -o %t.s -f -O0 -relocation-model=pic
+// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
// RUN: %compile_c %t.s -o %t.o
// RUN: %link %t.o -o %t.exe
// RUN: echo {break DeepStack::deepest\nrun 17\nwhere\n} > %t.in
@@ -10,8 +10,9 @@
// RUN: gdb -q -batch -n -x %t.in %t.exe | \
// RUN: grep {#7 0x.* in main.*(argc=\[12\],.*argv=.*)}
-// Only works on ppc, x86 and x86_64. Should generalize?
-// XFAIL: alpha|ia64|arm
+// Only works on ppc (but not apple-darwin9), x86 and x86_64. Should
+// generalize?
+// XFAIL: alpha|arm|powerpc-apple-darwin9
#include <stdlib.h>
diff --git a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
index 70fc642e75f1..3522c670805e 100644
--- a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
+++ b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
@@ -1,13 +1,13 @@
// This is a regression test on debug info to make sure we don't hit a compile
// unit size issue with gdb.
// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
-// RUN: llc --disable-fp-elim -o NoCompileUnit.s -f
+// RUN: llc --disable-fp-elim -o NoCompileUnit.s
// RUN: %compile_c NoCompileUnit.s -o NoCompileUnit.o
// RUN: %link NoCompileUnit.o -o NoCompileUnit.exe
// RUN: echo {break main\nrun\np NoCompileUnit::pubname} > %t2
// RUN: gdb -q -batch -n -x %t2 NoCompileUnit.exe | \
// RUN: tee NoCompileUnit.out | not grep {"low == high"}
-// XFAIL: alpha|ia64|arm
+// XFAIL: alpha|arm
// XFAIL: *
// See PR2454
diff --git a/test/FrontendC++/2006-11-30-Pubnames.cpp b/test/FrontendC++/2006-11-30-Pubnames.cpp
index 8102713c39fb..b44566af6a68 100644
--- a/test/FrontendC++/2006-11-30-Pubnames.cpp
+++ b/test/FrontendC++/2006-11-30-Pubnames.cpp
@@ -1,13 +1,13 @@
// This is a regression test on debug info to make sure that we can access
// qualified global names.
// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
-// RUN: llc --disable-fp-elim -o %t.s -f -O0
+// RUN: llc --disable-fp-elim -o %t.s -O0
// RUN: %compile_c %t.s -o %t.o
// RUN: %link %t.o -o %t.exe
// RUN: %llvmdsymutil %t.exe
// RUN: echo {break main\nrun\np Pubnames::pubname} > %t.in
// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | grep {\$1 = 10}
-// XFAIL: alpha|ia64|arm
+// XFAIL: alpha|arm
struct Pubnames {
static int pubname;
};
diff --git a/test/FrontendC++/2007-04-05-PackedBitFields-1.cpp b/test/FrontendC++/2007-04-05-PackedBitFields-1.cpp
index 4797baf2d8f5..174dddf6ab65 100644
--- a/test/FrontendC++/2007-04-05-PackedBitFields-1.cpp
+++ b/test/FrontendC++/2007-04-05-PackedBitFields-1.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#ifdef PACKED
#define P __attribute__((packed))
diff --git a/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap-2.cpp b/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap-2.cpp
index 3ba5d7bd849e..55da1a6ab406 100644
--- a/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap-2.cpp
+++ b/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap-2.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#ifdef PACKED
#define P __attribute__((packed))
diff --git a/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap.cpp b/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap.cpp
index ad272c947d66..46a89491ee21 100644
--- a/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap.cpp
+++ b/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#ifdef PACKED
diff --git a/test/FrontendC++/2007-04-05-PackedBitFieldsSmall.cpp b/test/FrontendC++/2007-04-05-PackedBitFieldsSmall.cpp
index e7517dd74b90..7377b8292d56 100644
--- a/test/FrontendC++/2007-04-05-PackedBitFieldsSmall.cpp
+++ b/test/FrontendC++/2007-04-05-PackedBitFieldsSmall.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#ifdef PACKED
diff --git a/test/FrontendC++/2007-04-05-StructPackedFieldUnpacked.cpp b/test/FrontendC++/2007-04-05-StructPackedFieldUnpacked.cpp
index 52e247133769..b550b5fdb013 100644
--- a/test/FrontendC++/2007-04-05-StructPackedFieldUnpacked.cpp
+++ b/test/FrontendC++/2007-04-05-StructPackedFieldUnpacked.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
#ifdef PACKED
#define P __attribute__((packed))
diff --git a/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp b/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
index dfc607e654fd..997c3f703508 100644
--- a/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
+++ b/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c -g %s -o - | llc -O0 -f -o %t.s
+// RUN: %llvmgcc -c -g %s -o - | llc -O0 -o %t.s
// RUN: %compile_c %t.s -o %t.o
// PR4025
diff --git a/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp b/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
index eef0e86e2ae5..dcb2f16a5dc8 100644
--- a/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
+++ b/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
@@ -3,8 +3,7 @@
// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep quux | grep global | grep {struct.bar}
// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep foo | grep global | grep {struct.SRCFilter::FilterEntry}
// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.bar} | grep {1 x i32}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.CC} | grep {struct.bar}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.bar} | grep {1 x i32}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.CC} | grep {struct.payre<KBFP,float*} | grep {.base.32} | grep {1 x i32}
// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.SRCFilter::FilterEntry} | not grep {1 x i32}
// XFAIL: *
// XTARGET: powerpc-apple-darwin
@@ -30,3 +29,4 @@ class CC ccc;
struct bar { KBFP x; float* y;}; // 16 bytes
struct bar quux;
+
diff --git a/test/FrontendC++/2009-06-30-ByrefBlock.cpp b/test/FrontendC++/2009-06-30-ByrefBlock.cpp
index 6f8869c5413a..be9c94fd176a 100644
--- a/test/FrontendC++/2009-06-30-ByrefBlock.cpp
+++ b/test/FrontendC++/2009-06-30-ByrefBlock.cpp
@@ -1,6 +1,9 @@
-// Insure __block_holder_tmp is allocated on the stack.
+// Insure __block_holder_tmp is allocated on the stack. Darwin only.
// RUN: %llvmgxx %s -S -O2 -o - | egrep {__block_holder_tmp.*alloca}
+// XFAIL: *
+// XTARGET: darwin
// <rdar://problem/5865221>
+// END.
extern void fubar_dispatch_sync(void (^PP)(void));
void fubar() {
__block void *voodoo;
diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp
new file mode 100644
index 000000000000..54624a36666e
--- /dev/null
+++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp
@@ -0,0 +1,28 @@
+// This is a regression test on debug info to make sure that we can
+// print line numbers in asm.
+// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
+// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep {# SrcLine 25}
+// XFAIL: *
+
+#include <stdlib.h>
+
+class DeepStack {
+ int seedVal;
+public:
+ DeepStack(int seed) : seedVal(seed) {}
+
+ int shallowest( int x ) { return shallower(x + 1); }
+ int shallower ( int x ) { return shallow(x + 2); }
+ int shallow ( int x ) { return deep(x + 3); }
+ int deep ( int x ) { return deeper(x + 4); }
+ int deeper ( int x ) { return deepest(x + 6); }
+ int deepest ( int x ) { return x + 7; }
+
+ int runit() { return shallowest(seedVal); }
+};
+
+int main ( int argc, char** argv) {
+
+ DeepStack DS9( (argc > 1 ? atoi(argv[1]) : 0) );
+ return DS9.runit();
+}
diff --git a/test/FrontendC++/2009-07-16-PrivateCopyConstructor.cpp b/test/FrontendC++/2009-07-16-PrivateCopyConstructor.cpp
new file mode 100644
index 000000000000..96e85b24767f
--- /dev/null
+++ b/test/FrontendC++/2009-07-16-PrivateCopyConstructor.cpp
@@ -0,0 +1,15 @@
+// RUN: %llvmgxx %s -S
+// XFAIL: darwin
+
+#include <set>
+
+class A {
+public:
+ A();
+private:
+ A(const A&);
+};
+void B()
+{
+ std::set<void *, A> foo;
+}
diff --git a/test/FrontendC++/2009-07-16-Using.cpp b/test/FrontendC++/2009-07-16-Using.cpp
new file mode 100644
index 000000000000..1acadf642122
--- /dev/null
+++ b/test/FrontendC++/2009-07-16-Using.cpp
@@ -0,0 +1,8 @@
+// RUN: %llvmgxx %s -S
+
+namespace A {
+ typedef int B;
+}
+struct B {
+};
+using ::A::B;
diff --git a/test/FrontendC++/2009-08-03-Varargs.cpp b/test/FrontendC++/2009-08-03-Varargs.cpp
new file mode 100644
index 000000000000..cea3894c84e6
--- /dev/null
+++ b/test/FrontendC++/2009-08-03-Varargs.cpp
@@ -0,0 +1,5 @@
+// RUN: %llvmgxx %s -S -emit-llvm -o - | grep _Z1az\(\.\.\.\)
+// XFAIL: *
+// PR4678
+void a(...) {
+}
diff --git a/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp b/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp
new file mode 100644
index 000000000000..bc862e70bde1
--- /dev/null
+++ b/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp
@@ -0,0 +1,12 @@
+// RUN: %llvmgxx -c -emit-llvm %s -o -
+// rdar://7114564
+struct A {
+ unsigned long long : (sizeof(unsigned long long) * 8) - 16;
+};
+struct B {
+ A a;
+};
+struct B b = {
+ {}
+};
+
diff --git a/test/FrontendC++/2009-08-11-VectorRetTy.cpp b/test/FrontendC++/2009-08-11-VectorRetTy.cpp
new file mode 100644
index 000000000000..b2c3ba185b5c
--- /dev/null
+++ b/test/FrontendC++/2009-08-11-VectorRetTy.cpp
@@ -0,0 +1,13 @@
+// RUN: %llvmgxx %s -c -o /dev/null
+// <rdar://problem/7096460>
+typedef void (*Func) ();
+typedef long long m64 __attribute__((__vector_size__(8), __may_alias__));
+static inline m64 __attribute__((__always_inline__, __nodebug__)) _mm_set1_pi16() {}
+template <class MM>
+static void Bork() {
+ const m64 mmx_0x00ff = _mm_set1_pi16();
+}
+struct A {};
+Func arr[] = {
+ Bork<A>
+};
diff --git a/test/FrontendC++/2009-09-04-modify-crash.cpp b/test/FrontendC++/2009-09-04-modify-crash.cpp
new file mode 100644
index 000000000000..ac16f8c36d18
--- /dev/null
+++ b/test/FrontendC++/2009-09-04-modify-crash.cpp
@@ -0,0 +1,7 @@
+// RUN: %llvmgxx %s -emit-llvm -fapple-kext -S -o -
+// The extra check in 71555 caused this to crash on Darwin X86
+// in an assert build.
+class foo {
+ virtual ~foo ();
+};
+foo::~foo(){}
diff --git a/test/FrontendC++/2009-09-09-packed-layout.cpp b/test/FrontendC++/2009-09-09-packed-layout.cpp
new file mode 100644
index 000000000000..a569f9f78767
--- /dev/null
+++ b/test/FrontendC++/2009-09-09-packed-layout.cpp
@@ -0,0 +1,18 @@
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o /dev/null
+class X {
+ public:
+ virtual ~X();
+ short y;
+};
+#pragma pack(push, 1)
+class Z : public X {
+ public: enum { foo = ('x') };
+ virtual int y() const;
+};
+#pragma pack(pop)
+class Y : public X {
+public: enum { foo = ('y'), bar = 0 };
+};
+X x;
+Y y;
+Z z;
diff --git a/test/FrontendC++/member-alignment.cpp b/test/FrontendC++/member-alignment.cpp
new file mode 100644
index 000000000000..01c90cae8a6c
--- /dev/null
+++ b/test/FrontendC++/member-alignment.cpp
@@ -0,0 +1,20 @@
+// RUN: %llvmgxx -S -emit-llvm %s -o - | FileCheck %s
+// XFAIL: arm
+
+// rdar://7268289
+
+class t {
+public:
+ virtual void foo(void);
+ void bar(void);
+};
+
+void
+t::bar(void) {
+// CHECK: _ZN1t3barEv{{.*}} align 2
+}
+
+void
+t::foo(void) {
+// CHECK: _ZN1t3fooEv{{.*}} align 2
+}
diff --git a/test/FrontendC++/msasm.cpp b/test/FrontendC++/msasm.cpp
new file mode 100644
index 000000000000..d8d1f378618d
--- /dev/null
+++ b/test/FrontendC++/msasm.cpp
@@ -0,0 +1,23 @@
+// RUN: %llvmgxx %s -fasm-blocks -S -o - | FileCheck %s
+// Complicated expression as jump target
+// XFAIL: *
+// XTARGET: x86,i386,i686
+
+void Method3()
+{
+// CHECK: Method3
+// CHECK-NOT: msasm
+ asm("foo:");
+// CHECK: return
+}
+
+void Method4()
+{
+// CHECK: Method4
+// CHECK: msasm
+ asm {
+ bar:
+ }
+// CHECK: return
+}
+
diff --git a/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c b/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c
index 258d3cc0e3dc..1779a99942ea 100644
--- a/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c
+++ b/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* Regression test. Just compile .c -> .ll to test */
int foo(void) {
diff --git a/test/FrontendC/2002-01-24-ComplexSpaceInType.c b/test/FrontendC/2002-01-24-ComplexSpaceInType.c
index 9559d5b37b79..13d92c7306ee 100644
--- a/test/FrontendC/2002-01-24-ComplexSpaceInType.c
+++ b/test/FrontendC/2002-01-24-ComplexSpaceInType.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
// This caused generation of the following type name:
// %Array = uninitialized global [10 x %complex int]
diff --git a/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c b/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c
index 09029fb945ef..e619cf469926 100644
--- a/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c
+++ b/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void *dlclose(void*);
diff --git a/test/FrontendC/2002-02-13-ConditionalInCall.c b/test/FrontendC/2002-02-13-ConditionalInCall.c
index 0dad6ff5c2f4..f361088c1cf8 100644
--- a/test/FrontendC/2002-02-13-ConditionalInCall.c
+++ b/test/FrontendC/2002-02-13-ConditionalInCall.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* Test problem where bad code was generated with a ?: statement was
in a function call argument */
diff --git a/test/FrontendC/2002-02-13-ReloadProblem.c b/test/FrontendC/2002-02-13-ReloadProblem.c
index ab9b56da3e39..2ae97b72276b 100644
--- a/test/FrontendC/2002-02-13-ReloadProblem.c
+++ b/test/FrontendC/2002-02-13-ReloadProblem.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* This triggered a problem in reload, fixed by disabling most of the
* steps of compilation in GCC. Before this change, the code went through
diff --git a/test/FrontendC/2002-02-13-TypeVarNameCollision.c b/test/FrontendC/2002-02-13-TypeVarNameCollision.c
index ec334013ae01..2dede68a38dd 100644
--- a/test/FrontendC/2002-02-13-TypeVarNameCollision.c
+++ b/test/FrontendC/2002-02-13-TypeVarNameCollision.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* This testcase causes a symbol table collision. Type names and variable
* names should be in distinct namespaces
diff --git a/test/FrontendC/2002-02-13-UnnamedLocal.c b/test/FrontendC/2002-02-13-UnnamedLocal.c
index 6fdc7efdd3a3..85aa615205cf 100644
--- a/test/FrontendC/2002-02-13-UnnamedLocal.c
+++ b/test/FrontendC/2002-02-13-UnnamedLocal.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* Testcase for a problem where GCC allocated xqic to a register,
* and did not have a VAR_DECL that explained the stack slot to LLVM.
diff --git a/test/FrontendC/2002-02-14-EntryNodePreds.c b/test/FrontendC/2002-02-14-EntryNodePreds.c
index f1e01515fd77..851af912174b 100644
--- a/test/FrontendC/2002-02-14-EntryNodePreds.c
+++ b/test/FrontendC/2002-02-14-EntryNodePreds.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC Used to generate code that contained a branch to the entry node of
* the do_merge function. This is illegal LLVM code. To fix this, GCC now
diff --git a/test/FrontendC/2002-02-16-RenamingTest.c b/test/FrontendC/2002-02-16-RenamingTest.c
index 952af90c87ef..6042b67dc0cf 100644
--- a/test/FrontendC/2002-02-16-RenamingTest.c
+++ b/test/FrontendC/2002-02-16-RenamingTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* test that locals are renamed with . notation */
diff --git a/test/FrontendC/2002-02-17-ArgumentAddress.c b/test/FrontendC/2002-02-17-ArgumentAddress.c
index 937929559fe7..acd7e37a7563 100644
--- a/test/FrontendC/2002-02-17-ArgumentAddress.c
+++ b/test/FrontendC/2002-02-17-ArgumentAddress.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int test(int X) {
return X;
diff --git a/test/FrontendC/2002-02-18-64bitConstant.c b/test/FrontendC/2002-02-18-64bitConstant.c
index 31e5c6eada4b..a88587a960de 100644
--- a/test/FrontendC/2002-02-18-64bitConstant.c
+++ b/test/FrontendC/2002-02-18-64bitConstant.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC wasn't handling 64 bit constants right fixed */
diff --git a/test/FrontendC/2002-02-18-StaticData.c b/test/FrontendC/2002-02-18-StaticData.c
index 10439c373097..76cb0e670a7a 100644
--- a/test/FrontendC/2002-02-18-StaticData.c
+++ b/test/FrontendC/2002-02-18-StaticData.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
double FOO = 17;
diff --git a/test/FrontendC/2002-03-11-LargeCharInString.c b/test/FrontendC/2002-03-11-LargeCharInString.c
index d8a1671fc773..b383d03f7997 100644
--- a/test/FrontendC/2002-03-11-LargeCharInString.c
+++ b/test/FrontendC/2002-03-11-LargeCharInString.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <string.h>
diff --git a/test/FrontendC/2002-03-12-ArrayInitialization.c b/test/FrontendC/2002-03-12-ArrayInitialization.c
index d6cf44625bd1..1997a3cd0d9e 100644
--- a/test/FrontendC/2002-03-12-ArrayInitialization.c
+++ b/test/FrontendC/2002-03-12-ArrayInitialization.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC would generate bad code if not enough initializers are
specified for an array.
diff --git a/test/FrontendC/2002-03-12-StructInitialize.c b/test/FrontendC/2002-03-12-StructInitialize.c
index 5174ad4906d2..9eb11e187a11 100644
--- a/test/FrontendC/2002-03-12-StructInitialize.c
+++ b/test/FrontendC/2002-03-12-StructInitialize.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct Connection_Type {
diff --git a/test/FrontendC/2002-03-12-StructInitializer.c b/test/FrontendC/2002-03-12-StructInitializer.c
index cf2ba4e8cf89..fa333b78a95f 100644
--- a/test/FrontendC/2002-03-12-StructInitializer.c
+++ b/test/FrontendC/2002-03-12-StructInitializer.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC was not emitting string constants of the correct length when
* embedded into a structure field like this. It thought the strlength
diff --git a/test/FrontendC/2002-03-14-BrokenPHINode.c b/test/FrontendC/2002-03-14-BrokenPHINode.c
index 16d9bc7814f3..48d9ab705a72 100644
--- a/test/FrontendC/2002-03-14-BrokenPHINode.c
+++ b/test/FrontendC/2002-03-14-BrokenPHINode.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC was generating PHI nodes with an arity < #pred of the basic block the
* PHI node lived in. This was breaking LLVM because the number of entries
diff --git a/test/FrontendC/2002-03-14-BrokenSSA.c b/test/FrontendC/2002-03-14-BrokenSSA.c
index 01f259738e3f..9dc674aea27f 100644
--- a/test/FrontendC/2002-03-14-BrokenSSA.c
+++ b/test/FrontendC/2002-03-14-BrokenSSA.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* This code used to break GCC's SSA computation code. It would create
uses of B & C that are not dominated by their definitions. See:
diff --git a/test/FrontendC/2002-03-14-QuotesInStrConst.c b/test/FrontendC/2002-03-14-QuotesInStrConst.c
index 42f82bfa8950..63eaeef46a41 100644
--- a/test/FrontendC/2002-03-14-QuotesInStrConst.c
+++ b/test/FrontendC/2002-03-14-QuotesInStrConst.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC was not escaping quotes in string constants correctly, so this would
* get emitted:
diff --git a/test/FrontendC/2002-04-07-SwitchStmt.c b/test/FrontendC/2002-04-07-SwitchStmt.c
index 79632c9eb8ad..33e9c3d7a78a 100644
--- a/test/FrontendC/2002-04-07-SwitchStmt.c
+++ b/test/FrontendC/2002-04-07-SwitchStmt.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int printf(const char *, ...);
int foo();
diff --git a/test/FrontendC/2002-04-08-LocalArray.c b/test/FrontendC/2002-04-08-LocalArray.c
index 75475a1b3bd1..1dc51a092844 100644
--- a/test/FrontendC/2002-04-08-LocalArray.c
+++ b/test/FrontendC/2002-04-08-LocalArray.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* GCC is not outputting the static array to the LLVM backend, so bad things
* happen. Note that if this is defined static, everything seems fine.
diff --git a/test/FrontendC/2002-04-09-StructRetVal.c b/test/FrontendC/2002-04-09-StructRetVal.c
index c655e4a4d05d..de3b6fc26e0a 100644
--- a/test/FrontendC/2002-04-09-StructRetVal.c
+++ b/test/FrontendC/2002-04-09-StructRetVal.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct S {
int i;
diff --git a/test/FrontendC/2002-04-10-StructParameters.c b/test/FrontendC/2002-04-10-StructParameters.c
index 9db6a132bc1e..aaaba2abdde7 100644
--- a/test/FrontendC/2002-04-10-StructParameters.c
+++ b/test/FrontendC/2002-04-10-StructParameters.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct {
diff --git a/test/FrontendC/2002-05-23-StaticValues.c b/test/FrontendC/2002-05-23-StaticValues.c
index bf583e203d35..a5753b95f16e 100644
--- a/test/FrontendC/2002-05-23-StaticValues.c
+++ b/test/FrontendC/2002-05-23-StaticValues.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* Make sure the frontend is correctly marking static values as internal! */
diff --git a/test/FrontendC/2002-05-23-TypeNameCollision.c b/test/FrontendC/2002-05-23-TypeNameCollision.c
index 43faf97b1e63..25d114965d48 100644
--- a/test/FrontendC/2002-05-23-TypeNameCollision.c
+++ b/test/FrontendC/2002-05-23-TypeNameCollision.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* Testcase for when struct tag conflicts with typedef name... grr */
diff --git a/test/FrontendC/2002-05-24-Alloca.c b/test/FrontendC/2002-05-24-Alloca.c
index ac5b78d0c997..128bc8b7cfc0 100644
--- a/test/FrontendC/2002-05-24-Alloca.c
+++ b/test/FrontendC/2002-05-24-Alloca.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <string.h>
#include <stdio.h>
diff --git a/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c b/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c
index fb1b54bf72e2..4380dc7b2279 100644
--- a/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c
+++ b/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <stdio.h>
diff --git a/test/FrontendC/2002-07-14-MiscListTests.c b/test/FrontendC/2002-07-14-MiscListTests.c
index baae585f6c33..4a5459ad7131 100644
--- a/test/FrontendC/2002-07-14-MiscListTests.c
+++ b/test/FrontendC/2002-07-14-MiscListTests.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
// Test list stuff
diff --git a/test/FrontendC/2002-07-14-MiscTests.c b/test/FrontendC/2002-07-14-MiscTests.c
index 02703e269163..57c412083a6e 100644
--- a/test/FrontendC/2002-07-14-MiscTests.c
+++ b/test/FrontendC/2002-07-14-MiscTests.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -w -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null
/* These are random tests that I used when working on the GCC frontend
originally. */
diff --git a/test/FrontendC/2002-07-14-MiscTests2.c b/test/FrontendC/2002-07-14-MiscTests2.c
index ac58926abf29..f2c7c81c4daa 100644
--- a/test/FrontendC/2002-07-14-MiscTests2.c
+++ b/test/FrontendC/2002-07-14-MiscTests2.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
// Test ?: in function calls
diff --git a/test/FrontendC/2002-07-14-MiscTests3.c b/test/FrontendC/2002-07-14-MiscTests3.c
index 773193cefd49..7ef7e232d99e 100644
--- a/test/FrontendC/2002-07-14-MiscTests3.c
+++ b/test/FrontendC/2002-07-14-MiscTests3.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -w -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null
diff --git a/test/FrontendC/2002-07-16-HardStringInit.c b/test/FrontendC/2002-07-16-HardStringInit.c
index d1d03216ce1f..2785e5189d9b 100644
--- a/test/FrontendC/2002-07-16-HardStringInit.c
+++ b/test/FrontendC/2002-07-16-HardStringInit.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
char auto_kibitz_list[100][20] = {
{"diepx"},
diff --git a/test/FrontendC/2002-07-17-StringConstant.c b/test/FrontendC/2002-07-17-StringConstant.c
index 8a392033f392..9ba0c25213da 100644
--- a/test/FrontendC/2002-07-17-StringConstant.c
+++ b/test/FrontendC/2002-07-17-StringConstant.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
char * foo() { return "\\begin{"; }
diff --git a/test/FrontendC/2002-07-29-Casts.c b/test/FrontendC/2002-07-29-Casts.c
index 6794e80b5e3e..44bb61019554 100644
--- a/test/FrontendC/2002-07-29-Casts.c
+++ b/test/FrontendC/2002-07-29-Casts.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <stdlib.h>
#include <stdio.h>
diff --git a/test/FrontendC/2002-07-30-SubregSetAssertion.c b/test/FrontendC/2002-07-30-SubregSetAssertion.c
index 6d4f9f620581..af72eda65242 100644
--- a/test/FrontendC/2002-07-30-SubregSetAssertion.c
+++ b/test/FrontendC/2002-07-30-SubregSetAssertion.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
union X {
diff --git a/test/FrontendC/2002-07-30-UnionTest.c b/test/FrontendC/2002-07-30-UnionTest.c
index b2c481e4079c..c931b8024f0c 100644
--- a/test/FrontendC/2002-07-30-UnionTest.c
+++ b/test/FrontendC/2002-07-30-UnionTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
union X;
struct Empty {};
diff --git a/test/FrontendC/2002-07-30-VarArgsCallFailure.c b/test/FrontendC/2002-07-30-VarArgsCallFailure.c
index b37a462220b2..5d93947a7273 100644
--- a/test/FrontendC/2002-07-30-VarArgsCallFailure.c
+++ b/test/FrontendC/2002-07-30-VarArgsCallFailure.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int tcount;
void test(char *, const char*, int);
diff --git a/test/FrontendC/2002-07-31-BadAssert.c b/test/FrontendC/2002-07-31-BadAssert.c
index 5801d03a0993..5c3d74cfb6be 100644
--- a/test/FrontendC/2002-07-31-BadAssert.c
+++ b/test/FrontendC/2002-07-31-BadAssert.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct
{
diff --git a/test/FrontendC/2002-07-31-SubregFailure.c b/test/FrontendC/2002-07-31-SubregFailure.c
index 15573f9128ad..72fcb496cb00 100644
--- a/test/FrontendC/2002-07-31-SubregFailure.c
+++ b/test/FrontendC/2002-07-31-SubregFailure.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef union {
diff --git a/test/FrontendC/2002-08-02-UnionTest.c b/test/FrontendC/2002-08-02-UnionTest.c
index e0862ed10cf6..e2b8c3dd401c 100644
--- a/test/FrontendC/2002-08-02-UnionTest.c
+++ b/test/FrontendC/2002-08-02-UnionTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* In this testcase, the return value of foo() is being promoted to a register
* which breaks stuff
diff --git a/test/FrontendC/2002-08-19-RecursiveLocals.c b/test/FrontendC/2002-08-19-RecursiveLocals.c
index e5007af7254e..59220ac9b0d8 100644
--- a/test/FrontendC/2002-08-19-RecursiveLocals.c
+++ b/test/FrontendC/2002-08-19-RecursiveLocals.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* This testcase doesn't actually test a bug, it's just the result of me
* figuring out the syntax for forward declaring a static variable. */
diff --git a/test/FrontendC/2002-09-08-PointerShifts.c b/test/FrontendC/2002-09-08-PointerShifts.c
index cc7e91a7f9a8..86ff2f98afc4 100644
--- a/test/FrontendC/2002-09-08-PointerShifts.c
+++ b/test/FrontendC/2002-09-08-PointerShifts.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int foo(int *A, unsigned X) {
diff --git a/test/FrontendC/2002-09-18-UnionProblem.c b/test/FrontendC/2002-09-18-UnionProblem.c
index 56ec6cec8ed0..54588f12142a 100644
--- a/test/FrontendC/2002-09-18-UnionProblem.c
+++ b/test/FrontendC/2002-09-18-UnionProblem.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct DWstruct {
diff --git a/test/FrontendC/2002-09-19-StarInLabel.c b/test/FrontendC/2002-09-19-StarInLabel.c
index 86a2571d57bc..171acca2f118 100644
--- a/test/FrontendC/2002-09-19-StarInLabel.c
+++ b/test/FrontendC/2002-09-19-StarInLabel.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
extern void start() __asm__("start");
extern void _start() __asm__("_start");
diff --git a/test/FrontendC/2002-10-12-TooManyArguments.c b/test/FrontendC/2002-10-12-TooManyArguments.c
index 206cdd98da0b..73c267ad30dd 100644
--- a/test/FrontendC/2002-10-12-TooManyArguments.c
+++ b/test/FrontendC/2002-10-12-TooManyArguments.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void foo() {}
diff --git a/test/FrontendC/2002-12-15-GlobalBoolTest.c b/test/FrontendC/2002-12-15-GlobalBoolTest.c
index 6b27391d9b5c..c27a23abc6ec 100644
--- a/test/FrontendC/2002-12-15-GlobalBoolTest.c
+++ b/test/FrontendC/2002-12-15-GlobalBoolTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
_Bool X = 0;
diff --git a/test/FrontendC/2002-12-15-GlobalConstantTest.c b/test/FrontendC/2002-12-15-GlobalConstantTest.c
index a5a679db6758..26de48fbb77f 100644
--- a/test/FrontendC/2002-12-15-GlobalConstantTest.c
+++ b/test/FrontendC/2002-12-15-GlobalConstantTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
const char *W = "foo";
diff --git a/test/FrontendC/2002-12-15-GlobalRedefinition.c b/test/FrontendC/2002-12-15-GlobalRedefinition.c
index 39632a150666..3b76953b0940 100644
--- a/test/FrontendC/2002-12-15-GlobalRedefinition.c
+++ b/test/FrontendC/2002-12-15-GlobalRedefinition.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
extern char algbrfile[9];
char algbrfile[9] = "abcdefgh";
diff --git a/test/FrontendC/2002-12-15-StructParameters.c b/test/FrontendC/2002-12-15-StructParameters.c
index c85dab1cd1e4..90ab1ff44044 100644
--- a/test/FrontendC/2002-12-15-StructParameters.c
+++ b/test/FrontendC/2002-12-15-StructParameters.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct
{
diff --git a/test/FrontendC/2003-03-03-DeferredType.c b/test/FrontendC/2003-03-03-DeferredType.c
index fa51991b3057..9e60df6f6a0a 100644
--- a/test/FrontendC/2003-03-03-DeferredType.c
+++ b/test/FrontendC/2003-03-03-DeferredType.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
diff --git a/test/FrontendC/2003-06-22-UnionCrash.c b/test/FrontendC/2003-06-22-UnionCrash.c
index dab0716ae630..54d8dc6dda9a 100644
--- a/test/FrontendC/2003-06-22-UnionCrash.c
+++ b/test/FrontendC/2003-06-22-UnionCrash.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct Blend_Map_Entry {
union {
diff --git a/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c b/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c
index ba66276683b3..80562c8849b0 100644
--- a/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c
+++ b/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
double Test(double A, double B, double C, double D) {
return -(A-B) - (C-D);
diff --git a/test/FrontendC/2003-06-26-CFECrash.c b/test/FrontendC/2003-06-26-CFECrash.c
index bb6977f4838e..10a7ed44458f 100644
--- a/test/FrontendC/2003-06-26-CFECrash.c
+++ b/test/FrontendC/2003-06-26-CFECrash.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct min_info {
long offset;
diff --git a/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c b/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c
index b7bc80330673..be042cedf9fd 100644
--- a/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c
+++ b/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
/* This is apparently legal C.
*/
diff --git a/test/FrontendC/2003-08-18-SigSetJmp.c b/test/FrontendC/2003-08-18-SigSetJmp.c
index b7f4553f41d6..fc0d7659de6d 100644
--- a/test/FrontendC/2003-08-18-SigSetJmp.c
+++ b/test/FrontendC/2003-08-18-SigSetJmp.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <setjmp.h>
diff --git a/test/FrontendC/2003-08-18-StructAsValue.c b/test/FrontendC/2003-08-18-StructAsValue.c
index 649eadc4e451..26cb78a4d243 100644
--- a/test/FrontendC/2003-08-18-StructAsValue.c
+++ b/test/FrontendC/2003-08-18-StructAsValue.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct {
diff --git a/test/FrontendC/2003-08-20-BadBitfieldRef.c b/test/FrontendC/2003-08-20-BadBitfieldRef.c
index 58cf1bc334c6..ef54d8ad9c0d 100644
--- a/test/FrontendC/2003-08-20-BadBitfieldRef.c
+++ b/test/FrontendC/2003-08-20-BadBitfieldRef.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void foo()
{
diff --git a/test/FrontendC/2003-08-20-PrototypeMismatch.c b/test/FrontendC/2003-08-20-PrototypeMismatch.c
index 8358a2f6fd05..85c89f694c57 100644
--- a/test/FrontendC/2003-08-20-PrototypeMismatch.c
+++ b/test/FrontendC/2003-08-20-PrototypeMismatch.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
diff --git a/test/FrontendC/2003-08-20-vfork-bug.c b/test/FrontendC/2003-08-20-vfork-bug.c
index 575bfd6c194b..cfe316162ad3 100644
--- a/test/FrontendC/2003-08-20-vfork-bug.c
+++ b/test/FrontendC/2003-08-20-vfork-bug.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
extern int vfork(void);
test() {
diff --git a/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c b/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c
index 8829652cb154..a1d4574dcdb1 100644
--- a/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c
+++ b/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct bar;
diff --git a/test/FrontendC/2003-08-21-StmtExpr.c b/test/FrontendC/2003-08-21-StmtExpr.c
index 878ed473487e..7f7d22ea9d7b 100644
--- a/test/FrontendC/2003-08-21-StmtExpr.c
+++ b/test/FrontendC/2003-08-21-StmtExpr.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct {
diff --git a/test/FrontendC/2003-08-21-WideString.c b/test/FrontendC/2003-08-21-WideString.c
index 69847610b136..bf67a21896b0 100644
--- a/test/FrontendC/2003-08-21-WideString.c
+++ b/test/FrontendC/2003-08-21-WideString.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <wchar.h>
diff --git a/test/FrontendC/2003-08-23-LocalUnionTest.c b/test/FrontendC/2003-08-23-LocalUnionTest.c
index dc27802623ae..987accca1cc1 100644
--- a/test/FrontendC/2003-08-23-LocalUnionTest.c
+++ b/test/FrontendC/2003-08-23-LocalUnionTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
diff --git a/test/FrontendC/2003-08-29-BitFieldStruct.c b/test/FrontendC/2003-08-29-BitFieldStruct.c
index 8c303e8f2e01..57273cd86393 100644
--- a/test/FrontendC/2003-08-29-BitFieldStruct.c
+++ b/test/FrontendC/2003-08-29-BitFieldStruct.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct Word {
short bar;
diff --git a/test/FrontendC/2003-08-29-HugeCharConst.c b/test/FrontendC/2003-08-29-HugeCharConst.c
index a997994e91c3..236eb2e27482 100644
--- a/test/FrontendC/2003-08-29-HugeCharConst.c
+++ b/test/FrontendC/2003-08-29-HugeCharConst.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void foo() {
unsigned char int_latin1[] = "f\200\372b\200\343\200\340";
diff --git a/test/FrontendC/2003-08-29-StructLayoutBug.c b/test/FrontendC/2003-08-29-StructLayoutBug.c
index a5f6fb106ff8..16731945b77c 100644
--- a/test/FrontendC/2003-08-29-StructLayoutBug.c
+++ b/test/FrontendC/2003-08-29-StructLayoutBug.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct foo {
unsigned int I:1;
diff --git a/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c b/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c
index f67aee4c10fb..e1ca88cdc6f1 100644
--- a/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c
+++ b/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct foo {
unsigned int I:1;
diff --git a/test/FrontendC/2003-09-18-BitfieldTests.c b/test/FrontendC/2003-09-18-BitfieldTests.c
index 900a7fdd6606..2d74cb401dc2 100644
--- a/test/FrontendC/2003-09-18-BitfieldTests.c
+++ b/test/FrontendC/2003-09-18-BitfieldTests.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -w -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null
typedef struct BF {
diff --git a/test/FrontendC/2003-09-30-StructLayout.c b/test/FrontendC/2003-09-30-StructLayout.c
index 3a4016606ec7..177d1f49b2fb 100644
--- a/test/FrontendC/2003-09-30-StructLayout.c
+++ b/test/FrontendC/2003-09-30-StructLayout.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
enum En {
ENUM_VAL
diff --git a/test/FrontendC/2003-10-02-UnionLValueError.c b/test/FrontendC/2003-10-02-UnionLValueError.c
index 2ded0c64b42e..a4d17a4a0ba5 100644
--- a/test/FrontendC/2003-10-02-UnionLValueError.c
+++ b/test/FrontendC/2003-10-02-UnionLValueError.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#include <stdio.h>
diff --git a/test/FrontendC/2003-10-06-NegateExprType.c b/test/FrontendC/2003-10-06-NegateExprType.c
index 02386031b71f..fb8329b344b3 100644
--- a/test/FrontendC/2003-10-06-NegateExprType.c
+++ b/test/FrontendC/2003-10-06-NegateExprType.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
extern int A[10];
diff --git a/test/FrontendC/2003-10-09-UnionInitializerBug.c b/test/FrontendC/2003-10-09-UnionInitializerBug.c
index 90dbd37f0d61..57e113a7cc29 100644
--- a/test/FrontendC/2003-10-09-UnionInitializerBug.c
+++ b/test/FrontendC/2003-10-09-UnionInitializerBug.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct Foo {
unsigned a;
diff --git a/test/FrontendC/2003-10-28-ident.c b/test/FrontendC/2003-10-28-ident.c
index 9911dfd1ffe0..06cacf87a907 100644
--- a/test/FrontendC/2003-10-28-ident.c
+++ b/test/FrontendC/2003-10-28-ident.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#ident "foo"
diff --git a/test/FrontendC/2003-10-29-AsmRename.c b/test/FrontendC/2003-10-29-AsmRename.c
index 5750cedd026a..d07ccf7fd2c2 100644
--- a/test/FrontendC/2003-10-29-AsmRename.c
+++ b/test/FrontendC/2003-10-29-AsmRename.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct foo { int X; };
diff --git a/test/FrontendC/2003-11-01-C99-CompoundLiteral.c b/test/FrontendC/2003-11-01-C99-CompoundLiteral.c
index a0aa69858cf1..2912c97c546a 100644
--- a/test/FrontendC/2003-11-01-C99-CompoundLiteral.c
+++ b/test/FrontendC/2003-11-01-C99-CompoundLiteral.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct { int foo; } spinlock_t;
typedef struct wait_queue_head_t { spinlock_t lock; } wait_queue_head_t;
diff --git a/test/FrontendC/2003-11-01-EmptyStructCrash.c b/test/FrontendC/2003-11-01-EmptyStructCrash.c
index fb6993bb209f..c1161195dafc 100644
--- a/test/FrontendC/2003-11-01-EmptyStructCrash.c
+++ b/test/FrontendC/2003-11-01-EmptyStructCrash.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct { } the_coolest_struct_in_the_world;
extern the_coolest_struct_in_the_world xyzzy;
diff --git a/test/FrontendC/2003-11-01-GlobalUnionInit.c b/test/FrontendC/2003-11-01-GlobalUnionInit.c
index be7788d1abb5..7cd707348ca3 100644
--- a/test/FrontendC/2003-11-01-GlobalUnionInit.c
+++ b/test/FrontendC/2003-11-01-GlobalUnionInit.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
union bdflush_param {
struct { int x; } b_un;
diff --git a/test/FrontendC/2003-11-04-EmptyStruct.c b/test/FrontendC/2003-11-04-EmptyStruct.c
index c7a016415910..b4f37befffa0 100644
--- a/test/FrontendC/2003-11-04-EmptyStruct.c
+++ b/test/FrontendC/2003-11-04-EmptyStruct.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct { } rwlock_t;
struct fs_struct { rwlock_t lock; int umask; };
diff --git a/test/FrontendC/2003-11-04-OutOfMemory.c b/test/FrontendC/2003-11-04-OutOfMemory.c
index 6a42e160f279..40cb6c2e21e4 100644
--- a/test/FrontendC/2003-11-04-OutOfMemory.c
+++ b/test/FrontendC/2003-11-04-OutOfMemory.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void schedule_timeout(signed long timeout)
{
diff --git a/test/FrontendC/2003-11-12-VoidString.c b/test/FrontendC/2003-11-12-VoidString.c
index db2e84bbbbcf..5770b3661a96 100644
--- a/test/FrontendC/2003-11-12-VoidString.c
+++ b/test/FrontendC/2003-11-12-VoidString.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void query_newnamebuf(void) { ((void)"query_newnamebuf"); }
diff --git a/test/FrontendC/2003-11-16-StaticArrayInit.c b/test/FrontendC/2003-11-16-StaticArrayInit.c
index 2b42e38dc9ae..eb83b3ad0c61 100644
--- a/test/FrontendC/2003-11-16-StaticArrayInit.c
+++ b/test/FrontendC/2003-11-16-StaticArrayInit.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void bar () {
static char x[10];
diff --git a/test/FrontendC/2003-11-18-CondExprLValue.c b/test/FrontendC/2003-11-18-CondExprLValue.c
index ec000a443d90..68ee622c6419 100644
--- a/test/FrontendC/2003-11-18-CondExprLValue.c
+++ b/test/FrontendC/2003-11-18-CondExprLValue.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
typedef struct { unsigned long pgprot; } pgprot_t;
diff --git a/test/FrontendC/2003-11-19-BitFieldArray.c b/test/FrontendC/2003-11-19-BitFieldArray.c
index 9d54112e86e2..250268a3b859 100644
--- a/test/FrontendC/2003-11-19-BitFieldArray.c
+++ b/test/FrontendC/2003-11-19-BitFieldArray.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct _GIOChannel {
int write_buf;
diff --git a/test/FrontendC/2003-11-20-Bitfields.c b/test/FrontendC/2003-11-20-Bitfields.c
index c9ea0dc7f1ec..4be9942ccf3c 100644
--- a/test/FrontendC/2003-11-20-Bitfields.c
+++ b/test/FrontendC/2003-11-20-Bitfields.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct face_cachel {
unsigned int reverse :1;
diff --git a/test/FrontendC/2003-11-20-ComplexDivision.c b/test/FrontendC/2003-11-20-ComplexDivision.c
index cd548c0313fc..172de8c0e192 100644
--- a/test/FrontendC/2003-11-20-ComplexDivision.c
+++ b/test/FrontendC/2003-11-20-ComplexDivision.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int test() {
__complex__ double C;
diff --git a/test/FrontendC/2003-11-20-UnionBitfield.c b/test/FrontendC/2003-11-20-UnionBitfield.c
index 12e7df5890aa..f999c2077721 100644
--- a/test/FrontendC/2003-11-20-UnionBitfield.c
+++ b/test/FrontendC/2003-11-20-UnionBitfield.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct printf_spec {
unsigned int minus_flag:1;
diff --git a/test/FrontendC/2003-11-26-PointerShift.c b/test/FrontendC/2003-11-26-PointerShift.c
index 079f6903a233..6b5205a6e7ed 100644
--- a/test/FrontendC/2003-11-26-PointerShift.c
+++ b/test/FrontendC/2003-11-26-PointerShift.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
unsigned long do_csum(const unsigned char *buff, int len, unsigned long result) {
if (2 & (unsigned long) buff) result += 1;
diff --git a/test/FrontendC/2003-11-27-ConstructorCast.c b/test/FrontendC/2003-11-27-ConstructorCast.c
index 3780e7a4b97e..15eb76947951 100644
--- a/test/FrontendC/2003-11-27-ConstructorCast.c
+++ b/test/FrontendC/2003-11-27-ConstructorCast.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct i387_soft_struct {
long cwd;
diff --git a/test/FrontendC/2003-11-27-UnionCtorInitialization.c b/test/FrontendC/2003-11-27-UnionCtorInitialization.c
index 472b59158536..e3ae1e96a682 100644
--- a/test/FrontendC/2003-11-27-UnionCtorInitialization.c
+++ b/test/FrontendC/2003-11-27-UnionCtorInitialization.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct i387_soft_struct {
long cwd;
diff --git a/test/FrontendC/2004-01-08-ExternInlineRedefine.c b/test/FrontendC/2004-01-08-ExternInlineRedefine.c
index b3907ee9933b..4366b9b56593 100644
--- a/test/FrontendC/2004-01-08-ExternInlineRedefine.c
+++ b/test/FrontendC/2004-01-08-ExternInlineRedefine.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
extern __inline long int
diff --git a/test/FrontendC/2004-03-07-ComplexDivEquals.c b/test/FrontendC/2004-03-07-ComplexDivEquals.c
index b1da58054123..c6c805a7b32c 100644
--- a/test/FrontendC/2004-03-07-ComplexDivEquals.c
+++ b/test/FrontendC/2004-03-07-ComplexDivEquals.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
void test(__complex__ double D, double X) {
diff --git a/test/FrontendC/2004-03-09-LargeArrayInitializers.c b/test/FrontendC/2004-03-09-LargeArrayInitializers.c
index 335c5684870a..265206fabb66 100644
--- a/test/FrontendC/2004-03-09-LargeArrayInitializers.c
+++ b/test/FrontendC/2004-03-09-LargeArrayInitializers.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
// Test that these initializers are handled efficiently
diff --git a/test/FrontendC/2004-03-15-SimpleIndirectGoto.c b/test/FrontendC/2004-03-15-SimpleIndirectGoto.c
index ad7221e96a00..a3f27b2a3301 100644
--- a/test/FrontendC/2004-03-15-SimpleIndirectGoto.c
+++ b/test/FrontendC/2004-03-15-SimpleIndirectGoto.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int code[]={0,0,0,0,1};
void foo(int x) {
diff --git a/test/FrontendC/2004-03-16-AsmRegisterCrash.c b/test/FrontendC/2004-03-16-AsmRegisterCrash.c
index 6ad1cd48f3cd..f13368c25627 100644
--- a/test/FrontendC/2004-03-16-AsmRegisterCrash.c
+++ b/test/FrontendC/2004-03-16-AsmRegisterCrash.c
@@ -1,6 +1,10 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int foo() {
+#ifdef __ppc__
+ register int X __asm__("r1");
+#else
register int X __asm__("ebx");
+#endif
return X;
}
diff --git a/test/FrontendC/2004-05-07-VarArrays.c b/test/FrontendC/2004-05-07-VarArrays.c
index 20412983e90e..3a39c4fe63aa 100644
--- a/test/FrontendC/2004-05-07-VarArrays.c
+++ b/test/FrontendC/2004-05-07-VarArrays.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
int foo(int len, char arr[][len], int X) {
return arr[X][0];
diff --git a/test/FrontendC/2004-05-21-IncompleteEnum.c b/test/FrontendC/2004-05-21-IncompleteEnum.c
index 106df5e48fc4..958a8d1c0ea1 100644
--- a/test/FrontendC/2004-05-21-IncompleteEnum.c
+++ b/test/FrontendC/2004-05-21-IncompleteEnum.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -w -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null
void test(enum foo *X) {
}
diff --git a/test/FrontendC/2004-06-08-OpaqueStructArg.c b/test/FrontendC/2004-06-08-OpaqueStructArg.c
index ede811c38988..5dfdd83c9e2a 100644
--- a/test/FrontendC/2004-06-08-OpaqueStructArg.c
+++ b/test/FrontendC/2004-06-08-OpaqueStructArg.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct fu;
void foo(struct fu);
diff --git a/test/FrontendC/2004-06-17-UnorderedBuiltins.c b/test/FrontendC/2004-06-17-UnorderedBuiltins.c
index 5e02e7f325aa..02780f0f0577 100644
--- a/test/FrontendC/2004-06-17-UnorderedBuiltins.c
+++ b/test/FrontendC/2004-06-17-UnorderedBuiltins.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
_Bool A, B, C, D, E, F, G, H;
diff --git a/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c b/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c
index e474a13b37f0..3e450a4b9366 100644
--- a/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c
+++ b/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
struct S { };
diff --git a/test/FrontendC/2004-07-06-FunctionCast.c b/test/FrontendC/2004-07-06-FunctionCast.c
index 169f74036423..6d80f86fa1eb 100644
--- a/test/FrontendC/2004-07-06-FunctionCast.c
+++ b/test/FrontendC/2004-07-06-FunctionCast.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
static int unused_func(void) {
return 1;
diff --git a/test/FrontendC/2004-08-06-LargeStructTest.c b/test/FrontendC/2004-08-06-LargeStructTest.c
index b0413b4b0cee..8fbb7f8368c4 100644
--- a/test/FrontendC/2004-08-06-LargeStructTest.c
+++ b/test/FrontendC/2004-08-06-LargeStructTest.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | llvm-as -f -o /dev/null
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
#define A(X) int X;
diff --git a/test/FrontendC/2005-02-27-MarkGlobalConstant.c b/test/FrontendC/2005-02-27-MarkGlobalConstant.c
index 4d24d0c67317..b9fbbb6369a5 100644
--- a/test/FrontendC/2005-02-27-MarkGlobalConstant.c
+++ b/test/FrontendC/2005-02-27-MarkGlobalConstant.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -S -o - | grep {internal constant }
+// RUN: %llvmgcc -xc %s -S -o - | grep {private constant }
// The synthetic global made by the CFE for big initializer should be marked
// constant.
diff --git a/test/FrontendC/2005-07-20-SqrtNoErrno.c b/test/FrontendC/2005-07-20-SqrtNoErrno.c
index 3f85f7280196..a321a3884e8b 100644
--- a/test/FrontendC/2005-07-20-SqrtNoErrno.c
+++ b/test/FrontendC/2005-07-20-SqrtNoErrno.c
@@ -1,7 +1,11 @@
-// RUN: %llvmgcc %s -S -o - -fno-math-errno | grep llvm.sqrt
+// RUN: %llvmgcc %s -S -o - -fno-math-errno | FileCheck %s
+// llvm.sqrt has undefined behavior on negative inputs, so it is
+// inappropriate to translate C/C++ sqrt to this.
#include <math.h>
float foo(float X) {
- // Check that this compiles to llvm.sqrt when errno is ignored.
+// CHECK: foo
+// CHECK: sqrtf(float %1) nounwind readonly
+ // Check that this is marked readonly when errno is ignored.
return sqrtf(X);
}
diff --git a/test/FrontendC/2005-09-20-ComplexConstants.c b/test/FrontendC/2005-09-20-ComplexConstants.c
index db98fc2385a0..209adc502fa5 100644
--- a/test/FrontendC/2005-09-20-ComplexConstants.c
+++ b/test/FrontendC/2005-09-20-ComplexConstants.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -o - | llvm-as -o /dev/null -f
+// RUN: %llvmgcc %s -S -o - | llvm-as -o /dev/null
const double _Complex x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
diff --git a/test/FrontendC/2005-12-04-DeclarationLineNumbers.c b/test/FrontendC/2005-12-04-DeclarationLineNumbers.c
index 0ced92e553f7..f3f69ddb0bcc 100644
--- a/test/FrontendC/2005-12-04-DeclarationLineNumbers.c
+++ b/test/FrontendC/2005-12-04-DeclarationLineNumbers.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -g -o - | grep {llvm.dbg.stoppoint.*i32 14}
+// RUN: %llvmgcc %s -S -g -o - | grep DW_TAG_compile_unit | count 1
// PR664: ensure that line #'s are emitted for declarations
diff --git a/test/FrontendC/2006-01-13-Includes.c b/test/FrontendC/2006-01-13-Includes.c
index 9b928fc5cf1f..7fa0b3b5a6dc 100644
--- a/test/FrontendC/2006-01-13-Includes.c
+++ b/test/FrontendC/2006-01-13-Includes.c
@@ -1,5 +1,4 @@
-// RUN: %llvmgcc %s -g -S -o - | llvm-as | opt -std-compile-opts | \
-// RUN: llvm-dis | grep {test/FrontendC}
+// RUN: %llvmgcc %s -g -S -o - | grep {test/FrontendC}
// PR676
#include <stdio.h>
diff --git a/test/FrontendC/2007-02-16-WritableStrings.c b/test/FrontendC/2007-02-16-WritableStrings.c
index d11fa089dbab..811e3301f8d0 100644
--- a/test/FrontendC/2007-02-16-WritableStrings.c
+++ b/test/FrontendC/2007-02-16-WritableStrings.c
@@ -1,8 +1,7 @@
// Test the -fwritable-strings option.
// RUN: %llvmgcc -O3 -S -o - -emit-llvm -fwritable-strings %s | \
-// RUN: grep {internal global}
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep {internal constant}
+// RUN: grep {private global}
+// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep {private constant}
char *X = "foo";
-
diff --git a/test/FrontendC/2007-03-27-VarLengthArray.c b/test/FrontendC/2007-03-27-VarLengthArray.c
index f14fd4d8c0ad..b555690068d9 100644
--- a/test/FrontendC/2007-03-27-VarLengthArray.c
+++ b/test/FrontendC/2007-03-27-VarLengthArray.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -o - | grep {getelementptr \\\[0 x i32\\\]}
+// RUN: %llvmgcc -S %s -o - | grep {getelementptr inbounds \\\[0 x i32\\\]}
extern void f(int *);
int e(int m, int n) {
int x[n];
diff --git a/test/FrontendC/2008-03-05-syncPtr.c b/test/FrontendC/2008-03-05-syncPtr.c
index 03aedab9524e..43e46717b279 100644
--- a/test/FrontendC/2008-03-05-syncPtr.c
+++ b/test/FrontendC/2008-03-05-syncPtr.c
@@ -1,5 +1,5 @@
// RUN: %llvmgcc %s -S -emit-llvm -o - | grep llvm.atomic
-// XFAIL: sparc-sun-solaris2|arm|ia64
+// XFAIL: sparc-sun-solaris2|arm
// Feature currently implemented only for x86, alpha, powerpc.
int* foo(int** a, int* b, int* c) {
diff --git a/test/FrontendC/2008-03-24-BitField-And-Alloca.c b/test/FrontendC/2008-03-24-BitField-And-Alloca.c
index 5fac2a9ca556..291f036523a4 100644
--- a/test/FrontendC/2008-03-24-BitField-And-Alloca.c
+++ b/test/FrontendC/2008-03-24-BitField-And-Alloca.c
@@ -1,5 +1,5 @@
// RUN: %llvmgcc -O2 -S %s -o - | not grep alloca
-// RUN: %llvmgcc -m32 -O2 -S %s -o - | grep store | not grep {align 8}
+// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep store
enum {
PP_C,
diff --git a/test/FrontendC/2008-07-29-EHLabel.ll b/test/FrontendC/2008-07-29-EHLabel.ll
index abf945685825..7577bc980ecd 100644
--- a/test/FrontendC/2008-07-29-EHLabel.ll
+++ b/test/FrontendC/2008-07-29-EHLabel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o - | %llvmgcc -xassembler -c -o /dev/null -
+; RUN: llc %s -o - | %llvmgcc -xassembler -c -o /dev/null -
; PR2609
%struct..0._11 = type { i32 }
%struct..1__pthread_mutex_s = type { i32, i32, i32, i32, i32, %struct..0._11 }
diff --git a/test/FrontendC/2009-02-17-BitField-dbg.c b/test/FrontendC/2009-02-17-BitField-dbg.c
index 456b0a64a791..36ee2e6f0ef7 100644
--- a/test/FrontendC/2009-02-17-BitField-dbg.c
+++ b/test/FrontendC/2009-02-17-BitField-dbg.c
@@ -1,6 +1,6 @@
// Check bitfields.
// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
-// RUN: llc --disable-fp-elim -o 2009-02-17-BitField-dbg.s -f
+// RUN: llc --disable-fp-elim -o 2009-02-17-BitField-dbg.s
// RUN: %compile_c 2009-02-17-BitField-dbg.s -o 2009-02-17-BitField-dbg.o
// RUN: echo {ptype mystruct} > %t2
// RUN: gdb -q -batch -n -x %t2 2009-02-17-BitField-dbg.o | \
diff --git a/test/FrontendC/2009-03-09-WeakDeclarations-1.c b/test/FrontendC/2009-03-09-WeakDeclarations-1.c
index 15b9801699a2..13ea84f7bae9 100644
--- a/test/FrontendC/2009-03-09-WeakDeclarations-1.c
+++ b/test/FrontendC/2009-03-09-WeakDeclarations-1.c
@@ -1,4 +1,4 @@
-// RUN: $llvmgcc $test -c -o /dev/null |& \
+// RUN: %llvmgcc_only %s -c -o /dev/null |& \
// RUN: egrep {(14|15|22): warning:} | \
// RUN: wc -l | grep --quiet 3
// XTARGET: darwin,linux
diff --git a/test/FrontendC/2009-07-14-VoidPtr.c b/test/FrontendC/2009-07-14-VoidPtr.c
new file mode 100644
index 000000000000..8001c56ad52a
--- /dev/null
+++ b/test/FrontendC/2009-07-14-VoidPtr.c
@@ -0,0 +1,6 @@
+// RUN: %llvmgcc -S %s -o -
+// PR4556
+
+extern void foo;
+void *bar = &foo;
+
diff --git a/test/FrontendC/2009-07-15-pad-wchar_t-array.c b/test/FrontendC/2009-07-15-pad-wchar_t-array.c
new file mode 100644
index 000000000000..41bdef25ecca
--- /dev/null
+++ b/test/FrontendC/2009-07-15-pad-wchar_t-array.c
@@ -0,0 +1,7 @@
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
+
+#include <stddef.h>
+signed short _iodbcdm_sqlerror( )
+{
+ wchar_t _sqlState[6] = { L"\0" };
+}
diff --git a/test/FrontendC/2009-07-17-VoidParameter.c b/test/FrontendC/2009-07-17-VoidParameter.c
new file mode 100644
index 000000000000..d5769524386c
--- /dev/null
+++ b/test/FrontendC/2009-07-17-VoidParameter.c
@@ -0,0 +1,4 @@
+// RUN: %llvmgcc -S %s -o -
+// PR4214
+typedef void vt;
+void (*func_ptr)(vt my_vt);
diff --git a/test/FrontendC/2009-07-22-StructLayout.c b/test/FrontendC/2009-07-22-StructLayout.c
new file mode 100644
index 000000000000..74904da33e0c
--- /dev/null
+++ b/test/FrontendC/2009-07-22-StructLayout.c
@@ -0,0 +1,34 @@
+// RUN: %llvmgcc %s -S -o /dev/null
+// PR4590
+
+typedef unsigned char __u8;
+typedef unsigned int __le32;
+typedef unsigned int __u32;
+typedef unsigned short __le16;
+typedef unsigned short __u16;
+
+struct usb_cdc_ether_desc {
+ __u8 bLength;
+ __u8 bDescriptorType;
+ __u8 bDescriptorSubType;
+
+ __u8 iMACAddress;
+ __le32 bmEthernetStatistics;
+ __le16 wMaxSegmentSize;
+ __le16 wNumberMCFilters;
+ __u8 bNumberPowerFilters;
+} __attribute__ ((packed));
+
+
+static struct usb_cdc_ether_desc ecm_desc __attribute__ ((__section__(".init.data"))) = {
+ .bLength = sizeof ecm_desc,
+ .bDescriptorType = ((0x01 << 5) | 0x04),
+ .bDescriptorSubType = 0x0f,
+
+
+
+ .bmEthernetStatistics = (( __le32)(__u32)(0)),
+ .wMaxSegmentSize = (( __le16)(__u16)(1514)),
+ .wNumberMCFilters = (( __le16)(__u16)(0)),
+ .bNumberPowerFilters = 0,
+};
diff --git a/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c b/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c
new file mode 100644
index 000000000000..e141c9a16c80
--- /dev/null
+++ b/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c
@@ -0,0 +1,10 @@
+// RUN: %llvmgcc %s -fasm-blocks -S -o - | grep {\\\*1192}
+// Complicated expression as jump target
+// XFAIL: *
+// XTARGET: x86,i386,i686
+
+asm void Method3()
+{
+ mov eax,[esp+4]
+ jmp [eax+(299-1)*4]
+}
diff --git a/test/FrontendC/2009-09-24-SqrtErrno.c b/test/FrontendC/2009-09-24-SqrtErrno.c
new file mode 100644
index 000000000000..09fc8764ea51
--- /dev/null
+++ b/test/FrontendC/2009-09-24-SqrtErrno.c
@@ -0,0 +1,12 @@
+// RUN: %llvmgcc %s -S -o - -fmath-errno | FileCheck %s
+// llvm.sqrt has undefined behavior on negative inputs, so it is
+// inappropriate to translate C/C++ sqrt to this.
+#include <math.h>
+
+float foo(float X) {
+// CHECK: foo
+// CHECK-NOT: readonly
+// CHECK: return
+ // Check that this is not marked readonly when errno is used.
+ return sqrtf(X);
+}
diff --git a/test/FrontendC/Atomics-no64bit.c b/test/FrontendC/Atomics-no64bit.c
index 963fcbba43c1..12fb75a2d938 100644
--- a/test/FrontendC/Atomics-no64bit.c
+++ b/test/FrontendC/Atomics-no64bit.c
@@ -9,7 +9,7 @@
// Currently this is implemented only for Alpha, X86, PowerPC.
// Add your target here if it doesn't work.
// This version of the test does not include long long.
-// XFAIL: sparc|arm|ia64
+// XFAIL: sparc|arm
signed char sc;
unsigned char uc;
diff --git a/test/FrontendC/Atomics.c b/test/FrontendC/Atomics.c
index 7e25ebbc5c25..2aed55c13dec 100644
--- a/test/FrontendC/Atomics.c
+++ b/test/FrontendC/Atomics.c
@@ -9,7 +9,7 @@
// Currently this is implemented only for Alpha, X86, PowerPC.
// Add your target here if it doesn't work.
// PPC32 does not translate the long long variants, so fails this test.
-// XFAIL: sparc|arm|ia64|powerpc
+// XFAIL: sparc|arm|powerpc
signed char sc;
unsigned char uc;
diff --git a/test/FrontendC/func-aligned.c b/test/FrontendC/func-aligned.c
new file mode 100644
index 000000000000..40149f49d8ec
--- /dev/null
+++ b/test/FrontendC/func-aligned.c
@@ -0,0 +1,7 @@
+// RUN: %llvmgcc %s -S -emit-llvm -o - | FileCheck %s
+
+// rdar://7270273
+void foo() __attribute__((aligned (64)));
+void foo() {
+// CHECK: define void @foo() {{.*}} align 64
+}
diff --git a/test/FrontendC/msasm.c b/test/FrontendC/msasm.c
new file mode 100644
index 000000000000..18375bdf34fe
--- /dev/null
+++ b/test/FrontendC/msasm.c
@@ -0,0 +1,23 @@
+// RUN: %llvmgcc %s -fasm-blocks -S -o - | FileCheck %s
+// Complicated expression as jump target
+// XFAIL: *
+// XTARGET: x86,i386,i686
+
+void Method3()
+{
+// CHECK: Method3
+// CHECK-NOT: msasm
+ asm("foo:");
+// CHECK: return
+}
+
+void Method4()
+{
+// CHECK: Method4
+// CHECK: msasm
+ asm {
+ bar:
+ }
+// CHECK: return
+}
+
diff --git a/test/FrontendC/ptr-rotate.c b/test/FrontendC/ptr-rotate.c
new file mode 100644
index 000000000000..56c21f46e7ae
--- /dev/null
+++ b/test/FrontendC/ptr-rotate.c
@@ -0,0 +1,7 @@
+// RUN: %llvmgcc %s -c -m32 -o /dev/null
+// RUN: %llvmgcc %s -c -O1 -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+
+unsigned int func(void *A) {
+ // DARWIN: roll $27
+ return ((((unsigned long long) A) >> 5) | (((unsigned long long) A) << 27));
+}
diff --git a/test/FrontendC/redef-ext-inline.c b/test/FrontendC/redef-ext-inline.c
new file mode 100644
index 000000000000..240beb1f6f67
--- /dev/null
+++ b/test/FrontendC/redef-ext-inline.c
@@ -0,0 +1,6 @@
+// RUN: %llvmgcc -S %s -o -
+// rdar://7208839
+
+extern inline int f1 (void) {return 1;}
+int f3 (void) {return f1();}
+int f1 (void) {return 0;}
diff --git a/test/FrontendC/wchar-const.c b/test/FrontendC/wchar-const.c
new file mode 100644
index 000000000000..7cf3322e8cf9
--- /dev/null
+++ b/test/FrontendC/wchar-const.c
@@ -0,0 +1,9 @@
+// RUN: %llvmgcc -S %s -o - | grep {constant \\\[18 x} | grep { 84, }
+// This should pass for any endianness combination of host and target.
+#include <wchar.h>
+extern void foo(const wchar_t* p);
+int main (int argc, const char * argv[])
+{
+ foo(L"This is some text");
+ return 0;
+}
diff --git a/test/FrontendObjC/2007-10-17-SJLJExceptions.m b/test/FrontendObjC/2007-10-17-SJLJExceptions.m
index 4bea6e98a8b2..970207e0d8a4 100644
--- a/test/FrontendObjC/2007-10-17-SJLJExceptions.m
+++ b/test/FrontendObjC/2007-10-17-SJLJExceptions.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -x objective-c %s -pipe -std=gnu99 -O2 -fexceptions -S -o - | not grep Unwind_Resume
+// RUN: %llvmgcc -m32 -x objective-c %s -pipe -std=gnu99 -O2 -fexceptions -S -o - | not grep Unwind_Resume
#import <stdio.h>
@interface Foo {
diff --git a/test/FrontendObjC/2009-04-14-AsmSection.m b/test/FrontendObjC/2009-04-14-AsmSection.m
index ec5c688a8547..de2cef00ff56 100644
--- a/test/FrontendObjC/2009-04-14-AsmSection.m
+++ b/test/FrontendObjC/2009-04-14-AsmSection.m
@@ -1,6 +1,6 @@
// RUN: %llvmgcc -S %s -fobjc-abi-version=2 -emit-llvm -o %t
// RUN: grep {OBJC_CLASS_\\\$_A.*section.*__DATA, __objc_data.*align} %t
-// XTARGETS: darwin
+// XTARGET: darwin
@interface A
@end
diff --git a/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m b/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m
index f5a4309d4cfe..cada8438bc95 100644
--- a/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m
+++ b/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m
@@ -2,7 +2,7 @@
// RUN: grep {OBJC_CLASS_RO_\\\$_I4} %t | grep {i32 0, i32 1, i32 2, i32 0}
// RUN: grep {OBJC_CLASS_RO_\\\$_I2} %t | grep {i32 0, i32 1, i32 1, i32 0}
// RUN: grep {OBJC_CLASS_RO_\\\$_I5} %t | grep {i32 0, i32 0, i32 0, i32 0}
-// XTARGETS: darwin
+// XTARGET: darwin
// Test instance variable sizing when base class ends in bitfield
@interface I3 {
diff --git a/test/FrontendObjC/2009-08-05-utf16.m b/test/FrontendObjC/2009-08-05-utf16.m
new file mode 100644
index 000000000000..2964ecfd4991
--- /dev/null
+++ b/test/FrontendObjC/2009-08-05-utf16.m
@@ -0,0 +1,5 @@
+/* RUN: %llvmgcc -w -x objective-c -S %s -o - | grep {__utf16_string_1} | grep {internal constant} | grep {12 x i8}
+ rdar://7095855 rdar://7115749 */
+
+void *P = @"iPodâ„¢";
+
diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m
new file mode 100644
index 000000000000..0bc4e9a6d6c3
--- /dev/null
+++ b/test/FrontendObjC/2009-08-17-DebugInfo.m
@@ -0,0 +1,28 @@
+// This is a regression test on debug info to make sure that we can set a
+// breakpoint on a objective message.
+// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | llc -o %t.s -O0
+// RUN: %compile_c %t.s -o %t.o
+// RUN: %link %t.o -o %t.exe -framework Foundation
+// RUN: echo {break randomFunc\n} > %t.in
+// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \
+// RUN: grep {Breakpoint 1 at 0x.*: file 2009-08-17-DebugInfo.m, line 21}
+// XTARGET: darwin
+@interface MyClass
+{
+ int my;
+}
++ init;
+- randomFunc;
+@end
+
+@implementation MyClass
++ init {
+}
+- randomFunc { my = 42; }
+@end
+
+int main() {
+ id o = [MyClass init];
+ [o randomFunc];
+ return 0;
+}
diff --git a/test/Integer/a15.ll.out b/test/Integer/a15.ll.out
index e9eb800b0bdf..5195cdf3761f 100644
--- a/test/Integer/a15.ll.out
+++ b/test/Integer/a15.ll.out
@@ -1,20 +1,21 @@
; ModuleID = '<stdin>'
-@b = constant i15 0 ; <i15*> [#uses=0]
-@c = constant i15 -2 ; <i15*> [#uses=0]
-@d = constant i15 0 ; <i15*> [#uses=0]
-@e = constant i15 -1 ; <i15*> [#uses=0]
-@f = constant i15 1 ; <i15*> [#uses=0]
-@g = constant i15 3 ; <i15*> [#uses=0]
-@h = constant i15 undef ; <i15*> [#uses=0]
-@i = constant i15 -16384 ; <i15*> [#uses=0]
-@j = constant i15 1 ; <i15*> [#uses=0]
-@l = constant i15 -1 ; <i15*> [#uses=0]
-@n = constant i15 -2 ; <i15*> [#uses=0]
-@q = constant i15 16381 ; <i15*> [#uses=0]
-@r = constant i15 0 ; <i15*> [#uses=0]
-@s = constant i15 2 ; <i15*> [#uses=0]
-@t = constant i15 1 ; <i15*> [#uses=0]
-@u = constant i15 0 ; <i15*> [#uses=0]
-@o = constant i15 0 ; <i15*> [#uses=0]
-@p = constant i15 -1 ; <i15*> [#uses=0]
-@v = constant i15 -1 ; <i15*> [#uses=0]
+
+@b = constant i15 0 ; <i15*> [#uses=0]
+@c = constant i15 -2 ; <i15*> [#uses=0]
+@d = constant i15 0 ; <i15*> [#uses=0]
+@e = constant i15 -1 ; <i15*> [#uses=0]
+@f = constant i15 1 ; <i15*> [#uses=0]
+@g = constant i15 3 ; <i15*> [#uses=0]
+@h = constant i15 undef ; <i15*> [#uses=0]
+@i = constant i15 -16384 ; <i15*> [#uses=0]
+@j = constant i15 1 ; <i15*> [#uses=0]
+@l = constant i15 -1 ; <i15*> [#uses=0]
+@n = constant i15 -2 ; <i15*> [#uses=0]
+@q = constant i15 16381 ; <i15*> [#uses=0]
+@r = constant i15 0 ; <i15*> [#uses=0]
+@s = constant i15 2 ; <i15*> [#uses=0]
+@t = constant i15 1 ; <i15*> [#uses=0]
+@u = constant i15 0 ; <i15*> [#uses=0]
+@o = constant i15 0 ; <i15*> [#uses=0]
+@p = constant i15 -1 ; <i15*> [#uses=0]
+@v = constant i15 -1 ; <i15*> [#uses=0]
diff --git a/test/Integer/a17.ll.out b/test/Integer/a17.ll.out
index d24f62b63c31..ba6641289e31 100644
--- a/test/Integer/a17.ll.out
+++ b/test/Integer/a17.ll.out
@@ -1,19 +1,20 @@
; ModuleID = '<stdin>'
-@b = constant i17 0 ; <i17*> [#uses=0]
-@c = constant i17 -2 ; <i17*> [#uses=0]
-@d = constant i17 0 ; <i17*> [#uses=0]
-@e = constant i17 -1 ; <i17*> [#uses=0]
-@f = constant i17 1 ; <i17*> [#uses=0]
-@g = constant i17 3 ; <i17*> [#uses=0]
-@h = constant i17 undef ; <i17*> [#uses=0]
-@i = constant i17 -65536 ; <i17*> [#uses=0]
-@j = constant i17 1 ; <i17*> [#uses=0]
-@l = constant i17 -1 ; <i17*> [#uses=0]
-@n = constant i17 -2 ; <i17*> [#uses=0]
-@q = constant i17 0 ; <i17*> [#uses=0]
-@r = constant i17 2 ; <i17*> [#uses=0]
-@s = constant i17 1 ; <i17*> [#uses=0]
-@t = constant i17 0 ; <i17*> [#uses=0]
-@o = constant i17 0 ; <i17*> [#uses=0]
-@p = constant i17 -1 ; <i17*> [#uses=0]
-@v = constant i17 -1 ; <i17*> [#uses=0]
+
+@b = constant i17 0 ; <i17*> [#uses=0]
+@c = constant i17 -2 ; <i17*> [#uses=0]
+@d = constant i17 0 ; <i17*> [#uses=0]
+@e = constant i17 -1 ; <i17*> [#uses=0]
+@f = constant i17 1 ; <i17*> [#uses=0]
+@g = constant i17 3 ; <i17*> [#uses=0]
+@h = constant i17 undef ; <i17*> [#uses=0]
+@i = constant i17 -65536 ; <i17*> [#uses=0]
+@j = constant i17 1 ; <i17*> [#uses=0]
+@l = constant i17 -1 ; <i17*> [#uses=0]
+@n = constant i17 -2 ; <i17*> [#uses=0]
+@q = constant i17 0 ; <i17*> [#uses=0]
+@r = constant i17 2 ; <i17*> [#uses=0]
+@s = constant i17 1 ; <i17*> [#uses=0]
+@t = constant i17 0 ; <i17*> [#uses=0]
+@o = constant i17 0 ; <i17*> [#uses=0]
+@p = constant i17 -1 ; <i17*> [#uses=0]
+@v = constant i17 -1 ; <i17*> [#uses=0]
diff --git a/test/Integer/a31.ll.out b/test/Integer/a31.ll.out
index fb8d250b4369..7407a746b5bf 100644
--- a/test/Integer/a31.ll.out
+++ b/test/Integer/a31.ll.out
@@ -1,19 +1,20 @@
; ModuleID = '<stdin>'
-@b = constant i31 0 ; <i31*> [#uses=0]
-@c = constant i31 -2 ; <i31*> [#uses=0]
-@d = constant i31 0 ; <i31*> [#uses=0]
-@e = constant i31 -1 ; <i31*> [#uses=0]
-@f = constant i31 1 ; <i31*> [#uses=0]
-@g = constant i31 3 ; <i31*> [#uses=0]
-@h = constant i31 undef ; <i31*> [#uses=0]
-@i = constant i31 -1073741824 ; <i31*> [#uses=0]
-@j = constant i31 1 ; <i31*> [#uses=0]
-@l = constant i31 -1 ; <i31*> [#uses=0]
-@n = constant i31 -2 ; <i31*> [#uses=0]
-@q = constant i31 0 ; <i31*> [#uses=0]
-@r = constant i31 2 ; <i31*> [#uses=0]
-@s = constant i31 1 ; <i31*> [#uses=0]
-@t = constant i31 0 ; <i31*> [#uses=0]
-@o = constant i31 0 ; <i31*> [#uses=0]
-@p = constant i31 -1 ; <i31*> [#uses=0]
-@u = constant i31 -3 ; <i31*> [#uses=0]
+
+@b = constant i31 0 ; <i31*> [#uses=0]
+@c = constant i31 -2 ; <i31*> [#uses=0]
+@d = constant i31 0 ; <i31*> [#uses=0]
+@e = constant i31 -1 ; <i31*> [#uses=0]
+@f = constant i31 1 ; <i31*> [#uses=0]
+@g = constant i31 3 ; <i31*> [#uses=0]
+@h = constant i31 undef ; <i31*> [#uses=0]
+@i = constant i31 -1073741824 ; <i31*> [#uses=0]
+@j = constant i31 1 ; <i31*> [#uses=0]
+@l = constant i31 -1 ; <i31*> [#uses=0]
+@n = constant i31 -2 ; <i31*> [#uses=0]
+@q = constant i31 0 ; <i31*> [#uses=0]
+@r = constant i31 2 ; <i31*> [#uses=0]
+@s = constant i31 1 ; <i31*> [#uses=0]
+@t = constant i31 0 ; <i31*> [#uses=0]
+@o = constant i31 0 ; <i31*> [#uses=0]
+@p = constant i31 -1 ; <i31*> [#uses=0]
+@u = constant i31 -3 ; <i31*> [#uses=0]
diff --git a/test/Integer/a33.ll.out b/test/Integer/a33.ll.out
index f495d0dc9761..6cd61ee69cdc 100644
--- a/test/Integer/a33.ll.out
+++ b/test/Integer/a33.ll.out
@@ -1,19 +1,20 @@
; ModuleID = '<stdin>'
-@b = constant i33 0 ; <i33*> [#uses=0]
-@c = constant i33 -2 ; <i33*> [#uses=0]
-@d = constant i33 0 ; <i33*> [#uses=0]
-@e = constant i33 -1 ; <i33*> [#uses=0]
-@f = constant i33 1 ; <i33*> [#uses=0]
-@g = constant i33 3 ; <i33*> [#uses=0]
-@h = constant i33 undef ; <i33*> [#uses=0]
-@i = constant i33 -4294967296 ; <i33*> [#uses=0]
-@j = constant i33 1 ; <i33*> [#uses=0]
-@l = constant i33 -1 ; <i33*> [#uses=0]
-@n = constant i33 -2 ; <i33*> [#uses=0]
-@q = constant i33 0 ; <i33*> [#uses=0]
-@r = constant i33 2 ; <i33*> [#uses=0]
-@s = constant i33 1 ; <i33*> [#uses=0]
-@t = constant i33 0 ; <i33*> [#uses=0]
-@o = constant i33 0 ; <i33*> [#uses=0]
-@p = constant i33 -1 ; <i33*> [#uses=0]
-@u = constant i33 -1 ; <i33*> [#uses=0]
+
+@b = constant i33 0 ; <i33*> [#uses=0]
+@c = constant i33 -2 ; <i33*> [#uses=0]
+@d = constant i33 0 ; <i33*> [#uses=0]
+@e = constant i33 -1 ; <i33*> [#uses=0]
+@f = constant i33 1 ; <i33*> [#uses=0]
+@g = constant i33 3 ; <i33*> [#uses=0]
+@h = constant i33 undef ; <i33*> [#uses=0]
+@i = constant i33 -4294967296 ; <i33*> [#uses=0]
+@j = constant i33 1 ; <i33*> [#uses=0]
+@l = constant i33 -1 ; <i33*> [#uses=0]
+@n = constant i33 -2 ; <i33*> [#uses=0]
+@q = constant i33 0 ; <i33*> [#uses=0]
+@r = constant i33 2 ; <i33*> [#uses=0]
+@s = constant i33 1 ; <i33*> [#uses=0]
+@t = constant i33 0 ; <i33*> [#uses=0]
+@o = constant i33 0 ; <i33*> [#uses=0]
+@p = constant i33 -1 ; <i33*> [#uses=0]
+@u = constant i33 -1 ; <i33*> [#uses=0]
diff --git a/test/Integer/a63.ll.out b/test/Integer/a63.ll.out
index c770608ed1e2..18dff5a2964e 100644
--- a/test/Integer/a63.ll.out
+++ b/test/Integer/a63.ll.out
@@ -1,19 +1,20 @@
; ModuleID = '<stdin>'
-@b = constant i63 0 ; <i63*> [#uses=0]
-@c = constant i63 -2 ; <i63*> [#uses=0]
-@d = constant i63 0 ; <i63*> [#uses=0]
-@e = constant i63 -1 ; <i63*> [#uses=0]
-@f = constant i63 1 ; <i63*> [#uses=0]
-@g = constant i63 3 ; <i63*> [#uses=0]
-@h = constant i63 undef ; <i63*> [#uses=0]
-@i = constant i63 -4611686018427387904 ; <i63*> [#uses=0]
-@j = constant i63 1 ; <i63*> [#uses=0]
-@l = constant i63 -1 ; <i63*> [#uses=0]
-@n = constant i63 -2 ; <i63*> [#uses=0]
-@q = constant i63 0 ; <i63*> [#uses=0]
-@u = constant i63 -1 ; <i63*> [#uses=0]
-@r = constant i63 2 ; <i63*> [#uses=0]
-@s = constant i63 1 ; <i63*> [#uses=0]
-@t = constant i63 0 ; <i63*> [#uses=0]
-@o = constant i63 0 ; <i63*> [#uses=0]
-@p = constant i63 -1 ; <i63*> [#uses=0]
+
+@b = constant i63 0 ; <i63*> [#uses=0]
+@c = constant i63 -2 ; <i63*> [#uses=0]
+@d = constant i63 0 ; <i63*> [#uses=0]
+@e = constant i63 -1 ; <i63*> [#uses=0]
+@f = constant i63 1 ; <i63*> [#uses=0]
+@g = constant i63 3 ; <i63*> [#uses=0]
+@h = constant i63 undef ; <i63*> [#uses=0]
+@i = constant i63 -4611686018427387904 ; <i63*> [#uses=0]
+@j = constant i63 1 ; <i63*> [#uses=0]
+@l = constant i63 -1 ; <i63*> [#uses=0]
+@n = constant i63 -2 ; <i63*> [#uses=0]
+@q = constant i63 0 ; <i63*> [#uses=0]
+@u = constant i63 -1 ; <i63*> [#uses=0]
+@r = constant i63 2 ; <i63*> [#uses=0]
+@s = constant i63 1 ; <i63*> [#uses=0]
+@t = constant i63 0 ; <i63*> [#uses=0]
+@o = constant i63 0 ; <i63*> [#uses=0]
+@p = constant i63 -1 ; <i63*> [#uses=0]
diff --git a/test/Integer/a7.ll.out b/test/Integer/a7.ll.out
index a6ed28816fff..250925d795e6 100644
--- a/test/Integer/a7.ll.out
+++ b/test/Integer/a7.ll.out
@@ -1,24 +1,25 @@
; ModuleID = '<stdin>'
-@b = constant i7 0 ; <i7*> [#uses=0]
-@q = constant i7 63 ; <i7*> [#uses=0]
-@c = constant i7 -2 ; <i7*> [#uses=0]
-@d = constant i7 0 ; <i7*> [#uses=0]
-@e = constant i7 -1 ; <i7*> [#uses=0]
-@f = constant i7 1 ; <i7*> [#uses=0]
-@g = constant i7 3 ; <i7*> [#uses=0]
-@r = constant i7 5 ; <i7*> [#uses=0]
-@s = constant i7 5 ; <i7*> [#uses=0]
-@h = constant i7 undef ; <i7*> [#uses=0]
-@i = constant i7 -64 ; <i7*> [#uses=0]
-@j = constant i7 1 ; <i7*> [#uses=0]
-@l = constant i7 -1 ; <i7*> [#uses=0]
-@m2 = constant i7 -1 ; <i7*> [#uses=0]
-@n = constant i7 -2 ; <i7*> [#uses=0]
-@t = constant i7 -2 ; <i7*> [#uses=0]
-@u = constant i7 -64 ; <i7*> [#uses=0]
-@v = constant i7 0 ; <i7*> [#uses=0]
-@w = constant i7 2 ; <i7*> [#uses=0]
-@x = constant i7 1 ; <i7*> [#uses=0]
-@y = constant i7 0 ; <i7*> [#uses=0]
-@o = constant i7 0 ; <i7*> [#uses=0]
-@p = constant i7 -1 ; <i7*> [#uses=0]
+
+@b = constant i7 0 ; <i7*> [#uses=0]
+@q = constant i7 63 ; <i7*> [#uses=0]
+@c = constant i7 -2 ; <i7*> [#uses=0]
+@d = constant i7 0 ; <i7*> [#uses=0]
+@e = constant i7 -1 ; <i7*> [#uses=0]
+@f = constant i7 1 ; <i7*> [#uses=0]
+@g = constant i7 3 ; <i7*> [#uses=0]
+@r = constant i7 5 ; <i7*> [#uses=0]
+@s = constant i7 5 ; <i7*> [#uses=0]
+@h = constant i7 undef ; <i7*> [#uses=0]
+@i = constant i7 -64 ; <i7*> [#uses=0]
+@j = constant i7 1 ; <i7*> [#uses=0]
+@l = constant i7 -1 ; <i7*> [#uses=0]
+@m2 = constant i7 -1 ; <i7*> [#uses=0]
+@n = constant i7 -2 ; <i7*> [#uses=0]
+@t = constant i7 -2 ; <i7*> [#uses=0]
+@u = constant i7 -64 ; <i7*> [#uses=0]
+@v = constant i7 0 ; <i7*> [#uses=0]
+@w = constant i7 2 ; <i7*> [#uses=0]
+@x = constant i7 1 ; <i7*> [#uses=0]
+@y = constant i7 0 ; <i7*> [#uses=0]
+@o = constant i7 0 ; <i7*> [#uses=0]
+@p = constant i7 -1 ; <i7*> [#uses=0]
diff --git a/test/Integer/a9.ll.out b/test/Integer/a9.ll.out
index 6525b9aad578..6e38062c4a03 100644
--- a/test/Integer/a9.ll.out
+++ b/test/Integer/a9.ll.out
@@ -1,18 +1,19 @@
; ModuleID = '<stdin>'
-@b = constant i9 0 ; <i9*> [#uses=0]
-@c = constant i9 -2 ; <i9*> [#uses=0]
-@d = constant i9 0 ; <i9*> [#uses=0]
-@e = constant i9 -1 ; <i9*> [#uses=0]
-@f = constant i9 1 ; <i9*> [#uses=0]
-@g = constant i9 3 ; <i9*> [#uses=0]
-@h = constant i9 undef ; <i9*> [#uses=0]
-@i = constant i9 -256 ; <i9*> [#uses=0]
-@j = constant i9 1 ; <i9*> [#uses=0]
-@l = constant i9 -1 ; <i9*> [#uses=0]
-@n = constant i9 -2 ; <i9*> [#uses=0]
-@q = constant i9 0 ; <i9*> [#uses=0]
-@r = constant i9 255 ; <i9*> [#uses=0]
-@s = constant i9 0 ; <i9*> [#uses=0]
-@t = constant i9 1 ; <i9*> [#uses=0]
-@o = constant i9 0 ; <i9*> [#uses=0]
-@p = constant i9 -1 ; <i9*> [#uses=0]
+
+@b = constant i9 0 ; <i9*> [#uses=0]
+@c = constant i9 -2 ; <i9*> [#uses=0]
+@d = constant i9 0 ; <i9*> [#uses=0]
+@e = constant i9 -1 ; <i9*> [#uses=0]
+@f = constant i9 1 ; <i9*> [#uses=0]
+@g = constant i9 3 ; <i9*> [#uses=0]
+@h = constant i9 undef ; <i9*> [#uses=0]
+@i = constant i9 -256 ; <i9*> [#uses=0]
+@j = constant i9 1 ; <i9*> [#uses=0]
+@l = constant i9 -1 ; <i9*> [#uses=0]
+@n = constant i9 -2 ; <i9*> [#uses=0]
+@q = constant i9 0 ; <i9*> [#uses=0]
+@r = constant i9 255 ; <i9*> [#uses=0]
+@s = constant i9 0 ; <i9*> [#uses=0]
+@t = constant i9 1 ; <i9*> [#uses=0]
+@o = constant i9 0 ; <i9*> [#uses=0]
+@p = constant i9 -1 ; <i9*> [#uses=0]
diff --git a/test/Integer/varargs_bt.ll b/test/Integer/varargs_bt.ll
index e740fd36b47e..25ad58ad7934 100644
--- a/test/Integer/varargs_bt.ll
+++ b/test/Integer/varargs_bt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll;
+; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
; RUN: diff %t1.ll %t2.ll
diff --git a/test/LLVMC/C++/dash-x.cpp b/test/LLVMC/C++/dash-x.cpp
new file mode 100644
index 000000000000..b32400e0b039
--- /dev/null
+++ b/test/LLVMC/C++/dash-x.cpp
@@ -0,0 +1,9 @@
+// Test that we can compile .c files as C++ and vice versa
+// RUN: llvmc %s -x c++ %p/../test_data/false.c -x c %p/../test_data/false.cpp -x lisp -x whatnot -x none %p/../test_data/false2.cpp -o %t
+// RUN: %abs_tmp | grep hello
+
+extern int test_main();
+
+int main() {
+ test_main();
+}
diff --git a/test/LLVMC/C++/dg.exp b/test/LLVMC/C++/dg.exp
new file mode 100644
index 000000000000..fc852e30acf8
--- /dev/null
+++ b/test/LLVMC/C++/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if [ llvm_gcc_supports c++ ] then {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/LLVMC/C++/hello.cpp b/test/LLVMC/C++/hello.cpp
new file mode 100644
index 000000000000..b9c6399ebfc1
--- /dev/null
+++ b/test/LLVMC/C++/hello.cpp
@@ -0,0 +1,8 @@
+// Test that we can compile C++ code.
+// RUN: llvmc %s -o %t
+// RUN: %abs_tmp | grep hello
+#include <iostream>
+
+int main() {
+ std::cout << "hello" << '\n';
+}
diff --git a/test/LLVMC/C++/together.cpp b/test/LLVMC/C++/together.cpp
new file mode 100644
index 000000000000..e02f69aec8d7
--- /dev/null
+++ b/test/LLVMC/C++/together.cpp
@@ -0,0 +1,9 @@
+// Check that we can compile files of different types together.
+// RUN: llvmc %s %p/../test_data/together.c -o %t
+// RUN: %abs_tmp | grep hello
+
+extern "C" void test();
+
+int main() {
+ test();
+}
diff --git a/test/LLVMC/C/dg.exp b/test/LLVMC/C/dg.exp
new file mode 100644
index 000000000000..a9be28a63cf6
--- /dev/null
+++ b/test/LLVMC/C/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if [ llvm_gcc_supports c ] then {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/LLVMC/C/emit-llvm.c b/test/LLVMC/C/emit-llvm.c
new file mode 100644
index 000000000000..38bbba6f0afc
--- /dev/null
+++ b/test/LLVMC/C/emit-llvm.c
@@ -0,0 +1,4 @@
+// RUN: llvmc -c -emit-llvm -o - %s | llvm-dis | grep "@f0()" | count 1
+
+int f0(void) {
+}
diff --git a/test/LLVMC/C/hello.c b/test/LLVMC/C/hello.c
new file mode 100644
index 000000000000..b2d903f8d53f
--- /dev/null
+++ b/test/LLVMC/C/hello.c
@@ -0,0 +1,12 @@
+/*
+ * Check that we can compile helloworld
+ * RUN: llvmc %s -o %t
+ * RUN: %abs_tmp | grep hello
+ */
+
+#include <stdio.h>
+
+int main() {
+ printf("hello\n");
+ return 0;
+}
diff --git a/test/LLVMC/C/include.c b/test/LLVMC/C/include.c
new file mode 100644
index 000000000000..07ae761d2c88
--- /dev/null
+++ b/test/LLVMC/C/include.c
@@ -0,0 +1,9 @@
+/*
+ * Check that the 'include' options work.
+ * RUN: echo "int x;\n" > %t1.inc
+ * RUN: llvmc -include %t1.inc -fsyntax-only %s
+ */
+
+int f0(void) {
+ return x;
+}
diff --git a/test/LLVMC/C/opt-test.c b/test/LLVMC/C/opt-test.c
new file mode 100644
index 000000000000..d69dc9b479f8
--- /dev/null
+++ b/test/LLVMC/C/opt-test.c
@@ -0,0 +1,12 @@
+/*
+ * Check that the -opt switch works.
+ * RUN: llvmc %s -opt -o %t
+ * RUN: %abs_tmp | grep hello
+ */
+
+#include <stdio.h>
+
+int main() {
+ printf("hello\n");
+ return 0;
+}
diff --git a/test/LLVMC/C/sink.c b/test/LLVMC/C/sink.c
new file mode 100644
index 000000000000..bdff340da903
--- /dev/null
+++ b/test/LLVMC/C/sink.c
@@ -0,0 +1,12 @@
+/*
+ * Check that the 'sink' options work.
+ * RUN: llvmc -v -Wall %s -o %t |& grep "Wall"
+ * RUN: %abs_tmp | grep hello
+ */
+
+#include <stdio.h>
+
+int main() {
+ printf("hello\n");
+ return 0;
+}
diff --git a/test/LLVMC/C/wall.c b/test/LLVMC/C/wall.c
new file mode 100644
index 000000000000..f6760990b881
--- /dev/null
+++ b/test/LLVMC/C/wall.c
@@ -0,0 +1,12 @@
+/*
+ * Check that -Wall works as intended
+ * RUN: llvmc -Wall %s -o %t
+ * RUN: %abs_tmp | grep hello
+ */
+
+#include <stdio.h>
+
+int main() {
+ printf("hello\n");
+ return 0;
+}
diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td
index f4a304730c52..b30f84c6e2a3 100644
--- a/test/LLVMC/EmptyCompilationGraph.td
+++ b/test/LLVMC/EmptyCompilationGraph.td
@@ -1,5 +1,5 @@
// Check that the compilation graph can be empty.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s
+// RUN: tblgen -I %p/../../include --gen-llvmc %s
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td
index cf2a0e376f9a..694468f2dda8 100644
--- a/test/LLVMC/EnvParentheses.td
+++ b/test/LLVMC/EnvParentheses.td
@@ -1,6 +1,6 @@
// Check the fix for PR4157.
// http://llvm.org/bugs/show_bug.cgi?id=4157
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s -o %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: not grep {)));} %t
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td
index a04a88b002aa..5c69af7d8054 100644
--- a/test/LLVMC/ExternOptions.td
+++ b/test/LLVMC/ExternOptions.td
@@ -1,6 +1,6 @@
// Check that extern options work.
// The dummy tool and graph are required to silence warnings.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s -o %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: grep {extern .* AutoGeneratedSwitch_Wall} %t
include "llvm/CompilerDriver/Common.td"
@@ -10,7 +10,7 @@ def OptList : OptionList<[(switch_option "Wall", (extern)),
(prefix_list_option "L", (extern))]>;
def dummy_tool : Tool<[
-(cmd_line "dummy_cmd"),
+(cmd_line "dummy_cmd $INFILE"),
(in_language "dummy"),
(out_language "dummy"),
(actions (case
diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td
index 5e5a087c6294..51bd494610ea 100644
--- a/test/LLVMC/ForwardAs.td
+++ b/test/LLVMC/ForwardAs.td
@@ -1,6 +1,6 @@
// Check the fix for PR4159.
// http://llvm.org/bugs/show_bug.cgi?id=4159
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s -o %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: grep unique_name %t
include "llvm/CompilerDriver/Common.td"
@@ -8,7 +8,7 @@ include "llvm/CompilerDriver/Common.td"
def OptList : OptionList<[(parameter_option "dummy", (extern))]>;
def dummy_tool : Tool<[
-(cmd_line "dummy_cmd"),
+(cmd_line "dummy_cmd $INFILE"),
(in_language "dummy"),
(out_language "dummy"),
(actions (case
diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td
index 0f7c3797cb44..3bdb3eeb9d43 100644
--- a/test/LLVMC/HookWithArguments.td
+++ b/test/LLVMC/HookWithArguments.td
@@ -1,5 +1,5 @@
// Check that hooks with arguments work.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s -o %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: grep {Hook(const char\\* Arg0, const char\\* Arg1, const char\\* Arg2);} %t | count 1
// RUN: grep "/path" %t | count 1
// RUN: grep "VARIABLE" %t | count 1
diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td
index d68a115af19d..bd1e0338f755 100644
--- a/test/LLVMC/MultiValuedOption.td
+++ b/test/LLVMC/MultiValuedOption.td
@@ -1,6 +1,6 @@
// Check that multivalued options work.
// The dummy tool and graph are required to silence warnings.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s -o %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: grep cl::multi_val(2) %t | count 1
include "llvm/CompilerDriver/Common.td"
@@ -10,7 +10,7 @@ def OptList : OptionList<[
(parameter_list_option "baz", (multi_val 2), (extern))]>;
def dummy_tool : Tool<[
-(cmd_line "dummy_cmd"),
+(cmd_line "dummy_cmd $INFILE"),
(in_language "dummy"),
(out_language "dummy"),
(actions (case
diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td
index 3305fc6121e2..64dbc9b1845c 100644
--- a/test/LLVMC/MultipleCompilationGraphs.td
+++ b/test/LLVMC/MultipleCompilationGraphs.td
@@ -1,5 +1,5 @@
// Check that multiple compilation graphs are allowed.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s
+// RUN: tblgen -I %p/../../include --gen-llvmc %s
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td
index 43fd0079eee2..2a4a7495ab0b 100644
--- a/test/LLVMC/NoActions.td
+++ b/test/LLVMC/NoActions.td
@@ -1,10 +1,10 @@
// Check that tools without associated actions are accepted.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s | grep dummy_tool
+// RUN: tblgen -I %p/../../include --gen-llvmc %s | grep dummy_tool
include "llvm/CompilerDriver/Common.td"
def dummy_tool : Tool<[
-(cmd_line "dummy_cmd"),
+(cmd_line "dummy_cmd $INFILE"),
(in_language "dummy"),
(out_language "dummy")
]>;
diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td
index 6476a371cfbc..2eea3e98343c 100644
--- a/test/LLVMC/NoCompilationGraph.td
+++ b/test/LLVMC/NoCompilationGraph.td
@@ -1,4 +1,4 @@
// Check that the compilation graph is not required.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s
+// RUN: tblgen -I %p/../../include --gen-llvmc %s
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/ObjC++/dg.exp b/test/LLVMC/ObjC++/dg.exp
new file mode 100644
index 000000000000..41c3db2af097
--- /dev/null
+++ b/test/LLVMC/ObjC++/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if [ llvm_gcc_supports obj-c++ ] then {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{mm}]]
+}
diff --git a/test/LLVMC/ObjC++/hello.mm b/test/LLVMC/ObjC++/hello.mm
new file mode 100644
index 000000000000..2125dc76b722
--- /dev/null
+++ b/test/LLVMC/ObjC++/hello.mm
@@ -0,0 +1,8 @@
+// Test that we can compile Objective-C++ code.
+// RUN: llvmc %s -o %t
+// RUN: %abs_tmp | grep hello
+#include <iostream>
+
+int main() {
+ std::cout << "hello" << '\n';
+}
diff --git a/test/LLVMC/ObjC/dg.exp b/test/LLVMC/ObjC/dg.exp
new file mode 100644
index 000000000000..18f73a797879
--- /dev/null
+++ b/test/LLVMC/ObjC/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if [ llvm_gcc_supports objc ] then {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{m}]]
+}
diff --git a/test/LLVMC/ObjC/hello.m b/test/LLVMC/ObjC/hello.m
new file mode 100644
index 000000000000..b2d903f8d53f
--- /dev/null
+++ b/test/LLVMC/ObjC/hello.m
@@ -0,0 +1,12 @@
+/*
+ * Check that we can compile helloworld
+ * RUN: llvmc %s -o %t
+ * RUN: %abs_tmp | grep hello
+ */
+
+#include <stdio.h>
+
+int main() {
+ printf("hello\n");
+ return 0;
+}
diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td
index f27ae9707d70..38b7eb7dffea 100644
--- a/test/LLVMC/OneOrMore.td
+++ b/test/LLVMC/OneOrMore.td
@@ -1,6 +1,6 @@
// Check that (one_or_more) and (zero_or_one) properties work.
// The dummy tool and graph are required to silence warnings.
-// RUN: tblgen -I $srcroot/include --gen-llvmc %s -o %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: grep cl::ZeroOrOne %t | count 1
// RUN: grep cl::OneOrMore %t | count 1
@@ -11,7 +11,7 @@ def OptList : OptionList<[
(parameter_list_option "baz", (zero_or_one))]>;
def dummy_tool : Tool<[
-(cmd_line "dummy_cmd"),
+(cmd_line "dummy_cmd $INFILE"),
(in_language "dummy"),
(out_language "dummy"),
(actions (case
diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td
index bf2173c7938d..1a4064e44b27 100644
--- a/test/LLVMC/TestWarnings.td
+++ b/test/LLVMC/TestWarnings.td
@@ -1,6 +1,6 @@
// Check that the compiler warns about unused options.
// This should fail because the output is printed on stderr.
-// RUN: ignore tblgen -I $srcroot/include --gen-llvmc %s |& grep "option '-Wall' has no effect!"
+// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!"
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/dg.exp b/test/LLVMC/dg.exp
new file mode 100644
index 000000000000..f7d275ad8cb1
--- /dev/null
+++ b/test/LLVMC/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{td}]]
diff --git a/test/LLVMC/test_data/false.c b/test/LLVMC/test_data/false.c
new file mode 100644
index 000000000000..3e4e8a7e9280
--- /dev/null
+++ b/test/LLVMC/test_data/false.c
@@ -0,0 +1,10 @@
+#include <iostream>
+
+extern "C" void test();
+extern std::string test2();
+
+int test_main() {
+ std::cout << "h";
+ test();
+ std::cout << test2() << '\n';
+}
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index 68812467e781..af0e6434fb1c 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -2,8 +2,8 @@
; one...
; RUN: echo {define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
-; RUN: llvm-as %s -o %t.2.bc -f
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep @foo() | grep -v internal
+; RUN: llvm-as %s -o %t.2.bc
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep @foo() | grep -v internal
define i32 @foo() { ret i32 0 }
diff --git a/test/Linker/2003-01-30-LinkerTypeRename.ll b/test/Linker/2003-01-30-LinkerTypeRename.ll
index 288daf1cba1c..67a0626ec037 100644
--- a/test/Linker/2003-01-30-LinkerTypeRename.ll
+++ b/test/Linker/2003-01-30-LinkerTypeRename.ll
@@ -3,7 +3,7 @@
; RUN: echo {%Ty = type opaque @GV = external global %Ty*} | llvm-as > %t.1.bc
; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep {%Ty } | not grep opaque
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {%Ty } | not grep opaque
%Ty = type i32
diff --git a/test/Linker/2003-04-21-Linkage.ll b/test/Linker/2003-04-21-Linkage.ll
index 31aace8e44c4..f6d4c4b03b7f 100644
--- a/test/Linker/2003-04-21-Linkage.ll
+++ b/test/Linker/2003-04-21-Linkage.ll
@@ -1,6 +1,6 @@
; RUN: echo {@X = linkonce global i32 5 \
; RUN: define linkonce i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
-; RUN: llvm-as %s -o %t.2.bc -f
+; RUN: llvm-as %s -o %t.2.bc
; RUN: llvm-link %t.1.bc %t.2.bc
@X = external global i32
diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll
index d0858d95ab1c..beaf6ec52176 100644
--- a/test/Linker/2003-04-23-LinkOnceLost.ll
+++ b/test/Linker/2003-04-23-LinkOnceLost.ll
@@ -2,9 +2,9 @@
; one...
; RUN: echo { define linkonce void @foo() \{ ret void \} } | \
-; RUN: llvm-as -o %t.2.bc -f
-; RUN: llvm-as %s -o %t.1.bc -f
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep foo | grep linkonce
+; RUN: llvm-as -o %t.2.bc
+; RUN: llvm-as %s -o %t.1.bc
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep foo | grep linkonce
declare void @foo()
diff --git a/test/Linker/2003-04-26-NullPtrLinkProblem.ll b/test/Linker/2003-04-26-NullPtrLinkProblem.ll
index df12fb3a7a57..54ba05153f49 100644
--- a/test/Linker/2003-04-26-NullPtrLinkProblem.ll
+++ b/test/Linker/2003-04-26-NullPtrLinkProblem.ll
@@ -2,7 +2,7 @@
; the same type to be created!
; RUN: echo {%T = type i32} | llvm-as > %t.2.bc
-; RUN: llvm-as %s -f -o %t.1.bc
+; RUN: llvm-as %s -o %t.1.bc
; RUN: llvm-link %t.1.bc %t.2.bc
%T = type opaque
diff --git a/test/Linker/2003-05-15-TypeProblem.ll b/test/Linker/2003-05-15-TypeProblem.ll
index e914a69e34fb..18fcea00a13c 100644
--- a/test/Linker/2003-05-15-TypeProblem.ll
+++ b/test/Linker/2003-05-15-TypeProblem.ll
@@ -1,10 +1,10 @@
; This one fails because the LLVM runtime is allowing two null pointers of
; the same type to be created!
-; RUN: echo {%S = type \{ %T*\} %T = type opaque} | llvm-as > %t.2.bc
+; RUN: echo {%M = type \{ %N*\} %N = type opaque} | llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
; RUN: llvm-link %t.1.bc %t.2.bc
-%S = type { i32* }
-%T = type i32
+%M = type { i32* }
+%N = type i32
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index c3661ae9aa3b..498fc14b3538 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -6,7 +6,7 @@
; RUN: echo { define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep internal | not grep @foo(
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep @foo(
declare i32 @foo()
diff --git a/test/Linker/2003-08-20-OpaqueTypeResolve.ll b/test/Linker/2003-08-20-OpaqueTypeResolve.ll
index a4d4bd543abe..c0fc620cfa67 100644
--- a/test/Linker/2003-08-20-OpaqueTypeResolve.ll
+++ b/test/Linker/2003-08-20-OpaqueTypeResolve.ll
@@ -1,8 +1,8 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo {%S = type \{ i32, i32* \} } | llvm-as > %t.out2.bc
+; RUN: echo {%M = type \{ i32, i32* \} } | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out1.bc %t.out2.bc
-%S = type { i32, %T* }
-%T = type opaque
+%M = type { i32, %N* }
+%N = type opaque
-;%X = global { int, %T* } { int 5, %T* null }
+;%X = global { int, %N* } { int 5, %N* null }
diff --git a/test/Linker/2003-08-23-GlobalVarLinking.ll b/test/Linker/2003-08-23-GlobalVarLinking.ll
index fd36d0422a69..c3f61f893456 100644
--- a/test/Linker/2003-08-23-GlobalVarLinking.ll
+++ b/test/Linker/2003-08-23-GlobalVarLinking.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s > %t.out1.bc
; RUN: echo {@S = external global \{ i32, opaque* \} declare void @F(opaque*)}\
; RUN: | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc | llvm-dis | not grep opaque
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep opaque
; After linking this testcase, there should be no opaque types left. The two
; S's should cause the opaque type to be resolved to 'int'.
diff --git a/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll b/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll
index 5041467604ae..ea8207530794 100644
--- a/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll
+++ b/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll
@@ -2,8 +2,8 @@
; net.
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo "%S = type { %S*, i32* }" | llvm-as > %t.out2.bc
+; RUN: echo "%M = type { %M*, i32* }" | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out1.bc %t.out2.bc
-%S = type { %S*, opaque* }
+%M = type { %M*, opaque* }
diff --git a/test/Linker/2003-08-28-TypeResolvesGlobal.ll b/test/Linker/2003-08-28-TypeResolvesGlobal.ll
index 5526b87ce77d..80b616269940 100644
--- a/test/Linker/2003-08-28-TypeResolvesGlobal.ll
+++ b/test/Linker/2003-08-28-TypeResolvesGlobal.ll
@@ -1,12 +1,12 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo "%S = type i32" | llvm-as > %t.out2.bc
+; RUN: echo "%M = type i32" | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out2.bc %t.out1.bc
-%S = type opaque
+%M = type opaque
define void @foo(i32* %V) {
ret void
}
-declare void @foo.upgrd.1(%S*)
+declare void @foo.upgrd.1(%M*)
diff --git a/test/Linker/2003-08-28-TypeResolvesGlobal2.ll b/test/Linker/2003-08-28-TypeResolvesGlobal2.ll
index 3f306b167b1d..601b917210d5 100644
--- a/test/Linker/2003-08-28-TypeResolvesGlobal2.ll
+++ b/test/Linker/2003-08-28-TypeResolvesGlobal2.ll
@@ -1,17 +1,17 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo "%S = type i32" | llvm-as > %t.out2.bc
+; RUN: echo "%M = type i32" | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out2.bc %t.out1.bc
-%S = type opaque
+%M = type opaque
define void @foo(i32* %V) {
ret void
}
-declare void @foo.upgrd.1(%S*)
+declare void @foo.upgrd.1(%M*)
define void @other() {
- call void @foo.upgrd.1( %S* null )
+ call void @foo.upgrd.1( %M* null )
call void @foo( i32* null )
ret void
}
diff --git a/test/Linker/2003-08-28-TypeResolvesGlobal3.ll b/test/Linker/2003-08-28-TypeResolvesGlobal3.ll
index 38b7851ab860..f77d9e6d3b96 100644
--- a/test/Linker/2003-08-28-TypeResolvesGlobal3.ll
+++ b/test/Linker/2003-08-28-TypeResolvesGlobal3.ll
@@ -1,15 +1,15 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo "%S = type i32" | llvm-as > %t.out2.bc
+; RUN: echo "%M = type i32" | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out2.bc %t.out1.bc
-%S = type opaque
+%M = type opaque
; Global using the resolved function prototype
-global void (%S*)* @foo ; <void (%S*)**>:0 [#uses=0]
+global void (%M*)* @foo ; <void (%M*)**>:0 [#uses=0]
define void @foo.upgrd.1(i32* %V) {
ret void
}
-declare void @foo(%S*)
+declare void @foo(%M*)
diff --git a/test/Linker/2003-10-21-ConflictingTypesTolerance.ll b/test/Linker/2003-10-21-ConflictingTypesTolerance.ll
index 4f98a2003253..7cdf7ad0dada 100644
--- a/test/Linker/2003-10-21-ConflictingTypesTolerance.ll
+++ b/test/Linker/2003-10-21-ConflictingTypesTolerance.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo { %S = type \[8 x i32\] external global %S } | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc | llvm-dis | grep %S | grep \\{
-%S = type { i32 }
+; RUN: echo { %M = type \[8 x i32\] external global %M } | llvm-as > %t.out2.bc
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep %M | grep \\{
+%M = type { i32 }
diff --git a/test/Linker/2004-02-17-WeakStrongLinkage.ll b/test/Linker/2004-02-17-WeakStrongLinkage.ll
index 0e970ddb489a..224463949d35 100644
--- a/test/Linker/2004-02-17-WeakStrongLinkage.ll
+++ b/test/Linker/2004-02-17-WeakStrongLinkage.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s > %t.out2.bc
; RUN: echo "@me = global i32* null" | llvm-as > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -o /dev/null -f
+; RUN: llvm-link %t.out1.bc %t.out2.bc -o /dev/null
@me = weak global i32* null ; <i32**> [#uses=0]
diff --git a/test/Linker/2004-05-07-TypeResolution1.ll b/test/Linker/2004-05-07-TypeResolution1.ll
index 36651541e4d7..f0ade337138a 100644
--- a/test/Linker/2004-05-07-TypeResolution1.ll
+++ b/test/Linker/2004-05-07-TypeResolution1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as %s -f -o %t1.bc
-; RUN: llvm-as < %p/2004-05-07-TypeResolution2.ll -o %t2.bc -f
-; RUN: llvm-link -f -o %t3.bc %t1.bc %t2.bc
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as < %p/2004-05-07-TypeResolution2.ll -o %t2.bc
+; RUN: llvm-link -o %t3.bc %t1.bc %t2.bc
target datalayout = "e-p:32:32"
%myint = type opaque
diff --git a/test/Linker/2004-12-03-DisagreeingType.ll b/test/Linker/2004-12-03-DisagreeingType.ll
index 7378fdd35795..570bda87e2c0 100644
--- a/test/Linker/2004-12-03-DisagreeingType.ll
+++ b/test/Linker/2004-12-03-DisagreeingType.ll
@@ -1,7 +1,7 @@
; RUN: echo {@G = weak global \{\{\{\{double\}\}\}\} zeroinitializer } | \
; RUN: llvm-as > %t.out2.bc
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc | llvm-dis | not grep {\\}}
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep {\\}}
; When linked, the global above should be eliminated, being merged with the
; global below.
diff --git a/test/Linker/2005-02-12-ConstantGlobals-2.ll b/test/Linker/2005-02-12-ConstantGlobals-2.ll
index bedeb5106950..2ceae3146f27 100644
--- a/test/Linker/2005-02-12-ConstantGlobals-2.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals-2.ll
@@ -3,6 +3,6 @@
; RUN: echo {@X = external constant i32} | llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep {global i32 7}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {global i32 7}
@X = global i32 7
diff --git a/test/Linker/2005-02-12-ConstantGlobals.ll b/test/Linker/2005-02-12-ConstantGlobals.ll
index 407737287369..60f176b05341 100644
--- a/test/Linker/2005-02-12-ConstantGlobals.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals.ll
@@ -3,6 +3,6 @@
; RUN: echo {@X = global i32 7} | llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep {global i32 7}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {global i32 7}
@X = external constant i32 ; <i32*> [#uses=0]
diff --git a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
index 2637da17f3f9..7d1020ddf28e 100644
--- a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
+++ b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
@@ -1,7 +1,7 @@
; RUN: echo { @G = appending global \[0 x i32\] zeroinitializer } | \
; RUN: llvm-as > %t.out2.bc
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc | llvm-dis | grep {@G =}
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep {@G =}
; When linked, the globals should be merged, and the result should still
; be named '@G'.
diff --git a/test/Linker/2006-01-19-ConstantPacked.ll b/test/Linker/2006-01-19-ConstantPacked.ll
index d7d864b41b16..d2409e20c4d9 100644
--- a/test/Linker/2006-01-19-ConstantPacked.ll
+++ b/test/Linker/2006-01-19-ConstantPacked.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as %s -f -o %t1.bc
-; RUN: llvm-link -f -o %t2.bc %t1.bc
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-link -o %t2.bc %t1.bc
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin7.7.0"
diff --git a/test/Linker/2006-06-15-GlobalVarAlignment.ll b/test/Linker/2006-06-15-GlobalVarAlignment.ll
index 6e6d56a039ec..df3284bedea8 100644
--- a/test/Linker/2006-06-15-GlobalVarAlignment.ll
+++ b/test/Linker/2006-06-15-GlobalVarAlignment.ll
@@ -2,6 +2,6 @@
; RUN: echo {@X = global i32 7, align 8} | llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep {align 8}
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {align 8}
@X = weak global i32 7, align 4
diff --git a/test/Linker/2008-03-05-AliasReference.ll b/test/Linker/2008-03-05-AliasReference.ll
index 1663b00845e5..7c19dfa15a05 100644
--- a/test/Linker/2008-03-05-AliasReference.ll
+++ b/test/Linker/2008-03-05-AliasReference.ll
@@ -1,7 +1,7 @@
; PR2054
-; RUN: llvm-as %s -o %t1.bc -f
-; RUN: llvm-as %p/2008-03-05-AliasReference2.ll -o %t2.bc -f
-; RUN: llvm-link %t2.bc %t1.bc -f -o %t3.bc
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/2008-03-05-AliasReference2.ll -o %t2.bc
+; RUN: llvm-link %t2.bc %t1.bc -o %t3.bc
; ModuleID = 'bug.o'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Linker/2008-06-13-LinkOnceRedefinition.ll b/test/Linker/2008-06-13-LinkOnceRedefinition.ll
index 3478880ebdac..49da96af949a 100644
--- a/test/Linker/2008-06-13-LinkOnceRedefinition.ll
+++ b/test/Linker/2008-06-13-LinkOnceRedefinition.ll
@@ -1,8 +1,8 @@
; Test linking two functions with different prototypes and two globals
; in different modules.
-; RUN: llvm-as %s -o %t.foo1.bc -f
-; RUN: llvm-as %s -o %t.foo2.bc -f
-; RUN: echo {define linkonce void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc -f
-; RUN: llvm-link %t.foo1.bc %t.foo2.bc | llvm-dis
-; RUN: llvm-link %t.foo1.bc %t.foo3.bc | llvm-dis
+; RUN: llvm-as %s -o %t.foo1.bc
+; RUN: llvm-as %s -o %t.foo2.bc
+; RUN: echo {define linkonce void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc
+; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S
+; RUN: llvm-link %t.foo1.bc %t.foo3.bc -S
define linkonce void @foo() { ret void }
diff --git a/test/Linker/2008-06-26-AddressSpace.ll b/test/Linker/2008-06-26-AddressSpace.ll
index 7f2110628e08..e3ed385b68a1 100644
--- a/test/Linker/2008-06-26-AddressSpace.ll
+++ b/test/Linker/2008-06-26-AddressSpace.ll
@@ -1,9 +1,9 @@
; Test linking two functions with different prototypes and two globals
; in different modules.
-; RUN: llvm-as %s -o %t.foo1.bc -f
-; RUN: echo | llvm-as -o %t.foo2.bc -f
-; RUN: llvm-link %t.foo2.bc %t.foo1.bc | llvm-dis | grep {addrspace(2)}
-; RUN: llvm-link %t.foo1.bc %t.foo2.bc | llvm-dis | grep {addrspace(2)}
+; RUN: llvm-as %s -o %t.foo1.bc
+; RUN: echo | llvm-as -o %t.foo2.bc
+; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep {addrspace(2)}
+; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep {addrspace(2)}
; rdar://6038021
@G = addrspace(2) global i32 256
diff --git a/test/Linker/2008-07-06-AliasFnDecl.ll b/test/Linker/2008-07-06-AliasFnDecl.ll
index dca9cd8e8fa4..8e8c8454d941 100644
--- a/test/Linker/2008-07-06-AliasFnDecl.ll
+++ b/test/Linker/2008-07-06-AliasFnDecl.ll
@@ -1,7 +1,7 @@
; PR2146
-; RUN: llvm-as %s -o %t1.bc -f
-; RUN: llvm-as %p/2008-07-06-AliasFnDecl2.ll -o %t2.bc -f
-; RUN: llvm-link %t1.bc %t2.bc -f -o %t3.bc
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/2008-07-06-AliasFnDecl2.ll -o %t2.bc
+; RUN: llvm-link %t1.bc %t2.bc -o %t3.bc
@b = alias void ()* @a
diff --git a/test/Linker/2008-07-06-AliasWeakDest.ll b/test/Linker/2008-07-06-AliasWeakDest.ll
index af8964064c9a..e631175444c0 100644
--- a/test/Linker/2008-07-06-AliasWeakDest.ll
+++ b/test/Linker/2008-07-06-AliasWeakDest.ll
@@ -1,8 +1,8 @@
; PR2463
-; RUN: llvm-as %s -o %t1.bc -f
-; RUN: llvm-as %p/2008-07-06-AliasWeakDest2.ll -o %t2.bc -f
-; RUN: llvm-link %t1.bc %t2.bc -f -o %t3.bc
-; RUN: llvm-link %t2.bc %t1.bc -f -o %t4.bc
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/2008-07-06-AliasWeakDest2.ll -o %t2.bc
+; RUN: llvm-link %t1.bc %t2.bc -o %t3.bc
+; RUN: llvm-link %t2.bc %t1.bc -o %t4.bc
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/Linker/2009-09-03-mdnode.ll b/test/Linker/2009-09-03-mdnode.ll
new file mode 100644
index 000000000000..11862f70b293
--- /dev/null
+++ b/test/Linker/2009-09-03-mdnode.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s > %t.bc
+; RUN: llvm-as < %p/2009-09-03-mdnode2.ll > %t2.bc
+; RUN: llvm-link %t.bc %t2.bc
+
+declare void @f() nounwind
+
+define i32 @main(...) nounwind {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ call void @llvm.dbg.func.start(metadata !0)
+ store i32 0, i32* %retval
+ call void @llvm.dbg.stoppoint(i32 4, i32 5, metadata !1)
+ call void @f()
+ br label %return
+
+return: ; preds = %entry
+ %0 = load i32* %retval ; <i32> [#uses=1]
+ call void @llvm.dbg.stoppoint(i32 5, i32 1, metadata !1)
+ call void @llvm.dbg.region.end(metadata !0)
+ ret i32 %0
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 2, null, i1 false, i1 true}
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"a.c", metadata !"/home/rich/ellcc/test/source", metadata !"ellcc 0.1.0", i1 true, i1 true, metadata !"", i32 0}
diff --git a/test/Linker/2009-09-03-mdnode2.ll b/test/Linker/2009-09-03-mdnode2.ll
new file mode 100644
index 000000000000..21589a49b79e
--- /dev/null
+++ b/test/Linker/2009-09-03-mdnode2.ll
@@ -0,0 +1,25 @@
+; This file is used by 2009-09-03-mdnode.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+define i32 @f(...) nounwind {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=1]
+ call void @llvm.dbg.func.start(metadata !0)
+ br label %return
+
+return: ; preds = %entry
+ %0 = load i32* %retval ; <i32> [#uses=1]
+ call void @llvm.dbg.stoppoint(i32 3, i32 1, metadata !1)
+ call void @llvm.dbg.region.end(metadata !0)
+ ret i32 %0
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"f", metadata !1, i32 1, null, i1 false, i1 true}
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"b.c", metadata !"/home/rich/ellcc/test/source", metadata !"ellcc 0.1.0", i1 true, i1 true, metadata !"", i32 0}
diff --git a/test/Linker/AppendingLinkage.ll b/test/Linker/AppendingLinkage.ll
index da08ca098893..134a42ef215b 100644
--- a/test/Linker/AppendingLinkage.ll
+++ b/test/Linker/AppendingLinkage.ll
@@ -3,7 +3,7 @@
; RUN: echo {@X = appending global \[1 x i32\] \[i32 8\] } | \
; RUN: llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep 7 | grep 4 | grep 8
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 4 | grep 8
@X = appending global [2 x i32] [ i32 7, i32 4 ] ; <[2 x i32]*> [#uses=2]
@Y = global i32* getelementptr ([2 x i32]* @X, i64 0, i64 0) ; <i32**> [#uses=0]
diff --git a/test/Linker/AppendingLinkage2.ll b/test/Linker/AppendingLinkage2.ll
index fddc4941be97..2c1302f39b1f 100644
--- a/test/Linker/AppendingLinkage2.ll
+++ b/test/Linker/AppendingLinkage2.ll
@@ -3,6 +3,6 @@
; RUN: echo {@X = appending global \[1 x i32\] \[i32 8\] } | \
; RUN: llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep 7 | grep 8
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 8
@X = appending global [1 x i32] [ i32 7 ] ; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/ConstantGlobals1.ll b/test/Linker/ConstantGlobals1.ll
index 5d42f4bf5483..8fdbe508db7f 100644
--- a/test/Linker/ConstantGlobals1.ll
+++ b/test/Linker/ConstantGlobals1.ll
@@ -3,7 +3,7 @@
; RUN: echo {@X = constant \[1 x i32\] \[i32 8\] } | \
; RUN: llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
@X = external global [1 x i32] ; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/ConstantGlobals2.ll b/test/Linker/ConstantGlobals2.ll
index 9cd6bdb8c49a..ad4428b95223 100644
--- a/test/Linker/ConstantGlobals2.ll
+++ b/test/Linker/ConstantGlobals2.ll
@@ -3,7 +3,7 @@
; RUN: echo {@X = external global \[1 x i32\] } | \
; RUN: llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
@X = constant [1 x i32] [ i32 12 ] ; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/ConstantGlobals3.ll b/test/Linker/ConstantGlobals3.ll
index f9aa07d1bc50..e25529ae1bfc 100644
--- a/test/Linker/ConstantGlobals3.ll
+++ b/test/Linker/ConstantGlobals3.ll
@@ -3,6 +3,6 @@
; RUN: echo {@X = external constant \[1 x i32\] } | \
; RUN: llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
@X = external global [1 x i32] ; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/LinkOnce.ll b/test/Linker/LinkOnce.ll
index 5befd77a4aaa..56633fb8da7b 100644
--- a/test/Linker/LinkOnce.ll
+++ b/test/Linker/LinkOnce.ll
@@ -3,6 +3,6 @@
; RUN: echo "@X = linkonce global i32 8" | llvm-as > %t.2.bc
; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc | llvm-dis
+; RUN: llvm-link %t.1.bc %t.2.bc -S
@X = linkonce global i32 7 ; <i32*> [#uses=0]
diff --git a/test/Linker/basiclink.ll b/test/Linker/basiclink.ll
index 711a0f4715ed..afe0320ba92f 100644
--- a/test/Linker/basiclink.ll
+++ b/test/Linker/basiclink.ll
@@ -1,10 +1,10 @@
; Test linking two functions with different prototypes and two globals
; in different modules. This is for PR411
-; RUN: llvm-as %s -o %t.bar.bc -f
+; RUN: llvm-as %s -o %t.bar.bc
; RUN: echo {define i32* @foo(i32 %x) \{ ret i32* @baz \} \
-; RUN: @baz = external global i32 } | llvm-as -o %t.foo.bc -f
-; RUN: llvm-link %t.bar.bc %t.foo.bc -o %t.bc -f
-; RUN: llvm-link %t.foo.bc %t.bar.bc -o %t.bc -f
+; RUN: @baz = external global i32 } | llvm-as -o %t.foo.bc
+; RUN: llvm-link %t.bar.bc %t.foo.bc -o %t.bc
+; RUN: llvm-link %t.foo.bc %t.bar.bc -o %t.bc
declare i32* @foo(...)
define i32* @bar() {
%ret = call i32* (...)* @foo( i32 123 )
diff --git a/test/Linker/link-archive.ll b/test/Linker/link-archive.ll
index 33088c09c37b..6696fcc68c37 100644
--- a/test/Linker/link-archive.ll
+++ b/test/Linker/link-archive.ll
@@ -1,8 +1,8 @@
; Test linking of a bc file to an archive via llvm-ld.
; PR1434
-; RUN: llvm-as %s -o %t.bar.bc -f
+; RUN: llvm-as %s -o %t.bar.bc
; RUN: echo {define i32* @foo(i32 %x) \{ ret i32* @baz \} \
-; RUN: @baz = external global i32 } | llvm-as -o %t.foo.bc -f
+; RUN: @baz = external global i32 } | llvm-as -o %t.foo.bc
; RUN: llvm-ar rcf %t.foo.a %t.foo.bc
; RUN: llvm-ar rcf %t.bar.a %t.bar.bc
; RUN: llvm-ld -disable-opt %t.bar.bc %t.foo.a -o %t.bc
diff --git a/test/Linker/link-global-to-func.ll b/test/Linker/link-global-to-func.ll
index f9cbc46f7333..2fc501dedc90 100644
--- a/test/Linker/link-global-to-func.ll
+++ b/test/Linker/link-global-to-func.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as %s -o %t1.bc -f
-; RUN: echo {declare void @__eprintf(i8*, i8*, i32, i8*) noreturn define void @foo() { tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind unreachable }} | llvm-as -o %t2.bc -f
-; RUN: llvm-link %t2.bc %t1.bc -o - | llvm-dis | grep __eprintf
-; RUN: llvm-link %t1.bc %t2.bc -o - | llvm-dis | grep __eprintf
+; RUN: llvm-as %s -o %t1.bc
+; RUN: echo {declare void @__eprintf(i8*, i8*, i32, i8*) noreturn define void @foo() { tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind unreachable }} | llvm-as -o %t2.bc
+; RUN: llvm-link %t2.bc %t1.bc -S | grep __eprintf
+; RUN: llvm-link %t1.bc %t2.bc -S | grep __eprintf
; rdar://6072702
diff --git a/test/Linker/link-messages.ll b/test/Linker/link-messages.ll
index f0f10aa83267..920782d15bb6 100644
--- a/test/Linker/link-messages.ll
+++ b/test/Linker/link-messages.ll
@@ -1,7 +1,7 @@
; Test that linking two files with the same definition causes an error and
; that error is printed out.
-; RUN: llvm-as %s -o %t.one.bc -f
-; RUN: llvm-as %s -o %t.two.bc -f
+; RUN: llvm-as %s -o %t.one.bc
+; RUN: llvm-as %s -o %t.two.bc
; RUN: not llvm-ld -disable-opt -link-as-library %t.one.bc %t.two.bc \
; RUN: -o %t.bc 2>%t.err
; RUN: grep "symbol multiply defined" %t.err
diff --git a/test/Linker/linkmdnode.ll b/test/Linker/linkmdnode.ll
new file mode 100644
index 000000000000..be7455056cd9
--- /dev/null
+++ b/test/Linker/linkmdnode.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s > %t.bc
+; RUN: llvm-as < %p/linkmdnode2.ll > %t2.bc
+; RUN: llvm-link %t.bc %t2.bc
+
+
+!21 = metadata !{i32 42, metadata !"foobar"}
+
+declare i8 @llvm.something(metadata %a)
+define void @foo() {
+ %x = call i8 @llvm.something(metadata !21)
+ ret void
+}
diff --git a/test/Linker/linkmdnode2.ll b/test/Linker/linkmdnode2.ll
new file mode 100644
index 000000000000..54a5a578b60b
--- /dev/null
+++ b/test/Linker/linkmdnode2.ll
@@ -0,0 +1,12 @@
+; This file is used by linkmdnode.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+!22 = metadata !{i32 42, metadata !"foobar"}
+
+declare i8 @llvm.something(metadata %a)
+define void @foo1() {
+ ;; Intrinsic using MDNode and MDString
+ %x = call i8 @llvm.something(metadata !22)
+ ret void
+}
diff --git a/test/Linker/linknamedmdnode.ll b/test/Linker/linknamedmdnode.ll
new file mode 100644
index 000000000000..e6b779f1fc5d
--- /dev/null
+++ b/test/Linker/linknamedmdnode.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s > %t.bc
+; RUN: llvm-as < %p/linknamedmdnode2.ll > %t2.bc
+; RUN: llvm-link %t.bc %t2.bc -S | grep "!llvm.stuff = !{!0, !1}"
+
+!0 = metadata !{i32 42}
+!llvm.stuff = !{!0}
diff --git a/test/Linker/linknamedmdnode2.ll b/test/Linker/linknamedmdnode2.ll
new file mode 100644
index 000000000000..d16f62abed33
--- /dev/null
+++ b/test/Linker/linknamedmdnode2.ll
@@ -0,0 +1,6 @@
+; This file is used by linknamedmdnode.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+!0 = metadata !{i32 41}
+!llvm.stuff = !{!0}
diff --git a/test/Linker/partial-type-refinement-link.ll b/test/Linker/partial-type-refinement-link.ll
new file mode 100644
index 000000000000..320ef969f83a
--- /dev/null
+++ b/test/Linker/partial-type-refinement-link.ll
@@ -0,0 +1,20 @@
+; This file is used by partial-type-refinement.ll, so it doesn't actually do anything itself
+; RUN: true
+
+%AnalysisResolver = type { i8, %PMDataManager* }
+%"DenseMap<P*,AU*>" = type { i64, %"pair<P*,AU*>"*, i64, i64 }
+%PMDataManager = type { i8, %PMTopLevelManager*, i8, i8, i8, i8, i8, i64, i8 }
+%PMTopLevelManager = type { i8, i8, i8, i8, i8, i8, i8, i8, %"DenseMap<P*,AU*>" }
+%P = type { i8, %AnalysisResolver*, i64 }
+%PI = type { i8, i8, i8, i8, i8, i8, %"vector<const PI*>", %P* }
+%"SmallVImpl<const PI*>" = type { i8, %PI* }
+%"_V_base<const PI*>" = type { %"_V_base<const PI*>::_V_impl" }
+%"_V_base<const PI*>::_V_impl" = type { %PI*, i8, i8 }
+%"pair<P*,AU*>" = type opaque
+%"vector<const PI*>" = type { %"_V_base<const PI*>" }
+
+define void @f(%"SmallVImpl<const PI*>"* %this) {
+entry:
+ %x = getelementptr inbounds %"SmallVImpl<const PI*>"* %this, i64 0, i32 1
+ ret void
+}
diff --git a/test/Linker/partial-type-refinement.ll b/test/Linker/partial-type-refinement.ll
new file mode 100644
index 000000000000..b995f11533fe
--- /dev/null
+++ b/test/Linker/partial-type-refinement.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-link %s %p/partial-type-refinement-link.ll -S | FileCheck %s
+; PR4954
+
+; CHECK: load %PI** getelementptr inbounds (%"RegisterP<LowerArrayLength>"* @_ZN3mvmL1XE, i64 0, i32 0, i32 6, i32 0, i32 0, i32 0), align 16
+
+%AnalysisResolver = type { i8, %PMDataManager* }
+%"DenseMap<P*,AU*>" = type { i64, %"pair<P*,AU*>"*, i64, i64 }
+%PMDataManager = type { i8, %PMTopLevelManager*, i8, i8, i8, i8, i8, i64, i8 }
+%PMTopLevelManager = type { i8, i8, i8, i8, i8, i8, i8, i8, %"DenseMap<P*,AU*>" }
+%P = type { i8, %AnalysisResolver*, i64 }
+%PI = type { i8, i8, i8, i8, i8, i8, %"vector<const PI*>", %P* }
+%"RegisterP<LowerArrayLength>" = type { %PI }
+%"_V_base<const PI*>" = type { %"_V_base<const PI*>::_V_impl" }
+%"_V_base<const PI*>::_V_impl" = type { %PI*, i8, i8 }
+%"pair<P*,AU*>" = type opaque
+%"vector<const PI*>" = type { %"_V_base<const PI*>" }
+
+@_ZN3mvmL1XE = external global %"RegisterP<LowerArrayLength>"
+
+define void @__tcf_0() nounwind {
+entry:
+ %0 = load %PI** getelementptr inbounds (%"RegisterP<LowerArrayLength>"* @_ZN3mvmL1XE, i64 0, i32 0, i32 6, i32 0, i32 0, i32 0), align 16
+ ret void
+}
diff --git a/test/Linker/redefinition.ll b/test/Linker/redefinition.ll
index 15d03bce29a0..0d056891d5b2 100644
--- a/test/Linker/redefinition.ll
+++ b/test/Linker/redefinition.ll
@@ -1,8 +1,8 @@
; Test linking two functions with different prototypes and two globals
; in different modules.
-; RUN: llvm-as %s -o %t.foo1.bc -f
-; RUN: llvm-as %s -o %t.foo2.bc -f
-; RUN: echo {define void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc -f
+; RUN: llvm-as %s -o %t.foo1.bc
+; RUN: llvm-as %s -o %t.foo2.bc
+; RUN: echo {define void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc
; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc |& \
; RUN: grep {symbol multiply defined}
; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc |& \
diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll
index edb23bc4b707..aa38b1264c3e 100644
--- a/test/Linker/weakextern.ll
+++ b/test/Linker/weakextern.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s > %t.bc
; RUN: llvm-as < %p/testlink1.ll > %t2.bc
-; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc -f
+; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc
; RUN: llvm-dis < %t1.bc | grep {kallsyms_names = extern_weak}
; RUN: llvm-dis < %t1.bc | grep {MyVar = external global i32}
; RUN: llvm-dis < %t1.bc | grep {Inte = global i32}
diff --git a/test/MC/AsmParser/ARM/arm_word_directive.s b/test/MC/AsmParser/ARM/arm_word_directive.s
new file mode 100644
index 000000000000..78336913169f
--- /dev/null
+++ b/test/MC/AsmParser/ARM/arm_word_directive.s
@@ -0,0 +1,6 @@
+@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s
+
+@ CHECK: TEST0:
+@ CHECK: .long 3
+TEST0:
+ .word 3
diff --git a/test/MC/AsmParser/ARM/dg.exp b/test/MC/AsmParser/ARM/dg.exp
new file mode 100644
index 000000000000..3ff359aab39b
--- /dev/null
+++ b/test/MC/AsmParser/ARM/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/MC/AsmParser/X86/dg.exp b/test/MC/AsmParser/X86/dg.exp
new file mode 100644
index 000000000000..629a14773615
--- /dev/null
+++ b/test/MC/AsmParser/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s
new file mode 100644
index 000000000000..4c5b698d3fc6
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_instructions.s
@@ -0,0 +1,58 @@
+// FIXME: Switch back to FileCheck once we print actual instructions
+
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
+
+// CHECK: subb %al, %al
+ subb %al, %al
+
+// CHECK: addl $24, %eax
+ addl $24, %eax
+
+// CHECK: movl %eax, 10(%ebp)
+ movl %eax, 10(%ebp)
+// CHECK: movl %eax, 10(%ebp,%ebx)
+ movl %eax, 10(%ebp, %ebx)
+// CHECK: movl %eax, 10(%ebp,%ebx,4)
+ movl %eax, 10(%ebp, %ebx, 4)
+// CHECK: movl %eax, 10(,%ebx,4)
+ movl %eax, 10(, %ebx, 4)
+
+// FIXME: Check that this matches SUB32ri8
+// CHECK: subl $1, %eax
+ subl $1, %eax
+
+// FIXME: Check that this matches SUB32ri8
+// CHECK: subl $-1, %eax
+ subl $-1, %eax
+
+// FIXME: Check that this matches SUB32ri
+// CHECK: subl $256, %eax
+ subl $256, %eax
+
+// FIXME: Check that this matches XOR64ri8
+// CHECK: xorq $1, %rax
+ xorq $1, %rax
+
+// FIXME: Check that this matches XOR64ri32
+// CHECK: xorq $256, %rax
+ xorq $256, %rax
+
+// FIXME: Check that this matches SUB8rr
+// CHECK: subb %al, %bl
+ subb %al, %bl
+
+// FIXME: Check that this matches SUB16rr
+// CHECK: subw %ax, %bx
+ subw %ax, %bx
+
+// FIXME: Check that this matches SUB32rr
+// CHECK: subl %eax, %ebx
+ subl %eax, %ebx
+
+// FIXME: Check that this matches the correct instruction.
+// CHECK: call *%rax
+ call *%rax
+
+// FIXME: Check that this matches the correct instruction.
+// CHECK: shldl %cl, %eax, %ebx
+ shldl %cl, %eax, %ebx
diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s
new file mode 100644
index 000000000000..433c9bf7729e
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_operands.s
@@ -0,0 +1,58 @@
+// FIXME: Actually test that we get the expected results.
+
+// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# Immediates
+# CHECK: addl $1, %eax
+ addl $1, %eax
+# CHECK: addl $1+2, %eax
+ addl $(1+2), %eax
+# CHECK: addl $a, %eax
+ addl $a, %eax
+# CHECK: addl $1+2, %eax
+ addl $1 + 2, %eax
+
+# Disambiguation
+
+ # FIXME: Add back when we can match this.
+ #addl $1, 4+4
+ # FIXME: Add back when we can match this.
+ #addl $1, (4+4)
+# CHECK: addl $1, 4+4(%eax)
+ addl $1, 4+4(%eax)
+# CHECK: addl $1, 4+4(%eax)
+ addl $1, (4+4)(%eax)
+# CHECK: addl $1, 8(%eax)
+ addl $1, 8(%eax)
+# CHECK: addl $1, 0(%eax)
+ addl $1, (%eax)
+# CHECK: addl $1, 4+4(,%eax)
+ addl $1, (4+4)(,%eax)
+
+# Indirect Memory Operands
+# CHECK: addl $1, 1(%eax)
+ addl $1, 1(%eax)
+# CHECK: addl $1, 1(%eax,%ebx)
+ addl $1, 1(%eax,%ebx)
+# CHECK: addl $1, 1(%eax,%ebx)
+ addl $1, 1(%eax,%ebx,)
+# CHECK: addl $1, 1(%eax,%ebx,4)
+ addl $1, 1(%eax,%ebx,4)
+# CHECK: addl $1, 1(,%ebx)
+ addl $1, 1(,%ebx)
+# CHECK: addl $1, 1(,%ebx)
+ addl $1, 1(,%ebx,)
+# CHECK: addl $1, 1(,%ebx,4)
+ addl $1, 1(,%ebx,4)
+# CHECK: addl $1, 1(,%ebx,4)
+ addl $1, 1(,%ebx,(2+2))
+
+# '*'
+# CHECK: call a
+ call a
+# CHECK: call *%eax
+ call *%eax
+# CHECK: call *4(%eax)
+ call *4(%eax)
+
+
diff --git a/test/MC/AsmParser/X86/x86_word_directive.s b/test/MC/AsmParser/X86/x86_word_directive.s
new file mode 100644
index 000000000000..2950c8cd5f12
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_word_directive.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .short 3
+TEST0:
+ .word 3
diff --git a/test/MC/AsmParser/assignment.s b/test/MC/AsmParser/assignment.s
index 8e6ff34fe4b4..882fae8bae64 100644
--- a/test/MC/AsmParser/assignment.s
+++ b/test/MC/AsmParser/assignment.s
@@ -1,7 +1,7 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep "a = 0" %t2
+# CHECK: TEST0:
+# CHECK: a = 0
TEST0:
a = 0
- \ No newline at end of file
+
diff --git a/test/MC/AsmParser/conditional_asm.s b/test/MC/AsmParser/conditional_asm.s
new file mode 100644
index 000000000000..b8a514fb4fa8
--- /dev/null
+++ b/test/MC/AsmParser/conditional_asm.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s
+
+# CHECK: .byte 1+1
+.if 1+2
+ .if 1-1
+ .byte 1
+ .elseif 2+2
+ .byte 1+1
+ .else
+ .byte 0
+ .endif
+.endif
diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp
index ebd84187f520..64cb75b20ff1 100644
--- a/test/MC/AsmParser/dg.exp
+++ b/test/MC/AsmParser/dg.exp
@@ -1,3 +1,4 @@
load_lib llvm.exp
RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
+
diff --git a/test/MC/AsmParser/directive_abort.s b/test/MC/AsmParser/directive_abort.s
new file mode 100644
index 000000000000..3eb8e96f2f88
--- /dev/null
+++ b/test/MC/AsmParser/directive_abort.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s 2> %t
+# RUN: FileCheck -input-file %t %s
+
+# CHECK: .abort "please stop assembling"
+TEST0:
+ .abort "please stop assembling"
diff --git a/test/MC/AsmParser/directive_align.s b/test/MC/AsmParser/directive_align.s
index 5715cb3fc05f..15eb430bdaf3 100644
--- a/test/MC/AsmParser/directive_align.s
+++ b/test/MC/AsmParser/directive_align.s
@@ -1,16 +1,16 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-apple-darwin9 %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep ".p2align 1, 0" %t2 | count 1
+# CHECK: TEST0:
+# CHECK: .align 1
TEST0:
.align 1
-# RUN: grep -A 2 TEST1 %t > %t2
-# RUN: grep ".p2alignl 3, 0, 2" %t2 | count 1
+# CHECK: TEST1:
+# CHECK: .p2alignl 3, 0x0, 2
TEST1:
.align32 3,,2
-# RUN: grep -A 2 TEST2 %t > %t2
-# RUN: grep ".balign 3, 10" %t2 | count 1
+# CHECK: TEST2:
+# CHECK: .balign 3, 10
TEST2:
.balign 3,10
diff --git a/test/MC/AsmParser/directive_ascii.s b/test/MC/AsmParser/directive_ascii.s
index 95e194a37687..cc6d23b751d3 100644
--- a/test/MC/AsmParser/directive_ascii.s
+++ b/test/MC/AsmParser/directive_ascii.s
@@ -1,25 +1,49 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 1 TEST0 %t > %t2
-# RUN: not grep ".byte" %t2
+ .data
+# CHECK: TEST0:
TEST0:
.ascii
-# RUN: grep -A 1 TEST1 %t > %t2
-# RUN: not grep "byte" %t2
+# CHECK: TEST1:
TEST1:
.asciz
-# RUN: grep -A 2 TEST2 %t > %t2
-# RUN: grep ".byte 65" %t2 | count 1
+# CHECK: TEST2:
+# CHECK: .byte 65
TEST2:
.ascii "A"
-# RUN: grep -A 5 TEST3 %t > %t2
-# RUN: grep ".byte 66" %t2 | count 1
-# RUN: grep ".byte 67" %t2 | count 1
-# RUN: grep ".byte 0" %t2 | count 2
+# CHECK: TEST3:
+# CHECK: .byte 66
+# CHECK: .byte 0
+# CHECK: .byte 67
+# CHECK: .byte 0
TEST3:
.asciz "B", "C"
-
- \ No newline at end of file
+
+# CHECK: TEST4:
+# CHECK: .byte 1
+# CHECK: .byte 1
+# CHECK: .byte 7
+# CHECK: .byte 0
+# CHECK: .byte 56
+# CHECK: .byte 1
+# CHECK: .byte 0
+# CHECK: .byte 49
+# CHECK: .byte 128
+# CHECK: .byte 0
+TEST4:
+ .ascii "\1\01\07\08\001\0001\200\0"
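+# Note: octal escapes take at most three digits, so "\08" decodes as "\0"
+# followed by the character '8' (56), and "\0001" as "\000" followed by
+# '1' (49); "\200" is octal for 128.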
+
+# CHECK: TEST5:
+# CHECK: .byte 8
+# CHECK: .byte 12
+# CHECK: .byte 10
+# CHECK: .byte 13
+# CHECK: .byte 9
+# CHECK: .byte 92
+# CHECK: .byte 34
+TEST5:
+ .ascii "\b\f\n\r\t\\\""
+
diff --git a/test/MC/AsmParser/directive_comm.s b/test/MC/AsmParser/directive_comm.s
new file mode 100644
index 000000000000..6cc79371de8c
--- /dev/null
+++ b/test/MC/AsmParser/directive_comm.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .comm a,6,2
+# CHECK: .comm b,8
+TEST0:
+ .comm a, 4+2, 2
+ .comm b,8
diff --git a/test/MC/AsmParser/directive_darwin_section.s b/test/MC/AsmParser/directive_darwin_section.s
new file mode 100644
index 000000000000..4fea2ead930c
--- /dev/null
+++ b/test/MC/AsmParser/directive_darwin_section.s
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple i386-apple-darwin9 %s | FileCheck %s
+
+# CHECK: .section __DWARF,__debug_frame,regular,debug
+ .section __DWARF,__debug_frame,regular,debug
diff --git a/test/MC/AsmParser/directive_desc.s b/test/MC/AsmParser/directive_desc.s
new file mode 100644
index 000000000000..992455ccdc89
--- /dev/null
+++ b/test/MC/AsmParser/directive_desc.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .desc foo,16
+# CHECK: .desc bar,4
+TEST0:
+ .desc foo,0x10
+ .desc bar, 1 +3
diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s
new file mode 100644
index 000000000000..ec0b9543b942
--- /dev/null
+++ b/test/MC/AsmParser/directive_file.s
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s
+# FIXME: Actually test the output.
+
+ .file "hello"
+ .file 1 "world"
diff --git a/test/MC/AsmParser/directive_fill.s b/test/MC/AsmParser/directive_fill.s
index ec8bdf27c712..60bd468cd348 100644
--- a/test/MC/AsmParser/directive_fill.s
+++ b/test/MC/AsmParser/directive_fill.s
@@ -1,11 +1,17 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep ".byte 10" %t2 | count 1
+# CHECK: TEST0:
+# CHECK: .byte 10
TEST0:
.fill 1, 1, 10
-# RUN: grep -A 3 TEST1 %t > %t2
-# RUN: grep ".short 3" %t2 | count 2
+# CHECK: TEST1:
+# CHECK: .short 3
+# CHECK: .short 3
TEST1:
.fill 2, 2, 3
+
+# CHECK: TEST2:
+# CHECK: .quad 4
+TEST2:
+ .fill 1, 8, 4
diff --git a/test/MC/AsmParser/directive_include.s b/test/MC/AsmParser/directive_include.s
new file mode 100644
index 000000000000..fabd941d9999
--- /dev/null
+++ b/test/MC/AsmParser/directive_include.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s
+
+# CHECK: TESTA:
+# CHECK: TEST0:
+# CHECK: a = 0
+# CHECK: TESTB:
+TESTA:
+ .include "directive_set.s"
+TESTB:
diff --git a/test/MC/AsmParser/directive_lcomm.s b/test/MC/AsmParser/directive_lcomm.s
new file mode 100644
index 000000000000..d38805fc479b
--- /dev/null
+++ b/test/MC/AsmParser/directive_lcomm.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .zerofill __DATA,__bss,a,7,4
+# CHECK: .zerofill __DATA,__bss,b,8
+# CHECK: .zerofill __DATA,__bss,c,0
+TEST0:
+ .lcomm a, 8-1, 4
+ .lcomm b,8
+ .lcomm c, 0
diff --git a/test/MC/AsmParser/directive_line.s b/test/MC/AsmParser/directive_line.s
new file mode 100644
index 000000000000..94ce44602998
--- /dev/null
+++ b/test/MC/AsmParser/directive_line.s
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s
+# FIXME: Actually test the output.
+
+ .line
+ .line 1
diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s
new file mode 100644
index 000000000000..b122fdc2cf29
--- /dev/null
+++ b/test/MC/AsmParser/directive_loc.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s
+# FIXME: Actually test the output.
+
+ .file 1 "hello"
+ .loc 1
+ .loc 1 2
+ .loc 1 2 3
+
diff --git a/test/MC/AsmParser/directive_lsym.s b/test/MC/AsmParser/directive_lsym.s
new file mode 100644
index 000000000000..7b70cac3d1b1
--- /dev/null
+++ b/test/MC/AsmParser/directive_lsym.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# FIXME: This is currently unsupported. If it turns out no one uses it, we
+# should just rip it out.
+
+# XFAIL: *
+
+# CHECK: TEST0:
+# CHECK: .lsym bar,foo
+# CHECK: .lsym baz,3
+TEST0:
+ .lsym bar, foo
+ .lsym baz, 2+1
diff --git a/test/MC/AsmParser/directive_org.s b/test/MC/AsmParser/directive_org.s
index ac50f635e6bb..f4414c31cd28 100644
--- a/test/MC/AsmParser/directive_org.s
+++ b/test/MC/AsmParser/directive_org.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep ".org 1, 0" %t2 | count 1
+# CHECK: TEST0:
+# CHECK: .org 1, 0
TEST0:
.org 1
-# RUN: grep -A 2 TEST1 %t > %t2
-# RUN: grep ".org 1, 3" %t2 | count 1
+# CHECK: TEST1:
+# CHECK: .org 1, 3
TEST1:
.org 1, 3
diff --git a/test/MC/AsmParser/directive_set.s b/test/MC/AsmParser/directive_set.s
index 51119a661cad..f1fc30a85df1 100644
--- a/test/MC/AsmParser/directive_set.s
+++ b/test/MC/AsmParser/directive_set.s
@@ -1,7 +1,7 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep ".set a, 0" %t2
+# CHECK: TEST0:
+# CHECK: a = 0
TEST0:
.set a, 0
- \ No newline at end of file
+
diff --git a/test/MC/AsmParser/directive_space.s b/test/MC/AsmParser/directive_space.s
index 6159775de4bc..a897654c07c4 100644
--- a/test/MC/AsmParser/directive_space.s
+++ b/test/MC/AsmParser/directive_space.s
@@ -1,11 +1,12 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep ".byte 0" %t2 | count 1
+# CHECK: TEST0:
+# CHECK: .byte 0
TEST0:
.space 1
-# RUN: grep -A 3 TEST1 %t > %t2
-# RUN: grep ".byte 3" %t2 | count 2
+# CHECK: TEST1:
+# CHECK: .byte 3
+# CHECK: .byte 3
TEST1:
.space 2, 3
diff --git a/test/MC/AsmParser/directive_subsections_via_symbols.s b/test/MC/AsmParser/directive_subsections_via_symbols.s
new file mode 100644
index 000000000000..38d69c94c135
--- /dev/null
+++ b/test/MC/AsmParser/directive_subsections_via_symbols.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .subsections_via_symbols
+TEST0:
+ .subsections_via_symbols
diff --git a/test/MC/AsmParser/directive_symbol_attrs.s b/test/MC/AsmParser/directive_symbol_attrs.s
index 186e96739508..99ef3b8e13f6 100644
--- a/test/MC/AsmParser/directive_symbol_attrs.s
+++ b/test/MC/AsmParser/directive_symbol_attrs.s
@@ -1,7 +1,7 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 3 TEST0 %t > %t2
-# RUN: grep ".globl a" %t2 | count 1
-# RUN: grep ".globl b" %t2 | count 1
+# CHECK: TEST0:
+# CHECK: .globl a
+# CHECK: .globl b
TEST0:
.globl a, b
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
index 39ba06892348..beac69a4aeb1 100644
--- a/test/MC/AsmParser/directive_values.s
+++ b/test/MC/AsmParser/directive_values.s
@@ -1,21 +1,21 @@
-# RUN: llvm-mc %s > %t
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: grep -A 2 TEST0 %t > %t2
-# RUN: grep ".byte 0" %t2 | count 1
+# CHECK: TEST0:
+# CHECK: .byte 0
TEST0:
.byte 0
-# RUN: grep -A 2 TEST1 %t > %t2
-# RUN: grep ".short 3" %t2 | count 1
+# CHECK: TEST1:
+# CHECK: .short 3
TEST1:
.short 3
-# RUN: grep -A 2 TEST2 %t > %t2
-# RUN: grep ".long 8" %t2 | count 1
+# CHECK: TEST2:
+# CHECK: .long 8
TEST2:
.long 8
-# RUN: grep -A 2 TEST3 %t > %t2
-# RUN: grep ".quad 9" %t2 | count 1
+# CHECK: TEST3:
+# CHECK: .quad 9
TEST3:
.quad 9
diff --git a/test/MC/AsmParser/directive_zerofill.s b/test/MC/AsmParser/directive_zerofill.s
new file mode 100644
index 000000000000..4b26f9b68c74
--- /dev/null
+++ b/test/MC/AsmParser/directive_zerofill.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .zerofill __FOO,__bar,x,1
+# CHECK: .zerofill __FOO,__bar,y,8,2
+# CHECK: .zerofill __EMPTY,__NoSymbol
+TEST0:
+ .zerofill __FOO, __bar, x, 2-1
+ .zerofill __FOO, __bar, y , 8 , 1+1
+ .zerofill __EMPTY,__NoSymbol
diff --git a/test/MC/AsmParser/exprs-invalid.s b/test/MC/AsmParser/exprs-invalid.s
new file mode 100644
index 000000000000..5358fc5d7535
--- /dev/null
+++ b/test/MC/AsmParser/exprs-invalid.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t
+// RUN: FileCheck -input-file %t %s
+
+// Currently XFAIL'ed, since the front-end isn't validating this. Figure out the
+// right resolution.
+//
+// XFAIL: *
+
+ .text
+a:
+ .data
+// CHECK: expected relocatable expression
+ .long -(0 + a)
diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s
index 20de3a5e0ba8..5fa4a371c3d0 100644
--- a/test/MC/AsmParser/exprs.s
+++ b/test/MC/AsmParser/exprs.s
@@ -1,8 +1,8 @@
-// FIXME: For now this test just checks that llvm-mc works. Once we have .macro,
+// FIXME: For now this test just checks that llvm-mc -triple i386-unknown-unknown works. Once we have .macro,
// .if, and .abort we can write a better test (without resorting to miles of
// greps).
-// RUN: llvm-mc %s > %t
+// RUN: llvm-mc -triple i386-unknown-unknown %s > %t
.text
g:
@@ -59,4 +59,4 @@ m:
n:
nop
- \ No newline at end of file
+
diff --git a/test/MC/AsmParser/hello.s b/test/MC/AsmParser/hello.s
new file mode 100644
index 000000000000..01e3b4d58a19
--- /dev/null
+++ b/test/MC/AsmParser/hello.s
@@ -0,0 +1,30 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s -o -
+// RUN: llvm-mc -triple i386-unknown-unknown %s -o - -output-asm-variant=1
+
+ .text
+ .align 4,0x90
+ .globl _main
+_main:
+ pushl %ebp
+ movl %esp, %ebp
+ subl $8, %esp
+ call "L1$pb"
+"L1$pb":
+ popl %eax
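+ // The call/pop pair above is the usual i386 PIC idiom for loading the
+ // current address into %eax; L_.str is then addressed relative to "L1$pb".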
+ movl $0, -4(%ebp)
+ movl %esp, %ecx
+ leal L_.str-"L1$pb"(%eax), %eax
+ movl %eax, (%ecx)
+ call _printf
+ movl $0, -4(%ebp)
+ movl -4(%ebp), %eax
+ addl $8, %esp
+ popl %ebp
+ //ret
+ .subsections_via_symbols
+ .cstring
+L_.str:
+ .asciz "hello world!\n"
+
diff --git a/test/MC/AsmParser/labels.s b/test/MC/AsmParser/labels.s
new file mode 100644
index 000000000000..53da7edf97cb
--- /dev/null
+++ b/test/MC/AsmParser/labels.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple i686-apple-darwin10 %s | FileCheck %s
+
+ .data
+// CHECK: a:
+a:
+ .long 0
+// CHECK: b:
+"b":
+ .long 0
+// CHECK: a$b:
+"a$b":
+ .long 0
+
+ .text
+foo:
+// CHECK: addl $24, a$b(%eax)
+ addl $24, "a$b"(%eax)
+// CHECK: addl $24, a$b+10(%eax)
+ addl $24, ("a$b" + 10)(%eax)
+
+// CHECK: b$c = 10
+"b$c" = 10
+// CHECK: addl $b$c, %eax
+ addl "b$c", %eax
+
+// CHECK: "a 0" = 11
+ .set "a 0", 11
+
+// CHECK: .long "a 0"
+ .long "a 0"
+
+// XXCHCK: .section "a 1,a 2"
+//.section "a 1", "a 2"
+
+// CHECK: .globl "a 3"
+ .globl "a 3"
+
+// CHECK: .weak "a 4"
+ .weak "a 4"
+
+// CHECK: .desc "a 5",1
+ .desc "a 5", 1
+
+// CHECK: .comm "a 6",1
+ .comm "a 6", 1
+
+// CHECK: .zerofill __DATA,__bss,"a 7",1,0
+ .lcomm "a 7", 1
+
+// FIXME: We don't bother to support .lsym.
+
+// CHECX: .lsym "a 8",1
+// .lsym "a 8", 1
+
+// CHECK: "a 9" = a-b
+ .set "a 9", a - b
+
+// CHECK: .long "a 9"
+ .long "a 9"
diff --git a/test/MC/MachO/comm-1.s b/test/MC/MachO/comm-1.s
new file mode 100644
index 000000000000..e979fb139204
--- /dev/null
+++ b/test/MC/MachO/comm-1.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ .comm sym_comm_B, 2
+ .comm sym_comm_A, 4
+ .comm sym_comm_C, 8, 2
+ .comm sym_comm_D, 2, 3
+
+ .no_dead_strip sym_comm_C
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 228)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 124)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 0)
+// CHECK: ('file_offset', 256)
+// CHECK: ('file_size', 0)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 1)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 256)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 256)
+// CHECK: ('nsyms', 4)
+// CHECK: ('stroff', 304)
+// CHECK: ('strsize', 48)
+// CHECK: ('_string_data', '\x00sym_comm_B\x00sym_comm_A\x00sym_comm_C\x00sym_comm_D\x00\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 12)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 4)
+// CHECK: ('_string', 'sym_comm_A')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 2)
+// CHECK: ('_string', 'sym_comm_B')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 23)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 544)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'sym_comm_C')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 34)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 768)
+// CHECK: ('n_value', 2)
+// CHECK: ('_string', 'sym_comm_D')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 0)
+// CHECK: ('iextdefsym', 0)
+// CHECK: ('nextdefsym', 0)
+// CHECK: ('iundefsym', 0)
+// CHECK: ('nundefsym', 4)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/data.s b/test/MC/MachO/data.s
new file mode 100644
index 000000000000..0ff2854801ac
--- /dev/null
+++ b/test/MC/MachO/data.s
@@ -0,0 +1,67 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ .data
+ .ascii "hello"
+ .byte 0xAB
+ .short 0xABCD
+ .long 0xABCDABCD
+ .quad 0xABCDABCDABCDABCD
+.org 30
+ .long 0xF000 // 34
+ .p2align 3, 0xAB // 40 (0xAB * 6)
+ .short 0 // 42
+ .p2alignw 3, 0xABCD // 48 (0xABCD * 2)
+ .short 0 // 50
+ .p2alignw 3, 0xABCD, 5 // 50
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 192)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 192)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 50)
+// CHECK: ('file_offset', 220)
+// CHECK: ('file_size', 50)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 2)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 220)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 1
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 50)
+// CHECK: ('offset', 220)
+// CHECK: ('alignment', 3)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
+
+// FIXME: Dump contents, so we can check those too.
diff --git a/test/MC/MachO/dg.exp b/test/MC/MachO/dg.exp
new file mode 100644
index 000000000000..ca6aefe9c53d
--- /dev/null
+++ b/test/MC/MachO/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
+}
+
diff --git a/test/MC/MachO/lcomm-attributes.s b/test/MC/MachO/lcomm-attributes.s
new file mode 100644
index 000000000000..2685395e1ede
--- /dev/null
+++ b/test/MC/MachO/lcomm-attributes.s
@@ -0,0 +1,136 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ // Note that this test intentionally diverges from Darwin 'as', which
+ // drops the following global marker.
+ //
+ // FIXME: We should probably warn about our interpretation of this.
+ .globl sym_lcomm_ext_A
+ .lcomm sym_lcomm_ext_A, 4
+ .lcomm sym_lcomm_ext_B, 4
+ .globl sym_lcomm_ext_B
+
+ .globl sym_zfill_ext_A
+ .zerofill __DATA, __bss, sym_zfill_ext_A, 4
+ .zerofill __DATA, __bss, sym_zfill_ext_B, 4
+ .globl sym_zfill_ext_B
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 296)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 192)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 16)
+// CHECK: ('file_offset', 324)
+// CHECK: ('file_size', 0)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 2)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 324)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: # Section 1
+// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 16)
+// CHECK: ('offset', 0)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x1)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 324)
+// CHECK: ('nsyms', 4)
+// CHECK: ('stroff', 372)
+// CHECK: ('strsize', 68)
+// CHECK: ('_string_data', '\x00sym_lcomm_ext_A\x00sym_lcomm_ext_B\x00sym_zfill_ext_A\x00sym_zfill_ext_B\x00\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lcomm_ext_A')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 17)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 4)
+// CHECK: ('_string', 'sym_lcomm_ext_B')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 33)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'sym_zfill_ext_A')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 49)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 12)
+// CHECK: ('_string', 'sym_zfill_ext_B')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 0)
+// CHECK: ('iextdefsym', 0)
+// CHECK: ('nextdefsym', 4)
+// CHECK: ('iundefsym', 4)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/reloc.s b/test/MC/MachO/reloc.s
new file mode 100644
index 000000000000..e86ed8c6deb6
--- /dev/null
+++ b/test/MC/MachO/reloc.s
@@ -0,0 +1,227 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+ .data
+ .long undef
+ .long (undef + 4)
+
+ .globl local_a_ext
+local_a_ext:
+ .long local_a_ext
+
+local_a:
+ .long 0
+local_a_elt:
+ .long 0
+local_b:
+ .long local_b - local_c + 245
+ .long 0
+local_c:
+ .long 0
+
+
+ .long local_a_elt + 1
+ .long local_a_elt + 10
+ .short local_a_elt + 20
+ .byte local_a_elt + 89
+
+ .const
+
+ .long
+bar:
+ .long local_a_elt - bar + 33
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 364)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 260)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 47)
+// CHECK: ('file_offset', 392)
+// CHECK: ('file_size', 47)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 3)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 392)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: ('_section_data', '')
+// CHECK: # Section 1
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 43)
+// CHECK: ('offset', 392)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 440)
+// CHECK: ('num_reloc', 9)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: # Relocation 0
+// CHECK: (('word-0', 0x8000002a),
+// CHECK: ('word-1', 0x10)),
+// CHECK: # Relocation 1
+// CHECK: (('word-0', 0x90000028),
+// CHECK: ('word-1', 0x10)),
+// CHECK: # Relocation 2
+// CHECK: (('word-0', 0xa0000024),
+// CHECK: ('word-1', 0x10)),
+// CHECK: # Relocation 3
+// CHECK: (('word-0', 0xa0000020),
+// CHECK: ('word-1', 0x10)),
+// CHECK: # Relocation 4
+// CHECK: (('word-0', 0xa4000014),
+// CHECK: ('word-1', 0x14)),
+// CHECK: # Relocation 5
+// CHECK: (('word-0', 0xa1000000),
+// CHECK: ('word-1', 0x1c)),
+// CHECK: # Relocation 6
+// CHECK: (('word-0', 0x8),
+// CHECK: ('word-1', 0x4000002)),
+// CHECK: # Relocation 7
+// CHECK: (('word-0', 0x4),
+// CHECK: ('word-1', 0xc000006)),
+// CHECK: # Relocation 8
+// CHECK: (('word-0', 0x0),
+// CHECK: ('word-1', 0xc000006)),
+// CHECK: ])
+// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x1a\x00\x00\x00$\x00i')
+// CHECK: # Section 2
+// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 43)
+// CHECK: ('size', 4)
+// CHECK: ('offset', 435)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 512)
+// CHECK: ('num_reloc', 2)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: # Relocation 0
+// CHECK: (('word-0', 0xa4000000),
+// CHECK: ('word-1', 0x10)),
+// CHECK: # Relocation 1
+// CHECK: (('word-0', 0xa1000000),
+// CHECK: ('word-1', 0x2b)),
+// CHECK: ])
+// CHECK: ('_section_data', '\x06\x00\x00\x00')
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 528)
+// CHECK: ('nsyms', 7)
+// CHECK: ('stroff', 612)
+// CHECK: ('strsize', 60)
+// CHECK: ('_string_data', '\x00undef\x00local_a_ext\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 19)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 12)
+// CHECK: ('_string', 'local_a')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 27)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 16)
+// CHECK: ('_string', 'local_a_elt')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 39)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 20)
+// CHECK: ('_string', 'local_b')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 47)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 28)
+// CHECK: ('_string', 'local_c')
+// CHECK: ),
+// CHECK: # Symbol 4
+// CHECK: (('n_strx', 55)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 3)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 43)
+// CHECK: ('_string', 'bar')
+// CHECK: ),
+// CHECK: # Symbol 5
+// CHECK: (('n_strx', 7)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'local_a_ext')
+// CHECK: ),
+// CHECK: # Symbol 6
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'undef')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 5)
+// CHECK: ('iextdefsym', 5)
+// CHECK: ('nextdefsym', 1)
+// CHECK: ('iundefsym', 6)
+// CHECK: ('nundefsym', 1)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/section-align-1.s b/test/MC/MachO/section-align-1.s
new file mode 100644
index 000000000000..6a5e247c938e
--- /dev/null
+++ b/test/MC/MachO/section-align-1.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+name:
+ .byte 0
+
+ // Check that the symbol table is aligned to 4 bytes.
+
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 228)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 124)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 1)
+// CHECK: ('file_offset', 256)
+// CHECK: ('file_size', 1)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 1)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 1)
+// CHECK: ('offset', 256)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 260)
+// CHECK: ('nsyms', 1)
+// CHECK: ('stroff', 272)
+// CHECK: ('strsize', 8)
+// CHECK: ('_string_data', '\x00name\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'name')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 1)
+// CHECK: ('iextdefsym', 1)
+// CHECK: ('nextdefsym', 0)
+// CHECK: ('iundefsym', 1)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/section-align-2.s b/test/MC/MachO/section-align-2.s
new file mode 100644
index 000000000000..e0704734a8e6
--- /dev/null
+++ b/test/MC/MachO/section-align-2.s
@@ -0,0 +1,137 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ .byte 0
+
+ // There should be 3 padding bytes here.
+
+ .data
+ .align 2
+foo:
+ .org 8
+bar:
+ .byte 0
+
+ .const
+baz:
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 364)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 260)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 13)
+// CHECK: ('file_offset', 392)
+// CHECK: ('file_size', 13)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 3)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 1)
+// CHECK: ('offset', 392)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 1
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 4)
+// CHECK: ('size', 9)
+// CHECK: ('offset', 396)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 2
+// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 13)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 405)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 408)
+// CHECK: ('nsyms', 3)
+// CHECK: ('stroff', 444)
+// CHECK: ('strsize', 16)
+// CHECK: ('_string_data', '\x00foo\x00bar\x00baz\x00\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 4)
+// CHECK: ('_string', 'foo')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 5)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 12)
+// CHECK: ('_string', 'bar')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 9)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 3)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 13)
+// CHECK: ('_string', 'baz')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 3)
+// CHECK: ('iextdefsym', 3)
+// CHECK: ('nextdefsym', 0)
+// CHECK: ('iundefsym', 3)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/sections.s b/test/MC/MachO/sections.s
new file mode 100644
index 000000000000..a7bcd2170f03
--- /dev/null
+++ b/test/MC/MachO/sections.s
@@ -0,0 +1,540 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ .text
+ .section __TEXT,__text,regular,pure_instructions
+
+ .const
+ .static_const
+ .cstring
+ .literal4
+ .literal8
+ .literal16
+ .constructor
+ .destructor
+ .symbol_stub
+ .picsymbol_stub
+ .data
+ .static_data
+ .non_lazy_symbol_pointer
+ .lazy_symbol_pointer
+ .dyld
+ .mod_init_func
+ .mod_term_func
+ .const_data
+ .objc_class
+ .objc_meta_class
+ .objc_cat_cls_meth
+ .objc_cat_inst_meth
+ .objc_protocol
+ .objc_string_object
+ .objc_cls_meth
+ .objc_inst_meth
+ .objc_cls_refs
+ .objc_message_refs
+ .objc_symbols
+ .objc_category
+ .objc_class_vars
+ .objc_instance_vars
+ .objc_module_info
+
+// FIXME: These are aliases for __TEXT,__cstring, which we don't yet properly
+// unique.
+// .objc_class_names
+// .objc_meth_var_types
+// .objc_meth_var_names
+
+ .objc_selector_strs
+ .section __TEXT,__picsymbolstub4,symbol_stubs,none,16
+
+ .subsections_via_symbols
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 2504)
+// CHECK: ('flag', 8192)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 2504)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 0)
+// CHECK: ('file_offset', 2532)
+// CHECK: ('file_size', 0)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 36)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 1
+// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 2
+// CHECK: (('section_name', '__static_const\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 3
+// CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x2)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 4
+// CHECK: (('section_name', '__literal4\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x3)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 5
+// CHECK: (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 3)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x4)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 6
+// CHECK: (('section_name', '__literal16\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 4)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0xe)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 7
+// CHECK: (('section_name', '__constructor\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 8
+// CHECK: (('section_name', '__destructor\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 9
+// CHECK: (('section_name', '__symbol_stub\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000008)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 16)
+// CHECK: ),
+// CHECK: # Section 10
+// CHECK: (('section_name', '__picsymbol_stub')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000008)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 26)
+// CHECK: ),
+// CHECK: # Section 11
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 12
+// CHECK: (('section_name', '__static_data\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 13
+// CHECK: (('section_name', '__nl_symbol_ptr\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x6)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 14
+// CHECK: (('section_name', '__la_symbol_ptr\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x7)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 15
+// CHECK: (('section_name', '__dyld\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 16
+// CHECK: (('section_name', '__mod_init_func\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x9)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 17
+// CHECK: (('section_name', '__mod_term_func\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0xa)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 18
+// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 19
+// CHECK: (('section_name', '__class\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 20
+// CHECK: (('section_name', '__meta_class\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 21
+// CHECK: (('section_name', '__cat_cls_meth\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 22
+// CHECK: (('section_name', '__cat_inst_meth\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 23
+// CHECK: (('section_name', '__protocol\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 24
+// CHECK: (('section_name', '__string_object\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 25
+// CHECK: (('section_name', '__cls_meth\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 26
+// CHECK: (('section_name', '__inst_meth\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 27
+// CHECK: (('section_name', '__cls_refs\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000005)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 28
+// CHECK: (('section_name', '__message_refs\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000005)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 29
+// CHECK: (('section_name', '__symbols\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 30
+// CHECK: (('section_name', '__category\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 31
+// CHECK: (('section_name', '__class_vars\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 32
+// CHECK: (('section_name', '__instance_vars\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 33
+// CHECK: (('section_name', '__module_info\x00\x00\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x10000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 34
+// CHECK: (('section_name', '__selector_strs\x00')
+// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x2)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: # Section 35
+// CHECK: (('section_name', '__picsymbolstub4')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 2532)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x8)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 16)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/symbol-flags.s b/test/MC/MachO/symbol-flags.s
new file mode 100644
index 000000000000..e82b0a0447f8
--- /dev/null
+++ b/test/MC/MachO/symbol-flags.s
@@ -0,0 +1,254 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ .reference sym_ref_A
+ .reference sym_ref_def_A
+sym_ref_def_A:
+sym_ref_def_C:
+ .reference sym_ref_def_C
+
+ .weak_reference sym_weak_ref_A
+ .weak_reference sym_weak_ref_def_A
+sym_weak_ref_def_A:
+sym_weak_ref_def_B:
+ .weak_reference sym_weak_ref_def_B
+
+ .data
+ .globl sym_weak_def_A
+ .weak_definition sym_weak_def_A
+sym_weak_def_A:
+
+ .lazy_reference sym_lazy_ref_A
+ .lazy_reference sym_lazy_ref_B
+sym_lazy_ref_B:
+sym_lazy_ref_C:
+ .lazy_reference sym_lazy_ref_C
+
+ .private_extern sym_private_ext_A
+ .private_extern sym_private_ext_B
+sym_private_ext_B:
+sym_private_ext_C:
+ .private_extern sym_private_ext_C
+ .private_extern sym_private_ext_D
+ .globl sym_private_ext_D
+
+ .no_dead_strip sym_no_dead_strip_A
+
+ .reference sym_ref_A
+ .desc sym_ref_A, 1
+ .desc sym_ref_A, 0x1234
+
+ .desc sym_desc_flags,0x47
+sym_desc_flags:
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 296)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 192)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 0)
+// CHECK: ('file_offset', 324)
+// CHECK: ('file_size', 0)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 2)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 324)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 1
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 324)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 324)
+// CHECK: ('nsyms', 16)
+// CHECK: ('stroff', 516)
+// CHECK: ('strsize', 260)
+// CHECK: ('_string_data', '\x00sym_ref_A\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_lazy_ref_A\x00sym_private_ext_A\x00sym_private_ext_B\x00sym_private_ext_C\x00sym_private_ext_D\x00sym_no_dead_strip_A\x00sym_ref_def_A\x00sym_ref_def_C\x00sym_weak_ref_def_A\x00sym_weak_ref_def_B\x00sym_lazy_ref_B\x00sym_lazy_ref_C\x00sym_desc_flags\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 148)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 32)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_ref_def_A')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 162)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 32)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_ref_def_C')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 176)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_weak_ref_def_A')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 195)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_weak_ref_def_B')
+// CHECK: ),
+// CHECK: # Symbol 4
+// CHECK: (('n_strx', 214)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 32)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lazy_ref_B')
+// CHECK: ),
+// CHECK: # Symbol 5
+// CHECK: (('n_strx', 229)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 32)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lazy_ref_C')
+// CHECK: ),
+// CHECK: # Symbol 6
+// CHECK: (('n_strx', 244)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_desc_flags')
+// CHECK: ),
+// CHECK: # Symbol 7
+// CHECK: (('n_strx', 74)
+// CHECK: ('n_type', 0x1f)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_private_ext_B')
+// CHECK: ),
+// CHECK: # Symbol 8
+// CHECK: (('n_strx', 92)
+// CHECK: ('n_type', 0x1f)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_private_ext_C')
+// CHECK: ),
+// CHECK: # Symbol 9
+// CHECK: (('n_strx', 26)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 128)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_weak_def_A')
+// CHECK: ),
+// CHECK: # Symbol 10
+// CHECK: (('n_strx', 41)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 33)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lazy_ref_A')
+// CHECK: ),
+// CHECK: # Symbol 11
+// CHECK: (('n_strx', 128)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 32)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_no_dead_strip_A')
+// CHECK: ),
+// CHECK: # Symbol 12
+// CHECK: (('n_strx', 56)
+// CHECK: ('n_type', 0x11)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_private_ext_A')
+// CHECK: ),
+// CHECK: # Symbol 13
+// CHECK: (('n_strx', 110)
+// CHECK: ('n_type', 0x11)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_private_ext_D')
+// CHECK: ),
+// CHECK: # Symbol 14
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 4660)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_ref_A')
+// CHECK: ),
+// CHECK: # Symbol 15
+// CHECK: (('n_strx', 11)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 64)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_weak_ref_A')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 7)
+// CHECK: ('iextdefsym', 7)
+// CHECK: ('nextdefsym', 3)
+// CHECK: ('iundefsym', 10)
+// CHECK: ('nundefsym', 6)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/symbol-indirect.s b/test/MC/MachO/symbol-indirect.s
new file mode 100644
index 000000000000..461291a3c909
--- /dev/null
+++ b/test/MC/MachO/symbol-indirect.s
@@ -0,0 +1,268 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+// FIXME: We are missing a lot of diagnostics on this kind of stuff which the
+// assembler has.
+
+ .lazy_symbol_pointer
+ .indirect_symbol sym_lsp_B
+ .long 0
+
+ .globl sym_lsp_A
+ .indirect_symbol sym_lsp_A
+ .long 0
+
+sym_lsp_C:
+ .indirect_symbol sym_lsp_C
+ .long 0
+
+// FIXME: Enable this test once missing llvm-mc support is in place.
+.if 0
+ .indirect_symbol sym_lsp_D
+ .long sym_lsp_D
+.endif
+
+ .indirect_symbol sym_lsp_E
+ .long 0xFA
+
+// FIXME: Enable this test once missing llvm-mc support is in place.
+.if 0
+sym_lsp_F = 10
+ .indirect_symbol sym_lsp_F
+ .long 0
+.endif
+
+ .globl sym_lsp_G
+sym_lsp_G:
+ .indirect_symbol sym_lsp_G
+ .long 0
+
+ .non_lazy_symbol_pointer
+ .indirect_symbol sym_nlp_B
+ .long 0
+
+ .globl sym_nlp_A
+ .indirect_symbol sym_nlp_A
+ .long 0
+
+sym_nlp_C:
+ .indirect_symbol sym_nlp_C
+ .long 0
+
+// FIXME: Enable this test once missing llvm-mc support is in place.
+.if 0
+ .indirect_symbol sym_nlp_D
+ .long sym_nlp_D
+.endif
+
+ .indirect_symbol sym_nlp_E
+ .long 0xAF
+
+// FIXME: Enable this test once missing llvm-mc support is in place.
+.if 0
+sym_nlp_F = 10
+ .indirect_symbol sym_nlp_F
+ .long 0
+.endif
+
+ .globl sym_nlp_G
+sym_nlp_G:
+ .indirect_symbol sym_nlp_G
+ .long 0
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 364)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 260)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 40)
+// CHECK: ('file_offset', 392)
+// CHECK: ('file_size', 40)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 3)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 392)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 1
+// CHECK: (('section_name', '__la_symbol_ptr\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 20)
+// CHECK: ('offset', 392)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x7)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 2
+// CHECK: (('section_name', '__nl_symbol_ptr\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 20)
+// CHECK: ('size', 20)
+// CHECK: ('offset', 412)
+// CHECK: ('alignment', 2)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x6)
+ // FIXME: Enable this when fixed!
+// CHECX: ('reserved1', 5)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 472)
+// CHECK: ('nsyms', 10)
+// CHECK: ('stroff', 592)
+// CHECK: ('strsize', 104)
+// CHECK: ('_string_data', '\x00sym_lsp_A\x00sym_lsp_G\x00sym_nlp_A\x00sym_nlp_G\x00sym_nlp_B\x00sym_nlp_E\x00sym_lsp_B\x00sym_lsp_E\x00sym_lsp_C\x00sym_nlp_C\x00\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 81)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'sym_lsp_C')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 91)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 3)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 28)
+// CHECK: ('_string', 'sym_nlp_C')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 11)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 16)
+// CHECK: ('_string', 'sym_lsp_G')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 31)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 3)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 36)
+// CHECK: ('_string', 'sym_nlp_G')
+// CHECK: ),
+// CHECK: # Symbol 4
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lsp_A')
+// CHECK: ),
+// CHECK: # Symbol 5
+// CHECK: (('n_strx', 61)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 1)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lsp_B')
+// CHECK: ),
+// CHECK: # Symbol 6
+// CHECK: (('n_strx', 71)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 1)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lsp_E')
+// CHECK: ),
+// CHECK: # Symbol 7
+// CHECK: (('n_strx', 21)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_nlp_A')
+// CHECK: ),
+// CHECK: # Symbol 8
+// CHECK: (('n_strx', 41)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_nlp_B')
+// CHECK: ),
+// CHECK: # Symbol 9
+// CHECK: (('n_strx', 51)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_nlp_E')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 2)
+// CHECK: ('iextdefsym', 2)
+// CHECK: ('nextdefsym', 2)
+// CHECK: ('iundefsym', 4)
+// CHECK: ('nundefsym', 6)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 432)
+// CHECK: ('nindirectsyms', 10)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: # Indirect Symbol 0
+// CHECK: (('symbol_index', 0x5),),
+// CHECK: # Indirect Symbol 1
+// CHECK: (('symbol_index', 0x4),),
+// CHECK: # Indirect Symbol 2
+// CHECK: (('symbol_index', 0x0),),
+// CHECK: # Indirect Symbol 3
+// CHECK: (('symbol_index', 0x6),),
+// CHECK: # Indirect Symbol 4
+// CHECK: (('symbol_index', 0x2),),
+// CHECK: # Indirect Symbol 5
+// CHECK: (('symbol_index', 0x8),),
+// CHECK: # Indirect Symbol 6
+// CHECK: (('symbol_index', 0x7),),
+// CHECK: # Indirect Symbol 7
+// CHECK: (('symbol_index', 0x80000000),),
+// CHECK: # Indirect Symbol 8
+// CHECK: (('symbol_index', 0x9),),
+// CHECK: # Indirect Symbol 9
+// CHECK: (('symbol_index', 0x3),),
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/symbols-1.s b/test/MC/MachO/symbols-1.s
new file mode 100644
index 000000000000..4c72fb3e7c2d
--- /dev/null
+++ b/test/MC/MachO/symbols-1.s
@@ -0,0 +1,161 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+sym_local_B:
+.globl sym_globl_def_B
+.globl sym_globl_undef_B
+sym_local_A:
+.globl sym_globl_def_A
+.globl sym_globl_undef_A
+sym_local_C:
+.globl sym_globl_def_C
+.globl sym_globl_undef_C
+
+sym_globl_def_A:
+sym_globl_def_B:
+sym_globl_def_C:
+Lsym_asm_temp:
+ .long 0
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 228)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 124)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 4)
+// CHECK: ('file_offset', 256)
+// CHECK: ('file_size', 4)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 1)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 4)
+// CHECK: ('offset', 256)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 260)
+// CHECK: ('nsyms', 9)
+// CHECK: ('stroff', 368)
+// CHECK: ('strsize', 140)
+// CHECK: ('_string_data', '\x00sym_globl_def_B\x00sym_globl_undef_B\x00sym_globl_def_A\x00sym_globl_undef_A\x00sym_globl_def_C\x00sym_globl_undef_C\x00sym_local_B\x00sym_local_A\x00sym_local_C\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 103)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_local_B')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 115)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_local_A')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 127)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_local_C')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 35)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_globl_def_A')
+// CHECK: ),
+// CHECK: # Symbol 4
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_globl_def_B')
+// CHECK: ),
+// CHECK: # Symbol 5
+// CHECK: (('n_strx', 69)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_globl_def_C')
+// CHECK: ),
+// CHECK: # Symbol 6
+// CHECK: (('n_strx', 51)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_globl_undef_A')
+// CHECK: ),
+// CHECK: # Symbol 7
+// CHECK: (('n_strx', 17)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_globl_undef_B')
+// CHECK: ),
+// CHECK: # Symbol 8
+// CHECK: (('n_strx', 85)
+// CHECK: ('n_type', 0x1)
+// CHECK: ('n_sect', 0)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_globl_undef_C')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 3)
+// CHECK: ('iextdefsym', 3)
+// CHECK: ('nextdefsym', 3)
+// CHECK: ('iundefsym', 6)
+// CHECK: ('nundefsym', 3)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/values.s b/test/MC/MachO/values.s
new file mode 100644
index 000000000000..2a472ab60711
--- /dev/null
+++ b/test/MC/MachO/values.s
@@ -0,0 +1,135 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+ .long 0
+text_def_int:
+ .long 0
+
+ .globl text_def_ext
+text_def_ext:
+ .long 0
+
+ .data
+ .long 0
+data_def_int:
+ .long 0
+
+ .globl data_def_ext
+data_def_ext:
+ .long 0
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 296)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 192)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 24)
+// CHECK: ('file_offset', 324)
+// CHECK: ('file_size', 24)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 2)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 12)
+// CHECK: ('offset', 324)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: # Section 1
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 12)
+// CHECK: ('size', 12)
+// CHECK: ('offset', 336)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 348)
+// CHECK: ('nsyms', 4)
+// CHECK: ('stroff', 396)
+// CHECK: ('strsize', 56)
+// CHECK: ('_string_data', '\x00text_def_ext\x00data_def_ext\x00text_def_int\x00data_def_int\x00\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 27)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 4)
+// CHECK: ('_string', 'text_def_int')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 40)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 16)
+// CHECK: ('_string', 'data_def_int')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 14)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 20)
+// CHECK: ('_string', 'data_def_ext')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 1)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'text_def_ext')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 2)
+// CHECK: ('iextdefsym', 2)
+// CHECK: ('nextdefsym', 2)
+// CHECK: ('iundefsym', 4)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/zerofill-1.s b/test/MC/MachO/zerofill-1.s
new file mode 100644
index 000000000000..a175d4c4d01e
--- /dev/null
+++ b/test/MC/MachO/zerofill-1.s
@@ -0,0 +1,121 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+ .text
+ .byte 0 // Align to 2**3 bytes, not 2**1
+
+ .zerofill __DATA, __common, zfill, 2, 1
+
+ .data
+ .align 3
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 364)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 260)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 10)
+// CHECK: ('file_offset', 392)
+// CHECK: ('file_size', 8)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 3)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 1)
+// CHECK: ('offset', 392)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: # Section 1
+// CHECK: (('section_name', '__common\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 8)
+// CHECK: ('size', 2)
+// CHECK: ('offset', 0)
+// CHECK: ('alignment', 1)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x1)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: # Section 2
+// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 8)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 400)
+// CHECK: ('alignment', 3)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x0)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 400)
+// CHECK: ('nsyms', 1)
+// CHECK: ('stroff', 412)
+// CHECK: ('strsize', 8)
+// CHECK: ('_string_data', '\x00zfill\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'zfill')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 1)
+// CHECK: ('iextdefsym', 1)
+// CHECK: ('nextdefsym', 0)
+// CHECK: ('iundefsym', 1)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/zerofill-2.s b/test/MC/MachO/zerofill-2.s
new file mode 100644
index 000000000000..e76de8453cbd
--- /dev/null
+++ b/test/MC/MachO/zerofill-2.s
@@ -0,0 +1,103 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+ .byte 0
+
+	// This file has size 2; the tail padding doesn't count.
+ .zerofill __DATA, __bss, sym_a, 1
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 296)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 192)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 2)
+// CHECK: ('file_offset', 324)
+// CHECK: ('file_size', 1)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 2)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 1)
+// CHECK: ('offset', 324)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: # Section 1
+// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 1)
+// CHECK: ('size', 1)
+// CHECK: ('offset', 0)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x1)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 328)
+// CHECK: ('nsyms', 1)
+// CHECK: ('stroff', 340)
+// CHECK: ('strsize', 8)
+// CHECK: ('_string_data', '\x00sym_a\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 1)
+// CHECK: ('_string', 'sym_a')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 1)
+// CHECK: ('iextdefsym', 1)
+// CHECK: ('nextdefsym', 0)
+// CHECK: ('iundefsym', 1)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/MC/MachO/zerofill-3.s b/test/MC/MachO/zerofill-3.s
new file mode 100644
index 000000000000..e7f4c7b44418
--- /dev/null
+++ b/test/MC/MachO/zerofill-3.s
@@ -0,0 +1,141 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+	// FIXME: We don't get the order right currently; the assembler first
+ // orders the symbols, then assigns addresses. :(
+.if 0
+ .lcomm sym_lcomm_B, 4
+ .lcomm sym_lcomm_C, 4, 4
+ .lcomm sym_lcomm_A, 4, 3
+ .lcomm sym_lcomm_D, 4
+ .globl sym_lcomm_D
+ .globl sym_lcomm_C
+.else
+ .lcomm sym_lcomm_C, 4, 4
+ .lcomm sym_lcomm_D, 4
+ .globl sym_lcomm_D
+ .globl sym_lcomm_C
+
+ .lcomm sym_lcomm_A, 4, 3
+ .lcomm sym_lcomm_B, 4
+.endif
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 296)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 0
+// CHECK: (('command', 1)
+// CHECK: ('size', 192)
+// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('vm_addr', 0)
+// CHECK: ('vm_size', 16)
+// CHECK: ('file_offset', 324)
+// CHECK: ('file_size', 0)
+// CHECK: ('maxprot', 7)
+// CHECK: ('initprot', 7)
+// CHECK: ('num_sections', 2)
+// CHECK: ('flags', 0)
+// CHECK: ('sections', [
+// CHECK: # Section 0
+// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 0)
+// CHECK: ('offset', 324)
+// CHECK: ('alignment', 0)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x80000000)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: # Section 1
+// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK: ('address', 0)
+// CHECK: ('size', 16)
+// CHECK: ('offset', 0)
+// CHECK: ('alignment', 4)
+// CHECK: ('reloc_offset', 0)
+// CHECK: ('num_reloc', 0)
+// CHECK: ('flags', 0x1)
+// CHECK: ('reserved1', 0)
+// CHECK: ('reserved2', 0)
+// CHECK: ),
+// CHECK: ('_relocations', [
+// CHECK: ])
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 1
+// CHECK: (('command', 2)
+// CHECK: ('size', 24)
+// CHECK: ('symoff', 324)
+// CHECK: ('nsyms', 4)
+// CHECK: ('stroff', 372)
+// CHECK: ('strsize', 52)
+// CHECK: ('_string_data', '\x00sym_lcomm_C\x00sym_lcomm_D\x00sym_lcomm_A\x00sym_lcomm_B\x00\x00\x00\x00')
+// CHECK: ('_symbols', [
+// CHECK: # Symbol 0
+// CHECK: (('n_strx', 25)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 8)
+// CHECK: ('_string', 'sym_lcomm_A')
+// CHECK: ),
+// CHECK: # Symbol 1
+// CHECK: (('n_strx', 37)
+// CHECK: ('n_type', 0xe)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 12)
+// CHECK: ('_string', 'sym_lcomm_B')
+// CHECK: ),
+// CHECK: # Symbol 2
+// CHECK: (('n_strx', 1)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 0)
+// CHECK: ('_string', 'sym_lcomm_C')
+// CHECK: ),
+// CHECK: # Symbol 3
+// CHECK: (('n_strx', 13)
+// CHECK: ('n_type', 0xf)
+// CHECK: ('n_sect', 2)
+// CHECK: ('n_desc', 0)
+// CHECK: ('n_value', 4)
+// CHECK: ('_string', 'sym_lcomm_D')
+// CHECK: ),
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 11)
+// CHECK: ('size', 80)
+// CHECK: ('ilocalsym', 0)
+// CHECK: ('nlocalsym', 2)
+// CHECK: ('iextdefsym', 2)
+// CHECK: ('nextdefsym', 2)
+// CHECK: ('iundefsym', 4)
+// CHECK: ('nundefsym', 0)
+// CHECK: ('tocoff', 0)
+// CHECK: ('ntoc', 0)
+// CHECK: ('modtaboff', 0)
+// CHECK: ('nmodtab', 0)
+// CHECK: ('extrefsymoff', 0)
+// CHECK: ('nextrefsyms', 0)
+// CHECK: ('indirectsymoff', 0)
+// CHECK: ('nindirectsyms', 0)
+// CHECK: ('extreloff', 0)
+// CHECK: ('nextrel', 0)
+// CHECK: ('locreloff', 0)
+// CHECK: ('nlocrel', 0)
+// CHECK: ('_indirect_symbols', [
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
diff --git a/test/Makefile b/test/Makefile
index e02daa072546..4955c2eb8955 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -28,16 +28,41 @@ endif
ifdef VERBOSE
RUNTESTFLAGS := $(VERBOSE)
+LIT_ARGS := -v
+else
+LIT_ARGS := -s -v
endif
ifdef TESTSUITE
+LIT_TESTSUITE := $(TESTSUITE)
CLEANED_TESTSUITE := $(patsubst %/,%,$(TESTSUITE))
CLEANED_TESTSUITE := $(patsubst test/%,%,$(CLEANED_TESTSUITE))
RUNTESTFLAGS += --tool $(CLEANED_TESTSUITE)
+else
+LIT_TESTSUITE := .
endif
ifdef VG
-VALGRIND := valgrind --tool=memcheck --quiet --trace-children=yes --error-exitcode=3 --leak-check=full
+VALGRIND := valgrind --tool=memcheck --quiet --trace-children=yes --error-exitcode=3 --leak-check=full $(VALGRIND_EXTRA_ARGS)
+endif
+
+# Check what to run for -all.
+LIT_ALL_TESTSUITES := $(LIT_TESTSUITE)
+
+extra-lit-site-cfgs::
+.PHONY: extra-lit-site-cfgs
+
+ifneq ($(strip $(filter check-local-all,$(MAKECMDGOALS))),)
+ifndef TESTSUITE
+ifeq ($(shell test -d $(PROJ_SRC_DIR)/../tools/clang && echo OK), OK)
+LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/test
+
+# Force creation of Clang's lit.site.cfg.
+clang-lit-site-cfg: FORCE
+ $(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/test lit.site.cfg
+extra-lit-site-cfgs:: clang-lit-site-cfg
+endif
+endif
endif
IGNORE_TESTS :=
@@ -46,23 +71,43 @@ ifndef RUNLLVM2CPP
IGNORE_TESTS += llvm2cpp.exp
endif
-IGNORE_TESTS += $(filter-out $(BINDINGS_TO_BUILD:=.exp),$(ALL_BINDINGS:=.exp))
-
ifdef IGNORE_TESTS
RUNTESTFLAGS += --ignore "$(strip $(IGNORE_TESTS))"
endif
+# Neither AuroraUX nor Solaris has the -m flag for ulimit
+ifeq ($(HOST_OS),SunOS)
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
+else
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 512000 ;
+endif
+
ifneq ($(RUNTEST),)
check-local:: site.exp
- ( ulimit -t 600 ; ulimit -d 512000 ; \
- ulimit -m 512000 ; ulimit -v 512000 ; \
- PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \
+ ( $(ULIMIT) \
+ PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \
$(RUNTEST) $(RUNTESTFLAGS) )
else
check-local:: site.exp
@echo "*** dejagnu not found. Make sure 'runtest' is in your PATH, then reconfigure LLVM."
endif
+check-local-lit:: lit.site.cfg Unit/lit.site.cfg
+ ( $(ULIMIT) \
+ $(LLVM_SRC_ROOT)/utils/lit/lit.py \
+ --path "$(LLVMToolDir)" \
+ --path "$(LLVM_SRC_ROOT)/test/Scripts" \
+ --path "$(LLVMGCCDIR)/bin" \
+ $(LIT_ARGS) $(LIT_TESTSUITE) )
+
+check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs
+ ( $(ULIMIT) \
+ $(LLVM_SRC_ROOT)/utils/lit/lit.py \
+ --path "$(LLVMToolDir)" \
+ --path "$(LLVM_SRC_ROOT)/test/Scripts" \
+ --path "$(LLVMGCCDIR)/bin" \
+ $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
+
ifdef TESTONE
CLEANED_TESTONE := $(patsubst %/,%,$(TESTONE))
CLEANED_TESTONE := $(patsubst test/%,%,$(CLEANED_TESTONE))
@@ -78,8 +123,7 @@ check-one: site.exp $(TCLSH)
echo "proc verbose args { }" ; \
echo "source $(LLVM_SRC_ROOT)/test/lib/llvm.exp" ; \
echo "RunLLVMTests $(TESTPATH)" ) | \
- ( ulimit -t 600 ; ulimit -d 512000 ; \
- ulimit -m 512000 ; ulimit -v 512000 ; \
+ ( $(ULIMIT) \
PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \
$(TCLSH) )
endif
@@ -88,7 +132,7 @@ clean::
$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
# dsymutil is used on Darwin to manipulate DWARF debugging information.
-ifeq ($(OS),Darwin)
+ifeq ($(TARGET_OS),Darwin)
DSYMUTIL=dsymutil
else
DSYMUTIL=true
@@ -99,6 +143,11 @@ else
BUGPOINT_TOPTS=""
endif
+ifneq ($(OCAMLOPT),)
+CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
+CXX_FOR_OCAMLOPT := $(subst gcc,g++,$(CC_FOR_OCAMLOPT))
+endif
+
FORCE:
site.exp: FORCE
@@ -110,9 +159,9 @@ site.exp: FORCE
@echo 'set TARGETS_TO_BUILD "$(TARGETS_TO_BUILD)"' >> site.tmp
@echo 'set llvmgcc_langs "$(LLVMGCC_LANGS)"' >> site.tmp
@echo 'set llvmgcc_version "$(LLVMGCC_VERSION)"' >> site.tmp
- @echo 'set prcontext "$(TCLSH) $(LLVM_SRC_ROOT)/test/Scripts/prcontext.tcl"' >> site.tmp
@echo 'set llvmtoolsdir "$(ToolDir)"' >>site.tmp
@echo 'set llvmlibsdir "$(LibDir)"' >>site.tmp
+ @echo 'set llvm_bindings "$(BINDINGS_TO_BUILD)"' >> site.tmp
@echo 'set srcroot "$(LLVM_SRC_ROOT)"' >>site.tmp
@echo 'set objroot "$(LLVM_OBJ_ROOT)"' >>site.tmp
@echo 'set srcdir "$(LLVM_SRC_ROOT)/test"' >>site.tmp
@@ -127,7 +176,7 @@ site.exp: FORCE
@echo 'set llvmgccmajvers "$(LLVMGCC_MAJVERS)"' >> site.tmp
@echo 'set bugpoint_topts $(BUGPOINT_TOPTS)' >> site.tmp
@echo 'set shlibext "$(SHLIBEXT)"' >> site.tmp
- @echo 'set ocamlc "$(OCAMLC) -cc $(CXX) -I $(LibDir)/ocaml"' >> site.tmp
+ @echo 'set ocamlopt "$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml"' >> site.tmp
@echo 'set valgrind "$(VALGRIND)"' >> site.tmp
@echo 'set grep "$(GREP)"' >>site.tmp
@echo 'set gas "$(GAS)"' >>site.tmp
@@ -138,3 +187,26 @@ site.exp: FORCE
@-rm -f site.bak
@test ! -f site.exp || mv site.exp site.bak
@mv site.tmp site.exp
+
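+# Create lit.site.cfg by substituting this build tree's paths into
+# lit.site.cfg.in.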
+lit.site.cfg: site.exp
+ @echo "Making LLVM 'lit.site.cfg' file..."
+ @sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \
+ -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
+ -e "s#@LLVM_TOOLS_DIR@#$(ToolDir)#g" \
+ -e "s#@LLVMGCCDIR@#$(LLVMGCCDIR)#g" \
+ $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
+
+Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
+ @echo "Making LLVM unittest 'lit.site.cfg' file..."
+ @echo "## Autogenerated by Makefile ##" > $@
+ @echo "# Do not edit!" >> $@
+ @echo >> $@
+ @echo "# Preserve some key paths for use by main LLVM test suite config." >> $@
+ @echo "config.llvm_obj_root = \"\"\"$(LLVM_OBJ_ROOT)\"\"\"" >> $@
+ @echo >> $@
+ @echo "# Remember the build mode." >> $@
+ @echo "config.llvm_build_mode = \"\"\"$(BuildMode)\"\"\"" >> $@
+ @echo >> $@
+ @echo "# Let the main config do the real work." >> $@
+ @echo "lit.load_config(config, \"\"\"$(LLVM_SRC_ROOT)/test/Unit/lit.cfg\"\"\")" >> $@
+
diff --git a/test/Makefile.tests b/test/Makefile.tests
index ad9f2eabc7f1..90e9f2c70574 100644
--- a/test/Makefile.tests
+++ b/test/Makefile.tests
@@ -69,7 +69,7 @@ Output/%.bc: Output/%.ll $(LGCCAS)
# LLVM source, use the non-transforming assembler.
#
Output/%.bc: %.ll $(LLVMAS) Output/.dir
- -$(LLVMAS) -f $< -o $@
+ -$(LLVMAS) $< -o $@
## Cancel built-in implicit rules that override above rules
%: %.s
diff --git a/test/Other/2002-01-31-CallGraph.ll b/test/Other/2002-01-31-CallGraph.ll
index bb4c23e8779c..0e4c87751263 100644
--- a/test/Other/2002-01-31-CallGraph.ll
+++ b/test/Other/2002-01-31-CallGraph.ll
@@ -1,6 +1,6 @@
; Call graph construction crash: Not handling indirect calls right
;
-; RUN: llvm-as < %s | opt -analyze -print-callgraph >& /dev/null
+; RUN: opt < %s -analyze -print-callgraph >& /dev/null
;
%FunTy = type i32 (i32)
diff --git a/test/Other/2002-02-24-InlineBrokePHINodes.ll b/test/Other/2002-02-24-InlineBrokePHINodes.ll
index cbb1a8969377..db26942096d4 100644
--- a/test/Other/2002-02-24-InlineBrokePHINodes.ll
+++ b/test/Other/2002-02-24-InlineBrokePHINodes.ll
@@ -1,7 +1,7 @@
; Inlining used to break PHI nodes. This tests that they are correctly updated
; when a node is split around the call instruction. The verifier caught the error.
;
-; RUN: llvm-as < %s | opt -inline
+; RUN: opt < %s -inline
;
define i64 @test(i64 %X) {
diff --git a/test/Other/2002-03-11-ConstPropCrash.ll b/test/Other/2002-03-11-ConstPropCrash.ll
index 90dc0026935c..a6d4f5b3dbcc 100644
--- a/test/Other/2002-03-11-ConstPropCrash.ll
+++ b/test/Other/2002-03-11-ConstPropCrash.ll
@@ -5,7 +5,7 @@
;
; Fixed by adding new arguments to ConstantFoldTerminator
;
-; RUN: llvm-as < %s | opt -constprop
+; RUN: opt < %s -constprop
define void @build_tree(i32 %ml) {
; <label>:0
diff --git a/test/Other/2003-02-19-LoopInfoNestingBug.ll b/test/Other/2003-02-19-LoopInfoNestingBug.ll
index d2945536ea79..267b0e8986d2 100644
--- a/test/Other/2003-02-19-LoopInfoNestingBug.ll
+++ b/test/Other/2003-02-19-LoopInfoNestingBug.ll
@@ -2,7 +2,7 @@
; figure out that loop "Inner" should be nested inside of loop "LoopHeader",
; and instead nests it just inside loop "Top"
;
-; RUN: llvm-as < %s | opt -analyze -loops | \
+; RUN: opt < %s -analyze -loops | \
; RUN: grep { Loop at depth 3 containing: %Inner<header><latch><exit>}
;
define void @test() {
diff --git a/test/Other/2006-02-05-PassManager.ll b/test/Other/2006-02-05-PassManager.ll
index c5f50ecbfeb4..0ab5411aa195 100644
--- a/test/Other/2006-02-05-PassManager.ll
+++ b/test/Other/2006-02-05-PassManager.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -domtree -gvn -domtree -constmerge -disable-output
+; RUN: opt < %s -domtree -gvn -domtree -constmerge -disable-output
define i32 @test1() {
unreachable
diff --git a/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll b/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll
index 4ffcf96b0efe..c436e07a9ca9 100644
--- a/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll
+++ b/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll
@@ -1,4 +1,4 @@
-;RUN: llvm-as < %s | opt -codegenprepare -disable-output
+;RUN: opt < %s -codegenprepare -disable-output
define void @foo() {
entry:
diff --git a/test/Other/2007-06-05-PassID.ll b/test/Other/2007-06-05-PassID.ll
index b6bba36c8715..7a03544f691b 100644
--- a/test/Other/2007-06-05-PassID.ll
+++ b/test/Other/2007-06-05-PassID.ll
@@ -1,4 +1,4 @@
-;RUN: llvm-as < %s | opt -analyze -dot-cfg-only -disable-output 2>/dev/null
+;RUN: opt < %s -analyze -dot-cfg-only -disable-output 2>/dev/null
;PR 1497
define void @foo() {
diff --git a/test/Other/2007-06-28-PassManager.ll b/test/Other/2007-06-28-PassManager.ll
index 5968d8c68bc2..f097f59d1c2d 100644
--- a/test/Other/2007-06-28-PassManager.ll
+++ b/test/Other/2007-06-28-PassManager.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | opt -analyze -inline -disable-output
-; PR 1526
-; RUN: llvm-as < %s | opt -analyze -indvars -disable-output
-; PR 1539
+; RUN: opt < %s -analyze -inline -disable-output
+; PR1526
+; RUN: opt < %s -analyze -indvars -disable-output
+; PR1539
define i32 @test1() {
ret i32 0;
}
diff --git a/test/Other/2007-09-10-PassManager.ll b/test/Other/2007-09-10-PassManager.ll
index 863be33b3f91..ded15e569511 100644
--- a/test/Other/2007-09-10-PassManager.ll
+++ b/test/Other/2007-09-10-PassManager.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -indvars -disable-output
+; RUN: opt < %s -loop-unswitch -indvars -disable-output
; Require SCEV before LCSSA.
define void @foo() {
entry:
diff --git a/test/Other/2008-02-14-PassManager.ll b/test/Other/2008-02-14-PassManager.ll
index 985e1908ef08..6b51edb13454 100644
--- a/test/Other/2008-02-14-PassManager.ll
+++ b/test/Other/2008-02-14-PassManager.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -loop-unroll -loop-rotate -simplifycfg -disable-output
-; PR 2028
+; RUN: opt < %s -loop-unroll -loop-rotate -simplifycfg -disable-output
+; PR2028
define i32 @test1() {
ret i32 0;
}
diff --git a/test/Other/2008-03-19-PassManager.ll b/test/Other/2008-03-19-PassManager.ll
index 832465ca9f5e..e2082228e3dc 100644
--- a/test/Other/2008-03-19-PassManager.ll
+++ b/test/Other/2008-03-19-PassManager.ll
@@ -1,5 +1,5 @@
; PR 2034
-; RUN: llvm-as < %s | opt -anders-aa -instcombine -gvn -disable-output
+; RUN: opt < %s -anders-aa -instcombine -gvn -disable-output
%struct.FULL = type { i32, i32, [1000 x float*] }
define i32 @sgesl(%struct.FULL* %a, i32* %ipvt, float* %b, i32 %job) {
diff --git a/test/Other/2008-06-04-FieldSizeInPacked.ll b/test/Other/2008-06-04-FieldSizeInPacked.ll
index f718dd38e914..d90209f6af36 100644
--- a/test/Other/2008-06-04-FieldSizeInPacked.ll
+++ b/test/Other/2008-06-04-FieldSizeInPacked.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep true
+; RUN: opt < %s -instcombine -S | grep true
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Other/2008-08-14-PassManager.ll b/test/Other/2008-08-14-PassManager.ll
index 110f38063fbb..22a421d2f18d 100644
--- a/test/Other/2008-08-14-PassManager.ll
+++ b/test/Other/2008-08-14-PassManager.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -loop-deletion -loop-index-split -disable-output
-; PR 2640
+; RUN: opt < %s -loop-deletion -loop-index-split -disable-output
+; PR2640
define i32 @test1() {
ret i32 0;
}
diff --git a/test/Other/2008-10-06-RemoveDeadPass.ll b/test/Other/2008-10-06-RemoveDeadPass.ll
index a82d1b6f4b77..7cec2c57c2f6 100644
--- a/test/Other/2008-10-06-RemoveDeadPass.ll
+++ b/test/Other/2008-10-06-RemoveDeadPass.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -internalize -disable-output
+; RUN: opt < %s -inline -internalize -disable-output
define void @foo() nounwind {
ret void
}
diff --git a/test/Other/2009-03-31-CallGraph.ll b/test/Other/2009-03-31-CallGraph.ll
index 43578be2d92c..d6653ecbe8ef 100644
--- a/test/Other/2009-03-31-CallGraph.ll
+++ b/test/Other/2009-03-31-CallGraph.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -prune-eh -disable-output
+; RUN: opt < %s -inline -prune-eh -disable-output
define void @f2() {
invoke void @f6()
to label %ok1 unwind label %lpad1
diff --git a/test/Other/2009-06-05-no-implicit-float.ll b/test/Other/2009-06-05-no-implicit-float.ll
index 5addfe2d99ab..0d02e3c9e2f5 100644
--- a/test/Other/2009-06-05-no-implicit-float.ll
+++ b/test/Other/2009-06-05-no-implicit-float.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -verify | llvm-dis | grep noimplicitfloat
+; RUN: opt < %s -verify -S | grep noimplicitfloat
define void @f() noimplicitfloat {
}
diff --git a/test/Other/2009-09-14-function-elements.ll b/test/Other/2009-09-14-function-elements.ll
new file mode 100644
index 000000000000..883d76d10755
--- /dev/null
+++ b/test/Other/2009-09-14-function-elements.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as %s -disable-output 2>/dev/null
+
+; Arrays and structures with function types (not function pointers) are illegal.
+
+@foo = external global [4 x i32 (i32)]
+@bar = external global { i32 (i32) }
diff --git a/test/Scripts/macho-dump b/test/Scripts/macho-dump
new file mode 100755
index 000000000000..12ec26d45896
--- /dev/null
+++ b/test/Scripts/macho-dump
@@ -0,0 +1,256 @@
+#!/usr/bin/env python
+
+import struct
+import sys
+import StringIO
+
+class Reader:
+ def __init__(self, path):
+ if path == '-':
+ # Snarf all the data so we can seek.
+ self.file = StringIO.StringIO(sys.stdin.read())
+ else:
+ self.file = open(path,'rb')
+ self.isLSB = None
+
+ self.string_table = None
+
+ def setLSB(self, isLSB):
+ self.isLSB = bool(isLSB)
+
+ def tell(self):
+ return self.file.tell()
+
+ def seek(self, pos):
+ self.file.seek(pos)
+
+ def read(self, N):
+ data = self.file.read(N)
+ if len(data) != N:
+ raise ValueError,"Out of data!"
+ return data
+
+ def read8(self):
+ return ord(self.read(1))
+
+ def read16(self):
+ return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
+
+ def read32(self):
+ # Force to 32-bit, if possible; otherwise these might be long ints on a
+ # big-endian platform. FIXME: Why???
+ Value = struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
+ return int(Value)
+
+ def registerStringTable(self, strings):
+ if self.string_table is not None:
+ raise ValueError,"%s: warning: multiple string tables" % sys.argv[0]
+
+ self.string_table = strings
+
+ def getString(self, index):
+ if self.string_table is None:
+ raise ValueError,"%s: warning: no string table registered" % sys.argv[0]
+
+ end = self.string_table.index('\x00', index)
+ return self.string_table[index:end]
+
+def dumpmacho(path, opts):
+ f = Reader(path)
+
+ magic = f.read(4)
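+    # 32-bit Mach-O magic: MH_MAGIC (0xFEEDFACE) when the file is
+    # big-endian, MH_CIGAM (the byte-swapped form) when little-endian.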
+ if magic == '\xFE\xED\xFA\xCE':
+ f.setLSB(False)
+ elif magic == '\xCE\xFA\xED\xFE':
+ f.setLSB(True)
+ else:
+ raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path
+
+ print "('cputype', %r)" % f.read32()
+ print "('cpusubtype', %r)" % f.read32()
+ filetype = f.read32()
+ print "('filetype', %r)" % filetype
+
+ numLoadCommands = f.read32()
+    print "('num_load_commands', %r)" % numLoadCommands
+
+ loadCommandsSize = f.read32()
+ print "('load_commands_size', %r)" % loadCommandsSize
+
+ print "('flag', %r)" % f.read32()
+
+ start = f.tell()
+
+ print "('load_commands', ["
+ for i in range(numLoadCommands):
+ dumpLoadCommand(f, i, opts)
+ print "])"
+
+ if f.tell() - start != loadCommandsSize:
+ raise ValueError,"%s: warning: invalid load commands size: %r" % (sys.argv[0], loadCommandsSize)
+
+def dumpLoadCommand(f, i, opts):
+ start = f.tell()
+
+ print " # Load Command %r" % i
+ cmd = f.read32()
+ print " (('command', %r)" % cmd
+ cmdSize = f.read32()
+ print " ('size', %r)" % cmdSize
+
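+    # Load command types dumped below: 1 = LC_SEGMENT, 2 = LC_SYMTAB,
+    # 11 = LC_DYSYMTAB; anything else is skipped with a warning.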
+ if cmd == 1:
+ dumpSegmentLoadCommand32(f, opts)
+ elif cmd == 2:
+ dumpSymtabCommand(f, opts)
+ elif cmd == 11:
+ dumpDysymtabCommand(f, opts)
+ else:
+ print >>sys.stderr,"%s: warning: unknown load command: %r" % (sys.argv[0], cmd)
+ f.read(cmdSize - 8)
+ print " ),"
+
+ if f.tell() - start != cmdSize:
+ raise ValueError,"%s: warning: invalid load command size: %r" % (sys.argv[0], cmdSize)
+
+def dumpSegmentLoadCommand32(f, opts):
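+    # LC_SEGMENT payload: a 16-byte segment name, eight 32-bit fields,
+    # then num_sections section headers.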
+ print " ('segment_name', %r)" % f.read(16)
+ print " ('vm_addr', %r)" % f.read32()
+ print " ('vm_size', %r)" % f.read32()
+ print " ('file_offset', %r)" % f.read32()
+ print " ('file_size', %r)" % f.read32()
+ print " ('maxprot', %r)" % f.read32()
+ print " ('initprot', %r)" % f.read32()
+ numSections = f.read32()
+ print " ('num_sections', %r)" % numSections
+ print " ('flags', %r)" % f.read32()
+
+ print " ('sections', ["
+ for i in range(numSections):
+ dumpSection32(f, i, opts)
+ print " ])"
+
+def dumpSymtabCommand(f, opts):
+ symoff = f.read32()
+ print " ('symoff', %r)" % symoff
+ nsyms = f.read32()
+ print " ('nsyms', %r)" % nsyms
+ stroff = f.read32()
+ print " ('stroff', %r)" % stroff
+ strsize = f.read32()
+ print " ('strsize', %r)" % strsize
+
+ prev_pos = f.tell()
+
+ f.seek(stroff)
+ string_data = f.read(strsize)
+ print " ('_string_data', %r)" % string_data
+
+ f.registerStringTable(string_data)
+
+ f.seek(symoff)
+ print " ('_symbols', ["
+ for i in range(nsyms):
+ dumpNlist32(f, i, opts)
+ print " ])"
+
+ f.seek(prev_pos)
+
+def dumpNlist32(f, i, opts):
+ print " # Symbol %r" % i
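+    # n_strx is a byte offset into the string table registered by
+    # dumpSymtabCommand; the name is resolved via getString() below.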
+ n_strx = f.read32()
+ print " (('n_strx', %r)" % n_strx
+ n_type = f.read8()
+ print " ('n_type', %#x)" % n_type
+ n_sect = f.read8()
+ print " ('n_sect', %r)" % n_sect
+ n_desc = f.read16()
+ print " ('n_desc', %r)" % n_desc
+ n_value = f.read32()
+ print " ('n_value', %r)" % n_value
+ print " ('_string', %r)" % f.getString(n_strx)
+ print " ),"
+
+def dumpDysymtabCommand(f, opts):
+ print " ('ilocalsym', %r)" % f.read32()
+ print " ('nlocalsym', %r)" % f.read32()
+ print " ('iextdefsym', %r)" % f.read32()
+ print " ('nextdefsym', %r)" % f.read32()
+ print " ('iundefsym', %r)" % f.read32()
+ print " ('nundefsym', %r)" % f.read32()
+ print " ('tocoff', %r)" % f.read32()
+ print " ('ntoc', %r)" % f.read32()
+ print " ('modtaboff', %r)" % f.read32()
+ print " ('nmodtab', %r)" % f.read32()
+ print " ('extrefsymoff', %r)" % f.read32()
+ print " ('nextrefsyms', %r)" % f.read32()
+ indirectsymoff = f.read32()
+ print " ('indirectsymoff', %r)" % indirectsymoff
+ nindirectsyms = f.read32()
+ print " ('nindirectsyms', %r)" % nindirectsyms
+ print " ('extreloff', %r)" % f.read32()
+ print " ('nextrel', %r)" % f.read32()
+ print " ('locreloff', %r)" % f.read32()
+ print " ('nlocrel', %r)" % f.read32()
+
+ prev_pos = f.tell()
+
+ f.seek(indirectsymoff)
+ print " ('_indirect_symbols', ["
+ for i in range(nindirectsyms):
+ print " # Indirect Symbol %r" % i
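+        # Each entry is an index into the symbol table; the reserved value
+        # 0x80000000 (INDIRECT_SYMBOL_LOCAL) marks an entry with no symbol.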
+ print " (('symbol_index', %#x),)," % f.read32()
+ print " ])"
+
+ f.seek(prev_pos)
+
+def dumpSection32(f, i, opts):
+ print " # Section %r" % i
+ print " (('section_name', %r)" % f.read(16)
+ print " ('segment_name', %r)" % f.read(16)
+ print " ('address', %r)" % f.read32()
+ size = f.read32()
+ print " ('size', %r)" % size
+ offset = f.read32()
+ print " ('offset', %r)" % offset
+ print " ('alignment', %r)" % f.read32()
+ reloc_offset = f.read32()
+ print " ('reloc_offset', %r)" % reloc_offset
+ num_reloc = f.read32()
+ print " ('num_reloc', %r)" % num_reloc
+ print " ('flags', %#x)" % f.read32()
+ print " ('reserved1', %r)" % f.read32()
+ print " ('reserved2', %r)" % f.read32()
+ print " ),"
+
+ prev_pos = f.tell()
+
+ f.seek(reloc_offset)
+ print " ('_relocations', ["
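+    # Each relocation record is two 32-bit words (struct relocation_info),
+    # dumped raw here as word-0 and word-1.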
+ for i in range(num_reloc):
+ print " # Relocation %r" % i
+ print " (('word-0', %#x)," % f.read32()
+ print " ('word-1', %#x))," % f.read32()
+ print " ])"
+
+ if opts.dumpSectionData:
+ f.seek(offset)
+ print " ('_section_data', %r)" % f.read(size)
+
+ f.seek(prev_pos)
+
+def main():
+    from optparse import OptionParser
+ parser = OptionParser("usage: %prog [options] {files}")
+ parser.add_option("", "--dump-section-data", dest="dumpSectionData",
+ help="Dump the contents of sections",
+ action="store_true", default=False)
+ (opts, args) = parser.parse_args()
+
+ if not args:
+ args.append('-')
+
+ for arg in args:
+ dumpmacho(arg, opts)
+
+if __name__ == '__main__':
+ main()
diff --git a/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll b/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
index 893d82b9f5e0..43462faa47f6 100644
--- a/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
+++ b/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | opt -adce
+; RUN: opt < %s -adce
define i32 @"main"(i32 %argc)
begin
diff --git a/test/Transforms/ADCE/2002-05-22-PHITest.ll b/test/Transforms/ADCE/2002-05-22-PHITest.ll
index 04f0051af8dc..0095be1f5a16 100644
--- a/test/Transforms/ADCE/2002-05-22-PHITest.ll
+++ b/test/Transforms/ADCE/2002-05-22-PHITest.ll
@@ -1,6 +1,6 @@
; It is illegal to remove BB1 because it will mess up the PHI node!
;
-; RUN: llvm-as < %s | opt -adce | llvm-dis | grep BB1
+; RUN: opt < %s -adce -S | grep BB1
define i32 @test(i1 %C, i32 %A, i32 %B) {
; <label>:0
diff --git a/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll b/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
index 911f8e678b35..9407b5a68d69 100644
--- a/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
+++ b/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
@@ -4,7 +4,7 @@
; removed even though there were uses still around. Now the uses are filled
; in with a dummy value before the PHI is deleted.
;
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
%node_t = type { double*, %node_t*, %node_t**, double**, double*, i32, i32 }
diff --git a/test/Transforms/ADCE/2002-05-28-Crash-distilled.ll b/test/Transforms/ADCE/2002-05-28-Crash-distilled.ll
index 5d25ff13525e..337be9f4fa43 100644
--- a/test/Transforms/ADCE/2002-05-28-Crash-distilled.ll
+++ b/test/Transforms/ADCE/2002-05-28-Crash-distilled.ll
@@ -1,6 +1,6 @@
; This testcase is a distilled form of: 2002-05-28-Crash.ll
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
define float @test(i32 %i) {
%F = sitofp i32 %i to float ; <float> [#uses=1]
diff --git a/test/Transforms/ADCE/2002-05-28-Crash.ll b/test/Transforms/ADCE/2002-05-28-Crash.ll
index 27c7205203c0..9bbbd055ff96 100644
--- a/test/Transforms/ADCE/2002-05-28-Crash.ll
+++ b/test/Transforms/ADCE/2002-05-28-Crash.ll
@@ -11,7 +11,7 @@
; return !s;
;}
;
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
define i32 @rx_bitset_empty(i32 %size, i32* %set) {
bb1:
diff --git a/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll b/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
index fb4fe4a8d279..8f8dadf7332f 100644
--- a/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
+++ b/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
@@ -3,7 +3,7 @@
; block in this function, it would work fine, but that would be the part we
; have to fix now, wouldn't it....
;
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
define void @foo(i8* %reg5481) {
%cast611 = bitcast i8* %reg5481 to i8** ; <i8**> [#uses=1]
diff --git a/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll b/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
index 42a7e4dcde4a..2f0df670d6ac 100644
--- a/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
+++ b/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
@@ -1,6 +1,6 @@
; This testcase was extracted from the gzip SPEC benchmark
;
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
@bk = external global i32 ; <i32*> [#uses=2]
@hufts = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/ADCE/2002-07-29-Segfault.ll b/test/Transforms/ADCE/2002-07-29-Segfault.ll
index 1c65b3b54434..1c8e6e8adf05 100644
--- a/test/Transforms/ADCE/2002-07-29-Segfault.ll
+++ b/test/Transforms/ADCE/2002-07-29-Segfault.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce -disable-output
define void @test() {
br label %BB3
diff --git a/test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll b/test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll
index 4ec900560705..17003be9fb14 100644
--- a/test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll
+++ b/test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll
@@ -1,5 +1,5 @@
; Testcase reduced from 197.parser by bugpoint
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
define void @conjunction_prune() {
; <label>:0
diff --git a/test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll b/test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll
index 603b14b537d5..d30df19fc7c4 100644
--- a/test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll
+++ b/test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll
@@ -2,7 +2,7 @@
; entries for its postdominator. But I think this can only happen when the
; PHI node is dead, so we just avoid patching up dead PHI nodes.
-; RUN: llvm-as < %s | opt -adce
+; RUN: opt < %s -adce
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll b/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
index a9657a7a37d1..5206b243e372 100644
--- a/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
+++ b/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
@G = external global i32* ; <i32**> [#uses=1]
diff --git a/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll b/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
index e5dd0cc6d9a7..eb3ef1e7913f 100644
--- a/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
+++ b/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
target datalayout = "e-p:32:32"
%struct..CppObjTypeDesc = type { i32, i16, i16 }
%struct..TypeToken = type { i32, i16, i16 }
diff --git a/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll b/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
index 9c6764d56235..82fa5b2a40e1 100644
--- a/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
+++ b/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -simplifycfg | llvm-dis | not grep then:
+; RUN: opt < %s -adce -simplifycfg -S | not grep then:
define void @dead_test8(i32* %data.1, i32 %idx.1) {
entry:
diff --git a/test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll b/test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll
index e6345c13eba3..444ca8ec904b 100644
--- a/test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll
+++ b/test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
define void @test() {
br i1 false, label %then, label %endif
diff --git a/test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll b/test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll
index 37e077f21cb9..499ac515e449 100644
--- a/test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll
+++ b/test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
define i32 @main() {
br label %loop
diff --git a/test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll b/test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll
index 6e9b17ed3131..5ba1a2eadfc5 100644
--- a/test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll
+++ b/test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -simplifycfg | llvm-dis | grep call
+; RUN: opt < %s -adce -simplifycfg -S | grep call
declare void @exit(i32)
define i32 @main(i32 %argc) {
diff --git a/test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll b/test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll
index 87a1550e9d45..a6a41fd69eff 100644
--- a/test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll
+++ b/test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
define void @test() {
entry:
diff --git a/test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll b/test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll
index 8ddbbbe2c50d..991e876a25b7 100644
--- a/test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll
+++ b/test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
declare void @strlen()
diff --git a/test/Transforms/ADCE/basictest.ll b/test/Transforms/ADCE/basictest.ll
index 22ff0f753bec..378d70288f3f 100644
--- a/test/Transforms/ADCE/basictest.ll
+++ b/test/Transforms/ADCE/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -simplifycfg | llvm-dis
+; RUN: opt < %s -adce -simplifycfg | llvm-dis
define i32 @Test(i32 %A, i32 %B) {
BB1:
diff --git a/test/Transforms/ADCE/basictest1.ll b/test/Transforms/ADCE/basictest1.ll
index 29f40deb9c7f..bbb88783deb2 100644
--- a/test/Transforms/ADCE/basictest1.ll
+++ b/test/Transforms/ADCE/basictest1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -simplifycfg | llvm-dis
+; RUN: opt < %s -adce -simplifycfg | llvm-dis
%FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
%spec_fd_t = type { i32, i32, i32, i8* }
@__iob = external global [20 x %FILE] ; <[20 x %FILE]*> [#uses=1]
diff --git a/test/Transforms/ADCE/basictest2.ll b/test/Transforms/ADCE/basictest2.ll
index 120e23352dd0..a17795f14246 100644
--- a/test/Transforms/ADCE/basictest2.ll
+++ b/test/Transforms/ADCE/basictest2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -simplifycfg | llvm-dis
+; RUN: opt < %s -adce -simplifycfg | llvm-dis
%FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
%spec_fd_t = type { i32, i32, i32, i8* }
@__iob = external global [20 x %FILE] ; <[20 x %FILE]*> [#uses=1]
diff --git a/test/Transforms/ADCE/dce_pure_call.ll b/test/Transforms/ADCE/dce_pure_call.ll
index 3935bf72b9c1..66483abbc919 100644
--- a/test/Transforms/ADCE/dce_pure_call.ll
+++ b/test/Transforms/ADCE/dce_pure_call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce | llvm-dis | not grep call
+; RUN: opt -adce -S < %s | not grep call
declare i32 @strlen(i8*) readonly nounwind
diff --git a/test/Transforms/ADCE/dce_pure_invoke.ll b/test/Transforms/ADCE/dce_pure_invoke.ll
index bd28df2b5e75..c16d45cc2393 100644
--- a/test/Transforms/ADCE/dce_pure_invoke.ll
+++ b/test/Transforms/ADCE/dce_pure_invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce | llvm-dis | grep null
+; RUN: opt < %s -adce -S | grep null
declare i32 @strlen(i8*) readnone
diff --git a/test/Transforms/ADCE/unreachable-function.ll b/test/Transforms/ADCE/unreachable-function.ll
index 86c55f591688..7c6a30ec6b6f 100644
--- a/test/Transforms/ADCE/unreachable-function.ll
+++ b/test/Transforms/ADCE/unreachable-function.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -adce -disable-output
+; RUN: opt < %s -adce -disable-output
define void @test() {
unreachable
diff --git a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index 67c061fc1ca6..e740b29f9288 100644
--- a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep nounwind | count 2
+; RUN: opt < %s -argpromotion -S | grep nounwind | count 2
define internal i32 @deref(i32* %x) nounwind {
entry:
diff --git a/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
index 11bcb285d5e0..d7d5eb548a1c 100644
--- a/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
+++ b/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion | llvm-dis > %t
+; RUN: opt < %s -argpromotion -S > %t
; RUN: cat %t | grep {define.*@callee(.*i32\\*}
; PR2498
diff --git a/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll b/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll
index 047ff0a48c6d..7ee6654ea463 100644
--- a/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll
+++ b/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -argpromotion -disable-output
+; RUN: opt < %s -inline -argpromotion -disable-output
define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind {
entry:
diff --git a/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll b/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
index d4061655008c..aff917c6a5a7 100644
--- a/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
+++ b/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion -disable-output
+; RUN: opt < %s -argpromotion -disable-output
define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind {
entry:
diff --git a/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/test/Transforms/ArgumentPromotion/aggregate-promote.ll
index 62b1a07c434e..6a60e6144d94 100644
--- a/test/Transforms/ArgumentPromotion/aggregate-promote.ll
+++ b/test/Transforms/ArgumentPromotion/aggregate-promote.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion -instcombine | llvm-dis | not grep load
+; RUN: opt < %s -argpromotion -instcombine -S | not grep load
%QuadTy = type { i32, i32, i32, i32 }
@G = constant %QuadTy {
diff --git a/test/Transforms/ArgumentPromotion/attrs.ll b/test/Transforms/ArgumentPromotion/attrs.ll
index 0fb38be1f0b2..49c075029951 100644
--- a/test/Transforms/ArgumentPromotion/attrs.ll
+++ b/test/Transforms/ArgumentPromotion/attrs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep zeroext
+; RUN: opt < %s -argpromotion -S | grep zeroext
%struct.ss = type { i32, i64 }
diff --git a/test/Transforms/ArgumentPromotion/basictest.ll b/test/Transforms/ArgumentPromotion/basictest.ll
index 99697f4c40d7..87f6371a7eb6 100644
--- a/test/Transforms/ArgumentPromotion/basictest.ll
+++ b/test/Transforms/ArgumentPromotion/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -argpromotion -mem2reg -S | not grep alloca
define internal i32 @test(i32* %X, i32* %Y) {
%A = load i32* %X ; <i32> [#uses=1]
%B = load i32* %Y ; <i32> [#uses=1]
diff --git a/test/Transforms/ArgumentPromotion/byval-2.ll b/test/Transforms/ArgumentPromotion/byval-2.ll
index d6253574ca00..bd62c6835f3b 100644
--- a/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep -F {i32* byval} | count 2
+; RUN: opt < %s -argpromotion -S | grep -F {i32* byval} | count 2
; Argpromote + scalarrepl should change this to passing the two integers by value.
%struct.ss = type { i32, i64 }
diff --git a/test/Transforms/ArgumentPromotion/byval.ll b/test/Transforms/ArgumentPromotion/byval.ll
index 3a3458f3d941..052528ab7089 100644
--- a/test/Transforms/ArgumentPromotion/byval.ll
+++ b/test/Transforms/ArgumentPromotion/byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion -scalarrepl | llvm-dis | not grep load
+; RUN: opt < %s -argpromotion -scalarrepl -S | not grep load
; Argpromote + scalarrepl should change this to passing the two integers by value.
%struct.ss = type { i32, i64 }
diff --git a/test/Transforms/ArgumentPromotion/callgraph-update.ll b/test/Transforms/ArgumentPromotion/callgraph-update.ll
new file mode 100644
index 000000000000..989043d7ea58
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/callgraph-update.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -argpromotion -simplifycfg -constmerge | llvm-dis
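+; As the file name suggests, this is a regression test for keeping the
+; call graph consistent while argpromotion rewrites calls; the passes
+; run after it only see a sane module if the graph was updated.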
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+%struct.VEC2 = type { double, double, double }
+%struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
+%struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
+
+declare %struct.edge_rec* @alloc_edge() nounwind ssp
+
+define i64 @build_delaunay(%struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind ssp {
+entry:
+ br i1 undef, label %bb11, label %bb12
+
+bb11: ; preds = %bb10
+ %a = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=0]
+ ret i64 123
+
+bb12: ; preds = %bb10
+ %b = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=1]
+ %c = ptrtoint %struct.edge_rec* %b to i64
+ ret i64 %c
+}
diff --git a/test/Transforms/ArgumentPromotion/chained.ll b/test/Transforms/ArgumentPromotion/chained.ll
index de5f6e527063..5ccb7526cbc8 100644
--- a/test/Transforms/ArgumentPromotion/chained.ll
+++ b/test/Transforms/ArgumentPromotion/chained.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion -instcombine | llvm-dis | not grep load
+; RUN: opt < %s -argpromotion -instcombine -S | not grep load
@G1 = constant i32 0 ; <i32*> [#uses=1]
@G2 = constant i32* @G1 ; <i32**> [#uses=1]
diff --git a/test/Transforms/ArgumentPromotion/control-flow.ll b/test/Transforms/ArgumentPromotion/control-flow.ll
index 76e8fd95030e..08ca6bccd632 100644
--- a/test/Transforms/ArgumentPromotion/control-flow.ll
+++ b/test/Transforms/ArgumentPromotion/control-flow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | \
+; RUN: opt < %s -argpromotion -S | \
; RUN: not grep {load i32\* null}
define internal i32 @callee(i1 %C, i32* %P) {
diff --git a/test/Transforms/ArgumentPromotion/control-flow2.ll b/test/Transforms/ArgumentPromotion/control-flow2.ll
index 58750138567f..79b44d41096e 100644
--- a/test/Transforms/ArgumentPromotion/control-flow2.ll
+++ b/test/Transforms/ArgumentPromotion/control-flow2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | \
+; RUN: opt < %s -argpromotion -S | \
; RUN: grep {load i32\\* %A}
define internal i32 @callee(i1 %C, i32* %P) {
diff --git a/test/Transforms/ArgumentPromotion/pr3085.ll b/test/Transforms/ArgumentPromotion/pr3085.ll
index e350370ca7af..3048c603deb3 100644
--- a/test/Transforms/ArgumentPromotion/pr3085.ll
+++ b/test/Transforms/ArgumentPromotion/pr3085.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -disable-output -loop-extract-single -loop-rotate -loop-reduce -argpromotion
+; RUN: opt < %s -disable-output -loop-extract-single -loop-rotate -loop-reduce -argpromotion
; PR 3085
%struct.Lit = type { i8 }
diff --git a/test/Transforms/BlockPlacement/basictest.ll b/test/Transforms/BlockPlacement/basictest.ll
index 4eec23e49cf1..47b507903bce 100644
--- a/test/Transforms/BlockPlacement/basictest.ll
+++ b/test/Transforms/BlockPlacement/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -block-placement -disable-output -print-function 2> /dev/null
+; RUN: opt < %s -block-placement -disable-output -print-function 2> /dev/null
define i32 @test() {
br i1 true, label %X, label %Y
diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index 8afdcae50ae9..9d82819f9db4 100644
--- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts -o - | llc -o - | grep bork_directive | wc -l | grep 2
+; RUN: opt < %s -std-compile-opts -o - | llc -o - | grep bork_directive | wc -l | grep 2
;; We don't want branch folding to fold asm directives.
diff --git a/test/Transforms/CodeExtractor/2004-03-13-LoopExtractorCrash.ll b/test/Transforms/CodeExtractor/2004-03-13-LoopExtractorCrash.ll
index aba35642e4dc..3d0339bc2dbb 100644
--- a/test/Transforms/CodeExtractor/2004-03-13-LoopExtractorCrash.ll
+++ b/test/Transforms/CodeExtractor/2004-03-13-LoopExtractorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract -disable-output
+; RUN: opt < %s -loop-extract -disable-output
define void @solve() {
entry:
diff --git a/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll b/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
index a41430d45d3c..a6ee63ec45ae 100644
--- a/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
+++ b/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract -disable-output
+; RUN: opt < %s -loop-extract -disable-output
; This testcase is failing the loop extractor because not all exit blocks
; are dominated by all of the live-outs.
diff --git a/test/Transforms/CodeExtractor/2004-03-14-NoSwitchSupport.ll b/test/Transforms/CodeExtractor/2004-03-14-NoSwitchSupport.ll
index ded821b702f5..7cd72797a675 100644
--- a/test/Transforms/CodeExtractor/2004-03-14-NoSwitchSupport.ll
+++ b/test/Transforms/CodeExtractor/2004-03-14-NoSwitchSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract-single -disable-output
+; RUN: opt < %s -loop-extract-single -disable-output
define void @ab() {
entry:
diff --git a/test/Transforms/CodeExtractor/2004-03-17-MissedLiveIns.ll b/test/Transforms/CodeExtractor/2004-03-17-MissedLiveIns.ll
index e2030e2db987..01fe54be29ac 100644
--- a/test/Transforms/CodeExtractor/2004-03-17-MissedLiveIns.ll
+++ b/test/Transforms/CodeExtractor/2004-03-17-MissedLiveIns.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract -disable-output
+; RUN: opt < %s -loop-extract -disable-output
define void @sendMTFValues() {
entry:
diff --git a/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll b/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll
index 9f4b273a61ea..0fbd3307ae31 100644
--- a/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll
+++ b/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract -disable-output
+; RUN: opt < %s -loop-extract -disable-output
%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
%struct.table_t = type { [1 x %struct.node_t**], [1 x %struct.node_t**] }
diff --git a/test/Transforms/CodeExtractor/2004-03-17-UpdatePHIsOutsideRegion.ll b/test/Transforms/CodeExtractor/2004-03-17-UpdatePHIsOutsideRegion.ll
index b1b0324bca50..6b306d232e04 100644
--- a/test/Transforms/CodeExtractor/2004-03-17-UpdatePHIsOutsideRegion.ll
+++ b/test/Transforms/CodeExtractor/2004-03-17-UpdatePHIsOutsideRegion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract -disable-output
+; RUN: opt < %s -loop-extract -disable-output
define void @maketree() {
entry:
diff --git a/test/Transforms/CodeExtractor/2004-03-18-InvokeHandling.ll b/test/Transforms/CodeExtractor/2004-03-18-InvokeHandling.ll
index 48e5a768e0e5..91e9799ad9ca 100644
--- a/test/Transforms/CodeExtractor/2004-03-18-InvokeHandling.ll
+++ b/test/Transforms/CodeExtractor/2004-03-18-InvokeHandling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-extract -disable-output
+; RUN: opt < %s -loop-extract -disable-output
declare i32 @_IO_getc()
diff --git a/test/Transforms/CodeExtractor/2004-08-12-BlockExtractPHI.ll b/test/Transforms/CodeExtractor/2004-08-12-BlockExtractPHI.ll
index 0ab5498b0491..9f70bdc71b1b 100644
--- a/test/Transforms/CodeExtractor/2004-08-12-BlockExtractPHI.ll
+++ b/test/Transforms/CodeExtractor/2004-08-12-BlockExtractPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -extract-blocks -disable-output
+; RUN: opt < %s -extract-blocks -disable-output
define void @test1() {
no_exit.0.i:
diff --git a/test/Transforms/CodeExtractor/2004-11-12-InvokeExtract.ll b/test/Transforms/CodeExtractor/2004-11-12-InvokeExtract.ll
index 8cd609482668..fc58577f67ab 100644
--- a/test/Transforms/CodeExtractor/2004-11-12-InvokeExtract.ll
+++ b/test/Transforms/CodeExtractor/2004-11-12-InvokeExtract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -extract-blocks -disable-output
+; RUN: opt < %s -extract-blocks -disable-output
define i32 @foo() {
br label %EB
diff --git a/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll b/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll
index 12d6c0c1f8f9..1995c7fda291 100644
--- a/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll
+++ b/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -codegenprepare | llvm-dis
+; RUN: opt < %s -codegenprepare | llvm-dis
; PR3113
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/CondProp/2006-08-14-SingleEntryPhiCrash.ll b/test/Transforms/CondProp/2006-08-14-SingleEntryPhiCrash.ll
index fa083799b9f2..754d19d166e8 100644
--- a/test/Transforms/CondProp/2006-08-14-SingleEntryPhiCrash.ll
+++ b/test/Transforms/CondProp/2006-08-14-SingleEntryPhiCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop -disable-output
+; RUN: opt < %s -condprop -disable-output
; PR877
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin9.0.0d1"
diff --git a/test/Transforms/CondProp/2006-11-01-PhiNodeCrash.ll b/test/Transforms/CondProp/2006-11-01-PhiNodeCrash.ll
index 573ee6cadd39..4df8ff94499a 100644
--- a/test/Transforms/CondProp/2006-11-01-PhiNodeCrash.ll
+++ b/test/Transforms/CondProp/2006-11-01-PhiNodeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop -disable-output
+; RUN: opt < %s -condprop -disable-output
; PR979
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/CondProp/2007-08-01-InvalidRead.ll b/test/Transforms/CondProp/2007-08-01-InvalidRead.ll
index 1e1f8cbc63ff..6d5f0f5a3aca 100644
--- a/test/Transforms/CondProp/2007-08-01-InvalidRead.ll
+++ b/test/Transforms/CondProp/2007-08-01-InvalidRead.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -tailduplicate -condprop -simplifycfg -disable-output
+; RUN: opt < %s -inline -tailduplicate -condprop -simplifycfg -disable-output
; PR1575
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/CondProp/2009-01-25-SingleEntryPHI.ll b/test/Transforms/CondProp/2009-01-25-SingleEntryPHI.ll
index d14ce38f3df2..58dd29f5e28e 100644
--- a/test/Transforms/CondProp/2009-01-25-SingleEntryPHI.ll
+++ b/test/Transforms/CondProp/2009-01-25-SingleEntryPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop | llvm-dis
+; RUN: opt < %s -condprop | llvm-dis
; PR3405
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/CondProp/basictest-dbg.ll b/test/Transforms/CondProp/basictest-dbg.ll
index d8a680b89841..9e05ff15105f 100644
--- a/test/Transforms/CondProp/basictest-dbg.ll
+++ b/test/Transforms/CondProp/basictest-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop | llvm-dis | \
+; RUN: opt < %s -condprop -S | \
; RUN: not grep {br label}
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/Transforms/CondProp/basictest.ll b/test/Transforms/CondProp/basictest.ll
index 317247caf585..49d473533a10 100644
--- a/test/Transforms/CondProp/basictest.ll
+++ b/test/Transforms/CondProp/basictest.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -condprop | llvm-dis | \
+; RUN: opt < %s -condprop -S | \
; RUN: not grep {br label}
-; RUN: llvm-as < %s | opt -condprop | llvm-dis | not grep T2
+; RUN: opt < %s -condprop -S | not grep T2
define i32 @test(i1 %C) {
diff --git a/test/Transforms/CondProp/phisimplify.ll b/test/Transforms/CondProp/phisimplify.ll
index 5b5a3042eb62..ce33f1e59a65 100644
--- a/test/Transforms/CondProp/phisimplify.ll
+++ b/test/Transforms/CondProp/phisimplify.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop | llvm-dis | not grep phi
+; RUN: opt < %s -condprop -S | not grep phi
define i32 @test(i32 %C, i1 %Val) {
switch i32 %C, label %T1 [
diff --git a/test/Transforms/CondProp/phisimplify2.ll b/test/Transforms/CondProp/phisimplify2.ll
index 03d12e3e4c97..4ecbd8856b78 100644
--- a/test/Transforms/CondProp/phisimplify2.ll
+++ b/test/Transforms/CondProp/phisimplify2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop | llvm-dis | not grep phi
+; RUN: opt < %s -condprop -S | not grep phi
declare i1 @foo()
diff --git a/test/Transforms/CondProp/phisimplify3.ll b/test/Transforms/CondProp/phisimplify3.ll
index 16785971e3cc..6166efb1103f 100644
--- a/test/Transforms/CondProp/phisimplify3.ll
+++ b/test/Transforms/CondProp/phisimplify3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -condprop | llvm-dis | not grep phi
+; RUN: opt < %s -condprop -S | not grep phi
define i32 @foo(i1, i32, i32) {
prologue:
diff --git a/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll b/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll
index 1a0845c52ac0..15a621189382 100644
--- a/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll
+++ b/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll
@@ -1,6 +1,6 @@
; Make sure that the constant propagator doesn't divide by zero!
;
-; RUN: llvm-as < %s | opt -constprop
+; RUN: opt < %s -constprop
;
define i32 @test() {
diff --git a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
index 7a281aa04b67..d9cd67406b06 100644
--- a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
+++ b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
@@ -4,7 +4,7 @@
; Fix #2: The unary not instruction now no longer exists. Change to xor.
-; RUN: llvm-as < $test | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: not grep {i32 0}
define i32 @test1() {
diff --git a/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll b/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll
index 781d796506c8..dd24d965620c 100644
--- a/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll
+++ b/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll
@@ -1,6 +1,6 @@
; SetCC on boolean values was not implemented!
-; RUN: llvm-as < %s | opt -constprop -die | llvm-dis | \
+; RUN: opt < %s -constprop -die -S | \
; RUN: not grep set
define i1 @test1() {
diff --git a/test/Transforms/ConstProp/2003-05-12-DivideError.ll b/test/Transforms/ConstProp/2003-05-12-DivideError.ll
index a3f239f95bd4..2708dce9852d 100644
--- a/test/Transforms/ConstProp/2003-05-12-DivideError.ll
+++ b/test/Transforms/ConstProp/2003-05-12-DivideError.ll
@@ -1,6 +1,6 @@
; Make sure that the constant propagator doesn't cause a SIGFPE
;
-; RUN: llvm-as < %s | opt -constprop
+; RUN: opt < %s -constprop
;
define i32 @test() {
diff --git a/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll b/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
index 9ffbcd1b0ce1..0b44b99f6a95 100644
--- a/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
+++ b/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: not grep {ret i1 false}
@b = external global [2 x { }] ; <[2 x { }]*> [#uses=2]
diff --git a/test/Transforms/ConstProp/2006-11-30-vector-cast.ll b/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
index 812cbcfa6277..be76783e8b73 100644
--- a/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
+++ b/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: grep {i32 -1}
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: not grep zeroinitializer
define <4 x i32> @test() {
diff --git a/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll b/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
index 889ef56bf9c6..e46a875a7cfa 100644
--- a/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
+++ b/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {ret i1 false}
define i1 @test() {
%X = trunc i32 320 to i1 ; <i1> [#uses=1]
diff --git a/test/Transforms/ConstProp/2006-12-01-bool-casts.ll b/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
index dd1e46d8c6fc..3c06693b100d 100644
--- a/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
+++ b/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: grep {ret i32 -1}
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: grep {ret i32 1}
define i32 @test1() {
diff --git a/test/Transforms/ConstProp/2007-02-05-BitCast.ll b/test/Transforms/ConstProp/2007-02-05-BitCast.ll
index a1d7c3e0377c..ebe3d21806b7 100644
--- a/test/Transforms/ConstProp/2007-02-05-BitCast.ll
+++ b/test/Transforms/ConstProp/2007-02-05-BitCast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | grep 1065353216
+; RUN: opt < %s -constprop -S | grep 1065353216
define i32 @test() {
%A = bitcast float 1.000000e+00 to i32 ; <i32> [#uses=1]
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 995ce2febf87..37cda303713b 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | grep {ret i13 13}
+; RUN: opt < %s -constprop -S | grep {ret i13 13}
; PR1816
declare i13 @llvm.cttz.i13(i13)
diff --git a/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll b/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll
index 4c7146320485..fd5495445b79 100644
--- a/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll
+++ b/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll
@@ -1,28 +1,28 @@
-; RUN: llvm-as < %s | opt -constprop -disable-output
+; RUN: opt < %s -constprop -disable-output
; PR2529
-define <4 x i32> @test1(i32 %argc, i8** %argv) {
+define <4 x i1> @test1(i32 %argc, i8** %argv) {
entry:
- %foo = vicmp slt <4 x i32> undef, <i32 14, i32 undef, i32 undef, i32 undef>
- ret <4 x i32> %foo
+ %foo = icmp slt <4 x i32> undef, <i32 14, i32 undef, i32 undef, i32 undef>
+ ret <4 x i1> %foo
}
-define <4 x i32> @test2(i32 %argc, i8** %argv) {
+define <4 x i1> @test2(i32 %argc, i8** %argv) {
entry:
- %foo = vicmp slt <4 x i32> <i32 undef, i32 undef, i32 undef, i32
+ %foo = icmp slt <4 x i32> <i32 undef, i32 undef, i32 undef, i32
undef>, <i32 undef, i32 undef, i32 undef, i32 undef>
- ret <4 x i32> %foo
+ ret <4 x i1> %foo
}
-define <4 x i32> @test3() {
- %foo = vfcmp ueq <4 x float> <float 0.0, float 0.0, float 0.0, float
+define <4 x i1> @test3() {
+ %foo = fcmp ueq <4 x float> <float 0.0, float 0.0, float 0.0, float
undef>, <float 1.0, float 1.0, float 1.0, float undef>
- ret <4 x i32> %foo
+ ret <4 x i1> %foo
}
-define <4 x i32> @test4() {
- %foo = vfcmp ueq <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <float 1.0, float 1.0, float 1.0, float 0.0>
+define <4 x i1> @test4() {
+ %foo = fcmp ueq <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <float 1.0, float 1.0, float 1.0, float 0.0>
- ret <4 x i32> %foo
+ ret <4 x i1> %foo
}
diff --git a/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll b/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll
new file mode 100644
index 000000000000..fc7ff905ecb0
--- /dev/null
+++ b/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -constprop | llvm-dis
+; PR4848
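+; Crash regression test: constant folding the getelementptr constant
+; expression below (a GEP into a bitcast of @per_cpu__hrtimer_bases)
+; used to crash opt, so simply surviving the run is the check.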
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%0 = type { %struct.anon }
+%1 = type { %0, %2, [24 x i8] }
+%2 = type <{ %3, %3 }>
+%3 = type { %struct.hrtimer_cpu_base*, i32, %struct.rb_root, %struct.rb_node*, %struct.pgprot, i64 ()*, [16 x i8] }
+%struct.anon = type { }
+%struct.hrtimer_clock_base = type { %struct.hrtimer_cpu_base*, i32, %struct.rb_root, %struct.rb_node*, %struct.pgprot, i64 ()*, %struct.pgprot, %struct.pgprot }
+%struct.hrtimer_cpu_base = type { %0, [2 x %struct.hrtimer_clock_base], %struct.pgprot, i32, i64 }
+%struct.pgprot = type { i64 }
+%struct.rb_node = type { i64, %struct.rb_node*, %struct.rb_node* }
+%struct.rb_root = type { %struct.rb_node* }
+
+@per_cpu__hrtimer_bases = external global %1, align 8 ; <%1*> [#uses=1]
+
+define void @init_hrtimers_cpu(i32 %cpu) nounwind noredzone section ".cpuinit.text" {
+entry:
+ %tmp3 = getelementptr %struct.hrtimer_cpu_base* bitcast (%1* @per_cpu__hrtimer_bases to %struct.hrtimer_cpu_base*), i32 0, i32 0 ; <%0*> [#uses=1]
+ %tmp5 = bitcast %0* %tmp3 to i8* ; <i8*> [#uses=0]
+ unreachable
+}
diff --git a/test/Transforms/ConstProp/2009-09-19-ConstFold-i1-ConstExpr.ll b/test/Transforms/ConstProp/2009-09-19-ConstFold-i1-ConstExpr.ll
new file mode 100644
index 000000000000..8d92c3fba4bf
--- /dev/null
+++ b/test/Transforms/ConstProp/2009-09-19-ConstFold-i1-ConstExpr.ll
@@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+@X = external global i8
+@Y = external global i8
+@Z = external global i8
+
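+; In one-bit arithmetic, add and sub are addition mod 2 (i.e. xor) and
+; mul is and. An i1 division is only defined when the divisor is true,
+; so sdiv/udiv fold to the dividend and srem/urem fold to false.
+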
+@A = global i1 add (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @A = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) ; <i1*> [#uses=0]
+@B = global i1 sub (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @B = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) ; <i1*> [#uses=0]
+@C = global i1 mul (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @C = global i1 and (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) ; <i1*> [#uses=0]
+
+@D = global i1 sdiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @D = global i1 icmp ult (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+@E = global i1 udiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @E = global i1 icmp ult (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+@F = global i1 srem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @F = global i1 false ; <i1*> [#uses=0]
+@G = global i1 urem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @G = global i1 false ; <i1*> [#uses=0]
+
+@H = global i1 icmp ule (i32* bitcast (i8* @X to i32*), i32* bitcast (i8* @Y to i32*))
+; CHECK: @H = global i1 icmp ule (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+
+@I = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 false)
+; CHECK: @I = global i1 icmp ult (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+@J = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 true)
+; CHECK: @J = global i1 icmp uge (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+
+@K = global i1 icmp eq (i1 icmp ult (i8* @X, i8* @Y), i1 false)
+; CHECK: @K = global i1 icmp uge (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+@L = global i1 icmp eq (i1 icmp ult (i8* @X, i8* @Y), i1 true)
+; CHECK: @L = global i1 icmp ult (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+@M = global i1 icmp ne (i1 icmp ult (i8* @X, i8* @Y), i1 true)
+; CHECK: @M = global i1 icmp uge (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+@N = global i1 icmp ne (i1 icmp ult (i8* @X, i8* @Y), i1 false)
+; CHECK: @N = global i1 icmp ult (i8* @X, i8* @Y) ; <i1*> [#uses=0]
+
+@O = global i1 icmp eq (i32 zext (i1 icmp ult (i8* @X, i8* @Y) to i32), i32 0)
+; CHECK: @O = global i1 icmp uge (i8* @X, i8* @Y) ; <i1*> [#uses=0]
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index 56892d93186f..2edc55dbc1f0 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -1,20 +1,21 @@
+; RUN: opt < %s -constprop -die -S | FileCheck %s
+
; This is a basic sanity check for constant propagation. The add instruction
; should be eliminated.
-
-; RUN: llvm-as < %s | opt -constprop -die | llvm-dis | not grep add
-
-define i32 @test(i1 %B) {
+define i32 @test1(i1 %B) {
br i1 %B, label %BB1, label %BB2
-BB1: ; preds = %0
- %Val = add i32 0, 0 ; <i32> [#uses=1]
+BB1:
+ %Val = add i32 0, 0
br label %BB3
-BB2: ; preds = %0
+BB2:
br label %BB3
-BB3: ; preds = %BB2, %BB1
- %Ret = phi i32 [ %Val, %BB1 ], [ 1, %BB2 ] ; <i32> [#uses=1]
+BB3:
+; CHECK: @test1
+; CHECK: %Ret = phi i32 [ 0, %BB1 ], [ 1, %BB2 ]
+ %Ret = phi i32 [ %Val, %BB1 ], [ 1, %BB2 ]
ret i32 %Ret
}
diff --git a/test/Transforms/ConstProp/bitcast2.ll b/test/Transforms/ConstProp/bitcast2.ll
index 66def7f49da2..5c5eab1879c3 100644
--- a/test/Transforms/ConstProp/bitcast2.ll
+++ b/test/Transforms/ConstProp/bitcast2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep bitcast
+; RUN: opt < %s -instcombine -S | not grep bitcast
; PR2165
define <1 x i64> @test() {
diff --git a/test/Transforms/ConstProp/bswap.ll b/test/Transforms/ConstProp/bswap.ll
index 49758271046e..9fce309b7887 100644
--- a/test/Transforms/ConstProp/bswap.ll
+++ b/test/Transforms/ConstProp/bswap.ll
@@ -1,6 +1,6 @@
; bswap should be constant folded when it is passed a constant argument
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | not grep call
+; RUN: opt < %s -constprop -S | not grep call
declare i16 @llvm.bswap.i16(i16)
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index c573e565fc83..3c266fe62ed8 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | not grep call
+; RUN: opt < %s -constprop -S | not grep call
declare double @cos(double)
diff --git a/test/Transforms/ConstProp/div-zero.ll b/test/Transforms/ConstProp/div-zero.ll
index 166c643dcc99..f78a34fe703d 100644
--- a/test/Transforms/ConstProp/div-zero.ll
+++ b/test/Transforms/ConstProp/div-zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 0}
+; RUN: opt < %s -instcombine -S | grep {ret i32 0}
; PR4424
declare void @ext()
diff --git a/test/Transforms/ConstProp/float-to-ptr-cast.ll b/test/Transforms/ConstProp/float-to-ptr-cast.ll
index cbf84f54f0a2..d8eb3e8b652c 100644
--- a/test/Transforms/ConstProp/float-to-ptr-cast.ll
+++ b/test/Transforms/ConstProp/float-to-ptr-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop | llvm-dis | \
+; RUN: opt < %s -constprop -S | \
; RUN: grep -F {ret i32* null} | count 2
define i32* @test1() {
diff --git a/test/Transforms/ConstProp/logicaltest.ll b/test/Transforms/ConstProp/logicaltest.ll
index 81bdb950e9fc..7a90a7199f8d 100644
--- a/test/Transforms/ConstProp/logicaltest.ll
+++ b/test/Transforms/ConstProp/logicaltest.ll
@@ -1,6 +1,6 @@
; Ensure constant propagation of logical instructions is working correctly.
-; RUN: llvm-as < %s | opt -constprop -die | llvm-dis | \
+; RUN: opt < %s -constprop -die -S | \
; RUN: not egrep {and|or|xor}
define i32 @test1() {
diff --git a/test/Transforms/ConstProp/nottest.ll b/test/Transforms/ConstProp/nottest.ll
index 41ea1b387304..799cecaae60a 100644
--- a/test/Transforms/ConstProp/nottest.ll
+++ b/test/Transforms/ConstProp/nottest.ll
@@ -1,6 +1,6 @@
; Ensure constant propagation of 'not' instructions is working correctly.
-; RUN: llvm-as < %s | opt -constprop -die | llvm-dis | not grep xor
+; RUN: opt < %s -constprop -die -S | not grep xor
define i32 @test1() {
%R = xor i32 4, -1 ; <i32> [#uses=1]
diff --git a/test/Transforms/ConstProp/overflow-ops.ll b/test/Transforms/ConstProp/overflow-ops.ll
new file mode 100644
index 000000000000..1547a4d0f5b8
--- /dev/null
+++ b/test/Transforms/ConstProp/overflow-ops.ll
@@ -0,0 +1,172 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+%i8i1 = type {i8, i1}
+
+;;-----------------------------
+;; uadd
+;;-----------------------------
+
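+;; The i8 half of the result is the sum wrapped mod 2^8; the i1 flag is
+;; set iff the exact unsigned sum exceeds 255. So 42 + 100 = 142 folds
+;; to -114 (0x8E) with no overflow, while 142 + 120 = 262 wraps to 6
+;; with the flag set.
+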
+define {i8, i1} @uadd_1() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 42, i8 100)
+ ret {i8, i1} %t
+
+; CHECK: @uadd_1
+; CHECK: ret %i8i1 { i8 -114, i1 false }
+}
+
+define {i8, i1} @uadd_2() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 142, i8 120)
+ ret {i8, i1} %t
+
+; CHECK: @uadd_2
+; CHECK: ret %i8i1 { i8 6, i1 true }
+}
+
+;;-----------------------------
+;; usub
+;;-----------------------------
+
+define {i8, i1} @usub_1() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 2)
+ ret {i8, i1} %t
+
+; CHECK: @usub_1
+; CHECK: ret %i8i1 { i8 2, i1 false }
+}
+
+define {i8, i1} @usub_2() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 6)
+ ret {i8, i1} %t
+
+; CHECK: @usub_2
+; CHECK: ret %i8i1 { i8 -2, i1 true }
+}
+
+;;-----------------------------
+;; sadd
+;;-----------------------------
+
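+;; For the signed variants the flag is set iff the exact result falls
+;; outside [-128, 127]; e.g. 120 + 10 = 130 wraps to -126 with overflow,
+;; while -120 + 10 = -110 stays in range.
+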
+define {i8, i1} @sadd_1() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 42, i8 2)
+ ret {i8, i1} %t
+
+; CHECK: @sadd_1
+; CHECK: ret %i8i1 { i8 44, i1 false }
+}
+
+define {i8, i1} @sadd_2() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 120, i8 10)
+ ret {i8, i1} %t
+
+; CHECK: @sadd_2
+; CHECK: ret %i8i1 { i8 -126, i1 true }
+}
+
+define {i8, i1} @sadd_3() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 10)
+ ret {i8, i1} %t
+
+; CHECK: @sadd_3
+; CHECK: ret %i8i1 { i8 -110, i1 false }
+}
+
+define {i8, i1} @sadd_4() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 -10)
+ ret {i8, i1} %t
+
+; CHECK: @sadd_4
+; CHECK: ret %i8i1 { i8 126, i1 true }
+}
+
+define {i8, i1} @sadd_5() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 2, i8 -10)
+ ret {i8, i1} %t
+
+; CHECK: @sadd_5
+; CHECK: ret %i8i1 { i8 -8, i1 false }
+}
+
+
+;;-----------------------------
+;; ssub
+;;-----------------------------
+
+define {i8, i1} @ssub_1() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 2)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_1
+; CHECK: ret %i8i1 { i8 2, i1 false }
+}
+
+define {i8, i1} @ssub_2() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 6)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_2
+; CHECK: ret %i8i1 { i8 -2, i1 false }
+}
+
+define {i8, i1} @ssub_3() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 120)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_3
+; CHECK: ret %i8i1 { i8 126, i1 true }
+}
+
+define {i8, i1} @ssub_3b() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 10)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_3b
+; CHECK: ret %i8i1 { i8 -20, i1 false }
+}
+
+define {i8, i1} @ssub_4() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 120, i8 -10)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_4
+; CHECK: ret %i8i1 { i8 -126, i1 true }
+}
+
+define {i8, i1} @ssub_4b() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 20, i8 -10)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_4b
+; CHECK: ret %i8i1 { i8 30, i1 false }
+}
+
+define {i8, i1} @ssub_5() nounwind {
+entry:
+ %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -20, i8 -10)
+ ret {i8, i1} %t
+
+; CHECK: @ssub_5
+; CHECK: ret %i8i1 { i8 -10, i1 false }
+}
+
+
+
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
+
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
diff --git a/test/Transforms/ConstProp/phi.ll b/test/Transforms/ConstProp/phi.ll
index 858c0becb148..3d9e284457cf 100644
--- a/test/Transforms/ConstProp/phi.ll
+++ b/test/Transforms/ConstProp/phi.ll
@@ -1,7 +1,7 @@
; This is a basic sanity check for constant propagation. The add instruction
; should be eliminated.
-; RUN: llvm-as < %s | opt -constprop -die | llvm-dis | not grep phi
+; RUN: opt < %s -constprop -die -S | not grep phi
define i32 @test(i1 %B) {
BB0:
diff --git a/test/Transforms/ConstProp/remtest.ll b/test/Transforms/ConstProp/remtest.ll
index 11861a4e8093..efd2d48332a2 100644
--- a/test/Transforms/ConstProp/remtest.ll
+++ b/test/Transforms/ConstProp/remtest.ll
@@ -1,6 +1,6 @@
; Ensure constant propagation of remainder instructions is working correctly.
-; RUN: llvm-as < %s | opt -constprop -die | llvm-dis | not grep rem
+; RUN: opt < %s -constprop -die -S | not grep rem
define i32 @test1() {
%R = srem i32 4, 3 ; <i32> [#uses=1]
diff --git a/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll b/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll
index 658944de23f8..b7b05cf1c370 100644
--- a/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll
+++ b/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constmerge > /dev/null
+; RUN: opt < %s -constmerge > /dev/null
@foo.upgrd.1 = internal constant { i32 } { i32 7 } ; <{ i32 }*> [#uses=1]
@bar = internal constant { i32 } { i32 7 } ; <{ i32 }*> [#uses=1]
diff --git a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
index 9a9c3ddf0ab1..ce79e3b2964a 100644
--- a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
+++ b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | opt -constmerge | llvm-dis | %prcontext foo 2 | grep bar
+; RUN: opt -S -constmerge %s | FileCheck %s
+; CHECK: @foo = constant i32 6
+; CHECK: @bar = constant i32 6
@foo = constant i32 6 ; <i32*> [#uses=0]
@bar = constant i32 6 ; <i32*> [#uses=0]
diff --git a/test/Transforms/ConstantMerge/2006-03-07-DontMergeDiffSections.ll b/test/Transforms/ConstantMerge/2006-03-07-DontMergeDiffSections.ll
index a621b594ca05..cea18a05efb2 100644
--- a/test/Transforms/ConstantMerge/2006-03-07-DontMergeDiffSections.ll
+++ b/test/Transforms/ConstantMerge/2006-03-07-DontMergeDiffSections.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -constmerge | llvm-dis | grep foo
-; RUN: llvm-as < %s | opt -constmerge | llvm-dis | grep bar
+; RUN: opt < %s -constmerge -S | grep foo
+; RUN: opt < %s -constmerge -S | grep bar
; Don't merge constants in different sections.
diff --git a/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll b/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll
index 0d86800c780a..fac6dd24efb1 100644
--- a/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll
+++ b/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim -disable-output
+; RUN: opt < %s -deadargelim -disable-output
define internal void @build_delaunay({ i32 }* sret %agg.result) {
ret void
diff --git a/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll b/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
index 2b3a2ea247a2..d5bd6c4df53c 100644
--- a/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
+++ b/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | grep {@test(}
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | not grep dead
+; RUN: opt < %s -deadargelim -S | grep {@test(}
+; RUN: opt < %s -deadargelim -S | not grep dead
define internal i32 @test(i32 %X, i32 %dead) {
ret i32 %X
diff --git a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
index 2a09b76c35b4..d4edce9baf65 100644
--- a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
+++ b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | not grep {ret i32 0}
+; RUN: opt < %s -deadargelim -S | not grep {ret i32 0}
; PR1735
define internal i32 @test(i32 %A, ...) {
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index 47a85ea78c3e..0e9c4f74e6a8 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis > %t
+; RUN: opt < %s -deadargelim -S > %t
; RUN: cat %t | grep nounwind | count 2
; RUN: cat %t | grep signext | count 2
; RUN: cat %t | not grep inreg
diff --git a/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll b/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
index 96cddf1fd3a1..93282f7f8f2b 100644
--- a/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | grep byval
+; RUN: opt < %s -deadargelim -S | grep byval
%struct.point = type { double, double }
@pts = global [4 x %struct.point] [ %struct.point { double 1.000000e+00, double 2.000000e+00 }, %struct.point { double 3.000000e+00, double 4.000000e+00 }, %struct.point { double 5.000000e+00, double 6.000000e+00 }, %struct.point { double 7.000000e+00, double 8.000000e+00 } ], align 32 ; <[4 x %struct.point]*> [#uses=1]
diff --git a/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll b/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
index 2c9416049a78..adfd01989407 100644
--- a/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
+++ b/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim -die | llvm-dis > %t
+; RUN: opt < %s -deadargelim -die -S > %t
; RUN: cat %t | grep 123
; This test tries to catch wrongful removal of return values for a specific case
diff --git a/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll b/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
index f5f2ce18b6c5..f251d6ce882c 100644
--- a/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
+++ b/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis
+; RUN: opt < %s -deadargelim | llvm-dis
; PR3807
define internal { i32, i32 } @foo() {
diff --git a/test/Transforms/DeadArgElim/basictest.ll b/test/Transforms/DeadArgElim/basictest.ll
index 5f68f9859652..9ac2222d1a5f 100644
--- a/test/Transforms/DeadArgElim/basictest.ll
+++ b/test/Transforms/DeadArgElim/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | not grep DEADARG
+; RUN: opt < %s -deadargelim -S | not grep DEADARG
; test - an obviously dead argument
define internal i32 @test(i32 %v, i32 %DEADARG1, i32* %p) {
diff --git a/test/Transforms/DeadArgElim/canon.ll b/test/Transforms/DeadArgElim/canon.ll
index b8adb713a7af..025a46a5dfeb 100644
--- a/test/Transforms/DeadArgElim/canon.ll
+++ b/test/Transforms/DeadArgElim/canon.ll
@@ -1,5 +1,5 @@
; This test shows a few canonicalizations made by deadargelim
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis > %t
+; RUN: opt < %s -deadargelim -S > %t
; This test should remove {} and replace it with void
; RUN: cat %t | grep {define internal void @test}
; This test should replace the {i32} return value with just i32
diff --git a/test/Transforms/DeadArgElim/dead_vaargs.ll b/test/Transforms/DeadArgElim/dead_vaargs.ll
index a4444bc102af..db3135c8393b 100644
--- a/test/Transforms/DeadArgElim/dead_vaargs.ll
+++ b/test/Transforms/DeadArgElim/dead_vaargs.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | not grep 47
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | not grep 1.0
+; RUN: opt < %s -deadargelim -S | not grep 47
+; RUN: opt < %s -deadargelim -S | not grep 1.0
define i32 @bar(i32 %A) {
%tmp4 = tail call i32 (i32, ...)* @foo( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 ) ; <i32> [#uses=1]
diff --git a/test/Transforms/DeadArgElim/deadretval.ll b/test/Transforms/DeadArgElim/deadretval.ll
index 5395290978b1..5f3817c6728d 100644
--- a/test/Transforms/DeadArgElim/deadretval.ll
+++ b/test/Transforms/DeadArgElim/deadretval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis | not grep DEAD
+; RUN: opt < %s -deadargelim -S | not grep DEAD
; Dead arg only used by dead retval
define internal i32 @test(i32 %DEADARG) {
diff --git a/test/Transforms/DeadArgElim/deadretval2.ll b/test/Transforms/DeadArgElim/deadretval2.ll
index 316d7555e927..dcdc36e319a3 100644
--- a/test/Transforms/DeadArgElim/deadretval2.ll
+++ b/test/Transforms/DeadArgElim/deadretval2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim -die | llvm-dis > %t
+; RUN: opt < %s -deadargelim -die -S > %t
; RUN: cat %t | not grep DEAD
; RUN: cat %t | grep LIVE | count 4
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index 899ebd93a469..b0b9bf3fa13b 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim | llvm-dis > %t
+; RUN: opt < %s -deadargelim -S > %t
; RUN: grep {define internal zeroext i32 @test1() nounwind} %t
; RUN: grep {define internal %Ty @test2} %t
diff --git a/test/Transforms/DeadArgElim/multdeadretval.ll b/test/Transforms/DeadArgElim/multdeadretval.ll
index 6e0474a6f8d2..68d96ee8df48 100644
--- a/test/Transforms/DeadArgElim/multdeadretval.ll
+++ b/test/Transforms/DeadArgElim/multdeadretval.ll
@@ -2,7 +2,7 @@
; are unused. All unused values are typed i16, so we can easily check. We also
; run instcombine to fold insert/extractvalue chains and we run dce to clean up
; any remaining dead stuff.
-; RUN: llvm-as < %s | opt -deadargelim -instcombine -dce | llvm-dis | not grep i16
+; RUN: opt < %s -deadargelim -instcombine -dce -S | not grep i16
define internal {i16, i32} @test(i16 %DEADARG) {
%A = insertvalue {i16,i32} undef, i16 1, 0
diff --git a/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll b/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll
index 54fa078a5916..d1a9dd8fb59a 100644
--- a/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll
+++ b/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse -scalarrepl -instcombine | \
+; RUN: opt < %s -dse -scalarrepl -instcombine | \
; RUN: llvm-dis | not grep {ret i32 undef}
define i32 @test(double %__x) {
diff --git a/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll b/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll
index 3c40347f3094..cae2a6f50a50 100644
--- a/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll
+++ b/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | \
+; RUN: opt < %s -dse -S | \
; RUN: grep {store i32 1234567}
; Do not delete stores that are only partially killed.
diff --git a/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll b/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll
index 58d678708794..147ec84efe55 100644
--- a/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll
+++ b/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | grep store
+; RUN: opt < %s -dse -S | grep store
define double @foo(i8* %X) {
%X_addr = alloca i8* ; <i8**> [#uses=2]
diff --git a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
index 3b3f8ada9968..0b0830685f44 100644
--- a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
+++ b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -dse -disable-output
+; RUN: opt < %s -globalsmodref-aa -dse -disable-output
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8"
%struct.ECacheType = type { i32, i32, i32 }
diff --git a/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll b/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
index 0ba8479a2c55..4a5d6e29b799 100644
--- a/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
+++ b/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep tmp5
+; RUN: opt < %s -dse -S | not grep tmp5
; PR2599
define void @foo({ i32, i32 }* %x) nounwind {
diff --git a/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll b/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll
index 0f3350d80c72..5958c6c50800 100644
--- a/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll
+++ b/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis
+; RUN: opt < %s -dse | llvm-dis
; PR3141
%struct.ada__tags__dispatch_table = type { [1 x i32] }
%struct.f393a00_1__object = type { %struct.ada__tags__dispatch_table*, i8 }
diff --git a/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll b/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll
index 11e2ffd3d08f..c320a3eb6f7a 100644
--- a/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll
+++ b/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis
+; RUN: opt < %s -dse | llvm-dis
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/DeadStoreElimination/PartialStore.ll b/test/Transforms/DeadStoreElimination/PartialStore.ll
index ce1965724f10..0881cb9ccf14 100644
--- a/test/Transforms/DeadStoreElimination/PartialStore.ll
+++ b/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | \
+; RUN: opt < %s -dse -S | \
; RUN: not grep {store i8}
; Ensure that the dead store is deleted in this case. It is wholly
; overwritten by the second store.
diff --git a/test/Transforms/DeadStoreElimination/alloca.ll b/test/Transforms/DeadStoreElimination/alloca.ll
index 7a2c94ffca62..b6818eb3824f 100644
--- a/test/Transforms/DeadStoreElimination/alloca.ll
+++ b/test/Transforms/DeadStoreElimination/alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep DEAD
+; RUN: opt < %s -dse -S | not grep DEAD
define void @test(i32* %Q) {
%P = alloca i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/DeadStoreElimination/byval.ll b/test/Transforms/DeadStoreElimination/byval.ll
index 08f69a40c579..fa651b1ee382 100644
--- a/test/Transforms/DeadStoreElimination/byval.ll
+++ b/test/Transforms/DeadStoreElimination/byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep store
+; RUN: opt < %s -dse -S | not grep store
%struct.x = type { i32, i32, i32, i32 }
diff --git a/test/Transforms/DeadStoreElimination/context-sensitive.ll b/test/Transforms/DeadStoreElimination/context-sensitive.ll
index 266fdbc647ed..0da416cc8416 100644
--- a/test/Transforms/DeadStoreElimination/context-sensitive.ll
+++ b/test/Transforms/DeadStoreElimination/context-sensitive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep DEAD
+; RUN: opt < %s -dse -S | not grep DEAD
declare void @ext()
diff --git a/test/Transforms/DeadStoreElimination/crash.ll b/test/Transforms/DeadStoreElimination/crash.ll
new file mode 100644
index 000000000000..7f82cbfd9263
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/crash.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -dse | llvm-dis
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@g80 = external global i8 ; <i8*> [#uses=3]
+
+declare signext i8 @foo(i8 signext, i8 signext) nounwind readnone ssp
+
+declare i32 @func68(i32) nounwind readonly ssp
+
+; PR4815
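+; Regression test: running DSE over the loop below (three stores to
+; @g80 around calls) used to crash, so the test only checks that opt
+; gets through the pass.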
+define void @test1(i32 %int32p54) noreturn nounwind ssp {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %storemerge = phi i8 [ %2, %bb ], [ 1, %entry ] ; <i8> [#uses=1]
+ store i8 %storemerge, i8* @g80
+ %0 = tail call i32 @func68(i32 1) nounwind ssp ; <i32> [#uses=1]
+ %1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
+ store i8 %1, i8* @g80, align 1
+ store i8 undef, i8* @g80, align 1
+ %2 = tail call signext i8 @foo(i8 signext undef, i8 signext 1) nounwind ; <i8> [#uses=1]
+ br label %bb
+}
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index 246de9386cc3..8b81ee353033 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep DEAD
+; RUN: opt < %s -dse -S | not grep DEAD
define void @test(i32* %Q, i32* %P) {
%DEAD = load i32* %Q ; <i32> [#uses=1]
diff --git a/test/Transforms/DeadStoreElimination/memcpy.ll b/test/Transforms/DeadStoreElimination/memcpy.ll
index e96b2e8fb1ed..8d996318ba8c 100644
--- a/test/Transforms/DeadStoreElimination/memcpy.ll
+++ b/test/Transforms/DeadStoreElimination/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep alloca
+; RUN: opt < %s -dse -S | not grep alloca
; ModuleID = 'placeholder.adb'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 594671d0f0f1..e89d3abfbd9d 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | not grep DEAD
+; RUN: opt < %s -dse -S | not grep DEAD
define void @test(i32* %Q, i32* %P) {
%DEAD = load i32* %Q ; <i32> [#uses=1]
diff --git a/test/Transforms/DeadStoreElimination/volatile-load.ll b/test/Transforms/DeadStoreElimination/volatile-load.ll
index c458284dca76..59a1129348b9 100644
--- a/test/Transforms/DeadStoreElimination/volatile-load.ll
+++ b/test/Transforms/DeadStoreElimination/volatile-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -dse | llvm-dis | grep {volatile load}
+; RUN: opt < %s -dse -S | grep {volatile load}
@g_1 = global i32 0
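
The braces in grep {volatile load} are Tcl-style quoting left over from the
DejaGnu-era test harness. The substance of the test is that DSE must treat
volatile accesses as observable side effects and leave them alone; a minimal
sketch with illustrative names:

define i32 @keep(i32* %p) {
  %v = volatile load i32* %p  ; must survive DSE/GVN: volatile accesses are side effects
  ret i32 %v
}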
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll b/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll
index 5261ac465889..b0aecfa56f74 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep readnone
+; RUN: opt < %s -functionattrs -S | grep readnone
define i32 @a() {
%tmp = call i32 @b( ) ; <i32> [#uses=1]
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index a17d381eec80..535a1d0fba6b 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep readnone | count 4
+; RUN: opt < %s -functionattrs -S | grep readnone | count 4
@x = global i32 0
declare i32 @e() readnone
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index cebfdacb4c0c..b455fdd8c3ec 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep readonly | count 2
+; RUN: opt < %s -functionattrs -S | grep readonly | count 2
define i32 @f() {
entry:
diff --git a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
index b6077fd8ee39..85df09ebd7fe 100644
--- a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | not grep read
+; RUN: opt < %s -functionattrs -S | not grep read
; PR2792
@g = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
index 50ca64120004..09eb4687ac26 100644
--- a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
+++ b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep readnone | count 2
+; RUN: opt < %s -functionattrs -S | grep readnone | count 2
declare i32 @g(i32*) readnone
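
-functionattrs can mark a function readnone even though it loads and stores,
provided every access hits memory the function itself allocated, since no
caller can observe it. A minimal sketch (the function name is hypothetical):

define i32 @local_only() {
  %x = alloca i32
  store i32 7, i32* %x
  %v = load i32* %x           ; only touches its own alloca: deducible as readnone
  ret i32 %v
}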
diff --git a/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll b/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
index d9c01171ac40..672b5e1392b7 100644
--- a/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
+++ b/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep readnone
+; RUN: opt < %s -functionattrs -S | grep readnone
@s = external constant i8 ; <i8*> [#uses=1]
diff --git a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll b/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
index 39a64e6d36cc..53857f61ce58 100644
--- a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
+++ b/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | not grep {nocapture *%%q}
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep {nocapture *%%p} | count 6
+; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
+; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p} | count 6
@g = global i32* null ; <i32**> [#uses=1]
define i32* @c1(i32* %q) {
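
For the nocapture deduction, what matters is whether the pointer value itself
can escape: storing through the argument is harmless, while storing the
argument somewhere is a capture. A sketch under that distinction, with
illustrative names:

@esc = global i32* null

define void @no_capture(i32* %p) {
  store i32 0, i32* %p        ; writes through %p; the pointer never escapes
  ret void
}

define void @captures(i32* %q) {
  store i32* %q, i32** @esc   ; %q itself is saved away: captured
  ret void
}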
diff --git a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
index 68a232f5ff3b..7ef5f06f0613 100644
--- a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
+++ b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | not grep {nocapture *%%q}
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | grep {nocapture *%%p}
+; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
+; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p}
define i32* @a(i32** %p) {
%tmp = load i32** %p
diff --git a/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll b/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll
index 4022033c3b0e..488e6a9ec2ca 100644
--- a/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll
+++ b/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -functionattrs | llvm-dis | not grep read
+; RUN: opt < %s -functionattrs -S | not grep read
; PR3754
define i8* @m(i32 %size) {
diff --git a/test/Transforms/GVN/2007-07-25-DominatedLoop.ll b/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
index 7c10f970e084..ad580ce1677d 100644
--- a/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
+++ b/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
%struct.PerlInterpreter = type { i8 }
@PL_sv_count = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
index 442ba083749f..2e0a1015caf5 100644
--- a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
+++ b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep {tmp10 =}
+; RUN: opt < %s -gvn -S | not grep {tmp10 =}
%struct.INT2 = type { i32, i32 }
@blkshifts = external global %struct.INT2* ; <%struct.INT2**> [#uses=2]
diff --git a/test/Transforms/GVN/2007-07-25-Loop.ll b/test/Transforms/GVN/2007-07-25-Loop.ll
index 2efacb55a204..6a9f58e02f65 100644
--- a/test/Transforms/GVN/2007-07-25-Loop.ll
+++ b/test/Transforms/GVN/2007-07-25-Loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
%struct.s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
diff --git a/test/Transforms/GVN/2007-07-25-NestedLoop.ll b/test/Transforms/GVN/2007-07-25-NestedLoop.ll
index cebaaa39be08..c6d7750d6272 100644
--- a/test/Transforms/GVN/2007-07-25-NestedLoop.ll
+++ b/test/Transforms/GVN/2007-07-25-NestedLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
%struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 }
diff --git a/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll b/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
index 10482d8be150..ecff657ed389 100644
--- a/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
+++ b/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
%struct.ggBRDF = type { i32 (...)** }
%struct.ggBox3 = type { %struct.ggPoint3, %struct.ggPoint3 }
diff --git a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index 076ba4c4f114..0be33791f617 100644
--- a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {tmp17625.* = phi i32. }
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {tmp17631.* = phi i32. }
+; RUN: opt < %s -gvn -S | grep {tmp17625.* = phi i32. }
+; RUN: opt < %s -gvn -S | grep {tmp17631.* = phi i32. }
@last = external global [65 x i32*] ; <[65 x i32*]*> [#uses=1]
diff --git a/test/Transforms/GVN/2007-07-26-NonRedundant.ll b/test/Transforms/GVN/2007-07-26-NonRedundant.ll
index 204803ad3728..7579e8aff08b 100644
--- a/test/Transforms/GVN/2007-07-26-NonRedundant.ll
+++ b/test/Transforms/GVN/2007-07-26-NonRedundant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
@bsLive = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/GVN/2007-07-26-PhiErasure.ll b/test/Transforms/GVN/2007-07-26-PhiErasure.ll
index 4925df786d73..d898ab8e2fa2 100644
--- a/test/Transforms/GVN/2007-07-26-PhiErasure.ll
+++ b/test/Transforms/GVN/2007-07-26-PhiErasure.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {tmp298316 = phi i32 }
+; RUN: opt < %s -gvn -S | not grep phi
%struct..0anon = type { i32 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@@ -23,6 +23,6 @@ cond_next2943: ; preds = %cond_true2935, %bb2928
bb2982.preheader: ; preds = %cond_next2943
%tmp298316 = load i32* @n_spills, align 4 ; <i32> [#uses=0]
- ret i32 0
+ ret i32 %tmp298316
}
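
Two things changed in this test: returning %tmp298316 keeps the load live so
GVN is still exercised, and the expectation is inverted, asserting that no phi
survives at all rather than that a particular one does. GVN erases a phi whose
incoming values are all identical; a minimal sketch:

define i32 @same(i1 %c, i32 %x) {
entry:
  br i1 %c, label %a, label %b
a:
  br label %m
b:
  br label %m
m:
  %p = phi i32 [ %x, %a ], [ %x, %b ] ; all incoming values identical: replaced by %x
  ret i32 %p
}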
diff --git a/test/Transforms/GVN/2007-07-30-PredIDom.ll b/test/Transforms/GVN/2007-07-30-PredIDom.ll
index 1d1aec1146aa..5cb6bb3ecffb 100644
--- a/test/Transforms/GVN/2007-07-30-PredIDom.ll
+++ b/test/Transforms/GVN/2007-07-30-PredIDom.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
%"struct.Block::$_16" = type { i32 }
%struct.Exp = type { %struct.Exp_*, i32, i32, i32, %struct.Exp*, %struct.Exp*, %"struct.Exp::$_10", %"struct.Block::$_16", %"struct.Exp::$_12" }
diff --git a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index 6cb7785fcd57..faa1157dd8be 100644
--- a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {tmp47 = phi i32 }
+; RUN: opt < %s -gvn -S | grep {tmp47 = phi i32 }
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
@debug = external constant i32 ; <i32*> [#uses=0]
diff --git a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
index a9ca71ac81d4..0d1d8bced000 100644
--- a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
+++ b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep {tmp701 =}
+; RUN: opt < %s -gvn -S | not grep {tmp701 =}
@img_width = external global i16 ; <i16*> [#uses=2]
diff --git a/test/Transforms/GVN/2008-02-12-UndefLoad.ll b/test/Transforms/GVN/2008-02-12-UndefLoad.ll
index 5ee3d6cbe643..de2aa614ff5d 100644
--- a/test/Transforms/GVN/2008-02-12-UndefLoad.ll
+++ b/test/Transforms/GVN/2008-02-12-UndefLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep load
+; RUN: opt < %s -gvn -S | not grep load
; PR1996
%struct.anon = type { i32, i8, i8, i8, i8 }
diff --git a/test/Transforms/GVN/2008-02-13-NewPHI.ll b/test/Transforms/GVN/2008-02-13-NewPHI.ll
index d3e325146812..54998db0ac68 100644
--- a/test/Transforms/GVN/2008-02-13-NewPHI.ll
+++ b/test/Transforms/GVN/2008-02-13-NewPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -gvn
+; RUN: opt < %s -anders-aa -gvn
; PR2032
define i32 @sscal(i32 %n, double %sa1, float* %sx, i32 %incx) {
diff --git a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll b/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
index 54a2201d2d7d..9a75e1ad978c 100644
--- a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
+++ b/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*memcpy} | count 1
+; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy} | count 1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/Transforms/GVN/2008-02-26-MemCpySize.ll b/test/Transforms/GVN/2008-02-26-MemCpySize.ll
index 92cac077711c..6ed8a76c0de2 100644
--- a/test/Transforms/GVN/2008-02-26-MemCpySize.ll
+++ b/test/Transforms/GVN/2008-02-26-MemCpySize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*memcpy.*cell} | count 2
+; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy.*cell} | count 2
; PR2099
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index 15667d2bfb21..361c1557f24f 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep undef
+; RUN: opt < %s -gvn -S | grep {ret i8 \[%\]tmp3}
; PR2503
@g_3 = external global i8 ; <i8*> [#uses=2]
diff --git a/test/Transforms/GVN/2008-12-09-SelfRemove.ll b/test/Transforms/GVN/2008-12-09-SelfRemove.ll
index 48ce8f6ff461..c6833e373d06 100644
--- a/test/Transforms/GVN/2008-12-09-SelfRemove.ll
+++ b/test/Transforms/GVN/2008-12-09-SelfRemove.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep getelementptr | count 1
+; RUN: opt < %s -gvn -S | grep getelementptr | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
diff --git a/test/Transforms/GVN/2008-12-12-RLE-Crash.ll b/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
index 99fadc716c7e..da67ee776696 100644
--- a/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
+++ b/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll b/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
index 9bcfcac1ccdb..41f76c8167e5 100644
--- a/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
+++ b/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
@sort_value = external global [256 x i32], align 32 ; <[256 x i32]*> [#uses=2]
diff --git a/test/Transforms/GVN/2008-12-15-CacheVisited.ll b/test/Transforms/GVN/2008-12-15-CacheVisited.ll
index b547003b25ee..0a63f3f4626c 100644
--- a/test/Transforms/GVN/2008-12-15-CacheVisited.ll
+++ b/test/Transforms/GVN/2008-12-15-CacheVisited.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
; Cached results must be added to and verified against the visited sets.
; PR3217
diff --git a/test/Transforms/GVN/2009-01-21-SortInvalidation.ll b/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
index 51ca6cb34ee5..36775936c8b7 100644
--- a/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
+++ b/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
; PR3358
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GVN/2009-01-22-SortInvalidation.ll b/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
index 9b7fa0622ab8..95690a5a2571 100644
--- a/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
+++ b/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis
+; RUN: opt < %s -gvn | llvm-dis
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll b/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
index 34dc3441dd1a..c2d57a105fc9 100644
--- a/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
+++ b/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-load-pre -disable-output
+; RUN: opt < %s -gvn -enable-load-pre -disable-output
%struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
%struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
diff --git a/test/Transforms/GVN/2009-03-05-dbg.ll b/test/Transforms/GVN/2009-03-05-dbg.ll
index 0450a7a7aaf2..cad33129956c 100644
--- a/test/Transforms/GVN/2009-03-05-dbg.ll
+++ b/test/Transforms/GVN/2009-03-05-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -disable-output
+; RUN: opt < %s -gvn -disable-output
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
@llvm.dbg.compile_unit298 = external constant %llvm.dbg.compile_unit.type ; <%llvm.dbg.compile_unit.type*> [#uses=1]
diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
index 63ddc450869e..89d6a5f982b8 100644
--- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
+++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -disable-output
+; RUN: opt < %s -gvn -disable-output
; PR3775
; ModuleID = 'bugpoint-reduced-simplified.bc'
diff --git a/test/Transforms/GVN/2009-06-17-InvalidPRE.ll b/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
index c8982c86cb9e..6ac6072a9273 100644
--- a/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
+++ b/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis | not grep pre1
+; RUN: opt < %s -gvn -enable-load-pre -S | not grep pre1
; GVN load pre was hoisting the loads at %13 and %16 up to bb4.outer.
; This is invalid as it bypasses the check for %m.0.ph==null in bb4.
; ModuleID = 'mbuf.c'
diff --git a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
new file mode 100644
index 000000000000..641e920006b2
--- /dev/null
+++ b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -gvn | llvm-dis
+; PR4256
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-linux-gnu"
+ %llvm.dbg.anchor.type = type { i32, i32 }
+ %struct.cset = type { i8*, i8, i8, i32, i8* }
+ %struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* }
+ %struct.re_guts = type { i32*, %struct.cset*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, [1 x i8] }
+
+define i8* @lbackref(%struct.lmat* %m, i8* %start, i8* %stop, i32 %startst, i32 %stopst, i32 %lev, i32 %rec) nounwind {
+entry:
+ br label %bb63
+
+bb: ; preds = %bb63
+ switch i32 0, label %bb62 [
+ i32 268435456, label %bb2
+ i32 805306368, label %bb9
+ i32 -1610612736, label %bb51
+ ]
+
+bb2: ; preds = %bb
+ br label %bb62
+
+bb9: ; preds = %bb
+ %0 = load i8* %sp.1, align 1 ; <i8> [#uses=0]
+ br label %bb62
+
+bb51: ; preds = %bb
+ %1 = load i8* %sp.1, align 1 ; <i8> [#uses=0]
+ ret i8* null
+
+bb62: ; preds = %bb9, %bb2, %bb
+ br label %bb63
+
+bb63: ; preds = %bb84, %bb69, %bb62, %entry
+ %sp.1 = phi i8* [ null, %bb62 ], [ %sp.1.lcssa, %bb84 ], [ %start, %entry ], [ %sp.1.lcssa, %bb69 ] ; <i8*> [#uses=3]
+ br i1 false, label %bb, label %bb65
+
+bb65: ; preds = %bb63
+ %sp.1.lcssa = phi i8* [ %sp.1, %bb63 ] ; <i8*> [#uses=4]
+ br i1 false, label %bb66, label %bb69
+
+bb66: ; preds = %bb65
+ ret i8* null
+
+bb69: ; preds = %bb65
+ switch i32 0, label %bb108.loopexit2.loopexit.loopexit [
+ i32 1342177280, label %bb63
+ i32 1476395008, label %bb84
+ i32 1879048192, label %bb104
+ i32 2013265920, label %bb93
+ ]
+
+bb84: ; preds = %bb69
+ %2 = tail call i8* @lbackref(%struct.lmat* %m, i8* %sp.1.lcssa, i8* %stop, i32 0, i32 %stopst, i32 0, i32 0) nounwind ; <i8*> [#uses=0]
+ br label %bb63
+
+bb93: ; preds = %bb69
+ ret i8* null
+
+bb104: ; preds = %bb69
+ %sp.1.lcssa.lcssa33 = phi i8* [ %sp.1.lcssa, %bb69 ] ; <i8*> [#uses=0]
+ unreachable
+
+bb108.loopexit2.loopexit.loopexit: ; preds = %bb69
+ ret i8* null
+}
diff --git a/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll b/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll
new file mode 100644
index 000000000000..b433297bba6f
--- /dev/null
+++ b/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll
@@ -0,0 +1,15 @@
+; Test to make sure malloc's bitcast does not block detection of a store
+; to aliased memory; GVN should not optimize away the load in this program.
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+define i64 @test() {
+ %1 = tail call i8* @malloc(i64 mul (i64 4, i64 ptrtoint (i64* getelementptr (i64* null, i64 1) to i64))) ; <i8*> [#uses=2]
+ store i8 42, i8* %1
+ %X = bitcast i8* %1 to i64* ; <i64*> [#uses=1]
+ %Y = load i64* %X ; <i64> [#uses=1]
+ ret i64 %Y
+; CHECK: %Y = load i64* %X
+; CHECK: ret i64 %Y
+}
+
+declare noalias i8* @malloc(i64)
diff --git a/test/Transforms/GVN/basic.ll b/test/Transforms/GVN/basic.ll
index ebf5daaf9272..1decafac8c4f 100644
--- a/test/Transforms/GVN/basic.ll
+++ b/test/Transforms/GVN/basic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep {%z2 =}
+; RUN: opt < %s -gvn -S | not grep {%z2 =}
define i32 @main() {
block1:
diff --git a/test/Transforms/GVN/bitcast-of-call.ll b/test/Transforms/GVN/bitcast-of-call.ll
index 0a045c8bd50c..55b4b6e9d317 100644
--- a/test/Transforms/GVN/bitcast-of-call.ll
+++ b/test/Transforms/GVN/bitcast-of-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep tmp2
+; RUN: opt < %s -gvn -S | not grep tmp2
; PR2213
define i32* @f(i8* %x) {
diff --git a/test/Transforms/GVN/calls-nonlocal.ll b/test/Transforms/GVN/calls-nonlocal.ll
index 0d1c8a38ad48..f0edf09bff98 100644
--- a/test/Transforms/GVN/calls-nonlocal.ll
+++ b/test/Transforms/GVN/calls-nonlocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep strlen | count 2
+; RUN: opt < %s -gvn -S | grep strlen | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/Transforms/GVN/calls-readonly.ll b/test/Transforms/GVN/calls-readonly.ll
index 723ef7749298..28b5ff09a73a 100644
--- a/test/Transforms/GVN/calls-readonly.ll
+++ b/test/Transforms/GVN/calls-readonly.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -gvn | llvm-dis | grep {call.*strlen} | count 1
+; RUN: opt < %s -basicaa -gvn -S | grep {call.*strlen} | count 1
; Should delete the second call to strlen even though the intervening strchr call exists.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
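
With -basicaa supplying alias information, GVN can treat a second call to a
readonly function as redundant when nothing between the two calls may write
the memory it reads. A sketch under those assumptions (@pure is hypothetical):

declare i32 @pure(i32*) readonly

define i32 @twice(i32* %p) {
  %a = call i32 @pure(i32* %p)
  %b = call i32 @pure(i32* %p) ; redundant: readonly callee, no intervening store
  %s = add i32 %a, %b
  ret i32 %s
}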
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 53cbb509fcb2..e212d791ae5a 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {br i1 false}
+; RUN: opt < %s -gvn -S | grep {br i1 false}
@a = external global i32 ; <i32*> [#uses=7]
diff --git a/test/Transforms/GVN/load-constant-mem.ll b/test/Transforms/GVN/load-constant-mem.ll
index 83b9d3846c17..87f33eaadadb 100644
--- a/test/Transforms/GVN/load-constant-mem.ll
+++ b/test/Transforms/GVN/load-constant-mem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -instcombine | llvm-dis | grep {ret i32 0}
+; RUN: opt < %s -gvn -instcombine -S | grep {ret i32 0}
; PR4189
@G = external constant [4 x i32]
diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/local-pre.ll
index 59158cc0fcae..5f03984653a9 100644
--- a/test/Transforms/GVN/local-pre.ll
+++ b/test/Transforms/GVN/local-pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-pre | llvm-dis | grep {b.pre}
+; RUN: opt < %s -gvn -enable-pre -S | grep {b.pre}
define i32 @main(i32 %p) {
block1:
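
-enable-pre turns on partial redundancy elimination: when an expression is
available on some predecessors of a block but not all of them, GVN inserts the
computation on the missing edge and names the new value with a .pre suffix,
which is what the grep keys on. A minimal sketch with illustrative names:

define i32 @pre_sketch(i32 %p, i1 %c) {
block1:
  br i1 %c, label %block2, label %block3
block2:
  %a = add i32 %p, 1
  br label %block4
block3:                       ; PRE inserts %a.pre = add i32 %p, 1 here
  br label %block4
block4:
  %b = add i32 %p, 1          ; becomes phi i32 [ %a, %block2 ], [ %a.pre, %block3 ]
  ret i32 %b
}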
diff --git a/test/Transforms/GVN/lpre-basic.ll b/test/Transforms/GVN/lpre-basic.ll
index 5b52863b1c8d..41b51806cd46 100644
--- a/test/Transforms/GVN/lpre-basic.ll
+++ b/test/Transforms/GVN/lpre-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis | grep {%PRE = phi}
+; RUN: opt < %s -gvn -enable-load-pre -S | grep {%PRE = phi}
define i32 @test(i32* %p, i1 %C) {
block1:
diff --git a/test/Transforms/GVN/lpre-call-wrap-2.ll b/test/Transforms/GVN/lpre-call-wrap-2.ll
index c9f93526fea4..79512a33d993 100644
--- a/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/test/Transforms/GVN/lpre-call-wrap-2.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis > %t
-; RUN: %prcontext bb1: 2 < %t | grep phi
-; RUN: %prcontext bb1: 2 < %t | not grep load
+; RUN: opt -S -gvn -enable-load-pre %s | FileCheck %s
;
; The partially redundant load in bb1 should be hoisted to "bb". This comes
; from this C code (GCC PR 23455):
@@ -30,6 +28,9 @@ bb: ; preds = %entry
br label %bb1
bb1: ; preds = %bb, %entry
+; CHECK: bb1:
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
%4 = load i32* @outcnt, align 4 ; <i32> [#uses=1]
%5 = getelementptr i8* %outbuf, i32 %4 ; <i8*> [#uses=1]
store i8 %bi_buf, i8* %5, align 1
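
The fragile %prcontext greps (match two lines of context around a label) are
replaced here by FileCheck, which scans the pass output for the CHECK patterns
in order; CHECK-NEXT additionally requires its pattern to match on the very
next line. The three directives above therefore assert that the line
immediately after the bb1: label is a phi and the one after that a
getelementptr, i.e. the partially redundant load is gone from bb1. Roughly:

; pass output:                 matching directive:
; bb1:                         ; CHECK: bb1:
;   %p = phi i32 ...           ; CHECK-NEXT: phi
;   %g = getelementptr ...     ; CHECK-NEXT: getelementptr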
diff --git a/test/Transforms/GVN/lpre-call-wrap.ll b/test/Transforms/GVN/lpre-call-wrap.ll
index 7470953bb9b0..40462798b534 100644
--- a/test/Transforms/GVN/lpre-call-wrap.ll
+++ b/test/Transforms/GVN/lpre-call-wrap.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis > %t
-; RUN: %prcontext bb3.backedge: 2 < %t | grep phi
-; RUN: %prcontext bb3.backedge: 2 < %t | not grep load
+; RUN: opt -S -gvn -enable-load-pre %s | FileCheck %s
;
; Make sure the load in bb3.backedge is removed and moved into bb1 after the
; call. This makes the non-call case faster.
@@ -43,6 +41,9 @@ bb1: ; preds = %bb
br label %bb3.backedge
bb3.backedge: ; preds = %bb, %bb1
+; CHECK: bb3.backedge:
+; CHECK-NEXT: phi
+; CHECK-NEXT: icmp
%7 = load i32* %0, align 4 ; <i32> [#uses=2]
%8 = icmp eq i32 %7, 0 ; <i1> [#uses=1]
br i1 %8, label %return, label %bb
diff --git a/test/Transforms/GVN/mixed.ll b/test/Transforms/GVN/mixed.ll
index 0c4f65cf9651..5152f68f0efb 100644
--- a/test/Transforms/GVN/mixed.ll
+++ b/test/Transforms/GVN/mixed.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep DEADLOAD
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep DEADGEP
+; RUN: opt < %s -gvn -S | not grep DEADLOAD
+; RUN: opt < %s -gvn -S | not grep DEADGEP
define i32 @main(i32** %p) {
block1:
diff --git a/test/Transforms/GVN/pre-basic-add.ll b/test/Transforms/GVN/pre-basic-add.ll
index f00bda3c4469..c13099fe7349 100644
--- a/test/Transforms/GVN/pre-basic-add.ll
+++ b/test/Transforms/GVN/pre-basic-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-pre | llvm-dis | grep {.pre}
+; RUN: opt < %s -gvn -enable-pre -S | grep {.pre}
@H = common global i32 0 ; <i32*> [#uses=2]
@G = common global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/GVN/pre-single-pred.ll b/test/Transforms/GVN/pre-single-pred.ll
index cb71617caed0..706a16b7bdd2 100644
--- a/test/Transforms/GVN/pre-single-pred.ll
+++ b/test/Transforms/GVN/pre-single-pred.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis | not grep {tmp3 = load}
+; RUN: opt < %s -gvn -enable-load-pre -S | not grep {tmp3 = load}
@p = external global i32
define i32 @f(i32 %n) nounwind {
diff --git a/test/Transforms/GVN/rle-dominated.ll b/test/Transforms/GVN/rle-dominated.ll
index 0d517178675e..e86f5929b99b 100644
--- a/test/Transforms/GVN/rle-dominated.ll
+++ b/test/Transforms/GVN/rle-dominated.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep load | count 2
+; RUN: opt < %s -gvn -S | grep load | count 2
define i32 @main(i32** %p) {
block1:
diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll
index ada439857189..79cc3636b11c 100644
--- a/test/Transforms/GVN/rle-must-alias.ll
+++ b/test/Transforms/GVN/rle-must-alias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {DEAD = phi i32 }
+; RUN: opt < %s -gvn -S | grep {DEAD = phi i32 }
; XFAIL: *
; FIXME: GVN should eliminate the fully redundant %9 GEP which
diff --git a/test/Transforms/GVN/rle-no-phi-translate.ll b/test/Transforms/GVN/rle-no-phi-translate.ll
index 9ffbe217dba5..57457a7af9f0 100644
--- a/test/Transforms/GVN/rle-no-phi-translate.ll
+++ b/test/Transforms/GVN/rle-no-phi-translate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep load
+; RUN: opt < %s -gvn -S | grep load
; FIXME: This should be promotable, but memdep/gvn don't track values
; path/edge-sensitively enough.
diff --git a/test/Transforms/GVN/rle-nonlocal.ll b/test/Transforms/GVN/rle-nonlocal.ll
index d93223131d79..51b89867a15a 100644
--- a/test/Transforms/GVN/rle-nonlocal.ll
+++ b/test/Transforms/GVN/rle-nonlocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%DEAD = phi i32. }
+; RUN: opt < %s -gvn -S | grep {%DEAD = phi i32. }
define i32 @main(i32** %p) {
block1:
diff --git a/test/Transforms/GVN/rle-phi-translate.ll b/test/Transforms/GVN/rle-phi-translate.ll
index b5311eec59c4..1f1baa98b48b 100644
--- a/test/Transforms/GVN/rle-phi-translate.ll
+++ b/test/Transforms/GVN/rle-phi-translate.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%cv = phi i32}
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%bv = phi i32}
+; RUN: opt < %s -gvn -S | grep {%cv = phi i32}
+; RUN: opt < %s -gvn -S | grep {%bv = phi i32}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll
index 5c11b06e7327..04e8c3856845 100644
--- a/test/Transforms/GVN/rle-semidominated.ll
+++ b/test/Transforms/GVN/rle-semidominated.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {DEAD = phi i32 }
+; RUN: opt < %s -gvn -S | grep {DEAD = phi i32 }
define i32 @main(i32* %p) {
block1:
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
new file mode 100644
index 000000000000..cdd1f4fa4e89
--- /dev/null
+++ b/test/Transforms/GVN/rle.ll
@@ -0,0 +1,282 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+; 32-bit little endian target.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+;; Trivial RLE test.
+define i32 @test0(i32 %V, i32* %P) {
+ store i32 %V, i32* %P
+
+ %A = load i32* %P
+ ret i32 %A
+; CHECK: @test0
+; CHECK: ret i32 %V
+}
+
+
+;;===----------------------------------------------------------------------===;;
+;; Tests for crashers
+;;===----------------------------------------------------------------------===;;
+
+;; PR5016
+define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
+ store {i32, i32} %A, {i32, i32}* %P
+ %X = bitcast {i32, i32}* %P to i8*
+ %Y = load i8* %X
+ ret i8 %Y
+}
+
+
+;;===----------------------------------------------------------------------===;;
+;; Store -> Load and Load -> Load forwarding where src and dst are different
+;; types, but where the base pointer is a must alias.
+;;===----------------------------------------------------------------------===;;
+
+;; i32 -> f32 forwarding.
+define float @coerce_mustalias1(i32 %V, i32* %P) {
+ store i32 %V, i32* %P
+
+ %P2 = bitcast i32* %P to float*
+
+ %A = load float* %P2
+ ret float %A
+; CHECK: @coerce_mustalias1
+; CHECK-NOT: load
+; CHECK: ret float
+}
+
+;; i32* -> float forwarding.
+define float @coerce_mustalias2(i32* %V, i32** %P) {
+ store i32* %V, i32** %P
+
+ %P2 = bitcast i32** %P to float*
+
+ %A = load float* %P2
+ ret float %A
+; CHECK: @coerce_mustalias2
+; CHECK-NOT: load
+; CHECK: ret float
+}
+
+;; float -> i32* forwarding.
+define i32* @coerce_mustalias3(float %V, float* %P) {
+ store float %V, float* %P
+
+ %P2 = bitcast float* %P to i32**
+
+ %A = load i32** %P2
+ ret i32* %A
+; CHECK: @coerce_mustalias3
+; CHECK-NOT: load
+; CHECK: ret i32*
+}
+
+;; i32 -> f32 load forwarding.
+define float @coerce_mustalias4(i32* %P, i1 %cond) {
+ %A = load i32* %P
+
+ %P2 = bitcast i32* %P to float*
+ %B = load float* %P2
+ br i1 %cond, label %T, label %F
+T:
+ ret float %B
+
+F:
+ %X = bitcast i32 %A to float
+ ret float %X
+
+; CHECK: @coerce_mustalias4
+; CHECK: %A = load i32* %P
+; CHECK-NOT: load
+; CHECK: ret float
+; CHECK: F:
+}
+
+;; i32 -> i8 forwarding
+define i8 @coerce_mustalias5(i32 %V, i32* %P) {
+ store i32 %V, i32* %P
+
+ %P2 = bitcast i32* %P to i8*
+
+ %A = load i8* %P2
+ ret i8 %A
+; CHECK: @coerce_mustalias5
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
+;; i64 -> float forwarding
+define float @coerce_mustalias6(i64 %V, i64* %P) {
+ store i64 %V, i64* %P
+
+ %P2 = bitcast i64* %P to float*
+
+ %A = load float* %P2
+ ret float %A
+; CHECK: @coerce_mustalias6
+; CHECK-NOT: load
+; CHECK: ret float
+}
+
+;; i64 -> i8* (32-bit) forwarding
+define i8* @coerce_mustalias7(i64 %V, i64* %P) {
+ store i64 %V, i64* %P
+
+ %P2 = bitcast i64* %P to i8**
+
+ %A = load i8** %P2
+ ret i8* %A
+; CHECK: @coerce_mustalias7
+; CHECK-NOT: load
+; CHECK: ret i8*
+}
+
+;; non-local i32/float -> i8 load forwarding.
+define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
+ %P2 = bitcast i32* %P to float*
+ %P3 = bitcast i32* %P to i8*
+ br i1 %cond, label %T, label %F
+T:
+ store i32 42, i32* %P
+ br label %Cont
+
+F:
+ store float 1.0, float* %P2
+ br label %Cont
+
+Cont:
+ %A = load i8* %P3
+ ret i8 %A
+
+; CHECK: @coerce_mustalias_nonlocal0
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
+}
+
+;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
+;; bitcast equivalence can be properly phi translated.
+define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
+ %P2 = bitcast i32* %P to float*
+ br i1 %cond, label %T, label %F
+T:
+ store i32 42, i32* %P
+ br label %Cont
+
+F:
+ store float 1.0, float* %P2
+ br label %Cont
+
+Cont:
+ %P3 = bitcast i32* %P to i8*
+ %A = load i8* %P3
+ ret i8 %A
+
+;; FIXME: This is disabled because it caused a miscompile in the llvm-gcc
+;; bootstrap, see r82411
+;
+; HECK: @coerce_mustalias_nonlocal1
+; HECK: Cont:
+; HECK: %A = phi i8 [
+; HECK-NOT: load
+; HECK: ret i8 %A
+}
+
+
+;; non-local i32 -> i8 partial redundancy load forwarding.
+define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
+ %P3 = bitcast i32* %P to i8*
+ br i1 %cond, label %T, label %F
+T:
+ store i32 42, i32* %P
+ br label %Cont
+
+F:
+ br label %Cont
+
+Cont:
+ %A = load i8* %P3
+ ret i8 %A
+
+; CHECK: @coerce_mustalias_pre0
+; CHECK: F:
+; CHECK: load i8* %P3
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
+}
+
+;;===----------------------------------------------------------------------===;;
+;; Store -> Load and Load -> Load forwarding where src and dst are different
+;; types, and the reload is an offset from the store pointer.
+;;===----------------------------------------------------------------------===;;
+
+;; i32 -> i8 forwarding.
+;; PR4216
+define i8 @coerce_offset0(i32 %V, i32* %P) {
+ store i32 %V, i32* %P
+
+ %P2 = bitcast i32* %P to i8*
+ %P3 = getelementptr i8* %P2, i32 2
+
+ %A = load i8* %P3
+ ret i8 %A
+; CHECK: @coerce_offset0
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
+;; non-local i32/float -> i8 load forwarding.
+define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
+ %P2 = bitcast i32* %P to float*
+ %P3 = bitcast i32* %P to i8*
+ %P4 = getelementptr i8* %P3, i32 2
+ br i1 %cond, label %T, label %F
+T:
+ store i32 42, i32* %P
+ br label %Cont
+
+F:
+ store float 1.0, float* %P2
+ br label %Cont
+
+Cont:
+ %A = load i8* %P4
+ ret i8 %A
+
+; CHECK: @coerce_offset_nonlocal0
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
+}
+
+
+;; non-local i32 -> i8 partial redundancy load forwarding.
+define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
+ %P3 = bitcast i32* %P to i8*
+ %P4 = getelementptr i8* %P3, i32 2
+ br i1 %cond, label %T, label %F
+T:
+ store i32 42, i32* %P
+ br label %Cont
+
+F:
+ br label %Cont
+
+Cont:
+ %A = load i8* %P4
+ ret i8 %A
+
+; CHECK: @coerce_offset_pre0
+; CHECK: F:
+; CHECK: load i8* %P4
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
+}
+
+
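
The coerce_mustalias tests pin down what the new rle.ll file is about: when
the stored value and the reloaded value have the same width but different
types through a must-alias pointer, GVN forwards the stored bits through a
cast instead of keeping the load. In effect, for coerce_mustalias1 above, the
result is roughly:

  store i32 %V, i32* %P
  %A = bitcast i32 %V to float  ; replaces: %A = load float* %P2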
diff --git a/test/Transforms/GlobalDCE/2002-07-17-CastRef.ll b/test/Transforms/GlobalDCE/2002-07-17-CastRef.ll
index a79f6738f190..37356f25948d 100644
--- a/test/Transforms/GlobalDCE/2002-07-17-CastRef.ll
+++ b/test/Transforms/GlobalDCE/2002-07-17-CastRef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globaldce
+; RUN: opt < %s -globaldce
;
define internal void @func() {
ret void
diff --git a/test/Transforms/GlobalDCE/2002-07-17-ConstantRef.ll b/test/Transforms/GlobalDCE/2002-07-17-ConstantRef.ll
index 2a398468beec..740f7201a348 100644
--- a/test/Transforms/GlobalDCE/2002-07-17-ConstantRef.ll
+++ b/test/Transforms/GlobalDCE/2002-07-17-ConstantRef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globaldce
+; RUN: opt < %s -globaldce
;
@X = global void ()* @func ; <void ()**> [#uses=0]
diff --git a/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll b/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
index dac81a39990a..766c227460e6 100644
--- a/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
+++ b/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
@@ -1,7 +1,7 @@
; Make sure that functions are removed successfully if they are referred to by
; a global that is dead. Make sure any globals they refer to die as well.
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | not grep foo
+; RUN: opt < %s -globaldce -S | not grep foo
;; Unused, kills %foo
@b = internal global i32 ()* @foo ; <i32 ()**> [#uses=0]
diff --git a/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll b/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
index d07f0140c46c..42fcb1e004a6 100644
--- a/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
+++ b/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
@@ -1,7 +1,7 @@
; This testcase checks that a worklist is being used, and that globals can be
; removed if they are the subject of a constexpr and ConstantPointerRef.
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | not grep global
+; RUN: opt < %s -globaldce -S | not grep global
@t0 = internal global [4 x i8] c"foo\00" ; <[4 x i8]*> [#uses=1]
@t1 = internal global [4 x i8] c"bar\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll b/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
index b49bfcc49403..6221fa3a62fe 100644
--- a/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
+++ b/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globaldce
+; RUN: opt < %s -globaldce
;; Should die when function %foo is killed
@foo.upgrd.1 = internal global i32 7 ; <i32*> [#uses=3]
diff --git a/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll b/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
index c6c51df94cf3..738ec43aaeb1 100644
--- a/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
+++ b/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
@@ -1,5 +1,5 @@
; distilled from 255.vortex
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | not grep testfunc
+; RUN: opt < %s -globaldce -S | not grep testfunc
declare i1 ()* @getfunc()
diff --git a/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll b/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
index 12d4736d2749..5b2c97f0f85e 100644
--- a/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
+++ b/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
@@ -1,6 +1,6 @@
; Weak variables should be preserved by global DCE!
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | grep @A
+; RUN: opt < %s -globaldce -S | grep @A
@A = weak global i32 54
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 264b81dca032..6658cee12239 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | not grep @D
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | grep @L | count 3
+; RUN: opt < %s -globaldce -S | not grep @D
+; RUN: opt < %s -globaldce -S | grep @L | count 3
@A = global i32 0
@D = alias internal i32* @A
diff --git a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
index 8c15c51a4e56..68933c6ef376 100644
--- a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
+++ b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globaldce
+; RUN: opt < %s -globaldce
@A = alias internal void ()* @F
define internal void @F() { ret void }
diff --git a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll b/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
new file mode 100644
index 000000000000..29864f825285
--- /dev/null
+++ b/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
@@ -0,0 +1,264 @@
+; RUN: opt < %s -globaldce | llc -O0 -o /dev/null
+
+%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
+%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>" = type { i32 }
+%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
+%struct.pthread_attr_t = type { i64, [48 x i8] }
+%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
+
+@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_init ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)* @pthread_mutexattr_settype ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_destroy ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
+
+define weak void @_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_(i8* %__t) {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !0)
+ tail call void @llvm.dbg.stoppoint(i32 240, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !0)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !8)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !8)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !11)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !11)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !12)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !12)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !13)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !13)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !14)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !14)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !15)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !15)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !16)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !16)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !17)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !17)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !18)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !18)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !19)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !19)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !20)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !20)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !21)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !21)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !22)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !22)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !23)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !23)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !24)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !24)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !25)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !25)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !26)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !26)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !27)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !27)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !28)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !28)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !29)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !29)
+ ret void
+}
+
+define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv() {
+entry:
+ tail call void @llvm.dbg.func.start(metadata !30)
+ tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
+ tail call void @llvm.dbg.region.end(metadata !30)
+ ret void
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+declare extern_weak i32 @pthread_once(i32*, void ()*)
+
+declare extern_weak i8* @pthread_getspecific(i32)
+
+declare extern_weak i32 @pthread_setspecific(i32, i8*)
+
+declare extern_weak i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
+
+declare extern_weak i32 @pthread_cancel(i64)
+
+declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
+
+declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
+
+declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
+
+declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
+
+declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*)
+
+declare extern_weak i32 @pthread_key_delete(i32)
+
+declare extern_weak i32 @pthread_mutexattr_init(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
+
+declare extern_weak i32 @pthread_mutexattr_settype(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)
+
+declare extern_weak i32 @pthread_mutexattr_destroy(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
+
+!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__aux_require_boolean_expr<bool>", metadata !"__aux_require_boolean_expr<bool>", metadata !"_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_", metadata !2, i32 239, metadata !3, i1 false, i1 true}
+!1 = metadata !{i32 458769, i32 0, i32 4, metadata !"concept-inst.cc", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/src/../../../../llvm-gcc.src/libstdc++-v3/src", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}
+!2 = metadata !{i32 458769, i32 0, i32 4, metadata !"boost_concept_check.h", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/include/bits", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 false, i1 true, metadata !"", i32 0}
+!3 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 458768, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6}
+!6 = metadata !{i32 458790, metadata !1, metadata !"", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !7}
+!7 = metadata !{i32 458788, metadata !1, metadata !"bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2}
+!8 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!9 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0}
+!10 = metadata !{null}
+!11 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!12 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!13 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!14 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!16 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!17 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!18 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!19 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!20 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!21 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!22 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!23 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!24 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!25 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!26 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!27 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!28 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!29 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
+!30 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
diff --git a/test/Transforms/GlobalDCE/basicvariabletest.ll b/test/Transforms/GlobalDCE/basicvariabletest.ll
index c883e58b84ce..a97b66de2c63 100644
--- a/test/Transforms/GlobalDCE/basicvariabletest.ll
+++ b/test/Transforms/GlobalDCE/basicvariabletest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | not grep global
+; RUN: opt < %s -globaldce -S | not grep global
@X = external global i32
@Y = internal global i32 7
diff --git a/test/Transforms/GlobalDCE/externally_available.ll b/test/Transforms/GlobalDCE/externally_available.ll
index ccdf7e198fab..cc88cb10dcc6 100644
--- a/test/Transforms/GlobalDCE/externally_available.ll
+++ b/test/Transforms/GlobalDCE/externally_available.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globaldce | llvm-dis | not grep test_
+; RUN: opt < %s -globaldce -S | not grep test_
; test_function should not be emitted to the .s file.
define available_externally i32 @test_function() {
diff --git a/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll b/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
index 5f784e065fb1..bdcf1fa4778b 100644
--- a/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
+++ b/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt
+; RUN: opt < %s -globalopt
@V = global float 1.200000e+01 ; <float*> [#uses=1]
@G = internal global i32* null ; <i32**> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll b/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
index 80f824392492..7bcb1d430918 100644
--- a/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
+++ b/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
; PR579
@g_40507551 = internal global i16 31038 ; <i16*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/2005-09-27-Crash.ll b/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
index 5eac431d9c64..ab2077a43c65 100644
--- a/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
+++ b/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
%RPyString = type { i32, %arraytype.Char }
%arraytype.Char = type { i32, [0 x i8] }
%arraytype.Signed = type { i32, [0 x i32] }
diff --git a/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll b/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
index 5559ef18b5b8..c9712198ce71 100644
--- a/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
+++ b/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
; PR820
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll b/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
index 39be6c40413b..352639ac067c 100644
--- a/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
+++ b/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
%struct._list = type { i32*, %struct._list* }
%struct._play = type { i32, i32*, %struct._list*, %struct._play* }
diff --git a/test/Transforms/GlobalOpt/2007-04-05-Crash.ll b/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
index 1991d90b8fa6..d306d1478247 100644
--- a/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
+++ b/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
target triple = "thumb-apple-darwin8"
diff --git a/test/Transforms/GlobalOpt/2007-05-13-Crash.ll b/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
index 22fee779338c..57039093d1e4 100644
--- a/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
+++ b/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/GlobalOpt/2007-06-04-PackedStruct.ll b/test/Transforms/GlobalOpt/2007-06-04-PackedStruct.ll
index 7394f047f4fe..7036c158ba67 100644
--- a/test/Transforms/GlobalOpt/2007-06-04-PackedStruct.ll
+++ b/test/Transforms/GlobalOpt/2007-06-04-PackedStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
; PR1491
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll b/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
index 0d7d0364029d..442cb921d8a3 100644
--- a/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
+++ b/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -disable-output
+; RUN: opt < %s -globalopt -disable-output
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
%struct.empty0 = type { }
diff --git a/test/Transforms/GlobalOpt/2008-01-03-Crash.ll b/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
index 3d8c77db4f36..4105ab1ed5b9 100644
--- a/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
+++ b/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis
+; RUN: opt < %s -globalopt | llvm-dis
; PR1896
@indirect1 = internal global void (i32)* null ; <void (i32)**> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
index aace34654976..82abc8fe546b 100644
--- a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
+++ b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {16 x .31 x double.. zeroinitializer}
+; RUN: opt < %s -globalopt -S | grep {16 x .31 x double.. zeroinitializer}
; The 'X' indices could be larger than 31. Do not SROA the outer indices of this array.
@mm = internal global [16 x [31 x double]] zeroinitializer, align 32
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index 779e7fbddb1f..0c817005c273 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {volatile load}
+; RUN: opt < %s -globalopt -S | grep {volatile load}
@t0.1441 = internal global double 0x3FD5555555555555, align 8 ; <double*> [#uses=1]
define double @foo() nounwind {
diff --git a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll b/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
index 59996c5a98bc..0e70c49adf14 100644
--- a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
+++ b/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep { nest } | count 1
+; RUN: opt < %s -globalopt -S | grep { nest } | count 1
%struct.FRAME.nest = type { i32, i32 (i32)* }
%struct.__builtin_trampoline = type { [10 x i8] }
@.str = internal constant [7 x i8] c"%d %d\0A\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index 8a0b5b308f52..cfc9f302ff00 100644
--- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -2,9 +2,9 @@
; alignments. Elements 0 and 2 must be 16-byte aligned, and element
; 1 must be at least 8 byte aligned (but could be more).
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@G.0 = internal global .*align 16}
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@G.1 = internal global .*align 8}
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@G.2 = internal global .*align 16}
+; RUN: opt < %s -globalopt -S | grep {@G.0 = internal global .*align 16}
+; RUN: opt < %s -globalopt -S | grep {@G.1 = internal global .*align 8}
+; RUN: opt < %s -globalopt -S | grep {@G.2 = internal global .*align 16}
; rdar://5891920
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
index 735a84d6fcb5..5e64f807f6ea 100644
--- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
+++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
@@ -2,7 +2,7 @@
; values. This used to crash, because globalopt forgot to put the new var in the
; same address space as the old one.
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis > %t
+; RUN: opt < %s -globalopt -S > %t
; Check that the new global values still have their address space
; RUN: cat %t | grep global.*addrspace
diff --git a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
index cdc27714c79c..3242e1eed6a5 100644
--- a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
+++ b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis
+; RUN: opt < %s -globalopt | llvm-dis
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
%struct.foo = type { i32, i32 }
diff --git a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
index f284840802e7..51dcac1f1a1d 100644
--- a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
+++ b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis
+; RUN: opt < %s -globalopt | llvm-dis
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
%struct.foo = type { i32, i32 }
diff --git a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
index 03ec3b6a5255..c4b6e52e7127 100644
--- a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
+++ b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {phi.*@head}
+; RUN: opt < %s -globalopt -S | grep {phi.*@head}
; PR3321
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index e59c8df55700..a1b69efe1a76 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt
+; RUN: opt < %s -globalopt
@g = external global i32
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index f042e59c19d4..5e639fd8222e 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {define void @a}
+; RUN: opt < %s -globalopt -S | grep {define void @a}
define internal void @f() {
ret void
diff --git a/test/Transforms/GlobalOpt/2009-03-03-dbg.ll b/test/Transforms/GlobalOpt/2009-03-03-dbg.ll
index 1996f621912b..070f89f49213 100644
--- a/test/Transforms/GlobalOpt/2009-03-03-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-03-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep global_variable42
+; RUN: opt < %s -globalopt -S | not grep global_variable42
; XFAIL: *
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 30e9a63f3b46..a5f9ed39cac6 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -stats -disable-output |& grep "1 globalopt - Number of global vars shrunk to booleans"
+; RUN: opt < %s -globalopt -stats -disable-output |& grep "1 globalopt - Number of global vars shrunk to booleans"
; XFAIL: *
type { } ; type %0
diff --git a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
index ea13d293d5fa..62f75e123be7 100644
--- a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
+++ b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep internal | count 2
+; RUN: opt < %s -globalopt -S | grep internal | count 2
global i32 0
define i32* @1() {
diff --git a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
index 8dba5b1c213c..e024fc27ecb6 100644
--- a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
+++ b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@X = internal global i32}
+; RUN: opt < %s -globalopt -S | grep {@X = internal global i32}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
@X = internal global i32* null ; <i32**> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll b/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
index 0ce16ddb9e2b..abd31094bfe3 100644
--- a/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
+++ b/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt
+; RUN: opt < %s -globalopt
%struct.s_annealing_sched = type { i32, float, float, float, float }
%struct.s_bb = type { i32, i32, i32, i32 }
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 3cae4138fde7..845117987391 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis > %t
+; RUN: opt < %s -globalopt -S > %t
; RUN: cat %t | grep foo1 | count 1
; RUN: cat %t | grep foo2 | count 4
; RUN: cat %t | grep bar1 | count 1
diff --git a/test/Transforms/GlobalOpt/basictest.ll b/test/Transforms/GlobalOpt/basictest.ll
index 1c3bdbbc495c..4332d3dd38c2 100644
--- a/test/Transforms/GlobalOpt/basictest.ll
+++ b/test/Transforms/GlobalOpt/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep global
+; RUN: opt < %s -globalopt -S | not grep global
@X = internal global i32 4 ; <i32*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
index 6fa139be2227..099c607509b2 100644
--- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll
+++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -globalopt | llvm-dis | \
+; RUN: opt < %s -instcombine -globalopt -S | \
; RUN: grep {internal fastcc float @foo}
define internal float @foo() {
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll b/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll
index 45fb45341cd2..5fe89ee09846 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep CTOR
+; RUN: opt < %s -globalopt -S | not grep CTOR
@llvm.global_ctors = appending global [10 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR2 }, { i32, void ()* } { i32 65535, void ()* @CTOR3 }, { i32, void ()* } { i32 65535, void ()* @CTOR4 }, { i32, void ()* } { i32 65535, void ()* @CTOR5 }, { i32, void ()* } { i32 65535, void ()* @CTOR6 }, { i32, void ()* } { i32 65535, void ()* @CTOR7 }, { i32, void ()* } { i32 65535, void ()* @CTOR8 }, { i32, void ()* } { i32 2147483647, void ()* null } ] ; <[10 x { i32, void ()* }]*> [#uses=0]
@G = global i32 0 ; <i32*> [#uses=1]
@G2 = global i32 0 ; <i32*> [#uses=1]
@@ -56,9 +56,9 @@ define internal void @CTOR4() {
}
define internal void @CTOR5() {
- %X.2p = getelementptr { i32, [2 x i32] }* @X, i32 0, i32 1, i32 0 ; <i32*> [#uses=2]
+ %X.2p = getelementptr inbounds { i32, [2 x i32] }* @X, i32 0, i32 1, i32 0 ; <i32*> [#uses=2]
%X.2 = load i32* %X.2p ; <i32> [#uses=1]
- %X.1p = getelementptr { i32, [2 x i32] }* @X, i32 0, i32 0 ; <i32*> [#uses=1]
+ %X.1p = getelementptr inbounds { i32, [2 x i32] }* @X, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 %X.2, i32* %X.1p
store i32 42, i32* %X.2p
ret void
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll b/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
new file mode 100644
index 000000000000..9b11985693a1
--- /dev/null
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; Don't get fooled by the inbounds keyword; it doesn't change
+; the computed address.
+
+; CHECK: @H = global i32 2
+; CHECK: @I = global i32 2
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR } ]
+@addr = external global i32
+@G = internal global [6 x [5 x i32]] zeroinitializer
+@H = global i32 80
+@I = global i32 90
+
+define internal void @CTOR() {
+ store i32 1, i32* getelementptr ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
+ store i32 2, i32* getelementptr inbounds ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
+ %t = load i32* getelementptr ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
+ store i32 %t, i32* @H
+ %s = load i32* getelementptr inbounds ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
+ store i32 %s, i32* @I
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt.ll b/test/Transforms/GlobalOpt/ctor-list-opt.ll
index 56aeadb38613..887e7ee643c5 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep CTOR
+; RUN: opt < %s -globalopt -S | not grep CTOR
@llvm.global_ctors = appending global [10 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR2 }, { i32, void ()* } { i32 65535, void ()* @CTOR3 }, { i32, void ()* } { i32 65535, void ()* @CTOR4 }, { i32, void ()* } { i32 65535, void ()* @CTOR5 }, { i32, void ()* } { i32 65535, void ()* @CTOR6 }, { i32, void ()* } { i32 65535, void ()* @CTOR7 }, { i32, void ()* } { i32 65535, void ()* @CTOR8 }, { i32, void ()* } { i32 2147483647, void ()* null } ] ; <[10 x { i32, void ()* }]*> [#uses=0]
@G = global i32 0 ; <i32*> [#uses=1]
@G2 = global i32 0 ; <i32*> [#uses=1]
@@ -43,9 +43,9 @@ define internal void @CTOR4() {
}
define internal void @CTOR5() {
- %X.2p = getelementptr { i32, [2 x i32] }* @X, i32 0, i32 1, i32 0 ; <i32*> [#uses=2]
+ %X.2p = getelementptr inbounds { i32, [2 x i32] }* @X, i32 0, i32 1, i32 0 ; <i32*> [#uses=2]
%X.2 = load i32* %X.2p ; <i32> [#uses=1]
- %X.1p = getelementptr { i32, [2 x i32] }* @X, i32 0, i32 0 ; <i32*> [#uses=1]
+ %X.1p = getelementptr inbounds { i32, [2 x i32] }* @X, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 %X.2, i32* %X.1p
store i32 42, i32* %X.2p
ret void
diff --git a/test/Transforms/GlobalOpt/deadglobal-2.ll b/test/Transforms/GlobalOpt/deadglobal-2.ll
index def6a09d2781..4f8181983094 100644
--- a/test/Transforms/GlobalOpt/deadglobal-2.ll
+++ b/test/Transforms/GlobalOpt/deadglobal-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep internal
+; RUN: opt < %s -globalopt -S | not grep internal
; This is a harder case to delete as the GEP has a variable index.
diff --git a/test/Transforms/GlobalOpt/deadglobal.ll b/test/Transforms/GlobalOpt/deadglobal.ll
index 7a47b8ea30bb..c8d8e7674d62 100644
--- a/test/Transforms/GlobalOpt/deadglobal.ll
+++ b/test/Transforms/GlobalOpt/deadglobal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep internal
+; RUN: opt < %s -globalopt -S | not grep internal
@G = internal global i32 123 ; <i32*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/globalsra-partial.ll b/test/Transforms/GlobalOpt/globalsra-partial.ll
index e52e7feab169..9a068e948941 100644
--- a/test/Transforms/GlobalOpt/globalsra-partial.ll
+++ b/test/Transforms/GlobalOpt/globalsra-partial.ll
@@ -1,6 +1,6 @@
; In this case, the global can only be broken up by one level.
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep 12345
+; RUN: opt < %s -globalopt -S | not grep 12345
@G = internal global { i32, [4 x float] } zeroinitializer ; <{ i32, [4 x float] }*> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
new file mode 100644
index 000000000000..9397a1236551
--- /dev/null
+++ b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -globalopt -S > %t
+; RUN: grep {@Y = internal global \\\[3 x \[%\]struct.X\\\] zeroinitializer} %t
+; RUN: grep load %t | count 6
+; RUN: grep {add i32 \[%\]a, \[%\]b} %t | count 3
+
+; globalopt should not sra the global, because it can't see the index.
+
+%struct.X = type { [3 x i32], [3 x i32] }
+
+@Y = internal global [3 x %struct.X] zeroinitializer
+
+@addr = external global i8
+
+define void @frob() {
+ store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 ptrtoint (i8* @addr to i64)), align 4
+ ret void
+}
+define i32 @borf(i64 %i, i64 %j) {
+ %p = getelementptr inbounds [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0
+ %a = load i32* %p
+ %q = getelementptr inbounds [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0
+ %b = load i32* %q
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+define i32 @borg(i64 %i, i64 %j) {
+ %p = getelementptr inbounds [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1
+ %a = load i32* %p
+ %q = getelementptr inbounds [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1
+ %b = load i32* %q
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+define i32 @borh(i64 %i, i64 %j) {
+ %p = getelementptr inbounds [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2
+ %a = load i32* %p
+ %q = getelementptr inbounds [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2
+ %b = load i32* %q
+ %c = add i32 %a, %b
+ ret i32 %c
+}
diff --git a/test/Transforms/GlobalOpt/globalsra.ll b/test/Transforms/GlobalOpt/globalsra.ll
index 36235552ae82..276ca64d7869 100644
--- a/test/Transforms/GlobalOpt/globalsra.ll
+++ b/test/Transforms/GlobalOpt/globalsra.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep global
+; RUN: opt < %s -globalopt -S | not grep global
@G = internal global { i32, float, { double } } {
i32 1,
diff --git a/test/Transforms/GlobalOpt/heap-sra-1.ll b/test/Transforms/GlobalOpt/heap-sra-1.ll
index 37e6d477a402..6df559e45698 100644
--- a/test/Transforms/GlobalOpt/heap-sra-1.ll
+++ b/test/Transforms/GlobalOpt/heap-sra-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@X.f0}
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@X.f1}
+; RUN: opt < %s -globalopt -S | grep {@X.f0}
+; RUN: opt < %s -globalopt -S | grep {@X.f1}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GlobalOpt/heap-sra-2.ll b/test/Transforms/GlobalOpt/heap-sra-2.ll
index 98f4a5022749..5a3c3cd1c057 100644
--- a/test/Transforms/GlobalOpt/heap-sra-2.ll
+++ b/test/Transforms/GlobalOpt/heap-sra-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@X.f0}
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@X.f1}
+; RUN: opt < %s -globalopt -S | grep {@X.f0}
+; RUN: opt < %s -globalopt -S | grep {@X.f1}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GlobalOpt/heap-sra-phi.ll b/test/Transforms/GlobalOpt/heap-sra-phi.ll
index 5f46a77f6b7c..2eba944cfda2 100644
--- a/test/Transforms/GlobalOpt/heap-sra-phi.ll
+++ b/test/Transforms/GlobalOpt/heap-sra-phi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {tmp.f1 = phi i32. }
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {tmp.f0 = phi i32. }
+; RUN: opt < %s -globalopt -S | grep {tmp.f1 = phi i32. }
+; RUN: opt < %s -globalopt -S | grep {tmp.f0 = phi i32. }
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 9fbbe357617d..59403b18d9d7 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt -instcombine | \
+; RUN: opt < %s -globalopt -instcombine | \
; RUN: llvm-dis | grep {ret i1 true}
;; check that global opt turns integers that only hold 0 or 1 into bools.
diff --git a/test/Transforms/GlobalOpt/iterate.ll b/test/Transforms/GlobalOpt/iterate.ll
index d764c5d31b4c..746687427590 100644
--- a/test/Transforms/GlobalOpt/iterate.ll
+++ b/test/Transforms/GlobalOpt/iterate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep %G
+; RUN: opt < %s -globalopt -S | not grep %G
@G = internal global i32 0 ; <i32*> [#uses=1]
@H = internal global { i32* } { i32* @G } ; <{ i32* }*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/load-store-global.ll b/test/Transforms/GlobalOpt/load-store-global.ll
index d89d2263955c..f824b2c11cbf 100644
--- a/test/Transforms/GlobalOpt/load-store-global.ll
+++ b/test/Transforms/GlobalOpt/load-store-global.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep G
+; RUN: opt < %s -globalopt -S | not grep G
@G = internal global i32 17 ; <i32*> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/malloc-promote-1.ll b/test/Transforms/GlobalOpt/malloc-promote-1.ll
index fea4dbdd65c3..5d4696f71b1a 100644
--- a/test/Transforms/GlobalOpt/malloc-promote-1.ll
+++ b/test/Transforms/GlobalOpt/malloc-promote-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep global
+; RUN: opt < %s -globalopt -S | not grep global
@G = internal global i32* null ; <i32**> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/malloc-promote-2.ll b/test/Transforms/GlobalOpt/malloc-promote-2.ll
index 1b3363295421..0d03835cf530 100644
--- a/test/Transforms/GlobalOpt/malloc-promote-2.ll
+++ b/test/Transforms/GlobalOpt/malloc-promote-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep malloc
+; RUN: opt < %s -globalopt -S | not grep malloc
@G = internal global i32* null ; <i32**> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/malloc-promote-3.ll b/test/Transforms/GlobalOpt/malloc-promote-3.ll
index 26ce8fd6f855..d4ee4e861c2d 100644
--- a/test/Transforms/GlobalOpt/malloc-promote-3.ll
+++ b/test/Transforms/GlobalOpt/malloc-promote-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep malloc
+; RUN: opt < %s -globalopt -S | not grep malloc
@G = internal global i32* null ; <i32**> [#uses=4]
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index b51aedb13fbb..335f5ec3a368 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | \
+; RUN: opt < %s -globalopt -S | \
; RUN: grep {G1 = internal constant}
@G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/memset.ll b/test/Transforms/GlobalOpt/memset.ll
index 0ee851f40aef..a9b9d5e6bdcb 100644
--- a/test/Transforms/GlobalOpt/memset.ll
+++ b/test/Transforms/GlobalOpt/memset.ll
@@ -1,6 +1,6 @@
; both globals are write only, delete them.
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | \
+; RUN: opt < %s -globalopt -S | \
; RUN: not grep internal
@G0 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/phi-select.ll b/test/Transforms/GlobalOpt/phi-select.ll
index da1314f76199..cd8a7dc990ff 100644
--- a/test/Transforms/GlobalOpt/phi-select.ll
+++ b/test/Transforms/GlobalOpt/phi-select.ll
@@ -1,7 +1,7 @@
; Test that PHI nodes and select instructions do not necessarily make stuff
; non-constant.
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep global
+; RUN: opt < %s -globalopt -S | not grep global
@X = internal global i32 4 ; <i32*> [#uses=2]
@Y = internal global i32 5 ; <i32*> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/storepointer-compare.ll b/test/Transforms/GlobalOpt/storepointer-compare.ll
index 86eff5172526..2f5ae869b7c2 100644
--- a/test/Transforms/GlobalOpt/storepointer-compare.ll
+++ b/test/Transforms/GlobalOpt/storepointer-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | \
+; RUN: opt < %s -globalopt -S | \
; RUN: grep {call void @Actual}
; Check that a comparison does not prevent an indirect call from being made
diff --git a/test/Transforms/GlobalOpt/storepointer.ll b/test/Transforms/GlobalOpt/storepointer.ll
index d57a6ac4fad9..8019076f9463 100644
--- a/test/Transforms/GlobalOpt/storepointer.ll
+++ b/test/Transforms/GlobalOpt/storepointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep global
+; RUN: opt < %s -globalopt -S | not grep global
@G = internal global void ()* null ; <void ()**> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/trivialstore.ll b/test/Transforms/GlobalOpt/trivialstore.ll
index a662565cd24f..21437f33b26d 100644
--- a/test/Transforms/GlobalOpt/trivialstore.ll
+++ b/test/Transforms/GlobalOpt/trivialstore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep G
+; RUN: opt < %s -globalopt -S | not grep G
@G = internal global i32 17 ; <i32*> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/undef-init.ll b/test/Transforms/GlobalOpt/undef-init.ll
index bb483fc37690..c14949739508 100644
--- a/test/Transforms/GlobalOpt/undef-init.ll
+++ b/test/Transforms/GlobalOpt/undef-init.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalopt | llvm-dis | not grep store
+; RUN: opt < %s -globalopt -S | not grep store
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I__Z3foov } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
@X.0 = internal global i32 undef ; <i32*> [#uses=2]
diff --git a/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll b/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
index fc7e3bba38a5..66403363bf93 100644
--- a/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
+++ b/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipconstprop | llvm-dis | grep {ret i32 %r}
+; RUN: opt < %s -ipconstprop -S | grep {ret i32 %r}
; Should not propagate the result of a weak function.
; PR2411
diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
new file mode 100644
index 000000000000..f4bab353cd07
--- /dev/null
+++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -ipsccp -S | FileCheck %s
+; Don't constant-propagate byval pointers: a byval argument points to a fresh copy, not to the original global.
+; PR5038
+%struct.MYstr = type { i8, i32 }
+@mystr = internal global %struct.MYstr zeroinitializer ; <%struct.MYstr*> [#uses=3]
+define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind {
+entry:
+ %0 = getelementptr %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 99, i32* %0, align 4
+; CHECK: %struct.MYstr* %u
+ %1 = getelementptr %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1]
+ store i8 97, i8* %1, align 4
+; CHECK: %struct.MYstr* %u
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
+entry:
+ %0 = getelementptr %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1]
+ %1 = load i32* %0
+; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) ; <i32> [#uses=1]
+ %2 = getelementptr %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1]
+ %3 = load i8* %2
+; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) ; <i8> [#uses=1]
+ %4 = zext i8 %3 to i32
+ %5 = add i32 %4, %1
+ ret i32 %5
+}
+
+define i32 @unions() nounwind {
+entry:
+ call void @vfu1(%struct.MYstr* byval align 4 @mystr) nounwind
+ %result = call i32 @vfu2(%struct.MYstr* byval align 4 @mystr) nounwind
+
+ ret i32 %result
+}
+
diff --git a/test/Transforms/IPConstantProp/deadarg.ll b/test/Transforms/IPConstantProp/deadarg.ll
index 18b8f7da8443..4b9938e09e90 100644
--- a/test/Transforms/IPConstantProp/deadarg.ll
+++ b/test/Transforms/IPConstantProp/deadarg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipconstprop -disable-output
+; RUN: opt < %s -ipconstprop -disable-output
define internal void @foo(i32 %X) {
call void @foo( i32 %X )
ret void
diff --git a/test/Transforms/IPConstantProp/recursion.ll b/test/Transforms/IPConstantProp/recursion.ll
index 27c381af3ab3..b25a6c081811 100644
--- a/test/Transforms/IPConstantProp/recursion.ll
+++ b/test/Transforms/IPConstantProp/recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipconstprop -deadargelim | llvm-dis | not grep %X
+; RUN: opt < %s -ipconstprop -deadargelim -S | not grep %X
define internal i32 @foo(i32 %X) {
%Y = call i32 @foo( i32 %X ) ; <i32> [#uses=1]
%Z = add i32 %Y, 1 ; <i32> [#uses=1]
diff --git a/test/Transforms/IPConstantProp/return-argument.ll b/test/Transforms/IPConstantProp/return-argument.ll
index f8605107bdce..0223453fa177 100644
--- a/test/Transforms/IPConstantProp/return-argument.ll
+++ b/test/Transforms/IPConstantProp/return-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipconstprop | llvm-dis > %t
+; RUN: opt < %s -ipconstprop -S > %t
; RUN: cat %t | grep {store i32 %Z, i32\\* %Q}
; RUN: cat %t | grep {add i32 1, 3}
diff --git a/test/Transforms/IPConstantProp/return-constant.ll b/test/Transforms/IPConstantProp/return-constant.ll
index 381ef63cc88e..b25585952873 100644
--- a/test/Transforms/IPConstantProp/return-constant.ll
+++ b/test/Transforms/IPConstantProp/return-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipconstprop -instcombine | \
+; RUN: opt < %s -ipconstprop -instcombine | \
; RUN: llvm-dis | grep {ret i1 true} | count 2
define internal i32 @foo(i1 %C) {
br i1 %C, label %T, label %F
diff --git a/test/Transforms/IPConstantProp/return-constants.ll b/test/Transforms/IPConstantProp/return-constants.ll
index 7205c2820a71..79220dd1f53d 100644
--- a/test/Transforms/IPConstantProp/return-constants.ll
+++ b/test/Transforms/IPConstantProp/return-constants.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipconstprop | llvm-dis > %t
+; RUN: opt < %s -ipconstprop -S > %t
;; Check that the 21 constants got propagated properly
; RUN: cat %t | grep {%M = add i32 21, 21}
;; Check that the second return values didn't get propagated
diff --git a/test/Transforms/IndMemRem/2009-01-24-Noalias.ll b/test/Transforms/IndMemRem/2009-01-24-Noalias.ll
index bc3d0bfe5e3f..b6d98031b732 100644
--- a/test/Transforms/IndMemRem/2009-01-24-Noalias.ll
+++ b/test/Transforms/IndMemRem/2009-01-24-Noalias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indmemrem | llvm-dis | grep bounce | grep noalias
+; RUN: opt < %s -indmemrem -S | grep bounce | grep noalias
declare i8* @malloc(i32)
diff --git a/test/Transforms/IndVarSimplify/2002-09-09-PointerIndVar.ll b/test/Transforms/IndVarSimplify/2002-09-09-PointerIndVar.ll
index 228772eb82cc..92911ae078c3 100644
--- a/test/Transforms/IndVarSimplify/2002-09-09-PointerIndVar.ll
+++ b/test/Transforms/IndVarSimplify/2002-09-09-PointerIndVar.ll
@@ -1,7 +1,7 @@
; Induction variable pass is doing bad things with pointer induction vars,
; trying to do arithmetic on them directly.
;
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
;
define void @test(i32 %A, i32 %S, i8* %S.upgrd.1) {
; <label>:0
diff --git a/test/Transforms/IndVarSimplify/2003-04-16-ExprAnalysis.ll b/test/Transforms/IndVarSimplify/2003-04-16-ExprAnalysis.ll
index 2487ea95e27d..38fa112bdb5e 100644
--- a/test/Transforms/IndVarSimplify/2003-04-16-ExprAnalysis.ll
+++ b/test/Transforms/IndVarSimplify/2003-04-16-ExprAnalysis.ll
@@ -1,7 +1,7 @@
; This is a test case for the expression analysis code, not really indvars.
; It was assuming any constant of int type was a ConstantInteger.
;
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
@X = global i32 7 ; <i32*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/2003-09-12-MultiplePred.ll b/test/Transforms/IndVarSimplify/2003-09-12-MultiplePred.ll
index 96190991f73a..36d50065d370 100644
--- a/test/Transforms/IndVarSimplify/2003-09-12-MultiplePred.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-12-MultiplePred.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep indvar
+; RUN: opt < %s -indvars -S | grep indvar
define i32 @test() {
; <label>:0
diff --git a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
index b4a2c5061894..150ae70a8262 100644
--- a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | %prcontext ^Loop: 1 | grep %Canonical
+; RUN: opt -S -indvars %s | FileCheck %s
; The indvar simplification code should ensure that the first PHI in the block
; is the canonical one!
@@ -8,6 +8,8 @@ define i32 @test() {
br label %Loop
Loop: ; preds = %Loop, %0
+; CHECK: Loop:
+; CHECK-NEXT: Canonical
%NonIndvar = phi i32 [ 200, %0 ], [ %NonIndvarNext, %Loop ] ; <i32> [#uses=1]
%Canonical = phi i32 [ 0, %0 ], [ %CanonicalNext, %Loop ] ; <i32> [#uses=2]
store i32 %Canonical, i32* null
diff --git a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll b/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll
index bd9d1ef5cb9d..c8f97e39bef6 100644
--- a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll
+++ b/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll
@@ -1,7 +1,7 @@
; The induction variable canonicalization pass shouldn't leave dead
; instructions laying around!
;
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | \
+; RUN: opt < %s -indvars -S | \
; RUN: not grep {#uses=0}
define i32 @mul(i32 %x, i32 %y) {
diff --git a/test/Transforms/IndVarSimplify/2003-12-10-RemoveInstrCrash.ll b/test/Transforms/IndVarSimplify/2003-12-10-RemoveInstrCrash.ll
index 11af997e83bd..70ea11ebf04f 100644
--- a/test/Transforms/IndVarSimplify/2003-12-10-RemoveInstrCrash.ll
+++ b/test/Transforms/IndVarSimplify/2003-12-10-RemoveInstrCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @test() {
entry:
diff --git a/test/Transforms/IndVarSimplify/2003-12-15-Crash.ll b/test/Transforms/IndVarSimplify/2003-12-15-Crash.ll
index b964d78d6952..5aa2d90a42b9 100644
--- a/test/Transforms/IndVarSimplify/2003-12-15-Crash.ll
+++ b/test/Transforms/IndVarSimplify/2003-12-15-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @_ZN17CoinFactorization7cleanupEv() {
entry:
br i1 false, label %loopexit.14, label %cond_continue.3
diff --git a/test/Transforms/IndVarSimplify/2003-12-21-IndVarSize.ll b/test/Transforms/IndVarSimplify/2003-12-21-IndVarSize.ll
index cf8c80472dfe..0fc9c8547d9d 100644
--- a/test/Transforms/IndVarSimplify/2003-12-21-IndVarSize.ll
+++ b/test/Transforms/IndVarSimplify/2003-12-21-IndVarSize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep indvar | not grep i32
+; RUN: opt < %s -indvars -S | grep indvar | not grep i32
@G = global i64 0 ; <i64*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/2004-03-10-PHIInsertionBug.ll b/test/Transforms/IndVarSimplify/2004-03-10-PHIInsertionBug.ll
index 248b29b32d2c..c49819e27af4 100644
--- a/test/Transforms/IndVarSimplify/2004-03-10-PHIInsertionBug.ll
+++ b/test/Transforms/IndVarSimplify/2004-03-10-PHIInsertionBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @test() {
br label %endif.0.i
diff --git a/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll b/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
index 54d85472a891..1ed4c44d2650 100644
--- a/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
+++ b/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; ModuleID = '2004-04-05-InvokeCastCrash.ll'
%struct.__false_type = type { i8 }
%"struct.__gnu_cxx::_Hashtable_node<const llvm::Constant*>" = type { %"struct.__gnu_cxx::_Hashtable_node<const llvm::Constant*>"*, %"struct.llvm::Constant"* }
diff --git a/test/Transforms/IndVarSimplify/2004-04-07-ScalarEvolutionCrash.ll b/test/Transforms/IndVarSimplify/2004-04-07-ScalarEvolutionCrash.ll
index b4eb3db19095..ec1218bb86dd 100644
--- a/test/Transforms/IndVarSimplify/2004-04-07-ScalarEvolutionCrash.ll
+++ b/test/Transforms/IndVarSimplify/2004-04-07-ScalarEvolutionCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @.outPlank_21() {
entry:
diff --git a/test/Transforms/IndVarSimplify/2005-02-11-InvokeCrash.ll b/test/Transforms/IndVarSimplify/2005-02-11-InvokeCrash.ll
index afee7e189180..aee67ccacc5f 100644
--- a/test/Transforms/IndVarSimplify/2005-02-11-InvokeCrash.ll
+++ b/test/Transforms/IndVarSimplify/2005-02-11-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @_ZN5ArrayISt7complexIdEEC2ERK10dim_vector() {
entry:
diff --git a/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll b/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
index a1beec646891..70a7a9de05f0 100644
--- a/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
+++ b/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
declare void @q_atomic_increment()
diff --git a/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll b/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
index 853d5ad50146..1ba69826faf9 100644
--- a/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
+++ b/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | \
+; RUN: opt < %s -indvars -S | \
; RUN: grep {ret i32 152}
define i32 @main() {
diff --git a/test/Transforms/IndVarSimplify/2005-06-15-InstMoveCrash.ll b/test/Transforms/IndVarSimplify/2005-06-15-InstMoveCrash.ll
index ba4db9f95995..0862f1131b5a 100644
--- a/test/Transforms/IndVarSimplify/2005-06-15-InstMoveCrash.ll
+++ b/test/Transforms/IndVarSimplify/2005-06-15-InstMoveCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @main() {
entry:
diff --git a/test/Transforms/IndVarSimplify/2005-11-18-Crash.ll b/test/Transforms/IndVarSimplify/2005-11-18-Crash.ll
index 5ee8cea74a38..f9a3fe6233a5 100644
--- a/test/Transforms/IndVarSimplify/2005-11-18-Crash.ll
+++ b/test/Transforms/IndVarSimplify/2005-11-18-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
@fixtab = external global [29 x [29 x [2 x i32]]] ; <[29 x [29 x [2 x i32]]]*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll b/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
index 32abee9d1cc8..1bbc63108086 100644
--- a/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
+++ b/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
@@ -1,5 +1,5 @@
; PR726
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | \
+; RUN: opt < %s -indvars -S | \
; RUN: grep {ret i32 27}
; Make sure to compute the right exit value based on negative strides.
diff --git a/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll b/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
index 986831b9a6e4..36ec2b81ba73 100644
--- a/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
+++ b/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
define void @get_block() {
endif.0:
diff --git a/test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll b/test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll
index 6a478ab5f5fd..787c9b07bdcd 100644
--- a/test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll
+++ b/test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; ModuleID = '2006-09-20-LFTR-Crash.ll'
%struct.p7prior_s = type { i32, i32, [200 x float], [200 x [7 x float]], i32, [200 x float], [200 x [20 x float]], i32, [200 x float], [200 x [20 x float]] }
diff --git a/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll b/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
index b2f8258000ff..79ac1f072de6 100644
--- a/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
+++ b/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8"
%struct.vorbis_dsp_state = type { i32, %struct.vorbis_info*, float**, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
diff --git a/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll b/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
index dd151e84d00e..268b8d1a7e47 100644
--- a/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
+++ b/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
@@ -1,5 +1,5 @@
; PR1015
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {ret i32 0}
+; RUN: opt < %s -indvars -S | not grep {ret i32 0}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll b/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll
index 7078494a9295..de226a140ad1 100644
--- a/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll
+++ b/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {(%rdi,%rax,8)}
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep {addq.*8}
+; RUN: llc %s -o - -march=x86-64 | grep {(%rdi,%rax,8)}
+; RUN: llc %s -o - -march=x86-64 | not grep {addq.*8}
define void @foo(double* %y) nounwind {
entry:
diff --git a/test/Transforms/IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll b/test/Transforms/IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll
index 363c98c493bf..fc7d6335910d 100644
--- a/test/Transforms/IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll
+++ b/test/Transforms/IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; PR1487
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
diff --git a/test/Transforms/IndVarSimplify/2007-11-23-BitcastCrash.ll b/test/Transforms/IndVarSimplify/2007-11-23-BitcastCrash.ll
index 555cadda6de3..cad4eb155ce9 100644
--- a/test/Transforms/IndVarSimplify/2007-11-23-BitcastCrash.ll
+++ b/test/Transforms/IndVarSimplify/2007-11-23-BitcastCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; PR1814
target datalayout = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"
diff --git a/test/Transforms/IndVarSimplify/2008-06-15-SCEVExpanderBug.ll b/test/Transforms/IndVarSimplify/2008-06-15-SCEVExpanderBug.ll
index aac8d9789464..77235d2888e5 100644
--- a/test/Transforms/IndVarSimplify/2008-06-15-SCEVExpanderBug.ll
+++ b/test/Transforms/IndVarSimplify/2008-06-15-SCEVExpanderBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; PR2434
define fastcc void @regcppop() nounwind {
diff --git a/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll b/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
index 8111cbe3a489..288431aa8bcf 100644
--- a/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
+++ b/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep sext | count 1
+; RUN: opt < %s -indvars -S | grep sext | count 1
; ModuleID = '<stdin>'
%struct.App1Marker = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>
diff --git a/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll b/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
index c78188d4d223..23e788496138 100644
--- a/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
+++ b/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; PR2857
@foo = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
index be8b36fac6ad..7b4032b2eba9 100644
--- a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep icmp | count 4
+; RUN: opt < %s -indvars -S | grep icmp | count 4
define void @bar() nounwind {
entry:
br label %bb
diff --git a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
index c947d3bc3338..311d3daa8f32 100644
--- a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep icmp | count 2
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep sitofp | count 1
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep uitofp | count 1
+; RUN: opt < %s -indvars -S | grep icmp | count 2
+; RUN: opt < %s -indvars -S | grep sitofp | count 1
+; RUN: opt < %s -indvars -S | grep uitofp | count 1
define void @bar() nounwind {
entry:
diff --git a/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll b/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
index e611b1fd8341..39b97af86fa5 100644
--- a/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
define void @t() nounwind {
entry:
diff --git a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
index 134c9c74d3a9..37ad63a9a772 100644
--- a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {sext}
+; RUN: opt < %s -indvars -S | not grep {sext}
; ModuleID = '<stdin>'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.6"
diff --git a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
index 4d26803b437d..803b540606e5 100644
--- a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -instcombine | llvm-dis | not grep {\[sz\]ext}
+; RUN: opt < %s -indvars -instcombine -S | not grep {\[sz\]ext}
; ModuleID = '<stdin>'
;extern int *a, *b, *c, *d, *e, *f; /* 64 bit */
;extern int K[256];
diff --git a/test/Transforms/IndVarSimplify/2009-04-22-IndvarCrash.ll b/test/Transforms/IndVarSimplify/2009-04-22-IndvarCrash.ll
index f39458f17968..24074bf7271d 100644
--- a/test/Transforms/IndVarSimplify/2009-04-22-IndvarCrash.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-22-IndvarCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; rdar://6817574
define i32 @t1() nounwind ssp {
diff --git a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
index e70d577e0a9a..9fd2d2f04f72 100644
--- a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep icmp | grep next
+; RUN: opt < %s -indvars -S | grep icmp | grep next
; PR4086
declare void @foo()
diff --git a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
index ecbb23192e92..9ad86913e22f 100644
--- a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
+++ b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; PR4258
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-linux-gnu"
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
index 56325b36cbbe..436840ae9075 100644
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep phi %t | count 4
; RUN: grep {= phi i32} %t | count 4
; RUN: not grep {sext i} %t
diff --git a/test/Transforms/IndVarSimplify/addrec-gep.ll b/test/Transforms/IndVarSimplify/addrec-gep.ll
index 132d4f8a871c..9e4273491e40 100644
--- a/test/Transforms/IndVarSimplify/addrec-gep.ll
+++ b/test/Transforms/IndVarSimplify/addrec-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep getelementptr %t | count 1
; RUN: grep {mul .*, 37} %t | count 1
; RUN: grep {add .*, 5203} %t | count 1
diff --git a/test/Transforms/IndVarSimplify/ashr-tripcount.ll b/test/Transforms/IndVarSimplify/ashr-tripcount.ll
index 7b2cad28bf07..baaefdc2bc51 100644
--- a/test/Transforms/IndVarSimplify/ashr-tripcount.ll
+++ b/test/Transforms/IndVarSimplify/ashr-tripcount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep sext %t | count 1
; Indvars should be able to eliminate all of the sign extensions
diff --git a/test/Transforms/IndVarSimplify/avoid-i0.ll b/test/Transforms/IndVarSimplify/avoid-i0.ll
index 3456bd3d6c9c..d110a8a7ba97 100644
--- a/test/Transforms/IndVarSimplify/avoid-i0.ll
+++ b/test/Transforms/IndVarSimplify/avoid-i0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; PR4052
; PR4054
diff --git a/test/Transforms/IndVarSimplify/casted-argument.ll b/test/Transforms/IndVarSimplify/casted-argument.ll
index 6d7aaa9161ad..dfefe1dc5bbe 100644
--- a/test/Transforms/IndVarSimplify/casted-argument.ll
+++ b/test/Transforms/IndVarSimplify/casted-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; PR4009
; PR4038
diff --git a/test/Transforms/IndVarSimplify/complex-scev.ll b/test/Transforms/IndVarSimplify/complex-scev.ll
index 4bfc4e981af5..434c4ec99525 100644
--- a/test/Transforms/IndVarSimplify/complex-scev.ll
+++ b/test/Transforms/IndVarSimplify/complex-scev.ll
@@ -1,7 +1,7 @@
; The i induction variable looks like a wrap-around, but it really is just
; a simple affine IV. Make sure that indvars eliminates it.
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep phi | count 1
+; RUN: opt < %s -indvars -S | grep phi | count 1
define void @foo() {
entry:
diff --git a/test/Transforms/IndVarSimplify/divide-pointer.ll b/test/Transforms/IndVarSimplify/divide-pointer.ll
index 747d47eb8690..16608ee8280e 100644
--- a/test/Transforms/IndVarSimplify/divide-pointer.ll
+++ b/test/Transforms/IndVarSimplify/divide-pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; PR4271
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/IndVarSimplify/exit_value_tests.ll b/test/Transforms/IndVarSimplify/exit_value_tests.ll
index b39f40f48664..737e733375fd 100644
--- a/test/Transforms/IndVarSimplify/exit_value_tests.ll
+++ b/test/Transforms/IndVarSimplify/exit_value_tests.ll
@@ -2,7 +2,7 @@
; these loops all have predictable exit values we can replace the use outside
; of the loop with a closed-form computation, making the loop dead.
;
-; RUN: llvm-as < %s | opt -indvars -loop-deletion -simplifycfg | \
+; RUN: opt < %s -indvars -loop-deletion -simplifycfg | \
; RUN: llvm-dis | not grep br
define i32 @polynomial_constant() {
diff --git a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
index e63c88c65e68..780959407630 100644
--- a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
+++ b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep add %t | count 8
; RUN: grep mul %t | count 7
diff --git a/test/Transforms/IndVarSimplify/interesting-invoke-use.ll b/test/Transforms/IndVarSimplify/interesting-invoke-use.ll
index b90d1e0ecf35..8adc0e525777 100644
--- a/test/Transforms/IndVarSimplify/interesting-invoke-use.ll
+++ b/test/Transforms/IndVarSimplify/interesting-invoke-use.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; An invoke has a result value which is used in an "Interesting"
; expression inside the loop. IndVars should be able to rewrite
diff --git a/test/Transforms/IndVarSimplify/iterationCount_zext_or_trunc.ll b/test/Transforms/IndVarSimplify/iterationCount_zext_or_trunc.ll
index 747c781e993f..02145d1c5e7a 100644
--- a/test/Transforms/IndVarSimplify/iterationCount_zext_or_trunc.ll
+++ b/test/Transforms/IndVarSimplify/iterationCount_zext_or_trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; ModuleID = 'testcase.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/IndVarSimplify/iv-sext.ll b/test/Transforms/IndVarSimplify/iv-sext.ll
new file mode 100644
index 000000000000..55165022109a
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/iv-sext.ll
@@ -0,0 +1,143 @@
+; RUN: opt < %s -indvars -S > %t
+; RUN: grep {= sext} %t | count 4
+; RUN: grep {phi i64} %t | count 2
+
+; Indvars should be able to promote the hiPart induction variable in the
+; inner loop to i64.
+; TODO: it should promote hiPart to i64 in the outer loop too.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bandEdgeIndex, float %tmp1) nounwind {
+entry:
+ %tmp = load float* %peakWeight, align 4 ; <float> [#uses=1]
+ %tmp2 = icmp sgt i32 %bandEdgeIndex, 0 ; <i1> [#uses=1]
+ br i1 %tmp2, label %bb.nph22, label %return
+
+bb.nph22: ; preds = %entry
+ %tmp3 = add i32 %bandEdgeIndex, -1 ; <i32> [#uses=2]
+ br label %bb
+
+bb: ; preds = %bb8, %bb.nph22
+ %distERBhi.121 = phi float [ %distERBhi.2.lcssa, %bb8 ], [ 0.000000e+00, %bb.nph22 ] ; <float> [#uses=2]
+ %distERBlo.120 = phi float [ %distERBlo.0.lcssa, %bb8 ], [ 0.000000e+00, %bb.nph22 ] ; <float> [#uses=2]
+ %hiPart.119 = phi i32 [ %hiPart.0.lcssa, %bb8 ], [ 0, %bb.nph22 ] ; <i32> [#uses=3]
+ %loPart.118 = phi i32 [ %loPart.0.lcssa, %bb8 ], [ 0, %bb.nph22 ] ; <i32> [#uses=2]
+ %peakCount.117 = phi float [ %peakCount.2.lcssa, %bb8 ], [ %tmp, %bb.nph22 ] ; <float> [#uses=2]
+ %part.016 = phi i32 [ %tmp46, %bb8 ], [ 0, %bb.nph22 ] ; <i32> [#uses=5]
+ %tmp4 = icmp sgt i32 %part.016, 0 ; <i1> [#uses=1]
+ br i1 %tmp4, label %bb1, label %bb3.preheader
+
+bb1: ; preds = %bb
+ %tmp5 = add i32 %part.016, -1 ; <i32> [#uses=1]
+ %tmp6 = sext i32 %tmp5 to i64 ; <i64> [#uses=1]
+ %tmp7 = getelementptr float* %pTmp1, i64 %tmp6 ; <float*> [#uses=1]
+ %tmp8 = load float* %tmp7, align 4 ; <float> [#uses=1]
+ %tmp9 = fadd float %tmp8, %distERBlo.120 ; <float> [#uses=1]
+ %tmp10 = add i32 %part.016, -1 ; <i32> [#uses=1]
+ %tmp11 = sext i32 %tmp10 to i64 ; <i64> [#uses=1]
+ %tmp12 = getelementptr float* %pTmp1, i64 %tmp11 ; <float*> [#uses=1]
+ %tmp13 = load float* %tmp12, align 4 ; <float> [#uses=1]
+ %tmp14 = fsub float %distERBhi.121, %tmp13 ; <float> [#uses=1]
+ br label %bb3.preheader
+
+bb3.preheader: ; preds = %bb1, %bb
+ %distERBlo.0.ph = phi float [ %distERBlo.120, %bb ], [ %tmp9, %bb1 ] ; <float> [#uses=3]
+ %distERBhi.0.ph = phi float [ %distERBhi.121, %bb ], [ %tmp14, %bb1 ] ; <float> [#uses=3]
+ %tmp15 = fcmp ogt float %distERBlo.0.ph, 2.500000e+00 ; <i1> [#uses=1]
+ br i1 %tmp15, label %bb.nph, label %bb5.preheader
+
+bb.nph: ; preds = %bb3.preheader
+ br label %bb2
+
+bb2: ; preds = %bb3, %bb.nph
+ %distERBlo.03 = phi float [ %tmp19, %bb3 ], [ %distERBlo.0.ph, %bb.nph ] ; <float> [#uses=1]
+ %loPart.02 = phi i32 [ %tmp24, %bb3 ], [ %loPart.118, %bb.nph ] ; <i32> [#uses=3]
+ %peakCount.01 = phi float [ %tmp23, %bb3 ], [ %peakCount.117, %bb.nph ] ; <float> [#uses=1]
+ %tmp16 = sext i32 %loPart.02 to i64 ; <i64> [#uses=1]
+ %tmp17 = getelementptr float* %pTmp1, i64 %tmp16 ; <float*> [#uses=1]
+ %tmp18 = load float* %tmp17, align 4 ; <float> [#uses=1]
+ %tmp19 = fsub float %distERBlo.03, %tmp18 ; <float> [#uses=3]
+ %tmp20 = sext i32 %loPart.02 to i64 ; <i64> [#uses=1]
+ %tmp21 = getelementptr float* %peakWeight, i64 %tmp20 ; <float*> [#uses=1]
+ %tmp22 = load float* %tmp21, align 4 ; <float> [#uses=1]
+ %tmp23 = fsub float %peakCount.01, %tmp22 ; <float> [#uses=2]
+ %tmp24 = add i32 %loPart.02, 1 ; <i32> [#uses=2]
+ br label %bb3
+
+bb3: ; preds = %bb2
+ %tmp25 = fcmp ogt float %tmp19, 2.500000e+00 ; <i1> [#uses=1]
+ br i1 %tmp25, label %bb2, label %bb3.bb5.preheader_crit_edge
+
+bb3.bb5.preheader_crit_edge: ; preds = %bb3
+ %tmp24.lcssa = phi i32 [ %tmp24, %bb3 ] ; <i32> [#uses=1]
+ %tmp23.lcssa = phi float [ %tmp23, %bb3 ] ; <float> [#uses=1]
+ %tmp19.lcssa = phi float [ %tmp19, %bb3 ] ; <float> [#uses=1]
+ br label %bb5.preheader
+
+bb5.preheader: ; preds = %bb3.bb5.preheader_crit_edge, %bb3.preheader
+ %distERBlo.0.lcssa = phi float [ %tmp19.lcssa, %bb3.bb5.preheader_crit_edge ], [ %distERBlo.0.ph, %bb3.preheader ] ; <float> [#uses=2]
+ %loPart.0.lcssa = phi i32 [ %tmp24.lcssa, %bb3.bb5.preheader_crit_edge ], [ %loPart.118, %bb3.preheader ] ; <i32> [#uses=1]
+ %peakCount.0.lcssa = phi float [ %tmp23.lcssa, %bb3.bb5.preheader_crit_edge ], [ %peakCount.117, %bb3.preheader ] ; <float> [#uses=2]
+ %.not10 = fcmp olt float %distERBhi.0.ph, 2.500000e+00 ; <i1> [#uses=1]
+ %tmp26 = icmp sgt i32 %tmp3, %hiPart.119 ; <i1> [#uses=1]
+ %or.cond11 = and i1 %tmp26, %.not10 ; <i1> [#uses=1]
+ br i1 %or.cond11, label %bb.nph12, label %bb7
+
+bb.nph12: ; preds = %bb5.preheader
+ br label %bb4
+
+bb4: ; preds = %bb5, %bb.nph12
+ %distERBhi.29 = phi float [ %tmp30, %bb5 ], [ %distERBhi.0.ph, %bb.nph12 ] ; <float> [#uses=1]
+ %hiPart.08 = phi i32 [ %tmp31, %bb5 ], [ %hiPart.119, %bb.nph12 ] ; <i32> [#uses=2]
+ %peakCount.27 = phi float [ %tmp35, %bb5 ], [ %peakCount.0.lcssa, %bb.nph12 ] ; <float> [#uses=1]
+ %tmp27 = sext i32 %hiPart.08 to i64 ; <i64> [#uses=1]
+ %tmp28 = getelementptr float* %pTmp1, i64 %tmp27 ; <float*> [#uses=1]
+ %tmp29 = load float* %tmp28, align 4 ; <float> [#uses=1]
+ %tmp30 = fadd float %tmp29, %distERBhi.29 ; <float> [#uses=3]
+ %tmp31 = add i32 %hiPart.08, 1 ; <i32> [#uses=4]
+ %tmp32 = sext i32 %tmp31 to i64 ; <i64> [#uses=1]
+ %tmp33 = getelementptr float* %peakWeight, i64 %tmp32 ; <float*> [#uses=1]
+ %tmp34 = load float* %tmp33, align 4 ; <float> [#uses=1]
+ %tmp35 = fadd float %tmp34, %peakCount.27 ; <float> [#uses=2]
+ br label %bb5
+
+bb5: ; preds = %bb4
+ %.not = fcmp olt float %tmp30, 2.500000e+00 ; <i1> [#uses=1]
+ %tmp36 = icmp sgt i32 %tmp3, %tmp31 ; <i1> [#uses=1]
+ %or.cond = and i1 %tmp36, %.not ; <i1> [#uses=1]
+ br i1 %or.cond, label %bb4, label %bb5.bb7_crit_edge
+
+bb5.bb7_crit_edge: ; preds = %bb5
+ %tmp35.lcssa = phi float [ %tmp35, %bb5 ] ; <float> [#uses=1]
+ %tmp31.lcssa = phi i32 [ %tmp31, %bb5 ] ; <i32> [#uses=1]
+ %tmp30.lcssa = phi float [ %tmp30, %bb5 ] ; <float> [#uses=1]
+ br label %bb7
+
+bb7: ; preds = %bb5.bb7_crit_edge, %bb5.preheader
+ %distERBhi.2.lcssa = phi float [ %tmp30.lcssa, %bb5.bb7_crit_edge ], [ %distERBhi.0.ph, %bb5.preheader ] ; <float> [#uses=2]
+ %hiPart.0.lcssa = phi i32 [ %tmp31.lcssa, %bb5.bb7_crit_edge ], [ %hiPart.119, %bb5.preheader ] ; <i32> [#uses=1]
+ %peakCount.2.lcssa = phi float [ %tmp35.lcssa, %bb5.bb7_crit_edge ], [ %peakCount.0.lcssa, %bb5.preheader ] ; <float> [#uses=2]
+ %tmp37 = fadd float %distERBlo.0.lcssa, %distERBhi.2.lcssa ; <float> [#uses=1]
+ %tmp38 = fdiv float %peakCount.2.lcssa, %tmp37 ; <float> [#uses=1]
+ %tmp39 = fmul float %tmp38, %tmp1 ; <float> [#uses=2]
+ %tmp40 = fmul float %tmp39, %tmp39 ; <float> [#uses=2]
+ %tmp41 = fmul float %tmp40, %tmp40 ; <float> [#uses=1]
+ %tmp42 = fadd float %tmp41, 1.000000e+00 ; <float> [#uses=1]
+ %tmp43 = fdiv float 1.000000e+00, %tmp42 ; <float> [#uses=1]
+ %tmp44 = sext i32 %part.016 to i64 ; <i64> [#uses=1]
+ %tmp45 = getelementptr float* %nrgReducePeakrate, i64 %tmp44 ; <float*> [#uses=1]
+ store float %tmp43, float* %tmp45, align 4
+ %tmp46 = add i32 %part.016, 1 ; <i32> [#uses=2]
+ br label %bb8
+
+bb8: ; preds = %bb7
+ %tmp47 = icmp slt i32 %tmp46, %bandEdgeIndex ; <i1> [#uses=1]
+ br i1 %tmp47, label %bb, label %bb8.return_crit_edge
+
+bb8.return_crit_edge: ; preds = %bb8
+ br label %return
+
+return: ; preds = %bb8.return_crit_edge, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index d7eb7bd47dc1..1cc559fd79f1 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: not grep and %t
; RUN: not grep zext %t
diff --git a/test/Transforms/IndVarSimplify/lftr-other-uses.ll b/test/Transforms/IndVarSimplify/lftr-other-uses.ll
index c8f1e9598340..09ec237cfc33 100644
--- a/test/Transforms/IndVarSimplify/lftr-other-uses.ll
+++ b/test/Transforms/IndVarSimplify/lftr-other-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -disable-output
+; RUN: opt < %s -indvars -disable-output
; Don't RAUW the loop's original comparison instruction if it has
; other uses which aren't dominated by the new comparison instruction.
diff --git a/test/Transforms/IndVarSimplify/lftr-promote.ll b/test/Transforms/IndVarSimplify/lftr-promote.ll
new file mode 100644
index 000000000000..c4ecc845562b
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-promote.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -indvars -S | grep add | count 1
+
+; Indvars should be able to compute the exit value of this loop
+; without any additional arithmetic. The only add needed should
+; be the canonical IV increment.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @foo(double* %p, i32 %n) nounwind {
+entry:
+ %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb.nph, label %return
+
+bb.nph: ; preds = %entry
+ br label %bb2
+
+bb2: ; preds = %bb3, %bb.nph
+ %i.01 = phi i32 [ %7, %bb3 ], [ 0, %bb.nph ] ; <i32> [#uses=3]
+ %1 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+ %2 = getelementptr double* %p, i64 %1 ; <double*> [#uses=1]
+ %3 = load double* %2, align 8 ; <double> [#uses=1]
+ %4 = fmul double %3, 1.100000e+00 ; <double> [#uses=1]
+ %5 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
+ %6 = getelementptr double* %p, i64 %5 ; <double*> [#uses=1]
+ store double %4, double* %6, align 8
+ %7 = add i32 %i.01, 1 ; <i32> [#uses=2]
+ br label %bb3
+
+bb3: ; preds = %bb2
+ %8 = icmp slt i32 %7, %n ; <i1> [#uses=1]
+ br i1 %8, label %bb2, label %bb3.return_crit_edge
+
+bb3.return_crit_edge: ; preds = %bb3
+ br label %return
+
+return: ; preds = %bb3.return_crit_edge, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lftr_simple.ll b/test/Transforms/IndVarSimplify/lftr_simple.ll
index 3f7c289a4885..e373013609be 100644
--- a/test/Transforms/IndVarSimplify/lftr_simple.ll
+++ b/test/Transforms/IndVarSimplify/lftr_simple.ll
@@ -1,6 +1,6 @@
; LFTR should eliminate the need for the computation of i*i completely. It
; is only used to compute the exit value.
-; RUN: llvm-as < %s | opt -indvars -dce | llvm-dis | not grep mul
+; RUN: opt < %s -indvars -dce -S | not grep mul
@A = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/loop-invariant-step.ll b/test/Transforms/IndVarSimplify/loop-invariant-step.ll
index 40156eaab59e..2d2d1fe264ab 100644
--- a/test/Transforms/IndVarSimplify/loop-invariant-step.ll
+++ b/test/Transforms/IndVarSimplify/loop-invariant-step.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -instcombine -indvars -disable-output
+; RUN: opt < %s -loop-index-split -instcombine -indvars -disable-output
; PR4455
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
new file mode 100644
index 000000000000..4ec4acadb4a5
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -indvars -S \
+; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
+
+; This loop has multiple exits, and the value of %b.1 depends on which
+; exit is taken. Indvars should correctly compute the exit values.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-linux-gnu"
+ %struct..0anon = type <{ i8, [3 x i8] }>
+
+define i32 @main() nounwind {
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb, %entry
+ %sdata.0 = phi i32 [ 1, %entry ], [ %ins10, %bb ] ; <i32> [#uses=2]
+ %b.0 = phi i32 [ 0, %entry ], [ %t0, %bb ] ; <i32> [#uses=2]
+ %tmp6 = trunc i32 %sdata.0 to i8 ; <i8> [#uses=2]
+ %t2 = and i8 %tmp6, 1 ; <i8> [#uses=1]
+ %t3 = icmp eq i8 %t2, 0 ; <i1> [#uses=1]
+ %t4 = xor i8 %tmp6, 1 ; <i8> [#uses=1]
+ %tmp8 = zext i8 %t4 to i32 ; <i32> [#uses=1]
+ %mask9 = and i32 %sdata.0, -256 ; <i32> [#uses=1]
+ %ins10 = or i32 %tmp8, %mask9 ; <i32> [#uses=1]
+ br i1 %t3, label %bb3, label %bb
+
+bb: ; preds = %bb2
+ %t0 = add i32 %b.0, 1 ; <i32> [#uses=3]
+ %t1 = icmp sgt i32 %t0, 100 ; <i1> [#uses=1]
+ br i1 %t1, label %bb3, label %bb2
+
+bb3: ; preds = %bb, %bb2
+ %b.1 = phi i32 [ %t0, %bb ], [ %b.0, %bb2 ] ; <i32> [#uses=1]
+ %t5 = icmp eq i32 %b.1, 1 ; <i1> [#uses=1]
+ br i1 %t5, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ tail call void @abort() noreturn nounwind
+ unreachable
+
+bb5: ; preds = %bb3
+ ret i32 0
+}
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
+
+declare void @abort() noreturn nounwind
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate11.ll b/test/Transforms/IndVarSimplify/loop_evaluate11.ll
new file mode 100644
index 000000000000..40b785ea4d6a
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop_evaluate11.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -domfrontier -indvars -loop-deletion
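+
+; No output is checked here: the test passes as long as -domfrontier,
+; -indvars, and -loop-deletion run to completion on this loop nest,
+; which branches on undef conditions.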
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define void @slap_sl_mem_create() nounwind {
+entry:
+ br label %bb15
+
+bb15: ; preds = %bb15, %entry
+ %order_end.0 = phi i32 [ 0, %entry ], [ %tmp, %bb15 ] ; <i32> [#uses=1]
+ %tmp = add i32 %order_end.0, 1 ; <i32> [#uses=2]
+ br i1 undef, label %bb17, label %bb15
+
+bb17: ; preds = %bb17, %bb15
+ %order_start.0 = phi i32 [ %tmp1, %bb17 ], [ 0, %bb15 ] ; <i32> [#uses=2]
+ %tmp1 = add i32 %order_start.0, 1 ; <i32> [#uses=2]
+ %tmp2 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ br i1 %tmp2, label %bb18, label %bb17
+
+bb18: ; preds = %bb17
+ %tmp3 = sub i32 %tmp, %tmp1 ; <i32> [#uses=0]
+ br label %bb59
+
+bb51: ; preds = %bb59
+ %tmp4 = add i32 %order_start.0, 2 ; <i32> [#uses=1]
+ %tmp5 = add i32 %tmp4, undef ; <i32> [#uses=1]
+ %tmp6 = lshr i32 undef, %tmp5 ; <i32> [#uses=1]
+ %tmp7 = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1]
+ br i1 %tmp7, label %bb52, label %bb59
+
+bb59: ; preds = %bb51, %bb18
+ br label %bb51
+
+bb52: ; preds = %bb51
+ unreachable
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate7.ll b/test/Transforms/IndVarSimplify/loop_evaluate7.ll
index 6e31c55d5265..b9c0b12f2d97 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate7.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; PR4436
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate8.ll b/test/Transforms/IndVarSimplify/loop_evaluate8.ll
index fa2f9e57f07c..2a9d20596233 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate8.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep select
+; RUN: opt < %s -indvars -S | not grep select
; This loop has backedge-taken-count zero. Indvars shouldn't expand any
; instructions to compute a trip count.
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
index 5a7a859004d1..8184a73f89eb 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
-; RUN: grep {\[%\]tmp5.lcssa = phi i8 \\\[ 63, \[%\]cc70a02__complex_integers__Oadd.153.exit.i \\\]} %t
-; RUN: grep {\[%\]tmp4.lcssa = phi i8 \\\[ -28, \[%\]cc70a02__complex_integers__Oadd.153.exit.i \\\]} %t
+; RUN: opt < %s -indvars -S > %t
+; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t
+; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t
; PR4477
; Indvars should compute the exit values of the loop.
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
index dfb42e4feae3..abf1bc3a9d09 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -loop-deletion -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br
;
; Testcase distilled from 256.bzip2
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_2.ll b/test/Transforms/IndVarSimplify/loop_evaluate_2.ll
index efcf20a93442..c0099a83ab17 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_2.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -loop-deletion -simplifycfg | opt \
+; RUN: opt < %s -indvars -loop-deletion -simplifycfg | opt \
; RUN: -analyze -loops | not grep "^Loop Containing"
; PR1179
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_3.ll b/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
index 2a2f69dd4866..65c66f7f5aca 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep {ret i32 600000}
+; RUN: opt < %s -indvars -S | grep {ret i32 600000}
; PR1179
define i32 @foo() {
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_4.ll b/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
index 6c6a362f48e6..e4b642c7f58e 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep {ret i32 9900}
+; RUN: opt < %s -indvars -S | grep {ret i32 9900}
; PR1179
define i32 @test4() {
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_5.ll b/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
index c6ce4a2122da..80b961ac7c34 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep {120, %bb2.bb3_crit_edge}
+; RUN: opt < %s -indvars -S | grep {120, %bb2.bb3_crit_edge}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
index 0d17a801a9cc..da38de538f7b 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -loop-deletion | llvm-dis | grep phi | count 1
+; RUN: opt < %s -indvars -loop-deletion -S | grep phi | count 1
; XFAIL: *
; Indvars can't evaluate this loop, because ScalarEvolution can't compute
diff --git a/test/Transforms/IndVarSimplify/masked-iv.ll b/test/Transforms/IndVarSimplify/masked-iv.ll
index f77b9357d75a..f1f5af96eb96 100644
--- a/test/Transforms/IndVarSimplify/masked-iv.ll
+++ b/test/Transforms/IndVarSimplify/masked-iv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: not grep trunc %t
; RUN: grep and %t | count 1
diff --git a/test/Transforms/IndVarSimplify/max-pointer.ll b/test/Transforms/IndVarSimplify/max-pointer.ll
new file mode 100644
index 000000000000..71bc720d5e9c
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/max-pointer.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -indvars -S > %t
+; RUN: grep {icmp ugt i8\\\*} %t | count 1
+; RUN: grep {icmp sgt i8\\\*} %t | count 1
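+
+; Each function below computes a pointer maximum with an icmp+select
+; before the loop; the greps check that one unsigned and one signed
+; pointer comparison survive in the output.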
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+ %struct.CKenCodeCodec = type <{ i8 }>
+
+define void @foo(i8* %str1Ptr, i8* %str2Ptr, i8* %inLastBytePtr) nounwind {
+entry:
+ %0 = icmp ult i8* %str2Ptr, %str1Ptr ; <i1> [#uses=1]
+ %str2Ptr_addr.0 = select i1 %0, i8* %str1Ptr, i8* %str2Ptr ; <i8*> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %str2Ptr_addr.1 = phi i8* [ %str2Ptr_addr.0, %entry ], [ %1, %bb2 ] ; <i8*> [#uses=1]
+ %1 = getelementptr i8* %str2Ptr_addr.1, i64 1 ; <i8*> [#uses=2]
+ %2 = icmp ult i8* %1, %inLastBytePtr ; <i1> [#uses=0]
+ br i1 false, label %bb2, label %return
+
+return: ; preds = %bb2
+ ret void
+}
+
+define void @sfoo(i8* %str1Ptr, i8* %str2Ptr, i8* %inLastBytePtr) nounwind {
+entry:
+ %0 = icmp slt i8* %str2Ptr, %str1Ptr ; <i1> [#uses=1]
+ %str2Ptr_addr.0 = select i1 %0, i8* %str1Ptr, i8* %str2Ptr ; <i8*> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %str2Ptr_addr.1 = phi i8* [ %str2Ptr_addr.0, %entry ], [ %1, %bb2 ] ; <i8*> [#uses=1]
+ %1 = getelementptr i8* %str2Ptr_addr.1, i64 1 ; <i8*> [#uses=2]
+ %2 = icmp slt i8* %1, %inLastBytePtr ; <i1> [#uses=0]
+ br i1 false, label %bb2, label %return
+
+return: ; preds = %bb2
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
index 7119cbbc05c6..34d432b4ee16 100644
--- a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
+++ b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
@ue = external global i64
diff --git a/test/Transforms/IndVarSimplify/pointer-indvars.ll b/test/Transforms/IndVarSimplify/pointer-indvars.ll
index c9da1579e983..6d25f90542ca 100644
--- a/test/Transforms/IndVarSimplify/pointer-indvars.ll
+++ b/test/Transforms/IndVarSimplify/pointer-indvars.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep indvar
+; RUN: opt < %s -indvars -S | grep indvar
@G = global i32* null ; <i32**> [#uses=1]
@Array = external global [40 x i32] ; <[40 x i32]*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/pointer.ll b/test/Transforms/IndVarSimplify/pointer.ll
index 7ad116191925..5eee655d4225 100644
--- a/test/Transforms/IndVarSimplify/pointer.ll
+++ b/test/Transforms/IndVarSimplify/pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep {%exitcond = icmp eq i64 %indvar.next, %n} %t
; RUN: grep {getelementptr i8\\* %A, i64 %indvar} %t
; RUN: grep getelementptr %t | count 1
diff --git a/test/Transforms/IndVarSimplify/polynomial-expand.ll b/test/Transforms/IndVarSimplify/polynomial-expand.ll
new file mode 100644
index 000000000000..2087f6a6966b
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/polynomial-expand.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -indvars -disable-output
+; PR5073
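+
+; Crash test: with -disable-output and no greps, this passes as long
+; as indvars can analyze these nested countdown loops without aborting.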
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @ctpmv_(float* noalias nocapture %tmp4, i32 %tmp21) nounwind {
+bb20: ; preds = %bb19
+ br label %bb24
+
+bb24: ; preds = %bb40, %bb23
+ %tmp25 = phi i32 [ %tmp43, %bb40 ], [ %tmp21, %bb20 ] ; <i32> [#uses=4]
+ %tmp26 = phi i32 [ %tmp41, %bb40 ], [ undef, %bb20 ] ; <i32> [#uses=2]
+ %tmp27 = add nsw i32 %tmp26, -1 ; <i32> [#uses=1]
+ %tmp28 = add nsw i32 %tmp25, -1 ; <i32> [#uses=2]
+ %tmp29 = icmp sgt i32 %tmp28, 0 ; <i1> [#uses=1]
+ br i1 %tmp29, label %bb30, label %bb40
+
+bb30: ; preds = %bb30, %bb24
+ %tmp31 = phi i32 [ %tmp39, %bb30 ], [ %tmp28, %bb24 ] ; <i32> [#uses=2]
+ %tmp32 = phi i32 [ %tmp37, %bb30 ], [ %tmp27, %bb24 ] ; <i32> [#uses=2]
+ %tmp33 = sext i32 %tmp32 to i64 ; <i64> [#uses=1]
+ %tmp35 = getelementptr float* %tmp4, i64 %tmp33 ; <%0*> [#uses=1]
+ %tmp36 = load float* %tmp35, align 4 ; <%0> [#uses=0]
+ %tmp37 = add nsw i32 %tmp32, -1 ; <i32> [#uses=1]
+ %tmp39 = add nsw i32 %tmp31, -1 ; <i32> [#uses=1]
+ %tmp38 = icmp eq i32 %tmp31, 1 ; <i1> [#uses=1]
+ br i1 %tmp38, label %bb40, label %bb30
+
+bb40: ; preds = %bb30, %bb24
+ %tmp41 = sub i32 %tmp26, %tmp25 ; <i32> [#uses=1]
+ %tmp43 = add nsw i32 %tmp25, -1 ; <i32> [#uses=1]
+ %tmp42 = icmp eq i32 %tmp25, 1 ; <i1> [#uses=1]
+ br i1 %tmp42, label %bb46, label %bb24
+
+bb46: ; preds = %bb40, %bb23, %bb19
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
new file mode 100644
index 000000000000..86e90c7623d1
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -indvars -S > %t
+; RUN: not grep inttoptr %t
+; RUN: not grep ptrtoint %t
+; RUN: grep scevgep %t
+
+; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
+
+define void @foo(i8* %p) nounwind {
+entry:
+ br i1 true, label %bb.nph, label %for.end
+
+for.cond:
+ %phitmp = icmp slt i64 %inc, 20
+ br i1 %phitmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+ br label %for.end
+
+bb.nph:
+ br label %for.body
+
+for.body:
+ %storemerge1 = phi i64 [ %inc, %for.cond ], [ 0, %bb.nph ]
+ %call = tail call i64 @bar() nounwind
+ %call2 = tail call i64 @car() nounwind
+ %conv = trunc i64 %call2 to i8
+ %conv3 = sext i8 %conv to i64
+ %add = add nsw i64 %call, %storemerge1
+ %add4 = add nsw i64 %add, %conv3
+ %arrayidx = getelementptr inbounds i8* %p, i64 %add4
+ store i8 0, i8* %arrayidx
+ %inc = add nsw i64 %storemerge1, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+declare i64 @bar()
+
+declare i64 @car()
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
new file mode 100644
index 000000000000..bb0993c88eab
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -indvars -S > %t
+; Exactly one getelementptr for each load+store.
+; RUN: grep getelementptr %t | count 6
+; Each getelementptr using %struct.Q* %s as a base and not i8*.
+; RUN: grep {getelementptr \[%\]struct\\.Q\\* \[%\]s,} %t | count 6
+; No explicit integer multiplications!
+; RUN: not grep {= mul} %t
+; No i8* arithmetic or pointer casting anywhere!
+; RUN: not grep {i8\\*} %t
+; RUN: not grep bitcast %t
+; RUN: not grep inttoptr %t
+; RUN: not grep ptrtoint %t
+
+; FIXME: This test should pass with or without TargetData. Until opt
+; supports running tests without TargetData, just hardwire this in.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+%struct.Q = type { [10 x %struct.N] }
+%struct.N = type { %struct.S }
+%struct.S = type { [100 x double], [100 x double] }
+
+define void @foo(%struct.Q* %s, i64 %n) nounwind {
+entry:
+ br label %bb1
+
+bb1:
+ %i = phi i64 [ 2, %entry ], [ %i.next, %bb ]
+ %j = phi i64 [ 0, %entry ], [ %j.next, %bb ]
+ %t5 = icmp slt i64 %i, %n
+ br i1 %t5, label %bb, label %return
+
+bb:
+ %t0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
+ %t1 = load double* %t0, align 8
+ %t2 = fmul double %t1, 3.200000e+00
+ %t3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
+ store double %t2, double* %t3, align 8
+
+ %s0 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
+ %s1 = load double* %s0, align 8
+ %s2 = fmul double %s1, 3.200000e+00
+ %s3 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
+ store double %s2, double* %s3, align 8
+
+ %u0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
+ %u1 = load double* %u0, align 8
+ %u2 = fmul double %u1, 3.200000e+00
+ %u3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
+ store double %u2, double* %u3, align 8
+
+ %v0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
+ %v1 = load double* %v0, align 8
+ %v2 = fmul double %v1, 3.200000e+00
+ %v3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
+ store double %v2, double* %v3, align 8
+
+ %w0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
+ %w1 = load double* %w0, align 8
+ %w2 = fmul double %w1, 3.200000e+00
+ %w3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
+ store double %w2, double* %w3, align 8
+
+ %x0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
+ %x1 = load double* %x0, align 8
+ %x2 = fmul double %x1, 3.200000e+00
+ %x3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
+ store double %x2, double* %x3, align 8
+
+ %i.next = add i64 %i, 1
+ %j.next = add i64 %j, 1
+ br label %bb1
+
+return:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
index 95726ea08189..d249432eeeb9 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis \
+; RUN: opt < %s -indvars -S \
; RUN: | grep {\[%\]p.2.ip.1 = getelementptr \\\[3 x \\\[3 x double\\\]\\\]\\* \[%\]p, i64 2, i64 \[%\]tmp, i64 1}
; Indvars shouldn't expand this to
diff --git a/test/Transforms/IndVarSimplify/preserve-gep.ll b/test/Transforms/IndVarSimplify/preserve-gep.ll
index 2c8c224fb905..a27d20dc9653 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: not grep ptrtoint %t
; RUN: not grep inttoptr %t
; RUN: grep getelementptr %t | count 1
diff --git a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
index 0a91ec88064f..9e46a78ffc76 100644
--- a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
+++ b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: grep sext %t | count 1
; RUN: grep phi %t | count 1
; RUN: grep {phi i64} %t
diff --git a/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 48c060c48d22..a007ca60b08f 100644
--- a/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: not grep sext %t
define i64 @test(i64* nocapture %first, i32 %count) nounwind readonly {
diff --git a/test/Transforms/IndVarSimplify/shrunk-constant.ll b/test/Transforms/IndVarSimplify/shrunk-constant.ll
index 0b2ecaf38d77..623c528487de 100644
--- a/test/Transforms/IndVarSimplify/shrunk-constant.ll
+++ b/test/Transforms/IndVarSimplify/shrunk-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \
+; RUN: opt < %s -scalar-evolution -analyze -disable-output \
; RUN: | grep {\\--> (zext i4 {-7,+,-8}<loop> to i32)}
define fastcc void @foo() nounwind {
diff --git a/test/Transforms/IndVarSimplify/signed-trip-count.ll b/test/Transforms/IndVarSimplify/signed-trip-count.ll
index cd61d11394ee..1a5e64ddc1b9 100644
--- a/test/Transforms/IndVarSimplify/signed-trip-count.ll
+++ b/test/Transforms/IndVarSimplify/signed-trip-count.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: opt < %s -indvars -S > %t
; RUN: not grep sext %t
; RUN: grep phi %t | count 1
diff --git a/test/Transforms/IndVarSimplify/single-element-range.ll b/test/Transforms/IndVarSimplify/single-element-range.ll
new file mode 100644
index 000000000000..60a9eef09ef6
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/single-element-range.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -indvars
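+
+; No output is checked; indvars just shouldn't crash on this loop,
+; whose bound is a pointer compared against inttoptr (i32 1 to i8*).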
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-apple-darwin10"
+
+define arm_apcscc void @sqlite3_free_table(i8** %azResult) nounwind {
+entry:
+ br i1 undef, label %return, label %bb
+
+bb: ; preds = %entry
+ %0 = load i8** undef, align 4 ; <i8*> [#uses=2]
+ %1 = ptrtoint i8* %0 to i32 ; <i32> [#uses=1]
+ %2 = icmp sgt i8* %0, inttoptr (i32 1 to i8*) ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb5
+
+bb1: ; preds = %bb1, %bb
+ %i.01 = phi i32 [ %3, %bb1 ], [ 1, %bb ] ; <i32> [#uses=1]
+ %3 = add i32 %i.01, 1 ; <i32> [#uses=2]
+ %4 = icmp slt i32 %3, %1 ; <i1> [#uses=1]
+ br i1 %4, label %bb1, label %bb5
+
+bb5: ; preds = %bb1, %bb
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/sink-alloca.ll b/test/Transforms/IndVarSimplify/sink-alloca.ll
new file mode 100644
index 000000000000..3a6c683e7cec
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/sink-alloca.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; PR4775
+
+; Indvars shouldn't sink the alloca out of the entry block, even though
+; it's not used until after the loop.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)],
+section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @main() nounwind {
+; CHECK: entry:
+; CHECK-NEXT: %result.i = alloca i32, align 4
+entry:
+ %result.i = alloca i32, align 4 ; <i32*> [#uses=2]
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %call = call i32 @bar() nounwind ; <i32> [#uses=1]
+ %tobool = icmp eq i32 %call, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ volatile store i32 0, i32* %result.i
+ %tmp.i = volatile load i32* %result.i ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @bar()
diff --git a/test/Transforms/IndVarSimplify/sink-trapping.ll b/test/Transforms/IndVarSimplify/sink-trapping.ll
new file mode 100644
index 000000000000..a18000c5f8a8
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/sink-trapping.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -indvars -S | FileCheck %s --check-prefix=CHECK
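+
+; The sdiv is loop-invariant and its only use is the return value, so
+; indvars may sink it; the CHECK lines verify that it lands in the loop
+; exit block rather than staying in the preheader.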
+
+declare i1 @b()
+
+define i32 @a(i32 %x) nounwind {
+for.body.preheader:
+ %y = sdiv i32 10, %x
+ br label %for.body
+
+for.body:
+ %cmp = call i1 @b()
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+ ret i32 %y
+}
+; CHECK: for.end.loopexit:
+; CHECK: sdiv
+; CHECK: ret
diff --git a/test/Transforms/IndVarSimplify/subtract.ll b/test/Transforms/IndVarSimplify/subtract.ll
index 51065cc1a8a3..f45bdab4be58 100644
--- a/test/Transforms/IndVarSimplify/subtract.ll
+++ b/test/Transforms/IndVarSimplify/subtract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep indvar
+; RUN: opt < %s -indvars -S | grep indvar
@G = global i64 0 ; <i64*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/tripcount_compute.ll b/test/Transforms/IndVarSimplify/tripcount_compute.ll
index 9ffce813023a..6eaa4c5c6b63 100644
--- a/test/Transforms/IndVarSimplify/tripcount_compute.ll
+++ b/test/Transforms/IndVarSimplify/tripcount_compute.ll
@@ -3,7 +3,7 @@
; the exit value of the loop will be for some value, allowing us to substitute
; it directly into users outside of the loop, making the loop dead.
;
-; RUN: llvm-as < %s | opt -indvars -loop-deletion -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br
define i32 @linear_setne() {
entry:
diff --git a/test/Transforms/IndVarSimplify/tripcount_infinite.ll b/test/Transforms/IndVarSimplify/tripcount_infinite.ll
index d5965f4ed6f0..0495b50c3e48 100644
--- a/test/Transforms/IndVarSimplify/tripcount_infinite.ll
+++ b/test/Transforms/IndVarSimplify/tripcount_infinite.ll
@@ -1,7 +1,7 @@
; These tests have an infinite trip count. We obviously shouldn't remove the
; loops! :)
;
-; RUN: llvm-as < %s | opt -indvars -adce -simplifycfg | llvm-dis | grep icmp | wc -l > %t2
+; RUN: opt < %s -indvars -adce -simplifycfg -S | grep icmp | wc -l > %t2
; RUN: llvm-as < %s | llvm-dis | grep icmp | wc -l > %t1
; RUN: diff %t1 %t2
diff --git a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
index 759ba8e177bc..0c8857f85789 100644
--- a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
+++ b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars -instcombine | llvm-dis | \
+; RUN: opt < %s -indvars -instcombine -S | \
; RUN: grep {store i32 0}
; Test that -indvars can reduce variable stride IVs. If it can reduce variable
; stride IVs, it will make %iv. and %m.0.0 isomorphic to each other without
diff --git a/test/Transforms/IndVarSimplify/variable-stride-ivs-1.ll b/test/Transforms/IndVarSimplify/variable-stride-ivs-1.ll
index 075e899cd96d..98cfa345962c 100644
--- a/test/Transforms/IndVarSimplify/variable-stride-ivs-1.ll
+++ b/test/Transforms/IndVarSimplify/variable-stride-ivs-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars
+; RUN: opt < %s -indvars
; PR4315
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/Inline/2003-09-14-InlineValue.ll b/test/Transforms/Inline/2003-09-14-InlineValue.ll
index edc9b474e064..49a27e195e2a 100644
--- a/test/Transforms/Inline/2003-09-14-InlineValue.ll
+++ b/test/Transforms/Inline/2003-09-14-InlineValue.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
declare i32 @External()
diff --git a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
index d57aee01c609..5ced3b8e8da9 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
define i32 @main() {
entry:
diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
index d75999b9768d..4418f77c9d0b 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
define i32 @main() {
entry:
diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
index 09d516f48223..1bd55299a901 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
define i32 @main() {
entry:
diff --git a/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll b/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll
index 6b6cf06abb29..4a80d37c0974 100644
--- a/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll
+++ b/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
define i32 @reload() {
reloadentry:
diff --git a/test/Transforms/Inline/2003-10-26-InlineInvokeExceptionDestPhi.ll b/test/Transforms/Inline/2003-10-26-InlineInvokeExceptionDestPhi.ll
index 675454e13482..9afd45040ca9 100644
--- a/test/Transforms/Inline/2003-10-26-InlineInvokeExceptionDestPhi.ll
+++ b/test/Transforms/Inline/2003-10-26-InlineInvokeExceptionDestPhi.ll
@@ -2,7 +2,7 @@
; node in the exception destination, and the inlined function contains an
; unwind instruction.
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
define linkonce void @foo() {
unwind
diff --git a/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll b/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll
index 8a2f8e29edff..38994519cea4 100644
--- a/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll
+++ b/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
; Inlining the first call caused the inliner to delete the second
; call. Then the inliner tries to inline the second call, which no longer
diff --git a/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll b/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll
index d9f09a06e7da..fabad30bb5ac 100644
--- a/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll
+++ b/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -prune-eh -disable-output
+; RUN: opt < %s -inline -prune-eh -disable-output
define linkonce void @caller() {
call void @callee( )
diff --git a/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll b/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll
index 09666ec8e65b..733cbb9c754f 100644
--- a/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll
+++ b/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
define i32 @test() {
unwind
diff --git a/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll b/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll
index acdf6ab766b1..415495eb515d 100644
--- a/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll
+++ b/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -prune-eh -disable-output
+; RUN: opt < %s -inline -prune-eh -disable-output
%"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>" = type { %"struct.std::locale::facet" }
%"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >" = type { i32 (...)**, i32*, i32*, i32*, i32*, i32*, i32*, %"struct.std::locale" }
diff --git a/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll b/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
index 5215bec92909..69345627221b 100644
--- a/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
+++ b/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -prune-eh -disable-output
+; RUN: opt < %s -inline -prune-eh -disable-output
; PR827
@_ZTV8CRjii = internal global [1 x i32 (...)*] [ i32 (...)* @_ZN8CRjii12NlFeeEPN5Jr7sE ] ; <[1 x i32 (...)*]*> [#uses=0]
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
index bd2885dab425..37cba9801caa 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -prune-eh -disable-output
+; RUN: opt < %s -inline -prune-eh -disable-output
; PR993
target datalayout = "e-p:32:32"
target triple = "i386-unknown-openbsd3.9"
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
index 05096f425fde..279823a4b281 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -prune-eh -disable-output
+; RUN: opt < %s -inline -prune-eh -disable-output
; PR992
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/Inline/2007-04-15-InlineEH.ll b/test/Transforms/Inline/2007-04-15-InlineEH.ll
index 083a328ae673..635f93e82428 100644
--- a/test/Transforms/Inline/2007-04-15-InlineEH.ll
+++ b/test/Transforms/Inline/2007-04-15-InlineEH.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep {invoke void asm}
+; RUN: opt < %s -inline -S | not grep {invoke void asm}
; PR1335
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/Inline/2007-06-06-NoInline.ll b/test/Transforms/Inline/2007-06-06-NoInline.ll
index 51bba0705338..d5a7953ffb07 100644
--- a/test/Transforms/Inline/2007-06-06-NoInline.ll
+++ b/test/Transforms/Inline/2007-06-06-NoInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -f - | llvm-dis | grep "define internal i32 @bar"
+; RUN: opt < %s -inline -S | grep "define internal i32 @bar"
@llvm.noinline = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @bar to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
define internal i32 @bar(i32 %x, i32 %y) {
diff --git a/test/Transforms/Inline/2007-06-25-WeakInline.ll b/test/Transforms/Inline/2007-06-25-WeakInline.ll
index 3cf758ca8c8e..929891a6785e 100644
--- a/test/Transforms/Inline/2007-06-25-WeakInline.ll
+++ b/test/Transforms/Inline/2007-06-25-WeakInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep call
+; RUN: opt < %s -inline -S | grep call
; 'bar' can be overridden at link-time, don't inline it.
diff --git a/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll b/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
index 5f7da15fc1b2..08d4dc693a41 100644
--- a/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
+++ b/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as %s -o - | opt -inline | llvm-dis | grep nounwind
-; RUN: llvm-as %s -o - | opt -inline | llvm-dis | grep unreachable
+; RUN: opt < %s -inline -S | grep nounwind
+; RUN: opt < %s -inline -S | grep unreachable
declare i1 @extern()
diff --git a/test/Transforms/Inline/2008-03-04-StructRet.ll b/test/Transforms/Inline/2008-03-04-StructRet.ll
index fc7ebc4d8095..3311d5653682 100644
--- a/test/Transforms/Inline/2008-03-04-StructRet.ll
+++ b/test/Transforms/Inline/2008-03-04-StructRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
%struct.Benchmark = type { i32 (...)** }
%struct.Complex = type { double, double }
%struct.ComplexBenchmark = type { %struct.Benchmark }
diff --git a/test/Transforms/Inline/2008-03-07-Inline-2.ll b/test/Transforms/Inline/2008-03-07-Inline-2.ll
index 0c256906a072..0c968e6ce18d 100644
--- a/test/Transforms/Inline/2008-03-07-Inline-2.ll
+++ b/test/Transforms/Inline/2008-03-07-Inline-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
%struct.Demand = type { double, double }
%struct.branch = type { %struct.Demand, double, double, double, double, %struct.branch*, [12 x %struct.leaf*] }
%struct.leaf = type { %struct.Demand, double, double }
diff --git a/test/Transforms/Inline/2008-03-07-Inline.ll b/test/Transforms/Inline/2008-03-07-Inline.ll
index 09d14f021343..86afb2d43ec0 100644
--- a/test/Transforms/Inline/2008-03-07-Inline.ll
+++ b/test/Transforms/Inline/2008-03-07-Inline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
%struct.Demand = type { double, double }
%struct.branch = type { %struct.Demand, double, double, double, double, %struct.branch*, [12 x %struct.leaf*] }
%struct.leaf = type { %struct.Demand, double, double }
diff --git a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll b/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
index b42e559c5af0..39095c407281 100644
--- a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
+++ b/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline-threshold=0 -inline | llvm-dis | not grep call
+; RUN: opt < %s -inline-threshold=0 -inline -S | not grep call
define i32 @fn2() alwaysinline {
ret i32 1
diff --git a/test/Transforms/Inline/2008-09-02-NoInline.ll b/test/Transforms/Inline/2008-09-02-NoInline.ll
index 35b4b4678b7e..33c8949afe05 100644
--- a/test/Transforms/Inline/2008-09-02-NoInline.ll
+++ b/test/Transforms/Inline/2008-09-02-NoInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep call | count 1
+; RUN: opt < %s -inline -S | grep call | count 1
define i32 @fn2() noinline {
ret i32 1
diff --git a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll b/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
index 765fc7578769..11e501274d3b 100644
--- a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
+++ b/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | opt -always-inline | llvm-dis | not grep call
+; RUN: opt < %s -always-inline -S | not grep call
; Ensure that the inline threshold doesn't disrupt always-inline.
-; RUN: llvm-as < %s | opt -inline-threshold=-2000000001 -always-inline | llvm-dis | not grep call
+; RUN: opt < %s -inline-threshold=-2000000001 -always-inline -S | not grep call
define internal i32 @if0() alwaysinline {
diff --git a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll b/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
index 753985239b3b..bc9787b82345 100644
--- a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
+++ b/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -always-inline | llvm-dis | grep {@foo}
+; RUN: opt < %s -always-inline -S | grep {@foo}
; Ensure that foo is not removed by the always inliner.
; PR2945
diff --git a/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll b/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
index 14840bac676d..db2a799225b9 100644
--- a/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
+++ b/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep call
+; RUN: opt < %s -inline -S | grep call
; Do not inline functions that contain a variable-sized alloca.
@q = common global i8* null ; <i8**> [#uses=1]
diff --git a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll b/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
index 8a4b2e052f31..1a3325a68b61 100644
--- a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
+++ b/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep {call.*fib} | count 4
+; RUN: opt < %s -inline -S | grep {call.*fib} | count 4
; First call to fib from fib is inlined, producing 2 instead of 1, total 3.
; Second call to fib from fib is not inlined because the new body of fib
; exceeds the inlining limit of 200. Plus the call in main = 4 total.
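A short worked count for the grep above, assuming (as the comments imply) that @fib contains two recursive call sites: inlining the first site splices in a copy of @fib's body with its two calls, so that site now contributes 2 matches instead of 1; the second site survives because the grown body exceeds the inlining limit of 200, giving 3 matches inside @fib; the single call in @main makes 4, the expected count.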
diff --git a/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll b/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
index 3702afa8611c..7d8d16bacffe 100644
--- a/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
+++ b/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -argpromotion -disable-output
+; RUN: opt < %s -inline -argpromotion -disable-output
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll b/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll
index 067fd72e939b..c8629ea22eb1 100644
--- a/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll
+++ b/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output
+; RUN: opt < %s -inline -disable-output
; PR4123
%struct.S0 = type <{ i32 }>
%struct.S1 = type <{ i8, i8, i8, i8, %struct.S0 }>
diff --git a/test/Transforms/Inline/PR4909.ll b/test/Transforms/Inline/PR4909.ll
new file mode 100644
index 000000000000..24545f9aa883
--- /dev/null
+++ b/test/Transforms/Inline/PR4909.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -partial-inliner -disable-output
+
+define i32 @f() {
+entry:
+ br label %return
+
+return: ; preds = %entry
+ ret i32 undef
+}
+
+define i32 @g() {
+entry:
+ %0 = call i32 @f()
+ ret i32 %0
+}
diff --git a/test/Transforms/Inline/alloca-in-scc.ll b/test/Transforms/Inline/alloca-in-scc.ll
new file mode 100644
index 000000000000..d539255e6e0a
--- /dev/null
+++ b/test/Transforms/Inline/alloca-in-scc.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -inline | llvm-dis
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+define i32 @main(i32 %argc, i8** %argv) nounwind ssp {
+entry:
+ call fastcc void @c() nounwind
+ unreachable
+}
+
+define internal fastcc void @a() nounwind ssp {
+entry:
+ %al = alloca [3 x i32], align 4
+ %0 = getelementptr inbounds [3 x i32]* %al, i32 0, i32 2
+
+ call fastcc void @c() nounwind
+ unreachable
+}
+
+define internal fastcc void @b() nounwind ssp {
+entry:
+ tail call fastcc void @a() nounwind ssp
+ unreachable
+}
+
+define internal fastcc void @c() nounwind ssp {
+entry:
+ call fastcc void @b() nounwind
+ unreachable
+}
diff --git a/test/Transforms/Inline/alloca_test.ll b/test/Transforms/Inline/alloca_test.ll
index 1fbd095354a9..e5791d5d2553 100644
--- a/test/Transforms/Inline/alloca_test.ll
+++ b/test/Transforms/Inline/alloca_test.ll
@@ -1,7 +1,7 @@
; This test ensures that alloca instructions in the entry block for an inlined
; function are moved to the top of the function they are inlined into.
;
-; RUN: llvm-as < %s | opt -inline | llvm-dis | %prcontext alloca 1 | grep Entry:
+; RUN: opt -S -inline %s | FileCheck %s
define i32 @func(i32 %i) {
%X = alloca i32 ; <i32*> [#uses=1]
@@ -13,6 +13,8 @@ declare void @bar()
define i32 @main(i32 %argc) {
Entry:
+; CHECK: Entry
+; CHECK-NEXT: alloca
call void @bar( )
%X = call i32 @func( i32 7 ) ; <i32> [#uses=1]
%Y = add i32 %X, %argc ; <i32> [#uses=1]
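A brief note on the FileCheck directives used in the converted tests here and below: CHECK: patterns must match the tool's output in order; CHECK-NEXT: additionally requires its match to fall on the line immediately after the previous match; CHECK-NOT: asserts the pattern is absent between the surrounding positive matches. So the pair

    ; CHECK: Entry
    ; CHECK-NEXT: alloca

verifies that an alloca sits on the very next output line after the one containing Entry, i.e. that the callee's alloca was hoisted to the top of @main.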
diff --git a/test/Transforms/Inline/always_inline_dyn_alloca.ll b/test/Transforms/Inline/always_inline_dyn_alloca.ll
index 933925e50251..25cfc49f1a84 100644
--- a/test/Transforms/Inline/always_inline_dyn_alloca.ll
+++ b/test/Transforms/Inline/always_inline_dyn_alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep callee
+; RUN: opt < %s -inline -S | not grep callee
; rdar://6655932
; If callee is marked alwaysinline, inline it! Even if callee has dynamic
diff --git a/test/Transforms/Inline/array_merge.ll b/test/Transforms/Inline/array_merge.ll
new file mode 100644
index 000000000000..0d176b8acb3c
--- /dev/null
+++ b/test/Transforms/Inline/array_merge.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; rdar://7173846
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+define internal void @foo() nounwind ssp {
+entry:
+ %A = alloca [100 x i32]
+ %B = alloca [100 x i32]
+ call void @bar([100 x i32]* %A, [100 x i32]* %B) nounwind
+ ret void
+}
+
+declare void @bar([100 x i32]*, [100 x i32]*)
+
+define void @test() nounwind ssp {
+entry:
+; CHECK: @test()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %A.i = alloca
+; CHECK-NEXT: %B.i = alloca
+; CHECK-NEXT: call void
+ call void @foo() nounwind
+ call void @foo() nounwind
+ ret void
+}
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index d954238dabe4..71e00cb4c082 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -disable-output -print-function 2> /dev/null
+; RUN: opt < %s -inline -disable-output -print-function 2> /dev/null
define i32 @func(i32 %i) {
ret i32 %i
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll
index ea8c35760386..c3552f696433 100644
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep {llvm.memcpy}
+; RUN: opt < %s -inline -S | grep {llvm.memcpy}
; Inlining a byval struct should cause an explicit copy into an alloca.
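A hedged sketch of what the grep above looks for: when the inliner inlines a callee taking a byval pointer argument, it gives the caller a private copy of the struct, an alloca plus a memcpy from the original pointer (the type, names, and sizes below are hypothetical):

    %copy = alloca %struct.S                   ; caller-side copy of the byval argument
    %dst = bitcast %struct.S* %copy to i8*
    %src = bitcast %struct.S* %orig to i8*
    call void @llvm.memcpy.i32(i8* %dst, i8* %src, i32 8, i32 4)

hence the requirement that llvm.memcpy appear in the output.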
diff --git a/test/Transforms/Inline/byval2.ll b/test/Transforms/Inline/byval2.ll
index e949d01856d4..a7ab77cb562c 100644
--- a/test/Transforms/Inline/byval2.ll
+++ b/test/Transforms/Inline/byval2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep {llvm.memcpy}
+; RUN: opt < %s -inline -S | not grep {llvm.memcpy}
; Inlining a byval struct should NOT cause an explicit copy
; into an alloca if the function is readonly
diff --git a/test/Transforms/Inline/callgraph-update.ll b/test/Transforms/Inline/callgraph-update.ll
new file mode 100644
index 000000000000..528e9af82e11
--- /dev/null
+++ b/test/Transforms/Inline/callgraph-update.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -inline -loop-rotate | llvm-dis
+; PR3601
+declare void @solve()
+
+define internal fastcc void @read() {
+ br label %bb4
+
+bb3:
+ br label %bb4
+
+bb4:
+ call void @solve()
+ br i1 false, label %bb5, label %bb3
+
+bb5:
+ unreachable
+}
+
+define internal fastcc void @parse() {
+ call fastcc void @read()
+ ret void
+}
+
+define void @main() {
+ invoke fastcc void @parse()
+ to label %invcont unwind label %lpad
+
+invcont:
+ unreachable
+
+lpad:
+ unreachable
+}
diff --git a/test/Transforms/Inline/casts.ll b/test/Transforms/Inline/casts.ll
index 029d309fd8e0..166185a545d1 100644
--- a/test/Transforms/Inline/casts.ll
+++ b/test/Transforms/Inline/casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep {ret i32 1}
+; RUN: opt < %s -inline -S | grep {ret i32 1}
; ModuleID = 'short.opt.bc'
define i32 @testBool(i1 %X) {
diff --git a/test/Transforms/Inline/cfg_preserve_test.ll b/test/Transforms/Inline/cfg_preserve_test.ll
index 6b6ff18ecfd0..9597109dff0b 100644
--- a/test/Transforms/Inline/cfg_preserve_test.ll
+++ b/test/Transforms/Inline/cfg_preserve_test.ll
@@ -1,6 +1,6 @@
; This test ensures that inlining an "empty" function does not destroy the CFG
;
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep br
+; RUN: opt < %s -inline -S | not grep br
define i32 @func(i32 %i) {
ret i32 %i
diff --git a/test/Transforms/Inline/crash.ll b/test/Transforms/Inline/crash.ll
new file mode 100644
index 000000000000..30eae7a96a7b
--- /dev/null
+++ b/test/Transforms/Inline/crash.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -inline -argpromotion -instcombine -disable-output
+
+; This test was failing because the inliner would inline @list_DeleteElement
+; into @list_DeleteDuplicates and then into @inf_GetBackwardPartnerLits,
+; turning the indirect call into a direct one. This allowed instcombine to see
+; the bitcast and eliminate it, deleting the original call and introducing
+; another one. This crashed the inliner because the new call was not in the
+; callgraph.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+
+define void @list_DeleteElement(i32 (i8*, i8*)* nocapture %Test) nounwind ssp {
+entry:
+ %0 = call i32 %Test(i8* null, i8* undef) nounwind
+ ret void
+}
+
+
+define void @list_DeleteDuplicates(i32 (i8*, i8*)* nocapture %Test) nounwind ssp {
+foo:
+ call void @list_DeleteElement(i32 (i8*, i8*)* %Test) nounwind ssp
+ call fastcc void @list_Rplacd1284() nounwind ssp
+ unreachable
+
+}
+
+define internal i32 @inf_LiteralsHaveSameSubtermAndAreFromSameClause(i32* nocapture %L1, i32* nocapture %L2) nounwind readonly ssp {
+entry:
+ unreachable
+}
+
+
+define internal fastcc void @inf_GetBackwardPartnerLits(i32* nocapture %Flags) nounwind ssp {
+test:
+ call void @list_DeleteDuplicates(i32 (i8*, i8*)* bitcast (i32 (i32*, i32*)* @inf_LiteralsHaveSameSubtermAndAreFromSameClause to i32 (i8*, i8*)*)) nounwind
+ ret void
+}
+
+
+define void @inf_BackwardEmptySortPlusPlus() nounwind ssp {
+entry:
+ call fastcc void @inf_GetBackwardPartnerLits(i32* null) nounwind ssp
+ unreachable
+}
+
+define void @inf_BackwardWeakening() nounwind ssp {
+entry:
+ call fastcc void @inf_GetBackwardPartnerLits(i32* null) nounwind ssp
+ unreachable
+}
+
+
+
+
+declare fastcc void @list_Rplacd1284() nounwind ssp
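For reference, a hypothetical sketch (not part of the patch) of the instcombine fold described in the comment at the top of this file. A call through a bitcast function pointer such as

    %r = call i32 bitcast (i32 (i32*, i32*)* @cmp to i32 (i8*, i8*)*)(i8* %a, i8* %b)

is rewritten, once the arguments are cast to the expected types, into a direct call:

    %a1 = bitcast i8* %a to i32*
    %b1 = bitcast i8* %b to i32*
    %r1 = call i32 @cmp(i32* %a1, i32* %b1)

The old call instruction is erased and a new one is created, and that new instruction is exactly what the inliner's call graph had never seen.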
diff --git a/test/Transforms/Inline/dynamic_alloca_test.ll b/test/Transforms/Inline/dynamic_alloca_test.ll
index b8ff7dedc493..0286535efec1 100644
--- a/test/Transforms/Inline/dynamic_alloca_test.ll
+++ b/test/Transforms/Inline/dynamic_alloca_test.ll
@@ -3,9 +3,9 @@
; Functions with dynamic allocas can only be inlined into functions that
; already have dynamic allocas.
-; RUN: llvm-as < %s | opt -inline | llvm-dis | \
+; RUN: opt < %s -inline -S | \
; RUN: grep llvm.stacksave
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep callee
+; RUN: opt < %s -inline -S | not grep callee
declare void @ext(i32*)
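When inlining a dynamic alloca is legal, the inliner brackets the inlined body with the stack save/restore intrinsics so the caller's frame does not grow on every call; a hedged sketch (%n is a hypothetical runtime size):

    %sp = call i8* @llvm.stacksave()
    %buf = alloca i32, i32 %n                  ; dynamically sized
    ...
    call void @llvm.stackrestore(i8* %sp)

which is what the first grep, for llvm.stacksave, detects.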
diff --git a/test/Transforms/Inline/externally_available.ll b/test/Transforms/Inline/externally_available.ll
index 68f7d65d6595..43fe5d37f9e8 100644
--- a/test/Transforms/Inline/externally_available.ll
+++ b/test/Transforms/Inline/externally_available.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -constprop | llvm-dis > %t
+; RUN: opt < %s -inline -constprop -S > %t
; RUN: not grep test_function %t
; RUN: grep {ret i32 5} %t
diff --git a/test/Transforms/Inline/indirect_resolve.ll b/test/Transforms/Inline/indirect_resolve.ll
new file mode 100644
index 000000000000..76182e2fe4f6
--- /dev/null
+++ b/test/Transforms/Inline/indirect_resolve.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -inline | llvm-dis
+; PR4834
+
+define i32 @main() {
+ %funcall1_ = call fastcc i32 ()* ()* @f1()
+ %executecommandptr1_ = call i32 %funcall1_()
+ ret i32 %executecommandptr1_
+}
+
+define internal fastcc i32 ()* @f1() nounwind readnone {
+ ret i32 ()* @f2
+}
+
+define internal i32 @f2() nounwind readnone {
+ ret i32 1
+}
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 53f755e44564..961f6789fe4d 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep {tail call void @llvm.memcpy.i32}
+; RUN: opt < %s -inline -S | not grep {tail call void @llvm.memcpy.i32}
; PR3550
define internal void @foo(i32* %p, i32* %q) {
diff --git a/test/Transforms/Inline/inline-tail.ll b/test/Transforms/Inline/inline-tail.ll
index 5921655b7364..8bb059d01a0c 100644
--- a/test/Transforms/Inline/inline-tail.ll
+++ b/test/Transforms/Inline/inline-tail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep tail
+; RUN: opt < %s -inline -S | not grep tail
declare void @bar(i32*)
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index 89b3a8283c06..4c6472194210 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -1,9 +1,9 @@
; Test that the inliner doesn't leave around dead allocas, and that it folds
; uncond branches away after it is done specializing.
-; RUN: llvm-as < %s | opt -inline | llvm-dis | \
+; RUN: opt < %s -inline -S | \
; RUN: not grep {alloca.*uses=0}
-; RUN: llvm-as < %s | opt -inline | llvm-dis | \
+; RUN: opt < %s -inline -S | \
; RUN: not grep {br label}
@A = weak global i32 0 ; <i32*> [#uses=1]
@B = weak global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 4744c862335b..537c69b305cb 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep callee
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep div
+; RUN: opt < %s -inline -S | not grep callee
+; RUN: opt < %s -inline -S | not grep div
define internal i32 @callee(i32 %A, i32 %B) {
diff --git a/test/Transforms/Inline/inline_dce.ll b/test/Transforms/Inline/inline_dce.ll
index dd971bee9300..5143d0249421 100644
--- a/test/Transforms/Inline/inline_dce.ll
+++ b/test/Transforms/Inline/inline_dce.ll
@@ -1,7 +1,7 @@
; This checks to ensure that the inline pass deletes functions if they get
; inlined into all of their callers.
-; RUN: llvm-as < %s | opt -inline | llvm-dis | \
+; RUN: opt < %s -inline -S | \
; RUN: not grep @reallysmall
define internal i32 @reallysmall(i32 %A) {
diff --git a/test/Transforms/Inline/inline_prune.ll b/test/Transforms/Inline/inline_prune.ll
index fcd8321d5a0f..658a422540ae 100644
--- a/test/Transforms/Inline/inline_prune.ll
+++ b/test/Transforms/Inline/inline_prune.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | \
+; RUN: opt < %s -inline -S | \
; RUN: not grep {callee\[12\](}
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep mul
+; RUN: opt < %s -inline -S | not grep mul
define internal i32 @callee1(i32 %A, i32 %B) {
%cond = icmp eq i32 %A, 123 ; <i1> [#uses=1]
diff --git a/test/Transforms/Inline/invoke_test-1.ll b/test/Transforms/Inline/invoke_test-1.ll
index 1fd3317fd1f2..0d27e2a7f5e4 100644
--- a/test/Transforms/Inline/invoke_test-1.ll
+++ b/test/Transforms/Inline/invoke_test-1.ll
@@ -1,7 +1,7 @@
; Test that we can inline a simple function, turning the calls in it into invoke
; instructions
-; RUN: llvm-as < %s | opt -inline | llvm-dis | \
+; RUN: opt < %s -inline -S | \
; RUN: not grep {call\[^e\]}
declare void @might_throw()
diff --git a/test/Transforms/Inline/invoke_test-2.ll b/test/Transforms/Inline/invoke_test-2.ll
index 68b8bd8b65a3..bbb9ab055393 100644
--- a/test/Transforms/Inline/invoke_test-2.ll
+++ b/test/Transforms/Inline/invoke_test-2.ll
@@ -1,7 +1,7 @@
; Test that if an invoked function is inlined and cannot throw, the dead
; exception handler becomes unreachable.
-; RUN: llvm-as < %s | opt -inline -simplifycfg | llvm-dis | \
+; RUN: opt < %s -inline -simplifycfg -S | \
; RUN: not grep UnreachableExceptionHandler
declare void @might_throw()
diff --git a/test/Transforms/Inline/invoke_test-3.ll b/test/Transforms/Inline/invoke_test-3.ll
index a5deec6aa881..b360526fb348 100644
--- a/test/Transforms/Inline/invoke_test-3.ll
+++ b/test/Transforms/Inline/invoke_test-3.ll
@@ -1,7 +1,7 @@
; Test that any rethrown exceptions in an inlined function are automatically
; turned into branches to the invoke destination.
-; RUN: llvm-as < %s | opt -inline | llvm-dis | not grep unwind$
+; RUN: opt < %s -inline -S | not grep unwind$
declare void @might_throw()
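A sketch of the rewrite the comment above describes, in the pre-landingpad IR of this era (labels are hypothetical): when a callee that rethrows via

    unwind

is inlined at a site of the form

    invoke void @callee() to label %ok unwind label %lpad

the rethrow becomes a plain branch to the invoke's unwind destination,

    br label %lpad

so no unwind instruction survives, which is what "not grep unwind$" checks.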
diff --git a/test/Transforms/Inline/nested-inline.ll b/test/Transforms/Inline/nested-inline.ll
new file mode 100644
index 000000000000..12926671722e
--- /dev/null
+++ b/test/Transforms/Inline/nested-inline.ll
@@ -0,0 +1,111 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; Test that bar and bar2 are both inlined throughout and removed.
+@A = weak global i32 0 ; <i32*> [#uses=1]
+@B = weak global i32 0 ; <i32*> [#uses=1]
+@C = weak global i32 0 ; <i32*> [#uses=1]
+
+define fastcc void @foo(i32 %X) {
+entry:
+; CHECK: @foo
+ %ALL = alloca i32, align 4 ; <i32*> [#uses=1]
+ %tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
+ %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 1, i32* @A
+ br label %cond_next
+
+cond_next: ; preds = %cond_true, %entry
+ %tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
+ %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
+ br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5: ; preds = %cond_next
+ store i32 1, i32* @B
+ br label %cond_next7
+
+cond_next7: ; preds = %cond_true5, %cond_next
+ %tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
+ %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
+ br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11: ; preds = %cond_next7
+ store i32 1, i32* @C
+ br label %cond_next13
+
+cond_next13: ; preds = %cond_true11, %cond_next7
+ %tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
+ %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
+ br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17: ; preds = %cond_next13
+ call void @ext( i32* %ALL )
+ ret void
+
+UnifiedReturnBlock: ; preds = %cond_next13
+ ret void
+}
+
+; CHECK-NOT: @bar
+define internal fastcc void @bar(i32 %X) {
+entry:
+ %ALL = alloca i32, align 4 ; <i32*> [#uses=1]
+ %tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
+ %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 1, i32* @A
+ br label %cond_next
+
+cond_next: ; preds = %cond_true, %entry
+ %tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
+ %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
+ br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5: ; preds = %cond_next
+ store i32 1, i32* @B
+ br label %cond_next7
+
+cond_next7: ; preds = %cond_true5, %cond_next
+ %tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
+ %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
+ br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11: ; preds = %cond_next7
+ store i32 1, i32* @C
+ br label %cond_next13
+
+cond_next13: ; preds = %cond_true11, %cond_next7
+ %tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
+ %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
+ br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17: ; preds = %cond_next13
+ call void @foo( i32 %X )
+ ret void
+
+UnifiedReturnBlock: ; preds = %cond_next13
+ ret void
+}
+
+define internal fastcc void @bar2(i32 %X) {
+entry:
+ call void @foo( i32 %X )
+ ret void
+}
+
+declare void @ext(i32*)
+
+define void @test(i32 %X) {
+entry:
+; CHECK: test
+; CHECK-NOT: @bar
+ tail call fastcc void @bar( i32 %X )
+ tail call fastcc void @bar( i32 %X )
+ tail call fastcc void @bar2( i32 %X )
+ tail call fastcc void @bar2( i32 %X )
+ ret void
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll b/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll
index 94ce68e989b3..5d027a744920 100644
--- a/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll
+++ b/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll
@@ -1,6 +1,6 @@
; This testcase causes instcombine to hang.
;
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
define void @test(i32 %X) {
%reg117 = add i32 %X, 0 ; <i32> [#uses=0]
diff --git a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
index 34c2df605e42..d2b2b0027a52 100644
--- a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
+++ b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
@@ -1,6 +1,6 @@
; Instcombine was missing a check, causing it to make illegal transformations
; sometimes. In this case, it transforms the sub into an add:
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep sub
+; RUN: opt < %s -instcombine -S | grep sub
;
define i32 @test(i32 %i, i32 %j) {
%A = mul i32 %i, %j
diff --git a/test/Transforms/InstCombine/2002-08-02-CastTest.ll b/test/Transforms/InstCombine/2002-08-02-CastTest.ll
index 54c836c1056c..363cb21e3958 100644
--- a/test/Transforms/InstCombine/2002-08-02-CastTest.ll
+++ b/test/Transforms/InstCombine/2002-08-02-CastTest.ll
@@ -1,7 +1,7 @@
; This testcase is incorrectly getting completely eliminated. There should be
; SOME instruction named %c here, even if it's a bitwise and.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep %c
+; RUN: opt < %s -instcombine -S | grep %c
;
define i64 @test3(i64 %A) {
%c1 = trunc i64 %A to i8 ; <i8> [#uses=1]
diff --git a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
index e7bd6a8da2fb..22574f77f1d9 100644
--- a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
+++ b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep add
+; RUN: opt < %s -instcombine -S | not grep add
define i32 @test(i32 %A) {
%A.neg = sub i32 0, %A ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll b/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll
index e7e784882ab5..19010d22d729 100644
--- a/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll
+++ b/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep 4294967295
+; RUN: opt < %s -instcombine -S | grep 4294967295
define i64 @test(i64 %Val) {
%tmp.3 = trunc i64 %Val to i32 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll b/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll
index 214fa4d846db..8645249b7cae 100644
--- a/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll
+++ b/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
@X = global i32 5 ; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll b/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll
index 724f30f48894..154f3ba65e97 100644
--- a/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll
+++ b/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll
@@ -2,7 +2,7 @@
; because it thinks that the constant value is a 'not' expression... and
; constantly inverts the branch back and forth.
;
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i8 @test19(i1 %c) {
br i1 true, label %True, label %False
diff --git a/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll b/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
index 09732d3f636a..f550c8349f84 100644
--- a/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
+++ b/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
@@ -10,7 +10,7 @@
; should pass through the optimizer without failure.
;
; Extra code:
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; END.
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
index 209ab1b33695..6d227547c8d5 100644
--- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
+++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -1,6 +1,6 @@
; This testcase can be simplified by "realizing" that alloca can never return
; null.
-; RUN: llvm-as < %s | opt -instcombine -simplifycfg | \
+; RUN: opt < %s -instcombine -simplifycfg | \
; RUN: llvm-dis | not grep br
declare i32 @bitmap_clear(...)
diff --git a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
index 9779f21f925c..32979191f853 100644
--- a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
+++ b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep load
+; RUN: opt < %s -instcombine -S | grep load
define void @test(i32* %P) {
; Dead but not deletable!
diff --git a/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll b/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll
index 7d219cb35885..cfe5df6d30a3 100644
--- a/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll
+++ b/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
declare i32* @bar()
diff --git a/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll b/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
index 71585d426645..c1692f77abb2 100644
--- a/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
+++ b/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
@@ -1,5 +1,5 @@
; The cast in this testcase is not eliminable on a 32-bit target!
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep inttoptr
+; RUN: opt < %s -instcombine -S | grep inttoptr
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll b/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll
index b6930b5a65fd..4d3d48ef375f 100644
--- a/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll
+++ b/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll
@@ -1,8 +1,11 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep call | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
declare void @free(i8*)
define void @test(i32* %X) {
call void (...)* bitcast (void (i8*)* @free to void (...)*)( i32* %X ) ; <i32>:1 [#uses=0]
+; CHECK: %tmp = bitcast i32* %X to i8*
+; CHECK: call void @free(i8* %tmp)
ret void
+; CHECK: ret void
}
diff --git a/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll b/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
index c6a6b6afb81d..bec0b9e0c573 100644
--- a/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
+++ b/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
@@ -5,7 +5,7 @@
; invoke instruction, we really cannot perform this transformation at all at
; least without splitting the critical edge.
;
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
declare i8* @test()
diff --git a/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll b/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll
index 3a4b66157dfe..a08e3a884ceb 100644
--- a/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll
+++ b/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep 34
+; RUN: opt < %s -instcombine -S | not grep 34
define i32 @test(i32 %X) {
; Do not fold into shr X, 34, as this uses undefined behavior!
diff --git a/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll b/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll
index 2b5481b0195a..ff20d7db9483 100644
--- a/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll
+++ b/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll
@@ -1,7 +1,7 @@
; This testcase caused the combiner to go into an infinite loop, moving the
; cast back and forth, changing the seteq to operate on int vs uint and back.
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i1 @test(i32 %A, i32 %B) {
%C = sub i32 0, %A ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll b/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll
index 0b93eb2322eb..84f9bad69d8d 100644
--- a/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll
+++ b/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i32 @test() {
ret i32 0
diff --git a/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll b/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
index 66a5a4d78f6a..8b549374a70c 100644
--- a/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
+++ b/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
%Ty = type opaque
diff --git a/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll b/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll
index 82eecbb6986b..819260b60b12 100644
--- a/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll
+++ b/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
@p = weak global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll b/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll
index 9fc48de4ec19..f3e5d77c0c80 100644
--- a/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll
+++ b/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; This testcase should not send the instcombiner into an infinite loop!
diff --git a/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll b/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
index 5d2ed0b86d9d..1154bb481d30 100644
--- a/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
+++ b/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {ret i1 false}
define i1 @test(i1 %V) {
diff --git a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
index e56c390d13f4..8169d2127f7f 100644
--- a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
+++ b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -mem2reg | llvm-dis | \
+; RUN: opt < %s -instcombine -mem2reg -S | \
; RUN: not grep {i32 1}
; When propagating the load through the select, make sure that the load is
diff --git a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
index b89bd228403e..e646edf0296c 100644
--- a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
+++ b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -mem2reg -simplifycfg | \
+; RUN: opt < %s -instcombine -mem2reg -simplifycfg | \
; RUN: llvm-dis | grep -v store | not grep {i32 1}
; Test to make sure that instcombine does not accidentally propagate the load
diff --git a/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll b/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll
index ee4f62c8d5ce..27c823b9e633 100644
--- a/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll
+++ b/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep -- -65536
+; RUN: opt < %s -instcombine -S | not grep -- -65536
define i1 @test(i32 %tmp.124) {
%tmp.125 = shl i32 %tmp.124, 8 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll b/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll
index 766ba48be63f..730fdc26aabd 100644
--- a/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll
+++ b/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep and
+; RUN: opt < %s -instcombine -S | not grep and
define i8 @test21(i8 %A) {
;; sign extend
diff --git a/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll b/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
index 53bb4965cb49..187e2f594d69 100644
--- a/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
+++ b/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
@@ -9,124 +9,152 @@
; be eliminated. In many cases the setCC is also eliminated based on the
; constant value and the range of the casted value.
;
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | \
-; RUN: notcast .*int
+; RUN: opt < %s -instcombine -S | FileCheck %s
; END.
define i1 @lt_signed_to_large_unsigned(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp ult i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C1 = icmp sgt i8 %SB, -1
+; CHECK: ret i1 %C1
}
define i1 @lt_signed_to_large_signed(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
define i1 @lt_signed_to_large_negative(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, -1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 false
}
define i1 @lt_signed_to_small_signed(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, 17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp slt i8 %SB, 17
+; CHECK: ret i1 %C
}
define i1 @lt_signed_to_small_negative(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, -17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp slt i8 %SB, -17
+; CHECK: ret i1 %C
}
define i1 @lt_unsigned_to_large_unsigned(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp ult i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
define i1 @lt_unsigned_to_large_signed(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
define i1 @lt_unsigned_to_large_negative(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, -1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 false
}
define i1 @lt_unsigned_to_small_unsigned(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp ult i32 %Y, 17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp ult i8 %SB, 17
+; CHECK: ret i1 %C
}
define i1 @lt_unsigned_to_small_negative(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp slt i32 %Y, -17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 false
}
define i1 @gt_signed_to_large_unsigned(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp ugt i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp slt i8 %SB, 0
+; CHECK: ret i1 %C
}
define i1 @gt_signed_to_large_signed(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 false
}
define i1 @gt_signed_to_large_negative(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, -1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
+
define i1 @gt_signed_to_small_signed(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, 17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp sgt i8 %SB, 17
+; CHECK: ret i1 %C
}
define i1 @gt_signed_to_small_negative(i8 %SB) {
%Y = sext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, -17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp sgt i8 %SB, -17
+; CHECK: ret i1 %C
}
define i1 @gt_unsigned_to_large_unsigned(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp ugt i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 false
}
define i1 @gt_unsigned_to_large_signed(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, 1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 false
}
define i1 @gt_unsigned_to_large_negative(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, -1024 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
define i1 @gt_unsigned_to_small_unsigned(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp ugt i32 %Y, 17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp ugt i8 %SB, 17
+; CHECK: ret i1 %C
}
define i1 @gt_unsigned_to_small_negative(i8 %SB) {
%Y = zext i8 %SB to i32 ; <i32> [#uses=1]
%C = icmp sgt i32 %Y, -17 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
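The folds checked above are plain interval arithmetic: sext i8 to i32 yields values in [-128, 127] and zext i8 to i32 yields [0, 255], so a compare whose constant lies outside the relevant interval folds to true or false (e.g. icmp slt of a sign-extended i8 against 1024 is always true), while a compare against an in-range constant shrinks to an i8 compare. The one subtle case is gt_signed_to_large_unsigned: viewed as unsigned, the sign-extended value is either at most 127 or at least 2^32 - 128, so "ugt 1024" holds exactly when %SB is negative, hence the expected icmp slt i8 %SB, 0.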
diff --git a/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll b/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll
index 4ea0b5c484bd..008afa8a787d 100644
--- a/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll
+++ b/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
define i32 @test(i32 %X) {
%Y = srem i32 %X, undef ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll b/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
index ffaa0051e9f0..38553d798810 100644
--- a/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
+++ b/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {ret i1 false}
define i1 @test(i64 %tmp.169) {
diff --git a/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll b/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
index 8b2aa35bd754..1ec118006deb 100644
--- a/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
+++ b/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i32 @test(i1 %C, i32 %tmp.15) {
%tmp.16 = select i1 %C, i32 8, i32 1 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll b/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll
index 53585f9aa38b..9846ee72dd4b 100644
--- a/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll
+++ b/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i32 @_Z13func_31585107li(i32 %l_39521025, i32 %l_59244666) {
%shortcirc_val = select i1 false, i32 1, i32 0 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll b/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll
index 64bfec18df6b..e2d0618a41e6 100644
--- a/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll
+++ b/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR577
define i1 @test() {
diff --git a/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll b/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll
index 10dbfec21ae3..f0e60aca59f5 100644
--- a/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll
+++ b/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR585
define i1 @test() {
diff --git a/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll b/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
index 010087b4f472..3d887ddad02b 100644
--- a/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
+++ b/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {ret i1 true}
; PR586
diff --git a/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll b/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll
index c58234a54995..caee951b704c 100644
--- a/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll
+++ b/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; This example caused instcombine to spin into an infinite loop.
diff --git a/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll b/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll
index c89b0d54a9dd..10541ef70325 100644
--- a/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll
+++ b/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep undef
define i32 @test(i8 %A) {
diff --git a/test/Transforms/InstCombine/2006-02-28-Crash.ll b/test/Transforms/InstCombine/2006-02-28-Crash.ll
index 1f3c9e8e6740..9bea14c027ea 100644
--- a/test/Transforms/InstCombine/2006-02-28-Crash.ll
+++ b/test/Transforms/InstCombine/2006-02-28-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i32 @test() {
%tmp203 = icmp eq i32 1, 2 ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll b/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll
index e4b2b3931224..aa7d58786b42 100644
--- a/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll
+++ b/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define float @test(<4 x float> %V) {
%V2 = insertelement <4 x float> %V, float 1.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll b/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
index 2541eb733031..c337ea781e4b 100644
--- a/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
+++ b/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
@@ -1,11 +1,13 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep shl
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
; This cannot be turned into a sign extending cast!
define i64 @test(i64 %X) {
%Y = shl i64 %X, 16 ; <i64> [#uses=1]
+; CHECK: %Y = shl i64 %X, 16
%Z = ashr i64 %Y, 16 ; <i64> [#uses=1]
+; CHECK: %Z = ashr i64 %Y, 16
ret i64 %Z
+; CHECK: ret i64 %Z
}
diff --git a/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll b/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll
index 1b9df405c5ad..e22395fb8774 100644
--- a/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll
+++ b/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; END.
define void @test() {
diff --git a/test/Transforms/InstCombine/2006-09-15-CastToBool.ll b/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
index 043b598c8966..ee261ced5869 100644
--- a/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
+++ b/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep and
+; RUN: opt < %s -instcombine -S | grep and
; PR913
define i32 @test(i32* %tmp1) {
diff --git a/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll b/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll
index 4ca3dc1ec1c1..889bbcfa3ea5 100644
--- a/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll
+++ b/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll
@@ -1,5 +1,5 @@
; The optimizer should be able to remove cast operation here.
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep sext.*i32
define i1 @eq_signed_to_small_unsigned(i8 %SB) {
diff --git a/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll b/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll
index 8678a85c4648..4d1a9ef21655 100644
--- a/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll
+++ b/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll
@@ -1,6 +1,6 @@
; This test case is reduced from llvmAsmParser.cpp
; The optimizer should not remove the cast here.
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep sext.*i32
diff --git a/test/Transforms/InstCombine/2006-10-20-mask.ll b/test/Transforms/InstCombine/2006-10-20-mask.ll
index a5864f1c1831..0aaa5e8c21b4 100644
--- a/test/Transforms/InstCombine/2006-10-20-mask.ll
+++ b/test/Transforms/InstCombine/2006-10-20-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep and
define i64 @foo(i64 %tmp, i64 %tmp2) {
diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index c7cf0dd017c7..d3ba1e2287a3 100644
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep mul | count 2
define <4 x float> @test(<4 x float> %V) {
diff --git a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll b/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
index 23e805a9360c..35bb45e2b913 100644
--- a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
+++ b/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep memmove.i32
; Instcombine was trying to turn this into a memmove.i32
diff --git a/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll b/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll
index 8c48d4357bca..7799423f04f6 100644
--- a/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll
+++ b/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep lshr
+; RUN: opt < %s -instcombine -S | grep lshr
; Verify this is not turned into -1.
define i32 @test(i8 %amt) {
diff --git a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
index eaf10a306f58..7adeb9fd9603 100644
--- a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
+++ b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep sub
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep add
+; RUN: opt < %s -instcombine -S | grep sub
+; RUN: opt < %s -instcombine -S | grep add
define <4 x float> @test(<4 x float> %tmp26, <4 x float> %tmp53) {
; (X+Y)-Y != X for fp vectors.
diff --git a/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll b/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll
index 2c3313efa2a8..74483c1d9c77 100644
--- a/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll
+++ b/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep zext
+; RUN: opt < %s -instcombine -S | grep zext
; Never merge these two conversions, even though it's possible: this is
; significantly more expensive than the two conversions on some targets
diff --git a/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll b/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll
index ddfb88c7bfbb..80ee3e2a293f 100644
--- a/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll
+++ b/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {%bothcond =}
define i1 @Doit_bb(i32 %i.0) {
diff --git a/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll b/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
index 1508a523b7a9..5a74bd2ab7d6 100644
--- a/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
+++ b/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {icmp sgt}
; END.
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
index e4aade7b8ea4..2665791fe086 100644
--- a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
+++ b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep select
+; RUN: opt < %s -instcombine -S | grep select
; END.
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
index 67bc84c4045c..c3700a00c428 100644
--- a/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
+++ b/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep icmp | count 1
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {icmp ugt} | count 1
; END.
diff --git a/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll b/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
index 695aea46ee2c..eba1ac1298ce 100644
--- a/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
+++ b/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
@@ -1,6 +1,6 @@
; For PR1065. This causes an assertion in instcombine if a select with two cmp
; operands is encountered.
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; END.
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll b/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
index edbcfeed923a..e5238a577d2d 100644
--- a/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
+++ b/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp ugt}
+; RUN: opt < %s -instcombine -S | grep {icmp ugt}
; PR1107
; PR1940
diff --git a/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll b/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
index 073d3a154128..d2d215fa86d3 100644
--- a/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
+++ b/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {fcmp uno.*0.0}
+; RUN: opt < %s -instcombine -S | grep {fcmp uno.*0.0}
; PR1111
define i1 @test(double %X) {
%tmp = fcmp une double %X, %X
diff --git a/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll b/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll
index 83d05d9d94f7..fed2255c0e29 100644
--- a/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll
+++ b/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define <4 x i32> @test(<4 x i32> %A) {
%B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
diff --git a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
index 0e8c5b17666e..bd15dce11acb 100644
--- a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
+++ b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ugt.*, 1}
+; RUN: opt < %s -instcombine -S | grep {ugt.*, 1}
define i1 @test(i32 %tmp1030) {
%tmp1037 = icmp ne i32 %tmp1030, 40 ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
index e559cdd914a9..05891a203ab0 100644
--- a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
+++ b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine -mem2reg | llvm-dis | grep {%A = alloca}
-; RUN: llvm-as < %s | opt -instcombine -mem2reg | llvm-dis | \
+; RUN: opt < %s -instcombine -mem2reg -S | grep {%A = alloca}
+; RUN: opt < %s -instcombine -mem2reg -S | \
; RUN: not grep {%B = alloca}
; END.
diff --git a/test/Transforms/InstCombine/2007-02-07-PointerCast.ll b/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
index ce9eb3c69d4b..bf60991f7dd9 100644
--- a/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
+++ b/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
@@ -1,4 +1,4 @@
-;RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep zext
+;RUN: opt < %s -instcombine -S | grep zext
; Make sure the uint isn't removed. Instcombine in llvm 1.9 was dropping the
; uint cast which was causing a sign extend. This only affected code with
diff --git a/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll b/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll
index d60da44b7f19..f31c280661d0 100644
--- a/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll
+++ b/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep ret
+; RUN: opt < %s -instcombine -S | grep ret
; PR1217
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll b/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
index d101050e9307..109e4a217fb5 100644
--- a/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
+++ b/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp sle}
+; RUN: opt < %s -instcombine -S | grep {icmp sle}
; PR1244
define i1 @test(i32 %c.3.i, i32 %d.292.2.i) {
diff --git a/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll b/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll
index da58dec1dcaa..589bd805d6dd 100644
--- a/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll
+++ b/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | grep zext
+; RUN: opt < %s -instcombine -S | grep zext
; PR1261.
define i16 @test(i31 %zzz) {
diff --git a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
index c8dafd166299..ca93af3a6910 100644
--- a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
+++ b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
@@ -1,5 +1,5 @@
; For PR1248
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | grep {ugt i32 .*, 11}
+; RUN: opt < %s -instcombine -S | grep {ugt i32 .*, 11}
define i1 @test(i32 %tmp6) {
%tmp7 = sdiv i32 %tmp6, 12 ; <i32> [#uses=1]
icmp ne i32 %tmp7, -6 ; <i1>:1 [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
index 0b05f7c64320..c79400413ff3 100644
--- a/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
+++ b/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
@@ -1,5 +1,5 @@
; PR1271
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {icmp eq i32 .tmp.*, 2146435072}
%struct..0anon = type { i32, i32 }
%struct..1anon = type { double }
diff --git a/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll b/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll
index d67e1a13cc4e..0d4aac25c28f 100644
--- a/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll
+++ b/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll
@@ -1,5 +1,5 @@
; PR1271
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep and
+; RUN: opt < %s -instcombine -S | grep and
define i1 @test(i32 %tmp13) {
entry:
%tmp14 = shl i32 %tmp13, 12 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
index 4a2e60e7c35c..5bcb54306065 100644
--- a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
+++ b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
@@ -1,5 +1,5 @@
; PR1271
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {ashr i32 %.mp137, 2}
; END.
diff --git a/test/Transforms/InstCombine/2007-03-27-PR1280.ll b/test/Transforms/InstCombine/2007-03-27-PR1280.ll
index 6cb9aaec0987..7700c7dd8a69 100644
--- a/test/Transforms/InstCombine/2007-03-27-PR1280.ll
+++ b/test/Transforms/InstCombine/2007-03-27-PR1280.ll
@@ -4,7 +4,7 @@
; is not done. It should be removed when code gen supports "funny"
; bit widths.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {add i49.*-8388608}
+; RUN: opt < %s -instcombine -S | grep {add i49.*-8388608}
define i49 @test5(i49 %x) {
;; If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
diff --git a/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll b/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll
index e738635be763..b59d3c80335e 100644
--- a/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll
+++ b/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll
@@ -2,7 +2,7 @@
; a malloc messes up the element count, causing an extra 4GB to be allocated on
; 64-bit targets.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {= add }
+; RUN: opt < %s -instcombine -S | not grep {= add }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "x86_64-unknown-freebsd6.2"
diff --git a/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll b/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll
index 34322a2788a2..22eb2c23c34b 100644
--- a/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll
+++ b/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR1304
define i64 @bork(<1 x i64> %vec) {
diff --git a/test/Transforms/InstCombine/2007-05-04-Crash.ll b/test/Transforms/InstCombine/2007-05-04-Crash.ll
index 5ad791933574..9f50d8ac9b06 100644
--- a/test/Transforms/InstCombine/2007-05-04-Crash.ll
+++ b/test/Transforms/InstCombine/2007-05-04-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR1384
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/InstCombine/2007-05-10-icmp-or.ll b/test/Transforms/InstCombine/2007-05-10-icmp-or.ll
index 8769ded67b8e..4af5dfeef5dd 100644
--- a/test/Transforms/InstCombine/2007-05-10-icmp-or.ll
+++ b/test/Transforms/InstCombine/2007-05-10-icmp-or.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define i1 @test(i32 %tmp9) {
%tmp20 = icmp ugt i32 %tmp9, 255 ; <i1> [#uses=1]
%tmp11.not = icmp sgt i32 %tmp9, 255 ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-05-14-Crash.ll b/test/Transforms/InstCombine/2007-05-14-Crash.ll
index ececd350dba9..a3c010d2c427 100644
--- a/test/Transforms/InstCombine/2007-05-14-Crash.ll
+++ b/test/Transforms/InstCombine/2007-05-14-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
index 55bfac581faf..40818d40c29a 100644
--- a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
+++ b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {call.*sret}
+; RUN: opt < %s -instcombine -S | grep {call.*sret}
; Make sure instcombine doesn't drop the sret attribute.
define void @blah(i16* %tmp10) {
diff --git a/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll b/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
index 482c60869600..62b93513b76c 100644
--- a/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
+++ b/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ashr}
+; RUN: opt < %s -instcombine -S | grep {ashr}
; PR1499
define void @av_cmp_q_cond_true(i32* %retval, i32* %tmp9, i64* %tmp10) {
diff --git a/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll b/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
index ffc402650262..af539c12a339 100644
--- a/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
+++ b/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 true}
+; RUN: opt < %s -instcombine -S | grep {ret i1 true}
; rdar://5278853
define i1 @test(i32 %tmp468) {
diff --git a/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll b/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll
index 988599ba3f95..3f76187b780a 100644
--- a/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll
+++ b/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR1594
define i64 @test(i16 %tmp510, i16 %tmp512) {
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 2dcb43926ba8..c27fe0ab6a6d 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep icmp
+; RUN: opt < %s -instcombine -S | grep icmp
; PR1646
@__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
index c022e571e11f..d8f3d97017ba 100644
--- a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
+++ b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {call i32 @f}
+; RUN: opt < %s -instcombine -S | grep {call i32 @f}
%struct.FRAME.nest = type { i32, i32 (i32)* }
%struct.__builtin_trampoline = type { [10 x i8] }
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index 300edd5e56ee..23ee12ba754f 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep icmp
+; RUN: opt < %s -instcombine -S | grep icmp
; PR1678
@A = alias weak void ()* @B ; <void ()*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 6d251b12cf4f..3862de455d55 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep call
-; RUN: llvm-as < %s | opt -std-compile-opts | llvm-dis | not grep xyz
+; RUN: opt < %s -instcombine -S | not grep call
+; RUN: opt < %s -std-compile-opts -S | not grep xyz
@.str = internal constant [4 x i8] c"xyz\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-10-12-Crash.ll b/test/Transforms/InstCombine/2007-10-12-Crash.ll
index ed98fcdb9cae..b3d9f02b68aa 100644
--- a/test/Transforms/InstCombine/2007-10-12-Crash.ll
+++ b/test/Transforms/InstCombine/2007-10-12-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
%struct.Ray = type { %struct.Vec, %struct.Vec }
%struct.Scene = type { i32 (...)** }
diff --git a/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
index 5f884d4a7812..76bceb6879bb 100644
--- a/test/Transforms/InstCombine/2007-10-28-stacksave.ll
+++ b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {call.*stacksave}
+; RUN: opt < %s -instcombine -S | grep {call.*stacksave}
; PR1745
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll b/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
index 15e54b1bf749..8105b4ba5579 100644
--- a/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
+++ b/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2007-10-31-StringCrash.ll b/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
index 6ba07e8a5a15..220f3e22b991 100644
--- a/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
+++ b/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll b/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
index f3caf0797638..e1549a0fe541 100644
--- a/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
+++ b/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR1780
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll b/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
index d7ac21137287..5282739d5c7f 100644
--- a/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
+++ b/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp eq i32 %In, 1}
+; RUN: opt < %s -instcombine -S | grep {icmp eq i32 %In, 1}
; PR1800
define i1 @test(i32 %In) {
diff --git a/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll b/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll
index 1f211b65d3fd..f71b99ce1a4b 100644
--- a/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll
+++ b/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR1817
define i1 @test1(i32 %X) {
diff --git a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
index 60a4b3b4ab52..24394c63e4d7 100644
--- a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
+++ b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep bitcast
+; RUN: opt < %s -instcombine -S | not grep bitcast
; PR1716
@.str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll b/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
index 80df6fbdce35..6420537b9de7 100644
--- a/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
+++ b/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
@@ -1,6 +1,6 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {ret i1 0}
+; RUN: opt < %s -instcombine -S | not grep {ret i1 0}
; PR1850
define i1 @test() {
diff --git a/test/Transforms/InstCombine/2007-12-12-GEPScale.ll b/test/Transforms/InstCombine/2007-12-12-GEPScale.ll
index 6580f90411a6..cea87f2a4666 100644
--- a/test/Transforms/InstCombine/2007-12-12-GEPScale.ll
+++ b/test/Transforms/InstCombine/2007-12-12-GEPScale.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep 1431655764
+; RUN: opt < %s -instcombine -S | not grep 1431655764
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll b/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll
index 336c6d5a2eba..85cf9b6904cb 100644
--- a/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll
+++ b/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep nounwind
+; RUN: opt < %s -instcombine -S | grep nounwind
define void @bar() {
entry:
diff --git a/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll b/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
index e44fc6f099a7..cc89f6dd2056 100644
--- a/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
+++ b/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {add} | count 1
+; RUN: opt < %s -instcombine -S | grep {add} | count 1
define i32 @foo(i32 %a) {
entry:
diff --git a/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll b/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
index 5e95993960c7..b59548fd8e6f 100644
--- a/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
+++ b/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mem2reg -instcombine | llvm-dis | grep "ret i32 1" | count 8
+; RUN: opt < %s -mem2reg -instcombine -S | grep "ret i32 1" | count 8
define i32 @test1() {
entry:
diff --git a/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
index 277b4f09075e..5f4fa478dab2 100644
--- a/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
+++ b/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
@@ -1,5 +1,5 @@
; Ignore stderr, we expect warnings there
-; RUN: llvm-as < %s 2> /dev/null | opt -instcombine | llvm-dis | not grep bitcast
+; RUN: opt < %s -instcombine 2> /dev/null -S | not grep bitcast
define void @a() {
ret void
diff --git a/test/Transforms/InstCombine/2008-01-06-CastCrash.ll b/test/Transforms/InstCombine/2008-01-06-CastCrash.ll
index 1d816d4b9967..097a0ce849d0 100644
--- a/test/Transforms/InstCombine/2008-01-06-CastCrash.ll
+++ b/test/Transforms/InstCombine/2008-01-06-CastCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define <2 x i32> @f() {
ret <2 x i32> undef
diff --git a/test/Transforms/InstCombine/2008-01-06-VoidCast.ll b/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
index 015210a99831..407ff4ddc29a 100644
--- a/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
+++ b/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep bitcast
+; RUN: opt < %s -instcombine -S | not grep bitcast
define void @f(i16 %y) {
ret void
diff --git a/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll b/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll
index 71d5bf762c34..fbc8ba972a03 100644
--- a/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll
+++ b/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep and
+; RUN: opt < %s -instcombine -S | grep and
; PR1907
define i1 @test(i32 %c84.17) {
diff --git a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
index d791d7be26ec..7b3281ff4ead 100644
--- a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
+++ b/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep bitcast | count 2
+; RUN: opt < %s -instcombine -S | grep bitcast | count 2
define i32 @b(i32* inreg %x) signext {
ret i32 0
diff --git a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll b/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
index 5381db2acbcb..6401dfd0c11e 100644
--- a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
+++ b/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
%struct.FRAME.nest = type { i32, i32 (i32*)* }
%struct.__builtin_trampoline = type { [10 x i8] }
diff --git a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
index e35794a813b0..9bb940893935 100644
--- a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
+++ b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep zeroext
+; RUN: opt < %s -instcombine -S | grep zeroext
%struct.FRAME.nest = type { i32, i32 (...)* }
%struct.__builtin_trampoline = type { [10 x i8] }
diff --git a/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll b/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll
index 0c2cf54369cd..5ff23a3881fa 100644
--- a/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll
+++ b/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {icmp s}
+; RUN: opt < %s -instcombine -S | FileCheck %s
; PR1940
define i1 @test1(i8 %A, i8 %B) {
@@ -7,6 +6,8 @@ define i1 @test1(i8 %A, i8 %B) {
%b = zext i8 %B to i32
%c = icmp sgt i32 %a, %b
ret i1 %c
+; CHECK: %c = icmp ugt i8 %A, %B
+; CHECK: ret i1 %c
}
define i1 @test2(i8 %A, i8 %B) {
@@ -14,4 +15,6 @@ define i1 @test2(i8 %A, i8 %B) {
%b = sext i8 %B to i32
%c = icmp ugt i32 %a, %b
ret i1 %c
+; CHECK: %c = icmp ugt i8 %A, %B
+; CHECK: ret i1 %c
}
diff --git a/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
index 8de0959bf754..a49829af2ee8 100644
--- a/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
+++ b/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
@@ -1,11 +1,15 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i16 @test1(i16 %a) {
%tmp = zext i16 %a to i32 ; <i32> [#uses=2]
%tmp21 = lshr i32 %tmp, 8 ; <i32> [#uses=1]
+; CHECK: %tmp21 = lshr i16 %a, 8
%tmp5 = mul i32 %tmp, 5 ; <i32> [#uses=1]
+; CHECK: %tmp5 = mul i16 %a, 5
%tmp.upgrd.32 = or i32 %tmp21, %tmp5 ; <i32> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
%tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16 ; <i16> [#uses=1]
ret i16 %tmp.upgrd.3
+; CHECK: ret i16 %tmp.upgrd.32
}
diff --git a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
index 346f90f8293b..c161bcc9045d 100644
--- a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
+++ b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep select
+; RUN: opt < %s -instcombine -S | grep select
define double @fold(i1 %a, double %b) {
%s = select i1 %a, double 0., double 1.
diff --git a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
index d937f7bb2303..28a94ce07ad2 100644
--- a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
+++ b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {a.off}
+; RUN: opt < %s -instcombine -S | not grep {a.off}
; PR1949
define i1 @test1(i32 %a) {
diff --git a/test/Transforms/InstCombine/2008-02-13-MulURem.ll b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
index 669738378970..a88c51097247 100644
--- a/test/Transforms/InstCombine/2008-02-13-MulURem.ll
+++ b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep rem
+; RUN: opt < %s -instcombine -S | grep rem
; PR1933
define i32 @fold(i32 %a) {
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
index cd61a10c5b85..af61c150a7f8 100644
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
+++ b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i.* 0} | count 2
+; RUN: opt < %s -instcombine -S | grep {ret i.* 0} | count 2
; PR2048
define i32 @i(i32 %a) {
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
index 0e260c465e32..d26dec11e25d 100644
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
+++ b/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {sdiv i8 \%a, 9}
+; RUN: opt < %s -instcombine -S | grep {sdiv i8 \%a, 9}
; PR2048
define i8 @i(i8 %a) {
diff --git a/test/Transforms/InstCombine/2008-02-23-MulSub.ll b/test/Transforms/InstCombine/2008-02-23-MulSub.ll
index a74e98d44860..bb21c4b03417 100644
--- a/test/Transforms/InstCombine/2008-02-23-MulSub.ll
+++ b/test/Transforms/InstCombine/2008-02-23-MulSub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
+; RUN: opt < %s -instcombine -S | not grep mul
define i26 @test(i26 %a) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll b/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll
index ea0240395626..7f8bd4fb8a90 100644
--- a/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll
+++ b/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; rdar://5771353
define float @test(float %x, x86_fp80 %y) nounwind readonly {
diff --git a/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll b/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
index 31e7e35bef1f..da7e49ee8475 100644
--- a/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
+++ b/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {16} | count 1
+; RUN: opt < %s -instcombine -S | grep {16} | count 1
define i8* @bork(i8** %qux) {
%tmp275 = load i8** %qux, align 1
diff --git a/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll b/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
index 020121375fea..aa38065a7149 100644
--- a/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
+++ b/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
@@ -1,6 +1,6 @@
;; The bitcast cannot be eliminated because byval arguments need
;; the correct type, or at least a type of the correct size.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep bitcast
+; RUN: opt < %s -instcombine -S | grep bitcast
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
%struct.NSRect = type { [4 x float] }
diff --git a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
index 9bfe7aa14148..626564da936a 100644
--- a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
+++ b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {volatile store}
+; RUN: opt < %s -instcombine -S | grep {volatile store}
define void @test() {
%votf = alloca <4 x float> ; <<4 x float>*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
index e43c22daa42a..f2cc7254a325 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {volatile load} | count 2
+; RUN: opt < %s -instcombine -S | grep {volatile load} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
@g_1 = internal global i32 0 ; <i32*> [#uses=3]
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
index 293052a44711..176162d38636 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {volatile load} | count 2
+; RUN: opt < %s -instcombine -S | grep {volatile load} | count 2
; PR2262
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
index e9cc97df0c61..bbd004213d0a 100644
--- a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
+++ b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {store i8} | count 3
+; RUN: opt < %s -instcombine -S | grep {store i8} | count 3
; PR2297
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
index e81e828d51f3..1da28562aae4 100644
--- a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
+++ b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | %prcontext strlen 1 | not grep ret
+; RUN: opt -S -instcombine %s | FileCheck %s
; PR2297
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
@@ -11,6 +11,10 @@ entry:
store i8 0, i8* %tmp3, align 1
%tmp5 = getelementptr i8* %tmp1, i32 0 ; <i8*> [#uses=1]
store i8 1, i8* %tmp5, align 1
+; CHECK: store
+; CHECK: store
+; CHECK-NEXT: strlen
+; CHECK-NEXT: store
%tmp7 = call i32 @strlen( i8* %tmp1 ) nounwind readonly ; <i32> [#uses=1]
%tmp9 = getelementptr i8* %tmp1, i32 0 ; <i8*> [#uses=1]
store i8 0, i8* %tmp9, align 1
diff --git a/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll b/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
index 4dd29a79cd7d..d56a1a0b786a 100644
--- a/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
+++ b/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR2303
%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }
%"struct.std::locale::facet" = type { i32 (...)**, i32 }
diff --git a/test/Transforms/InstCombine/2008-05-17-InfLoop.ll b/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
index 9cfbd6f2c5e2..2939a4821654 100644
--- a/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
+++ b/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR2339
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll b/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
index e994399e423e..b34fc1e991d4 100644
--- a/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
+++ b/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 false} | count 2
+; RUN: opt < %s -instcombine -S | grep {ret i1 false} | count 2
; PR2329
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/InstCombine/2008-05-22-IDivVector.ll b/test/Transforms/InstCombine/2008-05-22-IDivVector.ll
index ad70b658d3c1..f7ba99c6b449 100644
--- a/test/Transforms/InstCombine/2008-05-22-IDivVector.ll
+++ b/test/Transforms/InstCombine/2008-05-22-IDivVector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
define <3 x i8> @f(<3 x i8> %i) {
%A = sdiv <3 x i8> %i, %i
diff --git a/test/Transforms/InstCombine/2008-05-22-NegValVector.ll b/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
index f2511b383254..bf92faf2fec5 100644
--- a/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
+++ b/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sub
+; RUN: opt < %s -instcombine -S | not grep sub
define <3 x i8> @f(<3 x i8> %a) {
%A = sub <3 x i8> zeroinitializer, %a
diff --git a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
index c0f34e68a9d8..2de5af735737 100644
--- a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
+++ b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep {ret i1 false}
; PR2359
define i1 @f(i8* %x) {
entry:
diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
index 7008587c40d9..541669365bdb 100644
--- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll
+++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {xor}
+; RUN: opt < %s -instcombine -S | grep {xor}
; PR2389
define i1 @test(i1 %a, i1 %b) {
diff --git a/test/Transforms/InstCombine/2008-05-31-Bools.ll b/test/Transforms/InstCombine/2008-05-31-Bools.ll
index f3df49b352c3..a0fe47a6257b 100644
--- a/test/Transforms/InstCombine/2008-05-31-Bools.ll
+++ b/test/Transforms/InstCombine/2008-05-31-Bools.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: grep {xor} %t
; RUN: grep {and} %t
; RUN: not grep {div} %t
diff --git a/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll b/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll
index ec13bbf65c18..5e4a9d0e5b30 100644
--- a/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll
+++ b/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
define i65 @foo(i65 %x) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
index da63dd67453f..917d3ae1f843 100644
--- a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
+++ b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {phi i32} | count 2
+; RUN: opt < %s -instcombine -S | grep {phi i32} | count 2
define void @test() nounwind {
entry:
diff --git a/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll b/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
index c671eb8b073e..08959c9c7c08 100644
--- a/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
+++ b/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {store i32} | count 2
+; RUN: opt < %s -instcombine -S | grep {store i32} | count 2
@g_139 = global i32 0 ; <i32*> [#uses=2]
diff --git a/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll b/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
index 3f23098225e1..aed1b14ce316 100644
--- a/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
+++ b/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {store i8} | count 2
+; RUN: opt < %s -instcombine -S | grep {store i8} | count 2
define i32 @a(i8* %s) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll b/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
index fa3dc0ae141e..05f1c5200081 100644
--- a/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
+++ b/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep load | count 3
+; RUN: opt < %s -instcombine -S | grep load | count 3
; PR2471
declare i32 @x(i32*)
diff --git a/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll b/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
index e2d710078484..c3371c6ae73d 100644
--- a/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
+++ b/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp eq i32 %In, 15}
+; RUN: opt < %s -instcombine -S | grep {icmp eq i32 %In, 15}
; PR2479
; (See also PR1800.)
diff --git a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
index 9ed7be38c6aa..830783455189 100644
--- a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
+++ b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {call.*llvm.stackrestore}
+; RUN: opt < %s -instcombine -S | grep {call.*llvm.stackrestore}
; PR2488
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2008-07-08-AndICmp.ll b/test/Transforms/InstCombine/2008-07-08-AndICmp.ll
index c6002413147e..a12f4bdf1084 100644
--- a/test/Transforms/InstCombine/2008-07-08-AndICmp.ll
+++ b/test/Transforms/InstCombine/2008-07-08-AndICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep icmp | count 1
+; RUN: opt < %s -instcombine -S | grep icmp | count 1
; PR2330
define i1 @foo(i32 %a, i32 %b) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
index 956b9a6ae2e6..8245b4d017b0 100644
--- a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
+++ b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp ne i32 \%a}
+; RUN: opt < %s -instcombine -S | grep {icmp ne i32 \%a}
; PR2330
define i1 @foo(i32 %a) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-08-SubAnd.ll b/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
index bf3afb9e3bb1..009115966f58 100644
--- a/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
+++ b/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep -v {i32 8}
+; RUN: opt < %s -instcombine -S | grep -v {i32 8}
; PR2330
define i32 @a(i32 %a) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
index af728eb207c2..ccfb1182769f 100644
--- a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {volatile load} | count 2
+; RUN: opt < %s -instcombine -S | grep {volatile load} | count 2
; PR2496
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2008-07-09-SubAndError.ll b/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
index c5b93586ba38..47a7590076c7 100644
--- a/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
+++ b/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {sub i32 0}
+; RUN: opt < %s -instcombine -S | not grep {sub i32 0}
; PR2330
define i32 @foo(i32 %a) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
index 205e57d79738..e91153202519 100644
--- a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
+++ b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {%C = xor i1 %A, true}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep {%C = xor i1 %A, true}
+; RUN: opt < %s -instcombine -S | grep {ret i1 false}
; PR2539
define i1 @test1(i1 %A) {
diff --git a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll b/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
index 50533db06d55..76e30399a666 100644
--- a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
+++ b/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep add
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
+; RUN: opt < %s -instcombine -S | not grep add
+; RUN: opt < %s -instcombine -S | not grep mul
; PR2330
define i1 @f(i32 %x, i32 %y) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-11-RemAnd.ll b/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
index 0bfd41dd4e7a..bf53451d66cf 100644
--- a/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
+++ b/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep rem
+; RUN: opt < %s -instcombine -S | not grep rem
; PR2330
define i32 @a(i32 %b) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-13-DivZero.ll b/test/Transforms/InstCombine/2008-07-13-DivZero.ll
index 85c3dbc5ff69..be1f8c294375 100644
--- a/test/Transforms/InstCombine/2008-07-13-DivZero.ll
+++ b/test/Transforms/InstCombine/2008-07-13-DivZero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {lshr.*3}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {call .*%cond}
+; RUN: opt < %s -instcombine -S | grep {lshr.*3}
+; RUN: opt < %s -instcombine -S | grep {call .*%cond}
; PR2506
; We can simplify the operand of udiv to '8', but not the operand to the
diff --git a/test/Transforms/InstCombine/2008-07-16-fsub.ll b/test/Transforms/InstCombine/2008-07-16-fsub.ll
index ca4174d0a963..672b4e955262 100644
--- a/test/Transforms/InstCombine/2008-07-16-fsub.ll
+++ b/test/Transforms/InstCombine/2008-07-16-fsub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep sub
+; RUN: opt < %s -instcombine -S | grep sub
; PR2553
define double @test(double %X) nounwind {
diff --git a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
index 49e6cdd0377d..501d8a66c3e6 100644
--- a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
+++ b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {store }
+; RUN: opt < %s -instcombine -S | not grep {store }
; PR2296
@G = common global double 0.000000e+00, align 16
diff --git a/test/Transforms/InstCombine/2008-08-05-And.ll b/test/Transforms/InstCombine/2008-08-05-And.ll
index 939e6b859506..9773c2d76249 100644
--- a/test/Transforms/InstCombine/2008-08-05-And.ll
+++ b/test/Transforms/InstCombine/2008-08-05-And.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep or
+; RUN: opt < %s -instcombine -S | not grep or
; PR2629
define void @f(i8* %x) nounwind {
diff --git a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll b/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
index 428a35947d1d..e9081f0fa94c 100644
--- a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
+++ b/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep xor
+; RUN: opt < %s -instcombine -S | not grep xor
define i1 @test1(i8 %x, i8 %y) {
%X = xor i8 %x, 128
diff --git a/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll b/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll
index 4b5dc6a5dfc1..7c50141421dc 100644
--- a/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll
+++ b/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x, i32 %start_y, i32 %end_y) {
br label %1
diff --git a/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll b/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
index e7a8ca9b521c..31ea94a5d832 100644
--- a/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
+++ b/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {or i1}
+; RUN: opt < %s -instcombine -S | grep {or i1}
; PR2844
define i32 @test(i32 %p_74) {
diff --git a/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll b/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
index dfe9c4a16c89..fd36d86a9484 100644
--- a/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
+++ b/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep {ret i1 false}
; PR2697
define i1 @x(i32 %x) nounwind {
diff --git a/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll b/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll
index 8f35a8586c73..d70d05293e8f 100644
--- a/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll
+++ b/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; PR2940
define i32 @tstid() {
diff --git a/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll b/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
index 46e98eb3627c..aa077e2ac3b3 100644
--- a/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
+++ b/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 true}
+; RUN: opt < %s -instcombine -S | grep {ret i1 true}
; PR2993
define i1 @foo(i32 %x) {
diff --git a/test/Transforms/InstCombine/2008-11-08-FCmp.ll b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
index c088f3166faf..c636288661b1 100644
--- a/test/Transforms/InstCombine/2008-11-08-FCmp.ll
+++ b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
@@ -1,9 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
-; RUN: grep {icmp eq} %t
-; RUN: grep {ret i1 false} %t | count 2
-; RUN: grep {ret i1 true} %t | count 2
-; RUN: grep {icmp ne} %t
-; RUN: not grep {icmp slt} %t
+; RUN: opt < %s -instcombine -S | FileCheck %s
; PR3021
; When inst combining an FCMP with the LHS coming from a uitofp instruction, we
@@ -12,6 +7,7 @@
define i1 @test1(i32 %val) {
%1 = uitofp i32 %val to double
%2 = fcmp ole double %1, 0.000000e+00
+; CHECK: icmp eq i32 %val, 0
ret i1 %2
}
@@ -19,17 +15,20 @@ define i1 @test2(i32 %val) {
%1 = uitofp i32 %val to double
%2 = fcmp olt double %1, 0.000000e+00
ret i1 %2
+; CHECK: ret i1 false
}
define i1 @test3(i32 %val) {
%1 = uitofp i32 %val to double
%2 = fcmp oge double %1, 0.000000e+00
ret i1 %2
+; CHECK: ret i1 true
}
define i1 @test4(i32 %val) {
%1 = uitofp i32 %val to double
%2 = fcmp ogt double %1, 0.000000e+00
+; CHECK: icmp ne i32 %val, 0
ret i1 %2
}
@@ -37,10 +36,12 @@ define i1 @test5(i32 %val) {
%1 = uitofp i32 %val to double
%2 = fcmp ogt double %1, -4.400000e+00
ret i1 %2
+; CHECK: ret i1 true
}
define i1 @test6(i32 %val) {
%1 = uitofp i32 %val to double
%2 = fcmp olt double %1, -4.400000e+00
ret i1 %2
+; CHECK: ret i1 false
}
diff --git a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
index 8c58a2ae7f7d..b2774d6522df 100644
--- a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
+++ b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: grep urem %t | count 3
; RUN: grep srem %t | count 1
; RUN: grep sub %t | count 2
diff --git a/test/Transforms/InstCombine/2008-11-27-IDivVector.ll b/test/Transforms/InstCombine/2008-11-27-IDivVector.ll
index 4275e1191a88..318a80cbc2a4 100644
--- a/test/Transforms/InstCombine/2008-11-27-IDivVector.ll
+++ b/test/Transforms/InstCombine/2008-11-27-IDivVector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep div
+; RUN: opt < %s -instcombine -S | not grep div
define <2 x i8> @f(<2 x i8> %x) {
%A = udiv <2 x i8> %x, <i8 1, i8 1>
diff --git a/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll b/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll
index 544e9abbbcb7..d8c53fac49e0 100644
--- a/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll
+++ b/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
+; RUN: opt < %s -instcombine -S | not grep mul
define <2 x i8> @f(<2 x i8> %x) {
%A = mul <2 x i8> %x, <i8 1, i8 1>
diff --git a/test/Transforms/InstCombine/2008-11-27-UDivNegative.ll b/test/Transforms/InstCombine/2008-11-27-UDivNegative.ll
index 6a6b5f386347..fc90bba77086 100644
--- a/test/Transforms/InstCombine/2008-11-27-UDivNegative.ll
+++ b/test/Transforms/InstCombine/2008-11-27-UDivNegative.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep div
+; RUN: opt < %s -instcombine -S | not grep div
define i8 @test(i8 %x) readnone nounwind {
%A = udiv i8 %x, 250
diff --git a/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
index f970b96ed4d2..e4c7ebcefc7e 100644
--- a/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
+++ b/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {i8 2, i8 2}
+; RUN: opt < %s -instcombine -S | grep {i8 2, i8 2}
; PR2756
define <2 x i8> @foo(<2 x i8> %x) {
diff --git a/test/Transforms/InstCombine/2009-01-05-i128-crash.ll b/test/Transforms/InstCombine/2009-01-05-i128-crash.ll
index df3a760daf87..d355e0aff8a7 100644
--- a/test/Transforms/InstCombine/2009-01-05-i128-crash.ll
+++ b/test/Transforms/InstCombine/2009-01-05-i128-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; PR3235
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
index 82b923ac95c7..a61a94ecbf3a 100644
--- a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
+++ b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: grep {, align 4} %t | count 3
; RUN: grep {, align 8} %t | count 3
; rdar://6480438
diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
index e0a2610c225e..ce62f35c1035 100644
--- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {store.*addrspace(1)}
+; RUN: opt < %s -instcombine -S | grep {store.*addrspace(1)}
; PR3335
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
index cc001f0334c3..79a2f1fd8eb0 100644
--- a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
+++ b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine | llvm-dis | grep 0x7FF8000000000000 | count 7
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine | llvm-dis | grep 0x7FF00000FFFFFFFF | count 5
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine | llvm-dis | grep {0\\.0} | count 3
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine | llvm-dis | grep {3\\.5} | count 1
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x7FF8000000000000 | count 7
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x7FF00000FFFFFFFF | count 5
+; RUN: opt < %s -simplifycfg -instcombine -S | grep {0\\.0} | count 3
+; RUN: opt < %s -simplifycfg -instcombine -S | grep {3\\.5} | count 1
;
; ModuleID = 'apf.c'
diff --git a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
index 9cdb425ab49c..6bc7ce34d1c2 100644
--- a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
+++ b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine | llvm-dis | grep 0x3FB99999A0000000 | count 2
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine | llvm-dis | grep 0xBFB99999A0000000 | count 2
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x3FB99999A0000000 | count 2
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0xBFB99999A0000000 | count 2
; check constant folding for 'frem'. PR 3316.
; ModuleID = 'tt.c'
diff --git a/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll b/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll
index 313e76d028d0..4b64b4872918 100644
--- a/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll
+++ b/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; PR3381
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/InstCombine/2009-01-31-InfIterate.ll b/test/Transforms/InstCombine/2009-01-31-InfIterate.ll
index 6620e4fadd64..815c1a91936a 100644
--- a/test/Transforms/InstCombine/2009-01-31-InfIterate.ll
+++ b/test/Transforms/InstCombine/2009-01-31-InfIterate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; PR3452
define i128 @test(i64 %A, i64 %B, i1 %C, i128 %Z, i128 %Y, i64* %P, i64* %Q) {
entry:
diff --git a/test/Transforms/InstCombine/2009-01-31-Pressure.ll b/test/Transforms/InstCombine/2009-01-31-Pressure.ll
index 0c3066bb90c9..c3ee9a35ba39 100644
--- a/test/Transforms/InstCombine/2009-01-31-Pressure.ll
+++ b/test/Transforms/InstCombine/2009-01-31-Pressure.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {%B = add i8 %b, %x}
+; RUN: opt < %s -instcombine -S | grep {%B = add i8 %b, %x}
; PR2698
declare void @use1(i1)
diff --git a/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll b/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll
index b08c9622502a..bc6a2045fa0c 100644
--- a/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll
+++ b/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; PR3468
define x86_fp80 @cast() {
diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index 52bceadac395..b29d8d23bc07 100644
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -scalarrepl | llvm-dis | not grep { = alloca}
+; RUN: opt < %s -instcombine -scalarrepl -S | not grep { = alloca}
; rdar://6417724
; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it.
diff --git a/test/Transforms/InstCombine/2009-02-21-LoadCST.ll b/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
index 2c126df1f14d..f56fc388eb5b 100644
--- a/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
+++ b/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 3679669}
+; RUN: opt < %s -instcombine -S | grep {ret i32 3679669}
; PR3595
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll b/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
index c59c5edcf2d1..a8349f042e42 100644
--- a/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
+++ b/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; PR3667
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll b/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
index ae690cf9a519..c617ca4fcad3 100644
--- a/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
+++ b/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; PR3826
define void @0(<4 x i16>*, <4 x i16>*) {
diff --git a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
index 92ace11f8f39..0a07bf34bac9 100644
--- a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
+++ b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ashr i32 %val, 31}
+; RUN: opt < %s -instcombine -S | grep {ashr i32 %val, 31}
; PR3851
define i32 @foo2(i32 %val) nounwind {
diff --git a/test/Transforms/InstCombine/2009-03-24-InfLoop.ll b/test/Transforms/InstCombine/2009-03-24-InfLoop.ll
index 6d5a7bf77d2b..4ce04a1eb542 100644
--- a/test/Transforms/InstCombine/2009-03-24-InfLoop.ll
+++ b/test/Transforms/InstCombine/2009-03-24-InfLoop.ll
@@ -1,5 +1,5 @@
; PR3874
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
define i1 @test(i32 %x) {
%A = lshr i32 3968, %x
%B = and i32 %A, 1
diff --git a/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll b/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
index 82f876298191..244b22a14dec 100644
--- a/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
+++ b/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {mul i64}
+; RUN: opt < %s -instcombine -S | grep {mul i64}
; rdar://6762288
; Instcombine should not promote the mul to i96 because it is definitely
diff --git a/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll b/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
index 1eda7dfa9ec7..dd14c6beec4c 100644
--- a/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
+++ b/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep cmp
+; RUN: opt < %s -instcombine -S | not grep cmp
; rdar://6903175
define i1 @f0(i32 *%a) nounwind {
diff --git a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
index ffb55a792148..e5355b8d3c56 100644
--- a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {store i32 0,}
+; RUN: opt < %s -instcombine -S | grep {store i32 0,}
; PR4366
define void @a() {
diff --git a/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll b/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
index 82b223a4ddc9..6beedf83cd6a 100644
--- a/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
+++ b/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep srem
+; RUN: opt < %s -instcombine -S | grep srem
; PR3439
define i32 @a(i32 %x) nounwind {
diff --git a/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll b/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll
index 7505de17a7af..41940fe885e5 100644
--- a/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll
+++ b/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; PR4495
define i32 @test(i64 %test) {
diff --git a/test/Transforms/InstCombine/CPP_min_max.ll b/test/Transforms/InstCombine/CPP_min_max.ll
index 785b91e79e49..531ce2b07b30 100644
--- a/test/Transforms/InstCombine/CPP_min_max.ll
+++ b/test/Transforms/InstCombine/CPP_min_max.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep select | not grep {i32\\*}
; This testcase corresponds to PR362, which notices that this horrible code
diff --git a/test/Transforms/InstCombine/IntPtrCast.ll b/test/Transforms/InstCombine/IntPtrCast.ll
index e24d199f8c45..4ecbccd86a48 100644
--- a/test/Transforms/InstCombine/IntPtrCast.ll
+++ b/test/Transforms/InstCombine/IntPtrCast.ll
@@ -1,9 +1,10 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32"
define i32* @test(i32* %P) {
%V = ptrtoint i32* %P to i32 ; <i32> [#uses=1]
%P2 = inttoptr i32 %V to i32* ; <i32*> [#uses=1]
ret i32* %P2
+; CHECK: ret i32* %P
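+; The ptrtoint/inttoptr round trip above is a no-op: the datalayout gives
+; pointers the same 32-bit width as i32, so the original pointer is returned.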
}
diff --git a/test/Transforms/InstCombine/JavaCompare.ll b/test/Transforms/InstCombine/JavaCompare.ll
index 72cab427d07f..7d0edb84d1eb 100644
--- a/test/Transforms/InstCombine/JavaCompare.ll
+++ b/test/Transforms/InstCombine/JavaCompare.ll
@@ -1,7 +1,7 @@
; This is the sequence of stuff that the Java front-end expands for a single
; <= comparison. Check to make sure we turn it into a <= (only)
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {%c3 = icmp sle i32 %A, %B}
+; RUN: opt < %s -instcombine -S | grep {%c3 = icmp sle i32 %A, %B}
define i1 @le(i32 %A, i32 %B) {
%c1 = icmp sgt i32 %A, %B ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/add-shrink.ll b/test/Transforms/InstCombine/add-shrink.ll
index 6dc02f361c97..52b8e327dbae 100644
--- a/test/Transforms/InstCombine/add-shrink.ll
+++ b/test/Transforms/InstCombine/add-shrink.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {add i32}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep sext | count 1
+; RUN: opt < %s -instcombine -S | grep {add i32}
+; RUN: opt < %s -instcombine -S | grep sext | count 1
; Should only have one sext and the add should be i32 instead of i64.
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 298b9a1917af..24319df0b762 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {add i32}
+; RUN: opt < %s -instcombine -S | grep {add i32}
define double @x(i32 %a, i32 %b) nounwind {
%m = lshr i32 %a, 24
diff --git a/test/Transforms/InstCombine/add.ll b/test/Transforms/InstCombine/add.ll
index 6ff2187474f7..4719809d6d36 100644
--- a/test/Transforms/InstCombine/add.ll
+++ b/test/Transforms/InstCombine/add.ll
@@ -1,6 +1,6 @@
; This test makes sure that add instructions are properly eliminated.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v OK | not grep add
define i32 @test1(i32 %A) {
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 161d56b40b57..1cbdd3a3cd54 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -1,9 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: grep -v OK | not grep add
-
-;; Target triple for gep raising case below.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
+; RUN: opt < %s -instcombine -S | not grep add
define i64 @test1(i64 %A, i32 %B) {
%tmp12 = zext i32 %B to i64
@@ -13,23 +8,6 @@ define i64 @test1(i64 %A, i32 %B) {
ret i64 %tmp6
}
-; PR1795
-define void @test2(i32 %.val24) {
-EntryBlock:
- add i32 %.val24, -12
- inttoptr i32 %0 to i32*
- store i32 1, i32* %1
- add i32 %.val24, -16
- inttoptr i32 %2 to i32*
- getelementptr i32* %3, i32 1
- load i32* %4
- tail call i32 @callee( i32 %5 )
- ret void
-}
-
-declare i32 @callee(i32)
-
-
define i32 @test3(i32 %A) {
%B = and i32 %A, 7
%C = and i32 %A, 32
diff --git a/test/Transforms/InstCombine/add3.ll b/test/Transforms/InstCombine/add3.ll
new file mode 100644
index 000000000000..cde3e24215f8
--- /dev/null
+++ b/test/Transforms/InstCombine/add3.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -S | grep inttoptr | count 2
+
+;; Target triple for gep raising case below.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
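+; The two inttoptr casts below are expected to survive instcombine; the RUN
+; line's "count 2" fails if either one is folded away.
+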
+; PR1795
+define void @test2(i32 %.val24) {
+EntryBlock:
+ add i32 %.val24, -12
+ inttoptr i32 %0 to i32*
+ store i32 1, i32* %1
+ add i32 %.val24, -16
+ inttoptr i32 %2 to i32*
+ getelementptr i32* %3, i32 1
+ load i32* %4
+ tail call i32 @callee( i32 %5 )
+ ret void
+}
+
+declare i32 @callee(i32)
diff --git a/test/Transforms/InstCombine/addnegneg.ll b/test/Transforms/InstCombine/addnegneg.ll
index f3b9565a51e1..a3a09f27ed95 100644
--- a/test/Transforms/InstCombine/addnegneg.ll
+++ b/test/Transforms/InstCombine/addnegneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep { sub } | count 1
+; RUN: opt < %s -instcombine -S | grep { sub } | count 1
; PR2047
define i32 @l(i32 %a, i32 %b, i32 %c, i32 %d) {
diff --git a/test/Transforms/InstCombine/adjust-for-sminmax.ll b/test/Transforms/InstCombine/adjust-for-sminmax.ll
index 9328ad364995..b9b6f702eb62 100644
--- a/test/Transforms/InstCombine/adjust-for-sminmax.ll
+++ b/test/Transforms/InstCombine/adjust-for-sminmax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp s\[lg\]t i32 %n, 0} | count 16
+; RUN: opt < %s -instcombine -S | grep {icmp s\[lg\]t i32 %n, 0} | count 16
; Instcombine should recognize that this code can be adjusted
; to fit the canonical smax/smin pattern.
diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll
index c826e31da1f9..80aacbce130e 100644
--- a/test/Transforms/InstCombine/align-2d-gep.ll
+++ b/test/Transforms/InstCombine/align-2d-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {align 16} | count 1
+; RUN: opt < %s -instcombine -S | grep {align 16} | count 1
; A multi-dimensional array in a nested loop doing vector stores that
; aren't yet aligned. Instcombine can understand the addressing in the
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index a05c513dcbb3..425393711625 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {align 16} | count 1
+; RUN: opt < %s -instcombine -S | grep {align 16} | count 1
; Instcombine should be able to prove vector alignment in the
; presence of a few mild address computation tricks.
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
new file mode 100644
index 000000000000..6e8ad87f19e0
--- /dev/null
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Don't assume that external global variables have their preferred
+; alignment. They may only have the ABI minimum alignment.
+
+; CHECK: %s = shl i64 %a, 3
+; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64)
+; CHECK: %q = add i64 %r, 1
+; CHECK: ret i64 %q
+
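+; In the datalayout below, "-i32:8:32" gives i32 an ABI alignment of 1 byte
+; but a preferred alignment of 4 bytes. Since only the ABI minimum may be
+; assumed for the external @A, bit 0 of its address is not known to be zero,
+; so the trailing 'add' cannot be rewritten as an 'or' of disjoint bits.
+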
+target datalayout = "-i32:8:32"
+
+@A = external global i32
+@B = external global i32
+
+define i64 @foo(i64 %a) {
+ %t = ptrtoint i32* @A to i64
+ %s = shl i64 %a, 3
+ %r = or i64 %t, %s
+ %q = add i64 %r, 1
+ ret i64 %q
+}
diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll
index 104d9918a9c7..0260ca2c65f5 100644
--- a/test/Transforms/InstCombine/align-inc.ll
+++ b/test/Transforms/InstCombine/align-inc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {GLOBAL.*align 16}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {tmp = load}
+; RUN: opt < %s -instcombine -S | grep {GLOBAL.*align 16}
+; RUN: opt < %s -instcombine -S | grep {tmp = load}
@GLOBAL = internal global [4 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 95d0f094ae3f..13d664d55999 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -1,6 +1,6 @@
; Zero byte allocas should be deleted.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep alloca
; END.
diff --git a/test/Transforms/InstCombine/and-compare.ll b/test/Transforms/InstCombine/and-compare.ll
index d2f2753f887e..c30a245e4156 100644
--- a/test/Transforms/InstCombine/and-compare.ll
+++ b/test/Transforms/InstCombine/and-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep and | count 1
; Should be optimized to one and.
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index 55efffa34ff5..91868d1d0759 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep fcmp | count 3
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep ret | grep 0
+; RUN: opt < %s -instcombine -S | grep fcmp | count 3
+; RUN: opt < %s -instcombine -S | grep ret | grep 0
define zeroext i8 @t1(float %x, float %y) nounwind {
%a = fcmp ueq float %x, %y
diff --git a/test/Transforms/InstCombine/and-not-or.ll b/test/Transforms/InstCombine/and-not-or.ll
index 9e9f397c4be3..9dce7b4e6fb3 100644
--- a/test/Transforms/InstCombine/and-not-or.ll
+++ b/test/Transforms/InstCombine/and-not-or.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {and i32 %x, %y} | count 4
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {or}
+; RUN: opt < %s -instcombine -S | grep {and i32 %x, %y} | count 4
+; RUN: opt < %s -instcombine -S | not grep {or}
define i32 @func1(i32 %x, i32 %y) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/and-or-and.ll b/test/Transforms/InstCombine/and-or-and.ll
index 04d475ccd822..216cd46775a5 100644
--- a/test/Transforms/InstCombine/and-or-and.ll
+++ b/test/Transforms/InstCombine/and-or-and.ll
@@ -9,7 +9,7 @@
;
; Which corresponds to test1.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {or }
define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index 8fc53a7e0acb..37ec3bc1aabb 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep xor | count 4
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep and
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep { or}
+; RUN: opt < %s -instcombine -S | grep xor | count 4
+; RUN: opt < %s -instcombine -S | not grep and
+; RUN: opt < %s -instcombine -S | not grep { or}
; PR1510
diff --git a/test/Transforms/InstCombine/and-or.ll b/test/Transforms/InstCombine/and-or.ll
index 38ad842e015f..b4224b38b1c2 100644
--- a/test/Transforms/InstCombine/and-or.ll
+++ b/test/Transforms/InstCombine/and-or.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {and i32 %a, 1} | count 4
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {or i32 %0, %b} | count 4
+; RUN: opt < %s -instcombine -S | grep {and i32 %a, 1} | count 4
+; RUN: opt < %s -instcombine -S | grep {or i32 %0, %b} | count 4
define i32 @func1(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/Transforms/InstCombine/and-xor-merge.ll b/test/Transforms/InstCombine/and-xor-merge.ll
index 85f8026ab572..e432a9aef7d1 100644
--- a/test/Transforms/InstCombine/and-xor-merge.ll
+++ b/test/Transforms/InstCombine/and-xor-merge.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep and | count 1
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep xor | count 2
+; RUN: opt < %s -instcombine -S | grep and | count 1
+; RUN: opt < %s -instcombine -S | grep xor | count 2
; (x&z) ^ (y&z) -> (x^y)&z
define i32 @test1(i32 %x, i32 %y, i32 %z) {
diff --git a/test/Transforms/InstCombine/and.ll b/test/Transforms/InstCombine/and.ll
index edab47e87cf1..8492df9a1209 100644
--- a/test/Transforms/InstCombine/and.ll
+++ b/test/Transforms/InstCombine/and.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep and
+; RUN: opt < %s -instcombine -S | not grep and
define i32 @test1(i32 %A) {
; zero result
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 3b80d94e1a61..0af9bfaff394 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep and
+; RUN: opt < %s -instcombine -S | not grep and
; PR1738
diff --git a/test/Transforms/InstCombine/apint-add1.ll b/test/Transforms/InstCombine/apint-add1.ll
index 74280ee7f7ca..02f1baf53996 100644
--- a/test/Transforms/InstCombine/apint-add1.ll
+++ b/test/Transforms/InstCombine/apint-add1.ll
@@ -1,7 +1,7 @@
; This test makes sure that add instructions are properly eliminated.
; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v OK | not grep add
diff --git a/test/Transforms/InstCombine/apint-add2.ll b/test/Transforms/InstCombine/apint-add2.ll
index 0ddfcc0764d6..913a70f1b458 100644
--- a/test/Transforms/InstCombine/apint-add2.ll
+++ b/test/Transforms/InstCombine/apint-add2.ll
@@ -1,7 +1,7 @@
; This test makes sure that add instructions are properly eliminated.
; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v OK | not grep add
; END.
diff --git a/test/Transforms/InstCombine/apint-and-compare.ll b/test/Transforms/InstCombine/apint-and-compare.ll
index ad2e41d718ef..53e591e69c9d 100644
--- a/test/Transforms/InstCombine/apint-and-compare.ll
+++ b/test/Transforms/InstCombine/apint-and-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep and | count 2
+; RUN: opt < %s -instcombine -S | grep and | count 2
; Should be optimized to one and.
define i1 @test1(i33 %a, i33 %b) {
diff --git a/test/Transforms/InstCombine/apint-and-or-and.ll b/test/Transforms/InstCombine/apint-and-or-and.ll
index 4630f2867865..17d29b601e69 100644
--- a/test/Transforms/InstCombine/apint-and-or-and.ll
+++ b/test/Transforms/InstCombine/apint-and-or-and.ll
@@ -11,7 +11,7 @@
;
; This tests arbitrary precision integers.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {or }
+; RUN: opt < %s -instcombine -S | not grep {or }
; END.
define i17 @test1(i17 %X, i17 %Y) {
diff --git a/test/Transforms/InstCombine/apint-and-xor-merge.ll b/test/Transforms/InstCombine/apint-and-xor-merge.ll
index e1e4e3caaef0..8adffde36273 100644
--- a/test/Transforms/InstCombine/apint-and-xor-merge.ll
+++ b/test/Transforms/InstCombine/apint-and-xor-merge.ll
@@ -1,8 +1,8 @@
; This test case checks that the merge of and/xor can work on arbitrary
; precision integers.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep and | count 1
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep xor | count 2
+; RUN: opt < %s -instcombine -S | grep and | count 1
+; RUN: opt < %s -instcombine -S | grep xor | count 2
; (x & z) ^ (y & z) -> (x ^ y) & z
define i57 @test1(i57 %x, i57 %y, i57 %z) {
diff --git a/test/Transforms/InstCombine/apint-and1.ll b/test/Transforms/InstCombine/apint-and1.ll
index eb3b1a64b2ce..cd4cbb9cf454 100644
--- a/test/Transforms/InstCombine/apint-and1.ll
+++ b/test/Transforms/InstCombine/apint-and1.ll
@@ -1,7 +1,7 @@
; This test makes sure that and instructions are properly eliminated.
; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {and }
+; RUN: opt < %s -instcombine -S | not grep {and }
; END.
define i39 @test0(i39 %A) {
diff --git a/test/Transforms/InstCombine/apint-and2.ll b/test/Transforms/InstCombine/apint-and2.ll
index f7b3934d5b83..ae74472b3d08 100644
--- a/test/Transforms/InstCombine/apint-and2.ll
+++ b/test/Transforms/InstCombine/apint-and2.ll
@@ -1,7 +1,7 @@
; This test makes sure that and instructions are properly eliminated.
; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {and }
+; RUN: opt < %s -instcombine -S | not grep {and }
; END.
diff --git a/test/Transforms/InstCombine/apint-call-cast-target.ll b/test/Transforms/InstCombine/apint-call-cast-target.ll
index 3688fbe1f293..fe336de75242 100644
--- a/test/Transforms/InstCombine/apint-call-cast-target.ll
+++ b/test/Transforms/InstCombine/apint-call-cast-target.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep call | not grep bitcast
+; RUN: opt < %s -instcombine -S | grep call | not grep bitcast
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/apint-cast-and-cast.ll b/test/Transforms/InstCombine/apint-cast-and-cast.ll
index 337fd7c2d722..251d78f59bee 100644
--- a/test/Transforms/InstCombine/apint-cast-and-cast.ll
+++ b/test/Transforms/InstCombine/apint-cast-and-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep bitcast
+; RUN: opt < %s -instcombine -S | not grep bitcast
define i19 @test1(i43 %val) {
%t1 = bitcast i43 %val to i43
diff --git a/test/Transforms/InstCombine/apint-cast-cast-to-and.ll b/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
index 29a88694d5c5..b2069a93ac4c 100644
--- a/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
+++ b/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep i41
+; RUN: opt < %s -instcombine -S | not grep i41
define i61 @test1(i61 %X) {
%Y = trunc i61 %X to i41 ;; Turn into an AND
diff --git a/test/Transforms/InstCombine/apint-cast.ll b/test/Transforms/InstCombine/apint-cast.ll
index dd0014640021..9bc539ed8b23 100644
--- a/test/Transforms/InstCombine/apint-cast.ll
+++ b/test/Transforms/InstCombine/apint-cast.ll
@@ -1,20 +1,28 @@
; Tests to make sure elimination of casts is working correctly
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
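+; The zext -> shift -> trunc sequences below should be narrowed so that the
+; shifts are performed in the original source width; the CHECK lines verify
+; the rewritten instructions.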
define i17 @test1(i17 %a) {
%tmp = zext i17 %a to i37 ; <i37> [#uses=2]
%tmp21 = lshr i37 %tmp, 8 ; <i37> [#uses=1]
+; CHECK: %tmp21 = lshr i17 %a, 8
%tmp5 = shl i37 %tmp, 8 ; <i37> [#uses=1]
+; CHECK: %tmp5 = shl i17 %a, 8
%tmp.upgrd.32 = or i37 %tmp21, %tmp5 ; <i37> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i17 %tmp21, %tmp5
%tmp.upgrd.3 = trunc i37 %tmp.upgrd.32 to i17 ; <i17> [#uses=1]
ret i17 %tmp.upgrd.3
+; CHECK: ret i17 %tmp.upgrd.32
}
define i167 @test2(i167 %a) {
%tmp = zext i167 %a to i577 ; <i577> [#uses=2]
%tmp21 = lshr i577 %tmp, 9 ; <i577> [#uses=1]
+; CHECK: %tmp21 = lshr i167 %a, 9
%tmp5 = shl i577 %tmp, 8 ; <i577> [#uses=1]
+; CHECK: %tmp5 = shl i167 %a, 8
%tmp.upgrd.32 = or i577 %tmp21, %tmp5 ; <i577> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i167 %tmp21, %tmp5
%tmp.upgrd.3 = trunc i577 %tmp.upgrd.32 to i167 ; <i167> [#uses=1]
ret i167 %tmp.upgrd.3
+; CHECK: ret i167 %tmp.upgrd.32
}
diff --git a/test/Transforms/InstCombine/apint-div1.ll b/test/Transforms/InstCombine/apint-div1.ll
index e9aa579b9583..68aadac1de44 100644
--- a/test/Transforms/InstCombine/apint-div1.ll
+++ b/test/Transforms/InstCombine/apint-div1.ll
@@ -1,7 +1,7 @@
; This test makes sure that div instructions are properly eliminated.
; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep div
+; RUN: opt < %s -instcombine -S | not grep div
define i33 @test1(i33 %X) {
diff --git a/test/Transforms/InstCombine/apint-div2.ll b/test/Transforms/InstCombine/apint-div2.ll
index 2aa2c3a4ac2c..2d7ac78a210f 100644
--- a/test/Transforms/InstCombine/apint-div2.ll
+++ b/test/Transforms/InstCombine/apint-div2.ll
@@ -1,7 +1,7 @@
; This test makes sure that div instructions are properly eliminated.
; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep div
+; RUN: opt < %s -instcombine -S | not grep div
define i333 @test1(i333 %X) {
diff --git a/test/Transforms/InstCombine/apint-elim-logicalops.ll b/test/Transforms/InstCombine/apint-elim-logicalops.ll
index 13d032cacf21..ec60e45d5491 100644
--- a/test/Transforms/InstCombine/apint-elim-logicalops.ll
+++ b/test/Transforms/InstCombine/apint-elim-logicalops.ll
@@ -1,6 +1,6 @@
; Test that elimination of logical operators works with
; arbitrary precision integers.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {(and\|xor\|add\|shl\|shr)}
; END.
diff --git a/test/Transforms/InstCombine/apint-mul1.ll b/test/Transforms/InstCombine/apint-mul1.ll
index 36b1102580c9..6a5b3e7f03c8 100644
--- a/test/Transforms/InstCombine/apint-mul1.ll
+++ b/test/Transforms/InstCombine/apint-mul1.ll
@@ -2,7 +2,7 @@
; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
+; RUN: opt < %s -instcombine -S | not grep mul
define i17 @test1(i17 %X) {
diff --git a/test/Transforms/InstCombine/apint-mul2.ll b/test/Transforms/InstCombine/apint-mul2.ll
index 72fd97ac79f0..558d2fb63c1e 100644
--- a/test/Transforms/InstCombine/apint-mul2.ll
+++ b/test/Transforms/InstCombine/apint-mul2.ll
@@ -2,7 +2,7 @@
; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
+; RUN: opt < %s -instcombine -S | not grep mul
define i177 @test1(i177 %X) {
diff --git a/test/Transforms/InstCombine/apint-not.ll b/test/Transforms/InstCombine/apint-not.ll
index f557fa8b1788..488b7f2c98e1 100644
--- a/test/Transforms/InstCombine/apint-not.ll
+++ b/test/Transforms/InstCombine/apint-not.ll
@@ -1,7 +1,7 @@
; This test makes sure that the xor instructions are properly eliminated
; when arbitrary precision integers are used.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep xor
+; RUN: opt < %s -instcombine -S | not grep xor
define i33 @test1(i33 %A) {
%B = xor i33 %A, -1
diff --git a/test/Transforms/InstCombine/apint-or1.ll b/test/Transforms/InstCombine/apint-or1.ll
index 51b87fe2a200..d4f87ac894d9 100644
--- a/test/Transforms/InstCombine/apint-or1.ll
+++ b/test/Transforms/InstCombine/apint-or1.ll
@@ -2,7 +2,7 @@
; This test is for Integer BitWidth <= 64 && BitWidth % 2 != 0.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep or
+; RUN: opt < %s -instcombine -S | not grep or
define i7 @test0(i7 %X) {
diff --git a/test/Transforms/InstCombine/apint-or2.ll b/test/Transforms/InstCombine/apint-or2.ll
index 21dc5654ceec..d7de255f7fd2 100644
--- a/test/Transforms/InstCombine/apint-or2.ll
+++ b/test/Transforms/InstCombine/apint-or2.ll
@@ -1,7 +1,7 @@
; This test makes sure that or instructions are properly eliminated.
; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep or
+; RUN: opt < %s -instcombine -S | not grep or
define i777 @test0(i777 %X) {
diff --git a/test/Transforms/InstCombine/apint-rem1.ll b/test/Transforms/InstCombine/apint-rem1.ll
index 2ec8c749972d..030faccee8b4 100644
--- a/test/Transforms/InstCombine/apint-rem1.ll
+++ b/test/Transforms/InstCombine/apint-rem1.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly eliminated.
; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep rem
+; RUN: opt < %s -instcombine -S | not grep rem
define i33 @test1(i33 %A) {
diff --git a/test/Transforms/InstCombine/apint-rem2.ll b/test/Transforms/InstCombine/apint-rem2.ll
index 4d22c22d5401..9bfc4cde9521 100644
--- a/test/Transforms/InstCombine/apint-rem2.ll
+++ b/test/Transforms/InstCombine/apint-rem2.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly eliminated.
; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep rem
+; RUN: opt < %s -instcombine -S | not grep rem
define i333 @test1(i333 %A) {
diff --git a/test/Transforms/InstCombine/apint-select.ll b/test/Transforms/InstCombine/apint-select.ll
index c2399fb88efe..f2ea60101c5f 100644
--- a/test/Transforms/InstCombine/apint-select.ll
+++ b/test/Transforms/InstCombine/apint-select.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep select
+; RUN: opt < %s -instcombine -S | not grep select
define i41 @test1(i1 %C) {
diff --git a/test/Transforms/InstCombine/apint-shift-simplify.ll b/test/Transforms/InstCombine/apint-shift-simplify.ll
index a0046fbacb6b..1a3340ac5667 100644
--- a/test/Transforms/InstCombine/apint-shift-simplify.ll
+++ b/test/Transforms/InstCombine/apint-shift-simplify.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: egrep {shl|lshr|ashr} | count 3
define i41 @test0(i41 %A, i41 %B, i41 %C) {
diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll
index afc5360d8de4..6573b5bf4f44 100644
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -1,6 +1,6 @@
; This test makes sure that shift instructions are properly eliminated
; even with arbitrary precision integers.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sh
+; RUN: opt < %s -instcombine -S | not grep sh
; END.
define i47 @test1(i47 %A) {
diff --git a/test/Transforms/InstCombine/apint-shl-trunc.ll b/test/Transforms/InstCombine/apint-shl-trunc.ll
index a9cffdef04f4..8163e6d527d7 100644
--- a/test/Transforms/InstCombine/apint-shl-trunc.ll
+++ b/test/Transforms/InstCombine/apint-shl-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep shl
+; RUN: opt < %s -instcombine -S | grep shl
; END.
define i1 @test0(i39 %X, i39 %A) {
diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll
index 2ff763c9f5f8..8b9ff143ea4e 100644
--- a/test/Transforms/InstCombine/apint-sub.ll
+++ b/test/Transforms/InstCombine/apint-sub.ll
@@ -2,7 +2,7 @@
; even with arbitrary precision integers.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v {sub i19 %Cok, %Bok} | grep -v {sub i25 0, %Aok} | not grep sub
; END.
diff --git a/test/Transforms/InstCombine/apint-xor1.ll b/test/Transforms/InstCombine/apint-xor1.ll
index 5ddf5cf9c9b6..849c65983328 100644
--- a/test/Transforms/InstCombine/apint-xor1.ll
+++ b/test/Transforms/InstCombine/apint-xor1.ll
@@ -1,7 +1,7 @@
; This test makes sure that xor instructions are properly eliminated.
; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {xor }
+; RUN: opt < %s -instcombine -S | not grep {xor }
define i47 @test1(i47 %A, i47 %B) {
diff --git a/test/Transforms/InstCombine/apint-xor2.ll b/test/Transforms/InstCombine/apint-xor2.ll
index 4d2d415f3467..cacc17958eb9 100644
--- a/test/Transforms/InstCombine/apint-xor2.ll
+++ b/test/Transforms/InstCombine/apint-xor2.ll
@@ -1,7 +1,7 @@
; This test makes sure that xor instructions are properly eliminated.
; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {xor }
+; RUN: opt < %s -instcombine -S | not grep {xor }
; END.
diff --git a/test/Transforms/InstCombine/apint-zext1.ll b/test/Transforms/InstCombine/apint-zext1.ll
index 03330c7be966..40de360bb692 100644
--- a/test/Transforms/InstCombine/apint-zext1.ll
+++ b/test/Transforms/InstCombine/apint-zext1.ll
@@ -1,9 +1,11 @@
; Tests to make sure elimination of casts is working correctly
; This test is for Integer BitWidth <= 64 && BitWidth % 2 != 0.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast {} {%c1.*}
+; RUN: opt < %s -instcombine -S | FileCheck %s
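+; A zext whose result is then sign-extended can use a single wider zext,
+; since the sign bit of the zext'd value is known to be zero (see CHECK).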
define i47 @test_sext_zext(i11 %A) {
%c1 = zext i11 %A to i39
%c2 = sext i39 %c1 to i47
ret i47 %c2
+; CHECK: %c2 = zext i11 %A to i47
+; CHECK: ret i47 %c2
}
diff --git a/test/Transforms/InstCombine/apint-zext2.ll b/test/Transforms/InstCombine/apint-zext2.ll
index 8350d10282b6..886dcf2826cc 100644
--- a/test/Transforms/InstCombine/apint-zext2.ll
+++ b/test/Transforms/InstCombine/apint-zext2.ll
@@ -1,9 +1,11 @@
; Tests to make sure elimination of casts is working correctly
; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast {} {%c1.*}
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i1024 @test_sext_zext(i77 %A) {
%c1 = zext i77 %A to i533
%c2 = sext i533 %c1 to i1024
ret i1024 %c2
+; CHECK: %c2 = zext i77 %A to i1024
+; CHECK: ret i1024 %c2
}
diff --git a/test/Transforms/InstCombine/ashr-nop.ll b/test/Transforms/InstCombine/ashr-nop.ll
index bb0da346b0c1..870ede38cd88 100644
--- a/test/Transforms/InstCombine/ashr-nop.ll
+++ b/test/Transforms/InstCombine/ashr-nop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep ashr
+; RUN: opt < %s -instcombine -S | not grep ashr
define i32 @foo(i32 %x) {
%o = and i32 %x, 1
diff --git a/test/Transforms/InstCombine/badmalloc.ll b/test/Transforms/InstCombine/badmalloc.ll
new file mode 100644
index 000000000000..cab23b5af599
--- /dev/null
+++ b/test/Transforms/InstCombine/badmalloc.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+declare noalias i8* @malloc(i64) nounwind
+declare void @free(i8*)
+
+; PR5130
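+; malloc may return null at run time, so instcombine must not fold the null
+; check below to a constant; the CHECK lines require %B to survive.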
+define i1 @test1() {
+ %A = call noalias i8* @malloc(i64 4) nounwind
+ %B = icmp eq i8* %A, null
+
+ call void @free(i8* %A)
+ ret i1 %B
+
+; CHECK: @test1
+; CHECK: ret i1 %B
+}
diff --git a/test/Transforms/InstCombine/binop-cast.ll b/test/Transforms/InstCombine/binop-cast.ll
index ea5299ba8761..3dbca7ef1489 100644
--- a/test/Transforms/InstCombine/binop-cast.ll
+++ b/test/Transforms/InstCombine/binop-cast.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @testAdd(i32 %X, i32 %Y) {
%tmp = add i32 %X, %Y
+; CHECK: %tmp = add i32 %X, %Y
%tmp.l = bitcast i32 %tmp to i32
ret i32 %tmp.l
+; CHECK: ret i32 %tmp
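+; The i32-to-i32 bitcast is a no-op, so the add result is returned directly.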
}
diff --git a/test/Transforms/InstCombine/bit-tracking.ll b/test/Transforms/InstCombine/bit-tracking.ll
index d090c69d7f3c..51bbc0888836 100644
--- a/test/Transforms/InstCombine/bit-tracking.ll
+++ b/test/Transforms/InstCombine/bit-tracking.ll
@@ -1,6 +1,6 @@
; This file contains various testcases that require tracking whether bits are
; set or cleared by various instructions.
-; RUN: llvm-as < %s | opt -instcombine -instcombine | llvm-dis |\
+; RUN: opt < %s -instcombine -instcombine -S |\
; RUN: not grep %ELIM
; Reduce down to a single XOR
diff --git a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll b/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll
index 8695d1edbc13..4e9dfbb53b49 100644
--- a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll
+++ b/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 0}
+; RUN: opt < %s -instcombine -S | grep {ret i32 0}
; PR4487
; Bitcasts between vectors and scalars are valid, despite being ill-advised.
diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll
new file mode 100644
index 000000000000..d27765e89424
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | grep element | count 4
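+; Presumably each of the four <1 x i64> <-> scalar bitcasts below gets
+; canonicalized to an extractelement or insertelement, giving the four
+; "element" occurrences that the RUN line counts.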
+
+define double @a(<1 x i64> %y) {
+ %c = bitcast <1 x i64> %y to double
+ ret double %c
+}
+
+define i64 @b(<1 x i64> %y) {
+ %c = bitcast <1 x i64> %y to i64
+ ret i64 %c
+}
+
+define <1 x i64> @c(double %y) {
+ %c = bitcast double %y to <1 x i64>
+ ret <1 x i64> %c
+}
+
+define <1 x i64> @d(i64 %y) {
+ %c = bitcast i64 %y to <1 x i64>
+ ret <1 x i64> %c
+}
+
diff --git a/test/Transforms/InstCombine/bitcast-vector-fold.ll b/test/Transforms/InstCombine/bitcast-vector-fold.ll
index ded3e2f10aba..8feec229171a 100644
--- a/test/Transforms/InstCombine/bitcast-vector-fold.ll
+++ b/test/Transforms/InstCombine/bitcast-vector-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep bitcast
+; RUN: opt < %s -instcombine -S | not grep bitcast
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/InstCombine/bitcount.ll b/test/Transforms/InstCombine/bitcount.ll
index 8ebf289eb7d7..f75ca2df69d1 100644
--- a/test/Transforms/InstCombine/bitcount.ll
+++ b/test/Transforms/InstCombine/bitcount.ll
@@ -1,6 +1,6 @@
; Tests to make sure bit counts of constants are folded
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 19}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | grep {ret i32 19}
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v declare | not grep llvm.ct
declare i31 @llvm.ctpop.i31(i31 %val)
diff --git a/test/Transforms/InstCombine/bittest.ll b/test/Transforms/InstCombine/bittest.ll
index 8d45a48aa91d..92863d59470a 100644
--- a/test/Transforms/InstCombine/bittest.ll
+++ b/test/Transforms/InstCombine/bittest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -simplifycfg | llvm-dis |\
+; RUN: opt < %s -instcombine -simplifycfg -S |\
; RUN: not grep {call void @abort}
@b_rec.0 = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 87d8b0496d26..3e56951c6077 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep ret | count 6
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep call.*bswap
+; RUN: opt < %s -instcombine -S | grep ret | count 6
+; RUN: opt < %s -instcombine -S | not grep call.*bswap
define i1 @test1(i16 %tmp2) {
%tmp10 = call i16 @llvm.bswap.i16( i16 %tmp2 ) ; <i16> [#uses=1]
diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll
index 2ba718e58478..c5aa8bede178 100644
--- a/test/Transforms/InstCombine/bswap.ll
+++ b/test/Transforms/InstCombine/bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {call.*llvm.bswap} | count 6
define i32 @test1(i32 %i) {
diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll
index 1a02514d1a7b..7addc8abc84f 100644
--- a/test/Transforms/InstCombine/call-cast-target.ll
+++ b/test/Transforms/InstCombine/call-cast-target.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep call | not grep bitcast
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/call-intrinsics.ll b/test/Transforms/InstCombine/call-intrinsics.ll
index e1c60570f14f..f9d108058063 100644
--- a/test/Transforms/InstCombine/call-intrinsics.ll
+++ b/test/Transforms/InstCombine/call-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
@X = global i8 0 ; <i8*> [#uses=3]
@Y = global i8 12 ; <i8*> [#uses=2]
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index df9b2a68d9dc..1e37eec7e94c 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -1,7 +1,5 @@
; Ignore stderr; we expect warnings there
-; RUN: llvm-as < %s 2> /dev/null | opt -instcombine | llvm-dis | \
-; RUN: grep call | notcast
-; END.
+; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
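+; Each bitcasted call below should be rewritten to call the real prototype
+; directly, inserting casts on arguments or return values where needed, as
+; the CHECK lines verify.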
; Simple case, argument translatable without changing the value
@@ -10,17 +8,24 @@ declare void @test1a(i8*)
define void @test1(i32* %A) {
call void bitcast (void (i8*)* @test1a to void (i32*)*)( i32* %A )
ret void
+; CHECK: %tmp = bitcast i32* %A to i8*
+; CHECK: call void @test1a(i8* %tmp)
+; CHECK: ret void
}
; More complex case, translate argument because of resolution. This is safe
; because we have the body of the function
define void @test2a(i8 %A) {
ret void
+; CHECK: ret void
}
define i32 @test2(i32 %A) {
call void bitcast (void (i8)* @test2a to void (i32)*)( i32 %A )
ret i32 %A
+; CHECK: %tmp = trunc i32 %A to i8
+; CHECK: call void @test2a(i8 %tmp)
+; CHECK: ret i32 %A
}
@@ -32,17 +37,24 @@ define void @test3(i8 %A, i8 %B) {
call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B
)
ret void
+; CHECK: %tmp = zext i8 %B to i32
+; CHECK: call void (i8, ...)* @test3a(i8 %A, i32 %tmp)
+; CHECK: ret void
}
; test conversion of return value...
define i8 @test4a() {
ret i8 0
+; CHECK: ret i8 0
}
define i32 @test4() {
%X = call i32 bitcast (i8 ()* @test4a to i32 ()*)( ) ; <i32> [#uses=1]
ret i32 %X
+; CHECK: %X1 = call i8 @test4a()
+; CHECK: %tmp = zext i8 %X1 to i32
+; CHECK: ret i32 %tmp
}
@@ -53,6 +65,8 @@ declare i32 @test5a()
define i32 @test5() {
%X = call i32 @test5a( ) ; <i32> [#uses=1]
ret i32 %X
+; CHECK: %X = call i32 @test5a()
+; CHECK: ret i32 %X
}
@@ -62,17 +76,22 @@ declare i32 @test6a(i32)
define i32 @test6() {
%X = call i32 bitcast (i32 (i32)* @test6a to i32 ()*)( ) ; <i32> [#uses=1]
ret i32 %X
+; CHECK: %X1 = call i32 @test6a(i32 0)
+; CHECK: ret i32 %X1
}
; test removal of arguments, only can happen with a function body
define void @test7a() {
ret void
+; CHECK: ret void
}
define void @test7() {
call void bitcast (void ()* @test7a to void (i32)*)( i32 5 )
ret void
+; CHECK: call void @test7a()
+; CHECK: ret void
}
diff --git a/test/Transforms/InstCombine/call2.ll b/test/Transforms/InstCombine/call2.ll
index 4ba840f7fb96..3a6bd67ce569 100644
--- a/test/Transforms/InstCombine/call2.ll
+++ b/test/Transforms/InstCombine/call2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis
+; RUN: opt < %s -instcombine | llvm-dis
; This used to crash trying to do a double-to-pointer conversion
define i32 @bar() {
diff --git a/test/Transforms/InstCombine/canonicalize_branch.ll b/test/Transforms/InstCombine/canonicalize_branch.ll
index 79f02e73209a..52aff3dcd475 100644
--- a/test/Transforms/InstCombine/canonicalize_branch.ll
+++ b/test/Transforms/InstCombine/canonicalize_branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {icmp ne\|icmp ule\|icmp uge}
define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/Transforms/InstCombine/cast-and-cast.ll b/test/Transforms/InstCombine/cast-and-cast.ll
index f90cb7499aff..eda9d998be9c 100644
--- a/test/Transforms/InstCombine/cast-and-cast.ll
+++ b/test/Transforms/InstCombine/cast-and-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep bitcast
define i1 @test1(i32 %val) {
diff --git a/test/Transforms/InstCombine/cast-cast-to-and.ll b/test/Transforms/InstCombine/cast-cast-to-and.ll
index bb7c3dd754cc..1e591ccf493c 100644
--- a/test/Transforms/InstCombine/cast-cast-to-and.ll
+++ b/test/Transforms/InstCombine/cast-cast-to-and.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep i8
define i32 @test1(i32 %X) {
diff --git a/test/Transforms/InstCombine/cast-load-gep.ll b/test/Transforms/InstCombine/cast-load-gep.ll
index bc2c7b35163d..57f021cc8de5 100644
--- a/test/Transforms/InstCombine/cast-load-gep.ll
+++ b/test/Transforms/InstCombine/cast-load-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -globaldce | llvm-dis | \
+; RUN: opt < %s -instcombine -globaldce -S | \
; RUN: not grep Array
; Pulling the cast out of the load allows us to eliminate the load, and then
diff --git a/test/Transforms/InstCombine/cast-malloc.ll b/test/Transforms/InstCombine/cast-malloc.ll
index d05f6b04fd07..3754032cc2f7 100644
--- a/test/Transforms/InstCombine/cast-malloc.ll
+++ b/test/Transforms/InstCombine/cast-malloc.ll
@@ -1,5 +1,5 @@
; test that casted mallocs get converted to malloc of the right type
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep bitcast
; The target datalayout is important for this test case. We have to tell
diff --git a/test/Transforms/InstCombine/cast-mul-select.ll b/test/Transforms/InstCombine/cast-mul-select.ll
index 76e9b24332f5..fcb7e2301d7f 100644
--- a/test/Transforms/InstCombine/cast-mul-select.ll
+++ b/test/Transforms/InstCombine/cast-mul-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
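+; The casts around the arithmetic below should be eliminated by performing
+; the arithmetic directly in the wider (or narrower) type, as the CHECK
+; lines verify.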
define i32 @mul(i32 %x, i32 %y) {
%A = trunc i32 %x to i8
@@ -6,6 +6,9 @@ define i32 @mul(i32 %x, i32 %y) {
%C = mul i8 %A, %B
%D = zext i8 %C to i32
ret i32 %D
+; CHECK: %C = mul i32 %x, %y
+; CHECK: %D = and i32 %C, 255
+; CHECK: ret i32 %D
}
define i32 @select1(i1 %cond, i32 %x, i32 %y, i32 %z) {
@@ -16,6 +19,10 @@ define i32 @select1(i1 %cond, i32 %x, i32 %y, i32 %z) {
%E = select i1 %cond, i8 %C, i8 %D
%F = zext i8 %E to i32
ret i32 %F
+; CHECK: %D = add i32 %x, %y
+; CHECK: %E = select i1 %cond, i32 %z, i32 %D
+; CHECK: %F = and i32 %E, 255
+; CHECK: ret i32 %F
}
define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) {
@@ -26,4 +33,7 @@ define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) {
%E = select i1 %cond, i32 %C, i32 %D
%F = trunc i32 %E to i8
ret i8 %F
+; CHECK: %D = add i8 %x, %y
+; CHECK: %E = select i1 %cond, i8 %z, i8 %D
+; CHECK: ret i8 %E
}
diff --git a/test/Transforms/InstCombine/cast-propagate.ll b/test/Transforms/InstCombine/cast-propagate.ll
index c00f9537410a..95c040b140d2 100644
--- a/test/Transforms/InstCombine/cast-propagate.ll
+++ b/test/Transforms/InstCombine/cast-propagate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -mem2reg | llvm-dis | \
+; RUN: opt < %s -instcombine -mem2reg -S | \
; RUN: not grep load
define i32 @test1(i32* %P) {
diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll
index 091f148a2483..611ded473efe 100644
--- a/test/Transforms/InstCombine/cast-set.ll
+++ b/test/Transforms/InstCombine/cast-set.ll
@@ -1,13 +1,15 @@
; This tests for various complex cast elimination cases instcombine should
; handle.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i1 @test1(i32 %X) {
%A = bitcast i32 %X to i32 ; <i32> [#uses=1]
; Convert to setne int %X, 12
%c = icmp ne i32 %A, 12 ; <i1> [#uses=1]
ret i1 %c
+; CHECK: %c = icmp ne i32 %X, 12
+; CHECK: ret i1 %c
}
define i1 @test2(i32 %X, i32 %Y) {
@@ -16,6 +18,8 @@ define i1 @test2(i32 %X, i32 %Y) {
; Convert to setne int %X, %Y
%c = icmp ne i32 %A, %B ; <i1> [#uses=1]
ret i1 %c
+; CHECK: %c = icmp ne i32 %X, %Y
+; CHECK: ret i1 %c
}
define i32 @test4(i32 %A) {
@@ -23,6 +27,8 @@ define i32 @test4(i32 %A) {
%C = shl i32 %B, 2 ; <i32> [#uses=1]
%D = bitcast i32 %C to i32 ; <i32> [#uses=1]
ret i32 %D
+; CHECK: %C = shl i32 %A, 2
+; CHECK: ret i32 %C
}
define i16 @test5(i16 %A) {
@@ -30,22 +36,28 @@ define i16 @test5(i16 %A) {
%C = and i32 %B, 15 ; <i32> [#uses=1]
%D = trunc i32 %C to i16 ; <i16> [#uses=1]
ret i16 %D
+; CHECK: %C = and i16 %A, 15
+; CHECK: ret i16 %C
}
define i1 @test6(i1 %A) {
%B = zext i1 %A to i32 ; <i32> [#uses=1]
%C = icmp ne i32 %B, 0 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 %A
}
define i1 @test6a(i1 %A) {
%B = zext i1 %A to i32 ; <i32> [#uses=1]
%C = icmp ne i32 %B, -1 ; <i1> [#uses=1]
ret i1 %C
+; CHECK: ret i1 true
}
define i1 @test7(i8* %A) {
%B = bitcast i8* %A to i32* ; <i32*> [#uses=1]
%C = icmp eq i32* %B, null ; <i1> [#uses=1]
ret i1 %C
+; CHECK: %C = icmp eq i8* %A, null
+; CHECK: ret i1 %C
}
diff --git a/test/Transforms/InstCombine/cast-sext-zext.ll b/test/Transforms/InstCombine/cast-sext-zext.ll
index 1acd7582100f..0fecc1ce127d 100644
--- a/test/Transforms/InstCombine/cast-sext-zext.ll
+++ b/test/Transforms/InstCombine/cast-sext-zext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sext
+; RUN: opt < %s -instcombine -S | not grep sext
; XFAIL: *
define zeroext i16 @t(i8 zeroext %on_off, i16* nocapture %puls) nounwind readonly {
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 7a1e7a802dd3..9835d657c9bb 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,5 +1,5 @@
; Tests to make sure elimination of casts is working correctly
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep %c | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
@inbuf = external global [32832 x i8] ; <[32832 x i8]*> [#uses=1]
@@ -7,6 +7,7 @@ define i32 @test1(i32 %A) {
%c1 = bitcast i32 %A to i32 ; <i32> [#uses=1]
%c2 = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
ret i32 %c2
+; CHECK: ret i32 %A
}
define i64 @test2(i8 %A) {
@@ -14,6 +15,8 @@ define i64 @test2(i8 %A) {
%c2 = zext i16 %c1 to i32 ; <i32> [#uses=1]
%Ret = zext i32 %c2 to i64 ; <i64> [#uses=1]
ret i64 %Ret
+; CHECK: %Ret = zext i8 %A to i64
+; CHECK: ret i64 %Ret
}
; This function should just use bitwise AND
@@ -21,6 +24,8 @@ define i64 @test3(i64 %A) {
%c1 = trunc i64 %A to i8 ; <i8> [#uses=1]
%c2 = zext i8 %c1 to i64 ; <i64> [#uses=1]
ret i64 %c2
+; CHECK: %c2 = and i64 %A, 255
+; CHECK: ret i64 %c2
}
define i32 @test4(i32 %A, i32 %B) {
@@ -30,6 +35,9 @@ define i32 @test4(i32 %A, i32 %B) {
; for the cast elim purpose
%result = zext i8 %c to i32 ; <i32> [#uses=1]
ret i32 %result
+; CHECK: %COND = icmp slt i32 %A, %B
+; CHECK: %result = zext i1 %COND to i32
+; CHECK: ret i32 %result
}
define i32 @test5(i1 %B) {
@@ -38,36 +46,46 @@ define i32 @test5(i1 %B) {
; this cast
%result = zext i8 %c to i32 ; <i32> [#uses=1]
ret i32 %result
+; CHECK: %result = zext i1 %B to i32
+; CHECK: ret i32 %result
}
define i32 @test6(i64 %A) {
%c1 = trunc i64 %A to i32 ; <i32> [#uses=1]
%res = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
ret i32 %res
+; CHECK: %res = trunc i64 %A to i32
+; CHECK: ret i32 %res
}
define i64 @test7(i1 %A) {
%c1 = zext i1 %A to i32 ; <i32> [#uses=1]
%res = sext i32 %c1 to i64 ; <i64> [#uses=1]
ret i64 %res
+; CHECK: %res = zext i1 %A to i64
+; CHECK: ret i64 %res
}
define i64 @test8(i8 %A) {
%c1 = sext i8 %A to i64 ; <i64> [#uses=1]
%res = bitcast i64 %c1 to i64 ; <i64> [#uses=1]
ret i64 %res
+; CHECK: %res = sext i8 %A to i64
+; CHECK: ret i64 %res
}
define i16 @test9(i16 %A) {
%c1 = sext i16 %A to i32 ; <i32> [#uses=1]
%c2 = trunc i32 %c1 to i16 ; <i16> [#uses=1]
ret i16 %c2
+; CHECK: ret i16 %A
}
define i16 @test10(i16 %A) {
%c1 = sext i16 %A to i32 ; <i32> [#uses=1]
%c2 = trunc i32 %c1 to i16 ; <i16> [#uses=1]
ret i16 %c2
+; CHECK: ret i16 %A
}
declare void @varargs(i32, ...)
@@ -76,22 +94,31 @@ define void @test11(i32* %P) {
%c = bitcast i32* %P to i16* ; <i16*> [#uses=1]
call void (i32, ...)* @varargs( i32 5, i16* %c )
ret void
+; CHECK: call void (i32, ...)* @varargs(i32 5, i32* %P)
+; CHECK: ret void
}
define i32* @test12() {
%p = malloc [4 x i8] ; <[4 x i8]*> [#uses=1]
%c = bitcast [4 x i8]* %p to i32* ; <i32*> [#uses=1]
ret i32* %c
+; CHECK: %p = malloc i32
+; CHECK: ret i32* %p
}
+
define i8* @test13(i64 %A) {
%c = getelementptr [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A ; <i8*> [#uses=1]
ret i8* %c
+; CHECK: %c = getelementptr [32832 x i8]* @inbuf, i64 0, i64 %A
+; CHECK: ret i8* %c
}
define i1 @test14(i8 %A) {
%c = bitcast i8 %A to i8 ; <i8> [#uses=1]
%X = icmp ult i8 %c, -128 ; <i1> [#uses=1]
ret i1 %X
+; CHECK: %X = icmp sgt i8 %A, -1
+; CHECK: ret i1 %X
}
@@ -105,24 +132,32 @@ define i1 @test14(i8 %A) {
define i1 @test16(i32* %P) {
%c = icmp ne i32* %P, null ; <i1> [#uses=1]
ret i1 %c
+; CHECK: %c = icmp ne i32* %P, null
+; CHECK: ret i1 %c
}
define i16 @test17(i1 %tmp3) {
%c = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
%t86 = trunc i32 %c to i16 ; <i16> [#uses=1]
ret i16 %t86
+; CHECK: %t86 = zext i1 %tmp3 to i16
+; CHECK: ret i16 %t86
}
define i16 @test18(i8 %tmp3) {
%c = sext i8 %tmp3 to i32 ; <i32> [#uses=1]
%t86 = trunc i32 %c to i16 ; <i16> [#uses=1]
ret i16 %t86
+; CHECK: %t86 = sext i8 %tmp3 to i16
+; CHECK: ret i16 %t86
}
define i1 @test19(i32 %X) {
%c = sext i32 %X to i64 ; <i64> [#uses=1]
%Z = icmp slt i64 %c, 12345 ; <i1> [#uses=1]
ret i1 %Z
+; CHECK: %Z = icmp slt i32 %X, 12345
+; CHECK: ret i1 %Z
}
define i1 @test20(i1 %B) {
@@ -130,6 +165,7 @@ define i1 @test20(i1 %B) {
%D = icmp slt i32 %c, -1 ; <i1> [#uses=1]
;; false
ret i1 %D
+; CHECK: ret i1 false
}
define i32 @test21(i32 %X) {
@@ -138,6 +174,8 @@ define i32 @test21(i32 %X) {
%c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
%RV = and i32 %c2, 255 ; <i32> [#uses=1]
ret i32 %RV
+; CHECK: %c21 = and i32 %X, 255
+; CHECK: ret i32 %c21
}
define i32 @test22(i32 %X) {
@@ -146,6 +184,8 @@ define i32 @test22(i32 %X) {
%c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
%RV = shl i32 %c2, 24 ; <i32> [#uses=1]
ret i32 %RV
+; CHECK: %RV = shl i32 %X, 24
+; CHECK: ret i32 %RV
}
define i32 @test23(i32 %X) {
@@ -154,6 +194,8 @@ define i32 @test23(i32 %X) {
;; and Z are signed.
%c2 = zext i16 %c1 to i32 ; <i32> [#uses=1]
ret i32 %c2
+; CHECK: %c2 = and i32 %X, 65535
+; CHECK: ret i32 %c2
}
define i1 @test24(i1 %C) {
@@ -161,6 +203,7 @@ define i1 @test24(i1 %C) {
;; Fold cast into select
%c = icmp ne i32 %X, 0 ; <i1> [#uses=1]
ret i1 %c
+; CHECK: ret i1 true
}
define void @test25(i32** %P) {
@@ -168,6 +211,8 @@ define void @test25(i32** %P) {
;; Fold cast into null
store float* null, float** %c
ret void
+; CHECK: store i32* null, i32** %P
+; CHECK: ret void
}
define i32 @test26(float %F) {
@@ -175,16 +220,22 @@ define i32 @test26(float %F) {
%c = fpext float %F to double ; <double> [#uses=1]
%D = fptosi double %c to i32 ; <i32> [#uses=1]
ret i32 %D
+; CHECK: %D = fptosi float %F to i32
+; CHECK: ret i32 %D
}
define [4 x float]* @test27([9 x [4 x float]]* %A) {
%c = bitcast [9 x [4 x float]]* %A to [4 x float]* ; <[4 x float]*> [#uses=1]
ret [4 x float]* %c
+; CHECK: %c = getelementptr inbounds [9 x [4 x float]]* %A, i64 0, i64 0
+; CHECK: ret [4 x float]* %c
}
define float* @test28([4 x float]* %A) {
%c = bitcast [4 x float]* %A to float* ; <float*> [#uses=1]
ret float* %c
+; CHECK: %c = getelementptr inbounds [4 x float]* %A, i64 0, i64 0
+; CHECK: ret float* %c
}
define i32 @test29(i32 %c1, i32 %c2) {
@@ -193,6 +244,9 @@ define i32 @test29(i32 %c1, i32 %c2) {
%tmp = or i8 %tmp4.mask, %tmp1 ; <i8> [#uses=1]
%tmp10 = zext i8 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp10
+; CHECK: %tmp2 = or i32 %c2, %c1
+; CHECK: %tmp10 = and i32 %tmp2, 255
+; CHECK: ret i32 %tmp10
}
define i32 @test30(i32 %c1) {
@@ -200,6 +254,9 @@ define i32 @test30(i32 %c1) {
%c3 = xor i8 %c2, 1 ; <i8> [#uses=1]
%c4 = zext i8 %c3 to i32 ; <i32> [#uses=1]
ret i32 %c4
+; CHECK: %c3 = and i32 %c1, 255
+; CHECK: %c4 = xor i32 %c3, 1
+; CHECK: ret i32 %c4
}
define i1 @test31(i64 %A) {
@@ -207,6 +264,9 @@ define i1 @test31(i64 %A) {
%C = and i32 %B, 42 ; <i32> [#uses=1]
%D = icmp eq i32 %C, 10 ; <i1> [#uses=1]
ret i1 %D
+; CHECK: %C1 = and i64 %A, 42
+; CHECK: %D = icmp eq i64 %C1, 10
+; CHECK: ret i1 %D
}
define void @test32(double** %tmp) {
@@ -214,12 +274,17 @@ define void @test32(double** %tmp) {
%tmp8.upgrd.1 = bitcast [16 x i8]* %tmp8 to double* ; <double*> [#uses=1]
store double* %tmp8.upgrd.1, double** %tmp
ret void
+; CHECK: %tmp81 = malloc [2 x double]
+; CHECK: %tmp81.sub = getelementptr inbounds [2 x double]* %tmp81, i64 0, i64 0
+; CHECK: store double* %tmp81.sub, double** %tmp
+; CHECK: ret void
}
define i32 @test33(i32 %c1) {
%x = bitcast i32 %c1 to float ; <float> [#uses=1]
%y = bitcast float %x to i32 ; <i32> [#uses=1]
ret i32 %y
+; CHECK: ret i32 %c1
}
define i16 @test34(i16 %a) {
@@ -227,6 +292,8 @@ define i16 @test34(i16 %a) {
%tmp21 = lshr i32 %c1, 8 ; <i32> [#uses=1]
%c2 = trunc i32 %tmp21 to i16 ; <i16> [#uses=1]
ret i16 %c2
+; CHECK: %tmp21 = lshr i16 %a, 8
+; CHECK: ret i16 %tmp21
}
define i16 @test35(i16 %a) {
@@ -234,6 +301,8 @@ define i16 @test35(i16 %a) {
%tmp2 = lshr i16 %c1, 8 ; <i16> [#uses=1]
%c2 = bitcast i16 %tmp2 to i16 ; <i16> [#uses=1]
ret i16 %c2
+; CHECK: %tmp2 = lshr i16 %a, 8
+; CHECK: ret i16 %tmp2
}
; icmp sgt i32 %a, -1
@@ -243,6 +312,8 @@ define i1 @test36(i32 %a) {
%c = trunc i32 %b to i8
%d = icmp eq i8 %c, 0
ret i1 %d
+; CHECK: %d = icmp sgt i32 %a, -1
+; CHECK: ret i1 %d
}
; ret i1 false
@@ -252,6 +323,7 @@ define i1 @test37(i32 %a) {
%d = trunc i32 %c to i8
%e = icmp eq i8 %d, 11
ret i1 %e
+; CHECK: ret i1 false
}
define i64 @test38(i32 %a) {
@@ -260,4 +332,7 @@ define i64 @test38(i32 %a) {
%3 = xor i8 %2, 1
%4 = zext i8 %3 to i64
ret i64 %4
+; CHECK: %1 = icmp ne i32 %a, -2
+; CHECK: %2 = zext i1 %1 to i64
+; CHECK: ret i64 %2
}
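The recurring RUN-line change in this patch replaces the old
llvm-as | opt | llvm-dis | notcast pipelines with direct FileCheck matching.
A minimal standalone reproduction of the trunc+zext fold checked in test3
above could look like the sketch below; the file and function names are
illustrative, and the exact value names in the output depend on the opt
revision in use.

; trunc-zext.ll (hypothetical file name)
; RUN: opt < %s -instcombine -S | FileCheck %s
define i64 @mask_low_byte(i64 %A) {
  %c1 = trunc i64 %A to i8    ; keep only the low 8 bits
  %c2 = zext i8 %c1 to i64    ; widen back with zero fill
  ret i64 %c2
; CHECK: and i64 %A, 255
; CHECK: ret i64
}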
diff --git a/test/Transforms/InstCombine/cast2.ll b/test/Transforms/InstCombine/cast2.ll
index 5cc9087198b2..0ae869fa4938 100644
--- a/test/Transforms/InstCombine/cast2.ll
+++ b/test/Transforms/InstCombine/cast2.ll
@@ -1,5 +1,5 @@
; Tests to make sure elimination of casts is working correctly
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i16 @test1(i16 %a) {
%tmp = zext i16 %a to i32 ; <i32> [#uses=2]
@@ -8,6 +8,8 @@ define i16 @test1(i16 %a) {
%tmp.upgrd.32 = or i32 %tmp21, %tmp5 ; <i32> [#uses=1]
%tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16 ; <i16> [#uses=1]
ret i16 %tmp.upgrd.3
+; CHECK: %tmp.upgrd.32 = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK: ret i16 %tmp.upgrd.32
}
define i16 @test2(i16 %a) {
@@ -17,6 +19,10 @@ define i16 @test2(i16 %a) {
%tmp.upgrd.32 = or i32 %tmp21, %tmp5 ; <i32> [#uses=1]
%tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16 ; <i16> [#uses=1]
ret i16 %tmp.upgrd.3
+; CHECK: %tmp21 = lshr i16 %a, 9
+; CHECK: %tmp5 = shl i16 %a, 8
+; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
+; CHECK: ret i16 %tmp.upgrd.32
}
; PR1263
@@ -24,6 +30,7 @@ define i32* @test3(i32* %tmp1) {
%tmp64 = bitcast i32* %tmp1 to { i32 }* ; <{ i32 }*> [#uses=1]
%tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0 ; <i32*> [#uses=1]
ret i32* %tmp65
+; CHECK: ret i32* %tmp1
}
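The test1 expectation just above relies on instcombine's byte-swap matcher:
an or of a right-shift-by-8 and a left-shift-by-8 of the same i16 value is a
16-bit byte swap, so it canonicalizes to the llvm.bswap.i16 intrinsic. A
minimal sketch of the bare idiom (illustrative names, assuming an instcombine
contemporary with this revision):

; RUN: opt < %s -instcombine -S | FileCheck %s
define i16 @swap_bytes(i16 %x) {
  %hi = lshr i16 %x, 8        ; high byte moves to the low position
  %lo = shl i16 %x, 8         ; low byte moves to the high position
  %r = or i16 %hi, %lo        ; together this is a byte swap
  ret i16 %r
; CHECK: call i16 @llvm.bswap.i16(i16 %x)
}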
diff --git a/test/Transforms/InstCombine/cast3.ll b/test/Transforms/InstCombine/cast3.ll
new file mode 100644
index 000000000000..bc60f55c48c8
--- /dev/null
+++ b/test/Transforms/InstCombine/cast3.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | not grep getelementptr
+; PR2831
+
+; Don't raise arbitrary inttoptr+arithmetic+ptrtoint to getelementptr.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+ %0 = ptrtoint i8** %argv to i32 ; <i32> [#uses=1]
+ %1 = add i32 %0, 1 ; <i32> [#uses=1]
+ ret i32 %1
+}
+
+; This testcase could theoretically be optimized down to return zero,
+; but for now being conservative with ptrtoint/inttoptr is fine.
+define i32 @a() nounwind {
+entry:
+ %b = alloca i32 ; <i32*> [#uses=3]
+ %a = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 1, i32* %b, align 4
+ %a1 = ptrtoint i32* %a to i32 ; <i32> [#uses=1]
+ %b4 = ptrtoint i32* %b to i32 ; <i32> [#uses=1]
+ %a7 = ptrtoint i32* %a to i32 ; <i32> [#uses=1]
+ %0 = sub i32 %b4, %a7 ; <i32> [#uses=1]
+ %1 = add i32 %a1, %0 ; <i32> [#uses=1]
+ %2 = inttoptr i32 %1 to i32* ; <i32*> [#uses=1]
+ store i32 0, i32* %2, align 4
+ %3 = load i32* %b, align 4 ; <i32> [#uses=1]
+ br label %return
+
+return: ; preds = %entry
+ ret i32 %3
+}
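To spell out why @a could in principle fold to a constant: %a1 and %a7 are
both ptrtoint of the same %a, so the integer arithmetic collapses:

;   %0 = %b4 - %a7         =  b - a
;   %1 = %a1 + %0          =  a + (b - a)  =  b
;   %2 = inttoptr i32 %1   ;  aliases %b again

so the store of 0 through %2 overwrites the earlier store of 1 to %b, and the
final load would read 0. InstCombine deliberately declines to reason through
the inttoptr round trip here, hence the conservative expectation above.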
diff --git a/test/Transforms/InstCombine/cast_ld_addr_space.ll b/test/Transforms/InstCombine/cast_ld_addr_space.ll
index beb20e38545a..e94dce7e906b 100644
--- a/test/Transforms/InstCombine/cast_ld_addr_space.ll
+++ b/test/Transforms/InstCombine/cast_ld_addr_space.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep bitcast | count 1
+; RUN: opt < %s -instcombine -S | grep bitcast | count 1
; InstCombine cannot fold 'load (cast P)' -> 'cast (load P)' if the cast changes
; the address space.
diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll
index fd600a883047..6544e7d735a4 100644
--- a/test/Transforms/InstCombine/cast_ptr.ll
+++ b/test/Transforms/InstCombine/cast_ptr.ll
@@ -1,8 +1,15 @@
; Tests to make sure elimination of casts is working correctly
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | notcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "p:32:32"
+; This shouldn't convert to getelementptr because the relationship
+; between the arithmetic and the layout of allocated memory is
+; entirely unknown.
+; CHECK: @test1
+; CHECK: ptrtoint
+; CHECK: add
+; CHECK: inttoptr
define i8* @test1(i8* %t) {
%tmpc = ptrtoint i8* %t to i32 ; <i32> [#uses=1]
%tmpa = add i32 %tmpc, 32 ; <i32> [#uses=1]
@@ -10,6 +17,9 @@ define i8* @test1(i8* %t) {
ret i8* %tv
}
+; These casts should be folded away.
+; CHECK: @test2
+; CHECK: icmp eq i8* %a, %b
define i1 @test2(i8* %a, i8* %b) {
%tmpa = ptrtoint i8* %a to i32 ; <i32> [#uses=1]
%tmpb = ptrtoint i8* %b to i32 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
new file mode 100644
index 000000000000..5a7aef3d397a
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Constant folding should fix notionally out-of-bounds indices
+; and add inbounds keywords.
+
+%struct.X = type { [3 x i32], [3 x i32] }
+
+@Y = internal global [3 x %struct.X] zeroinitializer
+
+define void @frob() {
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 3), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 4), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 2), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 5), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 6), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 7), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 8), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 0), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 9), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 10), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 11), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 14), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 16), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
+ ret void
+}
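The expected rewrites in @frob are plain index arithmetic over the layout of
%struct.X, which occupies 6 i32 slots (two [3 x i32] arrays), so @Y holds
3 * 6 = 18 slots in all. A flat slot index k decomposes as k = 6*x + 3*f + i,
with x the %struct.X element, f the field, and i the index within the field.
Worked instances matching the CHECK lines above:

;   k = 7:   7 = 6*1 + 3*0 + 1   ->  @Y, i64 0, i64 1, i32 0, i64 1
;   k = 14:  14 = 6*2 + 3*0 + 2  ->  @Y, i64 0, i64 2, i32 0, i64 2
;   k = 36:  two whole copies of @Y past the base (36 = 18*2), so it
;            folds to a pointer-level index of 2 and, being out of
;            bounds, earns no inbounds keyword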
diff --git a/test/Transforms/InstCombine/constant-fold-ptr-casts.ll b/test/Transforms/InstCombine/constant-fold-ptr-casts.ll
index 27c460689184..9b6c6c3f58f2 100644
--- a/test/Transforms/InstCombine/constant-fold-ptr-casts.ll
+++ b/test/Transforms/InstCombine/constant-fold-ptr-casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 2143034560}
+; RUN: opt < %s -instcombine -S | grep {ret i32 2143034560}
; Instcombine should be able to completely fold this code.
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
new file mode 100644
index 000000000000..d475ab5bc57c
--- /dev/null
+++ b/test/Transforms/InstCombine/crash.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -instcombine | llvm-dis
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+define i32 @test0(i8 %tmp2) ssp {
+entry:
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp8 = lshr i32 %tmp3, 6
+ %tmp9 = lshr i32 %tmp3, 7
+ %tmp10 = xor i32 %tmp9, 67108858
+ %tmp11 = xor i32 %tmp10, %tmp8
+ %tmp12 = xor i32 %tmp11, 0
+ ret i32 %tmp12
+}
+
+; PR4905
+define <2 x i64> @test1(<2 x i64> %x, <2 x i64> %y) nounwind {
+entry:
+ %conv.i94 = bitcast <2 x i64> %y to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %sub.i97 = sub <4 x i32> %conv.i94, undef ; <<4 x i32>> [#uses=1]
+ %conv3.i98 = bitcast <4 x i32> %sub.i97 to <2 x i64> ; <<2 x i64>> [#uses=2]
+ %conv2.i86 = bitcast <2 x i64> %conv3.i98 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %cmp.i87 = icmp sgt <4 x i32> undef, %conv2.i86 ; <<4 x i1>> [#uses=1]
+ %sext.i88 = sext <4 x i1> %cmp.i87 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %conv3.i89 = bitcast <4 x i32> %sext.i88 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %and.i = and <2 x i64> %conv3.i89, %conv3.i98 ; <<2 x i64>> [#uses=1]
+ %or.i = or <2 x i64> zeroinitializer, %and.i ; <<2 x i64>> [#uses=1]
+ %conv2.i43 = bitcast <2 x i64> %or.i to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %sub.i = sub <4 x i32> zeroinitializer, %conv2.i43 ; <<4 x i32>> [#uses=1]
+ %conv3.i44 = bitcast <4 x i32> %sub.i to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %conv3.i44
+}
+
+
+; PR4908
+define void @test2(<1 x i16>* nocapture %b, i32* nocapture %c) nounwind ssp {
+entry:
+ %arrayidx = getelementptr inbounds <1 x i16>* %b, i64 undef ; <<1 x i16>*>
+ %tmp2 = load <1 x i16>* %arrayidx ; <<1 x i16>> [#uses=1]
+ %tmp6 = bitcast <1 x i16> %tmp2 to i16 ; <i16> [#uses=1]
+ %tmp7 = zext i16 %tmp6 to i32 ; <i32> [#uses=1]
+ %ins = or i32 0, %tmp7 ; <i32> [#uses=1]
+ %arrayidx20 = getelementptr inbounds i32* %c, i64 undef ; <i32*> [#uses=1]
+ store i32 %ins, i32* %arrayidx20
+ ret void
+}
diff --git a/test/Transforms/InstCombine/dce-iterate.ll b/test/Transforms/InstCombine/dce-iterate.ll
index faefa8add9ec..1d2cc53210fe 100644
--- a/test/Transforms/InstCombine/dce-iterate.ll
+++ b/test/Transforms/InstCombine/dce-iterate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret double .sy}
+; RUN: opt < %s -instcombine -S | grep {ret double .sy}
define internal double @ScaleObjectAdd(double %sx, double %sy, double %sz) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/deadcode.ll b/test/Transforms/InstCombine/deadcode.ll
index 43c179315548..52af0ef4e8d4 100644
--- a/test/Transforms/InstCombine/deadcode.ll
+++ b/test/Transforms/InstCombine/deadcode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 %A}
-; RUN: llvm-as < %s | opt -die | llvm-dis | not grep call.*llvm.stacksave
+; RUN: opt < %s -instcombine -S | grep {ret i32 %A}
+; RUN: opt < %s -die -S | not grep call.*llvm.stacksave
define i32 @test(i32 %A) {
%X = or i1 false, false
diff --git a/test/Transforms/InstCombine/div-cmp-overflow.ll b/test/Transforms/InstCombine/div-cmp-overflow.ll
index 9276c96479d7..6f63adcd2e17 100644
--- a/test/Transforms/InstCombine/div-cmp-overflow.ll
+++ b/test/Transforms/InstCombine/div-cmp-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sdiv
+; RUN: opt < %s -instcombine -S | not grep sdiv
; PR2740
define i1 @func_75(i32 %i2) nounwind {
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index ea6ea6323c56..0d1398082601 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -1,6 +1,6 @@
; This test makes sure that div instructions are properly eliminated.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep div
+; RUN: opt < %s -instcombine -S | not grep div
define i32 @test1(i32 %A) {
%B = sdiv i32 %A, 1 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll
index 6ac24a85a22e..9e9be7f56575 100644
--- a/test/Transforms/InstCombine/enforce-known-alignment.ll
+++ b/test/Transforms/InstCombine/enforce-known-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep alloca | grep {align 16}
+; RUN: opt < %s -instcombine -S | grep alloca | grep {align 16}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/Transforms/InstCombine/exact-sdiv.ll b/test/Transforms/InstCombine/exact-sdiv.ll
new file mode 100644
index 000000000000..e5677541e952
--- /dev/null
+++ b/test/Transforms/InstCombine/exact-sdiv.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: define i32 @foo
+; CHECK: sdiv i32 %x, 8
+define i32 @foo(i32 %x) {
+ %y = sdiv i32 %x, 8
+ ret i32 %y
+}
+
+; CHECK: define i32 @bar
+; CHECK: ashr i32 %x, 3
+define i32 @bar(i32 %x) {
+ %y = sdiv exact i32 %x, 8
+ ret i32 %y
+}
+
+; CHECK: i32 @a0
+; CHECK: %y = srem i32 %x, 3
+; CHECK: %z = sub i32 %x, %y
+; CHECK: ret i32 %z
+define i32 @a0(i32 %x) {
+ %y = sdiv i32 %x, 3
+ %z = mul i32 %y, 3
+ ret i32 %z
+}
+
+; CHECK: i32 @b0
+; CHECK: ret i32 %x
+define i32 @b0(i32 %x) {
+ %y = sdiv exact i32 %x, 3
+ %z = mul i32 %y, 3
+ ret i32 %z
+}
+
+; CHECK: i32 @a1
+; CHECK: %y = srem i32 %x, 3
+; CHECK: %z = sub i32 %y, %x
+; CHECK: ret i32 %z
+define i32 @a1(i32 %x) {
+ %y = sdiv i32 %x, 3
+ %z = mul i32 %y, -3
+ ret i32 %z
+}
+
+; CHECK: i32 @b1
+; CHECK: %z = sub i32 0, %x
+; CHECK: ret i32 %z
+define i32 @b1(i32 %x) {
+ %y = sdiv exact i32 %x, 3
+ %z = mul i32 %y, -3
+ ret i32 %z
+}
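The @foo/@bar pair at the top captures the semantic gap between the two
operations: sdiv rounds toward zero while ashr floors, so they agree only
when the division is exact. A one-value check makes the point:

;   x = -1:   sdiv i32 -1, 8  =  0     (rounds toward zero)
;             ashr i32 -1, 3  =  -1    (floors)
;   with the exact flag, %x is a multiple of 8, the remainder is zero,
;   and the two results coincide, so the shift is a safe strength
;   reduction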
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index 59b996e130e1..875f860b3d7f 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep extractvalue
+; RUN: opt < %s -instcombine -S | not grep extractvalue
; Instcombine should fold various combinations of insertvalue and extractvalue
; together
diff --git a/test/Transforms/InstCombine/fold-bin-operand.ll b/test/Transforms/InstCombine/fold-bin-operand.ll
new file mode 100644
index 000000000000..b837985a263b
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -instcombine -S | not grep icmp
+
+define i1 @f(i1 %x) {
+ %b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
+ ret i1 %b
+}
+
+; FIXME: This doesn't fold at the moment!
+; define i32 @f(i32 %x) {
+; %b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
+; ret i32 %b
+;}
+
diff --git a/test/Transforms/InstCombine/fold-vector-zero.ll b/test/Transforms/InstCombine/fold-vector-zero.ll
index 0b5b30a5c6e7..e1d86b6cd07c 100644
--- a/test/Transforms/InstCombine/fold-vector-zero.ll
+++ b/test/Transforms/InstCombine/fold-vector-zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep zeroinitializer
+; RUN: opt < %s -instcombine -S | not grep zeroinitializer
define void @foo(i64 %A, i64 %B) {
bb8:
diff --git a/test/Transforms/InstCombine/fp-ret-bitcast.ll b/test/Transforms/InstCombine/fp-ret-bitcast.ll
index 00c1ea0b96a6..169340abf360 100644
--- a/test/Transforms/InstCombine/fp-ret-bitcast.ll
+++ b/test/Transforms/InstCombine/fp-ret-bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {call float bitcast} | count 1
%struct.NSObject = type { %struct.objc_class* }
%struct.NSArray = type { %struct.NSObject }
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 7f25092976f9..bc6aa0a6891f 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -1,15 +1,15 @@
; Test some floating point casting cases
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | notcast
-; RUN: llvm-as %s -o - | opt -instcombine | llvm-dis | \
-; RUN: egrep {ret i8 \(-1\)\|\(255\)}
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i8 @test1() {
%x = fptoui float 2.550000e+02 to i8 ; <i8> [#uses=1]
ret i8 %x
+; CHECK: ret i8 -1
}
define i8 @test2() {
%x = fptosi float -1.000000e+00 to i8 ; <i8> [#uses=1]
ret i8 %x
+; CHECK: ret i8 -1
}
diff --git a/test/Transforms/InstCombine/fpextend.ll b/test/Transforms/InstCombine/fpextend.ll
index c212128eb99a..70e0c62dd70e 100644
--- a/test/Transforms/InstCombine/fpextend.ll
+++ b/test/Transforms/InstCombine/fpextend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep fpext
+; RUN: opt < %s -instcombine -S | not grep fpext
@X = external global float
@Y = external global float
diff --git a/test/Transforms/InstCombine/fsub-fsub.ll b/test/Transforms/InstCombine/fsub-fsub.ll
index ab70479204b1..94ebf090ff33 100644
--- a/test/Transforms/InstCombine/fsub-fsub.ll
+++ b/test/Transforms/InstCombine/fsub-fsub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep fsub | count 2
+; RUN: opt < %s -instcombine -S | grep fsub | count 2
; PR4374
define float @func(float %a, float %b) nounwind {
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index c2cb72080319..ffaa6afa85e3 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -1,81 +1,470 @@
-; The %A getelementptr instruction should be eliminated here
+; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: grep -v %B | not grep getelementptr
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep foo1
-; END.
+target datalayout = "e-p:64:64"
+%intstruct = type { i32 }
+%pair = type { i32, i32 }
+%struct.B = type { double }
+%struct.A = type { %struct.B, i32, i32 }
-@Global = constant [10 x i8] c"helloworld" ; <[10 x i8]*> [#uses=1]
+
+@Global = constant [10 x i8] c"helloworld"
; Test noop elimination
-define i32* @foo1(i32* %I) {
- %A = getelementptr i32* %I, i64 0 ; <i32*> [#uses=1]
+define i32* @test1(i32* %I) {
+ %A = getelementptr i32* %I, i64 0
ret i32* %A
+; CHECK: @test1
+; CHECK: ret i32* %I
}
; Test noop elimination
-define i32* @foo2(i32* %I) {
- %A = getelementptr i32* %I ; <i32*> [#uses=1]
+define i32* @test2(i32* %I) {
+ %A = getelementptr i32* %I
ret i32* %A
+; CHECK: @test2
+; CHECK: ret i32* %I
}
; Test that two array indexing geps fold
-define i32* @foo3(i32* %I) {
- %A = getelementptr i32* %I, i64 17 ; <i32*> [#uses=1]
- %B = getelementptr i32* %A, i64 4 ; <i32*> [#uses=1]
+define i32* @test3(i32* %I) {
+ %A = getelementptr i32* %I, i64 17
+ %B = getelementptr i32* %A, i64 4
ret i32* %B
+; CHECK: @test3
+; CHECK: getelementptr i32* %I, i64 21
}
; Test that two getelementptr insts fold
-define i32* @foo4({ i32 }* %I) {
- %A = getelementptr { i32 }* %I, i64 1 ; <{ i32 }*> [#uses=1]
- %B = getelementptr { i32 }* %A, i64 0, i32 0 ; <i32*> [#uses=1]
+define i32* @test4({ i32 }* %I) {
+ %A = getelementptr { i32 }* %I, i64 1
+ %B = getelementptr { i32 }* %A, i64 0, i32 0
ret i32* %B
+; CHECK: @test4
+; CHECK: getelementptr %intstruct* %I, i64 1, i32 0
}
-define void @foo5(i8 %B) {
+define void @test5(i8 %B) {
; This should be turned into a constexpr instead of being an instruction
- %A = getelementptr [10 x i8]* @Global, i64 0, i64 4 ; <i8*> [#uses=1]
+ %A = getelementptr [10 x i8]* @Global, i64 0, i64 4
store i8 %B, i8* %A
ret void
+; CHECK: @test5
+; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8]* @Global, i64 0, i64 4)
}
-define i32* @foo6() {
- %M = malloc [4 x i32] ; <[4 x i32]*> [#uses=1]
- %A = getelementptr [4 x i32]* %M, i64 0, i64 0 ; <i32*> [#uses=1]
- %B = getelementptr i32* %A, i64 2 ; <i32*> [#uses=1]
+define i32* @test6() {
+ %M = malloc [4 x i32]
+ %A = getelementptr [4 x i32]* %M, i64 0, i64 0
+ %B = getelementptr i32* %A, i64 2
ret i32* %B
+; CHECK: @test6
+; CHECK: getelementptr [4 x i32]* %M, i64 0, i64 2
}
-define i32* @foo7(i32* %I, i64 %C, i64 %D) {
- %A = getelementptr i32* %I, i64 %C ; <i32*> [#uses=1]
- %B = getelementptr i32* %A, i64 %D ; <i32*> [#uses=1]
+define i32* @test7(i32* %I, i64 %C, i64 %D) {
+ %A = getelementptr i32* %I, i64 %C
+ %B = getelementptr i32* %A, i64 %D
ret i32* %B
+; CHECK: @test7
+; CHECK: %A.sum = add i64 %C, %D
+; CHECK: getelementptr i32* %I, i64 %A.sum
}
-define i8* @foo8([10 x i32]* %X) {
+define i8* @test8([10 x i32]* %X) {
;; Fold into the cast.
- %A = getelementptr [10 x i32]* %X, i64 0, i64 0 ; <i32*> [#uses=1]
- %B = bitcast i32* %A to i8* ; <i8*> [#uses=1]
+ %A = getelementptr [10 x i32]* %X, i64 0, i64 0
+ %B = bitcast i32* %A to i8*
ret i8* %B
+; CHECK: @test8
+; CHECK: bitcast [10 x i32]* %X to i8*
}
define i32 @test9() {
- %A = getelementptr { i32, double }* null, i32 0, i32 1 ; <double*> [#uses=1]
- %B = ptrtoint double* %A to i32 ; <i32> [#uses=1]
+ %A = getelementptr { i32, double }* null, i32 0, i32 1
+ %B = ptrtoint double* %A to i32
ret i32 %B
+; CHECK: @test9
+; CHECK: ret i32 8
}
define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) {
- %tmp.1 = getelementptr { i32, i32 }* %x, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp.3 = getelementptr { i32, i32 }* %y, i32 0, i32 1 ; <i32*> [#uses=1]
+ %tmp.1 = getelementptr { i32, i32 }* %x, i32 0, i32 1
+ %tmp.3 = getelementptr { i32, i32 }* %y, i32 0, i32 1
;; seteq x, y
- %tmp.4 = icmp eq i32* %tmp.1, %tmp.3 ; <i1> [#uses=1]
+ %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
ret i1 %tmp.4
+; CHECK: @test10
+; CHECK: icmp eq %pair* %x, %y
}
define i1 @test11({ i32, i32 }* %X) {
- %P = getelementptr { i32, i32 }* %X, i32 0, i32 0 ; <i32*> [#uses=1]
- %Q = icmp eq i32* %P, null ; <i1> [#uses=1]
+ %P = getelementptr { i32, i32 }* %X, i32 0, i32 0
+ %Q = icmp eq i32* %P, null
ret i1 %Q
+; CHECK: @test11
+; CHECK: icmp eq %pair* %X, null
+}
+
+
+; PR4748
+define i32 @test12(%struct.A* %a) {
+entry:
+ %g3 = getelementptr %struct.A* %a, i32 0, i32 1
+ store i32 10, i32* %g3, align 4
+
+ %g4 = getelementptr %struct.A* %a, i32 0, i32 0
+
+ %new_a = bitcast %struct.B* %g4 to %struct.A*
+
+ %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1
+ %a_a = load i32* %g5, align 4
+ ret i32 %a_a
+; CHECK: @test12
+; CHECK: getelementptr %struct.A* %a, i64 0, i32 1
+; CHECK-NEXT: store i32 10, i32* %g3
+; CHECK-NEXT: ret i32 10
+}
+
+
+; PR2235
+%S = type { i32, [ 100 x i32] }
+define i1 @test13(i64 %X, %S* %P) {
+ %A = getelementptr inbounds %S* %P, i32 0, i32 1, i64 %X
+ %B = getelementptr inbounds %S* %P, i32 0, i32 0
+ %C = icmp eq i32* %A, %B
+ ret i1 %C
+; CHECK: @test13
+; CHECK: %C = icmp eq i64 %X, -1
+}
+
+
+@G = external global [3 x i8]
+define i8* @test14(i32 %Idx) {
+ %idx = zext i32 %Idx to i64
+ %tmp = getelementptr i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i64 %idx
+ ret i8* %tmp
+; CHECK: @test14
+; CHECK: getelementptr [3 x i8]* @G, i64 0, i64 %idx
+}
+
+
+; Test folding of constantexpr geps into normal geps.
+@Array = external global [40 x i32]
+define i32 *@test15(i64 %X) {
+ %A = getelementptr i32* getelementptr ([40 x i32]* @Array, i64 0, i64 0), i64 %X
+ ret i32* %A
+; CHECK: @test15
+; CHECK: getelementptr [40 x i32]* @Array, i64 0, i64 %X
+}
+
+
+define i32* @test16(i32* %X, i32 %Idx) {
+ %R = getelementptr i32* %X, i32 %Idx
+ ret i32* %R
+; CHECK: @test16
+; CHECK: sext i32 %Idx to i64
+}
+
+
+define i1 @test17(i16* %P, i32 %I, i32 %J) {
+ %X = getelementptr inbounds i16* %P, i32 %I
+ %Y = getelementptr inbounds i16* %P, i32 %J
+ %C = icmp ult i16* %X, %Y
+ ret i1 %C
+; CHECK: @test17
+; CHECK: %C = icmp slt i32 %I, %J
+}
+
+define i1 @test18(i16* %P, i32 %I) {
+ %X = getelementptr inbounds i16* %P, i32 %I
+ %C = icmp ult i16* %X, %P
+ ret i1 %C
+; CHECK: @test18
+; CHECK: %C = icmp slt i32 %I, 0
+}
+
+define i32 @test19(i32* %P, i32 %A, i32 %B) {
+ %tmp.4 = getelementptr inbounds i32* %P, i32 %A
+ %tmp.9 = getelementptr inbounds i32* %P, i32 %B
+ %tmp.10 = icmp eq i32* %tmp.4, %tmp.9
+ %tmp.11 = zext i1 %tmp.10 to i32
+ ret i32 %tmp.11
+; CHECK: @test19
+; CHECK: icmp eq i32 %A, %B
+}
+
+define i32 @test20(i32* %P, i32 %A, i32 %B) {
+ %tmp.4 = getelementptr inbounds i32* %P, i32 %A
+ %tmp.6 = icmp eq i32* %tmp.4, %P
+ %tmp.7 = zext i1 %tmp.6 to i32
+ ret i32 %tmp.7
+; CHECK: @test20
+; CHECK: icmp eq i32 %A, 0
+}
+
+
+define i32 @test21() {
+ %pbob1 = alloca %intstruct
+ %pbob2 = getelementptr %intstruct* %pbob1
+ %pbobel = getelementptr %intstruct* %pbob2, i64 0, i32 0
+ %rval = load i32* %pbobel
+ ret i32 %rval
+; CHECK: @test21
+; CHECK: getelementptr %intstruct* %pbob1, i64 0, i32 0
+}
+
+
+@A = global i32 1 ; <i32*> [#uses=1]
+@B = global i32 2 ; <i32*> [#uses=1]
+
+define i1 @test22() {
+ %C = icmp ult i32* getelementptr (i32* @A, i64 1),
+ getelementptr (i32* @B, i64 2)
+ ret i1 %C
+; CHECK: @test22
+; CHECK: icmp ult (i32* getelementptr inbounds (i32* @A, i64 1), i32* getelementptr (i32* @B, i64 2))
+}
+
+
+%X = type { [10 x i32], float }
+
+define i1 @test23() {
+ %A = getelementptr %X* null, i64 0, i32 0, i64 0 ; <i32*> [#uses=1]
+ %B = icmp ne i32* %A, null ; <i1> [#uses=1]
+ ret i1 %B
+; CHECK: @test23
+; CHECK: ret i1 false
+}
+
+%"java/lang/Object" = type { %struct.llvm_java_object_base }
+%"java/lang/StringBuffer" = type { %"java/lang/Object", i32, { %"java/lang/Object", i32, [0 x i16] }*, i1 }
+%struct.llvm_java_object_base = type opaque
+
+define void @test24() {
+bc0:
+ %tmp53 = getelementptr %"java/lang/StringBuffer"* null, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp53
+ ret void
+; CHECK: @test24
+; CHECK: store i32 0, i32* getelementptr (%"java/lang/StringBuffer"* null, i32 0, i32 1)
+}
+
+define void @test25() {
+entry:
+ %tmp = getelementptr { i64, i64, i64, i64 }* null, i32 0, i32 3 ; <i64*> [#uses=1]
+ %tmp.upgrd.1 = load i64* %tmp ; <i64> [#uses=1]
+ %tmp8.ui = load i64* null ; <i64> [#uses=1]
+ %tmp8 = bitcast i64 %tmp8.ui to i64 ; <i64> [#uses=1]
+ %tmp9 = and i64 %tmp8, %tmp.upgrd.1 ; <i64> [#uses=1]
+ %sext = trunc i64 %tmp9 to i32 ; <i32> [#uses=1]
+ %tmp27.i = sext i32 %sext to i64 ; <i64> [#uses=1]
+ tail call void @foo25( i32 0, i64 %tmp27.i )
+ unreachable
+; CHECK: @test25
+}
+
+declare void @foo25(i32, i64)
+
+
+; PR1637
+define i1 @test26(i8* %arr) {
+ %X = getelementptr i8* %arr, i32 1
+ %Y = getelementptr i8* %arr, i32 1
+ %test = icmp uge i8* %X, %Y
+ ret i1 %test
+; CHECK: @test26
+; CHECK: ret i1 true
+}
+
+ %struct.__large_struct = type { [100 x i64] }
+ %struct.compat_siginfo = type { i32, i32, i32, { [29 x i32] } }
+ %struct.siginfo_t = type { i32, i32, i32, { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] } }
+ %struct.sigval_t = type { i8* }
+
+define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) {
+entry:
+ %from_addr = alloca %struct.siginfo_t*
+ %tmp344 = load %struct.siginfo_t** %from_addr, align 8
+ %tmp345 = getelementptr %struct.siginfo_t* %tmp344, i32 0, i32 3
+ %tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0
+ %tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*
+ %tmp348 = getelementptr { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2
+ %tmp349 = getelementptr %struct.sigval_t* %tmp348, i32 0, i32 0
+ %tmp349350 = bitcast i8** %tmp349 to i32*
+ %tmp351 = load i32* %tmp349350, align 8
+ %tmp360 = call i32 asm sideeffect "...",
+ "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351,
+ %struct.__large_struct* null, i32 -14, i32 0 )
+ unreachable
+; CHECK: @test27
+}
+
+; PR1978
+ %struct.x = type <{ i8 }>
+@.str = internal constant [6 x i8] c"Main!\00"
+@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"
+
+define i32 @test28() nounwind {
+entry:
+ %orientations = alloca [1 x [1 x %struct.x]]
+ %tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind
+ %tmp45 = getelementptr inbounds [1 x [1 x %struct.x]]* %orientations, i32 1, i32 0, i32 0
+ %orientations62 = getelementptr [1 x [1 x %struct.x]]* %orientations, i32 0, i32 0, i32 0
+ br label %bb10
+
+bb10:
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb10 ]
+ %tmp.0.reg2mem.0.rec = mul i32 %indvar, -1
+ %tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1
+ %tmp12 = getelementptr inbounds %struct.x* %tmp45, i32 %tmp12.rec
+ %tmp16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
+ %tmp84 = icmp eq %struct.x* %tmp12, %orientations62
+ %indvar.next = add i32 %indvar, 1
+ br i1 %tmp84, label %bb17, label %bb10
+
+bb17:
+ ret i32 0
+; CHECK: @test28
+; CHECK: icmp eq i32 %indvar, 0
+}
+
+declare i32 @puts(i8*)
+
+declare i32 @printf(i8*, ...)
+
+
+
+
+; rdar://6762290
+ %T = type <{ i64, i64, i64 }>
+define i32 @test29(i8* %start, i32 %X) nounwind {
+entry:
+ %tmp3 = load i64* null
+ %add.ptr = getelementptr i8* %start, i64 %tmp3
+ %tmp158 = load i32* null
+ %add.ptr159 = getelementptr %T* null, i32 %tmp158
+ %add.ptr209 = getelementptr i8* %start, i64 0
+ %add.ptr212 = getelementptr i8* %add.ptr209, i32 %X
+ %cmp214 = icmp ugt i8* %add.ptr212, %add.ptr
+ br i1 %cmp214, label %if.then216, label %if.end363
+
+if.then216:
+ ret i32 1
+
+if.end363:
+ ret i32 0
+; CHECK: @test29
+}
+
+
+; PR3694
+define i32 @test30(i32 %m, i32 %n) nounwind {
+entry:
+ %0 = alloca i32, i32 %n, align 4
+ %1 = bitcast i32* %0 to [0 x i32]*
+ call void @test30f(i32* %0) nounwind
+ %2 = getelementptr [0 x i32]* %1, i32 0, i32 %m
+ %3 = load i32* %2, align 4
+ ret i32 %3
+; CHECK: @test30
+; CHECK: getelementptr i32
+}
+
+declare void @test30f(i32*)
+
+
+
+define i1 @test31(i32* %A) {
+ %B = getelementptr i32* %A, i32 1
+ %C = getelementptr i32* %A, i64 1
+ %V = icmp eq i32* %B, %C
+ ret i1 %V
+; CHECK: @test31
+; CHECK: ret i1 true
+}
+
+
+; PR1345
+define i8* @test32(i8* %v) {
+ %A = alloca [4 x i8*], align 16
+ %B = getelementptr [4 x i8*]* %A, i32 0, i32 0
+ store i8* null, i8** %B
+ %C = bitcast [4 x i8*]* %A to { [16 x i8] }*
+ %D = getelementptr { [16 x i8] }* %C, i32 0, i32 0, i32 8
+ %E = bitcast i8* %D to i8**
+ store i8* %v, i8** %E
+ %F = getelementptr [4 x i8*]* %A, i32 0, i32 2
+ %G = load i8** %F
+ ret i8* %G
+; CHECK: @test32
+; CHECK: %D = getelementptr [4 x i8*]* %A, i64 0, i64 1
+; CHECK: %F = getelementptr [4 x i8*]* %A, i64 0, i64 2
+}
+
+; PR3290
+%struct.Key = type { { i32, i32 } }
+%struct.anon = type <{ i8, [3 x i8], i32 }>
+
+define i32 *@test33(%struct.Key *%A) {
+ %B = bitcast %struct.Key* %A to %struct.anon*
+ %C = getelementptr %struct.anon* %B, i32 0, i32 2
+ ret i32 *%C
+; CHECK: @test33
+; CHECK: getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+}
+
+
+
+ %T2 = type { i8*, i8 }
+define i8* @test34(i8* %Val, i64 %V) nounwind {
+entry:
+ %A = alloca %T2, align 8
+ %mrv_gep = bitcast %T2* %A to i64*
+ %B = getelementptr %T2* %A, i64 0, i32 0
+
+ store i64 %V, i64* %mrv_gep
+ %C = load i8** %B, align 8
+ ret i8* %C
+; CHECK: @test34
+; CHECK: %V.c = inttoptr i64 %V to i8*
+; CHECK: ret i8* %V.c
+}
+
+%t0 = type { i8*, [19 x i8] }
+%t1 = type { i8*, [0 x i8] }
+
+@array = external global [11 x i8]
+
+@s = external global %t0
+@"\01LC8" = external constant [17 x i8]
+
+; Instcombine should be able to fold this getelementptr.
+
+define i32 @test35() nounwind {
+ call i32 (i8*, ...)* @printf(i8* getelementptr ([17 x i8]* @"\01LC8", i32 0, i32 0),
+ i8* getelementptr (%t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
+ ret i32 0
+; CHECK: @test35
+; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* bitcast (i8** getelementptr (%t1* bitcast (%t0* @s to %t1*), i64 1, i32 0) to i8*)) nounwind
+}
+
+; Instcombine should constant-fold the GEP so that indices that have
+; static array extents are within bounds of those array extents.
+; In the below, -1 is not in the range [0,11). After the transformation,
+; the same address is computed, but the resulting index 4 is in the range [0,11).
+
+define i8* @test36() nounwind {
+ ret i8* getelementptr ([11 x i8]* @array, i32 0, i64 -1)
+; CHECK: @test36
+; CHECK: ret i8* getelementptr ([11 x i8]* @array, i64 1676976733973595601, i64 4)
+}
+
+; Instcombine shouldn't assume that gep(A,0,1) != gep(A,1,0).
+@A37 = external constant [1 x i8]
+define i1 @test37() nounwind {
+; CHECK: @test37
+; CHECK: ret i1 true
+ %t = icmp eq i8* getelementptr ([1 x i8]* @A37, i64 0, i64 1),
+ getelementptr ([1 x i8]* @A37, i64 1, i64 0)
+ ret i1 %t
}
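One way to verify the test36 expectation: getelementptr address arithmetic is
performed modulo 2^64, so the rewritten indices must produce the same byte
offset as the original -1 over the 11-byte array:

;   1676976733973595601 * 11 + 4
;     = 18446744073709551611 + 4
;     = 18446744073709551615
;     = 2^64 - 1  =  -1 (mod 2^64)

The rewritten constant expression therefore addresses the same byte while
keeping the trailing index inside the static [0,11) extent.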
diff --git a/test/Transforms/InstCombine/hoist_instr.ll b/test/Transforms/InstCombine/hoist_instr.ll
index e425bc2a5a90..fa451bcc7273 100644
--- a/test/Transforms/InstCombine/hoist_instr.ll
+++ b/test/Transforms/InstCombine/hoist_instr.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: %prcontext div 1 | grep then:
+; RUN: opt < %s -instcombine -S | FileCheck %s
;; This tests that the div is hoisted into the then block.
define i32 @foo(i1 %C, i32 %A, i32 %B) {
@@ -7,6 +6,8 @@ entry:
br i1 %C, label %then, label %endif
then: ; preds = %entry
+; CHECK: then:
+; CHECK-NEXT: sdiv i32
br label %endif
endif: ; preds = %then, %entry
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 27136d69438d..64e88c9ae86d 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep icmp
+; RUN: opt < %s -instcombine -S | not grep icmp
define i32 @test1(i32 %X) {
entry:
@@ -28,3 +28,17 @@ entry:
ret i32 %1
}
+; PR4837
+define <2 x i1> @test5(<2 x i64> %x) {
+entry:
+ %V = icmp eq <2 x i64> %x, undef
+ ret <2 x i1> %V
+}
+
+define i32 @test6(i32 %a, i32 %b) {
+ %c = icmp sle i32 %a, -1
+ %d = zext i1 %c to i32
+ %e = sub i32 0, %d
+ %f = and i32 %e, %b
+ ret i32 %f
+}
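test6 above is the scalar sign-smear idiom: %c is a single bit, so
0 - zext(%c) is either 0 or a word of all ones, and %f is %b masked by the
sign of %a. A sketch of the equivalence instcombine can exploit (not the
verbatim output of this revision):

;   %c = (a <= -1)             ; the sign bit of a
;   0 - zext(%c)  =  0 when a >= 0,  -1 (all ones) when a < 0
;                 =  ashr i32 %a, 31
;   %f  =  and i32 (ashr i32 %a, 31), %b   ; no icmp needed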
diff --git a/test/Transforms/InstCombine/known_align.ll b/test/Transforms/InstCombine/known_align.ll
index a6e340902dfb..5382abf82125 100644
--- a/test/Transforms/InstCombine/known_align.ll
+++ b/test/Transforms/InstCombine/known_align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {align 1}
+; RUN: opt < %s -instcombine -S | grep {align 1}
; END.
%struct.p = type <{ i8, i32 }>
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index 85a749541e82..6d068f55dce6 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep load
+; RUN: opt < %s -instcombine -S | not grep load
@X = constant i32 42 ; <i32*> [#uses=2]
@X2 = constant i32 47 ; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/load2.ll b/test/Transforms/InstCombine/load2.ll
index 5c3cf330543b..611b0fb1c040 100644
--- a/test/Transforms/InstCombine/load2.ll
+++ b/test/Transforms/InstCombine/load2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep load
+; RUN: opt < %s -instcombine -S | not grep load
@GLOBAL = internal constant [4 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/load3.ll b/test/Transforms/InstCombine/load3.ll
index e102d39e015d..9c87316ece80 100644
--- a/test/Transforms/InstCombine/load3.ll
+++ b/test/Transforms/InstCombine/load3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep load | count 1
+; RUN: opt < %s -instcombine -S | grep load | count 1
; Instcombine should be able to do trivial CSE of loads.
diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll
index ebea3e4fe831..ff3401727b9c 100644
--- a/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {, align 16} | count 14
+; RUN: opt < %s -instcombine -S | grep {, align 16} | count 14
@x = external global <2 x i64>, align 16
@xx = external global [13 x <2 x i64>], align 16
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index 3bb2d348b383..ece8bc317e5a 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: grep select %t | count 5
; RUN: not grep and %t
; RUN: not grep or %t
diff --git a/test/Transforms/InstCombine/lshr-phi.ll b/test/Transforms/InstCombine/lshr-phi.ll
index 683eb87931ef..76a113face05 100644
--- a/test/Transforms/InstCombine/lshr-phi.ll
+++ b/test/Transforms/InstCombine/lshr-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: not grep lshr %t
; RUN: grep add %t | count 1
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 24f793f7ca8f..2ed5ec6996d6 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 0}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep malloc
+; RUN: opt < %s -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -instcombine -S | not grep malloc
; PR1201
define i32 @main(i32 %argc, i8** %argv) {
%c_19 = alloca i8* ; <i8**> [#uses=2]
diff --git a/test/Transforms/InstCombine/malloc.ll b/test/Transforms/InstCombine/malloc.ll
index 6a4601a2ebee..b6ebbeaf5753 100644
--- a/test/Transforms/InstCombine/malloc.ll
+++ b/test/Transforms/InstCombine/malloc.ll
@@ -1,5 +1,5 @@
; Test that mallocs with a constant argument are promoted to array allocations
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep getelementptr
+; RUN: opt < %s -instcombine -S | grep getelementptr
define i32* @test() {
%X = malloc i32, i32 4
diff --git a/test/Transforms/InstCombine/malloc2.ll b/test/Transforms/InstCombine/malloc2.ll
index eb7c9abfcef5..102422ee5fc7 100644
--- a/test/Transforms/InstCombine/malloc2.ll
+++ b/test/Transforms/InstCombine/malloc2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 0}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep malloc
+; RUN: opt < %s -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -instcombine -S | not grep malloc
; PR1313
define i32 @test1(i32 %argc, i8* %argv, i8* %envp) {
diff --git a/test/Transforms/InstCombine/malloc3.ll b/test/Transforms/InstCombine/malloc3.ll
index f6f412dfe062..cd407ffb02f3 100644
--- a/test/Transforms/InstCombine/malloc3.ll
+++ b/test/Transforms/InstCombine/malloc3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {malloc.*struct.foo} | count 2
+; RUN: opt < %s -instcombine -S | grep {malloc.*struct.foo} | count 2
; PR1728
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll
index 7a0027d7789f..ebb8711af9f1 100644
--- a/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {load double}
+; RUN: opt < %s -instcombine -S | grep {load double}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll
index 09ec67d2654e..1806cfcb54fb 100644
--- a/test/Transforms/InstCombine/memmove.ll
+++ b/test/Transforms/InstCombine/memmove.ll
@@ -1,6 +1,6 @@
; This test makes sure that memmove instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {call void @llvm.memmove}
@S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; <[33 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll
index 27a5b60cade6..8e85694d9ad9 100644
--- a/test/Transforms/InstCombine/memset.ll
+++ b/test/Transforms/InstCombine/memset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {call.*llvm.memset}
+; RUN: opt < %s -instcombine -S | not grep {call.*llvm.memset}
declare void @llvm.memset.i32(i8*, i8, i32, i32)
diff --git a/test/Transforms/InstCombine/mul-masked-bits.ll b/test/Transforms/InstCombine/mul-masked-bits.ll
index 9b0a5bfe20de..a43d5f20beaa 100644
--- a/test/Transforms/InstCombine/mul-masked-bits.ll
+++ b/test/Transforms/InstCombine/mul-masked-bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep ashr
+; RUN: opt < %s -instcombine -S | grep ashr
define i32 @foo(i32 %x, i32 %y) {
%a = and i32 %x, 7
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 9b5f7a5c5efc..53a56434aede 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,5 +1,5 @@
; This test makes sure that mul instructions are properly eliminated.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
+; RUN: opt < %s -instcombine -S | not grep mul
define i32 @test1(i32 %A) {
%B = mul i32 %A, 1 ; <i32> [#uses=1]
@@ -83,3 +83,34 @@ define internal void @test13(<4 x float>*) {
store <4 x float> %3, <4 x float>* %0, align 1
ret void
}
+
+define <16 x i8> @test14(<16 x i8> %a) {
+ %b = mul <16 x i8> %a, zeroinitializer
+ ret <16 x i8> %b
+}
+
+; rdar://7293527
+define i32 @test15(i32 %A, i32 %B) {
+entry:
+ %shl = shl i32 1, %B
+ %m = mul i32 %shl, %A
+ ret i32 %m
+}
+
+; X * Y (when Y is 0 or 1) --> X & (0-Y)
+define i32 @test16(i32 %b, i1 %c) {
+ %d = zext i1 %c to i32 ; <i32> [#uses=1]
+ ; e = b & (0-d)
+ %e = mul i32 %d, %b ; <i32> [#uses=1]
+ ret i32 %e
+}
+
+; X * Y (when Y is 0 or 1) --> X & (0-Y)
+define i32 @test17(i32 %a, i32 %b) {
+ %a.lobit = lshr i32 %a, 31
+ %e = mul i32 %a.lobit, %b
+ ret i32 %e
+}
+
+
+
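The identity cited for test16 and test17 is worth spelling out: when Y is
known to be 0 or 1, 0-Y is either 0 or a word of all ones, so the multiply
is really a mask:

;   Y = 0:   X * 0 = 0    and   X & (0-0)  =  X & 0   =  0
;   Y = 1:   X * 1 = X    and   X & (0-1)  =  X & -1  =  X

which lets instcombine replace the multiply with a negate-and-mask pair that
needs no multiplier.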
diff --git a/test/Transforms/InstCombine/multi-use-or.ll b/test/Transforms/InstCombine/multi-use-or.ll
index 48049677acd8..9bbef2383bb2 100644
--- a/test/Transforms/InstCombine/multi-use-or.ll
+++ b/test/Transforms/InstCombine/multi-use-or.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {add double .sx, .sy}
+; RUN: opt < %s -instcombine -S | grep {add double .sx, .sy}
; The 'or' has multiple uses; make sure that this doesn't prevent instcombine
; from propagating the extends to the truncs.
diff --git a/test/Transforms/InstCombine/narrow.ll b/test/Transforms/InstCombine/narrow.ll
index 41106e759ce0..1b96a06eeb7f 100644
--- a/test/Transforms/InstCombine/narrow.ll
+++ b/test/Transforms/InstCombine/narrow.ll
@@ -1,6 +1,6 @@
; This file contains various testcases that check to see that instcombine
; is narrowing computations when possible.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {ret i1 false}
; test1 - Eliminating the casts in this testcase (by narrowing the AND
diff --git a/test/Transforms/InstCombine/no-negzero.ll b/test/Transforms/InstCombine/no-negzero.ll
new file mode 100644
index 000000000000..f295130b0ea4
--- /dev/null
+++ b/test/Transforms/InstCombine/no-negzero.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; ModuleID = '3555a.c'
+; sqrt(fabs) cannot be negative zero, so we should eliminate the fadd.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.8"
+
+; CHECK: @mysqrt
+; CHECK-NOT: fadd
+; CHECK: ret
+define double @mysqrt(double %x) nounwind {
+entry:
+ %x_addr = alloca double ; <double*> [#uses=2]
+ %retval = alloca double, align 8 ; <double*> [#uses=2]
+ %0 = alloca double, align 8 ; <double*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store double %x, double* %x_addr
+ %1 = load double* %x_addr, align 8 ; <double> [#uses=1]
+ %2 = call double @fabs(double %1) nounwind readnone ; <double> [#uses=1]
+ %3 = call double @sqrt(double %2) nounwind readonly ; <double> [#uses=1]
+ %4 = fadd double %3, 0.000000e+00 ; <double> [#uses=1]
+ store double %4, double* %0, align 8
+ %5 = load double* %0, align 8 ; <double> [#uses=1]
+ store double %5, double* %retval, align 8
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load double* %retval ; <double> [#uses=1]
+ ret double %retval1
+}
+
+declare double @fabs(double)
+
+declare double @sqrt(double) nounwind readonly
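The fadd in @mysqrt is only removable because of the fabs/sqrt context:
x + 0.0 is not an identity for every x, since IEEE 754 gives
(-0.0) + (+0.0) = +0.0. The edge case and why it cannot arise here:

;   x = -0.0:            fadd double -0.0, 0.0  =  +0.0   (not x)
;   x = sqrt(fabs(y)):   fabs never yields -0.0, and sqrt of a
;                        nonnegative value never yields -0.0, so
;                        fadd x, 0.0 == x and the add folds away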
diff --git a/test/Transforms/InstCombine/not-fcmp.ll b/test/Transforms/InstCombine/not-fcmp.ll
index 4560419a35fc..ad01a6bdf1bc 100644
--- a/test/Transforms/InstCombine/not-fcmp.ll
+++ b/test/Transforms/InstCombine/not-fcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep "fcmp uge"
+; RUN: opt < %s -instcombine -S | grep "fcmp uge"
; PR1570
define i1 @f(float %X, float %Y) {
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index a79e51871818..c58ce11b438f 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep xor
+; RUN: opt < %s -instcombine -S | not grep xor
define i32 @test1(i32 %A) {
%B = xor i32 %A, -1 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/nothrow.ll b/test/Transforms/InstCombine/nothrow.ll
index fbf162a1db84..08d90bfbd7d4 100644
--- a/test/Transforms/InstCombine/nothrow.ll
+++ b/test/Transforms/InstCombine/nothrow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep call
+; RUN: opt < %s -instcombine -S | not grep call
; rdar://6880732
declare double @t1(i32) readonly
diff --git a/test/Transforms/InstCombine/nsw.ll b/test/Transforms/InstCombine/nsw.ll
new file mode 100644
index 000000000000..821cebed30bb
--- /dev/null
+++ b/test/Transforms/InstCombine/nsw.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: define i32 @foo
+; %y = sub i32 0, %x
+; %z = sdiv i32 %y, 337
+; ret i32 %y
+define i32 @foo(i32 %x) {
+ %y = sub i32 0, %x
+ %z = sdiv i32 %y, 337
+ ret i32 %y
+}
+
+; CHECK: define i32 @bar
+; %y = sdiv i32 %x, -337
+; ret i32 %y
+define i32 @bar(i32 %x) {
+ %y = sub nsw i32 0, %x
+ %z = sdiv i32 %y, 337
+ ret i32 %y
+}
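The contrast between @foo and @bar hinges on signed overflow: rewriting
(0 - x) sdiv 337 as x sdiv -337 is only sound when the negation cannot wrap,
and the nsw flag rules out the one problematic input:

;   x = INT_MIN:   sub i32 0, %x wraps back to INT_MIN
;                  sdiv INT_MIN, 337   is negative
;                  sdiv INT_MIN, -337  is positive; the fold would
;                  change the result
;   with nsw on the sub, x = INT_MIN is undefined, so the rewrite
;   guarded by @bar is safe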
diff --git a/test/Transforms/InstCombine/odr-linkage.ll b/test/Transforms/InstCombine/odr-linkage.ll
index 96f883335bcc..a64ef289a4b6 100644
--- a/test/Transforms/InstCombine/odr-linkage.ll
+++ b/test/Transforms/InstCombine/odr-linkage.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 10}
+; RUN: opt < %s -instcombine -S | grep {ret i32 10}
@g1 = available_externally constant i32 1
@g2 = linkonce_odr constant i32 2
diff --git a/test/Transforms/InstCombine/or-fcmp.ll b/test/Transforms/InstCombine/or-fcmp.ll
index 3833c6f706fb..9692bfcc5970 100644
--- a/test/Transforms/InstCombine/or-fcmp.ll
+++ b/test/Transforms/InstCombine/or-fcmp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep fcmp | count 3
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep ret | grep 1
+; RUN: opt < %s -instcombine -S | grep fcmp | count 3
+; RUN: opt < %s -instcombine -S | grep ret | grep 1
define zeroext i8 @t1(float %x, float %y) nounwind {
%a = fcmp ueq float %x, %y ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/or-to-xor.ll b/test/Transforms/InstCombine/or-to-xor.ll
index e40417b17d35..1495ee49709f 100644
--- a/test/Transforms/InstCombine/or-to-xor.ll
+++ b/test/Transforms/InstCombine/or-to-xor.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {xor i32 %a, %b} | count 4
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {and}
+; RUN: opt < %s -instcombine -S | grep {xor i32 %a, %b} | count 4
+; RUN: opt < %s -instcombine -S | not grep {and}
define i32 @func1(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index e70fb1c4e0c0..37f934bd9670 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v xor | not grep {or }
; END.
diff --git a/test/Transforms/InstCombine/or2.ll b/test/Transforms/InstCombine/or2.ll
index f14a27439163..c01229a78a0d 100644
--- a/test/Transforms/InstCombine/or2.ll
+++ b/test/Transforms/InstCombine/or2.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep -v xor | not grep {or }
+; RUN: opt < %s -instcombine -S | grep -v xor | not grep {or }
; PR1738
define i1 @test1(double %X, double %Y) {
diff --git a/test/Transforms/InstCombine/phi-merge-gep.ll b/test/Transforms/InstCombine/phi-merge-gep.ll
new file mode 100644
index 000000000000..267174970941
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-merge-gep.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -S -instcombine > %t
+; RUN: grep {= getelementptr} %t | count 20
+; RUN: grep {= phi} %t | count 13
+
+; Don't push the geps through these phis, because they would require
+; two phis each, which burdens the loop with high register pressure.
+
+define void @foo(float* %Ar, float* %Ai, i64 %As, float* %Cr, float* %Ci, i64 %Cs, i64 %n) nounwind {
+entry:
+ %0 = getelementptr inbounds float* %Ar, i64 0 ; <float*> [#uses=1]
+ %1 = getelementptr inbounds float* %Ai, i64 0 ; <float*> [#uses=1]
+ %2 = mul i64 %n, %As ; <i64> [#uses=1]
+ %3 = getelementptr inbounds float* %Ar, i64 %2 ; <float*> [#uses=1]
+ %4 = mul i64 %n, %As ; <i64> [#uses=1]
+ %5 = getelementptr inbounds float* %Ai, i64 %4 ; <float*> [#uses=1]
+ %6 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %7 = mul i64 %6, %As ; <i64> [#uses=1]
+ %8 = getelementptr inbounds float* %Ar, i64 %7 ; <float*> [#uses=1]
+ %9 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %10 = mul i64 %9, %As ; <i64> [#uses=1]
+ %11 = getelementptr inbounds float* %Ai, i64 %10 ; <float*> [#uses=1]
+ %12 = getelementptr inbounds float* %Cr, i64 0 ; <float*> [#uses=1]
+ %13 = getelementptr inbounds float* %Ci, i64 0 ; <float*> [#uses=1]
+ %14 = mul i64 %n, %Cs ; <i64> [#uses=1]
+ %15 = getelementptr inbounds float* %Cr, i64 %14 ; <float*> [#uses=1]
+ %16 = mul i64 %n, %Cs ; <i64> [#uses=1]
+ %17 = getelementptr inbounds float* %Ci, i64 %16 ; <float*> [#uses=1]
+ %18 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %19 = mul i64 %18, %Cs ; <i64> [#uses=1]
+ %20 = getelementptr inbounds float* %Cr, i64 %19 ; <float*> [#uses=1]
+ %21 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %22 = mul i64 %21, %Cs ; <i64> [#uses=1]
+ %23 = getelementptr inbounds float* %Ci, i64 %22 ; <float*> [#uses=1]
+ br label %bb13
+
+bb: ; preds = %bb13
+ %24 = load float* %A0r.0, align 4 ; <float> [#uses=1]
+ %25 = load float* %A0i.0, align 4 ; <float> [#uses=1]
+ %26 = load float* %A1r.0, align 4 ; <float> [#uses=2]
+ %27 = load float* %A1i.0, align 4 ; <float> [#uses=2]
+ %28 = load float* %A2r.0, align 4 ; <float> [#uses=2]
+ %29 = load float* %A2i.0, align 4 ; <float> [#uses=2]
+ %30 = fadd float %26, %28 ; <float> [#uses=2]
+ %31 = fadd float %27, %29 ; <float> [#uses=2]
+ %32 = fsub float %26, %28 ; <float> [#uses=1]
+ %33 = fsub float %27, %29 ; <float> [#uses=1]
+ %34 = fadd float %24, %30 ; <float> [#uses=2]
+ %35 = fadd float %25, %31 ; <float> [#uses=2]
+ %36 = fmul float %30, -1.500000e+00 ; <float> [#uses=1]
+ %37 = fmul float %31, -1.500000e+00 ; <float> [#uses=1]
+ %38 = fadd float %34, %36 ; <float> [#uses=2]
+ %39 = fadd float %35, %37 ; <float> [#uses=2]
+ %40 = fmul float %32, 0x3FEBB67AE0000000 ; <float> [#uses=2]
+ %41 = fmul float %33, 0x3FEBB67AE0000000 ; <float> [#uses=2]
+ %42 = fadd float %38, %41 ; <float> [#uses=1]
+ %43 = fsub float %39, %40 ; <float> [#uses=1]
+ %44 = fsub float %38, %41 ; <float> [#uses=1]
+ %45 = fadd float %39, %40 ; <float> [#uses=1]
+ store float %34, float* %C0r.0, align 4
+ store float %35, float* %C0i.0, align 4
+ store float %42, float* %C1r.0, align 4
+ store float %43, float* %C1i.0, align 4
+ store float %44, float* %C2r.0, align 4
+ store float %45, float* %C2i.0, align 4
+ %46 = getelementptr inbounds float* %A0r.0, i64 %As ; <float*> [#uses=1]
+ %47 = getelementptr inbounds float* %A0i.0, i64 %As ; <float*> [#uses=1]
+ %48 = getelementptr inbounds float* %A1r.0, i64 %As ; <float*> [#uses=1]
+ %49 = getelementptr inbounds float* %A1i.0, i64 %As ; <float*> [#uses=1]
+ %50 = getelementptr inbounds float* %A2r.0, i64 %As ; <float*> [#uses=1]
+ %51 = getelementptr inbounds float* %A2i.0, i64 %As ; <float*> [#uses=1]
+ %52 = getelementptr inbounds float* %C0r.0, i64 %Cs ; <float*> [#uses=1]
+ %53 = getelementptr inbounds float* %C0i.0, i64 %Cs ; <float*> [#uses=1]
+ %54 = getelementptr inbounds float* %C1r.0, i64 %Cs ; <float*> [#uses=1]
+ %55 = getelementptr inbounds float* %C1i.0, i64 %Cs ; <float*> [#uses=1]
+ %56 = getelementptr inbounds float* %C2r.0, i64 %Cs ; <float*> [#uses=1]
+ %57 = getelementptr inbounds float* %C2i.0, i64 %Cs ; <float*> [#uses=1]
+ %58 = add nsw i64 %i.0, 1 ; <i64> [#uses=1]
+ br label %bb13
+
+bb13: ; preds = %bb, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %58, %bb ] ; <i64> [#uses=2]
+ %C2i.0 = phi float* [ %23, %entry ], [ %57, %bb ] ; <float*> [#uses=2]
+ %C2r.0 = phi float* [ %20, %entry ], [ %56, %bb ] ; <float*> [#uses=2]
+ %C1i.0 = phi float* [ %17, %entry ], [ %55, %bb ] ; <float*> [#uses=2]
+ %C1r.0 = phi float* [ %15, %entry ], [ %54, %bb ] ; <float*> [#uses=2]
+ %C0i.0 = phi float* [ %13, %entry ], [ %53, %bb ] ; <float*> [#uses=2]
+ %C0r.0 = phi float* [ %12, %entry ], [ %52, %bb ] ; <float*> [#uses=2]
+ %A2i.0 = phi float* [ %11, %entry ], [ %51, %bb ] ; <float*> [#uses=2]
+ %A2r.0 = phi float* [ %8, %entry ], [ %50, %bb ] ; <float*> [#uses=2]
+ %A1i.0 = phi float* [ %5, %entry ], [ %49, %bb ] ; <float*> [#uses=2]
+ %A1r.0 = phi float* [ %3, %entry ], [ %48, %bb ] ; <float*> [#uses=2]
+ %A0i.0 = phi float* [ %1, %entry ], [ %47, %bb ] ; <float*> [#uses=2]
+ %A0r.0 = phi float* [ %0, %entry ], [ %46, %bb ] ; <float*> [#uses=2]
+ %59 = icmp slt i64 %i.0, %n ; <i1> [#uses=1]
+ br i1 %59, label %bb, label %bb14
+
+bb14: ; preds = %bb13
+ br label %return
+
+return: ; preds = %bb14
+ ret void
+}
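Editorial note on the comment at the top of this new test: merging the incoming geps of a pointer phi such as %A0r.0 would replace the single phi with a base phi plus an index phi feeding one gep, i.e. two loop-carried values where there was one. A minimal sketch of that rejected form, reduced to just the %A0r chain of @foo (function and value names here are hypothetical, not part of the commit):

define void @merged_sketch(float* %Ar, i64 %As, i64 %n) nounwind {
entry:
  br label %bb13

bb13:
  %i.0 = phi i64 [ 0, %entry ], [ %i.next, %bb13 ]
  %A0r.base = phi float* [ %Ar, %entry ], [ %A0r.0, %bb13 ]   ; extra phi #1: base
  %A0r.idx = phi i64 [ 0, %entry ], [ %As, %bb13 ]            ; extra phi #2: index
  %A0r.0 = getelementptr inbounds float* %A0r.base, i64 %A0r.idx
  %i.next = add nsw i64 %i.0, 1
  %cond = icmp slt i64 %i.0, %n
  br i1 %cond, label %bb13, label %return

return:
  ret void
}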
diff --git a/test/Transforms/InstCombine/phi-merge.ll b/test/Transforms/InstCombine/phi-merge.ll
index daac41274cde..c41f50325d6f 100644
--- a/test/Transforms/InstCombine/phi-merge.ll
+++ b/test/Transforms/InstCombine/phi-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {phi i32}
+; RUN: opt < %s -instcombine -S | not grep {phi i32}
; PR1777
declare i1 @rrr()
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 4efbb79d9d4e..24eca72d4b53 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep phi
+; RUN: opt < %s -instcombine -S | not grep phi
define i32 @test1(i32 %A, i1 %b) {
BB0:
diff --git a/test/Transforms/InstCombine/pr2645-0.ll b/test/Transforms/InstCombine/pr2645-0.ll
index 04cc1852cb2a..9bcaa43a80ba 100644
--- a/test/Transforms/InstCombine/pr2645-0.ll
+++ b/test/Transforms/InstCombine/pr2645-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {insertelement <4 x float> undef}
+; RUN: opt < %s -instcombine -S | grep {insertelement <4 x float> undef}
; Instcombine should be able to prove that none of the
; insertelement's first operand's elements are needed.
diff --git a/test/Transforms/InstCombine/pr2645-1.ll b/test/Transforms/InstCombine/pr2645-1.ll
index 194d2cd18f70..d320dafcd252 100644
--- a/test/Transforms/InstCombine/pr2645-1.ll
+++ b/test/Transforms/InstCombine/pr2645-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep shufflevector
+; RUN: opt < %s -instcombine -S | grep shufflevector
; PR2645
; instcombine shouldn't delete the shufflevector.
diff --git a/test/Transforms/InstCombine/pr2996.ll b/test/Transforms/InstCombine/pr2996.ll
index 3e7dfa2ba6df..ff3245d8668c 100644
--- a/test/Transforms/InstCombine/pr2996.ll
+++ b/test/Transforms/InstCombine/pr2996.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; PR2996
define void @func_53(i16 signext %p_56) nounwind {
diff --git a/test/Transforms/InstCombine/preserve-sminmax.ll b/test/Transforms/InstCombine/preserve-sminmax.ll
index 24fb7dabe3d5..dbfd56acc58b 100644
--- a/test/Transforms/InstCombine/preserve-sminmax.ll
+++ b/test/Transforms/InstCombine/preserve-sminmax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep { i32 \[%\]sd, \[\[:alnum:\]\]* \\?1\\>} | count 4
+; RUN: opt < %s -instcombine -S | grep { i32 \[%\]sd, \[\[:alnum:\]\]* \\?1\\>} | count 4
; Instcombine normally would fold the sdiv into the comparison,
; making "icmp slt i32 %h, 2", but in this case the sdiv has
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 5c154397775e..2f64d8ba0e1d 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
define i1 @test1(i32 *%x) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 8b2263d9b23f..bac248e58d7a 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep rem
+; RUN: opt < %s -instcombine -S | not grep rem
; END.
define i32 @test1(i32 %A) {
diff --git a/test/Transforms/InstCombine/sdiv-1.ll b/test/Transforms/InstCombine/sdiv-1.ll
index 305b6d7df927..c46b5eaef4a8 100644
--- a/test/Transforms/InstCombine/sdiv-1.ll
+++ b/test/Transforms/InstCombine/sdiv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -inline | llvm-dis | not grep '-715827882'
+; RUN: opt < %s -instcombine -inline -S | not grep '-715827882'
; PR3142
define i32 @a(i32 %X) nounwind readnone {
diff --git a/test/Transforms/InstCombine/sdiv-2.ll b/test/Transforms/InstCombine/sdiv-2.ll
index db0c7fbccf8e..0e4c00802010 100644
--- a/test/Transforms/InstCombine/sdiv-2.ll
+++ b/test/Transforms/InstCombine/sdiv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine -disable-output
+; RUN: opt < %s -instcombine -disable-output
; PR3144
define fastcc i32 @func(i32 %length) nounwind {
diff --git a/test/Transforms/InstCombine/sdiv-shift.ll b/test/Transforms/InstCombine/sdiv-shift.ll
new file mode 100644
index 000000000000..f4d2b36cbbf8
--- /dev/null
+++ b/test/Transforms/InstCombine/sdiv-shift.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | not grep div
+
+define i32 @a(i16 zeroext %x, i32 %y) nounwind {
+entry:
+ %conv = zext i16 %x to i32
+ %s = shl i32 2, %y
+ %d = sdiv i32 %conv, %s
+ ret i32 %d
+}
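Why the div can disappear in this test: %conv is a zext and therefore non-negative, and %s = shl i32 2, %y is a power of two (2 << %y == 1 << (%y + 1)) for in-range %y, so the signed division reduces to a logical right shift. The test only asserts that no div survives; one plausible folded form (an assumption, not the verified pass output) is:

define i32 @a_folded(i16 zeroext %x, i32 %y) nounwind {
entry:
  %conv = zext i16 %x to i32        ; known non-negative
  %shamt = add i32 %y, 1            ; 2 << %y == 1 << (%y + 1)
  %d = lshr i32 %conv, %shamt       ; sdiv by a power of two becomes lshr
  ret i32 %d
}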
diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll
index 4621f6e152ad..a76addc9942b 100644
--- a/test/Transforms/InstCombine/select-2.ll
+++ b/test/Transforms/InstCombine/select-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep select | count 2
+; RUN: opt < %s -instcombine -S | grep select | count 2
; Make sure instcombine doesn't fold select into operands. We don't want to emit
; select of two integers unless it's selecting 0 / 1.
diff --git a/test/Transforms/InstCombine/select-load-call.ll b/test/Transforms/InstCombine/select-load-call.ll
index 367356e6311a..bef0cf841bfa 100644
--- a/test/Transforms/InstCombine/select-load-call.ll
+++ b/test/Transforms/InstCombine/select-load-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 1}
+; RUN: opt < %s -instcombine -S | grep {ret i32 1}
declare void @test2()
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 15083f2facd2..b04382e8b113 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly eliminated.
; PR1822
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep select
+; RUN: opt < %s -instcombine -S | not grep select
define i32 @test1(i32 %A, i32 %B) {
%C = select i1 false, i32 %A, i32 %B ; <i32> [#uses=1]
@@ -202,3 +202,64 @@ define i1 @test24(i1 %a, i1 %b) {
ret i1 %c
}
+define i32 @test25(i1 %c) {
+entry:
+ br i1 %c, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %a = phi i1 [true, %jump], [false, %entry]
+ %b = select i1 %a, i32 10, i32 20
+ ret i32 %b
+}
+
+define i32 @test26(i1 %cond) {
+entry:
+ br i1 %cond, label %jump, label %ret
+jump:
+ %c = or i1 false, false
+ br label %ret
+ret:
+ %a = phi i1 [true, %jump], [%c, %entry]
+ %b = select i1 %a, i32 10, i32 20
+ ret i32 %b
+}
+
+define i32 @test27(i1 %c, i32 %A, i32 %B) {
+entry:
+ br i1 %c, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %a = phi i1 [true, %jump], [false, %entry]
+ %b = select i1 %a, i32 %A, i32 %B
+ ret i32 %b
+}
+
+define i32 @test28(i1 %cond, i32 %A, i32 %B) {
+entry:
+ br i1 %cond, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %c = phi i32 [%A, %jump], [%B, %entry]
+ %a = phi i1 [true, %jump], [false, %entry]
+ %b = select i1 %a, i32 %A, i32 %c
+ ret i32 %b
+}
+
+define i32 @test29(i1 %cond, i32 %A, i32 %B) {
+entry:
+ br i1 %cond, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %c = phi i32 [%A, %jump], [%B, %entry]
+ %a = phi i1 [true, %jump], [false, %entry]
+ br label %next
+
+next:
+ %b = select i1 %a, i32 %A, i32 %c
+ ret i32 %b
+}
+
diff --git a/test/Transforms/InstCombine/set.ll b/test/Transforms/InstCombine/set.ll
index 5e3ca0b51b89..daa9148f6ad9 100644
--- a/test/Transforms/InstCombine/set.ll
+++ b/test/Transforms/InstCombine/set.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep icmp
+; RUN: opt < %s -instcombine -S | not grep icmp
; END.
@X = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/InstCombine/setcc-cast-cast.ll b/test/Transforms/InstCombine/setcc-cast-cast.ll
index ed2e2bee551e..b2681ea29863 100644
--- a/test/Transforms/InstCombine/setcc-cast-cast.ll
+++ b/test/Transforms/InstCombine/setcc-cast-cast.ll
@@ -1,7 +1,7 @@
; This test case was reduced from MultiSource/Applications/hbd. It makes sure
; that folding doesn't happen in case a zext is applied where a sext should have
; been when a setcc is used with two casts.
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {br i1 false}
; END.
diff --git a/test/Transforms/InstCombine/setcc-strength-reduce.ll b/test/Transforms/InstCombine/setcc-strength-reduce.ll
index 9931ccb2fd25..62ab116367fe 100644
--- a/test/Transforms/InstCombine/setcc-strength-reduce.ll
+++ b/test/Transforms/InstCombine/setcc-strength-reduce.ll
@@ -2,7 +2,7 @@
; working. Basically this boils down to converting setlt,gt,le,ge instructions
; into equivalent setne,eq instructions.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v {icmp eq} | grep -v {icmp ne} | not grep icmp
; END.
diff --git a/test/Transforms/InstCombine/sext-misc.ll b/test/Transforms/InstCombine/sext-misc.ll
index 58749666ea1b..107bba6e843f 100644
--- a/test/Transforms/InstCombine/sext-misc.ll
+++ b/test/Transforms/InstCombine/sext-misc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sext
+; RUN: opt < %s -instcombine -S | not grep sext
declare i32 @llvm.ctpop.i32(i32)
declare i32 @llvm.ctlz.i32(i32)
diff --git a/test/Transforms/InstCombine/shift-simplify.ll b/test/Transforms/InstCombine/shift-simplify.ll
index ecf3f5f92fef..e5cc705350f9 100644
--- a/test/Transforms/InstCombine/shift-simplify.ll
+++ b/test/Transforms/InstCombine/shift-simplify.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: egrep {shl|lshr|ashr} | count 3
define i32 @test0(i32 %A, i32 %B, i32 %C) {
diff --git a/test/Transforms/InstCombine/shift-sra.ll b/test/Transforms/InstCombine/shift-sra.ll
index 6058bd6dc724..449278544d1f 100644
--- a/test/Transforms/InstCombine/shift-sra.ll
+++ b/test/Transforms/InstCombine/shift-sra.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {lshr i32} | count 2
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep ashr
+; RUN: opt < %s -instcombine -S | not grep ashr
define i32 @test1(i32 %X, i8 %A) {
diff --git a/test/Transforms/InstCombine/shift-trunc-shift.ll b/test/Transforms/InstCombine/shift-trunc-shift.ll
index bf9f4070fb98..7133d299a2bd 100644
--- a/test/Transforms/InstCombine/shift-trunc-shift.ll
+++ b/test/Transforms/InstCombine/shift-trunc-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep lshr.*63
+; RUN: opt < %s -instcombine -S | grep lshr.*63
define i32 @t1(i64 %d18) {
entry:
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 9dc7755c1b8b..feed37bd10ab 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -1,25 +1,32 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sh
-; END.
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
+; CHECK: @test1
+; CHECK: ret i32 %A
%B = shl i32 %A, 0 ; <i32> [#uses=1]
ret i32 %B
}
define i32 @test2(i8 %A) {
+; CHECK: @test2
+; CHECK: ret i32 0
%shift.upgrd.1 = zext i8 %A to i32 ; <i32> [#uses=1]
%B = shl i32 0, %shift.upgrd.1 ; <i32> [#uses=1]
ret i32 %B
}
define i32 @test3(i32 %A) {
+; CHECK: @test3
+; CHECK: ret i32 %A
%B = ashr i32 %A, 0 ; <i32> [#uses=1]
ret i32 %B
}
define i32 @test4(i8 %A) {
+; CHECK: @test4
+; CHECK: ret i32 0
%shift.upgrd.2 = zext i8 %A to i32 ; <i32> [#uses=1]
%B = ashr i32 0, %shift.upgrd.2 ; <i32> [#uses=1]
ret i32 %B
@@ -27,22 +34,31 @@ define i32 @test4(i8 %A) {
define i32 @test5(i32 %A) {
+; CHECK: @test5
+; CHECK: ret i32 0
%B = lshr i32 %A, 32 ;; shift all bits out
ret i32 %B
}
define i32 @test5a(i32 %A) {
+; CHECK: @test5a
+; CHECK: ret i32 0
%B = shl i32 %A, 32 ;; shift all bits out
ret i32 %B
}
define i32 @test6(i32 %A) {
+; CHECK: @test6
+; CHECK-NEXT: mul i32 %A, 6
+; CHECK-NEXT: ret i32
%B = shl i32 %A, 1 ;; convert to an mul instruction
%C = mul i32 %B, 3
ret i32 %C
}
define i32 @test7(i8 %A) {
+; CHECK: @test7
+; CHECK-NEXT: ret i32 -1
%shift.upgrd.3 = zext i8 %A to i32
%B = ashr i32 -1, %shift.upgrd.3 ;; Always equal to -1
ret i32 %B
@@ -50,6 +66,8 @@ define i32 @test7(i8 %A) {
;; (A << 5) << 3 === A << 8 == 0
define i8 @test8(i8 %A) {
+; CHECK: @test8
+; CHECK: ret i8 0
%B = shl i8 %A, 5 ; <i8> [#uses=1]
%C = shl i8 %B, 3 ; <i8> [#uses=1]
ret i8 %C
@@ -57,6 +75,9 @@ define i8 @test8(i8 %A) {
;; (A << 7) >> 7 === A & 1
define i8 @test9(i8 %A) {
+; CHECK: @test9
+; CHECK-NEXT: and i8 %A, 1
+; CHECK-NEXT: ret i8
%B = shl i8 %A, 7 ; <i8> [#uses=1]
%C = lshr i8 %B, 7 ; <i8> [#uses=1]
ret i8 %C
@@ -64,6 +85,9 @@ define i8 @test9(i8 %A) {
;; (A >> 7) << 7 === A & 128
define i8 @test10(i8 %A) {
+; CHECK: @test10
+; CHECK-NEXT: and i8 %A, -128
+; CHECK-NEXT: ret i8
%B = lshr i8 %A, 7 ; <i8> [#uses=1]
%C = shl i8 %B, 7 ; <i8> [#uses=1]
ret i8 %C
@@ -71,6 +95,10 @@ define i8 @test10(i8 %A) {
;; (A >> 3) << 4 === (A & 0x1F) << 1
define i8 @test11(i8 %A) {
+; CHECK: @test11
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: and i8
+; CHECK-NEXT: ret i8
%a = mul i8 %A, 3 ; <i8> [#uses=1]
%B = lshr i8 %a, 3 ; <i8> [#uses=1]
%C = shl i8 %B, 4 ; <i8> [#uses=1]
@@ -79,6 +107,9 @@ define i8 @test11(i8 %A) {
;; (A >> 8) << 8 === A & -256
define i32 @test12(i32 %A) {
+; CHECK: @test12
+; CHECK-NEXT: and i32 %A, -256
+; CHECK-NEXT: ret i32
%B = ashr i32 %A, 8 ; <i32> [#uses=1]
%C = shl i32 %B, 8 ; <i32> [#uses=1]
ret i32 %C
@@ -86,6 +117,10 @@ define i32 @test12(i32 %A) {
;; (A >> 3) << 4 === (A & -8) * 2
define i8 @test13(i8 %A) {
+; CHECK: @test13
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: and i8
+; CHECK-NEXT: ret i8
%a = mul i8 %A, 3 ; <i8> [#uses=1]
%B = ashr i8 %a, 3 ; <i8> [#uses=1]
%C = shl i8 %B, 4 ; <i8> [#uses=1]
@@ -94,6 +129,10 @@ define i8 @test13(i8 %A) {
;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4))
define i32 @test14(i32 %A) {
+; CHECK: @test14
+; CHECK-NEXT: or i32 %A, 19744
+; CHECK-NEXT: and i32
+; CHECK-NEXT: ret i32
%B = lshr i32 %A, 4 ; <i32> [#uses=1]
%C = or i32 %B, 1234 ; <i32> [#uses=1]
%D = shl i32 %C, 4 ; <i32> [#uses=1]
@@ -102,6 +141,9 @@ define i32 @test14(i32 %A) {
;; D = ((B << 4) & 1234) >> 4 === B & (1234 >> 4) === B & 77
define i32 @test14a(i32 %A) {
+; CHECK: @test14a
+; CHECK-NEXT: and i32 %A, 77
+; CHECK-NEXT: ret i32
%B = shl i32 %A, 4 ; <i32> [#uses=1]
%C = and i32 %B, 1234 ; <i32> [#uses=1]
%D = lshr i32 %C, 4 ; <i32> [#uses=1]
@@ -109,12 +151,18 @@ define i32 @test14a(i32 %A) {
}
define i32 @test15(i1 %C) {
+; CHECK: @test15
+; CHECK-NEXT: select i1 %C, i32 12, i32 4
+; CHECK-NEXT: ret i32
%A = select i1 %C, i32 3, i32 1 ; <i32> [#uses=1]
%V = shl i32 %A, 2 ; <i32> [#uses=1]
ret i32 %V
}
define i32 @test15a(i1 %C) {
+; CHECK: @test15a
+; CHECK-NEXT: select i1 %C, i32 512, i32 128
+; CHECK-NEXT: ret i32
%A = select i1 %C, i8 3, i8 1 ; <i8> [#uses=1]
%shift.upgrd.4 = zext i8 %A to i32 ; <i32> [#uses=1]
%V = shl i32 64, %shift.upgrd.4 ; <i32> [#uses=1]
@@ -122,13 +170,21 @@ define i32 @test15a(i1 %C) {
}
define i1 @test16(i32 %X) {
- %tmp.3 = ashr i32 %X, 4 ; <i32> [#uses=1]
- %tmp.6 = and i32 %tmp.3, 1 ; <i32> [#uses=1]
- %tmp.7 = icmp ne i32 %tmp.6, 0 ; <i1> [#uses=1]
+; CHECK: @test16
+; CHECK-NEXT: and i32 %X, 16
+; CHECK-NEXT: icmp ne i32
+; CHECK-NEXT: ret i1
+ %tmp.3 = ashr i32 %X, 4
+ %tmp.6 = and i32 %tmp.3, 1
+ %tmp.7 = icmp ne i32 %tmp.6, 0
ret i1 %tmp.7
}
define i1 @test17(i32 %A) {
+; CHECK: @test17
+; CHECK-NEXT: and i32 %A, -8
+; CHECK-NEXT: icmp eq i32
+; CHECK-NEXT: ret i1
%B = lshr i32 %A, 3 ; <i32> [#uses=1]
%C = icmp eq i32 %B, 1234 ; <i1> [#uses=1]
ret i1 %C
@@ -136,6 +192,9 @@ define i1 @test17(i32 %A) {
define i1 @test18(i8 %A) {
+; CHECK: @test18
+; CHECK: ret i1 false
+
%B = lshr i8 %A, 7 ; <i8> [#uses=1]
;; false
%C = icmp eq i8 %B, 123 ; <i1> [#uses=1]
@@ -143,6 +202,9 @@ define i1 @test18(i8 %A) {
}
define i1 @test19(i32 %A) {
+; CHECK: @test19
+; CHECK-NEXT: icmp ult i32 %A, 4
+; CHECK-NEXT: ret i1
%B = ashr i32 %A, 2 ; <i32> [#uses=1]
;; (X & -4) == 0
%C = icmp eq i32 %B, 0 ; <i1> [#uses=1]
@@ -151,6 +213,10 @@ define i1 @test19(i32 %A) {
define i1 @test19a(i32 %A) {
+; CHECK: @test19a
+; CHECK-NEXT: and i32 %A, -4
+; CHECK-NEXT: icmp eq i32
+; CHECK-NEXT: ret i1
%B = ashr i32 %A, 2 ; <i32> [#uses=1]
;; (X & -4) == -4
%C = icmp eq i32 %B, -1 ; <i1> [#uses=1]
@@ -158,6 +224,8 @@ define i1 @test19a(i32 %A) {
}
define i1 @test20(i8 %A) {
+; CHECK: @test20
+; CHECK: ret i1 false
%B = ashr i8 %A, 7 ; <i8> [#uses=1]
;; false
%C = icmp eq i8 %B, 123 ; <i1> [#uses=1]
@@ -165,18 +233,30 @@ define i1 @test20(i8 %A) {
}
define i1 @test21(i8 %A) {
+; CHECK: @test21
+; CHECK-NEXT: and i8 %A, 15
+; CHECK-NEXT: icmp eq i8
+; CHECK-NEXT: ret i1
%B = shl i8 %A, 4 ; <i8> [#uses=1]
%C = icmp eq i8 %B, -128 ; <i1> [#uses=1]
ret i1 %C
}
define i1 @test22(i8 %A) {
+; CHECK: @test22
+; CHECK-NEXT: and i8 %A, 15
+; CHECK-NEXT: icmp eq i8
+; CHECK-NEXT: ret i1
%B = shl i8 %A, 4 ; <i8> [#uses=1]
%C = icmp eq i8 %B, 0 ; <i1> [#uses=1]
ret i1 %C
}
define i8 @test23(i32 %A) {
+; CHECK: @test23
+; CHECK-NEXT: trunc i32 %A to i8
+; CHECK-NEXT: ret i8
+
;; casts not needed
%B = shl i32 %A, 24 ; <i32> [#uses=1]
%C = ashr i32 %B, 24 ; <i32> [#uses=1]
@@ -185,6 +265,9 @@ define i8 @test23(i32 %A) {
}
define i8 @test24(i8 %X) {
+; CHECK: @test24
+; CHECK-NEXT: and i8 %X, 3
+; CHECK-NEXT: ret i8
%Y = and i8 %X, -5 ; <i8> [#uses=1]
%Z = shl i8 %Y, 5 ; <i8> [#uses=1]
%Q = ashr i8 %Z, 5 ; <i8> [#uses=1]
@@ -192,6 +275,11 @@ define i8 @test24(i8 %X) {
}
define i32 @test25(i32 %tmp.2, i32 %AA) {
+; CHECK: @test25
+; CHECK-NEXT: and i32 %tmp.2, -131072
+; CHECK-NEXT: add i32 %{{[^,]*}}, %AA
+; CHECK-NEXT: and i32 %{{[^,]*}}, -131072
+; CHECK-NEXT: ret i32
%x = lshr i32 %AA, 17 ; <i32> [#uses=1]
%tmp.3 = lshr i32 %tmp.2, 17 ; <i32> [#uses=1]
%tmp.5 = add i32 %tmp.3, %x ; <i32> [#uses=1]
@@ -201,6 +289,9 @@ define i32 @test25(i32 %tmp.2, i32 %AA) {
;; handle casts between shifts.
define i32 @test26(i32 %A) {
+; CHECK: @test26
+; CHECK-NEXT: and i32 %A, -2
+; CHECK-NEXT: ret i32
%B = lshr i32 %A, 1 ; <i32> [#uses=1]
%C = bitcast i32 %B to i32 ; <i32> [#uses=1]
%D = shl i32 %C, 1 ; <i32> [#uses=1]
@@ -209,8 +300,46 @@ define i32 @test26(i32 %A) {
define i1 @test27(i32 %x) nounwind {
+; CHECK: @test27
+; CHECK-NEXT: and i32 %x, 8
+; CHECK-NEXT: icmp ne i32
+; CHECK-NEXT: ret i1
%y = lshr i32 %x, 3
%z = trunc i32 %y to i1
ret i1 %z
}
+define i8 @test28(i8 %x) {
+entry:
+; CHECK: @test28
+; CHECK: icmp slt i8 %x, 0
+; CHECK-NEXT: br i1
+ %tmp1 = lshr i8 %x, 7
+ %cond1 = icmp ne i8 %tmp1, 0
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ ret i8 0
+
+bb2:
+ ret i8 1
+}
+
+define i8 @test28a(i8 %x, i8 %y) {
+entry:
+; This shouldn't be transformed.
+; CHECK: @test28a
+; CHECK: %tmp1 = lshr i8 %x, 7
+; CHECK: %cond1 = icmp eq i8 %tmp1, 0
+; CHECK: br i1 %cond1, label %bb2, label %bb1
+ %tmp1 = lshr i8 %x, 7
+ %cond1 = icmp ne i8 %tmp1, 0
+ br i1 %cond1, label %bb1, label %bb2
+bb1:
+ ret i8 %tmp1
+bb2:
+ %tmp2 = add i8 %tmp1, %y
+ ret i8 %tmp2
+}
+
+
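The test28 CHECK lines above encode a sign-bit idiom: for an i8, a logical shift right by 7 isolates the sign bit, so comparing the shift result against zero is the same as a direct signed comparison. Side by side, as a sketch with hypothetical function names:

define i1 @signbit_before(i8 %x) {
  %tmp1 = lshr i8 %x, 7             ; isolates the sign bit: 0 or 1
  %cond1 = icmp ne i8 %tmp1, 0
  ret i1 %cond1
}

define i1 @signbit_after(i8 %x) {
  %cond1 = icmp slt i8 %x, 0        ; same predicate, no shift
  ret i1 %cond1
}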
diff --git a/test/Transforms/InstCombine/shufflemask-undef.ll b/test/Transforms/InstCombine/shufflemask-undef.ll
index a9e8d3495887..cf87aef7df46 100644
--- a/test/Transforms/InstCombine/shufflemask-undef.ll
+++ b/test/Transforms/InstCombine/shufflemask-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {shufflevector.\*i32 8}
+; RUN: opt < %s -instcombine -S | not grep {shufflevector.\*i32 8}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/Transforms/InstCombine/shufflevec-constant.ll b/test/Transforms/InstCombine/shufflevec-constant.ll
index f153a484fa01..2c667903b1eb 100644
--- a/test/Transforms/InstCombine/shufflevec-constant.ll
+++ b/test/Transforms/InstCombine/shufflevec-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep "2 x float"
+; RUN: opt < %s -instcombine -S | grep "2 x float"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/Transforms/InstCombine/signed-comparison.ll b/test/Transforms/InstCombine/signed-comparison.ll
index 86e07ec7ce15..9a08c6446c52 100644
--- a/test/Transforms/InstCombine/signed-comparison.ll
+++ b/test/Transforms/InstCombine/signed-comparison.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: not grep zext %t
; RUN: not grep slt %t
; RUN: grep {icmp ult} %t
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index 716662682da3..008662e04e9d 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {(and\|xor\|add\|shl\|shr)}
; END.
diff --git a/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll b/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
new file mode 100644
index 000000000000..6d2193fe448a
--- /dev/null
+++ b/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -instcombine -disable-output
+
+; SimplifyDemandedBits should cope with pointer types.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+ %struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
+ %struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
+ %struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 }
+ %struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
+ %struct.omp_clause_subcode = type { i32 }
+ %struct.rtunion = type { i8* }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.section = type { %struct.unnamed_section }
+ %struct.u = type { %struct.block_symbol }
+ %struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* }
+
+define fastcc void @cse_insn(%struct.rtx_def* %insn, %struct.rtx_def* %libcall_insn) nounwind {
+entry:
+ br i1 undef, label %bb43, label %bb88
+
+bb43: ; preds = %entry
+ br label %bb88
+
+bb88: ; preds = %bb43, %entry
+ br i1 undef, label %bb95, label %bb107
+
+bb95: ; preds = %bb88
+ unreachable
+
+bb107: ; preds = %bb88
+ %0 = load i16* undef, align 8 ; <i16> [#uses=1]
+ %1 = icmp eq i16 %0, 38 ; <i1> [#uses=1]
+ %src_eqv_here.0 = select i1 %1, %struct.rtx_def* null, %struct.rtx_def* null ; <%struct.rtx_def*> [#uses=1]
+ br i1 undef, label %bb127, label %bb125
+
+bb125: ; preds = %bb107
+ br i1 undef, label %bb127, label %bb126
+
+bb126: ; preds = %bb125
+ br i1 undef, label %bb129, label %bb133
+
+bb127: ; preds = %bb125, %bb107
+ unreachable
+
+bb129: ; preds = %bb126
+ br label %bb133
+
+bb133: ; preds = %bb129, %bb126
+ br i1 undef, label %bb134, label %bb146
+
+bb134: ; preds = %bb133
+ unreachable
+
+bb146: ; preds = %bb133
+ br i1 undef, label %bb180, label %bb186
+
+bb180: ; preds = %bb146
+ %2 = icmp eq %struct.rtx_def* null, null ; <i1> [#uses=1]
+ %3 = zext i1 %2 to i8 ; <i8> [#uses=1]
+ %4 = icmp ne %struct.rtx_def* %src_eqv_here.0, null ; <i1> [#uses=1]
+ %5 = zext i1 %4 to i8 ; <i8> [#uses=1]
+ %toBool181 = icmp ne i8 %3, 0 ; <i1> [#uses=1]
+ %toBool182 = icmp ne i8 %5, 0 ; <i1> [#uses=1]
+ %6 = and i1 %toBool181, %toBool182 ; <i1> [#uses=1]
+ %7 = zext i1 %6 to i8 ; <i8> [#uses=1]
+ %toBool183 = icmp ne i8 %7, 0 ; <i1> [#uses=1]
+ br i1 %toBool183, label %bb184, label %bb186
+
+bb184: ; preds = %bb180
+ br i1 undef, label %bb185, label %bb186
+
+bb185: ; preds = %bb184
+ br label %bb186
+
+bb186: ; preds = %bb185, %bb184, %bb180, %bb146
+ br i1 undef, label %bb190, label %bb195
+
+bb190: ; preds = %bb186
+ unreachable
+
+bb195: ; preds = %bb186
+ unreachable
+}
diff --git a/test/Transforms/InstCombine/sink_instruction.ll b/test/Transforms/InstCombine/sink_instruction.ll
index cb8a090e5a07..e521de208f21 100644
--- a/test/Transforms/InstCombine/sink_instruction.ll
+++ b/test/Transforms/InstCombine/sink_instruction.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: %prcontext div 1 | grep ret
+; RUN: opt -instcombine %s -S | FileCheck %s
;; This tests that the instructions in the entry blocks are sunk into each
;; arm of the 'if'.
-define i32 @foo(i1 %C, i32 %A, i32 %B) {
+define i32 @test1(i1 %C, i32 %A, i32 %B) {
+; CHECK: @test1
entry:
%tmp.2 = sdiv i32 %A, %B ; <i32> [#uses=1]
%tmp.9 = add i32 %B, %A ; <i32> [#uses=1]
@@ -14,6 +14,43 @@ then: ; preds = %entry
ret i32 %tmp.9
endif: ; preds = %entry
+; CHECK: sdiv i32
+; CHECK-NEXT: ret i32
ret i32 %tmp.2
}
+
+;; PHI use, sink divide before call.
+define i32 @test2(i32 %x) nounwind ssp {
+; CHECK: @test2
+; CHECK-NOT: sdiv i32
+entry:
+ br label %bb
+
+bb: ; preds = %bb2, %entry
+ %x_addr.17 = phi i32 [ %x, %entry ], [ %x_addr.0, %bb2 ] ; <i32> [#uses=4]
+ %i.06 = phi i32 [ 0, %entry ], [ %4, %bb2 ] ; <i32> [#uses=1]
+ %0 = add nsw i32 %x_addr.17, 1 ; <i32> [#uses=1]
+ %1 = sdiv i32 %0, %x_addr.17 ; <i32> [#uses=1]
+ %2 = icmp eq i32 %x_addr.17, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+; CHECK: bb1:
+; CHECK-NEXT: add nsw i32 %x_addr.17, 1
+; CHECK-NEXT: sdiv i32
+; CHECK-NEXT: tail call i32 @bar()
+ %3 = tail call i32 @bar() nounwind ; <i32> [#uses=0]
+ br label %bb2
+
+bb2: ; preds = %bb, %bb1
+ %x_addr.0 = phi i32 [ %1, %bb1 ], [ %x_addr.17, %bb ] ; <i32> [#uses=2]
+ %4 = add nsw i32 %i.06, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %4, 1000000 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb4, label %bb
+
+bb4: ; preds = %bb2
+ ret i32 %x_addr.0
+}
+
+declare i32 @bar()
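To make the sinking in @test1 concrete: the entry block computes both the sdiv and the add, but each value is used in only one successor, so after instcombine each instruction moves into the arm that uses it. A rough sketch of the sunk form, with control flow assumed to match the visible labels:

define i32 @test1_sunk(i1 %C, i32 %A, i32 %B) {
entry:
  br i1 %C, label %then, label %endif
then:
  %tmp.9 = add i32 %B, %A           ; sunk: only used on this path
  ret i32 %tmp.9
endif:
  %tmp.2 = sdiv i32 %A, %B          ; sunk: only used on this path
  ret i32 %tmp.2
}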
diff --git a/test/Transforms/InstCombine/sitofp.ll b/test/Transforms/InstCombine/sitofp.ll
index 2bf7385cddff..bd31b89f7879 100644
--- a/test/Transforms/InstCombine/sitofp.ll
+++ b/test/Transforms/InstCombine/sitofp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep itofp
+; RUN: opt < %s -instcombine -S | not grep itofp
define i1 @test1(i8 %A) {
%B = sitofp i8 %A to double
diff --git a/test/Transforms/InstCombine/srem-simplify-bug.ll b/test/Transforms/InstCombine/srem-simplify-bug.ll
index cdf520256837..af824a445df0 100644
--- a/test/Transforms/InstCombine/srem-simplify-bug.ll
+++ b/test/Transforms/InstCombine/srem-simplify-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i1 false}
+; RUN: opt < %s -instcombine -S | grep {ret i1 false}
; PR2276
define i1 @f(i32 %x) {
diff --git a/test/Transforms/InstCombine/srem.ll b/test/Transforms/InstCombine/srem.ll
index 864775ae08a1..beefe4fb8d3f 100644
--- a/test/Transforms/InstCombine/srem.ll
+++ b/test/Transforms/InstCombine/srem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep srem
+; RUN: opt < %s -instcombine -S | grep srem
define i64 @foo(i64 %x1, i64 %y2) {
%r = sdiv i64 %x1, %y2
diff --git a/test/Transforms/InstCombine/srem1.ll b/test/Transforms/InstCombine/srem1.ll
index ee59d3ed99f8..f18690c3fff2 100644
--- a/test/Transforms/InstCombine/srem1.ll
+++ b/test/Transforms/InstCombine/srem1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine
+; RUN: opt < %s -instcombine
; PR2670
@g_127 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/stack-overalign.ll b/test/Transforms/InstCombine/stack-overalign.ll
index 45bdc2e0cbe8..88b4114d5885 100644
--- a/test/Transforms/InstCombine/stack-overalign.ll
+++ b/test/Transforms/InstCombine/stack-overalign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {align 32} | count 1
+; RUN: opt < %s -instcombine -S | grep {align 32} | count 1
; It's tempting to have an instcombine in which the src pointer of a
; memcpy is aligned up to the alignment of the destination, however
diff --git a/test/Transforms/InstCombine/stacksaverestore.ll b/test/Transforms/InstCombine/stacksaverestore.ll
index c82a425b2fd0..0fcaefac6224 100644
--- a/test/Transforms/InstCombine/stacksaverestore.ll
+++ b/test/Transforms/InstCombine/stacksaverestore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {call.*stackrestore} | count 1
+; RUN: opt < %s -instcombine -S | grep {call.*stackrestore} | count 1
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)
diff --git a/test/Transforms/InstCombine/store-merge.ll b/test/Transforms/InstCombine/store-merge.ll
index 7b117ca7f5f9..06d497d10dbf 100644
--- a/test/Transforms/InstCombine/store-merge.ll
+++ b/test/Transforms/InstCombine/store-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {ret i32 %.toremerge} | count 2
;; Simple sinking tests
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 337b2cce08aa..d6f916dc8c7f 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v {store.*,.*null} | not grep store
define void @test1(i32* %P) {
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 1ab4eaf1b49d..bd7a700e22ee 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep -v {sub i32 %Cok, %Bok} | grep -v {sub i32 0, %Aok} | not grep sub
define i32 @test1(i32 %A) {
diff --git a/test/Transforms/InstCombine/trunc-mask-ext.ll b/test/Transforms/InstCombine/trunc-mask-ext.ll
index 7e3d8446acff..93e3753cf502 100644
--- a/test/Transforms/InstCombine/trunc-mask-ext.ll
+++ b/test/Transforms/InstCombine/trunc-mask-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -S > %t
; RUN: not grep zext %t
; RUN: not grep sext %t
diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-0.ll b/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
index 5bcaa66366c7..bfdd98cddfaf 100644
--- a/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
+++ b/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i64 0} | count 2
+; RUN: opt < %s -instcombine -S | grep {ret i64 0} | count 2
define i64 @foo(i32 %x) nounwind {
%y = lshr i32 %x, 1
diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
index 0036760ba50e..d95e8f835908 100644
--- a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
+++ b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t1.ll
+; RUN: opt < %s -instcombine -S > %t1.ll
; RUN: grep udiv %t1.ll | count 2
; RUN: grep zext %t1.ll | count 2
; PR2274
diff --git a/test/Transforms/InstCombine/udiv_select_to_select_shift.ll b/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
index 614ae3dc975c..9b059a6cc94d 100644
--- a/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
+++ b/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
@@ -1,7 +1,7 @@
; Test that this transform works:
; udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis -f -o %t
+; RUN: opt < %s -instcombine -S -o %t
; RUN: not grep select %t
; RUN: grep lshr %t | count 2
; RUN: not grep udiv %t
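Note that in the transform quoted above, the shift amounts are log2 of the select's constants, not the constants themselves. A worked instance with hypothetical constants 16 and 32:

define i32 @udiv_select_sketch(i1 %c, i32 %x) {
  %s1 = lshr i32 %x, 4              ; x / 16
  %s2 = lshr i32 %x, 5              ; x / 32
  %r = select i1 %c, i32 %s1, i32 %s2
  ret i32 %r
}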
diff --git a/test/Transforms/InstCombine/udivrem-change-width.ll b/test/Transforms/InstCombine/udivrem-change-width.ll
new file mode 100644
index 000000000000..56877e30f9cf
--- /dev/null
+++ b/test/Transforms/InstCombine/udivrem-change-width.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | not grep zext
+; PR4548
+
+define i8 @udiv_i8(i8 %a, i8 %b) nounwind {
+ %conv = zext i8 %a to i32
+ %conv2 = zext i8 %b to i32
+ %div = udiv i32 %conv, %conv2
+ %conv3 = trunc i32 %div to i8
+ ret i8 %conv3
+}
+
+define i8 @urem_i8(i8 %a, i8 %b) nounwind {
+ %conv = zext i8 %a to i32
+ %conv2 = zext i8 %b to i32
+ %div = urem i32 %conv, %conv2
+ %conv3 = trunc i32 %div to i8
+ ret i8 %conv3
+}
+
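The fold this test expects (PR4548) narrows the division: both operands are zero-extended from i8, and the i32 quotient and remainder always fit back into i8, so the operation can be done at the original width and the zext/trunc pair vanishes. Sketch of the narrowed form for @udiv_i8 (hypothetical name; the test only greps for the absence of zext):

define i8 @udiv_i8_narrowed(i8 %a, i8 %b) nounwind {
  %div = udiv i8 %a, %b             ; same quotient as the widened version
  ret i8 %div
}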
diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll
index 15956f26b69a..7c2b4b01ca66 100644
--- a/test/Transforms/InstCombine/urem-simplify-bug.ll
+++ b/test/Transforms/InstCombine/urem-simplify-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {= or i32 %x, -5 }
+; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5 }
@.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1]
@.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/urem.ll b/test/Transforms/InstCombine/urem.ll
index 24e7463cbaf8..51084224a734 100644
--- a/test/Transforms/InstCombine/urem.ll
+++ b/test/Transforms/InstCombine/urem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep urem
+; RUN: opt < %s -instcombine -S | grep urem
define i64 @rem_unsigned(i64 %x1, i64 %y2) {
%r = udiv i64 %x1, %y2
diff --git a/test/Transforms/InstCombine/vec_demanded_elts-2.ll b/test/Transforms/InstCombine/vec_demanded_elts-2.ll
index dac03152fea1..41593612e55f 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts-2.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep extractelement
+; RUN: opt < %s -instcombine -S | not grep extractelement
define void @get_image() nounwind {
entry:
diff --git a/test/Transforms/InstCombine/vec_demanded_elts-3.ll b/test/Transforms/InstCombine/vec_demanded_elts-3.ll
index eba3629eb9e7..62e43701d24e 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts-3.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep load
+; RUN: opt < %s -instcombine -S | not grep load
; PR4340
define void @vac(<4 x float>* nocapture %a) nounwind {
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 95df8c63f6d4..2009a776b151 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {fadd float}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {fmul float}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {insertelement.*0.00}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {call.*llvm.x86.sse.mul}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {call.*llvm.x86.sse.sub}
; END.
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 30b2f1d042b4..63e4ee2112d8 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep extractelement
+; RUN: opt < %s -instcombine -S | not grep extractelement
define i32 @test(float %f) {
%tmp7 = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll
index 9be154b98715..eedf882518bf 100644
--- a/test/Transforms/InstCombine/vec_insertelt.ll
+++ b/test/Transforms/InstCombine/vec_insertelt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret <4 x i32> %A}
+; RUN: opt < %s -instcombine -S | grep {ret <4 x i32> %A}
; PR1286
define <4 x i32> @test1(<4 x i32> %A) {
diff --git a/test/Transforms/InstCombine/vec_narrow.ll b/test/Transforms/InstCombine/vec_narrow.ll
index e444c2a65103..daf7bcf297c1 100644
--- a/test/Transforms/InstCombine/vec_narrow.ll
+++ b/test/Transforms/InstCombine/vec_narrow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {add float}
%V = type <4 x float>
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index aaaee3f84315..29adc1e20890 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1,20 +1,25 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep vector_shuffle
-; END.
+; RUN: opt < %s -instcombine -S | FileCheck %s
%T = type <4 x float>
define %T @test1(%T %v1) {
+; CHECK: @test1
+; CHECK: ret %T %v1
%v2 = shufflevector %T %v1, %T undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret %T %v2
}
define %T @test2(%T %v1) {
+; CHECK: @test2
+; CHECK: ret %T %v1
%v2 = shufflevector %T %v1, %T %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret %T %v2
}
define float @test3(%T %A, %T %B, float %f) {
+; CHECK: @test3
+; CHECK: ret float %f
%C = insertelement %T %A, float %f, i32 0
%D = shufflevector %T %C, %T %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
%E = extractelement %T %D, i32 1
@@ -22,18 +27,27 @@ define float @test3(%T %A, %T %B, float %f) {
}
define i32 @test4(<4 x i32> %X) {
+; CHECK: @test4
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: ret
%tmp152.i53899.i = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp34 = extractelement <4 x i32> %tmp152.i53899.i, i32 0
ret i32 %tmp34
}
define i32 @test5(<4 x i32> %X) {
+; CHECK: @test5
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: ret
%tmp152.i53899.i = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
%tmp34 = extractelement <4 x i32> %tmp152.i53899.i, i32 0
ret i32 %tmp34
}
define float @test6(<4 x float> %X) {
+; CHECK: @test6
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: ret
%X1 = bitcast <4 x float> %X to <4 x i32>
%tmp152.i53899.i = shufflevector <4 x i32> %X1, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp152.i53900.i = bitcast <4 x i32> %tmp152.i53899.i to <4 x float>
@@ -42,6 +56,34 @@ define float @test6(<4 x float> %X) {
}
define <4 x float> @test7(<4 x float> %tmp45.i) {
+; CHECK: @test7
+; CHECK-NEXT: ret %T %tmp45.i
%tmp1642.i = shufflevector <4 x float> %tmp45.i, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
ret <4 x float> %tmp1642.i
}
+
+; This should turn into a single shuffle.
+define <4 x float> @test8(<4 x float> %tmp, <4 x float> %tmp1) {
+; CHECK: @test8
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+ %tmp4 = extractelement <4 x float> %tmp, i32 1
+ %tmp2 = extractelement <4 x float> %tmp, i32 3
+ %tmp1.upgrd.1 = extractelement <4 x float> %tmp1, i32 0
+ %tmp128 = insertelement <4 x float> undef, float %tmp4, i32 0
+ %tmp130 = insertelement <4 x float> %tmp128, float undef, i32 1
+ %tmp132 = insertelement <4 x float> %tmp130, float %tmp2, i32 2
+ %tmp134 = insertelement <4 x float> %tmp132, float %tmp1.upgrd.1, i32 3
+ ret <4 x float> %tmp134
+}
+
+; Test folding two shuffles where the first shuffle's vector inputs are a
+; different length than the second's.
+define <4 x i8> @test9(<16 x i8> %tmp6) nounwind {
+; CHECK: @test9
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+ %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 > ; <<4 x i8>> [#uses=1]
+ %tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 > ; <<4 x i8>> [#uses=1]
+ ret <4 x i8> %tmp9
+} \ No newline at end of file
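For test9, the single expected shuffle comes from composing the two masks: element i of the final result is %tmp6[mask1[mask2[i]]], and composing <13,9,4,13> with <3,1,2,0> gives <13,9,4,13> again (the values happen to be symmetric). Worked out as one shuffle, with a hypothetical function name:

define <4 x i8> @test9_combined(<16 x i8> %tmp6) nounwind {
  ; mask1[mask2[0..3]] = mask1[3], mask1[1], mask1[2], mask1[0] = 13, 9, 4, 13
  %tmp9 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
  ret <4 x i8> %tmp9
}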
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
new file mode 100644
index 000000000000..470d48547532
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -0,0 +1,107 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This turns into a&1 != 0
+define <2 x i1> @test1(<2 x i64> %a) {
+ %t = trunc <2 x i64> %a to <2 x i1>
+ ret <2 x i1> %t
+
+; CHECK: @test1
+; CHECK: and <2 x i64> %a, <i64 1, i64 1>
+; CHECK: icmp ne <2 x i64> %tmp, zeroinitializer
+}
+
+; The ashr turns into an lshr.
+define <2 x i64> @test2(<2 x i64> %a) {
+ %b = and <2 x i64> %a, <i64 65535, i64 65535>
+ %t = ashr <2 x i64> %b, <i64 1, i64 1>
+ ret <2 x i64> %t
+
+; CHECK: @test2
+; CHECK: and <2 x i64> %a, <i64 65535, i64 65535>
+; CHECK: lshr <2 x i64> %b, <i64 1, i64 1>
+}
+
+
+
+define <2 x i64> @test3(<4 x float> %a, <4 x float> %b) nounwind readnone {
+entry:
+ %cmp = fcmp ord <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp ord <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %and = and <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %and to <2 x i64>
+ ret <2 x i64> %conv
+
+; CHECK: @test3
+; CHECK: fcmp ord <4 x float> %a, %b
+}
+
+define <2 x i64> @test4(<4 x float> %a, <4 x float> %b) nounwind readnone {
+entry:
+ %cmp = fcmp uno <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp uno <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %or = or <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %or to <2 x i64>
+ ret <2 x i64> %conv
+; CHECK: @test4
+; CHECK: fcmp uno <4 x float> %a, %b
+}
+
+
+
+define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
+entry:
+ %val = trunc <2 x i64> %src to <2 x i32>
+ %add = add <2 x i32> %val, <i32 1, i32 1>
+ store <2 x i32> %add, <2 x i32>* %dst.addr
+ ret void
+}
+
+define <2 x i65> @foo(<2 x i64> %t) {
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = zext <2 x i32> %a to <2 x i65>
+ ret <2 x i65> %b
+}
+define <2 x i64> @bar(<2 x i65> %t) {
+ %a = trunc <2 x i65> %t to <2 x i32>
+ %b = zext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+define <2 x i65> @foos(<2 x i64> %t) {
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i65>
+ ret <2 x i65> %b
+}
+define <2 x i64> @bars(<2 x i65> %t) {
+ %a = trunc <2 x i65> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+define <2 x i64> @quxs(<2 x i64> %t) {
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+define <2 x i64> @quxt(<2 x i64> %t) {
+ %a = shl <2 x i64> %t, <i64 32, i64 32>
+ %b = ashr <2 x i64> %a, <i64 32, i64 32>
+ ret <2 x i64> %b
+}
+define <2 x double> @fa(<2 x double> %t) {
+ %a = fptrunc <2 x double> %t to <2 x float>
+ %b = fpext <2 x float> %a to <2 x double>
+ ret <2 x double> %b
+}
+define <2 x double> @fb(<2 x double> %t) {
+ %a = fptoui <2 x double> %t to <2 x i64>
+ %b = uitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %b
+}
+define <2 x double> @fc(<2 x double> %t) {
+ %a = fptosi <2 x double> %t to <2 x i64>
+ %b = sitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %b
+}
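The test3/test4 folds rest on a NaN argument: fcmp ord X, 0.0 is true exactly when X is not a NaN (0.0 never is), so and'ing the two ord-against-zero results is one fcmp on the pair, and dually or'ing the uno results. The core of the fold at the i1 level, before the sext/bitcast plumbing (hypothetical name):

define <4 x i1> @ord_merged(<4 x float> %a, <4 x float> %b) {
  ; (a is not NaN) and (b is not NaN)  ==  fcmp ord a, b
  %cmp = fcmp ord <4 x float> %a, %b
  ret <4 x i1> %cmp
}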
diff --git a/test/Transforms/InstCombine/vector-srem.ll b/test/Transforms/InstCombine/vector-srem.ll
index e8766ebc1e07..acb11c52adb3 100644
--- a/test/Transforms/InstCombine/vector-srem.ll
+++ b/test/Transforms/InstCombine/vector-srem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {srem <4 x i32>}
+; RUN: opt < %s -instcombine -S | grep {srem <4 x i32>}
define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u)
{
diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll
index 09651ba302d5..5316bd772e13 100644
--- a/test/Transforms/InstCombine/volatile_store.ll
+++ b/test/Transforms/InstCombine/volatile_store.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {volatile store}
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {volatile load}
+; RUN: opt < %s -instcombine -S | grep {volatile store}
+; RUN: opt < %s -instcombine -S | grep {volatile load}
@x = weak global i32 0 ; <i32*> [#uses=2]
diff --git a/test/Transforms/InstCombine/xor-demorgans.ll b/test/Transforms/InstCombine/xor-demorgans.ll
index c8de6dbdf21b..3383845fb361 100644
--- a/test/Transforms/InstCombine/xor-demorgans.ll
+++ b/test/Transforms/InstCombine/xor-demorgans.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {= or}
+; RUN: opt < %s -instcombine -S | not grep {= or}
; PR3266
; XFAIL: *
diff --git a/test/Transforms/InstCombine/xor-undef.ll b/test/Transforms/InstCombine/xor-undef.ll
index c090223b5371..cf72955b66b3 100644
--- a/test/Transforms/InstCombine/xor-undef.ll
+++ b/test/Transforms/InstCombine/xor-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep zeroinitializer
+; RUN: opt < %s -instcombine -S | grep zeroinitializer
define <2 x i64> @f() {
%tmp = xor <2 x i64> undef, undef
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index d8b84a6122dd..a7bcdac08bd8 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -1,6 +1,6 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: not grep {xor }
; END.
@G1 = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index efb3146c68bf..23a991581376 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -1,17 +1,43 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {xor }
+; RUN: opt < %s -instcombine -S | FileCheck %s
; PR1253
define i1 @test0(i32 %A) {
+; CHECK: @test0
+; CHECK: %C = icmp slt i32 %A, 0
%B = xor i32 %A, -2147483648
%C = icmp sgt i32 %B, -1
ret i1 %C
}
define i1 @test1(i32 %A) {
+; CHECK: @test1
+; CHECK: %C = icmp slt i32 %A, 0
%B = xor i32 %A, 12345
%C = icmp slt i32 %B, 0
ret i1 %C
}
+; PR1014
+define i32 @test2(i32 %tmp1) {
+; CHECK: @test2
+; CHECK-NEXT: or i32 %tmp1, 8
+; CHECK-NEXT: and i32
+; CHECK-NEXT: ret i32
+ %ovm = and i32 %tmp1, 32
+ %ov3 = add i32 %ovm, 145
+ %ov110 = xor i32 %ov3, 153
+ ret i32 %ov110
+}
+
+define i32 @test3(i32 %tmp1) {
+; CHECK: @test3
+; CHECK-NEXT: or i32 %tmp1, 8
+; CHECK-NEXT: and i32
+; CHECK-NEXT: ret i32
+ %ovm = or i32 %tmp1, 145
+ %ov31 = and i32 %ovm, 177
+ %ov110 = xor i32 %ov31, 153
+ ret i32 %ov110
+}
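Tracing the constants in test2 shows where its CHECK lines come from: %ovm is 0 or 32, so %ov3 is 145 or 177, and xor with 153 maps those to 8 or 40. That is bit 3 always set plus bit 5 copied from %tmp1. One form consistent with the CHECK lines (the exact mask is an assumption, since the CHECK only names the opcodes):

define i32 @test2_folded(i32 %tmp1) {
  %o = or i32 %tmp1, 8              ; force bit 3
  %r = and i32 %o, 40               ; keep only bits 3 and 5
  ret i32 %r
}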
diff --git a/test/Transforms/InstCombine/zero-point-zero-add.ll b/test/Transforms/InstCombine/zero-point-zero-add.ll
index adb28e4d5c7f..d07a9f4b9de0 100644
--- a/test/Transforms/InstCombine/zero-point-zero-add.ll
+++ b/test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep 0.0 | count 1
+; RUN: opt < %s -instcombine -S | grep 0.0 | count 1
declare double @abs(double)
diff --git a/test/Transforms/InstCombine/zeroext-and-reduce.ll b/test/Transforms/InstCombine/zeroext-and-reduce.ll
index 2b4950aec978..592b8a172f8e 100644
--- a/test/Transforms/InstCombine/zeroext-and-reduce.ll
+++ b/test/Transforms/InstCombine/zeroext-and-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
+; RUN: opt < %s -instcombine -S | \
; RUN: grep {and i32 %Y, 8}
define i32 @test1(i8 %X) {
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
new file mode 100644
index 000000000000..11642733acc0
--- /dev/null
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | not grep zext
+
+define i32 @a(i1 %x) {
+entry:
+ %y = zext i1 %x to i32
+ %res = add i32 %y, 1
+ ret i32 %res
+}
+
+define i32 @b(i1 %x) {
+entry:
+ %y = zext i1 %x to i32
+ %res = add i32 %y, -1
+ ret i32 %res
+}
+
+define i32 @c(i1 %x) {
+entry:
+ %y = zext i1 %x to i32
+ %res = sub i32 0, %y
+ ret i32 %res
+}
+
+define i32 @d(i1 %x) {
+entry:
+ %y = zext i1 %x to i32
+ %res = sub i32 3, %y
+ ret i32 %res
+}
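The point of @c above: subtracting a zext'd i1 from zero negates 0 or 1, yielding 0 or -1, which is exactly what sext produces, so the zext disappears. Sketch of the folded form (hypothetical name):

define i32 @c_folded(i1 %x) {
entry:
  %res = sext i1 %x to i32          ; 0 - zext(%x) == sext(%x)
  ret i32 %res
}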
diff --git a/test/Transforms/InstCombine/zext-fold.ll b/test/Transforms/InstCombine/zext-fold.ll
index 27ea46061b07..9521101e736a 100644
--- a/test/Transforms/InstCombine/zext-fold.ll
+++ b/test/Transforms/InstCombine/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {zext } | count 1
+; RUN: opt < %s -instcombine -S | grep {zext } | count 1
; PR1570
define i32 @test2(float %X, float %Y) {
diff --git a/test/Transforms/InstCombine/zext-or-icmp.ll b/test/Transforms/InstCombine/zext-or-icmp.ll
index 35c7c0a6be6e..969c30157031 100644
--- a/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep icmp | count 1
+; RUN: opt < %s -instcombine -S | grep icmp | count 1
%struct.FooBar = type <{ i8, i8, [2 x i8], i8, i8, i8, i8, i16, i16, [4 x i8], [8 x %struct.Rock] }>
%struct.Rock = type { i16, i16 }
diff --git a/test/Transforms/InstCombine/zext.ll b/test/Transforms/InstCombine/zext.ll
index c0fa3771cc15..10eabf7aed46 100644
--- a/test/Transforms/InstCombine/zext.ll
+++ b/test/Transforms/InstCombine/zext.ll
@@ -1,33 +1,11 @@
; Tests to make sure elimination of casts is working correctly
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: notcast {} {%c1.*}
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i64 @test_sext_zext(i16 %A) {
%c1 = zext i16 %A to i32 ; <i32> [#uses=1]
%c2 = sext i32 %c1 to i64 ; <i64> [#uses=1]
ret i64 %c2
+; CHECK-NOT: %c1
+; CHECK: %c2 = zext i16 %A to i64
+; CHECK: ret i64 %c2
}
-
-; PR3599
-define i32 @test2(i64 %tmp) nounwind readnone {
-entry:
- %tmp5 = trunc i64 %tmp to i8 ; <i8> [#uses=1]
- %tmp7 = lshr i64 %tmp, 8 ; <i64> [#uses=1]
- %tmp8 = trunc i64 %tmp7 to i8 ; <i8> [#uses=1]
- %tmp10 = lshr i64 %tmp, 16 ; <i64> [#uses=1]
- %tmp11 = trunc i64 %tmp10 to i8 ; <i8> [#uses=1]
- %tmp13 = lshr i64 %tmp, 24 ; <i64> [#uses=1]
- %tmp14 = trunc i64 %tmp13 to i8 ; <i8> [#uses=1]
- %tmp1 = zext i8 %tmp5 to i32 ; <i32> [#uses=1]
- %tmp2 = zext i8 %tmp8 to i32 ; <i32> [#uses=1]
- %tmp3 = shl i32 %tmp2, 8 ; <i32> [#uses=1]
- %tmp4 = zext i8 %tmp11 to i32 ; <i32> [#uses=1]
- %tmp6 = shl i32 %tmp4, 16 ; <i32> [#uses=1]
- %tmp9 = zext i8 %tmp14 to i32 ; <i32> [#uses=1]
- %tmp12 = shl i32 %tmp9, 24 ; <i32> [#uses=1]
- %tmp15 = or i32 %tmp12, %tmp1 ; <i32> [#uses=1]
- %tmp16 = or i32 %tmp15, %tmp6 ; <i32> [#uses=1]
- %tmp17 = or i32 %tmp16, %tmp3 ; <i32> [#uses=1]
- ret i32 %tmp17
-}
-
diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll b/test/Transforms/Internalize/2008-05-09-AllButMain.ll
index 6ba5cb985ab9..a85e834582d7 100644
--- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll
+++ b/test/Transforms/Internalize/2008-05-09-AllButMain.ll
@@ -1,12 +1,12 @@
; No arguments means internalize all but main
-; RUN: llvm-as < %s | opt -internalize | llvm-dis | grep internal | count 4
+; RUN: opt < %s -internalize -S | grep internal | count 4
; Internalize all but foo and j
-; RUN: llvm-as < %s | opt -internalize -internalize-public-api-list foo -internalize-public-api-list j | llvm-dis | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | grep internal | count 3
; Nonexistent files should be treated as if they were empty (so internalize all but main)
-; RUN: llvm-as < %s | opt -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null | llvm-dis | grep internal | count 4
-; RUN: llvm-as < %s | opt -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null | llvm-dis | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 4
+; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 3
; -file and -list options should be merged; the .apifile contains foo and j
-; RUN: llvm-as < %s | opt -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile | llvm-dis | grep internal | count 2
+; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | grep internal | count 2
@i = weak global i32 0 ; <i32*> [#uses=0]
@j = weak global i32 0 ; <i32*> [#uses=0]
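The internalize RUN lines above exercise how the pass picks its public API: -internalize-public-api-list entries and the contents of -internalize-public-api-file are merged, a missing file counts as empty, and main is always kept. A hedged sketch of the effect on linkage (hypothetical module; only symbols named on the list keep their linkage):

; RUN: opt < %s -internalize -internalize-public-api-list foo -S
@i = weak global i32 0            ; becomes: @i = internal global i32 0
define void @foo() { ret void }   ; named on the list: linkage unchanged
define i32 @main() { ret i32 0 }  ; main is exempt by default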
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index c0041ac2d7df..7b18a04e1160 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -internalize | llvm-dis | grep internal | count 3
+; RUN: opt < %s -internalize -S | grep internal | count 3
@A = global i32 0
@B = alias i32* @A
diff --git a/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll b/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
index 216dacb8d268..b5d1065e6793 100644
--- a/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
+++ b/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading -simplifycfg | llvm-dis | grep {ret i32 0}
+; RUN: opt < %s -jump-threading -S | grep {ret i32 0}
; PR3138
define i32 @jt() {
diff --git a/test/Transforms/JumpThreading/2008-11-28-InfLoop.ll b/test/Transforms/JumpThreading/2008-11-28-InfLoop.ll
index 974b9ef77478..306b2d5435fc 100644
--- a/test/Transforms/JumpThreading/2008-11-28-InfLoop.ll
+++ b/test/Transforms/JumpThreading/2008-11-28-InfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading | llvm-dis
+; RUN: opt < %s -jump-threading -S
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
diff --git a/test/Transforms/JumpThreading/2009-01-08-DeadLoopRepl.ll b/test/Transforms/JumpThreading/2009-01-08-DeadLoopRepl.ll
index 6de67ebbcecc..208c8229875e 100644
--- a/test/Transforms/JumpThreading/2009-01-08-DeadLoopRepl.ll
+++ b/test/Transforms/JumpThreading/2009-01-08-DeadLoopRepl.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading | llvm-dis
+; RUN: opt < %s -jump-threading | llvm-dis
; PR3298
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/JumpThreading/2009-01-19-InfSwitchLoop.ll b/test/Transforms/JumpThreading/2009-01-19-InfSwitchLoop.ll
index ab8b087f8a4a..05a3ef9017db 100644
--- a/test/Transforms/JumpThreading/2009-01-19-InfSwitchLoop.ll
+++ b/test/Transforms/JumpThreading/2009-01-19-InfSwitchLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading | llvm-dis
+; RUN: opt < %s -jump-threading -S
; PR3353
define i32 @test(i8 %X) {
diff --git a/test/Transforms/JumpThreading/and-and-cond.ll b/test/Transforms/JumpThreading/and-and-cond.ll
index de4435c50794..e6db9ee5a325 100644
--- a/test/Transforms/JumpThreading/and-and-cond.ll
+++ b/test/Transforms/JumpThreading/and-and-cond.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -jump-threading -mem2reg -instcombine -simplifycfg | llvm-dis | grep {ret i32 %v1}
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
; There should be no uncond branches left.
-; RUN: llvm-as < %s | opt -jump-threading -mem2reg -instcombine -simplifycfg | llvm-dis | not grep {br label}
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
declare i32 @f1()
declare i32 @f2()
diff --git a/test/Transforms/JumpThreading/and-cond.ll b/test/Transforms/JumpThreading/and-cond.ll
index b01c4baffc0e..58dbec72a76e 100644
--- a/test/Transforms/JumpThreading/and-cond.ll
+++ b/test/Transforms/JumpThreading/and-cond.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -jump-threading -mem2reg -instcombine -simplifycfg | llvm-dis | grep {ret i32 %v1}
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
; There should be no uncond branches left.
-; RUN: llvm-as < %s | opt -jump-threading -mem2reg -instcombine -simplifycfg | llvm-dis | not grep {br label}
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
declare i32 @f1()
declare i32 @f2()
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index bf746e7f139c..3d936b8f30fc 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | opt -jump-threading -simplifycfg -mem2reg | llvm-dis | grep {ret i32 %v1}
-; There should be no uncond branches left.
-; RUN: llvm-as < %s | opt -jump-threading -simplifycfg -mem2reg | llvm-dis | not grep {br label}
+; RUN: opt < %s -jump-threading -S | FileCheck %s
declare i32 @f1()
declare i32 @f2()
declare void @f3()
-define i32 @test(i1 %cond) {
+define i32 @test1(i1 %cond) {
+; CHECK: @test1
+
br i1 %cond, label %T1, label %F1
T1:
@@ -23,20 +23,27 @@ Merge:
br i1 %A, label %T2, label %F2
T2:
+; CHECK: T2:
+; CHECK: ret i32 %v1
call void @f3()
ret i32 %B
F2:
+; CHECK: F2:
+; CHECK: ret i32 %v2
ret i32 %B
}
;; cond is known false on Entry -> F1 edge!
define i32 @test2(i1 %cond) {
+; CHECK: @test2
Entry:
br i1 %cond, label %T1, label %F1
T1:
+; CHECK: %v1 = call i32 @f1()
+; CHECK: ret i32 47
%v1 = call i32 @f1()
br label %Merge
@@ -51,3 +58,117 @@ F2:
call void @f3()
ret i32 12
}
+
+
+; Undef handling.
+define i32 @test3(i1 %cond) {
+; CHECK: @test3
+; CHECK-NEXT: T1:
+; CHECK-NEXT: ret i32 42
+ br i1 undef, label %T1, label %F1
+
+T1:
+ ret i32 42
+
+F1:
+ ret i32 17
+}
+
+define i32 @test4(i1 %cond, i1 %cond2) {
+; CHECK: @test4
+
+ br i1 %cond, label %T1, label %F1
+
+T1:
+; CHECK: %v1 = call i32 @f1()
+; CHECK-NEXT: br label %T
+
+ %v1 = call i32 @f1()
+ br label %Merge
+
+F1:
+ %v2 = call i32 @f2()
+; CHECK: %v2 = call i32 @f2()
+; CHECK-NEXT: br i1 %cond2,
+ br label %Merge
+
+Merge:
+ %A = phi i1 [undef, %T1], [%cond2, %F1]
+ %B = phi i32 [%v1, %T1], [%v2, %F1]
+ br i1 %A, label %T2, label %F2
+
+T2:
+ call void @f3()
+ ret i32 %B
+
+F2:
+ ret i32 %B
+}
+
+
+;; This tests that the branch in 'merge' can be cloned up into T1.
+define i32 @test5(i1 %cond, i1 %cond2) {
+; CHECK: @test5
+
+ br i1 %cond, label %T1, label %F1
+
+T1:
+; CHECK: T1:
+; CHECK-NEXT: %v1 = call i32 @f1()
+; CHECK-NEXT: %cond3 = icmp eq i32 %v1, 412
+; CHECK-NEXT: br i1 %cond3, label %T2, label %F2
+
+ %v1 = call i32 @f1()
+ %cond3 = icmp eq i32 %v1, 412
+ br label %Merge
+
+F1:
+ %v2 = call i32 @f2()
+ br label %Merge
+
+Merge:
+ %A = phi i1 [%cond3, %T1], [%cond2, %F1]
+ %B = phi i32 [%v1, %T1], [%v2, %F1]
+ br i1 %A, label %T2, label %F2
+
+T2:
+ call void @f3()
+ ret i32 %B
+
+F2:
+ ret i32 %B
+}
+
+
+;; Lexically duplicated conditionals should be threaded.
+
+
+define i32 @test6(i32 %A) {
+; CHECK: @test6
+ %tmp455 = icmp eq i32 %A, 42
+ br i1 %tmp455, label %BB1, label %BB2
+
+BB2:
+; CHECK: call i32 @f1()
+; CHECK-NEXT: call void @f3()
+; CHECK-NEXT: ret i32 4
+ call i32 @f1()
+ br label %BB1
+
+
+BB1:
+ %tmp459 = icmp eq i32 %A, 42
+ br i1 %tmp459, label %BB3, label %BB4
+
+BB3:
+ call i32 @f2()
+ ret i32 3
+
+BB4:
+ call void @f3()
+ ret i32 4
+}
+
+
+
+
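The FileCheck directives introduced in basic.ll behave as follows: CHECK matches anywhere at or after the previous match, CHECK-NEXT must match the very next line of output, and CHECK-NOT forbids a match before the next positive CHECK. A small self-contained sketch (hypothetical test, not part of this commit):

; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @id(i32 %x) {
; CHECK: @id
; CHECK-NOT: add
; CHECK: ret i32 %x
  %y = add i32 %x, 0
  ret i32 %y
}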
diff --git a/test/Transforms/JumpThreading/branch-no-const.ll b/test/Transforms/JumpThreading/branch-no-const.ll
index 0ea2431ca099..16867b07c2e4 100644
--- a/test/Transforms/JumpThreading/branch-no-const.ll
+++ b/test/Transforms/JumpThreading/branch-no-const.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading | llvm-dis | not grep phi
+; RUN: opt < %s -jump-threading -S | not grep phi
declare i8 @mcguffin()
diff --git a/test/Transforms/JumpThreading/compare.ll b/test/Transforms/JumpThreading/compare.ll
index cbada569aac9..581785c45f51 100644
--- a/test/Transforms/JumpThreading/compare.ll
+++ b/test/Transforms/JumpThreading/compare.ll
@@ -1,5 +1,5 @@
; There should be no phi nodes left.
-; RUN: llvm-as < %s | opt -jump-threading -simplifycfg -mem2reg | llvm-dis | not grep {phi i32}
+; RUN: opt < %s -jump-threading -S | not grep {phi i32}
declare i32 @f1()
declare i32 @f2()
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
new file mode 100644
index 000000000000..0b6cd27110e4
--- /dev/null
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -jump-threading -disable-output
+; PR2285
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+ %struct.system__secondary_stack__mark_id = type { i64, i64 }
+
+define void @_ada_c35507b() {
+entry:
+ br label %bb
+
+bb: ; preds = %bb13, %entry
+ %ch.0 = phi i8 [ 0, %entry ], [ 0, %bb13 ] ; <i8> [#uses=1]
+ %tmp11 = icmp ugt i8 %ch.0, 31 ; <i1> [#uses=1]
+ %tmp120 = call %struct.system__secondary_stack__mark_id @system__secondary_stack__ss_mark( ) ; <%struct.system__secondary_stack__mark_id> [#uses=1]
+ br i1 %tmp11, label %bb110, label %bb13
+
+bb13: ; preds = %bb
+ br label %bb
+
+bb110: ; preds = %bb
+ %mrv_gr124 = getresult %struct.system__secondary_stack__mark_id %tmp120, 1 ; <i64> [#uses=0]
+ unreachable
+}
+
+declare %struct.system__secondary_stack__mark_id @system__secondary_stack__ss_mark()
+
+
+
+define fastcc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind ssp {
+entry:
+ br label %bb12
+
+bb6.us:
+ %tmp = icmp eq i32 undef, undef
+ %tmp1 = fsub double undef, undef
+ %tmp2 = fcmp ult double %tmp1, 0.000000e+00
+ br i1 %tmp, label %bb6.us, label %bb13
+
+
+bb12:
+ %tmp3 = fcmp ult double undef, 0.000000e+00
+ br label %bb13
+
+bb13:
+ %.lcssa31 = phi double [ undef, %bb12 ], [ %tmp1, %bb6.us ]
+ %.lcssa30 = phi i1 [ %tmp3, %bb12 ], [ %tmp2, %bb6.us ]
+ br i1 %.lcssa30, label %bb15, label %bb61
+
+bb15:
+ %tmp4 = fsub double -0.000000e+00, %.lcssa31
+ ret void
+
+
+bb61:
+ ret void
+}
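crash.ll uses the other recurring RUN idiom in this commit: -disable-output with no grep or FileCheck at all, so the test passes whenever opt runs the pass over the reproducer and exits cleanly. The skeleton of such a regression test looks like this (hypothetical reproducer body):

; RUN: opt < %s -jump-threading -disable-output
; The pass must neither crash nor assert on this input.
define void @reproducer(i1 %c) {
entry:
  br i1 %c, label %bb, label %done
bb:
  br label %done
done:
  ret void
}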
diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll
index 0c729d1a9b4d..b4d44187f5e9 100644
--- a/test/Transforms/JumpThreading/no-irreducible-loops.ll
+++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading -loop-rotate -instcombine -indvars -loop-unroll -simplifycfg | llvm-dis > %t
+; RUN: opt < %s -jump-threading -loop-rotate -instcombine -indvars -loop-unroll -simplifycfg -S > %t
; RUN: grep {volatile store} %t | count 3
; RUN: not grep {br label} %t
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index 5c0b256224a9..96ba701046d8 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading -mem2reg -simplifycfg | llvm-dis | grep {ret i32 1}
+; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1}
; rdar://6402033
; Test that we can thread through the block with the partially redundant load (%2).
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index aa24014f3b99..7545641f1aee 100644
--- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | opt -lcssa | llvm-dis | \
-; RUN: grep {%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
-; RUN: llvm-as < %s | opt -lcssa | llvm-dis | \
-; RUN: grep {%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry}
+; RUN: opt < %s -lcssa -S | \
+; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
+; RUN: opt < %s -lcssa -S | \
+; RUN: grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry}
%struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
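Note the doubled percent signs introduced above: the test harness substitutes %-prefixed tokens in RUN lines (for example, %s expands to the test file), so a literal % inside a grep pattern has to be escaped as %%. Sketch of the escaped form, as used in the test:

; RUN: opt < %s -lcssa -S | \
; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
; The harness rewrites %% to a single %, so grep receives the pattern
; {%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}.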
diff --git a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
index eaccbadbf1d7..ad4f1447b292 100644
--- a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
+++ b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -lcssa | llvm-dis | \
+; RUN: opt < %s -lcssa -S | \
; RUN: grep {%X.1.lcssa}
-; RUN: llvm-as < %s | opt -lcssa | llvm-dis | \
+; RUN: opt < %s -lcssa -S | \
; RUN: not grep {%X.1.lcssa1}
declare i1 @c1()
diff --git a/test/Transforms/LCSSA/2006-07-09-NoDominator.ll b/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
index c8af10058c08..b03f09ac20e5 100644
--- a/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
+++ b/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lcssa
+; RUN: opt < %s -lcssa
%struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
diff --git a/test/Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll b/test/Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll
index bececb187641..3ba8d18b00cf 100644
--- a/test/Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll
+++ b/test/Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lcssa -disable-output -verify-dom-info
+; RUN: opt < %s -lcssa -disable-output -verify-dom-info -verify-loop-info
; PR977
; END.
declare i32 @opost_block()
diff --git a/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll b/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll
index b02feb4103d2..ecb1be5c674e 100644
--- a/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll
+++ b/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lcssa -disable-output
+; RUN: opt < %s -lcssa -disable-output
; PR977
; END.
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM-2.ll b/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
index e8dc39135295..2c5815cef33a 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -licm -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output
define i32 @main(i32 %argc, i8** %argv) {
entry:
br label %bb7
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM-3.ll b/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
index 72cebed5d9f6..7e0d3c63b13c 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -licm -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output
define i32 @main(i32 %argc, i8** %argv) {
entry:
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM.ll b/test/Transforms/LCSSA/2007-07-12-LICM.ll
index 0c433c3ff374..8c07aa2ec352 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -licm -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output
define i32 @main(i32 %argc, i8** %argv) {
entry:
br label %bb7
diff --git a/test/Transforms/LCSSA/basictest.ll b/test/Transforms/LCSSA/basictest.ll
index 090bde95d971..23ab2c0ce056 100644
--- a/test/Transforms/LCSSA/basictest.ll
+++ b/test/Transforms/LCSSA/basictest.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -lcssa | llvm-dis | \
+; RUN: opt < %s -lcssa -S | \
; RUN: grep {X3.lcssa = phi i32}
-; RUN: llvm-as < %s | opt -lcssa | llvm-dis | \
+; RUN: opt < %s -lcssa -S | \
; RUN: grep {X4 = add i32 3, %X3.lcssa}
define void @lcssa(i1 %S2) {
diff --git a/test/Transforms/LCSSA/invoke-dest.ll b/test/Transforms/LCSSA/invoke-dest.ll
index 5c6c7a0a9b52..454715089c3b 100644
--- a/test/Transforms/LCSSA/invoke-dest.ll
+++ b/test/Transforms/LCSSA/invoke-dest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lcssa
+; RUN: opt < %s -lcssa
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/LICM/2003-02-26-LoopExitNotDominated.ll b/test/Transforms/LICM/2003-02-26-LoopExitNotDominated.ll
index cf0520617938..ff20312ec372 100644
--- a/test/Transforms/LICM/2003-02-26-LoopExitNotDominated.ll
+++ b/test/Transforms/LICM/2003-02-26-LoopExitNotDominated.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -basicaa -licm -disable-output
+; RUN: opt < %s -basicaa -licm -disable-output
;%MoveArray = external global [64 x ulong]
diff --git a/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll b/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll
index bf209eaa479f..4782bd17f893 100644
--- a/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll
+++ b/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll
@@ -1,6 +1,6 @@
; Exit blocks need to be updated for all nested loops...
-; RUN: llvm-as < %s | opt -loopsimplify
+; RUN: opt < %s -loopsimplify
define i32 @yyparse() {
bb0:
diff --git a/test/Transforms/LICM/2003-02-27-PreheaderExitNodeUpdate.ll b/test/Transforms/LICM/2003-02-27-PreheaderExitNodeUpdate.ll
index 4b51a3be4b06..2718cb105377 100644
--- a/test/Transforms/LICM/2003-02-27-PreheaderExitNodeUpdate.ll
+++ b/test/Transforms/LICM/2003-02-27-PreheaderExitNodeUpdate.ll
@@ -1,7 +1,7 @@
; This testcase fails because preheader insertion is not updating exit node
; information for loops.
-; RUN: llvm-as < %s | opt -licm
+; RUN: opt < %s -licm
define i32 @main(i32 %argc, i8** %argv) {
bb0:
diff --git a/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll b/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll
index 18b86064ed04..70a04c73b1d3 100644
--- a/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll
+++ b/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll
@@ -3,7 +3,7 @@
; happens because preheader insertion doesn't insert a preheader for this
; case... bad.
-; RUN: llvm-as < %s | opt -licm -loop-deletion -simplifycfg | llvm-dis | \
+; RUN: opt < %s -licm -loop-deletion -simplifycfg -S | \
; RUN: not grep {br }
define i32 @main(i32 %argc) {
diff --git a/test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll b/test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll
index 7ce164ec237f..a9c6b856f8ea 100644
--- a/test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll
+++ b/test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll
@@ -1,6 +1,6 @@
; LICM is adding stores before phi nodes. bad.
-; RUN: llvm-as < %s | opt -licm
+; RUN: opt < %s -licm
define i1 @test(i1 %c) {
; <label>:0
diff --git a/test/Transforms/LICM/2003-02-28-PromoteDifferentType.ll b/test/Transforms/LICM/2003-02-28-PromoteDifferentType.ll
index 849ecd7cb1bb..c759e6eff8e7 100644
--- a/test/Transforms/LICM/2003-02-28-PromoteDifferentType.ll
+++ b/test/Transforms/LICM/2003-02-28-PromoteDifferentType.ll
@@ -1,6 +1,6 @@
; Test that hoisting is disabled for pointers of different types...
;
-; RUN: llvm-as < %s | opt -licm
+; RUN: opt < %s -licm
define void @test(i32* %P) {
br label %Loop
diff --git a/test/Transforms/LICM/2003-05-02-LoadHoist.ll b/test/Transforms/LICM/2003-05-02-LoadHoist.ll
index 7b58535972e5..71d3e789aa08 100644
--- a/test/Transforms/LICM/2003-05-02-LoadHoist.ll
+++ b/test/Transforms/LICM/2003-05-02-LoadHoist.ll
@@ -3,7 +3,7 @@
; loaded from. Basically if the load gets hoisted, the subtract gets turned
; into a constant zero.
;
-; RUN: llvm-as < %s | opt -licm -gvn -instcombine | llvm-dis | grep load
+; RUN: opt < %s -licm -gvn -instcombine -S | grep load
@X = global i32 7 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
index e491c9aee479..67c3951d74e4 100644
--- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
+++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm | lli
+; RUN: opt < %s -licm | lli
define i32 @main() {
entry:
diff --git a/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll b/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
index b9c9eb373999..16f4fed34ec3 100644
--- a/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
+++ b/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -globalsmodref-aa -licm -disable-output
+; RUN: opt < %s -globalsmodref-aa -licm -disable-output
@PL_regcomp_parse = internal global i8* null ; <i8**> [#uses=2]
diff --git a/test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll b/test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll
index 9a53d95996a7..a119865f0519 100644
--- a/test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll
+++ b/test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
%struct.roadlet = type { i8*, %struct.vehicle*, [8 x %struct.roadlet*], [8 x %struct.roadlet* (%struct.roadlet*, %struct.vehicle*, i32)*] }
%struct.vehicle = type { %struct.roadlet*, i8*, i32, i32, %union.._631., i32 }
%union.._631. = type { i32 }
diff --git a/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll b/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll
index b001cd0d5cd2..91740cf0eb71 100644
--- a/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll
+++ b/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
define void @test({ i32 }* %P) {
br label %Loop
diff --git a/test/Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll b/test/Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll
index c2c4bcb4ab0b..9763660ffb0b 100644
--- a/test/Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll
+++ b/test/Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
; PR908
; END.
diff --git a/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
index bf5d7bc09297..c12e13beccf5 100644
--- a/test/Transforms/LICM/2007-05-22-VolatileSink.ll
+++ b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm | llvm-dis | grep {volatile store}
+; RUN: opt < %s -licm -S | grep {volatile store}
; PR1435
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/LICM/2007-07-30-AliasSet.ll b/test/Transforms/LICM/2007-07-30-AliasSet.ll
index 110d88449464..8ecd1bcee486 100644
--- a/test/Transforms/LICM/2007-07-30-AliasSet.ll
+++ b/test/Transforms/LICM/2007-07-30-AliasSet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -loop-unswitch -disable-output
+; RUN: opt < %s -licm -loop-unswitch -disable-output
%struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
%struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
diff --git a/test/Transforms/LICM/2007-09-17-PromoteValue.ll b/test/Transforms/LICM/2007-09-17-PromoteValue.ll
index e9ba9e16318f..31abd8c180a5 100644
--- a/test/Transforms/LICM/2007-09-17-PromoteValue.ll
+++ b/test/Transforms/LICM/2007-09-17-PromoteValue.ll
@@ -2,7 +2,7 @@
; Do not promote the getelementptr because it may expose a load from a null
; pointer and a store to a null pointer, both of which are guarded by the
; icmp eq %struct.decision* null, null condition.
-; RUN: llvm-as < %s | opt -licm | llvm-dis | not grep promoted
+; RUN: opt < %s -licm -S | not grep promoted
%struct.decision = type { i8, %struct.decision* }
define i32 @main() {
diff --git a/test/Transforms/LICM/2007-09-24-PromoteNullValue.ll b/test/Transforms/LICM/2007-09-24-PromoteNullValue.ll
index f9f45f389b8a..916f479cba14 100644
--- a/test/Transforms/LICM/2007-09-24-PromoteNullValue.ll
+++ b/test/Transforms/LICM/2007-09-24-PromoteNullValue.ll
@@ -1,5 +1,5 @@
; Do not promote null value because it may be unsafe to do so.
-; RUN: llvm-as < %s | opt -licm | llvm-dis | not grep promoted
+; RUN: opt < %s -licm -S | not grep promoted
define i32 @f(i32 %foo, i32 %bar, i32 %com) {
entry:
diff --git a/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll b/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll
index 7359cc0695ad..59f1dcbe2d7b 100644
--- a/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll
+++ b/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm | llvm-dis | grep promoted
+; RUN: opt < %s -licm -S | grep promoted
; Promote value if at least one use is safe
diff --git a/test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll b/test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll
index dcb20e7bbd0c..a5a7bf85f10d 100644
--- a/test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll
+++ b/test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
; PR2346
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll b/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
index 3824d5359772..781030938438 100644
--- a/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
+++ b/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -enable-licm-constant-variables | llvm-dis | grep -A 1 entry | grep load.*@a
+; RUN: opt < %s -licm -enable-licm-constant-variables -S | grep -A 1 entry | grep load.*@a
@a = external constant float*
define void @test(i32 %count) {
diff --git a/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll b/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll
index c3c9a810c52c..d1fe48c2357a 100644
--- a/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll
+++ b/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -licm -loop-index-split -instcombine -disable-output
+; RUN: opt < %s -licm -loop-index-split -instcombine -disable-output
%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
diff --git a/test/Transforms/LICM/Preserve-LCSSA.ll b/test/Transforms/LICM/Preserve-LCSSA.ll
index 850d071810ef..24c4ad1da42d 100644
--- a/test/Transforms/LICM/Preserve-LCSSA.ll
+++ b/test/Transforms/LICM/Preserve-LCSSA.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -licm -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output
define i32 @stringSearch_Clib(i32 %count) {
entry:
diff --git a/test/Transforms/LICM/basictest.ll b/test/Transforms/LICM/basictest.ll
index 0a57ce4539fe..1dbb4dc6b499 100644
--- a/test/Transforms/LICM/basictest.ll
+++ b/test/Transforms/LICM/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm | llvm-dis
+; RUN: opt < %s -licm | llvm-dis
define void @testfunc(i32 %i) {
; <label>:0
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
new file mode 100644
index 000000000000..e7d36afb91b1
--- /dev/null
+++ b/test/Transforms/LICM/hoisting.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -licm -S | FileCheck %s
+
+@X = global i32 0 ; <i32*> [#uses=1]
+
+declare void @foo()
+
+; This testcase tests for a problem where LICM hoists
+; potentially trapping instructions when they are not guaranteed to execute.
+define i32 @test1(i1 %c) {
+; CHECK: @test1
+ %A = load i32* @X ; <i32> [#uses=2]
+ br label %Loop
+Loop: ; preds = %LoopTail, %0
+ call void @foo( )
+ br i1 %c, label %LoopTail, label %IfUnEqual
+
+IfUnEqual: ; preds = %Loop
+; CHECK: IfUnEqual:
+; CHECK-NEXT: sdiv i32 4, %A
+ %B1 = sdiv i32 4, %A ; <i32> [#uses=1]
+ br label %LoopTail
+
+LoopTail: ; preds = %IfUnEqual, %Loop
+ %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1]
+ br i1 %c, label %Loop, label %Out
+Out: ; preds = %LoopTail
+ %C = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %C
+}
+
+
+declare void @foo2(i32)
+
+
+;; It is ok and desirable to hoist this potentially trapping instruction.
+define i32 @test2(i1 %c) {
+; CHECK: @test2
+; CHECK-NEXT: load i32* @X
+; CHECK-NEXT: %B = sdiv i32 4, %A
+ %A = load i32* @X ; <i32> [#uses=2]
+ br label %Loop
+Loop:
+ ;; Should have hoisted this div!
+ %B = sdiv i32 4, %A ; <i32> [#uses=2]
+ call void @foo2( i32 %B )
+ br i1 %c, label %Loop, label %Out
+Out: ; preds = %Loop
+ %C = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %C
+}
diff --git a/test/Transforms/LICM/licm_preserve_dbginfo.ll b/test/Transforms/LICM/licm_preserve_dbginfo.ll
new file mode 100644
index 000000000000..e013c2735449
--- /dev/null
+++ b/test/Transforms/LICM/licm_preserve_dbginfo.ll
@@ -0,0 +1,55 @@
+; RUN: opt -licm -S <%s | FileCheck %s
+; Test that licm doesn't sink/delete debug info.
+define i32 @foo(i32 %a, i32 %j) nounwind {
+entry:
+;CHECK: entry:
+ call void @llvm.dbg.func.start(metadata !0)
+ call void @llvm.dbg.stoppoint(i32 3, i32 5, metadata !1)
+;CHECK: %mul = mul i32 %j, %j
+ br label %for.cond
+
+for.cond:
+;CHECK: for.cond:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %s.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+ call void @llvm.dbg.stoppoint(i32 4, i32 5, metadata !1)
+; CHECK: call void @llvm.dbg.stoppoint(i32 4, i32 5, metadata !1)
+ %cmp = icmp slt i32 %i.0, %a
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+;CHECK: for.body:
+ call void @llvm.dbg.stoppoint(i32 5, i32 2, metadata !1)
+;CHECK: call void @llvm.dbg.stoppoint(i32 5, i32 2, metadata !1)
+ %mul = mul i32 %j, %j
+ %add = add nsw i32 %s.0, %mul
+ br label %for.inc
+
+for.inc:
+;CHECK: for.inc:
+ call void @llvm.dbg.stoppoint(i32 4, i32 18, metadata !1)
+;CHECK: call void @llvm.dbg.stoppoint(i32 4, i32 18, metadata !1)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ call void @llvm.dbg.stoppoint(i32 7, i32 5, metadata !1)
+ br label %0
+
+; <label>:0 ; preds = %for.end
+ call void @llvm.dbg.stoppoint(i32 8, i32 1, metadata !1)
+ call void @llvm.dbg.region.end(metadata !0)
+ ret i32 %s.0
+}
+
+declare void @llvm.dbg.func.start(metadata) nounwind readnone
+
+declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
+
+declare void @llvm.dbg.region.end(metadata) nounwind readnone
+
+!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 2, metadata !2, i1 false, i1 true}; [DW_TAG_subprogram ]
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"licm.c", metadata !"/home/edwin", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
diff --git a/test/Transforms/LICM/no-preheader-test.ll b/test/Transforms/LICM/no-preheader-test.ll
index 6ea08612ea86..bd3eea38ef3e 100644
--- a/test/Transforms/LICM/no-preheader-test.ll
+++ b/test/Transforms/LICM/no-preheader-test.ll
@@ -1,5 +1,5 @@
; Test that LICM works when there is no loop preheader
-; RUN: llvm-as < %s | opt -licm | llvm-dis
+; RUN: opt < %s -licm | llvm-dis
define void @testfunc(i32 %i.s, i1 %ifcond) {
br i1 %ifcond, label %Then, label %Else
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index f6c0f0821a30..ef28c38ca607 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,13 +1,17 @@
-; RUN: llvm-as < %s | opt -licm -disable-output -stats |& \
-; RUN: grep {memory locations promoted to register}
+; RUN: opt < %s -licm -S | FileCheck %s
@X = global i32 7 ; <i32*> [#uses=4]
-define void @testfunc(i32 %i) {
-; <label>:0
+define void @test1(i32 %i) {
+Entry:
br label %Loop
+; CHECK: @test1
+; CHECK: Entry:
+; CHECK-NEXT: load i32* @X
+; CHECK-NEXT: br label %Loop
+
Loop: ; preds = %Loop, %0
- %j = phi i32 [ 0, %0 ], [ %Next, %Loop ] ; <i32> [#uses=1]
+ %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
%x = load i32* @X ; <i32> [#uses=1]
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
store i32 %x2, i32* @X
@@ -15,12 +19,23 @@ Loop: ; preds = %Loop, %0
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
br i1 %cond, label %Out, label %Loop
-Out: ; preds = %Loop
+Out:
ret void
+; CHECK: Out:
+; CHECK-NEXT: store i32 %x2, i32* @X
+; CHECK-NEXT: ret void
+
}
-define void @testhard(i32 %i) {
+define void @test2(i32 %i) {
+Entry:
br label %Loop
+; CHECK: @test2
+; CHECK: Entry:
+; CHECK-NEXT: %X1 = getelementptr i32* @X, i64 0
+; CHECK-NEXT: %X2 = getelementptr i32* @X, i64 0
+; CHECK-NEXT: %X1.promoted = load i32* %X1
+; CHECK-NEXT: br label %Loop
Loop: ; preds = %Loop, %0
%X1 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1]
@@ -32,4 +47,27 @@ Loop: ; preds = %Loop, %0
Exit: ; preds = %Loop
ret void
+; CHECK: Exit:
+; CHECK-NEXT: store i32 %V, i32* %X1
+; CHECK-NEXT: ret void
}
+
+
+
+define void @test3(i32 %i) {
+; CHECK: @test3
+ br label %Loop
+Loop:
+ ; Should not promote this to a register
+ %x = volatile load i32* @X
+ %x2 = add i32 %x, 1
+ store i32 %x2, i32* @X
+ br i1 true, label %Out, label %Loop
+
+; CHECK: Loop:
+; CHECK-NEXT: volatile load
+
+Out: ; preds = %Loop
+ ret void
+}
+
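The CHECK lines added to scalar_promote.ll pin down the promoted shape: one load from @X in the preheader, arithmetic on register values inside the loop, and a single store sunk to the exit block, while the volatile load in test3 must stay untouched. A hedged before/after sketch in a hypothetical module (typed-pointer syntax of this era):

@X = global i32 7

; Before promotion: every iteration round-trips through memory.
define void @before() {
Entry:
  br label %Loop
Loop:
  %x = load i32* @X
  %x2 = add i32 %x, 1
  store i32 %x2, i32* @X
  %c = icmp eq i32 %x2, 0
  br i1 %c, label %Out, label %Loop
Out:
  ret void
}

; After -licm promotion: load hoisted, store sunk, loop carried in a phi.
define void @after() {
Entry:
  %x.pre = load i32* @X
  br label %Loop
Loop:
  %x = phi i32 [ %x.pre, %Entry ], [ %x2, %Loop ]
  %x2 = add i32 %x, 1
  %c = icmp eq i32 %x2, 0
  br i1 %c, label %Out, label %Loop
Out:
  store i32 %x2, i32* @X
  ret void
}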
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
new file mode 100644
index 000000000000..11112eb74443
--- /dev/null
+++ b/test/Transforms/LICM/sinking.ll
@@ -0,0 +1,235 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+
+declare i32 @strlen(i8*) readonly
+
+declare void @foo()
+
+; Sink readonly function.
+define i32 @test1(i8* %P) {
+ br label %Loop
+
+Loop: ; preds = %Loop, %0
+ %A = call i32 @strlen( i8* %P ) readonly
+ br i1 false, label %Loop, label %Out
+
+Out: ; preds = %Loop
+ ret i32 %A
+; CHECK: @test1
+; CHECK: Out:
+; CHECK-NEXT: call i32 @strlen
+; CHECK-NEXT: ret i32 %A
+}
+
+declare double @sin(double) readnone
+
+; Sink a readnone function out of a loop that contains calls with unknown memory behavior.
+define double @test2(double %X) {
+ br label %Loop
+
+Loop: ; preds = %Loop, %0
+ call void @foo( )
+ %A = call double @sin( double %X ) readnone
+ br i1 true, label %Loop, label %Out
+
+Out: ; preds = %Loop
+ ret double %A
+; CHECK: @test2
+; CHECK: Out:
+; CHECK-NEXT: call double @sin
+; CHECK-NEXT: ret double %A
+}
+
+; This testcase checks to make sure the sinker does not cause problems with
+; critical edges.
+define void @test3() {
+Entry:
+ br i1 false, label %Loop, label %Exit
+Loop:
+ %X = add i32 0, 1
+ br i1 false, label %Loop, label %Exit
+Exit:
+ %Y = phi i32 [ 0, %Entry ], [ %X, %Loop ]
+ ret void
+
+; CHECK: @test3
+; CHECK: Exit.loopexit:
+; CHECK-NEXT: %X = add i32 0, 1
+; CHECK-NEXT: br label %Exit
+
+}
+
+; If the result of an instruction is only used outside of the loop, sink
+; the instruction to the exit blocks instead of executing it on every
+; iteration of the loop.
+;
+define i32 @test4(i32 %N) {
+Entry:
+ br label %Loop
+Loop: ; preds = %Loop, %Entry
+ %N_addr.0.pn = phi i32 [ %dec, %Loop ], [ %N, %Entry ]
+ %tmp.6 = mul i32 %N, %N_addr.0.pn ; <i32> [#uses=1]
+ %tmp.7 = sub i32 %tmp.6, %N ; <i32> [#uses=1]
+ %dec = add i32 %N_addr.0.pn, -1 ; <i32> [#uses=1]
+ %tmp.1 = icmp ne i32 %N_addr.0.pn, 1 ; <i1> [#uses=1]
+ br i1 %tmp.1, label %Loop, label %Out
+Out: ; preds = %Loop
+ ret i32 %tmp.7
+; CHECK: @test4
+; CHECK: Out:
+; CHECK-NEXT: mul i32 %N, %N_addr.0.pn
+; CHECK-NEXT: sub i32 %tmp.6, %N
+; CHECK-NEXT: ret i32
+}
+
+; To reduce register pressure, if a load is hoistable out of the loop, and the
+; result of the load is only used outside of the loop, sink the load instead of
+; hoisting it!
+;
+@X = global i32 5 ; <i32*> [#uses=1]
+
+define i32 @test5(i32 %N) {
+Entry:
+ br label %Loop
+Loop: ; preds = %Loop, %Entry
+ %N_addr.0.pn = phi i32 [ %dec, %Loop ], [ %N, %Entry ]
+ %tmp.6 = load i32* @X ; <i32> [#uses=1]
+ %dec = add i32 %N_addr.0.pn, -1 ; <i32> [#uses=1]
+ %tmp.1 = icmp ne i32 %N_addr.0.pn, 1 ; <i1> [#uses=1]
+ br i1 %tmp.1, label %Loop, label %Out
+Out: ; preds = %Loop
+ ret i32 %tmp.6
+; CHECK: @test5
+; CHECK: Out:
+; CHECK-NEXT: %tmp.6 = load i32* @X
+; CHECK-NEXT: ret i32 %tmp.6
+}
+
+
+
+; The loop sinker was running from the bottom of the loop to the top, causing
+; it to miss opportunities to sink instructions that depended on sinking other
+; instructions from the loop. Instead they got hoisted, which is better than
+; leaving them in the loop, but increases register pressure pointlessly.
+
+ %Ty = type { i32, i32 }
+@X2 = external global %Ty
+
+define i32 @test6() {
+ br label %Loop
+Loop:
+ %dead = getelementptr %Ty* @X2, i64 0, i32 0
+ %sunk2 = load i32* %dead
+ br i1 false, label %Loop, label %Out
+Out: ; preds = %Loop
+ ret i32 %sunk2
+; CHECK: @test6
+; CHECK: Out:
+; CHECK-NEXT: %dead = getelementptr %Ty* @X2, i64 0, i32 0
+; CHECK-NEXT: %sunk2 = load i32* %dead
+; CHECK-NEXT: ret i32 %sunk2
+}
+
+
+
+; This testcase ensures that we can sink instructions from loops with
+; multiple exits.
+;
+define i32 @test7(i32 %N, i1 %C) {
+Entry:
+ br label %Loop
+Loop: ; preds = %ContLoop, %Entry
+ %N_addr.0.pn = phi i32 [ %dec, %ContLoop ], [ %N, %Entry ]
+ %tmp.6 = mul i32 %N, %N_addr.0.pn
+ %tmp.7 = sub i32 %tmp.6, %N ; <i32> [#uses=2]
+ %dec = add i32 %N_addr.0.pn, -1 ; <i32> [#uses=1]
+ br i1 %C, label %ContLoop, label %Out1
+ContLoop:
+ %tmp.1 = icmp ne i32 %N_addr.0.pn, 1
+ br i1 %tmp.1, label %Loop, label %Out2
+Out1: ; preds = %Loop
+ ret i32 %tmp.7
+Out2: ; preds = %ContLoop
+ ret i32 %tmp.7
+; CHECK: @test7
+; CHECK: Out1:
+; CHECK-NEXT: mul i32 %N, %N_addr.0.pn
+; CHECK-NEXT: sub i32 %tmp.6, %N
+; CHECK-NEXT: ret
+; CHECK: Out2:
+; CHECK-NEXT: mul i32 %N, %N_addr.0.pn
+; CHECK-NEXT: sub i32 %tmp.6
+; CHECK-NEXT: ret
+}
+
+
+; This testcase checks to make sure we can sink values which are only live on
+; some exits out of the loop, and that we can do so without breaking dominator
+; info.
+define i32 @test8(i1 %C1, i1 %C2, i32* %P, i32* %Q) {
+Entry:
+ br label %Loop
+Loop: ; preds = %Cont, %Entry
+ br i1 %C1, label %Cont, label %exit1
+Cont: ; preds = %Loop
+ %X = load i32* %P ; <i32> [#uses=2]
+ store i32 %X, i32* %Q
+ %V = add i32 %X, 1 ; <i32> [#uses=1]
+ br i1 %C2, label %Loop, label %exit2
+exit1: ; preds = %Loop
+ ret i32 0
+exit2: ; preds = %Cont
+ ret i32 %V
+; CHECK: @test8
+; CHECK: exit1:
+; CHECK-NEXT: ret i32 0
+; CHECK: exit2:
+; CHECK-NEXT: %V = add i32 %X, 1
+; CHECK-NEXT: ret i32 %V
+}
+
+
+define void @test9() {
+loopentry.2.i:
+ br i1 false, label %no_exit.1.i.preheader, label %loopentry.3.i.preheader
+no_exit.1.i.preheader: ; preds = %loopentry.2.i
+ br label %no_exit.1.i
+no_exit.1.i: ; preds = %endif.8.i, %no_exit.1.i.preheader
+ br i1 false, label %return.i, label %endif.8.i
+endif.8.i: ; preds = %no_exit.1.i
+ %inc.1.i = add i32 0, 1 ; <i32> [#uses=1]
+ br i1 false, label %no_exit.1.i, label %loopentry.3.i.preheader.loopexit
+loopentry.3.i.preheader.loopexit: ; preds = %endif.8.i
+ br label %loopentry.3.i.preheader
+loopentry.3.i.preheader: ; preds = %loopentry.3.i.preheader.loopexit, %loopentry.2.i
+ %arg_num.0.i.ph13000 = phi i32 [ 0, %loopentry.2.i ], [ %inc.1.i, %loopentry.3.i.preheader.loopexit ] ; <i32> [#uses=0]
+ ret void
+return.i: ; preds = %no_exit.1.i
+ ret void
+
+; CHECK: @test9
+; CHECK: loopentry.3.i.preheader.loopexit:
+; CHECK-NEXT: %inc.1.i = add i32 0, 1
+; CHECK-NEXT: br label %loopentry.3.i.preheader
+}
+
+
+; Potentially trapping instructions may be sunk as long as they are guaranteed
+; to be executed.
+define i32 @test10(i32 %N) {
+Entry:
+ br label %Loop
+Loop: ; preds = %Loop, %Entry
+ %N_addr.0.pn = phi i32 [ %dec, %Loop ], [ %N, %Entry ] ; <i32> [#uses=3]
+ %tmp.6 = sdiv i32 %N, %N_addr.0.pn ; <i32> [#uses=1]
+ %dec = add i32 %N_addr.0.pn, -1 ; <i32> [#uses=1]
+ %tmp.1 = icmp ne i32 %N_addr.0.pn, 0 ; <i1> [#uses=1]
+ br i1 %tmp.1, label %Loop, label %Out
+Out: ; preds = %Loop
+ ret i32 %tmp.6
+
+; CHECK: @test10
+; CHECK: Out:
+; CHECK-NEXT: %tmp.6 = sdiv i32 %N, %N_addr.0.pn
+; CHECK-NEXT: ret i32 %tmp.6
+}
+
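sinking.ll gathers the sinking rules in one file: calls proven readonly or readnone can leave the loop, instructions whose only uses are outside the loop move to the exit blocks (duplicated per exit when there are several, as in test7), and a trapping instruction such as sdiv may be sunk only when it is guaranteed to execute. The core criterion in a compact hypothetical function:

define i32 @only_use_outside(i32 %n) {
Entry:
  br label %Loop
Loop:
  %iv = phi i32 [ %n, %Entry ], [ %dec, %Loop ]
  ; %t has no users inside the loop, so -licm sinks it into %Out
  ; instead of recomputing it on every iteration.
  %t = mul i32 %iv, %iv
  %dec = add i32 %iv, -1
  %c = icmp ne i32 %dec, 0
  br i1 %c, label %Loop, label %Out
Out:
  ret i32 %t
}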
diff --git a/test/Transforms/LoopDeletion/2007-07-23-InfiniteLoop.ll b/test/Transforms/LoopDeletion/2007-07-23-InfiniteLoop.ll
index ecba2f47b187..bcc73fdfab83 100644
--- a/test/Transforms/LoopDeletion/2007-07-23-InfiniteLoop.ll
+++ b/test/Transforms/LoopDeletion/2007-07-23-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-deletion | llvm-dis | grep switch
+; RUN: opt < %s -loop-deletion -S | grep switch
; PR 1564
define fastcc void @out() {
diff --git a/test/Transforms/LoopDeletion/2008-05-06-Phi.ll b/test/Transforms/LoopDeletion/2008-05-06-Phi.ll
index bfb747a72da8..4fc6378ee254 100644
--- a/test/Transforms/LoopDeletion/2008-05-06-Phi.ll
+++ b/test/Transforms/LoopDeletion/2008-05-06-Phi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -tailduplicate -instcombine -jump-threading -licm -loop-unswitch -instcombine -indvars -loop-deletion -gvn -simplifycfg -verify -disable-output
+; RUN: opt < %s -inline -tailduplicate -instcombine -jump-threading -licm -loop-unswitch -instcombine -indvars -loop-deletion -gvn -simplifycfg -verify -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/Transforms/LoopDeletion/dcetest.ll b/test/Transforms/LoopDeletion/dcetest.ll
index c112cae53700..f1e793de03d6 100644
--- a/test/Transforms/LoopDeletion/dcetest.ll
+++ b/test/Transforms/LoopDeletion/dcetest.ll
@@ -1,7 +1,7 @@
; This is the test case taken from Appel's book that illustrates a hard case
; that SCCP gets right and that, when followed by ADCE, is completely eliminated.
;
-; RUN: llvm-as < %s | opt -sccp -simplifycfg -indvars -loop-deletion -dce -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -sccp -simplifycfg -indvars -loop-deletion -dce -simplifycfg -S | not grep br
define i32 @"test function"(i32 %i0, i32 %j0) {
BB1:
diff --git a/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll b/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll
index fa1ab2dd6ba1..d922ecbd4f5f 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll
@@ -1,5 +1,5 @@
; PR1692
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
%struct.LITERAL_HELP = type { i32, i32, i32, %struct.CLAUSE_HELP*, %struct.term* }
diff --git a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
index 928fd959ae90..3ebd9b3401f5 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
@@ -1,6 +1,6 @@
; Update loop iteration space to eliminate condition inside loop.
-; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep bothcond
+; RUN: opt < %s -loop-index-split -S | not grep bothcond
define void @test(float* %x, i32 %ndat, float** %y, float %xcen, i32 %xmin, i32 %xmax, float %sigmal, float %contribution) {
entry:
%tmp519 = icmp sgt i32 %xmin, %xmax ; <i1> [#uses=1]
diff --git a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
index 6619c7d19d8a..8f4ee24c1233 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
@@ -1,6 +1,6 @@
; PR714
; Update loop iteration space to eliminate condition inside loop.
-; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep bothcond
+; RUN: opt < %s -loop-index-split -S | not grep bothcond
define void @test(float* %x, i32 %ndat, float** %y, float %xcen, i32 %xmin, i32 %xmax, float %sigmal, float %contribution) {
entry:
diff --git a/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll b/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll
index a4966a9de4a2..1550bc7abb5d 100644
--- a/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll
+++ b/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats |& \
+; RUN: opt < %s -loop-index-split -disable-output -stats |& \
; RUN: not grep "loop-index-split"
; Induction variable decrement is not yet handled.
diff --git a/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll b/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
index 4ad906719f8b..084746494357 100644
--- a/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
+++ b/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; PR 1995
define void @add_blkdev_randomness(i32 %major) nounwind {
diff --git a/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll b/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll
index 9fa83e4f57ff..980a42f20aa9 100644
--- a/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll
+++ b/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -disable-output -loop-index-split
+; RUN: opt < %s -disable-output -loop-index-split
; PR 2011
%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
diff --git a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll b/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll
index 0c67ea2c3070..9351cafcf646 100644
--- a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll
+++ b/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; PR 2011
%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
diff --git a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll b/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll
index 393299f3e094..6d6defa85de0 100644
--- a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll
+++ b/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; PR 2011
%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
diff --git a/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll b/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll
index 2ee51e14948c..f1a03e2f18a9 100644
--- a/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll
+++ b/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; PR 2030
%struct.FULL = type { i32, i32, [1000 x float*] }
diff --git a/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll b/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll
index 24f22cab4ea1..ca22e50eadfc 100644
--- a/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll
+++ b/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; Handle Exit block phis that do not have any use inside the loop.
%struct.ATOM = type { double, double, double, double, double, double, i32, double, double, double, double, i8*, i8, [9 x i8], double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, [200 x i8*], [32 x i8*], [32 x i8], i32 }
diff --git a/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll b/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll
index 5809918acffe..7447e6d4d4f0 100644
--- a/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll
+++ b/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -stats -disable-output | not grep "loop-index-split"
+; RUN: opt < %s -loop-index-split -stats -disable-output | not grep "loop-index-split"
;PR2294
@g_2 = external global i16 ; <i16*> [#uses=4]
@g_5 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll b/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll
index 1f6876c8d29d..6f691de537b1 100644
--- a/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll
+++ b/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -loop-unswitch -loop-index-split -instcombine -disable-output
+; RUN: opt < %s -loop-rotate -loop-unswitch -loop-index-split -instcombine -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
%struct.__CFData = type opaque
diff --git a/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll b/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll
index 7592511a290d..1fcd960e0511 100644
--- a/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll
+++ b/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -stats -disable-output | not grep "1 loop-index-split"
+; RUN: opt < %s -loop-index-split -stats -disable-output | not grep "1 loop-index-split"
; PR 2487
@g_6 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll b/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll
index a7a7cc23fefd..ee8e7a3eb863 100644
--- a/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll
+++ b/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -stats -disable-output | not grep "loop-index-split"
+; RUN: opt < %s -loop-index-split -stats -disable-output | not grep "loop-index-split"
; PR 2791
@g_40 = common global i32 0 ; <i32*> [#uses=1]
@g_192 = common global i32 0 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll b/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll
index f66edcd062e2..ef677369cc47 100644
--- a/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll
+++ b/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; PR 2805
@g_330 = common global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll b/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll
index b477b73cc37a..cca54adb1955 100644
--- a/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll
+++ b/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
%struct.RExC_state_t = type { i32, i8*, %struct.regexp*, i8*, i8*, i8*, i32, %struct.regnode*, %struct.regnode*, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.SV = type { i8*, i32, i32 }
%struct.reg_data = type { i32, i8*, [1 x i8*] }
diff --git a/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll b/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll
index 417f0917eda1..372fee51a09b 100644
--- a/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll
+++ b/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -stats -disable-output |& grep "1 loop-index-split"
+; RUN: opt < %s -loop-index-split -stats -disable-output |& grep "1 loop-index-split"
; PR 2869
@w = external global [2 x [2 x i32]] ; <[2 x [2 x i32]]*> [#uses=5]
diff --git a/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll b/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll
index 11ee0f5c4527..217ff52bb2c5 100644
--- a/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll
+++ b/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -stats | not grep "loop-index-split"
+; RUN: opt < %s -loop-index-split -stats | not grep "loop-index-split"
; PR3029
@g_138 = common global i32 0 ; <i32*> [#uses=3]
diff --git a/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll b/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll
index 73113e7b46f7..9acf3915c0a2 100644
--- a/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll
+++ b/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
%struct.CGPoint = type { double, double }
%struct.IBCFMutableDictionary = type { %struct.NSMutableArray, %struct.__CFDictionary*, %struct.NSSortDescriptor*, %struct.NSSortDescriptor* }
%struct.IBInspectorMode = type opaque
diff --git a/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll b/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll
index b34cb51c332e..deef94128973 100644
--- a/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll
+++ b/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep undef
+; RUN: opt < %s -loop-index-split -S | not grep undef
define i32 @main() {
entry:
br label %header
diff --git a/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll b/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll
index d77f55a60aea..ad2b794218cc 100644
--- a/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll
+++ b/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
%struct._edit_script = type { %struct._edit_script*, i32, i8 }
diff --git a/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll b/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll
index 4b2cffd79860..187484ad0bd0 100644
--- a/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll
+++ b/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
; PR1828.bc
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll b/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll
index 241b7dc188d6..098e407a3300 100644
--- a/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll
+++ b/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
%struct._edit_script = type { %struct._edit_script*, i32, i8 }
diff --git a/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll b/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll
index 9e79f483d9f3..a04715a7e952 100644
--- a/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll
+++ b/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output
+; RUN: opt < %s -loop-index-split -disable-output
@k = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll b/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll
index e98f699d7f6f..d18b3b71aeda 100644
--- a/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll
+++ b/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll
@@ -1,5 +1,5 @@
; Loop is eliminated
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats |& \
+; RUN: opt < %s -loop-index-split -disable-output -stats |& \
; RUN: grep "loop-index-split" | count 1
%struct.anon = type { i32 }
@S1 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll b/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll
index 31332bd984a5..ff73a5b44b3e 100644
--- a/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll
+++ b/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll
@@ -1,5 +1,5 @@
; Loop is eliminated. Save last value assignment.
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats |& \
+; RUN: opt < %s -loop-index-split -disable-output -stats |& \
; RUN: grep "loop-index-split" | count 1
%struct.anon = type { i32 }
diff --git a/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll b/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll
index bf568e57f24c..6adb26877680 100644
--- a/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll
+++ b/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll
@@ -1,5 +1,5 @@
; Loop is eliminated. Save last value assignments, including induction variable.
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats | not grep "loop-index-split"
+; RUN: opt < %s -loop-index-split -disable-output -stats | not grep "loop-index-split"
declare i32 @foo(i32)
declare i32 @bar(i32, i32)
diff --git a/test/Transforms/LoopIndexSplit/PR3913.ll b/test/Transforms/LoopIndexSplit/PR3913.ll
new file mode 100644
index 000000000000..a2bf57c05162
--- /dev/null
+++ b/test/Transforms/LoopIndexSplit/PR3913.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -loop-index-split -S | not grep "icmp ne"
+
+define i32 @main() {
+entry:
+ br label %header
+
+header:
+ %r = phi i32 [ 0, %entry ], [ %r3, %skip ]
+ %i = phi i32 [ 0, %entry ], [ %i1, %skip ]
+ %cond = icmp eq i32 %i, 99
+ br i1 %cond, label %body, label %skip
+
+body:
+ br label %skip
+
+skip:
+ %r3 = phi i32 [ %r, %header ], [ 3, %body ]
+ %i1 = add i32 %i, 1
+ %exitcond = icmp eq i32 %i1, 10
+ br i1 %exitcond, label %exit, label %header
+
+exit:
+ ret i32 %r3
+}
diff --git a/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll b/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll
index e19f22a6211c..fc7d9e9862cb 100644
--- a/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll
+++ b/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll
@@ -1,5 +1,5 @@
; Split loop. Save last value.
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats |& \
+; RUN: opt < %s -loop-index-split -disable-output -stats |& \
; RUN: grep "loop-index-split" | count 1
@k = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll
index 3b237bb1f143..09a6423bc0c3 100644
--- a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll
+++ b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll
@@ -1,5 +1,5 @@
; Split loop. Save last value. Split value is off by one in this example.
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats |& \
+; RUN: opt < %s -loop-index-split -disable-output -stats |& \
; RUN: grep "loop-index-split" | count 1
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll
index 27327a059cf9..f61d9671409f 100644
--- a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll
+++ b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll
@@ -1,5 +1,5 @@
; Split loop. Save last value. Split value is off by one in this example.
-; RUN: llvm-as < %s | opt -loop-index-split -disable-output -stats |& \
+; RUN: opt < %s -loop-index-split -disable-output -stats |& \
; RUN: grep "loop-index-split" | count 1
@k = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll b/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll
index c5064517dd1d..17f75d7509e7 100644
--- a/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll
+++ b/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll
@@ -1,6 +1,6 @@
; Split loop. Split value is a constant and greater than the exit value.
; Check whether the optimizer inserts a proper check for the split value or not.
-; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | grep select
+; RUN: opt < %s -loop-index-split -S | grep select
@k = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll b/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll
index caaa8adb213c..6eed98177d09 100644
--- a/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll
+++ b/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -reassociate -loop-rotate -loop-index-split -indvars -simplifycfg -verify
+; RUN: opt < %s -inline -reassociate -loop-rotate -loop-index-split -indvars -simplifycfg -verify
; PR4471
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll b/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll
index 7cc3951b3cd1..3e170dce7154 100644
--- a/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll
+++ b/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate | llvm-dis
+; RUN: opt < %s -loop-rotate | llvm-dis
; PR3408
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopRotate/LRCrash-1.ll b/test/Transforms/LoopRotate/LRCrash-1.ll
index e6c7667d5975..7d148e79c9d2 100644
--- a/test/Transforms/LoopRotate/LRCrash-1.ll
+++ b/test/Transforms/LoopRotate/LRCrash-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -disable-output
%struct.relation = type { [4 x i16], i32, [4 x i16], i32, i32 }
diff --git a/test/Transforms/LoopRotate/LRCrash-2.ll b/test/Transforms/LoopRotate/LRCrash-2.ll
index 6dbe76df9eb3..e117c11b6296 100644
--- a/test/Transforms/LoopRotate/LRCrash-2.ll
+++ b/test/Transforms/LoopRotate/LRCrash-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -disable-output
define void @findAllPairs() {
entry:
diff --git a/test/Transforms/LoopRotate/LRCrash-3.ll b/test/Transforms/LoopRotate/LRCrash-3.ll
index 94991a55b824..617dd8e42dd0 100644
--- a/test/Transforms/LoopRotate/LRCrash-3.ll
+++ b/test/Transforms/LoopRotate/LRCrash-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -disable-output
define void @_ZN9Classfile4readEv() {
entry:
diff --git a/test/Transforms/LoopRotate/LRCrash-4.ll b/test/Transforms/LoopRotate/LRCrash-4.ll
index ab43fc88922c..b2f32244505a 100644
--- a/test/Transforms/LoopRotate/LRCrash-4.ll
+++ b/test/Transforms/LoopRotate/LRCrash-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -disable-output
define void @InterpretSEIMessage(i8* %msg) {
entry:
diff --git a/test/Transforms/LoopRotate/LRCrash-5.ll b/test/Transforms/LoopRotate/LRCrash-5.ll
index d61e749d7ee4..7b6085d266ba 100644
--- a/test/Transforms/LoopRotate/LRCrash-5.ll
+++ b/test/Transforms/LoopRotate/LRCrash-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -disable-output
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
%struct.NSArray = type { %struct.NSObject }
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index 573e3960bd86..fe7eaf9a83c8 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate | llvm-dis | not grep {\\\[ .tmp224}
+; RUN: opt < %s -loop-rotate -S | not grep {\\\[ .tmp224}
; END.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll b/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
index a55704b1fdb6..b0d31bd911e5 100644
--- a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
+++ b/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -disable-output
; ModuleID = 'PhiSelfRefernce-1.bc'
define void @snrm2(i32 %incx) {
diff --git a/test/Transforms/LoopRotate/pr2639.ll b/test/Transforms/LoopRotate/pr2639.ll
index 67477ade9968..96f87d56032d 100644
--- a/test/Transforms/LoopRotate/pr2639.ll
+++ b/test/Transforms/LoopRotate/pr2639.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-deletion -loop-rotate -disable-output
+; RUN: opt < %s -loop-deletion -loop-rotate -disable-output
; PR 2639
%struct.HexxagonMove = type { i8, i8, i32 }
diff --git a/test/Transforms/LoopRotate/preserve-scev.ll b/test/Transforms/LoopRotate/preserve-scev.ll
new file mode 100644
index 000000000000..9eedaa49c0b8
--- /dev/null
+++ b/test/Transforms/LoopRotate/preserve-scev.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-rotate -loop-reduce -disable-output
+
+define fastcc void @foo() nounwind {
+BB:
+ br label %BB1
+
+BB1: ; preds = %BB19, %BB
+ br label %BB4
+
+BB2: ; preds = %BB4
+ %tmp = bitcast i32 undef to i32 ; <i32> [#uses=1]
+ br label %BB4
+
+BB4: ; preds = %BB3, %BB1
+ %tmp5 = phi i32 [ undef, %BB1 ], [ %tmp, %BB2 ] ; <i32> [#uses=1]
+ br i1 false, label %BB8, label %BB2
+
+BB8: ; preds = %BB6
+ %tmp7 = bitcast i32 %tmp5 to i32 ; <i32> [#uses=2]
+ br i1 false, label %BB9, label %BB13
+
+BB9: ; preds = %BB12, %BB8
+ %tmp10 = phi i32 [ %tmp11, %BB12 ], [ %tmp7, %BB8 ] ; <i32> [#uses=2]
+ %tmp11 = add i32 %tmp10, 1 ; <i32> [#uses=1]
+ br label %BB12
+
+BB12: ; preds = %BB9
+ br i1 false, label %BB9, label %BB17
+
+BB13: ; preds = %BB15, %BB8
+ %tmp14 = phi i32 [ %tmp16, %BB15 ], [ %tmp7, %BB8 ] ; <i32> [#uses=1]
+ br label %BB15
+
+BB15: ; preds = %BB13
+ %tmp16 = add i32 %tmp14, -1 ; <i32> [#uses=1]
+ br i1 false, label %BB13, label %BB18
+
+BB17: ; preds = %BB12
+ br label %BB19
+
+BB18: ; preds = %BB15
+ br label %BB19
+
+BB19: ; preds = %BB18, %BB17
+ %tmp20 = phi i32 [ %tmp10, %BB17 ], [ undef, %BB18 ] ; <i32> [#uses=0]
+ br label %BB1
+}
diff --git a/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll b/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll
index 70445fb8f8c1..bf862f69e94b 100644
--- a/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll
+++ b/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll
@@ -1,7 +1,7 @@
; This testcase exposed a problem with the loop identification pass (LoopInfo).
; Basically, it was incorrectly calculating the loop nesting information.
;
-; RUN: llvm-as < %s | opt -loopsimplify
+; RUN: opt < %s -loopsimplify
define i32 @yylex() {
br label %loopentry.0
diff --git a/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll b/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll
index ba2eef7095b1..cd9749bbf6d0 100644
--- a/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll
+++ b/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll
@@ -2,7 +2,7 @@
; inserted for the "fail" loop, but the exit block of a loop is not updated
; to be the preheader instead of the exit loop itself.
-; RUN: llvm-as < %s | opt -loopsimplify
+; RUN: opt < %s -loopsimplify
define i32 @re_match_2() {
br label %loopentry.1
loopentry.1: ; preds = %endif.82, %0
diff --git a/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll b/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll
index 53fc59edfc99..11be6941d8b8 100644
--- a/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll
+++ b/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -instcombine -simplifycfg -licm -disable-output
+; RUN: opt < %s -tailduplicate -instcombine -simplifycfg -licm -disable-output
target datalayout = "e-p:32:32"
@yy_base = external global [787 x i16] ; <[787 x i16]*> [#uses=1]
@yy_state_ptr = external global i32* ; <i32**> [#uses=3]
diff --git a/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll b/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll
index bd39330cb49d..fb39f05c6dbb 100644
--- a/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll
+++ b/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll
@@ -4,7 +4,7 @@
;
; This is distilled from a monstrous crafty example.
-; RUN: llvm-as < %s | opt -licm -disable-output
+; RUN: opt < %s -licm -disable-output
@G = weak global i32 0 ; <i32*> [#uses=7]
diff --git a/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll b/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll
index b8ced96f23fe..a5d0ba7ad76a 100644
--- a/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll
+++ b/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify -verify -licm -disable-output
+; RUN: opt < %s -loopsimplify -verify -licm -disable-output
define void @.subst_48() {
entry:
diff --git a/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll b/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll
index 439ea7e2228a..dc5c31354641 100644
--- a/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll
+++ b/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify -licm -disable-output
+; RUN: opt < %s -loopsimplify -licm -disable-output
define void @main() {
entry:
br i1 false, label %Out, label %loop
diff --git a/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll b/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll
index e75b214cb25f..721f9b3a0340 100644
--- a/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll
+++ b/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify -licm -disable-output
+; RUN: opt < %s -loopsimplify -licm -disable-output
; This is PR306
diff --git a/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll b/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll
index ae11d80d34b8..cbdfe8bbc0ae 100644
--- a/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll
+++ b/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify -disable-output
+; RUN: opt < %s -loopsimplify -disable-output
define void @test() {
loopentry.0:
diff --git a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
index bd0515c476e9..4fe6e2156f97 100644
--- a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
+++ b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -loopsimplify -licm -disable-output -verify-dom-info
+; RUN: opt < %s -scalarrepl -loopsimplify -licm -disable-output -verify-dom-info -verify-loop-info
define void @inflate() {
entry:
diff --git a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
index 914fcd1661b8..10202dcf98ce 100644
--- a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
+++ b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify -disable-output
+; RUN: opt < %s -loopsimplify -disable-output
; PR1752
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
target triple = "i686-pc-mingw32"
diff --git a/test/Transforms/LoopSimplify/basictest.ll b/test/Transforms/LoopSimplify/basictest.ll
index 0388b0e9eac5..4241d8ad0895 100644
--- a/test/Transforms/LoopSimplify/basictest.ll
+++ b/test/Transforms/LoopSimplify/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify
+; RUN: opt < %s -loopsimplify
; This function should get a preheader inserted before BB3, which is jumped
; to by BB1 & BB2
diff --git a/test/Transforms/LoopSimplify/hardertest.ll b/test/Transforms/LoopSimplify/hardertest.ll
index 6ee0567dbc81..e0a7f81603b0 100644
--- a/test/Transforms/LoopSimplify/hardertest.ll
+++ b/test/Transforms/LoopSimplify/hardertest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify
+; RUN: opt < %s -loopsimplify
define void @foo(i1 %C) {
br i1 %C, label %T, label %F
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index c5bf7fdc3c4d..45f506a498c9 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \
-; RUN: | llvm-dis > %t
+; RUN: opt < %s -loopsimplify -loop-rotate -instcombine -indvars -S > %t
; RUN: not grep sext %t
; RUN: grep {phi i64} %t | count 1
diff --git a/test/Transforms/LoopSimplify/phi-node-simplify.ll b/test/Transforms/LoopSimplify/phi-node-simplify.ll
index a22fadb48a4d..5e957ccbd880 100644
--- a/test/Transforms/LoopSimplify/phi-node-simplify.ll
+++ b/test/Transforms/LoopSimplify/phi-node-simplify.ll
@@ -1,5 +1,5 @@
; Loop Simplify should turn phi nodes like X = phi [X, Y] into just Y, eliminating them.
-; RUN: llvm-as < %s | opt -loopsimplify | llvm-dis | grep phi | count 6
+; RUN: opt < %s -loopsimplify -S | grep phi | count 6
@A = weak global [3000000 x i32] zeroinitializer ; <[3000000 x i32]*> [#uses=1]
@B = weak global [20000 x i32] zeroinitializer ; <[20000 x i32]*> [#uses=1]
diff --git a/test/Transforms/LoopSimplify/single-backedge.ll b/test/Transforms/LoopSimplify/single-backedge.ll
index 8391048eed22..f9567f124291 100644
--- a/test/Transforms/LoopSimplify/single-backedge.ll
+++ b/test/Transforms/LoopSimplify/single-backedge.ll
@@ -2,7 +2,7 @@
; for all loops. This allows the -indvars pass to recognize the %IV
; induction variable in this testcase.
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep indvar
+; RUN: opt < %s -indvars -S | grep indvar
define i32 @test(i1 %C) {
; <label>:0
diff --git a/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll b/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll
index 75574c07c0e8..1f08a4367bde 100644
--- a/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll
+++ b/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
define void @try_swap() {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll b/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll
index 83d46967e5f3..f1c523ae6c60 100644
--- a/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll
+++ b/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
define i32 @image_to_texture(i32 %indvar454) {
loopentry.1.outer:
diff --git a/test/Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll b/test/Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll
index 145551bded22..f56a55379c84 100644
--- a/test/Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll
+++ b/test/Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
define void @main() {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/2007-04-23-UseIterator.ll b/test/Transforms/LoopStrengthReduce/2007-04-23-UseIterator.ll
index 2d2dade0bdc2..8c2cfaf32107 100644
--- a/test/Transforms/LoopStrengthReduce/2007-04-23-UseIterator.ll
+++ b/test/Transforms/LoopStrengthReduce/2007-04-23-UseIterator.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
index be95d4dce664..7c7a21c013f2 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep ugt
+; RUN: opt < %s -loop-reduce -S | grep ugt
; PR2535
@.str = internal constant [4 x i8] c"%d\0A\00"
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
index fbd3c1e8a617..90477d106974 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep add | count 2
+; RUN: opt < %s -loop-reduce -S | grep add | count 2
; PR 2662
@g_3 = common global i16 0 ; <i16*> [#uses=2]
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
index 2377589b0ebf..c650d8cf76d8 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep "phi double" | count 1
+; RUN: opt < %s -loop-reduce -S | grep "phi double" | count 1
define void @foobar(i32 %n) nounwind {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll b/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
index 703aebef7ca1..1ee6b5cdf18e 100644
--- a/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 2
+; RUN: opt < %s -loop-reduce -S | grep phi | count 2
; PR 2779
@g_19 = common global i32 0 ; <i32*> [#uses=3]
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll b/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
index a7072858c8ac..b2cf818dc45d 100644
--- a/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 1
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 1
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
+; RUN: opt < %s -loop-reduce -S | grep mul | count 1
; ModuleID = '<stdin>'
; Make sure examining a fuller expression outside the loop doesn't cause us to create a second
; IV of stride %3.
diff --git a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll b/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll
index 0235fa8ff7ae..36cc53545103 100644
--- a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll
+++ b/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This used to crash.
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll b/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
index e1c9642ce818..3a7496ed6c7f 100644
--- a/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
+++ b/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis \
+; RUN: opt < %s -loop-reduce -S \
; RUN: | grep {getelementptr.*%lsr.iv.*%lsr.iv.*<i32\\*>}
; The multiply in bb2 must not be reduced to an add, as the sext causes the
; %1 argument to become negative after a while.
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
index 33b052215117..56a89f6502b9 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpl \$4}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index cb2f3aa5166d..8a3978bb2ee5 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpq \$8}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpq \$8}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll
index f77aea393701..ae27383895ce 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4222
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/LoopStrengthReduce/dead-phi.ll b/test/Transforms/LoopStrengthReduce/dead-phi.ll
index a6aafa911dab..07a942f70bd2 100644
--- a/test/Transforms/LoopStrengthReduce/dead-phi.ll
+++ b/test/Transforms/LoopStrengthReduce/dead-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 1
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
define void @foo(i32 %n) {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/different-type-ivs.ll b/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
index e5225da56ded..8cdd264591c8 100644
--- a/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
+++ b/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
; Test to make sure that loop-reduce never crashes on IVs
; with different types but identical strides.
diff --git a/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll b/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
index 52095070ae0a..4136486fef45 100644
--- a/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
+++ b/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
+; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {bitcast i32 1 to i32}
; END.
; The setlt wants to use a value that is incremented one more than the dominant
diff --git a/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll b/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
index 68eb107f1091..90051e3542c2 100644
--- a/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
+++ b/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
@@ -1,5 +1,5 @@
; Check that this test makes INDVAR and related stuff dead.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 2
+; RUN: opt < %s -loop-reduce -S | grep phi | count 2
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll b/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll
index bfbbecfdb267..20300002eb39 100644
--- a/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll
+++ b/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll
@@ -1,7 +1,7 @@
; Don't reduce the byte access to P[i], at least not on targets that
; support an efficient 'mem[r1+r2]' addressing mode.
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
declare i1 @pred(i32)
diff --git a/test/Transforms/LoopStrengthReduce/dont_reverse.ll b/test/Transforms/LoopStrengthReduce/dont_reverse.ll
index 39eca6c96308..214f15a77460 100644
--- a/test/Transforms/LoopStrengthReduce/dont_reverse.ll
+++ b/test/Transforms/LoopStrengthReduce/dont_reverse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis \
+; RUN: opt < %s -loop-reduce -S \
; RUN: | grep {icmp eq i2 %lsr.iv.next, %xmp4344}
; Don't reverse the iteration if the rhs of the compare is defined
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index d21498b0a457..abbfda6e9255 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -2,8 +2,7 @@
; having overlapping live ranges that result in copies. We want the setcc
; instruction immediately before the conditional branch.
;
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
-; RUN: %prcontext {br i1} 1 | grep icmp
+; RUN: opt -S -loop-reduce %s | FileCheck %s
define void @foo(float* %D, i32 %E) {
entry:
@@ -12,6 +11,8 @@ no_exit: ; preds = %no_exit, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ] ; <i32> [#uses=1]
volatile store float 0.000000e+00, float* %D
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+; CHECK: icmp
+; CHECK-NEXT: br i1
%exitcond = icmp eq i32 %indvar.next, %E ; <i1> [#uses=1]
br i1 %exitcond, label %loopexit, label %no_exit
loopexit: ; preds = %no_exit
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
index 86a1c257655b..f86638b0a829 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
@@ -1,5 +1,5 @@
; Check that the index of 'P[outer]' is pulled out of the loop.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
+; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
index 8e9cbc4a81ef..37acf0f61dba 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
@@ -1,5 +1,5 @@
; Check that the index of 'P[outer]' is pulled out of the loop.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
+; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/nested-reduce.ll b/test/Transforms/LoopStrengthReduce/nested-reduce.ll
index c19e844ea2bb..58b8d3eecd03 100644
--- a/test/Transforms/LoopStrengthReduce/nested-reduce.ll
+++ b/test/Transforms/LoopStrengthReduce/nested-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | not grep mul
+; RUN: opt < %s -loop-reduce -S | not grep mul
; Make sure we don't get a multiply by 6 in this loop.
diff --git a/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll b/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
index 5880dc15f0d2..a032cc97355a 100644
--- a/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
+++ b/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
@@ -1,7 +1,7 @@
; Check that this test makes INDVAR and related stuff dead, because P[indvar]
; gets reduced, making INDVAR dead.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | not grep INDVAR
+; RUN: opt < %s -loop-reduce -S | not grep INDVAR
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll b/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll
index 64d387a4a936..7ef494debdda 100644
--- a/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll
+++ b/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
; LSR should not crash on this.
define fastcc void @loadloop() {
diff --git a/test/Transforms/LoopStrengthReduce/pr2537.ll b/test/Transforms/LoopStrengthReduce/pr2537.ll
index e9be384e6f7a..73c3152d30e7 100644
--- a/test/Transforms/LoopStrengthReduce/pr2537.ll
+++ b/test/Transforms/LoopStrengthReduce/pr2537.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
; PR 2537
define void @a() {
diff --git a/test/Transforms/LoopStrengthReduce/pr2570.ll b/test/Transforms/LoopStrengthReduce/pr2570.ll
index ce0c3bf5c988..aafd24ebba1e 100644
--- a/test/Transforms/LoopStrengthReduce/pr2570.ll
+++ b/test/Transforms/LoopStrengthReduce/pr2570.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep {phi\\>} | count 10
+; RUN: opt < %s -loop-reduce -S | grep {phi\\>} | count 10
; PR2570
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/LoopStrengthReduce/pr3086.ll b/test/Transforms/LoopStrengthReduce/pr3086.ll
index f92bfe2b68c8..9a5911f1df49 100644
--- a/test/Transforms/LoopStrengthReduce/pr3086.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3086.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -loop-reduce -disable-output
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output
+; RUN: opt < %s -loop-reduce -disable-output
+; RUN: opt < %s -analyze -scalar-evolution -disable-output
; PR 3086
%struct.Cls = type { i32, i8, [2 x %struct.Cls*], [2 x %struct.Lit*] }
diff --git a/test/Transforms/LoopStrengthReduce/pr3399.ll b/test/Transforms/LoopStrengthReduce/pr3399.ll
index 9d2f5eda1b3f..b809007fea8f 100644
--- a/test/Transforms/LoopStrengthReduce/pr3399.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3399.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis
+; RUN: opt < %s -loop-reduce | llvm-dis
; PR3399
@g_53 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/pr3571.ll b/test/Transforms/LoopStrengthReduce/pr3571.ll
index 79fcdb17338a..9ad27d5ff114 100644
--- a/test/Transforms/LoopStrengthReduce/pr3571.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3571.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis
+; RUN: opt < %s -loop-reduce | llvm-dis
; PR3571
target triple = "i386-mingw32"
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index be1372ded3a7..2302dba913f2 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -iv-users -disable-output | grep {Stride i64 {3,+,2}<loop>:}
+; RUN: opt < %s -analyze -iv-users -disable-output | grep {Stride i64 {3,+,2}<loop>:}
; The value of %r is dependent on a polynomial iteration expression.
diff --git a/test/Transforms/LoopStrengthReduce/related_indvars.ll b/test/Transforms/LoopStrengthReduce/related_indvars.ll
index 434da99068c3..249437880769 100644
--- a/test/Transforms/LoopStrengthReduce/related_indvars.ll
+++ b/test/Transforms/LoopStrengthReduce/related_indvars.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 1
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
; This should only result in one PHI node!
diff --git a/test/Transforms/LoopStrengthReduce/remove_indvar.ll b/test/Transforms/LoopStrengthReduce/remove_indvar.ll
index 623c0b5338ef..53f4b9d5b9ff 100644
--- a/test/Transforms/LoopStrengthReduce/remove_indvar.ll
+++ b/test/Transforms/LoopStrengthReduce/remove_indvar.ll
@@ -1,5 +1,5 @@
; Check that this test makes INDVAR and related stuff dead.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | not grep INDVAR
+; RUN: opt < %s -loop-reduce -S | not grep INDVAR
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll b/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
index a78fa5f349fa..412a716bc431 100644
--- a/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
+++ b/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 1
+; RUN: opt < %s -loop-reduce -S | grep mul | count 1
; LSR should not make two copies of the Q*L expression in the preheader!
define i8 @test(i8* %A, i8* %B, i32 %L, i32 %Q, i32 %N.s) {
diff --git a/test/Transforms/LoopStrengthReduce/share_ivs.ll b/test/Transforms/LoopStrengthReduce/share_ivs.ll
index b52d571f4276..0459bc849bfb 100644
--- a/test/Transforms/LoopStrengthReduce/share_ivs.ll
+++ b/test/Transforms/LoopStrengthReduce/share_ivs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 1
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
; This testcase should have ONE stride 18 indvar, the other use should have a
; loop invariant value (B) added to it inside of the loop, instead of having
diff --git a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
index 2c9c70de8366..a99a823a3b76 100644
--- a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
+++ b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
+; RUN: opt < %s -loop-reduce -S | \
; RUN: grep {add i32 %lsr.iv.next, 1}
;
; Make sure that the use of the IV outside of the loop (the store) uses the
diff --git a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
index 53eedd673260..0a9fab0d5ea8 100644
--- a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
+++ b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
@@ -1,9 +1,9 @@
; Base should not be i*3, it should be i*2.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
+; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {mul.*%i, 3}
; Indvar should not start at zero:
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | \
+; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {phi i32 .* 0}
; END.
diff --git a/test/Transforms/LoopStrengthReduce/variable_stride.ll b/test/Transforms/LoopStrengthReduce/variable_stride.ll
index 90d3947101ff..7c0f053e4c34 100644
--- a/test/Transforms/LoopStrengthReduce/variable_stride.ll
+++ b/test/Transforms/LoopStrengthReduce/variable_stride.ll
@@ -1,5 +1,5 @@
; Check that variable strides are reduced to adds instead of multiplies.
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | not grep mul
+; RUN: opt < %s -loop-reduce -S | not grep mul
declare i1 @pred(i32)
diff --git a/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll b/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll
index 13a26a253325..3141bf1900cb 100644
--- a/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll
+++ b/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unroll -disable-output
+; RUN: opt < %s -loop-unroll -disable-output
define i32 @main() {
entry:
diff --git a/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll b/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
index db107fdd4bc0..a26346b2eb40 100644
--- a/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
+++ b/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unroll -loopsimplify -disable-output
+; RUN: opt < %s -loop-unroll -loopsimplify -disable-output
define void @print_board() {
entry:
diff --git a/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll b/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll
index 16419cab1abf..8219a0c23050 100644
--- a/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll
+++ b/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unroll | llvm-dis | grep bb72.2
+; RUN: opt < %s -loop-unroll -S | grep bb72.2
define void @vorbis_encode_noisebias_setup() {
entry:
diff --git a/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll b/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll
index 26fa60a8216c..40c9ce0e50b5 100644
--- a/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll
+++ b/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll
@@ -1,5 +1,5 @@
; PR 1334
-; RUN: llvm-as < %s | opt -loop-unroll -disable-output
+; RUN: opt < %s -loop-unroll -disable-output
define void @sal__math_float_manipulator_7__math__joint_array_dcv_ops__Omultiply__3([6 x float]* %agg.result) {
entry:
diff --git a/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll b/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll
index 44890a6e616e..d4c8402bd2c6 100644
--- a/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll
+++ b/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unroll | llvm-dis | not grep undef
+; RUN: opt < %s -loop-unroll -S | not grep undef
; PR1385
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll b/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll
index 2086e9678cdc..68842a41957c 100644
--- a/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll
+++ b/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unroll -unroll-count=3 | llvm-dis | grep bb72.2
+; RUN: opt < %s -loop-unroll -unroll-count=3 -S | grep bb72.2
define void @foo(i32 %trips) {
entry:
diff --git a/test/Transforms/LoopUnroll/2007-11-05-Crash.ll b/test/Transforms/LoopUnroll/2007-11-05-Crash.ll
index a46c09e3bde8..1711f1199974 100644
--- a/test/Transforms/LoopUnroll/2007-11-05-Crash.ll
+++ b/test/Transforms/LoopUnroll/2007-11-05-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -disable-output -loop-unroll
+; RUN: opt < %s -disable-output -loop-unroll
; PR1770
; PR1947
diff --git a/test/Transforms/LoopUnswitch/2006-02-14-LoopSimplifyCrash.ll b/test/Transforms/LoopUnswitch/2006-02-14-LoopSimplifyCrash.ll
index b80220af5003..b4cf4062309e 100644
--- a/test/Transforms/LoopUnswitch/2006-02-14-LoopSimplifyCrash.ll
+++ b/test/Transforms/LoopUnswitch/2006-02-14-LoopSimplifyCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
; END.
target datalayout = "E-p:32:32"
diff --git a/test/Transforms/LoopUnswitch/2006-02-22-UnswitchCrash.ll b/test/Transforms/LoopUnswitch/2006-02-22-UnswitchCrash.ll
index 916cba20b529..a50bd54e7870 100644
--- a/test/Transforms/LoopUnswitch/2006-02-22-UnswitchCrash.ll
+++ b/test/Transforms/LoopUnswitch/2006-02-22-UnswitchCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
define void @sort_Eq(i32* %S2) {
entry:
diff --git a/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll b/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll
index 60e464f3acb6..e0301572821a 100644
--- a/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll
+++ b/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
%struct.BLEND_MAP = type { i16, i16, i16, i32, %struct.BLEND_MAP_ENTRY* }
%struct.BLEND_MAP_ENTRY = type { float, i8, { [5 x float], [4 x i8] } }
diff --git a/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll b/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll
index 67da9ffad3e2..fd4d7300b44a 100644
--- a/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll
+++ b/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
define void @init_caller_save() {
entry:
diff --git a/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll b/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll
index 08cb9b66bc10..468b19430696 100644
--- a/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll
+++ b/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll
@@ -1,5 +1,5 @@
; PR1333
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/LoopUnswitch/2007-05-09-tl.ll b/test/Transforms/LoopUnswitch/2007-05-09-tl.ll
index c987daec7dbc..61615d0cc526 100644
--- a/test/Transforms/LoopUnswitch/2007-05-09-tl.ll
+++ b/test/Transforms/LoopUnswitch/2007-05-09-tl.ll
@@ -1,5 +1,5 @@
+; RUN: opt < %s -loop-unswitch -disable-output
; PR1333
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
define void @pp_cxx_expression() {
entry:
diff --git a/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll b/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll
index c4080c1e3b19..bf5a61b36300 100644
--- a/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll
+++ b/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -loop-unswitch -instcombine -disable-output
@str3 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
diff --git a/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll b/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll
index de3f670f2b95..5ae335bb666a 100644
--- a/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll
+++ b/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
define i32 @main(i32 %argc, i8** %argv) {
entry:
diff --git a/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll b/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll
index edd7d1ff58e7..dfca15403bfa 100644
--- a/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll
+++ b/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
; PR1559
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll b/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll
index 9673e2e96a96..fc92579933bd 100644
--- a/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll
+++ b/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -loop-unswitch -disable-output
+; RUN: opt < %s -licm -loop-unswitch -disable-output
; PR 1589
%struct.QBasicAtomic = type { i32 }
diff --git a/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll b/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll
index 139cdbe70a05..f83acaa6b0b5 100644
--- a/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll
+++ b/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -loop-unswitch -instcombine -disable-output
%struct.ClassDef = type { %struct.QByteArray, %struct.QByteArray, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", i8, i8, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", i32, i32 }
%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
%struct.Generator = type { %struct.FILE*, %struct.ClassDef*, %"struct.QList<ArgumentDef>", %struct.QByteArray, %"struct.QList<ArgumentDef>" }
diff --git a/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll b/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll
index b236edcf56c1..efbb7619591e 100644
--- a/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll
+++ b/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -loop-unroll -disable-output
+; RUN: opt < %s -licm -loop-unroll -disable-output
@resonant = external global i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll b/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
index dcf41c34a486..906c2c581e1f 100644
--- a/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
+++ b/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -instcombine -gvn -disable-output
+; RUN: opt < %s -loop-unswitch -instcombine -gvn -disable-output
; PR2372
target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll b/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
index 713a0bddb135..f74054a0589c 100644
--- a/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
+++ b/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -licm -loop-unswitch -disable-output
+; RUN: opt < %s -licm -loop-unswitch -disable-output
@g_56 = external global i16 ; <i16*> [#uses=2]
define i32 @func_67(i32 %p_68, i8 signext %p_69, i8 signext %p_71) nounwind {
diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
index 383aa5b671c7..20f2c2bfd742 100644
--- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
+++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -stats -disable-output |& grep "1 loop-unswitch - Number of branches unswitched" | count 1
+; RUN: opt < %s -loop-unswitch -stats -disable-output |& grep "1 loop-unswitch - Number of branches unswitched" | count 1
; PR 3170
define i32 @a(i32 %x, i32 %y) nounwind {
entry:
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index eeaf08d7e650..1e6f2cf15ee1 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -disable-output
define i32 @test(i32* %A, i1 %C) {
entry:
diff --git a/test/Transforms/LoopUnswitch/preserve-analyses.ll b/test/Transforms/LoopUnswitch/preserve-analyses.ll
new file mode 100644
index 000000000000..3364fb274109
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/preserve-analyses.ll
@@ -0,0 +1,129 @@
+; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info %s -disable-output
+
+; Loop unswitch should be able to unswitch these loops and
+; preserve LCSSA and LoopSimplify forms.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-apple-darwin9"
+
+@delim1 = external global i32 ; <i32*> [#uses=1]
+@delim2 = external global i32 ; <i32*> [#uses=1]
+
+define arm_apcscc i32 @ineqn(i8* %s, i8* %p) nounwind readonly {
+entry:
+ %0 = load i32* @delim1, align 4 ; <i32> [#uses=1]
+ %1 = load i32* @delim2, align 4 ; <i32> [#uses=1]
+ br label %bb8.outer
+
+bb: ; preds = %bb8
+ %2 = icmp eq i8* %p_addr.0, %s ; <i1> [#uses=1]
+ br i1 %2, label %bb10, label %bb2
+
+bb2: ; preds = %bb
+ %3 = getelementptr inbounds i8* %p_addr.0, i32 1 ; <i8*> [#uses=3]
+ switch i32 %ineq.0.ph, label %bb8.backedge [
+ i32 0, label %bb3
+ i32 1, label %bb6
+ ]
+
+bb8.backedge: ; preds = %bb6, %bb5, %bb2
+ br label %bb8
+
+bb3: ; preds = %bb2
+ %4 = icmp eq i32 %8, %0 ; <i1> [#uses=1]
+ br i1 %4, label %bb8.outer.loopexit, label %bb5
+
+bb5: ; preds = %bb3
+ br i1 %6, label %bb6, label %bb8.backedge
+
+bb6: ; preds = %bb5, %bb2
+ %5 = icmp eq i32 %8, %1 ; <i1> [#uses=1]
+ br i1 %5, label %bb7, label %bb8.backedge
+
+bb7: ; preds = %bb6
+ %.lcssa1 = phi i8* [ %3, %bb6 ] ; <i8*> [#uses=1]
+ br label %bb8.outer.backedge
+
+bb8.outer.backedge: ; preds = %bb8.outer.loopexit, %bb7
+ %.lcssa2 = phi i8* [ %.lcssa1, %bb7 ], [ %.lcssa, %bb8.outer.loopexit ] ; <i8*> [#uses=1]
+ %ineq.0.ph.be = phi i32 [ 0, %bb7 ], [ 1, %bb8.outer.loopexit ] ; <i32> [#uses=1]
+ br label %bb8.outer
+
+bb8.outer.loopexit: ; preds = %bb3
+ %.lcssa = phi i8* [ %3, %bb3 ] ; <i8*> [#uses=1]
+ br label %bb8.outer.backedge
+
+bb8.outer: ; preds = %bb8.outer.backedge, %entry
+ %ineq.0.ph = phi i32 [ 0, %entry ], [ %ineq.0.ph.be, %bb8.outer.backedge ] ; <i32> [#uses=3]
+ %p_addr.0.ph = phi i8* [ %p, %entry ], [ %.lcssa2, %bb8.outer.backedge ] ; <i8*> [#uses=1]
+ %6 = icmp eq i32 %ineq.0.ph, 1 ; <i1> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb8.outer, %bb8.backedge
+ %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; <i8*> [#uses=3]
+ %7 = load i8* %p_addr.0, align 1 ; <i8> [#uses=2]
+ %8 = sext i8 %7 to i32 ; <i32> [#uses=2]
+ %9 = icmp eq i8 %7, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb10, label %bb
+
+bb10: ; preds = %bb8, %bb
+ %.0 = phi i32 [ %ineq.0.ph, %bb ], [ 0, %bb8 ] ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+; This is a simplified form of ineqn from above. It triggers some
+; different cases in the loop-unswitch code.
+
+define void @simplified_ineqn() nounwind readonly {
+entry:
+ br label %bb8.outer
+
+bb8.outer: ; preds = %bb6, %bb2, %entry
+ %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; <i32> [#uses=1]
+ br i1 undef, label %return, label %bb2
+
+bb2: ; preds = %bb
+ switch i32 %x, label %bb6 [
+ i32 0, label %bb8.outer
+ ]
+
+bb6: ; preds = %bb2
+ br i1 undef, label %bb8.outer, label %bb2
+
+return: ; preds = %bb8, %bb
+ ret void
+}
+
+; This function requires special handling to preserve LCSSA form.
+; PR4934
+
+define void @pnp_check_irq() nounwind noredzone {
+entry:
+ %conv56 = trunc i64 undef to i32 ; <i32> [#uses=1]
+ br label %while.cond.i
+
+while.cond.i: ; preds = %while.cond.i.backedge, %entry
+ %call.i25 = call i8* @pci_get_device() nounwind noredzone ; <i8*> [#uses=2]
+ br i1 undef, label %if.then65, label %while.body.i
+
+while.body.i: ; preds = %while.cond.i
+ br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge
+
+while.cond.i.backedge: ; preds = %if.then31.i.i, %while.body.i
+ br label %while.cond.i
+
+if.then31.i.i: ; preds = %while.body.i
+ switch i32 %conv56, label %while.cond.i.backedge [
+ i32 14, label %if.then42.i.i
+ i32 15, label %if.then42.i.i
+ ]
+
+if.then42.i.i: ; preds = %if.then31.i.i, %if.then31.i.i
+ %call.i25.lcssa48 = phi i8* [ %call.i25, %if.then31.i.i ], [ %call.i25, %if.then31.i.i ] ; <i8*> [#uses=0]
+ unreachable
+
+if.then65: ; preds = %while.cond.i
+ unreachable
+}
+
+declare i8* @pci_get_device() noredzone
diff --git a/test/Transforms/LowerInvoke/2003-12-10-Crash.ll b/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
index 1e8b880485e4..31f3d42225ab 100644
--- a/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
+++ b/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
@@ -1,6 +1,6 @@
; This testcase was reduced from Shootout-C++/reversefile.cpp by bugpoint
-; RUN: llvm-as < %s | opt -lowerinvoke -disable-output
+; RUN: opt < %s -lowerinvoke -disable-output
declare void @baz()
diff --git a/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll b/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll
index 07770136964e..bddb70248ed8 100644
--- a/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll
+++ b/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerinvoke -enable-correct-eh-support -disable-output
+; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
define void @_ZNKSt11__use_cacheISt16__numpunct_cacheIcEEclERKSt6locale() {
entry:
diff --git a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll
index 10c878537289..1057ad7057cc 100644
--- a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll
+++ b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerinvoke -enable-correct-eh-support -disable-output
+; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
declare void @ll_listnext__listiterPtr()
diff --git a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll
index 0a8ccbe3e61e..940204649c74 100644
--- a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll
+++ b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerinvoke -enable-correct-eh-support -disable-output
+; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
declare fastcc i32 @ll_listnext__listiterPtr()
diff --git a/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll b/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll
index fa8253986533..b46ccfbb79a8 100644
--- a/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll
+++ b/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerinvoke -enable-correct-eh-support -disable-output
+; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
; PR2029
define i32 @main(i32 %argc, i8** %argv) {
bb470:
diff --git a/test/Transforms/LowerInvoke/basictest.ll b/test/Transforms/LowerInvoke/basictest.ll
index a9c19f2790d5..f0ca5f425311 100644
--- a/test/Transforms/LowerInvoke/basictest.ll
+++ b/test/Transforms/LowerInvoke/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerinvoke -disable-output -enable-correct-eh-support
+; RUN: opt < %s -lowerinvoke -disable-output -enable-correct-eh-support
define i32 @foo() {
diff --git a/test/Transforms/LowerSetJmp/2003-11-05-DominanceProperties.ll b/test/Transforms/LowerSetJmp/2003-11-05-DominanceProperties.ll
index 3be2163aa469..9180c15b18f2 100644
--- a/test/Transforms/LowerSetJmp/2003-11-05-DominanceProperties.ll
+++ b/test/Transforms/LowerSetJmp/2003-11-05-DominanceProperties.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowersetjmp -disable-output
+; RUN: opt < %s -lowersetjmp -disable-output
%struct.jmpenv = type { i32, i8 }
diff --git a/test/Transforms/LowerSetJmp/simpletest.ll b/test/Transforms/LowerSetJmp/simpletest.ll
index 9ff5f29c238e..1430dffdfe3f 100644
--- a/test/Transforms/LowerSetJmp/simpletest.ll
+++ b/test/Transforms/LowerSetJmp/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowersetjmp | llvm-dis | grep invoke
+; RUN: opt < %s -lowersetjmp -S | grep invoke
%JmpBuf = type i32
@.str_1 = internal constant [13 x i8] c"returned %d\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll b/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll
index cb5f090864b7..d143ab050358 100644
--- a/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll
+++ b/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerswitch
+; RUN: opt < %s -lowerswitch
define void @child(i32 %ct.1) {
entry:
diff --git a/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll b/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll
index 71e5a4bcc4d7..61e1dcd345bd 100644
--- a/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll
+++ b/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerswitch
+; RUN: opt < %s -lowerswitch
define void @test() {
switch i32 0, label %Next [
diff --git a/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll b/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll
index 2fc1c9801914..964b07e30669 100644
--- a/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll
+++ b/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -lowerswitch -disable-output
+; RUN: opt < %s -lowerswitch -disable-output
define void @solve() {
entry:
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index 7523ad235b6d..cdfa0f371d66 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | opt -lowerswitch | llvm-dis > %t
+; RUN: opt < %s -lowerswitch -S > %t
; RUN: grep slt %t | count 10
; RUN: grep ule %t | count 3
; RUN: grep eq %t | count 9
diff --git a/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll b/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
index b0d2c7913363..777f3757bb89 100644
--- a/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
+++ b/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
@@ -1,6 +1,6 @@
; Uninitialized values are not handled correctly.
;
-; RUN: llvm-as < %s | opt -mem2reg -disable-output
+; RUN: opt < %s -mem2reg -disable-output
;
define i32 @test() {
diff --git a/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll b/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll
index c649d6a5f443..89bd4928a476 100644
--- a/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll
+++ b/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll
@@ -1,7 +1,7 @@
; This input caused the mem2reg pass to die because it was trying to promote
; the %r alloca, even though it is invalid to do so in this case!
;
-; RUN: llvm-as < %s | opt -mem2reg
+; RUN: opt < %s -mem2reg
define void @test() {
%r = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll b/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll
index 0dd27614104d..3665483458cc 100644
--- a/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll
+++ b/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mem2reg
+; RUN: opt < %s -mem2reg
define void @_Z3barv() {
%result = alloca i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll b/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll
index 78bfcb4197b2..36bd9e64991e 100644
--- a/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll
+++ b/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll
@@ -1,5 +1,5 @@
; This testcase makes sure that mem2reg can handle unreachable blocks.
-; RUN: llvm-as < %s | opt -mem2reg
+; RUN: opt < %s -mem2reg
define i32 @test() {
%X = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll b/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
index 04e8db3c8de5..f5f1ee34365d 100644
--- a/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
+++ b/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
@@ -1,7 +1,7 @@
; Mem2reg used to only add one incoming value to a PHI node, even if it had
; multiple incoming edges from a block.
;
-; RUN: llvm-as < %s | opt -mem2reg -disable-output
+; RUN: opt < %s -mem2reg -disable-output
define i32 @test(i1 %c1, i1 %c2) {
%X = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll b/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
index 27d00150d685..e82caa9fe0f6 100644
--- a/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
+++ b/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
@@ -1,5 +1,5 @@
; Promoting some values allows promotion of other values.
-; RUN: llvm-as < %s | opt -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -mem2reg -S | not grep alloca
define i32 @test2() {
%result = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll b/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
index 5a27b20ca509..1d38efc74576 100644
--- a/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
+++ b/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
@@ -1,7 +1,7 @@
; Mem2reg should not insert dead PHI nodes! The naive algorithm inserts a PHI
; node in L3, even though there is no load of %A in anything dominated by L3.
-; RUN: llvm-as < %s | opt -mem2reg | llvm-dis | not grep phi
+; RUN: opt < %s -mem2reg -S | not grep phi
define void @test(i32 %B, i1 %C) {
%A = alloca i32 ; <i32*> [#uses=4]
diff --git a/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll b/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
index b053aa162f5b..74355961fbf9 100644
--- a/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
+++ b/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mem2reg -instcombine | llvm-dis | grep store
+; RUN: opt < %s -mem2reg -instcombine -S | grep store
; PR590
diff --git a/test/Transforms/Mem2Reg/2005-11-28-Crash.ll b/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
index a985db3eb8c7..8fd3351ba42d 100644
--- a/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
+++ b/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mem2reg -disable-output
+; RUN: opt < %s -mem2reg -disable-output
; PR670
define void @printk(i32, ...) {
diff --git a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index 579ed41c94c1..50683cf8baa0 100644
--- a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llvm-dis | grep volatile | count 3
+; RUN: opt < %s -std-compile-opts -S | grep volatile | count 3
; PR1520
; Don't promote volatile loads/stores. This is really needed to handle setjmp/longjmp properly.
diff --git a/test/Transforms/Mem2Reg/PromoteMemToRegister.ll b/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
index 63b8c783c256..1be6b03beec6 100644
--- a/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
+++ b/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
@@ -1,5 +1,5 @@
; Simple sanity check testcase. Both allocas should be eliminated.
-; RUN: llvm-as < %s | opt -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -mem2reg -S | not grep alloca
define double @testfunc(i32 %i, double %j) {
%I = alloca i32 ; <i32*> [#uses=4]
diff --git a/test/Transforms/Mem2Reg/UndefValuesMerge.ll b/test/Transforms/Mem2Reg/UndefValuesMerge.ll
index 0d20d7df3458..5013229b77f9 100644
--- a/test/Transforms/Mem2Reg/UndefValuesMerge.ll
+++ b/test/Transforms/Mem2Reg/UndefValuesMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mem2reg | llvm-dis | not grep phi
+; RUN: opt < %s -mem2reg -S | not grep phi
define i32 @testfunc(i1 %C, i32 %i, i8 %j) {
%I = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/Mem2Reg/crash.ll b/test/Transforms/Mem2Reg/crash.ll
new file mode 100644
index 000000000000..ce795aaaca4b
--- /dev/null
+++ b/test/Transforms/Mem2Reg/crash.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -mem2reg -S
+; PR5023
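+; The store to %whichFlag in %invcont2 does not dominate the load in %bb15,
+; which is also reachable through %lpad86 where nothing is stored, so
+; promotion has to insert a phi in %bb15, merging undef on the %lpad86 edge.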
+
+declare i32 @bar()
+
+define i32 @foo() {
+entry:
+ %whichFlag = alloca i32
+ %A = invoke i32 @bar()
+ to label %invcont2 unwind label %lpad86
+
+invcont2:
+ store i32 %A, i32* %whichFlag
+ br label %bb15
+
+bb15:
+ %B = load i32* %whichFlag
+ ret i32 %B
+
+lpad86:
+ br label %bb15
+
+}
+
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 30d0a6dbebb5..30c27137d909 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -memcpyopt -dse | llvm-dis | grep {call.*initialize} | not grep memtmp
+; RUN: opt < %s -memcpyopt -dse -S | grep {call.*initialize} | not grep memtmp
; PR2077
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 12c9a9d23e53..13205e6854f0 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep {call.*memcpy.}
+; RUN: opt < %s -memcpyopt -S | not grep {call.*memcpy.}
%a = type { i32 }
%b = type { float }
diff --git a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll b/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
index 1d9a35d63f28..4fec169fd06a 100644
--- a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
+++ b/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*memcpy.*agg.result}
+; RUN: opt < %s -memcpyopt -S | grep {call.*memcpy.*agg.result}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
new file mode 100644
index 000000000000..a9d03378521f
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -S -memcpyopt | FileCheck %s
+
+; The resulting memset is only 4-byte aligned, despite containing
+; a 16-byte aligned store in the middle.
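+; The align 16 is on %a1, at byte offset 4, which actually makes %p itself
+; only 4-byte aligned, so the memset covering [%p, %p+16) cannot claim more
+; than a 4-byte alignment.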
+
+; CHECK: call void @llvm.memset.i64(i8* %a01, i8 0, i64 16, i32 4)
+
+define void @foo(i32* %p) {
+ %a0 = getelementptr i32* %p, i64 0
+ store i32 0, i32* %a0, align 4
+ %a1 = getelementptr i32* %p, i64 1
+ store i32 0, i32* %a1, align 16
+ %a2 = getelementptr i32* %p, i64 2
+ store i32 0, i32* %a2, align 4
+ %a3 = getelementptr i32* %p, i64 3
+ store i32 0, i32* %a3, align 4
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/crash.ll b/test/Transforms/MemCpyOpt/crash.ll
new file mode 100644
index 000000000000..bf5b23467577
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/crash.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -memcpyopt -disable-output
+; PR4882
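+; Note that the stores skip %struct.qw index 2 entirely, as well as element 0
+; of index 3, and the block ends in unreachable rather than ret.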
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%struct.qw = type { [4 x float] }
+%struct.bar = type { %struct.qw, %struct.qw, %struct.qw, %struct.qw, %struct.qw, float, float}
+
+define arm_aapcs_vfpcc void @test1(%struct.bar* %this) {
+entry:
+ %0 = getelementptr inbounds %struct.bar* %this, i32 0, i32 0, i32 0, i32 0
+ store float 0.000000e+00, float* %0, align 4
+ %1 = getelementptr inbounds %struct.bar* %this, i32 0, i32 0, i32 0, i32 1
+ store float 0.000000e+00, float* %1, align 4
+ %2 = getelementptr inbounds %struct.bar* %this, i32 0, i32 0, i32 0, i32 2
+ store float 0.000000e+00, float* %2, align 4
+ %3 = getelementptr inbounds %struct.bar* %this, i32 0, i32 0, i32 0, i32 3
+ store float 0.000000e+00, float* %3, align 4
+ %4 = getelementptr inbounds %struct.bar* %this, i32 0, i32 1, i32 0, i32 0
+ store float 0.000000e+00, float* %4, align 4
+ %5 = getelementptr inbounds %struct.bar* %this, i32 0, i32 1, i32 0, i32 1
+ store float 0.000000e+00, float* %5, align 4
+ %6 = getelementptr inbounds %struct.bar* %this, i32 0, i32 1, i32 0, i32 2
+ store float 0.000000e+00, float* %6, align 4
+ %7 = getelementptr inbounds %struct.bar* %this, i32 0, i32 1, i32 0, i32 3
+ store float 0.000000e+00, float* %7, align 4
+ %8 = getelementptr inbounds %struct.bar* %this, i32 0, i32 3, i32 0, i32 1
+ store float 0.000000e+00, float* %8, align 4
+ %9 = getelementptr inbounds %struct.bar* %this, i32 0, i32 3, i32 0, i32 2
+ store float 0.000000e+00, float* %9, align 4
+ %10 = getelementptr inbounds %struct.bar* %this, i32 0, i32 3, i32 0, i32 3
+ store float 0.000000e+00, float* %10, align 4
+ %11 = getelementptr inbounds %struct.bar* %this, i32 0, i32 4, i32 0, i32 0
+ store float 0.000000e+00, float* %11, align 4
+ %12 = getelementptr inbounds %struct.bar* %this, i32 0, i32 4, i32 0, i32 1
+ store float 0.000000e+00, float* %12, align 4
+ %13 = getelementptr inbounds %struct.bar* %this, i32 0, i32 4, i32 0, i32 2
+ store float 0.000000e+00, float* %13, align 4
+ %14 = getelementptr inbounds %struct.bar* %this, i32 0, i32 4, i32 0, i32 3
+ store float 0.000000e+00, float* %14, align 4
+ %15 = getelementptr inbounds %struct.bar* %this, i32 0, i32 5
+ store float 0.000000e+00, float* %15, align 4
+ unreachable
+}
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index ffacb8565c21..eb8dbe3a614f 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep store
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*llvm.memset}
+; RUN: opt < %s -memcpyopt -S | not grep store
+; RUN: opt < %s -memcpyopt -S | grep {call.*llvm.memset}
; All the stores in this example should be merged into a single memset.
diff --git a/test/Transforms/MemCpyOpt/form-memset2.ll b/test/Transforms/MemCpyOpt/form-memset2.ll
index 719cd47b0b7b..c90af9c73ca3 100644
--- a/test/Transforms/MemCpyOpt/form-memset2.ll
+++ b/test/Transforms/MemCpyOpt/form-memset2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep store
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*llvm.memset} | count 3
+; RUN: opt < %s -memcpyopt -S | not grep store
+; RUN: opt < %s -memcpyopt -S | grep {call.*llvm.memset} | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 94daee0149ed..724acfab4750 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -memcpyopt -dse | llvm-dis | grep {call.*memcpy} | count 1
+; RUN: opt < %s -memcpyopt -dse -S | grep {call.*memcpy} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
new file mode 100644
index 000000000000..73bbf0bd2e4e
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; These memmoves should get optimized to memcpys.
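+; In @test1 the destination is a freshly allocated block that cannot alias
+; %src; in @test2 the 16-byte ranges at %P and %P+16 are disjoint.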
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.0"
+
+declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+define i8* @test1(i8* nocapture %src) nounwind {
+entry:
+; CHECK: @test1
+; CHECK: call void @llvm.memcpy
+
+ %call3 = malloc [13 x i8] ; <[13 x i8]*> [#uses=1]
+ %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
+ tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
+ ret i8* %call3.sub
+}
+
+define void @test2(i8* %P) nounwind {
+entry:
+; CHECK: @test2
+; CHECK: call void @llvm.memcpy
+ %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
+ tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
+ ret void
+}
+
+; This cannot be optimized because the src/dst really do overlap:
+; the 17-byte ranges at %P and %P+16 share a byte.
+define void @test3(i8* %P) nounwind {
+entry:
+; CHECK: @test3
+; CHECK: call void @llvm.memmove
+ %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
+ tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index ad9fb1b21593..5002875ae328 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep {call.*memcpy}
+; RUN: opt < %s -memcpyopt -S | not grep {call.*memcpy}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
diff --git a/test/Transforms/MergeFunc/fold-weak.ll b/test/Transforms/MergeFunc/fold-weak.ll
index cea49fb1ceca..e12473125c78 100644
--- a/test/Transforms/MergeFunc/fold-weak.ll
+++ b/test/Transforms/MergeFunc/fold-weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mergefunc | llvm-dis > %t
+; RUN: opt < %s -mergefunc -S > %t
; RUN: grep {define weak} %t | count 2
; RUN: grep {call} %t | count 2
diff --git a/test/Transforms/MergeFunc/phi-speculation1.ll b/test/Transforms/MergeFunc/phi-speculation1.ll
index 9ba3081a901c..7b2a2fe5d52f 100644
--- a/test/Transforms/MergeFunc/phi-speculation1.ll
+++ b/test/Transforms/MergeFunc/phi-speculation1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mergefunc -stats -disable-output |& not grep {functions merged}
+; RUN: opt < %s -mergefunc -stats -disable-output |& not grep {functions merged}
define i32 @foo1(i32 %x) {
entry:
diff --git a/test/Transforms/MergeFunc/phi-speculation2.ll b/test/Transforms/MergeFunc/phi-speculation2.ll
index d8c6e30b9757..f080191ef860 100644
--- a/test/Transforms/MergeFunc/phi-speculation2.ll
+++ b/test/Transforms/MergeFunc/phi-speculation2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -mergefunc -stats -disable-output |& grep {functions merged}
+; RUN: opt < %s -mergefunc -stats -disable-output |& grep {functions merged}
define i32 @foo1(i32 %x) {
entry:
diff --git a/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll b/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll
index 7fc912563e1f..679eafd59363 100644
--- a/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll
+++ b/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh | llvm-dis | grep invoke
+; RUN: opt < %s -prune-eh -S | grep invoke
declare void @External()
diff --git a/test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll b/test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll
index 8333cd8f722c..a01070308bed 100644
--- a/test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll
+++ b/test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh -disable-output
+; RUN: opt < %s -prune-eh -disable-output
define internal void @callee() {
ret void
diff --git a/test/Transforms/PruneEH/2008-06-02-Weak.ll b/test/Transforms/PruneEH/2008-06-02-Weak.ll
index 133200f28471..fb97ae870831 100644
--- a/test/Transforms/PruneEH/2008-06-02-Weak.ll
+++ b/test/Transforms/PruneEH/2008-06-02-Weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh | llvm-dis | not grep nounwind
+; RUN: opt < %s -prune-eh -S | not grep nounwind
define weak void @f() {
entry:
diff --git a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
index 74434f4d952a..347af8f8463c 100644
--- a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
+++ b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh -inline -print-callgraph \
+; RUN: opt < %s -prune-eh -inline -print-callgraph \
; RUN: -disable-output |& \
; RUN: grep {Calls.*ce3806g__fxio__put__put_int64__4.1339} | count 2
%struct.FRAME.ce3806g = type { %struct.string___XUB, %struct.string___XUB, %struct.string___XUB, %struct.string___XUB }
diff --git a/test/Transforms/PruneEH/recursivetest.ll b/test/Transforms/PruneEH/recursivetest.ll
index 231c7b373819..724c7cf224c1 100644
--- a/test/Transforms/PruneEH/recursivetest.ll
+++ b/test/Transforms/PruneEH/recursivetest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh | llvm-dis | not grep invoke
+; RUN: opt < %s -prune-eh -S | not grep invoke
define internal i32 @foo() {
invoke i32 @foo( )
diff --git a/test/Transforms/PruneEH/simplenoreturntest.ll b/test/Transforms/PruneEH/simplenoreturntest.ll
index d2bd50cf5da7..6cdd42fff849 100644
--- a/test/Transforms/PruneEH/simplenoreturntest.ll
+++ b/test/Transforms/PruneEH/simplenoreturntest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh | llvm-dis | not grep {ret i32}
+; RUN: opt < %s -prune-eh -S | not grep {ret i32}
declare void @noreturn() noreturn;
diff --git a/test/Transforms/PruneEH/simpletest.ll b/test/Transforms/PruneEH/simpletest.ll
index 678e8d76b680..77c429dae6d6 100644
--- a/test/Transforms/PruneEH/simpletest.ll
+++ b/test/Transforms/PruneEH/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -prune-eh | llvm-dis | not grep invoke
+; RUN: opt < %s -prune-eh -S | not grep invoke
declare void @nounwind() nounwind
diff --git a/test/Transforms/RaiseAllocations/2004-11-08-FreeUseCrash.ll b/test/Transforms/RaiseAllocations/2004-11-08-FreeUseCrash.ll
index 10daa7620d22..75e02e84fac9 100644
--- a/test/Transforms/RaiseAllocations/2004-11-08-FreeUseCrash.ll
+++ b/test/Transforms/RaiseAllocations/2004-11-08-FreeUseCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -raiseallocs -disable-output
+; RUN: opt < %s -raiseallocs -disable-output
define void @main() {
%tmp.13 = call i32 (...)* @free( i32 32 ) ; <i32> [#uses=1]
diff --git a/test/Transforms/RaiseAllocations/2007-10-17-InvokeFree.ll b/test/Transforms/RaiseAllocations/2007-10-17-InvokeFree.ll
index 513cf6f339b1..675bb3d829f7 100644
--- a/test/Transforms/RaiseAllocations/2007-10-17-InvokeFree.ll
+++ b/test/Transforms/RaiseAllocations/2007-10-17-InvokeFree.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as <%s | opt -raiseallocs -stats -disable-output |& \
+; RUN: opt < %s -raiseallocs -stats -disable-output |& \
; RUN: not grep {Number of allocations raised}
define void @foo() {
entry:
diff --git a/test/Transforms/RaiseAllocations/FreeCastConstantExpr.ll b/test/Transforms/RaiseAllocations/FreeCastConstantExpr.ll
index 4521581b3879..1cf072910a60 100644
--- a/test/Transforms/RaiseAllocations/FreeCastConstantExpr.ll
+++ b/test/Transforms/RaiseAllocations/FreeCastConstantExpr.ll
@@ -1,6 +1,6 @@
; This situation can occur due to the funcresolve pass.
;
-; RUN: llvm-as < %s | opt -raiseallocs | llvm-dis | not grep call
+; RUN: opt < %s -raiseallocs -S | not grep call
declare void @free(i8*)
diff --git a/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll b/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
index 5465418d00de..5780990c7618 100644
--- a/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
+++ b/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -instcombine -constprop -dce | llvm-dis | not grep add
+; RUN: opt < %s -reassociate -instcombine -constprop -dce -S | not grep add
define i32 @test(i32 %A) {
%X = add i32 %A, 1 ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2002-05-15-MissedTree.ll b/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
index 79afb6e64c13..e8bccbde28e2 100644
--- a/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
+++ b/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -instcombine -constprop -die | llvm-dis | not grep 5
+; RUN: opt < %s -reassociate -instcombine -constprop -die -S | not grep 5
define i32 @test(i32 %A, i32 %B) {
%W = add i32 %B, -5 ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll b/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
index de0666e95617..c18af5e07efd 100644
--- a/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
+++ b/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
@@ -1,6 +1,6 @@
; With sub reassociation, constant folding can eliminate all of the constants.
;
-; RUN: llvm-as < %s | opt -reassociate -constprop -instcombine -dce | llvm-dis | not grep add
+; RUN: opt < %s -reassociate -constprop -instcombine -dce -S | not grep add
define i32 @test(i32 %A, i32 %B) {
%W = add i32 5, %B ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll b/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
index 070080acf24f..5848821e10fd 100644
--- a/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
+++ b/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
@@ -1,6 +1,6 @@
; With sub reassociation, constant folding can eliminate the two 12 constants.
;
-; RUN: llvm-as < %s | opt -reassociate -constprop -dce | llvm-dis | not grep 12
+; RUN: opt < %s -reassociate -constprop -dce -S | not grep 12
define i32 @test(i32 %A, i32 %B, i32 %C, i32 %D) {
%M = add i32 %A, 12 ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2002-07-09-DominanceProblem.ll b/test/Transforms/Reassociate/2002-07-09-DominanceProblem.ll
index c19baeaae68f..bbb08f969853 100644
--- a/test/Transforms/Reassociate/2002-07-09-DominanceProblem.ll
+++ b/test/Transforms/Reassociate/2002-07-09-DominanceProblem.ll
@@ -1,6 +1,6 @@
; The reassociate pass is not preserving dominance properties correctly
;
-; RUN: llvm-as < %s | opt -reassociate
+; RUN: opt < %s -reassociate
define i32 @compute_dist(i32 %i, i32 %j) {
%reg119 = sub i32 %j, %i ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2003-08-12-InfiniteLoop.ll b/test/Transforms/Reassociate/2003-08-12-InfiniteLoop.ll
index 41cba63d8fda..af7a821a4bab 100644
--- a/test/Transforms/Reassociate/2003-08-12-InfiniteLoop.ll
+++ b/test/Transforms/Reassociate/2003-08-12-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -disable-output
+; RUN: opt < %s -reassociate -disable-output
define i32 @test(i32 %A.1, i32 %B.1, i32 %C.1, i32 %D.1) {
%tmp.16 = and i32 %A.1, %B.1 ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2005-08-24-Crash.ll b/test/Transforms/Reassociate/2005-08-24-Crash.ll
index 99c98ef5932c..9864de45f88e 100644
--- a/test/Transforms/Reassociate/2005-08-24-Crash.ll
+++ b/test/Transforms/Reassociate/2005-08-24-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -disable-output
+; RUN: opt < %s -reassociate -disable-output
define void @test(i32 %a, i32 %b, i32 %c, i32 %d) {
%tmp.2 = xor i32 %a, %b ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
index d11a2ea767a9..33e44d4ba753 100644
--- a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
+++ b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis |\
+; RUN: opt < %s -reassociate -instcombine -S |\
; RUN: grep {ret i32 0}
define i32 @f(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index 3662e097c9f0..384cbc90a744 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -disable-output
+; RUN: opt < %s -reassociate -disable-output
define void @foo() {
%tmp162 = fsub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index 09db47b1907c..eca2d6310d34 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -1,6 +1,6 @@
; With reassociation, constant folding can eliminate the 12 and -12 constants.
;
-; RUN: llvm-as < %s | opt -reassociate -constprop -instcombine -die | llvm-dis | not grep add
+; RUN: opt < %s -reassociate -constprop -instcombine -die -S | not grep add
define i32 @test(i32 %arg) {
%tmp1 = sub i32 -12, %arg ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/basictest2.ll b/test/Transforms/Reassociate/basictest2.ll
index dbde3af35e1b..ba1ff9ec73fd 100644
--- a/test/Transforms/Reassociate/basictest2.ll
+++ b/test/Transforms/Reassociate/basictest2.ll
@@ -1,6 +1,6 @@
; With reassociation, constant folding can eliminate the +/- 30 constants.
;
-; RUN: llvm-as < %s | opt -reassociate -constprop -instcombine -die | llvm-dis | not grep 30
+; RUN: opt < %s -reassociate -constprop -instcombine -die -S | not grep 30
define i32 @test(i32 %reg109, i32 %reg1111) {
%reg115 = add i32 %reg109, -30 ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/basictest3.ll b/test/Transforms/Reassociate/basictest3.ll
index a88030db2f9a..92285fbbb63e 100644
--- a/test/Transforms/Reassociate/basictest3.ll
+++ b/test/Transforms/Reassociate/basictest3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -gvn | llvm-dis | grep add | count 6
+; RUN: opt < %s -reassociate -gvn -S | grep add | count 6
; Each of these functions should turn into two adds.
@e = external global i32 ; <i32*> [#uses=3]
diff --git a/test/Transforms/Reassociate/basictest4.ll b/test/Transforms/Reassociate/basictest4.ll
index 608becd9c3a9..88dbdf781504 100644
--- a/test/Transforms/Reassociate/basictest4.ll
+++ b/test/Transforms/Reassociate/basictest4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -gvn -instcombine | llvm-dis | not grep add
+; RUN: opt < %s -reassociate -gvn -instcombine -S | not grep add
@a = weak global i32 0 ; <i32*> [#uses=1]
@b = weak global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/Reassociate/inverses.ll b/test/Transforms/Reassociate/inverses.ll
index 5b08d0eec675..fa1a4bd1d01e 100644
--- a/test/Transforms/Reassociate/inverses.ll
+++ b/test/Transforms/Reassociate/inverses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -dce | llvm-dis | \
+; RUN: opt < %s -reassociate -dce -S | \
; RUN: not grep {\\(and\\|sub\\)}
define i32 @test1(i32 %a, i32 %b) {
diff --git a/test/Transforms/Reassociate/looptest.ll b/test/Transforms/Reassociate/looptest.ll
index 3b28c2389538..91723bc37b01 100644
--- a/test/Transforms/Reassociate/looptest.ll
+++ b/test/Transforms/Reassociate/looptest.ll
@@ -12,7 +12,7 @@
; In this case, we want to reassociate the specified expr so that i+j can be
; hoisted out of the innermost loop.
;
-; RUN: llvm-as < %s | opt -reassociate | llvm-dis | grep 115 | not grep 117
+; RUN: opt < %s -reassociate -S | grep 115 | not grep 117
; END.
@.LC0 = internal global [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/Reassociate/mul-factor3.ll b/test/Transforms/Reassociate/mul-factor3.ll
index e45e431df545..4d0517618e00 100644
--- a/test/Transforms/Reassociate/mul-factor3.ll
+++ b/test/Transforms/Reassociate/mul-factor3.ll
@@ -1,7 +1,6 @@
; This should be one add and two multiplies.
-; RUN: llvm-as < %s | \
-; RUN: opt -reassociate -instcombine | llvm-dis > %t
+; RUN: opt < %s -reassociate -instcombine -S > %t
; RUN: grep mul %t | count 2
; RUN: grep add %t | count 1
diff --git a/test/Transforms/Reassociate/mul-neg-add.ll b/test/Transforms/Reassociate/mul-neg-add.ll
index 1899be79e112..dd6ddd9b62ec 100644
--- a/test/Transforms/Reassociate/mul-neg-add.ll
+++ b/test/Transforms/Reassociate/mul-neg-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis |\
+; RUN: opt < %s -reassociate -instcombine -S |\
; RUN: not grep {sub i32 0}
define i32 @test(i32 %X, i32 %Y, i32 %Z) {
diff --git a/test/Transforms/Reassociate/mulfactor.ll b/test/Transforms/Reassociate/mulfactor.ll
index f80019c95e36..f279727c9930 100644
--- a/test/Transforms/Reassociate/mulfactor.ll
+++ b/test/Transforms/Reassociate/mulfactor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis | grep mul | count 2
+; RUN: opt < %s -reassociate -instcombine -S | grep mul | count 2
; This should have exactly 2 multiplies when we're done.
diff --git a/test/Transforms/Reassociate/mulfactor2.ll b/test/Transforms/Reassociate/mulfactor2.ll
index 62a890b90b68..8116554196fb 100644
--- a/test/Transforms/Reassociate/mulfactor2.ll
+++ b/test/Transforms/Reassociate/mulfactor2.ll
@@ -1,7 +1,6 @@
; This should turn into one multiply and one add.
-; RUN: llvm-as < %s | \
-; RUN: opt -instcombine -reassociate -instcombine | llvm-dis > %t
+; RUN: opt < %s -instcombine -reassociate -instcombine -S > %t
; RUN: grep mul %t | count 1
; RUN: grep add %t | count 1
diff --git a/test/Transforms/Reassociate/negation.ll b/test/Transforms/Reassociate/negation.ll
index 6b2041898a1d..6a3dfd3b8206 100644
--- a/test/Transforms/Reassociate/negation.ll
+++ b/test/Transforms/Reassociate/negation.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis | not grep sub
+; RUN: opt < %s -reassociate -instcombine -S | not grep sub
; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
diff --git a/test/Transforms/Reassociate/otherops.ll b/test/Transforms/Reassociate/otherops.ll
index 3c9b0744a8fc..d68d00818cb8 100644
--- a/test/Transforms/Reassociate/otherops.ll
+++ b/test/Transforms/Reassociate/otherops.ll
@@ -1,6 +1,6 @@
; Reassociation should apply to Add, Mul, And, Or, & Xor
;
-; RUN: llvm-as < %s | opt -reassociate -constprop -instcombine -die | llvm-dis | not grep 12
+; RUN: opt < %s -reassociate -constprop -instcombine -die -S | not grep 12
define i32 @test_mul(i32 %arg) {
%tmp1 = mul i32 12, %arg ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/shift-factor.ll b/test/Transforms/Reassociate/shift-factor.ll
index 05437fd86639..73af5e5304ee 100644
--- a/test/Transforms/Reassociate/shift-factor.ll
+++ b/test/Transforms/Reassociate/shift-factor.ll
@@ -1,6 +1,5 @@
; There should be exactly one shift and one add left.
-; RUN: llvm-as < %s | \
-; RUN: opt -reassociate -instcombine | llvm-dis > %t
+; RUN: opt < %s -reassociate -instcombine -S > %t
; RUN: grep shl %t | count 1
; RUN: grep add %t | count 1
diff --git a/test/Transforms/Reassociate/shifttest.ll b/test/Transforms/Reassociate/shifttest.ll
index fd49e7d3f223..8b2cbc98c47a 100644
--- a/test/Transforms/Reassociate/shifttest.ll
+++ b/test/Transforms/Reassociate/shifttest.ll
@@ -1,6 +1,6 @@
; With shl->mul reassociation, we can see that this is (shl A, 9) * A
;
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis |\
+; RUN: opt < %s -reassociate -instcombine -S |\
; RUN: grep {shl .*, 9}
define i32 @test(i32 %A, i32 %B) {
diff --git a/test/Transforms/Reassociate/subtest.ll b/test/Transforms/Reassociate/subtest.ll
index 513984e7722d..4c63d1238a67 100644
--- a/test/Transforms/Reassociate/subtest.ll
+++ b/test/Transforms/Reassociate/subtest.ll
@@ -1,6 +1,6 @@
; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
;
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis | not grep 12
+; RUN: opt < %s -reassociate -instcombine -S | not grep 12
define i32 @test(i32 %A, i32 %B) {
%X = add i32 -12, %A ; <i32> [#uses=1]
diff --git a/test/Transforms/Reassociate/subtest2.ll b/test/Transforms/Reassociate/subtest2.ll
index c81e9b38def9..0513c5fc1b63 100644
--- a/test/Transforms/Reassociate/subtest2.ll
+++ b/test/Transforms/Reassociate/subtest2.ll
@@ -1,6 +1,6 @@
; With sub reassociation, constant folding can eliminate the uses of %a.
;
-; RUN: llvm-as < %s | opt -reassociate -instcombine | llvm-dis | grep %a | count 1
+; RUN: opt < %s -reassociate -instcombine -S | grep %a | count 1
; PR2047
define i32 @test(i32 %a, i32 %b, i32 %c) nounwind {
diff --git a/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll b/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll
index 13b45f59a94f..bb0cf04f67b1 100644
--- a/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll
+++ b/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll
@@ -5,7 +5,7 @@
; real benchmark (mst from Olden benchmark, MakeGraph function). When SCCP is
; fixed, this should be eliminated by a single SCCP application.
;
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep loop
+; RUN: opt < %s -sccp -S | not grep loop
define i32* @test() {
bb1:
diff --git a/test/Transforms/SCCP/2002-05-02-MissSecondInst.ll b/test/Transforms/SCCP/2002-05-02-MissSecondInst.ll
index fd4083d1374c..bb5b51d1bbb1 100644
--- a/test/Transforms/SCCP/2002-05-02-MissSecondInst.ll
+++ b/test/Transforms/SCCP/2002-05-02-MissSecondInst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep sub
+; RUN: opt < %s -sccp -S | not grep sub
define void @test3(i32, i32) {
add i32 0, 0 ; <i32>:3 [#uses=0]
diff --git a/test/Transforms/SCCP/2002-05-20-MissedIncomingValue.ll b/test/Transforms/SCCP/2002-05-20-MissedIncomingValue.ll
index 61bf5db44441..f6198025b80b 100644
--- a/test/Transforms/SCCP/2002-05-20-MissedIncomingValue.ll
+++ b/test/Transforms/SCCP/2002-05-20-MissedIncomingValue.ll
@@ -1,7 +1,7 @@
; This test shows a case where SCCP is incorrectly eliminating the PHI node
; because it thinks it has a constant 0 value, when it really doesn't.
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep phi
+; RUN: opt < %s -sccp -S | grep phi
define i32 @test(i32 %A, i1 %c) {
bb1:
diff --git a/test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll b/test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll
index b46d23a9dcf3..f02a29379b86 100644
--- a/test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll
+++ b/test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll
@@ -2,7 +2,7 @@
; this is in fact NOT the case, so the return should still be alive in the code
; after sccp and CFG simplification have been performed.
;
-; RUN: llvm-as < %s | opt -sccp -simplifycfg | llvm-dis | \
+; RUN: opt < %s -sccp -simplifycfg -S | \
; RUN: grep ret
define void @old_main() {
diff --git a/test/Transforms/SCCP/2002-08-30-GetElementPtrTest.ll b/test/Transforms/SCCP/2002-08-30-GetElementPtrTest.ll
index c5a7c790bd26..6aaf33ec206c 100644
--- a/test/Transforms/SCCP/2002-08-30-GetElementPtrTest.ll
+++ b/test/Transforms/SCCP/2002-08-30-GetElementPtrTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep %X
+; RUN: opt < %s -sccp -S | not grep %X
@G = external global [40 x i32] ; <[40 x i32]*> [#uses=1]
diff --git a/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll b/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll
index 03ff4dde8a10..576f5d6504d1 100644
--- a/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll
+++ b/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp -simplifycfg | llvm-dis | \
+; RUN: opt < %s -sccp -simplifycfg -S | \
; RUN: not grep then:
define void @cprop_test11(i32* %data.1) {
diff --git a/test/Transforms/SCCP/2003-08-26-InvokeHandling.ll b/test/Transforms/SCCP/2003-08-26-InvokeHandling.ll
index 020af7d16c7d..9876375ae88a 100644
--- a/test/Transforms/SCCP/2003-08-26-InvokeHandling.ll
+++ b/test/Transforms/SCCP/2003-08-26-InvokeHandling.ll
@@ -1,5 +1,5 @@
; The PHI cannot be eliminated from this testcase; SCCP is mishandling invokes!
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep phi
+; RUN: opt < %s -sccp -S | grep phi
declare void @foo()
diff --git a/test/Transforms/SCCP/2004-11-16-DeadInvoke.ll b/test/Transforms/SCCP/2004-11-16-DeadInvoke.ll
index cf64677334a2..5d2c78ef874c 100644
--- a/test/Transforms/SCCP/2004-11-16-DeadInvoke.ll
+++ b/test/Transforms/SCCP/2004-11-16-DeadInvoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp -disable-output
+; RUN: opt < %s -sccp -disable-output
declare i32 @foo()
diff --git a/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll b/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll
index 6592e8b08925..4adfde3bfe9c 100644
--- a/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll
+++ b/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep {ret i32 1}
+; RUN: opt < %s -sccp -S | grep {ret i32 1}
; This function definitely returns 1, even if we don't know the direction
; of the branch.
diff --git a/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll b/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll
index 299b79f53e21..47f9cb45cc67 100644
--- a/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll
+++ b/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp -disable-output
+; RUN: opt < %s -sccp -disable-output
; END.
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/Transforms/SCCP/2006-12-04-PackedType.ll b/test/Transforms/SCCP/2006-12-04-PackedType.ll
index 0e268c2db839..cee334987218 100644
--- a/test/Transforms/SCCP/2006-12-04-PackedType.ll
+++ b/test/Transforms/SCCP/2006-12-04-PackedType.ll
@@ -1,7 +1,7 @@
; Test VectorType handling by SCCP.
; SCCP ignores VectorTypes until PR1034 is fixed
;
-; RUN: llvm-as < %s | opt -sccp
+; RUN: opt < %s -sccp
; END.
target datalayout = "E-p:32:32"
diff --git a/test/Transforms/SCCP/2006-12-19-UndefBug.ll b/test/Transforms/SCCP/2006-12-19-UndefBug.ll
index 4d686690b98f..ec69ce05fe94 100644
--- a/test/Transforms/SCCP/2006-12-19-UndefBug.ll
+++ b/test/Transforms/SCCP/2006-12-19-UndefBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | \
+; RUN: opt < %s -sccp -S | \
; RUN: grep {ret i1 false}
define i1 @foo() {
diff --git a/test/Transforms/SCCP/2007-05-16-InvokeCrash.ll b/test/Transforms/SCCP/2007-05-16-InvokeCrash.ll
index c099b88044c0..b84fe6db2f21 100644
--- a/test/Transforms/SCCP/2007-05-16-InvokeCrash.ll
+++ b/test/Transforms/SCCP/2007-05-16-InvokeCrash.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -sccp -disable-output
-; PR 1431
+; RUN: opt < %s -sccp -disable-output
+; PR1431
define void @_ada_bench() {
entry:
diff --git a/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll b/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll
index 18ab5e8cdc5b..aa613dca5d11 100644
--- a/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll
+++ b/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep undef | count 1
+; RUN: opt < %s -sccp -S | grep undef | count 1
; PR1938
define i32 @main() {
diff --git a/test/Transforms/SCCP/2008-03-10-sret.ll b/test/Transforms/SCCP/2008-03-10-sret.ll
index 0f1c3806ce3e..85bcaf4d2db4 100644
--- a/test/Transforms/SCCP/2008-03-10-sret.ll
+++ b/test/Transforms/SCCP/2008-03-10-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis > %t
+; RUN: opt < %s -ipsccp -S > %t
; RUN: grep {ret i32 36} %t
; RUN: grep {%mrv = insertvalue %T undef, i32 18, 0} %t
; RUN: grep {%mrv1 = insertvalue %T %mrv, i32 17, 1} %t
diff --git a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
index 99f9136ea370..1b26ca9e194d 100644
--- a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
+++ b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep {ret i32 %Z}
+; RUN: opt < %s -sccp -S | grep {ret i32 %Z}
; rdar://5778210
declare {i32, i32} @bar(i32 %A)
diff --git a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
index f85a36260e0a..cd6cf9704a58 100644
--- a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
+++ b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep {ret i32 undef}
+; RUN: opt < %s -sccp -S | not grep {ret i32 undef}
; PR2358
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
index 20f7c21b7c18..d23ee2b23d76 100644
--- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
+++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | grep {ret i32 42}
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | grep {ret i32 undef}
+; RUN: opt < %s -ipsccp -S | grep {ret i32 42}
+; RUN: opt < %s -ipsccp -S | grep {ret i32 undef}
; PR3325
define i32 @main() {
diff --git a/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll b/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll
index 480aca0d1acf..7aced6635272 100644
--- a/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll
+++ b/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp -disable-output
+; RUN: opt < %s -sccp -disable-output
; PR4277
define i32 @main() nounwind {
diff --git a/test/Transforms/SCCP/apint-array.ll b/test/Transforms/SCCP/apint-array.ll
index ead3f6ab9a66..1e75878f3aeb 100644
--- a/test/Transforms/SCCP/apint-array.ll
+++ b/test/Transforms/SCCP/apint-array.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep {ret i101 12}
+; RUN: opt < %s -sccp -S | grep {ret i101 12}
@Y = constant [6 x i101] [ i101 12, i101 123456789000000, i101 -12,i101
-123456789000000, i101 0,i101 9123456789000000]
diff --git a/test/Transforms/SCCP/apint-basictest.ll b/test/Transforms/SCCP/apint-basictest.ll
index 12f8326a7375..c03bfef74301 100644
--- a/test/Transforms/SCCP/apint-basictest.ll
+++ b/test/Transforms/SCCP/apint-basictest.ll
@@ -1,7 +1,7 @@
; This is a basic sanity check for constant propagation. The add instruction
; should be eliminated.
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep add
+; RUN: opt < %s -sccp -S | not grep add
define i128 @test(i1 %B) {
br i1 %B, label %BB1, label %BB2
diff --git a/test/Transforms/SCCP/apint-basictest2.ll b/test/Transforms/SCCP/apint-basictest2.ll
index 7db4e362eb4c..173482786f20 100644
--- a/test/Transforms/SCCP/apint-basictest2.ll
+++ b/test/Transforms/SCCP/apint-basictest2.ll
@@ -1,8 +1,8 @@
; This is a basic sanity check for constant propagation. The add instruction
; and phi instruction should be eliminated.
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep phi
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep add
+; RUN: opt < %s -sccp -S | not grep phi
+; RUN: opt < %s -sccp -S | not grep add
define i128 @test(i1 %B) {
br i1 %B, label %BB1, label %BB2
diff --git a/test/Transforms/SCCP/apint-basictest3.ll b/test/Transforms/SCCP/apint-basictest3.ll
index 6e86764bf2cf..47671bf46b31 100644
--- a/test/Transforms/SCCP/apint-basictest3.ll
+++ b/test/Transforms/SCCP/apint-basictest3.ll
@@ -2,8 +2,8 @@
; arithmetic operations.
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep mul
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep umod
+; RUN: opt < %s -sccp -S | not grep mul
+; RUN: opt < %s -sccp -S | not grep umod
define i128 @test(i1 %B) {
br i1 %B, label %BB1, label %BB2
diff --git a/test/Transforms/SCCP/apint-basictest4.ll b/test/Transforms/SCCP/apint-basictest4.ll
index a90bd7d7315b..41036ea002d2 100644
--- a/test/Transforms/SCCP/apint-basictest4.ll
+++ b/test/Transforms/SCCP/apint-basictest4.ll
@@ -2,9 +2,9 @@
; logic operations.
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep and
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep trunc
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep {ret i100 -1}
+; RUN: opt < %s -sccp -S | not grep and
+; RUN: opt < %s -sccp -S | not grep trunc
+; RUN: opt < %s -sccp -S | grep {ret i100 -1}
define i100 @test(i133 %A) {
%B = and i133 0, %A
diff --git a/test/Transforms/SCCP/apint-bigarray.ll b/test/Transforms/SCCP/apint-bigarray.ll
index 5cd1107a7a44..0dd9ad331a07 100644
--- a/test/Transforms/SCCP/apint-bigarray.ll
+++ b/test/Transforms/SCCP/apint-bigarray.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep %X
+; RUN: opt < %s -sccp -S | not grep %X
@G = global [1000000 x i10000] zeroinitializer
diff --git a/test/Transforms/SCCP/apint-bigint.ll b/test/Transforms/SCCP/apint-bigint.ll
index ebb0d0760f2e..36a96c335715 100644
--- a/test/Transforms/SCCP/apint-bigint.ll
+++ b/test/Transforms/SCCP/apint-bigint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep xor
+; RUN: opt < %s -sccp -S | not grep xor
define i11129 @test1() {
%B = shl i11129 1, 11128
diff --git a/test/Transforms/SCCP/apint-bigint2.ll b/test/Transforms/SCCP/apint-bigint2.ll
index e95e9d0abb13..660eaad7bc12 100644
--- a/test/Transforms/SCCP/apint-bigint2.ll
+++ b/test/Transforms/SCCP/apint-bigint2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep load
+; RUN: opt < %s -sccp -S | not grep load
@Y = constant [6 x i101] [ i101 12, i101 123456789000000, i101 -12,
i101 -123456789000000, i101 0,i101 9123456789000000]
diff --git a/test/Transforms/SCCP/apint-ipsccp1.ll b/test/Transforms/SCCP/apint-ipsccp1.ll
index 59cfa3f7a2f3..fda40f53fe1f 100644
--- a/test/Transforms/SCCP/apint-ipsccp1.ll
+++ b/test/Transforms/SCCP/apint-ipsccp1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | grep -v {ret i512 undef} | \
+; RUN: opt < %s -ipsccp -S | grep -v {ret i512 undef} | \
; RUN: grep {ret i8 2}
define internal i512 @test(i1 %B) {
diff --git a/test/Transforms/SCCP/apint-ipsccp2.ll b/test/Transforms/SCCP/apint-ipsccp2.ll
index 65e5ee7e89ec..3c02e05548db 100644
--- a/test/Transforms/SCCP/apint-ipsccp2.ll
+++ b/test/Transforms/SCCP/apint-ipsccp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | grep -v {ret i101 0} | \
+; RUN: opt < %s -ipsccp -S | grep -v {ret i101 0} | \
; RUN: grep -v {ret i101 undef} | not grep ret
diff --git a/test/Transforms/SCCP/apint-ipsccp3.ll b/test/Transforms/SCCP/apint-ipsccp3.ll
index c07a54f8f6f8..68987aee249a 100644
--- a/test/Transforms/SCCP/apint-ipsccp3.ll
+++ b/test/Transforms/SCCP/apint-ipsccp3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep global
+; RUN: opt < %s -ipsccp -S | not grep global
@G = internal global i66 undef
diff --git a/test/Transforms/SCCP/apint-ipsccp4.ll b/test/Transforms/SCCP/apint-ipsccp4.ll
index a0656b75c132..75875ff642d2 100644
--- a/test/Transforms/SCCP/apint-ipsccp4.ll
+++ b/test/Transforms/SCCP/apint-ipsccp4.ll
@@ -1,8 +1,8 @@
; This test makes sure that these instructions are properly constant propagated.
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep load
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep add
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep phi
+; RUN: opt < %s -ipsccp -S | not grep load
+; RUN: opt < %s -ipsccp -S | not grep add
+; RUN: opt < %s -ipsccp -S | not grep phi
@Y = constant [2 x { i212, float }] [ { i212, float } { i212 12, float 1.0 },
diff --git a/test/Transforms/SCCP/apint-load.ll b/test/Transforms/SCCP/apint-load.ll
index 96509714bb07..56fdb3513f15 100644
--- a/test/Transforms/SCCP/apint-load.ll
+++ b/test/Transforms/SCCP/apint-load.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly constant propagated.
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep load
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep fdiv
+; RUN: opt < %s -ipsccp -S | not grep load
+; RUN: opt < %s -ipsccp -S | not grep fdiv
@X = constant i212 42
@Y = constant [2 x { i212, float }] [ { i212, float } { i212 12, float 1.0 },
diff --git a/test/Transforms/SCCP/apint-phi.ll b/test/Transforms/SCCP/apint-phi.ll
index 77f6bc869266..50f0d1aeccbd 100644
--- a/test/Transforms/SCCP/apint-phi.ll
+++ b/test/Transforms/SCCP/apint-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep phi
+; RUN: opt < %s -sccp -S | not grep phi
define i999 @test(i999%A, i1 %c) {
bb1:
diff --git a/test/Transforms/SCCP/apint-select.ll b/test/Transforms/SCCP/apint-select.ll
index c856896be9f9..c79751910fcb 100644
--- a/test/Transforms/SCCP/apint-select.ll
+++ b/test/Transforms/SCCP/apint-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep select
+; RUN: opt < %s -sccp -S | not grep select
@A = constant i32 10
diff --git a/test/Transforms/SCCP/basictest.ll b/test/Transforms/SCCP/basictest.ll
index 08640a9a239a..dc0f702bcd96 100644
--- a/test/Transforms/SCCP/basictest.ll
+++ b/test/Transforms/SCCP/basictest.ll
@@ -1,7 +1,7 @@
; This is a basic sanity check for constant propagation. The add instruction
; should be eliminated.
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep add
+; RUN: opt < %s -sccp -S | not grep add
define i32 @test(i1 %B) {
br i1 %B, label %BB1, label %BB2
diff --git a/test/Transforms/SCCP/calltest.ll b/test/Transforms/SCCP/calltest.ll
index ee6eb8e4a803..9dec22f4d1cd 100644
--- a/test/Transforms/SCCP/calltest.ll
+++ b/test/Transforms/SCCP/calltest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp -loop-deletion -simplifycfg | llvm-dis | \
-; RUN: not grep br
+; RUN: opt < %s -sccp -loop-deletion -simplifycfg -S | not grep br
; No matter how hard you try, sqrt(1.0) is always 1.0. This allows the
; optimizer to delete this loop.
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index 1449ea70c39b..7f0772e53d93 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | \
+; RUN: opt < %s -ipsccp -S | \
; RUN: grep -v {ret i32 17} | grep -v {ret i32 undef} | not grep ret
define internal i32 @bar(i32 %A) {
diff --git a/test/Transforms/SCCP/ipsccp-conditional.ll b/test/Transforms/SCCP/ipsccp-conditional.ll
index b98bbf44be68..07a427ea91f1 100644
--- a/test/Transforms/SCCP/ipsccp-conditional.ll
+++ b/test/Transforms/SCCP/ipsccp-conditional.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | \
+; RUN: opt < %s -ipsccp -S | \
; RUN: grep -v {ret i32 0} | grep -v {ret i32 undef} | not grep ret
define internal i32 @bar(i32 %A) {
diff --git a/test/Transforms/SCCP/ipsccp-gvar.ll b/test/Transforms/SCCP/ipsccp-gvar.ll
index 6f2ee1f4d8a3..ecbd95969521 100644
--- a/test/Transforms/SCCP/ipsccp-gvar.ll
+++ b/test/Transforms/SCCP/ipsccp-gvar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -ipsccp | llvm-dis | not grep global
+; RUN: opt < %s -ipsccp -S | not grep global
@G = internal global i32 undef ; <i32*> [#uses=5]
diff --git a/test/Transforms/SCCP/loadtest.ll b/test/Transforms/SCCP/loadtest.ll
index 0cbbdf603661..fd82aef821ff 100644
--- a/test/Transforms/SCCP/loadtest.ll
+++ b/test/Transforms/SCCP/loadtest.ll
@@ -1,7 +1,7 @@
; This test makes sure that these instructions are properly constant propagated.
;
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep load
+; RUN: opt < %s -sccp -S | not grep load
@X = constant i32 42 ; <i32*> [#uses=1]
diff --git a/test/Transforms/SCCP/logical-nuke.ll b/test/Transforms/SCCP/logical-nuke.ll
index 87454e4dcea6..b3d845c7eec5 100644
--- a/test/Transforms/SCCP/logical-nuke.ll
+++ b/test/Transforms/SCCP/logical-nuke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | grep {ret i32 0}
+; RUN: opt < %s -sccp -S | grep {ret i32 0}
; Test that SCCP has basic knowledge of when 'and'/'or' instructions nuke overdefined values.
diff --git a/test/Transforms/SCCP/phitest.ll b/test/Transforms/SCCP/phitest.ll
index c75de5dde8fa..4c5c3dcc6904 100644
--- a/test/Transforms/SCCP/phitest.ll
+++ b/test/Transforms/SCCP/phitest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp -dce -simplifycfg | llvm-dis | \
-; RUN: not grep br
+; RUN: opt < %s -sccp -dce -simplifycfg -S | not grep br
define i32 @test(i32 %param) {
entry:
diff --git a/test/Transforms/SCCP/sccptest.ll b/test/Transforms/SCCP/sccptest.ll
index 84ba2d43cde7..6f422f07d661 100644
--- a/test/Transforms/SCCP/sccptest.ll
+++ b/test/Transforms/SCCP/sccptest.ll
@@ -1,7 +1,7 @@
; This is the test case taken from Appel's book that illustrates a hard case
; that SCCP gets right. BB3 should be completely eliminated.
;
-; RUN: llvm-as < %s | opt -sccp -constprop -dce -simplifycfg | \
+; RUN: opt < %s -sccp -constprop -dce -simplifycfg | \
; RUN: llvm-dis | not grep BB3
define i32 @testfunction(i32 %i0, i32 %j0) {
diff --git a/test/Transforms/SCCP/select.ll b/test/Transforms/SCCP/select.ll
index 74b20d09c143..b2f1dd2d0f23 100644
--- a/test/Transforms/SCCP/select.ll
+++ b/test/Transforms/SCCP/select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep select
+; RUN: opt < %s -sccp -S | not grep select
define i32 @test1(i1 %C) {
%X = select i1 %C, i32 0, i32 0 ; <i32> [#uses=1]
diff --git a/test/Transforms/SRETPromotion/2008-03-11-attributes.ll b/test/Transforms/SRETPromotion/2008-03-11-attributes.ll
index 81a1476fc964..55abec55ed03 100644
--- a/test/Transforms/SRETPromotion/2008-03-11-attributes.ll
+++ b/test/Transforms/SRETPromotion/2008-03-11-attributes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sretpromotion -disable-output
+; RUN: opt < %s -sretpromotion -disable-output
%struct.ObjPoint = type { double, double, double, double, double, double }
define void @RotatePoint(%struct.ObjPoint* sret %agg.result, %struct.ObjPoint* byval %a, double %rx, double %ry, double %rz) nounwind {
diff --git a/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll b/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll
index 21701dd84be6..1168b0b2e9d6 100644
--- a/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll
+++ b/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll
@@ -5,7 +5,7 @@
; We're mainly testing that opt does not crash, but we'll also check that the
; sret attribute is still there for good measure.
-; RUN: llvm-as < %s | opt -sretpromotion | llvm-dis | grep sret
+; RUN: opt < %s -sretpromotion -S | grep sret
%struct.S = type <{ i32, i32 }>
diff --git a/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll b/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll
index 10f92efc8654..26c6a6e5077b 100644
--- a/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll
+++ b/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll
@@ -4,7 +4,7 @@
; We're mainly testing that opt does not crash, but we'll also check that the
; sret attribute is still there for good measure.
-; RUN: llvm-as < %s | opt -sretpromotion | llvm-dis | grep sret
+; RUN: opt < %s -sretpromotion -S | grep sret
%struct.S = type <{ i32, i32 }>
diff --git a/test/Transforms/SRETPromotion/basictest.ll b/test/Transforms/SRETPromotion/basictest.ll
index 4146cce46ed0..ff047dc41eba 100644
--- a/test/Transforms/SRETPromotion/basictest.ll
+++ b/test/Transforms/SRETPromotion/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -sretpromotion | llvm-dis > %t
+; RUN: opt < %s -sretpromotion -S > %t
; RUN: cat %t | grep sret | count 1
; This function is promotable
diff --git a/test/Transforms/SSI/2009-07-09-Invoke.ll b/test/Transforms/SSI/2009-07-09-Invoke.ll
new file mode 100644
index 000000000000..20a22172806e
--- /dev/null
+++ b/test/Transforms/SSI/2009-07-09-Invoke.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -ssi-everything -disable-output
+; PR4511
+
+ %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" }
+ %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* }
+ %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
+ %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
+ %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" }
+ %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 }
+ %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+
+declare void @_Unwind_Resume(i8*)
+
+declare fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*)
+
+define fastcc void @_ZNSt6vectorISsSaISsEE9push_backERKSs(%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >"* nocapture %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* nocapture %__x) {
+entry:
+ br i1 undef, label %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i, label %bb
+
+bb: ; preds = %entry
+ ret void
+
+_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i: ; preds = %entry
+ %0 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef)
+ to label %invcont14.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=3]
+
+invcont14.i: ; preds = %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i
+ %1 = icmp eq %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, null ; <i1> [#uses=1]
+ br i1 %1, label %bb19.i, label %bb.i17.i
+
+bb.i17.i: ; preds = %invcont14.i
+ %2 = invoke fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* undef, i32 0)
+ to label %bb2.i25.i unwind label %ppad.i.i.i23.i ; <i8*> [#uses=0]
+
+ppad.i.i.i23.i: ; preds = %bb.i17.i
+ invoke void @_Unwind_Resume(i8* undef)
+ to label %.noexc.i24.i unwind label %lpad.i29.i
+
+.noexc.i24.i: ; preds = %ppad.i.i.i23.i
+ unreachable
+
+bb2.i25.i: ; preds = %bb.i17.i
+ unreachable
+
+lpad.i29.i: ; preds = %ppad.i.i.i23.i
+ invoke void @_Unwind_Resume(i8* undef)
+ to label %.noexc.i9 unwind label %ppad81.i
+
+.noexc.i9: ; preds = %lpad.i29.i
+ unreachable
+
+bb19.i: ; preds = %invcont14.i
+ %3 = getelementptr %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, i32 1 ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=2]
+ %4 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %3)
+ to label %invcont20.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0]
+
+invcont20.i: ; preds = %bb19.i
+ unreachable
+
+invcont32.i: ; preds = %ppad81.i
+ unreachable
+
+ppad81.i: ; preds = %bb19.i, %lpad.i29.i, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i
+ %__new_finish.0.i = phi %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* [ %0, %lpad.i29.i ], [ undef, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i ], [ %3, %bb19.i ] ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0]
+ br i1 undef, label %invcont32.i, label %bb.i.i.i.i
+
+bb.i.i.i.i: ; preds = %bb.i.i.i.i, %ppad81.i
+ br label %bb.i.i.i.i
+}
+
+declare fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* nocapture, i32)
diff --git a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll b/test/Transforms/SSI/2009-08-15-UnreachableBB.ll
new file mode 100644
index 000000000000..0fe37ec74098
--- /dev/null
+++ b/test/Transforms/SSI/2009-08-15-UnreachableBB.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -ssi-everything -disable-output
+
+declare fastcc i32 @ras_Empty(i8** nocapture) nounwind readonly
+
+define i32 @cc_Tautology() nounwind {
+entry:
+ unreachable
+
+cc_InitData.exit: ; No predecessors!
+ %0 = call fastcc i32 @ras_Empty(i8** undef) nounwind ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb2, label %bb6
+
+bb2: ; preds = %cc_InitData.exit
+ unreachable
+
+bb6: ; preds = %cc_InitData.exit
+ ret i32 undef
+}
diff --git a/test/Transforms/SSI/2009-08-17-CritEdge.ll b/test/Transforms/SSI/2009-08-17-CritEdge.ll
new file mode 100644
index 000000000000..61bd2dc693f4
--- /dev/null
+++ b/test/Transforms/SSI/2009-08-17-CritEdge.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -ssi-everything -disable-output
+
+define void @test(i32 %x) {
+entry:
+ br label %label1
+label1:
+ %A = phi i32 [ 0, %entry ], [ %A.1, %label2 ]
+ %B = icmp slt i32 %A, %x
+ br i1 %B, label %label2, label %label2
+label2:
+ %A.1 = add i32 %A, 1
+ br label %label1
+label3: ; No predecessors!
+ ret void
+}
diff --git a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll b/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll
new file mode 100644
index 000000000000..64bed191def0
--- /dev/null
+++ b/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -ssi-everything -disable-output
+
+define void @foo() {
+entry:
+ %tmp0 = load i64* undef, align 4 ; <i64> [#uses=3]
+ br i1 undef, label %end_stmt_playback, label %bb16
+
+readJournalHdr.exit: ; No predecessors!
+ br label %end_stmt_playback
+
+bb16: ; preds = %bb7
+ %tmp1 = icmp slt i64 0, %tmp0 ; <i1> [#uses=1]
+ br i1 %tmp1, label %bb16, label %bb17
+
+bb17: ; preds = %bb16
+ store i64 %tmp0, i64* undef, align 4
+ br label %end_stmt_playback
+
+end_stmt_playback: ; preds = %bb17, %readJournalHdr.exit, %bb6, %bb2
+ store i64 %tmp0, i64* undef, align 4
+ ret void
+}
diff --git a/test/Transforms/SSI/dg.exp b/test/Transforms/SSI/dg.exp
new file mode 100644
index 000000000000..f2005891a59a
--- /dev/null
+++ b/test/Transforms/SSI/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SSI/ssiphi.ll b/test/Transforms/SSI/ssiphi.ll
new file mode 100644
index 000000000000..a42b70c3c021
--- /dev/null
+++ b/test/Transforms/SSI/ssiphi.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -ssi-everything -S | FileCheck %s
+
+declare void @use(i32)
+declare i32 @create()
+
+define i32 @foo() {
+entry:
+ %x = call i32 @create()
+ %y = icmp slt i32 %x, 10
+ br i1 %y, label %T, label %F
+T:
+; CHECK: SSI_sigma
+ call void @use(i32 %x)
+ br label %join
+F:
+; CHECK: SSI_sigma
+ call void @use(i32 %x)
+ br label %join
+join:
+; CHECK: SSI_phi
+ ret i32 %x
+}
diff --git a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
index 5a9f3a52c667..824e2492c04e 100644
--- a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
+++ b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca
; Test that an array is not incorrectly deconstructed.
diff --git a/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll b/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll
index 83a18c1cd58a..b147ec960807 100644
--- a/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll
+++ b/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl
+; RUN: opt < %s -scalarrepl
define void @main() {
%E = alloca { { i32, float, double, i64 }, { i32, float, double, i64 } } ; <{ { i32, float, double, i64 }, { i32, float, double, i64 } }*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll b/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll
index 422064967819..89c0b05333a2 100644
--- a/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll
+++ b/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl
+; RUN: opt < %s -scalarrepl
define i32 @test() {
%X = alloca { [4 x i32] } ; <{ [4 x i32] }*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
index dbac15c3a521..eb1c945e34eb 100644
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@@ -1,6 +1,6 @@
; Scalar replacement was incorrectly promoting this alloca!!
;
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: sed {s/;.*//g} | grep {\\\[}
define i8* @test() {
diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
index c5c059b154dd..24e6a3195f13 100644
--- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
+++ b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {alloca %T}
+; RUN: opt < %s -scalarrepl -S | grep {alloca %T}
%T = type { [80 x i8], i32, i32 }
declare i32 @.callback_1(i8*)
diff --git a/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll b/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll
index de6e2974a9ec..ea23c3114682 100644
--- a/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll
+++ b/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output
+; RUN: opt < %s -scalarrepl -disable-output
target datalayout = "E-p:32:32"
%struct.rtx_def = type { [2 x i8], i32, [1 x %union.rtunion_def] }
diff --git a/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll b/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll
index bcb8457d553a..03c7452115c1 100644
--- a/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll
+++ b/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output
+; RUN: opt < %s -scalarrepl -disable-output
target datalayout = "E-p:32:32"
diff --git a/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll b/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll
index 18493f594713..63840f1b3c03 100644
--- a/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll
+++ b/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output
+; RUN: opt < %s -scalarrepl -disable-output
define void @output_toc() {
entry:
diff --git a/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll b/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll
index 0007665a8f86..dcd7e534f10b 100644
--- a/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll
+++ b/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output
+; RUN: opt < %s -scalarrepl -disable-output
; END.
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8.7.2"
diff --git a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
index f0253b7bea93..4655d1402c07 100644
--- a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
+++ b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep alloca
define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
%vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
diff --git a/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll b/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll
index ae870d385580..260620352f31 100644
--- a/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll
+++ b/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output
+; RUN: opt < %s -scalarrepl -disable-output
; PR1045
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll b/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll
index 769ec199b849..bd4910682f1a 100644
--- a/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll
+++ b/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output
+; RUN: opt < %s -scalarrepl -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "arm-apple-darwin8"
diff --git a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll b/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
index 43b721b7013e..e67b6106cfad 100644
--- a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
+++ b/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {alloca.*client_t}
+; RUN: opt < %s -scalarrepl -S | grep {alloca.*client_t}
; PR1446
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index dc1198ead89f..f1b8b80f8f20 100644
--- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep memcpy
+; RUN: opt < %s -scalarrepl -S | grep memcpy
; PR1421
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
index d2d2b24da763..81b6746345d3 100644
--- a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
+++ b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep shr
+; RUN: opt < %s -scalarrepl -S | not grep shr
%struct.S = type { i16 }
diff --git a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
index 0675a966b5a3..d799bd77e458 100644
--- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
+++ b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | grep {ret i8 17}
+; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i8 17}
; rdar://5707076
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.1.0"
diff --git a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
index 9ec5fa381221..7f8ef8310522 100644
--- a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
+++ b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep alloca
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
%struct..0anon = type { <1 x i64> }
diff --git a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
index 73d92be179e4..3ebafd072e03 100644
--- a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
+++ b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
@@ -3,7 +3,7 @@
; instruction, which was not possible before aggregates were first class
; values. This checks if scalarrepl splits up the struct and array properly.
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep alloca
define i32 @foo() {
%target = alloca { i32, i32 } ; <{ i32, i32 }*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index 8fbbb6749a9c..b704727c11e4 100644
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {call.*mem}
+; RUN: opt < %s -scalarrepl -S | grep {call.*mem}
; PR2369
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index a2386fdedcb7..1df01c1f4e3c 100644
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {s = alloca .struct.x}
+; RUN: opt < %s -scalarrepl -S | grep {s = alloca .struct.x}
; PR2423
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index 6640383973e2..e89be5acbbbd 100644
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -3,7 +3,7 @@
; this would not work when there was a vector involved in the struct, preventing
; scalarrepl from removing the alloca below.
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis > %t
+; RUN: opt < %s -scalarrepl -S > %t
; RUN: cat %t | not grep alloca
%struct.two = type <{ < 2 x i8 >, i16 }>
diff --git a/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll b/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
index 9cdf4a0ef245..725a9b6d9c75 100644
--- a/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
+++ b/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis
+; RUN: opt < %s -scalarrepl | llvm-dis
; PR3304
%struct.c37304a__vrec = type { i8, %struct.c37304a__vrec___disc___XVN }
diff --git a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
index af34baa0c5cf..9c0f2030c0e4 100644
--- a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
+++ b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | grep {ret i32 %x}
+; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i32 %x}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
index 3bea5738ab80..f8ab875bac4b 100644
--- a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
+++ b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine -inline -instcombine | llvm-dis | grep {ret i32 42}
+; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep {ret i32 42}
; PR3489
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-apple-darwin10.0"
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 4fc491b29798..d6eb75bdf801 100644
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -1,6 +1,6 @@
; The store into %p should end up with a known alignment of 1, since the memcpy
; is only known to access it with 1-byte alignment.
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {store i16 1, .*, align 1}
+; RUN: opt < %s -scalarrepl -S | grep {store i16 1, .*, align 1}
; PR3720
%struct.st = type { i16 }
diff --git a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
index 437e732653f9..50e7f9a08e6a 100644
--- a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
+++ b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
+; RUN: opt < %s -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
type { } ; type %0
diff --git a/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll b/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
index facb7c13c0ad..9c70aae1684d 100644
--- a/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
+++ b/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep store | not grep undef
+; RUN: opt < %s -scalarrepl -S | grep store | not grep undef
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll b/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll
index 2342f05aac7f..c5ebf8eb5237 100644
--- a/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll
+++ b/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis
+; RUN: opt < %s -scalarrepl | llvm-dis
; rdar://6808691
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
diff --git a/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll b/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll
index 0a604e9dcc3c..aa3487b27707 100644
--- a/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll
+++ b/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis
+; RUN: opt < %s -scalarrepl | llvm-dis
; PR4146
%wrapper = type { i1 }
diff --git a/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll b/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll
index ffdd679355a7..cecbdd4b881a 100644
--- a/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll
+++ b/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl
+; RUN: opt < %s -scalarrepl
; PR4286
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/ScalarRepl/2009-08-16-VLA.ll b/test/Transforms/ScalarRepl/2009-08-16-VLA.ll
new file mode 100644
index 000000000000..d69af114ad90
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2009-08-16-VLA.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -scalarrepl -disable-opt
+
+ %struct.Item = type { [4 x i16], %struct.rule* }
+ %struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 }
+ %struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* }
+ %struct.plankMap = type { %struct.list*, i32, %struct.stateMap* }
+ %struct.list = type { i8*, %struct.list* }
+ %struct.stateMap = type { i8*, %struct.plank*, i32, i16* }
+ %struct.plank = type { i8*, %struct.list*, i32 }
+ %struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] }
+ %struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* }
+ %struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** }
+ %struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* }
+ %struct.Index_Map = type { i32, %struct.item_set** }
+ %struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* }
+ %struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** }
+
+define void @addHP_2_0() {
+bb4.i:
+ %0 = malloc [0 x %struct.Item] ; <[0 x %struct.Item]*> [#uses=1]
+ %.sub.i.c.i = getelementptr [0 x %struct.Item]* %0, i32 0, i32 0 ; <%struct.Item*> [#uses=0]
+ unreachable
+}
diff --git a/test/Transforms/ScalarRepl/AggregatePromote.ll b/test/Transforms/ScalarRepl/AggregatePromote.ll
index 104a0f989760..16b327356bbd 100644
--- a/test/Transforms/ScalarRepl/AggregatePromote.ll
+++ b/test/Transforms/ScalarRepl/AggregatePromote.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep alloca
target datalayout = "E-p:32:32"
diff --git a/test/Transforms/ScalarRepl/DifferingTypes.ll b/test/Transforms/ScalarRepl/DifferingTypes.ll
index c0604142df89..eb56824a317a 100644
--- a/test/Transforms/ScalarRepl/DifferingTypes.ll
+++ b/test/Transforms/ScalarRepl/DifferingTypes.ll
@@ -1,7 +1,7 @@
; This is a feature test. Hopefully one day this will be implemented. The
; generated code should perform the appropriate masking operations required
; depending on the endianness of the target...
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep alloca
define i32 @testfunc(i32 %i, i8 %j) {
diff --git a/test/Transforms/ScalarRepl/arraytest.ll b/test/Transforms/ScalarRepl/arraytest.ll
index 911a8e38c087..2f68af8a464a 100644
--- a/test/Transforms/ScalarRepl/arraytest.ll
+++ b/test/Transforms/ScalarRepl/arraytest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
define i32 @test() {
%X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll
index c48848630564..909966f19038 100644
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ b/test/Transforms/ScalarRepl/badarray.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca
; PR3466
define i32 @test() {
diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll
index 11a1d51a9676..a43243cabb67 100644
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ b/test/Transforms/ScalarRepl/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
define i32 @test() {
%X = alloca { i32, float } ; <{ i32, float }*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/bitfield-sroa.ll b/test/Transforms/ScalarRepl/bitfield-sroa.ll
index 34dd120e3f8f..6b3d414d34f6 100644
--- a/test/Transforms/ScalarRepl/bitfield-sroa.ll
+++ b/test/Transforms/ScalarRepl/bitfield-sroa.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep alloca
; rdar://6532315
%t = type { { i32, i16, i8, i8 } }
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
index a1ad3f9b8280..26f007b3c05a 100644
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep alloca
; PR3290
;; Store of integer to whole alloca struct.
diff --git a/test/Transforms/ScalarRepl/debuginfo.ll b/test/Transforms/ScalarRepl/debuginfo.ll
index 63ecd687ac5b..903b1a2394a4 100644
--- a/test/Transforms/ScalarRepl/debuginfo.ll
+++ b/test/Transforms/ScalarRepl/debuginfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep alloca
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
diff --git a/test/Transforms/ScalarRepl/load-store-aggregate.ll b/test/Transforms/ScalarRepl/load-store-aggregate.ll
index df13db739f55..9ea3895a22e7 100644
--- a/test/Transforms/ScalarRepl/load-store-aggregate.ll
+++ b/test/Transforms/ScalarRepl/load-store-aggregate.ll
@@ -2,7 +2,7 @@
; are directly loaded from or stored to (using the first class aggregates
; feature).
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis > %t
+; RUN: opt < %s -scalarrepl -S > %t
; RUN: cat %t | not grep alloca
%struct.foo = type { i32, i32 }
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index e62ccc295451..38a2ca05cad9 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep {call.*memcpy}
+; RUN: opt < %s -scalarrepl -S | not grep {call.*memcpy}
@C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32 ; <[128 x float]*> [#uses=1]
define float @grad4(i32 %hash, float %x, float %y, float %z, float %w) {
diff --git a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
index 91d8ea900717..0d61e5aab6c7 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -1,7 +1,7 @@
; PR1226
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep {call void @llvm.memcpy.i32}
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep getelementptr
+; RUN: opt < %s -scalarrepl -S | grep getelementptr
; END.
target datalayout = "E-p:32:32"
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
index b7b33521bbce..5aeefcd13180 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -1,7 +1,7 @@
; PR1226
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret i32 16843009}
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | grep {ret i16 514}
+; RUN: opt < %s -scalarrepl -S | grep {ret i32 16843009}
+; RUN: opt < %s -scalarrepl -S | not grep alloca
+; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i16 514}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/ScalarRepl/not-a-vector.ll b/test/Transforms/ScalarRepl/not-a-vector.ll
index e2111e7b31c2..7eba7c019045 100644
--- a/test/Transforms/ScalarRepl/not-a-vector.ll
+++ b/test/Transforms/ScalarRepl/not-a-vector.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep {7 x double}
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | grep {ret double %B}
+; RUN: opt < %s -scalarrepl -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | not grep {7 x double}
+; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret double %B}
define double @test(double %A, double %B) {
%ARR = alloca [7 x i64]
diff --git a/test/Transforms/ScalarRepl/phinodepromote.ll b/test/Transforms/ScalarRepl/phinodepromote.ll
index 4eb8743bd1cf..9c6e8b92f3e5 100644
--- a/test/Transforms/ScalarRepl/phinodepromote.ll
+++ b/test/Transforms/ScalarRepl/phinodepromote.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -simplifycfg -instcombine -mem2reg -S | not grep alloca
;
; This tests to see if mem2reg can promote alloca instructions whose addresses
; are used by PHI nodes that are immediately loaded. The LLVM C++ front-end
diff --git a/test/Transforms/ScalarRepl/select_promote.ll b/test/Transforms/ScalarRepl/select_promote.ll
index 901fa90dc96d..d6b2b75327c5 100644
--- a/test/Transforms/ScalarRepl/select_promote.ll
+++ b/test/Transforms/ScalarRepl/select_promote.ll
@@ -1,7 +1,7 @@
; Test promotion of loads that use the result of a select instruction. This
; should be simplified by the instcombine pass.
-; RUN: llvm-as < %s | opt -instcombine -mem2reg | llvm-dis | not grep alloca
+; RUN: opt < %s -instcombine -mem2reg -S | not grep alloca
define i32 @main() {
%mem_tmp.0 = alloca i32 ; <i32*> [#uses=3]
diff --git a/test/Transforms/ScalarRepl/sroa-fca.ll b/test/Transforms/ScalarRepl/sroa-fca.ll
index 1bfdaccc0d8d..2df3b9be1e48 100644
--- a/test/Transforms/ScalarRepl/sroa-fca.ll
+++ b/test/Transforms/ScalarRepl/sroa-fca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis
+; RUN: opt < %s -scalarrepl | llvm-dis
; Make sure that SROA "scalar conversion" can handle first class aggregates.
define i64 @test({i32, i32} %A) {
diff --git a/test/Transforms/ScalarRepl/sroa_two.ll b/test/Transforms/ScalarRepl/sroa_two.ll
index e0ea2bc93fec..d8aa26dce637 100644
--- a/test/Transforms/ScalarRepl/sroa_two.ll
+++ b/test/Transforms/ScalarRepl/sroa_two.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis
+; RUN: opt < %s -scalarrepl | llvm-dis
define i32 @test(i32 %X) {
%Arr = alloca [2 x i32] ; <[2 x i32]*> [#uses=3]
diff --git a/test/Transforms/ScalarRepl/union-fp-int.ll b/test/Transforms/ScalarRepl/union-fp-int.ll
index 1c3b18b4a1e1..0e1cd2307931 100644
--- a/test/Transforms/ScalarRepl/union-fp-int.ll
+++ b/test/Transforms/ScalarRepl/union-fp-int.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: grep {bitcast.*float.*i32}
define i32 @test(float %X) {
diff --git a/test/Transforms/ScalarRepl/union-packed.ll b/test/Transforms/ScalarRepl/union-packed.ll
index 10d8a5d7ac4b..63752c8c8015 100644
--- a/test/Transforms/ScalarRepl/union-packed.ll
+++ b/test/Transforms/ScalarRepl/union-packed.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: grep bitcast
define <4 x i32> @test(<4 x float> %X) {
diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll
index 269b4b9ff52e..fe702fa21772 100644
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/test/Transforms/ScalarRepl/union-pointer.ll
@@ -1,7 +1,7 @@
; PR892
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | \
+; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret i8}
+; RUN: opt < %s -scalarrepl -S | grep {ret i8}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8.7.2"
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
index 56785788ff9a..3af79bcebc79 100644
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis > %t
+; RUN: opt < %s -scalarrepl -S > %t
; RUN: grep {ret <16 x float> %A} %t
; RUN: grep {ret <16 x float> zeroinitializer} %t
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index 4b6555b3d673..0284b3d57e3a 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {load <4 x float>}
+; RUN: opt < %s -scalarrepl -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | grep {load <4 x float>}
define void @test(<4 x float>* %F, float %f) {
entry:
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index 5f9fe0d89a5a..3ff322e065cb 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {volatile load}
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {volatile store}
+; RUN: opt < %s -scalarrepl -S | grep {volatile load}
+; RUN: opt < %s -scalarrepl -S | grep {volatile store}
define i32 @voltest(i32 %T) {
%A = alloca {i32, i32}
diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
index 5682ff4e40df..414235ba7cde 100644
--- a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
@@ -1,6 +1,6 @@
; Basic block #2 should not be merged into BB #3!
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: grep {br label}
;
diff --git a/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll b/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll
index c955d668898f..055386b34751 100644
--- a/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll
+++ b/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll
@@ -4,7 +4,7 @@
;
; Which is not valid SSA
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis
+; RUN: opt < %s -simplifycfg | llvm-dis
define void @test() {
; <label>:0
diff --git a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll b/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
index 87b428600ba5..88f32bc08279 100644
--- a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
+++ b/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
@@ -1,7 +1,7 @@
; -simplifycfg is not folding blocks if there is a PHI node involved. This
; should be fixed eventually
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define i32 @main(i32 %argc) {
; <label>:0
diff --git a/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll b/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll
index 75beb344a883..9a1206221fb6 100644
--- a/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll
+++ b/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg
+; RUN: opt < %s -simplifycfg
define i32 @test(i32 %A, i32 %B, i1 %cond) {
J:
diff --git a/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll b/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll
index c45a5c9da98c..876204619941 100644
--- a/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll
+++ b/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
define void @test(i32* %ldo, i1 %c, i1 %d) {
bb9:
diff --git a/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll
index 74e7428429de..c019931c07e7 100644
--- a/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll
@@ -1,6 +1,6 @@
; Do not remove the invoke!
;
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
define i32 @test() {
%A = invoke i32 @test( )
diff --git a/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll b/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
index 9f56e07bfd62..15cd7730449e 100644
--- a/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
@@ -1,6 +1,6 @@
; Do not remove the invoke!
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep invoke
+; RUN: opt < %s -simplifycfg -S | grep invoke
define i32 @test() {
invoke i32 @test( )
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
index ea74692d05f7..8ac9ae443728 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
@@ -1,7 +1,7 @@
; This test checks to make sure that 'br X, Dest, Dest' is folded into
; 'br Dest'
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep {br i1 %c2}
declare void @noop()
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
index a307573d3d33..888e187b6b4d 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
@@ -3,7 +3,7 @@
; due to the fact that the SimplifyCFG function does not use
; the ConstantFoldTerminator function.
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep {br i1 %c2}
declare void @noop()
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
index c9f50907569a..577d65013e5e 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep switch
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
index cd22e1edb15a..93f851c6f9ea 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep switch
; Test normal folding
diff --git a/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll b/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll
index de0d26245829..fafe73b2b4ef 100644
--- a/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll
+++ b/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
define void @symhash_add() {
entry:
diff --git a/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll b/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
index 7fea2921b023..90be6803a5c3 100644
--- a/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
+++ b/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; PR584
@g_38098584 = external global i32 ; <i32*> [#uses=1]
@g_60187400 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll b/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll
index 83143218b1ad..c30bfa1379c7 100644
--- a/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll
+++ b/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; END.
define void @main() {
diff --git a/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll b/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll
index 51490a00eb47..477c9c9e3a02 100644
--- a/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll
+++ b/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; END.
%arraytype.1.Char = type { i32, [0 x i8] }
diff --git a/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll b/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll
index a0fe7817743a..778aa3b1f7b4 100644
--- a/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll
+++ b/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
define i1 @foo() {
%X = invoke i1 @foo( )
diff --git a/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll b/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
index 27413fcf9fa1..760aa139bf7b 100644
--- a/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
+++ b/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
@@ -1,6 +1,6 @@
; Make sure this doesn't turn into an infinite loop
-; RUN: llvm-as < %s | opt -simplifycfg -constprop -simplifycfg |\
+; RUN: opt < %s -simplifycfg -constprop -simplifycfg |\
; RUN: llvm-dis | grep bb86
; END.
diff --git a/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll b/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll
index 4400624d184d..32f49e667883 100644
--- a/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll
+++ b/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
define void @polnel_() {
entry:
diff --git a/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll b/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll
index 4981cf3c9aa3..21cfb2615ff7 100644
--- a/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll
+++ b/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; END.
define void @main(i32 %c) {
diff --git a/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll b/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
index e410c35450bb..2c84c937ae39 100644
--- a/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
+++ b/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -gvn -simplifycfg \
+; RUN: opt < %s -gvn -simplifycfg \
; RUN: -disable-output
; PR867
; END.
diff --git a/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll b/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
index 0d056eed74e1..009d1c8cc4da 100644
--- a/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
+++ b/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
@@ -1,5 +1,5 @@
; PR957
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep select
@G = extern_weak global i32
diff --git a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
index 6bfef0214931..dba41c9b111f 100644
--- a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; END.
%struct..4._102 = type { %struct.QVectorData* }
%struct..5._125 = type { %struct.QMapData* }
diff --git a/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll b/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
index d433f0482955..af865ce7fac0 100644
--- a/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
+++ b/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis
+; RUN: opt < %s -simplifycfg | llvm-dis
; END.
; ModuleID = '2006-12-08-Ptr-ICmp-Branch.ll'
diff --git a/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll b/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
index 449047b0d32f..a20c46e1ad1f 100644
--- a/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
+++ b/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep invoke
+; RUN: opt < %s -simplifycfg -S | not grep invoke
declare i32 @func(i8*) nounwind
diff --git a/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll b/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
index fe1ca80468be..46df0f0ed071 100644
--- a/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
+++ b/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
@@ -1,4 +1,4 @@
-;RUN: llvm-as < %s | opt -simplifycfg -disable-output
+;RUN: opt < %s -simplifycfg -disable-output
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
define i32 @bork() nounwind {
diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 4c9c9e8ae685..00f2d5bcf135 100644
--- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -1,5 +1,5 @@
; The phi should not be eliminated in this case, because the fp op could trap.
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep {= phi double}
+; RUN: opt < %s -simplifycfg -S | grep {= phi double}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll b/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
index 5ee13526785d..8e05a3cddca2 100644
--- a/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
+++ b/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; rdar://5882392
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
index be3410c15a38..ba33d84f84aa 100644
--- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
+++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; PR2256
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-mingw32"
diff --git a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
index 8af26402d671..59e886b2ddbe 100644
--- a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis > %t
+; RUN: opt < %s -simplifycfg -S > %t
; RUN: not grep {^BB.tomerge} %t
; RUN: grep {^BB.nomerge} %t | count 2
diff --git a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
index 0678e20ed4af..d025dee85f2d 100644
--- a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
+++ b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep {%outval = phi i32 .*mux}
+; RUN: opt < %s -simplifycfg -S | grep {%outval = phi i32 .*mux}
; PR2540
; Outval should end up with a select from 0/2, not all constants.
diff --git a/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll b/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
index 82a2cc03033a..ac9622d43c31 100644
--- a/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
+++ b/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; PR 2777
@g_103 = common global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll b/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll
index b2d671da67f4..f864184eb844 100644
--- a/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll
+++ b/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; PR 2800
define void @foo() {
diff --git a/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll b/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll
index a061a82954b6..bb137c1babcb 100644
--- a/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll
+++ b/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg
+; RUN: opt < %s -simplifycfg
; PR2855
define i32 @_Z1fPii(i32* %b, i32 %f) nounwind {
diff --git a/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll b/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll
index 7b4aee489b55..d3c7c320cee3 100644
--- a/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll
+++ b/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis
+; RUN: opt < %s -simplifycfg | llvm-dis
define i32 @test() {
entry:
br label %T
diff --git a/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll b/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
index b52d10dc3bc9..727102435fc7 100644
--- a/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
+++ b/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep icmp
+; RUN: opt < %s -simplifycfg -S | not grep icmp
; ModuleID = '/tmp/x.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll b/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
index fc34f5157ba6..7b8a9187c413 100644
--- a/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis
-; XFAIL: *
+; RUN: opt < %s -simplifycfg | llvm-dis
; PR3016
; Dead use caused invariant violation.
diff --git a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
index e65bafa6805d..33167bd5c661 100644
--- a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep {br i1 } | count 4
+; RUN: opt < %s -simplifycfg -S | grep {br i1 } | count 4
; PR3354
; Do not merge bb1 into the entry block, it might trap.
diff --git a/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll b/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
index 9033976108a9..9caa9a1a0fb2 100644
--- a/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
+++ b/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep select
+; RUN: opt < %s -simplifycfg -S | grep select
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
diff --git a/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll b/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
index dc0cbbebedc2..419feb6b56a1 100644
--- a/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
+++ b/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep select
+; RUN: opt < %s -simplifycfg -S | not grep select
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
diff --git a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
index d0e7ed787284..72a15b1c8b22 100644
--- a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
; END.
%struct..4._102 = type { %struct.QVectorData* }
%struct..5._125 = type { %struct.QMapData* }
diff --git a/test/Transforms/SimplifyCFG/BrUnwind.ll b/test/Transforms/SimplifyCFG/BrUnwind.ll
index 1acdecd5781a..b19a27dea048 100644
--- a/test/Transforms/SimplifyCFG/BrUnwind.ll
+++ b/test/Transforms/SimplifyCFG/BrUnwind.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep {br label}
define void @test(i1 %C) {
diff --git a/test/Transforms/SimplifyCFG/DeadSetCC.ll b/test/Transforms/SimplifyCFG/DeadSetCC.ll
index a4c8366e00fb..83394628cc44 100644
--- a/test/Transforms/SimplifyCFG/DeadSetCC.ll
+++ b/test/Transforms/SimplifyCFG/DeadSetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep {icmp eq}
; Check that simplifycfg deletes a dead 'seteq' instruction when it
diff --git a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
index 55db7880f125..912c7556e006 100644
--- a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
@@ -1,6 +1,6 @@
; Test merging of blocks with phi nodes.
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep N:
+; RUN: opt < %s -simplifycfg -S | not grep N:
;
define i32 @test(i1 %a) {
diff --git a/test/Transforms/SimplifyCFG/HoistCode.ll b/test/Transforms/SimplifyCFG/HoistCode.ll
index d8894a6faa47..9697e56a719f 100644
--- a/test/Transforms/SimplifyCFG/HoistCode.ll
+++ b/test/Transforms/SimplifyCFG/HoistCode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define void @foo(i1 %C, i32* %P) {
br i1 %C, label %T, label %F
diff --git a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
index f3844f7328e9..a648efd1740e 100644
--- a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
@@ -1,6 +1,6 @@
; Test merging of blocks that only have PHI nodes in them
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep N:
+; RUN: opt < %s -simplifycfg -S | not grep N:
;
define i32 @test(i1 %a, i1 %b) {
diff --git a/test/Transforms/SimplifyCFG/PhiBlockMerge2.ll b/test/Transforms/SimplifyCFG/PhiBlockMerge2.ll
index 24b3d118e7bc..fb5d600f1148 100644
--- a/test/Transforms/SimplifyCFG/PhiBlockMerge2.ll
+++ b/test/Transforms/SimplifyCFG/PhiBlockMerge2.ll
@@ -2,7 +2,7 @@
; where the merged-into block doesn't have any PHI nodes, and is in fact
; dominated by the block-to-be-eliminated
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep N:
+; RUN: opt < %s -simplifycfg -S | not grep N:
;
declare i1 @foo()
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate.ll b/test/Transforms/SimplifyCFG/PhiEliminate.ll
index 94697c96b78a..73cf466a4f74 100644
--- a/test/Transforms/SimplifyCFG/PhiEliminate.ll
+++ b/test/Transforms/SimplifyCFG/PhiEliminate.ll
@@ -3,7 +3,7 @@
; nodes away allows the branches to be eliminated, performing a simple form of
; 'if conversion'.
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis > %t.xform
+; RUN: opt < %s -simplifycfg -S > %t.xform
; RUN: not grep phi %t.xform
; RUN: grep ret %t.xform
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate2.ll b/test/Transforms/SimplifyCFG/PhiEliminate2.ll
index ec1b91a0d16e..c0f6781293db 100644
--- a/test/Transforms/SimplifyCFG/PhiEliminate2.ll
+++ b/test/Transforms/SimplifyCFG/PhiEliminate2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define i32 @test(i1 %C, i32 %V1, i32 %V2) {
entry:
diff --git a/test/Transforms/SimplifyCFG/PhiNoEliminate.ll b/test/Transforms/SimplifyCFG/PhiNoEliminate.ll
index dfe122abbf94..e9902e094422 100644
--- a/test/Transforms/SimplifyCFG/PhiNoEliminate.ll
+++ b/test/Transforms/SimplifyCFG/PhiNoEliminate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep select
;; The PHI node in this example should not be turned into a select, as we are
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index 2be9124fe13a..5cfc77ce08a5 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep select
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep br | count 2
+; RUN: opt < %s -simplifycfg -S | grep select
+; RUN: opt < %s -simplifycfg -S | grep br | count 2
define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
entry:
diff --git a/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll b/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll
index e6c05f3beeb0..bf9d9535d636 100644
--- a/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll
+++ b/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll
@@ -2,7 +2,7 @@
; a PHI node and a return. Make sure simplifycfg can straighten out this
; important case. This is basically the most trivial form of tail-duplication.
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep {br label}
define i32 @test(i1 %B, i32 %A, i32 %B.upgrd.1) {
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index 3c4491937355..7133d9875caf 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep unreachable
+; RUN: opt < %s -simplifycfg -S | not grep unreachable
define void @test1(i1 %C, i1* %BP) {
br i1 %C, label %T, label %F
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 1fd629a4fd41..468b6ed11bc4 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -1,6 +1,6 @@
; Test CFG simplify removal of branch instructions...
;
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define void @test1() {
br label %BB1
diff --git a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll b/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
index b2f53a3da6d3..761f0d53905b 100644
--- a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
+++ b/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep {br i1} | count 1
+; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/SimplifyCFG/branch-cond-merge.ll b/test/Transforms/SimplifyCFG/branch-cond-merge.ll
index ae465036584f..f73e01ca4767 100644
--- a/test/Transforms/SimplifyCFG/branch-cond-merge.ll
+++ b/test/Transforms/SimplifyCFG/branch-cond-merge.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -simplifycfg -instcombine \
-; RUN: -simplifycfg | llvm-dis | not grep call
+; RUN: opt < %s -simplifycfg -instcombine \
+; RUN: -simplifycfg -S | not grep call
declare void @bar()
diff --git a/test/Transforms/SimplifyCFG/branch-cond-prop.ll b/test/Transforms/SimplifyCFG/branch-cond-prop.ll
index a7cd359689d9..448934e72508 100644
--- a/test/Transforms/SimplifyCFG/branch-cond-prop.ll
+++ b/test/Transforms/SimplifyCFG/branch-cond-prop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep call
+; RUN: opt < %s -simplifycfg -S | not grep call
declare void @bar()
diff --git a/test/Transforms/SimplifyCFG/branch-fold-test.ll b/test/Transforms/SimplifyCFG/branch-fold-test.ll
index 444741f5b72d..460f2456d8ce 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-test.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-test.ll
@@ -1,7 +1,7 @@
; This test ensures that the simplifycfg pass continues to constant fold
; terminator instructions.
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define i32 @test(i32 %A, i32 %B) {
J:
diff --git a/test/Transforms/SimplifyCFG/branch-fold.ll b/test/Transforms/SimplifyCFG/branch-fold.ll
index d671b6edec45..266609b52a54 100644
--- a/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep {br i1} | count 1
+; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
define void @test(i32* %P, i32* %Q, i1 %A, i1 %B) {
br i1 %A, label %a, label %b
diff --git a/test/Transforms/SimplifyCFG/branch-phi-thread.ll b/test/Transforms/SimplifyCFG/branch-phi-thread.ll
index c536b6c88716..f52d979ecd39 100644
--- a/test/Transforms/SimplifyCFG/branch-phi-thread.ll
+++ b/test/Transforms/SimplifyCFG/branch-phi-thread.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -adce | llvm-dis | \
+; RUN: opt < %s -simplifycfg -adce -S | \
; RUN: not grep {call void @f1}
; END.
diff --git a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
index d1d00b37aac2..c91a87ef0838 100644
--- a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
; END.
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/Transforms/SimplifyCFG/dbginfo.ll b/test/Transforms/SimplifyCFG/dbginfo.ll
index 38ecbb8f7629..1a9f20ac871e 100644
--- a/test/Transforms/SimplifyCFG/dbginfo.ll
+++ b/test/Transforms/SimplifyCFG/dbginfo.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep region | count 2
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep func.start | count 2
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep "br label"
+; RUN: opt < %s -simplifycfg -S | not grep "br label"
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
diff --git a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
index ad5cd938aba6..2e7ef7a8dfba 100644
--- a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
+++ b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/Transforms/SimplifyCFG/hoist-common-code.ll b/test/Transforms/SimplifyCFG/hoist-common-code.ll
index 7512da263bd5..5c83e2a3aa49 100644
--- a/test/Transforms/SimplifyCFG/hoist-common-code.ll
+++ b/test/Transforms/SimplifyCFG/hoist-common-code.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
declare void @bar(i32)
diff --git a/test/Transforms/SimplifyCFG/invoke_unwind.ll b/test/Transforms/SimplifyCFG/invoke_unwind.ll
new file mode 100644
index 000000000000..bbd779beb489
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/invoke_unwind.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar()
+
+; This testcase checks that the simplifycfg pass converts invoke
+; instructions to call instructions when the handler just rethrows the exception.
+define i32 @test1() {
+; CHECK: @test1
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: ret i32 0
+ invoke void @bar( )
+ to label %Ok unwind label %Rethrow
+Ok: ; preds = %0
+ ret i32 0
+Rethrow: ; preds = %0
+ unwind
+}
+
+
+; Verify that simplifycfg isn't duplicating 'unwind' instructions. Doing this
+; is bad because it discourages commoning.
+define i32 @test2(i1 %c) {
+; CHECK: @test2
+; CHECK: T:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label %F
+ br i1 %c, label %T, label %F
+T:
+ call void @bar()
+ br label %F
+F:
+ unwind
+}
diff --git a/test/Transforms/SimplifyCFG/iterative-simplify.ll b/test/Transforms/SimplifyCFG/iterative-simplify.ll
index 9081b01b20a8..a39741105764 100644
--- a/test/Transforms/SimplifyCFG/iterative-simplify.ll
+++ b/test/Transforms/SimplifyCFG/iterative-simplify.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep bb17
+; RUN: opt < %s -simplifycfg -S | not grep bb17
; PR1786
define i32 @main() {
diff --git a/test/Transforms/SimplifyCFG/noreturn-call.ll b/test/Transforms/SimplifyCFG/noreturn-call.ll
index 0e1c6327e8b3..b45477828374 100644
--- a/test/Transforms/SimplifyCFG/noreturn-call.ll
+++ b/test/Transforms/SimplifyCFG/noreturn-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep unreachable
+; RUN: opt < %s -simplifycfg -S | grep unreachable
; PR1796
declare void @Finisher(i32) noreturn
diff --git a/test/Transforms/SimplifyCFG/return-merge.ll b/test/Transforms/SimplifyCFG/return-merge.ll
index 60267e9ec69b..977b6dff87cb 100644
--- a/test/Transforms/SimplifyCFG/return-merge.ll
+++ b/test/Transforms/SimplifyCFG/return-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define i32 @test1(i1 %C) {
entry:
diff --git a/test/Transforms/SimplifyCFG/switch-simplify-crash.ll b/test/Transforms/SimplifyCFG/switch-simplify-crash.ll
index 74559a02647e..bbc0bd78da7c 100644
--- a/test/Transforms/SimplifyCFG/switch-simplify-crash.ll
+++ b/test/Transforms/SimplifyCFG/switch-simplify-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; RUN: opt < %s -simplifycfg -disable-output
define void @NewExtractNames() {
entry:
diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll
index 4994cd3960aa..9b3aaf7f20de 100644
--- a/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
declare void @foo1()
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
index 0ce01f1cfaea..f5f4c935a365 100644
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
-; END.
+; RUN: opt < %s -simplifycfg -S | not grep br
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/Transforms/SimplifyCFG/switch_formation.ll b/test/Transforms/SimplifyCFG/switch_formation.ll
index 534c471eaad7..787904a5d4f8 100644
--- a/test/Transforms/SimplifyCFG/switch_formation.ll
+++ b/test/Transforms/SimplifyCFG/switch_formation.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
-; END.
+; RUN: opt < %s -simplifycfg -S | not grep br
define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
entry:
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold.ll b/test/Transforms/SimplifyCFG/switch_switch_fold.ll
index 1590f343a5fd..2e2e31014017 100644
--- a/test/Transforms/SimplifyCFG/switch_switch_fold.ll
+++ b/test/Transforms/SimplifyCFG/switch_switch_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: grep switch | count 1
; Test that a switch going to a switch on the same value can be merged. All
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
index 3654b28eed4c..7d7391af1b90 100644
--- a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
+++ b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: grep switch | count 1
; ModuleID = '<stdin>'
diff --git a/test/Transforms/SimplifyCFG/switch_thread.ll b/test/Transforms/SimplifyCFG/switch_thread.ll
index a92528a3331f..bd85fccd5273 100644
--- a/test/Transforms/SimplifyCFG/switch_thread.ll
+++ b/test/Transforms/SimplifyCFG/switch_thread.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | \
+; RUN: opt < %s -simplifycfg -S | \
; RUN: not grep {call void @DEAD}
; Test that we can thread a simple known condition through switch statements.
diff --git a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
index a73a82b36971..0c9cc8be9251 100644
--- a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
+++ b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | grep {volatile load}
+; RUN: opt < %s -simplifycfg -S | grep {volatile load}
; PR2967
target datalayout =
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
index 62d4a4335a51..3a6c2ed6680c 100644
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
+++ b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.ll
index 19814ade4d91..fb18624c71f7 100644
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.ll
+++ b/test/Transforms/SimplifyCFG/two-entry-phi-return.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplifycfg | llvm-dis | not grep br
+; RUN: opt < %s -simplifycfg -S | not grep br
define i1 @qux(i8* %m, i8* %n, i8* %o, i8* %p) nounwind {
entry:
diff --git a/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll b/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll
index 860a6ebf966b..8816579a42ff 100644
--- a/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll
+++ b/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls -disable-output
+; RUN: opt < %s -simplify-libcalls -disable-output
@G = constant [3 x i8] c"%s\00" ; <[3 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
index 5a56d24a35ab..8e9f2062cded 100644
--- a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
+++ b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
@@ -1,5 +1,5 @@
; PR1307
-; RUN: llvm-as < %s | opt -simplify-libcalls -instcombine | llvm-dis > %t
+; RUN: opt < %s -simplify-libcalls -instcombine -S > %t
; RUN: grep {@str,.*i64 3} %t
; RUN: grep {@str1,.*i64 7} %t
; RUN: grep {ret i8.*null} %t
diff --git a/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll b/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll
index b6c0ffdb704f..b6874322c4c7 100644
--- a/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll
+++ b/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep i32
+; RUN: opt < %s -simplify-libcalls -S | grep i32
; PR2341
@_2E_str = external constant [5 x i8] ; <[5 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
index fcaf12e188da..73eb05b05e34 100644
--- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis > %t
+; RUN: opt < %s -simplify-libcalls -S > %t
; RUN: grep noalias %t | count 2
; RUN: grep nocapture %t | count 3
; RUN: grep nounwind %t | count 3
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll b/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
index 551a2bb6b990..ac89199b0ec1 100644
--- a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline -simplify-libcalls -functionattrs | \
+; RUN: opt < %s -inline -simplify-libcalls -functionattrs | \
; RUN: llvm-dis | grep nocapture | count 2
; Check that nocapture attributes are added when run after an SCC pass.
; PR3520
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
index 4aeff1c048d8..cb9819cacea6 100644
--- a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis > %t
+; RUN: opt < %s -simplify-libcalls -S > %t
; RUN: grep nocapture %t | count 2
; RUN: grep null %t | grep nocapture | count 1
; RUN: grep null %t | grep call | grep readonly | count 1
diff --git a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
index f7209934f769..9056499b4c55 100644
--- a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls -instcombine | llvm-dis | grep {ret i32 -65}
+; RUN: opt < %s -simplify-libcalls -instcombine -S | grep {ret i32 -65}
; PR4284
define i32 @test() nounwind {
diff --git a/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll b/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll
new file mode 100644
index 000000000000..7af0a261d436
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -simplify-libcalls -disable-output
+; PR4641
+
+ %struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64, %struct.pthread_mutex*, %struct.pthread*, i32, i32, %union.anon }
+ %struct.__sbuf = type { i8*, i32, [4 x i8] }
+ %struct.pthread = type opaque
+ %struct.pthread_mutex = type opaque
+ %union.anon = type { i64, [120 x i8] }
+@.str13 = external constant [2 x i8] ; <[2 x i8]*> [#uses=1]
+@.str14 = external constant [2 x i8] ; <[2 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+ call void @exit(i32 0) nounwind
+ %cond392 = select i1 undef, i8* getelementptr ([2 x i8]* @.str13, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str14, i32 0, i32 0) ; <i8*> [#uses=1]
+ %call393 = call %struct.__sFILE* @fopen(i8* undef, i8* %cond392) nounwind ; <%struct.__sFILE*> [#uses=0]
+ unreachable
+}
+
+declare %struct.__sFILE* @fopen(i8*, i8*)
+
+declare void @exit(i32)
diff --git a/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll b/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll
new file mode 100644
index 000000000000..b5a788e09735
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -simplify-libcalls -disable-output
+; PR4645
+
+define i32 @main() {
+entry:
+ br label %if.then
+
+lor.lhs.false: ; preds = %while.body
+ br i1 undef, label %if.then, label %for.cond
+
+if.then: ; preds = %lor.lhs.false, %while.body
+ call void @exit(i32 1)
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %if.then, %lor.lhs.false
+ %j.0 = phi i32 [ %inc47, %for.end ], [ 0, %if.then ], [ 0, %lor.lhs.false ] ; <i32> [#uses=1]
+ unreachable
+
+for.end: ; preds = %for.cond20
+ %inc47 = add i32 %j.0, 1 ; <i32> [#uses=1]
+ br label %for.cond
+}
+
+declare void @exit(i32)
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
index d6a504af867d..ab45f1819b29 100644
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ b/test/Transforms/SimplifyLibCalls/FFS.ll
@@ -1,5 +1,5 @@
; Test that the FFSOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*@ffs}
@non_const = external global i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/FPrintF.ll b/test/Transforms/SimplifyLibCalls/FPrintF.ll
index ac6b3c1f7097..4a0d232dac3b 100644
--- a/test/Transforms/SimplifyLibCalls/FPrintF.ll
+++ b/test/Transforms/SimplifyLibCalls/FPrintF.ll
@@ -1,7 +1,10 @@
; Test that the FPrintFOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*fprintf}
-;
+
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
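The datalayout comment above is load-bearing: simplifying a format-specifier-free fprintf turns it into fwrite, whose length and count arguments are size_t, and SimplifyLibCalls takes size_t to be pointer-width. A hand-written sketch of the shape of the rewrite, assuming the 64-bit layout from the test (the names and exact operands are illustrative):

; Before -simplify-libcalls:
;   %r = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %f,
;          i8* getelementptr ([6 x i8]* @fmt, i32 0, i32 0))
; After, roughly:
;   %r = call i64 @fwrite(i8* getelementptr ([6 x i8]* @fmt, i32 0, i32 0),
;          i64 5, i64 1, %struct._IO_FILE* %f)
; Without a datalayout the pass cannot pick the right width for those i64s.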
diff --git a/test/Transforms/SimplifyLibCalls/IsDigit.ll b/test/Transforms/SimplifyLibCalls/IsDigit.ll
index a290e0107d45..51a769d9bb3d 100644
--- a/test/Transforms/SimplifyLibCalls/IsDigit.ll
+++ b/test/Transforms/SimplifyLibCalls/IsDigit.ll
@@ -1,5 +1,5 @@
; Test that the IsDigitOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep call
declare i32 @isdigit(i32)
diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll
index 4e33b0755f94..39662b1589af 100644
--- a/test/Transforms/SimplifyLibCalls/MemCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/MemCpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -constprop -instcombine | llvm-dis | not grep {call.*llvm.memcpy.i32}
+; RUN: opt < %s -constprop -instcombine -S | not grep {call.*llvm.memcpy.i32}
@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
index c892f0341ab3..858a09c96f6f 100644
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ b/test/Transforms/SimplifyLibCalls/Printf.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep putchar
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | grep putchar
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*printf}
@str = internal constant [13 x i8] c"hello world\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/Puts.ll b/test/Transforms/SimplifyLibCalls/Puts.ll
index e9bb2b381c98..47a33c2d98d5 100644
--- a/test/Transforms/SimplifyLibCalls/Puts.ll
+++ b/test/Transforms/SimplifyLibCalls/Puts.ll
@@ -1,7 +1,10 @@
; Test that the PutsOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*fputs}
-;
+
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
diff --git a/test/Transforms/SimplifyLibCalls/SPrintF.ll b/test/Transforms/SimplifyLibCalls/SPrintF.ll
index 53f5ef1f429e..847e363f52c1 100644
--- a/test/Transforms/SimplifyLibCalls/SPrintF.ll
+++ b/test/Transforms/SimplifyLibCalls/SPrintF.ll
@@ -1,7 +1,11 @@
; Test that the SPrintFOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*sprintf}
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrCat.ll b/test/Transforms/SimplifyLibCalls/StrCat.ll
index 89ef4ea49a79..4e3d0ab7f40f 100644
--- a/test/Transforms/SimplifyLibCalls/StrCat.ll
+++ b/test/Transforms/SimplifyLibCalls/StrCat.ll
@@ -1,10 +1,14 @@
; Test that the StrCatOptimizer works correctly
; PR3661
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strcat}
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: grep {puts.*%arg1}
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrChr.ll b/test/Transforms/SimplifyLibCalls/StrChr.ll
index 802639f72320..50ca0a6edbf8 100644
--- a/test/Transforms/SimplifyLibCalls/StrChr.ll
+++ b/test/Transforms/SimplifyLibCalls/StrChr.ll
@@ -1,7 +1,11 @@
; Test that the StrChrOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*@strchr}
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
@hello = constant [14 x i8] c"hello world\5Cn\00" ; <[14 x i8]*> [#uses=1]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrCmp.ll b/test/Transforms/SimplifyLibCalls/StrCmp.ll
index ae28e4a10008..73596351a8cc 100644
--- a/test/Transforms/SimplifyLibCalls/StrCmp.ll
+++ b/test/Transforms/SimplifyLibCalls/StrCmp.ll
@@ -1,5 +1,5 @@
; Test that the StrCmpOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strcmp}
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll
index f23582c46206..75429842cd21 100644
--- a/test/Transforms/SimplifyLibCalls/StrCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/StrCpy.ll
@@ -1,7 +1,11 @@
; Test that the StrCpyOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strcpy}
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll
index 521a2242c9bf..45b349d6840d 100644
--- a/test/Transforms/SimplifyLibCalls/StrLen.ll
+++ b/test/Transforms/SimplifyLibCalls/StrLen.ll
@@ -1,5 +1,5 @@
; Test that the StrLenOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strlen}
target datalayout = "e-p:32:32"
diff --git a/test/Transforms/SimplifyLibCalls/StrNCat.ll b/test/Transforms/SimplifyLibCalls/StrNCat.ll
index 0ce319c5e8f3..d09c022fd4cd 100644
--- a/test/Transforms/SimplifyLibCalls/StrNCat.ll
+++ b/test/Transforms/SimplifyLibCalls/StrNCat.ll
@@ -1,9 +1,13 @@
; Test that the StrNCatOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strncat}
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: grep {puts.*%arg1}
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrNCmp.ll b/test/Transforms/SimplifyLibCalls/StrNCmp.ll
index f85394d30281..ba7738561774 100644
--- a/test/Transforms/SimplifyLibCalls/StrNCmp.ll
+++ b/test/Transforms/SimplifyLibCalls/StrNCmp.ll
@@ -1,5 +1,5 @@
; Test that the StrNCmpOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strncmp}
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/StrNCpy.ll b/test/Transforms/SimplifyLibCalls/StrNCpy.ll
index 011e9be94cbe..c8af3ca8c3e2 100644
--- a/test/Transforms/SimplifyLibCalls/StrNCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/StrNCpy.ll
@@ -1,7 +1,11 @@
; Test that the StrNCpyOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*strncpy}
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyLibCalls/ToAscii.ll b/test/Transforms/SimplifyLibCalls/ToAscii.ll
index 9200d2f67edf..e2b5683d9d73 100644
--- a/test/Transforms/SimplifyLibCalls/ToAscii.ll
+++ b/test/Transforms/SimplifyLibCalls/ToAscii.ll
@@ -1,5 +1,5 @@
; Test that the ToAsciiOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | \
+; RUN: opt < %s -simplify-libcalls -S | \
; RUN: not grep {call.*toascii}
declare i32 @toascii(i32)
diff --git a/test/Transforms/SimplifyLibCalls/abs.ll b/test/Transforms/SimplifyLibCalls/abs.ll
index 520189bb28f1..6fbe0b9de44e 100644
--- a/test/Transforms/SimplifyLibCalls/abs.ll
+++ b/test/Transforms/SimplifyLibCalls/abs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep {select i1 %ispos}
+; RUN: opt < %s -simplify-libcalls -S | grep {select i1 %ispos}
; PR2337
define i32 @test(i32 %x) {
diff --git a/test/Transforms/SimplifyLibCalls/exp2.ll b/test/Transforms/SimplifyLibCalls/exp2.ll
index dea3df4b51bc..2f5d910558f5 100644
--- a/test/Transforms/SimplifyLibCalls/exp2.ll
+++ b/test/Transforms/SimplifyLibCalls/exp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep {call.*ldexp} | count 4
+; RUN: opt < %s -simplify-libcalls -S | grep {call.*ldexp} | count 4
; rdar://5852514
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
index 31eb3f65623f..a7af5a968639 100644
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ b/test/Transforms/SimplifyLibCalls/floor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis > %t
+; RUN: opt < %s -simplify-libcalls -S > %t
; RUN: not grep {call.*floor(} %t
; RUN: grep {call.*floorf(} %t
; RUN: not grep {call.*ceil(} %t
diff --git a/test/Transforms/SimplifyLibCalls/half-powr.ll b/test/Transforms/SimplifyLibCalls/half-powr.ll
index 890e788d8c2e..5d317fe864c6 100644
--- a/test/Transforms/SimplifyLibCalls/half-powr.ll
+++ b/test/Transforms/SimplifyLibCalls/half-powr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls-halfpowr | llvm-dis | %prcontext {mul float} 1 | grep {mul float} | count 8
+; RUN: opt -simplify-libcalls-halfpowr %s -S | FileCheck %s
define float @__half_powrf4(float %f, float %g) nounwind readnone {
entry:
@@ -12,6 +12,11 @@ bb: ; preds = %entry
bb1: ; preds = %bb, %entry
%f_addr.0 = phi float [ %1, %bb ], [ %f, %entry ] ; <float> [#uses=1]
%2 = fmul float %f_addr.0, %g ; <float> [#uses=1]
+; CHECK: fmul float %f_addr
+; CHECK: fmul float %f_addr
+; CHECK: fmul float %f_addr
+; CHECK: fmul float %f_addr
+
ret float %2
}
diff --git a/test/Transforms/SimplifyLibCalls/memcmp.ll b/test/Transforms/SimplifyLibCalls/memcmp.ll
index 7bdbc8d17e4f..700873627ec2 100644
--- a/test/Transforms/SimplifyLibCalls/memcmp.ll
+++ b/test/Transforms/SimplifyLibCalls/memcmp.ll
@@ -1,5 +1,5 @@
; Test that the memcmpOptimizer works correctly
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | not grep {call.*memcmp}
+; RUN: opt < %s -simplify-libcalls -S | not grep {call.*memcmp}
@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=0]
@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=0]
diff --git a/test/Transforms/SimplifyLibCalls/memmove.ll b/test/Transforms/SimplifyLibCalls/memmove.ll
index d960960c9c2e..c0c00506cdd3 100644
--- a/test/Transforms/SimplifyLibCalls/memmove.ll
+++ b/test/Transforms/SimplifyLibCalls/memmove.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep {llvm.memmove}
+; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memmove}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/SimplifyLibCalls/memset-64.ll b/test/Transforms/SimplifyLibCalls/memset-64.ll
new file mode 100644
index 000000000000..fb752c4083af
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/memset-64.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memset}
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-pc-linux-gnu"
+
+define void @a(i8* %x) nounwind {
+entry:
+ %call = call i8* @memset(i8* %x, i32 1, i64 100) ; <i8*> [#uses=0]
+ ret void
+}
+
+declare i8* @memset(i8*, i32, i64)
+
diff --git a/test/Transforms/SimplifyLibCalls/memset.ll b/test/Transforms/SimplifyLibCalls/memset.ll
index 0c109154a3e5..0aede064cac1 100644
--- a/test/Transforms/SimplifyLibCalls/memset.ll
+++ b/test/Transforms/SimplifyLibCalls/memset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep {llvm.memset}
+; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memset}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll b/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
new file mode 100644
index 000000000000..669b414531af
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; rdar://7251832
+
+; SimplifyLibcalls should optimize pow(x, 0.5) to sqrt plus code to handle
+; special cases. The readonly attribute on the call should be preserved.
+
+; CHECK: define float @foo(float %x) nounwind {
+; CHECK: %sqrtf = call float @sqrtf(float %x) nounwind readonly
+; CHECK: %fabsf = call float @fabsf(float %sqrtf) nounwind readonly
+; CHECK: %tmp = fcmp oeq float %x, 0xFFF0000000000000
+; CHECK: %tmp1 = select i1 %tmp, float 0x7FF0000000000000, float %fabsf
+; CHECK: ret float %tmp1
+
+define float @foo(float %x) nounwind {
+ %retval = call float @powf(float %x, float 0.5)
+ ret float %retval
+}
+
+; CHECK: define double @doo(double %x) nounwind {
+; CHECK: %sqrt = call double @sqrt(double %x) nounwind readonly
+; CHECK: %fabs = call double @fabs(double %sqrt) nounwind readonly
+; CHECK: %tmp = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK: %tmp1 = select i1 %tmp, double 0x7FF0000000000000, double %fabs
+; CHECK: ret double %tmp1
+; CHECK: }
+
+define double @doo(double %x) nounwind {
+ %retval = call double @pow(double %x, double 0.5)
+ ret double %retval
+}
+
+declare float @powf(float, float) nounwind readonly
+declare double @pow(double, double) nounwind readonly
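The fabs and the select in the CHECK lines exist for IEEE/C99 corner cases that the test encodes but does not spell out: pow(x, 0.5) must return +0 for x = -0 (while sqrt(-0) is -0) and +inf for x = -inf (while sqrt(-inf) is NaN). An annotated sketch of the expanded form, assuming those semantics (the function name is made up for illustration):

define double @pow_half_sketch(double %x) nounwind {
  %sqrt = call double @sqrt(double %x) nounwind readonly
  ; sqrt(-0.0) yields -0.0; fabs canonicalizes it to the +0.0 that pow requires.
  %fabs = call double @fabs(double %sqrt) nounwind readonly
  ; sqrt(-inf) is NaN, but pow(-inf, 0.5) must be +inf, so patch that case up.
  %isninf = fcmp oeq double %x, 0xFFF0000000000000
  %res = select i1 %isninf, double 0x7FF0000000000000, double %fabs
  ret double %res
}

declare double @sqrt(double) nounwind readonly
declare double @fabs(double) nounwind readonly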
diff --git a/test/Transforms/SimplifyLibCalls/pow2.ll b/test/Transforms/SimplifyLibCalls/pow2.ll
index a48abbaf1e74..f8364f740b22 100644
--- a/test/Transforms/SimplifyLibCalls/pow2.ll
+++ b/test/Transforms/SimplifyLibCalls/pow2.ll
@@ -1,6 +1,6 @@
; Testcase for calls to the standard C "pow" function
;
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | not grep {call .pow}
+; RUN: opt < %s -simplify-libcalls -S | not grep {call .pow}
declare double @pow(double, double)
diff --git a/test/Transforms/SimplifyLibCalls/weak-symbols.ll b/test/Transforms/SimplifyLibCalls/weak-symbols.ll
new file mode 100644
index 000000000000..5875b211f776
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/weak-symbols.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; PR4738
+
+; SimplifyLibcalls shouldn't assume anything about weak symbols.
+
+@real_init = weak_odr constant [2 x i8] c"y\00"
+@fake_init = weak constant [2 x i8] c"y\00"
+@.str = private constant [2 x i8] c"y\00"
+
+; CHECK: define i32 @foo
+; CHECK: call i32 @strcmp
+define i32 @foo() nounwind {
+entry:
+ %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @fake_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
+ ret i32 %t0
+}
+
+; CHECK: define i32 @bar
+; CHECK: ret i32 0
+define i32 @bar() nounwind {
+entry:
+ %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @real_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
+ ret i32 %t0
+}
+
+declare i32 @strcmp(i8*, i8*) nounwind readonly
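The asymmetry between @foo and @bar is the whole point: @fake_init has plain weak linkage, so the linker may substitute an entirely different definition and its bytes cannot be trusted at compile time, while weak_odr carries the one-definition-rule promise that any override is byte-identical, which makes strcmp over @real_init and @.str a compile-time strcmp("y", "y"). The folder is therefore entitled to produce (a sketch of the outcome the CHECK lines pin down):

define i32 @bar() nounwind {
entry:
  ; strcmp("y", "y") folded to 0; legal only because of the weak_odr guarantee.
  ret i32 0
}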
diff --git a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
index 16cee15ee179..69febc35df76 100644
--- a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
+++ b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -strip | llvm-dis | grep foo | count 2
-; RUN: llvm-as < %s | opt -strip | llvm-dis | grep bar | count 2
+; RUN: opt < %s -strip -S | grep foo | count 2
+; RUN: opt < %s -strip -S | grep bar | count 2
@llvm.used = appending global [2 x i8*] [ i8* bitcast (i32* @foo to i8*), i8* bitcast (i32 ()* @bar to i8*) ], section "llvm.metadata" ; <[2 x i8*]*> [#uses=0]
@foo = internal constant i32 41 ; <i32*> [#uses=1]
diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll
index e8c28ec80672..b2a9ed2813d6 100644
--- a/test/Transforms/TailCallElim/accum_recursion.ll
+++ b/test/Transforms/TailCallElim/accum_recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call
+; RUN: opt < %s -tailcallelim -S | not grep call
define i32 @factorial(i32 %x) {
entry:
diff --git a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll b/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll
index 4d63b595d7cc..2a90cf3b22d7 100644
--- a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll
+++ b/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll
@@ -3,7 +3,7 @@
; go out to the anonymous users of the demo script for "suggesting"
; optimizations that should be done. :)
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call
+; RUN: opt < %s -tailcallelim -S | not grep call
define i32 @mul(i32 %x, i32 %y) {
entry:
diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll
index f65c66669e56..0c140ad681d5 100644
--- a/test/Transforms/TailCallElim/ackermann.ll
+++ b/test/Transforms/TailCallElim/ackermann.ll
@@ -1,6 +1,5 @@
; This function contains two tail calls, which should be eliminated
-; RUN: llvm-as < %s | \
-; RUN: opt -tailcallelim -stats -disable-output |& grep {2 tailcallelim}
+; RUN: opt < %s -tailcallelim -stats -disable-output |& grep {2 tailcallelim}
define i32 @Ack(i32 %M.1, i32 %N.1) {
entry:
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
index e20fe18d5ada..5cc92e1b1730 100644
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | \
+; RUN: opt < %s -tailcallelim -S | \
; RUN: grep {call i32 @foo}
declare void @bar(i32*)
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
index 8fbe00838762..7b3b2326467a 100644
--- a/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as <%s | opt -tailcallelim | llvm-dis | grep call | count 3
+; RUN: opt < %s -tailcallelim -S | grep call | count 3
; PR4323
; Several cases where tail call elimination should not move the load above the
diff --git a/test/Transforms/TailCallElim/inf-recursion.ll b/test/Transforms/TailCallElim/inf-recursion.ll
index fe00f4a764e9..a5f246d36ce1 100644
--- a/test/Transforms/TailCallElim/inf-recursion.ll
+++ b/test/Transforms/TailCallElim/inf-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | grep call
+; RUN: opt < %s -tailcallelim -S | grep call
; Don't turn this into an infinite loop, this is probably the implementation
; of fabs and we expect the codegen to lower fabs.
diff --git a/test/Transforms/TailCallElim/intervening-inst.ll b/test/Transforms/TailCallElim/intervening-inst.ll
index ae4f50941f81..0c40bd5dc50d 100644
--- a/test/Transforms/TailCallElim/intervening-inst.ll
+++ b/test/Transforms/TailCallElim/intervening-inst.ll
@@ -1,5 +1,5 @@
; This function contains intervening instructions which should be moved out of the way
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call
+; RUN: opt < %s -tailcallelim -S | not grep call
define i32 @Test(i32 %X) {
entry:
diff --git a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
index bd2b93f7a8d6..a556ddb6eb1d 100644
--- a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
+++ b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
@@ -1,9 +1,11 @@
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | \
-; RUN: %prcontext alloca 1 | grep {i32 @foo}
+; RUN: opt -tailcallelim %s -S | FileCheck %s
+; PR615
declare void @bar(i32*)
define i32 @foo() {
+; CHECK: i32 @foo()
+; CHECK-NEXT: alloca
%A = alloca i32 ; <i32*> [#uses=2]
store i32 17, i32* %A
call void @bar( i32* %A )
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index aeb9042bc79f..7f8af7ea1476 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as <%s | opt -tailcallelim | llvm-dis | not grep call
+; RUN: opt < %s -tailcallelim -S | not grep call
; PR4323
; Several cases where tail call elimination should move the load above the call,
diff --git a/test/Transforms/TailCallElim/return_constant.ll b/test/Transforms/TailCallElim/return_constant.ll
index ab69a42b7851..48e5641bb57a 100644
--- a/test/Transforms/TailCallElim/return_constant.ll
+++ b/test/Transforms/TailCallElim/return_constant.ll
@@ -1,7 +1,7 @@
; Though this case seems to be fairly unlikely to occur in the wild, someone
; plunked it into the demo script, so maybe they care about it.
;
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call
+; RUN: opt < %s -tailcallelim -S | not grep call
define i32 @aaa(i32 %c) {
entry:
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
index 90a8163ee321..3dddb013f7ce 100644
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | \
+; RUN: opt < %s -tailcallelim -S | \
; RUN: grep {tail call void @foo}
diff --git a/test/Transforms/TailDup/2003-06-24-Simpleloop.ll b/test/Transforms/TailDup/2003-06-24-Simpleloop.ll
index 5c3b5394ef46..d7e45af5eceb 100644
--- a/test/Transforms/TailDup/2003-06-24-Simpleloop.ll
+++ b/test/Transforms/TailDup/2003-06-24-Simpleloop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define void @motion_result7() {
entry:
diff --git a/test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll b/test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll
index 705f8bcdd652..90f49909e5c1 100644
--- a/test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll
+++ b/test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define i32 @sum() {
entry:
diff --git a/test/Transforms/TailDup/2003-08-23-InvalidatedPointers.ll b/test/Transforms/TailDup/2003-08-23-InvalidatedPointers.ll
index 1fae77b0892e..efe9eaed7ebd 100644
--- a/test/Transforms/TailDup/2003-08-23-InvalidatedPointers.ll
+++ b/test/Transforms/TailDup/2003-08-23-InvalidatedPointers.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define i32 @sell_haggle() {
entry:
diff --git a/test/Transforms/TailDup/2003-08-31-UnreachableBlocks.ll b/test/Transforms/TailDup/2003-08-31-UnreachableBlocks.ll
index e46471331d20..dc6492353b7e 100644
--- a/test/Transforms/TailDup/2003-08-31-UnreachableBlocks.ll
+++ b/test/Transforms/TailDup/2003-08-31-UnreachableBlocks.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define i32 @foo() {
entry:
diff --git a/test/Transforms/TailDup/2004-04-01-DemoteRegToStack.ll b/test/Transforms/TailDup/2004-04-01-DemoteRegToStack.ll
index 81624ff65f79..c1e5f738a7d9 100644
--- a/test/Transforms/TailDup/2004-04-01-DemoteRegToStack.ll
+++ b/test/Transforms/TailDup/2004-04-01-DemoteRegToStack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define void @interpret() {
entry:
diff --git a/test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll b/test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll
index a67e8cdd0c4e..3e4f0b78748c 100644
--- a/test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll
+++ b/test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate | llc
+; RUN: opt < %s -tailduplicate | llc
; PR2323
define i32 @func_27(i32 %p_28) nounwind {
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
index c29e7d45c6ae..88a565684c5d 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -taildup-threshold=3 -stats -disable-output | not grep tailduplicate
+; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output | not grep tailduplicate
; XFAIL: *
define i32 @foo(i32 %l) nounwind {
diff --git a/test/Transforms/TailDup/2009-07-31-phicrash.ll b/test/Transforms/TailDup/2009-07-31-phicrash.ll
new file mode 100644
index 000000000000..ad1a0404761a
--- /dev/null
+++ b/test/Transforms/TailDup/2009-07-31-phicrash.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -tailduplicate -disable-output
+; PR4662
+
+define void @a() {
+BB:
+ br label %BB6
+
+BB6:
+ %tmp9 = phi i64 [ 0, %BB ], [ 5, %BB34 ]
+ br label %BB34
+
+BB34:
+ br label %BB6
+}
diff --git a/test/Transforms/TailDup/MergeTest.ll b/test/Transforms/TailDup/MergeTest.ll
index 0a3ab6d96216..2224283d8eec 100644
--- a/test/Transforms/TailDup/MergeTest.ll
+++ b/test/Transforms/TailDup/MergeTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -taildup-threshold=2 | llvm-dis | grep add | not grep uses=1
+; RUN: opt < %s -tailduplicate -taildup-threshold=2 -S | grep add | not grep uses=1
define i32 @test1(i1 %C, i32 %A, i32* %P) {
entry:
diff --git a/test/Transforms/TailDup/PHIUpdateTest.ll b/test/Transforms/TailDup/PHIUpdateTest.ll
index 6f86587b24d6..38d8ebfcce56 100644
--- a/test/Transforms/TailDup/PHIUpdateTest.ll
+++ b/test/Transforms/TailDup/PHIUpdateTest.ll
@@ -1,6 +1,6 @@
; This test checks to make sure phi nodes are updated properly
;
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define i32 @test(i1 %c, i32 %X, i32 %Y) {
br label %L
diff --git a/test/Transforms/TailDup/basictest.ll b/test/Transforms/TailDup/basictest.ll
index ef368906378d..94f5d87ad2bb 100644
--- a/test/Transforms/TailDup/basictest.ll
+++ b/test/Transforms/TailDup/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
declare void @__main()
diff --git a/test/Transforms/TailDup/basictest2.ll b/test/Transforms/TailDup/basictest2.ll
index f79d718cdb56..81a996adfee2 100644
--- a/test/Transforms/TailDup/basictest2.ll
+++ b/test/Transforms/TailDup/basictest2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -tailduplicate -disable-output
+; RUN: opt < %s -tailduplicate -disable-output
define void @ab() {
entry:
diff --git a/test/Transforms/TailDup/if-tail-dup.ll b/test/Transforms/TailDup/if-tail-dup.ll
index 7c4d9c25ef3e..2e4f5be38d1d 100644
--- a/test/Transforms/TailDup/if-tail-dup.ll
+++ b/test/Transforms/TailDup/if-tail-dup.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -tailduplicate | \
-; RUN: llc -march=x86 -o %t -f
+; RUN: opt < %s -tailduplicate | \
+; RUN: llc -march=x86 -o %t
; RUN: grep {\\\<je\\\>} %t
; RUN: not grep jmp %t
; END.
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
new file mode 100644
index 000000000000..6fd39988aea4
--- /dev/null
+++ b/test/Unit/lit.cfg
@@ -0,0 +1,65 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+
+# name: The name of this test suite.
+config.name = 'LLVM-Unit'
+
+# suffixes: A list of file extensions to treat as test files; this is actually
+# set by on_clone().
+config.suffixes = []
+
+# test_source_root: The root path where tests are located.
+# test_exec_root: The root path where tests should be run.
+llvm_obj_root = getattr(config, 'llvm_obj_root', None)
+if llvm_obj_root is not None:
+ config.test_exec_root = os.path.join(llvm_obj_root, 'unittests')
+ config.test_source_root = config.test_exec_root
+
+# testFormat: The test format to use to interpret tests.
+llvm_build_mode = getattr(config, 'llvm_build_mode', "Debug")
+config.test_format = lit.formats.GoogleTest(llvm_build_mode, 'Tests')
+
+###
+
+import os
+
+# Check that the object root is known.
+if config.test_exec_root is None:
+ # Otherwise, we haven't loaded the site specific configuration (the user is
+ # probably trying to run on a test file directly, and either the site
+ # configuration hasn't been created by the build system, or we are in an
+ # out-of-tree build situation).
+
+ # Try to detect the situation where we are using an out-of-tree build by
+ # looking for 'llvm-config'.
+ #
+ # FIXME: I debated (i.e., wrote and threw away) adding logic to
+ # automagically generate the lit.site.cfg if we are in some kind of fresh
+ # build situation. This means knowing how to invoke the build system
+ # though, and I decided it was too much magic.
+
+ llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
+ if not llvm_config:
+ lit.fatal('No site specific configuration available!')
+
+ # Get the source and object roots.
+ llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
+ llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip()
+
+ # Validate that we got a tree which points to here.
+ this_src_root = os.path.join(os.path.dirname(__file__),'..','..')
+ if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
+ lit.fatal('No site specific configuration available!')
+
+ # Check that the site specific configuration exists.
+ site_cfg = os.path.join(llvm_obj_root, 'test', 'Unit', 'lit.site.cfg')
+ if not os.path.exists(site_cfg):
+ lit.fatal('No site specific configuration available!')
+
+ # Okay, that worked. Notify the user of the automagic, and reconfigure.
+ lit.note('using out-of-tree build at %r' % llvm_obj_root)
+ lit.load_config(config, site_cfg)
+ raise SystemExit
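
The fallback above relies on two lit helpers whose implementations are not part of this patch; assuming `lit.util.which` searches a PATH-style string for an executable and `lit.util.capture` returns a command's stdout, a minimal standalone Python sketch of them would be:

import os
import subprocess

def which(command, paths):
    # Walk each directory in a PATH-style string and return the first
    # executable file matching the command name, or None.
    for directory in paths.split(os.pathsep):
        candidate = os.path.join(directory, command)
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None

def capture(args):
    # Run the command and hand back whatever it printed to stdout.
    process = subprocess.Popen(args, stdout=subprocess.PIPE)
    stdout, _ = process.communicate()
    return stdout
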
diff --git a/test/Verifier/2008-03-01-AllocaSized.ll b/test/Verifier/2008-03-01-AllocaSized.ll
index eb96ced78824..079a75d792e1 100644
--- a/test/Verifier/2008-03-01-AllocaSized.ll
+++ b/test/Verifier/2008-03-01-AllocaSized.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as -f %s -o /dev/null |& grep {Cannot allocate unsized type}
+; RUN: not llvm-as %s -o /dev/null |& grep {Cannot allocate unsized type}
; PR2113
define void @test() {
diff --git a/test/Verifier/2008-08-22-MemCpyAlignment.ll b/test/Verifier/2008-08-22-MemCpyAlignment.ll
index 6bad2d138d0e..aaf69aeef672 100644
--- a/test/Verifier/2008-08-22-MemCpyAlignment.ll
+++ b/test/Verifier/2008-08-22-MemCpyAlignment.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as -f %s -o /dev/null |& grep {alignment argument of memory intrinsics must be a constant int}
+; RUN: not llvm-as %s -o /dev/null |& grep {alignment argument of memory intrinsics must be a constant int}
; PR2318
define void @x(i8* %a, i8* %src, i64 %len, i32 %align) nounwind {
diff --git a/test/Verifier/SelfReferential.ll b/test/Verifier/SelfReferential.ll
index c013bfb6a188..70154b77a823 100644
--- a/test/Verifier/SelfReferential.ll
+++ b/test/Verifier/SelfReferential.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as -f %s -o /dev/null |& grep {Only PHI nodes may reference their own value}
+; RUN: not llvm-as %s -o /dev/null |& grep {Only PHI nodes may reference their own value}
; Test that self referential instructions are not allowed
diff --git a/test/Verifier/aliasing-chain.ll b/test/Verifier/aliasing-chain.ll
index 53b929bd782e..fc5ef1ce13ab 100644
--- a/test/Verifier/aliasing-chain.ll
+++ b/test/Verifier/aliasing-chain.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as -f %s -o /dev/null |& grep {Aliasing chain should end with function or global variable}
+; RUN: not llvm-as %s -o /dev/null |& grep {Aliasing chain should end with function or global variable}
; Test that aliasing chain does not create a cycle
diff --git a/test/Verifier/byval-4.ll b/test/Verifier/byval-4.ll
index d69f66caec35..b6f9c67962cb 100644
--- a/test/Verifier/byval-4.ll
+++ b/test/Verifier/byval-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o /dev/null -f
+; RUN: llvm-as %s -o /dev/null
%struct.foo = type { i64 }
declare void @h(%struct.foo* byval %num)
diff --git a/test/Verifier/invoke-2.ll b/test/Verifier/invoke-2.ll
index 9201f4d9fb0a..0145935a1a78 100644
--- a/test/Verifier/invoke-2.ll
+++ b/test/Verifier/invoke-2.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -f |& grep {not verify as correct}
+; RUN: not llvm-as %s |& grep {not verify as correct}
; PR1042
define i32 @foo() {
diff --git a/test/lib/llvm.exp b/test/lib/llvm.exp
index 3e2632f12c1f..2c1bef939afe 100644
--- a/test/lib/llvm.exp
+++ b/test/lib/llvm.exp
@@ -46,8 +46,8 @@ proc execOneLine { test PRS outcome lineno line } {
# This procedure performs variable substitutions on the RUN: lines of a test
# case.
proc substitute { line test tmpFile } {
- global srcroot objroot srcdir objdir subdir target_triplet prcontext
- global llvmgcc llvmgxx llvmgcc_version llvmgccmajvers ocamlc
+ global srcroot objroot srcdir objdir subdir target_triplet
+ global llvmgcc llvmgxx llvmgcc_version llvmgccmajvers ocamlopt
global gccpath gxxpath compile_c compile_cxx link shlibext llvmlibsdir
global llvmdsymutil valgrind grep gas bugpoint_topts
set path [file join $srcdir $subdir]
@@ -57,8 +57,8 @@ proc substitute { line test tmpFile } {
#replace %% with _#MARKER#_ to make the replacement of %% more predictable
regsub -all {%%} $new_line {_#MARKER#_} new_line
- #replace %prcontext with prcontext.tcl (Must replace before %p)
- regsub -all {%prcontext} $new_line $prcontext new_line
+ #replace %llvmgcc_only with actual path to llvmgcc
+ regsub -all {%llvmgcc_only} $new_line "$llvmgcc" new_line
#replace %llvmgcc with actual path to llvmgcc
regsub -all {%llvmgcc} $new_line "$llvmgcc -emit-llvm -w" new_line
#replace %llvmgxx with actual path to llvmg++
@@ -71,8 +71,8 @@ proc substitute { line test tmpFile } {
regsub -all {%link} $new_line "$link" new_line
#replace %shlibext with shared library extension
regsub -all {%shlibext} $new_line "$shlibext" new_line
- #replace %ocamlc with ocaml compiler command
- regsub -all {%ocamlc} $new_line "$ocamlc" new_line
+ #replace %ocamlopt with ocaml compiler command
+ regsub -all {%ocamlopt} $new_line "$ocamlopt" new_line
#replace %llvmdsymutil with dsymutil command
regsub -all {%llvmdsymutil} $new_line "$llvmdsymutil" new_line
#replace %llvmlibsdir with configure library directory
@@ -85,6 +85,8 @@ proc substitute { line test tmpFile } {
regsub -all {%s} $new_line $test new_line
#replace %t with temp filenames
regsub -all {%t} $new_line $tmpFile new_line
+ #replace %abs_tmp with absolute temp filenames
+ regsub -all {%abs_tmp} $new_line [file join [pwd] $tmpFile] new_line
#replace _#MARKER#_ with %
regsub -all {_#MARKER#_} $new_line % new_line
@@ -298,3 +300,15 @@ proc llvm_supports_target { tgtName } {
}
return 0
}
+
+# This procedure provides an interface to check the BINDINGS_TO_BUILD makefile
+# variable to see if a particular binding has been configured to build.
+proc llvm_supports_binding { name } {
+ global llvm_bindings
+ foreach item [split $llvm_bindings] {
+ if { [regexp $name $item match] } {
+ return 1
+ }
+ }
+ return 0
+}
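
The `_#MARKER#_` dance in the substitute proc above is a two-pass escaping trick: literal `%%` is hidden behind a marker before the `%name` substitutions run, then restored to a single `%` afterwards, so a `%` produced by `%%` can never be re-expanded. A minimal Python analogue (illustrative only, not part of the harness):

def substitute(line, subs):
    # Hide literal %% behind a marker so the %name substitutions below
    # cannot accidentally expand it.
    line = line.replace('%%', '_#MARKER#_')
    # Order matters when one key is a prefix of another, just as the Tcl
    # code is careful about which regsub runs first.
    for key, value in subs.items():
        line = line.replace(key, value)
    # Restore each hidden %% as a single literal %.
    return line.replace('_#MARKER#_', '%')

print(substitute('%opt %s -o %%out', {'%opt': 'opt', '%s': 'test.ll'}))
# -> 'opt test.ll -o %out'
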
diff --git a/test/lib/llvm2cpp.exp b/test/lib/llvm2cpp.exp
index d8a65ffc4bec..f4530338ee23 100644
--- a/test/lib/llvm2cpp.exp
+++ b/test/lib/llvm2cpp.exp
@@ -48,7 +48,7 @@ proc llvm2cpp-test { files } {
# Run llvm-as/llvm-dis
set pipeline llvm-as|llvm-dis
set retval [ catch {
- exec -keepnewline $llvmas < $test -o - | $llvmdis -f -o $assembly 2>/dev/null } msg ]
+ exec -keepnewline $llvmas < $test -o - | $llvmdis -o $assembly 2>/dev/null } msg ]
if { $retval != 0 } {
fail "$test: $pipeline returned $retval\n$msg"
@@ -65,7 +65,7 @@ proc llvm2cpp-test { files } {
}
set retval [ catch {
- exec -keepnewline $llc -march=cpp -f -o $generated < $bytecode 2>/dev/null } msg]
+ exec -keepnewline $llc -march=cpp -o $generated < $bytecode 2>/dev/null } msg]
if { $retval != 0 } {
fail "$test: llvm2cpp returned $retval\n$msg"
@@ -73,7 +73,7 @@ proc llvm2cpp-test { files } {
}
set retval [ catch {
- exec -keepnewline gcc -g -D__STDC_LIMIT_MACROS -o $executable $generated -I$srcroot/include -I$objroot/include -L$llvmlibsdir -lLLVMCore -lLLVMSupport -lLLVMbzip2 -lLLVMSystem -lstdc++ } msg ]
+ exec -keepnewline gcc -g -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -o $executable $generated -I$srcroot/include -I$objroot/include -L$llvmlibsdir -lLLVMCore -lLLVMSupport -lLLVMSystem -lstdc++ } msg ]
if { $retval != 0 } {
fail "$test: gcc returned $retval\n$msg"
continue
diff --git a/test/lit.cfg b/test/lit.cfg
new file mode 100644
index 000000000000..7eac5c69759e
--- /dev/null
+++ b/test/lit.cfg
@@ -0,0 +1,155 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+
+# name: The name of this test suite.
+config.name = 'LLVM'
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.TclTest()
+
+# suffixes: A list of file extensions to treat as test files; this is actually
+# set by on_clone().
+config.suffixes = []
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root path where tests should be run.
+llvm_obj_root = getattr(config, 'llvm_obj_root', None)
+if llvm_obj_root is not None:
+ config.test_exec_root = os.path.join(llvm_obj_root, 'test')
+
+###
+
+import os
+
+# Check that the object root is known.
+if config.test_exec_root is None:
+ # Otherwise, we haven't loaded the site specific configuration (the user is
+ # probably trying to run on a test file directly, and either the site
+ # configuration hasn't been created by the build system, or we are in an
+ # out-of-tree build situation).
+
+ # Try to detect the situation where we are using an out-of-tree build by
+ # looking for 'llvm-config'.
+ #
+ # FIXME: I debated (i.e., wrote and threw away) adding logic to
+ # automagically generate the lit.site.cfg if we are in some kind of fresh
+ # build situation. This means knowing how to invoke the build system
+ # though, and I decided it was too much magic.
+
+ llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
+ if not llvm_config:
+ lit.fatal('No site specific configuration available!')
+
+ # Get the source and object roots.
+ llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
+ llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip()
+
+ # Validate that we got a tree which points to here.
+ this_src_root = os.path.dirname(config.test_source_root)
+ if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
+ lit.fatal('No site specific configuration available!')
+
+ # Check that the site specific configuration exists.
+ site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
+ if not os.path.exists(site_cfg):
+ lit.fatal('No site specific configuration available!')
+
+ # Okay, that worked. Notify the user of the automagic, and reconfigure.
+ lit.note('using out-of-tree build at %r' % llvm_obj_root)
+ lit.load_config(config, site_cfg)
+ raise SystemExit
+
+###
+
+# Load site data from DejaGNU's site.exp.
+import re
+site_exp = {}
+# FIXME: Implement lit.site.cfg.
+for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
+ m = re.match('set ([^ ]+) "([^"]*)"', line)
+ if m:
+ site_exp[m.group(1)] = m.group(2)
+
+# Add substitutions.
+for sub in ['llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
+ 'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
+ 'bugpoint_topts']:
+ if sub in ('llvmgcc', 'llvmgxx'):
+ config.substitutions.append(('%' + sub,
+ site_exp[sub] + ' -emit-llvm -w'))
+ else:
+ config.substitutions.append(('%' + sub, site_exp[sub]))
+
+excludes = []
+
+# Provide target_triple for use in XFAIL and XTARGET.
+config.target_triple = site_exp['target_triplet']
+
+# Provide llvm_supports_target for use in local configs.
+targets = set(site_exp["TARGETS_TO_BUILD"].split())
+def llvm_supports_target(name):
+ return name in targets
+
+langs = set(site_exp['llvmgcc_langs'].split(','))
+def llvm_gcc_supports(name):
+ return name in langs
+
+bindings = set(site_exp['llvm_bindings'].split(','))
+def llvm_supports_binding(name):
+ return name in bindings
+
+# Provide on_clone hook for reading 'dg.exp'.
+import os
+simpleLibData = re.compile(r"""load_lib llvm.exp
+
+RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
+ re.MULTILINE)
+conditionalLibData = re.compile(r"""load_lib llvm.exp
+
+if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
+ *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
+\}""", re.MULTILINE)
+def on_clone(parent, cfg, for_path):
+ def addSuffixes(match):
+ if match[0] == '{' and match[-1] == '}':
+ cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
+ else:
+ cfg.suffixes = ['.' + match]
+
+ libPath = os.path.join(os.path.dirname(for_path),
+ 'dg.exp')
+ if not os.path.exists(libPath):
+ cfg.unsupported = True
+ return
+
+ # Reset unsupported, in case we inherited it.
+ cfg.unsupported = False
+ lib = open(libPath).read().strip()
+
+ # Check for a simple library.
+ m = simpleLibData.match(lib)
+ if m:
+ addSuffixes(m.group(1))
+ return
+
+ # Check for a conditional test set.
+ m = conditionalLibData.match(lib)
+ if m:
+ funcname,arg,match = m.groups()
+ addSuffixes(match)
+
+ func = globals().get(funcname)
+ if not func:
+ lit.error('unsupported predicate %r' % funcname)
+ elif not func(arg):
+ cfg.unsupported = True
+ return
+ # Otherwise, give up.
+ lit.error('unable to understand %r:\n%s' % (libPath, lib))
+
+config.on_clone = on_clone
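
For context, the suffix extraction in on_clone turns the glob tail captured from a directory's dg.exp into lit suffixes, exactly as addSuffixes does above. A condensed, runnable restatement (the sample dg.exp content is hypothetical):

def add_suffixes(match):
    # Brace globs carry several comma-separated extensions; anything else
    # is a single extension.
    if match[0] == '{' and match[-1] == '}':
        return ['.' + s for s in match[1:-1].split(',')]
    return ['.' + match]

# A typical dg.exp body matched by simpleLibData would contain:
#   load_lib llvm.exp
#
#   RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
# giving a captured group of '{ll,c,cpp}':
print(add_suffixes('ll'))          # ['.ll']
print(add_suffixes('{ll,c,cpp}'))  # ['.ll', '.c', '.cpp']
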
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
new file mode 100644
index 000000000000..88699e3e76f7
--- /dev/null
+++ b/test/lit.site.cfg.in
@@ -0,0 +1,9 @@
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
+config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvmgcc_dir = "@LLVMGCCDIR@"
+
+# Let the main config do the real work.
+lit.load_config(config, "@LLVM_SOURCE_DIR@/test/lit.cfg")
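
Once the build system substitutes the @...@ placeholders, the generated lit.site.cfg is plain Python along these lines (paths hypothetical):

## Autogenerated by LLVM/Clang configuration.
# Do not edit!
config.llvm_src_root = "/home/user/llvm"          # hypothetical source tree
config.llvm_obj_root = "/home/user/llvm/build"    # hypothetical build tree
config.llvm_tools_dir = "/home/user/llvm/build/bin"
config.llvmgcc_dir = ""

# Let the main config do the real work.
lit.load_config(config, "/home/user/llvm/test/lit.cfg")
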
diff --git a/test/site.exp.in b/test/site.exp.in
new file mode 100644
index 000000000000..6a74ba81e7f0
--- /dev/null
+++ b/test/site.exp.in
@@ -0,0 +1,27 @@
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+set target_triplet "@target@"
+set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@"
+set llvmgcc_langs "@LLVMGCC_LANGS@"
+set llvmgcc_version "@LLVMGCC_VERSION@"
+set llvmlibsdir "@LLVM_LIBS_DIR@"
+set llvm_bindings "@LLVM_BINDINGS@"
+set srcroot "@LLVM_SOURCE_DIR@"
+set objroot "@LLVM_BINARY_DIR@"
+set srcdir "@LLVM_SOURCE_DIR@"
+set objdir "@LLVM_BINARY_DIR@"
+set gccpath "@GCCPATH@"
+set gxxpath "@GXXPATH@"
+set compile_c "@TEST_COMPILE_C_CMD@"
+set compile_cxx "@TEST_COMPILE_CXX_CMD@"
+set link "@TEST_LINK_CMD@"
+set llvmgcc "@LLVMGCC@"
+set llvmgxx "@LLVMGXX@"
+set llvmgccmajvers "@LLVMGCCMAJVERS@"
+set bugpoint_topts "@BUGPOINT_TOPTS@"
+set shlibext "@SHLIBEXT@"
+set ocamlopt "@OCAMLOPT@"
+set valgrind "@VALGRIND@"
+set grep "@GREP@"
+set gas "@AS@"
+set llvmdsymutil "@DSYMUTIL@"
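
Each of these `set name "value"` lines is exactly what the regex in test/lit.cfg above folds into the site_exp dictionary; a quick standalone check (values hypothetical):

import re

site_exp = {}
for line in ['set target_triplet "x86_64-unknown-linux-gnu"',
             'set TARGETS_TO_BUILD "X86 Sparc PowerPC"']:
    m = re.match('set ([^ ]+) "([^"]*)"', line)
    if m:
        site_exp[m.group(1)] = m.group(2)

print(site_exp['target_triplet'])    # x86_64-unknown-linux-gnu
print(site_exp['TARGETS_TO_BUILD'])  # X86 Sparc PowerPC
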
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 1273c25cd79e..8b5d77e2c42d 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -2,10 +2,13 @@
# large and three small executables. This is done to minimize memory load
# in parallel builds. Please retain this ordering.
-# FIXME: We don't yet have the ability to build llvm-config with CMake
-# based on explicit dependencies.
-if (FALSE)
- add_subdirectory(llvm-config)
+if( NOT WIN32 OR MSYS OR CYGWIN )
+ # It is useful to build llvm-config before the other tools, so we
+ # have a fresh LibDeps.txt for regenerating the hard-coded library
+ # dependencies. llvm-config/CMakeLists.txt takes care of this but we
+ # must keep llvm-config as the first entry on the list of tools to
+ # be built.
+ add_subdirectory(llvm-config)
endif()
add_subdirectory(opt)
@@ -23,11 +26,7 @@ add_subdirectory(llvm-prof)
add_subdirectory(llvm-link)
add_subdirectory(lli)
-# gccas and gccld are deprecated:
-# add_subdirectory(gccas)
-# add_subdirectory(gccld)
add_subdirectory(llvm-extract)
-add_subdirectory(llvm-db)
add_subdirectory(bugpoint)
add_subdirectory(llvm-bcanalyzer)
@@ -37,3 +36,5 @@ add_subdirectory(llvmc)
if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/clang )
endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
+
+set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
diff --git a/tools/Makefile b/tools/Makefile
index 4f0f54bdfbf8..0340c7f78bb9 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -19,7 +19,7 @@ DIRS := llvm-config
PARALLEL_DIRS := opt llvm-as llvm-dis \
llc llvm-ranlib llvm-ar llvm-nm \
llvm-ld llvm-prof llvm-link \
- lli gccas gccld llvm-extract llvm-db \
+ lli llvm-extract \
bugpoint llvm-bcanalyzer llvm-stub \
llvm-mc llvmc
@@ -39,7 +39,7 @@ ifeq ($(ENABLE_PIC),1)
endif
# No support for lto / gold on windows targets
-ifeq ($(OS), $(filter $(OS), Cygwin MingW))
+ifeq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
DIRS := $(filter-out lto gold, $(DIRS))
endif
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index aab50720c6ef..abf5d8ef7211 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -18,17 +18,20 @@
#include "llvm/Linker.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Assembly/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include <iostream>
+#include "llvm/System/Host.h"
#include <memory>
using namespace llvm;
+namespace llvm {
+ Triple TargetTriple;
+}
+
// Anonymous namespace to define command line options for debugging.
//
namespace {
@@ -78,17 +81,25 @@ BugDriver::BugDriver(const char *toolname, bool as_child, bool find_bugs,
///
Module *llvm::ParseInputFile(const std::string &Filename,
LLVMContext& Ctxt) {
- std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(Filename));
- Module *Result = 0;
- if (Buffer.get())
- Result = ParseBitcodeFile(Buffer.get(), Ctxt);
-
SMDiagnostic Err;
- if (!Result && !(Result = ParseAssemblyFile(Filename, Err, Ctxt))) {
- Err.Print("bugpoint", errs());
- Result = 0;
+ Module *Result = ParseIRFile(Filename, Err, Ctxt);
+ if (!Result)
+ Err.Print("bugpoint", errs());
+
+ // If we don't have an override triple, use the first one to configure
+ // bugpoint, or use the host triple if none provided.
+ if (Result) {
+ if (TargetTriple.getTriple().empty()) {
+ Triple TheTriple(Result->getTargetTriple());
+
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getHostTriple());
+
+ TargetTriple.setTriple(TheTriple.getTriple());
+ }
+
+ Result->setTargetTriple(TargetTriple.getTriple()); // override the triple
}
-
return Result;
}
@@ -107,28 +118,28 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
if (Program == 0) return true;
if (!run_as_child)
- std::cout << "Read input file : '" << Filenames[0] << "'\n";
+ outs() << "Read input file : '" << Filenames[0] << "'\n";
for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
std::auto_ptr<Module> M(ParseInputFile(Filenames[i], Context));
if (M.get() == 0) return true;
if (!run_as_child)
- std::cout << "Linking in input file: '" << Filenames[i] << "'\n";
+ outs() << "Linking in input file: '" << Filenames[i] << "'\n";
std::string ErrorMessage;
if (Linker::LinkModules(Program, M.get(), &ErrorMessage)) {
- std::cerr << ToolName << ": error linking in '" << Filenames[i] << "': "
- << ErrorMessage << '\n';
+ errs() << ToolName << ": error linking in '" << Filenames[i] << "': "
+ << ErrorMessage << '\n';
return true;
}
}
} catch (const std::string &Error) {
- std::cerr << ToolName << ": error reading input '" << Error << "'\n";
+ errs() << ToolName << ": error reading input '" << Error << "'\n";
return true;
}
if (!run_as_child)
- std::cout << "*** All input ok\n";
+ outs() << "*** All input ok\n";
// All input files read successfully!
return false;
@@ -162,7 +173,7 @@ bool BugDriver::run() {
// file, then we know the compiler didn't crash, so try to diagnose a
// miscompilation.
if (!PassesToRun.empty()) {
- std::cout << "Running selected passes on program to test for crash: ";
+ outs() << "Running selected passes on program to test for crash: ";
if (runPasses(PassesToRun))
return debugOptimizerCrash();
}
@@ -171,12 +182,12 @@ bool BugDriver::run() {
if (initializeExecutionEnvironment()) return true;
// Test to see if we have a code generator crash.
- std::cout << "Running the code generator to test for a crash: ";
+ outs() << "Running the code generator to test for a crash: ";
try {
compileProgram(Program);
- std::cout << '\n';
+ outs() << '\n';
} catch (ToolExecutionError &TEE) {
- std::cout << TEE.what();
+ outs() << TEE.what();
return debugCodeGeneratorCrash();
}
@@ -187,7 +198,7 @@ bool BugDriver::run() {
//
bool CreatedOutput = false;
if (ReferenceOutputFile.empty()) {
- std::cout << "Generating reference output from raw program: ";
+ outs() << "Generating reference output from raw program: ";
if(!createReferenceFile(Program)){
return debugCodeGeneratorCrash();
}
@@ -197,28 +208,28 @@ bool BugDriver::run() {
// Make sure the reference output file gets deleted on exit from this
// function, if appropriate.
sys::Path ROF(ReferenceOutputFile);
- FileRemover RemoverInstance(ROF, CreatedOutput);
+ FileRemover RemoverInstance(ROF, CreatedOutput && !SaveTemps);
// Diff the output of the raw program against the reference output. If it
// matches, then we assume there is a miscompilation bug and try to
// diagnose it.
- std::cout << "*** Checking the code generator...\n";
+ outs() << "*** Checking the code generator...\n";
try {
if (!diffProgram()) {
- std::cout << "\n*** Debugging miscompilation!\n";
+ outs() << "\n*** Output matches: Debugging miscompilation!\n";
return debugMiscompilation();
}
} catch (ToolExecutionError &TEE) {
- std::cerr << TEE.what();
+ errs() << TEE.what();
return debugCodeGeneratorCrash();
}
- std::cout << "\n*** Input program does not match reference diff!\n";
- std::cout << "Debugging code generator problem!\n";
+ outs() << "\n*** Input program does not match reference diff!\n";
+ outs() << "Debugging code generator problem!\n";
try {
return debugCodeGenerator();
} catch (ToolExecutionError &TEE) {
- std::cerr << TEE.what();
+ errs() << TEE.what();
return debugCodeGeneratorCrash();
}
}
@@ -227,18 +238,18 @@ void llvm::PrintFunctionList(const std::vector<Function*> &Funcs) {
unsigned NumPrint = Funcs.size();
if (NumPrint > 10) NumPrint = 10;
for (unsigned i = 0; i != NumPrint; ++i)
- std::cout << " " << Funcs[i]->getName();
+ outs() << " " << Funcs[i]->getName();
if (NumPrint < Funcs.size())
- std::cout << "... <" << Funcs.size() << " total>";
- std::cout << std::flush;
+ outs() << "... <" << Funcs.size() << " total>";
+ outs().flush();
}
void llvm::PrintGlobalVariableList(const std::vector<GlobalVariable*> &GVs) {
unsigned NumPrint = GVs.size();
if (NumPrint > 10) NumPrint = 10;
for (unsigned i = 0; i != NumPrint; ++i)
- std::cout << " " << GVs[i]->getName();
+ outs() << " " << GVs[i]->getName();
if (NumPrint < GVs.size())
- std::cout << "... <" << GVs.size() << " total>";
- std::cout << std::flush;
+ outs() << "... <" << GVs.size() << " total>";
+ outs().flush();
}
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index d637c2438bf9..db35c851d9a4 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -44,7 +44,7 @@ extern bool BugpointIsInterrupted;
class BugDriver {
LLVMContext& Context;
- const std::string ToolName; // Name of bugpoint
+ const char *ToolName; // argv[0] of bugpoint
std::string ReferenceOutputFile; // Name of `good' output file
Module *Program; // The raw program, linked together
std::vector<const PassInfo*> PassesToRun;
@@ -64,7 +64,7 @@ public:
BugDriver(const char *toolname, bool as_child, bool find_bugs,
unsigned timeout, unsigned memlimit, LLVMContext& ctxt);
- const std::string &getToolName() const { return ToolName; }
+ const char *getToolName() const { return ToolName; }
LLVMContext& getContext() { return Context; }
@@ -248,7 +248,7 @@ public:
/// optimizations fail for some reason (optimizer crashes), return true,
/// otherwise return false. If DeleteOutput is set to true, the bitcode is
/// deleted on success, and the filename string is undefined. This prints to
- /// cout a single line message indicating whether compilation was successful
+ /// outs() a single line message indicating whether compilation was successful
/// or failed, unless Quiet is set. ExtraArgs specifies additional arguments
/// to pass to the child bugpoint instance.
///
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index 9697b341f3a2..b348a0875e45 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -28,7 +28,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/CommandLine.h"
-#include <fstream>
#include <set>
using namespace llvm;
@@ -64,8 +63,8 @@ ReducePassList::doTest(std::vector<const PassInfo*> &Prefix,
sys::Path PrefixOutput;
Module *OrigProgram = 0;
if (!Prefix.empty()) {
- std::cout << "Checking to see if these passes crash: "
- << getPassesString(Prefix) << ": ";
+ outs() << "Checking to see if these passes crash: "
+ << getPassesString(Prefix) << ": ";
std::string PfxOutput;
if (BD.runPasses(Prefix, PfxOutput))
return KeepPrefix;
@@ -73,17 +72,17 @@ ReducePassList::doTest(std::vector<const PassInfo*> &Prefix,
PrefixOutput.set(PfxOutput);
OrigProgram = BD.Program;
- BD.Program = ParseInputFile(PrefixOutput.toString(), BD.getContext());
+ BD.Program = ParseInputFile(PrefixOutput.str(), BD.getContext());
if (BD.Program == 0) {
- std::cerr << BD.getToolName() << ": Error reading bitcode file '"
- << PrefixOutput << "'!\n";
+ errs() << BD.getToolName() << ": Error reading bitcode file '"
+ << PrefixOutput.str() << "'!\n";
exit(1);
}
PrefixOutput.eraseFromDisk();
}
- std::cout << "Checking to see if these passes crash: "
- << getPassesString(Suffix) << ": ";
+ outs() << "Checking to see if these passes crash: "
+ << getPassesString(Suffix) << ": ";
if (BD.runPasses(Suffix)) {
delete OrigProgram; // The suffix crashes alone...
@@ -142,9 +141,9 @@ ReduceCrashingGlobalVariables::TestGlobalVariables(
GVSet.insert(CMGV);
}
- std::cout << "Checking for crash with only these global variables: ";
+ outs() << "Checking for crash with only these global variables: ";
PrintGlobalVariableList(GVs);
- std::cout << ": ";
+ outs() << ": ";
// Loop over and delete any global variables which we aren't supposed to be
// playing with...
@@ -216,9 +215,9 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
Functions.insert(CMF);
}
- std::cout << "Checking for crash with only these functions: ";
+ outs() << "Checking for crash with only these functions: ";
PrintFunctionList(Funcs);
- std::cout << ": ";
+ outs() << ": ";
// Loop over and delete any functions which we aren't supposed to be playing
// with...
@@ -276,14 +275,14 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
for (unsigned i = 0, e = BBs.size(); i != e; ++i)
Blocks.insert(cast<BasicBlock>(ValueMap[BBs[i]]));
- std::cout << "Checking for crash with only these blocks:";
+ outs() << "Checking for crash with only these blocks:";
unsigned NumPrint = Blocks.size();
if (NumPrint > 10) NumPrint = 10;
for (unsigned i = 0, e = NumPrint; i != e; ++i)
- std::cout << " " << BBs[i]->getName();
+ outs() << " " << BBs[i]->getName();
if (NumPrint < Blocks.size())
- std::cout << "... <" << Blocks.size() << " total>";
- std::cout << ": ";
+ outs() << "... <" << Blocks.size() << " total>";
+ outs() << ": ";
// Loop over and hack up any blocks that are not listed...
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
@@ -298,12 +297,13 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
if (isa<StructType>(BBTerm->getType()))
BBTerm->replaceAllUsesWith(UndefValue::get(BBTerm->getType()));
- else if (BB->getTerminator()->getType() != Type::VoidTy)
+ else if (BB->getTerminator()->getType() !=
+ Type::getVoidTy(BB->getContext()))
BBTerm->replaceAllUsesWith(Constant::getNullValue(BBTerm->getType()));
// Replace the old terminator instruction.
BB->getInstList().pop_back();
- new UnreachableInst(BB);
+ new UnreachableInst(BB->getContext(), BB);
}
// The CFG Simplifier pass may delete one of the basic blocks we are
@@ -333,7 +333,7 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
ValueSymbolTable &ST = BlockInfo[i].first->getValueSymbolTable();
Value* V = ST.lookup(BlockInfo[i].second);
- if (V && V->getType() == Type::LabelTy)
+ if (V && V->getType() == Type::getLabelTy(V->getContext()))
BBs.push_back(cast<BasicBlock>(V));
}
return true;
@@ -380,18 +380,18 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
Instructions.insert(cast<Instruction>(ValueMap[Insts[i]]));
}
- std::cout << "Checking for crash with only " << Instructions.size();
+ outs() << "Checking for crash with only " << Instructions.size();
if (Instructions.size() == 1)
- std::cout << " instruction: ";
+ outs() << " instruction: ";
else
- std::cout << " instructions: ";
+ outs() << " instructions: ";
for (Module::iterator MI = M->begin(), ME = M->end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
Instruction *Inst = I++;
if (!Instructions.count(Inst) && !isa<TerminatorInst>(Inst)) {
- if (Inst->getType() != Type::VoidTy)
+ if (Inst->getType() != Type::getVoidTy(Inst->getContext()))
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
}
@@ -443,13 +443,13 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
delete M; // No change made...
} else {
// See if the program still causes a crash...
- std::cout << "\nChecking to see if we can delete global inits: ";
+ outs() << "\nChecking to see if we can delete global inits: ";
if (TestFn(BD, M)) { // Still crashes?
BD.setNewProgram(M);
- std::cout << "\n*** Able to remove all global initializers!\n";
+ outs() << "\n*** Able to remove all global initializers!\n";
} else { // No longer crashes?
- std::cout << " - Removing all global inits hides problem!\n";
+ outs() << " - Removing all global inits hides problem!\n";
delete M;
std::vector<GlobalVariable*> GVs;
@@ -460,7 +460,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
GVs.push_back(I);
if (GVs.size() > 1 && !BugpointIsInterrupted) {
- std::cout << "\n*** Attempting to reduce the number of global "
+ outs() << "\n*** Attempting to reduce the number of global "
<< "variables in the testcase\n";
unsigned OldSize = GVs.size();
@@ -481,7 +481,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
Functions.push_back(I);
if (Functions.size() > 1 && !BugpointIsInterrupted) {
- std::cout << "\n*** Attempting to reduce the number of functions "
+ outs() << "\n*** Attempting to reduce the number of functions "
"in the testcase\n";
unsigned OldSize = Functions.size();
@@ -530,8 +530,8 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
do {
if (BugpointIsInterrupted) break;
--Simplification;
- std::cout << "\n*** Attempting to reduce testcase by deleting instruc"
- << "tions: Simplification Level #" << Simplification << '\n';
+ outs() << "\n*** Attempting to reduce testcase by deleting instruc"
+ << "tions: Simplification Level #" << Simplification << '\n';
// Now that we have deleted the functions that are unnecessary for the
// program, try to remove instructions that are not necessary to cause the
@@ -559,7 +559,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
} else {
if (BugpointIsInterrupted) goto ExitLoops;
- std::cout << "Checking instruction: " << *I;
+ outs() << "Checking instruction: " << *I;
Module *M = BD.deleteInstructionFromProgram(I, Simplification);
// Find out if the pass still crashes on this pass...
@@ -586,7 +586,7 @@ ExitLoops:
// Try to clean up the testcase by running funcresolve and globaldce...
if (!BugpointIsInterrupted) {
- std::cout << "\n*** Attempting to perform final cleanups: ";
+ outs() << "\n*** Attempting to perform final cleanups: ";
Module *M = CloneModule(BD.getProgram());
M = BD.performFinalCleanups(M, true);
@@ -612,15 +612,15 @@ static bool TestForOptimizerCrash(BugDriver &BD, Module *M) {
/// out exactly which pass is crashing.
///
bool BugDriver::debugOptimizerCrash(const std::string &ID) {
- std::cout << "\n*** Debugging optimizer crash!\n";
+ outs() << "\n*** Debugging optimizer crash!\n";
// Reduce the list of passes which causes the optimizer to crash...
if (!BugpointIsInterrupted)
ReducePassList(*this).reduceList(PassesToRun);
- std::cout << "\n*** Found crashing pass"
- << (PassesToRun.size() == 1 ? ": " : "es: ")
- << getPassesString(PassesToRun) << '\n';
+ outs() << "\n*** Found crashing pass"
+ << (PassesToRun.size() == 1 ? ": " : "es: ")
+ << getPassesString(PassesToRun) << '\n';
EmitProgressBitcode(ID);
@@ -630,10 +630,10 @@ bool BugDriver::debugOptimizerCrash(const std::string &ID) {
static bool TestForCodeGenCrash(BugDriver &BD, Module *M) {
try {
BD.compileProgram(M);
- std::cerr << '\n';
+ errs() << '\n';
return false;
} catch (ToolExecutionError &) {
- std::cerr << "<crash>\n";
+ errs() << "<crash>\n";
return true; // Tool is still crashing.
}
}
@@ -642,7 +642,7 @@ static bool TestForCodeGenCrash(BugDriver &BD, Module *M) {
/// crashes on an input. It attempts to reduce the input as much as possible
/// while still causing the code generator to crash.
bool BugDriver::debugCodeGeneratorCrash() {
- std::cerr << "*** Debugging code generator crash!\n";
+ errs() << "*** Debugging code generator crash!\n";
return DebugACrash(*this, TestForCodeGenCrash);
}
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 640fe2829aeb..feda331177b1 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -18,8 +18,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/raw_ostream.h"
#include <fstream>
-#include <iostream>
using namespace llvm;
@@ -56,19 +56,19 @@ namespace {
cl::opt<OutputType>
SafeInterpreterSel(cl::desc("Specify \"safe\" i.e. known-good backend:"),
- cl::values(clEnumValN(AutoPick, "safe-auto", "Use best guess"),
- clEnumValN(RunLLC, "safe-run-llc", "Compile with LLC"),
- clEnumValN(RunCBE, "safe-run-cbe", "Compile with CBE"),
- clEnumValN(Custom, "safe-run-custom",
- "Use -exec-command to define a command to execute "
- "the bitcode. Useful for cross-compilation."),
- clEnumValEnd),
+ cl::values(clEnumValN(AutoPick, "safe-auto", "Use best guess"),
+ clEnumValN(RunLLC, "safe-run-llc", "Compile with LLC"),
+ clEnumValN(RunCBE, "safe-run-cbe", "Compile with CBE"),
+ clEnumValN(Custom, "safe-run-custom",
+ "Use -exec-command to define a command to execute "
+ "the bitcode. Useful for cross-compilation."),
+ clEnumValEnd),
cl::init(AutoPick));
cl::opt<std::string>
SafeInterpreterPath("safe-path",
- cl::desc("Specify the path to the \"safe\" backend program"),
- cl::init(""));
+ cl::desc("Specify the path to the \"safe\" backend program"),
+ cl::init(""));
cl::opt<bool>
AppendProgramExitCode("append-exit-code",
@@ -100,6 +100,10 @@ namespace llvm {
cl::list<std::string>
InputArgv("args", cl::Positional, cl::desc("<program arguments>..."),
cl::ZeroOrMore, cl::PositionalEatsArgs);
+
+ cl::opt<std::string>
+ OutputPrefix("output-prefix", cl::init("bugpoint"),
+ cl::desc("Prefix to use for outputs (default: 'bugpoint')"));
}
namespace {
@@ -126,7 +130,7 @@ namespace {
/// environment for executing LLVM programs.
///
bool BugDriver::initializeExecutionEnvironment() {
- std::cout << "Initializing execution environment: ";
+ outs() << "Initializing execution environment: ";
// Create an instance of the AbstractInterpreter interface as specified on
// the command line
@@ -178,17 +182,16 @@ bool BugDriver::initializeExecutionEnvironment() {
&ToolArgv, &GCCToolArgv);
break;
case Custom:
- Interpreter = AbstractInterpreter::createCustom(getToolName(), Message,
- CustomExecCommand);
+ Interpreter = AbstractInterpreter::createCustom(Message, CustomExecCommand);
break;
default:
Message = "Sorry, this back-end is not supported by bugpoint right now!\n";
break;
}
if (!Interpreter)
- std::cerr << Message;
+ errs() << Message;
else // Display informational messages on stdout instead of stderr
- std::cout << Message;
+ outs() << Message;
std::string Path = SafeInterpreterPath;
if (Path.empty())
@@ -201,7 +204,7 @@ bool BugDriver::initializeExecutionEnvironment() {
InterpreterSel == CBE_bug) {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
&SafeToolArgs,
&GCCToolArgv);
}
@@ -211,7 +214,7 @@ bool BugDriver::initializeExecutionEnvironment() {
InterpreterSel == LLC_Safe) {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
&SafeToolArgs,
&GCCToolArgv);
}
@@ -222,7 +225,7 @@ bool BugDriver::initializeExecutionEnvironment() {
if (!SafeInterpreter &&
InterpreterSel != RunCBE) {
SafeInterpreterSel = RunCBE;
- SafeInterpreter = AbstractInterpreter::createCBE(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message,
&SafeToolArgs,
&GCCToolArgv);
}
@@ -231,7 +234,7 @@ bool BugDriver::initializeExecutionEnvironment() {
InterpreterSel != RunJIT) {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
&SafeToolArgs,
&GCCToolArgv);
}
@@ -242,17 +245,17 @@ bool BugDriver::initializeExecutionEnvironment() {
break;
case RunLLC:
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
&SafeToolArgs,
&GCCToolArgv);
break;
case RunCBE:
- SafeInterpreter = AbstractInterpreter::createCBE(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message,
&SafeToolArgs,
&GCCToolArgv);
break;
case Custom:
- SafeInterpreter = AbstractInterpreter::createCustom(Path, Message,
+ SafeInterpreter = AbstractInterpreter::createCustom(Message,
CustomExecCommand);
break;
default:
@@ -260,10 +263,10 @@ bool BugDriver::initializeExecutionEnvironment() {
"\"safe\" backend right now!\n";
break;
}
- if (!SafeInterpreter) { std::cout << Message << "\nExiting.\n"; exit(1); }
+ if (!SafeInterpreter) { outs() << Message << "\nExiting.\n"; exit(1); }
- gcc = GCC::create(getToolName(), Message, &GCCToolArgv);
- if (!gcc) { std::cout << Message << "\nExiting.\n"; exit(1); }
+ gcc = GCC::create(Message, &GCCToolArgv);
+ if (!gcc) { outs() << Message << "\nExiting.\n"; exit(1); }
// If there was an error creating the selected interpreter, quit with error.
return Interpreter == 0;
@@ -275,24 +278,24 @@ bool BugDriver::initializeExecutionEnvironment() {
///
void BugDriver::compileProgram(Module *M) {
// Emit the program to a bitcode file...
- sys::Path BitcodeFile ("bugpoint-test-program.bc");
+ sys::Path BitcodeFile (OutputPrefix + "-test-program.bc");
std::string ErrMsg;
if (BitcodeFile.makeUnique(true,&ErrMsg)) {
- std::cerr << ToolName << ": Error making unique filename: " << ErrMsg
- << "\n";
+ errs() << ToolName << ": Error making unique filename: " << ErrMsg
+ << "\n";
exit(1);
}
- if (writeProgramToFile(BitcodeFile.toString(), M)) {
- std::cerr << ToolName << ": Error emitting bitcode to file '"
- << BitcodeFile << "'!\n";
+ if (writeProgramToFile(BitcodeFile.str(), M)) {
+ errs() << ToolName << ": Error emitting bitcode to file '"
+ << BitcodeFile.str() << "'!\n";
exit(1);
}
// Remove the temporary bitcode file when we are done.
- FileRemover BitcodeFileRemover(BitcodeFile);
+ FileRemover BitcodeFileRemover(BitcodeFile, !SaveTemps);
// Actually compile the program!
- Interpreter->compileProgram(BitcodeFile.toString());
+ Interpreter->compileProgram(BitcodeFile.str());
}
@@ -311,17 +314,17 @@ std::string BugDriver::executeProgram(std::string OutputFile,
std::string ErrMsg;
if (BitcodeFile.empty()) {
// Emit the program to a bitcode file...
- sys::Path uniqueFilename("bugpoint-test-program.bc");
+ sys::Path uniqueFilename(OutputPrefix + "-test-program.bc");
if (uniqueFilename.makeUnique(true, &ErrMsg)) {
- std::cerr << ToolName << ": Error making unique filename: "
- << ErrMsg << "!\n";
+ errs() << ToolName << ": Error making unique filename: "
+ << ErrMsg << "!\n";
exit(1);
}
- BitcodeFile = uniqueFilename.toString();
+ BitcodeFile = uniqueFilename.str();
if (writeProgramToFile(BitcodeFile, Program)) {
- std::cerr << ToolName << ": Error emitting bitcode to file '"
- << BitcodeFile << "'!\n";
+ errs() << ToolName << ": Error emitting bitcode to file '"
+ << BitcodeFile << "'!\n";
exit(1);
}
CreatedBitcode = true;
@@ -329,18 +332,18 @@ std::string BugDriver::executeProgram(std::string OutputFile,
// Remove the temporary bitcode file when we are done.
sys::Path BitcodePath (BitcodeFile);
- FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode);
+ FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode && !SaveTemps);
- if (OutputFile.empty()) OutputFile = "bugpoint-execution-output";
+ if (OutputFile.empty()) OutputFile = OutputPrefix + "-execution-output";
// Check to see if this is a valid output filename...
sys::Path uniqueFile(OutputFile);
if (uniqueFile.makeUnique(true, &ErrMsg)) {
- std::cerr << ToolName << ": Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << ToolName << ": Error making unique filename: "
+ << ErrMsg << "\n";
exit(1);
}
- OutputFile = uniqueFile.toString();
+ OutputFile = uniqueFile.str();
// Figure out which shared objects to run, if any.
std::vector<std::string> SharedObjs(AdditionalSOs);
@@ -352,10 +355,10 @@ std::string BugDriver::executeProgram(std::string OutputFile,
Timeout, MemoryLimit);
if (RetVal == -1) {
- std::cerr << "<timeout>";
+ errs() << "<timeout>";
static bool FirstTimeout = true;
if (FirstTimeout) {
- std::cout << "\n"
+ outs() << "\n"
"*** Program execution timed out! This mechanism is designed to handle\n"
" programs stuck in infinite loops gracefully. The -timeout option\n"
" can be used to change the timeout threshold or disable it completely\n"
@@ -395,7 +398,7 @@ std::string BugDriver::compileSharedObject(const std::string &BitcodeFile) {
GCC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutputFile);
std::string SharedObjectFile;
- if (gcc->MakeSharedObject(OutputFile.toString(), FT,
+ if (gcc->MakeSharedObject(OutputFile.str(), FT,
SharedObjectFile, AdditionalLinkerArgs))
exit(1);
@@ -418,14 +421,14 @@ bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
}
try {
ReferenceOutputFile = executeProgramSafely(Filename);
- std::cout << "\nReference output is: " << ReferenceOutputFile << "\n\n";
+ outs() << "\nReference output is: " << ReferenceOutputFile << "\n\n";
} catch (ToolExecutionError &TEE) {
- std::cerr << TEE.what();
+ errs() << TEE.what();
if (Interpreter != SafeInterpreter) {
- std::cerr << "*** There is a bug running the \"safe\" backend. Either"
- << " debug it (for example with the -run-cbe bugpoint option,"
- << " if CBE is being used as the \"safe\" backend), or fix the"
- << " error some other way.\n";
+ errs() << "*** There is a bug running the \"safe\" backend. Either"
+ << " debug it (for example with the -run-cbe bugpoint option,"
+ << " if CBE is being used as the \"safe\" backend), or fix the"
+ << " error some other way.\n";
}
return false;
}
@@ -449,17 +452,18 @@ bool BugDriver::diffProgram(const std::string &BitcodeFile,
std::string Error;
bool FilesDifferent = false;
if (int Diff = DiffFilesWithTolerance(sys::Path(ReferenceOutputFile),
- sys::Path(Output.toString()),
+ sys::Path(Output.str()),
AbsTolerance, RelTolerance, &Error)) {
if (Diff == 2) {
- std::cerr << "While diffing output: " << Error << '\n';
+ errs() << "While diffing output: " << Error << '\n';
exit(1);
}
FilesDifferent = true;
}
-
- // Remove the generated output.
- Output.eraseFromDisk();
+ else {
+ // Remove the generated output if there are no differences.
+ Output.eraseFromDisk();
+ }
// Remove the bitcode file if we are supposed to.
if (RemoveBitcode)
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index e4affbb0ddcc..918d6a6a2afb 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -15,10 +15,12 @@
#include "BugDriver.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -27,15 +29,15 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
#include "llvm/System/Signals.h"
#include <set>
-#include <fstream>
-#include <iostream>
using namespace llvm;
namespace llvm {
bool DisableSimplifyCFG = false;
+ extern cl::opt<std::string> OutputPrefix;
} // End llvm namespace
namespace {
@@ -73,7 +75,7 @@ Module *BugDriver::deleteInstructionFromProgram(const Instruction *I,
// If this instruction produces a value, replace any users with null values
if (isa<StructType>(TheInst->getType()))
TheInst->replaceAllUsesWith(UndefValue::get(TheInst->getType()));
- else if (TheInst->getType() != Type::VoidTy)
+ else if (TheInst->getType() != Type::getVoidTy(I->getContext()))
TheInst->replaceAllUsesWith(Constant::getNullValue(TheInst->getType()));
// Remove the instruction from the program.
@@ -125,7 +127,7 @@ Module *BugDriver::performFinalCleanups(Module *M, bool MayModifySemantics) {
Module *New = runPassesOn(M, CleanupPasses);
if (New == 0) {
- std::cerr << "Final cleanups failed. Sorry. :( Please report a bug!\n";
+ errs() << "Final cleanups failed. Sorry. :( Please report a bug!\n";
return M;
}
delete M;
@@ -143,9 +145,9 @@ Module *BugDriver::ExtractLoop(Module *M) {
Module *NewM = runPassesOn(M, LoopExtractPasses);
if (NewM == 0) {
Module *Old = swapProgramIn(M);
- std::cout << "*** Loop extraction failed: ";
+ outs() << "*** Loop extraction failed: ";
EmitProgressBitcode("loopextraction", true);
- std::cout << "*** Sorry. :( Please report a bug!\n";
+ outs() << "*** Sorry. :( Please report a bug!\n";
swapProgramIn(Old);
return 0;
}
@@ -184,9 +186,11 @@ static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
std::vector<Constant*> ArrayElts;
for (unsigned i = 0, e = TorList.size(); i != e; ++i) {
std::vector<Constant*> Elts;
- Elts.push_back(ConstantInt::get(Type::Int32Ty, TorList[i].second));
+ Elts.push_back(ConstantInt::get(
+ Type::getInt32Ty(TorList[i].first->getContext()), TorList[i].second));
Elts.push_back(TorList[i].first);
- ArrayElts.push_back(ConstantStruct::get(Elts));
+ ArrayElts.push_back(ConstantStruct::get(TorList[i].first->getContext(),
+ Elts, false));
}
return ConstantArray::get(ArrayType::get(ArrayElts[0]->getType(),
ArrayElts.size()),
@@ -236,8 +240,9 @@ static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
GV->eraseFromParent();
if (!M1Tors.empty()) {
Constant *M1Init = GetTorInit(M1Tors);
- new GlobalVariable(M1Init->getType(), false, GlobalValue::AppendingLinkage,
- M1Init, GlobalName, M1);
+ new GlobalVariable(*M1, M1Init->getType(), false,
+ GlobalValue::AppendingLinkage,
+ M1Init, GlobalName);
}
GV = M2->getNamedGlobal(GlobalName);
@@ -247,8 +252,9 @@ static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
GV->eraseFromParent();
if (!M2Tors.empty()) {
Constant *M2Init = GetTorInit(M2Tors);
- new GlobalVariable(M2Init->getType(), false, GlobalValue::AppendingLinkage,
- M2Init, GlobalName, M2);
+ new GlobalVariable(*M2, M2Init->getType(), false,
+ GlobalValue::AppendingLinkage,
+ M2Init, GlobalName);
}
}
@@ -266,8 +272,8 @@ llvm::SplitFunctionsOutOfModule(Module *M,
I->setLinkage(GlobalValue::ExternalLinkage);
for (Module::global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
- if (I->hasName() && *I->getNameStart() == '\01')
- I->setName(I->getNameStart()+1, I->getNameLen()-1);
+ if (I->hasName() && I->getName()[0] == '\01')
+ I->setName(I->getName().substr(1));
I->setLinkage(GlobalValue::ExternalLinkage);
}
@@ -283,9 +289,9 @@ llvm::SplitFunctionsOutOfModule(Module *M,
std::set<Function *> TestFunctions;
for (unsigned i = 0, e = F.size(); i != e; ++i) {
Function *TNOF = cast<Function>(ValueMap[F[i]]);
- DEBUG(std::cerr << "Removing function ");
- DEBUG(WriteAsOperand(std::cerr, TNOF, false));
- DEBUG(std::cerr << "\n");
+ DEBUG(errs() << "Removing function ");
+ DEBUG(WriteAsOperand(errs(), TNOF, false));
+ DEBUG(errs() << "\n");
TestFunctions.insert(cast<Function>(NewValueMap[TNOF]));
DeleteFunctionBody(TNOF); // Function is now external in this module!
}
@@ -319,11 +325,11 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
Module *M) {
char *ExtraArg = NULL;
- sys::Path uniqueFilename("bugpoint-extractblocks");
+ sys::Path uniqueFilename(OutputPrefix + "-extractblocks");
std::string ErrMsg;
if (uniqueFilename.createTemporaryFileOnDisk(true, &ErrMsg)) {
- std::cout << "*** Basic Block extraction failed!\n";
- std::cerr << "Error creating temporary file: " << ErrMsg << "\n";
+ outs() << "*** Basic Block extraction failed!\n";
+ errs() << "Error creating temporary file: " << ErrMsg << "\n";
M = swapProgramIn(M);
EmitProgressBitcode("basicblockextractfail", true);
swapProgramIn(M);
@@ -331,11 +337,12 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
}
sys::RemoveFileOnSignal(uniqueFilename);
- std::ofstream BlocksToNotExtractFile(uniqueFilename.c_str());
- if (!BlocksToNotExtractFile) {
- std::cout << "*** Basic Block extraction failed!\n";
- std::cerr << "Error writing list of blocks to not extract: " << ErrMsg
- << "\n";
+ std::string ErrorInfo;
+ raw_fd_ostream BlocksToNotExtractFile(uniqueFilename.c_str(), ErrorInfo);
+ if (!ErrorInfo.empty()) {
+ outs() << "*** Basic Block extraction failed!\n";
+ errs() << "Error writing list of blocks to not extract: " << ErrorInfo
+ << "\n";
M = swapProgramIn(M);
EmitProgressBitcode("basicblockextractfail", true);
swapProgramIn(M);
@@ -347,7 +354,7 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
// If the BB doesn't have a name, give it one so we have something to key
// off of.
if (!BB->hasName()) BB->setName("tmpbb");
- BlocksToNotExtractFile << BB->getParent()->getName() << " "
+ BlocksToNotExtractFile << BB->getParent()->getNameStr() << " "
<< BB->getName() << "\n";
}
BlocksToNotExtractFile.close();
@@ -366,7 +373,7 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
free(ExtraArg);
if (Ret == 0) {
- std::cout << "*** Basic Block extraction failed, please report a bug!\n";
+ outs() << "*** Basic Block extraction failed, please report a bug!\n";
M = swapProgramIn(M);
EmitProgressBitcode("basicblockextractfail", true);
swapProgramIn(M);
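
These ExtractFunction.cpp hunks thread an explicit LLVMContext through type and constant creation (the old Type::Int32Ty and Type::VoidTy singletons are gone) and switch to the GlobalVariable constructor that takes the parent Module first. A sketch of the same pattern under those 2.6-era headers; makeCounter() is a hypothetical helper:

  #include "llvm/Constants.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Module.h"
  using namespace llvm;

  static GlobalVariable *makeCounter(Module &M) {
    LLVMContext &C = M.getContext();
    // Per-context type lookup replaces the old static Type::Int32Ty singleton.
    Constant *Init = ConstantInt::get(Type::getInt32Ty(C), 0);
    // The module-first constructor appends the new global to M directly.
    return new GlobalVariable(M, Init->getType(), /*isConstant=*/false,
                              GlobalValue::InternalLinkage, Init, "counter");
  }
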
diff --git a/tools/bugpoint/FindBugs.cpp b/tools/bugpoint/FindBugs.cpp
index e42cce47ba02..2c11d29f60d5 100644
--- a/tools/bugpoint/FindBugs.cpp
+++ b/tools/bugpoint/FindBugs.cpp
@@ -17,9 +17,9 @@
#include "BugDriver.h"
#include "ToolRunner.h"
#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <ctime>
-#include <iostream>
using namespace llvm;
/// runManyPasses - Take the specified pass list and create different
@@ -31,14 +31,14 @@ using namespace llvm;
///
bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses) {
setPassesToRun(AllPasses);
- std::cout << "Starting bug finding procedure...\n\n";
+ outs() << "Starting bug finding procedure...\n\n";
// Creating a reference output if necessary
if (initializeExecutionEnvironment()) return false;
- std::cout << "\n";
+ outs() << "\n";
if (ReferenceOutputFile.empty()) {
- std::cout << "Generating reference output from raw program: \n";
+ outs() << "Generating reference output from raw program: \n";
if (!createReferenceFile(Program))
return false;
}
@@ -55,31 +55,31 @@ bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses) {
//
// Step 2: Run optimizer passes on the program and check for success.
//
- std::cout << "Running selected passes on program to test for crash: ";
+ outs() << "Running selected passes on program to test for crash: ";
for(int i = 0, e = PassesToRun.size(); i != e; i++) {
- std::cout << "-" << PassesToRun[i]->getPassArgument( )<< " ";
+ outs() << "-" << PassesToRun[i]->getPassArgument( )<< " ";
}
std::string Filename;
if(runPasses(PassesToRun, Filename, false)) {
- std::cout << "\n";
- std::cout << "Optimizer passes caused failure!\n\n";
+ outs() << "\n";
+ outs() << "Optimizer passes caused failure!\n\n";
debugOptimizerCrash();
return true;
} else {
- std::cout << "Combination " << num << " optimized successfully!\n";
+ outs() << "Combination " << num << " optimized successfully!\n";
}
//
// Step 3: Compile the optimized code.
//
- std::cout << "Running the code generator to test for a crash: ";
+ outs() << "Running the code generator to test for a crash: ";
try {
compileProgram(Program);
- std::cout << '\n';
+ outs() << '\n';
} catch (ToolExecutionError &TEE) {
- std::cout << "\n*** compileProgram threw an exception: ";
- std::cout << TEE.what();
+ outs() << "\n*** compileProgram threw an exception: ";
+ outs() << TEE.what();
return debugCodeGeneratorCrash();
}
@@ -87,24 +87,24 @@ bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses) {
// Step 4: Run the program and compare its output to the reference
// output (created above).
//
- std::cout << "*** Checking if passes caused miscompliation:\n";
+ outs() << "*** Checking if passes caused miscompliation:\n";
try {
if (diffProgram(Filename, "", false)) {
- std::cout << "\n*** diffProgram returned true!\n";
+ outs() << "\n*** diffProgram returned true!\n";
debugMiscompilation();
return true;
} else {
- std::cout << "\n*** diff'd output matches!\n";
+ outs() << "\n*** diff'd output matches!\n";
}
} catch (ToolExecutionError &TEE) {
- std::cerr << TEE.what();
+ errs() << TEE.what();
debugCodeGeneratorCrash();
return true;
}
sys::Path(Filename).eraseFromDisk();
- std::cout << "\n\n";
+ outs() << "\n\n";
num++;
} //end while
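
Each pass through the runManyPasses loop above follows a fixed recipe: optimize with the chosen pass subset, compile, execute, and diff against the reference, with tool failures surfacing as ToolExecutionError exceptions. A condensed paraphrase of steps 3 and 4 (inside the same BugDriver member function, so compileProgram, diffProgram, and the debug* helpers are those shown in the hunks):

  try {
    compileProgram(Program);                // a codegen crash throws
    if (diffProgram(Filename, "", false)) { // output differs from reference?
      debugMiscompilation();
      return true;
    }
  } catch (ToolExecutionError &TEE) {
    errs() << TEE.what();
    return debugCodeGeneratorCrash();
  }
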
diff --git a/tools/bugpoint/ListReducer.h b/tools/bugpoint/ListReducer.h
index de3f3892001e..8036d1f54499 100644
--- a/tools/bugpoint/ListReducer.h
+++ b/tools/bugpoint/ListReducer.h
@@ -15,8 +15,8 @@
#ifndef BUGPOINT_LIST_REDUCER_H
#define BUGPOINT_LIST_REDUCER_H
+#include "llvm/Support/raw_ostream.h"
#include <vector>
-#include <iostream>
#include <cstdlib>
#include <algorithm>
@@ -58,7 +58,7 @@ struct ListReducer {
case KeepSuffix:
// cannot be reached!
- std::cerr << "bugpoint ListReducer internal error: selected empty set.\n";
+ errs() << "bugpoint ListReducer internal error: selected empty set.\n";
abort();
case NoFailure:
@@ -77,7 +77,7 @@ Backjump:
while (MidTop > 1) { // Binary split reduction loop
// Halt if the user presses ctrl-c.
if (BugpointIsInterrupted) {
- std::cerr << "\n\n*** Reduction Interrupted, cleaning up...\n\n";
+ errs() << "\n\n*** Reduction Interrupted, cleaning up...\n\n";
return true;
}
@@ -88,7 +88,7 @@ Backjump:
NumOfIterationsWithoutProgress > MaxIterations) {
std::vector<ElTy> ShuffledList(TheList);
std::random_shuffle(ShuffledList.begin(), ShuffledList.end());
- std::cerr << "\n\n*** Testing shuffled set...\n\n";
+ errs() << "\n\n*** Testing shuffled set...\n\n";
// Check that random shuffle doesn't lose the bug
if (doTest(ShuffledList, empty) == KeepPrefix) {
// If the bug is still here, use the shuffled list.
@@ -97,10 +97,10 @@ Backjump:
// Must increase the shuffling threshold to avoid the small
// probability of infinite looping without making progress.
MaxIterations += 2;
- std::cerr << "\n\n*** Shuffling does not hide the bug...\n\n";
+ errs() << "\n\n*** Shuffling does not hide the bug...\n\n";
} else {
ShufflingEnabled = false; // Disable shuffling further on
- std::cerr << "\n\n*** Shuffling hides the bug...\n\n";
+ errs() << "\n\n*** Shuffling hides the bug...\n\n";
}
NumOfIterationsWithoutProgress = 0;
}
@@ -160,7 +160,7 @@ Backjump:
for (unsigned i = 1; i < TheList.size()-1; ++i) { // Check interior elts
if (BugpointIsInterrupted) {
- std::cerr << "\n\n*** Reduction Interrupted, cleaning up...\n\n";
+ errs() << "\n\n*** Reduction Interrupted, cleaning up...\n\n";
return true;
}
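
ListReducer.h implements the reduction these messages narrate: repeatedly test the suffix and prefix halves of the failing list, halving the split point until no smaller subset still triggers the bug, with an optional shuffle to escape order dependence. A toy binary-search sketch of just the divide-and-conquer shape, assuming monotonicity (whenever a prefix reproduces the bug, every longer prefix does too); the real ListReducer also tests suffixes and backjumps:

  #include <vector>

  template <typename T, typename Pred>
  std::vector<T> shortestFailingPrefix(const std::vector<T> &List,
                                       Pred StillFails) {
    size_t Lo = 0, Hi = List.size();     // invariant: prefix of length Hi fails
    while (Lo < Hi) {
      size_t Mid = Lo + (Hi - Lo) / 2;
      std::vector<T> Prefix(List.begin(), List.begin() + Mid);
      if (StillFails(Prefix))
        Hi = Mid;                        // a shorter prefix still fails
      else
        Lo = Mid + 1;                    // need more elements to fail
    }
    return std::vector<T>(List.begin(), List.begin() + Hi);
  }
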
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index b3260e13606f..a5914178f12e 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -14,6 +14,7 @@
#include "BugDriver.h"
#include "ListReducer.h"
+#include "ToolRunner.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
@@ -29,6 +30,7 @@
using namespace llvm;
namespace llvm {
+ extern cl::opt<std::string> OutputPrefix;
extern cl::list<std::string> InputArgv;
}
@@ -37,6 +39,10 @@ namespace {
DisableLoopExtraction("disable-loop-extraction",
cl::desc("Don't extract loops when searching for miscompilations"),
cl::init(false));
+ static llvm::cl::opt<bool>
+ DisableBlockExtraction("disable-block-extraction",
+ cl::desc("Don't extract blocks when searching for miscompilations"),
+ cl::init(false));
class ReduceMiscompilingPasses : public ListReducer<const PassInfo*> {
BugDriver &BD;
@@ -56,36 +62,36 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
std::vector<const PassInfo*> &Suffix) {
// First, run the program with just the Suffix passes. If it is still broken
// with JUST the kept passes, discard the prefix passes.
- std::cout << "Checking to see if '" << getPassesString(Suffix)
- << "' compile correctly: ";
+ outs() << "Checking to see if '" << getPassesString(Suffix)
+ << "' compiles correctly: ";
std::string BitcodeResult;
if (BD.runPasses(Suffix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
- std::cerr << " Error running this sequence of passes"
- << " on the input program!\n";
+ errs() << " Error running this sequence of passes"
+ << " on the input program!\n";
BD.setPassesToRun(Suffix);
BD.EmitProgressBitcode("pass-error", false);
exit(BD.debugOptimizerCrash());
}
-
+
// Check to see if the finished program matches the reference output...
if (BD.diffProgram(BitcodeResult, "", true /*delete bitcode*/)) {
- std::cout << " nope.\n";
+ outs() << " nope.\n";
if (Suffix.empty()) {
- std::cerr << BD.getToolName() << ": I'm confused: the test fails when "
- << "no passes are run, nondeterministic program?\n";
+ errs() << BD.getToolName() << ": I'm confused: the test fails when "
+ << "no passes are run, nondeterministic program?\n";
exit(1);
}
return KeepSuffix; // Miscompilation detected!
}
- std::cout << " yup.\n"; // No miscompilation!
+ outs() << " yup.\n"; // No miscompilation!
if (Prefix.empty()) return NoFailure;
// Next, see if the program is broken if we run the "prefix" passes first,
// then separately run the "kept" passes.
- std::cout << "Checking to see if '" << getPassesString(Prefix)
- << "' compile correctly: ";
+ outs() << "Checking to see if '" << getPassesString(Prefix)
+ << "' compiles correctly: ";
// If it is not broken with the kept passes, it's possible that the prefix
// passes must be run before the kept passes to break it. If the program
@@ -94,8 +100,8 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
// prefix passes, then discard the prefix passes.
//
if (BD.runPasses(Prefix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
- std::cerr << " Error running this sequence of passes"
- << " on the input program!\n";
+ errs() << " Error running this sequence of passes"
+ << " on the input program!\n";
BD.setPassesToRun(Prefix);
BD.EmitProgressBitcode("pass-error", false);
exit(BD.debugOptimizerCrash());
@@ -103,19 +109,19 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
// If the prefix maintains the predicate by itself, only keep the prefix!
if (BD.diffProgram(BitcodeResult)) {
- std::cout << " nope.\n";
+ outs() << " nope.\n";
sys::Path(BitcodeResult).eraseFromDisk();
return KeepPrefix;
}
- std::cout << " yup.\n"; // No miscompilation!
+ outs() << " yup.\n"; // No miscompilation!
// Ok, so now we know that the prefix passes work, try running the suffix
// passes on the result of the prefix passes.
//
Module *PrefixOutput = ParseInputFile(BitcodeResult, BD.getContext());
if (PrefixOutput == 0) {
- std::cerr << BD.getToolName() << ": Error reading bitcode file '"
- << BitcodeResult << "'!\n";
+ errs() << BD.getToolName() << ": Error reading bitcode file '"
+ << BitcodeResult << "'!\n";
exit(1);
}
sys::Path(BitcodeResult).eraseFromDisk(); // No longer need the file on disk
@@ -124,14 +130,14 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
if (Suffix.empty())
return NoFailure;
- std::cout << "Checking to see if '" << getPassesString(Suffix)
+ outs() << "Checking to see if '" << getPassesString(Suffix)
<< "' passes compile correctly after the '"
<< getPassesString(Prefix) << "' passes: ";
Module *OriginalInput = BD.swapProgramIn(PrefixOutput);
if (BD.runPasses(Suffix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
- std::cerr << " Error running this sequence of passes"
- << " on the input program!\n";
+ errs() << " Error running this sequence of passes"
+ << " on the input program!\n";
BD.setPassesToRun(Suffix);
BD.EmitProgressBitcode("pass-error", false);
exit(BD.debugOptimizerCrash());
@@ -139,13 +145,13 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
// Run the result...
if (BD.diffProgram(BitcodeResult, "", true/*delete bitcode*/)) {
- std::cout << " nope.\n";
+ outs() << " nope.\n";
delete OriginalInput; // We pruned down the original input...
return KeepSuffix;
}
// Otherwise, we must not be running the bad pass anymore.
- std::cout << " yup.\n"; // No miscompilation!
+ outs() << " yup.\n"; // No miscompilation!
delete BD.swapProgramIn(OriginalInput); // Restore orig program & free test
return NoFailure;
}
@@ -187,8 +193,8 @@ static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
M2 = CloneModule(M2);
}
if (Linker::LinkModules(M1, M2, &ErrorMsg)) {
- std::cerr << BD.getToolName() << ": Error linking modules together:"
- << ErrorMsg << '\n';
+ errs() << BD.getToolName() << ": Error linking modules together:"
+ << ErrorMsg << '\n';
exit(1);
}
delete M2; // We are done with this module.
@@ -212,12 +218,12 @@ static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*>&Funcs){
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
- std::cout << "Checking to see if the program is misoptimized when "
- << (Funcs.size()==1 ? "this function is" : "these functions are")
- << " run through the pass"
- << (BD.getPassesToRun().size() == 1 ? "" : "es") << ":";
+ outs() << "Checking to see if the program is misoptimized when "
+ << (Funcs.size()==1 ? "this function is" : "these functions are")
+ << " run through the pass"
+ << (BD.getPassesToRun().size() == 1 ? "" : "es") << ":";
PrintFunctionList(Funcs);
- std::cout << '\n';
+ outs() << '\n';
// Split the module into the two halves of the program we want.
DenseMap<const Value*, Value*> ValueMap;
@@ -241,12 +247,18 @@ static void DisambiguateGlobalSymbols(Module *M) {
Mangler Mang(*M);
// Agree with the CBE on symbol naming
Mang.markCharUnacceptable('.');
- Mang.setPreserveAsmNames(true);
for (Module::global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I)
- I->setName(Mang.getValueName(I));
- for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
- I->setName(Mang.getValueName(I));
+ I != E; ++I) {
+ // Don't mangle asm names.
+ if (!I->hasName() || I->getName()[0] != 1)
+ I->setName(Mang.getMangledName(I));
+ }
+ for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ // Don't mangle asm names or intrinsics.
+ if ((!I->hasName() || I->getName()[0] != 1) &&
+ I->getIntrinsicID() == 0)
+ I->setName(Mang.getMangledName(I));
+ }
}
/// ExtractLoops - Given a reduced list of functions that still exposed the bug,
@@ -274,7 +286,7 @@ static bool ExtractLoops(BugDriver &BD,
return MadeChange;
}
- std::cerr << "Extracted a loop from the breaking portion of the program.\n";
+ errs() << "Extracted a loop from the breaking portion of the program.\n";
// Bugpoint is intentionally not very trusting of LLVM transformations. In
// particular, we're not going to assume that the loop extractor works, so
@@ -286,16 +298,19 @@ static bool ExtractLoops(BugDriver &BD,
BD.switchToInterpreter(AI);
// Merged program doesn't work anymore!
- std::cerr << " *** ERROR: Loop extraction broke the program. :("
- << " Please report a bug!\n";
- std::cerr << " Continuing on with un-loop-extracted version.\n";
-
- BD.writeProgramToFile("bugpoint-loop-extract-fail-tno.bc", ToNotOptimize);
- BD.writeProgramToFile("bugpoint-loop-extract-fail-to.bc", ToOptimize);
- BD.writeProgramToFile("bugpoint-loop-extract-fail-to-le.bc",
+ errs() << " *** ERROR: Loop extraction broke the program. :("
+ << " Please report a bug!\n";
+ errs() << " Continuing on with un-loop-extracted version.\n";
+
+ BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-tno.bc",
+ ToNotOptimize);
+ BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-to.bc",
+ ToOptimize);
+ BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-to-le.bc",
ToOptimizeLoopExtracted);
- std::cerr << "Please submit the bugpoint-loop-extract-fail-*.bc files.\n";
+ errs() << "Please submit the "
+ << OutputPrefix << "-loop-extract-fail-*.bc files.\n";
delete ToOptimize;
delete ToNotOptimize;
delete ToOptimizeLoopExtracted;
@@ -304,12 +319,12 @@ static bool ExtractLoops(BugDriver &BD,
delete ToOptimize;
BD.switchToInterpreter(AI);
- std::cout << " Testing after loop extraction:\n";
+ outs() << " Testing after loop extraction:\n";
// Clone modules, the tester function will free them.
Module *TOLEBackup = CloneModule(ToOptimizeLoopExtracted);
Module *TNOBackup = CloneModule(ToNotOptimize);
if (!TestFn(BD, ToOptimizeLoopExtracted, ToNotOptimize)) {
- std::cout << "*** Loop extraction masked the problem. Undoing.\n";
+ outs() << "*** Loop extraction masked the problem. Undoing.\n";
// If the program is not still broken, then loop extraction did something
// that masked the error. Stop loop extraction now.
delete TOLEBackup;
@@ -319,7 +334,7 @@ static bool ExtractLoops(BugDriver &BD,
ToOptimizeLoopExtracted = TOLEBackup;
ToNotOptimize = TNOBackup;
- std::cout << "*** Loop extraction successful!\n";
+ outs() << "*** Loop extraction successful!\n";
std::vector<std::pair<std::string, const FunctionType*> > MisCompFunctions;
for (Module::iterator I = ToOptimizeLoopExtracted->begin(),
@@ -334,8 +349,8 @@ static bool ExtractLoops(BugDriver &BD,
// extract another loop.
std::string ErrorMsg;
if (Linker::LinkModules(ToNotOptimize, ToOptimizeLoopExtracted, &ErrorMsg)){
- std::cerr << BD.getToolName() << ": Error linking modules together:"
- << ErrorMsg << '\n';
+ errs() << BD.getToolName() << ": Error linking modules together:"
+ << ErrorMsg << '\n';
exit(1);
}
delete ToOptimizeLoopExtracted;
@@ -388,16 +403,16 @@ namespace {
bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs) {
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
- std::cout << "Checking to see if the program is misoptimized when all ";
+ outs() << "Checking to see if the program is misoptimized when all ";
if (!BBs.empty()) {
- std::cout << "but these " << BBs.size() << " blocks are extracted: ";
+ outs() << "but these " << BBs.size() << " blocks are extracted: ";
for (unsigned i = 0, e = BBs.size() < 10 ? BBs.size() : 10; i != e; ++i)
- std::cout << BBs[i]->getName() << " ";
- if (BBs.size() > 10) std::cout << "...";
+ outs() << BBs[i]->getName() << " ";
+ if (BBs.size() > 10) outs() << "...";
} else {
- std::cout << "blocks are extracted.";
+ outs() << "blocks are extracted.";
}
- std::cout << '\n';
+ outs() << '\n';
// Split the module into the two halves of the program we want.
DenseMap<const Value*, Value*> ValueMap;
@@ -457,7 +472,7 @@ static bool ExtractBlocks(BugDriver &BD,
Module *Extracted = BD.ExtractMappedBlocksFromModule(Blocks, ToExtract);
if (Extracted == 0) {
// Weird, extraction should have worked.
- std::cerr << "Nondeterministic problem extracting blocks??\n";
+ errs() << "Nondeterministic problem extracting blocks??\n";
delete ProgClone;
delete ToExtract;
return false;
@@ -476,8 +491,8 @@ static bool ExtractBlocks(BugDriver &BD,
std::string ErrorMsg;
if (Linker::LinkModules(ProgClone, Extracted, &ErrorMsg)) {
- std::cerr << BD.getToolName() << ": Error linking modules together:"
- << ErrorMsg << '\n';
+ errs() << BD.getToolName() << ": Error linking modules together:"
+ << ErrorMsg << '\n';
exit(1);
}
delete Extracted;
@@ -520,11 +535,11 @@ DebugAMiscompilation(BugDriver &BD,
if (!BugpointIsInterrupted)
ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions);
- std::cout << "\n*** The following function"
- << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
- << " being miscompiled: ";
+ outs() << "\n*** The following function"
+ << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
+ << " being miscompiled: ";
PrintFunctionList(MiscompiledFunctions);
- std::cout << '\n';
+ outs() << '\n';
// See if we can rip any loops out of the miscompiled functions and still
// trigger the problem.
@@ -543,14 +558,14 @@ DebugAMiscompilation(BugDriver &BD,
if (!BugpointIsInterrupted)
ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions);
- std::cout << "\n*** The following function"
- << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
- << " being miscompiled: ";
+ outs() << "\n*** The following function"
+ << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
+ << " being miscompiled: ";
PrintFunctionList(MiscompiledFunctions);
- std::cout << '\n';
+ outs() << '\n';
}
- if (!BugpointIsInterrupted &&
+ if (!BugpointIsInterrupted && !DisableBlockExtraction &&
ExtractBlocks(BD, TestFn, MiscompiledFunctions)) {
// Okay, we extracted some blocks and the problem still appears. See if we
// can eliminate some of the created functions from being candidates.
@@ -563,11 +578,11 @@ DebugAMiscompilation(BugDriver &BD,
// Do the reduction...
ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions);
- std::cout << "\n*** The following function"
- << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
- << " being miscompiled: ";
+ outs() << "\n*** The following function"
+ << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
+ << " being miscompiled: ";
PrintFunctionList(MiscompiledFunctions);
- std::cout << '\n';
+ outs() << '\n';
}
return MiscompiledFunctions;
@@ -580,15 +595,15 @@ DebugAMiscompilation(BugDriver &BD,
static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe) {
// Run the optimization passes on ToOptimize, producing a transformed version
// of the functions being tested.
- std::cout << " Optimizing functions being tested: ";
+ outs() << " Optimizing functions being tested: ";
Module *Optimized = BD.runPassesOn(Test, BD.getPassesToRun(),
/*AutoDebugCrashes*/true);
- std::cout << "done.\n";
+ outs() << "done.\n";
delete Test;
- std::cout << " Checking to see if the merged program executes correctly: ";
+ outs() << " Checking to see if the merged program executes correctly: ";
bool Broken = TestMergedProgram(BD, Optimized, Safe, true);
- std::cout << (Broken ? " nope.\n" : " yup.\n");
+ outs() << (Broken ? " nope.\n" : " yup.\n");
return Broken;
}
@@ -601,33 +616,33 @@ bool BugDriver::debugMiscompilation() {
// Make sure something was miscompiled...
if (!BugpointIsInterrupted)
if (!ReduceMiscompilingPasses(*this).reduceList(PassesToRun)) {
- std::cerr << "*** Optimized program matches reference output! No problem"
- << " detected...\nbugpoint can't help you with your problem!\n";
+ errs() << "*** Optimized program matches reference output! No problem"
+ << " detected...\nbugpoint can't help you with your problem!\n";
return false;
}
- std::cout << "\n*** Found miscompiling pass"
- << (getPassesToRun().size() == 1 ? "" : "es") << ": "
- << getPassesString(getPassesToRun()) << '\n';
+ outs() << "\n*** Found miscompiling pass"
+ << (getPassesToRun().size() == 1 ? "" : "es") << ": "
+ << getPassesString(getPassesToRun()) << '\n';
EmitProgressBitcode("passinput");
std::vector<Function*> MiscompiledFunctions =
DebugAMiscompilation(*this, TestOptimizer);
// Output a bunch of bitcode files for the user...
- std::cout << "Outputting reduced bitcode files which expose the problem:\n";
+ outs() << "Outputting reduced bitcode files which expose the problem:\n";
DenseMap<const Value*, Value*> ValueMap;
Module *ToNotOptimize = CloneModule(getProgram(), ValueMap);
Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
MiscompiledFunctions,
ValueMap);
- std::cout << " Non-optimized portion: ";
+ outs() << " Non-optimized portion: ";
ToNotOptimize = swapProgramIn(ToNotOptimize);
EmitProgressBitcode("tonotoptimize", true);
setNewProgram(ToNotOptimize); // Delete hacked module.
- std::cout << " Portion that is input to optimizer: ";
+ outs() << " Portion that is input to optimizer: ";
ToOptimize = swapProgramIn(ToOptimize);
EmitProgressBitcode("tooptimize");
setNewProgram(ToOptimize); // Delete hacked module.
@@ -672,12 +687,12 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
}
// Call the old main function and return its result
- BasicBlock *BB = BasicBlock::Create("entry", newMain);
+ BasicBlock *BB = BasicBlock::Create(Safe->getContext(), "entry", newMain);
CallInst *call = CallInst::Create(oldMainProto, args.begin(), args.end(),
"", BB);
// If the type of old function wasn't void, return value of call
- ReturnInst::Create(call, BB);
+ ReturnInst::Create(Safe->getContext(), call, BB);
}
// The second nasty issue we must deal with in the JIT is that the Safe
@@ -689,8 +704,9 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Prototype: void *getPointerToNamedFunction(const char* Name)
Constant *resolverFunc =
Safe->getOrInsertFunction("getPointerToNamedFunction",
- PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty), (Type *)0);
+ Type::getInt8PtrTy(Safe->getContext()),
+ Type::getInt8PtrTy(Safe->getContext()),
+ (Type *)0);
// Use the function we just added to get addresses of functions we need.
for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) {
@@ -701,18 +717,20 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Don't forward functions which are external in the test module too.
if (TestFn && !TestFn->isDeclaration()) {
// 1. Add a string constant with its name to the global file
- Constant *InitArray = ConstantArray::get(F->getName());
+ Constant *InitArray = ConstantArray::get(F->getContext(), F->getName());
GlobalVariable *funcName =
- new GlobalVariable(InitArray->getType(), true /*isConstant*/,
+ new GlobalVariable(*Safe, InitArray->getType(), true /*isConstant*/,
GlobalValue::InternalLinkage, InitArray,
- F->getName() + "_name", Safe);
+ F->getName() + "_name");
// 2. Use `GetElementPtr *funcName, 0, 0' to convert the string to an
// sbyte* so it matches the signature of the resolver function.
// GetElementPtr *funcName, ulong 0, ulong 0
- std::vector<Constant*> GEPargs(2,Constant::getNullValue(Type::Int32Ty));
- Value *GEP = ConstantExpr::getGetElementPtr(funcName, &GEPargs[0], 2);
+ std::vector<Constant*> GEPargs(2,
+ Constant::getNullValue(Type::getInt32Ty(F->getContext())));
+ Value *GEP =
+ ConstantExpr::getGetElementPtr(funcName, &GEPargs[0], 2);
std::vector<Value*> ResolverArgs;
ResolverArgs.push_back(GEP);
@@ -722,8 +740,9 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Create a new global to hold the cached function pointer.
Constant *NullPtr = ConstantPointerNull::get(F->getType());
GlobalVariable *Cache =
- new GlobalVariable(F->getType(), false,GlobalValue::InternalLinkage,
- NullPtr,F->getName()+".fpcache", F->getParent());
+ new GlobalVariable(*F->getParent(), F->getType(),
+ false, GlobalValue::InternalLinkage,
+ NullPtr,F->getName()+".fpcache");
// Construct a new stub function that will re-route calls to F
const FunctionType *FuncTy = F->getFunctionType();
@@ -731,14 +750,17 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
GlobalValue::InternalLinkage,
F->getName() + "_wrapper",
F->getParent());
- BasicBlock *EntryBB = BasicBlock::Create("entry", FuncWrapper);
- BasicBlock *DoCallBB = BasicBlock::Create("usecache", FuncWrapper);
- BasicBlock *LookupBB = BasicBlock::Create("lookupfp", FuncWrapper);
+ BasicBlock *EntryBB = BasicBlock::Create(F->getContext(),
+ "entry", FuncWrapper);
+ BasicBlock *DoCallBB = BasicBlock::Create(F->getContext(),
+ "usecache", FuncWrapper);
+ BasicBlock *LookupBB = BasicBlock::Create(F->getContext(),
+ "lookupfp", FuncWrapper);
// Check to see if we already looked up the value.
Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB);
- Value *IsNull = new ICmpInst(ICmpInst::ICMP_EQ, CachedVal,
- NullPtr, "isNull", EntryBB);
+ Value *IsNull = new ICmpInst(*EntryBB, ICmpInst::ICMP_EQ, CachedVal,
+ NullPtr, "isNull");
BranchInst::Create(LookupBB, DoCallBB, IsNull, EntryBB);
// Resolve the call to function F via the JIT API:
@@ -770,13 +792,13 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
Args.push_back(i);
// Pass on the arguments to the real function, return its result
- if (F->getReturnType() == Type::VoidTy) {
+ if (F->getReturnType() == Type::getVoidTy(F->getContext())) {
CallInst::Create(FuncPtr, Args.begin(), Args.end(), "", DoCallBB);
- ReturnInst::Create(DoCallBB);
+ ReturnInst::Create(F->getContext(), DoCallBB);
} else {
CallInst *Call = CallInst::Create(FuncPtr, Args.begin(), Args.end(),
"retval", DoCallBB);
- ReturnInst::Create(Call, DoCallBB);
+      ReturnInst::Create(F->getContext(), Call, DoCallBB);
}
// Use the wrapper function instead of the old function
@@ -787,7 +809,7 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
}
if (verifyModule(*Test) || verifyModule(*Safe)) {
- std::cerr << "Bugpoint has a bug, which corrupted a module!!\n";
+ errs() << "Bugpoint has a bug, which corrupted a module!!\n";
abort();
}
}
@@ -804,12 +826,13 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe) {
sys::Path TestModuleBC("bugpoint.test.bc");
std::string ErrMsg;
if (TestModuleBC.makeUnique(true, &ErrMsg)) {
- std::cerr << BD.getToolName() << "Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << BD.getToolName() << "Error making unique filename: "
+ << ErrMsg << "\n";
exit(1);
}
- if (BD.writeProgramToFile(TestModuleBC.toString(), Test)) {
- std::cerr << "Error writing bitcode to `" << TestModuleBC << "'\nExiting.";
+ if (BD.writeProgramToFile(TestModuleBC.str(), Test)) {
+ errs() << "Error writing bitcode to `" << TestModuleBC.str()
+ << "'\nExiting.";
exit(1);
}
delete Test;
@@ -817,26 +840,27 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe) {
// Make the shared library
sys::Path SafeModuleBC("bugpoint.safe.bc");
if (SafeModuleBC.makeUnique(true, &ErrMsg)) {
- std::cerr << BD.getToolName() << "Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << BD.getToolName() << "Error making unique filename: "
+ << ErrMsg << "\n";
exit(1);
}
- if (BD.writeProgramToFile(SafeModuleBC.toString(), Safe)) {
- std::cerr << "Error writing bitcode to `" << SafeModuleBC << "'\nExiting.";
+ if (BD.writeProgramToFile(SafeModuleBC.str(), Safe)) {
+ errs() << "Error writing bitcode to `" << SafeModuleBC.str()
+ << "'\nExiting.";
exit(1);
}
- std::string SharedObject = BD.compileSharedObject(SafeModuleBC.toString());
+ std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str());
delete Safe;
// Run the code generator on the `Test' code, loading the shared library.
// The function returns whether or not the new output differs from reference.
- int Result = BD.diffProgram(TestModuleBC.toString(), SharedObject, false);
+ int Result = BD.diffProgram(TestModuleBC.str(), SharedObject, false);
if (Result)
- std::cerr << ": still failing!\n";
+ errs() << ": still failing!\n";
else
- std::cerr << ": didn't fail.\n";
+ errs() << ": didn't fail.\n";
TestModuleBC.eraseFromDisk();
SafeModuleBC.eraseFromDisk();
sys::Path(SharedObject).eraseFromDisk();
@@ -850,14 +874,14 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe) {
bool BugDriver::debugCodeGenerator() {
if ((void*)SafeInterpreter == (void*)Interpreter) {
std::string Result = executeProgramSafely("bugpoint.safe.out");
- std::cout << "\n*** The \"safe\" i.e. 'known good' backend cannot match "
- << "the reference diff. This may be due to a\n front-end "
- << "bug or a bug in the original program, but this can also "
- << "happen if bugpoint isn't running the program with the "
- << "right flags or input.\n I left the result of executing "
- << "the program with the \"safe\" backend in this file for "
- << "you: '"
- << Result << "'.\n";
+ outs() << "\n*** The \"safe\" i.e. 'known good' backend cannot match "
+ << "the reference diff. This may be due to a\n front-end "
+ << "bug or a bug in the original program, but this can also "
+ << "happen if bugpoint isn't running the program with the "
+ << "right flags or input.\n I left the result of executing "
+ << "the program with the \"safe\" backend in this file for "
+ << "you: '"
+ << Result << "'.\n";
return true;
}
@@ -876,13 +900,14 @@ bool BugDriver::debugCodeGenerator() {
sys::Path TestModuleBC("bugpoint.test.bc");
std::string ErrMsg;
if (TestModuleBC.makeUnique(true, &ErrMsg)) {
- std::cerr << getToolName() << "Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << getToolName() << "Error making unique filename: "
+ << ErrMsg << "\n";
exit(1);
}
- if (writeProgramToFile(TestModuleBC.toString(), ToCodeGen)) {
- std::cerr << "Error writing bitcode to `" << TestModuleBC << "'\nExiting.";
+ if (writeProgramToFile(TestModuleBC.str(), ToCodeGen)) {
+ errs() << "Error writing bitcode to `" << TestModuleBC.str()
+ << "'\nExiting.";
exit(1);
}
delete ToCodeGen;
@@ -890,43 +915,45 @@ bool BugDriver::debugCodeGenerator() {
// Make the shared library
sys::Path SafeModuleBC("bugpoint.safe.bc");
if (SafeModuleBC.makeUnique(true, &ErrMsg)) {
- std::cerr << getToolName() << "Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << getToolName() << "Error making unique filename: "
+ << ErrMsg << "\n";
exit(1);
}
- if (writeProgramToFile(SafeModuleBC.toString(), ToNotCodeGen)) {
- std::cerr << "Error writing bitcode to `" << SafeModuleBC << "'\nExiting.";
+ if (writeProgramToFile(SafeModuleBC.str(), ToNotCodeGen)) {
+ errs() << "Error writing bitcode to `" << SafeModuleBC.str()
+ << "'\nExiting.";
exit(1);
}
- std::string SharedObject = compileSharedObject(SafeModuleBC.toString());
+ std::string SharedObject = compileSharedObject(SafeModuleBC.str());
delete ToNotCodeGen;
- std::cout << "You can reproduce the problem with the command line: \n";
+ outs() << "You can reproduce the problem with the command line: \n";
if (isExecutingJIT()) {
- std::cout << " lli -load " << SharedObject << " " << TestModuleBC;
+ outs() << " lli -load " << SharedObject << " " << TestModuleBC.str();
} else {
- std::cout << " llc -f " << TestModuleBC << " -o " << TestModuleBC<< ".s\n";
- std::cout << " gcc " << SharedObject << " " << TestModuleBC
- << ".s -o " << TestModuleBC << ".exe";
+ outs() << " llc -f " << TestModuleBC.str() << " -o " << TestModuleBC.str()
+ << ".s\n";
+ outs() << " gcc " << SharedObject << " " << TestModuleBC.str()
+ << ".s -o " << TestModuleBC.str() << ".exe";
#if defined (HAVE_LINK_R)
- std::cout << " -Wl,-R.";
+ outs() << " -Wl,-R.";
#endif
- std::cout << "\n";
- std::cout << " " << TestModuleBC << ".exe";
+ outs() << "\n";
+ outs() << " " << TestModuleBC.str() << ".exe";
}
for (unsigned i=0, e = InputArgv.size(); i != e; ++i)
- std::cout << " " << InputArgv[i];
- std::cout << '\n';
- std::cout << "The shared object was created with:\n llc -march=c "
- << SafeModuleBC << " -o temporary.c\n"
- << " gcc -xc temporary.c -O2 -o " << SharedObject
-#if defined(sparc) || defined(__sparc__) || defined(__sparcv9)
- << " -G" // Compile a shared library, `-G' for Sparc
-#else
- << " -fPIC -shared" // `-shared' for Linux/X86, maybe others
-#endif
- << " -fno-strict-aliasing\n";
+ outs() << " " << InputArgv[i];
+ outs() << '\n';
+ outs() << "The shared object was created with:\n llc -march=c "
+ << SafeModuleBC.str() << " -o temporary.c\n"
+ << " gcc -xc temporary.c -O2 -o " << SharedObject;
+ if (TargetTriple.getArch() == Triple::sparc)
+ outs() << " -G"; // Compile a shared library, `-G' for Sparc
+ else
+ outs() << " -fPIC -shared"; // `-shared' for Linux/X86, maybe others
+
+ outs() << " -fno-strict-aliasing\n";
return false;
}
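
DisambiguateGlobalSymbols in the hunks above now asks the Mangler for each mangled name while explicitly skipping inline-asm names (those beginning with byte '\1') and intrinsics. A sketch of just that filter against the 2.6-era Mangler interface; mangleModule() is illustrative:

  #include "llvm/Module.h"
  #include "llvm/Support/Mangler.h"
  using namespace llvm;

  static void mangleModule(Module &M, Mangler &Mang) {
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
      if (F->hasName() && F->getName()[0] == 1)
        continue;                       // keep asm names verbatim
      if (F->getIntrinsicID() != 0)
        continue;                       // intrinsics keep their names too
      F->setName(Mang.getMangledName(F));
    }
  }
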
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 741be24adefd..9f712e097a74 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -27,10 +27,9 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
#include "llvm/System/Program.h"
-#include "llvm/Config/alloca.h"
#define DONT_GET_PLUGIN_LOADER_OPTION
#include "llvm/Support/PluginLoader.h"
@@ -38,6 +37,9 @@
#include <fstream>
using namespace llvm;
+namespace llvm {
+ extern cl::opt<std::string> OutputPrefix;
+}
namespace {
// ChildOutput - This option captures the name of the child output file that
@@ -52,10 +54,10 @@ namespace {
///
bool BugDriver::writeProgramToFile(const std::string &Filename,
Module *M) const {
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- std::ofstream Out(Filename.c_str(), io_mode);
- if (!Out.good()) return true;
+ std::string ErrInfo;
+ raw_fd_ostream Out(Filename.c_str(), ErrInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrInfo.empty()) return true;
WriteBitcodeToFile(M ? M : Program, Out);
return false;
@@ -69,26 +71,26 @@ void BugDriver::EmitProgressBitcode(const std::string &ID, bool NoFlyer) {
// Output the input to the current pass to a bitcode file, emit a message
// telling the user how to reproduce it: opt -foo blah.bc
//
- std::string Filename = "bugpoint-" + ID + ".bc";
+ std::string Filename = OutputPrefix + "-" + ID + ".bc";
if (writeProgramToFile(Filename)) {
- cerr << "Error opening file '" << Filename << "' for writing!\n";
+ errs() << "Error opening file '" << Filename << "' for writing!\n";
return;
}
- cout << "Emitted bitcode to '" << Filename << "'\n";
+ outs() << "Emitted bitcode to '" << Filename << "'\n";
if (NoFlyer || PassesToRun.empty()) return;
- cout << "\n*** You can reproduce the problem with: ";
- cout << "opt " << Filename << " ";
- cout << getPassesString(PassesToRun) << "\n";
+ outs() << "\n*** You can reproduce the problem with: ";
+ if (UseValgrind) outs() << "valgrind ";
+ outs() << "opt " << Filename << " ";
+ outs() << getPassesString(PassesToRun) << "\n";
}
int BugDriver::runPassesAsChild(const std::vector<const PassInfo*> &Passes) {
-
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- std::ofstream OutFile(ChildOutput.c_str(), io_mode);
- if (!OutFile.good()) {
- cerr << "Error opening bitcode file: " << ChildOutput << "\n";
+ std::string ErrInfo;
+ raw_fd_ostream OutFile(ChildOutput.c_str(), ErrInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrInfo.empty()) {
+ errs() << "Error opening bitcode file: " << ChildOutput << "\n";
return 1;
}
@@ -100,13 +102,13 @@ int BugDriver::runPassesAsChild(const std::vector<const PassInfo*> &Passes) {
if (Passes[i]->getNormalCtor())
PM.add(Passes[i]->getNormalCtor()());
else
- cerr << "Cannot create pass yet: " << Passes[i]->getPassName() << "\n";
+ errs() << "Cannot create pass yet: " << Passes[i]->getPassName() << "\n";
}
// Check that the module is well formed on completion of optimization
PM.add(createVerifierPass());
// Write bitcode out to disk as the last step...
- PM.add(CreateBitcodeWriterPass(OutFile));
+ PM.add(createBitcodeWriterPass(OutFile));
// Run all queued passes.
PM.run(*Program);
@@ -121,58 +123,58 @@ cl::opt<bool> SilencePasses("silence-passes", cl::desc("Suppress output of runni
/// optimizations fail for some reason (optimizer crashes), return true,
/// otherwise return false. If DeleteOutput is set to true, the bitcode is
/// deleted on success, and the filename string is undefined. This prints to
-/// cout a single line message indicating whether compilation was successful or
-/// failed.
+/// outs() a single line message indicating whether compilation was successful
+/// or failed.
///
bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
std::string &OutputFilename, bool DeleteOutput,
bool Quiet, unsigned NumExtraArgs,
const char * const *ExtraArgs) const {
// setup the output file name
- cout << std::flush;
- sys::Path uniqueFilename("bugpoint-output.bc");
+ outs().flush();
+ sys::Path uniqueFilename(OutputPrefix + "-output.bc");
std::string ErrMsg;
if (uniqueFilename.makeUnique(true, &ErrMsg)) {
- cerr << getToolName() << ": Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << getToolName() << ": Error making unique filename: "
+ << ErrMsg << "\n";
return(1);
}
- OutputFilename = uniqueFilename.toString();
+ OutputFilename = uniqueFilename.str();
// set up the input file name
- sys::Path inputFilename("bugpoint-input.bc");
+ sys::Path inputFilename(OutputPrefix + "-input.bc");
if (inputFilename.makeUnique(true, &ErrMsg)) {
- cerr << getToolName() << ": Error making unique filename: "
- << ErrMsg << "\n";
+ errs() << getToolName() << ": Error making unique filename: "
+ << ErrMsg << "\n";
return(1);
}
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- std::ofstream InFile(inputFilename.c_str(), io_mode);
- if (!InFile.good()) {
- cerr << "Error opening bitcode file: " << inputFilename << "\n";
- return(1);
+
+ std::string ErrInfo;
+ raw_fd_ostream InFile(inputFilename.c_str(), ErrInfo,
+ raw_fd_ostream::F_Binary);
+
+ if (!ErrInfo.empty()) {
+ errs() << "Error opening bitcode file: " << inputFilename.str() << "\n";
+ return 1;
}
WriteBitcodeToFile(Program, InFile);
InFile.close();
// setup the child process' arguments
- const char** args = (const char**)
- alloca(sizeof(const char*) *
- (Passes.size()+13+2*PluginLoader::getNumPlugins()+NumExtraArgs));
- int n = 0;
+ SmallVector<const char*, 8> Args;
sys::Path tool = sys::Program::FindProgramByName(ToolName);
if (UseValgrind) {
- args[n++] = "valgrind";
- args[n++] = "--error-exitcode=1";
- args[n++] = "-q";
- args[n++] = tool.c_str();
+ Args.push_back("valgrind");
+ Args.push_back("--error-exitcode=1");
+ Args.push_back("-q");
+ Args.push_back(tool.c_str());
} else
- args[n++] = ToolName.c_str();
+ Args.push_back(ToolName);
- args[n++] = "-as-child";
- args[n++] = "-child-output";
- args[n++] = OutputFilename.c_str();
+ Args.push_back("-as-child");
+ Args.push_back("-child-output");
+ Args.push_back(OutputFilename.c_str());
std::vector<std::string> pass_args;
for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
pass_args.push_back( std::string("-load"));
@@ -183,11 +185,11 @@ bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
pass_args.push_back( std::string("-") + (*I)->getPassArgument() );
for (std::vector<std::string>::const_iterator I = pass_args.begin(),
E = pass_args.end(); I != E; ++I )
- args[n++] = I->c_str();
- args[n++] = inputFilename.c_str();
+ Args.push_back(I->c_str());
+ Args.push_back(inputFilename.c_str());
for (unsigned i = 0; i < NumExtraArgs; ++i)
- args[n++] = *ExtraArgs;
- args[n++] = 0;
+ Args.push_back(*ExtraArgs);
+ Args.push_back(0);
sys::Path prog;
if (UseValgrind)
@@ -199,7 +201,8 @@ bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
sys::Path Nowhere;
const sys::Path *Redirects[3] = {0, &Nowhere, &Nowhere};
- int result = sys::Program::ExecuteAndWait(prog, args, 0, (SilencePasses ? Redirects : 0),
+ int result = sys::Program::ExecuteAndWait(prog, Args.data(), 0,
+ (SilencePasses ? Redirects : 0),
Timeout, MemoryLimit, &ErrMsg);
// If we are supposed to delete the bitcode file or if the passes crashed,
@@ -212,17 +215,17 @@ bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
if (!Quiet) {
if (result == 0)
- cout << "Success!\n";
+ outs() << "Success!\n";
else if (result > 0)
- cout << "Exited with error code '" << result << "'\n";
+ outs() << "Exited with error code '" << result << "'\n";
else if (result < 0) {
if (result == -1)
- cout << "Execute failed: " << ErrMsg << "\n";
+ outs() << "Execute failed: " << ErrMsg << "\n";
else
- cout << "Crashed with signal #" << abs(result) << "\n";
+ outs() << "Crashed with signal #" << abs(result) << "\n";
}
if (result & 0x01000000)
- cout << "Dumped core\n";
+ outs() << "Dumped core\n";
}
// Was the child successful?
@@ -242,8 +245,8 @@ Module *BugDriver::runPassesOn(Module *M,
if (runPasses(Passes, BitcodeResult, false/*delete*/, true/*quiet*/,
NumExtraArgs, ExtraArgs)) {
if (AutoDebugCrashes) {
- cerr << " Error running this sequence of passes"
- << " on the input program!\n";
+ errs() << " Error running this sequence of passes"
+ << " on the input program!\n";
delete OldProgram;
EmitProgressBitcode("pass-error", false);
exit(debugOptimizerCrash());
@@ -257,8 +260,8 @@ Module *BugDriver::runPassesOn(Module *M,
Module *Ret = ParseInputFile(BitcodeResult, Context);
if (Ret == 0) {
- cerr << getToolName() << ": Error reading bitcode file '"
- << BitcodeResult << "'!\n";
+ errs() << getToolName() << ": Error reading bitcode file '"
+ << BitcodeResult << "'!\n";
exit(1);
}
sys::Path(BitcodeResult).eraseFromDisk(); // No longer need the file on disk
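
OptimizerDriver.cpp swaps std::ofstream for raw_fd_ostream, whose constructor reports open failures through a string out-parameter rather than a failbit, and replaces the alloca'd argv array with a SmallVector. A minimal sketch of the new file-writing idiom under the 2.6-era headers; writeBitcode() is illustrative:

  #include "llvm/Bitcode/ReaderWriter.h"
  #include "llvm/Module.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static bool writeBitcode(const std::string &Filename, Module *M) {
    std::string ErrInfo;
    raw_fd_ostream Out(Filename.c_str(), ErrInfo,
                       raw_fd_ostream::F_Binary);  // open in binary mode
    if (!ErrInfo.empty())
      return true;                  // open failed; the message is in ErrInfo
    WriteBitcodeToFile(M, Out);     // Out flushes and closes on destruction
    return false;
  }
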
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 978e60bed38a..4551d419d73a 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -13,16 +13,22 @@
#define DEBUG_TYPE "toolrunner"
#include "ToolRunner.h"
-#include "llvm/Config/config.h" // for HAVE_LINK_R
#include "llvm/System/Program.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h" // for HAVE_LINK_R
#include <fstream>
#include <sstream>
-#include <iostream>
using namespace llvm;
+namespace llvm {
+ cl::opt<bool>
+ SaveTemps("save-temps", cl::init(false), cl::desc("Save temporary files"));
+}
+
namespace {
cl::opt<std::string>
RemoteClient("remote-client",
@@ -33,6 +39,10 @@ namespace {
cl::desc("Remote execution (rsh/ssh) host"));
cl::opt<std::string>
+ RemotePort("remote-port",
+ cl::desc("Remote execution (rsh/ssh) port"));
+
+ cl::opt<std::string>
RemoteUser("remote-user",
cl::desc("Remote execution (rsh/ssh) user id"));
@@ -43,8 +53,8 @@ namespace {
ToolExecutionError::~ToolExecutionError() throw() { }
-/// RunProgramWithTimeout - This function provides an alternate interface to the
-/// sys::Program::ExecuteAndWait interface.
+/// RunProgramWithTimeout - This function provides an alternate interface
+/// to the sys::Program::ExecuteAndWait interface.
/// @see sys:Program::ExecuteAndWait
static int RunProgramWithTimeout(const sys::Path &ProgramPath,
const char **Args,
@@ -57,20 +67,74 @@ static int RunProgramWithTimeout(const sys::Path &ProgramPath,
redirects[0] = &StdInFile;
redirects[1] = &StdOutFile;
redirects[2] = &StdErrFile;
-
- if (0) {
- std::cerr << "RUN:";
+
+#if 0 // For debug purposes
+ {
+ errs() << "RUN:";
for (unsigned i = 0; Args[i]; ++i)
- std::cerr << " " << Args[i];
- std::cerr << "\n";
+ errs() << " " << Args[i];
+ errs() << "\n";
}
+#endif
return
sys::Program::ExecuteAndWait(ProgramPath, Args, 0, redirects,
NumSeconds, MemoryLimit);
}
+/// RunProgramRemotelyWithTimeout - This function runs the given program
+/// remotely using the given remote client and the sys::Program::ExecuteAndWait.
+/// Returns the remote program's exit code, or reports a remote client error
+/// if the client fails. The remote client is required to return 255 on
+/// failure, and the program's exit code otherwise.
+/// @see sys:Program::ExecuteAndWait
+static int RunProgramRemotelyWithTimeout(const sys::Path &RemoteClientPath,
+ const char **Args,
+ const sys::Path &StdInFile,
+ const sys::Path &StdOutFile,
+ const sys::Path &StdErrFile,
+ unsigned NumSeconds = 0,
+ unsigned MemoryLimit = 0) {
+ const sys::Path* redirects[3];
+ redirects[0] = &StdInFile;
+ redirects[1] = &StdOutFile;
+ redirects[2] = &StdErrFile;
+
+#if 0 // For debug purposes
+ {
+ errs() << "RUN:";
+ for (unsigned i = 0; Args[i]; ++i)
+ errs() << " " << Args[i];
+ errs() << "\n";
+ }
+#endif
+ // Run the program remotely with the remote client
+ int ReturnCode = sys::Program::ExecuteAndWait(RemoteClientPath, Args,
+ 0, redirects, NumSeconds, MemoryLimit);
+
+  // Did the remote client fail?
+ if (255 == ReturnCode) {
+ std::ostringstream OS;
+ OS << "\nError running remote client:\n ";
+ for (const char **Arg = Args; *Arg; ++Arg)
+ OS << " " << *Arg;
+ OS << "\n";
+
+ // The error message is in the output file, let's print it out from there.
+ std::ifstream ErrorFile(StdOutFile.c_str());
+ if (ErrorFile) {
+ std::copy(std::istreambuf_iterator<char>(ErrorFile),
+ std::istreambuf_iterator<char>(),
+ std::ostreambuf_iterator<char>(OS));
+ ErrorFile.close();
+ }
+
+ throw ToolExecutionError(OS.str());
+ }
+
+ return ReturnCode;
+}
static void ProcessFailure(sys::Path ProgPath, const char** Args) {
std::ostringstream OS;
@@ -83,7 +147,7 @@ static void ProcessFailure(sys::Path ProgPath, const char** Args) {
sys::Path ErrorFilename("bugpoint.program_error_messages");
std::string ErrMsg;
if (ErrorFilename.makeUnique(true, &ErrMsg)) {
- std::cerr << "Error making unique filename: " << ErrMsg << "\n";
+ errs() << "Error making unique filename: " << ErrMsg << "\n";
exit(1);
}
RunProgramWithTimeout(ProgPath, Args, sys::Path(""), ErrorFilename,
@@ -154,11 +218,11 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
LLIArgs.push_back(Args[i].c_str());
LLIArgs.push_back(0);
- std::cout << "<lli>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<lli>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = LLIArgs.size()-1; i != e; ++i)
- std::cerr << " " << LLIArgs[i];
- std::cerr << "\n";
+ errs() << " " << LLIArgs[i];
+ errs() << "\n";
);
return RunProgramWithTimeout(sys::Path(LLIPath), &LLIArgs[0],
sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
@@ -166,10 +230,11 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
}
// LLI create method - Try to find the LLI executable
-AbstractInterpreter *AbstractInterpreter::createLLI(const std::string &ProgPath,
+AbstractInterpreter *AbstractInterpreter::createLLI(const char *Argv0,
std::string &Message,
const std::vector<std::string> *ToolArgs) {
- std::string LLIPath = FindExecutable("lli", ProgPath).toString();
+ std::string LLIPath =
+ FindExecutable("lli", Argv0, (void *)(intptr_t)&createLLI).str();
if (!LLIPath.empty()) {
Message = "Found lli: " + LLIPath + "\n";
return new LLI(LLIPath, ToolArgs);
@@ -236,7 +301,6 @@ int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
// Custom execution environment create method, takes the execution command
// as arguments
AbstractInterpreter *AbstractInterpreter::createCustom(
- const std::string &ProgramPath,
std::string &Message,
const std::string &ExecCommandLine) {
@@ -270,7 +334,7 @@ AbstractInterpreter *AbstractInterpreter::createCustom(
pos = ExecCommandLine.find_first_of(delimiters, lastPos);
}
- std::string CmdPath = FindExecutable(Command, ProgramPath).toString();
+ std::string CmdPath = sys::Program::FindProgramByName(Command).str();
if (CmdPath.empty()) {
Message =
std::string("Cannot find '") + Command +
@@ -291,7 +355,7 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
sys::Path uniqueFile(Bitcode+".llc.s");
std::string ErrMsg;
if (uniqueFile.makeUnique(true, &ErrMsg)) {
- std::cerr << "Error making unique filename: " << ErrMsg << "\n";
+ errs() << "Error making unique filename: " << ErrMsg << "\n";
exit(1);
}
OutputAsmFile = uniqueFile;
@@ -304,15 +368,14 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
LLCArgs.push_back ("-o");
LLCArgs.push_back (OutputAsmFile.c_str()); // Output to the Asm file
- LLCArgs.push_back ("-f"); // Overwrite as necessary...
LLCArgs.push_back (Bitcode.c_str()); // This is the input bitcode
LLCArgs.push_back (0);
- std::cout << "<llc>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<llc>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = LLCArgs.size()-1; i != e; ++i)
- std::cerr << " " << LLCArgs[i];
- std::cerr << "\n";
+ errs() << " " << LLCArgs[i];
+ errs() << "\n";
);
if (RunProgramWithTimeout(sys::Path(LLCPath), &LLCArgs[0],
sys::Path(), sys::Path(), sys::Path()))
@@ -338,34 +401,35 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
sys::Path OutputAsmFile;
OutputCode(Bitcode, OutputAsmFile);
- FileRemover OutFileRemover(OutputAsmFile);
+ FileRemover OutFileRemover(OutputAsmFile, !SaveTemps);
std::vector<std::string> GCCArgs(ArgsForGCC);
GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
GCCArgs.insert(GCCArgs.end(), gccArgs.begin(), gccArgs.end());
// Assuming LLC worked, compile the result with GCC and run it.
- return gcc->ExecuteProgram(OutputAsmFile.toString(), Args, GCC::AsmFile,
+ return gcc->ExecuteProgram(OutputAsmFile.str(), Args, GCC::AsmFile,
InputFile, OutputFile, GCCArgs,
Timeout, MemoryLimit);
}
/// createLLC - Try to find the LLC executable
///
-LLC *AbstractInterpreter::createLLC(const std::string &ProgramPath,
+LLC *AbstractInterpreter::createLLC(const char *Argv0,
std::string &Message,
const std::vector<std::string> *Args,
const std::vector<std::string> *GCCArgs) {
- std::string LLCPath = FindExecutable("llc", ProgramPath).toString();
+ std::string LLCPath =
+ FindExecutable("llc", Argv0, (void *)(intptr_t)&createLLC).str();
if (LLCPath.empty()) {
Message = "Cannot find `llc' in executable directory or PATH!\n";
return 0;
}
Message = "Found llc: " + LLCPath + "\n";
- GCC *gcc = GCC::create(ProgramPath, Message, GCCArgs);
+ GCC *gcc = GCC::create(Message, GCCArgs);
if (!gcc) {
- std::cerr << Message << "\n";
+ errs() << Message << "\n";
exit(1);
}
return new LLC(LLCPath, gcc, Args, GCCArgs);
@@ -425,13 +489,13 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
JITArgs.push_back(Args[i].c_str());
JITArgs.push_back(0);
- std::cout << "<jit>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<jit>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = JITArgs.size()-1; i != e; ++i)
- std::cerr << " " << JITArgs[i];
- std::cerr << "\n";
+ errs() << " " << JITArgs[i];
+ errs() << "\n";
);
- DEBUG(std::cerr << "\nSending output to " << OutputFile << "\n");
+ DEBUG(errs() << "\nSending output to " << OutputFile << "\n");
return RunProgramWithTimeout(sys::Path(LLIPath), &JITArgs[0],
sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
Timeout, MemoryLimit);
@@ -439,9 +503,10 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
/// createJIT - Try to find the LLI executable
///
-AbstractInterpreter *AbstractInterpreter::createJIT(const std::string &ProgPath,
+AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
std::string &Message, const std::vector<std::string> *Args) {
- std::string LLIPath = FindExecutable("lli", ProgPath).toString();
+ std::string LLIPath =
+ FindExecutable("lli", Argv0, (void *)(intptr_t)&createJIT).str();
if (!LLIPath.empty()) {
Message = "Found lli: " + LLIPath + "\n";
return new JIT(LLIPath, Args);
@@ -456,7 +521,7 @@ GCC::FileType CBE::OutputCode(const std::string &Bitcode,
sys::Path uniqueFile(Bitcode+".cbe.c");
std::string ErrMsg;
if (uniqueFile.makeUnique(true, &ErrMsg)) {
- std::cerr << "Error making unique filename: " << ErrMsg << "\n";
+ errs() << "Error making unique filename: " << ErrMsg << "\n";
exit(1);
}
OutputCFile = uniqueFile;
@@ -474,11 +539,11 @@ GCC::FileType CBE::OutputCode(const std::string &Bitcode,
LLCArgs.push_back (Bitcode.c_str()); // This is the input bitcode
LLCArgs.push_back (0);
- std::cout << "<cbe>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<cbe>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = LLCArgs.size()-1; i != e; ++i)
- std::cerr << " " << LLCArgs[i];
- std::cerr << "\n";
+ errs() << " " << LLCArgs[i];
+ errs() << "\n";
);
if (RunProgramWithTimeout(LLCPath, &LLCArgs[0], sys::Path(), sys::Path(),
sys::Path()))
@@ -503,33 +568,34 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
sys::Path OutputCFile;
OutputCode(Bitcode, OutputCFile);
- FileRemover CFileRemove(OutputCFile);
+ FileRemover CFileRemove(OutputCFile, !SaveTemps);
std::vector<std::string> GCCArgs(ArgsForGCC);
GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
- return gcc->ExecuteProgram(OutputCFile.toString(), Args, GCC::CFile,
+ return gcc->ExecuteProgram(OutputCFile.str(), Args, GCC::CFile,
InputFile, OutputFile, GCCArgs,
Timeout, MemoryLimit);
}
/// createCBE - Try to find the 'llc' executable
///
-CBE *AbstractInterpreter::createCBE(const std::string &ProgramPath,
+CBE *AbstractInterpreter::createCBE(const char *Argv0,
std::string &Message,
const std::vector<std::string> *Args,
const std::vector<std::string> *GCCArgs) {
- sys::Path LLCPath = FindExecutable("llc", ProgramPath);
+ sys::Path LLCPath =
+ FindExecutable("llc", Argv0, (void *)(intptr_t)&createCBE);
if (LLCPath.isEmpty()) {
Message =
"Cannot find `llc' in executable directory or PATH!\n";
return 0;
}
- Message = "Found llc: " + LLCPath.toString() + "\n";
- GCC *gcc = GCC::create(ProgramPath, Message, GCCArgs);
+ Message = "Found llc: " + LLCPath.str() + "\n";
+ GCC *gcc = GCC::create(Message, GCCArgs);
if (!gcc) {
- std::cerr << Message << "\n";
+ errs() << Message << "\n";
exit(1);
}
return new CBE(LLCPath, gcc, Args);
@@ -538,6 +604,23 @@ CBE *AbstractInterpreter::createCBE(const std::string &ProgramPath,
//===---------------------------------------------------------------------===//
// GCC abstraction
//
+
+static bool
+IsARMArchitecture(std::vector<std::string> Args)
+{
+ for (std::vector<std::string>::const_iterator
+ I = Args.begin(), E = Args.end(); I != E; ++I) {
+ if (StringsEqualNoCase(*I, "-arch")) {
+ ++I;
+ if ((I != E) && StringsEqualNoCase(I->c_str(), "arm", strlen("arm"))) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
int GCC::ExecuteProgram(const std::string &ProgramFile,
const std::vector<std::string> &Args,
FileType fileType,
@@ -561,9 +644,13 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
GCCArgs.push_back("-fno-strict-aliasing");
} else {
GCCArgs.push_back("assembler");
-#ifdef __APPLE__
- GCCArgs.push_back("-force_cpusubtype_ALL");
-#endif
+
+ // For ARM architectures we don't want this flag. bugpoint isn't
+ // explicitly told what architecture it is working on, so we get
+ // it from the gcc flags.
+ if ((TargetTriple.getOS() == Triple::Darwin) &&
+ !IsARMArchitecture(ArgsForGCC))
+ GCCArgs.push_back("-force_cpusubtype_ALL");
}
GCCArgs.push_back(ProgramFile.c_str()); // Specify the input filename...
GCCArgs.push_back("-x");
@@ -572,7 +659,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
sys::Path OutputBinary (ProgramFile+".gcc.exe");
std::string ErrMsg;
if (OutputBinary.makeUnique(true, &ErrMsg)) {
- std::cerr << "Error making unique filename: " << ErrMsg << "\n";
+ errs() << "Error making unique filename: " << ErrMsg << "\n";
exit(1);
}
GCCArgs.push_back(OutputBinary.c_str()); // Output to the right file...
@@ -589,16 +676,15 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
#if defined (HAVE_LINK_R)
GCCArgs.push_back("-Wl,-R."); // Search this dir for .so files
#endif
-#ifdef __sparc__
- GCCArgs.push_back("-mcpu=v9");
-#endif
+ if (TargetTriple.getArch() == Triple::sparc)
+ GCCArgs.push_back("-mcpu=v9");
GCCArgs.push_back(0); // NULL terminator
- std::cout << "<gcc>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<gcc>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = GCCArgs.size()-1; i != e; ++i)
- std::cerr << " " << GCCArgs[i];
- std::cerr << "\n";
+ errs() << " " << GCCArgs[i];
+ errs() << "\n";
);
if (RunProgramWithTimeout(GCCPath, &GCCArgs[0], sys::Path(), sys::Path(),
sys::Path())) {
@@ -613,12 +699,20 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
else {
ProgramArgs.push_back(RemoteClientPath.c_str());
ProgramArgs.push_back(RemoteHost.c_str());
- ProgramArgs.push_back("-l");
- ProgramArgs.push_back(RemoteUser.c_str());
+ if (!RemoteUser.empty()) {
+ ProgramArgs.push_back("-l");
+ ProgramArgs.push_back(RemoteUser.c_str());
+ }
+ if (!RemotePort.empty()) {
+ ProgramArgs.push_back("-p");
+ ProgramArgs.push_back(RemotePort.c_str());
+ }
if (!RemoteExtra.empty()) {
ProgramArgs.push_back(RemoteExtra.c_str());
}
+ // Full path to the binary. We need to cd to the exec directory because
+ // there is a dylib there that the exec expects to find in the CWD.
char* env_pwd = getenv("PWD");
std::string Exec = "cd ";
Exec += env_pwd;
@@ -633,23 +727,26 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
ProgramArgs.push_back(0); // NULL terminator
// Now that we have a binary, run it!
- std::cout << "<program>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<program>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = ProgramArgs.size()-1; i != e; ++i)
- std::cerr << " " << ProgramArgs[i];
- std::cerr << "\n";
+ errs() << " " << ProgramArgs[i];
+ errs() << "\n";
);
- FileRemover OutputBinaryRemover(OutputBinary);
+ FileRemover OutputBinaryRemover(OutputBinary, !SaveTemps);
- if (RemoteClientPath.isEmpty())
+ if (RemoteClientPath.isEmpty()) {
+ DEBUG(errs() << "<run locally>";);
return RunProgramWithTimeout(OutputBinary, &ProgramArgs[0],
sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
Timeout, MemoryLimit);
- else
- return RunProgramWithTimeout(sys::Path(RemoteClientPath), &ProgramArgs[0],
- sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
- Timeout, MemoryLimit);
+ } else {
+ outs() << "<run remotely>"; outs().flush();
+ return RunProgramRemotelyWithTimeout(sys::Path(RemoteClientPath),
+ &ProgramArgs[0], sys::Path(InputFile), sys::Path(OutputFile),
+ sys::Path(OutputFile), Timeout, MemoryLimit);
+ }
}
int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
@@ -658,10 +755,10 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
sys::Path uniqueFilename(InputFile+LTDL_SHLIB_EXT);
std::string ErrMsg;
if (uniqueFilename.makeUnique(true, &ErrMsg)) {
- std::cerr << "Error making unique filename: " << ErrMsg << "\n";
+ errs() << "Error making unique filename: " << ErrMsg << "\n";
exit(1);
}
- OutputFile = uniqueFilename.toString();
+ OutputFile = uniqueFilename.str();
std::vector<const char*> GCCArgs;
@@ -678,27 +775,27 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
GCCArgs.push_back(InputFile.c_str()); // Specify the input filename.
GCCArgs.push_back("-x");
GCCArgs.push_back("none");
-#if defined(sparc) || defined(__sparc__) || defined(__sparcv9)
- GCCArgs.push_back("-G"); // Compile a shared library, `-G' for Sparc
-#elif defined(__APPLE__)
- // link all source files into a single module in data segment, rather than
- // generating blocks. dynamic_lookup requires that you set
- // MACOSX_DEPLOYMENT_TARGET=10.3 in your env. FIXME: it would be better for
- // bugpoint to just pass that in the environment of GCC.
- GCCArgs.push_back("-single_module");
- GCCArgs.push_back("-dynamiclib"); // `-dynamiclib' for MacOS X/PowerPC
- GCCArgs.push_back("-undefined");
- GCCArgs.push_back("dynamic_lookup");
-#else
- GCCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
-#endif
+ if (TargetTriple.getArch() == Triple::sparc)
+ GCCArgs.push_back("-G"); // Compile a shared library, `-G' for Sparc
+ else if (TargetTriple.getOS() == Triple::Darwin) {
+ // link all source files into a single module in data segment, rather than
+ // generating blocks. dynamic_lookup requires that you set
+ // MACOSX_DEPLOYMENT_TARGET=10.3 in your env. FIXME: it would be better for
+ // bugpoint to just pass that in the environment of GCC.
+ GCCArgs.push_back("-single_module");
+ GCCArgs.push_back("-dynamiclib"); // `-dynamiclib' for MacOS X/PowerPC
+ GCCArgs.push_back("-undefined");
+ GCCArgs.push_back("dynamic_lookup");
+ } else
+ GCCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
+
+ if ((TargetTriple.getArch() == Triple::alpha) ||
+ (TargetTriple.getArch() == Triple::x86_64))
+ GCCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
+
+ if (TargetTriple.getArch() == Triple::sparc)
+ GCCArgs.push_back("-mcpu=v9");
-#if defined(__ia64__) || defined(__alpha__) || defined(__amd64__)
- GCCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
-#endif
-#ifdef __sparc__
- GCCArgs.push_back("-mcpu=v9");
-#endif
GCCArgs.push_back("-o");
GCCArgs.push_back(OutputFile.c_str()); // Output to the right filename.
GCCArgs.push_back("-O2"); // Optimize the program a bit.
@@ -715,11 +812,11 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
- std::cout << "<gcc>" << std::flush;
- DEBUG(std::cerr << "\nAbout to run:\t";
+ outs() << "<gcc>"; outs().flush();
+ DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i=0, e = GCCArgs.size()-1; i != e; ++i)
- std::cerr << " " << GCCArgs[i];
- std::cerr << "\n";
+ errs() << " " << GCCArgs[i];
+ errs() << "\n";
);
if (RunProgramWithTimeout(GCCPath, &GCCArgs[0], sys::Path(), sys::Path(),
sys::Path())) {
@@ -731,9 +828,9 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
/// create - Try to find the `gcc' executable
///
-GCC *GCC::create(const std::string &ProgramPath, std::string &Message,
+GCC *GCC::create(std::string &Message,
const std::vector<std::string> *Args) {
- sys::Path GCCPath = FindExecutable("gcc", ProgramPath);
+ sys::Path GCCPath = sys::Program::FindProgramByName("gcc");
if (GCCPath.isEmpty()) {
Message = "Cannot find `gcc' in executable directory or PATH!\n";
return 0;
@@ -741,8 +838,8 @@ GCC *GCC::create(const std::string &ProgramPath, std::string &Message,
sys::Path RemoteClientPath;
if (!RemoteClient.empty())
- RemoteClientPath = FindExecutable(RemoteClient.c_str(), ProgramPath);
+ RemoteClientPath = sys::Program::FindProgramByName(RemoteClient);
- Message = "Found gcc: " + GCCPath.toString() + "\n";
+ Message = "Found gcc: " + GCCPath.str() + "\n";
return new GCC(GCCPath, RemoteClientPath, Args);
}
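
A gloss on the FindExecutable churn through this file: the old overload searched relative to a caller-supplied ProgramPath, while the new one locates sibling tools relative to the running bugpoint binary itself, from argv[0] plus the address of any symbol inside the executable. A minimal sketch of the idiom, using only the 2.6-era calls visible in this diff (LocateTool is a hypothetical helper, not part of the change):

    #include "llvm/Support/SystemUtils.h"   // FindExecutable
    #include "llvm/System/Path.h"
    #include "llvm/System/Program.h"        // sys::Program::FindProgramByName
    #include <stdint.h>                     // intptr_t

    static llvm::sys::Path LocateTool(const char *Argv0) {
      // Any symbol in this executable lets FindExecutable recover the
      // binary's real directory, even when it was launched via PATH.
      llvm::sys::Path P =
          llvm::FindExecutable("llc", Argv0, (void *)(intptr_t)&LocateTool);
      if (P.isEmpty())
        P = llvm::sys::Program::FindProgramByName("llc"); // plain PATH search,
      return P;                                           // as createCustom does
    }
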
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index 721f66c126cc..39b0bbfdd5ec 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -17,12 +17,18 @@
#ifndef BUGPOINT_TOOLRUNNER_H
#define BUGPOINT_TOOLRUNNER_H
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SystemUtils.h"
+#include "llvm/System/Path.h"
#include <exception>
#include <vector>
namespace llvm {
+extern cl::opt<bool> SaveTemps;
+extern Triple TargetTriple;
+
class CBE;
class LLC;
@@ -54,7 +60,7 @@ class GCC {
public:
enum FileType { AsmFile, CFile };
- static GCC *create(const std::string &ProgramPath, std::string &Message,
+ static GCC *create(std::string &Message,
const std::vector<std::string> *Args);
/// ExecuteProgram - Execute the program specified by "ProgramFile" (which is
@@ -90,23 +96,20 @@ public:
///
class AbstractInterpreter {
public:
- static CBE *createCBE(const std::string &ProgramPath, std::string &Message,
+ static CBE *createCBE(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args = 0,
const std::vector<std::string> *GCCArgs = 0);
- static LLC *createLLC(const std::string &ProgramPath, std::string &Message,
+ static LLC *createLLC(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args = 0,
const std::vector<std::string> *GCCArgs = 0);
- static AbstractInterpreter* createLLI(const std::string &ProgramPath,
- std::string &Message,
+ static AbstractInterpreter* createLLI(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args=0);
- static AbstractInterpreter* createJIT(const std::string &ProgramPath,
- std::string &Message,
+ static AbstractInterpreter* createJIT(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args=0);
- static AbstractInterpreter* createCustom(const std::string &ProgramPath,
- std::string &Message,
+ static AbstractInterpreter* createCustom(std::string &Message,
const std::string &ExecCommandLine);
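
The two new extern declarations above are how ToolRunner.cpp sees flags that the bugpoint driver defines once. A sketch of the cl::opt-sharing pattern, with the flag spelling assumed since the defining hunk is not shown here:

    // In exactly one of the driver's .cpp files:
    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/CommandLine.h"
    namespace llvm {
      cl::opt<bool> SaveTemps("save-temps",            // spelling assumed
                              cl::desc("Keep temporary files"),
                              cl::init(false));
      Triple TargetTriple;   // set from -mtriple, else left empty
    }

    // Every includer of ToolRunner.h can then write, e.g.:
    //   FileRemover Remover(OutputAsmFile, !SaveTemps);
    //   if (TargetTriple.getOS() == Triple::Darwin) ...
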
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 3365b227b1f7..565f3f9a7087 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -22,10 +22,10 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/StandardPasses.h"
#include "llvm/System/Process.h"
#include "llvm/System/Signals.h"
#include "llvm/LinkAllVMCore.h"
-#include <iostream>
using namespace llvm;
// AsChild - Specifies that this invocation of bugpoint is being generated
@@ -58,6 +58,17 @@ MemoryLimit("mlimit", cl::init(100), cl::value_desc("MBytes"),
static cl::list<const PassInfo*, bool, PassNameParser>
PassList(cl::desc("Passes available:"), cl::ZeroOrMore);
+static cl::opt<bool>
+StandardCompileOpts("std-compile-opts",
+ cl::desc("Include the standard compile time optimizations"));
+
+static cl::opt<bool>
+StandardLinkOpts("std-link-opts",
+ cl::desc("Include the standard link time optimizations"));
+
+static cl::opt<std::string>
+OverrideTriple("mtriple", cl::desc("Override target triple for module"));
+
/// BugpointIsInterrupted - Set to true when the user presses ctrl-c.
bool llvm::BugpointIsInterrupted = false;
@@ -65,6 +76,20 @@ static void BugpointInterruptFunction() {
BugpointIsInterrupted = true;
}
+// Hack to capture a pass list.
+namespace {
+ class AddToDriver : public PassManager {
+ BugDriver &D;
+ public:
+ AddToDriver(BugDriver &_D) : D(_D) {}
+
+ virtual void add(Pass *P) {
+ const PassInfo *PI = P->getPassInfo();
+ D.addPasses(&PI, &PI + 1);
+ }
+ };
+}
+
int main(int argc, char **argv) {
llvm::sys::PrintStackTraceOnErrorSignal();
llvm::PrettyStackTraceProgram X(argc, argv);
@@ -75,9 +100,33 @@ int main(int argc, char **argv) {
" for more information.\n");
sys::SetInterruptFunction(BugpointInterruptFunction);
- LLVMContext Context;
+ LLVMContext& Context = getGlobalContext();
+ // If we have an override, set it and then track the triple we want Modules
+ // to use.
+ if (!OverrideTriple.empty()) {
+ TargetTriple.setTriple(OverrideTriple);
+ outs() << "Override triple set to '" << OverrideTriple << "'\n";
+ }
+
BugDriver D(argv[0], AsChild, FindBugs, TimeoutValue, MemoryLimit, Context);
if (D.addSources(InputFilenames)) return 1;
+
+ AddToDriver PM(D);
+ if (StandardCompileOpts) {
+ createStandardModulePasses(&PM, 3,
+ /*OptimizeSize=*/ false,
+ /*UnitAtATime=*/ true,
+ /*UnrollLoops=*/ true,
+ /*SimplifyLibCalls=*/ true,
+ /*HaveExceptions=*/ true,
+ createFunctionInliningPass());
+ }
+
+ if (StandardLinkOpts)
+ createStandardLTOPasses(&PM, /*Internalize=*/true,
+ /*RunInliner=*/true,
+ /*VerifyEach=*/false);
+
D.addPasses(PassList.begin(), PassList.end());
// Bugpoint has the ability of generating a plethora of core files, so to
@@ -87,20 +136,20 @@ int main(int argc, char **argv) {
try {
return D.run();
} catch (ToolExecutionError &TEE) {
- std::cerr << "Tool execution error: " << TEE.what() << '\n';
+ errs() << "Tool execution error: " << TEE.what() << '\n';
} catch (const std::string& msg) {
- std::cerr << argv[0] << ": " << msg << "\n";
- } catch (const std::bad_alloc &e) {
- std::cerr << "Oh no, a bugpoint process ran out of memory!\n"
- "To increase the allocation limits for bugpoint child\n"
- "processes, use the -mlimit option.\n";
+ errs() << argv[0] << ": " << msg << "\n";
+ } catch (const std::bad_alloc&) {
+ errs() << "Oh no, a bugpoint process ran out of memory!\n"
+ "To increase the allocation limits for bugpoint child\n"
+ "processes, use the -mlimit option.\n";
} catch (const std::exception &e) {
- std::cerr << "Whoops, a std::exception leaked out of bugpoint: "
- << e.what() << "\n"
- << "This is a bug in bugpoint!\n";
+ errs() << "Whoops, a std::exception leaked out of bugpoint: "
+ << e.what() << "\n"
+ << "This is a bug in bugpoint!\n";
} catch (...) {
- std::cerr << "Whoops, an exception leaked out of bugpoint. "
- << "This is a bug in bugpoint!\n";
+ errs() << "Whoops, an exception leaked out of bugpoint. "
+ << "This is a bug in bugpoint!\n";
}
return 1;
}
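
One subtlety in the AddToDriver hack above: the standard-pass builders believe they are filling a real PassManager, but the overridden add() records each pass's PassInfo with the BugDriver and never schedules (or frees) the Pass object; bugpoint later re-creates passes by name as it bisects the list. Conceptually:

    AddToDriver PM(D);   // D is the BugDriver
    if (StandardLinkOpts)
      createStandardLTOPasses(&PM, /*Internalize=*/true,
                              /*RunInliner=*/true, /*VerifyEach=*/false);
    // Net effect: for every pass P the LTO pipeline would have run,
    // D.addPasses(&PI, &PI + 1) was called with PI = P->getPassInfo();
    // nothing was executed, and the Pass instances themselves leak.
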
diff --git a/tools/gold/Makefile b/tools/gold/Makefile
index 65e99bf41c9a..7bac4ec62dea 100644
--- a/tools/gold/Makefile
+++ b/tools/gold/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-LIBRARYNAME = LLVMgold
+LIBRARYNAME = libLLVMgold
# Include this here so we can get the configuration of the targets
# that have been configured for construction. We have to do this
@@ -18,8 +18,9 @@ include $(LEVEL)/Makefile.config
LINK_LIBS_IN_SHARED=1
SHARED_LIBRARY = 1
BUILD_ARCHIVE = 0
+LOADABLE_MODULE = 1
-LINK_COMPONENTS :=
+LINK_COMPONENTS := support system
LIBS += -llto
# Because off_t is used in the public API, the largefile parts are required for
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 146c53fbb71b..6520617c81f9 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -362,8 +362,9 @@ ld_plugin_status all_symbols_read_hook(void) {
(*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
return LDPS_ERR;
}
- raw_fd_ostream *objFile = new raw_fd_ostream(uniqueObjPath.c_str(), true,
- ErrMsg);
+ raw_fd_ostream *objFile =
+ new raw_fd_ostream(uniqueObjPath.c_str(), ErrMsg,
+ raw_fd_ostream::F_Binary);
if (!ErrMsg.empty()) {
delete objFile;
(*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
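
The gold-plugin hunk above is one instance of an API migration repeated throughout this import: raw_fd_ostream now reports constructor failure through an error-string out-parameter and takes binary mode as an open-flags bit rather than a bool. A minimal sketch of the new shape, as also used by llc and llvm-as below:

    #include "llvm/Support/raw_ostream.h"

    static bool WriteBinaryFile(const char *Path, const std::string &Data) {
      std::string ErrInfo;
      llvm::raw_fd_ostream OS(Path, ErrInfo, llvm::raw_fd_ostream::F_Binary);
      if (!ErrInfo.empty())
        return false;   // open failed; the message is in ErrInfo
      OS << Data;       // F_Binary suppresses newline translation on hosts
      return true;      // that distinguish text and binary streams
    }
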
diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt
index e98b5a2649e1..683f29862d5c 100644
--- a/tools/llc/CMakeLists.txt
+++ b/tools/llc/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser)
add_llvm_tool(llc
llc.cpp
diff --git a/tools/llc/Makefile b/tools/llc/Makefile
index 8514040cf271..7319aada489e 100644
--- a/tools/llc/Makefile
+++ b/tools/llc/Makefile
@@ -15,7 +15,7 @@ TOOLNAME = llc
# early so we can set up LINK_COMPONENTS before including Makefile.rules
include $(LEVEL)/Makefile.config
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) bitreader
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) bitreader asmparser
include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index ae03c1e0c031..b94e5fb97c77 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -13,35 +13,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/CodeGen/FileWriters.h"
-#include "llvm/CodeGen/LinkAllCodegenComponents.h"
-#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
-#include "llvm/Target/SubtargetFeature.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/IRReader.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
+#include "llvm/CodeGen/LinkAllCodegenComponents.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/Config/config.h"
+#include "llvm/LinkAllVMCore.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/RegistryParser.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/Verifier.h"
+#include "llvm/System/Host.h"
#include "llvm/System/Signals.h"
-#include "llvm/Config/config.h"
-#include "llvm/LinkAllVMCore.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetSelect.h"
-#include <fstream>
-#include <iostream>
+#include "llvm/Transforms/Scalar.h"
#include <memory>
using namespace llvm;
@@ -55,7 +55,8 @@ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
-static cl::opt<bool> Force("f", cl::desc("Overwrite output files"));
+static cl::opt<bool>
+Force("f", cl::desc("Enable binary output on terminals"));
// Determine optimization level.
static cl::opt<char>
@@ -69,9 +70,8 @@ OptLevel("O",
static cl::opt<std::string>
TargetTriple("mtriple", cl::desc("Override target triple for module"));
-static cl::opt<const TargetMachineRegistry::entry*, false,
- RegistryParser<TargetMachine> >
-MArch("march", cl::desc("Architecture to generate code for:"));
+static cl::opt<std::string>
+MArch("march", cl::desc("Architecture to generate code for (see --version)"));
static cl::opt<std::string>
MCPU("mcpu",
@@ -119,7 +119,9 @@ GetFileNameRoot(const std::string &InputFilename) {
std::string outputFilename;
int Len = IFN.length();
if ((Len > 2) &&
- IFN[Len-3] == '.' && IFN[Len-2] == 'b' && IFN[Len-1] == 'c') {
+ IFN[Len-3] == '.' &&
+ ((IFN[Len-2] == 'b' && IFN[Len-1] == 'c') ||
+ (IFN[Len-2] == 'l' && IFN[Len-1] == 'l'))) {
outputFilename = std::string(IFN.begin(), IFN.end()-3); // s/.bc/.s/
} else {
outputFilename = IFN;
@@ -127,37 +129,34 @@ GetFileNameRoot(const std::string &InputFilename) {
return outputFilename;
}
-static raw_ostream *GetOutputStream(const char *ProgName) {
+static formatted_raw_ostream *GetOutputStream(const char *TargetName,
+ const char *ProgName) {
if (OutputFilename != "") {
if (OutputFilename == "-")
- return &outs();
-
- // Specified an output filename?
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- std::cerr << ProgName << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 0;
- }
+ return &fouts();
+
// Make sure that the Out file gets unlinked from the disk if we get a
// SIGINT
sys::RemoveFileOnSignal(sys::Path(OutputFilename));
std::string error;
- raw_ostream *Out = new raw_fd_ostream(OutputFilename.c_str(), true, error);
+ raw_fd_ostream *FDOut =
+ new raw_fd_ostream(OutputFilename.c_str(), error,
+ raw_fd_ostream::F_Binary);
if (!error.empty()) {
- std::cerr << error << '\n';
- delete Out;
+ errs() << error << '\n';
+ delete FDOut;
return 0;
}
+ formatted_raw_ostream *Out =
+ new formatted_raw_ostream(*FDOut, formatted_raw_ostream::DELETE_STREAM);
return Out;
}
if (InputFilename == "-") {
OutputFilename = "-";
- return &outs();
+ return &fouts();
}
OutputFilename = GetFileNameRoot(InputFilename);
@@ -165,10 +164,10 @@ static raw_ostream *GetOutputStream(const char *ProgName) {
bool Binary = false;
switch (FileType) {
case TargetMachine::AssemblyFile:
- if (MArch->Name[0] == 'c') {
- if (MArch->Name[1] == 0)
+ if (TargetName[0] == 'c') {
+ if (TargetName[1] == 0)
OutputFilename += ".cbe.c";
- else if (MArch->Name[1] == 'p' && MArch->Name[2] == 'p')
+ else if (TargetName[1] == 'p' && TargetName[2] == 'p')
OutputFilename += ".cpp";
else
OutputFilename += ".s";
@@ -185,26 +184,24 @@ static raw_ostream *GetOutputStream(const char *ProgName) {
break;
}
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- std::cerr << ProgName << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 0;
- }
-
// Make sure that the Out file gets unlinked from the disk if we get a
// SIGINT
sys::RemoveFileOnSignal(sys::Path(OutputFilename));
std::string error;
- raw_ostream *Out = new raw_fd_ostream(OutputFilename.c_str(), Binary, error);
+ unsigned OpenFlags = 0;
+ if (Binary) OpenFlags |= raw_fd_ostream::F_Binary;
+ raw_fd_ostream *FDOut = new raw_fd_ostream(OutputFilename.c_str(), error,
+ OpenFlags);
if (!error.empty()) {
- std::cerr << error << '\n';
- delete Out;
+ errs() << error << '\n';
+ delete FDOut;
return 0;
}
+ formatted_raw_ostream *Out =
+ new formatted_raw_ostream(*FDOut, formatted_raw_ostream::DELETE_STREAM);
+
return Out;
}
@@ -213,24 +210,22 @@ static raw_ostream *GetOutputStream(const char *ProgName) {
int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
- cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
+ // Initialize targets first, so that --version shows registered targets.
InitializeAllTargets();
InitializeAllAsmPrinters();
+
+ cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
// Load the module to be compiled...
- std::string ErrorMessage;
+ SMDiagnostic Err;
std::auto_ptr<Module> M;
- std::auto_ptr<MemoryBuffer> Buffer(
- MemoryBuffer::getFileOrSTDIN(InputFilename, &ErrorMessage));
- if (Buffer.get())
- M.reset(ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage));
+ M.reset(ParseIRFile(InputFilename, Err, Context));
if (M.get() == 0) {
- std::cerr << argv[0] << ": bitcode didn't read correctly.\n";
- std::cerr << "Reason: " << ErrorMessage << "\n";
+ Err.Print(argv[0], errs());
return 1;
}
Module &mod = *M.get();
@@ -239,15 +234,40 @@ int main(int argc, char **argv) {
if (!TargetTriple.empty())
mod.setTargetTriple(TargetTriple);
- // Allocate target machine. First, check whether the user has
- // explicitly specified an architecture to compile for.
- if (MArch == 0) {
+ Triple TheTriple(mod.getTargetTriple());
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getHostTriple());
+
+ // Allocate target machine. First, check whether the user has explicitly
+ // specified an architecture to compile for. If so we have to look it up by
+ // name, because it might be a backend that has no mapping to a target triple.
+ const Target *TheTarget = 0;
+ if (!MArch.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (MArch == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ errs() << argv[0] << ": error: invalid target '" << MArch << "'.\n";
+ return 1;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // module/host triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
std::string Err;
- MArch = TargetMachineRegistry::getClosestStaticTargetForModule(mod, Err);
- if (MArch == 0) {
- std::cerr << argv[0] << ": error auto-selecting target for module '"
- << Err << "'. Please use the -march option to explicitly "
- << "pick a target.\n";
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Err);
+ if (TheTarget == 0) {
+ errs() << argv[0] << ": error auto-selecting target for module '"
+ << Err << "'. Please use the -march option to explicitly "
+ << "pick a target.\n";
return 1;
}
}
@@ -262,18 +282,19 @@ int main(int argc, char **argv) {
FeaturesStr = Features.getString();
}
- std::auto_ptr<TargetMachine> target(MArch->CtorFn(mod, FeaturesStr));
+ std::auto_ptr<TargetMachine>
+ target(TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr));
assert(target.get() && "Could not allocate target machine!");
TargetMachine &Target = *target.get();
// Figure out where we are going to send the output...
- raw_ostream *Out = GetOutputStream(argv[0]);
+ formatted_raw_ostream *Out = GetOutputStream(TheTarget->getName(), argv[0]);
if (Out == 0) return 1;
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
default:
- std::cerr << argv[0] << ": invalid optimization level.\n";
+ errs() << argv[0] << ": invalid optimization level.\n";
return 1;
case ' ': break;
case '0': OLvl = CodeGenOpt::None; break;
@@ -286,15 +307,21 @@ int main(int argc, char **argv) {
// used by strange things like the C backend.
if (Target.WantsWholeFile()) {
PassManager PM;
- PM.add(new TargetData(*Target.getTargetData()));
+
+ // Add the target data from the target machine, if it exists, or the module.
+ if (const TargetData *TD = Target.getTargetData())
+ PM.add(new TargetData(*TD));
+ else
+ PM.add(new TargetData(&mod));
+
if (!NoVerify)
PM.add(createVerifierPass());
// Ask the target to add backend passes as necessary.
if (Target.addPassesToEmitWholeFile(PM, *Out, FileType, OLvl)) {
- std::cerr << argv[0] << ": target does not support generation of this"
- << " file type!\n";
- if (Out != &outs()) delete Out;
+ errs() << argv[0] << ": target does not support generation of this"
+ << " file type!\n";
+ if (Out != &fouts()) delete Out;
// And the Out file is empty and useless, so remove it now.
sys::Path(OutputFilename).eraseFromDisk();
return 1;
@@ -304,7 +331,12 @@ int main(int argc, char **argv) {
// Build up all of the passes that we want to do to the module.
ExistingModuleProvider Provider(M.release());
FunctionPassManager Passes(&Provider);
- Passes.add(new TargetData(*Target.getTargetData()));
+
+ // Add the target data from the target machine, if it exists, or the module.
+ if (const TargetData *TD = Target.getTargetData())
+ Passes.add(new TargetData(*TD));
+ else
+ Passes.add(new TargetData(&mod));
#ifndef NDEBUG
if (!NoVerify)
@@ -312,7 +344,7 @@ int main(int argc, char **argv) {
#endif
// Ask the target to add backend passes as necessary.
- MachineCodeEmitter *MCE = 0;
+ ObjectCodeEmitter *OCE = 0;
// Override default to generate verbose assembly.
Target.setAsmVerbosityDefault(true);
@@ -322,26 +354,26 @@ int main(int argc, char **argv) {
assert(0 && "Invalid file model!");
return 1;
case FileModel::Error:
- std::cerr << argv[0] << ": target does not support generation of this"
- << " file type!\n";
- if (Out != &outs()) delete Out;
+ errs() << argv[0] << ": target does not support generation of this"
+ << " file type!\n";
+ if (Out != &fouts()) delete Out;
// And the Out file is empty and useless, so remove it now.
sys::Path(OutputFilename).eraseFromDisk();
return 1;
case FileModel::AsmFile:
break;
case FileModel::MachOFile:
- MCE = AddMachOWriter(Passes, *Out, Target);
+ OCE = AddMachOWriter(Passes, *Out, Target);
break;
case FileModel::ElfFile:
- MCE = AddELFWriter(Passes, *Out, Target);
+ OCE = AddELFWriter(Passes, *Out, Target);
break;
}
- if (Target.addPassesToEmitFileFinish(Passes, MCE, OLvl)) {
- std::cerr << argv[0] << ": target does not support generation of this"
- << " file type!\n";
- if (Out != &outs()) delete Out;
+ if (Target.addPassesToEmitFileFinish(Passes, OCE, OLvl)) {
+ errs() << argv[0] << ": target does not support generation of this"
+ << " file type!\n";
+ if (Out != &fouts()) delete Out;
// And the Out file is empty and useless, so remove it now.
sys::Path(OutputFilename).eraseFromDisk();
return 1;
@@ -364,7 +396,7 @@ int main(int argc, char **argv) {
}
// Delete the ostream if it's not a stdout stream
- if (Out != &outs()) delete Out;
+ if (Out != &fouts()) delete Out;
return 0;
}
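
The llc rewrite above replaces TargetMachineRegistry with the new TargetRegistry, and the selection order matters: an explicit -march is matched against backend names (which need not map to any triple), and only in its absence is the module or host triple resolved to a target. The same flow in isolation, with a hypothetical helper name:

    #include "llvm/ADT/Triple.h"
    #include "llvm/Target/TargetRegistry.h"

    static const llvm::Target *PickTarget(const std::string &MArch,
                                          const llvm::Triple &TheTriple,
                                          std::string &Err) {
      using namespace llvm;
      if (!MArch.empty()) {
        // Explicit -march: search by backend name, not by triple.
        for (TargetRegistry::iterator I = TargetRegistry::begin(),
                                      E = TargetRegistry::end(); I != E; ++I)
          if (MArch == I->getName())
            return &*I;
        Err = "invalid target '" + MArch + "'";
        return 0;
      }
      // No -march: resolve the triple (module's, else host's).
      return TargetRegistry::lookupTarget(TheTriple.getTriple(), Err);
    }
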
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index a01b0d69341a..e5c1070bbb97 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -28,10 +28,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Process.h"
#include "llvm/System/Signals.h"
#include "llvm/Target/TargetSelect.h"
-#include <iostream>
#include <cerrno>
using namespace llvm;
@@ -94,8 +94,13 @@ int main(int argc, char **argv, char * const *envp) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
atexit(do_shutdown); // Call llvm_shutdown() on exit.
+
+ // If we have a native target, initialize it to ensure it is linked in and
+ // usable by the JIT.
+ InitializeNativeTarget();
+
cl::ParseCommandLineOptions(argc, argv,
"llvm interpreter & dynamic compiler\n");
@@ -112,8 +117,8 @@ int main(int argc, char **argv, char * const *envp) {
}
if (!MP) {
- std::cerr << argv[0] << ": error loading program '" << InputFile << "': "
- << ErrorMsg << "\n";
+ errs() << argv[0] << ": error loading program '" << InputFile << "': "
+ << ErrorMsg << "\n";
exit(1);
}
@@ -121,11 +126,17 @@ int main(int argc, char **argv, char * const *envp) {
Module *Mod = NoLazyCompilation
? MP->materializeModule(&ErrorMsg) : MP->getModule();
if (!Mod) {
- std::cerr << argv[0] << ": bitcode didn't read correctly.\n";
- std::cerr << "Reason: " << ErrorMsg << "\n";
+ errs() << argv[0] << ": bitcode didn't read correctly.\n";
+ errs() << "Reason: " << ErrorMsg << "\n";
exit(1);
}
+ EngineBuilder builder(MP);
+ builder.setErrorStr(&ErrorMsg);
+ builder.setEngineKind(ForceInterpreter
+ ? EngineKind::Interpreter
+ : EngineKind::JIT);
+
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
Mod->setTargetTriple(TargetTriple);
@@ -133,7 +144,7 @@ int main(int argc, char **argv, char * const *envp) {
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
default:
- std::cerr << argv[0] << ": invalid optimization level.\n";
+ errs() << argv[0] << ": invalid optimization level.\n";
return 1;
case ' ': break;
case '0': OLvl = CodeGenOpt::None; break;
@@ -141,18 +152,19 @@ int main(int argc, char **argv, char * const *envp) {
case '2': OLvl = CodeGenOpt::Default; break;
case '3': OLvl = CodeGenOpt::Aggressive; break;
}
-
- // If we have a native target, initialize it to ensure it is linked in and
- // usable by the JIT.
- InitializeNativeTarget();
-
- EE = ExecutionEngine::create(MP, ForceInterpreter, &ErrorMsg, OLvl);
- if (!EE && !ErrorMsg.empty()) {
- std::cerr << argv[0] << ":error creating EE: " << ErrorMsg << "\n";
+ builder.setOptLevel(OLvl);
+
+ EE = builder.create();
+ if (!EE) {
+ if (!ErrorMsg.empty())
+ errs() << argv[0] << ": error creating EE: " << ErrorMsg << "\n";
+ else
+ errs() << argv[0] << ": unknown error creating EE!\n";
exit(1);
}
EE->RegisterJITEventListener(createMacOSJITEventListener());
+ EE->RegisterJITEventListener(createOProfileJITEventListener());
if (NoLazyCompilation)
EE->DisableLazyCompilation();
@@ -178,14 +190,15 @@ int main(int argc, char **argv, char * const *envp) {
//
Function *EntryFn = Mod->getFunction(EntryFunc);
if (!EntryFn) {
- std::cerr << '\'' << EntryFunc << "\' function not found in module.\n";
+ errs() << '\'' << EntryFunc << "\' function not found in module.\n";
return -1;
}
// If the program doesn't explicitly call exit, we will need the Exit
// function later on to make an explicit call, so get the function now.
- Constant *Exit = Mod->getOrInsertFunction("exit", Type::VoidTy,
- Type::Int32Ty, NULL);
+ Constant *Exit = Mod->getOrInsertFunction("exit", Type::getVoidTy(Context),
+ Type::getInt32Ty(Context),
+ NULL);
// Reset errno to zero on entry to main.
errno = 0;
@@ -215,10 +228,10 @@ int main(int argc, char **argv, char * const *envp) {
ResultGV.IntVal = APInt(32, Result);
Args.push_back(ResultGV);
EE->runFunction(ExitF, Args);
- std::cerr << "ERROR: exit(" << Result << ") returned!\n";
+ errs() << "ERROR: exit(" << Result << ") returned!\n";
abort();
} else {
- std::cerr << "ERROR: exit defined with wrong prototype!\n";
+ errs() << "ERROR: exit defined with wrong prototype!\n";
abort();
}
}
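
The lli change above moves from ExecutionEngine::create() to the EngineBuilder introduced in this snapshot; the builder makes the interpreter-versus-JIT choice and the optimization level explicit, and lets a null result be told apart by whether an error message was produced. In brief, per the hunk:

    EngineBuilder builder(MP);   // MP is the ModuleProvider
    std::string ErrorMsg;
    builder.setErrorStr(&ErrorMsg);
    builder.setEngineKind(ForceInterpreter ? EngineKind::Interpreter
                                           : EngineKind::JIT);
    builder.setOptLevel(CodeGenOpt::Default);
    ExecutionEngine *EE = builder.create();
    if (!EE) {
      // An empty ErrorMsg here means the requested engine kind was simply
      // not linked into this binary, not that creation errored out.
    }
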
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index fe58db1222bb..021a3691e850 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -18,11 +18,13 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include <iostream>
#include <algorithm>
#include <iomanip>
#include <memory>
+#include <fstream>
using namespace llvm;
// Option for compatibility with AIX, not used but must allow it to be present.
@@ -363,7 +365,7 @@ bool doPrint(std::string* ErrMsg) {
continue;
if (Verbose)
- std::cout << "Printing " << I->getPath().toString() << "\n";
+ std::cout << "Printing " << I->getPath().str() << "\n";
unsigned len = I->getSize();
std::cout.write(data, len);
@@ -421,11 +423,10 @@ doDisplayTable(std::string* ErrMsg) {
std::cout << " " << std::setw(4) << I->getUser();
std::cout << "/" << std::setw(4) << I->getGroup();
std::cout << " " << std::setw(8) << I->getSize();
- std::cout << " " << std::setw(20) <<
- I->getModTime().toString().substr(4);
- std::cout << " " << I->getPath().toString() << "\n";
+ std::cout << " " << std::setw(20) << I->getModTime().str().substr(4);
+ std::cout << " " << I->getPath().str() << "\n";
} else {
- std::cout << I->getPath().toString() << "\n";
+ std::cout << I->getPath().str() << "\n";
}
}
}
@@ -527,7 +528,7 @@ doMove(std::string* ErrMsg) {
if (AddBefore || InsertBefore || AddAfter) {
for (Archive::iterator I = TheArchive->begin(), E= TheArchive->end();
I != E; ++I ) {
- if (RelPos == I->getPath().toString()) {
+ if (RelPos == I->getPath().str()) {
if (AddAfter) {
moveto_spot = I;
moveto_spot++;
@@ -615,7 +616,7 @@ doReplaceOrInsert(std::string* ErrMsg) {
std::set<sys::Path>::iterator found = remaining.end();
for (std::set<sys::Path>::iterator RI = remaining.begin(),
RE = remaining.end(); RI != RE; ++RI ) {
- std::string compare(RI->toString());
+ std::string compare(RI->str());
if (TruncateNames && compare.length() > 15) {
const char* nm = compare.c_str();
unsigned len = compare.length();
@@ -628,7 +629,7 @@ doReplaceOrInsert(std::string* ErrMsg) {
len = 15;
compare.assign(nm,len);
}
- if (compare == I->getPath().toString()) {
+ if (compare == I->getPath().str()) {
found = RI;
break;
}
@@ -660,9 +661,9 @@ doReplaceOrInsert(std::string* ErrMsg) {
}
// Determine if this is the place where we should insert
- if ((AddBefore || InsertBefore) && (RelPos == I->getPath().toString()))
+ if ((AddBefore || InsertBefore) && RelPos == I->getPath().str())
insert_spot = I;
- else if (AddAfter && (RelPos == I->getPath().toString())) {
+ else if (AddAfter && RelPos == I->getPath().str()) {
insert_spot = I;
insert_spot++;
}
@@ -691,7 +692,7 @@ int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
// Have the command line options parsed and handle things
@@ -718,15 +719,15 @@ int main(int argc, char **argv) {
if (!ArchivePath.exists()) {
// Produce a warning if we should and we're creating the archive
if (!Create)
- std::cerr << argv[0] << ": creating " << ArchivePath.toString() << "\n";
+ errs() << argv[0] << ": creating " << ArchivePath.str() << "\n";
TheArchive = Archive::CreateEmpty(ArchivePath, Context);
TheArchive->writeToDisk();
} else {
std::string Error;
TheArchive = Archive::OpenAndLoad(ArchivePath, Context, &Error);
if (TheArchive == 0) {
- std::cerr << argv[0] << ": error loading '" << ArchivePath << "': "
- << Error << "!\n";
+ errs() << argv[0] << ": error loading '" << ArchivePath.str() << "': "
+ << Error << "!\n";
return 1;
}
}
@@ -749,27 +750,27 @@ int main(int argc, char **argv) {
case DisplayTable: haveError = doDisplayTable(&ErrMsg); break;
case Extract: haveError = doExtract(&ErrMsg); break;
case NoOperation:
- std::cerr << argv[0] << ": No operation was selected.\n";
+ errs() << argv[0] << ": No operation was selected.\n";
break;
}
if (haveError) {
- std::cerr << argv[0] << ": " << ErrMsg << "\n";
+ errs() << argv[0] << ": " << ErrMsg << "\n";
return 1;
}
} catch (const char*msg) {
// These errors are usage errors, thrown only by the various checks in the
// code above.
- std::cerr << argv[0] << ": " << msg << "\n\n";
+ errs() << argv[0] << ": " << msg << "\n\n";
cl::PrintHelpMessage();
exitCode = 1;
} catch (const std::string& msg) {
// These errors are thrown by LLVM libraries (e.g. lib System) and represent
// a more serious error so we bump the exitCode and don't print the usage.
- std::cerr << argv[0] << ": " << msg << "\n";
+ errs() << argv[0] << ": " << msg << "\n";
exitCode = 2;
} catch (...) {
// This really shouldn't happen, but just in case ....
- std::cerr << argv[0] << ": An unexpected unknown exception occurred.\n";
+ errs() << argv[0] << ": An unexpected unknown exception occurred.\n";
exitCode = 3;
}
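
llvm-ar also shows a change applied uniformly across the tools in this import: the stack-local LLVMContext becomes the process-wide one. Presumably this keeps a tool's IR in the same context as IR produced by any linked-in LLVM libraries, since types and constants are uniqued per context. The pattern, in brief:

    #include "llvm/LLVMContext.h"

    int main(int argc, char **argv) {
      // One shared context for everything this process parses or builds.
      llvm::LLVMContext &Context = llvm::getGlobalContext();
      // ... hand Context to ParseIRFile, Archive::OpenAndLoad, etc.
      return 0;
    }
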
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index eccabd5d14a4..d510297aa35c 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -24,12 +24,9 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
-#include <fstream>
-#include <iostream>
#include <memory>
using namespace llvm;
@@ -41,7 +38,7 @@ OutputFilename("o", cl::desc("Override output filename"),
cl::value_desc("filename"));
static cl::opt<bool>
-Force("f", cl::desc("Overwrite output files"));
+Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
DisableOutput("disable-output", cl::desc("Disable output"), cl::init(false));
@@ -57,96 +54,64 @@ int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n");
- int exitCode = 0;
- std::ostream *Out = 0;
- try {
- // Parse the file now...
- SMDiagnostic Err;
- std::auto_ptr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
- if (M.get() == 0) {
- Err.Print(argv[0], errs());
- return 1;
- }
+ // Parse the file now...
+ SMDiagnostic Err;
+ std::auto_ptr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
+ if (M.get() == 0) {
+ Err.Print(argv[0], errs());
+ return 1;
+ }
- if (!DisableVerify) {
- std::string Err;
- if (verifyModule(*M.get(), ReturnStatusAction, &Err)) {
- cerr << argv[0]
+ if (!DisableVerify) {
+ std::string Err;
+ if (verifyModule(*M.get(), ReturnStatusAction, &Err)) {
+ errs() << argv[0]
<< ": assembly parsed, but does not verify as correct!\n";
- cerr << Err;
- return 1;
- }
- }
+ errs() << Err;
+ return 1;
+ }
+ }
- if (DumpAsm) cerr << "Here's the assembly:\n" << *M.get();
-
- if (OutputFilename != "") { // Specified an output filename?
- if (OutputFilename != "-") { // Not stdout?
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 1;
- }
- Out = new std::ofstream(OutputFilename.c_str(), std::ios::out |
- std::ios::trunc | std::ios::binary);
- } else { // Specified stdout
- // FIXME: cout is not binary!
- Out = &std::cout;
- }
+ if (DumpAsm) errs() << "Here's the assembly:\n" << *M.get();
+
+ // Infer the output filename if needed.
+ if (OutputFilename.empty()) {
+ if (InputFilename == "-") {
+ OutputFilename = "-";
} else {
- if (InputFilename == "-") {
- OutputFilename = "-";
- Out = &std::cout;
+ std::string IFN = InputFilename;
+ int Len = IFN.length();
+ if (Len > 2 && IFN[Len-3] == '.' && IFN[Len-2] == 'l' && IFN[Len-1] == 'l') {
+ // Source ends in .ll
+ OutputFilename = std::string(IFN.begin(), IFN.end()-3);
} else {
- std::string IFN = InputFilename;
- int Len = IFN.length();
- if (IFN[Len-3] == '.' && IFN[Len-2] == 'l' && IFN[Len-1] == 'l') {
- // Source ends in .ll
- OutputFilename = std::string(IFN.begin(), IFN.end()-3);
- } else {
- OutputFilename = IFN; // Append a .bc to it
- }
- OutputFilename += ".bc";
-
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 1;
- }
-
- Out = new std::ofstream(OutputFilename.c_str(), std::ios::out |
- std::ios::trunc | std::ios::binary);
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
+ OutputFilename = IFN; // Append a .bc to it
}
+ OutputFilename += ".bc";
}
+ }
- if (!Out->good()) {
- cerr << argv[0] << ": error opening " << OutputFilename << "!\n";
- return 1;
- }
-
- if (!DisableOutput)
- if (Force || !CheckBitcodeOutputToConsole(Out,true))
- WriteBitcodeToFile(M.get(), *Out);
- } catch (const std::string& msg) {
- cerr << argv[0] << ": " << msg << "\n";
- exitCode = 1;
- } catch (...) {
- cerr << argv[0] << ": Unexpected unknown exception occurred.\n";
- exitCode = 1;
+ // Make sure that the Out file gets unlinked from the disk if we get a
+ // SIGINT.
+ if (OutputFilename != "-")
+ sys::RemoveFileOnSignal(sys::Path(OutputFilename));
+
+ std::string ErrorInfo;
+ std::auto_ptr<raw_ostream> Out
+ (new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
}
- if (Out != &std::cout) delete Out;
- return exitCode;
+ if (!DisableOutput)
+ if (Force || !CheckBitcodeOutputToConsole(*Out, true))
+ WriteBitcodeToFile(M.get(), *Out);
+ return 0;
}
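
Note the quiet semantics change in llvm-as above (llc receives the same treatment): -f no longer means "overwrite an existing output file", since overwriting is now unconditional; it only forces raw bitcode onto a terminal. The write gate reduces to:

    // CheckBitcodeOutputToConsole returns true (after warning) when Out
    // is a terminal; -f overrides the refusal.
    if (!DisableOutput)
      if (Force || !CheckBitcodeOutputToConsole(*Out, /*print_warning=*/true))
        WriteBitcodeToFile(M.get(), *Out);
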
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index b401a21ece9a..6d5b2b51a8b7 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -20,7 +20,7 @@
// produces on stdout a summary of the bitcode file that shows various
// statistics about the contents of the file. By default this information is
// detailed and contains information about individual bitcode blocks and the
-// functions in the module.
+// functions in the module.
// The tool is also able to print a bitcode file in a straightforward text
// format that shows the containment and relationships of the information in
// the bitcode file (-dump option).
@@ -32,13 +32,14 @@
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
+#include <cstdio>
#include <map>
-#include <fstream>
-#include <iostream>
#include <algorithm>
using namespace llvm;
@@ -62,7 +63,7 @@ NonSymbolic("non-symbolic",
cl::desc("Emit numberic info in dump even if"
" symbolic info is available"));
-/// CurStreamType - If we can sniff the flavor of this stream, we can produce
+/// CurStreamType - If we can sniff the flavor of this stream, we can produce
/// better dump info.
static enum {
UnknownBitstream,
@@ -80,26 +81,28 @@ static const char *GetBlockName(unsigned BlockID,
return "BLOCKINFO_BLOCK";
return 0;
}
-
+
// Check to see if we have a blockinfo record for this block, with a name.
if (const BitstreamReader::BlockInfo *Info =
StreamFile.getBlockInfo(BlockID)) {
if (!Info->Name.empty())
return Info->Name.c_str();
}
-
-
+
+
if (CurStreamType != LLVMIRBitstream) return 0;
-
+
switch (BlockID) {
- default: return 0;
- case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
- case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
- case bitc::TYPE_BLOCK_ID: return "TYPE_BLOCK";
- case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
- case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
- case bitc::TYPE_SYMTAB_BLOCK_ID: return "TYPE_SYMTAB";
- case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
+ default: return 0;
+ case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
+ case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
+ case bitc::TYPE_BLOCK_ID: return "TYPE_BLOCK";
+ case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
+ case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
+ case bitc::TYPE_SYMTAB_BLOCK_ID: return "TYPE_SYMTAB";
+ case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
+ case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
+ case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
}
}
@@ -119,7 +122,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
}
return 0;
}
-
+
// Check to see if we have a blockinfo record for this record, with a name.
if (const BitstreamReader::BlockInfo *Info =
StreamFile.getBlockInfo(BlockID)) {
@@ -127,10 +130,10 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
if (Info->RecordNames[i].first == CodeID)
return Info->RecordNames[i].second.c_str();
}
-
-
+
+
if (CurStreamType != LLVMIRBitstream) return 0;
-
+
switch (BlockID) {
default: return 0;
case bitc::MODULE_BLOCK_ID:
@@ -173,67 +176,67 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::TYPE_CODE_PPC_FP128: return "PPC_FP128";
case bitc::TYPE_CODE_METADATA: return "METADATA";
}
-
+
case bitc::CONSTANTS_BLOCK_ID:
switch (CodeID) {
default: return 0;
- case bitc::CST_CODE_SETTYPE: return "SETTYPE";
- case bitc::CST_CODE_NULL: return "NULL";
- case bitc::CST_CODE_UNDEF: return "UNDEF";
- case bitc::CST_CODE_INTEGER: return "INTEGER";
- case bitc::CST_CODE_WIDE_INTEGER: return "WIDE_INTEGER";
- case bitc::CST_CODE_FLOAT: return "FLOAT";
- case bitc::CST_CODE_AGGREGATE: return "AGGREGATE";
- case bitc::CST_CODE_STRING: return "STRING";
- case bitc::CST_CODE_CSTRING: return "CSTRING";
- case bitc::CST_CODE_CE_BINOP: return "CE_BINOP";
- case bitc::CST_CODE_CE_CAST: return "CE_CAST";
- case bitc::CST_CODE_CE_GEP: return "CE_GEP";
- case bitc::CST_CODE_CE_SELECT: return "CE_SELECT";
- case bitc::CST_CODE_CE_EXTRACTELT: return "CE_EXTRACTELT";
- case bitc::CST_CODE_CE_INSERTELT: return "CE_INSERTELT";
- case bitc::CST_CODE_CE_SHUFFLEVEC: return "CE_SHUFFLEVEC";
- case bitc::CST_CODE_CE_CMP: return "CE_CMP";
- case bitc::CST_CODE_INLINEASM: return "INLINEASM";
- case bitc::CST_CODE_CE_SHUFVEC_EX: return "CE_SHUFVEC_EX";
- case bitc::CST_CODE_MDSTRING: return "MDSTRING";
- case bitc::CST_CODE_MDNODE: return "MDNODE";
- }
+ case bitc::CST_CODE_SETTYPE: return "SETTYPE";
+ case bitc::CST_CODE_NULL: return "NULL";
+ case bitc::CST_CODE_UNDEF: return "UNDEF";
+ case bitc::CST_CODE_INTEGER: return "INTEGER";
+ case bitc::CST_CODE_WIDE_INTEGER: return "WIDE_INTEGER";
+ case bitc::CST_CODE_FLOAT: return "FLOAT";
+ case bitc::CST_CODE_AGGREGATE: return "AGGREGATE";
+ case bitc::CST_CODE_STRING: return "STRING";
+ case bitc::CST_CODE_CSTRING: return "CSTRING";
+ case bitc::CST_CODE_CE_BINOP: return "CE_BINOP";
+ case bitc::CST_CODE_CE_CAST: return "CE_CAST";
+ case bitc::CST_CODE_CE_GEP: return "CE_GEP";
+ case bitc::CST_CODE_CE_INBOUNDS_GEP: return "CE_INBOUNDS_GEP";
+ case bitc::CST_CODE_CE_SELECT: return "CE_SELECT";
+ case bitc::CST_CODE_CE_EXTRACTELT: return "CE_EXTRACTELT";
+ case bitc::CST_CODE_CE_INSERTELT: return "CE_INSERTELT";
+ case bitc::CST_CODE_CE_SHUFFLEVEC: return "CE_SHUFFLEVEC";
+ case bitc::CST_CODE_CE_CMP: return "CE_CMP";
+ case bitc::CST_CODE_INLINEASM: return "INLINEASM";
+ case bitc::CST_CODE_CE_SHUFVEC_EX: return "CE_SHUFVEC_EX";
+ }
case bitc::FUNCTION_BLOCK_ID:
switch (CodeID) {
default: return 0;
case bitc::FUNC_CODE_DECLAREBLOCKS: return "DECLAREBLOCKS";
-
- case bitc::FUNC_CODE_INST_BINOP: return "INST_BINOP";
- case bitc::FUNC_CODE_INST_CAST: return "INST_CAST";
- case bitc::FUNC_CODE_INST_GEP: return "INST_GEP";
- case bitc::FUNC_CODE_INST_SELECT: return "INST_SELECT";
- case bitc::FUNC_CODE_INST_EXTRACTELT: return "INST_EXTRACTELT";
- case bitc::FUNC_CODE_INST_INSERTELT: return "INST_INSERTELT";
- case bitc::FUNC_CODE_INST_SHUFFLEVEC: return "INST_SHUFFLEVEC";
- case bitc::FUNC_CODE_INST_CMP: return "INST_CMP";
-
- case bitc::FUNC_CODE_INST_RET: return "INST_RET";
- case bitc::FUNC_CODE_INST_BR: return "INST_BR";
- case bitc::FUNC_CODE_INST_SWITCH: return "INST_SWITCH";
- case bitc::FUNC_CODE_INST_INVOKE: return "INST_INVOKE";
- case bitc::FUNC_CODE_INST_UNWIND: return "INST_UNWIND";
- case bitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE";
-
- case bitc::FUNC_CODE_INST_PHI: return "INST_PHI";
- case bitc::FUNC_CODE_INST_MALLOC: return "INST_MALLOC";
- case bitc::FUNC_CODE_INST_FREE: return "INST_FREE";
- case bitc::FUNC_CODE_INST_ALLOCA: return "INST_ALLOCA";
- case bitc::FUNC_CODE_INST_LOAD: return "INST_LOAD";
- case bitc::FUNC_CODE_INST_STORE: return "INST_STORE";
- case bitc::FUNC_CODE_INST_CALL: return "INST_CALL";
- case bitc::FUNC_CODE_INST_VAARG: return "INST_VAARG";
- case bitc::FUNC_CODE_INST_STORE2: return "INST_STORE2";
- case bitc::FUNC_CODE_INST_GETRESULT: return "INST_GETRESULT";
- case bitc::FUNC_CODE_INST_EXTRACTVAL: return "INST_EXTRACTVAL";
- case bitc::FUNC_CODE_INST_INSERTVAL: return "INST_INSERTVAL";
- case bitc::FUNC_CODE_INST_CMP2: return "INST_CMP2";
- case bitc::FUNC_CODE_INST_VSELECT: return "INST_VSELECT";
+
+ case bitc::FUNC_CODE_INST_BINOP: return "INST_BINOP";
+ case bitc::FUNC_CODE_INST_CAST: return "INST_CAST";
+ case bitc::FUNC_CODE_INST_GEP: return "INST_GEP";
+ case bitc::FUNC_CODE_INST_INBOUNDS_GEP: return "INST_INBOUNDS_GEP";
+ case bitc::FUNC_CODE_INST_SELECT: return "INST_SELECT";
+ case bitc::FUNC_CODE_INST_EXTRACTELT: return "INST_EXTRACTELT";
+ case bitc::FUNC_CODE_INST_INSERTELT: return "INST_INSERTELT";
+ case bitc::FUNC_CODE_INST_SHUFFLEVEC: return "INST_SHUFFLEVEC";
+ case bitc::FUNC_CODE_INST_CMP: return "INST_CMP";
+
+ case bitc::FUNC_CODE_INST_RET: return "INST_RET";
+ case bitc::FUNC_CODE_INST_BR: return "INST_BR";
+ case bitc::FUNC_CODE_INST_SWITCH: return "INST_SWITCH";
+ case bitc::FUNC_CODE_INST_INVOKE: return "INST_INVOKE";
+ case bitc::FUNC_CODE_INST_UNWIND: return "INST_UNWIND";
+ case bitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE";
+
+ case bitc::FUNC_CODE_INST_PHI: return "INST_PHI";
+ case bitc::FUNC_CODE_INST_MALLOC: return "INST_MALLOC";
+ case bitc::FUNC_CODE_INST_FREE: return "INST_FREE";
+ case bitc::FUNC_CODE_INST_ALLOCA: return "INST_ALLOCA";
+ case bitc::FUNC_CODE_INST_LOAD: return "INST_LOAD";
+ case bitc::FUNC_CODE_INST_STORE: return "INST_STORE";
+ case bitc::FUNC_CODE_INST_CALL: return "INST_CALL";
+ case bitc::FUNC_CODE_INST_VAARG: return "INST_VAARG";
+ case bitc::FUNC_CODE_INST_STORE2: return "INST_STORE2";
+ case bitc::FUNC_CODE_INST_GETRESULT: return "INST_GETRESULT";
+ case bitc::FUNC_CODE_INST_EXTRACTVAL: return "INST_EXTRACTVAL";
+ case bitc::FUNC_CODE_INST_INSERTVAL: return "INST_INSERTVAL";
+ case bitc::FUNC_CODE_INST_CMP2: return "INST_CMP2";
+ case bitc::FUNC_CODE_INST_VSELECT: return "INST_VSELECT";
}
case bitc::TYPE_SYMTAB_BLOCK_ID:
switch (CodeID) {
@@ -246,6 +249,20 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::VST_CODE_ENTRY: return "ENTRY";
case bitc::VST_CODE_BBENTRY: return "BBENTRY";
}
+ case bitc::METADATA_ATTACHMENT_ID:
+    switch (CodeID) {
+    default: return 0;
+ case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
+ }
+ case bitc::METADATA_BLOCK_ID:
+    switch (CodeID) {
+    default: return 0;
+ case bitc::METADATA_STRING: return "MDSTRING";
+ case bitc::METADATA_NODE: return "MDNODE";
+ case bitc::METADATA_NAME: return "METADATA_NAME";
+ case bitc::METADATA_NAMED_NODE: return "NAMEDMDNODE";
+ case bitc::METADATA_KIND: return "METADATA_KIND";
+ }
}
}
@@ -253,30 +270,30 @@ struct PerRecordStats {
unsigned NumInstances;
unsigned NumAbbrev;
uint64_t TotalBits;
-
+
PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
};
struct PerBlockIDStats {
  /// NumInstances - This is the number of times this block ID has been seen.
unsigned NumInstances;
-
+
/// NumBits - The total size in bits of all of these blocks.
uint64_t NumBits;
-
+
/// NumSubBlocks - The total number of blocks these blocks contain.
unsigned NumSubBlocks;
-
+
/// NumAbbrevs - The total number of abbreviations.
unsigned NumAbbrevs;
-
- /// NumRecords - The total number of records these blocks contain, and the
+
+ /// NumRecords - The total number of records these blocks contain, and the
/// number that are abbreviated.
unsigned NumRecords, NumAbbreviatedRecords;
-
+
/// CodeFreq - Keep track of the number of times we see each code.
std::vector<PerRecordStats> CodeFreq;
-
+
PerBlockIDStats()
: NumInstances(0), NumBits(0),
NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {}
@@ -289,7 +306,7 @@ static std::map<unsigned, PerBlockIDStats> BlockIDStats;
/// Error - All bitcode analysis errors go through this function, making this a
/// good place to breakpoint if debugging.
static bool Error(const std::string &Err) {
- std::cerr << Err << "\n";
+ errs() << Err << "\n";
return true;
}
@@ -301,38 +318,38 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
// Get the statistics for this BlockID.
PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
-
+
BlockStats.NumInstances++;
-
+
// BLOCKINFO is a special part of the stream.
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- if (Dump) std::cerr << Indent << "<BLOCKINFO_BLOCK/>\n";
+ if (Dump) errs() << Indent << "<BLOCKINFO_BLOCK/>\n";
if (Stream.ReadBlockInfoBlock())
return Error("Malformed BlockInfoBlock");
uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
BlockStats.NumBits += BlockBitEnd-BlockBitStart;
return false;
}
-
+
unsigned NumWords = 0;
if (Stream.EnterSubBlock(BlockID, &NumWords))
return Error("Malformed block record");
const char *BlockName = 0;
if (Dump) {
- std::cerr << Indent << "<";
+ errs() << Indent << "<";
if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader())))
- std::cerr << BlockName;
+ errs() << BlockName;
else
- std::cerr << "UnknownBlock" << BlockID;
-
+ errs() << "UnknownBlock" << BlockID;
+
if (NonSymbolic && BlockName)
- std::cerr << " BlockID=" << BlockID;
-
- std::cerr << " NumWords=" << NumWords
- << " BlockCodeSize=" << Stream.GetAbbrevIDWidth() << ">\n";
+ errs() << " BlockID=" << BlockID;
+
+ errs() << " NumWords=" << NumWords
+ << " BlockCodeSize=" << Stream.GetAbbrevIDWidth() << ">\n";
}
-
+
SmallVector<uint64_t, 64> Record;
// Read all the records for this block.
@@ -341,7 +358,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
return Error("Premature end of bitstream");
uint64_t RecordStartBit = Stream.GetCurrentBitNo();
-
+
// Read the code for this record.
unsigned AbbrevID = Stream.ReadCode();
switch (AbbrevID) {
@@ -351,21 +368,21 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
BlockStats.NumBits += BlockBitEnd-BlockBitStart;
if (Dump) {
- std::cerr << Indent << "</";
+ errs() << Indent << "</";
if (BlockName)
- std::cerr << BlockName << ">\n";
+ errs() << BlockName << ">\n";
else
- std::cerr << "UnknownBlock" << BlockID << ">\n";
+ errs() << "UnknownBlock" << BlockID << ">\n";
}
return false;
- }
+ }
case bitc::ENTER_SUBBLOCK: {
uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
if (ParseBlock(Stream, IndentLevel+1))
return true;
++BlockStats.NumSubBlocks;
uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
-
+
// Don't include subblock sizes in the size of this block.
BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
break;
@@ -380,13 +397,13 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
++BlockStats.NumRecords;
if (AbbrevID != bitc::UNABBREV_RECORD)
++BlockStats.NumAbbreviatedRecords;
-
+
const char *BlobStart = 0;
unsigned BlobLen = 0;
unsigned Code = Stream.ReadRecord(AbbrevID, Record, BlobStart, BlobLen);
-
-
+
+
// Increment the # occurrences of this code.
if (BlockStats.CodeFreq.size() <= Code)
BlockStats.CodeFreq.resize(Code+1);
@@ -395,43 +412,43 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
Stream.GetCurrentBitNo()-RecordStartBit;
if (AbbrevID != bitc::UNABBREV_RECORD)
BlockStats.CodeFreq[Code].NumAbbrev++;
-
+
if (Dump) {
- std::cerr << Indent << " <";
+ errs() << Indent << " <";
if (const char *CodeName =
GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
- std::cerr << CodeName;
+ errs() << CodeName;
else
- std::cerr << "UnknownCode" << Code;
+ errs() << "UnknownCode" << Code;
if (NonSymbolic &&
GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
- std::cerr << " codeid=" << Code;
+ errs() << " codeid=" << Code;
if (AbbrevID != bitc::UNABBREV_RECORD)
- std::cerr << " abbrevid=" << AbbrevID;
+ errs() << " abbrevid=" << AbbrevID;
for (unsigned i = 0, e = Record.size(); i != e; ++i)
- std::cerr << " op" << i << "=" << (int64_t)Record[i];
-
- std::cerr << "/>";
-
+ errs() << " op" << i << "=" << (int64_t)Record[i];
+
+ errs() << "/>";
+
if (BlobStart) {
- std::cerr << " blob data = ";
+ errs() << " blob data = ";
bool BlobIsPrintable = true;
for (unsigned i = 0; i != BlobLen; ++i)
if (!isprint(BlobStart[i])) {
BlobIsPrintable = false;
break;
}
-
+
if (BlobIsPrintable)
- std::cerr << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
+ errs() << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
else
- std::cerr << "unprintable, " << BlobLen << " bytes.";
+ errs() << "unprintable, " << BlobLen << " bytes.";
}
-
- std::cerr << "\n";
+
+ errs() << "\n";
}
-
+
break;
}
}
@@ -453,23 +470,23 @@ static int AnalyzeBitcode() {
if (MemBuf == 0)
return Error("Error reading '" + InputFilename + "'.");
-
+
if (MemBuf->getBufferSize() & 3)
return Error("Bitcode stream should be a multiple of 4 bytes in length");
-
+
unsigned char *BufPtr = (unsigned char *)MemBuf->getBufferStart();
unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
-
+
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, EndBufPtr))
if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr))
return Error("Invalid bitcode wrapper header");
-
+
BitstreamReader StreamFile(BufPtr, EndBufPtr);
BitstreamCursor Stream(StreamFile);
StreamFile.CollectBlockInfoNames();
-
+
// Read the stream signature.
char Signature[6];
Signature[0] = Stream.Read(8);
@@ -478,7 +495,7 @@ static int AnalyzeBitcode() {
Signature[3] = Stream.Read(4);
Signature[4] = Stream.Read(4);
Signature[5] = Stream.Read(4);
-
+
// Autodetect the file contents, if it is one we know.
CurStreamType = UnknownBitstream;
if (Signature[0] == 'B' && Signature[1] == 'C' &&
@@ -487,71 +504,72 @@ static int AnalyzeBitcode() {
CurStreamType = LLVMIRBitstream;
unsigned NumTopBlocks = 0;
-
+
// Parse the top-level structure. We only allow blocks at the top-level.
while (!Stream.AtEndOfStream()) {
unsigned Code = Stream.ReadCode();
if (Code != bitc::ENTER_SUBBLOCK)
return Error("Invalid record at top-level");
-
+
if (ParseBlock(Stream, 0))
return true;
++NumTopBlocks;
}
-
- if (Dump) std::cerr << "\n\n";
-
+
+ if (Dump) errs() << "\n\n";
+
uint64_t BufferSizeBits = (EndBufPtr-BufPtr)*CHAR_BIT;
// Print a summary of the read file.
- std::cerr << "Summary of " << InputFilename << ":\n";
- std::cerr << " Total size: ";
+ errs() << "Summary of " << InputFilename << ":\n";
+ errs() << " Total size: ";
PrintSize(BufferSizeBits);
- std::cerr << "\n";
- std::cerr << " Stream type: ";
+ errs() << "\n";
+ errs() << " Stream type: ";
switch (CurStreamType) {
default: assert(0 && "Unknown bitstream type");
- case UnknownBitstream: std::cerr << "unknown\n"; break;
- case LLVMIRBitstream: std::cerr << "LLVM IR\n"; break;
+ case UnknownBitstream: errs() << "unknown\n"; break;
+ case LLVMIRBitstream: errs() << "LLVM IR\n"; break;
}
- std::cerr << " # Toplevel Blocks: " << NumTopBlocks << "\n";
- std::cerr << "\n";
+ errs() << " # Toplevel Blocks: " << NumTopBlocks << "\n";
+ errs() << "\n";
// Emit per-block stats.
- std::cerr << "Per-block Summary:\n";
+ errs() << "Per-block Summary:\n";
for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
E = BlockIDStats.end(); I != E; ++I) {
- std::cerr << " Block ID #" << I->first;
+ errs() << " Block ID #" << I->first;
if (const char *BlockName = GetBlockName(I->first, StreamFile))
- std::cerr << " (" << BlockName << ")";
- std::cerr << ":\n";
-
+ errs() << " (" << BlockName << ")";
+ errs() << ":\n";
+
const PerBlockIDStats &Stats = I->second;
- std::cerr << " Num Instances: " << Stats.NumInstances << "\n";
- std::cerr << " Total Size: ";
+ errs() << " Num Instances: " << Stats.NumInstances << "\n";
+ errs() << " Total Size: ";
PrintSize(Stats.NumBits);
- std::cerr << "\n";
- std::cerr << " % of file: "
- << Stats.NumBits/(double)BufferSizeBits*100 << "\n";
+ errs() << "\n";
+ double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
+ errs() << " Percent of file: " << format("%2.4f%%", pct) << "\n";
if (Stats.NumInstances > 1) {
- std::cerr << " Average Size: ";
+ errs() << " Average Size: ";
PrintSize(Stats.NumBits/(double)Stats.NumInstances);
- std::cerr << "\n";
- std::cerr << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
- << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
- std::cerr << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
- << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
- std::cerr << " Tot/Avg Records: " << Stats.NumRecords << "/"
- << Stats.NumRecords/(double)Stats.NumInstances << "\n";
+ errs() << "\n";
+ errs() << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
+ << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
+ errs() << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
+ << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
+ errs() << " Tot/Avg Records: " << Stats.NumRecords << "/"
+ << Stats.NumRecords/(double)Stats.NumInstances << "\n";
} else {
- std::cerr << " Num SubBlocks: " << Stats.NumSubBlocks << "\n";
- std::cerr << " Num Abbrevs: " << Stats.NumAbbrevs << "\n";
- std::cerr << " Num Records: " << Stats.NumRecords << "\n";
+ errs() << " Num SubBlocks: " << Stats.NumSubBlocks << "\n";
+ errs() << " Num Abbrevs: " << Stats.NumAbbrevs << "\n";
+ errs() << " Num Records: " << Stats.NumRecords << "\n";
+ }
+ if (Stats.NumRecords) {
+ double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
+ errs() << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
}
- if (Stats.NumRecords)
- std::cerr << " % Abbrev Recs: " << (Stats.NumAbbreviatedRecords/
- (double)Stats.NumRecords)*100 << "\n";
- std::cerr << "\n";
-
+ errs() << "\n";
+
// Print a histogram of the codes we see.
if (!NoHistogram && !Stats.CodeFreq.empty()) {
std::vector<std::pair<unsigned, unsigned> > FreqPairs; // <freq,code>
@@ -560,29 +578,29 @@ static int AnalyzeBitcode() {
FreqPairs.push_back(std::make_pair(Freq, i));
std::stable_sort(FreqPairs.begin(), FreqPairs.end());
std::reverse(FreqPairs.begin(), FreqPairs.end());
-
- std::cerr << "\tRecord Histogram:\n";
+
+ errs() << "\tRecord Histogram:\n";
fprintf(stderr, "\t\t Count # Bits %% Abv Record Kind\n");
for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
-
+
fprintf(stderr, "\t\t%7d %9llu ", RecStats.NumInstances,
(unsigned long long)RecStats.TotalBits);
-
+
if (RecStats.NumAbbrev)
fprintf(stderr, "%7.2f ",
(double)RecStats.NumAbbrev/RecStats.NumInstances*100);
else
fprintf(stderr, " ");
-
- if (const char *CodeName =
+
+ if (const char *CodeName =
GetCodeName(FreqPairs[i].second, I->first, StreamFile))
fprintf(stderr, "%s\n", CodeName);
else
fprintf(stderr, "UnknownCode%d\n", FreqPairs[i].second);
}
- std::cerr << "\n";
-
+ errs() << "\n";
+
}
}
return 0;
@@ -595,6 +613,6 @@ int main(int argc, char **argv) {
PrettyStackTraceProgram X(argc, argv);
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n");
-
+
return AnalyzeBitcode();
}
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index 2cfd6bb22a0f..7638f3c629f8 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -1,5 +1,6 @@
include(TestBigEndian)
+include(FindPerl)
if( NOT PERL_FOUND )
message(FATAL_ERROR "Perl required but not found!")
endif( NOT PERL_FOUND )
@@ -7,6 +8,8 @@ endif( NOT PERL_FOUND )
set(PERL ${PERL_EXECUTABLE})
set(VERSION PACKAGE_VERSION)
set(PREFIX ${LLVM_BINARY_DIR}) # TODO: Root for `make install'.
+set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR})
+set(abs_top_builddir ${LLVM_BINARY_DIR})
execute_process(COMMAND date
OUTPUT_VARIABLE LLVM_CONFIGTIME
OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -26,7 +29,7 @@ endif( IS_BIG_ENDIAN )
set(SHLIBEXT ${LTDL_SHLIB_EXT})
#EXEEXT already set.
set(OS "${CMAKE_SYSTEM}")
-set(ARCH "X86") # TODO: This gives "i686" in Linux: "${CMAKE_SYSTEM_PROCESSOR}")
+set(ARCH "${LLVM_NATIVE_ARCH}")
get_system_libs(LLVM_SYSTEM_LIBS_LIST)
foreach(l ${LLVM_SYSTEM_LIBS_LIST})
@@ -103,7 +106,6 @@ add_custom_command(OUTPUT ${LLVM_CONFIG}
COMMAND ${CMAKE_COMMAND} -E remove -f temp.sed
COMMAND cat ${FINAL_LIBDEPS} >> ${LLVM_CONFIG}
COMMAND chmod +x ${LLVM_CONFIG}
- COMMAND cd ${CMAKE_BINARY_DIR} && ${CMAKE_COMMAND} -U HAVE_LLVM_CONFIG -D LLVM_BINARY_DIR="${LLVM_BINARY_DIR}" ${CMAKE_SOURCE_DIR}
DEPENDS ${FINAL_LIBDEPS} ${LLVM_CONFIG_IN}
COMMENT "Building llvm-config script."
)
@@ -113,7 +115,37 @@ add_custom_target(llvm-config.target ALL
add_dependencies(llvm-config.target ${llvm_lib_targets})
+# Make sure that llvm-config builds before the llvm tools, so we have
+# LibDeps.txt and can use it for updating the hard-coded library
+# dependencies in cmake/modules/LLVMLibDeps.cmake when the tools'
+# build fails due to outdated dependencies:
+set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} llvm-config.target)
+
install(FILES ${LLVM_CONFIG}
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
DESTINATION bin)
+
+
+# Regeneration of library dependencies.
+
+# See the comments at the end of cmake/modules/LLVMConfig.cmake for
+# notes and guidelines.
+
+set(LLVMLibDeps ${LLVM_MAIN_SRC_DIR}/cmake/modules/LLVMLibDeps.cmake)
+set(LLVMLibDeps_TMP ${CMAKE_CURRENT_BINARY_DIR}/LLVMLibDeps.cmake.tmp)
+
+add_custom_command(OUTPUT ${LLVMLibDeps_TMP}
+ COMMAND sed -e s'@\\.a@@g' -e s'@\\.so@@g' -e 's@libLLVM@LLVM@g' -e 's@: @ @' -e 's@\\\(.*\\\)@set\(MSVC_LIB_DEPS_\\1\)@' ${FINAL_LIBDEPS} > ${LLVMLibDeps_TMP}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LLVMLibDeps_TMP} ${LLVMLibDeps}
+ DEPENDS ${FINAL_LIBDEPS}
+ COMMENT "Updating cmake library dependencies file ${LLVMLibDeps}"
+ )
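+# The sed above rewrites each line of LibDeps.txt into a set() call; e.g. a
+# hypothetical line "libLLVMCore.a: libLLVMSupport.a libLLVMSystem.a"
+# becomes "set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)".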
+
+if( LLVM_TARGETS_TO_BUILD STREQUAL LLVM_ALL_TARGETS )
+ add_custom_target(llvmlibdeps.target ALL DEPENDS ${LLVMLibDeps_TMP})
+ add_dependencies(llvmlibdeps.target llvm-config.target)
+ set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} llvmlibdeps.target)
+endif()
+
+set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
diff --git a/tools/llvm-config/llvm-config.in.in b/tools/llvm-config/llvm-config.in.in
index 36b5112bb67b..7f93f168dfd8 100644
--- a/tools/llvm-config/llvm-config.in.in
+++ b/tools/llvm-config/llvm-config.in.in
@@ -26,17 +26,6 @@ my $PREFIX = q{@LLVM_PREFIX@};
my $LLVM_CONFIGTIME = q{@LLVM_CONFIGTIME@};
my $LLVM_SRC_ROOT = q{@abs_top_srcdir@};
my $LLVM_OBJ_ROOT = q{@abs_top_builddir@};
-my $LLVM_ON_WIN32 = q{@LLVM_ON_WIN32@};
-my $LLVM_ON_UNIX = q{@LLVM_ON_UNIX@};
-my $LLVMGCCDIR = q{@LLVMGCCDIR@};
-my $LLVMGCC = q{@LLVMGCC@};
-my $LLVMGXX = q{@LLVMGXX@};
-my $LLVMGCC_VERSION = q{@LLVMGCC_VERSION@};
-my $LLVMGCC_MAJVERS = q{@LLVMGCC_MAJVERS@};
-my $ENDIAN = q{@ENDIAN@};
-my $SHLIBEXT = q{@SHLIBEXT@};
-my $EXEEXT = q{@EXEEXT@};
-my $OS = q{@OS@};
my $ARCH = lc(q{@ARCH@});
my $TARGET_TRIPLE = q{@target@};
my $TARGETS_TO_BUILD = q{@TARGETS_TO_BUILD@};
@@ -81,12 +70,15 @@ $ABS_OBJ_ROOT = `cd $ABS_OBJ_ROOT; $PWD` if (-d $ABS_OBJ_ROOT);
chomp($ABS_OBJ_ROOT);
my $INCLUDEDIR = "$ABS_RUN_DIR/include";
+my $INCLUDEOPTION = "-I$INCLUDEDIR";
my $LIBDIR = "$ABS_RUN_DIR/lib";
my $BINDIR = "$ABS_RUN_DIR/bin";
if ($ABS_RUN_DIR eq $ABS_OBJ_ROOT) {
# If we are running out of the build directory, the include dir is in the
# srcdir.
$INCLUDEDIR = "$LLVM_SRC_ROOT/include";
+ # We need include files from both the srcdir and objdir.
+  $INCLUDEOPTION = "-I$INCLUDEDIR -I$LLVM_OBJ_ROOT/include";
} else {
# If installed, ignore the prefix the tree was configured with, use the
# current prefix.
@@ -120,11 +112,11 @@ foreach my $arg (@ARGV) {
} elsif ($arg eq "--libdir") {
$has_opt = 1; print "$LIBDIR\n";
} elsif ($arg eq "--cppflags") {
- $has_opt = 1; print "-I$INCLUDEDIR $CPPFLAGS\n";
+ $has_opt = 1; print "$INCLUDEOPTION $CPPFLAGS\n";
} elsif ($arg eq "--cflags") {
- $has_opt = 1; print "-I$INCLUDEDIR $CFLAGS\n";
+ $has_opt = 1; print "$INCLUDEOPTION $CFLAGS\n";
} elsif ($arg eq "--cxxflags") {
- $has_opt = 1; print "-I$INCLUDEDIR $CXXFLAGS\n";
+ $has_opt = 1; print "$INCLUDEOPTION $CXXFLAGS\n";
} elsif ($arg eq "--ldflags") {
$has_opt = 1; print "-L$LIBDIR $LDFLAGS $SYSTEM_LIBS\n";
} elsif ($arg eq "--libs") {
@@ -330,7 +322,16 @@ sub build_name_map {
foreach my $target (@TARGETS_BUILT) {
# FIXME: Temporary, until we don't switch all targets
if (defined $NAME_MAP{$target.'asmprinter'}) {
- $NAME_MAP{$target} = [$target.'asmprinter', $target.'codegen']
+ $NAME_MAP{$target} = [$target.'info',
+ $target.'asmprinter',
+ $target.'codegen']
+ } else {
+ $NAME_MAP{$target} = [$target.'info',
+ $NAME_MAP{$target}[0]]
+ }
+
+ if (defined $NAME_MAP{$target.'asmparser'}) {
+      push @{$NAME_MAP{$target}}, $target.'asmparser'
}
}
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 901c8e9d3a93..b8b1a39384cd 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -18,18 +18,13 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
-#include <iostream>
-#include <fstream>
#include <memory>
using namespace llvm;
@@ -41,7 +36,7 @@ OutputFilename("o", cl::desc("Override output filename"),
cl::value_desc("filename"));
static cl::opt<bool>
-Force("f", cl::desc("Overwrite output files"));
+Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
DontPrint("disable-output", cl::desc("Don't output the .ll file"), cl::Hidden);
@@ -51,95 +46,66 @@ int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
- try {
- cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
-
- std::ostream *Out = &std::cout; // Default to printing to stdout.
- std::string ErrorMessage;
-
- std::auto_ptr<Module> M;
-
- if (MemoryBuffer *Buffer
- = MemoryBuffer::getFileOrSTDIN(InputFilename, &ErrorMessage)) {
- M.reset(ParseBitcodeFile(Buffer, Context, &ErrorMessage));
- delete Buffer;
- }
+
+
+ cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
+
+ std::string ErrorMessage;
+ std::auto_ptr<Module> M;
+
+ if (MemoryBuffer *Buffer
+ = MemoryBuffer::getFileOrSTDIN(InputFilename, &ErrorMessage)) {
+ M.reset(ParseBitcodeFile(Buffer, Context, &ErrorMessage));
+ delete Buffer;
+ }
- if (M.get() == 0) {
- cerr << argv[0] << ": ";
- if (ErrorMessage.size())
- cerr << ErrorMessage << "\n";
- else
- cerr << "bitcode didn't read correctly.\n";
- return 1;
- }
-
- if (DontPrint) {
- // Just use stdout. We won't actually print anything on it.
- } else if (OutputFilename != "") { // Specified an output filename?
- if (OutputFilename != "-") { // Not stdout?
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists! Sending to standard output.\n";
- } else {
- Out = new std::ofstream(OutputFilename.c_str());
- }
- }
+ if (M.get() == 0) {
+ errs() << argv[0] << ": ";
+ if (ErrorMessage.size())
+ errs() << ErrorMessage << "\n";
+ else
+ errs() << "bitcode didn't read correctly.\n";
+ return 1;
+ }
+
+ // Just use stdout. We won't actually print anything on it.
+ if (DontPrint)
+ OutputFilename = "-";
+
+ if (OutputFilename.empty()) { // Unspecified output, infer it.
+ if (InputFilename == "-") {
+ OutputFilename = "-";
} else {
- if (InputFilename == "-") {
- OutputFilename = "-";
- } else {
- std::string IFN = InputFilename;
- int Len = IFN.length();
- if (IFN[Len-3] == '.' && IFN[Len-2] == 'b' && IFN[Len-1] == 'c') {
- // Source ends in .bc
- OutputFilename = std::string(IFN.begin(), IFN.end()-3)+".ll";
- } else {
- OutputFilename = IFN+".ll";
- }
-
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists! Sending to standard output.\n";
- } else {
- Out = new std::ofstream(OutputFilename.c_str());
-
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
- }
- }
- }
-
- if (!Out->good()) {
- cerr << argv[0] << ": error opening " << OutputFilename
- << ": sending to stdout instead!\n";
- Out = &std::cout;
- }
-
- // All that llvm-dis does is write the assembly to a file.
- if (!DontPrint) {
- PassManager Passes;
- raw_os_ostream L(*Out);
- Passes.add(createPrintModulePass(&L));
- Passes.run(*M.get());
+ const std::string &IFN = InputFilename;
+ int Len = IFN.length();
+ // If the source ends in .bc, strip it off.
+ if (IFN[Len-3] == '.' && IFN[Len-2] == 'b' && IFN[Len-1] == 'c')
+ OutputFilename = std::string(IFN.begin(), IFN.end()-3)+".ll";
+ else
+ OutputFilename = IFN+".ll";
}
+ }
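+  // e.g. an input of "prog.bc" is written to "prog.ll"; any other input
+  // name simply gets ".ll" appended (illustrative).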
- if (Out != &std::cout) {
- ((std::ofstream*)Out)->close();
- delete Out;
- }
- return 0;
- } catch (const std::string& msg) {
- cerr << argv[0] << ": " << msg << "\n";
- } catch (...) {
- cerr << argv[0] << ": Unexpected unknown exception occurred.\n";
+ // Make sure that the Out file gets unlinked from the disk if we get a
+ // SIGINT.
+ if (OutputFilename != "-")
+ sys::RemoveFileOnSignal(sys::Path(OutputFilename));
+
+ std::string ErrorInfo;
+ std::auto_ptr<raw_fd_ostream>
+ Out(new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
}
- return 1;
+ // All that llvm-dis does is write the assembly to a file.
+ if (!DontPrint)
+ *Out << *M;
+
+ return 0;
}
diff --git a/tools/llvm-extract/CMakeLists.txt b/tools/llvm-extract/CMakeLists.txt
index 88e9343ffae0..a4e3266e3532 100644
--- a/tools/llvm-extract/CMakeLists.txt
+++ b/tools/llvm-extract/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ipo bitreader bitwriter)
+set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter)
add_llvm_tool(llvm-extract
llvm-extract.cpp
diff --git a/tools/llvm-extract/Makefile b/tools/llvm-extract/Makefile
index 2ef88415c6a6..5672aa3299a2 100644
--- a/tools/llvm-extract/Makefile
+++ b/tools/llvm-extract/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../..
TOOLNAME = llvm-extract
-LINK_COMPONENTS := ipo bitreader bitwriter
+LINK_COMPONENTS := ipo bitreader bitwriter asmparser
# This tool has no plugins, so optimize startup time.
TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index af0cf0705bf9..517244f55ba4 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -15,17 +15,19 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IRReader.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
#include "llvm/System/Signals.h"
-#include <iostream>
#include <memory>
-#include <fstream>
using namespace llvm;
// InputFilename - The filename to read from.
@@ -38,7 +40,7 @@ OutputFilename("o", cl::desc("Specify output filename"),
cl::value_desc("filename"), cl::init("-"));
static cl::opt<bool>
-Force("f", cl::desc("Overwrite output files"));
+Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
DeleteFn("delete", cl::desc("Delete specified Globals from Module"));
@@ -57,28 +59,25 @@ static cl::opt<std::string>
ExtractGlobal("glob", cl::desc("Specify global to extract"), cl::init(""),
cl::value_desc("global"));
+static cl::opt<bool>
+OutputAssembly("S",
+ cl::desc("Write output as LLVM assembly"), cl::Hidden);
+
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n");
+ SMDiagnostic Err;
std::auto_ptr<Module> M;
-
- MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename);
- if (Buffer == 0) {
- cerr << argv[0] << ": Error reading file '" + InputFilename + "'\n";
- return 1;
- } else {
- M.reset(ParseBitcodeFile(Buffer, Context));
- }
- delete Buffer;
-
+ M.reset(ParseIRFile(InputFilename, Err, Context));
+
if (M.get() == 0) {
- cerr << argv[0] << ": bitcode didn't read correctly.\n";
+ Err.Print(argv[0], errs());
return 1;
}
@@ -91,8 +90,8 @@ int main(int argc, char **argv) {
Function *F = M.get()->getFunction(ExtractFunc);
if (F == 0 && G == 0) {
- cerr << argv[0] << ": program doesn't contain function named '"
- << ExtractFunc << "' or a global named '" << ExtractGlobal << "'!\n";
+ errs() << argv[0] << ": program doesn't contain function named '"
+ << ExtractFunc << "' or a global named '" << ExtractGlobal << "'!\n";
return 1;
}
@@ -111,28 +110,24 @@ int main(int argc, char **argv) {
Passes.add(createDeadTypeEliminationPass()); // Remove dead types...
Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls
- std::ostream *Out = 0;
-
- if (OutputFilename != "-") { // Not stdout?
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 1;
- }
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- Out = new std::ofstream(OutputFilename.c_str(), io_mode);
- } else { // Specified stdout
- // FIXME: cout is not binary!
- Out = &std::cout;
+ // Make sure that the Output file gets unlinked from the disk if we get a
+ // SIGINT
+ sys::RemoveFileOnSignal(sys::Path(OutputFilename));
+
+ std::string ErrorInfo;
+ raw_fd_ostream Out(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
}
- Passes.add(CreateBitcodeWriterPass(*Out));
+ if (OutputAssembly)
+ Passes.add(createPrintModulePass(&Out));
+ else if (Force || !CheckBitcodeOutputToConsole(Out, true))
+ Passes.add(createBitcodeWriterPass(Out));
+
Passes.run(*M.get());
- if (Out != &std::cout)
- delete Out;
return 0;
}
diff --git a/tools/llvm-ld/Optimize.cpp b/tools/llvm-ld/Optimize.cpp
index e4668958dbbe..6143dc87d356 100644
--- a/tools/llvm-ld/Optimize.cpp
+++ b/tools/llvm-ld/Optimize.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StandardPasses.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -25,7 +26,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/PluginLoader.h"
-#include <iostream>
using namespace llvm;
// Pass Name Options as generated by the PassNameParser
@@ -109,8 +109,8 @@ void Optimize(Module* M) {
if (Opt->getNormalCtor())
addPass(Passes, Opt->getNormalCtor()());
else
- std::cerr << "llvm-ld: cannot create pass: " << Opt->getPassName()
- << "\n";
+ errs() << "llvm-ld: cannot create pass: " << Opt->getPassName()
+ << "\n";
}
  // The user's passes may leave cruft around. Clean up after them, but
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index 2b9d2550dc2a..ef3c250eab51 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -12,7 +12,7 @@
// Additionally, this program outputs a shell script that is used to invoke LLI
// to execute the program. In this manner, the generated executable (a.out for
// example) is directly executable, whereas the bitcode file actually lives in
-// the a.out.bc file generated by this program. Also, Force is on by default.
+// the a.out.bc file generated by this program.
//
// Note that if someone (or a script) deletes the executable program generated,
// the .bc file will be left around. Considering that this is a temporary hack,
@@ -29,21 +29,24 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachineRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include "llvm/Config/config.h"
-#include <fstream>
#include <memory>
#include <cstring>
using namespace llvm;
+// Rightly this should go in a header file but it just seems such a waste.
+namespace llvm {
+extern void Optimize(Module*);
+}
+
// Input/Output Options
static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
cl::desc("<input bitcode files>"));
@@ -52,6 +55,10 @@ static cl::opt<std::string> OutputFilename("o", cl::init("a.out"),
cl::desc("Override output filename"),
cl::value_desc("filename"));
+static cl::opt<std::string> BitcodeOutputFilename("b", cl::init(""),
+ cl::desc("Override bitcode output filename"),
+ cl::value_desc("filename"));
+
static cl::opt<bool> Verbose("v",
cl::desc("Print information about actions taken"));
@@ -124,7 +131,7 @@ static std::string progname;
/// Message - The message to print to standard error.
///
static void PrintAndExit(const std::string &Message, int errcode = 1) {
- cerr << progname << ": " << Message << "\n";
+ errs() << progname << ": " << Message << "\n";
llvm_shutdown();
exit(errcode);
}
@@ -133,8 +140,8 @@ static void PrintCommand(const std::vector<const char*> &args) {
std::vector<const char*>::const_iterator I = args.begin(), E = args.end();
for (; I != E; ++I)
if (*I)
- cout << "'" << *I << "'" << " ";
- cout << "\n" << std::flush;
+ outs() << "'" << *I << "'" << " ";
+ outs() << "\n"; outs().flush();
}
/// CopyEnv - This function takes an array of environment variables and makes a
@@ -219,14 +226,14 @@ static void RemoveEnv(const char * name, char ** const envp) {
void GenerateBitcode(Module* M, const std::string& FileName) {
if (Verbose)
- cout << "Generating Bitcode To " << FileName << '\n';
+ outs() << "Generating Bitcode To " << FileName << '\n';
// Create the output file.
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- std::ofstream Out(FileName.c_str(), io_mode);
- if (!Out.good())
- PrintAndExit("error opening '" + FileName + "' for writing!");
+ std::string ErrorInfo;
+ raw_fd_ostream Out(FileName.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrorInfo.empty())
+ PrintAndExit(ErrorInfo);
// Ensure that the bitcode file gets removed from the disk if we get a
// terminating signal.
@@ -267,7 +274,7 @@ static int GenerateAssembly(const std::string &OutputFilename,
args.push_back(0);
if (Verbose) {
- cout << "Generating Assembly With: \n";
+ outs() << "Generating Assembly With: \n";
PrintCommand(args);
}
@@ -290,7 +297,7 @@ static int GenerateCFile(const std::string &OutputFile,
args.push_back(0);
if (Verbose) {
- cout << "Generating C Source With: \n";
+ outs() << "Generating C Source With: \n";
PrintCommand(args);
}
@@ -387,7 +394,7 @@ static int GenerateNative(const std::string &OutputFilename,
Args.push_back(0);
if (Verbose) {
- cout << "Generating Native Executable With:\n";
+ outs() << "Generating Native Executable With:\n";
PrintCommand(Args);
}
@@ -402,13 +409,14 @@ static int GenerateNative(const std::string &OutputFilename,
/// bitcode file for the program.
static void EmitShellScript(char **argv) {
if (Verbose)
- cout << "Emitting Shell Script\n";
+ outs() << "Emitting Shell Script\n";
#if defined(_WIN32) || defined(__CYGWIN__)
// Windows doesn't support #!/bin/sh style shell scripts in .exe files. To
// support windows systems, we copy the llvm-stub.exe executable from the
// build tree to the destination file.
std::string ErrMsg;
- sys::Path llvmstub = FindExecutable("llvm-stub.exe", argv[0]);
+ sys::Path llvmstub = FindExecutable("llvm-stub.exe", argv[0],
+ (void *)(intptr_t)&Optimize);
if (llvmstub.isEmpty())
PrintAndExit("Could not find llvm-stub.exe executable!");
@@ -419,9 +427,10 @@ static void EmitShellScript(char **argv) {
#endif
// Output the script to start the program...
- std::ofstream Out2(OutputFilename.c_str());
- if (!Out2.good())
- PrintAndExit("error opening '" + OutputFilename + "' for writing!");
+ std::string ErrorInfo;
+ raw_fd_ostream Out2(OutputFilename.c_str(), ErrorInfo);
+ if (!ErrorInfo.empty())
+ PrintAndExit(ErrorInfo);
Out2 << "#!/bin/sh\n";
// Allow user to setenv LLVMINTERP if lli is not in their PATH.
@@ -457,9 +466,9 @@ static void EmitShellScript(char **argv) {
if (FullLibraryPath.isEmpty())
FullLibraryPath = sys::Path::FindLibrary(*i);
if (!FullLibraryPath.isEmpty())
- Out2 << " -load=" << FullLibraryPath.toString() << " \\\n";
+ Out2 << " -load=" << FullLibraryPath.str() << " \\\n";
}
- Out2 << " $0.bc ${1+\"$@\"}\n";
+ Out2 << " " << BitcodeOutputFilename << " ${1+\"$@\"}\n";
Out2.close();
}
@@ -497,17 +506,12 @@ static void BuildLinkItems(
}
}
-// Rightly this should go in a header file but it just seems such a waste.
-namespace llvm {
-extern void Optimize(Module*);
-}
-
int main(int argc, char **argv, char **envp) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
try {
// Initial global variable above for convenience printing of program name.
@@ -568,16 +572,20 @@ int main(int argc, char **argv, char **envp) {
sys::Path ExeFile( OutputFilename );
if (ExeFile.getSuffix() == "") {
ExeFile.appendSuffix("exe");
- OutputFilename = ExeFile.toString();
+ OutputFilename = ExeFile.str();
}
}
#endif
// Generate the bitcode for the optimized module.
- std::string RealBitcodeOutput = OutputFilename;
+ // If -b wasn't specified, use the name specified
+ // with -o to construct BitcodeOutputFilename.
+ if (BitcodeOutputFilename.empty()) {
+ BitcodeOutputFilename = OutputFilename;
+ if (!LinkAsLibrary) BitcodeOutputFilename += ".bc";
+ }
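+    // e.g. "-o a.out" with no "-b" puts the bitcode in "a.out.bc"
+    // (illustrative; no ".bc" suffix is added when linking as a library).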
- if (!LinkAsLibrary) RealBitcodeOutput += ".bc";
- GenerateBitcode(Composite.get(), RealBitcodeOutput);
+ GenerateBitcode(Composite.get(), BitcodeOutputFilename);
// If we are not linking a library, generate either a native executable
// or a JIT shell script, depending upon what the user wants.
@@ -602,12 +610,12 @@ int main(int argc, char **argv, char **envp) {
const char* args[4];
args[0] = I->c_str();
- args[1] = RealBitcodeOutput.c_str();
+ args[1] = BitcodeOutputFilename.c_str();
args[2] = tmp_output.c_str();
args[3] = 0;
if (0 == sys::Program::ExecuteAndWait(prog, args, 0,0,0,0, &ErrMsg)) {
       if (tmp_output.isBitcodeFile()) {
- sys::Path target(RealBitcodeOutput);
+ sys::Path target(BitcodeOutputFilename);
target.eraseFromDisk();
if (tmp_output.renamePathOnDisk(target, &ErrMsg))
PrintAndExit(ErrMsg, 2);
@@ -633,21 +641,22 @@ int main(int argc, char **argv, char **envp) {
sys::RemoveFileOnSignal(sys::Path(OutputFilename));
// Determine the locations of the llc and gcc programs.
- sys::Path llc = FindExecutable("llc", argv[0]);
+ sys::Path llc = FindExecutable("llc", argv[0],
+ (void *)(intptr_t)&Optimize);
if (llc.isEmpty())
PrintAndExit("Failed to find llc");
- sys::Path gcc = FindExecutable("gcc", argv[0]);
+ sys::Path gcc = sys::Program::FindProgramByName("gcc");
if (gcc.isEmpty())
PrintAndExit("Failed to find gcc");
// Generate an assembly language file for the bitcode.
std::string ErrMsg;
- if (0 != GenerateAssembly(AssemblyFile.toString(), RealBitcodeOutput,
+ if (0 != GenerateAssembly(AssemblyFile.str(), BitcodeOutputFilename,
llc, ErrMsg))
PrintAndExit(ErrMsg);
- if (0 != GenerateNative(OutputFilename, AssemblyFile.toString(),
+ if (0 != GenerateNative(OutputFilename, AssemblyFile.str(),
NativeLinkItems, gcc, envp, ErrMsg))
PrintAndExit(ErrMsg);
@@ -662,22 +671,22 @@ int main(int argc, char **argv, char **envp) {
sys::RemoveFileOnSignal(sys::Path(OutputFilename));
// Determine the locations of the llc and gcc programs.
- sys::Path llc = FindExecutable("llc", argv[0]);
+ sys::Path llc = FindExecutable("llc", argv[0],
+ (void *)(intptr_t)&Optimize);
if (llc.isEmpty())
PrintAndExit("Failed to find llc");
- sys::Path gcc = FindExecutable("gcc", argv[0]);
+ sys::Path gcc = sys::Program::FindProgramByName("gcc");
if (gcc.isEmpty())
PrintAndExit("Failed to find gcc");
// Generate an assembly language file for the bitcode.
std::string ErrMsg;
- if (0 != GenerateCFile(
- CFile.toString(), RealBitcodeOutput, llc, ErrMsg))
+ if (GenerateCFile(CFile.str(), BitcodeOutputFilename, llc, ErrMsg))
PrintAndExit(ErrMsg);
- if (0 != GenerateNative(OutputFilename, CFile.toString(),
- NativeLinkItems, gcc, envp, ErrMsg))
+ if (GenerateNative(OutputFilename, CFile.str(),
+ NativeLinkItems, gcc, envp, ErrMsg))
PrintAndExit(ErrMsg);
// Remove the assembly language file.
@@ -693,10 +702,10 @@ int main(int argc, char **argv, char **envp) {
PrintAndExit(ErrMsg);
// Make the bitcode file readable and directly executable in LLEE as well
- if (sys::Path(RealBitcodeOutput).makeExecutableOnDisk(&ErrMsg))
+ if (sys::Path(BitcodeOutputFilename).makeExecutableOnDisk(&ErrMsg))
PrintAndExit(ErrMsg);
- if (sys::Path(RealBitcodeOutput).makeReadableOnDisk(&ErrMsg))
+ if (sys::Path(BitcodeOutputFilename).makeReadableOnDisk(&ErrMsg))
PrintAndExit(ErrMsg);
}
} catch (const std::string& msg) {
diff --git a/tools/llvm-link/CMakeLists.txt b/tools/llvm-link/CMakeLists.txt
index 69a435e5300a..11933f7f959e 100644
--- a/tools/llvm-link/CMakeLists.txt
+++ b/tools/llvm-link/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS linker bitreader bitwriter)
+set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser)
add_llvm_tool(llvm-link
llvm-link.cpp
diff --git a/tools/llvm-link/Makefile b/tools/llvm-link/Makefile
index ddc7a59b32a0..26370187c55f 100644
--- a/tools/llvm-link/Makefile
+++ b/tools/llvm-link/Makefile
@@ -9,7 +9,7 @@
LEVEL = ../..
TOOLNAME = llvm-link
-LINK_COMPONENTS = linker bitreader bitwriter
+LINK_COMPONENTS = linker bitreader bitwriter asmparser
# This tool has no plugins, so optimize startup time.
TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index f65e602f27f9..fae4d107b1a2 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -21,11 +21,11 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/IRReader.h"
#include "llvm/System/Signals.h"
#include "llvm/System/Path.h"
-#include <fstream>
-#include <iostream>
#include <memory>
using namespace llvm;
@@ -37,7 +37,12 @@ static cl::opt<std::string>
OutputFilename("o", cl::desc("Override output filename"), cl::init("-"),
cl::value_desc("filename"));
-static cl::opt<bool> Force("f", cl::desc("Overwrite output files"));
+static cl::opt<bool>
+Force("f", cl::desc("Enable binary output on terminals"));
+
+static cl::opt<bool>
+OutputAssembly("S",
+ cl::desc("Write output as LLVM assembly"), cl::Hidden);
static cl::opt<bool>
Verbose("v", cl::desc("Print information about actions taken"));
@@ -48,34 +53,28 @@ DumpAsm("d", cl::desc("Print assembly as linked"), cl::Hidden);
// LoadFile - Read the specified bitcode file in and return it. This routine
// searches the link path for the specified file to try to find it...
//
-static inline std::auto_ptr<Module> LoadFile(const std::string &FN,
+static inline std::auto_ptr<Module> LoadFile(const char *argv0,
+ const std::string &FN,
LLVMContext& Context) {
sys::Path Filename;
if (!Filename.set(FN)) {
- cerr << "Invalid file name: '" << FN << "'\n";
+ errs() << "Invalid file name: '" << FN << "'\n";
return std::auto_ptr<Module>();
}
- std::string ErrorMessage;
+ SMDiagnostic Err;
if (Filename.exists()) {
- if (Verbose) cerr << "Loading '" << Filename.c_str() << "'\n";
+ if (Verbose) errs() << "Loading '" << Filename.c_str() << "'\n";
Module* Result = 0;
- const std::string &FNStr = Filename.toString();
- if (MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(FNStr,
- &ErrorMessage)) {
- Result = ParseBitcodeFile(Buffer, Context, &ErrorMessage);
- delete Buffer;
- }
+ const std::string &FNStr = Filename.str();
+ Result = ParseIRFile(FNStr, Err, Context);
if (Result) return std::auto_ptr<Module>(Result); // Load successful!
- if (Verbose) {
- cerr << "Error opening bitcode file: '" << Filename.c_str() << "'";
- if (ErrorMessage.size()) cerr << ": " << ErrorMessage;
- cerr << "\n";
- }
+ if (Verbose)
+ Err.Print(argv0, errs());
} else {
- cerr << "Bitcode file: '" << Filename.c_str() << "' does not exist.\n";
+ errs() << "Bitcode file: '" << Filename.c_str() << "' does not exist.\n";
}
return std::auto_ptr<Module>();
@@ -86,32 +85,34 @@ int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm linker\n");
unsigned BaseArg = 0;
std::string ErrorMessage;
- std::auto_ptr<Module> Composite(LoadFile(InputFilenames[BaseArg], Context));
+ std::auto_ptr<Module> Composite(LoadFile(argv[0],
+ InputFilenames[BaseArg], Context));
if (Composite.get() == 0) {
- cerr << argv[0] << ": error loading file '"
- << InputFilenames[BaseArg] << "'\n";
+ errs() << argv[0] << ": error loading file '"
+ << InputFilenames[BaseArg] << "'\n";
return 1;
}
for (unsigned i = BaseArg+1; i < InputFilenames.size(); ++i) {
- std::auto_ptr<Module> M(LoadFile(InputFilenames[i], Context));
+ std::auto_ptr<Module> M(LoadFile(argv[0],
+ InputFilenames[i], Context));
if (M.get() == 0) {
- cerr << argv[0] << ": error loading file '" <<InputFilenames[i]<< "'\n";
+ errs() << argv[0] << ": error loading file '" <<InputFilenames[i]<< "'\n";
return 1;
}
- if (Verbose) cerr << "Linking in '" << InputFilenames[i] << "'\n";
+ if (Verbose) errs() << "Linking in '" << InputFilenames[i] << "'\n";
if (Linker::LinkModules(Composite.get(), M.get(), &ErrorMessage)) {
- cerr << argv[0] << ": link error in '" << InputFilenames[i]
- << "': " << ErrorMessage << "\n";
+ errs() << argv[0] << ": link error in '" << InputFilenames[i]
+ << "': " << ErrorMessage << "\n";
return 1;
}
}
@@ -119,39 +120,32 @@ int main(int argc, char **argv) {
// TODO: Iterate over the -l list and link in any modules containing
// global symbols that have not been resolved so far.
- if (DumpAsm) cerr << "Here's the assembly:\n" << *Composite.get();
+ if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
- // FIXME: cout is not binary!
- std::ostream *Out = &std::cout; // Default to printing to stdout...
- if (OutputFilename != "-") {
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 1;
- }
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- Out = new std::ofstream(OutputFilename.c_str(), io_mode);
- if (!Out->good()) {
- cerr << argv[0] << ": error opening '" << OutputFilename << "'!\n";
- return 1;
- }
+ std::string ErrorInfo;
+ std::auto_ptr<raw_ostream>
+ Out(new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
+ }
// Make sure that the Out file gets unlinked from the disk if we get a
// SIGINT
+ if (OutputFilename != "-")
sys::RemoveFileOnSignal(sys::Path(OutputFilename));
- }
- if (verifyModule(*Composite.get())) {
- cerr << argv[0] << ": linked module is broken!\n";
+ if (verifyModule(*Composite)) {
+ errs() << argv[0] << ": linked module is broken!\n";
return 1;
}
- if (Verbose) cerr << "Writing bitcode...\n";
- WriteBitcodeToFile(Composite.get(), *Out);
+ if (Verbose) errs() << "Writing bitcode...\n";
+ if (OutputAssembly) {
+ *Out << *Composite;
+ } else if (Force || !CheckBitcodeOutputToConsole(*Out, true))
+ WriteBitcodeToFile(Composite.get(), *Out);
- if (Out != &std::cout) delete Out;
return 0;
}
diff --git a/tools/llvm-mc/AsmCond.h b/tools/llvm-mc/AsmCond.h
new file mode 100644
index 000000000000..92a115eb8038
--- /dev/null
+++ b/tools/llvm-mc/AsmCond.h
@@ -0,0 +1,40 @@
+//===- AsmCond.h - Assembly file conditional assembly ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMCOND_H
+#define ASMCOND_H
+
+namespace llvm {
+
+/// AsmCond - Class to support conditional assembly
+///
+/// The conditional assembly feature (.if, .else, .elseif and .endif) is
+/// implemented with AsmCond, which tells us what we are in the middle of
+/// processing. Ignore can be either true or false; when true, we are ignoring
+/// the block of code in the middle of a conditional.
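+///
+/// A hypothetical input, for illustration:
+///
+///   .if 0
+///   .byte 1        # skipped while Ignore is true
+///   .else
+///   .byte 2        # assembled: the .else arm is taken
+///   .endif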
+
+class AsmCond {
+public:
+ enum ConditionalAssemblyType {
+ NoCond, // no conditional is being processed
+ IfCond, // inside if conditional
+ ElseIfCond, // inside elseif conditional
+ ElseCond // inside else conditional
+ };
+
+ ConditionalAssemblyType TheCond;
+ bool CondMet;
+ bool Ignore;
+
+ AsmCond() : TheCond(NoCond), CondMet(false), Ignore(false) {}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
index 7b744fbde65a..99055c6855ed 100644
--- a/tools/llvm-mc/AsmLexer.cpp
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -12,30 +12,24 @@
//===----------------------------------------------------------------------===//
#include "AsmLexer.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Config/config.h" // for strtoull.
+#include "llvm/MC/MCAsmInfo.h"
#include <cerrno>
#include <cstdio>
#include <cstdlib>
using namespace llvm;
-static StringSet<> &getSS(void *TheSS) {
- return *(StringSet<>*)TheSS;
-}
-
-AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
+AsmLexer::AsmLexer(SourceMgr &SM, const MCAsmInfo &_MAI) : SrcMgr(SM),
+ MAI(_MAI) {
CurBuffer = 0;
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
TokStart = 0;
-
- TheStringSet = new StringSet<>();
}
AsmLexer::~AsmLexer() {
- delete &getSS(TheStringSet);
}
SMLoc AsmLexer::getLoc() const {
@@ -48,12 +42,27 @@ void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
}
/// ReturnError - Set the error to the specified string at the specified
-/// location. This is defined to always return asmtok::Error.
-asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+/// location. This is defined to always return AsmToken::Error.
+AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
- return asmtok::Error;
+ return AsmToken(AsmToken::Error, StringRef(Loc, 0));
+}
+
+/// EnterIncludeFile - Enter the specified file. This prints an error and
+/// returns true on failure.
+bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
+ if (NewBuf == -1)
+ return true;
+
+ // Save the line number and lex buffer of the includer.
+ CurBuffer = NewBuf;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ return false;
}
+
int AsmLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
@@ -72,6 +81,10 @@ int AsmLexer::getNextChar() {
CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = ParentIncludeLoc.getPointer();
+
+ // Reset the token start pointer to the start of the new file.
+ TokStart = CurPtr;
+
return getNextChar();
}
@@ -83,37 +96,20 @@ int AsmLexer::getNextChar() {
}
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
-asmtok::TokKind AsmLexer::LexIdentifier() {
+AsmToken AsmLexer::LexIdentifier() {
while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
*CurPtr == '.' || *CurPtr == '@')
++CurPtr;
- // Unique string.
- CurStrVal =
- getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
- return asmtok::Identifier;
-}
-
-/// LexPercent: Register: %[a-zA-Z0-9]+
-asmtok::TokKind AsmLexer::LexPercent() {
- if (!isalnum(*CurPtr))
- return asmtok::Percent; // Single %.
-
- while (isalnum(*CurPtr))
- ++CurPtr;
-
- // Unique string.
- CurStrVal =
- getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
- return asmtok::Register;
+ return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}
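+// (Illustrative) "_foo$bar.baz" matches the pattern above and lexes as a
+// single Identifier token.
+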
/// LexSlash: Slash: /
/// C-Style Comment: /* ... */
-asmtok::TokKind AsmLexer::LexSlash() {
+AsmToken AsmLexer::LexSlash() {
switch (*CurPtr) {
case '*': break; // C style comment.
case '/': return ++CurPtr, LexLineComment();
- default: return asmtok::Slash;
+ default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
}
// C Style comment.
@@ -135,14 +131,16 @@ asmtok::TokKind AsmLexer::LexSlash() {
/// LexLineComment: Comment: #[^\n]*
/// : //[^\n]*
-asmtok::TokKind AsmLexer::LexLineComment() {
+AsmToken AsmLexer::LexLineComment() {
+  // FIXME: This is broken if we happen upon a comment at the end of a file
+  // which was .included, and which doesn't end with a newline.
int CurChar = getNextChar();
  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
if (CurChar == EOF)
- return asmtok::Eof;
- return asmtok::EndOfStatement;
+ return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
}
@@ -154,7 +152,7 @@ asmtok::TokKind AsmLexer::LexLineComment() {
/// Hex integer: 0x[0-9a-fA-F]+
/// Decimal integer: [1-9][0-9]*
/// TODO: FP literal.
-asmtok::TokKind AsmLexer::LexDigit() {
+AsmToken AsmLexer::LexDigit() {
if (*CurPtr == ':')
return ReturnError(TokStart, "FIXME: local label not implemented");
if (*CurPtr == 'f' || *CurPtr == 'b')
@@ -164,8 +162,8 @@ asmtok::TokKind AsmLexer::LexDigit() {
if (CurPtr[-1] != '0') {
while (isdigit(*CurPtr))
++CurPtr;
- CurIntVal = strtoll(TokStart, 0, 10);
- return asmtok::IntVal;
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ strtoll(TokStart, 0, 10));
}
if (*CurPtr == 'b') {
@@ -177,8 +175,8 @@ asmtok::TokKind AsmLexer::LexDigit() {
// Requires at least one binary digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid binary number");
- CurIntVal = strtoll(NumStart, 0, 2);
- return asmtok::IntVal;
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ strtoll(NumStart, 0, 2));
}
if (*CurPtr == 'x') {
@@ -192,29 +190,28 @@ asmtok::TokKind AsmLexer::LexDigit() {
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
errno = 0;
- CurIntVal = strtoll(NumStart, 0, 16);
if (errno == EINVAL)
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
if (errno == ERANGE) {
errno = 0;
- CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
if (errno == EINVAL)
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
if (errno == ERANGE)
return ReturnError(CurPtr-2, "Hexadecimal number out of range");
}
- return asmtok::IntVal;
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ (int64_t) strtoull(NumStart, 0, 16));
}
// Must be an octal number, it starts with 0.
while (*CurPtr >= '0' && *CurPtr <= '7')
++CurPtr;
- CurIntVal = strtoll(TokStart, 0, 8);
- return asmtok::IntVal;
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ strtoll(TokStart, 0, 8));
}
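+// (Illustrative) "42" lexes to Integer 42, "0x1F" to 31, "0b101" to 5, and
+// "017" (octal) to 15.
+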
/// LexQuote: String: "..."
-asmtok::TokKind AsmLexer::LexQuote() {
+AsmToken AsmLexer::LexQuote() {
int CurChar = getNextChar();
// TODO: does gas allow multiline string constants?
while (CurChar != '"') {
@@ -229,18 +226,35 @@ asmtok::TokKind AsmLexer::LexQuote() {
CurChar = getNextChar();
}
- // Unique string, include quotes for now.
- CurStrVal =
- getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
- return asmtok::String;
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+}
+
+StringRef AsmLexer::LexUntilEndOfStatement() {
+ TokStart = CurPtr;
+
+ while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
+ *CurPtr != ';' && // End of statement marker.
+ *CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ ++CurPtr;
+ }
+ return StringRef(TokStart, CurPtr-TokStart);
}
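+// (Illustrative) from "mov r0, r1 ; add r2, r3", LexUntilEndOfStatement
+// returns "mov r0, r1 " and leaves CurPtr at the ';'.
+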
+bool AsmLexer::isAtStartOfComment(char Char) {
+ // FIXME: This won't work for multi-character comment indicators like "//".
+ return Char == *MAI.getCommentString();
+}
-asmtok::TokKind AsmLexer::LexToken() {
+AsmToken AsmLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
int CurChar = getNextChar();
+ if (isAtStartOfComment(CurChar))
+ return LexLineComment();
+
switch (CurChar) {
default:
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
@@ -249,7 +263,7 @@ asmtok::TokKind AsmLexer::LexToken() {
// Unknown character, emit an error.
return ReturnError(TokStart, "invalid character in input");
- case EOF: return asmtok::Eof;
+ case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case 0:
case ' ':
case '\t':
@@ -257,52 +271,61 @@ asmtok::TokKind AsmLexer::LexToken() {
return LexToken();
case '\n': // FALL THROUGH.
case '\r': // FALL THROUGH.
- case ';': return asmtok::EndOfStatement;
- case ':': return asmtok::Colon;
- case '+': return asmtok::Plus;
- case '-': return asmtok::Minus;
- case '~': return asmtok::Tilde;
- case '(': return asmtok::LParen;
- case ')': return asmtok::RParen;
- case '*': return asmtok::Star;
- case ',': return asmtok::Comma;
- case '$': return asmtok::Dollar;
+ case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
+ case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
+ case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
+ case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
+ case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
+ case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
+ case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
+ case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
+ case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
+ case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
+ case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
+ case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
+ case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
case '=':
if (*CurPtr == '=')
- return ++CurPtr, asmtok::EqualEqual;
- return asmtok::Equal;
+ return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
case '|':
if (*CurPtr == '|')
- return ++CurPtr, asmtok::PipePipe;
- return asmtok::Pipe;
- case '^': return asmtok::Caret;
+ return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
+ case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
case '&':
if (*CurPtr == '&')
- return ++CurPtr, asmtok::AmpAmp;
- return asmtok::Amp;
+ return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
case '!':
if (*CurPtr == '=')
- return ++CurPtr, asmtok::ExclaimEqual;
- return asmtok::Exclaim;
- case '%': return LexPercent();
+ return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
+ case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
case '/': return LexSlash();
- case '#': return LexLineComment();
+ case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
case '"': return LexQuote();
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return LexDigit();
case '<':
switch (*CurPtr) {
- case '<': return ++CurPtr, asmtok::LessLess;
- case '=': return ++CurPtr, asmtok::LessEqual;
- case '>': return ++CurPtr, asmtok::LessGreater;
- default: return asmtok::Less;
+ case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
+ StringRef(TokStart, 2));
+ case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
}
case '>':
switch (*CurPtr) {
- case '>': return ++CurPtr, asmtok::GreaterGreater;
- case '=': return ++CurPtr, asmtok::GreaterEqual;
- default: return asmtok::Greater;
+ case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
}
// TODO: Quoted identifiers (objc methods etc)
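
For readers tracking the API change in this file: tokens are now
self-describing AsmToken values rather than a kind plus the lexer-side
CurStrVal/CurIntVal state. A minimal client-loop sketch, assuming only the
accessors visible in this patch (consumeInt and consumeId are hypothetical
stand-ins):

  AsmLexer Lexer(SrcMgr, MAI);
  Lexer.Lex();                          // Prime the first token.
  while (Lexer.isNot(AsmToken::Eof)) {
    AsmToken Tok = Lexer.getTok();
    if (Lexer.is(AsmToken::Integer))
      consumeInt(Tok.getIntVal());      // The value travels with the token.
    else if (Lexer.is(AsmToken::Identifier))
      consumeId(Tok.getIdentifier());   // So does the spelling.
    Lexer.Lex();
  }
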
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
index 6360b1280ce2..0696abc887f5 100644
--- a/tools/llvm-mc/AsmLexer.h
+++ b/tools/llvm-mc/AsmLexer.h
@@ -14,6 +14,9 @@
#ifndef ASMLEXER_H
#define ASMLEXER_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmLexer.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/DataTypes.h"
#include <string>
#include <cassert>
@@ -22,95 +25,53 @@ namespace llvm {
class MemoryBuffer;
class SourceMgr;
class SMLoc;
-
-namespace asmtok {
- enum TokKind {
- // Markers
- Eof, Error,
-
- // String values.
- Identifier,
- Register,
- String,
-
- // Integer values.
- IntVal,
-
- // No-value.
- EndOfStatement,
- Colon,
- Plus, Minus, Tilde,
- Slash, // '/'
- LParen, RParen,
- Star, Comma, Dollar, Equal, EqualEqual,
-
- Pipe, PipePipe, Caret,
- Amp, AmpAmp, Exclaim, ExclaimEqual, Percent,
- Less, LessEqual, LessLess, LessGreater,
- Greater, GreaterEqual, GreaterGreater
- };
-}
+class MCAsmInfo;
/// AsmLexer - Lexer class for assembly files.
-class AsmLexer {
+class AsmLexer : public MCAsmLexer {
SourceMgr &SrcMgr;
+ const MCAsmInfo &MAI;
const char *CurPtr;
const MemoryBuffer *CurBuf;
- // A llvm::StringSet<>, which provides uniqued and null-terminated strings.
- void *TheStringSet;
- // Information about the current token.
const char *TokStart;
- asmtok::TokKind CurKind;
- const char *CurStrVal; // This is valid for Identifier.
- int64_t CurIntVal;
-
- /// CurBuffer - This is the current buffer index we're lexing from as managed
- /// by the SourceMgr object.
+
+ /// This is the current buffer index we're lexing from as managed by the
+ /// SourceMgr object.
int CurBuffer;
void operator=(const AsmLexer&); // DO NOT IMPLEMENT
AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT
+
+protected:
+ /// LexToken - Read the next token and return its code.
+ virtual AsmToken LexToken();
+
public:
- AsmLexer(SourceMgr &SrcMgr);
+ AsmLexer(SourceMgr &SrcMgr, const MCAsmInfo &MAI);
~AsmLexer();
- asmtok::TokKind Lex() {
- return CurKind = LexToken();
- }
-
- asmtok::TokKind getKind() const { return CurKind; }
- bool is(asmtok::TokKind K) const { return CurKind == K; }
- bool isNot(asmtok::TokKind K) const { return CurKind != K; }
-
- const char *getCurStrVal() const {
- assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register ||
- CurKind == asmtok::String) &&
- "This token doesn't have a string value");
- return CurStrVal;
- }
- int64_t getCurIntVal() const {
- assert(CurKind == asmtok::IntVal && "This token isn't an integer");
- return CurIntVal;
- }
-
SMLoc getLoc() const;
+ StringRef LexUntilEndOfStatement();
+
+ bool isAtStartOfComment(char Char);
+
+ /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+ bool EnterIncludeFile(const std::string &Filename);
+
void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
private:
int getNextChar();
- asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg);
+ AsmToken ReturnError(const char *Loc, const std::string &Msg);
- /// LexToken - Read the next token and return its code.
- asmtok::TokKind LexToken();
- asmtok::TokKind LexIdentifier();
- asmtok::TokKind LexPercent();
- asmtok::TokKind LexSlash();
- asmtok::TokKind LexLineComment();
- asmtok::TokKind LexDigit();
- asmtok::TokKind LexQuote();
+ AsmToken LexIdentifier();
+ AsmToken LexSlash();
+ AsmToken LexLineComment();
+ AsmToken LexDigit();
+ AsmToken LexQuote();
};
} // end namespace llvm
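
The header now derives AsmLexer from the new MCAsmLexer interface, with
LexToken as the virtual hook. A sketch of the minimum another lexer would
provide, assuming only what this header shows of the base class:

  class MyAsmLexer : public MCAsmLexer {
  protected:
    // The one override visible here: produce the next token.
    virtual AsmToken LexToken() {
      return AsmToken(AsmToken::Eof, StringRef());  // Trivial stub.
    }
  };
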
diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
index f5bf58920120..aae27f5d7649 100644
--- a/tools/llvm-mc/AsmParser.cpp
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -13,21 +13,79 @@
#include "AsmParser.h"
-#include "AsmExpr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
-void AsmParser::Warning(SMLoc L, const char *Msg) {
- Lexer.PrintMessage(L, Msg, "warning");
+// Mach-O section uniquing.
+//
+// FIXME: Figure out where this should live; it should be shared by
+// TargetLoweringObjectFile.
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+
+AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
+ const MCAsmInfo &_MAI)
+ : Lexer(_SM, _MAI), Ctx(_Ctx), Out(_Out), TargetParser(0),
+ SectionUniquingMap(0) {
+ // Debugging directives.
+ AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile);
+ AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine);
+ AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc);
+}
+
+
+
+AsmParser::~AsmParser() {
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)SectionUniquingMap;
}
-bool AsmParser::Error(SMLoc L, const char *Msg) {
- Lexer.PrintMessage(L, Msg, "error");
+const MCSection *AsmParser::getMachOSection(const StringRef &Segment,
+ const StringRef &Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2,
+ SectionKind Kind) const {
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section; if so, this should
+ // be diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (SectionUniquingMap == 0)
+ SectionUniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+
+ // FIXME: This should validate the type and attributes.
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind, Ctx);
+}
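
The map key is the "Segment,Section" spelling, so exactly one MCSectionMachO
exists per pair no matter what flags later requests carry. A standalone
sketch of the scheme (illustrative only; needs llvm/ADT/StringMap.h and an
in-scope MCContext named Ctx):

  StringMap<const MCSectionMachO*> Map;
  SmallString<64> Key;
  Key += "__TEXT"; Key.push_back(','); Key += "__text";
  const MCSectionMachO *&Entry = Map[Key.str()];
  if (!Entry)  // The first request creates the section; later ones reuse it.
    Entry = MCSectionMachO::Create("__TEXT", "__text", /*TypeAndAttributes=*/0,
                                   /*Reserved2=*/0, SectionKind(), Ctx);
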
+
+void AsmParser::Warning(SMLoc L, const Twine &Msg) {
+ Lexer.PrintMessage(L, Msg.str(), "warning");
+}
+
+bool AsmParser::Error(SMLoc L, const Twine &Msg) {
+ Lexer.PrintMessage(L, Msg.str(), "error");
return true;
}
@@ -37,31 +95,87 @@ bool AsmParser::TokError(const char *Msg) {
}
bool AsmParser::Run() {
+ // Create the initial section.
+ //
+ // FIXME: Support -n.
+ // FIXME: Target hook & command line option for initial section.
+ Out.SwitchSection(getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind()));
+
+
// Prime the lexer.
Lexer.Lex();
bool HadError = false;
+ AsmCond StartingCondState = TheCondState;
+
// While we have input, parse each statement.
- while (Lexer.isNot(asmtok::Eof)) {
+ while (Lexer.isNot(AsmToken::Eof)) {
+ // Handle conditional assembly here before calling ParseStatement()
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ // If we have an identifier, handle it as the key symbol.
+ AsmToken ID = Lexer.getTok();
+ SMLoc IDLoc = ID.getLoc();
+ StringRef IDVal = ID.getString();
+
+ if (IDVal == ".if" ||
+ IDVal == ".elseif" ||
+ IDVal == ".else" ||
+ IDVal == ".endif") {
+ if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc))
+ continue;
+ HadError = true;
+ EatToEndOfStatement();
+ continue;
+ }
+ }
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ continue;
+ }
+
if (!ParseStatement()) continue;
- // If we had an error, remember it and recover by skipping to the next line.
+ // We had an error, remember it and recover by skipping to the next line.
HadError = true;
EatToEndOfStatement();
}
+
+ if (TheCondState.TheCond != StartingCondState.TheCond ||
+ TheCondState.Ignore != StartingCondState.Ignore)
+ return TokError("unmatched .ifs or .elses");
+ if (!HadError)
+ Out.Finish();
+
return HadError;
}
+/// ParseConditionalAssemblyDirectives - Parse the conditional assembly
+/// directives.
+bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (Directive == ".if")
+ return ParseDirectiveIf(DirectiveLoc);
+ if (Directive == ".elseif")
+ return ParseDirectiveElseIf(DirectiveLoc);
+ if (Directive == ".else")
+ return ParseDirectiveElse(DirectiveLoc);
+ if (Directive == ".endif")
+ return ParseDirectiveEndIf(DirectiveLoc);
+ return true;
+}
+
/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
void AsmParser::EatToEndOfStatement() {
- while (Lexer.isNot(asmtok::EndOfStatement) &&
- Lexer.isNot(asmtok::Eof))
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
Lexer.Lex();
// Eat EOL.
- if (Lexer.is(asmtok::EndOfStatement))
+ if (Lexer.is(AsmToken::EndOfStatement))
Lexer.Lex();
}
@@ -71,66 +185,71 @@ void AsmParser::EatToEndOfStatement() {
///
/// parenexpr ::= expr)
///
-bool AsmParser::ParseParenExpr(AsmExpr *&Res) {
+bool AsmParser::ParseParenExpr(const MCExpr *&Res) {
if (ParseExpression(Res)) return true;
- if (Lexer.isNot(asmtok::RParen))
+ if (Lexer.isNot(AsmToken::RParen))
return TokError("expected ')' in parentheses expression");
Lexer.Lex();
return false;
}
+MCSymbol *AsmParser::CreateSymbol(StringRef Name) {
+ if (MCSymbol *S = Ctx.LookupSymbol(Name))
+ return S;
+
+ // If the label starts with L it is an assembler temporary label.
+ if (Name.startswith("L"))
+ return Ctx.CreateTemporarySymbol(Name);
+
+ return Ctx.CreateSymbol(Name);
+}
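
CreateSymbol centralizes the Darwin convention that an 'L' prefix marks an
assembler-temporary label. A worked note (the names are illustrative):

  // CreateSymbol("Ltmp0") -> Ctx.CreateTemporarySymbol("Ltmp0")
  // CreateSymbol("_main") -> Ctx.CreateSymbol("_main")
  // Either way, Ctx.LookupSymbol is consulted first, so a label definition
  // and every later reference resolve to the same MCSymbol.
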
+
/// ParsePrimaryExpr - Parse a primary expression and return it.
/// primaryexpr ::= (parenexpr
/// primaryexpr ::= symbol
/// primaryexpr ::= number
/// primaryexpr ::= ~,+,- primaryexpr
-bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) {
+bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res) {
switch (Lexer.getKind()) {
default:
return TokError("unknown token in expression");
- case asmtok::Exclaim:
+ case AsmToken::Exclaim:
Lexer.Lex(); // Eat the operator.
if (ParsePrimaryExpr(Res))
return true;
- Res = new AsmUnaryExpr(AsmUnaryExpr::LNot, Res);
+ Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
- case asmtok::Identifier: {
+ case AsmToken::String:
+ case AsmToken::Identifier:
// This is a label, this should be parsed as part of an expression, to
// handle things like LFOO+4.
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getCurStrVal());
-
- // If this is use of an undefined symbol then mark it external.
- if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym))
- Sym->setExternal(true);
-
- Res = new AsmSymbolRefExpr(Sym);
+ Res = MCSymbolRefExpr::Create(Lexer.getTok().getIdentifier(), getContext());
Lexer.Lex(); // Eat identifier.
return false;
- }
- case asmtok::IntVal:
- Res = new AsmConstantExpr(Lexer.getCurIntVal());
- Lexer.Lex(); // Eat identifier.
+ case AsmToken::Integer:
+ Res = MCConstantExpr::Create(Lexer.getTok().getIntVal(), getContext());
+ Lexer.Lex(); // Eat token.
return false;
- case asmtok::LParen:
+ case AsmToken::LParen:
Lexer.Lex(); // Eat the '('.
return ParseParenExpr(Res);
- case asmtok::Minus:
+ case AsmToken::Minus:
Lexer.Lex(); // Eat the operator.
if (ParsePrimaryExpr(Res))
return true;
- Res = new AsmUnaryExpr(AsmUnaryExpr::Minus, Res);
+ Res = MCUnaryExpr::CreateMinus(Res, getContext());
return false;
- case asmtok::Plus:
+ case AsmToken::Plus:
Lexer.Lex(); // Eat the operator.
if (ParsePrimaryExpr(Res))
return true;
- Res = new AsmUnaryExpr(AsmUnaryExpr::Plus, Res);
+ Res = MCUnaryExpr::CreatePlus(Res, getContext());
return false;
- case asmtok::Tilde:
+ case AsmToken::Tilde:
Lexer.Lex(); // Eat the operator.
if (ParsePrimaryExpr(Res))
return true;
- Res = new AsmUnaryExpr(AsmUnaryExpr::Not, Res);
+ Res = MCUnaryExpr::CreateNot(Res, getContext());
return false;
}
}
@@ -142,119 +261,101 @@ bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) {
/// expr ::= expr *,/,%,<<,>> expr -> highest.
/// expr ::= primaryexpr
///
-bool AsmParser::ParseExpression(AsmExpr *&Res) {
+bool AsmParser::ParseExpression(const MCExpr *&Res) {
Res = 0;
return ParsePrimaryExpr(Res) ||
ParseBinOpRHS(1, Res);
}
-bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
- AsmExpr *Expr;
-
- SMLoc StartLoc = Lexer.getLoc();
- if (ParseExpression(Expr))
+bool AsmParser::ParseParenExpression(const MCExpr *&Res) {
+ if (ParseParenExpr(Res))
return true;
- if (!Expr->EvaluateAsAbsolute(Ctx, Res))
- return Error(StartLoc, "expected absolute expression");
-
return false;
}
-bool AsmParser::ParseRelocatableExpression(MCValue &Res) {
- AsmExpr *Expr;
+bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
+ const MCExpr *Expr;
SMLoc StartLoc = Lexer.getLoc();
if (ParseExpression(Expr))
return true;
- if (!Expr->EvaluateAsRelocatable(Ctx, Res))
- return Error(StartLoc, "expected relocatable expression");
-
- return false;
-}
-
-bool AsmParser::ParseParenRelocatableExpression(MCValue &Res) {
- AsmExpr *Expr;
-
- SMLoc StartLoc = Lexer.getLoc();
- if (ParseParenExpr(Expr))
- return true;
-
- if (!Expr->EvaluateAsRelocatable(Ctx, Res))
- return Error(StartLoc, "expected relocatable expression");
+ if (!Expr->EvaluateAsAbsolute(Ctx, Res))
+ return Error(StartLoc, "expected absolute expression");
return false;
}
-static unsigned getBinOpPrecedence(asmtok::TokKind K,
- AsmBinaryExpr::Opcode &Kind) {
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind) {
switch (K) {
- default: return 0; // not a binop.
+ default:
+ return 0; // not a binop.
// Lowest Precedence: &&, ||
- case asmtok::AmpAmp:
- Kind = AsmBinaryExpr::LAnd;
+ case AsmToken::AmpAmp:
+ Kind = MCBinaryExpr::LAnd;
return 1;
- case asmtok::PipePipe:
- Kind = AsmBinaryExpr::LOr;
+ case AsmToken::PipePipe:
+ Kind = MCBinaryExpr::LOr;
return 1;
// Low Precedence: +, -, ==, !=, <>, <, <=, >, >=
- case asmtok::Plus:
- Kind = AsmBinaryExpr::Add;
+ case AsmToken::Plus:
+ Kind = MCBinaryExpr::Add;
return 2;
- case asmtok::Minus:
- Kind = AsmBinaryExpr::Sub;
+ case AsmToken::Minus:
+ Kind = MCBinaryExpr::Sub;
return 2;
- case asmtok::EqualEqual:
- Kind = AsmBinaryExpr::EQ;
+ case AsmToken::EqualEqual:
+ Kind = MCBinaryExpr::EQ;
return 2;
- case asmtok::ExclaimEqual:
- case asmtok::LessGreater:
- Kind = AsmBinaryExpr::NE;
+ case AsmToken::ExclaimEqual:
+ case AsmToken::LessGreater:
+ Kind = MCBinaryExpr::NE;
return 2;
- case asmtok::Less:
- Kind = AsmBinaryExpr::LT;
+ case AsmToken::Less:
+ Kind = MCBinaryExpr::LT;
return 2;
- case asmtok::LessEqual:
- Kind = AsmBinaryExpr::LTE;
+ case AsmToken::LessEqual:
+ Kind = MCBinaryExpr::LTE;
return 2;
- case asmtok::Greater:
- Kind = AsmBinaryExpr::GT;
+ case AsmToken::Greater:
+ Kind = MCBinaryExpr::GT;
return 2;
- case asmtok::GreaterEqual:
- Kind = AsmBinaryExpr::GTE;
+ case AsmToken::GreaterEqual:
+ Kind = MCBinaryExpr::GTE;
return 2;
// Intermediate Precedence: |, &, ^
//
// FIXME: gas seems to support '!' as an infix operator?
- case asmtok::Pipe:
- Kind = AsmBinaryExpr::Or;
+ case AsmToken::Pipe:
+ Kind = MCBinaryExpr::Or;
return 3;
- case asmtok::Caret:
- Kind = AsmBinaryExpr::Xor;
+ case AsmToken::Caret:
+ Kind = MCBinaryExpr::Xor;
return 3;
- case asmtok::Amp:
- Kind = AsmBinaryExpr::And;
+ case AsmToken::Amp:
+ Kind = MCBinaryExpr::And;
return 3;
// Highest Precedence: *, /, %, <<, >>
- case asmtok::Star:
- Kind = AsmBinaryExpr::Mul;
+ case AsmToken::Star:
+ Kind = MCBinaryExpr::Mul;
return 4;
- case asmtok::Slash:
- Kind = AsmBinaryExpr::Div;
+ case AsmToken::Slash:
+ Kind = MCBinaryExpr::Div;
return 4;
- case asmtok::Percent:
- Kind = AsmBinaryExpr::Mod;
+ case AsmToken::Percent:
+ Kind = MCBinaryExpr::Mod;
return 4;
- case asmtok::LessLess:
- Kind = AsmBinaryExpr::Shl;
+ case AsmToken::LessLess:
+ Kind = MCBinaryExpr::Shl;
return 4;
- case asmtok::GreaterGreater:
- Kind = AsmBinaryExpr::Shr;
+ case AsmToken::GreaterGreater:
+ Kind = MCBinaryExpr::Shr;
return 4;
}
}
@@ -262,9 +363,9 @@ static unsigned getBinOpPrecedence(asmtok::TokKind K,
/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
/// Res contains the LHS of the expression on input.
-bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) {
+bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res) {
while (1) {
- AsmBinaryExpr::Opcode Kind = AsmBinaryExpr::Add;
+ MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
// If the next token is lower precedence than we are allowed to eat, return
@@ -275,19 +376,19 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) {
Lexer.Lex();
// Eat the next primary expression.
- AsmExpr *RHS;
+ const MCExpr *RHS;
if (ParsePrimaryExpr(RHS)) return true;
// If BinOp binds less tightly with RHS than the operator after RHS, let
// the pending operator take RHS as its LHS.
- AsmBinaryExpr::Opcode Dummy;
+ MCBinaryExpr::Opcode Dummy;
unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
if (TokPrec < NextTokPrec) {
if (ParseBinOpRHS(Precedence+1, RHS)) return true;
}
// Merge LHS and RHS according to operator.
- Res = new AsmBinaryExpr(Kind, Res, RHS);
+ Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
}
}
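
A worked trace of the precedence climber on "1 + 2 * 3", using the table
above (a trace, not code from the patch):

  ParseExpression: primary -> 1, then ParseBinOpRHS(Precedence=1, Res=1)
    '+' has precedence 2 >= 1: eat it, parse primary RHS -> 2
    next token '*' has precedence 4 > 2, so recurse ParseBinOpRHS(2, RHS=2)
      '*' has precedence 4 >= 2: eat it, primary RHS -> 3; merge -> Mul(2, 3)
    merge -> Res = Add(1, Mul(2, 3))
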
@@ -299,24 +400,23 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) {
/// ::= Label* Directive ...Operands... EndOfStatement
/// ::= Label* Identifier OperandList* EndOfStatement
bool AsmParser::ParseStatement() {
- switch (Lexer.getKind()) {
- default:
- return TokError("unexpected token at start of statement");
- case asmtok::EndOfStatement:
+ if (Lexer.is(AsmToken::EndOfStatement)) {
Lexer.Lex();
return false;
- case asmtok::Identifier:
- break;
- // TODO: Recurse on local labels etc.
}
-
- // If we have an identifier, handle it as the key symbol.
- SMLoc IDLoc = Lexer.getLoc();
- const char *IDVal = Lexer.getCurStrVal();
-
- // Consume the identifier, see what is after it.
- switch (Lexer.Lex()) {
- case asmtok::Colon: {
+
+ // Statements always start with an identifier.
+ AsmToken ID = Lexer.getTok();
+ SMLoc IDLoc = ID.getLoc();
+ StringRef IDVal;
+ if (ParseIdentifier(IDVal))
+ return TokError("unexpected token at start of statement");
+
+ // FIXME: Recurse on local labels?
+
+ // See what kind of statement we have.
+ switch (Lexer.getKind()) {
+ case AsmToken::Colon: {
// identifier ':' -> Label.
Lexer.Lex();
@@ -325,25 +425,21 @@ bool AsmParser::ParseStatement() {
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: This doesn't diagnose assignment to a symbol which has been
// implicitly marked as external.
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(IDVal);
- if (Sym->getSection())
+ MCSymbol *Sym = CreateSymbol(IDVal);
+ if (!Sym->isUndefined())
return Error(IDLoc, "invalid symbol redefinition");
- if (Ctx.GetSymbolValue(Sym))
- return Error(IDLoc, "symbol already used as assembler variable");
- // Since we saw a label, create a symbol and emit it.
- // FIXME: If the label starts with L it is an assembler temporary label.
- // Why does the client of this api need to know this?
+ // Emit the label.
Out.EmitLabel(Sym);
return ParseStatement();
}
- case asmtok::Equal:
+ case AsmToken::Equal:
// identifier '=' ... -> assignment statement
Lexer.Lex();
- return ParseAssignment(IDVal, false);
+ return ParseAssignment(IDVal);
default: // Normal instruction or directive.
break;
@@ -352,184 +448,261 @@ bool AsmParser::ParseStatement() {
// Otherwise, we have a normal instruction or directive.
if (IDVal[0] == '.') {
// FIXME: This should be driven based on a hash lookup and callback.
- if (!strcmp(IDVal, ".section"))
+ if (IDVal == ".section")
return ParseDirectiveDarwinSection();
- if (!strcmp(IDVal, ".text"))
+ if (IDVal == ".text")
// FIXME: This changes behavior based on the -static flag to the
// assembler.
- return ParseDirectiveSectionSwitch("__TEXT,__text",
- "regular,pure_instructions");
- if (!strcmp(IDVal, ".const"))
- return ParseDirectiveSectionSwitch("__TEXT,__const");
- if (!strcmp(IDVal, ".static_const"))
- return ParseDirectiveSectionSwitch("__TEXT,__static_const");
- if (!strcmp(IDVal, ".cstring"))
- return ParseDirectiveSectionSwitch("__TEXT,__cstring",
- "cstring_literals");
- if (!strcmp(IDVal, ".literal4"))
- return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals");
- if (!strcmp(IDVal, ".literal8"))
- return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals");
- if (!strcmp(IDVal, ".literal16"))
- return ParseDirectiveSectionSwitch("__TEXT,__literal16",
- "16byte_literals");
- if (!strcmp(IDVal, ".constructor"))
- return ParseDirectiveSectionSwitch("__TEXT,__constructor");
- if (!strcmp(IDVal, ".destructor"))
- return ParseDirectiveSectionSwitch("__TEXT,__destructor");
- if (!strcmp(IDVal, ".fvmlib_init0"))
- return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0");
- if (!strcmp(IDVal, ".fvmlib_init1"))
- return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1");
- if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC.
- return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs",
- "self_modifying_code+pure_instructions,5");
- // FIXME: .picsymbol_stub on PPC.
- if (!strcmp(IDVal, ".data"))
- return ParseDirectiveSectionSwitch("__DATA,__data");
- if (!strcmp(IDVal, ".static_data"))
- return ParseDirectiveSectionSwitch("__DATA,__static_data");
- if (!strcmp(IDVal, ".non_lazy_symbol_pointer"))
- return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer",
- "non_lazy_symbol_pointers");
- if (!strcmp(IDVal, ".lazy_symbol_pointer"))
- return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer",
- "lazy_symbol_pointers");
- if (!strcmp(IDVal, ".dyld"))
- return ParseDirectiveSectionSwitch("__DATA,__dyld");
- if (!strcmp(IDVal, ".mod_init_func"))
- return ParseDirectiveSectionSwitch("__DATA,__mod_init_func",
- "mod_init_funcs");
- if (!strcmp(IDVal, ".mod_term_func"))
- return ParseDirectiveSectionSwitch("__DATA,__mod_term_func",
- "mod_term_funcs");
- if (!strcmp(IDVal, ".const_data"))
- return ParseDirectiveSectionSwitch("__DATA,__const", "regular");
+ return ParseDirectiveSectionSwitch("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ if (IDVal == ".const")
+ return ParseDirectiveSectionSwitch("__TEXT", "__const");
+ if (IDVal == ".static_const")
+ return ParseDirectiveSectionSwitch("__TEXT", "__static_const");
+ if (IDVal == ".cstring")
+ return ParseDirectiveSectionSwitch("__TEXT","__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ if (IDVal == ".literal4")
+ return ParseDirectiveSectionSwitch("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS,
+ 4);
+ if (IDVal == ".literal8")
+ return ParseDirectiveSectionSwitch("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS,
+ 8);
+ if (IDVal == ".literal16")
+ return ParseDirectiveSectionSwitch("__TEXT","__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS,
+ 16);
+ if (IDVal == ".constructor")
+ return ParseDirectiveSectionSwitch("__TEXT","__constructor");
+ if (IDVal == ".destructor")
+ return ParseDirectiveSectionSwitch("__TEXT","__destructor");
+ if (IDVal == ".fvmlib_init0")
+ return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0");
+ if (IDVal == ".fvmlib_init1")
+ return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1");
+
+ // FIXME: The assembler manual claims that this has the self-modifying-code
+ // flag, at least on x86-32, but that does not appear to be correct.
+ if (IDVal == ".symbol_stub")
+ return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ // FIXME: Different on PPC and ARM.
+ 0, 16);
+ // FIXME: PowerPC only?
+ if (IDVal == ".picsymbol_stub")
+ return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, 26);
+ if (IDVal == ".data")
+ return ParseDirectiveSectionSwitch("__DATA", "__data");
+ if (IDVal == ".static_data")
+ return ParseDirectiveSectionSwitch("__DATA", "__static_data");
+
+ // FIXME: The section names of these two are misspelled in the assembler
+ // manual.
+ if (IDVal == ".non_lazy_symbol_pointer")
+ return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ 4);
+ if (IDVal == ".lazy_symbol_pointer")
+ return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ 4);
+
+ if (IDVal == ".dyld")
+ return ParseDirectiveSectionSwitch("__DATA", "__dyld");
+ if (IDVal == ".mod_init_func")
+ return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ 4);
+ if (IDVal == ".mod_term_func")
+ return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ 4);
+ if (IDVal == ".const_data")
+ return ParseDirectiveSectionSwitch("__DATA", "__const");
- // FIXME: Verify attributes on sections.
- if (!strcmp(IDVal, ".objc_class"))
- return ParseDirectiveSectionSwitch("__OBJC,__class");
- if (!strcmp(IDVal, ".objc_meta_class"))
- return ParseDirectiveSectionSwitch("__OBJC,__meta_class");
- if (!strcmp(IDVal, ".objc_cat_cls_meth"))
- return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth");
- if (!strcmp(IDVal, ".objc_cat_inst_meth"))
- return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth");
- if (!strcmp(IDVal, ".objc_protocol"))
- return ParseDirectiveSectionSwitch("__OBJC,__protocol");
- if (!strcmp(IDVal, ".objc_string_object"))
- return ParseDirectiveSectionSwitch("__OBJC,__string_object");
- if (!strcmp(IDVal, ".objc_cls_meth"))
- return ParseDirectiveSectionSwitch("__OBJC,__cls_meth");
- if (!strcmp(IDVal, ".objc_inst_meth"))
- return ParseDirectiveSectionSwitch("__OBJC,__inst_meth");
- if (!strcmp(IDVal, ".objc_cls_refs"))
- return ParseDirectiveSectionSwitch("__OBJC,__cls_refs");
- if (!strcmp(IDVal, ".objc_message_refs"))
- return ParseDirectiveSectionSwitch("__OBJC,__message_refs");
- if (!strcmp(IDVal, ".objc_symbols"))
- return ParseDirectiveSectionSwitch("__OBJC,__symbols");
- if (!strcmp(IDVal, ".objc_category"))
- return ParseDirectiveSectionSwitch("__OBJC,__category");
- if (!strcmp(IDVal, ".objc_class_vars"))
- return ParseDirectiveSectionSwitch("__OBJC,__class_vars");
- if (!strcmp(IDVal, ".objc_instance_vars"))
- return ParseDirectiveSectionSwitch("__OBJC,__instance_vars");
- if (!strcmp(IDVal, ".objc_module_info"))
- return ParseDirectiveSectionSwitch("__OBJC,__module_info");
- if (!strcmp(IDVal, ".objc_class_names"))
- return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
- if (!strcmp(IDVal, ".objc_meth_var_types"))
- return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
- if (!strcmp(IDVal, ".objc_meth_var_names"))
- return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
- if (!strcmp(IDVal, ".objc_selector_strs"))
- return ParseDirectiveSectionSwitch("__OBJC,__selector_strs");
+ if (IDVal == ".objc_class")
+ return ParseDirectiveSectionSwitch("__OBJC", "__class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_meta_class")
+ return ParseDirectiveSectionSwitch("__OBJC", "__meta_class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_cat_cls_meth")
+ return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_cat_inst_meth")
+ return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_protocol")
+ return ParseDirectiveSectionSwitch("__OBJC", "__protocol",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_string_object")
+ return ParseDirectiveSectionSwitch("__OBJC", "__string_object",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_cls_meth")
+ return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_inst_meth")
+ return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_cls_refs")
+ return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS,
+ 4);
+ if (IDVal == ".objc_message_refs")
+ return ParseDirectiveSectionSwitch("__OBJC", "__message_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS,
+ 4);
+ if (IDVal == ".objc_symbols")
+ return ParseDirectiveSectionSwitch("__OBJC", "__symbols",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_category")
+ return ParseDirectiveSectionSwitch("__OBJC", "__category",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_class_vars")
+ return ParseDirectiveSectionSwitch("__OBJC", "__class_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_instance_vars")
+ return ParseDirectiveSectionSwitch("__OBJC", "__instance_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_module_info")
+ return ParseDirectiveSectionSwitch("__OBJC", "__module_info",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ if (IDVal == ".objc_class_names")
+ return ParseDirectiveSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ if (IDVal == ".objc_meth_var_types")
+ return ParseDirectiveSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ if (IDVal == ".objc_meth_var_names")
+ return ParseDirectiveSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ if (IDVal == ".objc_selector_strs")
+ return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs",
+ MCSectionMachO::S_CSTRING_LITERALS);
// Assembler features
- if (!strcmp(IDVal, ".set"))
+ if (IDVal == ".set")
return ParseDirectiveSet();
// Data directives
- if (!strcmp(IDVal, ".ascii"))
+ if (IDVal == ".ascii")
return ParseDirectiveAscii(false);
- if (!strcmp(IDVal, ".asciz"))
+ if (IDVal == ".asciz")
return ParseDirectiveAscii(true);
- // FIXME: Target hooks for size? Also for "word", "hword".
- if (!strcmp(IDVal, ".byte"))
+ if (IDVal == ".byte")
return ParseDirectiveValue(1);
- if (!strcmp(IDVal, ".short"))
+ if (IDVal == ".short")
return ParseDirectiveValue(2);
- if (!strcmp(IDVal, ".long"))
+ if (IDVal == ".long")
return ParseDirectiveValue(4);
- if (!strcmp(IDVal, ".quad"))
+ if (IDVal == ".quad")
return ParseDirectiveValue(8);
// FIXME: Target hooks for IsPow2.
- if (!strcmp(IDVal, ".align"))
+ if (IDVal == ".align")
return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
- if (!strcmp(IDVal, ".align32"))
+ if (IDVal == ".align32")
return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
- if (!strcmp(IDVal, ".balign"))
+ if (IDVal == ".balign")
return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
- if (!strcmp(IDVal, ".balignw"))
+ if (IDVal == ".balignw")
return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
- if (!strcmp(IDVal, ".balignl"))
+ if (IDVal == ".balignl")
return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
- if (!strcmp(IDVal, ".p2align"))
+ if (IDVal == ".p2align")
return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
- if (!strcmp(IDVal, ".p2alignw"))
+ if (IDVal == ".p2alignw")
return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
- if (!strcmp(IDVal, ".p2alignl"))
+ if (IDVal == ".p2alignl")
return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
- if (!strcmp(IDVal, ".org"))
+ if (IDVal == ".org")
return ParseDirectiveOrg();
- if (!strcmp(IDVal, ".fill"))
+ if (IDVal == ".fill")
return ParseDirectiveFill();
- if (!strcmp(IDVal, ".space"))
+ if (IDVal == ".space")
return ParseDirectiveSpace();
// Symbol attribute directives
- if (!strcmp(IDVal, ".globl") || !strcmp(IDVal, ".global"))
+
+ if (IDVal == ".globl" || IDVal == ".global")
return ParseDirectiveSymbolAttribute(MCStreamer::Global);
- if (!strcmp(IDVal, ".hidden"))
+ if (IDVal == ".hidden")
return ParseDirectiveSymbolAttribute(MCStreamer::Hidden);
- if (!strcmp(IDVal, ".indirect_symbol"))
+ if (IDVal == ".indirect_symbol")
return ParseDirectiveSymbolAttribute(MCStreamer::IndirectSymbol);
- if (!strcmp(IDVal, ".internal"))
+ if (IDVal == ".internal")
return ParseDirectiveSymbolAttribute(MCStreamer::Internal);
- if (!strcmp(IDVal, ".lazy_reference"))
+ if (IDVal == ".lazy_reference")
return ParseDirectiveSymbolAttribute(MCStreamer::LazyReference);
- if (!strcmp(IDVal, ".no_dead_strip"))
+ if (IDVal == ".no_dead_strip")
return ParseDirectiveSymbolAttribute(MCStreamer::NoDeadStrip);
- if (!strcmp(IDVal, ".private_extern"))
+ if (IDVal == ".private_extern")
return ParseDirectiveSymbolAttribute(MCStreamer::PrivateExtern);
- if (!strcmp(IDVal, ".protected"))
+ if (IDVal == ".protected")
return ParseDirectiveSymbolAttribute(MCStreamer::Protected);
- if (!strcmp(IDVal, ".reference"))
+ if (IDVal == ".reference")
return ParseDirectiveSymbolAttribute(MCStreamer::Reference);
- if (!strcmp(IDVal, ".weak"))
+ if (IDVal == ".weak")
return ParseDirectiveSymbolAttribute(MCStreamer::Weak);
- if (!strcmp(IDVal, ".weak_definition"))
+ if (IDVal == ".weak_definition")
return ParseDirectiveSymbolAttribute(MCStreamer::WeakDefinition);
- if (!strcmp(IDVal, ".weak_reference"))
+ if (IDVal == ".weak_reference")
return ParseDirectiveSymbolAttribute(MCStreamer::WeakReference);
+ if (IDVal == ".comm")
+ return ParseDirectiveComm(/*IsLocal=*/false);
+ if (IDVal == ".lcomm")
+ return ParseDirectiveComm(/*IsLocal=*/true);
+ if (IDVal == ".zerofill")
+ return ParseDirectiveDarwinZerofill();
+ if (IDVal == ".desc")
+ return ParseDirectiveDarwinSymbolDesc();
+ if (IDVal == ".lsym")
+ return ParseDirectiveDarwinLsym();
+
+ if (IDVal == ".subsections_via_symbols")
+ return ParseDirectiveDarwinSubsectionsViaSymbols();
+ if (IDVal == ".abort")
+ return ParseDirectiveAbort();
+ if (IDVal == ".include")
+ return ParseDirectiveInclude();
+ if (IDVal == ".dump")
+ return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true);
+ if (IDVal == ".load")
+ return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsLoad=*/false);
+
+ // Look up the handler in the handler table.
+ bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal];
+ if (Handler)
+ return (this->*Handler)(IDVal, IDLoc);
+
+ // Target hook for parsing target specific directives.
+ if (!getTargetParser().ParseDirective(ID))
+ return false;
+
Warning(IDLoc, "ignoring directive for now");
EatToEndOfStatement();
return false;
}
MCInst Inst;
- if (ParseX86InstOperands(IDVal, Inst))
+ if (getTargetParser().ParseInstruction(IDVal, Inst))
return true;
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in argument list");
// Eat the end of statement marker.
@@ -542,15 +715,16 @@ bool AsmParser::ParseStatement() {
return false;
}
-bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) {
+bool AsmParser::ParseAssignment(const StringRef &Name) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
- MCValue Value;
- if (ParseRelocatableExpression(Value))
+ const MCExpr *Value;
+ SMLoc StartLoc = Lexer.getLoc();
+ if (ParseExpression(Value))
return true;
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in assignment");
// Eat the end of statement marker.
@@ -559,18 +733,29 @@ bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) {
// Diagnose assignment to a label.
//
// FIXME: Diagnostics. Note the location of the definition as a label.
- // FIXME: This doesn't diagnose assignment to a symbol which has been
- // implicitly marked as external.
// FIXME: Handle '.'.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
- if (Sym->getSection())
- return Error(EqualLoc, "invalid assignment to symbol emitted as a label");
- if (Sym->isExternal())
- return Error(EqualLoc, "invalid assignment to external symbol");
+ MCSymbol *Sym = CreateSymbol(Name);
+ if (!Sym->isUndefined() && !Sym->isAbsolute())
+ return Error(EqualLoc, "symbol has already been defined");
// Do the assignment.
- Out.EmitAssignment(Sym, Value, IsDotSet);
+ Out.EmitAssignment(Sym, Value);
+
+ return false;
+}
+
+/// ParseIdentifier:
+/// ::= identifier
+/// ::= string
+bool AsmParser::ParseIdentifier(StringRef &Res) {
+ if (Lexer.isNot(AsmToken::Identifier) &&
+ Lexer.isNot(AsmToken::String))
+ return true;
+
+ Res = Lexer.getTok().getIdentifier();
+
+ Lexer.Lex(); // Consume the identifier token.
return false;
}
@@ -578,16 +763,16 @@ bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) {
/// ParseDirectiveSet:
/// ::= .set identifier ',' expression
bool AsmParser::ParseDirectiveSet() {
- if (Lexer.isNot(asmtok::Identifier))
- return TokError("expected identifier after '.set' directive");
+ StringRef Name;
- const char *Name = Lexer.getCurStrVal();
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier after '.set' directive");
- if (Lexer.Lex() != asmtok::Comma)
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in '.set'");
Lexer.Lex();
- return ParseAssignment(Name, true);
+ return ParseAssignment(Name);
}
/// ParseDirectiveSection:
@@ -595,69 +780,152 @@ bool AsmParser::ParseDirectiveSet() {
/// FIXME: This should actually parse out the segment, section, attributes and
/// sizeof_stub fields.
bool AsmParser::ParseDirectiveDarwinSection() {
- if (Lexer.isNot(asmtok::Identifier))
- return TokError("expected identifier after '.section' directive");
-
- std::string Section = Lexer.getCurStrVal();
+ SMLoc Loc = Lexer.getLoc();
+
+ StringRef SectionName;
+ if (ParseIdentifier(SectionName))
+ return Error(Loc, "expected identifier after '.section' directive");
+
+ // Verify there is a following comma.
+ if (!Lexer.is(AsmToken::Comma))
+ return TokError("unexpected token in '.section' directive");
+
+ std::string SectionSpec = SectionName;
+ SectionSpec += ",";
+
+ // Add all the tokens until the end of the line; ParseSectionSpecifier will
+ // handle this.
+ StringRef EOL = Lexer.LexUntilEndOfStatement();
+ SectionSpec.append(EOL.begin(), EOL.end());
+
Lexer.Lex();
-
- // Accept a comma separated list of modifiers.
- while (Lexer.is(asmtok::Comma)) {
- Lexer.Lex();
-
- if (Lexer.isNot(asmtok::Identifier))
- return TokError("expected identifier in '.section' directive");
- Section += ',';
- Section += Lexer.getCurStrVal();
- Lexer.Lex();
- }
-
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.section' directive");
Lexer.Lex();
- Out.SwitchSection(Ctx.GetSection(Section.c_str()));
+
+ StringRef Segment, Section;
+ unsigned TAA, StubSize;
+ std::string ErrorStr =
+ MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
+ TAA, StubSize);
+
+ if (!ErrorStr.empty())
+ return Error(Loc, ErrorStr.c_str());
+
+ // FIXME: Arch specific.
+ Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind()));
return false;
}
-bool AsmParser::ParseDirectiveSectionSwitch(const char *Section,
- const char *Directives) {
- if (Lexer.isNot(asmtok::EndOfStatement))
+/// ParseDirectiveSectionSwitch - Switch sections, applying any implicit
+/// alignment.
+bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
+ const char *Section,
+ unsigned TAA, unsigned Align,
+ unsigned StubSize) {
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in section switching directive");
Lexer.Lex();
- std::string SectionStr = Section;
- if (Directives && Directives[0]) {
- SectionStr += ",";
- SectionStr += Directives;
+ // FIXME: Arch specific.
+ Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind()));
+
+ // Set the implicit alignment, if any.
+ //
+ // FIXME: This isn't really what 'as' does; I think it just uses the implicit
+ // alignment on the section (e.g., if one manually inserts bytes into the
+ // section, then just issuing the section switch directive will not realign
+ // the section). However, this is arguably more reasonable behavior, and there
+ // is no good reason for someone to intentionally emit incorrectly sized
+ // values into the implicitly aligned sections.
+ if (Align)
+ Out.EmitValueToAlignment(Align, 0, 1, 0);
+
+ return false;
+}
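
A worked example of the implicit-alignment path: the ".literal8" case earlier
in this patch calls

  ParseDirectiveSectionSwitch("__TEXT", "__literal8",
                              MCSectionMachO::S_8BYTE_LITERALS, /*Align=*/8);

so the section switch is followed by Out.EmitValueToAlignment(8, 0, 1, 0),
realigning the output to an 8-byte boundary.
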
+
+bool AsmParser::ParseEscapedString(std::string &Data) {
+ assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
+
+ Data = "";
+ StringRef Str = Lexer.getTok().getStringContents();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] != '\\') {
+ Data += Str[i];
+ continue;
+ }
+
+ // Recognize escaped characters. Note that the escape semantics here
+ // loosely follow Darwin 'as'. Notably, they don't support hex escapes.
+ ++i;
+ if (i == e)
+ return TokError("unexpected backslash at end of string");
+
+ // Recognize octal sequences.
+ if ((unsigned) (Str[i] - '0') <= 7) {
+ // Consume up to three octal characters.
+ unsigned Value = Str[i] - '0';
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+ }
+ }
+
+ if (Value > 255)
+ return TokError("invalid octal escape sequence (out of range)");
+
+ Data += (unsigned char) Value;
+ continue;
+ }
+
+ // Otherwise recognize individual escapes.
+ switch (Str[i]) {
+ default:
+ // Just reject invalid escape sequences for now.
+ return TokError("invalid escape sequence (unrecognized character)");
+
+ case 'b': Data += '\b'; break;
+ case 'f': Data += '\f'; break;
+ case 'n': Data += '\n'; break;
+ case 'r': Data += '\r'; break;
+ case 't': Data += '\t'; break;
+ case '"': Data += '"'; break;
+ case '\\': Data += '\\'; break;
+ }
}
-
- Out.SwitchSection(Ctx.GetSection(Section));
+
return false;
}
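
A worked example of the octal-escape arithmetic above:

  // "\101" -> ((1*8 + 0)*8 + 1) = 65  = 'A'
  // "\377" -> ((3*8 + 7)*8 + 7) = 255 = 0xFF
  // "\400" -> ((4*8 + 0)*8 + 0) = 256 > 255, rejected as out of range
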
/// ParseDirectiveAscii:
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
- if (Lexer.isNot(asmtok::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
for (;;) {
- if (Lexer.isNot(asmtok::String))
+ if (Lexer.isNot(AsmToken::String))
return TokError("expected string in '.ascii' or '.asciz' directive");
- // FIXME: This shouldn't use a const char* + strlen, the string could have
- // embedded nulls.
- // FIXME: Should have accessor for getting string contents.
- const char *Str = Lexer.getCurStrVal();
- Out.EmitBytes(Str + 1, strlen(Str) - 2);
+ std::string Data;
+ if (ParseEscapedString(Data))
+ return true;
+
+ Out.EmitBytes(Data);
if (ZeroTerminated)
- Out.EmitBytes("\0", 1);
+ Out.EmitBytes(StringRef("\0", 1));
Lexer.Lex();
- if (Lexer.is(asmtok::EndOfStatement))
+ if (Lexer.is(AsmToken::EndOfStatement))
break;
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in '.ascii' or '.asciz' directive");
Lexer.Lex();
}
@@ -670,19 +938,20 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
/// ParseDirectiveValue
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
bool AsmParser::ParseDirectiveValue(unsigned Size) {
- if (Lexer.isNot(asmtok::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
for (;;) {
- MCValue Expr;
- if (ParseRelocatableExpression(Expr))
+ const MCExpr *Value;
+ SMLoc StartLoc = Lexer.getLoc();
+ if (ParseExpression(Value))
return true;
- Out.EmitValue(Expr, Size);
+ Out.EmitValue(Value, Size);
- if (Lexer.is(asmtok::EndOfStatement))
+ if (Lexer.is(AsmToken::EndOfStatement))
break;
// FIXME: Improve diagnostic.
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lexer.Lex();
}
@@ -701,8 +970,8 @@ bool AsmParser::ParseDirectiveSpace() {
int64_t FillExpr = 0;
bool HasFillExpr = false;
- if (Lexer.isNot(asmtok::EndOfStatement)) {
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in '.space' directive");
Lexer.Lex();
@@ -711,7 +980,7 @@ bool AsmParser::ParseDirectiveSpace() {
HasFillExpr = true;
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.space' directive");
}
@@ -722,7 +991,7 @@ bool AsmParser::ParseDirectiveSpace() {
// FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
for (uint64_t i = 0, e = NumBytes; i != e; ++i)
- Out.EmitValue(MCValue::get(FillExpr), 1);
+ Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), 1);
return false;
}
@@ -734,7 +1003,7 @@ bool AsmParser::ParseDirectiveFill() {
if (ParseAbsoluteExpression(NumValues))
return true;
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lexer.Lex();
@@ -742,7 +1011,7 @@ bool AsmParser::ParseDirectiveFill() {
if (ParseAbsoluteExpression(FillSize))
return true;
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lexer.Lex();
@@ -750,16 +1019,16 @@ bool AsmParser::ParseDirectiveFill() {
if (ParseAbsoluteExpression(FillExpr))
return true;
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.fill' directive");
Lexer.Lex();
- if (FillSize != 1 && FillSize != 2 && FillSize != 4)
- return TokError("invalid '.fill' size, expected 1, 2, or 4");
+ if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
+ return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
for (uint64_t i = 0, e = NumValues; i != e; ++i)
- Out.EmitValue(MCValue::get(FillExpr), FillSize);
+ Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize);
return false;
}
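
A worked example of the widened size check above (directive text is
illustrative):

  // ".fill 3, 8, 0x90" emits three 8-byte constants, 24 bytes of output.
  // ".fill 3, 3, 0x90" is rejected: expected 1, 2, 4, or 8.
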
@@ -767,21 +1036,22 @@ bool AsmParser::ParseDirectiveFill() {
/// ParseDirectiveOrg
/// ::= .org expression [ , expression ]
bool AsmParser::ParseDirectiveOrg() {
- MCValue Offset;
- if (ParseRelocatableExpression(Offset))
+ const MCExpr *Offset;
+ SMLoc StartLoc = Lexer.getLoc();
+ if (ParseExpression(Offset))
return true;
// Parse optional fill expression.
int64_t FillExpr = 0;
- if (Lexer.isNot(asmtok::EndOfStatement)) {
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in '.org' directive");
Lexer.Lex();
if (ParseAbsoluteExpression(FillExpr))
return true;
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.org' directive");
}
@@ -797,6 +1067,7 @@ bool AsmParser::ParseDirectiveOrg() {
/// ParseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+ SMLoc AlignmentLoc = Lexer.getLoc();
int64_t Alignment;
if (ParseAbsoluteExpression(Alignment))
return true;
@@ -805,22 +1076,22 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool HasFillExpr = false;
int64_t FillExpr = 0;
int64_t MaxBytesToFill = 0;
- if (Lexer.isNot(asmtok::EndOfStatement)) {
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lexer.Lex();
// The fill expression can be omitted while specifying a maximum number of
// alignment bytes, e.g:
// .align 3,,4
- if (Lexer.isNot(asmtok::Comma)) {
+ if (Lexer.isNot(AsmToken::Comma)) {
HasFillExpr = true;
if (ParseAbsoluteExpression(FillExpr))
return true;
}
- if (Lexer.isNot(asmtok::EndOfStatement)) {
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lexer.Lex();
@@ -828,7 +1099,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
if (ParseAbsoluteExpression(MaxBytesToFill))
return true;
- if (Lexer.isNot(asmtok::EndOfStatement))
+ if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
}
}
@@ -843,15 +1114,20 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// Compute alignment in bytes.
if (IsPow2) {
// FIXME: Diagnose overflow.
- Alignment = 1 << Alignment;
+ if (Alignment >= 32) {
+ Error(AlignmentLoc, "invalid alignment value");
+ Alignment = 31;
+ }
+
+ Alignment = 1ULL << Alignment;
}
- // Diagnose non-sensical max bytes to fill.
+ // Diagnose non-sensical max bytes to align.
if (MaxBytesLoc.isValid()) {
if (MaxBytesToFill < 1) {
- Warning(MaxBytesLoc, "alignment directive can never be satisfied in this "
- "many bytes, ignoring");
- return false;
+ Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
+ "many bytes, ignoring maximum bytes expression");
+ MaxBytesToFill = 0;
}
if (MaxBytesToFill >= Alignment) {
@@ -870,24 +1146,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
/// ParseDirectiveSymbolAttribute
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) {
- if (Lexer.isNot(asmtok::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
for (;;) {
- if (Lexer.isNot(asmtok::Identifier))
+ StringRef Name;
+
+ if (ParseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getCurStrVal());
- Lexer.Lex();
-
- // If this is use of an undefined symbol then mark it external.
- if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym))
- Sym->setExternal(true);
+ MCSymbol *Sym = CreateSymbol(Name);
Out.EmitSymbolAttribute(Sym, Attr);
- if (Lexer.is(asmtok::EndOfStatement))
+ if (Lexer.is(AsmToken::EndOfStatement))
break;
- if (Lexer.isNot(asmtok::Comma))
+ if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lexer.Lex();
}
@@ -896,3 +1169,513 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) {
Lexer.Lex();
return false;
}
+
+/// ParseDirectiveDarwinSymbolDesc
+/// ::= .desc identifier , expression
+bool AsmParser::ParseDirectiveDarwinSymbolDesc() {
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = CreateSymbol(Name);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.desc' directive");
+ Lexer.Lex();
+
+ SMLoc DescLoc = Lexer.getLoc();
+ int64_t DescValue;
+ if (ParseAbsoluteExpression(DescValue))
+ return true;
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.desc' directive");
+
+ Lexer.Lex();
+
+ // Set the n_desc field of this Symbol to this DescValue
+ Out.EmitSymbolDesc(Sym, DescValue);
+
+ return false;
+}
+
+/// ParseDirectiveComm
+/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
+bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+ SMLoc IDLoc = Lexer.getLoc();
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = CreateSymbol(Name);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lexer.Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (Lexer.is(AsmToken::Comma)) {
+ Lexer.Lex();
+ Pow2AlignmentLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lexer.Lex();
+
+ // NOTE: a size of zero for a .comm should create an undefined symbol,
+ // but a size of zero for a .lcomm creates a bss symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value; the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // '.lcomm' is equivalent to '.zerofill'.
+ // Create the Symbol as a common or local common with Size and Pow2Alignment
+ if (IsLocal) {
+ Out.EmitZerofill(getMachOSection("__DATA", "__bss",
+ MCSectionMachO::S_ZEROFILL, 0,
+ SectionKind()),
+ Sym, Size, 1 << Pow2Alignment);
+ return false;
+ }
+
+ Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+}
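
The directive's alignment operand is a power-of-2 exponent, converted to a
byte alignment with 1 << Pow2Alignment. A worked example (symbol names are
illustrative):

  // ".lcomm buf, 64, 3" -> EmitZerofill(__DATA,__bss, buf, 64, 1 << 3 /*8*/)
  // ".comm  cnt, 4, 2"  -> EmitCommonSymbol(cnt, 4, 1 << 2 /*4*/)
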
+
+/// ParseDirectiveDarwinZerofill
+/// ::= .zerofill segname , sectname [, identifier , size_expression [
+/// , align_expression ]]
+bool AsmParser::ParseDirectiveDarwinZerofill() {
+ // FIXME: Handle quoted names here.
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("expected segment name after '.zerofill' directive");
+ StringRef Segment = Lexer.getTok().getString();
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("expected section name after comma in '.zerofill' "
+ "directive");
+ StringRef Section = Lexer.getTok().getString();
+ Lexer.Lex();
+
+  // If this is the end of the line, all that was wanted was to create the
+  // section but with no symbol.
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ // Create the zerofill section but no symbol
+ Out.EmitZerofill(getMachOSection(Segment, Section,
+ MCSectionMachO::S_ZEROFILL, 0,
+ SectionKind()));
+ return false;
+ }
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("expected identifier in directive");
+
+  // Handle the identifier as the key symbol.
+ SMLoc IDLoc = Lexer.getLoc();
+ MCSymbol *Sym = CreateSymbol(Lexer.getTok().getString());
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lexer.Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (Lexer.is(AsmToken::Comma)) {
+ Lexer.Lex();
+ Pow2AlignmentLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zerofill' directive");
+
+ Lexer.Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
+ "than zero");
+
+  // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
+ "can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the zerofill Symbol with Size and Pow2Alignment
+ //
+ // FIXME: Arch specific.
+ Out.EmitZerofill(getMachOSection(Segment, Section,
+ MCSectionMachO::S_ZEROFILL, 0,
+ SectionKind()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+/// ParseDirectiveDarwinSubsectionsViaSymbols
+/// ::= .subsections_via_symbols
+bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() {
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.subsections_via_symbols' directive");
+
+ Lexer.Lex();
+
+ Out.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols);
+
+ return false;
+}
+
+/// ParseDirectiveAbort
+/// ::= .abort [ "abort_string" ]
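+///
+/// e.g. (illustrative): .abort "unsupported configuration"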
+bool AsmParser::ParseDirectiveAbort() {
+ // FIXME: Use loc from directive.
+ SMLoc Loc = Lexer.getLoc();
+
+ StringRef Str = "";
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::String))
+ return TokError("expected string in '.abort' directive");
+
+ Str = Lexer.getTok().getString();
+
+ Lexer.Lex();
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.abort' directive");
+
+ Lexer.Lex();
+
+ // FIXME: Handle here.
+ if (Str.empty())
+ Error(Loc, ".abort detected. Assembly stopping.");
+ else
+ Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+
+ return false;
+}
+
+/// ParseDirectiveDarwinLsym
+/// ::= .lsym identifier , expression
+bool AsmParser::ParseDirectiveDarwinLsym() {
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = CreateSymbol(Name);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.lsym' directive");
+ Lexer.Lex();
+
+ const MCExpr *Value;
+ SMLoc StartLoc = Lexer.getLoc();
+ if (ParseExpression(Value))
+ return true;
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.lsym' directive");
+
+ Lexer.Lex();
+
+ // We don't currently support this directive.
+ //
+ // FIXME: Diagnostic location!
+ (void) Sym;
+ return TokError("directive '.lsym' is unsupported");
+}
+
+/// ParseDirectiveInclude
+/// ::= .include "filename"
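+///
+/// e.g. (illustrative): .include "defs.s"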
+bool AsmParser::ParseDirectiveInclude() {
+ if (Lexer.isNot(AsmToken::String))
+ return TokError("expected string in '.include' directive");
+
+ std::string Filename = Lexer.getTok().getString();
+ SMLoc IncludeLoc = Lexer.getLoc();
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.include' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to switch the lexer to the included file before consuming the end
+ // of statement to avoid losing it when we switch.
+ if (Lexer.EnterIncludeFile(Filename)) {
+ Lexer.PrintMessage(IncludeLoc,
+ "Could not find include file '" + Filename + "'",
+ "error");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveDarwinDumpOrLoad
+/// ::= ( .dump | .load ) "filename"
+bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) {
+ if (Lexer.isNot(AsmToken::String))
+ return TokError("expected string in '.dump' or '.load' directive");
+
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.dump' or '.load' directive");
+
+ Lexer.Lex();
+
+  // FIXME: If/when .dump and .load are implemented, they will be done in the
+  // assembly parser and will have no need for an MCStreamer API.
+ if (IsDump)
+ Warning(IDLoc, "ignoring directive .dump for now");
+ else
+ Warning(IDLoc, "ignoring directive .load for now");
+
+ return false;
+}
+
+/// ParseDirectiveIf
+/// ::= .if expression
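+///
+/// A minimal sketch of the conditional block this drives (hypothetical
+/// symbol DEBUG):
+///   .if DEBUG
+///     .byte 1
+///   .else
+///     .byte 0
+///   .endif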
+bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
+ // Consume the identifier that was the .if directive
+ Lexer.Lex();
+
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+  if (TheCondState.Ignore) {
+    EatToEndOfStatement();
+  } else {
+ int64_t ExprValue;
+ if (ParseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.if' directive");
+
+ Lexer.Lex();
+
+    TheCondState.CondMet = ExprValue != 0;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElseIf
+/// ::= .elseif expression
+bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+          "an .elseif");
+ TheCondState.TheCond = AsmCond::ElseIfCond;
+
+ // Consume the identifier that was the .elseif directive
+ Lexer.Lex();
+
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet) {
+ TheCondState.Ignore = true;
+ EatToEndOfStatement();
+  } else {
+ int64_t ExprValue;
+ if (ParseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.elseif' directive");
+
+ Lexer.Lex();
+    TheCondState.CondMet = ExprValue != 0;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElse
+/// ::= .else
+bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
+ // Consume the identifier that was the .else directive
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.else' directive");
+
+ Lexer.Lex();
+
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+ ".elseif");
+ TheCondState.TheCond = AsmCond::ElseCond;
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+  TheCondState.Ignore = LastIgnoreState || TheCondState.CondMet;
+
+ return false;
+}
+
+/// ParseDirectiveEndIf
+/// ::= .endif
+bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
+ // Consume the identifier that was the .endif directive
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.endif' directive");
+
+ Lexer.Lex();
+
+ if ((TheCondState.TheCond == AsmCond::NoCond) ||
+ TheCondStack.empty())
+ Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
+ ".else");
+ if (!TheCondStack.empty()) {
+ TheCondState = TheCondStack.back();
+ TheCondStack.pop_back();
+ }
+
+ return false;
+}
+
+/// ParseDirectiveFile
+/// ::= .file [number] string
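+///
+/// e.g. (illustrative): .file 1 "foo.c"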
+bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
+ // FIXME: I'm not sure what this is.
+ int64_t FileNumber = -1;
+ if (Lexer.is(AsmToken::Integer)) {
+ FileNumber = Lexer.getTok().getIntVal();
+ Lexer.Lex();
+
+ if (FileNumber < 1)
+ return TokError("file number less than one");
+ }
+
+ if (Lexer.isNot(AsmToken::String))
+ return TokError("unexpected token in '.file' directive");
+
+ StringRef FileName = Lexer.getTok().getString();
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.file' directive");
+
+ // FIXME: Do something with the .file.
+
+ return false;
+}
+
+/// ParseDirectiveLine
+/// ::= .line [number]
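+///
+/// e.g. (illustrative): .line 42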
+bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.line' directive");
+
+ int64_t LineNumber = Lexer.getTok().getIntVal();
+ (void) LineNumber;
+ Lexer.Lex();
+
+ // FIXME: Do something with the .line.
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.line' directive");
+
+ return false;
+}
+
+/// ParseDirectiveLoc
+/// ::= .loc number [number [number]]
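+///
+/// e.g. (illustrative): ".loc 1 7 0", assuming the operands follow the usual
+/// DWARF file/line/column order (the FIXME below notes the fields are
+/// unconfirmed).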
+bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
+ if (Lexer.isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.loc' directive");
+
+ // FIXME: What are these fields?
+ int64_t FileNumber = Lexer.getTok().getIntVal();
+ (void) FileNumber;
+ // FIXME: Validate file.
+
+ Lexer.Lex();
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.loc' directive");
+
+ int64_t Param2 = Lexer.getTok().getIntVal();
+ (void) Param2;
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.loc' directive");
+
+ int64_t Param3 = Lexer.getTok().getIntVal();
+ (void) Param3;
+ Lexer.Lex();
+
+ // FIXME: Do something with the .loc.
+ }
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.loc' directive");
+
+ return false;
+}
+
diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h
index 333b284eac91..171dfcd4aa9b 100644
--- a/tools/llvm-mc/AsmParser.h
+++ b/tools/llvm-mc/AsmParser.h
@@ -14,89 +14,110 @@
#ifndef ASMPARSER_H
#define ASMPARSER_H
+#include <vector>
#include "AsmLexer.h"
+#include "AsmCond.h"
+#include "llvm/MC/MCAsmParser.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/StringMap.h"
namespace llvm {
-class AsmExpr;
+class AsmCond;
class MCContext;
+class MCExpr;
class MCInst;
class MCStreamer;
+class MCAsmInfo;
class MCValue;
+class TargetAsmParser;
+class Twine;
-class AsmParser {
-public:
- struct X86Operand;
-
+class AsmParser : public MCAsmParser {
private:
AsmLexer Lexer;
MCContext &Ctx;
MCStreamer &Out;
-
+ TargetAsmParser *TargetParser;
+
+ AsmCond TheCondState;
+ std::vector<AsmCond> TheCondStack;
+
+  // FIXME: Figure out where this should live; the code is a copy of that which
+  // is also used by TargetLoweringObjectFile.
+ mutable void *SectionUniquingMap;
+
+  /// DirectiveMap - This is a table of handlers for directives. Each handler
+  /// is invoked after the directive identifier is read and is responsible for
+ /// parsing and validating the rest of the directive. The handler is passed
+ /// in the directive name and the location of the directive keyword.
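+  ///
+  /// Entries are registered via AddDirectiveHandler, e.g. (hypothetical):
+  ///   AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile);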
+ StringMap<bool(AsmParser::*)(StringRef, SMLoc)> DirectiveMap;
public:
- AsmParser(SourceMgr &SM, MCContext &ctx, MCStreamer &OutStr)
- : Lexer(SM), Ctx(ctx), Out(OutStr) {}
- ~AsmParser() {}
-
+ AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
+ const MCAsmInfo &_MAI);
+ ~AsmParser();
+
bool Run();
+
+ void AddDirectiveHandler(StringRef Directive,
+ bool (AsmParser::*Handler)(StringRef, SMLoc)) {
+ DirectiveMap[Directive] = Handler;
+ }
+public:
+ TargetAsmParser &getTargetParser() const { return *TargetParser; }
+ void setTargetParser(TargetAsmParser &P) { TargetParser = &P; }
+
+ /// @name MCAsmParser Interface
+ /// {
+
+ virtual MCAsmLexer &getLexer() { return Lexer; }
+ virtual MCContext &getContext() { return Ctx; }
+ virtual MCStreamer &getStreamer() { return Out; }
+
+  virtual void Warning(SMLoc L, const Twine &Msg);
+ virtual bool Error(SMLoc L, const Twine &Msg);
+
+ virtual bool ParseExpression(const MCExpr *&Res);
+ virtual bool ParseParenExpression(const MCExpr *&Res);
+ virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+ /// }
+
private:
+ MCSymbol *CreateSymbol(StringRef Name);
+
+ // FIXME: See comment on SectionUniquingMap.
+ const MCSection *getMachOSection(const StringRef &Segment,
+ const StringRef &Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2,
+ SectionKind Kind) const;
+
bool ParseStatement();
- void Warning(SMLoc L, const char *Msg);
- bool Error(SMLoc L, const char *Msg);
bool TokError(const char *Msg);
+ bool ParseConditionalAssemblyDirectives(StringRef Directive,
+ SMLoc DirectiveLoc);
void EatToEndOfStatement();
- bool ParseAssignment(const char *Name, bool IsDotSet);
-
- /// ParseExpression - Parse a general assembly expression.
- ///
- /// @param Res - The resulting expression. The pointer value is null on error.
- /// @result - False on success.
- bool ParseExpression(AsmExpr *&Res);
-
- /// ParseAbsoluteExpression - Parse an expression which must evaluate to an
- /// absolute value.
- ///
- /// @param Res - The value of the absolute expression. The result is undefined
- /// on error.
- /// @result - False on success.
- bool ParseAbsoluteExpression(int64_t &Res);
-
- /// ParseRelocatableExpression - Parse an expression which must be
- /// relocatable.
- ///
- /// @param Res - The relocatable expression value. The result is undefined on
- /// error.
- /// @result - False on success.
- bool ParseRelocatableExpression(MCValue &Res);
-
- /// ParseParenRelocatableExpression - Parse an expression which must be
- /// relocatable, assuming that an initial '(' has already been consumed.
- ///
- /// @param Res - The relocatable expression value. The result is undefined on
- /// error.
- /// @result - False on success.
- ///
- /// @see ParseRelocatableExpression, ParseParenExpr.
- bool ParseParenRelocatableExpression(MCValue &Res);
-
- bool ParsePrimaryExpr(AsmExpr *&Res);
- bool ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res);
- bool ParseParenExpr(AsmExpr *&Res);
-
- // X86 specific.
- bool ParseX86InstOperands(const char *InstName, MCInst &Inst);
- bool ParseX86Operand(X86Operand &Op);
- bool ParseX86MemOperand(X86Operand &Op);
- bool ParseX86Register(X86Operand &Op);
+ bool ParseAssignment(const StringRef &Name);
+
+ bool ParsePrimaryExpr(const MCExpr *&Res);
+ bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res);
+ bool ParseParenExpr(const MCExpr *&Res);
+
+ /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \arg Res to the identifier contents.
+ bool ParseIdentifier(StringRef &Res);
// Directive Parsing.
bool ParseDirectiveDarwinSection(); // Darwin specific ".section".
- bool ParseDirectiveSectionSwitch(const char *Section,
- const char *Directives = 0);
+ bool ParseDirectiveSectionSwitch(const char *Segment, const char *Section,
+ unsigned TAA = 0, unsigned ImplicitAlign = 0,
+ unsigned StubSize = 0);
bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
bool ParseDirectiveFill(); // ".fill"
@@ -109,7 +130,32 @@ private:
/// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
/// accepts a single symbol (which should be a label or an external).
bool ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr);
-
+ bool ParseDirectiveDarwinSymbolDesc(); // Darwin specific ".desc"
+ bool ParseDirectiveDarwinLsym(); // Darwin specific ".lsym"
+
+ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+ bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill"
+
+ // Darwin specific ".subsections_via_symbols"
+ bool ParseDirectiveDarwinSubsectionsViaSymbols();
+ // Darwin specific .dump and .load
+ bool ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump);
+
+ bool ParseDirectiveAbort(); // ".abort"
+ bool ParseDirectiveInclude(); // ".include"
+
+ bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+
+ bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
+ bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
+ bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
};
} // end namespace llvm
diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt
index b21a4b1b9189..ce9d63b879ff 100644
--- a/tools/llvm-mc/CMakeLists.txt
+++ b/tools/llvm-mc/CMakeLists.txt
@@ -1,9 +1,7 @@
-set(LLVM_LINK_COMPONENTS support MC)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC)
add_llvm_tool(llvm-mc
llvm-mc.cpp
- AsmExpr.cpp
AsmLexer.cpp
AsmParser.cpp
- MC-X86Specific.cpp
)
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
index 3c327dac1e97..9bfb773076de 100644
--- a/tools/llvm-mc/Makefile
+++ b/tools/llvm-mc/Makefile
@@ -9,9 +9,16 @@
LEVEL = ../..
TOOLNAME = llvm-mc
-LINK_COMPONENTS := support MC
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
+NO_INSTALL = 1
-include $(LEVEL)/Makefile.common
+# Include this here so we can get the configuration of the targets
+# that have been configured for construction. We have to do this
+# early so we can set up LINK_COMPONENTS before including Makefile.rules
+include $(LEVEL)/Makefile.config
+
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) MC support
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index b52edd1ed434..329efe92329f 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -12,16 +12,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCAsmLexer.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h" // FIXME.
+#include "llvm/Target/TargetSelect.h"
#include "AsmParser.h"
using namespace llvm;
@@ -32,10 +41,39 @@ static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"),
cl::value_desc("filename"));
+static cl::opt<bool>
+ShowEncoding("show-encoding", cl::desc("Show instruction encodings"));
+
+static cl::opt<unsigned>
+OutputAsmVariant("output-asm-variant",
+ cl::desc("Syntax variant to use for output printing"));
+
+enum OutputFileType {
+ OFT_AssemblyFile,
+ OFT_ObjectFile
+};
+static cl::opt<OutputFileType>
+FileType("filetype", cl::init(OFT_AssemblyFile),
+ cl::desc("Choose an output file type:"),
+ cl::values(
+ clEnumValN(OFT_AssemblyFile, "asm",
+ "Emit an assembly ('.s') file"),
+ clEnumValN(OFT_ObjectFile, "obj",
+ "Emit a native object ('.o') file"),
+ clEnumValEnd));
+
+static cl::opt<bool>
+Force("f", cl::desc("Enable binary output on terminals"));
+
static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
cl::value_desc("directory"), cl::Prefix);
+static cl::opt<std::string>
+TripleName("triple", cl::desc("Target triple to assemble for,"
+ "see -version for available targets"),
+ cl::init(LLVM_HOSTTRIPLE));
+
enum ActionType {
AC_AsLex,
AC_Assemble
@@ -50,6 +88,18 @@ Action(cl::desc("Action to perform:"),
"Assemble a .s file (default)"),
clEnumValEnd));
+static const Target *GetTarget(const char *ProgName) {
+ // Get the target specific parser.
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+ if (TheTarget)
+ return TheTarget;
+
+ errs() << ProgName << ": error: unable to get target for '" << TripleName
+ << "', see --version and --triple.\n";
+ return 0;
+}
+
static int AsLexInput(const char *ProgName) {
std::string ErrorMessage;
MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
@@ -72,78 +122,103 @@ static int AsLexInput(const char *ProgName) {
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
- AsmLexer Lexer(SrcMgr);
+ const Target *TheTarget = GetTarget(ProgName);
+ if (!TheTarget)
+ return 1;
+
+ const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+ assert(MAI && "Unable to create target asm info!");
+
+ AsmLexer Lexer(SrcMgr, *MAI);
bool Error = false;
- asmtok::TokKind Tok = Lexer.Lex();
- while (Tok != asmtok::Eof) {
- switch (Tok) {
+ while (Lexer.Lex().isNot(AsmToken::Eof)) {
+ switch (Lexer.getKind()) {
default:
Lexer.PrintMessage(Lexer.getLoc(), "unknown token", "warning");
Error = true;
break;
- case asmtok::Error:
+ case AsmToken::Error:
Error = true; // error already printed.
break;
- case asmtok::Identifier:
- outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
- break;
- case asmtok::Register:
- outs() << "register: " << Lexer.getCurStrVal() << '\n';
+ case AsmToken::Identifier:
+ outs() << "identifier: " << Lexer.getTok().getString() << '\n';
break;
- case asmtok::String:
- outs() << "string: " << Lexer.getCurStrVal() << '\n';
+ case AsmToken::String:
+ outs() << "string: " << Lexer.getTok().getString() << '\n';
break;
- case asmtok::IntVal:
- outs() << "int: " << Lexer.getCurIntVal() << '\n';
+ case AsmToken::Integer:
+ outs() << "int: " << Lexer.getTok().getString() << '\n';
break;
- case asmtok::Amp: outs() << "Amp\n"; break;
- case asmtok::AmpAmp: outs() << "AmpAmp\n"; break;
- case asmtok::Caret: outs() << "Caret\n"; break;
- case asmtok::Colon: outs() << "Colon\n"; break;
- case asmtok::Comma: outs() << "Comma\n"; break;
- case asmtok::Dollar: outs() << "Dollar\n"; break;
- case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break;
- case asmtok::Eof: outs() << "Eof\n"; break;
- case asmtok::Equal: outs() << "Equal\n"; break;
- case asmtok::EqualEqual: outs() << "EqualEqual\n"; break;
- case asmtok::Exclaim: outs() << "Exclaim\n"; break;
- case asmtok::ExclaimEqual: outs() << "ExclaimEqual\n"; break;
- case asmtok::Greater: outs() << "Greater\n"; break;
- case asmtok::GreaterEqual: outs() << "GreaterEqual\n"; break;
- case asmtok::GreaterGreater: outs() << "GreaterGreater\n"; break;
- case asmtok::LParen: outs() << "LParen\n"; break;
- case asmtok::Less: outs() << "Less\n"; break;
- case asmtok::LessEqual: outs() << "LessEqual\n"; break;
- case asmtok::LessGreater: outs() << "LessGreater\n"; break;
- case asmtok::LessLess: outs() << "LessLess\n"; break;
- case asmtok::Minus: outs() << "Minus\n"; break;
- case asmtok::Percent: outs() << "Percent\n"; break;
- case asmtok::Pipe: outs() << "Pipe\n"; break;
- case asmtok::PipePipe: outs() << "PipePipe\n"; break;
- case asmtok::Plus: outs() << "Plus\n"; break;
- case asmtok::RParen: outs() << "RParen\n"; break;
- case asmtok::Slash: outs() << "Slash\n"; break;
- case asmtok::Star: outs() << "Star\n"; break;
- case asmtok::Tilde: outs() << "Tilde\n"; break;
+ case AsmToken::Amp: outs() << "Amp\n"; break;
+ case AsmToken::AmpAmp: outs() << "AmpAmp\n"; break;
+ case AsmToken::Caret: outs() << "Caret\n"; break;
+ case AsmToken::Colon: outs() << "Colon\n"; break;
+ case AsmToken::Comma: outs() << "Comma\n"; break;
+ case AsmToken::Dollar: outs() << "Dollar\n"; break;
+ case AsmToken::EndOfStatement: outs() << "EndOfStatement\n"; break;
+ case AsmToken::Eof: outs() << "Eof\n"; break;
+ case AsmToken::Equal: outs() << "Equal\n"; break;
+ case AsmToken::EqualEqual: outs() << "EqualEqual\n"; break;
+ case AsmToken::Exclaim: outs() << "Exclaim\n"; break;
+ case AsmToken::ExclaimEqual: outs() << "ExclaimEqual\n"; break;
+ case AsmToken::Greater: outs() << "Greater\n"; break;
+ case AsmToken::GreaterEqual: outs() << "GreaterEqual\n"; break;
+ case AsmToken::GreaterGreater: outs() << "GreaterGreater\n"; break;
+ case AsmToken::LParen: outs() << "LParen\n"; break;
+ case AsmToken::Less: outs() << "Less\n"; break;
+ case AsmToken::LessEqual: outs() << "LessEqual\n"; break;
+ case AsmToken::LessGreater: outs() << "LessGreater\n"; break;
+ case AsmToken::LessLess: outs() << "LessLess\n"; break;
+ case AsmToken::Minus: outs() << "Minus\n"; break;
+ case AsmToken::Percent: outs() << "Percent\n"; break;
+ case AsmToken::Pipe: outs() << "Pipe\n"; break;
+ case AsmToken::PipePipe: outs() << "PipePipe\n"; break;
+ case AsmToken::Plus: outs() << "Plus\n"; break;
+ case AsmToken::RParen: outs() << "RParen\n"; break;
+ case AsmToken::Slash: outs() << "Slash\n"; break;
+ case AsmToken::Star: outs() << "Star\n"; break;
+ case AsmToken::Tilde: outs() << "Tilde\n"; break;
}
-
- Tok = Lexer.Lex();
}
return Error;
}
+static formatted_raw_ostream *GetOutputStream() {
+ if (OutputFilename == "")
+ OutputFilename = "-";
+
+ // Make sure that the Out file gets unlinked from the disk if we get a
+ // SIGINT.
+ if (OutputFilename != "-")
+ sys::RemoveFileOnSignal(sys::Path(OutputFilename));
+
+ std::string Err;
+ raw_fd_ostream *Out = new raw_fd_ostream(OutputFilename.c_str(), Err,
+ raw_fd_ostream::F_Binary);
+ if (!Err.empty()) {
+ errs() << Err << '\n';
+ delete Out;
+ return 0;
+ }
+
+ return new formatted_raw_ostream(*Out, formatted_raw_ostream::DELETE_STREAM);
+}
+
static int AssembleInput(const char *ProgName) {
- std::string ErrorMessage;
- MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
- &ErrorMessage);
+ const Target *TheTarget = GetTarget(ProgName);
+ if (!TheTarget)
+ return 1;
+
+ std::string Error;
+ MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, &Error);
if (Buffer == 0) {
errs() << ProgName << ": ";
- if (ErrorMessage.size())
- errs() << ErrorMessage << "\n";
+ if (Error.size())
+ errs() << Error << "\n";
else
errs() << "input file didn't read correctly.\n";
return 1;
@@ -151,7 +226,7 @@ static int AssembleInput(const char *ProgName) {
SourceMgr SrcMgr;
- // Tell SrcMgr about this buffer, which is what TGParser will pick up.
+ // Tell SrcMgr about this buffer, which is what the parser will pick up.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
// Record the location of the include directories so that the lexer can find
@@ -159,13 +234,53 @@ static int AssembleInput(const char *ProgName) {
SrcMgr.setIncludeDirs(IncludeDirs);
MCContext Ctx;
- OwningPtr<MCStreamer> Str(createAsmStreamer(Ctx, outs()));
+ formatted_raw_ostream *Out = GetOutputStream();
+ if (!Out)
+ return 1;
+
+
+ // FIXME: We shouldn't need to do this (and link in codegen).
+ OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName, ""));
+
+ if (!TM) {
+ errs() << ProgName << ": error: could not create target for triple '"
+ << TripleName << "'.\n";
+ return 1;
+ }
+
+ OwningPtr<MCInstPrinter> IP;
+ OwningPtr<MCCodeEmitter> CE;
+ OwningPtr<MCStreamer> Str;
+
+ const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+ assert(MAI && "Unable to create target asm info!");
- // FIXME: Target hook & command line option for initial section.
- Str.get()->SwitchSection(Ctx.GetSection("__TEXT,__text,regular,pure_instructions"));
+ if (FileType == OFT_AssemblyFile) {
+ IP.reset(TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *Out));
+ if (ShowEncoding)
+ CE.reset(TheTarget->createCodeEmitter(*TM));
+ Str.reset(createAsmStreamer(Ctx, *Out, *MAI, IP.get(), CE.get()));
+ } else {
+ assert(FileType == OFT_ObjectFile && "Invalid file type!");
+ CE.reset(TheTarget->createCodeEmitter(*TM));
+ Str.reset(createMachOStreamer(Ctx, *Out, CE.get()));
+ }
- AsmParser Parser(SrcMgr, Ctx, *Str.get());
- return Parser.Run();
+ AsmParser Parser(SrcMgr, Ctx, *Str.get(), *MAI);
+ OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(Parser));
+ if (!TAP) {
+ errs() << ProgName
+ << ": error: this target does not support assembly parsing.\n";
+ return 1;
+ }
+
+ Parser.setTargetParser(*TAP.get());
+
+ int Res = Parser.Run();
+ if (Out != &fouts())
+ delete Out;
+
+ return Res;
}
@@ -174,6 +289,14 @@ int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ // Initialize targets and assembly printers/parsers.
+ llvm::InitializeAllTargetInfos();
+ // FIXME: We shouldn't need to initialize the Target(Machine)s.
+ llvm::InitializeAllTargets();
+ llvm::InitializeAllAsmPrinters();
+ llvm::InitializeAllAsmParsers();
+
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
switch (Action) {
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 4e011807a6d4..2baf5323f4cb 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -24,12 +24,12 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstring>
-#include <iostream>
using namespace llvm;
namespace {
@@ -88,7 +88,8 @@ static char TypeCharForSymbol(GlobalValue &GV) {
static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
// Private linkage and available_externally linkage don't exist in symtab.
- if (GV.hasPrivateLinkage() || GV.hasAvailableExternallyLinkage()) return;
+ if (GV.hasPrivateLinkage() || GV.hasLinkerPrivateLinkage() ||
+ GV.hasAvailableExternallyLinkage()) return;
const std::string SymbolAddrStr = " "; // Not used yet...
char TypeChar = TypeCharForSymbol(GV);
@@ -99,31 +100,31 @@ static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
if (GV.hasLocalLinkage () && ExternalOnly)
return;
if (OutputFormat == posix) {
- std::cout << GV.getName () << " " << TypeCharForSymbol(GV) << " "
- << SymbolAddrStr << "\n";
+ outs() << GV.getName () << " " << TypeCharForSymbol(GV) << " "
+ << SymbolAddrStr << "\n";
} else if (OutputFormat == bsd) {
- std::cout << SymbolAddrStr << " " << TypeCharForSymbol(GV) << " "
- << GV.getName () << "\n";
+ outs() << SymbolAddrStr << " " << TypeCharForSymbol(GV) << " "
+ << GV.getName () << "\n";
} else if (OutputFormat == sysv) {
std::string PaddedName (GV.getName ());
while (PaddedName.length () < 20)
PaddedName += " ";
- std::cout << PaddedName << "|" << SymbolAddrStr << "| "
- << TypeCharForSymbol(GV)
- << " | | | |\n";
+ outs() << PaddedName << "|" << SymbolAddrStr << "| "
+ << TypeCharForSymbol(GV)
+ << " | | | |\n";
}
}
static void DumpSymbolNamesFromModule(Module *M) {
const std::string &Filename = M->getModuleIdentifier ();
if (OutputFormat == posix && MultipleFiles) {
- std::cout << Filename << ":\n";
+ outs() << Filename << ":\n";
} else if (OutputFormat == bsd && MultipleFiles) {
- std::cout << "\n" << Filename << ":\n";
+ outs() << "\n" << Filename << ":\n";
} else if (OutputFormat == sysv) {
- std::cout << "\n\nSymbols from " << Filename << ":\n\n"
- << "Name Value Class Type"
- << " Size Line Section\n";
+ outs() << "\n\nSymbols from " << Filename << ":\n\n"
+ << "Name Value Class Type"
+ << " Size Line Section\n";
}
std::for_each (M->begin(), M->end(), DumpSymbolNameForGlobalValue);
std::for_each (M->global_begin(), M->global_end(),
@@ -133,7 +134,7 @@ static void DumpSymbolNamesFromModule(Module *M) {
}
static void DumpSymbolNamesFromFile(std::string &Filename) {
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
std::string ErrorMessage;
sys::Path aPath(Filename);
// Note: Currently we do not support reading an archive from stdin.
@@ -144,29 +145,28 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
if (Buffer.get())
Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
- if (Result)
+ if (Result) {
DumpSymbolNamesFromModule(Result);
- else {
- std::cerr << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
- return;
- }
+ delete Result;
+ } else
+ errs() << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
} else if (aPath.isArchive()) {
std::string ErrMsg;
Archive* archive = Archive::OpenAndLoad(sys::Path(Filename), Context,
&ErrorMessage);
if (!archive)
- std::cerr << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
+ errs() << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
std::vector<Module *> Modules;
if (archive->getAllModules(Modules, &ErrorMessage)) {
- std::cerr << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
+ errs() << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
return;
}
MultipleFiles = true;
std::for_each (Modules.begin(), Modules.end(), DumpSymbolNamesFromModule);
} else {
- std::cerr << ToolName << ": " << Filename << ": "
- << "unrecognizable file type\n";
+ errs() << ToolName << ": " << Filename << ": "
+ << "unrecognizable file type\n";
return;
}
}
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 2cff296ba99c..cff139e1251a 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -16,17 +16,20 @@
#include "llvm/InstrTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
#include "llvm/System/Signals.h"
#include <algorithm>
-#include <iostream>
#include <iomanip>
#include <map>
#include <set>
@@ -55,71 +58,202 @@ namespace {
// PairSecondSort - A sorting predicate to sort by the second element of a pair.
template<class T>
struct PairSecondSortReverse
- : public std::binary_function<std::pair<T, unsigned>,
- std::pair<T, unsigned>, bool> {
- bool operator()(const std::pair<T, unsigned> &LHS,
- const std::pair<T, unsigned> &RHS) const {
+ : public std::binary_function<std::pair<T, double>,
+ std::pair<T, double>, bool> {
+ bool operator()(const std::pair<T, double> &LHS,
+ const std::pair<T, double> &RHS) const {
return LHS.second > RHS.second;
}
};
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
namespace {
class ProfileAnnotator : public AssemblyAnnotationWriter {
- std::map<const Function *, unsigned> &FuncFreqs;
- std::map<const BasicBlock*, unsigned> &BlockFreqs;
- std::map<ProfileInfoLoader::Edge, unsigned> &EdgeFreqs;
+ ProfileInfo &PI;
public:
- ProfileAnnotator(std::map<const Function *, unsigned> &FF,
- std::map<const BasicBlock*, unsigned> &BF,
- std::map<ProfileInfoLoader::Edge, unsigned> &EF)
- : FuncFreqs(FF), BlockFreqs(BF), EdgeFreqs(EF) {}
+ ProfileAnnotator(ProfileInfo& pi) : PI(pi) {}
virtual void emitFunctionAnnot(const Function *F, raw_ostream &OS) {
- OS << ";;; %" << F->getName() << " called " << FuncFreqs[F]
- << " times.\n;;;\n";
+ double w = PI.getExecutionCount(F);
+ if (w != ProfileInfo::MissingValue) {
+ OS << ";;; %" << F->getName() << " called "<<(unsigned)w
+ <<" times.\n;;;\n";
+ }
}
virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
raw_ostream &OS) {
- if (BlockFreqs.empty()) return;
- std::map<const BasicBlock *, unsigned>::const_iterator I =
- BlockFreqs.find(BB);
- if (I != BlockFreqs.end())
- OS << "\t;;; Basic block executed " << I->second << " times.\n";
- else
- OS << "\t;;; Never executed!\n";
+ double w = PI.getExecutionCount(BB);
+ if (w != ProfileInfo::MissingValue) {
+ if (w != 0) {
+ OS << "\t;;; Basic block executed " << (unsigned)w << " times.\n";
+ } else {
+ OS << "\t;;; Never executed!\n";
+ }
+ }
}
virtual void emitBasicBlockEndAnnot(const BasicBlock *BB, raw_ostream &OS) {
- if (EdgeFreqs.empty()) return;
-
// Figure out how many times each successor executed.
- std::vector<std::pair<const BasicBlock*, unsigned> > SuccCounts;
- const TerminatorInst *TI = BB->getTerminator();
+ std::vector<std::pair<ProfileInfo::Edge, double> > SuccCounts;
- std::map<ProfileInfoLoader::Edge, unsigned>::iterator I =
- EdgeFreqs.lower_bound(std::make_pair(const_cast<BasicBlock*>(BB), 0U));
- for (; I != EdgeFreqs.end() && I->first.first == BB; ++I)
- if (I->second)
- SuccCounts.push_back(std::make_pair(TI->getSuccessor(I->first.second),
- I->second));
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ BasicBlock* Succ = TI->getSuccessor(s);
+ double w = ignoreMissing(PI.getEdgeWeight(std::make_pair(BB, Succ)));
+ if (w != 0)
+ SuccCounts.push_back(std::make_pair(std::make_pair(BB, Succ), w));
+ }
if (!SuccCounts.empty()) {
OS << "\t;;; Out-edge counts:";
for (unsigned i = 0, e = SuccCounts.size(); i != e; ++i)
- OS << " [" << SuccCounts[i].second << " -> "
- << SuccCounts[i].first->getName() << "]";
+ OS << " [" << (SuccCounts[i]).second << " -> "
+ << (SuccCounts[i]).first.second->getName() << "]";
OS << "\n";
}
}
};
}
+namespace {
+ /// ProfileInfoPrinterPass - Helper pass to dump the profile information for
+ /// a module.
+ //
+ // FIXME: This should move elsewhere.
+ class ProfileInfoPrinterPass : public ModulePass {
+ ProfileInfoLoader &PIL;
+ public:
+ static char ID; // Class identification, replacement for typeinfo.
+ explicit ProfileInfoPrinterPass(ProfileInfoLoader &_PIL)
+ : ModulePass(&ID), PIL(_PIL) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfo>();
+ }
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char ProfileInfoPrinterPass::ID = 0;
+
+bool ProfileInfoPrinterPass::runOnModule(Module &M) {
+ ProfileInfo &PI = getAnalysis<ProfileInfo>();
+ std::map<const Function *, unsigned> FuncFreqs;
+ std::map<const BasicBlock*, unsigned> BlockFreqs;
+ std::map<ProfileInfo::Edge, unsigned> EdgeFreqs;
+
+ // Output a report. Eventually, there will be multiple reports selectable on
+ // the command line, for now, just keep things simple.
+
+ // Emit the most frequent function table...
+ std::vector<std::pair<Function*, double> > FunctionCounts;
+ std::vector<std::pair<BasicBlock*, double> > Counts;
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ if (FI->isDeclaration()) continue;
+ double w = ignoreMissing(PI.getExecutionCount(FI));
+ FunctionCounts.push_back(std::make_pair(FI, w));
+ for (Function::iterator BB = FI->begin(), BBE = FI->end();
+ BB != BBE; ++BB) {
+ double w = ignoreMissing(PI.getExecutionCount(BB));
+ Counts.push_back(std::make_pair(BB, w));
+ }
+ }
+
+ // Sort by the frequency, backwards.
+ sort(FunctionCounts.begin(), FunctionCounts.end(),
+ PairSecondSortReverse<Function*>());
+
+ double TotalExecutions = 0;
+ for (unsigned i = 0, e = FunctionCounts.size(); i != e; ++i)
+ TotalExecutions += FunctionCounts[i].second;
+
+ outs() << "===" << std::string(73, '-') << "===\n"
+ << "LLVM profiling output for execution";
+ if (PIL.getNumExecutions() != 1) outs() << "s";
+ outs() << ":\n";
+
+ for (unsigned i = 0, e = PIL.getNumExecutions(); i != e; ++i) {
+ outs() << " ";
+ if (e != 1) outs() << i+1 << ". ";
+ outs() << PIL.getExecution(i) << "\n";
+ }
+
+ outs() << "\n===" << std::string(73, '-') << "===\n";
+ outs() << "Function execution frequencies:\n\n";
+
+ // Print out the function frequencies...
+ outs() << " ## Frequency\n";
+ for (unsigned i = 0, e = FunctionCounts.size(); i != e; ++i) {
+ if (FunctionCounts[i].second == 0) {
+ outs() << "\n NOTE: " << e-i << " function"
+ << (e-i-1 ? "s were" : " was") << " never executed!\n";
+ break;
+ }
+
+ outs() << format("%3d", i+1) << ". "
+ << format("%5.2g", FunctionCounts[i].second) << "/"
+ << format("%g", TotalExecutions) << " "
+ << FunctionCounts[i].first->getNameStr() << "\n";
+ }
+
+ std::set<Function*> FunctionsToPrint;
+
+ TotalExecutions = 0;
+ for (unsigned i = 0, e = Counts.size(); i != e; ++i)
+ TotalExecutions += Counts[i].second;
+
+ // Sort by the frequency, backwards.
+ sort(Counts.begin(), Counts.end(),
+ PairSecondSortReverse<BasicBlock*>());
+
+ outs() << "\n===" << std::string(73, '-') << "===\n";
+ outs() << "Top 20 most frequently executed basic blocks:\n\n";
+
+ // Print out the function frequencies...
+ outs() <<" ## %% \tFrequency\n";
+ unsigned BlocksToPrint = Counts.size();
+ if (BlocksToPrint > 20) BlocksToPrint = 20;
+ for (unsigned i = 0; i != BlocksToPrint; ++i) {
+ if (Counts[i].second == 0) break;
+ Function *F = Counts[i].first->getParent();
+ outs() << format("%3d", i+1) << ". "
+ << format("%5g", Counts[i].second/(double)TotalExecutions*100) << "% "
+ << format("%5.0f", Counts[i].second) << "/"
+ << format("%g", TotalExecutions) << "\t"
+ << F->getNameStr() << "() - "
+ << Counts[i].first->getNameStr() << "\n";
+ FunctionsToPrint.insert(F);
+ }
+
+ if (PrintAnnotatedLLVM || PrintAllCode) {
+ outs() << "\n===" << std::string(73, '-') << "===\n";
+ outs() << "Annotated LLVM code for the module:\n\n";
+
+ ProfileAnnotator PA(PI);
+
+ if (FunctionsToPrint.empty() || PrintAllCode)
+ M.print(outs(), &PA);
+ else
+ // Print just a subset of the functions.
+ for (std::set<Function*>::iterator I = FunctionsToPrint.begin(),
+ E = FunctionsToPrint.end(); I != E; ++I)
+ (*I)->print(outs(), &PA);
+ }
+
+ return false;
+}
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
try {
cl::ParseCommandLineOptions(argc, argv, "llvm profile dump decoder\n");
@@ -133,128 +267,29 @@ int main(int argc, char **argv) {
delete Buffer;
}
if (M == 0) {
- std::cerr << argv[0] << ": " << BitcodeFile << ": "
+ errs() << argv[0] << ": " << BitcodeFile << ": "
<< ErrorMessage << "\n";
return 1;
}
- // Read the profiling information
- ProfileInfoLoader PI(argv[0], ProfileDataFile, *M);
-
- std::map<const Function *, unsigned> FuncFreqs;
- std::map<const BasicBlock*, unsigned> BlockFreqs;
- std::map<ProfileInfoLoader::Edge, unsigned> EdgeFreqs;
-
- // Output a report. Eventually, there will be multiple reports selectable on
- // the command line, for now, just keep things simple.
-
- // Emit the most frequent function table...
- std::vector<std::pair<Function*, unsigned> > FunctionCounts;
- PI.getFunctionCounts(FunctionCounts);
- FuncFreqs.insert(FunctionCounts.begin(), FunctionCounts.end());
-
- // Sort by the frequency, backwards.
- sort(FunctionCounts.begin(), FunctionCounts.end(),
- PairSecondSortReverse<Function*>());
-
- uint64_t TotalExecutions = 0;
- for (unsigned i = 0, e = FunctionCounts.size(); i != e; ++i)
- TotalExecutions += FunctionCounts[i].second;
-
- std::cout << "===" << std::string(73, '-') << "===\n"
- << "LLVM profiling output for execution";
- if (PI.getNumExecutions() != 1) std::cout << "s";
- std::cout << ":\n";
-
- for (unsigned i = 0, e = PI.getNumExecutions(); i != e; ++i) {
- std::cout << " ";
- if (e != 1) std::cout << i+1 << ". ";
- std::cout << PI.getExecution(i) << "\n";
- }
-
- std::cout << "\n===" << std::string(73, '-') << "===\n";
- std::cout << "Function execution frequencies:\n\n";
-
- // Print out the function frequencies...
- std::cout << " ## Frequency\n";
- for (unsigned i = 0, e = FunctionCounts.size(); i != e; ++i) {
- if (FunctionCounts[i].second == 0) {
- std::cout << "\n NOTE: " << e-i << " function" <<
- (e-i-1 ? "s were" : " was") << " never executed!\n";
- break;
- }
-
- std::cout << std::setw(3) << i+1 << ". "
- << std::setw(5) << FunctionCounts[i].second << "/"
- << TotalExecutions << " "
- << FunctionCounts[i].first->getName().c_str() << "\n";
- }
-
- std::set<Function*> FunctionsToPrint;
-
- // If we have block count information, print out the LLVM module with
- // frequency annotations.
- if (PI.hasAccurateBlockCounts()) {
- std::vector<std::pair<BasicBlock*, unsigned> > Counts;
- PI.getBlockCounts(Counts);
-
- TotalExecutions = 0;
- for (unsigned i = 0, e = Counts.size(); i != e; ++i)
- TotalExecutions += Counts[i].second;
-
- // Sort by the frequency, backwards.
- sort(Counts.begin(), Counts.end(),
- PairSecondSortReverse<BasicBlock*>());
-
- std::cout << "\n===" << std::string(73, '-') << "===\n";
- std::cout << "Top 20 most frequently executed basic blocks:\n\n";
-
- // Print out the function frequencies...
- std::cout <<" ## %% \tFrequency\n";
- unsigned BlocksToPrint = Counts.size();
- if (BlocksToPrint > 20) BlocksToPrint = 20;
- for (unsigned i = 0; i != BlocksToPrint; ++i) {
- if (Counts[i].second == 0) break;
- Function *F = Counts[i].first->getParent();
- std::cout << std::setw(3) << i+1 << ". "
- << std::setw(5) << std::setprecision(2)
- << Counts[i].second/(double)TotalExecutions*100 << "% "
- << std::setw(5) << Counts[i].second << "/"
- << TotalExecutions << "\t"
- << F->getName().c_str() << "() - "
- << Counts[i].first->getName().c_str() << "\n";
- FunctionsToPrint.insert(F);
- }
-
- BlockFreqs.insert(Counts.begin(), Counts.end());
- }
-
- if (PI.hasAccurateEdgeCounts()) {
- std::vector<std::pair<ProfileInfoLoader::Edge, unsigned> > Counts;
- PI.getEdgeCounts(Counts);
- EdgeFreqs.insert(Counts.begin(), Counts.end());
- }
+ // Read the profiling information. This is redundant since we load it again
+ // using the standard profile info provider pass, but for now this gives us
+ // access to additional information not exposed via the ProfileInfo
+ // interface.
+ ProfileInfoLoader PIL(argv[0], ProfileDataFile, *M);
- if (PrintAnnotatedLLVM || PrintAllCode) {
- std::cout << "\n===" << std::string(73, '-') << "===\n";
- std::cout << "Annotated LLVM code for the module:\n\n";
-
- ProfileAnnotator PA(FuncFreqs, BlockFreqs, EdgeFreqs);
-
- if (FunctionsToPrint.empty() || PrintAllCode)
- M->print(std::cout, &PA);
- else
- // Print just a subset of the functions.
- for (std::set<Function*>::iterator I = FunctionsToPrint.begin(),
- E = FunctionsToPrint.end(); I != E; ++I)
- (*I)->print(std::cout, &PA);
- }
+ // Run the printer pass.
+ PassManager PassMgr;
+ PassMgr.add(createProfileLoaderPass(ProfileDataFile));
+ PassMgr.add(new ProfileInfoPrinterPass(PIL));
+ PassMgr.run(*M);
return 0;
} catch (const std::string& msg) {
- std::cerr << argv[0] << ": " << msg << "\n";
+ errs() << argv[0] << ": " << msg << "\n";
} catch (...) {
- std::cerr << argv[0] << ": Unexpected unknown exception occurred.\n";
+ errs() << argv[0] << ": Unexpected unknown exception occurred.\n";
}
+
return 1;
}
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index d9bcc48eb13e..dffe3ada5f10 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -17,6 +17,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include <iostream>
#include <iomanip>
@@ -48,7 +49,7 @@ int main(int argc, char **argv) {
llvm::sys::PrintStackTraceOnErrorSignal();
llvm::PrettyStackTraceProgram X(argc, argv);
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
// Have the command line options parsed and handle things
@@ -87,13 +88,13 @@ int main(int argc, char **argv) {
printSymbolTable(TheArchive);
} catch (const char* msg) {
- std::cerr << argv[0] << ": " << msg << "\n\n";
+ errs() << argv[0] << ": " << msg << "\n\n";
exitCode = 1;
} catch (const std::string& msg) {
- std::cerr << argv[0] << ": " << msg << "\n";
+ errs() << argv[0] << ": " << msg << "\n";
exitCode = 2;
} catch (...) {
- std::cerr << argv[0] << ": An unexpected unknown exception occurred.\n";
+ errs() << argv[0] << ": An unexpected unknown exception occurred.\n";
exitCode = 3;
}
return exitCode;
diff --git a/tools/llvm-stub/llvm-stub.c b/tools/llvm-stub/llvm-stub.c
index e5624a9c9b09..f2e478e69583 100644
--- a/tools/llvm-stub/llvm-stub.c
+++ b/tools/llvm-stub/llvm-stub.c
@@ -61,11 +61,10 @@ int main(int argc, char** argv) {
Args[1] = strcat(strcpy((char*)malloc(strlen(argv[0])+4), argv[0]), ".bc");
/* The rest of the args are as before. */
- memcpy(Args+2, argv+1, sizeof(char*)*argc);
+ memcpy((char **)Args+2, argv+1, sizeof(char*)*argc);
/* Run the JIT. */
- execvp(Interp, (char *const*)Args);
-
+ execvp(Interp, (char **)Args);
/* if _execv returns, the JIT could not be started. */
fprintf(stderr, "Could not execute the LLVM JIT. Either add 'lli' to your"
" path, or set the\ninterpreter you want to use in the LLVMINTERP "
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index b43c3e3525c1..fad2ccc77424 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -97,6 +97,11 @@ configuration libraries:
the ``-o`` option. The ``--save-temps=cwd`` and ``--save-temps`` switches are
both synonyms for the default behaviour.
+* ``--temp-dir DIRECTORY`` - Store temporary files in the given directory. This
+ directory is deleted on exit unless ``--save-temps`` is specified. If
+  ``--save-temps=obj`` is also specified, ``--temp-dir`` takes precedence.
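+  For example (paths illustrative): ``llvmc --temp-dir /tmp/objs hello.c``.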
+
* ``--check-graph`` - Check the compilation for common errors like mismatched
output/input language names, multiple default edges and cycles. Because of
plugins, these checks can't be performed at compile-time. Exit with code zero
@@ -347,6 +352,12 @@ separate option groups syntactically.
3))``. Only list options can have this attribute; you can, however, use
the ``one_or_more`` and ``zero_or_one`` properties.
+ - ``init`` - this option has a default value, either a string (if it is a
+ parameter), or a boolean (if it is a switch; boolean constants are called
+ ``true`` and ``false``). List options can't have this attribute. Usage
+ examples: ``(switch_option "foo", (init true))``; ``(prefix_option "bar",
+ (init "baz"))``.
+
- ``extern`` - this option is defined in some other plugin, see below.
External options
@@ -362,7 +373,8 @@ for. Example::
(switch_option "E", (extern))
...
-See also the section on plugin `priorities`__.
+If an external option has additional attributes besides 'extern', they are
+ignored. See also the section on plugin `priorities`__.
__ priorities_
@@ -446,17 +458,27 @@ use TableGen inheritance instead.
- ``empty`` - The opposite of ``not_empty``. Equivalent to ``(not (not_empty
X))``. Provided for convenience.
+ - ``single_input_file`` - Returns true if there was only one input file
+ provided on the command-line. Used without arguments:
+ ``(single_input_file)``.
+
+ - ``multiple_input_files`` - Equivalent to ``(not (single_input_file))`` (the
+ case of zero input files is considered an error).
+
- ``default`` - Always evaluates to true. Should always be the last
test in the ``case`` expression.
- - ``and`` - A standard logical combinator that returns true iff all
- of its arguments return true. Used like this: ``(and (test1),
- (test2), ... (testN))``. Nesting of ``and`` and ``or`` is allowed,
- but not encouraged.
+ - ``and`` - A standard binary logical combinator that returns true iff all of
+ its arguments return true. Used like this: ``(and (test1), (test2),
+ ... (testN))``. Nesting of ``and`` and ``or`` is allowed, but not
+ encouraged.
+
+ - ``or`` - A binary logical combinator that returns true iff any of its
+ arguments returns true. Example: ``(or (test1), (test2), ... (testN))``.
+
+ - ``not`` - Standard unary logical combinator that negates its
+ argument. Example: ``(not (or (test1), (test2), ... (testN)))``.
- - ``or`` - Another logical combinator that returns true only if any
- one of its arguments returns true. Example: ``(or (test1),
- (test2), ... (testN))``.
Writing a tool description
@@ -487,8 +509,8 @@ The complete list of all currently implemented tool properties follows.
- ``in_language`` - input language name. Can be either a string or a
list, in case the tool supports multiple input languages.
- - ``out_language`` - output language name. Tools are not allowed to
- have multiple output languages.
+ - ``out_language`` - output language name. Multiple output languages are not
+ allowed.
- ``output_suffix`` - output file suffix. Can also be changed
dynamically, see documentation on actions.
diff --git a/tools/llvmc/example/Hello/Hello.cpp b/tools/llvmc/example/Hello/Hello.cpp
index 23a13a57c2b8..9c96bd0a416b 100644
--- a/tools/llvmc/example/Hello/Hello.cpp
+++ b/tools/llvmc/example/Hello/Hello.cpp
@@ -13,13 +13,12 @@
#include "llvm/CompilerDriver/CompilationGraph.h"
#include "llvm/CompilerDriver/Plugin.h"
-
-#include <iostream>
+#include "llvm/Support/raw_ostream.h"
namespace {
struct MyPlugin : public llvmc::BasePlugin {
void PopulateLanguageMap(llvmc::LanguageMap&) const
- { std::cout << "Hello!\n"; }
+ { outs() << "Hello!\n"; }
void PopulateCompilationGraph(llvmc::CompilationGraph&) const
{}
diff --git a/tools/llvmc/example/mcc16/driver/Main.cpp b/tools/llvmc/example/mcc16/driver/Main.cpp
index b1f5b6798ae6..f42e17f64115 100644
--- a/tools/llvmc/example/mcc16/driver/Main.cpp
+++ b/tools/llvmc/example/mcc16/driver/Main.cpp
@@ -7,8 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// Just include CompilerDriver/Main.inc.
+// Usually this file just includes CompilerDriver/Main.inc, but here we apply
+// some trickery to make the built-in '-save-temps' option hidden and enable
+// '--temp-dir' by default.
//
//===----------------------------------------------------------------------===//
-#include "llvm/CompilerDriver/Main.inc"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/ForceLinkage.h"
+#include "llvm/System/Path.h"
+
+namespace llvmc {
+ int Main(int argc, char** argv);
+}
+
+int main(int argc, char** argv) {
+
+ // HACK
+ SaveTemps.setHiddenFlag(llvm::cl::Hidden);
+ TempDirname = "tmp-objs";
+
+ // Remove the temp dir if already exists.
+ llvm::sys::Path tempDir;
+ tempDir = TempDirname;
+ tempDir.eraseFromDisk(true);
+
+ llvmc::ForceLinkage();
+ return llvmc::Main(argc, argv);
+}
diff --git a/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td b/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
index de85fa9d59d4..3d25ab612a0c 100644
--- a/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
+++ b/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
@@ -55,7 +55,7 @@ def llvm_ld_lto : Tool<[
(in_language "llvm-bitcode"),
(out_language "llvm-bitcode"),
(output_suffix "bc"),
- (cmd_line "$CALL(GetBinDir)llvm-ld -link-as-library $INFILE -o $OUTFILE"),
+ (cmd_line "$CALL(GetBinDir)llvm-ld -L $CALL(GetStdLibsDir) -l std $INFILE -b $OUTFILE"),
(actions (case
(switch_on "g"), (append_cmd "-disable-opt"),
(not_empty "Wo,"), (unpack_values "Wo,"))),
@@ -66,7 +66,7 @@ def llc : Tool<[
(in_language "llvm-bitcode"),
(out_language "assembler"),
(output_suffix "s"),
- (cmd_line "$CALL(GetBinDir)llc -march=pic16 -f $INFILE -o $OUTFILE"),
+ (cmd_line "$CALL(GetBinDir)llc -march=pic16 -disable-jump-tables -f $INFILE -o $OUTFILE"),
(actions (case
(switch_on "S"), (stop_compilation),
(not_empty "Wllc,"), (unpack_values "Wllc,"),
@@ -87,7 +87,7 @@ def mplink : Tool<[
(in_language "object-code"),
(out_language "executable"),
(output_suffix "out"),
- (cmd_line "$CALL(GetBinDir)mplink.exe /k $CALL(GetStdLinkerScriptsDir) /l $CALL(GetStdLibsDir) 16f1937.lkr intrinsics.lib std.lib $INFILE -o $OUTFILE"),
+ (cmd_line "$CALL(GetBinDir)mplink.exe -k $CALL(GetStdLinkerScriptsDir) -l $CALL(GetStdLibsDir) 16f1937_g.lkr intrinsics.lib devices.lib $INFILE -o $OUTFILE"),
(actions (case
(not_empty "Wl,"), (unpack_values "Wl,"))),
(join)
diff --git a/tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp b/tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp
index 21a25b3f734b..f8492ed45d8e 100644
--- a/tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp
+++ b/tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp
@@ -10,13 +10,17 @@ namespace llvmc {
}
// Returns the platform-specific directory separator via #ifdefs.
-static std::string GetDirSeparator(void) {
+static std::string GetDirSeparator() {
+#ifdef _WIN32
+ return "\\";
+#else
return "/";
+#endif
}
namespace hooks {
// Get the dir where c16 executables reside.
-std::string GetBinDir (void) {
+std::string GetBinDir() {
// Construct a Path object from the program name.
void *P = (void*) (intptr_t) GetBinDir;
sys::Path ProgramFullPath
@@ -30,7 +34,7 @@ std::string GetBinDir (void) {
}
// Get the Top-level Installation dir for c16.
-std::string GetInstallDir (void) {
+std::string GetInstallDir() {
sys::Path BinDirPath = sys::Path(GetBinDir());
// Go one more level up to get the install dir.
@@ -40,22 +44,22 @@ std::string GetInstallDir (void) {
}
// Get the dir where the c16 header files reside.
-std::string GetStdHeadersDir (void) {
+std::string GetStdHeadersDir() {
return GetInstallDir() + "include";
}
// Get the dir where the assembler header files reside.
-std::string GetStdAsmHeadersDir (void) {
+std::string GetStdAsmHeadersDir() {
return GetInstallDir() + "inc";
}
// Get the dir where the linker scripts reside.
-std::string GetStdLinkerScriptsDir (void) {
+std::string GetStdLinkerScriptsDir() {
return GetInstallDir() + "lkr";
}
// Get the dir where startup code, intrinsics and lib reside.
-std::string GetStdLibsDir (void) {
+std::string GetStdLibsDir() {
return GetInstallDir() + "lib";
}
}
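
All of these hooks lean on GetInstallDir(), which, judging from the
concatenations above, returns a path ending in a separator. A new hook would
follow the same one-liner shape; GetStdDocsDir below is hypothetical and only
illustrates the convention:

namespace hooks {
// Hypothetical hook, not in the tree: the dir where c16 docs would reside.
std::string GetStdDocsDir() {
  return GetInstallDir() + "doc";
}
}
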
diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/plugins/Base/Base.td.in
index 757078a2366d..be325a004104 100644
--- a/tools/llvmc/plugins/Base/Base.td.in
+++ b/tools/llvmc/plugins/Base/Base.td.in
@@ -1,4 +1,4 @@
-//===- Base.td - LLVMC2 toolchain descriptions -------------*- tablegen -*-===//
+//===- Base.td - LLVMC toolchain descriptions --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains compilation graph description used by llvmc2.
+// This file contains the compilation graph description used by llvmc.
//
//===----------------------------------------------------------------------===//
@@ -32,10 +32,17 @@ def OptList : OptionList<[
(help "Enable threads")),
(parameter_option "linker",
(help "Choose linker (possible values: gcc, g++)")),
+ (parameter_option "MF",
+ (help "Specify a file to write dependencies to"), (hidden)),
+ (parameter_option "MT",
+ (help "Change the name of the rule emitted by dependency generation"),
+ (hidden)),
(parameter_list_option "include",
(help "Include the named file prior to preprocessing")),
(prefix_list_option "I",
(help "Add a directory to include path")),
+ (prefix_list_option "D",
+ (help "Define a macro")),
(prefix_list_option "Wa,",
(help "Pass options to assembler")),
(prefix_list_option "Wllc,",
@@ -70,19 +77,25 @@ class llvm_gcc_based <string cmd_prefix, string in_lang, string E_ext> : Tool<
!strconcat(cmd_prefix, " -c $INFILE -o $OUTFILE -emit-llvm"))),
(actions
(case
+ (and (multiple_input_files), (or (switch_on "S"), (switch_on "c"))),
+ (error "cannot specify -o with -c or -S with multiple files"),
(switch_on "E"), [(stop_compilation), (output_suffix E_ext)],
(and (switch_on "emit-llvm"), (switch_on "S")),
[(output_suffix "ll"), (stop_compilation)],
(and (switch_on "emit-llvm"), (switch_on "c")), (stop_compilation),
(switch_on "fsyntax-only"), (stop_compilation),
(not_empty "include"), (forward "include"),
- (not_empty "I"), (forward "I"))),
+ (not_empty "I"), (forward "I"),
+ (not_empty "D"), (forward "D"),
+ (not_empty "MF"), (forward "MF"),
+ (not_empty "MT"), (forward "MT"))),
(sink)
]>;
def llvm_gcc_c : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x c", "c", "i">;
def llvm_gcc_cpp : llvm_gcc_based<"@LLVMGXXCOMMAND@ -x c++", "c++", "i">;
-def llvm_gcc_m : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c", "objective-c", "mi">;
+def llvm_gcc_m : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c",
+ "objective-c", "mi">;
def llvm_gcc_mxx : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c++",
"objective-c++", "mi">;
@@ -98,7 +111,8 @@ def llvm_as : Tool<
[(in_language "llvm-assembler"),
(out_language "llvm-bitcode"),
(output_suffix "bc"),
- (cmd_line "llvm-as $INFILE -o $OUTFILE")
+ (cmd_line "llvm-as $INFILE -o $OUTFILE"),
+ (actions (case (switch_on "emit-llvm"), (stop_compilation)))
]>;
def llvm_gcc_assembler : Tool<
@@ -112,7 +126,7 @@ def llvm_gcc_assembler : Tool<
]>;
def llc : Tool<
-[(in_language "llvm-bitcode"),
+[(in_language ["llvm-bitcode", "llvm-assembler"]),
(out_language "assembler"),
(output_suffix "s"),
(cmd_line "llc -f $INFILE -o $OUTFILE"),
@@ -132,7 +146,7 @@ class llvm_gcc_based_linker <string cmd_prefix> : Tool<
(switch_on "pthread"), (append_cmd "-lpthread"),
(not_empty "L"), (forward "L"),
(not_empty "l"), (forward "l"),
- (not_empty "Wl,"), (unpack_values "Wl,")))
+ (not_empty "Wl,"), (forward "Wl,")))
]>;
// Default linker
@@ -165,7 +179,7 @@ def CompilationGraph : CompilationGraph<[
Edge<"root", "llvm_gcc_cpp">,
Edge<"root", "llvm_gcc_m">,
Edge<"root", "llvm_gcc_mxx">,
- Edge<"root", "llvm_as">,
+ Edge<"root", "llc">,
Edge<"llvm_gcc_c", "llc">,
Edge<"llvm_gcc_cpp", "llc">,
@@ -173,6 +187,8 @@ def CompilationGraph : CompilationGraph<[
Edge<"llvm_gcc_mxx", "llc">,
Edge<"llvm_as", "llc">,
+ OptionalEdge<"root", "llvm_as",
+ (case (switch_on "emit-llvm"), (inc_weight))>,
OptionalEdge<"llvm_gcc_c", "opt", (case (switch_on "opt"), (inc_weight))>,
OptionalEdge<"llvm_gcc_cpp", "opt", (case (switch_on "opt"), (inc_weight))>,
OptionalEdge<"llvm_gcc_m", "opt", (case (switch_on "opt"), (inc_weight))>,
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 1a21132ef2f5..c6217214b408 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -30,26 +30,25 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/FileWriters.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/StandardPasses.h"
#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Host.h"
+#include "llvm/System/Program.h"
#include "llvm/System/Signals.h"
#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Config/config.h"
-
-
#include <cstdlib>
-#include <fstream>
#include <unistd.h>
#include <fcntl.h>
@@ -75,11 +74,10 @@ LTOCodeGenerator::LTOCodeGenerator()
_linker("LinkTimeOptimizer", "ld-temp.o", _context), _target(NULL),
_emitDwarfDebugInfo(false), _scopeRestrictionsDone(false),
_codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
- _nativeObjectFile(NULL), _gccPath(NULL), _assemblerPath(NULL)
+ _nativeObjectFile(NULL), _assemblerPath(NULL)
{
- InitializeAllTargets();
- InitializeAllAsmPrinters();
-
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
}
LTOCodeGenerator::~LTOCodeGenerator()
@@ -126,13 +124,6 @@ bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model,
return true;
}
-void LTOCodeGenerator::setGccPath(const char* path)
-{
- if ( _gccPath )
- delete _gccPath;
- _gccPath = new sys::Path(path);
-}
-
void LTOCodeGenerator::setAssemblerPath(const char* path)
{
if ( _assemblerPath )
@@ -146,31 +137,34 @@ void LTOCodeGenerator::addMustPreserveSymbol(const char* sym)
}
-bool LTOCodeGenerator::writeMergedModules(const char* path, std::string& errMsg)
-{
- if ( this->determineTarget(errMsg) )
- return true;
+bool LTOCodeGenerator::writeMergedModules(const char *path,
+ std::string &errMsg) {
+ if (determineTarget(errMsg))
+ return true;
- // mark which symbols can not be internalized
- this->applyScopeRestrictions();
+ // mark which symbols cannot be internalized
+ applyScopeRestrictions();
- // create output file
- std::ofstream out(path, std::ios_base::out|std::ios::trunc|std::ios::binary);
- if ( out.fail() ) {
- errMsg = "could not open bitcode file for writing: ";
- errMsg += path;
- return true;
- }
-
- // write bitcode to it
- WriteBitcodeToFile(_linker.getModule(), out);
- if ( out.fail() ) {
- errMsg = "could not write bitcode file: ";
- errMsg += path;
- return true;
- }
+ // create output file
+ std::string ErrInfo;
+ raw_fd_ostream Out(path, ErrInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrInfo.empty()) {
+ errMsg = "could not open bitcode file for writing: ";
+ errMsg += path;
+ return true;
+ }
- return false;
+ // write bitcode to it
+ WriteBitcodeToFile(_linker.getModule(), Out);
+
+ if (Out.has_error()) {
+ errMsg = "could not write bitcode file: ";
+ errMsg += path;
+ return true;
+ }
+
+ return false;
}
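
The rewritten body follows the standard raw_fd_ostream error-handling shape
for this era of the API: the constructor reports open failures through an
out-parameter string, and later write failures surface via has_error(). A
self-contained sketch of that shape (writeBlob is a hypothetical helper):

#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <string>

// Returns true on failure, matching the convention in the method above.
static bool writeBlob(const char *path, const char *data, std::size_t len,
                      std::string &errMsg) {
  std::string ErrInfo;
  llvm::raw_fd_ostream Out(path, ErrInfo, llvm::raw_fd_ostream::F_Binary);
  if (!ErrInfo.empty()) {          // the constructor could not open the file
    errMsg = ErrInfo;
    return true;
  }
  Out.write(data, len);            // buffered; flushed when Out is destroyed
  return Out.has_error();
}
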
@@ -185,7 +179,8 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
// generate assembly code
bool genResult = false;
{
- raw_fd_ostream asmFile(uniqueAsmPath.c_str(), false, errMsg);
+ raw_fd_ostream asmFD(uniqueAsmPath.c_str(), errMsg);
+ formatted_raw_ostream asmFile(asmFD);
if (!errMsg.empty())
return NULL;
genResult = this->generateAssemblyCode(asmFile, errMsg);
@@ -206,9 +201,8 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
sys::RemoveFileOnSignal(uniqueObjPath);
// assemble the assembly code
- const std::string& uniqueObjStr = uniqueObjPath.toString();
- bool asmResult = this->assemble(uniqueAsmPath.toString(),
- uniqueObjStr, errMsg);
+ const std::string& uniqueObjStr = uniqueObjPath.str();
+ bool asmResult = this->assemble(uniqueAsmPath.str(), uniqueObjStr, errMsg);
if ( !asmResult ) {
// remove old buffer if compile() called twice
delete _nativeObjectFile;
@@ -237,9 +231,6 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
if ( _assemblerPath ) {
tool = *_assemblerPath;
needsCompilerOptions = false;
- }
- else if ( _gccPath ) {
- tool = *_gccPath;
} else {
// find compiler driver
tool = sys::Program::FindProgramByName("gcc");
@@ -324,11 +315,12 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
bool LTOCodeGenerator::determineTarget(std::string& errMsg)
{
if ( _target == NULL ) {
+ std::string Triple = _linker.getModule()->getTargetTriple();
+ if (Triple.empty())
+ Triple = sys::getHostTriple();
+
// create target machine from info for merged modules
- Module* mergedModule = _linker.getModule();
- const TargetMachineRegistry::entry* march =
- TargetMachineRegistry::getClosestStaticTargetForModule(
- *mergedModule, errMsg);
+ const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
if ( march == NULL )
return true;
@@ -347,9 +339,8 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
}
// construct LTModule, hand over ownership of module and target
- std::string FeatureStr =
- getFeatureString(_linker.getModule()->getTargetTriple().c_str());
- _target = march->CtorFn(*mergedModule, FeatureStr.c_str());
+ std::string FeatureStr = getFeatureString(Triple.c_str());
+ _target = march->createTargetMachine(Triple, FeatureStr);
}
return false;
}
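
This is the registry-based lookup pattern that replaces TargetMachineRegistry
throughout the commit. A condensed sketch, assuming the targets have already
been registered via InitializeAllTargets() as in the constructor above
(createTM is a hypothetical helper; an empty feature string is passed for
brevity):

#include "llvm/System/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
#include <string>

static llvm::TargetMachine *createTM(std::string Triple, std::string &errMsg) {
  if (Triple.empty())
    Triple = llvm::sys::getHostTriple();   // fall back to the build host
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget(Triple, errMsg);
  if (!T)
    return 0;                              // errMsg carries the reason
  return T->createTargetMachine(Triple, /*FeatureStr=*/"");
}
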
@@ -366,19 +357,19 @@ void LTOCodeGenerator::applyScopeRestrictions()
// mark which symbols can not be internalized
if ( !_mustPreserveSymbols.empty() ) {
Mangler mangler(*mergedModule,
- _target->getTargetAsmInfo()->getGlobalPrefix());
+ _target->getMCAsmInfo()->getGlobalPrefix());
std::vector<const char*> mustPreserveList;
for (Module::iterator f = mergedModule->begin(),
e = mergedModule->end(); f != e; ++f) {
if ( !f->isDeclaration()
- && _mustPreserveSymbols.count(mangler.getValueName(f)) )
- mustPreserveList.push_back(::strdup(f->getName().c_str()));
+ && _mustPreserveSymbols.count(mangler.getMangledName(f)) )
+ mustPreserveList.push_back(::strdup(f->getNameStr().c_str()));
}
for (Module::global_iterator v = mergedModule->global_begin(),
e = mergedModule->global_end(); v != e; ++v) {
if ( !v->isDeclaration()
- && _mustPreserveSymbols.count(mangler.getValueName(v)) )
- mustPreserveList.push_back(::strdup(v->getName().c_str()));
+ && _mustPreserveSymbols.count(mangler.getMangledName(v)) )
+ mustPreserveList.push_back(::strdup(v->getNameStr().c_str()));
}
passes.add(createInternalizePass(mustPreserveList));
}
@@ -390,10 +381,10 @@ void LTOCodeGenerator::applyScopeRestrictions()
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
+bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
std::string& errMsg)
{
- if ( this->determineTarget(errMsg) )
+ if ( this->determineTarget(errMsg) )
return true;
// mark which symbols can not be internalized
@@ -401,9 +392,19 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
Module* mergedModule = _linker.getModule();
- // If target supports exception handling then enable it now.
- if ( _target->getTargetAsmInfo()->doesSupportExceptionHandling() )
- llvm::ExceptionHandling = true;
+ // If target supports exception handling then enable it now.
+ switch (_target->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::Dwarf:
+ llvm::DwarfExceptionHandling = true;
+ break;
+ case ExceptionHandling::SjLj:
+ llvm::SjLjExceptionHandling = true;
+ break;
+ case ExceptionHandling::None:
+ break;
+ default:
+ assert (0 && "Unknown exception handling model!");
+ }
// if options were requested, set them
if ( !_codegenOptions.empty() )
@@ -430,16 +431,16 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
codeGenPasses->add(new TargetData(*_target->getTargetData()));
- MachineCodeEmitter* mce = NULL;
+ ObjectCodeEmitter* oce = NULL;
switch (_target->addPassesToEmitFile(*codeGenPasses, out,
TargetMachine::AssemblyFile,
CodeGenOpt::Aggressive)) {
case FileModel::MachOFile:
- mce = AddMachOWriter(*codeGenPasses, out, *_target);
+ oce = AddMachOWriter(*codeGenPasses, out, *_target);
break;
case FileModel::ElfFile:
- mce = AddELFWriter(*codeGenPasses, out, *_target);
+ oce = AddELFWriter(*codeGenPasses, out, *_target);
break;
case FileModel::AsmFile:
break;
@@ -449,7 +450,7 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
return true;
}
- if (_target->addPassesToEmitFileFinish(*codeGenPasses, mce,
+ if (_target->addPassesToEmitFileFinish(*codeGenPasses, oce,
CodeGenOpt::Aggressive)) {
errMsg = "target does not support generation of this file type";
return true;
@@ -467,6 +468,7 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
codeGenPasses->run(*it);
codeGenPasses->doFinalization();
+
return false; // success
}
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index 5548050fda01..0ebec2ca8b0f 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -37,7 +37,6 @@ public:
bool addModule(class LTOModule*, std::string& errMsg);
bool setDebugInfo(lto_debug_model, std::string& errMsg);
bool setCodePICModel(lto_codegen_model, std::string& errMsg);
- void setGccPath(const char* path);
void setAssemblerPath(const char* path);
void addMustPreserveSymbol(const char* sym);
bool writeMergedModules(const char* path,
@@ -45,7 +44,7 @@ public:
const void* compile(size_t* length, std::string& errMsg);
void setCodeGenDebugOptions(const char *opts);
private:
- bool generateAssemblyCode(llvm::raw_ostream& out,
+ bool generateAssemblyCode(llvm::formatted_raw_ostream& out,
std::string& errMsg);
bool assemble(const std::string& asmPath,
const std::string& objPath, std::string& errMsg);
@@ -63,7 +62,6 @@ private:
StringSet _mustPreserveSymbols;
llvm::MemoryBuffer* _nativeObjectFile;
std::vector<const char*> _codegenOptions;
- llvm::sys::Path* _gccPath;
llvm::sys::Path* _assemblerPath;
};
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 9c8baef7d6a9..e1cf48d3c841 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -24,21 +24,21 @@
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/System/Host.h"
#include "llvm/System/Path.h"
#include "llvm/System/Process.h"
#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Target/TargetAsmInfo.h"
-
-#include <fstream>
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSelect.h"
using namespace llvm;
bool LTOModule::isBitcodeFile(const void* mem, size_t length)
{
- return ( llvm::sys::IdentifyFileType((char*)mem, length)
- == llvm::sys::Bitcode_FileType );
+ return llvm::sys::IdentifyFileType((char*)mem, length)
+ == llvm::sys::Bitcode_FileType;
}
bool LTOModule::isBitcodeFile(const char* path)
@@ -50,7 +50,7 @@ bool LTOModule::isBitcodeFileForTarget(const void* mem, size_t length,
const char* triplePrefix)
{
MemoryBuffer* buffer = makeBuffer(mem, length);
- if ( buffer == NULL )
+ if (!buffer)
return false;
return isTargetMatch(buffer, triplePrefix);
}
@@ -71,12 +71,12 @@ bool LTOModule::isTargetMatch(MemoryBuffer* buffer, const char* triplePrefix)
OwningPtr<ModuleProvider> mp(getBitcodeModuleProvider(buffer,
getGlobalContext()));
// on success, mp owns buffer and both are deleted at end of this method
- if ( !mp ) {
+ if (!mp) {
delete buffer;
return false;
}
std::string actualTarget = mp->getModule()->getTargetTriple();
- return ( strncmp(actualTarget.c_str(), triplePrefix,
+ return (strncmp(actualTarget.c_str(), triplePrefix,
strlen(triplePrefix)) == 0);
}
@@ -90,7 +90,7 @@ LTOModule* LTOModule::makeLTOModule(const char* path,
std::string& errMsg)
{
OwningPtr<MemoryBuffer> buffer(MemoryBuffer::getFile(path, &errMsg));
- if ( !buffer )
+ if (!buffer)
return NULL;
return makeLTOModule(buffer.get(), errMsg);
}
@@ -103,8 +103,8 @@ MemoryBuffer* LTOModule::makeBuffer(const void* mem, size_t length)
{
const char* startPtr = (char*)mem;
const char* endPtr = startPtr+length;
- if ( (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0)
- || (*endPtr != 0) )
+ if ((((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0)
+ || (*endPtr != 0))
return MemoryBuffer::getMemBufferCopy(startPtr, endPtr);
else
return MemoryBuffer::getMemBuffer(startPtr, endPtr);
@@ -115,7 +115,7 @@ LTOModule* LTOModule::makeLTOModule(const void* mem, size_t length,
std::string& errMsg)
{
OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
- if ( !buffer )
+ if (!buffer)
return NULL;
return makeLTOModule(buffer.get(), errMsg);
}
@@ -127,6 +127,8 @@ LTOModule* LTOModule::makeLTOModule(const void* mem, size_t length,
/// subtarget. It would be better if we could encode this information into the
/// IR. See <rdar://5972456>.
std::string getFeatureString(const char *TargetTriple) {
+ InitializeAllTargets();
+
SubtargetFeatures Features;
if (strncmp(TargetTriple, "powerpc-apple-", 14) == 0) {
@@ -142,20 +144,25 @@ std::string getFeatureString(const char *TargetTriple) {
LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer,
std::string& errMsg)
{
+ InitializeAllTargets();
+
// parse bitcode buffer
OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), &errMsg));
- if ( !m )
+ if (!m)
return NULL;
- // find machine architecture for this module
- const TargetMachineRegistry::entry* march =
- TargetMachineRegistry::getClosestStaticTargetForModule(*m, errMsg);
- if ( march == NULL )
+ std::string Triple = m->getTargetTriple();
+ if (Triple.empty())
+ Triple = sys::getHostTriple();
+
+ // find machine architecture for this module
+ const Target* march = TargetRegistry::lookupTarget(Triple, errMsg);
+ if (!march)
return NULL;
// construct LTModule, hand over ownership of module and target
- std::string FeatureStr = getFeatureString(m->getTargetTriple().c_str());
- TargetMachine* target = march->CtorFn(*m, FeatureStr);
+ std::string FeatureStr = getFeatureString(Triple.c_str());
+ TargetMachine* target = march->createTargetMachine(Triple, FeatureStr);
return new LTOModule(m.take(), target);
}
@@ -189,7 +196,7 @@ bool LTOModule::objcClassNameFromExpression(Constant* c, std::string& name)
if (GlobalVariable* gvn = dyn_cast<GlobalVariable>(op)) {
Constant* cn = gvn->getInitializer();
if (ConstantArray* ca = dyn_cast<ConstantArray>(cn)) {
- if ( ca->isCString() ) {
+ if (ca->isCString()) {
name = ".objc_class_name_" + ca->getAsString();
return true;
}
@@ -205,9 +212,9 @@ void LTOModule::addObjCClass(GlobalVariable* clgv)
if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
// second slot in __OBJC,__class is pointer to superclass name
std::string superclassName;
- if ( objcClassNameFromExpression(c->getOperand(1), superclassName) ) {
+ if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
NameAndAttributes info;
- if ( _undefines.find(superclassName.c_str()) == _undefines.end() ) {
+ if (_undefines.find(superclassName.c_str()) == _undefines.end()) {
const char* symbolName = ::strdup(superclassName.c_str());
info.name = ::strdup(symbolName);
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
@@ -217,7 +224,7 @@ void LTOModule::addObjCClass(GlobalVariable* clgv)
}
// third slot in __OBJC,__class is pointer to class name
std::string className;
- if ( objcClassNameFromExpression(c->getOperand(2), className) ) {
+ if (objcClassNameFromExpression(c->getOperand(2), className)) {
const char* symbolName = ::strdup(className.c_str());
NameAndAttributes info;
info.name = symbolName;
@@ -238,9 +245,9 @@ void LTOModule::addObjCCategory(GlobalVariable* clgv)
if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
// second slot in __OBJC,__category is pointer to target class name
std::string targetclassName;
- if ( objcClassNameFromExpression(c->getOperand(1), targetclassName) ) {
+ if (objcClassNameFromExpression(c->getOperand(1), targetclassName)) {
NameAndAttributes info;
- if ( _undefines.find(targetclassName.c_str()) == _undefines.end() ){
+ if (_undefines.find(targetclassName.c_str()) == _undefines.end()) {
const char* symbolName = ::strdup(targetclassName.c_str());
info.name = ::strdup(symbolName);
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
@@ -256,9 +263,9 @@ void LTOModule::addObjCCategory(GlobalVariable* clgv)
void LTOModule::addObjCClassRef(GlobalVariable* clgv)
{
std::string targetclassName;
- if ( objcClassNameFromExpression(clgv->getInitializer(), targetclassName) ){
+ if (objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) {
NameAndAttributes info;
- if ( _undefines.find(targetclassName.c_str()) == _undefines.end() ) {
+ if (_undefines.find(targetclassName.c_str()) == _undefines.end()) {
const char* symbolName = ::strdup(targetclassName.c_str());
info.name = ::strdup(symbolName);
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
@@ -293,23 +300,23 @@ void LTOModule::addDefinedDataSymbol(GlobalValue* v, Mangler& mangler)
// a class was missing.
// The following synthesizes the implicit .objc_* symbols for the linker
// from the ObjC data structures generated by the front end.
- if ( v->hasSection() /* && isTargetDarwin */ ) {
+ if (v->hasSection() /* && isTargetDarwin */) {
// special case if this data blob is an ObjC class definition
- if ( v->getSection().compare(0, 15, "__OBJC,__class,") == 0 ) {
+ if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) {
if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
addObjCClass(gv);
}
}
// special case if this data blob is an ObjC category definition
- else if ( v->getSection().compare(0, 18, "__OBJC,__category,") == 0 ) {
+ else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) {
if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
addObjCCategory(gv);
}
}
// special case if this data blob is the list of referenced classes
- else if ( v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0 ) {
+ else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) {
if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
addObjCClassRef(gv);
}
@@ -325,35 +332,35 @@ void LTOModule::addDefinedDataSymbol(GlobalValue* v, Mangler& mangler)
void LTOModule::addDefinedSymbol(GlobalValue* def, Mangler &mangler,
- bool isFunction)
+ bool isFunction)
{
// ignore all llvm.* symbols
- if ( strncmp(def->getNameStart(), "llvm.", 5) == 0 )
+ if (def->getName().startswith("llvm."))
return;
// string is owned by _defines
- const char* symbolName = ::strdup(mangler.getValueName(def).c_str());
+ const char* symbolName = ::strdup(mangler.getMangledName(def).c_str());
// set alignment part; log2() can have rounding errors, so use CountTrailingZeros
uint32_t align = def->getAlignment();
uint32_t attr = align ? CountTrailingZeros_32(def->getAlignment()) : 0;
// set permissions part
- if ( isFunction )
+ if (isFunction)
attr |= LTO_SYMBOL_PERMISSIONS_CODE;
else {
GlobalVariable* gv = dyn_cast<GlobalVariable>(def);
- if ( (gv != NULL) && gv->isConstant() )
+ if (gv && gv->isConstant())
attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
else
attr |= LTO_SYMBOL_PERMISSIONS_DATA;
}
// set definition part
- if ( def->hasWeakLinkage() || def->hasLinkOnceLinkage() ) {
+ if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) {
attr |= LTO_SYMBOL_DEFINITION_WEAK;
}
- else if ( def->hasCommonLinkage()) {
+ else if (def->hasCommonLinkage()) {
attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
}
else {
@@ -361,12 +368,12 @@ void LTOModule::addDefinedSymbol(GlobalValue* def, Mangler &mangler,
}
// set scope part
- if ( def->hasHiddenVisibility() )
+ if (def->hasHiddenVisibility())
attr |= LTO_SYMBOL_SCOPE_HIDDEN;
- else if ( def->hasProtectedVisibility() )
+ else if (def->hasProtectedVisibility())
attr |= LTO_SYMBOL_SCOPE_PROTECTED;
- else if ( def->hasExternalLinkage() || def->hasWeakLinkage()
- || def->hasLinkOnceLinkage() || def->hasCommonLinkage() )
+ else if (def->hasExternalLinkage() || def->hasWeakLinkage()
+ || def->hasLinkOnceLinkage() || def->hasCommonLinkage())
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
else
attr |= LTO_SYMBOL_SCOPE_INTERNAL;
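
The attribute word assembled here packs log2(alignment) into the low bits and
ORs the LTO_SYMBOL_* flag groups on top. Schematically, with only the two
permission flags shown (encodeSymbolAttributes is a hypothetical helper; the
calls themselves appear in this file):

#include "llvm/Support/MathExtras.h"
#include "llvm-c/lto.h"
#include <stdint.h>

static uint32_t encodeSymbolAttributes(uint32_t align, bool isFunction) {
  // Alignments are powers of two, so CountTrailingZeros_32 is an exact log2.
  uint32_t attr = align ? llvm::CountTrailingZeros_32(align) : 0;
  attr |= isFunction ? LTO_SYMBOL_PERMISSIONS_CODE
                     : LTO_SYMBOL_PERMISSIONS_DATA;
  return attr;
}
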
@@ -381,7 +388,7 @@ void LTOModule::addDefinedSymbol(GlobalValue* def, Mangler &mangler,
void LTOModule::addAsmGlobalSymbol(const char *name) {
// only add new define if not already defined
- if ( _defines.count(name, &name[strlen(name)+1]) == 0 )
+ if (_defines.count(name) == 0)
return;
// string is owned by _defines
@@ -398,10 +405,14 @@ void LTOModule::addAsmGlobalSymbol(const char *name) {
void LTOModule::addPotentialUndefinedSymbol(GlobalValue* decl, Mangler &mangler)
{
// ignore all llvm.* symbols
- if ( strncmp(decl->getNameStart(), "llvm.", 5) == 0 )
+ if (decl->getName().startswith("llvm."))
return;
- const char* name = mangler.getValueName(decl).c_str();
+ // ignore all aliases
+ if (isa<GlobalAlias>(decl))
+ return;
+
+ std::string name = mangler.getMangledName(decl);
// we already have the symbol
if (_undefines.find(name) != _undefines.end())
@@ -409,7 +420,7 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue* decl, Mangler &mangler)
NameAndAttributes info;
// string is owned by _undefines
- info.name = ::strdup(name);
+ info.name = ::strdup(name.c_str());
if (decl->hasExternalWeakLinkage())
info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
else
@@ -419,11 +430,11 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue* decl, Mangler &mangler)
-// Find exeternal symbols referenced by VALUE. This is a recursive function.
+// Find external symbols referenced by VALUE. This is a recursive function.
void LTOModule::findExternalRefs(Value* value, Mangler &mangler) {
if (GlobalValue* gv = dyn_cast<GlobalValue>(value)) {
- if ( !gv->hasExternalLinkage() )
+ if (!gv->hasExternalLinkage())
addPotentialUndefinedSymbol(gv, mangler);
// If this is a variable definition, do not recursively process
// initializer. It might contain a reference to this variable
@@ -431,11 +442,11 @@ void LTOModule::findExternalRefs(Value* value, Mangler &mangler) {
// processed in addDefinedDataSymbol().
return;
}
-
+
// GlobalValue, even with InternalLinkage type, may have operands with
// ExternalLinkage type. Do not ignore these operands.
if (Constant* c = dyn_cast<Constant>(value)) {
- // Handle ConstantExpr, ConstantStruct, ConstantArry etc..
+ // Handle ConstantExpr, ConstantStruct, ConstantArray etc.
for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i)
findExternalRefs(c->getOperand(i), mangler);
}
@@ -443,11 +454,11 @@ void LTOModule::findExternalRefs(Value* value, Mangler &mangler) {
void LTOModule::lazyParseSymbols()
{
- if ( !_symbolsParsed ) {
+ if (!_symbolsParsed) {
_symbolsParsed = true;
// Use mangler to add GlobalPrefix to names to match linker names.
- Mangler mangler(*_module, _target->getTargetAsmInfo()->getGlobalPrefix());
+ Mangler mangler(*_module, _target->getMCAsmInfo()->getGlobalPrefix());
// add chars used in ObjC method names so method names aren't mangled
mangler.markCharAcceptable('[');
mangler.markCharAcceptable(']');
@@ -459,7 +470,7 @@ void LTOModule::lazyParseSymbols()
// add functions
for (Module::iterator f = _module->begin(); f != _module->end(); ++f) {
- if ( f->isDeclaration() )
+ if (f->isDeclaration())
addPotentialUndefinedSymbol(f, mangler);
else
addDefinedFunctionSymbol(f, mangler);
@@ -468,7 +479,7 @@ void LTOModule::lazyParseSymbols()
// add data
for (Module::global_iterator v = _module->global_begin(),
e = _module->global_end(); v != e; ++v) {
- if ( v->isDeclaration() )
+ if (v->isDeclaration())
addPotentialUndefinedSymbol(v, mangler);
else
addDefinedDataSymbol(v, mangler);
@@ -505,8 +516,7 @@ void LTOModule::lazyParseSymbols()
it != _undefines.end(); ++it) {
// if this symbol also has a definition, then don't make an undefine
// because it is a tentative definition
- if ( _defines.count(it->getKeyData(), it->getKeyData()+
- it->getKeyLength()) == 0 ) {
+ if (_defines.count(it->getKey()) == 0) {
NameAndAttributes info = it->getValue();
_symbols.push_back(info);
}
@@ -525,7 +535,7 @@ uint32_t LTOModule::getSymbolCount()
lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index)
{
lazyParseSymbols();
- if ( index < _symbols.size() )
+ if (index < _symbols.size())
return _symbols[index].attributes;
else
return lto_symbol_attributes(0);
@@ -534,9 +544,8 @@ lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index)
const char* LTOModule::getSymbolName(uint32_t index)
{
lazyParseSymbols();
- if ( index < _symbols.size() )
+ if (index < _symbols.size())
return _symbols[index].name;
else
return NULL;
}
-
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index c8ad9fe08f35..3120aa52c953 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -22,7 +22,7 @@ LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader bitwriter
include $(LEVEL)/Makefile.common
-ifeq ($(OS),Darwin)
+ifeq ($(HOST_OS),Darwin)
# set dylib internal version number to llvmCore submission number
ifdef LLVM_SUBMIT_VERSION
LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version \
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index 936aeae15b04..cc841bdf034d 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -203,14 +203,6 @@ bool lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model model)
}
//
-// sets the path to gcc
-//
-void lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path)
-{
- cg->setGccPath(path);
-}
-
-//
// sets the path to the assembler tool
//
void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index 631a0ddbfb19..18360f837e93 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -21,6 +21,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/raw_ostream.h"
#include <iostream>
using namespace llvm;
@@ -33,27 +34,31 @@ namespace {
static char ID; // Pass ID, replacement for typeid
ExternalFunctionsPassedConstants() : ModulePass(&ID) {}
virtual bool runOnModule(Module &M) {
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->isDeclaration()) {
- bool PrintedFn = false;
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (Instruction *User = dyn_cast<Instruction>(*UI)) {
- CallSite CS = CallSite::get(User);
- if (CS.getInstruction()) {
- for (CallSite::arg_iterator AI = CS.arg_begin(),
- E = CS.arg_end(); AI != E; ++AI)
- if (isa<Constant>(*AI)) {
- if (!PrintedFn) {
- std::cerr << "Function '" << I->getName() << "':\n";
- PrintedFn = true;
- }
- std::cerr << *User;
- break;
- }
- }
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->isDeclaration()) continue;
+
+ bool PrintedFn = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (!User) continue;
+
+ CallSite CS = CallSite::get(User);
+ if (!CS.getInstruction()) continue;
+
+ for (CallSite::arg_iterator AI = CS.arg_begin(),
+ E = CS.arg_end(); AI != E; ++AI) {
+ if (!isa<Constant>(*AI)) continue;
+
+ if (!PrintedFn) {
+ errs() << "Function '" << I->getName() << "':\n";
+ PrintedFn = true;
}
+ errs() << *User;
+ break;
+ }
}
+ }
return false;
}
@@ -77,7 +82,7 @@ namespace {
AU.addRequiredTransitive<CallGraph>();
}
virtual bool runOnModule(Module &M) {
- getAnalysis<CallGraph>().print(std::cerr, &M);
+ getAnalysis<CallGraph>().print(errs(), &M);
return false;
}
};
diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt
index efcca80ddfc2..b75cda0e128b 100644
--- a/tools/opt/CMakeLists.txt
+++ b/tools/opt/CMakeLists.txt
@@ -1,5 +1,5 @@
set(LLVM_REQUIRES_EH 1)
-set(LLVM_LINK_COMPONENTS bitreader bitwriter instrumentation scalaropts ipo)
+set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo)
add_llvm_tool(opt
AnalysisWrappers.cpp
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index 5d581e4af0a2..1ae6be253f78 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -28,9 +28,10 @@ static void WriteGraphToFile(std::ostream &O, const std::string &GraphName,
const GraphType &GT) {
std::string Filename = GraphName + ".dot";
O << "Writing '" << Filename << "'...";
- std::ofstream F(Filename.c_str());
+ std::string ErrInfo;
+ raw_fd_ostream F(Filename.c_str(), ErrInfo);
- if (F.good())
+ if (ErrInfo.empty())
WriteGraph(F, GT);
else
O << " error opening file for writing!";
@@ -70,8 +71,7 @@ namespace {
return false;
}
- void print(std::ostream &OS) const {}
- void print(std::ostream &OS, const llvm::Module*) const {}
+ void print(raw_ostream &OS, const llvm::Module*) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraph>();
diff --git a/tools/opt/Makefile b/tools/opt/Makefile
index 0afb00217574..b17be343a45a 100644
--- a/tools/opt/Makefile
+++ b/tools/opt/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../..
TOOLNAME = opt
REQUIRES_EH := 1
-LINK_COMPONENTS := bitreader bitwriter instrumentation scalaropts ipo
+LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo
include $(LEVEL)/Makefile.common
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index be652644a6b5..66709ffa196a 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -29,8 +29,8 @@
#include "llvm/Module.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SCCIterator.h"
-#include <iostream>
using namespace llvm;
namespace {
@@ -39,7 +39,7 @@ namespace {
CFGSCC() : FunctionPass(&ID) {}
bool runOnFunction(Function& func);
- void print(std::ostream &O, const Module* = 0) const { }
+ void print(raw_ostream &O, const Module* = 0) const { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -53,7 +53,7 @@ namespace {
// run - Print out SCCs in the call graph for the specified module.
bool runOnModule(Module &M);
- void print(std::ostream &O, const Module* = 0) const { }
+ void print(raw_ostream &O, const Module* = 0) const { }
// getAnalysisUsage - This pass requires the CallGraph.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -73,18 +73,18 @@ namespace {
bool CFGSCC::runOnFunction(Function &F) {
unsigned sccNum = 0;
- std::cout << "SCCs for Function " << F.getName() << " in PostOrder:";
+ outs() << "SCCs for Function " << F.getName() << " in PostOrder:";
for (scc_iterator<Function*> SCCI = scc_begin(&F),
E = scc_end(&F); SCCI != E; ++SCCI) {
std::vector<BasicBlock*> &nextSCC = *SCCI;
- std::cout << "\nSCC #" << ++sccNum << " : ";
+ outs() << "\nSCC #" << ++sccNum << " : ";
for (std::vector<BasicBlock*>::const_iterator I = nextSCC.begin(),
E = nextSCC.end(); I != E; ++I)
- std::cout << (*I)->getName() << ", ";
+ outs() << (*I)->getName() << ", ";
if (nextSCC.size() == 1 && SCCI.hasLoop())
- std::cout << " (Has self-loop).";
+ outs() << " (Has self-loop).";
}
- std::cout << "\n";
+ outs() << "\n";
return true;
}
@@ -94,19 +94,19 @@ bool CFGSCC::runOnFunction(Function &F) {
bool CallGraphSCC::runOnModule(Module &M) {
CallGraphNode* rootNode = getAnalysis<CallGraph>().getRoot();
unsigned sccNum = 0;
- std::cout << "SCCs for the program in PostOrder:";
+ outs() << "SCCs for the program in PostOrder:";
for (scc_iterator<CallGraphNode*> SCCI = scc_begin(rootNode),
E = scc_end(rootNode); SCCI != E; ++SCCI) {
const std::vector<CallGraphNode*> &nextSCC = *SCCI;
- std::cout << "\nSCC #" << ++sccNum << " : ";
+ outs() << "\nSCC #" << ++sccNum << " : ";
for (std::vector<CallGraphNode*>::const_iterator I = nextSCC.begin(),
E = nextSCC.end(); I != E; ++I)
- std::cout << ((*I)->getFunction() ? (*I)->getFunction()->getName()
- : std::string("Indirect CallGraph node")) << ", ";
+ outs() << ((*I)->getFunction() ? (*I)->getFunction()->getNameStr()
+ : std::string("Indirect CallGraph node")) << ", ";
if (nextSCC.size() == 1 && SCCI.hasLoop())
- std::cout << " (Has self-loop).";
+ outs() << " (Has self-loop).";
}
- std::cout << "\n";
+ outs() << "\n";
return true;
}
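
scc_iterator works over any graph type with a GraphTraits specialization; the
two passes above instantiate it for Function* (CFG SCCs) and CallGraphNode*
(call-graph SCCs). A stripped-down sketch against a function's CFG, same-era
API (countSCCs is a hypothetical helper):

#include "llvm/Function.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Support/raw_ostream.h"

static void countSCCs(llvm::Function &F) {
  unsigned N = 0;
  for (llvm::scc_iterator<llvm::Function*> I = llvm::scc_begin(&F),
         E = llvm::scc_end(&F); I != E; ++I)
    ++N;  // *I is the std::vector<BasicBlock*> making up one SCC
  llvm::outs() << F.getName() << ": " << N << " SCC(s)\n";
}
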
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 689161940240..fe0e03649ddc 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -26,17 +26,15 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/PassNameParser.h"
#include "llvm/System/Signals.h"
+#include "llvm/Support/IRReader.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/StandardPasses.h"
-#include "llvm/Support/Streams.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/LinkAllVMCore.h"
-#include <iostream>
-#include <fstream>
#include <memory>
#include <algorithm>
using namespace llvm;
@@ -50,7 +48,7 @@ PassList(cl::desc("Optimizations available:"));
// Other command line options...
//
static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
+InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
cl::init("-"), cl::value_desc("filename"));
static cl::opt<std::string>
@@ -58,7 +56,7 @@ OutputFilename("o", cl::desc("Override output filename"),
cl::value_desc("filename"), cl::init("-"));
static cl::opt<bool>
-Force("f", cl::desc("Overwrite output files"));
+Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
PrintEachXForm("p", cl::desc("Print module after each transformation"));
@@ -68,6 +66,10 @@ NoOutput("disable-output",
cl::desc("Do not write result bitcode file"), cl::Hidden);
static cl::opt<bool>
+OutputAssembly("S",
+ cl::desc("Write output as LLVM assembly"), cl::Hidden);
+
+static cl::opt<bool>
NoVerify("disable-verify", cl::desc("Do not verify result module"), cl::Hidden);
static cl::opt<bool>
@@ -80,15 +82,23 @@ StripDebug("strip-debug",
static cl::opt<bool>
DisableInline("disable-inlining", cl::desc("Do not run the inliner pass"));
-static cl::opt<bool>
-DisableOptimizations("disable-opt",
+static cl::opt<bool>
+DisableOptimizations("disable-opt",
cl::desc("Do not run any optimization passes"));
static cl::opt<bool>
-StandardCompileOpts("std-compile-opts",
+DisableInternalize("disable-internalize",
+ cl::desc("Do not mark all symbols as internal"));
+
+static cl::opt<bool>
+StandardCompileOpts("std-compile-opts",
cl::desc("Include the standard compile time optimizations"));
static cl::opt<bool>
+StandardLinkOpts("std-link-opts",
+ cl::desc("Include the standard link time optimizations"));
+
+static cl::opt<bool>
OptLevelO1("O1",
cl::desc("Optimization level 1. Similar to llvm-gcc -O1"));
@@ -102,7 +112,8 @@ OptLevelO3("O3",
static cl::opt<bool>
UnitAtATime("funit-at-a-time",
- cl::desc("Enable IPO. This is same as llvm-gcc's -funit-at-a-time"));
+ cl::desc("Enable IPO. This is the same as llvm-gcc's -funit-at-a-time"),
+ cl::init(true));
static cl::opt<bool>
DisableSimplifyLibCalls("disable-simplify-libcalls",
@@ -123,23 +134,24 @@ namespace {
struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
static char ID;
const PassInfo *PassToPrint;
- CallGraphSCCPassPrinter(const PassInfo *PI) :
+ CallGraphSCCPassPrinter(const PassInfo *PI) :
CallGraphSCCPass(&ID), PassToPrint(PI) {}
- virtual bool runOnSCC(const std::vector<CallGraphNode *>&SCC) {
+ virtual bool runOnSCC(std::vector<CallGraphNode *>&SCC) {
if (!Quiet) {
- cout << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
+ outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
Function *F = SCC[i]->getFunction();
- if (F)
- getAnalysisID<Pass>(PassToPrint).print(cout, F->getParent());
+ if (F) {
+ getAnalysisID<Pass>(PassToPrint).print(outs(), F->getParent());
+ }
}
}
// Get and print pass...
return false;
}
-
+
virtual const char *getPassName() const { return "'Pass' Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -158,8 +170,8 @@ struct ModulePassPrinter : public ModulePass {
virtual bool runOnModule(Module &M) {
if (!Quiet) {
- cout << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
- getAnalysisID<Pass>(PassToPrint).print(cout, &M);
+ outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
+ getAnalysisID<Pass>(PassToPrint).print(outs(), &M);
}
// Get and print pass...
@@ -182,12 +194,12 @@ struct FunctionPassPrinter : public FunctionPass {
PassToPrint(PI) {}
virtual bool runOnFunction(Function &F) {
- if (!Quiet) {
- cout << "Printing analysis '" << PassToPrint->getPassName()
- << "' for function '" << F.getName() << "':\n";
+ if (!Quiet) {
+ outs() << "Printing analysis '" << PassToPrint->getPassName()
+ << "' for function '" << F.getName() << "':\n";
}
// Get and print pass...
- getAnalysisID<Pass>(PassToPrint).print(cout, F.getParent());
+ getAnalysisID<Pass>(PassToPrint).print(outs(), F.getParent());
return false;
}
@@ -204,19 +216,19 @@ char FunctionPassPrinter::ID = 0;
struct LoopPassPrinter : public LoopPass {
static char ID;
const PassInfo *PassToPrint;
- LoopPassPrinter(const PassInfo *PI) :
+ LoopPassPrinter(const PassInfo *PI) :
LoopPass(&ID), PassToPrint(PI) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
if (!Quiet) {
- cout << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
- getAnalysisID<Pass>(PassToPrint).print(cout,
+ outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
+ getAnalysisID<Pass>(PassToPrint).print(outs(),
L->getHeader()->getParent()->getParent());
}
// Get and print pass...
return false;
}
-
+
virtual const char *getPassName() const { return "'Pass' Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -230,17 +242,17 @@ char LoopPassPrinter::ID = 0;
struct BasicBlockPassPrinter : public BasicBlockPass {
const PassInfo *PassToPrint;
static char ID;
- BasicBlockPassPrinter(const PassInfo *PI)
+ BasicBlockPassPrinter(const PassInfo *PI)
: BasicBlockPass(&ID), PassToPrint(PI) {}
virtual bool runOnBasicBlock(BasicBlock &BB) {
if (!Quiet) {
- cout << "Printing Analysis info for BasicBlock '" << BB.getName()
- << "': Pass " << PassToPrint->getPassName() << ":\n";
+ outs() << "Printing Analysis info for BasicBlock '" << BB.getName()
+ << "': Pass " << PassToPrint->getPassName() << ":\n";
}
// Get and print pass...
- getAnalysisID<Pass>(PassToPrint).print(cout, BB.getParent()->getParent());
+ getAnalysisID<Pass>(PassToPrint).print(outs(), BB.getParent()->getParent());
return false;
}
@@ -261,8 +273,8 @@ inline void addPass(PassManager &PM, Pass *P) {
if (VerifyEach) PM.add(createVerifierPass());
}
-/// AddOptimizationPasses - This routine adds optimization passes
-/// based on selected optimization level, OptLevel. This routine
+/// AddOptimizationPasses - This routine adds optimization passes
+/// based on the selected optimization level, OptLevel. This routine
/// duplicates llvm-gcc behaviour.
///
/// OptLevel - Optimization Level
@@ -294,7 +306,7 @@ void AddStandardCompilePasses(PassManager &PM) {
llvm::Pass *InliningPass = !DisableInline ? createFunctionInliningPass() : 0;
// -std-compile-opts adds the same module passes as -O3.
- createStandardModulePasses(&PM, 3,
+ createStandardModulePasses(&PM, 3,
/*OptimizeSize=*/ false,
/*UnitAtATime=*/ true,
/*UnrollLoops=*/ true,
@@ -303,6 +315,20 @@ void AddStandardCompilePasses(PassManager &PM) {
InliningPass);
}
+void AddStandardLinkPasses(PassManager &PM) {
+ PM.add(createVerifierPass()); // Verify that input is correct
+
+ // If the -strip-debug command line option was specified, do it.
+ if (StripDebug)
+ addPass(PM, createStripSymbolsPass(true));
+
+ if (DisableOptimizations) return;
+
+ createStandardLTOPasses(&PM, /*Internalize=*/ !DisableInternalize,
+ /*RunInliner=*/ !DisableInline,
+ /*VerifyEach=*/ VerifyEach);
+}
+
} // anonymous namespace
@@ -311,7 +337,7 @@ void AddStandardCompilePasses(PassManager &PM) {
//
int main(int argc, char **argv) {
llvm_shutdown_obj X; // Call llvm_shutdown() on exit.
- LLVMContext Context;
+ LLVMContext &Context = getGlobalContext();
try {
cl::ParseCommandLineOptions(argc, argv,
"llvm .bc -> .bc modular optimizer and analysis printer\n");
@@ -321,56 +347,41 @@ int main(int argc, char **argv) {
// FIXME: The choice of target should be controllable on the command line.
std::auto_ptr<TargetMachine> target;
- std::string ErrorMessage;
+ SMDiagnostic Err;
// Load the input module...
std::auto_ptr<Module> M;
- if (MemoryBuffer *Buffer
- = MemoryBuffer::getFileOrSTDIN(InputFilename, &ErrorMessage)) {
- M.reset(ParseBitcodeFile(Buffer, Context, &ErrorMessage));
- delete Buffer;
- }
-
+ M.reset(ParseIRFile(InputFilename, Err, Context));
+
if (M.get() == 0) {
- cerr << argv[0] << ": ";
- if (ErrorMessage.size())
- cerr << ErrorMessage << "\n";
- else
- cerr << "bitcode didn't read correctly.\n";
+ Err.Print(argv[0], errs());
return 1;
}
// Figure out what stream we are supposed to write to...
- // FIXME: cout is not binary!
- std::ostream *Out = &std::cout; // Default to printing to stdout...
+ // FIXME: outs() is not binary!
+ raw_ostream *Out = &outs(); // Default to printing to stdout...
if (OutputFilename != "-") {
- if (!Force && std::ifstream(OutputFilename.c_str())) {
- // If force is not specified, make sure not to overwrite a file!
- cerr << argv[0] << ": error opening '" << OutputFilename
- << "': file exists!\n"
- << "Use -f command line argument to force output\n";
- return 1;
- }
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- Out = new std::ofstream(OutputFilename.c_str(), io_mode);
-
- if (!Out->good()) {
- cerr << argv[0] << ": error opening " << OutputFilename << "!\n";
- return 1;
- }
-
// Make sure that the Output file gets unlinked from the disk if we get a
// SIGINT
sys::RemoveFileOnSignal(sys::Path(OutputFilename));
+
+ std::string ErrorInfo;
+ Out = new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ delete Out;
+ return 1;
+ }
}
// If the output is set to be emitted to standard out, and standard out is a
// console, print out a warning message and refuse to do it. We don't
// impress anyone by spewing tons of binary goo to a terminal.
- if (!Force && !NoOutput && CheckBitcodeOutputToConsole(Out,!Quiet)) {
- NoOutput = true;
- }
+ if (!Force && !NoOutput && !OutputAssembly)
+ if (CheckBitcodeOutputToConsole(*Out, !Quiet))
+ NoOutput = true;
// Create a PassManager to hold and optimize the collection of passes we are
// about to build...
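
ParseIRFile accepts either bitcode or textual IR, which is why the asmparser
component joins the link in the Makefile and CMake changes above; parse errors
come back through an SMDiagnostic that knows how to print a caret diagnostic.
The loading pattern in isolation (loadIR is a hypothetical helper):

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Module *loadIR(const char *argv0, const char *filename) {
  llvm::SMDiagnostic Err;
  llvm::Module *M =
      llvm::ParseIRFile(filename, Err, llvm::getGlobalContext());
  if (!M)
    Err.Print(argv0, llvm::errs());  // prints file:line:col plus a caret line
  return M;
}
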
@@ -385,7 +396,7 @@ int main(int argc, char **argv) {
FPasses = new FunctionPassManager(new ExistingModuleProvider(M.get()));
FPasses->add(new TargetData(M.get()));
}
-
+
// If the -strip-debug command line option was specified, add it. If
// -std-compile-opts was also specified, it will handle StripDebug.
if (StripDebug && !StandardCompileOpts)
@@ -395,12 +406,18 @@ int main(int argc, char **argv) {
for (unsigned i = 0; i < PassList.size(); ++i) {
// Check to see if -std-compile-opts was specified before this option. If
// so, handle it.
- if (StandardCompileOpts &&
+ if (StandardCompileOpts &&
StandardCompileOpts.getPosition() < PassList.getPosition(i)) {
AddStandardCompilePasses(Passes);
StandardCompileOpts = false;
}
-
+
+ if (StandardLinkOpts &&
+ StandardLinkOpts.getPosition() < PassList.getPosition(i)) {
+ AddStandardLinkPasses(Passes);
+ StandardLinkOpts = false;
+ }
+
if (OptLevelO1 && OptLevelO1.getPosition() < PassList.getPosition(i)) {
AddOptimizationPasses(Passes, *FPasses, 1);
OptLevelO1 = false;
@@ -421,8 +438,8 @@ int main(int argc, char **argv) {
if (PassInf->getNormalCtor())
P = PassInf->getNormalCtor()();
else
- cerr << argv[0] << ": cannot create pass: "
- << PassInf->getPassName() << "\n";
+ errs() << argv[0] << ": cannot create pass: "
+ << PassInf->getPassName() << "\n";
if (P) {
bool isBBPass = dynamic_cast<BasicBlockPass*>(P) != 0;
bool isLPass = !isBBPass && dynamic_cast<LoopPass*>(P) != 0;
@@ -444,30 +461,36 @@ int main(int argc, char **argv) {
Passes.add(new ModulePassPrinter(PassInf));
}
}
-
+
if (PrintEachXForm)
Passes.add(createPrintModulePass(&errs()));
}
-
+
// If -std-compile-opts was specified at the end of the pass list, add them.
if (StandardCompileOpts) {
AddStandardCompilePasses(Passes);
StandardCompileOpts = false;
- }
+ }
+
+ if (StandardLinkOpts) {
+ AddStandardLinkPasses(Passes);
+ StandardLinkOpts = false;
+ }
if (OptLevelO1) {
- AddOptimizationPasses(Passes, *FPasses, 1);
- }
+ AddOptimizationPasses(Passes, *FPasses, 1);
+ }
if (OptLevelO2) {
- AddOptimizationPasses(Passes, *FPasses, 2);
- }
+ AddOptimizationPasses(Passes, *FPasses, 2);
+ }
if (OptLevelO3) {
- AddOptimizationPasses(Passes, *FPasses, 3);
- }
+ AddOptimizationPasses(Passes, *FPasses, 3);
+ }
if (OptLevelO1 || OptLevelO2 || OptLevelO3) {
+ FPasses->doInitialization();
for (Module::iterator I = M.get()->begin(), E = M.get()->end();
I != E; ++I)
FPasses->run(*I);
@@ -477,22 +500,26 @@ int main(int argc, char **argv) {
if (!NoVerify && !VerifyEach)
Passes.add(createVerifierPass());
- // Write bitcode out to disk or cout as the last step...
- if (!NoOutput && !AnalyzeOnly)
- Passes.add(CreateBitcodeWriterPass(*Out));
+ // Write bitcode or assembly out to disk or outs() as the last step...
+ if (!NoOutput && !AnalyzeOnly) {
+ if (OutputAssembly)
+ Passes.add(createPrintModulePass(Out));
+ else
+ Passes.add(createBitcodeWriterPass(*Out));
+ }
// Now that we have all of the passes ready, run them.
Passes.run(*M.get());
- // Delete the ofstream.
- if (Out != &std::cout)
+ // Delete the raw_fd_ostream.
+ if (Out != &outs())
delete Out;
return 0;
} catch (const std::string& msg) {
- cerr << argv[0] << ": " << msg << "\n";
+ errs() << argv[0] << ": " << msg << "\n";
} catch (...) {
- cerr << argv[0] << ": Unexpected unknown exception occurred.\n";
+ errs() << argv[0] << ": Unexpected unknown exception occurred.\n";
}
llvm_shutdown();
return 1;
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
new file mode 100644
index 000000000000..92f020b382d4
--- /dev/null
+++ b/unittests/ADT/APFloatTest.cpp
@@ -0,0 +1,517 @@
+//===- llvm/unittest/ADT/APFloat.cpp - APFloat unit tests -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <ostream>
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+
+using namespace llvm;
+
+static double convertToDoubleFromString(const char *Str) {
+ llvm::APFloat F(0.0);
+ F.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven);
+ return F.convertToDouble();
+}
+
+namespace {
+
+TEST(APFloatTest, Zero) {
+ EXPECT_EQ(0.0f, APFloat(APFloat::IEEEsingle, 0.0f).convertToFloat());
+ EXPECT_EQ(-0.0f, APFloat(APFloat::IEEEsingle, -0.0f).convertToFloat());
+
+ EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble, 0.0).convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, -0.0).convertToDouble());
+}
+
+TEST(APFloatTest, fromZeroDecimalString) {
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, ".0").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+.0").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-.0").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.0").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.0").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.0").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "00000.").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+00000.").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-00000.").convertToDouble());
+
+ EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble, ".00000").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+.00000").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-.00000").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0000.00000").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0000.00000").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0000.00000").convertToDouble());
+}
+
+TEST(APFloatTest, fromZeroDecimalSingleExponentString) {
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0e1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0e1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0e1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0e+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0e+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0e+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0e-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0e-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0e-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.e1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.e1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.e1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.e+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.e+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.e+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.e-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.e-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.e-1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, ".0e1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+.0e1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-.0e1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, ".0e+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+.0e+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-.0e+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, ".0e-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+.0e-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-.0e-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.0e1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.0e1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.0e1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.0e+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.0e+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.0e+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0.0e-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0.0e-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0.0e-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "000.0000e1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+000.0000e+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-000.0000e+1").convertToDouble());
+}
+
+TEST(APFloatTest, fromZeroDecimalLargeExponentString) {
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0e1234").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0e1234").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0e1234").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0e+1234").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0e+1234").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0e+1234").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0e-1234").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0e-1234").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0e-1234").convertToDouble());
+
+ EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble, "000.0000e1234").convertToDouble());
+ EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble, "000.0000e-1234").convertToDouble());
+
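+ // The explicit length (6) bounds the StringRef to "0e1234", stopping
+ // short of the trailing \02 byte that would otherwise be rejected.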
+ EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble, StringRef("0e1234\02", 6)).convertToDouble());
+}
+
+TEST(APFloatTest, fromZeroHexadecimalString) {
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0p1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0p1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0p1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0p+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0p+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0p+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0p-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0p-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0p-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.p1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0.p1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0.p1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.p+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0.p+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0.p+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.p-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0.p-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0.p-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x.0p1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x.0p1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x.0p1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x.0p+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x.0p+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x.0p+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x.0p-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x.0p-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x.0p-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.0p1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0.0p1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0.0p1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.0p+1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0.0p+1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0.0p+1").convertToDouble());
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.0p-1").convertToDouble());
+ EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble, "+0x0.0p-1").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0.0p-1").convertToDouble());
+
+
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x00000.p1").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0000.00000p1").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x.00000p1").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.p1").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0p1234").convertToDouble());
+ EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble, "-0x0p1234").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x00000.p1234").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0000.00000p1234").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x.00000p1234").convertToDouble());
+ EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble, "0x0.p1234").convertToDouble());
+}
+
+TEST(APFloatTest, fromDecimalString) {
+ EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble, "1").convertToDouble());
+ EXPECT_EQ(2.0, APFloat(APFloat::IEEEdouble, "2.").convertToDouble());
+ EXPECT_EQ(0.5, APFloat(APFloat::IEEEdouble, ".5").convertToDouble());
+ EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble, "1.0").convertToDouble());
+ EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble, "-2").convertToDouble());
+ EXPECT_EQ(-4.0, APFloat(APFloat::IEEEdouble, "-4.").convertToDouble());
+ EXPECT_EQ(-0.5, APFloat(APFloat::IEEEdouble, "-.5").convertToDouble());
+ EXPECT_EQ(-1.5, APFloat(APFloat::IEEEdouble, "-1.5").convertToDouble());
+ EXPECT_EQ(1.25e12, APFloat(APFloat::IEEEdouble, "1.25e12").convertToDouble());
+ EXPECT_EQ(1.25e+12, APFloat(APFloat::IEEEdouble, "1.25e+12").convertToDouble());
+ EXPECT_EQ(1.25e-12, APFloat(APFloat::IEEEdouble, "1.25e-12").convertToDouble());
+ EXPECT_EQ(1024.0, APFloat(APFloat::IEEEdouble, "1024.").convertToDouble());
+ EXPECT_EQ(1024.05, APFloat(APFloat::IEEEdouble, "1024.05000").convertToDouble());
+ EXPECT_EQ(0.05, APFloat(APFloat::IEEEdouble, ".05000").convertToDouble());
+ EXPECT_EQ(2.0, APFloat(APFloat::IEEEdouble, "2.").convertToDouble());
+ EXPECT_EQ(2.0e2, APFloat(APFloat::IEEEdouble, "2.e2").convertToDouble());
+ EXPECT_EQ(2.0e+2, APFloat(APFloat::IEEEdouble, "2.e+2").convertToDouble());
+ EXPECT_EQ(2.0e-2, APFloat(APFloat::IEEEdouble, "2.e-2").convertToDouble());
+ EXPECT_EQ(2.05e2, APFloat(APFloat::IEEEdouble, "002.05000e2").convertToDouble());
+ EXPECT_EQ(2.05e+2, APFloat(APFloat::IEEEdouble, "002.05000e+2").convertToDouble());
+ EXPECT_EQ(2.05e-2, APFloat(APFloat::IEEEdouble, "002.05000e-2").convertToDouble());
+ EXPECT_EQ(2.05e12, APFloat(APFloat::IEEEdouble, "002.05000e12").convertToDouble());
+ EXPECT_EQ(2.05e+12, APFloat(APFloat::IEEEdouble, "002.05000e+12").convertToDouble());
+ EXPECT_EQ(2.05e-12, APFloat(APFloat::IEEEdouble, "002.05000e-12").convertToDouble());
+}
+
+TEST(APFloatTest, fromHexadecimalString) {
+ EXPECT_EQ( 1.0, APFloat(APFloat::IEEEdouble, "0x1p0").convertToDouble());
+ EXPECT_EQ(+1.0, APFloat(APFloat::IEEEdouble, "+0x1p0").convertToDouble());
+ EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble, "-0x1p0").convertToDouble());
+
+ EXPECT_EQ( 1.0, APFloat(APFloat::IEEEdouble, "0x1p+0").convertToDouble());
+ EXPECT_EQ(+1.0, APFloat(APFloat::IEEEdouble, "+0x1p+0").convertToDouble());
+ EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble, "-0x1p+0").convertToDouble());
+
+ EXPECT_EQ( 1.0, APFloat(APFloat::IEEEdouble, "0x1p-0").convertToDouble());
+ EXPECT_EQ(+1.0, APFloat(APFloat::IEEEdouble, "+0x1p-0").convertToDouble());
+ EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble, "-0x1p-0").convertToDouble());
+
+
+ EXPECT_EQ( 2.0, APFloat(APFloat::IEEEdouble, "0x1p1").convertToDouble());
+ EXPECT_EQ(+2.0, APFloat(APFloat::IEEEdouble, "+0x1p1").convertToDouble());
+ EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble, "-0x1p1").convertToDouble());
+
+ EXPECT_EQ( 2.0, APFloat(APFloat::IEEEdouble, "0x1p+1").convertToDouble());
+ EXPECT_EQ(+2.0, APFloat(APFloat::IEEEdouble, "+0x1p+1").convertToDouble());
+ EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble, "-0x1p+1").convertToDouble());
+
+ EXPECT_EQ( 0.5, APFloat(APFloat::IEEEdouble, "0x1p-1").convertToDouble());
+ EXPECT_EQ(+0.5, APFloat(APFloat::IEEEdouble, "+0x1p-1").convertToDouble());
+ EXPECT_EQ(-0.5, APFloat(APFloat::IEEEdouble, "-0x1p-1").convertToDouble());
+
+
+ EXPECT_EQ( 3.0, APFloat(APFloat::IEEEdouble, "0x1.8p1").convertToDouble());
+ EXPECT_EQ(+3.0, APFloat(APFloat::IEEEdouble, "+0x1.8p1").convertToDouble());
+ EXPECT_EQ(-3.0, APFloat(APFloat::IEEEdouble, "-0x1.8p1").convertToDouble());
+
+ EXPECT_EQ( 3.0, APFloat(APFloat::IEEEdouble, "0x1.8p+1").convertToDouble());
+ EXPECT_EQ(+3.0, APFloat(APFloat::IEEEdouble, "+0x1.8p+1").convertToDouble());
+ EXPECT_EQ(-3.0, APFloat(APFloat::IEEEdouble, "-0x1.8p+1").convertToDouble());
+
+ EXPECT_EQ( 0.75, APFloat(APFloat::IEEEdouble, "0x1.8p-1").convertToDouble());
+ EXPECT_EQ(+0.75, APFloat(APFloat::IEEEdouble, "+0x1.8p-1").convertToDouble());
+ EXPECT_EQ(-0.75, APFloat(APFloat::IEEEdouble, "-0x1.8p-1").convertToDouble());
+
+
+ EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble, "0x1000.000p1").convertToDouble());
+ EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble, "+0x1000.000p1").convertToDouble());
+ EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble, "-0x1000.000p1").convertToDouble());
+
+ EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble, "0x1000.000p+1").convertToDouble());
+ EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble, "+0x1000.000p+1").convertToDouble());
+ EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble, "-0x1000.000p+1").convertToDouble());
+
+ EXPECT_EQ( 2048.0, APFloat(APFloat::IEEEdouble, "0x1000.000p-1").convertToDouble());
+ EXPECT_EQ(+2048.0, APFloat(APFloat::IEEEdouble, "+0x1000.000p-1").convertToDouble());
+ EXPECT_EQ(-2048.0, APFloat(APFloat::IEEEdouble, "-0x1000.000p-1").convertToDouble());
+
+
+ EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble, "0x1000p1").convertToDouble());
+ EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble, "+0x1000p1").convertToDouble());
+ EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble, "-0x1000p1").convertToDouble());
+
+ EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble, "0x1000p+1").convertToDouble());
+ EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble, "+0x1000p+1").convertToDouble());
+ EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble, "-0x1000p+1").convertToDouble());
+
+ EXPECT_EQ( 2048.0, APFloat(APFloat::IEEEdouble, "0x1000p-1").convertToDouble());
+ EXPECT_EQ(+2048.0, APFloat(APFloat::IEEEdouble, "+0x1000p-1").convertToDouble());
+ EXPECT_EQ(-2048.0, APFloat(APFloat::IEEEdouble, "-0x1000p-1").convertToDouble());
+
+
+ EXPECT_EQ( 16384.0, APFloat(APFloat::IEEEdouble, "0x10p10").convertToDouble());
+ EXPECT_EQ(+16384.0, APFloat(APFloat::IEEEdouble, "+0x10p10").convertToDouble());
+ EXPECT_EQ(-16384.0, APFloat(APFloat::IEEEdouble, "-0x10p10").convertToDouble());
+
+ EXPECT_EQ( 16384.0, APFloat(APFloat::IEEEdouble, "0x10p+10").convertToDouble());
+ EXPECT_EQ(+16384.0, APFloat(APFloat::IEEEdouble, "+0x10p+10").convertToDouble());
+ EXPECT_EQ(-16384.0, APFloat(APFloat::IEEEdouble, "-0x10p+10").convertToDouble());
+
+ EXPECT_EQ( 0.015625, APFloat(APFloat::IEEEdouble, "0x10p-10").convertToDouble());
+ EXPECT_EQ(+0.015625, APFloat(APFloat::IEEEdouble, "+0x10p-10").convertToDouble());
+ EXPECT_EQ(-0.015625, APFloat(APFloat::IEEEdouble, "-0x10p-10").convertToDouble());
+
+ EXPECT_EQ(1.0625, APFloat(APFloat::IEEEdouble, "0x1.1p0").convertToDouble());
+ EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble, "0x1p0").convertToDouble());
+
+ EXPECT_EQ(2.71828, convertToDoubleFromString("2.71828"));
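+
+ // Worked reading of the notation above (illustrative, reconstructed
+ // from the expected values rather than from the parser):
+ //   0x1.8p1   = (1 + 8/16) * 2^1   = 3.0
+ //   0x1.8p-1  = (1 + 8/16) * 2^-1  = 0.75
+ //   0x1000p-1 = 4096       * 2^-1  = 2048.0
+ //   0x10p-10  = 16         * 2^-10 = 0.015625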
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+TEST(APFloatTest, SemanticsDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEsingle, 0.0f).convertToDouble(), "Float semantics are not IEEEdouble");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, 0.0 ).convertToFloat(), "Float semantics are not IEEEsingle");
+}
+
+TEST(APFloatTest, StringDecimalDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ""), "Invalid string length");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+"), "String has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-"), "String has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("\0", 1)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("1\0", 2)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("1\02", 3)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("1\02e1", 5)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("1e\0", 3)), "Invalid character in exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("1e1\0", 4)), "Invalid character in exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("1e1\02", 5)), "Invalid character in exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.0f"), "Invalid character in significand");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".."), "String contains multiple dots");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "..0"), "String contains multiple dots");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.0.0"), "String contains multiple dots");
+}
+
+TEST(APFloatTest, StringDecimalSignificandDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "."), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+."), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-."), "Significand has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "e"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+e"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-e"), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "e1"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+e1"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-e1"), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".e1"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+.e1"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-.e1"), "Significand has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".e"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+.e"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-.e"), "Significand has no digits");
+}
+
+TEST(APFloatTest, StringDecimalExponentDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-1e"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+1.e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-1.e"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+.1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-.1e"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+1.1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-1.1e"), "Exponent has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1e+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1e-"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".1e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".1e+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, ".1e-"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.0e"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.0e+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "1.0e-"), "Exponent has no digits");
+}
+
+TEST(APFloatTest, StringHexadecimalDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x"), "Invalid string");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x"), "Invalid string");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x"), "Invalid string");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x0"), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x0"), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x0"), "Hex strings require an exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x0."), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x0."), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x0."), "Hex strings require an exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.0"), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.0"), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.0"), "Hex strings require an exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x0.0"), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x0.0"), "Hex strings require an exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x0.0"), "Hex strings require an exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x\0", 3)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x1\0", 4)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x1\02", 5)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x1\02p1", 7)), "Invalid character in significand");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x1p\0", 5)), "Invalid character in exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x1p1\0", 6)), "Invalid character in exponent");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, StringRef("0x1p1\02", 7)), "Invalid character in exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1p0f"), "Invalid character in exponent");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x..p1"), "String contains multiple dots");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x..0p1"), "String contains multiple dots");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.0.0p1"), "String contains multiple dots");
+}
+
+TEST(APFloatTest, StringHexadecimalSignificandDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x."), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x."), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x."), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0xp"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0xp"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0xp"), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0xp+"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0xp+"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0xp+"), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0xp-"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0xp-"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0xp-"), "Significand has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.p"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.p"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.p"), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.p+"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.p+"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.p+"), "Significand has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.p-"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.p-"), "Significand has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.p-"), "Significand has no digits");
+}
+
+TEST(APFloatTest, StringHexadecimalExponentDeath) {
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1p"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1p+"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1p-"), "Exponent has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1.p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.p"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1.p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.p+"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1.p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.p-"), "Exponent has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.1p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.1p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.1p"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.1p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.1p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.1p+"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x.1p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x.1p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x.1p-"), "Exponent has no digits");
+
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.1p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1.1p"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.1p"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.1p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1.1p+"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.1p+"), "Exponent has no digits");
+
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "0x1.1p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "+0x1.1p-"), "Exponent has no digits");
+ EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.1p-"), "Exponent has no digits");
+}
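+
+// Informal grammar implied by the death tests above (reconstructed from
+// the expectations, not taken from the parser itself):
+//   decimal ::= [+-] digits ["." [digits]] ["e" [+-] digits]
+//   hex     ::= [+-] "0x" hexits ["." [hexits]] "p" [+-] digits
+// A significand needs at least one digit, a hex string always needs an
+// exponent, an exponent marker needs a digit after its optional sign,
+// and at most one dot may appear.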
+#endif
+
+}
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 648faf13ad81..0b13aa402ea1 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include <ostream>
-#include "llvm/Support/raw_ostream.h"
#include "gtest/gtest.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
@@ -164,12 +163,182 @@ TEST(APIntTest, i1) {
}
TEST(APIntTest, fromString) {
- EXPECT_EQ(APInt(1, 0), APInt(1, "0", 1, 10));
- EXPECT_EQ(APInt(1, 1), APInt(1, "1", 1, 10));
- EXPECT_EQ(APInt(1, 1), APInt(1, "-1", 2, 10));
- EXPECT_EQ(APInt(1, 1), APInt(1, "1", 1, 2));
- EXPECT_EQ(APInt(1, 1), APInt(1, "1", 1, 8));
- EXPECT_EQ(APInt(1, 1), APInt(1, "1", 1, 16));
+ EXPECT_EQ(APInt(32, 0), APInt(32, "0", 2));
+ EXPECT_EQ(APInt(32, 1), APInt(32, "1", 2));
+ EXPECT_EQ(APInt(32, 2), APInt(32, "10", 2));
+ EXPECT_EQ(APInt(32, 3), APInt(32, "11", 2));
+ EXPECT_EQ(APInt(32, 4), APInt(32, "100", 2));
+
+ EXPECT_EQ(APInt(32, 0), APInt(32, "+0", 2));
+ EXPECT_EQ(APInt(32, 1), APInt(32, "+1", 2));
+ EXPECT_EQ(APInt(32, 2), APInt(32, "+10", 2));
+ EXPECT_EQ(APInt(32, 3), APInt(32, "+11", 2));
+ EXPECT_EQ(APInt(32, 4), APInt(32, "+100", 2));
+
+ EXPECT_EQ(APInt(32, uint64_t(-0LL)), APInt(32, "-0", 2));
+ EXPECT_EQ(APInt(32, uint64_t(-1LL)), APInt(32, "-1", 2));
+ EXPECT_EQ(APInt(32, uint64_t(-2LL)), APInt(32, "-10", 2));
+ EXPECT_EQ(APInt(32, uint64_t(-3LL)), APInt(32, "-11", 2));
+ EXPECT_EQ(APInt(32, uint64_t(-4LL)), APInt(32, "-100", 2));
+
+
+ EXPECT_EQ(APInt(32, 0), APInt(32, "0", 8));
+ EXPECT_EQ(APInt(32, 1), APInt(32, "1", 8));
+ EXPECT_EQ(APInt(32, 7), APInt(32, "7", 8));
+ EXPECT_EQ(APInt(32, 8), APInt(32, "10", 8));
+ EXPECT_EQ(APInt(32, 15), APInt(32, "17", 8));
+ EXPECT_EQ(APInt(32, 16), APInt(32, "20", 8));
+
+ EXPECT_EQ(APInt(32, +0), APInt(32, "+0", 8));
+ EXPECT_EQ(APInt(32, +1), APInt(32, "+1", 8));
+ EXPECT_EQ(APInt(32, +7), APInt(32, "+7", 8));
+ EXPECT_EQ(APInt(32, +8), APInt(32, "+10", 8));
+ EXPECT_EQ(APInt(32, +15), APInt(32, "+17", 8));
+ EXPECT_EQ(APInt(32, +16), APInt(32, "+20", 8));
+
+ EXPECT_EQ(APInt(32, uint64_t(-0LL)), APInt(32, "-0", 8));
+ EXPECT_EQ(APInt(32, uint64_t(-1LL)), APInt(32, "-1", 8));
+ EXPECT_EQ(APInt(32, uint64_t(-7LL)), APInt(32, "-7", 8));
+ EXPECT_EQ(APInt(32, uint64_t(-8LL)), APInt(32, "-10", 8));
+ EXPECT_EQ(APInt(32, uint64_t(-15LL)), APInt(32, "-17", 8));
+ EXPECT_EQ(APInt(32, uint64_t(-16LL)), APInt(32, "-20", 8));
+
+
+ EXPECT_EQ(APInt(32, 0), APInt(32, "0", 10));
+ EXPECT_EQ(APInt(32, 1), APInt(32, "1", 10));
+ EXPECT_EQ(APInt(32, 9), APInt(32, "9", 10));
+ EXPECT_EQ(APInt(32, 10), APInt(32, "10", 10));
+ EXPECT_EQ(APInt(32, 19), APInt(32, "19", 10));
+ EXPECT_EQ(APInt(32, 20), APInt(32, "20", 10));
+
+ EXPECT_EQ(APInt(32, uint64_t(-0LL)), APInt(32, "-0", 10));
+ EXPECT_EQ(APInt(32, uint64_t(-1LL)), APInt(32, "-1", 10));
+ EXPECT_EQ(APInt(32, uint64_t(-9LL)), APInt(32, "-9", 10));
+ EXPECT_EQ(APInt(32, uint64_t(-10LL)), APInt(32, "-10", 10));
+ EXPECT_EQ(APInt(32, uint64_t(-19LL)), APInt(32, "-19", 10));
+ EXPECT_EQ(APInt(32, uint64_t(-20LL)), APInt(32, "-20", 10));
+
+
+ EXPECT_EQ(APInt(32, 0), APInt(32, "0", 16));
+ EXPECT_EQ(APInt(32, 1), APInt(32, "1", 16));
+ EXPECT_EQ(APInt(32, 15), APInt(32, "F", 16));
+ EXPECT_EQ(APInt(32, 16), APInt(32, "10", 16));
+ EXPECT_EQ(APInt(32, 31), APInt(32, "1F", 16));
+ EXPECT_EQ(APInt(32, 32), APInt(32, "20", 16));
+
+ EXPECT_EQ(APInt(32, uint64_t(-0LL)), APInt(32, "-0", 16));
+ EXPECT_EQ(APInt(32, uint64_t(-1LL)), APInt(32, "-1", 16));
+ EXPECT_EQ(APInt(32, uint64_t(-15LL)), APInt(32, "-F", 16));
+ EXPECT_EQ(APInt(32, uint64_t(-16LL)), APInt(32, "-10", 16));
+ EXPECT_EQ(APInt(32, uint64_t(-31LL)), APInt(32, "-1F", 16));
+ EXPECT_EQ(APInt(32, uint64_t(-32LL)), APInt(32, "-20", 16));
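+
+ // Negative strings parse as magnitude-then-negate in the target width,
+ // i.e. two's complement (a reading of the expectations above): e.g.
+ // APInt(32, "-1", 16) holds 0xFFFFFFFF, which is why it compares equal
+ // to APInt(32, uint64_t(-1LL)).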
+}
+
+TEST(APIntTest, StringBitsNeeded2) {
+ EXPECT_EQ(1U, APInt::getBitsNeeded( "0", 2));
+ EXPECT_EQ(1U, APInt::getBitsNeeded( "1", 2));
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "10", 2));
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "11", 2));
+ EXPECT_EQ(3U, APInt::getBitsNeeded("100", 2));
+
+ EXPECT_EQ(1U, APInt::getBitsNeeded( "+0", 2));
+ EXPECT_EQ(1U, APInt::getBitsNeeded( "+1", 2));
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "+10", 2));
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "+11", 2));
+ EXPECT_EQ(3U, APInt::getBitsNeeded("+100", 2));
+
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "-0", 2));
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "-1", 2));
+ EXPECT_EQ(3U, APInt::getBitsNeeded( "-10", 2));
+ EXPECT_EQ(3U, APInt::getBitsNeeded( "-11", 2));
+ EXPECT_EQ(4U, APInt::getBitsNeeded("-100", 2));
+}
+
+TEST(APIntTest, StringBitsNeeded8) {
+ EXPECT_EQ(3U, APInt::getBitsNeeded( "0", 8));
+ EXPECT_EQ(3U, APInt::getBitsNeeded( "7", 8));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("10", 8));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("17", 8));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("20", 8));
+
+ EXPECT_EQ(3U, APInt::getBitsNeeded( "+0", 8));
+ EXPECT_EQ(3U, APInt::getBitsNeeded( "+7", 8));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("+10", 8));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("+17", 8));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("+20", 8));
+
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "-0", 8));
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "-7", 8));
+ EXPECT_EQ(7U, APInt::getBitsNeeded("-10", 8));
+ EXPECT_EQ(7U, APInt::getBitsNeeded("-17", 8));
+ EXPECT_EQ(7U, APInt::getBitsNeeded("-20", 8));
+}
+
+TEST(APIntTest, StringBitsNeeded10) {
+ EXPECT_EQ(1U, APInt::getBitsNeeded( "0", 10));
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "3", 10));
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "9", 10));
+ EXPECT_EQ(4U, APInt::getBitsNeeded("10", 10));
+ EXPECT_EQ(5U, APInt::getBitsNeeded("19", 10));
+ EXPECT_EQ(5U, APInt::getBitsNeeded("20", 10));
+
+ EXPECT_EQ(1U, APInt::getBitsNeeded( "+0", 10));
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "+9", 10));
+ EXPECT_EQ(4U, APInt::getBitsNeeded("+10", 10));
+ EXPECT_EQ(5U, APInt::getBitsNeeded("+19", 10));
+ EXPECT_EQ(5U, APInt::getBitsNeeded("+20", 10));
+
+ EXPECT_EQ(2U, APInt::getBitsNeeded( "-0", 10));
+ EXPECT_EQ(5U, APInt::getBitsNeeded( "-9", 10));
+ EXPECT_EQ(5U, APInt::getBitsNeeded("-10", 10));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("-19", 10));
+ EXPECT_EQ(6U, APInt::getBitsNeeded("-20", 10));
+}
+
+TEST(APIntTest, StringBitsNeeded16) {
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "0", 16));
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "F", 16));
+ EXPECT_EQ(8U, APInt::getBitsNeeded("10", 16));
+ EXPECT_EQ(8U, APInt::getBitsNeeded("1F", 16));
+ EXPECT_EQ(8U, APInt::getBitsNeeded("20", 16));
+
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "+0", 16));
+ EXPECT_EQ(4U, APInt::getBitsNeeded( "+F", 16));
+ EXPECT_EQ(8U, APInt::getBitsNeeded("+10", 16));
+ EXPECT_EQ(8U, APInt::getBitsNeeded("+1F", 16));
+ EXPECT_EQ(8U, APInt::getBitsNeeded("+20", 16));
+
+ EXPECT_EQ(5U, APInt::getBitsNeeded( "-0", 16));
+ EXPECT_EQ(5U, APInt::getBitsNeeded( "-F", 16));
+ EXPECT_EQ(9U, APInt::getBitsNeeded("-10", 16));
+ EXPECT_EQ(9U, APInt::getBitsNeeded("-1F", 16));
+ EXPECT_EQ(9U, APInt::getBitsNeeded("-20", 16));
+}
+
+TEST(APIntTest, Log2) {
+ EXPECT_EQ(APInt(15, 7).logBase2(), 2U);
+ EXPECT_EQ(APInt(15, 7).ceilLogBase2(), 3U);
+ EXPECT_EQ(APInt(15, 7).exactLogBase2(), -1);
+ EXPECT_EQ(APInt(15, 8).logBase2(), 3U);
+ EXPECT_EQ(APInt(15, 8).ceilLogBase2(), 3U);
+ EXPECT_EQ(APInt(15, 8).exactLogBase2(), 3);
+ EXPECT_EQ(APInt(15, 9).logBase2(), 3U);
+ EXPECT_EQ(APInt(15, 9).ceilLogBase2(), 4U);
+ EXPECT_EQ(APInt(15, 9).exactLogBase2(), -1);
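+
+ // Reading of the three variants (inferred from the expectations):
+ // logBase2 is floor(log2 x), ceilLogBase2 is ceil(log2 x), and
+ // exactLogBase2 is log2 x for exact powers of two and -1 otherwise,
+ // hence 7 -> (2, 3, -1), 8 -> (3, 3, 3), 9 -> (3, 4, -1).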
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+TEST(APIntTest, StringDeath) {
+ EXPECT_DEATH(APInt(0, "", 0), "Bitwidth too small");
+ EXPECT_DEATH(APInt(32, "", 0), "Invalid string length");
+ EXPECT_DEATH(APInt(32, "0", 0), "Radix should be 2, 8, 10, or 16!");
+ EXPECT_DEATH(APInt(32, "", 10), "Invalid string length");
+ EXPECT_DEATH(APInt(32, "-", 10), "String is only a sign, needs a value.");
+ EXPECT_DEATH(APInt(1, "1234", 10), "Insufficient bit width");
+ EXPECT_DEATH(APInt(32, "\0", 10), "Invalid string length");
+ EXPECT_DEATH(APInt(32, StringRef("1\02", 3), 10), "Invalid character in digit string");
+ EXPECT_DEATH(APInt(32, "1L", 10), "Invalid character in digit string");
}
+#endif
}
diff --git a/unittests/ADT/SmallStringTest.cpp b/unittests/ADT/SmallStringTest.cpp
new file mode 100644
index 000000000000..099d8159c917
--- /dev/null
+++ b/unittests/ADT/SmallStringTest.cpp
@@ -0,0 +1,48 @@
+//===- llvm/unittest/ADT/SmallStringTest.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SmallString unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/SmallString.h"
+#include <stdarg.h>
+#include <climits>
+#include <cstring>
+
+using namespace llvm;
+
+namespace {
+
+// Test fixture class
+class SmallStringTest : public testing::Test {
+protected:
+ typedef SmallString<40> StringType;
+
+ StringType theString;
+
+ void assertEmpty(StringType & v) {
+ // Size tests
+ EXPECT_EQ(0u, v.size());
+ EXPECT_TRUE(v.empty());
+ // Iterator tests
+ EXPECT_TRUE(v.begin() == v.end());
+ }
+};
+
+// New string test.
+TEST_F(SmallStringTest, EmptyStringTest) {
+ SCOPED_TRACE("EmptyStringTest");
+ assertEmpty(theString);
+ EXPECT_TRUE(theString.rbegin() == theString.rend());
+}
+
+}
+
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 19ef099224d4..8a817966cb8f 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -196,7 +196,8 @@ TEST_F(SmallVectorTest, ResizeGrowTest) {
theVector.resize(2);
- // XXX: I don't know where the extra construct/destruct is coming from.
+ // The extra constructor/destructor calls come from the temporary object used
+ // to initialize the contents of the resized array (via copy construction).
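+ // (Sketch of the mechanism, for illustration: resize grows the array by
+ // default-constructing one temporary element and copy-constructing each
+ // new slot from it, so the tallies include one extra constructor call
+ // and one extra destructor call for that temporary.)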
EXPECT_EQ(3, Constructable::getNumConstructorCalls());
EXPECT_EQ(1, Constructable::getNumDestructorCalls());
EXPECT_EQ(2u, theVector.size());
@@ -214,16 +215,16 @@ TEST_F(SmallVectorTest, ResizeFillTest) {
TEST_F(SmallVectorTest, OverflowTest) {
SCOPED_TRACE("OverflowTest");
- // Push more elements than the fixed size
+ // Push more elements than the fixed size.
makeSequence(theVector, 1, 10);
- // test size and values
+ // Test size and values.
EXPECT_EQ(10u, theVector.size());
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(i+1, theVector[i].getValue());
}
- // Now resize back to fixed size
+ // Now resize back to fixed size.
theVector.resize(1);
assertValuesInOrder(theVector, 1u, 1);
@@ -380,4 +381,22 @@ TEST_F(SmallVectorTest, ConstVectorTest) {
EXPECT_TRUE(constVector.begin() == constVector.end());
}
+// Direct array access.
+TEST_F(SmallVectorTest, DirectVectorTest) {
+ EXPECT_EQ(0u, theVector.size());
+ EXPECT_EQ(4u, theVector.capacity());
+ EXPECT_EQ(0, Constructable::getNumConstructorCalls());
+ theVector.end()[0] = 1;
+ theVector.end()[1] = 2;
+ theVector.end()[2] = 3;
+ theVector.end()[3] = 4;
+ theVector.set_size(4);
+ EXPECT_EQ(4u, theVector.size());
+ EXPECT_EQ(4, Constructable::getNumConstructorCalls());
+ EXPECT_EQ(1, theVector[0].getValue());
+ EXPECT_EQ(2, theVector[1].getValue());
+ EXPECT_EQ(3, theVector[2].getValue());
+ EXPECT_EQ(4, theVector[3].getValue());
+}
+
}
diff --git a/unittests/ADT/SparseBitVectorTest.cpp b/unittests/ADT/SparseBitVectorTest.cpp
new file mode 100644
index 000000000000..d8fc5ce25db1
--- /dev/null
+++ b/unittests/ADT/SparseBitVectorTest.cpp
@@ -0,0 +1,36 @@
+//===- llvm/unittest/ADT/SparseBitVectorTest.cpp - SparseBitVector tests --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SparseBitVector.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(SparseBitVectorTest, TrivialOperation) {
+ SparseBitVector<> Vec;
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_FALSE(Vec.test(17));
+ Vec.set(5);
+ EXPECT_TRUE(Vec.test(5));
+ EXPECT_FALSE(Vec.test(17));
+ Vec.reset(6);
+ EXPECT_TRUE(Vec.test(5));
+ EXPECT_FALSE(Vec.test(6));
+ Vec.reset(5);
+ EXPECT_FALSE(Vec.test(5));
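+ // test_and_set returns true only when the bit was previously clear.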
+ EXPECT_TRUE(Vec.test_and_set(17));
+ EXPECT_FALSE(Vec.test_and_set(17));
+ EXPECT_TRUE(Vec.test(17));
+ Vec.clear();
+ EXPECT_FALSE(Vec.test(17));
+}
+
+}
diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp
index 9f9f2e4c7954..8ee166b5e28d 100644
--- a/unittests/ADT/StringMapTest.cpp
+++ b/unittests/ADT/StringMapTest.cpp
@@ -9,6 +9,7 @@
#include "gtest/gtest.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DataTypes.h"
using namespace llvm;
namespace {
@@ -21,7 +22,7 @@ protected:
static const char testKey[];
static const uint32_t testValue;
static const char* testKeyFirst;
- static const char* testKeyLast;
+ static size_t testKeyLength;
static const std::string testKeyStr;
void assertEmptyMap() {
@@ -34,10 +35,11 @@ protected:
// Lookup tests
EXPECT_EQ(0u, testMap.count(testKey));
- EXPECT_EQ(0u, testMap.count(testKeyFirst, testKeyLast));
+ EXPECT_EQ(0u, testMap.count(StringRef(testKeyFirst, testKeyLength)));
EXPECT_EQ(0u, testMap.count(testKeyStr));
EXPECT_TRUE(testMap.find(testKey) == testMap.end());
- EXPECT_TRUE(testMap.find(testKeyFirst, testKeyLast) == testMap.end());
+ EXPECT_TRUE(testMap.find(StringRef(testKeyFirst, testKeyLength)) ==
+ testMap.end());
EXPECT_TRUE(testMap.find(testKeyStr) == testMap.end());
}
@@ -56,10 +58,11 @@ protected:
// Lookup tests
EXPECT_EQ(1u, testMap.count(testKey));
- EXPECT_EQ(1u, testMap.count(testKeyFirst, testKeyLast));
+ EXPECT_EQ(1u, testMap.count(StringRef(testKeyFirst, testKeyLength)));
EXPECT_EQ(1u, testMap.count(testKeyStr));
EXPECT_TRUE(testMap.find(testKey) == testMap.begin());
- EXPECT_TRUE(testMap.find(testKeyFirst, testKeyLast) == testMap.begin());
+ EXPECT_TRUE(testMap.find(StringRef(testKeyFirst, testKeyLength)) ==
+ testMap.begin());
EXPECT_TRUE(testMap.find(testKeyStr) == testMap.begin());
}
};
@@ -67,7 +70,7 @@ protected:
const char StringMapTest::testKey[] = "key";
const uint32_t StringMapTest::testValue = 1u;
const char* StringMapTest::testKeyFirst = testKey;
-const char* StringMapTest::testKeyLast = testKey + sizeof(testKey) - 1;
+size_t StringMapTest::testKeyLength = sizeof(testKey) - 1;
const std::string StringMapTest::testKeyStr(testKey);
// Empty map tests.
@@ -89,10 +92,10 @@ TEST_F(StringMapTest, ConstEmptyMapTest) {
// Lookup tests
EXPECT_EQ(0u, constTestMap.count(testKey));
- EXPECT_EQ(0u, constTestMap.count(testKeyFirst, testKeyLast));
+ EXPECT_EQ(0u, constTestMap.count(StringRef(testKeyFirst, testKeyLength)));
EXPECT_EQ(0u, constTestMap.count(testKeyStr));
EXPECT_TRUE(constTestMap.find(testKey) == constTestMap.end());
- EXPECT_TRUE(constTestMap.find(testKeyFirst, testKeyLast) ==
+ EXPECT_TRUE(constTestMap.find(StringRef(testKeyFirst, testKeyLength)) ==
constTestMap.end());
EXPECT_TRUE(constTestMap.find(testKeyStr) == constTestMap.end());
}
@@ -185,7 +188,7 @@ namespace {
TEST_F(StringMapTest, StringMapEntryTest) {
StringMap<uint32_t>::value_type* entry =
StringMap<uint32_t>::value_type::Create(
- testKeyFirst, testKeyLast, 1u);
+ testKeyFirst, testKeyFirst + testKeyLength, 1u);
EXPECT_STREQ(testKey, entry->first());
EXPECT_EQ(1u, entry->second);
}
@@ -195,7 +198,8 @@ TEST_F(StringMapTest, InsertTest) {
SCOPED_TRACE("InsertTest");
testMap.insert(
StringMap<uint32_t>::value_type::Create(
- testKeyFirst, testKeyLast, testMap.getAllocator(), 1u));
+ testKeyFirst, testKeyFirst + testKeyLength,
+ testMap.getAllocator(), 1u));
assertSingleItemMap();
}
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
new file mode 100644
index 000000000000..cdc476e592b8
--- /dev/null
+++ b/unittests/ADT/StringRefTest.cpp
@@ -0,0 +1,155 @@
+//===- llvm/unittest/ADT/StringRefTest.cpp - StringRef unit tests ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+std::ostream &operator<<(std::ostream &OS, const StringRef &S) {
+ OS << S.str();
+ return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS,
+ const std::pair<StringRef, StringRef> &P) {
+ OS << "(" << P.first << ", " << P.second << ")";
+ return OS;
+}
+
+TEST(StringRefTest, Construction) {
+ EXPECT_EQ("", StringRef());
+ EXPECT_EQ("hello", StringRef("hello"));
+ EXPECT_EQ("hello", StringRef("hello world", 5));
+ EXPECT_EQ("hello", StringRef(std::string("hello")));
+}
+
+TEST(StringRefTest, Iteration) {
+ StringRef S("hello");
+ const char *p = "hello";
+ for (const char *it = S.begin(), *ie = S.end(); it != ie; ++it, ++p)
+ EXPECT_EQ(*it, *p);
+}
+
+TEST(StringRefTest, StringOps) {
+ const char *p = "hello";
+ EXPECT_EQ(p, StringRef(p, 0).data());
+ EXPECT_TRUE(StringRef().empty());
+ EXPECT_EQ((size_t) 5, StringRef("hello").size());
+ EXPECT_EQ(-1, StringRef("aab").compare("aad"));
+ EXPECT_EQ( 0, StringRef("aab").compare("aab"));
+ EXPECT_EQ( 1, StringRef("aab").compare("aaa"));
+ EXPECT_EQ(-1, StringRef("aab").compare("aabb"));
+ EXPECT_EQ( 1, StringRef("aab").compare("aa"));
+}
+
+TEST(StringRefTest, Operators) {
+ EXPECT_EQ("", StringRef());
+ EXPECT_TRUE(StringRef("aab") < StringRef("aad"));
+ EXPECT_FALSE(StringRef("aab") < StringRef("aab"));
+ EXPECT_TRUE(StringRef("aab") <= StringRef("aab"));
+ EXPECT_FALSE(StringRef("aab") <= StringRef("aaa"));
+ EXPECT_TRUE(StringRef("aad") > StringRef("aab"));
+ EXPECT_FALSE(StringRef("aab") > StringRef("aab"));
+ EXPECT_TRUE(StringRef("aab") >= StringRef("aab"));
+ EXPECT_FALSE(StringRef("aaa") >= StringRef("aab"));
+ EXPECT_EQ(StringRef("aab"), StringRef("aab"));
+ EXPECT_FALSE(StringRef("aab") == StringRef("aac"));
+ EXPECT_FALSE(StringRef("aab") != StringRef("aab"));
+ EXPECT_TRUE(StringRef("aab") != StringRef("aac"));
+ EXPECT_EQ('a', StringRef("aab")[1]);
+}
+
+TEST(StringRefTest, Substr) {
+ StringRef Str("hello");
+ EXPECT_EQ("lo", Str.substr(3));
+ EXPECT_EQ("", Str.substr(100));
+ EXPECT_EQ("hello", Str.substr(0, 100));
+ EXPECT_EQ("o", Str.substr(4, 10));
+}
+
+TEST(StringRefTest, Slice) {
+ StringRef Str("hello");
+ EXPECT_EQ("l", Str.slice(2, 3));
+ EXPECT_EQ("ell", Str.slice(1, 4));
+ EXPECT_EQ("llo", Str.slice(2, 100));
+ EXPECT_EQ("", Str.slice(2, 1));
+ EXPECT_EQ("", Str.slice(10, 20));
+}
+
+TEST(StringRefTest, Split) {
+ StringRef Str("hello");
+ EXPECT_EQ(std::make_pair(StringRef("hello"), StringRef("")),
+ Str.split('X'));
+ EXPECT_EQ(std::make_pair(StringRef("h"), StringRef("llo")),
+ Str.split('e'));
+ EXPECT_EQ(std::make_pair(StringRef(""), StringRef("ello")),
+ Str.split('h'));
+ EXPECT_EQ(std::make_pair(StringRef("he"), StringRef("lo")),
+ Str.split('l'));
+ EXPECT_EQ(std::make_pair(StringRef("hell"), StringRef("")),
+ Str.split('o'));
+
+ EXPECT_EQ(std::make_pair(StringRef("hello"), StringRef("")),
+ Str.rsplit('X'));
+ EXPECT_EQ(std::make_pair(StringRef("h"), StringRef("llo")),
+ Str.rsplit('e'));
+ EXPECT_EQ(std::make_pair(StringRef(""), StringRef("ello")),
+ Str.rsplit('h'));
+ EXPECT_EQ(std::make_pair(StringRef("hel"), StringRef("o")),
+ Str.rsplit('l'));
+ EXPECT_EQ(std::make_pair(StringRef("hell"), StringRef("")),
+ Str.rsplit('o'));
+}
+
+TEST(StringRefTest, StartsWith) {
+ StringRef Str("hello");
+ EXPECT_TRUE(Str.startswith("he"));
+ EXPECT_FALSE(Str.startswith("helloworld"));
+ EXPECT_FALSE(Str.startswith("hi"));
+}
+
+TEST(StringRefTest, Find) {
+ StringRef Str("hello");
+ EXPECT_EQ(2U, Str.find('l'));
+ EXPECT_EQ(StringRef::npos, Str.find('z'));
+ EXPECT_EQ(StringRef::npos, Str.find("helloworld"));
+ EXPECT_EQ(0U, Str.find("hello"));
+ EXPECT_EQ(1U, Str.find("ello"));
+ EXPECT_EQ(StringRef::npos, Str.find("zz"));
+
+ EXPECT_EQ(3U, Str.rfind('l'));
+ EXPECT_EQ(StringRef::npos, Str.rfind('z'));
+ EXPECT_EQ(StringRef::npos, Str.rfind("helloworld"));
+ EXPECT_EQ(0U, Str.rfind("hello"));
+ EXPECT_EQ(1U, Str.rfind("ello"));
+ EXPECT_EQ(StringRef::npos, Str.rfind("zz"));
+}
+
+TEST(StringRefTest, Count) {
+ StringRef Str("hello");
+ EXPECT_EQ(2U, Str.count('l'));
+ EXPECT_EQ(1U, Str.count('o'));
+ EXPECT_EQ(0U, Str.count('z'));
+ EXPECT_EQ(0U, Str.count("helloworld"));
+ EXPECT_EQ(1U, Str.count("hello"));
+ EXPECT_EQ(1U, Str.count("ello"));
+ EXPECT_EQ(0U, Str.count("zz"));
+}
+
+TEST(StringRefTest, Misc) {
+ std::string Storage;
+ raw_string_ostream OS(Storage);
+ OS << StringRef("hello");
+ EXPECT_EQ("hello", OS.str());
+}
+
+} // end anonymous namespace
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index dee09f46ee91..1a9e81a0df74 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -18,58 +18,58 @@ TEST(TripleTest, BasicParsing) {
Triple T;
T = Triple("");
- EXPECT_EQ("", T.getArchName());
- EXPECT_EQ("", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("", T.getArchName().str());
+ EXPECT_EQ("", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("-");
- EXPECT_EQ("", T.getArchName());
- EXPECT_EQ("", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("", T.getArchName().str());
+ EXPECT_EQ("", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("--");
- EXPECT_EQ("", T.getArchName());
- EXPECT_EQ("", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("", T.getArchName().str());
+ EXPECT_EQ("", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("---");
- EXPECT_EQ("", T.getArchName());
- EXPECT_EQ("", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("", T.getArchName().str());
+ EXPECT_EQ("", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("----");
- EXPECT_EQ("", T.getArchName());
- EXPECT_EQ("", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("-", T.getEnvironmentName());
+ EXPECT_EQ("", T.getArchName().str());
+ EXPECT_EQ("", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("-", T.getEnvironmentName().str());
T = Triple("a");
- EXPECT_EQ("a", T.getArchName());
- EXPECT_EQ("", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("a", T.getArchName().str());
+ EXPECT_EQ("", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("a-b");
- EXPECT_EQ("a", T.getArchName());
- EXPECT_EQ("b", T.getVendorName());
- EXPECT_EQ("", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("a", T.getArchName().str());
+ EXPECT_EQ("b", T.getVendorName().str());
+ EXPECT_EQ("", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("a-b-c");
- EXPECT_EQ("a", T.getArchName());
- EXPECT_EQ("b", T.getVendorName());
- EXPECT_EQ("c", T.getOSName());
- EXPECT_EQ("", T.getEnvironmentName());
+ EXPECT_EQ("a", T.getArchName().str());
+ EXPECT_EQ("b", T.getVendorName().str());
+ EXPECT_EQ("c", T.getOSName().str());
+ EXPECT_EQ("", T.getEnvironmentName().str());
T = Triple("a-b-c-d");
- EXPECT_EQ("a", T.getArchName());
- EXPECT_EQ("b", T.getVendorName());
- EXPECT_EQ("c", T.getOSName());
- EXPECT_EQ("d", T.getEnvironmentName());
+ EXPECT_EQ("a", T.getArchName().str());
+ EXPECT_EQ("b", T.getVendorName().str());
+ EXPECT_EQ("c", T.getOSName().str());
+ EXPECT_EQ("d", T.getEnvironmentName().str());
}
TEST(TripleTest, ParsedIDs) {
@@ -92,6 +92,18 @@ TEST(TripleTest, ParsedIDs) {
T = Triple("huh");
EXPECT_EQ(Triple::UnknownArch, T.getArch());
+
+ // Two exceptional cases.
+
+ T = Triple("i386-mingw32");
+ EXPECT_EQ(Triple::x86, T.getArch());
+ EXPECT_EQ(Triple::PC, T.getVendor());
+ EXPECT_EQ(Triple::MinGW32, T.getOS());
+
+ T = Triple("arm-elf");
+ EXPECT_EQ(Triple::arm, T.getArch());
+ EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+ EXPECT_EQ(Triple::UnknownOS, T.getOS());
}
TEST(TripleTest, MutateName) {
diff --git a/unittests/ADT/TwineTest.cpp b/unittests/ADT/TwineTest.cpp
new file mode 100644
index 000000000000..61e8a0ac37cb
--- /dev/null
+++ b/unittests/ADT/TwineTest.cpp
@@ -0,0 +1,75 @@
+//===- TwineTest.cpp - Twine unit tests -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+std::string repr(const Twine &Value) {
+ std::string res;
+ llvm::raw_string_ostream OS(res);
+ Value.printRepr(OS);
+ return OS.str();
+}
+
+TEST(TwineTest, Construction) {
+ EXPECT_EQ("", Twine().str());
+ EXPECT_EQ("hi", Twine("hi").str());
+ EXPECT_EQ("hi", Twine(std::string("hi")).str());
+ EXPECT_EQ("hi", Twine(StringRef("hi")).str());
+ EXPECT_EQ("hi", Twine(StringRef(std::string("hi"))).str());
+ EXPECT_EQ("hi", Twine(StringRef("hithere", 2)).str());
+}
+
+TEST(TwineTest, Numbers) {
+ EXPECT_EQ("123", Twine(123U).str());
+ EXPECT_EQ("123", Twine(123).str());
+ EXPECT_EQ("-123", Twine(-123).str());
+ EXPECT_EQ("123", Twine(123).str());
+ EXPECT_EQ("-123", Twine(-123).str());
+ EXPECT_EQ("123", Twine((char) 123).str());
+ EXPECT_EQ("-123", Twine((signed char) -123).str());
+
+ EXPECT_EQ("7b", Twine::utohexstr(123).str());
+}
+
+TEST(TwineTest, Concat) {
+ // Check via repr, since we care about the actual representation, not
+ // just the result.
+
+ // Concat with null.
+ EXPECT_EQ("(Twine null empty)",
+ repr(Twine("hi").concat(Twine::createNull())));
+ EXPECT_EQ("(Twine null empty)",
+ repr(Twine::createNull().concat(Twine("hi"))));
+
+ // Concat with empty.
+ EXPECT_EQ("(Twine cstring:\"hi\" empty)",
+ repr(Twine("hi").concat(Twine())));
+ EXPECT_EQ("(Twine cstring:\"hi\" empty)",
+ repr(Twine().concat(Twine("hi"))));
+
+ // Concatenation of unary ropes.
+ EXPECT_EQ("(Twine cstring:\"a\" cstring:\"b\")",
+ repr(Twine("a").concat(Twine("b"))));
+
+ // Concatenation of other ropes.
+ EXPECT_EQ("(Twine rope:(Twine cstring:\"a\" cstring:\"b\") cstring:\"c\")",
+ repr(Twine("a").concat(Twine("b")).concat(Twine("c"))));
+ EXPECT_EQ("(Twine cstring:\"a\" rope:(Twine cstring:\"b\" cstring:\"c\"))",
+ repr(Twine("a").concat(Twine("b").concat(Twine("c")))));
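+
+ // (The nesting above reflects Twine's two-child rope node: a Twine holds
+ // an (LHS, RHS) pair of typed children, so chained concats nest left or
+ // right rather than flattening.)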
+}
+
+ // I suppose linking in the entire code generator to add a unit test to check
+ // the code size of the concat operation is overkill... :)
+
+} // end anonymous namespace
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
new file mode 100644
index 000000000000..904ee2b6c49f
--- /dev/null
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -0,0 +1,129 @@
+//===- ExecutionEngineTest.cpp - Unit tests for ExecutionEngine -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class ExecutionEngineTest : public testing::Test {
+protected:
+ ExecutionEngineTest()
+ : M(new Module("<main>", getGlobalContext())),
+ Engine(EngineBuilder(M).create()) {
+ }
+
+ virtual void SetUp() {
+ ASSERT_TRUE(Engine.get() != NULL);
+ }
+
+ GlobalVariable *NewExtGlobal(const Type *T, const Twine &Name) {
+ return new GlobalVariable(*M, T, false, // Not constant.
+ GlobalValue::ExternalLinkage, NULL, Name);
+ }
+
+ Module *const M;
+ const OwningPtr<ExecutionEngine> Engine;
+};
+
+TEST_F(ExecutionEngineTest, ForwardGlobalMapping) {
+ GlobalVariable *G1 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global1");
+ int32_t Mem1 = 3;
+ Engine->addGlobalMapping(G1, &Mem1);
+ EXPECT_EQ(&Mem1, Engine->getPointerToGlobalIfAvailable(G1));
+ int32_t Mem2 = 4;
+ Engine->updateGlobalMapping(G1, &Mem2);
+ EXPECT_EQ(&Mem2, Engine->getPointerToGlobalIfAvailable(G1));
+ Engine->updateGlobalMapping(G1, NULL);
+ EXPECT_EQ(NULL, Engine->getPointerToGlobalIfAvailable(G1));
+ Engine->updateGlobalMapping(G1, &Mem2);
+ EXPECT_EQ(&Mem2, Engine->getPointerToGlobalIfAvailable(G1));
+
+ GlobalVariable *G2 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global1");
+ EXPECT_EQ(NULL, Engine->getPointerToGlobalIfAvailable(G2))
+ << "The NULL return shouldn't depend on having called"
+ << " updateGlobalMapping(..., NULL)";
+ // Check that update...() can be called before add...().
+ Engine->updateGlobalMapping(G2, &Mem1);
+ EXPECT_EQ(&Mem1, Engine->getPointerToGlobalIfAvailable(G2));
+ EXPECT_EQ(&Mem2, Engine->getPointerToGlobalIfAvailable(G1))
+ << "A second mapping shouldn't affect the first.";
+}
+
+TEST_F(ExecutionEngineTest, ReverseGlobalMapping) {
+ GlobalVariable *G1 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global1");
+
+ int32_t Mem1 = 3;
+ Engine->addGlobalMapping(G1, &Mem1);
+ EXPECT_EQ(G1, Engine->getGlobalValueAtAddress(&Mem1));
+ int32_t Mem2 = 4;
+ Engine->updateGlobalMapping(G1, &Mem2);
+ EXPECT_EQ(NULL, Engine->getGlobalValueAtAddress(&Mem1));
+ EXPECT_EQ(G1, Engine->getGlobalValueAtAddress(&Mem2));
+
+ GlobalVariable *G2 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global2");
+ Engine->updateGlobalMapping(G2, &Mem1);
+ EXPECT_EQ(G2, Engine->getGlobalValueAtAddress(&Mem1));
+ EXPECT_EQ(G1, Engine->getGlobalValueAtAddress(&Mem2));
+ Engine->updateGlobalMapping(G1, NULL);
+ EXPECT_EQ(G2, Engine->getGlobalValueAtAddress(&Mem1))
+ << "Removing one mapping doesn't affect a different one.";
+ EXPECT_EQ(NULL, Engine->getGlobalValueAtAddress(&Mem2));
+ Engine->updateGlobalMapping(G2, &Mem2);
+ EXPECT_EQ(NULL, Engine->getGlobalValueAtAddress(&Mem1));
+ EXPECT_EQ(G2, Engine->getGlobalValueAtAddress(&Mem2))
+ << "Once a mapping is removed, we can point another GV at the"
+ << " now-free address.";
+}
+
+TEST_F(ExecutionEngineTest, ClearModuleMappings) {
+ GlobalVariable *G1 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global1");
+
+ int32_t Mem1 = 3;
+ Engine->addGlobalMapping(G1, &Mem1);
+ EXPECT_EQ(G1, Engine->getGlobalValueAtAddress(&Mem1));
+
+ Engine->clearGlobalMappingsFromModule(M);
+
+ EXPECT_EQ(NULL, Engine->getGlobalValueAtAddress(&Mem1));
+
+ GlobalVariable *G2 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global2");
+ // After clearing the module mappings, we can assign a new GV to the
+ // same address.
+ Engine->addGlobalMapping(G2, &Mem1);
+ EXPECT_EQ(G2, Engine->getGlobalValueAtAddress(&Mem1));
+}
+
+TEST_F(ExecutionEngineTest, DestructionRemovesGlobalMapping) {
+ GlobalVariable *G1 =
+ NewExtGlobal(Type::getInt32Ty(getGlobalContext()), "Global1");
+ int32_t Mem1 = 3;
+ Engine->addGlobalMapping(G1, &Mem1);
+ // Make sure the reverse mapping is enabled.
+ EXPECT_EQ(G1, Engine->getGlobalValueAtAddress(&Mem1));
+ // When the GV goes away, the ExecutionEngine should remove any
+ // mappings that refer to it.
+ G1->eraseFromParent();
+ EXPECT_EQ(NULL, Engine->getGlobalValueAtAddress(&Mem1));
+}
+
+}
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
index 1007ae1cc586..87e3280cf986 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
@@ -65,8 +65,10 @@ struct RecordingJITEventListener : public JITEventListener {
class JITEventListenerTest : public testing::Test {
protected:
JITEventListenerTest()
- : M(new Module("module", *new LLVMContext())),
- EE(ExecutionEngine::createJIT(new ExistingModuleProvider(M))) {
+ : M(new Module("module", getGlobalContext())),
+ EE(EngineBuilder(M)
+ .setEngineKind(EngineKind::JIT)
+ .create()) {
}
Module *M;
@@ -75,11 +77,11 @@ class JITEventListenerTest : public testing::Test {
Function *buildFunction(Module *M) {
Function *Result = Function::Create(
- TypeBuilder<int32_t(int32_t), false>::get(),
+ TypeBuilder<int32_t(int32_t), false>::get(getGlobalContext()),
GlobalValue::ExternalLinkage, "id", M);
Value *Arg = Result->arg_begin();
- BasicBlock *BB = BasicBlock::Create("entry", Result);
- ReturnInst::Create(Arg, BB);
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "entry", Result);
+ ReturnInst::Create(M->getContext(), Arg, BB);
return Result;
}
@@ -232,7 +234,7 @@ TEST_F(JITEventListenerTest, MatchesMachineCodeInfo) {
class JITEnvironment : public testing::Environment {
virtual void SetUp() {
- // Required for ExecutionEngine::createJIT to create a JIT.
+ // Required to create a JIT.
InitializeNativeTarget();
}
};
diff --git a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
new file mode 100644
index 000000000000..89a4be70be20
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
@@ -0,0 +1,277 @@
+//===- JITMemoryManagerTest.cpp - Unit tests for the JIT memory manager ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+
+using namespace llvm;
+
+namespace {
+
+Function *makeFakeFunction() {
+ std::vector<const Type*> params;
+ const FunctionType *FTy =
+ FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false);
+ return Function::Create(FTy, GlobalValue::ExternalLinkage);
+}
+
+// Allocate three simple functions that fit in the initial slab. This
+// exercises the case where no additional memory has to be allocated to
+// store the function bodies.
+TEST(JITMemoryManagerTest, NoAllocations) {
+ OwningPtr<JITMemoryManager> MemMgr(
+ JITMemoryManager::CreateDefaultMemManager());
+ uintptr_t size;
+ uint8_t *start;
+ std::string Error;
+
+ // Allocate the functions.
+ OwningPtr<Function> F1(makeFakeFunction());
+ size = 1024;
+ start = MemMgr->startFunctionBody(F1.get(), size);
+ memset(start, 0xFF, 1024);
+ MemMgr->endFunctionBody(F1.get(), start, start + 1024);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ OwningPtr<Function> F2(makeFakeFunction());
+ size = 1024;
+ start = MemMgr->startFunctionBody(F2.get(), size);
+ memset(start, 0xFF, 1024);
+ MemMgr->endFunctionBody(F2.get(), start, start + 1024);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ OwningPtr<Function> F3(makeFakeFunction());
+ size = 1024;
+ start = MemMgr->startFunctionBody(F3.get(), size);
+ memset(start, 0xFF, 1024);
+ MemMgr->endFunctionBody(F3.get(), start, start + 1024);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ // Deallocate them out of order, in case that matters.
+ MemMgr->deallocateMemForFunction(F2.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F1.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F3.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+}
+
+// Make three large functions that take up most of the space in the slab. Then
+// try allocating three smaller functions that don't require additional slabs.
+TEST(JITMemoryManagerTest, TestCodeAllocation) {
+ OwningPtr<JITMemoryManager> MemMgr(
+ JITMemoryManager::CreateDefaultMemManager());
+ uintptr_t size;
+ uint8_t *start;
+ std::string Error;
+
+  // Big functions are a little less than the default code slab size.
+ const uintptr_t smallFuncSize = 1024;
+ const uintptr_t bigFuncSize = (MemMgr->GetDefaultCodeSlabSize() -
+ smallFuncSize * 2);
+
+ // Allocate big functions
+ OwningPtr<Function> F1(makeFakeFunction());
+ size = bigFuncSize;
+ start = MemMgr->startFunctionBody(F1.get(), size);
+ ASSERT_LE(bigFuncSize, size);
+ memset(start, 0xFF, bigFuncSize);
+ MemMgr->endFunctionBody(F1.get(), start, start + bigFuncSize);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ OwningPtr<Function> F2(makeFakeFunction());
+ size = bigFuncSize;
+ start = MemMgr->startFunctionBody(F2.get(), size);
+ ASSERT_LE(bigFuncSize, size);
+ memset(start, 0xFF, bigFuncSize);
+ MemMgr->endFunctionBody(F2.get(), start, start + bigFuncSize);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ OwningPtr<Function> F3(makeFakeFunction());
+ size = bigFuncSize;
+ start = MemMgr->startFunctionBody(F3.get(), size);
+ ASSERT_LE(bigFuncSize, size);
+ memset(start, 0xFF, bigFuncSize);
+ MemMgr->endFunctionBody(F3.get(), start, start + bigFuncSize);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+  // Check that each large function took its own slab.
+ EXPECT_EQ(3U, MemMgr->GetNumCodeSlabs());
+
+ // Allocate small functions
+ OwningPtr<Function> F4(makeFakeFunction());
+ size = smallFuncSize;
+ start = MemMgr->startFunctionBody(F4.get(), size);
+ ASSERT_LE(smallFuncSize, size);
+ memset(start, 0xFF, smallFuncSize);
+ MemMgr->endFunctionBody(F4.get(), start, start + smallFuncSize);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ OwningPtr<Function> F5(makeFakeFunction());
+ size = smallFuncSize;
+ start = MemMgr->startFunctionBody(F5.get(), size);
+ ASSERT_LE(smallFuncSize, size);
+ memset(start, 0xFF, smallFuncSize);
+ MemMgr->endFunctionBody(F5.get(), start, start + smallFuncSize);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ OwningPtr<Function> F6(makeFakeFunction());
+ size = smallFuncSize;
+ start = MemMgr->startFunctionBody(F6.get(), size);
+ ASSERT_LE(smallFuncSize, size);
+ memset(start, 0xFF, smallFuncSize);
+ MemMgr->endFunctionBody(F6.get(), start, start + smallFuncSize);
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+
+ // Check that the small functions didn't allocate any new slabs.
+ EXPECT_EQ(3U, MemMgr->GetNumCodeSlabs());
+
+ // Deallocate them out of order, in case that matters.
+ MemMgr->deallocateMemForFunction(F2.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F1.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F4.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F3.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F5.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+ MemMgr->deallocateMemForFunction(F6.get());
+ EXPECT_TRUE(MemMgr->CheckInvariants(Error)) << Error;
+}
+
+// Allocate four global ints of varying widths and alignment, and check their
+// alignment and overlap.
+TEST(JITMemoryManagerTest, TestSmallGlobalInts) {
+ OwningPtr<JITMemoryManager> MemMgr(
+ JITMemoryManager::CreateDefaultMemManager());
+ uint8_t *a = (uint8_t *)MemMgr->allocateGlobal(8, 0);
+ uint16_t *b = (uint16_t*)MemMgr->allocateGlobal(16, 2);
+ uint32_t *c = (uint32_t*)MemMgr->allocateGlobal(32, 4);
+ uint64_t *d = (uint64_t*)MemMgr->allocateGlobal(64, 8);
+
+ // Check the alignment.
+ EXPECT_EQ(0U, ((uintptr_t)b) & 0x1);
+ EXPECT_EQ(0U, ((uintptr_t)c) & 0x3);
+ EXPECT_EQ(0U, ((uintptr_t)d) & 0x7);
+
+ // Initialize them each one at a time and make sure they don't overlap.
+ *a = 0xff;
+ *b = 0U;
+ *c = 0U;
+ *d = 0U;
+ EXPECT_EQ(0xffU, *a);
+ EXPECT_EQ(0U, *b);
+ EXPECT_EQ(0U, *c);
+ EXPECT_EQ(0U, *d);
+ *a = 0U;
+ *b = 0xffffU;
+ EXPECT_EQ(0U, *a);
+ EXPECT_EQ(0xffffU, *b);
+ EXPECT_EQ(0U, *c);
+ EXPECT_EQ(0U, *d);
+ *b = 0U;
+ *c = 0xffffffffU;
+ EXPECT_EQ(0U, *a);
+ EXPECT_EQ(0U, *b);
+ EXPECT_EQ(0xffffffffU, *c);
+ EXPECT_EQ(0U, *d);
+ *c = 0U;
+ *d = 0xffffffffffffffffULL;
+ EXPECT_EQ(0U, *a);
+ EXPECT_EQ(0U, *b);
+ EXPECT_EQ(0U, *c);
+ EXPECT_EQ(0xffffffffffffffffULL, *d);
+
+ // Make sure we didn't allocate any extra slabs for this tiny amount of data.
+ EXPECT_EQ(1U, MemMgr->GetNumDataSlabs());
+}
+
+// Allocate a small global, a big global, and a third global, and make sure we
+// only use two slabs for that.
+TEST(JITMemoryManagerTest, TestLargeGlobalArray) {
+ OwningPtr<JITMemoryManager> MemMgr(
+ JITMemoryManager::CreateDefaultMemManager());
+ size_t Size = 4 * MemMgr->GetDefaultDataSlabSize();
+ uint64_t *a = (uint64_t*)MemMgr->allocateGlobal(64, 8);
+ uint8_t *g = MemMgr->allocateGlobal(Size, 8);
+ uint64_t *b = (uint64_t*)MemMgr->allocateGlobal(64, 8);
+
+ // Check the alignment.
+ EXPECT_EQ(0U, ((uintptr_t)a) & 0x7);
+ EXPECT_EQ(0U, ((uintptr_t)g) & 0x7);
+ EXPECT_EQ(0U, ((uintptr_t)b) & 0x7);
+
+  // Initialize them one at a time to make sure we don't segfault and that
+  // they don't overlap.
+ memset(a, 0x1, 8);
+ memset(g, 0x2, Size);
+ memset(b, 0x3, 8);
+ EXPECT_EQ(0x0101010101010101ULL, *a);
+ // Just check the edges.
+ EXPECT_EQ(0x02U, g[0]);
+ EXPECT_EQ(0x02U, g[Size - 1]);
+ EXPECT_EQ(0x0303030303030303ULL, *b);
+
+ // Check the number of slabs.
+ EXPECT_EQ(2U, MemMgr->GetNumDataSlabs());
+}
+
+// Allocate lots of medium globals so that we can test moving the bump allocator
+// to a new slab.
+TEST(JITMemoryManagerTest, TestManyGlobals) {
+ OwningPtr<JITMemoryManager> MemMgr(
+ JITMemoryManager::CreateDefaultMemManager());
+ size_t SlabSize = MemMgr->GetDefaultDataSlabSize();
+ size_t Size = 128;
+ int Iters = (SlabSize / Size) + 1;
+
+ // We should start with one slab.
+ EXPECT_EQ(1U, MemMgr->GetNumDataSlabs());
+
+ // After allocating a bunch of globals, we should have two.
+ for (int I = 0; I < Iters; ++I)
+ MemMgr->allocateGlobal(Size, 8);
+ EXPECT_EQ(2U, MemMgr->GetNumDataSlabs());
+
+ // And after much more, we should have three.
+ for (int I = 0; I < Iters; ++I)
+ MemMgr->allocateGlobal(Size, 8);
+ EXPECT_EQ(3U, MemMgr->GetNumDataSlabs());
+}
+
+// Allocate lots of function stubs so that we can test moving the stub bump
+// allocator to a new slab.
+TEST(JITMemoryManagerTest, TestManyStubs) {
+ OwningPtr<JITMemoryManager> MemMgr(
+ JITMemoryManager::CreateDefaultMemManager());
+ size_t SlabSize = MemMgr->GetDefaultStubSlabSize();
+ size_t Size = 128;
+ int Iters = (SlabSize / Size) + 1;
+
+ // We should start with one slab.
+ EXPECT_EQ(1U, MemMgr->GetNumStubSlabs());
+
+ // After allocating a bunch of stubs, we should have two.
+ for (int I = 0; I < Iters; ++I)
+ MemMgr->allocateStub(NULL, Size, 8);
+ EXPECT_EQ(2U, MemMgr->GetNumStubSlabs());
+
+ // And after much more, we should have three.
+ for (int I = 0; I < Iters; ++I)
+ MemMgr->allocateStub(NULL, Size, 8);
+ EXPECT_EQ(3U, MemMgr->GetNumStubSlabs());
+}
+
+}
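(Aside: the slab counts asserted in TestManyGlobals and TestManyStubs above follow from a plain bump-pointer policy: when a request no longer fits in the current slab, the allocator opens a fresh one. A self-contained model of just that counting logic, with hypothetical names and deliberately ignoring per-slab overhead:

#include <cstddef>

// Minimal bump-pointer model that counts slabs the way the tests above do.
struct SlabModel {
  std::size_t SlabSize, Used, NumSlabs;
  explicit SlabModel(std::size_t S) : SlabSize(S), Used(0), NumSlabs(1) {}
  void allocate(std::size_t Size) {
    if (Used + Size > SlabSize) { // request no longer fits: open a new slab
      ++NumSlabs;
      Used = 0;
    }
    Used += Size;
  }
};
// With this model, (SlabSize / Size) + 1 allocations of Size bytes spill
// past the current slab exactly once, matching the Iters loops above.
)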
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
new file mode 100644
index 000000000000..55d37493ea5a
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -0,0 +1,277 @@
+//===- JITTest.cpp - Unit tests for the JIT -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+namespace {
+
+Function *makeReturnGlobal(std::string Name, GlobalVariable *G, Module *M) {
+ std::vector<const Type*> params;
+ const FunctionType *FTy = FunctionType::get(G->getType()->getElementType(),
+ params, false);
+ Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M);
+ BasicBlock *Entry = BasicBlock::Create(M->getContext(), "entry", F);
+ IRBuilder<> builder(Entry);
+ Value *Load = builder.CreateLoad(G);
+ const Type *GTy = G->getType()->getElementType();
+ Value *Add = builder.CreateAdd(Load, ConstantInt::get(GTy, 1LL));
+ builder.CreateStore(Add, G);
+ builder.CreateRet(Add);
+ return F;
+}
+
+class JITTest : public testing::Test {
+ protected:
+ virtual void SetUp() {
+ M = new Module("<main>", Context);
+ std::string Error;
+ TheJIT.reset(EngineBuilder(M).setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error).create());
+ ASSERT_TRUE(TheJIT.get() != NULL) << Error;
+ }
+
+ LLVMContext Context;
+ Module *M; // Owned by ExecutionEngine.
+ OwningPtr<ExecutionEngine> TheJIT;
+};
+
+// Regression test for a bug. The JIT used to allocate globals inside the same
+// memory block used for the function, and when the function code was freed,
+// the global was left in the same place. This test creates a function that
+// uses a global, frees the function's machine code, and then makes sure that
+// the global stays alive after that.
+TEST(JIT, GlobalInFunction) {
+ LLVMContext context;
+ Module *M = new Module("<main>", context);
+ ExistingModuleProvider *MP = new ExistingModuleProvider(M);
+
+ JITMemoryManager *MemMgr = JITMemoryManager::CreateDefaultMemManager();
+  // Tell the memory manager to poison freed memory so that reads from freed
+  // memory are easy to detect.
+ MemMgr->setPoisonMemory(true);
+ std::string Error;
+ OwningPtr<ExecutionEngine> JIT(EngineBuilder(MP)
+ .setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setJITMemoryManager(MemMgr)
+ // The next line enables the fix:
+ .setAllocateGVsWithCode(false)
+ .create());
+ ASSERT_EQ(Error, "");
+
+ // Create a global variable.
+ const Type *GTy = Type::getInt32Ty(context);
+ GlobalVariable *G = new GlobalVariable(
+ *M,
+ GTy,
+ false, // Not constant.
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(GTy),
+ "myglobal");
+
+ // Make a function that points to a global.
+ Function *F1 = makeReturnGlobal("F1", G, M);
+
+ // Get the pointer to the native code to force it to JIT the function and
+ // allocate space for the global.
+ void (*F1Ptr)() =
+ reinterpret_cast<void(*)()>((intptr_t)JIT->getPointerToFunction(F1));
+
+ // Since F1 was codegen'd, a pointer to G should be available.
+ int32_t *GPtr = (int32_t*)JIT->getPointerToGlobalIfAvailable(G);
+ ASSERT_NE((int32_t*)NULL, GPtr);
+ EXPECT_EQ(0, *GPtr);
+
+ // F1() should increment G.
+ F1Ptr();
+ EXPECT_EQ(1, *GPtr);
+
+ // Make a second function identical to the first, referring to the same
+ // global.
+ Function *F2 = makeReturnGlobal("F2", G, M);
+ void (*F2Ptr)() =
+ reinterpret_cast<void(*)()>((intptr_t)JIT->getPointerToFunction(F2));
+
+ // F2() should increment G.
+ F2Ptr();
+ EXPECT_EQ(2, *GPtr);
+
+ // Deallocate F1.
+ JIT->freeMachineCodeForFunction(F1);
+
+ // F2() should *still* increment G.
+ F2Ptr();
+ EXPECT_EQ(3, *GPtr);
+}
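(Aside: the poisoning requested through setPoisonMemory(true) is what makes this regression deterministic: freed bytes are overwritten with a garbage pattern, so a stale read through a dangling global pointer fails loudly instead of quietly returning old data. The idea in miniature; the pattern byte here is illustrative, not the one the JIT uses:

#include <cstddef>
#include <cstring>

// Overwrite a freed block so any later read sees obvious garbage.
void poisonFreedBlock(void *Mem, std::size_t Size) {
  std::memset(Mem, 0xCD, Size); // 0xCD is an arbitrary poison byte
}
)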
+
+int PlusOne(int arg) {
+ return arg + 1;
+}
+
+TEST_F(JITTest, FarCallToKnownFunction) {
+  // x86-64 direct calls can only reach targets within a signed 32-bit
+  // displacement of the current PC. To call anything farther away, we
+  // have to load the address into a register and call through the
+  // register. The current JIT does this by allocating a stub for any
+  // far call. There was a bug in which the JIT tried to emit a direct
+  // call when the target was already in the JIT's global mappings and
+  // lazy compilation was disabled.
+
+ Function *KnownFunction = Function::Create(
+ TypeBuilder<int(int), false>::get(Context),
+ GlobalValue::ExternalLinkage, "known", M);
+ TheJIT->addGlobalMapping(KnownFunction, (void*)(intptr_t)PlusOne);
+
+ // int test() { return known(7); }
+ Function *TestFunction = Function::Create(
+ TypeBuilder<int(), false>::get(Context),
+ GlobalValue::ExternalLinkage, "test", M);
+ BasicBlock *Entry = BasicBlock::Create(Context, "entry", TestFunction);
+ IRBuilder<> Builder(Entry);
+ Value *result = Builder.CreateCall(
+ KnownFunction,
+ ConstantInt::get(TypeBuilder<int, false>::get(Context), 7));
+ Builder.CreateRet(result);
+
+ TheJIT->EnableDlsymStubs(false);
+ TheJIT->DisableLazyCompilation();
+ int (*TestFunctionPtr)() = reinterpret_cast<int(*)()>(
+ (intptr_t)TheJIT->getPointerToFunction(TestFunction));
+ // This used to crash in trying to call PlusOne().
+ EXPECT_EQ(8, TestFunctionPtr());
+}
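(Aside: the 32-bit limit in the comment above can be stated precisely: a direct x86-64 call encodes a signed 32-bit displacement relative to the end of the call instruction. A small reachability check under that assumption; the 5-byte length is the usual call rel32 encoding, and the name is hypothetical:

#include <cstdint>
#include <limits>

// True if Target is reachable from a call instruction at PC via rel32.
bool fitsInRel32(uint64_t PC, uint64_t Target) {
  int64_t Delta = (int64_t)Target - (int64_t)(PC + 5); // relative to next insn
  return Delta >= std::numeric_limits<int32_t>::min() &&
         Delta <= std::numeric_limits<int32_t>::max();
}
)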
+
+// Test a function (Func1) that calls two others (Func2 and Func3) which in
+// turn call each other.
+TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
+ TheJIT->DisableLazyCompilation();
+
+ const FunctionType *Func1Ty =
+ cast<FunctionType>(TypeBuilder<void(void), false>::get(Context));
+ std::vector<const Type*> arg_types;
+ arg_types.push_back(Type::getInt1Ty(Context));
+ const FunctionType *FuncTy = FunctionType::get(
+ Type::getVoidTy(Context), arg_types, false);
+ Function *Func1 = Function::Create(Func1Ty, Function::ExternalLinkage,
+ "func1", M);
+ Function *Func2 = Function::Create(FuncTy, Function::InternalLinkage,
+ "func2", M);
+ Function *Func3 = Function::Create(FuncTy, Function::InternalLinkage,
+ "func3", M);
+ BasicBlock *Block1 = BasicBlock::Create(Context, "block1", Func1);
+ BasicBlock *Block2 = BasicBlock::Create(Context, "block2", Func2);
+ BasicBlock *True2 = BasicBlock::Create(Context, "cond_true", Func2);
+ BasicBlock *False2 = BasicBlock::Create(Context, "cond_false", Func2);
+ BasicBlock *Block3 = BasicBlock::Create(Context, "block3", Func3);
+ BasicBlock *True3 = BasicBlock::Create(Context, "cond_true", Func3);
+ BasicBlock *False3 = BasicBlock::Create(Context, "cond_false", Func3);
+
+  // Make Func1 call Func2(true) and Func3(true).
+ IRBuilder<> Builder(Block1);
+ Builder.CreateCall(Func2, ConstantInt::getTrue(Context));
+ Builder.CreateCall(Func3, ConstantInt::getTrue(Context));
+ Builder.CreateRetVoid();
+
+ // void Func2(bool b) { if (b) { Func3(false); return; } return; }
+ Builder.SetInsertPoint(Block2);
+ Builder.CreateCondBr(Func2->arg_begin(), True2, False2);
+ Builder.SetInsertPoint(True2);
+ Builder.CreateCall(Func3, ConstantInt::getFalse(Context));
+ Builder.CreateRetVoid();
+ Builder.SetInsertPoint(False2);
+ Builder.CreateRetVoid();
+
+ // void Func3(bool b) { if (b) { Func2(false); return; } return; }
+ Builder.SetInsertPoint(Block3);
+ Builder.CreateCondBr(Func3->arg_begin(), True3, False3);
+ Builder.SetInsertPoint(True3);
+ Builder.CreateCall(Func2, ConstantInt::getFalse(Context));
+ Builder.CreateRetVoid();
+ Builder.SetInsertPoint(False3);
+ Builder.CreateRetVoid();
+
+ // Compile the function to native code
+ void (*F1Ptr)() =
+ reinterpret_cast<void(*)()>((intptr_t)TheJIT->getPointerToFunction(Func1));
+
+ F1Ptr();
+}
+
+// Regression test for PR5162. This used to trigger an AssertingVH inside the
+// JIT's function-to-stub mapping.
+TEST_F(JITTest, NonLazyLeaksNoStubs) {
+ TheJIT->DisableLazyCompilation();
+
+ // Create two functions with a single basic block each.
+ const FunctionType *FuncTy =
+ cast<FunctionType>(TypeBuilder<int(), false>::get(Context));
+ Function *Func1 = Function::Create(FuncTy, Function::ExternalLinkage,
+ "func1", M);
+ Function *Func2 = Function::Create(FuncTy, Function::InternalLinkage,
+ "func2", M);
+ BasicBlock *Block1 = BasicBlock::Create(Context, "block1", Func1);
+ BasicBlock *Block2 = BasicBlock::Create(Context, "block2", Func2);
+
+ // The first function calls the second and returns the result
+ IRBuilder<> Builder(Block1);
+ Value *Result = Builder.CreateCall(Func2);
+ Builder.CreateRet(Result);
+
+ // The second function just returns a constant
+ Builder.SetInsertPoint(Block2);
+ Builder.CreateRet(ConstantInt::get(TypeBuilder<int, false>::get(Context),42));
+
+ // Compile the function to native code
+ (void)TheJIT->getPointerToFunction(Func1);
+
+ // Free the JIT state for the functions
+ TheJIT->freeMachineCodeForFunction(Func1);
+ TheJIT->freeMachineCodeForFunction(Func2);
+
+  // Delete the first function (and show that it has no users).
+ EXPECT_EQ(Func1->getNumUses(), 0u);
+ Func1->eraseFromParent();
+
+  // Delete the second function (and show that it has no users; it had one,
+  // Func1, but that's gone now).
+ EXPECT_EQ(Func2->getNumUses(), 0u);
+ Func2->eraseFromParent();
+}
+
+// This code is copied from JITEventListenerTest, but it only runs once for all
+// the tests in this directory. Everything seems fine, but that's strange
+// behavior.
+class JITEnvironment : public testing::Environment {
+ virtual void SetUp() {
+ // Required to create a JIT.
+ InitializeNativeTarget();
+ }
+};
+testing::Environment* const jit_env =
+ testing::AddGlobalTestEnvironment(new JITEnvironment);
+
+}
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
index e837a7d4fd3e..d4ef92ffb392 100644
--- a/unittests/ExecutionEngine/Makefile
+++ b/unittests/ExecutionEngine/Makefile
@@ -8,12 +8,11 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
+TESTNAME = ExecutionEngine
+LINK_COMPONENTS := engine interpreter
include $(LEVEL)/Makefile.config
PARALLEL_DIRS = JIT
-include $(LEVEL)/Makefile.common
-
-clean::
- $(Verb) $(RM) -f *Tests
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/Makefile b/unittests/Makefile
index 1eb69abbc890..9f377cd744c1 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -9,14 +9,7 @@
LEVEL = ..
-include $(LEVEL)/Makefile.config
-
-LIBRARYNAME = UnitTestMain
-BUILD_ARCHIVE = 1
-CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include/
-CPP.Flags += -Wno-variadic-macros
-
-PARALLEL_DIRS = ADT ExecutionEngine Support VMCore MC
+PARALLEL_DIRS = ADT ExecutionEngine Support Transforms VMCore
include $(LEVEL)/Makefile.common
diff --git a/unittests/Makefile.unittest b/unittests/Makefile.unittest
index 1c75e44d0778..76051e497c96 100644
--- a/unittests/Makefile.unittest
+++ b/unittests/Makefile.unittest
@@ -20,16 +20,18 @@ LLVMUnitTestExe = $(BuildMode)/$(TESTNAME)Tests$(EXEEXT)
CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include/
CPP.Flags += -Wno-variadic-macros
-LIBS += -lGoogleTest -lUnitTestMain
+TESTLIBS = -lGoogleTest -lUnitTestMain
$(LLVMUnitTestExe): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
$(Echo) Linking $(BuildMode) unit test $(TESTNAME) $(StripWarnMsg)
$(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
- $(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
+ $(TESTLIBS) $(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
$(Echo) ======= Finished Linking $(BuildMode) Unit test $(TESTNAME) \
$(StripWarnMsg)
all:: $(LLVMUnitTestExe)
+
+unitcheck:: $(LLVMUnitTestExe)
$(LLVMUnitTestExe)
endif
diff --git a/unittests/Support/AllocatorTest.cpp b/unittests/Support/AllocatorTest.cpp
new file mode 100644
index 000000000000..2a01f3a9c484
--- /dev/null
+++ b/unittests/Support/AllocatorTest.cpp
@@ -0,0 +1,143 @@
+//===- llvm/unittest/Support/AllocatorTest.cpp - BumpPtrAllocator tests ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Allocator.h"
+
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+namespace {
+
+TEST(AllocatorTest, Basics) {
+ BumpPtrAllocator Alloc;
+ int *a = (int*)Alloc.Allocate(sizeof(int), 0);
+ int *b = (int*)Alloc.Allocate(sizeof(int) * 10, 0);
+ int *c = (int*)Alloc.Allocate(sizeof(int), 0);
+ *a = 1;
+ b[0] = 2;
+ b[9] = 2;
+ *c = 3;
+ EXPECT_EQ(1, *a);
+ EXPECT_EQ(2, b[0]);
+ EXPECT_EQ(2, b[9]);
+ EXPECT_EQ(3, *c);
+ EXPECT_EQ(1U, Alloc.GetNumSlabs());
+}
+
+// Allocate enough bytes to create three slabs.
+TEST(AllocatorTest, ThreeSlabs) {
+ BumpPtrAllocator Alloc(4096, 4096);
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(1U, Alloc.GetNumSlabs());
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(2U, Alloc.GetNumSlabs());
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(3U, Alloc.GetNumSlabs());
+}
+
+// Allocate enough bytes to create two slabs, reset the allocator, and do it
+// again.
+TEST(AllocatorTest, TestReset) {
+ BumpPtrAllocator Alloc(4096, 4096);
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(1U, Alloc.GetNumSlabs());
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(2U, Alloc.GetNumSlabs());
+ Alloc.Reset();
+ EXPECT_EQ(1U, Alloc.GetNumSlabs());
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(1U, Alloc.GetNumSlabs());
+ Alloc.Allocate(3000, 0);
+ EXPECT_EQ(2U, Alloc.GetNumSlabs());
+}
+
+// Test some allocations at varying alignments.
+TEST(AllocatorTest, TestAlignment) {
+ BumpPtrAllocator Alloc;
+ uintptr_t a;
+ a = (uintptr_t)Alloc.Allocate(1, 2);
+ EXPECT_EQ(0U, a & 1);
+ a = (uintptr_t)Alloc.Allocate(1, 4);
+ EXPECT_EQ(0U, a & 3);
+ a = (uintptr_t)Alloc.Allocate(1, 8);
+ EXPECT_EQ(0U, a & 7);
+ a = (uintptr_t)Alloc.Allocate(1, 16);
+ EXPECT_EQ(0U, a & 15);
+ a = (uintptr_t)Alloc.Allocate(1, 32);
+ EXPECT_EQ(0U, a & 31);
+ a = (uintptr_t)Alloc.Allocate(1, 64);
+ EXPECT_EQ(0U, a & 63);
+ a = (uintptr_t)Alloc.Allocate(1, 128);
+ EXPECT_EQ(0U, a & 127);
+}
+
+// Test allocating just over the slab size. This covers a bug where the
+// allocator previously miscalculated the buffer end pointer.
+TEST(AllocatorTest, TestOverflow) {
+ BumpPtrAllocator Alloc(4096, 4096);
+
+ // Fill the slab right up until the end pointer.
+ Alloc.Allocate(4096 - sizeof(MemSlab), 0);
+ EXPECT_EQ(1U, Alloc.GetNumSlabs());
+
+  // If we don't allocate a new slab, then we will have overflowed.
+ Alloc.Allocate(1, 0);
+ EXPECT_EQ(2U, Alloc.GetNumSlabs());
+}
+
+// Mock slab allocator that returns slabs aligned to a 4096-byte boundary.
+// There is no easy portable way to do this, so this is kind of a hack.
+class MockSlabAllocator : public SlabAllocator {
+ MemSlab *LastSlab;
+
+public:
+ virtual ~MockSlabAllocator() { }
+
+ virtual MemSlab *Allocate(size_t Size) {
+ // Allocate space for the alignment, the slab, and a void* that goes right
+ // before the slab.
+ size_t Alignment = 4096;
+ void *MemBase = malloc(Size + Alignment - 1 + sizeof(void*));
+
+ // Make the slab.
+ MemSlab *Slab = (MemSlab*)(((uintptr_t)MemBase+sizeof(void*)+Alignment-1) &
+ ~(uintptr_t)(Alignment - 1));
+ Slab->Size = Size;
+ Slab->NextPtr = 0;
+
+ // Hold a pointer to the base so we can free the whole malloced block.
+ ((void**)Slab)[-1] = MemBase;
+
+ LastSlab = Slab;
+ return Slab;
+ }
+
+ virtual void Deallocate(MemSlab *Slab) {
+ free(((void**)Slab)[-1]);
+ }
+
+ MemSlab *GetLastSlab() {
+ return LastSlab;
+ }
+};
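(Aside: the masking expression in Allocate() above is the standard align-up idiom: over-allocate by Alignment - 1 plus the hidden void*, then round the result up to the next multiple of the alignment. In isolation, assuming Align is a power of two:

#include <cstdint>

// Round P up to the next multiple of Align (a power of two).
inline uintptr_t alignUp(uintptr_t P, uintptr_t Align) {
  return (P + Align - 1) & ~(Align - 1);
}
// e.g. alignUp(0x1001, 0x1000) == 0x2000, while alignUp(0x2000, 0x1000)
// stays at 0x2000.
)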
+
+// Allocate a large-ish block with a very large alignment, so that the
+// allocator thinks the current slab has room but runs out of space once
+// the pointer is aligned.
+TEST(AllocatorTest, TestBigAlignment) {
+ MockSlabAllocator SlabAlloc;
+ BumpPtrAllocator Alloc(4096, 4096, SlabAlloc);
+ uintptr_t Ptr = (uintptr_t)Alloc.Allocate(3000, 2048);
+ MemSlab *Slab = SlabAlloc.GetLastSlab();
+ EXPECT_LE(Ptr + 3000, ((uintptr_t)Slab) + Slab->Size);
+}
+
+} // anonymous namespace
diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp
new file mode 100644
index 000000000000..72fa24a5ac0c
--- /dev/null
+++ b/unittests/Support/CommandLineTest.cpp
@@ -0,0 +1,60 @@
+//===- llvm/unittest/Support/CommandLineTest.cpp - CommandLine tests ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Config/config.h"
+
+#include "gtest/gtest.h"
+
+#include <string>
+#include <stdlib.h>
+
+using namespace llvm;
+
+namespace {
+
+class TempEnvVar {
+ public:
+ TempEnvVar(const char *name, const char *value)
+ : name(name) {
+ const char *old_value = getenv(name);
+ EXPECT_EQ(NULL, old_value) << old_value;
+#if HAVE_SETENV
+ setenv(name, value, true);
+#else
+# define SKIP_ENVIRONMENT_TESTS
+#endif
+ }
+
+ ~TempEnvVar() {
+#if HAVE_SETENV
+ // Assume setenv and unsetenv come together.
+ unsetenv(name);
+#endif
+ }
+
+ private:
+ const char *const name;
+};
+
+#ifndef SKIP_ENVIRONMENT_TESTS
+
+const char test_env_var[] = "LLVM_TEST_COMMAND_LINE_FLAGS";
+
+cl::opt<std::string> EnvironmentTestOption("env-test-opt");
+TEST(CommandLineTest, ParseEnvironment) {
+ TempEnvVar TEV(test_env_var, "-env-test-opt=hello");
+ EXPECT_EQ("", EnvironmentTestOption);
+ cl::ParseEnvironmentOptions("CommandLineTest", test_env_var);
+ EXPECT_EQ("hello", EnvironmentTestOption);
+}
+
+#endif // SKIP_ENVIRONMENT_TESTS
+
+} // anonymous namespace
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
new file mode 100644
index 000000000000..6b8d01d553f4
--- /dev/null
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -0,0 +1,351 @@
+//===- llvm/unittest/Support/ConstantRangeTest.cpp - ConstantRange tests --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ConstantRange.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class ConstantRangeTest : public ::testing::Test {
+protected:
+ static ConstantRange Full;
+ static ConstantRange Empty;
+ static ConstantRange One;
+ static ConstantRange Some;
+ static ConstantRange Wrap;
+};
+
+ConstantRange ConstantRangeTest::Full(16);
+ConstantRange ConstantRangeTest::Empty(16, false);
+ConstantRange ConstantRangeTest::One(APInt(16, 0xa));
+ConstantRange ConstantRangeTest::Some(APInt(16, 0xa), APInt(16, 0xaaa));
+ConstantRange ConstantRangeTest::Wrap(APInt(16, 0xaaa), APInt(16, 0xa));
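(Aside: Wrap is the fixture worth pausing on. A ConstantRange is a half-open interval [Lower, Upper) that may wrap around zero, so Wrap = [0xaaa, 0xa) holds 0xaaa through 0xffff plus 0x0 through 0x9. A simplified i16 membership predicate for illustration; it treats Lower == Upper as the full set, which is only one of the two conventions ConstantRange itself distinguishes:

#include <cstdint>

// Membership in a possibly wrapping half-open i16 range [Lo, Hi).
bool inWrappedRange(uint16_t Lo, uint16_t Hi, uint16_t X) {
  if (Lo < Hi) return Lo <= X && X < Hi; // ordinary range
  if (Lo > Hi) return X >= Lo || X < Hi; // wraps past zero
  return true;                           // Lo == Hi: full set, by convention
}
)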
+
+TEST_F(ConstantRangeTest, Basics) {
+ EXPECT_TRUE(Full.isFullSet());
+ EXPECT_FALSE(Full.isEmptySet());
+ EXPECT_FALSE(Full.isWrappedSet());
+ EXPECT_TRUE(Full.contains(APInt(16, 0x0)));
+ EXPECT_TRUE(Full.contains(APInt(16, 0x9)));
+ EXPECT_TRUE(Full.contains(APInt(16, 0xa)));
+ EXPECT_TRUE(Full.contains(APInt(16, 0xaa9)));
+ EXPECT_TRUE(Full.contains(APInt(16, 0xaaa)));
+
+ EXPECT_FALSE(Empty.isFullSet());
+ EXPECT_TRUE(Empty.isEmptySet());
+ EXPECT_FALSE(Empty.isWrappedSet());
+ EXPECT_FALSE(Empty.contains(APInt(16, 0x0)));
+ EXPECT_FALSE(Empty.contains(APInt(16, 0x9)));
+ EXPECT_FALSE(Empty.contains(APInt(16, 0xa)));
+ EXPECT_FALSE(Empty.contains(APInt(16, 0xaa9)));
+ EXPECT_FALSE(Empty.contains(APInt(16, 0xaaa)));
+
+ EXPECT_FALSE(One.isFullSet());
+ EXPECT_FALSE(One.isEmptySet());
+ EXPECT_FALSE(One.isWrappedSet());
+ EXPECT_FALSE(One.contains(APInt(16, 0x0)));
+ EXPECT_FALSE(One.contains(APInt(16, 0x9)));
+ EXPECT_TRUE(One.contains(APInt(16, 0xa)));
+ EXPECT_FALSE(One.contains(APInt(16, 0xaa9)));
+ EXPECT_FALSE(One.contains(APInt(16, 0xaaa)));
+
+ EXPECT_FALSE(Some.isFullSet());
+ EXPECT_FALSE(Some.isEmptySet());
+ EXPECT_FALSE(Some.isWrappedSet());
+ EXPECT_FALSE(Some.contains(APInt(16, 0x0)));
+ EXPECT_FALSE(Some.contains(APInt(16, 0x9)));
+ EXPECT_TRUE(Some.contains(APInt(16, 0xa)));
+ EXPECT_TRUE(Some.contains(APInt(16, 0xaa9)));
+ EXPECT_FALSE(Some.contains(APInt(16, 0xaaa)));
+
+ EXPECT_FALSE(Wrap.isFullSet());
+ EXPECT_FALSE(Wrap.isEmptySet());
+ EXPECT_TRUE(Wrap.isWrappedSet());
+ EXPECT_TRUE(Wrap.contains(APInt(16, 0x0)));
+ EXPECT_TRUE(Wrap.contains(APInt(16, 0x9)));
+ EXPECT_FALSE(Wrap.contains(APInt(16, 0xa)));
+ EXPECT_FALSE(Wrap.contains(APInt(16, 0xaa9)));
+ EXPECT_TRUE(Wrap.contains(APInt(16, 0xaaa)));
+}
+
+TEST_F(ConstantRangeTest, Equality) {
+ EXPECT_EQ(Full, Full);
+ EXPECT_EQ(Empty, Empty);
+ EXPECT_EQ(One, One);
+ EXPECT_EQ(Some, Some);
+ EXPECT_EQ(Wrap, Wrap);
+ EXPECT_NE(Full, Empty);
+ EXPECT_NE(Full, One);
+ EXPECT_NE(Full, Some);
+ EXPECT_NE(Full, Wrap);
+ EXPECT_NE(Empty, One);
+ EXPECT_NE(Empty, Some);
+ EXPECT_NE(Empty, Wrap);
+ EXPECT_NE(One, Some);
+ EXPECT_NE(One, Wrap);
+ EXPECT_NE(Some, Wrap);
+}
+
+TEST_F(ConstantRangeTest, SingleElement) {
+ EXPECT_EQ(Full.getSingleElement(), static_cast<APInt *>(NULL));
+ EXPECT_EQ(Empty.getSingleElement(), static_cast<APInt *>(NULL));
+ EXPECT_EQ(*One.getSingleElement(), APInt(16, 0xa));
+ EXPECT_EQ(Some.getSingleElement(), static_cast<APInt *>(NULL));
+ EXPECT_EQ(Wrap.getSingleElement(), static_cast<APInt *>(NULL));
+
+ EXPECT_FALSE(Full.isSingleElement());
+ EXPECT_FALSE(Empty.isSingleElement());
+ EXPECT_TRUE(One.isSingleElement());
+ EXPECT_FALSE(Some.isSingleElement());
+ EXPECT_FALSE(Wrap.isSingleElement());
+}
+
+TEST_F(ConstantRangeTest, GetSetSize) {
+ EXPECT_EQ(Full.getSetSize(), APInt(16, 0));
+ EXPECT_EQ(Empty.getSetSize(), APInt(16, 0));
+ EXPECT_EQ(One.getSetSize(), APInt(16, 1));
+ EXPECT_EQ(Some.getSetSize(), APInt(16, 0xaa0));
+ EXPECT_EQ(Wrap.getSetSize(), APInt(16, 0x10000 - 0xaa0));
+}
+
+TEST_F(ConstantRangeTest, GetMinsAndMaxes) {
+ EXPECT_EQ(Full.getUnsignedMax(), APInt(16, UINT16_MAX));
+ EXPECT_EQ(One.getUnsignedMax(), APInt(16, 0xa));
+ EXPECT_EQ(Some.getUnsignedMax(), APInt(16, 0xaa9));
+ EXPECT_EQ(Wrap.getUnsignedMax(), APInt(16, UINT16_MAX));
+
+ EXPECT_EQ(Full.getUnsignedMin(), APInt(16, 0));
+ EXPECT_EQ(One.getUnsignedMin(), APInt(16, 0xa));
+ EXPECT_EQ(Some.getUnsignedMin(), APInt(16, 0xa));
+ EXPECT_EQ(Wrap.getUnsignedMin(), APInt(16, 0));
+
+ EXPECT_EQ(Full.getSignedMax(), APInt(16, INT16_MAX));
+ EXPECT_EQ(One.getSignedMax(), APInt(16, 0xa));
+ EXPECT_EQ(Some.getSignedMax(), APInt(16, 0xaa9));
+ EXPECT_EQ(Wrap.getSignedMax(), APInt(16, INT16_MAX));
+
+ EXPECT_EQ(Full.getSignedMin(), APInt(16, (uint64_t)INT16_MIN));
+ EXPECT_EQ(One.getSignedMin(), APInt(16, 0xa));
+ EXPECT_EQ(Some.getSignedMin(), APInt(16, 0xa));
+ EXPECT_EQ(Wrap.getSignedMin(), APInt(16, (uint64_t)INT16_MIN));
+
+ // Found by Klee
+ EXPECT_EQ(ConstantRange(APInt(4, 7), APInt(4, 0)).getSignedMax(),
+ APInt(4, 7));
+}
+
+TEST_F(ConstantRangeTest, Trunc) {
+ ConstantRange TFull = Full.truncate(10);
+ ConstantRange TEmpty = Empty.truncate(10);
+ ConstantRange TOne = One.truncate(10);
+ ConstantRange TSome = Some.truncate(10);
+ ConstantRange TWrap = Wrap.truncate(10);
+ EXPECT_TRUE(TFull.isFullSet());
+ EXPECT_TRUE(TEmpty.isEmptySet());
+ EXPECT_EQ(TOne, ConstantRange(APInt(One.getLower()).trunc(10),
+ APInt(One.getUpper()).trunc(10)));
+ EXPECT_TRUE(TSome.isFullSet());
+}
+
+TEST_F(ConstantRangeTest, ZExt) {
+ ConstantRange ZFull = Full.zeroExtend(20);
+ ConstantRange ZEmpty = Empty.zeroExtend(20);
+ ConstantRange ZOne = One.zeroExtend(20);
+ ConstantRange ZSome = Some.zeroExtend(20);
+ ConstantRange ZWrap = Wrap.zeroExtend(20);
+ EXPECT_EQ(ZFull, ConstantRange(APInt(20, 0), APInt(20, 0x10000)));
+ EXPECT_TRUE(ZEmpty.isEmptySet());
+ EXPECT_EQ(ZOne, ConstantRange(APInt(One.getLower()).zext(20),
+ APInt(One.getUpper()).zext(20)));
+ EXPECT_EQ(ZSome, ConstantRange(APInt(Some.getLower()).zext(20),
+ APInt(Some.getUpper()).zext(20)));
+ EXPECT_EQ(ZWrap, ConstantRange(APInt(Wrap.getLower()).zext(20),
+ APInt(Wrap.getUpper()).zext(20)));
+}
+
+TEST_F(ConstantRangeTest, SExt) {
+ ConstantRange SFull = Full.signExtend(20);
+ ConstantRange SEmpty = Empty.signExtend(20);
+ ConstantRange SOne = One.signExtend(20);
+ ConstantRange SSome = Some.signExtend(20);
+ ConstantRange SWrap = Wrap.signExtend(20);
+ EXPECT_EQ(SFull, ConstantRange(APInt(20, (uint64_t)INT16_MIN, true),
+ APInt(20, INT16_MAX + 1, true)));
+ EXPECT_TRUE(SEmpty.isEmptySet());
+ EXPECT_EQ(SOne, ConstantRange(APInt(One.getLower()).sext(20),
+ APInt(One.getUpper()).sext(20)));
+ EXPECT_EQ(SSome, ConstantRange(APInt(Some.getLower()).sext(20),
+ APInt(Some.getUpper()).sext(20)));
+ EXPECT_EQ(SWrap, ConstantRange(APInt(Wrap.getLower()).sext(20),
+ APInt(Wrap.getUpper()).sext(20)));
+}
+
+TEST_F(ConstantRangeTest, IntersectWith) {
+ EXPECT_EQ(Empty.intersectWith(Full), Empty);
+ EXPECT_EQ(Empty.intersectWith(Empty), Empty);
+ EXPECT_EQ(Empty.intersectWith(One), Empty);
+ EXPECT_EQ(Empty.intersectWith(Some), Empty);
+ EXPECT_EQ(Empty.intersectWith(Wrap), Empty);
+ EXPECT_EQ(Full.intersectWith(Full), Full);
+ EXPECT_EQ(Some.intersectWith(Some), Some);
+ EXPECT_EQ(Some.intersectWith(One), One);
+ EXPECT_EQ(Full.intersectWith(One), One);
+ EXPECT_EQ(Full.intersectWith(Some), Some);
+ EXPECT_EQ(Some.intersectWith(Wrap), Empty);
+ EXPECT_EQ(One.intersectWith(Wrap), Empty);
+ EXPECT_EQ(One.intersectWith(Wrap), Wrap.intersectWith(One));
+
+  // Klee-generated testcase from PR4545.
+  // The intersection of i16 [4, 2) and [6, 5) is not contiguous: it looks
+  // like 01..4.6789ABCDEF, where the dots mark values not in the intersection.
+ ConstantRange LHS(APInt(16, 4), APInt(16, 2));
+ ConstantRange RHS(APInt(16, 6), APInt(16, 5));
+ EXPECT_TRUE(LHS.intersectWith(RHS) == LHS);
+}
+
+TEST_F(ConstantRangeTest, UnionWith) {
+ EXPECT_EQ(Wrap.unionWith(One),
+ ConstantRange(APInt(16, 0xaaa), APInt(16, 0xb)));
+ EXPECT_EQ(One.unionWith(Wrap), Wrap.unionWith(One));
+ EXPECT_EQ(Empty.unionWith(Empty), Empty);
+ EXPECT_EQ(Full.unionWith(Full), Full);
+ EXPECT_EQ(Some.unionWith(Wrap), Full);
+
+ // PR4545
+ EXPECT_EQ(ConstantRange(APInt(16, 14), APInt(16, 1)).unionWith(
+ ConstantRange(APInt(16, 0), APInt(16, 8))),
+ ConstantRange(APInt(16, 14), APInt(16, 8)));
+ EXPECT_EQ(ConstantRange(APInt(16, 6), APInt(16, 4)).unionWith(
+ ConstantRange(APInt(16, 4), APInt(16, 0))),
+ ConstantRange(16));
+ EXPECT_EQ(ConstantRange(APInt(16, 1), APInt(16, 0)).unionWith(
+ ConstantRange(APInt(16, 2), APInt(16, 1))),
+ ConstantRange(16));
+}
+
+TEST_F(ConstantRangeTest, SubtractAPInt) {
+ EXPECT_EQ(Full.subtract(APInt(16, 4)), Full);
+ EXPECT_EQ(Empty.subtract(APInt(16, 4)), Empty);
+ EXPECT_EQ(Some.subtract(APInt(16, 4)),
+ ConstantRange(APInt(16, 0x6), APInt(16, 0xaa6)));
+ EXPECT_EQ(Wrap.subtract(APInt(16, 4)),
+ ConstantRange(APInt(16, 0xaa6), APInt(16, 0x6)));
+ EXPECT_EQ(One.subtract(APInt(16, 4)),
+ ConstantRange(APInt(16, 0x6)));
+}
+
+TEST_F(ConstantRangeTest, Add) {
+ EXPECT_EQ(Full.add(APInt(16, 4)), Full);
+ EXPECT_EQ(Full.add(Full), Full);
+ EXPECT_EQ(Full.add(Empty), Empty);
+ EXPECT_EQ(Full.add(One), Full);
+ EXPECT_EQ(Full.add(Some), Full);
+ EXPECT_EQ(Full.add(Wrap), Full);
+ EXPECT_EQ(Empty.add(Empty), Empty);
+ EXPECT_EQ(Empty.add(One), Empty);
+ EXPECT_EQ(Empty.add(Some), Empty);
+ EXPECT_EQ(Empty.add(Wrap), Empty);
+ EXPECT_EQ(Empty.add(APInt(16, 4)), Empty);
+ EXPECT_EQ(Some.add(APInt(16, 4)),
+ ConstantRange(APInt(16, 0xe), APInt(16, 0xaae)));
+ EXPECT_EQ(Wrap.add(APInt(16, 4)),
+ ConstantRange(APInt(16, 0xaae), APInt(16, 0xe)));
+ EXPECT_EQ(One.add(APInt(16, 4)),
+ ConstantRange(APInt(16, 0xe)));
+}
+
+TEST_F(ConstantRangeTest, Multiply) {
+ EXPECT_EQ(Full.multiply(Full), Full);
+ EXPECT_EQ(Full.multiply(Empty), Empty);
+ EXPECT_EQ(Full.multiply(One), Full);
+ EXPECT_EQ(Full.multiply(Some), Full);
+ EXPECT_EQ(Full.multiply(Wrap), Full);
+ EXPECT_EQ(Empty.multiply(Empty), Empty);
+ EXPECT_EQ(Empty.multiply(One), Empty);
+ EXPECT_EQ(Empty.multiply(Some), Empty);
+ EXPECT_EQ(Empty.multiply(Wrap), Empty);
+ EXPECT_EQ(One.multiply(One), ConstantRange(APInt(16, 0xa*0xa),
+ APInt(16, 0xa*0xa + 1)));
+ EXPECT_EQ(One.multiply(Some), ConstantRange(APInt(16, 0xa*0xa),
+ APInt(16, 0xa*0xaa9 + 1)));
+ EXPECT_EQ(One.multiply(Wrap), Full);
+ EXPECT_EQ(Some.multiply(Some), Full);
+ EXPECT_EQ(Some.multiply(Wrap), Full);
+ EXPECT_EQ(Wrap.multiply(Wrap), Full);
+
+ // http://llvm.org/PR4545
+ EXPECT_EQ(ConstantRange(APInt(4, 1), APInt(4, 6)).multiply(
+ ConstantRange(APInt(4, 6), APInt(4, 2))),
+ ConstantRange(4, /*isFullSet=*/true));
+}
+
+TEST_F(ConstantRangeTest, UMax) {
+ EXPECT_EQ(Full.umax(Full), Full);
+ EXPECT_EQ(Full.umax(Empty), Empty);
+ EXPECT_EQ(Full.umax(Some), ConstantRange(APInt(16, 0xa), APInt(16, 0)));
+ EXPECT_EQ(Full.umax(Wrap), Full);
+ EXPECT_EQ(Full.umax(Some), ConstantRange(APInt(16, 0xa), APInt(16, 0)));
+ EXPECT_EQ(Empty.umax(Empty), Empty);
+ EXPECT_EQ(Empty.umax(Some), Empty);
+ EXPECT_EQ(Empty.umax(Wrap), Empty);
+ EXPECT_EQ(Empty.umax(One), Empty);
+ EXPECT_EQ(Some.umax(Some), Some);
+ EXPECT_EQ(Some.umax(Wrap), ConstantRange(APInt(16, 0xa), APInt(16, 0)));
+ EXPECT_EQ(Some.umax(One), Some);
+ // TODO: ConstantRange is currently over-conservative here.
+ EXPECT_EQ(Wrap.umax(Wrap), Full);
+ EXPECT_EQ(Wrap.umax(One), ConstantRange(APInt(16, 0xa), APInt(16, 0)));
+ EXPECT_EQ(One.umax(One), One);
+}
+
+TEST_F(ConstantRangeTest, SMax) {
+ EXPECT_EQ(Full.smax(Full), Full);
+ EXPECT_EQ(Full.smax(Empty), Empty);
+ EXPECT_EQ(Full.smax(Some), ConstantRange(APInt(16, 0xa),
+ APInt::getSignedMinValue(16)));
+ EXPECT_EQ(Full.smax(Wrap), Full);
+ EXPECT_EQ(Full.smax(One), ConstantRange(APInt(16, 0xa),
+ APInt::getSignedMinValue(16)));
+ EXPECT_EQ(Empty.smax(Empty), Empty);
+ EXPECT_EQ(Empty.smax(Some), Empty);
+ EXPECT_EQ(Empty.smax(Wrap), Empty);
+ EXPECT_EQ(Empty.smax(One), Empty);
+ EXPECT_EQ(Some.smax(Some), Some);
+ EXPECT_EQ(Some.smax(Wrap), ConstantRange(APInt(16, 0xa),
+ APInt(16, (uint64_t)INT16_MIN)));
+ EXPECT_EQ(Some.smax(One), Some);
+ EXPECT_EQ(Wrap.smax(One), ConstantRange(APInt(16, 0xa),
+ APInt(16, (uint64_t)INT16_MIN)));
+ EXPECT_EQ(One.smax(One), One);
+}
+
+TEST_F(ConstantRangeTest, UDiv) {
+ EXPECT_EQ(Full.udiv(Full), Full);
+ EXPECT_EQ(Full.udiv(Empty), Empty);
+ EXPECT_EQ(Full.udiv(One), ConstantRange(APInt(16, 0),
+ APInt(16, 0xffff / 0xa + 1)));
+ EXPECT_EQ(Full.udiv(Some), ConstantRange(APInt(16, 0),
+ APInt(16, 0xffff / 0xa + 1)));
+ EXPECT_EQ(Full.udiv(Wrap), Full);
+ EXPECT_EQ(Empty.udiv(Empty), Empty);
+ EXPECT_EQ(Empty.udiv(One), Empty);
+ EXPECT_EQ(Empty.udiv(Some), Empty);
+ EXPECT_EQ(Empty.udiv(Wrap), Empty);
+ EXPECT_EQ(One.udiv(One), ConstantRange(APInt(16, 1)));
+ EXPECT_EQ(One.udiv(Some), ConstantRange(APInt(16, 0), APInt(16, 2)));
+ EXPECT_EQ(One.udiv(Wrap), ConstantRange(APInt(16, 0), APInt(16, 0xb)));
+ EXPECT_EQ(Some.udiv(Some), ConstantRange(APInt(16, 0), APInt(16, 0x111)));
+ EXPECT_EQ(Some.udiv(Wrap), ConstantRange(APInt(16, 0), APInt(16, 0xaaa)));
+ EXPECT_EQ(Wrap.udiv(Wrap), Full);
+}
+
+} // anonymous namespace
diff --git a/unittests/Support/MathExtrasTest.cpp b/unittests/Support/MathExtrasTest.cpp
index 1f1946827e37..3db1f77d444d 100644
--- a/unittests/Support/MathExtrasTest.cpp
+++ b/unittests/Support/MathExtrasTest.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/Support/MathExtrasTest.cpp - math utils tests --------===//
+//===- unittests/Support/MathExtrasTest.cpp - math utils tests ------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp
new file mode 100644
index 000000000000..44c7e5590c73
--- /dev/null
+++ b/unittests/Support/RegexTest.cpp
@@ -0,0 +1,65 @@
+//===- llvm/unittest/Support/RegexTest.cpp - Regex tests ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cstring>
+
+using namespace llvm;
+namespace {
+
+class RegexTest : public ::testing::Test {
+};
+
+TEST_F(RegexTest, Basics) {
+ Regex r1("^[0-9]+$");
+ EXPECT_TRUE(r1.match("916"));
+ EXPECT_TRUE(r1.match("9"));
+ EXPECT_FALSE(r1.match("9a"));
+
+ SmallVector<StringRef, 1> Matches;
+ Regex r2("[0-9]+");
+ EXPECT_TRUE(r2.match("aa216b", &Matches));
+ EXPECT_EQ(1u, Matches.size());
+ EXPECT_EQ("216", Matches[0].str());
+
+ Regex r3("[0-9]+([a-f])?:([0-9]+)");
+ EXPECT_TRUE(r3.match("9a:513b", &Matches));
+ EXPECT_EQ(3u, Matches.size());
+ EXPECT_EQ("9a:513", Matches[0].str());
+ EXPECT_EQ("a", Matches[1].str());
+ EXPECT_EQ("513", Matches[2].str());
+
+ EXPECT_TRUE(r3.match("9:513b", &Matches));
+ EXPECT_EQ(3u, Matches.size());
+ EXPECT_EQ("9:513", Matches[0].str());
+ EXPECT_EQ("", Matches[1].str());
+ EXPECT_EQ("513", Matches[2].str());
+
+ Regex r4("a[^b]+b");
+  std::string String = "axxb";
+ String[2] = '\0';
+ EXPECT_FALSE(r4.match("abb"));
+ EXPECT_TRUE(r4.match(String, &Matches));
+ EXPECT_EQ(1u, Matches.size());
+ EXPECT_EQ(String, Matches[0].str());
+
+
+  std::string NulPattern = "X[0-9]+X([a-f])?:([0-9]+)";
+  String = "YX99a:513b";
+ NulPattern[7] = '\0';
+ Regex r5(NulPattern);
+ EXPECT_FALSE(r5.match(String));
+ EXPECT_FALSE(r5.match("X9"));
+  String[3] = '\0';
+ EXPECT_TRUE(r5.match(String));
+}
+
+}
diff --git a/unittests/Support/TypeBuilderTest.cpp b/unittests/Support/TypeBuilderTest.cpp
index 425ee6bf54e1..a5c5e67129a3 100644
--- a/unittests/Support/TypeBuilderTest.cpp
+++ b/unittests/Support/TypeBuilderTest.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/TypeBuilder.h"
+#include "llvm/LLVMContext.h"
#include "gtest/gtest.h"
@@ -16,134 +17,146 @@ using namespace llvm;
namespace {
TEST(TypeBuilderTest, Void) {
- EXPECT_EQ(Type::VoidTy, (TypeBuilder<void, true>::get()));
- EXPECT_EQ(Type::VoidTy, (TypeBuilder<void, false>::get()));
+ EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder<void, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder<void, false>::get(getGlobalContext())));
// Special case for C compatibility:
- EXPECT_EQ(PointerType::getUnqual(Type::Int8Ty),
- (TypeBuilder<void*, false>::get()));
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<void*, false>::get(getGlobalContext())));
}
TEST(TypeBuilderTest, HostIntegers) {
- EXPECT_EQ(Type::Int8Ty, (TypeBuilder<int8_t, false>::get()));
- EXPECT_EQ(Type::Int8Ty, (TypeBuilder<uint8_t, false>::get()));
- EXPECT_EQ(Type::Int16Ty, (TypeBuilder<int16_t, false>::get()));
- EXPECT_EQ(Type::Int16Ty, (TypeBuilder<uint16_t, false>::get()));
- EXPECT_EQ(Type::Int32Ty, (TypeBuilder<int32_t, false>::get()));
- EXPECT_EQ(Type::Int32Ty, (TypeBuilder<uint32_t, false>::get()));
- EXPECT_EQ(Type::Int64Ty, (TypeBuilder<int64_t, false>::get()));
- EXPECT_EQ(Type::Int64Ty, (TypeBuilder<uint64_t, false>::get()));
-
- EXPECT_EQ(IntegerType::get(sizeof(size_t) * CHAR_BIT),
- (TypeBuilder<size_t, false>::get()));
- EXPECT_EQ(IntegerType::get(sizeof(ptrdiff_t) * CHAR_BIT),
- (TypeBuilder<ptrdiff_t, false>::get()));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder<int8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder<uint8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt16Ty(getGlobalContext()), (TypeBuilder<int16_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt16Ty(getGlobalContext()), (TypeBuilder<uint16_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (TypeBuilder<int32_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (TypeBuilder<uint32_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt64Ty(getGlobalContext()), (TypeBuilder<int64_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt64Ty(getGlobalContext()), (TypeBuilder<uint64_t, false>::get(getGlobalContext())));
+
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), sizeof(size_t) * CHAR_BIT),
+ (TypeBuilder<size_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), sizeof(ptrdiff_t) * CHAR_BIT),
+ (TypeBuilder<ptrdiff_t, false>::get(getGlobalContext())));
}
TEST(TypeBuilderTest, CrossCompilableIntegers) {
- EXPECT_EQ(IntegerType::get(1), (TypeBuilder<types::i<1>, true>::get()));
- EXPECT_EQ(IntegerType::get(1), (TypeBuilder<types::i<1>, false>::get()));
- EXPECT_EQ(IntegerType::get(72), (TypeBuilder<types::i<72>, true>::get()));
- EXPECT_EQ(IntegerType::get(72), (TypeBuilder<types::i<72>, false>::get()));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 1), (TypeBuilder<types::i<1>, true>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 1), (TypeBuilder<types::i<1>, false>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 72), (TypeBuilder<types::i<72>, true>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 72), (TypeBuilder<types::i<72>, false>::get(getGlobalContext())));
}
TEST(TypeBuilderTest, Float) {
- EXPECT_EQ(Type::FloatTy, (TypeBuilder<float, false>::get()));
- EXPECT_EQ(Type::DoubleTy, (TypeBuilder<double, false>::get()));
+ EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<float, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<double, false>::get(getGlobalContext())));
// long double isn't supported yet.
- EXPECT_EQ(Type::FloatTy, (TypeBuilder<types::ieee_float, true>::get()));
- EXPECT_EQ(Type::FloatTy, (TypeBuilder<types::ieee_float, false>::get()));
- EXPECT_EQ(Type::DoubleTy, (TypeBuilder<types::ieee_double, true>::get()));
- EXPECT_EQ(Type::DoubleTy, (TypeBuilder<types::ieee_double, false>::get()));
- EXPECT_EQ(Type::X86_FP80Ty, (TypeBuilder<types::x86_fp80, true>::get()));
- EXPECT_EQ(Type::X86_FP80Ty, (TypeBuilder<types::x86_fp80, false>::get()));
- EXPECT_EQ(Type::FP128Ty, (TypeBuilder<types::fp128, true>::get()));
- EXPECT_EQ(Type::FP128Ty, (TypeBuilder<types::fp128, false>::get()));
- EXPECT_EQ(Type::PPC_FP128Ty, (TypeBuilder<types::ppc_fp128, true>::get()));
- EXPECT_EQ(Type::PPC_FP128Ty, (TypeBuilder<types::ppc_fp128, false>::get()));
+ EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<types::ieee_float, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<types::ieee_float, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<types::ieee_double, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<types::ieee_double, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getX86_FP80Ty(getGlobalContext()), (TypeBuilder<types::x86_fp80, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getX86_FP80Ty(getGlobalContext()), (TypeBuilder<types::x86_fp80, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getFP128Ty(getGlobalContext()), (TypeBuilder<types::fp128, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getFP128Ty(getGlobalContext()), (TypeBuilder<types::fp128, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getPPC_FP128Ty(getGlobalContext()), (TypeBuilder<types::ppc_fp128, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getPPC_FP128Ty(getGlobalContext()), (TypeBuilder<types::ppc_fp128, false>::get(getGlobalContext())));
}
TEST(TypeBuilderTest, Derived) {
- EXPECT_EQ(PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)),
- (TypeBuilder<int8_t**, false>::get()));
- EXPECT_EQ(ArrayType::get(Type::Int8Ty, 7),
- (TypeBuilder<int8_t[7], false>::get()));
- EXPECT_EQ(ArrayType::get(Type::Int8Ty, 0),
- (TypeBuilder<int8_t[], false>::get()));
-
- EXPECT_EQ(PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)),
- (TypeBuilder<types::i<8>**, false>::get()));
- EXPECT_EQ(ArrayType::get(Type::Int8Ty, 7),
- (TypeBuilder<types::i<8>[7], false>::get()));
- EXPECT_EQ(ArrayType::get(Type::Int8Ty, 0),
- (TypeBuilder<types::i<8>[], false>::get()));
-
- EXPECT_EQ(PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)),
- (TypeBuilder<types::i<8>**, true>::get()));
- EXPECT_EQ(ArrayType::get(Type::Int8Ty, 7),
- (TypeBuilder<types::i<8>[7], true>::get()));
- EXPECT_EQ(ArrayType::get(Type::Int8Ty, 0),
- (TypeBuilder<types::i<8>[], true>::get()));
-
-
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<const int8_t, false>::get()));
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<volatile int8_t, false>::get()));
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<const volatile int8_t, false>::get()));
-
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<const types::i<8>, false>::get()));
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<volatile types::i<8>, false>::get()));
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<const volatile types::i<8>, false>::get()));
-
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<const types::i<8>, true>::get()));
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<volatile types::i<8>, true>::get()));
- EXPECT_EQ(Type::Int8Ty,
- (TypeBuilder<const volatile types::i<8>, true>::get()));
-
- EXPECT_EQ(PointerType::getUnqual(Type::Int8Ty),
- (TypeBuilder<const volatile int8_t*const volatile, false>::get()));
+ EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
+ (TypeBuilder<int8_t**, false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
+ (TypeBuilder<int8_t[7], false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
+ (TypeBuilder<int8_t[], false>::get(getGlobalContext())));
+
+ EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
+ (TypeBuilder<types::i<8>**, false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
+ (TypeBuilder<types::i<8>[7], false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
+ (TypeBuilder<types::i<8>[], false>::get(getGlobalContext())));
+
+ EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
+ (TypeBuilder<types::i<8>**, true>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
+ (TypeBuilder<types::i<8>[7], true>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
+ (TypeBuilder<types::i<8>[], true>::get(getGlobalContext())));
+
+
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const int8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<volatile int8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const volatile int8_t, false>::get(getGlobalContext())));
+
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const types::i<8>, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<volatile types::i<8>, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const volatile types::i<8>, false>::get(getGlobalContext())));
+
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const types::i<8>, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<volatile types::i<8>, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const volatile types::i<8>, true>::get(getGlobalContext())));
+
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<const volatile int8_t*const volatile, false>::get(getGlobalContext())));
}
TEST(TypeBuilderTest, Functions) {
std::vector<const Type*> params;
- EXPECT_EQ(FunctionType::get(Type::VoidTy, params, false),
- (TypeBuilder<void(), true>::get()));
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, true),
- (TypeBuilder<int8_t(...), false>::get()));
- params.push_back(TypeBuilder<int32_t*, false>::get());
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, false),
- (TypeBuilder<int8_t(const int32_t*), false>::get()));
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, true),
- (TypeBuilder<int8_t(const int32_t*, ...), false>::get()));
- params.push_back(TypeBuilder<char*, false>::get());
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, false),
- (TypeBuilder<int8_t(int32_t*, void*), false>::get()));
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, true),
- (TypeBuilder<int8_t(int32_t*, char*, ...), false>::get()));
- params.push_back(TypeBuilder<char, false>::get());
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, false),
- (TypeBuilder<int8_t(int32_t*, void*, char), false>::get()));
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, true),
- (TypeBuilder<int8_t(int32_t*, char*, char, ...), false>::get()));
- params.push_back(TypeBuilder<char, false>::get());
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, false),
- (TypeBuilder<int8_t(int32_t*, void*, char, char), false>::get()));
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, true),
+ EXPECT_EQ(FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false),
+ (TypeBuilder<void(), true>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<int32_t*, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(const int32_t*), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(const int32_t*, ...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char*, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(int32_t*, char*, ...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*, char), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(int32_t*, char*, char, ...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*, char, char), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
(TypeBuilder<int8_t(int32_t*, char*, char, char, ...),
- false>::get()));
- params.push_back(TypeBuilder<char, false>::get());
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, false),
+ false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
(TypeBuilder<int8_t(int32_t*, void*, char, char, char),
- false>::get()));
- EXPECT_EQ(FunctionType::get(Type::Int8Ty, params, true),
+ false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
(TypeBuilder<int8_t(int32_t*, char*, char, char, char, ...),
- false>::get()));
+ false>::get(getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, Context) {
+ // We used to cache TypeBuilder results in static local variables. This
+ // produced the same type for different contexts, which of course broke
+ // things.
+ LLVMContext context1;
+ EXPECT_EQ(&context1,
+ &(TypeBuilder<types::i<1>, true>::get(context1))->getContext());
+ LLVMContext context2;
+ EXPECT_EQ(&context2,
+ &(TypeBuilder<types::i<1>, true>::get(context2))->getContext());
}
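The fix the test above alludes to is to key any cache on the LLVMContext rather than on a process-wide static local. A minimal sketch of that pattern against the API this patch uses — the name getMyStructType and the std::map cache are illustrative only, not part of the patch:

    #include <map>
    #include <vector>
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // Cache one StructType per LLVMContext, so two different contexts can
    // never be handed the same type the way a single function-local static
    // would force them to be.
    static const StructType *getMyStructType(LLVMContext &Context) {
      static std::map<LLVMContext*, const StructType*> Cache;
      const StructType *&Entry = Cache[&Context];
      if (!Entry) {
        std::vector<const Type*> Fields;
        Fields.push_back(Type::getInt32Ty(Context));
        Entry = StructType::get(Context, Fields);
      }
      return Entry;
    }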
class MyType {
@@ -163,14 +176,14 @@ class MyPortableType {
namespace llvm {
template<bool cross> class TypeBuilder<MyType, cross> {
public:
- static const StructType *get() {
+ static const StructType *get(LLVMContext &Context) {
// Using the static result variable ensures that the type is
// only looked up once.
- static const StructType *const result = StructType::get(
- TypeBuilder<int, cross>::get(),
- TypeBuilder<int*, cross>::get(),
- TypeBuilder<void*[], cross>::get(),
- NULL);
+ std::vector<const Type*> st;
+ st.push_back(TypeBuilder<int, cross>::get(Context));
+ st.push_back(TypeBuilder<int*, cross>::get(Context));
+ st.push_back(TypeBuilder<void*[], cross>::get(Context));
+ static const StructType *const result = StructType::get(Context, st);
return result;
}
@@ -186,14 +199,14 @@ public:
template<bool cross> class TypeBuilder<MyPortableType, cross> {
public:
- static const StructType *get() {
+ static const StructType *get(LLVMContext &Context) {
// Using the static result variable ensures that the type is
// only looked up once.
- static const StructType *const result = StructType::get(
- TypeBuilder<types::i<32>, cross>::get(),
- TypeBuilder<types::i<32>*, cross>::get(),
- TypeBuilder<types::i<8>*[], cross>::get(),
- NULL);
+ std::vector<const Type*> st;
+ st.push_back(TypeBuilder<types::i<32>, cross>::get(Context));
+ st.push_back(TypeBuilder<types::i<32>*, cross>::get(Context));
+ st.push_back(TypeBuilder<types::i<8>*[], cross>::get(Context));
+ static const StructType *const result = StructType::get(Context, st);
return result;
}
@@ -210,24 +223,24 @@ public:
namespace {
TEST(TypeBuilderTest, Extensions) {
- EXPECT_EQ(PointerType::getUnqual(StructType::get(
- TypeBuilder<int, false>::get(),
- TypeBuilder<int*, false>::get(),
- TypeBuilder<void*[], false>::get(),
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(),
+ TypeBuilder<int, false>::get(getGlobalContext()),
+ TypeBuilder<int*, false>::get(getGlobalContext()),
+ TypeBuilder<void*[], false>::get(getGlobalContext()),
NULL)),
- (TypeBuilder<MyType*, false>::get()));
- EXPECT_EQ(PointerType::getUnqual(StructType::get(
- TypeBuilder<types::i<32>, false>::get(),
- TypeBuilder<types::i<32>*, false>::get(),
- TypeBuilder<types::i<8>*[], false>::get(),
+ (TypeBuilder<MyType*, false>::get(getGlobalContext())));
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(),
+ TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
NULL)),
- (TypeBuilder<MyPortableType*, false>::get()));
- EXPECT_EQ(PointerType::getUnqual(StructType::get(
- TypeBuilder<types::i<32>, false>::get(),
- TypeBuilder<types::i<32>*, false>::get(),
- TypeBuilder<types::i<8>*[], false>::get(),
+ (TypeBuilder<MyPortableType*, false>::get(getGlobalContext())));
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(),
+ TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
NULL)),
- (TypeBuilder<MyPortableType*, true>::get()));
+ (TypeBuilder<MyPortableType*, true>::get(getGlobalContext())));
}
} // anonymous namespace
diff --git a/unittests/Support/ValueHandleTest.cpp b/unittests/Support/ValueHandleTest.cpp
index 336e7d90dc91..c89a7af6fefe 100644
--- a/unittests/Support/ValueHandleTest.cpp
+++ b/unittests/Support/ValueHandleTest.cpp
@@ -11,6 +11,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/ADT/OwningPtr.h"
#include "gtest/gtest.h"
@@ -25,8 +26,9 @@ protected:
Constant *ConstantV;
std::auto_ptr<BitCastInst> BitcastV;
- ValueHandle() : ConstantV(ConstantInt::get(Type::Int32Ty, 0)),
- BitcastV(new BitCastInst(ConstantV, Type::Int32Ty)) {
+ ValueHandle() :
+ ConstantV(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 0)),
+ BitcastV(new BitCastInst(ConstantV, Type::getInt32Ty(getGlobalContext()))) {
}
};
@@ -44,8 +46,8 @@ TEST_F(ValueHandle, WeakVH_BasicOperation) {
// Make sure I can call a method on the underlying Value. It
// doesn't matter which method.
- EXPECT_EQ(Type::Int32Ty, WVH->getType());
- EXPECT_EQ(Type::Int32Ty, (*WVH).getType());
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), WVH->getType());
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (*WVH).getType());
}
TEST_F(ValueHandle, WeakVH_Comparisons) {
@@ -119,6 +121,13 @@ TEST_F(ValueHandle, AssertingVH_BasicOperation) {
EXPECT_FALSE((*AVH).mayWriteToMemory());
}
+TEST_F(ValueHandle, AssertingVH_Const) {
+ const CastInst *ConstBitcast = BitcastV.get();
+ AssertingVH<const CastInst> AVH(ConstBitcast);
+ const CastInst *implicit_to_exact_type = AVH;
+ implicit_to_exact_type = implicit_to_exact_type; // Avoid warning.
+}
+
TEST_F(ValueHandle, AssertingVH_Comparisons) {
AssertingVH<Value> BitcastAVH(BitcastV.get());
AssertingVH<Value> ConstantAVH(ConstantV);
@@ -192,8 +201,8 @@ TEST_F(ValueHandle, CallbackVH_BasicOperation) {
// Make sure I can call a method on the underlying Value. It
// doesn't matter which method.
- EXPECT_EQ(Type::Int32Ty, CVH->getType());
- EXPECT_EQ(Type::Int32Ty, (*CVH).getType());
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), CVH->getType());
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (*CVH).getType());
}
TEST_F(ValueHandle, CallbackVH_Comparisons) {
@@ -284,14 +293,17 @@ TEST_F(ValueHandle, CallbackVH_DeletionCanRAUW) {
public:
int DeletedCalls;
Value *AURWArgument;
+ LLVMContext *Context;
- RecoveringVH() : DeletedCalls(0), AURWArgument(NULL) {}
+ RecoveringVH() : DeletedCalls(0), AURWArgument(NULL),
+ Context(&getGlobalContext()) {}
RecoveringVH(Value *V)
- : CallbackVH(V), DeletedCalls(0), AURWArgument(NULL) {}
+ : CallbackVH(V), DeletedCalls(0), AURWArgument(NULL),
+ Context(&getGlobalContext()) {}
private:
virtual void deleted() {
- getValPtr()->replaceAllUsesWith(Constant::getNullValue(Type::Int32Ty));
+ getValPtr()->replaceAllUsesWith(Constant::getNullValue(Type::getInt32Ty(getGlobalContext())));
setValPtr(NULL);
}
virtual void allUsesReplacedWith(Value *new_value) {
@@ -307,11 +319,93 @@ TEST_F(ValueHandle, CallbackVH_DeletionCanRAUW) {
RecoveringVH RVH;
RVH = BitcastV.get();
std::auto_ptr<BinaryOperator> BitcastUser(
- BinaryOperator::CreateAdd(RVH, Constant::getNullValue(Type::Int32Ty)));
+ BinaryOperator::CreateAdd(RVH,
+ Constant::getNullValue(Type::getInt32Ty(getGlobalContext()))));
EXPECT_EQ(BitcastV.get(), BitcastUser->getOperand(0));
BitcastV.reset(); // Would crash without the ValueHandler.
- EXPECT_EQ(Constant::getNullValue(Type::Int32Ty), RVH.AURWArgument);
- EXPECT_EQ(Constant::getNullValue(Type::Int32Ty), BitcastUser->getOperand(0));
+ EXPECT_EQ(Constant::getNullValue(Type::getInt32Ty(getGlobalContext())), RVH.AURWArgument);
+ EXPECT_EQ(Constant::getNullValue(Type::getInt32Ty(getGlobalContext())),
+ BitcastUser->getOperand(0));
+}
+
+TEST_F(ValueHandle, DestroyingOtherVHOnSameValueDoesntBreakIteration) {
+ // When a CallbackVH modifies other ValueHandles in its callbacks,
+ // that shouldn't interfere with non-modified ValueHandles receiving
+ // their appropriate callbacks.
+ //
+ // We create the active CallbackVH in the middle of a palindromic
+ // arrangement of other VHs so that the bad behavior would be
+ // triggered in whichever order callbacks run.
+
+ class DestroyingVH : public CallbackVH {
+ public:
+ OwningPtr<WeakVH> ToClear[2];
+ DestroyingVH(Value *V) {
+ ToClear[0].reset(new WeakVH(V));
+ setValPtr(V);
+ ToClear[1].reset(new WeakVH(V));
+ }
+ virtual void deleted() {
+ ToClear[0].reset();
+ ToClear[1].reset();
+ CallbackVH::deleted();
+ }
+ virtual void allUsesReplacedWith(Value *) {
+ ToClear[0].reset();
+ ToClear[1].reset();
+ }
+ };
+
+ {
+ WeakVH ShouldBeVisited1(BitcastV.get());
+ DestroyingVH C(BitcastV.get());
+ WeakVH ShouldBeVisited2(BitcastV.get());
+
+ BitcastV->replaceAllUsesWith(ConstantV);
+ EXPECT_EQ(ConstantV, static_cast<Value*>(ShouldBeVisited1));
+ EXPECT_EQ(ConstantV, static_cast<Value*>(ShouldBeVisited2));
+ }
+
+ {
+ WeakVH ShouldBeVisited1(BitcastV.get());
+ DestroyingVH C(BitcastV.get());
+ WeakVH ShouldBeVisited2(BitcastV.get());
+
+ BitcastV.reset();
+ EXPECT_EQ(NULL, static_cast<Value*>(ShouldBeVisited1));
+ EXPECT_EQ(NULL, static_cast<Value*>(ShouldBeVisited2));
+ }
+}
+
+TEST_F(ValueHandle, AssertingVHCheckedLast) {
+ // If a CallbackVH exists to clear out a group of AssertingVHs on
+ // Value deletion, the CallbackVH should get a chance to do so
+ // before the AssertingVHs assert.
+
+ class ClearingVH : public CallbackVH {
+ public:
+ AssertingVH<Value> *ToClear[2];
+ ClearingVH(Value *V,
+ AssertingVH<Value> &A0, AssertingVH<Value> &A1)
+ : CallbackVH(V) {
+ ToClear[0] = &A0;
+ ToClear[1] = &A1;
+ }
+
+ virtual void deleted() {
+ *ToClear[0] = 0;
+ *ToClear[1] = 0;
+ CallbackVH::deleted();
+ }
+ };
+
+ AssertingVH<Value> A1, A2;
+ A1 = BitcastV.get();
+ ClearingVH C(BitcastV.get(), A1, A2);
+ A2 = BitcastV.get();
+ // C.deleted() should run first, clearing the two AssertingVHs,
+ // which should prevent them from asserting.
+ BitcastV.reset();
}
}
diff --git a/unittests/Support/raw_ostream_test.cpp b/unittests/Support/raw_ostream_test.cpp
index feb0152bbe88..bd2e95cbb531 100644
--- a/unittests/Support/raw_ostream_test.cpp
+++ b/unittests/Support/raw_ostream_test.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "gtest/gtest.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -20,6 +22,23 @@ template<typename T> std::string printToString(const T &Value) {
return res;
}
+/// printToString - Print the given value to a stream which only has \arg
+/// BytesLeftInBuffer bytes left in the buffer. This is useful for testing edge
+/// cases in the buffer handling logic.
+template<typename T> std::string printToString(const T &Value,
+ unsigned BytesLeftInBuffer) {
+ // FIXME: This is relying on internal knowledge of how raw_ostream works to
+ // get the buffer position right.
+ SmallString<256> SVec;
+ assert(BytesLeftInBuffer < 256 && "Invalid buffer count!");
+ llvm::raw_svector_ostream OS(SVec);
+ unsigned StartIndex = 256 - BytesLeftInBuffer;
+ for (unsigned i = 0; i != StartIndex; ++i)
+ OS << '?';
+ OS << Value;
+ return OS.str().substr(StartIndex);
+}
+
template<typename T> std::string printToStringUnbuffered(const T &Value) {
std::string res;
llvm::raw_string_ostream OS(res);
@@ -53,6 +72,10 @@ TEST(raw_ostreamTest, Types_Buffered) {
EXPECT_EQ("0x0", printToString((void*) 0));
EXPECT_EQ("0xbeef", printToString((void*) 0xbeef));
EXPECT_EQ("0xdeadbeef", printToString((void*) 0xdeadbeef));
+
+ // Min and max.
+ EXPECT_EQ("18446744073709551615", printToString(UINT64_MAX));
+ EXPECT_EQ("-9223372036854775808", printToString(INT64_MIN));
}
TEST(raw_ostreamTest, Types_Unbuffered) {
@@ -80,6 +103,28 @@ TEST(raw_ostreamTest, Types_Unbuffered) {
EXPECT_EQ("0x0", printToStringUnbuffered((void*) 0));
EXPECT_EQ("0xbeef", printToStringUnbuffered((void*) 0xbeef));
EXPECT_EQ("0xdeadbeef", printToStringUnbuffered((void*) 0xdeadbeef));
+
+ // Min and max.
+ EXPECT_EQ("18446744073709551615", printToStringUnbuffered(UINT64_MAX));
+ EXPECT_EQ("-9223372036854775808", printToStringUnbuffered(INT64_MIN));
+}
+
+TEST(raw_ostreamTest, BufferEdge) {
+ EXPECT_EQ("1.20", printToString(format("%.2f", 1.2), 1));
+ EXPECT_EQ("1.20", printToString(format("%.2f", 1.2), 2));
+ EXPECT_EQ("1.20", printToString(format("%.2f", 1.2), 3));
+ EXPECT_EQ("1.20", printToString(format("%.2f", 1.2), 4));
+ EXPECT_EQ("1.20", printToString(format("%.2f", 1.2), 10));
+}
+
+TEST(raw_ostreamTest, TinyBuffer) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS.SetBufferSize(1);
+ OS << "hello";
+ OS << 1;
+ OS << 'w' << 'o' << 'r' << 'l' << 'd';
+ EXPECT_EQ("hello1world", OS.str());
}
}
diff --git a/unittests/Transforms/Makefile b/unittests/Transforms/Makefile
new file mode 100644
index 000000000000..599b18a057dc
--- /dev/null
+++ b/unittests/Transforms/Makefile
@@ -0,0 +1,17 @@
+##===- unittests/Transforms/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+
+PARALLEL_DIRS = Utils
+
+include $(LEVEL)/Makefile.common
+
+clean::
+ $(Verb) $(RM) -f *Tests
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
new file mode 100644
index 000000000000..b14114ab6912
--- /dev/null
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -0,0 +1,87 @@
+//===- Cloning.cpp - Unit tests for the Cloner ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/Argument.h"
+#include "llvm/Instructions.h"
+
+using namespace llvm;
+
+TEST(CloneInstruction, OverflowBits) {
+ LLVMContext context;
+ Value *V = new Argument(Type::getInt32Ty(context));
+
+ BinaryOperator *Add = BinaryOperator::Create(Instruction::Add, V, V);
+ BinaryOperator *Sub = BinaryOperator::Create(Instruction::Sub, V, V);
+ BinaryOperator *Mul = BinaryOperator::Create(Instruction::Mul, V, V);
+
+ EXPECT_FALSE(Add->clone()->hasNoUnsignedWrap());
+ EXPECT_FALSE(Add->clone()->hasNoSignedWrap());
+ EXPECT_FALSE(Sub->clone()->hasNoUnsignedWrap());
+ EXPECT_FALSE(Sub->clone()->hasNoSignedWrap());
+ EXPECT_FALSE(Mul->clone()->hasNoUnsignedWrap());
+ EXPECT_FALSE(Mul->clone()->hasNoSignedWrap());
+
+ Add->setHasNoUnsignedWrap();
+ Sub->setHasNoUnsignedWrap();
+ Mul->setHasNoUnsignedWrap();
+
+ EXPECT_TRUE(Add->clone()->hasNoUnsignedWrap());
+ EXPECT_FALSE(Add->clone()->hasNoSignedWrap());
+ EXPECT_TRUE(Sub->clone()->hasNoUnsignedWrap());
+ EXPECT_FALSE(Sub->clone()->hasNoSignedWrap());
+ EXPECT_TRUE(Mul->clone()->hasNoUnsignedWrap());
+ EXPECT_FALSE(Mul->clone()->hasNoSignedWrap());
+
+ Add->setHasNoSignedWrap();
+ Sub->setHasNoSignedWrap();
+ Mul->setHasNoSignedWrap();
+
+ EXPECT_TRUE(Add->clone()->hasNoUnsignedWrap());
+ EXPECT_TRUE(Add->clone()->hasNoSignedWrap());
+ EXPECT_TRUE(Sub->clone()->hasNoUnsignedWrap());
+ EXPECT_TRUE(Sub->clone()->hasNoSignedWrap());
+ EXPECT_TRUE(Mul->clone()->hasNoUnsignedWrap());
+ EXPECT_TRUE(Mul->clone()->hasNoSignedWrap());
+
+ Add->setHasNoUnsignedWrap(false);
+ Sub->setHasNoUnsignedWrap(false);
+ Mul->setHasNoUnsignedWrap(false);
+
+ EXPECT_FALSE(Add->clone()->hasNoUnsignedWrap());
+ EXPECT_TRUE(Add->clone()->hasNoSignedWrap());
+ EXPECT_FALSE(Sub->clone()->hasNoUnsignedWrap());
+ EXPECT_TRUE(Sub->clone()->hasNoSignedWrap());
+ EXPECT_FALSE(Mul->clone()->hasNoUnsignedWrap());
+ EXPECT_TRUE(Mul->clone()->hasNoSignedWrap());
+}
+
+TEST(CloneInstruction, Inbounds) {
+ LLVMContext context;
+ Value *V = new Argument(Type::getInt32PtrTy(context));
+ Constant *Z = Constant::getNullValue(Type::getInt32Ty(context));
+ std::vector<Value *> ops;
+ ops.push_back(Z);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(V, ops.begin(), ops.end());
+ EXPECT_FALSE(GEP->clone()->isInBounds());
+
+ GEP->setIsInBounds();
+ EXPECT_TRUE(GEP->clone()->isInBounds());
+}
+
+TEST(CloneInstruction, Exact) {
+ LLVMContext context;
+ Value *V = new Argument(Type::getInt32Ty(context));
+
+ BinaryOperator *SDiv = BinaryOperator::Create(Instruction::SDiv, V, V);
+ EXPECT_FALSE(SDiv->clone()->isExact());
+
+ SDiv->setIsExact(true);
+ EXPECT_TRUE(SDiv->clone()->isExact());
+}
diff --git a/unittests/Transforms/Utils/Makefile b/unittests/Transforms/Utils/Makefile
new file mode 100644
index 000000000000..fdf4be0e0efe
--- /dev/null
+++ b/unittests/Transforms/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- unittests/Transforms/Utils/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+TESTNAME = Utils
+LINK_COMPONENTS := core support transformutils
+
+include $(LEVEL)/Makefile.config
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/VMCore/ConstantsTest.cpp b/unittests/VMCore/ConstantsTest.cpp
index 519d928eac16..8f28407b8dec 100644
--- a/unittests/VMCore/ConstantsTest.cpp
+++ b/unittests/VMCore/ConstantsTest.cpp
@@ -9,13 +9,14 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "gtest/gtest.h"
namespace llvm {
namespace {
TEST(ConstantsTest, Integer_i1) {
- const IntegerType* Int1 = IntegerType::get(1);
+ const IntegerType* Int1 = IntegerType::get(getGlobalContext(), 1);
Constant* One = ConstantInt::get(Int1, 1, true);
Constant* Zero = ConstantInt::get(Int1, 0);
Constant* NegOne = ConstantInt::get(Int1, static_cast<uint64_t>(-1), true);
@@ -96,7 +97,7 @@ TEST(ConstantsTest, Integer_i1) {
}
TEST(ConstantsTest, IntSigns) {
- const IntegerType* Int8Ty = Type::Int8Ty;
+ const IntegerType* Int8Ty = Type::getInt8Ty(getGlobalContext());
EXPECT_EQ(100, ConstantInt::get(Int8Ty, 100, false)->getSExtValue());
EXPECT_EQ(100, ConstantInt::get(Int8Ty, 100, true)->getSExtValue());
EXPECT_EQ(100, ConstantInt::getSigned(Int8Ty, 100)->getSExtValue());
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index 2de3a9248185..b92b068e259c 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -10,22 +10,24 @@
#include "gtest/gtest.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
-#include "llvm/MDNode.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
-#include <sstream>
-
using namespace llvm;
namespace {
+LLVMContext &Context = getGlobalContext();
+
// Test that construction of MDString with different value produces different
// MDString objects, even with the same string pointer and nulls in the string.
TEST(MDStringTest, CreateDifferent) {
char x[3] = { 'f', 0, 'A' };
- MDString *s1 = MDString::get(&x[0], &x[3]);
+ MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
x[2] = 'B';
- MDString *s2 = MDString::get(&x[0], &x[3]);
+ MDString *s2 = MDString::get(Context, StringRef(&x[0], 3));
EXPECT_NE(s1, s2);
}
@@ -35,8 +37,8 @@ TEST(MDStringTest, CreateSame) {
char x[4] = { 'a', 'b', 'c', 'X' };
char y[4] = { 'a', 'b', 'c', 'Y' };
- MDString *s1 = MDString::get(&x[0], &x[3]);
- MDString *s2 = MDString::get(&y[0], &y[3]);
+ MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
+ MDString *s2 = MDString::get(Context, StringRef(&y[0], 3));
EXPECT_EQ(s1, s2);
}
@@ -44,11 +46,12 @@ TEST(MDStringTest, CreateSame) {
TEST(MDStringTest, PrintingSimple) {
char *str = new char[13];
strncpy(str, "testing 1 2 3", 13);
- MDString *s = MDString::get(str, str+13);
+ MDString *s = MDString::get(Context, StringRef(str, 13));
strncpy(str, "aaaaaaaaaaaaa", 13);
delete[] str;
- std::ostringstream oss;
+ std::string Str;
+ raw_string_ostream oss(Str);
s->print(oss);
EXPECT_STREQ("metadata !\"testing 1 2 3\"", oss.str().c_str());
}
@@ -56,8 +59,9 @@ TEST(MDStringTest, PrintingSimple) {
// Test printing of MDString with non-printable characters.
TEST(MDStringTest, PrintingComplex) {
char str[5] = {0, '\n', '"', '\\', -1};
- MDString *s = MDString::get(str+0, str+5);
- std::ostringstream oss;
+ MDString *s = MDString::get(Context, StringRef(str+0, 5));
+ std::string Str;
+ raw_string_ostream oss(Str);
s->print(oss);
EXPECT_STREQ("metadata !\"\\00\\0A\\22\\5C\\FF\"", oss.str().c_str());
}
@@ -67,21 +71,25 @@ TEST(MDNodeTest, Simple) {
char x[3] = { 'a', 'b', 'c' };
char y[3] = { '1', '2', '3' };
- MDString *s1 = MDString::get(&x[0], &x[3]);
- MDString *s2 = MDString::get(&y[0], &y[3]);
- ConstantInt *CI = ConstantInt::get(APInt(8, 0));
+ MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
+ MDString *s2 = MDString::get(Context, StringRef(&y[0], 3));
+ ConstantInt *CI = ConstantInt::get(getGlobalContext(), APInt(8, 0));
std::vector<Value *> V;
V.push_back(s1);
V.push_back(CI);
V.push_back(s2);
- MDNode *n1 = MDNode::get(&V[0], 3);
+ MDNode *n1 = MDNode::get(Context, &V[0], 3);
Value *const c1 = n1;
- MDNode *n2 = MDNode::get(&c1, 1);
- MDNode *n3 = MDNode::get(&V[0], 3);
+ MDNode *n2 = MDNode::get(Context, &c1, 1);
+ MDNode *n3 = MDNode::get(Context, &V[0], 3);
EXPECT_NE(n1, n2);
+#ifdef ENABLE_MDNODE_UNIQUING
EXPECT_EQ(n1, n3);
+#else
+ (void) n3;
+#endif
EXPECT_EQ(3u, n1->getNumElements());
EXPECT_EQ(s1, n1->getElement(0));
@@ -91,49 +99,55 @@ TEST(MDNodeTest, Simple) {
EXPECT_EQ(1u, n2->getNumElements());
EXPECT_EQ(n1, n2->getElement(0));
- std::ostringstream oss1, oss2;
- n1->print(oss1);
- n2->print(oss2);
- EXPECT_STREQ("metadata !{metadata !\"abc\", i8 0, metadata !\"123\"}",
- oss1.str().c_str());
- EXPECT_STREQ("metadata !{metadata !{metadata !\"abc\", i8 0, "
- "metadata !\"123\"}}",
- oss2.str().c_str());
-}
-
-TEST(MDNodeTest, RAUW) {
- Constant *C = ConstantInt::get(Type::Int32Ty, 1);
- Instruction *I = new BitCastInst(C, Type::Int32Ty);
-
- Value *const V1 = I;
- MDNode *n1 = MDNode::get(&V1, 1);
- WeakVH wn1 = n1;
-
- Value *const V2 = C;
- MDNode *n2 = MDNode::get(&V2, 1);
- WeakVH wn2 = n2;
-
- EXPECT_NE(wn1, wn2);
-
- I->replaceAllUsesWith(C);
-
- EXPECT_EQ(wn1, wn2);
+ std::string Str;
+ raw_string_ostream oss(Str);
+ n1->print(oss);
+ EXPECT_STREQ("!0 = metadata !{metadata !\"abc\", i8 0, metadata !\"123\"}\n",
+ oss.str().c_str());
+ Str.clear();
+ n2->print(oss);
+ EXPECT_STREQ("!0 = metadata !{metadata !1}\n"
+ "!1 = metadata !{metadata !\"abc\", i8 0, metadata !\"123\"}\n",
+ oss.str().c_str());
}
TEST(MDNodeTest, Delete) {
- Constant *C = ConstantInt::get(Type::Int32Ty, 1);
- Instruction *I = new BitCastInst(C, Type::Int32Ty);
+ Constant *C = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 1);
+ Instruction *I = new BitCastInst(C, Type::getInt32Ty(getGlobalContext()));
Value *const V = I;
- MDNode *n = MDNode::get(&V, 1);
+ MDNode *n = MDNode::get(Context, &V, 1);
WeakVH wvh = n;
EXPECT_EQ(n, wvh);
delete I;
- std::ostringstream oss;
+ std::string Str;
+ raw_string_ostream oss(Str);
wvh->print(oss);
- EXPECT_STREQ("metadata !{null}", oss.str().c_str());
+ EXPECT_STREQ("!0 = metadata !{null}\n", oss.str().c_str());
+}
+
+TEST(NamedMDNodeTest, Search) {
+ Constant *C = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 1);
+ Constant *C2 = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 2);
+
+ Value *const V = C;
+ Value *const V2 = C2;
+ MDNode *n = MDNode::get(Context, &V, 1);
+ MDNode *n2 = MDNode::get(Context, &V2, 1);
+
+ MetadataBase *Nodes[2] = { n, n2 };
+
+ Module *M = new Module("MyModule", getGlobalContext());
+ const char *Name = "llvm.NMD1";
+ NamedMDNode *NMD = NamedMDNode::Create(getGlobalContext(), Name, &Nodes[0], 2, M);
+ std::string Str;
+ raw_string_ostream oss(Str);
+ NMD->print(oss);
+ EXPECT_STREQ("!llvm.NMD1 = !{!0, !1}\n!0 = metadata !{i32 1}\n"
+ "!1 = metadata !{i32 2}\n",
+ oss.str().c_str());
}
}
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index 8122e2cad9b2..cb8f9ebb939b 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -154,7 +154,7 @@ namespace llvm {
struct CGPass : public PassTest<CallGraph, CallGraphSCCPass> {
public:
- virtual bool runOnSCC(const std::vector<CallGraphNode*> &SCMM) {
+ virtual bool runOnSCC(std::vector<CallGraphNode*> &SCMM) {
EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
run();
return false;
@@ -272,7 +272,7 @@ namespace llvm {
char OnTheFlyTest::ID=0;
TEST(PassManager, RunOnce) {
- Module M("test-once", *new LLVMContext());
+ Module M("test-once", getGlobalContext());
struct ModuleNDNM *mNDNM = new ModuleNDNM();
struct ModuleDNM *mDNM = new ModuleDNM();
struct ModuleNDM *mNDM = new ModuleNDM();
@@ -296,7 +296,7 @@ namespace llvm {
}
TEST(PassManager, ReRun) {
- Module M("test-rerun", *new LLVMContext());
+ Module M("test-rerun", getGlobalContext());
struct ModuleNDNM *mNDNM = new ModuleNDNM();
struct ModuleDNM *mDNM = new ModuleDNM();
struct ModuleNDM *mNDM = new ModuleNDM();
@@ -387,7 +387,7 @@ namespace llvm {
Module* makeLLVMModule() {
// Module Construction
- Module* mod = new Module("test-mem", *new LLVMContext());
+ Module* mod = new Module("test-mem", getGlobalContext());
mod->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-"
"a0:0:64-s0:64:64-f80:128:128");
@@ -396,14 +396,14 @@ namespace llvm {
// Type Definitions
std::vector<const Type*>FuncTy_0_args;
FunctionType* FuncTy_0 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
+ /*Result=*/IntegerType::get(getGlobalContext(), 32),
/*Params=*/FuncTy_0_args,
/*isVarArg=*/false);
std::vector<const Type*>FuncTy_2_args;
- FuncTy_2_args.push_back(IntegerType::get(1));
+ FuncTy_2_args.push_back(IntegerType::get(getGlobalContext(), 1));
FunctionType* FuncTy_2 = FunctionType::get(
- /*Result=*/Type::VoidTy,
+ /*Result=*/Type::getVoidTy(getGlobalContext()),
/*Params=*/FuncTy_2_args,
/*isVarArg=*/false);
@@ -454,7 +454,7 @@ namespace llvm {
// Function: test1 (func_test1)
{
- BasicBlock* label_entry = BasicBlock::Create("entry",func_test1,0);
+ BasicBlock* label_entry = BasicBlock::Create(getGlobalContext(), "entry",func_test1,0);
// Block entry (label_entry)
CallInst* int32_3 = CallInst::Create(func_test2, "", label_entry);
@@ -462,14 +462,14 @@ namespace llvm {
int32_3->setTailCall(false);AttrListPtr int32_3_PAL;
int32_3->setAttributes(int32_3_PAL);
- ReturnInst::Create(int32_3, label_entry);
+ ReturnInst::Create(getGlobalContext(), int32_3, label_entry);
}
// Function: test2 (func_test2)
{
- BasicBlock* label_entry_5 = BasicBlock::Create("entry",func_test2,0);
+ BasicBlock* label_entry_5 = BasicBlock::Create(getGlobalContext(), "entry",func_test2,0);
// Block entry (label_entry_5)
CallInst* int32_6 = CallInst::Create(func_test3, "", label_entry_5);
@@ -477,14 +477,14 @@ namespace llvm {
int32_6->setTailCall(false);AttrListPtr int32_6_PAL;
int32_6->setAttributes(int32_6_PAL);
- ReturnInst::Create(int32_6, label_entry_5);
+ ReturnInst::Create(getGlobalContext(), int32_6, label_entry_5);
}
// Function: test3 (func_test3)
{
- BasicBlock* label_entry_8 = BasicBlock::Create("entry",func_test3,0);
+ BasicBlock* label_entry_8 = BasicBlock::Create(getGlobalContext(), "entry",func_test3,0);
// Block entry (label_entry_8)
CallInst* int32_9 = CallInst::Create(func_test1, "", label_entry_8);
@@ -492,7 +492,7 @@ namespace llvm {
int32_9->setTailCall(false);AttrListPtr int32_9_PAL;
int32_9->setAttributes(int32_9_PAL);
- ReturnInst::Create(int32_9, label_entry_8);
+ ReturnInst::Create(getGlobalContext(), int32_9, label_entry_8);
}
@@ -502,10 +502,10 @@ namespace llvm {
Value* int1_f = args++;
int1_f->setName("f");
- BasicBlock* label_entry_11 = BasicBlock::Create("entry",func_test4,0);
- BasicBlock* label_bb = BasicBlock::Create("bb",func_test4,0);
- BasicBlock* label_bb1 = BasicBlock::Create("bb1",func_test4,0);
- BasicBlock* label_return = BasicBlock::Create("return",func_test4,0);
+ BasicBlock* label_entry_11 = BasicBlock::Create(getGlobalContext(), "entry",func_test4,0);
+ BasicBlock* label_bb = BasicBlock::Create(getGlobalContext(), "bb",func_test4,0);
+ BasicBlock* label_bb1 = BasicBlock::Create(getGlobalContext(), "bb1",func_test4,0);
+ BasicBlock* label_return = BasicBlock::Create(getGlobalContext(), "return",func_test4,0);
// Block entry (label_entry_11)
BranchInst::Create(label_bb, label_entry_11);
@@ -517,7 +517,7 @@ namespace llvm {
BranchInst::Create(label_bb1, label_return, int1_f, label_bb1);
// Block return (label_return)
- ReturnInst::Create(label_return);
+ ReturnInst::Create(getGlobalContext(), label_return);
}
return mod;
diff --git a/utils/FileCheck/CMakeLists.txt b/utils/FileCheck/CMakeLists.txt
new file mode 100644
index 000000000000..8fee03fb57b0
--- /dev/null
+++ b/utils/FileCheck/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_executable(FileCheck
+ FileCheck.cpp
+ )
+
+target_link_libraries(FileCheck LLVMSupport LLVMSystem)
+if( MINGW )
+ target_link_libraries(FileCheck imagehlp psapi)
+endif( MINGW )
+if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+ target_link_libraries(FileCheck pthread)
+endif()
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
new file mode 100644
index 000000000000..b4d1f84859ce
--- /dev/null
+++ b/utils/FileCheck/FileCheck.cpp
@@ -0,0 +1,624 @@
+//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FileCheck does a line-by-line check of a file that validates whether it
+// contains the expected content. This is useful for regression tests etc.
+//
+// This program exits with an error status of 2 on error, exit status of 0 if
+// the file matched the expected contents, and exit status of 1 if it did not
+// contain the expected contents.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
+#include "llvm/ADT/StringMap.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<std::string>
+CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
+
+static cl::opt<std::string>
+InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
+ cl::init("-"), cl::value_desc("filename"));
+
+static cl::opt<std::string>
+CheckPrefix("check-prefix", cl::init("CHECK"),
+ cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
+
+static cl::opt<bool>
+NoCanonicalizeWhiteSpace("strict-whitespace",
+ cl::desc("Do not treat all horizontal whitespace as equivalent"));
+
+//===----------------------------------------------------------------------===//
+// Pattern Handling Code.
+//===----------------------------------------------------------------------===//
+
+class Pattern {
+ SMLoc PatternLoc;
+
+ /// FixedStr - If non-empty, this pattern is a fixed string match with the
+ /// specified fixed string.
+ StringRef FixedStr;
+
+ /// RegEx - If non-empty, this is a regex pattern.
+ std::string RegExStr;
+
+ /// VariableUses - Entries in this vector map to uses of a variable in the
+ /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
+ /// "foobaz" and we'll get an entry in this vector that tells us to insert the
+ /// value of bar at offset 3.
+ std::vector<std::pair<StringRef, unsigned> > VariableUses;
+
+ /// VariableDefs - Entries in this vector map to definitions of a variable in
+ /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will
+ /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The
+ /// index indicates what parenthesized value captures the variable value.
+ std::vector<std::pair<StringRef, unsigned> > VariableDefs;
+
+public:
+
+ Pattern() { }
+
+ bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
+
+ /// Match - Match the pattern string against the input buffer Buffer. This
+ /// returns the position that is matched or npos if there is no match. If
+ /// there is a match, the size of the matched string is returned in MatchLen.
+ ///
+ /// The VariableTable StringMap provides the current values of filecheck
+ /// variables and is updated if this match defines new values.
+ size_t Match(StringRef Buffer, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable) const;
+
+private:
+ static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
+ bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM);
+};
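To make the bookkeeping above concrete: parsing the pattern foo[[bar:.*]]baz yields RegExStr == "foo(.*)baz" with the pair ("bar", 1) in VariableDefs, while parsing foo[[bar]]baz yields RegExStr == "foobaz" with ("bar", 3) in VariableUses — Match() later splices the current value of bar into the regex at offset 3.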
+
+
+bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
+ PatternLoc = SMLoc::getFromPointer(PatternStr.data());
+
+ // Ignore trailing whitespace.
+ while (!PatternStr.empty() &&
+ (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
+ PatternStr = PatternStr.substr(0, PatternStr.size()-1);
+
+ // Check that there is something on the line.
+ if (PatternStr.empty()) {
+ SM.PrintMessage(PatternLoc, "found empty check string with prefix '" +
+ CheckPrefix+":'", "error");
+ return true;
+ }
+
+ // Check to see if this is a fixed string, or if it has regex pieces.
+ if (PatternStr.size() < 2 ||
+ (PatternStr.find("{{") == StringRef::npos &&
+ PatternStr.find("[[") == StringRef::npos)) {
+ FixedStr = PatternStr;
+ return false;
+ }
+
+ // Paren value #0 is for the fully matched string. Any new parenthesized
+ // values are numbered from there.
+ unsigned CurParen = 1;
+
+ // Otherwise, there is at least one regex piece. Build up the regex pattern
+ // by escaping scary characters in fixed strings, building up one big regex.
+ while (!PatternStr.empty()) {
+ // RegEx matches.
+ if (PatternStr.size() >= 2 &&
+ PatternStr[0] == '{' && PatternStr[1] == '{') {
+
+ // This is the start of a regex match. Scan for the terminating }}.
+ size_t End = PatternStr.find("}}");
+ if (End == StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+ "found start of regex string with no end '}}'", "error");
+ return true;
+ }
+
+ if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
+ return true;
+ PatternStr = PatternStr.substr(End+2);
+ continue;
+ }
+
+ // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
+ // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
+ // second form is [[foo]] which is a reference to foo. The variable name
+ // itself must be of the form "[a-zA-Z][0-9a-zA-Z]*", otherwise we reject
+ // it. This is to catch some common errors.
+ if (PatternStr.size() >= 2 &&
+ PatternStr[0] == '[' && PatternStr[1] == '[') {
+ // Verify that it is terminated properly.
+ size_t End = PatternStr.find("]]");
+ if (End == StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+ "invalid named regex reference, no ]] found", "error");
+ return true;
+ }
+
+ StringRef MatchStr = PatternStr.substr(2, End-2);
+ PatternStr = PatternStr.substr(End+2);
+
+ // Get the regex name (e.g. "foo").
+ size_t NameEnd = MatchStr.find(':');
+ StringRef Name = MatchStr.substr(0, NameEnd);
+
+ if (Name.empty()) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ "invalid name in named regex: empty name", "error");
+ return true;
+ }
+
+ // Verify that the name is well formed.
+ for (unsigned i = 0, e = Name.size(); i != e; ++i)
+ if ((Name[i] < 'a' || Name[i] > 'z') &&
+ (Name[i] < 'A' || Name[i] > 'Z') &&
+ (Name[i] < '0' || Name[i] > '9')) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
+ "invalid name in named regex", "error");
+ return true;
+ }
+
+ // Name can't start with a digit.
+ if (isdigit(Name[0])) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ "invalid name in named regex", "error");
+ return true;
+ }
+
+ // Handle [[foo]].
+ if (NameEnd == StringRef::npos) {
+ VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+ continue;
+ }
+
+ // Handle [[foo:.*]].
+ VariableDefs.push_back(std::make_pair(Name, CurParen));
+ RegExStr += '(';
+ ++CurParen;
+
+ if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
+ return true;
+
+ RegExStr += ')';
+ }
+
+ // Handle fixed string matches.
+ // Find the end, which is the start of the next regex.
+ size_t FixedMatchEnd = PatternStr.find("{{");
+ FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
+ AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
+ PatternStr = PatternStr.substr(FixedMatchEnd);
+ continue;
+ }
+
+ return false;
+}
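For reference, the two named-regex forms handled above look like this in a check file (illustrative lines, not taken from this patch):

    ; CHECK: load [[REGISTER:%[a-z0-9]+]]
    ; CHECK: store {{.*}}, [[REGISTER]]

The first line defines the FileCheck variable REGISTER from whatever %[a-z0-9]+ matches; the second requires that exact text to reappear.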
+
+void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
+ // Add the characters from FixedStr to the regex, escaping as needed. This
+ // avoids "leaning toothpicks" in common patterns.
+ for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
+ switch (FixedStr[i]) {
+ // These are the special characters matched in "p_ere_exp".
+ case '(':
+ case ')':
+ case '^':
+ case '$':
+ case '|':
+ case '*':
+ case '+':
+ case '?':
+ case '.':
+ case '[':
+ case '\\':
+ case '{':
+ TheStr += '\\';
+ // FALL THROUGH.
+ default:
+ TheStr += FixedStr[i];
+ break;
+ }
+ }
+}
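For example, the fixed string "a+b (c)" is appended as the regex text "a\+b \(c\)", so every metacharacter in the switch above ends up matching itself literally.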
+
+bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
+ SourceMgr &SM) {
+ Regex R(RegexStr);
+ std::string Error;
+ if (!R.isValid(Error)) {
+ SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()),
+ "invalid regex: " + Error, "error");
+ return true;
+ }
+
+ RegExStr += RegexStr.str();
+ CurParen += R.getNumMatches();
+ return false;
+}
+
+/// Match - Match the pattern string against the input buffer Buffer. This
+/// returns the position that is matched or npos if there is no match. If
+/// there is a match, the size of the matched string is returned in MatchLen.
+size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable) const {
+ // If this is a fixed string pattern, just match it now.
+ if (!FixedStr.empty()) {
+ MatchLen = FixedStr.size();
+ return Buffer.find(FixedStr);
+ }
+
+ // Regex match.
+
+ // If there are variable uses, we need to create a temporary string with the
+ // actual value.
+ StringRef RegExToMatch = RegExStr;
+ std::string TmpStr;
+ if (!VariableUses.empty()) {
+ TmpStr = RegExStr;
+
+ unsigned InsertOffset = 0;
+ for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
+ // Look up the value and escape it so that we can plop it into the regex.
+ std::string Value;
+ AddFixedStringToRegEx(VariableTable[VariableUses[i].first], Value);
+
+ // Plop it into the regex at the adjusted offset.
+ TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
+ Value.begin(), Value.end());
+ InsertOffset += Value.size();
+ }
+
+ // Match the newly constructed regex.
+ RegExToMatch = TmpStr;
+ }
+
+
+ SmallVector<StringRef, 4> MatchInfo;
+ if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
+ return StringRef::npos;
+
+ // Successful regex match.
+ assert(!MatchInfo.empty() && "Didn't get any match");
+ StringRef FullMatch = MatchInfo[0];
+
+ // If this defines any variables, remember their values.
+ for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) {
+ assert(VariableDefs[i].second < MatchInfo.size() &&
+ "Internal paren error");
+ VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second];
+ }
+
+ MatchLen = FullMatch.size();
+ return FullMatch.data()-Buffer.data();
+}
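Continuing the earlier example: matching store {{.*}}, [[REGISTER]] when VariableTable maps REGISTER to "%t3" builds the temporary regex "store .*, %t3" — the value is escaped through AddFixedStringToRegEx first, and InsertOffset keeps any subsequent variable insertions aligned after the string grows.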
+
+
+//===----------------------------------------------------------------------===//
+// Check Strings.
+//===----------------------------------------------------------------------===//
+
+/// CheckString - This is a check that we found in the input file.
+struct CheckString {
+ /// Pat - The pattern to match.
+ Pattern Pat;
+
+ /// Loc - The location in the match file that the check string was specified.
+ SMLoc Loc;
+
+ /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
+ /// to a CHECK: directive).
+ bool IsCheckNext;
+
+ /// NotStrings - These are all of the strings that are disallowed from
+ /// occurring between this match string and the previous one (or start of
+ /// file).
+ std::vector<std::pair<SMLoc, Pattern> > NotStrings;
+
+ CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
+ : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
+};
+
+/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified
+/// memory buffer, free it, and return a new one.
+static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
+ SmallVector<char, 16> NewFile;
+ NewFile.reserve(MB->getBufferSize());
+
+ for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
+ Ptr != End; ++Ptr) {
+ // If C is not a horizontal whitespace, skip it.
+ if (*Ptr != ' ' && *Ptr != '\t') {
+ NewFile.push_back(*Ptr);
+ continue;
+ }
+
+ // Otherwise, add one space and advance over neighboring space.
+ NewFile.push_back(' ');
+ while (Ptr+1 != End &&
+ (Ptr[1] == ' ' || Ptr[1] == '\t'))
+ ++Ptr;
+ }
+
+ // Free the old buffer and return a new one.
+ MemoryBuffer *MB2 =
+ MemoryBuffer::getMemBufferCopy(NewFile.data(),
+ NewFile.data() + NewFile.size(),
+ MB->getBufferIdentifier());
+
+ delete MB;
+ return MB2;
+}
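So an input line such as "mov \t  %eax,   %ebx" canonicalizes to "mov %eax, %ebx", which is what lets checks ignore incidental spacing unless -strict-whitespace is given.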
+
+
+/// ReadCheckFile - Read the check file, which specifies the sequence of
+/// expected strings. The strings are added to the CheckStrings vector.
+static bool ReadCheckFile(SourceMgr &SM,
+ std::vector<CheckString> &CheckStrings) {
+ // Open the check file, and tell SourceMgr about it.
+ std::string ErrorStr;
+ MemoryBuffer *F =
+ MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
+ if (F == 0) {
+ errs() << "Could not open check file '" << CheckFilename << "': "
+ << ErrorStr << '\n';
+ return true;
+ }
+
+ // If we want to canonicalize whitespace, strip excess whitespace from the
+ // buffer containing the CHECK lines.
+ if (!NoCanonicalizeWhiteSpace)
+ F = CanonicalizeInputFile(F);
+
+ SM.AddNewSourceBuffer(F, SMLoc());
+
+ // Find all instances of CheckPrefix followed by : in the file.
+ StringRef Buffer = F->getBuffer();
+
+ std::vector<std::pair<SMLoc, Pattern> > NotMatches;
+
+ while (1) {
+ // See if Prefix occurs in the memory buffer.
+ Buffer = Buffer.substr(Buffer.find(CheckPrefix));
+
+ // If we didn't find a match, we're done.
+ if (Buffer.empty())
+ break;
+
+ const char *CheckPrefixStart = Buffer.data();
+
+ // When we find a check prefix, keep track of whether we find CHECK: or
+ // CHECK-NEXT:
+ bool IsCheckNext = false, IsCheckNot = false;
+
+ // Verify that the : is present after the prefix.
+ if (Buffer[CheckPrefix.size()] == ':') {
+ Buffer = Buffer.substr(CheckPrefix.size()+1);
+ } else if (Buffer.size() > CheckPrefix.size()+6 &&
+ memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
+ Buffer = Buffer.substr(CheckPrefix.size()+7);
+ IsCheckNext = true;
+ } else if (Buffer.size() > CheckPrefix.size()+5 &&
+ memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
+ Buffer = Buffer.substr(CheckPrefix.size()+6);
+ IsCheckNot = true;
+ } else {
+ Buffer = Buffer.substr(1);
+ continue;
+ }
+
+ // Okay, we found the prefix, yay. Remember the rest of the line, but
+ // ignore leading and trailing whitespace.
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
+
+ // Scan ahead to the end of line.
+ size_t EOL = Buffer.find_first_of("\n\r");
+
+ // Parse the pattern.
+ Pattern P;
+ if (P.ParsePattern(Buffer.substr(0, EOL), SM))
+ return true;
+
+ Buffer = Buffer.substr(EOL);
+
+
+ // Verify that CHECK-NEXT lines have at least one CHECK line before them.
+ if (IsCheckNext && CheckStrings.empty()) {
+ SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
+ "found '"+CheckPrefix+"-NEXT:' without previous '"+
+ CheckPrefix+ ": line", "error");
+ return true;
+ }
+
+ // Handle CHECK-NOT.
+ if (IsCheckNot) {
+ NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
+ P));
+ continue;
+ }
+
+
+ // Okay, add the string we captured to the output vector and move on.
+ CheckStrings.push_back(CheckString(P,
+ SMLoc::getFromPointer(Buffer.data()),
+ IsCheckNext));
+ std::swap(NotMatches, CheckStrings.back().NotStrings);
+ }
+
+ if (CheckStrings.empty()) {
+ errs() << "error: no check strings found with prefix '" << CheckPrefix
+ << ":'\n";
+ return true;
+ }
+
+ if (!NotMatches.empty()) {
+ errs() << "error: '" << CheckPrefix
+ << "-NOT:' not supported after last check line.\n";
+ return true;
+ }
+
+ return false;
+}
+
+static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
+ StringRef Buffer) {
+ // We have an error; emit an error message.
+ SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
+ "error");
+
+ // Print the "scanning from here" line. If the current position is at the
+ // end of a line, advance to the start of the next line.
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
+
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
+ "note");
+}
+
+/// CountNumNewlinesBetween - Count the number of newlines in the specified
+/// range.
+static unsigned CountNumNewlinesBetween(StringRef Range) {
+ unsigned NumNewLines = 0;
+ while (1) {
+ // Scan for newline.
+ Range = Range.substr(Range.find_first_of("\n\r"));
+ if (Range.empty()) return NumNewLines;
+
+ ++NumNewLines;
+
+ // Handle \n\r and \r\n as a single newline.
+ if (Range.size() > 1 &&
+ (Range[1] == '\n' || Range[1] == '\r') &&
+ (Range[0] != Range[1]))
+ Range = Range.substr(1);
+ Range = Range.substr(1);
+ }
+}
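For example, "a\r\nb" counts as one newline while "a\n\nb" counts as two — exactly the distinction the CHECK-NEXT logic below depends on.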
+
+int main(int argc, char **argv) {
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ cl::ParseCommandLineOptions(argc, argv);
+
+ SourceMgr SM;
+
+ // Read the expected strings from the check file.
+ std::vector<CheckString> CheckStrings;
+ if (ReadCheckFile(SM, CheckStrings))
+ return 2;
+
+ // Open the file to check and add it to SourceMgr.
+ std::string ErrorStr;
+ MemoryBuffer *F =
+ MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
+ if (F == 0) {
+ errs() << "Could not open input file '" << InputFilename << "': "
+ << ErrorStr << '\n';
+ return 2;
+ }
+
+ // Remove duplicate spaces in the input file if requested.
+ if (!NoCanonicalizeWhiteSpace)
+ F = CanonicalizeInputFile(F);
+
+ SM.AddNewSourceBuffer(F, SMLoc());
+
+ /// VariableTable - This holds all the current filecheck variables.
+ StringMap<StringRef> VariableTable;
+
+ // Check that we have all of the expected strings, in order, in the input
+ // file.
+ StringRef Buffer = F->getBuffer();
+
+ const char *LastMatch = Buffer.data();
+
+ for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
+ const CheckString &CheckStr = CheckStrings[StrNo];
+
+ StringRef SearchFrom = Buffer;
+
+ // Find StrNo in the file.
+ size_t MatchLen = 0;
+ Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable));
+
+ // If we didn't find a match, reject the input.
+ if (Buffer.empty()) {
+ PrintCheckFailed(SM, CheckStr, SearchFrom);
+ return 1;
+ }
+
+ StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
+
+ // If this check is a "CHECK-NEXT", verify that the previous match was on
+ // the previous line (i.e. that there is one newline between them).
+ if (CheckStr.IsCheckNext) {
+ // Count the number of newlines between the previous match and this one.
+ assert(LastMatch != F->getBufferStart() &&
+ "CHECK-NEXT can't be the first check in a file");
+
+ unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
+ if (NumNewLines == 0) {
+ SM.PrintMessage(CheckStr.Loc,
+ CheckPrefix+"-NEXT: is on the same line as previous match",
+ "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+ "'next' match was here", "note");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
+ "previous match was here", "note");
+ return 1;
+ }
+
+ if (NumNewLines != 1) {
+ SM.PrintMessage(CheckStr.Loc,
+ CheckPrefix+
+ "-NEXT: is not on the line after the previous match",
+ "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+ "'next' match was here", "note");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
+ "previous match was here", "note");
+ return 1;
+ }
+ }
+
+ // If this match had "not strings", verify that they don't exist in the
+ // skipped region.
+ for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size();
+ ChunkNo != e; ++ChunkNo) {
+ size_t MatchLen = 0;
+ size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion,
+ MatchLen,
+ VariableTable);
+ if (Pos == StringRef::npos) continue;
+
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
+ CheckPrefix+"-NOT: string occurred!", "error");
+ SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
+ CheckPrefix+"-NOT: pattern specified here", "note");
+ return 1;
+ }
+
+
+ // Otherwise, everything is good. Step over the matched text and remember
+ // the position after the match as the end of the last match.
+ Buffer = Buffer.substr(MatchLen);
+ LastMatch = Buffer.data();
+ }
+
+ return 0;
+}
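A minimal end-to-end use of the tool as implemented above (an illustrative session, not part of the patch): given a check file test.txt containing

    ; CHECK: define i32 @add(
    ; CHECK-NEXT: entry:
    ; CHECK-NOT: call
    ; CHECK: ret i32

a pipeline like some-tool input.ll | FileCheck test.txt exits 0 when the define line appears, entry: is on the very next line, ret i32 follows later, and no call occurs in between; it exits 1 when a check fails to match and 2 when the check file itself cannot be read or parsed.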
diff --git a/utils/FileCheck/Makefile b/utils/FileCheck/Makefile
new file mode 100644
index 000000000000..f1af5b649e7a
--- /dev/null
+++ b/utils/FileCheck/Makefile
@@ -0,0 +1,21 @@
+##===- utils/FileCheck/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = FileCheck
+USEDLIBS = LLVMSupport.a LLVMSystem.a
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/utils/FileUpdate/CMakeLists.txt b/utils/FileUpdate/CMakeLists.txt
new file mode 100644
index 000000000000..bacbd16b90f9
--- /dev/null
+++ b/utils/FileUpdate/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_executable(FileUpdate
+ FileUpdate.cpp
+ )
+
+target_link_libraries(FileUpdate LLVMSupport LLVMSystem)
+if( MINGW )
+ target_link_libraries(FileUpdate imagehlp psapi)
+endif( MINGW )
+if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+ target_link_libraries(FileUpdate pthread)
+endif()
diff --git a/utils/FileUpdate/FileUpdate.cpp b/utils/FileUpdate/FileUpdate.cpp
new file mode 100644
index 000000000000..26fd75ef68b3
--- /dev/null
+++ b/utils/FileUpdate/FileUpdate.cpp
@@ -0,0 +1,86 @@
+//===- FileUpdate.cpp - Conditionally update a file -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FileUpdate is a utility for conditionally updating a file from its input
+// based on whether the input differs from the output. It is used to avoid
+// unnecessary modifications in a build system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
+using namespace llvm;
+
+static cl::opt<bool>
+Quiet("quiet", cl::desc("Don't print unnecessary status information"),
+ cl::init(false));
+
+static cl::opt<std::string>
+InputFilename("input-file", cl::desc("Input file (defaults to stdin)"),
+ cl::init("-"), cl::value_desc("filename"));
+
+static cl::opt<std::string>
+OutputFilename(cl::Positional, cl::desc("<output-file>"), cl::Required);
+
+int main(int argc, char **argv) {
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ cl::ParseCommandLineOptions(argc, argv);
+
+ // Get the input data.
+ std::string ErrorStr;
+ MemoryBuffer *In =
+ MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
+ if (In == 0) {
+ errs() << argv[0] << ": error: Unable to get input '"
+ << InputFilename << "': " << ErrorStr << '\n';
+ return 1;
+ }
+
+ // Get the output data.
+ MemoryBuffer *Out = MemoryBuffer::getFile(OutputFilename.c_str(), &ErrorStr);
+
+ // If the output exists and the contents match, we are done.
+ if (Out && In->getBufferSize() == Out->getBufferSize() &&
+ memcmp(In->getBufferStart(), Out->getBufferStart(),
+ Out->getBufferSize()) == 0) {
+ if (!Quiet)
+ outs() << argv[0] << ": Not updating '" << OutputFilename
+ << "', contents match input.\n";
+ return 0;
+ }
+
+ delete Out;
+
+ // Otherwise, overwrite the output.
+ if (!Quiet)
+ outs() << argv[0] << ": Updating '" << OutputFilename
+ << "', contents changed.\n";
+ raw_fd_ostream OutStream(OutputFilename.c_str(), ErrorStr,
+ raw_fd_ostream::F_Binary);
+ if (!ErrorStr.empty()) {
+ errs() << argv[0] << ": Unable to write output '"
+ << OutputFilename << "': " << ErrorStr << '\n';
+ return 1;
+ }
+
+ OutStream.write(In->getBufferStart(), In->getBufferSize());
+ OutStream.close();
+
+ if (OutStream.has_error()) {
+ errs() << argv[0] << ": Could not open output file '"
+ << OutputFilename << "': " << ErrorStr << '\n';
+ return 1;
+ }
+
+ return 0;
+}
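The intended use is from a build system: generate into a temporary file, then let FileUpdate touch the real output only when the bytes changed, so dependents are not rebuilt needlessly. A hypothetical make rule (file and variable names illustrative):

    tblgen.inc: tblgen.inc.tmp
    	$(ToolDir)/FileUpdate -quiet -input-file tblgen.inc.tmp tblgen.inc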
diff --git a/utils/FileUpdate/Makefile b/utils/FileUpdate/Makefile
new file mode 100644
index 000000000000..5b545c207297
--- /dev/null
+++ b/utils/FileUpdate/Makefile
@@ -0,0 +1,21 @@
+##===- utils/FileUpdate/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = FileUpdate
+USEDLIBS = LLVMSupport.a LLVMSystem.a
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/utils/Makefile b/utils/Makefile
index c43086ba21dc..000705ead235 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ..
-PARALLEL_DIRS := TableGen fpcmp PerfectShuffle unittest
+PARALLEL_DIRS := TableGen fpcmp PerfectShuffle FileCheck FileUpdate count not unittest
EXTRA_DIST := cgiplotNLT.pl check-each-file codegen-diff countloc.sh cvsupdate \
DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index b4906504936a..477df8fc12e0 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -55,6 +55,10 @@ use Socket;
# override the default.
# -ldflags Next argument specifies that linker options that override
# the default.
+# -test-cflags Next argument specifies the C compilation options that
+# override the default when running the testsuite.
+# -test-cxxflags Next argument specifies the C++ compilation options that
+# override the default when running the testsuite.
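+# For example: -test-cflags '-O2 -m32' (the flag values are illustrative).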
# -compileflags Next argument specifies extra options passed to make when
# building LLVM.
# -use-gmake Use gmake instead of the default make command to build
@@ -101,7 +105,7 @@ my $HOME = $ENV{'HOME'};
my $SVNURL = $ENV{"SVNURL"};
$SVNURL = 'http://llvm.org/svn/llvm-project' unless $SVNURL;
my $TestSVNURL = $ENV{"TestSVNURL"};
-$TestSVNURL = 'https://llvm.org/svn/llvm-project' unless $TestSVNURL;
+$TestSVNURL = 'http://llvm.org/svn/llvm-project' unless $TestSVNURL;
my $CVSRootDir = $ENV{'CVSROOT'};
$CVSRootDir = "/home/vadve/shared/PublicCVS" unless $CVSRootDir;
my $BuildDir = $ENV{'BUILDDIR'};
@@ -139,6 +143,7 @@ $SUBMITSCRIPT = "/nightlytest/NightlyTestAccept.php";
$SUBMITAUX="";
$SUBMIT = 1;
$PARALLELJOBS = "2";
+my $TESTFLAGS="";
while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
shift;
@@ -148,6 +153,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
if (/^-nocheckout$/) { $NOCHECKOUT = 1; next; }
if (/^-nocvsstats$/) { $NOCVSSTATS = 1; next; }
if (/^-noremove$/) { $NOREMOVE = 1; next; }
+ if (/^-noremoveatend$/) { $NOREMOVEATEND = 1; next; }
if (/^-noremoveresults$/){ $NOREMOVERESULTS = 1; next; }
if (/^-notest$/) { $NOTEST = 1; next; }
if (/^-norunningtests$/) { next; } # Backward compatibility, ignored.
@@ -180,6 +186,8 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
shift; next; }
if (/^-with-externals$/) { $CONFIGUREARGS .= " --with-externals=$ARGV[0]";
shift; next; }
+ if (/^-configure-args$/) { $CONFIGUREARGS .= " $ARGV[0]";
+ shift; next; }
if (/^-submit-server/) { $SUBMITSERVER = "$ARGV[0]"; shift; next; }
if (/^-submit-script/) { $SUBMITSCRIPT = "$ARGV[0]"; shift; next; }
if (/^-submit-aux/) { $SUBMITAUX = "$ARGV[0]"; shift; next; }
@@ -200,6 +208,10 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
shift; next; }
if (/^-ldflags/) { $MAKEOPTS = "$MAKEOPTS LD.Flags=\'$ARGV[0]\'";
shift; next; }
+ if (/^-test-cflags/) { $TESTFLAGS = "$TESTFLAGS CFLAGS=\'$ARGV[0]\'";
+ shift; next; }
+ if (/^-test-cxxflags/) { $TESTFLAGS = "$TESTFLAGS CXXFLAGS=\'$ARGV[0]\'";
+ shift; next; }
if (/^-compileflags/) { $MAKEOPTS = "$MAKEOPTS $ARGV[0]"; shift; next; }
if (/^-use-gmake/) { $MAKECMD = "gmake"; shift; next; }
if (/^-extraflags/) { $CONFIGUREARGS .=
@@ -921,10 +933,11 @@ sub TestDirectory {
if (!$NOTEST) {
if( $VERBOSE) {
print "$MAKECMD -k $MAKEOPTS $PROGTESTOPTS report.nightly.csv ".
- "TEST=nightly > $ProgramTestLog 2>&1\n";
+ "$TESTFLAGS TEST=nightly > $ProgramTestLog 2>&1\n";
}
RunLoggedCommand("$MAKECMD -k $MAKEOPTS $PROGTESTOPTS report.nightly.csv ".
- "TEST=nightly", $ProgramTestLog, "TEST DIRECTORY $SubDir");
+ "$TESTFLAGS TEST=nightly",
+ $ProgramTestLog, "TEST DIRECTORY $SubDir");
$llcbeta_options=`$MAKECMD print-llcbeta-option`;
}
@@ -1147,6 +1160,6 @@ if ($SUBMIT || !($SUBMITAUX eq "")) {
#
##############################################################
system ( "$NICE rm -rf $BuildDir")
- if (!$NOCHECKOUT and !$NOREMOVE);
+ if (!$NOCHECKOUT and !$NOREMOVE and !$NOREMOVEATEND);
system ( "$NICE rm -rf $WebDir")
if (!$NOCHECKOUT and !$NOREMOVE and !$NOREMOVERESULTS);
diff --git a/utils/PerfectShuffle/PerfectShuffle.cpp b/utils/PerfectShuffle/PerfectShuffle.cpp
index 26c4cf44c6e2..b94a7d326d19 100644
--- a/utils/PerfectShuffle/PerfectShuffle.cpp
+++ b/utils/PerfectShuffle/PerfectShuffle.cpp
@@ -21,11 +21,11 @@
struct Operator;
// Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes
-// an element from that value of the input vectors. A value of 8 means the
+// the element at that position of the input vectors. A value of 8 means the
// entry is undefined.
// Mask manipulation functions.
-static inline unsigned short MakeMask(unsigned V0, unsigned V1,
+static inline unsigned short MakeMask(unsigned V0, unsigned V1,
unsigned V2, unsigned V3) {
return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
}
@@ -70,7 +70,7 @@ static unsigned short getLHSOnlyMask(unsigned short Mask) {
/// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
/// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
static unsigned getCompressedMask(unsigned short Mask) {
- return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
+ return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3);
}
@@ -87,7 +87,7 @@ struct ShuffleVal {
unsigned Cost; // Number of instrs used to generate this value.
Operator *Op; // The Operation used to generate this value.
unsigned short Arg0, Arg1; // Input operands for this value.
-
+
ShuffleVal() : Cost(1000000) {}
};
@@ -104,22 +104,25 @@ struct Operator {
unsigned short ShuffleMask;
unsigned short OpNum;
const char *Name;
-
- Operator(unsigned short shufflemask, const char *name, unsigned opnum)
- : ShuffleMask(shufflemask), OpNum(opnum), Name(name) {
+ unsigned Cost;
+
+ Operator(unsigned short shufflemask, const char *name, unsigned opnum,
+ unsigned cost = 1)
+ : ShuffleMask(shufflemask), OpNum(opnum), Name(name), Cost(cost) {
TheOperators.push_back(this);
}
~Operator() {
assert(TheOperators.back() == this);
TheOperators.pop_back();
}
-
+
bool isOnlyLHSOperator() const {
return isOnlyLHSMask(ShuffleMask);
}
-
+
const char *getName() const { return Name; }
-
+ unsigned getCost() const { return Cost; }
+
unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
// Extract the elements from LHSMask and RHSMask, as appropriate.
unsigned Result = 0;
@@ -156,7 +159,7 @@ static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
std::cerr << "t" << ValNo;
PrintMask(ThisOp, std::cerr);
std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
-
+
if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
@@ -168,7 +171,7 @@ static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
break;
}
}
-
+
if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
std::cerr << ", ";
if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
@@ -193,21 +196,21 @@ static unsigned getNumEntered() {
return Count;
}
-static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
+static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
unsigned &NumVals) {
if (ShufTab[Elt].Cost == 0) return;
// If this value has already been evaluated, it is free. FIXME: match undefs.
for (unsigned i = 0, e = NumVals; i != e; ++i)
if (Vals[i] == Elt) return;
-
+
// Otherwise, get the operands of the value, then add it.
unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
if (ShufTab[Arg0].Cost)
EvaluateOps(Arg0, Vals, NumVals);
if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
EvaluateOps(Arg1, Vals, NumVals);
-
+
Vals[NumVals++] = Elt;
}
@@ -220,7 +223,7 @@ int main() {
ShufTab[0x4567].Cost = 0;
ShufTab[0x4567].Op = 0;
ShufTab[0x4567].Arg0 = 0x4567;
-
+
// Seed the first-level of shuffles, shuffles whose inputs are the input to
// the vectorshuffle operation.
bool MadeChange = true;
@@ -230,7 +233,7 @@ int main() {
++OpCount;
std::cerr << "Starting iteration #" << OpCount << " with "
<< getNumEntered() << " entries established.\n";
-
+
// Scan the table for two reasons: First, compute the maximum cost of any
// operation left in the table. Second, make sure that values with undefs
// have the cheapest alternative that they match.
@@ -239,7 +242,7 @@ int main() {
if (!isValidMask(i)) continue;
if (ShufTab[i].Cost > MaxCost)
MaxCost = ShufTab[i].Cost;
-
+
// If this value has an undef, make it be computed the cheapest possible
// way of any of the things that it matches.
if (hasUndefElements(i)) {
@@ -266,10 +269,10 @@ int main() {
UndefIdx = 3;
else
abort();
-
+
unsigned MinVal = i;
unsigned MinCost = ShufTab[i].Cost;
-
+
// Scan the 8 entries.
for (unsigned j = 0; j != 8; ++j) {
unsigned NewElt = setMaskElt(i, UndefIdx, j);
@@ -278,15 +281,15 @@ int main() {
MinVal = NewElt;
}
}
-
+
// If we found something cheaper than what was here before, use it.
if (i != MinVal) {
MadeChange = true;
ShufTab[i] = ShufTab[MinVal];
}
- }
+ }
}
-
+
for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
if (!isValidMask(LHS)) continue;
if (ShufTab[LHS].Cost > 1000) continue;
@@ -295,14 +298,14 @@ int main() {
// we already have, don't consider it.
if (ShufTab[LHS].Cost + 1 >= MaxCost)
continue;
-
+
for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
Operator *Op = TheOperators[opnum];
// Evaluate op(LHS,LHS)
unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
- unsigned Cost = ShufTab[LHS].Cost + 1;
+ unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
if (Cost < ShufTab[ResultMask].Cost) {
ShufTab[ResultMask].Cost = Cost;
ShufTab[ResultMask].Op = Op;
@@ -310,20 +313,20 @@ int main() {
ShufTab[ResultMask].Arg1 = LHS;
MadeChange = true;
}
-
+
// If this is a two input instruction, include the op(x,y) cases. If
// this is a one input instruction, skip this.
if (Op->isOnlyLHSOperator()) continue;
-
+
for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
if (!isValidMask(RHS)) continue;
if (ShufTab[RHS].Cost > 1000) continue;
-
+
// If nothing involving this operand could possibly be cheaper than
// what we already have, don't consider it.
if (ShufTab[RHS].Cost + 1 >= MaxCost)
continue;
-
+
// Evaluate op(LHS,RHS)
unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
@@ -332,7 +335,7 @@ int main() {
ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
continue;
-
+
// Figure out the cost to evaluate this, knowing that CSE's only need
// to be evaluated once.
unsigned short Vals[30];
@@ -340,7 +343,7 @@ int main() {
EvaluateOps(LHS, Vals, NumVals);
EvaluateOps(RHS, Vals, NumVals);
- unsigned Cost = NumVals + 1;
+ unsigned Cost = NumVals + Op->getCost();
if (Cost < ShufTab[ResultMask].Cost) {
ShufTab[ResultMask].Cost = Cost;
ShufTab[ResultMask].Op = Op;
@@ -352,10 +355,10 @@ int main() {
}
}
}
-
+
std::cerr << "Finished Table has " << getNumEntered()
<< " entries established.\n";
-
+
unsigned CostArray[10] = { 0 };
// Compute a cost histogram.
@@ -366,33 +369,33 @@ int main() {
else
++CostArray[ShufTab[i].Cost];
}
-
+
for (unsigned i = 0; i != 9; ++i)
if (CostArray[i])
std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
if (CostArray[9])
std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
-
-
+
+
// Build up the table to emit.
std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";
-
+
for (unsigned i = 0; i != 0x8889; ++i) {
if (!isValidMask(i)) continue;
-
+
// CostSat - The cost of this operation saturated to two bits.
unsigned CostSat = ShufTab[i].Cost;
if (CostSat > 4) CostSat = 4;
if (CostSat == 0) CostSat = 1;
--CostSat; // Cost is now between 0-3.
-
+
unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
assert(OpNum < 16 && "Too few bits to encode operation!");
-
+
unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
-
+
// Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
// LHS, and 13 bits of RHS = 32 bits.
unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
@@ -417,7 +420,7 @@ int main() {
}
}
std::cout << "\n";
- }
+ }
std::cout << " 0\n};\n";
if (0) {
@@ -427,7 +430,7 @@ int main() {
if (ShufTab[i].Cost < 1000) {
PrintMask(i, std::cerr);
std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
-
+
unsigned short Vals[30];
unsigned NumVals = 0;
EvaluateOps(i, Vals, NumVals);
@@ -441,8 +444,6 @@ int main() {
}
-#define GENERATE_ALTIVEC
-
#ifdef GENERATE_ALTIVEC
///===---------------------------------------------------------------------===//
@@ -495,3 +496,76 @@ vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
#endif
+
+#define GENERATE_NEON
+
+#ifdef GENERATE_NEON
+enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+};
+
+struct vrev : public Operator {
+ vrev() : Operator(0x1032, "vrev", OP_VREV) {}
+} the_vrev;
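+
+// Note: these masks use the 4-nibble encoding described at the top of this
+// file. For example, 0x1032 selects elements <1,0,3,2> (nibbles read left to
+// right), i.e. a swap of adjacent pairs, which is the effect modeled for vrev.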
+
+template<unsigned Elt>
+struct vdup : public Operator {
+ vdup(const char *N, unsigned Opc)
+ : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
+};
+
+vdup<0> the_vdup0("vdup0", OP_VDUP0);
+vdup<1> the_vdup1("vdup1", OP_VDUP1);
+vdup<2> the_vdup2("vdup2", OP_VDUP2);
+vdup<3> the_vdup3("vdup3", OP_VDUP3);
+
+template<unsigned N>
+struct vext : public Operator {
+ vext(const char *Name, unsigned Opc)
+ : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
+ }
+};
+
+vext<1> the_vext1("vext1", OP_VEXT1);
+vext<2> the_vext2("vext2", OP_VEXT2);
+vext<3> the_vext3("vext3", OP_VEXT3);
+
+struct vuzpl : public Operator {
+ vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {}
+} the_vuzpl;
+
+struct vuzpr : public Operator {
+ vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {}
+} the_vuzpr;
+
+struct vzipl : public Operator {
+ vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {}
+} the_vzipl;
+
+struct vzipr : public Operator {
+ vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {}
+} the_vzipr;
+
+struct vtrnl : public Operator {
+ vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {}
+} the_vtrnl;
+
+struct vtrnr : public Operator {
+ vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {}
+} the_vtrnr;
+
+#endif
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
new file mode 100644
index 000000000000..3eac9d201b72
--- /dev/null
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -0,0 +1,1545 @@
+//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits a target specific matcher for converting parsed
+// assembly operands into MCInst structures.
+//
+// The input to the target specific matcher is a list of literal tokens and
+// operands. The target specific parser should generally eliminate any syntax
+// which is not relevant for matching; for example, comma tokens should have
+// already been consumed and eliminated by the parser. Most instructions will
+// end up with a single literal token (the instruction name) and some number of
+// operands.
+//
+// Some example inputs, for X86:
+// 'addl' (immediate ...) (register ...)
+// 'add' (immediate ...) (memory ...)
+// 'call' '*' %epc
+//
+// The assembly matcher is responsible for converting this input into a precise
+// machine instruction (i.e., an instruction with a well defined encoding). This
+// mapping has several properties which complicate matching:
+//
+// - It may be ambiguous; many architectures can legally encode particular
+// variants of an instruction in different ways (for example, using a smaller
+// encoding for small immediates). Such ambiguities should never be
+// arbitrarily resolved by the assembler; the assembler is always responsible
+// for choosing the "best" available instruction.
+//
+// - It may depend on the subtarget or the assembler context. Instructions
+// which are invalid for the current mode, but otherwise unambiguous (e.g.,
+// an SSE instruction in a file being assembled for i486) should be accepted
+// by the matcher and rejected by the assembler front end. However, if the
+// proper encoding for an instruction depends on the assembler context, then
+// the matcher is responsible for selecting the correct machine instruction
+// for the current mode.
+//
+// The core matching algorithm attempts to exploit the regularity in most
+// instruction sets to quickly determine the set of possibly matching
+// instructions, and to simplify the generated code. Additionally, this helps
+// to ensure that the ambiguities are intentionally resolved by the user.
+//
+// The matching is divided into two distinct phases:
+//
+// 1. Classification: Each operand is mapped to the unique set which (a)
+// contains it, and (b) is the largest such subset for which a single
+// instruction could match all members.
+//
+// For register classes, we can generate these subgroups automatically. For
+// arbitrary operands, we expect the user to define the classes and their
+// relations to one another (for example, 8-bit signed immediates as a
+// subset of 32-bit immediates).
+//
+// By partitioning the operands in this way, we guarantee that for any
+// tuple of classes, any single instruction must match either all or none
+// of the sets of operands which could classify to that tuple.
+//
+// In addition, the subset relation amongst classes induces a partial order
+// on such tuples, which we use to resolve ambiguities (see the example below).
+//
+// FIXME: What do we do if a crazy case shows up where this is the wrong
+// resolution?
+//
+// 2. The input can now be treated as a tuple of classes (static tokens are
+// simple singleton sets). Each such tuple should generally map to a single
+// instruction (we currently ignore cases where this isn't true, whee!!!),
+// which we can emit a simple matcher for.
+//
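+// As an illustrative example (the class names are hypothetical): if Imm8 is
+// declared a subclass of Imm32, then for "addl $4, %eax" the immediate
+// operand classifies to Imm8, and the partial order prefers an instruction
+// form requiring only Imm8 over a more general form accepting any Imm32.
+//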
+//===----------------------------------------------------------------------===//
+
+#include "AsmMatcherEmitter.h"
+#include "CodeGenTarget.h"
+#include "Record.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <list>
+#include <map>
+#include <set>
+using namespace llvm;
+
+static cl::opt<std::string>
+MatchPrefix("match-prefix", cl::init(""),
+ cl::desc("Only match instructions with the given prefix"));
+
+/// FlattenVariants - Flatten an .td file assembly string by selecting the
+/// variant at index \arg N.
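+///
+/// For example (illustrative), flattening "mov{l}\t{$src, $dst|$dst, $src}"
+/// yields "movl\t$src, $dst" for variant 0 and "mov\t$dst, $src" for
+/// variant 1.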
+static std::string FlattenVariants(const std::string &AsmString,
+ unsigned N) {
+ StringRef Cur = AsmString;
+ std::string Res = "";
+
+ for (;;) {
+ // Find the start of the next variant string.
+ size_t VariantsStart = 0;
+ for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart)
+ if (Cur[VariantsStart] == '{' &&
+ (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' &&
+ Cur[VariantsStart-1] != '\\')))
+ break;
+
+ // Add the prefix to the result.
+ Res += Cur.slice(0, VariantsStart);
+ if (VariantsStart == Cur.size())
+ break;
+
+ ++VariantsStart; // Skip the '{'.
+
+ // Scan to the end of the variants string.
+ size_t VariantsEnd = VariantsStart;
+ unsigned NestedBraces = 1;
+ for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) {
+ if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') {
+ if (--NestedBraces == 0)
+ break;
+ } else if (Cur[VariantsEnd] == '{')
+ ++NestedBraces;
+ }
+
+ // Select the Nth variant (or empty).
+ StringRef Selection = Cur.slice(VariantsStart, VariantsEnd);
+ for (unsigned i = 0; i != N; ++i)
+ Selection = Selection.split('|').second;
+ Res += Selection.split('|').first;
+
+ assert(VariantsEnd != Cur.size() &&
+ "Unterminated variants in assembly string!");
+ Cur = Cur.substr(VariantsEnd + 1);
+ }
+
+ return Res;
+}
+
+/// TokenizeAsmString - Tokenize a simplified assembly string.
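+///
+/// For example (illustrative), "movl\t$src, $dst" tokenizes to
+/// ["movl", "$src", "$dst"]: whitespace and commas separate tokens and are
+/// dropped, while characters such as '*' become tokens of their own.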
+static void TokenizeAsmString(const StringRef &AsmString,
+ SmallVectorImpl<StringRef> &Tokens) {
+ unsigned Prev = 0;
+ bool InTok = true;
+ for (unsigned i = 0, e = AsmString.size(); i != e; ++i) {
+ switch (AsmString[i]) {
+ case '[':
+ case ']':
+ case '*':
+ case '!':
+ case ' ':
+ case '\t':
+ case ',':
+ if (InTok) {
+ Tokens.push_back(AsmString.slice(Prev, i));
+ InTok = false;
+ }
+ if (!isspace(AsmString[i]) && AsmString[i] != ',')
+ Tokens.push_back(AsmString.substr(i, 1));
+ Prev = i + 1;
+ break;
+
+ case '\\':
+ if (InTok) {
+ Tokens.push_back(AsmString.slice(Prev, i));
+ InTok = false;
+ }
+ ++i;
+ assert(i != AsmString.size() && "Invalid quoted character");
+ Tokens.push_back(AsmString.substr(i, 1));
+ Prev = i + 1;
+ break;
+
+ case '$': {
+ // If this isn't "${", treat like a normal token.
+ if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') {
+ if (InTok) {
+ Tokens.push_back(AsmString.slice(Prev, i));
+ InTok = false;
+ }
+ Prev = i;
+ break;
+ }
+
+ if (InTok) {
+ Tokens.push_back(AsmString.slice(Prev, i));
+ InTok = false;
+ }
+
+ StringRef::iterator End =
+ std::find(AsmString.begin() + i, AsmString.end(), '}');
+ assert(End != AsmString.end() && "Missing brace in operand reference!");
+ size_t EndPos = End - AsmString.begin();
+ Tokens.push_back(AsmString.slice(i, EndPos+1));
+ Prev = EndPos + 1;
+ i = EndPos;
+ break;
+ }
+
+ default:
+ InTok = true;
+ }
+ }
+ if (InTok && Prev != AsmString.size())
+ Tokens.push_back(AsmString.substr(Prev));
+}
+
+static bool IsAssemblerInstruction(const StringRef &Name,
+ const CodeGenInstruction &CGI,
+ const SmallVectorImpl<StringRef> &Tokens) {
+ // Ignore "codegen only" instructions.
+ if (CGI.TheDef->getValueAsBit("isCodeGenOnly"))
+ return false;
+
+ // Ignore pseudo ops.
+ //
+ // FIXME: This is a hack; can we convert these instructions to set the
+ // "codegen only" bit instead?
+ if (const RecordVal *Form = CGI.TheDef->getValue("Form"))
+ if (Form->getValue()->getAsString() == "Pseudo")
+ return false;
+
+ // Ignore "Int_*" and "*_Int" instructions, which are internal aliases.
+ //
+ // FIXME: This is a total hack.
+ if (StringRef(Name).startswith("Int_") || StringRef(Name).endswith("_Int"))
+ return false;
+
+ // Ignore instructions with no assembler (.s) string.
+ //
+ // FIXME: What are these?
+ if (CGI.AsmString.empty())
+ return false;
+
+ // FIXME: Hack; ignore any instructions with a newline in them.
+ if (std::find(CGI.AsmString.begin(),
+ CGI.AsmString.end(), '\n') != CGI.AsmString.end())
+ return false;
+
+ // Ignore instructions with attributes; these are always fake instructions
+ // used to simplify codegen.
+ //
+ // FIXME: Is this true?
+ //
+ // Also, check for instructions which reference the operand multiple times;
+ // this implies a constraint we would not honor.
+ std::set<std::string> OperandNames;
+ for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
+ if (Tokens[i][0] == '$' &&
+ std::find(Tokens[i].begin(),
+ Tokens[i].end(), ':') != Tokens[i].end()) {
+ DEBUG({
+ errs() << "warning: '" << Name << "': "
+ << "ignoring instruction; operand with attribute '"
+ << Tokens[i] << "'\n";
+ });
+ return false;
+ }
+
+ if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) {
+ std::string Err = "'" + Name.str() + "': " +
+ "invalid assembler instruction; tied operand '" + Tokens[i].str() + "'";
+ throw TGError(CGI.TheDef->getLoc(), Err);
+ }
+ }
+
+ return true;
+}
+
+namespace {
+
+/// ClassInfo - Helper class for storing the information about a particular
+/// class of operands which can be matched.
+struct ClassInfo {
+ enum ClassInfoKind {
+ /// Invalid kind, for use as a sentinel value.
+ Invalid = 0,
+
+ /// The class for a particular token.
+ Token,
+
+ /// The (first) register class, subsequent register classes are
+ /// RegisterClass0+1, and so on.
+ RegisterClass0,
+
+ /// The (first) user defined class, subsequent user defined classes are
+ /// UserClass0+1, and so on.
+ UserClass0 = 1<<16
+ };
+
+ /// Kind - The class kind, which is either a predefined kind, or (UserClass0 +
+ /// N) for the Nth user defined class.
+ unsigned Kind;
+
+ /// SuperClasses - The super classes of this class. Note that for simplicity's
+ /// sake user operands only record their immediate super class, while register
+ /// operands include all superclasses.
+ std::vector<ClassInfo*> SuperClasses;
+
+ /// Name - The full class name, suitable for use in an enum.
+ std::string Name;
+
+ /// ClassName - The unadorned generic name for this class (e.g., Token).
+ std::string ClassName;
+
+ /// ValueName - The name of the value this class represents; for a token this
+ /// is the literal token string, for an operand it is the TableGen class (or
+ /// empty if this is a derived class).
+ std::string ValueName;
+
+ /// PredicateMethod - The name of the operand method to test whether the
+ /// operand matches this class; this is not valid for Token or register kinds.
+ std::string PredicateMethod;
+
+ /// RenderMethod - The name of the operand method to add this operand to an
+ /// MCInst; this is not valid for Token or register kinds.
+ std::string RenderMethod;
+
+ /// For register classes, the records for all the registers in this class.
+ std::set<Record*> Registers;
+
+public:
+ /// isRegisterClass() - Check if this is a register class.
+ bool isRegisterClass() const {
+ return Kind >= RegisterClass0 && Kind < UserClass0;
+ }
+
+ /// isUserClass() - Check if this is a user defined class.
+ bool isUserClass() const {
+ return Kind >= UserClass0;
+ }
+
+ /// isRelatedTo - Check whether this class is "related" to \arg RHS. Classes
+ /// are related if they are in the same class hierarchy.
+ bool isRelatedTo(const ClassInfo &RHS) const {
+ // Tokens are only related to tokens.
+ if (Kind == Token || RHS.Kind == Token)
+ return Kind == Token && RHS.Kind == Token;
+
+ // Register classes are only related to register classes, and only if
+ // their intersection is non-empty.
+ if (isRegisterClass() || RHS.isRegisterClass()) {
+ if (!isRegisterClass() || !RHS.isRegisterClass())
+ return false;
+
+ std::set<Record*> Tmp;
+ std::insert_iterator< std::set<Record*> > II(Tmp, Tmp.begin());
+ std::set_intersection(Registers.begin(), Registers.end(),
+ RHS.Registers.begin(), RHS.Registers.end(),
+ II);
+
+ return !Tmp.empty();
+ }
+
+ // Otherwise we have two user operands; they are related if they are in the
+ // same class hierarchy.
+ //
+ // FIXME: This is an oversimplification, they should only be related if they
+ // intersect, however we don't have that information.
+ assert(isUserClass() && RHS.isUserClass() && "Unexpected class!");
+ const ClassInfo *Root = this;
+ while (!Root->SuperClasses.empty())
+ Root = Root->SuperClasses.front();
+
+ const ClassInfo *RHSRoot = &RHS;
+ while (!RHSRoot->SuperClasses.empty())
+ RHSRoot = RHSRoot->SuperClasses.front();
+
+ return Root == RHSRoot;
+ }
+
+ /// isSubsetOf - Test whether this class is a subset of \arg RHS.
+ bool isSubsetOf(const ClassInfo &RHS) const {
+ // This is a subset of RHS if it is the same class...
+ if (this == &RHS)
+ return true;
+
+ // ... or if any of its super classes are a subset of RHS.
+ for (std::vector<ClassInfo*>::const_iterator it = SuperClasses.begin(),
+ ie = SuperClasses.end(); it != ie; ++it)
+ if ((*it)->isSubsetOf(RHS))
+ return true;
+
+ return false;
+ }
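+
+ // Illustrative note (hypothetical class names): because isSubsetOf recurses
+ // through SuperClasses, if Imm8 lists Imm16 as a super class and Imm16
+ // lists Imm32, then Imm8.isSubsetOf(Imm32) holds transitively.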
+
+ /// operator< - Compare two classes.
+ bool operator<(const ClassInfo &RHS) const {
+ // Unrelated classes can be ordered by kind.
+ if (!isRelatedTo(RHS))
+ return Kind < RHS.Kind;
+
+ switch (Kind) {
+ case Invalid:
+ assert(0 && "Invalid kind!");
+ case Token:
+ // Tokens are comparable by value.
+ //
+ // FIXME: Compare by enum value.
+ return ValueName < RHS.ValueName;
+
+ default:
+ // This class precedes the RHS if it is a proper subset of the RHS.
+ return this != &RHS && isSubsetOf(RHS);
+ }
+ }
+};
+
+/// InstructionInfo - Helper class for storing the necessary information for an
+/// instruction which is capable of being matched.
+struct InstructionInfo {
+ struct Operand {
+ /// The unique class instance this operand should match.
+ ClassInfo *Class;
+
+ /// The original operand this corresponds to, if any.
+ const CodeGenInstruction::OperandInfo *OperandInfo;
+ };
+
+ /// InstrName - The target name for this instruction.
+ std::string InstrName;
+
+ /// Instr - The instruction this matches.
+ const CodeGenInstruction *Instr;
+
+ /// AsmString - The assembly string for this instruction (with variants
+ /// removed).
+ std::string AsmString;
+
+ /// Tokens - The tokenized assembly pattern that this instruction matches.
+ SmallVector<StringRef, 4> Tokens;
+
+ /// Operands - The operands that this instruction matches.
+ SmallVector<Operand, 4> Operands;
+
+ /// ConversionFnKind - The enum value which is passed to the generated
+ /// ConvertToMCInst to convert parsed operands into an MCInst for this
+ /// instruction.
+ std::string ConversionFnKind;
+
+ /// operator< - Compare two instructions.
+ bool operator<(const InstructionInfo &RHS) const {
+ if (Operands.size() != RHS.Operands.size())
+ return Operands.size() < RHS.Operands.size();
+
+ // Compare lexicographically by operand. The matcher validates that other
+ // orderings wouldn't be ambiguous using \see CouldMatchAmiguouslyWith().
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (*Operands[i].Class < *RHS.Operands[i].Class)
+ return true;
+ if (*RHS.Operands[i].Class < *Operands[i].Class)
+ return false;
+ }
+
+ return false;
+ }
+
+ /// CouldMatchAmiguouslyWith - Check whether this instruction could
+ /// ambiguously match the same set of operands as \arg RHS (without being a
+ /// strictly superior match).
+ bool CouldMatchAmiguouslyWith(const InstructionInfo &RHS) {
+ // The number of operands is unambiguous.
+ if (Operands.size() != RHS.Operands.size())
+ return false;
+
+ // Tokens and operand kinds are unambiguous (assuming a correct target
+ // specific parser).
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ if (Operands[i].Class->Kind != RHS.Operands[i].Class->Kind ||
+ Operands[i].Class->Kind == ClassInfo::Token)
+ if (*Operands[i].Class < *RHS.Operands[i].Class ||
+ *RHS.Operands[i].Class < *Operands[i].Class)
+ return false;
+
+ // Otherwise, this operand could commute if all operands are equivalent, or
+ // there is a pair of operands that compare less than and a pair that
+ // compare greater than.
+ bool HasLT = false, HasGT = false;
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (*Operands[i].Class < *RHS.Operands[i].Class)
+ HasLT = true;
+ if (*RHS.Operands[i].Class < *Operands[i].Class)
+ HasGT = true;
+ }
+
+ return !(HasLT ^ HasGT);
+ }
+
+public:
+ void dump();
+};
+
+class AsmMatcherInfo {
+public:
+ /// The tablegen AsmParser record.
+ Record *AsmParser;
+
+ /// The AsmParser "CommentDelimiter" value.
+ std::string CommentDelimiter;
+
+ /// The AsmParser "RegisterPrefix" value.
+ std::string RegisterPrefix;
+
+ /// The classes which are needed for matching.
+ std::vector<ClassInfo*> Classes;
+
+ /// The information on the instruction to match.
+ std::vector<InstructionInfo*> Instructions;
+
+ /// Map of Register records to their class information.
+ std::map<Record*, ClassInfo*> RegisterClasses;
+
+private:
+ /// Map of token to class information which has already been constructed.
+ std::map<std::string, ClassInfo*> TokenClasses;
+
+ /// Map of RegisterClass records to their class information.
+ std::map<Record*, ClassInfo*> RegisterClassClasses;
+
+ /// Map of AsmOperandClass records to their class information.
+ std::map<Record*, ClassInfo*> AsmOperandClasses;
+
+private:
+ /// getTokenClass - Lookup or create the class for the given token.
+ ClassInfo *getTokenClass(const StringRef &Token);
+
+ /// getOperandClass - Lookup or create the class for the given operand.
+ ClassInfo *getOperandClass(const StringRef &Token,
+ const CodeGenInstruction::OperandInfo &OI);
+
+ /// BuildRegisterClasses - Build the ClassInfo* instances for register
+ /// classes.
+ void BuildRegisterClasses(CodeGenTarget &Target,
+ std::set<std::string> &SingletonRegisterNames);
+
+ /// BuildOperandClasses - Build the ClassInfo* instances for user defined
+ /// operand classes.
+ void BuildOperandClasses(CodeGenTarget &Target);
+
+public:
+ AsmMatcherInfo(Record *_AsmParser);
+
+ /// BuildInfo - Construct the various tables used during matching.
+ void BuildInfo(CodeGenTarget &Target);
+};
+
+}
+
+void InstructionInfo::dump() {
+ errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"'
+ << ", tokens:[";
+ for (unsigned i = 0, e = Tokens.size(); i != e; ++i) {
+ errs() << Tokens[i];
+ if (i + 1 != e)
+ errs() << ", ";
+ }
+ errs() << "]\n";
+
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ Operand &Op = Operands[i];
+ errs() << " op[" << i << "] = " << Op.Class->ClassName << " - ";
+ if (Op.Class->Kind == ClassInfo::Token) {
+ errs() << '\"' << Tokens[i] << "\"\n";
+ continue;
+ }
+
+ if (!Op.OperandInfo) {
+ errs() << "(singleton register)\n";
+ continue;
+ }
+
+ const CodeGenInstruction::OperandInfo &OI = *Op.OperandInfo;
+ errs() << OI.Name << " " << OI.Rec->getName()
+ << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n";
+ }
+}
+
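+/// getEnumNameForToken - Munge a token string into a legal C identifier
+/// fragment; for example (illustrative), "*" becomes "_STAR_" and "$42"
+/// becomes "_36_42" (36 being the ASCII value of '$').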
+static std::string getEnumNameForToken(const StringRef &Str) {
+ std::string Res;
+
+ for (StringRef::iterator it = Str.begin(), ie = Str.end(); it != ie; ++it) {
+ switch (*it) {
+ case '*': Res += "_STAR_"; break;
+ case '%': Res += "_PCT_"; break;
+ case ':': Res += "_COLON_"; break;
+
+ default:
+ if (isalnum(*it)) {
+ Res += *it;
+ } else {
+ Res += "_" + utostr((unsigned) *it) + "_";
+ }
+ }
+ }
+
+ return Res;
+}
+
+/// getRegisterRecord - Get the register record for \arg name, or 0.
+static Record *getRegisterRecord(CodeGenTarget &Target, const StringRef &Name) {
+ for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
+ const CodeGenRegister &Reg = Target.getRegisters()[i];
+ if (Name == Reg.TheDef->getValueAsString("AsmName"))
+ return Reg.TheDef;
+ }
+
+ return 0;
+}
+
+ClassInfo *AsmMatcherInfo::getTokenClass(const StringRef &Token) {
+ ClassInfo *&Entry = TokenClasses[Token];
+
+ if (!Entry) {
+ Entry = new ClassInfo();
+ Entry->Kind = ClassInfo::Token;
+ Entry->ClassName = "Token";
+ Entry->Name = "MCK_" + getEnumNameForToken(Token);
+ Entry->ValueName = Token;
+ Entry->PredicateMethod = "<invalid>";
+ Entry->RenderMethod = "<invalid>";
+ Classes.push_back(Entry);
+ }
+
+ return Entry;
+}
+
+ClassInfo *
+AsmMatcherInfo::getOperandClass(const StringRef &Token,
+ const CodeGenInstruction::OperandInfo &OI) {
+ if (OI.Rec->isSubClassOf("RegisterClass")) {
+ ClassInfo *CI = RegisterClassClasses[OI.Rec];
+
+ if (!CI) {
+ PrintError(OI.Rec->getLoc(), "register class has no class info!");
+ throw std::string("ERROR: Missing register class!");
+ }
+
+ return CI;
+ }
+
+ assert(OI.Rec->isSubClassOf("Operand") && "Unexpected operand!");
+ Record *MatchClass = OI.Rec->getValueAsDef("ParserMatchClass");
+ ClassInfo *CI = AsmOperandClasses[MatchClass];
+
+ if (!CI) {
+ PrintError(OI.Rec->getLoc(), "operand has no match class!");
+ throw std::string("ERROR: Missing match class!");
+ }
+
+ return CI;
+}
+
+void AsmMatcherInfo::BuildRegisterClasses(CodeGenTarget &Target,
+ std::set<std::string>
+ &SingletonRegisterNames) {
+ std::vector<CodeGenRegisterClass> RegisterClasses;
+ std::vector<CodeGenRegister> Registers;
+
+ RegisterClasses = Target.getRegisterClasses();
+ Registers = Target.getRegisters();
+
+ // The register sets used for matching.
+ std::set< std::set<Record*> > RegisterSets;
+
+ // Gather the defined sets.
+ for (std::vector<CodeGenRegisterClass>::iterator it = RegisterClasses.begin(),
+ ie = RegisterClasses.end(); it != ie; ++it)
+ RegisterSets.insert(std::set<Record*>(it->Elements.begin(),
+ it->Elements.end()));
+
+ // Add any required singleton sets.
+ for (std::set<std::string>::iterator it = SingletonRegisterNames.begin(),
+ ie = SingletonRegisterNames.end(); it != ie; ++it)
+ if (Record *Rec = getRegisterRecord(Target, *it))
+ RegisterSets.insert(std::set<Record*>(&Rec, &Rec + 1));
+
+ // Introduce derived sets where necessary (when a register does not determine
+ // a unique register set class), and build the mapping of registers to the set
+ // they should classify to.
+ std::map<Record*, std::set<Record*> > RegisterMap;
+ for (std::vector<CodeGenRegister>::iterator it = Registers.begin(),
+ ie = Registers.end(); it != ie; ++it) {
+ CodeGenRegister &CGR = *it;
+ // Compute the intersection of all sets containing this register.
+ std::set<Record*> ContainingSet;
+
+ for (std::set< std::set<Record*> >::iterator it = RegisterSets.begin(),
+ ie = RegisterSets.end(); it != ie; ++it) {
+ if (!it->count(CGR.TheDef))
+ continue;
+
+ if (ContainingSet.empty()) {
+ ContainingSet = *it;
+ } else {
+ std::set<Record*> Tmp;
+ std::swap(Tmp, ContainingSet);
+ std::insert_iterator< std::set<Record*> > II(ContainingSet,
+ ContainingSet.begin());
+ std::set_intersection(Tmp.begin(), Tmp.end(), it->begin(), it->end(),
+ II);
+ }
+ }
+
+ if (!ContainingSet.empty()) {
+ RegisterSets.insert(ContainingSet);
+ RegisterMap.insert(std::make_pair(CGR.TheDef, ContainingSet));
+ }
+ }
+
+ // Construct the register classes.
+ std::map<std::set<Record*>, ClassInfo*> RegisterSetClasses;
+ unsigned Index = 0;
+ for (std::set< std::set<Record*> >::iterator it = RegisterSets.begin(),
+ ie = RegisterSets.end(); it != ie; ++it, ++Index) {
+ ClassInfo *CI = new ClassInfo();
+ CI->Kind = ClassInfo::RegisterClass0 + Index;
+ CI->ClassName = "Reg" + utostr(Index);
+ CI->Name = "MCK_Reg" + utostr(Index);
+ CI->ValueName = "";
+ CI->PredicateMethod = ""; // unused
+ CI->RenderMethod = "addRegOperands";
+ CI->Registers = *it;
+ Classes.push_back(CI);
+ RegisterSetClasses.insert(std::make_pair(*it, CI));
+ }
+
+ // Find the superclasses; we could compute only the subgroup lattice edges,
+ // but there isn't really a point.
+ for (std::set< std::set<Record*> >::iterator it = RegisterSets.begin(),
+ ie = RegisterSets.end(); it != ie; ++it) {
+ ClassInfo *CI = RegisterSetClasses[*it];
+ for (std::set< std::set<Record*> >::iterator it2 = RegisterSets.begin(),
+ ie2 = RegisterSets.end(); it2 != ie2; ++it2)
+ if (*it != *it2 &&
+ std::includes(it2->begin(), it2->end(), it->begin(), it->end()))
+ CI->SuperClasses.push_back(RegisterSetClasses[*it2]);
+ }
+
+ // Name the register classes which correspond to a user defined RegisterClass.
+ for (std::vector<CodeGenRegisterClass>::iterator it = RegisterClasses.begin(),
+ ie = RegisterClasses.end(); it != ie; ++it) {
+ ClassInfo *CI = RegisterSetClasses[std::set<Record*>(it->Elements.begin(),
+ it->Elements.end())];
+ if (CI->ValueName.empty()) {
+ CI->ClassName = it->getName();
+ CI->Name = "MCK_" + it->getName();
+ CI->ValueName = it->getName();
+ } else
+ CI->ValueName = CI->ValueName + "," + it->getName();
+
+ RegisterClassClasses.insert(std::make_pair(it->TheDef, CI));
+ }
+
+ // Populate the map for individual registers.
+ for (std::map<Record*, std::set<Record*> >::iterator it = RegisterMap.begin(),
+ ie = RegisterMap.end(); it != ie; ++it)
+ this->RegisterClasses[it->first] = RegisterSetClasses[it->second];
+
+ // Name the register classes which correspond to singleton registers.
+ for (std::set<std::string>::iterator it = SingletonRegisterNames.begin(),
+ ie = SingletonRegisterNames.end(); it != ie; ++it) {
+ if (Record *Rec = getRegisterRecord(Target, *it)) {
+ ClassInfo *CI = this->RegisterClasses[Rec];
+ assert(CI && "Missing singleton register class info!");
+
+ if (CI->ValueName.empty()) {
+ CI->ClassName = Rec->getName();
+ CI->Name = "MCK_" + Rec->getName();
+ CI->ValueName = Rec->getName();
+ } else
+ CI->ValueName = CI->ValueName + "," + Rec->getName();
+ }
+ }
+}
+
+void AsmMatcherInfo::BuildOperandClasses(CodeGenTarget &Target) {
+ std::vector<Record*> AsmOperands;
+ AsmOperands = Records.getAllDerivedDefinitions("AsmOperandClass");
+ unsigned Index = 0;
+ for (std::vector<Record*>::iterator it = AsmOperands.begin(),
+ ie = AsmOperands.end(); it != ie; ++it, ++Index) {
+ ClassInfo *CI = new ClassInfo();
+ CI->Kind = ClassInfo::UserClass0 + Index;
+
+ Init *Super = (*it)->getValueInit("SuperClass");
+ if (DefInit *DI = dynamic_cast<DefInit*>(Super)) {
+ ClassInfo *SC = AsmOperandClasses[DI->getDef()];
+ if (!SC)
+ PrintError((*it)->getLoc(), "Invalid super class reference!");
+ else
+ CI->SuperClasses.push_back(SC);
+ } else {
+ assert(dynamic_cast<UnsetInit*>(Super) && "Unexpected SuperClass field!");
+ }
+ CI->ClassName = (*it)->getValueAsString("Name");
+ CI->Name = "MCK_" + CI->ClassName;
+ CI->ValueName = (*it)->getName();
+
+ // Get or construct the predicate method name.
+ Init *PMName = (*it)->getValueInit("PredicateMethod");
+ if (StringInit *SI = dynamic_cast<StringInit*>(PMName)) {
+ CI->PredicateMethod = SI->getValue();
+ } else {
+ assert(dynamic_cast<UnsetInit*>(PMName) &&
+ "Unexpected PredicateMethod field!");
+ CI->PredicateMethod = "is" + CI->ClassName;
+ }
+
+ // Get or construct the render method name.
+ Init *RMName = (*it)->getValueInit("RenderMethod");
+ if (StringInit *SI = dynamic_cast<StringInit*>(RMName)) {
+ CI->RenderMethod = SI->getValue();
+ } else {
+ assert(dynamic_cast<UnsetInit*>(RMName) &&
+ "Unexpected RenderMethod field!");
+ CI->RenderMethod = "add" + CI->ClassName + "Operands";
+ }
+
+ AsmOperandClasses[*it] = CI;
+ Classes.push_back(CI);
+ }
+}
+
+AsmMatcherInfo::AsmMatcherInfo(Record *_AsmParser)
+ : AsmParser(_AsmParser),
+ CommentDelimiter(AsmParser->getValueAsString("CommentDelimiter")),
+ RegisterPrefix(AsmParser->getValueAsString("RegisterPrefix"))
+{
+}
+
+void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
+ // Parse the instructions; we need to do this first so that we can gather the
+ // singleton register classes.
+ std::set<std::string> SingletonRegisterNames;
+ for (std::map<std::string, CodeGenInstruction>::const_iterator
+ it = Target.getInstructions().begin(),
+ ie = Target.getInstructions().end();
+ it != ie; ++it) {
+ const CodeGenInstruction &CGI = it->second;
+
+ if (!StringRef(it->first).startswith(MatchPrefix))
+ continue;
+
+ OwningPtr<InstructionInfo> II(new InstructionInfo);
+
+ II->InstrName = it->first;
+ II->Instr = &it->second;
+ II->AsmString = FlattenVariants(CGI.AsmString, 0);
+
+ // Remove comments from the asm string.
+ if (!CommentDelimiter.empty()) {
+ size_t Idx = StringRef(II->AsmString).find(CommentDelimiter);
+ if (Idx != StringRef::npos)
+ II->AsmString = II->AsmString.substr(0, Idx);
+ }
+
+ TokenizeAsmString(II->AsmString, II->Tokens);
+
+ // Ignore instructions which shouldn't be matched.
+ if (!IsAssemblerInstruction(it->first, CGI, II->Tokens))
+ continue;
+
+ // Collect singleton registers, if used.
+ if (!RegisterPrefix.empty()) {
+ for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
+ if (II->Tokens[i].startswith(RegisterPrefix)) {
+ StringRef RegName = II->Tokens[i].substr(RegisterPrefix.size());
+ Record *Rec = getRegisterRecord(Target, RegName);
+
+ if (!Rec) {
+ std::string Err = "unable to find register for '" + RegName.str() +
+ "' (which matches register prefix)";
+ throw TGError(CGI.TheDef->getLoc(), Err);
+ }
+
+ SingletonRegisterNames.insert(RegName);
+ }
+ }
+ }
+
+ Instructions.push_back(II.take());
+ }
+
+ // Build info for the register classes.
+ BuildRegisterClasses(Target, SingletonRegisterNames);
+
+ // Build info for the user defined assembly operand classes.
+ BuildOperandClasses(Target);
+
+ // Build the instruction information.
+ for (std::vector<InstructionInfo*>::iterator it = Instructions.begin(),
+ ie = Instructions.end(); it != ie; ++it) {
+ InstructionInfo *II = *it;
+
+ for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
+ StringRef Token = II->Tokens[i];
+
+ // Check for singleton registers.
+ if (!RegisterPrefix.empty() && Token.startswith(RegisterPrefix)) {
+ StringRef RegName = II->Tokens[i].substr(RegisterPrefix.size());
+ InstructionInfo::Operand Op;
+ Op.Class = RegisterClasses[getRegisterRecord(Target, RegName)];
+ Op.OperandInfo = 0;
+ assert(Op.Class && Op.Class->Registers.size() == 1 &&
+ "Unexpected class for singleton register");
+ II->Operands.push_back(Op);
+ continue;
+ }
+
+ // Check for simple tokens.
+ if (Token[0] != '$') {
+ InstructionInfo::Operand Op;
+ Op.Class = getTokenClass(Token);
+ Op.OperandInfo = 0;
+ II->Operands.push_back(Op);
+ continue;
+ }
+
+ // Otherwise this is an operand reference.
+ StringRef OperandName;
+ if (Token[1] == '{')
+ OperandName = Token.substr(2, Token.size() - 3);
+ else
+ OperandName = Token.substr(1);
+
+ // Map this token to an operand. FIXME: Move elsewhere.
+ unsigned Idx;
+ try {
+ Idx = II->Instr->getOperandNamed(OperandName);
+ } catch(...) {
+ throw std::string("error: unable to find operand: '" +
+ OperandName.str() + "'");
+ }
+
+ const CodeGenInstruction::OperandInfo &OI = II->Instr->OperandList[Idx];
+ InstructionInfo::Operand Op;
+ Op.Class = getOperandClass(Token, OI);
+ Op.OperandInfo = &OI;
+ II->Operands.push_back(Op);
+ }
+ }
+
+ // Reorder classes so that classes precede super classes.
+ std::sort(Classes.begin(), Classes.end(), less_ptr<ClassInfo>());
+}
+
+static void EmitConvertToMCInst(CodeGenTarget &Target,
+ std::vector<InstructionInfo*> &Infos,
+ raw_ostream &OS) {
+ // Write the convert function to a separate stream, so we can drop it after
+ // the enum.
+ std::string ConvertFnBody;
+ raw_string_ostream CvtOS(ConvertFnBody);
+
+ // Functions we have already generated.
+ std::set<std::string> GeneratedFns;
+
+ // Start the unified conversion function.
+
+ CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, "
+ << "unsigned Opcode,\n"
+ << " SmallVectorImpl<"
+ << Target.getName() << "Operand> &Operands) {\n";
+ CvtOS << " Inst.setOpcode(Opcode);\n";
+ CvtOS << " switch (Kind) {\n";
+ CvtOS << " default:\n";
+
+ // Start the enum, which we will generate inline.
+
+ OS << "// Unified function for converting operants to MCInst instances.\n\n";
+ OS << "enum ConversionKind {\n";
+
+ for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ InstructionInfo &II = **it;
+
+ // Order the (class) operands by the order to convert them into an MCInst.
+ SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList;
+ for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
+ InstructionInfo::Operand &Op = II.Operands[i];
+ if (Op.OperandInfo)
+ MIOperandList.push_back(std::make_pair(Op.OperandInfo->MIOperandNo, i));
+ }
+ std::sort(MIOperandList.begin(), MIOperandList.end());
+
+ // Compute the total number of operands.
+ unsigned NumMIOperands = 0;
+ for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) {
+ const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i];
+ NumMIOperands = std::max(NumMIOperands,
+ OI.MIOperandNo + OI.MINumOperands);
+ }
+
+ // Build the conversion function signature.
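+ //
+ // As an illustrative example, an instruction whose parsed operands are a
+ // register followed by an immediate might get a signature such as
+ // "Convert_Reg1_1_Imm1_2": the class name, the MI operand count, and the
+ // parsed operand index for each matched operand.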
+ std::string Signature = "Convert";
+ unsigned CurIndex = 0;
+ for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
+ InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
+ assert(CurIndex <= Op.OperandInfo->MIOperandNo &&
+ "Duplicate match for instruction operand!");
+
+ Signature += "_";
+
+ // Skip operands which weren't matched by anything; this occurs when the
+ // .td file encodes "implicit" operands as explicit ones.
+ //
+ // FIXME: This should be removed from the MCInst structure.
+ for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex)
+ Signature += "Imp";
+
+ // Registers are always converted the same way; don't duplicate the conversion
+ // function based on them.
+ //
+ // FIXME: We could generalize this based on the render method, if it
+ // mattered.
+ if (Op.Class->isRegisterClass())
+ Signature += "Reg";
+ else
+ Signature += Op.Class->ClassName;
+ Signature += utostr(Op.OperandInfo->MINumOperands);
+ Signature += "_" + utostr(MIOperandList[i].second);
+
+ CurIndex += Op.OperandInfo->MINumOperands;
+ }
+
+ // Add any trailing implicit operands.
+ for (; CurIndex != NumMIOperands; ++CurIndex)
+ Signature += "Imp";
+
+ II.ConversionFnKind = Signature;
+
+ // Check if we have already generated this signature.
+ if (!GeneratedFns.insert(Signature).second)
+ continue;
+
+ // If not, emit it now.
+
+ // Add to the enum list.
+ OS << " " << Signature << ",\n";
+
+ // And to the convert function.
+ CvtOS << " case " << Signature << ":\n";
+ CurIndex = 0;
+ for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
+ InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
+
+ // Add the implicit operands.
+ for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex)
+ CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n";
+
+ CvtOS << " Operands[" << MIOperandList[i].second
+ << "]." << Op.Class->RenderMethod
+ << "(Inst, " << Op.OperandInfo->MINumOperands << ");\n";
+ CurIndex += Op.OperandInfo->MINumOperands;
+ }
+
+ // And add trailing implicit operands.
+ for (; CurIndex != NumMIOperands; ++CurIndex)
+ CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n";
+ CvtOS << " break;\n";
+ }
+
+ // Finish the convert function.
+
+ CvtOS << " }\n";
+ CvtOS << " return false;\n";
+ CvtOS << "}\n\n";
+
+ // Finish the enum, and drop the convert function after it.
+
+ OS << " NumConversionVariants\n";
+ OS << "};\n\n";
+
+ OS << CvtOS.str();
+}
+
+/// EmitMatchClassEnumeration - Emit the enumeration for match class kinds.
+static void EmitMatchClassEnumeration(CodeGenTarget &Target,
+ std::vector<ClassInfo*> &Infos,
+ raw_ostream &OS) {
+ OS << "namespace {\n\n";
+
+ OS << "/// MatchClassKind - The kinds of classes which participate in\n"
+ << "/// instruction matching.\n";
+ OS << "enum MatchClassKind {\n";
+ OS << " InvalidMatchClass = 0,\n";
+ for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ ClassInfo &CI = **it;
+ OS << " " << CI.Name << ", // ";
+ if (CI.Kind == ClassInfo::Token) {
+ OS << "'" << CI.ValueName << "'\n";
+ } else if (CI.isRegisterClass()) {
+ if (!CI.ValueName.empty())
+ OS << "register class '" << CI.ValueName << "'\n";
+ else
+ OS << "derived register class\n";
+ } else {
+ OS << "user defined class '" << CI.ValueName << "'\n";
+ }
+ }
+ OS << " NumMatchClassKinds\n";
+ OS << "};\n\n";
+
+ OS << "}\n\n";
+}
+
+/// EmitClassifyOperand - Emit the function to classify an operand.
+static void EmitClassifyOperand(CodeGenTarget &Target,
+ AsmMatcherInfo &Info,
+ raw_ostream &OS) {
+ OS << "static MatchClassKind ClassifyOperand("
+ << Target.getName() << "Operand &Operand) {\n";
+
+ // Classify tokens.
+ OS << " if (Operand.isToken())\n";
+ OS << " return MatchTokenString(Operand.getToken());\n\n";
+
+ // Classify registers.
+ //
+ // FIXME: Don't hardcode isReg, getReg.
+ OS << " if (Operand.isReg()) {\n";
+ OS << " switch (Operand.getReg()) {\n";
+ OS << " default: return InvalidMatchClass;\n";
+ for (std::map<Record*, ClassInfo*>::iterator
+ it = Info.RegisterClasses.begin(), ie = Info.RegisterClasses.end();
+ it != ie; ++it)
+ OS << " case " << Target.getName() << "::"
+ << it->first->getName() << ": return " << it->second->Name << ";\n";
+ OS << " }\n";
+ OS << " }\n\n";
+
+ // Classify user defined operands.
+ for (std::vector<ClassInfo*>::iterator it = Info.Classes.begin(),
+ ie = Info.Classes.end(); it != ie; ++it) {
+ ClassInfo &CI = **it;
+
+ if (!CI.isUserClass())
+ continue;
+
+ OS << " // '" << CI.ClassName << "' class";
+ if (!CI.SuperClasses.empty()) {
+ OS << ", subclass of ";
+ for (unsigned i = 0, e = CI.SuperClasses.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ OS << "'" << CI.SuperClasses[i]->ClassName << "'";
+ assert(CI < *CI.SuperClasses[i] && "Invalid class relation!");
+ }
+ }
+ OS << "\n";
+
+ OS << " if (Operand." << CI.PredicateMethod << "()) {\n";
+
+ // Validate subclass relationships.
+ if (!CI.SuperClasses.empty()) {
+ for (unsigned i = 0, e = CI.SuperClasses.size(); i != e; ++i)
+ OS << " assert(Operand." << CI.SuperClasses[i]->PredicateMethod
+ << "() && \"Invalid class relationship!\");\n";
+ }
+
+ OS << " return " << CI.Name << ";\n";
+ OS << " }\n\n";
+ }
+ OS << " return InvalidMatchClass;\n";
+ OS << "}\n\n";
+}
+
+/// EmitIsSubclass - Emit the subclass predicate function.
+static void EmitIsSubclass(CodeGenTarget &Target,
+ std::vector<ClassInfo*> &Infos,
+ raw_ostream &OS) {
+ OS << "/// IsSubclass - Compute whether \\arg A is a subclass of \\arg B.\n";
+ OS << "static bool IsSubclass(MatchClassKind A, MatchClassKind B) {\n";
+ OS << " if (A == B)\n";
+ OS << " return true;\n\n";
+
+ OS << " switch (A) {\n";
+ OS << " default:\n";
+ OS << " return false;\n";
+ for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ ClassInfo &A = **it;
+
+ if (A.Kind != ClassInfo::Token) {
+ std::vector<StringRef> SuperClasses;
+ for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ ClassInfo &B = **it;
+
+ if (&A != &B && A.isSubsetOf(B))
+ SuperClasses.push_back(B.Name);
+ }
+
+ if (SuperClasses.empty())
+ continue;
+
+ OS << "\n case " << A.Name << ":\n";
+
+ if (SuperClasses.size() == 1) {
+ OS << " return B == " << SuperClasses.back() << ";\n";
+ continue;
+ }
+
+ OS << " switch (B) {\n";
+ OS << " default: return false;\n";
+ for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
+ OS << " case " << SuperClasses[i] << ": return true;\n";
+ OS << " }\n";
+ }
+ }
+ OS << " }\n";
+ OS << "}\n\n";
+}
+
+typedef std::pair<std::string, std::string> StringPair;
+
+/// FindFirstNonCommonLetter - Find the first character in the keys of the
+/// string pairs that is not shared across the whole set of strings. All
+/// strings are assumed to have the same length.
+static unsigned
+FindFirstNonCommonLetter(const std::vector<const StringPair*> &Matches) {
+ assert(!Matches.empty());
+ for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
+ // Check to see if letter i is the same across the set.
+ char Letter = Matches[0]->first[i];
+
+ for (unsigned str = 0, e = Matches.size(); str != e; ++str)
+ if (Matches[str]->first[i] != Letter)
+ return i;
+ }
+
+ return Matches[0]->first.size();
+}
+
+/// EmitStringMatcherForChar - Given a set of strings that are known to be the
+/// same length and whose characters leading up to CharNo are the same, emit
+/// code to verify that CharNo and later are the same.
+///
+/// \return - True if control can leave the emitted code fragment.
+static bool EmitStringMatcherForChar(const std::string &StrVariableName,
+ const std::vector<const StringPair*> &Matches,
+ unsigned CharNo, unsigned IndentCount,
+ raw_ostream &OS) {
+ assert(!Matches.empty() && "Must have at least one string to match!");
+ std::string Indent(IndentCount*2+4, ' ');
+
+ // If we have verified that the entire string matches, we're done: output the
+ // matching code.
+ if (CharNo == Matches[0]->first.size()) {
+ assert(Matches.size() == 1 && "Had duplicate keys to match on");
+
+ // FIXME: If Matches[0].first has embedded \n, this will be bad.
+ OS << Indent << Matches[0]->second << "\t // \"" << Matches[0]->first
+ << "\"\n";
+ return false;
+ }
+
+ // Bucket the matches by the character we are comparing.
+ std::map<char, std::vector<const StringPair*> > MatchesByLetter;
+
+ for (unsigned i = 0, e = Matches.size(); i != e; ++i)
+ MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
+
+
+ // If we have exactly one bucket to match, see how many characters are common
+ // across the whole set and match all of them at once.
+ if (MatchesByLetter.size() == 1) {
+ unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
+ unsigned NumChars = FirstNonCommonLetter-CharNo;
+
+ // Emit code to break out if the prefix doesn't match.
+ if (NumChars == 1) {
+ // Do the comparison with if (Str[1] != 'f')
+ // FIXME: Need to escape general characters.
+ OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
+ << Matches[0]->first[CharNo] << "')\n";
+ OS << Indent << " break;\n";
+ } else {
+ // Do the comparison with if (Str.substr(1,3) != "foo").
+ // FIXME: Need to escape general strings.
+ OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << ","
+ << NumChars << ") != \"";
+ OS << Matches[0]->first.substr(CharNo, NumChars) << "\")\n";
+ OS << Indent << " break;\n";
+ }
+
+ return EmitStringMatcherForChar(StrVariableName, Matches,
+ FirstNonCommonLetter, IndentCount, OS);
+ }
+
+ // Otherwise, we have multiple possible things; emit a switch on the
+ // character.
+ OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
+ OS << Indent << "default: break;\n";
+
+ for (std::map<char, std::vector<const StringPair*> >::iterator LI =
+ MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
+ // TODO: escape hard stuff (like \n) if we ever care about it.
+ OS << Indent << "case '" << LI->first << "':\t // "
+ << LI->second.size() << " strings to match.\n";
+ if (EmitStringMatcherForChar(StrVariableName, LI->second, CharNo+1,
+ IndentCount+1, OS))
+ OS << Indent << " break;\n";
+ }
+
+ OS << Indent << "}\n";
+ return true;
+}
+
+
+/// EmitStringMatcher - Given a list of strings and code to execute when they
+/// match, output a simple switch tree to classify the input string.
+///
+/// If a match is found, the code in Vals[i].second is executed; control must
+/// not exit this code fragment. If nothing matches, execution falls through.
+///
+/// \param StrVariableName - The name of the variable to test.
+static void EmitStringMatcher(const std::string &StrVariableName,
+ const std::vector<StringPair> &Matches,
+ raw_ostream &OS) {
+ // First level categorization: group strings by length.
+ std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
+
+ for (unsigned i = 0, e = Matches.size(); i != e; ++i)
+ MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
+
+ // Output a switch statement on length and categorize the elements within each
+ // bin.
+ OS << " switch (" << StrVariableName << ".size()) {\n";
+ OS << " default: break;\n";
+
+ for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
+ MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
+ OS << " case " << LI->first << ":\t // " << LI->second.size()
+ << " strings to match.\n";
+ if (EmitStringMatcherForChar(StrVariableName, LI->second, 0, 0, OS))
+ OS << " break;\n";
+ }
+
+ OS << " }\n";
+}
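+// For illustration only (keys and result values are hypothetical), matching
+// {"bar", "bat", "foo"} yields a tree of roughly this shape:
+//
+//   switch (Name.size()) {
+//   default: break;
+//   case 3:  // 3 strings to match.
+//     switch (Name[0]) {
+//     default: break;
+//     case 'b':  // 2 strings to match.
+//       if (Name[1] != 'a')
+//         break;
+//       switch (Name[2]) {
+//       default: break;
+//       case 'r':  // 1 string to match.
+//         return MCK_Bar;  // "bar"
+//       case 't':  // 1 string to match.
+//         return MCK_Bat;  // "bat"
+//       }
+//       break;
+//     case 'f':  // 1 string to match.
+//       if (Name.substr(1, 2) != "oo")
+//         break;
+//       return MCK_Foo;  // "foo"
+//     }
+//     break;
+//   }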
+
+
+/// EmitMatchTokenString - Emit the function to match a token string to the
+/// appropriate match class value.
+static void EmitMatchTokenString(CodeGenTarget &Target,
+ std::vector<ClassInfo*> &Infos,
+ raw_ostream &OS) {
+ // Construct the match list.
+ std::vector<StringPair> Matches;
+ for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ ClassInfo &CI = **it;
+
+ if (CI.Kind == ClassInfo::Token)
+ Matches.push_back(StringPair(CI.ValueName, "return " + CI.Name + ";"));
+ }
+
+ OS << "static MatchClassKind MatchTokenString(const StringRef &Name) {\n";
+
+ EmitStringMatcher("Name", Matches, OS);
+
+ OS << " return InvalidMatchClass;\n";
+ OS << "}\n\n";
+}
+
+/// EmitMatchRegisterName - Emit the function to match a string to the target
+/// specific register enum.
+static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
+ raw_ostream &OS) {
+ // Construct the match list.
+ std::vector<StringPair> Matches;
+ for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
+ const CodeGenRegister &Reg = Target.getRegisters()[i];
+ if (Reg.TheDef->getValueAsString("AsmName").empty())
+ continue;
+
+ Matches.push_back(StringPair(Reg.TheDef->getValueAsString("AsmName"),
+ "return " + utostr(i + 1) + ";"));
+ }
+
+ OS << "unsigned " << Target.getName()
+ << AsmParser->getValueAsString("AsmParserClassName")
+ << "::MatchRegisterName(const StringRef &Name) {\n";
+
+ EmitStringMatcher("Name", Matches, OS);
+
+ OS << " return 0;\n";
+ OS << "}\n\n";
+}
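+// Note the i + 1 numbering above: the generated MatchRegisterName() reserves
+// 0 to mean "no such register", which is what the trailing "return 0;"
+// produces when nothing matches.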
+
+void AsmMatcherEmitter::run(raw_ostream &OS) {
+ CodeGenTarget Target;
+ Record *AsmParser = Target.getAsmParser();
+ std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
+
+ // Compute the information on the instructions to match.
+ AsmMatcherInfo Info(AsmParser);
+ Info.BuildInfo(Target);
+
+ // Sort the instruction table using the partial order on classes.
+ std::sort(Info.Instructions.begin(), Info.Instructions.end(),
+ less_ptr<InstructionInfo>());
+
+ DEBUG_WITH_TYPE("instruction_info", {
+ for (std::vector<InstructionInfo*>::iterator
+ it = Info.Instructions.begin(), ie = Info.Instructions.end();
+ it != ie; ++it)
+ (*it)->dump();
+ });
+
+ // Check for ambiguous instructions.
+ unsigned NumAmbiguous = 0;
+ for (unsigned i = 0, e = Info.Instructions.size(); i != e; ++i) {
+ for (unsigned j = i + 1; j != e; ++j) {
+ InstructionInfo &A = *Info.Instructions[i];
+ InstructionInfo &B = *Info.Instructions[j];
+
+ if (A.CouldMatchAmiguouslyWith(B)) {
+ DEBUG_WITH_TYPE("ambiguous_instrs", {
+ errs() << "warning: ambiguous instruction match:\n";
+ A.dump();
+ errs() << "\nis incomparable with:\n";
+ B.dump();
+ errs() << "\n\n";
+ });
+ ++NumAmbiguous;
+ }
+ }
+ }
+ if (NumAmbiguous)
+ DEBUG_WITH_TYPE("ambiguous_instrs", {
+ errs() << "warning: " << NumAmbiguous
+ << " ambiguous instructions!\n";
+ });
+
+ // Write the output.
+
+ EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
+
+ // Emit the function to match a register name to number.
+ EmitMatchRegisterName(Target, AsmParser, OS);
+
+ // Generate the unified function to convert operands into an MCInst.
+ EmitConvertToMCInst(Target, Info.Instructions, OS);
+
+ // Emit the enumeration for classes which participate in matching.
+ EmitMatchClassEnumeration(Target, Info.Classes, OS);
+
+ // Emit the routine to match token strings to their match class.
+ EmitMatchTokenString(Target, Info.Classes, OS);
+
+ // Emit the routine to classify an operand.
+ EmitClassifyOperand(Target, Info, OS);
+
+ // Emit the subclass predicate routine.
+ EmitIsSubclass(Target, Info.Classes, OS);
+
+ // Finally, build the match function.
+
+ size_t MaxNumOperands = 0;
+ for (std::vector<InstructionInfo*>::const_iterator it =
+ Info.Instructions.begin(), ie = Info.Instructions.end();
+ it != ie; ++it)
+ MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size());
+
+ OS << "bool " << Target.getName() << ClassName
+ << "::MatchInstruction("
+ << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, "
+ << "MCInst &Inst) {\n";
+
+ // Emit the static match table; unused classes get initialized to 0 which is
+ // guaranteed to be InvalidMatchClass.
+ //
+ // FIXME: We can reduce the size of this table very easily. First, we change
+ // it so that we store the kinds in separate bit-fields for each index, which
+ // only needs to be the max width used for classes at that index (we also need
+ // to reject based on this during classification). If we then make sure to
+ // order the match kinds appropriately (putting mnemonics last), then we
+ // should only end up using a few bits for each class, especially the ones
+ // following the mnemonic.
+ OS << " static const struct MatchEntry {\n";
+ OS << " unsigned Opcode;\n";
+ OS << " ConversionKind ConvertFn;\n";
+ OS << " MatchClassKind Classes[" << MaxNumOperands << "];\n";
+ OS << " } MatchTable[" << Info.Instructions.size() << "] = {\n";
+
+ for (std::vector<InstructionInfo*>::const_iterator it =
+ Info.Instructions.begin(), ie = Info.Instructions.end();
+ it != ie; ++it) {
+ InstructionInfo &II = **it;
+
+ OS << " { " << Target.getName() << "::" << II.InstrName
+ << ", " << II.ConversionFnKind << ", { ";
+ for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
+ InstructionInfo::Operand &Op = II.Operands[i];
+
+ if (i) OS << ", ";
+ OS << Op.Class->Name;
+ }
+ OS << " } },\n";
+ }
+
+ OS << " };\n\n";
+
+ // Emit code to compute the class list for this operand vector.
+ OS << " // Eliminate obvious mismatches.\n";
+ OS << " if (Operands.size() > " << MaxNumOperands << ")\n";
+ OS << " return true;\n\n";
+
+ OS << " // Compute the class list for this operand vector.\n";
+ OS << " MatchClassKind Classes[" << MaxNumOperands << "];\n";
+ OS << " for (unsigned i = 0, e = Operands.size(); i != e; ++i) {\n";
+ OS << " Classes[i] = ClassifyOperand(Operands[i]);\n\n";
+
+ OS << " // Check for invalid operands before matching.\n";
+ OS << " if (Classes[i] == InvalidMatchClass)\n";
+ OS << " return true;\n";
+ OS << " }\n\n";
+
+ OS << " // Mark unused classes.\n";
+ OS << " for (unsigned i = Operands.size(), e = " << MaxNumOperands << "; "
+ << "i != e; ++i)\n";
+ OS << " Classes[i] = InvalidMatchClass;\n\n";
+
+ // Emit code to search the table.
+ OS << " // Search the table.\n";
+ OS << " for (const MatchEntry *it = MatchTable, "
+ << "*ie = MatchTable + " << Info.Instructions.size()
+ << "; it != ie; ++it) {\n";
+ for (unsigned i = 0; i != MaxNumOperands; ++i) {
+ OS << " if (!IsSubclass(Classes["
+ << i << "], it->Classes[" << i << "]))\n";
+ OS << " continue;\n";
+ }
+ OS << "\n";
+ OS << " return ConvertToMCInst(it->ConvertFn, Inst, "
+ << "it->Opcode, Operands);\n";
+ OS << " }\n\n";
+
+ OS << " return true;\n";
+ OS << "}\n\n";
+}
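+// A sketch of what this emits (opcode, conversion kind, and class names are
+// hypothetical):
+//
+//   static const struct MatchEntry {
+//     unsigned Opcode;
+//     ConversionKind ConvertFn;
+//     MatchClassKind Classes[2];
+//   } MatchTable[1] = {
+//     { X86::ADD32rr, Convert_Reg_Reg, { MCK_GR32, MCK_GR32 } },
+//   };
+//
+// The emitted loop then scans MatchTable linearly, comparing each operand
+// with IsSubclass() so that a more specific class can satisfy a more general
+// table entry, and hands the first hit to ConvertToMCInst().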
diff --git a/utils/TableGen/AsmMatcherEmitter.h b/utils/TableGen/AsmMatcherEmitter.h
new file mode 100644
index 000000000000..729c938fcd36
--- /dev/null
+++ b/utils/TableGen/AsmMatcherEmitter.h
@@ -0,0 +1,33 @@
+//===- AsmMatcherEmitter.h - Generate an assembly matcher -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits a target specific matcher for converting parsed
+// assembly operands into the MCInst structures.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMMATCHER_EMITTER_H
+#define ASMMATCHER_EMITTER_H
+
+#include "TableGenBackend.h"
+#include <map>
+#include <vector>
+#include <cassert>
+
+namespace llvm {
+ class AsmMatcherEmitter : public TableGenBackend {
+ RecordKeeper &Records;
+ public:
+ AsmMatcherEmitter(RecordKeeper &R) : Records(R) {}
+
+ // run - Output the matcher.
+ void run(raw_ostream &o);
+ };
+}
+#endif
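+// Driver usage follows the usual TableGenBackend pattern (sketch):
+//   AsmMatcherEmitter(Records).run(OS);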
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index f34feef0c28d..84a647bea309 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -15,13 +15,14 @@
#include "AsmWriterEmitter.h"
#include "CodeGenTarget.h"
#include "Record.h"
+#include "StringToOffsetTable.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
-#include <iostream>
using namespace llvm;
+
static bool isIdentChar(char C) {
return (C >= 'a' && C <= 'z') ||
(C >= 'A' && C <= 'Z') ||
@@ -32,10 +33,20 @@ static bool isIdentChar(char C) {
// This should be an anon namespace, this works around a GCC warning.
namespace llvm {
struct AsmWriterOperand {
- enum { isLiteralTextOperand, isMachineInstrOperand } OperandType;
+ enum OpType {
+ // Output this text surrounded by quotes to the asm.
+ isLiteralTextOperand,
+ // This is the name of a routine to call to print the operand.
+ isMachineInstrOperand,
+ // Output this text verbatim to the asm writer. It is code that
+ // will output some text to the asm.
+ isLiteralStatementOperand
+ } OperandType;
/// Str - For isLiteralTextOperand, this IS the literal text. For
- /// isMachineInstrOperand, this is the PrinterMethodName for the operand.
+ /// isMachineInstrOperand, this is the PrinterMethodName for the operand.
+ /// For isLiteralStatementOperand, this is the code to insert verbatim
+ /// into the asm writer.
std::string Str;
/// MiOpNo - For isMachineInstrOperand, this is the operand number of the
@@ -47,14 +58,16 @@ namespace llvm {
std::string MiModifier;
// To make VS STL happy
- AsmWriterOperand():OperandType(isLiteralTextOperand) {}
+ AsmWriterOperand(OpType op = isLiteralTextOperand):OperandType(op) {}
- explicit AsmWriterOperand(const std::string &LitStr)
- : OperandType(isLiteralTextOperand), Str(LitStr) {}
+ AsmWriterOperand(const std::string &LitStr,
+ OpType op = isLiteralTextOperand)
+ : OperandType(op), Str(LitStr) {}
AsmWriterOperand(const std::string &Printer, unsigned OpNo,
- const std::string &Modifier)
- : OperandType(isMachineInstrOperand), Str(Printer), MIOpNo(OpNo),
+ const std::string &Modifier,
+ OpType op = isMachineInstrOperand)
+ : OperandType(op), Str(Printer), MIOpNo(OpNo),
MiModifier(Modifier) {}
bool operator!=(const AsmWriterOperand &Other) const {
@@ -78,7 +91,7 @@ namespace llvm {
std::vector<AsmWriterOperand> Operands;
const CodeGenInstruction *CGI;
- AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant);
+ AsmWriterInst(const CodeGenInstruction &CGI, Record *AsmWriter);
/// MatchesAllButOneOp - If this instruction is exactly identical to the
/// specified instruction except for one differing operand, return the
@@ -100,8 +113,14 @@ namespace llvm {
std::string AsmWriterOperand::getCode() const {
- if (OperandType == isLiteralTextOperand)
+ if (OperandType == isLiteralTextOperand) {
+ if (Str.size() == 1)
+ return "O << '" + Str + "'; ";
return "O << \"" + Str + "\"; ";
+ }
+
+ if (OperandType == isLiteralStatementOperand)
+ return Str;
std::string Result = Str + "(MI";
if (MIOpNo != ~0U)
@@ -115,10 +134,19 @@ std::string AsmWriterOperand::getCode() const {
/// ParseAsmString - Parse the specified Instruction's AsmString into this
/// AsmWriterInst.
///
-AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
+AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, Record *AsmWriter) {
this->CGI = &CGI;
+
+ unsigned Variant = AsmWriter->getValueAsInt("Variant");
+ int FirstOperandColumn = AsmWriter->getValueAsInt("FirstOperandColumn");
+ int OperandSpacing = AsmWriter->getValueAsInt("OperandSpacing");
+
unsigned CurVariant = ~0U; // ~0 if we are outside a {.|.|.} region, other #.
+ // This is the number of tabs we've seen if we're doing columnar layout.
+ unsigned CurColumn = 0;
+
+
// NOTE: Any extensions to this code need to be mirrored in the
// AsmPrinter::printInlineAsm code that executes as compile time (assuming
// that inline asm strings should also get the new feature)!
@@ -130,14 +158,35 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
if (DollarPos == std::string::npos) DollarPos = AsmString.size();
// Emit a constant string fragment.
+
if (DollarPos != LastEmitted) {
if (CurVariant == Variant || CurVariant == ~0U) {
for (; LastEmitted != DollarPos; ++LastEmitted)
switch (AsmString[LastEmitted]) {
- case '\n': AddLiteralString("\\n"); break;
- case '\t': AddLiteralString("\\t"); break;
- case '"': AddLiteralString("\\\""); break;
- case '\\': AddLiteralString("\\\\"); break;
+ case '\n':
+ AddLiteralString("\\n");
+ break;
+ case '\t':
+ // If the asm writer is not using a columnar layout, \t is not
+ // magic.
+ if (FirstOperandColumn == -1 || OperandSpacing == -1) {
+ AddLiteralString("\\t");
+ } else {
+ // We recognize a tab as an operand delimiter.
+ unsigned DestColumn = FirstOperandColumn +
+ CurColumn++ * OperandSpacing;
+ Operands.push_back(
+ AsmWriterOperand("O.PadToColumn(" +
+ utostr(DestColumn) + ");\n",
+ AsmWriterOperand::isLiteralStatementOperand));
+ }
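+ // Worked example with hypothetical settings FirstOperandColumn = 8
+ // and OperandSpacing = 8: the first tab pads to column 8, the second
+ // to 16, i.e. FirstOperandColumn + CurColumn * OperandSpacing as
+ // CurColumn advances past each tab.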
+ break;
+ case '"':
+ AddLiteralString("\\\"");
+ break;
+ case '\\':
+ AddLiteralString("\\\\");
+ break;
default:
AddLiteralString(std::string(1, AsmString[LastEmitted]));
break;
@@ -151,7 +200,20 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
if (AsmString[DollarPos+1] == 'n') {
AddLiteralString("\\n");
} else if (AsmString[DollarPos+1] == 't') {
- AddLiteralString("\\t");
+ // If the asm writer is not using a columnar layout, \t is not
+ // magic.
+ if (FirstOperandColumn == -1 || OperandSpacing == -1) {
+ AddLiteralString("\\t");
+ break;
+ }
+
+ // We recognize a tab as an operand delimiter.
+ unsigned DestColumn = FirstOperandColumn +
+ CurColumn++ * OperandSpacing;
+ Operands.push_back(
+ AsmWriterOperand("O.PadToColumn(" + utostr(DestColumn) + ");\n",
+ AsmWriterOperand::isLiteralStatementOperand));
+ break;
} else if (std::string("${|}\\").find(AsmString[DollarPos+1])
!= std::string::npos) {
AddLiteralString(std::string(1, AsmString[DollarPos+1]));
@@ -182,13 +244,14 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
CurVariant = ~0U;
} else if (DollarPos+1 != AsmString.size() &&
AsmString[DollarPos+1] == '$') {
- if (CurVariant == Variant || CurVariant == ~0U)
+ if (CurVariant == Variant || CurVariant == ~0U) {
AddLiteralString("$"); // "$$" -> $
+ }
LastEmitted = DollarPos+2;
} else {
// Get the name of the variable.
std::string::size_type VarEnd = DollarPos+1;
-
+
// handle ${foo}bar as $foo by detecting whether the character following
// the dollar sign is a curly brace. If so, advance VarEnd and DollarPos
// so the variable name does not contain the leading curly brace.
@@ -259,8 +322,9 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
LastEmitted = VarEnd;
}
}
-
- AddLiteralString("\\n");
+
+ Operands.push_back(AsmWriterOperand("return;",
+ AsmWriterOperand::isLiteralStatementOperand));
}
/// MatchesAllButOneOp - If this instruction is exactly identical to the
@@ -357,7 +421,6 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
}
O << "\n";
}
-
O << " break;\n";
}
@@ -384,10 +447,6 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
Command = " " + Inst->Operands[0].getCode() + "\n";
- // If this is the last operand, emit a return.
- if (Inst->Operands.size() == 1)
- Command += " return true;\n";
-
// Check to see if we already have 'Command' in UniqueOperandCommands.
// If not, add it.
bool FoundIt = false;
@@ -431,7 +490,10 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
// Otherwise, scan to see if all of the other instructions in this command
// set share the operand.
bool AllSame = true;
-
+ // Keep track of the maximum number of operands of any
+ // instruction we see in the group.
+ size_t MaxSize = FirstInst->Operands.size();
+
for (NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx);
NIT != InstIdxs.end();
NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx)) {
@@ -439,6 +501,11 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
// matches, we're ok, otherwise bail out.
const AsmWriterInst *OtherInst =
getAsmWriterInstByID(NIT-InstIdxs.begin());
+
+ if (OtherInst &&
+ OtherInst->Operands.size() > FirstInst->Operands.size())
+ MaxSize = std::max(MaxSize, OtherInst->Operands.size());
+
if (!OtherInst || OtherInst->Operands.size() == Op ||
OtherInst->Operands[Op] != FirstInst->Operands[Op]) {
AllSame = false;
@@ -451,10 +518,6 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
// to UniqueOperandCommands and remember that it was consumed.
std::string Command = " " + FirstInst->Operands[Op].getCode() + "\n";
- // If this is the last operand, emit a return after the code.
- if (FirstInst->Operands.size() == Op+1)
- Command += " return true;\n";
-
UniqueOperandCommands[CommandIdx] += Command;
InstOpsUsed[CommandIdx]++;
}
@@ -475,29 +538,26 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
}
-
-void AsmWriterEmitter::run(raw_ostream &O) {
- EmitSourceFileHeader("Assembly Writer Source Fragment", O);
-
+/// EmitPrintInstruction - Generate the code for the "printInstruction" method
+/// implementation.
+void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
CodeGenTarget Target;
Record *AsmWriter = Target.getAsmWriter();
std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
- unsigned Variant = AsmWriter->getValueAsInt("Variant");
-
+
O <<
"/// printInstruction - This method is automatically generated by tablegen\n"
- "/// from the instruction set description. This method returns true if the\n"
- "/// machine instruction was sufficiently described to print it, otherwise\n"
- "/// it returns false.\n"
- "bool " << Target.getName() << ClassName
+ "/// from the instruction set description.\n"
+ "void " << Target.getName() << ClassName
<< "::printInstruction(const MachineInstr *MI) {\n";
std::vector<AsmWriterInst> Instructions;
for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
E = Target.inst_end(); I != E; ++I)
- if (!I->second.AsmString.empty())
- Instructions.push_back(AsmWriterInst(I->second, Variant));
+ if (!I->second.AsmString.empty() &&
+ I->second.TheDef->getName() != "PHI")
+ Instructions.push_back(AsmWriterInst(I->second, AsmWriter));
// Get the instruction numbering.
Target.getInstructionsByEnumValue(NumberedInstructions);
@@ -509,10 +569,7 @@ void AsmWriterEmitter::run(raw_ostream &O) {
CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i]));
// Build an aggregate string, and build a table of offsets into it.
- std::map<std::string, unsigned> StringOffset;
- std::string AggregateString;
- AggregateString.push_back(0); // "\0"
- AggregateString.push_back(0); // "\0"
+ StringToOffsetTable StringTable;
/// OpcodeInfo - This encodes the index of the string to use for the first
/// chunk of the output as well as indices used for operand printing.
@@ -524,32 +581,28 @@ void AsmWriterEmitter::run(raw_ostream &O) {
unsigned Idx;
if (AWI == 0) {
// Something not handled by the asmwriter printer.
- Idx = 0;
+ Idx = ~0U;
} else if (AWI->Operands[0].OperandType !=
AsmWriterOperand::isLiteralTextOperand ||
AWI->Operands[0].Str.empty()) {
// Something handled by the asmwriter printer, but with no leading string.
- Idx = 1;
+ Idx = StringTable.GetOrAddStringOffset("");
} else {
- unsigned &Entry = StringOffset[AWI->Operands[0].Str];
- if (Entry == 0) {
- // Add the string to the aggregate if this is the first time found.
- MaxStringIdx = Entry = AggregateString.size();
- std::string Str = AWI->Operands[0].Str;
- UnescapeString(Str);
- AggregateString += Str;
- AggregateString += '\0';
- }
- Idx = Entry;
-
+ std::string Str = AWI->Operands[0].Str;
+ UnescapeString(Str);
+ Idx = StringTable.GetOrAddStringOffset(Str);
+ MaxStringIdx = std::max(MaxStringIdx, Idx);
+
// Nuke the string from the operand list. It is now handled!
AWI->Operands.erase(AWI->Operands.begin());
}
- OpcodeInfo.push_back(Idx);
+
+ // Bias offset by one since we want 0 as a sentinel.
+ OpcodeInfo.push_back(Idx+1);
}
// Figure out how many bits we used for the string index.
- unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+1);
+ unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+2);
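+ // Hypothetical numbers: with MaxStringIdx == 6, the biased indices stored
+ // in OpcodeInfo run from 0 to 7, so Log2_32_Ceil(8) == 3 bits suffice; the
+ // printer below undoes the bias with "AsmStrs+(Bits & mask)-1".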
// To reduce code size, we compactify common instructions into a few bits
// in the opcode-indexed table.
@@ -557,17 +610,8 @@ void AsmWriterEmitter::run(raw_ostream &O) {
std::vector<std::vector<std::string> > TableDrivenOperandPrinters;
- bool isFirst = true;
while (1) {
std::vector<std::string> UniqueOperandCommands;
-
- // For the first operand check, add a default value for instructions with
- // just opcode strings to use.
- if (isFirst) {
- UniqueOperandCommands.push_back(" return true;\n");
- isFirst = false;
- }
-
std::vector<unsigned> InstIdxs;
std::vector<unsigned> NumInstOpsHandled;
FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs,
@@ -582,8 +626,8 @@ void AsmWriterEmitter::run(raw_ostream &O) {
// If we don't have enough bits for this operand, don't include it.
if (NumBits > BitsLeft) {
- DOUT << "Not enough bits to densely encode " << NumBits
- << " more bits\n";
+ DEBUG(errs() << "Not enough bits to densely encode " << NumBits
+ << " more bits\n");
break;
}
@@ -621,52 +665,24 @@ void AsmWriterEmitter::run(raw_ostream &O) {
O << " };\n\n";
// Emit the string itself.
- O << " const char *AsmStrs = \n \"";
- unsigned CharsPrinted = 0;
- EscapeString(AggregateString);
- for (unsigned i = 0, e = AggregateString.size(); i != e; ++i) {
- if (CharsPrinted > 70) {
- O << "\"\n \"";
- CharsPrinted = 0;
- }
- O << AggregateString[i];
- ++CharsPrinted;
-
- // Print escape sequences all together.
- if (AggregateString[i] == '\\') {
- assert(i+1 < AggregateString.size() && "Incomplete escape sequence!");
- if (isdigit(AggregateString[i+1])) {
- assert(isdigit(AggregateString[i+2]) && isdigit(AggregateString[i+3]) &&
- "Expected 3 digit octal escape!");
- O << AggregateString[++i];
- O << AggregateString[++i];
- O << AggregateString[++i];
- CharsPrinted += 3;
- } else {
- O << AggregateString[++i];
- ++CharsPrinted;
- }
- }
- }
- O << "\";\n\n";
-
- O << " processDebugLoc(MI->getDebugLoc());\n\n";
+ O << " const char *AsmStrs = \n";
+ StringTable.EmitString(O);
+ O << ";\n\n";
O << "\n#ifndef NO_ASM_WRITER_BOILERPLATE\n";
O << " if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {\n"
<< " O << \"\\t\";\n"
<< " printInlineAsm(MI);\n"
- << " return true;\n"
+ << " return;\n"
<< " } else if (MI->isLabel()) {\n"
<< " printLabel(MI);\n"
- << " return true;\n"
- << " } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) {\n"
- << " printDeclare(MI);\n"
- << " return true;\n"
+ << " return;\n"
<< " } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {\n"
<< " printImplicitDef(MI);\n"
- << " return true;\n"
+ << " return;\n"
+ << " } else if (MI->getOpcode() == TargetInstrInfo::KILL) {\n"
+ << " return;\n"
<< " }\n\n";
O << "\n#endif\n";
@@ -675,8 +691,8 @@ void AsmWriterEmitter::run(raw_ostream &O) {
O << " // Emit the opcode for the instruction.\n"
<< " unsigned Bits = OpInfo[MI->getOpcode()];\n"
- << " if (Bits == 0) return false;\n"
- << " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ");\n\n";
+ << " assert(Bits != 0 && \"Cannot print this instruction.\");\n"
+ << " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n";
// Output the table driven operand information.
BitsLeft = 32-AsmStrBits;
@@ -732,6 +748,11 @@ void AsmWriterEmitter::run(raw_ostream &O) {
// elements in the vector.
std::reverse(Instructions.begin(), Instructions.end());
+
+ // Now that we've emitted all of the operand info that fit into 32 bits, emit
+ // information for those instructions that are left. This is a less dense
+ // encoding, but we expect the main 32-bit table to handle the majority of
+ // instructions.
if (!Instructions.empty()) {
// Find the opcode # of inline asm.
O << " switch (MI->getOpcode()) {\n";
@@ -739,8 +760,61 @@ void AsmWriterEmitter::run(raw_ostream &O) {
EmitInstructions(Instructions, O);
O << " }\n";
- O << " return true;\n";
+ O << " return;\n";
}
-
+
+ O << " return;\n";
O << "}\n";
}
+
+
+void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
+ CodeGenTarget Target;
+ Record *AsmWriter = Target.getAsmWriter();
+ std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
+ const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+
+ StringToOffsetTable StringTable;
+ O <<
+ "\n\n/// getRegisterName - This method is automatically generated by tblgen\n"
+ "/// from the register set description. This returns the assembler name\n"
+ "/// for the specified register.\n"
+ "const char *" << Target.getName() << ClassName
+ << "::getRegisterName(unsigned RegNo) {\n"
+ << " assert(RegNo && RegNo < " << (Registers.size()+1)
+ << " && \"Invalid register number!\");\n"
+ << "\n"
+ << " static const unsigned RegAsmOffset[] = {";
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ const CodeGenRegister &Reg = Registers[i];
+
+ std::string AsmName = Reg.TheDef->getValueAsString("AsmName");
+ if (AsmName.empty())
+ AsmName = Reg.getName();
+
+
+ if ((i % 14) == 0)
+ O << "\n ";
+
+ O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
+ }
+ O << "0\n"
+ << " };\n"
+ << "\n";
+
+ O << " const char *AsmStrs =\n";
+ StringTable.EmitString(O);
+ O << ";\n";
+
+ O << " return AsmStrs+RegAsmOffset[RegNo-1];\n"
+ << "}\n";
+}
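+// Illustrative shape of the emitted function (class and register names are
+// hypothetical):
+//
+//   const char *X86ATTAsmPrinter::getRegisterName(unsigned RegNo) {
+//     assert(RegNo && RegNo < 97 && "Invalid register number!");
+//     static const unsigned RegAsmOffset[] = { 0, 4, 8, /* ... */ 0 };
+//     const char *AsmStrs = "eax\0ebx\0ecx\0...";
+//     return AsmStrs+RegAsmOffset[RegNo-1];
+//   }
+//
+// RegNo is 1-based; register number 0 is reserved to mean "no register",
+// hence the assert and the RegNo-1 indexing.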
+
+
+void AsmWriterEmitter::run(raw_ostream &O) {
+ EmitSourceFileHeader("Assembly Writer Source Fragment", O);
+
+ EmitPrintInstruction(O);
+ EmitGetRegisterName(O);
+}
+
diff --git a/utils/TableGen/AsmWriterEmitter.h b/utils/TableGen/AsmWriterEmitter.h
index 75e69964ef89..7862caa25a8a 100644
--- a/utils/TableGen/AsmWriterEmitter.h
+++ b/utils/TableGen/AsmWriterEmitter.h
@@ -35,6 +35,9 @@ namespace llvm {
void run(raw_ostream &o);
private:
+ void EmitPrintInstruction(raw_ostream &o);
+ void EmitGetRegisterName(raw_ostream &o);
+
AsmWriterInst *getAsmWriterInstByID(unsigned ID) const {
assert(ID < NumberedInstructions.size());
std::map<const CodeGenInstruction*, AsmWriterInst*>::const_iterator I =
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 6ec1d9967922..e568c62795db 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -1,4 +1,5 @@
add_executable(tblgen
+ AsmMatcherEmitter.cpp
AsmWriterEmitter.cpp
CallingConvEmitter.cpp
ClangDiagnosticsEmitter.cpp
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index a14be0b76fd6..28ba2ed49fc1 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -26,9 +26,9 @@ void CallingConvEmitter::run(raw_ostream &O) {
// other.
for (unsigned i = 0, e = CCs.size(); i != e; ++i) {
O << "static bool " << CCs[i]->getName()
- << "(unsigned ValNo, MVT ValVT,\n"
+ << "(unsigned ValNo, EVT ValVT,\n"
<< std::string(CCs[i]->getName().size()+13, ' ')
- << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+ << "EVT LocVT, CCValAssign::LocInfo LocInfo,\n"
<< std::string(CCs[i]->getName().size()+13, ' ')
<< "ISD::ArgFlagsTy ArgFlags, CCState &State);\n";
}
@@ -44,9 +44,9 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
Counter = 0;
O << "\n\nstatic bool " << CC->getName()
- << "(unsigned ValNo, MVT ValVT,\n"
+ << "(unsigned ValNo, EVT ValVT,\n"
<< std::string(CC->getName().size()+13, ' ')
- << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+ << "EVT LocVT, CCValAssign::LocInfo LocInfo,\n"
<< std::string(CC->getName().size()+13, ' ')
<< "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
// Emit all of the actions, in order.
@@ -163,12 +163,12 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << Size << ", ";
else
O << "\n" << IndentStr << " State.getTarget().getTargetData()"
- "->getTypeAllocSize(LocVT.getTypeForMVT()), ";
+ "->getTypeAllocSize(LocVT.getTypeForEVT(State.getContext())), ";
if (Align)
O << Align;
else
O << "\n" << IndentStr << " State.getTarget().getTargetData()"
- "->getABITypeAlignment(LocVT.getTypeForMVT())";
+ "->getABITypeAlignment(LocVT.getTypeForEVT(State.getContext()))";
O << ");\n" << IndentStr
<< "State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset"
<< Counter << ", LocVT, LocInfo));\n";
@@ -186,6 +186,10 @@ void CallingConvEmitter::EmitAction(Record *Action,
Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
O << IndentStr << "LocInfo = CCValAssign::BCvt;\n";
+ } else if (Action->isSubClassOf("CCPassIndirect")) {
+ Record *DestTy = Action->getValueAsDef("DestTy");
+ O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
+ O << IndentStr << "LocInfo = CCValAssign::Indirect;\n";
} else if (Action->isSubClassOf("CCPassByVal")) {
int Size = Action->getValueAsInt("Size");
int Align = Action->getValueAsInt("Align");
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 646602557b1f..7e6c769ac4f5 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -29,7 +29,7 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
R->getName() == "DBG_LABEL" ||
R->getName() == "EH_LABEL" ||
R->getName() == "GC_LABEL" ||
- R->getName() == "DECLARE" ||
+ R->getName() == "KILL" ||
R->getName() == "EXTRACT_SUBREG" ||
R->getName() == "INSERT_SUBREG" ||
R->getName() == "IMPLICIT_DEF" ||
@@ -106,7 +106,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
R->getName() == "DBG_LABEL" ||
R->getName() == "EH_LABEL" ||
R->getName() == "GC_LABEL" ||
- R->getName() == "DECLARE" ||
+ R->getName() == "KILL" ||
R->getName() == "EXTRACT_SUBREG" ||
R->getName() == "INSERT_SUBREG" ||
R->getName() == "IMPLICIT_DEF" ||
@@ -144,7 +144,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
InstName == "DBG_LABEL"||
InstName == "EH_LABEL"||
InstName == "GC_LABEL"||
- InstName == "DECLARE"||
+ InstName == "KILL"||
InstName == "EXTRACT_SUBREG" ||
InstName == "INSERT_SUBREG" ||
InstName == "IMPLICIT_DEF" ||
@@ -243,8 +243,10 @@ void CodeEmitterGen::run(raw_ostream &o) {
// Default case: unhandled opcode
o << " default:\n"
- << " cerr << \"Not supported instr: \" << MI << \"\\n\";\n"
- << " abort();\n"
+ << " std::string msg;\n"
+ << " raw_string_ostream Msg(msg);\n"
+ << " Msg << \"Not supported instr: \" << MI;\n"
+ << " llvm_report_error(Msg.str());\n"
<< " }\n"
<< " return Value;\n"
<< "}\n\n";
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 2289ae789bdf..6b8ceaefa25e 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -55,15 +55,15 @@ ConvertVTs(const std::vector<MVT::SimpleValueType> &InVTs) {
}
static inline bool isInteger(MVT::SimpleValueType VT) {
- return MVT(VT).isInteger();
+ return EVT(VT).isInteger();
}
static inline bool isFloatingPoint(MVT::SimpleValueType VT) {
- return MVT(VT).isFloatingPoint();
+ return EVT(VT).isFloatingPoint();
}
static inline bool isVector(MVT::SimpleValueType VT) {
- return MVT(VT).isVector();
+ return EVT(VT).isVector();
}
static bool LHSIsSubsetOfRHS(const std::vector<unsigned char> &LHS,
@@ -76,23 +76,33 @@ static bool LHSIsSubsetOfRHS(const std::vector<unsigned char> &LHS,
}
namespace llvm {
-namespace EMVT {
+namespace EEVT {
/// isExtIntegerInVTs - Return true if the specified extended value type vector
-/// contains isInt or an integer value type.
+/// contains iAny or an integer value type.
bool isExtIntegerInVTs(const std::vector<unsigned char> &EVTs) {
assert(!EVTs.empty() && "Cannot check for integer in empty ExtVT list!");
- return EVTs[0] == isInt || !(FilterEVTs(EVTs, isInteger).empty());
+ return EVTs[0] == MVT::iAny || !(FilterEVTs(EVTs, isInteger).empty());
}
/// isExtFloatingPointInVTs - Return true if the specified extended value type
-/// vector contains isFP or a FP value type.
+/// vector contains fAny or a FP value type.
bool isExtFloatingPointInVTs(const std::vector<unsigned char> &EVTs) {
- assert(!EVTs.empty() && "Cannot check for integer in empty ExtVT list!");
- return EVTs[0] == isFP || !(FilterEVTs(EVTs, isFloatingPoint).empty());
+ assert(!EVTs.empty() && "Cannot check for FP in empty ExtVT list!");
+ return EVTs[0] == MVT::fAny || !(FilterEVTs(EVTs, isFloatingPoint).empty());
+}
+
+/// isExtVectorInVTs - Return true if the specified extended value type
+/// vector contains vAny or a vector value type.
+bool isExtVectorInVTs(const std::vector<unsigned char> &EVTs) {
+ assert(!EVTs.empty() && "Cannot check for vector in empty ExtVT list!");
+ return EVTs[0] == MVT::vAny || !(FilterEVTs(EVTs, isVector).empty());
}
-} // end namespace EMVT.
+} // end namespace EEVT.
} // end namespace llvm.
+bool RecordPtrCmp::operator()(const Record *LHS, const Record *RHS) const {
+ return LHS->getID() < RHS->getID();
+}
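+// Ordering records by getID() rather than by pointer value keeps map
+// iteration deterministic across runs, so the generated source does not
+// depend on where the allocator happened to place each Record.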
/// Dependent variable map for CodeGenDAGPattern variant generation
typedef std::map<std::string, int> DepVarMap;
@@ -128,14 +138,14 @@ void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) {
//! Dump the dependent variable set:
void DumpDepVars(MultipleUseVarSet &DepVars) {
if (DepVars.empty()) {
- DOUT << "<empty set>";
+ DEBUG(errs() << "<empty set>");
} else {
- DOUT << "[ ";
+ DEBUG(errs() << "[ ");
for (MultipleUseVarSet::const_iterator i = DepVars.begin(), e = DepVars.end();
i != e; ++i) {
- DOUT << (*i) << " ";
+ DEBUG(errs() << (*i) << " ");
}
- DOUT << "]";
+ DEBUG(errs() << "]");
}
}
}
@@ -183,6 +193,8 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
ConstraintType = SDTCisInt;
} else if (R->isSubClassOf("SDTCisFP")) {
ConstraintType = SDTCisFP;
+ } else if (R->isSubClassOf("SDTCisVec")) {
+ ConstraintType = SDTCisVec;
} else if (R->isSubClassOf("SDTCisSameAs")) {
ConstraintType = SDTCisSameAs;
x.SDTCisSameAs_Info.OtherOperandNum = R->getValueAsInt("OtherOperandNum");
@@ -264,7 +276,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
// If we found exactly one supported integer type, apply it.
if (IntVTs.size() == 1)
return NodeToApply->UpdateNodeType(IntVTs[0], TP);
- return NodeToApply->UpdateNodeType(EMVT::isInt, TP);
+ return NodeToApply->UpdateNodeType(MVT::iAny, TP);
}
case SDTCisFP: {
// If there is only one FP type supported, this must be it.
@@ -274,7 +286,17 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
// If we found exactly one supported FP type, apply it.
if (FPVTs.size() == 1)
return NodeToApply->UpdateNodeType(FPVTs[0], TP);
- return NodeToApply->UpdateNodeType(EMVT::isFP, TP);
+ return NodeToApply->UpdateNodeType(MVT::fAny, TP);
+ }
+ case SDTCisVec: {
+ // If there is only one vector type supported, this must be it.
+ std::vector<MVT::SimpleValueType> VecVTs =
+ FilterVTs(CGT.getLegalValueTypes(), isVector);
+
+ // If we found exactly one supported vector type, apply it.
+ if (VecVTs.size() == 1)
+ return NodeToApply->UpdateNodeType(VecVTs[0], TP);
+ return NodeToApply->UpdateNodeType(MVT::vAny, TP);
}
case SDTCisSameAs: {
TreePatternNode *OtherNode =
@@ -300,7 +322,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
// It must be integer.
bool MadeChange = false;
- MadeChange |= OtherNode->UpdateNodeType(EMVT::isInt, TP);
+ MadeChange |= OtherNode->UpdateNodeType(MVT::iAny, TP);
// This code only handles nodes that have one type set. Assert here so
// that we can change this if we ever need to deal with multiple value
@@ -320,25 +342,25 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
// This code does not currently handle nodes which have multiple types,
// where some types are integer, and some are fp. Assert that this is not
// the case.
- assert(!(EMVT::isExtIntegerInVTs(NodeToApply->getExtTypes()) &&
- EMVT::isExtFloatingPointInVTs(NodeToApply->getExtTypes())) &&
- !(EMVT::isExtIntegerInVTs(BigOperand->getExtTypes()) &&
- EMVT::isExtFloatingPointInVTs(BigOperand->getExtTypes())) &&
+ assert(!(EEVT::isExtIntegerInVTs(NodeToApply->getExtTypes()) &&
+ EEVT::isExtFloatingPointInVTs(NodeToApply->getExtTypes())) &&
+ !(EEVT::isExtIntegerInVTs(BigOperand->getExtTypes()) &&
+ EEVT::isExtFloatingPointInVTs(BigOperand->getExtTypes())) &&
"SDTCisOpSmallerThanOp does not handle mixed int/fp types!");
- if (EMVT::isExtIntegerInVTs(NodeToApply->getExtTypes()))
- MadeChange |= BigOperand->UpdateNodeType(EMVT::isInt, TP);
- else if (EMVT::isExtFloatingPointInVTs(NodeToApply->getExtTypes()))
- MadeChange |= BigOperand->UpdateNodeType(EMVT::isFP, TP);
- if (EMVT::isExtIntegerInVTs(BigOperand->getExtTypes()))
- MadeChange |= NodeToApply->UpdateNodeType(EMVT::isInt, TP);
- else if (EMVT::isExtFloatingPointInVTs(BigOperand->getExtTypes()))
- MadeChange |= NodeToApply->UpdateNodeType(EMVT::isFP, TP);
+ if (EEVT::isExtIntegerInVTs(NodeToApply->getExtTypes()))
+ MadeChange |= BigOperand->UpdateNodeType(MVT::iAny, TP);
+ else if (EEVT::isExtFloatingPointInVTs(NodeToApply->getExtTypes()))
+ MadeChange |= BigOperand->UpdateNodeType(MVT::fAny, TP);
+ if (EEVT::isExtIntegerInVTs(BigOperand->getExtTypes()))
+ MadeChange |= NodeToApply->UpdateNodeType(MVT::iAny, TP);
+ else if (EEVT::isExtFloatingPointInVTs(BigOperand->getExtTypes()))
+ MadeChange |= NodeToApply->UpdateNodeType(MVT::fAny, TP);
std::vector<MVT::SimpleValueType> VTs = CGT.getLegalValueTypes();
- if (EMVT::isExtIntegerInVTs(NodeToApply->getExtTypes())) {
+ if (EEVT::isExtIntegerInVTs(NodeToApply->getExtTypes())) {
VTs = FilterVTs(VTs, isInteger);
- } else if (EMVT::isExtFloatingPointInVTs(NodeToApply->getExtTypes())) {
+ } else if (EEVT::isExtFloatingPointInVTs(NodeToApply->getExtTypes())) {
VTs = FilterVTs(VTs, isFloatingPoint);
} else {
VTs.clear();
@@ -368,9 +390,9 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
if (OtherOperand->hasTypeSet()) {
if (!isVector(OtherOperand->getTypeNum(0)))
TP.error(N->getOperator()->getName() + " VT operand must be a vector!");
- MVT IVT = OtherOperand->getTypeNum(0);
+ EVT IVT = OtherOperand->getTypeNum(0);
IVT = IVT.getVectorElementType();
- return NodeToApply->UpdateNodeType(IVT.getSimpleVT(), TP);
+ return NodeToApply->UpdateNodeType(IVT.getSimpleVT().SimpleTy, TP);
}
return false;
}
@@ -445,7 +467,7 @@ bool TreePatternNode::UpdateNodeType(const std::vector<unsigned char> &ExtVTs,
TreePattern &TP) {
assert(!ExtVTs.empty() && "Cannot update node type with empty type vector!");
- if (ExtVTs[0] == EMVT::isUnknown || LHSIsSubsetOfRHS(getExtTypes(), ExtVTs))
+ if (ExtVTs[0] == EEVT::isUnknown || LHSIsSubsetOfRHS(getExtTypes(), ExtVTs))
return false;
if (isTypeCompletelyUnknown() || LHSIsSubsetOfRHS(ExtVTs, getExtTypes())) {
setTypes(ExtVTs);
@@ -454,9 +476,9 @@ bool TreePatternNode::UpdateNodeType(const std::vector<unsigned char> &ExtVTs,
if (getExtTypeNum(0) == MVT::iPTR || getExtTypeNum(0) == MVT::iPTRAny) {
if (ExtVTs[0] == MVT::iPTR || ExtVTs[0] == MVT::iPTRAny ||
- ExtVTs[0] == EMVT::isInt)
+ ExtVTs[0] == MVT::iAny)
return false;
- if (EMVT::isExtIntegerInVTs(ExtVTs)) {
+ if (EEVT::isExtIntegerInVTs(ExtVTs)) {
std::vector<unsigned char> FVTs = FilterEVTs(ExtVTs, isInteger);
if (FVTs.size()) {
setTypes(ExtVTs);
@@ -465,8 +487,19 @@ bool TreePatternNode::UpdateNodeType(const std::vector<unsigned char> &ExtVTs,
}
}
- if ((ExtVTs[0] == EMVT::isInt || ExtVTs[0] == MVT::iAny) &&
- EMVT::isExtIntegerInVTs(getExtTypes())) {
+ // Merge vAny with iAny/fAny. The latter include vector types so keep them
+ // as the more specific information.
+ if (ExtVTs[0] == MVT::vAny &&
+ (getExtTypeNum(0) == MVT::iAny || getExtTypeNum(0) == MVT::fAny))
+ return false;
+ if (getExtTypeNum(0) == MVT::vAny &&
+ (ExtVTs[0] == MVT::iAny || ExtVTs[0] == MVT::fAny)) {
+ setTypes(ExtVTs);
+ return true;
+ }
+
+ if (ExtVTs[0] == MVT::iAny &&
+ EEVT::isExtIntegerInVTs(getExtTypes())) {
assert(hasTypeSet() && "should be handled above!");
std::vector<unsigned char> FVTs = FilterEVTs(getExtTypes(), isInteger);
if (getExtTypes() == FVTs)
@@ -475,7 +508,7 @@ bool TreePatternNode::UpdateNodeType(const std::vector<unsigned char> &ExtVTs,
return true;
}
if ((ExtVTs[0] == MVT::iPTR || ExtVTs[0] == MVT::iPTRAny) &&
- EMVT::isExtIntegerInVTs(getExtTypes())) {
+ EEVT::isExtIntegerInVTs(getExtTypes())) {
//assert(hasTypeSet() && "should be handled above!");
std::vector<unsigned char> FVTs = FilterEVTs(getExtTypes(), isInteger);
if (getExtTypes() == FVTs)
@@ -485,8 +518,8 @@ bool TreePatternNode::UpdateNodeType(const std::vector<unsigned char> &ExtVTs,
return true;
}
}
- if ((ExtVTs[0] == EMVT::isFP || ExtVTs[0] == MVT::fAny) &&
- EMVT::isExtFloatingPointInVTs(getExtTypes())) {
+ if (ExtVTs[0] == MVT::fAny &&
+ EEVT::isExtFloatingPointInVTs(getExtTypes())) {
assert(hasTypeSet() && "should be handled above!");
std::vector<unsigned char> FVTs =
FilterEVTs(getExtTypes(), isFloatingPoint);
@@ -495,20 +528,31 @@ bool TreePatternNode::UpdateNodeType(const std::vector<unsigned char> &ExtVTs,
setTypes(FVTs);
return true;
}
-
- // If we know this is an int or fp type, and we are told it is a specific one,
- // take the advice.
+ if (ExtVTs[0] == MVT::vAny &&
+ EEVT::isExtVectorInVTs(getExtTypes())) {
+ assert(hasTypeSet() && "should be handled above!");
+ std::vector<unsigned char> FVTs = FilterEVTs(getExtTypes(), isVector);
+ if (getExtTypes() == FVTs)
+ return false;
+ setTypes(FVTs);
+ return true;
+ }
+
+ // If we know this is an int, FP, or vector type, and we are told it is a
+ // specific one, take the advice.
//
// Similarly, we should probably set the type here to the intersection of
- // {isInt|isFP} and ExtVTs
- if (((getExtTypeNum(0) == EMVT::isInt || getExtTypeNum(0) == MVT::iAny) &&
- EMVT::isExtIntegerInVTs(ExtVTs)) ||
- ((getExtTypeNum(0) == EMVT::isFP || getExtTypeNum(0) == MVT::fAny) &&
- EMVT::isExtFloatingPointInVTs(ExtVTs))) {
+ // {iAny|fAny|vAny} and ExtVTs
+ if ((getExtTypeNum(0) == MVT::iAny &&
+ EEVT::isExtIntegerInVTs(ExtVTs)) ||
+ (getExtTypeNum(0) == MVT::fAny &&
+ EEVT::isExtFloatingPointInVTs(ExtVTs)) ||
+ (getExtTypeNum(0) == MVT::vAny &&
+ EEVT::isExtVectorInVTs(ExtVTs))) {
setTypes(ExtVTs);
return true;
}
- if (getExtTypeNum(0) == EMVT::isInt &&
+ if (getExtTypeNum(0) == MVT::iAny &&
(ExtVTs[0] == MVT::iPTR || ExtVTs[0] == MVT::iPTRAny)) {
setTypes(ExtVTs);
return true;
@@ -537,14 +581,15 @@ void TreePatternNode::print(raw_ostream &OS) const {
// nodes that are multiply typed.
switch (getExtTypeNum(0)) {
case MVT::Other: OS << ":Other"; break;
- case EMVT::isInt: OS << ":isInt"; break;
- case EMVT::isFP : OS << ":isFP"; break;
- case EMVT::isUnknown: ; /*OS << ":?";*/ break;
+ case MVT::iAny: OS << ":iAny"; break;
+ case MVT::fAny : OS << ":fAny"; break;
+ case MVT::vAny: OS << ":vAny"; break;
+ case EEVT::isUnknown: ; /*OS << ":?";*/ break;
case MVT::iPTR: OS << ":iPTR"; break;
case MVT::iPTRAny: OS << ":iPTRAny"; break;
default: {
std::string VTName = llvm::getName(getTypeNum(0));
- // Strip off MVT:: prefix if present.
+ // Strip off EVT:: prefix if present.
if (VTName.substr(0,5) == "MVT::")
VTName = VTName.substr(5);
OS << ":" << VTName;
@@ -726,7 +771,7 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
static std::vector<unsigned char> getImplicitType(Record *R, bool NotRegisters,
TreePattern &TP) {
// Some common return values
- std::vector<unsigned char> Unknown(1, EMVT::isUnknown);
+ std::vector<unsigned char> Unknown(1, EEVT::isUnknown);
std::vector<unsigned char> Other(1, MVT::Other);
// Check to see if this is a register or a register class...
@@ -753,7 +798,7 @@ static std::vector<unsigned char> getImplicitType(Record *R, bool NotRegisters,
std::vector<unsigned char>
ComplexPat(1, TP.getDAGPatterns().getComplexPattern(R).getValueType());
return ComplexPat;
- } else if (R->getName() == "ptr_rc") {
+ } else if (R->isSubClassOf("PointerLikeRegClass")) {
Other[0] = MVT::iPTR;
return Other;
} else if (R->getName() == "node" || R->getName() == "srcvalue" ||
@@ -803,7 +848,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
return UpdateNodeType(getImplicitType(DI->getDef(), NotRegisters, TP),TP);
} else if (IntInit *II = dynamic_cast<IntInit*>(getLeafValue())) {
// Int inits are always integers. :)
- bool MadeChange = UpdateNodeType(EMVT::isInt, TP);
+ bool MadeChange = UpdateNodeType(MVT::iAny, TP);
if (hasTypeSet()) {
// At some point, it may make sense for this tree pattern to have
@@ -816,7 +861,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
VT = getTypeNum(0);
if (VT != MVT::iPTR && VT != MVT::iPTRAny) {
- unsigned Size = MVT(VT).getSizeInBits();
+ unsigned Size = EVT(VT).getSizeInBits();
// Make sure that the value is representable for this type.
if (Size < 32) {
int Val = (II->getValue() << (32-Size)) >> (32-Size);
@@ -924,13 +969,13 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
} else {
Record *ResultNode = Inst.getResult(0);
- if (ResultNode->getName() == "ptr_rc") {
+ if (ResultNode->isSubClassOf("PointerLikeRegClass")) {
std::vector<unsigned char> VT;
VT.push_back(MVT::iPTR);
MadeChange = UpdateNodeType(VT, TP);
} else if (ResultNode->getName() == "unknown") {
std::vector<unsigned char> VT;
- VT.push_back(EMVT::isUnknown);
+ VT.push_back(EEVT::isUnknown);
MadeChange = UpdateNodeType(VT, TP);
} else {
assert(ResultNode->isSubClassOf("RegisterClass") &&
@@ -968,10 +1013,10 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
} else if (OperandNode->isSubClassOf("Operand")) {
VT = getValueType(OperandNode->getValueAsDef("Type"));
MadeChange |= Child->UpdateNodeType(VT, TP);
- } else if (OperandNode->getName() == "ptr_rc") {
+ } else if (OperandNode->isSubClassOf("PointerLikeRegClass")) {
MadeChange |= Child->UpdateNodeType(MVT::iPTR, TP);
} else if (OperandNode->getName() == "unknown") {
- MadeChange |= Child->UpdateNodeType(EMVT::isUnknown, TP);
+ MadeChange |= Child->UpdateNodeType(EEVT::isUnknown, TP);
} else {
assert(0 && "Unknown operand type!");
abort();
@@ -1297,7 +1342,7 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) : Records(R) {
}
CodeGenDAGPatterns::~CodeGenDAGPatterns() {
- for (std::map<Record*, TreePattern*>::iterator I = PatternFragments.begin(),
+ for (pf_iterator I = PatternFragments.begin(),
E = PatternFragments.end(); I != E; ++I)
delete I->second;
}
@@ -1602,7 +1647,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
I->error("set destination should be a register!");
if (Val->getDef()->isSubClassOf("RegisterClass") ||
- Val->getDef()->getName() == "ptr_rc") {
+ Val->getDef()->isSubClassOf("PointerLikeRegClass")) {
if (Dest->getName().empty())
I->error("set destination must have a name!");
if (InstResults.count(Dest->getName()))
@@ -1949,7 +1994,8 @@ void CodeGenDAGPatterns::ParseInstructions() {
}
// If we can, convert the instructions to be patterns that are matched!
- for (std::map<Record*, DAGInstruction>::iterator II = Instructions.begin(),
+ for (std::map<Record*, DAGInstruction, RecordPtrCmp>::iterator II =
+ Instructions.begin(),
E = Instructions.end(); II != E; ++II) {
DAGInstruction &TheInst = II->second;
const TreePattern *I = TheInst.getPattern();
@@ -2350,7 +2396,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
// GenerateVariants - Generate variants. For example, commutative patterns can
// match multiple ways. Add them to PatternsToMatch as well.
void CodeGenDAGPatterns::GenerateVariants() {
- DOUT << "Generating instruction variants.\n";
+ DEBUG(errs() << "Generating instruction variants.\n");
// Loop over all of the patterns we've collected, checking to see if we can
// generate variants of the instruction, through the exploitation of
@@ -2365,9 +2411,9 @@ void CodeGenDAGPatterns::GenerateVariants() {
MultipleUseVarSet DepVars;
std::vector<TreePatternNode*> Variants;
FindDepVars(PatternsToMatch[i].getSrcPattern(), DepVars);
- DOUT << "Dependent/multiply used variables: ";
+ DEBUG(errs() << "Dependent/multiply used variables: ");
DEBUG(DumpDepVars(DepVars));
- DOUT << "\n";
+ DEBUG(errs() << "\n");
GenerateVariantsOf(PatternsToMatch[i].getSrcPattern(), Variants, *this, DepVars);
assert(!Variants.empty() && "Must create at least original variant!");
@@ -2376,16 +2422,16 @@ void CodeGenDAGPatterns::GenerateVariants() {
if (Variants.empty()) // No variants for this pattern.
continue;
- DOUT << "FOUND VARIANTS OF: ";
- DEBUG(PatternsToMatch[i].getSrcPattern()->dump());
- DOUT << "\n";
+ DEBUG(errs() << "FOUND VARIANTS OF: ";
+ PatternsToMatch[i].getSrcPattern()->dump();
+ errs() << "\n");
for (unsigned v = 0, e = Variants.size(); v != e; ++v) {
TreePatternNode *Variant = Variants[v];
- DOUT << " VAR#" << v << ": ";
- DEBUG(Variant->dump());
- DOUT << "\n";
+ DEBUG(errs() << " VAR#" << v << ": ";
+ Variant->dump();
+ errs() << "\n");
// Scan to see if an instruction or explicit pattern already matches this.
bool AlreadyExists = false;
@@ -2396,7 +2442,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
continue;
// Check to see if this variant already exists.
if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(), DepVars)) {
- DOUT << " *** ALREADY EXISTS, ignoring variant.\n";
+ DEBUG(errs() << " *** ALREADY EXISTS, ignoring variant.\n");
AlreadyExists = true;
break;
}
@@ -2412,7 +2458,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
PatternsToMatch[i].getAddedComplexity()));
}
- DOUT << "\n";
+ DEBUG(errs() << "\n");
}
}
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index d3980068124a..9b53ecc5db95 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -33,22 +33,27 @@ namespace llvm {
class CodeGenDAGPatterns;
class ComplexPattern;
-/// EMVT::DAGISelGenValueType - These are some extended forms of
+/// EEVT::DAGISelGenValueType - These are some extended forms of
/// MVT::SimpleValueType that we use as lattice values during type inference.
-namespace EMVT {
+/// The existing MVT iAny, fAny and vAny types suffice to represent
+/// arbitrary integer, floating-point, and vector types, so only an unknown
+/// value is needed.
+namespace EEVT {
enum DAGISelGenValueType {
- isFP = MVT::LAST_VALUETYPE,
- isInt,
- isUnknown
+ isUnknown = MVT::LAST_VALUETYPE
};
- /// isExtIntegerVT - Return true if the specified extended value type vector
- /// contains isInt or an integer value type.
+ /// isExtIntegerInVTs - Return true if the specified extended value type
+ /// vector contains iAny or an integer value type.
bool isExtIntegerInVTs(const std::vector<unsigned char> &EVTs);
- /// isExtFloatingPointVT - Return true if the specified extended value type
- /// vector contains isFP or a FP value type.
+ /// isExtFloatingPointInVTs - Return true if the specified extended value
+ /// type vector contains fAny or a FP value type.
bool isExtFloatingPointInVTs(const std::vector<unsigned char> &EVTs);
+
+ /// isExtVectorInVTs - Return true if the specified extended value type
+ /// vector contains vAny or a vector value type.
+ bool isExtVectorInVTs(const std::vector<unsigned char> &EVTs);
}
/// Set type used to track multiply used variables in patterns
@@ -61,7 +66,7 @@ struct SDTypeConstraint {
unsigned OperandNo; // The operand # this constraint applies to.
enum {
- SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs,
+ SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisVec, SDTCisSameAs,
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec
} ConstraintType;
@@ -140,7 +145,7 @@ public:
/// patterns), and as such should be ref counted. We currently just leak all
/// TreePatternNode objects!
class TreePatternNode {
- /// The inferred type for this node, or EMVT::isUnknown if it hasn't
+ /// The inferred type for this node, or EEVT::isUnknown if it hasn't
/// been determined yet. This is a std::vector because during inference
/// there may be multiple possible types.
std::vector<unsigned char> Types;
@@ -169,10 +174,10 @@ class TreePatternNode {
public:
TreePatternNode(Record *Op, const std::vector<TreePatternNode*> &Ch)
: Types(), Operator(Op), Val(0), TransformFn(0),
- Children(Ch) { Types.push_back(EMVT::isUnknown); }
+ Children(Ch) { Types.push_back(EEVT::isUnknown); }
TreePatternNode(Init *val) // leaf ctor
: Types(), Operator(0), Val(val), TransformFn(0) {
- Types.push_back(EMVT::isUnknown);
+ Types.push_back(EEVT::isUnknown);
}
~TreePatternNode();
@@ -185,7 +190,7 @@ public:
(Types[0] == MVT::iPTRAny);
}
bool isTypeCompletelyUnknown() const {
- return Types[0] == EMVT::isUnknown;
+ return Types[0] == EEVT::isUnknown;
}
bool isTypeDynamicallyResolved() const {
return (Types[0] == MVT::iPTR) || (Types[0] == MVT::iPTRAny);
@@ -201,7 +206,7 @@ public:
}
const std::vector<unsigned char> &getExtTypes() const { return Types; }
void setTypes(const std::vector<unsigned char> &T) { Types = T; }
- void removeTypes() { Types = std::vector<unsigned char>(1, EMVT::isUnknown); }
+ void removeTypes() { Types = std::vector<unsigned char>(1, EEVT::isUnknown); }
Init *getLeafValue() const { assert(isLeaf()); return Val; }
Record *getOperator() const { assert(!isLeaf()); return Operator; }
@@ -457,6 +462,10 @@ struct PatternToMatch {
std::string getPredicateCheck() const;
};
+// Deterministic comparison of Record*.
+struct RecordPtrCmp {
+ bool operator()(const Record *LHS, const Record *RHS) const;
+};
class CodeGenDAGPatterns {
RecordKeeper &Records;
@@ -464,12 +473,12 @@ class CodeGenDAGPatterns {
std::vector<CodeGenIntrinsic> Intrinsics;
std::vector<CodeGenIntrinsic> TgtIntrinsics;
- std::map<Record*, SDNodeInfo> SDNodes;
- std::map<Record*, std::pair<Record*, std::string> > SDNodeXForms;
- std::map<Record*, ComplexPattern> ComplexPatterns;
- std::map<Record*, TreePattern*> PatternFragments;
- std::map<Record*, DAGDefaultOperand> DefaultOperands;
- std::map<Record*, DAGInstruction> Instructions;
+ std::map<Record*, SDNodeInfo, RecordPtrCmp> SDNodes;
+ std::map<Record*, std::pair<Record*, std::string>, RecordPtrCmp> SDNodeXForms;
+ std::map<Record*, ComplexPattern, RecordPtrCmp> ComplexPatterns;
+ std::map<Record*, TreePattern*, RecordPtrCmp> PatternFragments;
+ std::map<Record*, DAGDefaultOperand, RecordPtrCmp> DefaultOperands;
+ std::map<Record*, DAGInstruction, RecordPtrCmp> Instructions;
// Specific SDNode definitions:
Record *intrinsic_void_sdnode;
@@ -500,7 +509,8 @@ public:
return SDNodeXForms.find(R)->second;
}
- typedef std::map<Record*, NodeXForm>::const_iterator nx_iterator;
+ typedef std::map<Record*, NodeXForm, RecordPtrCmp>::const_iterator
+ nx_iterator;
nx_iterator nx_begin() const { return SDNodeXForms.begin(); }
nx_iterator nx_end() const { return SDNodeXForms.end(); }
@@ -547,7 +557,8 @@ public:
assert(PatternFragments.count(R) && "Invalid pattern fragment request!");
return PatternFragments.find(R)->second;
}
- typedef std::map<Record*, TreePattern*>::const_iterator pf_iterator;
+ typedef std::map<Record*, TreePattern*, RecordPtrCmp>::const_iterator
+ pf_iterator;
pf_iterator pf_begin() const { return PatternFragments.begin(); }
pf_iterator pf_end() const { return PatternFragments.end(); }
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 4650b88fd517..d421fd07c323 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -22,7 +22,7 @@ static void ParseConstraint(const std::string &CStr, CodeGenInstruction *I) {
std::string::size_type pos = CStr.find_first_of('=');
assert(pos != std::string::npos && "Unrecognized constraint");
std::string::size_type start = CStr.find_first_not_of(" \t");
- std::string Name = CStr.substr(start, pos);
+ std::string Name = CStr.substr(start, pos - start);
// TIED_TO: $src1 = $dst
std::string::size_type wpos = Name.find_first_of(" \t");
@@ -70,7 +70,7 @@ static void ParseConstraints(const std::string &CStr, CodeGenInstruction *I) {
if (eidx == std::string::npos)
eidx = CStr.length();
- ParseConstraint(CStr.substr(bidx, eidx), I);
+ ParseConstraint(CStr.substr(bidx, eidx - bidx), I);
bidx = CStr.find_first_not_of(delims, eidx);
}
}
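
Both hunks above fix the same misuse: std::string::substr(pos, count) takes a
starting index and a length, not an end index, so the second argument must be
the distance between the two positions. A standalone illustration:

    #include <iostream>
    #include <string>

    int main() {
      std::string CStr = "  $src1 = $dst";
      std::string::size_type start = CStr.find_first_not_of(" \t"); // 2
      std::string::size_type pos   = CStr.find_first_of('=');       // 8
      std::cout << CStr.substr(start, pos) << '\n';         // "$src1 = " (too long)
      std::cout << CStr.substr(start, pos - start) << '\n'; // "$src1 " (intended)
      return 0;
    }
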
@@ -101,6 +101,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
mayHaveSideEffects = R->getValueAsBit("mayHaveSideEffects");
neverHasSideEffects = R->getValueAsBit("neverHasSideEffects");
isAsCheapAsAMove = R->getValueAsBit("isAsCheapAsAMove");
+ hasExtraSrcRegAllocReq = R->getValueAsBit("hasExtraSrcRegAllocReq");
+ hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq");
hasOptionalDef = false;
isVariadic = false;
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index f4afd5e45ba3..04506e9d2d8a 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -106,6 +106,8 @@ namespace llvm {
bool mayHaveSideEffects;
bool neverHasSideEffects;
bool isAsCheapAsAMove;
+ bool hasExtraSrcRegAllocReq;
+ bool hasExtraDefRegAllocReq;
/// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar",
/// where $foo is a whole operand and $foo.bar refers to a suboperand.
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index c17cd0eef2da..0edca7353a4a 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -23,6 +23,10 @@
using namespace llvm;
static cl::opt<unsigned>
+AsmParserNum("asmparsernum", cl::init(0),
+ cl::desc("Make -gen-asm-parser emit assembly parser #N"));
+
+static cl::opt<unsigned>
AsmWriterNum("asmwriternum", cl::init(0),
cl::desc("Make -gen-asm-writer emit assembly writer #N"));
@@ -34,47 +38,10 @@ MVT::SimpleValueType llvm::getValueType(Record *Rec) {
std::string llvm::getName(MVT::SimpleValueType T) {
switch (T) {
- case MVT::Other: return "UNKNOWN";
- case MVT::i1: return "MVT::i1";
- case MVT::i8: return "MVT::i8";
- case MVT::i16: return "MVT::i16";
- case MVT::i32: return "MVT::i32";
- case MVT::i64: return "MVT::i64";
- case MVT::i128: return "MVT::i128";
- case MVT::iAny: return "MVT::iAny";
- case MVT::fAny: return "MVT::fAny";
- case MVT::f32: return "MVT::f32";
- case MVT::f64: return "MVT::f64";
- case MVT::f80: return "MVT::f80";
- case MVT::f128: return "MVT::f128";
- case MVT::ppcf128: return "MVT::ppcf128";
- case MVT::Flag: return "MVT::Flag";
- case MVT::isVoid:return "MVT::isVoid";
- case MVT::v2i8: return "MVT::v2i8";
- case MVT::v4i8: return "MVT::v4i8";
- case MVT::v8i8: return "MVT::v8i8";
- case MVT::v16i8: return "MVT::v16i8";
- case MVT::v32i8: return "MVT::v32i8";
- case MVT::v2i16: return "MVT::v2i16";
- case MVT::v4i16: return "MVT::v4i16";
- case MVT::v8i16: return "MVT::v8i16";
- case MVT::v16i16: return "MVT::v16i16";
- case MVT::v2i32: return "MVT::v2i32";
- case MVT::v4i32: return "MVT::v4i32";
- case MVT::v8i32: return "MVT::v8i32";
- case MVT::v1i64: return "MVT::v1i64";
- case MVT::v2i64: return "MVT::v2i64";
- case MVT::v4i64: return "MVT::v4i64";
- case MVT::v2f32: return "MVT::v2f32";
- case MVT::v4f32: return "MVT::v4f32";
- case MVT::v8f32: return "MVT::v8f32";
- case MVT::v2f64: return "MVT::v2f64";
- case MVT::v4f64: return "MVT::v4f64";
- case MVT::v3i32: return "MVT::v3i32";
- case MVT::v3f32: return "MVT::v3f32";
- case MVT::iPTR: return "TLI.getPointerTy()";
- case MVT::iPTRAny: return "TLI.getPointerTy()";
- default: assert(0 && "ILLEGAL VALUE TYPE!"); return "";
+ case MVT::Other: return "UNKNOWN";
+ case MVT::iPTR: return "TLI.getPointerTy()";
+ case MVT::iPTRAny: return "TLI.getPointerTy()";
+ default: return getEnumName(T);
}
}
@@ -89,6 +56,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::i128: return "MVT::i128";
case MVT::iAny: return "MVT::iAny";
case MVT::fAny: return "MVT::fAny";
+ case MVT::vAny: return "MVT::vAny";
case MVT::f32: return "MVT::f32";
case MVT::f64: return "MVT::f64";
case MVT::f80: return "MVT::f80";
@@ -116,8 +84,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v8f32: return "MVT::v8f32";
case MVT::v2f64: return "MVT::v2f64";
case MVT::v4f64: return "MVT::v4f64";
- case MVT::v3i32: return "MVT::v3i32";
- case MVT::v3f32: return "MVT::v3f32";
+ case MVT::Metadata: return "MVT::Metadata";
case MVT::iPTR: return "MVT::iPTR";
case MVT::iPTRAny: return "MVT::iPTRAny";
default: assert(0 && "ILLEGAL VALUE TYPE!"); return "";
@@ -171,6 +138,15 @@ Record *CodeGenTarget::getInstructionSet() const {
return TargetRec->getValueAsDef("InstructionSet");
}
+/// getAsmParser - Return the AssemblyParser definition for this target.
+///
+Record *CodeGenTarget::getAsmParser() const {
+ std::vector<Record*> LI = TargetRec->getValueAsListOfDefs("AssemblyParsers");
+ if (AsmParserNum >= LI.size())
+ throw "Target does not have an AsmParser #" + utostr(AsmParserNum) + "!";
+ return LI[AsmParserNum];
+}
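
The new accessor mirrors getAsmWriter(): -asmparsernum picks one entry out of
the target's AssemblyParsers list. A hypothetical caller in an emitter could
look like the sketch below; the field name is illustrative and would come from
whatever the target's .td AsmParser class actually defines:

    CodeGenTarget Target;
    Record *AsmParser = Target.getAsmParser();
    // "AsmParserClassName" shown for illustration only.
    std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
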
+
/// getAsmWriter - Return the AssemblyWriter definition for this target.
///
Record *CodeGenTarget::getAsmWriter() const {
@@ -265,7 +241,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
unsigned Size = R->getValueAsInt("Size");
Namespace = R->getValueAsString("Namespace");
- SpillSize = Size ? Size : MVT(VTs[0]).getSizeInBits();
+ SpillSize = Size ? Size : EVT(VTs[0]).getSizeInBits();
SpillAlignment = R->getValueAsInt("Alignment");
CopyCost = R->getValueAsInt("CopyCost");
MethodBodies = R->getValueAsCode("MethodBodies");
@@ -332,9 +308,9 @@ getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
if (I == Instructions.end()) throw "Could not find 'GC_LABEL' instruction!";
const CodeGenInstruction *GC_LABEL = &I->second;
- I = getInstructions().find("DECLARE");
- if (I == Instructions.end()) throw "Could not find 'DECLARE' instruction!";
- const CodeGenInstruction *DECLARE = &I->second;
+ I = getInstructions().find("KILL");
+ if (I == Instructions.end()) throw "Could not find 'KILL' instruction!";
+ const CodeGenInstruction *KILL = &I->second;
I = getInstructions().find("EXTRACT_SUBREG");
if (I == Instructions.end())
@@ -367,7 +343,7 @@ getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
NumberedInstructions.push_back(DBG_LABEL);
NumberedInstructions.push_back(EH_LABEL);
NumberedInstructions.push_back(GC_LABEL);
- NumberedInstructions.push_back(DECLARE);
+ NumberedInstructions.push_back(KILL);
NumberedInstructions.push_back(EXTRACT_SUBREG);
NumberedInstructions.push_back(INSERT_SUBREG);
NumberedInstructions.push_back(IMPLICIT_DEF);
@@ -379,7 +355,7 @@ getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
&II->second != DBG_LABEL &&
&II->second != EH_LABEL &&
&II->second != GC_LABEL &&
- &II->second != DECLARE &&
+ &II->second != KILL &&
&II->second != EXTRACT_SUBREG &&
&II->second != INSERT_SUBREG &&
&II->second != IMPLICIT_DEF &&
@@ -517,11 +493,12 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// overloaded, all the types can be specified directly.
assert(((!TyEl->isSubClassOf("LLVMExtendedElementVectorType") &&
!TyEl->isSubClassOf("LLVMTruncatedElementVectorType")) ||
- VT == MVT::iAny) && "Expected iAny type");
+ VT == MVT::iAny || VT == MVT::vAny) &&
+ "Expected iAny or vAny type");
} else {
VT = getValueType(TyEl->getValueAsDef("VT"));
}
- if (VT == MVT::iAny || VT == MVT::fAny || VT == MVT::iPTRAny) {
+ if (EVT(VT).isOverloaded()) {
OverloadedVTs.push_back(VT);
isOverloaded |= true;
}
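
Routing the test through EVT::isOverloaded() removes a duplicated list of
"any" types and picks up the newly added vAny for free. The predicate is
roughly equivalent to:

    // Sketch of the check the EVT(VT).isOverloaded() call stands in for.
    static bool isOverloadedVT(MVT::SimpleValueType VT) {
      return VT == MVT::iAny || VT == MVT::fAny ||
             VT == MVT::vAny || VT == MVT::iPTRAny;
    }
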
@@ -548,10 +525,11 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// overloaded, all the types can be specified directly.
assert(((!TyEl->isSubClassOf("LLVMExtendedElementVectorType") &&
!TyEl->isSubClassOf("LLVMTruncatedElementVectorType")) ||
- VT == MVT::iAny) && "Expected iAny type");
+ VT == MVT::iAny || VT == MVT::vAny) &&
+ "Expected iAny or vAny type");
} else
VT = getValueType(TyEl->getValueAsDef("VT"));
- if (VT == MVT::iAny || VT == MVT::fAny || VT == MVT::iPTRAny) {
+ if (EVT(VT).isOverloaded()) {
OverloadedVTs.push_back(VT);
isOverloaded |= true;
}
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 0ec995581460..e763795ce0be 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -87,6 +87,10 @@ public:
///
Record *getInstructionSet() const;
+ /// getAsmParser - Return the AssemblyParser definition for this target.
+ ///
+ Record *getAsmParser() const;
+
/// getAsmWriter - Return the AssemblyWriter definition for this target.
///
Record *getAsmWriter() const;
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index bb26a1db3fc2..dcf64e444c89 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -23,11 +23,8 @@
#include <iostream>
using namespace llvm;
-namespace {
- cl::opt<bool>
- GenDebug("gen-debug", cl::desc("Generate debug code"),
- cl::init(false));
-}
+static cl::opt<bool>
+GenDebug("gen-debug", cl::desc("Generate debug code"), cl::init(false));
//===----------------------------------------------------------------------===//
// DAGISelEmitter Helper methods
@@ -60,8 +57,8 @@ static const ComplexPattern *NodeGetComplexPattern(TreePatternNode *N,
/// patterns before small ones. This is used to determine the size of a
/// pattern.
static unsigned getPatternSize(TreePatternNode *P, CodeGenDAGPatterns &CGP) {
- assert((EMVT::isExtIntegerInVTs(P->getExtTypes()) ||
- EMVT::isExtFloatingPointInVTs(P->getExtTypes()) ||
+ assert((EEVT::isExtIntegerInVTs(P->getExtTypes()) ||
+ EEVT::isExtFloatingPointInVTs(P->getExtTypes()) ||
P->getExtTypeNum(0) == MVT::isVoid ||
P->getExtTypeNum(0) == MVT::Flag ||
P->getExtTypeNum(0) == MVT::iPTR ||
@@ -698,7 +695,7 @@ public:
if (DefInit *DI = dynamic_cast<DefInit*>(Child->getLeafValue())) {
Record *LeafRec = DI->getDef();
if (LeafRec->isSubClassOf("RegisterClass") ||
- LeafRec->getName() == "ptr_rc") {
+ LeafRec->isSubClassOf("PointerLikeRegClass")) {
// Handle register references. Nothing to do here.
} else if (LeafRec->isSubClassOf("Register")) {
// Handle register references.
@@ -787,7 +784,7 @@ public:
EmitResultCode(TreePatternNode *N, std::vector<Record*> DstRegs,
bool InFlagDecled, bool ResNodeDecled,
bool LikeLeaf = false, bool isRoot = false) {
- // List of arguments of getTargetNode() or SelectNodeTo().
+ // List of arguments of getMachineNode() or SelectNodeTo().
std::vector<std::string> NodeOps;
// This is something selected from the pattern we matched.
if (!N->getName().empty()) {
@@ -932,7 +929,8 @@ public:
unsigned ResNo = TmpNo++;
assert(N->getExtTypes().size() == 1 && "Multiple types not handled!");
emitCode("SDValue Tmp" + utostr(ResNo) +
- " = CurDAG->getTargetConstant(0x" + itohexstr(II->getValue()) +
+ " = CurDAG->getTargetConstant(0x" +
+ utohexstr((uint64_t) II->getValue()) +
"ULL, " + getEnumName(N->getTypeNum(0)) + ");");
NodeOps.push_back("Tmp" + utostr(ResNo));
return NodeOps;
@@ -1091,7 +1089,7 @@ public:
std::string Code = "Opc" + utostr(OpcNo);
if (!isRoot || (InputHasChain && !NodeHasChain))
- // For call to "getTargetNode()".
+ // For call to "getMachineNode()".
Code += ", N.getDebugLoc()";
emitOpcode(II.Namespace + "::" + II.TheDef->getName());
@@ -1137,24 +1135,18 @@ public:
emitCode("}");
}
- // Generate MemOperandSDNodes nodes for each memory accesses covered by
+ // Populate MemRefs with entries for each memory access covered by
// this pattern.
- if (II.mayLoad | II.mayStore) {
- std::vector<std::string>::const_iterator mi, mie;
- for (mi = LSI.begin(), mie = LSI.end(); mi != mie; ++mi) {
- std::string LSIName = "LSI_" + *mi;
- emitCode("SDValue " + LSIName + " = "
- "CurDAG->getMemOperand(cast<MemSDNode>(" +
- *mi + ")->getMemOperand());");
- if (GenDebug) {
- emitCode("CurDAG->setSubgraphColor(" + LSIName +".getNode(), \"yellow\");");
- emitCode("CurDAG->setSubgraphColor(" + LSIName +".getNode(), \"black\");");
- }
- if (IsVariadic)
- emitCode("Ops" + utostr(OpsNo) + ".push_back(" + LSIName + ");");
- else
- AllOps.push_back(LSIName);
- }
+ if (isRoot && !LSI.empty()) {
+ std::string MemRefs = "MemRefs" + utostr(OpsNo);
+ emitCode("MachineSDNode::mmo_iterator " + MemRefs + " = "
+ "MF->allocateMemRefsArray(" + utostr(LSI.size()) + ");");
+ for (unsigned i = 0, e = LSI.size(); i != e; ++i)
+ emitCode(MemRefs + "[" + utostr(i) + "] = "
+ "cast<MemSDNode>(" + LSI[i] + ")->getMemOperand();");
+ After.push_back("cast<MachineSDNode>(ResNode)->setMemRefs(" +
+ MemRefs + ", " + MemRefs + " + " + utostr(LSI.size()) +
+ ");");
}
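
Instead of wrapping each memory operand in an extra SDNode, the selector now
allocates a MachineMemOperand array on the MachineFunction and attaches it to
the finished machine node. For a root pattern covering two memory accesses the
emitted code takes roughly this shape (node names are illustrative):

    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(2);
    MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
    MemRefs0[1] = cast<MemSDNode>(N2)->getMemOperand();
    // ...and once ResNode has been created:
    cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 2);
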
if (NodeHasChain) {
@@ -1305,7 +1297,7 @@ public:
// would leave users of the chain dangling.
//
if (!isRoot || (InputHasChain && !NodeHasChain)) {
- Code = "CurDAG->getTargetNode(" + Code;
+ Code = "CurDAG->getMachineNode(" + Code;
} else {
Code = "CurDAG->SelectNodeTo(N.getNode(), " + Code;
}
@@ -1778,7 +1770,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
CallerCode += ", " + TargetOpcodes[j];
}
for (unsigned j = 0, e = TargetVTs.size(); j != e; ++j) {
- CalleeCode += ", MVT VT" + utostr(j);
+ CalleeCode += ", MVT::SimpleValueType VT" + utostr(j);
CallerCode += ", " + TargetVTs[j];
}
for (std::set<std::string>::iterator
@@ -1930,7 +1922,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< " std::vector<SDValue> Ops(N.getNode()->op_begin(), N.getNode()->op_end());\n"
<< " SelectInlineAsmMemoryOperands(Ops);\n\n"
- << " std::vector<MVT> VTs;\n"
+ << " std::vector<EVT> VTs;\n"
<< " VTs.push_back(MVT::Other);\n"
<< " VTs.push_back(MVT::Flag);\n"
<< " SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(), "
@@ -1959,32 +1951,14 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< " MVT::Other, Tmp, Chain);\n"
<< "}\n\n";
- OS << "SDNode *Select_DECLARE(const SDValue &N) {\n"
- << " SDValue Chain = N.getOperand(0);\n"
- << " SDValue N1 = N.getOperand(1);\n"
- << " SDValue N2 = N.getOperand(2);\n"
- << " if (!isa<FrameIndexSDNode>(N1) || !isa<GlobalAddressSDNode>(N2)) {\n"
- << " CannotYetSelect(N);\n"
- << " }\n"
- << " int FI = cast<FrameIndexSDNode>(N1)->getIndex();\n"
- << " GlobalValue *GV = cast<GlobalAddressSDNode>(N2)->getGlobal();\n"
- << " SDValue Tmp1 = "
- << "CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());\n"
- << " SDValue Tmp2 = "
- << "CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());\n"
- << " return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DECLARE,\n"
- << " MVT::Other, Tmp1, Tmp2, Chain);\n"
- << "}\n\n";
-
OS << "// The main instruction selector code.\n"
<< "SDNode *SelectCode(SDValue N) {\n"
- << " MVT::SimpleValueType NVT = N.getNode()->getValueType(0).getSimpleVT();\n"
+ << " MVT::SimpleValueType NVT = N.getNode()->getValueType(0).getSimpleVT().SimpleTy;\n"
<< " switch (N.getOpcode()) {\n"
<< " default:\n"
<< " assert(!N.isMachineOpcode() && \"Node already selected!\");\n"
<< " break;\n"
<< " case ISD::EntryToken: // These nodes remain the same.\n"
- << " case ISD::MEMOPERAND:\n"
<< " case ISD::BasicBlock:\n"
<< " case ISD::Register:\n"
<< " case ISD::HANDLENODE:\n"
@@ -2009,7 +1983,6 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< " case ISD::INLINEASM: return Select_INLINEASM(N);\n"
<< " case ISD::DBG_LABEL: return Select_DBG_LABEL(N);\n"
<< " case ISD::EH_LABEL: return Select_EH_LABEL(N);\n"
- << " case ISD::DECLARE: return Select_DECLARE(N);\n"
<< " case ISD::UNDEF: return Select_UNDEF(N);\n";
// Loop over all of the case statements, emitting a call to each method we
@@ -2083,20 +2056,19 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< "}\n\n";
OS << "void CannotYetSelect(SDValue N) DISABLE_INLINE {\n"
- << " cerr << \"Cannot yet select: \";\n"
- << " N.getNode()->dump(CurDAG);\n"
- << " cerr << '\\n';\n"
- << " abort();\n"
+ << " std::string msg;\n"
+ << " raw_string_ostream Msg(msg);\n"
+ << " Msg << \"Cannot yet select: \";\n"
+ << " N.getNode()->print(Msg, CurDAG);\n"
+ << " llvm_report_error(Msg.str());\n"
<< "}\n\n";
OS << "void CannotYetSelectIntrinsic(SDValue N) DISABLE_INLINE {\n"
- << " cerr << \"Cannot yet select: \";\n"
+ << " errs() << \"Cannot yet select: \";\n"
<< " unsigned iid = cast<ConstantSDNode>(N.getOperand("
<< "N.getOperand(0).getValueType() == MVT::Other))->getZExtValue();\n"
- << " cerr << \"intrinsic %\"<< "
- << "Intrinsic::getName((Intrinsic::ID)iid);\n"
- << " cerr << '\\n';\n"
- << " abort();\n"
+ << " llvm_report_error(\"Cannot yet select: intrinsic %\" +\n"
+ << "Intrinsic::getName((Intrinsic::ID)iid));\n"
<< "}\n\n";
}
@@ -2115,12 +2087,12 @@ void DAGISelEmitter::run(raw_ostream &OS) {
EmitNodeTransforms(OS);
EmitPredicateFunctions(OS);
- DOUT << "\n\nALL PATTERNS TO MATCH:\n\n";
+ DEBUG(errs() << "\n\nALL PATTERNS TO MATCH:\n\n");
for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(), E = CGP.ptm_end();
I != E; ++I) {
- DOUT << "PATTERN: "; DEBUG(I->getSrcPattern()->dump());
- DOUT << "\nRESULT: "; DEBUG(I->getDstPattern()->dump());
- DOUT << "\n";
+ DEBUG(errs() << "PATTERN: "; I->getSrcPattern()->dump());
+ DEBUG(errs() << "\nRESULT: "; I->getDstPattern()->dump());
+ DEBUG(errs() << "\n");
}
// At this point, we have full information about the 'Patterns' we need to
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 0a43f02d5afc..277640d79968 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -216,7 +216,6 @@ public:
explicit FastISelMap(std::string InstNS);
void CollectPatterns(CodeGenDAGPatterns &CGP);
- void PrintClass(raw_ostream &OS);
void PrintFunctionDefinitions(raw_ostream &OS);
};
@@ -461,11 +460,11 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
<< getLegalCName(Opcode) << "_"
<< getLegalCName(getName(VT)) << "_";
Operands.PrintManglingSuffix(OS);
- OS << "(MVT::SimpleValueType RetVT";
+ OS << "(MVT RetVT";
if (!Operands.empty())
OS << ", ";
Operands.PrintParameters(OS);
- OS << ") {\nswitch (RetVT) {\n";
+ OS << ") {\nswitch (RetVT.SimpleTy) {\n";
for (RetPredMap::const_iterator RI = RM.begin(), RE = RM.end();
RI != RE; ++RI) {
MVT::SimpleValueType RetVT = RI->first;
@@ -485,13 +484,13 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
<< getLegalCName(Opcode) << "_"
<< getLegalCName(getName(VT)) << "_";
Operands.PrintManglingSuffix(OS);
- OS << "(MVT::SimpleValueType RetVT";
+ OS << "(MVT RetVT";
if (!Operands.empty())
OS << ", ";
Operands.PrintParameters(OS);
OS << ") {\n";
- OS << " if (RetVT != " << getName(RM.begin()->first)
+ OS << " if (RetVT.SimpleTy != " << getName(RM.begin()->first)
<< ")\n return 0;\n";
const PredMap &PM = RM.begin()->second;
@@ -555,12 +554,12 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
OS << "unsigned FastEmit_"
<< getLegalCName(Opcode) << "_";
Operands.PrintManglingSuffix(OS);
- OS << "(MVT::SimpleValueType VT, MVT::SimpleValueType RetVT";
+ OS << "(MVT VT, MVT RetVT";
if (!Operands.empty())
OS << ", ";
Operands.PrintParameters(OS);
OS << ") {\n";
- OS << " switch (VT) {\n";
+ OS << " switch (VT.SimpleTy) {\n";
for (TypeRetPredMap::const_iterator TI = TM.begin(), TE = TM.end();
TI != TE; ++TI) {
MVT::SimpleValueType VT = TI->first;
@@ -587,7 +586,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
// on opcode and type.
OS << "unsigned FastEmit_";
Operands.PrintManglingSuffix(OS);
- OS << "(MVT::SimpleValueType VT, MVT::SimpleValueType RetVT, ISD::NodeType Opcode";
+ OS << "(MVT VT, MVT RetVT, ISD::NodeType Opcode";
if (!Operands.empty())
OS << ", ";
Operands.PrintParameters(OS);
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 4502da176f4f..3a104ea35874 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -15,6 +15,7 @@
#include "InstrInfoEmitter.h"
#include "CodeGenTarget.h"
#include "Record.h"
+#include "llvm/ADT/StringExtras.h"
#include <algorithm>
using namespace llvm;
@@ -94,13 +95,16 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
if (OpR->isSubClassOf("RegisterClass"))
Res += getQualifiedName(OpR) + "RegClassID, ";
+ else if (OpR->isSubClassOf("PointerLikeRegClass"))
+ Res += utostr(OpR->getValueAsInt("RegClassKind")) + ", ";
else
Res += "0, ";
+
// Fill in applicable flags.
Res += "0";
// Ptr value whose register class is resolved via callback.
- if (OpR->getName() == "ptr_rc")
+ if (OpR->isSubClassOf("PointerLikeRegClass"))
Res += "|(1<<TOI::LookupPtrRegClass)";
// Predicate operands. Check to see if the original unexpanded operand
@@ -276,6 +280,8 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isVariadic) OS << "|(1<<TID::Variadic)";
if (Inst.hasSideEffects) OS << "|(1<<TID::UnmodeledSideEffects)";
if (Inst.isAsCheapAsAMove) OS << "|(1<<TID::CheapAsAMove)";
+ if (Inst.hasExtraSrcRegAllocReq) OS << "|(1<<TID::ExtraSrcRegAllocReq)";
+ if (Inst.hasExtraDefRegAllocReq) OS << "|(1<<TID::ExtraDefRegAllocReq)";
OS << ", 0";
// Emit all of the target-specific flags...
@@ -335,7 +341,7 @@ void InstrInfoEmitter::emitShiftedValue(Record *R, StringInit *Val,
R->getName() != "DBG_LABEL" &&
R->getName() != "EH_LABEL" &&
R->getName() != "GC_LABEL" &&
- R->getName() != "DECLARE" &&
+ R->getName() != "KILL" &&
R->getName() != "EXTRACT_SUBREG" &&
R->getName() != "INSERT_SUBREG" &&
R->getName() != "IMPLICIT_DEF" &&
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 36768316bc0e..23919d97f2bf 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -141,24 +141,26 @@ EmitIntrinsicToOverloadTable(const std::vector<CodeGenIntrinsic> &Ints,
}
static void EmitTypeForValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
- if (MVT(VT).isInteger()) {
- unsigned BitWidth = MVT(VT).getSizeInBits();
- OS << "IntegerType::get(" << BitWidth << ")";
+ if (EVT(VT).isInteger()) {
+ unsigned BitWidth = EVT(VT).getSizeInBits();
+ OS << "IntegerType::get(Context, " << BitWidth << ")";
} else if (VT == MVT::Other) {
// MVT::Other is used to mean the empty struct type here.
- OS << "StructType::get()";
+ OS << "StructType::get(Context)";
} else if (VT == MVT::f32) {
- OS << "Type::FloatTy";
+ OS << "Type::getFloatTy(Context)";
} else if (VT == MVT::f64) {
- OS << "Type::DoubleTy";
+ OS << "Type::getDoubleTy(Context)";
} else if (VT == MVT::f80) {
- OS << "Type::X86_FP80Ty";
+ OS << "Type::getX86_FP80Ty(Context)";
} else if (VT == MVT::f128) {
- OS << "Type::FP128Ty";
+ OS << "Type::getFP128Ty(Context)";
} else if (VT == MVT::ppcf128) {
- OS << "Type::PPC_FP128Ty";
+ OS << "Type::getPPC_FP128Ty(Context)";
} else if (VT == MVT::isVoid) {
- OS << "Type::VoidTy";
+ OS << "Type::getVoidTy(Context)";
+ } else if (VT == MVT::Metadata) {
+ OS << "Type::getMetadataTy(Context)";
} else {
assert(false && "Unsupported ValueType!");
}
@@ -175,7 +177,7 @@ static void EmitTypeGenerate(raw_ostream &OS,
return;
}
- OS << "StructType::get(";
+ OS << "StructType::get(Context, ";
for (std::vector<Record*>::const_iterator
I = ArgTypes.begin(), E = ArgTypes.end(); I != E; ++I) {
@@ -201,17 +203,17 @@ static void EmitTypeGenerate(raw_ostream &OS, const Record *ArgType,
<< "(dyn_cast<VectorType>(Tys[" << Number << "]))";
else
OS << "Tys[" << Number << "]";
- } else if (VT == MVT::iAny || VT == MVT::fAny) {
+ } else if (VT == MVT::iAny || VT == MVT::fAny || VT == MVT::vAny) {
// NOTE: The ArgNo variable here is not the absolute argument number, it is
// the index of the "arbitrary" type in the Tys array passed to the
// Intrinsic::getDeclaration function. Consequently, we only want to
// increment it when we actually hit an overloaded type. Getting this wrong
// leads to very subtle bugs!
OS << "Tys[" << ArgNo++ << "]";
- } else if (MVT(VT).isVector()) {
- MVT VVT = VT;
+ } else if (EVT(VT).isVector()) {
+ EVT VVT = VT;
OS << "VectorType::get(";
- EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT());
+ EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT().SimpleTy);
OS << ", " << VVT.getVectorNumElements() << ")";
} else if (VT == MVT::iPTR) {
OS << "PointerType::getUnqual(";
@@ -227,7 +229,7 @@ static void EmitTypeGenerate(raw_ostream &OS, const Record *ArgType,
++ArgNo;
} else if (VT == MVT::isVoid) {
if (ArgNo == 0)
- OS << "Type::VoidTy";
+ OS << "Type::getVoidTy(Context)";
else
// MVT::isVoid is used to mean varargs here.
OS << "...";
@@ -302,6 +304,7 @@ void IntrinsicEmitter::EmitVerifier(const std::vector<CodeGenIntrinsic> &Ints,
const RecPair &ArgTypes = I->first;
const std::vector<Record*> &RetTys = ArgTypes.first;
const std::vector<Record*> &ParamTys = ArgTypes.second;
+ std::vector<unsigned> OverloadedTypeIndices;
OS << " VerifyIntrinsicPrototype(ID, IF, " << RetTys.size() << ", "
<< ParamTys.size();
@@ -313,6 +316,9 @@ void IntrinsicEmitter::EmitVerifier(const std::vector<CodeGenIntrinsic> &Ints,
if (ArgType->isSubClassOf("LLVMMatchType")) {
unsigned Number = ArgType->getValueAsInt("Number");
+ assert(Number < OverloadedTypeIndices.size() &&
+ "Invalid matching number!");
+ Number = OverloadedTypeIndices[Number];
if (ArgType->isSubClassOf("LLVMExtendedElementVectorType"))
OS << "~(ExtendedElementVectorType | " << Number << ")";
else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType"))
@@ -323,6 +329,9 @@ void IntrinsicEmitter::EmitVerifier(const std::vector<CodeGenIntrinsic> &Ints,
MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT"));
OS << getEnumName(VT);
+ if (EVT(VT).isOverloaded())
+ OverloadedTypeIndices.push_back(j);
+
if (VT == MVT::isVoid && j != 0 && j != je - 1)
throw "Var arg type not last argument";
}
@@ -335,6 +344,9 @@ void IntrinsicEmitter::EmitVerifier(const std::vector<CodeGenIntrinsic> &Ints,
if (ArgType->isSubClassOf("LLVMMatchType")) {
unsigned Number = ArgType->getValueAsInt("Number");
+ assert(Number < OverloadedTypeIndices.size() &&
+ "Invalid matching number!");
+ Number = OverloadedTypeIndices[Number];
if (ArgType->isSubClassOf("LLVMExtendedElementVectorType"))
OS << "~(ExtendedElementVectorType | " << Number << ")";
else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType"))
@@ -345,6 +357,9 @@ void IntrinsicEmitter::EmitVerifier(const std::vector<CodeGenIntrinsic> &Ints,
MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT"));
OS << getEnumName(VT);
+ if (EVT(VT).isOverloaded())
+ OverloadedTypeIndices.push_back(j + RetTys.size());
+
if (VT == MVT::isVoid && j != 0 && j != je - 1)
throw "Var arg type not last argument";
}
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index fc182ce5af8e..06afaf73a02e 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -39,10 +39,11 @@ typedef std::vector<std::string> StrVector;
//===----------------------------------------------------------------------===//
/// Constants
-// Indentation strings.
-const char * Indent1 = " ";
-const char * Indent2 = " ";
-const char * Indent3 = " ";
+// Indentation.
+unsigned TabWidth = 4;
+unsigned Indent1 = TabWidth*1;
+unsigned Indent2 = TabWidth*2;
+unsigned Indent3 = TabWidth*3;
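
With numeric widths the emitter can lean on raw_ostream::indent(), which
prints the requested number of spaces and returns the stream, so nesting
becomes plain arithmetic instead of string concatenation. A minimal sketch of
the new pattern:

    #include "llvm/Support/raw_ostream.h"

    static const unsigned Indent1 = 4; // as defined above

    void demo(llvm::raw_ostream &O, unsigned IndentLevel) {
      O.indent(IndentLevel) << "if (cond) {\n";
      O.indent(IndentLevel + Indent1) << "vec.push_back(*B);\n";
      O.indent(IndentLevel) << "}\n";
    }
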
// Default help string.
const char * DefaultHelpString = "NO HELP MESSAGE PROVIDED";
@@ -81,16 +82,15 @@ const DagInit& InitPtrToDag(const Init* ptr) {
}
// checkNumberOfArguments - Ensure that the number of args in d is
-// less than or equal to min_arguments, otherwise throw an exception.
+// greater than or equal to min_arguments, otherwise throw an exception.
void checkNumberOfArguments (const DagInit* d, unsigned min_arguments) {
if (!d || d->getNumArgs() < min_arguments)
- throw d->getOperator()->getAsString()
- + ": too few arguments!";
+ throw d->getOperator()->getAsString() + ": too few arguments!";
}
// isDagEmpty - is this DAG marked with an empty marker?
bool isDagEmpty (const DagInit* d) {
- return d->getOperator()->getAsString() == "empty";
+ return d->getOperator()->getAsString() == "empty_dag_marker";
}
// EscapeVariableName - Escape commas and other symbols not allowed
@@ -139,20 +139,21 @@ void checkedIncrement(I& P, I E, S ErrorString) {
/// OptionType - One of six different option types. See the
/// documentation for detailed description of differences.
namespace OptionType {
+
enum OptionType { Alias, Switch, Parameter, ParameterList,
Prefix, PrefixList};
-bool IsList (OptionType t) {
- return (t == ParameterList || t == PrefixList);
-}
+ bool IsList (OptionType t) {
+ return (t == ParameterList || t == PrefixList);
+ }
-bool IsSwitch (OptionType t) {
- return (t == Switch);
-}
+ bool IsSwitch (OptionType t) {
+ return (t == Switch);
+ }
-bool IsParameter (OptionType t) {
- return (t == Parameter || t == Prefix);
-}
+ bool IsParameter (OptionType t) {
+ return (t == Parameter || t == Prefix);
+ }
}
@@ -187,11 +188,12 @@ struct OptionDescription {
unsigned Flags;
std::string Help;
unsigned MultiVal;
+ Init* InitVal;
OptionDescription(OptionType::OptionType t = OptionType::Switch,
const std::string& n = "",
const std::string& h = DefaultHelpString)
- : Type(t), Name(n), Flags(0x0), Help(h), MultiVal(1)
+ : Type(t), Name(n), Flags(0x0), Help(h), MultiVal(1), InitVal(0)
{}
/// GenTypeDeclaration - Returns the C++ variable type of this
@@ -229,6 +231,15 @@ struct OptionDescription {
bool isReallyHidden() const;
void setReallyHidden();
+ bool isParameter() const
+ { return OptionType::IsParameter(this->Type); }
+
+ bool isSwitch() const
+ { return OptionType::IsSwitch(this->Type); }
+
+ bool isList() const
+ { return OptionType::IsList(this->Type); }
+
};
void OptionDescription::Merge (const OptionDescription& other)
@@ -438,6 +449,7 @@ public:
AddHandler("extern", &CollectOptionProperties::onExtern);
AddHandler("help", &CollectOptionProperties::onHelp);
AddHandler("hidden", &CollectOptionProperties::onHidden);
+ AddHandler("init", &CollectOptionProperties::onInit);
AddHandler("multi_val", &CollectOptionProperties::onMultiVal);
AddHandler("one_or_more", &CollectOptionProperties::onOneOrMore);
AddHandler("really_hidden", &CollectOptionProperties::onReallyHidden);
@@ -481,6 +493,20 @@ private:
optDesc_.setRequired();
}
+ void onInit (const DagInit* d) {
+ checkNumberOfArguments(d, 1);
+ Init* i = d->getArg(0);
+ const std::string& str = i->getAsString();
+
+ bool correct = optDesc_.isParameter() && dynamic_cast<StringInit*>(i);
+ correct |= (optDesc_.isSwitch() && (str == "true" || str == "false"));
+
+ if (!correct)
+ throw std::string("Incorrect usage of the 'init' option property!");
+
+ optDesc_.InitVal = i;
+ }
+
void onOneOrMore (const DagInit* d) {
checkNumberOfArguments(d, 0);
if (optDesc_.isRequired() || optDesc_.isZeroOrOne())
@@ -950,8 +976,22 @@ void CheckForSuperfluousOptions (const RecordVector& Edges,
}
}
-/// EmitCaseTest1Arg - Helper function used by
-/// EmitCaseConstructHandler.
+/// EmitCaseTest0Args - Helper function used by EmitCaseConstructHandler().
+bool EmitCaseTest0Args(const std::string& TestName, raw_ostream& O) {
+ if (TestName == "single_input_file") {
+ O << "InputFilenames.size() == 1";
+ return true;
+ }
+ else if (TestName == "multiple_input_files") {
+ O << "InputFilenames.size() > 1";
+ return true;
+ }
+
+ return false;
+}
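
Zero-argument tests map a bare name in a 'case' DAG onto a fixed C++
expression in the generated driver. For instance, a clause guarded by
(single_input_file) ends up printed into the enclosing condition as:

    // Generated shape (clause body elided).
    if (InputFilenames.size() == 1) {
      // ...actions for this clause...
    }
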
+
+
+/// EmitCaseTest1Arg - Helper function used by EmitCaseConstructHandler().
bool EmitCaseTest1Arg(const std::string& TestName,
const DagInit& d,
const OptionDescriptions& OptDescs,
@@ -961,7 +1001,7 @@ bool EmitCaseTest1Arg(const std::string& TestName,
if (TestName == "switch_on") {
const OptionDescription& OptDesc = OptDescs.FindOption(OptName);
- if (!OptionType::IsSwitch(OptDesc.Type))
+ if (!OptDesc.isSwitch())
throw OptName + ": incorrect option type - should be a switch!";
O << OptDesc.GenVariableName();
return true;
@@ -984,7 +1024,7 @@ bool EmitCaseTest1Arg(const std::string& TestName,
}
else {
const OptionDescription& OptDesc = OptDescs.FindOption(OptName);
- if (OptionType::IsSwitch(OptDesc.Type))
+ if (OptDesc.isSwitch())
throw OptName
+ ": incorrect option type - should be a list or parameter!";
O << Test << OptDesc.GenVariableName() << ".empty()";
@@ -995,11 +1035,10 @@ bool EmitCaseTest1Arg(const std::string& TestName,
return false;
}
-/// EmitCaseTest2Args - Helper function used by
-/// EmitCaseConstructHandler.
+/// EmitCaseTest2Args - Helper function used by EmitCaseConstructHandler().
bool EmitCaseTest2Args(const std::string& TestName,
const DagInit& d,
- const char* IndentLevel,
+ unsigned IndentLevel,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
checkNumberOfArguments(&d, 2);
@@ -1008,17 +1047,18 @@ bool EmitCaseTest2Args(const std::string& TestName,
const OptionDescription& OptDesc = OptDescs.FindOption(OptName);
if (TestName == "parameter_equals") {
- if (!OptionType::IsParameter(OptDesc.Type))
+ if (!OptDesc.isParameter())
throw OptName + ": incorrect option type - should be a parameter!";
O << OptDesc.GenVariableName() << " == \"" << OptArg << "\"";
return true;
}
else if (TestName == "element_in_list") {
- if (!OptionType::IsList(OptDesc.Type))
+ if (!OptDesc.isList())
throw OptName + ": incorrect option type - should be a list!";
const std::string& VarName = OptDesc.GenVariableName();
- O << "std::find(" << VarName << ".begin(),\n"
- << IndentLevel << Indent1 << VarName << ".end(), \""
+ O << "std::find(" << VarName << ".begin(),\n";
+ O.indent(IndentLevel + Indent1)
+ << VarName << ".end(), \""
<< OptArg << "\") != " << VarName << ".end()";
return true;
}
@@ -1028,29 +1068,42 @@ bool EmitCaseTest2Args(const std::string& TestName,
// Forward declaration.
// EmitLogicalOperationTest and EmitCaseTest are mutually recursive.
-void EmitCaseTest(const DagInit& d, const char* IndentLevel,
+void EmitCaseTest(const DagInit& d, unsigned IndentLevel,
const OptionDescriptions& OptDescs,
raw_ostream& O);
/// EmitLogicalOperationTest - Helper function used by
/// EmitCaseConstructHandler.
void EmitLogicalOperationTest(const DagInit& d, const char* LogicOp,
- const char* IndentLevel,
+ unsigned IndentLevel,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
O << '(';
for (unsigned j = 0, NumArgs = d.getNumArgs(); j < NumArgs; ++j) {
const DagInit& InnerTest = InitPtrToDag(d.getArg(j));
EmitCaseTest(InnerTest, IndentLevel, OptDescs, O);
- if (j != NumArgs - 1)
- O << ")\n" << IndentLevel << Indent1 << ' ' << LogicOp << " (";
- else
+ if (j != NumArgs - 1) {
+ O << ")\n";
+ O.indent(IndentLevel + Indent1) << ' ' << LogicOp << " (";
+ }
+ else {
O << ')';
+ }
}
}
+void EmitLogicalNot(const DagInit& d, unsigned IndentLevel,
+ const OptionDescriptions& OptDescs, raw_ostream& O)
+{
+ checkNumberOfArguments(&d, 1);
+ const DagInit& InnerTest = InitPtrToDag(d.getArg(0));
+ O << "! (";
+ EmitCaseTest(InnerTest, IndentLevel, OptDescs, O);
+ O << ")";
+}
+
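
The 'not' test wraps its single inner test in "! (...)", and because
EmitCaseTest() recurses, it composes with 'and'/'or' arbitrarily. A TableGen
test such as (not (or (switch_on "E"), (switch_on "S"))) would be printed into
the generated condition as roughly (variable names illustrative):

    ! ((E) || (S))
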
/// EmitCaseTest - Helper function used by EmitCaseConstructHandler.
-void EmitCaseTest(const DagInit& d, const char* IndentLevel,
+void EmitCaseTest(const DagInit& d, unsigned IndentLevel,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
const std::string& TestName = d.getOperator()->getAsString();
@@ -1059,6 +1112,10 @@ void EmitCaseTest(const DagInit& d, const char* IndentLevel,
EmitLogicalOperationTest(d, "&&", IndentLevel, OptDescs, O);
else if (TestName == "or")
EmitLogicalOperationTest(d, "||", IndentLevel, OptDescs, O);
+ else if (TestName == "not")
+ EmitLogicalNot(d, IndentLevel, OptDescs, O);
+ else if (EmitCaseTest0Args(TestName, O))
+ return;
else if (EmitCaseTest1Arg(TestName, d, OptDescs, O))
return;
else if (EmitCaseTest2Args(TestName, d, IndentLevel, OptDescs, O))
@@ -1070,9 +1127,9 @@ void EmitCaseTest(const DagInit& d, const char* IndentLevel,
// Emit code that handles the 'case' construct.
// Takes a function object that should emit code for every case clause.
// Callback's type is
-// void F(Init* Statement, const char* IndentLevel, raw_ostream& O).
+// void F(Init* Statement, unsigned IndentLevel, raw_ostream& O).
template <typename F>
-void EmitCaseConstructHandler(const Init* Dag, const char* IndentLevel,
+void EmitCaseConstructHandler(const Init* Dag, unsigned IndentLevel,
F Callback, bool EmitElseIf,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
@@ -1094,10 +1151,10 @@ void EmitCaseConstructHandler(const Init* Dag, const char* IndentLevel,
if (i+2 != numArgs)
throw std::string("The 'default' clause should be the last in the "
"'case' construct!");
- O << IndentLevel << "else {\n";
+ O.indent(IndentLevel) << "else {\n";
}
else {
- O << IndentLevel << ((i != 0 && EmitElseIf) ? "else if (" : "if (");
+ O.indent(IndentLevel) << ((i != 0 && EmitElseIf) ? "else if (" : "if (");
EmitCaseTest(Test, IndentLevel, OptDescs, O);
O << ") {\n";
}
@@ -1112,13 +1169,13 @@ void EmitCaseConstructHandler(const Init* Dag, const char* IndentLevel,
const DagInit* nd = dynamic_cast<DagInit*>(arg);
if (nd && (nd->getOperator()->getAsString() == "case")) {
// Handle the nested 'case'.
- EmitCaseConstructHandler(nd, (std::string(IndentLevel) + Indent1).c_str(),
+ EmitCaseConstructHandler(nd, (IndentLevel + Indent1),
Callback, EmitElseIf, OptDescs, O);
}
else {
- Callback(arg, (std::string(IndentLevel) + Indent1).c_str(), O);
+ Callback(arg, (IndentLevel + Indent1), O);
}
- O << IndentLevel << "}\n";
+ O.indent(IndentLevel) << "}\n";
}
}
@@ -1272,7 +1329,7 @@ StrVector::const_iterator SubstituteSpecialCommands
/// EmitCmdLineVecFill - Emit code that fills in the command line
/// vector. Helper function used by EmitGenerateActionMethod().
void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
- bool IsJoin, const char* IndentLevel,
+ bool IsJoin, unsigned IndentLevel,
raw_ostream& O) {
StrVector StrVec;
TokenizeCmdline(InitPtrToString(CmdLine), StrVec);
@@ -1295,22 +1352,28 @@ void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
++I;
}
+ bool hasINFILE = false;
+
for (; I != E; ++I) {
const std::string& cmd = *I;
assert(!cmd.empty());
- O << IndentLevel;
+ O.indent(IndentLevel);
if (cmd.at(0) == '$') {
if (cmd == "$INFILE") {
- if (IsJoin)
+ hasINFILE = true;
+ if (IsJoin) {
O << "for (PathVector::const_iterator B = inFiles.begin()"
- << ", E = inFiles.end();\n"
- << IndentLevel << "B != E; ++B)\n"
- << IndentLevel << Indent1 << "vec.push_back(B->toString());\n";
- else
- O << "vec.push_back(inFile.toString());\n";
+ << ", E = inFiles.end();\n";
+ O.indent(IndentLevel) << "B != E; ++B)\n";
+ O.indent(IndentLevel + Indent1) << "vec.push_back(B->str());\n";
+ }
+ else {
+ O << "vec.push_back(inFile.str());\n";
+ }
}
else if (cmd == "$OUTFILE") {
- O << "vec.push_back(out_file);\n";
+ O << "vec.push_back(\"\");\n";
+ O.indent(IndentLevel) << "out_file_index = vec.size()-1;\n";
}
else {
O << "vec.push_back(";
@@ -1322,8 +1385,10 @@ void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
O << "vec.push_back(\"" << cmd << "\");\n";
}
}
- O << IndentLevel << "cmd = ";
+ if (!hasINFILE)
+ throw "Tool '" + ToolName + "' doesn't take any input!";
+ O.indent(IndentLevel) << "cmd = ";
if (StrVec[0][0] == '$')
SubstituteSpecialCommands(StrVec.begin(), StrVec.end(), O);
else
@@ -1341,11 +1406,10 @@ class EmitCmdLineVecFillCallback {
EmitCmdLineVecFillCallback(bool J, const std::string& TN)
: IsJoin(J), ToolName(TN) {}
- void operator()(const Init* Statement, const char* IndentLevel,
+ void operator()(const Init* Statement, unsigned IndentLevel,
raw_ostream& O) const
{
- EmitCmdLineVecFill(Statement, ToolName, IsJoin,
- IndentLevel, O);
+ EmitCmdLineVecFill(Statement, ToolName, IsJoin, IndentLevel, O);
}
};
@@ -1353,53 +1417,56 @@ class EmitCmdLineVecFillCallback {
/// implement EmitActionHandler. Emits code for
/// handling the (forward) and (forward_as) option properties.
void EmitForwardOptionPropertyHandlingCode (const OptionDescription& D,
- const char* Indent,
+ unsigned IndentLevel,
const std::string& NewName,
raw_ostream& O) {
const std::string& Name = NewName.empty()
? ("-" + D.Name)
: NewName;
+ unsigned IndentLevel1 = IndentLevel + Indent1;
switch (D.Type) {
case OptionType::Switch:
- O << Indent << "vec.push_back(\"" << Name << "\");\n";
+ O.indent(IndentLevel) << "vec.push_back(\"" << Name << "\");\n";
break;
case OptionType::Parameter:
- O << Indent << "vec.push_back(\"" << Name << "\");\n";
- O << Indent << "vec.push_back(" << D.GenVariableName() << ");\n";
+ O.indent(IndentLevel) << "vec.push_back(\"" << Name << "\");\n";
+ O.indent(IndentLevel) << "vec.push_back(" << D.GenVariableName() << ");\n";
break;
case OptionType::Prefix:
- O << Indent << "vec.push_back(\"" << Name << "\" + "
- << D.GenVariableName() << ");\n";
+ O.indent(IndentLevel) << "vec.push_back(\"" << Name << "\" + "
+ << D.GenVariableName() << ");\n";
break;
case OptionType::PrefixList:
- O << Indent << "for (" << D.GenTypeDeclaration()
- << "::iterator B = " << D.GenVariableName() << ".begin(),\n"
- << Indent << "E = " << D.GenVariableName() << ".end(); B != E;) {\n"
- << Indent << Indent1 << "vec.push_back(\"" << Name << "\" + "
- << "*B);\n"
- << Indent << Indent1 << "++B;\n";
+ O.indent(IndentLevel)
+ << "for (" << D.GenTypeDeclaration()
+ << "::iterator B = " << D.GenVariableName() << ".begin(),\n";
+ O.indent(IndentLevel)
+ << "E = " << D.GenVariableName() << ".end(); B != E;) {\n";
+ O.indent(IndentLevel1) << "vec.push_back(\"" << Name << "\" + " << "*B);\n";
+ O.indent(IndentLevel1) << "++B;\n";
for (int i = 1, j = D.MultiVal; i < j; ++i) {
- O << Indent << Indent1 << "vec.push_back(*B);\n"
- << Indent << Indent1 << "++B;\n";
+ O.indent(IndentLevel1) << "vec.push_back(*B);\n";
+ O.indent(IndentLevel1) << "++B;\n";
}
- O << Indent << "}\n";
+ O.indent(IndentLevel) << "}\n";
break;
case OptionType::ParameterList:
- O << Indent << "for (" << D.GenTypeDeclaration()
- << "::iterator B = " << D.GenVariableName() << ".begin(),\n"
- << Indent << "E = " << D.GenVariableName()
- << ".end() ; B != E;) {\n"
- << Indent << Indent1 << "vec.push_back(\"" << Name << "\");\n";
+ O.indent(IndentLevel)
+ << "for (" << D.GenTypeDeclaration() << "::iterator B = "
+ << D.GenVariableName() << ".begin(),\n";
+ O.indent(IndentLevel) << "E = " << D.GenVariableName()
+ << ".end() ; B != E;) {\n";
+ O.indent(IndentLevel1) << "vec.push_back(\"" << Name << "\");\n";
for (int i = 0, j = D.MultiVal; i < j; ++i) {
- O << Indent << Indent1 << "vec.push_back(*B);\n"
- << Indent << Indent1 << "++B;\n";
+ O.indent(IndentLevel1) << "vec.push_back(*B);\n";
+ O.indent(IndentLevel1) << "++B;\n";
}
- O << Indent << "}\n";
+ O.indent(IndentLevel) << "}\n";
break;
case OptionType::Alias:
default:
@@ -1413,7 +1480,7 @@ void EmitForwardOptionPropertyHandlingCode (const OptionDescription& D,
class EmitActionHandler {
const OptionDescriptions& OptDescs;
- void processActionDag(const Init* Statement, const char* IndentLevel,
+ void processActionDag(const Init* Statement, unsigned IndentLevel,
raw_ostream& O) const
{
const DagInit& Dag = InitPtrToDag(Statement);
@@ -1427,10 +1494,10 @@ class EmitActionHandler {
for (StrVector::const_iterator B = Out.begin(), E = Out.end();
B != E; ++B)
- O << IndentLevel << "vec.push_back(\"" << *B << "\");\n";
+ O.indent(IndentLevel) << "vec.push_back(\"" << *B << "\");\n";
}
else if (ActionName == "error") {
- O << IndentLevel << "throw std::runtime_error(\"" <<
+ O.indent(IndentLevel) << "throw std::runtime_error(\"" <<
(Dag.getNumArgs() >= 1 ? InitPtrToString(Dag.getArg(0))
: "Unknown error!")
<< "\");\n";
@@ -1451,10 +1518,10 @@ class EmitActionHandler {
else if (ActionName == "output_suffix") {
checkNumberOfArguments(&Dag, 1);
const std::string& OutSuf = InitPtrToString(Dag.getArg(0));
- O << IndentLevel << "output_suffix = \"" << OutSuf << "\";\n";
+ O.indent(IndentLevel) << "output_suffix = \"" << OutSuf << "\";\n";
}
else if (ActionName == "stop_compilation") {
- O << IndentLevel << "stop_compilation = true;\n";
+ O.indent(IndentLevel) << "stop_compilation = true;\n";
}
else if (ActionName == "unpack_values") {
checkNumberOfArguments(&Dag, 1);
@@ -1464,16 +1531,18 @@ class EmitActionHandler {
if (D.isMultiVal())
throw std::string("Can't use unpack_values with multi-valued options!");
- if (OptionType::IsList(D.Type)) {
- O << IndentLevel << "for (" << D.GenTypeDeclaration()
- << "::iterator B = " << D.GenVariableName() << ".begin(),\n"
- << IndentLevel << "E = " << D.GenVariableName()
- << ".end(); B != E; ++B)\n"
- << IndentLevel << Indent1 << "llvm::SplitString(*B, vec, \",\");\n";
+ if (D.isList()) {
+ O.indent(IndentLevel)
+ << "for (" << D.GenTypeDeclaration()
+ << "::iterator B = " << D.GenVariableName() << ".begin(),\n";
+ O.indent(IndentLevel)
+ << "E = " << D.GenVariableName() << ".end(); B != E; ++B)\n";
+ O.indent(IndentLevel + Indent1)
+ << "llvm::SplitString(*B, vec, \",\");\n";
}
- else if (OptionType::IsParameter(D.Type)){
- O << Indent3 << "llvm::SplitString("
- << D.GenVariableName() << ", vec, \",\");\n";
+ else if (D.isParameter()) {
+ O.indent(IndentLevel) << "llvm::SplitString("
+ << D.GenVariableName() << ", vec, \",\");\n";
}
else {
throw "Option '" + D.Name +
@@ -1488,7 +1557,7 @@ class EmitActionHandler {
EmitActionHandler(const OptionDescriptions& OD)
: OptDescs(OD) {}
- void operator()(const Init* Statement, const char* IndentLevel,
+ void operator()(const Init* Statement, unsigned IndentLevel,
raw_ostream& O) const
{
if (typeid(*Statement) == typeid(ListInit)) {
@@ -1503,56 +1572,111 @@ class EmitActionHandler {
}
};
-// EmitGenerateActionMethod - Emit one of two versions of the
+bool IsOutFileIndexCheckRequiredStr (const Init* CmdLine) {
+ StrVector StrVec;
+ TokenizeCmdline(InitPtrToString(CmdLine), StrVec);
+
+ for (StrVector::const_iterator I = StrVec.begin(), E = StrVec.end();
+ I != E; ++I) {
+ if (*I == "$OUTFILE")
+ return false;
+ }
+
+ return true;
+}
+
+class IsOutFileIndexCheckRequiredStrCallback {
+ bool* ret_;
+
+public:
+ IsOutFileIndexCheckRequiredStrCallback(bool* ret) : ret_(ret)
+ {}
+
+ void operator()(const Init* CmdLine) {
+ if (IsOutFileIndexCheckRequiredStr(CmdLine))
+ *ret_ = true;
+ }
+};
+
+bool IsOutFileIndexCheckRequiredCase (Init* CmdLine) {
+ bool ret = false;
+ WalkCase(CmdLine, Id(), IsOutFileIndexCheckRequiredStrCallback(&ret));
+ return ret;
+}
+
+/// IsOutFileIndexCheckRequired - Should we emit an "out_file_index != -1" check
+/// in EmitGenerateActionMethod()?
+bool IsOutFileIndexCheckRequired (Init* CmdLine) {
+ if (typeid(*CmdLine) == typeid(StringInit))
+ return IsOutFileIndexCheckRequiredStr(CmdLine);
+ else
+ return IsOutFileIndexCheckRequiredCase(CmdLine);
+}
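
The predicate walks the cmd_line property, a plain string or a nested 'case',
and reports whether any alternative omits $OUTFILE. Only then can
out_file_index still be -1 at the point of use, so only then does the
generated method need a guard. The two shapes of generated code:

    // Every alternative mentions $OUTFILE: the index is always assigned.
    int out_file_index;
    ...
    vec[out_file_index] = out_file;

    // Some alternative omits it: initialize and guard.
    int out_file_index = -1;
    ...
    if (out_file_index != -1)
      vec[out_file_index] = out_file;
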
+
+// EmitGenerateActionMethod - Emit either a normal or a "join" version of the
// Tool::GenerateAction() method.
void EmitGenerateActionMethod (const ToolDescription& D,
const OptionDescriptions& OptDescs,
bool IsJoin, raw_ostream& O) {
if (IsJoin)
- O << Indent1 << "Action GenerateAction(const PathVector& inFiles,\n";
+ O.indent(Indent1) << "Action GenerateAction(const PathVector& inFiles,\n";
else
- O << Indent1 << "Action GenerateAction(const sys::Path& inFile,\n";
-
- O << Indent2 << "bool HasChildren,\n"
- << Indent2 << "const llvm::sys::Path& TempDir,\n"
- << Indent2 << "const InputLanguagesSet& InLangs,\n"
- << Indent2 << "const LanguageMap& LangMap) const\n"
- << Indent1 << "{\n"
- << Indent2 << "std::string cmd;\n"
- << Indent2 << "std::vector<std::string> vec;\n"
- << Indent2 << "bool stop_compilation = !HasChildren;\n"
- << Indent2 << "const char* output_suffix = \"" << D.OutputSuffix << "\";\n"
- << Indent2 << "std::string out_file;\n\n";
-
- // For every understood option, emit handling code.
- if (D.Actions)
- EmitCaseConstructHandler(D.Actions, Indent2, EmitActionHandler(OptDescs),
- false, OptDescs, O);
-
- O << '\n' << Indent2
- << "out_file = OutFilename(" << (IsJoin ? "sys::Path(),\n" : "inFile,\n")
- << Indent3 << "TempDir, stop_compilation, output_suffix).toString();\n\n";
+ O.indent(Indent1) << "Action GenerateAction(const sys::Path& inFile,\n";
+
+ O.indent(Indent2) << "bool HasChildren,\n";
+ O.indent(Indent2) << "const llvm::sys::Path& TempDir,\n";
+ O.indent(Indent2) << "const InputLanguagesSet& InLangs,\n";
+ O.indent(Indent2) << "const LanguageMap& LangMap) const\n";
+ O.indent(Indent1) << "{\n";
+ O.indent(Indent2) << "std::string cmd;\n";
+ O.indent(Indent2) << "std::vector<std::string> vec;\n";
+ O.indent(Indent2) << "bool stop_compilation = !HasChildren;\n";
+ O.indent(Indent2) << "const char* output_suffix = \""
+ << D.OutputSuffix << "\";\n";
- // cmd_line is either a string or a 'case' construct.
if (!D.CmdLine)
throw "Tool " + D.Name + " has no cmd_line property!";
- else if (typeid(*D.CmdLine) == typeid(StringInit))
+
+ bool IndexCheckRequired = IsOutFileIndexCheckRequired(D.CmdLine);
+ O.indent(Indent2) << "int out_file_index"
+ << (IndexCheckRequired ? " = -1" : "")
+ << ";\n\n";
+
+ // Process the cmd_line property.
+ if (typeid(*D.CmdLine) == typeid(StringInit))
EmitCmdLineVecFill(D.CmdLine, D.Name, IsJoin, Indent2, O);
else
EmitCaseConstructHandler(D.CmdLine, Indent2,
EmitCmdLineVecFillCallback(IsJoin, D.Name),
true, OptDescs, O);
+ // For every understood option, emit handling code.
+ if (D.Actions)
+ EmitCaseConstructHandler(D.Actions, Indent2, EmitActionHandler(OptDescs),
+ false, OptDescs, O);
+
+ O << '\n';
+ O.indent(Indent2)
+ << "std::string out_file = OutFilename("
+ << (IsJoin ? "sys::Path(),\n" : "inFile,\n");
+ O.indent(Indent3) << "TempDir, stop_compilation, output_suffix).str();\n\n";
+
+ if (IndexCheckRequired)
+ O.indent(Indent2) << "if (out_file_index != -1)\n";
+ O.indent(IndexCheckRequired ? Indent3 : Indent2)
+ << "vec[out_file_index] = out_file;\n";
+
// Handle the Sink property.
if (D.isSink()) {
- O << Indent2 << "if (!" << SinkOptionName << ".empty()) {\n"
- << Indent3 << "vec.insert(vec.end(), "
- << SinkOptionName << ".begin(), " << SinkOptionName << ".end());\n"
- << Indent2 << "}\n";
+ O.indent(Indent2) << "if (!" << SinkOptionName << ".empty()) {\n";
+ O.indent(Indent3) << "vec.insert(vec.end(), "
+ << SinkOptionName << ".begin(), " << SinkOptionName
+ << ".end());\n";
+ O.indent(Indent2) << "}\n";
}
- O << Indent2 << "return Action(cmd, vec, stop_compilation, out_file);\n"
- << Indent1 << "}\n\n";
+ O.indent(Indent2) << "return Action(cmd, vec, stop_compilation, out_file);\n";
+ O.indent(Indent1) << "}\n\n";
}
/// EmitGenerateActionMethods - Emit two GenerateAction() methods for
@@ -1560,18 +1684,20 @@ void EmitGenerateActionMethod (const ToolDescription& D,
void EmitGenerateActionMethods (const ToolDescription& ToolDesc,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
- if (!ToolDesc.isJoin())
- O << Indent1 << "Action GenerateAction(const PathVector& inFiles,\n"
- << Indent2 << "bool HasChildren,\n"
- << Indent2 << "const llvm::sys::Path& TempDir,\n"
- << Indent2 << "const InputLanguagesSet& InLangs,\n"
- << Indent2 << "const LanguageMap& LangMap) const\n"
- << Indent1 << "{\n"
- << Indent2 << "throw std::runtime_error(\"" << ToolDesc.Name
- << " is not a Join tool!\");\n"
- << Indent1 << "}\n\n";
- else
+ if (!ToolDesc.isJoin()) {
+ O.indent(Indent1) << "Action GenerateAction(const PathVector& inFiles,\n";
+ O.indent(Indent2) << "bool HasChildren,\n";
+ O.indent(Indent2) << "const llvm::sys::Path& TempDir,\n";
+ O.indent(Indent2) << "const InputLanguagesSet& InLangs,\n";
+ O.indent(Indent2) << "const LanguageMap& LangMap) const\n";
+ O.indent(Indent1) << "{\n";
+ O.indent(Indent2) << "throw std::runtime_error(\"" << ToolDesc.Name
+ << " is not a Join tool!\");\n";
+ O.indent(Indent1) << "}\n\n";
+ }
+ else {
EmitGenerateActionMethod(ToolDesc, OptDescs, true, O);
+ }
EmitGenerateActionMethod(ToolDesc, OptDescs, false, O);
}
@@ -1579,34 +1705,34 @@ void EmitGenerateActionMethods (const ToolDescription& ToolDesc,
/// EmitInOutLanguageMethods - Emit the [Input,Output]Language()
/// methods for a given Tool class.
void EmitInOutLanguageMethods (const ToolDescription& D, raw_ostream& O) {
- O << Indent1 << "const char** InputLanguages() const {\n"
- << Indent2 << "return InputLanguages_;\n"
- << Indent1 << "}\n\n";
+ O.indent(Indent1) << "const char** InputLanguages() const {\n";
+ O.indent(Indent2) << "return InputLanguages_;\n";
+ O.indent(Indent1) << "}\n\n";
if (D.OutLanguage.empty())
throw "Tool " + D.Name + " has no 'out_language' property!";
- O << Indent1 << "const char* OutputLanguage() const {\n"
- << Indent2 << "return \"" << D.OutLanguage << "\";\n"
- << Indent1 << "}\n\n";
+ O.indent(Indent1) << "const char* OutputLanguage() const {\n";
+ O.indent(Indent2) << "return \"" << D.OutLanguage << "\";\n";
+ O.indent(Indent1) << "}\n\n";
}
/// EmitNameMethod - Emit the Name() method for a given Tool class.
void EmitNameMethod (const ToolDescription& D, raw_ostream& O) {
- O << Indent1 << "const char* Name() const {\n"
- << Indent2 << "return \"" << D.Name << "\";\n"
- << Indent1 << "}\n\n";
+ O.indent(Indent1) << "const char* Name() const {\n";
+ O.indent(Indent2) << "return \"" << D.Name << "\";\n";
+ O.indent(Indent1) << "}\n\n";
}
/// EmitIsJoinMethod - Emit the IsJoin() method for a given Tool
/// class.
void EmitIsJoinMethod (const ToolDescription& D, raw_ostream& O) {
- O << Indent1 << "bool IsJoin() const {\n";
+ O.indent(Indent1) << "bool IsJoin() const {\n";
if (D.isJoin())
- O << Indent2 << "return true;\n";
+ O.indent(Indent2) << "return true;\n";
else
- O << Indent2 << "return false;\n";
- O << Indent1 << "}\n\n";
+ O.indent(Indent2) << "return false;\n";
+ O.indent(Indent1) << "}\n\n";
}
/// EmitStaticMemberDefinitions - Emit static member definitions for a
@@ -1636,8 +1762,8 @@ void EmitToolClassDefinition (const ToolDescription& D,
else
O << "Tool";
- O << "{\nprivate:\n"
- << Indent1 << "static const char* InputLanguages_[];\n\n";
+ O << "{\nprivate:\n";
+ O.indent(Indent1) << "static const char* InputLanguages_[];\n\n";
O << "public:\n";
EmitNameMethod(D, O);
@@ -1687,15 +1813,15 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
O << ", cl::Prefix";
if (val.isRequired()) {
- if (OptionType::IsList(val.Type) && !val.isMultiVal())
+ if (val.isList() && !val.isMultiVal())
O << ", cl::OneOrMore";
else
O << ", cl::Required";
}
- else if (val.isOneOrMore() && OptionType::IsList(val.Type)) {
+ else if (val.isOneOrMore() && val.isList()) {
O << ", cl::OneOrMore";
}
- else if (val.isZeroOrOne() && OptionType::IsList(val.Type)) {
+ else if (val.isZeroOrOne() && val.isList()) {
O << ", cl::ZeroOrOne";
}
@@ -1707,7 +1833,12 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
}
if (val.MultiVal > 1)
- O << ", cl::multi_val(" << val.MultiVal << ")";
+ O << ", cl::multi_val(" << val.MultiVal << ')';
+
+ if (val.InitVal) {
+ const std::string& str = val.InitVal->getAsString();
+ O << ", cl::init(" << str << ')';
+ }
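
For an option carrying an 'init' property, the emitted definition now gains a
cl::init() modifier. With an illustrative option name, a switch initialized to
true would come out as:

    // Generated shape (name and help text illustrative).
    cl::opt<bool> Wall("Wall", cl::init(true),
                       cl::desc("NO HELP MESSAGE PROVIDED"));
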
if (!val.Help.empty())
O << ", cl::desc(\"" << val.Help << "\")";
@@ -1762,9 +1893,9 @@ void EmitPopulateLanguageMap (const RecordKeeper& Records, raw_ostream& O)
const ListInit* Suffixes = LangToSuffixes->getValueAsListInit("suffixes");
for (unsigned i = 0; i < Suffixes->size(); ++i)
- O << Indent1 << "langMap[\""
- << InitPtrToString(Suffixes->getElement(i))
- << "\"] = \"" << Lang << "\";\n";
+ O.indent(Indent1) << "langMap[\""
+ << InitPtrToString(Suffixes->getElement(i))
+ << "\"] = \"" << Lang << "\";\n";
}
}
@@ -1773,21 +1904,22 @@ void EmitPopulateLanguageMap (const RecordKeeper& Records, raw_ostream& O)
/// IncDecWeight - Helper function passed to EmitCaseConstructHandler()
/// by EmitEdgeClass().
-void IncDecWeight (const Init* i, const char* IndentLevel,
+void IncDecWeight (const Init* i, unsigned IndentLevel,
raw_ostream& O) {
const DagInit& d = InitPtrToDag(i);
const std::string& OpName = d.getOperator()->getAsString();
if (OpName == "inc_weight") {
- O << IndentLevel << "ret += ";
+ O.indent(IndentLevel) << "ret += ";
}
else if (OpName == "dec_weight") {
- O << IndentLevel << "ret -= ";
+ O.indent(IndentLevel) << "ret -= ";
}
else if (OpName == "error") {
- O << IndentLevel << "throw std::runtime_error(\"" <<
- (d.getNumArgs() >= 1 ? InitPtrToString(d.getArg(0))
- : "Unknown error!")
+ O.indent(IndentLevel)
+ << "throw std::runtime_error(\"" <<
+ (d.getNumArgs() >= 1 ? InitPtrToString(d.getArg(0))
+ : "Unknown error!")
<< "\");\n";
return;
}
@@ -1810,19 +1942,20 @@ void EmitEdgeClass (unsigned N, const std::string& Target,
// Class constructor.
O << "class Edge" << N << ": public Edge {\n"
- << "public:\n"
- << Indent1 << "Edge" << N << "() : Edge(\"" << Target
- << "\") {}\n\n"
+ << "public:\n";
+ O.indent(Indent1) << "Edge" << N << "() : Edge(\"" << Target
+ << "\") {}\n\n";
// Function Weight().
- << Indent1 << "unsigned Weight(const InputLanguagesSet& InLangs) const {\n"
- << Indent2 << "unsigned ret = 0;\n";
+ O.indent(Indent1)
+ << "unsigned Weight(const InputLanguagesSet& InLangs) const {\n";
+ O.indent(Indent2) << "unsigned ret = 0;\n";
// Handle the 'case' construct.
EmitCaseConstructHandler(Case, Indent2, IncDecWeight, false, OptDescs, O);
- O << Indent2 << "return ret;\n"
- << Indent1 << "};\n\n};\n\n";
+ O.indent(Indent2) << "return ret;\n";
+ O.indent(Indent1) << "};\n\n};\n\n";
}
/// EmitEdgeClasses - Emit Edge* classes that represent graph edges.
@@ -1852,7 +1985,7 @@ void EmitPopulateCompilationGraph (const RecordVector& EdgeVector,
for (ToolDescriptions::const_iterator B = ToolDescs.begin(),
E = ToolDescs.end(); B != E; ++B)
- O << Indent1 << "G.insertNode(new " << (*B)->Name << "());\n";
+ O.indent(Indent1) << "G.insertNode(new " << (*B)->Name << "());\n";
O << '\n';
@@ -1866,7 +1999,7 @@ void EmitPopulateCompilationGraph (const RecordVector& EdgeVector,
const std::string& NodeB = Edge->getValueAsString("b");
DagInit* Weight = Edge->getValueAsDag("weight");
- O << Indent1 << "G.insertEdge(\"" << NodeA << "\", ";
+ O.indent(Indent1) << "G.insertEdge(\"" << NodeA << "\", ";
if (isDagEmpty(Weight))
O << "new SimpleEdge(\"" << NodeB << "\")";
@@ -1955,7 +2088,7 @@ void EmitHookDeclarations(const ToolDescriptions& ToolDescs, raw_ostream& O) {
O << "namespace hooks {\n";
for (StringMap<unsigned>::const_iterator B = HookNames.begin(),
E = HookNames.end(); B != E; ++B) {
- O << Indent1 << "std::string " << B->first() << "(";
+ O.indent(Indent1) << "std::string " << B->first() << "(";
for (unsigned i = 0, j = B->second; i < j; ++i) {
O << "const char* Arg" << i << (i+1 == j ? "" : ", ");
@@ -1968,22 +2101,23 @@ void EmitHookDeclarations(const ToolDescriptions& ToolDescs, raw_ostream& O) {
/// EmitRegisterPlugin - Emit code to register this plugin.
void EmitRegisterPlugin(int Priority, raw_ostream& O) {
- O << "struct Plugin : public llvmc::BasePlugin {\n\n"
- << Indent1 << "int Priority() const { return " << Priority << "; }\n\n"
- << Indent1 << "void PopulateLanguageMap(LanguageMap& langMap) const\n"
- << Indent1 << "{ PopulateLanguageMapLocal(langMap); }\n\n"
- << Indent1
- << "void PopulateCompilationGraph(CompilationGraph& graph) const\n"
- << Indent1 << "{ PopulateCompilationGraphLocal(graph); }\n"
- << "};\n\n"
-
- << "static llvmc::RegisterPlugin<Plugin> RP;\n\n";
+ O << "struct Plugin : public llvmc::BasePlugin {\n\n";
+ O.indent(Indent1) << "int Priority() const { return "
+ << Priority << "; }\n\n";
+ O.indent(Indent1) << "void PopulateLanguageMap(LanguageMap& langMap) const\n";
+ O.indent(Indent1) << "{ PopulateLanguageMapLocal(langMap); }\n\n";
+ O.indent(Indent1)
+ << "void PopulateCompilationGraph(CompilationGraph& graph) const\n";
+ O.indent(Indent1) << "{ PopulateCompilationGraphLocal(graph); }\n"
+ << "};\n\n"
+ << "static llvmc::RegisterPlugin<Plugin> RP;\n\n";
}
/// EmitIncludes - Emit necessary #include directives and some
/// additional declarations.
void EmitIncludes(raw_ostream& O) {
- O << "#include \"llvm/CompilerDriver/CompilationGraph.h\"\n"
+ O << "#include \"llvm/CompilerDriver/BuiltinOptions.h\"\n"
+ << "#include \"llvm/CompilerDriver/CompilationGraph.h\"\n"
<< "#include \"llvm/CompilerDriver/ForceLinkageMacros.h\"\n"
<< "#include \"llvm/CompilerDriver/Plugin.h\"\n"
<< "#include \"llvm/CompilerDriver/Tool.h\"\n\n"
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index 8f31624644f6..a551166a9cd2 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -1319,6 +1319,8 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (PrintSem) OS << ";\n";
}
+unsigned Record::LastID = 0;
+
void Record::setName(const std::string &Name) {
if (Records.getDef(getName()) == this) {
Records.removeDef(getName());
@@ -1382,11 +1384,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
/// getValueInit - Return the initializer for a value with the specified name,
/// or throw an exception if the field does not exist.
///
-Init *Record::getValueInit(const std::string &FieldName) const {
+Init *Record::getValueInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
return R->getValue();
}
@@ -1395,15 +1397,15 @@ Init *Record::getValueInit(const std::string &FieldName) const {
/// value as a string, throwing an exception if the field does not exist or if
/// the value is not a string.
///
-std::string Record::getValueAsString(const std::string &FieldName) const {
+std::string Record::getValueAsString(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (const StringInit *SI = dynamic_cast<const StringInit*>(R->getValue()))
return SI->getValue();
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a string initializer!";
}
@@ -1411,15 +1413,15 @@ std::string Record::getValueAsString(const std::string &FieldName) const {
/// its value as a BitsInit, throwing an exception if the field does not exist
/// or if the value is not the right type.
///
-BitsInit *Record::getValueAsBitsInit(const std::string &FieldName) const {
+BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (BitsInit *BI = dynamic_cast<BitsInit*>(R->getValue()))
return BI;
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a BitsInit initializer!";
}
@@ -1427,15 +1429,15 @@ BitsInit *Record::getValueAsBitsInit(const std::string &FieldName) const {
/// its value as a ListInit, throwing an exception if the field does not exist
/// or if the value is not the right type.
///
-ListInit *Record::getValueAsListInit(const std::string &FieldName) const {
+ListInit *Record::getValueAsListInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (ListInit *LI = dynamic_cast<ListInit*>(R->getValue()))
return LI;
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a list initializer!";
}
@@ -1444,14 +1446,14 @@ ListInit *Record::getValueAsListInit(const std::string &FieldName) const {
/// not exist or if the value is not the right type.
///
std::vector<Record*>
-Record::getValueAsListOfDefs(const std::string &FieldName) const {
+Record::getValueAsListOfDefs(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<Record*> Defs;
for (unsigned i = 0; i < List->getSize(); i++) {
if (DefInit *DI = dynamic_cast<DefInit*>(List->getElement(i))) {
Defs.push_back(DI->getDef());
} else {
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' list is not entirely DefInit!";
}
}
@@ -1462,15 +1464,15 @@ Record::getValueAsListOfDefs(const std::string &FieldName) const {
/// value as an int64_t, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
-int64_t Record::getValueAsInt(const std::string &FieldName) const {
+int64_t Record::getValueAsInt(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (IntInit *II = dynamic_cast<IntInit*>(R->getValue()))
return II->getValue();
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have an int initializer!";
}
@@ -1479,14 +1481,14 @@ int64_t Record::getValueAsInt(const std::string &FieldName) const {
/// not exist or if the value is not the right type.
///
std::vector<int64_t>
-Record::getValueAsListOfInts(const std::string &FieldName) const {
+Record::getValueAsListOfInts(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<int64_t> Ints;
for (unsigned i = 0; i < List->getSize(); i++) {
if (IntInit *II = dynamic_cast<IntInit*>(List->getElement(i))) {
Ints.push_back(II->getValue());
} else {
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a list of ints initializer!";
}
}
@@ -1497,15 +1499,15 @@ Record::getValueAsListOfInts(const std::string &FieldName) const {
/// value as a Record, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
-Record *Record::getValueAsDef(const std::string &FieldName) const {
+Record *Record::getValueAsDef(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (DefInit *DI = dynamic_cast<DefInit*>(R->getValue()))
return DI->getDef();
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a def initializer!";
}
@@ -1513,15 +1515,15 @@ Record *Record::getValueAsDef(const std::string &FieldName) const {
/// value as a bit, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
-bool Record::getValueAsBit(const std::string &FieldName) const {
+bool Record::getValueAsBit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue()))
return BI->getValue();
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a bit initializer!";
}
@@ -1529,27 +1531,27 @@ bool Record::getValueAsBit(const std::string &FieldName) const {
/// value as an Dag, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
-DagInit *Record::getValueAsDag(const std::string &FieldName) const {
+DagInit *Record::getValueAsDag(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (DagInit *DI = dynamic_cast<DagInit*>(R->getValue()))
return DI;
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a dag initializer!";
}
-std::string Record::getValueAsCode(const std::string &FieldName) const {
+std::string Record::getValueAsCode(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (R == 0 || R->getValue() == 0)
throw "Record `" + getName() + "' does not have a field named `" +
- FieldName + "'!\n";
+ FieldName.str() + "'!\n";
if (const CodeInit *CI = dynamic_cast<const CodeInit*>(R->getValue()))
return CI->getValue();
- throw "Record `" + getName() + "', field `" + FieldName +
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a code initializer!";
}
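The StringRef migration above also explains the new .str() calls: the throw expressions build a std::string by concatenation, and as of this revision there is no operator+ mixing StringRef and std::string, so the StringRef is converted back explicitly. A small illustrative sketch:

    #include "llvm/ADT/StringRef.h"
    #include <string>
    using namespace llvm;

    // "field `" + FieldName would not compile; .str() makes a std::string
    // copy that the usual string operator+ can consume.
    std::string describeField(StringRef FieldName) {
      return "field `" + FieldName.str() + "'";
    }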
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 11db910328a4..1b33743e1411 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -782,7 +782,7 @@ public:
// Clone - Clone this operator, replacing arguments with the new list
virtual OpInit *clone(std::vector<Init *> &Operands) = 0;
- virtual int getNumOperands(void) const = 0;
+ virtual int getNumOperands() const = 0;
virtual Init *getOperand(int i) = 0;
// Fold - If possible, fold this to a simpler init. Return this if not
@@ -820,7 +820,7 @@ public:
return new UnOpInit(getOpcode(), *Operands.begin(), getType());
}
- int getNumOperands(void) const { return 1; }
+ int getNumOperands() const { return 1; }
Init *getOperand(int i) {
assert(i == 0 && "Invalid operand id for unary operator");
return getOperand();
@@ -864,7 +864,7 @@ public:
return new BinOpInit(getOpcode(), Operands[0], Operands[1], getType());
}
- int getNumOperands(void) const { return 2; }
+ int getNumOperands() const { return 2; }
Init *getOperand(int i) {
assert((i == 0 || i == 1) && "Invalid operand id for binary operator");
if (i == 0) {
@@ -909,7 +909,7 @@ public:
getType());
}
- int getNumOperands(void) const { return 3; }
+ int getNumOperands() const { return 3; }
Init *getOperand(int i) {
assert((i == 0 || i == 1 || i == 2) &&
"Invalid operand id for ternary operator");
@@ -1220,6 +1220,10 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecordVal &RV) {
}
class Record {
+ static unsigned LastID;
+
+ // Unique record ID.
+ unsigned ID;
std::string Name;
SMLoc Loc;
std::vector<std::string> TemplateArgs;
@@ -1227,9 +1231,12 @@ class Record {
std::vector<Record*> SuperClasses;
public:
- explicit Record(const std::string &N, SMLoc loc) : Name(N), Loc(loc) {}
+ explicit Record(const std::string &N, SMLoc loc) :
+ ID(LastID++), Name(N), Loc(loc) {}
~Record() {}
+ unsigned getID() const { return ID; }
+
const std::string &getName() const { return Name; }
void setName(const std::string &Name); // Also updates RecordKeeper.
@@ -1241,24 +1248,24 @@ public:
const std::vector<RecordVal> &getValues() const { return Values; }
const std::vector<Record*> &getSuperClasses() const { return SuperClasses; }
- bool isTemplateArg(const std::string &Name) const {
+ bool isTemplateArg(StringRef Name) const {
for (unsigned i = 0, e = TemplateArgs.size(); i != e; ++i)
if (TemplateArgs[i] == Name) return true;
return false;
}
- const RecordVal *getValue(const std::string &Name) const {
+ const RecordVal *getValue(StringRef Name) const {
for (unsigned i = 0, e = Values.size(); i != e; ++i)
if (Values[i].getName() == Name) return &Values[i];
return 0;
}
- RecordVal *getValue(const std::string &Name) {
+ RecordVal *getValue(StringRef Name) {
for (unsigned i = 0, e = Values.size(); i != e; ++i)
if (Values[i].getName() == Name) return &Values[i];
return 0;
}
- void addTemplateArg(const std::string &Name) {
+ void addTemplateArg(StringRef Name) {
assert(!isTemplateArg(Name) && "Template arg already defined!");
TemplateArgs.push_back(Name);
}
@@ -1268,7 +1275,7 @@ public:
Values.push_back(RV);
}
- void removeValue(const std::string &Name) {
+ void removeValue(StringRef Name) {
assert(getValue(Name) && "Cannot remove an entry that does not exist!");
for (unsigned i = 0, e = Values.size(); i != e; ++i)
if (Values[i].getName() == Name) {
@@ -1285,7 +1292,7 @@ public:
return false;
}
- bool isSubClassOf(const std::string &Name) const {
+ bool isSubClassOf(StringRef Name) const {
for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
if (SuperClasses[i]->getName() == Name)
return true;
@@ -1316,67 +1323,67 @@ public:
/// getValueInit - Return the initializer for a value with the specified name,
/// or throw an exception if the field does not exist.
///
- Init *getValueInit(const std::string &FieldName) const;
+ Init *getValueInit(StringRef FieldName) const;
/// getValueAsString - This method looks up the specified field and returns
/// its value as a string, throwing an exception if the field does not exist
/// or if the value is not a string.
///
- std::string getValueAsString(const std::string &FieldName) const;
+ std::string getValueAsString(StringRef FieldName) const;
/// getValueAsBitsInit - This method looks up the specified field and returns
/// its value as a BitsInit, throwing an exception if the field does not exist
/// or if the value is not the right type.
///
- BitsInit *getValueAsBitsInit(const std::string &FieldName) const;
+ BitsInit *getValueAsBitsInit(StringRef FieldName) const;
/// getValueAsListInit - This method looks up the specified field and returns
/// its value as a ListInit, throwing an exception if the field does not exist
/// or if the value is not the right type.
///
- ListInit *getValueAsListInit(const std::string &FieldName) const;
+ ListInit *getValueAsListInit(StringRef FieldName) const;
/// getValueAsListOfDefs - This method looks up the specified field and
/// returns its value as a vector of records, throwing an exception if the
/// field does not exist or if the value is not the right type.
///
- std::vector<Record*> getValueAsListOfDefs(const std::string &FieldName) const;
+ std::vector<Record*> getValueAsListOfDefs(StringRef FieldName) const;
/// getValueAsListOfInts - This method looks up the specified field and returns
/// its value as a vector of integers, throwing an exception if the field does
/// not exist or if the value is not the right type.
///
- std::vector<int64_t> getValueAsListOfInts(const std::string &FieldName) const;
+ std::vector<int64_t> getValueAsListOfInts(StringRef FieldName) const;
/// getValueAsDef - This method looks up the specified field and returns its
/// value as a Record, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
- Record *getValueAsDef(const std::string &FieldName) const;
+ Record *getValueAsDef(StringRef FieldName) const;
/// getValueAsBit - This method looks up the specified field and returns its
/// value as a bit, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
- bool getValueAsBit(const std::string &FieldName) const;
+ bool getValueAsBit(StringRef FieldName) const;
/// getValueAsInt - This method looks up the specified field and returns its
/// value as an int64_t, throwing an exception if the field does not exist or
/// if the value is not the right type.
///
- int64_t getValueAsInt(const std::string &FieldName) const;
+ int64_t getValueAsInt(StringRef FieldName) const;
/// getValueAsDag - This method looks up the specified field and returns its
/// value as an Dag, throwing an exception if the field does not exist or if
/// the value is not the right type.
///
- DagInit *getValueAsDag(const std::string &FieldName) const;
+ DagInit *getValueAsDag(StringRef FieldName) const;
/// getValueAsCode - This method looks up the specified field and returns
/// its value as the string data in a CodeInit, throwing an exception if the
/// field does not exist or if the value is not a code object.
///
- std::string getValueAsCode(const std::string &FieldName) const;
+ std::string getValueAsCode(StringRef FieldName) const;
};
raw_ostream &operator<<(raw_ostream &OS, const Record &R);
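The new ID member gives every Record a unique, creation-ordered identifier: a static counter is post-incremented in each constructor. A reduced sketch of the scheme (TableGen runs single-threaded, so no synchronization is needed):

    // Each instance captures the counter's current value, then bumps it.
    class UniquelyNumbered {
      static unsigned LastID;
      unsigned ID;
    public:
      UniquelyNumbered() : ID(LastID++) {}
      unsigned getID() const { return ID; }
    };
    unsigned UniquelyNumbered::LastID = 0;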
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 3297e936a2b6..3c7b44a1e003 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -222,7 +222,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
// Emit the register list now.
OS << " // " << Name
<< " Register Class Value Types...\n"
- << " static const MVT " << Name
+ << " static const EVT " << Name
<< "[] = {\n ";
for (unsigned i = 0, e = RC.VTs.size(); i != e; ++i)
OS << getEnumName(RC.VTs[i]) << ", ";
@@ -252,7 +252,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " // " << Name
<< " Sub-register Classes...\n"
<< " static const TargetRegisterClass* const "
- << Name << "SubRegClasses [] = {\n ";
+ << Name << "SubRegClasses[] = {\n ";
bool Empty = true;
@@ -298,7 +298,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " // " << Name
<< " Super-register Classes...\n"
<< " static const TargetRegisterClass* const "
- << Name << "SuperRegClasses [] = {\n ";
+ << Name << "SuperRegClasses[] = {\n ";
bool Empty = true;
std::map<unsigned, std::set<unsigned> >::iterator I =
@@ -334,7 +334,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " // " << Name
<< " Register Class sub-classes...\n"
<< " static const TargetRegisterClass* const "
- << Name << "Subclasses [] = {\n ";
+ << Name << "Subclasses[] = {\n ";
bool Empty = true;
for (unsigned rc2 = 0, e2 = RegisterClasses.size(); rc2 != e2; ++rc2) {
@@ -382,7 +382,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " // " << Name
<< " Register Class super-classes...\n"
<< " static const TargetRegisterClass* const "
- << Name << "Superclasses [] = {\n ";
+ << Name << "Superclasses[] = {\n ";
bool Empty = true;
std::map<unsigned, std::set<unsigned> >::iterator I =
@@ -767,7 +767,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
}
OS<<"\n const TargetRegisterDesc RegisterDescriptors[] = { // Descriptors\n";
- OS << " { \"NOREG\",\t\"NOREG\",\t0,\t0,\t0 },\n";
+ OS << " { \"NOREG\",\t0,\t0,\t0 },\n";
// Now that register alias and sub-registers sets have been emitted, emit the
// register descriptors now.
@@ -775,11 +775,6 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
const CodeGenRegister &Reg = Registers[i];
OS << " { \"";
- if (!Reg.TheDef->getValueAsString("AsmName").empty())
- OS << Reg.TheDef->getValueAsString("AsmName");
- else
- OS << Reg.getName();
- OS << "\",\t\"";
OS << Reg.getName() << "\",\t";
if (RegisterAliases.count(Reg.TheDef))
OS << Reg.getName() << "_AliasSet,\t";
diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h
new file mode 100644
index 000000000000..d9d7cf485efd
--- /dev/null
+++ b/utils/TableGen/StringToOffsetTable.h
@@ -0,0 +1,76 @@
+//===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TBLGEN_STRING_TO_OFFSET_TABLE_H
+#define TBLGEN_STRING_TO_OFFSET_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace llvm {
+
+/// StringToOffsetTable - This class uniques a bunch of nul-terminated strings
+/// and keeps track of their offset in a massive contiguous string allocation.
+/// It can then output this string blob and use indexes into the string to
+/// reference each piece.
+class StringToOffsetTable {
+ StringMap<unsigned> StringOffset;
+ std::string AggregateString;
+public:
+
+ unsigned GetOrAddStringOffset(StringRef Str) {
+ unsigned &Entry = StringOffset[Str];
+ if (Entry == 0) {
+ // Add the string to the aggregate if this is the first time found.
+ Entry = AggregateString.size();
+ AggregateString.append(Str.begin(), Str.end());
+ AggregateString += '\0';
+ }
+
+ return Entry;
+ }
+
+ void EmitString(raw_ostream &O) {
+ O << " \"";
+ unsigned CharsPrinted = 0;
+ EscapeString(AggregateString);
+ for (unsigned i = 0, e = AggregateString.size(); i != e; ++i) {
+ if (CharsPrinted > 70) {
+ O << "\"\n \"";
+ CharsPrinted = 0;
+ }
+ O << AggregateString[i];
+ ++CharsPrinted;
+
+ // Print escape sequences all together.
+ if (AggregateString[i] != '\\')
+ continue;
+
+ assert(i+1 < AggregateString.size() && "Incomplete escape sequence!");
+ if (isdigit(AggregateString[i+1])) {
+ assert(isdigit(AggregateString[i+2]) &&
+ isdigit(AggregateString[i+3]) &&
+ "Expected 3 digit octal escape!");
+ O << AggregateString[++i];
+ O << AggregateString[++i];
+ O << AggregateString[++i];
+ CharsPrinted += 3;
+ } else {
+ O << AggregateString[++i];
+ ++CharsPrinted;
+ }
+ }
+ O << "\"";
+ }
+};
+
+} // end namespace llvm
+
+#endif
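A hedged usage sketch for the new StringToOffsetTable (the emitter function and names below are illustrative, not part of the patch): callers intern each string once, store the returned byte offset into the aggregate blob, and emit the blob with EmitString.

    #include "StringToOffsetTable.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void emitNameBlob(raw_ostream &O) {
      StringToOffsetTable Table;
      unsigned AddOff = Table.GetOrAddStringOffset("add"); // appended at offset 0
      unsigned SubOff = Table.GetOrAddStringOffset("sub"); // appended at offset 4
      unsigned Again  = Table.GetOrAddStringOffset("sub"); // uniqued: Again == SubOff
      (void)AddOff; (void)Again;
      O << "static const char NameBlob[] =\n";
      Table.EmitString(O); // escaped, wrapped at roughly 70 columns per line
      O << ";\n// NameBlob + " << SubOff << " points at \"sub\"\n";
    }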
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 919ac664efa0..c8cf234ca464 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -199,12 +199,13 @@ unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS,
}
//
-// FormItineraryString - Compose a string containing the data initialization
-// for the specified itinerary. N is the number of stages.
+// FormItineraryStageString - Compose a string containing the stage
+// data initialization for the specified itinerary. N is the number
+// of stages.
//
-void SubtargetEmitter::FormItineraryString(Record *ItinData,
- std::string &ItinString,
- unsigned &NStages) {
+void SubtargetEmitter::FormItineraryStageString(Record *ItinData,
+ std::string &ItinString,
+ unsigned &NStages) {
// Get states list
const std::vector<Record*> &StageList =
ItinData->getValueAsListOfDefs("Stages");
@@ -215,7 +216,7 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
// Next stage
const Record *Stage = StageList[i];
- // Form string as ,{ cycles, u1 | u2 | ... | un }
+ // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
int Cycles = Stage->getValueAsInt("Cycles");
ItinString += " { " + itostr(Cycles) + ", ";
@@ -229,6 +230,9 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
if (++j < M) ItinString += " | ";
}
+ int TimeInc = Stage->getValueAsInt("TimeInc");
+ ItinString += ", " + itostr(TimeInc);
+
// Close off stage
ItinString += " }";
if (++i < N) ItinString += ", ";
@@ -236,10 +240,32 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
}
//
-// EmitStageData - Generate unique itinerary stages. Record itineraries for
-// processors.
+// FormItineraryOperandCycleString - Compose a string containing the
+// operand cycle initialization for the specified itinerary. N is the
+// number of operands that have cycles specified.
+//
+void SubtargetEmitter::FormItineraryOperandCycleString(Record *ItinData,
+ std::string &ItinString, unsigned &NOperandCycles) {
+ // Get operand cycle list
+ const std::vector<int64_t> &OperandCycleList =
+ ItinData->getValueAsListOfInts("OperandCycles");
+
+ // For each operand cycle
+ unsigned N = NOperandCycles = OperandCycleList.size();
+ for (unsigned i = 0; i < N;) {
+ // Next operand cycle
+ const int OCycle = OperandCycleList[i];
+
+ ItinString += " " + itostr(OCycle);
+ if (++i < N) ItinString += ", ";
+ }
+}
+
+//
+// EmitStageAndOperandCycleData - Generate unique itinerary stages and
+// operand cycle tables. Record itineraries for processors.
//
-void SubtargetEmitter::EmitStageData(raw_ostream &OS,
+void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
unsigned NItinClasses,
std::map<std::string, unsigned> &ItinClassesMap,
std::vector<std::vector<InstrItinerary> > &ProcList) {
@@ -251,12 +277,16 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
if (ProcItinList.size() < 2) return;
// Begin stages table
- OS << "static const llvm::InstrStage Stages[] = {\n"
- " { 0, 0 }, // No itinerary\n";
+ std::string StageTable = "static const llvm::InstrStage Stages[] = {\n";
+ StageTable += " { 0, 0, 0 }, // No itinerary\n";
- unsigned StageCount = 1;
- unsigned ItinEnum = 1;
- std::map<std::string, unsigned> ItinMap;
+ // Begin operand cycle table
+ std::string OperandCycleTable = "static const unsigned OperandCycles[] = {\n";
+ OperandCycleTable += " 0, // No itinerary\n";
+
+ unsigned StageCount = 1, OperandCycleCount = 1;
+ unsigned ItinStageEnum = 1, ItinOperandCycleEnum = 1;
+ std::map<std::string, unsigned> ItinStageMap, ItinOperandCycleMap;
for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
// Next record
Record *Proc = ProcItinList[i];
@@ -280,29 +310,53 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
Record *ItinData = ItinDataList[j];
// Get string and stage count
- std::string ItinString;
+ std::string ItinStageString;
unsigned NStages;
- FormItineraryString(ItinData, ItinString, NStages);
-
- // Check to see if it already exists
- unsigned Find = ItinMap[ItinString];
+ FormItineraryStageString(ItinData, ItinStageString, NStages);
+
+ // Get string and operand cycle count
+ std::string ItinOperandCycleString;
+ unsigned NOperandCycles;
+ FormItineraryOperandCycleString(ItinData, ItinOperandCycleString,
+ NOperandCycles);
+
+ // Check to see if stage already exists and create if it doesn't
+ unsigned FindStage = 0;
+ if (NStages > 0) {
+ FindStage = ItinStageMap[ItinStageString];
+ if (FindStage == 0) {
+ // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
+ StageTable += ItinStageString + ", // " + itostr(ItinStageEnum) + "\n";
+ // Record Itin class number.
+ ItinStageMap[ItinStageString] = FindStage = StageCount;
+ StageCount += NStages;
+ ItinStageEnum++;
+ }
+ }
- // If new itinerary
- if (Find == 0) {
- // Emit as { cycles, u1 | u2 | ... | un }, // index
- OS << ItinString << ", // " << ItinEnum << "\n";
- // Record Itin class number.
- ItinMap[ItinString] = Find = StageCount;
- StageCount += NStages;
- ItinEnum++;
+ // Check to see if operand cycle already exists and create if it doesn't
+ unsigned FindOperandCycle = 0;
+ if (NOperandCycles > 0) {
+ FindOperandCycle = ItinOperandCycleMap[ItinOperandCycleString];
+ if (FindOperandCycle == 0) {
+ // Emit as cycle, // index
+ OperandCycleTable += ItinOperandCycleString + ", // " +
+ itostr(ItinOperandCycleEnum) + "\n";
+ // Record Itin class number.
+ ItinOperandCycleMap[ItinOperandCycleString] =
+ FindOperandCycle = OperandCycleCount;
+ OperandCycleCount += NOperandCycles;
+ ItinOperandCycleEnum++;
+ }
}
// Set up itinerary as location and location + stage count
- InstrItinerary Intinerary = { Find, Find + NStages };
+ InstrItinerary Intinerary = { FindStage, FindStage + NStages,
+ FindOperandCycle, FindOperandCycle + NOperandCycles};
// Locate where to inject into processor itinerary table
const std::string &Name = ItinData->getValueAsDef("TheClass")->getName();
- Find = ItinClassesMap[Name];
+ unsigned Find = ItinClassesMap[Name];
// Inject - empty slots will be 0, 0
ItinList[Find] = Intinerary;
@@ -313,13 +367,21 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
}
// Closing stage
- OS << " { 0, 0 } // End itinerary\n";
- // End stages table
- OS << "};\n";
+ StageTable += " { 0, 0, 0 } // End itinerary\n";
+ StageTable += "};\n";
+
+ // Closing operand cycles
+ OperandCycleTable += " 0 // End itinerary\n";
+ OperandCycleTable += "};\n";
+
+ // Emit tables.
+ OS << StageTable;
+ OS << OperandCycleTable;
- // Emit size of table
+ // Emit size of tables
OS<<"\nenum {\n";
- OS<<" StagesSize = sizeof(Stages)/sizeof(llvm::InstrStage)\n";
+ OS<<" StagesSize = sizeof(Stages)/sizeof(llvm::InstrStage),\n";
+ OS<<" OperandCyclesSize = sizeof(OperandCycles)/sizeof(unsigned)\n";
OS<<"};\n";
}
@@ -351,23 +413,25 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
// For each itinerary class
std::vector<InstrItinerary> &ItinList = *ProcListIter++;
- for (unsigned j = 0, M = ItinList.size(); j < M;) {
+ for (unsigned j = 0, M = ItinList.size(); j < M; ++j) {
InstrItinerary &Intinerary = ItinList[j];
- // Emit in the form of { first, last } // index
- if (Intinerary.First == 0) {
- OS << " { 0, 0 }";
+ // Emit in the form of
+ // { firstStage, lastStage, firstCycle, lastCycle } // index
+ if (Intinerary.FirstStage == 0) {
+ OS << " { 0, 0, 0, 0 }";
} else {
- OS << " { " << Intinerary.First << ", " << Intinerary.Last << " }";
+ OS << " { " << Intinerary.FirstStage << ", " <<
+ Intinerary.LastStage << ", " <<
+ Intinerary.FirstOperandCycle << ", " <<
+ Intinerary.LastOperandCycle << " }";
}
- // If more in list add comma
- if (++j < M) OS << ",";
-
- OS << " // " << (j - 1) << "\n";
+ OS << ", // " << j << "\n";
}
// End processor itinerary table
+ OS << " { ~0U, ~0U, ~0U, ~0U } // end marker\n";
OS << "};\n";
}
}
@@ -432,7 +496,7 @@ void SubtargetEmitter::EmitData(raw_ostream &OS) {
if (HasItineraries) {
// Emit the stage data
- EmitStageData(OS, NItinClasses, ItinClassesMap, ProcList);
+ EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap, ProcList);
// Emit the processor itinerary data
EmitProcessorData(OS, ProcList);
// Emit the processor lookup data
@@ -479,7 +543,7 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
OS << "\n"
<< " InstrItinerary *Itinerary = (InstrItinerary *)"
<< "Features.getInfo(ProcItinKV, ProcItinKVSize);\n"
- << " InstrItins = InstrItineraryData(Stages, Itinerary);\n";
+ << " InstrItins = InstrItineraryData(Stages, OperandCycles, Itinerary);\n";
}
OS << " return Features.getCPU();\n"
diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h
index f44278cc15fe..1d7088fd3902 100644
--- a/utils/TableGen/SubtargetEmitter.h
+++ b/utils/TableGen/SubtargetEmitter.h
@@ -34,9 +34,11 @@ class SubtargetEmitter : public TableGenBackend {
void CPUKeyValues(raw_ostream &OS);
unsigned CollectAllItinClasses(raw_ostream &OS,
std::map<std::string, unsigned> &ItinClassesMap);
- void FormItineraryString(Record *ItinData, std::string &ItinString,
- unsigned &NStages);
- void EmitStageData(raw_ostream &OS, unsigned NItinClasses,
+ void FormItineraryStageString(Record *ItinData, std::string &ItinString,
+ unsigned &NStages);
+ void FormItineraryOperandCycleString(Record *ItinData, std::string &ItinString,
+ unsigned &NOperandCycles);
+ void EmitStageAndOperandCycleData(raw_ostream &OS, unsigned NItinClasses,
std::map<std::string, unsigned> &ItinClassesMap,
std::vector<std::vector<InstrItinerary> > &ProcList);
void EmitProcessorData(raw_ostream &OS,
diff --git a/utils/TableGen/TGParser.cpp b/utils/TableGen/TGParser.cpp
index ba480e6e926b..712226500540 100644
--- a/utils/TableGen/TGParser.cpp
+++ b/utils/TableGen/TGParser.cpp
@@ -974,7 +974,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
///
/// OperatorType ::= '<' Type '>'
///
-RecTy *TGParser::ParseOperatorType(void) {
+RecTy *TGParser::ParseOperatorType() {
RecTy *Type = 0;
if (Lex.getCode() != tgtok::less) {
diff --git a/utils/TableGen/TGValueTypes.cpp b/utils/TableGen/TGValueTypes.cpp
index e4edca6cdc07..122d085b0d78 100644
--- a/utils/TableGen/TGValueTypes.cpp
+++ b/utils/TableGen/TGValueTypes.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// The MVT type is used by tablegen as well as in LLVM. In order to handle
-// extended types, the MVT type uses support functions that call into
+// The EVT type is used by tablegen as well as in LLVM. In order to handle
+// extended types, the EVT type uses support functions that call into
// LLVM's type system code. These aren't accessible in tablegen, so this
// file provides simple replacements.
//
@@ -43,15 +43,15 @@ public:
};
class ExtendedVectorType : public Type {
- MVT ElementType;
+ EVT ElementType;
unsigned NumElements;
public:
- ExtendedVectorType(MVT elty, unsigned num)
+ ExtendedVectorType(EVT elty, unsigned num)
: ElementType(elty), NumElements(num) {}
unsigned getSizeInBits() const {
return getNumElements() * getElementType().getSizeInBits();
}
- MVT getElementType() const {
+ EVT getElementType() const {
return ElementType;
}
unsigned getNumElements() const {
@@ -64,62 +64,43 @@ static std::map<unsigned, const Type *>
static std::map<std::pair<uintptr_t, uintptr_t>, const Type *>
ExtendedVectorTypeMap;
-MVT MVT::getExtendedIntegerVT(unsigned BitWidth) {
- const Type *&ET = ExtendedIntegerTypeMap[BitWidth];
- if (!ET) ET = new ExtendedIntegerType(BitWidth);
- MVT VT;
- VT.LLVMTy = ET;
- assert(VT.isExtended() && "Type is not extended!");
- return VT;
-}
-
-MVT MVT::getExtendedVectorVT(MVT VT, unsigned NumElements) {
- const Type *&ET = ExtendedVectorTypeMap[std::make_pair(VT.getRawBits(),
- NumElements)];
- if (!ET) ET = new ExtendedVectorType(VT, NumElements);
- MVT ResultVT;
- ResultVT.LLVMTy = ET;
- assert(ResultVT.isExtended() && "Type is not extended!");
- return ResultVT;
-}
-
-bool MVT::isExtendedFloatingPoint() const {
+bool EVT::isExtendedFloatingPoint() const {
assert(isExtended() && "Type is not extended!");
// Extended floating-point types are not supported yet.
return false;
}
-bool MVT::isExtendedInteger() const {
+bool EVT::isExtendedInteger() const {
assert(isExtended() && "Type is not extended!");
return dynamic_cast<const ExtendedIntegerType *>(LLVMTy) != 0;
}
-bool MVT::isExtendedVector() const {
+bool EVT::isExtendedVector() const {
assert(isExtended() && "Type is not extended!");
return dynamic_cast<const ExtendedVectorType *>(LLVMTy) != 0;
}
-bool MVT::isExtended64BitVector() const {
+bool EVT::isExtended64BitVector() const {
assert(isExtended() && "Type is not extended!");
return isExtendedVector() && getSizeInBits() == 64;
}
-bool MVT::isExtended128BitVector() const {
+bool EVT::isExtended128BitVector() const {
assert(isExtended() && "Type is not extended!");
return isExtendedVector() && getSizeInBits() == 128;
}
-MVT MVT::getExtendedVectorElementType() const {
+EVT EVT::getExtendedVectorElementType() const {
assert(isExtendedVector() && "Type is not an extended vector!");
return static_cast<const ExtendedVectorType *>(LLVMTy)->getElementType();
}
-unsigned MVT::getExtendedVectorNumElements() const {
+unsigned EVT::getExtendedVectorNumElements() const {
assert(isExtendedVector() && "Type is not an extended vector!");
return static_cast<const ExtendedVectorType *>(LLVMTy)->getNumElements();
}
-unsigned MVT::getExtendedSizeInBits() const {
+unsigned EVT::getExtendedSizeInBits() const {
assert(isExtended() && "Type is not extended!");
return LLVMTy->getSizeInBits();
}
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 601581497a55..c6d75020c72f 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -17,24 +17,25 @@
#include "Record.h"
#include "TGParser.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/System/Signals.h"
-#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
#include "CallingConvEmitter.h"
#include "CodeEmitterGen.h"
#include "RegisterInfoEmitter.h"
#include "InstrInfoEmitter.h"
#include "InstrEnumEmitter.h"
#include "AsmWriterEmitter.h"
+#include "AsmMatcherEmitter.h"
#include "DAGISelEmitter.h"
#include "FastISelEmitter.h"
#include "SubtargetEmitter.h"
#include "IntrinsicEmitter.h"
#include "LLVMCConfigurationEmitter.h"
#include "ClangDiagnosticsEmitter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
#include <algorithm>
#include <cstdio>
using namespace llvm;
@@ -43,7 +44,7 @@ enum ActionType {
PrintRecords,
GenEmitter,
GenRegisterEnums, GenRegister, GenRegisterHeader,
- GenInstrEnums, GenInstrs, GenAsmWriter,
+ GenInstrEnums, GenInstrs, GenAsmWriter, GenAsmMatcher,
GenCallingConv,
GenClangDiagsDefs,
GenClangDiagGroups,
@@ -77,6 +78,8 @@ namespace {
"Generate calling convention descriptions"),
clEnumValN(GenAsmWriter, "gen-asm-writer",
"Generate assembly writer"),
+ clEnumValN(GenAsmMatcher, "gen-asm-matcher",
+ "Generate assembly instruction matcher"),
clEnumValN(GenDAGISel, "gen-dag-isel",
"Generate a DAG instruction selector"),
clEnumValN(GenFastISel, "gen-fast-isel",
@@ -138,7 +141,7 @@ static bool ParseFile(const std::string &Filename,
std::string ErrorStr;
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
if (F == 0) {
- errs() << "Could not open input file '" + Filename + "': "
+ errs() << "Could not open input file '" << Filename << "': "
<< ErrorStr <<"\n";
return true;
}
@@ -168,7 +171,7 @@ int main(int argc, char **argv) {
raw_ostream *Out = &outs();
if (OutputFilename != "-") {
std::string Error;
- Out = new raw_fd_ostream(OutputFilename.c_str(), false, Error);
+ Out = new raw_fd_ostream(OutputFilename.c_str(), Error);
if (!Error.empty()) {
errs() << argv[0] << ": error opening " << OutputFilename
@@ -210,6 +213,9 @@ int main(int argc, char **argv) {
case GenAsmWriter:
AsmWriterEmitter(Records).run(*Out);
break;
+ case GenAsmMatcher:
+ AsmMatcherEmitter(Records).run(*Out);
+ break;
case GenClangDiagsDefs:
ClangDiagsDefsEmitter(Records, ClangComponent).run(*Out);
break;
diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl
new file mode 100755
index 000000000000..3aa2f8891e2e
--- /dev/null
+++ b/utils/UpdateCMakeLists.pl
@@ -0,0 +1,118 @@
+#!/usr/bin/env perl
+use strict;
+use File::Find;
+use File::Copy;
+use Digest::MD5;
+
+my @fileTypes = ("cpp", "c");
+my %dirFiles;
+my %dirCMake;
+
+sub GetFiles {
+ my $dir = shift;
+ my $x = $dirFiles{$dir};
+ if (!defined $x) {
+ $x = [];
+ $dirFiles{$dir} = $x;
+ }
+ return $x;
+}
+
+sub ProcessFile {
+ my $file = $_;
+ my $dir = $File::Find::dir;
+ # Record if a CMake file was found.
+ if ($file eq "CMakeLists.txt") {
+ $dirCMake{$dir} = $File::Find::name;
+ return 0;
+ }
+ # Grab the extension of the file.
+ $file =~ /\.([^.]+)$/;
+ my $ext = $1;
+ my $files;
+ foreach my $x (@fileTypes) {
+ if ($ext eq $x) {
+ if (!defined $files) {
+ $files = GetFiles($dir);
+ }
+ push @$files, $file;
+ return 0;
+ }
+ }
+ return 0;
+}
+
+sub EmitCMakeList {
+ my $dir = shift;
+ my $files = $dirFiles{$dir};
+
+ if (!defined $files) {
+ return;
+ }
+
+ foreach my $file (sort @$files) {
+ print OUT " ";
+ print OUT $file;
+ print OUT "\n";
+ }
+}
+
+sub UpdateCMake {
+ my $cmakeList = shift;
+ my $dir = shift;
+ my $cmakeListNew = $cmakeList . ".new";
+ open(IN, $cmakeList);
+ open(OUT, ">", $cmakeListNew);
+ my $foundLibrary = 0;
+
+ while(<IN>) {
+ if (!$foundLibrary) {
+ print OUT $_;
+ if (/^add_clang_library\(/ || /^add_llvm_library\(/) {
+ $foundLibrary = 1;
+ EmitCMakeList($dir);
+ }
+ }
+ else {
+ if (/\)/) {
+ print OUT $_;
+ $foundLibrary = 0;
+ }
+ }
+ }
+
+ close(IN);
+ close(OUT);
+
+ open(FILE, $cmakeList) or
+ die("Cannot open $cmakeList when computing digest\n");
+ binmode FILE;
+ my $digestA = Digest::MD5->new->addfile(*FILE)->hexdigest;
+ close(FILE);
+
+ open(FILE, $cmakeListNew) or
+ die("Cannot open $cmakeListNew when computing digest\n");
+ binmode FILE;
+ my $digestB = Digest::MD5->new->addfile(*FILE)->hexdigest;
+ close(FILE);
+
+ if ($digestA ne $digestB) {
+ move($cmakeListNew, $cmakeList);
+ return 1;
+ }
+
+ unlink($cmakeListNew);
+ return 0;
+}
+
+sub UpdateCMakeFiles {
+ foreach my $dir (sort keys %dirCMake) {
+ if (UpdateCMake($dirCMake{$dir}, $dir)) {
+ print "Updated: $dir\n";
+ }
+ }
+}
+
+find({ wanted => \&ProcessFile, follow => 1 }, '.');
+UpdateCMakeFiles();
+
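UpdateCMakeLists.pl only replaces a CMakeLists.txt when the regenerated contents actually differ, comparing MD5 digests of the old and new files before moving the temporary over the original. The same update-if-changed idiom, sketched here in C++ with a plain byte comparison standing in for the digests:

    #include <cstdio>
    #include <fstream>
    #include <sstream>
    #include <string>

    static std::string slurp(const std::string &Path) {
      std::ifstream In(Path.c_str(), std::ios::binary);
      std::ostringstream SS;
      SS << In.rdbuf();
      return SS.str();
    }

    // Returns true if Orig was replaced by New; otherwise drops New.
    bool replaceIfChanged(const std::string &Orig, const std::string &New) {
      if (slurp(Orig) == slurp(New)) {
        std::remove(New.c_str()); // identical content: keep the original
        return false;
      }
      std::rename(New.c_str(), Orig.c_str());
      return true;
    }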
diff --git a/utils/bugpoint/RemoteRunSafely.sh b/utils/bugpoint/RemoteRunSafely.sh
new file mode 100644
index 000000000000..f390e339ea9a
--- /dev/null
+++ b/utils/bugpoint/RemoteRunSafely.sh
@@ -0,0 +1,105 @@
+#!/bin/sh
+#
+# Program: RemoteRunSafely.sh
+#
+# Synopsis: This script simply runs another program remotely using ssh.
+# It always returns the another program exit code or exit with
+# code 255 which indicates that the program could not be executed.
+#
+# Syntax:
+#
+# RemoteRunSafely.sh <hostname> [-l <login_name>] [-p <port>]
+# <program> <args...>
+#
+# where:
+# <hostname> is the remote host to execute the program,
+# <login_name> is the username on the remote host,
+# <port> is the port used by the remote client,
+# <program> is the path to the program to run,
+# <args...> are the arguments to pass to the program.
+#
+
+printUsageAndExit()
+{
+ echo "Usage:"
+ echo "./RemoteRunSafely.sh <hostname> [-l <login_name>] [-p <port>] " \
+ "<program> <args...>"
+ exit 255
+}
+
+moreArgsExpected()
+{
+ # $1 - remaining number of arguments
+ # $2 - number of arguments to shift
+ if [ $1 -lt $2 ]
+ then
+ echo "Error: Wrong number of argumants."
+ printUsageAndExit
+ fi
+}
+
+# Save a copy of the original arguments in a string before we
+# clobber them with the shift command.
+ORIG_ARGS="$*"
+#DEBUG: echo 'GOT: '$ORIG_ARGS
+
+moreArgsExpected $# 1
+RHOST=$1
+shift 1
+
+RUSER=`id -un`
+RCLIENT=ssh
+RPORT=
+WORKING_DIR=
+
+moreArgsExpected $# 1
+if [ $1 = "-l" ]; then
+ moreArgsExpected $# 2
+ RUSER=$2
+ shift 2
+fi
+moreArgsExpected $# 1
+if [ $1 = "-p" ]; then
+ moreArgsExpected $# 2
+ RPORT="-p $2"
+ shift 2
+fi
+
+moreArgsExpected $# 1
+PROGRAM=$(basename $1)
+WORKING_DIR=$(dirname $1)
+shift 1
+
+#DEBUG: echo 'DIR='${0%%`basename $0`}
+#DEBUG: echo 'RHOST='$RHOST
+#DEBUG: echo 'RUSER='$RUSER
+#DEBUG: echo 'PROGRAM='$PROGRAM
+#DEBUG: echo 'WORKING_DIR='$WORKING_DIR
+#DEBUG: echo 'ARGS='$*
+
+# Sanity check
+if [ "$RHOST" = "" -o "$PROGRAM" = "" ]; then
+ printUsageAndExit
+fi
+
+# Local program file must exist and be executable
+local_program=$WORKING_DIR"/"$PROGRAM
+if [ ! -x "$local_program" ]; then
+ echo "File "$local_program" does not exist or is not an executable.."
+ exit 255
+fi
+
+connection=$RUSER'@'$RHOST
+remote="./"$PROGRAM
+(
+ cat $local_program | \
+ $RCLIENT $connection $RPORT \
+ 'rm -f '$remote' ; ' \
+ 'cat > '$remote' ; chmod +x '$remote' ; '$remote' '$*' ; ' \
+ 'err=$? ; rm -f '$remote' ; exit $err'
+)
+err=$?
+
+#DEBUG: echo script exit $err
+exit $err
+
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 6e5131254cb0..e3b334a045d1 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -85,6 +85,7 @@ installsrc:
-type f -a -name .DS_Store -o \
-name \*~ -o -name .\#\* \) \
-exec rm -rf {} \;
+ rm -rf "$(SRCROOT)/test"
#######################################################################
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index eac68b4f572f..91fbe15be473 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -56,13 +56,19 @@ DIR=`pwd`
DARWIN_VERS=`uname -r | sed 's/\..*//'`
echo DARWIN_VERS = $DARWIN_VERS
-# If the user has CC set in their environment unset it now
-unset CC
-
-DT_HOME=$DEST_DIR/Developer/usr
-DEST_ROOT="/Developer$DEST_ROOT"
+if [ "x$RC_ProjectName" = "xllvmCore_Embedded" ]; then
+ DT_HOME=$DEST_DIR/Developer/Platforms/iPhoneOS.platform/Developer/usr
+ DEST_ROOT="/Developer/Platforms/iPhoneOS.platform/Developer$DEST_ROOT"
+else
+ DT_HOME=$DEST_DIR/Developer/usr
+ DEST_ROOT="/Developer$DEST_ROOT"
+fi
if [ "x$DEVELOPER_BIN" != "x" ]; then
- DT_HOME=$DEST_DIR/$DEVELOPER_DIR/usr
+ if [ "x$RC_ProjectName" = "xllvmCore_Embedded" ]; then
+ DT_HOME=$DEST_DIR/Developer/Platforms/iPhoneOS.platform/$DEVELOPER_DIR/usr
+ else
+ DT_HOME=$DEST_DIR/$DEVELOPER_DIR/usr
+ fi
DEST_ROOT="/$DEVELOPER_DIR$DEST_ROOT"
fi
@@ -81,11 +87,23 @@ ln -s $ORIG_SRC_DIR/* $SRC_DIR/ || exit 1
mkdir -p $DIR/obj-llvm || exit 1
cd $DIR/obj-llvm || exit 1
+# If the user has set CC or CXX, respect their wishes. If not,
+# compile with LLVM-GCC/LLVM-G++ if available; if they are not
+# available, fall back to the usual GCC/G++ defaults.
+savedPATH=$PATH ; PATH="$PATH:/Developer/usr/bin"
+XTMPCC=$(which llvm-gcc)
+if [ x$CC = x -a x$XTMPCC != x ] ; then export CC=$XTMPCC ; fi
+XTMPCC=$(which llvm-g++)
+if [ x$CXX = x -a x$XTMPCC != x ] ; then export CXX=$XTMPCC ; fi
+PATH=$savedPATH
+unset XTMPCC savedPATH
+
if [ \! -f Makefile.config ]; then
$SRC_DIR/configure --prefix=$DT_HOME/local \
--enable-targets=arm,x86,powerpc,cbe \
--enable-assertions=$LLVM_ASSERTIONS \
--enable-optimized=$LLVM_OPTIMIZED \
+ --disable-bindings \
|| exit 1
fi
@@ -136,12 +154,13 @@ if [ "x$MAJ_VER" != "x4" -o "x$MIN_VER" != "x0" ]; then
fi
make $JOBS_FLAG $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$TARGETS" \
+ NO_RUNTIME_LIBS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
CXXFLAGS="-DLLVM_VERSION_INFO='\" Apple Build #$LLVM_VERSION\"'" \
VERBOSE=1
-if ! test $? == 0 ; then
+if [ $? != 0 ] ; then
echo "error: LLVM 'make' failed!"
exit 1
fi
@@ -159,6 +178,7 @@ cd $DIR/obj-llvm || exit 1
# Install the tree into the destination directory.
make $LOCAL_MAKEFLAGS $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$TARGETS" \
+ NO_RUNTIME_LIBS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
OPTIMIZE_OPTION='-O3' VERBOSE=1 install
diff --git a/utils/count/CMakeLists.txt b/utils/count/CMakeLists.txt
new file mode 100644
index 000000000000..e124f61d2409
--- /dev/null
+++ b/utils/count/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(count
+ count.c
+ )
diff --git a/utils/count/Makefile b/utils/count/Makefile
new file mode 100644
index 000000000000..8de076a8803b
--- /dev/null
+++ b/utils/count/Makefile
@@ -0,0 +1,20 @@
+##===- utils/count/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = count
+USEDLIBS =
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/utils/count/count.c b/utils/count/count.c
new file mode 100644
index 000000000000..a37e1e0b2548
--- /dev/null
+++ b/utils/count/count.c
@@ -0,0 +1,48 @@
+/*===- count.c - The 'count' testing tool ---------------------------------===*\
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+\*===----------------------------------------------------------------------===*/
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(int argc, char **argv) {
+ unsigned Count, NumLines, NumRead;
+ char Buffer[4096], *End;
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <expected line count>\n", argv[0]);
+ return 2;
+ }
+
+ Count = strtol(argv[1], &End, 10);
+ if (*End != '\0' && End != argv[1]) {
+ fprintf(stderr, "%s: invalid count argument '%s'\n", argv[0], argv[1]);
+ return 2;
+ }
+
+ NumLines = 0;
+ while ((NumRead = fread(Buffer, 1, sizeof(Buffer), stdin))) {
+ unsigned i;
+
+ for (i = 0; i != NumRead; ++i)
+ if (Buffer[i] == '\n')
+ ++NumLines;
+ }
+
+ if (!feof(stdin)) {
+ fprintf(stderr, "%s: error reading stdin\n", argv[0]);
+ return 3;
+ }
+
+ if (Count != NumLines) {
+ fprintf(stderr, "Expected %d lines, got %d.\n", Count, NumLines);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/utils/crosstool/ARM/build-install-linux.sh b/utils/crosstool/ARM/build-install-linux.sh
index 33833b5612ab..f3f8df96630c 100755
--- a/utils/crosstool/ARM/build-install-linux.sh
+++ b/utils/crosstool/ARM/build-install-linux.sh
@@ -9,7 +9,7 @@ set -o errexit
echo -n "Welcome to LLVM Linux/X86_64 -> Linux/ARM crosstool "
echo "builder/installer; some steps will require sudo privileges."
-readonly INSTALL_ROOT="${INSTALL_ROOT:-/usr/local}"
+readonly INSTALL_ROOT="${INSTALL_ROOT:-/usr/local/crosstool}"
# Both $USER and root *must* have read/write access to this dir.
readonly SCRATCH_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/llvm-project.XXXXXX")
readonly SRC_ROOT="${SCRATCH_ROOT}/src"
@@ -17,6 +17,7 @@ readonly OBJ_ROOT="${SCRATCH_ROOT}/obj"
readonly CROSS_HOST="x86_64-unknown-linux-gnu"
readonly CROSS_TARGET="arm-none-linux-gnueabi"
+readonly CROSS_MARCH="${CROSS_MARCH:-armv6}"
readonly CODE_SOURCERY="${INSTALL_ROOT}/codesourcery"
readonly CODE_SOURCERY_PKG_PATH="${CODE_SOURCERY_PKG_PATH:-${HOME}/codesourcery}"
@@ -32,33 +33,23 @@ readonly CROSS_TARGET_LD="${CODE_SOURCERY_BIN}/${CROSS_TARGET}-ld"
readonly SYSROOT="${CODE_SOURCERY_ROOT}/${CROSS_TARGET}/libc"
-readonly LLVM_PROJECT="${INSTALL_ROOT}/llvm-project"
-readonly LLVM_INSTALL_ROOT="${LLVM_PROJECT}/${CROSS_HOST}/${CROSS_TARGET}"
readonly LLVM_PKG_PATH="${LLVM_PKG_PATH:-${HOME}/llvm-project/snapshots}"
-# Latest SVN revision known to be working in this configuration.
-readonly LLVM_DEFAULT_REV="70786"
+# Latest SVN revisions known to be working in this configuration.
+readonly LLVM_DEFAULT_REV="74530"
+readonly LLVMGCC_DEFAULT_REV="74535"
readonly LLVM_PKG="llvm-${LLVM_SVN_REV:-${LLVM_DEFAULT_REV}}.tar.bz2"
readonly LLVM_SRC_DIR="${SRC_ROOT}/llvm"
readonly LLVM_OBJ_DIR="${OBJ_ROOT}/llvm"
-readonly LLVM_INSTALL_DIR="${LLVM_INSTALL_ROOT}/llvm"
+readonly LLVM_INSTALL_DIR="${INSTALL_ROOT}/${CROSS_TARGET}/llvm"
-readonly LLVMGCC_PKG="llvm-gcc-4.2-${LLVMGCC_SVN_REV:-${LLVM_DEFAULT_REV}}.tar.bz2"
+readonly LLVMGCC_PKG="llvm-gcc-4.2-${LLVMGCC_SVN_REV:-${LLVMGCC_DEFAULT_REV}}.tar.bz2"
readonly LLVMGCC_SRC_DIR="${SRC_ROOT}/llvm-gcc-4.2"
readonly LLVMGCC_OBJ_DIR="${OBJ_ROOT}/llvm-gcc-4.2"
-readonly LLVMGCC_INSTALL_DIR="${LLVM_INSTALL_ROOT}/llvm-gcc-4.2"
+readonly LLVMGCC_INSTALL_DIR="${INSTALL_ROOT}/${CROSS_TARGET}/llvm-gcc-4.2"
-readonly MAKE_OPTS="-j2"
-
-# Verify we aren't going to install into an existing directory as this might
-# create problems as we won't have a clean install.
-verifyNotDir() {
- if [[ -d $1 ]]; then
- echo "Install dir $1 already exists; remove it to continue."
- exit
- fi
-}
+readonly MAKE_OPTS="${MAKE_OPTS:--j2}"
# Params:
# $1: directory to be created
@@ -104,12 +95,9 @@ runAndLog() {
}
installCodeSourcery() {
- # Create CodeSourcery dir, if necessary.
- verifyNotDir ${CODE_SOURCERY}
- sudoCreateDir ${CODE_SOURCERY}
-
- # Unpack the tarball.
+ # Unpack the tarball, creating the CodeSourcery dir, if necessary.
if [[ ! -d ${CODE_SOURCERY_ROOT} ]]; then
+ sudoCreateDir ${CODE_SOURCERY}
cd ${CODE_SOURCERY}
if [[ -e ${CODE_SOURCERY_PKG_PATH}/${CODE_SOURCERY_PKG} ]]; then
runCommand "Unpacking CodeSourcery in ${CODE_SOURCERY}" \
@@ -122,7 +110,7 @@ installCodeSourcery() {
exit
fi
else
- echo "CodeSourcery install dir already exists."
+ echo "CodeSourcery install dir already exists; skipping."
fi
# Verify our CodeSourcery toolchain installation.
@@ -141,7 +129,11 @@ installCodeSourcery() {
}
installLLVM() {
- verifyNotDir ${LLVM_INSTALL_DIR}
+ if [[ -d ${LLVM_INSTALL_DIR} ]]; then
+ echo "LLVM install dir ${LLVM_INSTALL_DIR} exists; skipping."
+ return
+ fi
+
sudoCreateDir ${LLVM_INSTALL_DIR}
# Unpack LLVM tarball; should create the directory "llvm".
@@ -165,7 +157,11 @@ installLLVM() {
}
installLLVMGCC() {
- verifyNotDir ${LLVMGCC_INSTALL_DIR}
+ if [[ -d ${LLVMGCC_INSTALL_DIR} ]]; then
+ echo "LLVM-GCC install dir ${LLVMGCC_INSTALL_DIR} exists; skipping."
+ return
+ fi
+
sudoCreateDir ${LLVMGCC_INSTALL_DIR}
# Unpack LLVM-GCC tarball; should create the directory "llvm-gcc-4.2".
@@ -182,8 +178,9 @@ installLLVMGCC() {
--prefix=${LLVMGCC_INSTALL_DIR} \
--program-prefix=llvm- \
--target=${CROSS_TARGET} \
- --with-gnu-as=${CROSS_TARGET_AS} \
- --with-gnu-ld=${CROSS_TARGET_LD} \
+ --with-arch=${CROSS_MARCH} \
+ --with-as=${CROSS_TARGET_AS} \
+ --with-ld=${CROSS_TARGET_LD} \
--with-sysroot=${SYSROOT}
runAndLog "Building LLVM-GCC" ${LLVMGCC_OBJ_DIR}/llvmgcc-build.log \
make
diff --git a/utils/crosstool/create-snapshots.sh b/utils/crosstool/create-snapshots.sh
index 7c640bc00e7f..d70232a3ce46 100755
--- a/utils/crosstool/create-snapshots.sh
+++ b/utils/crosstool/create-snapshots.sh
@@ -11,31 +11,31 @@
set -o nounset
set -o errexit
-readonly REV="${1:-HEAD}"
+readonly LLVM_PROJECT_SVN="http://llvm.org/svn/llvm-project"
-runOnModule() {
+getLatestRevisionFromSVN() {
+ svn info ${LLVM_PROJECT_SVN} | egrep ^Revision | sed 's/^Revision: //'
+}
+
+readonly REV="${1:-$(getLatestRevisionFromSVN)}"
+
+createTarballFromSVN() {
local module=$1
local log="${module}.log"
- echo "Running: svn co -r ${REV} ${module}; log in ${log}"
- svn co -r ${REV} http://llvm.org/svn/llvm-project/${module}/trunk ${module} \
- > ${log} 2>&1
-
- # Delete all the ".svn" dirs; they take quite a lot of space.
- echo "Cleaning up .svn dirs"
- find ${module} -type d -name \.svn -print0 | xargs -0 /bin/rm -rf
+ echo "Running: svn export -r ${REV} ${module}; log in ${log}"
+ svn -q export -r ${REV} ${LLVM_PROJECT_SVN}/${module}/trunk \
+ ${module} > ${log} 2>&1
# Create "module-revision.tar.bz2" packages from the SVN checkout dirs.
- local revision=$(grep "Checked out revision" ${log} | \
- sed 's/[^0-9]\+\([0-9]\+\)[^0-9]\+/\1/')
- local tarball="${module}-${revision}.tar.bz2"
+ local tarball="${module}-${REV}.tar.bz2"
echo "Creating tarball: ${tarball}"
tar cjf ${tarball} ${module}
- echo "Cleaning SVN checkout dir ${module}"
+ echo "Cleaning up '${module}'"
rm -rf ${module} ${log}
}
for module in "llvm" "llvm-gcc-4.2"; do
- runOnModule ${module}
+ createTarballFromSVN ${module}
done
diff --git a/utils/emacs/emacs.el b/utils/emacs/emacs.el
index a12848b45754..969f538c8174 100644
--- a/utils/emacs/emacs.el
+++ b/utils/emacs/emacs.el
@@ -1,6 +1,6 @@
;; LLVM coding style guidelines in emacs
;; Maintainer: LLVM Team, http://llvm.org/
-;; Modified: 2005-04-24
+;; Modified: 2009-07-28
;; Max 80 cols per line, indent by two spaces, no tabs.
;; Apparently, this does not affect tabs in Makefiles.
@@ -17,7 +17,9 @@
'((fill-column . 80)
(c++-indent-level . 2)
(c-basic-offset . 2)
- (indent-tabs-mode . nil)))
+ (indent-tabs-mode . nil)
+ (c-offsets-alist . ((innamespace 0)))))
+
(add-hook 'c-mode-hook
(function
(lambda nil
@@ -26,6 +28,7 @@
(c-set-style "llvm.org")
)
))))
+
(add-hook 'c++-mode-hook
(function
(lambda nil
diff --git a/utils/lit/LitConfig.py b/utils/lit/LitConfig.py
new file mode 100644
index 000000000000..4fb0ccc09353
--- /dev/null
+++ b/utils/lit/LitConfig.py
@@ -0,0 +1,71 @@
+class LitConfig:
+ """LitConfig - Configuration data for a 'lit' test runner instance, shared
+ across all tests.
+
+    The LitConfig object is also used to communicate with client configuration
+    files; it is always passed in as the global variable 'lit' so that
+    configuration files can easily access common functionality and internal
+    components.
+ """
+
+ # Provide access to built-in formats.
+ import LitFormats as formats
+
+ # Provide access to built-in utility functions.
+ import Util as util
+
+ def __init__(self, progname, path, quiet,
+ useValgrind, valgrindArgs,
+ useTclAsSh,
+ noExecute, debug, isWindows):
+ # The name of the test runner.
+ self.progname = progname
+ # The items to add to the PATH environment variable.
+ self.path = list(map(str, path))
+ self.quiet = bool(quiet)
+ self.useValgrind = bool(useValgrind)
+ self.valgrindArgs = list(valgrindArgs)
+ self.useTclAsSh = bool(useTclAsSh)
+ self.noExecute = noExecute
+ self.debug = debug
+ self.isWindows = bool(isWindows)
+
+ self.numErrors = 0
+ self.numWarnings = 0
+
+ def load_config(self, config, path):
+ """load_config(config, path) - Load a config object from an alternate
+ path."""
+ from TestingConfig import TestingConfig
+ return TestingConfig.frompath(path, config.parent, self,
+ mustExist = True,
+ config = config)
+
+ def _write_message(self, kind, message):
+ import inspect, os, sys
+
+ # Get the file/line where this message was generated.
+ f = inspect.currentframe()
+ # Step out of _write_message, and then out of wrapper.
+ f = f.f_back.f_back
+ file,line,_,_,_ = inspect.getframeinfo(f)
+ location = '%s:%d' % (os.path.basename(file), line)
+
+ print >>sys.stderr, '%s: %s: %s: %s' % (self.progname, location,
+ kind, message)
+
+ def note(self, message):
+ self._write_message('note', message)
+
+ def warning(self, message):
+ self._write_message('warning', message)
+ self.numWarnings += 1
+
+ def error(self, message):
+ self._write_message('error', message)
+ self.numErrors += 1
+
+ def fatal(self, message):
+ import sys
+ self._write_message('fatal', message)
+ sys.exit(2)
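
[Editor's note: LitConfig is handed to every config file as the global 'lit'. A
hypothetical lit.cfg might look like the sketch below; the attribute names follow
TestingConfig.frompath further down, so treat it as an illustration, not a canonical
config:

    # Hypothetical lit.cfg contents; 'lit' is the LitConfig instance and
    # 'config' is the TestingConfig being populated (see TestingConfig.frompath).
    config.name = 'Demo'
    config.suffixes = ['.txt']
    config.test_format = lit.formats.ShTest(execute_external=True)

    if lit.useValgrind:
        lit.note('running under valgrind: %r' % lit.valgrindArgs)
]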
diff --git a/utils/lit/LitFormats.py b/utils/lit/LitFormats.py
new file mode 100644
index 000000000000..9b8250d0aaf9
--- /dev/null
+++ b/utils/lit/LitFormats.py
@@ -0,0 +1,2 @@
+from TestFormats import GoogleTest, ShTest, TclTest, SyntaxCheckTest
+
diff --git a/utils/lit/ProgressBar.py b/utils/lit/ProgressBar.py
new file mode 100644
index 000000000000..85c95f57f7ac
--- /dev/null
+++ b/utils/lit/ProgressBar.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python
+
+# Source: http://code.activestate.com/recipes/475116/, with
+# modifications by Daniel Dunbar.
+
+import sys, re, time
+
+class TerminalController:
+ """
+ A class that can be used to portably generate formatted output to
+ a terminal.
+
+ `TerminalController` defines a set of instance variables whose
+ values are initialized to the control sequence necessary to
+ perform a given action. These can be simply included in normal
+ output to the terminal:
+
+ >>> term = TerminalController()
+ >>> print 'This is '+term.GREEN+'green'+term.NORMAL
+
+    Alternatively, the `render()` method can be used, which replaces
+ '${action}' with the string required to perform 'action':
+
+ >>> term = TerminalController()
+ >>> print term.render('This is ${GREEN}green${NORMAL}')
+
+ If the terminal doesn't support a given action, then the value of
+ the corresponding instance variable will be set to ''. As a
+ result, the above code will still work on terminals that do not
+ support color, except that their output will not be colored.
+ Also, this means that you can test whether the terminal supports a
+ given action by simply testing the truth value of the
+ corresponding instance variable:
+
+ >>> term = TerminalController()
+ >>> if term.CLEAR_SCREEN:
+    ...     print 'This terminal supports clearing the screen.'
+
+ Finally, if the width and height of the terminal are known, then
+ they will be stored in the `COLS` and `LINES` attributes.
+ """
+ # Cursor movement:
+ BOL = '' #: Move the cursor to the beginning of the line
+ UP = '' #: Move the cursor up one line
+ DOWN = '' #: Move the cursor down one line
+ LEFT = '' #: Move the cursor left one char
+ RIGHT = '' #: Move the cursor right one char
+
+ # Deletion:
+ CLEAR_SCREEN = '' #: Clear the screen and move to home position
+ CLEAR_EOL = '' #: Clear to the end of the line.
+ CLEAR_BOL = '' #: Clear to the beginning of the line.
+ CLEAR_EOS = '' #: Clear to the end of the screen
+
+ # Output modes:
+ BOLD = '' #: Turn on bold mode
+ BLINK = '' #: Turn on blink mode
+ DIM = '' #: Turn on half-bright mode
+ REVERSE = '' #: Turn on reverse-video mode
+ NORMAL = '' #: Turn off all modes
+
+ # Cursor display:
+ HIDE_CURSOR = '' #: Make the cursor invisible
+ SHOW_CURSOR = '' #: Make the cursor visible
+
+ # Terminal size:
+ COLS = None #: Width of the terminal (None for unknown)
+ LINES = None #: Height of the terminal (None for unknown)
+
+ # Foreground colors:
+ BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = ''
+
+ # Background colors:
+ BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = ''
+ BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = ''
+
+ _STRING_CAPABILITIES = """
+ BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1
+ CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold
+ BLINK=blink DIM=dim REVERSE=rev UNDERLINE=smul NORMAL=sgr0
+    HIDE_CURSOR=civis SHOW_CURSOR=cnorm""".split()
+ _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split()
+ _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split()
+
+ def __init__(self, term_stream=sys.stdout):
+ """
+ Create a `TerminalController` and initialize its attributes
+ with appropriate values for the current terminal.
+ `term_stream` is the stream that will be used for terminal
+ output; if this stream is not a tty, then the terminal is
+ assumed to be a dumb terminal (i.e., have no capabilities).
+ """
+ # Curses isn't available on all platforms
+ try: import curses
+ except: return
+
+ # If the stream isn't a tty, then assume it has no capabilities.
+ if not term_stream.isatty(): return
+
+ # Check the terminal type. If we fail, then assume that the
+ # terminal has no capabilities.
+ try: curses.setupterm()
+ except: return
+
+ # Look up numeric capabilities.
+ self.COLS = curses.tigetnum('cols')
+ self.LINES = curses.tigetnum('lines')
+
+ # Look up string capabilities.
+ for capability in self._STRING_CAPABILITIES:
+ (attrib, cap_name) = capability.split('=')
+ setattr(self, attrib, self._tigetstr(cap_name) or '')
+
+ # Colors
+ set_fg = self._tigetstr('setf')
+ if set_fg:
+ for i,color in zip(range(len(self._COLORS)), self._COLORS):
+ setattr(self, color, curses.tparm(set_fg, i) or '')
+ set_fg_ansi = self._tigetstr('setaf')
+ if set_fg_ansi:
+ for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+ setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
+ set_bg = self._tigetstr('setb')
+ if set_bg:
+ for i,color in zip(range(len(self._COLORS)), self._COLORS):
+ setattr(self, 'BG_'+color, curses.tparm(set_bg, i) or '')
+ set_bg_ansi = self._tigetstr('setab')
+ if set_bg_ansi:
+ for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+ setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '')
+
+ def _tigetstr(self, cap_name):
+ # String capabilities can include "delays" of the form "$<2>".
+ # For any modern terminal, we should be able to just ignore
+ # these, so strip them out.
+ import curses
+ cap = curses.tigetstr(cap_name) or ''
+ return re.sub(r'\$<\d+>[/*]?', '', cap)
+
+ def render(self, template):
+ """
+        Replace each $-substitution in the given template string with
+ the corresponding terminal control string (if it's defined) or
+ '' (if it's not).
+ """
+ return re.sub(r'\$\$|\${\w+}', self._render_sub, template)
+
+ def _render_sub(self, match):
+ s = match.group()
+ if s == '$$': return s
+ else: return getattr(self, s[2:-1])
+
+#######################################################################
+# Example use case: progress bar
+#######################################################################
+
+class SimpleProgressBar:
+ """
+ A simple progress bar which doesn't need any terminal support.
+
+ This prints out a progress bar like:
+ 'Header: 0 .. 10.. 20.. ...'
+ """
+
+ def __init__(self, header):
+ self.header = header
+ self.atIndex = None
+
+ def update(self, percent, message):
+ if self.atIndex is None:
+ sys.stdout.write(self.header)
+ self.atIndex = 0
+
+ next = int(percent*50)
+ if next == self.atIndex:
+ return
+
+ for i in range(self.atIndex, next):
+ idx = i % 5
+ if idx == 0:
+ sys.stdout.write('%-2d' % (i*2))
+ elif idx == 1:
+ pass # Skip second char
+ elif idx < 4:
+ sys.stdout.write('.')
+ else:
+ sys.stdout.write(' ')
+ sys.stdout.flush()
+ self.atIndex = next
+
+ def clear(self):
+ if self.atIndex is not None:
+ sys.stdout.write('\n')
+ sys.stdout.flush()
+ self.atIndex = None
+
+class ProgressBar:
+ """
+ A 3-line progress bar, which looks like::
+
+ Header
+ 20% [===========----------------------------------]
+ progress message
+
+ The progress bar is colored, if the terminal supports color
+ output; and adjusts to the width of the terminal.
+ """
+ BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s\n'
+ HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'
+
+ def __init__(self, term, header, useETA=True):
+ self.term = term
+ if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL):
+ raise ValueError("Terminal isn't capable enough -- you "
+                             "should use a simpler progress display.")
+ self.width = self.term.COLS or 75
+ self.bar = term.render(self.BAR)
+ self.header = self.term.render(self.HEADER % header.center(self.width))
+ self.cleared = 1 #: true if we haven't drawn the bar yet.
+ self.useETA = useETA
+ if self.useETA:
+ self.startTime = time.time()
+ self.update(0, '')
+
+ def update(self, percent, message):
+ if self.cleared:
+ sys.stdout.write(self.header)
+ self.cleared = 0
+ prefix = '%3d%% ' % (percent*100,)
+ suffix = ''
+ if self.useETA:
+ elapsed = time.time() - self.startTime
+ if percent > .0001 and elapsed > 1:
+ total = elapsed / percent
+ eta = int(total - elapsed)
+                h = eta//3600
+ m = (eta//60) % 60
+ s = eta % 60
+ suffix = ' ETA: %02d:%02d:%02d'%(h,m,s)
+ barWidth = self.width - len(prefix) - len(suffix) - 2
+ n = int(barWidth*percent)
+ if len(message) < self.width:
+ message = message + ' '*(self.width - len(message))
+ else:
+ message = '... ' + message[-(self.width-4):]
+ sys.stdout.write(
+ self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
+ (self.bar % (prefix, '='*n, '-'*(barWidth-n), suffix)) +
+ self.term.CLEAR_EOL + message)
+
+ def clear(self):
+ if not self.cleared:
+ sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +
+ self.term.UP + self.term.CLEAR_EOL +
+ self.term.UP + self.term.CLEAR_EOL)
+ self.cleared = 1
+
+def test():
+ import time
+ tc = TerminalController()
+ p = ProgressBar(tc, 'Tests')
+ for i in range(101):
+ p.update(i/100., str(i))
+ time.sleep(.3)
+
+if __name__=='__main__':
+ test()
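
[Editor's note: a plausible way a caller would pick between the two bars, falling back to
SimpleProgressBar when the terminal lacks the needed capabilities (ProgressBar raises
ValueError otherwise). A sketch, not code from this commit:

    import time
    from ProgressBar import TerminalController, ProgressBar, SimpleProgressBar

    term = TerminalController()
    if term.CLEAR_EOL and term.UP and term.BOL:
        bar = ProgressBar(term, 'Tests')    # capable terminal: 3-line bar
    else:
        bar = SimpleProgressBar('Tests: ')  # dumb-terminal fallback
    for i in range(101):
        bar.update(i / 100.0, 'item %d' % i)
        time.sleep(0.01)
    bar.clear()
]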
diff --git a/utils/lit/ShCommands.py b/utils/lit/ShCommands.py
new file mode 100644
index 000000000000..4550437ce227
--- /dev/null
+++ b/utils/lit/ShCommands.py
@@ -0,0 +1,85 @@
+class Command:
+ def __init__(self, args, redirects):
+ self.args = list(args)
+ self.redirects = list(redirects)
+
+ def __repr__(self):
+ return 'Command(%r, %r)' % (self.args, self.redirects)
+
+ def __cmp__(self, other):
+ if not isinstance(other, Command):
+ return -1
+
+ return cmp((self.args, self.redirects),
+ (other.args, other.redirects))
+
+ def toShell(self, file):
+ for arg in self.args:
+ if "'" not in arg:
+ quoted = "'%s'" % arg
+ elif '"' not in arg and '$' not in arg:
+ quoted = '"%s"' % arg
+ else:
+ raise NotImplementedError,'Unable to quote %r' % arg
+ print >>file, quoted,
+
+ # For debugging / validation.
+ import ShUtil
+ dequoted = list(ShUtil.ShLexer(quoted).lex())
+ if dequoted != [arg]:
+ raise NotImplementedError,'Unable to quote %r' % arg
+
+ for r in self.redirects:
+ if len(r[0]) == 1:
+ print >>file, "%s '%s'" % (r[0][0], r[1]),
+ else:
+ print >>file, "%s%s '%s'" % (r[0][1], r[0][0], r[1]),
+
+class Pipeline:
+ def __init__(self, commands, negate=False, pipe_err=False):
+ self.commands = commands
+ self.negate = negate
+ self.pipe_err = pipe_err
+
+ def __repr__(self):
+ return 'Pipeline(%r, %r, %r)' % (self.commands, self.negate,
+ self.pipe_err)
+
+ def __cmp__(self, other):
+ if not isinstance(other, Pipeline):
+ return -1
+
+ return cmp((self.commands, self.negate, self.pipe_err),
+                   (other.commands, other.negate, other.pipe_err))
+
+ def toShell(self, file, pipefail=False):
+ if pipefail != self.pipe_err:
+ raise ValueError,'Inconsistent "pipefail" attribute!'
+ if self.negate:
+ print >>file, '!',
+ for cmd in self.commands:
+ cmd.toShell(file)
+ if cmd is not self.commands[-1]:
+ print >>file, '|\n ',
+
+class Seq:
+ def __init__(self, lhs, op, rhs):
+ assert op in (';', '&', '||', '&&')
+ self.op = op
+ self.lhs = lhs
+ self.rhs = rhs
+
+ def __repr__(self):
+ return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
+
+ def __cmp__(self, other):
+ if not isinstance(other, Seq):
+ return -1
+
+ return cmp((self.lhs, self.op, self.rhs),
+ (other.lhs, other.op, other.rhs))
+
+ def toShell(self, file, pipefail=False):
+ self.lhs.toShell(file, pipefail)
+ print >>file, ' %s\n' % self.op
+ self.rhs.toShell(file, pipefail)
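
[Editor's note: these classes are the AST that the lexers below produce; toShell
serializes a tree back to POSIX shell. A small round trip, assuming pipe_err agrees
with the pipefail argument as the code requires:

    import StringIO
    from ShCommands import Command, Pipeline

    buf = StringIO.StringIO()
    pipe = Pipeline([Command(['echo', 'hello'], []),
                     Command(['grep', 'hell'], [(('>',), 'out.txt')])],
                    negate=False, pipe_err=True)
    pipe.toShell(buf, pipefail=True)
    print buf.getvalue()   # roughly: 'echo' 'hello' | 'grep' 'hell' > 'out.txt'
]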
diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py
new file mode 100644
index 000000000000..c4bbb3d3731d
--- /dev/null
+++ b/utils/lit/ShUtil.py
@@ -0,0 +1,346 @@
+import itertools
+
+import Util
+from ShCommands import Command, Pipeline, Seq
+
+class ShLexer:
+ def __init__(self, data, win32Escapes = False):
+ self.data = data
+ self.pos = 0
+ self.end = len(data)
+ self.win32Escapes = win32Escapes
+
+ def eat(self):
+ c = self.data[self.pos]
+ self.pos += 1
+ return c
+
+ def look(self):
+ return self.data[self.pos]
+
+ def maybe_eat(self, c):
+ """
+ maybe_eat(c) - Consume the character c if it is the next character,
+ returning True if a character was consumed. """
+ if self.data[self.pos] == c:
+ self.pos += 1
+ return True
+ return False
+
+ def lex_arg_fast(self, c):
+ # Get the leading whitespace free section.
+ chunk = self.data[self.pos - 1:].split(None, 1)[0]
+
+ # If it has special characters, the fast path failed.
+ if ('|' in chunk or '&' in chunk or
+ '<' in chunk or '>' in chunk or
+ "'" in chunk or '"' in chunk or
+ '\\' in chunk):
+ return None
+
+ self.pos = self.pos - 1 + len(chunk)
+ return chunk
+
+ def lex_arg_slow(self, c):
+ if c in "'\"":
+ str = self.lex_arg_quoted(c)
+ else:
+ str = c
+ while self.pos != self.end:
+ c = self.look()
+ if c.isspace() or c in "|&":
+ break
+ elif c in '><':
+ # This is an annoying case; we treat '2>' as a single token so
+ # we don't have to track whitespace tokens.
+
+ # If the parse string isn't an integer, do the usual thing.
+ if not str.isdigit():
+ break
+
+ # Otherwise, lex the operator and convert to a redirection
+ # token.
+ num = int(str)
+ tok = self.lex_one_token()
+ assert isinstance(tok, tuple) and len(tok) == 1
+ return (tok[0], num)
+ elif c == '"':
+ self.eat()
+ str += self.lex_arg_quoted('"')
+ elif not self.win32Escapes and c == '\\':
+ # Outside of a string, '\\' escapes everything.
+ self.eat()
+ if self.pos == self.end:
+ Util.warning("escape at end of quoted argument in: %r" %
+ self.data)
+ return str
+ str += self.eat()
+ else:
+ str += self.eat()
+ return str
+
+ def lex_arg_quoted(self, delim):
+ str = ''
+ while self.pos != self.end:
+ c = self.eat()
+ if c == delim:
+ return str
+ elif c == '\\' and delim == '"':
+ # Inside a '"' quoted string, '\\' only escapes the quote
+ # character and backslash, otherwise it is preserved.
+ if self.pos == self.end:
+ Util.warning("escape at end of quoted argument in: %r" %
+ self.data)
+ return str
+ c = self.eat()
+                if c == '"':
+ str += '"'
+ elif c == '\\':
+ str += '\\'
+ else:
+ str += '\\' + c
+ else:
+ str += c
+ Util.warning("missing quote character in %r" % self.data)
+ return str
+
+ def lex_arg_checked(self, c):
+ pos = self.pos
+ res = self.lex_arg_fast(c)
+ end = self.pos
+
+ self.pos = pos
+ reference = self.lex_arg_slow(c)
+ if res is not None:
+ if res != reference:
+ raise ValueError,"Fast path failure: %r != %r" % (res, reference)
+ if self.pos != end:
+ raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
+ return reference
+
+ def lex_arg(self, c):
+ return self.lex_arg_fast(c) or self.lex_arg_slow(c)
+
+ def lex_one_token(self):
+ """
+ lex_one_token - Lex a single 'sh' token. """
+
+ c = self.eat()
+ if c in ';!':
+ return (c,)
+ if c == '|':
+ if self.maybe_eat('|'):
+ return ('||',)
+ return (c,)
+ if c == '&':
+ if self.maybe_eat('&'):
+ return ('&&',)
+ if self.maybe_eat('>'):
+ return ('&>',)
+ return (c,)
+ if c == '>':
+ if self.maybe_eat('&'):
+ return ('>&',)
+ if self.maybe_eat('>'):
+ return ('>>',)
+ return (c,)
+ if c == '<':
+ if self.maybe_eat('&'):
+ return ('<&',)
+ if self.maybe_eat('>'):
+ return ('<<',)
+ return (c,)
+
+ return self.lex_arg(c)
+
+ def lex(self):
+ while self.pos != self.end:
+ if self.look().isspace():
+ self.eat()
+ else:
+ yield self.lex_one_token()
+
+###
+
+class ShParser:
+ def __init__(self, data, win32Escapes = False):
+ self.data = data
+ self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
+
+ def lex(self):
+ try:
+ return self.tokens.next()
+ except StopIteration:
+ return None
+
+ def look(self):
+ next = self.lex()
+ if next is not None:
+ self.tokens = itertools.chain([next], self.tokens)
+ return next
+
+ def parse_command(self):
+ tok = self.lex()
+ if not tok:
+ raise ValueError,"empty command!"
+ if isinstance(tok, tuple):
+ raise ValueError,"syntax error near unexpected token %r" % tok[0]
+
+ args = [tok]
+ redirects = []
+ while 1:
+ tok = self.look()
+
+ # EOF?
+ if tok is None:
+ break
+
+ # If this is an argument, just add it to the current command.
+ if isinstance(tok, str):
+ args.append(self.lex())
+ continue
+
+ # Otherwise see if it is a terminator.
+ assert isinstance(tok, tuple)
+ if tok[0] in ('|',';','&','||','&&'):
+ break
+
+ # Otherwise it must be a redirection.
+ op = self.lex()
+ arg = self.lex()
+ if not arg:
+ raise ValueError,"syntax error near token %r" % op[0]
+ redirects.append((op, arg))
+
+ return Command(args, redirects)
+
+ def parse_pipeline(self):
+ negate = False
+ if self.look() == ('!',):
+ self.lex()
+ negate = True
+
+ commands = [self.parse_command()]
+ while self.look() == ('|',):
+ self.lex()
+ commands.append(self.parse_command())
+ return Pipeline(commands, negate)
+
+ def parse(self):
+ lhs = self.parse_pipeline()
+
+ while self.look():
+ operator = self.lex()
+ assert isinstance(operator, tuple) and len(operator) == 1
+
+ if not self.look():
+ raise ValueError, "missing argument to operator %r" % operator[0]
+
+ # FIXME: Operator precedence!!
+ lhs = Seq(lhs, operator[0], self.parse_pipeline())
+
+ return lhs
+
+###
+
+import unittest
+
+class TestShLexer(unittest.TestCase):
+ def lex(self, str, *args, **kwargs):
+ return list(ShLexer(str, *args, **kwargs).lex())
+
+ def test_basic(self):
+ self.assertEqual(self.lex('a|b>c&d<e'),
+ ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
+ ('<',), 'e'])
+
+ def test_redirection_tokens(self):
+ self.assertEqual(self.lex('a2>c'),
+ ['a2', ('>',), 'c'])
+ self.assertEqual(self.lex('a 2>c'),
+ ['a', ('>',2), 'c'])
+
+ def test_quoting(self):
+ self.assertEqual(self.lex(""" 'a' """),
+ ['a'])
+ self.assertEqual(self.lex(""" "hello\\"world" """),
+ ['hello"world'])
+ self.assertEqual(self.lex(""" "hello\\'world" """),
+ ["hello\\'world"])
+ self.assertEqual(self.lex(""" "hello\\\\world" """),
+ ["hello\\world"])
+ self.assertEqual(self.lex(""" he"llo wo"rld """),
+ ["hello world"])
+ self.assertEqual(self.lex(""" a\\ b a\\\\b """),
+ ["a b", "a\\b"])
+ self.assertEqual(self.lex(""" "" "" """),
+ ["", ""])
+ self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
+ ['a\\', 'b'])
+
+class TestShParse(unittest.TestCase):
+ def parse(self, str):
+ return ShParser(str).parse()
+
+ def test_basic(self):
+ self.assertEqual(self.parse('echo hello'),
+ Pipeline([Command(['echo', 'hello'], [])], False))
+ self.assertEqual(self.parse('echo ""'),
+ Pipeline([Command(['echo', ''], [])], False))
+
+ def test_redirection(self):
+ self.assertEqual(self.parse('echo hello > c'),
+ Pipeline([Command(['echo', 'hello'],
+ [((('>'),), 'c')])], False))
+ self.assertEqual(self.parse('echo hello > c >> d'),
+ Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
+ (('>>',), 'd')])], False))
+ self.assertEqual(self.parse('a 2>&1'),
+ Pipeline([Command(['a'], [(('>&',2), '1')])], False))
+
+ def test_pipeline(self):
+ self.assertEqual(self.parse('a | b'),
+ Pipeline([Command(['a'], []),
+ Command(['b'], [])],
+ False))
+
+ self.assertEqual(self.parse('a | b | c'),
+ Pipeline([Command(['a'], []),
+ Command(['b'], []),
+ Command(['c'], [])],
+ False))
+
+ self.assertEqual(self.parse('! a'),
+ Pipeline([Command(['a'], [])],
+ True))
+
+ def test_list(self):
+ self.assertEqual(self.parse('a ; b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ ';',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a & b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ '&',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a && b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ '&&',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a || b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ '||',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a && b || c'),
+ Seq(Seq(Pipeline([Command(['a'], [])], False),
+ '&&',
+ Pipeline([Command(['b'], [])], False)),
+ '||',
+ Pipeline([Command(['c'], [])], False)))
+
+if __name__ == '__main__':
+ unittest.main()
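
[Editor's note: the unit tests above double as documentation; for quick interactive use
the lexer and parser compose like this (output shapes taken from the tests):

    from ShUtil import ShLexer, ShParser

    print list(ShLexer('echo hi > out 2>&1').lex())
    # ['echo', 'hi', ('>',), 'out', ('>&', 2), '1']

    print ShParser('echo hi | grep h && echo done').parse()
    # Seq(Pipeline([Command(['echo', 'hi'], []), Command(['grep', 'h'], [])],
    #              False, False),
    #     '&&', Pipeline([Command(['echo', 'done'], [])], False, False))
]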
diff --git a/utils/lit/TODO b/utils/lit/TODO
new file mode 100644
index 000000000000..4d00d2c1cfcd
--- /dev/null
+++ b/utils/lit/TODO
@@ -0,0 +1,19 @@
+ - Move temp directory name into local test config.
+
+ - Add --show-unsupported, don't show by default?
+
+ - Finish documentation.
+
+ - Optionally use multiprocessing.
+
+ - Support llvmc and ocaml tests.
+
+ - Support valgrind in all configs, and LLVM style valgrind.
+
+ - Provide test suite config for running unit tests.
+
+ - Support a timeout / ulimit.
+
+ - Support "disabling" tests? The advantage of making this distinct from XFAIL
+ is it makes it more obvious that it is a temporary measure (and lit can put
+ in a separate category).
diff --git a/utils/lit/TclUtil.py b/utils/lit/TclUtil.py
new file mode 100644
index 000000000000..4a3f34508d64
--- /dev/null
+++ b/utils/lit/TclUtil.py
@@ -0,0 +1,322 @@
+import itertools
+
+from ShCommands import Command, Pipeline
+
+def tcl_preprocess(data):
+ # Tcl has a preprocessing step to replace escaped newlines.
+ i = data.find('\\\n')
+ if i == -1:
+ return data
+
+ # Replace '\\\n' and subsequent whitespace by a single space.
+ n = len(data)
+ str = data[:i]
+ i += 2
+ while i < n and data[i] in ' \t':
+ i += 1
+ return str + ' ' + data[i:]
+
+class TclLexer:
+ """TclLexer - Lex a string into "words", following the Tcl syntax."""
+
+ def __init__(self, data):
+ self.data = tcl_preprocess(data)
+ self.pos = 0
+ self.end = len(self.data)
+
+ def at_end(self):
+ return self.pos == self.end
+
+ def eat(self):
+ c = self.data[self.pos]
+ self.pos += 1
+ return c
+
+ def look(self):
+ return self.data[self.pos]
+
+ def maybe_eat(self, c):
+ """
+ maybe_eat(c) - Consume the character c if it is the next character,
+ returning True if a character was consumed. """
+ if self.data[self.pos] == c:
+ self.pos += 1
+ return True
+ return False
+
+ def escape(self, c):
+ if c == 'a':
+ return '\x07'
+ elif c == 'b':
+ return '\x08'
+ elif c == 'f':
+ return '\x0c'
+ elif c == 'n':
+ return '\n'
+ elif c == 'r':
+ return '\r'
+ elif c == 't':
+ return '\t'
+ elif c == 'v':
+ return '\x0b'
+ elif c in 'uxo':
+ raise ValueError,'Invalid quoted character %r' % c
+ else:
+ return c
+
+ def lex_braced(self):
+        # Lex until the matching close brace; the opening brace has already
+        # been consumed.
+
+ str = ''
+ while 1:
+ if self.at_end():
+ raise ValueError,"Unterminated '{' quoted word"
+
+ c = self.eat()
+ if c == '}':
+ break
+ elif c == '{':
+ str += '{' + self.lex_braced() + '}'
+ elif c == '\\' and self.look() in '{}':
+ str += self.eat()
+ else:
+ str += c
+
+ return str
+
+ def lex_quoted(self):
+ str = ''
+
+ while 1:
+ if self.at_end():
+ raise ValueError,"Unterminated '\"' quoted word"
+
+ c = self.eat()
+ if c == '"':
+ break
+ elif c == '\\':
+ if self.at_end():
+ raise ValueError,'Missing quoted character'
+
+ str += self.escape(self.eat())
+ else:
+ str += c
+
+ return str
+
+ def lex_unquoted(self, process_all=False):
+ # Lex until whitespace or end of string.
+ str = ''
+ while not self.at_end():
+ if not process_all:
+ if self.look().isspace() or self.look() == ';':
+ break
+
+ c = self.eat()
+ if c == '\\':
+ if self.at_end():
+ raise ValueError,'Missing quoted character'
+
+ str += self.escape(self.eat())
+ elif c == '[':
+ raise NotImplementedError, ('Command substitution is '
+ 'not supported')
+ elif c == '$' and not self.at_end() and (self.look().isalpha() or
+ self.look() == '{'):
+ raise NotImplementedError, ('Variable substitution is '
+ 'not supported')
+ else:
+ str += c
+
+ return str
+
+ def lex_one_token(self):
+ if self.maybe_eat('"'):
+ return self.lex_quoted()
+ elif self.maybe_eat('{'):
+ # Check for argument substitution.
+ if not self.maybe_eat('*'):
+ return self.lex_braced()
+
+ if not self.maybe_eat('}'):
+ return '*' + self.lex_braced()
+
+ if self.at_end() or self.look().isspace():
+ return '*'
+
+ raise NotImplementedError, "Argument substitution is unsupported"
+ else:
+ return self.lex_unquoted()
+
+ def lex(self):
+ while not self.at_end():
+ c = self.look()
+ if c in ' \t':
+ self.eat()
+ elif c in ';\n':
+ self.eat()
+ yield (';',)
+ else:
+ yield self.lex_one_token()
+
+class TclExecCommand:
+ kRedirectPrefixes1 = ('<', '>')
+ kRedirectPrefixes2 = ('<@', '<<', '2>', '>&', '>>', '>@')
+ kRedirectPrefixes3 = ('2>@', '2>>', '>>&', '>&@')
+ kRedirectPrefixes4 = ('2>@1',)
+
+ def __init__(self, args):
+ self.args = iter(args)
+
+ def lex(self):
+ try:
+ return self.args.next()
+ except StopIteration:
+ return None
+
+ def look(self):
+ next = self.lex()
+ if next is not None:
+ self.args = itertools.chain([next], self.args)
+ return next
+
+ def parse_redirect(self, tok, length):
+ if len(tok) == length:
+ arg = self.lex()
+ if arg is None:
+ raise ValueError,'Missing argument to %r redirection' % tok
+ else:
+ tok,arg = tok[:length],tok[length:]
+
+ if tok[0] == '2':
+ op = (tok[1:],2)
+ else:
+ op = (tok,)
+ return (op, arg)
+
+ def parse_pipeline(self):
+ if self.look() is None:
+ raise ValueError,"Expected at least one argument to exec"
+
+ commands = [Command([],[])]
+ while 1:
+ arg = self.lex()
+ if arg is None:
+ break
+ elif arg == '|':
+ commands.append(Command([],[]))
+ elif arg == '|&':
+ # Write this as a redirect of stderr; it must come first because
+ # stdout may have already been redirected.
+ commands[-1].redirects.insert(0, (('>&',2),'1'))
+ commands.append(Command([],[]))
+ elif arg[:4] in TclExecCommand.kRedirectPrefixes4:
+ commands[-1].redirects.append(self.parse_redirect(arg, 4))
+ elif arg[:3] in TclExecCommand.kRedirectPrefixes3:
+ commands[-1].redirects.append(self.parse_redirect(arg, 3))
+ elif arg[:2] in TclExecCommand.kRedirectPrefixes2:
+ commands[-1].redirects.append(self.parse_redirect(arg, 2))
+ elif arg[:1] in TclExecCommand.kRedirectPrefixes1:
+ commands[-1].redirects.append(self.parse_redirect(arg, 1))
+ else:
+ commands[-1].args.append(arg)
+
+ return Pipeline(commands, False, pipe_err=True)
+
+ def parse(self):
+ ignoreStderr = False
+ keepNewline = False
+
+ # Parse arguments.
+ while 1:
+ next = self.look()
+ if not isinstance(next, str) or next[0] != '-':
+ break
+
+ if next == '--':
+ self.lex()
+ break
+ elif next == '-ignorestderr':
+ ignoreStderr = True
+ elif next == '-keepnewline':
+ keepNewline = True
+ else:
+ raise ValueError,"Invalid exec argument %r" % next
+
+ return (ignoreStderr, keepNewline, self.parse_pipeline())
+
+###
+
+import unittest
+
+class TestTclLexer(unittest.TestCase):
+ def lex(self, str, *args, **kwargs):
+ return list(TclLexer(str, *args, **kwargs).lex())
+
+ def test_preprocess(self):
+ self.assertEqual(tcl_preprocess('a b'), 'a b')
+ self.assertEqual(tcl_preprocess('a\\\nb c'), 'a b c')
+
+ def test_unquoted(self):
+ self.assertEqual(self.lex('a b c'),
+ ['a', 'b', 'c'])
+ self.assertEqual(self.lex(r'a\nb\tc\ '),
+ ['a\nb\tc '])
+ self.assertEqual(self.lex(r'a \\\$b c $\\'),
+ ['a', r'\$b', 'c', '$\\'])
+
+ def test_braced(self):
+ self.assertEqual(self.lex('a {b c} {}'),
+ ['a', 'b c', ''])
+ self.assertEqual(self.lex(r'a {b {c\n}}'),
+ ['a', 'b {c\\n}'])
+ self.assertEqual(self.lex(r'a {b\{}'),
+ ['a', 'b{'])
+ self.assertEqual(self.lex(r'{*}'), ['*'])
+ self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
+ self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
+ self.assertEqual(self.lex('{a\\\n b}'),
+ ['a b'])
+
+ def test_quoted(self):
+ self.assertEqual(self.lex('a "b c"'),
+ ['a', 'b c'])
+
+ def test_terminators(self):
+ self.assertEqual(self.lex('a\nb'),
+ ['a', (';',), 'b'])
+ self.assertEqual(self.lex('a;b'),
+ ['a', (';',), 'b'])
+ self.assertEqual(self.lex('a ; b'),
+ ['a', (';',), 'b'])
+
+class TestTclExecCommand(unittest.TestCase):
+ def parse(self, str):
+ return TclExecCommand(list(TclLexer(str).lex())).parse()
+
+ def test_basic(self):
+ self.assertEqual(self.parse('echo hello'),
+ (False, False,
+ Pipeline([Command(['echo', 'hello'], [])],
+ False, True)))
+ self.assertEqual(self.parse('echo hello | grep hello'),
+ (False, False,
+ Pipeline([Command(['echo', 'hello'], []),
+ Command(['grep', 'hello'], [])],
+ False, True)))
+
+ def test_redirect(self):
+ self.assertEqual(self.parse('echo hello > a >b >>c 2> d |& e'),
+ (False, False,
+ Pipeline([Command(['echo', 'hello'],
+ [(('>&',2),'1'),
+ (('>',),'a'),
+ (('>',),'b'),
+ (('>>',),'c'),
+ (('>',2),'d')]),
+ Command(['e'], [])],
+ False, True)))
+
+if __name__ == '__main__':
+ unittest.main()
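
[Editor's note: the same interactive view for the Tcl side, with output shapes matching
the unit tests above:

    from TclUtil import TclLexer, TclExecCommand

    print list(TclLexer('grep {an expr} "a b.c"').lex())
    # ['grep', 'an expr', 'a b.c']

    print TclExecCommand(list(TclLexer('cat in | wc > out').lex())).parse()
    # (False, False,
    #  Pipeline([Command(['cat', 'in'], []),
    #            Command(['wc'], [(('>',), 'out')])], False, True))
]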
diff --git a/utils/lit/Test.py b/utils/lit/Test.py
new file mode 100644
index 000000000000..d3f627456f8a
--- /dev/null
+++ b/utils/lit/Test.py
@@ -0,0 +1,71 @@
+import os
+
+# Test results.
+
+class TestResult:
+ def __init__(self, name, isFailure):
+ self.name = name
+ self.isFailure = isFailure
+
+PASS = TestResult('PASS', False)
+XFAIL = TestResult('XFAIL', False)
+FAIL = TestResult('FAIL', True)
+XPASS = TestResult('XPASS', True)
+UNRESOLVED = TestResult('UNRESOLVED', True)
+UNSUPPORTED = TestResult('UNSUPPORTED', False)
+
+# Test classes.
+
+class TestFormat:
+ """TestFormat - Test information provider."""
+
+ def __init__(self, name):
+ self.name = name
+
+class TestSuite:
+ """TestSuite - Information on a group of tests.
+
+ A test suite groups together a set of logically related tests.
+ """
+
+ def __init__(self, name, source_root, exec_root, config):
+ self.name = name
+ self.source_root = source_root
+ self.exec_root = exec_root
+ # The test suite configuration.
+ self.config = config
+
+ def getSourcePath(self, components):
+ return os.path.join(self.source_root, *components)
+
+ def getExecPath(self, components):
+ return os.path.join(self.exec_root, *components)
+
+class Test:
+ """Test - Information on a single test instance."""
+
+ def __init__(self, suite, path_in_suite, config):
+ self.suite = suite
+ self.path_in_suite = path_in_suite
+ self.config = config
+ # The test result code, once complete.
+ self.result = None
+ # Any additional output from the test, once complete.
+ self.output = None
+ # The wall time to execute this test, if timing and once complete.
+ self.elapsed = None
+
+ def setResult(self, result, output, elapsed):
+ assert self.result is None, "Test result already set!"
+ self.result = result
+ self.output = output
+ self.elapsed = elapsed
+
+ def getFullName(self):
+ return self.suite.config.name + '::' + '/'.join(self.path_in_suite)
+
+ def getSourcePath(self):
+ return self.suite.getSourcePath(self.path_in_suite)
+
+ def getExecPath(self):
+ return self.suite.getExecPath(self.path_in_suite)
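
[Editor's note: how suites and tests compose names and paths. StubConfig is a
hypothetical stand-in for a real TestingConfig, used only to satisfy the '.name'
attribute getFullName reads:

    import Test

    class StubConfig:          # hypothetical stand-in for a TestingConfig
        name = 'demo'

    suite = Test.TestSuite('demo', '/src/tests', '/build/tests', StubConfig())
    t = Test.Test(suite, ('sub', 'foo.txt'), StubConfig())
    print t.getFullName()      # demo::sub/foo.txt
    print t.getSourcePath()    # /src/tests/sub/foo.txt
    t.setResult(Test.PASS, '', 0.1)
    print t.result.name, t.result.isFailure   # PASS False
]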
diff --git a/utils/lit/TestFormats.py b/utils/lit/TestFormats.py
new file mode 100644
index 000000000000..61bdb185355f
--- /dev/null
+++ b/utils/lit/TestFormats.py
@@ -0,0 +1,144 @@
+import os
+
+import Test
+import TestRunner
+import Util
+
+class GoogleTest(object):
+ def __init__(self, test_sub_dir, test_suffix):
+ self.test_sub_dir = str(test_sub_dir)
+ self.test_suffix = str(test_suffix)
+
+ def getGTestTests(self, path):
+ """getGTestTests(path) - [name]
+
+        Return the tests available in the gtest executable."""
+
+ lines = Util.capture([path, '--gtest_list_tests']).split('\n')
+ nested_tests = []
+ for ln in lines:
+ if not ln.strip():
+ continue
+
+ prefix = ''
+ index = 0
+            while ln[index*2:index*2+2] == '  ':
+ index += 1
+ while len(nested_tests) > index:
+ nested_tests.pop()
+
+ ln = ln[index*2:]
+ if ln.endswith('.'):
+ nested_tests.append(ln)
+ else:
+ yield ''.join(nested_tests) + ln
+
+ def getTestsInDirectory(self, testSuite, path_in_suite,
+ litConfig, localConfig):
+ source_path = testSuite.getSourcePath(path_in_suite)
+ for filename in os.listdir(source_path):
+ # Check for the one subdirectory (build directory) tests will be in.
+ if filename != self.test_sub_dir:
+ continue
+
+ filepath = os.path.join(source_path, filename)
+ for subfilename in os.listdir(filepath):
+ if subfilename.endswith(self.test_suffix):
+ execpath = os.path.join(filepath, subfilename)
+
+ # Discover the tests in this executable.
+ for name in self.getGTestTests(execpath):
+ testPath = path_in_suite + (filename, subfilename, name)
+ yield Test.Test(testSuite, testPath, localConfig)
+
+ def execute(self, test, litConfig):
+ testPath,testName = os.path.split(test.getSourcePath())
+
+ cmd = [testPath, '--gtest_filter=' + testName]
+ out, err, exitCode = TestRunner.executeCommand(cmd)
+
+ if not exitCode:
+ return Test.PASS,''
+
+ return Test.FAIL, out + err
+
+###
+
+class FileBasedTest(object):
+ def getTestsInDirectory(self, testSuite, path_in_suite,
+ litConfig, localConfig):
+ source_path = testSuite.getSourcePath(path_in_suite)
+ for filename in os.listdir(source_path):
+ filepath = os.path.join(source_path, filename)
+ if not os.path.isdir(filepath):
+ base,ext = os.path.splitext(filename)
+ if ext in localConfig.suffixes:
+ yield Test.Test(testSuite, path_in_suite + (filename,),
+ localConfig)
+
+class ShTest(FileBasedTest):
+ def __init__(self, execute_external = False, require_and_and = False):
+ self.execute_external = execute_external
+ self.require_and_and = require_and_and
+
+ def execute(self, test, litConfig):
+ return TestRunner.executeShTest(test, litConfig,
+ self.execute_external,
+ self.require_and_and)
+
+class TclTest(FileBasedTest):
+ def execute(self, test, litConfig):
+ return TestRunner.executeTclTest(test, litConfig)
+
+###
+
+import re
+import tempfile
+
+class SyntaxCheckTest:
+ # FIXME: Refactor into generic test for running some command on a directory
+ # of inputs.
+
+ def __init__(self, compiler, dir, recursive, pattern, extra_cxx_args=[]):
+ self.compiler = str(compiler)
+ self.dir = str(dir)
+ self.recursive = bool(recursive)
+ self.pattern = re.compile(pattern)
+ self.extra_cxx_args = list(extra_cxx_args)
+
+ def getTestsInDirectory(self, testSuite, path_in_suite,
+ litConfig, localConfig):
+ for dirname,subdirs,filenames in os.walk(self.dir):
+ if not self.recursive:
+ subdirs[:] = []
+
+ for filename in filenames:
+ if (not self.pattern.match(filename) or
+ filename in localConfig.excludes):
+ continue
+
+ path = os.path.join(dirname,filename)
+ suffix = path[len(self.dir):]
+ if suffix.startswith(os.sep):
+ suffix = suffix[1:]
+ test = Test.Test(testSuite,
+ path_in_suite + tuple(suffix.split(os.sep)),
+ localConfig)
+ # FIXME: Hack?
+ test.source_path = path
+ yield test
+
+ def execute(self, test, litConfig):
+ tmp = tempfile.NamedTemporaryFile(suffix='.cpp')
+ print >>tmp, '#include "%s"' % test.source_path
+ tmp.flush()
+
+ cmd = [self.compiler, '-x', 'c++', '-fsyntax-only', tmp.name]
+ cmd.extend(self.extra_cxx_args)
+ out, err, exitCode = TestRunner.executeCommand(cmd)
+
+ diags = out + err
+ if not exitCode and not diags.strip():
+ return Test.PASS,''
+
+ return Test.FAIL, diags
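
[Editor's note: GoogleTest.getGTestTests flattens --gtest_list_tests output, where suite
lines end in '.' and cases are indented two spaces per nesting level, into qualified
names. A standalone sketch of the same walk over canned input (hypothetical helper, not
part of the commit):

    def flatten_gtest_listing(lines):
        # Mirrors GoogleTest.getGTestTests: two-space indents give the depth.
        nested = []
        for ln in lines:
            if not ln.strip():
                continue
            depth = 0
            while ln[depth*2:depth*2+2] == '  ':
                depth += 1
            del nested[depth:]
            ln = ln[depth*2:]
            if ln.endswith('.'):
                nested.append(ln)
            else:
                yield ''.join(nested) + ln

    print list(flatten_gtest_listing(['FooTest.', '  Bar', '  Baz']))
    # ['FooTest.Bar', 'FooTest.Baz']
]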
diff --git a/utils/lit/TestRunner.py b/utils/lit/TestRunner.py
new file mode 100644
index 000000000000..7b549ac1c615
--- /dev/null
+++ b/utils/lit/TestRunner.py
@@ -0,0 +1,505 @@
+import os, signal, subprocess, sys
+import StringIO
+
+import ShUtil
+import Test
+import Util
+
+import platform
+import tempfile
+
+class InternalShellError(Exception):
+ def __init__(self, command, message):
+ self.command = command
+ self.message = message
+
+# Don't use close_fds on Windows.
+kUseCloseFDs = platform.system() != 'Windows'
+def executeCommand(command, cwd=None, env=None):
+ p = subprocess.Popen(command, cwd=cwd,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=env)
+ out,err = p.communicate()
+ exitCode = p.wait()
+
+ # Detect Ctrl-C in subprocess.
+ if exitCode == -signal.SIGINT:
+ raise KeyboardInterrupt
+
+ return out, err, exitCode
+
+def executeShCmd(cmd, cfg, cwd, results):
+ if isinstance(cmd, ShUtil.Seq):
+ if cmd.op == ';':
+ res = executeShCmd(cmd.lhs, cfg, cwd, results)
+ return executeShCmd(cmd.rhs, cfg, cwd, results)
+
+ if cmd.op == '&':
+ raise NotImplementedError,"unsupported test command: '&'"
+
+ if cmd.op == '||':
+ res = executeShCmd(cmd.lhs, cfg, cwd, results)
+ if res != 0:
+ res = executeShCmd(cmd.rhs, cfg, cwd, results)
+ return res
+ if cmd.op == '&&':
+ res = executeShCmd(cmd.lhs, cfg, cwd, results)
+ if res is None:
+ return res
+
+ if res == 0:
+ res = executeShCmd(cmd.rhs, cfg, cwd, results)
+ return res
+
+ raise ValueError,'Unknown shell command: %r' % cmd.op
+
+ assert isinstance(cmd, ShUtil.Pipeline)
+ procs = []
+ input = subprocess.PIPE
+ stderrTempFiles = []
+    # To avoid deadlock, stderr for all but the last command in a pipeline
+    # is buffered in a temporary file and read back once the pipeline has
+    # completed (see stderrTempFiles below).
+ for i,j in enumerate(cmd.commands):
+ redirects = [(0,), (1,), (2,)]
+ for r in j.redirects:
+ if r[0] == ('>',2):
+ redirects[2] = [r[1], 'w', None]
+ elif r[0] == ('>&',2) and r[1] in '012':
+ redirects[2] = redirects[int(r[1])]
+ elif r[0] == ('>&',) or r[0] == ('&>',):
+ redirects[1] = redirects[2] = [r[1], 'w', None]
+ elif r[0] == ('>',):
+ redirects[1] = [r[1], 'w', None]
+ elif r[0] == ('<',):
+ redirects[0] = [r[1], 'r', None]
+ else:
+ raise NotImplementedError,"Unsupported redirect: %r" % (r,)
+
+ final_redirects = []
+ for index,r in enumerate(redirects):
+ if r == (0,):
+ result = input
+ elif r == (1,):
+ if index == 0:
+ raise NotImplementedError,"Unsupported redirect for stdin"
+ elif index == 1:
+ result = subprocess.PIPE
+ else:
+ result = subprocess.STDOUT
+ elif r == (2,):
+ if index != 2:
+ raise NotImplementedError,"Unsupported redirect on stdout"
+ result = subprocess.PIPE
+ else:
+ if r[2] is None:
+ r[2] = open(r[0], r[1])
+ result = r[2]
+ final_redirects.append(result)
+
+ stdin, stdout, stderr = final_redirects
+
+ # If stderr wants to come from stdout, but stdout isn't a pipe, then put
+ # stderr on a pipe and treat it as stdout.
+ if (stderr == subprocess.STDOUT and stdout != subprocess.PIPE):
+ stderr = subprocess.PIPE
+ stderrIsStdout = True
+ else:
+ stderrIsStdout = False
+
+ # Don't allow stderr on a PIPE except for the last
+ # process, this could deadlock.
+ #
+ # FIXME: This is slow, but so is deadlock.
+ if stderr == subprocess.PIPE and j != cmd.commands[-1]:
+ stderr = tempfile.TemporaryFile(mode='w+b')
+ stderrTempFiles.append((i, stderr))
+
+ # Resolve the executable path ourselves.
+ args = list(j.args)
+ args[0] = Util.which(args[0], cfg.environment['PATH'])
+ if not args[0]:
+ raise InternalShellError(j, '%r: command not found' % j.args[0])
+
+ procs.append(subprocess.Popen(args, cwd=cwd,
+ stdin = stdin,
+ stdout = stdout,
+ stderr = stderr,
+ env = cfg.environment,
+ close_fds = kUseCloseFDs))
+
+ # Immediately close stdin for any process taking stdin from us.
+ if stdin == subprocess.PIPE:
+ procs[-1].stdin.close()
+ procs[-1].stdin = None
+
+ # Update the current stdin source.
+ if stdout == subprocess.PIPE:
+ input = procs[-1].stdout
+ elif stderrIsStdout:
+ input = procs[-1].stderr
+ else:
+ input = subprocess.PIPE
+
+ # FIXME: There is probably still deadlock potential here. Yawn.
+ procData = [None] * len(procs)
+ procData[-1] = procs[-1].communicate()
+
+ for i in range(len(procs) - 1):
+ if procs[i].stdout is not None:
+ out = procs[i].stdout.read()
+ else:
+ out = ''
+ if procs[i].stderr is not None:
+ err = procs[i].stderr.read()
+ else:
+ err = ''
+ procData[i] = (out,err)
+
+ # Read stderr out of the temp files.
+ for i,f in stderrTempFiles:
+ f.seek(0, 0)
+ procData[i] = (procData[i][0], f.read())
+
+ exitCode = None
+ for i,(out,err) in enumerate(procData):
+ res = procs[i].wait()
+ # Detect Ctrl-C in subprocess.
+ if res == -signal.SIGINT:
+ raise KeyboardInterrupt
+
+ results.append((cmd.commands[i], out, err, res))
+ if cmd.pipe_err:
+ # Python treats the exit code as a signed char.
+ if res < 0:
+ exitCode = min(exitCode, res)
+ else:
+ exitCode = max(exitCode, res)
+ else:
+ exitCode = res
+
+ if cmd.negate:
+ exitCode = not exitCode
+
+ return exitCode
+
+def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
+ ln = ' &&\n'.join(commands)
+ try:
+ cmd = ShUtil.ShParser(ln, litConfig.isWindows).parse()
+ except:
+ return (Test.FAIL, "shell parser error on: %r" % ln)
+
+ results = []
+ try:
+ exitCode = executeShCmd(cmd, test.config, cwd, results)
+    except InternalShellError,e:
+        results.append((e.command, '', e.message + '\n', 255))
+        exitCode = 255
+
+    out = err = ''
+ for i,(cmd, cmd_out,cmd_err,res) in enumerate(results):
+ out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
+ out += 'Command %d Result: %r\n' % (i, res)
+ out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
+ out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
+
+ return out, err, exitCode
+
+def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
+ import TclUtil
+ cmds = []
+ for ln in commands:
+        # Given the unfortunate way LLVM's tests are written, the line gets
+ # backslash substitution done twice.
+ ln = TclUtil.TclLexer(ln).lex_unquoted(process_all = True)
+
+ try:
+ tokens = list(TclUtil.TclLexer(ln).lex())
+ except:
+ return (Test.FAIL, "Tcl lexer error on: %r" % ln)
+
+ # Validate there are no control tokens.
+ for t in tokens:
+ if not isinstance(t, str):
+ return (Test.FAIL,
+ "Invalid test line: %r containing %r" % (ln, t))
+
+ try:
+ cmds.append(TclUtil.TclExecCommand(tokens).parse_pipeline())
+ except:
+ return (Test.FAIL, "Tcl 'exec' parse error on: %r" % ln)
+
+ cmd = cmds[0]
+ for c in cmds[1:]:
+ cmd = ShUtil.Seq(cmd, '&&', c)
+
+ if litConfig.useTclAsSh:
+ script = tmpBase + '.script'
+
+ # Write script file
+ f = open(script,'w')
+ print >>f, 'set -o pipefail'
+ cmd.toShell(f, pipefail = True)
+ f.close()
+
+ if 0:
+ print >>sys.stdout, cmd
+ print >>sys.stdout, open(script).read()
+ print >>sys.stdout
+ return '', '', 0
+
+ command = ['/bin/bash', script]
+ out,err,exitCode = executeCommand(command, cwd=cwd,
+ env=test.config.environment)
+
+ # Tcl commands fail on standard error output.
+ if err:
+ exitCode = 1
+ out = 'Command has output on stderr!\n\n' + out
+
+ return out,err,exitCode
+ else:
+ results = []
+ try:
+ exitCode = executeShCmd(cmd, test.config, cwd, results)
+ except InternalShellError,e:
+ results.append((e.command, '', e.message + '\n', 255))
+ exitCode = 255
+
+ out = err = ''
+
+ # Tcl commands fail on standard error output.
+ if [True for _,_,err,res in results if err]:
+ exitCode = 1
+ out += 'Command has output on stderr!\n\n'
+
+ for i,(cmd, cmd_out, cmd_err, res) in enumerate(results):
+ out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
+ out += 'Command %d Result: %r\n' % (i, res)
+ out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
+ out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
+
+ return out, err, exitCode
+
+def executeScript(test, litConfig, tmpBase, commands, cwd):
+ script = tmpBase + '.script'
+ if litConfig.isWindows:
+ script += '.bat'
+
+ # Write script file
+ f = open(script,'w')
+ if litConfig.isWindows:
+ f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
+ else:
+ f.write(' &&\n'.join(commands))
+ f.write('\n')
+ f.close()
+
+ if litConfig.isWindows:
+ command = ['cmd','/c', script]
+ else:
+ command = ['/bin/sh', script]
+ if litConfig.useValgrind:
+ # FIXME: Running valgrind on sh is overkill. We probably could just
+ # run on clang with no real loss.
+ valgrindArgs = ['valgrind', '-q',
+ '--tool=memcheck', '--trace-children=yes',
+ '--error-exitcode=123']
+ valgrindArgs.extend(litConfig.valgrindArgs)
+
+ command = valgrindArgs + command
+
+ return executeCommand(command, cwd=cwd, env=test.config.environment)
+
+def parseIntegratedTestScript(test, xfailHasColon, requireAndAnd):
+ """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
+ script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
+    information. The RUN lines will also have variable substitution performed.
+ """
+
+    # Get the temporary location; this is always relative to the test suite
+    # root, not the test source root.
+ #
+ # FIXME: This should not be here?
+ sourcepath = test.getSourcePath()
+ execpath = test.getExecPath()
+ execdir,execbase = os.path.split(execpath)
+ tmpBase = os.path.join(execdir, 'Output', execbase)
+
+ # We use #_MARKER_# to hide %% while we do the other substitutions.
+ substitutions = [('%%', '#_MARKER_#')]
+ substitutions.extend(test.config.substitutions)
+ substitutions.extend([('%s', sourcepath),
+ ('%S', os.path.dirname(sourcepath)),
+ ('%p', os.path.dirname(sourcepath)),
+ ('%t', tmpBase + '.tmp'),
+ # FIXME: Remove this once we kill DejaGNU.
+ ('%abs_tmp', tmpBase + '.tmp'),
+ ('#_MARKER_#', '%')])
+
+ # Collect the test lines from the script.
+ script = []
+ xfails = []
+ xtargets = []
+ for ln in open(sourcepath):
+ if 'RUN:' in ln:
+ # Isolate the command to run.
+ index = ln.index('RUN:')
+ ln = ln[index+4:]
+
+ # Trim trailing whitespace.
+ ln = ln.rstrip()
+
+ # Collapse lines with trailing '\\'.
+ if script and script[-1][-1] == '\\':
+ script[-1] = script[-1][:-1] + ln
+ else:
+ script.append(ln)
+ elif xfailHasColon and 'XFAIL:' in ln:
+ items = ln[ln.index('XFAIL:') + 6:].split(',')
+ xfails.extend([s.strip() for s in items])
+ elif not xfailHasColon and 'XFAIL' in ln:
+ items = ln[ln.index('XFAIL') + 5:].split(',')
+ xfails.extend([s.strip() for s in items])
+ elif 'XTARGET:' in ln:
+ items = ln[ln.index('XTARGET:') + 8:].split(',')
+ xtargets.extend([s.strip() for s in items])
+ elif 'END.' in ln:
+ # Check for END. lines.
+ if ln[ln.index('END.'):].strip() == 'END.':
+ break
+
+ # Apply substitutions to the script.
+ def processLine(ln):
+ # Apply substitutions
+ for a,b in substitutions:
+ ln = ln.replace(a,b)
+
+ # Strip the trailing newline and any extra whitespace.
+ return ln.strip()
+ script = map(processLine, script)
+
+ # Verify the script contains a run line.
+ if not script:
+ return (Test.UNRESOLVED, "Test has no run line!")
+
+ if script[-1][-1] == '\\':
+ return (Test.UNRESOLVED, "Test has unterminated run lines (with '\\')")
+
+ # Validate interior lines for '&&', a lovely historical artifact.
+ if requireAndAnd:
+ for i in range(len(script) - 1):
+ ln = script[i]
+
+ if not ln.endswith('&&'):
+ return (Test.FAIL,
+ ("MISSING \'&&\': %s\n" +
+ "FOLLOWED BY : %s\n") % (ln, script[i + 1]))
+
+ # Strip off '&&'
+ script[i] = ln[:-2]
+
+ return script,xfails,xtargets,tmpBase,execdir
+
+def formatTestOutput(status, out, err, exitCode, script):
+ output = StringIO.StringIO()
+ print >>output, "Script:"
+ print >>output, "--"
+ print >>output, '\n'.join(script)
+ print >>output, "--"
+ print >>output, "Exit Code: %r" % exitCode
+ print >>output, "Command Output (stdout):"
+ print >>output, "--"
+ output.write(out)
+ print >>output, "--"
+ print >>output, "Command Output (stderr):"
+ print >>output, "--"
+ output.write(err)
+ print >>output, "--"
+ return (status, output.getvalue())
+
+def executeTclTest(test, litConfig):
+ if test.config.unsupported:
+ return (Test.UNSUPPORTED, 'Test is unsupported')
+
+ res = parseIntegratedTestScript(test, True, False)
+ if len(res) == 2:
+ return res
+
+ script, xfails, xtargets, tmpBase, execdir = res
+
+ if litConfig.noExecute:
+ return (Test.PASS, '')
+
+ # Create the output directory if it does not already exist.
+ Util.mkdir_p(os.path.dirname(tmpBase))
+
+ res = executeTclScriptInternal(test, litConfig, tmpBase, script, execdir)
+ if len(res) == 2:
+ return res
+
+ isXFail = False
+ for item in xfails:
+ if item == '*' or item in test.suite.config.target_triple:
+ isXFail = True
+ break
+
+ # If this is XFAIL, see if it is expected to pass on this target.
+ if isXFail:
+ for item in xtargets:
+ if item == '*' or item in test.suite.config.target_triple:
+ isXFail = False
+ break
+
+ out,err,exitCode = res
+ if isXFail:
+ ok = exitCode != 0
+ status = (Test.XPASS, Test.XFAIL)[ok]
+ else:
+ ok = exitCode == 0
+ status = (Test.FAIL, Test.PASS)[ok]
+
+ if ok:
+ return (status,'')
+
+ return formatTestOutput(status, out, err, exitCode, script)
+
+def executeShTest(test, litConfig, useExternalSh, requireAndAnd):
+ if test.config.unsupported:
+ return (Test.UNSUPPORTED, 'Test is unsupported')
+
+ res = parseIntegratedTestScript(test, False, requireAndAnd)
+ if len(res) == 2:
+ return res
+
+ script, xfails, xtargets, tmpBase, execdir = res
+
+ if litConfig.noExecute:
+ return (Test.PASS, '')
+
+ # Create the output directory if it does not already exist.
+ Util.mkdir_p(os.path.dirname(tmpBase))
+
+ if useExternalSh:
+ res = executeScript(test, litConfig, tmpBase, script, execdir)
+ else:
+ res = executeScriptInternal(test, litConfig, tmpBase, script, execdir)
+ if len(res) == 2:
+ return res
+
+ out,err,exitCode = res
+ if xfails:
+ ok = exitCode != 0
+ status = (Test.XPASS, Test.XFAIL)[ok]
+ else:
+ ok = exitCode == 0
+ status = (Test.FAIL, Test.PASS)[ok]
+
+ if ok:
+ return (status,'')
+
+ return formatTestOutput(status, out, err, exitCode, script)
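
[Editor's note: executeCommand is the lowest-level primitive the runner builds on; it
returns the captured streams plus the exit code, and converts a SIGINT death of the
child into KeyboardInterrupt:

    import TestRunner

    out, err, code = TestRunner.executeCommand(['echo', 'hello'])
    print repr(out), repr(err), code   # 'hello\n' '' 0
]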
diff --git a/utils/lit/TestingConfig.py b/utils/lit/TestingConfig.py
new file mode 100644
index 000000000000..e4874d7a82ef
--- /dev/null
+++ b/utils/lit/TestingConfig.py
@@ -0,0 +1,96 @@
+import os
+
+class TestingConfig:
+ """"
+ TestingConfig - Information on the tests inside a suite.
+ """
+
+ @staticmethod
+ def frompath(path, parent, litConfig, mustExist, config = None):
+ if config is None:
+ # Set the environment based on the command line arguments.
+ environment = {
+ 'PATH' : os.pathsep.join(litConfig.path +
+ [os.environ.get('PATH','')]),
+ 'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
+ 'LLVM_DISABLE_CRT_DEBUG' : '1',
+ }
+
+ config = TestingConfig(parent,
+ name = '<unnamed>',
+ suffixes = set(),
+ test_format = None,
+ environment = environment,
+ substitutions = [],
+ unsupported = False,
+ on_clone = None,
+ test_exec_root = None,
+ test_source_root = None,
+ excludes = [])
+
+ if os.path.exists(path):
+ # FIXME: Improve detection and error reporting of errors in the
+ # config file.
+ f = open(path)
+ cfg_globals = dict(globals())
+ cfg_globals['config'] = config
+ cfg_globals['lit'] = litConfig
+ cfg_globals['__file__'] = path
+ try:
+ exec f in cfg_globals
+ except SystemExit,status:
+ # We allow normal system exit inside a config file to just
+ # return control without error.
+ if status.args:
+ raise
+ f.close()
+ elif mustExist:
+ litConfig.fatal('unable to load config from %r ' % path)
+
+ config.finish(litConfig)
+ return config
+
+ def __init__(self, parent, name, suffixes, test_format,
+ environment, substitutions, unsupported, on_clone,
+ test_exec_root, test_source_root, excludes):
+ self.parent = parent
+ self.name = str(name)
+ self.suffixes = set(suffixes)
+ self.test_format = test_format
+ self.environment = dict(environment)
+ self.substitutions = list(substitutions)
+ self.unsupported = unsupported
+ self.on_clone = on_clone
+ self.test_exec_root = test_exec_root
+ self.test_source_root = test_source_root
+ self.excludes = set(excludes)
+
+ def clone(self, path):
+ # FIXME: Chain implementations?
+ #
+ # FIXME: Allow extra parameters?
+ cfg = TestingConfig(self, self.name, self.suffixes, self.test_format,
+ self.environment, self.substitutions,
+ self.unsupported, self.on_clone,
+ self.test_exec_root, self.test_source_root,
+ self.excludes)
+ if cfg.on_clone:
+ cfg.on_clone(self, cfg, path)
+ return cfg
+
+ def finish(self, litConfig):
+ """finish() - Finish this config object, after loading is complete."""
+
+ self.name = str(self.name)
+ self.suffixes = set(self.suffixes)
+ self.environment = dict(self.environment)
+ self.substitutions = list(self.substitutions)
+ if self.test_exec_root is not None:
+ # FIXME: This should really only be suite in test suite config
+ # files. Should we distinguish them?
+ self.test_exec_root = str(self.test_exec_root)
+ if self.test_source_root is not None:
+ # FIXME: This should really only be suite in test suite config
+ # files. Should we distinguish them?
+ self.test_source_root = str(self.test_source_root)
+ self.excludes = set(self.excludes)
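
[Editor's note: calling frompath with a missing path and mustExist=False yields the
default '<unnamed>' config, which is a convenient way to inspect the defaults. The path
below is a placeholder:

    import LitConfig
    from TestingConfig import TestingConfig

    lc = LitConfig.LitConfig(progname='lit', path=[], quiet=False,
                             useValgrind=False, valgrindArgs=[],
                             useTclAsSh=False, noExecute=False,
                             debug=False, isWindows=False)
    cfg = TestingConfig.frompath('/nonexistent/lit.cfg', parent=None,
                                 litConfig=lc, mustExist=False)
    print cfg.name, cfg.environment['LLVM_DISABLE_CRT_DEBUG']
    # <unnamed> 1
]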
diff --git a/utils/lit/Util.py b/utils/lit/Util.py
new file mode 100644
index 000000000000..e62a8ed81dc1
--- /dev/null
+++ b/utils/lit/Util.py
@@ -0,0 +1,124 @@
+import os, sys
+
+def detectCPUs():
+ """
+ Detects the number of CPUs on a system. Cribbed from pp.
+ """
+ # Linux, Unix and MacOS:
+ if hasattr(os, "sysconf"):
+ if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
+ # Linux & Unix:
+ ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
+ if isinstance(ncpus, int) and ncpus > 0:
+ return ncpus
+ else: # OSX:
+ return int(os.popen2("sysctl -n hw.ncpu")[1].read())
+ # Windows:
+ if os.environ.has_key("NUMBER_OF_PROCESSORS"):
+ ncpus = int(os.environ["NUMBER_OF_PROCESSORS"]);
+ if ncpus > 0:
+ return ncpus
+ return 1 # Default
+
+def mkdir_p(path):
+ """mkdir_p(path) - Make the "path" directory, if it does not exist; this
+ will also make directories for any missing parent directories."""
+ import errno
+
+ if not path or os.path.exists(path):
+ return
+
+ parent = os.path.dirname(path)
+ if parent != path:
+ mkdir_p(parent)
+
+ try:
+ os.mkdir(path)
+ except OSError,e:
+ # Ignore EEXIST, which may occur during a race condition.
+ if e.errno != errno.EEXIST:
+ raise
+
+def capture(args):
+    """capture(command) - Run the given command (or argv list) in a shell and
+    return the standard output."""
+    import subprocess
+ p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out,_ = p.communicate()
+ return out
+
+def which(command, paths = None):
+ """which(command, [paths]) - Look up the given command in the paths string
+ (or the PATH environment variable, if unspecified)."""
+
+ if paths is None:
+ paths = os.environ.get('PATH','')
+
+ # Check for absolute match first.
+ if os.path.exists(command):
+ return command
+
+ # Would be nice if Python had a lib function for this.
+ if not paths:
+ paths = os.defpath
+
+ # Get suffixes to search.
+ pathext = os.environ.get('PATHEXT', '').split(os.pathsep)
+
+ # Search the paths...
+ for path in paths.split(os.pathsep):
+ for ext in pathext:
+ p = os.path.join(path, command + ext)
+ if os.path.exists(p):
+ return p
+
+ return None
+
+def printHistogram(items, title = 'Items'):
+ import itertools, math
+
+ items.sort(key = lambda (_,v): v)
+
+ maxValue = max([v for _,v in items])
+
+ # Select first "nice" bar height that produces more than 10 bars.
+ power = int(math.ceil(math.log(maxValue, 10)))
+ for inc in itertools.cycle((5, 2, 2.5, 1)):
+ barH = inc * 10**power
+ N = int(math.ceil(maxValue / barH))
+ if N > 10:
+ break
+ elif inc == 1:
+ power -= 1
+
+ histo = [set() for i in range(N)]
+ for name,v in items:
+ bin = min(int(N * v/maxValue), N-1)
+ histo[bin].add(name)
+
+ barW = 40
+ hr = '-' * (barW + 34)
+ print '\nSlowest %s:' % title
+ print hr
+ for name,value in items[-20:]:
+ print '%.2fs: %s' % (value, name)
+ print '\n%s Times:' % title
+ print hr
+ pDigits = int(math.ceil(math.log(maxValue, 10)))
+ pfDigits = max(0, 3-pDigits)
+ if pfDigits:
+ pDigits += pfDigits + 1
+ cDigits = int(math.ceil(math.log(len(items), 10)))
+ print "[%s] :: [%s] :: [%s]" % ('Range'.center((pDigits+1)*2 + 3),
+ 'Percentage'.center(barW),
+ 'Count'.center(cDigits*2 + 1))
+ print hr
+ for i,row in enumerate(histo):
+ pct = float(len(row)) / len(items)
+ w = int(barW * pct)
+ print "[%*.*fs,%*.*fs)" % (pDigits, pfDigits, i*barH,
+ pDigits, pfDigits, (i+1)*barH),
+ print ":: [%s%s] :: [%*d/%*d]" % ('*'*w, ' '*(barW-w),
+ cDigits, len(row),
+ cDigits, len(items))
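+# The bar-height loop above shrinks barH until more than 10 buckets are
+# produced; e.g. a maxValue of 2.5s settles on barH = 0.2s, giving 13 buckets.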
+
diff --git a/utils/lit/lit.py b/utils/lit/lit.py
new file mode 100755
index 000000000000..5b24286e22f0
--- /dev/null
+++ b/utils/lit/lit.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python
+
+"""
+lit - LLVM Integrated Tester.
+
+See lit.pod for more information.
+"""
+
+import math, os, platform, random, re, sys, time, threading, traceback
+
+import ProgressBar
+import TestRunner
+import Util
+
+from TestingConfig import TestingConfig
+import LitConfig
+import Test
+
+# FIXME: Rename to 'config.lit', 'site.lit', and 'local.lit' ?
+kConfigName = 'lit.cfg'
+kSiteConfigName = 'lit.site.cfg'
+kLocalConfigName = 'lit.local.cfg'
+
+class TestingProgressDisplay:
+ def __init__(self, opts, numTests, progressBar=None):
+ self.opts = opts
+ self.numTests = numTests
+ self.current = None
+ self.lock = threading.Lock()
+ self.progressBar = progressBar
+ self.completed = 0
+
+ def update(self, test):
+ # Avoid locking overhead in quiet mode
+ if self.opts.quiet and not test.result.isFailure:
+ self.completed += 1
+ return
+
+ # Output lock.
+ self.lock.acquire()
+ try:
+ self.handleUpdate(test)
+ finally:
+ self.lock.release()
+
+ def finish(self):
+ if self.progressBar:
+ self.progressBar.clear()
+ elif self.opts.quiet:
+ pass
+ elif self.opts.succinct:
+ sys.stdout.write('\n')
+
+ def handleUpdate(self, test):
+ self.completed += 1
+ if self.progressBar:
+ self.progressBar.update(float(self.completed)/self.numTests,
+ test.getFullName())
+
+ if self.opts.succinct and not test.result.isFailure:
+ return
+
+ if self.progressBar:
+ self.progressBar.clear()
+
+ print '%s: %s (%d of %d)' % (test.result.name, test.getFullName(),
+ self.completed, self.numTests)
+
+ if test.result.isFailure and self.opts.showOutput:
+ print "%s TEST '%s' FAILED %s" % ('*'*20, test.getFullName(),
+ '*'*20)
+ print test.output
+ print "*" * 20
+
+ sys.stdout.flush()
+
+class TestProvider:
+ def __init__(self, tests, maxTime):
+ self.maxTime = maxTime
+ self.iter = iter(tests)
+ self.lock = threading.Lock()
+ self.startTime = time.time()
+
+ def get(self):
+ # Check if we have run out of time.
+ if self.maxTime is not None:
+ if time.time() - self.startTime > self.maxTime:
+ return None
+
+ # Otherwise take the next test.
+ self.lock.acquire()
+ try:
+ item = self.iter.next()
+ except StopIteration:
+ item = None
+ self.lock.release()
+ return item
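+# Each Tester thread below pulls tests from this shared provider; a None
+# result signals either an exhausted iterator or the --max-time deadline, so
+# workers simply exit their loop.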
+
+class Tester(threading.Thread):
+ def __init__(self, litConfig, provider, display):
+ threading.Thread.__init__(self)
+ self.litConfig = litConfig
+ self.provider = provider
+ self.display = display
+
+ def run(self):
+ while 1:
+ item = self.provider.get()
+ if item is None:
+ break
+ self.runTest(item)
+
+ def runTest(self, test):
+ result = None
+ startTime = time.time()
+ try:
+ result, output = test.config.test_format.execute(test,
+ self.litConfig)
+ except KeyboardInterrupt:
+ # This is a sad hack. Unfortunately subprocess goes
+ # bonkers with ctrl-c and we start forking merrily.
+ print '\nCtrl-C detected, goodbye.'
+ os.kill(0,9)
+ except:
+ if self.litConfig.debug:
+ raise
+ result = Test.UNRESOLVED
+ output = 'Exception during script execution:\n'
+ output += traceback.format_exc()
+ output += '\n'
+ elapsed = time.time() - startTime
+
+ test.setResult(result, output, elapsed)
+ self.display.update(test)
+
+def dirContainsTestSuite(path):
+ cfgpath = os.path.join(path, kSiteConfigName)
+ if os.path.exists(cfgpath):
+ return cfgpath
+ cfgpath = os.path.join(path, kConfigName)
+ if os.path.exists(cfgpath):
+ return cfgpath
+
+def getTestSuite(item, litConfig, cache):
+ """getTestSuite(item, litConfig, cache) -> (suite, relative_path)
+
+ Find the test suite containing @arg item.
+
+ @retval (None, ...) - Indicates no test suite contains @arg item.
+ @retval (suite, relative_path) - The suite that @arg item is in, and its
+ relative path inside that suite.
+ """
+ def search1(path):
+ # Check for a site config or a lit config.
+ cfgpath = dirContainsTestSuite(path)
+
+ # If we didn't find a config file, keep looking.
+ if not cfgpath:
+ parent,base = os.path.split(path)
+ if parent == path:
+ return (None, ())
+
+ ts, relative = search(parent)
+ return (ts, relative + (base,))
+
+ # We found a config file, load it.
+ if litConfig.debug:
+ litConfig.note('loading suite config %r' % cfgpath)
+
+ cfg = TestingConfig.frompath(cfgpath, None, litConfig, mustExist = True)
+ source_root = os.path.realpath(cfg.test_source_root or path)
+ exec_root = os.path.realpath(cfg.test_exec_root or path)
+ return Test.TestSuite(cfg.name, source_root, exec_root, cfg), ()
+
+ def search(path):
+ # Check for an already instantiated test suite.
+ res = cache.get(path)
+ if res is None:
+ cache[path] = res = search1(path)
+ return res
+
+ # Canonicalize the path.
+ item = os.path.realpath(item)
+
+ # Skip files and virtual components.
+ components = []
+ while not os.path.isdir(item):
+ parent,base = os.path.split(item)
+ if parent == item:
+ return (None, ())
+ components.append(base)
+ item = parent
+ components.reverse()
+
+ ts, relative = search(item)
+ return ts, tuple(relative + tuple(components))
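+# For instance (paths illustrative): given /src/test/CodeGen/add.ll with a
+# lit.cfg in /src/test, this returns that suite together with the relative
+# path ('CodeGen', 'add.ll').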
+
+def getLocalConfig(ts, path_in_suite, litConfig, cache):
+ def search1(path_in_suite):
+ # Get the parent config.
+ if not path_in_suite:
+ parent = ts.config
+ else:
+ parent = search(path_in_suite[:-1])
+
+ # Load the local configuration.
+ source_path = ts.getSourcePath(path_in_suite)
+ cfgpath = os.path.join(source_path, kLocalConfigName)
+ if litConfig.debug:
+ litConfig.note('loading local config %r' % cfgpath)
+ return TestingConfig.frompath(cfgpath, parent, litConfig,
+ mustExist = False,
+ config = parent.clone(cfgpath))
+
+ def search(path_in_suite):
+ key = (ts, path_in_suite)
+ res = cache.get(key)
+ if res is None:
+ cache[key] = res = search1(path_in_suite)
+ return res
+
+ return search(path_in_suite)
+
+def getTests(path, litConfig, testSuiteCache, localConfigCache):
+ # Find the test suite for this input and its relative path.
+ ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache)
+ if ts is None:
+ litConfig.warning('unable to find test suite for %r' % path)
+ return ()
+
+ if litConfig.debug:
+ litConfig.note('resolved input %r to %r::%r' % (path, ts.name,
+ path_in_suite))
+
+ return getTestsInSuite(ts, path_in_suite, litConfig,
+ testSuiteCache, localConfigCache)
+
+def getTestsInSuite(ts, path_in_suite, litConfig,
+ testSuiteCache, localConfigCache):
+ # Check that the source path exists (errors here are reported by the
+ # caller).
+ source_path = ts.getSourcePath(path_in_suite)
+ if not os.path.exists(source_path):
+ return
+
+ # Check if the user named a test directly.
+ if not os.path.isdir(source_path):
+ lc = getLocalConfig(ts, path_in_suite[:-1], litConfig, localConfigCache)
+ yield Test.Test(ts, path_in_suite, lc)
+ return
+
+ # Otherwise we have a directory to search for tests, start by getting the
+ # local configuration.
+ lc = getLocalConfig(ts, path_in_suite, litConfig, localConfigCache)
+
+ # Search for tests.
+ for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
+ litConfig, lc):
+ yield res
+
+ # Search subdirectories.
+ for filename in os.listdir(source_path):
+ # FIXME: This doesn't belong here?
+ if filename in ('Output', '.svn') or filename in lc.excludes:
+ continue
+
+ # Ignore non-directories.
+ file_sourcepath = os.path.join(source_path, filename)
+ if not os.path.isdir(file_sourcepath):
+ continue
+
+ # Check for nested test suites, first in the execpath in case there is a
+ # site configuration and then in the source path.
+ file_execpath = ts.getExecPath(path_in_suite + (filename,))
+ if dirContainsTestSuite(file_execpath):
+ subiter = getTests(file_execpath, litConfig,
+ testSuiteCache, localConfigCache)
+ elif dirContainsTestSuite(file_sourcepath):
+ subiter = getTests(file_sourcepath, litConfig,
+ testSuiteCache, localConfigCache)
+ else:
+ # Otherwise, continue loading from inside this test suite.
+ subiter = getTestsInSuite(ts, path_in_suite + (filename,),
+ litConfig, testSuiteCache,
+ localConfigCache)
+
+ for res in subiter:
+ yield res
+
+def runTests(numThreads, litConfig, provider, display):
+ # If only using one testing thread, don't use threads at all; this lets us
+ # profile, among other things.
+ if numThreads == 1:
+ t = Tester(litConfig, provider, display)
+ t.run()
+ return
+
+ # Otherwise spin up the testing threads and wait for them to finish.
+ testers = [Tester(litConfig, provider, display)
+ for i in range(numThreads)]
+ for t in testers:
+ t.start()
+ try:
+ for t in testers:
+ t.join()
+ except KeyboardInterrupt:
+ sys.exit(2)
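+# Keeping the numThreads == 1 case in the main thread means that e.g.
+#   python -m cProfile lit.py -j1 <tests>
+# profiles the test logic itself rather than a worker thread.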
+
+def main():
+ global options
+ from optparse import OptionParser, OptionGroup
+ parser = OptionParser("usage: %prog [options] {file-or-path}")
+
+ parser.add_option("-j", "--threads", dest="numThreads", metavar="N",
+ help="Number of testing threads",
+ type=int, action="store", default=None)
+
+ group = OptionGroup(parser, "Output Format")
+ # FIXME: I find these names very confusing, although I like the
+ # functionality.
+ group.add_option("-q", "--quiet", dest="quiet",
+ help="Suppress no error output",
+ action="store_true", default=False)
+ group.add_option("-s", "--succinct", dest="succinct",
+ help="Reduce amount of output",
+ action="store_true", default=False)
+ group.add_option("-v", "--verbose", dest="showOutput",
+ help="Show all test output",
+ action="store_true", default=False)
+ group.add_option("", "--no-progress-bar", dest="useProgressBar",
+ help="Do not use curses based progress bar",
+ action="store_false", default=True)
+ parser.add_option_group(group)
+
+ group = OptionGroup(parser, "Test Execution")
+ group.add_option("", "--path", dest="path",
+ help="Additional paths to add to testing environment",
+ action="append", type=str, default=[])
+ group.add_option("", "--vg", dest="useValgrind",
+ help="Run tests under valgrind",
+ action="store_true", default=False)
+ group.add_option("", "--vg-arg", dest="valgrindArgs", metavar="ARG",
+ help="Specify an extra argument for valgrind",
+ type=str, action="append", default=[])
+ group.add_option("", "--time-tests", dest="timeTests",
+ help="Track elapsed wall time for each test",
+ action="store_true", default=False)
+ group.add_option("", "--no-execute", dest="noExecute",
+ help="Don't execute any tests (assume PASS)",
+ action="store_true", default=False)
+ parser.add_option_group(group)
+
+ group = OptionGroup(parser, "Test Selection")
+ group.add_option("", "--max-tests", dest="maxTests", metavar="N",
+ help="Maximum number of tests to run",
+ action="store", type=int, default=None)
+ group.add_option("", "--max-time", dest="maxTime", metavar="N",
+ help="Maximum time to spend testing (in seconds)",
+ action="store", type=float, default=None)
+ group.add_option("", "--shuffle", dest="shuffle",
+ help="Run tests in random order",
+ action="store_true", default=False)
+ parser.add_option_group(group)
+
+ group = OptionGroup(parser, "Debug and Experimental Options")
+ group.add_option("", "--debug", dest="debug",
+ help="Enable debugging (for 'lit' development)",
+ action="store_true", default=False)
+ group.add_option("", "--show-suites", dest="showSuites",
+ help="Show discovered test suites",
+ action="store_true", default=False)
+ group.add_option("", "--no-tcl-as-sh", dest="useTclAsSh",
+ help="Don't run Tcl scripts using 'sh'",
+ action="store_false", default=True)
+ parser.add_option_group(group)
+
+ (opts, args) = parser.parse_args()
+
+ if not args:
+ parser.error('No inputs specified')
+
+ if opts.numThreads is None:
+ opts.numThreads = Util.detectCPUs()
+
+ inputs = args
+
+ # Create the global config object.
+ litConfig = LitConfig.LitConfig(progname = os.path.basename(sys.argv[0]),
+ path = opts.path,
+ quiet = opts.quiet,
+ useValgrind = opts.useValgrind,
+ valgrindArgs = opts.valgrindArgs,
+ useTclAsSh = opts.useTclAsSh,
+ noExecute = opts.noExecute,
+ debug = opts.debug,
+ isWindows = (platform.system()=='Windows'))
+
+ # Load the tests from the inputs.
+ tests = []
+ testSuiteCache = {}
+ localConfigCache = {}
+ for input in inputs:
+ prev = len(tests)
+ tests.extend(getTests(input, litConfig,
+ testSuiteCache, localConfigCache))
+ if prev == len(tests):
+ litConfig.warning('input %r contained no tests' % input)
+
+ # If there were any errors during test discovery, exit now.
+ if litConfig.numErrors:
+ print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
+ sys.exit(2)
+
+ if opts.showSuites:
+ suitesAndTests = dict([(ts,[])
+ for ts,_ in testSuiteCache.values()])
+ for t in tests:
+ suitesAndTests[t.suite].append(t)
+
+ print '-- Test Suites --'
+ suitesAndTests = suitesAndTests.items()
+ suitesAndTests.sort(key = lambda (ts,_): ts.name)
+ for ts,tests in suitesAndTests:
+ print ' %s - %d tests' %(ts.name, len(tests))
+ print ' Source Root: %s' % ts.source_root
+ print ' Exec Root : %s' % ts.exec_root
+
+ # Select and order the tests.
+ numTotalTests = len(tests)
+ if opts.shuffle:
+ random.shuffle(tests)
+ else:
+ tests.sort(key = lambda t: t.getFullName())
+ if opts.maxTests is not None:
+ tests = tests[:opts.maxTests]
+
+ extra = ''
+ if len(tests) != numTotalTests:
+ extra = ' of %d' % numTotalTests
+ header = '-- Testing: %d%s tests, %d threads --'%(len(tests),extra,
+ opts.numThreads)
+
+ progressBar = None
+ if not opts.quiet:
+ if opts.succinct and opts.useProgressBar:
+ try:
+ tc = ProgressBar.TerminalController()
+ progressBar = ProgressBar.ProgressBar(tc, header)
+ except ValueError:
+ print header
+ progressBar = ProgressBar.SimpleProgressBar('Testing: ')
+ else:
+ print header
+
+ # Don't create more threads than tests.
+ opts.numThreads = min(len(tests), opts.numThreads)
+
+ startTime = time.time()
+ display = TestingProgressDisplay(opts, len(tests), progressBar)
+ provider = TestProvider(tests, opts.maxTime)
+ runTests(opts.numThreads, litConfig, provider, display)
+ display.finish()
+
+ if not opts.quiet:
+ print 'Testing Time: %.2fs'%(time.time() - startTime)
+
+ # Update results for any tests which weren't run.
+ for t in tests:
+ if t.result is None:
+ t.setResult(Test.UNRESOLVED, '', 0.0)
+
+ # List test results organized by kind.
+ hasFailures = False
+ byCode = {}
+ for t in tests:
+ if t.result not in byCode:
+ byCode[t.result] = []
+ byCode[t.result].append(t)
+ if t.result.isFailure:
+ hasFailures = True
+
+ # FIXME: Show unresolved and (optionally) unsupported tests.
+ for title,code in (('Unexpected Passing Tests', Test.XPASS),
+ ('Failing Tests', Test.FAIL)):
+ elts = byCode.get(code)
+ if not elts:
+ continue
+ print '*'*20
+ print '%s (%d):' % (title, len(elts))
+ for t in elts:
+ print ' %s' % t.getFullName()
+ print
+
+ if opts.timeTests:
+ byTime = list(tests)
+ byTime.sort(key = lambda t: t.elapsed)
+ if byTime:
+ Util.printHistogram([(t.getFullName(), t.elapsed) for t in byTime],
+ title='Tests')
+
+ for name,code in (('Expected Passes ', Test.PASS),
+ ('Expected Failures ', Test.XFAIL),
+ ('Unsupported Tests ', Test.UNSUPPORTED),
+ ('Unresolved Tests ', Test.UNRESOLVED),
+ ('Unexpected Passes ', Test.XPASS),
+ ('Unexpected Failures', Test.FAIL),):
+ if opts.quiet and not code.isFailure:
+ continue
+ N = len(byCode.get(code,[]))
+ if N:
+ print ' %s: %d' % (name,N)
+
+ # If we encountered any additional errors, exit abnormally.
+ if litConfig.numErrors:
+ print >>sys.stderr, '\n%d error(s), exiting.' % litConfig.numErrors
+ sys.exit(2)
+
+ # Warn about warnings.
+ if litConfig.numWarnings:
+ print >>sys.stderr, '\n%d warning(s) in tests.' % litConfig.numWarnings
+
+ if hasFailures:
+ sys.exit(1)
+ sys.exit(0)
+
+if __name__=='__main__':
+    # Bump the GIL check interval; it's more important to get any one thread
+    # to a blocking operation (hopefully exec) than to try to unblock other
+    # threads.
+ import sys
+ sys.setcheckinterval(1000)
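+    # The default interval of 100 bytecodes would preempt workers far more
+    # often; 1000 trades scheduling fairness for less GIL churn while tests
+    # sit in exec/wait.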
+ main()
diff --git a/utils/llvm.grm b/utils/llvm.grm
index 2ca849d67976..86a707a925d9 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -53,8 +53,8 @@ FPVAL ::= ESAPINTVAL ^ "." ^ EUAPINTVAL | "0x" ^ HexDigitSeq ;
The rest of this file is derived directly from llvmAsmParser.y.
*)
-ArithmeticOps ::= add | fadd | sub | fsub | mul | fmul |
- udiv | sdiv | fdiv | urem | srem | frem ;
+ArithmeticOps ::= + OptNW add | fadd | OptNW sub | fsub | OptNW mul | fmul |
+ udiv | OptExact sdiv | fdiv | urem | srem | frem ;
LogicalOps ::= shl | lshr | ashr | and | or | xor;
CastOps ::= trunc | zext | sext | fptrunc | fpext | bitcast |
uitofp | sitofp | fptoui | fptosi | inttoptr | ptrtoint ;
@@ -89,6 +89,7 @@ GVInternalLinkage
| appending
| dllexport
| common
+ | private
;
GVExternalLinkage
@@ -160,6 +161,7 @@ FuncAttr ::= noreturn
| signext
| readnone
| readonly
+ | inlinehint
| noinline
| alwaysinline
| optsize
@@ -232,14 +234,12 @@ ConstVal::= Types "[" ^ ConstVector ^ "]"
| Types FPVAL ;
ConstExpr::= CastOps "(" ^ ConstVal to Types ^ ")"
- | getelementptr "(" ^ ConstVal IndexList ^ ")"
+ | getelementptr OptInBounds "(" ^ ConstVal IndexList ^ ")"
| select "(" ^ ConstVal ^ "," ConstVal ^ "," ConstVal ^ ")"
| ArithmeticOps "(" ^ ConstVal ^ "," ConstVal ^ ")"
| LogicalOps "(" ^ ConstVal ^ "," ConstVal ^ ")"
| icmp IPredicates "(" ^ ConstVal ^ "," ConstVal ^ ")"
| fcmp FPredicates "(" ^ ConstVal ^ "," ConstVal ^ ")"
- | vicmp IPredicates "(" ^ ConstVal ^ "," ConstVal ^ ")"
- | vfcmp FPredicates "(" ^ ConstVal ^ "," ConstVal ^ ")"
| extractelement "(" ^ ConstVal ^ "," ConstVal ^ ")"
| insertelement "(" ^ ConstVal ^ "," ConstVal ^ "," ConstVal ^ ")"
| shufflevector "(" ^ ConstVal ^ "," ConstVal ^ "," ConstVal ^ ")"
@@ -382,8 +382,6 @@ InstVal ::=
| LogicalOps Types ValueRef ^ "," ValueRef
| icmp IPredicates Types ValueRef ^ "," ValueRef
| fcmp FPredicates Types ValueRef ^ "," ValueRef
- | vicmp IPredicates Types ValueRef ^ "," ValueRef
- | vfcmp FPredicates Types ValueRef ^ "," ValueRef
| CastOps ResolvedVal to Types
| select ResolvedVal ^ "," ResolvedVal ^ "," ResolvedVal
| "va_arg" ResolvedVal ^ "," Types
@@ -396,6 +394,11 @@ InstVal ::=
| MemoryInst ;
OptVolatile ::= - volatile | _ ;
+OptExact ::= - exact | _ ;
+OptNSW ::= - nsw | _ ;
+OptNUW ::= - nuw | _ ;
+OptNW ::= OptNUW OptNSW ;
+OptInBounds ::= - inbounds | _ ;
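+(* Illustrative examples now derivable with these options:
+   "add nuw nsw i32 %a, %b", "sdiv exact i32 %a, %b",
+   "getelementptr inbounds i32* %p, i32 0". *)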
MemoryInst ::= malloc Types OptCAlign
| malloc Types ^ "," INTTYPE ValueRef OptCAlign
@@ -405,6 +408,6 @@ MemoryInst ::= malloc Types OptCAlign
| OptVolatile load Types ValueRef OptCAlign
| OptVolatile store ResolvedVal ^ "," Types ValueRef OptCAlign
| getresult Types ValueRef ^ "," EUINT64VAL
- | getelementptr Types ValueRef IndexList
+ | getelementptr OptInBounds Types ValueRef IndexList
| extractvalue Types ValueRef ^ ConstantIndexList
| insertvalue Types ValueRef ^ "," Types ValueRef ^ ConstantIndexList ;
diff --git a/utils/llvmdo b/utils/llvmdo
index b666f2209b06..26f2183ae4ee 100755
--- a/utils/llvmdo
+++ b/utils/llvmdo
@@ -53,7 +53,7 @@ if test "$1" = "-dirs" ; then
LLVMDO_DIRS="$2"
shift ; shift
elif test -z "$LLVMDO_DIRS" ; then
- LLVMDO_DIRS="include lib tools utils runtime autoconf docs test examples projects"
+ LLVMDO_DIRS="include lib tools utils runtime autoconf docs test examples projects cmake"
fi
if test "$1" = "-code-only" ; then
@@ -126,6 +126,7 @@ files_to_match="\
-o -name *.tr \
-o -name *.y \
-o -name Make* \
+ -o -name *.cmake \
-o -name llvmdo \
-o -name llvmgrep \
-o -name check-each-file \
diff --git a/utils/llvmgrep b/utils/llvmgrep
index 7d7355ba9904..540f0598579d 100755
--- a/utils/llvmgrep
+++ b/utils/llvmgrep
@@ -33,7 +33,7 @@ if test -d "$TOPDIR" ; then
*) grep_cmd="egrep -l -n" ;;
esac
./utils/llvmdo -topdir "$TOPDIR" \
- -dirs "include lib tools utils docs examples test projects" $grep_cmd "$*"
+ -dirs "include lib tools utils docs examples test unittests projects cmake" $grep_cmd "$*"
else
echo "Can't find LLVM top directory"
fi
diff --git a/utils/not/CMakeLists.txt b/utils/not/CMakeLists.txt
new file mode 100644
index 000000000000..407c82eeeadd
--- /dev/null
+++ b/utils/not/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_executable(not
+ not.cpp
+ )
+
+target_link_libraries(not LLVMSystem)
+if( MINGW )
+ target_link_libraries(not imagehlp psapi)
+endif( MINGW )
+if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+ target_link_libraries(not pthread)
+endif()
diff --git a/utils/not/Makefile b/utils/not/Makefile
new file mode 100644
index 000000000000..fef4802229d2
--- /dev/null
+++ b/utils/not/Makefile
@@ -0,0 +1,21 @@
+##===- utils/not/Makefile ----------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = not
+USEDLIBS = LLVMSupport.a LLVMSystem.a
+
+# This tool has no plugins; optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/utils/not/not.cpp b/utils/not/not.cpp
new file mode 100644
index 000000000000..dd89b8f11c0e
--- /dev/null
+++ b/utils/not/not.cpp
@@ -0,0 +1,17 @@
+//===- not.cpp - The 'not' testing tool -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Path.h"
+#include "llvm/System/Program.h"
+using namespace llvm;
+
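+// 'not' inverts the exit status of the command it runs, so a test line like
+// "not grep error out.log" succeeds exactly when grep finds nothing (i.e.
+// exits non-zero).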
+int main(int argc, const char **argv) {
+ sys::Path Program = sys::Program::FindProgramByName(argv[1]);
+ return !sys::Program::ExecuteAndWait(Program, argv + 1);
+}
diff --git a/utils/unittest/Makefile b/utils/unittest/Makefile
index 2f3e601b41b7..6a09341832bc 100644
--- a/utils/unittest/Makefile
+++ b/utils/unittest/Makefile
@@ -8,6 +8,6 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = googletest
+PARALLEL_DIRS = googletest UnitTestMain
include $(LEVEL)/Makefile.common
diff --git a/utils/unittest/UnitTestMain/Makefile b/utils/unittest/UnitTestMain/Makefile
new file mode 100644
index 000000000000..aadff217aadc
--- /dev/null
+++ b/utils/unittest/UnitTestMain/Makefile
@@ -0,0 +1,21 @@
+##===- utils/unittest/UnitTestMain/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+
+include $(LEVEL)/Makefile.config
+NO_MISSING_FIELD_INITIALIZERS := $(shell $(CXX) -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers)
+NO_VARIADIC_MACROS := $(shell $(CXX) -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros)
+
+LIBRARYNAME = UnitTestMain
+BUILD_ARCHIVE = 1
+CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include
+CPP.Flags += $(NO_MISSING_FIELD_INITIALIZERS) $(NO_VARIADIC_MACROS)
+
+include $(LEVEL)/Makefile.common
diff --git a/utils/unittest/UnitTestMain/TestMain.cpp b/utils/unittest/UnitTestMain/TestMain.cpp
new file mode 100644
index 000000000000..d97dca872ad7
--- /dev/null
+++ b/utils/unittest/UnitTestMain/TestMain.cpp
@@ -0,0 +1,15 @@
+//===--- utils/unittest/UnitTestMain/TestMain.cpp - unittest driver -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv) {
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/utils/unittest/googletest/Makefile b/utils/unittest/googletest/Makefile
index d709878449a6..29fe679a9b3e 100644
--- a/utils/unittest/googletest/Makefile
+++ b/utils/unittest/googletest/Makefile
@@ -8,17 +8,17 @@
##===----------------------------------------------------------------------===##
LEVEL := ../../..
+
include $(LEVEL)/Makefile.config
NO_MISSING_FIELD_INITIALIZERS := $(shell $(CXX) -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers)
NO_VARIADIC_MACROS := $(shell $(CXX) -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros)
-
LIBRARYNAME = GoogleTest
BUILD_ARCHIVE = 1
-CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include/
+CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include
CPP.Flags += $(NO_MISSING_FIELD_INITIALIZERS) $(NO_VARIADIC_MACROS)
-ifeq ($(OS),MingW)
+ifeq ($(HOST_OS),MingW)
CPP.Flags += -DGTEST_OS_WINDOWS=1
endif
diff --git a/utils/unittest/googletest/README.LLVM b/utils/unittest/googletest/README.LLVM
index 2c673cc6ab74..e907a5e6ea28 100644
--- a/utils/unittest/googletest/README.LLVM
+++ b/utils/unittest/googletest/README.LLVM
@@ -24,3 +24,8 @@ $ perl -pi -e 's|^#include "src/|#include "gtest/internal/|' *.cc
$ rm -f gtest-all.cc gtest_main.cc
$ mv COPYING LICENSE.TXT
+
+
+Modified as follows:
+* To GTestStreamToHelper in include/gtest/internal/gtest-internal.h,
+ added the ability to stream with raw_os_ostream.
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
index 37faaaebea48..242ffea12f92 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
@@ -56,6 +56,8 @@
#include <gtest/internal/gtest-filepath.h>
#include <gtest/internal/gtest-type-util.h>
+#include "llvm/Support/raw_os_ostream.h"
+
// Due to C++ preprocessor weirdness, we need double indirection to
// concatenate two tokens when one of them is __LINE__. Writing
//
@@ -92,9 +94,27 @@
// ::operator<<;" in the definition of Message's operator<<. That fix
// doesn't require a helper function, but unfortunately doesn't
// compile with MSVC.
+
+// LLVM INTERNAL CHANGE: To allow operator<< to work with both
+// std::ostreams and LLVM's raw_ostreams, we define a special
+// std::ostream with an implicit conversion to raw_ostream& and stream
+// to that. This causes the compiler to prefer std::ostream overloads
+// but still find raw_ostream& overloads.
+namespace llvm {
+class convertible_fwd_ostream : public std::ostream {
+ std::ostream& os_;
+ raw_os_ostream ros_;
+
+public:
+ convertible_fwd_ostream(std::ostream& os)
+ : std::ostream(os.rdbuf()), os_(os), ros_(*this) {}
+ operator raw_ostream&() { return ros_; }
+};
+}
template <typename T>
inline void GTestStreamToHelper(std::ostream* os, const T& val) {
- *os << val;
+ llvm::convertible_fwd_ostream cos(*os);
+ cos << val;
}
namespace testing {
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-port.h b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
index 6a1593ef0178..3e49993bff4c 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -185,6 +185,8 @@
#define GTEST_OS_ZOS
#elif defined(__sun) && defined(__SVR4)
#define GTEST_OS_SOLARIS
+#elif defined(__HAIKU__)
+#define GTEST_OS_HAIKU
#endif // _MSC_VER
// Determines whether ::std::string and ::string are available.
@@ -225,7 +227,7 @@
// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
// is available.
-#if defined(GTEST_OS_CYGWIN) || defined(GTEST_OS_SOLARIS)
+#if defined(GTEST_OS_CYGWIN) || defined(GTEST_OS_SOLARIS) || defined(GTEST_OS_HAIKU)
// At least some versions of cygwin don't support ::std::wstring.
// Solaris' libc++ doesn't support it either.
#define GTEST_HAS_STD_WSTRING 0
diff --git a/utils/valgrind/x86_64-pc-linux-gnu_gcc-4.3.3.supp b/utils/valgrind/x86_64-pc-linux-gnu_gcc-4.3.3.supp
new file mode 100644
index 000000000000..a86be6cb5dd7
--- /dev/null
+++ b/utils/valgrind/x86_64-pc-linux-gnu_gcc-4.3.3.supp
@@ -0,0 +1,23 @@
+{
+ libstdcxx_overlapped_memcpy_in_stable_sort_1
+ Memcheck:Overlap
+ fun:memcpy
+ ...
+ fun:_ZSt11stable_sortIN9__gnu_cxx17__normal_iteratorIPSt4pairIPKN4llvm5ValueEjESt6vectorIS7_SaIS7_EEEEN12_GLOBAL__N_116CstSortPredicateEEvT_SF_T0_
+}
+
+{
+ libstdcxx_overlapped_memcpy_in_stable_sort_2
+ Memcheck:Overlap
+ fun:memcpy
+ ...
+ fun:_ZSt11stable_sortIN9__gnu_cxx17__normal_iteratorIPSt4pairIPKN4llvm5ValueEjESt6vectorIS7_SaIS7_EEEEN12_GLOBAL__N_116CstSortPredicateEEvT_SF_T0_
+}
+
+{
+ libstdcxx_overlapped_memcpy_in_stable_sort_3
+ Memcheck:Overlap
+ fun:memcpy
+ ...
+ fun:_ZSt11stable_sortIN9__gnu_cxx17__normal_iteratorIPSt4pairIPKN4llvm4TypeEjESt6vectorIS7_SaIS7_EEEEPFbRKS7_SE_EEvT_SH_T0_
+}
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index b4104f9dfe61..2cc266bd27ed 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -29,6 +29,7 @@ syn keyword llvmStatement icmp fcmp
syn keyword llvmStatement eq ne ugt uge ult ule sgt sge slt sle
syn keyword llvmStatement oeq ogt oge olt ole one ord ueq ugt uge
syn keyword llvmStatement ult ule une uno
+syn keyword llvmStatement nuw nsw exact inbounds
syn keyword llvmStatement phi call select shl lshr ashr va_arg
syn keyword llvmStatement trunc zext sext
syn keyword llvmStatement fptrunc fpext fptoui fptosi uitofp sitofp
@@ -40,7 +41,7 @@ syn keyword llvmStatement extractvalue insertvalue
" Keywords.
syn keyword llvmKeyword define declare global constant
-syn keyword llvmKeyword internal external
+syn keyword llvmKeyword internal external private
syn keyword llvmKeyword linkonce linkonce_odr weak weak_odr appending
syn keyword llvmKeyword common extern_weak
syn keyword llvmKeyword thread_local dllimport dllexport
@@ -49,8 +50,9 @@ syn keyword llvmKeyword except deplibs
syn keyword llvmKeyword volatile fastcc coldcc cc ccc
syn keyword llvmKeyword x86_stdcallcc x86_fastcallcc
syn keyword llvmKeyword signext zeroext inreg sret nounwind noreturn
-syn keyword llvmKeyword nocapture byval nest readnone readonly
-syn keyword llvmKeyword noinline alwaysinline optsize ssp sspreq
+syn keyword llvmKeyword nocapture byval nest readnone readonly noalias
+syn keyword llvmKeyword inlinehint noinline alwaysinline optsize ssp sspreq
+syn keyword llvmKeyword noredzone noimplicitfloat naked
syn keyword llvmKeyword module asm align tail to
syn keyword llvmKeyword addrspace section alias sideeffect c gc
syn keyword llvmKeyword target datalayout triple
diff --git a/win32/unistd.h b/win32/unistd.h
index 66674acd4dcc..412afcc0a668 100644
--- a/win32/unistd.h
+++ b/win32/unistd.h
@@ -1 +1 @@
-// This file has to be here because it's #included from the auto generated FileLexer.cpp
\ No newline at end of file
+// This file has to be here because it's #included from the auto generated FileLexer.cpp